From 5ff807acdebdd75960418060f465bb8ba79e96a6 Mon Sep 17 00:00:00 2001 From: zeripath Date: Thu, 17 Jun 2021 19:01:33 +0100 Subject: [PATCH] Run processors on whole of text (#16155) (#16185) Backport #16155 There is an inefficiency in the design of our processors which means that Emoji and other processors run in order n^2 time. This PR forces the processors to process the entirety of text node before passing back up. The fundamental inefficiency remains but it should be significantly ameliorated. Signed-off-by: Andrew Thornton --- modules/emoji/emoji.go | 5 + modules/markup/html.go | 712 ++++++++++++++++++++---------------- modules/markup/html_test.go | 16 + 3 files changed, 414 insertions(+), 319 deletions(-) diff --git a/modules/emoji/emoji.go b/modules/emoji/emoji.go index 01fb764ce..85df2d697 100644 --- a/modules/emoji/emoji.go +++ b/modules/emoji/emoji.go @@ -6,6 +6,7 @@ package emoji import ( + "io" "sort" "strings" "sync" @@ -145,6 +146,8 @@ func (n *rememberSecondWriteWriter) Write(p []byte) (int, error) { if n.writecount == 2 { n.idx = n.pos n.end = n.pos + len(p) + n.pos += len(p) + return len(p), io.EOF } n.pos += len(p) return len(p), nil @@ -155,6 +158,8 @@ func (n *rememberSecondWriteWriter) WriteString(s string) (int, error) { if n.writecount == 2 { n.idx = n.pos n.end = n.pos + len(s) + n.pos += len(s) + return len(s), io.EOF } n.pos += len(s) return len(s), nil diff --git a/modules/markup/html.go b/modules/markup/html.go index 908b4de09..81e10df45 100644 --- a/modules/markup/html.go +++ b/modules/markup/html.go @@ -87,6 +87,7 @@ func isLinkStr(link string) bool { return validLinksPattern.MatchString(link) } +// FIXME: This function is not concurrent safe func getIssueFullPattern() *regexp.Regexp { if issueFullPattern == nil { issueFullPattern = regexp.MustCompile(regexp.QuoteMeta(setting.AppURL) + @@ -605,26 +606,38 @@ func replaceContentList(node *html.Node, i, j int, newNodes []*html.Node) { } func mentionProcessor(ctx *postProcessCtx, node *html.Node) { - // We replace only the first mention; other mentions will be addressed later - found, loc := references.FindFirstMentionBytes([]byte(node.Data)) - if !found { - return - } - mention := node.Data[loc.Start:loc.End] - var teams string - teams, ok := ctx.metas["teams"] - // FIXME: util.URLJoin may not be necessary here: - // - setting.AppURL is defined to have a terminal '/' so unless mention[1:] - // is an AppSubURL link we can probably fallback to concatenation. - // team mention should follow @orgName/teamName style - if ok && strings.Contains(mention, "/") { - mentionOrgAndTeam := strings.Split(mention, "/") - if mentionOrgAndTeam[0][1:] == ctx.metas["org"] && strings.Contains(teams, ","+strings.ToLower(mentionOrgAndTeam[1])+",") { - replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(setting.AppURL, "org", ctx.metas["org"], "teams", mentionOrgAndTeam[1]), mention, "mention")) + start := 0 + next := node.NextSibling + for node != nil && node != next && start < len(node.Data) { + // We replace only the first mention; other mentions will be addressed later + found, loc := references.FindFirstMentionBytes([]byte(node.Data[start:])) + if !found { + return } - return + loc.Start += start + loc.End += start + mention := node.Data[loc.Start:loc.End] + var teams string + teams, ok := ctx.metas["teams"] + // FIXME: util.URLJoin may not be necessary here: + // - setting.AppURL is defined to have a terminal '/' so unless mention[1:] + // is an AppSubURL link we can probably fallback to concatenation. + // team mention should follow @orgName/teamName style + if ok && strings.Contains(mention, "/") { + mentionOrgAndTeam := strings.Split(mention, "/") + if mentionOrgAndTeam[0][1:] == ctx.metas["org"] && strings.Contains(teams, ","+strings.ToLower(mentionOrgAndTeam[1])+",") { + replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(setting.AppURL, "org", ctx.metas["org"], "teams", mentionOrgAndTeam[1]), mention, "mention")) + node = node.NextSibling.NextSibling + start = 0 + continue + } + start = loc.End + continue + } + replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(setting.AppURL, mention[1:]), mention, "mention")) + node = node.NextSibling.NextSibling + start = 0 } - replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(setting.AppURL, mention[1:]), mention, "mention")) } func shortLinkProcessor(ctx *postProcessCtx, node *html.Node) { @@ -632,188 +645,195 @@ func shortLinkProcessor(ctx *postProcessCtx, node *html.Node) { } func shortLinkProcessorFull(ctx *postProcessCtx, node *html.Node, noLink bool) { - m := shortLinkPattern.FindStringSubmatchIndex(node.Data) - if m == nil { - return - } + next := node.NextSibling + for node != nil && node != next { + m := shortLinkPattern.FindStringSubmatchIndex(node.Data) + if m == nil { + return + } - content := node.Data[m[2]:m[3]] - tail := node.Data[m[4]:m[5]] - props := make(map[string]string) - - // MediaWiki uses [[link|text]], while GitHub uses [[text|link]] - // It makes page handling terrible, but we prefer GitHub syntax - // And fall back to MediaWiki only when it is obvious from the look - // Of text and link contents - sl := strings.Split(content, "|") - for _, v := range sl { - if equalPos := strings.IndexByte(v, '='); equalPos == -1 { - // There is no equal in this argument; this is a mandatory arg - if props["name"] == "" { - if isLinkStr(v) { - // If we clearly see it is a link, we save it so - - // But first we need to ensure, that if both mandatory args provided - // look like links, we stick to GitHub syntax - if props["link"] != "" { - props["name"] = props["link"] - } + content := node.Data[m[2]:m[3]] + tail := node.Data[m[4]:m[5]] + props := make(map[string]string) + + // MediaWiki uses [[link|text]], while GitHub uses [[text|link]] + // It makes page handling terrible, but we prefer GitHub syntax + // And fall back to MediaWiki only when it is obvious from the look + // Of text and link contents + sl := strings.Split(content, "|") + for _, v := range sl { + if equalPos := strings.IndexByte(v, '='); equalPos == -1 { + // There is no equal in this argument; this is a mandatory arg + if props["name"] == "" { + if isLinkStr(v) { + // If we clearly see it is a link, we save it so + + // But first we need to ensure, that if both mandatory args provided + // look like links, we stick to GitHub syntax + if props["link"] != "" { + props["name"] = props["link"] + } - props["link"] = strings.TrimSpace(v) + props["link"] = strings.TrimSpace(v) + } else { + props["name"] = v + } } else { - props["name"] = v + props["link"] = strings.TrimSpace(v) } } else { - props["link"] = strings.TrimSpace(v) - } - } else { - // There is an equal; optional argument. - - sep := strings.IndexByte(v, '=') - key, val := v[:sep], html.UnescapeString(v[sep+1:]) - - // When parsing HTML, x/net/html will change all quotes which are - // not used for syntax into UTF-8 quotes. So checking val[0] won't - // be enough, since that only checks a single byte. - if len(val) > 1 { - if (strings.HasPrefix(val, "“") && strings.HasSuffix(val, "”")) || - (strings.HasPrefix(val, "‘") && strings.HasSuffix(val, "’")) { - const lenQuote = len("‘") - val = val[lenQuote : len(val)-lenQuote] - } else if (strings.HasPrefix(val, "\"") && strings.HasSuffix(val, "\"")) || - (strings.HasPrefix(val, "'") && strings.HasSuffix(val, "'")) { - val = val[1 : len(val)-1] - } else if strings.HasPrefix(val, "'") && strings.HasSuffix(val, "’") { - const lenQuote = len("‘") - val = val[1 : len(val)-lenQuote] + // There is an equal; optional argument. + + sep := strings.IndexByte(v, '=') + key, val := v[:sep], html.UnescapeString(v[sep+1:]) + + // When parsing HTML, x/net/html will change all quotes which are + // not used for syntax into UTF-8 quotes. So checking val[0] won't + // be enough, since that only checks a single byte. + if len(val) > 1 { + if (strings.HasPrefix(val, "“") && strings.HasSuffix(val, "”")) || + (strings.HasPrefix(val, "‘") && strings.HasSuffix(val, "’")) { + const lenQuote = len("‘") + val = val[lenQuote : len(val)-lenQuote] + } else if (strings.HasPrefix(val, "\"") && strings.HasSuffix(val, "\"")) || + (strings.HasPrefix(val, "'") && strings.HasSuffix(val, "'")) { + val = val[1 : len(val)-1] + } else if strings.HasPrefix(val, "'") && strings.HasSuffix(val, "’") { + const lenQuote = len("‘") + val = val[1 : len(val)-lenQuote] + } } + props[key] = val } - props[key] = val } - } - - var name, link string - if props["link"] != "" { - link = props["link"] - } else if props["name"] != "" { - link = props["name"] - } - if props["title"] != "" { - name = props["title"] - } else if props["name"] != "" { - name = props["name"] - } else { - name = link - } - name += tail - image := false - switch ext := filepath.Ext(link); ext { - // fast path: empty string, ignore - case "": - break - case ".jpg", ".jpeg", ".png", ".tif", ".tiff", ".webp", ".gif", ".bmp", ".ico", ".svg": - image = true - } - - childNode := &html.Node{} - linkNode := &html.Node{ - FirstChild: childNode, - LastChild: childNode, - Type: html.ElementNode, - Data: "a", - DataAtom: atom.A, - } - childNode.Parent = linkNode - absoluteLink := isLinkStr(link) - if !absoluteLink { - if image { - link = strings.ReplaceAll(link, " ", "+") + var name, link string + if props["link"] != "" { + link = props["link"] + } else if props["name"] != "" { + link = props["name"] + } + if props["title"] != "" { + name = props["title"] + } else if props["name"] != "" { + name = props["name"] } else { - link = strings.ReplaceAll(link, " ", "-") + name = link } - if !strings.Contains(link, "/") { - link = url.PathEscape(link) + + name += tail + image := false + switch ext := filepath.Ext(link); ext { + // fast path: empty string, ignore + case "": + // leave image as false + case ".jpg", ".jpeg", ".png", ".tif", ".tiff", ".webp", ".gif", ".bmp", ".ico", ".svg": + image = true } - } - urlPrefix := ctx.urlPrefix - if image { + + childNode := &html.Node{} + linkNode := &html.Node{ + FirstChild: childNode, + LastChild: childNode, + Type: html.ElementNode, + Data: "a", + DataAtom: atom.A, + } + childNode.Parent = linkNode + absoluteLink := isLinkStr(link) if !absoluteLink { - if IsSameDomain(urlPrefix) { - urlPrefix = strings.Replace(urlPrefix, "/src/", "/raw/", 1) + if image { + link = strings.ReplaceAll(link, " ", "+") + } else { + link = strings.ReplaceAll(link, " ", "-") } - if ctx.isWikiMarkdown { - link = util.URLJoin("wiki", "raw", link) + if !strings.Contains(link, "/") { + link = url.PathEscape(link) } - link = util.URLJoin(urlPrefix, link) - } - title := props["title"] - if title == "" { - title = props["alt"] - } - if title == "" { - title = path.Base(name) - } - alt := props["alt"] - if alt == "" { - alt = name } + urlPrefix := ctx.urlPrefix + if image { + if !absoluteLink { + if IsSameDomain(urlPrefix) { + urlPrefix = strings.Replace(urlPrefix, "/src/", "/raw/", 1) + } + if ctx.isWikiMarkdown { + link = util.URLJoin("wiki", "raw", link) + } + link = util.URLJoin(urlPrefix, link) + } + title := props["title"] + if title == "" { + title = props["alt"] + } + if title == "" { + title = path.Base(name) + } + alt := props["alt"] + if alt == "" { + alt = name + } - // make the childNode an image - if we can, we also place the alt - childNode.Type = html.ElementNode - childNode.Data = "img" - childNode.DataAtom = atom.Img - childNode.Attr = []html.Attribute{ - {Key: "src", Val: link}, - {Key: "title", Val: title}, - {Key: "alt", Val: alt}, - } - if alt == "" { - childNode.Attr = childNode.Attr[:2] - } - } else { - if !absoluteLink { - if ctx.isWikiMarkdown { - link = util.URLJoin("wiki", link) + // make the childNode an image - if we can, we also place the alt + childNode.Type = html.ElementNode + childNode.Data = "img" + childNode.DataAtom = atom.Img + childNode.Attr = []html.Attribute{ + {Key: "src", Val: link}, + {Key: "title", Val: title}, + {Key: "alt", Val: alt}, + } + if alt == "" { + childNode.Attr = childNode.Attr[:2] + } + } else { + if !absoluteLink { + if ctx.isWikiMarkdown { + link = util.URLJoin("wiki", link) + } + link = util.URLJoin(urlPrefix, link) } - link = util.URLJoin(urlPrefix, link) + childNode.Type = html.TextNode + childNode.Data = name } - childNode.Type = html.TextNode - childNode.Data = name - } - if noLink { - linkNode = childNode - } else { - linkNode.Attr = []html.Attribute{{Key: "href", Val: link}} + if noLink { + linkNode = childNode + } else { + linkNode.Attr = []html.Attribute{{Key: "href", Val: link}} + } + replaceContent(node, m[0], m[1], linkNode) + node = node.NextSibling.NextSibling } - replaceContent(node, m[0], m[1], linkNode) } func fullIssuePatternProcessor(ctx *postProcessCtx, node *html.Node) { if ctx.metas == nil { return } - m := getIssueFullPattern().FindStringSubmatchIndex(node.Data) - if m == nil { - return - } - link := node.Data[m[0]:m[1]] - id := "#" + node.Data[m[2]:m[3]] - - // extract repo and org name from matched link like - // http://localhost:3000/gituser/myrepo/issues/1 - linkParts := strings.Split(path.Clean(link), "/") - matchOrg := linkParts[len(linkParts)-4] - matchRepo := linkParts[len(linkParts)-3] - - if matchOrg == ctx.metas["user"] && matchRepo == ctx.metas["repo"] { - // TODO if m[4]:m[5] is not nil, then link is to a comment, - // and we should indicate that in the text somehow - replaceContent(node, m[0], m[1], createLink(link, id, "ref-issue")) - - } else { - orgRepoID := matchOrg + "/" + matchRepo + id - replaceContent(node, m[0], m[1], createLink(link, orgRepoID, "ref-issue")) + next := node.NextSibling + for node != nil && node != next { + m := getIssueFullPattern().FindStringSubmatchIndex(node.Data) + if m == nil { + return + } + link := node.Data[m[0]:m[1]] + id := "#" + node.Data[m[2]:m[3]] + + // extract repo and org name from matched link like + // http://localhost:3000/gituser/myrepo/issues/1 + linkParts := strings.Split(path.Clean(link), "/") + matchOrg := linkParts[len(linkParts)-4] + matchRepo := linkParts[len(linkParts)-3] + + if matchOrg == ctx.metas["user"] && matchRepo == ctx.metas["repo"] { + // TODO if m[4]:m[5] is not nil, then link is to a comment, + // and we should indicate that in the text somehow + replaceContent(node, m[0], m[1], createLink(link, id, "ref-issue")) + } else { + orgRepoID := matchOrg + "/" + matchRepo + id + replaceContent(node, m[0], m[1], createLink(link, orgRepoID, "ref-issue")) + } + node = node.NextSibling.NextSibling } } @@ -821,70 +841,74 @@ func issueIndexPatternProcessor(ctx *postProcessCtx, node *html.Node) { if ctx.metas == nil { return } - var ( found bool ref *references.RenderizableReference ) - _, exttrack := ctx.metas["format"] - alphanum := ctx.metas["style"] == IssueNameStyleAlphanumeric - - // Repos with external issue trackers might still need to reference local PRs - // We need to concern with the first one that shows up in the text, whichever it is - found, ref = references.FindRenderizableReferenceNumeric(node.Data, exttrack && alphanum) - if exttrack && alphanum { - if found2, ref2 := references.FindRenderizableReferenceAlphanumeric(node.Data); found2 { - if !found || ref2.RefLocation.Start < ref.RefLocation.Start { - found = true - ref = ref2 + next := node.NextSibling + for node != nil && node != next { + _, exttrack := ctx.metas["format"] + alphanum := ctx.metas["style"] == IssueNameStyleAlphanumeric + + // Repos with external issue trackers might still need to reference local PRs + // We need to concern with the first one that shows up in the text, whichever it is + found, ref = references.FindRenderizableReferenceNumeric(node.Data, exttrack && alphanum) + if exttrack && alphanum { + if found2, ref2 := references.FindRenderizableReferenceAlphanumeric(node.Data); found2 { + if !found || ref2.RefLocation.Start < ref.RefLocation.Start { + found = true + ref = ref2 + } } } - } - if !found { - return - } - - var link *html.Node - reftext := node.Data[ref.RefLocation.Start:ref.RefLocation.End] - if exttrack && !ref.IsPull { - ctx.metas["index"] = ref.Issue - link = createLink(com.Expand(ctx.metas["format"], ctx.metas), reftext, "ref-issue") - } else { - // Path determines the type of link that will be rendered. It's unknown at this point whether - // the linked item is actually a PR or an issue. Luckily it's of no real consequence because - // Gitea will redirect on click as appropriate. - path := "issues" - if ref.IsPull { - path = "pulls" + if !found { + return } - if ref.Owner == "" { - link = createLink(util.URLJoin(setting.AppURL, ctx.metas["user"], ctx.metas["repo"], path, ref.Issue), reftext, "ref-issue") + + var link *html.Node + reftext := node.Data[ref.RefLocation.Start:ref.RefLocation.End] + if exttrack && !ref.IsPull { + ctx.metas["index"] = ref.Issue + link = createLink(com.Expand(ctx.metas["format"], ctx.metas), reftext, "ref-issue") } else { - link = createLink(util.URLJoin(setting.AppURL, ref.Owner, ref.Name, path, ref.Issue), reftext, "ref-issue") + // Path determines the type of link that will be rendered. It's unknown at this point whether + // the linked item is actually a PR or an issue. Luckily it's of no real consequence because + // Gitea will redirect on click as appropriate. + path := "issues" + if ref.IsPull { + path = "pulls" + } + if ref.Owner == "" { + link = createLink(util.URLJoin(setting.AppURL, ctx.metas["user"], ctx.metas["repo"], path, ref.Issue), reftext, "ref-issue") + } else { + link = createLink(util.URLJoin(setting.AppURL, ref.Owner, ref.Name, path, ref.Issue), reftext, "ref-issue") + } } - } - if ref.Action == references.XRefActionNone { - replaceContent(node, ref.RefLocation.Start, ref.RefLocation.End, link) - return - } + if ref.Action == references.XRefActionNone { + replaceContent(node, ref.RefLocation.Start, ref.RefLocation.End, link) + node = node.NextSibling.NextSibling + continue + } - // Decorate action keywords if actionable - var keyword *html.Node - if references.IsXrefActionable(ref, exttrack, alphanum) { - keyword = createKeyword(node.Data[ref.ActionLocation.Start:ref.ActionLocation.End]) - } else { - keyword = &html.Node{ + // Decorate action keywords if actionable + var keyword *html.Node + if references.IsXrefActionable(ref, exttrack, alphanum) { + keyword = createKeyword(node.Data[ref.ActionLocation.Start:ref.ActionLocation.End]) + } else { + keyword = &html.Node{ + Type: html.TextNode, + Data: node.Data[ref.ActionLocation.Start:ref.ActionLocation.End], + } + } + spaces := &html.Node{ Type: html.TextNode, - Data: node.Data[ref.ActionLocation.Start:ref.ActionLocation.End], + Data: node.Data[ref.ActionLocation.End:ref.RefLocation.Start], } + replaceContentList(node, ref.ActionLocation.Start, ref.RefLocation.End, []*html.Node{keyword, spaces, link}) + node = node.NextSibling.NextSibling.NextSibling.NextSibling } - spaces := &html.Node{ - Type: html.TextNode, - Data: node.Data[ref.ActionLocation.End:ref.RefLocation.Start], - } - replaceContentList(node, ref.ActionLocation.Start, ref.RefLocation.End, []*html.Node{keyword, spaces, link}) } // fullSha1PatternProcessor renders SHA containing URLs @@ -892,87 +916,112 @@ func fullSha1PatternProcessor(ctx *postProcessCtx, node *html.Node) { if ctx.metas == nil { return } - m := anySHA1Pattern.FindStringSubmatchIndex(node.Data) - if m == nil { - return - } - urlFull := node.Data[m[0]:m[1]] - text := base.ShortSha(node.Data[m[2]:m[3]]) + next := node.NextSibling + for node != nil && node != next { + m := anySHA1Pattern.FindStringSubmatchIndex(node.Data) + if m == nil { + return + } - // 3rd capture group matches a optional path - subpath := "" - if m[5] > 0 { - subpath = node.Data[m[4]:m[5]] - } + urlFull := node.Data[m[0]:m[1]] + text := base.ShortSha(node.Data[m[2]:m[3]]) - // 4th capture group matches a optional url hash - hash := "" - if m[7] > 0 { - hash = node.Data[m[6]:m[7]][1:] - } + // 3rd capture group matches a optional path + subpath := "" + if m[5] > 0 { + subpath = node.Data[m[4]:m[5]] + } - start := m[0] - end := m[1] + // 4th capture group matches a optional url hash + hash := "" + if m[7] > 0 { + hash = node.Data[m[6]:m[7]][1:] + } - // If url ends in '.', it's very likely that it is not part of the - // actual url but used to finish a sentence. - if strings.HasSuffix(urlFull, ".") { - end-- - urlFull = urlFull[:len(urlFull)-1] - if hash != "" { - hash = hash[:len(hash)-1] - } else if subpath != "" { - subpath = subpath[:len(subpath)-1] + start := m[0] + end := m[1] + + // If url ends in '.', it's very likely that it is not part of the + // actual url but used to finish a sentence. + if strings.HasSuffix(urlFull, ".") { + end-- + urlFull = urlFull[:len(urlFull)-1] + if hash != "" { + hash = hash[:len(hash)-1] + } else if subpath != "" { + subpath = subpath[:len(subpath)-1] + } } - } - if subpath != "" { - text += subpath - } + if subpath != "" { + text += subpath + } - if hash != "" { - text += " (" + hash + ")" - } + if hash != "" { + text += " (" + hash + ")" + } - replaceContent(node, start, end, createCodeLink(urlFull, text, "commit")) + replaceContent(node, start, end, createCodeLink(urlFull, text, "commit")) + node = node.NextSibling.NextSibling + } } // emojiShortCodeProcessor for rendering text like :smile: into emoji func emojiShortCodeProcessor(ctx *postProcessCtx, node *html.Node) { - - m := EmojiShortCodeRegex.FindStringSubmatchIndex(node.Data) - if m == nil { - return - } - - alias := node.Data[m[0]:m[1]] - alias = strings.ReplaceAll(alias, ":", "") - converted := emoji.FromAlias(alias) - if converted == nil { - // check if this is a custom reaction - s := strings.Join(setting.UI.Reactions, " ") + "gitea" - if strings.Contains(s, alias) { - replaceContent(node, m[0], m[1], createCustomEmoji(alias, "emoji")) + start := 0 + next := node.NextSibling + for node != nil && node != next && start < len(node.Data) { + m := EmojiShortCodeRegex.FindStringSubmatchIndex(node.Data[start:]) + if m == nil { return } - return - } + m[0] += start + m[1] += start + + start = m[1] + + alias := node.Data[m[0]:m[1]] + alias = strings.ReplaceAll(alias, ":", "") + converted := emoji.FromAlias(alias) + if converted == nil { + // check if this is a custom reaction + s := strings.Join(setting.UI.Reactions, " ") + "gitea" + if strings.Contains(s, alias) { + replaceContent(node, m[0], m[1], createCustomEmoji(alias, "emoji")) + node = node.NextSibling.NextSibling + start = 0 + continue + } + continue + } - replaceContent(node, m[0], m[1], createEmoji(converted.Emoji, "emoji", converted.Description)) + replaceContent(node, m[0], m[1], createEmoji(converted.Emoji, "emoji", converted.Description)) + node = node.NextSibling.NextSibling + start = 0 + } } // emoji processor to match emoji and add emoji class func emojiProcessor(ctx *postProcessCtx, node *html.Node) { - m := emoji.FindEmojiSubmatchIndex(node.Data) - if m == nil { - return - } - - codepoint := node.Data[m[0]:m[1]] - val := emoji.FromCode(codepoint) - if val != nil { - replaceContent(node, m[0], m[1], createEmoji(codepoint, "emoji", val.Description)) + start := 0 + next := node.NextSibling + for node != nil && node != next && start < len(node.Data) { + m := emoji.FindEmojiSubmatchIndex(node.Data[start:]) + if m == nil { + return + } + m[0] += start + m[1] += start + + codepoint := node.Data[m[0]:m[1]] + start = m[1] + val := emoji.FromCode(codepoint) + if val != nil { + replaceContent(node, m[0], m[1], createEmoji(codepoint, "emoji", val.Description)) + node = node.NextSibling.NextSibling + start = 0 + } } } @@ -982,49 +1031,69 @@ func sha1CurrentPatternProcessor(ctx *postProcessCtx, node *html.Node) { if ctx.metas == nil || ctx.metas["user"] == "" || ctx.metas["repo"] == "" || ctx.metas["repoPath"] == "" { return } - m := sha1CurrentPattern.FindStringSubmatchIndex(node.Data) - if m == nil { - return - } - hash := node.Data[m[2]:m[3]] - // The regex does not lie, it matches the hash pattern. - // However, a regex cannot know if a hash actually exists or not. - // We could assume that a SHA1 hash should probably contain alphas AND numerics - // but that is not always the case. - // Although unlikely, deadbeef and 1234567 are valid short forms of SHA1 hash - // as used by git and github for linking and thus we have to do similar. - // Because of this, we check to make sure that a matched hash is actually - // a commit in the repository before making it a link. - if _, err := git.NewCommand("rev-parse", "--verify", hash).RunInDirBytes(ctx.metas["repoPath"]); err != nil { - if !strings.Contains(err.Error(), "fatal: Needed a single revision") { - log.Debug("sha1CurrentPatternProcessor git rev-parse: %v", err) + + start := 0 + next := node.NextSibling + for node != nil && node != next && start < len(node.Data) { + m := sha1CurrentPattern.FindStringSubmatchIndex(node.Data[start:]) + if m == nil { + return + } + m[2] += start + m[3] += start + + hash := node.Data[m[2]:m[3]] + // The regex does not lie, it matches the hash pattern. + // However, a regex cannot know if a hash actually exists or not. + // We could assume that a SHA1 hash should probably contain alphas AND numerics + // but that is not always the case. + // Although unlikely, deadbeef and 1234567 are valid short forms of SHA1 hash + // as used by git and github for linking and thus we have to do similar. + // Because of this, we check to make sure that a matched hash is actually + // a commit in the repository before making it a link. + if _, err := git.NewCommand("rev-parse", "--verify", hash).RunInDirBytes(ctx.metas["repoPath"]); err != nil { + if !strings.Contains(err.Error(), "fatal: Needed a single revision") { + log.Debug("sha1CurrentPatternProcessor git rev-parse: %v", err) + } + start = m[3] + continue } - return - } - replaceContent(node, m[2], m[3], - createCodeLink(util.URLJoin(setting.AppURL, ctx.metas["user"], ctx.metas["repo"], "commit", hash), base.ShortSha(hash), "commit")) + replaceContent(node, m[2], m[3], + createCodeLink(util.URLJoin(setting.AppURL, ctx.metas["user"], ctx.metas["repo"], "commit", hash), base.ShortSha(hash), "commit")) + start = 0 + node = node.NextSibling.NextSibling + } } // emailAddressProcessor replaces raw email addresses with a mailto: link. func emailAddressProcessor(ctx *postProcessCtx, node *html.Node) { - m := emailRegex.FindStringSubmatchIndex(node.Data) - if m == nil { - return + next := node.NextSibling + for node != nil && node != next { + m := emailRegex.FindStringSubmatchIndex(node.Data) + if m == nil { + return + } + + mail := node.Data[m[2]:m[3]] + replaceContent(node, m[2], m[3], createLink("mailto:"+mail, mail, "mailto")) + node = node.NextSibling.NextSibling } - mail := node.Data[m[2]:m[3]] - replaceContent(node, m[2], m[3], createLink("mailto:"+mail, mail, "mailto")) } // linkProcessor creates links for any HTTP or HTTPS URL not captured by // markdown. func linkProcessor(ctx *postProcessCtx, node *html.Node) { - m := common.LinkRegex.FindStringIndex(node.Data) - if m == nil { - return + next := node.NextSibling + for node != nil && node != next { + m := common.LinkRegex.FindStringIndex(node.Data) + if m == nil { + return + } + uri := node.Data[m[0]:m[1]] + replaceContent(node, m[0], m[1], createLink(uri, uri, "link")) + node = node.NextSibling.NextSibling } - uri := node.Data[m[0]:m[1]] - replaceContent(node, m[0], m[1], createLink(uri, uri, "link")) } func genDefaultLinkProcessor(defaultLink string) processor { @@ -1048,12 +1117,17 @@ func genDefaultLinkProcessor(defaultLink string) processor { // descriptionLinkProcessor creates links for DescriptionHTML func descriptionLinkProcessor(ctx *postProcessCtx, node *html.Node) { - m := common.LinkRegex.FindStringIndex(node.Data) - if m == nil { - return + next := node.NextSibling + for node != nil && node != next { + m := common.LinkRegex.FindStringIndex(node.Data) + if m == nil { + return + } + + uri := node.Data[m[0]:m[1]] + replaceContent(node, m[0], m[1], createDescriptionLink(uri, uri)) + node = node.NextSibling.NextSibling } - uri := node.Data[m[0]:m[1]] - replaceContent(node, m[0], m[1], createDescriptionLink(uri, uri)) } func createDescriptionLink(href, content string) *html.Node { diff --git a/modules/markup/html_test.go b/modules/markup/html_test.go index aab223085..4d6344a72 100644 --- a/modules/markup/html_test.go +++ b/modules/markup/html_test.go @@ -425,3 +425,19 @@ func TestIssue16020(t *testing.T) { assert.NoError(t, err) assert.Equal(t, data, string(res)) } + +func BenchmarkEmojiPostprocess(b *testing.B) { + data := "🥰 " + for len(data) < 1<<16 { + data += data + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err := PostProcess( + []byte(data), + "https://example.com", + localMetas, + false) + assert.NoError(b, err) + } +}