// Copyright 2019 The Gitea Authors. All rights reserved. // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. package mdstripper import ( "bytes" "sync" "io" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/markup/common" "github.com/yuin/goldmark" "github.com/yuin/goldmark/ast" "github.com/yuin/goldmark/extension" "github.com/yuin/goldmark/parser" "github.com/yuin/goldmark/renderer" "github.com/yuin/goldmark/renderer/html" "github.com/yuin/goldmark/text" ) type stripRenderer struct { links []string empty bool } func (r *stripRenderer) Render(w io.Writer, source []byte, doc ast.Node) error { return ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) { if !entering { return ast.WalkContinue, nil } switch v := n.(type) { case *ast.Text: if !v.IsRaw() { _, prevSibIsText := n.PreviousSibling().(*ast.Text) coalesce := prevSibIsText r.processString( w, v.Text(source), coalesce) if v.SoftLineBreak() { r.doubleSpace(w) } } return ast.WalkContinue, nil case *ast.Link: r.processLink(w, v.Destination) return ast.WalkSkipChildren, nil case *ast.AutoLink: r.processLink(w, v.URL(source)) return ast.WalkSkipChildren, nil } return ast.WalkContinue, nil }) } func (r *stripRenderer) doubleSpace(w io.Writer) { if !r.empty { _, _ = w.Write([]byte{'\n'}) } } func (r *stripRenderer) processString(w io.Writer, text []byte, coalesce bool) { // Always break-up words if !coalesce { r.doubleSpace(w) } _, _ = w.Write(text) r.empty = false } func (r *stripRenderer) processLink(w io.Writer, link []byte) { // Links are processed out of band r.links = append(r.links, string(link)) } // GetLinks returns the list of link data collected while parsing func (r *stripRenderer) GetLinks() []string { return r.links } // AddOptions adds given option to this renderer. func (r *stripRenderer) AddOptions(...renderer.Option) { // no-op } // StripMarkdown parses markdown content by removing all markup and code blocks // in order to extract links and other references func StripMarkdown(rawBytes []byte) (string, []string) { buf, links := StripMarkdownBytes(rawBytes) return string(buf), links } var stripParser parser.Parser var once = sync.Once{} // StripMarkdownBytes parses markdown content by removing all markup and code blocks // in order to extract links and other references func StripMarkdownBytes(rawBytes []byte) ([]byte, []string) { once.Do(func() { gdMarkdown := goldmark.New( goldmark.WithExtensions(extension.Table, extension.Strikethrough, extension.TaskList, extension.DefinitionList, common.FootnoteExtension, common.Linkify, ), goldmark.WithParserOptions( parser.WithAttribute(), parser.WithAutoHeadingID(), ), goldmark.WithRendererOptions( html.WithUnsafe(), ), ) stripParser = gdMarkdown.Parser() }) stripper := &stripRenderer{ links: make([]string, 0, 10), empty: true, } reader := text.NewReader(rawBytes) doc := stripParser.Parse(reader) var buf bytes.Buffer if err := stripper.Render(&buf, rawBytes, doc); err != nil { log.Error("Unable to strip: %v", err) } return buf.Bytes(), stripper.GetLinks() }