1// Package markup is an umbrella package for all markups and their renderers.
2package markup
3
4import (
5 "bytes"
6 "fmt"
7 "io"
8 "io/fs"
9 "net/url"
10 "path"
11 "strings"
12
13 chromahtml "github.com/alecthomas/chroma/v2/formatters/html"
14 "github.com/alecthomas/chroma/v2/styles"
15 treeblood "github.com/wyatt915/goldmark-treeblood"
16 "github.com/yuin/goldmark"
17 highlighting "github.com/yuin/goldmark-highlighting/v2"
18 "github.com/yuin/goldmark/ast"
19 "github.com/yuin/goldmark/extension"
20 "github.com/yuin/goldmark/parser"
21 "github.com/yuin/goldmark/renderer/html"
22 "github.com/yuin/goldmark/text"
23 "github.com/yuin/goldmark/util"
24 callout "gitlab.com/staticnoise/goldmark-callout"
25 htmlparse "golang.org/x/net/html"
26
27 "tangled.org/core/api/tangled"
28 textension "tangled.org/core/appview/pages/markup/extension"
29 "tangled.org/core/appview/pages/repoinfo"
30)
31
// RendererType defines the type of renderer to use based on context.
// MarkdownTransformer switches on this value to decide which AST
// transformations to apply.
type RendererType int

const (
	// RendererTypeRepoMarkdown is for repository documentation markdown files.
	// It is the zero value of RendererType, so a RenderContext that does not
	// set RendererType explicitly renders in this mode.
	RendererTypeRepoMarkdown RendererType = iota
	// RendererTypeDefault is non-repo markdown, like issues/pulls/comments.
	RendererTypeDefault
)
41
// RenderContext holds the contextual data for rendering markdown.
// It can be initialized empty, and that'll skip any transformations.
type RenderContext struct {
	// CamoUrl is parsed by visitNode; image sources whose host already equals
	// this URL's host are left untouched.
	CamoUrl string

	// CamoSecret is not read in this file's visible code.
	// NOTE(review): presumably consumed by camoImageLinkTransformer — confirm.
	CamoSecret string

	// RepoInfo is embedded; this file reads FullName(), Ref, OwnerDid, Name,
	// Knot and CurrentDir from it when rewriting links and image URLs.
	repoinfo.RepoInfo

	// IsDev makes imageFromKnotTransformer build "http" URLs instead of "https".
	IsDev bool

	// RendererType selects which AST transformations MarkdownTransformer applies.
	RendererType RendererType

	// Sanitizer backs the SanitizeDefault and SanitizeDescription methods.
	Sanitizer Sanitizer

	// Files is not referenced by the code visible in this file.
	Files fs.FS
}
53
54func NewMarkdown() goldmark.Markdown {
55 md := goldmark.New(
56 goldmark.WithExtensions(
57 extension.GFM,
58 highlighting.NewHighlighting(
59 highlighting.WithFormatOptions(
60 chromahtml.Standalone(false),
61 chromahtml.WithClasses(true),
62 ),
63 highlighting.WithCustomStyle(styles.Get("catppuccin-latte")),
64 ),
65 extension.NewFootnote(
66 extension.WithFootnoteIDPrefix([]byte("footnote")),
67 ),
68 treeblood.MathML(),
69 callout.CalloutExtention,
70 textension.AtExt,
71 ),
72 goldmark.WithParserOptions(
73 parser.WithAutoHeadingID(),
74 ),
75 goldmark.WithRendererOptions(html.WithUnsafe()),
76 )
77 return md
78}
79
// RenderMarkdown renders source to HTML by delegating to RenderMarkdownWith
// with a freshly constructed renderer from NewMarkdown.
func (rctx *RenderContext) RenderMarkdown(source string) string {
	return rctx.RenderMarkdownWith(source, NewMarkdown())
}
83
84func (rctx *RenderContext) RenderMarkdownWith(source string, md goldmark.Markdown) string {
85 if rctx != nil {
86 var transformers []util.PrioritizedValue
87
88 transformers = append(transformers, util.Prioritized(&MarkdownTransformer{rctx: rctx}, 10000))
89
90 md.Parser().AddOptions(
91 parser.WithASTTransformers(transformers...),
92 )
93 }
94
95 var buf bytes.Buffer
96 if err := md.Convert([]byte(source), &buf); err != nil {
97 return source
98 }
99
100 var processed strings.Builder
101 if err := postProcess(rctx, strings.NewReader(buf.String()), &processed); err != nil {
102 return source
103 }
104
105 return processed.String()
106}
107
108func postProcess(ctx *RenderContext, input io.Reader, output io.Writer) error {
109 node, err := htmlparse.Parse(io.MultiReader(
110 strings.NewReader("<html><body>"),
111 input,
112 strings.NewReader("</body></html>"),
113 ))
114 if err != nil {
115 return fmt.Errorf("failed to parse html: %w", err)
116 }
117
118 if node.Type == htmlparse.DocumentNode {
119 node = node.FirstChild
120 }
121
122 visitNode(ctx, node)
123
124 newNodes := make([]*htmlparse.Node, 0, 5)
125
126 if node.Data == "html" {
127 node = node.FirstChild
128 for node != nil && node.Data != "body" {
129 node = node.NextSibling
130 }
131 }
132 if node != nil {
133 if node.Data == "body" {
134 child := node.FirstChild
135 for child != nil {
136 newNodes = append(newNodes, child)
137 child = child.NextSibling
138 }
139 } else {
140 newNodes = append(newNodes, node)
141 }
142 }
143
144 for _, node := range newNodes {
145 if err := htmlparse.Render(output, node); err != nil {
146 return fmt.Errorf("failed to render processed html: %w", err)
147 }
148 }
149
150 return nil
151}
152
153func visitNode(ctx *RenderContext, node *htmlparse.Node) {
154 switch node.Type {
155 case htmlparse.ElementNode:
156 switch node.Data {
157 case "img", "source":
158 for i, attr := range node.Attr {
159 if attr.Key != "src" {
160 continue
161 }
162
163 camoUrl, _ := url.Parse(ctx.CamoUrl)
164 dstUrl, _ := url.Parse(attr.Val)
165 if dstUrl.Host != camoUrl.Host {
166 attr.Val = ctx.imageFromKnotTransformer(attr.Val)
167 attr.Val = ctx.camoImageLinkTransformer(attr.Val)
168 node.Attr[i] = attr
169 }
170 }
171 }
172
173 for n := node.FirstChild; n != nil; n = n.NextSibling {
174 visitNode(ctx, n)
175 }
176 default:
177 }
178}
179
// SanitizeDefault returns html after passing it through
// rctx.Sanitizer.SanitizeDefault.
func (rctx *RenderContext) SanitizeDefault(html string) string {
	return rctx.Sanitizer.SanitizeDefault(html)
}
183
// SanitizeDescription returns html after passing it through
// rctx.Sanitizer.SanitizeDescription.
func (rctx *RenderContext) SanitizeDescription(html string) string {
	return rctx.Sanitizer.SanitizeDescription(html)
}
187
// MarkdownTransformer is a goldmark AST transformer that rewrites headings,
// links and images according to the RenderContext it is bound to.
type MarkdownTransformer struct {
	// rctx supplies the renderer type and the per-node transformer methods.
	rctx *RenderContext
}
191
192func (a *MarkdownTransformer) Transform(node *ast.Document, reader text.Reader, pc parser.Context) {
193 _ = ast.Walk(node, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
194 if !entering {
195 return ast.WalkContinue, nil
196 }
197
198 switch a.rctx.RendererType {
199 case RendererTypeRepoMarkdown:
200 switch n := n.(type) {
201 case *ast.Heading:
202 a.rctx.anchorHeadingTransformer(n)
203 case *ast.Link:
204 a.rctx.relativeLinkTransformer(n)
205 case *ast.Image:
206 a.rctx.imageFromKnotAstTransformer(n)
207 a.rctx.camoImageLinkAstTransformer(n)
208 }
209 case RendererTypeDefault:
210 switch n := n.(type) {
211 case *ast.Heading:
212 a.rctx.anchorHeadingTransformer(n)
213 case *ast.Image:
214 a.rctx.imageFromKnotAstTransformer(n)
215 a.rctx.camoImageLinkAstTransformer(n)
216 }
217 }
218
219 return ast.WalkContinue, nil
220 })
221}
222
223func (rctx *RenderContext) relativeLinkTransformer(link *ast.Link) {
224
225 dst := string(link.Destination)
226
227 if isAbsoluteUrl(dst) || isFragment(dst) || isMail(dst) {
228 return
229 }
230
231 actualPath := rctx.actualPath(dst)
232
233 newPath := path.Join("/", rctx.RepoInfo.FullName(), "tree", rctx.RepoInfo.Ref, actualPath)
234 link.Destination = []byte(newPath)
235}
236
237func (rctx *RenderContext) imageFromKnotTransformer(dst string) string {
238 if isAbsoluteUrl(dst) {
239 return dst
240 }
241
242 scheme := "https"
243 if rctx.IsDev {
244 scheme = "http"
245 }
246
247 actualPath := rctx.actualPath(dst)
248
249 repoName := fmt.Sprintf("%s/%s", rctx.RepoInfo.OwnerDid, rctx.RepoInfo.Name)
250
251 query := fmt.Sprintf("repo=%s&ref=%s&path=%s&raw=true",
252 url.PathEscape(repoName), url.PathEscape(rctx.RepoInfo.Ref), actualPath)
253
254 parsedURL := &url.URL{
255 Scheme: scheme,
256 Host: rctx.Knot,
257 Path: path.Join("/xrpc", tangled.RepoBlobNSID),
258 RawQuery: query,
259 }
260 newPath := parsedURL.String()
261 return newPath
262}
263
264func (rctx *RenderContext) imageFromKnotAstTransformer(img *ast.Image) {
265 dst := string(img.Destination)
266 img.Destination = []byte(rctx.imageFromKnotTransformer(dst))
267}
268
269func (rctx *RenderContext) anchorHeadingTransformer(h *ast.Heading) {
270 idGeneric, exists := h.AttributeString("id")
271 if !exists {
272 return // no id, nothing to do
273 }
274 id, ok := idGeneric.([]byte)
275 if !ok {
276 return
277 }
278
279 // create anchor link
280 anchor := ast.NewLink()
281 anchor.Destination = fmt.Appendf(nil, "#%s", string(id))
282 anchor.SetAttribute([]byte("class"), []byte("anchor"))
283
284 // create icon text
285 iconText := ast.NewString([]byte("#"))
286 anchor.AppendChild(anchor, iconText)
287
288 // set class on heading
289 h.SetAttribute([]byte("class"), []byte("heading"))
290
291 // append anchor to heading
292 h.AppendChild(h, anchor)
293}
294
295// actualPath decides when to join the file path with the
296// current repository directory (essentially only when the link
297// destination is relative. if it's absolute then we assume the
298// user knows what they're doing.)
299func (rctx *RenderContext) actualPath(dst string) string {
300 if path.IsAbs(dst) {
301 return dst
302 }
303
304 return path.Join(rctx.CurrentDir, dst)
305}
306
307// FindUserMentions returns Set of user handles from given markup soruce.
308// It doesn't guarntee unique DIDs
309func FindUserMentions(source string) []string {
310 var (
311 mentions []string
312 mentionsSet = make(map[string]struct{})
313 md = NewMarkdown()
314 sourceBytes = []byte(source)
315 root = md.Parser().Parse(text.NewReader(sourceBytes))
316 )
317 ast.Walk(root, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
318 if entering && n.Kind() == textension.KindAt {
319 handle := n.(*textension.AtNode).Handle
320 mentionsSet[handle] = struct{}{}
321 return ast.WalkSkipChildren, nil
322 }
323 return ast.WalkContinue, nil
324 })
325 for handle := range mentionsSet {
326 mentions = append(mentions, handle)
327 }
328 return mentions
329}
330
// isAbsoluteUrl reports whether link parses as a URL that has a scheme.
// Links that fail to parse are reported as not absolute.
func isAbsoluteUrl(link string) bool {
	if u, err := url.Parse(link); err == nil {
		return u.IsAbs()
	}
	return false
}
338
// isFragment reports whether link is a fragment-only reference, i.e. it
// begins with '#'.
func isFragment(link string) bool {
	return link != "" && link[0] == '#'
}
342
// isMail reports whether link is a mailto: link. The check is a
// case-sensitive prefix match.
func isMail(link string) bool {
	const mailtoScheme = "mailto:"
	return strings.HasPrefix(link, mailtoScheme)
}