1// Package markup is an umbrella package for all markups and their renderers.
2package markup
3
4import (
5 "bytes"
6 "fmt"
7 "io"
8 "net/url"
9 "path"
10 "strings"
11
12 chromahtml "github.com/alecthomas/chroma/v2/formatters/html"
13 "github.com/alecthomas/chroma/v2/styles"
14 treeblood "github.com/wyatt915/goldmark-treeblood"
15 "github.com/yuin/goldmark"
16 highlighting "github.com/yuin/goldmark-highlighting/v2"
17 "github.com/yuin/goldmark/ast"
18 "github.com/yuin/goldmark/extension"
19 "github.com/yuin/goldmark/parser"
20 "github.com/yuin/goldmark/renderer/html"
21 "github.com/yuin/goldmark/text"
22 "github.com/yuin/goldmark/util"
23 htmlparse "golang.org/x/net/html"
24
25 "tangled.org/core/api/tangled"
26 "tangled.org/core/appview/pages/repoinfo"
27)
28
// RendererType selects which set of markdown transformations is applied,
// depending on where the markdown is being displayed.
type RendererType int

const (
	// RendererTypeRepoMarkdown is used for markdown files that live inside a
	// repository (documentation).
	RendererTypeRepoMarkdown = RendererType(iota)
	// RendererTypeDefault is used for markdown outside of repository files,
	// such as issues, pulls and comments.
	RendererTypeDefault
)
38
// RenderContext holds the contextual data for rendering markdown.
// It can be initialized empty, and that'll skip any transformations.
type RenderContext struct {
	// CamoUrl is the URL of the camo image proxy. During HTML
	// post-processing, image sources whose host equals this URL's host are
	// left untouched.
	CamoUrl string
	// CamoSecret is presumably the secret used to sign camo URLs; it is not
	// read in this part of the file — confirm against camoImageLinkTransformer.
	CamoSecret string
	// RepoInfo is embedded; its FullName, Ref, OwnerDid, Name, Knot and
	// CurrentDir are used when rewriting relative links and image sources.
	repoinfo.RepoInfo
	// IsDev switches generated knot image URLs from https to http.
	IsDev bool
	// RendererType selects which AST transformations MarkdownTransformer
	// applies.
	RendererType RendererType
	// Sanitizer backs the SanitizeDefault and SanitizeDescription methods.
	Sanitizer Sanitizer
}
49
50func (rctx *RenderContext) RenderMarkdown(source string) string {
51 md := goldmark.New(
52 goldmark.WithExtensions(
53 extension.GFM,
54 highlighting.NewHighlighting(
55 highlighting.WithFormatOptions(
56 chromahtml.Standalone(false),
57 chromahtml.WithClasses(true),
58 ),
59 highlighting.WithCustomStyle(styles.Get("catppuccin-latte")),
60 ),
61 extension.NewFootnote(
62 extension.WithFootnoteIDPrefix([]byte("footnote")),
63 ),
64 treeblood.MathML(),
65 ),
66 goldmark.WithParserOptions(
67 parser.WithAutoHeadingID(),
68 ),
69 goldmark.WithRendererOptions(html.WithUnsafe()),
70 )
71
72 if rctx != nil {
73 var transformers []util.PrioritizedValue
74
75 transformers = append(transformers, util.Prioritized(&MarkdownTransformer{rctx: rctx}, 10000))
76
77 md.Parser().AddOptions(
78 parser.WithASTTransformers(transformers...),
79 )
80 }
81
82 var buf bytes.Buffer
83 if err := md.Convert([]byte(source), &buf); err != nil {
84 return source
85 }
86
87 var processed strings.Builder
88 if err := postProcess(rctx, strings.NewReader(buf.String()), &processed); err != nil {
89 return source
90 }
91
92 return processed.String()
93}
94
95func postProcess(ctx *RenderContext, input io.Reader, output io.Writer) error {
96 node, err := htmlparse.Parse(io.MultiReader(
97 strings.NewReader("<html><body>"),
98 input,
99 strings.NewReader("</body></html>"),
100 ))
101 if err != nil {
102 return fmt.Errorf("failed to parse html: %w", err)
103 }
104
105 if node.Type == htmlparse.DocumentNode {
106 node = node.FirstChild
107 }
108
109 visitNode(ctx, node)
110
111 newNodes := make([]*htmlparse.Node, 0, 5)
112
113 if node.Data == "html" {
114 node = node.FirstChild
115 for node != nil && node.Data != "body" {
116 node = node.NextSibling
117 }
118 }
119 if node != nil {
120 if node.Data == "body" {
121 child := node.FirstChild
122 for child != nil {
123 newNodes = append(newNodes, child)
124 child = child.NextSibling
125 }
126 } else {
127 newNodes = append(newNodes, node)
128 }
129 }
130
131 for _, node := range newNodes {
132 if err := htmlparse.Render(output, node); err != nil {
133 return fmt.Errorf("failed to render processed html: %w", err)
134 }
135 }
136
137 return nil
138}
139
140func visitNode(ctx *RenderContext, node *htmlparse.Node) {
141 switch node.Type {
142 case htmlparse.ElementNode:
143 if node.Data == "img" || node.Data == "source" {
144 for i, attr := range node.Attr {
145 if attr.Key != "src" {
146 continue
147 }
148
149 camoUrl, _ := url.Parse(ctx.CamoUrl)
150 dstUrl, _ := url.Parse(attr.Val)
151 if dstUrl.Host != camoUrl.Host {
152 attr.Val = ctx.imageFromKnotTransformer(attr.Val)
153 attr.Val = ctx.camoImageLinkTransformer(attr.Val)
154 node.Attr[i] = attr
155 }
156 }
157 }
158
159 for n := node.FirstChild; n != nil; n = n.NextSibling {
160 visitNode(ctx, n)
161 }
162 default:
163 }
164}
165
166func (rctx *RenderContext) SanitizeDefault(html string) string {
167 return rctx.Sanitizer.SanitizeDefault(html)
168}
169
170func (rctx *RenderContext) SanitizeDescription(html string) string {
171 return rctx.Sanitizer.SanitizeDescription(html)
172}
173
174type MarkdownTransformer struct {
175 rctx *RenderContext
176}
177
178func (a *MarkdownTransformer) Transform(node *ast.Document, reader text.Reader, pc parser.Context) {
179 _ = ast.Walk(node, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
180 if !entering {
181 return ast.WalkContinue, nil
182 }
183
184 switch a.rctx.RendererType {
185 case RendererTypeRepoMarkdown:
186 switch n := n.(type) {
187 case *ast.Heading:
188 a.rctx.anchorHeadingTransformer(n)
189 case *ast.Link:
190 a.rctx.relativeLinkTransformer(n)
191 case *ast.Image:
192 a.rctx.imageFromKnotAstTransformer(n)
193 a.rctx.camoImageLinkAstTransformer(n)
194 }
195 case RendererTypeDefault:
196 switch n := n.(type) {
197 case *ast.Heading:
198 a.rctx.anchorHeadingTransformer(n)
199 case *ast.Image:
200 a.rctx.imageFromKnotAstTransformer(n)
201 a.rctx.camoImageLinkAstTransformer(n)
202 }
203 }
204
205 return ast.WalkContinue, nil
206 })
207}
208
209func (rctx *RenderContext) relativeLinkTransformer(link *ast.Link) {
210
211 dst := string(link.Destination)
212
213 if isAbsoluteUrl(dst) || isFragment(dst) || isMail(dst) {
214 return
215 }
216
217 actualPath := rctx.actualPath(dst)
218
219 newPath := path.Join("/", rctx.RepoInfo.FullName(), "tree", rctx.RepoInfo.Ref, actualPath)
220 link.Destination = []byte(newPath)
221}
222
223func (rctx *RenderContext) imageFromKnotTransformer(dst string) string {
224 if isAbsoluteUrl(dst) {
225 return dst
226 }
227
228 scheme := "https"
229 if rctx.IsDev {
230 scheme = "http"
231 }
232
233 actualPath := rctx.actualPath(dst)
234
235 repoName := fmt.Sprintf("%s/%s", rctx.RepoInfo.OwnerDid, rctx.RepoInfo.Name)
236
237 query := fmt.Sprintf("repo=%s&ref=%s&path=%s&raw=true",
238 url.PathEscape(repoName), url.PathEscape(rctx.RepoInfo.Ref), actualPath)
239
240 parsedURL := &url.URL{
241 Scheme: scheme,
242 Host: rctx.Knot,
243 Path: path.Join("/xrpc", tangled.RepoBlobNSID),
244 RawQuery: query,
245 }
246 newPath := parsedURL.String()
247 return newPath
248}
249
250func (rctx *RenderContext) imageFromKnotAstTransformer(img *ast.Image) {
251 dst := string(img.Destination)
252 img.Destination = []byte(rctx.imageFromKnotTransformer(dst))
253}
254
255func (rctx *RenderContext) anchorHeadingTransformer(h *ast.Heading) {
256 idGeneric, exists := h.AttributeString("id")
257 if !exists {
258 return // no id, nothing to do
259 }
260 id, ok := idGeneric.([]byte)
261 if !ok {
262 return
263 }
264
265 // create anchor link
266 anchor := ast.NewLink()
267 anchor.Destination = fmt.Appendf(nil, "#%s", string(id))
268 anchor.SetAttribute([]byte("class"), []byte("anchor"))
269
270 // create icon text
271 iconText := ast.NewString([]byte("#"))
272 anchor.AppendChild(anchor, iconText)
273
274 // set class on heading
275 h.SetAttribute([]byte("class"), []byte("heading"))
276
277 // append anchor to heading
278 h.AppendChild(h, anchor)
279}
280
281// actualPath decides when to join the file path with the
282// current repository directory (essentially only when the link
283// destination is relative. if it's absolute then we assume the
284// user knows what they're doing.)
285func (rctx *RenderContext) actualPath(dst string) string {
286 if path.IsAbs(dst) {
287 return dst
288 }
289
290 return path.Join(rctx.CurrentDir, dst)
291}
292
// isAbsoluteUrl reports whether link parses as a URL that has a scheme.
// Links that fail to parse are reported as not absolute.
func isAbsoluteUrl(link string) bool {
	u, err := url.Parse(link)
	return err == nil && u.IsAbs()
}
300
// isFragment reports whether link is a fragment-only reference, i.e. whether
// it begins with "#".
func isFragment(link string) bool {
	// Trimming only shortens the string when the "#" prefix is present.
	return len(strings.TrimPrefix(link, "#")) < len(link)
}
304
// isMail reports whether link is a mailto link. URI schemes are
// case-insensitive, so "MAILTO:" and "Mailto:" are recognized as well as
// "mailto:".
func isMail(link string) bool {
	const prefix = "mailto:"
	if len(link) < len(prefix) {
		return false
	}
	return strings.EqualFold(link[:len(prefix)], prefix)
}