// Forked from tangled.org/core, the monorepo for Tangled (https://tangled.org).

// Package markup is an umbrella package for all markup formats and their renderers.
2package markup
3
4import (
5 "bytes"
6 "fmt"
7 "io"
8 "net/url"
9 "path"
10 "strings"
11
12 "github.com/microcosm-cc/bluemonday"
13 "github.com/yuin/goldmark"
14 "github.com/yuin/goldmark/ast"
15 "github.com/yuin/goldmark/extension"
16 "github.com/yuin/goldmark/parser"
17 "github.com/yuin/goldmark/renderer/html"
18 "github.com/yuin/goldmark/text"
19 "github.com/yuin/goldmark/util"
20 htmlparse "golang.org/x/net/html"
21
22 "tangled.sh/tangled.sh/core/appview/pages/repoinfo"
23)
24
// RendererType identifies the context a document is rendered in, which in
// turn decides which transformations are applied to it.
type RendererType int

const (
	// RendererTypeRepoMarkdown is used for markdown documentation files
	// that live inside a repository.
	RendererTypeRepoMarkdown = RendererType(iota)
	// RendererTypeDefault is used for markdown outside of a repository
	// tree, like issues/pulls/comments.
	RendererTypeDefault
)
34
35// RenderContext holds the contextual data for rendering markdown.
36// It can be initialized empty, and that'll skip any transformations.
37type RenderContext struct {
38 CamoUrl string
39 CamoSecret string
40 repoinfo.RepoInfo
41 IsDev bool
42 RendererType RendererType
43}
44
45func (rctx *RenderContext) RenderMarkdown(source string) string {
46 md := goldmark.New(
47 goldmark.WithExtensions(extension.GFM),
48 goldmark.WithParserOptions(
49 parser.WithAutoHeadingID(),
50 ),
51 goldmark.WithRendererOptions(html.WithUnsafe()),
52 )
53
54 if rctx != nil {
55 var transformers []util.PrioritizedValue
56
57 transformers = append(transformers, util.Prioritized(&MarkdownTransformer{rctx: rctx}, 10000))
58
59 md.Parser().AddOptions(
60 parser.WithASTTransformers(transformers...),
61 )
62 }
63
64 var buf bytes.Buffer
65 if err := md.Convert([]byte(source), &buf); err != nil {
66 return source
67 }
68
69 var processed strings.Builder
70 if err := postProcess(rctx, strings.NewReader(buf.String()), &processed); err != nil {
71 return source
72 }
73
74 return processed.String()
75}
76
77func postProcess(ctx *RenderContext, input io.Reader, output io.Writer) error {
78 node, err := htmlparse.Parse(io.MultiReader(
79 strings.NewReader("<html><body>"),
80 input,
81 strings.NewReader("</body></html>"),
82 ))
83 if err != nil {
84 return fmt.Errorf("failed to parse html: %w", err)
85 }
86
87 if node.Type == htmlparse.DocumentNode {
88 node = node.FirstChild
89 }
90
91 visitNode(ctx, node)
92
93 newNodes := make([]*htmlparse.Node, 0, 5)
94
95 if node.Data == "html" {
96 node = node.FirstChild
97 for node != nil && node.Data != "body" {
98 node = node.NextSibling
99 }
100 }
101 if node != nil {
102 if node.Data == "body" {
103 child := node.FirstChild
104 for child != nil {
105 newNodes = append(newNodes, child)
106 child = child.NextSibling
107 }
108 } else {
109 newNodes = append(newNodes, node)
110 }
111 }
112
113 for _, node := range newNodes {
114 if err := htmlparse.Render(output, node); err != nil {
115 return fmt.Errorf("failed to render processed html: %w", err)
116 }
117 }
118
119 return nil
120}
121
122func visitNode(ctx *RenderContext, node *htmlparse.Node) {
123 switch node.Type {
124 case htmlparse.ElementNode:
125 if node.Data == "img" || node.Data == "source" {
126 for i, attr := range node.Attr {
127 if attr.Key != "src" {
128 continue
129 }
130
131 camoUrl, _ := url.Parse(ctx.CamoUrl)
132 dstUrl, _ := url.Parse(attr.Val)
133 if dstUrl.Host != camoUrl.Host {
134 attr.Val = ctx.imageFromKnotTransformer(attr.Val)
135 attr.Val = ctx.camoImageLinkTransformer(attr.Val)
136 node.Attr[i] = attr
137 }
138 }
139 }
140
141 for n := node.FirstChild; n != nil; n = n.NextSibling {
142 visitNode(ctx, n)
143 }
144 default:
145 }
146}
147
148func (rctx *RenderContext) Sanitize(html string) string {
149 policy := bluemonday.UGCPolicy()
150
151 // video
152 policy.AllowElements("video")
153 policy.AllowAttrs("controls").OnElements("video")
154 policy.AllowElements("source")
155 policy.AllowAttrs("src", "type").OnElements("source")
156
157 // centering content
158 policy.AllowElements("center")
159
160 policy.AllowAttrs("align", "style", "width", "height").Globally()
161 policy.AllowStyles(
162 "margin",
163 "padding",
164 "text-align",
165 "font-weight",
166 "text-decoration",
167 "padding-left",
168 "padding-right",
169 "padding-top",
170 "padding-bottom",
171 "margin-left",
172 "margin-right",
173 "margin-top",
174 "margin-bottom",
175 )
176 return policy.Sanitize(html)
177}
178
179type MarkdownTransformer struct {
180 rctx *RenderContext
181}
182
183func (a *MarkdownTransformer) Transform(node *ast.Document, reader text.Reader, pc parser.Context) {
184 _ = ast.Walk(node, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
185 if !entering {
186 return ast.WalkContinue, nil
187 }
188
189 switch a.rctx.RendererType {
190 case RendererTypeRepoMarkdown:
191 switch n := n.(type) {
192 case *ast.Link:
193 a.rctx.relativeLinkTransformer(n)
194 case *ast.Image:
195 a.rctx.imageFromKnotAstTransformer(n)
196 a.rctx.camoImageLinkAstTransformer(n)
197 }
198 case RendererTypeDefault:
199 switch n := n.(type) {
200 case *ast.Image:
201 a.rctx.imageFromKnotAstTransformer(n)
202 a.rctx.camoImageLinkAstTransformer(n)
203 }
204 }
205
206 return ast.WalkContinue, nil
207 })
208}
209
210func (rctx *RenderContext) relativeLinkTransformer(link *ast.Link) {
211
212 dst := string(link.Destination)
213
214 if isAbsoluteUrl(dst) {
215 return
216 }
217
218 actualPath := rctx.actualPath(dst)
219
220 newPath := path.Join("/", rctx.RepoInfo.FullName(), "tree", rctx.RepoInfo.Ref, actualPath)
221 link.Destination = []byte(newPath)
222}
223
224func (rctx *RenderContext) imageFromKnotTransformer(dst string) string {
225 if isAbsoluteUrl(dst) {
226 return dst
227 }
228
229 scheme := "https"
230 if rctx.IsDev {
231 scheme = "http"
232 }
233
234 actualPath := rctx.actualPath(dst)
235
236 parsedURL := &url.URL{
237 Scheme: scheme,
238 Host: rctx.Knot,
239 Path: path.Join("/",
240 rctx.RepoInfo.OwnerDid,
241 rctx.RepoInfo.Name,
242 "raw",
243 url.PathEscape(rctx.RepoInfo.Ref),
244 actualPath),
245 }
246 newPath := parsedURL.String()
247 return newPath
248}
249
250func (rctx *RenderContext) imageFromKnotAstTransformer(img *ast.Image) {
251 dst := string(img.Destination)
252 img.Destination = []byte(rctx.imageFromKnotTransformer(dst))
253}
254
255// actualPath decides when to join the file path with the
256// current repository directory (essentially only when the link
257// destination is relative. if it's absolute then we assume the
258// user knows what they're doing.)
259func (rctx *RenderContext) actualPath(dst string) string {
260 if path.IsAbs(dst) {
261 return dst
262 }
263
264 return path.Join(rctx.CurrentDir, dst)
265}
266
// isAbsoluteUrl reports whether link parses as a URL that carries a scheme.
// Links that cannot be parsed are reported as not absolute.
func isAbsoluteUrl(link string) bool {
	if u, err := url.Parse(link); err == nil {
		return u.IsAbs()
	}
	return false
}