forked from tangled.org/core
Monorepo for Tangled — https://tangled.org
// Package markup is an umbrella package for all markups and their renderers.
package markup

import (
	"bytes"
	"fmt"
	"io"
	"net/url"
	"path"
	"strings"

	"github.com/microcosm-cc/bluemonday"
	"github.com/yuin/goldmark"
	"github.com/yuin/goldmark/ast"
	"github.com/yuin/goldmark/extension"
	"github.com/yuin/goldmark/parser"
	"github.com/yuin/goldmark/renderer/html"
	"github.com/yuin/goldmark/text"
	"github.com/yuin/goldmark/util"
	htmlparse "golang.org/x/net/html"

	"tangled.sh/tangled.sh/core/appview/pages/repoinfo"
)

// RendererType defines the type of renderer to use based on context.
// The markdown AST transformer switches on this value to decide which
// link and image rewrites to apply.
type RendererType int

const (
	// RendererTypeRepoMarkdown is for repository documentation markdown files.
	// It is the zero value, so an unset RenderContext.RendererType selects it.
	RendererTypeRepoMarkdown RendererType = iota
	// RendererTypeDefault is non-repo markdown, like issues/pulls/comments.
	RendererTypeDefault
)

// RenderContext holds the contextual data for rendering markdown.
// It can be initialized empty, and that'll skip any transformations.
37type RenderContext struct { 38 CamoUrl string 39 CamoSecret string 40 repoinfo.RepoInfo 41 IsDev bool 42 RendererType RendererType 43} 44 45func (rctx *RenderContext) RenderMarkdown(source string) string { 46 md := goldmark.New( 47 goldmark.WithExtensions(extension.GFM), 48 goldmark.WithParserOptions( 49 parser.WithAutoHeadingID(), 50 ), 51 goldmark.WithRendererOptions(html.WithUnsafe()), 52 ) 53 54 if rctx != nil { 55 var transformers []util.PrioritizedValue 56 57 transformers = append(transformers, util.Prioritized(&MarkdownTransformer{rctx: rctx}, 10000)) 58 59 md.Parser().AddOptions( 60 parser.WithASTTransformers(transformers...), 61 ) 62 } 63 64 var buf bytes.Buffer 65 if err := md.Convert([]byte(source), &buf); err != nil { 66 return source 67 } 68 69 var processed strings.Builder 70 if err := postProcess(rctx, strings.NewReader(buf.String()), &processed); err != nil { 71 return source 72 } 73 74 return processed.String() 75} 76 77func postProcess(ctx *RenderContext, input io.Reader, output io.Writer) error { 78 node, err := htmlparse.Parse(io.MultiReader( 79 strings.NewReader("<html><body>"), 80 input, 81 strings.NewReader("</body></html>"), 82 )) 83 if err != nil { 84 return fmt.Errorf("failed to parse html: %w", err) 85 } 86 87 if node.Type == htmlparse.DocumentNode { 88 node = node.FirstChild 89 } 90 91 visitNode(ctx, node) 92 93 newNodes := make([]*htmlparse.Node, 0, 5) 94 95 if node.Data == "html" { 96 node = node.FirstChild 97 for node != nil && node.Data != "body" { 98 node = node.NextSibling 99 } 100 } 101 if node != nil { 102 if node.Data == "body" { 103 child := node.FirstChild 104 for child != nil { 105 newNodes = append(newNodes, child) 106 child = child.NextSibling 107 } 108 } else { 109 newNodes = append(newNodes, node) 110 } 111 } 112 113 for _, node := range newNodes { 114 if err := htmlparse.Render(output, node); err != nil { 115 return fmt.Errorf("failed to render processed html: %w", err) 116 } 117 } 118 119 return nil 120} 121 122func 
visitNode(ctx *RenderContext, node *htmlparse.Node) { 123 switch node.Type { 124 case htmlparse.ElementNode: 125 if node.Data == "img" || node.Data == "source" { 126 for i, attr := range node.Attr { 127 if attr.Key != "src" { 128 continue 129 } 130 131 camoUrl, _ := url.Parse(ctx.CamoUrl) 132 dstUrl, _ := url.Parse(attr.Val) 133 if dstUrl.Host != camoUrl.Host { 134 attr.Val = ctx.imageFromKnotTransformer(attr.Val) 135 attr.Val = ctx.camoImageLinkTransformer(attr.Val) 136 node.Attr[i] = attr 137 } 138 } 139 } 140 141 for n := node.FirstChild; n != nil; n = n.NextSibling { 142 visitNode(ctx, n) 143 } 144 default: 145 } 146} 147 148func (rctx *RenderContext) Sanitize(html string) string { 149 policy := bluemonday.UGCPolicy() 150 151 // video 152 policy.AllowElements("video") 153 policy.AllowAttrs("controls").OnElements("video") 154 policy.AllowElements("source") 155 policy.AllowAttrs("src", "type").OnElements("source") 156 157 // centering content 158 policy.AllowElements("center") 159 160 policy.AllowAttrs("align", "style", "width", "height").Globally() 161 policy.AllowStyles( 162 "margin", 163 "padding", 164 "text-align", 165 "font-weight", 166 "text-decoration", 167 "padding-left", 168 "padding-right", 169 "padding-top", 170 "padding-bottom", 171 "margin-left", 172 "margin-right", 173 "margin-top", 174 "margin-bottom", 175 ) 176 return policy.Sanitize(html) 177} 178 179type MarkdownTransformer struct { 180 rctx *RenderContext 181} 182 183func (a *MarkdownTransformer) Transform(node *ast.Document, reader text.Reader, pc parser.Context) { 184 _ = ast.Walk(node, func(n ast.Node, entering bool) (ast.WalkStatus, error) { 185 if !entering { 186 return ast.WalkContinue, nil 187 } 188 189 switch a.rctx.RendererType { 190 case RendererTypeRepoMarkdown: 191 switch n := n.(type) { 192 case *ast.Link: 193 a.rctx.relativeLinkTransformer(n) 194 case *ast.Image: 195 a.rctx.imageFromKnotAstTransformer(n) 196 a.rctx.camoImageLinkAstTransformer(n) 197 } 198 case RendererTypeDefault: 
199 switch n := n.(type) { 200 case *ast.Image: 201 a.rctx.imageFromKnotAstTransformer(n) 202 a.rctx.camoImageLinkAstTransformer(n) 203 } 204 } 205 206 return ast.WalkContinue, nil 207 }) 208} 209 210func (rctx *RenderContext) relativeLinkTransformer(link *ast.Link) { 211 212 dst := string(link.Destination) 213 214 if isAbsoluteUrl(dst) { 215 return 216 } 217 218 actualPath := rctx.actualPath(dst) 219 220 newPath := path.Join("/", rctx.RepoInfo.FullName(), "tree", rctx.RepoInfo.Ref, actualPath) 221 link.Destination = []byte(newPath) 222} 223 224func (rctx *RenderContext) imageFromKnotTransformer(dst string) string { 225 if isAbsoluteUrl(dst) { 226 return dst 227 } 228 229 scheme := "https" 230 if rctx.IsDev { 231 scheme = "http" 232 } 233 234 actualPath := rctx.actualPath(dst) 235 236 parsedURL := &url.URL{ 237 Scheme: scheme, 238 Host: rctx.Knot, 239 Path: path.Join("/", 240 rctx.RepoInfo.OwnerDid, 241 rctx.RepoInfo.Name, 242 "raw", 243 url.PathEscape(rctx.RepoInfo.Ref), 244 actualPath), 245 } 246 newPath := parsedURL.String() 247 return newPath 248} 249 250func (rctx *RenderContext) imageFromKnotAstTransformer(img *ast.Image) { 251 dst := string(img.Destination) 252 img.Destination = []byte(rctx.imageFromKnotTransformer(dst)) 253} 254 255// actualPath decides when to join the file path with the 256// current repository directory (essentially only when the link 257// destination is relative. if it's absolute then we assume the 258// user knows what they're doing.) 259func (rctx *RenderContext) actualPath(dst string) string { 260 if path.IsAbs(dst) { 261 return dst 262 } 263 264 return path.Join(rctx.CurrentDir, dst) 265} 266 267func isAbsoluteUrl(link string) bool { 268 parsed, err := url.Parse(link) 269 if err != nil { 270 return false 271 } 272 return parsed.IsAbs() 273}