forked from tangled.org/core
Monorepo for Tangled — https://tangled.org
1// Package markup is an umbrella package for all markups and their renderers. 2package markup 3 4import ( 5 "bytes" 6 "fmt" 7 "io" 8 "io/fs" 9 "net/url" 10 "path" 11 "strings" 12 13 chromahtml "github.com/alecthomas/chroma/v2/formatters/html" 14 "github.com/alecthomas/chroma/v2/styles" 15 treeblood "github.com/wyatt915/goldmark-treeblood" 16 "github.com/yuin/goldmark" 17 highlighting "github.com/yuin/goldmark-highlighting/v2" 18 "github.com/yuin/goldmark/ast" 19 "github.com/yuin/goldmark/extension" 20 "github.com/yuin/goldmark/parser" 21 "github.com/yuin/goldmark/renderer/html" 22 "github.com/yuin/goldmark/text" 23 "github.com/yuin/goldmark/util" 24 callout "gitlab.com/staticnoise/goldmark-callout" 25 htmlparse "golang.org/x/net/html" 26 27 "tangled.org/core/api/tangled" 28 textension "tangled.org/core/appview/pages/markup/extension" 29 "tangled.org/core/appview/pages/repoinfo" 30) 31 32// RendererType defines the type of renderer to use based on context 33type RendererType int 34 35const ( 36 // RendererTypeRepoMarkdown is for repository documentation markdown files 37 RendererTypeRepoMarkdown RendererType = iota 38 // RendererTypeDefault is non-repo markdown, like issues/pulls/comments. 39 RendererTypeDefault 40) 41 42// RenderContext holds the contextual data for rendering markdown. 43// It can be initialized empty, and that'll skip any transformations. 44type RenderContext struct { 45 CamoUrl string 46 CamoSecret string 47 repoinfo.RepoInfo 48 IsDev bool 49 RendererType RendererType 50 Sanitizer Sanitizer 51 Files fs.FS 52} 53 54func NewMarkdown() goldmark.Markdown { 55 md := goldmark.New( 56 goldmark.WithExtensions( 57 extension.GFM, 58 highlighting.NewHighlighting( 59 highlighting.WithFormatOptions( 60 chromahtml.Standalone(false), 61 chromahtml.WithClasses(true), 62 ), 63 highlighting.WithCustomStyle(styles.Get("catppuccin-latte")), 64 ), 65 extension.NewFootnote( 66 extension.WithFootnoteIDPrefix([]byte("footnote")), 67 ), 68 treeblood.MathML(), 69 callout.CalloutExtention, 70 textension.AtExt, 71 ), 72 goldmark.WithParserOptions( 73 parser.WithAutoHeadingID(), 74 ), 75 goldmark.WithRendererOptions(html.WithUnsafe()), 76 ) 77 return md 78} 79 80func (rctx *RenderContext) RenderMarkdown(source string) string { 81 return rctx.RenderMarkdownWith(source, NewMarkdown()) 82} 83 84func (rctx *RenderContext) RenderMarkdownWith(source string, md goldmark.Markdown) string { 85 if rctx != nil { 86 var transformers []util.PrioritizedValue 87 88 transformers = append(transformers, util.Prioritized(&MarkdownTransformer{rctx: rctx}, 10000)) 89 90 md.Parser().AddOptions( 91 parser.WithASTTransformers(transformers...), 92 ) 93 } 94 95 var buf bytes.Buffer 96 if err := md.Convert([]byte(source), &buf); err != nil { 97 return source 98 } 99 100 var processed strings.Builder 101 if err := postProcess(rctx, strings.NewReader(buf.String()), &processed); err != nil { 102 return source 103 } 104 105 return processed.String() 106} 107 108func postProcess(ctx *RenderContext, input io.Reader, output io.Writer) error { 109 node, err := htmlparse.Parse(io.MultiReader( 110 strings.NewReader("<html><body>"), 111 input, 112 strings.NewReader("</body></html>"), 113 )) 114 if err != nil { 115 return fmt.Errorf("failed to parse html: %w", err) 116 } 117 118 if node.Type == htmlparse.DocumentNode { 119 node = node.FirstChild 120 } 121 122 visitNode(ctx, node) 123 124 newNodes := make([]*htmlparse.Node, 0, 5) 125 126 if node.Data == "html" { 127 node = node.FirstChild 128 for node != nil && node.Data != "body" { 129 node = node.NextSibling 130 } 131 } 132 if node != nil { 133 if node.Data == "body" { 134 child := node.FirstChild 135 for child != nil { 136 newNodes = append(newNodes, child) 137 child = child.NextSibling 138 } 139 } else { 140 newNodes = append(newNodes, node) 141 } 142 } 143 144 for _, node := range newNodes { 145 if err := htmlparse.Render(output, node); err != nil { 146 return fmt.Errorf("failed to render processed html: %w", err) 147 } 148 } 149 150 return nil 151} 152 153func visitNode(ctx *RenderContext, node *htmlparse.Node) { 154 switch node.Type { 155 case htmlparse.ElementNode: 156 switch node.Data { 157 case "img", "source": 158 for i, attr := range node.Attr { 159 if attr.Key != "src" { 160 continue 161 } 162 163 camoUrl, _ := url.Parse(ctx.CamoUrl) 164 dstUrl, _ := url.Parse(attr.Val) 165 if dstUrl.Host != camoUrl.Host { 166 attr.Val = ctx.imageFromKnotTransformer(attr.Val) 167 attr.Val = ctx.camoImageLinkTransformer(attr.Val) 168 node.Attr[i] = attr 169 } 170 } 171 } 172 173 for n := node.FirstChild; n != nil; n = n.NextSibling { 174 visitNode(ctx, n) 175 } 176 default: 177 } 178} 179 180func (rctx *RenderContext) SanitizeDefault(html string) string { 181 return rctx.Sanitizer.SanitizeDefault(html) 182} 183 184func (rctx *RenderContext) SanitizeDescription(html string) string { 185 return rctx.Sanitizer.SanitizeDescription(html) 186} 187 188type MarkdownTransformer struct { 189 rctx *RenderContext 190} 191 192func (a *MarkdownTransformer) Transform(node *ast.Document, reader text.Reader, pc parser.Context) { 193 _ = ast.Walk(node, func(n ast.Node, entering bool) (ast.WalkStatus, error) { 194 if !entering { 195 return ast.WalkContinue, nil 196 } 197 198 switch a.rctx.RendererType { 199 case RendererTypeRepoMarkdown: 200 switch n := n.(type) { 201 case *ast.Heading: 202 a.rctx.anchorHeadingTransformer(n) 203 case *ast.Link: 204 a.rctx.relativeLinkTransformer(n) 205 case *ast.Image: 206 a.rctx.imageFromKnotAstTransformer(n) 207 a.rctx.camoImageLinkAstTransformer(n) 208 } 209 case RendererTypeDefault: 210 switch n := n.(type) { 211 case *ast.Heading: 212 a.rctx.anchorHeadingTransformer(n) 213 case *ast.Image: 214 a.rctx.imageFromKnotAstTransformer(n) 215 a.rctx.camoImageLinkAstTransformer(n) 216 } 217 } 218 219 return ast.WalkContinue, nil 220 }) 221} 222 223func (rctx *RenderContext) relativeLinkTransformer(link *ast.Link) { 224 225 dst := string(link.Destination) 226 227 if isAbsoluteUrl(dst) || isFragment(dst) || isMail(dst) { 228 return 229 } 230 231 actualPath := rctx.actualPath(dst) 232 233 newPath := path.Join("/", rctx.RepoInfo.FullName(), "tree", rctx.RepoInfo.Ref, actualPath) 234 link.Destination = []byte(newPath) 235} 236 237func (rctx *RenderContext) imageFromKnotTransformer(dst string) string { 238 if isAbsoluteUrl(dst) { 239 return dst 240 } 241 242 scheme := "https" 243 if rctx.IsDev { 244 scheme = "http" 245 } 246 247 actualPath := rctx.actualPath(dst) 248 249 repoName := fmt.Sprintf("%s/%s", rctx.RepoInfo.OwnerDid, rctx.RepoInfo.Name) 250 251 query := fmt.Sprintf("repo=%s&ref=%s&path=%s&raw=true", 252 url.PathEscape(repoName), url.PathEscape(rctx.RepoInfo.Ref), actualPath) 253 254 parsedURL := &url.URL{ 255 Scheme: scheme, 256 Host: rctx.Knot, 257 Path: path.Join("/xrpc", tangled.RepoBlobNSID), 258 RawQuery: query, 259 } 260 newPath := parsedURL.String() 261 return newPath 262} 263 264func (rctx *RenderContext) imageFromKnotAstTransformer(img *ast.Image) { 265 dst := string(img.Destination) 266 img.Destination = []byte(rctx.imageFromKnotTransformer(dst)) 267} 268 269func (rctx *RenderContext) anchorHeadingTransformer(h *ast.Heading) { 270 idGeneric, exists := h.AttributeString("id") 271 if !exists { 272 return // no id, nothing to do 273 } 274 id, ok := idGeneric.([]byte) 275 if !ok { 276 return 277 } 278 279 // create anchor link 280 anchor := ast.NewLink() 281 anchor.Destination = fmt.Appendf(nil, "#%s", string(id)) 282 anchor.SetAttribute([]byte("class"), []byte("anchor")) 283 284 // create icon text 285 iconText := ast.NewString([]byte("#")) 286 anchor.AppendChild(anchor, iconText) 287 288 // set class on heading 289 h.SetAttribute([]byte("class"), []byte("heading")) 290 291 // append anchor to heading 292 h.AppendChild(h, anchor) 293} 294 295// actualPath decides when to join the file path with the 296// current repository directory (essentially only when the link 297// destination is relative. if it's absolute then we assume the 298// user knows what they're doing.) 299func (rctx *RenderContext) actualPath(dst string) string { 300 if path.IsAbs(dst) { 301 return dst 302 } 303 304 return path.Join(rctx.CurrentDir, dst) 305} 306 307// FindUserMentions returns Set of user handles from given markup soruce. 308// It doesn't guarntee unique DIDs 309func FindUserMentions(source string) []string { 310 var ( 311 mentions []string 312 mentionsSet = make(map[string]struct{}) 313 md = NewMarkdown() 314 sourceBytes = []byte(source) 315 root = md.Parser().Parse(text.NewReader(sourceBytes)) 316 ) 317 ast.Walk(root, func(n ast.Node, entering bool) (ast.WalkStatus, error) { 318 if entering && n.Kind() == textension.KindAt { 319 handle := n.(*textension.AtNode).Handle 320 mentionsSet[handle] = struct{}{} 321 return ast.WalkSkipChildren, nil 322 } 323 return ast.WalkContinue, nil 324 }) 325 for handle := range mentionsSet { 326 mentions = append(mentions, handle) 327 } 328 return mentions 329} 330 331func isAbsoluteUrl(link string) bool { 332 parsed, err := url.Parse(link) 333 if err != nil { 334 return false 335 } 336 return parsed.IsAbs() 337} 338 339func isFragment(link string) bool { 340 return strings.HasPrefix(link, "#") 341} 342 343func isMail(link string) bool { 344 return strings.HasPrefix(link, "mailto:") 345}