forked from
tangled.org/core
Monorepo for Tangled — https://tangled.org
1package markup
2
3import (
4 "maps"
5 "regexp"
6 "slices"
7 "strings"
8
9 "github.com/alecthomas/chroma/v2"
10 "github.com/microcosm-cc/bluemonday"
11)
12
13type Sanitizer struct {
14 defaultPolicy *bluemonday.Policy
15 descriptionPolicy *bluemonday.Policy
16}
17
18func NewSanitizer() Sanitizer {
19 return Sanitizer{
20 defaultPolicy: defaultPolicy(),
21 descriptionPolicy: descriptionPolicy(),
22 }
23}
24
25func (s *Sanitizer) SanitizeDefault(html string) string {
26 return s.defaultPolicy.Sanitize(html)
27}
28func (s *Sanitizer) SanitizeDescription(html string) string {
29 return s.descriptionPolicy.Sanitize(html)
30}
31
32func defaultPolicy() *bluemonday.Policy {
33 policy := bluemonday.UGCPolicy()
34
35 // Allow generally safe attributes
36 generalSafeAttrs := []string{
37 "abbr", "accept", "accept-charset",
38 "accesskey", "action", "align", "alt",
39 "aria-describedby", "aria-hidden", "aria-label", "aria-labelledby",
40 "axis", "border", "cellpadding", "cellspacing", "char",
41 "charoff", "charset", "checked",
42 "clear", "cols", "colspan", "color",
43 "compact", "coords", "datetime", "dir",
44 "disabled", "enctype", "for", "frame",
45 "headers", "height", "hreflang",
46 "hspace", "ismap", "label", "lang",
47 "maxlength", "media", "method",
48 "multiple", "name", "nohref", "noshade",
49 "nowrap", "open", "prompt", "readonly", "rel", "rev",
50 "rows", "rowspan", "rules", "scope",
51 "selected", "shape", "size", "span",
52 "start", "summary", "tabindex", "target",
53 "title", "type", "usemap", "valign", "value",
54 "vspace", "width", "itemprop",
55 }
56
57 generalSafeElements := []string{
58 "h1", "h2", "h3", "h4", "h5", "h6", "h7", "h8", "br", "b", "i", "strong", "em", "a", "pre", "code", "img", "tt",
59 "div", "ins", "del", "sup", "sub", "p", "ol", "ul", "table", "thead", "tbody", "tfoot", "blockquote", "label",
60 "dl", "dt", "dd", "kbd", "q", "samp", "var", "hr", "ruby", "rt", "rp", "li", "tr", "td", "th", "s", "strike", "summary",
61 "details", "caption", "figure", "figcaption",
62 "abbr", "bdo", "cite", "dfn", "mark", "small", "span", "time", "video", "wbr",
63 }
64
65 policy.AllowAttrs(generalSafeAttrs...).OnElements(generalSafeElements...)
66
67 // video
68 policy.AllowAttrs("src", "autoplay", "controls").OnElements("video")
69
70 // checkboxes
71 policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input")
72 policy.AllowAttrs("checked", "disabled", "data-source-position").OnElements("input")
73
74 // for code blocks
75 policy.AllowAttrs("class").Matching(regexp.MustCompile(`chroma`)).OnElements("pre")
76 policy.AllowAttrs("class").Matching(regexp.MustCompile(`anchor|footnote-ref|footnote-backref`)).OnElements("a")
77 policy.AllowAttrs("class").Matching(regexp.MustCompile(`heading`)).OnElements("h1", "h2", "h3", "h4", "h5", "h6", "h7", "h8")
78 policy.AllowAttrs("class").Matching(regexp.MustCompile(strings.Join(slices.Collect(maps.Values(chroma.StandardTypes)), "|"))).OnElements("span")
79
80 // centering content
81 policy.AllowElements("center")
82
83 policy.AllowAttrs("align", "style", "width", "height").Globally()
84 policy.AllowStyles(
85 "margin",
86 "padding",
87 "text-align",
88 "font-weight",
89 "text-decoration",
90 "padding-left",
91 "padding-right",
92 "padding-top",
93 "padding-bottom",
94 "margin-left",
95 "margin-right",
96 "margin-top",
97 "margin-bottom",
98 )
99
100 // math
101 mathAttrs := []string{
102 "accent", "columnalign", "columnlines", "columnspan", "dir", "display",
103 "displaystyle", "encoding", "fence", "form", "largeop", "linebreak",
104 "linethickness", "lspace", "mathcolor", "mathsize", "mathvariant", "minsize",
105 "movablelimits", "notation", "rowalign", "rspace", "rowspacing", "rowspan",
106 "scriptlevel", "stretchy", "symmetric", "title", "voffset", "width",
107 }
108 mathElements := []string{
109 "annotation", "math", "menclose", "merror", "mfrac", "mi", "mmultiscripts",
110 "mn", "mo", "mover", "mpadded", "mprescripts", "mroot", "mrow", "mspace",
111 "msqrt", "mstyle", "msub", "msubsup", "msup", "mtable", "mtd", "mtext",
112 "mtr", "munder", "munderover", "semantics",
113 }
114 policy.AllowNoAttrs().OnElements(mathElements...)
115 policy.AllowAttrs(mathAttrs...).OnElements(mathElements...)
116
117 // goldmark-callout
118 policy.AllowAttrs("data-callout").OnElements("details")
119
120 return policy
121}
122
123func descriptionPolicy() *bluemonday.Policy {
124 policy := bluemonday.NewPolicy()
125 policy.AllowStandardURLs()
126
127 // allow italics and bold.
128 policy.AllowElements("i", "b", "em", "strong")
129
130 // allow code.
131 policy.AllowElements("code")
132
133 // allow links
134 policy.AllowAttrs("href", "target", "rel").OnElements("a")
135
136 return policy
137}