this repo has no description
1import type { Heading, List, ListItem, PhrasingContent, Root } from 'mdast';
2import { unified } from 'unified';
3import { visit } from 'unist-util-visit';
4import { defaultSchema as defaultSanitizeSchema } from 'hast-util-sanitize';
5import rehypeParse from 'rehype-parse';
6import rehypeSanitize from 'rehype-sanitize';
7import rehypeStringify from 'rehype-stringify';
8import rehypeRemark from 'rehype-remark';
9import remarkStringify from 'remark-stringify';
10import remarkUnlink from 'remark-unlink';
11import remarkNormalizeHeadings from 'remark-normalize-headings';
12import remarkGfm from 'remark-gfm';
13import remarkParse from 'remark-parse';
14import remarkSqueezeParagraphs from 'remark-squeeze-paragraphs';
15
/**
 * Flattens a list of inline (phrasing) mdast nodes into a plain string.
 *
 * - `text` contributes its value verbatim.
 * - `inlineCode` contributes its value wrapped in backticks.
 * - `break` contributes a newline.
 * - `delete`, `emphasis`, `link` and `strong` contribute the flattened text
 *   of their children (the wrapper itself adds nothing).
 * - Every other node type (footnote references, raw HTML, images, reference
 *   links, ...) contributes nothing.
 *
 * @param nodes - Inline nodes to flatten, in document order.
 * @returns The concatenated text of all nodes.
 */
const toString = (nodes: PhrasingContent[]): string => {
  let text = '';
  for (const node of nodes) {
    if (node.type === 'text') {
      text += node.value;
    } else if (node.type === 'inlineCode') {
      text += `\`${node.value}\``;
    } else if (node.type === 'break') {
      text += '\n';
    } else if (
      node.type === 'delete' ||
      node.type === 'emphasis' ||
      node.type === 'link' ||
      node.type === 'strong'
    ) {
      // Transparent wrappers: only their descendants produce text.
      text += toString(node.children);
    }
    // Anything else is intentionally dropped from the result.
  }
  return text;
};
39
/**
 * Runs an HTML string through rehype-parse, rehype-sanitize and
 * rehype-stringify and returns the resulting HTML.
 *
 * The sanitize schema passed here consists of two fields only:
 * - `tagNames`: the default hast-util-sanitize tag list without `details`,
 *   extended with a handful of structural/landmark elements.
 * - `strip`: `script`, `style` and `details`.
 *
 * @param html - HTML source to sanitize.
 * @returns The sanitized HTML, serialized back to a string.
 */
export async function sanitizeHtml(html: string): Promise<string> {
  // Default allow-list minus <details>.
  const baseTagNames = defaultSanitizeSchema.tagNames!.filter(
    (tag) => tag !== 'details'
  );
  // Extra elements that are allowed in addition to the defaults.
  const extraTagNames = [
    'content-region',
    'footer',
    'header',
    'main',
    'article',
    'section',
    'nav',
  ];
  const schema = {
    tagNames: [...baseTagNames, ...extraTagNames],
    strip: ['script', 'style', 'details'],
  };

  const processor = unified()
    .use(rehypeParse)
    .use(rehypeSanitize, schema)
    .use(rehypeStringify);

  const file = await processor.process(html);
  return file.toString();
}
60
/**
 * Converts an HTML fragment to Markdown, discarding content that is treated
 * as noise.
 *
 * Pipeline: rehype-parse (fragment mode) → rehype-sanitize (with `script`,
 * `style` and `nav` listed under `strip`) → rehype-remark → `remarkDisqualify`
 * (below) → remark-unlink → remark-normalize-headings →
 * remark-squeeze-paragraphs → remark-stringify. Zero-width characters
 * (U+200B–U+200D, U+FEFF) are removed from the final string.
 *
 * `remarkDisqualify` removes or rewrites the following nodes:
 * - thematic breaks, images / image references and raw HTML are removed;
 * - links / link references without children are removed;
 * - a table with exactly two rows is replaced by a tight list whose n-th item
 *   holds the content of the n-th header cell followed by the n-th body cell;
 * - headings with no children, or whose only child is text equal (after
 *   trimming) to `Example`, `Remarks`, `Note` or the empty string, are removed;
 * - a text node that is the only child of a paragraph is removed when its
 *   trimmed value starts with `Last updated on ` or `Copyright `, or equals
 *   `Loading...`, `Caution` or `tsx`.
 *
 * @param content - `html` is the markup to convert. `title` is part of the
 *   accepted shape but is not read by this function.
 * @returns The generated Markdown.
 */
export async function htmlToMarkdown(content: {
  title: string;
  html: string;
}): Promise<string> {
  /**
   * Visitor result telling unist-util-visit to skip the current node's
   * children and continue with the sibling at the given index. `'skip'` is the
   * documented value of that package's `SKIP` action.
   */
  type Resume = ['skip', number];

  /**
   * Removes the entry at `index` and asks the traversal to resume at that
   * same index. Without this, the sibling that slides into the freed slot
   * would never be visited (e.g. the second of two adjacent images).
   */
  const removeAt = (siblings: unknown[], index: number): Resume => {
    siblings.splice(index, 1);
    return ['skip', index];
  };

  const noiseHeadings = new Set(['Example', 'Remarks', 'Note', '']);
  const noiseParagraphs = new Set(['Loading...', 'Caution', 'tsx']);
  const noisePrefixes = ['Last updated on ', 'Copyright '];

  function remarkDisqualify() {
    return function (tree: Root): void {
      visit(tree, function (node, index, parent): Resume | undefined {
        // The root has no parent; nothing can be removed there.
        if (!parent || typeof index !== 'number') {
          return undefined;
        }

        if (
          node.type === 'thematicBreak' ||
          node.type === 'image' ||
          node.type === 'imageReference' ||
          node.type === 'html'
        ) {
          return removeAt(parent.children, index);
        }

        if (node.type === 'table') {
          if (node.children.length !== 2) {
            return undefined;
          }
          const heading = node.children[0]!;
          const items = node.children[1]!;
          const zip = heading.children.map((headCell, idx) => {
            // A body row may have fewer cells than the header row; treat a
            // missing cell as empty instead of throwing.
            const itemCell = items.children[idx];
            return {
              type: 'listItem',
              spread: false,
              children: [
                ...headCell.children,
                ...(itemCell?.children ?? []),
              ],
            } as ListItem;
          });
          parent.children.splice(index, 1, {
            type: 'list',
            spread: false,
            children: zip,
          } as List);
          // Resume at the replacement list rather than descending into the
          // detached table, so removals below act on the new list items.
          return ['skip', index];
        }

        if (node.type === 'link' || node.type === 'linkReference') {
          return node.children.length === 0
            ? removeAt(parent.children, index)
            : undefined;
        }

        if (node.type === 'heading') {
          const only = node.children[0];
          if (!only) {
            // Heading without any content.
            return removeAt(parent.children, index);
          }
          if (node.children.length > 1 || only.type !== 'text') {
            return undefined;
          }
          return noiseHeadings.has(only.value.trim())
            ? removeAt(parent.children, index)
            : undefined;
        }

        if (node.type === 'text') {
          // Only paragraphs consisting of this single text node qualify.
          if (parent.type !== 'paragraph' || parent.children.length > 1) {
            return undefined;
          }
          const value = node.value.trim();
          const isNoise =
            noiseParagraphs.has(value) ||
            noisePrefixes.some((prefix) => value.startsWith(prefix));
          // The emptied paragraph is left for remark-squeeze-paragraphs.
          return isNoise ? removeAt(parent.children, index) : undefined;
        }

        return undefined;
      });
    };
  }

  const md = await unified()
    .use(rehypeParse, { fragment: true })
    .use(rehypeSanitize, {
      strip: ['script', 'style', 'nav'],
    })
    .use(remarkGfm, {
      tablePipeAlign: false,
      tableCellPadding: false,
    })
    .use(rehypeRemark, { document: false })
    .use(remarkDisqualify)
    .use(remarkUnlink)
    .use(remarkNormalizeHeadings)
    .use(remarkSqueezeParagraphs)
    .use(remarkStringify, {
      incrementListMarker: false,
      ruleSpaces: false,
      tightDefinitions: true,
    })
    .process(content.html);

  // Drop zero-width spaces/joiners and byte-order marks left in the text.
  return md.toString().replace(/[\u200B-\u200D\uFEFF]/g, '');
}
163
/**
 * Joins several Markdown documents into one and re-serializes the result.
 *
 * All entries are awaited, joined with a blank line between them, then parsed
 * and stringified again with GFM support, heading normalization and
 * paragraph squeezing applied. `null` entries are joined as empty strings.
 *
 * @param contents - Markdown strings (or `null`), either plain or as promises.
 * @returns The combined, normalized Markdown.
 */
export async function concatMarkdown(
  contents: (string | null)[] | Promise<string | null>[]
): Promise<string> {
  const parts = await Promise.all(contents);
  const combined = parts.join('\n\n');

  const processor = unified()
    .use(remarkParse, { fragment: true })
    .use(remarkGfm, {
      tablePipeAlign: false,
      tableCellPadding: false,
    })
    .use(remarkNormalizeHeadings)
    .use(remarkSqueezeParagraphs)
    .use(remarkStringify, {
      incrementListMarker: false,
      ruleSpaces: false,
      tightDefinitions: true,
    });

  const file = await processor.process(combined);
  return file.toString();
}
185
/**
 * Reads the title of a Markdown document.
 *
 * The document is parsed (with GFM enabled) and only its very first top-level
 * node is inspected: if that node is a depth-1 heading, its inline content is
 * flattened to text with `toString`; otherwise there is no title.
 *
 * @param markdown - Markdown source to inspect.
 * @returns The heading text, or `null` when the document does not start with
 *   a level-1 heading.
 */
function extractTitle(markdown: string): string | null {
  const parser = unified()
    .use(remarkParse, { fragment: true })
    .use(remarkGfm, {
      tablePipeAlign: false,
      tableCellPadding: false,
    });

  const [first] = parser.parse(markdown).children;
  if (!first || first.type !== 'heading' || first.depth !== 1) {
    return null;
  }
  return toString(first.children);
}
201
/**
 * Creates a transformer that forces a document to start with a level-1
 * heading carrying the given title.
 *
 * If the first top-level node is already a heading (of any depth), it is
 * mutated in place: its depth becomes 1 and its children are replaced by a
 * single text node with the title. Otherwise a new heading is inserted at the
 * start of the document.
 *
 * @param opts - `title` is the text for the heading.
 * @returns A transformer that mutates the given root in place.
 */
export function remarkTitle(opts: { title: string }) {
  return function checkTitleTransformer(root: Root) {
    const titleHeading: Heading = {
      type: 'heading',
      depth: 1,
      children: [{ type: 'text', value: opts.title }],
    };

    const first = root.children[0];
    if (first?.type === 'heading') {
      // Reuse the existing heading node, overwriting level and content.
      first.depth = 1;
      first.children = titleHeading.children;
      return;
    }

    root.children.unshift(titleHeading);
  };
}
220
/**
 * Copies the title of one Markdown document onto another.
 *
 * The title is taken from `from` via `extractTitle`. When no (non-empty)
 * title is found, `to` is returned unchanged. Otherwise `to` is parsed, the
 * `remarkTitle` transformer is applied with that title, and the document is
 * serialized again (using `-` as the list bullet).
 *
 * @param from - Markdown whose leading level-1 heading supplies the title.
 * @param to - Markdown that receives the title.
 * @returns `to` with the title applied, or `to` as-is when there is no title.
 */
export async function transferTitle(from: string, to: string): Promise<string> {
  const title = extractTitle(from);
  if (!title) {
    return to;
  }

  const processor = unified()
    .use(remarkParse, { fragment: true })
    .use(remarkGfm, {
      tablePipeAlign: false,
      tableCellPadding: false,
    })
    .use(remarkTitle, { title })
    .use(remarkStringify, {
      bullet: '-',
      incrementListMarker: false,
      ruleSpaces: false,
      tightDefinitions: true,
    });

  const file = await processor.process(to);
  return file.toString();
}