this repo has no description

Enforce title from original page

Changed files
+75 -3
src
+2 -2
src/page.ts
···
import { WeightedDiGraph, KruskalMST, Edge } from 'js-graph-algorithms';
import { extractContent, extractLinks, parseBody } from "./dom";
import { fetchHtml } from "./fetch";
-
import { htmlToMarkdown, sanitizeHtml } from "./unified";
+
import { htmlToMarkdown, sanitizeHtml, transferTitle } from "./unified";
import { rewriteMarkdown } from './rewrite';
import { makeCacheFileHelper } from './path';
···
const markdown = await extractContentToMarkdown(this.url, html);
if (!markdown) return (this.#content = null);
const rewritten = await rewriteMarkdown(this.url, markdown);
-
return (this.#content = rewritten);
+
return (this.#content = await transferTitle(markdown, rewritten));
}
}
+7
src/types.d.ts
···
+
/**
 * Ambient typings for the `remark-title` package, declared locally in this
 * project's `types.d.ts`.
 */
declare module 'remark-title' {
  /** mdast root node type, aliased from the `mdast` typings. */
  export type Root = import('mdast').Root;

  /** Options object passed to the plugin. */
  interface TitleOptions {
    /** Title text handed to the plugin. */
    title: string;
  }

  /**
   * Plugin entry point: takes the options and returns a function that
   * receives the mdast root and returns `undefined`.
   */
  export default function remarkTitle(opts: TitleOptions): (tree: Root) => undefined;
}
+66 -1
src/unified.ts
···
-
import type { List, ListItem, Root } from 'mdast';
+
import type { List, ListItem, PhrasingContent, Root } from 'mdast';
import { unified } from 'unified';
import { visit } from 'unist-util-visit';
import { defaultSchema as defaultSanitizeSchema } from 'hast-util-sanitize';
···
import remarkGfm from 'remark-gfm';
import remarkParse from 'remark-parse';
import remarkSqueezeParagraphs from 'remark-squeeze-paragraphs';
+
import remarkTitle from 'remark-title';
export async function sanitizeHtml(html: string): Promise<string> {
const vfile = await unified()
···
);
return md.toString();
}
+
+
/**
 * Extracts the document title from a markdown string.
 *
 * The title is the plain-text content of the first heading that has the
 * smallest depth found anywhere in the document (e.g. the first `#` heading,
 * or the first `##` heading when no `#` heading exists).
 *
 * @param markdown - Markdown source to scan for headings.
 * @returns The flattened heading text, or `null` when the document contains
 *   no heading at all. The result may be an empty string when the winning
 *   heading holds only nodes that flatten to nothing (images, raw HTML,
 *   references).
 */
function extractTitle(markdown: string): string | null {
  // Flattens inline (phrasing) nodes into plain text.
  const toString = (nodes: PhrasingContent[]): string =>
    nodes
      .map((node) => {
        switch (node.type) {
          case 'break':
            return '\n';
          case 'delete':
          case 'emphasis':
          case 'link':
          case 'strong':
            // Formatting wrappers contribute only their children's text.
            return toString(node.children);
          case 'inlineCode':
            // Inline code keeps its backticks in the resulting title.
            return `\`${node.value}\``;
          case 'text':
            return node.value;
          case 'footnoteReference':
          case 'html':
          case 'image':
          case 'imageReference':
          case 'linkReference':
          default:
            // Nodes without directly usable text are dropped.
            return '';
        }
      })
      .join('');

  const tree = unified()
    .use(remarkParse, { fragment: true })
    .use(remarkGfm, {
      tablePipeAlign: false,
      tableCellPadding: false,
    })
    .parse(markdown);

  // Depth of the heading currently selected as title; null until one is seen.
  let depth: number | null = null;
  let title: string | null = null;

  visit(tree, function (node) {
    if (node.type !== 'heading')
      return;
    // Replace the candidate only for a strictly shallower heading, so the
    // first heading at the minimum depth wins. Recording the depth is what
    // keeps later, deeper (or equally deep) headings from overwriting it.
    if (depth === null || node.depth < depth) {
      depth = node.depth;
      title = toString(node.children);
    }
  });

  return title;
}
+
+
/**
 * Carries the title found in one markdown document over to another.
 *
 * The title is taken from `from` via `extractTitle`. When none is found (or
 * it is empty), `to` is returned untouched. Otherwise `to` is parsed, run
 * through the `remark-title` plugin with that title, and serialized back to
 * markdown.
 *
 * NOTE(review): the exact effect of `remark-title` on the tree (insert vs.
 * replace the leading heading) is not visible here; confirm against the
 * plugin's documentation.
 *
 * @param from - Markdown whose heading supplies the title.
 * @param to - Markdown that should receive the title.
 * @returns The resulting markdown string.
 */
export async function transferTitle(from: string, to: string): Promise<string> {
  const title = extractTitle(from);
  if (!title) {
    return to;
  }

  // Parse -> GFM -> enforce title -> stringify.
  const processor = unified()
    .use(remarkParse, { fragment: true })
    .use(remarkGfm, { tablePipeAlign: false, tableCellPadding: false })
    .use(remarkTitle, { title })
    .use(remarkStringify, {
      bullet: '-',
      incrementListMarker: false,
      ruleSpaces: false,
      tightDefinitions: true,
    });

  const file = await processor.process(to);
  return file.toString();
}