
Switch up model and remove details elements

bun.lock  -8
···
"@mozilla/readability": "^0.6.0",
"@tsconfig/bun": "^1.0.8",
"ai": "^4.3.16",
- "ai-fallback": "^0.1.5",
"debug": "^4.4.1",
"happy-dom": "^18.0.1",
"hast-util-sanitize": "^5.0.2",
"js-graph-algorithms": "^1.0.18",
"mdast": "^3.0.0",
- "ollama-ai-provider": "^1.2.0",
"prettier": "^3.5.3",
"rehype-parse": "^9.0.1",
"rehype-remark": "^10.0.1",
···
"@ungap/structured-clone": ["@ungap/structured-clone@1.3.0", "", {}, "sha512-WmoN8qaIAo7WTYWbAZuG8PYEhn5fkz7dZrqTBZ7dtt//lL2Gwms1IcnQ5yHqjDfX8Ft5j4YzDM23f87zBfDe9g=="],
"ai": ["ai@4.3.16", "", { "dependencies": { "@ai-sdk/provider": "1.1.3", "@ai-sdk/provider-utils": "2.2.8", "@ai-sdk/react": "1.2.12", "@ai-sdk/ui-utils": "1.2.11", "@opentelemetry/api": "1.9.0", "jsondiffpatch": "0.6.0" }, "peerDependencies": { "react": "^18 || ^19 || ^19.0.0-rc", "zod": "^3.23.8" }, "optionalPeers": ["react"] }, "sha512-KUDwlThJ5tr2Vw0A1ZkbDKNME3wzWhuVfAOwIvFUzl1TPVDFAXDFTXio3p+jaKneB+dKNCvFFlolYmmgHttG1g=="],
-
- "ai-fallback": ["ai-fallback@0.1.5", "", { "dependencies": { "@ai-sdk/provider": "^1", "@ai-sdk/provider-utils": "^2" } }, "sha512-/FhTd9SGMEUDYBKbO3ZyfS0CBGglJByMbMRQOGjjDYlxZinFZtn99w1SPh4NZYJWIP5jjoewytfZjp+30QPT1A=="],
"bail": ["bail@2.0.2", "", {}, "sha512-0xO6mYd7JB2YesxDKplafRpsiOzPt9V02ddPCLbY1xYGPOX24NTyN50qnUxgCPcSoYMhKpAuBTjQoRZCAkUDRw=="],
···
"nanoid": ["nanoid@3.3.11", "", { "bin": { "nanoid": "bin/nanoid.cjs" } }, "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w=="],
- "ollama-ai-provider": ["ollama-ai-provider@1.2.0", "", { "dependencies": { "@ai-sdk/provider": "^1.0.0", "@ai-sdk/provider-utils": "^2.0.0", "partial-json": "0.1.7" }, "peerDependencies": { "zod": "^3.0.0" }, "optionalPeers": ["zod"] }, "sha512-jTNFruwe3O/ruJeppI/quoOUxG7NA6blG3ZyQj3lei4+NnJo7bi3eIRWqlVpRlu/mbzbFXeJSBuYQWF6pzGKww=="],
-
"parse5": ["parse5@7.3.0", "", { "dependencies": { "entities": "^6.0.0" } }, "sha512-IInvU7fabl34qmi9gY8XOVxhYyMyuH2xUNpb2q8/Y+7552KlejkRvqvD19nMoUW/uQGGbqNpA6Tufu5FL5BZgw=="],
-
- "partial-json": ["partial-json@0.1.7", "", {}, "sha512-Njv/59hHaokb/hRUjce3Hdv12wd60MtM9Z5Olmn+nehe0QDAsRtRbJPvJ0Z91TusF0SuZRIvnM+S4l6EIP8leA=="],
"prettier": ["prettier@3.5.3", "", { "bin": { "prettier": "bin/prettier.cjs" } }, "sha512-QQtaxnoDJeAkDvDKWCLiwIXkTgRhwYDEQCghU9Z6q03iyek/rxRh/2lC3HB7P8sWT2xC/y5JDctPLBIGzHKbhw=="],
package.json  -2
···
"@mozilla/readability": "^0.6.0",
"@tsconfig/bun": "^1.0.8",
"ai": "^4.3.16",
- "ai-fallback": "^0.1.5",
"debug": "^4.4.1",
"happy-dom": "^18.0.1",
"hast-util-sanitize": "^5.0.2",
"js-graph-algorithms": "^1.0.18",
"mdast": "^3.0.0",
- "ollama-ai-provider": "^1.2.0",
"prettier": "^3.5.3",
"rehype-parse": "^9.0.1",
"rehype-remark": "^10.0.1",
src/index.ts  +3 -1
···
log('crawl', site.name);
const pages = await crawl(site);
const contents: string[] = [];
- for (const page of pages) {
+ for (let idx = 0; idx < pages.length; idx++) {
+ const page = pages[idx]!;
const content = await page.getContent();
if (content) contents.push(content);
+ log(`completed page ${idx + 1} of ${pages.length}`);
}
const output = await concatMarkdown(contents);
const formatted = await formatMarkdown(output);
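
Assembled from the hunk above, the updated crawl loop looks roughly like this; the Page type and the log helper are loose stand-ins for the repo's real implementations, included only so the sketch is self-contained:

```ts
// Minimal sketch of the indexed crawl loop; Page and log are illustrative stand-ins.
type Page = { getContent(): Promise<string | null> };
type Log = (...args: unknown[]) => void;

async function collectContents(pages: Page[], log: Log): Promise<string[]> {
  const contents: string[] = [];
  // Indexed loop instead of for..of so progress can be reported per page.
  for (let idx = 0; idx < pages.length; idx++) {
    const page = pages[idx]!;
    const content = await page.getContent();
    if (content) contents.push(content);
    log(`completed page ${idx + 1} of ${pages.length}`);
  }
  return contents;
}
```

The switch from for..of to an indexed loop exists only so the progress message can report the current page number against the total.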
src/rewrite.ts  +28 -21
···
import { createFallback } from 'ai-fallback';
import { streamText } from 'ai';
import { createOpenAI } from '@ai-sdk/openai';
- import { createOllama } from 'ollama-ai-provider';
import * as fs from 'node:fs/promises';
import * as path from 'node:path';
···
await fs.mkdir(cacheDir, { recursive: true });
const getCacheFile = makeCacheFileHelper(cacheDir, '.txt');
-
if (!process.env.OPENAI_API_KEY) throw new Error('Missing OPENAI_API_KEY env var');
-
if (!process.env.OPENAI_API_URL) throw new Error('Missing OPENAI_API_URL env var');
-
const SYSTEM_PROMPT = `
Reformat markdown content you're given into an llms-full.txt file, also in markdown format
- Reformat for an AI and paraphrase where necessary, but be faithful to the original
- - Avoid using emphasis and use Github Flavored markdown syntax
- Keep code snippets and keep them in TypeScript or TypeScript typings format
- - Don't mention other content, pages, or external content (Remove sentences such as "Refer to", "Read more", "Learn how to")
- - For markdown tables, keep all relevant information in the table and remove table legends and emoji
- - Remove icon legends or irrelevant text
- - Don't add to the content or use any knowledge you may have on the subject
- - Format the output in AI-friendly markdown and preserve inline code
- - Remove sub-headings if they don't add crucial information or context
- - Don't wrap your output in a code block
+ - For markdown tables, keep all relevant information in the table
+ - Don't mention other content, pages, or external content
+ - Don't write your own content
+ - Don't add or use any knowledge you may have on the subject
+ - Don't add your own interpretation or notes and only reinterpret the input content
+ - Don't wrap the output in a markdown code block
+ Only return the reformatted markdown content and stop when you've processed all input markdown content
`;
const ai = createOpenAI({
···
baseURL: process.env.OPENAI_API_URL,
});
- const ollama = createOllama({
- baseURL: 'http://localhost:11434/api',
+ const ollama = createOpenAI({
+ baseURL: 'http://localhost:11434/v1',
});
export async function rewriteMarkdown(url: URL, input: string) {
···
} catch {}
log('prompting to rewrite', url.pathname);
const { textStream } = streamText({
+ temperature: 0.05,
+ maxSteps: 5,
+ experimental_continueSteps: true,
model: createFallback({
models: [
- ollama('mistral-small3.1:24b'),
+ ollama('phi4:14b'),
ai('@cf/mistralai/mistral-small-3.1-24b-instruct'),
],
onError(error, modelId) {
log(`error using model ${modelId}`, error);
},
}),
- maxSteps: 5,
- experimental_continueSteps: true,
- temperature: 0.05,
- system: SYSTEM_PROMPT.trim(),
- prompt: input,
+ onStepFinish({ finishReason, text }) {
+ if (finishReason !== 'stop')
+ log(`inference step (length: ${text.length})`, finishReason);
+ },
+ messages: [
+ {
+ role: 'system',
+ content: SYSTEM_PROMPT.trim(),
+ },
+ {
+ role: 'user',
+ content: input,
+ },
+ ],
});
- const output = [];
+ const output: string[] = [];
for await (const chunk of textStream)
output.push(chunk);
const text = output.join('');
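
Putting the rewrite.ts hunks together, the provider wiring after this change is roughly the sketch below: Ollama is now reached through its OpenAI-compatible /v1 endpoint via the generic @ai-sdk/openai provider, so the dedicated ollama-ai-provider package is dropped. Model IDs and streamText options mirror the diff; the apiKey handling, the console logging, and the exported function shape are assumptions for illustration, since the repo's caching and log helper sit outside the shown hunks.

```ts
// Sketch of the provider setup and streamText call after this commit.
import { streamText } from 'ai';
import { createFallback } from 'ai-fallback';
import { createOpenAI } from '@ai-sdk/openai';

const ai = createOpenAI({
  apiKey: process.env.OPENAI_API_KEY, // assumed; not visible in the hunk
  baseURL: process.env.OPENAI_API_URL,
});

// Ollama's OpenAI-compatible endpoint; it ignores the API key, so any placeholder works.
const ollama = createOpenAI({
  apiKey: 'ollama', // assumed placeholder
  baseURL: 'http://localhost:11434/v1',
});

export async function rewrite(systemPrompt: string, input: string): Promise<string> {
  const { textStream } = streamText({
    temperature: 0.05,
    maxSteps: 5,
    experimental_continueSteps: true,
    model: createFallback({
      // Local phi4 first, Workers AI Mistral as the fallback.
      models: [ollama('phi4:14b'), ai('@cf/mistralai/mistral-small-3.1-24b-instruct')],
      onError(error, modelId) {
        console.error(`error using model ${modelId}`, error);
      },
    }),
    onStepFinish({ finishReason, text }) {
      // Surface steps that ended for a reason other than a normal stop.
      if (finishReason !== 'stop')
        console.debug(`inference step (length: ${text.length})`, finishReason);
    },
    messages: [
      { role: 'system', content: systemPrompt.trim() },
      { role: 'user', content: input },
    ],
  });

  const output: string[] = [];
  for await (const chunk of textStream) output.push(chunk);
  return output.join('');
}
```

Replacing the system/prompt fields with an explicit messages array expresses the same system and user content; the new onStepFinish hook just logs non-terminal finish reasons so truncated steps are visible while experimental_continueSteps continues the generation.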
src/unified.ts  +2 -2
···
.use(rehypeParse)
.use(rehypeSanitize, {
tagNames: [
- ...defaultSanitizeSchema.tagNames!,
+ ...defaultSanitizeSchema.tagNames!.filter((tag) => tag !== 'details'),
'content-region',
'footer',
'header',
···
'section',
'nav',
],
- strip: ['script', 'style'],
+ strip: ['script', 'style', 'details'],
})
.use(rehypeStringify)
.process(html)
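
For context, a minimal sketch of the sanitize step with `details` handled both ways is below. The import alias for the default schema, the spread of that schema, and the exact extra-tag list are assumptions; the repo's real imports and full tag list sit outside the shown hunk.

```ts
// Sketch of the sanitize pipeline with <details> removed from the allow-list
// and stripped outright. Imports and the schema spread are illustrative.
import { unified } from 'unified';
import rehypeParse from 'rehype-parse';
import rehypeSanitize, { defaultSchema as defaultSanitizeSchema } from 'rehype-sanitize';
import rehypeStringify from 'rehype-stringify';

export async function sanitizeHtml(html: string): Promise<string> {
  const file = await unified()
    .use(rehypeParse)
    .use(rehypeSanitize, {
      ...defaultSanitizeSchema, // defensive in this sketch: keep the rest of the default schema
      tagNames: [
        // Drop 'details' from the default allow-list, keep the extra structural
        // tags the repo allows (some are elided from the shown hunk).
        ...defaultSanitizeSchema.tagNames!.filter((tag) => tag !== 'details'),
        'content-region',
        'footer',
        'header',
        'section',
        'nav',
      ],
      // Strip the whole element, children included, rather than just unwrapping it.
      strip: ['script', 'style', 'details'],
    })
    .use(rehypeStringify)
    .process(html);
  return String(file);
}
```

Removing 'details' from tagNames alone would only unwrap the element and keep its children; listing it in strip drops the element together with its contents, which is what the commit message calls for.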