this repo has no description
1import { debug } from 'debug';
2import * as path from 'path';
3import * as fs from 'fs/promises';
4
5import { crawl, type CrawlOptions } from './page';
6import { concatMarkdown } from './unified';
7import { formatMarkdown } from './prettier';
8
// Namespaced logger from the `debug` package; every message in this file goes through it.
const log = debug('llms-txt-gen:main');
10
/**
 * One documentation site to process: the crawler's options plus a `name`.
 *
 * `name` selects the site from the command line and is embedded in the
 * output file name.
 */
interface Site extends CrawlOptions {
  name: string;
}
14
// Absolute path of the `out` directory inside the current working directory;
// `path.resolve` anchors a relative segment at `process.cwd()`.
const outputPath = path.resolve('out');

// Create the directory up front. With `recursive: true`, Node also creates
// missing parents and does not reject when the directory already exists.
await fs.mkdir(outputPath, { recursive: true });
17
/**
 * Builds the `llms-full-<name>.txt` file for a single site.
 *
 * Crawls the site, awaits each page's content one page at a time (skipping
 * pages whose content is falsy), passes the collected contents through
 * `concatMarkdown` and then `formatMarkdown`, and writes the result as UTF-8
 * into `outputPath`.
 *
 * @param site - Crawl options and name of the site to process.
 */
async function generate(site: Site) {
  log('crawl', site.name);
  const crawled = await crawl(site);

  const sections: string[] = [];
  let finished = 0;
  for (const page of crawled) {
    const markdown = await page.getContent();
    if (markdown) {
      sections.push(markdown);
    }
    // Progress is reported for every page, including ones that were skipped.
    finished += 1;
    log(`completed page ${finished} of ${crawled.length}`);
  }

  const merged = await concatMarkdown(sections);
  const pretty = await formatMarkdown(site.name, merged);
  const target = path.join(outputPath, `llms-full-${site.name}.txt`);
  await fs.writeFile(target, pretty, 'utf-8');
}
33
/**
 * Documentation sites this script knows how to process.
 *
 * `baseURL`, `include` and `exclude` are handed to the crawler as-is; their
 * exact matching semantics are defined by `CrawlOptions` in `./page`.
 * Declared with `let` because the list is narrowed when site names are given
 * on the command line.
 */
let sites: Site[] = [
  {
    name: 'react-native-reanimated',
    baseURL: 'https://docs.swmansion.com/react-native-reanimated/',
    include: ['/react-native-reanimated/docs/(.*)'],
    exclude: [
      '/react-native-reanimated/docs/next/(.*)',
      '/react-native-reanimated/docs/2.x/(.*)',
      '/react-native-reanimated/docs/1.x/(.*)',
      '/react-native-reanimated/docs/category/(.*)',
    ],
  },
  {
    name: 'react-native-gesture-handler',
    baseURL: 'https://docs.swmansion.com/react-native-gesture-handler/',
    include: ['/react-native-gesture-handler/docs/(.*)'],
    exclude: ['/react-native-gesture-handler/docs/1.x/(.*)'],
  },
  {
    name: 'nativewind',
    baseURL: 'https://www.nativewind.dev/docs',
    include: ['/docs/(.*)'],
  },
  {
    name: 'react-native',
    baseURL: 'https://reactnative.dev/docs/getting-started',
    include: ['/docs/(.*)'],
    exclude: [
      '/docs/(0\\..*)',
      '/docs/next/(.*)',
      '/docs/legacy/(.*)',
      '/docs/the-new-architecture/advanced-topics-components',
      '/docs/the-new-architecture/advanced-topics-modules',
    ],
  },
];
80
// Optional command-line arguments name the sites to process; with no
// arguments every configured site is processed. Names that match no site are
// ignored.
const args = process.argv.slice(2);
if (args.length > 0) {
  const requested = new Set(args);
  sites = sites.filter(({ name }) => requested.has(name));
}

// Sites are handled strictly one after another.
for (const current of sites) {
  await generate(current);
  log('generated llms-full.txt for', current.name);
}
89}