this repo has no description
1import { debug } from 'debug';
2import * as fs from 'node:fs/promises';
3import * as path from 'node:path';
4import { makeCacheFileHelper } from './path';
5
// Debug-channel logger for this module (namespace `llms-txt-gen:fetch`).
const log = debug('llms-txt-gen:fetch');

// Fetched pages are cached on disk under `.cache/fetch`, relative to the
// directory the process was started from.
const cacheDir = path.join(process.cwd(), '.cache', 'fetch');

// Make sure the cache directory exists as soon as the module is loaded;
// `recursive: true` also creates missing parents and tolerates an existing directory.
await fs.mkdir(cacheDir, { recursive: true });

// Helper that maps a URL to its cache file inside `cacheDir`.
const getCacheFile = makeCacheFileHelper(cacheDir);
11
/**
 * Cancels an unread response body so the connection behind it can be released
 * instead of staying reserved until the response is garbage-collected.
 */
async function discardBody(response: Response): Promise<void> {
  try {
    await response.body?.cancel();
  } catch {
    // Best effort only: a failed cancel must not mask the caller's own outcome.
  }
}

/**
 * Loads the HTML document at `url`, going through the on-disk cache first.
 *
 * A non-empty cache file is returned directly without any network request.
 * Otherwise the URL is fetched; an OK response whose `Content-Type` starts
 * with `text/html` is written to the cache file and returned.
 *
 * @param url - Address of the page to load.
 * @returns The page markup, or `null` when the response is not `text/html`
 *   or the server answered with a non-OK status below 500.
 * @throws Error with message `HTTP: <status>` when the server answers with a
 *   status of 500 or above. Rejections from `fetch` itself and from writing
 *   the cache file propagate unchanged.
 */
export async function fetchHtml(url: URL): Promise<string | null> {
  const cacheFile = await getCacheFile(url);

  try {
    const cached = await fs.readFile(cacheFile, 'utf-8');
    // An empty cache file counts as a miss and is fetched again.
    if (cached) {
      log('loading from cache', url.pathname);
      return cached;
    }
  } catch (error: unknown) {
    // A missing file is the ordinary cache miss. Any other read failure is
    // still treated as a miss, but logged so it does not go unnoticed.
    const isMissingFile =
      error instanceof Error && 'code' in error && error.code === 'ENOENT';
    if (!isMissingFile) {
      log('cache read failed', url.pathname, error);
    }
  }

  log('loading', url.pathname);
  const response = await fetch(url, {
    headers: {
      'Accept': 'text/html, application/xhtml+xml, application/xml',
      'Accept-Language': 'en-US',
    },
  });

  if (!response.ok) {
    // The body is never read on this path, so cancel it explicitly.
    await discardBody(response);
    if (response.status >= 500) {
      throw new Error(`HTTP: ${response.status}`);
    }
    log('skipped', url.pathname, response.status);
    return null;
  }

  // Media types are case-insensitive, so compare in lower case.
  const contentType = response.headers.get('Content-Type');
  if (!contentType?.toLowerCase().startsWith('text/html')) {
    log('discarded', url.pathname);
    await discardBody(response);
    return null;
  }

  const content = await response.text();
  await fs.writeFile(cacheFile, content, 'utf-8');
  return content;
}