1import argparse
2import hashlib
3import html
4import json
5import re
6import xml.sax.saxutils as xml
7
8from abc import abstractmethod
9from collections.abc import Mapping, Sequence
10from pathlib import Path
11from typing import Any, cast, ClassVar, Generic, get_args, NamedTuple
12
13from markdown_it.token import Token
14
15from . import md, options
16from .docbook import DocBookRenderer, Heading, make_xml_id
17from .html import HTMLRenderer, UnresolvedXrefError
18from .manual_structure import check_structure, FragmentType, is_include, TocEntry, TocEntryType, XrefTarget
19from .md import Converter, Renderer
20
class BaseConverter(Converter[md.TR], Generic[md.TR]):
    """Shared conversion driver for manual converters.

    On top of the parsing inherited from ``Converter`` this class processes include
    blocks: structural includes are parsed recursively and stored on the include token
    as ``meta['included']``, while ``options`` includes get their JSON source loaded
    into the token's ``meta``. Subclasses set ``INCLUDE_ARGS_NS`` and may allow
    namespaced include arguments through the ``*_ALLOWED_ARGS`` sets.
    """

    # per-converter configuration for ns:arg=value arguments to include blocks, following
    # the include type. html converters need something like this to support chunking, or
    # another external method like the chunktocs docbook uses (but block options seem like
    # a much nicer way of doing this).
    INCLUDE_ARGS_NS: ClassVar[str]
    INCLUDE_FRAGMENT_ALLOWED_ARGS: ClassVar[set[str]] = set()
    INCLUDE_OPTIONS_ALLOWED_ARGS: ClassVar[set[str]] = set()

    # stack of the files currently being parsed; the last entry is the innermost file
    _base_paths: list[Path]
    # stack of the structural types being parsed; the last entry is the innermost type
    _current_type: list[TocEntryType]

    def convert(self, infile: Path, outfile: Path) -> None:
        """Parse ``infile``, render it and write the result to ``outfile``.

        Raises:
            RuntimeError: wrapping any exception raised while parsing, post-processing,
                rendering or writing.
        """
        self._base_paths = [ infile ]
        self._current_type = ['book']
        try:
            tokens = self._parse(infile.read_text())
            self._postprocess(infile, outfile, tokens)
            converted = self._renderer.render(tokens)
            outfile.write_text(converted)
        except Exception as e:
            raise RuntimeError(f"failed to render manual {infile}") from e

    def _postprocess(self, infile: Path, outfile: Path, tokens: Sequence[Token]) -> None:
        """Hook called between parsing and rendering; does nothing by default."""
        pass

    def _parse(self, src: str) -> list[Token]:
        """Parse ``src`` and resolve every include block found in the token stream.

        Include tokens are retyped to ``included_options`` or ``included_<type>`` and
        processed by ``_parse_options`` / ``_parse_included_blocks`` respectively.

        Raises:
            RuntimeError: for a structural include whose type is not a ``FragmentType``.
        """
        tokens = super()._parse(src)
        check_structure(self._current_type[-1], tokens)
        for token in tokens:
            if not is_include(token):
                continue
            # the first 12 characters of the info string are skipped; presumably that
            # is the include marker recognised by is_include -- TODO confirm
            directive = token.info[12:].split()
            if not directive:
                continue
            typ = directive[0]
            # remaining words are key=value pairs; a word without '=' maps to ""
            args = { k: v for k, _sep, v in (word.partition('=') for word in directive[1:]) }
            if typ == 'options':
                token.type = 'included_options'
                self._process_include_args(token, args, self.INCLUDE_OPTIONS_ALLOWED_ARGS)
                self._parse_options(token, args)
                continue
            fragment_type = typ.removesuffix('s')
            if fragment_type not in get_args(FragmentType):
                raise RuntimeError(f"unsupported structural include type '{typ}'")
            self._current_type.append(cast(FragmentType, fragment_type))
            try:
                token.type = 'included_' + typ
                self._process_include_args(token, args, self.INCLUDE_FRAGMENT_ALLOWED_ARGS)
                self._parse_included_blocks(token, args)
            finally:
                # always unwind the type stack so a failed include does not leave the
                # converter with a stale nesting state
                self._current_type.pop()
        return tokens

    def _process_include_args(self, token: Token, args: dict[str, str], allowed: set[str]) -> None:
        """Store the include arguments of this converter's namespace on ``token``.

        Only arguments prefixed with ``INCLUDE_ARGS_NS + ":"`` are considered; the
        prefix is stripped and the result saved as ``token.meta['include-args']``.

        Raises:
            RuntimeError: if a namespaced argument is not in ``allowed``.
        """
        ns = self.INCLUDE_ARGS_NS + ":"
        args = { k[len(ns):]: v for k, v in args.items() if k.startswith(ns) }
        if unknown := set(args.keys()) - allowed:
            assert token.map
            raise RuntimeError(f"unrecognized include argument in line {token.map[0] + 1}", unknown)
        token.meta['include-args'] = args

    def _parse_included_blocks(self, token: Token, block_args: dict[str, str]) -> None:
        """Parse every file listed in a structural include block.

        Each line of the block content is taken as a path relative to the directory of
        the file currently being parsed. The parsed ``(tokens, path)`` pairs are
        appended to ``token.meta['included']``.

        Raises:
            RuntimeError: for a circular include, or wrapping any error raised while
                reading or parsing an included file.
        """
        assert token.map
        included = token.meta['included'] = []
        # messages elsewhere report the block itself as line token.map[0] + 1, so the
        # first content line is reported as token.map[0] + 2
        for (lnum, line) in enumerate(token.content.splitlines(), token.map[0] + 2):
            line = line.strip()
            path = self._base_paths[-1].parent / line
            if path in self._base_paths:
                raise RuntimeError(f"circular include found in line {lnum}")
            self._base_paths.append(path)
            try:
                with open(path, 'r') as f:
                    tokens = self._parse(f.read())
                included.append((tokens, path))
            except Exception as e:
                raise RuntimeError(f"processing included file {path} from line {lnum}") from e
            finally:
                # keep the path stack balanced even when the included file fails
                self._base_paths.pop()

    def _parse_options(self, token: Token, block_args: dict[str, str]) -> None:
        """Parse an ``options`` include block and load its JSON source.

        The block content consists of ``key: value`` lines. Exactly the keys
        ``id-prefix``, ``list-id`` and ``source`` are required; ``source`` is a path
        relative to the directory of the file currently being parsed. The values are
        stored in ``token.meta`` under ``id-prefix``, ``list-id`` and ``source`` (the
        latter holding the decoded JSON document).

        Raises:
            RuntimeError: for malformed, duplicate, missing or unsupported directives,
                or wrapping any error raised while loading the source file.
        """
        assert token.map

        items = {}
        for (lnum, line) in enumerate(token.content.splitlines(), token.map[0] + 2):
            if len(args := line.split(":", 1)) != 2:
                raise RuntimeError(f"options directive with no argument in line {lnum}")
            (k, v) = (args[0].strip(), args[1].strip())
            if k in items:
                raise RuntimeError(f"duplicate options directive {k} in line {lnum}")
            items[k] = v
        try:
            id_prefix = items.pop('id-prefix')
            varlist_id = items.pop('list-id')
            source = items.pop('source')
        except KeyError as e:
            raise RuntimeError(f"options directive {e} missing in block at line {token.map[0] + 1}") from e
        # anything left over after popping the known keys is unsupported
        if items:
            raise RuntimeError(
                f"unsupported options directives in block at line {token.map[0] + 1}",
                " ".join(items.keys()))

        try:
            with open(self._base_paths[-1].parent / source, 'r') as f:
                token.meta['id-prefix'] = id_prefix
                token.meta['list-id'] = varlist_id
                token.meta['source'] = json.load(f)
        except Exception as e:
            raise RuntimeError(f"processing options block in line {token.map[0] + 1}") from e
127
class RendererMixin(Renderer):
    """Renderer mixin adding render rules for the ``included_*`` tokens of a manual.

    Concrete renderers must implement ``_render_book``, ``_included_thing`` and
    ``included_options``.
    """

    _toplevel_tag: str
    _revision: str

    def __init__(self, toplevel_tag: str, revision: str, *args: Any, **kwargs: Any):
        """Remember the toplevel tag and revision and register the include rules.

        Remaining positional and keyword arguments are passed on to the next
        ``__init__`` in the MRO.
        """
        super().__init__(*args, **kwargs)
        self._toplevel_tag = toplevel_tag
        self._revision = revision

        def structural(tag: str) -> Any:
            # one rule per include type; each forwards to _included_thing with its tag
            return lambda *rule_args: self._included_thing(tag, *rule_args)

        self.rules |= {
            'included_sections': structural("section"),
            'included_chapters': structural("chapter"),
            'included_preface': structural("preface"),
            'included_parts': structural("part"),
            'included_appendix': structural("appendix"),
            'included_options': self.included_options,
        }

    def render(self, tokens: Sequence[Token]) -> str:
        """Render ``tokens``, delegating to ``_render_book`` for a toplevel book."""
        # books get special handling because they have *two* title tags. doing this with
        # generic code is more complicated than it's worth. earlier structure checks are
        # expected to have verified that both titles actually exist.
        if self._toplevel_tag != 'book':
            return super().render(tokens)
        return self._render_book(tokens)

    @abstractmethod
    def _render_book(self, tokens: Sequence[Token]) -> str:
        """Render a whole book; must be provided by the concrete renderer."""
        raise NotImplementedError()

    @abstractmethod
    def _included_thing(self, tag: str, token: Token, tokens: Sequence[Token], i: int) -> str:
        """Render a structural include of kind ``tag``; must be overridden."""
        raise NotImplementedError()

    @abstractmethod
    def included_options(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        """Render an ``included_options`` token; must be overridden."""
        raise NotImplementedError()
165
class ManualDocBookRenderer(RendererMixin, DocBookRenderer):
    """DocBook renderer for manuals, including structural and options includes."""

    def __init__(self, toplevel_tag: str, revision: str, manpage_urls: Mapping[str, str]):
        super().__init__(toplevel_tag, revision, manpage_urls)

    def _render_book(self, tokens: Sequence[Token]) -> str:
        """Render the ``<book>`` element with its title and subtitle.

        The title inlines are read from ``tokens[1]``, the subtitle inlines from
        ``tokens[4]``, and everything from ``tokens[6]`` on is rendered as the body.
        """
        assert tokens[1].children
        assert tokens[4].children
        book_id = cast(str, tokens[0].attrs.get('id', ""))
        # an absent/empty id contributes nothing to the attribute list
        id_attr = ("xml:id=" + xml.quoteattr(book_id)) if book_id else book_id
        title = self.renderInline(tokens[1].children)
        subtitle = self.renderInline(tokens[4].children)
        body = super(DocBookRenderer, self).render(tokens[6:])
        return "".join([
            '<book xmlns="http://docbook.org/ns/docbook"',
            ' xmlns:xlink="http://www.w3.org/1999/xlink"',
            f' {id_attr} version="5.0">',
            f' <title>{title}</title>',
            f' <subtitle>{subtitle}</subtitle>',
            f' {body}',
            '</book>',
        ])

    def _heading_tag(self, token: Token, tokens: Sequence[Token], i: int) -> tuple[str, dict[str, str]]:
        """Return tag and attributes for a heading; ``h1`` becomes the toplevel tag."""
        tag, attrs = super()._heading_tag(token, tokens, i)
        # render() has already verified that we don't have supernumerary headings and since the
        # book tag is handled specially we can leave the check this simple
        if token.tag == 'h1':
            namespaces = {
                'xmlns': "http://docbook.org/ns/docbook",
                'xmlns:xlink': "http://www.w3.org/1999/xlink",
            }
            return (self._toplevel_tag, attrs | namespaces)
        return (tag, attrs)

    def _included_thing(self, tag: str, token: Token, tokens: Sequence[Token], i: int) -> str:
        """Render all fragments of a structural include with a fresh renderer.

        Raises:
            RuntimeError: wrapping any error raised while rendering a fragment.
        """
        pieces = []
        # close existing partintro. the generic render doesn't really need this because
        # it doesn't have a concept of structure in the way the manual does.
        if self._headings and self._headings[-1] == Heading('part', 1):
            pieces.append("</partintro>")
            self._headings[-1] = self._headings[-1]._replace(partintro_closed=True)
        # must nest properly for structural includes. this requires saving at least
        # the headings stack, but creating new renderers is cheap and much easier.
        nested = ManualDocBookRenderer(tag, self._revision, self._manpage_urls)
        for fragment, path in token.meta['included']:
            try:
                rendered = nested.render(fragment)
            except Exception as e:
                raise RuntimeError(f"rendering {path}") from e
            pieces.append(rendered)
        return "".join(pieces)

    def included_options(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        """Render the options stored on ``token`` as a DocBook fragment."""
        meta = token.meta
        conv = options.DocBookConverter(self._manpage_urls, self._revision, 'fragment',
                                        meta['list-id'], meta['id-prefix'])
        conv.add_options(meta['source'])
        return conv.finalize(fragment=True)

    # TODO minimize docbook diffs with existing conversions. remove soon.
    def paragraph_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        opened = super().paragraph_open(token, tokens, i)
        return f"{opened}\n "

    def paragraph_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        closed = super().paragraph_close(token, tokens, i)
        return f"\n{closed}"

    def code_block(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        escaped = xml.escape(token.content)
        return f"<programlisting>\n{escaped}</programlisting>"

    def fence(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        # only emit a language attribute when the fence carries an info string
        if token.info != "":
            info = f" language={xml.quoteattr(token.info)}"
        else:
            info = ""
        return f"<programlisting{info}>\n{xml.escape(token.content)}</programlisting>"
226
class DocBookConverter(BaseConverter[ManualDocBookRenderer]):
    """Manual converter producing DocBook; include arguments use the ``docbook`` namespace."""

    INCLUDE_ARGS_NS = "docbook"

    def __init__(self, manpage_urls: Mapping[str, str], revision: str):
        """Create the converter together with its toplevel ``book`` renderer."""
        super().__init__()
        book_renderer = ManualDocBookRenderer('book', revision, manpage_urls)
        self._renderer = book_renderer
233
234
class HTMLParameters(NamedTuple):
    """Settings consumed by the HTML manual renderer."""

    # value emitted in the <meta name="generator"> tag of every file header
    generator: str
    # hrefs emitted as <link rel="stylesheet"> tags in every file header
    stylesheets: Sequence[str]
    # srcs emitted as <script> tags in every file header
    scripts: Sequence[str]
    # number of levels in the rendered table of contents. tables are prepended to
    # the content they apply to (entire document / document chunk / top-level section
    # of a chapter), setting a depth of 0 omits the respective table.
    toc_depth: int
    chunk_toc_depth: int
    section_toc_depth: int
    # directory (joined onto the output base path) that pulled images are written to
    media_dir: Path
246
class ManualHTMLRenderer(RendererMixin, HTMLRenderer):
    """HTML renderer for manuals with chunking, navigation and tables of contents."""

    # directory that output files (chunks, media) are written below
    _base_path: Path
    # directory that relative image sources are resolved against
    _in_dir: Path
    _html_params: HTMLParameters

    def __init__(self, toplevel_tag: str, revision: str, html_params: HTMLParameters,
                 manpage_urls: Mapping[str, str], xref_targets: dict[str, XrefTarget],
                 in_dir: Path, base_path: Path):
        super().__init__(toplevel_tag, revision, manpage_urls, xref_targets)
        self._in_dir = in_dir
        self._base_path = base_path.absolute()
        self._html_params = html_params

    def _pull_image(self, src: str) -> str:
        """Copy the image ``src`` into the media directory and return its new URL."""
        source = Path(src)
        data = (self._in_dir / source).read_bytes()
        # images may be used more than once, but we want to store them only once and
        # in an easily accessible (ie, not input-file-path-dependent) location without
        # having to maintain a mapping structure. hashing the file and using the hash
        # as the name of the final image provides both.
        digest = hashlib.sha3_256(data).hexdigest()
        file_name = f"{digest}{source.suffix}"
        media_dir = self._html_params.media_dir
        (self._base_path / media_dir / file_name).write_bytes(data)
        return f"./{media_dir}/{file_name}"

    def _push(self, tag: str, hlevel_offset: int) -> Any:
        """Enter a nested include; returns the previous state for ``_pop``."""
        saved = (self._toplevel_tag, self._headings, self._attrspans, self._hlevel_offset, self._in_dir)
        self._hlevel_offset += hlevel_offset
        self._toplevel_tag = tag
        self._headings = []
        self._attrspans = []
        return saved

    def _pop(self, state: Any) -> None:
        """Restore the renderer state previously returned by ``_push``."""
        (self._toplevel_tag, self._headings, self._attrspans, self._hlevel_offset, self._in_dir) = state

    def _render_book(self, tokens: Sequence[Token]) -> str:
        """Render the toplevel book file: header, title page, toc, body and footer."""
        assert tokens[4].children
        title_id = cast(str, tokens[0].attrs.get('id', ""))
        title = self._xref_targets[title_id].title
        # subtitles don't have IDs, so we can't use xrefs to get them
        subtitle = self.renderInline(tokens[4].children)

        toc = TocEntry.of(tokens[0])
        lines = [
            self._file_header(toc),
            ' <div class="book">',
            ' <div class="titlepage">',
            ' <div>',
            f' <div><h1 class="title"><a id="{html.escape(title_id, True)}"></a>{title}</h1></div>',
            f' <div><h2 class="subtitle">{subtitle}</h2></div>',
            ' </div>',
            " <hr />",
            ' </div>',
            self._build_toc(tokens, 0),
            super(HTMLRenderer, self).render(tokens[6:]),
            ' </div>',
            self._file_footer(toc),
        ]
        return "\n".join(lines)

    def _file_header(self, toc: TocEntry) -> str:
        """Build everything up to and including the navigation header of a file."""
        prev_link = up_link = next_link = ""
        prev_a = next_a = ""
        parent_title = " "
        nav_html = ""
        home = toc.root
        if toc.prev:
            prev_link = f'<link rel="prev" href="{toc.prev.target.href()}" title="{toc.prev.target.title}" />'
            prev_a = f'<a accesskey="p" href="{toc.prev.target.href()}">Prev</a>'
        if toc.parent:
            up_link = (
                f'<link rel="up" href="{toc.parent.target.href()}" '
                f'title="{toc.parent.target.title}" />'
            )
            # the parent title is only shown when the parent is not the book itself
            if toc.parent.kind != 'book':
                assert toc.parent.target.title
                parent_title = toc.parent.target.title
        if toc.next:
            next_link = f'<link rel="next" href="{toc.next.target.href()}" title="{toc.next.target.title}" />'
            next_a = f'<a accesskey="n" href="{toc.next.target.href()}">Next</a>'
        if toc.prev or toc.parent or toc.next:
            nav_lines = [
                ' <div class="navheader">',
                ' <table width="100%" summary="Navigation header">',
                ' <tr>',
                f' <th colspan="3" align="center">{toc.target.title}</th>',
                ' </tr>',
                ' <tr>',
                f' <td width="20%" align="left">{prev_a} </td>',
                f' <th width="60%" align="center">{parent_title}</th>',
                f' <td width="20%" align="right"> {next_a}</td>',
                ' </tr>',
                ' </table>',
                ' <hr />',
                ' </div>',
            ]
            nav_html = "\n".join(nav_lines)
        return "\n".join([
            '<?xml version="1.0" encoding="utf-8" standalone="no"?>',
            '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"',
            ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">',
            '<html xmlns="http://www.w3.org/1999/xhtml">',
            ' <head>',
            ' <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />',
            f' <title>{toc.target.title}</title>',
            "".join([
                f'<link rel="stylesheet" type="text/css" href="{html.escape(style, True)}" />'
                for style in self._html_params.stylesheets
            ]),
            "".join([
                f'<script src="{html.escape(script, True)}" type="text/javascript"></script>'
                for script in self._html_params.scripts
            ]),
            f' <meta name="generator" content="{html.escape(self._html_params.generator, True)}" />',
            f' <link rel="home" href="{home.target.href()}" title="{home.target.title}" />' if home.target.href() else "",
            f' {up_link}{prev_link}{next_link}',
            ' </head>',
            ' <body>',
            nav_html,
        ])

    def _file_footer(self, toc: TocEntry) -> str:
        """Build the navigation footer and the closing tags of a file."""
        prev_a = next_a = ""
        up_a = home_a = " "
        prev_text = next_text = ""
        nav_html = ""
        home = toc.root
        if toc.prev:
            prev_a = f'<a accesskey="p" href="{toc.prev.target.href()}">Prev</a>'
            assert toc.prev.target.title
            prev_text = toc.prev.target.title
        if toc.parent:
            home_a = f'<a accesskey="h" href="{home.target.href()}">Home</a>'
            # an "Up" link is only useful when it differs from "Home"
            if toc.parent != home:
                up_a = f'<a accesskey="u" href="{toc.parent.target.href()}">Up</a>'
        if toc.next:
            next_a = f'<a accesskey="n" href="{toc.next.target.href()}">Next</a>'
            assert toc.next.target.title
            next_text = toc.next.target.title
        if toc.prev or toc.parent or toc.next:
            nav_lines = [
                ' <div class="navfooter">',
                ' <hr />',
                ' <table width="100%" summary="Navigation footer">',
                ' <tr>',
                f' <td width="40%" align="left">{prev_a} </td>',
                f' <td width="20%" align="center">{up_a}</td>',
                f' <td width="40%" align="right"> {next_a}</td>',
                ' </tr>',
                ' <tr>',
                f' <td width="40%" align="left" valign="top">{prev_text} </td>',
                f' <td width="20%" align="center">{home_a}</td>',
                f' <td width="40%" align="right" valign="top"> {next_text}</td>',
                ' </tr>',
                ' </table>',
                ' </div>',
            ]
            nav_html = "\n".join(nav_lines)
        return "\n".join([
            nav_html,
            ' </body>',
            '</html>',
        ])

    def _heading_tag(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        """Return the tag for a heading; ``h1`` maps to the current toplevel tag."""
        if token.tag != 'h1':
            return super()._heading_tag(token, tokens, i)
        return self._toplevel_tag

    def _build_toc(self, tokens: Sequence[Token], i: int) -> str:
        """Build the table of contents (plus figure/example lists) for ``tokens[i]``.

        Returns an empty string when the applicable depth yields no entries.
        """
        toc = TocEntry.of(tokens[i])
        params = self._html_params
        is_section = toc.kind == 'section'
        if is_section and params.section_toc_depth < 1:
            return ""

        def walk_and_emit(entry: TocEntry, depth: int) -> list[str]:
            if depth <= 0:
                return []
            emitted = []
            for child in entry.children:
                emitted.append(
                    f'<dt>'
                    f' <span class="{html.escape(child.kind, True)}">'
                    f' <a href="{child.target.href()}">{child.target.toc_html}</a>'
                    f' </span>'
                    f'</dt>'
                )
                # we want to look straight through parts because docbook-xsl does too, but it
                # also makes for more useful top-level tocs.
                child_depth = depth if child.kind == 'part' else depth - 1
                nested = walk_and_emit(child, child_depth)
                if nested:
                    emitted.append(f'<dd><dl>{"".join(nested)}</dl></dd>')
            return emitted

        def build_list(kind: str, css_class: str, lst: Sequence[TocEntry]) -> str:
            if not lst:
                return ""
            entries = [
                f'<dt>{num}. <a href="{e.target.href()}">{e.target.toc_html}</a></dt>'
                for num, e in enumerate(lst, start=1)
            ]
            return "".join([
                f'<div class="{css_class}">',
                f'<p><strong>List of {kind}</strong></p>',
                f'<dl>{"".join(entries)}</dl>',
                '</div>',
            ])

        # we don't want to generate the "Table of Contents" header for sections,
        # docbook doesn't and it's only distracting clutter unless it's the main table.
        # we also want to generate tocs only for a top-level section (ie, one that is
        # not itself contained in another section)
        print_title = not is_section
        if not is_section:
            if toc.starts_new_chunk and toc.kind != 'book':
                toc_depth = params.chunk_toc_depth
            else:
                toc_depth = params.toc_depth
        elif toc.parent and toc.parent.kind == 'section':
            toc_depth = 0
        else:
            toc_depth = params.section_toc_depth

        items = walk_and_emit(toc, toc_depth)
        if not items:
            return ""
        figures = build_list("Figures", "list-of-figures", toc.figures)
        examples = build_list("Examples", "list-of-examples", toc.examples)
        return "".join([
            '<div class="toc">',
            ' <p><strong>Table of Contents</strong></p>' if print_title else "",
            ' <dl class="toc">',
            f' {"".join(items)}',
            ' </dl>',
            '</div>',
            figures,
            examples,
        ])

    def _make_hN(self, level: int) -> tuple[str, str]:
        """Return the heading tag and inline style for a heading of ``level``."""
        # for some reason chapters don't increase the hN nesting count in docbook xslts. duplicate
        # this for consistency.
        in_chapter = self._toplevel_tag == 'chapter'
        if in_chapter:
            level -= 1
        # TODO docbook compat. these are never useful for us, but not having them breaks manual
        # compare workflows while docbook is still allowed.
        shallow = level + self._hlevel_offset < 3
        applies = self._toplevel_tag == 'section' or (in_chapter and level > 0)
        style = "clear: both" if shallow and applies else ""
        # only the tag of the base implementation is used; its style is replaced
        tag, _base_style = super()._make_hN(max(1, level))
        return tag, style

    def _included_thing(self, tag: str, token: Token, tokens: Sequence[Token], i: int) -> str:
        """Render a structural include, inline or redirected into its own file.

        With an ``into-file`` include argument the fragments are written to that file
        (relative to the output base path) wrapped in a file header and footer, and
        only the partintro handling output is returned to the including document.

        Raises:
            RuntimeError: wrapping any error raised while rendering a fragment.
        """
        # since books have no non-include content the toplevel book wrapper will not count
        # towards nesting depth. other types will have at least a title+id heading which
        # *does* count towards the nesting depth. chapters give a -1 to included sections
        # mirroring the special handing in _make_hN. sigh.
        if not self._headings:
            hoffset = 0
        elif self._toplevel_tag == 'chapter':
            hoffset = self._headings[-1].level - 1
        else:
            hoffset = self._headings[-1].level
        outer = [self._maybe_close_partintro()]
        into = token.meta['include-args'].get('into-file')
        fragments = token.meta['included']
        state = self._push(tag, hoffset)
        if into:
            toc = TocEntry.of(fragments[0][0][0])
            # we do not set _hlevel_offset=0 because docbook doesn't either.
            inner = [self._file_header(toc)]
        else:
            # not redirected: fragments are rendered straight into the caller's output
            inner = outer
        in_dir = self._in_dir
        for included, path in fragments:
            try:
                self._in_dir = (in_dir / path).parent
                inner.append(self.render(included))
            except Exception as e:
                raise RuntimeError(f"rendering {path}") from e
        if into:
            inner.append(self._file_footer(toc))
            (self._base_path / into).write_text("".join(inner))
        self._pop(state)
        return "".join(outer)

    def included_options(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        """Render the options stored on ``token`` as HTML."""
        meta = token.meta
        conv = options.HTMLConverter(self._manpage_urls, self._revision,
                                     meta['list-id'], meta['id-prefix'],
                                     self._xref_targets)
        conv.add_options(meta['source'])
        return conv.finalize()
525
526def _to_base26(n: int) -> str:
527 return (_to_base26(n // 26) if n > 26 else "") + chr(ord("A") + n % 26)
528
class HTMLConverter(BaseConverter[ManualHTMLRenderer]):
    """Manual converter producing (optionally chunked) HTML.

    Include arguments use the ``html`` namespace; structural includes accept
    ``html:into-file=//name`` to redirect the included content into its own file.
    """

    INCLUDE_ARGS_NS = "html"
    INCLUDE_FRAGMENT_ALLOWED_ARGS = { 'into-file' }

    _revision: str
    _html_params: HTMLParameters
    _manpage_urls: Mapping[str, str]
    _xref_targets: dict[str, XrefTarget]
    # file names already claimed by an into-file redirection
    _redirection_targets: set[str]
    _appendix_count: int = 0

    def _next_appendix_id(self) -> str:
        """Return the label for the next appendix and advance the counter."""
        self._appendix_count += 1
        return _to_base26(self._appendix_count - 1)

    def __init__(self, revision: str, html_params: HTMLParameters, manpage_urls: Mapping[str, str]):
        super().__init__()
        self._revision, self._html_params, self._manpage_urls = revision, html_params, manpage_urls
        self._xref_targets = {}
        self._redirection_targets = set()
        # renderer not set on purpose since it has a dependency on the output path!

    def convert(self, infile: Path, outfile: Path) -> None:
        """Create the renderer for this input/output pair, then convert as usual."""
        self._renderer = ManualHTMLRenderer(
            'book', self._revision, self._html_params, self._manpage_urls, self._xref_targets,
            infile.parent, outfile.parent)
        super().convert(infile, outfile)

    def _parse(self, src: str) -> list[Token]:
        """Parse ``src`` and validate the ``into-file`` argument of its includes.

        Raises:
            RuntimeError: if a redirected include is empty, its target is not of the
                form ``//filename``, or the target is already in use.
        """
        tokens = super()._parse(src)
        for token in tokens:
            if not token.type.startswith('included_') \
               or not (into := token.meta['include-args'].get('into-file')):
                continue
            assert token.map
            if len(token.meta['included']) == 0:
                raise RuntimeError(f"redirection target {into} in line {token.map[0] + 1} is empty!")
            # we use blender-style //path to denote paths relative to the origin file
            # (usually index.html). this makes everything a lot easier and clearer.
            if not into.startswith("//") or '/' in into[2:]:
                raise RuntimeError("html:into-file must be a relative-to-origin //filename", into)
            into = token.meta['include-args']['into-file'] = into[2:]
            if into in self._redirection_targets:
                raise RuntimeError(f"redirection target {into} in line {token.map[0] + 1} is already in use")
            self._redirection_targets.add(into)
        return tokens

    def _number_block(self, block: str, prefix: str, tokens: Sequence[Token], start: int = 1) -> int:
        """Prefix the titles of all ``block`` elements with ``{prefix} N. ``.

        Numbering continues through structural includes. Returns the number the next
        block would receive.
        """
        title_open, title_close = f'{block}_title_open', f'{block}_title_close'
        for (i, token) in enumerate(tokens):
            if token.type == title_open:
                title = tokens[i + 1]
                assert title.type == 'inline' and title.children
                # the prefix is split into two tokens because the xref title_html will want
                # only the first of the two, but both must be rendered into the example itself.
                title.children = (
                    [
                        Token('text', '', 0, content=f'{prefix} {start}'),
                        Token('text', '', 0, content='. ')
                    ] + title.children
                )
                start += 1
            elif token.type.startswith('included_') and token.type != 'included_options':
                for sub, _path in token.meta['included']:
                    start = self._number_block(block, prefix, sub, start)
        return start

    # xref | (id, type, heading inlines, file, starts new file)
    def _collect_ids(self, tokens: Sequence[Token], target_file: str, typ: str, file_changed: bool
                     ) -> list[XrefTarget | tuple[str, str, Token, str, bool]]:
        """Collect every id in ``tokens`` as a finished or deferred xref target.

        Targets that need no title rendering are returned as ``XrefTarget``; headings,
        examples and figures are returned as argument tuples for ``_render_xref``.
        """
        result: list[XrefTarget | tuple[str, str, Token, str, bool]] = []
        # collect all IDs and their xref substitutions. headings are deferred until everything
        # has been parsed so we can resolve links in headings. if that's even used anywhere.
        for (i, bt) in enumerate(tokens):
            if bt.type == 'heading_open' and (xid := cast(str, bt.attrs.get('id', ''))):
                result.append((xid, typ if bt.tag == 'h1' else 'section', tokens[i + 1], target_file,
                               i == 0 and file_changed))
            elif bt.type == 'included_options':
                id_prefix = bt.meta['id-prefix']
                for opt in bt.meta['source'].keys():
                    xid = make_xml_id(f"{id_prefix}{opt}")
                    name = html.escape(opt)
                    result.append(XrefTarget(xid, f'<code class="option">{name}</code>', name, None, target_file))
            elif bt.type.startswith('included_'):
                sub_file = bt.meta['include-args'].get('into-file', target_file)
                subtyp = bt.type.removeprefix('included_').removesuffix('s')
                for si, (sub, _path) in enumerate(bt.meta['included']):
                    result += self._collect_ids(sub, sub_file, subtyp, si == 0 and sub_file != target_file)
            elif bt.type == 'example_open' and (xid := cast(str, bt.attrs.get('id', ''))):
                result.append((xid, 'example', tokens[i + 2], target_file, False))
            elif bt.type == 'figure_open' and (xid := cast(str, bt.attrs.get('id', ''))):
                result.append((xid, 'figure', tokens[i + 2], target_file, False))
            elif bt.type == 'footnote_open' and (xid := cast(str, bt.attrs.get('id', ''))):
                result.append(XrefTarget(xid, "???", None, None, target_file))
            elif bt.type == 'footnote_ref' and (xid := cast(str, bt.attrs.get('id', ''))):
                result.append(XrefTarget(xid, "???", None, None, target_file))
            elif bt.type == 'inline':
                assert bt.children is not None
                result += self._collect_ids(bt.children, target_file, typ, False)
            elif xid := cast(str, bt.attrs.get('id', '')):
                # anchors and examples have no titles we could use, but we'll have to put
                # *something* here to communicate that there's no title.
                result.append(XrefTarget(xid, "???", None, None, target_file))
        return result

    def _render_xref(self, id: str, typ: str, inlines: Token, path: str, drop_fragment: bool) -> XrefTarget:
        """Render the title inlines of a deferred target into an ``XrefTarget``.

        For appendices this also prepends the appendix prefix to ``inlines.children``.
        """
        assert inlines.children
        title_html = self._renderer.renderInline(inlines.children)
        if typ == 'appendix':
            # NOTE the docbook compat is strong here
            n = self._next_appendix_id()
            prefix = f"Appendix\u00A0{n}.\u00A0"
            # HACK for docbook compat: prefix the title inlines with appendix id if
            # necessary. the alternative is to mess with titlepage rendering in headings,
            # which seems just a lot worse than this
            prefix_tokens = [Token(type='text', tag='', nesting=0, content=prefix)]
            inlines.children = prefix_tokens + list(inlines.children)
            title = prefix + title_html
            toc_html = f"{n}. {title_html}"
            title_html = f"Appendix {n}"
        elif typ in ['example', 'figure']:
            # skip the prepended `{Example,Figure} N. ` from numbering
            toc_html, title = self._renderer.renderInline(inlines.children[2:]), title_html
            # xref title wants only the prepended text, sans the trailing period and space
            title_html = self._renderer.renderInline(inlines.children[0:1])
        else:
            toc_html, title = title_html, title_html
            title_html = (
                f"<em>{title_html}</em>"
                if typ == 'chapter'
                else title_html if typ in [ 'book', 'part' ]
                else f'the section called “{title_html}”'
            )
        return XrefTarget(id, title_html, toc_html, re.sub('<.*?>', '', title), path, drop_fragment)

    def _postprocess(self, infile: Path, outfile: Path, tokens: Sequence[Token]) -> None:
        """Number examples/figures, resolve all xref targets and link the toc.

        Targets whose title cannot be rendered yet (``UnresolvedXrefError``) are
        retried in later rounds.

        Raises:
            RuntimeError: if the same id is found twice.
            UnresolvedXrefError: if a round makes no progress and a target still
                cannot be resolved.
        """
        self._number_block('example', "Example", tokens)
        self._number_block('figure', "Figure", tokens)
        xref_queue = self._collect_ids(tokens, outfile.name, 'book', True)

        failed = False
        while xref_queue:
            # every round needs its own list: reusing one list across rounds would
            # alias it with the queue being iterated and re-process resolved items
            deferred: list[XrefTarget | tuple[str, str, Token, str, bool]] = []
            for item in xref_queue:
                try:
                    target = item if isinstance(item, XrefTarget) else self._render_xref(*item)
                except UnresolvedXrefError:
                    if failed:
                        raise
                    deferred.append(item)
                    continue

                if target.id in self._xref_targets:
                    raise RuntimeError(f"found duplicate id #{target.id}")
                self._xref_targets[target.id] = target
            if len(deferred) == len(xref_queue):
                failed = True # do another round and report the first error
            xref_queue = deferred

        paths_seen = { t.path for t in self._xref_targets.values() }

        # all targets in a single file: rebuild them with drop_target set
        if len(paths_seen) == 1:
            for (k, t) in self._xref_targets.items():
                self._xref_targets[k] = XrefTarget(
                    t.id,
                    t.title_html,
                    t.toc_html,
                    t.title,
                    t.path,
                    t.drop_fragment,
                    drop_target=True
                )

        TocEntry.collect_and_link(self._xref_targets, tokens)
705
706
707
708def _build_cli_db(p: argparse.ArgumentParser) -> None:
709 p.add_argument('--manpage-urls', required=True)
710 p.add_argument('--revision', required=True)
711 p.add_argument('infile', type=Path)
712 p.add_argument('outfile', type=Path)
713
714def _build_cli_html(p: argparse.ArgumentParser) -> None:
715 p.add_argument('--manpage-urls', required=True)
716 p.add_argument('--revision', required=True)
717 p.add_argument('--generator', default='nixos-render-docs')
718 p.add_argument('--stylesheet', default=[], action='append')
719 p.add_argument('--script', default=[], action='append')
720 p.add_argument('--toc-depth', default=1, type=int)
721 p.add_argument('--chunk-toc-depth', default=1, type=int)
722 p.add_argument('--section-toc-depth', default=0, type=int)
723 p.add_argument('--media-dir', default="media", type=Path)
724 p.add_argument('infile', type=Path)
725 p.add_argument('outfile', type=Path)
726
def _run_cli_db(args: argparse.Namespace) -> None:
    """Run the ``docbook`` subcommand: load the manpage URLs and convert the manual."""
    with open(args.manpage_urls, 'r') as urls_file:
        manpage_urls = json.load(urls_file)
    converter = DocBookConverter(manpage_urls, args.revision)
    converter.convert(args.infile, args.outfile)
731
def _run_cli_html(args: argparse.Namespace) -> None:
    """Run the ``html`` subcommand: load the manpage URLs and convert the manual."""
    with open(args.manpage_urls, 'r') as urls_file:
        manpage_urls = json.load(urls_file)
    html_params = HTMLParameters(
        args.generator,
        args.stylesheet,
        args.script,
        args.toc_depth,
        args.chunk_toc_depth,
        args.section_toc_depth,
        args.media_dir,
    )
    converter = HTMLConverter(args.revision, html_params, manpage_urls)
    converter.convert(args.infile, args.outfile)
740
def build_cli(p: argparse.ArgumentParser) -> None:
    """Add the required ``format`` subcommands (``docbook`` and ``html``) to ``p``."""
    formats = p.add_subparsers(dest='format', required=True)
    subcommands = (
        ('docbook', _build_cli_db),
        ('html', _build_cli_html),
    )
    for name, register in subcommands:
        register(formats.add_parser(name))
745
def run_cli(args: argparse.Namespace) -> None:
    """Dispatch to the runner selected by ``args.format``.

    Raises:
        RuntimeError: if ``args.format`` is neither ``docbook`` nor ``html``.
    """
    selected = args.format
    if selected == 'docbook':
        _run_cli_db(args)
        return
    if selected == 'html':
        _run_cli_html(args)
        return
    raise RuntimeError('format not hooked up', args)