import argparse
import hashlib
import html
import json
import re
import xml.sax.saxutils as xml

from abc import abstractmethod
from collections.abc import Mapping, Sequence
from pathlib import Path
from typing import Any, cast, ClassVar, Generic, get_args, NamedTuple

from markdown_it.token import Token

from . import md, options
from .docbook import DocBookRenderer, Heading, make_xml_id
from .html import HTMLRenderer, UnresolvedXrefError
from .manual_structure import check_structure, FragmentType, is_include, TocEntry, TocEntryType, XrefTarget
from .md import Converter, Renderer

class BaseConverter(Converter[md.TR], Generic[md.TR]):
    """Shared driver for manual conversion: parses a root file and resolves its include blocks."""

    # per-converter configuration for ns:arg=value arguments to include blocks, following
    # the include type. html converters need something like this to support chunking, or
    # another external method like the chunktocs docbook uses (but block options seem like
    # a much nicer way of doing this).
    INCLUDE_ARGS_NS: ClassVar[str]
    INCLUDE_FRAGMENT_ALLOWED_ARGS: ClassVar[set[str]] = set()
    INCLUDE_OPTIONS_ALLOWED_ARGS: ClassVar[set[str]] = set()

    # stack of files currently being parsed (root file first); used to resolve relative
    # include paths and to detect circular includes.
    _base_paths: list[Path]
    # stack of structural types of the files currently being parsed, root is always 'book'.
    _current_type: list[TocEntryType]

    def convert(self, infile: Path, outfile: Path) -> None:
        """Parse `infile`, run `_postprocess`, render the tokens and write them to `outfile`.

        Any exception raised along the way is wrapped in a RuntimeError naming `infile`.
        """
        self._base_paths = [ infile ]
        self._current_type = ['book']
        try:
            tokens = self._parse(infile.read_text())
            self._postprocess(infile, outfile, tokens)
            converted = self._renderer.render(tokens)
            outfile.write_text(converted)
        except Exception as e:
            raise RuntimeError(f"failed to render manual {infile}") from e

    def _postprocess(self, infile: Path, outfile: Path, tokens: Sequence[Token]) -> None:
        """Hook run between parsing and rendering. Does nothing by default."""
        pass

    def _parse(self, src: str) -> list[Token]:
        """Parse `src`, check its structure, and recursively load every include block.

        Include tokens are retyped to `included_options` or `included_<type>` and get their
        namespaced arguments and loaded content stored in `token.meta`.
        """
        tokens = super()._parse(src)
        check_structure(self._current_type[-1], tokens)
        for token in tokens:
            if not is_include(token):
                continue
            # NOTE(review): the 12-character offset presumably skips the include marker at
            # the start of the info string -- confirm against is_include.
            directive = token.info[12:].split()
            if not directive:
                continue
            # everything after the include type is a key=value argument.
            args = { k: v for k, _sep, v in (s.partition('=') for s in directive[1:]) }
            typ = directive[0]
            if typ == 'options':
                token.type = 'included_options'
                self._process_include_args(token, args, self.INCLUDE_OPTIONS_ALLOWED_ARGS)
                self._parse_options(token, args)
            else:
                fragment_type = typ.removesuffix('s')
                if fragment_type not in get_args(FragmentType):
                    raise RuntimeError(f"unsupported structural include type '{typ}'")
                self._current_type.append(cast(FragmentType, fragment_type))
                token.type = 'included_' + typ
                self._process_include_args(token, args, self.INCLUDE_FRAGMENT_ALLOWED_ARGS)
                self._parse_included_blocks(token, args)
                self._current_type.pop()
        return tokens

    def _process_include_args(self, token: Token, args: dict[str, str], allowed: set[str]) -> None:
        """Store the arguments from this converter's namespace in `token.meta['include-args']`.

        Arguments of other namespaces are ignored. Raises RuntimeError if an argument in our
        namespace is not listed in `allowed`.
        """
        ns = self.INCLUDE_ARGS_NS + ":"
        args = { k[len(ns):]: v for k, v in args.items() if k.startswith(ns) }
        if unknown := set(args.keys()) - allowed:
            assert token.map
            raise RuntimeError(f"unrecognized include argument in line {token.map[0] + 1}", unknown)
        token.meta['include-args'] = args

    def _parse_included_blocks(self, token: Token, block_args: dict[str, str]) -> None:
        """Parse every file listed in the include block into `token.meta['included']`.

        Each content line is a path relative to the including file. The result is a list of
        `(tokens, path)` pairs. Raises RuntimeError on circular includes or if a file fails
        to load or parse.
        """
        assert token.map
        included = token.meta['included'] = []
        # content starts one line below the fence, and reported line numbers are 1-based.
        for (lnum, line) in enumerate(token.content.splitlines(), token.map[0] + 2):
            line = line.strip()
            path = self._base_paths[-1].parent / line
            if path in self._base_paths:
                raise RuntimeError(f"circular include found in line {lnum}")
            try:
                self._base_paths.append(path)
                with open(path, 'r') as f:
                    tokens = self._parse(f.read())
                    included.append((tokens, path))
                self._base_paths.pop()
            except Exception as e:
                raise RuntimeError(f"processing included file {path} from line {lnum}") from e

    def _parse_options(self, token: Token, block_args: dict[str, str]) -> None:
        """Parse the `key: value` lines of an options include block and load its JSON source.

        Exactly the keys `id-prefix`, `list-id` and `source` must be given. The first two are
        copied to `token.meta`, and `token.meta['source']` receives the parsed JSON file.
        Raises RuntimeError for malformed, duplicate, missing or unsupported directives and
        if the source file cannot be loaded.
        """
        assert token.map

        items = {}
        for (lnum, line) in enumerate(token.content.splitlines(), token.map[0] + 2):
            if len(args := line.split(":", 1)) != 2:
                raise RuntimeError(f"options directive with no argument in line {lnum}")
            (k, v) = (args[0].strip(), args[1].strip())
            if k in items:
                raise RuntimeError(f"duplicate options directive {k} in line {lnum}")
            items[k] = v
        try:
            id_prefix = items.pop('id-prefix')
            varlist_id = items.pop('list-id')
            source = items.pop('source')
        except KeyError as e:
            raise RuntimeError(f"options directive {e} missing in block at line {token.map[0] + 1}")
        if items.keys():
            raise RuntimeError(
                f"unsupported options directives in block at line {token.map[0] + 1}",
                " ".join(items.keys()))

        try:
            with open(self._base_paths[-1].parent / source, 'r') as f:
                token.meta['id-prefix'] = id_prefix
                token.meta['list-id'] = varlist_id
                token.meta['source'] = json.load(f)
        except Exception as e:
            raise RuntimeError(f"processing options block in line {token.map[0] + 1}") from e

class RendererMixin(Renderer):
    """Adds render rules for the `included_*` tokens and special handling of the book toplevel."""

    _toplevel_tag: str
    _revision: str

    def __init__(self, toplevel_tag: str, revision: str, *args: Any, **kwargs: Any):
        super().__init__(*args, **kwargs)
        self._toplevel_tag = toplevel_tag
        self._revision = revision
        self.rules |= {
            'included_sections': lambda *args: self._included_thing("section", *args),
            'included_chapters': lambda *args: self._included_thing("chapter", *args),
            'included_preface': lambda *args: self._included_thing("preface", *args),
            'included_parts': lambda *args: self._included_thing("part", *args),
            'included_appendix': lambda *args: self._included_thing("appendix", *args),
            'included_options': self.included_options,
        }

    def render(self, tokens: Sequence[Token]) -> str:
        """Render `tokens`, delegating to `_render_book` when the toplevel tag is `book`."""
        # books get special handling because they have *two* title tags. doing this with
        # generic code is more complicated than it's worth. the checks above have verified
        # that both titles actually exist.
        if self._toplevel_tag == 'book':
            return self._render_book(tokens)

        return super().render(tokens)

    @abstractmethod
    def _render_book(self, tokens: Sequence[Token]) -> str:
        """Render a complete book, including its title and subtitle."""
        raise NotImplementedError()

    @abstractmethod
    def _included_thing(self, tag: str, token: Token, tokens: Sequence[Token], i: int) -> str:
        """Render the fragments of a structural include whose toplevel tag is `tag`."""
        raise NotImplementedError()

    @abstractmethod
    def included_options(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        """Render an options include block."""
        raise NotImplementedError()

class ManualDocBookRenderer(RendererMixin, DocBookRenderer):
    """Renders a manual and its includes to DocBook."""

    def __init__(self, toplevel_tag: str, revision: str, manpage_urls: Mapping[str, str]):
        super().__init__(toplevel_tag, revision, manpage_urls)

    def _render_book(self, tokens: Sequence[Token]) -> str:
        """Wrap the book in a `<book>` element. The inlines at tokens 1 and 4 are title and subtitle."""
        assert tokens[1].children
        assert tokens[4].children
        if (maybe_id := cast(str, tokens[0].attrs.get('id', ""))):
            maybe_id = "xml:id=" + xml.quoteattr(maybe_id)
        return (f'<book xmlns="http://docbook.org/ns/docbook"'
                f' xmlns:xlink="http://www.w3.org/1999/xlink"'
                f' {maybe_id} version="5.0">'
                f' <title>{self.renderInline(tokens[1].children)}</title>'
                f' <subtitle>{self.renderInline(tokens[4].children)}</subtitle>'
                f' {super(DocBookRenderer, self).render(tokens[6:])}'
                f'</book>')

    def _heading_tag(self, token: Token, tokens: Sequence[Token], i: int) -> tuple[str, dict[str, str]]:
        """Use the toplevel tag plus DocBook namespace attributes for `h1` headings."""
        (tag, attrs) = super()._heading_tag(token, tokens, i)
        # render() has already verified that we don't have supernumerary headings and since the
        # book tag is handled specially we can leave the check this simple
        if token.tag != 'h1':
            return (tag, attrs)
        return (self._toplevel_tag, attrs | {
            'xmlns': "http://docbook.org/ns/docbook",
            'xmlns:xlink': "http://www.w3.org/1999/xlink",
        })

    def _included_thing(self, tag: str, token: Token, tokens: Sequence[Token], i: int) -> str:
        """Render all included fragments with a fresh renderer and concatenate the results."""
        result = []
        # close existing partintro. the generic render doesn't really need this because
        # it doesn't have a concept of structure in the way the manual does.
        if self._headings and self._headings[-1] == Heading('part', 1):
            result.append("</partintro>")
            self._headings[-1] = self._headings[-1]._replace(partintro_closed=True)
        # must nest properly for structural includes. this requires saving at least
        # the headings stack, but creating new renderers is cheap and much easier.
        r = ManualDocBookRenderer(tag, self._revision, self._manpage_urls)
        for (included, path) in token.meta['included']:
            try:
                result.append(r.render(included))
            except Exception as e:
                raise RuntimeError(f"rendering {path}") from e
        return "".join(result)

    def included_options(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        """Render the options loaded for this block as a DocBook fragment."""
        conv = options.DocBookConverter(self._manpage_urls, self._revision, 'fragment',
                                        token.meta['list-id'], token.meta['id-prefix'])
        conv.add_options(token.meta['source'])
        return conv.finalize(fragment=True)

    # TODO minimize docbook diffs with existing conversions. remove soon.
    def paragraph_open(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return super().paragraph_open(token, tokens, i) + "\n "
    def paragraph_close(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return "\n" + super().paragraph_close(token, tokens, i)
    def code_block(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        return f"<programlisting>\n{xml.escape(token.content)}</programlisting>"
    def fence(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        info = f" language={xml.quoteattr(token.info)}" if token.info != "" else ""
        return f"<programlisting{info}>\n{xml.escape(token.content)}</programlisting>"

class DocBookConverter(BaseConverter[ManualDocBookRenderer]):
    """Converts a manual to DocBook. Include arguments use the `docbook:` namespace."""

    INCLUDE_ARGS_NS = "docbook"

    def __init__(self, manpage_urls: Mapping[str, str], revision: str):
        super().__init__()
        self._renderer = ManualDocBookRenderer('book', revision, manpage_urls)


class HTMLParameters(NamedTuple):
    # value of the `generator` meta tag written into every output file.
    generator: str
    # stylesheet and script hrefs linked from the head of every output file.
    stylesheets: Sequence[str]
    scripts: Sequence[str]
    # number of levels in the rendered table of contents. tables are prepended to
    # the content they apply to (entire document / document chunk / top-level section
    # of a chapter), setting a depth of 0 omits the respective table.
    toc_depth: int
    chunk_toc_depth: int
    section_toc_depth: int
    # directory, relative to the output directory, that pulled images are written to.
    media_dir: Path

class ManualHTMLRenderer(RendererMixin, HTMLRenderer):
    """Renders a manual to XHTML, writing `into-file` includes to files of their own."""

    # absolute directory output files are written to.
    _base_path: Path
    # directory of the input file currently being rendered.
    _in_dir: Path
    _html_params: HTMLParameters

    def __init__(self, toplevel_tag: str, revision: str, html_params: HTMLParameters,
                 manpage_urls: Mapping[str, str], xref_targets: dict[str, XrefTarget],
                 in_dir: Path, base_path: Path):
        super().__init__(toplevel_tag, revision, manpage_urls, xref_targets)
        self._in_dir = in_dir
        self._base_path = base_path.absolute()
        self._html_params = html_params

    def _pull_image(self, src: str) -> str:
        """Copy the image `src` into the media directory and return its path relative to the output."""
        src_path = Path(src)
        content = (self._in_dir / src_path).read_bytes()
        # images may be used more than once, but we want to store them only once and
        # in an easily accessible (ie, not input-file-path-dependent) location without
        # having to maintain a mapping structure. hashing the file and using the hash
        # as the name of the final image provides both.
        content_hash = hashlib.sha3_256(content).hexdigest()
        target_name = f"{content_hash}{src_path.suffix}"
        target_path = self._base_path / self._html_params.media_dir / target_name
        target_path.write_bytes(content)
        return f"./{self._html_params.media_dir}/{target_name}"

    def _push(self, tag: str, hlevel_offset: int) -> Any:
        """Start a fresh nesting context for `tag` and return the previous state for `_pop`."""
        result = (self._toplevel_tag, self._headings, self._attrspans, self._hlevel_offset, self._in_dir)
        self._hlevel_offset += hlevel_offset
        self._toplevel_tag, self._headings, self._attrspans = tag, [], []
        return result

    def _pop(self, state: Any) -> None:
        """Restore the state saved by `_push`."""
        (self._toplevel_tag, self._headings, self._attrspans, self._hlevel_offset, self._in_dir) = state

    def _render_book(self, tokens: Sequence[Token]) -> str:
        """Render the book file: header, title page, table of contents, content and footer."""
        assert tokens[4].children
        title_id = cast(str, tokens[0].attrs.get('id', ""))
        title = self._xref_targets[title_id].title
        # subtitles don't have IDs, so we can't use xrefs to get them
        subtitle = self.renderInline(tokens[4].children)

        toc = TocEntry.of(tokens[0])
        return "\n".join([
            self._file_header(toc),
            ' <div class="book">',
            ' <div class="titlepage">',
            ' <div>',
            f' <div><h1 class="title"><a id="{html.escape(title_id, True)}"></a>{title}</h1></div>',
            f' <div><h2 class="subtitle">{subtitle}</h2></div>',
            ' </div>',
            " <hr />",
            ' </div>',
            self._build_toc(tokens, 0),
            super(HTMLRenderer, self).render(tokens[6:]),
            ' </div>',
            self._file_footer(toc),
        ])

    def _file_header(self, toc: TocEntry) -> str:
        """Build the document prologue, `<head>` and navigation header for the file of `toc`."""
        prev_link, up_link, next_link = "", "", ""
        prev_a, next_a, parent_title = "", "", "&nbsp;"
        nav_html = ""
        home = toc.root
        if toc.prev:
            prev_link = f'<link rel="prev" href="{toc.prev.target.href()}" title="{toc.prev.target.title}" />'
            prev_a = f'<a accesskey="p" href="{toc.prev.target.href()}">Prev</a>'
        if toc.parent:
            up_link = (
                f'<link rel="up" href="{toc.parent.target.href()}" '
                f'title="{toc.parent.target.title}" />'
            )
            if (part := toc.parent) and part.kind != 'book':
                assert part.target.title
                parent_title = part.target.title
        if toc.next:
            next_link = f'<link rel="next" href="{toc.next.target.href()}" title="{toc.next.target.title}" />'
            next_a = f'<a accesskey="n" href="{toc.next.target.href()}">Next</a>'
        # the navigation header is only emitted if there is somewhere to navigate to.
        if toc.prev or toc.parent or toc.next:
            nav_html = "\n".join([
                ' <div class="navheader">',
                ' <table width="100%" summary="Navigation header">',
                ' <tr>',
                f' <th colspan="3" align="center">{toc.target.title}</th>',
                ' </tr>',
                ' <tr>',
                f' <td width="20%" align="left">{prev_a}&nbsp;</td>',
                f' <th width="60%" align="center">{parent_title}</th>',
                f' <td width="20%" align="right">&nbsp;{next_a}</td>',
                ' </tr>',
                ' </table>',
                ' <hr />',
                ' </div>',
            ])
        return "\n".join([
            '<?xml version="1.0" encoding="utf-8" standalone="no"?>',
            '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"',
            ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">',
            '<html xmlns="http://www.w3.org/1999/xhtml">',
            ' <head>',
            ' <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />',
            f' <title>{toc.target.title}</title>',
            "".join((f'<link rel="stylesheet" type="text/css" href="{html.escape(style, True)}" />'
                     for style in self._html_params.stylesheets)),
            "".join((f'<script src="{html.escape(script, True)}" type="text/javascript"></script>'
                     for script in self._html_params.scripts)),
            f' <meta name="generator" content="{html.escape(self._html_params.generator, True)}" />',
            f' <link rel="home" href="{home.target.href()}" title="{home.target.title}" />' if home.target.href() else "",
            f' {up_link}{prev_link}{next_link}',
            ' </head>',
            ' <body>',
            nav_html,
        ])

    def _file_footer(self, toc: TocEntry) -> str:
        """Build the navigation footer and closing tags for the file of `toc`."""
        prev_a, up_a, home_a, next_a = "", "&nbsp;", "&nbsp;", ""
        prev_text, up_text, next_text = "", "", ""
        nav_html = ""
        home = toc.root
        if toc.prev:
            prev_a = f'<a accesskey="p" href="{toc.prev.target.href()}">Prev</a>'
            assert toc.prev.target.title
            prev_text = toc.prev.target.title
        if toc.parent:
            home_a = f'<a accesskey="h" href="{home.target.href()}">Home</a>'
            # an "Up" link is only useful if it doesn't just duplicate "Home".
            if toc.parent != home:
                up_a = f'<a accesskey="u" href="{toc.parent.target.href()}">Up</a>'
        if toc.next:
            next_a = f'<a accesskey="n" href="{toc.next.target.href()}">Next</a>'
            assert toc.next.target.title
            next_text = toc.next.target.title
        if toc.prev or toc.parent or toc.next:
            nav_html = "\n".join([
                ' <div class="navfooter">',
                ' <hr />',
                ' <table width="100%" summary="Navigation footer">',
                ' <tr>',
                f' <td width="40%" align="left">{prev_a}&nbsp;</td>',
                f' <td width="20%" align="center">{up_a}</td>',
                f' <td width="40%" align="right">&nbsp;{next_a}</td>',
                ' </tr>',
                ' <tr>',
                f' <td width="40%" align="left" valign="top">{prev_text}&nbsp;</td>',
                f' <td width="20%" align="center">{home_a}</td>',
                f' <td width="40%" align="right" valign="top">&nbsp;{next_text}</td>',
                ' </tr>',
                ' </table>',
                ' </div>',
            ])
        return "\n".join([
            nav_html,
            ' </body>',
            '</html>',
        ])

    def _heading_tag(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        """Use the toplevel tag for `h1` headings and the inherited tag for all others."""
        if token.tag == 'h1':
            return self._toplevel_tag
        return super()._heading_tag(token, tokens, i)

    def _build_toc(self, tokens: Sequence[Token], i: int) -> str:
        """Build the table of contents and lists of figures/examples for the entry at `tokens[i]`.

        Returns an empty string if the applicable toc depth is 0 or the entry has no children
        to list.
        """
        toc = TocEntry.of(tokens[i])
        if toc.kind == 'section' and self._html_params.section_toc_depth < 1:
            return ""
        def walk_and_emit(toc: TocEntry, depth: int) -> list[str]:
            if depth <= 0:
                return []
            result = []
            for child in toc.children:
                result.append(
                    f'<dt>'
                    f' <span class="{html.escape(child.kind, True)}">'
                    f' <a href="{child.target.href()}">{child.target.toc_html}</a>'
                    f' </span>'
                    f'</dt>'
                )
                # we want to look straight through parts because docbook-xsl does too, but it
                # also makes for more useful top-level tocs.
                next_level = walk_and_emit(child, depth - (0 if child.kind == 'part' else 1))
                if next_level:
                    result.append(f'<dd><dl>{"".join(next_level)}</dl></dd>')
            return result
        def build_list(kind: str, css_class: str, lst: Sequence[TocEntry]) -> str:
            if not lst:
                return ""
            entries = [
                f'<dt>{num}. <a href="{e.target.href()}">{e.target.toc_html}</a></dt>'
                for num, e in enumerate(lst, start=1)
            ]
            return (
                f'<div class="{css_class}">'
                f'<p><strong>List of {kind}</strong></p>'
                f'<dl>{"".join(entries)}</dl>'
                '</div>'
            )
        # we don't want to generate the "Table of Contents" header for sections,
        # docbook doesn't and it's only distracting clutter unless it's the main table.
        # we also want to generate tocs only for a top-level section (ie, one that is
        # not itself contained in another section)
        print_title = toc.kind != 'section'
        if toc.kind == 'section':
            if toc.parent and toc.parent.kind == 'section':
                toc_depth = 0
            else:
                toc_depth = self._html_params.section_toc_depth
        elif toc.starts_new_chunk and toc.kind != 'book':
            toc_depth = self._html_params.chunk_toc_depth
        else:
            toc_depth = self._html_params.toc_depth
        if not (items := walk_and_emit(toc, toc_depth)):
            return ""
        figures = build_list("Figures", "list-of-figures", toc.figures)
        examples = build_list("Examples", "list-of-examples", toc.examples)
        return "".join([
            f'<div class="toc">',
            ' <p><strong>Table of Contents</strong></p>' if print_title else "",
            f' <dl class="toc">'
            f' {"".join(items)}'
            f' </dl>'
            f'</div>'
            f'{figures}'
            f'{examples}'
        ])

    def _make_hN(self, level: int) -> tuple[str, str]:
        """Return the heading tag for `level` and the docbook-compat style to apply to it."""
        # for some reason chapters don't increase the hN nesting count in docbook xslts. duplicate
        # this for consistency.
        if self._toplevel_tag == 'chapter':
            level -= 1
        # TODO docbook compat. these are never useful for us, but not having them breaks manual
        # compare workflows while docbook is still allowed.
        style = ""
        if level + self._hlevel_offset < 3 \
           and (self._toplevel_tag == 'section' or (self._toplevel_tag == 'chapter' and level > 0)):
            style = "clear: both"
        # only the tag of the inherited result is used, its style is replaced by ours.
        tag, _ = super()._make_hN(max(1, level))
        return tag, style

    def _included_thing(self, tag: str, token: Token, tokens: Sequence[Token], i: int) -> str:
        """Render the included fragments, either inline or into the `into-file` target.

        With an `into-file` argument the fragments are wrapped in a file header and footer
        and written to that file below the output directory. Only what must stay in the
        including file is returned.
        """
        outer, inner = [], []
        # since books have no non-include content the toplevel book wrapper will not count
        # towards nesting depth. other types will have at least a title+id heading which
        # *does* count towards the nesting depth. chapters give a -1 to included sections
        # mirroring the special handling in _make_hN. sigh.
        hoffset = (
            0 if not self._headings
            else self._headings[-1].level - 1 if self._toplevel_tag == 'chapter'
            else self._headings[-1].level
        )
        outer.append(self._maybe_close_partintro())
        into = token.meta['include-args'].get('into-file')
        fragments = token.meta['included']
        state = self._push(tag, hoffset)
        if into:
            toc = TocEntry.of(fragments[0][0][0])
            inner.append(self._file_header(toc))
            # we do not set _hlevel_offset=0 because docbook doesn't either.
        else:
            inner = outer
        in_dir = self._in_dir
        for included, path in fragments:
            try:
                self._in_dir = (in_dir / path).parent
                inner.append(self.render(included))
            except Exception as e:
                raise RuntimeError(f"rendering {path}") from e
        if into:
            inner.append(self._file_footer(toc))
            (self._base_path / into).write_text("".join(inner))
        self._pop(state)
        return "".join(outer)

    def included_options(self, token: Token, tokens: Sequence[Token], i: int) -> str:
        """Render the options loaded for this block as HTML."""
        conv = options.HTMLConverter(self._manpage_urls, self._revision,
                                     token.meta['list-id'], token.meta['id-prefix'],
                                     self._xref_targets)
        conv.add_options(token.meta['source'])
        return conv.finalize()

def _to_base26(n: int) -> str:
    """Return the letter label for the zero-based index `n`: A..Z, then AA, AB, and so on.

    The numbering is bijective, so every index gets a distinct label. In particular 26 maps
    to "AA" rather than repeating "A".
    """
    return (_to_base26(n // 26 - 1) if n >= 26 else "") + chr(ord("A") + n % 26)

class HTMLConverter(BaseConverter[ManualHTMLRenderer]):
    """Converts a manual to HTML. Include arguments use the `html:` namespace."""

    INCLUDE_ARGS_NS = "html"
    INCLUDE_FRAGMENT_ALLOWED_ARGS = { 'into-file' }

    _revision: str
    _html_params: HTMLParameters
    _manpage_urls: Mapping[str, str]
    # all known xref targets by id, filled in by _postprocess.
    _xref_targets: dict[str, XrefTarget]
    # into-file targets seen so far, used to reject duplicates.
    _redirection_targets: set[str]
    _appendix_count: int = 0

    def _next_appendix_id(self) -> str:
        """Return the letter label for the next appendix."""
        self._appendix_count += 1
        return _to_base26(self._appendix_count - 1)

    def __init__(self, revision: str, html_params: HTMLParameters, manpage_urls: Mapping[str, str]):
        super().__init__()
        self._revision, self._html_params, self._manpage_urls = revision, html_params, manpage_urls
        self._xref_targets = {}
        self._redirection_targets = set()
        # renderer not set on purpose since it has a dependency on the output path!

    def convert(self, infile: Path, outfile: Path) -> None:
        """Create the renderer for this input/output pair, then convert as the base class does."""
        self._renderer = ManualHTMLRenderer(
            'book', self._revision, self._html_params, self._manpage_urls, self._xref_targets,
            infile.parent, outfile.parent)
        super().convert(infile, outfile)

    def _parse(self, src: str) -> list[Token]:
        """Parse `src` and validate the `into-file` argument of every include in it.

        A target must be of the form `//filename` without further slashes, must not be used
        twice and must not belong to an empty include. The leading `//` is stripped from the
        stored argument. Raises RuntimeError otherwise.
        """
        tokens = super()._parse(src)
        for token in tokens:
            if not token.type.startswith('included_') \
               or not (into := token.meta['include-args'].get('into-file')):
                continue
            assert token.map
            if len(token.meta['included']) == 0:
                raise RuntimeError(f"redirection target {into} in line {token.map[0] + 1} is empty!")
            # we use blender-style //path to denote paths relative to the origin file
            # (usually index.html). this makes everything a lot easier and clearer.
            if not into.startswith("//") or '/' in into[2:]:
                raise RuntimeError("html:into-file must be a relative-to-origin //filename", into)
            into = token.meta['include-args']['into-file'] = into[2:]
            if into in self._redirection_targets:
                raise RuntimeError(f"redirection target {into} in line {token.map[0] + 1} is already in use")
            self._redirection_targets.add(into)
        return tokens

    def _number_block(self, block: str, prefix: str, tokens: Sequence[Token], start: int = 1) -> int:
        """Prefix the title of every `block` in `tokens` with `<prefix> <number>. `.

        Recurses into structural includes so numbering is continuous across files. Returns
        the next unused number.
        """
        title_open, title_close = f'{block}_title_open', f'{block}_title_close'
        for (i, token) in enumerate(tokens):
            if token.type == title_open:
                title = tokens[i + 1]
                assert title.type == 'inline' and title.children
                # the prefix is split into two tokens because the xref title_html will want
                # only the first of the two, but both must be rendered into the example itself.
                title.children = (
                    [
                        Token('text', '', 0, content=f'{prefix} {start}'),
                        Token('text', '', 0, content='. ')
                    ] + title.children
                )
                start += 1
            elif token.type.startswith('included_') and token.type != 'included_options':
                for sub, _path in token.meta['included']:
                    start = self._number_block(block, prefix, sub, start)
        return start

    # xref | (id, type, heading inlines, file, starts new file)
    def _collect_ids(self, tokens: Sequence[Token], target_file: str, typ: str, file_changed: bool
                     ) -> list[XrefTarget | tuple[str, str, Token, str, bool]]:
        """Collect every id in `tokens` and its includes, together with the file it ends up in.

        Targets that need no title rendering are returned as finished XrefTargets. Headings,
        examples and figures are returned as argument tuples for `_render_xref`.
        """
        result: list[XrefTarget | tuple[str, str, Token, str, bool]] = []
        # collect all IDs and their xref substitutions. headings are deferred until everything
        # has been parsed so we can resolve links in headings. if that's even used anywhere.
        for (i, bt) in enumerate(tokens):
            if bt.type == 'heading_open' and (id := cast(str, bt.attrs.get('id', ''))):
                result.append((id, typ if bt.tag == 'h1' else 'section', tokens[i + 1], target_file,
                               i == 0 and file_changed))
            elif bt.type == 'included_options':
                id_prefix = bt.meta['id-prefix']
                for opt in bt.meta['source'].keys():
                    id = make_xml_id(f"{id_prefix}{opt}")
                    name = html.escape(opt)
                    result.append(XrefTarget(id, f'<code class="option">{name}</code>', name, None, target_file))
            elif bt.type.startswith('included_'):
                sub_file = bt.meta['include-args'].get('into-file', target_file)
                subtyp = bt.type.removeprefix('included_').removesuffix('s')
                for si, (sub, _path) in enumerate(bt.meta['included']):
                    result += self._collect_ids(sub, sub_file, subtyp, si == 0 and sub_file != target_file)
            elif bt.type == 'example_open' and (id := cast(str, bt.attrs.get('id', ''))):
                result.append((id, 'example', tokens[i + 2], target_file, False))
            elif bt.type == 'figure_open' and (id := cast(str, bt.attrs.get('id', ''))):
                result.append((id, 'figure', tokens[i + 2], target_file, False))
            elif bt.type == 'footnote_open' and (id := cast(str, bt.attrs.get('id', ''))):
                result.append(XrefTarget(id, "???", None, None, target_file))
            elif bt.type == 'footnote_ref' and (id := cast(str, bt.attrs.get('id', ''))):
                result.append(XrefTarget(id, "???", None, None, target_file))
            elif bt.type == 'inline':
                assert bt.children is not None
                result += self._collect_ids(bt.children, target_file, typ, False)
            elif id := cast(str, bt.attrs.get('id', '')):
                # anchors and examples have no titles we could use, but we'll have to put
                # *something* here to communicate that there's no title.
                result.append(XrefTarget(id, "???", None, None, target_file))
        return result

    def _render_xref(self, id: str, typ: str, inlines: Token, path: str, drop_fragment: bool) -> XrefTarget:
        """Render the title inlines of a collected id into an XrefTarget.

        For appendices this also prepends the appendix label to `inlines.children`.
        """
        assert inlines.children
        title_html = self._renderer.renderInline(inlines.children)
        if typ == 'appendix':
            # NOTE the docbook compat is strong here
            n = self._next_appendix_id()
            prefix = f"Appendix\u00A0{n}.\u00A0"
            # HACK for docbook compat: prefix the title inlines with appendix id if
            # necessary. the alternative is to mess with titlepage rendering in headings,
            # which seems just a lot worse than this
            prefix_tokens = [Token(type='text', tag='', nesting=0, content=prefix)]
            inlines.children = prefix_tokens + list(inlines.children)
            title = prefix + title_html
            toc_html = f"{n}. {title_html}"
            title_html = f"Appendix&nbsp;{n}"
        elif typ in ['example', 'figure']:
            # skip the prepended `{Example,Figure} N. ` from numbering
            toc_html, title = self._renderer.renderInline(inlines.children[2:]), title_html
            # xref title wants only the prepended text, sans the trailing colon and space
            title_html = self._renderer.renderInline(inlines.children[0:1])
        else:
            toc_html, title = title_html, title_html
            title_html = (
                f"<em>{title_html}</em>"
                if typ == 'chapter'
                else title_html if typ in [ 'book', 'part' ]
                else f'the section called “{title_html}”'
            )
        return XrefTarget(id, title_html, toc_html, re.sub('<.*?>', '', title), path, drop_fragment)

    def _postprocess(self, infile: Path, outfile: Path, tokens: Sequence[Token]) -> None:
        """Number examples and figures, resolve all xref targets and link the toc entries.

        Raises RuntimeError on duplicate ids and UnresolvedXrefError if a title refers to an
        id that never becomes available.
        """
        self._number_block('example', "Example", tokens)
        self._number_block('figure', "Figure", tokens)
        xref_queue = self._collect_ids(tokens, outfile.name, 'book', True)

        # titles may contain xrefs to ids we have not registered yet. such items are retried
        # in further rounds until a round makes no progress at all.
        failed = False
        while xref_queue:
            # must be a fresh list every round: it becomes the next queue, and reusing the
            # list we're iterating over would re-queue items that were already resolved.
            deferred = []
            for item in xref_queue:
                try:
                    target = item if isinstance(item, XrefTarget) else self._render_xref(*item)
                except UnresolvedXrefError:
                    if failed:
                        raise
                    deferred.append(item)
                    continue

                if target.id in self._xref_targets:
                    raise RuntimeError(f"found duplicate id #{target.id}")
                self._xref_targets[target.id] = target
            if len(deferred) == len(xref_queue):
                failed = True # do another round and report the first error
            xref_queue = deferred

        paths_seen = set()
        for t in self._xref_targets.values():
            paths_seen.add(t.path)

        # if everything ends up in a single file, rebuild all targets with drop_target set.
        if len(paths_seen) == 1:
            for (k, t) in self._xref_targets.items():
                self._xref_targets[k] = XrefTarget(
                    t.id,
                    t.title_html,
                    t.toc_html,
                    t.title,
                    t.path,
                    t.drop_fragment,
                    drop_target=True
                )

        TocEntry.collect_and_link(self._xref_targets, tokens)



def _build_cli_db(p: argparse.ArgumentParser) -> None:
    """Register the arguments of the `docbook` subcommand on `p`."""
    p.add_argument('--manpage-urls', required=True)
    p.add_argument('--revision', required=True)
    p.add_argument('infile', type=Path)
    p.add_argument('outfile', type=Path)

def _build_cli_html(p: argparse.ArgumentParser) -> None:
    """Register the arguments of the `html` subcommand on `p`."""
    p.add_argument('--manpage-urls', required=True)
    p.add_argument('--revision', required=True)
    p.add_argument('--generator', default='nixos-render-docs')
    p.add_argument('--stylesheet', default=[], action='append')
    p.add_argument('--script', default=[], action='append')
    p.add_argument('--toc-depth', default=1, type=int)
    p.add_argument('--chunk-toc-depth', default=1, type=int)
    p.add_argument('--section-toc-depth', default=0, type=int)
    p.add_argument('--media-dir', default="media", type=Path)
    p.add_argument('infile', type=Path)
    p.add_argument('outfile', type=Path)

def _run_cli_db(args: argparse.Namespace) -> None:
    """Run the `docbook` subcommand with the parsed `args`."""
    with open(args.manpage_urls, 'r') as manpage_urls:
        md = DocBookConverter(json.load(manpage_urls), args.revision)
        md.convert(args.infile, args.outfile)

def _run_cli_html(args: argparse.Namespace) -> None:
    """Run the `html` subcommand with the parsed `args`."""
    with open(args.manpage_urls, 'r') as manpage_urls:
        md = HTMLConverter(
            args.revision,
            HTMLParameters(args.generator, args.stylesheet, args.script, args.toc_depth,
                           args.chunk_toc_depth, args.section_toc_depth, args.media_dir),
            json.load(manpage_urls))
        md.convert(args.infile, args.outfile)

def build_cli(p: argparse.ArgumentParser) -> None:
    """Register the `docbook` and `html` subcommands on `p`."""
    formats = p.add_subparsers(dest='format', required=True)
    _build_cli_db(formats.add_parser('docbook'))
    _build_cli_html(formats.add_parser('html'))

def run_cli(args: argparse.Namespace) -> None:
    """Dispatch to the subcommand selected by `args.format`."""
    if args.format == 'docbook':
        _run_cli_db(args)
    elif args.format == 'html':
        _run_cli_html(args)
    else:
        raise RuntimeError('format not hooked up', args)