from __future__ import annotations

import dataclasses as dc
import html
import itertools

from typing import cast, get_args, Iterable, Literal, Sequence

from markdown_it.token import Token

from .utils import Freezeable

# FragmentType is used to restrict structural include blocks.
FragmentType = Literal['preface', 'part', 'chapter', 'section', 'appendix']

# in the TOC all fragments are allowed, plus the all-encompassing book.
TocEntryType = Literal['book', 'preface', 'part', 'chapter', 'section', 'appendix', 'example', 'figure']

def is_include(token: Token) -> bool:
    return token.type == "fence" and token.info.startswith("{=include=} ")

# the toplevel file must contain only the title headings and includes; anything else
# would cause strange rendering.
def _check_book_structure(tokens: Sequence[Token]) -> None:
    for token in tokens[6:]:
        if not is_include(token):
            assert token.map
            raise RuntimeError(f"unexpected content in line {token.map[0] + 1}, "
                               "expected structural include")

# much like books, parts may not contain headings other than their title heading.
# this is a limitation of the current renderers and TOC generators that do not handle
# this case well even though it is supported in docbook (and probably supportable
# anywhere else).
def _check_part_structure(tokens: Sequence[Token]) -> None:
    _check_fragment_structure(tokens)
    for token in tokens[3:]:
        if token.type == 'heading_open':
            assert token.map
            raise RuntimeError(f"unexpected heading in line {token.map[0] + 1}")

# two include blocks must either be adjacent or separated by a heading, otherwise
# we cannot generate a correct TOC (since there'd be nothing to link to between
# the two includes).
def _check_fragment_structure(tokens: Sequence[Token]) -> None:
    for i, token in enumerate(tokens):
        if is_include(token) \
           and i + 1 < len(tokens) \
           and not (is_include(tokens[i + 1]) or tokens[i + 1].type == 'heading_open'):
            assert token.map
            raise RuntimeError(f"unexpected content in line {token.map[0] + 1}, "
                               "expected heading or structural include")

def check_structure(kind: TocEntryType, tokens: Sequence[Token]) -> None:
    wanted = { 'h1': 'title' }
    wanted |= { 'h2': 'subtitle' } if kind == 'book' else {}
    for (i, (tag, role)) in enumerate(wanted.items()):
        if len(tokens) < 3 * (i + 1):
            raise RuntimeError(f"missing {role} ({tag}) heading")
        token = tokens[3 * i]
        if token.type != 'heading_open' or token.tag != tag:
            assert token.map
            raise RuntimeError(f"expected {role} ({tag}) heading in line {token.map[0] + 1}", token)
    for t in tokens[3 * len(wanted):]:
        if t.type != 'heading_open' or not (role := wanted.get(t.tag, '')):
            continue
        assert t.map
        raise RuntimeError(
            f"only one {role} heading ({t.markup} [text...]) allowed per "
            f"{kind}, but found a second in line {t.map[0] + 1}. "
            "please remove all such headings except the first or demote the subsequent headings.",
            t)

    last_heading_level = 0
    for token in tokens:
        if token.type != 'heading_open':
            continue

        # book subtitle headings do not need an id, only book title headings do.
        # every other heading needs one too. we need this to build a TOC and to
        # provide stable links if the manual changes shape.
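        # ids are attached to headings in the manual's markdown source, typically
        # written as a `{#some-id}` attribute after the heading text; the exact
        # syntax is handled by the markdown extensions used elsewhere in this
        # package, and `some-id` is only a placeholder here.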
        if 'id' not in token.attrs and (kind != 'book' or token.tag != 'h2'):
            assert token.map
            raise RuntimeError(f"heading in line {token.map[0] + 1} does not have an id")

        level = int(token.tag[1:]) # because tag = h1..h6
        if level > last_heading_level + 1:
            assert token.map
            raise RuntimeError(f"heading in line {token.map[0] + 1} skips one or more heading levels, "
                               "which is currently not allowed")
        last_heading_level = level

    if kind == 'book':
        _check_book_structure(tokens)
    elif kind == 'part':
        _check_part_structure(tokens)
    else:
        _check_fragment_structure(tokens)

@dc.dataclass(frozen=True)
class XrefTarget:
    id: str
    """link label for `[](#local-references)`"""
    title_html: str
    """toc label"""
    toc_html: str | None
    """text for `<title>` tags and `title="..."` attributes"""
    title: str | None
    """path to file that contains the anchor"""
    path: str
    """whether to drop the `#anchor` from links when expanding xrefs"""
    drop_fragment: bool = False
    """whether to drop the `path.html` from links when expanding xrefs.
    mostly useful for docbook compatibility"""
    drop_target: bool = False

    def href(self) -> str:
        path = "" if self.drop_target else html.escape(self.path, True)
        return path if self.drop_fragment else f"{path}#{html.escape(self.id, True)}"

@dc.dataclass
class TocEntry(Freezeable):
    kind: TocEntryType
    target: XrefTarget
    parent: TocEntry | None = None
    prev: TocEntry | None = None
    next: TocEntry | None = None
    children: list[TocEntry] = dc.field(default_factory=list)
    starts_new_chunk: bool = False
    examples: list[TocEntry] = dc.field(default_factory=list)
    figures: list[TocEntry] = dc.field(default_factory=list)

    @property
    def root(self) -> TocEntry:
        return self.parent.root if self.parent else self

    @classmethod
    def of(cls, token: Token) -> TocEntry:
        entry = token.meta.get('TocEntry')
        if not isinstance(entry, TocEntry):
            raise RuntimeError('requested toc entry, none found', token)
        return entry

    @classmethod
    def collect_and_link(cls, xrefs: dict[str, XrefTarget], tokens: Sequence[Token]) -> TocEntry:
        entries, examples, figures = cls._collect_entries(xrefs, tokens, 'book')

        def flatten_with_parent(this: TocEntry, parent: TocEntry | None) -> Iterable[TocEntry]:
            this.parent = parent
            return itertools.chain([this], *[ flatten_with_parent(c, this) for c in this.children ])

        flat = list(flatten_with_parent(entries, None))
        prev = flat[0]
        prev.starts_new_chunk = True
        paths_seen = set([prev.target.path])
        for c in flat[1:]:
            if prev.target.path != c.target.path and c.target.path not in paths_seen:
                c.starts_new_chunk = True
            c.prev, prev.next = prev, c
            prev = c
            paths_seen.add(c.target.path)

        flat[0].examples = examples
        flat[0].figures = figures

        for c in flat:
            c.freeze()

        return entries

    @classmethod
    def _collect_entries(cls, xrefs: dict[str, XrefTarget], tokens: Sequence[Token],
                         kind: TocEntryType) -> tuple[TocEntry, list[TocEntry], list[TocEntry]]:
        # we assume that check_structure has been run recursively over the entire input.
        # list contains (tag, entry) pairs that will collapse to a single entry for
        # the full sequence.
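        # entries behaves like a stack: whenever a new heading is no deeper than the
        # entry on top, finished entries are popped and attached as children of the
        # entry below them, so only ancestors of the current heading remain on the
        # stack. the tag comparison below works because 'h1'..'h6' order
        # lexicographically by level.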
        entries: list[tuple[str, TocEntry]] = []
        examples: list[TocEntry] = []
        figures: list[TocEntry] = []
        for token in tokens:
            if token.type.startswith('included_') and (included := token.meta.get('included')):
                fragment_type_str = token.type[9:].removesuffix('s')
                assert fragment_type_str in get_args(TocEntryType)
                fragment_type = cast(TocEntryType, fragment_type_str)
                for fragment, _path in included:
                    subentries, subexamples, subfigures = cls._collect_entries(xrefs, fragment, fragment_type)
                    entries[-1][1].children.append(subentries)
                    examples += subexamples
                    figures += subfigures
            elif token.type == 'heading_open' and (id := cast(str, token.attrs.get('id', ''))):
                while len(entries) > 1 and entries[-1][0] >= token.tag:
                    entries[-2][1].children.append(entries.pop()[1])
                entries.append((token.tag,
                                TocEntry(kind if token.tag == 'h1' else 'section', xrefs[id])))
                token.meta['TocEntry'] = entries[-1][1]
            elif token.type == 'example_open' and (id := cast(str, token.attrs.get('id', ''))):
                examples.append(TocEntry('example', xrefs[id]))
            elif token.type == 'figure_open' and (id := cast(str, token.attrs.get('id', ''))):
                figures.append(TocEntry('figure', xrefs[id]))

        while len(entries) > 1:
            entries[-2][1].children.append(entries.pop()[1])
        return (entries[0][1], examples, figures)
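
# usage sketch (not part of the original module): callers in this package parse a
# fragment into markdown-it tokens, validate it with check_structure, and build the
# TOC with TocEntry.collect_and_link once every cross-reference target is known.
# `md`, `source`, and `xrefs` are placeholders assumed to be supplied by the caller;
# the full pipeline is expected to run collect_and_link over the fully assembled
# book rather than a single fragment.
#
#     tokens = md.parse(source)                        # md: a configured MarkdownIt
#     check_structure('chapter', tokens)               # raises RuntimeError on bad structure
#     toc = TocEntry.collect_and_link(xrefs, tokens)   # xrefs: dict[str, XrefTarget]
#     for entry in toc.children:
#         print(entry.target.href(), entry.target.title)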