from __future__ import annotations

import dataclasses as dc
import html
import itertools

from typing import cast, get_args, Iterable, Literal, Sequence

from markdown_it.token import Token

from .utils import Freezeable

# FragmentType is used to restrict structural include blocks.
FragmentType = Literal['preface', 'part', 'chapter', 'section', 'appendix']

# in the TOC all fragments are allowed, plus the all-encompassing book.
TocEntryType = Literal['book', 'preface', 'part', 'chapter', 'section', 'appendix', 'example', 'figure']

def is_include(token: Token) -> bool:
    return token.type == "fence" and token.info.startswith("{=include=} ")
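
# an include fence in the manual sources is a code fence whose info string starts
# with "{=include=} " (the prefix checked above), roughly like this (the block kind
# and file names below are illustrative):
#
#   ```{=include=} chapters
#   introduction.chapter.md
#   configuration.chapter.md
#   ```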

# toplevel file must contain only the title headings and includes, anything else
# would cause strange rendering.
def _check_book_structure(tokens: Sequence[Token]) -> None:
    # the title and subtitle headings occupy the first six tokens
    # (heading_open, inline, heading_close for each), so skip those.
    for token in tokens[6:]:
        if not is_include(token):
            assert token.map
            raise RuntimeError(f"unexpected content in line {token.map[0] + 1}, "
                               "expected structural include")

# much like books, parts may not contain headings other than their title heading.
# this is a limitation of the current renderers and TOC generators that do not handle
# this case well even though it is supported in docbook (and probably supportable
# anywhere else).
def _check_part_structure(tokens: Sequence[Token]) -> None:
    _check_fragment_structure(tokens)
    # the part title heading occupies the first three tokens, so skip those.
    for token in tokens[3:]:
        if token.type == 'heading_open':
            assert token.map
            raise RuntimeError(f"unexpected heading in line {token.map[0] + 1}")

# two include blocks must either be adjacent or separated by a heading, otherwise
# we cannot generate a correct TOC (since there'd be nothing to link to between
# the two includes).
def _check_fragment_structure(tokens: Sequence[Token]) -> None:
    for i, token in enumerate(tokens):
        if is_include(token) \
           and i + 1 < len(tokens) \
           and not (is_include(tokens[i + 1]) or tokens[i + 1].type == 'heading_open'):
            assert token.map
            raise RuntimeError(f"unexpected content in line {token.map[0] + 1}, "
                               "expected heading or structural include")

def check_structure(kind: TocEntryType, tokens: Sequence[Token]) -> None:
    wanted = { 'h1': 'title' }
    wanted |= { 'h2': 'subtitle' } if kind == 'book' else {}
    for (i, (tag, role)) in enumerate(wanted.items()):
        # each heading contributes three tokens (heading_open, inline, heading_close),
        # so the i-th wanted heading must open at token 3 * i.
        if len(tokens) < 3 * (i + 1):
            raise RuntimeError(f"missing {role} ({tag}) heading")
        token = tokens[3 * i]
        if token.type != 'heading_open' or token.tag != tag:
            assert token.map
            raise RuntimeError(f"expected {role} ({tag}) heading in line {token.map[0] + 1}", token)
    for t in tokens[3 * len(wanted):]:
        if t.type != 'heading_open' or not (role := wanted.get(t.tag, '')):
            continue
        assert t.map
        raise RuntimeError(
            f"only one {role} heading ({t.markup} [text...]) allowed per "
            f"{kind}, but found a second in line {t.map[0] + 1}. "
            "please remove all such headings except the first or demote the subsequent headings.",
            t)

    last_heading_level = 0
    for token in tokens:
        if token.type != 'heading_open':
            continue

        # book subtitle headings do not need an id, only book title headings do.
        # every other heading needs one too. we need this to build a TOC and to
        # provide stable links if the manual changes shape.
        if 'id' not in token.attrs and (kind != 'book' or token.tag != 'h2'):
            assert token.map
            raise RuntimeError(f"heading in line {token.map[0] + 1} does not have an id")

        level = int(token.tag[1:]) # because tag = h1..h6
        if level > last_heading_level + 1:
            assert token.map
            raise RuntimeError(f"heading in line {token.map[0] + 1} skips one or more heading levels, "
                               "which is currently not allowed")
        last_heading_level = level

    if kind == 'book':
        _check_book_structure(tokens)
    elif kind == 'part':
        _check_part_structure(tokens)
    else:
        _check_fragment_structure(tokens)
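
# rough usage sketch (names are illustrative): the token stream comes from a
# markdown-it parse that, elsewhere in this package, puts heading ids into
# `token.attrs`; given such a stream, a caller would do something like
#
#   tokens = md.parse(chapter_source)      # `md` being a configured MarkdownIt instance
#   check_structure('chapter', tokens)     # raises RuntimeError on structural problems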

@dc.dataclass(frozen=True)
class XrefTarget:
    id: str
    """link label for `[](#local-references)`"""
    title_html: str
    toc_html: str | None
    """toc label"""
    title: str | None
    """text for `<title>` tags and `title="..."` attributes"""
    path: str
    """path to file that contains the anchor"""
    drop_fragment: bool = False
    """whether to drop the `#anchor` from links when expanding xrefs"""
    drop_target: bool = False
    """whether to drop the `path.html` from links when expanding xrefs.
    mostly useful for docbook compatibility"""

    def href(self) -> str:
        path = "" if self.drop_target else html.escape(self.path, True)
        return path if self.drop_fragment else f"{path}#{html.escape(self.id, True)}"
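
    # for instance (values made up): XrefTarget('opt-foo', 'foo', None, None, 'options.html').href()
    # returns 'options.html#opt-foo'; with drop_fragment=True it returns just
    # 'options.html', and with drop_target=True it returns '#opt-foo'.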

@dc.dataclass
class TocEntry(Freezeable):
    kind: TocEntryType
    target: XrefTarget
    parent: TocEntry | None = None
    prev: TocEntry | None = None
    next: TocEntry | None = None
    children: list[TocEntry] = dc.field(default_factory=list)
    starts_new_chunk: bool = False
    examples: list[TocEntry] = dc.field(default_factory=list)
    figures: list[TocEntry] = dc.field(default_factory=list)

    @property
    def root(self) -> TocEntry:
        return self.parent.root if self.parent else self

    @classmethod
    def of(cls, token: Token) -> TocEntry:
        entry = token.meta.get('TocEntry')
        if not isinstance(entry, TocEntry):
            raise RuntimeError('requested toc entry, none found', token)
        return entry

    @classmethod
    def collect_and_link(cls, xrefs: dict[str, XrefTarget], tokens: Sequence[Token]) -> TocEntry:
        entries, examples, figures = cls._collect_entries(xrefs, tokens, 'book')

        def flatten_with_parent(this: TocEntry, parent: TocEntry | None) -> Iterable[TocEntry]:
            this.parent = parent
            return itertools.chain([this], *[ flatten_with_parent(c, this) for c in this.children ])

        flat = list(flatten_with_parent(entries, None))
        prev = flat[0]
        prev.starts_new_chunk = True
        paths_seen = set([prev.target.path])
        for c in flat[1:]:
            if prev.target.path != c.target.path and c.target.path not in paths_seen:
                c.starts_new_chunk = True
                c.prev, prev.next = prev, c
                prev = c
            paths_seen.add(c.target.path)

        flat[0].examples = examples
        flat[0].figures = figures

        for c in flat:
            c.freeze()

        return entries
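
    # rough usage sketch (names are illustrative): given `xrefs` mapping heading,
    # example and figure ids to XrefTarget instances and `tokens` for the parsed
    # book source, `TocEntry.collect_and_link(xrefs, tokens)` returns the book
    # entry with parent/prev/next/children links, chunk starts, and the global
    # example and figure lists filled in.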

    @classmethod
    def _collect_entries(cls, xrefs: dict[str, XrefTarget], tokens: Sequence[Token],
                         kind: TocEntryType) -> tuple[TocEntry, list[TocEntry], list[TocEntry]]:
        # we assume that check_structure has been run recursively over the entire input.
        # list contains (tag, entry) pairs that will collapse to a single entry for
        # the full sequence.
        entries: list[tuple[str, TocEntry]] = []
        examples: list[TocEntry] = []
        figures: list[TocEntry] = []
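
        # headings are collected on a stack of (tag, entry) pairs: when a heading
        # arrives that is no deeper than the entry on top of the stack (hN tag
        # strings compare like their levels here), finished entries are popped and
        # attached to their parents' children lists before the new entry is pushed.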
        for token in tokens:
            if token.type.startswith('included_') and (included := token.meta.get('included')):
                fragment_type_str = token.type[9:].removesuffix('s')
                assert fragment_type_str in get_args(TocEntryType)
                fragment_type = cast(TocEntryType, fragment_type_str)
                for fragment, _path in included:
                    subentries, subexamples, subfigures = cls._collect_entries(xrefs, fragment, fragment_type)
                    entries[-1][1].children.append(subentries)
                    examples += subexamples
                    figures += subfigures
            elif token.type == 'heading_open' and (id := cast(str, token.attrs.get('id', ''))):
                while len(entries) > 1 and entries[-1][0] >= token.tag:
                    entries[-2][1].children.append(entries.pop()[1])
                entries.append((token.tag,
                                TocEntry(kind if token.tag == 'h1' else 'section', xrefs[id])))
                token.meta['TocEntry'] = entries[-1][1]
            elif token.type == 'example_open' and (id := cast(str, token.attrs.get('id', ''))):
                examples.append(TocEntry('example', xrefs[id]))
            elif token.type == 'figure_open' and (id := cast(str, token.attrs.get('id', ''))):
                figures.append(TocEntry('figure', xrefs[id]))

        while len(entries) > 1:
            entries[-2][1].children.append(entries.pop()[1])
        return (entries[0][1], examples, figures)