···
mutable document_has_content : bool; (** True if we've emitted content tokens in current document *)
mutable adjacent_value_allowed_at : Position.t option; (** Position where adjacent : is allowed *)
mutable pending_value : bool; (** True if we've emitted a KEY and are waiting for VALUE *)
+
mutable flow_mapping_stack : bool list; (** Stack of whether each flow level is a mapping *)
···
document_has_content = false;
adjacent_value_allowed_at = None;
+
flow_mapping_stack = [];
(** [of_string s] wraps the string [s] with [Input.of_string] and passes the
    resulting input to [create]. *)
let of_string s =
  let input = Input.of_string s in
  create input
···
if t.allow_simple_key then begin
(* A simple key is required only if we're in a block context,
+
at the current indentation level, AND the current indent needs a block end.
+
This matches saphyr's logic and prevents false positives for values. *)
let required = t.flow_level = 0 &&
+
match t.indent_stack with
+
| { indent; needs_block_end = true; _ } :: _ ->
···
(* Just ! followed by suffix *)
("!", Buffer.contents buf ^ scan_tag_suffix t))
+
(* Validate that tag is followed by whitespace, break, or (in flow) flow indicator *)
+
(match Input.peek t.input with
+
| None -> () (* EOF is ok *)
+
| Some c when Input.is_whitespace c || Input.is_break c -> ()
+
| Some c when t.flow_level > 0 && Input.is_flow_indicator c -> ()
+
| _ -> Error.raise_at start (Invalid_tag "expected whitespace or line break after tag"));
let span = Span.make ~start ~stop:(Input.mark t.input) in
···
let start = Input.mark t.input in
ignore (Input.next t.input); (* consume opening single-quote *)
let buf = Buffer.create 64 in
+
let whitespace = Buffer.create 16 in (* Track trailing whitespace *)
+
let flush_whitespace () =
+
if Buffer.length whitespace > 0 then begin
+
Buffer.add_buffer buf whitespace;
+
Buffer.clear whitespace
match Input.peek t.input with
| None -> Error.raise_at start Unclosed_single_quote
···
(* Check for escaped quote ('') *)
(match Input.peek t.input with
Buffer.add_char buf '\'';
ignore (Input.next t.input);
+
(* End of string - flush any trailing whitespace *)
+
| Some ' ' | Some '\t' ->
+
(* Track whitespace - don't add to buf yet *)
+
Buffer.add_char whitespace (Option.get (Input.peek t.input));
+
ignore (Input.next t.input);
| Some '\n' | Some '\r' ->
+
(* Discard trailing whitespace before line break *)
+
Buffer.clear whitespace;
Input.consume_break t.input;
(* Skip leading whitespace on next line *)
while Input.next_is_blank t.input do
ignore (Input.next t.input)
+
(* Check for document boundary *)
if Input.at_document_boundary t.input then
Error.raise_at start Unclosed_single_quote;
+
(* Count empty lines (consecutive line breaks) *)
+
let empty_lines = ref 0 in
+
while Input.next_is_break t.input do
+
Input.consume_break t.input;
+
while Input.next_is_blank t.input do
+
ignore (Input.next t.input)
+
if Input.at_document_boundary t.input then
+
Error.raise_at start Unclosed_single_quote
+
(* Apply folding rules *)
+
if !empty_lines > 0 then begin
+
(* Empty lines: preserve as newlines *)
+
for _ = 1 to !empty_lines do
+
Buffer.add_char buf '\n'
+
(* Single break: fold to space (even at start of string) *)
+
Buffer.add_char buf ' ';
ignore (Input.next t.input);
···
(Invalid_block_scalar_header "expected newline after header");
let base_indent = current_indent t in
+
(* base_indent is the indent level from the stack, -1 if empty.
+
It's used directly for comparisons in implicit indent case. *)
let content_indent = ref (
match !explicit_indent with
+
(* Explicit indent: base_indent is 1-indexed column, convert to 0-indexed.
+
content_indent = (base_indent - 1) + n, but at least n for document level. *)
let base_level = max 0 (base_indent - 1) in
| None -> 0 (* Will be determined by first non-empty line *)
···
(* Line starts with fewer spaces than content_indent - dedented *)
end else if Input.next_is_blank t.input then begin
+
(* Line has spaces/tabs beyond content_indent - could be whitespace content or empty line.
+
For literal scalars, whitespace-only lines ARE content (not empty).
+
For folded scalars, whitespace-only lines that are "more indented" are preserved. *)
+
(* Literal: whitespace beyond content_indent is content, let read_lines handle it *)
+
(* Folded: check if rest is only blanks *)
+
while match Input.peek_nth t.input !idx with
+
| Some c when Input.is_blank c -> incr idx; true
+
match Input.peek_nth t.input (!idx) with
+
| None | Some '\n' | Some '\r' ->
+
(* Empty/whitespace-only line in folded - skip spaces *)
+
while Input.next_is_blank t.input do
+
ignore (Input.next t.input)
+
Buffer.add_char trailing_breaks '\n';
+
Input.consume_break t.input;
+
skip_to_content_indent ()
+
(* Has non-whitespace content *)
+
(* Implicit indent - skip empty lines without consuming spaces.
+
Note: Only SPACES count as indentation. Tabs are content, not indentation.
+
So we only check for spaces when determining if a line is "empty". *)
if Input.next_is_break t.input then begin
Buffer.add_char trailing_breaks '\n';
Input.consume_break t.input;
skip_to_content_indent ()
+
end else if Input.next_is (( = ) ' ') t.input then begin
+
(* Check if line is empty (only spaces before break) *)
while match Input.peek_nth t.input !idx with
+
| Some ' ' -> incr idx; true
match Input.peek_nth t.input (!idx) with
| None | Some '\n' | Some '\r' ->
+
(* Line has only spaces - empty line *)
+
while Input.next_is (( = ) ' ') t.input do
ignore (Input.next t.input)
Buffer.add_char trailing_breaks '\n';
Input.consume_break t.input;
skip_to_content_indent ()
+
(* Has content (including tabs which are content, not indentation) *)
+
(* Not at break or space - could be tab (content) or other *)
···
(* Determine content indent from first content line (implicit case) *)
let first_line = !content_indent = 0 in
+
(* base_indent is 1-indexed column, convert to 0-indexed for comparison with line_indent.
+
If base_indent = -1 (empty stack), then base_level = -2, so col 0 is still valid. *)
+
let base_level = base_indent - 1 in
+
if !content_indent = 0 then begin
+
(* For implicit indent, content must be more indented than base_level. *)
+
if line_indent <= base_level then
+
false (* No content - first line not indented enough *)
+
content_indent := line_indent;
+
end else if line_indent < !content_indent then
+
false (* Dedented - done with content *)
+
if should_process then begin
+
(* Check if current line is "more indented" (has extra indent or starts with whitespace).
+
For folded scalars, lines that start with any whitespace (space or tab) after the
+
content indentation are "more indented" and preserve breaks.
+
Note: we check Input.next_is_blank BEFORE reading content to see if content starts with whitespace. *)
+
let trailing_blank = line_indent > !content_indent || Input.next_is_blank t.input in
(* Add trailing breaks to buffer *)
if Buffer.length buf > 0 then begin
···
let span = Span.make ~start ~stop:(Input.mark t.input) in
let token = if indicator = "---" then Token.Document_start else Token.Document_end in
(* Reset document content flag after document end marker *)
+
if indicator = "..." then begin
t.document_has_content <- false;
+
(* After document end marker, skip whitespace and check for end of line or comment *)
+
while Input.next_is_blank t.input do ignore (Input.next t.input) done;
+
(match Input.peek t.input with
+
| None -> () (* EOF is ok *)
+
| Some c when Input.is_break c -> ()
+
| Some '#' -> () (* Comment is ok *)
+
| _ -> Error.raise_at start (Invalid_directive "content not allowed after document end marker on same line"))
···
t.flow_indent <- column t;
t.flow_level <- t.flow_level + 1;
+
(* Track whether this is a mapping or sequence *)
+
let is_mapping = (token_type = Token.Flow_mapping_start) in
+
t.flow_mapping_stack <- is_mapping :: t.flow_mapping_stack;
t.allow_simple_key <- true;
t.simple_keys <- None :: t.simple_keys;
t.document_has_content <- true;
···
and fetch_flow_collection_end t token_type =
t.flow_level <- t.flow_level - 1;
+
t.flow_mapping_stack <- (match t.flow_mapping_stack with _ :: rest -> rest | [] -> []);
t.simple_keys <- (match t.simple_keys with _ :: rest -> rest | [] -> []);
t.allow_simple_key <- false;
let start = Input.mark t.input in
···
+
let start = Input.mark t.input in
(* Check for simple key *)
+
match t.simple_keys with
+
| Some sk :: _ when sk.sk_possible ->
+
(* In implicit flow mapping (inside a flow sequence), key and : must be on the same line.
+
In explicit flow mapping { }, key and : can span lines. *)
+
let is_implicit_flow_mapping = match t.flow_mapping_stack with
+
| false :: _ -> true (* false = we're in a sequence, so any mapping is implicit *)
+
if is_implicit_flow_mapping && sk.sk_position.line < (Input.position t.input).line then
+
Error.raise_at start Illegal_flow_key_line;
+
(* Insert KEY token before the simple key value *)
+
let key_span = Span.point sk.sk_position in
+
let key_token = { Token.token = Token.Key; span = key_span } in
+
(* We need to insert at the right position *)
+
let tokens = Queue.to_seq t.tokens |> Array.of_seq in
+
let insert_pos = sk.sk_token_number - t.tokens_taken in
+
Array.iteri (fun i tok ->
+
if i = insert_pos then Queue.add key_token t.tokens;
+
if insert_pos >= Array.length tokens then
+
Queue.add key_token t.tokens;
+
t.token_number <- t.token_number + 1;
+
t.pending_value <- true; (* We've inserted a KEY token, now waiting for VALUE *)
+
(* Roll indent for implicit block mapping *)
+
if t.flow_level = 0 then begin
+
let col = sk.sk_position.column in
+
if roll_indent t col ~sequence:false then begin
+
let span = Span.point sk.sk_position in
+
(* Insert block mapping start before key *)
+
let bm_token = { Token.token = Token.Block_mapping_start; span } in
+
let tokens = Queue.to_seq t.tokens |> Array.of_seq in
+
Array.iteri (fun i tok ->
+
if i = insert_pos then Queue.add bm_token t.tokens;
+
if insert_pos >= Array.length tokens then
+
Queue.add bm_token t.tokens;
+
t.token_number <- t.token_number + 1
+
t.simple_keys <- None :: (List.tl t.simple_keys);
+
(* No simple key - this is a complex value (or empty key) *)
+
if t.flow_level = 0 then begin
+
if not t.allow_simple_key then
+
Error.raise_at (Input.mark t.input) Expected_key;
+
if roll_indent t col ~sequence:false then begin
+
let span = Span.point (Input.mark t.input) in
+
emit t span Token.Block_mapping_start
+
(* Note: We don't emit KEY here. Empty key handling is done by the parser,
+
which emits empty scalar when it sees VALUE without preceding KEY. *)
+
(* In block context without simple key, allow simple keys for compact mappings like ": moon: white"
+
In flow context or after using a simple key, disallow simple keys *)
+
t.allow_simple_key <- (not used_simple_key) && (t.flow_level = 0);
t.document_has_content <- true;
let start = Input.mark t.input in
ignore (Input.next t.input);