···
mutable stream_ended : bool;
mutable indent_stack : indent list; (** Stack of indentation levels *)
mutable flow_level : int; (** Nesting depth in [] or {} *)
+
mutable flow_indent : int; (** Column where outermost flow collection started *)
mutable simple_keys : simple_key option list; (** Per flow-level simple key tracking *)
mutable allow_simple_key : bool;
+
mutable leading_whitespace : bool; (** True when at start of line (only whitespace seen) *)
+
mutable document_has_content : bool; (** True if we've emitted content tokens in current document *)
···
simple_keys = [None]; (* One entry for the base level *)
+
leading_whitespace = true; (* Start at beginning of stream *)
+
document_has_content = false;
(** [of_string s] wraps the string [s] with [Input.of_string] and hands the
    resulting input to [create]. *)
let of_string s =
  let input = Input.of_string s in
  create input
···
| { indent; _ } :: _ -> indent
+
(** Skip blanks on the current line and, if a comment follows, validate and
    skip it.
+
Returns true if any whitespace (including tabs) was found before a comment.

A [#] is accepted as a comment start only when at least one blank was
consumed by this call, or when [Input.peek_back] reports no previous
character or a line break. Otherwise [Invalid_comment] is raised through
[Error.raise_at] at the current input mark. *)
+
let skip_whitespace_and_comment t =
+
(* Records whether this call consumed at least one blank *)
let has_whitespace = ref false in
+
(* Skip blanks (spaces and tabs) *)
while Input.next_is_blank t.input do
+
has_whitespace := true;
(* Consume the blank; the character itself is not needed *)
ignore (Input.next t.input)
+
(* Check for comment *)
if Input.next_is (( = ) '#') t.input then begin
+
(* Validate: comment must be preceded by whitespace or be at start of line *)
+
if not !has_whitespace then begin
+
(* Check if we're at the start of input or after a line break *)
+
(* No blank was consumed here, so inspect the character before the '#' *)
match Input.peek_back t.input with
+
| None -> () (* Start of input - OK *)
+
| Some c when Input.is_break c -> () (* After line break - OK *)
+
(* Comment not preceded by whitespace - ERROR *)
+
Error.raise_at (Input.mark t.input) Invalid_comment
+
(* Skip to end of line *)
(* The loop stops in front of the line break (or at end of input);
   the break itself is not consumed here *)
while not (Input.is_eof t.input) && not (Input.next_is_break t.input) do
ignore (Input.next t.input)
+
(** Skip blanks (spaces/tabs) and return whether tabs were found.

    Consumes input for as long as [Input.next_is_blank] holds, and notes
    whether any of the consumed characters was a tab. *)
+
let skip_blanks_check_tabs t =
+
(* Set once a tab is seen among the skipped blanks *)
let found_tab = ref false in
+
while Input.next_is_blank t.input do
+
(* Inspect the blank before consuming it *)
if Input.peek t.input = Some '\t' then found_tab := true;
+
ignore (Input.next t.input)
+
(** Skip whitespace and comments, return true if at newline *)
+
let rec skip_to_next_token t =
+
(* Check for tabs used as indentation in block context *)
+
(match Input.peek t.input with
+
| Some '\t' when t.flow_level = 0 && t.leading_whitespace &&
+
(column t - 1) <= current_indent t ->
+
(* Tab found in indentation zone - this is invalid *)
+
(* Skip to end of line to check if line has content *)
+
let start_pos = Input.mark t.input in
+
while Input.next_is_blank t.input do
+
ignore (Input.next t.input)
+
(* If we have content on this line with a tab, raise error *)
+
if not (Input.next_is_break t.input) && not (Input.is_eof t.input) then
+
Error.raise_at start_pos Tab_in_indentation
+
(* Skip blanks and validate comments *)
+
skip_whitespace_and_comment t;
(* Skip line break in block context *)
if t.flow_level = 0 && Input.next_is_break t.input then begin
Input.consume_break t.input;
t.allow_simple_key <- true;
+
t.leading_whitespace <- true;
else if t.flow_level > 0 && Input.next_is_whitespace t.input then begin
···
while Input.next_is_blank t.input do
ignore (Input.next t.input)
+
(* Check for document boundary - this terminates the quoted string *)
+
if Input.at_document_boundary t.input then
+
Error.raise_at start Unclosed_single_quote;
···
+
(* Check for document boundary - this terminates the quoted string *)
+
if Input.at_document_boundary t.input then
+
Error.raise_at start Unclosed_double_quote;
(* Per YAML spec: single break = space, break + empty lines = newlines *)
if !empty_lines > 0 then begin
(* Empty lines: output N newlines where N = number of empty lines *)
···
| Some c2 when in_flow && Input.is_flow_indicator c2 -> false
+
(* # is a comment indicator only if preceded by whitespace *)
+
(* Check the previous character to determine if this is a comment *)
+
(match Input.peek_back t.input with
+
| None -> true (* At start - can't be comment indicator, allow it *)
+
| Some c when Input.is_whitespace c -> false (* Preceded by whitespace - comment *)
+
| Some c when Input.is_break c -> false (* At start of line - comment *)
+
| _ -> true) (* Not preceded by whitespace - part of scalar *)
| c when in_flow && Input.is_flow_indicator c -> false
| _ when Input.is_break c -> false
···
let start = Input.mark t.input in
let in_flow = t.flow_level > 0 in
let indent = current_indent t in
+
(* Validate flow collection indentation *)
+
if in_flow && (column t) < t.flow_indent then
+
Error.raise_at start Invalid_flow_indentation;
let buf = Buffer.create 64 in
let spaces = Buffer.create 16 in
let leading_blanks = ref false in
···
match Input.peek t.input with
| Some c when can_continue_plain t c ~in_flow ->
+
(* can_continue_plain already handles # correctly - it returns false
+
when # is preceded by whitespace (making it a comment indicator) *)
if Buffer.length spaces > 0 then begin
if !leading_blanks then begin
···
chomping := Chomping.Keep; ignore (Input.next t.input)
+
(* Skip whitespace and optional comment *)
+
skip_whitespace_and_comment t;
if Input.next_is_break t.input then
···
Note: we use col, not col-1, to allow entries at the same level. *)
+
(* We're about to process actual content, not leading whitespace *)
+
t.leading_whitespace <- false;
if Input.is_eof t.input then
else if Input.at_document_boundary t.input then
···
let span = Span.make ~start ~stop:(Input.mark t.input) in
let token = if indicator = "---" then Token.Document_start else Token.Document_end in
+
(* Reset document content flag after document end marker *)
+
if indicator = "..." then
+
t.document_has_content <- false;
+
(* Directives can only appear:
+
1. At stream start (before any document content)
+
2. After a document end marker (...)
+
If we've emitted content in the current document, we need a document end marker first *)
+
if t.document_has_content then
+
Error.raise_at (Input.mark t.input)
+
(Unexpected_token "directives must be separated from document content by document end marker (...)");
t.allow_simple_key <- false;
···
and fetch_flow_collection_start t token_type =
+
(* Record indent of outermost flow collection *)
+
if t.flow_level = 0 then
+
t.flow_indent <- column t;
t.flow_level <- t.flow_level + 1;
t.allow_simple_key <- true;
t.simple_keys <- None :: t.simple_keys;
+
t.document_has_content <- true;
let start = Input.mark t.input in
ignore (Input.next t.input);
let span = Span.make ~start ~stop:(Input.mark t.input) in
···
t.allow_simple_key <- true;
+
t.document_has_content <- true;
let start = Input.mark t.input in
ignore (Input.next t.input);
+
(* Check for tabs after - : pattern like -\t- is invalid *)
+
let found_tabs = skip_blanks_check_tabs t in
+
if found_tabs then begin
+
(* If we found tabs and next char is - followed by whitespace, error *)
+
match Input.peek t.input with
+
(match Input.peek_nth t.input 1 with
+
| None -> Error.raise_at start Tab_in_indentation
+
| Some c when Input.is_whitespace c ->
+
Error.raise_at start Tab_in_indentation
let span = Span.make ~start ~stop:(Input.mark t.input) in
emit t span Token.Block_entry
···
t.allow_simple_key <- t.flow_level = 0;
+
t.document_has_content <- true;
let start = Input.mark t.input in
ignore (Input.next t.input);
+
(* Check for tabs after ? : pattern like ?\t- or ?\tkey is invalid *)
+
let found_tabs = skip_blanks_check_tabs t in
+
if found_tabs && t.flow_level = 0 then begin
+
(* In block context, tabs after ? are not allowed *)
+
Error.raise_at start Tab_in_indentation
let span = Span.make ~start ~stop:(Input.mark t.input) in
···
+
(* In block context, allow_simple_key becomes true only after a line break,
+
not immediately after ':'. This prevents constructs like "key: - a".
+
The line break handling in skip_to_next_token will set it to true. *)
+
t.allow_simple_key <- false;
+
t.document_has_content <- true;
let start = Input.mark t.input in
ignore (Input.next t.input);
+
(* Check for tabs after : : pattern like :\t- is invalid in block context *)
+
let found_tabs = skip_blanks_check_tabs t in
+
if found_tabs && t.flow_level = 0 then begin
+
(* In block context, tabs after : followed by indicator are not allowed *)
+
match Input.peek t.input with
+
Error.raise_at start Tab_in_indentation
let span = Span.make ~start ~stop:(Input.mark t.input) in
t.allow_simple_key <- false;
+
t.document_has_content <- true;
let start = Input.mark t.input in
ignore (Input.next t.input); (* consume * *)
let name, span = scan_anchor_alias t in
···
t.allow_simple_key <- false;
+
t.document_has_content <- true;
let start = Input.mark t.input in
ignore (Input.next t.input); (* consume & *)
let name, span = scan_anchor_alias t in
···
t.allow_simple_key <- false;
+
t.document_has_content <- true;
let handle, suffix, span = scan_tag t in
emit t span (Token.Tag { handle; suffix })
(** Scan a block scalar with [scan_block_scalar] and emit it as a
    [Token.Scalar] carrying the style and value returned by the scan.

    [literal] is forwarded unchanged to [scan_block_scalar]; presumably it
    selects literal versus folded style (confirm against
    [scan_block_scalar]). *)
and fetch_block_scalar t literal =
(* A simple key is allowed again once the block scalar has been handled *)
t.allow_simple_key <- true;
+
(* Record that the current document now has content *)
t.document_has_content <- true;
let value, style, span = scan_block_scalar t literal in
emit t span (Token.Scalar { style; value })
(** Scan a single-quoted scalar with [scan_single_quoted] and emit it as a
    [Token.Scalar] with style [Scalar_style.Single_quoted]. *)
and fetch_single_quoted t =
(* No simple key may start directly after this scalar *)
t.allow_simple_key <- false;
+
(* Record that the current document now has content *)
t.document_has_content <- true;
let value, span = scan_single_quoted t in
emit t span (Token.Scalar { style = Scalar_style.Single_quoted; value })
(** Scan a double-quoted scalar with [scan_double_quoted] and emit it as a
    [Token.Scalar] with style [Scalar_style.Double_quoted]. *)
and fetch_double_quoted t =
(* No simple key may start directly after this scalar *)
t.allow_simple_key <- false;
+
(* Record that the current document now has content *)
t.document_has_content <- true;
let value, span = scan_double_quoted t in
emit t span (Token.Scalar { style = Scalar_style.Double_quoted; value })
···
(** Scan a plain (unquoted) scalar with [scan_plain_scalar] and emit it as a
    [Token.Scalar] with style [Scalar_style.Plain]. *)
and fetch_plain_scalar t =
(* No simple key may start directly after this scalar *)
t.allow_simple_key <- false;
+
(* Record that the current document now has content *)
t.document_has_content <- true;
let value, span = scan_plain_scalar t in
emit t span (Token.Scalar { style = Scalar_style.Plain; value })