···
mutable stream_ended : bool;
mutable indent_stack : indent list; (** Stack of indentation levels *)
mutable flow_level : int; (** Nesting depth in [] or {} *)
27
+
mutable flow_indent : int; (** Column where outermost flow collection started *)
mutable simple_keys : simple_key option list; (** Per flow-level simple key tracking *)
mutable allow_simple_key : bool;
30
+
mutable leading_whitespace : bool; (** True when at start of line (only whitespace seen) *)
31
+
mutable document_has_content : bool; (** True if we've emitted content tokens in current document *)
···
simple_keys = [None]; (* One entry for the base level *)
47
+
leading_whitespace = true; (* Start at beginning of stream *)
48
+
document_has_content = false;
(** [of_string s] wraps the string [s] with [Input.of_string] and passes the
    resulting input to [create]. *)
let of_string s =
  let input = Input.of_string s in
  create input
···
| { indent; _ } :: _ -> indent
63
-
(** Skip whitespace and comments, return true if at newline *)
64
-
let rec skip_to_next_token t =
69
+
(** Skip whitespace to end of line, checking for valid comments.
70
+
Returns true if any whitespace (including tabs) was found before a comment. *)
71
+
let skip_whitespace_and_comment t =
72
+
let has_whitespace = ref false in
73
+
(* Skip blanks (spaces and tabs) *)
while Input.next_is_blank t.input do
75
+
has_whitespace := true;
ignore (Input.next t.input)
78
+
(* Check for comment *)
if Input.next_is (( = ) '#') t.input then begin
80
+
(* Validate: comment must be preceded by whitespace or be at start of line *)
81
+
if not !has_whitespace then begin
82
+
(* Check if we're at the start of input or after a line break *)
83
+
match Input.peek_back t.input with
84
+
| None -> () (* Start of input - OK *)
85
+
| Some c when Input.is_break c -> () (* After line break - OK *)
87
+
(* Comment not preceded by whitespace - ERROR *)
88
+
Error.raise_at (Input.mark t.input) Invalid_comment
90
+
(* Skip to end of line *)
while not (Input.is_eof t.input) && not (Input.next_is_break t.input) do
ignore (Input.next t.input)
96
+
(** Skip blanks (spaces/tabs) and return whether tabs were found *)
97
+
let skip_blanks_check_tabs t =
98
+
let found_tab = ref false in
99
+
while Input.next_is_blank t.input do
100
+
if Input.peek t.input = Some '\t' then found_tab := true;
101
+
ignore (Input.next t.input)
105
+
(** Skip whitespace and comments, return true if at newline *)
106
+
let rec skip_to_next_token t =
107
+
(* Check for tabs used as indentation in block context *)
108
+
(match Input.peek t.input with
109
+
| Some '\t' when t.flow_level = 0 && t.leading_whitespace &&
110
+
(column t - 1) <= current_indent t ->
111
+
(* Tab found in indentation zone - this is invalid *)
112
+
(* Skip to end of line to check if line has content *)
113
+
let start_pos = Input.mark t.input in
114
+
while Input.next_is_blank t.input do
115
+
ignore (Input.next t.input)
117
+
(* If we have content on this line with a tab, raise error *)
118
+
if not (Input.next_is_break t.input) && not (Input.is_eof t.input) then
119
+
Error.raise_at start_pos Tab_in_indentation
122
+
(* Skip blanks and validate comments *)
123
+
skip_whitespace_and_comment t;
(* Skip line break in block context *)
if t.flow_level = 0 && Input.next_is_break t.input then begin
Input.consume_break t.input;
t.allow_simple_key <- true;
128
+
t.leading_whitespace <- true;
else if t.flow_level > 0 && Input.next_is_whitespace t.input then begin
···
while Input.next_is_blank t.input do
ignore (Input.next t.input)
350
+
(* Check for document boundary - this terminates the quoted string *)
351
+
if Input.at_document_boundary t.input then
352
+
Error.raise_at start Unclosed_single_quote;
···
470
+
(* Check for document boundary - this terminates the quoted string *)
471
+
if Input.at_document_boundary t.input then
472
+
Error.raise_at start Unclosed_double_quote;
(* Per YAML spec: single break = space, break + empty lines = newlines *)
if !empty_lines > 0 then begin
(* Empty lines: output N newlines where N = number of empty lines *)
···
| Some c2 when in_flow && Input.is_flow_indicator c2 -> false
447
-
(* # is OK if not preceded by whitespace (checked at call site) *)
503
+
(* # is a comment indicator only if preceded by whitespace *)
504
+
(* Check the previous character to determine if this is a comment *)
505
+
(match Input.peek_back t.input with
506
+
| None -> true (* At start - can't be comment indicator, allow it *)
507
+
| Some c when Input.is_whitespace c -> false (* Preceded by whitespace - comment *)
508
+
| Some c when Input.is_break c -> false (* At start of line - comment *)
509
+
| _ -> true) (* Not preceded by whitespace - part of scalar *)
| c when in_flow && Input.is_flow_indicator c -> false
| _ when Input.is_break c -> false
···
let start = Input.mark t.input in
let in_flow = t.flow_level > 0 in
let indent = current_indent t in
519
+
(* Validate flow collection indentation *)
520
+
if in_flow && (column t) < t.flow_indent then
521
+
Error.raise_at start Invalid_flow_indentation;
let buf = Buffer.create 64 in
let spaces = Buffer.create 16 in
let leading_blanks = ref false in
···
match Input.peek t.input with
| Some c when can_continue_plain t c ~in_flow ->
466
-
(* Check for # preceded by space *)
467
-
if c = '#' && Buffer.length buf > 0 then
468
-
() (* Stop - # after content *)
530
+
(* can_continue_plain already handles # correctly - it returns false
531
+
when # is preceded by whitespace (making it a comment indicator) *)
if Buffer.length spaces > 0 then begin
if !leading_blanks then begin
···
chomping := Chomping.Keep; ignore (Input.next t.input)
570
-
(* Skip to end of line *)
571
-
while Input.next_is_blank t.input do
572
-
ignore (Input.next t.input)
575
-
(* Optional comment *)
576
-
if Input.next_is (( = ) '#') t.input then begin
577
-
while not (Input.is_eof t.input) && not (Input.next_is_break t.input) do
578
-
ignore (Input.next t.input)
633
+
(* Skip whitespace and optional comment *)
634
+
skip_whitespace_and_comment t;
if Input.next_is_break t.input then
···
Note: we use col, not col-1, to allow entries at the same level. *)
900
+
(* We're about to process actual content, not leading whitespace *)
901
+
t.leading_whitespace <- false;
if Input.is_eof t.input then
else if Input.at_document_boundary t.input then
···
let span = Span.make ~start ~stop:(Input.mark t.input) in
let token = if indicator = "---" then Token.Document_start else Token.Document_end in
961
+
(* Reset document content flag after document end marker *)
962
+
if indicator = "..." then
963
+
t.document_has_content <- false;
967
+
(* Directives can only appear:
968
+
1. At stream start (before any document content)
969
+
2. After a document end marker (...)
970
+
If we've emitted content in the current document, we need a document end marker first *)
971
+
if t.document_has_content then
972
+
Error.raise_at (Input.mark t.input)
973
+
(Unexpected_token "directives must be separated from document content by document end marker (...)");
t.allow_simple_key <- false;
···
and fetch_flow_collection_start t token_type =
982
+
(* Record indent of outermost flow collection *)
983
+
if t.flow_level = 0 then
984
+
t.flow_indent <- column t;
t.flow_level <- t.flow_level + 1;
t.allow_simple_key <- true;
t.simple_keys <- None :: t.simple_keys;
988
+
t.document_has_content <- true;
let start = Input.mark t.input in
ignore (Input.next t.input);
let span = Span.make ~start ~stop:(Input.mark t.input) in
···
t.allow_simple_key <- true;
1030
+
t.document_has_content <- true;
let start = Input.mark t.input in
ignore (Input.next t.input);
1034
+
(* Check for tabs after - : pattern like -\t- is invalid *)
1035
+
let found_tabs = skip_blanks_check_tabs t in
1036
+
if found_tabs then begin
1037
+
(* If we found tabs and next char is - followed by whitespace, error *)
1038
+
match Input.peek t.input with
1040
+
(match Input.peek_nth t.input 1 with
1041
+
| None -> Error.raise_at start Tab_in_indentation
1042
+
| Some c when Input.is_whitespace c ->
1043
+
Error.raise_at start Tab_in_indentation
let span = Span.make ~start ~stop:(Input.mark t.input) in
emit t span Token.Block_entry
···
t.allow_simple_key <- t.flow_level = 0;
1070
+
t.document_has_content <- true;
let start = Input.mark t.input in
ignore (Input.next t.input);
1074
+
(* Check for tabs after ? : pattern like ?\t- or ?\tkey is invalid *)
1075
+
let found_tabs = skip_blanks_check_tabs t in
1076
+
if found_tabs && t.flow_level = 0 then begin
1077
+
(* In block context, tabs after ? are not allowed *)
1078
+
Error.raise_at start Tab_in_indentation
let span = Span.make ~start ~stop:(Input.mark t.input) in
···
1044
-
t.allow_simple_key <- t.flow_level = 0;
1140
+
(* In block context, allow_simple_key becomes true only after a line break,
1141
+
not immediately after ':'. This prevents constructs like "key: - a".
1142
+
The line break handling in skip_to_next_token will set it to true. *)
1143
+
t.allow_simple_key <- false;
1144
+
t.document_has_content <- true;
let start = Input.mark t.input in
ignore (Input.next t.input);
1148
+
(* Check for tabs after : : pattern like :\t- is invalid in block context *)
1149
+
let found_tabs = skip_blanks_check_tabs t in
1150
+
if found_tabs && t.flow_level = 0 then begin
1151
+
(* In block context, tabs after : followed by indicator are not allowed *)
1152
+
match Input.peek t.input with
1153
+
| Some ('-' | '?') ->
1154
+
Error.raise_at start Tab_in_indentation
let span = Span.make ~start ~stop:(Input.mark t.input) in
t.allow_simple_key <- false;
1164
+
t.document_has_content <- true;
let start = Input.mark t.input in
ignore (Input.next t.input); (* consume * *)
let name, span = scan_anchor_alias t in
···
t.allow_simple_key <- false;
1174
+
t.document_has_content <- true;
let start = Input.mark t.input in
ignore (Input.next t.input); (* consume & *)
let name, span = scan_anchor_alias t in
···
t.allow_simple_key <- false;
1184
+
t.document_has_content <- true;
let handle, suffix, span = scan_tag t in
emit t span (Token.Tag { handle; suffix })
(** Scan a block scalar with [scan_block_scalar] and emit it as a
    [Token.Scalar]. [literal] is forwarded unchanged to the scanner, and the
    token carries the style and value that the scanner returns. *)
and fetch_block_scalar t literal =
  t.allow_simple_key <- true;
  (* Mark the current document as having content before the token is emitted. *)
  t.document_has_content <- true;
  let value, style, span = scan_block_scalar t literal in
  emit t span (Token.Scalar { style; value })
(** Scan a single-quoted scalar with [scan_single_quoted] and emit it as a
    [Token.Scalar] with style [Scalar_style.Single_quoted]. *)
and fetch_single_quoted t =
  t.allow_simple_key <- false;
  (* Mark the current document as having content before the token is emitted. *)
  t.document_has_content <- true;
  let value, span = scan_single_quoted t in
  emit t span (Token.Scalar { style = Scalar_style.Single_quoted; value })
(** Scan a double-quoted scalar with [scan_double_quoted] and emit it as a
    [Token.Scalar] with style [Scalar_style.Double_quoted]. *)
and fetch_double_quoted t =
  t.allow_simple_key <- false;
  (* Mark the current document as having content before the token is emitted. *)
  t.document_has_content <- true;
  let value, span = scan_double_quoted t in
  emit t span (Token.Scalar { style = Scalar_style.Double_quoted; value })
···
(** Scan a plain (unquoted) scalar with [scan_plain_scalar] and emit it as a
    [Token.Scalar] with style [Scalar_style.Plain]. *)
and fetch_plain_scalar t =
  t.allow_simple_key <- false;
  (* Mark the current document as having content before the token is emitted. *)
  t.document_has_content <- true;
  let value, span = scan_plain_scalar t in
  emit t span (Token.Scalar { style = Scalar_style.Plain; value })