···
mutable document_has_content : bool; (** True if we've emitted content tokens in current document *)
mutable adjacent_value_allowed_at : Position.t option; (** Position where adjacent : is allowed *)
mutable pending_value : bool; (** True if we've emitted a KEY and are waiting for VALUE *)
34
+
mutable flow_mapping_stack : bool list; (** Stack of whether each flow level is a mapping *)
···
document_has_content = false;
adjacent_value_allowed_at = None;
54
+
flow_mapping_stack = [];
let of_string s = create (Input.of_string s)
···
if t.allow_simple_key then begin
(* A simple key is required only if we're in a block context,
176
-
at the current indentation level, AND we have an active indent *)
178
+
at the current indentation level, AND the current indent needs a block end.
179
+
This matches saphyr's logic and prevents false positives for values. *)
let required = t.flow_level = 0 &&
178
-
t.indent_stack <> [] &&
179
-
current_indent t = column t - 1 in
181
+
match t.indent_stack with
182
+
| { indent; needs_block_end = true; _ } :: _ ->
···
(* Just ! followed by suffix *)
("!", Buffer.contents buf ^ scan_tag_suffix t))
363
+
(* Validate that tag is followed by whitespace, break, or (in flow) flow indicator *)
364
+
(match Input.peek t.input with
365
+
| None -> () (* EOF is ok *)
366
+
| Some c when Input.is_whitespace c || Input.is_break c -> ()
367
+
| Some c when t.flow_level > 0 && Input.is_flow_indicator c -> ()
368
+
| _ -> Error.raise_at start (Invalid_tag "expected whitespace or line break after tag"));
let span = Span.make ~start ~stop:(Input.mark t.input) in
···
let start = Input.mark t.input in
ignore (Input.next t.input); (* consume opening single-quote *)
let buf = Buffer.create 64 in
377
+
let whitespace = Buffer.create 16 in (* Track trailing whitespace *)
379
+
let flush_whitespace () =
380
+
if Buffer.length whitespace > 0 then begin
381
+
Buffer.add_buffer buf whitespace;
382
+
Buffer.clear whitespace
match Input.peek t.input with
| None -> Error.raise_at start Unclosed_single_quote
···
(* Check for escaped quote ('') *)
(match Input.peek t.input with
394
+
flush_whitespace ();
Buffer.add_char buf '\'';
ignore (Input.next t.input);
399
+
(* End of string - flush any trailing whitespace *)
400
+
flush_whitespace ())
401
+
| Some ' ' | Some '\t' ->
402
+
(* Track whitespace - don't add to buf yet *)
403
+
Buffer.add_char whitespace (Option.get (Input.peek t.input));
404
+
ignore (Input.next t.input);
| Some '\n' | Some '\r' ->
407
+
(* Discard trailing whitespace before line break *)
408
+
Buffer.clear whitespace;
Input.consume_break t.input;
379
-
(* Fold line break to space unless at start of content *)
380
-
if Buffer.length buf > 0 then
381
-
Buffer.add_char buf ' ';
(* Skip leading whitespace on next line *)
while Input.next_is_blank t.input do
ignore (Input.next t.input)
386
-
(* Check for document boundary - this terminates the quoted string *)
414
+
(* Check for document boundary *)
if Input.at_document_boundary t.input then
Error.raise_at start Unclosed_single_quote;
417
+
(* Count empty lines (consecutive line breaks) *)
418
+
let empty_lines = ref 0 in
419
+
while Input.next_is_break t.input do
421
+
Input.consume_break t.input;
422
+
while Input.next_is_blank t.input do
423
+
ignore (Input.next t.input)
425
+
if Input.at_document_boundary t.input then
426
+
Error.raise_at start Unclosed_single_quote
428
+
(* Apply folding rules *)
429
+
if !empty_lines > 0 then begin
430
+
(* Empty lines: preserve as newlines *)
431
+
for _ = 1 to !empty_lines do
432
+
Buffer.add_char buf '\n'
435
+
(* Single break: fold to space (even at start of string) *)
436
+
Buffer.add_char buf ' ';
439
+
flush_whitespace ();
ignore (Input.next t.input);
···
(Invalid_block_scalar_header "expected newline after header");
let base_indent = current_indent t in
763
+
(* base_indent is the indent level from the stack, -1 if empty.
764
+
It's used directly for comparisons in implicit indent case. *)
let content_indent = ref (
match !explicit_indent with
717
-
(* base_indent is a column (1-indexed), convert to indent level (0-indexed) *)
768
+
(* Explicit indent: base_indent is 1-indexed column, convert to 0-indexed.
769
+
content_indent = (base_indent - 1) + n, but at least n for document level. *)
let base_level = max 0 (base_indent - 1) in
| None -> 0 (* Will be determined by first non-empty line *)
···
(* Line starts with fewer spaces than content_indent - dedented *)
end else if Input.next_is_blank t.input then begin
748
-
(* Line has spaces beyond content_indent - check if rest is only blanks *)
750
-
let is_empty = ref false in
751
-
while not !is_empty do
752
-
match Input.peek_nth t.input !idx with
753
-
| Some c when Input.is_blank c -> incr idx
754
-
| Some c when Input.is_break c -> is_empty := true
755
-
| _ -> is_empty := true (* Not a break, so has content *)
757
-
(* Check if we found a break (empty line) or content *)
758
-
(match Input.peek_nth t.input (!idx) with
759
-
| None | Some '\n' | Some '\r' ->
760
-
(* Empty line - preserve spaces for literal scalars *)
761
-
if literal then begin
762
-
while Input.next_is_blank t.input do
763
-
Buffer.add_char trailing_breaks ' ';
764
-
ignore (Input.next t.input)
767
-
while Input.next_is_blank t.input do
768
-
ignore (Input.next t.input)
771
-
Buffer.add_char trailing_breaks '\n';
772
-
Input.consume_break t.input;
773
-
skip_to_content_indent ()
800
+
(* Line has spaces/tabs beyond content_indent - could be whitespace content or empty line.
801
+
For literal scalars, whitespace-only lines ARE content (not empty).
802
+
For folded scalars, whitespace-only lines that are "more indented" are preserved. *)
804
+
(* Literal: whitespace beyond content_indent is content, let read_lines handle it *)
807
+
(* Folded: check if rest is only blanks *)
809
+
while match Input.peek_nth t.input !idx with
810
+
| Some c when Input.is_blank c -> incr idx; true
813
+
match Input.peek_nth t.input (!idx) with
814
+
| None | Some '\n' | Some '\r' ->
815
+
(* Empty/whitespace-only line in folded - skip spaces *)
816
+
while Input.next_is_blank t.input do
817
+
ignore (Input.next t.input)
819
+
Buffer.add_char trailing_breaks '\n';
820
+
Input.consume_break t.input;
821
+
skip_to_content_indent ()
823
+
(* Has non-whitespace content *)
780
-
(* Implicit indent - skip empty lines without consuming spaces *)
829
+
(* Implicit indent - skip empty lines without consuming spaces.
830
+
Note: Only SPACES count as indentation. Tabs are content, not indentation.
831
+
So we only check for spaces when determining if a line is "empty". *)
if Input.next_is_break t.input then begin
Buffer.add_char trailing_breaks '\n';
Input.consume_break t.input;
skip_to_content_indent ()
785
-
end else if Input.next_is_blank t.input then begin
786
-
(* Check if line is empty *)
836
+
end else if Input.next_is (( = ) ' ') t.input then begin
837
+
(* Check if line is empty (only spaces before break) *)
while match Input.peek_nth t.input !idx with
789
-
| Some c when Input.is_blank c -> incr idx; true
840
+
| Some ' ' -> incr idx; true
match Input.peek_nth t.input (!idx) with
| None | Some '\n' | Some '\r' ->
795
-
while Input.next_is_blank t.input do
845
+
(* Line has only spaces - empty line *)
846
+
while Input.next_is (( = ) ' ') t.input do
ignore (Input.next t.input)
Buffer.add_char trailing_breaks '\n';
Input.consume_break t.input;
skip_to_content_indent ()
802
-
(* Has content - don't consume anything, return 0 as we haven't skipped *)
853
+
(* Has content (including tabs which are content, not indentation) *)
805
-
(* Not at break or blank - return 0 *)
856
+
(* Not at break or space - could be tab (content) or other *)
···
(* Determine content indent from first content line (implicit case) *)
let first_line = !content_indent = 0 in
830
-
if !content_indent = 0 then begin
831
-
if line_indent <= base_indent then begin
832
-
(* No content - restore position conceptually *)
835
-
content_indent := line_indent
881
+
(* base_indent is 1-indexed column, convert to 0-indexed for comparison with line_indent.
882
+
If base_indent = -1 (empty stack), then base_level = -1 means col 0 is valid. *)
883
+
let base_level = base_indent - 1 in
884
+
let should_process =
885
+
if !content_indent = 0 then begin
886
+
(* For implicit indent, content must be more indented than base_level. *)
887
+
if line_indent <= base_level then
888
+
false (* No content - first line not indented enough *)
890
+
content_indent := line_indent;
893
+
end else if line_indent < !content_indent then
894
+
false (* Dedented - done with content *)
838
-
if line_indent < !content_indent then begin
839
-
(* Dedented - done with content *)
842
-
(* Check if current line is "more indented" (has extra indent beyond content_indent) *)
843
-
let trailing_blank = line_indent > !content_indent in
899
+
if should_process then begin
900
+
(* Check if current line is "more indented" (has extra indent or starts with whitespace).
901
+
For folded scalars, lines that start with any whitespace (space or tab) after the
902
+
content indentation are "more indented" and preserve breaks.
903
+
Note: we check Input.next_is_blank BEFORE reading content to see if content starts with whitespace. *)
904
+
let trailing_blank = line_indent > !content_indent || Input.next_is_blank t.input in
(* Add trailing breaks to buffer *)
if Buffer.length buf > 0 then begin
···
let span = Span.make ~start ~stop:(Input.mark t.input) in
let token = if indicator = "---" then Token.Document_start else Token.Document_end in
(* Reset document content flag after document end marker *)
1058
-
if indicator = "..." then
1119
+
if indicator = "..." then begin
t.document_has_content <- false;
1121
+
(* After document end marker, skip whitespace and check for end of line or comment *)
1122
+
while Input.next_is_blank t.input do ignore (Input.next t.input) done;
1123
+
(match Input.peek t.input with
1124
+
| None -> () (* EOF is ok *)
1125
+
| Some c when Input.is_break c -> ()
1126
+
| Some '#' -> () (* Comment is ok *)
1127
+
| _ -> Error.raise_at start (Invalid_directive "content not allowed after document end marker on same line"))
···
t.flow_indent <- column t;
t.flow_level <- t.flow_level + 1;
1151
+
(* Track whether this is a mapping or sequence *)
1152
+
let is_mapping = (token_type = Token.Flow_mapping_start) in
1153
+
t.flow_mapping_stack <- is_mapping :: t.flow_mapping_stack;
t.allow_simple_key <- true;
t.simple_keys <- None :: t.simple_keys;
t.document_has_content <- true;
···
and fetch_flow_collection_end t token_type =
t.flow_level <- t.flow_level - 1;
1165
+
t.flow_mapping_stack <- (match t.flow_mapping_stack with _ :: rest -> rest | [] -> []);
t.simple_keys <- (match t.simple_keys with _ :: rest -> rest | [] -> []);
t.allow_simple_key <- false;
let start = Input.mark t.input in
···
1276
+
let start = Input.mark t.input in
(* Check for simple key *)
1204
-
(match t.simple_keys with
1205
-
| Some sk :: _ when sk.sk_possible ->
1206
-
(* Insert KEY token before the simple key value *)
1207
-
let key_span = Span.point sk.sk_position in
1208
-
let key_token = { Token.token = Token.Key; span = key_span } in
1209
-
(* We need to insert at the right position *)
1210
-
let tokens = Queue.to_seq t.tokens |> Array.of_seq in
1211
-
Queue.clear t.tokens;
1212
-
let insert_pos = sk.sk_token_number - t.tokens_taken in
1213
-
Array.iteri (fun i tok ->
1214
-
if i = insert_pos then Queue.add key_token t.tokens;
1215
-
Queue.add tok t.tokens
1217
-
if insert_pos >= Array.length tokens then
1218
-
Queue.add key_token t.tokens;
1219
-
t.token_number <- t.token_number + 1;
1220
-
t.pending_value <- true; (* We've inserted a KEY token, now waiting for VALUE *)
1221
-
(* Roll indent for implicit block mapping *)
1222
-
if t.flow_level = 0 then begin
1223
-
let col = sk.sk_position.column in
1224
-
if roll_indent t col ~sequence:false then begin
1225
-
let span = Span.point sk.sk_position in
1226
-
(* Insert block mapping start before key *)
1227
-
let bm_token = { Token.token = Token.Block_mapping_start; span } in
1228
-
let tokens = Queue.to_seq t.tokens |> Array.of_seq in
1229
-
Queue.clear t.tokens;
1230
-
Array.iteri (fun i tok ->
1231
-
if i = insert_pos then Queue.add bm_token t.tokens;
1232
-
Queue.add tok t.tokens
1234
-
if insert_pos >= Array.length tokens then
1235
-
Queue.add bm_token t.tokens;
1236
-
t.token_number <- t.token_number + 1
1239
-
t.simple_keys <- None :: (List.tl t.simple_keys)
1241
-
(* No simple key - this is a complex value (or empty key) *)
1242
-
if t.flow_level = 0 then begin
1243
-
if not t.allow_simple_key then
1244
-
Error.raise_at (Input.mark t.input) Expected_key;
1245
-
let col = column t in
1246
-
if roll_indent t col ~sequence:false then begin
1247
-
let span = Span.point (Input.mark t.input) in
1248
-
emit t span Token.Block_mapping_start
1250
-
(* Emit KEY token for empty key case (e.g., ": value") only if we don't already have a pending KEY *)
1251
-
if not t.pending_value then begin
1252
-
let span = Span.point (Input.mark t.input) in
1253
-
emit t span Token.Key;
1254
-
t.pending_value <- true
1278
+
let used_simple_key =
1279
+
match t.simple_keys with
1280
+
| Some sk :: _ when sk.sk_possible ->
1281
+
(* In implicit flow mapping (inside a flow sequence), key and : must be on the same line.
1282
+
In explicit flow mapping { }, key and : can span lines. *)
1283
+
let is_implicit_flow_mapping = match t.flow_mapping_stack with
1284
+
| false :: _ -> true (* false = we're in a sequence, so any mapping is implicit *)
1287
+
if is_implicit_flow_mapping && sk.sk_position.line < (Input.position t.input).line then
1288
+
Error.raise_at start Illegal_flow_key_line;
1289
+
(* Insert KEY token before the simple key value *)
1290
+
let key_span = Span.point sk.sk_position in
1291
+
let key_token = { Token.token = Token.Key; span = key_span } in
1292
+
(* We need to insert at the right position *)
1293
+
let tokens = Queue.to_seq t.tokens |> Array.of_seq in
1294
+
Queue.clear t.tokens;
1295
+
let insert_pos = sk.sk_token_number - t.tokens_taken in
1296
+
Array.iteri (fun i tok ->
1297
+
if i = insert_pos then Queue.add key_token t.tokens;
1298
+
Queue.add tok t.tokens
1300
+
if insert_pos >= Array.length tokens then
1301
+
Queue.add key_token t.tokens;
1302
+
t.token_number <- t.token_number + 1;
1303
+
t.pending_value <- true; (* We've inserted a KEY token, now waiting for VALUE *)
1304
+
(* Roll indent for implicit block mapping *)
1305
+
if t.flow_level = 0 then begin
1306
+
let col = sk.sk_position.column in
1307
+
if roll_indent t col ~sequence:false then begin
1308
+
let span = Span.point sk.sk_position in
1309
+
(* Insert block mapping start before key *)
1310
+
let bm_token = { Token.token = Token.Block_mapping_start; span } in
1311
+
let tokens = Queue.to_seq t.tokens |> Array.of_seq in
1312
+
Queue.clear t.tokens;
1313
+
Array.iteri (fun i tok ->
1314
+
if i = insert_pos then Queue.add bm_token t.tokens;
1315
+
Queue.add tok t.tokens
1317
+
if insert_pos >= Array.length tokens then
1318
+
Queue.add bm_token t.tokens;
1319
+
t.token_number <- t.token_number + 1
1322
+
t.simple_keys <- None :: (List.tl t.simple_keys);
1325
+
(* No simple key - this is a complex value (or empty key) *)
1326
+
if t.flow_level = 0 then begin
1327
+
if not t.allow_simple_key then
1328
+
Error.raise_at (Input.mark t.input) Expected_key;
1329
+
let col = column t in
1330
+
if roll_indent t col ~sequence:false then begin
1331
+
let span = Span.point (Input.mark t.input) in
1332
+
emit t span Token.Block_mapping_start
1334
+
(* Note: We don't emit KEY here. Empty key handling is done by the parser,
1335
+
which emits empty scalar when it sees VALUE without preceding KEY. *)
1258
-
(* In block context, allow_simple_key becomes true only after a line break,
1259
-
not immediately after ':'. This prevents constructs like "key: - a".
1260
-
The line break handling in skip_to_next_token will set it to true. *)
1261
-
t.allow_simple_key <- false;
1340
+
(* In block context without simple key, allow simple keys for compact mappings like ": moon: white"
1341
+
In flow context or after using a simple key, disallow simple keys *)
1342
+
t.allow_simple_key <- (not used_simple_key) && (t.flow_level = 0);
t.document_has_content <- true;
let start = Input.mark t.input in
ignore (Input.next t.input);