My agentic slop goes here. Not intended for anyone else!

more

Changed files
+167 -57
yaml
+20
yaml/ocaml-yamle/bin/dune
···
(executable
(name test_emit)
(libraries yamle))
+
+
(executable
+
(name test_anchor)
+
(libraries yamle))
+
+
(executable
+
(name test_anchor_comprehensive)
+
(libraries yamle))
+
+
(executable
+
(name test_anchor_boundaries)
+
(libraries yamle))
+
+
(executable
+
(name test_alias)
+
(libraries yamle))
+
+
(executable
+
(name test_anchor_fix_final)
+
(libraries yamle))
+8 -4
yaml/ocaml-yamle/lib/parser.ml
···
(** Resolve a tag *)
let resolve_tag t ~handle ~suffix =
-
match List.assoc_opt handle t.tag_directives with
-
| Some prefix -> prefix ^ suffix
-
| None when handle = "!" -> "!" ^ suffix
-
| None -> Error.raise (Invalid_tag (handle ^ suffix))
+
if handle = "" then
+
(* Verbatim tag - suffix is already the full URI *)
+
suffix
+
else
+
match List.assoc_opt handle t.tag_directives with
+
| Some prefix -> prefix ^ suffix
+
| None when handle = "!" -> "!" ^ suffix
+
| None -> Error.raise (Invalid_tag (handle ^ suffix))
(** Process directives at document start *)
let process_directives t =
+123 -37
yaml/ocaml-yamle/lib/scanner.ml
···
let scan_anchor_alias t =
let start = Input.mark t.input in
let buf = Buffer.create 16 in
+
(* Per YAML 1.2 spec: anchor names can contain any character that is NOT:
+
- Whitespace (space, tab, line breaks)
+
- Flow indicators: []{}
+
- Comma (,)
+
This matches the saphyr implementation: is_yaml_non_space && !is_flow *)
while
match Input.peek t.input with
-
| Some c when Input.is_alnum c || c = '_' || c = '-' ->
+
| Some c when not (Input.is_whitespace c) &&
+
not (Input.is_flow_indicator c) &&
+
c <> '\x00' ->
Buffer.add_char buf c;
ignore (Input.next t.input);
true
···
let handle, suffix =
match Input.peek t.input with
| Some '<' ->
-
(* Verbatim tag: !<...> *)
+
(* Verbatim tag: !<...> - handle is empty, suffix is full URI *)
ignore (Input.next t.input);
let buf = Buffer.create 32 in
while
···
| None -> Error.raise_at (Input.mark t.input) (Invalid_tag "unclosed verbatim tag")
do () done;
ignore (Input.next t.input); (* consume > *)
-
("!", Buffer.contents buf)
+
("", Buffer.contents buf)
| Some c when Input.is_whitespace c || Input.is_flow_indicator c ->
(* Non-specific tag: ! *)
("!", "")
···
let suffix = scan_tag_suffix t in
("!!", suffix)
| _ ->
-
(* Primary handle or just suffix *)
-
let first_part = scan_tag_suffix t in
-
if String.length first_part > 0 && first_part.[String.length first_part - 1] = '!' then
-
let suffix = scan_tag_suffix t in
-
(first_part, suffix)
-
else
-
("!", first_part)
+
(* Primary handle or just suffix: !foo or !e!foo *)
+
(* Read alphanumeric characters *)
+
let buf = Buffer.create 16 in
+
while
+
match Input.peek t.input with
+
| Some c when Input.is_alnum c || c = '-' ->
+
Buffer.add_char buf c;
+
ignore (Input.next t.input);
+
true
+
| _ -> false
+
do () done;
+
(* Check if next character is ! - if so, this is a named handle *)
+
(match Input.peek t.input with
+
| Some '!' ->
+
(* Named handle like !e! *)
+
ignore (Input.next t.input);
+
let handle_name = Buffer.contents buf in
+
let suffix = scan_tag_suffix t in
+
("!" ^ handle_name ^ "!", suffix)
+
| _ ->
+
(* Just ! followed by suffix *)
+
("!", Buffer.contents buf ^ scan_tag_suffix t))
in
let span = Span.make ~start ~stop:(Input.mark t.input) in
(handle, suffix, span)
···
scan_line ();
(* Check for line continuation *)
if not in_flow && Input.next_is_break t.input then begin
-
(* Save whitespace *)
-
Buffer.clear spaces;
-
Buffer.add_char spaces '\n';
+
(* Save the line break *)
+
if !leading_blanks then begin
+
(* We already had a break - this is an additional break (empty line) *)
+
Buffer.add_char spaces '\n'
+
end else begin
+
(* First line break *)
+
Buffer.clear spaces;
+
Buffer.add_char spaces '\n';
+
leading_blanks := true
+
end;
Input.consume_break t.input;
(* Line break in block context allows simple key *)
t.allow_simple_key <- true;
-
(* Skip leading blanks *)
+
(* Skip leading blanks on the next line *)
while Input.next_is_blank t.input do
-
Buffer.add_char spaces (Option.get (Input.next t.input))
+
ignore (Input.next t.input)
done;
let col = (Input.position t.input).column in
(* Check indentation - stop if we're at or before the containing block's indent *)
···
() (* Stop - dedented or at parent level *)
else if Input.at_document_boundary t.input then
() (* Stop - document boundary *)
-
else begin
-
leading_blanks := true;
+
else
scan_lines ()
-
end
end
in
···
let base_indent = current_indent t in
let content_indent = ref (
match !explicit_indent with
-
| Some n -> base_indent + n
+
| Some n ->
+
(* base_indent is a column (1-indexed), convert to indent level (0-indexed) *)
+
let base_level = max 0 (base_indent - 1) in
+
base_level + n
| None -> 0 (* Will be determined by first non-empty line *)
) in
let buf = Buffer.create 256 in
let trailing_breaks = Buffer.create 16 in
-
(* Read content *)
-
let rec read_lines () =
-
(* Skip empty lines, collecting breaks *)
-
while Input.next_is_break t.input ||
-
(Input.next_is_blank t.input &&
-
match Input.peek_nth t.input 1 with
-
| Some c when Input.is_break c -> true
-
| None -> true
-
| _ -> false)
-
do
-
if Input.next_is_blank t.input then begin
-
while Input.next_is_blank t.input do
-
ignore (Input.next t.input)
-
done
-
end;
+
(* Skip to content indentation, skipping empty lines *)
+
let rec skip_to_content_indent () =
+
if !content_indent > 0 then begin
+
(* Explicit indent - skip exactly content_indent spaces *)
+
let spaces_skipped = ref 0 in
+
while !spaces_skipped < !content_indent && Input.next_is (( = ) ' ') t.input do
+
incr spaces_skipped;
+
ignore (Input.next t.input)
+
done;
+
+
(* Check if this line is empty (only spaces/tabs until break/eof) *)
if Input.next_is_break t.input then begin
+
(* Empty line - record the break and continue *)
Buffer.add_char trailing_breaks '\n';
-
Input.consume_break t.input
+
Input.consume_break t.input;
+
skip_to_content_indent ()
+
end else if Input.next_is_blank t.input then begin
+
(* Line has spaces beyond content_indent - check if rest is only blanks *)
+
let idx = ref 0 in
+
let is_empty = ref false in
+
while not !is_empty do
+
match Input.peek_nth t.input !idx with
+
| Some c when Input.is_blank c -> incr idx
+
| Some c when Input.is_break c -> is_empty := true
+
| _ -> is_empty := true (* Not a break, so has content *)
+
done;
+
(* Check if we found a break (empty line) or content *)
+
(match Input.peek_nth t.input (!idx) with
+
| None | Some '\n' | Some '\r' ->
+
(* Empty line - consume all blanks and break *)
+
while Input.next_is_blank t.input do
+
ignore (Input.next t.input)
+
done;
+
Buffer.add_char trailing_breaks '\n';
+
Input.consume_break t.input;
+
skip_to_content_indent ()
+
| _ ->
+
(* Has content *)
+
())
end
-
done;
+
end else begin
+
(* Implicit indent - skip empty lines without consuming spaces *)
+
if Input.next_is_break t.input then begin
+
Buffer.add_char trailing_breaks '\n';
+
Input.consume_break t.input;
+
skip_to_content_indent ()
+
end else if Input.next_is_blank t.input then begin
+
(* Check if line is empty *)
+
let idx = ref 0 in
+
while match Input.peek_nth t.input !idx with
+
| Some c when Input.is_blank c -> incr idx; true
+
| _ -> false
+
do () done;
+
match Input.peek_nth t.input (!idx) with
+
| None | Some '\n' | Some '\r' ->
+
(* Empty line *)
+
while Input.next_is_blank t.input do
+
ignore (Input.next t.input)
+
done;
+
Buffer.add_char trailing_breaks '\n';
+
Input.consume_break t.input;
+
skip_to_content_indent ()
+
| _ ->
+
(* Has content - don't consume anything *)
+
()
+
end
+
end
+
in
+
+
(* Read content *)
+
let rec read_lines () =
+
skip_to_content_indent ();
(* Check if we're at content *)
if Input.is_eof t.input then ()
···
ignore (Input.next t.input)
done;
-
(* Determine content indent from first content line *)
+
(* For explicit indent, line_indent is extra beyond content_indent *)
+
if !content_indent > 0 then
+
line_indent := !content_indent + !line_indent;
+
+
(* Determine content indent from first content line (implicit case) *)
if !content_indent = 0 then begin
if !line_indent <= base_indent then begin
(* No content - restore position conceptually *)
+1 -1
yaml/ocaml-yamle/tests/cram/anchors.t
···
shared_value: 100
calculations:
base: 100
-
doubled: 200
+
doubled: 200
reference: 100
another_ref: 100
---
+12 -12
yaml/ocaml-yamle/tests/cram/comments.t
···
Test comments.yml file with various comment styles
$ yamlcat ../yaml/comments.yml
-
name: John Doe
-
age: 30
+
name: John Doe
+
age: 30
address:
-
street: 123 Main St
+
street: 123 Main St
city: Springfield
zip: 12345
items:
-
- apple
+
- apple
- banana
- cherry
-
- date
+
- date
flow_seq:
- 1
- 2
···
nested:
level1:
level2:
-
value: deeply nested
+
value: deeply nested
multi_comment_key: value
special: 'value with # hash inside quotes'
empty_value: null
···
Test comments.yml roundtrip with JSON to verify parsed values
$ yamlcat --json ../yaml/comments.yml
-
{"name": "John Doe ", "age": "30 ", "address": {"street": "123 Main St ", "city": "Springfield", "zip": 12345}, "items": ["apple ", "banana", "cherry", "date "], "flow_seq": [1, 2, 3], "flow_map": {"key1": "value1", "key2": "value2"}, "nested": {"level1": {"level2": {"value": "deeply nested "}}}, "multi_comment_key": "value", "special": "value with # hash inside quotes", "empty_value": null, "final_key": "final_value"}
+
{"name": "John Doe", "age": 30, "address": {"street": "123 Main St", "city": "Springfield", "zip": 12345}, "items": ["apple", "banana", "cherry", "date"], "flow_seq": [1, 2, 3], "flow_map": {"key1": "value1", "key2": "value2"}, "nested": {"level1": {"level2": {"value": "deeply nested"}}}, "multi_comment_key": "value", "special": "value with # hash inside quotes", "empty_value": null, "final_key": "final_value"}
Test full line comments are ignored
···
$ echo 'name: Bob # This is an end of line comment
> age: 35 # Another end of line comment' | yamlcat --json
-
{"name": "Bob ", "age": "35 "}
+
{"name": "Bob", "age": 35}
Test comments after sequence items
···
> - apple # First fruit
> - banana # Second fruit
> - cherry # Third fruit' | yamlcat --json
-
{"fruits": ["apple ", "banana ", "cherry "]}
+
{"fruits": ["apple", "banana", "cherry"]}
Test comments between sequence items
···
> inner:
> # Comment in deeper level
> key: value # End of line comment' | yamlcat --json
-
{"outer": {"inner": {"key": "value "}}}
+
{"outer": {"inner": {"key": "value"}}}
Test comments with special characters
···
> # Comment between sequence items
> - name: item2
> value: 20 # Another comment' | yamlcat --json
-
{"items": [{"name": "item1 ", "value": 10}, {"name": "item2", "value": "20 "}]}
+
{"items": [{"name": "item1", "value": 10}, {"name": "item2", "value": 20}]}
Test comment only lines between complex structures
···
Test whitespace preservation around comments
$ echo 'key1: value1 # Comment with spaces' | yamlcat --json
-
{"key1": "value1 "}
+
{"key1": "value1"}
Test comment after colon but before value
+1 -1
yaml/ocaml-yamle/tests/cram/multidoc.t
···
shared_value: 100
calculations:
base: 100
-
doubled: 200
+
doubled: 200
reference: 100
another_ref: 100
---
+2 -2
yaml/ocaml-yamle/tests/cram/yamlcat.t
···
Test seq.yml file (multiline plain scalar)
$ yamlcat ../yaml/seq.yml
-
- hello - whats - up
+
- hello - whats - up
- foo
- bar
Test seq.yml roundtrip preserves data
$ yamlcat --json ../yaml/seq.yml
-
["hello - whats - up", "foo", "bar"]
+
["hello - whats - up", "foo", "bar"]
Test cohttp.yml