My agentic slop goes here. Not intended for anyone else!

more

Changed files
+210 -23
yaml
ocaml-yamle
+4
yaml/ocaml-yamle/lib/error.ml
···
| Invalid_tag of string
| Invalid_anchor of string
| Invalid_alias of string
| Unclosed_single_quote
| Unclosed_double_quote
| Unclosed_flow_sequence
| Unclosed_flow_mapping
| Invalid_indentation of int * int (** expected, got *)
| Tab_in_indentation
| Invalid_block_scalar_header of string
| Invalid_directive of string
···
| Invalid_tag s -> Printf.sprintf "invalid tag: %s" s
| Invalid_anchor s -> Printf.sprintf "invalid anchor: %s" s
| Invalid_alias s -> Printf.sprintf "invalid alias: %s" s
| Unclosed_single_quote -> "unclosed single quote"
| Unclosed_double_quote -> "unclosed double quote"
| Unclosed_flow_sequence -> "unclosed flow sequence '['"
| Unclosed_flow_mapping -> "unclosed flow mapping '{'"
| Invalid_indentation (expected, got) ->
Printf.sprintf "invalid indentation: expected %d, got %d" expected got
| Tab_in_indentation -> "tab character in indentation"
| Invalid_block_scalar_header s ->
Printf.sprintf "invalid block scalar header: %s" s
···
| Invalid_tag of string
| Invalid_anchor of string
| Invalid_alias of string
+
| Invalid_comment
| Unclosed_single_quote
| Unclosed_double_quote
| Unclosed_flow_sequence
| Unclosed_flow_mapping
| Invalid_indentation of int * int (** expected, got *)
+
| Invalid_flow_indentation (** Content in flow collection must be indented *)
| Tab_in_indentation
| Invalid_block_scalar_header of string
| Invalid_directive of string
···
| Invalid_tag s -> Printf.sprintf "invalid tag: %s" s
| Invalid_anchor s -> Printf.sprintf "invalid anchor: %s" s
| Invalid_alias s -> Printf.sprintf "invalid alias: %s" s
+
| Invalid_comment -> "comments must be separated from other tokens by whitespace"
| Unclosed_single_quote -> "unclosed single quote"
| Unclosed_double_quote -> "unclosed double quote"
| Unclosed_flow_sequence -> "unclosed flow sequence '['"
| Unclosed_flow_mapping -> "unclosed flow mapping '{'"
| Invalid_indentation (expected, got) ->
Printf.sprintf "invalid indentation: expected %d, got %d" expected got
+
| Invalid_flow_indentation -> "invalid indentation in flow construct"
| Tab_in_indentation -> "tab character in indentation"
| Invalid_block_scalar_header s ->
Printf.sprintf "invalid block scalar header: %s" s
+5
yaml/ocaml-yamle/lib/input.ml
···
(** [mark t] returns [t.position], the input's current position.
    Scanner code passes the result to [Span.make] as a span endpoint and to
    [Error.raise_at] as the location of an error. *)
let mark t = t.position
···
(** [mark t] returns [t.position], the input's current position.
    Scanner code passes the result to [Span.make] as a span endpoint and to
    [Error.raise_at] as the location of an error. *)
let mark t = t.position
+
+
(** [peek_back t] looks one character behind the read offset without moving it.

    Returns [Some c], where [c] is [t.source.[t.pos - 1]], when [t.pos] is
    positive, and [None] when nothing precedes the current offset
    ([t.pos <= 0]). *)
let peek_back t =
  if t.pos > 0 then Some t.source.[t.pos - 1] else None
+6
yaml/ocaml-yamle/lib/parser.ml
···
| Token.Flow_sequence_end ->
t.state <- Flow_sequence_entry;
empty_scalar_event ~anchor:None ~tag:None tok.span
| Token.Key ->
skip_token t;
push_state t Flow_sequence_entry_mapping_end;
···
skip_token t;
t.state <- pop_state t;
Event.Mapping_end, tok.span
| Token.Key ->
skip_token t;
if check t (function
···
| Token.Flow_sequence_end ->
t.state <- Flow_sequence_entry;
empty_scalar_event ~anchor:None ~tag:None tok.span
+
| Token.Flow_entry ->
+
(* Double comma or comma after comma - invalid *)
+
Error.raise_span tok.span (Unexpected_token "unexpected ',' in flow sequence")
| Token.Key ->
skip_token t;
push_state t Flow_sequence_entry_mapping_end;
···
skip_token t;
t.state <- pop_state t;
Event.Mapping_end, tok.span
+
| Token.Flow_entry ->
+
(* Double comma or comma after comma - invalid *)
+
Error.raise_span tok.span (Unexpected_token "unexpected ',' in flow mapping")
| Token.Key ->
skip_token t;
if check t (function
+141 -23
yaml/ocaml-yamle/lib/scanner.ml
···
mutable stream_ended : bool;
mutable indent_stack : indent list; (** Stack of indentation levels *)
mutable flow_level : int; (** Nesting depth in [] or {} *)
mutable simple_keys : simple_key option list; (** Per flow-level simple key tracking *)
mutable allow_simple_key : bool;
}
let create input =
···
stream_ended = false;
indent_stack = [];
flow_level = 0;
simple_keys = [None]; (* One entry for the base level *)
allow_simple_key = true;
}
(** [of_string s] builds scanner state over the string [s]: the string is
    wrapped with [Input.of_string] and the resulting input is handed to
    [create]. *)
let of_string s = create (Input.of_string s)
···
| [] -> 0
| { indent; _ } :: _ -> indent
-
(** Skip whitespace and comments, return true if at newline *)
-
let rec skip_to_next_token t =
-
(* Skip blanks *)
while Input.next_is_blank t.input do
ignore (Input.next t.input)
done;
-
(* Skip comment *)
if Input.next_is (( = ) '#') t.input then begin
while not (Input.is_eof t.input) && not (Input.next_is_break t.input) do
ignore (Input.next t.input)
done
-
end;
(* Skip line break in block context *)
if t.flow_level = 0 && Input.next_is_break t.input then begin
Input.consume_break t.input;
t.allow_simple_key <- true;
skip_to_next_token t
end
else if t.flow_level > 0 && Input.next_is_whitespace t.input then begin
···
while Input.next_is_blank t.input do
ignore (Input.next t.input)
done;
loop ()
| Some c ->
Buffer.add_char buf c;
···
end else
continue := false
done;
(* Per YAML spec: single break = space, break + empty lines = newlines *)
if !empty_lines > 0 then begin
(* Empty lines: output N newlines where N = number of empty lines *)
···
| Some c2 when in_flow && Input.is_flow_indicator c2 -> false
| _ -> true)
| '#' ->
-
(* # is OK if not preceded by whitespace (checked at call site) *)
-
false
| c when in_flow && Input.is_flow_indicator c -> false
| _ when Input.is_break c -> false
| _ -> true
···
let start = Input.mark t.input in
let in_flow = t.flow_level > 0 in
let indent = current_indent t in
let buf = Buffer.create 64 in
let spaces = Buffer.create 16 in
let leading_blanks = ref false in
···
match Input.peek t.input with
| None -> ()
| Some c when can_continue_plain t c ~in_flow ->
-
(* Check for # preceded by space *)
-
if c = '#' && Buffer.length buf > 0 then
-
() (* Stop - # after content *)
-
else begin
if Buffer.length spaces > 0 then begin
if !leading_blanks then begin
(* Fold line break *)
···
chomping := Chomping.Keep; ignore (Input.next t.input)
| _ -> ());
-
(* Skip to end of line *)
-
while Input.next_is_blank t.input do
-
ignore (Input.next t.input)
-
done;
-
-
(* Optional comment *)
-
if Input.next_is (( = ) '#') t.input then begin
-
while not (Input.is_eof t.input) && not (Input.next_is_break t.input) do
-
ignore (Input.next t.input)
-
done
-
end;
(* Consume line break *)
if Input.next_is_break t.input then
···
Note: we use col, not col-1, to allow entries at the same level. *)
unroll_indent t col;
if Input.is_eof t.input then
fetch_stream_end t
else if Input.at_document_boundary t.input then
···
Input.skip t.input 3;
let span = Span.make ~start ~stop:(Input.mark t.input) in
let token = if indicator = "---" then Token.Document_start else Token.Document_end in
emit t span token
and fetch_directive t =
unroll_indent t (-1);
remove_simple_key t;
t.allow_simple_key <- false;
···
and fetch_flow_collection_start t token_type =
save_simple_key t;
t.flow_level <- t.flow_level + 1;
t.allow_simple_key <- true;
t.simple_keys <- None :: t.simple_keys;
let start = Input.mark t.input in
ignore (Input.next t.input);
let span = Span.make ~start ~stop:(Input.mark t.input) in
···
end;
remove_simple_key t;
t.allow_simple_key <- true;
let start = Input.mark t.input in
ignore (Input.next t.input);
let span = Span.make ~start ~stop:(Input.mark t.input) in
emit t span Token.Block_entry
···
end;
remove_simple_key t;
t.allow_simple_key <- t.flow_level = 0;
let start = Input.mark t.input in
ignore (Input.next t.input);
let span = Span.make ~start ~stop:(Input.mark t.input) in
emit t span Token.Key
···
end
end);
remove_simple_key t;
-
t.allow_simple_key <- t.flow_level = 0;
let start = Input.mark t.input in
ignore (Input.next t.input);
let span = Span.make ~start ~stop:(Input.mark t.input) in
emit t span Token.Value
and fetch_alias t =
save_simple_key t;
t.allow_simple_key <- false;
let start = Input.mark t.input in
ignore (Input.next t.input); (* consume * *)
let name, span = scan_anchor_alias t in
···
and fetch_anchor t =
save_simple_key t;
t.allow_simple_key <- false;
let start = Input.mark t.input in
ignore (Input.next t.input); (* consume & *)
let name, span = scan_anchor_alias t in
···
(** Fetch a tag token.
    Calls [save_simple_key] (presumably so the tag can start an implicit key;
    confirm against its definition), clears [allow_simple_key], then emits the
    handle/suffix pair returned by [scan_tag] as a [Token.Tag] over the
    scanned span. *)
and fetch_tag t =
save_simple_key t;
t.allow_simple_key <- false;
let handle, suffix, span = scan_tag t in
emit t span (Token.Tag { handle; suffix })
(** Fetch a block scalar token.
    Calls [remove_simple_key], sets [allow_simple_key] to [true], and emits a
    [Token.Scalar] carrying the value and style returned by
    [scan_block_scalar]. [literal] is forwarded unchanged to the scanner
    (presumably selecting literal vs. folded style; confirm). *)
and fetch_block_scalar t literal =
remove_simple_key t;
t.allow_simple_key <- true;
let value, style, span = scan_block_scalar t literal in
emit t span (Token.Scalar { style; value })
(** Fetch a single-quoted scalar token.
    Calls [save_simple_key], clears [allow_simple_key], then emits the value
    returned by [scan_single_quoted] as a [Token.Scalar] with style
    [Scalar_style.Single_quoted]. *)
and fetch_single_quoted t =
save_simple_key t;
t.allow_simple_key <- false;
let value, span = scan_single_quoted t in
emit t span (Token.Scalar { style = Scalar_style.Single_quoted; value })
(** Fetch a double-quoted scalar token.
    Calls [save_simple_key], clears [allow_simple_key], then emits the value
    returned by [scan_double_quoted] as a [Token.Scalar] with style
    [Scalar_style.Double_quoted]. *)
and fetch_double_quoted t =
save_simple_key t;
t.allow_simple_key <- false;
let value, span = scan_double_quoted t in
emit t span (Token.Scalar { style = Scalar_style.Double_quoted; value })
···
(** Fetch a plain (unquoted) scalar token.
    Calls [save_simple_key], clears [allow_simple_key], then emits the value
    returned by [scan_plain_scalar] as a [Token.Scalar] with style
    [Scalar_style.Plain]. *)
and fetch_plain_scalar t =
save_simple_key t;
t.allow_simple_key <- false;
let value, span = scan_plain_scalar t in
emit t span (Token.Scalar { style = Scalar_style.Plain; value })
···
mutable stream_ended : bool;
mutable indent_stack : indent list; (** Stack of indentation levels *)
mutable flow_level : int; (** Nesting depth in [] or {} *)
+
mutable flow_indent : int; (** Column where outermost flow collection started *)
mutable simple_keys : simple_key option list; (** Per flow-level simple key tracking *)
mutable allow_simple_key : bool;
+
mutable leading_whitespace : bool; (** True when at start of line (only whitespace seen) *)
+
mutable document_has_content : bool; (** True if we've emitted content tokens in current document *)
}
let create input =
···
stream_ended = false;
indent_stack = [];
flow_level = 0;
+
flow_indent = 0;
simple_keys = [None]; (* One entry for the base level *)
allow_simple_key = true;
+
leading_whitespace = true; (* Start at beginning of stream *)
+
document_has_content = false;
}
(** [of_string s] builds scanner state over the string [s]: the string is
    wrapped with [Input.of_string] and the resulting input is handed to
    [create]. *)
let of_string s = create (Input.of_string s)
···
| [] -> 0
| { indent; _ } :: _ -> indent
+
(** Skip blanks and, if one follows, the comment that ends the current line.

    Consumes the run of blanks (spaces and tabs) at the current position. If
    the next character is ['#'], the comment is validated and then consumed up
    to, but not including, the line break (or up to end of input).

    Returns [unit]; callers use it purely for its effect on [t.input].

    A comment is accepted when ['#'] is the first character of the input, or
    when the character immediately before it is a blank or a line break.
    Otherwise [Invalid_comment] is reported through [Error.raise_at] at the
    position of the ['#']. *)
let skip_whitespace_and_comment t =
  let has_whitespace = ref false in
  (* Consume leading blanks, remembering whether this call saw any. *)
  while Input.next_is_blank t.input do
    has_whitespace := true;
    ignore (Input.next t.input)
  done;
  if Input.next_is (( = ) '#') t.input then begin
    if not !has_whitespace then begin
      (* No blank was consumed by this call, so inspect the previous
         character. It may be a blank that an earlier caller already consumed
         (e.g. [skip_blanks_check_tabs] after '-', '?' or ':'), which
         separates the comment just as validly as one skipped here. *)
      match Input.peek_back t.input with
      | None -> () (* '#' is the first character of the input *)
      | Some c when Input.is_break c || Input.is_whitespace c -> ()
      | Some _ ->
        (* '#' is glued to the preceding token. *)
        Error.raise_at (Input.mark t.input) Invalid_comment
    end;
    (* Drop the comment text; the line break itself is left for the caller. *)
    while not (Input.is_eof t.input) && not (Input.next_is_break t.input) do
      ignore (Input.next t.input)
    done
  end
+
+
(** Consume the run of blanks (spaces/tabs) at the current position.

    Returns [true] if at least one of the consumed characters was a tab,
    [false] otherwise (including when no blank was present at all). *)
let skip_blanks_check_tabs t =
  let rec consume saw_tab =
    if not (Input.next_is_blank t.input) then saw_tab
    else begin
      (* Classify the blank before consuming it. *)
      let is_tab = Input.peek t.input = Some '\t' in
      ignore (Input.next t.input);
      consume (saw_tab || is_tab)
    end
  in
  consume false
+
+
(** Skip whitespace and comments, return true if at newline *)
+
let rec skip_to_next_token t =
+
(* Check for tabs used as indentation in block context *)
+
(match Input.peek t.input with
+
| Some '\t' when t.flow_level = 0 && t.leading_whitespace &&
+
(column t - 1) <= current_indent t ->
+
(* Tab found in indentation zone - this is invalid *)
+
(* Skip to end of line to check if line has content *)
+
let start_pos = Input.mark t.input in
+
while Input.next_is_blank t.input do
+
ignore (Input.next t.input)
+
done;
+
(* If we have content on this line with a tab, raise error *)
+
if not (Input.next_is_break t.input) && not (Input.is_eof t.input) then
+
Error.raise_at start_pos Tab_in_indentation
+
| _ -> ());
+
+
(* Skip blanks and validate comments *)
+
skip_whitespace_and_comment t;
(* Skip line break in block context *)
if t.flow_level = 0 && Input.next_is_break t.input then begin
Input.consume_break t.input;
t.allow_simple_key <- true;
+
t.leading_whitespace <- true;
skip_to_next_token t
end
else if t.flow_level > 0 && Input.next_is_whitespace t.input then begin
···
while Input.next_is_blank t.input do
ignore (Input.next t.input)
done;
+
(* Check for document boundary - this terminates the quoted string *)
+
if Input.at_document_boundary t.input then
+
Error.raise_at start Unclosed_single_quote;
loop ()
| Some c ->
Buffer.add_char buf c;
···
end else
continue := false
done;
+
(* Check for document boundary - this terminates the quoted string *)
+
if Input.at_document_boundary t.input then
+
Error.raise_at start Unclosed_double_quote;
(* Per YAML spec: single break = space, break + empty lines = newlines *)
if !empty_lines > 0 then begin
(* Empty lines: output N newlines where N = number of empty lines *)
···
| Some c2 when in_flow && Input.is_flow_indicator c2 -> false
| _ -> true)
| '#' ->
+
(* # is a comment indicator only if preceded by whitespace *)
+
(* Check the previous character to determine if this is a comment *)
+
(match Input.peek_back t.input with
+
| None -> true (* At start - can't be comment indicator, allow it *)
+
| Some c when Input.is_whitespace c -> false (* Preceded by whitespace - comment *)
+
| Some c when Input.is_break c -> false (* At start of line - comment *)
+
| _ -> true) (* Not preceded by whitespace - part of scalar *)
| c when in_flow && Input.is_flow_indicator c -> false
| _ when Input.is_break c -> false
| _ -> true
···
let start = Input.mark t.input in
let in_flow = t.flow_level > 0 in
let indent = current_indent t in
+
(* Validate flow collection indentation *)
+
if in_flow && (column t) < t.flow_indent then
+
Error.raise_at start Invalid_flow_indentation;
let buf = Buffer.create 64 in
let spaces = Buffer.create 16 in
let leading_blanks = ref false in
···
match Input.peek t.input with
| None -> ()
| Some c when can_continue_plain t c ~in_flow ->
+
(* can_continue_plain already handles # correctly - it returns false
+
when # is preceded by whitespace (making it a comment indicator) *)
+
begin
if Buffer.length spaces > 0 then begin
if !leading_blanks then begin
(* Fold line break *)
···
chomping := Chomping.Keep; ignore (Input.next t.input)
| _ -> ());
+
(* Skip whitespace and optional comment *)
+
skip_whitespace_and_comment t;
(* Consume line break *)
if Input.next_is_break t.input then
···
Note: we use col, not col-1, to allow entries at the same level. *)
unroll_indent t col;
+
(* We're about to process actual content, not leading whitespace *)
+
t.leading_whitespace <- false;
+
if Input.is_eof t.input then
fetch_stream_end t
else if Input.at_document_boundary t.input then
···
Input.skip t.input 3;
let span = Span.make ~start ~stop:(Input.mark t.input) in
let token = if indicator = "---" then Token.Document_start else Token.Document_end in
+
(* Reset document content flag after document end marker *)
+
if indicator = "..." then
+
t.document_has_content <- false;
emit t span token
and fetch_directive t =
+
(* Directives can only appear:
+
1. At stream start (before any document content)
+
2. After a document end marker (...)
+
If we've emitted content in the current document, we need a document end marker first *)
+
if t.document_has_content then
+
Error.raise_at (Input.mark t.input)
+
(Unexpected_token "directives must be separated from document content by document end marker (...)");
unroll_indent t (-1);
remove_simple_key t;
t.allow_simple_key <- false;
···
and fetch_flow_collection_start t token_type =
save_simple_key t;
+
(* Record indent of outermost flow collection *)
+
if t.flow_level = 0 then
+
t.flow_indent <- column t;
t.flow_level <- t.flow_level + 1;
t.allow_simple_key <- true;
t.simple_keys <- None :: t.simple_keys;
+
t.document_has_content <- true;
let start = Input.mark t.input in
ignore (Input.next t.input);
let span = Span.make ~start ~stop:(Input.mark t.input) in
···
end;
remove_simple_key t;
t.allow_simple_key <- true;
+
t.document_has_content <- true;
let start = Input.mark t.input in
ignore (Input.next t.input);
+
+
(* Check for tabs after - : pattern like -\t- is invalid *)
+
let found_tabs = skip_blanks_check_tabs t in
+
if found_tabs then begin
+
(* If we found tabs and next char is - followed by whitespace, error *)
+
match Input.peek t.input with
+
| Some '-' ->
+
(match Input.peek_nth t.input 1 with
+
| None -> Error.raise_at start Tab_in_indentation
+
| Some c when Input.is_whitespace c ->
+
Error.raise_at start Tab_in_indentation
+
| Some _ -> ())
+
| _ -> ()
+
end;
+
let span = Span.make ~start ~stop:(Input.mark t.input) in
emit t span Token.Block_entry
···
end;
remove_simple_key t;
t.allow_simple_key <- t.flow_level = 0;
+
t.document_has_content <- true;
let start = Input.mark t.input in
ignore (Input.next t.input);
+
+
(* Check for tabs after ? : pattern like ?\t- or ?\tkey is invalid *)
+
let found_tabs = skip_blanks_check_tabs t in
+
if found_tabs && t.flow_level = 0 then begin
+
(* In block context, tabs after ? are not allowed *)
+
Error.raise_at start Tab_in_indentation
+
end;
+
let span = Span.make ~start ~stop:(Input.mark t.input) in
emit t span Token.Key
···
end
end);
remove_simple_key t;
+
(* In block context, allow_simple_key becomes true only after a line break,
+
not immediately after ':'. This prevents constructs like "key: - a".
+
The line break handling in skip_to_next_token will set it to true. *)
+
t.allow_simple_key <- false;
+
t.document_has_content <- true;
let start = Input.mark t.input in
ignore (Input.next t.input);
+
+
(* Check for tabs after : : pattern like :\t- is invalid in block context *)
+
let found_tabs = skip_blanks_check_tabs t in
+
if found_tabs && t.flow_level = 0 then begin
+
(* In block context, tabs after : followed by indicator are not allowed *)
+
match Input.peek t.input with
+
| Some ('-' | '?') ->
+
Error.raise_at start Tab_in_indentation
+
| _ -> ()
+
end;
+
let span = Span.make ~start ~stop:(Input.mark t.input) in
emit t span Token.Value
and fetch_alias t =
save_simple_key t;
t.allow_simple_key <- false;
+
t.document_has_content <- true;
let start = Input.mark t.input in
ignore (Input.next t.input); (* consume * *)
let name, span = scan_anchor_alias t in
···
and fetch_anchor t =
save_simple_key t;
t.allow_simple_key <- false;
+
t.document_has_content <- true;
let start = Input.mark t.input in
ignore (Input.next t.input); (* consume & *)
let name, span = scan_anchor_alias t in
···
(** Fetch a tag token.
    Calls [save_simple_key] (presumably so the tag can start an implicit key;
    confirm against its definition), clears [allow_simple_key], and sets
    [document_has_content], the flag [fetch_directive] checks before
    accepting a directive. The handle/suffix pair returned by [scan_tag] is
    then emitted as a [Token.Tag] over the scanned span. *)
and fetch_tag t =
save_simple_key t;
t.allow_simple_key <- false;
+
t.document_has_content <- true;
let handle, suffix, span = scan_tag t in
emit t span (Token.Tag { handle; suffix })
(** Fetch a block scalar token.
    Calls [remove_simple_key], sets [allow_simple_key] to [true], and sets
    [document_has_content], the flag [fetch_directive] checks before
    accepting a directive. Emits a [Token.Scalar] carrying the value and
    style returned by [scan_block_scalar]. [literal] is forwarded unchanged
    to the scanner (presumably selecting literal vs. folded style; confirm). *)
and fetch_block_scalar t literal =
remove_simple_key t;
t.allow_simple_key <- true;
+
t.document_has_content <- true;
let value, style, span = scan_block_scalar t literal in
emit t span (Token.Scalar { style; value })
(** Fetch a single-quoted scalar token.
    Calls [save_simple_key], clears [allow_simple_key], and sets
    [document_has_content], the flag [fetch_directive] checks before
    accepting a directive. The value returned by [scan_single_quoted] is then
    emitted as a [Token.Scalar] with style [Scalar_style.Single_quoted]. *)
and fetch_single_quoted t =
save_simple_key t;
t.allow_simple_key <- false;
+
t.document_has_content <- true;
let value, span = scan_single_quoted t in
emit t span (Token.Scalar { style = Scalar_style.Single_quoted; value })
(** Fetch a double-quoted scalar token.
    Calls [save_simple_key], clears [allow_simple_key], and sets
    [document_has_content], the flag [fetch_directive] checks before
    accepting a directive. The value returned by [scan_double_quoted] is then
    emitted as a [Token.Scalar] with style [Scalar_style.Double_quoted]. *)
and fetch_double_quoted t =
save_simple_key t;
t.allow_simple_key <- false;
+
t.document_has_content <- true;
let value, span = scan_double_quoted t in
emit t span (Token.Scalar { style = Scalar_style.Double_quoted; value })
···
(** Fetch a plain (unquoted) scalar token.
    Calls [save_simple_key], clears [allow_simple_key], and sets
    [document_has_content], the flag [fetch_directive] checks before
    accepting a directive. The value returned by [scan_plain_scalar] is then
    emitted as a [Token.Scalar] with style [Scalar_style.Plain]. *)
and fetch_plain_scalar t =
save_simple_key t;
t.allow_simple_key <- false;
+
t.document_has_content <- true;
let value, span = scan_plain_scalar t in
emit t span (Token.Scalar { style = Scalar_style.Plain; value })
+54
yaml/ocaml-yamle/tests/dune
···
(modules test_analyze)
(libraries yamle test_suite_lib))
···
(modules test_analyze)
(libraries yamle test_suite_lib))
+
; Drivers that link both the yamle library and the shared test-suite helpers.
(executables
 (names run_all_tests analyze_failures)
 (modules run_all_tests analyze_failures)
 (libraries yamle test_suite_lib))

; Stand-alone test and debugging programs that only need the yamle library.
(executables
 (names
  test_tabs
  test_tabs_extended
  test_tabs_y79y
  test_quick
  debug_y79y
  debug_y79y_events
  debug_y79y_array
  debug_seq)
 (modules
  test_tabs
  test_tabs_extended
  test_tabs_y79y
  test_quick
  debug_y79y
  debug_y79y_events
  debug_y79y_array
  debug_seq)
 (libraries yamle))

; Links only the test-suite helpers, not yamle itself.
(executable
 (name list_y79y)
 (modules list_y79y)
 (libraries test_suite_lib))