My agentic slop goes here. Not intended for anyone else!

more

Changed files
+125 -15
yaml
ocaml-yamle
+3
yaml/ocaml-yamle/lib/error.ml
···
| Invalid_flow_indentation (** Content in flow collection must be indented *)
| Tab_in_indentation
| Invalid_block_scalar_header of string
+
| Invalid_quoted_scalar_indentation of string
| Invalid_directive of string
| Invalid_yaml_version of string
| Invalid_tag_directive of string
···
| Tab_in_indentation -> "tab character in indentation"
| Invalid_block_scalar_header s ->
Printf.sprintf "invalid block scalar header: %s" s
+
| Invalid_quoted_scalar_indentation s ->
+
Printf.sprintf "%s" s
| Invalid_directive s -> Printf.sprintf "invalid directive: %s" s
| Invalid_yaml_version s -> Printf.sprintf "invalid YAML version: %s" s
| Invalid_tag_directive s -> Printf.sprintf "invalid TAG directive: %s" s
+39 -3
yaml/ocaml-yamle/lib/parser.ml
···
| Implicit_document_start
| Document_start
| Document_content
+
| Document_content_done (* After parsing a node, check for unexpected content *)
| Document_end
| Block_node
| Block_node_or_indentless_sequence
···
mutable tag_directives : (string * string) list;
mutable current_token : Token.spanned option;
mutable finished : bool;
+
mutable explicit_doc_end : bool; (** True if last doc ended with explicit ... *)
+
mutable stream_start : bool; (** True if we haven't emitted any documents yet *)
}
let create scanner = {
···
];
current_token = None;
finished = false;
+
explicit_doc_end = false;
+
stream_start = true;
}
let of_string s = create (Scanner.of_string s)
···
| None -> Span.point Position.initial
in
+
(* After first document, stream_start is false *)
+
t.stream_start <- false;
push_state t Document_end;
t.state <- Document_content;
Event.Document_start { version = t.version; implicit }, span
···
if not implicit then skip_token t;
+
(* Track if this document ended explicitly with ... *)
+
t.explicit_doc_end <- not implicit;
t.state <- Implicit_document_start;
Event.Document_end { implicit }, span
···
empty_scalar_event ~anchor:None ~tag:None tok.span
(** Main state machine dispatcher *)
-
let parse t =
+
let rec parse t =
match t.state with
| Stream_start ->
parse_stream_start t
···
| Implicit_document_start ->
(* Skip any document end markers before checking what's next *)
while check t (function Token.Document_end -> true | _ -> false) do
+
t.explicit_doc_end <- true; (* Seeing ... counts as explicit end *)
skip_token t
done;
···
t.state <- End;
t.finished <- true;
Event.Stream_end, tok.span
-
| Token.Version_directive _ | Token.Tag_directive _ | Token.Document_start ->
+
| Token.Version_directive _ | Token.Tag_directive _ ->
+
(* Directives are only allowed at stream start or after explicit ... (MUS6/01) *)
+
if not t.stream_start && not t.explicit_doc_end then
+
Error.raise_span tok.span (Invalid_directive "directives require explicit document end '...' before them");
+
parse_document_start t ~implicit:false
+
| Token.Document_start ->
parse_document_start t ~implicit:false
(* These tokens are invalid at document start - they indicate leftover junk *)
| Token.Flow_sequence_end | Token.Flow_mapping_end | Token.Flow_entry
···
let tok = current_token t in
t.state <- pop_state t;
empty_scalar_event ~anchor:None ~tag:None tok.span
-
end else
+
end else begin
+
(* Push Document_content_done so we return there after parsing the node.
+
This allows us to check for unexpected content after the node. *)
+
push_state t Document_content_done;
parse_node t ~block:true ~indentless:false
+
end
+
+
| Document_content_done ->
+
(* After parsing a node in document content, check for unexpected content *)
+
if check t (function
+
| Token.Version_directive _ | Token.Tag_directive _
+
| Token.Document_start | Token.Document_end | Token.Stream_end -> true
+
| _ -> false)
+
then begin
+
(* Valid document boundary - continue to Document_end *)
+
t.state <- pop_state t;
+
parse t (* Continue to emit the next event *)
+
end else begin
+
(* Unexpected content after document value - this is an error (KS4U, BS4K) *)
+
let tok = current_token t in
+
Error.raise_span tok.span
+
(Unexpected_token "content not allowed after document value")
+
end
| Document_end ->
parse_document_end t
+75 -12
yaml/ocaml-yamle/lib/scanner.ml
···
done
end
-
(** Skip blanks (spaces/tabs) and return whether tabs were found *)
+
(** Skip blanks (spaces/tabs) and return (found_tabs, found_spaces) *)
let skip_blanks_check_tabs t =
let found_tab = ref false in
+
let found_space = ref false in
while Input.next_is_blank t.input do
-
if Input.peek t.input = Some '\t' then found_tab := true;
+
(match Input.peek t.input with
+
| Some '\t' -> found_tab := true
+
| Some ' ' -> found_space := true
+
| _ -> ());
ignore (Input.next t.input)
done;
-
!found_tab
+
(!found_tab, !found_space)
(** Skip whitespace and comments, return true if at newline *)
let rec skip_to_next_token t =
···
Input.consume_break t.input;
(* Allow simple keys after line breaks in flow context *)
t.allow_simple_key <- true;
+
(* After line break in flow, check for tabs at start of line (Y79Y/03)
+
Tabs are not allowed as indentation - if tab is first char and results
+
in a column less than flow_indent, it's an error *)
+
if Input.next_is (( = ) '\t') t.input then begin
+
(* Tab at start of line in flow context - skip tabs and check position *)
+
let start_mark = Input.mark t.input in
+
while Input.next_is (( = ) '\t') t.input do
+
ignore (Input.next t.input)
+
done;
+
(* If only tabs were used (no spaces) and column < flow_indent, error *)
+
if not (Input.next_is_break t.input) && not (Input.is_eof t.input) &&
+
column t < t.flow_indent then
+
Error.raise_at start_mark Invalid_flow_indentation
+
end;
skip_to_next_token t
end else begin
ignore (Input.next t.input);
···
(* Check for document boundary *)
if Input.at_document_boundary t.input then
Error.raise_at start Unclosed_single_quote;
+
(* Check indentation: continuation must be > block indent (QB6E, DK95) *)
+
let col = column t in
+
let indent = current_indent t in
+
if not (Input.is_eof t.input) && not (Input.next_is_break t.input) && col <= indent && indent >= 0 then
+
Error.raise_at (Input.mark t.input) (Invalid_quoted_scalar_indentation "invalid indentation in quoted scalar");
(* Count empty lines (consecutive line breaks) *)
let empty_lines = ref 0 in
while Input.next_is_break t.input do
···
ignore (Input.next t.input)
done;
if Input.at_document_boundary t.input then
-
Error.raise_at start Unclosed_single_quote
+
Error.raise_at start Unclosed_single_quote;
+
(* Check indentation after each empty line too *)
+
let col = column t in
+
let indent = current_indent t in
+
if not (Input.is_eof t.input) && not (Input.next_is_break t.input) && col <= indent && indent >= 0 then
+
Error.raise_at (Input.mark t.input) (Invalid_quoted_scalar_indentation "invalid indentation in quoted scalar")
done;
(* Apply folding rules *)
if !empty_lines > 0 then begin
···
(* Count consecutive line breaks (empty lines) *)
let empty_lines = ref 0 in
let continue = ref true in
+
let started_with_tab = ref false in
while !continue do
+
(* Track if we start with a tab (for DK95/01 check) *)
+
if Input.next_is (( = ) '\t') t.input then started_with_tab := true;
(* Skip blanks (spaces/tabs) on the line *)
while Input.next_is_blank t.input do
ignore (Input.next t.input)
···
(* Check if we hit another line break (empty line) *)
if Input.next_is_break t.input then begin
Input.consume_break t.input;
-
incr empty_lines
+
incr empty_lines;
+
started_with_tab := false (* Reset for next line *)
end else
continue := false
done;
(* Check for document boundary - this terminates the quoted string *)
if Input.at_document_boundary t.input then
Error.raise_at start Unclosed_double_quote;
+
(* Check indentation: continuation must be > block indent (QB6E, DK95)
+
Note: must be strictly greater than block indent, not just equal *)
+
let col = column t in
+
let indent = current_indent t in
+
let start_col = start.column in
+
(* DK95/01: if continuation started with tabs and column < start column, error *)
+
if not (Input.is_eof t.input) && !started_with_tab && col < start_col then
+
Error.raise_at (Input.mark t.input) (Invalid_quoted_scalar_indentation "invalid indentation in quoted scalar");
+
if not (Input.is_eof t.input) && col <= indent && indent >= 0 then
+
Error.raise_at (Input.mark t.input) (Invalid_quoted_scalar_indentation "invalid indentation in quoted scalar");
(* Per YAML spec: single break = space, break + empty lines = newlines *)
if !empty_lines > 0 then begin
(* Empty lines: output N newlines where N = number of empty lines *)
···
let buf = Buffer.create 256 in
let trailing_breaks = Buffer.create 16 in
let leading_blank = ref false in (* Was the previous line "more indented"? *)
+
let max_empty_line_indent = ref 0 in (* Track max indent of empty lines before first content *)
(* Skip to content indentation, skipping empty lines.
Returns the number of spaces actually skipped (important for detecting dedentation). *)
···
match Input.peek_nth t.input (!idx) with
| None | Some '\n' | Some '\r' ->
(* Line has only spaces - empty line *)
+
(* Track max indent of empty lines for later validation *)
+
if !idx > !max_empty_line_indent then
+
max_empty_line_indent := !idx;
while Input.next_is (( = ) ' ') t.input do
ignore (Input.next t.input)
done;
···
| _ ->
(* Has content (including tabs which are content, not indentation) *)
0
+
end else if Input.next_is (( = ) '\t') t.input then begin
+
(* Tab at start of line in implicit indent mode - this is an error (Y79Y)
+
because tabs cannot be used as indentation in YAML *)
+
Error.raise_at (Input.mark t.input) Tab_in_indentation
end else
-
(* Not at break or space - could be tab (content) or other *)
+
(* Not at break or space - other content character *)
0
end
in
···
if line_indent <= base_level then
false (* No content - first line not indented enough *)
else begin
+
(* Validate: first content line must be indented at least as much as
+
the maximum indent seen on empty lines before it (5LLU, S98Z, W9L4) *)
+
if line_indent < !max_empty_line_indent && line_indent > base_level then
+
Error.raise_at (Input.mark t.input)
+
(Invalid_block_scalar_header "wrongly indented line in block scalar");
content_indent := line_indent;
true
end
···
while Input.next_is_digit t.input do
minor := !minor * 10 + (Char.code (Input.next_exn t.input) - Char.code '0')
done;
+
(* Validate: only whitespace and comments allowed before line break (MUS6) *)
+
skip_whitespace_and_comment t;
+
if not (Input.next_is_break t.input) && not (Input.is_eof t.input) then
+
Error.raise_at (Input.mark t.input) (Invalid_directive "expected comment or line break after version");
let span = Span.make ~start ~stop:(Input.mark t.input) in
Token.Version_directive { major = !major; minor = !minor }, span
···
ignore (Input.next t.input);
(* Check for tabs after - : pattern like -\t- is invalid *)
-
let found_tabs = skip_blanks_check_tabs t in
+
let (found_tabs, _found_spaces) = skip_blanks_check_tabs t in
if found_tabs then begin
(* If we found tabs and next char is - followed by whitespace, error *)
match Input.peek t.input with
···
ignore (Input.next t.input);
(* Check for tabs after ? : pattern like ?\t- or ?\tkey is invalid *)
-
let found_tabs = skip_blanks_check_tabs t in
+
let (found_tabs, _found_spaces) = skip_blanks_check_tabs t in
if found_tabs && t.flow_level = 0 then begin
(* In block context, tabs after ? are not allowed *)
Error.raise_at start Tab_in_indentation
···
let start = Input.mark t.input in
ignore (Input.next t.input);
-
(* Check for tabs after : : pattern like :\t- is invalid in block context *)
-
let found_tabs = skip_blanks_check_tabs t in
-
if found_tabs && t.flow_level = 0 then begin
-
(* In block context, tabs after : followed by indicator are not allowed *)
+
(* Check for tabs after : : patterns like :\t- or :\tkey: are invalid in block context (Y79Y/09)
+
However, :\t bar (tab followed by space then content) is valid (6BCT) *)
+
let (found_tabs, found_spaces) = skip_blanks_check_tabs t in
+
if found_tabs && not found_spaces && t.flow_level = 0 then begin
+
(* In block context, tabs-only after : followed by indicator or alphanumeric are not allowed *)
match Input.peek t.input with
| Some ('-' | '?') ->
+
Error.raise_at start Tab_in_indentation
+
| Some c when (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') ->
+
(* Tab-only followed by alphanumeric - likely a key, which is invalid *)
Error.raise_at start Tab_in_indentation
| _ -> ()
end;
+8
yaml/ocaml-yamle/tests/dune
···
(executable (name count_tests) (modules count_tests) (libraries test_suite_lib))
(executable (name test_bs4k) (modules test_bs4k) (libraries yamle))
(executable (name test_ks4u) (modules test_ks4u) (libraries yamle))
+
(executable (name test_5llu_quick) (modules test_5llu_quick) (libraries yamle))
+
(executable (name test_mus6_debug) (modules test_mus6_debug) (libraries yamle))
+
(executable (name debug_mus6_load) (modules debug_mus6_load) (libraries yamle test_suite_lib))
+
(executable (name debug_mus6_01) (modules debug_mus6_01) (libraries yamle))
+
(executable (name debug_dk95_01) (modules debug_dk95_01) (libraries yamle))
+
(executable (name debug_y79y_first) (modules debug_y79y_first) (libraries yamle))
+
(executable (name debug_y79y_03) (modules debug_y79y_03) (libraries yamle))
+
(executable (name debug_y79y_09) (modules debug_y79y_09) (libraries yamle))