commit 0b255fb8f45c5b00424e5efab1ee4bf89141c735 · anil.recoil.org/slop

+20

yaml/ocaml-yamle/bin/dune

···

       6
       6
        
       (executable

     

       7
       7
        
        (name test_emit)

     

       8
       8
        
        (libraries yamle))

     

       9
       9
       +
       

     

       10
       10
       +
       (executable

     

       11
       11
       +
        (name test_anchor)

     

       12
       12
       +
        (libraries yamle))

     

       13
       13
       +
       

     

       14
       14
       +
       (executable

     

       15
       15
       +
        (name test_anchor_comprehensive)

     

       16
       16
       +
        (libraries yamle))

     

       17
       17
       +
       

     

       18
       18
       +
       (executable

     

       19
       19
       +
        (name test_anchor_boundaries)

     

       20
       20
       +
        (libraries yamle))

     

       21
       21
       +
       

     

       22
       22
       +
       (executable

     

       23
       23
       +
        (name test_alias)

     

       24
       24
       +
        (libraries yamle))

     

       25
       25
       +
       

     

       26
       26
       +
       (executable

     

       27
       27
       +
        (name test_anchor_fix_final)

     

       28
       28
       +
        (libraries yamle))

+8 -4

yaml/ocaml-yamle/lib/parser.ml

···

       101
       101
        
       

     

       102
       102
        
       (** Resolve a tag *)

     

       103
       103
        
       let resolve_tag t ~handle ~suffix =

     

       104
       104
       -
         match List.assoc_opt handle t.tag_directives with

     

       105
       105
       -
         | Some prefix -> prefix ^ suffix

     

       106
       106
       -
         | None when handle = "!" -> "!" ^ suffix

     

       107
       107
       -
         | None -> Error.raise (Invalid_tag (handle ^ suffix))

     

       104
       104
       +
         if handle = "" then

     

       105
       105
       +
           (* Verbatim tag - suffix is already the full URI *)

     

       106
       106
       +
           suffix

     

       107
       107
       +
         else

     

       108
       108
       +
           match List.assoc_opt handle t.tag_directives with

     

       109
       109
       +
           | Some prefix -> prefix ^ suffix

     

       110
       110
       +
           | None when handle = "!" -> "!" ^ suffix

     

       111
       111
       +
           | None -> Error.raise (Invalid_tag (handle ^ suffix))

     

       108
       112
        
       

     

       109
       113
        
       (** Process directives at document start *)

     

       110
       114
        
       let process_directives t =

+123 -37

yaml/ocaml-yamle/lib/scanner.ml

···

       154
       154
        
       let scan_anchor_alias t =

     

       155
       155
        
         let start = Input.mark t.input in

     

       156
       156
        
         let buf = Buffer.create 16 in

     

       157
       157
       +
         (* Per YAML 1.2 spec: anchor names can contain any character that is NOT:

     

       158
       158
       +
            - Whitespace (space, tab, line breaks)

     

       159
       159
       +
            - Flow indicators: []{}

     

       160
       160
       +
            - Comma (,)

     

       161
       161
       +
            This matches the saphyr implementation: is_yaml_non_space && !is_flow *)

     

       157
       162
        
         while

     

       158
       163
        
           match Input.peek t.input with

     

       159
       159
       -
           | Some c when Input.is_alnum c || c = '_' || c = '-' ->

     

       164
       164
       +
           | Some c when not (Input.is_whitespace c) &&

     

       165
       165
       +
                         not (Input.is_flow_indicator c) &&

     

       166
       166
       +
                         c <> '\x00' ->

     

       160
       167
        
               Buffer.add_char buf c;

     

       161
       168
        
               ignore (Input.next t.input);

     

       162
       169
        
               true

     
···

       215
       222
        
         let handle, suffix =

     

       216
       223
        
           match Input.peek t.input with

     

       217
       224
        
           | Some '<' ->

     

       218
       218
       -
               (* Verbatim tag: !<...> *)

     

       225
       225
       +
               (* Verbatim tag: !<...> - handle is empty, suffix is full URI *)

     

       219
       226
        
               ignore (Input.next t.input);

     

       220
       227
        
               let buf = Buffer.create 32 in

     

       221
       228
        
               while

     
···

       228
       235
        
                 | None -> Error.raise_at (Input.mark t.input) (Invalid_tag "unclosed verbatim tag")

     

       229
       236
        
               do () done;

     

       230
       237
        
               ignore (Input.next t.input); (* consume > *)

     

       231
       231
       -
               ("!", Buffer.contents buf)

     

       238
       238
       +
               ("", Buffer.contents buf)

     

       232
       239
        
           | Some c when Input.is_whitespace c || Input.is_flow_indicator c ->

     

       233
       240
        
               (* Non-specific tag: ! *)

     

       234
       241
        
               ("!", "")

     
···

       238
       245
        
               let suffix = scan_tag_suffix t in

     

       239
       246
        
               ("!!", suffix)

     

       240
       247
        
           | _ ->

     

       241
       241
       -
               (* Primary handle or just suffix *)

     

       242
       242
       -
               let first_part = scan_tag_suffix t in

     

       243
       243
       -
               if String.length first_part > 0 && first_part.[String.length first_part - 1] = '!' then

     

       244
       244
       -
                 let suffix = scan_tag_suffix t in

     

       245
       245
       -
                 (first_part, suffix)

     

       246
       246
       -
               else

     

       247
       247
       -
                 ("!", first_part)

     

       248
       248
       +
               (* Primary handle or just suffix: !foo or !e!foo *)

     

       249
       249
       +
               (* Read word characters: alphanumerics and '-' *)

     

       250
       250
       +
               let buf = Buffer.create 16 in

     

       251
       251
       +
               while

     

       252
       252
       +
                 match Input.peek t.input with

     

       253
       253
       +
                 | Some c when Input.is_alnum c || c = '-' ->

     

       254
       254
       +
                     Buffer.add_char buf c;

     

       255
       255
       +
                     ignore (Input.next t.input);

     

       256
       256
       +
                     true

     

       257
       257
       +
                 | _ -> false

     

       258
       258
       +
               do () done;

     

       259
       259
       +
               (* Check if next character is ! - if so, this is a named handle *)

     

       260
       260
       +
               (match Input.peek t.input with

     

       261
       261
       +
                | Some '!' ->

     

       262
       262
       +
                    (* Named handle like !e! *)

     

       263
       263
       +
                    ignore (Input.next t.input);

     

       264
       264
       +
                    let handle_name = Buffer.contents buf in

     

       265
       265
       +
                    let suffix = scan_tag_suffix t in

     

       266
       266
       +
                    ("!" ^ handle_name ^ "!", suffix)

     

       267
       267
       +
                | _ ->

     

       268
       268
       +
                    (* Just ! followed by suffix *)

     

       269
       269
       +
                    ("!", Buffer.contents buf ^ scan_tag_suffix t))

     

       248
       270
        
         in

     

       249
       271
        
         let span = Span.make ~start ~stop:(Input.mark t.input) in

     

       250
       272
        
         (handle, suffix, span)

     
···

       471
       493
        
           scan_line ();

     

       472
       494
        
           (* Check for line continuation *)

     

       473
       495
        
           if not in_flow && Input.next_is_break t.input then begin

     

       474
       474
       -
             (* Save whitespace *)

     

       475
       475
       -
             Buffer.clear spaces;

     

       476
       476
       -
             Buffer.add_char spaces '\n';

     

       496
       496
       +
             (* Save the line break *)

     

       497
       497
       +
             if !leading_blanks then begin

     

       498
       498
       +
               (* We already had a break - this is an additional break (empty line) *)

     

       499
       499
       +
               Buffer.add_char spaces '\n'

     

       500
       500
       +
             end else begin

     

       501
       501
       +
               (* First line break *)

     

       502
       502
       +
               Buffer.clear spaces;

     

       503
       503
       +
               Buffer.add_char spaces '\n';

     

       504
       504
       +
               leading_blanks := true

     

       505
       505
       +
             end;

     

       477
       506
        
             Input.consume_break t.input;

     

       478
       507
        
             (* Line break in block context allows simple key *)

     

       479
       508
        
             t.allow_simple_key <- true;

     

       480
       480
       -
             (* Skip leading blanks *)

     

       509
       509
       +
             (* Skip leading blanks on the next line *)

     

       481
       510
        
             while Input.next_is_blank t.input do

     

       482
       482
       -
               Buffer.add_char spaces (Option.get (Input.next t.input))

     

       511
       511
       +
               ignore (Input.next t.input)

     

       483
       512
        
             done;

     

       484
       513
        
             let col = (Input.position t.input).column in

     

       485
       514
        
             (* Check indentation - stop if we're at or before the containing block's indent *)

     
···

       487
       516
        
               () (* Stop - dedented or at parent level *)

     

       488
       517
        
             else if Input.at_document_boundary t.input then

     

       489
       518
        
               () (* Stop - document boundary *)

     

       490
       490
       -
             else begin

     

       491
       491
       -
               leading_blanks := true;

     

       519
       519
       +
             else

     

       492
       520
        
               scan_lines ()

     

       493
       493
       -
             end

     

       494
       521
        
           end

     

       495
       522
        
         in

     

       496
       523
        
       

     
···

       562
       589
        
         let base_indent = current_indent t in

     

       563
       590
        
         let content_indent = ref (

     

       564
       591
        
           match !explicit_indent with

     

       565
       565
       -
           | Some n -> base_indent + n

     

       592
       592
       +
           | Some n ->

     

       593
       593
       +
               (* base_indent is a column (1-indexed), convert to indent level (0-indexed) *)

     

       594
       594
       +
               let base_level = max 0 (base_indent - 1) in

     

       595
       595
       +
               base_level + n

     

       566
       596
        
           | None -> 0  (* Will be determined by first non-empty line *)

     

       567
       597
        
         ) in

     

       568
       598
        
       

     

       569
       599
        
         let buf = Buffer.create 256 in

     

       570
       600
        
         let trailing_breaks = Buffer.create 16 in

     

       571
       601
        
       

     

       572
       572
       -
         (* Read content *)

     

       573
       573
       -
         let rec read_lines () =

     

       574
       574
       -
           (* Skip empty lines, collecting breaks *)

     

       575
       575
       -
           while Input.next_is_break t.input ||

     

       576
       576
       -
                 (Input.next_is_blank t.input &&

     

       577
       577
       -
                  match Input.peek_nth t.input 1 with

     

       578
       578
       -
                  | Some c when Input.is_break c -> true

     

       579
       579
       -
                  | None -> true

     

       580
       580
       -
                  | _ -> false)

     

       581
       581
       -
           do

     

       582
       582
       -
             if Input.next_is_blank t.input then begin

     

       583
       583
       -
               while Input.next_is_blank t.input do

     

       584
       584
       -
                 ignore (Input.next t.input)

     

       585
       585
       -
               done

     

       586
       586
       -
             end;

     

       602
       602
       +
         (* Skip to content indentation, skipping empty lines *)

     

       603
       603
       +
         let rec skip_to_content_indent () =

     

       604
       604
       +
           if !content_indent > 0 then begin

     

       605
       605
       +
             (* Explicit indent - skip exactly content_indent spaces *)

     

       606
       606
       +
             let spaces_skipped = ref 0 in

     

       607
       607
       +
             while !spaces_skipped < !content_indent && Input.next_is (( = ) ' ') t.input do

     

       608
       608
       +
               incr spaces_skipped;

     

       609
       609
       +
               ignore (Input.next t.input)

     

       610
       610
       +
             done;

     

       611
       611
       +
       

     

       612
       612
       +
             (* Check if this line is empty (only spaces/tabs until break/eof) *)

     

       587
       613
        
             if Input.next_is_break t.input then begin

     

       614
       614
       +
               (* Empty line - record the break and continue *)

     

       588
       615
        
               Buffer.add_char trailing_breaks '\n';

     

       589
       589
       -
               Input.consume_break t.input

     

       616
       616
       +
               Input.consume_break t.input;

     

       617
       617
       +
               skip_to_content_indent ()

     

       618
       618
       +
             end else if Input.next_is_blank t.input then begin

     

       619
       619
       +
               (* Line has spaces beyond content_indent - check if rest is only blanks *)

     

       620
       620
       +
               let idx = ref 0 in

     

       621
       621
       +
               let is_empty = ref false in

     

       622
       622
       +
               while not !is_empty do

     

       623
       623
       +
                 match Input.peek_nth t.input !idx with

     

       624
       624
       +
                 | Some c when Input.is_blank c -> incr idx

     

       625
       625
       +
                 | Some c when Input.is_break c -> is_empty := true

     

       626
       626
       +
                 | _ -> is_empty := true  (* Content or end of input: stop scanning; the match below classifies it *)

     

       627
       627
       +
               done;

     

       628
       628
       +
               (* Check if we found a break (empty line) or content *)

     

       629
       629
       +
               (match Input.peek_nth t.input (!idx) with

     

       630
       630
       +
                | None | Some '\n' | Some '\r' ->

     

       631
       631
       +
                    (* Empty line - consume all blanks and break *)

     

       632
       632
       +
                    while Input.next_is_blank t.input do

     

       633
       633
       +
                      ignore (Input.next t.input)

     

       634
       634
       +
                    done;

     

       635
       635
       +
                    Buffer.add_char trailing_breaks '\n';

     

       636
       636
       +
                    Input.consume_break t.input;

     

       637
       637
       +
                    skip_to_content_indent ()

     

       638
       638
       +
                | _ ->

     

       639
       639
       +
                    (* Has content *)

     

       640
       640
       +
                    ())

     

       590
       641
        
             end

     

       591
       591
       -
           done;

     

       642
       642
       +
           end else begin

     

       643
       643
       +
             (* Implicit indent - skip empty lines without consuming spaces *)

     

       644
       644
       +
             if Input.next_is_break t.input then begin

     

       645
       645
       +
               Buffer.add_char trailing_breaks '\n';

     

       646
       646
       +
               Input.consume_break t.input;

     

       647
       647
       +
               skip_to_content_indent ()

     

       648
       648
       +
             end else if Input.next_is_blank t.input then begin

     

       649
       649
       +
               (* Check if line is empty *)

     

       650
       650
       +
               let idx = ref 0 in

     

       651
       651
       +
               while match Input.peek_nth t.input !idx with

     

       652
       652
       +
                     | Some c when Input.is_blank c -> incr idx; true

     

       653
       653
       +
                     | _ -> false

     

       654
       654
       +
               do () done;

     

       655
       655
       +
               match Input.peek_nth t.input (!idx) with

     

       656
       656
       +
               | None | Some '\n' | Some '\r' ->

     

       657
       657
       +
                   (* Empty line *)

     

       658
       658
       +
                   while Input.next_is_blank t.input do

     

       659
       659
       +
                     ignore (Input.next t.input)

     

       660
       660
       +
                   done;

     

       661
       661
       +
                   Buffer.add_char trailing_breaks '\n';

     

       662
       662
       +
                   Input.consume_break t.input;

     

       663
       663
       +
                   skip_to_content_indent ()

     

       664
       664
       +
               | _ ->

     

       665
       665
       +
                   (* Has content - don't consume anything *)

     

       666
       666
       +
                   ()

     

       667
       667
       +
             end

     

       668
       668
       +
           end

     

       669
       669
       +
         in

     

       670
       670
       +
       

     

       671
       671
       +
         (* Read content *)

     

       672
       672
       +
         let rec read_lines () =

     

       673
       673
       +
           skip_to_content_indent ();

     

       592
       674
        
       

     

       593
       675
        
           (* Check if we're at content *)

     

       594
       676
        
           if Input.is_eof t.input then ()

     
···

       601
       683
        
               ignore (Input.next t.input)

     

       602
       684
        
             done;

     

       603
       685
        
       

     

       604
       604
       -
             (* Determine content indent from first content line *)

     

       686
       686
       +
             (* For explicit indent, line_indent is extra beyond content_indent *)

     

       687
       687
       +
             if !content_indent > 0 then

     

       688
       688
       +
               line_indent := !content_indent + !line_indent;

     

       689
       689
       +
       

     

       690
       690
       +
             (* Determine content indent from first content line (implicit case) *)

     

       605
       691
        
             if !content_indent = 0 then begin

     

       606
       692
        
               if !line_indent <= base_indent then begin

     

       607
       693
        
                 (* No content - restore position conceptually *)

+1 -1

yaml/ocaml-yamle/tests/cram/anchors.t

···

       145
       145
        
         shared_value: 100

     

       146
       146
        
         calculations:

     

       147
       147
        
           base: 100

     

       148
       148
       -
           doubled: 200  

     

       148
       148
       +
           doubled: 200

     

       149
       149
        
           reference: 100

     

       150
       150
        
           another_ref: 100

     

       151
       151
        
         ---

+12 -12

yaml/ocaml-yamle/tests/cram/comments.t

···

       1
       1
        
       Test comments.yml file with various comment styles

     

       2
       2
        
       

     

       3
       3
        
         $ yamlcat ../yaml/comments.yml

     

       4
       4
       -
         name: John Doe  

     

       5
       5
       -
         age: 30  

     

       4
       4
       +
         name: John Doe

     

       5
       5
       +
         age: 30

     

       6
       6
        
         address:

     

       7
       7
       -
           street: 123 Main St  

     

       7
       7
       +
           street: 123 Main St

     

       8
       8
        
           city: Springfield

     

       9
       9
        
           zip: 12345

     

       10
       10
        
         items:

     

       11
       11
       -
           - apple  

     

       11
       11
       +
           - apple

     

       12
       12
        
           - banana

     

       13
       13
        
           - cherry

     

       14
       14
       -
           - date  

     

       14
       14
       +
           - date

     

       15
       15
        
         flow_seq:

     

       16
       16
        
           - 1

     

       17
       17
        
           - 2

     
···

       22
       22
        
         nested:

     

       23
       23
        
           level1:

     

       24
       24
        
             level2:

     

       25
       25
       -
               value: deeply nested  

     

       25
       25
       +
               value: deeply nested

     

       26
       26
        
         multi_comment_key: value

     

       27
       27
        
         special: 'value with # hash inside quotes'

     

       28
       28
        
         empty_value: null

     
···

       31
       31
        
       Test comments.yml roundtrip with JSON to verify parsed values

     

       32
       32
        
       

     

       33
       33
        
         $ yamlcat --json ../yaml/comments.yml

     

       34
       34
       -
         {"name": "John Doe  ", "age": "30  ", "address": {"street": "123 Main St  ", "city": "Springfield", "zip": 12345}, "items": ["apple  ", "banana", "cherry", "date  "], "flow_seq": [1, 2, 3], "flow_map": {"key1": "value1", "key2": "value2"}, "nested": {"level1": {"level2": {"value": "deeply nested  "}}}, "multi_comment_key": "value", "special": "value with # hash inside quotes", "empty_value": null, "final_key": "final_value"}

     

       34
       34
       +
         {"name": "John Doe", "age": 30, "address": {"street": "123 Main St", "city": "Springfield", "zip": 12345}, "items": ["apple", "banana", "cherry", "date"], "flow_seq": [1, 2, 3], "flow_map": {"key1": "value1", "key2": "value2"}, "nested": {"level1": {"level2": {"value": "deeply nested"}}}, "multi_comment_key": "value", "special": "value with # hash inside quotes", "empty_value": null, "final_key": "final_value"}

     

       35
       35
        
       

     

       36
       36
        
       Test full line comments are ignored

     

       37
       37
        
       

     
···

       45
       45
        
       

     

       46
       46
        
         $ echo 'name: Bob  # This is an end of line comment

     

       47
       47
        
         > age: 35  # Another end of line comment' | yamlcat --json

     

       48
       48
       -
         {"name": "Bob  ", "age": "35  "}

     

       48
       48
       +
         {"name": "Bob", "age": 35}

     

       49
       49
        
       

     

       50
       50
        
       Test comments after sequence items

     

       51
       51
        
       

     
···

       53
       53
        
         >   - apple  # First fruit

     

       54
       54
        
         >   - banana  # Second fruit

     

       55
       55
        
         >   - cherry  # Third fruit' | yamlcat --json

     

       56
       56
       -
         {"fruits": ["apple  ", "banana  ", "cherry  "]}

     

       56
       56
       +
         {"fruits": ["apple", "banana", "cherry"]}

     

       57
       57
        
       

     

       58
       58
        
       Test comments between sequence items

     

       59
       59
        
       

     
···

       99
       99
        
         >   inner:

     

       100
       100
        
         >     # Comment in deeper level

     

       101
       101
        
         >     key: value  # End of line comment' | yamlcat --json

     

       102
       102
       -
         {"outer": {"inner": {"key": "value  "}}}

     

       102
       102
       +
         {"outer": {"inner": {"key": "value"}}}

     

       103
       103
        
       

     

       104
       104
        
       Test comments with special characters

     

       105
       105
        
       

     
···

       158
       158
        
         >   # Comment between sequence items

     

       159
       159
        
         >   - name: item2

     

       160
       160
        
         >     value: 20  # Another comment' | yamlcat --json

     

       161
       161
       -
         {"items": [{"name": "item1  ", "value": 10}, {"name": "item2", "value": "20  "}]}

     

       161
       161
       +
         {"items": [{"name": "item1", "value": 10}, {"name": "item2", "value": 20}]}

     

       162
       162
        
       

     

       163
       163
        
       Test comment only lines between complex structures

     

       164
       164
        
       

     
···

       188
       188
        
       Test whitespace preservation around comments

     

       189
       189
        
       

     

       190
       190
        
         $ echo 'key1:    value1   # Comment with spaces' | yamlcat --json

     

       191
       191
       -
         {"key1": "value1   "}

     

       191
       191
       +
         {"key1": "value1"}

     

       192
       192
        
       

     

       193
       193
        
       Test comment after colon but before value

     

       194
       194

+1 -1

yaml/ocaml-yamle/tests/cram/multidoc.t

···

       180
       180
        
         shared_value: 100

     

       181
       181
        
         calculations:

     

       182
       182
        
           base: 100

     

       183
       183
       -
           doubled: 200  

     

       183
       183
       +
           doubled: 200

     

       184
       184
        
           reference: 100

     

       185
       185
        
           another_ref: 100

     

       186
       186
        
         ---

+2 -2

yaml/ocaml-yamle/tests/cram/yamlcat.t

···

       56
       56
        
       Test seq.yml file (multiline plain scalar)

     

       57
       57
        
       

     

       58
       58
        
         $ yamlcat ../yaml/seq.yml

     

       59
       59
       -
         - hello  - whats  - up

     

       59
       59
       +
         - hello - whats - up

     

       60
       60
        
         - foo

     

       61
       61
        
         - bar

     

       62
       62
        
       

     

       63
       63
        
       Test seq.yml roundtrip preserves data

     

       64
       64
        
       

     

       65
       65
        
         $ yamlcat --json ../yaml/seq.yml

     

       66
       66
       -
         ["hello  - whats  - up", "foo", "bar"]

     

       66
       66
       +
         ["hello - whats - up", "foo", "bar"]

     

       67
       67
        
       

     

       68
       68
        
       Test cohttp.yml

     

       69
       69