GPS Exchange Format library/CLI in OCaml
at main 21 kB view raw
1(** GPX streaming parser using xmlm *) 2 3(** Parser state for streaming *) 4type parser_state = { 5 input : Xmlm.input; 6 mutable current_element : string list; (* Stack of current element names *) 7 text_buffer : Buffer.t; 8} 9 10(** Create a new parser state *) 11let make_parser input = { 12 input; 13 current_element = []; 14 text_buffer = Buffer.create 256; 15} 16 17(** Utility functions *) 18 19let get_attribute name attrs = 20 try 21 let value = List.find (fun ((_, n), _) -> n = name) attrs in 22 Some (snd value) 23 with Not_found -> None 24 25let require_attribute name attrs element = 26 match get_attribute name attrs with 27 | Some value -> Ok value 28 | None -> Error (Error.missing_attribute element name) 29 30let parse_float_opt s = 31 try Some (Float.of_string s) 32 with _ -> None 33 34let parse_int_opt s = 35 try Some (int_of_string s) 36 with _ -> None 37 38let parse_time s = 39 match Ptime.of_rfc3339 s with 40 | Ok (t, _, _) -> Some t 41 | Error _ -> None 42 43(** Result binding operators *) 44let (let*) = Result.bind 45 46let parse_coordinates attrs element = 47 let* lat_str = require_attribute "lat" attrs element in 48 let* lon_str = require_attribute "lon" attrs element in 49 match (Float.of_string lat_str, Float.of_string lon_str) with 50 | (lat_f, lon_f) -> 51 let* lat = Result.map_error Error.invalid_coordinate (Coordinate.latitude lat_f) in 52 let* lon = Result.map_error Error.invalid_coordinate (Coordinate.longitude lon_f) in 53 Ok (lat, lon) 54 | exception _ -> Error (Error.invalid_coordinate "Invalid coordinate format") 55 56(** Parse waypoint data from XML elements *) 57let rec parse_waypoint_data parser lat lon = 58 let wpt = Waypoint.make lat lon in 59 parse_waypoint_elements parser wpt 60 61and parse_waypoint_elements parser wpt = 62 let rec loop wpt = 63 match Xmlm.input parser.input with 64 | `El_start ((_, name), attrs) -> 65 parser.current_element <- name :: parser.current_element; 66 (match name with 67 | "ele" -> 68 let* text = parse_text_content parser in 69 (match parse_float_opt text with 70 | Some ele -> loop (Waypoint.with_elevation wpt ele) 71 | None -> loop wpt) 72 | "time" -> 73 let* text = parse_text_content parser in 74 loop (Waypoint.with_time wpt (parse_time text)) 75 | "magvar" -> 76 let* text = parse_text_content parser in 77 (match parse_float_opt text with 78 | Some f -> 79 (match Coordinate.degrees f with 80 | Ok deg -> loop (Waypoint.with_magvar wpt deg) 81 | Error _ -> loop wpt) 82 | None -> loop wpt) 83 | "geoidheight" -> 84 let* text = parse_text_content parser in 85 (match parse_float_opt text with 86 | Some h -> loop (Waypoint.with_geoidheight wpt h) 87 | None -> loop wpt) 88 | "name" -> 89 let* text = parse_text_content parser in 90 loop (Waypoint.with_name wpt text) 91 | "cmt" -> 92 let* text = parse_text_content parser in 93 loop (Waypoint.with_comment wpt text) 94 | "desc" -> 95 let* text = parse_text_content parser in 96 loop (Waypoint.with_description wpt text) 97 | "src" -> 98 let* text = parse_text_content parser in 99 loop (Waypoint.with_source wpt text) 100 | "sym" -> 101 let* text = parse_text_content parser in 102 loop (Waypoint.with_symbol wpt text) 103 | "type" -> 104 let* text = parse_text_content parser in 105 loop (Waypoint.with_type wpt text) 106 | "fix" -> 107 let* text = parse_text_content parser in 108 loop (Waypoint.with_fix wpt (Waypoint.fix_type_of_string text)) 109 | "sat" -> 110 let* text = parse_text_content parser in 111 (match parse_int_opt text with 112 | Some s -> loop (Waypoint.with_sat wpt s) 113 | None -> loop wpt) 114 | "hdop" -> 115 let* text = parse_text_content parser in 116 (match parse_float_opt text with 117 | Some f -> loop (Waypoint.with_hdop wpt f) 118 | None -> loop wpt) 119 | "vdop" -> 120 let* text = parse_text_content parser in 121 (match parse_float_opt text with 122 | Some f -> loop (Waypoint.with_vdop wpt f) 123 | None -> loop wpt) 124 | "pdop" -> 125 let* text = parse_text_content parser in 126 (match parse_float_opt text with 127 | Some f -> loop (Waypoint.with_pdop wpt f) 128 | None -> loop wpt) 129 | "ageofdgpsdata" -> 130 let* text = parse_text_content parser in 131 (match parse_float_opt text with 132 | Some f -> loop (Waypoint.with_ageofdgpsdata wpt f) 133 | None -> loop wpt) 134 | "dgpsid" -> 135 let* text = parse_text_content parser in 136 (match parse_int_opt text with 137 | Some id -> loop (Waypoint.with_dgpsid wpt id) 138 | None -> loop wpt) 139 | "link" -> 140 let* link = parse_link parser attrs in 141 loop (Waypoint.add_link wpt link) 142 | "extensions" -> 143 let* extensions = parse_extensions parser in 144 loop (Waypoint.add_extensions wpt extensions) 145 | _ -> 146 (* Skip unknown elements *) 147 let* _ = skip_element parser in 148 loop wpt) 149 | `El_end -> 150 parser.current_element <- List.tl parser.current_element; 151 Ok wpt 152 | `Data _ -> 153 (* Ignore text data at this level *) 154 loop wpt 155 | `Dtd _ -> 156 loop wpt 157 in 158 loop wpt 159 160and parse_text_content parser = 161 Buffer.clear parser.text_buffer; 162 let rec loop () = 163 match Xmlm.input parser.input with 164 | `Data text -> 165 Buffer.add_string parser.text_buffer text; 166 loop () 167 | `El_end -> 168 parser.current_element <- List.tl parser.current_element; 169 Ok (Buffer.contents parser.text_buffer) 170 | `El_start _ -> 171 Error (Error.invalid_xml "Unexpected element in text content") 172 | `Dtd _ -> 173 loop () 174 in 175 loop () 176 177and parse_link parser attrs = 178 let href = match get_attribute "href" attrs with 179 | Some h -> h 180 | None -> "" 181 in 182 let link = Link.make ~href () in 183 parse_link_elements parser link 184 185and parse_link_elements parser link = 186 let rec loop link = 187 match Xmlm.input parser.input with 188 | `El_start ((_, name), _) -> 189 parser.current_element <- name :: parser.current_element; 190 (match name with 191 | "text" -> 192 let* text = parse_text_content parser in 193 loop (Link.with_text link text) 194 | "type" -> 195 let* type_text = parse_text_content parser in 196 loop (Link.with_type link type_text) 197 | _ -> 198 let* _ = skip_element parser in 199 loop link) 200 | `El_end -> 201 parser.current_element <- List.tl parser.current_element; 202 Ok link 203 | `Data _ -> 204 loop link 205 | `Dtd _ -> 206 loop link 207 in 208 loop link 209 210and parse_extensions parser = 211 let rec loop acc = 212 match Xmlm.input parser.input with 213 | `El_start ((ns, name), attrs) -> 214 parser.current_element <- name :: parser.current_element; 215 let* ext = parse_extension parser ns name attrs in 216 loop (ext :: acc) 217 | `El_end -> 218 parser.current_element <- List.tl parser.current_element; 219 Ok (List.rev acc) 220 | `Data _ -> 221 loop acc 222 | `Dtd _ -> 223 loop acc 224 in 225 loop [] 226 227and parse_extension parser ns name attrs = 228 let namespace = if ns = "" then None else Some ns in 229 let attributes = List.map (fun ((_, n), v) -> (n, v)) attrs in 230 let* content = parse_extension_content parser in 231 Ok (Extension.make ?namespace ~name ~attributes ~content ()) 232 233and parse_extension_content parser = 234 Buffer.clear parser.text_buffer; 235 let rec loop elements = 236 match Xmlm.input parser.input with 237 | `Data text -> 238 Buffer.add_string parser.text_buffer text; 239 loop elements 240 | `El_start ((ns, name), attrs) -> 241 parser.current_element <- name :: parser.current_element; 242 let* ext = parse_extension parser ns name attrs in 243 loop (ext :: elements) 244 | `El_end -> 245 parser.current_element <- List.tl parser.current_element; 246 let text = String.trim (Buffer.contents parser.text_buffer) in 247 Ok (match (text, elements) with 248 | ("", []) -> Extension.text_content "" 249 | ("", els) -> Extension.elements_content (List.rev els) 250 | (t, []) -> Extension.text_content t 251 | (t, els) -> Extension.mixed_content t (List.rev els)) 252 | `Dtd _ -> 253 loop elements 254 in 255 loop [] 256 257and skip_element parser = 258 let rec loop depth = 259 match Xmlm.input parser.input with 260 | `El_start _ -> loop (depth + 1) 261 | `El_end when depth = 0 -> Ok () 262 | `El_end -> loop (depth - 1) 263 | `Data _ -> loop depth 264 | `Dtd _ -> loop depth 265 in 266 loop 0 267 268(** Parse a complete GPX document *) 269let rec parse_gpx parser = 270 (* Find the GPX root element *) 271 let rec find_gpx_root () = 272 match Xmlm.input parser.input with 273 | `El_start ((_, "gpx"), attrs) -> 274 parser.current_element <- ["gpx"]; 275 let* version = require_attribute "version" attrs "gpx" in 276 let* creator = require_attribute "creator" attrs "gpx" in 277 if version <> "1.0" && version <> "1.1" then 278 Error (Error.validation_error ("Unsupported GPX version: " ^ version ^ " (supported: 1.0, 1.1)")) 279 else 280 Ok (version, creator) 281 | `El_start _ -> 282 let* _ = skip_element parser in 283 find_gpx_root () 284 | `Dtd _ -> 285 find_gpx_root () 286 | `El_end -> 287 Error (Error.missing_element "gpx") 288 | `Data _ -> 289 find_gpx_root () 290 in 291 292 let* (version, creator) = find_gpx_root () in 293 let gpx = Doc.empty ~creator in 294 parse_gpx_elements parser { gpx with version } 295 296and parse_gpx_elements parser gpx = 297 let rec loop gpx = 298 match Xmlm.input parser.input with 299 | `El_start ((_, name), attrs) -> 300 parser.current_element <- name :: parser.current_element; 301 (match name with 302 | "metadata" -> 303 let* metadata = parse_metadata parser in 304 loop (Doc.with_metadata gpx metadata) 305 | "wpt" -> 306 let* (lat, lon) = parse_coordinates attrs "wpt" in 307 let* waypoint = parse_waypoint_data parser lat lon in 308 loop (Doc.add_waypoint gpx waypoint) 309 | "rte" -> 310 let* route = parse_route parser in 311 loop (Doc.add_route gpx route) 312 | "trk" -> 313 let* track = parse_track parser in 314 loop (Doc.add_track gpx track) 315 | "extensions" -> 316 let* extensions = parse_extensions parser in 317 loop (Doc.add_extensions gpx extensions) 318 | _ -> 319 let* _ = skip_element parser in 320 loop gpx) 321 | `El_end -> 322 Ok gpx 323 | `Data _ -> 324 loop gpx 325 | `Dtd _ -> 326 loop gpx 327 in 328 loop gpx 329 330and parse_author parser = 331 let rec loop name email link = 332 match Xmlm.input parser.input with 333 | `El_start ((_, element_name), attrs) -> 334 parser.current_element <- element_name :: parser.current_element; 335 (match element_name with 336 | "name" -> 337 let* text = parse_text_content parser in 338 loop (Some text) email link 339 | "email" -> 340 let* email_addr = parse_email_attrs attrs in 341 let* _ = skip_element parser in 342 loop name (Some email_addr) link 343 | "link" -> 344 let* parsed_link = parse_link parser attrs in 345 loop name email (Some parsed_link) 346 | _ -> 347 let* _ = skip_element parser in 348 loop name email link) 349 | `El_end -> 350 parser.current_element <- List.tl parser.current_element; 351 Ok (Link.make_person ?name ?email ?link ()) 352 | `Data _ -> 353 loop name email link 354 | `Dtd _ -> 355 loop name email link 356 in 357 loop None None None 358 359and parse_email_attrs attrs = 360 let get_attr name = 361 List.find_map (fun ((_, attr_name), value) -> 362 if attr_name = name then Some value else None 363 ) attrs 364 in 365 match get_attr "id", get_attr "domain" with 366 | Some id, Some domain -> Ok (id ^ "@" ^ domain) 367 | _ -> Error (Error.invalid_xml "Missing email id or domain attributes") 368 369and parse_copyright parser attrs = 370 let get_attr name = 371 List.find_map (fun ((_, attr_name), value) -> 372 if attr_name = name then Some value else None 373 ) attrs 374 in 375 let author = get_attr "author" in 376 let rec loop year license = 377 match Xmlm.input parser.input with 378 | `El_start ((_, element_name), _) -> 379 parser.current_element <- element_name :: parser.current_element; 380 (match element_name with 381 | "year" -> 382 let* text = parse_text_content parser in 383 (match parse_int_opt text with 384 | Some y -> loop (Some y) license 385 | None -> loop year license) 386 | "license" -> 387 let* text = parse_text_content parser in 388 loop year (Some text) 389 | _ -> 390 let* _ = skip_element parser in 391 loop year license) 392 | `El_end -> 393 parser.current_element <- List.tl parser.current_element; 394 (match author with 395 | Some auth -> Ok (Link.make_copyright ~author:auth ?year ?license ()) 396 | None -> Error (Error.invalid_xml "Missing copyright author attribute")) 397 | `Data _ -> 398 loop year license 399 | `Dtd _ -> 400 loop year license 401 in 402 loop None None 403 404and parse_bounds parser attrs = 405 let get_attr name = 406 List.find_map (fun ((_, attr_name), value) -> 407 if attr_name = name then Some value else None 408 ) attrs 409 in 410 let minlat_str = get_attr "minlat" in 411 let minlon_str = get_attr "minlon" in 412 let maxlat_str = get_attr "maxlat" in 413 let maxlon_str = get_attr "maxlon" in 414 415 (* Skip content since bounds is a self-closing element *) 416 let rec skip_bounds_content () = 417 match Xmlm.input parser.input with 418 | `El_end -> 419 parser.current_element <- List.tl parser.current_element; 420 Ok () 421 | `Data _ -> skip_bounds_content () 422 | _ -> skip_bounds_content () 423 in 424 let* () = skip_bounds_content () in 425 426 match minlat_str, minlon_str, maxlat_str, maxlon_str with 427 | Some minlat, Some minlon, Some maxlat, Some maxlon -> 428 (match 429 Float.of_string_opt minlat, Float.of_string_opt minlon, 430 Float.of_string_opt maxlat, Float.of_string_opt maxlon 431 with 432 | Some minlat_f, Some minlon_f, Some maxlat_f, Some maxlon_f -> 433 (match Metadata.Bounds.make_from_floats ~minlat:minlat_f ~minlon:minlon_f ~maxlat:maxlat_f ~maxlon:maxlon_f with 434 | Ok bounds -> Ok bounds 435 | Error msg -> Error (Error.invalid_xml ("Invalid bounds: " ^ msg))) 436 | _ -> Error (Error.invalid_xml ("Invalid bounds coordinates"))) 437 | _ -> Error (Error.invalid_xml ("Missing bounds attributes")) 438 439and parse_metadata parser = 440 let metadata = Metadata.empty in 441 let rec loop metadata = 442 match Xmlm.input parser.input with 443 | `El_start ((_, name), attrs) -> 444 parser.current_element <- name :: parser.current_element; 445 (match name with 446 | "name" -> 447 let* text = parse_text_content parser in 448 loop (Metadata.with_name metadata text) 449 | "desc" -> 450 let* text = parse_text_content parser in 451 loop (Metadata.with_description metadata text) 452 | "keywords" -> 453 let* text = parse_text_content parser in 454 loop (Metadata.with_keywords metadata text) 455 | "time" -> 456 let* text = parse_text_content parser in 457 loop (Metadata.with_time metadata (parse_time text)) 458 | "link" -> 459 let* link = parse_link parser attrs in 460 loop (Metadata.add_link metadata link) 461 | "author" -> 462 let* author = parse_author parser in 463 loop (Metadata.with_author metadata author) 464 | "copyright" -> 465 let* copyright = parse_copyright parser attrs in 466 loop (Metadata.with_copyright metadata copyright) 467 | "bounds" -> 468 let* bounds = parse_bounds parser attrs in 469 loop (Metadata.with_bounds metadata bounds) 470 | "extensions" -> 471 let* extensions = parse_extensions parser in 472 loop (Metadata.add_extensions metadata extensions) 473 | _ -> 474 let* _ = skip_element parser in 475 loop metadata) 476 | `El_end -> 477 parser.current_element <- List.tl parser.current_element; 478 Ok metadata 479 | `Data _ -> 480 loop metadata 481 | `Dtd _ -> 482 loop metadata 483 in 484 loop metadata 485 486and parse_route parser = 487 let route = Route.empty in 488 let rec loop route = 489 match Xmlm.input parser.input with 490 | `El_start ((_, name), attrs) -> 491 parser.current_element <- name :: parser.current_element; 492 (match name with 493 | "name" -> 494 let* text = parse_text_content parser in 495 loop (Route.with_name route text) 496 | "cmt" -> 497 let* text = parse_text_content parser in 498 loop (Route.with_comment route text) 499 | "desc" -> 500 let* text = parse_text_content parser in 501 loop (Route.with_description route text) 502 | "src" -> 503 let* text = parse_text_content parser in 504 loop (Route.with_source route text) 505 | "number" -> 506 let* text = parse_text_content parser in 507 (match parse_int_opt text with 508 | Some n -> loop (Route.with_number route n) 509 | None -> loop route) 510 | "type" -> 511 let* text = parse_text_content parser in 512 loop (Route.with_type route text) 513 | "rtept" -> 514 let* (lat, lon) = parse_coordinates attrs "rtept" in 515 let* rtept = parse_waypoint_data parser lat lon in 516 loop (Route.add_point route rtept) 517 | "link" -> 518 let* link = parse_link parser attrs in 519 loop (Route.add_link route link) 520 | "extensions" -> 521 let* extensions = parse_extensions parser in 522 loop (Route.add_extensions route extensions) 523 | _ -> 524 let* _ = skip_element parser in 525 loop route) 526 | `El_end -> 527 parser.current_element <- List.tl parser.current_element; 528 Ok route 529 | `Data _ -> 530 loop route 531 | `Dtd _ -> 532 loop route 533 in 534 loop route 535 536and parse_track parser = 537 let track = Track.empty in 538 let rec loop track = 539 match Xmlm.input parser.input with 540 | `El_start ((_, name), attrs) -> 541 parser.current_element <- name :: parser.current_element; 542 (match name with 543 | "name" -> 544 let* text = parse_text_content parser in 545 loop (Track.with_name track text) 546 | "cmt" -> 547 let* text = parse_text_content parser in 548 loop (Track.with_comment track text) 549 | "desc" -> 550 let* text = parse_text_content parser in 551 loop (Track.with_description track text) 552 | "src" -> 553 let* text = parse_text_content parser in 554 loop (Track.with_source track text) 555 | "number" -> 556 let* text = parse_text_content parser in 557 (match parse_int_opt text with 558 | Some n -> loop (Track.with_number track n) 559 | None -> loop track) 560 | "type" -> 561 let* text = parse_text_content parser in 562 loop (Track.with_type track text) 563 | "trkseg" -> 564 let* trkseg = parse_track_segment parser in 565 loop (Track.add_segment track trkseg) 566 | "link" -> 567 let* link = parse_link parser attrs in 568 loop (Track.add_link track link) 569 | "extensions" -> 570 let* extensions = parse_extensions parser in 571 loop (Track.add_extensions track extensions) 572 | _ -> 573 let* _ = skip_element parser in 574 loop track) 575 | `El_end -> 576 parser.current_element <- List.tl parser.current_element; 577 Ok track 578 | `Data _ -> 579 loop track 580 | `Dtd _ -> 581 loop track 582 in 583 loop track 584 585and parse_track_segment parser = 586 let trkseg = Track.Segment.empty in 587 let rec loop trkseg = 588 match Xmlm.input parser.input with 589 | `El_start ((_, name), attrs) -> 590 parser.current_element <- name :: parser.current_element; 591 (match name with 592 | "trkpt" -> 593 let* (lat, lon) = parse_coordinates attrs "trkpt" in 594 let* trkpt = parse_waypoint_data parser lat lon in 595 loop (Track.Segment.add_point trkseg trkpt) 596 | "extensions" -> 597 let* _ = parse_extensions parser in 598 loop trkseg 599 | _ -> 600 let* _ = skip_element parser in 601 loop trkseg) 602 | `El_end -> 603 parser.current_element <- List.tl parser.current_element; 604 Ok trkseg 605 | `Data _ -> 606 loop trkseg 607 | `Dtd _ -> 608 loop trkseg 609 in 610 loop trkseg 611 612(** Main parsing function *) 613let parse ?(validate=false) input = 614 let parser = make_parser input in 615 try 616 let result = parse_gpx parser in 617 match result, validate with 618 | Ok gpx, true -> 619 let validation = Validate.validate_gpx gpx in 620 if validation.is_valid then 621 Ok gpx 622 else 623 let error_msgs = List.filter (fun issue -> issue.Validate.level = `Error) validation.issues 624 |> List.map (fun issue -> issue.Validate.message) 625 |> String.concat "; " in 626 Error (Error.validation_error error_msgs) 627 | result, false -> result 628 | Error _ as result, true -> result (* Pass through parse errors even when validating *) 629 with 630 | Xmlm.Error ((line, col), error) -> 631 Error (Error.xml_error (Printf.sprintf "XML error at line %d, column %d: %s" 632 line col (Xmlm.error_message error))) 633 | exn -> 634 Error (Error.invalid_xml (Printexc.to_string exn)) 635 636(** Parse from string *) 637let parse_string ?(validate=false) s = 638 let input = Xmlm.make_input (`String (0, s)) in 639 parse ~validate input