1(** GPX streaming parser using xmlm *) 2 3open Types 4 5(** Parser state for streaming *) 6type parser_state = { 7 input : Xmlm.input; 8 mutable current_element : string list; (* Stack of current element names *) 9 text_buffer : Buffer.t; 10} 11 12(** Create a new parser state *) 13let make_parser input = { 14 input; 15 current_element = []; 16 text_buffer = Buffer.create 256; 17} 18 19(** Utility functions *) 20 21let get_attribute name attrs = 22 try 23 let value = List.find (fun ((_, n), _) -> n = name) attrs in 24 Some (snd value) 25 with Not_found -> None 26 27let require_attribute name attrs element = 28 match get_attribute name attrs with 29 | Some value -> Ok value 30 | None -> Error (Missing_required_attribute (element, name)) 31 32let parse_float_opt s = 33 try Some (Float.of_string s) 34 with _ -> None 35 36let parse_int_opt s = 37 try Some (int_of_string s) 38 with _ -> None 39 40let parse_time s = 41 match Ptime.of_rfc3339 s with 42 | Ok (t, _, _) -> Some t 43 | Error _ -> None 44 45(** Result binding operators *) 46let (let*) = Result.bind 47 48let parse_coordinates attrs element = 49 let* lat_str = require_attribute "lat" attrs element in 50 let* lon_str = require_attribute "lon" attrs element in 51 match (Float.of_string lat_str, Float.of_string lon_str) with 52 | (lat_f, lon_f) -> 53 let* lat = Result.map_error (fun s -> Invalid_coordinate s) (latitude lat_f) in 54 let* lon = Result.map_error (fun s -> Invalid_coordinate s) (longitude lon_f) in 55 Ok (lat, lon) 56 | exception _ -> Error (Invalid_coordinate "Invalid coordinate format") 57 58(** Parse waypoint data from XML elements *) 59let rec parse_waypoint_data parser lat lon = 60 let wpt = make_waypoint_data lat lon in 61 parse_waypoint_elements parser wpt 62 63and parse_waypoint_elements parser wpt = 64 let rec loop wpt = 65 match Xmlm.input parser.input with 66 | `El_start ((_, name), attrs) -> 67 parser.current_element <- name :: parser.current_element; 68 (match name with 69 | "ele" -> 70 let* text = parse_text_content parser in 71 (match parse_float_opt text with 72 | Some ele -> loop { wpt with ele = Some ele } 73 | None -> loop wpt) 74 | "time" -> 75 let* text = parse_text_content parser in 76 loop { wpt with time = parse_time text } 77 | "magvar" -> 78 let* text = parse_text_content parser in 79 (match parse_float_opt text with 80 | Some f -> 81 (match degrees f with 82 | Ok deg -> loop { wpt with magvar = Some deg } 83 | Error _ -> loop wpt) 84 | None -> loop wpt) 85 | "geoidheight" -> 86 let* text = parse_text_content parser in 87 (match parse_float_opt text with 88 | Some h -> loop { wpt with geoidheight = Some h } 89 | None -> loop wpt) 90 | "name" -> 91 let* text = parse_text_content parser in 92 loop { wpt with name = Some text } 93 | "cmt" -> 94 let* text = parse_text_content parser in 95 loop { wpt with cmt = Some text } 96 | "desc" -> 97 let* text = parse_text_content parser in 98 loop { wpt with desc = Some text } 99 | "src" -> 100 let* text = parse_text_content parser in 101 loop { wpt with src = Some text } 102 | "sym" -> 103 let* text = parse_text_content parser in 104 loop { wpt with sym = Some text } 105 | "type" -> 106 let* text = parse_text_content parser in 107 loop { wpt with type_ = Some text } 108 | "fix" -> 109 let* text = parse_text_content parser in 110 loop { wpt with fix = fix_type_of_string text } 111 | "sat" -> 112 let* text = parse_text_content parser in 113 (match parse_int_opt text with 114 | Some s -> loop { wpt with sat = Some s } 115 | None -> loop wpt) 116 | "hdop" | "vdop" | "pdop" -> 117 let* text = parse_text_content parser in 118 (match parse_float_opt text with 119 | Some f -> 120 (match name with 121 | "hdop" -> loop { wpt with hdop = Some f } 122 | "vdop" -> loop { wpt with vdop = Some f } 123 | "pdop" -> loop { wpt with pdop = Some f } 124 | _ -> loop wpt) 125 | None -> loop wpt) 126 | "ageofdgpsdata" -> 127 let* text = parse_text_content parser in 128 (match parse_float_opt text with 129 | Some f -> loop { wpt with ageofdgpsdata = Some f } 130 | None -> loop wpt) 131 | "dgpsid" -> 132 let* text = parse_text_content parser in 133 (match parse_int_opt text with 134 | Some id -> loop { wpt with dgpsid = Some id } 135 | None -> loop wpt) 136 | "link" -> 137 let* link = parse_link parser attrs in 138 loop { wpt with links = link :: wpt.links } 139 | "extensions" -> 140 let* extensions = parse_extensions parser in 141 loop { wpt with extensions = extensions @ wpt.extensions } 142 | _ -> 143 (* Skip unknown elements *) 144 let* _ = skip_element parser in 145 loop wpt) 146 | `El_end -> 147 parser.current_element <- List.tl parser.current_element; 148 Ok wpt 149 | `Data _ -> 150 (* Ignore text data at this level *) 151 loop wpt 152 | `Dtd _ -> 153 loop wpt 154 in 155 loop wpt 156 157and parse_text_content parser = 158 Buffer.clear parser.text_buffer; 159 let rec loop () = 160 match Xmlm.input parser.input with 161 | `Data text -> 162 Buffer.add_string parser.text_buffer text; 163 loop () 164 | `El_end -> 165 parser.current_element <- List.tl parser.current_element; 166 Ok (Buffer.contents parser.text_buffer) 167 | `El_start _ -> 168 Error (Invalid_xml "Unexpected element in text content") 169 | `Dtd _ -> 170 loop () 171 in 172 loop () 173 174and parse_link parser attrs = 175 let href = match get_attribute "href" attrs with 176 | Some h -> h 177 | None -> "" 178 in 179 let link = { href; text = None; type_ = None } in 180 parse_link_elements parser link 181 182and parse_link_elements parser link = 183 let rec loop link = 184 match Xmlm.input parser.input with 185 | `El_start ((_, name), _) -> 186 parser.current_element <- name :: parser.current_element; 187 (match name with 188 | "text" -> 189 let* text = parse_text_content parser in 190 loop { link with text = Some text } 191 | "type" -> 192 let* type_text = parse_text_content parser in 193 loop { link with type_ = Some type_text } 194 | _ -> 195 let* _ = skip_element parser in 196 loop link) 197 | `El_end -> 198 parser.current_element <- List.tl parser.current_element; 199 Ok link 200 | `Data _ -> 201 loop link 202 | `Dtd _ -> 203 loop link 204 in 205 loop link 206 207and parse_extensions parser = 208 let rec loop acc = 209 match Xmlm.input parser.input with 210 | `El_start ((ns, name), attrs) -> 211 parser.current_element <- name :: parser.current_element; 212 let* ext = parse_extension parser ns name attrs in 213 loop (ext :: acc) 214 | `El_end -> 215 parser.current_element <- List.tl parser.current_element; 216 Ok (List.rev acc) 217 | `Data _ -> 218 loop acc 219 | `Dtd _ -> 220 loop acc 221 in 222 loop [] 223 224and parse_extension parser ns name attrs = 225 let namespace = if ns = "" then None else Some ns in 226 let attributes = List.map (fun ((_, n), v) -> (n, v)) attrs in 227 let* content = parse_extension_content parser in 228 Ok { namespace; name; attributes; content } 229 230and parse_extension_content parser = 231 Buffer.clear parser.text_buffer; 232 let rec loop elements = 233 match Xmlm.input parser.input with 234 | `Data text -> 235 Buffer.add_string parser.text_buffer text; 236 loop elements 237 | `El_start ((ns, name), attrs) -> 238 parser.current_element <- name :: parser.current_element; 239 let* ext = parse_extension parser ns name attrs in 240 loop (ext :: elements) 241 | `El_end -> 242 parser.current_element <- List.tl parser.current_element; 243 let text = String.trim (Buffer.contents parser.text_buffer) in 244 Ok (match (text, elements) with 245 | ("", []) -> Text "" 246 | ("", els) -> Elements (List.rev els) 247 | (t, []) -> Text t 248 | (t, els) -> Mixed (t, List.rev els)) 249 | `Dtd _ -> 250 loop elements 251 in 252 loop [] 253 254and skip_element parser = 255 let rec loop depth = 256 match Xmlm.input parser.input with 257 | `El_start _ -> loop (depth + 1) 258 | `El_end when depth = 0 -> Ok () 259 | `El_end -> loop (depth - 1) 260 | `Data _ -> loop depth 261 | `Dtd _ -> loop depth 262 in 263 loop 0 264 265(** Parse a complete GPX document *) 266let rec parse_gpx parser = 267 (* Find the GPX root element *) 268 let rec find_gpx_root () = 269 match Xmlm.input parser.input with 270 | `El_start ((_, "gpx"), attrs) -> 271 parser.current_element <- ["gpx"]; 272 let* version = require_attribute "version" attrs "gpx" in 273 let* creator = require_attribute "creator" attrs "gpx" in 274 if version <> "1.0" && version <> "1.1" then 275 Error (Validation_error ("Unsupported GPX version: " ^ version ^ " (supported: 1.0, 1.1)")) 276 else 277 Ok (version, creator) 278 | `El_start _ -> 279 let* _ = skip_element parser in 280 find_gpx_root () 281 | `Dtd _ -> 282 find_gpx_root () 283 | `El_end -> 284 Error (Missing_required_element "gpx") 285 | `Data _ -> 286 find_gpx_root () 287 in 288 289 let* (version, creator) = find_gpx_root () in 290 let gpx = make_gpx ~creator in 291 parse_gpx_elements parser { gpx with version } 292 293and parse_gpx_elements parser gpx = 294 let rec loop gpx = 295 match Xmlm.input parser.input with 296 | `El_start ((_, name), attrs) -> 297 parser.current_element <- name :: parser.current_element; 298 (match name with 299 | "metadata" -> 300 let* metadata = parse_metadata parser in 301 loop { gpx with metadata = Some metadata } 302 | "wpt" -> 303 let* (lat, lon) = parse_coordinates attrs "wpt" in 304 let* waypoint = parse_waypoint_data parser lat lon in 305 loop { gpx with waypoints = waypoint :: gpx.waypoints } 306 | "rte" -> 307 let* route = parse_route parser in 308 loop { gpx with routes = route :: gpx.routes } 309 | "trk" -> 310 let* track = parse_track parser in 311 loop { gpx with tracks = track :: gpx.tracks } 312 | "extensions" -> 313 let* extensions = parse_extensions parser in 314 loop { gpx with extensions = extensions @ gpx.extensions } 315 | _ -> 316 let* _ = skip_element parser in 317 loop gpx) 318 | `El_end -> 319 Ok { gpx with 320 waypoints = List.rev gpx.waypoints; 321 routes = List.rev gpx.routes; 322 tracks = List.rev gpx.tracks } 323 | `Data _ -> 324 loop gpx 325 | `Dtd _ -> 326 loop gpx 327 in 328 loop gpx 329 330and parse_metadata parser = 331 let metadata = empty_metadata in 332 let rec loop (metadata : metadata) = 333 match Xmlm.input parser.input with 334 | `El_start ((_, name), attrs) -> 335 parser.current_element <- name :: parser.current_element; 336 (match name with 337 | "name" -> 338 let* text = parse_text_content parser in 339 loop { metadata with name = Some text } 340 | "desc" -> 341 let* text = parse_text_content parser in 342 loop { metadata with desc = Some text } 343 | "keywords" -> 344 let* text = parse_text_content parser in 345 loop { metadata with keywords = Some text } 346 | "time" -> 347 let* text = parse_text_content parser in 348 loop { metadata with time = parse_time text } 349 | "link" -> 350 let* link = parse_link parser attrs in 351 loop { metadata with links = link :: metadata.links } 352 | "extensions" -> 353 let* extensions = parse_extensions parser in 354 loop { metadata with extensions = extensions @ metadata.extensions } 355 | _ -> 356 let* _ = skip_element parser in 357 loop metadata) 358 | `El_end -> 359 parser.current_element <- List.tl parser.current_element; 360 Ok { metadata with links = List.rev metadata.links } 361 | `Data _ -> 362 loop metadata 363 | `Dtd _ -> 364 loop metadata 365 in 366 loop metadata 367 368and parse_route parser = 369 let route = { 370 name = None; cmt = None; desc = None; src = None; links = []; 371 number = None; type_ = None; extensions = []; rtepts = [] 372 } in 373 let rec loop (route : route) = 374 match Xmlm.input parser.input with 375 | `El_start ((_, name), attrs) -> 376 parser.current_element <- name :: parser.current_element; 377 (match name with 378 | "name" -> 379 let* text = parse_text_content parser in 380 loop { route with name = Some text } 381 | "cmt" -> 382 let* text = parse_text_content parser in 383 loop { route with cmt = Some text } 384 | "desc" -> 385 let* text = parse_text_content parser in 386 loop { route with desc = Some text } 387 | "src" -> 388 let* text = parse_text_content parser in 389 loop { route with src = Some text } 390 | "number" -> 391 let* text = parse_text_content parser in 392 (match parse_int_opt text with 393 | Some n -> loop { route with number = Some n } 394 | None -> loop route) 395 | "type" -> 396 let* text = parse_text_content parser in 397 loop { route with type_ = Some text } 398 | "rtept" -> 399 let* (lat, lon) = parse_coordinates attrs "rtept" in 400 let* rtept = parse_waypoint_data parser lat lon in 401 loop { route with rtepts = rtept :: route.rtepts } 402 | "link" -> 403 let* link = parse_link parser attrs in 404 loop { route with links = link :: route.links } 405 | "extensions" -> 406 let* extensions = parse_extensions parser in 407 loop { route with extensions = extensions @ route.extensions } 408 | _ -> 409 let* _ = skip_element parser in 410 loop route) 411 | `El_end -> 412 parser.current_element <- List.tl parser.current_element; 413 Ok { route with 414 rtepts = List.rev route.rtepts; 415 links = List.rev route.links } 416 | `Data _ -> 417 loop route 418 | `Dtd _ -> 419 loop route 420 in 421 loop route 422 423and parse_track parser = 424 let track = { 425 name = None; cmt = None; desc = None; src = None; links = []; 426 number = None; type_ = None; extensions = []; trksegs = [] 427 } in 428 let rec loop track = 429 match Xmlm.input parser.input with 430 | `El_start ((_, name), attrs) -> 431 parser.current_element <- name :: parser.current_element; 432 (match name with 433 | "name" -> 434 let* text = parse_text_content parser in 435 loop { track with name = Some text } 436 | "cmt" -> 437 let* text = parse_text_content parser in 438 loop { track with cmt = Some text } 439 | "desc" -> 440 let* text = parse_text_content parser in 441 loop { track with desc = Some text } 442 | "src" -> 443 let* text = parse_text_content parser in 444 loop { track with src = Some text } 445 | "number" -> 446 let* text = parse_text_content parser in 447 (match parse_int_opt text with 448 | Some n -> loop { track with number = Some n } 449 | None -> loop track) 450 | "type" -> 451 let* text = parse_text_content parser in 452 loop { track with type_ = Some text } 453 | "trkseg" -> 454 let* trkseg = parse_track_segment parser in 455 loop { track with trksegs = trkseg :: track.trksegs } 456 | "link" -> 457 let* link = parse_link parser attrs in 458 loop { track with links = link :: track.links } 459 | "extensions" -> 460 let* extensions = parse_extensions parser in 461 loop { track with extensions = extensions @ track.extensions } 462 | _ -> 463 let* _ = skip_element parser in 464 loop track) 465 | `El_end -> 466 parser.current_element <- List.tl parser.current_element; 467 Ok { track with 468 trksegs = List.rev track.trksegs; 469 links = List.rev track.links } 470 | `Data _ -> 471 loop track 472 | `Dtd _ -> 473 loop track 474 in 475 loop track 476 477and parse_track_segment parser = 478 let trkseg = { trkpts = []; extensions = [] } in 479 let rec loop trkseg = 480 match Xmlm.input parser.input with 481 | `El_start ((_, name), attrs) -> 482 parser.current_element <- name :: parser.current_element; 483 (match name with 484 | "trkpt" -> 485 let* (lat, lon) = parse_coordinates attrs "trkpt" in 486 let* trkpt = parse_waypoint_data parser lat lon in 487 loop { trkseg with trkpts = trkpt :: trkseg.trkpts } 488 | "extensions" -> 489 let* extensions = parse_extensions parser in 490 loop { trkseg with extensions = extensions @ trkseg.extensions } 491 | _ -> 492 let* _ = skip_element parser in 493 loop trkseg) 494 | `El_end -> 495 parser.current_element <- List.tl parser.current_element; 496 Ok { trkseg with trkpts = List.rev trkseg.trkpts } 497 | `Data _ -> 498 loop trkseg 499 | `Dtd _ -> 500 loop trkseg 501 in 502 loop trkseg 503 504(** Main parsing function *) 505let parse input = 506 let parser = make_parser input in 507 try 508 parse_gpx parser 509 with 510 | Xmlm.Error ((line, col), error) -> 511 Error (Xml_error (Printf.sprintf "XML error at line %d, column %d: %s" 512 line col (Xmlm.error_message error))) 513 | exn -> 514 Error (Invalid_xml (Printexc.to_string exn)) 515 516(** Parse from string *) 517let parse_string s = 518 let input = Xmlm.make_input (`String (0, s)) in 519 parse input