(** GPX streaming parser using xmlm *) (** Parser state for streaming *) type parser_state = { input : Xmlm.input; mutable current_element : string list; (* Stack of current element names *) text_buffer : Buffer.t; } (** Create a new parser state *) let make_parser input = { input; current_element = []; text_buffer = Buffer.create 256; } (** Utility functions *) let get_attribute name attrs = try let value = List.find (fun ((_, n), _) -> n = name) attrs in Some (snd value) with Not_found -> None let require_attribute name attrs element = match get_attribute name attrs with | Some value -> Ok value | None -> Error (Error.missing_attribute element name) let parse_float_opt s = try Some (Float.of_string s) with _ -> None let parse_int_opt s = try Some (int_of_string s) with _ -> None let parse_time s = match Ptime.of_rfc3339 s with | Ok (t, _, _) -> Some t | Error _ -> None (** Result binding operators *) let (let*) = Result.bind let parse_coordinates attrs element = let* lat_str = require_attribute "lat" attrs element in let* lon_str = require_attribute "lon" attrs element in match (Float.of_string lat_str, Float.of_string lon_str) with | (lat_f, lon_f) -> let* lat = Result.map_error Error.invalid_coordinate (Coordinate.latitude lat_f) in let* lon = Result.map_error Error.invalid_coordinate (Coordinate.longitude lon_f) in Ok (lat, lon) | exception _ -> Error (Error.invalid_coordinate "Invalid coordinate format") (** Parse waypoint data from XML elements *) let rec parse_waypoint_data parser lat lon = let wpt = Waypoint.make lat lon in parse_waypoint_elements parser wpt and parse_waypoint_elements parser wpt = let rec loop wpt = match Xmlm.input parser.input with | `El_start ((_, name), attrs) -> parser.current_element <- name :: parser.current_element; (match name with | "ele" -> let* text = parse_text_content parser in (match parse_float_opt text with | Some ele -> loop (Waypoint.with_elevation wpt ele) | None -> loop wpt) | "time" -> let* text = parse_text_content parser in loop (Waypoint.with_time wpt (parse_time text)) | "magvar" -> let* text = parse_text_content parser in (match parse_float_opt text with | Some f -> (match Coordinate.degrees f with | Ok deg -> loop (Waypoint.with_magvar wpt deg) | Error _ -> loop wpt) | None -> loop wpt) | "geoidheight" -> let* text = parse_text_content parser in (match parse_float_opt text with | Some h -> loop (Waypoint.with_geoidheight wpt h) | None -> loop wpt) | "name" -> let* text = parse_text_content parser in loop (Waypoint.with_name wpt text) | "cmt" -> let* text = parse_text_content parser in loop (Waypoint.with_comment wpt text) | "desc" -> let* text = parse_text_content parser in loop (Waypoint.with_description wpt text) | "src" -> let* text = parse_text_content parser in loop (Waypoint.with_source wpt text) | "sym" -> let* text = parse_text_content parser in loop (Waypoint.with_symbol wpt text) | "type" -> let* text = parse_text_content parser in loop (Waypoint.with_type wpt text) | "fix" -> let* text = parse_text_content parser in loop (Waypoint.with_fix wpt (Waypoint.fix_type_of_string text)) | "sat" -> let* text = parse_text_content parser in (match parse_int_opt text with | Some s -> loop (Waypoint.with_sat wpt s) | None -> loop wpt) | "hdop" -> let* text = parse_text_content parser in (match parse_float_opt text with | Some f -> loop (Waypoint.with_hdop wpt f) | None -> loop wpt) | "vdop" -> let* text = parse_text_content parser in (match parse_float_opt text with | Some f -> loop (Waypoint.with_vdop wpt f) | None -> loop wpt) | "pdop" -> let* text = parse_text_content parser in (match parse_float_opt text with | Some f -> loop (Waypoint.with_pdop wpt f) | None -> loop wpt) | "ageofdgpsdata" -> let* text = parse_text_content parser in (match parse_float_opt text with | Some f -> loop (Waypoint.with_ageofdgpsdata wpt f) | None -> loop wpt) | "dgpsid" -> let* text = parse_text_content parser in (match parse_int_opt text with | Some id -> loop (Waypoint.with_dgpsid wpt id) | None -> loop wpt) | "link" -> let* link = parse_link parser attrs in loop (Waypoint.add_link wpt link) | "extensions" -> let* extensions = parse_extensions parser in loop (Waypoint.add_extensions wpt extensions) | _ -> (* Skip unknown elements *) let* _ = skip_element parser in loop wpt) | `El_end -> parser.current_element <- List.tl parser.current_element; Ok wpt | `Data _ -> (* Ignore text data at this level *) loop wpt | `Dtd _ -> loop wpt in loop wpt and parse_text_content parser = Buffer.clear parser.text_buffer; let rec loop () = match Xmlm.input parser.input with | `Data text -> Buffer.add_string parser.text_buffer text; loop () | `El_end -> parser.current_element <- List.tl parser.current_element; Ok (Buffer.contents parser.text_buffer) | `El_start _ -> Error (Error.invalid_xml "Unexpected element in text content") | `Dtd _ -> loop () in loop () and parse_link parser attrs = let href = match get_attribute "href" attrs with | Some h -> h | None -> "" in let link = Link.make ~href () in parse_link_elements parser link and parse_link_elements parser link = let rec loop link = match Xmlm.input parser.input with | `El_start ((_, name), _) -> parser.current_element <- name :: parser.current_element; (match name with | "text" -> let* text = parse_text_content parser in loop (Link.with_text link text) | "type" -> let* type_text = parse_text_content parser in loop (Link.with_type link type_text) | _ -> let* _ = skip_element parser in loop link) | `El_end -> parser.current_element <- List.tl parser.current_element; Ok link | `Data _ -> loop link | `Dtd _ -> loop link in loop link and parse_extensions parser = let rec loop acc = match Xmlm.input parser.input with | `El_start ((ns, name), attrs) -> parser.current_element <- name :: parser.current_element; let* ext = parse_extension parser ns name attrs in loop (ext :: acc) | `El_end -> parser.current_element <- List.tl parser.current_element; Ok (List.rev acc) | `Data _ -> loop acc | `Dtd _ -> loop acc in loop [] and parse_extension parser ns name attrs = let namespace = if ns = "" then None else Some ns in let attributes = List.map (fun ((_, n), v) -> (n, v)) attrs in let* content = parse_extension_content parser in Ok (Extension.make ?namespace ~name ~attributes ~content ()) and parse_extension_content parser = Buffer.clear parser.text_buffer; let rec loop elements = match Xmlm.input parser.input with | `Data text -> Buffer.add_string parser.text_buffer text; loop elements | `El_start ((ns, name), attrs) -> parser.current_element <- name :: parser.current_element; let* ext = parse_extension parser ns name attrs in loop (ext :: elements) | `El_end -> parser.current_element <- List.tl parser.current_element; let text = String.trim (Buffer.contents parser.text_buffer) in Ok (match (text, elements) with | ("", []) -> Extension.text_content "" | ("", els) -> Extension.elements_content (List.rev els) | (t, []) -> Extension.text_content t | (t, els) -> Extension.mixed_content t (List.rev els)) | `Dtd _ -> loop elements in loop [] and skip_element parser = let rec loop depth = match Xmlm.input parser.input with | `El_start _ -> loop (depth + 1) | `El_end when depth = 0 -> Ok () | `El_end -> loop (depth - 1) | `Data _ -> loop depth | `Dtd _ -> loop depth in loop 0 (** Parse a complete GPX document *) let rec parse_gpx parser = (* Find the GPX root element *) let rec find_gpx_root () = match Xmlm.input parser.input with | `El_start ((_, "gpx"), attrs) -> parser.current_element <- ["gpx"]; let* version = require_attribute "version" attrs "gpx" in let* creator = require_attribute "creator" attrs "gpx" in if version <> "1.0" && version <> "1.1" then Error (Error.validation_error ("Unsupported GPX version: " ^ version ^ " (supported: 1.0, 1.1)")) else Ok (version, creator) | `El_start _ -> let* _ = skip_element parser in find_gpx_root () | `Dtd _ -> find_gpx_root () | `El_end -> Error (Error.missing_element "gpx") | `Data _ -> find_gpx_root () in let* (version, creator) = find_gpx_root () in let gpx = Doc.empty ~creator in parse_gpx_elements parser { gpx with version } and parse_gpx_elements parser gpx = let rec loop gpx = match Xmlm.input parser.input with | `El_start ((_, name), attrs) -> parser.current_element <- name :: parser.current_element; (match name with | "metadata" -> let* metadata = parse_metadata parser in loop (Doc.with_metadata gpx metadata) | "wpt" -> let* (lat, lon) = parse_coordinates attrs "wpt" in let* waypoint = parse_waypoint_data parser lat lon in loop (Doc.add_waypoint gpx waypoint) | "rte" -> let* route = parse_route parser in loop (Doc.add_route gpx route) | "trk" -> let* track = parse_track parser in loop (Doc.add_track gpx track) | "extensions" -> let* extensions = parse_extensions parser in loop (Doc.add_extensions gpx extensions) | _ -> let* _ = skip_element parser in loop gpx) | `El_end -> Ok gpx | `Data _ -> loop gpx | `Dtd _ -> loop gpx in loop gpx and parse_author parser = let rec loop name email link = match Xmlm.input parser.input with | `El_start ((_, element_name), attrs) -> parser.current_element <- element_name :: parser.current_element; (match element_name with | "name" -> let* text = parse_text_content parser in loop (Some text) email link | "email" -> let* email_addr = parse_email_attrs attrs in let* _ = skip_element parser in loop name (Some email_addr) link | "link" -> let* parsed_link = parse_link parser attrs in loop name email (Some parsed_link) | _ -> let* _ = skip_element parser in loop name email link) | `El_end -> parser.current_element <- List.tl parser.current_element; Ok (Link.make_person ?name ?email ?link ()) | `Data _ -> loop name email link | `Dtd _ -> loop name email link in loop None None None and parse_email_attrs attrs = let get_attr name = List.find_map (fun ((_, attr_name), value) -> if attr_name = name then Some value else None ) attrs in match get_attr "id", get_attr "domain" with | Some id, Some domain -> Ok (id ^ "@" ^ domain) | _ -> Error (Error.invalid_xml "Missing email id or domain attributes") and parse_copyright parser attrs = let get_attr name = List.find_map (fun ((_, attr_name), value) -> if attr_name = name then Some value else None ) attrs in let author = get_attr "author" in let rec loop year license = match Xmlm.input parser.input with | `El_start ((_, element_name), _) -> parser.current_element <- element_name :: parser.current_element; (match element_name with | "year" -> let* text = parse_text_content parser in (match parse_int_opt text with | Some y -> loop (Some y) license | None -> loop year license) | "license" -> let* text = parse_text_content parser in loop year (Some text) | _ -> let* _ = skip_element parser in loop year license) | `El_end -> parser.current_element <- List.tl parser.current_element; (match author with | Some auth -> Ok (Link.make_copyright ~author:auth ?year ?license ()) | None -> Error (Error.invalid_xml "Missing copyright author attribute")) | `Data _ -> loop year license | `Dtd _ -> loop year license in loop None None and parse_bounds parser attrs = let get_attr name = List.find_map (fun ((_, attr_name), value) -> if attr_name = name then Some value else None ) attrs in let minlat_str = get_attr "minlat" in let minlon_str = get_attr "minlon" in let maxlat_str = get_attr "maxlat" in let maxlon_str = get_attr "maxlon" in (* Skip content since bounds is a self-closing element *) let rec skip_bounds_content () = match Xmlm.input parser.input with | `El_end -> parser.current_element <- List.tl parser.current_element; Ok () | `Data _ -> skip_bounds_content () | _ -> skip_bounds_content () in let* () = skip_bounds_content () in match minlat_str, minlon_str, maxlat_str, maxlon_str with | Some minlat, Some minlon, Some maxlat, Some maxlon -> (match Float.of_string_opt minlat, Float.of_string_opt minlon, Float.of_string_opt maxlat, Float.of_string_opt maxlon with | Some minlat_f, Some minlon_f, Some maxlat_f, Some maxlon_f -> (match Metadata.Bounds.make_from_floats ~minlat:minlat_f ~minlon:minlon_f ~maxlat:maxlat_f ~maxlon:maxlon_f with | Ok bounds -> Ok bounds | Error msg -> Error (Error.invalid_xml ("Invalid bounds: " ^ msg))) | _ -> Error (Error.invalid_xml ("Invalid bounds coordinates"))) | _ -> Error (Error.invalid_xml ("Missing bounds attributes")) and parse_metadata parser = let metadata = Metadata.empty in let rec loop metadata = match Xmlm.input parser.input with | `El_start ((_, name), attrs) -> parser.current_element <- name :: parser.current_element; (match name with | "name" -> let* text = parse_text_content parser in loop (Metadata.with_name metadata text) | "desc" -> let* text = parse_text_content parser in loop (Metadata.with_description metadata text) | "keywords" -> let* text = parse_text_content parser in loop (Metadata.with_keywords metadata text) | "time" -> let* text = parse_text_content parser in loop (Metadata.with_time metadata (parse_time text)) | "link" -> let* link = parse_link parser attrs in loop (Metadata.add_link metadata link) | "author" -> let* author = parse_author parser in loop (Metadata.with_author metadata author) | "copyright" -> let* copyright = parse_copyright parser attrs in loop (Metadata.with_copyright metadata copyright) | "bounds" -> let* bounds = parse_bounds parser attrs in loop (Metadata.with_bounds metadata bounds) | "extensions" -> let* extensions = parse_extensions parser in loop (Metadata.add_extensions metadata extensions) | _ -> let* _ = skip_element parser in loop metadata) | `El_end -> parser.current_element <- List.tl parser.current_element; Ok metadata | `Data _ -> loop metadata | `Dtd _ -> loop metadata in loop metadata and parse_route parser = let route = Route.empty in let rec loop route = match Xmlm.input parser.input with | `El_start ((_, name), attrs) -> parser.current_element <- name :: parser.current_element; (match name with | "name" -> let* text = parse_text_content parser in loop (Route.with_name route text) | "cmt" -> let* text = parse_text_content parser in loop (Route.with_comment route text) | "desc" -> let* text = parse_text_content parser in loop (Route.with_description route text) | "src" -> let* text = parse_text_content parser in loop (Route.with_source route text) | "number" -> let* text = parse_text_content parser in (match parse_int_opt text with | Some n -> loop (Route.with_number route n) | None -> loop route) | "type" -> let* text = parse_text_content parser in loop (Route.with_type route text) | "rtept" -> let* (lat, lon) = parse_coordinates attrs "rtept" in let* rtept = parse_waypoint_data parser lat lon in loop (Route.add_point route rtept) | "link" -> let* link = parse_link parser attrs in loop (Route.add_link route link) | "extensions" -> let* extensions = parse_extensions parser in loop (Route.add_extensions route extensions) | _ -> let* _ = skip_element parser in loop route) | `El_end -> parser.current_element <- List.tl parser.current_element; Ok route | `Data _ -> loop route | `Dtd _ -> loop route in loop route and parse_track parser = let track = Track.empty in let rec loop track = match Xmlm.input parser.input with | `El_start ((_, name), attrs) -> parser.current_element <- name :: parser.current_element; (match name with | "name" -> let* text = parse_text_content parser in loop (Track.with_name track text) | "cmt" -> let* text = parse_text_content parser in loop (Track.with_comment track text) | "desc" -> let* text = parse_text_content parser in loop (Track.with_description track text) | "src" -> let* text = parse_text_content parser in loop (Track.with_source track text) | "number" -> let* text = parse_text_content parser in (match parse_int_opt text with | Some n -> loop (Track.with_number track n) | None -> loop track) | "type" -> let* text = parse_text_content parser in loop (Track.with_type track text) | "trkseg" -> let* trkseg = parse_track_segment parser in loop (Track.add_segment track trkseg) | "link" -> let* link = parse_link parser attrs in loop (Track.add_link track link) | "extensions" -> let* extensions = parse_extensions parser in loop (Track.add_extensions track extensions) | _ -> let* _ = skip_element parser in loop track) | `El_end -> parser.current_element <- List.tl parser.current_element; Ok track | `Data _ -> loop track | `Dtd _ -> loop track in loop track and parse_track_segment parser = let trkseg = Track.Segment.empty in let rec loop trkseg = match Xmlm.input parser.input with | `El_start ((_, name), attrs) -> parser.current_element <- name :: parser.current_element; (match name with | "trkpt" -> let* (lat, lon) = parse_coordinates attrs "trkpt" in let* trkpt = parse_waypoint_data parser lat lon in loop (Track.Segment.add_point trkseg trkpt) | "extensions" -> let* _ = parse_extensions parser in loop trkseg | _ -> let* _ = skip_element parser in loop trkseg) | `El_end -> parser.current_element <- List.tl parser.current_element; Ok trkseg | `Data _ -> loop trkseg | `Dtd _ -> loop trkseg in loop trkseg (** Main parsing function *) let parse ?(validate=false) input = let parser = make_parser input in try let result = parse_gpx parser in match result, validate with | Ok gpx, true -> let validation = Validate.validate_gpx gpx in if validation.is_valid then Ok gpx else let error_msgs = List.filter (fun issue -> issue.Validate.level = `Error) validation.issues |> List.map (fun issue -> issue.Validate.message) |> String.concat "; " in Error (Error.validation_error error_msgs) | result, false -> result | Error _ as result, true -> result (* Pass through parse errors even when validating *) with | Xmlm.Error ((line, col), error) -> Error (Error.xml_error (Printf.sprintf "XML error at line %d, column %d: %s" line col (Xmlm.error_message error))) | exn -> Error (Error.invalid_xml (Printexc.to_string exn)) (** Parse from string *) let parse_string ?(validate=false) s = let input = Xmlm.make_input (`String (0, s)) in parse ~validate input