(** GPX streaming parser using xmlm *) open Types (** Parser state for streaming *) type parser_state = { input : Xmlm.input; mutable current_element : string list; (* Stack of current element names *) text_buffer : Buffer.t; } (** Create a new parser state *) let make_parser input = { input; current_element = []; text_buffer = Buffer.create 256; } (** Utility functions *) let get_attribute name attrs = try let value = List.find (fun ((_, n), _) -> n = name) attrs in Some (snd value) with Not_found -> None let require_attribute name attrs element = match get_attribute name attrs with | Some value -> Ok value | None -> Error (Missing_required_attribute (element, name)) let parse_float_opt s = try Some (Float.of_string s) with _ -> None let parse_int_opt s = try Some (int_of_string s) with _ -> None let parse_time s = match Ptime.of_rfc3339 s with | Ok (t, _, _) -> Some t | Error _ -> None (** Result binding operators *) let (let*) = Result.bind let parse_coordinates attrs element = let* lat_str = require_attribute "lat" attrs element in let* lon_str = require_attribute "lon" attrs element in match (Float.of_string lat_str, Float.of_string lon_str) with | (lat_f, lon_f) -> let* lat = Result.map_error (fun s -> Invalid_coordinate s) (latitude lat_f) in let* lon = Result.map_error (fun s -> Invalid_coordinate s) (longitude lon_f) in Ok (lat, lon) | exception _ -> Error (Invalid_coordinate "Invalid coordinate format") (** Parse waypoint data from XML elements *) let rec parse_waypoint_data parser lat lon = let wpt = make_waypoint_data lat lon in parse_waypoint_elements parser wpt and parse_waypoint_elements parser wpt = let rec loop wpt = match Xmlm.input parser.input with | `El_start ((_, name), attrs) -> parser.current_element <- name :: parser.current_element; (match name with | "ele" -> let* text = parse_text_content parser in (match parse_float_opt text with | Some ele -> loop { wpt with ele = Some ele } | None -> loop wpt) | "time" -> let* text = parse_text_content parser in loop { wpt with time = parse_time text } | "magvar" -> let* text = parse_text_content parser in (match parse_float_opt text with | Some f -> (match degrees f with | Ok deg -> loop { wpt with magvar = Some deg } | Error _ -> loop wpt) | None -> loop wpt) | "geoidheight" -> let* text = parse_text_content parser in (match parse_float_opt text with | Some h -> loop { wpt with geoidheight = Some h } | None -> loop wpt) | "name" -> let* text = parse_text_content parser in loop { wpt with name = Some text } | "cmt" -> let* text = parse_text_content parser in loop { wpt with cmt = Some text } | "desc" -> let* text = parse_text_content parser in loop { wpt with desc = Some text } | "src" -> let* text = parse_text_content parser in loop { wpt with src = Some text } | "sym" -> let* text = parse_text_content parser in loop { wpt with sym = Some text } | "type" -> let* text = parse_text_content parser in loop { wpt with type_ = Some text } | "fix" -> let* text = parse_text_content parser in loop { wpt with fix = fix_type_of_string text } | "sat" -> let* text = parse_text_content parser in (match parse_int_opt text with | Some s -> loop { wpt with sat = Some s } | None -> loop wpt) | "hdop" | "vdop" | "pdop" -> let* text = parse_text_content parser in (match parse_float_opt text with | Some f -> (match name with | "hdop" -> loop { wpt with hdop = Some f } | "vdop" -> loop { wpt with vdop = Some f } | "pdop" -> loop { wpt with pdop = Some f } | _ -> loop wpt) | None -> loop wpt) | "ageofdgpsdata" -> let* text = parse_text_content parser in (match parse_float_opt text with | Some f -> loop { wpt with ageofdgpsdata = Some f } | None -> loop wpt) | "dgpsid" -> let* text = parse_text_content parser in (match parse_int_opt text with | Some id -> loop { wpt with dgpsid = Some id } | None -> loop wpt) | "link" -> let* link = parse_link parser attrs in loop { wpt with links = link :: wpt.links } | "extensions" -> let* extensions = parse_extensions parser in loop { wpt with extensions = extensions @ wpt.extensions } | _ -> (* Skip unknown elements *) let* _ = skip_element parser in loop wpt) | `El_end -> parser.current_element <- List.tl parser.current_element; Ok wpt | `Data _ -> (* Ignore text data at this level *) loop wpt | `Dtd _ -> loop wpt in loop wpt and parse_text_content parser = Buffer.clear parser.text_buffer; let rec loop () = match Xmlm.input parser.input with | `Data text -> Buffer.add_string parser.text_buffer text; loop () | `El_end -> parser.current_element <- List.tl parser.current_element; Ok (Buffer.contents parser.text_buffer) | `El_start _ -> Error (Invalid_xml "Unexpected element in text content") | `Dtd _ -> loop () in loop () and parse_link parser attrs = let href = match get_attribute "href" attrs with | Some h -> h | None -> "" in let link = { href; text = None; type_ = None } in parse_link_elements parser link and parse_link_elements parser link = let rec loop link = match Xmlm.input parser.input with | `El_start ((_, name), _) -> parser.current_element <- name :: parser.current_element; (match name with | "text" -> let* text = parse_text_content parser in loop { link with text = Some text } | "type" -> let* type_text = parse_text_content parser in loop { link with type_ = Some type_text } | _ -> let* _ = skip_element parser in loop link) | `El_end -> parser.current_element <- List.tl parser.current_element; Ok link | `Data _ -> loop link | `Dtd _ -> loop link in loop link and parse_extensions parser = let rec loop acc = match Xmlm.input parser.input with | `El_start ((ns, name), attrs) -> parser.current_element <- name :: parser.current_element; let* ext = parse_extension parser ns name attrs in loop (ext :: acc) | `El_end -> parser.current_element <- List.tl parser.current_element; Ok (List.rev acc) | `Data _ -> loop acc | `Dtd _ -> loop acc in loop [] and parse_extension parser ns name attrs = let namespace = if ns = "" then None else Some ns in let attributes = List.map (fun ((_, n), v) -> (n, v)) attrs in let* content = parse_extension_content parser in Ok { namespace; name; attributes; content } and parse_extension_content parser = Buffer.clear parser.text_buffer; let rec loop elements = match Xmlm.input parser.input with | `Data text -> Buffer.add_string parser.text_buffer text; loop elements | `El_start ((ns, name), attrs) -> parser.current_element <- name :: parser.current_element; let* ext = parse_extension parser ns name attrs in loop (ext :: elements) | `El_end -> parser.current_element <- List.tl parser.current_element; let text = String.trim (Buffer.contents parser.text_buffer) in Ok (match (text, elements) with | ("", []) -> Text "" | ("", els) -> Elements (List.rev els) | (t, []) -> Text t | (t, els) -> Mixed (t, List.rev els)) | `Dtd _ -> loop elements in loop [] and skip_element parser = let rec loop depth = match Xmlm.input parser.input with | `El_start _ -> loop (depth + 1) | `El_end when depth = 0 -> Ok () | `El_end -> loop (depth - 1) | `Data _ -> loop depth | `Dtd _ -> loop depth in loop 0 (** Parse a complete GPX document *) let rec parse_gpx parser = (* Find the GPX root element *) let rec find_gpx_root () = match Xmlm.input parser.input with | `El_start ((_, "gpx"), attrs) -> parser.current_element <- ["gpx"]; let* version = require_attribute "version" attrs "gpx" in let* creator = require_attribute "creator" attrs "gpx" in if version <> "1.0" && version <> "1.1" then Error (Validation_error ("Unsupported GPX version: " ^ version ^ " (supported: 1.0, 1.1)")) else Ok (version, creator) | `El_start _ -> let* _ = skip_element parser in find_gpx_root () | `Dtd _ -> find_gpx_root () | `El_end -> Error (Missing_required_element "gpx") | `Data _ -> find_gpx_root () in let* (version, creator) = find_gpx_root () in let gpx = make_gpx ~creator in parse_gpx_elements parser { gpx with version } and parse_gpx_elements parser gpx = let rec loop gpx = match Xmlm.input parser.input with | `El_start ((_, name), attrs) -> parser.current_element <- name :: parser.current_element; (match name with | "metadata" -> let* metadata = parse_metadata parser in loop { gpx with metadata = Some metadata } | "wpt" -> let* (lat, lon) = parse_coordinates attrs "wpt" in let* waypoint = parse_waypoint_data parser lat lon in loop { gpx with waypoints = waypoint :: gpx.waypoints } | "rte" -> let* route = parse_route parser in loop { gpx with routes = route :: gpx.routes } | "trk" -> let* track = parse_track parser in loop { gpx with tracks = track :: gpx.tracks } | "extensions" -> let* extensions = parse_extensions parser in loop { gpx with extensions = extensions @ gpx.extensions } | _ -> let* _ = skip_element parser in loop gpx) | `El_end -> Ok { gpx with waypoints = List.rev gpx.waypoints; routes = List.rev gpx.routes; tracks = List.rev gpx.tracks } | `Data _ -> loop gpx | `Dtd _ -> loop gpx in loop gpx and parse_metadata parser = let metadata = empty_metadata in let rec loop (metadata : metadata) = match Xmlm.input parser.input with | `El_start ((_, name), attrs) -> parser.current_element <- name :: parser.current_element; (match name with | "name" -> let* text = parse_text_content parser in loop { metadata with name = Some text } | "desc" -> let* text = parse_text_content parser in loop { metadata with desc = Some text } | "keywords" -> let* text = parse_text_content parser in loop { metadata with keywords = Some text } | "time" -> let* text = parse_text_content parser in loop { metadata with time = parse_time text } | "link" -> let* link = parse_link parser attrs in loop { metadata with links = link :: metadata.links } | "extensions" -> let* extensions = parse_extensions parser in loop { metadata with extensions = extensions @ metadata.extensions } | _ -> let* _ = skip_element parser in loop metadata) | `El_end -> parser.current_element <- List.tl parser.current_element; Ok { metadata with links = List.rev metadata.links } | `Data _ -> loop metadata | `Dtd _ -> loop metadata in loop metadata and parse_route parser = let route = { name = None; cmt = None; desc = None; src = None; links = []; number = None; type_ = None; extensions = []; rtepts = [] } in let rec loop (route : route) = match Xmlm.input parser.input with | `El_start ((_, name), attrs) -> parser.current_element <- name :: parser.current_element; (match name with | "name" -> let* text = parse_text_content parser in loop { route with name = Some text } | "cmt" -> let* text = parse_text_content parser in loop { route with cmt = Some text } | "desc" -> let* text = parse_text_content parser in loop { route with desc = Some text } | "src" -> let* text = parse_text_content parser in loop { route with src = Some text } | "number" -> let* text = parse_text_content parser in (match parse_int_opt text with | Some n -> loop { route with number = Some n } | None -> loop route) | "type" -> let* text = parse_text_content parser in loop { route with type_ = Some text } | "rtept" -> let* (lat, lon) = parse_coordinates attrs "rtept" in let* rtept = parse_waypoint_data parser lat lon in loop { route with rtepts = rtept :: route.rtepts } | "link" -> let* link = parse_link parser attrs in loop { route with links = link :: route.links } | "extensions" -> let* extensions = parse_extensions parser in loop { route with extensions = extensions @ route.extensions } | _ -> let* _ = skip_element parser in loop route) | `El_end -> parser.current_element <- List.tl parser.current_element; Ok { route with rtepts = List.rev route.rtepts; links = List.rev route.links } | `Data _ -> loop route | `Dtd _ -> loop route in loop route and parse_track parser = let track = { name = None; cmt = None; desc = None; src = None; links = []; number = None; type_ = None; extensions = []; trksegs = [] } in let rec loop track = match Xmlm.input parser.input with | `El_start ((_, name), attrs) -> parser.current_element <- name :: parser.current_element; (match name with | "name" -> let* text = parse_text_content parser in loop { track with name = Some text } | "cmt" -> let* text = parse_text_content parser in loop { track with cmt = Some text } | "desc" -> let* text = parse_text_content parser in loop { track with desc = Some text } | "src" -> let* text = parse_text_content parser in loop { track with src = Some text } | "number" -> let* text = parse_text_content parser in (match parse_int_opt text with | Some n -> loop { track with number = Some n } | None -> loop track) | "type" -> let* text = parse_text_content parser in loop { track with type_ = Some text } | "trkseg" -> let* trkseg = parse_track_segment parser in loop { track with trksegs = trkseg :: track.trksegs } | "link" -> let* link = parse_link parser attrs in loop { track with links = link :: track.links } | "extensions" -> let* extensions = parse_extensions parser in loop { track with extensions = extensions @ track.extensions } | _ -> let* _ = skip_element parser in loop track) | `El_end -> parser.current_element <- List.tl parser.current_element; Ok { track with trksegs = List.rev track.trksegs; links = List.rev track.links } | `Data _ -> loop track | `Dtd _ -> loop track in loop track and parse_track_segment parser = let trkseg = { trkpts = []; extensions = [] } in let rec loop trkseg = match Xmlm.input parser.input with | `El_start ((_, name), attrs) -> parser.current_element <- name :: parser.current_element; (match name with | "trkpt" -> let* (lat, lon) = parse_coordinates attrs "trkpt" in let* trkpt = parse_waypoint_data parser lat lon in loop { trkseg with trkpts = trkpt :: trkseg.trkpts } | "extensions" -> let* extensions = parse_extensions parser in loop { trkseg with extensions = extensions @ trkseg.extensions } | _ -> let* _ = skip_element parser in loop trkseg) | `El_end -> parser.current_element <- List.tl parser.current_element; Ok { trkseg with trkpts = List.rev trkseg.trkpts } | `Data _ -> loop trkseg | `Dtd _ -> loop trkseg in loop trkseg (** Main parsing function *) let parse input = let parser = make_parser input in try parse_gpx parser with | Xmlm.Error ((line, col), error) -> Error (Xml_error (Printf.sprintf "XML error at line %d, column %d: %s" line col (Xmlm.error_message error))) | exn -> Error (Invalid_xml (Printexc.to_string exn)) (** Parse from string *) let parse_string s = let input = Xmlm.make_input (`String (0, s)) in parse input