···
+
(** GPX streaming parser using xmlm *)
+
(** Parser state for streaming *)
+
mutable current_element : string list; (* Stack of current element names *)
+
mutable text_buffer : Buffer.t;
+
(** Create a new parser state *)
+
let make_parser input = {
+
text_buffer = Buffer.create 256;
+
(** Utility functions *)
+
let get_attribute name attrs =
+
let value = List.find (fun ((_, n), _) -> n = name) attrs in
+
let require_attribute name attrs element =
+
match get_attribute name attrs with
+
| Some value -> Ok value
+
| None -> Error (Missing_required_attribute (element, name))
+
let parse_float_opt s =
+
try Some (Float.of_string s)
+
try Some (int_of_string s)
+
match Ptime.of_rfc3339 s with
+
| Ok (t, _, _) -> Some t
+
(** Result binding operators *)
+
let (let*) = Result.bind
+
let parse_coordinates attrs element =
+
let* lat_str = require_attribute "lat" attrs element in
+
let* lon_str = require_attribute "lon" attrs element in
+
match (Float.of_string lat_str, Float.of_string lon_str) with
+
let* lat = Result.map_error (fun s -> Invalid_coordinate s) (latitude lat_f) in
+
let* lon = Result.map_error (fun s -> Invalid_coordinate s) (longitude lon_f) in
+
| exception _ -> Error (Invalid_coordinate "Invalid coordinate format")
+
(** Parse waypoint data from XML elements *)
+
let rec parse_waypoint_data parser lat lon =
+
let wpt = make_waypoint_data lat lon in
+
parse_waypoint_elements parser wpt
+
and parse_waypoint_elements parser wpt =
+
match Xmlm.input parser.input with
+
| `El_start ((_, name), attrs) ->
+
parser.current_element <- name :: parser.current_element;
+
let* text = parse_text_content parser in
+
(match parse_float_opt text with
+
| Some ele -> loop { wpt with ele = Some ele }
+
let* text = parse_text_content parser in
+
loop { wpt with time = parse_time text }
+
let* text = parse_text_content parser in
+
(match parse_float_opt text with
+
| Ok deg -> loop { wpt with magvar = Some deg }
+
let* text = parse_text_content parser in
+
(match parse_float_opt text with
+
| Some h -> loop { wpt with geoidheight = Some h }
+
let* text = parse_text_content parser in
+
loop { wpt with name = Some text }
+
let* text = parse_text_content parser in
+
loop { wpt with cmt = Some text }
+
let* text = parse_text_content parser in
+
loop { wpt with desc = Some text }
+
let* text = parse_text_content parser in
+
loop { wpt with src = Some text }
+
let* text = parse_text_content parser in
+
loop { wpt with sym = Some text }
+
let* text = parse_text_content parser in
+
loop { wpt with type_ = Some text }
+
let* text = parse_text_content parser in
+
loop { wpt with fix = fix_type_of_string text }
+
let* text = parse_text_content parser in
+
(match parse_int_opt text with
+
| Some s -> loop { wpt with sat = Some s }
+
| "hdop" | "vdop" | "pdop" ->
+
let* text = parse_text_content parser in
+
(match parse_float_opt text with
+
| "hdop" -> loop { wpt with hdop = Some f }
+
| "vdop" -> loop { wpt with vdop = Some f }
+
| "pdop" -> loop { wpt with pdop = Some f }
+
let* text = parse_text_content parser in
+
(match parse_float_opt text with
+
| Some f -> loop { wpt with ageofdgpsdata = Some f }
+
let* text = parse_text_content parser in
+
(match parse_int_opt text with
+
| Some id -> loop { wpt with dgpsid = Some id }
+
let* link = parse_link parser attrs in
+
loop { wpt with links = link :: wpt.links }
+
let* extensions = parse_extensions parser in
+
loop { wpt with extensions = extensions @ wpt.extensions }
+
(* Skip unknown elements *)
+
let* _ = skip_element parser in
+
parser.current_element <- List.tl parser.current_element;
+
(* Ignore text data at this level *)
+
and parse_text_content parser =
+
Buffer.clear parser.text_buffer;
+
match Xmlm.input parser.input with
+
Buffer.add_string parser.text_buffer text;
+
parser.current_element <- List.tl parser.current_element;
+
Ok (Buffer.contents parser.text_buffer)
+
Error (Invalid_xml "Unexpected element in text content")
+
and parse_link parser attrs =
+
let href = match get_attribute "href" attrs with
+
let link = { href; text = None; type_ = None } in
+
parse_link_elements parser link
+
and parse_link_elements parser link =
+
match Xmlm.input parser.input with
+
| `El_start ((_, name), _) ->
+
parser.current_element <- name :: parser.current_element;
+
let* text = parse_text_content parser in
+
loop { link with text = Some text }
+
let* type_text = parse_text_content parser in
+
loop { link with type_ = Some type_text }
+
let* _ = skip_element parser in
+
parser.current_element <- List.tl parser.current_element;
+
and parse_extensions parser =
+
match Xmlm.input parser.input with
+
| `El_start ((ns, name), attrs) ->
+
parser.current_element <- name :: parser.current_element;
+
let* ext = parse_extension parser ns name attrs in
+
parser.current_element <- List.tl parser.current_element;
+
and parse_extension parser ns name attrs =
+
let namespace = if ns = "" then None else Some ns in
+
let attributes = List.map (fun ((_, n), v) -> (n, v)) attrs in
+
let* content = parse_extension_content parser in
+
Ok { namespace; name; attributes; content }
+
and parse_extension_content parser =
+
Buffer.clear parser.text_buffer;
+
let rec loop elements =
+
match Xmlm.input parser.input with
+
Buffer.add_string parser.text_buffer text;
+
| `El_start ((ns, name), attrs) ->
+
parser.current_element <- name :: parser.current_element;
+
let* ext = parse_extension parser ns name attrs in
+
parser.current_element <- List.tl parser.current_element;
+
let text = String.trim (Buffer.contents parser.text_buffer) in
+
Ok (match (text, elements) with
+
| ("", els) -> Elements (List.rev els)
+
| (t, els) -> Mixed (t, List.rev els))
+
and skip_element parser =
+
match Xmlm.input parser.input with
+
| `El_start _ -> loop (depth + 1)
+
| `El_end when depth = 0 -> Ok ()
+
| `El_end -> loop (depth - 1)
+
| `Data _ -> loop depth
+
(** Parse a complete GPX document *)
+
let rec parse_gpx parser =
+
(* Find the GPX root element *)
+
let rec find_gpx_root () =
+
match Xmlm.input parser.input with
+
| `El_start ((_, "gpx"), attrs) ->
+
parser.current_element <- ["gpx"];
+
let* version = require_attribute "version" attrs "gpx" in
+
let* creator = require_attribute "creator" attrs "gpx" in
+
if version <> "1.1" then
+
Error (Validation_error ("Unsupported GPX version: " ^ version))
+
let* _ = skip_element parser in
+
Error (Missing_required_element "gpx")
+
let* (version, creator) = find_gpx_root () in
+
let gpx = make_gpx ~creator in
+
parse_gpx_elements parser { gpx with version }
+
and parse_gpx_elements parser gpx =
+
match Xmlm.input parser.input with
+
| `El_start ((_, name), attrs) ->
+
parser.current_element <- name :: parser.current_element;
+
let* metadata = parse_metadata parser in
+
loop { gpx with metadata = Some metadata }
+
let* (lat, lon) = parse_coordinates attrs "wpt" in
+
let* waypoint = parse_waypoint_data parser lat lon in
+
loop { gpx with waypoints = waypoint :: gpx.waypoints }
+
let* route = parse_route parser in
+
loop { gpx with routes = route :: gpx.routes }
+
let* track = parse_track parser in
+
loop { gpx with tracks = track :: gpx.tracks }
+
let* extensions = parse_extensions parser in
+
loop { gpx with extensions = extensions @ gpx.extensions }
+
let* _ = skip_element parser in
+
waypoints = List.rev gpx.waypoints;
+
routes = List.rev gpx.routes;
+
tracks = List.rev gpx.tracks }
+
and parse_metadata parser =
+
let metadata = empty_metadata in
+
let rec loop (metadata : metadata) =
+
match Xmlm.input parser.input with
+
| `El_start ((_, name), attrs) ->
+
parser.current_element <- name :: parser.current_element;
+
let* text = parse_text_content parser in
+
loop { metadata with name = Some text }
+
let* text = parse_text_content parser in
+
loop { metadata with desc = Some text }
+
let* text = parse_text_content parser in
+
loop { metadata with keywords = Some text }
+
let* text = parse_text_content parser in
+
loop { metadata with time = parse_time text }
+
let* link = parse_link parser attrs in
+
loop { metadata with links = link :: metadata.links }
+
let* extensions = parse_extensions parser in
+
loop { metadata with extensions = extensions @ metadata.extensions }
+
let* _ = skip_element parser in
+
parser.current_element <- List.tl parser.current_element;
+
Ok { metadata with links = List.rev metadata.links }
+
and parse_route parser =
+
name = None; cmt = None; desc = None; src = None; links = [];
+
number = None; type_ = None; extensions = []; rtepts = []
+
let rec loop (route : route) =
+
match Xmlm.input parser.input with
+
| `El_start ((_, name), attrs) ->
+
parser.current_element <- name :: parser.current_element;
+
let* text = parse_text_content parser in
+
loop { route with name = Some text }
+
let* text = parse_text_content parser in
+
loop { route with cmt = Some text }
+
let* text = parse_text_content parser in
+
loop { route with desc = Some text }
+
let* text = parse_text_content parser in
+
loop { route with src = Some text }
+
let* text = parse_text_content parser in
+
(match parse_int_opt text with
+
| Some n -> loop { route with number = Some n }
+
let* text = parse_text_content parser in
+
loop { route with type_ = Some text }
+
let* (lat, lon) = parse_coordinates attrs "rtept" in
+
let* rtept = parse_waypoint_data parser lat lon in
+
loop { route with rtepts = rtept :: route.rtepts }
+
let* link = parse_link parser attrs in
+
loop { route with links = link :: route.links }
+
let* extensions = parse_extensions parser in
+
loop { route with extensions = extensions @ route.extensions }
+
let* _ = skip_element parser in
+
parser.current_element <- List.tl parser.current_element;
+
rtepts = List.rev route.rtepts;
+
links = List.rev route.links }
+
and parse_track parser =
+
name = None; cmt = None; desc = None; src = None; links = [];
+
number = None; type_ = None; extensions = []; trksegs = []
+
match Xmlm.input parser.input with
+
| `El_start ((_, name), attrs) ->
+
parser.current_element <- name :: parser.current_element;
+
let* text = parse_text_content parser in
+
loop { track with name = Some text }
+
let* text = parse_text_content parser in
+
loop { track with cmt = Some text }
+
let* text = parse_text_content parser in
+
loop { track with desc = Some text }
+
let* text = parse_text_content parser in
+
loop { track with src = Some text }
+
let* text = parse_text_content parser in
+
(match parse_int_opt text with
+
| Some n -> loop { track with number = Some n }
+
let* text = parse_text_content parser in
+
loop { track with type_ = Some text }
+
let* trkseg = parse_track_segment parser in
+
loop { track with trksegs = trkseg :: track.trksegs }
+
let* link = parse_link parser attrs in
+
loop { track with links = link :: track.links }
+
let* extensions = parse_extensions parser in
+
loop { track with extensions = extensions @ track.extensions }
+
let* _ = skip_element parser in
+
parser.current_element <- List.tl parser.current_element;
+
trksegs = List.rev track.trksegs;
+
links = List.rev track.links }
+
and parse_track_segment parser =
+
let trkseg = { trkpts = []; extensions = [] } in
+
match Xmlm.input parser.input with
+
| `El_start ((_, name), attrs) ->
+
parser.current_element <- name :: parser.current_element;
+
let* (lat, lon) = parse_coordinates attrs "trkpt" in
+
let* trkpt = parse_waypoint_data parser lat lon in
+
loop { trkseg with trkpts = trkpt :: trkseg.trkpts }
+
let* extensions = parse_extensions parser in
+
loop { trkseg with extensions = extensions @ trkseg.extensions }
+
let* _ = skip_element parser in
+
parser.current_element <- List.tl parser.current_element;
+
Ok { trkseg with trkpts = List.rev trkseg.trkpts }
+
(** Main parsing function *)
+
let parser = make_parser input in
+
| Xmlm.Error ((line, col), error) ->
+
Error (Xml_error (Printf.sprintf "XML error at line %d, column %d: %s"
+
line col (Xmlm.error_message error)))
+
Error (Invalid_xml (Printexc.to_string exn))
+
(** Parse from string *)
+
let input = Xmlm.make_input (`String (0, s)) in