1(** GPX streaming parser using xmlm *)
2
3open Types
4
5(** Parser state for streaming *)
6type parser_state = {
7 input : Xmlm.input;
8 mutable current_element : string list; (* Stack of current element names *)
9 text_buffer : Buffer.t;
10}
11
12(** Create a new parser state *)
13let make_parser input = {
14 input;
15 current_element = [];
16 text_buffer = Buffer.create 256;
17}
18
19(** Utility functions *)
20
21let get_attribute name attrs =
22 try
23 let value = List.find (fun ((_, n), _) -> n = name) attrs in
24 Some (snd value)
25 with Not_found -> None
26
27let require_attribute name attrs element =
28 match get_attribute name attrs with
29 | Some value -> Ok value
30 | None -> Error (Missing_required_attribute (element, name))
31
32let parse_float_opt s =
33 try Some (Float.of_string s)
34 with _ -> None
35
36let parse_int_opt s =
37 try Some (int_of_string s)
38 with _ -> None
39
40let parse_time s =
41 match Ptime.of_rfc3339 s with
42 | Ok (t, _, _) -> Some t
43 | Error _ -> None
44
45(** Result binding operators *)
46let (let*) = Result.bind
47
48let parse_coordinates attrs element =
49 let* lat_str = require_attribute "lat" attrs element in
50 let* lon_str = require_attribute "lon" attrs element in
51 match (Float.of_string lat_str, Float.of_string lon_str) with
52 | (lat_f, lon_f) ->
53 let* lat = Result.map_error (fun s -> Invalid_coordinate s) (latitude lat_f) in
54 let* lon = Result.map_error (fun s -> Invalid_coordinate s) (longitude lon_f) in
55 Ok (lat, lon)
56 | exception _ -> Error (Invalid_coordinate "Invalid coordinate format")
57
58(** Parse waypoint data from XML elements *)
59let rec parse_waypoint_data parser lat lon =
60 let wpt = make_waypoint_data lat lon in
61 parse_waypoint_elements parser wpt
62
63and parse_waypoint_elements parser wpt =
64 let rec loop wpt =
65 match Xmlm.input parser.input with
66 | `El_start ((_, name), attrs) ->
67 parser.current_element <- name :: parser.current_element;
68 (match name with
69 | "ele" ->
70 let* text = parse_text_content parser in
71 (match parse_float_opt text with
72 | Some ele -> loop { wpt with ele = Some ele }
73 | None -> loop wpt)
74 | "time" ->
75 let* text = parse_text_content parser in
76 loop { wpt with time = parse_time text }
77 | "magvar" ->
78 let* text = parse_text_content parser in
79 (match parse_float_opt text with
80 | Some f ->
81 (match degrees f with
82 | Ok deg -> loop { wpt with magvar = Some deg }
83 | Error _ -> loop wpt)
84 | None -> loop wpt)
85 | "geoidheight" ->
86 let* text = parse_text_content parser in
87 (match parse_float_opt text with
88 | Some h -> loop { wpt with geoidheight = Some h }
89 | None -> loop wpt)
90 | "name" ->
91 let* text = parse_text_content parser in
92 loop { wpt with name = Some text }
93 | "cmt" ->
94 let* text = parse_text_content parser in
95 loop { wpt with cmt = Some text }
96 | "desc" ->
97 let* text = parse_text_content parser in
98 loop { wpt with desc = Some text }
99 | "src" ->
100 let* text = parse_text_content parser in
101 loop { wpt with src = Some text }
102 | "sym" ->
103 let* text = parse_text_content parser in
104 loop { wpt with sym = Some text }
105 | "type" ->
106 let* text = parse_text_content parser in
107 loop { wpt with type_ = Some text }
108 | "fix" ->
109 let* text = parse_text_content parser in
110 loop { wpt with fix = fix_type_of_string text }
111 | "sat" ->
112 let* text = parse_text_content parser in
113 (match parse_int_opt text with
114 | Some s -> loop { wpt with sat = Some s }
115 | None -> loop wpt)
116 | "hdop" | "vdop" | "pdop" ->
117 let* text = parse_text_content parser in
118 (match parse_float_opt text with
119 | Some f ->
120 (match name with
121 | "hdop" -> loop { wpt with hdop = Some f }
122 | "vdop" -> loop { wpt with vdop = Some f }
123 | "pdop" -> loop { wpt with pdop = Some f }
124 | _ -> loop wpt)
125 | None -> loop wpt)
126 | "ageofdgpsdata" ->
127 let* text = parse_text_content parser in
128 (match parse_float_opt text with
129 | Some f -> loop { wpt with ageofdgpsdata = Some f }
130 | None -> loop wpt)
131 | "dgpsid" ->
132 let* text = parse_text_content parser in
133 (match parse_int_opt text with
134 | Some id -> loop { wpt with dgpsid = Some id }
135 | None -> loop wpt)
136 | "link" ->
137 let* link = parse_link parser attrs in
138 loop { wpt with links = link :: wpt.links }
139 | "extensions" ->
140 let* extensions = parse_extensions parser in
141 loop { wpt with extensions = extensions @ wpt.extensions }
142 | _ ->
143 (* Skip unknown elements *)
144 let* _ = skip_element parser in
145 loop wpt)
146 | `El_end ->
147 parser.current_element <- List.tl parser.current_element;
148 Ok wpt
149 | `Data _ ->
150 (* Ignore text data at this level *)
151 loop wpt
152 | `Dtd _ ->
153 loop wpt
154 in
155 loop wpt
156
157and parse_text_content parser =
158 Buffer.clear parser.text_buffer;
159 let rec loop () =
160 match Xmlm.input parser.input with
161 | `Data text ->
162 Buffer.add_string parser.text_buffer text;
163 loop ()
164 | `El_end ->
165 parser.current_element <- List.tl parser.current_element;
166 Ok (Buffer.contents parser.text_buffer)
167 | `El_start _ ->
168 Error (Invalid_xml "Unexpected element in text content")
169 | `Dtd _ ->
170 loop ()
171 in
172 loop ()
173
174and parse_link parser attrs =
175 let href = match get_attribute "href" attrs with
176 | Some h -> h
177 | None -> ""
178 in
179 let link = { href; text = None; type_ = None } in
180 parse_link_elements parser link
181
182and parse_link_elements parser link =
183 let rec loop link =
184 match Xmlm.input parser.input with
185 | `El_start ((_, name), _) ->
186 parser.current_element <- name :: parser.current_element;
187 (match name with
188 | "text" ->
189 let* text = parse_text_content parser in
190 loop { link with text = Some text }
191 | "type" ->
192 let* type_text = parse_text_content parser in
193 loop { link with type_ = Some type_text }
194 | _ ->
195 let* _ = skip_element parser in
196 loop link)
197 | `El_end ->
198 parser.current_element <- List.tl parser.current_element;
199 Ok link
200 | `Data _ ->
201 loop link
202 | `Dtd _ ->
203 loop link
204 in
205 loop link
206
207and parse_extensions parser =
208 let rec loop acc =
209 match Xmlm.input parser.input with
210 | `El_start ((ns, name), attrs) ->
211 parser.current_element <- name :: parser.current_element;
212 let* ext = parse_extension parser ns name attrs in
213 loop (ext :: acc)
214 | `El_end ->
215 parser.current_element <- List.tl parser.current_element;
216 Ok (List.rev acc)
217 | `Data _ ->
218 loop acc
219 | `Dtd _ ->
220 loop acc
221 in
222 loop []
223
224and parse_extension parser ns name attrs =
225 let namespace = if ns = "" then None else Some ns in
226 let attributes = List.map (fun ((_, n), v) -> (n, v)) attrs in
227 let* content = parse_extension_content parser in
228 Ok { namespace; name; attributes; content }
229
230and parse_extension_content parser =
231 Buffer.clear parser.text_buffer;
232 let rec loop elements =
233 match Xmlm.input parser.input with
234 | `Data text ->
235 Buffer.add_string parser.text_buffer text;
236 loop elements
237 | `El_start ((ns, name), attrs) ->
238 parser.current_element <- name :: parser.current_element;
239 let* ext = parse_extension parser ns name attrs in
240 loop (ext :: elements)
241 | `El_end ->
242 parser.current_element <- List.tl parser.current_element;
243 let text = String.trim (Buffer.contents parser.text_buffer) in
244 Ok (match (text, elements) with
245 | ("", []) -> Text ""
246 | ("", els) -> Elements (List.rev els)
247 | (t, []) -> Text t
248 | (t, els) -> Mixed (t, List.rev els))
249 | `Dtd _ ->
250 loop elements
251 in
252 loop []
253
254and skip_element parser =
255 let rec loop depth =
256 match Xmlm.input parser.input with
257 | `El_start _ -> loop (depth + 1)
258 | `El_end when depth = 0 -> Ok ()
259 | `El_end -> loop (depth - 1)
260 | `Data _ -> loop depth
261 | `Dtd _ -> loop depth
262 in
263 loop 0
264
265(** Parse a complete GPX document *)
266let rec parse_gpx parser =
267 (* Find the GPX root element *)
268 let rec find_gpx_root () =
269 match Xmlm.input parser.input with
270 | `El_start ((_, "gpx"), attrs) ->
271 parser.current_element <- ["gpx"];
272 let* version = require_attribute "version" attrs "gpx" in
273 let* creator = require_attribute "creator" attrs "gpx" in
274 if version <> "1.0" && version <> "1.1" then
275 Error (Validation_error ("Unsupported GPX version: " ^ version ^ " (supported: 1.0, 1.1)"))
276 else
277 Ok (version, creator)
278 | `El_start _ ->
279 let* _ = skip_element parser in
280 find_gpx_root ()
281 | `Dtd _ ->
282 find_gpx_root ()
283 | `El_end ->
284 Error (Missing_required_element "gpx")
285 | `Data _ ->
286 find_gpx_root ()
287 in
288
289 let* (version, creator) = find_gpx_root () in
290 let gpx = make_gpx ~creator in
291 parse_gpx_elements parser { gpx with version }
292
293and parse_gpx_elements parser gpx =
294 let rec loop gpx =
295 match Xmlm.input parser.input with
296 | `El_start ((_, name), attrs) ->
297 parser.current_element <- name :: parser.current_element;
298 (match name with
299 | "metadata" ->
300 let* metadata = parse_metadata parser in
301 loop { gpx with metadata = Some metadata }
302 | "wpt" ->
303 let* (lat, lon) = parse_coordinates attrs "wpt" in
304 let* waypoint = parse_waypoint_data parser lat lon in
305 loop { gpx with waypoints = waypoint :: gpx.waypoints }
306 | "rte" ->
307 let* route = parse_route parser in
308 loop { gpx with routes = route :: gpx.routes }
309 | "trk" ->
310 let* track = parse_track parser in
311 loop { gpx with tracks = track :: gpx.tracks }
312 | "extensions" ->
313 let* extensions = parse_extensions parser in
314 loop { gpx with extensions = extensions @ gpx.extensions }
315 | _ ->
316 let* _ = skip_element parser in
317 loop gpx)
318 | `El_end ->
319 Ok { gpx with
320 waypoints = List.rev gpx.waypoints;
321 routes = List.rev gpx.routes;
322 tracks = List.rev gpx.tracks }
323 | `Data _ ->
324 loop gpx
325 | `Dtd _ ->
326 loop gpx
327 in
328 loop gpx
329
330and parse_metadata parser =
331 let metadata = empty_metadata in
332 let rec loop (metadata : metadata) =
333 match Xmlm.input parser.input with
334 | `El_start ((_, name), attrs) ->
335 parser.current_element <- name :: parser.current_element;
336 (match name with
337 | "name" ->
338 let* text = parse_text_content parser in
339 loop { metadata with name = Some text }
340 | "desc" ->
341 let* text = parse_text_content parser in
342 loop { metadata with desc = Some text }
343 | "keywords" ->
344 let* text = parse_text_content parser in
345 loop { metadata with keywords = Some text }
346 | "time" ->
347 let* text = parse_text_content parser in
348 loop { metadata with time = parse_time text }
349 | "link" ->
350 let* link = parse_link parser attrs in
351 loop { metadata with links = link :: metadata.links }
352 | "extensions" ->
353 let* extensions = parse_extensions parser in
354 loop { metadata with extensions = extensions @ metadata.extensions }
355 | _ ->
356 let* _ = skip_element parser in
357 loop metadata)
358 | `El_end ->
359 parser.current_element <- List.tl parser.current_element;
360 Ok { metadata with links = List.rev metadata.links }
361 | `Data _ ->
362 loop metadata
363 | `Dtd _ ->
364 loop metadata
365 in
366 loop metadata
367
368and parse_route parser =
369 let route = {
370 name = None; cmt = None; desc = None; src = None; links = [];
371 number = None; type_ = None; extensions = []; rtepts = []
372 } in
373 let rec loop (route : route) =
374 match Xmlm.input parser.input with
375 | `El_start ((_, name), attrs) ->
376 parser.current_element <- name :: parser.current_element;
377 (match name with
378 | "name" ->
379 let* text = parse_text_content parser in
380 loop { route with name = Some text }
381 | "cmt" ->
382 let* text = parse_text_content parser in
383 loop { route with cmt = Some text }
384 | "desc" ->
385 let* text = parse_text_content parser in
386 loop { route with desc = Some text }
387 | "src" ->
388 let* text = parse_text_content parser in
389 loop { route with src = Some text }
390 | "number" ->
391 let* text = parse_text_content parser in
392 (match parse_int_opt text with
393 | Some n -> loop { route with number = Some n }
394 | None -> loop route)
395 | "type" ->
396 let* text = parse_text_content parser in
397 loop { route with type_ = Some text }
398 | "rtept" ->
399 let* (lat, lon) = parse_coordinates attrs "rtept" in
400 let* rtept = parse_waypoint_data parser lat lon in
401 loop { route with rtepts = rtept :: route.rtepts }
402 | "link" ->
403 let* link = parse_link parser attrs in
404 loop { route with links = link :: route.links }
405 | "extensions" ->
406 let* extensions = parse_extensions parser in
407 loop { route with extensions = extensions @ route.extensions }
408 | _ ->
409 let* _ = skip_element parser in
410 loop route)
411 | `El_end ->
412 parser.current_element <- List.tl parser.current_element;
413 Ok { route with
414 rtepts = List.rev route.rtepts;
415 links = List.rev route.links }
416 | `Data _ ->
417 loop route
418 | `Dtd _ ->
419 loop route
420 in
421 loop route
422
423and parse_track parser =
424 let track = {
425 name = None; cmt = None; desc = None; src = None; links = [];
426 number = None; type_ = None; extensions = []; trksegs = []
427 } in
428 let rec loop track =
429 match Xmlm.input parser.input with
430 | `El_start ((_, name), attrs) ->
431 parser.current_element <- name :: parser.current_element;
432 (match name with
433 | "name" ->
434 let* text = parse_text_content parser in
435 loop { track with name = Some text }
436 | "cmt" ->
437 let* text = parse_text_content parser in
438 loop { track with cmt = Some text }
439 | "desc" ->
440 let* text = parse_text_content parser in
441 loop { track with desc = Some text }
442 | "src" ->
443 let* text = parse_text_content parser in
444 loop { track with src = Some text }
445 | "number" ->
446 let* text = parse_text_content parser in
447 (match parse_int_opt text with
448 | Some n -> loop { track with number = Some n }
449 | None -> loop track)
450 | "type" ->
451 let* text = parse_text_content parser in
452 loop { track with type_ = Some text }
453 | "trkseg" ->
454 let* trkseg = parse_track_segment parser in
455 loop { track with trksegs = trkseg :: track.trksegs }
456 | "link" ->
457 let* link = parse_link parser attrs in
458 loop { track with links = link :: track.links }
459 | "extensions" ->
460 let* extensions = parse_extensions parser in
461 loop { track with extensions = extensions @ track.extensions }
462 | _ ->
463 let* _ = skip_element parser in
464 loop track)
465 | `El_end ->
466 parser.current_element <- List.tl parser.current_element;
467 Ok { track with
468 trksegs = List.rev track.trksegs;
469 links = List.rev track.links }
470 | `Data _ ->
471 loop track
472 | `Dtd _ ->
473 loop track
474 in
475 loop track
476
477and parse_track_segment parser =
478 let trkseg = { trkpts = []; extensions = [] } in
479 let rec loop trkseg =
480 match Xmlm.input parser.input with
481 | `El_start ((_, name), attrs) ->
482 parser.current_element <- name :: parser.current_element;
483 (match name with
484 | "trkpt" ->
485 let* (lat, lon) = parse_coordinates attrs "trkpt" in
486 let* trkpt = parse_waypoint_data parser lat lon in
487 loop { trkseg with trkpts = trkpt :: trkseg.trkpts }
488 | "extensions" ->
489 let* extensions = parse_extensions parser in
490 loop { trkseg with extensions = extensions @ trkseg.extensions }
491 | _ ->
492 let* _ = skip_element parser in
493 loop trkseg)
494 | `El_end ->
495 parser.current_element <- List.tl parser.current_element;
496 Ok { trkseg with trkpts = List.rev trkseg.trkpts }
497 | `Data _ ->
498 loop trkseg
499 | `Dtd _ ->
500 loop trkseg
501 in
502 loop trkseg
503
504(** Main parsing function *)
505let parse input =
506 let parser = make_parser input in
507 try
508 parse_gpx parser
509 with
510 | Xmlm.Error ((line, col), error) ->
511 Error (Xml_error (Printf.sprintf "XML error at line %d, column %d: %s"
512 line col (Xmlm.error_message error)))
513 | exn ->
514 Error (Invalid_xml (Printexc.to_string exn))
515
516(** Parse from string *)
517let parse_string s =
518 let input = Xmlm.make_input (`String (0, s)) in
519 parse input