GPS Exchange Format library/CLI in OCaml
1(** GPX streaming parser using xmlm *)
2
3(** Parser state for streaming *)
4type parser_state = {
5 input : Xmlm.input;
6 mutable current_element : string list; (* Stack of current element names *)
7 text_buffer : Buffer.t;
8}
9
10(** Create a new parser state *)
11let make_parser input = {
12 input;
13 current_element = [];
14 text_buffer = Buffer.create 256;
15}
16
17(** Utility functions *)
18
19let get_attribute name attrs =
20 try
21 let value = List.find (fun ((_, n), _) -> n = name) attrs in
22 Some (snd value)
23 with Not_found -> None
24
25let require_attribute name attrs element =
26 match get_attribute name attrs with
27 | Some value -> Ok value
28 | None -> Error (Error.missing_attribute element name)
29
30let parse_float_opt s =
31 try Some (Float.of_string s)
32 with _ -> None
33
34let parse_int_opt s =
35 try Some (int_of_string s)
36 with _ -> None
37
38let parse_time s =
39 match Ptime.of_rfc3339 s with
40 | Ok (t, _, _) -> Some t
41 | Error _ -> None
42
43(** Result binding operators *)
44let (let*) = Result.bind
45
46let parse_coordinates attrs element =
47 let* lat_str = require_attribute "lat" attrs element in
48 let* lon_str = require_attribute "lon" attrs element in
49 match (Float.of_string lat_str, Float.of_string lon_str) with
50 | (lat_f, lon_f) ->
51 let* lat = Result.map_error Error.invalid_coordinate (Coordinate.latitude lat_f) in
52 let* lon = Result.map_error Error.invalid_coordinate (Coordinate.longitude lon_f) in
53 Ok (lat, lon)
54 | exception _ -> Error (Error.invalid_coordinate "Invalid coordinate format")
55
56(** Parse waypoint data from XML elements *)
57let rec parse_waypoint_data parser lat lon =
58 let wpt = Waypoint.make lat lon in
59 parse_waypoint_elements parser wpt
60
61and parse_waypoint_elements parser wpt =
62 let rec loop wpt =
63 match Xmlm.input parser.input with
64 | `El_start ((_, name), attrs) ->
65 parser.current_element <- name :: parser.current_element;
66 (match name with
67 | "ele" ->
68 let* text = parse_text_content parser in
69 (match parse_float_opt text with
70 | Some ele -> loop (Waypoint.with_elevation wpt ele)
71 | None -> loop wpt)
72 | "time" ->
73 let* text = parse_text_content parser in
74 loop (Waypoint.with_time wpt (parse_time text))
75 | "magvar" ->
76 let* text = parse_text_content parser in
77 (match parse_float_opt text with
78 | Some f ->
79 (match Coordinate.degrees f with
80 | Ok deg -> loop (Waypoint.with_magvar wpt deg)
81 | Error _ -> loop wpt)
82 | None -> loop wpt)
83 | "geoidheight" ->
84 let* text = parse_text_content parser in
85 (match parse_float_opt text with
86 | Some h -> loop (Waypoint.with_geoidheight wpt h)
87 | None -> loop wpt)
88 | "name" ->
89 let* text = parse_text_content parser in
90 loop (Waypoint.with_name wpt text)
91 | "cmt" ->
92 let* text = parse_text_content parser in
93 loop (Waypoint.with_comment wpt text)
94 | "desc" ->
95 let* text = parse_text_content parser in
96 loop (Waypoint.with_description wpt text)
97 | "src" ->
98 let* text = parse_text_content parser in
99 loop (Waypoint.with_source wpt text)
100 | "sym" ->
101 let* text = parse_text_content parser in
102 loop (Waypoint.with_symbol wpt text)
103 | "type" ->
104 let* text = parse_text_content parser in
105 loop (Waypoint.with_type wpt text)
106 | "fix" ->
107 let* text = parse_text_content parser in
108 loop (Waypoint.with_fix wpt (Waypoint.fix_type_of_string text))
109 | "sat" ->
110 let* text = parse_text_content parser in
111 (match parse_int_opt text with
112 | Some s -> loop (Waypoint.with_sat wpt s)
113 | None -> loop wpt)
114 | "hdop" ->
115 let* text = parse_text_content parser in
116 (match parse_float_opt text with
117 | Some f -> loop (Waypoint.with_hdop wpt f)
118 | None -> loop wpt)
119 | "vdop" ->
120 let* text = parse_text_content parser in
121 (match parse_float_opt text with
122 | Some f -> loop (Waypoint.with_vdop wpt f)
123 | None -> loop wpt)
124 | "pdop" ->
125 let* text = parse_text_content parser in
126 (match parse_float_opt text with
127 | Some f -> loop (Waypoint.with_pdop wpt f)
128 | None -> loop wpt)
129 | "ageofdgpsdata" ->
130 let* text = parse_text_content parser in
131 (match parse_float_opt text with
132 | Some f -> loop (Waypoint.with_ageofdgpsdata wpt f)
133 | None -> loop wpt)
134 | "dgpsid" ->
135 let* text = parse_text_content parser in
136 (match parse_int_opt text with
137 | Some id -> loop (Waypoint.with_dgpsid wpt id)
138 | None -> loop wpt)
139 | "link" ->
140 let* link = parse_link parser attrs in
141 loop (Waypoint.add_link wpt link)
142 | "extensions" ->
143 let* extensions = parse_extensions parser in
144 loop (Waypoint.add_extensions wpt extensions)
145 | _ ->
146 (* Skip unknown elements *)
147 let* _ = skip_element parser in
148 loop wpt)
149 | `El_end ->
150 parser.current_element <- List.tl parser.current_element;
151 Ok wpt
152 | `Data _ ->
153 (* Ignore text data at this level *)
154 loop wpt
155 | `Dtd _ ->
156 loop wpt
157 in
158 loop wpt
159
160and parse_text_content parser =
161 Buffer.clear parser.text_buffer;
162 let rec loop () =
163 match Xmlm.input parser.input with
164 | `Data text ->
165 Buffer.add_string parser.text_buffer text;
166 loop ()
167 | `El_end ->
168 parser.current_element <- List.tl parser.current_element;
169 Ok (Buffer.contents parser.text_buffer)
170 | `El_start _ ->
171 Error (Error.invalid_xml "Unexpected element in text content")
172 | `Dtd _ ->
173 loop ()
174 in
175 loop ()
176
177and parse_link parser attrs =
178 let href = match get_attribute "href" attrs with
179 | Some h -> h
180 | None -> ""
181 in
182 let link = Link.make ~href () in
183 parse_link_elements parser link
184
185and parse_link_elements parser link =
186 let rec loop link =
187 match Xmlm.input parser.input with
188 | `El_start ((_, name), _) ->
189 parser.current_element <- name :: parser.current_element;
190 (match name with
191 | "text" ->
192 let* text = parse_text_content parser in
193 loop (Link.with_text link text)
194 | "type" ->
195 let* type_text = parse_text_content parser in
196 loop (Link.with_type link type_text)
197 | _ ->
198 let* _ = skip_element parser in
199 loop link)
200 | `El_end ->
201 parser.current_element <- List.tl parser.current_element;
202 Ok link
203 | `Data _ ->
204 loop link
205 | `Dtd _ ->
206 loop link
207 in
208 loop link
209
210and parse_extensions parser =
211 let rec loop acc =
212 match Xmlm.input parser.input with
213 | `El_start ((ns, name), attrs) ->
214 parser.current_element <- name :: parser.current_element;
215 let* ext = parse_extension parser ns name attrs in
216 loop (ext :: acc)
217 | `El_end ->
218 parser.current_element <- List.tl parser.current_element;
219 Ok (List.rev acc)
220 | `Data _ ->
221 loop acc
222 | `Dtd _ ->
223 loop acc
224 in
225 loop []
226
227and parse_extension parser ns name attrs =
228 let namespace = if ns = "" then None else Some ns in
229 let attributes = List.map (fun ((_, n), v) -> (n, v)) attrs in
230 let* content = parse_extension_content parser in
231 Ok (Extension.make ?namespace ~name ~attributes ~content ())
232
233and parse_extension_content parser =
234 Buffer.clear parser.text_buffer;
235 let rec loop elements =
236 match Xmlm.input parser.input with
237 | `Data text ->
238 Buffer.add_string parser.text_buffer text;
239 loop elements
240 | `El_start ((ns, name), attrs) ->
241 parser.current_element <- name :: parser.current_element;
242 let* ext = parse_extension parser ns name attrs in
243 loop (ext :: elements)
244 | `El_end ->
245 parser.current_element <- List.tl parser.current_element;
246 let text = String.trim (Buffer.contents parser.text_buffer) in
247 Ok (match (text, elements) with
248 | ("", []) -> Extension.text_content ""
249 | ("", els) -> Extension.elements_content (List.rev els)
250 | (t, []) -> Extension.text_content t
251 | (t, els) -> Extension.mixed_content t (List.rev els))
252 | `Dtd _ ->
253 loop elements
254 in
255 loop []
256
257and skip_element parser =
258 let rec loop depth =
259 match Xmlm.input parser.input with
260 | `El_start _ -> loop (depth + 1)
261 | `El_end when depth = 0 -> Ok ()
262 | `El_end -> loop (depth - 1)
263 | `Data _ -> loop depth
264 | `Dtd _ -> loop depth
265 in
266 loop 0
267
268(** Parse a complete GPX document *)
269let rec parse_gpx parser =
270 (* Find the GPX root element *)
271 let rec find_gpx_root () =
272 match Xmlm.input parser.input with
273 | `El_start ((_, "gpx"), attrs) ->
274 parser.current_element <- ["gpx"];
275 let* version = require_attribute "version" attrs "gpx" in
276 let* creator = require_attribute "creator" attrs "gpx" in
277 if version <> "1.0" && version <> "1.1" then
278 Error (Error.validation_error ("Unsupported GPX version: " ^ version ^ " (supported: 1.0, 1.1)"))
279 else
280 Ok (version, creator)
281 | `El_start _ ->
282 let* _ = skip_element parser in
283 find_gpx_root ()
284 | `Dtd _ ->
285 find_gpx_root ()
286 | `El_end ->
287 Error (Error.missing_element "gpx")
288 | `Data _ ->
289 find_gpx_root ()
290 in
291
292 let* (version, creator) = find_gpx_root () in
293 let gpx = Doc.empty ~creator in
294 parse_gpx_elements parser { gpx with version }
295
296and parse_gpx_elements parser gpx =
297 let rec loop gpx =
298 match Xmlm.input parser.input with
299 | `El_start ((_, name), attrs) ->
300 parser.current_element <- name :: parser.current_element;
301 (match name with
302 | "metadata" ->
303 let* metadata = parse_metadata parser in
304 loop (Doc.with_metadata gpx metadata)
305 | "wpt" ->
306 let* (lat, lon) = parse_coordinates attrs "wpt" in
307 let* waypoint = parse_waypoint_data parser lat lon in
308 loop (Doc.add_waypoint gpx waypoint)
309 | "rte" ->
310 let* route = parse_route parser in
311 loop (Doc.add_route gpx route)
312 | "trk" ->
313 let* track = parse_track parser in
314 loop (Doc.add_track gpx track)
315 | "extensions" ->
316 let* extensions = parse_extensions parser in
317 loop (Doc.add_extensions gpx extensions)
318 | _ ->
319 let* _ = skip_element parser in
320 loop gpx)
321 | `El_end ->
322 Ok gpx
323 | `Data _ ->
324 loop gpx
325 | `Dtd _ ->
326 loop gpx
327 in
328 loop gpx
329
330and parse_author parser =
331 let rec loop name email link =
332 match Xmlm.input parser.input with
333 | `El_start ((_, element_name), attrs) ->
334 parser.current_element <- element_name :: parser.current_element;
335 (match element_name with
336 | "name" ->
337 let* text = parse_text_content parser in
338 loop (Some text) email link
339 | "email" ->
340 let* email_addr = parse_email_attrs attrs in
341 let* _ = skip_element parser in
342 loop name (Some email_addr) link
343 | "link" ->
344 let* parsed_link = parse_link parser attrs in
345 loop name email (Some parsed_link)
346 | _ ->
347 let* _ = skip_element parser in
348 loop name email link)
349 | `El_end ->
350 parser.current_element <- List.tl parser.current_element;
351 Ok (Link.make_person ?name ?email ?link ())
352 | `Data _ ->
353 loop name email link
354 | `Dtd _ ->
355 loop name email link
356 in
357 loop None None None
358
359and parse_email_attrs attrs =
360 let get_attr name =
361 List.find_map (fun ((_, attr_name), value) ->
362 if attr_name = name then Some value else None
363 ) attrs
364 in
365 match get_attr "id", get_attr "domain" with
366 | Some id, Some domain -> Ok (id ^ "@" ^ domain)
367 | _ -> Error (Error.invalid_xml "Missing email id or domain attributes")
368
369and parse_copyright parser attrs =
370 let get_attr name =
371 List.find_map (fun ((_, attr_name), value) ->
372 if attr_name = name then Some value else None
373 ) attrs
374 in
375 let author = get_attr "author" in
376 let rec loop year license =
377 match Xmlm.input parser.input with
378 | `El_start ((_, element_name), _) ->
379 parser.current_element <- element_name :: parser.current_element;
380 (match element_name with
381 | "year" ->
382 let* text = parse_text_content parser in
383 (match parse_int_opt text with
384 | Some y -> loop (Some y) license
385 | None -> loop year license)
386 | "license" ->
387 let* text = parse_text_content parser in
388 loop year (Some text)
389 | _ ->
390 let* _ = skip_element parser in
391 loop year license)
392 | `El_end ->
393 parser.current_element <- List.tl parser.current_element;
394 (match author with
395 | Some auth -> Ok (Link.make_copyright ~author:auth ?year ?license ())
396 | None -> Error (Error.invalid_xml "Missing copyright author attribute"))
397 | `Data _ ->
398 loop year license
399 | `Dtd _ ->
400 loop year license
401 in
402 loop None None
403
404and parse_bounds parser attrs =
405 let get_attr name =
406 List.find_map (fun ((_, attr_name), value) ->
407 if attr_name = name then Some value else None
408 ) attrs
409 in
410 let minlat_str = get_attr "minlat" in
411 let minlon_str = get_attr "minlon" in
412 let maxlat_str = get_attr "maxlat" in
413 let maxlon_str = get_attr "maxlon" in
414
415 (* Skip content since bounds is a self-closing element *)
416 let rec skip_bounds_content () =
417 match Xmlm.input parser.input with
418 | `El_end ->
419 parser.current_element <- List.tl parser.current_element;
420 Ok ()
421 | `Data _ -> skip_bounds_content ()
422 | _ -> skip_bounds_content ()
423 in
424 let* () = skip_bounds_content () in
425
426 match minlat_str, minlon_str, maxlat_str, maxlon_str with
427 | Some minlat, Some minlon, Some maxlat, Some maxlon ->
428 (match
429 Float.of_string_opt minlat, Float.of_string_opt minlon,
430 Float.of_string_opt maxlat, Float.of_string_opt maxlon
431 with
432 | Some minlat_f, Some minlon_f, Some maxlat_f, Some maxlon_f ->
433 (match Metadata.Bounds.make_from_floats ~minlat:minlat_f ~minlon:minlon_f ~maxlat:maxlat_f ~maxlon:maxlon_f with
434 | Ok bounds -> Ok bounds
435 | Error msg -> Error (Error.invalid_xml ("Invalid bounds: " ^ msg)))
436 | _ -> Error (Error.invalid_xml ("Invalid bounds coordinates")))
437 | _ -> Error (Error.invalid_xml ("Missing bounds attributes"))
438
439and parse_metadata parser =
440 let metadata = Metadata.empty in
441 let rec loop metadata =
442 match Xmlm.input parser.input with
443 | `El_start ((_, name), attrs) ->
444 parser.current_element <- name :: parser.current_element;
445 (match name with
446 | "name" ->
447 let* text = parse_text_content parser in
448 loop (Metadata.with_name metadata text)
449 | "desc" ->
450 let* text = parse_text_content parser in
451 loop (Metadata.with_description metadata text)
452 | "keywords" ->
453 let* text = parse_text_content parser in
454 loop (Metadata.with_keywords metadata text)
455 | "time" ->
456 let* text = parse_text_content parser in
457 loop (Metadata.with_time metadata (parse_time text))
458 | "link" ->
459 let* link = parse_link parser attrs in
460 loop (Metadata.add_link metadata link)
461 | "author" ->
462 let* author = parse_author parser in
463 loop (Metadata.with_author metadata author)
464 | "copyright" ->
465 let* copyright = parse_copyright parser attrs in
466 loop (Metadata.with_copyright metadata copyright)
467 | "bounds" ->
468 let* bounds = parse_bounds parser attrs in
469 loop (Metadata.with_bounds metadata bounds)
470 | "extensions" ->
471 let* extensions = parse_extensions parser in
472 loop (Metadata.add_extensions metadata extensions)
473 | _ ->
474 let* _ = skip_element parser in
475 loop metadata)
476 | `El_end ->
477 parser.current_element <- List.tl parser.current_element;
478 Ok metadata
479 | `Data _ ->
480 loop metadata
481 | `Dtd _ ->
482 loop metadata
483 in
484 loop metadata
485
486and parse_route parser =
487 let route = Route.empty in
488 let rec loop route =
489 match Xmlm.input parser.input with
490 | `El_start ((_, name), attrs) ->
491 parser.current_element <- name :: parser.current_element;
492 (match name with
493 | "name" ->
494 let* text = parse_text_content parser in
495 loop (Route.with_name route text)
496 | "cmt" ->
497 let* text = parse_text_content parser in
498 loop (Route.with_comment route text)
499 | "desc" ->
500 let* text = parse_text_content parser in
501 loop (Route.with_description route text)
502 | "src" ->
503 let* text = parse_text_content parser in
504 loop (Route.with_source route text)
505 | "number" ->
506 let* text = parse_text_content parser in
507 (match parse_int_opt text with
508 | Some n -> loop (Route.with_number route n)
509 | None -> loop route)
510 | "type" ->
511 let* text = parse_text_content parser in
512 loop (Route.with_type route text)
513 | "rtept" ->
514 let* (lat, lon) = parse_coordinates attrs "rtept" in
515 let* rtept = parse_waypoint_data parser lat lon in
516 loop (Route.add_point route rtept)
517 | "link" ->
518 let* link = parse_link parser attrs in
519 loop (Route.add_link route link)
520 | "extensions" ->
521 let* extensions = parse_extensions parser in
522 loop (Route.add_extensions route extensions)
523 | _ ->
524 let* _ = skip_element parser in
525 loop route)
526 | `El_end ->
527 parser.current_element <- List.tl parser.current_element;
528 Ok route
529 | `Data _ ->
530 loop route
531 | `Dtd _ ->
532 loop route
533 in
534 loop route
535
536and parse_track parser =
537 let track = Track.empty in
538 let rec loop track =
539 match Xmlm.input parser.input with
540 | `El_start ((_, name), attrs) ->
541 parser.current_element <- name :: parser.current_element;
542 (match name with
543 | "name" ->
544 let* text = parse_text_content parser in
545 loop (Track.with_name track text)
546 | "cmt" ->
547 let* text = parse_text_content parser in
548 loop (Track.with_comment track text)
549 | "desc" ->
550 let* text = parse_text_content parser in
551 loop (Track.with_description track text)
552 | "src" ->
553 let* text = parse_text_content parser in
554 loop (Track.with_source track text)
555 | "number" ->
556 let* text = parse_text_content parser in
557 (match parse_int_opt text with
558 | Some n -> loop (Track.with_number track n)
559 | None -> loop track)
560 | "type" ->
561 let* text = parse_text_content parser in
562 loop (Track.with_type track text)
563 | "trkseg" ->
564 let* trkseg = parse_track_segment parser in
565 loop (Track.add_segment track trkseg)
566 | "link" ->
567 let* link = parse_link parser attrs in
568 loop (Track.add_link track link)
569 | "extensions" ->
570 let* extensions = parse_extensions parser in
571 loop (Track.add_extensions track extensions)
572 | _ ->
573 let* _ = skip_element parser in
574 loop track)
575 | `El_end ->
576 parser.current_element <- List.tl parser.current_element;
577 Ok track
578 | `Data _ ->
579 loop track
580 | `Dtd _ ->
581 loop track
582 in
583 loop track
584
585and parse_track_segment parser =
586 let trkseg = Track.Segment.empty in
587 let rec loop trkseg =
588 match Xmlm.input parser.input with
589 | `El_start ((_, name), attrs) ->
590 parser.current_element <- name :: parser.current_element;
591 (match name with
592 | "trkpt" ->
593 let* (lat, lon) = parse_coordinates attrs "trkpt" in
594 let* trkpt = parse_waypoint_data parser lat lon in
595 loop (Track.Segment.add_point trkseg trkpt)
596 | "extensions" ->
597 let* _ = parse_extensions parser in
598 loop trkseg
599 | _ ->
600 let* _ = skip_element parser in
601 loop trkseg)
602 | `El_end ->
603 parser.current_element <- List.tl parser.current_element;
604 Ok trkseg
605 | `Data _ ->
606 loop trkseg
607 | `Dtd _ ->
608 loop trkseg
609 in
610 loop trkseg
611
612(** Main parsing function *)
613let parse ?(validate=false) input =
614 let parser = make_parser input in
615 try
616 let result = parse_gpx parser in
617 match result, validate with
618 | Ok gpx, true ->
619 let validation = Validate.validate_gpx gpx in
620 if validation.is_valid then
621 Ok gpx
622 else
623 let error_msgs = List.filter (fun issue -> issue.Validate.level = `Error) validation.issues
624 |> List.map (fun issue -> issue.Validate.message)
625 |> String.concat "; " in
626 Error (Error.validation_error error_msgs)
627 | result, false -> result
628 | Error _ as result, true -> result (* Pass through parse errors even when validating *)
629 with
630 | Xmlm.Error ((line, col), error) ->
631 Error (Error.xml_error (Printf.sprintf "XML error at line %d, column %d: %s"
632 line col (Xmlm.error_message error)))
633 | exn ->
634 Error (Error.invalid_xml (Printexc.to_string exn))
635
636(** Parse from string *)
637let parse_string ?(validate=false) s =
638 let input = Xmlm.make_input (`String (0, s)) in
639 parse ~validate input