···
1
+
(** GPX streaming parser using xmlm *)
5
+
(** Parser state for streaming *)
6
+
type parser_state = {
8
+
mutable current_element : string list; (* Stack of current element names *)
9
+
mutable text_buffer : Buffer.t;
12
+
(** Create a new parser state *)
13
+
let make_parser input = {
15
+
current_element = [];
16
+
text_buffer = Buffer.create 256;
19
+
(** Utility functions *)
21
+
let get_attribute name attrs =
23
+
let value = List.find (fun ((_, n), _) -> n = name) attrs in
25
+
with Not_found -> None
27
+
let require_attribute name attrs element =
28
+
match get_attribute name attrs with
29
+
| Some value -> Ok value
30
+
| None -> Error (Missing_required_attribute (element, name))
32
+
let parse_float_opt s =
33
+
try Some (Float.of_string s)
36
+
let parse_int_opt s =
37
+
try Some (int_of_string s)
41
+
match Ptime.of_rfc3339 s with
42
+
| Ok (t, _, _) -> Some t
45
+
(** Result binding operators *)
46
+
let (let*) = Result.bind
48
+
let parse_coordinates attrs element =
49
+
let* lat_str = require_attribute "lat" attrs element in
50
+
let* lon_str = require_attribute "lon" attrs element in
51
+
match (Float.of_string lat_str, Float.of_string lon_str) with
53
+
let* lat = Result.map_error (fun s -> Invalid_coordinate s) (latitude lat_f) in
54
+
let* lon = Result.map_error (fun s -> Invalid_coordinate s) (longitude lon_f) in
56
+
| exception _ -> Error (Invalid_coordinate "Invalid coordinate format")
58
+
(** Parse waypoint data from XML elements *)
59
+
let rec parse_waypoint_data parser lat lon =
60
+
let wpt = make_waypoint_data lat lon in
61
+
parse_waypoint_elements parser wpt
63
+
and parse_waypoint_elements parser wpt =
65
+
match Xmlm.input parser.input with
66
+
| `El_start ((_, name), attrs) ->
67
+
parser.current_element <- name :: parser.current_element;
70
+
let* text = parse_text_content parser in
71
+
(match parse_float_opt text with
72
+
| Some ele -> loop { wpt with ele = Some ele }
75
+
let* text = parse_text_content parser in
76
+
loop { wpt with time = parse_time text }
78
+
let* text = parse_text_content parser in
79
+
(match parse_float_opt text with
81
+
(match degrees f with
82
+
| Ok deg -> loop { wpt with magvar = Some deg }
83
+
| Error _ -> loop wpt)
86
+
let* text = parse_text_content parser in
87
+
(match parse_float_opt text with
88
+
| Some h -> loop { wpt with geoidheight = Some h }
91
+
let* text = parse_text_content parser in
92
+
loop { wpt with name = Some text }
94
+
let* text = parse_text_content parser in
95
+
loop { wpt with cmt = Some text }
97
+
let* text = parse_text_content parser in
98
+
loop { wpt with desc = Some text }
100
+
let* text = parse_text_content parser in
101
+
loop { wpt with src = Some text }
103
+
let* text = parse_text_content parser in
104
+
loop { wpt with sym = Some text }
106
+
let* text = parse_text_content parser in
107
+
loop { wpt with type_ = Some text }
109
+
let* text = parse_text_content parser in
110
+
loop { wpt with fix = fix_type_of_string text }
112
+
let* text = parse_text_content parser in
113
+
(match parse_int_opt text with
114
+
| Some s -> loop { wpt with sat = Some s }
115
+
| None -> loop wpt)
116
+
| "hdop" | "vdop" | "pdop" ->
117
+
let* text = parse_text_content parser in
118
+
(match parse_float_opt text with
121
+
| "hdop" -> loop { wpt with hdop = Some f }
122
+
| "vdop" -> loop { wpt with vdop = Some f }
123
+
| "pdop" -> loop { wpt with pdop = Some f }
125
+
| None -> loop wpt)
126
+
| "ageofdgpsdata" ->
127
+
let* text = parse_text_content parser in
128
+
(match parse_float_opt text with
129
+
| Some f -> loop { wpt with ageofdgpsdata = Some f }
130
+
| None -> loop wpt)
132
+
let* text = parse_text_content parser in
133
+
(match parse_int_opt text with
134
+
| Some id -> loop { wpt with dgpsid = Some id }
135
+
| None -> loop wpt)
137
+
let* link = parse_link parser attrs in
138
+
loop { wpt with links = link :: wpt.links }
140
+
let* extensions = parse_extensions parser in
141
+
loop { wpt with extensions = extensions @ wpt.extensions }
143
+
(* Skip unknown elements *)
144
+
let* _ = skip_element parser in
147
+
parser.current_element <- List.tl parser.current_element;
150
+
(* Ignore text data at this level *)
157
+
and parse_text_content parser =
158
+
Buffer.clear parser.text_buffer;
160
+
match Xmlm.input parser.input with
162
+
Buffer.add_string parser.text_buffer text;
165
+
parser.current_element <- List.tl parser.current_element;
166
+
Ok (Buffer.contents parser.text_buffer)
168
+
Error (Invalid_xml "Unexpected element in text content")
174
+
and parse_link parser attrs =
175
+
let href = match get_attribute "href" attrs with
179
+
let link = { href; text = None; type_ = None } in
180
+
parse_link_elements parser link
182
+
and parse_link_elements parser link =
183
+
let rec loop link =
184
+
match Xmlm.input parser.input with
185
+
| `El_start ((_, name), _) ->
186
+
parser.current_element <- name :: parser.current_element;
189
+
let* text = parse_text_content parser in
190
+
loop { link with text = Some text }
192
+
let* type_text = parse_text_content parser in
193
+
loop { link with type_ = Some type_text }
195
+
let* _ = skip_element parser in
198
+
parser.current_element <- List.tl parser.current_element;
207
+
and parse_extensions parser =
209
+
match Xmlm.input parser.input with
210
+
| `El_start ((ns, name), attrs) ->
211
+
parser.current_element <- name :: parser.current_element;
212
+
let* ext = parse_extension parser ns name attrs in
215
+
parser.current_element <- List.tl parser.current_element;
224
+
and parse_extension parser ns name attrs =
225
+
let namespace = if ns = "" then None else Some ns in
226
+
let attributes = List.map (fun ((_, n), v) -> (n, v)) attrs in
227
+
let* content = parse_extension_content parser in
228
+
Ok { namespace; name; attributes; content }
230
+
and parse_extension_content parser =
231
+
Buffer.clear parser.text_buffer;
232
+
let rec loop elements =
233
+
match Xmlm.input parser.input with
235
+
Buffer.add_string parser.text_buffer text;
237
+
| `El_start ((ns, name), attrs) ->
238
+
parser.current_element <- name :: parser.current_element;
239
+
let* ext = parse_extension parser ns name attrs in
240
+
loop (ext :: elements)
242
+
parser.current_element <- List.tl parser.current_element;
243
+
let text = String.trim (Buffer.contents parser.text_buffer) in
244
+
Ok (match (text, elements) with
245
+
| ("", []) -> Text ""
246
+
| ("", els) -> Elements (List.rev els)
247
+
| (t, []) -> Text t
248
+
| (t, els) -> Mixed (t, List.rev els))
254
+
and skip_element parser =
255
+
let rec loop depth =
256
+
match Xmlm.input parser.input with
257
+
| `El_start _ -> loop (depth + 1)
258
+
| `El_end when depth = 0 -> Ok ()
259
+
| `El_end -> loop (depth - 1)
260
+
| `Data _ -> loop depth
261
+
| `Dtd _ -> loop depth
265
+
(** Parse a complete GPX document *)
266
+
let rec parse_gpx parser =
267
+
(* Find the GPX root element *)
268
+
let rec find_gpx_root () =
269
+
match Xmlm.input parser.input with
270
+
| `El_start ((_, "gpx"), attrs) ->
271
+
parser.current_element <- ["gpx"];
272
+
let* version = require_attribute "version" attrs "gpx" in
273
+
let* creator = require_attribute "creator" attrs "gpx" in
274
+
if version <> "1.1" then
275
+
Error (Validation_error ("Unsupported GPX version: " ^ version))
277
+
Ok (version, creator)
279
+
let* _ = skip_element parser in
284
+
Error (Missing_required_element "gpx")
289
+
let* (version, creator) = find_gpx_root () in
290
+
let gpx = make_gpx ~creator in
291
+
parse_gpx_elements parser { gpx with version }
293
+
and parse_gpx_elements parser gpx =
295
+
match Xmlm.input parser.input with
296
+
| `El_start ((_, name), attrs) ->
297
+
parser.current_element <- name :: parser.current_element;
300
+
let* metadata = parse_metadata parser in
301
+
loop { gpx with metadata = Some metadata }
303
+
let* (lat, lon) = parse_coordinates attrs "wpt" in
304
+
let* waypoint = parse_waypoint_data parser lat lon in
305
+
loop { gpx with waypoints = waypoint :: gpx.waypoints }
307
+
let* route = parse_route parser in
308
+
loop { gpx with routes = route :: gpx.routes }
310
+
let* track = parse_track parser in
311
+
loop { gpx with tracks = track :: gpx.tracks }
313
+
let* extensions = parse_extensions parser in
314
+
loop { gpx with extensions = extensions @ gpx.extensions }
316
+
let* _ = skip_element parser in
320
+
waypoints = List.rev gpx.waypoints;
321
+
routes = List.rev gpx.routes;
322
+
tracks = List.rev gpx.tracks }
330
+
and parse_metadata parser =
331
+
let metadata = empty_metadata in
332
+
let rec loop (metadata : metadata) =
333
+
match Xmlm.input parser.input with
334
+
| `El_start ((_, name), attrs) ->
335
+
parser.current_element <- name :: parser.current_element;
338
+
let* text = parse_text_content parser in
339
+
loop { metadata with name = Some text }
341
+
let* text = parse_text_content parser in
342
+
loop { metadata with desc = Some text }
344
+
let* text = parse_text_content parser in
345
+
loop { metadata with keywords = Some text }
347
+
let* text = parse_text_content parser in
348
+
loop { metadata with time = parse_time text }
350
+
let* link = parse_link parser attrs in
351
+
loop { metadata with links = link :: metadata.links }
353
+
let* extensions = parse_extensions parser in
354
+
loop { metadata with extensions = extensions @ metadata.extensions }
356
+
let* _ = skip_element parser in
359
+
parser.current_element <- List.tl parser.current_element;
360
+
Ok { metadata with links = List.rev metadata.links }
368
+
and parse_route parser =
370
+
name = None; cmt = None; desc = None; src = None; links = [];
371
+
number = None; type_ = None; extensions = []; rtepts = []
373
+
let rec loop (route : route) =
374
+
match Xmlm.input parser.input with
375
+
| `El_start ((_, name), attrs) ->
376
+
parser.current_element <- name :: parser.current_element;
379
+
let* text = parse_text_content parser in
380
+
loop { route with name = Some text }
382
+
let* text = parse_text_content parser in
383
+
loop { route with cmt = Some text }
385
+
let* text = parse_text_content parser in
386
+
loop { route with desc = Some text }
388
+
let* text = parse_text_content parser in
389
+
loop { route with src = Some text }
391
+
let* text = parse_text_content parser in
392
+
(match parse_int_opt text with
393
+
| Some n -> loop { route with number = Some n }
394
+
| None -> loop route)
396
+
let* text = parse_text_content parser in
397
+
loop { route with type_ = Some text }
399
+
let* (lat, lon) = parse_coordinates attrs "rtept" in
400
+
let* rtept = parse_waypoint_data parser lat lon in
401
+
loop { route with rtepts = rtept :: route.rtepts }
403
+
let* link = parse_link parser attrs in
404
+
loop { route with links = link :: route.links }
406
+
let* extensions = parse_extensions parser in
407
+
loop { route with extensions = extensions @ route.extensions }
409
+
let* _ = skip_element parser in
412
+
parser.current_element <- List.tl parser.current_element;
414
+
rtepts = List.rev route.rtepts;
415
+
links = List.rev route.links }
423
+
and parse_track parser =
425
+
name = None; cmt = None; desc = None; src = None; links = [];
426
+
number = None; type_ = None; extensions = []; trksegs = []
428
+
let rec loop track =
429
+
match Xmlm.input parser.input with
430
+
| `El_start ((_, name), attrs) ->
431
+
parser.current_element <- name :: parser.current_element;
434
+
let* text = parse_text_content parser in
435
+
loop { track with name = Some text }
437
+
let* text = parse_text_content parser in
438
+
loop { track with cmt = Some text }
440
+
let* text = parse_text_content parser in
441
+
loop { track with desc = Some text }
443
+
let* text = parse_text_content parser in
444
+
loop { track with src = Some text }
446
+
let* text = parse_text_content parser in
447
+
(match parse_int_opt text with
448
+
| Some n -> loop { track with number = Some n }
449
+
| None -> loop track)
451
+
let* text = parse_text_content parser in
452
+
loop { track with type_ = Some text }
454
+
let* trkseg = parse_track_segment parser in
455
+
loop { track with trksegs = trkseg :: track.trksegs }
457
+
let* link = parse_link parser attrs in
458
+
loop { track with links = link :: track.links }
460
+
let* extensions = parse_extensions parser in
461
+
loop { track with extensions = extensions @ track.extensions }
463
+
let* _ = skip_element parser in
466
+
parser.current_element <- List.tl parser.current_element;
468
+
trksegs = List.rev track.trksegs;
469
+
links = List.rev track.links }
477
+
and parse_track_segment parser =
478
+
let trkseg = { trkpts = []; extensions = [] } in
479
+
let rec loop trkseg =
480
+
match Xmlm.input parser.input with
481
+
| `El_start ((_, name), attrs) ->
482
+
parser.current_element <- name :: parser.current_element;
485
+
let* (lat, lon) = parse_coordinates attrs "trkpt" in
486
+
let* trkpt = parse_waypoint_data parser lat lon in
487
+
loop { trkseg with trkpts = trkpt :: trkseg.trkpts }
489
+
let* extensions = parse_extensions parser in
490
+
loop { trkseg with extensions = extensions @ trkseg.extensions }
492
+
let* _ = skip_element parser in
495
+
parser.current_element <- List.tl parser.current_element;
496
+
Ok { trkseg with trkpts = List.rev trkseg.trkpts }
504
+
(** Main parsing function *)
506
+
let parser = make_parser input in
510
+
| Xmlm.Error ((line, col), error) ->
511
+
Error (Xml_error (Printf.sprintf "XML error at line %d, column %d: %s"
512
+
line col (Xmlm.error_message error)))
514
+
Error (Invalid_xml (Printexc.to_string exn))
516
+
(** Parse from string *)
517
+
let parse_string s =
518
+
let input = Xmlm.make_input (`String (0, s)) in