···
+
(* See the end of the file for the license *)
+
exception Read_error of string
+
let read_error fmt = Printf.ksprintf (fun s -> raise (Read_error s)) fmt
+
let magic_string = "\147NUMPY"
+
let magic_string_len = String.length magic_string
+
type packed_kind = P : (_, _) Bigarray.kind -> packed_kind
+
let dtype ~packed_kind =
+
| P Bigarray.Char -> "|"
+
| P _ -> if Sys.big_endian then ">" else "<"
+
| P Bigarray.Int32 -> "i4"
+
| P Bigarray.Int64 -> "i8"
+
| P Bigarray.Float16 -> "f16"
+
| P Bigarray.Float32 -> "f4"
+
| P Bigarray.Float64 -> "f8"
+
| P Bigarray.Int8_unsigned -> "u1"
+
| P Bigarray.Int8_signed -> "i1"
+
| P Bigarray.Int16_unsigned -> "u2"
+
| P Bigarray.Int16_signed -> "i2"
+
| P Bigarray.Char -> "S1"
+
| P Bigarray.Complex32 -> "c8" (* 2 32bits float. *)
+
| P Bigarray.Complex64 -> "c16" (* 2 64bits float. *)
+
| P Bigarray.Int -> failwith "Int is not supported"
+
| P Bigarray.Nativeint -> failwith "Nativeint is not supported."
+
let map_file file_descr ~pos kind layout shared shape =
+
let is_scalar = Array.length shape = 0 in
+
Unix.map_file file_descr ~pos kind layout shared
+
(if is_scalar then [| 1 |] else shape)
+
if is_scalar then Bigarray.reshape array [||] else array
+
let fortran_order (type a) ~(layout : a Bigarray.layout) =
+
| Bigarray.C_layout -> "False"
+
| Bigarray.Fortran_layout -> "True"
+
| [| dim1 |] -> Printf.sprintf "%d," dim1
+
| dims -> Array.to_list dims |> List.map string_of_int |> String.concat ", "
+
let full_header ?header_len ~layout ~packed_kind ~dims () =
+
Printf.sprintf "{'descr': '%s', 'fortran_order': %s, 'shape': (%s), }"
+
(dtype ~packed_kind) (fortran_order ~layout) (shape ~dims)
+
let total_len = String.length header + magic_string_len + 4 + 1 in
+
| None -> if total_len mod 16 = 0 then 0 else 16 - (total_len mod 16)
+
if header_len mod 16 <> 0 then
+
failwith "header_len has to be divisible by 16";
+
if header_len < total_len then
+
failwith "header_len is smaller than total_len";
+
let total_header_len = String.length header + padding_len + 1 in
+
Printf.sprintf "%s\001\000%c%c%s%s\n" magic_string
+
(total_header_len mod 256 |> Char.chr)
+
(total_header_len / 256 |> Char.chr)
+
(String.make padding_len ' ')
+
let with_file filename flags mask ~f =
+
let file_descr = Unix.openfile filename flags mask in
+
let result = f file_descr in
+
let write ?header_len bigarray filename =
+
with_file filename [ O_CREAT; O_TRUNC; O_RDWR ] 0o640 ~f:(fun file_descr ->
+
full_header () ?header_len
+
~layout:(Bigarray.Genarray.layout bigarray)
+
~packed_kind:(P (Bigarray.Genarray.kind bigarray))
+
~dims:(Bigarray.Genarray.dims bigarray)
+
let full_header_len = String.length full_header in
+
Unix.write_substring file_descr full_header 0 full_header_len
+
then raise Cannot_write;
+
~pos:(Int64.of_int full_header_len)
+
(Bigarray.Genarray.kind bigarray)
+
(Bigarray.Genarray.layout bigarray)
+
(Bigarray.Genarray.dims bigarray)
+
Bigarray.Genarray.blit bigarray file_array)
+
let write1 array1 filename = write (Bigarray.genarray_of_array1 array1) filename
+
let write2 array2 filename = write (Bigarray.genarray_of_array2 array2) filename
+
let write3 array3 filename = write (Bigarray.genarray_of_array3 array3) filename
+
module Batch_writer = struct
+
file_descr : Unix.file_descr;
+
mutable bytes_written_so_far : int;
+
mutable dims_and_packed_kind : (int array * packed_kind) option;
+
let append t bigarray =
+
~pos:(Int64.of_int t.bytes_written_so_far)
+
(Bigarray.Genarray.kind bigarray)
+
(Bigarray.Genarray.layout bigarray)
+
(Bigarray.Genarray.dims bigarray)
+
Bigarray.Genarray.blit bigarray file_array;
+
let size_in_bytes = Bigarray.Genarray.size_in_bytes bigarray in
+
t.bytes_written_so_far <- t.bytes_written_so_far + size_in_bytes;
+
match t.dims_and_packed_kind with
+
let dims = Bigarray.Genarray.dims bigarray in
+
let kind = Bigarray.Genarray.kind bigarray in
+
t.dims_and_packed_kind <- Some (dims, P kind)
+
| Some (dims, _kind) ->
+
let dims' = Bigarray.Genarray.dims bigarray in
+
let incorrect_dimensions =
+
match (Array.to_list dims, Array.to_list dims') with
+
| [], _ | _, [] -> true
+
| _ :: d, _ :: d' -> d <> d'
+
if incorrect_dimensions then
+
Printf.sprintf "Incorrect dimensions %s vs %s." (shape ~dims)
+
dims.(0) <- dims.(0) + dims'.(0)
+
Unix.openfile filename [ O_CREAT; O_TRUNC; O_RDWR ] 0o640
+
bytes_written_so_far = header_len;
+
dims_and_packed_kind = None;
+
assert (Unix.lseek t.file_descr 0 SEEK_SET = 0);
+
match t.dims_and_packed_kind with
+
| None -> failwith "Nothing to write"
+
| Some (dims, packed_kind) ->
+
full_header ~header_len ~layout:C_layout ~dims ~packed_kind ()
+
if Unix.write_substring t.file_descr header 0 header_len <> header_len then
+
Unix.close t.file_descr
+
let really_read fd len =
+
let buffer = Bytes.create len in
+
let read = Unix.read fd buffer offset (len - offset) in
+
if read + offset < len then loop (read + offset)
+
else if read = 0 then read_error "unexpected eof"
+
type packed_kind = P : (_, _) Bigarray.kind -> packed_kind
+
type t = { kind : packed_kind; fortran_order : bool; shape : int array }
+
let indexes = ref [] in
+
for i = 0 to String.length str - 1 do
+
| c when !parens = 0 && c = on -> indexes := i :: !indexes
+
(fun (prev_p, acc) index ->
+
(index, String.sub str (index + 1) (prev_p - index - 1) :: acc))
+
(String.length str, [])
+
|> fun (first_pos, acc) -> String.sub str 0 first_pos :: acc
+
let rec loopr start len =
+
if len = 0 then (start, len)
+
else if List.mem str.[start + len - 1] on then loopr start (len - 1)
+
let rec loopl start len =
+
if len = 0 then (start, len)
+
else if List.mem str.[start] on then loopl (start + 1) (len - 1)
+
let start, len = loopl 0 (String.length str) in
+
String.sub str start len
+
trim header ~on:[ '{'; ' '; '}'; '\n' ]
+
|> List.map String.trim
+
|> List.filter (fun s -> String.length s > 0)
+
|> List.map (fun header_field ->
+
match split header_field ~on:':' with
+
( trim name ~on:[ '\''; ' ' ],
+
trim value ~on:[ '\''; ' '; '('; ')' ] )
+
| _ -> read_error "unable to parse field %s" header_field)
+
try List.assoc field header_fields
+
with Not_found -> read_error "cannot find field %s" field
+
let kind = find_field "descr" in
+
if not Sys.big_endian then
+
read_error "big endian data but arch is little endian"
+
read_error "little endian data but arch is big endian"
+
| otherwise -> read_error "incorrect endianness %c" otherwise);
+
match String.sub kind 1 (String.length kind - 1) with
+
| "u1" -> P Int8_unsigned
+
| "i1" -> P Int8_signed
+
| "u2" -> P Int16_unsigned
+
| "i2" -> P Int16_signed
+
| otherwise -> read_error "incorrect descr %s" otherwise
+
match find_field "fortran_order" with
+
| otherwise -> read_error "incorrect fortran_order %s" otherwise
+
|> List.map String.trim
+
|> List.filter (fun s -> String.length s > 0)
+
|> List.map int_of_string
+
{ kind; fortran_order; shape }
+
type packed_array = P : (_, _, _) Bigarray.Genarray.t -> packed_array
+
type packed_array1 = P1 : (_, _, _) Bigarray.Array1.t -> packed_array1
+
type packed_array2 = P2 : (_, _, _) Bigarray.Array2.t -> packed_array2
+
type packed_array3 = P3 : (_, _, _) Bigarray.Array3.t -> packed_array3
+
let read_mmap filename ~shared =
+
let access = if shared then Unix.O_RDWR else O_RDONLY in
+
let file_descr = Unix.openfile filename [ access ] 0 in
+
let magic_string' = really_read file_descr magic_string_len in
+
if magic_string <> magic_string' then read_error "magic string mismatch";
+
let version = really_read file_descr 2 |> fun v -> v.[0] |> Char.code in
+
| _ -> read_error "unsupported version %d" version
+
let header, header_len =
+
really_read file_descr header_len_len |> fun str ->
+
let header_len = ref 0 in
+
for i = String.length str - 1 downto 0 do
+
header_len := (256 * !header_len) + Char.code str.[i]
+
(really_read file_descr !header_len, !header_len)
+
let header = Header.parse header in
+
(Int64.of_int (header_len + header_len_len + magic_string_len + 2), header)
+
let (Header.P kind) = header.kind in
+
let array = map_file file_descr ~pos kind layout shared header.shape in
+
Gc.finalise (fun _ -> Unix.close file_descr) array;
+
if header.fortran_order then build Fortran_layout else build C_layout
+
let read_mmap1 filename ~shared =
+
let (P array) = read_mmap filename ~shared in
+
P1 (Bigarray.array1_of_genarray array)
+
let read_mmap2 filename ~shared =
+
let (P array) = read_mmap filename ~shared in
+
P2 (Bigarray.array2_of_genarray array)
+
let read_mmap3 filename ~shared =
+
let (P array) = read_mmap filename ~shared in
+
P3 (Bigarray.array3_of_genarray array)
+
let read_copy filename =
+
let (P array) = read_mmap filename ~shared:false in
+
Bigarray.Genarray.create
+
(Bigarray.Genarray.kind array)
+
(Bigarray.Genarray.layout array)
+
(Bigarray.Genarray.dims array)
+
Bigarray.Genarray.blit array result;
+
let read_copy1 filename =
+
let (P array) = read_copy filename in
+
P1 (Bigarray.array1_of_genarray array)
+
let read_copy2 filename =
+
let (P array) = read_copy filename in
+
P2 (Bigarray.array2_of_genarray array)
+
let read_copy3 filename =
+
let (P array) = read_copy filename in
+
P3 (Bigarray.array3_of_genarray array)
+
let npy_suffix = ".npy"
+
let maybe_add_suffix array_name ~suffix =
+
match suffix with None -> npy_suffix | Some suffix -> suffix
+
type in_file = Zip.in_file
+
let open_in = Zip.open_in
+
|> List.map (fun entry ->
+
let filename = entry.Zip.filename in
+
if String.length filename < String.length npy_suffix then filename
+
let start_pos = String.length filename - String.length npy_suffix in
+
String.sub filename start_pos (String.length npy_suffix)
+
then String.sub filename 0 start_pos
+
let close_in = Zip.close_in
+
let read ?suffix t array_name =
+
let array_name = maybe_add_suffix array_name ~suffix in
+
try Zip.find_entry t array_name
+
raise (Invalid_argument ("unable to find " ^ array_name))
+
let tmp_file = Filename.temp_file "ocaml-npz" ".tmp" in
+
Zip.copy_entry_to_file t entry tmp_file;
+
let data = read_copy tmp_file in
+
type out_file = Zip.out_file
+
let open_out filename = Zip.open_out filename
+
let close_out = Zip.close_out
+
let write ?suffix t array_name array =
+
let array_name = maybe_add_suffix array_name ~suffix in
+
let tmp_file = Filename.temp_file "ocaml-npz" ".tmp" in
+
Zip.copy_file_to_entry tmp_file t array_name;
+
(** Type equalities module, used in conversion function *)
+
(** An equality type to extract type equalities *)
+
type ('a, 'b) t = W : ('a, 'a) t
+
(** Type equalities for bigarray kinds *)
+
let ( === ) : type a b c d.
+
(a, b) kind -> (c, d) kind -> ((a, b) kind, (c, d) kind) t option =
+
| Float32, Float32 -> Some W
+
| Float64, Float64 -> Some W
+
| Int8_signed, Int8_signed -> Some W
+
| Int8_unsigned, Int8_unsigned -> Some W
+
| Int16_signed, Int16_signed -> Some W
+
| Int16_unsigned, Int16_unsigned -> Some W
+
| Int32, Int32 -> Some W
+
| Int64, Int64 -> Some W
+
| Nativeint, Nativeint -> Some W
+
| Complex32, Complex32 -> Some W
+
| Complex64, Complex64 -> Some W
+
(** Type equalities for layout *)
+
let ( === ) : type a b.
+
a layout -> b layout -> (a layout, b layout) t option =
+
| Fortran_layout, Fortran_layout -> Some W
+
| C_layout, C_layout -> Some W
+
(** Conversion functions from packed arrays to bigarrays *)
+
let to_bigarray (type a b c) (layout : c Bigarray.layout)
+
(kind : (a, b) Bigarray.kind) (P x) =
+
match Eq.Layout.(Bigarray.Genarray.layout x === layout) with
+
match Eq.Kind.(Bigarray.Genarray.kind x === kind) with
+
| Some Eq.W -> Some (x : (a, b, c) Bigarray.Genarray.t))
+
let to_bigarray1 (type a b c) (layout : c Bigarray.layout)
+
(kind : (a, b) Bigarray.kind) (P1 x) =
+
match Eq.Layout.(Bigarray.Array1.layout x === layout) with
+
match Eq.Kind.(Bigarray.Array1.kind x === kind) with
+
| Some Eq.W -> Some (x : (a, b, c) Bigarray.Array1.t))
+
let to_bigarray2 (type a b c) (layout : c Bigarray.layout)
+
(kind : (a, b) Bigarray.kind) (P2 x) =
+
match Eq.Layout.(Bigarray.Array2.layout x === layout) with
+
match Eq.Kind.(Bigarray.Array2.kind x === kind) with
+
| Some Eq.W -> Some (x : (a, b, c) Bigarray.Array2.t))
+
let to_bigarray3 (type a b c) (layout : c Bigarray.layout)
+
(kind : (a, b) Bigarray.kind) (P3 x) =
+
match Eq.Layout.(Bigarray.Array3.layout x === layout) with
+
match Eq.Kind.(Bigarray.Array3.kind x === kind) with
+
| Some Eq.W -> Some (x : (a, b, c) Bigarray.Array3.t))
+
Version 2.0, January 2004
+
http://www.apache.org/licenses/
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
"License" shall mean the terms and conditions for use, reproduction,
+
and distribution as defined by Sections 1 through 9 of this document.
+
"Licensor" shall mean the copyright owner or entity authorized by
+
the copyright owner that is granting the License.
+
"Legal Entity" shall mean the union of the acting entity and all
+
other entities that control, are controlled by, or are under common
+
control with that entity. For the purposes of this definition,
+
"control" means (i) the power, direct or indirect, to cause the
+
direction or management of such entity, whether by contract or
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
+
outstanding shares, or (iii) beneficial ownership of such entity.
+
"You" (or "Your") shall mean an individual or Legal Entity
+
exercising permissions granted by this License.
+
"Source" form shall mean the preferred form for making modifications,
+
including but not limited to software source code, documentation
+
source, and configuration files.
+
"Object" form shall mean any form resulting from mechanical
+
transformation or translation of a Source form, including but
+
not limited to compiled object code, generated documentation,
+
and conversions to other media types.
+
"Work" shall mean the work of authorship, whether in Source or
+
Object form, made available under the License, as indicated by a
+
copyright notice that is included in or attached to the work
+
(an example is provided in the Appendix below).
+
"Derivative Works" shall mean any work, whether in Source or Object
+
form, that is based on (or derived from) the Work and for which the
+
editorial revisions, annotations, elaborations, or other modifications
+
represent, as a whole, an original work of authorship. For the purposes
+
of this License, Derivative Works shall not include works that remain
+
separable from, or merely link (or bind by name) to the interfaces of,
+
the Work and Derivative Works thereof.
+
"Contribution" shall mean any work of authorship, including
+
the original version of the Work and any modifications or additions
+
to that Work or Derivative Works thereof, that is intentionally
+
submitted to Licensor for inclusion in the Work by the copyright owner
+
or by an individual or Legal Entity authorized to submit on behalf of
+
the copyright owner. For the purposes of this definition, "submitted"
+
means any form of electronic, verbal, or written communication sent
+
to the Licensor or its representatives, including but not limited to
+
communication on electronic mailing lists, source code control systems,
+
and issue tracking systems that are managed by, or on behalf of, the
+
Licensor for the purpose of discussing and improving the Work, but
+
excluding communication that is conspicuously marked or otherwise
+
designated in writing by the copyright owner as "Not a Contribution."
+
"Contributor" shall mean Licensor and any individual or Legal Entity
+
on behalf of whom a Contribution has been received by Licensor and
+
subsequently incorporated within the Work.
+
2. Grant of Copyright License. Subject to the terms and conditions of
+
this License, each Contributor hereby grants to You a perpetual,
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+
copyright license to reproduce, prepare Derivative Works of,
+
publicly display, publicly perform, sublicense, and distribute the
+
Work and such Derivative Works in Source or Object form.
+
3. Grant of Patent License. Subject to the terms and conditions of
+
this License, each Contributor hereby grants to You a perpetual,
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+
(except as stated in this section) patent license to make, have made,
+
use, offer to sell, sell, import, and otherwise transfer the Work,
+
where such license applies only to those patent claims licensable
+
by such Contributor that are necessarily infringed by their
+
Contribution(s) alone or by combination of their Contribution(s)
+
with the Work to which such Contribution(s) was submitted. If You
+
institute patent litigation against any entity (including a
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
+
or a Contribution incorporated within the Work constitutes direct
+
or contributory patent infringement, then any patent licenses
+
granted to You under this License for that Work shall terminate
+
as of the date such litigation is filed.
+
4. Redistribution. You may reproduce and distribute copies of the
+
Work or Derivative Works thereof in any medium, with or without
+
modifications, and in Source or Object form, provided that You
+
meet the following conditions:
+
(a) You must give any other recipients of the Work or
+
Derivative Works a copy of this License; and
+
(b) You must cause any modified files to carry prominent notices
+
stating that You changed the files; and
+
(c) You must retain, in the Source form of any Derivative Works
+
that You distribute, all copyright, patent, trademark, and
+
attribution notices from the Source form of the Work,
+
excluding those notices that do not pertain to any part of
+
the Derivative Works; and
+
(d) If the Work includes a "NOTICE" text file as part of its
+
distribution, then any Derivative Works that You distribute must
+
include a readable copy of the attribution notices contained
+
within such NOTICE file, excluding those notices that do not
+
pertain to any part of the Derivative Works, in at least one
+
of the following places: within a NOTICE text file distributed
+
as part of the Derivative Works; within the Source form or
+
documentation, if provided along with the Derivative Works; or,
+
within a display generated by the Derivative Works, if and
+
wherever such third-party notices normally appear. The contents
+
of the NOTICE file are for informational purposes only and
+
do not modify the License. You may add Your own attribution
+
notices within Derivative Works that You distribute, alongside
+
or as an addendum to the NOTICE text from the Work, provided
+
that such additional attribution notices cannot be construed
+
as modifying the License.
+
You may add Your own copyright statement to Your modifications and
+
may provide additional or different license terms and conditions
+
for use, reproduction, or distribution of Your modifications, or
+
for any such Derivative Works as a whole, provided Your use,
+
reproduction, and distribution of the Work otherwise complies with
+
the conditions stated in this License.
+
5. Submission of Contributions. Unless You explicitly state otherwise,
+
any Contribution intentionally submitted for inclusion in the Work
+
by You to the Licensor shall be under the terms and conditions of
+
this License, without any additional terms or conditions.
+
Notwithstanding the above, nothing herein shall supersede or modify
+
the terms of any separate license agreement you may have executed
+
with Licensor regarding such Contributions.
+
6. Trademarks. This License does not grant permission to use the trade
+
names, trademarks, service marks, or product names of the Licensor,
+
except as required for reasonable and customary use in describing the
+
origin of the Work and reproducing the content of the NOTICE file.
+
7. Disclaimer of Warranty. Unless required by applicable law or
+
agreed to in writing, Licensor provides the Work (and each
+
Contributor provides its Contributions) on an "AS IS" BASIS,
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+
implied, including, without limitation, any warranties or conditions
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+
PARTICULAR PURPOSE. You are solely responsible for determining the
+
appropriateness of using or redistributing the Work and assume any
+
risks associated with Your exercise of permissions under this License.
+
8. Limitation of Liability. In no event and under no legal theory,
+
whether in tort (including negligence), contract, or otherwise,
+
unless required by applicable law (such as deliberate and grossly
+
negligent acts) or agreed to in writing, shall any Contributor be
+
liable to You for damages, including any direct, indirect, special,
+
incidental, or consequential damages of any character arising as a
+
result of this License or out of the use or inability to use the
+
Work (including but not limited to damages for loss of goodwill,
+
work stoppage, computer failure or malfunction, or any and all
+
other commercial damages or losses), even if such Contributor
+
has been advised of the possibility of such damages.
+
9. Accepting Warranty or Additional Liability. While redistributing
+
the Work or Derivative Works thereof, You may choose to offer,
+
and charge a fee for, acceptance of support, warranty, indemnity,
+
or other liability obligations and/or rights consistent with this
+
License. However, in accepting such obligations, You may act only
+
on Your own behalf and on Your sole responsibility, not on behalf
+
of any other Contributor, and only if You agree to indemnify,
+
defend, and hold each Contributor harmless for any liability
+
incurred by, or claims asserted against, such Contributor by reason
+
of your accepting any such warranty or additional liability.
+
END OF TERMS AND CONDITIONS
+
APPENDIX: How to apply the Apache License to your work.
+
To apply the Apache License to your work, attach the following
+
boilerplate notice, with the fields enclosed by brackets "{}"
+
replaced with your own identifying information. (Don't include
+
the brackets!) The text should be enclosed in the appropriate
+
comment syntax for the file format. We also recommend that a
+
file or class name and description of purpose be included on the
+
same "printed page" as the copyright notice for easier
+
identification within third-party archives.
+
Copyright {yyyy} {name of copyright owner}
+
Licensed under the Apache License, Version 2.0 (the "License");
+
you may not use this file except in compliance with the License.
+
You may obtain a copy of the License at
+
http://www.apache.org/licenses/LICENSE-2.0
+
Unless required by applicable law or agreed to in writing, software
+
distributed under the License is distributed on an "AS IS" BASIS,
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+
See the License for the specific language governing permissions and
+
limitations under the License. *)