//! Compact binary serialization format with built-in compression.
1use crate::{CompressionMethod, Error, FileHeader, Result, Tag, TagKind, len};
2use std::io::Read;
3
4use chrono::DateTime;
5#[cfg(feature = "gzip")]
6use flate2::read::{GzDecoder, ZlibDecoder};
7#[cfg(feature = "lz4")]
8use lz4::Decoder;
9use uuid::Uuid;
10
11/// A Hateno reader that can handle different compression methods.
12pub enum Reader<R: Read> {
13 None(RawReader<R>),
14 #[cfg(feature = "gzip")]
15 Gzip(RawReader<GzDecoder<R>>),
16 #[cfg(feature = "gzip")]
17 Zlib(RawReader<ZlibDecoder<R>>),
18 #[cfg(feature = "lz4")]
19 Lz4(RawReader<Decoder<R>>),
20}
21
22impl<R: Read> Reader<R> {
23 /// Create a Reader that automatically wraps the underlying reader in a
24 /// decompressor based on the compression method in the header.
25 pub fn new(mut reader: R) -> Result<Self> {
26 let header = Self::read_header(&mut reader)?;
27
28 let reader = match header.compression {
29 CompressionMethod::None => {
30 Reader::None(RawReader::new(reader, header))
31 }
32 CompressionMethod::Gzip => {
33 #[cfg(feature = "gzip")]
34 {
35 let gz_reader = GzDecoder::new(reader);
36 Reader::Gzip(RawReader::new(gz_reader, header))
37 }
38 #[cfg(not(feature = "gzip"))]
39 {
40 return Err(Error::UnsupportedCompression(
41 header.compression,
42 ));
43 }
44 }
45 CompressionMethod::Zlib => {
46 #[cfg(feature = "gzip")]
47 {
48 let zlib_reader = ZlibDecoder::new(reader);
49 Reader::Zlib(RawReader::new(zlib_reader, header))
50 }
51 #[cfg(not(feature = "gzip"))]
52 {
53 return Err(Error::UnsupportedCompression(
54 header.compression,
55 ));
56 }
57 }
58 CompressionMethod::Lz4 => {
59 #[cfg(feature = "lz4")]
60 {
61 let lz4_reader =
62 Decoder::new(reader).map_err(Error::IoError)?;
63 Reader::Lz4(RawReader::new(lz4_reader, header))
64 }
65 #[cfg(not(feature = "lz4"))]
66 {
67 return Err(Error::UnsupportedCompression(
68 header.compression,
69 ));
70 }
71 }
72 };
73
74 Ok(reader)
75 }
76
77 fn read_header(reader: &mut R) -> Result<FileHeader> {
78 let mut magic_bytes = [0u8; len::MAGIC_BYTES];
79 reader
80 .read_exact(&mut magic_bytes)
81 .map_err(|_| Error::UnexpectedEof)?;
82 if magic_bytes != FileHeader::MAGIC_BYTES {
83 return Err(Error::InvalidMagic(magic_bytes));
84 }
85
86 let mut version_byte = [0u8; len::BYTE];
87 reader.read_exact(&mut version_byte)?;
88 let version = version_byte[0];
89 if version != FileHeader::CURRENT_VERSION {
90 return Err(Error::UnsupportedVersion(version));
91 }
92
93 let mut flag_byte = [0u8; len::BYTE];
94 reader.read_exact(&mut flag_byte)?;
95 let little_endian = flag_byte[0] == 0;
96
97 let mut compression_byte = [0u8; len::BYTE];
98 reader.read_exact(&mut compression_byte)?;
99 let compression = CompressionMethod::from_byte(compression_byte[0])
100 .ok_or(Error::UnknownCompression(compression_byte[0]))?;
101
102 let mut length_bytes = [0u8; len::INT_32];
103 reader.read_exact(&mut length_bytes)?;
104 let payload_length = if little_endian {
105 u32::from_le_bytes(length_bytes)
106 } else {
107 u32::from_be_bytes(length_bytes)
108 };
109
110 Ok(FileHeader {
111 version,
112 little_endian,
113 compression,
114 payload_length,
115 })
116 }
117
118 pub fn file_header(&self) -> &FileHeader {
119 match self {
120 Reader::None(r) => &r.header,
121 #[cfg(feature = "gzip")]
122 Reader::Gzip(r) => &r.header,
123 #[cfg(feature = "gzip")]
124 Reader::Zlib(r) => &r.header,
125 #[cfg(feature = "lz4")]
126 Reader::Lz4(r) => &r.header,
127 }
128 }
129
130 pub fn read_tag(&mut self) -> Result<Tag> {
131 match self {
132 Reader::None(r) => r.read_tag(),
133 #[cfg(feature = "gzip")]
134 Reader::Gzip(r) => r.read_tag(),
135 #[cfg(feature = "gzip")]
136 Reader::Zlib(r) => r.read_tag(),
137 #[cfg(feature = "lz4")]
138 Reader::Lz4(r) => r.read_tag(),
139 }
140 }
141}
142
143pub struct RawReader<R: Read> {
144 reader: R,
145 position: usize,
146 header: FileHeader,
147}
148
149impl<R: Read> RawReader<R> {
150 fn new(reader: R, header: FileHeader) -> Self {
151 Self {
152 reader,
153 position: 0,
154 header,
155 }
156 }
157
158 pub fn read_tag(&mut self) -> Result<Tag> {
159 let kind = self.read_tag_kind()?;
160 self.read_tag_of_kind(kind)
161 }
162
163 fn read_tag_of_kind(&mut self, kind: TagKind) -> Result<Tag> {
164 match kind {
165 TagKind::U8 => Ok(Tag::U8(self.read_u8()?)),
166 TagKind::I8 => Ok(Tag::I8(self.read_i8()?)),
167 TagKind::U16 => Ok(Tag::U16(self.read_u16()?)),
168 TagKind::I16 => Ok(Tag::I16(self.read_i16()?)),
169 TagKind::U32 => Ok(Tag::U32(self.read_u32()?)),
170 TagKind::I32 => Ok(Tag::I32(self.read_i32()?)),
171 TagKind::U64 => Ok(Tag::U64(self.read_u64()?)),
172 TagKind::I64 => Ok(Tag::I64(self.read_i64()?)),
173 TagKind::F32 => Ok(Tag::F32(self.read_f32()?)),
174 TagKind::F64 => Ok(Tag::F64(self.read_f64()?)),
175 TagKind::Bool => Ok(Tag::Bool(self.read_bool()?)),
176 TagKind::String => Ok(Tag::String(self.read_string()?)),
177 TagKind::Option => Ok(self.read_option()?),
178 TagKind::List => Ok(self.read_list()?),
179 TagKind::Map => Ok(self.read_map()?),
180 TagKind::Array => Ok(self.read_array()?),
181 TagKind::Timestamp => Ok(self.read_timestamp()?),
182 TagKind::Uuid => Ok(self.read_uuid()?),
183 }
184 }
185
186 fn read_option(&mut self) -> Result<Tag> {
187 let kind_byte = self.read_u8()?;
188 let kind = TagKind::from_byte(kind_byte)
189 .ok_or(Error::InvalidTagKind(kind_byte))?;
190 let discriminant = self.read_u8()?;
191 match discriminant {
192 0x00 => Ok(Tag::Option(kind, None)),
193 0x01 => {
194 let inner = self.read_tag_of_kind(kind)?;
195 if kind != inner.kind() {
196 // shouldn't happen but doesnt hurt to check
197 return Err(Error::InvalidOptionKind(inner.kind(), kind));
198 }
199 Ok(Tag::Option(kind, Some(Box::new(inner))))
200 }
201 _ => Err(Error::InvalidOptionDiscriminant(discriminant)),
202 }
203 }
204
205 fn read_list(&mut self) -> Result<Tag> {
206 let length = self.read_u32()? as usize;
207 let mut elements = Vec::with_capacity(length);
208
209 for _ in 0..length {
210 elements.push(self.read_tag()?);
211 }
212
213 Ok(Tag::List(elements))
214 }
215
216 fn read_array(&mut self) -> Result<Tag> {
217 let length = self.read_u32()? as usize;
218 let element_kind = self.read_tag_kind()?;
219
220 if !element_kind.valid_for_array_element() {
221 return Err(Error::InvalidArrayTagKind(element_kind));
222 }
223
224 let mut elements = Vec::with_capacity(length);
225
226 for _ in 0..length {
227 elements.push(self.read_tag_of_kind(element_kind)?);
228 }
229
230 Ok(Tag::Array(element_kind, elements))
231 }
232
233 fn read_map(&mut self) -> Result<Tag> {
234 let length = self.read_u32()? as usize;
235 let mut elements = Vec::with_capacity(length);
236
237 for _ in 0..length {
238 let key = self.read_tag()?;
239 let value = self.read_tag()?;
240
241 elements.push((key, value));
242 }
243
244 Ok(Tag::Map(elements))
245 }
246
247 fn read_timestamp(&mut self) -> Result<Tag> {
248 let ms = self.read_i64()?;
249 let dt = DateTime::from_timestamp_millis(ms)
250 .ok_or(Error::MillisecondsOutOfRange(ms))?;
251 Ok(Tag::Timestamp(dt))
252 }
253
254 fn read_uuid(&mut self) -> Result<Tag> {
255 Ok(Tag::Uuid(Uuid::from_bytes(
256 self.read_bytes::<{ len::UUID }>()?,
257 )))
258 }
259
260 // primitives
261
262 fn read_exact(&mut self, buf: &mut [u8]) -> Result<()> {
263 self.reader
264 .read_exact(buf)
265 .map_err(|_| Error::UnexpectedEof)?;
266 self.position += buf.len();
267 Ok(())
268 }
269
270 fn read_bytes<const N: usize>(&mut self) -> Result<[u8; N]> {
271 let mut buf = [0u8; N];
272 self.read_exact(&mut buf)?;
273 Ok(buf)
274 }
275
276 fn read_bytes_vec(&mut self, n: usize) -> Result<Vec<u8>> {
277 let mut buf = vec![0u8; n];
278 self.read_exact(&mut buf)?;
279 Ok(buf)
280 }
281
282 fn read_tag_kind(&mut self) -> Result<TagKind> {
283 let byte = self.read_u8()?;
284 TagKind::from_byte(byte).ok_or(Error::InvalidTagKind(byte))
285 }
286
287 fn read_bool(&mut self) -> Result<bool> {
288 match self.read_u8()? {
289 0 => Ok(false),
290 1 => Ok(true),
291 v => Err(Error::InvalidBoolValue(v)),
292 }
293 }
294
295 fn read_u8(&mut self) -> Result<u8> {
296 Ok(self.read_bytes::<{ len::INT_8 }>()?[0])
297 }
298
299 fn read_u16(&mut self) -> Result<u16> {
300 let bytes = self.read_bytes::<{ len::INT_16 }>()?;
301 Ok(if self.header.little_endian {
302 u16::from_le_bytes(bytes)
303 } else {
304 u16::from_be_bytes(bytes)
305 })
306 }
307
308 fn read_u32(&mut self) -> Result<u32> {
309 let bytes = self.read_bytes::<{ len::INT_32 }>()?;
310 Ok(if self.header.little_endian {
311 u32::from_le_bytes(bytes)
312 } else {
313 u32::from_be_bytes(bytes)
314 })
315 }
316
317 fn read_u64(&mut self) -> Result<u64> {
318 let bytes = self.read_bytes::<{ len::INT_64 }>()?;
319 Ok(if self.header.little_endian {
320 u64::from_le_bytes(bytes)
321 } else {
322 u64::from_be_bytes(bytes)
323 })
324 }
325
326 fn read_f32(&mut self) -> Result<f32> {
327 let bytes = self.read_bytes::<{ len::FLOAT_32 }>()?;
328 Ok(if self.header.little_endian {
329 f32::from_le_bytes(bytes)
330 } else {
331 f32::from_be_bytes(bytes)
332 })
333 }
334
335 fn read_f64(&mut self) -> Result<f64> {
336 let bytes = self.read_bytes::<{ len::FLOAT_64 }>()?;
337 Ok(if self.header.little_endian {
338 f64::from_le_bytes(bytes)
339 } else {
340 f64::from_be_bytes(bytes)
341 })
342 }
343
344 fn read_i8(&mut self) -> Result<i8> {
345 Ok(self.read_u8()? as i8)
346 }
347
348 fn read_i16(&mut self) -> Result<i16> {
349 Ok(self.read_u16()? as i16)
350 }
351
352 fn read_i32(&mut self) -> Result<i32> {
353 Ok(self.read_u32()? as i32)
354 }
355
356 fn read_i64(&mut self) -> Result<i64> {
357 Ok(self.read_u64()? as i64)
358 }
359
360 fn read_string(&mut self) -> Result<String> {
361 let length = self.read_u32()? as usize;
362 let bytes = self.read_bytes_vec(length)?;
363 String::from_utf8(bytes).map_err(Error::InvalidUtf8)
364 }
365}