patch of add golomb coding · round #0 · pull #2 · altagos.dev/codings

+252

src/Golomb.zig

···

       1
       +
       //! Golomb codec implementation for encoding and decoding integers.

     

       2
       +
       //! Golomb coding is a lossless, variable-length encoding scheme that is optimal for geometric distributions.

     

       3
       +
       

     

       4
       +
       const std = @import("std");

     

       5
       +
       

     

       6
       +
       const Self = @This();

     

       7
       +
       

     

       8
       +
       /// Golomb parameter M - determines the division point for quotient and remainder

     

       9
       +
       m: usize,

     

       10
       +
       

     

       11
       +
       /// Bit accumulator used while encoding: bits are shifted in MSB-first and flushed to the output once a full byte is collected

     

       12
       +
       bit_buffer: u8 = 0,

     

       13
       +
       /// Bit position within the current byte (0-7): bits accumulated so far when encoding, index of the next bit to read when decoding

     

       14
       +
       bit_idx: u8 = 0,

     

       15
       +
       /// Current byte position in the buffer

     

       16
       +
       byte_idx: usize = 0,

     

       17
       +
       

     

       18
       +
       /// Encodes `value` into `buffer`, continuing at the bit position stored in `self`.
       ///
       /// Bit layout (MSB-first): the quotient `value / m + 1` is written as `b_q` zero
       /// bits followed by its `b_q + 1` significant bits, then the remainder
       /// `value % m` is written zero-padded to the fixed width returned by `bM`.
       ///
       /// Options:
       /// - `write_padding_bits`: fill the rest of `buffer` with zero bits after the value.
       /// - `reset_tmp_values`: finish the stream. A partially filled trailing byte is
       ///   flushed (zero-padded on the right) and the position is reset to the start.
       ///   Pass `false` to append further values with subsequent calls.
       ///
       /// Returns the bit position reached in `buffer`, i.e. the bits already accounted
       /// for by `self` plus the bits written by this call.
       /// Returns `error.BufferTooSmall` if the code does not fit into the remaining space.
       /// Panics if `m` is 0.
       pub fn encode(
           self: *Self,
           buffer: []u8,
           value: usize,
           opts: struct {
               write_padding_bits: bool = false,
               reset_tmp_values: bool = true,
           },
       ) error{BufferTooSmall}!usize {
           if (self.m == 0) @panic("The Golomb parameter M must be larger than 0");

           const b_m = self.bM();

           const q = @divFloor(value, self.m) + 1;
           const b_q = bitLength(q) - 1;
           const r = @rem(value, self.m);

           // b_q prefix zeros + (b_q + 1) quotient bits + b_m remainder bits.
           // Computed in usize so the sum cannot overflow the u8 bit counts.
           const needed_bits = @as(usize, b_q) * 2 + 1 + b_m;
           const buffer_len_bits = needed_bits + self.byte_idx * 8 + self.bit_idx;

           if (buffer_len_bits > buffer.len * 8) return error.BufferTooSmall;

           // Write q: zero prefix announcing its length, then its significant bits
           self.writeBits(buffer, 0, b_q);
           self.writeBits(buffer, q, b_q + 1);

           // Write r, zero-extended to exactly b_m bits
           self.writeBits(buffer, r, b_m);

           // Write padding bits up to the end of the buffer
           if (opts.write_padding_bits) {
               const padding = buffer.len * 8 - buffer_len_bits;

               for (0..padding) |_| {
                   self.writeBit(buffer, 0);
               }

               std.debug.assert(self.bit_buffer == 0);
           }

           // Reset helper vars
           if (opts.reset_tmp_values) {
               // Bits still held in the accumulator would be discarded by reset();
               // left-align them and store the partial byte first. Given the size
               // check above, byte_idx is in bounds whenever bit_idx != 0 (assuming
               // the state in `self` belongs to this buffer).
               if (self.bit_idx != 0) {
                   const shift: u3 = @intCast(8 - self.bit_idx);
                   buffer[self.byte_idx] = self.bit_buffer << shift;
               }
               self.reset();
           }

           return buffer_len_bits;
       }

     

       73
       +
       

     

       74
       +
       /// Decodes one value from `buffer`, starting at the bit position stored in `self`.
       ///
       /// Reads a run of zero bits terminated by a 1 bit (the run length is the number
       /// of quotient bits that follow), then the remaining quotient bits, then a
       /// remainder of the fixed width returned by `bM`.
       ///
       /// With `reset_tmp_values = false` the read position is kept so the next call
       /// continues with the following value in the same buffer.
       ///
       /// Returns `error.InvalidFormat` when the input ends before a complete code was
       /// read, when the zero prefix is too long for a `usize` quotient, or when the
       /// decoded value does not fit into `usize`.
       /// Panics if `m` is 0.
       pub fn decode(
           self: *Self,
           buffer: []const u8,
           opts: struct { reset_tmp_values: bool = true },
       ) error{InvalidFormat}!usize {
           if (self.m == 0) @panic("The Golomb parameter M must be larger than 0");

           const b_m = self.bM();

           // Read b_q: count zero bits up to the terminating 1 bit. Running out of
           // input before that 1 bit means the code is truncated.
           var b_q: u8 = 0;
           while (true) {
               const bit = self.readBit(buffer) orelse return error.InvalidFormat;
               if (bit != 0) break;

               b_q += 1;
               // The quotient has b_q + 1 bits and must fit into a usize.
               if (b_q >= @bitSizeOf(usize)) return error.InvalidFormat;
           }

           // Read q: the terminating 1 bit is its most significant bit
           const shift: std.math.Log2Int(usize) = @intCast(b_q);
           const low_bits = self.readBits(buffer, b_q) catch return error.InvalidFormat;
           // The leading 1 bit makes the stored quotient >= 1, so this cannot underflow.
           const q = ((@as(usize, 1) << shift) | low_bits) - 1;

           // Read r
           const r = self.readBits(buffer, b_m) catch return error.InvalidFormat;

           const scaled = std.math.mul(usize, q, self.m) catch return error.InvalidFormat;
           const value = std.math.add(usize, scaled, r) catch return error.InvalidFormat;

           // Reset helper vars
           if (opts.reset_tmp_values) {
               self.reset();
           }

           return value;
       }

     

       116
       +
       

     

       117
       +
       /// Rewinds the codec to the start of a buffer: clears the byte and bit
       /// positions and the bit accumulator. The parameter `m` is left untouched.
       pub fn reset(self: *Self) void {
           self.byte_idx = 0;
           self.bit_idx = 0;
           self.bit_buffer = 0;
       }

     

       123
       +
       

     

       124
       +
       /// Returns the fixed bit width used for the remainder: the bit length of `m`,
       /// reduced by one when `m` is a power of two.
       fn bM(self: *const Self) u8 {
           const width = bitLength(self.m);
           if (isPowerOfTwo(self.m)) {
               return width - 1;
           }
           return width;
       }

     

       129
       +
       

     

       130
       +
       /// Appends the lowest bit of `bit` to the accumulator; once eight bits have
       /// been collected the byte is stored at `buffer[byte_idx]` and the accumulator
       /// starts over on the next byte.
       fn writeBit(self: *Self, buffer: []u8, bit: u8) void {
           const lsb = bit & 1;
           self.bit_buffer = (self.bit_buffer << 1) | lsb;
           self.bit_idx += 1;

           // Nothing to flush until a full byte is available.
           if (self.bit_idx != 8) return;

           buffer[self.byte_idx] = self.bit_buffer;
           self.byte_idx += 1;
           self.bit_buffer = 0;
           self.bit_idx = 0;
       }

     

       142
       +
       

     

       143
       +
       /// Writes the lowest `count` bits of `value` to the buffer, most significant
       /// bit first. `count` must not exceed the bit width of `usize`.
       fn writeBits(self: *Self, buffer: []u8, value: usize, count: u8) void {
           // Shift amounts must use the log2 type of the shifted integer; deriving it
           // from usize keeps this compiling on targets where usize is not 64 bits.
           const Shift = std.math.Log2Int(usize);

           var remaining = count;
           while (remaining > 0) {
               remaining -= 1;
               const shift: Shift = @intCast(remaining);
               const bit: u8 = @intCast((value >> shift) & 1);
               self.writeBit(buffer, bit);
           }
       }

     

       152
       +
       

     

       153
       +
       /// Reads the next bit (MSB-first within each byte) and advances the read
       /// position. Returns null once every byte of `buffer` has been consumed.
       fn readBit(self: *Self, buffer: []const u8) ?u8 {
           // `>=`: byte_idx == buffer.len is already one past the last valid byte.
           if (self.byte_idx >= buffer.len) return null;

           const shift: u3 = @intCast(7 - self.bit_idx);
           const bit = (buffer[self.byte_idx] >> shift) & 1;
           self.bit_idx += 1;

           if (self.bit_idx == 8) {
               self.byte_idx += 1;
               self.bit_idx = 0;
           }

           return bit;
       }

     

       167
       +
       

     

       168
       +
       /// Reads `count` bits, most significant first, and assembles them into an
       /// integer. Fails with `error.OutOfBounds` if the buffer runs out first.
       fn readBits(self: *Self, buffer: []const u8, count: u8) !usize {
           var acc: usize = 0;
           var left = count;

           while (left != 0) : (left -= 1) {
               if (self.readBit(buffer)) |bit| {
                   acc = (acc << 1) | @as(usize, bit);
               } else {
                   return error.OutOfBounds;
               }
           }

           return acc;
       }

     

       179
       +
       

     

       180
       +
       /// Returns the position of the highest set bit of `value`, counted from 1
       /// (0 for a zero value): the type's bit width minus its leading zero count.
       fn bitLength(value: anytype) u8 {
           const total_bits = @bitSizeOf(@TypeOf(value));
           const leading_zeros = @clz(value);
           return total_bits - leading_zeros;
       }

     

       184
       +
       

     

       185
       +
       /// Returns true if `value` has exactly one bit set. Zero is not a power of two.
       fn isPowerOfTwo(value: usize) bool {
           // Check for zero first: `value - 1` would underflow for 0.
           return value != 0 and (value & (value - 1)) == 0;
       }

     

       189
       +
       

     

       190
       +
       test "encode val = 42, m = 8" {
           // 42 = 5 * 8 + 2: quotient + 1 = 6 -> "00" "110", remainder 2 -> "010",
           // giving the single byte 0b00110010 = 50.
           var codec: Self = .{ .m = 8 };
           const value: usize = 42;
           var out: [1]u8 = undefined;

           _ = try codec.encode(&out, value, .{});

           try std.testing.expectEqualSlices(u8, &.{50}, &out);
       }

     

       199
       +
       

     

       200
       +
       test "decode val = {50}, m = 8" {
           // Inverse of the m = 8 encode case: the byte 50 decodes back to 42.
           var codec: Self = .{ .m = 8 };
           const bytes = [_]u8{50};

           const value = try codec.decode(&bytes, .{});

           try std.testing.expectEqual(42, value);
       }

     

       209
       +
       

     

       210
       +
       test "encode + decode val = 1564, m = 457" {
           // Round trip with a non-power-of-two parameter; the 14-bit code is padded
           // with zero bits to fill both output bytes.
           var codec: Self = .{ .m = 457 };
           const value: usize = 1564;
           var out: [2]u8 = undefined;

           _ = try codec.encode(&out, value, .{ .write_padding_bits = true });
           try std.testing.expectEqualSlices(u8, &.{ 35, 4 }, &out);

           const round_tripped = try codec.decode(&out, .{});
           try std.testing.expectEqual(value, round_tripped);
       }

     

       227
       +
       

     

       228
       +
       test "encode multiple val = { 1564, 42 }, m = 457" {
           // Two values packed back to back: the first call keeps the write position,
           // the second one continues from it and then resets the codec.
           var codec: Self = .{ .m = 457 };
           const values = [_]usize{ 1564, 42 };
           var out: [3]u8 = undefined;

           _ = try codec.encode(&out, values[0], .{ .reset_tmp_values = false });
           _ = try codec.encode(&out, values[1], .{});

           try std.testing.expectEqualSlices(u8, &.{ 35, 6, 42 }, &out);
       }

     

       240
       +
       

     

       241
       +
       test "decode multiple val = { 35, 6, 42 }, m = 457" {
           // Two values stored back to back: the first call keeps the read position
           // so the second call continues with the next code in the same buffer.
           const testing = std.testing;

           var gol = Self{ .m = 457 };
           const input = &[_]u8{ 35, 6, 42 };

           const decoded1 = try gol.decode(input, .{ .reset_tmp_values = false });
           try testing.expectEqual(1564, decoded1);

           const decoded2 = try gol.decode(input, .{});
           try testing.expectEqual(42, decoded2);
       }

+3 -18

src/root.zig

···

       1
        
       //! By convention, root.zig is the root source file when making a library.

     

       2
        
       const std = @import("std");

     

       3
        
       

     

       4
       -
       pub fn bufferedPrint() !void {

     

       5
       -
           // Stdout is for the actual output of your application, for example if you

     

       6
       -
           // are implementing gzip, then only the compressed bytes should be sent to

     

       7
       -
           // stdout, not any debugging messages.

     

       8
       -
           var stdout_buffer: [1024]u8 = undefined;

     

       9
       -
           var stdout_writer = std.fs.File.stdout().writer(&stdout_buffer);

     

       10
       -
           const stdout = &stdout_writer.interface;

     

       11
        
       

     

       12
       -
           try stdout.print("Run `zig build test` to run the tests.\n", .{});

     

       13
       -
       

     

       14
       -
           try stdout.flush(); // Don't forget to flush!

     

       15
       -
       }

     

       16
       -
       

     

       17
       -
       pub fn add(a: i32, b: i32) i32 {

     

       18
       -
           return a + b;

     

       19
       -
       }

     

       20
       -
       

     

       21
       -
       test "basic add functionality" {

     

       22
       -
           try std.testing.expect(add(3, 7) == 10);

     

       23
        
       }

···

       1
        
       //! By convention, root.zig is the root source file when making a library.

     

       2
        
       const std = @import("std");

     

       3
        
       

     

       4
       +
       pub const Golomb = @import("Golomb.zig");

     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       5
        
       

     

       6
       +
       test {

     

       7
       +
           std.testing.refAllDecls(@This());

     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       0
        
       
     

       8
        
       }