From a12b8e42d3ca289911827afcf4396ab8f2295eb9 Mon Sep 17 00:00:00 2001 From: Altagos Date: Thu, 4 Sep 2025 12:47:37 +0200 Subject: [PATCH] add golomb coding Change-Id: oymrsszqlwqlxvnzltowyuukrpwtrkpl --- src/Golomb.zig | 252 +++++++++++++++++++++++++++++++++++++++++++++++++ src/root.zig | 21 +---- 2 files changed, 255 insertions(+), 18 deletions(-) create mode 100644 src/Golomb.zig diff --git a/src/Golomb.zig b/src/Golomb.zig new file mode 100644 index 0000000..ea7bfcc --- /dev/null +++ b/src/Golomb.zig @@ -0,0 +1,252 @@ +//! Golomb codec implementation for encoding and decoding integers. +//! Golomb coding is a lossless, variable-length encoding scheme that is optimal for geometric distributions. + +const std = @import("std"); + +const Self = @This(); + +/// Golomb parameter M - determines the division point for quotient and remainder +m: usize, + +/// Internal bit buffer for accumulating bits during encoding/decoding +bit_buffer: u8 = 0, +/// Current bit position within the bit buffer (0-7) +bit_idx: u8 = 0, +/// Current byte position in the buffer +byte_idx: usize = 0, + +/// Encodes a value using Golomb coding into the provided buffer. +/// Returns the number of bits written. +pub fn encode( + self: *Self, + buffer: []u8, + value: usize, + opts: struct { + write_padding_bits: bool = false, + reset_tmp_values: bool = true, + }, +) error{BufferTooSmall}!usize { + if (self.m == 0) @panic("The Golomb parameter M must be larger than 0"); + + const b_m = self.bM(); + + const q = @divFloor(value, self.m) + 1; + const b_q = bitLength(q) - 1; + const r = @rem(value, self.m); + const b_r = bitLength(r); + + const needed_bits = b_q + b_q + 1 + b_m; + const buffer_len_bits = needed_bits + self.byte_idx * 8 + self.bit_idx; + + if (buffer_len_bits > buffer.len * 8) return error.BufferTooSmall; + + // Write q + for (0..b_q) |_| { + self.writeBit(buffer, 0); + } + self.writeBits(buffer, q, b_q + 1); + + // Write r + for (0..(b_m - b_r)) |_| { + self.writeBit(buffer, 0); + } + self.writeBits(buffer, r, b_r); + + // Write padding bits + if (opts.write_padding_bits) { + const padding = buffer.len * 8 - buffer_len_bits; + + for (0..padding) |_| { + self.writeBit(buffer, 0); + } + + std.debug.assert(self.bit_buffer == 0); + } + + // Reset helper vars + if (opts.reset_tmp_values) { + self.reset(); + } + + return buffer_len_bits; +} + +/// Decodes a Golomb-encoded value from the buffer. +/// Returns the decoded value. +pub fn decode( + self: *Self, + buffer: []const u8, + opts: struct { reset_tmp_values: bool = true }, +) error{InvalidFormat}!usize { + if (self.m == 0) @panic("The Golomb parameter M must be larger than 0"); + + const b_m = self.bM(); + + var q: usize = 0; + var b_q: u8 = 0; + var r: usize = 0; + + // Read b_q + while (self.readBit(buffer)) |bit| { + if (bit == 0) { + b_q += 1; + } else if (bit == 1) { + q = 1; + break; + } else { + return error.InvalidFormat; + } + } + + // Read q + q <<= @as(u6, @intCast(b_q)); + q |= self.readBits(buffer, b_q) catch return error.InvalidFormat; + q -= 1; + + // Read r + r = self.readBits(buffer, b_m) catch return error.InvalidFormat; + + // Reset helper vars + if (opts.reset_tmp_values) { + self.reset(); + } + + return q * self.m + r; +} + +/// Resets internal state variables used during encoding/decoding. +pub fn reset(self: *Self) void { + self.bit_buffer = 0; + self.bit_idx = 0; + self.byte_idx = 0; +} + +/// Calculates the number of bits needed to represent the remainder in Golomb coding. +fn bM(self: *const Self) u8 { + const b = bitLength(self.m); + return if (isPowerOfTwo(self.m)) b - 1 else b; +} + +/// Writes a single bit to the buffer. +fn writeBit(self: *Self, buffer: []u8, bit: u8) void { + self.bit_buffer = (self.bit_buffer << 1) | (bit & 1); + self.bit_idx += 1; + + if (self.bit_idx == 8) { + buffer[self.byte_idx] = self.bit_buffer; + self.byte_idx += 1; + self.bit_buffer = 0; + self.bit_idx = 0; + } +} + +/// Writes multiple bits from a value to the buffer. +fn writeBits(self: *Self, buffer: []u8, value: usize, count: u8) void { + var i = count; + while (i > 0) { + i -= 1; + const bit = @as(u8, @intCast((value >> @as(u6, @intCast(i))) & 1)); + self.writeBit(buffer, bit); + } +} + +/// Reads a single bit from the buffer. Returns null if at end of buffer. +fn readBit(self: *Self, buffer: []const u8) ?u8 { + if (self.byte_idx > buffer.len) return null; + + const bit = (buffer[self.byte_idx] >> @as(u3, @intCast(7 - self.bit_idx))) & 1; + self.bit_idx += 1; + + if (self.bit_idx == 8) { + self.byte_idx += 1; + self.bit_idx = 0; + } + + return bit; +} + +/// Reads multiple bits from the buffer and returns them as a value. +fn readBits(self: *Self, buffer: []const u8, count: u8) !usize { + var result: usize = 0; + + for (0..count) |_| { + const bit = self.readBit(buffer) orelse return error.OutOfBounds; + result = (result << 1) | @as(usize, bit); + } + + return result; +} + +/// Calculates the number of bits required to represent a value. +fn bitLength(value: anytype) u8 { + return @bitSizeOf(@TypeOf(value)) - @clz(value); +} + +/// Checks if a value is a power of two. +fn isPowerOfTwo(value: usize) bool { + return (value & (value - 1)) == 0; +} + +test "encode val = 42, m = 8" { + const testing = std.testing; + + var gol = Self{ .m = 8 }; + const input: usize = 42; + var encoded: [1]u8 = undefined; + _ = try gol.encode(&encoded, input, .{}); + try testing.expectEqualSlices(u8, &.{50}, &encoded); +} + +test "decode val = {50}, m = 8" { + const testing = std.testing; + + var gol = Self{ .m = 8 }; + const input = &[_]u8{50}; + const decoded = try gol.decode(input, .{}); + + try testing.expectEqual(42, decoded); +} + +test "encode + decode val = 1564, m = 457" { + const testing = std.testing; + + var gol = Self{ .m = 457 }; + const input: usize = 1564; + var encoded: [2]u8 = undefined; + + _ = try gol.encode( + &encoded, + input, + .{ .write_padding_bits = true }, + ); + try testing.expectEqualSlices(u8, &.{ 35, 4 }, &encoded); + + const decoded = try gol.decode(&encoded, .{}); + try testing.expectEqual(input, decoded); +} + +test "encode multiple val = { 1564, 42 }, m = 457" { + const testing = std.testing; + + var gol = Self{ .m = 457 }; + const input = [_]usize{ 1564, 42 }; + var encoded: [3]u8 = undefined; + + _ = try gol.encode(&encoded, input[0], .{ .reset_tmp_values = false }); + _ = try gol.encode(&encoded, input[1], .{}); + + try testing.expectEqualSlices(u8, &.{ 35, 6, 42 }, &encoded); +} + +test "decode multiple val = { 35, 6, 8 }, m = 457" { + const testing = std.testing; + + var gol = Self{ .m = 457 }; + const input = &[_]u8{ 35, 6, 42 }; + + const decoded1 = try gol.decode(input, .{ .reset_tmp_values = false }); + try testing.expectEqual(1564, decoded1); + + const decoded2 = try gol.decode(input, .{}); + try testing.expectEqual(42, decoded2); +} diff --git a/src/root.zig b/src/root.zig index 94c7cd0..15e922a 100644 --- a/src/root.zig +++ b/src/root.zig @@ -1,23 +1,8 @@ //! By convention, root.zig is the root source file when making a library. const std = @import("std"); -pub fn bufferedPrint() !void { - // Stdout is for the actual output of your application, for example if you - // are implementing gzip, then only the compressed bytes should be sent to - // stdout, not any debugging messages. - var stdout_buffer: [1024]u8 = undefined; - var stdout_writer = std.fs.File.stdout().writer(&stdout_buffer); - const stdout = &stdout_writer.interface; +pub const Golomb = @import("Golomb.zig"); - try stdout.print("Run `zig build test` to run the tests.\n", .{}); - - try stdout.flush(); // Don't forget to flush! -} - -pub fn add(a: i32, b: i32) i32 { - return a + b; -} - -test "basic add functionality" { - try std.testing.expect(add(3, 7) == 10); +test { + std.testing.refAllDecls(@This()); } -- 2.43.0