From a12b8e42d3ca289911827afcf4396ab8f2295eb9 Mon Sep 17 00:00:00 2001
From: Altagos <jakob@altagos.dev>
Date: Thu, 4 Sep 2025 12:47:37 +0200
Subject: [PATCH] add golomb coding
Change-Id: oymrsszqlwqlxvnzltowyuukrpwtrkpl

---
 src/Golomb.zig | 252 +++++++++++++++++++++++++++++++++++++++++++++++++
 src/root.zig   |  21 +----
 2 files changed, 255 insertions(+), 18 deletions(-)
 create mode 100644 src/Golomb.zig

diff --git a/src/Golomb.zig b/src/Golomb.zig
new file mode 100644
index 0000000..ea7bfcc
--- /dev/null
+++ b/src/Golomb.zig
@@ -0,0 +1,252 @@
+//! Golomb codec implementation for encoding and decoding integers.
+//! Golomb coding is a lossless, variable-length encoding scheme that is optimal for geometric distributions.
+
+const std = @import("std");
+
+const Self = @This();
+
+/// Golomb parameter M: values are split into `value / m` and `value % m`. Must not be 0.
+m: usize,
+
+/// Partial byte being assembled by the encoder, MSB first; the decoder does not use it.
+bit_buffer: u8 = 0,
+/// Number of bits already written to / read from the current byte (0-7).
+bit_idx: u8 = 0,
+/// Index of the current byte in the caller-provided buffer.
+byte_idx: usize = 0,
+
+/// Encodes `value` at the current write position: `b_q` zero bits, then
+/// `q = value / m + 1` in `b_q + 1` bits, then `value % m` in `bM()` bits (MSB first).
+/// Returns the bit position after the value (padding excluded), or
+/// `error.BufferTooSmall` if `buffer` cannot hold it; nothing is written then.
+pub fn encode(
+    self: *Self,
+    buffer: []u8,
+    value: usize,
+    opts: struct {
+        write_padding_bits: bool = false,
+        reset_tmp_values: bool = true,
+    },
+) error{BufferTooSmall}!usize {
+    if (self.m == 0) @panic("The Golomb parameter M must be larger than 0");
+
+    const b_m = self.bM();
+
+    const q = @divFloor(value, self.m) + 1;
+    const b_q = bitLength(q) - 1;
+    const r = @rem(value, self.m);
+
+    const needed_bits = b_q + b_q + 1 + b_m;
+    const buffer_len_bits = needed_bits + self.byte_idx * 8 + self.bit_idx;
+
+    if (buffer_len_bits > buffer.len * 8) return error.BufferTooSmall;
+
+    // Write q: `b_q` zeros announce its width, then q itself in `b_q + 1` bits.
+    self.writeBits(buffer, 0, b_q);
+    self.writeBits(buffer, q, b_q + 1);
+
+    // Write r, left-padded with zeros to exactly `b_m` bits.
+    self.writeBits(buffer, r, b_m);
+
+    // Write padding bits up to the end of the buffer
+    if (opts.write_padding_bits) {
+        const padding = buffer.len * 8 - buffer_len_bits;
+
+        for (0..padding) |_| {
+            self.writeBit(buffer, 0);
+        }
+
+        std.debug.assert(self.bit_buffer == 0);
+    }
+
+    // Reset helper vars. `writeBit` only stores complete bytes, so the pending
+    // bits are flushed (zero-padded) first; otherwise the tail would be lost.
+    if (opts.reset_tmp_values) {
+        if (self.bit_idx != 0) {
+            buffer[self.byte_idx] = self.bit_buffer << @as(u3, @intCast(8 - self.bit_idx));
+        }
+        self.reset();
+    }
+
+    return buffer_len_bits;
+}
+
+/// Decodes one Golomb-encoded value from `buffer`, starting at the current read position.
+/// With `reset_tmp_values = false` the position is kept so the next call continues
+/// right after this value.
+/// Returns `error.InvalidFormat` if the data is truncated or the result overflows `usize`.
+pub fn decode(
+    self: *Self,
+    buffer: []const u8,
+    opts: struct { reset_tmp_values: bool = true },
+) error{InvalidFormat}!usize {
+    if (self.m == 0) @panic("The Golomb parameter M must be larger than 0");
+
+    const b_m = self.bM();
+
+    // Read b_q: count the zero bits in front of the leading 1 bit of q.
+    var b_q: u8 = 0;
+    while (true) {
+        const bit = self.readBit(buffer) orelse return error.InvalidFormat;
+        if (bit == 1) break;
+        b_q += 1;
+        // q has to fit into a usize, so a longer run of zeros cannot be valid.
+        if (b_q >= @bitSizeOf(usize)) return error.InvalidFormat;
+    }
+
+    // Read q: the leading 1 bit (already consumed) followed by `b_q` more bits.
+    var q: usize = @as(usize, 1) << @as(u6, @intCast(b_q));
+    q |= self.readBits(buffer, b_q) catch return error.InvalidFormat;
+    q -= 1;
+
+    // Read r
+    const r = self.readBits(buffer, b_m) catch return error.InvalidFormat;
+
+    // Checked arithmetic: malformed input must not trigger an overflow panic.
+    const scaled = std.math.mul(usize, q, self.m) catch return error.InvalidFormat;
+    const value = std.math.add(usize, scaled, r) catch return error.InvalidFormat;
+
+    // Reset helper vars
+    if (opts.reset_tmp_values) {
+        self.reset();
+    }
+
+    return value;
+}
+
+/// Rewinds the codec to the start of the buffer: clears the pending bits and
+/// both cursors while keeping the configured `m`.
+pub fn reset(self: *Self) void {
+    // Every field except `m` falls back to its declared default of 0.
+    self.* = .{ .m = self.m };
+}
+
+/// Width in bits of the remainder field, i.e. ceil(log2(m)) for m >= 1.
+fn bM(self: *const Self) u8 {
+    const exact_power = isPowerOfTwo(self.m);
+    return bitLength(self.m) - @intFromBool(exact_power);
+}
+
+/// Appends the lowest bit of `bit` to the pending byte, MSB first; once eight
+/// bits are collected the byte is stored in `buffer` and the cursor advances.
+fn writeBit(self: *Self, buffer: []u8, bit: u8) void {
+    self.bit_buffer <<= 1;
+    self.bit_buffer |= bit & 1;
+    self.bit_idx += 1;
+    if (self.bit_idx != 8) return;
+    buffer[self.byte_idx] = self.bit_buffer;
+    self.byte_idx += 1;
+    self.bit_buffer = 0;
+    self.bit_idx = 0;
+}
+
+/// Emits the lowest `count` bits of `value`, most significant bit first.
+fn writeBits(self: *Self, buffer: []u8, value: usize, count: u8) void {
+    for (0..count) |done| {
+        // Bit positions run from `count - 1` down to 0.
+        const shift: u6 = @intCast(count - 1 - done);
+        const bit: u8 = @intCast((value >> shift) & 1);
+        self.writeBit(buffer, bit);
+    }
+}
+
+/// Reads the next bit (MSB first) from the buffer. Returns null if at end of buffer.
+fn readBit(self: *Self, buffer: []const u8) ?u8 {
+    // `>=`, not `>`: `buffer.len` itself is already one past the last valid index.
+    if (self.byte_idx >= buffer.len) return null;
+
+    const bit = (buffer[self.byte_idx] >> @as(u3, @intCast(7 - self.bit_idx))) & 1;
+    self.bit_idx += 1;
+    if (self.bit_idx == 8) {
+        self.byte_idx += 1;
+        self.bit_idx = 0;
+    }
+
+    return bit;
+}
+
+/// Reads `count` bits, most significant first, and returns them as an integer.
+/// Fails with `error.OutOfBounds` when the buffer ends before `count` bits are read.
+fn readBits(self: *Self, buffer: []const u8, count: u8) !usize {
+    var acc: usize = 0;
+    var remaining = count;
+    while (remaining > 0) : (remaining -= 1) {
+        const next: usize = self.readBit(buffer) orelse return error.OutOfBounds;
+        acc = (acc << 1) | next;
+    }
+    return acc;
+}
+
+/// Number of significant bits in `value`: the type's width minus its leading zeros.
+fn bitLength(value: anytype) u8 {
+    return @as(u8, @bitSizeOf(@TypeOf(value))) - @clz(value);
+}
+
+/// Checks if a value is a power of two, i.e. has exactly one bit set (false for 0).
+fn isPowerOfTwo(value: usize) bool {
+    return @popCount(value) == 1;
+}
+
+test "encode val = 42, m = 8" {
+    // 42 = 5 * 8 + 2: q = 5 + 1 = 6 is written as `00 110`, r = 2 as `010`.
+    var codec: Self = .{ .m = 8 };
+    var out: [1]u8 = undefined;
+
+    _ = try codec.encode(&out, 42, .{});
+    const expected = [_]u8{0b0011_0010};
+    try std.testing.expectEqualSlices(u8, &expected, &out);
+}
+
+test "decode val = {50}, m = 8" {
+    // 50 = 0b0011_0010: `00 110` gives q = 6 - 1 = 5, `010` gives r = 2.
+    const bytes = [_]u8{50};
+    var codec: Self = .{ .m = 8 };
+
+    const value = try codec.decode(&bytes, .{});
+
+    try std.testing.expectEqual(5 * 8 + 2, value);
+}
+
+test "encode + decode val = 1564, m = 457" {
+    // 1564 = 3 * 457 + 193 needs 14 bits; padding fills the last two bits of
+    // the second byte with zeros.
+    const value: usize = 1564;
+    var codec: Self = .{ .m = 457 };
+    var bytes: [2]u8 = undefined;
+
+    // Encode with padding so the trailing partial byte is written out.
+    _ = try codec.encode(&bytes, value, .{ .write_padding_bits = true });
+
+    const expected = [_]u8{ 35, 4 };
+    try std.testing.expectEqualSlices(u8, &expected, &bytes);
+
+    // `encode` reset the cursors, so decoding starts at the first bit again.
+    const round_trip = try codec.decode(&bytes, .{});
+    try std.testing.expectEqual(value, round_trip);
+}
+
+test "encode multiple val = { 1564, 42 }, m = 457" {
+    const values = [_]usize{ 1564, 42 };
+    var codec: Self = .{ .m = 457 };
+    var bytes: [3]u8 = undefined;
+
+    // Only the last call resets, so both code words are packed back to back.
+    for (values, 0..) |value, i| {
+        _ = try codec.encode(&bytes, value, .{ .reset_tmp_values = i + 1 == values.len });
+    }
+
+    try std.testing.expectEqualSlices(u8, &[_]u8{ 35, 6, 42 }, &bytes);
+}
+
+test "decode multiple val = { 35, 6, 42 }, m = 457" {
+    const testing = std.testing;
+
+    var gol = Self{ .m = 457 };
+    const input = &[_]u8{ 35, 6, 42 };
+
+    // Keep the read position so the second call continues after the first value.
+    const decoded1 = try gol.decode(input, .{ .reset_tmp_values = false });
+    try testing.expectEqual(1564, decoded1);
+    const decoded2 = try gol.decode(input, .{});
+    try testing.expectEqual(42, decoded2);
+}
diff --git a/src/root.zig b/src/root.zig
index 94c7cd0..15e922a 100644
--- a/src/root.zig
+++ b/src/root.zig
@@ -1,23 +1,8 @@
 //! By convention, root.zig is the root source file when making a library.
 const std = @import("std");
 
-pub fn bufferedPrint() !void {
-    // Stdout is for the actual output of your application, for example if you
-    // are implementing gzip, then only the compressed bytes should be sent to
-    // stdout, not any debugging messages.
-    var stdout_buffer: [1024]u8 = undefined;
-    var stdout_writer = std.fs.File.stdout().writer(&stdout_buffer);
-    const stdout = &stdout_writer.interface;
+pub const Golomb = @import("Golomb.zig");
 
-    try stdout.print("Run `zig build test` to run the tests.\n", .{});
-
-    try stdout.flush(); // Don't forget to flush!
-}
-
-pub fn add(a: i32, b: i32) i32 {
-    return a + b;
-}
-
-test "basic add functionality" {
-    try std.testing.expect(add(3, 7) == 10);
+test {
+    std.testing.refAllDecls(@This());
 }
-- 
2.43.0