My One Billion Row Challenge solutions in various languages

Auto reformat classes

+16 -8
src/main/java/dev/morling/onebrc/CalculateAverage_ebarlas.java
···
try {
var buffer = channel.map(FileChannel.MapMode.READ_ONLY, pStart, pSize);
partitions[pIdx] = processBuffer(buffer, pIdx == 0);
-
} catch (IOException e) {
throw new RuntimeException(e);
}
};
···
var t = target[j];
if (t == null) {
target[j] = current[j]; // copy ref from current to target
-
} else {
t.min = Math.min(t.min, current[j].min);
t.max = Math.max(t.max, current[j].max);
t.sum += current[j].sum;
···
st = stats[idx] = new Stats(key);
}
readingKey = false;
-
} else {
keyHash = HASH_FACTOR * keyHash + b;
}
-
} else {
if (b == '\n') {
var v = negative ? -val : val;
st.min = Math.min(st.min, v);
···
val = 0;
negative = false;
keyStart = buffer.position();
-
} else if (b == '-') {
negative = true;
-
} else if (b != '.') { // skip '.' since fractional tenth unit after decimal point is assumed
val = val * 10 + (b - '0');
}
}
···
}
private static byte[] readHeader(ByteBuffer buffer) { // read up to and including first newline (or end-of-input)
-
while (buffer.hasRemaining() && buffer.get() != '\n') ;
var header = new byte[buffer.position()];
buffer.get(0, header, 0, header.length);
return header;
}
-
record Partition(byte[] header, byte[] footer, Stats[] stats) {}
private static class Stats { // min, max, and sum values are modeled with integral types that represent tenths of a unit
final byte[] key;
···
try {
var buffer = channel.map(FileChannel.MapMode.READ_ONLY, pStart, pSize);
partitions[pIdx] = processBuffer(buffer, pIdx == 0);
+
}
+
catch (IOException e) {
throw new RuntimeException(e);
}
};
···
var t = target[j];
if (t == null) {
target[j] = current[j]; // copy ref from current to target
+
}
+
else {
t.min = Math.min(t.min, current[j].min);
t.max = Math.max(t.max, current[j].max);
t.sum += current[j].sum;
···
st = stats[idx] = new Stats(key);
}
readingKey = false;
+
}
+
else {
keyHash = HASH_FACTOR * keyHash + b;
}
+
}
+
else {
if (b == '\n') {
var v = negative ? -val : val;
st.min = Math.min(st.min, v);
···
val = 0;
negative = false;
keyStart = buffer.position();
+
}
+
else if (b == '-') {
negative = true;
+
}
+
else if (b != '.') { // skip '.' since fractional tenth unit after decimal point is assumed
val = val * 10 + (b - '0');
}
}
···
}
private static byte[] readHeader(ByteBuffer buffer) { // read up to and including first newline (or end-of-input)
+
while (buffer.hasRemaining() && buffer.get() != '\n')
+
;
var header = new byte[buffer.position()];
buffer.get(0, header, 0, header.length);
return header;
}
+
record Partition(byte[] header, byte[] footer, Stats[] stats) {
+
}
private static class Stats { // min, max, and sum values are modeled with integral types that represent tenths of a unit
final byte[] key;
+1 -2
src/main/java/dev/morling/onebrc/CalculateAverage_seijikun.java
···
private static final String FILE = "./measurements.txt";
-
private static class MeasurementAggregator {
private double min = Double.POSITIVE_INFINITY;
private double max = Double.NEGATIVE_INFINITY;
···
chunkStartPtr = chunkEndPtr;
}
-
try(var executor = Executors.newFixedThreadPool(jobCnt)) {
for (int i = 0; i < jobCnt; ++i) {
executor.submit(chunks[i]);
}
···
private static final String FILE = "./measurements.txt";
private static class MeasurementAggregator {
private double min = Double.POSITIVE_INFINITY;
private double max = Double.NEGATIVE_INFINITY;
···
chunkStartPtr = chunkEndPtr;
}
+
try (var executor = Executors.newFixedThreadPool(jobCnt)) {
for (int i = 0; i < jobCnt; ++i) {
executor.submit(chunks[i]);
}
+13 -13
src/main/java/dev/morling/onebrc/CalculateAverage_truelive.java
···
}
private static Map<String, Measurement> combineMaps(
-
final Map<String, Measurement> map1,
-
final Map<String, Measurement> map2
-
) {
for (final var entry : map2.entrySet()) {
map1.merge(entry.getKey(), entry.getValue(), Measurement::combineWith);
}
···
}
public static void main(final String[] args) throws IOException {
-
//long before = System.currentTimeMillis();
/**
* Shoutout to bjhara
*/
···
final MappedByteBuffer mbb = in.map(
FileChannel.MapMode.READ_ONLY,
start,
-
Math.min(CHUNK_SIZE, total - start)
-
);
int realEnd = mbb.limit() - 1;
while (mbb.get(realEnd) != '\n') {
realEnd--;
···
start += realEnd;
return mbb;
-
} catch (final IOException e) {
throw new RuntimeException(e);
}
}
};
final Map<String, Measurement> reduce = StreamSupport.stream(Spliterators.spliteratorUnknownSize(
-
iterator, Spliterator.IMMUTABLE), true)
-
.parallel()
-
.map(CalculateAverage_truelive::parseBuffer)
-
.reduce(CalculateAverage_truelive::combineMaps).get();
System.out.print("{");
System.out.print(
···
.collect(Collectors.joining(", ")));
System.out.println("}");
-
//System.out.println("Took: " + (System.currentTimeMillis() - before));
}
···
name = new String(arr, 0, len);
bug.position(pos);
bug.mark();
-
} else if (c == '\n') {
final int pos = bug.position();
bug.reset();
final int len = pos - bug.position();
···
}
private static Map<String, Measurement> combineMaps(
+
final Map<String, Measurement> map1,
+
final Map<String, Measurement> map2) {
for (final var entry : map2.entrySet()) {
map1.merge(entry.getKey(), entry.getValue(), Measurement::combineWith);
}
···
}
public static void main(final String[] args) throws IOException {
+
// long before = System.currentTimeMillis();
/**
* Shoutout to bjhara
*/
···
final MappedByteBuffer mbb = in.map(
FileChannel.MapMode.READ_ONLY,
start,
+
Math.min(CHUNK_SIZE, total - start));
int realEnd = mbb.limit() - 1;
while (mbb.get(realEnd) != '\n') {
realEnd--;
···
start += realEnd;
return mbb;
+
}
+
catch (final IOException e) {
throw new RuntimeException(e);
}
}
};
final Map<String, Measurement> reduce = StreamSupport.stream(Spliterators.spliteratorUnknownSize(
+
iterator, Spliterator.IMMUTABLE), true)
+
.parallel()
+
.map(CalculateAverage_truelive::parseBuffer)
+
.reduce(CalculateAverage_truelive::combineMaps).get();
System.out.print("{");
System.out.print(
···
.collect(Collectors.joining(", ")));
System.out.println("}");
+
// System.out.println("Took: " + (System.currentTimeMillis() - before));
}
···
name = new String(arr, 0, len);
bug.position(pos);
bug.mark();
+
}
+
else if (c == '\n') {
final int pos = bug.position();
bug.reset();
final int len = pos - bug.position();
+36 -36
src/main/java/dev/morling/onebrc/CreateMeasurements2.java
···
private static final String FILE = "./measurements2.txt";
static class WeatherStation {
-
final static char[] NUMBERS = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9' };
final String id;
final int meanTemperature;
···
* @param buffer the buffer to append to
*/
void measurement(final CheaperCharBuffer buffer) {
-
// fake -10.9 to +10.9 variance without double operations and rounding
-
// gives us -10 to +10
int m = meanTemperature + (r.nextInt(21) - 10);
// gives us a decimal digit 0 to 9 as char
char d = NUMBERS[r.nextInt(10)];
···
// manual loop unroll for less jumps
for (int i = 0; i < count; i = i + 8) {
-
{
-
// try to fill teh cpu pipeline as much as possible with
-
// independent operations
-
int s1 = r1.nextInt(stationCount);
-
int s2 = r2.nextInt(stationCount);
-
int s3 = r3.nextInt(stationCount);
-
int s4 = r4.nextInt(stationCount);
-
// get us the ojects one after the other to have the array
-
// in our L1 cache and not push it out with other data
-
var w1 = stations.get(s1);
-
var w2 = stations.get(s2);
-
var w3 = stations.get(s3);
-
var w4 = stations.get(s4);
-
// write our data to our buffer
-
w1.measurement(sb);
-
w2.measurement(sb);
-
w3.measurement(sb);
-
w4.measurement(sb);
-
}
-
{
-
int s1 = r1.nextInt(stationCount);
-
int s2 = r2.nextInt(stationCount);
-
int s3 = r3.nextInt(stationCount);
-
int s4 = r4.nextInt(stationCount);
-
var w1 = stations.get(s1);
-
var w2 = stations.get(s2);
-
var w3 = stations.get(s3);
-
var w4 = stations.get(s4);
-
w1.measurement(sb);
-
w2.measurement(sb);
-
w3.measurement(sb);
-
w4.measurement(sb);
-
}
// write the buffer directly, no intermediate string copy
bw.write(sb.data_, 0, sb.length_);
···
private static final String FILE = "./measurements2.txt";
static class WeatherStation {
+
final static char[] NUMBERS = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' };
final String id;
final int meanTemperature;
···
* @param buffer the buffer to append to
*/
void measurement(final CheaperCharBuffer buffer) {
+
// fake -10.9 to +10.9 variance without double operations and rounding
+
// gives us -10 to +10
int m = meanTemperature + (r.nextInt(21) - 10);
// gives us a decimal digit 0 to 9 as char
char d = NUMBERS[r.nextInt(10)];
···
// manual loop unroll for less jumps
for (int i = 0; i < count; i = i + 8) {
+
{
+
// try to fill teh cpu pipeline as much as possible with
+
// independent operations
+
int s1 = r1.nextInt(stationCount);
+
int s2 = r2.nextInt(stationCount);
+
int s3 = r3.nextInt(stationCount);
+
int s4 = r4.nextInt(stationCount);
+
// get us the ojects one after the other to have the array
+
// in our L1 cache and not push it out with other data
+
var w1 = stations.get(s1);
+
var w2 = stations.get(s2);
+
var w3 = stations.get(s3);
+
var w4 = stations.get(s4);
+
// write our data to our buffer
+
w1.measurement(sb);
+
w2.measurement(sb);
+
w3.measurement(sb);
+
w4.measurement(sb);
+
}
+
{
+
int s1 = r1.nextInt(stationCount);
+
int s2 = r2.nextInt(stationCount);
+
int s3 = r3.nextInt(stationCount);
+
int s4 = r4.nextInt(stationCount);
+
var w1 = stations.get(s1);
+
var w2 = stations.get(s2);
+
var w3 = stations.get(s3);
+
var w4 = stations.get(s4);
+
w1.measurement(sb);
+
w2.measurement(sb);
+
w3.measurement(sb);
+
w4.measurement(sb);
+
}
// write the buffer directly, no intermediate string copy
bw.write(sb.data_, 0, sb.length_);