My 1 billion row challenge solutions in various languages
#!/bin/bash
#
# Copyright 2023 The original authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Strict mode: abort on the first unhandled failure (-e) and let a pipeline
# fail when any of its stages fails (pipefail). `-u` is deliberately not
# enabled: the script reads positional parameters such as "$2" that may be
# unset.
set -eo pipefail

# At least one fork name is required. The usage text is an error message for
# an invalid invocation, so it goes to stderr like the script's other errors.
if [[ -z "$1" ]]; then
  {
    echo "Usage: evaluate.sh <fork name> (<fork name 2> ...)"
    echo " for each fork, there must be a 'calculate_average_<fork name>.sh' script and an optional 'prepare_<fork name>.sh'."
  } >&2
  exit 1
fi

# ANSI colour sequences. They are stored with a literal "\033" and are only
# turned into escape characters when printed with `echo -e`.
BOLD_WHITE='\033[1;37m'
CYAN='\033[0;36m'
GREEN='\033[0;32m'
PURPLE='\033[0;35m'
BOLD_RED='\033[1;31m'
RED='\033[0;31m'
BOLD_YELLOW='\033[1;33m'
RESET='\033[0m' # No Color

# Benchmark configuration
MEASUREMENTS_FILE="measurements_10K_1B.txt" # data set that measurements.txt is linked to
RUNS=5                                      # number of hyperfine runs per fork
DEFAULT_JAVA_VERSION="21.0.1-open"          # JDK used for forks without a prepare script
# JDK selected with `sdk use` before the per-fork runs. It can be overridden
# from the environment; the default follows DEFAULT_JAVA_VERSION so the two
# cannot drift apart when the default JDK is bumped.
: "${BUILD_JAVA_VERSION:=$DEFAULT_JAVA_VERSION}"
RUN_TIME_LIMIT=300 # seconds

# Command prefix that aborts a run after RUN_TIME_LIMIT seconds. It stays
# empty when no timeout utility is found, in which case runs are unbounded.
TIMEOUT=""
case "$(uname -s)" in
  Linux)
    TIMEOUT="timeout -v $RUN_TIME_LIMIT"
    ;;
  *) # MacOs
    if [[ -x "$(command -v gtimeout)" ]]; then
      TIMEOUT="gtimeout -v $RUN_TIME_LIMIT" # from `brew install coreutils`
    else
      echo -e "${BOLD_YELLOW}WARNING${RESET} gtimeout not available, benchmark runs may take indefinitely long."
    fi
    ;;
esac

#######################################
# Abort the script unless the given command can be resolved by the shell.
# Anything `command -v` can find is accepted: executables on PATH as well as
# shell functions, builtins and aliases.
# Arguments: $1 - name of the command to look up
# Outputs:   an error message on stderr when the command is missing
# Returns:   0 when the command is available; otherwise exits the whole
#            script with status 1
#######################################
check_command_installed() {
  if ! command -v -- "$1" > /dev/null 2>&1; then
    echo "Error: $1 is not installed." >&2
    exit 1
  fi
}

#######################################
# Echo a command line to stderr, prefixed with "+ ", then run it.
# Arguments: "$@" - the command and its arguments
# Outputs:   the trace line on stderr, plus whatever the command prints
# Returns:   the exit status of the executed command
#######################################
print_and_execute() {
  # Print the words one by one instead of embedding "$@" in a string, so the
  # trace line does not depend on how an array expands inside quotes.
  {
    printf '+'
    printf ' %s' "$@"
    printf '\n'
  } >&2
  "$@"
}

# Stop right away unless every external tool used further down is available.
for required_tool in java hyperfine jq bc; do
  check_command_installed "$required_tool"
done

# Every fork named on the command line must come with its own
# ./calculate_average_<fork>.sh launcher; abort on the first one missing.
for fork; do
  [[ -f "./calculate_average_$fork.sh" ]] && continue
  echo -e "${BOLD_RED}ERROR${RESET}: ./calculate_average_$fork.sh does not exist." >&2
  exit 1
done

## SDKMAN Setup
# 1. Custom check for sdkman installed: look for its init script instead of
#    using check_command_installed (presumably `sdk` is a shell function that
#    only exists after sourcing that script, not an executable on PATH).
if [ ! -f "$HOME/.sdkman/bin/sdkman-init.sh" ]; then
  echo -e "${BOLD_RED}ERROR${RESET}: sdkman is not installed." >&2
  exit 1
fi

# 2. Init sdkman in this script
source "$HOME/.sdkman/bin/sdkman-init.sh"

# 3. make sure the default java version is installed
if [ ! -d "$HOME/.sdkman/candidates/java/$DEFAULT_JAVA_VERSION" ]; then
  print_and_execute sdk install java "$DEFAULT_JAVA_VERSION"
fi

# 4. Install missing SDK java versions in any of the prepare_*.sh scripts for the provided forks
for fork in "$@"; do
  if [ -f "./prepare_$fork.sh" ]; then
    # The version list is read from file descriptor 3 so that the loop body
    # keeps the script's own stdin. NOTE(review): `sdk install` may ask an
    # interactive question on stdin; if it shared the loop's input it could
    # swallow the remaining version lines - confirm against SDKMAN.
    # A prepare script without any "sdk use" line simply yields no input; the
    # exit status of the process substitution is not checked.
    while read -r version <&3; do
      [[ -n "$version" ]] || continue
      if [ ! -d "$HOME/.sdkman/candidates/java/$version" ]; then
        # Best effort, as before: a JDK that cannot be installed must not stop
        # the evaluation of the other forks, but the failure is now reported.
        if ! print_and_execute sdk install java "$version"; then
          echo -e "${BOLD_YELLOW}WARNING${RESET} could not install java $version required by ./prepare_$fork.sh" >&2
        fi
      fi
    done 3< <(grep -h "^sdk use" "./prepare_$fork.sh" | cut -d' ' -f4)
  fi
done
## END - SDKMAN Setup

# Warn when SMT or Turbo Boost is enabled (we want both disabled). Each entry
# pairs a sysfs flag file with the feature name used in the warning; a file
# that does not exist on this machine is skipped silently.
for cpu_probe in \
  "/sys/devices/system/cpu/smt/active:SMT" \
  "/sys/devices/system/cpu/cpufreq/boost:Turbo Boost"; do
  sysfs_flag_file=${cpu_probe%%:*}
  cpu_feature=${cpu_probe#*:}

  [[ -f "$sysfs_flag_file" ]] || continue
  if [[ "$(cat "$sysfs_flag_file")" != "0" ]]; then
    echo -e "${BOLD_YELLOW}WARNING${RESET} $cpu_feature is enabled"
  fi
done

# Select the build JDK and show which java is active.
print_and_execute sdk use java "$BUILD_JAVA_VERSION"
print_and_execute java --version
# print_and_execute ./mvnw --quiet clean verify

# Point measurements.txt at the data set used for this evaluation.
print_and_execute rm -f measurements.txt
print_and_execute ln -s "$MEASUREMENTS_FILE" measurements.txt

echo ""

# check if measurements_xxx.out exists; the name is derived from
# MEASUREMENTS_FILE by swapping the .txt suffix for .out
expected_output_file="${MEASUREMENTS_FILE%.txt}.out"
if [ ! -f "$expected_output_file" ]; then
  # The whole message, including the hint, is an error report and goes to stderr.
  {
    echo -e "${BOLD_RED}ERROR${RESET}: $expected_output_file does not exist."
    echo "Please create it with:"
    echo ""
    echo " ./calculate_average_baseline.sh > $expected_output_file"
    echo ""
  } >&2
  exit 1
fi

# Run tests and benchmark for each fork
filetimestamp=$(date +"%Y%m%d%H%M%S") # same for all fork.out files from this run
failed=()                             # forks whose test or benchmark failed

# TIMEOUT holds a whole command prefix (for example "timeout -v 300") or is
# empty. Split it into words once so it can be expanded as an array below.
read -r -a timeout_cmd <<< "$TIMEOUT"

for fork in "$@"; do
  set +e # we don't want prepare.sh, test.sh or hyperfine failing on 1 fork to exit the script early

  # Run prepare script
  if [ -f "./prepare_$fork.sh" ]; then
    print_and_execute source "./prepare_$fork.sh"
  else
    print_and_execute sdk use java "$DEFAULT_JAVA_VERSION"
  fi

  # Run the test suite
  if ! print_and_execute "${timeout_cmd[@]}" ./test.sh "$fork"; then
    failed+=("$fork")
    echo ""
    echo -e "${BOLD_RED}FAILURE${RESET}: ./test.sh $fork failed"

    continue
  fi
  echo ""

  # Run the test on $MEASUREMENTS_FILE; this serves as the warmup
  if ! print_and_execute "${timeout_cmd[@]}" ./test.sh "$fork" "$MEASUREMENTS_FILE"; then
    failed+=("$fork")
    echo ""
    echo -e "${BOLD_RED}FAILURE${RESET}: ./test.sh $fork $MEASUREMENTS_FILE failed"

    continue
  fi
  echo ""

  # re-link measurements.txt since test.sh deleted it
  print_and_execute rm -f measurements.txt
  print_and_execute ln -s "$MEASUREMENTS_FILE" measurements.txt

  # Use hyperfine to run the benchmark for each fork. The options are kept in
  # an array so that each one reaches hyperfine as exactly one argument.
  HYPERFINE_OPTS=(
    --warmup 0
    --runs "$RUNS"
    --export-json "$fork-$filetimestamp-timing.json"
    --output "./$fork-$filetimestamp.out"
  )
  # hyperfine receives the benchmarked command as a single string.
  benchmark_cmd="$TIMEOUT ./calculate_average_$fork.sh 2>&1"

  # On Linux the benchmark is prefixed with numactl --physcpubind=0-7 for
  # running it only with 8 cores; on other platforms (MacOS) hyperfine runs
  # unpinned.
  bench_prefix=()
  if [ "$(uname -s)" == "Linux" ]; then
    check_command_installed numactl
    bench_prefix=(numactl --physcpubind=0-7)
  fi

  # Catch hyperfine command failed
  if ! "${bench_prefix[@]}" hyperfine "${HYPERFINE_OPTS[@]}" "$benchmark_cmd"; then
    failed+=("$fork")
    # Hyperfine already prints the error message
    echo ""
    continue
  fi
done
set -e

# Summary: one line per fork with its trimmed mean and the raw run times
echo -e "${BOLD_WHITE}Summary${RESET}"
for fork in "$@"; do
  # skip reporting results for failed forks (exact name match: the array is
  # joined with spaces and searched for " <fork> ")
  if [[ " ${failed[*]} " == *" ${fork} "* ]]; then
    echo -e " ${RED}$fork${RESET}: command failed or output did not match"
    continue
  fi

  timing_json="$fork-$filetimestamp-timing.json"

  # Trimmed mean = The slowest and the fastest runs are discarded, the
  # mean value of the remaining runs is the result for that contender
  trimmed_mean=$(jq -r '.results[0].times | sort_by(.|tonumber) | .[1:-1] | add / length' "$timing_json")
  raw_times=$(jq -r '.results[0].times | join(",")' "$timing_json")

  # The first and second fork on the command line get their own colours.
  case "$fork" in
    "$1") color=$CYAN ;;
    "$2") color=$GREEN ;;
    *) color=$PURPLE ;;
  esac

  echo -e " ${color}$fork${RESET}: trimmed mean ${BOLD_WHITE}$trimmed_mean${RESET}, raw times ${BOLD_WHITE}$raw_times${RESET}"
done
echo ""

## Leaderboard - prints the leaderboard in Markdown table format
echo -e "${BOLD_WHITE}Leaderboard${RESET}"

# 1. Create a temp file to store the leaderboard entries, plus the name of the
#    sorted copy that is derived from it in step 3
leaderboard_temp_file=$(mktemp)
leaderboard_sorted_file="$leaderboard_temp_file.sorted"

# 2. Process each fork and append the 1-line entry to the temp file
for fork in "$@"; do
  # skip reporting results for failed forks
  if [[ " ${failed[*]} " == *" ${fork} "* ]]; then
    continue
  fi

  trimmed_mean=$(jq -r '.results[0].times | sort_by(.|tonumber) | .[1:-1] | add / length' "$fork-$filetimestamp-timing.json")

  # trimmed_mean is in seconds
  # Format trimmed_mean as MM:SS.mmm
  # using bc
  trimmed_mean_minutes=$(echo "$trimmed_mean / 60" | bc)
  trimmed_mean_seconds=$(echo "$trimmed_mean % 60 / 1" | bc)
  trimmed_mean_ms=$(echo "($trimmed_mean - $trimmed_mean_minutes * 60 - $trimmed_mean_seconds) * 1000 / 1" | bc)
  trimmed_mean_formatted=$(printf "%02d:%02d.%03d" "$trimmed_mean_minutes" "$trimmed_mean_seconds" "$trimmed_mean_ms")

  # Get Github user's name from public Github API (rate limited after ~50 calls, so results are cached in github_users.txt)
  set +e
  github_user__name=$(grep -m 1 "^$fork;" github_users.txt | cut -d ';' -f2-)
  if [ -z "$github_user__name" ]; then
    github_user__name=$(curl -s "https://api.github.com/users/$fork" | jq -r '.name' | tr -d '"')
    # Only cache a real name. An empty result (curl or jq produced no output)
    # is treated like "null": fall back to the fork name and cache nothing.
    if [[ -n "$github_user__name" && "$github_user__name" != "null" ]]; then
      echo "$fork;$github_user__name" >> github_users.txt
    else
      github_user__name=$fork
    fi
  fi
  set -e

  # Read java version from prepare_$fork.sh if it exists, otherwise assume the default version
  java_version=$DEFAULT_JAVA_VERSION
  # Hard-coding the note message for now
  notes=""
  if [ -f "./prepare_$fork.sh" ]; then
    # grep exits with 1 when the prepare script has no "sdk use java" line.
    # Under `set -e` with pipefail that would abort the whole script, so the
    # failure is tolerated and the default version is kept. Only the first
    # match is used, which keeps the table cell on one line.
    prepare_java_version=$(grep -m 1 -F "sdk use java" "./prepare_$fork.sh" | cut -d' ' -f4) || true
    if [[ -n "$prepare_java_version" ]]; then
      java_version=$prepare_java_version
    fi

    if grep -F "native-image" -q "./prepare_$fork.sh"; then
      notes="GraalVM native binary"
    fi
  fi

  # check if Java source file uses Unsafe (the java* glob is left unquoted on purpose)
  if grep -F "theUnsafe" -q ./src/main/java*/dev/morling/onebrc/CalculateAverage_"$fork".java; then
    # if notes is not empty, append a comma and space before the unsafe note
    notes="${notes:+$notes, }uses Unsafe"
  fi

  # One entry per fork: "<trimmed mean>;<markdown row>". The leading column is
  # only there for sorting; '#' is the placeholder for the rank.
  printf '%s;| # | %s | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_%s.java)| %s | [%s](https://github.com/%s) | %s |\n' \
    "$trimmed_mean" "$trimmed_mean_formatted" "$fork" "$java_version" \
    "$github_user__name" "$fork" "$notes" >> "$leaderboard_temp_file"
done

# 3. Sort leaderboard_temp_file by trimmed_mean and remove the sorting column
#    (-f 2- keeps the rest of the row even if a name or note contains ';')
sort -n "$leaderboard_temp_file" | cut -d ';' -f 2- > "$leaderboard_sorted_file"

# 4. Print the leaderboard
echo ""
echo "| # | Result (m:s.ms) | Implementation | JDK | Submitter | Notes |"
echo "|---|-----------------|--------------------|-----|---------------|-----------|"
# If the sorted file has more than 3 entries, include rankings
if (( $(wc -l < "$leaderboard_sorted_file") > 3 )); then
  head -n 1 "$leaderboard_sorted_file" | tr '#' 1
  head -n 2 "$leaderboard_sorted_file" | tail -n 1 | tr '#' 2
  head -n 3 "$leaderboard_sorted_file" | tail -n 1 | tr '#' 3
  tail -n+4 "$leaderboard_sorted_file" | tr '#' ' '
else
  # Don't show rankings
  tr '#' ' ' < "$leaderboard_sorted_file"
fi
echo ""

# 5. Cleanup: remove both temp files, including the sorted copy
rm -f "$leaderboard_temp_file" "$leaderboard_sorted_file"
## END - Leaderboard

# Finalize .out files: append each fork's hyperfine timing JSON to its .out
# file, delete the JSON, and list the .out files that exist.
echo "Raw results saved to file(s):"
for fork in "$@"; do
  timing_json="$fork-$filetimestamp-timing.json"
  out_file="$fork-$filetimestamp.out"

  if [ -f "$timing_json" ]; then
    cat -- "$timing_json" >> "$out_file"
    rm -- "$timing_json"
  fi

  if [ -f "$out_file" ]; then
    echo " $out_file"
  fi
done