My 1 Billion Row Challenge solutions in various languages
at main 11 kB view raw
#!/bin/bash
#
#  Copyright 2023 The original authors
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

# Benchmarks one or more forks.
#
# Usage: evaluate.sh <fork name> (<fork name 2> ...)
#
# For every fork this script
#   1. sources ./prepare_<fork>.sh if present (otherwise selects the default JDK),
#   2. runs ./test.sh for the fork, then again against $MEASUREMENTS_FILE,
#   3. times ./calculate_average_<fork>.sh with hyperfine ($RUNS runs),
# and finally prints a summary plus a Markdown leaderboard.
#
# Environment:
#   BUILD_JAVA_VERSION  JDK selected via `sdk use` before the runs
#                       (defaults to 21.0.1-open).

set -eo pipefail

if [ -z "$1" ]; then
  echo "Usage: evaluate.sh <fork name> (<fork name 2> ...)"
  echo " for each fork, there must be a 'calculate_average_<fork name>.sh' script and an optional 'prepare_<fork name>.sh'."
  exit 1
fi

BOLD_WHITE='\033[1;37m'
CYAN='\033[0;36m'
GREEN='\033[0;32m'
PURPLE='\033[0;35m'
BOLD_RED='\033[1;31m'
RED='\033[0;31m'
BOLD_YELLOW='\033[1;33m'
RESET='\033[0m' # No Color

MEASUREMENTS_FILE="measurements_10K_1B.txt"
RUNS=5
DEFAULT_JAVA_VERSION="21.0.1-open"
: "${BUILD_JAVA_VERSION:=21.0.1-open}"
RUN_TIME_LIMIT=300 # seconds

# Command prefixes are kept as arrays so they can be expanded safely with
# "${ARRAY[@]}"; an empty array expands to nothing.
TIMEOUT=()     # wrapper that enforces RUN_TIME_LIMIT, if one is available
CPU_PINNING=() # wrapper that restricts the benchmark to a fixed set of cores
if [ "$(uname -s)" == "Linux" ]; then
  TIMEOUT=(timeout -v "$RUN_TIME_LIMIT")
  # numactl --physcpubind=0-7 runs the benchmark on 8 cores only
  CPU_PINNING=(numactl --physcpubind=0-7)
else # MacOs
  if [ -x "$(command -v gtimeout)" ]; then
    TIMEOUT=(gtimeout -v "$RUN_TIME_LIMIT") # from `brew install coreutils`
  else
    echo -e "${BOLD_YELLOW}WARNING${RESET} gtimeout not available, benchmark runs may take indefinitely long."
  fi
fi

# Exits with status 1 unless the command named by $1 resolves to an executable.
function check_command_installed {
  if ! [ -x "$(command -v "$1")" ]; then
    echo "Error: $1 is not installed." >&2
    exit 1
  fi
}

# Echoes the given command line to stderr (prefixed with "+ ") and then runs it.
# Returns the exit status of the command.
function print_and_execute() {
  echo "+ $*" >&2
  "$@"
}

# Points the measurements.txt symlink at $MEASUREMENTS_FILE, replacing any
# existing file of that name.
function relink_measurements {
  print_and_execute rm -f measurements.txt
  print_and_execute ln -s "$MEASUREMENTS_FILE" measurements.txt
}

# Returns 0 if fork $1 has been recorded in the global `failed` array.
function is_failed {
  local candidate
  for candidate in "${failed[@]}"; do
    if [ "$candidate" == "$1" ]; then
      return 0
    fi
  done
  return 1
}

# Prints the trimmed mean of the run times stored in hyperfine JSON export $1:
# the fastest and the slowest run are discarded, the rest is averaged.
function trimmed_mean_of {
  jq -r '.results[0].times | sort_by(.|tonumber) | .[1:-1] | add / length' "$1"
}

check_command_installed java
check_command_installed hyperfine
check_command_installed jq
check_command_installed bc
# Checked up front (rather than per fork) so a missing tool is reported before
# any time is spent on preparing and testing forks.
if [ "${#CPU_PINNING[@]}" -gt 0 ]; then
  check_command_installed numactl
fi

# Validate that ./calculate_average_<fork>.sh exists for each fork
for fork in "$@"; do
  if [ ! -f "./calculate_average_$fork.sh" ]; then
    echo -e "${BOLD_RED}ERROR${RESET}: ./calculate_average_$fork.sh does not exist." >&2
    exit 1
  fi
done

## SDKMAN Setup
# 1. Custom check for sdkman installed; not sure why check_command_installed doesn't detect it properly
if [ ! -f "$HOME/.sdkman/bin/sdkman-init.sh" ]; then
  echo -e "${BOLD_RED}ERROR${RESET}: sdkman is not installed." >&2
  exit 1
fi

# 2. Init sdkman in this script
source "$HOME/.sdkman/bin/sdkman-init.sh"

# 3. make sure the default java version is installed
if [ ! -d "$HOME/.sdkman/candidates/java/$DEFAULT_JAVA_VERSION" ]; then
  print_and_execute sdk install java "$DEFAULT_JAVA_VERSION"
fi

# 4. Install missing SDK java versions in any of the prepare_*.sh scripts for the provided forks
for fork in "$@"; do
  if [ -f "./prepare_$fork.sh" ]; then
    grep -h "^sdk use" "./prepare_$fork.sh" | cut -d' ' -f4 | while read -r version; do
      if [ ! -d "$HOME/.sdkman/candidates/java/$version" ]; then
        print_and_execute sdk install java "$version"
      fi
    done || true # grep returns exit code 1 when no match, `|| true` prevents the script from exiting early
  fi
done
## END - SDKMAN Setup

# Check if SMT is enabled (we want it disabled)
if [ -f "/sys/devices/system/cpu/smt/active" ]; then
  if [ "$(cat /sys/devices/system/cpu/smt/active)" != "0" ]; then
    echo -e "${BOLD_YELLOW}WARNING${RESET} SMT is enabled"
  fi
fi

# Check if Turbo Boost is enabled (we want it disabled)
if [ -f "/sys/devices/system/cpu/cpufreq/boost" ]; then
  if [ "$(cat /sys/devices/system/cpu/cpufreq/boost)" != "0" ]; then
    echo -e "${BOLD_YELLOW}WARNING${RESET} Turbo Boost is enabled"
  fi
fi

print_and_execute sdk use java "$BUILD_JAVA_VERSION"
print_and_execute java --version
# print_and_execute ./mvnw --quiet clean verify

relink_measurements

echo ""

# check if measurements_xxx.out exists
if [ ! -f "${MEASUREMENTS_FILE%.txt}.out" ]; then
  echo -e "${BOLD_RED}ERROR${RESET}: ${MEASUREMENTS_FILE%.txt}.out does not exist." >&2
  echo "Please create it with:"
  echo ""
  echo " ./calculate_average_baseline.sh > ${MEASUREMENTS_FILE%.txt}.out"
  echo ""
  exit 1
fi

# Run tests and benchmark for each fork
filetimestamp=$(date +"%Y%m%d%H%M%S") # same for all fork.out files from this run
failed=()
for fork in "$@"; do
  set +e # we don't want prepare.sh, test.sh or hyperfine failing on 1 fork to exit the script early

  # Run prepare script
  if [ -f "./prepare_$fork.sh" ]; then
    print_and_execute source "./prepare_$fork.sh"
  else
    print_and_execute sdk use java "$DEFAULT_JAVA_VERSION"
  fi

  # Run the test suite
  if ! print_and_execute "${TIMEOUT[@]}" ./test.sh "$fork"; then
    failed+=("$fork")
    echo ""
    echo -e "${BOLD_RED}FAILURE${RESET}: ./test.sh $fork failed"

    continue
  fi
  echo ""

  # Run the test on $MEASUREMENTS_FILE; this serves as the warmup
  if ! print_and_execute "${TIMEOUT[@]}" ./test.sh "$fork" "$MEASUREMENTS_FILE"; then
    failed+=("$fork")
    echo ""
    echo -e "${BOLD_RED}FAILURE${RESET}: ./test.sh $fork $MEASUREMENTS_FILE failed"

    continue
  fi
  echo ""

  # re-link measurements.txt since test.sh deleted it
  relink_measurements

  # Use hyperfine to run the benchmark for each fork
  HYPERFINE_OPTS=(
    --warmup 0
    --runs "$RUNS"
    --export-json "$fork-$filetimestamp-timing.json"
    --output "./$fork-$filetimestamp.out"
  )

  # hyperfine takes the benchmarked command as a single string, so the
  # timeout prefix is joined with spaces here ("${TIMEOUT[*]}").
  # CPU_PINNING is only populated on Linux; elsewhere hyperfine runs unwrapped.
  if ! "${CPU_PINNING[@]}" hyperfine "${HYPERFINE_OPTS[@]}" "${TIMEOUT[*]} ./calculate_average_$fork.sh 2>&1"; then
    # Hyperfine already prints the error message
    failed+=("$fork")
    echo ""
    continue
  fi
done
set -e

# Summary
echo -e "${BOLD_WHITE}Summary${RESET}"
for fork in "$@"; do
  # skip reporting results for failed forks
  if is_failed "$fork"; then
    echo -e " ${RED}$fork${RESET}: command failed or output did not match"
    continue
  fi

  # Trimmed mean = The slowest and the fastest runs are discarded, the
  # mean value of the remaining three runs is the result for that contender
  trimmed_mean=$(trimmed_mean_of "$fork-$filetimestamp-timing.json")
  raw_times=$(jq -r '.results[0].times | join(",")' "$fork-$filetimestamp-timing.json")

  if [ "$fork" == "$1" ]; then
    color=$CYAN
  elif [ "$fork" == "$2" ]; then
    color=$GREEN
  else
    color=$PURPLE
  fi

  echo -e " ${color}$fork${RESET}: trimmed mean ${BOLD_WHITE}$trimmed_mean${RESET}, raw times ${BOLD_WHITE}$raw_times${RESET}"
done
echo ""

## Leaderboard - prints the leaderboard in Markdown table format
echo -e "${BOLD_WHITE}Leaderboard${RESET}"

# 1. Create a temp file to store the leaderboard entries.
#    Both the raw and the sorted file are removed on any exit path.
leaderboard_temp_file=$(mktemp)
trap 'rm -f -- "$leaderboard_temp_file" "$leaderboard_temp_file.sorted"' EXIT

# 2. Process each fork and append the 1-line entry to the temp file
for fork in "$@"; do
  # skip reporting results for failed forks
  if is_failed "$fork"; then
    continue
  fi

  trimmed_mean=$(trimmed_mean_of "$fork-$filetimestamp-timing.json")

  # trimmed_mean is in seconds
  # Format trimmed_mean as MM::SS.mmm
  # using bc
  trimmed_mean_minutes=$(echo "$trimmed_mean / 60" | bc)
  trimmed_mean_seconds=$(echo "$trimmed_mean % 60 / 1" | bc)
  trimmed_mean_ms=$(echo "($trimmed_mean - $trimmed_mean_minutes * 60 - $trimmed_mean_seconds) * 1000 / 1" | bc)
  trimmed_mean_formatted=$(printf "%02d:%02d.%03d" "$trimmed_mean_minutes" "$trimmed_mean_seconds" "$trimmed_mean_ms")

  # Get Github user's name from public Github API (rate limited after ~50 calls, so results are cached in github_users.txt)
  set +e
  # -m 1: use the first cached entry only; -f2-: keep names that contain ';'
  github_user__name=$(grep -m 1 "^$fork;" github_users.txt | cut -d ';' -f2-)
  if [ -z "$github_user__name" ]; then
    github_user__name=$(curl -s "https://api.github.com/users/$fork" | jq -r '.name' | tr -d '"')
    # An empty result (e.g. the request failed) is treated like "null": fall
    # back to the fork name and do not cache it.
    if [ -n "$github_user__name" ] && [ "$github_user__name" != "null" ]; then
      echo "$fork;$github_user__name" >> github_users.txt
    else
      github_user__name=$fork
    fi
  fi
  set -e

  # Read java version from prepare_$fork.sh if it exists, otherwise assume the default version
  java_version="$DEFAULT_JAVA_VERSION"
  # Hard-coding the note message for now
  notes=""
  if [ -f "./prepare_$fork.sh" ]; then
    # `|| true`: grep exits with 1 when nothing matches, which would otherwise
    # abort the script here because of `set -e` and `pipefail`.
    detected_java_version=$(grep -F "sdk use java" "./prepare_$fork.sh" | cut -d' ' -f4) || true
    if [ -n "$detected_java_version" ]; then
      java_version=$detected_java_version
    fi

    if grep -F "native-image" -q "./prepare_$fork.sh"; then
      notes="GraalVM native binary"
    fi
  fi

  # check if Java source file uses Unsafe
  if grep -F "theUnsafe" -q ./src/main/java*/dev/morling/onebrc/CalculateAverage_"$fork".java; then
    # if notes is not empty, append a comma and space before the unsafe note
    notes="${notes:+$notes, }uses Unsafe"
  fi

  # The leading "<trimmed_mean>;" column exists only for sorting and is
  # stripped again in step 3; '#' is the placeholder for the rank.
  printf '%s;| # | %s | [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_%s.java)| %s | [%s](https://github.com/%s) | %s |\n' \
    "$trimmed_mean" \
    "$trimmed_mean_formatted" \
    "$fork" \
    "$java_version" \
    "$github_user__name" \
    "$fork" \
    "$notes" >> "$leaderboard_temp_file"
done

# 3. Sort leaderboard_temp_file by trimmed_mean and remove the sorting column
#    LC_ALL=C: always parse '.' as the decimal separator.
#    -f 2-: keep everything after the sorting column, even if it contains ';'.
LC_ALL=C sort -n "$leaderboard_temp_file" | cut -d ';' -f 2- > "$leaderboard_temp_file.sorted"

# 4. Print the leaderboard
echo ""
echo "| # | Result (m:s.ms) | Implementation | JDK | Submitter | Notes |"
echo "|---|-----------------|--------------------|-----|---------------|-----------|"
# If $leaderboard_temp_file.sorted has more than 3 entries, include rankings.
# Only the first '#' of a row (the rank placeholder) is substituted.
if (( $(wc -l < "$leaderboard_temp_file.sorted") > 3 )); then
  sed -e '1s/#/1/' -e '2s/#/2/' -e '3s/#/3/' -e '4,$s/#/ /' "$leaderboard_temp_file.sorted"
else
  # Don't show rankings
  sed -e 's/#/ /' "$leaderboard_temp_file.sorted"
fi
echo ""

# 5. Cleanup of the temp files is handled by the EXIT trap set in step 1
## END - Leaderboard

# Finalize .out files
echo "Raw results saved to file(s):"
for fork in "$@"; do
  if [ -f "$fork-$filetimestamp-timing.json" ]; then
    cat "$fork-$filetimestamp-timing.json" >> "$fork-$filetimestamp.out"
    rm "$fork-$filetimestamp-timing.json"
  fi

  if [ -f "$fork-$filetimestamp.out" ]; then
    echo " $fork-$filetimestamp.out"
  fi
done