1#!/usr/bin/env bash
2# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
3# The RocksDB regression test script.
4# REQUIREMENT: must be able to run make db_bench in the current directory
5#
6# This script will do the following things in order:
7#
8# 1. check out the specified rocksdb commit.
9# 2. build db_bench using the specified commit
10# 3. setup test directory $TEST_PATH.  If not specified, then the test directory
11#    will be "/tmp/rocksdb/regression_test"
12# 4. run set of benchmarks on the specified host
13#    (can be either locally or remotely)
14# 5. generate report in the $RESULT_PATH.  If RESULT_PATH is not specified,
15#    RESULT_PATH will be set to $TEST_PATH/current_time
16#
17# = Examples =
18# * Run the regression test using rocksdb commit abcdef that outputs results
19#   and temp files in "/my/output/dir"
#
21#   TEST_PATH=/my/output/dir COMMIT_ID=abcdef ./tools/regression_test.sh
22#
# * Run the regression test on a remote host under "/my/output/dir" directory
24#   and stores the result locally in "/my/benchmark/results" using commit
25#   abcdef and with the rocksdb options specified in /my/path/to/OPTIONS-012345
26#   with 1000000000 keys in each benchmark in the regression test where each
27#   key and value are 100 and 900 bytes respectively:
28#
29#   REMOTE_USER_AT_HOST=yhchiang@my.remote.host \
30#       TEST_PATH=/my/output/dir \
31#       RESULT_PATH=/my/benchmark/results \
32#       COMMIT_ID=abcdef \
33#       OPTIONS_FILE=/my/path/to/OPTIONS-012345 \
34#       NUM_KEYS=1000000000 \
35#       KEY_SIZE=100 \
36#       VALUE_SIZE=900 \
37#       ./tools/regression_test.sh
38#
39# = Regression test environmental parameters =
40#   DEBUG: If true, then the script will not build db_bench if db_bench already
41#       exists
42#       Default: 0
#   TEST_MODE: If 1, build the initial DB (fillseq,compactall) when it does
#       not exist yet, and then run the benchmarks;
#       if 0, only build the initial DB;
#       if 2, only run the benchmarks.
#       Default: 1
47#   TEST_PATH: the root directory of the regression test.
48#       Default: "/tmp/rocksdb/regression_test"
49#   RESULT_PATH: the directory where the regression results will be generated.
50#       Default: "$TEST_PATH/current_time"
51#   REMOTE_USER_AT_HOST: If set, then test will run on the specified host under
52#       TEST_PATH directory and outputs test results locally in RESULT_PATH
53#       The REMOTE_USER_AT_HOST should follow the format user-id@host.name
54#   DB_PATH: the path where the rocksdb database will be created during the
55#       regression test.  Default:  $TEST_PATH/db
#   WAL_PATH: the path where the rocksdb WAL will be written.
#       Default:  "" (empty; db_bench is then invoked with an empty --wal_dir)
58#   OPTIONS_FILE:  If specified, then the regression test will use the specified
59#       file to initialize the RocksDB options in its benchmarks.  Note that
60#       this feature only work for commits after 88acd93 or rocksdb version
61#       later than 4.9.
62#   DELETE_TEST_PATH: If true, then the test directory will be deleted
63#       after the script ends.
64#       Default: 0
65#
66# = db_bench parameters =
67#   NUM_THREADS:  The number of concurrent foreground threads that will issue
68#       database operations in the benchmark.  Default: 16.
69#   NUM_KEYS:  The key range that will be used in the entire regression test.
70#       Default: 1G.
71#   NUM_OPS:  The number of operations (reads, writes, or deletes) that will
72#       be issued in EACH thread.
73#       Default: $NUM_KEYS / $NUM_THREADS
74#   KEY_SIZE:  The size of each key in bytes in db_bench.  Default: 100.
75#   VALUE_SIZE:  The size of each value in bytes in db_bench.  Default: 900.
76#   CACHE_SIZE:  The size of RocksDB block cache used in db_bench.  Default: 1G
77#   STATISTICS:  If 1, then statistics is on in db_bench.  Default: 0.
78#   COMPRESSION_RATIO:  The compression ratio of the key generated in db_bench.
79#       Default: 0.5.
#   HISTOGRAM:  If 1, then db_bench is run with its histogram option turned on.
#       Default: 1.
81#   STATS_PER_INTERVAL:  If 1, then the statistics will be reported for every
82#       STATS_INTERVAL_SECONDS seconds.  Default 1.
#   STATS_INTERVAL_SECONDS:  If STATS_PER_INTERVAL is set to 1, then statistics
#       will be reported for every STATS_INTERVAL_SECONDS.  Default 600.
#   MAX_BACKGROUND_FLUSHES:  The maximum number of concurrent flushes in
#       db_bench.  Default: 4.
87#   MAX_BACKGROUND_COMPACTIONS:  The maximum number of concurrent compactions
88#       in db_bench.  Default: 16.
89#   NUM_HIGH_PRI_THREADS:  The number of high-pri threads available for
90#       concurrent flushes in db_bench.  Default: 4.
91#   NUM_LOW_PRI_THREADS:  The number of low-pri threads available for
92#       concurrent compactions in db_bench.  Default: 16.
93#   SEEK_NEXTS:  Controls how many Next() will be called after seek.
94#       Default: 10.
95#   SEED:  random seed that controls the randomness of the benchmark.
96#       Default: $( date +%s )
97
98#==============================================================================
99#  CONSTANT
100#==============================================================================
# printf format of the SUMMARY.csv header row.  Every column is a
# right-aligned string.  Column groups, one per source line below:
#   run identification (6), workload settings (5), background jobs (2),
#   throughput + latency percentiles + debug flag (7), real/user/sys time (3).
TITLE_FORMAT='%40s,%25s,%30s,%7s,%9s,%8s,'\
'%10s,%13s,%14s,%11s,%12s,'\
'%7s,%11s,'\
'%9s,%10s,%10s,%10s,%10s,%10s,%5s,'\
'%5s,%5s,%5s\n'

# printf format of one SUMMARY.csv data row.  Same column layout and widths
# as TITLE_FORMAT; the numeric columns are rendered with %.0f, i.e. rounded
# to whole numbers.
DATA_FORMAT='%40s,%25s,%30s,%7s,%9s,%8s,'\
'%10s,%13.0f,%14s,%11s,%12s,'\
'%7s,%11s,'\
'%9.0f,%10.0f,%10.0f,%10.0f,%10.0f,%10.0f,%5.0f,'\
'%5.0f,%5.0f,%5.0f\n'

# Regex whose first capture group is the ops/sec figure of a db_bench result
# line.  NOTE: "$1" is expanded here, at file scope, so it is the first
# command-line argument of the script itself (empty when the script is run
# without arguments, as in the usage examples above).
MAIN_PATTERN="${1}[[:blank:]]+:.*[[:blank:]]+([0-9\.]+)[[:blank:]]+ops/sec"

# Regex whose five capture groups are the P50, P75, P99, P99.9 and P99.99
# values of a db_bench "Percentiles:" line.
PERC_PATTERN='Percentiles: P50: ([0-9\.]+) P75: ([0-9\.]+) '\
'P99: ([0-9\.]+) P99.9: ([0-9\.]+) P99.99: ([0-9\.]+)'
118#==============================================================================
119
# Entry point of the regression test.  Initializes the settings, builds
# db_bench and ldb, prepares the test directories and then, depending on
# TEST_MODE, builds the initial database and/or runs the benchmark suite.
function main {
  TEST_ROOT_DIR=${TEST_PATH:-"/tmp/rocksdb/regression_test"}
  init_arguments $TEST_ROOT_DIR

  build_db_bench_and_ldb
  setup_test_directory

  # TEST_MODE 0 or 1: make sure the origin database exists.  DB_PATH is
  # pointed at ORIGIN_PATH only for the duration of this step.
  if [ $TEST_MODE -le 1 ]; then
    local benchmark_db_path=$DB_PATH
    DB_PATH=$ORIGIN_PATH
    if ! test_remote "test -d $DB_PATH"; then
      echo "Building DB..."
      # compactall alone will not print ops or threads, which would make
      # update_report fail, so the report update is switched off here.
      run_db_bench "fillseq,compactall" $NUM_KEYS 1 0 0
    fi
    DB_PATH=$benchmark_db_path
  fi

  # TEST_MODE 1 or 2: checkpoint the origin database into DB_PATH and run
  # the benchmarks against that checkpoint.
  if [ $TEST_MODE -ge 1 ]; then
    local benchmark
    build_checkpoint
    for benchmark in readrandom readwhilewriting; do
      run_db_bench "$benchmark"
    done
    # deleterandom is limited to a tenth of the key range, split per thread.
    run_db_bench "deleterandom" $((NUM_KEYS / 10 / $NUM_THREADS))
    for benchmark in seekrandom seekrandomwhilewriting multireadrandom; do
      run_db_bench "$benchmark"
    done
  fi

  cleanup_test_directory $TEST_ROOT_DIR
  echo ""
  echo "Benchmark completed!  Results are available in $RESULT_PATH"
}
152
153############################################################################
# Initializes every global setting of the regression test.  Each setting
# keeps a value already present in the environment and otherwise falls back
# to the default below.
#
# $1 --- root directory of the regression test
# $3 --- DB path.  Default: $DB_PATH from the environment, else "$1/db"
# $4 --- WAL path.  Default: $WAL_PATH from the environment, else ""
# $5 --- directory holding db_bench / ldb.
#        Default: "." locally, "$1/db_bench" when REMOTE_USER_AT_HOST is set
function init_arguments {
  K=1024
  M=$((1024 * K))
  G=$((1024 * M))

  current_time=$(date +"%F-%H:%M:%S")
  RESULT_PATH=${RESULT_PATH:-"$1/results/$current_time"}
  # Identify the source tree being tested: mercurial first, then git.
  COMMIT_ID=$(hg id -i 2>/dev/null || git rev-parse HEAD 2>/dev/null || echo 'unknown')
  SUMMARY_FILE="$RESULT_PATH/SUMMARY.csv"

  # The script header documents DB_PATH and WAL_PATH as environment
  # parameters, so honor them when no positional override is given.
  DB_PATH=${3:-${DB_PATH:-"$1/db"}}
  # The origin DB lives two directory levels above DB_PATH, in "db".
  ORIGIN_PATH=${ORIGIN_PATH:-"$(dirname "$(dirname "$DB_PATH")")/db"}
  WAL_PATH=${4:-${WAL_PATH:-""}}
  if [ -z "$REMOTE_USER_AT_HOST" ]; then
    DB_BENCH_DIR=${5:-"."}
  else
    DB_BENCH_DIR=${5:-"$1/db_bench"}
  fi

  DEBUG=${DEBUG:-0}
  TEST_MODE=${TEST_MODE:-1}
  SCP=${SCP:-"scp"}
  SSH=${SSH:-"ssh"}
  NUM_THREADS=${NUM_THREADS:-16}
  NUM_KEYS=${NUM_KEYS:-$((1 * G))}  # key range
  NUM_OPS=${NUM_OPS:-$((NUM_KEYS / NUM_THREADS))}
  KEY_SIZE=${KEY_SIZE:-100}
  VALUE_SIZE=${VALUE_SIZE:-900}
  CACHE_SIZE=${CACHE_SIZE:-$((1 * G))}
  STATISTICS=${STATISTICS:-0}
  COMPRESSION_RATIO=${COMPRESSION_RATIO:-0.5}
  HISTOGRAM=${HISTOGRAM:-1}
  NUM_MULTI_DB=${NUM_MULTI_DB:-1}
  STATS_PER_INTERVAL=${STATS_PER_INTERVAL:-1}
  STATS_INTERVAL_SECONDS=${STATS_INTERVAL_SECONDS:-600}
  MAX_BACKGROUND_FLUSHES=${MAX_BACKGROUND_FLUSHES:-4}
  MAX_BACKGROUND_COMPACTIONS=${MAX_BACKGROUND_COMPACTIONS:-16}
  NUM_HIGH_PRI_THREADS=${NUM_HIGH_PRI_THREADS:-4}
  NUM_LOW_PRI_THREADS=${NUM_LOW_PRI_THREADS:-16}
  DELETE_TEST_PATH=${DELETE_TEST_PATH:-0}
  SEEK_NEXTS=${SEEK_NEXTS:-10}
  SEED=${SEED:-$( date +%s )}
  MULTIREAD_BATCH_SIZE=${MULTIREAD_BATCH_SIZE:-128}
  MULTIREAD_STRIDE=${MULTIREAD_STRIDE:-12}
  PERF_LEVEL=${PERF_LEVEL:-1}
}
200
# Runs one db_bench invocation (locally, or over ssh when
# REMOTE_USER_AT_HOST is set), appends its output to $RESULT_PATH/<benchmark>
# and optionally adds a row to the summary report.  Aborts the script when
# another db_bench is already running or when db_bench itself fails.
#
# $1 --- benchmark name
# $2 --- number of operations.  Default: $NUM_OPS
# $3 --- number of threads.  Default $NUM_THREADS
# $4 --- use_existing_db.  Default: 1
# $5 --- update_report. Default: 1
function run_db_bench {
  local ops=${2:-$NUM_OPS}
  local threads=${3:-$NUM_THREADS}
  USE_EXISTING_DB=${4:-1}
  UPDATE_REPORT=${5:-1}
  echo ""
  echo "======================================================================="
  echo "Benchmark $1"
  echo "======================================================================="
  echo ""

  # setup_options_file runs in a command substitution, so an "exit" inside it
  # only leaves that subshell; its status has to be checked here.
  local options_file_arg options_status
  options_file_arg=$(setup_options_file)
  options_status=$?
  echo "$options_file_arg"
  exit_on_error "$options_status" "setup_options_file"

  # use `which time` to avoid using bash's internal time command.
  # For a remote run the command line is wrapped in double quotes below, so
  # the "$" must be escaped to survive the local eval and be expanded on the
  # remote side.  For a local run it must NOT be escaped, otherwise the eval
  # sees a literal "$" followed by "(" and fails with a syntax error.
  local time_cmd='$(which time)'
  if [ -n "$REMOTE_USER_AT_HOST" ]; then
    time_cmd='\$(which time)'
  fi

  local -a bench_args=(
    "$DB_BENCH_DIR/db_bench"
    "--benchmarks=$1"
    "--db=$DB_PATH"
    "--wal_dir=$WAL_PATH"
    "--use_existing_db=$USE_EXISTING_DB"
    "--perf_level=$PERF_LEVEL"
    "--disable_auto_compactions"
    "--threads=$threads"
    "--num=$NUM_KEYS"
    "--reads=$ops"
    "--writes=$ops"
    "--deletes=$ops"
    "--key_size=$KEY_SIZE"
    "--value_size=$VALUE_SIZE"
    "--cache_size=$CACHE_SIZE"
    "--statistics=$STATISTICS"
    # contributes no word at all when no options file is configured
    ${options_file_arg:+"$options_file_arg"}
    "--compression_ratio=$COMPRESSION_RATIO"
    "--histogram=$HISTOGRAM"
    "--seek_nexts=$SEEK_NEXTS"
    "--stats_per_interval=$STATS_PER_INTERVAL"
    "--stats_interval_seconds=$STATS_INTERVAL_SECONDS"
    "--max_background_flushes=$MAX_BACKGROUND_FLUSHES"
    "--num_multi_db=$NUM_MULTI_DB"
    "--max_background_compactions=$MAX_BACKGROUND_COMPACTIONS"
    "--num_high_pri_threads=$NUM_HIGH_PRI_THREADS"
    "--num_low_pri_threads=$NUM_LOW_PRI_THREADS"
    "--seed=$SEED"
    "--multiread_batched=true"
    "--batch_size=$MULTIREAD_BATCH_SIZE"
    "--multiread_stride=$MULTIREAD_STRIDE"
  )
  # The subshell + 2>&1 folds the report of time(1), printed on stderr, into
  # the benchmark output.
  local db_bench_cmd="($time_cmd -p ${bench_args[*]}) 2>&1"
  local ps_cmd="ps aux"
  if [ -n "$REMOTE_USER_AT_HOST" ]; then
    echo "Running benchmark remotely on $REMOTE_USER_AT_HOST"
    db_bench_cmd="$SSH $REMOTE_USER_AT_HOST \"$db_bench_cmd\""
    ps_cmd="$SSH $REMOTE_USER_AT_HOST $ps_cmd"
  fi

  ## make sure no db_bench is running
  # The process listing is captured first and its status checked on its own:
  # judging by the status of a "ps | grep" pipeline would not work, because
  # grep returns non-zero whenever nothing matches.
  local ps_output running
  ps_output=$(eval "$ps_cmd")
  exit_on_error $? "$ps_cmd"

  running=$(grep db_bench <<< "$ps_output" | grep -v grep)
  if [ -n "$running" ]; then
    echo "Stopped regression_test.sh as there're still db_bench processes running:"
    echo "$running"
    echo "Clean up test directory"
    cleanup_test_directory "$TEST_ROOT_DIR"
    exit 2
  fi

  ## run the db_bench
  # The exit status of db_bench is the first element of PIPESTATUS.  It is
  # read inside the same eval, in the current shell, right after the
  # pipeline: an assignment made inside a pipeline stage or a ( ... ) group
  # would be lost with its subshell.
  local db_bench_error=0
  local pipeline="$db_bench_cmd | tee -a $RESULT_PATH/$1"
  echo "$pipeline"
  eval "$pipeline"'; db_bench_error=${PIPESTATUS[0]}'
  exit_on_error "$db_bench_error" "$db_bench_cmd"

  if [ "$UPDATE_REPORT" -ne 0 ]; then
    update_report "$1" "$RESULT_PATH/$1" "$ops" "$threads"
  fi
}
287
# Creates the database(s) under DB_PATH as ldb checkpoints of the origin
# database(s) under ORIGIN_PATH.  Every command is prefixed with ssh when
# REMOTE_USER_AT_HOST is set.
function build_checkpoint {
  cmd_prefix=""
  if [ -n "$REMOTE_USER_AT_HOST" ]; then
    cmd_prefix="$SSH $REMOTE_USER_AT_HOST "
  fi

  # Single database: checkpoint ORIGIN_PATH straight into DB_PATH.
  if ! [ $NUM_MULTI_DB -gt 1 ]; then
    # checkpoint cannot build in directory already exists
    $cmd_prefix rm -rf $DB_PATH
    echo "Building checkpoint: $ORIGIN_PATH -> $DB_PATH ..."
    $cmd_prefix $DB_BENCH_DIR/ldb checkpoint --checkpoint_dir=$DB_PATH \
      --db=$ORIGIN_PATH --try_load_options 2>&1
    return
  fi

  # Several databases: checkpoint each directory that find reports with a
  # link count of 2 into the directory of the same name under DB_PATH.
  dirs=$($cmd_prefix find $ORIGIN_PATH -type d -links 2)
  for dir in $dirs; do
    db_index=$(basename $dir)
    echo "Building checkpoints: $ORIGIN_PATH/$db_index -> $DB_PATH/$db_index ..."
    $cmd_prefix $DB_BENCH_DIR/ldb checkpoint \
      --checkpoint_dir=$DB_PATH/$db_index \
      --db=$ORIGIN_PATH/$db_index --try_load_options 2>&1
  done
}
309
# Prints the product of $1 and $2 as computed by bc, so that non-integer
# operands are supported.
function multiply {
  bc <<< "$1 * $2"
}
313
# Extracts throughput, latency percentiles and timing from a db_bench log
# and appends them as one row to $SUMMARY_FILE.
#
# $1 --- name of the benchmark
# $2 --- the filename of the output log of db_bench
# $3 --- number of operations per thread (reported as-is)
# $4 --- number of threads (reported as-is)
function update_report {
  local main_result perc_statement perc_status
  main_result=$(grep -- "$1" "$2")
  exit_on_error $?

  # Without histograms db_bench prints no "Percentiles:" line, so a missing
  # line is only an error when HISTOGRAM is enabled.
  perc_statement=$(grep Percentile "$2")
  perc_status=$?
  if [ "${HISTOGRAM:-1}" != "0" ]; then
    exit_on_error "$perc_status"
  fi

  # Obtain ops / sec
  local ops_per_s=0
  if [[ $main_result =~ $MAIN_PATTERN ]]; then
    ops_per_s=${BASH_REMATCH[1]}
  fi

  # Obtain percentile information: p50, p75, p99, p99.9, p99.99
  local -a perc=(0 0 0 0 0)
  if [[ $perc_statement =~ $PERC_PATTERN ]]; then
    perc=("${BASH_REMATCH[@]:1:5}")
  fi

  # Parse the output of the time command (the last three lines of the log)
  local real_sec user_sec sys_sec
  real_sec=$(tail -n 3 "$2" | awk '/real/ {print $2}')
  user_sec=$(tail -n 3 "$2" | awk '/user/ {print $2}')
  sys_sec=$(tail -n 3 "$2" | awk '/sys/ {print $2}')

  # Every argument is quoted: an empty value (e.g. REMOTE_USER_AT_HOST on a
  # local run) must still occupy its column instead of vanishing and
  # shifting all following columns to the left.
  printf "$DATA_FORMAT" \
    "$COMMIT_ID" "$1" "$REMOTE_USER_AT_HOST" "$NUM_MULTI_DB" "$NUM_KEYS" \
    "$KEY_SIZE" "$VALUE_SIZE" \
    "$(multiply "$COMPRESSION_RATIO" 100)" \
    "$3" "$4" "$CACHE_SIZE" \
    "$MAX_BACKGROUND_FLUSHES" "$MAX_BACKGROUND_COMPACTIONS" \
    "$ops_per_s" \
    "$(multiply "${perc[0]:-0}" 1000)" \
    "$(multiply "${perc[1]:-0}" 1000)" \
    "$(multiply "${perc[2]:-0}" 1000)" \
    "$(multiply "${perc[3]:-0}" 1000)" \
    "$(multiply "${perc[4]:-0}" 1000)" \
    "$DEBUG" \
    "$real_sec" \
    "$user_sec" \
    "$sys_sec" \
    >> "$SUMMARY_FILE"
  exit_on_error $?
}
358
# Aborts the script when a command has failed.
#
# $1 --- exit status to check; nothing happens when it is 0
# $2 --- (optional) the command that failed, echoed for diagnosis
#
# On failure an "ERROR" marker is appended to $SUMMARY_FILE and the script
# exits with status $1.
function exit_on_error {
  local status=${1:-0}
  if [ "$status" -ne 0 ]; then
    echo ""
    echo "ERROR: Benchmark did not complete successfully."
    if [ -n "$2" ]; then
      echo "Failure command: $2"
    fi
    echo "Partial results are output to $RESULT_PATH"
    # The result directory may not exist yet (e.g. when the build fails
    # before the test directories are set up); skip the marker in that case
    # instead of emitting a second, unrelated redirection error.
    if [ -n "$SUMMARY_FILE" ] && [ -d "$(dirname "$SUMMARY_FILE")" ]; then
      echo "ERROR" >> "$SUMMARY_FILE"
    fi
    exit "$status"
  fi
}
371
# Rebuilds db_bench and ldb from a clean tree in the current directory with
# DEBUG_LEVEL=0.  The script is aborted when either make step fails.
function build_db_bench_and_ldb {
  echo "Building db_bench & ldb ..."

  make clean || exit_on_error $?
  DEBUG_LEVEL=0 make db_bench ldb -j32 || exit_on_error $?
}
381
# Runs command $1 through test_remote and aborts the script, reporting the
# command, when it fails.
function run_remote {
  test_remote "$1" || exit_on_error $? "$1"
}
386
# Evaluates command $1 and returns its exit status without aborting.  When
# REMOTE_USER_AT_HOST is set, the command is wrapped in single quotes and
# executed on that host through $SSH; otherwise it is evaluated locally.
function test_remote {
  cmd="$1"
  if [ -n "$REMOTE_USER_AT_HOST" ]; then
    cmd="$SSH $REMOTE_USER_AT_HOST '$cmd'"
  fi
  eval "$cmd"
}
395
# Evaluates command $1 on the local machine and aborts the script when it
# fails.  The command is passed on to exit_on_error so that the failure
# report names it, as run_remote does.
function run_local {
  eval "$1"
  exit_on_error $? "$1"
}
400
# Prints the "--options_file=..." argument for db_bench, followed by an
# empty line; prints only the empty line when OPTIONS_FILE is not set.
# For a remote run the options file is first copied with $SCP to
# $DB_BENCH_DIR/OPTIONS_FILE on the remote host and that path is used.
function setup_options_file {
  if [ -z "$OPTIONS_FILE" ]; then
    echo ""
    return
  fi

  options_file="$OPTIONS_FILE"
  if [ -n "$REMOTE_USER_AT_HOST" ]; then
    options_file="$DB_BENCH_DIR/OPTIONS_FILE"
    run_local "$SCP $OPTIONS_FILE $REMOTE_USER_AT_HOST:$options_file"
  fi
  echo "--options_file=$options_file"
  echo ""
}
413
# Removes the directories left over from a previous run, recreates them,
# copies db_bench and ldb to the remote host when one is configured, and
# starts a fresh $SUMMARY_FILE with the header row.
function setup_test_directory {
  echo "Deleting old regression test directories and creating new ones"

  run_remote "rm -rf $DB_PATH"
  # On a local run DB_BENCH_DIR is the directory the binaries were built in:
  # deleting it would either fail (rm refuses to remove ".") or wipe the
  # build.  Only the copy on the remote host is disposable, which is also
  # how cleanup_test_directory treats it.
  if [ -n "$REMOTE_USER_AT_HOST" ]; then
    run_remote "rm -rf $DB_BENCH_DIR"
  fi
  run_local "rm -rf $RESULT_PATH"

  if [ -n "$WAL_PATH" ]; then
    run_remote "rm -rf $WAL_PATH"
    run_remote "mkdir -p $WAL_PATH"
  fi

  run_remote "mkdir -p $DB_PATH"

  run_remote "mkdir -p $DB_BENCH_DIR"
  run_remote "ls -l $DB_BENCH_DIR"

  if [ -n "$REMOTE_USER_AT_HOST" ]; then
    run_local "$SCP ./db_bench $REMOTE_USER_AT_HOST:$DB_BENCH_DIR/db_bench"
    run_local "$SCP ./ldb $REMOTE_USER_AT_HOST:$DB_BENCH_DIR/ldb"
  fi

  run_local "mkdir -p $RESULT_PATH"

  # Header row of the summary; the columns match the rows written by
  # update_report.
  printf "$TITLE_FORMAT" \
    "commit id" "benchmark" "user@host" "num-dbs" "key-range" "key-size" \
    "value-size" "compress-rate" "ops-per-thread" "num-threads" "cache-size" \
    "flushes" "compactions" \
    "ops-per-s" "p50" "p75" "p99" "p99.9" "p99.99" "debug" \
    "real-sec" "user-sec" "sys-sec" \
    >> "$SUMMARY_FILE"
  exit_on_error $?
}
447
# Final cleanup.  When DELETE_TEST_PATH is non-zero, removes the DB and WAL
# directories, the db_bench directory on the remote host (remote runs only)
# and the test root $1.  Otherwise everything is kept and the DB and WAL
# locations are printed.
#
# $1 --- root directory of the regression test
function cleanup_test_directory {
  if ! [ "$DELETE_TEST_PATH" -ne 0 ]; then
    echo "------------ DEBUG MODE ------------"
    echo "DB  PATH: $DB_PATH"
    echo "WAL PATH: $WAL_PATH"
    return
  fi

  echo "Clear old regression test directories and creating new ones"
  run_remote "rm -rf $DB_PATH"
  run_remote "rm -rf $WAL_PATH"
  # The binary directory is only a disposable copy on a remote host.
  if [ -n "$REMOTE_USER_AT_HOST" ]; then
    run_remote "rm -rf $DB_BENCH_DIR"
  fi
  run_remote "rm -rf $1"
}
464
465############################################################################
466
# Run the regression test, forwarding the command-line arguments unchanged
# (each argument stays one word, even if it contains whitespace).
main "$@"
469