1#!/usr/bin/env bash
2# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
3#
4# BlobDB benchmark script
5#
6# REQUIRES: benchmark.sh is in the tools subdirectory
7#
8# After the execution of this script, log files are available in $output_dir.
9# report.tsv provides high level statistics.
10#
11# Should be run from the parent of the tools directory. The command line is:
12#   [$env_vars] tools/run_blob_bench.sh
13#
14# This runs the following sequence of BlobDB performance tests:
15#   phase 1) write-only - bulkload+compact, overwrite+waitforcompaction
16#   phase 2) read-write - readwhilewriting, fwdrangewhilewriting
17#   phase 3) read-only - readrandom, fwdrange
18#
19
# Exit codes
EXIT_INVALID_ARGS=1

# Binary size units in bytes; each unit is 2^10 times the previous one
K=$((1 << 10))
M=$((K << 10))
G=$((M << 10))
T=$((G << 10))
28
# Prints the usage text to stdout: a summary of the benchmark phases followed
# by the list of environment variables the script reads and their defaults.
# Takes no arguments. Every entry of the table below is emitted as one output
# line with backslash escapes expanded, so "\t" becomes a tab.
display_usage() {
  local -a usage_lines=(
    'usage: run_blob_bench.sh [--help]'
    ''
    'Runs the following sequence of BlobDB benchmark tests using tools/benchmark.sh:'
    '\tPhase 1: write-only tests: bulkload+compact, overwrite+waitforcompaction'
    '\tPhase 2: read-write tests: readwhilewriting, fwdrangewhilewriting'
    '\tPhase 3: read-only tests: readrandom, fwdrange'
    ''
    'Environment Variables:'
    '\tJOB_ID\t\t\t\tIdentifier for the benchmark job, will appear in the results (default: empty)'
    '\tDB_DIR\t\t\t\tPath for the RocksDB data directory (mandatory)'
    '\tWAL_DIR\t\t\t\tPath for the RocksDB WAL directory (mandatory)'
    '\tOUTPUT_DIR\t\t\tPath for the benchmark results (mandatory)'
    '\tNUM_THREADS\t\t\tNumber of threads (default: 16)'
    '\tCOMPRESSION_TYPE\t\tCompression type for the SST files (default: lz4)'
    '\tDB_SIZE\t\t\t\tRaw (uncompressed) database size (default: 1 TB)'
    '\tVALUE_SIZE\t\t\tValue size (default: 1 KB)'
    '\tNUM_KEYS\t\t\tNumber of keys (default: raw database size divided by value size)'
    '\tDURATION\t\t\tIndividual duration for read-write/read-only tests in seconds (default: 1800)'
    '\tWRITE_BUFFER_SIZE\t\tWrite buffer (memtable) size (default: 1 GB)'
    '\tENABLE_BLOB_FILES\t\tEnable blob files (default: 1)'
    '\tMIN_BLOB_SIZE\t\t\tSize threshold for storing values in blob files (default: 0)'
    '\tBLOB_FILE_SIZE\t\t\tBlob file size (default: same as write buffer size)'
    '\tBLOB_COMPRESSION_TYPE\t\tCompression type for the blob files (default: lz4)'
    '\tENABLE_BLOB_GC\t\t\tEnable blob garbage collection (default: 1)'
    '\tBLOB_GC_AGE_CUTOFF\t\tBlob garbage collection age cutoff (default: 0.25)'
    '\tBLOB_GC_FORCE_THRESHOLD\t\tThreshold for forcing garbage collection of the oldest blob files (default: 1.0)'
    '\tTARGET_FILE_SIZE_BASE\t\tTarget SST file size for compactions (default: write buffer size, scaled down if blob files are enabled)'
    '\tMAX_BYTES_FOR_LEVEL_BASE\tMaximum size for the base level (default: 8 * target SST file size)'
  )

  printf '%b\n' "${usage_lines[@]}"
}
59
# The script takes no positional arguments. Any argument prints the usage
# text; --help then exits successfully, anything else is an invalid invocation.
if (( $# > 0 )); then
  display_usage

  case "$1" in
    --help) exit 0 ;;
    *) exit $EXIT_INVALID_ARGS ;;
  esac
fi
69
# Mandatory settings: DB_DIR, WAL_DIR and OUTPUT_DIR must each be set to a
# non-empty value. The first missing one aborts the script with
# EXIT_INVALID_ARGS. Diagnostics are written to stderr so they are not mixed
# into stdout when the script's output is captured or piped.

# shellcheck disable=SC2153
if [ -z "$DB_DIR" ]; then
  echo "DB_DIR is not defined" >&2
  exit $EXIT_INVALID_ARGS
fi

# shellcheck disable=SC2153
if [ -z "$WAL_DIR" ]; then
  echo "WAL_DIR is not defined" >&2
  exit $EXIT_INVALID_ARGS
fi

# shellcheck disable=SC2153
if [ -z "$OUTPUT_DIR" ]; then
  echo "OUTPUT_DIR is not defined" >&2
  exit $EXIT_INVALID_ARGS
fi
87
# Job label and directories are taken from the environment as-is.
# shellcheck disable=SC2153
job_id=$JOB_ID

db_dir=$DB_DIR
wal_dir=$WAL_DIR
output_dir=$OUTPUT_DIR

# General workload knobs; each falls back to its default when the
# corresponding environment variable is unset or empty.
num_threads=${NUM_THREADS:-16}
compression_type=${COMPRESSION_TYPE:-lz4}
duration=${DURATION:-1800}

# Data set dimensions. The key count is only derived (raw size / value size)
# when NUM_KEYS is not supplied.
db_size=${DB_SIZE:-$((T))}
value_size=${VALUE_SIZE:-$((K))}
num_keys=${NUM_KEYS:-$((db_size / value_size))}

write_buffer_size=${WRITE_BUFFER_SIZE:-$((G))}

# Blob file and blob garbage collection options.
enable_blob_files=${ENABLE_BLOB_FILES:-1}
min_blob_size=${MIN_BLOB_SIZE:-0}
blob_file_size=${BLOB_FILE_SIZE:-$write_buffer_size}
blob_compression_type=${BLOB_COMPRESSION_TYPE:-lz4}
enable_blob_garbage_collection=${ENABLE_BLOB_GC:-1}
blob_garbage_collection_age_cutoff=${BLOB_GC_AGE_CUTOFF:-0.25}
blob_garbage_collection_force_threshold=${BLOB_GC_FORCE_THRESHOLD:-1.0}

# Default SST target size: the write buffer size, scaled by 32 / value size
# when blob files are enabled. An explicit TARGET_FILE_SIZE_BASE always wins.
case "$enable_blob_files" in
  1)
    target_file_size_base=${TARGET_FILE_SIZE_BASE:-$((write_buffer_size * 32 / value_size))}
    ;;
  *)
    target_file_size_base=${TARGET_FILE_SIZE_BASE:-$write_buffer_size}
    ;;
esac

# The base level defaults to eight target-sized SST files.
max_bytes_for_level_base=${MAX_BYTES_FOR_LEVEL_BASE:-$((target_file_size_base * 8))}
122
# Print the effective configuration before anything is run. Each row is a
# label, tab padding and the resolved value; rows are printed with backslash
# escapes expanded so the \t sequences come out as tabs.
setup_rows=(
  "Job ID:\t\t\t\t\t$job_id"
  "Data directory:\t\t\t\t$db_dir"
  "WAL directory:\t\t\t\t$wal_dir"
  "Output directory:\t\t\t$output_dir"
  "Number of threads:\t\t\t$num_threads"
  "Compression type for SST files:\t\t$compression_type"
  "Raw database size:\t\t\t$db_size"
  "Value size:\t\t\t\t$value_size"
  "Number of keys:\t\t\t\t$num_keys"
  "Duration of read-write/read-only tests:\t$duration"
  "Write buffer size:\t\t\t$write_buffer_size"
  "Blob files enabled:\t\t\t$enable_blob_files"
  "Blob size threshold:\t\t\t$min_blob_size"
  "Blob file size:\t\t\t\t$blob_file_size"
  "Compression type for blob files:\t$blob_compression_type"
  "Blob GC enabled:\t\t\t$enable_blob_garbage_collection"
  "Blob GC age cutoff:\t\t\t$blob_garbage_collection_age_cutoff"
  "Blob GC force threshold:\t\t$blob_garbage_collection_force_threshold"
  "Target SST file size:\t\t\t$target_file_size_base"
  "Maximum size of base level:\t\t$max_bytes_for_level_base"
)

echo "======================== Benchmark setup ========================"
for setup_row in "${setup_rows[@]}"; do
  printf '%b\n' "$setup_row"
done
echo "================================================================="
145
# Start from a clean slate: recursively remove the data, WAL and output
# directories left over from any previous run.
for stale_dir in "$db_dir" "$wal_dir" "$output_dir"; do
  rm -rf "$stale_dir"
done
149
# NAME=value assignments handed to benchmark.sh through `env -S`, which splits
# the string into separate words. Built piece by piece; the spacing matches the
# layout of one assignment per line.
ENV_VARS="  JOB_ID=$job_id "
ENV_VARS+="  DB_DIR=$db_dir "
ENV_VARS+="  WAL_DIR=$wal_dir "
ENV_VARS+="  OUTPUT_DIR=$output_dir "
ENV_VARS+="  NUM_THREADS=$num_threads "
ENV_VARS+="  COMPRESSION_TYPE=$compression_type "
ENV_VARS+="  VALUE_SIZE=$value_size "
ENV_VARS+="  NUM_KEYS=$num_keys"

# Same assignments plus DURATION, for the tests that run for a fixed time.
ENV_VARS_D="${ENV_VARS} DURATION=${duration}"

# Extra command-line flags common to every test.
PARAMS="  --enable_blob_files=$enable_blob_files "
PARAMS+="  --min_blob_size=$min_blob_size "
PARAMS+="  --blob_file_size=$blob_file_size "
PARAMS+="  --blob_compression_type=$blob_compression_type "
PARAMS+="  --write_buffer_size=$write_buffer_size "
PARAMS+="  --target_file_size_base=$target_file_size_base "
PARAMS+="  --max_bytes_for_level_base=$max_bytes_for_level_base"

# Common flags followed by the blob garbage collection flags.
PARAMS_GC="${PARAMS} "
PARAMS_GC+="  --enable_blob_garbage_collection=$enable_blob_garbage_collection "
PARAMS_GC+="  --blob_garbage_collection_age_cutoff=$blob_garbage_collection_age_cutoff "
PARAMS_GC+="  --blob_garbage_collection_force_threshold=$blob_garbage_collection_force_threshold"
175
# Phase 1 (write-only). DURATION is removed from the environment for these two
# runs (env -u), and the bulk load is the only test run without the blob GC
# flags.
#   bulkload:  bulk load (using fillrandom) + compact
#   overwrite: overwrite + waitforcompaction
env -u DURATION -S "$ENV_VARS" ./tools/benchmark.sh bulkload "$PARAMS"
env -u DURATION -S "$ENV_VARS" ./tools/benchmark.sh overwrite "$PARAMS_GC"

# Phase 2 (read-write) followed by phase 3 (read-only). All four tests get
# DURATION through ENV_VARS_D and run with the blob GC flags, in this order.
for timed_test in readwhilewriting fwdrangewhilewriting readrandom fwdrange; do
  env -S "$ENV_VARS_D" ./tools/benchmark.sh "$timed_test" "$PARAMS_GC"
done

# Keep the LOG* files from the data directory next to the benchmark results.
cp "$db_dir"/LOG* "$output_dir/"
196