#!/usr/bin/env bash
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#
# BlobDB benchmark script
#
# REQUIRES: benchmark.sh is in the tools subdirectory
#
# After the execution of this script, log files are available in $output_dir.
# report.tsv provides high level statistics.
#
# Should be run from the parent of the tools directory. The command line is:
# [$env_vars] tools/run_blob_bench.sh
#
# This runs the following sequence of BlobDB performance tests:
# phase 1) write-only - bulkload+compact, overwrite+waitforcompaction
# phase 2) read-write - readwhilewriting, fwdrangewhilewriting
# phase 3) read-only - readrandom, fwdrange
#

# Exit Codes
EXIT_INVALID_ARGS=1

# Size constants
K=1024
M=$((1024 * K))
G=$((1024 * M))
T=$((1024 * G))

# Print a usage/help message describing the test phases and every
# environment variable the script understands, with its default.
function display_usage() {
  echo "usage: run_blob_bench.sh [--help]"
  echo ""
  echo "Runs the following sequence of BlobDB benchmark tests using tools/benchmark.sh:"
  echo -e "\tPhase 1: write-only tests: bulkload+compact, overwrite+waitforcompaction"
  echo -e "\tPhase 2: read-write tests: readwhilewriting, fwdrangewhilewriting"
  echo -e "\tPhase 3: read-only tests: readrandom, fwdrange"
  echo ""
  echo "Environment Variables:"
  echo -e "\tJOB_ID\t\t\t\tIdentifier for the benchmark job, will appear in the results (default: empty)"
  echo -e "\tDB_DIR\t\t\t\tPath for the RocksDB data directory (mandatory)"
  echo -e "\tWAL_DIR\t\t\t\tPath for the RocksDB WAL directory (mandatory)"
  echo -e "\tOUTPUT_DIR\t\t\tPath for the benchmark results (mandatory)"
  echo -e "\tNUM_THREADS\t\t\tNumber of threads (default: 16)"
  echo -e "\tCOMPRESSION_TYPE\t\tCompression type for the SST files (default: lz4)"
  echo -e "\tDB_SIZE\t\t\t\tRaw (uncompressed) database size (default: 1 TB)"
  echo -e "\tVALUE_SIZE\t\t\tValue size (default: 1 KB)"
  echo -e "\tNUM_KEYS\t\t\tNumber of keys (default: raw database size divided by value size)"
  echo -e "\tDURATION\t\t\tIndividual duration for read-write/read-only tests in seconds (default: 1800)"
  echo -e "\tWRITE_BUFFER_SIZE\t\tWrite buffer (memtable) size (default: 1 GB)"
  echo -e "\tENABLE_BLOB_FILES\t\tEnable blob files (default: 1)"
  echo -e "\tMIN_BLOB_SIZE\t\t\tSize threshold for storing values in blob files (default: 0)"
  echo -e "\tBLOB_FILE_SIZE\t\t\tBlob file size (default: same as write buffer size)"
  echo -e "\tBLOB_COMPRESSION_TYPE\t\tCompression type for the blob files (default: lz4)"
  echo -e "\tENABLE_BLOB_GC\t\t\tEnable blob garbage collection (default: 1)"
  echo -e "\tBLOB_GC_AGE_CUTOFF\t\tBlob garbage collection age cutoff (default: 0.25)"
  echo -e "\tBLOB_GC_FORCE_THRESHOLD\t\tThreshold for forcing garbage collection of the oldest blob files (default: 1.0)"
  echo -e "\tTARGET_FILE_SIZE_BASE\t\tTarget SST file size for compactions (default: write buffer size, scaled down if blob files are enabled)"
  echo -e "\tMAX_BYTES_FOR_LEVEL_BASE\tMaximum size for the base level (default: 8 * target SST file size)"
}

# The script takes no positional arguments; anything other than --help is an error.
if [ $# -ge 1 ]; then
  display_usage

  if [ "$1" == "--help" ]; then
    exit
  else
    exit "$EXIT_INVALID_ARGS"
  fi
fi

# Mandatory configuration: refuse to run without explicit directories,
# since they are wiped below before the benchmarks start.
# shellcheck disable=SC2153
if [ -z "$DB_DIR" ]; then
  echo "DB_DIR is not defined"
  exit "$EXIT_INVALID_ARGS"
fi

# shellcheck disable=SC2153
if [ -z "$WAL_DIR" ]; then
  echo "WAL_DIR is not defined"
  exit "$EXIT_INVALID_ARGS"
fi

# shellcheck disable=SC2153
if [ -z "$OUTPUT_DIR" ]; then
  echo "OUTPUT_DIR is not defined"
  exit "$EXIT_INVALID_ARGS"
fi

# shellcheck disable=SC2153
job_id=$JOB_ID

db_dir=$DB_DIR
wal_dir=$WAL_DIR
output_dir=$OUTPUT_DIR

num_threads=${NUM_THREADS:-16}

compression_type=${COMPRESSION_TYPE:-lz4}

db_size=${DB_SIZE:-$((1 * T))}
value_size=${VALUE_SIZE:-$((1 * K))}
num_keys=${NUM_KEYS:-$((db_size / value_size))}

duration=${DURATION:-1800}

write_buffer_size=${WRITE_BUFFER_SIZE:-$((1 * G))}

enable_blob_files=${ENABLE_BLOB_FILES:-1}
min_blob_size=${MIN_BLOB_SIZE:-0}
blob_file_size=${BLOB_FILE_SIZE:-$write_buffer_size}
blob_compression_type=${BLOB_COMPRESSION_TYPE:-lz4}
enable_blob_garbage_collection=${ENABLE_BLOB_GC:-1}
blob_garbage_collection_age_cutoff=${BLOB_GC_AGE_CUTOFF:-0.25}
blob_garbage_collection_force_threshold=${BLOB_GC_FORCE_THRESHOLD:-1.0}

# With blob files enabled most of the data lives in blobs, so the SST
# target size is scaled down accordingly; otherwise it tracks the memtable size.
if [ "$enable_blob_files" == "1" ]; then
  target_file_size_base=${TARGET_FILE_SIZE_BASE:-$((32 * write_buffer_size / value_size))}
else
  target_file_size_base=${TARGET_FILE_SIZE_BASE:-$write_buffer_size}
fi

max_bytes_for_level_base=${MAX_BYTES_FOR_LEVEL_BASE:-$((8 * target_file_size_base))}

echo "======================== Benchmark setup ========================"
echo -e "Job ID:\t\t\t\t\t$job_id"
echo -e "Data directory:\t\t\t\t$db_dir"
echo -e "WAL directory:\t\t\t\t$wal_dir"
echo -e "Output directory:\t\t\t$output_dir"
echo -e "Number of threads:\t\t\t$num_threads"
echo -e "Compression type for SST files:\t\t$compression_type"
echo -e "Raw database size:\t\t\t$db_size"
echo -e "Value size:\t\t\t\t$value_size"
echo -e "Number of keys:\t\t\t\t$num_keys"
echo -e "Duration of read-write/read-only tests:\t$duration"
echo -e "Write buffer size:\t\t\t$write_buffer_size"
echo -e "Blob files enabled:\t\t\t$enable_blob_files"
echo -e "Blob size threshold:\t\t\t$min_blob_size"
echo -e "Blob file size:\t\t\t\t$blob_file_size"
echo -e "Compression type for blob files:\t$blob_compression_type"
echo -e "Blob GC enabled:\t\t\t$enable_blob_garbage_collection"
echo -e "Blob GC age cutoff:\t\t\t$blob_garbage_collection_age_cutoff"
echo -e "Blob GC force threshold:\t\t$blob_garbage_collection_force_threshold"
echo -e "Target SST file size:\t\t\t$target_file_size_base"
echo -e "Maximum size of base level:\t\t$max_bytes_for_level_base"
echo "================================================================="

# Start from a clean slate. The ${var:?} guards are defense-in-depth: they
# abort the script rather than expand to an empty path if a variable were
# ever unset/empty here (e.g. after a future refactoring of the checks above).
rm -rf -- "${db_dir:?}"
rm -rf -- "${wal_dir:?}"
rm -rf -- "${output_dir:?}"

# Environment passed to benchmark.sh; split into words by `env -S` below.
ENV_VARS="\
  JOB_ID=$job_id \
  DB_DIR=$db_dir \
  WAL_DIR=$wal_dir \
  OUTPUT_DIR=$output_dir \
  NUM_THREADS=$num_threads \
  COMPRESSION_TYPE=$compression_type \
  VALUE_SIZE=$value_size \
  NUM_KEYS=$num_keys"

ENV_VARS_D="$ENV_VARS DURATION=$duration"

PARAMS="\
  --enable_blob_files=$enable_blob_files \
  --min_blob_size=$min_blob_size \
  --blob_file_size=$blob_file_size \
  --blob_compression_type=$blob_compression_type \
  --write_buffer_size=$write_buffer_size \
  --target_file_size_base=$target_file_size_base \
  --max_bytes_for_level_base=$max_bytes_for_level_base"

PARAMS_GC="$PARAMS \
  --enable_blob_garbage_collection=$enable_blob_garbage_collection \
  --blob_garbage_collection_age_cutoff=$blob_garbage_collection_age_cutoff \
  --blob_garbage_collection_force_threshold=$blob_garbage_collection_force_threshold"

# Phase 1: write-only tests. DURATION is explicitly unset so the write
# benchmarks run to completion rather than for a fixed time.

# bulk load (using fillrandom) + compact
env -u DURATION -S "$ENV_VARS" ./tools/benchmark.sh bulkload "$PARAMS"

# overwrite + waitforcompaction
env -u DURATION -S "$ENV_VARS" ./tools/benchmark.sh overwrite "$PARAMS_GC"

# Phase 2: read-write tests (time-limited via DURATION).

# readwhilewriting
env -S "$ENV_VARS_D" ./tools/benchmark.sh readwhilewriting "$PARAMS_GC"

# fwdrangewhilewriting
env -S "$ENV_VARS_D" ./tools/benchmark.sh fwdrangewhilewriting "$PARAMS_GC"

# Phase 3: read-only tests (time-limited via DURATION).

# readrandom
env -S "$ENV_VARS_D" ./tools/benchmark.sh readrandom "$PARAMS_GC"

# fwdrange
env -S "$ENV_VARS_D" ./tools/benchmark.sh fwdrange "$PARAMS_GC"

# save logs to output directory
cp "$db_dir"/LOG* "$output_dir/"