1 // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 5 #include <kcpolydb.h> 6 7 #include <cstdio> 8 #include <cstdlib> 9 10 #include "util/histogram.h" 11 #include "util/random.h" 12 #include "util/testutil.h" 13 14 // Comma-separated list of operations to run in the specified order 15 // Actual benchmarks: 16 // 17 // fillseq -- write N values in sequential key order in async mode 18 // fillrandom -- write N values in random key order in async mode 19 // overwrite -- overwrite N values in random key order in async mode 20 // fillseqsync -- write N/100 values in sequential key order in sync mode 21 // fillrandsync -- write N/100 values in random key order in sync mode 22 // fillrand100K -- write N/1000 100K values in random order in async mode 23 // fillseq100K -- write N/1000 100K values in seq order in async mode 24 // readseq -- read N times sequentially 25 // readseq100K -- read N/1000 100K values in sequential order in async mode 26 // readrand100K -- read N/1000 100K values in sequential order in async mode 27 // readrandom -- read N times in random order 28 static const char* FLAGS_benchmarks = 29 "fillseq," 30 "fillseqsync," 31 "fillrandsync," 32 "fillrandom," 33 "overwrite," 34 "readrandom," 35 "readseq," 36 "fillrand100K," 37 "fillseq100K," 38 "readseq100K," 39 "readrand100K,"; 40 41 // Number of key/values to place in database 42 static int FLAGS_num = 1000000; 43 44 // Number of read operations to do. If negative, do FLAGS_num reads. 45 static int FLAGS_reads = -1; 46 47 // Size of each value 48 static int FLAGS_value_size = 100; 49 50 // Arrange to generate values that shrink to this fraction of 51 // their original size after compression 52 static double FLAGS_compression_ratio = 0.5; 53 54 // Print histogram of operation timings 55 static bool FLAGS_histogram = false; 56 57 // Cache size. Default 4 MB 58 static int FLAGS_cache_size = 4194304; 59 60 // Page size. Default 1 KB 61 static int FLAGS_page_size = 1024; 62 63 // If true, do not destroy the existing database. If you set this 64 // flag and also specify a benchmark that wants a fresh database, that 65 // benchmark will fail. 66 static bool FLAGS_use_existing_db = false; 67 68 // Compression flag. If true, compression is on. If false, compression 69 // is off. 70 static bool FLAGS_compression = true; 71 72 // Use the db with the following name. 73 static const char* FLAGS_db = nullptr; 74 75 inline static void DBSynchronize(kyotocabinet::TreeDB* db_) { 76 // Synchronize will flush writes to disk 77 if (!db_->synchronize()) { 78 std::fprintf(stderr, "synchronize error: %s\n", db_->error().name()); 79 } 80 } 81 82 namespace leveldb { 83 84 // Helper for quickly generating random data. 85 namespace { 86 class RandomGenerator { 87 private: 88 std::string data_; 89 int pos_; 90 91 public: 92 RandomGenerator() { 93 // We use a limited amount of data over and over again and ensure 94 // that it is larger than the compression window (32KB), and also 95 // large enough to serve all typical value sizes we want to write. 96 Random rnd(301); 97 std::string piece; 98 while (data_.size() < 1048576) { 99 // Add a short fragment that is as compressible as specified 100 // by FLAGS_compression_ratio. 101 test::CompressibleString(&rnd, FLAGS_compression_ratio, 100, &piece); 102 data_.append(piece); 103 } 104 pos_ = 0; 105 } 106 107 Slice Generate(int len) { 108 if (pos_ + len > data_.size()) { 109 pos_ = 0; 110 assert(len < data_.size()); 111 } 112 pos_ += len; 113 return Slice(data_.data() + pos_ - len, len); 114 } 115 }; 116 117 static Slice TrimSpace(Slice s) { 118 int start = 0; 119 while (start < s.size() && isspace(s[start])) { 120 start++; 121 } 122 int limit = s.size(); 123 while (limit > start && isspace(s[limit - 1])) { 124 limit--; 125 } 126 return Slice(s.data() + start, limit - start); 127 } 128 129 } // namespace 130 131 class Benchmark { 132 private: 133 kyotocabinet::TreeDB* db_; 134 int db_num_; 135 int num_; 136 int reads_; 137 double start_; 138 double last_op_finish_; 139 int64_t bytes_; 140 std::string message_; 141 Histogram hist_; 142 RandomGenerator gen_; 143 Random rand_; 144 kyotocabinet::LZOCompressor<kyotocabinet::LZO::RAW> comp_; 145 146 // State kept for progress messages 147 int done_; 148 int next_report_; // When to report next 149 150 void PrintHeader() { 151 const int kKeySize = 16; 152 PrintEnvironment(); 153 std::fprintf(stdout, "Keys: %d bytes each\n", kKeySize); 154 std::fprintf( 155 stdout, "Values: %d bytes each (%d bytes after compression)\n", 156 FLAGS_value_size, 157 static_cast<int>(FLAGS_value_size * FLAGS_compression_ratio + 0.5)); 158 std::fprintf(stdout, "Entries: %d\n", num_); 159 std::fprintf(stdout, "RawSize: %.1f MB (estimated)\n", 160 ((static_cast<int64_t>(kKeySize + FLAGS_value_size) * num_) / 161 1048576.0)); 162 std::fprintf( 163 stdout, "FileSize: %.1f MB (estimated)\n", 164 (((kKeySize + FLAGS_value_size * FLAGS_compression_ratio) * num_) / 165 1048576.0)); 166 PrintWarnings(); 167 std::fprintf(stdout, "------------------------------------------------\n"); 168 } 169 170 void PrintWarnings() { 171 #if defined(__GNUC__) && !defined(__OPTIMIZE__) 172 std::fprintf( 173 stdout, 174 "WARNING: Optimization is disabled: benchmarks unnecessarily slow\n"); 175 #endif 176 #ifndef NDEBUG 177 std::fprintf( 178 stdout, 179 "WARNING: Assertions are enabled; benchmarks unnecessarily slow\n"); 180 #endif 181 } 182 183 void PrintEnvironment() { 184 std::fprintf( 185 stderr, "Kyoto Cabinet: version %s, lib ver %d, lib rev %d\n", 186 kyotocabinet::VERSION, kyotocabinet::LIBVER, kyotocabinet::LIBREV); 187 188 #if defined(__linux) 189 time_t now = time(nullptr); 190 std::fprintf(stderr, "Date: %s", 191 ctime(&now)); // ctime() adds newline 192 193 FILE* cpuinfo = std::fopen("/proc/cpuinfo", "r"); 194 if (cpuinfo != nullptr) { 195 char line[1000]; 196 int num_cpus = 0; 197 std::string cpu_type; 198 std::string cache_size; 199 while (fgets(line, sizeof(line), cpuinfo) != nullptr) { 200 const char* sep = strchr(line, ':'); 201 if (sep == nullptr) { 202 continue; 203 } 204 Slice key = TrimSpace(Slice(line, sep - 1 - line)); 205 Slice val = TrimSpace(Slice(sep + 1)); 206 if (key == "model name") { 207 ++num_cpus; 208 cpu_type = val.ToString(); 209 } else if (key == "cache size") { 210 cache_size = val.ToString(); 211 } 212 } 213 std::fclose(cpuinfo); 214 std::fprintf(stderr, "CPU: %d * %s\n", num_cpus, 215 cpu_type.c_str()); 216 std::fprintf(stderr, "CPUCache: %s\n", cache_size.c_str()); 217 } 218 #endif 219 } 220 221 void Start() { 222 start_ = Env::Default()->NowMicros() * 1e-6; 223 bytes_ = 0; 224 message_.clear(); 225 last_op_finish_ = start_; 226 hist_.Clear(); 227 done_ = 0; 228 next_report_ = 100; 229 } 230 231 void FinishedSingleOp() { 232 if (FLAGS_histogram) { 233 double now = Env::Default()->NowMicros() * 1e-6; 234 double micros = (now - last_op_finish_) * 1e6; 235 hist_.Add(micros); 236 if (micros > 20000) { 237 std::fprintf(stderr, "long op: %.1f micros%30s\r", micros, ""); 238 std::fflush(stderr); 239 } 240 last_op_finish_ = now; 241 } 242 243 done_++; 244 if (done_ >= next_report_) { 245 if (next_report_ < 1000) 246 next_report_ += 100; 247 else if (next_report_ < 5000) 248 next_report_ += 500; 249 else if (next_report_ < 10000) 250 next_report_ += 1000; 251 else if (next_report_ < 50000) 252 next_report_ += 5000; 253 else if (next_report_ < 100000) 254 next_report_ += 10000; 255 else if (next_report_ < 500000) 256 next_report_ += 50000; 257 else 258 next_report_ += 100000; 259 std::fprintf(stderr, "... finished %d ops%30s\r", done_, ""); 260 std::fflush(stderr); 261 } 262 } 263 264 void Stop(const Slice& name) { 265 double finish = Env::Default()->NowMicros() * 1e-6; 266 267 // Pretend at least one op was done in case we are running a benchmark 268 // that does not call FinishedSingleOp(). 269 if (done_ < 1) done_ = 1; 270 271 if (bytes_ > 0) { 272 char rate[100]; 273 std::snprintf(rate, sizeof(rate), "%6.1f MB/s", 274 (bytes_ / 1048576.0) / (finish - start_)); 275 if (!message_.empty()) { 276 message_ = std::string(rate) + " " + message_; 277 } else { 278 message_ = rate; 279 } 280 } 281 282 std::fprintf(stdout, "%-12s : %11.3f micros/op;%s%s\n", 283 name.ToString().c_str(), (finish - start_) * 1e6 / done_, 284 (message_.empty() ? "" : " "), message_.c_str()); 285 if (FLAGS_histogram) { 286 std::fprintf(stdout, "Microseconds per op:\n%s\n", 287 hist_.ToString().c_str()); 288 } 289 std::fflush(stdout); 290 } 291 292 public: 293 enum Order { SEQUENTIAL, RANDOM }; 294 enum DBState { FRESH, EXISTING }; 295 296 Benchmark() 297 : db_(nullptr), 298 num_(FLAGS_num), 299 reads_(FLAGS_reads < 0 ? FLAGS_num : FLAGS_reads), 300 bytes_(0), 301 rand_(301) { 302 std::vector<std::string> files; 303 std::string test_dir; 304 Env::Default()->GetTestDirectory(&test_dir); 305 Env::Default()->GetChildren(test_dir.c_str(), &files); 306 if (!FLAGS_use_existing_db) { 307 for (int i = 0; i < files.size(); i++) { 308 if (Slice(files[i]).starts_with("dbbench_polyDB")) { 309 std::string file_name(test_dir); 310 file_name += "/"; 311 file_name += files[i]; 312 Env::Default()->RemoveFile(file_name.c_str()); 313 } 314 } 315 } 316 } 317 318 ~Benchmark() { 319 if (!db_->close()) { 320 std::fprintf(stderr, "close error: %s\n", db_->error().name()); 321 } 322 } 323 324 void Run() { 325 PrintHeader(); 326 Open(false); 327 328 const char* benchmarks = FLAGS_benchmarks; 329 while (benchmarks != nullptr) { 330 const char* sep = strchr(benchmarks, ','); 331 Slice name; 332 if (sep == nullptr) { 333 name = benchmarks; 334 benchmarks = nullptr; 335 } else { 336 name = Slice(benchmarks, sep - benchmarks); 337 benchmarks = sep + 1; 338 } 339 340 Start(); 341 342 bool known = true; 343 bool write_sync = false; 344 if (name == Slice("fillseq")) { 345 Write(write_sync, SEQUENTIAL, FRESH, num_, FLAGS_value_size, 1); 346 DBSynchronize(db_); 347 } else if (name == Slice("fillrandom")) { 348 Write(write_sync, RANDOM, FRESH, num_, FLAGS_value_size, 1); 349 DBSynchronize(db_); 350 } else if (name == Slice("overwrite")) { 351 Write(write_sync, RANDOM, EXISTING, num_, FLAGS_value_size, 1); 352 DBSynchronize(db_); 353 } else if (name == Slice("fillrandsync")) { 354 write_sync = true; 355 Write(write_sync, RANDOM, FRESH, num_ / 100, FLAGS_value_size, 1); 356 DBSynchronize(db_); 357 } else if (name == Slice("fillseqsync")) { 358 write_sync = true; 359 Write(write_sync, SEQUENTIAL, FRESH, num_ / 100, FLAGS_value_size, 1); 360 DBSynchronize(db_); 361 } else if (name == Slice("fillrand100K")) { 362 Write(write_sync, RANDOM, FRESH, num_ / 1000, 100 * 1000, 1); 363 DBSynchronize(db_); 364 } else if (name == Slice("fillseq100K")) { 365 Write(write_sync, SEQUENTIAL, FRESH, num_ / 1000, 100 * 1000, 1); 366 DBSynchronize(db_); 367 } else if (name == Slice("readseq")) { 368 ReadSequential(); 369 } else if (name == Slice("readrandom")) { 370 ReadRandom(); 371 } else if (name == Slice("readrand100K")) { 372 int n = reads_; 373 reads_ /= 1000; 374 ReadRandom(); 375 reads_ = n; 376 } else if (name == Slice("readseq100K")) { 377 int n = reads_; 378 reads_ /= 1000; 379 ReadSequential(); 380 reads_ = n; 381 } else { 382 known = false; 383 if (name != Slice()) { // No error message for empty name 384 std::fprintf(stderr, "unknown benchmark '%s'\n", 385 name.ToString().c_str()); 386 } 387 } 388 if (known) { 389 Stop(name); 390 } 391 } 392 } 393 394 private: 395 void Open(bool sync) { 396 assert(db_ == nullptr); 397 398 // Initialize db_ 399 db_ = new kyotocabinet::TreeDB(); 400 char file_name[100]; 401 db_num_++; 402 std::string test_dir; 403 Env::Default()->GetTestDirectory(&test_dir); 404 std::snprintf(file_name, sizeof(file_name), "%s/dbbench_polyDB-%d.kct", 405 test_dir.c_str(), db_num_); 406 407 // Create tuning options and open the database 408 int open_options = 409 kyotocabinet::PolyDB::OWRITER | kyotocabinet::PolyDB::OCREATE; 410 int tune_options = 411 kyotocabinet::TreeDB::TSMALL | kyotocabinet::TreeDB::TLINEAR; 412 if (FLAGS_compression) { 413 tune_options |= kyotocabinet::TreeDB::TCOMPRESS; 414 db_->tune_compressor(&comp_); 415 } 416 db_->tune_options(tune_options); 417 db_->tune_page_cache(FLAGS_cache_size); 418 db_->tune_page(FLAGS_page_size); 419 db_->tune_map(256LL << 20); 420 if (sync) { 421 open_options |= kyotocabinet::PolyDB::OAUTOSYNC; 422 } 423 if (!db_->open(file_name, open_options)) { 424 std::fprintf(stderr, "open error: %s\n", db_->error().name()); 425 } 426 } 427 428 void Write(bool sync, Order order, DBState state, int num_entries, 429 int value_size, int entries_per_batch) { 430 // Create new database if state == FRESH 431 if (state == FRESH) { 432 if (FLAGS_use_existing_db) { 433 message_ = "skipping (--use_existing_db is true)"; 434 return; 435 } 436 delete db_; 437 db_ = nullptr; 438 Open(sync); 439 Start(); // Do not count time taken to destroy/open 440 } 441 442 if (num_entries != num_) { 443 char msg[100]; 444 std::snprintf(msg, sizeof(msg), "(%d ops)", num_entries); 445 message_ = msg; 446 } 447 448 // Write to database 449 for (int i = 0; i < num_entries; i++) { 450 const int k = (order == SEQUENTIAL) ? i : (rand_.Next() % num_entries); 451 char key[100]; 452 std::snprintf(key, sizeof(key), "%016d", k); 453 bytes_ += value_size + strlen(key); 454 std::string cpp_key = key; 455 if (!db_->set(cpp_key, gen_.Generate(value_size).ToString())) { 456 std::fprintf(stderr, "set error: %s\n", db_->error().name()); 457 } 458 FinishedSingleOp(); 459 } 460 } 461 462 void ReadSequential() { 463 kyotocabinet::DB::Cursor* cur = db_->cursor(); 464 cur->jump(); 465 std::string ckey, cvalue; 466 while (cur->get(&ckey, &cvalue, true)) { 467 bytes_ += ckey.size() + cvalue.size(); 468 FinishedSingleOp(); 469 } 470 delete cur; 471 } 472 473 void ReadRandom() { 474 std::string value; 475 for (int i = 0; i < reads_; i++) { 476 char key[100]; 477 const int k = rand_.Next() % reads_; 478 std::snprintf(key, sizeof(key), "%016d", k); 479 db_->get(key, &value); 480 FinishedSingleOp(); 481 } 482 } 483 }; 484 485 } // namespace leveldb 486 487 int main(int argc, char** argv) { 488 std::string default_db_path; 489 for (int i = 1; i < argc; i++) { 490 double d; 491 int n; 492 char junk; 493 if (leveldb::Slice(argv[i]).starts_with("--benchmarks=")) { 494 FLAGS_benchmarks = argv[i] + strlen("--benchmarks="); 495 } else if (sscanf(argv[i], "--compression_ratio=%lf%c", &d, &junk) == 1) { 496 FLAGS_compression_ratio = d; 497 } else if (sscanf(argv[i], "--histogram=%d%c", &n, &junk) == 1 && 498 (n == 0 || n == 1)) { 499 FLAGS_histogram = n; 500 } else if (sscanf(argv[i], "--num=%d%c", &n, &junk) == 1) { 501 FLAGS_num = n; 502 } else if (sscanf(argv[i], "--reads=%d%c", &n, &junk) == 1) { 503 FLAGS_reads = n; 504 } else if (sscanf(argv[i], "--value_size=%d%c", &n, &junk) == 1) { 505 FLAGS_value_size = n; 506 } else if (sscanf(argv[i], "--cache_size=%d%c", &n, &junk) == 1) { 507 FLAGS_cache_size = n; 508 } else if (sscanf(argv[i], "--page_size=%d%c", &n, &junk) == 1) { 509 FLAGS_page_size = n; 510 } else if (sscanf(argv[i], "--compression=%d%c", &n, &junk) == 1 && 511 (n == 0 || n == 1)) { 512 FLAGS_compression = (n == 1) ? true : false; 513 } else if (strncmp(argv[i], "--db=", 5) == 0) { 514 FLAGS_db = argv[i] + 5; 515 } else { 516 std::fprintf(stderr, "Invalid flag '%s'\n", argv[i]); 517 std::exit(1); 518 } 519 } 520 521 // Choose a location for the test database if none given with --db=<path> 522 if (FLAGS_db == nullptr) { 523 leveldb::Env::Default()->GetTestDirectory(&default_db_path); 524 default_db_path += "/dbbench"; 525 FLAGS_db = default_db_path.c_str(); 526 } 527 528 leveldb::Benchmark benchmark; 529 benchmark.Run(); 530 return 0; 531 } 532