db_bench_tree_db.cc 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531
  1. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license that can be
  3. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  4. #include <kcpolydb.h>
  5. #include <cstdio>
  6. #include <cstdlib>
  7. #include "util/histogram.h"
  8. #include "util/random.h"
  9. #include "util/testutil.h"
  10. // Comma-separated list of operations to run in the specified order
  11. // Actual benchmarks:
  12. //
  13. // fillseq -- write N values in sequential key order in async mode
  14. // fillrandom -- write N values in random key order in async mode
  15. // overwrite -- overwrite N values in random key order in async mode
  16. // fillseqsync -- write N/100 values in sequential key order in sync mode
  17. // fillrandsync -- write N/100 values in random key order in sync mode
  18. // fillrand100K -- write N/1000 100K values in random order in async mode
  19. // fillseq100K -- write N/1000 100K values in seq order in async mode
  20. // readseq -- read N times sequentially
  21. // readseq100K -- read N/1000 100K values in sequential order in async mode
  22. // readrand100K -- read N/1000 100K values in sequential order in async mode
  23. // readrandom -- read N times in random order
  24. static const char* FLAGS_benchmarks =
  25. "fillseq,"
  26. "fillseqsync,"
  27. "fillrandsync,"
  28. "fillrandom,"
  29. "overwrite,"
  30. "readrandom,"
  31. "readseq,"
  32. "fillrand100K,"
  33. "fillseq100K,"
  34. "readseq100K,"
  35. "readrand100K,";
  36. // Number of key/values to place in database
  37. static int FLAGS_num = 1000000;
  38. // Number of read operations to do. If negative, do FLAGS_num reads.
  39. static int FLAGS_reads = -1;
  40. // Size of each value
  41. static int FLAGS_value_size = 100;
  42. // Arrange to generate values that shrink to this fraction of
  43. // their original size after compression
  44. static double FLAGS_compression_ratio = 0.5;
  45. // Print histogram of operation timings
  46. static bool FLAGS_histogram = false;
  47. // Cache size. Default 4 MB
  48. static int FLAGS_cache_size = 4194304;
  49. // Page size. Default 1 KB
  50. static int FLAGS_page_size = 1024;
  51. // If true, do not destroy the existing database. If you set this
  52. // flag and also specify a benchmark that wants a fresh database, that
  53. // benchmark will fail.
  54. static bool FLAGS_use_existing_db = false;
  55. // Compression flag. If true, compression is on. If false, compression
  56. // is off.
  57. static bool FLAGS_compression = true;
  58. // Use the db with the following name.
  59. static const char* FLAGS_db = nullptr;
  60. inline static void DBSynchronize(kyotocabinet::TreeDB* db_) {
  61. // Synchronize will flush writes to disk
  62. if (!db_->synchronize()) {
  63. std::fprintf(stderr, "synchronize error: %s\n", db_->error().name());
  64. }
  65. }
  66. namespace leveldb {
  67. // Helper for quickly generating random data.
  68. namespace {
  69. class RandomGenerator {
  70. private:
  71. std::string data_;
  72. int pos_;
  73. public:
  74. RandomGenerator() {
  75. // We use a limited amount of data over and over again and ensure
  76. // that it is larger than the compression window (32KB), and also
  77. // large enough to serve all typical value sizes we want to write.
  78. Random rnd(301);
  79. std::string piece;
  80. while (data_.size() < 1048576) {
  81. // Add a short fragment that is as compressible as specified
  82. // by FLAGS_compression_ratio.
  83. test::CompressibleString(&rnd, FLAGS_compression_ratio, 100, &piece);
  84. data_.append(piece);
  85. }
  86. pos_ = 0;
  87. }
  88. Slice Generate(int len) {
  89. if (pos_ + len > data_.size()) {
  90. pos_ = 0;
  91. assert(len < data_.size());
  92. }
  93. pos_ += len;
  94. return Slice(data_.data() + pos_ - len, len);
  95. }
  96. };
  97. static Slice TrimSpace(Slice s) {
  98. int start = 0;
  99. while (start < s.size() && isspace(s[start])) {
  100. start++;
  101. }
  102. int limit = s.size();
  103. while (limit > start && isspace(s[limit - 1])) {
  104. limit--;
  105. }
  106. return Slice(s.data() + start, limit - start);
  107. }
  108. } // namespace
  109. class Benchmark {
  110. private:
  111. kyotocabinet::TreeDB* db_;
  112. int db_num_;
  113. int num_;
  114. int reads_;
  115. double start_;
  116. double last_op_finish_;
  117. int64_t bytes_;
  118. std::string message_;
  119. Histogram hist_;
  120. RandomGenerator gen_;
  121. Random rand_;
  122. kyotocabinet::LZOCompressor<kyotocabinet::LZO::RAW> comp_;
  123. // State kept for progress messages
  124. int done_;
  125. int next_report_; // When to report next
  126. void PrintHeader() {
  127. const int kKeySize = 16;
  128. PrintEnvironment();
  129. std::fprintf(stdout, "Keys: %d bytes each\n", kKeySize);
  130. std::fprintf(
  131. stdout, "Values: %d bytes each (%d bytes after compression)\n",
  132. FLAGS_value_size,
  133. static_cast<int>(FLAGS_value_size * FLAGS_compression_ratio + 0.5));
  134. std::fprintf(stdout, "Entries: %d\n", num_);
  135. std::fprintf(stdout, "RawSize: %.1f MB (estimated)\n",
  136. ((static_cast<int64_t>(kKeySize + FLAGS_value_size) * num_) /
  137. 1048576.0));
  138. std::fprintf(
  139. stdout, "FileSize: %.1f MB (estimated)\n",
  140. (((kKeySize + FLAGS_value_size * FLAGS_compression_ratio) * num_) /
  141. 1048576.0));
  142. PrintWarnings();
  143. std::fprintf(stdout, "------------------------------------------------\n");
  144. }
  145. void PrintWarnings() {
  146. #if defined(__GNUC__) && !defined(__OPTIMIZE__)
  147. std::fprintf(
  148. stdout,
  149. "WARNING: Optimization is disabled: benchmarks unnecessarily slow\n");
  150. #endif
  151. #ifndef NDEBUG
  152. std::fprintf(
  153. stdout,
  154. "WARNING: Assertions are enabled; benchmarks unnecessarily slow\n");
  155. #endif
  156. }
  157. void PrintEnvironment() {
  158. std::fprintf(
  159. stderr, "Kyoto Cabinet: version %s, lib ver %d, lib rev %d\n",
  160. kyotocabinet::VERSION, kyotocabinet::LIBVER, kyotocabinet::LIBREV);
  161. #if defined(__linux)
  162. time_t now = time(nullptr);
  163. std::fprintf(stderr, "Date: %s",
  164. ctime(&now)); // ctime() adds newline
  165. FILE* cpuinfo = std::fopen("/proc/cpuinfo", "r");
  166. if (cpuinfo != nullptr) {
  167. char line[1000];
  168. int num_cpus = 0;
  169. std::string cpu_type;
  170. std::string cache_size;
  171. while (fgets(line, sizeof(line), cpuinfo) != nullptr) {
  172. const char* sep = strchr(line, ':');
  173. if (sep == nullptr) {
  174. continue;
  175. }
  176. Slice key = TrimSpace(Slice(line, sep - 1 - line));
  177. Slice val = TrimSpace(Slice(sep + 1));
  178. if (key == "model name") {
  179. ++num_cpus;
  180. cpu_type = val.ToString();
  181. } else if (key == "cache size") {
  182. cache_size = val.ToString();
  183. }
  184. }
  185. std::fclose(cpuinfo);
  186. std::fprintf(stderr, "CPU: %d * %s\n", num_cpus,
  187. cpu_type.c_str());
  188. std::fprintf(stderr, "CPUCache: %s\n", cache_size.c_str());
  189. }
  190. #endif
  191. }
  192. void Start() {
  193. start_ = Env::Default()->NowMicros() * 1e-6;
  194. bytes_ = 0;
  195. message_.clear();
  196. last_op_finish_ = start_;
  197. hist_.Clear();
  198. done_ = 0;
  199. next_report_ = 100;
  200. }
  201. void FinishedSingleOp() {
  202. if (FLAGS_histogram) {
  203. double now = Env::Default()->NowMicros() * 1e-6;
  204. double micros = (now - last_op_finish_) * 1e6;
  205. hist_.Add(micros);
  206. if (micros > 20000) {
  207. std::fprintf(stderr, "long op: %.1f micros%30s\r", micros, "");
  208. std::fflush(stderr);
  209. }
  210. last_op_finish_ = now;
  211. }
  212. done_++;
  213. if (done_ >= next_report_) {
  214. if (next_report_ < 1000)
  215. next_report_ += 100;
  216. else if (next_report_ < 5000)
  217. next_report_ += 500;
  218. else if (next_report_ < 10000)
  219. next_report_ += 1000;
  220. else if (next_report_ < 50000)
  221. next_report_ += 5000;
  222. else if (next_report_ < 100000)
  223. next_report_ += 10000;
  224. else if (next_report_ < 500000)
  225. next_report_ += 50000;
  226. else
  227. next_report_ += 100000;
  228. std::fprintf(stderr, "... finished %d ops%30s\r", done_, "");
  229. std::fflush(stderr);
  230. }
  231. }
  232. void Stop(const Slice& name) {
  233. double finish = Env::Default()->NowMicros() * 1e-6;
  234. // Pretend at least one op was done in case we are running a benchmark
  235. // that does not call FinishedSingleOp().
  236. if (done_ < 1) done_ = 1;
  237. if (bytes_ > 0) {
  238. char rate[100];
  239. std::snprintf(rate, sizeof(rate), "%6.1f MB/s",
  240. (bytes_ / 1048576.0) / (finish - start_));
  241. if (!message_.empty()) {
  242. message_ = std::string(rate) + " " + message_;
  243. } else {
  244. message_ = rate;
  245. }
  246. }
  247. std::fprintf(stdout, "%-12s : %11.3f micros/op;%s%s\n",
  248. name.ToString().c_str(), (finish - start_) * 1e6 / done_,
  249. (message_.empty() ? "" : " "), message_.c_str());
  250. if (FLAGS_histogram) {
  251. std::fprintf(stdout, "Microseconds per op:\n%s\n",
  252. hist_.ToString().c_str());
  253. }
  254. std::fflush(stdout);
  255. }
  256. public:
  257. enum Order { SEQUENTIAL, RANDOM };
  258. enum DBState { FRESH, EXISTING };
  259. Benchmark()
  260. : db_(nullptr),
  261. num_(FLAGS_num),
  262. reads_(FLAGS_reads < 0 ? FLAGS_num : FLAGS_reads),
  263. bytes_(0),
  264. rand_(301) {
  265. std::vector<std::string> files;
  266. std::string test_dir;
  267. Env::Default()->GetTestDirectory(&test_dir);
  268. Env::Default()->GetChildren(test_dir.c_str(), &files);
  269. if (!FLAGS_use_existing_db) {
  270. for (int i = 0; i < files.size(); i++) {
  271. if (Slice(files[i]).starts_with("dbbench_polyDB")) {
  272. std::string file_name(test_dir);
  273. file_name += "/";
  274. file_name += files[i];
  275. Env::Default()->RemoveFile(file_name.c_str());
  276. }
  277. }
  278. }
  279. }
  280. ~Benchmark() {
  281. if (!db_->close()) {
  282. std::fprintf(stderr, "close error: %s\n", db_->error().name());
  283. }
  284. }
  285. void Run() {
  286. PrintHeader();
  287. Open(false);
  288. const char* benchmarks = FLAGS_benchmarks;
  289. while (benchmarks != nullptr) {
  290. const char* sep = strchr(benchmarks, ',');
  291. Slice name;
  292. if (sep == nullptr) {
  293. name = benchmarks;
  294. benchmarks = nullptr;
  295. } else {
  296. name = Slice(benchmarks, sep - benchmarks);
  297. benchmarks = sep + 1;
  298. }
  299. Start();
  300. bool known = true;
  301. bool write_sync = false;
  302. if (name == Slice("fillseq")) {
  303. Write(write_sync, SEQUENTIAL, FRESH, num_, FLAGS_value_size, 1);
  304. DBSynchronize(db_);
  305. } else if (name == Slice("fillrandom")) {
  306. Write(write_sync, RANDOM, FRESH, num_, FLAGS_value_size, 1);
  307. DBSynchronize(db_);
  308. } else if (name == Slice("overwrite")) {
  309. Write(write_sync, RANDOM, EXISTING, num_, FLAGS_value_size, 1);
  310. DBSynchronize(db_);
  311. } else if (name == Slice("fillrandsync")) {
  312. write_sync = true;
  313. Write(write_sync, RANDOM, FRESH, num_ / 100, FLAGS_value_size, 1);
  314. DBSynchronize(db_);
  315. } else if (name == Slice("fillseqsync")) {
  316. write_sync = true;
  317. Write(write_sync, SEQUENTIAL, FRESH, num_ / 100, FLAGS_value_size, 1);
  318. DBSynchronize(db_);
  319. } else if (name == Slice("fillrand100K")) {
  320. Write(write_sync, RANDOM, FRESH, num_ / 1000, 100 * 1000, 1);
  321. DBSynchronize(db_);
  322. } else if (name == Slice("fillseq100K")) {
  323. Write(write_sync, SEQUENTIAL, FRESH, num_ / 1000, 100 * 1000, 1);
  324. DBSynchronize(db_);
  325. } else if (name == Slice("readseq")) {
  326. ReadSequential();
  327. } else if (name == Slice("readrandom")) {
  328. ReadRandom();
  329. } else if (name == Slice("readrand100K")) {
  330. int n = reads_;
  331. reads_ /= 1000;
  332. ReadRandom();
  333. reads_ = n;
  334. } else if (name == Slice("readseq100K")) {
  335. int n = reads_;
  336. reads_ /= 1000;
  337. ReadSequential();
  338. reads_ = n;
  339. } else {
  340. known = false;
  341. if (name != Slice()) { // No error message for empty name
  342. std::fprintf(stderr, "unknown benchmark '%s'\n",
  343. name.ToString().c_str());
  344. }
  345. }
  346. if (known) {
  347. Stop(name);
  348. }
  349. }
  350. }
  351. private:
  352. void Open(bool sync) {
  353. assert(db_ == nullptr);
  354. // Initialize db_
  355. db_ = new kyotocabinet::TreeDB();
  356. char file_name[100];
  357. db_num_++;
  358. std::string test_dir;
  359. Env::Default()->GetTestDirectory(&test_dir);
  360. std::snprintf(file_name, sizeof(file_name), "%s/dbbench_polyDB-%d.kct",
  361. test_dir.c_str(), db_num_);
  362. // Create tuning options and open the database
  363. int open_options =
  364. kyotocabinet::PolyDB::OWRITER | kyotocabinet::PolyDB::OCREATE;
  365. int tune_options =
  366. kyotocabinet::TreeDB::TSMALL | kyotocabinet::TreeDB::TLINEAR;
  367. if (FLAGS_compression) {
  368. tune_options |= kyotocabinet::TreeDB::TCOMPRESS;
  369. db_->tune_compressor(&comp_);
  370. }
  371. db_->tune_options(tune_options);
  372. db_->tune_page_cache(FLAGS_cache_size);
  373. db_->tune_page(FLAGS_page_size);
  374. db_->tune_map(256LL << 20);
  375. if (sync) {
  376. open_options |= kyotocabinet::PolyDB::OAUTOSYNC;
  377. }
  378. if (!db_->open(file_name, open_options)) {
  379. std::fprintf(stderr, "open error: %s\n", db_->error().name());
  380. }
  381. }
  382. void Write(bool sync, Order order, DBState state, int num_entries,
  383. int value_size, int entries_per_batch) {
  384. // Create new database if state == FRESH
  385. if (state == FRESH) {
  386. if (FLAGS_use_existing_db) {
  387. message_ = "skipping (--use_existing_db is true)";
  388. return;
  389. }
  390. delete db_;
  391. db_ = nullptr;
  392. Open(sync);
  393. Start(); // Do not count time taken to destroy/open
  394. }
  395. if (num_entries != num_) {
  396. char msg[100];
  397. std::snprintf(msg, sizeof(msg), "(%d ops)", num_entries);
  398. message_ = msg;
  399. }
  400. // Write to database
  401. for (int i = 0; i < num_entries; i++) {
  402. const int k = (order == SEQUENTIAL) ? i : (rand_.Next() % num_entries);
  403. char key[100];
  404. std::snprintf(key, sizeof(key), "%016d", k);
  405. bytes_ += value_size + strlen(key);
  406. std::string cpp_key = key;
  407. if (!db_->set(cpp_key, gen_.Generate(value_size).ToString())) {
  408. std::fprintf(stderr, "set error: %s\n", db_->error().name());
  409. }
  410. FinishedSingleOp();
  411. }
  412. }
  413. void ReadSequential() {
  414. kyotocabinet::DB::Cursor* cur = db_->cursor();
  415. cur->jump();
  416. std::string ckey, cvalue;
  417. while (cur->get(&ckey, &cvalue, true)) {
  418. bytes_ += ckey.size() + cvalue.size();
  419. FinishedSingleOp();
  420. }
  421. delete cur;
  422. }
  423. void ReadRandom() {
  424. std::string value;
  425. for (int i = 0; i < reads_; i++) {
  426. char key[100];
  427. const int k = rand_.Next() % reads_;
  428. std::snprintf(key, sizeof(key), "%016d", k);
  429. db_->get(key, &value);
  430. FinishedSingleOp();
  431. }
  432. }
  433. };
  434. } // namespace leveldb
  435. int main(int argc, char** argv) {
  436. std::string default_db_path;
  437. for (int i = 1; i < argc; i++) {
  438. double d;
  439. int n;
  440. char junk;
  441. if (leveldb::Slice(argv[i]).starts_with("--benchmarks=")) {
  442. FLAGS_benchmarks = argv[i] + strlen("--benchmarks=");
  443. } else if (sscanf(argv[i], "--compression_ratio=%lf%c", &d, &junk) == 1) {
  444. FLAGS_compression_ratio = d;
  445. } else if (sscanf(argv[i], "--histogram=%d%c", &n, &junk) == 1 &&
  446. (n == 0 || n == 1)) {
  447. FLAGS_histogram = n;
  448. } else if (sscanf(argv[i], "--num=%d%c", &n, &junk) == 1) {
  449. FLAGS_num = n;
  450. } else if (sscanf(argv[i], "--reads=%d%c", &n, &junk) == 1) {
  451. FLAGS_reads = n;
  452. } else if (sscanf(argv[i], "--value_size=%d%c", &n, &junk) == 1) {
  453. FLAGS_value_size = n;
  454. } else if (sscanf(argv[i], "--cache_size=%d%c", &n, &junk) == 1) {
  455. FLAGS_cache_size = n;
  456. } else if (sscanf(argv[i], "--page_size=%d%c", &n, &junk) == 1) {
  457. FLAGS_page_size = n;
  458. } else if (sscanf(argv[i], "--compression=%d%c", &n, &junk) == 1 &&
  459. (n == 0 || n == 1)) {
  460. FLAGS_compression = (n == 1) ? true : false;
  461. } else if (strncmp(argv[i], "--db=", 5) == 0) {
  462. FLAGS_db = argv[i] + 5;
  463. } else {
  464. std::fprintf(stderr, "Invalid flag '%s'\n", argv[i]);
  465. std::exit(1);
  466. }
  467. }
  468. // Choose a location for the test database if none given with --db=<path>
  469. if (FLAGS_db == nullptr) {
  470. leveldb::Env::Default()->GetTestDirectory(&default_db_path);
  471. default_db_path += "/dbbench";
  472. FLAGS_db = default_db_path.c_str();
  473. }
  474. leveldb::Benchmark benchmark;
  475. benchmark.Run();
  476. return 0;
  477. }