db_test.cc
  1. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license that can be
  3. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  4. #include "leveldb/db.h"
  5. #include "leveldb/filter_policy.h"
  6. #include "db/db_impl.h"
  7. #include "db/filename.h"
  8. #include "db/version_set.h"
  9. #include "db/write_batch_internal.h"
  10. #include "leveldb/cache.h"
  11. #include "leveldb/env.h"
  12. #include "leveldb/table.h"
  13. #include "util/hash.h"
  14. #include "util/logging.h"
  15. #include "util/mutexlock.h"
  16. #include "util/testharness.h"
  17. #include "util/testutil.h"
  18. namespace leveldb {
  19. static std::string RandomString(Random* rnd, int len) {
  20. std::string r;
  21. test::RandomString(rnd, len, &r);
  22. return r;
  23. }
  24. namespace {
  25. class AtomicCounter {
  26. private:
  27. port::Mutex mu_;
  28. int count_;
  29. public:
  30. AtomicCounter() : count_(0) { }
  31. void Increment() {
  32. IncrementBy(1);
  33. }
  34. void IncrementBy(int count) {
  35. MutexLock l(&mu_);
  36. count_ += count;
  37. }
  38. int Read() {
  39. MutexLock l(&mu_);
  40. return count_;
  41. }
  42. void Reset() {
  43. MutexLock l(&mu_);
  44. count_ = 0;
  45. }
  46. };
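// Note: AtomicCounter is a simple mutex-guarded counter shared between the
// test thread and the Env wrappers below (for example,
// SpecialEnv::random_read_counter_ counts RandomAccessFile::Read() calls).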
  47. void DelayMilliseconds(int millis) {
  48. Env::Default()->SleepForMicroseconds(millis * 1000);
  49. }
  50. }  // anonymous namespace
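// DelayMilliseconds() is how the tests below wait for background work:
// several tests sleep (e.g. DelayMilliseconds(1000)) and then assert on the
// resulting file layout once compactions have had time to finish.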
  51. // Special Env used to delay background operations
  52. class SpecialEnv : public EnvWrapper {
  53. public:
  54. // sstable/log Sync() calls are blocked while this pointer is non-NULL.
  55. port::AtomicPointer delay_data_sync_;
  56. // sstable/log Sync() calls return an error.
  57. port::AtomicPointer data_sync_error_;
  58. // Simulate no-space errors while this pointer is non-NULL.
  59. port::AtomicPointer no_space_;
  60. // Simulate non-writable file system while this pointer is non-NULL
  61. port::AtomicPointer non_writable_;
  62. // Force sync of manifest files to fail while this pointer is non-NULL
  63. port::AtomicPointer manifest_sync_error_;
  64. // Force write to manifest files to fail while this pointer is non-NULL
  65. port::AtomicPointer manifest_write_error_;
  66. bool count_random_reads_;
  67. AtomicCounter random_read_counter_;
  68. explicit SpecialEnv(Env* base) : EnvWrapper(base) {
  69. delay_data_sync_.Release_Store(NULL);
  70. data_sync_error_.Release_Store(NULL);
  71. no_space_.Release_Store(NULL);
  72. non_writable_.Release_Store(NULL);
  73. count_random_reads_ = false;
  74. manifest_sync_error_.Release_Store(NULL);
  75. manifest_write_error_.Release_Store(NULL);
  76. }
  77. Status NewWritableFile(const std::string& f, WritableFile** r) {
  78. class DataFile : public WritableFile {
  79. private:
  80. SpecialEnv* env_;
  81. WritableFile* base_;
  82. public:
  83. DataFile(SpecialEnv* env, WritableFile* base)
  84. : env_(env),
  85. base_(base) {
  86. }
  87. ~DataFile() { delete base_; }
  88. Status Append(const Slice& data) {
  89. if (env_->no_space_.Acquire_Load() != NULL) {
  90. // Drop writes on the floor
  91. return Status::OK();
  92. } else {
  93. return base_->Append(data);
  94. }
  95. }
  96. Status Close() { return base_->Close(); }
  97. Status Flush() { return base_->Flush(); }
  98. Status Sync() {
  99. if (env_->data_sync_error_.Acquire_Load() != NULL) {
  100. return Status::IOError("simulated data sync error");
  101. }
  102. while (env_->delay_data_sync_.Acquire_Load() != NULL) {
  103. DelayMilliseconds(100);
  104. }
  105. return base_->Sync();
  106. }
  107. };
  108. class ManifestFile : public WritableFile {
  109. private:
  110. SpecialEnv* env_;
  111. WritableFile* base_;
  112. public:
  113. ManifestFile(SpecialEnv* env, WritableFile* b) : env_(env), base_(b) { }
  114. ~ManifestFile() { delete base_; }
  115. Status Append(const Slice& data) {
  116. if (env_->manifest_write_error_.Acquire_Load() != NULL) {
  117. return Status::IOError("simulated writer error");
  118. } else {
  119. return base_->Append(data);
  120. }
  121. }
  122. Status Close() { return base_->Close(); }
  123. Status Flush() { return base_->Flush(); }
  124. Status Sync() {
  125. if (env_->manifest_sync_error_.Acquire_Load() != NULL) {
  126. return Status::IOError("simulated sync error");
  127. } else {
  128. return base_->Sync();
  129. }
  130. }
  131. };
  132. if (non_writable_.Acquire_Load() != NULL) {
  133. return Status::IOError("simulated write error");
  134. }
  135. Status s = target()->NewWritableFile(f, r);
  136. if (s.ok()) {
  137. if (strstr(f.c_str(), ".ldb") != NULL ||
  138. strstr(f.c_str(), ".log") != NULL) {
  139. *r = new DataFile(this, *r);
  140. } else if (strstr(f.c_str(), "MANIFEST") != NULL) {
  141. *r = new ManifestFile(this, *r);
  142. }
  143. }
  144. return s;
  145. }
  146. Status NewRandomAccessFile(const std::string& f, RandomAccessFile** r) {
  147. class CountingFile : public RandomAccessFile {
  148. private:
  149. RandomAccessFile* target_;
  150. AtomicCounter* counter_;
  151. public:
  152. CountingFile(RandomAccessFile* target, AtomicCounter* counter)
  153. : target_(target), counter_(counter) {
  154. }
  155. virtual ~CountingFile() { delete target_; }
  156. virtual Status Read(uint64_t offset, size_t n, Slice* result,
  157. char* scratch) const {
  158. counter_->Increment();
  159. return target_->Read(offset, n, result, scratch);
  160. }
  161. };
  162. Status s = target()->NewRandomAccessFile(f, r);
  163. if (s.ok() && count_random_reads_) {
  164. *r = new CountingFile(*r, &random_read_counter_);
  165. }
  166. return s;
  167. }
  168. };
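// SpecialEnv is the fault-injection hook for these tests: it wraps the real
// Env, and NewWritableFile()/NewRandomAccessFile() wrap the returned files so
// that flipping one of the AtomicPointer flags changes file behavior at run
// time. A typical test toggles a flag around the operation under test, e.g.
// (as in TEST(DBTest, NoSpace) below):
//
//   env_->no_space_.Release_Store(env_);   // start simulating "no space"
//   ... exercise writes/compactions ...
//   env_->no_space_.Release_Store(NULL);   // back to normal behavior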
  169. class DBTest {
  170. private:
  171. const FilterPolicy* filter_policy_;
  172. // Sequence of option configurations to try
  173. enum OptionConfig {
  174. kDefault,
  175. kReuse,
  176. kFilter,
  177. kUncompressed,
  178. kEnd
  179. };
  180. int option_config_;
  181. public:
  182. std::string dbname_;
  183. SpecialEnv* env_;
  184. DB* db_;
  185. Options last_options_;
  186. DBTest() : option_config_(kDefault),
  187. env_(new SpecialEnv(Env::Default())) {
  188. filter_policy_ = NewBloomFilterPolicy(10);
  189. dbname_ = test::TmpDir() + "/db_test";
  190. DestroyDB(dbname_, Options());
  191. db_ = NULL;
  192. Reopen();
  193. }
  194. ~DBTest() {
  195. delete db_;
  196. DestroyDB(dbname_, Options());
  197. delete env_;
  198. delete filter_policy_;
  199. }
  200. // Switch to a fresh database with the next option configuration to
  201. // test. Return false if there are no more configurations to test.
  202. bool ChangeOptions() {
  203. option_config_++;
  204. if (option_config_ >= kEnd) {
  205. return false;
  206. } else {
  207. DestroyAndReopen();
  208. return true;
  209. }
  210. }
  211. // Return the current option configuration.
  212. Options CurrentOptions() {
  213. Options options;
  214. options.reuse_logs = false;
  215. switch (option_config_) {
  216. case kReuse:
  217. options.reuse_logs = true;
  218. break;
  219. case kFilter:
  220. options.filter_policy = filter_policy_;
  221. break;
  222. case kUncompressed:
  223. options.compression = kNoCompression;
  224. break;
  225. default:
  226. break;
  227. }
  228. return options;
  229. }
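// Casts db_ to the implementation class so tests can reach the TEST_* hooks
// (TEST_CompactMemTable, TEST_CompactRange, TEST_NewInternalIterator,
// TEST_MaxNextLevelOverlappingBytes) that are not part of the public DB API.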
  230. DBImpl* dbfull() {
  231. return reinterpret_cast<DBImpl*>(db_);
  232. }
  233. void Reopen(Options* options = NULL) {
  234. ASSERT_OK(TryReopen(options));
  235. }
  236. void Close() {
  237. delete db_;
  238. db_ = NULL;
  239. }
  240. void DestroyAndReopen(Options* options = NULL) {
  241. delete db_;
  242. db_ = NULL;
  243. DestroyDB(dbname_, Options());
  244. ASSERT_OK(TryReopen(options));
  245. }
  246. Status TryReopen(Options* options) {
  247. delete db_;
  248. db_ = NULL;
  249. Options opts;
  250. if (options != NULL) {
  251. opts = *options;
  252. } else {
  253. opts = CurrentOptions();
  254. opts.create_if_missing = true;
  255. }
  256. last_options_ = opts;
  257. return DB::Open(opts, dbname_, &db_);
  258. }
  259. Status Put(const std::string& k, const std::string& v) {
  260. return db_->Put(WriteOptions(), k, v);
  261. }
  262. Status Delete(const std::string& k) {
  263. return db_->Delete(WriteOptions(), k);
  264. }
  265. std::string Get(const std::string& k, const Snapshot* snapshot = NULL) {
  266. ReadOptions options;
  267. options.snapshot = snapshot;
  268. std::string result;
  269. Status s = db_->Get(options, k, &result);
  270. if (s.IsNotFound()) {
  271. result = "NOT_FOUND";
  272. } else if (!s.ok()) {
  273. result = s.ToString();
  274. }
  275. return result;
  276. }
  277. // Return a string that contains all key,value pairs in order,
  278. // formatted like "(k1->v1)(k2->v2)".
  279. std::string Contents() {
  280. std::vector<std::string> forward;
  281. std::string result;
  282. Iterator* iter = db_->NewIterator(ReadOptions());
  283. for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
  284. std::string s = IterStatus(iter);
  285. result.push_back('(');
  286. result.append(s);
  287. result.push_back(')');
  288. forward.push_back(s);
  289. }
  290. // Check reverse iteration results are the reverse of forward results
  291. size_t matched = 0;
  292. for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
  293. ASSERT_LT(matched, forward.size());
  294. ASSERT_EQ(IterStatus(iter), forward[forward.size() - matched - 1]);
  295. matched++;
  296. }
  297. ASSERT_EQ(matched, forward.size());
  298. delete iter;
  299. return result;
  300. }
  301. std::string AllEntriesFor(const Slice& user_key) {
  302. Iterator* iter = dbfull()->TEST_NewInternalIterator();
  303. InternalKey target(user_key, kMaxSequenceNumber, kTypeValue);
  304. iter->Seek(target.Encode());
  305. std::string result;
  306. if (!iter->status().ok()) {
  307. result = iter->status().ToString();
  308. } else {
  309. result = "[ ";
  310. bool first = true;
  311. while (iter->Valid()) {
  312. ParsedInternalKey ikey;
  313. if (!ParseInternalKey(iter->key(), &ikey)) {
  314. result += "CORRUPTED";
  315. } else {
  316. if (last_options_.comparator->Compare(ikey.user_key, user_key) != 0) {
  317. break;
  318. }
  319. if (!first) {
  320. result += ", ";
  321. }
  322. first = false;
  323. switch (ikey.type) {
  324. case kTypeValue:
  325. result += iter->value().ToString();
  326. break;
  327. case kTypeDeletion:
  328. result += "DEL";
  329. break;
  330. }
  331. }
  332. iter->Next();
  333. }
  334. if (!first) {
  335. result += " ";
  336. }
  337. result += "]";
  338. }
  339. delete iter;
  340. return result;
  341. }
  342. int NumTableFilesAtLevel(int level) {
  343. std::string property;
  344. ASSERT_TRUE(
  345. db_->GetProperty("leveldb.num-files-at-level" + NumberToString(level),
  346. &property));
  347. return atoi(property.c_str());
  348. }
  349. int TotalTableFiles() {
  350. int result = 0;
  351. for (int level = 0; level < config::kNumLevels; level++) {
  352. result += NumTableFilesAtLevel(level);
  353. }
  354. return result;
  355. }
  356. // Return spread of files per level
  357. std::string FilesPerLevel() {
  358. std::string result;
  359. int last_non_zero_offset = 0;
  360. for (int level = 0; level < config::kNumLevels; level++) {
  361. int f = NumTableFilesAtLevel(level);
  362. char buf[100];
  363. snprintf(buf, sizeof(buf), "%s%d", (level ? "," : ""), f);
  364. result += buf;
  365. if (f > 0) {
  366. last_non_zero_offset = result.size();
  367. }
  368. }
  369. result.resize(last_non_zero_offset);
  370. return result;
  371. }
  372. int CountFiles() {
  373. std::vector<std::string> files;
  374. env_->GetChildren(dbname_, &files);
  375. return static_cast<int>(files.size());
  376. }
  377. uint64_t Size(const Slice& start, const Slice& limit) {
  378. Range r(start, limit);
  379. uint64_t size;
  380. db_->GetApproximateSizes(&r, 1, &size);
  381. return size;
  382. }
  383. void Compact(const Slice& start, const Slice& limit) {
  384. db_->CompactRange(&start, &limit);
  385. }
  386. // Do n memtable compactions, each of which produces an sstable
  387. // covering the range [small,large].
  388. void MakeTables(int n, const std::string& small, const std::string& large) {
  389. for (int i = 0; i < n; i++) {
  390. Put(small, "begin");
  391. Put(large, "end");
  392. dbfull()->TEST_CompactMemTable();
  393. }
  394. }
  395. // Prevent pushing of new sstables into deeper levels by adding
  396. // tables that cover a specified range to all levels.
  397. void FillLevels(const std::string& smallest, const std::string& largest) {
  398. MakeTables(config::kNumLevels, smallest, largest);
  399. }
  400. void DumpFileCounts(const char* label) {
  401. fprintf(stderr, "---\n%s:\n", label);
  402. fprintf(stderr, "maxoverlap: %lld\n",
  403. static_cast<long long>(
  404. dbfull()->TEST_MaxNextLevelOverlappingBytes()));
  405. for (int level = 0; level < config::kNumLevels; level++) {
  406. int num = NumTableFilesAtLevel(level);
  407. if (num > 0) {
  408. fprintf(stderr, " level %3d : %d files\n", level, num);
  409. }
  410. }
  411. }
  412. std::string DumpSSTableList() {
  413. std::string property;
  414. db_->GetProperty("leveldb.sstables", &property);
  415. return property;
  416. }
  417. std::string IterStatus(Iterator* iter) {
  418. std::string result;
  419. if (iter->Valid()) {
  420. result = iter->key().ToString() + "->" + iter->value().ToString();
  421. } else {
  422. result = "(invalid)";
  423. }
  424. return result;
  425. }
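// Deletes one table file (if any) from the database directory and returns
// true if a file was found and deleted.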
  426. bool DeleteAnSSTFile() {
  427. std::vector<std::string> filenames;
  428. ASSERT_OK(env_->GetChildren(dbname_, &filenames));
  429. uint64_t number;
  430. FileType type;
  431. for (size_t i = 0; i < filenames.size(); i++) {
  432. if (ParseFileName(filenames[i], &number, &type) && type == kTableFile) {
  433. ASSERT_OK(env_->DeleteFile(TableFileName(dbname_, number)));
  434. return true;
  435. }
  436. }
  437. return false;
  438. }
  439. // Returns number of files renamed.
  440. int RenameLDBToSST() {
  441. std::vector<std::string> filenames;
  442. ASSERT_OK(env_->GetChildren(dbname_, &filenames));
  443. uint64_t number;
  444. FileType type;
  445. int files_renamed = 0;
  446. for (size_t i = 0; i < filenames.size(); i++) {
  447. if (ParseFileName(filenames[i], &number, &type) && type == kTableFile) {
  448. const std::string from = TableFileName(dbname_, number);
  449. const std::string to = SSTTableFileName(dbname_, number);
  450. ASSERT_OK(env_->RenameFile(from, to));
  451. files_renamed++;
  452. }
  453. }
  454. return files_renamed;
  455. }
  456. };
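// Most tests below follow the same pattern: run the body once per option
// configuration (kDefault, kReuse, kFilter, kUncompressed), using
// ChangeOptions() to destroy and reopen the database between runs:
//
//   TEST(DBTest, Something) {
//     do {
//       ... asserts against db_ ...
//     } while (ChangeOptions());
//   }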
  457. TEST(DBTest, Empty) {
  458. do {
  459. ASSERT_TRUE(db_ != NULL);
  460. ASSERT_EQ("NOT_FOUND", Get("foo"));
  461. } while (ChangeOptions());
  462. }
  463. TEST(DBTest, ReadWrite) {
  464. do {
  465. ASSERT_OK(Put("foo", "v1"));
  466. ASSERT_EQ("v1", Get("foo"));
  467. ASSERT_OK(Put("bar", "v2"));
  468. ASSERT_OK(Put("foo", "v3"));
  469. ASSERT_EQ("v3", Get("foo"));
  470. ASSERT_EQ("v2", Get("bar"));
  471. } while (ChangeOptions());
  472. }
  473. TEST(DBTest, PutDeleteGet) {
  474. do {
  475. ASSERT_OK(db_->Put(WriteOptions(), "foo", "v1"));
  476. ASSERT_EQ("v1", Get("foo"));
  477. ASSERT_OK(db_->Put(WriteOptions(), "foo", "v2"));
  478. ASSERT_EQ("v2", Get("foo"));
  479. ASSERT_OK(db_->Delete(WriteOptions(), "foo"));
  480. ASSERT_EQ("NOT_FOUND", Get("foo"));
  481. } while (ChangeOptions());
  482. }
  483. TEST(DBTest, GetFromImmutableLayer) {
  484. do {
  485. Options options = CurrentOptions();
  486. options.env = env_;
  487. options.write_buffer_size = 100000; // Small write buffer
  488. Reopen(&options);
  489. ASSERT_OK(Put("foo", "v1"));
  490. ASSERT_EQ("v1", Get("foo"));
  491. env_->delay_data_sync_.Release_Store(env_); // Block sync calls
  492. Put("k1", std::string(100000, 'x')); // Fill memtable
  493. Put("k2", std::string(100000, 'y')); // Trigger compaction
  494. ASSERT_EQ("v1", Get("foo"));
  495. env_->delay_data_sync_.Release_Store(NULL); // Release sync calls
  496. } while (ChangeOptions());
  497. }
  498. TEST(DBTest, GetFromVersions) {
  499. do {
  500. ASSERT_OK(Put("foo", "v1"));
  501. dbfull()->TEST_CompactMemTable();
  502. ASSERT_EQ("v1", Get("foo"));
  503. } while (ChangeOptions());
  504. }
  505. TEST(DBTest, GetMemUsage) {
  506. do {
  507. ASSERT_OK(Put("foo", "v1"));
  508. std::string val;
  509. ASSERT_TRUE(db_->GetProperty("leveldb.approximate-memory-usage", &val));
  510. int mem_usage = atoi(val.c_str());
  511. ASSERT_GT(mem_usage, 0);
  512. ASSERT_LT(mem_usage, 5*1024*1024);
  513. } while (ChangeOptions());
  514. }
  515. TEST(DBTest, GetSnapshot) {
  516. do {
  517. // Try with both a short key and a long key
  518. for (int i = 0; i < 2; i++) {
  519. std::string key = (i == 0) ? std::string("foo") : std::string(200, 'x');
  520. ASSERT_OK(Put(key, "v1"));
  521. const Snapshot* s1 = db_->GetSnapshot();
  522. ASSERT_OK(Put(key, "v2"));
  523. ASSERT_EQ("v2", Get(key));
  524. ASSERT_EQ("v1", Get(key, s1));
  525. dbfull()->TEST_CompactMemTable();
  526. ASSERT_EQ("v2", Get(key));
  527. ASSERT_EQ("v1", Get(key, s1));
  528. db_->ReleaseSnapshot(s1);
  529. }
  530. } while (ChangeOptions());
  531. }
  532. TEST(DBTest, GetLevel0Ordering) {
  533. do {
  534. // Check that we process level-0 files in correct order. The code
  535. // below generates two level-0 files where the earlier one comes
  536. // before the later one in the level-0 file list since the earlier
  537. // one has a smaller "smallest" key.
  538. ASSERT_OK(Put("bar", "b"));
  539. ASSERT_OK(Put("foo", "v1"));
  540. dbfull()->TEST_CompactMemTable();
  541. ASSERT_OK(Put("foo", "v2"));
  542. dbfull()->TEST_CompactMemTable();
  543. ASSERT_EQ("v2", Get("foo"));
  544. } while (ChangeOptions());
  545. }
  546. TEST(DBTest, GetOrderedByLevels) {
  547. do {
  548. ASSERT_OK(Put("foo", "v1"));
  549. Compact("a", "z");
  550. ASSERT_EQ("v1", Get("foo"));
  551. ASSERT_OK(Put("foo", "v2"));
  552. ASSERT_EQ("v2", Get("foo"));
  553. dbfull()->TEST_CompactMemTable();
  554. ASSERT_EQ("v2", Get("foo"));
  555. } while (ChangeOptions());
  556. }
  557. TEST(DBTest, GetPicksCorrectFile) {
  558. do {
  559. // Arrange to have multiple files in a non-level-0 level.
  560. ASSERT_OK(Put("a", "va"));
  561. Compact("a", "b");
  562. ASSERT_OK(Put("x", "vx"));
  563. Compact("x", "y");
  564. ASSERT_OK(Put("f", "vf"));
  565. Compact("f", "g");
  566. ASSERT_EQ("va", Get("a"));
  567. ASSERT_EQ("vf", Get("f"));
  568. ASSERT_EQ("vx", Get("x"));
  569. } while (ChangeOptions());
  570. }
  571. TEST(DBTest, GetEncountersEmptyLevel) {
  572. do {
  573. // Arrange for the following to happen:
  574. // * sstable A in level 0
  575. // * nothing in level 1
  576. // * sstable B in level 2
  577. // Then do enough Get() calls to arrange for an automatic compaction
  578. // of sstable A. A bug would cause the compaction to be marked as
  579. // occurring at level 1 (instead of the correct level 0).
  580. // Step 1: First place sstables in levels 0 and 2
  581. int compaction_count = 0;
  582. while (NumTableFilesAtLevel(0) == 0 ||
  583. NumTableFilesAtLevel(2) == 0) {
  584. ASSERT_LE(compaction_count, 100) << "could not fill levels 0 and 2";
  585. compaction_count++;
  586. Put("a", "begin");
  587. Put("z", "end");
  588. dbfull()->TEST_CompactMemTable();
  589. }
  590. // Step 2: clear level 1 if necessary.
  591. dbfull()->TEST_CompactRange(1, NULL, NULL);
  592. ASSERT_EQ(NumTableFilesAtLevel(0), 1);
  593. ASSERT_EQ(NumTableFilesAtLevel(1), 0);
  594. ASSERT_EQ(NumTableFilesAtLevel(2), 1);
  595. // Step 3: read a bunch of times
  596. for (int i = 0; i < 1000; i++) {
  597. ASSERT_EQ("NOT_FOUND", Get("missing"));
  598. }
  599. // Step 4: Wait for compaction to finish
  600. DelayMilliseconds(1000);
  601. ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  602. } while (ChangeOptions());
  603. }
  604. TEST(DBTest, IterEmpty) {
  605. Iterator* iter = db_->NewIterator(ReadOptions());
  606. iter->SeekToFirst();
  607. ASSERT_EQ(IterStatus(iter), "(invalid)");
  608. iter->SeekToLast();
  609. ASSERT_EQ(IterStatus(iter), "(invalid)");
  610. iter->Seek("foo");
  611. ASSERT_EQ(IterStatus(iter), "(invalid)");
  612. delete iter;
  613. }
  614. TEST(DBTest, IterSingle) {
  615. ASSERT_OK(Put("a", "va"));
  616. Iterator* iter = db_->NewIterator(ReadOptions());
  617. iter->SeekToFirst();
  618. ASSERT_EQ(IterStatus(iter), "a->va");
  619. iter->Next();
  620. ASSERT_EQ(IterStatus(iter), "(invalid)");
  621. iter->SeekToFirst();
  622. ASSERT_EQ(IterStatus(iter), "a->va");
  623. iter->Prev();
  624. ASSERT_EQ(IterStatus(iter), "(invalid)");
  625. iter->SeekToLast();
  626. ASSERT_EQ(IterStatus(iter), "a->va");
  627. iter->Next();
  628. ASSERT_EQ(IterStatus(iter), "(invalid)");
  629. iter->SeekToLast();
  630. ASSERT_EQ(IterStatus(iter), "a->va");
  631. iter->Prev();
  632. ASSERT_EQ(IterStatus(iter), "(invalid)");
  633. iter->Seek("");
  634. ASSERT_EQ(IterStatus(iter), "a->va");
  635. iter->Next();
  636. ASSERT_EQ(IterStatus(iter), "(invalid)");
  637. iter->Seek("a");
  638. ASSERT_EQ(IterStatus(iter), "a->va");
  639. iter->Next();
  640. ASSERT_EQ(IterStatus(iter), "(invalid)");
  641. iter->Seek("b");
  642. ASSERT_EQ(IterStatus(iter), "(invalid)");
  643. delete iter;
  644. }
  645. TEST(DBTest, IterMulti) {
  646. ASSERT_OK(Put("a", "va"));
  647. ASSERT_OK(Put("b", "vb"));
  648. ASSERT_OK(Put("c", "vc"));
  649. Iterator* iter = db_->NewIterator(ReadOptions());
  650. iter->SeekToFirst();
  651. ASSERT_EQ(IterStatus(iter), "a->va");
  652. iter->Next();
  653. ASSERT_EQ(IterStatus(iter), "b->vb");
  654. iter->Next();
  655. ASSERT_EQ(IterStatus(iter), "c->vc");
  656. iter->Next();
  657. ASSERT_EQ(IterStatus(iter), "(invalid)");
  658. iter->SeekToFirst();
  659. ASSERT_EQ(IterStatus(iter), "a->va");
  660. iter->Prev();
  661. ASSERT_EQ(IterStatus(iter), "(invalid)");
  662. iter->SeekToLast();
  663. ASSERT_EQ(IterStatus(iter), "c->vc");
  664. iter->Prev();
  665. ASSERT_EQ(IterStatus(iter), "b->vb");
  666. iter->Prev();
  667. ASSERT_EQ(IterStatus(iter), "a->va");
  668. iter->Prev();
  669. ASSERT_EQ(IterStatus(iter), "(invalid)");
  670. iter->SeekToLast();
  671. ASSERT_EQ(IterStatus(iter), "c->vc");
  672. iter->Next();
  673. ASSERT_EQ(IterStatus(iter), "(invalid)");
  674. iter->Seek("");
  675. ASSERT_EQ(IterStatus(iter), "a->va");
  676. iter->Seek("a");
  677. ASSERT_EQ(IterStatus(iter), "a->va");
  678. iter->Seek("ax");
  679. ASSERT_EQ(IterStatus(iter), "b->vb");
  680. iter->Seek("b");
  681. ASSERT_EQ(IterStatus(iter), "b->vb");
  682. iter->Seek("z");
  683. ASSERT_EQ(IterStatus(iter), "(invalid)");
  684. // Switch from reverse to forward
  685. iter->SeekToLast();
  686. iter->Prev();
  687. iter->Prev();
  688. iter->Next();
  689. ASSERT_EQ(IterStatus(iter), "b->vb");
  690. // Switch from forward to reverse
  691. iter->SeekToFirst();
  692. iter->Next();
  693. iter->Next();
  694. iter->Prev();
  695. ASSERT_EQ(IterStatus(iter), "b->vb");
  696. // Make sure iter stays at snapshot
  697. ASSERT_OK(Put("a", "va2"));
  698. ASSERT_OK(Put("a2", "va3"));
  699. ASSERT_OK(Put("b", "vb2"));
  700. ASSERT_OK(Put("c", "vc2"));
  701. ASSERT_OK(Delete("b"));
  702. iter->SeekToFirst();
  703. ASSERT_EQ(IterStatus(iter), "a->va");
  704. iter->Next();
  705. ASSERT_EQ(IterStatus(iter), "b->vb");
  706. iter->Next();
  707. ASSERT_EQ(IterStatus(iter), "c->vc");
  708. iter->Next();
  709. ASSERT_EQ(IterStatus(iter), "(invalid)");
  710. iter->SeekToLast();
  711. ASSERT_EQ(IterStatus(iter), "c->vc");
  712. iter->Prev();
  713. ASSERT_EQ(IterStatus(iter), "b->vb");
  714. iter->Prev();
  715. ASSERT_EQ(IterStatus(iter), "a->va");
  716. iter->Prev();
  717. ASSERT_EQ(IterStatus(iter), "(invalid)");
  718. delete iter;
  719. }
  720. TEST(DBTest, IterSmallAndLargeMix) {
  721. ASSERT_OK(Put("a", "va"));
  722. ASSERT_OK(Put("b", std::string(100000, 'b')));
  723. ASSERT_OK(Put("c", "vc"));
  724. ASSERT_OK(Put("d", std::string(100000, 'd')));
  725. ASSERT_OK(Put("e", std::string(100000, 'e')));
  726. Iterator* iter = db_->NewIterator(ReadOptions());
  727. iter->SeekToFirst();
  728. ASSERT_EQ(IterStatus(iter), "a->va");
  729. iter->Next();
  730. ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b'));
  731. iter->Next();
  732. ASSERT_EQ(IterStatus(iter), "c->vc");
  733. iter->Next();
  734. ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd'));
  735. iter->Next();
  736. ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e'));
  737. iter->Next();
  738. ASSERT_EQ(IterStatus(iter), "(invalid)");
  739. iter->SeekToLast();
  740. ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e'));
  741. iter->Prev();
  742. ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd'));
  743. iter->Prev();
  744. ASSERT_EQ(IterStatus(iter), "c->vc");
  745. iter->Prev();
  746. ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b'));
  747. iter->Prev();
  748. ASSERT_EQ(IterStatus(iter), "a->va");
  749. iter->Prev();
  750. ASSERT_EQ(IterStatus(iter), "(invalid)");
  751. delete iter;
  752. }
  753. TEST(DBTest, IterMultiWithDelete) {
  754. do {
  755. ASSERT_OK(Put("a", "va"));
  756. ASSERT_OK(Put("b", "vb"));
  757. ASSERT_OK(Put("c", "vc"));
  758. ASSERT_OK(Delete("b"));
  759. ASSERT_EQ("NOT_FOUND", Get("b"));
  760. Iterator* iter = db_->NewIterator(ReadOptions());
  761. iter->Seek("c");
  762. ASSERT_EQ(IterStatus(iter), "c->vc");
  763. iter->Prev();
  764. ASSERT_EQ(IterStatus(iter), "a->va");
  765. delete iter;
  766. } while (ChangeOptions());
  767. }
  768. TEST(DBTest, Recover) {
  769. do {
  770. ASSERT_OK(Put("foo", "v1"));
  771. ASSERT_OK(Put("baz", "v5"));
  772. Reopen();
  773. ASSERT_EQ("v1", Get("foo"));
  774. ASSERT_EQ("v1", Get("foo"));
  775. ASSERT_EQ("v5", Get("baz"));
  776. ASSERT_OK(Put("bar", "v2"));
  777. ASSERT_OK(Put("foo", "v3"));
  778. Reopen();
  779. ASSERT_EQ("v3", Get("foo"));
  780. ASSERT_OK(Put("foo", "v4"));
  781. ASSERT_EQ("v4", Get("foo"));
  782. ASSERT_EQ("v2", Get("bar"));
  783. ASSERT_EQ("v5", Get("baz"));
  784. } while (ChangeOptions());
  785. }
  786. TEST(DBTest, RecoveryWithEmptyLog) {
  787. do {
  788. ASSERT_OK(Put("foo", "v1"));
  789. ASSERT_OK(Put("foo", "v2"));
  790. Reopen();
  791. Reopen();
  792. ASSERT_OK(Put("foo", "v3"));
  793. Reopen();
  794. ASSERT_EQ("v3", Get("foo"));
  795. } while (ChangeOptions());
  796. }
  797. // Check that writes done during a memtable compaction are recovered
  798. // if the database is shut down during the memtable compaction.
  799. TEST(DBTest, RecoverDuringMemtableCompaction) {
  800. do {
  801. Options options = CurrentOptions();
  802. options.env = env_;
  803. options.write_buffer_size = 1000000;
  804. Reopen(&options);
  805. // Trigger a long memtable compaction and reopen the database during it
  806. ASSERT_OK(Put("foo", "v1")); // Goes to 1st log file
  807. ASSERT_OK(Put("big1", std::string(10000000, 'x'))); // Fills memtable
  808. ASSERT_OK(Put("big2", std::string(1000, 'y'))); // Triggers compaction
  809. ASSERT_OK(Put("bar", "v2")); // Goes to new log file
  810. Reopen(&options);
  811. ASSERT_EQ("v1", Get("foo"));
  812. ASSERT_EQ("v2", Get("bar"));
  813. ASSERT_EQ(std::string(10000000, 'x'), Get("big1"));
  814. ASSERT_EQ(std::string(1000, 'y'), Get("big2"));
  815. } while (ChangeOptions());
  816. }
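// Key(i) produces zero-padded keys ("key000000", "key000001", ...), so the
// numeric order of i matches the lexicographic order used by the default
// bytewise comparator.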
  817. static std::string Key(int i) {
  818. char buf[100];
  819. snprintf(buf, sizeof(buf), "key%06d", i);
  820. return std::string(buf);
  821. }
  822. TEST(DBTest, MinorCompactionsHappen) {
  823. Options options = CurrentOptions();
  824. options.write_buffer_size = 10000;
  825. Reopen(&options);
  826. const int N = 500;
  827. int starting_num_tables = TotalTableFiles();
  828. for (int i = 0; i < N; i++) {
  829. ASSERT_OK(Put(Key(i), Key(i) + std::string(1000, 'v')));
  830. }
  831. int ending_num_tables = TotalTableFiles();
  832. ASSERT_GT(ending_num_tables, starting_num_tables);
  833. for (int i = 0; i < N; i++) {
  834. ASSERT_EQ(Key(i) + std::string(1000, 'v'), Get(Key(i)));
  835. }
  836. Reopen();
  837. for (int i = 0; i < N; i++) {
  838. ASSERT_EQ(Key(i) + std::string(1000, 'v'), Get(Key(i)));
  839. }
  840. }
  841. TEST(DBTest, RecoverWithLargeLog) {
  842. {
  843. Options options = CurrentOptions();
  844. Reopen(&options);
  845. ASSERT_OK(Put("big1", std::string(200000, '1')));
  846. ASSERT_OK(Put("big2", std::string(200000, '2')));
  847. ASSERT_OK(Put("small3", std::string(10, '3')));
  848. ASSERT_OK(Put("small4", std::string(10, '4')));
  849. ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  850. }
  851. // Make sure that if we re-open with a small write buffer size, we
  852. // flush table files in the middle of a large log file.
  853. Options options = CurrentOptions();
  854. options.write_buffer_size = 100000;
  855. Reopen(&options);
  856. ASSERT_EQ(NumTableFilesAtLevel(0), 3);
  857. ASSERT_EQ(std::string(200000, '1'), Get("big1"));
  858. ASSERT_EQ(std::string(200000, '2'), Get("big2"));
  859. ASSERT_EQ(std::string(10, '3'), Get("small3"));
  860. ASSERT_EQ(std::string(10, '4'), Get("small4"));
  861. ASSERT_GT(NumTableFilesAtLevel(0), 1);
  862. }
  863. TEST(DBTest, CompactionsGenerateMultipleFiles) {
  864. Options options = CurrentOptions();
  865. options.write_buffer_size = 100000000; // Large write buffer
  866. Reopen(&options);
  867. Random rnd(301);
  868. // Write 8MB (80 values, each 100K)
  869. ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  870. std::vector<std::string> values;
  871. for (int i = 0; i < 80; i++) {
  872. values.push_back(RandomString(&rnd, 100000));
  873. ASSERT_OK(Put(Key(i), values[i]));
  874. }
  875. // Reopening moves updates to level-0
  876. Reopen(&options);
  877. dbfull()->TEST_CompactRange(0, NULL, NULL);
  878. ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  879. ASSERT_GT(NumTableFilesAtLevel(1), 1);
  880. for (int i = 0; i < 80; i++) {
  881. ASSERT_EQ(Get(Key(i)), values[i]);
  882. }
  883. }
  884. TEST(DBTest, RepeatedWritesToSameKey) {
  885. Options options = CurrentOptions();
  886. options.env = env_;
  887. options.write_buffer_size = 100000; // Small write buffer
  888. Reopen(&options);
  889. // We must have at most one file per level except for level-0,
  890. // which may have up to kL0_StopWritesTrigger files.
  891. const int kMaxFiles = config::kNumLevels + config::kL0_StopWritesTrigger;
  892. Random rnd(301);
  893. std::string value = RandomString(&rnd, 2 * options.write_buffer_size);
  894. for (int i = 0; i < 5 * kMaxFiles; i++) {
  895. Put("key", value);
  896. ASSERT_LE(TotalTableFiles(), kMaxFiles);
  897. fprintf(stderr, "after %d: %d files\n", int(i+1), TotalTableFiles());
  898. }
  899. }
  900. TEST(DBTest, SparseMerge) {
  901. Options options = CurrentOptions();
  902. options.compression = kNoCompression;
  903. Reopen(&options);
  904. FillLevels("A", "Z");
  905. // Suppose there is:
  906. // small amount of data with prefix A
  907. // large amount of data with prefix B
  908. // small amount of data with prefix C
  909. // and that recent updates have made small changes to all three prefixes.
  910. // Check that we do not do a compaction that merges all of B in one shot.
  911. const std::string value(1000, 'x');
  912. Put("A", "va");
  913. // Write approximately 100MB of "B" values
  914. for (int i = 0; i < 100000; i++) {
  915. char key[100];
  916. snprintf(key, sizeof(key), "B%010d", i);
  917. Put(key, value);
  918. }
  919. Put("C", "vc");
  920. dbfull()->TEST_CompactMemTable();
  921. dbfull()->TEST_CompactRange(0, NULL, NULL);
  922. // Make sparse update
  923. Put("A", "va2");
  924. Put("B100", "bvalue2");
  925. Put("C", "vc2");
  926. dbfull()->TEST_CompactMemTable();
  927. // Compactions should not cause us to create a situation where
  928. // a file overlaps too much data at the next level.
  929. ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20*1048576);
  930. dbfull()->TEST_CompactRange(0, NULL, NULL);
  931. ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20*1048576);
  932. dbfull()->TEST_CompactRange(1, NULL, NULL);
  933. ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20*1048576);
  934. }
  935. static bool Between(uint64_t val, uint64_t low, uint64_t high) {
  936. bool result = (val >= low) && (val <= high);
  937. if (!result) {
  938. fprintf(stderr, "Value %llu is not in range [%llu, %llu]\n",
  939. (unsigned long long)(val),
  940. (unsigned long long)(low),
  941. (unsigned long long)(high));
  942. }
  943. return result;
  944. }
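// Between() is used with GetApproximateSizes(): the reported sizes include
// block, index, and other per-table overhead, so the tests check ranges
// (e.g. S1..S2 per value below) rather than exact byte counts.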
  945. TEST(DBTest, ApproximateSizes) {
  946. do {
  947. Options options = CurrentOptions();
  948. options.write_buffer_size = 100000000; // Large write buffer
  949. options.compression = kNoCompression;
  950. DestroyAndReopen();
  951. ASSERT_TRUE(Between(Size("", "xyz"), 0, 0));
  952. Reopen(&options);
  953. ASSERT_TRUE(Between(Size("", "xyz"), 0, 0));
  954. // Write 8MB (80 values, each 100K)
  955. ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  956. const int N = 80;
  957. static const int S1 = 100000;
  958. static const int S2 = 105000; // Allow some expansion from metadata
  959. Random rnd(301);
  960. for (int i = 0; i < N; i++) {
  961. ASSERT_OK(Put(Key(i), RandomString(&rnd, S1)));
  962. }
  963. // 0 because GetApproximateSizes() does not account for memtable space
  964. ASSERT_TRUE(Between(Size("", Key(50)), 0, 0));
  965. if (options.reuse_logs) {
  966. // Recovery will reuse the memtable, and GetApproximateSizes() does not
  967. // account for memtable usage.
  968. Reopen(&options);
  969. ASSERT_TRUE(Between(Size("", Key(50)), 0, 0));
  970. continue;
  971. }
  972. // Check sizes across recovery by reopening a few times
  973. for (int run = 0; run < 3; run++) {
  974. Reopen(&options);
  975. for (int compact_start = 0; compact_start < N; compact_start += 10) {
  976. for (int i = 0; i < N; i += 10) {
  977. ASSERT_TRUE(Between(Size("", Key(i)), S1*i, S2*i));
  978. ASSERT_TRUE(Between(Size("", Key(i)+".suffix"), S1*(i+1), S2*(i+1)));
  979. ASSERT_TRUE(Between(Size(Key(i), Key(i+10)), S1*10, S2*10));
  980. }
  981. ASSERT_TRUE(Between(Size("", Key(50)), S1*50, S2*50));
  982. ASSERT_TRUE(Between(Size("", Key(50)+".suffix"), S1*50, S2*50));
  983. std::string cstart_str = Key(compact_start);
  984. std::string cend_str = Key(compact_start + 9);
  985. Slice cstart = cstart_str;
  986. Slice cend = cend_str;
  987. dbfull()->TEST_CompactRange(0, &cstart, &cend);
  988. }
  989. ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  990. ASSERT_GT(NumTableFilesAtLevel(1), 0);
  991. }
  992. } while (ChangeOptions());
  993. }
  994. TEST(DBTest, ApproximateSizes_MixOfSmallAndLarge) {
  995. do {
  996. Options options = CurrentOptions();
  997. options.compression = kNoCompression;
  998. Reopen();
  999. Random rnd(301);
  1000. std::string big1 = RandomString(&rnd, 100000);
  1001. ASSERT_OK(Put(Key(0), RandomString(&rnd, 10000)));
  1002. ASSERT_OK(Put(Key(1), RandomString(&rnd, 10000)));
  1003. ASSERT_OK(Put(Key(2), big1));
  1004. ASSERT_OK(Put(Key(3), RandomString(&rnd, 10000)));
  1005. ASSERT_OK(Put(Key(4), big1));
  1006. ASSERT_OK(Put(Key(5), RandomString(&rnd, 10000)));
  1007. ASSERT_OK(Put(Key(6), RandomString(&rnd, 300000)));
  1008. ASSERT_OK(Put(Key(7), RandomString(&rnd, 10000)));
  1009. if (options.reuse_logs) {
  1010. // Need to force a memtable compaction since recovery does not do so.
  1011. ASSERT_OK(dbfull()->TEST_CompactMemTable());
  1012. }
  1013. // Check sizes across recovery by reopening a few times
  1014. for (int run = 0; run < 3; run++) {
  1015. Reopen(&options);
  1016. ASSERT_TRUE(Between(Size("", Key(0)), 0, 0));
  1017. ASSERT_TRUE(Between(Size("", Key(1)), 10000, 11000));
  1018. ASSERT_TRUE(Between(Size("", Key(2)), 20000, 21000));
  1019. ASSERT_TRUE(Between(Size("", Key(3)), 120000, 121000));
  1020. ASSERT_TRUE(Between(Size("", Key(4)), 130000, 131000));
  1021. ASSERT_TRUE(Between(Size("", Key(5)), 230000, 231000));
  1022. ASSERT_TRUE(Between(Size("", Key(6)), 240000, 241000));
  1023. ASSERT_TRUE(Between(Size("", Key(7)), 540000, 541000));
  1024. ASSERT_TRUE(Between(Size("", Key(8)), 550000, 560000));
  1025. ASSERT_TRUE(Between(Size(Key(3), Key(5)), 110000, 111000));
  1026. dbfull()->TEST_CompactRange(0, NULL, NULL);
  1027. }
  1028. } while (ChangeOptions());
  1029. }
  1030. TEST(DBTest, IteratorPinsRef) {
  1031. Put("foo", "hello");
  1032. // Get iterator that will yield the current contents of the DB.
  1033. Iterator* iter = db_->NewIterator(ReadOptions());
  1034. // Write to force compactions
  1035. Put("foo", "newvalue1");
  1036. for (int i = 0; i < 100; i++) {
  1037. ASSERT_OK(Put(Key(i), Key(i) + std::string(100000, 'v'))); // 100K values
  1038. }
  1039. Put("foo", "newvalue2");
  1040. iter->SeekToFirst();
  1041. ASSERT_TRUE(iter->Valid());
  1042. ASSERT_EQ("foo", iter->key().ToString());
  1043. ASSERT_EQ("hello", iter->value().ToString());
  1044. iter->Next();
  1045. ASSERT_TRUE(!iter->Valid());
  1046. delete iter;
  1047. }
  1048. TEST(DBTest, Snapshot) {
  1049. do {
  1050. Put("foo", "v1");
  1051. const Snapshot* s1 = db_->GetSnapshot();
  1052. Put("foo", "v2");
  1053. const Snapshot* s2 = db_->GetSnapshot();
  1054. Put("foo", "v3");
  1055. const Snapshot* s3 = db_->GetSnapshot();
  1056. Put("foo", "v4");
  1057. ASSERT_EQ("v1", Get("foo", s1));
  1058. ASSERT_EQ("v2", Get("foo", s2));
  1059. ASSERT_EQ("v3", Get("foo", s3));
  1060. ASSERT_EQ("v4", Get("foo"));
  1061. db_->ReleaseSnapshot(s3);
  1062. ASSERT_EQ("v1", Get("foo", s1));
  1063. ASSERT_EQ("v2", Get("foo", s2));
  1064. ASSERT_EQ("v4", Get("foo"));
  1065. db_->ReleaseSnapshot(s1);
  1066. ASSERT_EQ("v2", Get("foo", s2));
  1067. ASSERT_EQ("v4", Get("foo"));
  1068. db_->ReleaseSnapshot(s2);
  1069. ASSERT_EQ("v4", Get("foo"));
  1070. } while (ChangeOptions());
  1071. }
  1072. TEST(DBTest, HiddenValuesAreRemoved) {
  1073. do {
  1074. Random rnd(301);
  1075. FillLevels("a", "z");
  1076. std::string big = RandomString(&rnd, 50000);
  1077. Put("foo", big);
  1078. Put("pastfoo", "v");
  1079. const Snapshot* snapshot = db_->GetSnapshot();
  1080. Put("foo", "tiny");
  1081. Put("pastfoo2", "v2"); // Advance sequence number one more
  1082. ASSERT_OK(dbfull()->TEST_CompactMemTable());
  1083. ASSERT_GT(NumTableFilesAtLevel(0), 0);
  1084. ASSERT_EQ(big, Get("foo", snapshot));
  1085. ASSERT_TRUE(Between(Size("", "pastfoo"), 50000, 60000));
  1086. db_->ReleaseSnapshot(snapshot);
  1087. ASSERT_EQ(AllEntriesFor("foo"), "[ tiny, " + big + " ]");
  1088. Slice x("x");
  1089. dbfull()->TEST_CompactRange(0, NULL, &x);
  1090. ASSERT_EQ(AllEntriesFor("foo"), "[ tiny ]");
  1091. ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  1092. ASSERT_GE(NumTableFilesAtLevel(1), 1);
  1093. dbfull()->TEST_CompactRange(1, NULL, &x);
  1094. ASSERT_EQ(AllEntriesFor("foo"), "[ tiny ]");
  1095. ASSERT_TRUE(Between(Size("", "pastfoo"), 0, 1000));
  1096. } while (ChangeOptions());
  1097. }
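// The DeletionMarkers tests check when deletion markers may be dropped during
// compaction: a DEL can be discarded once the compaction is at the base level
// for the key (nothing older survives below), or when a newer value already
// hides everything the DEL would have hidden; otherwise it must be kept.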
  1098. TEST(DBTest, DeletionMarkers1) {
  1099. Put("foo", "v1");
  1100. ASSERT_OK(dbfull()->TEST_CompactMemTable());
  1101. const int last = config::kMaxMemCompactLevel;
  1102. ASSERT_EQ(NumTableFilesAtLevel(last), 1); // foo => v1 is now in last level
  1103. // Place a table at level last-1 to prevent merging with preceding mutation
  1104. Put("a", "begin");
  1105. Put("z", "end");
  1106. dbfull()->TEST_CompactMemTable();
  1107. ASSERT_EQ(NumTableFilesAtLevel(last), 1);
  1108. ASSERT_EQ(NumTableFilesAtLevel(last-1), 1);
  1109. Delete("foo");
  1110. Put("foo", "v2");
  1111. ASSERT_EQ(AllEntriesFor("foo"), "[ v2, DEL, v1 ]");
  1112. ASSERT_OK(dbfull()->TEST_CompactMemTable()); // Moves to level last-2
  1113. ASSERT_EQ(AllEntriesFor("foo"), "[ v2, DEL, v1 ]");
  1114. Slice z("z");
  1115. dbfull()->TEST_CompactRange(last-2, NULL, &z);
  1116. // DEL eliminated, but v1 remains because we aren't compacting that level
  1117. // (DEL can be eliminated because v2 hides v1).
  1118. ASSERT_EQ(AllEntriesFor("foo"), "[ v2, v1 ]");
  1119. dbfull()->TEST_CompactRange(last-1, NULL, NULL);
  1120. // Merging last-1 with last, so we are now the base level for "foo":
  1121. // the DEL is removed (as is v1).
  1122. ASSERT_EQ(AllEntriesFor("foo"), "[ v2 ]");
  1123. }
  1124. TEST(DBTest, DeletionMarkers2) {
  1125. Put("foo", "v1");
  1126. ASSERT_OK(dbfull()->TEST_CompactMemTable());
  1127. const int last = config::kMaxMemCompactLevel;
  1128. ASSERT_EQ(NumTableFilesAtLevel(last), 1); // foo => v1 is now in last level
  1129. // Place a table at level last-1 to prevent merging with preceding mutation
  1130. Put("a", "begin");
  1131. Put("z", "end");
  1132. dbfull()->TEST_CompactMemTable();
  1133. ASSERT_EQ(NumTableFilesAtLevel(last), 1);
  1134. ASSERT_EQ(NumTableFilesAtLevel(last-1), 1);
  1135. Delete("foo");
  1136. ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]");
  1137. ASSERT_OK(dbfull()->TEST_CompactMemTable()); // Moves to level last-2
  1138. ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]");
  1139. dbfull()->TEST_CompactRange(last-2, NULL, NULL);
  1140. // DEL kept: "last" file overlaps
  1141. ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]");
  1142. dbfull()->TEST_CompactRange(last-1, NULL, NULL);
  1143. // Merging last-1 with last, so we are now the base level for "foo":
  1144. // the DEL is removed (as is v1).
  1145. ASSERT_EQ(AllEntriesFor("foo"), "[ ]");
  1146. }
  1147. TEST(DBTest, OverlapInLevel0) {
  1148. do {
  1149. ASSERT_EQ(config::kMaxMemCompactLevel, 2) << "Fix test to match config";
  1150. // Fill levels 1 and 2 to disable the pushing of new memtables to levels > 0.
  1151. ASSERT_OK(Put("100", "v100"));
  1152. ASSERT_OK(Put("999", "v999"));
  1153. dbfull()->TEST_CompactMemTable();
  1154. ASSERT_OK(Delete("100"));
  1155. ASSERT_OK(Delete("999"));
  1156. dbfull()->TEST_CompactMemTable();
  1157. ASSERT_EQ("0,1,1", FilesPerLevel());
  1158. // Make files spanning the following ranges in level-0:
  1159. // files[0] 200 .. 900
  1160. // files[1] 300 .. 500
  1161. // Note that files are sorted by smallest key.
  1162. ASSERT_OK(Put("300", "v300"));
  1163. ASSERT_OK(Put("500", "v500"));
  1164. dbfull()->TEST_CompactMemTable();
  1165. ASSERT_OK(Put("200", "v200"));
  1166. ASSERT_OK(Put("600", "v600"));
  1167. ASSERT_OK(Put("900", "v900"));
  1168. dbfull()->TEST_CompactMemTable();
  1169. ASSERT_EQ("2,1,1", FilesPerLevel());
  1170. // Compact away the placeholder files we created initially
  1171. dbfull()->TEST_CompactRange(1, NULL, NULL);
  1172. dbfull()->TEST_CompactRange(2, NULL, NULL);
  1173. ASSERT_EQ("2", FilesPerLevel());
  1174. // Do a memtable compaction. Before the bug fix, the compaction would
  1175. // not detect the overlap with level-0 files and would incorrectly place
  1176. // the deletion in a deeper level.
  1177. ASSERT_OK(Delete("600"));
  1178. dbfull()->TEST_CompactMemTable();
  1179. ASSERT_EQ("3", FilesPerLevel());
  1180. ASSERT_EQ("NOT_FOUND", Get("600"));
  1181. } while (ChangeOptions());
  1182. }
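// Regression tests for LevelDB issue 44 (a level-0 compaction bug): they
// replay a particular sequence of reopens, puts, and deletes, then verify
// that Contents() is unchanged after background compactions have run.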
  1183. TEST(DBTest, L0_CompactionBug_Issue44_a) {
  1184. Reopen();
  1185. ASSERT_OK(Put("b", "v"));
  1186. Reopen();
  1187. ASSERT_OK(Delete("b"));
  1188. ASSERT_OK(Delete("a"));
  1189. Reopen();
  1190. ASSERT_OK(Delete("a"));
  1191. Reopen();
  1192. ASSERT_OK(Put("a", "v"));
  1193. Reopen();
  1194. Reopen();
  1195. ASSERT_EQ("(a->v)", Contents());
  1196. DelayMilliseconds(1000); // Wait for compaction to finish
  1197. ASSERT_EQ("(a->v)", Contents());
  1198. }
  1199. TEST(DBTest, L0_CompactionBug_Issue44_b) {
  1200. Reopen();
  1201. Put("","");
  1202. Reopen();
  1203. Delete("e");
  1204. Put("","");
  1205. Reopen();
  1206. Put("c", "cv");
  1207. Reopen();
  1208. Put("","");
  1209. Reopen();
  1210. Put("","");
  1211. DelayMilliseconds(1000); // Wait for compaction to finish
  1212. Reopen();
  1213. Put("d","dv");
  1214. Reopen();
  1215. Put("","");
  1216. Reopen();
  1217. Delete("d");
  1218. Delete("b");
  1219. Reopen();
  1220. ASSERT_EQ("(->)(c->cv)", Contents());
  1221. DelayMilliseconds(1000); // Wait for compaction to finish
  1222. ASSERT_EQ("(->)(c->cv)", Contents());
  1223. }
  1224. TEST(DBTest, ComparatorCheck) {
  1225. class NewComparator : public Comparator {
  1226. public:
  1227. virtual const char* Name() const { return "leveldb.NewComparator"; }
  1228. virtual int Compare(const Slice& a, const Slice& b) const {
  1229. return BytewiseComparator()->Compare(a, b);
  1230. }
  1231. virtual void FindShortestSeparator(std::string* s, const Slice& l) const {
  1232. BytewiseComparator()->FindShortestSeparator(s, l);
  1233. }
  1234. virtual void FindShortSuccessor(std::string* key) const {
  1235. BytewiseComparator()->FindShortSuccessor(key);
  1236. }
  1237. };
  1238. NewComparator cmp;
  1239. Options new_options = CurrentOptions();
  1240. new_options.comparator = &cmp;
  1241. Status s = TryReopen(&new_options);
  1242. ASSERT_TRUE(!s.ok());
  1243. ASSERT_TRUE(s.ToString().find("comparator") != std::string::npos)
  1244. << s.ToString();
  1245. }
  1246. TEST(DBTest, CustomComparator) {
  1247. class NumberComparator : public Comparator {
  1248. public:
  1249. virtual const char* Name() const { return "test.NumberComparator"; }
  1250. virtual int Compare(const Slice& a, const Slice& b) const {
  1251. return ToNumber(a) - ToNumber(b);
  1252. }
  1253. virtual void FindShortestSeparator(std::string* s, const Slice& l) const {
  1254. ToNumber(*s); // Check format
  1255. ToNumber(l); // Check format
  1256. }
  1257. virtual void FindShortSuccessor(std::string* key) const {
  1258. ToNumber(*key); // Check format
  1259. }
  1260. private:
  1261. static int ToNumber(const Slice& x) {
  1262. // Check that there are no extra characters.
  1263. ASSERT_TRUE(x.size() >= 2 && x[0] == '[' && x[x.size()-1] == ']')
  1264. << EscapeString(x);
  1265. int val;
  1266. char ignored;
  1267. ASSERT_TRUE(sscanf(x.ToString().c_str(), "[%i]%c", &val, &ignored) == 1)
  1268. << EscapeString(x);
  1269. return val;
  1270. }
  1271. };
  1272. NumberComparator cmp;
  1273. Options new_options = CurrentOptions();
  1274. new_options.create_if_missing = true;
  1275. new_options.comparator = &cmp;
  1276. new_options.filter_policy = NULL; // Cannot use bloom filters
  1277. new_options.write_buffer_size = 1000; // Compact more often
  1278. DestroyAndReopen(&new_options);
  1279. ASSERT_OK(Put("[10]", "ten"));
  1280. ASSERT_OK(Put("[0x14]", "twenty"));
  1281. for (int i = 0; i < 2; i++) {
  1282. ASSERT_EQ("ten", Get("[10]"));
  1283. ASSERT_EQ("ten", Get("[0xa]"));
  1284. ASSERT_EQ("twenty", Get("[20]"));
  1285. ASSERT_EQ("twenty", Get("[0x14]"));
  1286. ASSERT_EQ("NOT_FOUND", Get("[15]"));
  1287. ASSERT_EQ("NOT_FOUND", Get("[0xf]"));
  1288. Compact("[0]", "[9999]");
  1289. }
  1290. for (int run = 0; run < 2; run++) {
  1291. for (int i = 0; i < 1000; i++) {
  1292. char buf[100];
  1293. snprintf(buf, sizeof(buf), "[%d]", i*10);
  1294. ASSERT_OK(Put(buf, buf));
  1295. }
  1296. Compact("[0]", "[1000000]");
  1297. }
  1298. }
  1299. TEST(DBTest, ManualCompaction) {
  1300. ASSERT_EQ(config::kMaxMemCompactLevel, 2)
  1301. << "Need to update this test to match kMaxMemCompactLevel";
  1302. MakeTables(3, "p", "q");
  1303. ASSERT_EQ("1,1,1", FilesPerLevel());
  1304. // Compaction range falls before files
  1305. Compact("", "c");
  1306. ASSERT_EQ("1,1,1", FilesPerLevel());
  1307. // Compaction range falls after files
  1308. Compact("r", "z");
  1309. ASSERT_EQ("1,1,1", FilesPerLevel());
  1310. // Compaction range overlaps files
  1311. Compact("p1", "p9");
  1312. ASSERT_EQ("0,0,1", FilesPerLevel());
  1313. // Populate a different range
  1314. MakeTables(3, "c", "e");
  1315. ASSERT_EQ("1,1,2", FilesPerLevel());
  1316. // Compact just the new range
  1317. Compact("b", "f");
  1318. ASSERT_EQ("0,0,2", FilesPerLevel());
  1319. // Compact all
  1320. MakeTables(1, "a", "z");
  1321. ASSERT_EQ("0,1,2", FilesPerLevel());
  1322. db_->CompactRange(NULL, NULL);
  1323. ASSERT_EQ("0,0,1", FilesPerLevel());
  1324. }
TEST(DBTest, DBOpen_Options) {
  std::string dbname = test::TmpDir() + "/db_options_test";
  DestroyDB(dbname, Options());

  // Does not exist, and create_if_missing == false: error
  DB* db = NULL;
  Options opts;
  opts.create_if_missing = false;
  Status s = DB::Open(opts, dbname, &db);
  ASSERT_TRUE(strstr(s.ToString().c_str(), "does not exist") != NULL);
  ASSERT_TRUE(db == NULL);

  // Does not exist, and create_if_missing == true: OK
  opts.create_if_missing = true;
  s = DB::Open(opts, dbname, &db);
  ASSERT_OK(s);
  ASSERT_TRUE(db != NULL);

  delete db;
  db = NULL;

  // Does exist, and error_if_exists == true: error
  opts.create_if_missing = false;
  opts.error_if_exists = true;
  s = DB::Open(opts, dbname, &db);
  ASSERT_TRUE(strstr(s.ToString().c_str(), "exists") != NULL);
  ASSERT_TRUE(db == NULL);

  // Does exist, and error_if_exists == false: OK
  opts.create_if_missing = true;
  opts.error_if_exists = false;
  s = DB::Open(opts, dbname, &db);
  ASSERT_OK(s);
  ASSERT_TRUE(db != NULL);

  delete db;
  db = NULL;
}

TEST(DBTest, Locking) {
  DB* db2 = NULL;
  Status s = DB::Open(CurrentOptions(), dbname_, &db2);
  ASSERT_TRUE(!s.ok()) << "Locking did not prevent re-opening db";
}

// Check that number of files does not grow when we are out of space
TEST(DBTest, NoSpace) {
  Options options = CurrentOptions();
  options.env = env_;
  Reopen(&options);

  ASSERT_OK(Put("foo", "v1"));
  ASSERT_EQ("v1", Get("foo"));
  Compact("a", "z");
  const int num_files = CountFiles();
  env_->no_space_.Release_Store(env_);   // Force out-of-space errors
  for (int i = 0; i < 10; i++) {
    for (int level = 0; level < config::kNumLevels-1; level++) {
      dbfull()->TEST_CompactRange(level, NULL, NULL);
    }
  }
  env_->no_space_.Release_Store(NULL);
  ASSERT_LT(CountFiles(), num_files + 3);
}

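// Writes should start failing once the environment refuses to create new
// files; force that condition and expect at least one Put() error.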
TEST(DBTest, NonWritableFileSystem) {
  Options options = CurrentOptions();
  options.write_buffer_size = 1000;
  options.env = env_;
  Reopen(&options);
  ASSERT_OK(Put("foo", "v1"));
  env_->non_writable_.Release_Store(env_);  // Force errors for new files
  std::string big(100000, 'x');
  int errors = 0;
  for (int i = 0; i < 20; i++) {
    fprintf(stderr, "iter %d; errors %d\n", i, errors);
    if (!Put("foo", big).ok()) {
      errors++;
      DelayMilliseconds(100);
    }
  }
  ASSERT_GT(errors, 0);
  env_->non_writable_.Release_Store(NULL);
}

TEST(DBTest, WriteSyncError) {
  // Check that log sync errors cause the DB to disallow future writes.

  // (a) Cause log sync calls to fail
  Options options = CurrentOptions();
  options.env = env_;
  Reopen(&options);
  env_->data_sync_error_.Release_Store(env_);

  // (b) Normal write should succeed
  WriteOptions w;
  ASSERT_OK(db_->Put(w, "k1", "v1"));
  ASSERT_EQ("v1", Get("k1"));

  // (c) Do a sync write; should fail
  w.sync = true;
  ASSERT_TRUE(!db_->Put(w, "k2", "v2").ok());
  ASSERT_EQ("v1", Get("k1"));
  ASSERT_EQ("NOT_FOUND", Get("k2"));

  // (d) make sync behave normally
  env_->data_sync_error_.Release_Store(NULL);

  // (e) Do a non-sync write; should fail
  w.sync = false;
  ASSERT_TRUE(!db_->Put(w, "k3", "v3").ok());
  ASSERT_EQ("v1", Get("k1"));
  ASSERT_EQ("NOT_FOUND", Get("k2"));
  ASSERT_EQ("NOT_FOUND", Get("k3"));
}

TEST(DBTest, ManifestWriteError) {
  // Test for the following problem:
  // (a) Compaction produces file F
  // (b) Log record containing F is written to MANIFEST file, but Sync() fails
  // (c) GC deletes F
  // (d) After reopening DB, reads fail since deleted F is named in log record

  // We iterate twice.  In the second iteration, everything is the
  // same except the log record never makes it to the MANIFEST file.
  for (int iter = 0; iter < 2; iter++) {
    port::AtomicPointer* error_type = (iter == 0)
        ? &env_->manifest_sync_error_
        : &env_->manifest_write_error_;

    // Insert foo=>bar mapping
    Options options = CurrentOptions();
    options.env = env_;
    options.create_if_missing = true;
    options.error_if_exists = false;
    DestroyAndReopen(&options);
    ASSERT_OK(Put("foo", "bar"));
    ASSERT_EQ("bar", Get("foo"));

    // Memtable compaction (will succeed)
    dbfull()->TEST_CompactMemTable();
    ASSERT_EQ("bar", Get("foo"));
    const int last = config::kMaxMemCompactLevel;
    ASSERT_EQ(NumTableFilesAtLevel(last), 1);   // foo=>bar is now in last level

    // Merging compaction (will fail)
    error_type->Release_Store(env_);
    dbfull()->TEST_CompactRange(last, NULL, NULL);  // Should fail
    ASSERT_EQ("bar", Get("foo"));

    // Recovery: should not lose data
    error_type->Release_Store(NULL);
    Reopen(&options);
    ASSERT_EQ("bar", Get("foo"));
  }
}

TEST(DBTest, MissingSSTFile) {
  ASSERT_OK(Put("foo", "bar"));
  ASSERT_EQ("bar", Get("foo"));

  // Dump the memtable to disk.
  dbfull()->TEST_CompactMemTable();
  ASSERT_EQ("bar", Get("foo"));

  Close();
  ASSERT_TRUE(DeleteAnSSTFile());
  Options options = CurrentOptions();
  options.paranoid_checks = true;
  Status s = TryReopen(&options);
  ASSERT_TRUE(!s.ok());
  ASSERT_TRUE(s.ToString().find("issing") != std::string::npos)
      << s.ToString();
}

TEST(DBTest, StillReadSST) {
  ASSERT_OK(Put("foo", "bar"));
  ASSERT_EQ("bar", Get("foo"));

  // Dump the memtable to disk.
  dbfull()->TEST_CompactMemTable();
  ASSERT_EQ("bar", Get("foo"));

  Close();
  ASSERT_GT(RenameLDBToSST(), 0);
  Options options = CurrentOptions();
  options.paranoid_checks = true;
  Status s = TryReopen(&options);
  ASSERT_TRUE(s.ok());
  ASSERT_EQ("bar", Get("foo"));
}

TEST(DBTest, FilesDeletedAfterCompaction) {
  ASSERT_OK(Put("foo", "v2"));
  Compact("a", "z");
  const int num_files = CountFiles();
  for (int i = 0; i < 10; i++) {
    ASSERT_OK(Put("foo", "v2"));
    Compact("a", "z");
  }
  ASSERT_EQ(CountFiles(), num_files);
}

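// With a bloom filter configured, point lookups for missing keys should
// touch almost no sstables; count random reads to verify the bounds.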
TEST(DBTest, BloomFilter) {
  env_->count_random_reads_ = true;
  Options options = CurrentOptions();
  options.env = env_;
  options.block_cache = NewLRUCache(0);  // Prevent cache hits
  options.filter_policy = NewBloomFilterPolicy(10);
  Reopen(&options);

  // Populate multiple layers
  const int N = 10000;
  for (int i = 0; i < N; i++) {
    ASSERT_OK(Put(Key(i), Key(i)));
  }
  Compact("a", "z");
  for (int i = 0; i < N; i += 100) {
    ASSERT_OK(Put(Key(i), Key(i)));
  }
  dbfull()->TEST_CompactMemTable();

  // Prevent auto compactions triggered by seeks
  env_->delay_data_sync_.Release_Store(env_);

  // Lookup present keys.  Should rarely read from small sstable.
  env_->random_read_counter_.Reset();
  for (int i = 0; i < N; i++) {
    ASSERT_EQ(Key(i), Get(Key(i)));
  }
  int reads = env_->random_read_counter_.Read();
  fprintf(stderr, "%d present => %d reads\n", N, reads);
  ASSERT_GE(reads, N);
  ASSERT_LE(reads, N + 2*N/100);

  // Lookup missing keys.  Should rarely read from either sstable.
  env_->random_read_counter_.Reset();
  for (int i = 0; i < N; i++) {
    ASSERT_EQ("NOT_FOUND", Get(Key(i) + ".missing"));
  }
  reads = env_->random_read_counter_.Read();
  fprintf(stderr, "%d missing => %d reads\n", N, reads);
  ASSERT_LE(reads, 3*N/100);

  env_->delay_data_sync_.Release_Store(NULL);
  Close();
  delete options.block_cache;
  delete options.filter_policy;
}

// Multi-threaded test:
namespace {

static const int kNumThreads = 4;
static const int kTestSeconds = 10;
static const int kNumKeys = 1000;

struct MTState {
  DBTest* test;
  port::AtomicPointer stop;
  port::AtomicPointer counter[kNumThreads];
  port::AtomicPointer thread_done[kNumThreads];
};

struct MTThread {
  MTState* state;
  int id;
};

static void MTThreadBody(void* arg) {
  MTThread* t = reinterpret_cast<MTThread*>(arg);
  int id = t->id;
  DB* db = t->state->test->db_;
  uintptr_t counter = 0;
  fprintf(stderr, "... starting thread %d\n", id);
  Random rnd(1000 + id);
  std::string value;
  char valbuf[1500];
  while (t->state->stop.Acquire_Load() == NULL) {
    t->state->counter[id].Release_Store(reinterpret_cast<void*>(counter));

    int key = rnd.Uniform(kNumKeys);
    char keybuf[20];
    snprintf(keybuf, sizeof(keybuf), "%016d", key);

    if (rnd.OneIn(2)) {
      // Write values of the form <key, my id, counter>.
      // We add some padding to force compactions.
      snprintf(valbuf, sizeof(valbuf), "%d.%d.%-1000d",
               key, id, static_cast<int>(counter));
      ASSERT_OK(db->Put(WriteOptions(), Slice(keybuf), Slice(valbuf)));
    } else {
      // Read a value and verify that it matches the pattern written above.
      Status s = db->Get(ReadOptions(), Slice(keybuf), &value);
      if (s.IsNotFound()) {
        // Key has not yet been written
      } else {
        // Check that the writer thread counter is >= the counter in the value
        ASSERT_OK(s);
        int k, w, c;
        ASSERT_EQ(3, sscanf(value.c_str(), "%d.%d.%d", &k, &w, &c)) << value;
        ASSERT_EQ(k, key);
        ASSERT_GE(w, 0);
        ASSERT_LT(w, kNumThreads);
        ASSERT_LE(static_cast<uintptr_t>(c), reinterpret_cast<uintptr_t>(
            t->state->counter[w].Acquire_Load()));
      }
    }
    counter++;
  }
  t->state->thread_done[id].Release_Store(t);
  fprintf(stderr, "... stopping thread %d after %d ops\n", id, int(counter));
}

}  // namespace

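// Run kNumThreads concurrent reader/writer threads against the same DB for
// kTestSeconds, checking that every value read is consistent with the
// per-thread counters published above.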
TEST(DBTest, MultiThreaded) {
  do {
    // Initialize state
    MTState mt;
    mt.test = this;
    mt.stop.Release_Store(0);
    for (int id = 0; id < kNumThreads; id++) {
      mt.counter[id].Release_Store(0);
      mt.thread_done[id].Release_Store(0);
    }

    // Start threads
    MTThread thread[kNumThreads];
    for (int id = 0; id < kNumThreads; id++) {
      thread[id].state = &mt;
      thread[id].id = id;
      env_->StartThread(MTThreadBody, &thread[id]);
    }

    // Let them run for a while
    DelayMilliseconds(kTestSeconds * 1000);

    // Stop the threads and wait for them to finish
    mt.stop.Release_Store(&mt);
    for (int id = 0; id < kNumThreads; id++) {
      while (mt.thread_done[id].Acquire_Load() == NULL) {
        DelayMilliseconds(100);
      }
    }
  } while (ChangeOptions());
}

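// ModelDB is a simple in-memory reference implementation of the DB interface
// backed by a std::map; the Randomized test below applies the same operations
// to a ModelDB and to the real DB and compares their iterators.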
namespace {
typedef std::map<std::string, std::string> KVMap;
}

class ModelDB: public DB {
 public:
  class ModelSnapshot : public Snapshot {
   public:
    KVMap map_;
  };

  explicit ModelDB(const Options& options): options_(options) { }
  ~ModelDB() { }
  virtual Status Put(const WriteOptions& o, const Slice& k, const Slice& v) {
    return DB::Put(o, k, v);
  }
  virtual Status Delete(const WriteOptions& o, const Slice& key) {
    return DB::Delete(o, key);
  }
  virtual Status Get(const ReadOptions& options,
                     const Slice& key, std::string* value) {
    assert(false);      // Not implemented
    return Status::NotFound(key);
  }
  virtual Iterator* NewIterator(const ReadOptions& options) {
    if (options.snapshot == NULL) {
      KVMap* saved = new KVMap;
      *saved = map_;
      return new ModelIter(saved, true);
    } else {
      const KVMap* snapshot_state =
          &(reinterpret_cast<const ModelSnapshot*>(options.snapshot)->map_);
      return new ModelIter(snapshot_state, false);
    }
  }
  virtual const Snapshot* GetSnapshot() {
    ModelSnapshot* snapshot = new ModelSnapshot;
    snapshot->map_ = map_;
    return snapshot;
  }

  virtual void ReleaseSnapshot(const Snapshot* snapshot) {
    delete reinterpret_cast<const ModelSnapshot*>(snapshot);
  }
  virtual Status Write(const WriteOptions& options, WriteBatch* batch) {
    class Handler : public WriteBatch::Handler {
     public:
      KVMap* map_;
      virtual void Put(const Slice& key, const Slice& value) {
        (*map_)[key.ToString()] = value.ToString();
      }
      virtual void Delete(const Slice& key) {
        map_->erase(key.ToString());
      }
    };
    Handler handler;
    handler.map_ = &map_;
    return batch->Iterate(&handler);
  }

  virtual bool GetProperty(const Slice& property, std::string* value) {
    return false;
  }
  virtual void GetApproximateSizes(const Range* r, int n, uint64_t* sizes) {
    for (int i = 0; i < n; i++) {
      sizes[i] = 0;
    }
  }
  virtual void CompactRange(const Slice* start, const Slice* end) {
  }

 private:
  class ModelIter: public Iterator {
   public:
    ModelIter(const KVMap* map, bool owned)
        : map_(map), owned_(owned), iter_(map_->end()) {
    }
    ~ModelIter() {
      if (owned_) delete map_;
    }
    virtual bool Valid() const { return iter_ != map_->end(); }
    virtual void SeekToFirst() { iter_ = map_->begin(); }
    virtual void SeekToLast() {
      if (map_->empty()) {
        iter_ = map_->end();
      } else {
        iter_ = map_->find(map_->rbegin()->first);
      }
    }
    virtual void Seek(const Slice& k) {
      iter_ = map_->lower_bound(k.ToString());
    }
    virtual void Next() { ++iter_; }
    virtual void Prev() { --iter_; }
    virtual Slice key() const { return iter_->first; }
    virtual Slice value() const { return iter_->second; }
    virtual Status status() const { return Status::OK(); }
   private:
    const KVMap* const map_;
    const bool owned_;  // Do we own map_
    KVMap::const_iterator iter_;
  };
  const Options options_;
  KVMap map_;
};

static std::string RandomKey(Random* rnd) {
  int len = (rnd->OneIn(3)
             ? 1                // Short sometimes to encourage collisions
             : (rnd->OneIn(100) ? rnd->Skewed(10) : rnd->Uniform(10)));
  return test::RandomKey(rnd, len);
}

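// Walk the model iterator and the real DB iterator in lockstep (optionally at
// the given snapshots) and report the first key or value mismatch.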
static bool CompareIterators(int step,
                             DB* model,
                             DB* db,
                             const Snapshot* model_snap,
                             const Snapshot* db_snap) {
  ReadOptions options;
  options.snapshot = model_snap;
  Iterator* miter = model->NewIterator(options);
  options.snapshot = db_snap;
  Iterator* dbiter = db->NewIterator(options);
  bool ok = true;
  int count = 0;
  for (miter->SeekToFirst(), dbiter->SeekToFirst();
       ok && miter->Valid() && dbiter->Valid();
       miter->Next(), dbiter->Next()) {
    count++;
    if (miter->key().compare(dbiter->key()) != 0) {
      fprintf(stderr, "step %d: Key mismatch: '%s' vs. '%s'\n",
              step,
              EscapeString(miter->key()).c_str(),
              EscapeString(dbiter->key()).c_str());
      ok = false;
      break;
    }

    if (miter->value().compare(dbiter->value()) != 0) {
      fprintf(stderr, "step %d: Value mismatch for key '%s': '%s' vs. '%s'\n",
              step,
              EscapeString(miter->key()).c_str(),
              EscapeString(miter->value()).c_str(),
              EscapeString(dbiter->value()).c_str());
      ok = false;
    }
  }

  if (ok) {
    if (miter->Valid() != dbiter->Valid()) {
      fprintf(stderr, "step %d: Mismatch at end of iterators: %d vs. %d\n",
              step, miter->Valid(), dbiter->Valid());
      ok = false;
    }
  }

  fprintf(stderr, "%d entries compared: ok=%d\n", count, ok);
  delete miter;
  delete dbiter;
  return ok;
}

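// Apply a random mix of Puts, Deletes, and multi-element write batches to both
// the ModelDB and the real DB, periodically reopening the DB and comparing
// iterators with and without snapshots.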
TEST(DBTest, Randomized) {
  Random rnd(test::RandomSeed());
  do {
    ModelDB model(CurrentOptions());
    const int N = 10000;
    const Snapshot* model_snap = NULL;
    const Snapshot* db_snap = NULL;
    std::string k, v;
    for (int step = 0; step < N; step++) {
      if (step % 100 == 0) {
        fprintf(stderr, "Step %d of %d\n", step, N);
      }
      // TODO(sanjay): Test Get() works
      int p = rnd.Uniform(100);
      if (p < 45) {                               // Put
        k = RandomKey(&rnd);
        v = RandomString(&rnd,
                         rnd.OneIn(20)
                         ? 100 + rnd.Uniform(100)
                         : rnd.Uniform(8));
        ASSERT_OK(model.Put(WriteOptions(), k, v));
        ASSERT_OK(db_->Put(WriteOptions(), k, v));

      } else if (p < 90) {                        // Delete
        k = RandomKey(&rnd);
        ASSERT_OK(model.Delete(WriteOptions(), k));
        ASSERT_OK(db_->Delete(WriteOptions(), k));

      } else {                                    // Multi-element batch
        WriteBatch b;
        const int num = rnd.Uniform(8);
        for (int i = 0; i < num; i++) {
          if (i == 0 || !rnd.OneIn(10)) {
            k = RandomKey(&rnd);
          } else {
            // Periodically re-use the same key from the previous iter, so
            // we have multiple entries in the write batch for the same key
          }
          if (rnd.OneIn(2)) {
            v = RandomString(&rnd, rnd.Uniform(10));
            b.Put(k, v);
          } else {
            b.Delete(k);
          }
        }
        ASSERT_OK(model.Write(WriteOptions(), &b));
        ASSERT_OK(db_->Write(WriteOptions(), &b));
      }

      if ((step % 100) == 0) {
        ASSERT_TRUE(CompareIterators(step, &model, db_, NULL, NULL));
        ASSERT_TRUE(CompareIterators(step, &model, db_, model_snap, db_snap));
        // Save a snapshot from each DB this time that we'll use next
        // time we compare things, to make sure the current state is
        // preserved with the snapshot
        if (model_snap != NULL) model.ReleaseSnapshot(model_snap);
        if (db_snap != NULL) db_->ReleaseSnapshot(db_snap);

        Reopen();
        ASSERT_TRUE(CompareIterators(step, &model, db_, NULL, NULL));

        model_snap = model.GetSnapshot();
        db_snap = db_->GetSnapshot();
      }
    }
    if (model_snap != NULL) model.ReleaseSnapshot(model_snap);
    if (db_snap != NULL) db_->ReleaseSnapshot(db_snap);
  } while (ChangeOptions());
}

std::string MakeKey(unsigned int num) {
  char buf[30];
  snprintf(buf, sizeof(buf), "%016u", num);
  return std::string(buf);
}

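// Micro-benchmark (run with --benchmark): measure the cost of
// VersionSet::LogAndApply() for varying numbers of pre-existing level-2 files.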
void BM_LogAndApply(int iters, int num_base_files) {
  std::string dbname = test::TmpDir() + "/leveldb_test_benchmark";
  DestroyDB(dbname, Options());

  DB* db = NULL;
  Options opts;
  opts.create_if_missing = true;
  Status s = DB::Open(opts, dbname, &db);
  ASSERT_OK(s);
  ASSERT_TRUE(db != NULL);

  delete db;
  db = NULL;

  Env* env = Env::Default();

  port::Mutex mu;
  MutexLock l(&mu);

  InternalKeyComparator cmp(BytewiseComparator());
  Options options;
  VersionSet vset(dbname, &options, NULL, &cmp);
  bool save_manifest;
  ASSERT_OK(vset.Recover(&save_manifest));
  VersionEdit vbase;
  uint64_t fnum = 1;
  for (int i = 0; i < num_base_files; i++) {
    InternalKey start(MakeKey(2*fnum), 1, kTypeValue);
    InternalKey limit(MakeKey(2*fnum+1), 1, kTypeDeletion);
    vbase.AddFile(2, fnum++, 1 /* file size */, start, limit);
  }
  ASSERT_OK(vset.LogAndApply(&vbase, &mu));

  uint64_t start_micros = env->NowMicros();

  for (int i = 0; i < iters; i++) {
    VersionEdit vedit;
    vedit.DeleteFile(2, fnum);
    InternalKey start(MakeKey(2*fnum), 1, kTypeValue);
    InternalKey limit(MakeKey(2*fnum+1), 1, kTypeDeletion);
    vedit.AddFile(2, fnum++, 1 /* file size */, start, limit);
    vset.LogAndApply(&vedit, &mu);
  }
  uint64_t stop_micros = env->NowMicros();
  unsigned int us = stop_micros - start_micros;
  char buf[16];
  snprintf(buf, sizeof(buf), "%d", num_base_files);
  fprintf(stderr,
          "BM_LogAndApply/%-6s   %8d iters : %9u us (%7.0f us / iter)\n",
          buf, iters, us, ((float)us) / iters);
}

}  // namespace leveldb

int main(int argc, char** argv) {
  if (argc > 1 && std::string(argv[1]) == "--benchmark") {
    leveldb::BM_LogAndApply(1000, 1);
    leveldb::BM_LogAndApply(1000, 100);
    leveldb::BM_LogAndApply(1000, 10000);
    leveldb::BM_LogAndApply(100, 100000);
    return 0;
  }

  return leveldb::test::RunAllTests();
}