1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615 |
- // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style license that can be
- // found in the LICENSE file. See the AUTHORS file for names of contributors.
- #include "db/version_set.h"
- #include <algorithm>
- #include <stdio.h>
- #include "db/filename.h"
- #include "db/log_reader.h"
- #include "db/log_writer.h"
- #include "db/memtable.h"
- #include "db/table_cache.h"
- #include "leveldb/env.h"
- #include "leveldb/table_builder.h"
- #include "table/merger.h"
- #include "table/two_level_iterator.h"
- #include "util/coding.h"
- #include "util/logging.h"
- namespace leveldb {
- static size_t TargetFileSize(const Options* options) {
- return options->max_file_size;
- }
- // Maximum bytes of overlaps in grandparent (i.e., level+2) before we
- // stop building a single file in a level->level+1 compaction.
- static int64_t MaxGrandParentOverlapBytes(const Options* options) {
- return 10 * TargetFileSize(options);
- }
- // Maximum number of bytes in all compacted files. We avoid expanding
- // the lower level file set of a compaction if it would make the
- // total compaction cover more than this many bytes.
- static int64_t ExpandedCompactionByteSizeLimit(const Options* options) {
- return 25 * TargetFileSize(options);
- }
- static double MaxBytesForLevel(const Options* options, int level) {
- // Note: the result for level zero is not really used since we set
- // the level-0 compaction threshold based on number of files.
- // Result for both level-0 and level-1
- double result = 10. * 1048576.0;
- while (level > 1) {
- result *= 10;
- level--;
- }
- return result;
- }
- static uint64_t MaxFileSizeForLevel(const Options* options, int level) {
- // We could vary per level to reduce number of files?
- return TargetFileSize(options);
- }
- static int64_t TotalFileSize(const std::vector<FileMetaData*>& files) {
- int64_t sum = 0;
- for (size_t i = 0; i < files.size(); i++) {
- sum += files[i]->file_size;
- }
- return sum;
- }
- Version::~Version() {
- assert(refs_ == 0);
- // Remove from linked list
- prev_->next_ = next_;
- next_->prev_ = prev_;
- // Drop references to files
- for (int level = 0; level < config::kNumLevels; level++) {
- for (size_t i = 0; i < files_[level].size(); i++) {
- FileMetaData* f = files_[level][i];
- assert(f->refs > 0);
- f->refs--;
- if (f->refs <= 0) {
- delete f;
- }
- }
- }
- }
- int FindFile(const InternalKeyComparator& icmp,
- const std::vector<FileMetaData*>& files,
- const Slice& key) {
- uint32_t left = 0;
- uint32_t right = files.size();
- while (left < right) {
- uint32_t mid = (left + right) / 2;
- const FileMetaData* f = files[mid];
- if (icmp.InternalKeyComparator::Compare(f->largest.Encode(), key) < 0) {
- // Key at "mid.largest" is < "target". Therefore all
- // files at or before "mid" are uninteresting.
- left = mid + 1;
- } else {
- // Key at "mid.largest" is >= "target". Therefore all files
- // after "mid" are uninteresting.
- right = mid;
- }
- }
- return right;
- }
- static bool AfterFile(const Comparator* ucmp,
- const Slice* user_key, const FileMetaData* f) {
- // null user_key occurs before all keys and is therefore never after *f
- return (user_key != nullptr &&
- ucmp->Compare(*user_key, f->largest.user_key()) > 0);
- }
- static bool BeforeFile(const Comparator* ucmp,
- const Slice* user_key, const FileMetaData* f) {
- // null user_key occurs after all keys and is therefore never before *f
- return (user_key != nullptr &&
- ucmp->Compare(*user_key, f->smallest.user_key()) < 0);
- }
- bool SomeFileOverlapsRange(
- const InternalKeyComparator& icmp,
- bool disjoint_sorted_files,
- const std::vector<FileMetaData*>& files,
- const Slice* smallest_user_key,
- const Slice* largest_user_key) {
- const Comparator* ucmp = icmp.user_comparator();
- if (!disjoint_sorted_files) {
- // Need to check against all files
- for (size_t i = 0; i < files.size(); i++) {
- const FileMetaData* f = files[i];
- if (AfterFile(ucmp, smallest_user_key, f) ||
- BeforeFile(ucmp, largest_user_key, f)) {
- // No overlap
- } else {
- return true; // Overlap
- }
- }
- return false;
- }
- // Binary search over file list
- uint32_t index = 0;
- if (smallest_user_key != nullptr) {
- // Find the earliest possible internal key for smallest_user_key
- InternalKey small_key(*smallest_user_key, kMaxSequenceNumber,kValueTypeForSeek);
- index = FindFile(icmp, files, small_key.Encode());
- }
- if (index >= files.size()) {
- // beginning of range is after all files, so no overlap.
- return false;
- }
- return !BeforeFile(ucmp, largest_user_key, files[index]);
- }
- // An internal iterator. For a given version/level pair, yields
- // information about the files in the level. For a given entry, key()
- // is the largest key that occurs in the file, and value() is an
- // 16-byte value containing the file number and file size, both
- // encoded using EncodeFixed64.
- class Version::LevelFileNumIterator : public Iterator {
- public:
- LevelFileNumIterator(const InternalKeyComparator& icmp,
- const std::vector<FileMetaData*>* flist)
- : icmp_(icmp),
- flist_(flist),
- index_(flist->size()) { // Marks as invalid
- }
- virtual bool Valid() const {
- return index_ < flist_->size();
- }
- virtual void Seek(const Slice& target) {
- index_ = FindFile(icmp_, *flist_, target);
- }
- virtual void SeekToFirst() { index_ = 0; }
- virtual void SeekToLast() {
- index_ = flist_->empty() ? 0 : flist_->size() - 1;
- }
- virtual void Next() {
- assert(Valid());
- index_++;
- }
- virtual void Prev() {
- assert(Valid());
- if (index_ == 0) {
- index_ = flist_->size(); // Marks as invalid
- } else {
- index_--;
- }
- }
- Slice key() const {
- assert(Valid());
- return (*flist_)[index_]->largest.Encode();
- }
- Slice value() const {
- assert(Valid());
- EncodeFixed64(value_buf_, (*flist_)[index_]->number);
- EncodeFixed64(value_buf_+8, (*flist_)[index_]->file_size);
- return Slice(value_buf_, sizeof(value_buf_));
- }
- virtual Status status() const { return Status::OK(); }
- private:
- const InternalKeyComparator icmp_;
- const std::vector<FileMetaData*>* const flist_;
- uint32_t index_;
- // Backing store for value(). Holds the file number and size.
- mutable char value_buf_[16];
- };
- static Iterator* GetFileIterator(void* arg,
- const ReadOptions& options,
- const Slice& file_value) {
- TableCache* cache = reinterpret_cast<TableCache*>(arg);
- if (file_value.size() != 16) {
- return NewErrorIterator(
- Status::Corruption("FileReader invoked with unexpected value"));
- } else {
- return cache->NewIterator(options,
- DecodeFixed64(file_value.data()),
- DecodeFixed64(file_value.data() + 8));
- }
- }
- Iterator* Version::NewConcatenatingIterator(const ReadOptions& options,
- int level) const {
- return NewTwoLevelIterator(
- new LevelFileNumIterator(vset_->icmp_, &files_[level]),
- &GetFileIterator, vset_->table_cache_, options);
- }
- void Version::AddIterators(const ReadOptions& options,
- std::vector<Iterator*>* iters) {
- // Merge all level zero files together since they may overlap
- for (size_t i = 0; i < files_[0].size(); i++) {
- iters->push_back(
- vset_->table_cache_->NewIterator(
- options, files_[0][i]->number, files_[0][i]->file_size));
- }
- // For levels > 0, we can use a concatenating iterator that sequentially
- // walks through the non-overlapping files in the level, opening them
- // lazily.
- for (int level = 1; level < config::kNumLevels; level++) {
- if (!files_[level].empty()) {
- iters->push_back(NewConcatenatingIterator(options, level));
- }
- }
- }
- // Callback from TableCache::Get()
- namespace {
- enum SaverState {
- kNotFound,
- kFound,
- kDeleted,
- kCorrupt,
- };
- struct Saver {
- SaverState state;
- const Comparator* ucmp;
- Slice user_key;
- std::string* value;
- };
- }
- static void SaveValue(void* arg, const Slice& ikey, const Slice& v) {
- Saver* s = reinterpret_cast<Saver*>(arg);
- ParsedInternalKey parsed_key;
- if (!ParseInternalKey(ikey, &parsed_key)) {
- s->state = kCorrupt;
- } else {
- if (s->ucmp->Compare(parsed_key.user_key, s->user_key) == 0) {
- s->state = (parsed_key.type == kTypeValue) ? kFound : kDeleted;
- if (s->state == kFound) {
- s->value->assign(v.data(), v.size());
- }
- }
- }
- }
- static bool NewestFirst(FileMetaData* a, FileMetaData* b) {
- return a->number > b->number;
- }
- void Version::ForEachOverlapping(Slice user_key, Slice internal_key,
- void* arg,
- bool (*func)(void*, int, FileMetaData*)) {
- // TODO(sanjay): Change Version::Get() to use this function.
- const Comparator* ucmp = vset_->icmp_.user_comparator();
- // Search level-0 in order from newest to oldest.
- std::vector<FileMetaData*> tmp;
- tmp.reserve(files_[0].size());
- for (uint32_t i = 0; i < files_[0].size(); i++) {
- FileMetaData* f = files_[0][i];
- if (ucmp->Compare(user_key, f->smallest.user_key()) >= 0 &&
- ucmp->Compare(user_key, f->largest.user_key()) <= 0) {
- tmp.push_back(f);
- }
- }
- if (!tmp.empty()) {
- std::sort(tmp.begin(), tmp.end(), NewestFirst);
- for (uint32_t i = 0; i < tmp.size(); i++) {
- if (!(*func)(arg, 0, tmp[i])) {
- return;
- }
- }
- }
- // Search other levels.
- for (int level = 1; level < config::kNumLevels; level++) {
- size_t num_files = files_[level].size();
- if (num_files == 0) continue;
- // Binary search to find earliest index whose largest key >= internal_key.
- uint32_t index = FindFile(vset_->icmp_, files_[level], internal_key);
- if (index < num_files) {
- FileMetaData* f = files_[level][index];
- if (ucmp->Compare(user_key, f->smallest.user_key()) < 0) {
- // All of "f" is past any data for user_key
- } else {
- if (!(*func)(arg, level, f)) {
- return;
- }
- }
- }
- }
- }
- Status Version::Get(const ReadOptions& options,
- const LookupKey& k,
- std::string* value,
- GetStats* stats) {
- Slice ikey = k.internal_key();
- Slice user_key = k.user_key();
- const Comparator* ucmp = vset_->icmp_.user_comparator();
- Status s;
- stats->seek_file = nullptr;
- stats->seek_file_level = -1;
- FileMetaData* last_file_read = nullptr;
- int last_file_read_level = -1;
- // We can search level-by-level since entries never hop across
- // levels. Therefore we are guaranteed that if we find data
- // in a smaller level, later levels are irrelevant.
- std::vector<FileMetaData*> tmp;
- FileMetaData* tmp2;
- for (int level = 0; level < config::kNumLevels; level++) {
- size_t num_files = files_[level].size();
- if (num_files == 0) continue;
- // Get the list of files to search in this level
- FileMetaData* const* files = &files_[level][0];
- if (level == 0) {
- // Level-0 files may overlap each other. Find all files that
- // overlap user_key and process them in order from newest to oldest.
- tmp.reserve(num_files);
- for (uint32_t i = 0; i < num_files; i++) {
- FileMetaData* f = files[i];
- if (ucmp->Compare(user_key, f->smallest.user_key()) >= 0 &&
- ucmp->Compare(user_key, f->largest.user_key()) <= 0) {
- tmp.push_back(f);
- }
- }
- if (tmp.empty()) continue;
- std::sort(tmp.begin(), tmp.end(), NewestFirst);
- files = &tmp[0];
- num_files = tmp.size();
- } else {
- // Binary search to find earliest index whose largest key >= ikey.
- uint32_t index = FindFile(vset_->icmp_, files_[level], ikey);
- if (index >= num_files) {
- files = nullptr;
- num_files = 0;
- } else {
- tmp2 = files[index];
- if (ucmp->Compare(user_key, tmp2->smallest.user_key()) < 0) {
- // All of "tmp2" is past any data for user_key
- files = nullptr;
- num_files = 0;
- } else {
- files = &tmp2;
- num_files = 1;
- }
- }
- }
- for (uint32_t i = 0; i < num_files; ++i) {
- if (last_file_read != nullptr && stats->seek_file == nullptr) {
- // We have had more than one seek for this read. Charge the 1st file.
- stats->seek_file = last_file_read;
- stats->seek_file_level = last_file_read_level;
- }
- FileMetaData* f = files[i];
- last_file_read = f;
- last_file_read_level = level;
- Saver saver;
- saver.state = kNotFound;
- saver.ucmp = ucmp;
- saver.user_key = user_key;
- saver.value = value;
- s = vset_->table_cache_->Get(options, f->number, f->file_size,
- ikey, &saver, SaveValue);
- if (!s.ok()) {
- return s;
- }
- switch (saver.state) {
- case kNotFound:
- break; // Keep searching in other files
- case kFound:
- return s;
- case kDeleted:
- s = Status::NotFound(Slice()); // Use empty error message for speed
- return s;
- case kCorrupt:
- s = Status::Corruption("corrupted key for ", user_key);
- return s;
- }
- }
- }
- return Status::NotFound(Slice()); // Use an empty error message for speed
- }
- bool Version::UpdateStats(const GetStats& stats) {
- FileMetaData* f = stats.seek_file;
- if (f != nullptr) {
- f->allowed_seeks--;
- if (f->allowed_seeks <= 0 && file_to_compact_ == nullptr) {
- file_to_compact_ = f;
- file_to_compact_level_ = stats.seek_file_level;
- return true;
- }
- }
- return false;
- }
- bool Version::RecordReadSample(Slice internal_key) {
- ParsedInternalKey ikey;
- if (!ParseInternalKey(internal_key, &ikey)) {
- return false;
- }
- struct State {
- GetStats stats; // Holds first matching file
- int matches;
- static bool Match(void* arg, int level, FileMetaData* f) {
- State* state = reinterpret_cast<State*>(arg);
- state->matches++;
- if (state->matches == 1) {
- // Remember first match.
- state->stats.seek_file = f;
- state->stats.seek_file_level = level;
- }
- // We can stop iterating once we have a second match.
- return state->matches < 2;
- }
- };
- State state;
- state.matches = 0;
- ForEachOverlapping(ikey.user_key, internal_key, &state, &State::Match);
- // Must have at least two matches since we want to merge across
- // files. But what if we have a single file that contains many
- // overwrites and deletions? Should we have another mechanism for
- // finding such files?
- if (state.matches >= 2) {
- // 1MB cost is about 1 seek (see comment in Builder::Apply).
- return UpdateStats(state.stats);
- }
- return false;
- }
- void Version::Ref() {
- ++refs_;
- }
- void Version::Unref() {
- assert(this != &vset_->dummy_versions_);
- assert(refs_ >= 1);
- --refs_;
- if (refs_ == 0) {
- delete this;
- }
- }
- bool Version::OverlapInLevel(int level,
- const Slice* smallest_user_key,
- const Slice* largest_user_key) {
- return SomeFileOverlapsRange(vset_->icmp_, (level > 0), files_[level],
- smallest_user_key, largest_user_key);
- }
- int Version::PickLevelForMemTableOutput(
- const Slice& smallest_user_key,
- const Slice& largest_user_key) {
- int level = 0;
- if (!OverlapInLevel(0, &smallest_user_key, &largest_user_key)) {
- // Push to next level if there is no overlap in next level,
- // and the #bytes overlapping in the level after that are limited.
- InternalKey start(smallest_user_key, kMaxSequenceNumber, kValueTypeForSeek);
- InternalKey limit(largest_user_key, 0, static_cast<ValueType>(0));
- std::vector<FileMetaData*> overlaps;
- while (level < config::kMaxMemCompactLevel) {
- if (OverlapInLevel(level + 1, &smallest_user_key, &largest_user_key)) {
- break;
- }
- if (level + 2 < config::kNumLevels) {
- // Check that file does not overlap too many grandparent bytes.
- GetOverlappingInputs(level + 2, &start, &limit, &overlaps);
- const int64_t sum = TotalFileSize(overlaps);
- if (sum > MaxGrandParentOverlapBytes(vset_->options_)) {
- break;
- }
- }
- level++;
- }
- }
- return level;
- }
- // Store in "*inputs" all files in "level" that overlap [begin,end]
- void Version::GetOverlappingInputs(
- int level,
- const InternalKey* begin,
- const InternalKey* end,
- std::vector<FileMetaData*>* inputs) {
- assert(level >= 0);
- assert(level < config::kNumLevels);
- inputs->clear();
- Slice user_begin, user_end;
- if (begin != nullptr) {
- user_begin = begin->user_key();
- }
- if (end != nullptr) {
- user_end = end->user_key();
- }
- const Comparator* user_cmp = vset_->icmp_.user_comparator();
- for (size_t i = 0; i < files_[level].size(); ) {
- FileMetaData* f = files_[level][i++];
- const Slice file_start = f->smallest.user_key();
- const Slice file_limit = f->largest.user_key();
- if (begin != nullptr && user_cmp->Compare(file_limit, user_begin) < 0) {
- // "f" is completely before specified range; skip it
- } else if (end != nullptr && user_cmp->Compare(file_start, user_end) > 0) {
- // "f" is completely after specified range; skip it
- } else {
- inputs->push_back(f);
- if (level == 0) {
- // Level-0 files may overlap each other. So check if the newly
- // added file has expanded the range. If so, restart search.
- if (begin != nullptr && user_cmp->Compare(file_start, user_begin) < 0) {
- user_begin = file_start;
- inputs->clear();
- i = 0;
- } else if (end != nullptr && user_cmp->Compare(file_limit,
- user_end) > 0) {
- user_end = file_limit;
- inputs->clear();
- i = 0;
- }
- }
- }
- }
- }
- std::string Version::DebugString() const {
- std::string r;
- for (int level = 0; level < config::kNumLevels; level++) {
- // E.g.,
- // --- level 1 ---
- // 17:123['a' .. 'd']
- // 20:43['e' .. 'g']
- r.append("--- level ");
- AppendNumberTo(&r, level);
- r.append(" ---\n");
- const std::vector<FileMetaData*>& files = files_[level];
- for (size_t i = 0; i < files.size(); i++) {
- r.push_back(' ');
- AppendNumberTo(&r, files[i]->number);
- r.push_back(':');
- AppendNumberTo(&r, files[i]->file_size);
- r.append("[");
- r.append(files[i]->smallest.DebugString());
- r.append(" .. ");
- r.append(files[i]->largest.DebugString());
- r.append("]\n");
- }
- }
- return r;
- }
- // A helper class so we can efficiently apply a whole sequence
- // of edits to a particular state without creating intermediate
- // Versions that contain full copies of the intermediate state.
- class VersionSet::Builder {
- private:
- // Helper to sort by v->files_[file_number].smallest
- struct BySmallestKey {
- const InternalKeyComparator* internal_comparator;
- bool operator()(FileMetaData* f1, FileMetaData* f2) const {
- int r = internal_comparator->Compare(f1->smallest, f2->smallest);
- if (r != 0) {
- return (r < 0);
- } else {
- // Break ties by file number
- return (f1->number < f2->number);
- }
- }
- };
- typedef std::set<FileMetaData*, BySmallestKey> FileSet;
- struct LevelState {
- std::set<uint64_t> deleted_files;
- FileSet* added_files;
- };
- VersionSet* vset_;
- Version* base_;
- LevelState levels_[config::kNumLevels];
- public:
- // Initialize a builder with the files from *base and other info from *vset
- Builder(VersionSet* vset, Version* base)
- : vset_(vset),
- base_(base) {
- base_->Ref();
- BySmallestKey cmp;
- cmp.internal_comparator = &vset_->icmp_;
- for (int level = 0; level < config::kNumLevels; level++) {
- levels_[level].added_files = new FileSet(cmp);
- }
- }
- ~Builder() {
- for (int level = 0; level < config::kNumLevels; level++) {
- const FileSet* added = levels_[level].added_files;
- std::vector<FileMetaData*> to_unref;
- to_unref.reserve(added->size());
- for (FileSet::const_iterator it = added->begin();
- it != added->end(); ++it) {
- to_unref.push_back(*it);
- }
- delete added;
- for (uint32_t i = 0; i < to_unref.size(); i++) {
- FileMetaData* f = to_unref[i];
- f->refs--;
- if (f->refs <= 0) {
- delete f;
- }
- }
- }
- base_->Unref();
- }
- // Apply all of the edits in *edit to the current state.
- void Apply(VersionEdit* edit) {
- // Update compaction pointers
- for (size_t i = 0; i < edit->compact_pointers_.size(); i++) {
- const int level = edit->compact_pointers_[i].first;
- vset_->compact_pointer_[level] =
- edit->compact_pointers_[i].second.Encode().ToString();
- }
- // Delete files
- const VersionEdit::DeletedFileSet& del = edit->deleted_files_;
- for (VersionEdit::DeletedFileSet::const_iterator iter = del.begin();
- iter != del.end();
- ++iter) {
- const int level = iter->first;
- const uint64_t number = iter->second;
- levels_[level].deleted_files.insert(number);
- }
- // Add new files
- for (size_t i = 0; i < edit->new_files_.size(); i++) {
- const int level = edit->new_files_[i].first;
- FileMetaData* f = new FileMetaData(edit->new_files_[i].second);
- f->refs = 1;
- // We arrange to automatically compact this file after
- // a certain number of seeks. Let's assume:
- // (1) One seek costs 10ms
- // (2) Writing or reading 1MB costs 10ms (100MB/s)
- // (3) A compaction of 1MB does 25MB of IO:
- // 1MB read from this level
- // 10-12MB read from next level (boundaries may be misaligned)
- // 10-12MB written to next level
- // This implies that 25 seeks cost the same as the compaction
- // of 1MB of data. I.e., one seek costs approximately the
- // same as the compaction of 40KB of data. We are a little
- // conservative and allow approximately one seek for every 16KB
- // of data before triggering a compaction.
- f->allowed_seeks = static_cast<int>((f->file_size / 16384U));
- if (f->allowed_seeks < 100) f->allowed_seeks = 100;
- levels_[level].deleted_files.erase(f->number);
- levels_[level].added_files->insert(f);
- }
- }
- // Save the current state in *v.
- void SaveTo(Version* v) {
- BySmallestKey cmp;
- cmp.internal_comparator = &vset_->icmp_;
- for (int level = 0; level < config::kNumLevels; level++) {
- // Merge the set of added files with the set of pre-existing files.
- // Drop any deleted files. Store the result in *v.
- const std::vector<FileMetaData*>& base_files = base_->files_[level];
- std::vector<FileMetaData*>::const_iterator base_iter = base_files.begin();
- std::vector<FileMetaData*>::const_iterator base_end = base_files.end();
- const FileSet* added = levels_[level].added_files;
- v->files_[level].reserve(base_files.size() + added->size());
- for (FileSet::const_iterator added_iter = added->begin();
- added_iter != added->end();
- ++added_iter) {
- // Add all smaller files listed in base_
- for (std::vector<FileMetaData*>::const_iterator bpos
- = std::upper_bound(base_iter, base_end, *added_iter, cmp);
- base_iter != bpos;
- ++base_iter) {
- MaybeAddFile(v, level, *base_iter);
- }
- MaybeAddFile(v, level, *added_iter);
- }
- // Add remaining base files
- for (; base_iter != base_end; ++base_iter) {
- MaybeAddFile(v, level, *base_iter);
- }
- #ifndef NDEBUG
- // Make sure there is no overlap in levels > 0
- if (level > 0) {
- for (uint32_t i = 1; i < v->files_[level].size(); i++) {
- const InternalKey& prev_end = v->files_[level][i-1]->largest;
- const InternalKey& this_begin = v->files_[level][i]->smallest;
- if (vset_->icmp_.Compare(prev_end, this_begin) >= 0) {
- fprintf(stderr, "overlapping ranges in same level %s vs. %s\n",
- prev_end.DebugString().c_str(),
- this_begin.DebugString().c_str());
- abort();
- }
- }
- }
- #endif
- }
- }
- void MaybeAddFile(Version* v, int level, FileMetaData* f) {
- if (levels_[level].deleted_files.count(f->number) > 0) {
- // File is deleted: do nothing
- } else {
- std::vector<FileMetaData*>* files = &v->files_[level];
- if (level > 0 && !files->empty()) {
- // Must not overlap
- assert(vset_->icmp_.Compare((*files)[files->size()-1]->largest,
- f->smallest) < 0);
- }
- f->refs++;
- files->push_back(f);
- }
- }
- };
- VersionSet::VersionSet(const std::string& dbname,
- const Options* options,
- TableCache* table_cache,
- const InternalKeyComparator* cmp)
- : env_(options->env),
- dbname_(dbname),
- options_(options),
- table_cache_(table_cache),
- icmp_(*cmp),
- next_file_number_(2),
- manifest_file_number_(0), // Filled by Recover()
- last_sequence_(0),
- log_number_(0),
- prev_log_number_(0),
- descriptor_file_(nullptr),
- descriptor_log_(nullptr),
- dummy_versions_(this),
- current_(nullptr) {
- AppendVersion(new Version(this));
- }
- VersionSet::~VersionSet() {
- current_->Unref();
- assert(dummy_versions_.next_ == &dummy_versions_); // List must be empty
- delete descriptor_log_;
- delete descriptor_file_;
- }
- void VersionSet::AppendVersion(Version* v) {
- // Make "v" current
- assert(v->refs_ == 0);
- assert(v != current_);
- if (current_ != nullptr) {
- current_->Unref();
- }
- current_ = v;
- v->Ref();
- // Append to linked list
- v->prev_ = dummy_versions_.prev_;
- v->next_ = &dummy_versions_;
- v->prev_->next_ = v;
- v->next_->prev_ = v;
- }
- Status VersionSet::LogAndApply(VersionEdit* edit, port::Mutex* mu) {
- if (edit->has_log_number_) {
- assert(edit->log_number_ >= log_number_);
- assert(edit->log_number_ < next_file_number_);
- } else {
- edit->SetLogNumber(log_number_);
- }
- if (!edit->has_prev_log_number_) {
- edit->SetPrevLogNumber(prev_log_number_);
- }
- edit->SetNextFile(next_file_number_);
- edit->SetLastSequence(last_sequence_);
- Version* v = new Version(this);
- {
- Builder builder(this, current_);
- builder.Apply(edit);
- builder.SaveTo(v);
- }
- Finalize(v);
- // Initialize new descriptor log file if necessary by creating
- // a temporary file that contains a snapshot of the current version.
- std::string new_manifest_file;
- Status s;
- if (descriptor_log_ == nullptr) {
- // No reason to unlock *mu here since we only hit this path in the
- // first call to LogAndApply (when opening the database).
- assert(descriptor_file_ == nullptr);
- new_manifest_file = DescriptorFileName(dbname_, manifest_file_number_);
- edit->SetNextFile(next_file_number_);
- s = env_->NewWritableFile(new_manifest_file, &descriptor_file_);
- if (s.ok()) {
- descriptor_log_ = new log::Writer(descriptor_file_);
- s = WriteSnapshot(descriptor_log_);
- }
- }
- // Unlock during expensive MANIFEST log write
- {
- mu->Unlock();
- // Write new record to MANIFEST log
- if (s.ok()) {
- std::string record;
- edit->EncodeTo(&record);
- s = descriptor_log_->AddRecord(record);
- if (s.ok()) {
- s = descriptor_file_->Sync();
- }
- if (!s.ok()) {
- Log(options_->info_log, "MANIFEST write: %s\n", s.ToString().c_str());
- }
- }
- // If we just created a new descriptor file, install it by writing a
- // new CURRENT file that points to it.
- if (s.ok() && !new_manifest_file.empty()) {
- s = SetCurrentFile(env_, dbname_, manifest_file_number_);
- }
- mu->Lock();
- }
- // Install the new version
- if (s.ok()) {
- AppendVersion(v);
- log_number_ = edit->log_number_;
- prev_log_number_ = edit->prev_log_number_;
- } else {
- delete v;
- if (!new_manifest_file.empty()) {
- delete descriptor_log_;
- delete descriptor_file_;
- descriptor_log_ = nullptr;
- descriptor_file_ = nullptr;
- env_->DeleteFile(new_manifest_file);
- }
- }
- return s;
- }
- Status VersionSet::Recover(bool *save_manifest) {
- struct LogReporter : public log::Reader::Reporter {
- Status* status;
- virtual void Corruption(size_t bytes, const Status& s) {
- if (this->status->ok()) *this->status = s;
- }
- };
- // Read "CURRENT" file, which contains a pointer to the current manifest file
- std::string current;
- Status s = ReadFileToString(env_, CurrentFileName(dbname_), ¤t);
- if (!s.ok()) {
- return s;
- }
- if (current.empty() || current[current.size()-1] != '\n') {
- return Status::Corruption("CURRENT file does not end with newline");
- }
- current.resize(current.size() - 1);
- std::string dscname = dbname_ + "/" + current;
- SequentialFile* file;
- s = env_->NewSequentialFile(dscname, &file);
- if (!s.ok()) {
- if (s.IsNotFound()) {
- return Status::Corruption(
- "CURRENT points to a non-existent file", s.ToString());
- }
- return s;
- }
- bool have_log_number = false;
- bool have_prev_log_number = false;
- bool have_next_file = false;
- bool have_last_sequence = false;
- uint64_t next_file = 0;
- uint64_t last_sequence = 0;
- uint64_t log_number = 0;
- uint64_t prev_log_number = 0;
- Builder builder(this, current_);
- {
- LogReporter reporter;
- reporter.status = &s;
- log::Reader reader(file, &reporter, true/*checksum*/, 0/*initial_offset*/);
- Slice record;
- std::string scratch;
- while (reader.ReadRecord(&record, &scratch) && s.ok()) {
- VersionEdit edit;
- s = edit.DecodeFrom(record);
- if (s.ok()) {
- if (edit.has_comparator_ &&
- edit.comparator_ != icmp_.user_comparator()->Name()) {
- s = Status::InvalidArgument(
- edit.comparator_ + " does not match existing comparator ",
- icmp_.user_comparator()->Name());
- }
- }
- if (s.ok()) {
- builder.Apply(&edit);
- }
- if (edit.has_log_number_) {
- log_number = edit.log_number_;
- have_log_number = true;
- }
- if (edit.has_prev_log_number_) {
- prev_log_number = edit.prev_log_number_;
- have_prev_log_number = true;
- }
- if (edit.has_next_file_number_) {
- next_file = edit.next_file_number_;
- have_next_file = true;
- }
- if (edit.has_last_sequence_) {
- last_sequence = edit.last_sequence_;
- have_last_sequence = true;
- }
- }
- }
- delete file;
- file = nullptr;
- if (s.ok()) {
- if (!have_next_file) {
- s = Status::Corruption("no meta-nextfile entry in descriptor");
- } else if (!have_log_number) {
- s = Status::Corruption("no meta-lognumber entry in descriptor");
- } else if (!have_last_sequence) {
- s = Status::Corruption("no last-sequence-number entry in descriptor");
- }
- if (!have_prev_log_number) {
- prev_log_number = 0;
- }
- MarkFileNumberUsed(prev_log_number);
- MarkFileNumberUsed(log_number);
- }
- if (s.ok()) {
- Version* v = new Version(this);
- builder.SaveTo(v);
- // Install recovered version
- Finalize(v);
- AppendVersion(v);
- manifest_file_number_ = next_file;
- next_file_number_ = next_file + 1;
- last_sequence_ = last_sequence;
- log_number_ = log_number;
- prev_log_number_ = prev_log_number;
- // See if we can reuse the existing MANIFEST file.
- if (ReuseManifest(dscname, current)) {
- // No need to save new manifest
- } else {
- *save_manifest = true;
- }
- }
- return s;
- }
- bool VersionSet::ReuseManifest(const std::string& dscname,
- const std::string& dscbase) {
- if (!options_->reuse_logs) {
- return false;
- }
- FileType manifest_type;
- uint64_t manifest_number;
- uint64_t manifest_size;
- if (!ParseFileName(dscbase, &manifest_number, &manifest_type) ||
- manifest_type != kDescriptorFile ||
- !env_->GetFileSize(dscname, &manifest_size).ok() ||
- // Make new compacted MANIFEST if old one is too big
- manifest_size >= TargetFileSize(options_)) {
- return false;
- }
- assert(descriptor_file_ == nullptr);
- assert(descriptor_log_ == nullptr);
- Status r = env_->NewAppendableFile(dscname, &descriptor_file_);
- if (!r.ok()) {
- Log(options_->info_log, "Reuse MANIFEST: %s\n", r.ToString().c_str());
- assert(descriptor_file_ == nullptr);
- return false;
- }
- Log(options_->info_log, "Reusing MANIFEST %s\n", dscname.c_str());
- descriptor_log_ = new log::Writer(descriptor_file_, manifest_size);
- manifest_file_number_ = manifest_number;
- return true;
- }
- void VersionSet::MarkFileNumberUsed(uint64_t number) {
- if (next_file_number_ <= number) {
- next_file_number_ = number + 1;
- }
- }
- void VersionSet::Finalize(Version* v) {
- // Precomputed best level for next compaction
- int best_level = -1;
- double best_score = -1;
- for (int level = 0; level < config::kNumLevels-1; level++) {
- double score;
- if (level == 0) {
- // We treat level-0 specially by bounding the number of files
- // instead of number of bytes for two reasons:
- //
- // (1) With larger write-buffer sizes, it is nice not to do too
- // many level-0 compactions.
- //
- // (2) The files in level-0 are merged on every read and
- // therefore we wish to avoid too many files when the individual
- // file size is small (perhaps because of a small write-buffer
- // setting, or very high compression ratios, or lots of
- // overwrites/deletions).
- score = v->files_[level].size() /
- static_cast<double>(config::kL0_CompactionTrigger);
- } else {
- // Compute the ratio of current size to size limit.
- const uint64_t level_bytes = TotalFileSize(v->files_[level]);
- score =
- static_cast<double>(level_bytes) / MaxBytesForLevel(options_, level);
- }
- if (score > best_score) {
- best_level = level;
- best_score = score;
- }
- }
- v->compaction_level_ = best_level;
- v->compaction_score_ = best_score;
- }
- Status VersionSet::WriteSnapshot(log::Writer* log) {
- // TODO: Break up into multiple records to reduce memory usage on recovery?
- // Save metadata
- VersionEdit edit;
- edit.SetComparatorName(icmp_.user_comparator()->Name());
- // Save compaction pointers
- for (int level = 0; level < config::kNumLevels; level++) {
- if (!compact_pointer_[level].empty()) {
- InternalKey key;
- key.DecodeFrom(compact_pointer_[level]);
- edit.SetCompactPointer(level, key);
- }
- }
- // Save files
- for (int level = 0; level < config::kNumLevels; level++) {
- const std::vector<FileMetaData*>& files = current_->files_[level];
- for (size_t i = 0; i < files.size(); i++) {
- const FileMetaData* f = files[i];
- edit.AddFile(level, f->number, f->file_size, f->smallest, f->largest);
- }
- }
- std::string record;
- edit.EncodeTo(&record);
- return log->AddRecord(record);
- }
- int VersionSet::NumLevelFiles(int level) const {
- assert(level >= 0);
- assert(level < config::kNumLevels);
- return current_->files_[level].size();
- }
- const char* VersionSet::LevelSummary(LevelSummaryStorage* scratch) const {
- // Update code if kNumLevels changes
- assert(config::kNumLevels == 7);
- snprintf(scratch->buffer, sizeof(scratch->buffer),
- "files[ %d %d %d %d %d %d %d ]",
- int(current_->files_[0].size()),
- int(current_->files_[1].size()),
- int(current_->files_[2].size()),
- int(current_->files_[3].size()),
- int(current_->files_[4].size()),
- int(current_->files_[5].size()),
- int(current_->files_[6].size()));
- return scratch->buffer;
- }
- uint64_t VersionSet::ApproximateOffsetOf(Version* v, const InternalKey& ikey) {
- uint64_t result = 0;
- for (int level = 0; level < config::kNumLevels; level++) {
- const std::vector<FileMetaData*>& files = v->files_[level];
- for (size_t i = 0; i < files.size(); i++) {
- if (icmp_.Compare(files[i]->largest, ikey) <= 0) {
- // Entire file is before "ikey", so just add the file size
- result += files[i]->file_size;
- } else if (icmp_.Compare(files[i]->smallest, ikey) > 0) {
- // Entire file is after "ikey", so ignore
- if (level > 0) {
- // Files other than level 0 are sorted by meta->smallest, so
- // no further files in this level will contain data for
- // "ikey".
- break;
- }
- } else {
- // "ikey" falls in the range for this table. Add the
- // approximate offset of "ikey" within the table.
- Table* tableptr;
- Iterator* iter = table_cache_->NewIterator(
- ReadOptions(), files[i]->number, files[i]->file_size, &tableptr);
- if (tableptr != nullptr) {
- result += tableptr->ApproximateOffsetOf(ikey.Encode());
- }
- delete iter;
- }
- }
- }
- return result;
- }
- void VersionSet::AddLiveFiles(std::set<uint64_t>* live) {
- for (Version* v = dummy_versions_.next_;
- v != &dummy_versions_;
- v = v->next_) {
- for (int level = 0; level < config::kNumLevels; level++) {
- const std::vector<FileMetaData*>& files = v->files_[level];
- for (size_t i = 0; i < files.size(); i++) {
- live->insert(files[i]->number);
- }
- }
- }
- }
- int64_t VersionSet::NumLevelBytes(int level) const {
- assert(level >= 0);
- assert(level < config::kNumLevels);
- return TotalFileSize(current_->files_[level]);
- }
- int64_t VersionSet::MaxNextLevelOverlappingBytes() {
- int64_t result = 0;
- std::vector<FileMetaData*> overlaps;
- for (int level = 1; level < config::kNumLevels - 1; level++) {
- for (size_t i = 0; i < current_->files_[level].size(); i++) {
- const FileMetaData* f = current_->files_[level][i];
- current_->GetOverlappingInputs(level+1, &f->smallest, &f->largest,
- &overlaps);
- const int64_t sum = TotalFileSize(overlaps);
- if (sum > result) {
- result = sum;
- }
- }
- }
- return result;
- }
- // Stores the minimal range that covers all entries in inputs in
- // *smallest, *largest.
- // REQUIRES: inputs is not empty
- void VersionSet::GetRange(const std::vector<FileMetaData*>& inputs,
- InternalKey* smallest,
- InternalKey* largest) {
- assert(!inputs.empty());
- smallest->Clear();
- largest->Clear();
- for (size_t i = 0; i < inputs.size(); i++) {
- FileMetaData* f = inputs[i];
- if (i == 0) {
- *smallest = f->smallest;
- *largest = f->largest;
- } else {
- if (icmp_.Compare(f->smallest, *smallest) < 0) {
- *smallest = f->smallest;
- }
- if (icmp_.Compare(f->largest, *largest) > 0) {
- *largest = f->largest;
- }
- }
- }
- }
- // Stores the minimal range that covers all entries in inputs1 and inputs2
- // in *smallest, *largest.
- // REQUIRES: inputs is not empty
- void VersionSet::GetRange2(const std::vector<FileMetaData*>& inputs1,
- const std::vector<FileMetaData*>& inputs2,
- InternalKey* smallest,
- InternalKey* largest) {
- std::vector<FileMetaData*> all = inputs1;
- all.insert(all.end(), inputs2.begin(), inputs2.end());
- GetRange(all, smallest, largest);
- }
- Iterator* VersionSet::MakeInputIterator(Compaction* c) {
- ReadOptions options;
- options.verify_checksums = options_->paranoid_checks;
- options.fill_cache = false;
- // Level-0 files have to be merged together. For other levels,
- // we will make a concatenating iterator per level.
- // TODO(opt): use concatenating iterator for level-0 if there is no overlap
- const int space = (c->level() == 0 ? c->inputs_[0].size() + 1 : 2);
- Iterator** list = new Iterator*[space];
- int num = 0;
- for (int which = 0; which < 2; which++) {
- if (!c->inputs_[which].empty()) {
- if (c->level() + which == 0) {
- const std::vector<FileMetaData*>& files = c->inputs_[which];
- for (size_t i = 0; i < files.size(); i++) {
- list[num++] = table_cache_->NewIterator(
- options, files[i]->number, files[i]->file_size);
- }
- } else {
- // Create concatenating iterator for the files from this level
- list[num++] = NewTwoLevelIterator(
- new Version::LevelFileNumIterator(icmp_, &c->inputs_[which]),
- &GetFileIterator, table_cache_, options);
- }
- }
- }
- assert(num <= space);
- Iterator* result = NewMergingIterator(&icmp_, list, num);
- delete[] list;
- return result;
- }
- Compaction* VersionSet::PickCompaction() {
- Compaction* c;
- int level;
- // We prefer compactions triggered by too much data in a level over
- // the compactions triggered by seeks.
- const bool size_compaction = (current_->compaction_score_ >= 1);
- const bool seek_compaction = (current_->file_to_compact_ != nullptr);
- if (size_compaction) {
- level = current_->compaction_level_;
- assert(level >= 0);
- assert(level+1 < config::kNumLevels);
- c = new Compaction(options_, level);
- // Pick the first file that comes after compact_pointer_[level]
- for (size_t i = 0; i < current_->files_[level].size(); i++) {
- FileMetaData* f = current_->files_[level][i];
- if (compact_pointer_[level].empty() ||
- icmp_.Compare(f->largest.Encode(), compact_pointer_[level]) > 0) {
- c->inputs_[0].push_back(f);
- break;
- }
- }
- if (c->inputs_[0].empty()) {
- // Wrap-around to the beginning of the key space
- c->inputs_[0].push_back(current_->files_[level][0]);
- }
- } else if (seek_compaction) {
- level = current_->file_to_compact_level_;
- c = new Compaction(options_, level);
- c->inputs_[0].push_back(current_->file_to_compact_);
- } else {
- return nullptr;
- }
- c->input_version_ = current_;
- c->input_version_->Ref();
- // Files in level 0 may overlap each other, so pick up all overlapping ones
- if (level == 0) {
- InternalKey smallest, largest;
- GetRange(c->inputs_[0], &smallest, &largest);
- // Note that the next call will discard the file we placed in
- // c->inputs_[0] earlier and replace it with an overlapping set
- // which will include the picked file.
- current_->GetOverlappingInputs(0, &smallest, &largest, &c->inputs_[0]);
- assert(!c->inputs_[0].empty());
- }
- SetupOtherInputs(c);
- return c;
- }
- // Finds the largest key in a vector of files. Returns true if files it not
- // empty.
- bool FindLargestKey(const InternalKeyComparator& icmp,
- const std::vector<FileMetaData*>& files,
- InternalKey* largest_key) {
- if (files.empty()) {
- return false;
- }
- *largest_key = files[0]->largest;
- for (size_t i = 1; i < files.size(); ++i) {
- FileMetaData* f = files[i];
- if (icmp.Compare(f->largest, *largest_key) > 0) {
- *largest_key = f->largest;
- }
- }
- return true;
- }
- // Finds minimum file b2=(l2, u2) in level file for which l2 > u1 and
- // user_key(l2) = user_key(u1)
- FileMetaData* FindSmallestBoundaryFile(
- const InternalKeyComparator& icmp,
- const std::vector<FileMetaData*>& level_files,
- const InternalKey& largest_key) {
- const Comparator* user_cmp = icmp.user_comparator();
- FileMetaData* smallest_boundary_file = nullptr;
- for (size_t i = 0; i < level_files.size(); ++i) {
- FileMetaData* f = level_files[i];
- if (icmp.Compare(f->smallest, largest_key) > 0 &&
- user_cmp->Compare(f->smallest.user_key(), largest_key.user_key()) ==
- 0) {
- if (smallest_boundary_file == nullptr ||
- icmp.Compare(f->smallest, smallest_boundary_file->smallest) < 0) {
- smallest_boundary_file = f;
- }
- }
- }
- return smallest_boundary_file;
- }
- // Extracts the largest file b1 from |compaction_files| and then searches for a
- // b2 in |level_files| for which user_key(u1) = user_key(l2). If it finds such a
- // file b2 (known as a boundary file) it adds it to |compaction_files| and then
- // searches again using this new upper bound.
- //
- // If there are two blocks, b1=(l1, u1) and b2=(l2, u2) and
- // user_key(u1) = user_key(l2), and if we compact b1 but not b2 then a
- // subsequent get operation will yield an incorrect result because it will
- // return the record from b2 in level i rather than from b1 because it searches
- // level by level for records matching the supplied user key.
- //
- // parameters:
- // in level_files: List of files to search for boundary files.
- // in/out compaction_files: List of files to extend by adding boundary files.
- void AddBoundaryInputs(const InternalKeyComparator& icmp,
- const std::vector<FileMetaData*>& level_files,
- std::vector<FileMetaData*>* compaction_files) {
- InternalKey largest_key;
- // Quick return if compaction_files is empty.
- if (!FindLargestKey(icmp, *compaction_files, &largest_key)) {
- return;
- }
- bool continue_searching = true;
- while (continue_searching) {
- FileMetaData* smallest_boundary_file =
- FindSmallestBoundaryFile(icmp, level_files, largest_key);
- // If a boundary file was found advance largest_key, otherwise we're done.
- if (smallest_boundary_file != NULL) {
- compaction_files->push_back(smallest_boundary_file);
- largest_key = smallest_boundary_file->largest;
- } else {
- continue_searching = false;
- }
- }
- }
- void VersionSet::SetupOtherInputs(Compaction* c) {
- const int level = c->level();
- InternalKey smallest, largest;
- AddBoundaryInputs(icmp_, current_->files_[level], &c->inputs_[0]);
- GetRange(c->inputs_[0], &smallest, &largest);
- current_->GetOverlappingInputs(level+1, &smallest, &largest, &c->inputs_[1]);
- // Get entire range covered by compaction
- InternalKey all_start, all_limit;
- GetRange2(c->inputs_[0], c->inputs_[1], &all_start, &all_limit);
- // See if we can grow the number of inputs in "level" without
- // changing the number of "level+1" files we pick up.
- if (!c->inputs_[1].empty()) {
- std::vector<FileMetaData*> expanded0;
- current_->GetOverlappingInputs(level, &all_start, &all_limit, &expanded0);
- AddBoundaryInputs(icmp_, current_->files_[level], &expanded0);
- const int64_t inputs0_size = TotalFileSize(c->inputs_[0]);
- const int64_t inputs1_size = TotalFileSize(c->inputs_[1]);
- const int64_t expanded0_size = TotalFileSize(expanded0);
- if (expanded0.size() > c->inputs_[0].size() &&
- inputs1_size + expanded0_size <
- ExpandedCompactionByteSizeLimit(options_)) {
- InternalKey new_start, new_limit;
- GetRange(expanded0, &new_start, &new_limit);
- std::vector<FileMetaData*> expanded1;
- current_->GetOverlappingInputs(level+1, &new_start, &new_limit,
- &expanded1);
- if (expanded1.size() == c->inputs_[1].size()) {
- Log(options_->info_log,
- "Expanding@%d %d+%d (%ld+%ld bytes) to %d+%d (%ld+%ld bytes)\n",
- level,
- int(c->inputs_[0].size()),
- int(c->inputs_[1].size()),
- long(inputs0_size), long(inputs1_size),
- int(expanded0.size()),
- int(expanded1.size()),
- long(expanded0_size), long(inputs1_size));
- smallest = new_start;
- largest = new_limit;
- c->inputs_[0] = expanded0;
- c->inputs_[1] = expanded1;
- GetRange2(c->inputs_[0], c->inputs_[1], &all_start, &all_limit);
- }
- }
- }
- // Compute the set of grandparent files that overlap this compaction
- // (parent == level+1; grandparent == level+2)
- if (level + 2 < config::kNumLevels) {
- current_->GetOverlappingInputs(level + 2, &all_start, &all_limit,
- &c->grandparents_);
- }
- // Update the place where we will do the next compaction for this level.
- // We update this immediately instead of waiting for the VersionEdit
- // to be applied so that if the compaction fails, we will try a different
- // key range next time.
- compact_pointer_[level] = largest.Encode().ToString();
- c->edit_.SetCompactPointer(level, largest);
- }
- Compaction* VersionSet::CompactRange(
- int level,
- const InternalKey* begin,
- const InternalKey* end) {
- std::vector<FileMetaData*> inputs;
- current_->GetOverlappingInputs(level, begin, end, &inputs);
- if (inputs.empty()) {
- return nullptr;
- }
- // Avoid compacting too much in one shot in case the range is large.
- // But we cannot do this for level-0 since level-0 files can overlap
- // and we must not pick one file and drop another older file if the
- // two files overlap.
- if (level > 0) {
- const uint64_t limit = MaxFileSizeForLevel(options_, level);
- uint64_t total = 0;
- for (size_t i = 0; i < inputs.size(); i++) {
- uint64_t s = inputs[i]->file_size;
- total += s;
- if (total >= limit) {
- inputs.resize(i + 1);
- break;
- }
- }
- }
- Compaction* c = new Compaction(options_, level);
- c->input_version_ = current_;
- c->input_version_->Ref();
- c->inputs_[0] = inputs;
- SetupOtherInputs(c);
- return c;
- }
- Compaction::Compaction(const Options* options, int level)
- : level_(level),
- max_output_file_size_(MaxFileSizeForLevel(options, level)),
- input_version_(nullptr),
- grandparent_index_(0),
- seen_key_(false),
- overlapped_bytes_(0) {
- for (int i = 0; i < config::kNumLevels; i++) {
- level_ptrs_[i] = 0;
- }
- }
- Compaction::~Compaction() {
- if (input_version_ != nullptr) {
- input_version_->Unref();
- }
- }
- bool Compaction::IsTrivialMove() const {
- const VersionSet* vset = input_version_->vset_;
- // Avoid a move if there is lots of overlapping grandparent data.
- // Otherwise, the move could create a parent file that will require
- // a very expensive merge later on.
- return (num_input_files(0) == 1 && num_input_files(1) == 0 &&
- TotalFileSize(grandparents_) <=
- MaxGrandParentOverlapBytes(vset->options_));
- }
- void Compaction::AddInputDeletions(VersionEdit* edit) {
- for (int which = 0; which < 2; which++) {
- for (size_t i = 0; i < inputs_[which].size(); i++) {
- edit->DeleteFile(level_ + which, inputs_[which][i]->number);
- }
- }
- }
- bool Compaction::IsBaseLevelForKey(const Slice& user_key) {
- // Maybe use binary search to find right entry instead of linear search?
- const Comparator* user_cmp = input_version_->vset_->icmp_.user_comparator();
- for (int lvl = level_ + 2; lvl < config::kNumLevels; lvl++) {
- const std::vector<FileMetaData*>& files = input_version_->files_[lvl];
- for (; level_ptrs_[lvl] < files.size(); ) {
- FileMetaData* f = files[level_ptrs_[lvl]];
- if (user_cmp->Compare(user_key, f->largest.user_key()) <= 0) {
- // We've advanced far enough
- if (user_cmp->Compare(user_key, f->smallest.user_key()) >= 0) {
- // Key falls in this file's range, so definitely not base level
- return false;
- }
- break;
- }
- level_ptrs_[lvl]++;
- }
- }
- return true;
- }
- bool Compaction::ShouldStopBefore(const Slice& internal_key) {
- const VersionSet* vset = input_version_->vset_;
- // Scan to find earliest grandparent file that contains key.
- const InternalKeyComparator* icmp = &vset->icmp_;
- while (grandparent_index_ < grandparents_.size() &&
- icmp->Compare(internal_key,
- grandparents_[grandparent_index_]->largest.Encode()) > 0) {
- if (seen_key_) {
- overlapped_bytes_ += grandparents_[grandparent_index_]->file_size;
- }
- grandparent_index_++;
- }
- seen_key_ = true;
- if (overlapped_bytes_ > MaxGrandParentOverlapBytes(vset->options_)) {
- // Too much overlap for current output; start new output
- overlapped_bytes_ = 0;
- return true;
- } else {
- return false;
- }
- }
- void Compaction::ReleaseInputs() {
- if (input_version_ != nullptr) {
- input_version_->Unref();
- input_version_ = nullptr;
- }
- }
- } // namespace leveldb
|