123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109 |
- // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style license that can be
- // found in the LICENSE file. See the AUTHORS file for names of contributors.
- //
- // BlockBuilder generates blocks where keys are prefix-compressed:
- //
- // When we store a key, we drop the prefix shared with the previous
- // string. This helps reduce the space requirement significantly.
- // Furthermore, once every K keys, we do not apply the prefix
- // compression and store the entire key. We call this a "restart
- // point". The tail end of the block stores the offsets of all of the
- // restart points, and can be used to do a binary search when looking
- // for a particular key. Values are stored as-is (without compression)
- // immediately following the corresponding key.
- //
- // An entry for a particular key-value pair has the form:
- // shared_bytes: varint32
- // unshared_bytes: varint32
- // value_length: varint32
- // key_delta: char[unshared_bytes]
- // value: char[value_length]
- // shared_bytes == 0 for restart points.
- //
- // The trailer of the block has the form:
- // restarts: uint32[num_restarts]
- // num_restarts: uint32
- // restarts[i] contains the offset within the block of the ith restart point.
- #include "table/block_builder.h"
- #include <algorithm>
- #include <assert.h>
- #include "leveldb/comparator.h"
- #include "leveldb/table_builder.h"
- #include "util/coding.h"
- namespace leveldb {
- BlockBuilder::BlockBuilder(const Options* options)
- : options_(options),
- restarts_(),
- counter_(0),
- finished_(false) {
- assert(options->block_restart_interval >= 1);
- restarts_.push_back(0); // First restart point is at offset 0
- }
- void BlockBuilder::Reset() {
- buffer_.clear();
- restarts_.clear();
- restarts_.push_back(0); // First restart point is at offset 0
- counter_ = 0;
- finished_ = false;
- last_key_.clear();
- }
- size_t BlockBuilder::CurrentSizeEstimate() const {
- return (buffer_.size() + // Raw data buffer
- restarts_.size() * sizeof(uint32_t) + // Restart array
- sizeof(uint32_t)); // Restart array length
- }
- Slice BlockBuilder::Finish() {
- // Append restart array
- for (size_t i = 0; i < restarts_.size(); i++) {
- PutFixed32(&buffer_, restarts_[i]);
- }
- PutFixed32(&buffer_, restarts_.size());
- finished_ = true;
- return Slice(buffer_);
- }
- void BlockBuilder::Add(const Slice& key, const Slice& value) {
- Slice last_key_piece(last_key_);
- assert(!finished_);
- assert(counter_ <= options_->block_restart_interval);
- assert(buffer_.empty() // No values yet?
- || options_->comparator->Compare(key, last_key_piece) > 0);
- size_t shared = 0;
- if (counter_ < options_->block_restart_interval) {
- // See how much sharing to do with previous string
- const size_t min_length = std::min(last_key_piece.size(), key.size());
- while ((shared < min_length) && (last_key_piece[shared] == key[shared])) {
- shared++;
- }
- } else {
- // Restart compression
- restarts_.push_back(buffer_.size());
- counter_ = 0;
- }
- const size_t non_shared = key.size() - shared;
- // Add "<shared><non_shared><value_size>" to buffer_
- PutVarint32(&buffer_, shared);
- PutVarint32(&buffer_, non_shared);
- PutVarint32(&buffer_, value.size());
- // Add string delta to buffer_ followed by value
- buffer_.append(key.data() + shared, non_shared);
- buffer_.append(value.data(), value.size());
- // Update state
- last_key_.resize(shared);
- last_key_.append(key.data() + shared, non_shared);
- assert(Slice(last_key_) == key);
- counter_++;
- }
- } // namespace leveldb
|