table.cc 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285
  1. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license that can be
  3. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  4. #include "leveldb/table.h"
  5. #include "leveldb/cache.h"
  6. #include "leveldb/comparator.h"
  7. #include "leveldb/env.h"
  8. #include "leveldb/filter_policy.h"
  9. #include "leveldb/options.h"
  10. #include "table/block.h"
  11. #include "table/filter_block.h"
  12. #include "table/format.h"
  13. #include "table/two_level_iterator.h"
  14. #include "util/coding.h"
  15. namespace leveldb {
  16. struct Table::Rep {
  17. ~Rep() {
  18. delete filter;
  19. delete [] filter_data;
  20. delete index_block;
  21. }
  22. Options options;
  23. Status status;
  24. RandomAccessFile* file;
  25. uint64_t cache_id;
  26. FilterBlockReader* filter;
  27. const char* filter_data;
  28. BlockHandle metaindex_handle; // Handle to metaindex_block: saved from footer
  29. Block* index_block;
  30. };
  31. Status Table::Open(const Options& options,
  32. RandomAccessFile* file,
  33. uint64_t size,
  34. Table** table) {
  35. *table = NULL;
  36. if (size < Footer::kEncodedLength) {
  37. return Status::Corruption("file is too short to be an sstable");
  38. }
  39. char footer_space[Footer::kEncodedLength];
  40. Slice footer_input;
  41. Status s = file->Read(size - Footer::kEncodedLength, Footer::kEncodedLength,
  42. &footer_input, footer_space);
  43. if (!s.ok()) return s;
  44. Footer footer;
  45. s = footer.DecodeFrom(&footer_input);
  46. if (!s.ok()) return s;
  47. // Read the index block
  48. BlockContents contents;
  49. Block* index_block = NULL;
  50. if (s.ok()) {
  51. ReadOptions opt;
  52. if (options.paranoid_checks) {
  53. opt.verify_checksums = true;
  54. }
  55. s = ReadBlock(file, opt, footer.index_handle(), &contents);
  56. if (s.ok()) {
  57. index_block = new Block(contents);
  58. }
  59. }
  60. if (s.ok()) {
  61. // We've successfully read the footer and the index block: we're
  62. // ready to serve requests.
  63. Rep* rep = new Table::Rep;
  64. rep->options = options;
  65. rep->file = file;
  66. rep->metaindex_handle = footer.metaindex_handle();
  67. rep->index_block = index_block;
  68. rep->cache_id = (options.block_cache ? options.block_cache->NewId() : 0);
  69. rep->filter_data = NULL;
  70. rep->filter = NULL;
  71. *table = new Table(rep);
  72. (*table)->ReadMeta(footer);
  73. } else {
  74. if (index_block) delete index_block;
  75. }
  76. return s;
  77. }
  78. void Table::ReadMeta(const Footer& footer) {
  79. if (rep_->options.filter_policy == NULL) {
  80. return; // Do not need any metadata
  81. }
  82. // TODO(sanjay): Skip this if footer.metaindex_handle() size indicates
  83. // it is an empty block.
  84. ReadOptions opt;
  85. if (rep_->options.paranoid_checks) {
  86. opt.verify_checksums = true;
  87. }
  88. BlockContents contents;
  89. if (!ReadBlock(rep_->file, opt, footer.metaindex_handle(), &contents).ok()) {
  90. // Do not propagate errors since meta info is not needed for operation
  91. return;
  92. }
  93. Block* meta = new Block(contents);
  94. Iterator* iter = meta->NewIterator(BytewiseComparator());
  95. std::string key = "filter.";
  96. key.append(rep_->options.filter_policy->Name());
  97. iter->Seek(key);
  98. if (iter->Valid() && iter->key() == Slice(key)) {
  99. ReadFilter(iter->value());
  100. }
  101. delete iter;
  102. delete meta;
  103. }
  104. void Table::ReadFilter(const Slice& filter_handle_value) {
  105. Slice v = filter_handle_value;
  106. BlockHandle filter_handle;
  107. if (!filter_handle.DecodeFrom(&v).ok()) {
  108. return;
  109. }
  110. // We might want to unify with ReadBlock() if we start
  111. // requiring checksum verification in Table::Open.
  112. ReadOptions opt;
  113. if (rep_->options.paranoid_checks) {
  114. opt.verify_checksums = true;
  115. }
  116. BlockContents block;
  117. if (!ReadBlock(rep_->file, opt, filter_handle, &block).ok()) {
  118. return;
  119. }
  120. if (block.heap_allocated) {
  121. rep_->filter_data = block.data.data(); // Will need to delete later
  122. }
  123. rep_->filter = new FilterBlockReader(rep_->options.filter_policy, block.data);
  124. }
  125. Table::~Table() {
  126. delete rep_;
  127. }
  128. static void DeleteBlock(void* arg, void* ignored) {
  129. delete reinterpret_cast<Block*>(arg);
  130. }
  131. static void DeleteCachedBlock(const Slice& key, void* value) {
  132. Block* block = reinterpret_cast<Block*>(value);
  133. delete block;
  134. }
  135. static void ReleaseBlock(void* arg, void* h) {
  136. Cache* cache = reinterpret_cast<Cache*>(arg);
  137. Cache::Handle* handle = reinterpret_cast<Cache::Handle*>(h);
  138. cache->Release(handle);
  139. }
  140. // Convert an index iterator value (i.e., an encoded BlockHandle)
  141. // into an iterator over the contents of the corresponding block.
  142. Iterator* Table::BlockReader(void* arg,
  143. const ReadOptions& options,
  144. const Slice& index_value) {
  145. Table* table = reinterpret_cast<Table*>(arg);
  146. Cache* block_cache = table->rep_->options.block_cache;
  147. Block* block = NULL;
  148. Cache::Handle* cache_handle = NULL;
  149. BlockHandle handle;
  150. Slice input = index_value;
  151. Status s = handle.DecodeFrom(&input);
  152. // We intentionally allow extra stuff in index_value so that we
  153. // can add more features in the future.
  154. if (s.ok()) {
  155. BlockContents contents;
  156. if (block_cache != NULL) {
  157. char cache_key_buffer[16];
  158. EncodeFixed64(cache_key_buffer, table->rep_->cache_id);
  159. EncodeFixed64(cache_key_buffer+8, handle.offset());
  160. Slice key(cache_key_buffer, sizeof(cache_key_buffer));
  161. cache_handle = block_cache->Lookup(key);
  162. if (cache_handle != NULL) {
  163. block = reinterpret_cast<Block*>(block_cache->Value(cache_handle));
  164. } else {
  165. s = ReadBlock(table->rep_->file, options, handle, &contents);
  166. if (s.ok()) {
  167. block = new Block(contents);
  168. if (contents.cachable && options.fill_cache) {
  169. cache_handle = block_cache->Insert(
  170. key, block, block->size(), &DeleteCachedBlock);
  171. }
  172. }
  173. }
  174. } else {
  175. s = ReadBlock(table->rep_->file, options, handle, &contents);
  176. if (s.ok()) {
  177. block = new Block(contents);
  178. }
  179. }
  180. }
  181. Iterator* iter;
  182. if (block != NULL) {
  183. iter = block->NewIterator(table->rep_->options.comparator);
  184. if (cache_handle == NULL) {
  185. iter->RegisterCleanup(&DeleteBlock, block, NULL);
  186. } else {
  187. iter->RegisterCleanup(&ReleaseBlock, block_cache, cache_handle);
  188. }
  189. } else {
  190. iter = NewErrorIterator(s);
  191. }
  192. return iter;
  193. }
  194. Iterator* Table::NewIterator(const ReadOptions& options) const {
  195. return NewTwoLevelIterator(
  196. rep_->index_block->NewIterator(rep_->options.comparator),
  197. &Table::BlockReader, const_cast<Table*>(this), options);
  198. }
  199. Status Table::InternalGet(const ReadOptions& options, const Slice& k,
  200. void* arg,
  201. void (*saver)(void*, const Slice&, const Slice&)) {
  202. Status s;
  203. Iterator* iiter = rep_->index_block->NewIterator(rep_->options.comparator);
  204. iiter->Seek(k);
  205. if (iiter->Valid()) {
  206. Slice handle_value = iiter->value();
  207. FilterBlockReader* filter = rep_->filter;
  208. BlockHandle handle;
  209. if (filter != NULL &&
  210. handle.DecodeFrom(&handle_value).ok() &&
  211. !filter->KeyMayMatch(handle.offset(), k)) {
  212. // Not found
  213. } else {
  214. Iterator* block_iter = BlockReader(this, options, iiter->value());
  215. block_iter->Seek(k);
  216. if (block_iter->Valid()) {
  217. (*saver)(arg, block_iter->key(), block_iter->value());
  218. }
  219. s = block_iter->status();
  220. delete block_iter;
  221. }
  222. }
  223. if (s.ok()) {
  224. s = iiter->status();
  225. }
  226. delete iiter;
  227. return s;
  228. }
  229. uint64_t Table::ApproximateOffsetOf(const Slice& key) const {
  230. Iterator* index_iter =
  231. rep_->index_block->NewIterator(rep_->options.comparator);
  232. index_iter->Seek(key);
  233. uint64_t result;
  234. if (index_iter->Valid()) {
  235. BlockHandle handle;
  236. Slice input = index_iter->value();
  237. Status s = handle.DecodeFrom(&input);
  238. if (s.ok()) {
  239. result = handle.offset();
  240. } else {
  241. // Strange: we can't decode the block handle in the index block.
  242. // We'll just return the offset of the metaindex block, which is
  243. // close to the whole file size for this case.
  244. result = rep_->metaindex_handle.offset();
  245. }
  246. } else {
  247. // key is past the last key in the file. Approximate the offset
  248. // by returning the offset of the metaindex block (which is
  249. // right near the end of the file).
  250. result = rep_->metaindex_handle.offset();
  251. }
  252. delete index_iter;
  253. return result;
  254. }
  255. } // namespace leveldb