binary_annotator.cpp 58 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519
#include "binary_annotator.h"

#include <algorithm>
#include <cctype>
#include <cstdint>
#include <iostream>
#include <limits>
#include <string>
#include <utility>
#include <vector>

#include "flatbuffers/base.h"
#include "flatbuffers/reflection.h"
#include "flatbuffers/util.h"
#include "flatbuffers/verifier.h"
  12. namespace flatbuffers {
  13. namespace {
  14. static bool BinaryRegionSort(const BinaryRegion &a, const BinaryRegion &b) {
  15. return a.offset < b.offset;
  16. }
  17. static void SetError(BinaryRegionComment &comment, BinaryRegionStatus status,
  18. std::string message = "") {
  19. comment.status = status;
  20. comment.status_message = message;
  21. }
  22. static BinaryRegion MakeBinaryRegion(
  23. const uint64_t offset = 0, const uint64_t length = 0,
  24. const BinaryRegionType type = BinaryRegionType::Unknown,
  25. const uint64_t array_length = 0, const uint64_t points_to_offset = 0,
  26. BinaryRegionComment comment = {}) {
  27. BinaryRegion region;
  28. region.offset = offset;
  29. region.length = length;
  30. region.type = type;
  31. region.array_length = array_length;
  32. region.points_to_offset = points_to_offset;
  33. region.comment = std::move(comment);
  34. return region;
  35. }
  36. static BinarySection MakeBinarySection(const std::string &name,
  37. const BinarySectionType type,
  38. std::vector<BinaryRegion> regions) {
  39. BinarySection section;
  40. section.name = name;
  41. section.type = type;
  42. section.regions = std::move(regions);
  43. return section;
  44. }
  45. static BinarySection MakeSingleRegionBinarySection(const std::string &name,
  46. const BinarySectionType type,
  47. const BinaryRegion &region) {
  48. std::vector<BinaryRegion> regions;
  49. regions.push_back(region);
  50. return MakeBinarySection(name, type, std::move(regions));
  51. }
  // Returns true if any of the `length` bytes of `binary` starting at
  // `offset` is non-zero. An empty range yields false.
  //
  // The caller is responsible for making sure the range lies inside the
  // buffer; no bounds checking is done here.
  static bool IsNonZeroRegion(const uint64_t offset, const uint64_t length,
                              const uint8_t *const binary) {
    const uint8_t *const first = binary + offset;
    const uint8_t *const last = first + length;
    return std::any_of(first, last,
                       [](const uint8_t byte) { return byte != 0; });
  }
  // Returns true if every one of the `length` bytes of `binary` starting at
  // `offset` is a printable character according to std::isprint. An empty
  // range yields true.
  //
  // The caller is responsible for making sure the range lies inside the
  // buffer; no bounds checking is done here.
  static bool IsPrintableRegion(const uint64_t offset, const uint64_t length,
                                const uint8_t *const binary) {
    const uint8_t *const first = binary + offset;
    const uint8_t *const last = first + length;
    // The bytes are unsigned, so each value is representable as an
    // unsigned char, which is what std::isprint requires of its argument.
    return std::all_of(first, last, [](const uint8_t byte) {
      return std::isprint(byte) != 0;
    });
  }
  66. static BinarySection GenerateMissingSection(const uint64_t offset,
  67. const uint64_t length,
  68. const uint8_t *const binary) {
  69. std::vector<BinaryRegion> regions;
  70. // Check if the region is all zeros or not, as that can tell us if it is
  71. // padding or not.
  72. if (IsNonZeroRegion(offset, length, binary)) {
  73. // Some of the padding bytes are non-zero, so this might be an unknown
  74. // section of the binary.
  75. // TODO(dbaileychess): We could be a bit smarter with different sized
  76. // alignments. For now, the 8 byte check encompasses all the smaller
  77. // alignments.
  78. BinaryRegionComment comment;
  79. comment.type = BinaryRegionCommentType::Unknown;
  80. if (length >= 8) {
  81. SetError(comment, BinaryRegionStatus::WARN_NO_REFERENCES);
  82. } else {
  83. SetError(comment, BinaryRegionStatus::WARN_CORRUPTED_PADDING);
  84. }
  85. regions.push_back(MakeBinaryRegion(offset, length * sizeof(uint8_t),
  86. BinaryRegionType::Unknown, length, 0,
  87. comment));
  88. return MakeBinarySection("no known references", BinarySectionType::Unknown,
  89. std::move(regions));
  90. }
  91. BinaryRegionComment comment;
  92. comment.type = BinaryRegionCommentType::Padding;
  93. if (length >= 8) {
  94. SetError(comment, BinaryRegionStatus::WARN_PADDING_LENGTH);
  95. }
  96. // This region is most likely padding.
  97. regions.push_back(MakeBinaryRegion(offset, length * sizeof(uint8_t),
  98. BinaryRegionType::Uint8, length, 0,
  99. comment));
  100. return MakeBinarySection("", BinarySectionType::Padding, std::move(regions));
  101. }
  102. } // namespace
  103. std::map<uint64_t, BinarySection> BinaryAnnotator::Annotate() {
  104. flatbuffers::Verifier verifier(bfbs_, static_cast<size_t>(bfbs_length_));
  105. if ((is_size_prefixed_ &&
  106. !reflection::VerifySizePrefixedSchemaBuffer(verifier)) ||
  107. !reflection::VerifySchemaBuffer(verifier)) {
  108. return {};
  109. }
  110. // The binary is too short to read as a flatbuffers.
  111. if (binary_length_ < FLATBUFFERS_MIN_BUFFER_SIZE) { return {}; }
  112. // Make sure we start with a clean slate.
  113. vtables_.clear();
  114. sections_.clear();
  115. // First parse the header region which always start at offset 0.
  116. // The returned offset will point to the root_table location.
  117. const uint64_t root_table_offset = BuildHeader(0);
  118. if (IsValidOffset(root_table_offset)) {
  119. // Build the root table, and all else will be referenced from it.
  120. BuildTable(root_table_offset, BinarySectionType::RootTable,
  121. schema_->root_table());
  122. }
  123. // Now that all the sections are built, make sure the binary sections are
  124. // contiguous.
  125. FixMissingRegions();
  126. // Then scan the area between BinarySections insert padding sections that are
  127. // implied.
  128. FixMissingSections();
  129. return sections_;
  130. }
  131. uint64_t BinaryAnnotator::BuildHeader(const uint64_t header_offset) {
  132. uint64_t offset = header_offset;
  133. std::vector<BinaryRegion> regions;
  134. // If this binary is a size prefixed one, attempt to parse the size.
  135. if (is_size_prefixed_) {
  136. BinaryRegionComment prefix_length_comment;
  137. prefix_length_comment.type = BinaryRegionCommentType::SizePrefix;
  138. bool has_prefix_value = false;
  139. const auto prefix_length = ReadScalar<uoffset64_t>(offset);
  140. if (*prefix_length <= binary_length_) {
  141. regions.push_back(MakeBinaryRegion(offset, sizeof(uoffset64_t),
  142. BinaryRegionType::Uint64, 0, 0,
  143. prefix_length_comment));
  144. offset += sizeof(uoffset64_t);
  145. has_prefix_value = true;
  146. }
  147. if (!has_prefix_value) {
  148. const auto prefix_length = ReadScalar<uoffset_t>(offset);
  149. if (*prefix_length <= binary_length_) {
  150. regions.push_back(MakeBinaryRegion(offset, sizeof(uoffset_t),
  151. BinaryRegionType::Uint32, 0, 0,
  152. prefix_length_comment));
  153. offset += sizeof(uoffset_t);
  154. has_prefix_value = true;
  155. }
  156. }
  157. if (!has_prefix_value) {
  158. SetError(prefix_length_comment, BinaryRegionStatus::ERROR);
  159. }
  160. }
  161. const auto root_table_offset = ReadScalar<uint32_t>(offset);
  162. if (!root_table_offset.has_value()) {
  163. // This shouldn't occur, since we validate the min size of the buffer
  164. // before. But for completion sake, we shouldn't read passed the binary end.
  165. return std::numeric_limits<uint64_t>::max();
  166. }
  167. const auto root_table_loc = offset + *root_table_offset;
  168. BinaryRegionComment root_offset_comment;
  169. root_offset_comment.type = BinaryRegionCommentType::RootTableOffset;
  170. root_offset_comment.name = schema_->root_table()->name()->str();
  171. if (!IsValidOffset(root_table_loc)) {
  172. SetError(root_offset_comment,
  173. BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
  174. }
  175. regions.push_back(MakeBinaryRegion(offset, sizeof(uint32_t),
  176. BinaryRegionType::UOffset, 0,
  177. root_table_loc, root_offset_comment));
  178. offset += sizeof(uint32_t);
  179. if (IsValidRead(offset, flatbuffers::kFileIdentifierLength) &&
  180. IsPrintableRegion(offset, flatbuffers::kFileIdentifierLength, binary_)) {
  181. BinaryRegionComment comment;
  182. comment.type = BinaryRegionCommentType::FileIdentifier;
  183. // Check if the file identifier region has non-zero data, and assume its
  184. // the file identifier. Otherwise, it will get filled in with padding
  185. // later.
  186. regions.push_back(MakeBinaryRegion(
  187. offset, flatbuffers::kFileIdentifierLength * sizeof(uint8_t),
  188. BinaryRegionType::Char, flatbuffers::kFileIdentifierLength, 0,
  189. comment));
  190. }
  191. AddSection(header_offset, MakeBinarySection("", BinarySectionType::Header,
  192. std::move(regions)));
  193. return root_table_loc;
  194. }
  // Returns the VTable that describes `table` at `vtable_offset`, building
  // and annotating it first if it has not been built for this table type.
  //
  // Parameters:
  //   vtable_offset             - location of the vtable in the binary.
  //   table                     - schema object of the table that refers to
  //                               the vtable.
  //   offset_of_referring_table - location of that table in the binary; used
  //                               to validate the table size and the field
  //                               offsets stored in the vtable.
  //
  // Returns nullptr when the vtable cannot be used: a new vtable would start
  // inside an already recorded section, the vtable size or the table size
  // cannot be read, or the vtable size is too long or too short. In the read
  // and size failures a single-region VTable section carrying the error is
  // added before returning.
  //
  // A failed validation of the referring table size, or of an individual
  // field offset, is only recorded on the corresponding region and does not
  // abort the build.
  195. BinaryAnnotator::VTable *BinaryAnnotator::GetOrBuildVTable(
  196. const uint64_t vtable_offset, const reflection::Object *const table,
  197. const uint64_t offset_of_referring_table) {
  198. // Get a list of vtables (if any) already defined at this offset.
  199. std::list<VTable> &vtables = vtables_[vtable_offset];
  200. // See if this vtable for the table type has been generated before.
  201. for (VTable &vtable : vtables) {
  202. if (vtable.referring_table == table) { return &vtable; }
  203. }
  204. // If we are trying to make a new vtable and it is already encompassed by
  205. // another binary section, something is corrupted.
  206. if (vtables.empty() && ContainsSection(vtable_offset)) { return nullptr; }
  207. const std::string referring_table_name = table->name()->str();
  208. BinaryRegionComment vtable_size_comment;
  209. vtable_size_comment.type = BinaryRegionCommentType::VTableSize;
  210. const auto vtable_length = ReadScalar<uint16_t>(vtable_offset);
  211. if (!vtable_length.has_value()) {
  // Not enough bytes left for the vtable size: record whatever remains as a
  // single unknown region and give up.
  212. const uint64_t remaining = RemainingBytes(vtable_offset);
  213. SetError(vtable_size_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
  214. "2");
  215. AddSection(vtable_offset,
  216. MakeSingleRegionBinarySection(
  217. referring_table_name, BinarySectionType::VTable,
  218. MakeBinaryRegion(vtable_offset, remaining,
  219. BinaryRegionType::Unknown, remaining, 0,
  220. vtable_size_comment)));
  221. return nullptr;
  222. }
  223. // Vtables start with the size of the vtable
  224. const uint16_t vtable_size = vtable_length.value();
  // The last byte of the vtable must still be inside the binary.
  225. if (!IsValidOffset(vtable_offset + vtable_size - 1)) {
  226. SetError(vtable_size_comment, BinaryRegionStatus::ERROR_LENGTH_TOO_LONG);
  227. // The vtable_size points off the end of the binary.
  228. AddSection(vtable_offset,
  229. MakeSingleRegionBinarySection(
  230. referring_table_name, BinarySectionType::VTable,
  231. MakeBinaryRegion(vtable_offset, sizeof(uint16_t),
  232. BinaryRegionType::Uint16, 0, 0,
  233. vtable_size_comment)));
  234. return nullptr;
  235. } else if (vtable_size < 2 * sizeof(uint16_t)) {
  236. SetError(vtable_size_comment, BinaryRegionStatus::ERROR_LENGTH_TOO_SHORT,
  237. "4");
  238. // The size includes itself and the table size which are both uint16_t.
  239. AddSection(vtable_offset,
  240. MakeSingleRegionBinarySection(
  241. referring_table_name, BinarySectionType::VTable,
  242. MakeBinaryRegion(vtable_offset, sizeof(uint16_t),
  243. BinaryRegionType::Uint16, 0, 0,
  244. vtable_size_comment)));
  245. return nullptr;
  246. }
  // From here on the regions of the vtable are collected in order: the
  // vtable size, the referring table size, then one entry per field.
  247. std::vector<BinaryRegion> regions;
  248. regions.push_back(MakeBinaryRegion(vtable_offset, sizeof(uint16_t),
  249. BinaryRegionType::Uint16, 0, 0,
  250. vtable_size_comment));
  251. uint64_t offset = vtable_offset + sizeof(uint16_t);
  252. BinaryRegionComment ref_table_len_comment;
  253. ref_table_len_comment.type =
  254. BinaryRegionCommentType::VTableRefferingTableLength;
  255. // Ensure we can read the next uint16_t field, which is the size of the
  256. // referring table.
  257. const auto table_length = ReadScalar<uint16_t>(offset);
  258. if (!table_length.has_value()) {
  // Note that this error section is keyed on `offset` (the location of the
  // table size), not on `vtable_offset`.
  259. const uint64_t remaining = RemainingBytes(offset);
  260. SetError(ref_table_len_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
  261. "2");
  262. AddSection(offset, MakeSingleRegionBinarySection(
  263. referring_table_name, BinarySectionType::VTable,
  264. MakeBinaryRegion(
  265. offset, remaining, BinaryRegionType::Unknown,
  266. remaining, 0, ref_table_len_comment)));
  267. return nullptr;
  268. }
  269. // Then they have the size of the table they reference.
  270. const uint16_t table_size = table_length.value();
  // A bad table size is flagged on the region but processing continues.
  271. if (!IsValidOffset(offset_of_referring_table + table_size - 1)) {
  272. SetError(ref_table_len_comment, BinaryRegionStatus::ERROR_LENGTH_TOO_LONG);
  273. } else if (table_size < 4) {
  274. SetError(ref_table_len_comment, BinaryRegionStatus::ERROR_LENGTH_TOO_SHORT,
  275. "4");
  276. }
  277. regions.push_back(MakeBinaryRegion(offset, sizeof(uint16_t),
  278. BinaryRegionType::Uint16, 0, 0,
  279. ref_table_len_comment));
  280. offset += sizeof(uint16_t);
  // Start of the per-field entries; entry `id` lives at
  // offset_start + id * sizeof(uint16_t).
  281. const uint64_t offset_start = offset;
  282. // A mapping between field (and its id) to the relative offset (uint16_t) from
  283. // the start of the table.
  284. std::map<uint16_t, VTable::Entry> fields;
  285. // Counter for determining if the binary has more vtable entries than the
  286. // schema provided. This can occur if the binary was created at a newer schema
  287. // version and is being processed with an older one.
  288. uint16_t fields_processed = 0;
  289. // Loop over all the fields.
  290. ForAllFields(table, /*reverse=*/false, [&](const reflection::Field *field) {
  291. const uint64_t field_offset = offset_start + field->id() * sizeof(uint16_t);
  292. if (field_offset >= vtable_offset + vtable_size) {
  293. // This field_offset is too large for this vtable, so it must come from a
  294. // newer schema than the binary was created with or the binary writer did
  295. // not write it. For either case, it is safe to ignore.
  296. // TODO(dbaileychess): We could show which fields are not set and their
  297. // default values if we want. We just need a way to make it obvious that
  298. // it isn't part of the buffer.
  299. return;
  300. }
  301. BinaryRegionComment field_comment;
  302. field_comment.type = BinaryRegionCommentType::VTableFieldOffset;
  // The name carries a closing backtick only; presumably the consumer of
  // the comment supplies the opening one - TODO confirm.
  303. field_comment.name = std::string(field->name()->c_str()) +
  304. "` (id: " + std::to_string(field->id()) + ")";
  305. const auto offset_from_table = ReadScalar<uint16_t>(field_offset);
  306. if (!offset_from_table.has_value()) {
  307. const uint64_t remaining = RemainingBytes(field_offset);
  308. SetError(field_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "2");
  309. regions.push_back(MakeBinaryRegion(field_offset, remaining,
  310. BinaryRegionType::Unknown, remaining,
  311. 0, field_comment));
  312. return;
  313. }
  // The field's location inside the referring table must be in the binary.
  314. if (!IsValidOffset(offset_of_referring_table + offset_from_table.value() -
  315. 1)) {
  316. SetError(field_comment, BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
  317. regions.push_back(MakeBinaryRegion(field_offset, sizeof(uint16_t),
  318. BinaryRegionType::VOffset, 0, 0,
  319. field_comment));
  320. return;
  321. }
  // Remember the entry (even when the offset is 0, i.e. not present) so the
  // table builder can look the field up later.
  322. VTable::Entry entry;
  323. entry.field = field;
  324. entry.offset_from_table = offset_from_table.value();
  325. fields.insert(std::make_pair(field->id(), entry));
  326. std::string default_label;
  327. if (offset_from_table.value() == 0) {
  328. // Not present, so could be default or be optional.
  329. if (field->required()) {
  330. SetError(field_comment,
  331. BinaryRegionStatus::ERROR_REQUIRED_FIELD_NOT_PRESENT);
  332. // If this is a required field, make it known this is an error.
  333. regions.push_back(MakeBinaryRegion(field_offset, sizeof(uint16_t),
  334. BinaryRegionType::VOffset, 0, 0,
  335. field_comment));
  336. return;
  337. } else {
  338. // It's an optional field, so get the default value and provide an
  339. // annotation for it.
  340. if (IsScalar(field->type()->base_type())) {
  341. default_label += "<defaults to ";
  342. default_label += IsFloat(field->type()->base_type())
  343. ? std::to_string(field->default_real())
  344. : std::to_string(field->default_integer());
  345. default_label += "> (";
  346. } else {
  347. default_label += "<null> (";
  348. }
  349. default_label +=
  350. reflection::EnumNameBaseType(field->type()->base_type());
  351. default_label += ")";
  352. }
  353. }
  354. field_comment.default_value = default_label;
  355. regions.push_back(MakeBinaryRegion(field_offset, sizeof(uint16_t),
  356. BinaryRegionType::VOffset, 0, 0,
  357. field_comment));
  // NOTE(review): only fields that reach this point are counted; the early
  // returns above (unreadable entry, offset out of binary, missing required
  // field) push a region without incrementing. Since the unknown-field loop
  // below starts at `fields_processed`, it may revisit slots that already
  // have a region - confirm whether that is intended.
  358. fields_processed++;
  359. });
  360. // Check if we covered all the expectant fields. If not, we need to add them
  361. // as unknown fields.
  362. uint16_t expectant_vtable_fields =
  363. (vtable_size - sizeof(uint16_t) - sizeof(uint16_t)) / sizeof(uint16_t);
  364. // Prevent a bad binary from declaring a really large vtable_size, that we can
  365. // not independently verify.
  // The cap is three times the number of fields that were processed.
  366. expectant_vtable_fields = std::min(
  367. static_cast<uint16_t>(fields_processed * 3), expectant_vtable_fields);
  368. for (uint16_t id = fields_processed; id < expectant_vtable_fields; ++id) {
  369. const uint64_t field_offset = offset_start + id * sizeof(uint16_t);
  370. const auto offset_from_table = ReadScalar<uint16_t>(field_offset);
  371. BinaryRegionComment field_comment;
  372. field_comment.type = BinaryRegionCommentType::VTableUnknownFieldOffset;
  373. field_comment.index = id;
  374. if (!offset_from_table.has_value()) {
  375. const uint64_t remaining = RemainingBytes(field_offset);
  376. SetError(field_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "2");
  377. regions.push_back(MakeBinaryRegion(field_offset, remaining,
  378. BinaryRegionType::Unknown, remaining,
  379. 0, field_comment));
  380. continue;
  381. }
  382. VTable::Entry entry;
  383. entry.field = nullptr; // No field to reference.
  384. entry.offset_from_table = offset_from_table.value();
  // std::map::insert leaves an existing entry for `id` untouched.
  385. fields.insert(std::make_pair(id, entry));
  386. regions.push_back(MakeBinaryRegion(field_offset, sizeof(uint16_t),
  387. BinaryRegionType::VOffset, 0, 0,
  388. field_comment));
  389. }
  390. // If we have never added this vtable before record the Binary section.
  391. if (vtables.empty()) {
  392. sections_[vtable_offset] = MakeBinarySection(
  393. referring_table_name, BinarySectionType::VTable, std::move(regions));
  394. } else {
  395. // Add the current table name to the name of the section.
  // The regions collected above are discarded in this case; the section
  // recorded by the first table type is kept.
  396. sections_[vtable_offset].name += ", " + referring_table_name;
  397. }
  398. VTable vtable;
  399. vtable.referring_table = table;
  400. vtable.fields = std::move(fields);
  401. vtable.table_size = table_size;
  402. vtable.vtable_size = vtable_size;
  403. // Add this vtable to the collection of vtables at this offset.
  404. vtables.push_back(std::move(vtable));
  405. // Return the vtable we just added.
  406. return &vtables.back();
  407. }
  408. void BinaryAnnotator::BuildTable(const uint64_t table_offset,
  409. const BinarySectionType type,
  410. const reflection::Object *const table) {
  411. if (ContainsSection(table_offset)) { return; }
  412. BinaryRegionComment vtable_offset_comment;
  413. vtable_offset_comment.type = BinaryRegionCommentType::TableVTableOffset;
  414. const auto vtable_soffset = ReadScalar<int32_t>(table_offset);
  415. if (!vtable_soffset.has_value()) {
  416. const uint64_t remaining = RemainingBytes(table_offset);
  417. SetError(vtable_offset_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
  418. "4");
  419. AddSection(
  420. table_offset,
  421. MakeSingleRegionBinarySection(
  422. table->name()->str(), type,
  423. MakeBinaryRegion(table_offset, remaining, BinaryRegionType::Unknown,
  424. remaining, 0, vtable_offset_comment)));
  425. // If there aren't enough bytes left to read the vtable offset, there is
  426. // nothing we can do.
  427. return;
  428. }
  429. // Tables start with the vtable
  430. const uint64_t vtable_offset = table_offset - vtable_soffset.value();
  431. if (!IsValidOffset(vtable_offset)) {
  432. SetError(vtable_offset_comment,
  433. BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
  434. AddSection(table_offset,
  435. MakeSingleRegionBinarySection(
  436. table->name()->str(), type,
  437. MakeBinaryRegion(table_offset, sizeof(int32_t),
  438. BinaryRegionType::SOffset, 0, vtable_offset,
  439. vtable_offset_comment)));
  440. // There isn't much to do with an invalid vtable offset, as we won't be able
  441. // to intepret the rest of the table fields.
  442. return;
  443. }
  444. std::vector<BinaryRegion> regions;
  445. regions.push_back(MakeBinaryRegion(table_offset, sizeof(int32_t),
  446. BinaryRegionType::SOffset, 0,
  447. vtable_offset, vtable_offset_comment));
  448. // Parse the vtable first so we know what the rest of the fields in the table
  449. // are.
  450. const VTable *const vtable =
  451. GetOrBuildVTable(vtable_offset, table, table_offset);
  452. if (vtable == nullptr) {
  453. // There is no valid vtable for this table, so we cannot process the rest of
  454. // the table entries.
  455. return;
  456. }
  457. // This is the size and length of this table.
  458. const uint16_t table_size = vtable->table_size;
  459. uint64_t table_end_offset = table_offset + table_size;
  460. if (!IsValidOffset(table_end_offset - 1)) {
  461. // We already validated the table size in BuildVTable, but we have to make
  462. // sure we don't use a bad value here.
  463. table_end_offset = binary_length_;
  464. }
  465. // We need to iterate over the vtable fields by their offset in the binary,
  466. // not by their IDs. So copy them over to another vector that we can sort on
  467. // the offset_from_table property.
  468. std::vector<VTable::Entry> fields;
  469. for (const auto &vtable_field : vtable->fields) {
  470. fields.push_back(vtable_field.second);
  471. }
  472. std::stable_sort(fields.begin(), fields.end(),
  473. [](const VTable::Entry &a, const VTable::Entry &b) {
  474. return a.offset_from_table < b.offset_from_table;
  475. });
  476. // Iterate over all the fields by order of their offset.
  477. for (size_t i = 0; i < fields.size(); ++i) {
  478. const reflection::Field *field = fields[i].field;
  479. const uint16_t offset_from_table = fields[i].offset_from_table;
  480. if (offset_from_table == 0) {
  481. // Skip non-present fields.
  482. continue;
  483. }
  484. // The field offsets are relative to the start of the table.
  485. const uint64_t field_offset = table_offset + offset_from_table;
  486. if (!IsValidOffset(field_offset)) {
  487. // The field offset is larger than the binary, nothing we can do.
  488. continue;
  489. }
  490. // We have a vtable entry for a non-existant field, that means its a binary
  491. // generated by a newer schema than we are currently processing.
  492. if (field == nullptr) {
  493. // Calculate the length of this unknown field.
  494. const uint64_t unknown_field_length =
  495. // Check if there is another unknown field after this one.
  496. ((i + 1 < fields.size())
  497. ? table_offset + fields[i + 1].offset_from_table
  498. // Otherwise use the known end of the table.
  499. : table_end_offset) -
  500. field_offset;
  501. if (unknown_field_length == 0) { continue; }
  502. std::string hint;
  503. if (unknown_field_length == 4) {
  504. const auto relative_offset = ReadScalar<uint32_t>(field_offset);
  505. if (relative_offset.has_value()) {
  506. // The field is 4 in length, so it could be an offset? Provide a hint.
  507. hint += "<possibly an offset? Check Loc: +0x";
  508. hint += ToHex(field_offset + relative_offset.value());
  509. hint += ">";
  510. }
  511. }
  512. BinaryRegionComment unknown_field_comment;
  513. unknown_field_comment.type = BinaryRegionCommentType::TableUnknownField;
  514. if (!IsValidRead(field_offset, unknown_field_length)) {
  515. const uint64_t remaining = RemainingBytes(field_offset);
  516. SetError(unknown_field_comment,
  517. BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
  518. std::to_string(unknown_field_length));
  519. regions.push_back(MakeBinaryRegion(field_offset, remaining,
  520. BinaryRegionType::Unknown, remaining,
  521. 0, unknown_field_comment));
  522. continue;
  523. }
  524. unknown_field_comment.default_value = hint;
  525. regions.push_back(MakeBinaryRegion(
  526. field_offset, unknown_field_length, BinaryRegionType::Unknown,
  527. unknown_field_length, 0, unknown_field_comment));
  528. continue;
  529. }
  530. if (IsScalar(field->type()->base_type())) {
  531. // These are the raw values store in the table.
  532. const uint64_t type_size = GetTypeSize(field->type()->base_type());
  533. const BinaryRegionType region_type =
  534. GetRegionType(field->type()->base_type());
  535. BinaryRegionComment scalar_field_comment;
  536. scalar_field_comment.type = BinaryRegionCommentType::TableField;
  537. scalar_field_comment.name =
  538. std::string(field->name()->c_str()) + "` (" +
  539. reflection::EnumNameBaseType(field->type()->base_type()) + ")";
  540. if (!IsValidRead(field_offset, type_size)) {
  541. const uint64_t remaining = RemainingBytes(field_offset);
  542. SetError(scalar_field_comment,
  543. BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
  544. std::to_string(type_size));
  545. regions.push_back(MakeBinaryRegion(field_offset, remaining,
  546. BinaryRegionType::Unknown, remaining,
  547. 0, scalar_field_comment));
  548. continue;
  549. }
  550. if (IsUnionType(field)) {
  551. // This is a type for a union. Validate the value
  552. const auto enum_value = ReadScalar<uint8_t>(field_offset);
  553. // This should always have a value, due to the IsValidRead check above.
  554. if (!IsValidUnionValue(field, enum_value.value())) {
  555. SetError(scalar_field_comment,
  556. BinaryRegionStatus::ERROR_INVALID_UNION_TYPE);
  557. regions.push_back(MakeBinaryRegion(field_offset, type_size,
  558. region_type, 0, 0,
  559. scalar_field_comment));
  560. continue;
  561. }
  562. }
  563. regions.push_back(MakeBinaryRegion(field_offset, type_size, region_type,
  564. 0, 0, scalar_field_comment));
  565. continue;
  566. }
  567. // Read the offset
  568. uint64_t offset = 0;
  569. uint64_t length = sizeof(uint32_t);
  570. BinaryRegionType region_type = BinaryRegionType::UOffset;
  571. if (field->offset64()) {
  572. length = sizeof(uint64_t);
  573. region_type = BinaryRegionType::UOffset64;
  574. offset = ReadScalar<uint64_t>(field_offset).value_or(0);
  575. } else {
  576. offset = ReadScalar<uint32_t>(field_offset).value_or(0);
  577. }
  578. // const auto offset_from_field = ReadScalar<uint32_t>(field_offset);
  579. uint64_t offset_of_next_item = 0;
  580. BinaryRegionComment offset_field_comment;
  581. offset_field_comment.type = BinaryRegionCommentType::TableOffsetField;
  582. offset_field_comment.name = field->name()->c_str();
  583. const std::string offset_prefix =
  584. "offset to field `" + std::string(field->name()->c_str()) + "`";
  585. // Validate any field that isn't inline (i.e., non-structs).
  586. if (!IsInlineField(field)) {
  587. if (offset == 0) {
  588. const uint64_t remaining = RemainingBytes(field_offset);
  589. SetError(offset_field_comment,
  590. BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "4");
  591. regions.push_back(MakeBinaryRegion(field_offset, remaining,
  592. BinaryRegionType::Unknown, remaining,
  593. 0, offset_field_comment));
  594. continue;
  595. }
  596. offset_of_next_item = field_offset + offset;
  597. if (!IsValidOffset(offset_of_next_item)) {
  598. SetError(offset_field_comment,
  599. BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
  600. regions.push_back(MakeBinaryRegion(field_offset, length, region_type, 0,
  601. offset_of_next_item,
  602. offset_field_comment));
  603. continue;
  604. }
  605. }
  606. switch (field->type()->base_type()) {
  607. case reflection::BaseType::Obj: {
  608. const reflection::Object *next_object =
  609. schema_->objects()->Get(field->type()->index());
  610. if (next_object->is_struct()) {
  611. // Structs are stored inline.
  612. BuildStruct(field_offset, regions, field->name()->c_str(),
  613. next_object);
  614. } else {
  615. offset_field_comment.default_value = "(table)";
  616. regions.push_back(MakeBinaryRegion(field_offset, length, region_type,
  617. 0, offset_of_next_item,
  618. offset_field_comment));
  619. BuildTable(offset_of_next_item, BinarySectionType::Table,
  620. next_object);
  621. }
  622. } break;
  623. case reflection::BaseType::String: {
  624. offset_field_comment.default_value = "(string)";
  625. regions.push_back(MakeBinaryRegion(field_offset, length, region_type, 0,
  626. offset_of_next_item,
  627. offset_field_comment));
  628. BuildString(offset_of_next_item, table, field);
  629. } break;
  630. case reflection::BaseType::Vector: {
  631. offset_field_comment.default_value = "(vector)";
  632. regions.push_back(MakeBinaryRegion(field_offset, length, region_type, 0,
  633. offset_of_next_item,
  634. offset_field_comment));
  635. BuildVector(offset_of_next_item, table, field, table_offset,
  636. vtable->fields);
  637. } break;
  638. case reflection::BaseType::Vector64: {
  639. offset_field_comment.default_value = "(vector64)";
  640. regions.push_back(MakeBinaryRegion(field_offset, length, region_type, 0,
  641. offset_of_next_item,
  642. offset_field_comment));
  643. BuildVector(offset_of_next_item, table, field, table_offset,
  644. vtable->fields);
  645. } break;
  646. case reflection::BaseType::Union: {
  647. const uint64_t union_offset = offset_of_next_item;
  648. // The union type field is always one less than the union itself.
  649. const uint16_t union_type_id = field->id() - 1;
  650. auto vtable_field = vtable->fields.find(union_type_id);
  651. if (vtable_field == vtable->fields.end()) {
  652. // TODO(dbaileychess): need to capture this error condition.
  653. break;
  654. }
  655. offset_field_comment.default_value = "(union)";
  656. const uint64_t type_offset =
  657. table_offset + vtable_field->second.offset_from_table;
  658. const auto realized_type = ReadScalar<uint8_t>(type_offset);
  659. if (!realized_type.has_value()) {
  660. const uint64_t remaining = RemainingBytes(type_offset);
  661. SetError(offset_field_comment,
  662. BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "1");
  663. regions.push_back(MakeBinaryRegion(
  664. type_offset, remaining, BinaryRegionType::Unknown, remaining, 0,
  665. offset_field_comment));
  666. continue;
  667. }
  668. if (!IsValidUnionValue(field, realized_type.value())) {
  669. // We already export an error in the union type field, so just skip
  670. // building the union itself and it will default to an unreference
  671. // Binary section.
  672. continue;
  673. }
  674. const std::string enum_type =
  675. BuildUnion(union_offset, realized_type.value(), field);
  676. offset_field_comment.default_value =
  677. "(union of type `" + enum_type + "`)";
  678. regions.push_back(MakeBinaryRegion(field_offset, length, region_type, 0,
  679. union_offset, offset_field_comment));
  680. } break;
  681. default: break;
  682. }
  683. }
  684. // Handle the case where there is padding after the last known binary
  685. // region. Calculate where we left off towards the expected end of the
  686. // table.
  687. const uint64_t i = regions.back().offset + regions.back().length + 1;
  688. if (i < table_end_offset) {
  689. const uint64_t pad_bytes = table_end_offset - i + 1;
  690. BinaryRegionComment padding_comment;
  691. padding_comment.type = BinaryRegionCommentType::Padding;
  692. regions.push_back(MakeBinaryRegion(i - 1, pad_bytes * sizeof(uint8_t),
  693. BinaryRegionType::Uint8, pad_bytes, 0,
  694. padding_comment));
  695. }
  696. AddSection(table_offset,
  697. MakeBinarySection(table->name()->str(), type, std::move(regions)));
  698. }
  699. uint64_t BinaryAnnotator::BuildStruct(const uint64_t struct_offset,
  700. std::vector<BinaryRegion> &regions,
  701. const std::string referring_field_name,
  702. const reflection::Object *const object) {
  703. if (!object->is_struct()) { return struct_offset; }
  704. uint64_t offset = struct_offset;
  705. // Loop over all the fields in increasing order
  706. ForAllFields(object, /*reverse=*/false, [&](const reflection::Field *field) {
  707. if (IsScalar(field->type()->base_type())) {
  708. // Structure Field value
  709. const uint64_t type_size = GetTypeSize(field->type()->base_type());
  710. const BinaryRegionType region_type =
  711. GetRegionType(field->type()->base_type());
  712. BinaryRegionComment comment;
  713. comment.type = BinaryRegionCommentType::StructField;
  714. comment.name = referring_field_name + "." + field->name()->str();
  715. comment.default_value = "of '" + object->name()->str() + "' (" +
  716. std::string(reflection::EnumNameBaseType(
  717. field->type()->base_type())) +
  718. ")";
  719. if (!IsValidRead(offset, type_size)) {
  720. const uint64_t remaining = RemainingBytes(offset);
  721. SetError(comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
  722. std::to_string(type_size));
  723. regions.push_back(MakeBinaryRegion(offset, remaining,
  724. BinaryRegionType::Unknown, remaining,
  725. 0, comment));
  726. // TODO(dbaileychess): Should I bail out here? This sets offset to the
  727. // end of the binary. So all other reads in the loop should fail.
  728. offset += remaining;
  729. return;
  730. }
  731. regions.push_back(
  732. MakeBinaryRegion(offset, type_size, region_type, 0, 0, comment));
  733. offset += type_size;
  734. } else if (field->type()->base_type() == reflection::BaseType::Obj) {
  735. // Structs are stored inline, even when nested.
  736. offset = BuildStruct(offset, regions,
  737. referring_field_name + "." + field->name()->str(),
  738. schema_->objects()->Get(field->type()->index()));
  739. } else if (field->type()->base_type() == reflection::BaseType::Array) {
  740. const bool is_scalar = IsScalar(field->type()->element());
  741. const uint64_t type_size = GetTypeSize(field->type()->element());
  742. const BinaryRegionType region_type =
  743. GetRegionType(field->type()->element());
  744. // Arrays are just repeated structures.
  745. for (uint16_t i = 0; i < field->type()->fixed_length(); ++i) {
  746. if (is_scalar) {
  747. BinaryRegionComment array_comment;
  748. array_comment.type = BinaryRegionCommentType::ArrayField;
  749. array_comment.name =
  750. referring_field_name + "." + field->name()->str();
  751. array_comment.index = i;
  752. array_comment.default_value =
  753. "of '" + object->name()->str() + "' (" +
  754. std::string(
  755. reflection::EnumNameBaseType(field->type()->element())) +
  756. ")";
  757. if (!IsValidRead(offset, type_size)) {
  758. const uint64_t remaining = RemainingBytes(offset);
  759. SetError(array_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
  760. std::to_string(type_size));
  761. regions.push_back(MakeBinaryRegion(offset, remaining,
  762. BinaryRegionType::Unknown,
  763. remaining, 0, array_comment));
  764. // TODO(dbaileychess): Should I bail out here? This sets offset to
  765. // the end of the binary. So all other reads in the loop should
  766. // fail.
  767. offset += remaining;
  768. break;
  769. }
  770. regions.push_back(MakeBinaryRegion(offset, type_size, region_type, 0,
  771. 0, array_comment));
  772. offset += type_size;
  773. } else {
  774. // Array of Structs.
  775. //
  776. // TODO(dbaileychess): This works, but the comments on the fields lose
  777. // some context. Need to figure a way how to plumb the nested arrays
  778. // comments together that isn't too confusing.
  779. offset =
  780. BuildStruct(offset, regions,
  781. referring_field_name + "." + field->name()->str(),
  782. schema_->objects()->Get(field->type()->index()));
  783. }
  784. }
  785. }
  786. // Insert any padding after this field.
  787. const uint16_t padding = field->padding();
  788. if (padding > 0 && IsValidOffset(offset + padding)) {
  789. BinaryRegionComment padding_comment;
  790. padding_comment.type = BinaryRegionCommentType::Padding;
  791. regions.push_back(MakeBinaryRegion(offset, padding,
  792. BinaryRegionType::Uint8, padding, 0,
  793. padding_comment));
  794. offset += padding;
  795. }
  796. });
  797. return offset;
  798. }
  799. void BinaryAnnotator::BuildString(const uint64_t string_offset,
  800. const reflection::Object *const table,
  801. const reflection::Field *const field) {
  802. // Check if we have already generated this string section, and this is a
  803. // shared string instance.
  804. if (ContainsSection(string_offset)) { return; }
  805. std::vector<BinaryRegion> regions;
  806. const auto string_length = ReadScalar<uint32_t>(string_offset);
  807. BinaryRegionComment string_length_comment;
  808. string_length_comment.type = BinaryRegionCommentType::StringLength;
  809. if (!string_length.has_value()) {
  810. const uint64_t remaining = RemainingBytes(string_offset);
  811. SetError(string_length_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
  812. "4");
  813. regions.push_back(MakeBinaryRegion(string_offset, remaining,
  814. BinaryRegionType::Unknown, remaining, 0,
  815. string_length_comment));
  816. } else {
  817. const uint32_t string_size = string_length.value();
  818. const uint64_t string_end =
  819. string_offset + sizeof(uint32_t) + string_size + sizeof(char);
  820. if (!IsValidOffset(string_end - 1)) {
  821. SetError(string_length_comment,
  822. BinaryRegionStatus::ERROR_LENGTH_TOO_LONG);
  823. regions.push_back(MakeBinaryRegion(string_offset, sizeof(uint32_t),
  824. BinaryRegionType::Uint32, 0, 0,
  825. string_length_comment));
  826. } else {
  827. regions.push_back(MakeBinaryRegion(string_offset, sizeof(uint32_t),
  828. BinaryRegionType::Uint32, 0, 0,
  829. string_length_comment));
  830. BinaryRegionComment string_comment;
  831. string_comment.type = BinaryRegionCommentType::StringValue;
  832. regions.push_back(MakeBinaryRegion(string_offset + sizeof(uint32_t),
  833. string_size, BinaryRegionType::Char,
  834. string_size, 0, string_comment));
  835. BinaryRegionComment string_terminator_comment;
  836. string_terminator_comment.type =
  837. BinaryRegionCommentType::StringTerminator;
  838. regions.push_back(MakeBinaryRegion(
  839. string_offset + sizeof(uint32_t) + string_size, sizeof(char),
  840. BinaryRegionType::Char, 0, 0, string_terminator_comment));
  841. }
  842. }
  843. AddSection(string_offset,
  844. MakeBinarySection(std::string(table->name()->c_str()) + "." +
  845. field->name()->c_str(),
  846. BinarySectionType::String, std::move(regions)));
  847. }
  848. void BinaryAnnotator::BuildVector(
  849. const uint64_t vector_offset, const reflection::Object *const table,
  850. const reflection::Field *const field, const uint64_t parent_table_offset,
  851. const std::map<uint16_t, VTable::Entry> vtable_fields) {
  852. if (ContainsSection(vector_offset)) { return; }
  853. BinaryRegionComment vector_length_comment;
  854. vector_length_comment.type = BinaryRegionCommentType::VectorLength;
  855. const bool is_64_bit_vector =
  856. field->type()->base_type() == reflection::BaseType::Vector64;
  857. flatbuffers::Optional<uint64_t> vector_length;
  858. uint32_t vector_length_size_type = 0;
  859. BinaryRegionType region_type = BinaryRegionType::Uint32;
  860. BinarySectionType section_type = BinarySectionType::Vector;
  861. if (is_64_bit_vector) {
  862. auto v = ReadScalar<uint64_t>(vector_offset);
  863. if (v.has_value()) { vector_length = v.value(); }
  864. vector_length_size_type = sizeof(uint64_t);
  865. region_type = BinaryRegionType::Uint64;
  866. section_type = BinarySectionType::Vector64;
  867. } else {
  868. auto v = ReadScalar<uint32_t>(vector_offset);
  869. if (v.has_value()) { vector_length = v.value(); }
  870. vector_length_size_type = sizeof(uint32_t);
  871. region_type = BinaryRegionType::Uint32;
  872. section_type = BinarySectionType::Vector;
  873. }
  874. if (!vector_length.has_value()) {
  875. const uint64_t remaining = RemainingBytes(vector_offset);
  876. SetError(vector_length_comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
  877. "4");
  878. AddSection(
  879. vector_offset,
  880. MakeSingleRegionBinarySection(
  881. std::string(table->name()->c_str()) + "." + field->name()->c_str(),
  882. BinarySectionType::Vector,
  883. MakeBinaryRegion(vector_offset, remaining,
  884. BinaryRegionType::Unknown, remaining, 0,
  885. vector_length_comment)));
  886. return;
  887. }
  888. // Validate there are enough bytes left in the binary to process all the
  889. // items.
  890. const uint64_t last_item_offset =
  891. vector_offset + vector_length_size_type +
  892. vector_length.value() * GetElementSize(field);
  893. if (!IsValidOffset(last_item_offset - 1)) {
  894. SetError(vector_length_comment, BinaryRegionStatus::ERROR_LENGTH_TOO_LONG);
  895. AddSection(
  896. vector_offset,
  897. MakeSingleRegionBinarySection(
  898. std::string(table->name()->c_str()) + "." + field->name()->c_str(),
  899. BinarySectionType::Vector,
  900. MakeBinaryRegion(vector_offset, vector_length_size_type,
  901. region_type, 0, 0, vector_length_comment)));
  902. return;
  903. }
  904. std::vector<BinaryRegion> regions;
  905. regions.push_back(MakeBinaryRegion(vector_offset, vector_length_size_type,
  906. region_type, 0, 0, vector_length_comment));
  907. // Consume the vector length offset.
  908. uint64_t offset = vector_offset + vector_length_size_type;
  909. switch (field->type()->element()) {
  910. case reflection::BaseType::Obj: {
  911. const reflection::Object *object =
  912. schema_->objects()->Get(field->type()->index());
  913. if (object->is_struct()) {
  914. // Vector of structs
  915. for (size_t i = 0; i < vector_length.value(); ++i) {
  916. // Structs are inline to the vector.
  917. const uint64_t next_offset =
  918. BuildStruct(offset, regions, "[" + NumToString(i) + "]", object);
  919. if (next_offset == offset) { break; }
  920. offset = next_offset;
  921. }
  922. } else {
  923. // Vector of objects
  924. for (size_t i = 0; i < vector_length.value(); ++i) {
  925. BinaryRegionComment vector_object_comment;
  926. vector_object_comment.type =
  927. BinaryRegionCommentType::VectorTableValue;
  928. vector_object_comment.index = i;
  929. const auto table_relative_offset = ReadScalar<uint32_t>(offset);
  930. if (!table_relative_offset.has_value()) {
  931. const uint64_t remaining = RemainingBytes(offset);
  932. SetError(vector_object_comment,
  933. BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "4");
  934. regions.push_back(
  935. MakeBinaryRegion(offset, remaining, BinaryRegionType::Unknown,
  936. remaining, 0, vector_object_comment));
  937. break;
  938. }
  939. // The table offset is relative from the offset location itself.
  940. const uint64_t table_offset = offset + table_relative_offset.value();
  941. if (!IsValidOffset(table_offset)) {
  942. SetError(vector_object_comment,
  943. BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
  944. regions.push_back(MakeBinaryRegion(
  945. offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
  946. table_offset, vector_object_comment));
  947. offset += sizeof(uint32_t);
  948. continue;
  949. }
  950. if (table_offset == parent_table_offset) {
  951. SetError(vector_object_comment,
  952. BinaryRegionStatus::ERROR_CYCLE_DETECTED);
  953. // A cycle detected where a table vector field is pointing to
  954. // itself. This should only happen in corrupted files.
  955. regions.push_back(MakeBinaryRegion(
  956. offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
  957. table_offset, vector_object_comment));
  958. offset += sizeof(uint32_t);
  959. continue;
  960. }
  961. regions.push_back(MakeBinaryRegion(
  962. offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
  963. table_offset, vector_object_comment));
  964. // Consume the offset to the table.
  965. offset += sizeof(uint32_t);
  966. BuildTable(table_offset, BinarySectionType::Table, object);
  967. }
  968. }
  969. } break;
  970. case reflection::BaseType::String: {
  971. // Vector of strings
  972. for (size_t i = 0; i < vector_length.value(); ++i) {
  973. BinaryRegionComment vector_object_comment;
  974. vector_object_comment.type = BinaryRegionCommentType::VectorStringValue;
  975. vector_object_comment.index = i;
  976. const auto string_relative_offset = ReadScalar<uint32_t>(offset);
  977. if (!string_relative_offset.has_value()) {
  978. const uint64_t remaining = RemainingBytes(offset);
  979. SetError(vector_object_comment,
  980. BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "4");
  981. regions.push_back(
  982. MakeBinaryRegion(offset, remaining, BinaryRegionType::Unknown,
  983. remaining, 0, vector_object_comment));
  984. break;
  985. }
  986. // The string offset is relative from the offset location itself.
  987. const uint64_t string_offset = offset + string_relative_offset.value();
  988. if (!IsValidOffset(string_offset)) {
  989. SetError(vector_object_comment,
  990. BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
  991. regions.push_back(MakeBinaryRegion(
  992. offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
  993. string_offset, vector_object_comment));
  994. offset += sizeof(uint32_t);
  995. continue;
  996. }
  997. regions.push_back(MakeBinaryRegion(
  998. offset, sizeof(uint32_t), BinaryRegionType::UOffset, 0,
  999. string_offset, vector_object_comment));
  1000. BuildString(string_offset, table, field);
  1001. offset += sizeof(uint32_t);
  1002. }
  1003. } break;
  1004. case reflection::BaseType::Union: {
  1005. // Vector of unions
  1006. // Unions have both their realized type (uint8_t for now) that are
  1007. // stored separately. These are stored in the field->index() - 1
  1008. // location.
  1009. const uint16_t union_type_vector_id = field->id() - 1;
  1010. auto vtable_entry = vtable_fields.find(union_type_vector_id);
  1011. if (vtable_entry == vtable_fields.end()) {
  1012. // TODO(dbaileychess): need to capture this error condition.
  1013. break;
  1014. }
  1015. const uint64_t union_type_vector_field_offset =
  1016. parent_table_offset + vtable_entry->second.offset_from_table;
  1017. const auto union_type_vector_field_relative_offset =
  1018. ReadScalar<uint16_t>(union_type_vector_field_offset);
  1019. if (!union_type_vector_field_relative_offset.has_value()) {
  1020. const uint64_t remaining = RemainingBytes(offset);
  1021. BinaryRegionComment vector_union_comment;
  1022. vector_union_comment.type = BinaryRegionCommentType::VectorUnionValue;
  1023. SetError(vector_union_comment,
  1024. BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "2");
  1025. regions.push_back(MakeBinaryRegion(offset, remaining,
  1026. BinaryRegionType::Unknown, remaining,
  1027. 0, vector_union_comment));
  1028. break;
  1029. }
  1030. // Get the offset to the first type (the + sizeof(uint32_t) is to skip
  1031. // over the vector length which we already know). Validation happens
  1032. // within the loop below.
  1033. const uint64_t union_type_vector_data_offset =
  1034. union_type_vector_field_offset +
  1035. union_type_vector_field_relative_offset.value() + sizeof(uint32_t);
  1036. for (size_t i = 0; i < vector_length.value(); ++i) {
  1037. BinaryRegionComment comment;
  1038. comment.type = BinaryRegionCommentType::VectorUnionValue;
  1039. comment.index = i;
  1040. const auto union_relative_offset = ReadScalar<uint32_t>(offset);
  1041. if (!union_relative_offset.has_value()) {
  1042. const uint64_t remaining = RemainingBytes(offset);
  1043. SetError(comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "4");
  1044. regions.push_back(MakeBinaryRegion(offset, remaining,
  1045. BinaryRegionType::Unknown,
  1046. remaining, 0, comment));
  1047. break;
  1048. }
  1049. // The union offset is relative from the offset location itself.
  1050. const uint64_t union_offset = offset + union_relative_offset.value();
  1051. if (!IsValidOffset(union_offset)) {
  1052. SetError(comment, BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY);
  1053. regions.push_back(MakeBinaryRegion(offset, sizeof(uint32_t),
  1054. BinaryRegionType::UOffset, 0,
  1055. union_offset, comment));
  1056. continue;
  1057. }
  1058. const auto realized_type =
  1059. ReadScalar<uint8_t>(union_type_vector_data_offset + i);
  1060. if (!realized_type.has_value()) {
  1061. SetError(comment, BinaryRegionStatus::ERROR_INCOMPLETE_BINARY, "1");
  1062. regions.push_back(MakeBinaryRegion(
  1063. offset, 0, BinaryRegionType::Unknown, 0, 0, comment));
  1064. continue;
  1065. }
  1066. if (!IsValidUnionValue(vtable_entry->second.field->type()->index(),
  1067. realized_type.value())) {
  1068. // We already export an error in the union type field, so just skip
  1069. // building the union itself and it will default to an unreference
  1070. // Binary section.
  1071. offset += sizeof(uint32_t);
  1072. continue;
  1073. }
  1074. const std::string enum_type =
  1075. BuildUnion(union_offset, realized_type.value(), field);
  1076. comment.default_value = "(`" + enum_type + "`)";
  1077. regions.push_back(MakeBinaryRegion(offset, sizeof(uint32_t),
  1078. BinaryRegionType::UOffset, 0,
  1079. union_offset, comment));
  1080. offset += sizeof(uint32_t);
  1081. }
  1082. } break;
  1083. default: {
  1084. if (IsScalar(field->type()->element())) {
  1085. const BinaryRegionType binary_region_type =
  1086. GetRegionType(field->type()->element());
  1087. const uint64_t type_size = GetTypeSize(field->type()->element());
  1088. // TODO(dbaileychess): It might be nicer to user the
  1089. // BinaryRegion.array_length field to indicate this.
  1090. for (size_t i = 0; i < vector_length.value(); ++i) {
  1091. BinaryRegionComment vector_scalar_comment;
  1092. vector_scalar_comment.type = BinaryRegionCommentType::VectorValue;
  1093. vector_scalar_comment.index = i;
  1094. if (!IsValidRead(offset, type_size)) {
  1095. const uint64_t remaining = RemainingBytes(offset);
  1096. SetError(vector_scalar_comment,
  1097. BinaryRegionStatus::ERROR_INCOMPLETE_BINARY,
  1098. std::to_string(type_size));
  1099. regions.push_back(
  1100. MakeBinaryRegion(offset, remaining, BinaryRegionType::Unknown,
  1101. remaining, 0, vector_scalar_comment));
  1102. break;
  1103. }
  1104. if (IsUnionType(field->type()->element())) {
  1105. // This is a type for a union. Validate the value
  1106. const auto enum_value = ReadScalar<uint8_t>(offset);
  1107. // This should always have a value, due to the IsValidRead check
  1108. // above.
  1109. if (!IsValidUnionValue(field->type()->index(),
  1110. enum_value.value())) {
  1111. SetError(vector_scalar_comment,
  1112. BinaryRegionStatus::ERROR_INVALID_UNION_TYPE);
  1113. regions.push_back(MakeBinaryRegion(offset, type_size,
  1114. binary_region_type, 0, 0,
  1115. vector_scalar_comment));
  1116. offset += type_size;
  1117. continue;
  1118. }
  1119. }
  1120. regions.push_back(MakeBinaryRegion(offset, type_size,
  1121. binary_region_type, 0, 0,
  1122. vector_scalar_comment));
  1123. offset += type_size;
  1124. }
  1125. }
  1126. } break;
  1127. }
  1128. AddSection(vector_offset,
  1129. MakeBinarySection(std::string(table->name()->c_str()) + "." +
  1130. field->name()->c_str(),
  1131. section_type, std::move(regions)));
  1132. }
  1133. std::string BinaryAnnotator::BuildUnion(const uint64_t union_offset,
  1134. const uint8_t realized_type,
  1135. const reflection::Field *const field) {
  1136. const reflection::Enum *next_enum =
  1137. schema_->enums()->Get(field->type()->index());
  1138. const reflection::EnumVal *enum_val = next_enum->values()->Get(realized_type);
  1139. if (ContainsSection(union_offset)) { return enum_val->name()->c_str(); }
  1140. const reflection::Type *union_type = enum_val->union_type();
  1141. if (union_type->base_type() == reflection::BaseType::Obj) {
  1142. const reflection::Object *object =
  1143. schema_->objects()->Get(union_type->index());
  1144. if (object->is_struct()) {
  1145. // Union of vectors point to a new Binary section
  1146. std::vector<BinaryRegion> regions;
  1147. BuildStruct(union_offset, regions, field->name()->c_str(), object);
  1148. AddSection(
  1149. union_offset,
  1150. MakeBinarySection(std::string(object->name()->c_str()) + "." +
  1151. field->name()->c_str(),
  1152. BinarySectionType::Union, std::move(regions)));
  1153. } else {
  1154. BuildTable(union_offset, BinarySectionType::Table, object);
  1155. }
  1156. }
  1157. // TODO(dbaileychess): handle the other union types.
  1158. return enum_val->name()->c_str();
  1159. }
  1160. void BinaryAnnotator::FixMissingRegions() {
  1161. std::vector<BinaryRegion> regions_to_insert;
  1162. for (auto &current_section : sections_) {
  1163. BinarySection &section = current_section.second;
  1164. if (section.regions.empty()) {
  1165. // TODO(dbaileychess): is this possible?
  1166. continue;
  1167. }
  1168. uint64_t offset = section.regions[0].offset + section.regions[0].length;
  1169. for (size_t i = 1; i < section.regions.size(); ++i) {
  1170. BinaryRegion &region = section.regions[i];
  1171. const uint64_t next_offset = region.offset;
  1172. if (!IsValidOffset(next_offset)) {
  1173. // TODO(dbaileychess): figure out how we get into this situation.
  1174. continue;
  1175. }
  1176. if (offset < next_offset) {
  1177. const uint64_t padding_bytes = next_offset - offset;
  1178. BinaryRegionComment comment;
  1179. comment.type = BinaryRegionCommentType::Padding;
  1180. if (IsNonZeroRegion(offset, padding_bytes, binary_)) {
  1181. SetError(comment, BinaryRegionStatus::WARN_NO_REFERENCES);
  1182. regions_to_insert.push_back(
  1183. MakeBinaryRegion(offset, padding_bytes, BinaryRegionType::Unknown,
  1184. padding_bytes, 0, comment));
  1185. } else {
  1186. regions_to_insert.push_back(
  1187. MakeBinaryRegion(offset, padding_bytes, BinaryRegionType::Uint8,
  1188. padding_bytes, 0, comment));
  1189. }
  1190. }
  1191. offset = next_offset + region.length;
  1192. }
  1193. if (!regions_to_insert.empty()) {
  1194. section.regions.insert(section.regions.end(), regions_to_insert.begin(),
  1195. regions_to_insert.end());
  1196. std::stable_sort(section.regions.begin(), section.regions.end(),
  1197. BinaryRegionSort);
  1198. regions_to_insert.clear();
  1199. }
  1200. }
  1201. }
  1202. void BinaryAnnotator::FixMissingSections() {
  1203. uint64_t offset = 0;
  1204. std::vector<BinarySection> sections_to_insert;
  1205. for (auto &current_section : sections_) {
  1206. BinarySection &section = current_section.second;
  1207. const uint64_t section_start_offset = current_section.first;
  1208. const uint64_t section_end_offset =
  1209. section.regions.back().offset + section.regions.back().length;
  1210. if (offset < section_start_offset) {
  1211. // We are at an offset that is less then the current section.
  1212. const uint64_t pad_bytes = section_start_offset - offset + 1;
  1213. sections_to_insert.push_back(
  1214. GenerateMissingSection(offset - 1, pad_bytes, binary_));
  1215. }
  1216. offset = section_end_offset + 1;
  1217. }
  1218. // Handle the case where there are still bytes left in the binary that are
  1219. // unaccounted for.
  1220. if (offset < binary_length_) {
  1221. const uint64_t pad_bytes = binary_length_ - offset + 1;
  1222. sections_to_insert.push_back(
  1223. GenerateMissingSection(offset - 1, pad_bytes, binary_));
  1224. }
  1225. for (const BinarySection &section_to_insert : sections_to_insert) {
  1226. AddSection(section_to_insert.regions[0].offset, section_to_insert);
  1227. }
  1228. }
  1229. bool BinaryAnnotator::ContainsSection(const uint64_t offset) {
  1230. auto it = sections_.lower_bound(offset);
  1231. // If the section is found, check that it is exactly equal its offset.
  1232. if (it != sections_.end() && it->first == offset) { return true; }
  1233. // If this was the first section, there are no other previous sections to
  1234. // check.
  1235. if (it == sections_.begin()) { return false; }
  1236. // Go back one section.
  1237. --it;
  1238. // And check that if the offset is covered by the section.
  1239. return offset >= it->first && offset < it->second.regions.back().offset +
  1240. it->second.regions.back().length;
  1241. }
  1242. } // namespace flatbuffers