annotated_binary_text_gen.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461
  1. #include "annotated_binary_text_gen.h"
  2. #include <algorithm>
  3. #include <cstdint>
  4. #include <fstream>
  5. #include <ostream>
  6. #include <sstream>
  7. #include <string>
  8. #include "binary_annotator.h"
  9. #include "flatbuffers/base.h"
  10. #include "flatbuffers/util.h"
  11. namespace flatbuffers {
  12. namespace {
  13. struct OutputConfig {
  14. size_t largest_type_string = 10;
  15. size_t largest_value_string = 20;
  16. size_t max_bytes_per_line = 8;
  17. size_t offset_max_char = 4;
  18. char delimiter = '|';
  19. bool include_vector_contents = true;
  20. };
  21. static std::string ToString(const BinarySectionType type) {
  22. switch (type) {
  23. case BinarySectionType::Header: return "header";
  24. case BinarySectionType::Table: return "table";
  25. case BinarySectionType::RootTable: return "root_table";
  26. case BinarySectionType::VTable: return "vtable";
  27. case BinarySectionType::Struct: return "struct";
  28. case BinarySectionType::String: return "string";
  29. case BinarySectionType::Vector: return "vector";
  30. case BinarySectionType::Vector64: return "vector64";
  31. case BinarySectionType::Unknown: return "unknown";
  32. case BinarySectionType::Union: return "union";
  33. case BinarySectionType::Padding: return "padding";
  34. default: return "todo";
  35. }
  36. }
  37. static bool IsOffset(const BinaryRegionType type) {
  38. return type == BinaryRegionType::UOffset ||
  39. type == BinaryRegionType::SOffset ||
  40. type == BinaryRegionType::UOffset64;
  41. }
  42. template<typename T> std::string ToString(T value) {
  43. if (std::is_floating_point<T>::value) {
  44. std::stringstream ss;
  45. ss << value;
  46. return ss.str();
  47. } else {
  48. return std::to_string(value);
  49. }
  50. }
  51. template<typename T>
  52. std::string ToValueString(const BinaryRegion &region, const uint8_t *binary) {
  53. std::string s;
  54. s += "0x";
  55. const T val = ReadScalar<T>(binary + region.offset);
  56. const uint64_t start_index = region.offset + region.length - 1;
  57. for (uint64_t i = 0; i < region.length; ++i) {
  58. s += ToHex(binary[start_index - i]);
  59. }
  60. s += " (";
  61. s += ToString(val);
  62. s += ")";
  63. return s;
  64. }
  65. template<>
  66. std::string ToValueString<std::string>(const BinaryRegion &region,
  67. const uint8_t *binary) {
  68. return std::string(reinterpret_cast<const char *>(binary + region.offset),
  69. static_cast<size_t>(region.array_length));
  70. }
  71. static std::string ToValueString(const BinaryRegion &region,
  72. const uint8_t *binary,
  73. const OutputConfig &output_config) {
  74. std::string s;
  75. if (region.array_length) {
  76. if (region.type == BinaryRegionType::Uint8 ||
  77. region.type == BinaryRegionType::Unknown) {
  78. // Interpret each value as a ASCII to aid debugging
  79. for (uint64_t i = 0; i < region.array_length; ++i) {
  80. const uint8_t c = *(binary + region.offset + i);
  81. s += isprint(c) ? static_cast<char>(c & 0x7F) : '.';
  82. }
  83. return s;
  84. } else if (region.type == BinaryRegionType::Char) {
  85. // string value
  86. return ToValueString<std::string>(region, binary);
  87. }
  88. }
  89. switch (region.type) {
  90. case BinaryRegionType::Uint32:
  91. return ToValueString<uint32_t>(region, binary);
  92. case BinaryRegionType::Int32: return ToValueString<int32_t>(region, binary);
  93. case BinaryRegionType::Uint16:
  94. return ToValueString<uint16_t>(region, binary);
  95. case BinaryRegionType::Int16: return ToValueString<int16_t>(region, binary);
  96. case BinaryRegionType::Bool: return ToValueString<bool>(region, binary);
  97. case BinaryRegionType::Uint8: return ToValueString<uint8_t>(region, binary);
  98. case BinaryRegionType::Char: return ToValueString<char>(region, binary);
  99. case BinaryRegionType::Byte:
  100. case BinaryRegionType::Int8: return ToValueString<int8_t>(region, binary);
  101. case BinaryRegionType::Int64: return ToValueString<int64_t>(region, binary);
  102. case BinaryRegionType::Uint64:
  103. return ToValueString<uint64_t>(region, binary);
  104. case BinaryRegionType::Double: return ToValueString<double>(region, binary);
  105. case BinaryRegionType::Float: return ToValueString<float>(region, binary);
  106. case BinaryRegionType::UType: return ToValueString<uint8_t>(region, binary);
  107. // Handle Offsets separately, incase they add additional details.
  108. case BinaryRegionType::UOffset64:
  109. s += ToValueString<uint64_t>(region, binary);
  110. break;
  111. case BinaryRegionType::UOffset:
  112. s += ToValueString<uint32_t>(region, binary);
  113. break;
  114. case BinaryRegionType::SOffset:
  115. s += ToValueString<int32_t>(region, binary);
  116. break;
  117. case BinaryRegionType::VOffset:
  118. s += ToValueString<uint16_t>(region, binary);
  119. break;
  120. default: break;
  121. }
  122. // If this is an offset type, include the calculated offset location in the
  123. // value.
  124. // TODO(dbaileychess): It might be nicer to put this in the comment field.
  125. if (IsOffset(region.type)) {
  126. s += " Loc: 0x";
  127. s += ToHex(region.points_to_offset, output_config.offset_max_char);
  128. }
  129. return s;
  130. }
  131. struct DocContinuation {
  132. // The start column where the value text first starts
  133. size_t value_start_column = 0;
  134. // The remaining part of the doc to print.
  135. std::string value;
  136. };
  137. static std::string GenerateTypeString(const BinaryRegion &region) {
  138. return ToString(region.type) +
  139. ((region.array_length)
  140. ? "[" + std::to_string(region.array_length) + "]"
  141. : "");
  142. }
  143. static std::string GenerateComment(const BinaryRegionComment &comment,
  144. const BinarySection &) {
  145. std::string s;
  146. switch (comment.type) {
  147. case BinaryRegionCommentType::Unknown: s = "unknown"; break;
  148. case BinaryRegionCommentType::SizePrefix: s = "size prefix"; break;
  149. case BinaryRegionCommentType::RootTableOffset:
  150. s = "offset to root table `" + comment.name + "`";
  151. break;
  152. // TODO(dbaileychess): make this lowercase to follow the convention.
  153. case BinaryRegionCommentType::FileIdentifier: s = "File Identifier"; break;
  154. case BinaryRegionCommentType::Padding: s = "padding"; break;
  155. case BinaryRegionCommentType::VTableSize: s = "size of this vtable"; break;
  156. case BinaryRegionCommentType::VTableRefferingTableLength:
  157. s = "size of referring table";
  158. break;
  159. case BinaryRegionCommentType::VTableFieldOffset:
  160. s = "offset to field `" + comment.name;
  161. break;
  162. case BinaryRegionCommentType::VTableUnknownFieldOffset:
  163. s = "offset to unknown field (id: " + std::to_string(comment.index) + ")";
  164. break;
  165. case BinaryRegionCommentType::TableVTableOffset:
  166. s = "offset to vtable";
  167. break;
  168. case BinaryRegionCommentType::TableField:
  169. s = "table field `" + comment.name;
  170. break;
  171. case BinaryRegionCommentType::TableUnknownField: s = "unknown field"; break;
  172. case BinaryRegionCommentType::TableOffsetField:
  173. s = "offset to field `" + comment.name + "`";
  174. break;
  175. case BinaryRegionCommentType::StructField:
  176. s = "struct field `" + comment.name + "`";
  177. break;
  178. case BinaryRegionCommentType::ArrayField:
  179. s = "array field `" + comment.name + "`[" +
  180. std::to_string(comment.index) + "]";
  181. break;
  182. case BinaryRegionCommentType::StringLength: s = "length of string"; break;
  183. case BinaryRegionCommentType::StringValue: s = "string literal"; break;
  184. case BinaryRegionCommentType::StringTerminator:
  185. s = "string terminator";
  186. break;
  187. case BinaryRegionCommentType::VectorLength:
  188. s = "length of vector (# items)";
  189. break;
  190. case BinaryRegionCommentType::VectorValue:
  191. s = "value[" + std::to_string(comment.index) + "]";
  192. break;
  193. case BinaryRegionCommentType::VectorTableValue:
  194. s = "offset to table[" + std::to_string(comment.index) + "]";
  195. break;
  196. case BinaryRegionCommentType::VectorStringValue:
  197. s = "offset to string[" + std::to_string(comment.index) + "]";
  198. break;
  199. case BinaryRegionCommentType::VectorUnionValue:
  200. s = "offset to union[" + std::to_string(comment.index) + "]";
  201. break;
  202. default: break;
  203. }
  204. if (!comment.default_value.empty()) { s += " " + comment.default_value; }
  205. switch (comment.status) {
  206. case BinaryRegionStatus::OK: break; // no-op
  207. case BinaryRegionStatus::WARN: s = "WARN: " + s; break;
  208. case BinaryRegionStatus::WARN_NO_REFERENCES:
  209. s = "WARN: nothing refers to this section.";
  210. break;
  211. case BinaryRegionStatus::WARN_CORRUPTED_PADDING:
  212. s = "WARN: could be corrupted padding region.";
  213. break;
  214. case BinaryRegionStatus::WARN_PADDING_LENGTH:
  215. s = "WARN: padding is longer than expected.";
  216. break;
  217. case BinaryRegionStatus::ERROR: s = "ERROR: " + s; break;
  218. case BinaryRegionStatus::ERROR_OFFSET_OUT_OF_BINARY:
  219. s = "ERROR: " + s + ". Invalid offset, points outside the binary.";
  220. break;
  221. case BinaryRegionStatus::ERROR_INCOMPLETE_BINARY:
  222. s = "ERROR: " + s + ". Incomplete binary, expected to read " +
  223. comment.status_message + " bytes.";
  224. break;
  225. case BinaryRegionStatus::ERROR_LENGTH_TOO_LONG:
  226. s = "ERROR: " + s + ". Longer than the binary.";
  227. break;
  228. case BinaryRegionStatus::ERROR_LENGTH_TOO_SHORT:
  229. s = "ERROR: " + s + ". Shorter than the minimum length: ";
  230. break;
  231. case BinaryRegionStatus::ERROR_REQUIRED_FIELD_NOT_PRESENT:
  232. s = "ERROR: " + s + ". Required field is not present.";
  233. break;
  234. case BinaryRegionStatus::ERROR_INVALID_UNION_TYPE:
  235. s = "ERROR: " + s + ". Invalid union type value.";
  236. break;
  237. case BinaryRegionStatus::ERROR_CYCLE_DETECTED:
  238. s = "ERROR: " + s + ". Invalid offset, cycle detected.";
  239. break;
  240. }
  241. return s;
  242. }
  243. static void GenerateDocumentation(std::ostream &os, const BinaryRegion &region,
  244. const BinarySection &section,
  245. const uint8_t *binary,
  246. DocContinuation &continuation,
  247. const OutputConfig &output_config) {
  248. // Check if there is a doc continuation that should be prioritized.
  249. if (continuation.value_start_column) {
  250. os << std::string(continuation.value_start_column - 2, ' ');
  251. os << output_config.delimiter << " ";
  252. os << continuation.value.substr(0, output_config.max_bytes_per_line);
  253. continuation.value = continuation.value.substr(
  254. std::min(output_config.max_bytes_per_line, continuation.value.size()));
  255. return;
  256. }
  257. size_t size_of = 0;
  258. {
  259. std::stringstream ss;
  260. ss << std::setw(static_cast<int>(output_config.largest_type_string))
  261. << std::left;
  262. ss << GenerateTypeString(region);
  263. os << ss.str();
  264. size_of = ss.str().size();
  265. }
  266. os << " " << output_config.delimiter << " ";
  267. if (region.array_length) {
  268. // Record where the value is first being outputted.
  269. continuation.value_start_column = 3 + size_of;
  270. // Get the full-length value, which we will chunk below.
  271. const std::string value = ToValueString(region, binary, output_config);
  272. std::stringstream ss;
  273. ss << std::setw(static_cast<int>(output_config.largest_value_string))
  274. << std::left;
  275. ss << value.substr(0, output_config.max_bytes_per_line);
  276. os << ss.str();
  277. continuation.value =
  278. value.substr(std::min(output_config.max_bytes_per_line, value.size()));
  279. } else {
  280. std::stringstream ss;
  281. ss << std::setw(static_cast<int>(output_config.largest_value_string))
  282. << std::left;
  283. ss << ToValueString(region, binary, output_config);
  284. os << ss.str();
  285. }
  286. os << " " << output_config.delimiter << " ";
  287. os << GenerateComment(region.comment, section);
  288. }
  289. static void GenerateRegion(std::ostream &os, const BinaryRegion &region,
  290. const BinarySection &section, const uint8_t *binary,
  291. const OutputConfig &output_config) {
  292. bool doc_generated = false;
  293. DocContinuation doc_continuation;
  294. for (uint64_t i = 0; i < region.length; ++i) {
  295. if ((i % output_config.max_bytes_per_line) == 0) {
  296. // Start a new line of output
  297. os << std::endl;
  298. os << " +0x" << ToHex(region.offset + i, output_config.offset_max_char);
  299. os << " " << output_config.delimiter;
  300. }
  301. // Add each byte
  302. os << " " << ToHex(binary[region.offset + i]);
  303. // Check for end of line or end of region conditions.
  304. if (((i + 1) % output_config.max_bytes_per_line == 0) ||
  305. i + 1 == region.length) {
  306. if (i + 1 == region.length) {
  307. // We are out of bytes but haven't the kMaxBytesPerLine, so we need to
  308. // zero those out to align everything globally.
  309. for (uint64_t j = i + 1; (j % output_config.max_bytes_per_line) != 0;
  310. ++j) {
  311. os << " ";
  312. }
  313. }
  314. os << " " << output_config.delimiter;
  315. // This is the end of the first line or its the last byte of the region,
  316. // generate the end-of-line documentation.
  317. if (!doc_generated) {
  318. os << " ";
  319. GenerateDocumentation(os, region, section, binary, doc_continuation,
  320. output_config);
  321. // If we have a value in the doc continuation, that means the doc is
  322. // being printed on multiple lines.
  323. doc_generated = doc_continuation.value.empty();
  324. }
  325. }
  326. }
  327. }
  328. static void GenerateSection(std::ostream &os, const BinarySection &section,
  329. const uint8_t *binary,
  330. const OutputConfig &output_config) {
  331. os << std::endl;
  332. os << ToString(section.type);
  333. if (!section.name.empty()) { os << " (" + section.name + ")"; }
  334. os << ":";
  335. // As a space saving measure, skip generating every vector element, just put
  336. // the first and last elements in the output. Skip the whole thing if there
  337. // are only three or fewer elements, as it doesn't save space.
  338. if ((section.type == BinarySectionType::Vector ||
  339. section.type == BinarySectionType::Vector64) &&
  340. !output_config.include_vector_contents && section.regions.size() > 4) {
  341. // Generate the length region which should be first.
  342. GenerateRegion(os, section.regions[0], section, binary, output_config);
  343. // Generate the first element.
  344. GenerateRegion(os, section.regions[1], section, binary, output_config);
  345. // Indicate that we omitted elements.
  346. os << std::endl
  347. << " <" << section.regions.size() - 3 << " regions omitted>";
  348. // Generate the last element.
  349. GenerateRegion(os, section.regions.back(), section, binary, output_config);
  350. os << std::endl;
  351. return;
  352. }
  353. for (const BinaryRegion &region : section.regions) {
  354. GenerateRegion(os, region, section, binary, output_config);
  355. }
  356. os << std::endl;
  357. }
  358. } // namespace
  359. bool AnnotatedBinaryTextGenerator::Generate(
  360. const std::string &filename, const std::string &schema_filename) {
  361. OutputConfig output_config;
  362. output_config.max_bytes_per_line = options_.max_bytes_per_line;
  363. output_config.include_vector_contents = options_.include_vector_contents;
  364. // Given the length of the binary, we can calculate the maximum number of
  365. // characters to display in the offset hex: (i.e. 2 would lead to 0XFF being
  366. // the max output).
  367. output_config.offset_max_char =
  368. binary_length_ > 0xFFFFFF
  369. ? 8
  370. : (binary_length_ > 0xFFFF ? 6 : (binary_length_ > 0xFF ? 4 : 2));
  371. // Find the largest type string of all the regions in this file, so we can
  372. // align the output nicely.
  373. output_config.largest_type_string = 0;
  374. for (const auto &section : annotations_) {
  375. for (const auto &region : section.second.regions) {
  376. std::string s = GenerateTypeString(region);
  377. if (s.size() > output_config.largest_type_string) {
  378. output_config.largest_type_string = s.size();
  379. }
  380. // Don't consider array regions, as they will be split to multiple lines.
  381. if (!region.array_length) {
  382. s = ToValueString(region, binary_, output_config);
  383. if (s.size() > output_config.largest_value_string) {
  384. output_config.largest_value_string = s.size();
  385. }
  386. }
  387. }
  388. }
  389. // Modify the output filename.
  390. std::string output_filename = StripExtension(filename);
  391. output_filename += options_.output_postfix;
  392. output_filename +=
  393. "." + (options_.output_extension.empty() ? GetExtension(filename)
  394. : options_.output_extension);
  395. std::ofstream ofs(output_filename.c_str());
  396. ofs << "// Annotated Flatbuffer Binary" << std::endl;
  397. ofs << "//" << std::endl;
  398. ofs << "// Schema file: " << schema_filename << std::endl;
  399. ofs << "// Binary file: " << filename << std::endl;
  400. // Generate each of the binary sections
  401. for (const auto &section : annotations_) {
  402. GenerateSection(ofs, section.second, binary_, output_config);
  403. }
  404. ofs.close();
  405. return true;
  406. }
  407. } // namespace flatbuffers