dwarf_eh.h 16 KB


  1. /*
  2. * Copyright 2010-2011 PathScale, Inc. All rights reserved.
  3. *
  4. * Redistribution and use in source and binary forms, with or without
  5. * modification, are permitted provided that the following conditions are met:
  6. *
  7. * 1. Redistributions of source code must retain the above copyright notice,
  8. * this list of conditions and the following disclaimer.
  9. *
  10. * 2. Redistributions in binary form must reproduce the above copyright notice,
  11. * this list of conditions and the following disclaimer in the documentation
  12. * and/or other materials provided with the distribution.
  13. *
  14. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
  15. * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  16. * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  17. * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
  18. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  19. * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  20. * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  21. * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  22. * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  23. * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  24. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. */
  26. /**
  27. * dwarf_eh.h - Defines some helper functions for parsing DWARF exception
  28. * handling tables.
  29. *
  30. * This file contains various helper functions that are independent of the
  31. * language-specific code. It can be used in any personality function for the
  32. * Itanium ABI.
  33. */
  34. #include <assert.h>
  35. // TODO: Factor out Itanium / ARM differences. We probably want an itanium.h
  36. // and arm.h that can be included by this file depending on the target ABI.
  37. // _GNU_SOURCE must be defined for unwind.h to expose some of the functions
  38. // that we want. If it isn't, then we define it and undefine it to make sure
  39. // that it doesn't impact the rest of the program.
  40. #ifndef _GNU_SOURCE
  41. # define _GNU_SOURCE 1
  42. # include "unwind.h"
  43. # undef _GNU_SOURCE
  44. #else
  45. # include "unwind.h"
  46. #endif
  47. #include <stdint.h>
  /// Byte-granular pointer used to walk raw DWARF exception-handling data.
  typedef unsigned char *dw_eh_ptr_t;
  /// Bit that, when set in a value format, marks the quantity as signed.
  #define DW_EH_PE_signed 0x08
  /**
   * Value formats used in DWARF EH tables.  Every signed format is the
   * matching unsigned format with DW_EH_PE_signed set, so the members are
   * listed here as unsigned / signed pairs.
   */
  enum dwarf_data_encoding
  {
      /// Absolute pointer value.
      DW_EH_PE_absptr = 0x00,

      /// Unsigned, little-endian, base-128 encoded (variable length).
      DW_EH_PE_uleb128 = 0x01,
      /// Signed, little-endian, base-128 encoded (variable length).
      DW_EH_PE_sleb128 = DW_EH_PE_uleb128 | DW_EH_PE_signed,

      /// Unsigned 16-bit integer.
      DW_EH_PE_udata2 = 0x02,
      /// Signed 16-bit integer.
      DW_EH_PE_sdata2 = DW_EH_PE_udata2 | DW_EH_PE_signed,

      /// Unsigned 32-bit integer.
      DW_EH_PE_udata4 = 0x03,
      /// Signed 32-bit integer.
      DW_EH_PE_sdata4 = DW_EH_PE_udata4 | DW_EH_PE_signed,

      /// Unsigned 64-bit integer.
      DW_EH_PE_udata8 = 0x04,
      /// Signed 64-bit integer.
      DW_EH_PE_sdata8 = DW_EH_PE_udata8 | DW_EH_PE_signed
  };
  74. /**
  75. * Returns the encoding for a DWARF EH table entry. The encoding is stored in
  76. * the low four of an octet. The high four bits store the addressing mode.
  77. */
  78. static inline enum dwarf_data_encoding get_encoding(unsigned char x)
  79. {
  80. return static_cast<enum dwarf_data_encoding>(x & 0xf);
  81. }
  /**
   * DWARF addressing mode constants.  Decoding a pointer from a DWARF
   * exception table takes two pieces of information from the encoding octet:
   * the low four bits give the value format (how to decode the number) and
   * the high four bits give the addressing mode (how to turn that number into
   * an address in memory).  The constants below describe the latter, plus the
   * special "omitted" marker.
   */
  enum dwarf_data_relative
  {
      /// Value relative to program counter
      DW_EH_PE_pcrel = 0x10,
      /// Value relative to the text segment
      DW_EH_PE_textrel = 0x20,
      /// Value relative to the data segment
      DW_EH_PE_datarel = 0x30,
      /// Value relative to the start of the function
      DW_EH_PE_funcrel = 0x40,
      /// Aligned pointer (Not supported yet - are they actually used?)
      DW_EH_PE_aligned = 0x50,
      /// Pointer points to address of real value
      DW_EH_PE_indirect = 0x80,
      /// Value is omitted
      DW_EH_PE_omit = 0xff
  };
  106. /**
  107. * Returns the addressing mode component of this encoding.
  108. */
  109. static inline enum dwarf_data_relative get_base(unsigned char x)
  110. {
  111. return static_cast<enum dwarf_data_relative>(x & 0x70);
  112. }
  113. /**
  114. * Returns whether an encoding represents an indirect address.
  115. */
  116. static int is_indirect(unsigned char x)
  117. {
  118. return ((x & DW_EH_PE_indirect) == DW_EH_PE_indirect);
  119. }
  120. /**
  121. * Returns the size of a fixed-size encoding. This function will abort if
  122. * called with a value that is not a fixed-size encoding.
  123. */
  124. static inline int dwarf_size_of_fixed_size_field(unsigned char type)
  125. {
  126. switch (get_encoding(type))
  127. {
  128. default: abort();
  129. case DW_EH_PE_sdata2:
  130. case DW_EH_PE_udata2: return 2;
  131. case DW_EH_PE_sdata4:
  132. case DW_EH_PE_udata4: return 4;
  133. case DW_EH_PE_sdata8:
  134. case DW_EH_PE_udata8: return 8;
  135. case DW_EH_PE_absptr: return sizeof(void*);
  136. }
  137. }
  138. /**
  139. * Read an unsigned, little-endian, base-128, DWARF value. Updates *data to
  140. * point to the end of the value. Stores the number of bits read in the value
  141. * pointed to by b, allowing you to determine the value of the highest bit, and
  142. * therefore the sign of a signed value.
  143. *
  144. * This function is not intended to be called directly. Use read_sleb128() or
  145. * read_uleb128() for reading signed and unsigned versions, respectively.
  146. */
  147. static uint64_t read_leb128(dw_eh_ptr_t *data, int *b)
  148. {
  149. uint64_t uleb = 0;
  150. unsigned int bit = 0;
  151. unsigned char digit = 0;
  152. // We have to read at least one octet, and keep reading until we get to one
  153. // with the high bit unset
  154. do
  155. {
  156. // This check is a bit too strict - we should also check the highest
  157. // bit of the digit.
  158. assert(bit < sizeof(uint64_t) * 8);
  159. // Get the base 128 digit
  160. digit = (**data) & 0x7f;
  161. // Add it to the current value
  162. uleb += digit << bit;
  163. // Increase the shift value
  164. bit += 7;
  165. // Proceed to the next octet
  166. (*data)++;
  167. // Terminate when we reach a value that does not have the high bit set
  168. // (i.e. which was not modified when we mask it with 0x7f)
  169. } while ((*(*data - 1)) != digit);
  170. *b = bit;
  171. return uleb;
  172. }
  173. /**
  174. * Reads an unsigned little-endian base-128 value starting at the address
  175. * pointed to by *data. Updates *data to point to the next byte after the end
  176. * of the variable-length value.
  177. */
  178. static int64_t read_uleb128(dw_eh_ptr_t *data)
  179. {
  180. int b;
  181. return read_leb128(data, &b);
  182. }
  183. /**
  184. * Reads a signed little-endian base-128 value starting at the address pointed
  185. * to by *data. Updates *data to point to the next byte after the end of the
  186. * variable-length value.
  187. */
  188. static int64_t read_sleb128(dw_eh_ptr_t *data)
  189. {
  190. int bits;
  191. // Read as if it's signed
  192. uint64_t uleb = read_leb128(data, &bits);
  193. // If the most significant bit read is 1, then we need to sign extend it
  194. if ((uleb >> (bits-1)) == 1)
  195. {
  196. // Sign extend by setting all bits in front of it to 1
  197. uleb |= static_cast<int64_t>(-1) << bits;
  198. }
  199. return static_cast<int64_t>(uleb);
  200. }
  201. /**
  202. * Reads a value using the specified encoding from the address pointed to by
  203. * *data. Updates the value of *data to point to the next byte after the end
  204. * of the data.
  205. */
  206. static uint64_t read_value(char encoding, dw_eh_ptr_t *data)
  207. {
  208. enum dwarf_data_encoding type = get_encoding(encoding);
  209. switch (type)
  210. {
  211. // Read fixed-length types
  212. #define READ(dwarf, type) \
  213. case dwarf:\
  214. {\
  215. type t;\
  216. memcpy(&t, *data, sizeof t);\
  217. *data += sizeof t;\
  218. return static_cast<uint64_t>(t);\
  219. }
  220. READ(DW_EH_PE_udata2, uint16_t)
  221. READ(DW_EH_PE_udata4, uint32_t)
  222. READ(DW_EH_PE_udata8, uint64_t)
  223. READ(DW_EH_PE_sdata2, int16_t)
  224. READ(DW_EH_PE_sdata4, int32_t)
  225. READ(DW_EH_PE_sdata8, int64_t)
  226. READ(DW_EH_PE_absptr, intptr_t)
  227. #undef READ
  228. // Read variable-length types
  229. case DW_EH_PE_sleb128:
  230. return read_sleb128(data);
  231. case DW_EH_PE_uleb128:
  232. return read_uleb128(data);
  233. default: abort();
  234. }
  235. }
  236. /**
  237. * Resolves an indirect value. This expects an unwind context, an encoding, a
  238. * decoded value, and the start of the region as arguments. The returned value
  239. * is a pointer to the address identified by the encoded value.
  240. *
  241. * If the encoding does not specify an indirect value, then this returns v.
  242. */
  243. static uint64_t resolve_indirect_value(_Unwind_Context *c,
  244. unsigned char encoding,
  245. int64_t v,
  246. dw_eh_ptr_t start)
  247. {
  248. switch (get_base(encoding))
  249. {
  250. case DW_EH_PE_pcrel:
  251. v += reinterpret_cast<uint64_t>(start);
  252. break;
  253. case DW_EH_PE_textrel:
  254. v += static_cast<uint64_t>(static_cast<uintptr_t>(_Unwind_GetTextRelBase(c)));
  255. break;
  256. case DW_EH_PE_datarel:
  257. v += static_cast<uint64_t>(static_cast<uintptr_t>(_Unwind_GetDataRelBase(c)));
  258. break;
  259. case DW_EH_PE_funcrel:
  260. v += static_cast<uint64_t>(static_cast<uintptr_t>(_Unwind_GetRegionStart(c)));
  261. default:
  262. break;
  263. }
  264. // If this is an indirect value, then it is really the address of the real
  265. // value
  266. // TODO: Check whether this should really always be a pointer - it seems to
  267. // be a GCC extensions, so not properly documented...
  268. if (is_indirect(encoding))
  269. {
  270. v = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(*reinterpret_cast<void**>(v)));
  271. }
  272. return v;
  273. }
  274. /**
  275. * Reads an encoding and a value, updating *data to point to the next byte.
  276. */
  277. static inline void read_value_with_encoding(_Unwind_Context *context,
  278. dw_eh_ptr_t *data,
  279. uint64_t *out)
  280. {
  281. dw_eh_ptr_t start = *data;
  282. unsigned char encoding = *((*data)++);
  283. // If this value is omitted, skip it and don't touch the output value
  284. if (encoding == DW_EH_PE_omit) { return; }
  285. *out = read_value(encoding, data);
  286. *out = resolve_indirect_value(context, encoding, *out, start);
  287. }
  288. /**
  289. * Structure storing a decoded language-specific data area. Use parse_lsda()
  290. * to generate an instance of this structure from the address returned by the
  291. * generic unwind library.
  292. *
  293. * You should not need to inspect the fields of this structure directly if you
  294. * are just using this header. The structure stores the locations of the
  295. * various tables used for unwinding exceptions and is used by the functions
  296. * for reading values from these tables.
  297. */
  298. struct dwarf_eh_lsda
  299. {
  300. /// The start of the region. This is a cache of the value returned by
  301. /// _Unwind_GetRegionStart().
  302. dw_eh_ptr_t region_start;
  303. /// The start of the landing pads table.
  304. dw_eh_ptr_t landing_pads;
  305. /// The start of the type table.
  306. dw_eh_ptr_t type_table;
  307. /// The encoding used for entries in the type tables.
  308. unsigned char type_table_encoding;
  309. /// The location of the call-site table.
  310. dw_eh_ptr_t call_site_table;
  311. /// The location of the action table.
  312. dw_eh_ptr_t action_table;
  313. /// The encoding used for entries in the call-site table.
  314. unsigned char callsite_encoding;
  315. };
  316. /**
  317. * Parse the header on the language-specific data area and return a structure
  318. * containing the addresses and encodings of the various tables.
  319. */
  320. static inline struct dwarf_eh_lsda parse_lsda(_Unwind_Context *context,
  321. unsigned char *data)
  322. {
  323. struct dwarf_eh_lsda lsda;
  324. lsda.region_start = reinterpret_cast<dw_eh_ptr_t>(_Unwind_GetRegionStart(context));
  325. // If the landing pads are relative to anything other than the start of
  326. // this region, find out where. This is @LPStart in the spec, although the
  327. // encoding that GCC uses does not quite match the spec.
  328. uint64_t v = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(lsda.region_start));
  329. read_value_with_encoding(context, &data, &v);
  330. lsda.landing_pads = reinterpret_cast<dw_eh_ptr_t>(static_cast<uintptr_t>(v));
  331. // If there is a type table, find out where it is. This is @TTBase in the
  332. // spec. Note: we find whether there is a type table pointer by checking
  333. // whether the leading byte is DW_EH_PE_omit (0xff), which is not what the
  334. // spec says, but does seem to be how G++ indicates this.
  335. lsda.type_table = 0;
  336. lsda.type_table_encoding = *data++;
  337. if (lsda.type_table_encoding != DW_EH_PE_omit)
  338. {
  339. v = read_uleb128(&data);
  340. dw_eh_ptr_t type_table = data;
  341. type_table += v;
  342. lsda.type_table = type_table;
  343. //lsda.type_table = (uintptr_t*)(data + v);
  344. }
  345. #if defined(__arm__) && !defined(__ARM_DWARF_EH__)
  346. lsda.type_table_encoding = (DW_EH_PE_pcrel | DW_EH_PE_indirect);
  347. #endif
  348. lsda.callsite_encoding = static_cast<enum dwarf_data_encoding>(*(data++));
  349. // Action table is immediately after the call site table
  350. lsda.action_table = data;
  351. uintptr_t callsite_size = static_cast<uintptr_t>(read_uleb128(&data));
  352. lsda.action_table = data + callsite_size;
  353. // Call site table is immediately after the header
  354. lsda.call_site_table = static_cast<dw_eh_ptr_t>(data);
  355. return lsda;
  356. }
  357. /**
  358. * Structure representing an action to be performed while unwinding. This
  359. * contains the address that should be unwound to and the action record that
  360. * provoked this action.
  361. */
  362. struct dwarf_eh_action
  363. {
  364. /**
  365. * The address that this action directs should be the new program counter
  366. * value after unwinding.
  367. */
  368. dw_eh_ptr_t landing_pad;
  369. /// The address of the action record.
  370. dw_eh_ptr_t action_record;
  371. };
  372. /**
  373. * Look up the landing pad that corresponds to the current invoke.
  374. * Returns true if record exists. The context is provided by the generic
  375. * unwind library and the lsda should be the result of a call to parse_lsda().
  376. *
  377. * The action record is returned via the result parameter.
  378. */
  379. static bool dwarf_eh_find_callsite(struct _Unwind_Context *context,
  380. struct dwarf_eh_lsda *lsda,
  381. struct dwarf_eh_action *result)
  382. {
  383. result->action_record = 0;
  384. result->landing_pad = 0;
  385. // The current instruction pointer offset within the region
  386. uint64_t ip = _Unwind_GetIP(context) - _Unwind_GetRegionStart(context);
  387. unsigned char *callsite_table = static_cast<unsigned char*>(lsda->call_site_table);
  388. while (callsite_table <= lsda->action_table)
  389. {
  390. // Once again, the layout deviates from the spec.
  391. uint64_t call_site_start, call_site_size, landing_pad, action;
  392. call_site_start = read_value(lsda->callsite_encoding, &callsite_table);
  393. call_site_size = read_value(lsda->callsite_encoding, &callsite_table);
  394. // Call site entries are sorted, so if we find a call site that's after
  395. // the current instruction pointer then there is no action associated
  396. // with this call and we should unwind straight through this frame
  397. // without doing anything.
  398. if (call_site_start > ip) { break; }
  399. // Read the address of the landing pad and the action from the call
  400. // site table.
  401. landing_pad = read_value(lsda->callsite_encoding, &callsite_table);
  402. action = read_uleb128(&callsite_table);
  403. // We should not include the call_site_start (beginning of the region)
  404. // address in the ip range. For each call site:
  405. //
  406. // address1: call proc
  407. // address2: next instruction
  408. //
  409. // The call stack contains address2 and not address1, address1 can be
  410. // at the end of another EH region.
  411. if (call_site_start < ip && ip <= call_site_start + call_site_size)
  412. {
  413. if (action)
  414. {
  415. // Action records are 1-biased so both no-record and zeroth
  416. // record can be stored.
  417. result->action_record = lsda->action_table + action - 1;
  418. }
  419. // No landing pad means keep unwinding.
  420. if (landing_pad)
  421. {
  422. // Landing pad is the offset from the value in the header
  423. result->landing_pad = lsda->landing_pads + landing_pad;
  424. }
  425. return true;
  426. }
  427. }
  428. return false;
  429. }
  /// Defines a 64-bit exception class from 8 bytes (endian independent).  The
  /// first argument becomes the most significant byte.
  #define EXCEPTION_CLASS(a,b,c,d,e,f,g,h) \
      ((static_cast<uint64_t>(a) << 56) +\
       (static_cast<uint64_t>(b) << 48) +\
       (static_cast<uint64_t>(c) << 40) +\
       (static_cast<uint64_t>(d) << 32) +\
       (static_cast<uint64_t>(e) << 24) +\
       (static_cast<uint64_t>(f) << 16) +\
       (static_cast<uint64_t>(g) << 8) +\
       (static_cast<uint64_t>(h)))
  /// Defines a 32-bit value from 4 bytes (endian independent).  The first
  /// argument becomes the most significant byte.  The whole expansion is
  /// parenthesised so that it behaves as a single operand when it is used next
  /// to operators that bind more tightly than `+`.
  #define GENERIC_EXCEPTION_CLASS(e,f,g,h) \
      ((static_cast<uint32_t>(e) << 24) +\
       (static_cast<uint32_t>(f) << 16) +\
       (static_cast<uint32_t>(g) << 8) +\
       (static_cast<uint32_t>(h)))