# dis.py
"""Disassembler of Python byte code into mnemonics."""
import sys
import types
import collections
import io

from opcode import *
from opcode import (
    __all__ as _opcodes_all,
    _cache_format,
    _inline_cache_entries,
    _nb_ops,
    _intrinsic_1_descs,
    _intrinsic_2_descs,
    _specializations,
    _specialized_instructions,
)
  17. __all__ = ["code_info", "dis", "disassemble", "distb", "disco",
  18. "findlinestarts", "findlabels", "show_code",
  19. "get_instructions", "Instruction", "Bytecode"] + _opcodes_all
  20. del _opcodes_all
  21. _have_code = (types.MethodType, types.FunctionType, types.CodeType,
  22. classmethod, staticmethod, type)
  23. FORMAT_VALUE = opmap['FORMAT_VALUE']
  24. FORMAT_VALUE_CONVERTERS = (
  25. (None, ''),
  26. (str, 'str'),
  27. (repr, 'repr'),
  28. (ascii, 'ascii'),
  29. )
  30. MAKE_FUNCTION = opmap['MAKE_FUNCTION']
  31. MAKE_FUNCTION_FLAGS = ('defaults', 'kwdefaults', 'annotations', 'closure')
  32. LOAD_CONST = opmap['LOAD_CONST']
  33. RETURN_CONST = opmap['RETURN_CONST']
  34. LOAD_GLOBAL = opmap['LOAD_GLOBAL']
  35. BINARY_OP = opmap['BINARY_OP']
  36. JUMP_BACKWARD = opmap['JUMP_BACKWARD']
  37. FOR_ITER = opmap['FOR_ITER']
  38. SEND = opmap['SEND']
  39. LOAD_ATTR = opmap['LOAD_ATTR']
  40. LOAD_SUPER_ATTR = opmap['LOAD_SUPER_ATTR']
  41. CALL_INTRINSIC_1 = opmap['CALL_INTRINSIC_1']
  42. CALL_INTRINSIC_2 = opmap['CALL_INTRINSIC_2']
  43. CACHE = opmap["CACHE"]
  44. _all_opname = list(opname)
  45. _all_opmap = dict(opmap)
  46. _empty_slot = [slot for slot, name in enumerate(_all_opname) if name.startswith("<")]
  47. for spec_op, specialized in zip(_empty_slot, _specialized_instructions):
  48. # fill opname and opmap
  49. _all_opname[spec_op] = specialized
  50. _all_opmap[specialized] = spec_op
  51. deoptmap = {
  52. specialized: base for base, family in _specializations.items() for specialized in family
  53. }
  54. def _try_compile(source, name):
  55. """Attempts to compile the given source, first as an expression and
  56. then as a statement if the first approach fails.
  57. Utility function to accept strings in functions that otherwise
  58. expect code objects
  59. """
  60. try:
  61. return compile(source, name, 'eval')
  62. except SyntaxError:
  63. pass
  64. return compile(source, name, 'exec')
  65. def dis(x=None, *, file=None, depth=None, show_caches=False, adaptive=False):
  66. """Disassemble classes, methods, functions, and other compiled objects.
  67. With no argument, disassemble the last traceback.
  68. Compiled objects currently include generator objects, async generator
  69. objects, and coroutine objects, all of which store their code object
  70. in a special attribute.
  71. """
  72. if x is None:
  73. distb(file=file, show_caches=show_caches, adaptive=adaptive)
  74. return
  75. # Extract functions from methods.
  76. if hasattr(x, '__func__'):
  77. x = x.__func__
  78. # Extract compiled code objects from...
  79. if hasattr(x, '__code__'): # ...a function, or
  80. x = x.__code__
  81. elif hasattr(x, 'gi_code'): #...a generator object, or
  82. x = x.gi_code
  83. elif hasattr(x, 'ag_code'): #...an asynchronous generator object, or
  84. x = x.ag_code
  85. elif hasattr(x, 'cr_code'): #...a coroutine.
  86. x = x.cr_code
  87. # Perform the disassembly.
  88. if hasattr(x, '__dict__'): # Class or module
  89. items = sorted(x.__dict__.items())
  90. for name, x1 in items:
  91. if isinstance(x1, _have_code):
  92. print("Disassembly of %s:" % name, file=file)
  93. try:
  94. dis(x1, file=file, depth=depth, show_caches=show_caches, adaptive=adaptive)
  95. except TypeError as msg:
  96. print("Sorry:", msg, file=file)
  97. print(file=file)
  98. elif hasattr(x, 'co_code'): # Code object
  99. _disassemble_recursive(x, file=file, depth=depth, show_caches=show_caches, adaptive=adaptive)
  100. elif isinstance(x, (bytes, bytearray)): # Raw bytecode
  101. _disassemble_bytes(x, file=file, show_caches=show_caches)
  102. elif isinstance(x, str): # Source code
  103. _disassemble_str(x, file=file, depth=depth, show_caches=show_caches, adaptive=adaptive)
  104. else:
  105. raise TypeError("don't know how to disassemble %s objects" %
  106. type(x).__name__)
  107. def distb(tb=None, *, file=None, show_caches=False, adaptive=False):
  108. """Disassemble a traceback (default: last traceback)."""
  109. if tb is None:
  110. try:
  111. if hasattr(sys, 'last_exc'):
  112. tb = sys.last_exc.__traceback__
  113. else:
  114. tb = sys.last_traceback
  115. except AttributeError:
  116. raise RuntimeError("no last traceback to disassemble") from None
  117. while tb.tb_next: tb = tb.tb_next
  118. disassemble(tb.tb_frame.f_code, tb.tb_lasti, file=file, show_caches=show_caches, adaptive=adaptive)
  119. # The inspect module interrogates this dictionary to build its
  120. # list of CO_* constants. It is also used by pretty_flags to
  121. # turn the co_flags field into a human readable list.
  122. COMPILER_FLAG_NAMES = {
  123. 1: "OPTIMIZED",
  124. 2: "NEWLOCALS",
  125. 4: "VARARGS",
  126. 8: "VARKEYWORDS",
  127. 16: "NESTED",
  128. 32: "GENERATOR",
  129. 64: "NOFREE",
  130. 128: "COROUTINE",
  131. 256: "ITERABLE_COROUTINE",
  132. 512: "ASYNC_GENERATOR",
  133. }
  134. def pretty_flags(flags):
  135. """Return pretty representation of code flags."""
  136. names = []
  137. for i in range(32):
  138. flag = 1<<i
  139. if flags & flag:
  140. names.append(COMPILER_FLAG_NAMES.get(flag, hex(flag)))
  141. flags ^= flag
  142. if not flags:
  143. break
  144. else:
  145. names.append(hex(flags))
  146. return ", ".join(names)
  147. class _Unknown:
  148. def __repr__(self):
  149. return "<unknown>"
  150. # Sentinel to represent values that cannot be calculated
  151. UNKNOWN = _Unknown()
  152. def _get_code_object(x):
  153. """Helper to handle methods, compiled or raw code objects, and strings."""
  154. # Extract functions from methods.
  155. if hasattr(x, '__func__'):
  156. x = x.__func__
  157. # Extract compiled code objects from...
  158. if hasattr(x, '__code__'): # ...a function, or
  159. x = x.__code__
  160. elif hasattr(x, 'gi_code'): #...a generator object, or
  161. x = x.gi_code
  162. elif hasattr(x, 'ag_code'): #...an asynchronous generator object, or
  163. x = x.ag_code
  164. elif hasattr(x, 'cr_code'): #...a coroutine.
  165. x = x.cr_code
  166. # Handle source code.
  167. if isinstance(x, str):
  168. x = _try_compile(x, "<disassembly>")
  169. # By now, if we don't have a code object, we can't disassemble x.
  170. if hasattr(x, 'co_code'):
  171. return x
  172. raise TypeError("don't know how to disassemble %s objects" %
  173. type(x).__name__)
  174. def _deoptop(op):
  175. name = _all_opname[op]
  176. return _all_opmap[deoptmap[name]] if name in deoptmap else op
  177. def _get_code_array(co, adaptive):
  178. return co._co_code_adaptive if adaptive else co.co_code
  179. def code_info(x):
  180. """Formatted details of methods, functions, or code."""
  181. return _format_code_info(_get_code_object(x))
  182. def _format_code_info(co):
  183. lines = []
  184. lines.append("Name: %s" % co.co_name)
  185. lines.append("Filename: %s" % co.co_filename)
  186. lines.append("Argument count: %s" % co.co_argcount)
  187. lines.append("Positional-only arguments: %s" % co.co_posonlyargcount)
  188. lines.append("Kw-only arguments: %s" % co.co_kwonlyargcount)
  189. lines.append("Number of locals: %s" % co.co_nlocals)
  190. lines.append("Stack size: %s" % co.co_stacksize)
  191. lines.append("Flags: %s" % pretty_flags(co.co_flags))
  192. if co.co_consts:
  193. lines.append("Constants:")
  194. for i_c in enumerate(co.co_consts):
  195. lines.append("%4d: %r" % i_c)
  196. if co.co_names:
  197. lines.append("Names:")
  198. for i_n in enumerate(co.co_names):
  199. lines.append("%4d: %s" % i_n)
  200. if co.co_varnames:
  201. lines.append("Variable names:")
  202. for i_n in enumerate(co.co_varnames):
  203. lines.append("%4d: %s" % i_n)
  204. if co.co_freevars:
  205. lines.append("Free variables:")
  206. for i_n in enumerate(co.co_freevars):
  207. lines.append("%4d: %s" % i_n)
  208. if co.co_cellvars:
  209. lines.append("Cell variables:")
  210. for i_n in enumerate(co.co_cellvars):
  211. lines.append("%4d: %s" % i_n)
  212. return "\n".join(lines)
  213. def show_code(co, *, file=None):
  214. """Print details of methods, functions, or code to *file*.
  215. If *file* is not provided, the output is printed on stdout.
  216. """
  217. print(code_info(co), file=file)
  218. Positions = collections.namedtuple(
  219. 'Positions',
  220. [
  221. 'lineno',
  222. 'end_lineno',
  223. 'col_offset',
  224. 'end_col_offset',
  225. ],
  226. defaults=[None] * 4
  227. )
  228. _Instruction = collections.namedtuple(
  229. "_Instruction",
  230. [
  231. 'opname',
  232. 'opcode',
  233. 'arg',
  234. 'argval',
  235. 'argrepr',
  236. 'offset',
  237. 'starts_line',
  238. 'is_jump_target',
  239. 'positions'
  240. ],
  241. defaults=[None]
  242. )
  243. _Instruction.opname.__doc__ = "Human readable name for operation"
  244. _Instruction.opcode.__doc__ = "Numeric code for operation"
  245. _Instruction.arg.__doc__ = "Numeric argument to operation (if any), otherwise None"
  246. _Instruction.argval.__doc__ = "Resolved arg value (if known), otherwise same as arg"
  247. _Instruction.argrepr.__doc__ = "Human readable description of operation argument"
  248. _Instruction.offset.__doc__ = "Start index of operation within bytecode sequence"
  249. _Instruction.starts_line.__doc__ = "Line started by this opcode (if any), otherwise None"
  250. _Instruction.is_jump_target.__doc__ = "True if other code jumps to here, otherwise False"
  251. _Instruction.positions.__doc__ = "dis.Positions object holding the span of source code covered by this instruction"
  252. _ExceptionTableEntry = collections.namedtuple("_ExceptionTableEntry",
  253. "start end target depth lasti")
  254. _OPNAME_WIDTH = 20
  255. _OPARG_WIDTH = 5
  256. class Instruction(_Instruction):
  257. """Details for a bytecode operation
  258. Defined fields:
  259. opname - human readable name for operation
  260. opcode - numeric code for operation
  261. arg - numeric argument to operation (if any), otherwise None
  262. argval - resolved arg value (if known), otherwise same as arg
  263. argrepr - human readable description of operation argument
  264. offset - start index of operation within bytecode sequence
  265. starts_line - line started by this opcode (if any), otherwise None
  266. is_jump_target - True if other code jumps to here, otherwise False
  267. positions - Optional dis.Positions object holding the span of source code
  268. covered by this instruction
  269. """
  270. def _disassemble(self, lineno_width=3, mark_as_current=False, offset_width=4):
  271. """Format instruction details for inclusion in disassembly output
  272. *lineno_width* sets the width of the line number field (0 omits it)
  273. *mark_as_current* inserts a '-->' marker arrow as part of the line
  274. *offset_width* sets the width of the instruction offset field
  275. """
  276. fields = []
  277. # Column: Source code line number
  278. if lineno_width:
  279. if self.starts_line is not None:
  280. lineno_fmt = "%%%dd" % lineno_width
  281. fields.append(lineno_fmt % self.starts_line)
  282. else:
  283. fields.append(' ' * lineno_width)
  284. # Column: Current instruction indicator
  285. if mark_as_current:
  286. fields.append('-->')
  287. else:
  288. fields.append(' ')
  289. # Column: Jump target marker
  290. if self.is_jump_target:
  291. fields.append('>>')
  292. else:
  293. fields.append(' ')
  294. # Column: Instruction offset from start of code sequence
  295. fields.append(repr(self.offset).rjust(offset_width))
  296. # Column: Opcode name
  297. fields.append(self.opname.ljust(_OPNAME_WIDTH))
  298. # Column: Opcode argument
  299. if self.arg is not None:
  300. fields.append(repr(self.arg).rjust(_OPARG_WIDTH))
  301. # Column: Opcode argument details
  302. if self.argrepr:
  303. fields.append('(' + self.argrepr + ')')
  304. return ' '.join(fields).rstrip()
  305. def get_instructions(x, *, first_line=None, show_caches=False, adaptive=False):
  306. """Iterator for the opcodes in methods, functions or code
  307. Generates a series of Instruction named tuples giving the details of
  308. each operations in the supplied code.
  309. If *first_line* is not None, it indicates the line number that should
  310. be reported for the first source line in the disassembled code.
  311. Otherwise, the source line information (if any) is taken directly from
  312. the disassembled code object.
  313. """
  314. co = _get_code_object(x)
  315. linestarts = dict(findlinestarts(co))
  316. if first_line is not None:
  317. line_offset = first_line - co.co_firstlineno
  318. else:
  319. line_offset = 0
  320. return _get_instructions_bytes(_get_code_array(co, adaptive),
  321. co._varname_from_oparg,
  322. co.co_names, co.co_consts,
  323. linestarts, line_offset,
  324. co_positions=co.co_positions(),
  325. show_caches=show_caches)
  326. def _get_const_value(op, arg, co_consts):
  327. """Helper to get the value of the const in a hasconst op.
  328. Returns the dereferenced constant if this is possible.
  329. Otherwise (if it is a LOAD_CONST and co_consts is not
  330. provided) returns the dis.UNKNOWN sentinel.
  331. """
  332. assert op in hasconst
  333. argval = UNKNOWN
  334. if co_consts is not None:
  335. argval = co_consts[arg]
  336. return argval
  337. def _get_const_info(op, arg, co_consts):
  338. """Helper to get optional details about const references
  339. Returns the dereferenced constant and its repr if the value
  340. can be calculated.
  341. Otherwise returns the sentinel value dis.UNKNOWN for the value
  342. and an empty string for its repr.
  343. """
  344. argval = _get_const_value(op, arg, co_consts)
  345. argrepr = repr(argval) if argval is not UNKNOWN else ''
  346. return argval, argrepr
  347. def _get_name_info(name_index, get_name, **extrainfo):
  348. """Helper to get optional details about named references
  349. Returns the dereferenced name as both value and repr if the name
  350. list is defined.
  351. Otherwise returns the sentinel value dis.UNKNOWN for the value
  352. and an empty string for its repr.
  353. """
  354. if get_name is not None:
  355. argval = get_name(name_index, **extrainfo)
  356. return argval, argval
  357. else:
  358. return UNKNOWN, ''
  359. def _parse_varint(iterator):
  360. b = next(iterator)
  361. val = b & 63
  362. while b&64:
  363. val <<= 6
  364. b = next(iterator)
  365. val |= b&63
  366. return val
  367. def _parse_exception_table(code):
  368. iterator = iter(code.co_exceptiontable)
  369. entries = []
  370. try:
  371. while True:
  372. start = _parse_varint(iterator)*2
  373. length = _parse_varint(iterator)*2
  374. end = start + length
  375. target = _parse_varint(iterator)*2
  376. dl = _parse_varint(iterator)
  377. depth = dl >> 1
  378. lasti = bool(dl&1)
  379. entries.append(_ExceptionTableEntry(start, end, target, depth, lasti))
  380. except StopIteration:
  381. return entries
  382. def _is_backward_jump(op):
  383. return 'JUMP_BACKWARD' in opname[op]
  384. def _get_instructions_bytes(code, varname_from_oparg=None,
  385. names=None, co_consts=None,
  386. linestarts=None, line_offset=0,
  387. exception_entries=(), co_positions=None,
  388. show_caches=False):
  389. """Iterate over the instructions in a bytecode string.
  390. Generates a sequence of Instruction namedtuples giving the details of each
  391. opcode. Additional information about the code's runtime environment
  392. (e.g. variable names, co_consts) can be specified using optional
  393. arguments.
  394. """
  395. co_positions = co_positions or iter(())
  396. get_name = None if names is None else names.__getitem__
  397. labels = set(findlabels(code))
  398. for start, end, target, _, _ in exception_entries:
  399. for i in range(start, end):
  400. labels.add(target)
  401. starts_line = None
  402. for offset, op, arg in _unpack_opargs(code):
  403. if linestarts is not None:
  404. starts_line = linestarts.get(offset, None)
  405. if starts_line is not None:
  406. starts_line += line_offset
  407. is_jump_target = offset in labels
  408. argval = None
  409. argrepr = ''
  410. positions = Positions(*next(co_positions, ()))
  411. deop = _deoptop(op)
  412. caches = _inline_cache_entries[deop]
  413. if arg is not None:
  414. # Set argval to the dereferenced value of the argument when
  415. # available, and argrepr to the string representation of argval.
  416. # _disassemble_bytes needs the string repr of the
  417. # raw name index for LOAD_GLOBAL, LOAD_CONST, etc.
  418. argval = arg
  419. if deop in hasconst:
  420. argval, argrepr = _get_const_info(deop, arg, co_consts)
  421. elif deop in hasname:
  422. if deop == LOAD_GLOBAL:
  423. argval, argrepr = _get_name_info(arg//2, get_name)
  424. if (arg & 1) and argrepr:
  425. argrepr = "NULL + " + argrepr
  426. elif deop == LOAD_ATTR:
  427. argval, argrepr = _get_name_info(arg//2, get_name)
  428. if (arg & 1) and argrepr:
  429. argrepr = "NULL|self + " + argrepr
  430. elif deop == LOAD_SUPER_ATTR:
  431. argval, argrepr = _get_name_info(arg//4, get_name)
  432. if (arg & 1) and argrepr:
  433. argrepr = "NULL|self + " + argrepr
  434. else:
  435. argval, argrepr = _get_name_info(arg, get_name)
  436. elif deop in hasjabs:
  437. argval = arg*2
  438. argrepr = "to " + repr(argval)
  439. elif deop in hasjrel:
  440. signed_arg = -arg if _is_backward_jump(deop) else arg
  441. argval = offset + 2 + signed_arg*2
  442. argval += 2 * caches
  443. argrepr = "to " + repr(argval)
  444. elif deop in haslocal or deop in hasfree:
  445. argval, argrepr = _get_name_info(arg, varname_from_oparg)
  446. elif deop in hascompare:
  447. argval = cmp_op[arg>>4]
  448. argrepr = argval
  449. elif deop == FORMAT_VALUE:
  450. argval, argrepr = FORMAT_VALUE_CONVERTERS[arg & 0x3]
  451. argval = (argval, bool(arg & 0x4))
  452. if argval[1]:
  453. if argrepr:
  454. argrepr += ', '
  455. argrepr += 'with format'
  456. elif deop == MAKE_FUNCTION:
  457. argrepr = ', '.join(s for i, s in enumerate(MAKE_FUNCTION_FLAGS)
  458. if arg & (1<<i))
  459. elif deop == BINARY_OP:
  460. _, argrepr = _nb_ops[arg]
  461. elif deop == CALL_INTRINSIC_1:
  462. argrepr = _intrinsic_1_descs[arg]
  463. elif deop == CALL_INTRINSIC_2:
  464. argrepr = _intrinsic_2_descs[arg]
  465. yield Instruction(_all_opname[op], op,
  466. arg, argval, argrepr,
  467. offset, starts_line, is_jump_target, positions)
  468. caches = _inline_cache_entries[deop]
  469. if not caches:
  470. continue
  471. if not show_caches:
  472. # We still need to advance the co_positions iterator:
  473. for _ in range(caches):
  474. next(co_positions, ())
  475. continue
  476. for name, size in _cache_format[opname[deop]].items():
  477. for i in range(size):
  478. offset += 2
  479. # Only show the fancy argrepr for a CACHE instruction when it's
  480. # the first entry for a particular cache value:
  481. if i == 0:
  482. data = code[offset: offset + 2 * size]
  483. argrepr = f"{name}: {int.from_bytes(data, sys.byteorder)}"
  484. else:
  485. argrepr = ""
  486. yield Instruction(
  487. "CACHE", CACHE, 0, None, argrepr, offset, None, False,
  488. Positions(*next(co_positions, ()))
  489. )
  490. def disassemble(co, lasti=-1, *, file=None, show_caches=False, adaptive=False):
  491. """Disassemble a code object."""
  492. linestarts = dict(findlinestarts(co))
  493. exception_entries = _parse_exception_table(co)
  494. _disassemble_bytes(_get_code_array(co, adaptive),
  495. lasti, co._varname_from_oparg,
  496. co.co_names, co.co_consts, linestarts, file=file,
  497. exception_entries=exception_entries,
  498. co_positions=co.co_positions(), show_caches=show_caches)
  499. def _disassemble_recursive(co, *, file=None, depth=None, show_caches=False, adaptive=False):
  500. disassemble(co, file=file, show_caches=show_caches, adaptive=adaptive)
  501. if depth is None or depth > 0:
  502. if depth is not None:
  503. depth = depth - 1
  504. for x in co.co_consts:
  505. if hasattr(x, 'co_code'):
  506. print(file=file)
  507. print("Disassembly of %r:" % (x,), file=file)
  508. _disassemble_recursive(
  509. x, file=file, depth=depth, show_caches=show_caches, adaptive=adaptive
  510. )
  511. def _disassemble_bytes(code, lasti=-1, varname_from_oparg=None,
  512. names=None, co_consts=None, linestarts=None,
  513. *, file=None, line_offset=0, exception_entries=(),
  514. co_positions=None, show_caches=False):
  515. # Omit the line number column entirely if we have no line number info
  516. show_lineno = bool(linestarts)
  517. if show_lineno:
  518. maxlineno = max(linestarts.values()) + line_offset
  519. if maxlineno >= 1000:
  520. lineno_width = len(str(maxlineno))
  521. else:
  522. lineno_width = 3
  523. else:
  524. lineno_width = 0
  525. maxoffset = len(code) - 2
  526. if maxoffset >= 10000:
  527. offset_width = len(str(maxoffset))
  528. else:
  529. offset_width = 4
  530. for instr in _get_instructions_bytes(code, varname_from_oparg, names,
  531. co_consts, linestarts,
  532. line_offset=line_offset,
  533. exception_entries=exception_entries,
  534. co_positions=co_positions,
  535. show_caches=show_caches):
  536. new_source_line = (show_lineno and
  537. instr.starts_line is not None and
  538. instr.offset > 0)
  539. if new_source_line:
  540. print(file=file)
  541. if show_caches:
  542. is_current_instr = instr.offset == lasti
  543. else:
  544. # Each CACHE takes 2 bytes
  545. is_current_instr = instr.offset <= lasti \
  546. <= instr.offset + 2 * _inline_cache_entries[_deoptop(instr.opcode)]
  547. print(instr._disassemble(lineno_width, is_current_instr, offset_width),
  548. file=file)
  549. if exception_entries:
  550. print("ExceptionTable:", file=file)
  551. for entry in exception_entries:
  552. lasti = " lasti" if entry.lasti else ""
  553. end = entry.end-2
  554. print(f" {entry.start} to {end} -> {entry.target} [{entry.depth}]{lasti}", file=file)
  555. def _disassemble_str(source, **kwargs):
  556. """Compile the source string, then disassemble the code object."""
  557. _disassemble_recursive(_try_compile(source, '<dis>'), **kwargs)
  558. disco = disassemble # XXX For backwards compatibility
  559. # Rely on C `int` being 32 bits for oparg
  560. _INT_BITS = 32
  561. # Value for c int when it overflows
  562. _INT_OVERFLOW = 2 ** (_INT_BITS - 1)
  563. def _unpack_opargs(code):
  564. extended_arg = 0
  565. caches = 0
  566. for i in range(0, len(code), 2):
  567. # Skip inline CACHE entries:
  568. if caches:
  569. caches -= 1
  570. continue
  571. op = code[i]
  572. deop = _deoptop(op)
  573. caches = _inline_cache_entries[deop]
  574. if deop in hasarg:
  575. arg = code[i+1] | extended_arg
  576. extended_arg = (arg << 8) if deop == EXTENDED_ARG else 0
  577. # The oparg is stored as a signed integer
  578. # If the value exceeds its upper limit, it will overflow and wrap
  579. # to a negative integer
  580. if extended_arg >= _INT_OVERFLOW:
  581. extended_arg -= 2 * _INT_OVERFLOW
  582. else:
  583. arg = None
  584. extended_arg = 0
  585. yield (i, op, arg)
  586. def findlabels(code):
  587. """Detect all offsets in a byte code which are jump targets.
  588. Return the list of offsets.
  589. """
  590. labels = []
  591. for offset, op, arg in _unpack_opargs(code):
  592. if arg is not None:
  593. deop = _deoptop(op)
  594. caches = _inline_cache_entries[deop]
  595. if deop in hasjrel:
  596. if _is_backward_jump(deop):
  597. arg = -arg
  598. label = offset + 2 + arg*2
  599. label += 2 * caches
  600. elif deop in hasjabs:
  601. label = arg*2
  602. else:
  603. continue
  604. if label not in labels:
  605. labels.append(label)
  606. return labels
  607. def findlinestarts(code):
  608. """Find the offsets in a byte code which are start of lines in the source.
  609. Generate pairs (offset, lineno)
  610. """
  611. lastline = None
  612. for start, end, line in code.co_lines():
  613. if line is not None and line != lastline:
  614. lastline = line
  615. yield start, line
  616. return
  617. def _find_imports(co):
  618. """Find import statements in the code
  619. Generate triplets (name, level, fromlist) where
  620. name is the imported module and level, fromlist are
  621. the corresponding args to __import__.
  622. """
  623. IMPORT_NAME = opmap['IMPORT_NAME']
  624. consts = co.co_consts
  625. names = co.co_names
  626. opargs = [(op, arg) for _, op, arg in _unpack_opargs(co.co_code)
  627. if op != EXTENDED_ARG]
  628. for i, (op, oparg) in enumerate(opargs):
  629. if op == IMPORT_NAME and i >= 2:
  630. from_op = opargs[i-1]
  631. level_op = opargs[i-2]
  632. if (from_op[0] in hasconst and level_op[0] in hasconst):
  633. level = _get_const_value(level_op[0], level_op[1], consts)
  634. fromlist = _get_const_value(from_op[0], from_op[1], consts)
  635. yield (names[oparg], level, fromlist)
  636. def _find_store_names(co):
  637. """Find names of variables which are written in the code
  638. Generate sequence of strings
  639. """
  640. STORE_OPS = {
  641. opmap['STORE_NAME'],
  642. opmap['STORE_GLOBAL']
  643. }
  644. names = co.co_names
  645. for _, op, arg in _unpack_opargs(co.co_code):
  646. if op in STORE_OPS:
  647. yield names[arg]
  648. class Bytecode:
  649. """The bytecode operations of a piece of code
  650. Instantiate this with a function, method, other compiled object, string of
  651. code, or a code object (as returned by compile()).
  652. Iterating over this yields the bytecode operations as Instruction instances.
  653. """
  654. def __init__(self, x, *, first_line=None, current_offset=None, show_caches=False, adaptive=False):
  655. self.codeobj = co = _get_code_object(x)
  656. if first_line is None:
  657. self.first_line = co.co_firstlineno
  658. self._line_offset = 0
  659. else:
  660. self.first_line = first_line
  661. self._line_offset = first_line - co.co_firstlineno
  662. self._linestarts = dict(findlinestarts(co))
  663. self._original_object = x
  664. self.current_offset = current_offset
  665. self.exception_entries = _parse_exception_table(co)
  666. self.show_caches = show_caches
  667. self.adaptive = adaptive
  668. def __iter__(self):
  669. co = self.codeobj
  670. return _get_instructions_bytes(_get_code_array(co, self.adaptive),
  671. co._varname_from_oparg,
  672. co.co_names, co.co_consts,
  673. self._linestarts,
  674. line_offset=self._line_offset,
  675. exception_entries=self.exception_entries,
  676. co_positions=co.co_positions(),
  677. show_caches=self.show_caches)
  678. def __repr__(self):
  679. return "{}({!r})".format(self.__class__.__name__,
  680. self._original_object)
  681. @classmethod
  682. def from_traceback(cls, tb, *, show_caches=False, adaptive=False):
  683. """ Construct a Bytecode from the given traceback """
  684. while tb.tb_next:
  685. tb = tb.tb_next
  686. return cls(
  687. tb.tb_frame.f_code, current_offset=tb.tb_lasti, show_caches=show_caches, adaptive=adaptive
  688. )
  689. def info(self):
  690. """Return formatted information about the code object."""
  691. return _format_code_info(self.codeobj)
  692. def dis(self):
  693. """Return a formatted view of the bytecode operations."""
  694. co = self.codeobj
  695. if self.current_offset is not None:
  696. offset = self.current_offset
  697. else:
  698. offset = -1
  699. with io.StringIO() as output:
  700. _disassemble_bytes(_get_code_array(co, self.adaptive),
  701. varname_from_oparg=co._varname_from_oparg,
  702. names=co.co_names, co_consts=co.co_consts,
  703. linestarts=self._linestarts,
  704. line_offset=self._line_offset,
  705. file=output,
  706. lasti=offset,
  707. exception_entries=self.exception_entries,
  708. co_positions=co.co_positions(),
  709. show_caches=self.show_caches)
  710. return output.getvalue()
  711. def main():
  712. import argparse
  713. parser = argparse.ArgumentParser()
  714. parser.add_argument('infile', type=argparse.FileType('rb'), nargs='?', default='-')
  715. args = parser.parse_args()
  716. with args.infile as infile:
  717. source = infile.read()
  718. code = compile(source, args.infile.name, "exec")
  719. dis(code)
  720. if __name__ == "__main__":
  721. main()