/* pycore_code.h — CPython internal header: code-object construction and
 * PEP 659 specialization support. */
#ifndef Py_INTERNAL_CODE_H
#define Py_INTERNAL_CODE_H
#ifdef __cplusplus
extern "C" {
#endif

// Upper bound on the number of watchers that can be registered for code
// objects at one time.
#define CODE_MAX_WATCHERS 8

/* PEP 659
 * Specialization and quickening structs and helper functions
 */

// Inline caches. If you change the number of cache entries for an instruction,
// you must *also* update the number of cache entries in Lib/opcode.py and bump
// the magic number in Lib/importlib/_bootstrap_external.py!

// Number of _Py_CODEUNIT slots a cache struct occupies in the bytecode.
#define CACHE_ENTRIES(cache) (sizeof(cache)/sizeof(_Py_CODEUNIT))
// Inline cache layout for LOAD_GLOBAL.
typedef struct {
    // Adaptive specialization counter (see the "Counters" comment below).
    uint16_t counter;
    // NOTE(review): presumably the cached index of the value in the globals
    // or builtins dict entries -- confirm against Python/specialize.c.
    uint16_t index;
    // Expected dict-keys version tags, presumably used to detect mutation of
    // the module / builtins namespaces -- confirm against the specializer.
    uint16_t module_keys_version;
    uint16_t builtin_keys_version;
} _PyLoadGlobalCache;
#define INLINE_CACHE_ENTRIES_LOAD_GLOBAL CACHE_ENTRIES(_PyLoadGlobalCache)
// Inline cache for BINARY_OP: just the adaptive counter.
typedef struct {
    uint16_t counter;
} _PyBinaryOpCache;
#define INLINE_CACHE_ENTRIES_BINARY_OP CACHE_ENTRIES(_PyBinaryOpCache)

// Inline cache for UNPACK_SEQUENCE: just the adaptive counter.
typedef struct {
    uint16_t counter;
} _PyUnpackSequenceCache;
#define INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE \
    CACHE_ENTRIES(_PyUnpackSequenceCache)

// Inline cache for COMPARE_OP: just the adaptive counter.
typedef struct {
    uint16_t counter;
} _PyCompareOpCache;
#define INLINE_CACHE_ENTRIES_COMPARE_OP CACHE_ENTRIES(_PyCompareOpCache)

// Inline cache for BINARY_SUBSCR: just the adaptive counter.
typedef struct {
    uint16_t counter;
} _PyBinarySubscrCache;
#define INLINE_CACHE_ENTRIES_BINARY_SUBSCR CACHE_ENTRIES(_PyBinarySubscrCache)

// Inline cache for LOAD_SUPER_ATTR: just the adaptive counter.
typedef struct {
    uint16_t counter;
} _PySuperAttrCache;
#define INLINE_CACHE_ENTRIES_LOAD_SUPER_ATTR CACHE_ENTRIES(_PySuperAttrCache)
// Inline cache for attribute stores (STORE_ATTR); also the common prefix of
// the LOAD_ATTR cache below.
typedef struct {
    uint16_t counter;
    // A 32-bit value split across two 16-bit cache entries
    // (see read_u32/write_u32 below).
    uint16_t version[2];
    uint16_t index;
} _PyAttrCache;

// Extended cache used by LOAD_ATTR.
typedef struct {
    uint16_t counter;
    uint16_t type_version[2];
    uint16_t keys_version[2];
    // A 64-bit pointer split across four 16-bit cache entries
    // (see read_obj/write_obj below).
    uint16_t descr[4];
} _PyLoadMethodCache;

// MUST be the max(_PyAttrCache, _PyLoadMethodCache)
#define INLINE_CACHE_ENTRIES_LOAD_ATTR CACHE_ENTRIES(_PyLoadMethodCache)
#define INLINE_CACHE_ENTRIES_STORE_ATTR CACHE_ENTRIES(_PyAttrCache)
// Inline cache for CALL.
typedef struct {
    uint16_t counter;
    // A 32-bit function version split across two 16-bit cache entries
    // (see read_u32/write_u32 below).
    uint16_t func_version[2];
} _PyCallCache;
#define INLINE_CACHE_ENTRIES_CALL CACHE_ENTRIES(_PyCallCache)

// Inline cache for STORE_SUBSCR: just the adaptive counter.
typedef struct {
    uint16_t counter;
} _PyStoreSubscrCache;
#define INLINE_CACHE_ENTRIES_STORE_SUBSCR CACHE_ENTRIES(_PyStoreSubscrCache)

// Inline cache for FOR_ITER: just the adaptive counter.
typedef struct {
    uint16_t counter;
} _PyForIterCache;
#define INLINE_CACHE_ENTRIES_FOR_ITER CACHE_ENTRIES(_PyForIterCache)

// Inline cache for SEND: just the adaptive counter.
typedef struct {
    uint16_t counter;
} _PySendCache;
#define INLINE_CACHE_ENTRIES_SEND CACHE_ENTRIES(_PySendCache)
// Borrowed references to common callables:
// (borrowed = no reference is owned here, so nothing to decref).
struct callable_cache {
    PyObject *isinstance;
    PyObject *len;
    PyObject *list_append;
    PyObject *object__getattribute__;
};
/* "Locals plus" for a code object is the set of locals + cell vars +
 * free vars. This relates to variable names as well as offsets into
 * the "fast locals" storage array of execution frames. The compiler
 * builds the list of names, their offsets, and the corresponding
 * kind of local.
 *
 * Those kinds represent the source of the initial value and the
 * variable's scope (as related to closures). A "local" is an
 * argument or other variable defined in the current scope. A "free"
 * variable is one that is defined in an outer scope and comes from
 * the function's closure. A "cell" variable is a local that escapes
 * into an inner function as part of a closure, and thus must be
 * wrapped in a cell. Any "local" can also be a "cell", but the
 * "free" kind is mutually exclusive with both.
 */

// Note that these all fit within a byte, as do combinations.
// Later, we will use the smaller numbers to differentiate the different
// kinds of locals (e.g. pos-only arg, varkwargs, local-only).
#define CO_FAST_HIDDEN 0x10
#define CO_FAST_LOCAL 0x20
#define CO_FAST_CELL 0x40
#define CO_FAST_FREE 0x80

// One flag byte per "fast local"; the CO_FAST_* bits above may be combined.
typedef unsigned char _PyLocals_Kind;
  103. static inline _PyLocals_Kind
  104. _PyLocals_GetKind(PyObject *kinds, int i)
  105. {
  106. assert(PyBytes_Check(kinds));
  107. assert(0 <= i && i < PyBytes_GET_SIZE(kinds));
  108. char *ptr = PyBytes_AS_STRING(kinds);
  109. return (_PyLocals_Kind)(ptr[i]);
  110. }
  111. static inline void
  112. _PyLocals_SetKind(PyObject *kinds, int i, _PyLocals_Kind kind)
  113. {
  114. assert(PyBytes_Check(kinds));
  115. assert(0 <= i && i < PyBytes_GET_SIZE(kinds));
  116. char *ptr = PyBytes_AS_STRING(kinds);
  117. ptr[i] = (char) kind;
  118. }
// All the information needed to build a code object; filled in by callers
// and handed to _PyCode_Validate()/_PyCode_New() below.
struct _PyCodeConstructor {
    /* metadata */
    PyObject *filename;
    PyObject *name;
    PyObject *qualname;
    int flags;

    /* the code */
    PyObject *code;
    int firstlineno;
    PyObject *linetable;

    /* used by the code */
    PyObject *consts;
    PyObject *names;

    /* mapping frame offsets to information */
    PyObject *localsplusnames;  // Tuple of strings
    PyObject *localspluskinds;  // Bytes object, one byte per variable

    /* args (within varnames) */
    int argcount;
    int posonlyargcount;
    // XXX Replace argcount with posorkwargcount (argcount - posonlyargcount).
    int kwonlyargcount;

    /* needed to create the frame */
    int stacksize;

    /* used by the eval loop */
    PyObject *exceptiontable;
};
// Using an "arguments struct" like this is helpful for maintainability
// in a case such as this with many parameters. It does bear a risk:
// if the struct changes and callers are not updated properly then the
// compiler will not catch problems (like a missing argument). This can
// cause hard-to-debug problems. The risk is mitigated by the use of
// check_code() in codeobject.c. However, we may decide to switch
// back to a regular function signature. Regardless, this approach
// wouldn't be appropriate if this weren't a strictly internal API.
// (See the comments in https://github.com/python/cpython/pull/26258.)

/* Sanity-check a constructor struct before building a code object. */
PyAPI_FUNC(int) _PyCode_Validate(struct _PyCodeConstructor *);
/* Build a new code object from the given constructor struct. */
PyAPI_FUNC(PyCodeObject *) _PyCode_New(struct _PyCodeConstructor *);
/* Private API */

/* Getters for internal PyCodeObject data. */
extern PyObject* _PyCode_GetVarnames(PyCodeObject *);
extern PyObject* _PyCode_GetCellvars(PyCodeObject *);
extern PyObject* _PyCode_GetFreevars(PyCodeObject *);
extern PyObject* _PyCode_GetCode(PyCodeObject *);

/** API for initializing the line number tables. */
extern int _PyCode_InitAddressRange(PyCodeObject* co, PyCodeAddressRange *bounds);

/** Out of process API for initializing the location table. */
extern void _PyLineTable_InitAddressRange(
    const char *linetable,
    Py_ssize_t length,
    int firstlineno,
    PyCodeAddressRange *range);

/** API for traversing the line number table. */
extern int _PyLineTable_NextAddressRange(PyCodeAddressRange *range);
extern int _PyLineTable_PreviousAddressRange(PyCodeAddressRange *range);

/* Specialization functions (PEP 659): each rewrites one adaptive
 * instruction in place based on the observed operand(s). */
extern void _Py_Specialize_LoadSuperAttr(PyObject *global_super, PyObject *cls,
                                         _Py_CODEUNIT *instr, int load_method);
extern void _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr,
                                    PyObject *name);
extern void _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr,
                                     PyObject *name);
extern void _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins,
                                      _Py_CODEUNIT *instr, PyObject *name);
extern void _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container,
                                        _Py_CODEUNIT *instr);
extern void _Py_Specialize_StoreSubscr(PyObject *container, PyObject *sub,
                                       _Py_CODEUNIT *instr);
extern void _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr,
                                int nargs, PyObject *kwnames);
extern void _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
                                    int oparg, PyObject **locals);
extern void _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs,
                                     _Py_CODEUNIT *instr, int oparg);
extern void _Py_Specialize_UnpackSequence(PyObject *seq, _Py_CODEUNIT *instr,
                                          int oparg);
extern void _Py_Specialize_ForIter(PyObject *iter, _Py_CODEUNIT *instr, int oparg);
extern void _Py_Specialize_Send(PyObject *receiver, _Py_CODEUNIT *instr);

/* Finalizer function for static codeobjects used in deepfreeze.py */
extern void _PyStaticCode_Fini(PyCodeObject *co);

/* Function to intern strings of codeobjects and quicken the bytecode */
extern int _PyStaticCode_Init(PyCodeObject *co);
#ifdef Py_STATS
// Statistics macros: when stats collection is active (_py_stats != NULL),
// each of these bumps one field of the global stats structure; otherwise
// they do nothing.
#define STAT_INC(opname, name) do { if (_py_stats) _py_stats->opcode_stats[opname].specialization.name++; } while (0)
#define STAT_DEC(opname, name) do { if (_py_stats) _py_stats->opcode_stats[opname].specialization.name--; } while (0)
#define OPCODE_EXE_INC(opname) do { if (_py_stats) _py_stats->opcode_stats[opname].execution_count++; } while (0)
#define CALL_STAT_INC(name) do { if (_py_stats) _py_stats->call_stats.name++; } while (0)
#define OBJECT_STAT_INC(name) do { if (_py_stats) _py_stats->object_stats.name++; } while (0)
#define OBJECT_STAT_INC_COND(name, cond) \
    do { if (_py_stats && cond) _py_stats->object_stats.name++; } while (0)
#define EVAL_CALL_STAT_INC(name) do { if (_py_stats) _py_stats->call_stats.eval_calls[name]++; } while (0)
#define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) \
    do { if (_py_stats && PyFunction_Check(callable)) _py_stats->call_stats.eval_calls[name]++; } while (0)

// Used by the _opcode extension which is built as a shared library
PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);
#else
// Py_STATS disabled: all stats macros compile to no-ops.
#define STAT_INC(opname, name) ((void)0)
#define STAT_DEC(opname, name) ((void)0)
#define OPCODE_EXE_INC(opname) ((void)0)
#define CALL_STAT_INC(name) ((void)0)
#define OBJECT_STAT_INC(name) ((void)0)
#define OBJECT_STAT_INC_COND(name, cond) ((void)0)
#define EVAL_CALL_STAT_INC(name) ((void)0)
#define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) ((void)0)
#endif  // !Py_STATS
  223. // Utility functions for reading/writing 32/64-bit values in the inline caches.
  224. // Great care should be taken to ensure that these functions remain correct and
  225. // performant! They should compile to just "move" instructions on all supported
  226. // compilers and platforms.
  227. // We use memcpy to let the C compiler handle unaligned accesses and endianness
  228. // issues for us. It also seems to produce better code than manual copying for
  229. // most compilers (see https://blog.regehr.org/archives/959 for more info).
  230. static inline void
  231. write_u32(uint16_t *p, uint32_t val)
  232. {
  233. memcpy(p, &val, sizeof(val));
  234. }
  235. static inline void
  236. write_u64(uint16_t *p, uint64_t val)
  237. {
  238. memcpy(p, &val, sizeof(val));
  239. }
  240. static inline void
  241. write_obj(uint16_t *p, PyObject *val)
  242. {
  243. memcpy(p, &val, sizeof(val));
  244. }
  245. static inline uint16_t
  246. read_u16(uint16_t *p)
  247. {
  248. return *p;
  249. }
  250. static inline uint32_t
  251. read_u32(uint16_t *p)
  252. {
  253. uint32_t val;
  254. memcpy(&val, p, sizeof(val));
  255. return val;
  256. }
  257. static inline uint64_t
  258. read_u64(uint16_t *p)
  259. {
  260. uint64_t val;
  261. memcpy(&val, p, sizeof(val));
  262. return val;
  263. }
  264. static inline PyObject *
  265. read_obj(uint16_t *p)
  266. {
  267. PyObject *val;
  268. memcpy(&val, p, sizeof(val));
  269. return val;
  270. }
  271. /* See Objects/exception_handling_notes.txt for details.
  272. */
  273. static inline unsigned char *
  274. parse_varint(unsigned char *p, int *result) {
  275. int val = p[0] & 63;
  276. while (p[0] & 64) {
  277. p++;
  278. val = (val << 6) | (p[0] & 63);
  279. }
  280. *result = val;
  281. return p+1;
  282. }
  283. static inline int
  284. write_varint(uint8_t *ptr, unsigned int val)
  285. {
  286. int written = 1;
  287. while (val >= 64) {
  288. *ptr++ = 64 | (val & 63);
  289. val >>= 6;
  290. written++;
  291. }
  292. *ptr = (uint8_t)val;
  293. return written;
  294. }
  295. static inline int
  296. write_signed_varint(uint8_t *ptr, int val)
  297. {
  298. unsigned int uval;
  299. if (val < 0) {
  300. // (unsigned int)(-val) has an undefined behavior for INT_MIN
  301. uval = ((0 - (unsigned int)val) << 1) | 1;
  302. }
  303. else {
  304. uval = (unsigned int)val << 1;
  305. }
  306. return write_varint(ptr, uval);
  307. }
  308. static inline int
  309. write_location_entry_start(uint8_t *ptr, int code, int length)
  310. {
  311. assert((code & 15) == code);
  312. *ptr = 128 | (uint8_t)(code << 3) | (uint8_t)(length - 1);
  313. return 1;
  314. }
/** Counters
 * The first 16-bit value in each inline cache is a counter.
 * When counting misses, the counter is treated as a simple unsigned value.
 *
 * When counting executions until the next specialization attempt,
 * exponential backoff is used to reduce the number of specialization failures.
 * The high 12 bits store the counter, the low 4 bits store the backoff exponent.
 * On a specialization failure, the backoff exponent is incremented and the
 * counter set to (2**backoff - 1).
 * Backoff == 6 -> starting counter == 63, backoff == 10 -> starting counter == 1023.
 */

/* With a 16-bit counter, we have 12 bits for the counter value, and 4 bits for the backoff */
#define ADAPTIVE_BACKOFF_BITS 4

// A value of 1 means that we attempt to specialize the *second* time each
// instruction is executed. Executing twice is a much better indicator of
// "hotness" than executing once, but additional warmup delays only prevent
// specialization. Most types stabilize by the second execution, too:
#define ADAPTIVE_WARMUP_VALUE 1
#define ADAPTIVE_WARMUP_BACKOFF 1

// A value of 52 means that we attempt to re-specialize after 53 misses (a prime
// number, useful for avoiding artifacts if every nth value is a different type
// or something). Setting the backoff to 0 means that the counter is reset to
// the same state as a warming-up instruction (value == 1, backoff == 1) after
// deoptimization. This isn't strictly necessary, but it is bit easier to reason
// about when thinking about the opcode transitions as a state machine:
#define ADAPTIVE_COOLDOWN_VALUE 52
#define ADAPTIVE_COOLDOWN_BACKOFF 0

// Cap the exponent at 12 so that the starting counter (2**backoff - 1)
// always fits in the 12 counter bits.
#define MAX_BACKOFF_VALUE (16 - ADAPTIVE_BACKOFF_BITS)
  343. static inline uint16_t
  344. adaptive_counter_bits(uint16_t value, uint16_t backoff) {
  345. return ((value << ADAPTIVE_BACKOFF_BITS)
  346. | (backoff & ((1 << ADAPTIVE_BACKOFF_BITS) - 1)));
  347. }
  348. static inline uint16_t
  349. adaptive_counter_warmup(void) {
  350. return adaptive_counter_bits(ADAPTIVE_WARMUP_VALUE,
  351. ADAPTIVE_WARMUP_BACKOFF);
  352. }
  353. static inline uint16_t
  354. adaptive_counter_cooldown(void) {
  355. return adaptive_counter_bits(ADAPTIVE_COOLDOWN_VALUE,
  356. ADAPTIVE_COOLDOWN_BACKOFF);
  357. }
  358. static inline uint16_t
  359. adaptive_counter_backoff(uint16_t counter) {
  360. uint16_t backoff = counter & ((1 << ADAPTIVE_BACKOFF_BITS) - 1);
  361. backoff++;
  362. if (backoff > MAX_BACKOFF_VALUE) {
  363. backoff = MAX_BACKOFF_VALUE;
  364. }
  365. uint16_t value = (uint16_t)(1 << backoff) - 1;
  366. return adaptive_counter_bits(value, backoff);
  367. }
/* Line array cache for tracing */

// Description of a small hand-written "shim" code object, turned into a
// real PyCodeObject by _Py_MakeShimCode() below.
typedef struct _PyShimCodeDef {
    const uint8_t *code;   // raw bytecode
    int codelen;           // length of `code` in bytes
    int stacksize;
    // NOTE(review): presumably the C-level name given to the resulting
    // code object -- confirm against the _Py_MakeShimCode implementation.
    const char *cname;
} _PyShimCodeDef;

extern PyCodeObject *
_Py_MakeShimCode(const _PyShimCodeDef *code);

// Version tag handed out to newly created functions.
extern uint32_t _Py_next_func_version;
/* Comparison bit masks. */

/* Note this evaluates its arguments twice each.
 * Maps a pair of comparable values to a single bit:
 *   x <  y -> 2*0 + 1 = bit 1 (COMPARISON_LESS_THAN)
 *   x >  y -> 2*1 + 0 = bit 2 (COMPARISON_GREATER_THAN)
 *   x == y -> 2*1 + 1 = bit 3 (COMPARISON_EQUALS)
 *   unordered (both comparisons false, e.g. NaN) -> bit 0 (COMPARISON_UNORDERED)
 */
#define COMPARISON_BIT(x, y) (1 << (2 * ((x) >= (y)) + ((x) <= (y))))

/*
 * The following bits are chosen so that the value of
 * COMPARISON_BIT(left, right)
 * masked by the values below will be non-zero if the
 * comparison is true, and zero if it is false */
/* This is for values that are unordered, ie. NaN, not types that are unordered, e.g. sets */
#define COMPARISON_UNORDERED 1
#define COMPARISON_LESS_THAN 2
#define COMPARISON_GREATER_THAN 4
#define COMPARISON_EQUALS 8
#define COMPARISON_NOT_EQUALS (COMPARISON_UNORDERED | COMPARISON_LESS_THAN | COMPARISON_GREATER_THAN)

extern int _Py_Instrument(PyCodeObject *co, PyInterpreterState *interp);
extern int _Py_GetBaseOpcode(PyCodeObject *code, int offset);
  394. #ifdef __cplusplus
  395. }
  396. #endif
  397. #endif /* !Py_INTERNAL_CODE_H */