/* pegen.h */
  1. #ifndef PEGEN_H
  2. #define PEGEN_H
  3. #define PY_SSIZE_T_CLEAN
  4. #include <Python.h>
  5. #include <pycore_ast.h>
  6. #include <pycore_token.h>
  /* PyPARSE_* flag values.
     Definitions wrapped in "#if 0" are compiled out; they are kept here
     exactly as found.
     NOTE(review): presumably these are OR-ed together into Parser.flags --
     confirm against the callers. */
  #if 0
  #define PyPARSE_YIELD_IS_KEYWORD 0x0001
  #endif

  #define PyPARSE_DONT_IMPLY_DEDENT 0x0002

  #if 0
  #define PyPARSE_WITH_IS_KEYWORD 0x0003
  #define PyPARSE_PRINT_IS_FUNCTION 0x0004
  #define PyPARSE_UNICODE_LITERALS 0x0008
  #endif

  #define PyPARSE_IGNORE_COOKIE 0x0010
  #define PyPARSE_BARRY_AS_BDFL 0x0020
  #define PyPARSE_TYPE_COMMENTS 0x0040
  #define PyPARSE_ASYNC_HACKS 0x0080
  #define PyPARSE_ALLOW_INCOMPLETE_INPUT 0x0100

  /* Sentinel position value.  RAISE_ERROR_KNOWN_LOCATION forwards a column
     equal to CURRENT_POS unchanged instead of adding 1 to it, and
     RAISE_SYNTAX_ERROR_STARTING_FROM passes it as the end position. */
  #define CURRENT_POS (-5)
  /* One entry of a singly linked list of memo records, chained through
     `next`.  A Token stores a pointer to such a list in its `memo` field.
     NOTE(review): type/node/mark presumably hold the values passed to
     _PyPegen_insert_memo() -- confirm in the implementation file. */
  typedef struct _memo {
      int type;
      void *node;
      int mark;
      struct _memo *next;   /* following entry in the list */
  } Memo;
  28. typedef struct {
  29. int type;
  30. PyObject *bytes;
  31. int level;
  32. int lineno, col_offset, end_lineno, end_col_offset;
  33. Memo *memo;
  34. PyObject *metadata;
  35. } Token;
  /* Pairs a string with an integer token type. */
  typedef struct {
      const char *str;
      int type;
  } KeywordToken;
  /* Array of (line number, comment text) records.
     NOTE(review): `size` is presumably the allocated capacity and
     `num_items` the number of entries in use -- confirm in the
     implementation file. */
  typedef struct {
      struct {
          int lineno;
          char *comment;  // The " <tag>" in "# type: ignore <tag>"
      } *items;
      size_t size;
      size_t num_items;
  } growable_comment_array;
  48. typedef struct {
  49. struct tok_state *tok;
  50. Token **tokens;
  51. int mark;
  52. int fill, size;
  53. PyArena *arena;
  54. KeywordToken **keywords;
  55. char **soft_keywords;
  56. int n_keyword_lists;
  57. int start_rule;
  58. int *errcode;
  59. int parsing_started;
  60. PyObject* normalize;
  61. int starting_lineno;
  62. int starting_col_offset;
  63. int error_indicator;
  64. int flags;
  65. int feature_version;
  66. growable_comment_array type_ignore_comments;
  67. Token *known_err_token;
  68. int level;
  69. int call_invalid_rules;
  70. int debug;
  71. } Parser;
  72. typedef struct {
  73. cmpop_ty cmpop;
  74. expr_ty expr;
  75. } CmpopExprPair;
  76. typedef struct {
  77. expr_ty key;
  78. expr_ty value;
  79. } KeyValuePair;
  80. typedef struct {
  81. expr_ty key;
  82. pattern_ty pattern;
  83. } KeyPatternPair;
  84. typedef struct {
  85. arg_ty arg;
  86. expr_ty value;
  87. } NameDefaultPair;
  88. typedef struct {
  89. asdl_arg_seq *plain_names;
  90. asdl_seq *names_with_defaults; // asdl_seq* of NameDefaultsPair's
  91. } SlashWithDefault;
  92. typedef struct {
  93. arg_ty vararg;
  94. asdl_seq *kwonlyargs; // asdl_seq* of NameDefaultsPair's
  95. arg_ty kwarg;
  96. } StarEtc;
  97. typedef struct { operator_ty kind; } AugOperator;
  /* An untyped element pointer plus an integer flag.
     NOTE(review): is_keyword presumably tells which of two element kinds
     `element` points to -- confirm against _PyPegen_keyword_or_starred(). */
  typedef struct {
      void *element;
      int is_keyword;
  } KeywordOrStarred;
  102. typedef struct {
  103. void *result;
  104. PyObject *metadata;
  105. } ResultTokenWithMetadata;
  106. // Internal parser functions
  107. #if defined(Py_DEBUG)
  108. void _PyPegen_clear_memo_statistics(void);
  109. PyObject *_PyPegen_get_memo_statistics(void);
  110. #endif
  111. int _PyPegen_insert_memo(Parser *p, int mark, int type, void *node);
  112. int _PyPegen_update_memo(Parser *p, int mark, int type, void *node);
  113. int _PyPegen_is_memoized(Parser *p, int type, void *pres);
  114. int _PyPegen_lookahead_with_name(int, expr_ty (func)(Parser *), Parser *);
  115. int _PyPegen_lookahead_with_int(int, Token *(func)(Parser *, int), Parser *, int);
  116. int _PyPegen_lookahead_with_string(int , expr_ty (func)(Parser *, const char*), Parser *, const char*);
  117. int _PyPegen_lookahead(int, void *(func)(Parser *), Parser *);
  118. Token *_PyPegen_expect_token(Parser *p, int type);
  119. void* _PyPegen_expect_forced_result(Parser *p, void* result, const char* expected);
  120. Token *_PyPegen_expect_forced_token(Parser *p, int type, const char* expected);
  121. expr_ty _PyPegen_expect_soft_keyword(Parser *p, const char *keyword);
  122. expr_ty _PyPegen_soft_keyword_token(Parser *p);
  123. expr_ty _PyPegen_fstring_middle_token(Parser* p);
  124. Token *_PyPegen_get_last_nonnwhitespace_token(Parser *);
  125. int _PyPegen_fill_token(Parser *p);
  126. expr_ty _PyPegen_name_token(Parser *p);
  127. expr_ty _PyPegen_number_token(Parser *p);
  128. void *_PyPegen_string_token(Parser *p);
  129. Py_ssize_t _PyPegen_byte_offset_to_character_offset_line(PyObject *line, Py_ssize_t col_offset, Py_ssize_t end_col_offset);
  130. Py_ssize_t _PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset);
  131. Py_ssize_t _PyPegen_byte_offset_to_character_offset_raw(const char*, Py_ssize_t col_offset);
  132. Py_ssize_t _PyPegen_calculate_display_width(PyObject *segment, Py_ssize_t character_offset);
  // Error handling functions and APIs

  /* Kind of target list being checked.  _RAISE_SYNTAX_ERROR_INVALID_TARGET
     reports "cannot assign to %s" for STAR_TARGETS and FOR_TARGETS and
     "cannot delete %s" for DEL_TARGETS. */
  typedef enum {
      STAR_TARGETS,
      DEL_TARGETS,
      FOR_TARGETS
  } TARGETS_TYPE;
  139. int _Pypegen_raise_decode_error(Parser *p);
  140. void _PyPegen_raise_tokenizer_init_error(PyObject *filename);
  141. int _Pypegen_tokenizer_error(Parser *p);
  142. void *_PyPegen_raise_error(Parser *p, PyObject *errtype, int use_mark, const char *errmsg, ...);
  143. void *_PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
  144. Py_ssize_t lineno, Py_ssize_t col_offset,
  145. Py_ssize_t end_lineno, Py_ssize_t end_col_offset,
  146. const char *errmsg, va_list va);
  147. void _Pypegen_set_syntax_error(Parser* p, Token* last_token);
  148. void _Pypegen_stack_overflow(Parser *p);
  149. Py_LOCAL_INLINE(void *)
  150. RAISE_ERROR_KNOWN_LOCATION(Parser *p, PyObject *errtype,
  151. Py_ssize_t lineno, Py_ssize_t col_offset,
  152. Py_ssize_t end_lineno, Py_ssize_t end_col_offset,
  153. const char *errmsg, ...)
  154. {
  155. va_list va;
  156. va_start(va, errmsg);
  157. Py_ssize_t _col_offset = (col_offset == CURRENT_POS ? CURRENT_POS : col_offset + 1);
  158. Py_ssize_t _end_col_offset = (end_col_offset == CURRENT_POS ? CURRENT_POS : end_col_offset + 1);
  159. _PyPegen_raise_error_known_location(p, errtype, lineno, _col_offset, end_lineno, _end_col_offset, errmsg, va);
  160. va_end(va);
  161. return NULL;
  162. }
  /* Error-raising shorthands.  Every macro below expands to a call that uses
     a variable named `p`, which must be in scope at the point of use.
     `msg` is a format string; the remaining arguments are its values. */

  /* Call _PyPegen_raise_error() with use_mark = 0. */
  #define RAISE_SYNTAX_ERROR(msg, ...) _PyPegen_raise_error(p, PyExc_SyntaxError, 0, msg, ##__VA_ARGS__)
  #define RAISE_INDENTATION_ERROR(msg, ...) _PyPegen_raise_error(p, PyExc_IndentationError, 0, msg, ##__VA_ARGS__)
  /* Same as RAISE_SYNTAX_ERROR but with use_mark = 1. */
  #define RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN(msg, ...) _PyPegen_raise_error(p, PyExc_SyntaxError, 1, msg, ##__VA_ARGS__)

  /* Location runs from the start of `a` to the end of `b`. */
  #define RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, msg, ...) \
      RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, (a)->lineno, (a)->col_offset, (b)->end_lineno, (b)->end_col_offset, msg, ##__VA_ARGS__)
  /* Location is the start and end of `a`. */
  #define RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, msg, ...) \
      RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, (a)->lineno, (a)->col_offset, (a)->end_lineno, (a)->end_col_offset, msg, ##__VA_ARGS__)
  /* Location starts at `a`; CURRENT_POS is passed for the end position. */
  #define RAISE_SYNTAX_ERROR_STARTING_FROM(a, msg, ...) \
      RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, (a)->lineno, (a)->col_offset, CURRENT_POS, CURRENT_POS, msg, ##__VA_ARGS__)

  /* Forwards to the inline function of the same name with a leading underscore. */
  #define RAISE_SYNTAX_ERROR_INVALID_TARGET(type, e) _RAISE_SYNTAX_ERROR_INVALID_TARGET(p, type, e)
  173. Py_LOCAL_INLINE(void *)
  174. CHECK_CALL(Parser *p, void *result)
  175. {
  176. if (result == NULL) {
  177. assert(PyErr_Occurred());
  178. p->error_indicator = 1;
  179. }
  180. return result;
  181. }
  182. /* This is needed for helper functions that are allowed to
  183. return NULL without an error. Example: _PyPegen_seq_extract_starred_exprs */
  184. Py_LOCAL_INLINE(void *)
  185. CHECK_CALL_NULL_ALLOWED(Parser *p, void *result)
  186. {
  187. if (result == NULL && PyErr_Occurred()) {
  188. p->error_indicator = 1;
  189. }
  190. return result;
  191. }
  192. #define CHECK(type, result) ((type) CHECK_CALL(p, result))
  193. #define CHECK_NULL_ALLOWED(type, result) ((type) CHECK_CALL_NULL_ALLOWED(p, result))
  194. expr_ty _PyPegen_get_invalid_target(expr_ty e, TARGETS_TYPE targets_type);
  195. const char *_PyPegen_get_expr_name(expr_ty);
  196. Py_LOCAL_INLINE(void *)
  197. _RAISE_SYNTAX_ERROR_INVALID_TARGET(Parser *p, TARGETS_TYPE type, void *e)
  198. {
  199. expr_ty invalid_target = CHECK_NULL_ALLOWED(expr_ty, _PyPegen_get_invalid_target(e, type));
  200. if (invalid_target != NULL) {
  201. const char *msg;
  202. if (type == STAR_TARGETS || type == FOR_TARGETS) {
  203. msg = "cannot assign to %s";
  204. }
  205. else {
  206. msg = "cannot delete %s";
  207. }
  208. return RAISE_SYNTAX_ERROR_KNOWN_LOCATION(
  209. invalid_target,
  210. msg,
  211. _PyPegen_get_expr_name(invalid_target)
  212. );
  213. return RAISE_SYNTAX_ERROR_KNOWN_LOCATION(invalid_target, "invalid syntax");
  214. }
  215. return NULL;
  216. }
  217. // Action utility functions
  218. void *_PyPegen_dummy_name(Parser *p, ...);
  219. void * _PyPegen_seq_last_item(asdl_seq *seq);
  220. #define PyPegen_last_item(seq, type) ((type)_PyPegen_seq_last_item((asdl_seq*)seq))
  221. void * _PyPegen_seq_first_item(asdl_seq *seq);
  222. #define PyPegen_first_item(seq, type) ((type)_PyPegen_seq_first_item((asdl_seq*)seq))
  /* Evaluate `expr` and discard the value. */
  #define UNUSED(expr) do { (void)(expr); } while (0)

  /* Expand to five comma-separated arguments: a start line/column, an end
     line/column, and p->arena.  Both macros use a variable named `p` from
     the enclosing scope.

     EXTRA_EXPR takes the start from `head` and the end from `tail`; every
     use of an argument is parenthesized so any pointer expression works.
     EXTRA reads the position from local variables named _start_lineno,
     _start_col_offset, _end_lineno and _end_col_offset. */
  #define EXTRA_EXPR(head, tail) (head)->lineno, (head)->col_offset, (tail)->end_lineno, (tail)->end_col_offset, p->arena
  #define EXTRA _start_lineno, _start_col_offset, _end_lineno, _end_col_offset, p->arena
  226. PyObject *_PyPegen_new_type_comment(Parser *, const char *);
  227. Py_LOCAL_INLINE(PyObject *)
  228. NEW_TYPE_COMMENT(Parser *p, Token *tc)
  229. {
  230. if (tc == NULL) {
  231. return NULL;
  232. }
  233. const char *bytes = PyBytes_AsString(tc->bytes);
  234. if (bytes == NULL) {
  235. goto error;
  236. }
  237. PyObject *tco = _PyPegen_new_type_comment(p, bytes);
  238. if (tco == NULL) {
  239. goto error;
  240. }
  241. return tco;
  242. error:
  243. p->error_indicator = 1; // Inline CHECK_CALL
  244. return NULL;
  245. }
  246. Py_LOCAL_INLINE(void *)
  247. INVALID_VERSION_CHECK(Parser *p, int version, char *msg, void *node)
  248. {
  249. if (node == NULL) {
  250. p->error_indicator = 1; // Inline CHECK_CALL
  251. return NULL;
  252. }
  253. if (p->feature_version < version) {
  254. p->error_indicator = 1;
  255. return RAISE_SYNTAX_ERROR("%s only supported in Python 3.%i and greater",
  256. msg, version);
  257. }
  258. return node;
  259. }
  260. #define CHECK_VERSION(type, version, msg, node) ((type) INVALID_VERSION_CHECK(p, version, msg, node))
  261. arg_ty _PyPegen_add_type_comment_to_arg(Parser *, arg_ty, Token *);
  262. PyObject *_PyPegen_new_identifier(Parser *, const char *);
  263. asdl_seq *_PyPegen_singleton_seq(Parser *, void *);
  264. asdl_seq *_PyPegen_seq_insert_in_front(Parser *, void *, asdl_seq *);
  265. asdl_seq *_PyPegen_seq_append_to_end(Parser *, asdl_seq *, void *);
  266. asdl_seq *_PyPegen_seq_flatten(Parser *, asdl_seq *);
  267. expr_ty _PyPegen_join_names_with_dot(Parser *, expr_ty, expr_ty);
  268. int _PyPegen_seq_count_dots(asdl_seq *);
  269. alias_ty _PyPegen_alias_for_star(Parser *, int, int, int, int, PyArena *);
  270. asdl_identifier_seq *_PyPegen_map_names_to_ids(Parser *, asdl_expr_seq *);
  271. CmpopExprPair *_PyPegen_cmpop_expr_pair(Parser *, cmpop_ty, expr_ty);
  272. asdl_int_seq *_PyPegen_get_cmpops(Parser *p, asdl_seq *);
  273. asdl_expr_seq *_PyPegen_get_exprs(Parser *, asdl_seq *);
  274. expr_ty _PyPegen_set_expr_context(Parser *, expr_ty, expr_context_ty);
  275. KeyValuePair *_PyPegen_key_value_pair(Parser *, expr_ty, expr_ty);
  276. asdl_expr_seq *_PyPegen_get_keys(Parser *, asdl_seq *);
  277. asdl_expr_seq *_PyPegen_get_values(Parser *, asdl_seq *);
  278. KeyPatternPair *_PyPegen_key_pattern_pair(Parser *, expr_ty, pattern_ty);
  279. asdl_expr_seq *_PyPegen_get_pattern_keys(Parser *, asdl_seq *);
  280. asdl_pattern_seq *_PyPegen_get_patterns(Parser *, asdl_seq *);
  281. NameDefaultPair *_PyPegen_name_default_pair(Parser *, arg_ty, expr_ty, Token *);
  282. SlashWithDefault *_PyPegen_slash_with_default(Parser *, asdl_arg_seq *, asdl_seq *);
  283. StarEtc *_PyPegen_star_etc(Parser *, arg_ty, asdl_seq *, arg_ty);
  284. arguments_ty _PyPegen_make_arguments(Parser *, asdl_arg_seq *, SlashWithDefault *,
  285. asdl_arg_seq *, asdl_seq *, StarEtc *);
  286. arguments_ty _PyPegen_empty_arguments(Parser *);
  287. expr_ty _PyPegen_formatted_value(Parser *, expr_ty, Token *, ResultTokenWithMetadata *, ResultTokenWithMetadata *, Token *,
  288. int, int, int, int, PyArena *);
  289. AugOperator *_PyPegen_augoperator(Parser*, operator_ty type);
  290. stmt_ty _PyPegen_function_def_decorators(Parser *, asdl_expr_seq *, stmt_ty);
  291. stmt_ty _PyPegen_class_def_decorators(Parser *, asdl_expr_seq *, stmt_ty);
  292. KeywordOrStarred *_PyPegen_keyword_or_starred(Parser *, void *, int);
  293. asdl_expr_seq *_PyPegen_seq_extract_starred_exprs(Parser *, asdl_seq *);
  294. asdl_keyword_seq *_PyPegen_seq_delete_starred_exprs(Parser *, asdl_seq *);
  295. expr_ty _PyPegen_collect_call_seqs(Parser *, asdl_expr_seq *, asdl_seq *,
  296. int lineno, int col_offset, int end_lineno,
  297. int end_col_offset, PyArena *arena);
  298. expr_ty _PyPegen_constant_from_token(Parser* p, Token* tok);
  299. expr_ty _PyPegen_decoded_constant_from_token(Parser* p, Token* tok);
  300. expr_ty _PyPegen_constant_from_string(Parser* p, Token* tok);
  301. expr_ty _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *, int, int, int, int, PyArena *);
  302. expr_ty _PyPegen_FetchRawForm(Parser *p, int, int, int, int);
  303. expr_ty _PyPegen_ensure_imaginary(Parser *p, expr_ty);
  304. expr_ty _PyPegen_ensure_real(Parser *p, expr_ty);
  305. asdl_seq *_PyPegen_join_sequences(Parser *, asdl_seq *, asdl_seq *);
  306. int _PyPegen_check_barry_as_flufl(Parser *, Token *);
  307. int _PyPegen_check_legacy_stmt(Parser *p, expr_ty t);
  308. ResultTokenWithMetadata *_PyPegen_check_fstring_conversion(Parser *p, Token *, expr_ty t);
  309. ResultTokenWithMetadata *_PyPegen_setup_full_format_spec(Parser *, Token *, asdl_expr_seq *, int, int,
  310. int, int, PyArena *);
  311. mod_ty _PyPegen_make_module(Parser *, asdl_stmt_seq *);
  312. void *_PyPegen_arguments_parsing_error(Parser *, expr_ty);
  313. expr_ty _PyPegen_get_last_comprehension_item(comprehension_ty comprehension);
  314. void *_PyPegen_nonparen_genexp_in_call(Parser *p, expr_ty args, asdl_comprehension_seq *comprehensions);
  315. // Parser API
  316. Parser *_PyPegen_Parser_New(struct tok_state *, int, int, int, int *, PyArena *);
  317. void _PyPegen_Parser_Free(Parser *);
  318. mod_ty _PyPegen_run_parser_from_file_pointer(FILE *, int, PyObject *, const char *,
  319. const char *, const char *, PyCompilerFlags *, int *, PyArena *);
  320. void *_PyPegen_run_parser(Parser *);
  321. mod_ty _PyPegen_run_parser_from_string(const char *, int, PyObject *, PyCompilerFlags *, PyArena *);
  322. asdl_stmt_seq *_PyPegen_interactive_exit(Parser *);
  323. // TODO: move to the correct place in this file
  324. expr_ty _PyPegen_joined_str(Parser *p, Token* a, asdl_expr_seq* expr, Token*b);
  325. // Generated function in parse.c - function definition in python.gram
  326. void *_PyPegen_parse(Parser *);
  327. #endif