ConcreteSyntaxTree.pyx 2.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788
  1. cdef extern from "graminit.c":
  2. ctypedef struct grammar:
  3. pass
  4. cdef grammar _PyParser_Grammar
  5. cdef int Py_file_input
  6. cdef extern from "node.h":
  7. ctypedef struct node
  8. void PyNode_Free(node* n)
  9. int NCH(node* n)
  10. node* CHILD(node* n, int ix)
  11. node* RCHILD(node* n, int ix)
  12. short TYPE(node* n)
  13. char* STR(node* n)
  14. cdef extern from "parsetok.h":
  15. ctypedef struct perrdetail:
  16. pass
  17. cdef void PyParser_SetError(perrdetail *err) except *
  18. cdef node * PyParser_ParseStringFlagsFilenameEx(
  19. const char * s,
  20. const char * filename,
  21. grammar * g,
  22. int start,
  23. perrdetail * err_ret,
  24. int * flags)
  25. import distutils.sysconfig
  26. import os
  27. import re
  28. def extract_names(path):
  29. # All parse tree types are #defined in these files as ints.
  30. type_names = {}
  31. for line in open(path):
  32. if line.startswith('#define'):
  33. try:
  34. _, name, value = line.strip().split()
  35. type_names[int(value)] = name
  36. except:
  37. pass
  38. return type_names
  39. cdef dict type_names = {}
  40. cdef print_tree(node* n, indent=""):
  41. if not type_names:
  42. type_names.update(extract_names(
  43. os.path.join(distutils.sysconfig.get_python_inc(), 'token.h')))
  44. type_names.update(extract_names(
  45. os.path.join(os.path.dirname(__file__), 'graminit.h')))
  46. print indent, type_names.get(TYPE(n), 'unknown'), <object>STR(n) if NCH(n) == 0 else NCH(n)
  47. indent += " "
  48. for i in range(NCH(n)):
  49. print_tree(CHILD(n, i), indent)
  50. def handle_includes(source, path):
  51. # TODO: Use include directory.
  52. def include_here(include_line):
  53. included = os.path.join(os.path.dirname(path), include_line.group(1)[1:-1])
  54. if not os.path.exists(included):
  55. return include_line.group(0) + ' # no such path: ' + included
  56. return handle_includes(open(included).read(), path)
  57. # TODO: Proper string tokenizing.
  58. return re.sub(r'^include\s+([^\n]+[\'"])\s*(#.*)?$', include_here, source, flags=re.M)
  59. def p_module(path):
  60. cdef perrdetail err
  61. cdef int flags
  62. cdef node* n
  63. source = open(path).read()
  64. if '\ninclude ' in source:
  65. # TODO: Tokanizer needs to understand includes.
  66. source = handle_includes(source, path)
  67. path = "preparse(%s)" % path
  68. n = PyParser_ParseStringFlagsFilenameEx(
  69. source,
  70. path,
  71. &_PyParser_Grammar,
  72. Py_file_input,
  73. &err,
  74. &flags)
  75. if n:
  76. # print_tree(n)
  77. PyNode_Free(n)
  78. else:
  79. PyParser_SetError(&err)