parser.py 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166
  1. from typing import Union, Tuple
  2. from clickhouse_connect.driver.common import unescape_identifier
  3. # pylint: disable=too-many-branches
  4. def parse_callable(expr) -> Tuple[str, Tuple[Union[str, int], ...], str]:
  5. """
  6. Parses a single level ClickHouse optionally 'callable' function/identifier. The identifier is returned as the
  7. first value in the response tuple. If the expression is callable -- i.e. an identifier followed by 0 or more
  8. arguments in parentheses, the second returned value is a tuple of the comma separated arguments. The third and
  9. final tuple value is any text remaining after the initial expression for further parsing/processing.
  10. Examples:
  11. "Tuple(String, Enum('one' = 1, 'two' = 2))" will return "Tuple", ("String", "Enum('one' = 1,'two' = 2)"), ""
  12. "MergeTree() PARTITION BY key" will return "MergeTree", (), "PARTITION BY key"
  13. :param expr: ClickHouse DDL or Column Name expression
  14. :return: Tuple of the identifier, a tuple of arguments, and remaining text
  15. """
  16. expr = expr.strip()
  17. pos = expr.find('(')
  18. space = expr.find(' ')
  19. if pos == -1 and space == -1:
  20. return expr, (), ''
  21. if space != -1 and (pos == -1 or space < pos):
  22. return expr[:space], (), expr[space:].strip()
  23. name = expr[:pos]
  24. pos += 1 # Skip first paren
  25. values = []
  26. value = ''
  27. in_str = False
  28. level = 0
  29. def add_value():
  30. try:
  31. values.append(int(value))
  32. except ValueError:
  33. values.append(value)
  34. while True:
  35. char = expr[pos]
  36. pos += 1
  37. if in_str:
  38. value += char
  39. if char == "'":
  40. in_str = False
  41. elif char == '\\' and expr[pos] == "'" and expr[pos:pos + 4] != "' = " and expr[pos:pos + 2] != "')":
  42. value += expr[pos]
  43. pos += 1
  44. else:
  45. if level == 0:
  46. if char == ' ':
  47. space = pos
  48. temp_char = expr[space]
  49. while temp_char == ' ':
  50. space += 1
  51. temp_char = expr[space]
  52. if not value or temp_char in "()',=><0":
  53. char = temp_char
  54. pos = space + 1
  55. if char == ',':
  56. add_value()
  57. value = ''
  58. continue
  59. if char == ')':
  60. break
  61. if char == "'" and (not value or 'Enum' in value):
  62. in_str = True
  63. elif char == '(':
  64. level += 1
  65. elif char == ')' and level:
  66. level -= 1
  67. value += char
  68. if value != '':
  69. add_value()
  70. return name, tuple(values), expr[pos:].strip()
  71. def parse_enum(expr) -> Tuple[Tuple[str], Tuple[int]]:
  72. """
  73. Parse a ClickHouse enum definition expression of the form ('key1' = 1, 'key2' = 2)
  74. :param expr: ClickHouse enum expression/arguments
  75. :return: Parallel tuples of string enum keys and integer enum values
  76. """
  77. keys = []
  78. values = []
  79. pos = expr.find('(') + 1
  80. in_key = False
  81. key = []
  82. value = []
  83. while True:
  84. char = expr[pos]
  85. pos += 1
  86. if in_key:
  87. if char == "'":
  88. keys.append(''.join(key))
  89. key = []
  90. in_key = False
  91. elif char == '\\' and expr[pos] == "'" and expr[pos:pos + 4] != "' = " and expr[pos:] != "')":
  92. key.append(expr[pos])
  93. pos += 1
  94. else:
  95. key.append(char)
  96. elif char not in (' ', '='):
  97. if char == ',':
  98. values.append(int(''.join(value)))
  99. value = []
  100. elif char == ')':
  101. values.append(int(''.join(value)))
  102. break
  103. elif char == "'" and not value:
  104. in_key = True
  105. else:
  106. value.append(char)
  107. values, keys = zip(*sorted(zip(values, keys)))
  108. return tuple(keys), tuple(values)
  109. def parse_columns(expr: str):
  110. """
  111. Parse a ClickHouse column list of the form (col1 String, col2 Array(Tuple(String, Int32))). This also handles
  112. unnamed columns (such as Tuple definitions). Mixed named and unnamed columns are not currently supported.
  113. :param expr: ClickHouse enum expression/arguments
  114. :return: Parallel tuples of column types and column types (strings)
  115. """
  116. names = []
  117. columns = []
  118. pos = 1
  119. named = False
  120. level = 0
  121. label = ''
  122. quote = None
  123. while True:
  124. char = expr[pos]
  125. pos += 1
  126. if quote:
  127. if char == quote:
  128. quote = None
  129. elif char == '\\' and expr[pos] == "'" and expr[pos:pos + 4] != "' = " and expr[pos:pos + 2] != "')":
  130. label += expr[pos]
  131. pos += 1
  132. else:
  133. if level == 0:
  134. if char == ' ':
  135. if label and not named:
  136. names.append(unescape_identifier(label))
  137. label = ''
  138. named = True
  139. char = ''
  140. elif char == ',':
  141. columns.append(label)
  142. named = False
  143. label = ''
  144. continue
  145. elif char == ')':
  146. columns.append(label)
  147. break
  148. if char in ("'", '`') and (not label or 'Enum' in label):
  149. quote = char
  150. elif char == '(':
  151. level += 1
  152. elif char == ')':
  153. level -= 1
  154. label += char
  155. return tuple(names), tuple(columns)