prql.py 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251
  1. """
  2. pygments.lexers.prql
  3. ~~~~~~~~~~~~~~~~~~~~
  4. Lexer for the PRQL query language.
  5. :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. from pygments.lexer import RegexLexer, combined, words, include, bygroups
  9. from pygments.token import Comment, Literal, Keyword, Name, Number, Operator, \
  10. Punctuation, String, Text, Whitespace
  11. __all__ = ['PrqlLexer']
  12. class PrqlLexer(RegexLexer):
  13. """
  14. For PRQL source code.
  15. grammar: https://github.com/PRQL/prql/tree/main/grammars
  16. """
  17. name = 'PRQL'
  18. url = 'https://prql-lang.org/'
  19. aliases = ['prql']
  20. filenames = ['*.prql']
  21. mimetypes = ['application/prql', 'application/x-prql']
  22. version_added = '2.17'
  23. builtinTypes = words((
  24. "bool",
  25. "int",
  26. "int8", "int16", "int32", "int64", "int128",
  27. "float",
  28. "text",
  29. "set"), suffix=r'\b')
  30. def innerstring_rules(ttype):
  31. return [
  32. # the new style '{}'.format(...) string formatting
  33. (r'\{'
  34. r'((\w+)((\.\w+)|(\[[^\]]+\]))*)?' # field name
  35. r'(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[E-GXb-gnosx%]?)?'
  36. r'\}', String.Interpol),
  37. (r'[^\\\'"%{\n]+', ttype),
  38. (r'[\'"\\]', ttype),
  39. (r'%|(\{{1,2})', ttype)
  40. ]
  41. def fstring_rules(ttype):
  42. return [
  43. (r'\}', String.Interpol),
  44. (r'\{', String.Interpol, 'expr-inside-fstring'),
  45. (r'[^\\\'"{}\n]+', ttype),
  46. (r'[\'"\\]', ttype),
  47. ]
  48. tokens = {
  49. 'root': [
  50. # Comments
  51. (r'#!.*', String.Doc),
  52. (r'#.*', Comment.Single),
  53. # Whitespace
  54. (r'\s+', Whitespace),
  55. # Modules
  56. (r'^(\s*)(module)(\s*)',
  57. bygroups(Whitespace, Keyword.Namespace, Whitespace),
  58. 'imports'),
  59. (builtinTypes, Keyword.Type),
  60. # Main
  61. (r'^prql ', Keyword.Reserved),
  62. ('let', Keyword.Declaration),
  63. include('keywords'),
  64. include('expr'),
  65. # Transforms
  66. (r'^[A-Za-z_][a-zA-Z0-9_]*', Keyword),
  67. ],
  68. 'expr': [
  69. # non-raw f-strings
  70. ('(f)(""")', bygroups(String.Affix, String.Double),
  71. combined('fstringescape', 'tdqf')),
  72. ("(f)(''')", bygroups(String.Affix, String.Single),
  73. combined('fstringescape', 'tsqf')),
  74. ('(f)(")', bygroups(String.Affix, String.Double),
  75. combined('fstringescape', 'dqf')),
  76. ("(f)(')", bygroups(String.Affix, String.Single),
  77. combined('fstringescape', 'sqf')),
  78. # non-raw s-strings
  79. ('(s)(""")', bygroups(String.Affix, String.Double),
  80. combined('stringescape', 'tdqf')),
  81. ("(s)(''')", bygroups(String.Affix, String.Single),
  82. combined('stringescape', 'tsqf')),
  83. ('(s)(")', bygroups(String.Affix, String.Double),
  84. combined('stringescape', 'dqf')),
  85. ("(s)(')", bygroups(String.Affix, String.Single),
  86. combined('stringescape', 'sqf')),
  87. # raw strings
  88. ('(?i)(r)(""")',
  89. bygroups(String.Affix, String.Double), 'tdqs'),
  90. ("(?i)(r)(''')",
  91. bygroups(String.Affix, String.Single), 'tsqs'),
  92. ('(?i)(r)(")',
  93. bygroups(String.Affix, String.Double), 'dqs'),
  94. ("(?i)(r)(')",
  95. bygroups(String.Affix, String.Single), 'sqs'),
  96. # non-raw strings
  97. ('"""', String.Double, combined('stringescape', 'tdqs')),
  98. ("'''", String.Single, combined('stringescape', 'tsqs')),
  99. ('"', String.Double, combined('stringescape', 'dqs')),
  100. ("'", String.Single, combined('stringescape', 'sqs')),
  101. # Time and dates
  102. (r'@\d{4}-\d{2}-\d{2}T\d{2}(:\d{2})?(:\d{2})?(\.\d{1,6})?(Z|[+-]\d{1,2}(:\d{1,2})?)?', Literal.Date),
  103. (r'@\d{4}-\d{2}-\d{2}', Literal.Date),
  104. (r'@\d{2}(:\d{2})?(:\d{2})?(\.\d{1,6})?(Z|[+-]\d{1,2}(:\d{1,2})?)?', Literal.Date),
  105. (r'[^\S\n]+', Text),
  106. include('numbers'),
  107. (r'->|=>|==|!=|>=|<=|~=|&&|\|\||\?\?|\/\/', Operator),
  108. (r'[-~+/*%=<>&^|.@]', Operator),
  109. (r'[]{}:(),;[]', Punctuation),
  110. include('functions'),
  111. # Variable Names
  112. (r'[A-Za-z_][a-zA-Z0-9_]*', Name.Variable),
  113. ],
  114. 'numbers': [
  115. (r'(\d(?:_?\d)*\.(?:\d(?:_?\d)*)?|(?:\d(?:_?\d)*)?\.\d(?:_?\d)*)'
  116. r'([eE][+-]?\d(?:_?\d)*)?', Number.Float),
  117. (r'\d(?:_?\d)*[eE][+-]?\d(?:_?\d)*j?', Number.Float),
  118. (r'0[oO](?:_?[0-7])+', Number.Oct),
  119. (r'0[bB](?:_?[01])+', Number.Bin),
  120. (r'0[xX](?:_?[a-fA-F0-9])+', Number.Hex),
  121. (r'\d(?:_?\d)*', Number.Integer),
  122. ],
  123. 'fstringescape': [
  124. include('stringescape'),
  125. ],
  126. 'bytesescape': [
  127. (r'\\([\\bfnrt"\']|\n|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
  128. ],
  129. 'stringescape': [
  130. (r'\\(N\{.*?\}|u\{[a-fA-F0-9]{1,6}\})', String.Escape),
  131. include('bytesescape')
  132. ],
  133. 'fstrings-single': fstring_rules(String.Single),
  134. 'fstrings-double': fstring_rules(String.Double),
  135. 'strings-single': innerstring_rules(String.Single),
  136. 'strings-double': innerstring_rules(String.Double),
  137. 'dqf': [
  138. (r'"', String.Double, '#pop'),
  139. (r'\\\\|\\"|\\\n', String.Escape), # included here for raw strings
  140. include('fstrings-double')
  141. ],
  142. 'sqf': [
  143. (r"'", String.Single, '#pop'),
  144. (r"\\\\|\\'|\\\n", String.Escape), # included here for raw strings
  145. include('fstrings-single')
  146. ],
  147. 'dqs': [
  148. (r'"', String.Double, '#pop'),
  149. (r'\\\\|\\"|\\\n', String.Escape), # included here for raw strings
  150. include('strings-double')
  151. ],
  152. 'sqs': [
  153. (r"'", String.Single, '#pop'),
  154. (r"\\\\|\\'|\\\n", String.Escape), # included here for raw strings
  155. include('strings-single')
  156. ],
  157. 'tdqf': [
  158. (r'"""', String.Double, '#pop'),
  159. include('fstrings-double'),
  160. (r'\n', String.Double)
  161. ],
  162. 'tsqf': [
  163. (r"'''", String.Single, '#pop'),
  164. include('fstrings-single'),
  165. (r'\n', String.Single)
  166. ],
  167. 'tdqs': [
  168. (r'"""', String.Double, '#pop'),
  169. include('strings-double'),
  170. (r'\n', String.Double)
  171. ],
  172. 'tsqs': [
  173. (r"'''", String.Single, '#pop'),
  174. include('strings-single'),
  175. (r'\n', String.Single)
  176. ],
  177. 'expr-inside-fstring': [
  178. (r'[{([]', Punctuation, 'expr-inside-fstring-inner'),
  179. # without format specifier
  180. (r'(=\s*)?' # debug (https://bugs.python.org/issue36817)
  181. r'\}', String.Interpol, '#pop'),
  182. # with format specifier
  183. # we'll catch the remaining '}' in the outer scope
  184. (r'(=\s*)?' # debug (https://bugs.python.org/issue36817)
  185. r':', String.Interpol, '#pop'),
  186. (r'\s+', Whitespace), # allow new lines
  187. include('expr'),
  188. ],
  189. 'expr-inside-fstring-inner': [
  190. (r'[{([]', Punctuation, 'expr-inside-fstring-inner'),
  191. (r'[])}]', Punctuation, '#pop'),
  192. (r'\s+', Whitespace), # allow new lines
  193. include('expr'),
  194. ],
  195. 'keywords': [
  196. (words((
  197. 'into', 'case', 'type', 'module', 'internal',
  198. ), suffix=r'\b'),
  199. Keyword),
  200. (words(('true', 'false', 'null'), suffix=r'\b'), Keyword.Constant),
  201. ],
  202. 'functions': [
  203. (words((
  204. "min", "max", "sum", "average", "stddev", "every", "any",
  205. "concat_array", "count", "lag", "lead", "first", "last",
  206. "rank", "rank_dense", "row_number", "round", "as", "in",
  207. "tuple_every", "tuple_map", "tuple_zip", "_eq", "_is_null",
  208. "from_text", "lower", "upper", "read_parquet", "read_csv"),
  209. suffix=r'\b'),
  210. Name.Function),
  211. ],
  212. 'comment': [
  213. (r'-(?!\})', Comment.Multiline),
  214. (r'\{-', Comment.Multiline, 'comment'),
  215. (r'[^-}]', Comment.Multiline),
  216. (r'-\}', Comment.Multiline, '#pop'),
  217. ],
  218. 'imports': [
  219. (r'\w+(\.\w+)*', Name.Class, '#pop'),
  220. ],
  221. }