# sql.py
  1. """
  2. pygments.lexers.sql
  3. ~~~~~~~~~~~~~~~~~~~
  4. Lexers for various SQL dialects and related interactive sessions.
  5. Postgres specific lexers:
  6. `PostgresLexer`
  7. A SQL lexer for the PostgreSQL dialect. Differences w.r.t. the SQL
  8. lexer are:
  9. - keywords and data types list parsed from the PG docs (run the
  10. `_postgres_builtins` module to update them);
  11. - Content of $-strings parsed using a specific lexer, e.g. the content
  12. of a PL/Python function is parsed using the Python lexer;
  13. - parse PG specific constructs: E-strings, $-strings, U&-strings,
  14. different operators and punctuation.
  15. `PlPgsqlLexer`
  16. A lexer for the PL/pgSQL language. Adds a few specific construct on
  17. top of the PG SQL lexer (such as <<label>>).
  18. `PostgresConsoleLexer`
  19. A lexer to highlight an interactive psql session:
  20. - identifies the prompt and does its best to detect the end of command
  21. in multiline statement where not all the lines are prefixed by a
  22. prompt, telling them apart from the output;
  23. - highlights errors in the output and notification levels;
  24. - handles psql backslash commands.
  25. `PostgresExplainLexer`
  26. A lexer to highlight Postgres execution plan.
  27. The ``tests/examplefiles`` contains a few test files with data to be
  28. parsed by these lexers.
  29. :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
  30. :license: BSD, see LICENSE for details.
  31. """
  32. import re
  33. from pygments.lexer import Lexer, RegexLexer, do_insertions, bygroups, words
  34. from pygments.token import Punctuation, Whitespace, Text, Comment, Operator, \
  35. Keyword, Name, String, Number, Generic, Literal
  36. from pygments.lexers import get_lexer_by_name, ClassNotFound
  37. from pygments.lexers._postgres_builtins import KEYWORDS, DATATYPES, \
  38. PSEUDO_TYPES, PLPGSQL_KEYWORDS, EXPLAIN_KEYWORDS
  39. from pygments.lexers._mysql_builtins import \
  40. MYSQL_CONSTANTS, \
  41. MYSQL_DATATYPES, \
  42. MYSQL_FUNCTIONS, \
  43. MYSQL_KEYWORDS, \
  44. MYSQL_OPTIMIZER_HINTS
  45. from pygments.lexers import _tsql_builtins
# Public lexers exported by this module.
__all__ = ['PostgresLexer', 'PlPgsqlLexer', 'PostgresConsoleLexer',
           'PostgresExplainLexer', 'SqlLexer', 'TransactSqlLexer',
           'MySqlLexer', 'SqliteConsoleLexer', 'RqlLexer']

# Split console-session data into individual lines, keeping the newline.
line_re = re.compile('.*?\n')

# Prompt of the sqlite3 command-line client (primary and continuation).
sqlite_prompt_re = re.compile(r'^(?:sqlite| ...)>(?= )')

# LANGUAGE clause of CREATE FUNCTION; used by language_callback() to pick
# the lexer for the body of a dollar-quoted string.
language_re = re.compile(r"\s+LANGUAGE\s+'?(\w+)'?", re.IGNORECASE)

# A DO statement: its dollar-quoted body defaults to plpgsql when no
# LANGUAGE clause is found nearby.
do_re = re.compile(r'\bDO\b', re.IGNORECASE)

# Regular expressions for analyse_text()
name_between_bracket_re = re.compile(r'\[[a-zA-Z_]\w*\]')
name_between_backtick_re = re.compile(r'`[a-zA-Z_]\w*`')
tsql_go_re = re.compile(r'\bgo\b', re.IGNORECASE)
tsql_declare_re = re.compile(r'\bdeclare\s+@', re.IGNORECASE)
tsql_variable_re = re.compile(r'@[a-zA-Z_]\w*\b')
def language_callback(lexer, match):
    """Parse the content of a $-string using a lexer.

    The lexer is chosen looking for a nearby LANGUAGE or assumed as
    plpgsql if inside a DO statement and no LANGUAGE has been found.
    """
    lx = None
    # Look ahead (up to 100 chars) for a LANGUAGE clause after the string.
    m = language_re.match(lexer.text[match.end():match.end()+100])
    if m is not None:
        lx = lexer._get_lexer(m.group(1))
    else:
        # Otherwise look behind for the closest LANGUAGE clause before it.
        m = list(language_re.finditer(
            lexer.text[max(0, match.start()-100):match.start()]))
        if m:
            lx = lexer._get_lexer(m[-1].group(1))
        else:
            # No LANGUAGE at all: a preceding DO implies a plpgsql body.
            m = list(do_re.finditer(
                lexer.text[max(0, match.start()-25):match.start()]))
            if m:
                lx = lexer._get_lexer('plpgsql')

    # 1 = $, 2 = delimiter, 3 = $
    yield (match.start(1), String, match.group(1))
    yield (match.start(2), String.Delimiter, match.group(2))
    yield (match.start(3), String, match.group(3))
    # 4 = string contents
    if lx:
        # Delegate the body to the language-specific lexer.
        # NOTE(review): the sub-lexer's offsets are relative to group(4),
        # not to the whole text — confirm downstream consumers expect this.
        yield from lx.get_tokens_unprocessed(match.group(4))
    else:
        yield (match.start(4), String, match.group(4))
    # 5 = $, 6 = delimiter, 7 = $
    yield (match.start(5), String, match.group(5))
    yield (match.start(6), String.Delimiter, match.group(6))
    yield (match.start(7), String, match.group(7))
class PostgresBase:
    """Base class for Postgres-related lexers.

    This is implemented as a mixin to avoid the Lexer metaclass kicking in.
    this way the different lexer don't have a common Lexer ancestor. If they
    had, _tokens could be created on this ancestor and not updated for the
    other classes, resulting e.g. in PL/pgSQL parsed as SQL. This shortcoming
    seem to suggest that regexp lexers are not really subclassable.
    """

    def get_tokens_unprocessed(self, text, *args):
        # Have a copy of the entire text to be used by `language_callback`.
        self.text = text
        yield from super().get_tokens_unprocessed(text, *args)

    def _get_lexer(self, lang):
        """Return a lexer for *lang*, trying a few name variations.

        ``sql`` maps back to the PostgreSQL lexer itself. For procedural
        language names, the ``pl`` prefix and/or the trailing ``u``
        (untrusted variant) are stripped: e.g. ``plpythonu`` also tries
        ``pythonu``, ``plpython`` and ``python``. Returns None when no
        matching lexer is found.
        """
        if lang.lower() == 'sql':
            return get_lexer_by_name('postgresql', **self.options)

        tries = [lang]
        if lang.startswith('pl'):
            tries.append(lang[2:])
        if lang.endswith('u'):
            tries.append(lang[:-1])
        if lang.startswith('pl') and lang.endswith('u'):
            tries.append(lang[2:-1])

        for lx in tries:
            try:
                return get_lexer_by_name(lx, **self.options)
            except ClassNotFound:
                pass
        else:
            # TODO: better logging
            # print >>sys.stderr, "language not found:", lang
            return None
class PostgresLexer(PostgresBase, RegexLexer):
    """
    Lexer for the PostgreSQL dialect of SQL.

    .. versionadded:: 1.5
    """

    name = 'PostgreSQL SQL dialect'
    aliases = ['postgresql', 'postgres']
    mimetypes = ['text/x-postgresql']

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'--.*\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            # Multi-word type names ("double precision") allow any whitespace
            # between the words.
            (r'(' + '|'.join(s.replace(" ", r"\s+")
                             for s in DATATYPES + PSEUDO_TYPES) + r')\b',
             Name.Builtin),
            (words(KEYWORDS, suffix=r'\b'), Keyword),
            (r'[+*/<>=~!@#%^&|`?-]+', Operator),
            (r'::', Operator),  # cast
            (r'\$\d+', Name.Variable),  # positional parameter, e.g. $1
            (r'([0-9]*\.[0-9]*|[0-9]+)(e[+-]?[0-9]+)?', Number.Float),
            (r'[0-9]+', Number.Integer),
            # E'...' escape strings and U&'...' Unicode strings
            (r"((?:E|U&)?)(')", bygroups(String.Affix, String.Single), 'string'),
            # quoted identifier
            (r'((?:U&)?)(")', bygroups(String.Affix, String.Name), 'quoted-ident'),
            # Dollar-quoted string $tag$...$tag$; body is delegated to the
            # function language's lexer by language_callback().
            (r'(?s)(\$)([^$]*)(\$)(.*?)(\$)(\2)(\$)', language_callback),
            (r'[a-z_]\w*', Name),

            # psql variable in SQL
            (r""":(['"]?)[a-z]\w*\b\1""", Name.Variable),

            (r'[;:()\[\]{},.]', Punctuation),
        ],
        'multiline-comments': [
            # /* ... */ comments nest, hence the state push.
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ],
        'string': [
            (r"[^']+", String.Single),
            (r"''", String.Single),  # doubled quote = escaped quote
            (r"'", String.Single, '#pop'),
        ],
        'quoted-ident': [
            (r'[^"]+', String.Name),
            (r'""', String.Name),  # doubled quote = escaped quote
            (r'"', String.Name, '#pop'),
        ],
    }
class PlPgsqlLexer(PostgresBase, RegexLexer):
    """
    Handle the extra syntax in Pl/pgSQL language.

    .. versionadded:: 1.5
    """

    name = 'PL/pgSQL'
    aliases = ['plpgsql']
    mimetypes = ['text/x-plpgsql']

    flags = re.IGNORECASE
    # Copy each rule list so modifications below don't touch PostgresLexer.
    tokens = {k: l[:] for (k, l) in PostgresLexer.tokens.items()}

    # extend the keywords list
    for i, pattern in enumerate(tokens['root']):
        if pattern[1] == Keyword:
            tokens['root'][i] = (
                words(KEYWORDS + PLPGSQL_KEYWORDS, suffix=r'\b'),
                Keyword)
            del i
            break
    else:
        # Guard against the parent's rule order silently changing.
        assert 0, "SQL keywords not found"

    # Add specific PL/pgSQL rules (before the SQL ones)
    tokens['root'][:0] = [
        (r'\%[a-z]\w*\b', Name.Builtin),  # actually, a datatype
        (r':=', Operator),
        (r'\<\<[a-z]\w*\>\>', Name.Label),
        (r'\#[a-z]\w*\b', Keyword.Pseudo),  # #variable_conflict
    ]
class PsqlRegexLexer(PostgresBase, RegexLexer):
    """
    Extend the PostgresLexer adding support specific for psql commands.

    This is not a complete psql lexer yet as it lacks prompt support
    and output rendering.
    """

    name = 'PostgreSQL console - regexp based lexer'
    aliases = []    # not public

    flags = re.IGNORECASE
    # Copy each rule list so modifications below don't touch PostgresLexer.
    tokens = {k: l[:] for (k, l) in PostgresLexer.tokens.items()}

    tokens['root'].append(
        (r'\\[^\s]+', Keyword.Pseudo, 'psql-command'))
    tokens['psql-command'] = [
        # NOTE(review): this pushes 'root' rather than popping ('#pop'),
        # so the state stack grows per backslash command — confirm intended.
        (r'\n', Text, 'root'),
        (r'\s+', Whitespace),
        (r'\\[^\s]+', Keyword.Pseudo),
        # psql variable, optionally quoted: :var, :'var', :"var"
        (r""":(['"]?)[a-z]\w*\b\1""", Name.Variable),
        (r"'(''|[^'])*'", String.Single),
        (r"`([^`])*`", String.Backtick),
        (r"[^\s]+", String.Symbol),
    ]
  219. re_prompt = re.compile(r'^(\S.*?)??[=\-\(\$\'\"][#>]')
  220. re_psql_command = re.compile(r'\s*\\')
  221. re_end_command = re.compile(r';\s*(--.*?)?$')
  222. re_psql_command = re.compile(r'(\s*)(\\.+?)(\s+)$')
  223. re_error = re.compile(r'(ERROR|FATAL):')
  224. re_message = re.compile(
  225. r'((?:DEBUG|INFO|NOTICE|WARNING|ERROR|'
  226. r'FATAL|HINT|DETAIL|CONTEXT|LINE [0-9]+):)(.*?\n)')
  227. class lookahead:
  228. """Wrap an iterator and allow pushing back an item."""
  229. def __init__(self, x):
  230. self.iter = iter(x)
  231. self._nextitem = None
  232. def __iter__(self):
  233. return self
  234. def send(self, i):
  235. self._nextitem = i
  236. return i
  237. def __next__(self):
  238. if self._nextitem is not None:
  239. ni = self._nextitem
  240. self._nextitem = None
  241. return ni
  242. return next(self.iter)
  243. next = __next__
class PostgresConsoleLexer(Lexer):
    """
    Lexer for psql sessions.

    .. versionadded:: 1.5
    """

    name = 'PostgreSQL console (psql)'
    aliases = ['psql', 'postgresql-console', 'postgres-console']
    mimetypes = ['text/x-postgresql-psql']

    def get_tokens_unprocessed(self, data):
        sql = PsqlRegexLexer(**self.options)

        # Wrapped in `lookahead` so a prompt line found while emitting
        # output can be pushed back and re-read as the next command.
        lines = lookahead(line_re.findall(data))

        # prompt-output cycle
        while 1:

            # consume the lines of the command: start with an optional prompt
            # and continue until the end of command is detected
            curcode = ''
            insertions = []
            for line in lines:
                # Identify a shell prompt in case of psql commandline example
                if line.startswith('$') and not curcode:
                    lexer = get_lexer_by_name('console', **self.options)
                    yield from lexer.get_tokens_unprocessed(line)
                    break

                # Identify a psql prompt
                mprompt = re_prompt.match(line)
                if mprompt is not None:
                    insertions.append((len(curcode),
                                       [(0, Generic.Prompt, mprompt.group())]))
                    curcode += line[len(mprompt.group()):]
                else:
                    curcode += line

                # Check if this is the end of the command
                # TODO: better handle multiline comments at the end with
                # a lexer with an external state?
                if re_psql_command.match(curcode) \
                   or re_end_command.search(curcode):
                    break

            # Emit the combined stream of command and prompt(s)
            yield from do_insertions(insertions,
                                     sql.get_tokens_unprocessed(curcode))

            # Emit the output lines
            out_token = Generic.Output
            for line in lines:
                mprompt = re_prompt.match(line)
                if mprompt is not None:
                    # push the line back to have it processed by the prompt
                    lines.send(line)
                    break

                mmsg = re_message.match(line)
                if mmsg is not None:
                    # ERROR/FATAL messages flip subsequent output to
                    # Generic.Error until the next command starts.
                    if mmsg.group(1).startswith("ERROR") \
                       or mmsg.group(1).startswith("FATAL"):
                        out_token = Generic.Error
                    yield (mmsg.start(1), Generic.Strong, mmsg.group(1))
                    yield (mmsg.start(2), out_token, mmsg.group(2))
                else:
                    yield (0, out_token, line)
            else:
                # for/else: input exhausted without finding a new prompt,
                # so the session is finished.
                return
  303. class PostgresExplainLexer(RegexLexer):
  304. """
  305. Handle PostgreSQL EXPLAIN output
  306. .. versionadded:: 2.15
  307. """
  308. name = 'PostgreSQL EXPLAIN dialect'
  309. aliases = ['postgres-explain']
  310. filenames = ['*.explain']
  311. mimetypes = ['text/x-postgresql-explain']
  312. tokens = {
  313. 'root': [
  314. (r'(:|\(|\)|ms|kB|->|\.\.|\,)', Punctuation),
  315. (r'(\s+)', Whitespace),
  316. # This match estimated cost and effectively measured counters with ANALYZE
  317. # Then, we move to instrumentation state
  318. (r'(cost)(=?)', bygroups(Name.Class, Punctuation), 'instrumentation'),
  319. (r'(actual)( )(=?)', bygroups(Name.Class, Whitespace, Punctuation), 'instrumentation'),
  320. # Misc keywords
  321. (words(('actual', 'Memory Usage', 'Memory', 'Buckets', 'Batches',
  322. 'originally', 'row', 'rows', 'Hits', 'Misses',
  323. 'Evictions', 'Overflows'), suffix=r'\b'),
  324. Comment.Single),
  325. (r'(hit|read|dirtied|written|write|time|calls)(=)', bygroups(Comment.Single, Operator)),
  326. (r'(shared|temp|local)', Keyword.Pseudo),
  327. # We move to sort state in order to emphasize specific keywords (especially disk access)
  328. (r'(Sort Method)(: )', bygroups(Comment.Preproc, Punctuation), 'sort'),
  329. # These keywords can be followed by an object, like a table
  330. (r'(Sort Key|Group Key|Presorted Key|Hash Key)(:)( )',
  331. bygroups(Comment.Preproc, Punctuation, Whitespace), 'object_name'),
  332. (r'(Cache Key|Cache Mode)(:)( )', bygroups(Comment, Punctuation, Whitespace), 'object_name'),
  333. # These keywords can be followed by a predicate
  334. (words(('Join Filter', 'Subplans Removed', 'Filter', 'Merge Cond',
  335. 'Hash Cond', 'Index Cond', 'Recheck Cond', 'Heap Blocks',
  336. 'TID Cond', 'Run Condition', 'Order By', 'Function Call',
  337. 'Table Function Call', 'Inner Unique', 'Params Evaluated',
  338. 'Single Copy', 'Sampling', 'One-Time Filter', 'Output',
  339. 'Relations', 'Remote SQL'), suffix=r'\b'),
  340. Comment.Preproc, 'predicate'),
  341. # Special keyword to handle ON CONFLICT
  342. (r'Conflict ', Comment.Preproc, 'conflict'),
  343. # Special keyword for InitPlan or SubPlan
  344. (r'(InitPlan|SubPlan)( )(\d+)( )',
  345. bygroups(Keyword, Whitespace, Number.Integer, Whitespace),
  346. 'init_plan'),
  347. (words(('Sort Method', 'Join Filter', 'Planning time',
  348. 'Planning Time', 'Execution time', 'Execution Time',
  349. 'Workers Planned', 'Workers Launched', 'Buffers',
  350. 'Planning', 'Worker', 'Query Identifier', 'Time',
  351. 'Full-sort Groups', 'Pre-sorted Groups'), suffix=r'\b'), Comment.Preproc),
  352. # Emphasize these keywords
  353. (words(('Rows Removed by Join Filter', 'Rows Removed by Filter',
  354. 'Rows Removed by Index Recheck',
  355. 'Heap Fetches', 'never executed'),
  356. suffix=r'\b'), Name.Exception),
  357. (r'(I/O Timings)(:)( )', bygroups(Name.Exception, Punctuation, Whitespace)),
  358. (words(EXPLAIN_KEYWORDS, suffix=r'\b'), Keyword),
  359. # join keywords
  360. (r'((Right|Left|Full|Semi|Anti) Join)', Keyword.Type),
  361. (r'(Parallel |Async |Finalize |Partial )', Comment.Preproc),
  362. (r'Backward', Comment.Preproc),
  363. (r'(Intersect|Except|Hash)', Comment.Preproc),
  364. (r'(CTE)( )(\w*)?', bygroups(Comment, Whitespace, Name.Variable)),
  365. # Treat "on" and "using" as a punctuation
  366. (r'(on|using)', Punctuation, 'object_name'),
  367. # strings
  368. (r"'(''|[^'])*'", String.Single),
  369. # numbers
  370. (r'\d+\.\d+', Number.Float),
  371. (r'(\d+)', Number.Integer),
  372. # boolean
  373. (r'(true|false)', Name.Constant),
  374. # explain header
  375. (r'\s*QUERY PLAN\s*\n\s*-+', Comment.Single),
  376. # Settings
  377. (r'(Settings)(:)( )', bygroups(Comment.Preproc, Punctuation, Whitespace), 'setting'),
  378. # Handle JIT counters
  379. (r'(JIT|Functions|Options|Timing)(:)', bygroups(Comment.Preproc, Punctuation)),
  380. (r'(Inlining|Optimization|Expressions|Deforming|Generation|Emission|Total)', Keyword.Pseudo),
  381. # Handle Triggers counters
  382. (r'(Trigger)( )(\S*)(:)( )',
  383. bygroups(Comment.Preproc, Whitespace, Name.Variable, Punctuation, Whitespace)),
  384. ],
  385. 'expression': [
  386. # matches any kind of parenthesized expression
  387. # the first opening paren is matched by the 'caller'
  388. (r'\(', Punctuation, '#push'),
  389. (r'\)', Punctuation, '#pop'),
  390. (r'(never executed)', Name.Exception),
  391. (r'[^)(]+', Comment),
  392. ],
  393. 'object_name': [
  394. # This is a cost or analyze measure
  395. (r'(\(cost)(=?)', bygroups(Name.Class, Punctuation), 'instrumentation'),
  396. (r'(\(actual)( )(=?)', bygroups(Name.Class, Whitespace, Punctuation), 'instrumentation'),
  397. # if object_name is parenthesized, mark opening paren as
  398. # punctuation, call 'expression', and exit state
  399. (r'\(', Punctuation, 'expression'),
  400. (r'(on)', Punctuation),
  401. # matches possibly schema-qualified table and column names
  402. (r'\w+(\.\w+)*( USING \S+| \w+ USING \S+)', Name.Variable),
  403. (r'\"?\w+\"?(?:\.\"?\w+\"?)?', Name.Variable),
  404. (r'\'\S*\'', Name.Variable),
  405. # if we encounter a comma, another object is listed
  406. (r',\n', Punctuation, 'object_name'),
  407. (r',', Punctuation, 'object_name'),
  408. # special case: "*SELECT*"
  409. (r'"\*SELECT\*( \d+)?"(.\w+)?', Name.Variable),
  410. (r'"\*VALUES\*(_\d+)?"(.\w+)?', Name.Variable),
  411. (r'"ANY_subquery"', Name.Variable),
  412. # Variable $1 ...
  413. (r'\$\d+', Name.Variable),
  414. # cast
  415. (r'::\w+', Name.Variable),
  416. (r' +', Whitespace),
  417. (r'"', Punctuation),
  418. (r'\[\.\.\.\]', Punctuation),
  419. (r'\)', Punctuation, '#pop'),
  420. ],
  421. 'predicate': [
  422. # if predicate is parenthesized, mark paren as punctuation
  423. (r'(\()([^\n]*)(\))', bygroups(Punctuation, Name.Variable, Punctuation), '#pop'),
  424. # otherwise color until newline
  425. (r'[^\n]*', Name.Variable, '#pop'),
  426. ],
  427. 'instrumentation': [
  428. (r'=|\.\.', Punctuation),
  429. (r' +', Whitespace),
  430. (r'(rows|width|time|loops)', Name.Class),
  431. (r'\d+\.\d+', Number.Float),
  432. (r'(\d+)', Number.Integer),
  433. (r'\)', Punctuation, '#pop'),
  434. ],
  435. 'conflict': [
  436. (r'(Resolution: )(\w+)', bygroups(Comment.Preproc, Name.Variable)),
  437. (r'(Arbiter \w+:)', Comment.Preproc, 'object_name'),
  438. (r'(Filter: )', Comment.Preproc, 'predicate'),
  439. ],
  440. 'setting': [
  441. (r'([a-z_]*?)(\s*)(=)(\s*)(\'.*?\')', bygroups(Name.Attribute, Whitespace, Operator, Whitespace, String)),
  442. (r'\, ', Punctuation),
  443. ],
  444. 'init_plan': [
  445. (r'\(', Punctuation),
  446. (r'returns \$\d+(,\$\d+)?', Name.Variable),
  447. (r'\)', Punctuation, '#pop'),
  448. ],
  449. 'sort': [
  450. (r':|kB', Punctuation),
  451. (r'(quicksort|top-N|heapsort|Average|Memory|Peak)', Comment.Prepoc),
  452. (r'(external|merge|Disk|sort)', Name.Exception),
  453. (r'(\d+)', Number.Integer),
  454. (r' +', Whitespace),
  455. ],
  456. }
  457. class SqlLexer(RegexLexer):
  458. """
  459. Lexer for Structured Query Language. Currently, this lexer does
  460. not recognize any special syntax except ANSI SQL.
  461. """
  462. name = 'SQL'
  463. aliases = ['sql']
  464. filenames = ['*.sql']
  465. mimetypes = ['text/x-sql']
  466. flags = re.IGNORECASE
  467. tokens = {
  468. 'root': [
  469. (r'\s+', Whitespace),
  470. (r'--.*\n?', Comment.Single),
  471. (r'/\*', Comment.Multiline, 'multiline-comments'),
  472. (words((
  473. 'ABORT', 'ABS', 'ABSOLUTE', 'ACCESS', 'ADA', 'ADD', 'ADMIN', 'AFTER',
  474. 'AGGREGATE', 'ALIAS', 'ALL', 'ALLOCATE', 'ALTER', 'ANALYSE', 'ANALYZE',
  475. 'AND', 'ANY', 'ARE', 'AS', 'ASC', 'ASENSITIVE', 'ASSERTION', 'ASSIGNMENT',
  476. 'ASYMMETRIC', 'AT', 'ATOMIC', 'AUTHORIZATION', 'AVG', 'BACKWARD',
  477. 'BEFORE', 'BEGIN', 'BETWEEN', 'BITVAR', 'BIT_LENGTH', 'BOTH', 'BREADTH',
  478. 'BY', 'C', 'CACHE', 'CALL', 'CALLED', 'CARDINALITY', 'CASCADE',
  479. 'CASCADED', 'CASE', 'CAST', 'CATALOG', 'CATALOG_NAME', 'CHAIN',
  480. 'CHARACTERISTICS', 'CHARACTER_LENGTH', 'CHARACTER_SET_CATALOG',
  481. 'CHARACTER_SET_NAME', 'CHARACTER_SET_SCHEMA', 'CHAR_LENGTH', 'CHECK',
  482. 'CHECKED', 'CHECKPOINT', 'CLASS', 'CLASS_ORIGIN', 'CLOB', 'CLOSE',
  483. 'CLUSTER', 'COALESCE', 'COBOL', 'COLLATE', 'COLLATION',
  484. 'COLLATION_CATALOG', 'COLLATION_NAME', 'COLLATION_SCHEMA', 'COLUMN',
  485. 'COLUMN_NAME', 'COMMAND_FUNCTION', 'COMMAND_FUNCTION_CODE', 'COMMENT',
  486. 'COMMIT', 'COMMITTED', 'COMPLETION', 'CONDITION_NUMBER', 'CONNECT',
  487. 'CONNECTION', 'CONNECTION_NAME', 'CONSTRAINT', 'CONSTRAINTS',
  488. 'CONSTRAINT_CATALOG', 'CONSTRAINT_NAME', 'CONSTRAINT_SCHEMA',
  489. 'CONSTRUCTOR', 'CONTAINS', 'CONTINUE', 'CONVERSION', 'CONVERT',
  490. 'COPY', 'CORRESPONDING', 'COUNT', 'CREATE', 'CREATEDB', 'CREATEUSER',
  491. 'CROSS', 'CUBE', 'CURRENT', 'CURRENT_DATE', 'CURRENT_PATH',
  492. 'CURRENT_ROLE', 'CURRENT_TIME', 'CURRENT_TIMESTAMP', 'CURRENT_USER',
  493. 'CURSOR', 'CURSOR_NAME', 'CYCLE', 'DATA', 'DATABASE',
  494. 'DATETIME_INTERVAL_CODE', 'DATETIME_INTERVAL_PRECISION', 'DAY',
  495. 'DEALLOCATE', 'DECLARE', 'DEFAULT', 'DEFAULTS', 'DEFERRABLE',
  496. 'DEFERRED', 'DEFINED', 'DEFINER', 'DELETE', 'DELIMITER', 'DELIMITERS',
  497. 'DEREF', 'DESC', 'DESCRIBE', 'DESCRIPTOR', 'DESTROY', 'DESTRUCTOR',
  498. 'DETERMINISTIC', 'DIAGNOSTICS', 'DICTIONARY', 'DISCONNECT', 'DISPATCH',
  499. 'DISTINCT', 'DO', 'DOMAIN', 'DROP', 'DYNAMIC', 'DYNAMIC_FUNCTION',
  500. 'DYNAMIC_FUNCTION_CODE', 'EACH', 'ELSE', 'ELSIF', 'ENCODING',
  501. 'ENCRYPTED', 'END', 'END-EXEC', 'EQUALS', 'ESCAPE', 'EVERY', 'EXCEPTION',
  502. 'EXCEPT', 'EXCLUDING', 'EXCLUSIVE', 'EXEC', 'EXECUTE', 'EXISTING',
  503. 'EXISTS', 'EXPLAIN', 'EXTERNAL', 'EXTRACT', 'FALSE', 'FETCH', 'FINAL',
  504. 'FIRST', 'FOR', 'FORCE', 'FOREIGN', 'FORTRAN', 'FORWARD', 'FOUND', 'FREE',
  505. 'FREEZE', 'FROM', 'FULL', 'FUNCTION', 'G', 'GENERAL', 'GENERATED', 'GET',
  506. 'GLOBAL', 'GO', 'GOTO', 'GRANT', 'GRANTED', 'GROUP', 'GROUPING',
  507. 'HANDLER', 'HAVING', 'HIERARCHY', 'HOLD', 'HOST', 'IDENTITY', 'IF',
  508. 'IGNORE', 'ILIKE', 'IMMEDIATE', 'IMMEDIATELY', 'IMMUTABLE', 'IMPLEMENTATION', 'IMPLICIT',
  509. 'IN', 'INCLUDING', 'INCREMENT', 'INDEX', 'INDITCATOR', 'INFIX',
  510. 'INHERITS', 'INITIALIZE', 'INITIALLY', 'INNER', 'INOUT', 'INPUT',
  511. 'INSENSITIVE', 'INSERT', 'INSTANTIABLE', 'INSTEAD', 'INTERSECT', 'INTO',
  512. 'INVOKER', 'IS', 'ISNULL', 'ISOLATION', 'ITERATE', 'JOIN', 'KEY',
  513. 'KEY_MEMBER', 'KEY_TYPE', 'LANCOMPILER', 'LANGUAGE', 'LARGE', 'LAST',
  514. 'LATERAL', 'LEADING', 'LEFT', 'LENGTH', 'LESS', 'LEVEL', 'LIKE', 'LIMIT',
  515. 'LISTEN', 'LOAD', 'LOCAL', 'LOCALTIME', 'LOCALTIMESTAMP', 'LOCATION',
  516. 'LOCATOR', 'LOCK', 'LOWER', 'MAP', 'MATCH', 'MAX', 'MAXVALUE',
  517. 'MESSAGE_LENGTH', 'MESSAGE_OCTET_LENGTH', 'MESSAGE_TEXT', 'METHOD', 'MIN',
  518. 'MINUTE', 'MINVALUE', 'MOD', 'MODE', 'MODIFIES', 'MODIFY', 'MONTH',
  519. 'MORE', 'MOVE', 'MUMPS', 'NAMES', 'NATIONAL', 'NATURAL', 'NCHAR', 'NCLOB',
  520. 'NEW', 'NEXT', 'NO', 'NOCREATEDB', 'NOCREATEUSER', 'NONE', 'NOT',
  521. 'NOTHING', 'NOTIFY', 'NOTNULL', 'NULL', 'NULLABLE', 'NULLIF', 'OBJECT',
  522. 'OCTET_LENGTH', 'OF', 'OFF', 'OFFSET', 'OIDS', 'OLD', 'ON', 'ONLY',
  523. 'OPEN', 'OPERATION', 'OPERATOR', 'OPTION', 'OPTIONS', 'OR', 'ORDER',
  524. 'ORDINALITY', 'OUT', 'OUTER', 'OUTPUT', 'OVERLAPS', 'OVERLAY',
  525. 'OVERRIDING', 'OWNER', 'PAD', 'PARAMETER', 'PARAMETERS', 'PARAMETER_MODE',
  526. 'PARAMETER_NAME', 'PARAMETER_ORDINAL_POSITION',
  527. 'PARAMETER_SPECIFIC_CATALOG', 'PARAMETER_SPECIFIC_NAME',
  528. 'PARAMETER_SPECIFIC_SCHEMA', 'PARTIAL', 'PASCAL', 'PENDANT', 'PERIOD', 'PLACING',
  529. 'PLI', 'POSITION', 'POSTFIX', 'PRECEEDS', 'PRECISION', 'PREFIX', 'PREORDER',
  530. 'PREPARE', 'PRESERVE', 'PRIMARY', 'PRIOR', 'PRIVILEGES', 'PROCEDURAL',
  531. 'PROCEDURE', 'PUBLIC', 'READ', 'READS', 'RECHECK', 'RECURSIVE', 'REF',
  532. 'REFERENCES', 'REFERENCING', 'REINDEX', 'RELATIVE', 'RENAME',
  533. 'REPEATABLE', 'REPLACE', 'RESET', 'RESTART', 'RESTRICT', 'RESULT',
  534. 'RETURN', 'RETURNED_LENGTH', 'RETURNED_OCTET_LENGTH', 'RETURNED_SQLSTATE',
  535. 'RETURNS', 'REVOKE', 'RIGHT', 'ROLE', 'ROLLBACK', 'ROLLUP', 'ROUTINE',
  536. 'ROUTINE_CATALOG', 'ROUTINE_NAME', 'ROUTINE_SCHEMA', 'ROW', 'ROWS',
  537. 'ROW_COUNT', 'RULE', 'SAVE_POINT', 'SCALE', 'SCHEMA', 'SCHEMA_NAME',
  538. 'SCOPE', 'SCROLL', 'SEARCH', 'SECOND', 'SECURITY', 'SELECT', 'SELF',
  539. 'SENSITIVE', 'SERIALIZABLE', 'SERVER_NAME', 'SESSION', 'SESSION_USER',
  540. 'SET', 'SETOF', 'SETS', 'SHARE', 'SHOW', 'SIMILAR', 'SIMPLE', 'SIZE',
  541. 'SOME', 'SOURCE', 'SPACE', 'SPECIFIC', 'SPECIFICTYPE', 'SPECIFIC_NAME',
  542. 'SQL', 'SQLCODE', 'SQLERROR', 'SQLEXCEPTION', 'SQLSTATE', 'SQLWARNINIG',
  543. 'STABLE', 'START', 'STATE', 'STATEMENT', 'STATIC', 'STATISTICS', 'STDIN',
  544. 'STDOUT', 'STORAGE', 'STRICT', 'STRUCTURE', 'STYPE', 'SUBCLASS_ORIGIN',
  545. 'SUBLIST', 'SUBSTRING', 'SUCCEEDS', 'SUM', 'SYMMETRIC', 'SYSID', 'SYSTEM',
  546. 'SYSTEM_USER', 'TABLE', 'TABLE_NAME', ' TEMP', 'TEMPLATE', 'TEMPORARY',
  547. 'TERMINATE', 'THAN', 'THEN', 'TIME', 'TIMESTAMP', 'TIMEZONE_HOUR',
  548. 'TIMEZONE_MINUTE', 'TO', 'TOAST', 'TRAILING', 'TRANSACTION',
  549. 'TRANSACTIONS_COMMITTED', 'TRANSACTIONS_ROLLED_BACK', 'TRANSACTION_ACTIVE',
  550. 'TRANSFORM', 'TRANSFORMS', 'TRANSLATE', 'TRANSLATION', 'TREAT', 'TRIGGER',
  551. 'TRIGGER_CATALOG', 'TRIGGER_NAME', 'TRIGGER_SCHEMA', 'TRIM', 'TRUE',
  552. 'TRUNCATE', 'TRUSTED', 'TYPE', 'UNCOMMITTED', 'UNDER', 'UNENCRYPTED',
  553. 'UNION', 'UNIQUE', 'UNKNOWN', 'UNLISTEN', 'UNNAMED', 'UNNEST', 'UNTIL',
  554. 'UPDATE', 'UPPER', 'USAGE', 'USER', 'USER_DEFINED_TYPE_CATALOG',
  555. 'USER_DEFINED_TYPE_NAME', 'USER_DEFINED_TYPE_SCHEMA', 'USING', 'VACUUM',
  556. 'VALID', 'VALIDATOR', 'VALUES', 'VARIABLE', 'VERBOSE',
  557. 'VERSION', 'VERSIONS', 'VERSIONING', 'VIEW',
  558. 'VOLATILE', 'WHEN', 'WHENEVER', 'WHERE', 'WITH', 'WITHOUT', 'WORK',
  559. 'WRITE', 'YEAR', 'ZONE'), suffix=r'\b'),
  560. Keyword),
  561. (words((
  562. 'ARRAY', 'BIGINT', 'BINARY', 'BIT', 'BLOB', 'BOOLEAN', 'CHAR',
  563. 'CHARACTER', 'DATE', 'DEC', 'DECIMAL', 'FLOAT', 'INT', 'INTEGER',
  564. 'INTERVAL', 'NUMBER', 'NUMERIC', 'REAL', 'SERIAL', 'SMALLINT',
  565. 'VARCHAR', 'VARYING', 'INT8', 'SERIAL8', 'TEXT'), suffix=r'\b'),
  566. Name.Builtin),
  567. (r'[+*/<>=~!@#%^&|`?-]', Operator),
  568. (r'[0-9]+', Number.Integer),
  569. # TODO: Backslash escapes?
  570. (r"'(''|[^'])*'", String.Single),
  571. (r'"(""|[^"])*"', String.Symbol), # not a real string literal in ANSI SQL
  572. (r'[a-z_][\w$]*', Name), # allow $s in strings for Oracle
  573. (r'[;:()\[\],.]', Punctuation)
  574. ],
  575. 'multiline-comments': [
  576. (r'/\*', Comment.Multiline, 'multiline-comments'),
  577. (r'\*/', Comment.Multiline, '#pop'),
  578. (r'[^/*]+', Comment.Multiline),
  579. (r'[/*]', Comment.Multiline)
  580. ]
  581. }
  582. def analyse_text(self, text):
  583. return
class TransactSqlLexer(RegexLexer):
    """
    Transact-SQL (T-SQL) is Microsoft's and Sybase's proprietary extension to
    SQL.

    The list of keywords includes ODBC and keywords reserved for future use..
    """

    name = 'Transact-SQL'
    aliases = ['tsql', 't-sql']
    filenames = ['*.sql']
    mimetypes = ['text/x-tsql']

    flags = re.IGNORECASE

    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'--.*?$\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (words(_tsql_builtins.OPERATORS), Operator),
            (words(_tsql_builtins.OPERATOR_WORDS, suffix=r'\b'), Operator.Word),
            (words(_tsql_builtins.TYPES, suffix=r'\b'), Name.Class),
            (words(_tsql_builtins.FUNCTIONS, suffix=r'\b'), Name.Function),
            (r'(goto)(\s+)(\w+\b)', bygroups(Keyword, Whitespace, Name.Label)),
            (words(_tsql_builtins.KEYWORDS, suffix=r'\b'), Keyword),
            # [bracketed identifier]
            (r'(\[)([^]]+)(\])', bygroups(Operator, Name, Operator)),
            (r'0x[0-9a-f]+', Number.Hex),
            # Float variant 1, for example: 1., 1.e2, 1.2e3
            (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float),
            # Float variant 2, for example: .1, .1e2
            (r'\.[0-9]+(e[+-]?[0-9]+)?', Number.Float),
            # Float variant 3, for example: 123e45
            (r'[0-9]+e[+-]?[0-9]+', Number.Float),
            (r'[0-9]+', Number.Integer),
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Symbol),
            (r'[;(),.]', Punctuation),
            # Below we use \w even for the first "real" character because
            # tokens starting with a digit have already been recognized
            # as Number above.
            (r'@@\w+', Name.Builtin),
            (r'@\w+', Name.Variable),
            (r'(\w+)(:)', bygroups(Name.Label, Punctuation)),
            (r'#?#?\w+', Name),  # names for temp tables and anything else
            (r'\?', Name.Variable.Magic),  # parameter for prepared statements
        ],
        'multiline-comments': [
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ]
    }

    def analyse_text(text):
        """Score the likelihood that *text* is T-SQL.

        Defined without ``self``: Pygments' LexerMeta wraps analyse_text
        into a static analysator, so it receives only the text.
        """
        rating = 0
        if tsql_declare_re.search(text):
            # Found T-SQL variable declaration.
            rating = 1.0
        else:
            name_between_backtick_count = len(
                name_between_backtick_re.findall(text))
            name_between_bracket_count = len(
                name_between_bracket_re.findall(text))
            # We need to check if there are any names using
            # backticks or brackets, as otherwise both are 0
            # and 0 >= 2 * 0, so we would always assume it's true
            dialect_name_count = name_between_backtick_count + name_between_bracket_count
            if dialect_name_count >= 1 and \
               name_between_bracket_count >= 2 * name_between_backtick_count:
                # Found at least twice as many [name] as `name`.
                rating += 0.5
            elif name_between_bracket_count > name_between_backtick_count:
                rating += 0.2
            elif name_between_bracket_count > 0:
                rating += 0.1
        if tsql_variable_re.search(text) is not None:
            rating += 0.1
        if tsql_go_re.search(text) is not None:
            rating += 0.1
        return rating
class MySqlLexer(RegexLexer):
    """The Oracle MySQL lexer.

    This lexer does not attempt to maintain strict compatibility with
    MariaDB syntax or keywords. Although MySQL and MariaDB's common code
    history suggests there may be significant overlap between the two,
    compatibility between the two is not a target for this lexer.
    """

    name = 'MySQL'
    aliases = ['mysql']
    mimetypes = ['text/x-mysql']

    # MySQL keywords/identifiers are matched case-insensitively.
    flags = re.IGNORECASE

    # Note: rule order matters — RegexLexer tries rules top to bottom.
    tokens = {
        'root': [
            (r'\s+', Whitespace),

            # Comments
            (r'(?:#|--\s+).*', Comment.Single),
            (r'/\*\+', Comment.Special, 'optimizer-hints'),
            (r'/\*', Comment.Multiline, 'multiline-comment'),

            # Hexadecimal literals
            (r"x'([0-9a-f]{2})+'", Number.Hex),  # MySQL requires paired hex characters in this form.
            (r'0x[0-9a-f]+', Number.Hex),

            # Binary literals
            (r"b'[01]+'", Number.Bin),
            (r'0b[01]+', Number.Bin),

            # Numeric literals
            (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float),  # Mandatory integer, optional fraction and exponent
            (r'[0-9]*\.[0-9]+(e[+-]?[0-9]+)?', Number.Float),  # Mandatory fraction, optional integer and exponent
            (r'[0-9]+e[+-]?[0-9]+', Number.Float),  # Exponents with integer significands are still floats
            (r'[0-9]+(?=[^0-9a-z$_\u0080-\uffff])', Number.Integer),  # Integers that are not in a schema object name

            # Date literals (ODBC-style {d '...'} escape)
            (r"\{\s*d\s*(?P<quote>['\"])\s*\d{2}(\d{2})?.?\d{2}.?\d{2}\s*(?P=quote)\s*\}",
             Literal.Date),

            # Time literals (ODBC-style {t '...'} escape)
            (r"\{\s*t\s*(?P<quote>['\"])\s*(?:\d+\s+)?\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?\s*(?P=quote)\s*\}",
             Literal.Date),

            # Timestamp literals (ODBC-style {ts '...'} escape)
            (
                r"\{\s*ts\s*(?P<quote>['\"])\s*"
                r"\d{2}(?:\d{2})?.?\d{2}.?\d{2}"  # Date part
                r"\s+"  # Whitespace between date and time
                r"\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?"  # Time part
                r"\s*(?P=quote)\s*\}",
                Literal.Date
            ),

            # String literals
            (r"'", String.Single, 'single-quoted-string'),
            (r'"', String.Double, 'double-quoted-string'),

            # Variables
            (r'@@(?:global\.|persist\.|persist_only\.|session\.)?[a-z_]+', Name.Variable),
            (r'@[a-z0-9_$.]+', Name.Variable),
            (r"@'", Name.Variable, 'single-quoted-variable'),
            (r'@"', Name.Variable, 'double-quoted-variable'),
            (r"@`", Name.Variable, 'backtick-quoted-variable'),
            (r'\?', Name.Variable),  # For demonstrating prepared statements

            # Operators
            (r'[!%&*+/:<=>^|~-]+', Operator),

            # Exceptions; these words tokenize differently in different contexts.
            (r'\b(set)(?!\s*\()', Keyword),
            (r'\b(character)(\s+)(set)\b', bygroups(Keyword, Whitespace, Keyword)),
            # In all other known cases, "SET" is tokenized by MYSQL_DATATYPES.

            (words(MYSQL_CONSTANTS, prefix=r'\b', suffix=r'\b'), Name.Constant),
            (words(MYSQL_DATATYPES, prefix=r'\b', suffix=r'\b'), Keyword.Type),
            (words(MYSQL_KEYWORDS, prefix=r'\b', suffix=r'\b'), Keyword),
            (words(MYSQL_FUNCTIONS, prefix=r'\b', suffix=r'\b(\s*)(\()'),
             bygroups(Name.Function, Whitespace, Punctuation)),

            # Schema object names
            #
            # Note: Although the first regex supports unquoted all-numeric
            # identifiers, this will not be a problem in practice because
            # numeric literals have already been handled above.
            #
            ('[0-9a-z$_\u0080-\uffff]+', Name),
            (r'`', Name.Quoted, 'schema-object-name'),

            # Punctuation
            (r'[(),.;]', Punctuation),
        ],

        # Multiline comment substates
        # ---------------------------

        'optimizer-hints': [
            (r'[^*a-z]+', Comment.Special),
            (r'\*/', Comment.Special, '#pop'),
            (words(MYSQL_OPTIMIZER_HINTS, suffix=r'\b'), Comment.Preproc),
            ('[a-z]+', Comment.Special),
            (r'\*', Comment.Special),
        ],

        'multiline-comment': [
            (r'[^*]+', Comment.Multiline),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'\*', Comment.Multiline),
        ],

        # String substates
        # ----------------

        'single-quoted-string': [
            (r"[^'\\]+", String.Single),
            (r"''", String.Escape),  # doubled quote escapes the quote
            (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
            (r"'", String.Single, '#pop'),
        ],

        'double-quoted-string': [
            (r'[^"\\]+', String.Double),
            (r'""', String.Escape),  # doubled quote escapes the quote
            (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
            (r'"', String.Double, '#pop'),
        ],

        # Variable substates
        # ------------------

        'single-quoted-variable': [
            (r"[^']+", Name.Variable),
            (r"''", Name.Variable),
            (r"'", Name.Variable, '#pop'),
        ],

        'double-quoted-variable': [
            (r'[^"]+', Name.Variable),
            (r'""', Name.Variable),
            (r'"', Name.Variable, '#pop'),
        ],

        'backtick-quoted-variable': [
            (r'[^`]+', Name.Variable),
            (r'``', Name.Variable),
            (r'`', Name.Variable, '#pop'),
        ],

        # Schema object name substates
        # ----------------------------
        #
        # "Name.Quoted" and "Name.Quoted.Escape" are non-standard but
        # formatters will style them as "Name" by default but add
        # additional styles based on the token name. This gives users
        # flexibility to add custom styles as desired.
        #
        'schema-object-name': [
            (r'[^`]+', Name.Quoted),
            (r'``', Name.Quoted.Escape),
            (r'`', Name.Quoted, '#pop'),
        ],
    }

    def analyse_text(text):
        """Rate how likely *text* is MySQL, favoring `backtick` names."""
        rating = 0
        name_between_backtick_count = len(
            name_between_backtick_re.findall(text))
        name_between_bracket_count = len(
            name_between_bracket_re.findall(text))
        # Same logic as above in the TSQL analysis
        dialect_name_count = name_between_backtick_count + name_between_bracket_count
        if dialect_name_count >= 1 and \
           name_between_backtick_count >= 2 * name_between_bracket_count:
            # Found at least twice as many `name` as [name].
            rating += 0.5
        elif name_between_backtick_count > name_between_bracket_count:
            rating += 0.2
        elif name_between_backtick_count > 0:
            rating += 0.1
        return rating
  813. class SqliteConsoleLexer(Lexer):
  814. """
  815. Lexer for example sessions using sqlite3.
  816. .. versionadded:: 0.11
  817. """
  818. name = 'sqlite3con'
  819. aliases = ['sqlite3']
  820. filenames = ['*.sqlite3-console']
  821. mimetypes = ['text/x-sqlite3-console']
  822. def get_tokens_unprocessed(self, data):
  823. sql = SqlLexer(**self.options)
  824. curcode = ''
  825. insertions = []
  826. for match in line_re.finditer(data):
  827. line = match.group()
  828. prompt_match = sqlite_prompt_re.match(line)
  829. if prompt_match is not None:
  830. insertions.append((len(curcode),
  831. [(0, Generic.Prompt, line[:7])]))
  832. insertions.append((len(curcode),
  833. [(7, Whitespace, ' ')]))
  834. curcode += line[8:]
  835. else:
  836. if curcode:
  837. yield from do_insertions(insertions,
  838. sql.get_tokens_unprocessed(curcode))
  839. curcode = ''
  840. insertions = []
  841. if line.startswith('SQL error: '):
  842. yield (match.start(), Generic.Traceback, line)
  843. else:
  844. yield (match.start(), Generic.Output, line)
  845. if curcode:
  846. yield from do_insertions(insertions,
  847. sql.get_tokens_unprocessed(curcode))
class RqlLexer(RegexLexer):
    """
    Lexer for Relation Query Language.

    .. versionadded:: 2.0
    """

    name = 'RQL'
    url = 'http://www.logilab.org/project/rql'
    aliases = ['rql']
    filenames = ['*.rql']
    mimetypes = ['text/x-rql']

    # RQL keywords/identifiers are matched case-insensitively.
    flags = re.IGNORECASE

    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'(DELETE|SET|INSERT|UNION|DISTINCT|WITH|WHERE|BEING|OR'
             r'|AND|NOT|GROUPBY|HAVING|ORDERBY|ASC|DESC|LIMIT|OFFSET'
             r'|TODAY|NOW|TRUE|FALSE|NULL|EXISTS)\b', Keyword),
            (r'[+*/<>=%-]', Operator),
            (r'(Any|is|instance_of|CWEType|CWRelation)\b', Name.Builtin),
            (r'[0-9]+', Number.Integer),
            # Identifiers starting with an upper-case letter or underscore,
            # optionally suffixed with "?".
            (r'[A-Z_]\w*\??', Name),
            # Doubled quotes escape the quote character inside both
            # string literal forms.
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Single),
            (r'[;:()\[\],.]', Punctuation)
        ],
    }