sql.py 42 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033
  1. """
  2. pygments.lexers.sql
  3. ~~~~~~~~~~~~~~~~~~~
  4. Lexers for various SQL dialects and related interactive sessions.
  5. Postgres specific lexers:
  6. `PostgresLexer`
  7. A SQL lexer for the PostgreSQL dialect. Differences w.r.t. the SQL
  8. lexer are:
  9. - keywords and data types list parsed from the PG docs (run the
  10. `_postgres_builtins` module to update them);
  11. - Content of $-strings parsed using a specific lexer, e.g. the content
  12. of a PL/Python function is parsed using the Python lexer;
  13. - parse PG specific constructs: E-strings, $-strings, U&-strings,
  14. different operators and punctuation.
  15. `PlPgsqlLexer`
  16. A lexer for the PL/pgSQL language. Adds a few specific constructs on
  17. top of the PG SQL lexer (such as <<label>>).
  18. `PostgresConsoleLexer`
  19. A lexer to highlight an interactive psql session:
  20. - identifies the prompt and does its best to detect the end of command
  21. in multiline statement where not all the lines are prefixed by a
  22. prompt, telling them apart from the output;
  23. - highlights errors in the output and notification levels;
  24. - handles psql backslash commands.
  25. `PostgresExplainLexer`
  26. A lexer to highlight Postgres execution plan.
  27. The ``tests/examplefiles`` contains a few test files with data to be
  28. parsed by these lexers.
  29. :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
  30. :license: BSD, see LICENSE for details.
  31. """
  32. import re
  33. from pygments.lexer import Lexer, RegexLexer, do_insertions, bygroups, words
  34. from pygments.token import Punctuation, Whitespace, Text, Comment, Operator, \
  35. Keyword, Name, String, Number, Generic, Literal
  36. from pygments.lexers import get_lexer_by_name, ClassNotFound
  37. from pygments.lexers._postgres_builtins import KEYWORDS, DATATYPES, \
  38. PSEUDO_TYPES, PLPGSQL_KEYWORDS, EXPLAIN_KEYWORDS
  39. from pygments.lexers._mysql_builtins import \
  40. MYSQL_CONSTANTS, \
  41. MYSQL_DATATYPES, \
  42. MYSQL_FUNCTIONS, \
  43. MYSQL_KEYWORDS, \
  44. MYSQL_OPTIMIZER_HINTS
  45. from pygments.lexers import _tsql_builtins
# Public API of this module.
__all__ = ['PostgresLexer', 'PlPgsqlLexer', 'PostgresConsoleLexer',
           'PostgresExplainLexer', 'SqlLexer', 'TransactSqlLexer',
           'MySqlLexer', 'SqliteConsoleLexer', 'RqlLexer']

# Split input into lines, keeping the trailing newline on each line.
line_re = re.compile('.*?\n')
# Prompt of the sqlite3 command-line shell (primary and continuation).
sqlite_prompt_re = re.compile(r'^(?:sqlite| ...)>(?= )')

# LANGUAGE clause of CREATE FUNCTION, used to pick a sub-lexer for $-strings.
language_re = re.compile(r"\s+LANGUAGE\s+'?(\w+)'?", re.IGNORECASE)

# A DO statement implies an anonymous plpgsql code block.
do_re = re.compile(r'\bDO\b', re.IGNORECASE)

# Regular expressions for analyse_text()
name_between_bracket_re = re.compile(r'\[[a-zA-Z_]\w*\]')
name_between_backtick_re = re.compile(r'`[a-zA-Z_]\w*`')
tsql_go_re = re.compile(r'\bgo\b', re.IGNORECASE)
tsql_declare_re = re.compile(r'\bdeclare\s+@', re.IGNORECASE)
tsql_variable_re = re.compile(r'@[a-zA-Z_]\w*\b')
  59. def language_callback(lexer, match):
  60. """Parse the content of a $-string using a lexer
  61. The lexer is chosen looking for a nearby LANGUAGE or assumed as
  62. plpgsql if inside a DO statement and no LANGUAGE has been found.
  63. """
  64. lx = None
  65. m = language_re.match(lexer.text[match.end():match.end()+100])
  66. if m is not None:
  67. lx = lexer._get_lexer(m.group(1))
  68. else:
  69. m = list(language_re.finditer(
  70. lexer.text[max(0, match.start()-100):match.start()]))
  71. if m:
  72. lx = lexer._get_lexer(m[-1].group(1))
  73. else:
  74. m = list(do_re.finditer(
  75. lexer.text[max(0, match.start()-25):match.start()]))
  76. if m:
  77. lx = lexer._get_lexer('plpgsql')
  78. # 1 = $, 2 = delimiter, 3 = $
  79. yield (match.start(1), String, match.group(1))
  80. yield (match.start(2), String.Delimiter, match.group(2))
  81. yield (match.start(3), String, match.group(3))
  82. # 4 = string contents
  83. if lx:
  84. yield from lx.get_tokens_unprocessed(match.group(4))
  85. else:
  86. yield (match.start(4), String, match.group(4))
  87. # 5 = $, 6 = delimiter, 7 = $
  88. yield (match.start(5), String, match.group(5))
  89. yield (match.start(6), String.Delimiter, match.group(6))
  90. yield (match.start(7), String, match.group(7))
  91. class PostgresBase:
  92. """Base class for Postgres-related lexers.
  93. This is implemented as a mixin to avoid the Lexer metaclass kicking in.
  94. this way the different lexer don't have a common Lexer ancestor. If they
  95. had, _tokens could be created on this ancestor and not updated for the
  96. other classes, resulting e.g. in PL/pgSQL parsed as SQL. This shortcoming
  97. seem to suggest that regexp lexers are not really subclassable.
  98. """
  99. def get_tokens_unprocessed(self, text, *args):
  100. # Have a copy of the entire text to be used by `language_callback`.
  101. self.text = text
  102. yield from super().get_tokens_unprocessed(text, *args)
  103. def _get_lexer(self, lang):
  104. if lang.lower() == 'sql':
  105. return get_lexer_by_name('postgresql', **self.options)
  106. tries = [lang]
  107. if lang.startswith('pl'):
  108. tries.append(lang[2:])
  109. if lang.endswith('u'):
  110. tries.append(lang[:-1])
  111. if lang.startswith('pl') and lang.endswith('u'):
  112. tries.append(lang[2:-1])
  113. for lx in tries:
  114. try:
  115. return get_lexer_by_name(lx, **self.options)
  116. except ClassNotFound:
  117. pass
  118. else:
  119. # TODO: better logging
  120. # print >>sys.stderr, "language not found:", lang
  121. return None
class PostgresLexer(PostgresBase, RegexLexer):
    """
    Lexer for the PostgreSQL dialect of SQL.

    On top of generic SQL it handles PG data types (including multi-word
    ones), E-/U&-prefixed strings, quoted identifiers, $-quoted strings
    (whose contents are delegated to a sub-lexer via ``language_callback``)
    and psql variable interpolation.
    """

    name = 'PostgreSQL SQL dialect'
    aliases = ['postgresql', 'postgres']
    mimetypes = ['text/x-postgresql']
    url = 'https://www.postgresql.org'
    version_added = '1.5'

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'--.*\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            # Data types and pseudo-types from the PG docs; multi-word
            # names ("double precision") match across any whitespace run.
            (r'(' + '|'.join(s.replace(" ", r"\s+")
                             for s in DATATYPES + PSEUDO_TYPES) + r')\b',
             Name.Builtin),
            (words(KEYWORDS, suffix=r'\b'), Keyword),
            # PG allows user-defined operators built from these characters.
            (r'[+*/<>=~!@#%^&|`?-]+', Operator),
            (r'::', Operator),  # cast
            (r'\$\d+', Name.Variable),  # positional function parameter
            (r'([0-9]*\.[0-9]*|[0-9]+)(e[+-]?[0-9]+)?', Number.Float),
            (r'[0-9]+', Number.Integer),
            # E'...' escape strings and U&'...' Unicode strings.
            (r"((?:E|U&)?)(')", bygroups(String.Affix, String.Single), 'string'),
            # quoted identifier
            (r'((?:U&)?)(")', bygroups(String.Affix, String.Name), 'quoted-ident'),
            # $tag$ ... $tag$ strings: group 2/6 is the (matching) tag;
            # the callback lexes the contents with a language sub-lexer.
            (r'(?s)(\$)([^$]*)(\$)(.*?)(\$)(\2)(\$)', language_callback),
            (r'[a-z_]\w*', Name),

            # psql variable in SQL
            (r""":(['"]?)[a-z]\w*\b\1""", Name.Variable),

            (r'[;:()\[\]{},.]', Punctuation),
        ],
        'multiline-comments': [
            # /* ... */ comments nest in PostgreSQL.
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ],
        'string': [
            (r"[^']+", String.Single),
            (r"''", String.Single),  # '' is an escaped quote
            (r"'", String.Single, '#pop'),
        ],
        'quoted-ident': [
            (r'[^"]+', String.Name),
            (r'""', String.Name),  # "" is an escaped double quote
            (r'"', String.Name, '#pop'),
        ],
    }
class PlPgsqlLexer(PostgresBase, RegexLexer):
    """
    Handle the extra syntax in Pl/pgSQL language.
    """

    name = 'PL/pgSQL'
    aliases = ['plpgsql']
    mimetypes = ['text/x-plpgsql']
    url = 'https://www.postgresql.org/docs/current/plpgsql.html'
    version_added = '1.5'

    flags = re.IGNORECASE
    # FIXME: use inheritance
    # Copy the PostgresLexer table, duplicating each state list so the
    # in-place edits below don't leak back into PostgresLexer.tokens.
    tokens = {name: state[:] for (name, state) in PostgresLexer.tokens.items()}

    # extend the keywords list: find the SQL keyword rule and replace it
    # with one that also knows the PL/pgSQL keywords.
    for i, pattern in enumerate(tokens['root']):
        if pattern[1] == Keyword:
            tokens['root'][i] = (
                words(KEYWORDS + PLPGSQL_KEYWORDS, suffix=r'\b'),
                Keyword)
            del i
            break
    else:
        # Loop ran out without finding the rule: the PostgresLexer table
        # changed shape and this class must be updated.
        assert 0, "SQL keywords not found"

    # Add specific PL/pgSQL rules (before the SQL ones)
    tokens['root'][:0] = [
        (r'\%[a-z]\w*\b', Name.Builtin),   # actually, a datatype (%TYPE/%ROWTYPE)
        (r':=', Operator),
        (r'\<\<[a-z]\w*\>\>', Name.Label),
        (r'\#[a-z]\w*\b', Keyword.Pseudo),  # #variable_conflict
    ]
class PsqlRegexLexer(PostgresBase, RegexLexer):
    """
    Extend the PostgresLexer adding support specific for psql commands.

    This is not a complete psql lexer yet as it lacks prompt support
    and output rendering.  It is used internally by PostgresConsoleLexer.
    """

    name = 'PostgreSQL console - regexp based lexer'
    aliases = []    # not public

    flags = re.IGNORECASE
    # Copy PostgresLexer's table (duplicating state lists) before extending.
    tokens = {name: state[:] for (name, state) in PostgresLexer.tokens.items()}

    # A backslash command (\d, \copy, ...) switches to a dedicated state.
    tokens['root'].append(
        (r'\\[^\s]+', Keyword.Pseudo, 'psql-command'))
    tokens['psql-command'] = [
        # NOTE(review): this pushes 'root' instead of popping; in practice
        # equivalent, but it grows the state stack on every command.
        (r'\n', Text, 'root'),
        (r'\s+', Whitespace),
        (r'\\[^\s]+', Keyword.Pseudo),
        # psql variable, optionally quoted as :'var' or :"var".
        (r""":(['"]?)[a-z]\w*\b\1""", Name.Variable),
        (r"'(''|[^'])*'", String.Single),
        (r"`([^`])*`", String.Backtick),
        (r"[^\s]+", String.Symbol),  # any other command argument
    ]
  222. re_prompt = re.compile(r'^(\S.*?)??[=\-\(\$\'\"][#>]')
  223. re_psql_command = re.compile(r'\s*\\')
  224. re_end_command = re.compile(r';\s*(--.*?)?$')
  225. re_psql_command = re.compile(r'(\s*)(\\.+?)(\s+)$')
  226. re_error = re.compile(r'(ERROR|FATAL):')
  227. re_message = re.compile(
  228. r'((?:DEBUG|INFO|NOTICE|WARNING|ERROR|'
  229. r'FATAL|HINT|DETAIL|CONTEXT|LINE [0-9]+):)(.*?\n)')
  230. class lookahead:
  231. """Wrap an iterator and allow pushing back an item."""
  232. def __init__(self, x):
  233. self.iter = iter(x)
  234. self._nextitem = None
  235. def __iter__(self):
  236. return self
  237. def send(self, i):
  238. self._nextitem = i
  239. return i
  240. def __next__(self):
  241. if self._nextitem is not None:
  242. ni = self._nextitem
  243. self._nextitem = None
  244. return ni
  245. return next(self.iter)
  246. next = __next__
class PostgresConsoleLexer(Lexer):
    """
    Lexer for psql sessions.

    Alternates between two phases: consume prompt-prefixed lines until a
    complete command is seen, then treat following lines as output until
    the next prompt appears (which is pushed back for the next cycle).
    """

    name = 'PostgreSQL console (psql)'
    aliases = ['psql', 'postgresql-console', 'postgres-console']
    mimetypes = ['text/x-postgresql-psql']
    url = 'https://www.postgresql.org'
    version_added = '1.5'

    def get_tokens_unprocessed(self, data):
        sql = PsqlRegexLexer(**self.options)

        lines = lookahead(line_re.findall(data))

        # prompt-output cycle
        while 1:
            # consume the lines of the command: start with an optional prompt
            # and continue until the end of command is detected
            curcode = ''
            insertions = []
            for line in lines:
                # Identify a shell prompt in case of psql commandline example
                if line.startswith('$') and not curcode:
                    lexer = get_lexer_by_name('console', **self.options)
                    yield from lexer.get_tokens_unprocessed(line)
                    break

                # Identify a psql prompt
                mprompt = re_prompt.match(line)
                if mprompt is not None:
                    # Record where the prompt belongs so do_insertions()
                    # can interleave it with the sql-lexed command text.
                    insertions.append((len(curcode),
                                       [(0, Generic.Prompt, mprompt.group())]))
                    curcode += line[len(mprompt.group()):]
                else:
                    curcode += line

                # Check if this is the end of the command
                # TODO: better handle multiline comments at the end with
                # a lexer with an external state?
                if re_psql_command.match(curcode) \
                   or re_end_command.search(curcode):
                    break

            # Emit the combined stream of command and prompt(s)
            yield from do_insertions(insertions,
                                     sql.get_tokens_unprocessed(curcode))

            # Emit the output lines
            out_token = Generic.Output
            for line in lines:
                mprompt = re_prompt.match(line)
                if mprompt is not None:
                    # push the line back to have it processed by the prompt
                    lines.send(line)
                    break

                mmsg = re_message.match(line)
                if mmsg is not None:
                    # An ERROR/FATAL message switches all remaining output
                    # of this statement to the error token.
                    if mmsg.group(1).startswith("ERROR") \
                       or mmsg.group(1).startswith("FATAL"):
                        out_token = Generic.Error
                    yield (mmsg.start(1), Generic.Strong, mmsg.group(1))
                    yield (mmsg.start(2), out_token, mmsg.group(2))
                else:
                    yield (0, out_token, line)
            else:
                # Input exhausted without seeing another prompt: done.
                return
  307. class PostgresExplainLexer(RegexLexer):
  308. """
  309. Handle PostgreSQL EXPLAIN output
  310. """
  311. name = 'PostgreSQL EXPLAIN dialect'
  312. aliases = ['postgres-explain']
  313. filenames = ['*.explain']
  314. mimetypes = ['text/x-postgresql-explain']
  315. url = 'https://www.postgresql.org/docs/current/using-explain.html'
  316. version_added = '2.15'
  317. tokens = {
  318. 'root': [
  319. (r'(:|\(|\)|ms|kB|->|\.\.|\,)', Punctuation),
  320. (r'(\s+)', Whitespace),
  321. # This match estimated cost and effectively measured counters with ANALYZE
  322. # Then, we move to instrumentation state
  323. (r'(cost)(=?)', bygroups(Name.Class, Punctuation), 'instrumentation'),
  324. (r'(actual)( )(=?)', bygroups(Name.Class, Whitespace, Punctuation), 'instrumentation'),
  325. # Misc keywords
  326. (words(('actual', 'Memory Usage', 'Memory', 'Buckets', 'Batches',
  327. 'originally', 'row', 'rows', 'Hits', 'Misses',
  328. 'Evictions', 'Overflows'), suffix=r'\b'),
  329. Comment.Single),
  330. (r'(hit|read|dirtied|written|write|time|calls)(=)', bygroups(Comment.Single, Operator)),
  331. (r'(shared|temp|local)', Keyword.Pseudo),
  332. # We move to sort state in order to emphasize specific keywords (especially disk access)
  333. (r'(Sort Method)(: )', bygroups(Comment.Preproc, Punctuation), 'sort'),
  334. # These keywords can be followed by an object, like a table
  335. (r'(Sort Key|Group Key|Presorted Key|Hash Key)(:)( )',
  336. bygroups(Comment.Preproc, Punctuation, Whitespace), 'object_name'),
  337. (r'(Cache Key|Cache Mode)(:)( )', bygroups(Comment, Punctuation, Whitespace), 'object_name'),
  338. # These keywords can be followed by a predicate
  339. (words(('Join Filter', 'Subplans Removed', 'Filter', 'Merge Cond',
  340. 'Hash Cond', 'Index Cond', 'Recheck Cond', 'Heap Blocks',
  341. 'TID Cond', 'Run Condition', 'Order By', 'Function Call',
  342. 'Table Function Call', 'Inner Unique', 'Params Evaluated',
  343. 'Single Copy', 'Sampling', 'One-Time Filter', 'Output',
  344. 'Relations', 'Remote SQL'), suffix=r'\b'),
  345. Comment.Preproc, 'predicate'),
  346. # Special keyword to handle ON CONFLICT
  347. (r'Conflict ', Comment.Preproc, 'conflict'),
  348. # Special keyword for InitPlan or SubPlan
  349. (r'(InitPlan|SubPlan)( )(\d+)( )',
  350. bygroups(Keyword, Whitespace, Number.Integer, Whitespace),
  351. 'init_plan'),
  352. (words(('Sort Method', 'Join Filter', 'Planning time',
  353. 'Planning Time', 'Execution time', 'Execution Time',
  354. 'Workers Planned', 'Workers Launched', 'Buffers',
  355. 'Planning', 'Worker', 'Query Identifier', 'Time',
  356. 'Full-sort Groups', 'Pre-sorted Groups'), suffix=r'\b'), Comment.Preproc),
  357. # Emphasize these keywords
  358. (words(('Rows Removed by Join Filter', 'Rows Removed by Filter',
  359. 'Rows Removed by Index Recheck',
  360. 'Heap Fetches', 'never executed'),
  361. suffix=r'\b'), Name.Exception),
  362. (r'(I/O Timings)(:)( )', bygroups(Name.Exception, Punctuation, Whitespace)),
  363. (words(EXPLAIN_KEYWORDS, suffix=r'\b'), Keyword),
  364. # join keywords
  365. (r'((Right|Left|Full|Semi|Anti) Join)', Keyword.Type),
  366. (r'(Parallel |Async |Finalize |Partial )', Comment.Preproc),
  367. (r'Backward', Comment.Preproc),
  368. (r'(Intersect|Except|Hash)', Comment.Preproc),
  369. (r'(CTE)( )(\w*)?', bygroups(Comment, Whitespace, Name.Variable)),
  370. # Treat "on" and "using" as a punctuation
  371. (r'(on|using)', Punctuation, 'object_name'),
  372. # strings
  373. (r"'(''|[^'])*'", String.Single),
  374. # numbers
  375. (r'-?\d+\.\d+', Number.Float),
  376. (r'(-?\d+)', Number.Integer),
  377. # boolean
  378. (r'(true|false)', Name.Constant),
  379. # explain header
  380. (r'\s*QUERY PLAN\s*\n\s*-+', Comment.Single),
  381. # Settings
  382. (r'(Settings)(:)( )', bygroups(Comment.Preproc, Punctuation, Whitespace), 'setting'),
  383. # Handle JIT counters
  384. (r'(JIT|Functions|Options|Timing)(:)', bygroups(Comment.Preproc, Punctuation)),
  385. (r'(Inlining|Optimization|Expressions|Deforming|Generation|Emission|Total)', Keyword.Pseudo),
  386. # Handle Triggers counters
  387. (r'(Trigger)( )(\S*)(:)( )',
  388. bygroups(Comment.Preproc, Whitespace, Name.Variable, Punctuation, Whitespace)),
  389. ],
  390. 'expression': [
  391. # matches any kind of parenthesized expression
  392. # the first opening paren is matched by the 'caller'
  393. (r'\(', Punctuation, '#push'),
  394. (r'\)', Punctuation, '#pop'),
  395. (r'(never executed)', Name.Exception),
  396. (r'[^)(]+', Comment),
  397. ],
  398. 'object_name': [
  399. # This is a cost or analyze measure
  400. (r'(\(cost)(=?)', bygroups(Name.Class, Punctuation), 'instrumentation'),
  401. (r'(\(actual)( )(=?)', bygroups(Name.Class, Whitespace, Punctuation), 'instrumentation'),
  402. # if object_name is parenthesized, mark opening paren as
  403. # punctuation, call 'expression', and exit state
  404. (r'\(', Punctuation, 'expression'),
  405. (r'(on)', Punctuation),
  406. # matches possibly schema-qualified table and column names
  407. (r'\w+(\.\w+)*( USING \S+| \w+ USING \S+)', Name.Variable),
  408. (r'\"?\w+\"?(?:\.\"?\w+\"?)?', Name.Variable),
  409. (r'\'\S*\'', Name.Variable),
  410. # if we encounter a comma, another object is listed
  411. (r',\n', Punctuation, 'object_name'),
  412. (r',', Punctuation, 'object_name'),
  413. # special case: "*SELECT*"
  414. (r'"\*SELECT\*( \d+)?"(.\w+)?', Name.Variable),
  415. (r'"\*VALUES\*(_\d+)?"(.\w+)?', Name.Variable),
  416. (r'"ANY_subquery"', Name.Variable),
  417. # Variable $1 ...
  418. (r'\$\d+', Name.Variable),
  419. # cast
  420. (r'::\w+', Name.Variable),
  421. (r' +', Whitespace),
  422. (r'"', Punctuation),
  423. (r'\[\.\.\.\]', Punctuation),
  424. (r'\)', Punctuation, '#pop'),
  425. ],
  426. 'predicate': [
  427. # if predicate is parenthesized, mark paren as punctuation
  428. (r'(\()([^\n]*)(\))', bygroups(Punctuation, Name.Variable, Punctuation), '#pop'),
  429. # otherwise color until newline
  430. (r'[^\n]*', Name.Variable, '#pop'),
  431. ],
  432. 'instrumentation': [
  433. (r'=|\.\.', Punctuation),
  434. (r' +', Whitespace),
  435. (r'(rows|width|time|loops)', Name.Class),
  436. (r'\d+\.\d+', Number.Float),
  437. (r'(\d+)', Number.Integer),
  438. (r'\)', Punctuation, '#pop'),
  439. ],
  440. 'conflict': [
  441. (r'(Resolution: )(\w+)', bygroups(Comment.Preproc, Name.Variable)),
  442. (r'(Arbiter \w+:)', Comment.Preproc, 'object_name'),
  443. (r'(Filter: )', Comment.Preproc, 'predicate'),
  444. ],
  445. 'setting': [
  446. (r'([a-z_]*?)(\s*)(=)(\s*)(\'.*?\')', bygroups(Name.Attribute, Whitespace, Operator, Whitespace, String)),
  447. (r'\, ', Punctuation),
  448. ],
  449. 'init_plan': [
  450. (r'\(', Punctuation),
  451. (r'returns \$\d+(,\$\d+)?', Name.Variable),
  452. (r'\)', Punctuation, '#pop'),
  453. ],
  454. 'sort': [
  455. (r':|kB', Punctuation),
  456. (r'(quicksort|top-N|heapsort|Average|Memory|Peak)', Comment.Prepoc),
  457. (r'(external|merge|Disk|sort)', Name.Exception),
  458. (r'(\d+)', Number.Integer),
  459. (r' +', Whitespace),
  460. ],
  461. }
  462. class SqlLexer(RegexLexer):
  463. """
  464. Lexer for Structured Query Language. Currently, this lexer does
  465. not recognize any special syntax except ANSI SQL.
  466. """
  467. name = 'SQL'
  468. aliases = ['sql']
  469. filenames = ['*.sql']
  470. mimetypes = ['text/x-sql']
  471. url = 'https://en.wikipedia.org/wiki/SQL'
  472. version_added = ''
  473. flags = re.IGNORECASE
  474. tokens = {
  475. 'root': [
  476. (r'\s+', Whitespace),
  477. (r'--.*\n?', Comment.Single),
  478. (r'/\*', Comment.Multiline, 'multiline-comments'),
  479. (words((
  480. 'ABORT', 'ABS', 'ABSOLUTE', 'ACCESS', 'ADA', 'ADD', 'ADMIN', 'AFTER',
  481. 'AGGREGATE', 'ALIAS', 'ALL', 'ALLOCATE', 'ALTER', 'ANALYSE', 'ANALYZE',
  482. 'AND', 'ANY', 'ARE', 'AS', 'ASC', 'ASENSITIVE', 'ASSERTION', 'ASSIGNMENT',
  483. 'ASYMMETRIC', 'AT', 'ATOMIC', 'AUTHORIZATION', 'AVG', 'BACKWARD',
  484. 'BEFORE', 'BEGIN', 'BETWEEN', 'BITVAR', 'BIT_LENGTH', 'BOTH', 'BREADTH',
  485. 'BY', 'C', 'CACHE', 'CALL', 'CALLED', 'CARDINALITY', 'CASCADE',
  486. 'CASCADED', 'CASE', 'CAST', 'CATALOG', 'CATALOG_NAME', 'CHAIN',
  487. 'CHARACTERISTICS', 'CHARACTER_LENGTH', 'CHARACTER_SET_CATALOG',
  488. 'CHARACTER_SET_NAME', 'CHARACTER_SET_SCHEMA', 'CHAR_LENGTH', 'CHECK',
  489. 'CHECKED', 'CHECKPOINT', 'CLASS', 'CLASS_ORIGIN', 'CLOB', 'CLOSE',
  490. 'CLUSTER', 'COALESCE', 'COBOL', 'COLLATE', 'COLLATION',
  491. 'COLLATION_CATALOG', 'COLLATION_NAME', 'COLLATION_SCHEMA', 'COLUMN',
  492. 'COLUMN_NAME', 'COMMAND_FUNCTION', 'COMMAND_FUNCTION_CODE', 'COMMENT',
  493. 'COMMIT', 'COMMITTED', 'COMPLETION', 'CONDITION_NUMBER', 'CONNECT',
  494. 'CONNECTION', 'CONNECTION_NAME', 'CONSTRAINT', 'CONSTRAINTS',
  495. 'CONSTRAINT_CATALOG', 'CONSTRAINT_NAME', 'CONSTRAINT_SCHEMA',
  496. 'CONSTRUCTOR', 'CONTAINS', 'CONTINUE', 'CONVERSION', 'CONVERT',
  497. 'COPY', 'CORRESPONDING', 'COUNT', 'CREATE', 'CREATEDB', 'CREATEUSER',
  498. 'CROSS', 'CUBE', 'CURRENT', 'CURRENT_DATE', 'CURRENT_PATH',
  499. 'CURRENT_ROLE', 'CURRENT_TIME', 'CURRENT_TIMESTAMP', 'CURRENT_USER',
  500. 'CURSOR', 'CURSOR_NAME', 'CYCLE', 'DATA', 'DATABASE',
  501. 'DATETIME_INTERVAL_CODE', 'DATETIME_INTERVAL_PRECISION', 'DAY',
  502. 'DEALLOCATE', 'DECLARE', 'DEFAULT', 'DEFAULTS', 'DEFERRABLE',
  503. 'DEFERRED', 'DEFINED', 'DEFINER', 'DELETE', 'DELIMITER', 'DELIMITERS',
  504. 'DEREF', 'DESC', 'DESCRIBE', 'DESCRIPTOR', 'DESTROY', 'DESTRUCTOR',
  505. 'DETERMINISTIC', 'DIAGNOSTICS', 'DICTIONARY', 'DISCONNECT', 'DISPATCH',
  506. 'DISTINCT', 'DO', 'DOMAIN', 'DROP', 'DYNAMIC', 'DYNAMIC_FUNCTION',
  507. 'DYNAMIC_FUNCTION_CODE', 'EACH', 'ELSE', 'ELSIF', 'ENCODING',
  508. 'ENCRYPTED', 'END', 'END-EXEC', 'EQUALS', 'ESCAPE', 'EVERY', 'EXCEPTION',
  509. 'EXCEPT', 'EXCLUDING', 'EXCLUSIVE', 'EXEC', 'EXECUTE', 'EXISTING',
  510. 'EXISTS', 'EXPLAIN', 'EXTERNAL', 'EXTRACT', 'FALSE', 'FETCH', 'FINAL',
  511. 'FIRST', 'FOR', 'FORCE', 'FOREIGN', 'FORTRAN', 'FORWARD', 'FOUND', 'FREE',
  512. 'FREEZE', 'FROM', 'FULL', 'FUNCTION', 'G', 'GENERAL', 'GENERATED', 'GET',
  513. 'GLOBAL', 'GO', 'GOTO', 'GRANT', 'GRANTED', 'GROUP', 'GROUPING',
  514. 'HANDLER', 'HAVING', 'HIERARCHY', 'HOLD', 'HOST', 'IDENTITY', 'IF',
  515. 'IGNORE', 'ILIKE', 'IMMEDIATE', 'IMMEDIATELY', 'IMMUTABLE', 'IMPLEMENTATION', 'IMPLICIT',
  516. 'IN', 'INCLUDING', 'INCREMENT', 'INDEX', 'INDITCATOR', 'INFIX',
  517. 'INHERITS', 'INITIALIZE', 'INITIALLY', 'INNER', 'INOUT', 'INPUT',
  518. 'INSENSITIVE', 'INSERT', 'INSTANTIABLE', 'INSTEAD', 'INTERSECT', 'INTO',
  519. 'INVOKER', 'IS', 'ISNULL', 'ISOLATION', 'ITERATE', 'JOIN', 'KEY',
  520. 'KEY_MEMBER', 'KEY_TYPE', 'LANCOMPILER', 'LANGUAGE', 'LARGE', 'LAST',
  521. 'LATERAL', 'LEADING', 'LEFT', 'LENGTH', 'LESS', 'LEVEL', 'LIKE', 'LIMIT',
  522. 'LISTEN', 'LOAD', 'LOCAL', 'LOCALTIME', 'LOCALTIMESTAMP', 'LOCATION',
  523. 'LOCATOR', 'LOCK', 'LOWER', 'MAP', 'MATCH', 'MAX', 'MAXVALUE',
  524. 'MESSAGE_LENGTH', 'MESSAGE_OCTET_LENGTH', 'MESSAGE_TEXT', 'METHOD', 'MIN',
  525. 'MINUTE', 'MINVALUE', 'MOD', 'MODE', 'MODIFIES', 'MODIFY', 'MONTH',
  526. 'MORE', 'MOVE', 'MUMPS', 'NAMES', 'NATIONAL', 'NATURAL', 'NCHAR', 'NCLOB',
  527. 'NEW', 'NEXT', 'NO', 'NOCREATEDB', 'NOCREATEUSER', 'NONE', 'NOT',
  528. 'NOTHING', 'NOTIFY', 'NOTNULL', 'NULL', 'NULLABLE', 'NULLIF', 'OBJECT',
  529. 'OCTET_LENGTH', 'OF', 'OFF', 'OFFSET', 'OIDS', 'OLD', 'ON', 'ONLY',
  530. 'OPEN', 'OPERATION', 'OPERATOR', 'OPTION', 'OPTIONS', 'OR', 'ORDER',
  531. 'ORDINALITY', 'OUT', 'OUTER', 'OUTPUT', 'OVERLAPS', 'OVERLAY',
  532. 'OVERRIDING', 'OWNER', 'PAD', 'PARAMETER', 'PARAMETERS', 'PARAMETER_MODE',
  533. 'PARAMETER_NAME', 'PARAMETER_ORDINAL_POSITION',
  534. 'PARAMETER_SPECIFIC_CATALOG', 'PARAMETER_SPECIFIC_NAME',
  535. 'PARAMETER_SPECIFIC_SCHEMA', 'PARTIAL', 'PASCAL', 'PENDANT', 'PERIOD', 'PLACING',
  536. 'PLI', 'POSITION', 'POSTFIX', 'PRECEEDS', 'PRECISION', 'PREFIX', 'PREORDER',
  537. 'PREPARE', 'PRESERVE', 'PRIMARY', 'PRIOR', 'PRIVILEGES', 'PROCEDURAL',
  538. 'PROCEDURE', 'PUBLIC', 'READ', 'READS', 'RECHECK', 'RECURSIVE', 'REF',
  539. 'REFERENCES', 'REFERENCING', 'REINDEX', 'RELATIVE', 'RENAME',
  540. 'REPEATABLE', 'REPLACE', 'RESET', 'RESTART', 'RESTRICT', 'RESULT',
  541. 'RETURN', 'RETURNED_LENGTH', 'RETURNED_OCTET_LENGTH', 'RETURNED_SQLSTATE',
  542. 'RETURNS', 'REVOKE', 'RIGHT', 'ROLE', 'ROLLBACK', 'ROLLUP', 'ROUTINE',
  543. 'ROUTINE_CATALOG', 'ROUTINE_NAME', 'ROUTINE_SCHEMA', 'ROW', 'ROWS',
  544. 'ROW_COUNT', 'RULE', 'SAVE_POINT', 'SCALE', 'SCHEMA', 'SCHEMA_NAME',
  545. 'SCOPE', 'SCROLL', 'SEARCH', 'SECOND', 'SECURITY', 'SELECT', 'SELF',
  546. 'SENSITIVE', 'SERIALIZABLE', 'SERVER_NAME', 'SESSION', 'SESSION_USER',
  547. 'SET', 'SETOF', 'SETS', 'SHARE', 'SHOW', 'SIMILAR', 'SIMPLE', 'SIZE',
  548. 'SOME', 'SOURCE', 'SPACE', 'SPECIFIC', 'SPECIFICTYPE', 'SPECIFIC_NAME',
  549. 'SQL', 'SQLCODE', 'SQLERROR', 'SQLEXCEPTION', 'SQLSTATE', 'SQLWARNINIG',
  550. 'STABLE', 'START', 'STATE', 'STATEMENT', 'STATIC', 'STATISTICS', 'STDIN',
  551. 'STDOUT', 'STORAGE', 'STRICT', 'STRUCTURE', 'STYPE', 'SUBCLASS_ORIGIN',
  552. 'SUBLIST', 'SUBSTRING', 'SUCCEEDS', 'SUM', 'SYMMETRIC', 'SYSID', 'SYSTEM',
  553. 'SYSTEM_USER', 'TABLE', 'TABLE_NAME', ' TEMP', 'TEMPLATE', 'TEMPORARY',
  554. 'TERMINATE', 'THAN', 'THEN', 'TIME', 'TIMESTAMP', 'TIMEZONE_HOUR',
  555. 'TIMEZONE_MINUTE', 'TO', 'TOAST', 'TRAILING', 'TRANSACTION',
  556. 'TRANSACTIONS_COMMITTED', 'TRANSACTIONS_ROLLED_BACK', 'TRANSACTION_ACTIVE',
  557. 'TRANSFORM', 'TRANSFORMS', 'TRANSLATE', 'TRANSLATION', 'TREAT', 'TRIGGER',
  558. 'TRIGGER_CATALOG', 'TRIGGER_NAME', 'TRIGGER_SCHEMA', 'TRIM', 'TRUE',
  559. 'TRUNCATE', 'TRUSTED', 'TYPE', 'UNCOMMITTED', 'UNDER', 'UNENCRYPTED',
  560. 'UNION', 'UNIQUE', 'UNKNOWN', 'UNLISTEN', 'UNNAMED', 'UNNEST', 'UNTIL',
  561. 'UPDATE', 'UPPER', 'USAGE', 'USER', 'USER_DEFINED_TYPE_CATALOG',
  562. 'USER_DEFINED_TYPE_NAME', 'USER_DEFINED_TYPE_SCHEMA', 'USING', 'VACUUM',
  563. 'VALID', 'VALIDATOR', 'VALUES', 'VARIABLE', 'VERBOSE',
  564. 'VERSION', 'VERSIONS', 'VERSIONING', 'VIEW',
  565. 'VOLATILE', 'WHEN', 'WHENEVER', 'WHERE', 'WITH', 'WITHOUT', 'WORK',
  566. 'WRITE', 'YEAR', 'ZONE'), suffix=r'\b'),
  567. Keyword),
  568. (words((
  569. 'ARRAY', 'BIGINT', 'BINARY', 'BIT', 'BLOB', 'BOOLEAN', 'CHAR',
  570. 'CHARACTER', 'DATE', 'DEC', 'DECIMAL', 'FLOAT', 'INT', 'INTEGER',
  571. 'INTERVAL', 'NUMBER', 'NUMERIC', 'REAL', 'SERIAL', 'SMALLINT',
  572. 'VARCHAR', 'VARYING', 'INT8', 'SERIAL8', 'TEXT'), suffix=r'\b'),
  573. Name.Builtin),
  574. (r'[+*/<>=~!@#%^&|`?-]', Operator),
  575. (r'[0-9]+', Number.Integer),
  576. # TODO: Backslash escapes?
  577. (r"'(''|[^'])*'", String.Single),
  578. (r'"(""|[^"])*"', String.Symbol), # not a real string literal in ANSI SQL
  579. (r'[a-z_][\w$]*', Name), # allow $s in strings for Oracle
  580. (r'[;:()\[\],.]', Punctuation)
  581. ],
  582. 'multiline-comments': [
  583. (r'/\*', Comment.Multiline, 'multiline-comments'),
  584. (r'\*/', Comment.Multiline, '#pop'),
  585. (r'[^/*]+', Comment.Multiline),
  586. (r'[/*]', Comment.Multiline)
  587. ]
  588. }
  589. def analyse_text(self, text):
  590. return
class TransactSqlLexer(RegexLexer):
    """
    Transact-SQL (T-SQL) is Microsoft's and Sybase's proprietary extension to
    SQL.

    The list of keywords includes ODBC and keywords reserved for future use.
    """

    name = 'Transact-SQL'
    aliases = ['tsql', 't-sql']
    filenames = ['*.sql']
    mimetypes = ['text/x-tsql']
    url = 'https://www.tsql.info'
    version_added = ''

    flags = re.IGNORECASE

    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'--.*?$\n?', Comment.Single),
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (words(_tsql_builtins.OPERATORS), Operator),
            (words(_tsql_builtins.OPERATOR_WORDS, suffix=r'\b'), Operator.Word),
            (words(_tsql_builtins.TYPES, suffix=r'\b'), Name.Class),
            (words(_tsql_builtins.FUNCTIONS, suffix=r'\b'), Name.Function),
            # goto targets come before the generic keyword rule so the
            # label gets its own token.
            (r'(goto)(\s+)(\w+\b)', bygroups(Keyword, Whitespace, Name.Label)),
            (words(_tsql_builtins.KEYWORDS, suffix=r'\b'), Keyword),
            # [bracketed identifier]
            (r'(\[)([^]]+)(\])', bygroups(Operator, Name, Operator)),
            (r'0x[0-9a-f]+', Number.Hex),
            # Float variant 1, for example: 1., 1.e2, 1.2e3
            (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float),
            # Float variant 2, for example: .1, .1e2
            (r'\.[0-9]+(e[+-]?[0-9]+)?', Number.Float),
            # Float variant 3, for example: 123e45
            (r'[0-9]+e[+-]?[0-9]+', Number.Float),
            (r'[0-9]+', Number.Integer),
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Symbol),
            (r'[;(),.]', Punctuation),
            # Below we use \w even for the first "real" character because
            # tokens starting with a digit have already been recognized
            # as Number above.
            (r'@@\w+', Name.Builtin),   # system variables/functions
            (r'@\w+', Name.Variable),   # user variables
            (r'(\w+)(:)', bygroups(Name.Label, Punctuation)),
            (r'#?#?\w+', Name),  # names for temp tables and anything else
            (r'\?', Name.Variable.Magic),  # parameter for prepared statements
        ],
        'multiline-comments': [
            # /* ... */ comments nest in T-SQL.
            (r'/\*', Comment.Multiline, 'multiline-comments'),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'[^/*]+', Comment.Multiline),
            (r'[/*]', Comment.Multiline)
        ]
    }

    # NOTE(review): no ``self`` parameter — pygments wraps analyse_text so
    # it is effectively called as a plain function; keep the signature.
    def analyse_text(text):
        """Score the likelihood that *text* is T-SQL rather than another
        SQL dialect, based on variable declarations, [name] quoting and
        GO batch separators."""
        rating = 0
        if tsql_declare_re.search(text):
            # Found T-SQL variable declaration.
            rating = 1.0
        else:
            name_between_backtick_count = len(
                name_between_backtick_re.findall(text))
            name_between_bracket_count = len(
                name_between_bracket_re.findall(text))
            # We need to check if there are any names using
            # backticks or brackets, as otherwise both are 0
            # and 0 >= 2 * 0, so we would always assume it's true
            dialect_name_count = name_between_backtick_count + name_between_bracket_count
            if dialect_name_count >= 1 and \
               name_between_bracket_count >= 2 * name_between_backtick_count:
                # Found at least twice as many [name] as `name`.
                rating += 0.5
            elif name_between_bracket_count > name_between_backtick_count:
                rating += 0.2
            elif name_between_bracket_count > 0:
                rating += 0.1
        if tsql_variable_re.search(text) is not None:
            rating += 0.1
        if tsql_go_re.search(text) is not None:
            rating += 0.1
        return rating
class MySqlLexer(RegexLexer):
    """The Oracle MySQL lexer.

    This lexer does not attempt to maintain strict compatibility with
    MariaDB syntax or keywords. Although MySQL and MariaDB's common code
    history suggests there may be significant overlap between the two,
    compatibility between the two is not a target for this lexer.
    """

    name = 'MySQL'
    aliases = ['mysql']
    mimetypes = ['text/x-mysql']
    url = 'https://www.mysql.com'
    version_added = ''

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Whitespace),

            # Comments
            (r'(?:#|--\s+).*', Comment.Single),
            # '/*+' opens an optimizer-hint comment, handled in its own state.
            (r'/\*\+', Comment.Special, 'optimizer-hints'),
            (r'/\*', Comment.Multiline, 'multiline-comment'),

            # Hexadecimal literals
            (r"x'([0-9a-f]{2})+'", Number.Hex),  # MySQL requires paired hex characters in this form.
            (r'0x[0-9a-f]+', Number.Hex),

            # Binary literals
            (r"b'[01]+'", Number.Bin),
            (r'0b[01]+', Number.Bin),

            # Numeric literals
            (r'[0-9]+\.[0-9]*(e[+-]?[0-9]+)?', Number.Float),  # Mandatory integer, optional fraction and exponent
            (r'[0-9]*\.[0-9]+(e[+-]?[0-9]+)?', Number.Float),  # Mandatory fraction, optional integer and exponent
            (r'[0-9]+e[+-]?[0-9]+', Number.Float),  # Exponents with integer significands are still floats
            (r'[0-9]+(?=[^0-9a-z$_\u0080-\uffff])', Number.Integer),  # Integers that are not in a schema object name

            # Date literals
            (r"\{\s*d\s*(?P<quote>['\"])\s*\d{2}(\d{2})?.?\d{2}.?\d{2}\s*(?P=quote)\s*\}",
             Literal.Date),

            # Time literals
            (r"\{\s*t\s*(?P<quote>['\"])\s*(?:\d+\s+)?\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?\s*(?P=quote)\s*\}",
             Literal.Date),

            # Timestamp literals
            (
                r"\{\s*ts\s*(?P<quote>['\"])\s*"
                r"\d{2}(?:\d{2})?.?\d{2}.?\d{2}"  # Date part
                r"\s+"  # Whitespace between date and time
                r"\d{1,2}.?\d{1,2}.?\d{1,2}(\.\d*)?"  # Time part
                r"\s*(?P=quote)\s*\}",
                Literal.Date
            ),

            # String literals
            (r"'", String.Single, 'single-quoted-string'),
            (r'"', String.Double, 'double-quoted-string'),

            # Variables
            (r'@@(?:global\.|persist\.|persist_only\.|session\.)?[a-z_]+', Name.Variable),
            (r'@[a-z0-9_$.]+', Name.Variable),
            (r"@'", Name.Variable, 'single-quoted-variable'),
            (r'@"', Name.Variable, 'double-quoted-variable'),
            (r"@`", Name.Variable, 'backtick-quoted-variable'),
            (r'\?', Name.Variable),  # For demonstrating prepared statements

            # Operators
            (r'[!%&*+/:<=>^|~-]+', Operator),

            # Exceptions; these words tokenize differently in different contexts.
            (r'\b(set)(?!\s*\()', Keyword),
            (r'\b(character)(\s+)(set)\b', bygroups(Keyword, Whitespace, Keyword)),
            # In all other known cases, "SET" is tokenized by MYSQL_DATATYPES.

            (words(MYSQL_CONSTANTS, prefix=r'\b', suffix=r'\b'), Name.Constant),
            (words(MYSQL_DATATYPES, prefix=r'\b', suffix=r'\b'), Keyword.Type),
            (words(MYSQL_KEYWORDS, prefix=r'\b', suffix=r'\b'), Keyword),
            (words(MYSQL_FUNCTIONS, prefix=r'\b', suffix=r'\b(\s*)(\()'),
             bygroups(Name.Function, Whitespace, Punctuation)),

            # Schema object names
            #
            # Note: Although the first regex supports unquoted all-numeric
            # identifiers, this will not be a problem in practice because
            # numeric literals have already been handled above.
            #
            ('[0-9a-z$_\u0080-\uffff]+', Name),
            (r'`', Name.Quoted, 'schema-object-name'),

            # Punctuation
            (r'[(),.;]', Punctuation),
        ],

        # Multiline comment substates
        # ---------------------------
        'optimizer-hints': [
            (r'[^*a-z]+', Comment.Special),
            (r'\*/', Comment.Special, '#pop'),
            (words(MYSQL_OPTIMIZER_HINTS, suffix=r'\b'), Comment.Preproc),
            ('[a-z]+', Comment.Special),
            (r'\*', Comment.Special),
        ],

        'multiline-comment': [
            (r'[^*]+', Comment.Multiline),
            (r'\*/', Comment.Multiline, '#pop'),
            (r'\*', Comment.Multiline),
        ],

        # String substates
        # ----------------
        'single-quoted-string': [
            (r"[^'\\]+", String.Single),
            (r"''", String.Escape),
            (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
            (r"'", String.Single, '#pop'),
        ],

        'double-quoted-string': [
            (r'[^"\\]+', String.Double),
            (r'""', String.Escape),
            (r"""\\[0'"bnrtZ\\%_]""", String.Escape),
            (r'"', String.Double, '#pop'),
        ],

        # Variable substates
        # ------------------
        'single-quoted-variable': [
            (r"[^']+", Name.Variable),
            (r"''", Name.Variable),
            (r"'", Name.Variable, '#pop'),
        ],

        'double-quoted-variable': [
            (r'[^"]+', Name.Variable),
            (r'""', Name.Variable),
            (r'"', Name.Variable, '#pop'),
        ],

        'backtick-quoted-variable': [
            (r'[^`]+', Name.Variable),
            (r'``', Name.Variable),
            (r'`', Name.Variable, '#pop'),
        ],

        # Schema object name substates
        # ----------------------------
        #
        # "Name.Quoted" and "Name.Quoted.Escape" are non-standard but
        # formatters will style them as "Name" by default but add
        # additional styles based on the token name. This gives users
        # flexibility to add custom styles as desired.
        #
        'schema-object-name': [
            (r'[^`]+', Name.Quoted),
            (r'``', Name.Quoted.Escape),
            (r'`', Name.Quoted, '#pop'),
        ],
    }

    # NOTE: no `self` parameter — Pygments invokes analyse_text in a static
    # fashion, so this is the project's conventional signature.
    def analyse_text(text):
        """Score the likelihood that *text* is MySQL (0.0 .. 1.0).

        Mirrors TransactSqlLexer.analyse_text with the roles of `name`
        (backticks, MySQL style) and [name] (brackets, T-SQL style) swapped.
        """
        rating = 0
        name_between_backtick_count = len(
            name_between_backtick_re.findall(text))
        name_between_bracket_count = len(
            name_between_bracket_re.findall(text))
        # Same logic as above in the TSQL analysis
        dialect_name_count = name_between_backtick_count + name_between_bracket_count
        if dialect_name_count >= 1 and \
           name_between_backtick_count >= 2 * name_between_bracket_count:
            # Found at least twice as many `name` as [name].
            rating += 0.5
        elif name_between_backtick_count > name_between_bracket_count:
            rating += 0.2
        elif name_between_backtick_count > 0:
            rating += 0.1
        return rating
  824. class SqliteConsoleLexer(Lexer):
  825. """
  826. Lexer for example sessions using sqlite3.
  827. """
  828. name = 'sqlite3con'
  829. aliases = ['sqlite3']
  830. filenames = ['*.sqlite3-console']
  831. mimetypes = ['text/x-sqlite3-console']
  832. url = 'https://www.sqlite.org'
  833. version_added = '0.11'
  834. def get_tokens_unprocessed(self, data):
  835. sql = SqlLexer(**self.options)
  836. curcode = ''
  837. insertions = []
  838. for match in line_re.finditer(data):
  839. line = match.group()
  840. prompt_match = sqlite_prompt_re.match(line)
  841. if prompt_match is not None:
  842. insertions.append((len(curcode),
  843. [(0, Generic.Prompt, line[:7])]))
  844. insertions.append((len(curcode),
  845. [(7, Whitespace, ' ')]))
  846. curcode += line[8:]
  847. else:
  848. if curcode:
  849. yield from do_insertions(insertions,
  850. sql.get_tokens_unprocessed(curcode))
  851. curcode = ''
  852. insertions = []
  853. if line.startswith('SQL error: '):
  854. yield (match.start(), Generic.Traceback, line)
  855. else:
  856. yield (match.start(), Generic.Output, line)
  857. if curcode:
  858. yield from do_insertions(insertions,
  859. sql.get_tokens_unprocessed(curcode))
class RqlLexer(RegexLexer):
    """
    Lexer for Relation Query Language.
    """

    name = 'RQL'
    url = 'http://www.logilab.org/project/rql'
    aliases = ['rql']
    filenames = ['*.rql']
    mimetypes = ['text/x-rql']
    version_added = '2.0'

    flags = re.IGNORECASE
    tokens = {
        'root': [
            (r'\s+', Whitespace),
            # RQL statement and clause keywords (GROUPBY/ORDERBY are single
            # words in RQL, unlike SQL's two-word forms).
            (r'(DELETE|SET|INSERT|UNION|DISTINCT|WITH|WHERE|BEING|OR'
             r'|AND|NOT|GROUPBY|HAVING|ORDERBY|ASC|DESC|LIMIT|OFFSET'
             r'|TODAY|NOW|TRUE|FALSE|NULL|EXISTS)\b', Keyword),
            (r'[+*/<>=%-]', Operator),
            (r'(Any|is|instance_of|CWEType|CWRelation)\b', Name.Builtin),
            (r'[0-9]+', Number.Integer),
            # Identifiers/variables; an optional trailing '?' is allowed.
            (r'[A-Z_]\w*\??', Name),
            # Single- and double-quoted strings, with doubled-quote escapes.
            (r"'(''|[^'])*'", String.Single),
            (r'"(""|[^"])*"', String.Single),
            (r'[;:()\[\],.]', Punctuation)
        ],
    }