python.py 52 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198
  1. """
  2. pygments.lexers.python
  3. ~~~~~~~~~~~~~~~~~~~~~~
  4. Lexers for Python and related languages.
  5. :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import keyword
  9. from pygments.lexer import DelegatingLexer, RegexLexer, include, \
  10. bygroups, using, default, words, combined, this
  11. from pygments.util import get_bool_opt, shebang_matches
  12. from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
  13. Number, Punctuation, Generic, Other, Error, Whitespace
  14. from pygments import unistring as uni
# Public names of ``pygments.lexers.python``; this is what a star-import of
# the module exports.
__all__ = ['PythonLexer', 'PythonConsoleLexer', 'PythonTracebackLexer',
           'Python2Lexer', 'Python2TracebackLexer',
           'CythonLexer', 'DgLexer', 'NumPyLexer']
class PythonLexer(RegexLexer):
    """
    For Python source code (version 3.x).

    .. versionchanged:: 2.5
       This is now the default ``PythonLexer``. It is still available as the
       alias ``Python3Lexer``.
    """

    name = 'Python'
    url = 'https://www.python.org'
    aliases = ['python', 'py', 'sage', 'python3', 'py3', 'bazel', 'starlark']
    filenames = [
        '*.py',
        '*.pyw',
        # Type stubs
        '*.pyi',
        # Jython
        '*.jy',
        # Sage
        '*.sage',
        # SCons
        '*.sc',
        'SConstruct',
        'SConscript',
        # Skylark/Starlark (used by Bazel, Buck, and Pants)
        '*.bzl',
        'BUCK',
        'BUILD',
        'BUILD.bazel',
        'WORKSPACE',
        # Twisted Application infrastructure
        '*.tac',
    ]
    mimetypes = ['text/x-python', 'application/x-python',
                 'text/x-python3', 'application/x-python3']
    version_added = '0.10'

    # Regex source for a single identifier: one character from
    # ``uni.xid_start`` followed by any number from ``uni.xid_continue``.
    # Reused by the 'name', 'funcname', 'classname', 'import' and
    # 'fromimport' states below.
    uni_name = f"[{uni.xid_start}][{uni.xid_continue}]*"

    def innerstring_rules(ttype):
        """Return the rule list for the body of a non-f-string literal.

        Called while the class body executes (hence no ``self``) to fill the
        'strings-single' / 'strings-double' states.  ``ttype`` is the token
        type given to ordinary string content; recognised ``%``- and
        ``{}``-style format fields are emitted as ``String.Interpol``.
        """
        return [
            # the old style '%s' % (...) string formatting (still valid in Py3)
            (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
             '[hlL]?[E-GXc-giorsaux%]', String.Interpol),
            # the new style '{}'.format(...) string formatting
            (r'\{'
             r'((\w+)((\.\w+)|(\[[^\]]+\]))*)?'  # field name
             r'(\![sra])?'                       # conversion
             r'(\:(.?[<>=\^])?[-+ ]?#?0?(\d+)?,?(\.\d+)?[E-GXb-gnosx%]?)?'
             r'\}', String.Interpol),

            # backslashes, quotes and formatting signs must be parsed one at a time
            (r'[^\\\'"%{\n]+', ttype),
            (r'[\'"\\]', ttype),
            # unhandled string formatting sign
            (r'%|(\{{1,2})', ttype)
            # newlines are an error (use "nl" state)
        ]

    def fstring_rules(ttype):
        """Return the rule list for the body of an f-string literal.

        Called while the class body executes (hence no ``self``) to fill the
        'fstrings-single' / 'fstrings-double' states.  ``ttype`` is the token
        type given to ordinary string content; ``{`` and ``}`` are emitted as
        ``String.Interpol`` and ``{`` pushes the 'expr-inside-fstring' state.
        """
        return [
            # Assuming that a '}' is the closing brace after format specifier.
            # Sadly, this means that we won't detect syntax error. But it's
            # more important to parse correct syntax correctly, than to
            # highlight invalid syntax.
            (r'\}', String.Interpol),
            (r'\{', String.Interpol, 'expr-inside-fstring'),
            # backslashes, quotes and formatting signs must be parsed one at a time
            (r'[^\\\'"{}\n]+', ttype),
            (r'[\'"\\]', ttype),
            # newlines are an error (use "nl" state)
        ]

    # State table.  Rules inside a state are listed in priority order, so the
    # relative position of entries below is significant.
    tokens = {
        # Statement level: docstrings, comments, line continuations,
        # statement keywords and the def/class/from/import openers; anything
        # else is handled by the included 'expr' state.
        'root': [
            (r'\n', Whitespace),
            (r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")',
             bygroups(Whitespace, String.Affix, String.Doc)),
            (r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')",
             bygroups(Whitespace, String.Affix, String.Doc)),
            (r'\A#!.+$', Comment.Hashbang),
            (r'#.*$', Comment.Single),
            (r'\\\n', Text),
            (r'\\', Text),
            include('keywords'),
            include('soft-keywords'),
            (r'(def)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'funcname'),
            (r'(class)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'classname'),
            (r'(from)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text),
             'fromimport'),
            (r'(import)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text),
             'import'),
            include('expr'),
        ],
        # Expression level: string-literal openers (each pushes the state
        # that lexes the string body), numbers, operators, punctuation and
        # names.  Also included from the f-string expression states.
        'expr': [
            # raw f-strings
            ('(?i)(rf|fr)(""")',
             bygroups(String.Affix, String.Double),
             combined('rfstringescape', 'tdqf')),
            ("(?i)(rf|fr)(''')",
             bygroups(String.Affix, String.Single),
             combined('rfstringescape', 'tsqf')),
            ('(?i)(rf|fr)(")',
             bygroups(String.Affix, String.Double),
             combined('rfstringescape', 'dqf')),
            ("(?i)(rf|fr)(')",
             bygroups(String.Affix, String.Single),
             combined('rfstringescape', 'sqf')),
            # non-raw f-strings
            ('([fF])(""")', bygroups(String.Affix, String.Double),
             combined('fstringescape', 'tdqf')),
            ("([fF])(''')", bygroups(String.Affix, String.Single),
             combined('fstringescape', 'tsqf')),
            ('([fF])(")', bygroups(String.Affix, String.Double),
             combined('fstringescape', 'dqf')),
            ("([fF])(')", bygroups(String.Affix, String.Single),
             combined('fstringescape', 'sqf')),
            # raw bytes and strings
            ('(?i)(rb|br|r)(""")',
             bygroups(String.Affix, String.Double), 'tdqs'),
            ("(?i)(rb|br|r)(''')",
             bygroups(String.Affix, String.Single), 'tsqs'),
            ('(?i)(rb|br|r)(")',
             bygroups(String.Affix, String.Double), 'dqs'),
            ("(?i)(rb|br|r)(')",
             bygroups(String.Affix, String.Single), 'sqs'),
            # non-raw strings
            ('([uU]?)(""")', bygroups(String.Affix, String.Double),
             combined('stringescape', 'tdqs')),
            ("([uU]?)(''')", bygroups(String.Affix, String.Single),
             combined('stringescape', 'tsqs')),
            ('([uU]?)(")', bygroups(String.Affix, String.Double),
             combined('stringescape', 'dqs')),
            ("([uU]?)(')", bygroups(String.Affix, String.Single),
             combined('stringescape', 'sqs')),
            # non-raw bytes
            ('([bB])(""")', bygroups(String.Affix, String.Double),
             combined('bytesescape', 'tdqs')),
            ("([bB])(''')", bygroups(String.Affix, String.Single),
             combined('bytesescape', 'tsqs')),
            ('([bB])(")', bygroups(String.Affix, String.Double),
             combined('bytesescape', 'dqs')),
            ("([bB])(')", bygroups(String.Affix, String.Single),
             combined('bytesescape', 'sqs')),

            (r'[^\S\n]+', Text),
            include('numbers'),
            (r'!=|==|<<|>>|:=|[-~+/*%=<>&^|.]', Operator),
            (r'[]{}:(),;[]', Punctuation),
            (r'(in|is|and|or|not)\b', Operator.Word),
            include('expr-keywords'),
            include('builtins'),
            include('magicfuncs'),
            include('magicvars'),
            include('name'),
        ],
        # Entered on '{' inside an f-string body; left again on the closing
        # '}' or on the ':' that introduces a format specifier.
        'expr-inside-fstring': [
            (r'[{([]', Punctuation, 'expr-inside-fstring-inner'),
            # without format specifier
            (r'(=\s*)?'         # debug (https://bugs.python.org/issue36817)
             r'(\![sraf])?'     # conversion
             r'\}', String.Interpol, '#pop'),
            # with format specifier
            # we'll catch the remaining '}' in the outer scope
            (r'(=\s*)?'         # debug (https://bugs.python.org/issue36817)
             r'(\![sraf])?'     # conversion
             r':', String.Interpol, '#pop'),
            (r'\s+', Whitespace),  # allow new lines
            include('expr'),
        ],
        # Bracketed sub-expression inside an f-string field: every opening
        # bracket pushes this state again, every closing bracket pops once.
        'expr-inside-fstring-inner': [
            (r'[{([]', Punctuation, 'expr-inside-fstring-inner'),
            (r'[])}]', Punctuation, '#pop'),
            (r'\s+', Whitespace),  # allow new lines
            include('expr'),
        ],
        'expr-keywords': [
            # Based on https://docs.python.org/3/reference/expressions.html
            (words((
                'async for', 'await', 'else', 'for', 'if', 'lambda',
                'yield', 'yield from'), suffix=r'\b'),
             Keyword),
            (words(('True', 'False', 'None'), suffix=r'\b'), Keyword.Constant),
        ],
        # Statement keywords; included from 'root' only.
        'keywords': [
            (words((
                'assert', 'async', 'await', 'break', 'continue', 'del', 'elif',
                'else', 'except', 'finally', 'for', 'global', 'if', 'lambda',
                'pass', 'raise', 'nonlocal', 'return', 'try', 'while', 'yield',
                'yield from', 'as', 'with'), suffix=r'\b'),
             Keyword),
            (words(('True', 'False', 'None'), suffix=r'\b'), Keyword.Constant),
        ],
        'soft-keywords': [
            # `match`, `case` and `_` soft keywords
            (r'(^[ \t]*)'              # at beginning of line + possible indentation
             r'(match|case)\b'         # a possible keyword
             r'(?![ \t]*(?:'           # not followed by...
             r'[:,;=^&|@~)\]}]|(?:' +  # characters and keywords that mean this isn't
                                       # pattern matching (but None/True/False is ok)
             r'|'.join(k for k in keyword.kwlist if k[0].islower()) + r')\b))',
             bygroups(Text, Keyword), 'soft-keywords-inner'),
        ],
        'soft-keywords-inner': [
            # optional `_` keyword
            (r'(\s+)([^\n_]*)(_\b)', bygroups(Whitespace, using(this), Keyword)),
            default('#pop')
        ],
        # Builtin functions/types, pseudo-builtins and builtin exceptions.
        # The ``(?<!\.)`` prefix keeps attribute accesses such as ``obj.list``
        # from being highlighted as builtins.
        'builtins': [
            (words((
                '__import__', 'abs', 'aiter', 'all', 'any', 'bin', 'bool', 'bytearray',
                'breakpoint', 'bytes', 'callable', 'chr', 'classmethod', 'compile',
                'complex', 'delattr', 'dict', 'dir', 'divmod', 'enumerate', 'eval',
                'filter', 'float', 'format', 'frozenset', 'getattr', 'globals',
                'hasattr', 'hash', 'hex', 'id', 'input', 'int', 'isinstance',
                'issubclass', 'iter', 'len', 'list', 'locals', 'map', 'max',
                'memoryview', 'min', 'next', 'object', 'oct', 'open', 'ord', 'pow',
                'print', 'property', 'range', 'repr', 'reversed', 'round', 'set',
                'setattr', 'slice', 'sorted', 'staticmethod', 'str', 'sum', 'super',
                'tuple', 'type', 'vars', 'zip'), prefix=r'(?<!\.)', suffix=r'\b'),
             Name.Builtin),
            (r'(?<!\.)(self|Ellipsis|NotImplemented|cls)\b', Name.Builtin.Pseudo),
            (words((
                'ArithmeticError', 'AssertionError', 'AttributeError',
                'BaseException', 'BufferError', 'BytesWarning', 'DeprecationWarning',
                'EOFError', 'EnvironmentError', 'Exception', 'FloatingPointError',
                'FutureWarning', 'GeneratorExit', 'IOError', 'ImportError',
                'ImportWarning', 'IndentationError', 'IndexError', 'KeyError',
                'KeyboardInterrupt', 'LookupError', 'MemoryError', 'NameError',
                'NotImplementedError', 'OSError', 'OverflowError',
                'PendingDeprecationWarning', 'ReferenceError', 'ResourceWarning',
                'RuntimeError', 'RuntimeWarning', 'StopIteration',
                'SyntaxError', 'SyntaxWarning', 'SystemError', 'SystemExit',
                'TabError', 'TypeError', 'UnboundLocalError', 'UnicodeDecodeError',
                'UnicodeEncodeError', 'UnicodeError', 'UnicodeTranslateError',
                'UnicodeWarning', 'UserWarning', 'ValueError', 'VMSError',
                'Warning', 'WindowsError', 'ZeroDivisionError',
                # new builtin exceptions from PEP 3151
                'BlockingIOError', 'ChildProcessError', 'ConnectionError',
                'BrokenPipeError', 'ConnectionAbortedError', 'ConnectionRefusedError',
                'ConnectionResetError', 'FileExistsError', 'FileNotFoundError',
                'InterruptedError', 'IsADirectoryError', 'NotADirectoryError',
                'PermissionError', 'ProcessLookupError', 'TimeoutError',
                # others new in Python 3
                'StopAsyncIteration', 'ModuleNotFoundError', 'RecursionError',
                'EncodingWarning'),
                prefix=r'(?<!\.)', suffix=r'\b'),
             Name.Exception),
        ],
        'magicfuncs': [
            (words((
                '__abs__', '__add__', '__aenter__', '__aexit__', '__aiter__',
                '__and__', '__anext__', '__await__', '__bool__', '__bytes__',
                '__call__', '__complex__', '__contains__', '__del__', '__delattr__',
                '__delete__', '__delitem__', '__dir__', '__divmod__', '__enter__',
                '__eq__', '__exit__', '__float__', '__floordiv__', '__format__',
                '__ge__', '__get__', '__getattr__', '__getattribute__',
                '__getitem__', '__gt__', '__hash__', '__iadd__', '__iand__',
                '__ifloordiv__', '__ilshift__', '__imatmul__', '__imod__',
                '__imul__', '__index__', '__init__', '__instancecheck__',
                '__int__', '__invert__', '__ior__', '__ipow__', '__irshift__',
                '__isub__', '__iter__', '__itruediv__', '__ixor__', '__le__',
                '__len__', '__length_hint__', '__lshift__', '__lt__', '__matmul__',
                '__missing__', '__mod__', '__mul__', '__ne__', '__neg__',
                '__new__', '__next__', '__or__', '__pos__', '__pow__',
                '__prepare__', '__radd__', '__rand__', '__rdivmod__', '__repr__',
                '__reversed__', '__rfloordiv__', '__rlshift__', '__rmatmul__',
                '__rmod__', '__rmul__', '__ror__', '__round__', '__rpow__',
                '__rrshift__', '__rshift__', '__rsub__', '__rtruediv__',
                '__rxor__', '__set__', '__setattr__', '__setitem__', '__str__',
                '__sub__', '__subclasscheck__', '__truediv__',
                '__xor__'), suffix=r'\b'),
             Name.Function.Magic),
        ],
        'magicvars': [
            (words((
                '__annotations__', '__bases__', '__class__', '__closure__',
                '__code__', '__defaults__', '__dict__', '__doc__', '__file__',
                '__func__', '__globals__', '__kwdefaults__', '__module__',
                '__mro__', '__name__', '__objclass__', '__qualname__',
                '__self__', '__slots__', '__weakref__'), suffix=r'\b'),
             Name.Variable.Magic),
        ],
        # Numeric literals; the patterns allow single underscores between
        # digits.  Float forms come first so the plain-integer rule does not
        # consume their leading digits.
        'numbers': [
            (r'(\d(?:_?\d)*\.(?:\d(?:_?\d)*)?|(?:\d(?:_?\d)*)?\.\d(?:_?\d)*)'
             r'([eE][+-]?\d(?:_?\d)*)?', Number.Float),
            (r'\d(?:_?\d)*[eE][+-]?\d(?:_?\d)*j?', Number.Float),
            (r'0[oO](?:_?[0-7])+', Number.Oct),
            (r'0[bB](?:_?[01])+', Number.Bin),
            (r'0[xX](?:_?[a-fA-F0-9])+', Number.Hex),
            (r'\d(?:_?\d)*', Number.Integer),
        ],
        'name': [
            (r'@' + uni_name, Name.Decorator),
            (r'@', Operator),  # new matrix multiplication operator
            (uni_name, Name),
        ],
        # Entered after 'def': the next identifier is the function name.
        'funcname': [
            include('magicfuncs'),
            (uni_name, Name.Function, '#pop'),
            default('#pop'),
        ],
        # Entered after 'class': the next identifier is the class name.
        # NOTE(review): unlike 'funcname' there is no default('#pop') here.
        'classname': [
            (uni_name, Name.Class, '#pop'),
        ],
        # Entered after 'import': dotted names, 'as' clauses and commas.
        'import': [
            (r'(\s+)(as)(\s+)', bygroups(Text, Keyword, Text)),
            (r'\.', Name.Namespace),
            (uni_name, Name.Namespace),
            (r'(\s*)(,)(\s*)', bygroups(Text, Operator, Text)),
            default('#pop')  # all else: go back
        ],
        # Entered after 'from': the module path up to the 'import' keyword.
        'fromimport': [
            (r'(\s+)(import)\b', bygroups(Text, Keyword.Namespace), '#pop'),
            (r'\.', Name.Namespace),
            # if None occurs here, it's "raise x from None", since None can
            # never be a module name
            (r'None\b', Keyword.Constant, '#pop'),
            (uni_name, Name.Namespace),
            default('#pop'),
        ],
        # Escape states.  The string openers in 'expr' merge one of these
        # with a string-body state through combined().
        'rfstringescape': [
            (r'\{\{', String.Escape),
            (r'\}\}', String.Escape),
        ],
        'fstringescape': [
            include('rfstringescape'),
            include('stringescape'),
        ],
        'bytesescape': [
            (r'\\([\\abfnrtv"\']|\n|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
        ],
        'stringescape': [
            (r'\\(N\{.*?\}|u[a-fA-F0-9]{4}|U[a-fA-F0-9]{8})', String.Escape),
            include('bytesescape')
        ],
        'fstrings-single': fstring_rules(String.Single),
        'fstrings-double': fstring_rules(String.Double),
        'strings-single': innerstring_rules(String.Single),
        'strings-double': innerstring_rules(String.Double),
        # String-body states pushed by the openers in 'expr'.  Naming:
        # dq/sq = double/single quote, tdq/tsq = triple-quoted variants,
        # trailing f = f-string body, trailing s = plain string or bytes body.
        'dqf': [
            (r'"', String.Double, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape),  # included here for raw strings
            include('fstrings-double')
        ],
        'sqf': [
            (r"'", String.Single, '#pop'),
            (r"\\\\|\\'|\\\n", String.Escape),  # included here for raw strings
            include('fstrings-single')
        ],
        'dqs': [
            (r'"', String.Double, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape),  # included here for raw strings
            include('strings-double')
        ],
        'sqs': [
            (r"'", String.Single, '#pop'),
            (r"\\\\|\\'|\\\n", String.Escape),  # included here for raw strings
            include('strings-single')
        ],
        'tdqf': [
            (r'"""', String.Double, '#pop'),
            include('fstrings-double'),
            (r'\n', String.Double)
        ],
        'tsqf': [
            (r"'''", String.Single, '#pop'),
            include('fstrings-single'),
            (r'\n', String.Single)
        ],
        'tdqs': [
            (r'"""', String.Double, '#pop'),
            include('strings-double'),
            (r'\n', String.Double)
        ],
        'tsqs': [
            (r"'''", String.Single, '#pop'),
            include('strings-single'),
            (r'\n', String.Single)
        ],
    }

    def analyse_text(text):
        """Guess whether ``text`` is Python 3 source.

        Returns the result of ``shebang_matches`` for a ``python``/``pythonw``
        interpreter (optionally suffixed ``3`` or ``3.x``) when that is
        truthy; otherwise returns whether ``'import '`` occurs in the first
        1000 characters of ``text``.
        """
        return shebang_matches(text, r'pythonw?(3(\.\d)?)?') or \
            'import ' in text[:1000]
# Alias kept so that the older name ``Python3Lexer`` still resolves to the
# (Python 3) ``PythonLexer``.
Python3Lexer = PythonLexer
class Python2Lexer(RegexLexer):
    """
    For Python 2.x source code.

    .. versionchanged:: 2.5
       This class has been renamed from ``PythonLexer``. ``PythonLexer`` now
       refers to the Python 3 variant. File name patterns like ``*.py`` have
       been moved to Python 3 as well.
    """

    name = 'Python 2.x'
    url = 'https://www.python.org'
    aliases = ['python2', 'py2']
    filenames = []  # now taken over by PythonLexer (3.x)
    mimetypes = ['text/x-python2', 'application/x-python2']
    version_added = ''

    def innerstring_rules(ttype):
        """Return the rule list for the body of a string literal.

        Called while the class body executes (hence no ``self``) to fill the
        'strings-single' / 'strings-double' states.  ``ttype`` is the token
        type given to ordinary string content; recognised ``%``-style format
        fields are emitted as ``String.Interpol``.
        """
        return [
            # the old style '%s' % (...) string formatting
            (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
             '[hlL]?[E-GXc-giorsux%]', String.Interpol),
            # backslashes, quotes and formatting signs must be parsed one at a time
            (r'[^\\\'"%\n]+', ttype),
            (r'[\'"\\]', ttype),
            # unhandled string formatting sign
            (r'%', ttype),
            # newlines are an error (use "nl" state)
        ]

    # State table.  Rules inside a state are listed in priority order, so the
    # relative position of entries below is significant.
    tokens = {
        # Single top-level state: statement openers, operators, string
        # openers, names and numbers are all matched here.
        'root': [
            (r'\n', Whitespace),
            (r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")',
             bygroups(Whitespace, String.Affix, String.Doc)),
            (r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')",
             bygroups(Whitespace, String.Affix, String.Doc)),
            (r'[^\S\n]+', Text),
            (r'\A#!.+$', Comment.Hashbang),
            (r'#.*$', Comment.Single),
            (r'[]{}:(),;[]', Punctuation),
            (r'\\\n', Text),
            (r'\\', Text),
            (r'(in|is|and|or|not)\b', Operator.Word),
            (r'!=|==|<<|>>|[-~+/*%=<>&^|.]', Operator),
            include('keywords'),
            (r'(def)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'funcname'),
            (r'(class)((?:\s|\\\s)+)', bygroups(Keyword, Text), 'classname'),
            (r'(from)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text),
             'fromimport'),
            (r'(import)((?:\s|\\\s)+)', bygroups(Keyword.Namespace, Text),
             'import'),
            include('builtins'),
            include('magicfuncs'),
            include('magicvars'),
            include('backtick'),
            # raw string openers: the body states are pushed without
            # 'stringescape'
            ('([rR]|[uUbB][rR]|[rR][uUbB])(""")',
             bygroups(String.Affix, String.Double), 'tdqs'),
            ("([rR]|[uUbB][rR]|[rR][uUbB])(''')",
             bygroups(String.Affix, String.Single), 'tsqs'),
            ('([rR]|[uUbB][rR]|[rR][uUbB])(")',
             bygroups(String.Affix, String.Double), 'dqs'),
            ("([rR]|[uUbB][rR]|[rR][uUbB])(')",
             bygroups(String.Affix, String.Single), 'sqs'),
            # non-raw string openers: body state merged with 'stringescape'
            ('([uUbB]?)(""")', bygroups(String.Affix, String.Double),
             combined('stringescape', 'tdqs')),
            ("([uUbB]?)(''')", bygroups(String.Affix, String.Single),
             combined('stringescape', 'tsqs')),
            ('([uUbB]?)(")', bygroups(String.Affix, String.Double),
             combined('stringescape', 'dqs')),
            ("([uUbB]?)(')", bygroups(String.Affix, String.Single),
             combined('stringescape', 'sqs')),
            include('name'),
            include('numbers'),
        ],
        'keywords': [
            (words((
                'assert', 'break', 'continue', 'del', 'elif', 'else', 'except',
                'exec', 'finally', 'for', 'global', 'if', 'lambda', 'pass',
                'print', 'raise', 'return', 'try', 'while', 'yield',
                'yield from', 'as', 'with'), suffix=r'\b'),
             Keyword),
        ],
        # Builtin functions/types, pseudo-builtins and builtin exceptions.
        # The ``(?<!\.)`` prefix keeps attribute accesses such as ``obj.list``
        # from being highlighted as builtins.
        'builtins': [
            (words((
                '__import__', 'abs', 'all', 'any', 'apply', 'basestring', 'bin',
                'bool', 'buffer', 'bytearray', 'bytes', 'callable', 'chr', 'classmethod',
                'cmp', 'coerce', 'compile', 'complex', 'delattr', 'dict', 'dir', 'divmod',
                'enumerate', 'eval', 'execfile', 'exit', 'file', 'filter', 'float',
                'frozenset', 'getattr', 'globals', 'hasattr', 'hash', 'hex', 'id',
                'input', 'int', 'intern', 'isinstance', 'issubclass', 'iter', 'len',
                'list', 'locals', 'long', 'map', 'max', 'min', 'next', 'object',
                'oct', 'open', 'ord', 'pow', 'property', 'range', 'raw_input', 'reduce',
                'reload', 'repr', 'reversed', 'round', 'set', 'setattr', 'slice',
                'sorted', 'staticmethod', 'str', 'sum', 'super', 'tuple', 'type',
                'unichr', 'unicode', 'vars', 'xrange', 'zip'),
                prefix=r'(?<!\.)', suffix=r'\b'),
             Name.Builtin),
            (r'(?<!\.)(self|None|Ellipsis|NotImplemented|False|True|cls'
             r')\b', Name.Builtin.Pseudo),
            (words((
                'ArithmeticError', 'AssertionError', 'AttributeError',
                'BaseException', 'DeprecationWarning', 'EOFError', 'EnvironmentError',
                'Exception', 'FloatingPointError', 'FutureWarning', 'GeneratorExit',
                'IOError', 'ImportError', 'ImportWarning', 'IndentationError',
                'IndexError', 'KeyError', 'KeyboardInterrupt', 'LookupError',
                'MemoryError', 'NameError',
                'NotImplementedError', 'OSError', 'OverflowError', 'OverflowWarning',
                'PendingDeprecationWarning', 'ReferenceError',
                'RuntimeError', 'RuntimeWarning', 'StandardError', 'StopIteration',
                'SyntaxError', 'SyntaxWarning', 'SystemError', 'SystemExit',
                'TabError', 'TypeError', 'UnboundLocalError', 'UnicodeDecodeError',
                'UnicodeEncodeError', 'UnicodeError', 'UnicodeTranslateError',
                'UnicodeWarning', 'UserWarning', 'ValueError', 'VMSError', 'Warning',
                'WindowsError', 'ZeroDivisionError'), prefix=r'(?<!\.)', suffix=r'\b'),
             Name.Exception),
        ],
        'magicfuncs': [
            (words((
                '__abs__', '__add__', '__and__', '__call__', '__cmp__', '__coerce__',
                '__complex__', '__contains__', '__del__', '__delattr__', '__delete__',
                '__delitem__', '__delslice__', '__div__', '__divmod__', '__enter__',
                '__eq__', '__exit__', '__float__', '__floordiv__', '__ge__', '__get__',
                '__getattr__', '__getattribute__', '__getitem__', '__getslice__', '__gt__',
                '__hash__', '__hex__', '__iadd__', '__iand__', '__idiv__', '__ifloordiv__',
                '__ilshift__', '__imod__', '__imul__', '__index__', '__init__',
                '__instancecheck__', '__int__', '__invert__', '__iop__', '__ior__',
                '__ipow__', '__irshift__', '__isub__', '__iter__', '__itruediv__',
                '__ixor__', '__le__', '__len__', '__long__', '__lshift__', '__lt__',
                '__missing__', '__mod__', '__mul__', '__ne__', '__neg__', '__new__',
                '__nonzero__', '__oct__', '__op__', '__or__', '__pos__', '__pow__',
                '__radd__', '__rand__', '__rcmp__', '__rdiv__', '__rdivmod__', '__repr__',
                '__reversed__', '__rfloordiv__', '__rlshift__', '__rmod__', '__rmul__',
                '__rop__', '__ror__', '__rpow__', '__rrshift__', '__rshift__', '__rsub__',
                '__rtruediv__', '__rxor__', '__set__', '__setattr__', '__setitem__',
                '__setslice__', '__str__', '__sub__', '__subclasscheck__', '__truediv__',
                '__unicode__', '__xor__'), suffix=r'\b'),
             Name.Function.Magic),
        ],
        'magicvars': [
            (words((
                '__bases__', '__class__', '__closure__', '__code__', '__defaults__',
                '__dict__', '__doc__', '__file__', '__func__', '__globals__',
                '__metaclass__', '__module__', '__mro__', '__name__', '__self__',
                '__slots__', '__weakref__'),
                suffix=r'\b'),
             Name.Variable.Magic),
        ],
        # Numeric literals, including the ``L`` long suffix and an optional
        # ``j`` suffix.  Float forms come first so the integer rules do not
        # consume their leading digits.
        'numbers': [
            (r'(\d+\.\d*|\d*\.\d+)([eE][+-]?[0-9]+)?j?', Number.Float),
            (r'\d+[eE][+-]?[0-9]+j?', Number.Float),
            (r'0[0-7]+j?', Number.Oct),
            (r'0[bB][01]+', Number.Bin),
            (r'0[xX][a-fA-F0-9]+', Number.Hex),
            (r'\d+L', Number.Integer.Long),
            (r'\d+j?', Number.Integer)
        ],
        # Backtick-quoted expressions.
        'backtick': [
            ('`.*?`', String.Backtick),
        ],
        'name': [
            (r'@[\w.]+', Name.Decorator),
            (r'[a-zA-Z_]\w*', Name),
        ],
        # Entered after 'def': the next identifier is the function name.
        'funcname': [
            include('magicfuncs'),
            (r'[a-zA-Z_]\w*', Name.Function, '#pop'),
            default('#pop'),
        ],
        # Entered after 'class': the next identifier is the class name.
        # NOTE(review): unlike 'funcname' there is no default('#pop') here.
        'classname': [
            (r'[a-zA-Z_]\w*', Name.Class, '#pop')
        ],
        # Entered after 'import': dotted names, 'as' and commas.
        'import': [
            (r'(?:[ \t]|\\\n)+', Text),
            (r'as\b', Keyword.Namespace),
            (r',', Operator),
            (r'[a-zA-Z_][\w.]*', Name.Namespace),
            default('#pop')  # all else: go back
        ],
        # Entered after 'from': the module path up to the 'import' keyword.
        'fromimport': [
            (r'(?:[ \t]|\\\n)+', Text),
            (r'import\b', Keyword.Namespace, '#pop'),
            # if None occurs here, it's "raise x from None", since None can
            # never be a module name
            (r'None\b', Name.Builtin.Pseudo, '#pop'),
            # sadly, in "raise x from y" y will be highlighted as namespace too
            (r'[a-zA-Z_.][\w.]*', Name.Namespace),
            # anything else here also means "raise x from y" and is therefore
            # not an error
            default('#pop'),
        ],
        'stringescape': [
            (r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|'
             r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
        ],
        'strings-single': innerstring_rules(String.Single),
        'strings-double': innerstring_rules(String.Double),
        # String-body states pushed by the openers in 'root'.  Naming:
        # dqs/sqs = double/single-quoted, tdqs/tsqs = triple-quoted variants.
        'dqs': [
            (r'"', String.Double, '#pop'),
            (r'\\\\|\\"|\\\n', String.Escape),  # included here for raw strings
            include('strings-double')
        ],
        'sqs': [
            (r"'", String.Single, '#pop'),
            (r"\\\\|\\'|\\\n", String.Escape),  # included here for raw strings
            include('strings-single')
        ],
        'tdqs': [
            (r'"""', String.Double, '#pop'),
            include('strings-double'),
            (r'\n', String.Double)
        ],
        'tsqs': [
            (r"'''", String.Single, '#pop'),
            include('strings-single'),
            (r'\n', String.Single)
        ],
    }

    def analyse_text(text):
        """Guess whether ``text`` is Python 2 source.

        Returns the result of ``shebang_matches`` for a ``python2``/
        ``pythonw2`` interpreter, optionally suffixed with a minor version.
        """
        return shebang_matches(text, r'pythonw?2(\.\d)?')
  612. class _PythonConsoleLexerBase(RegexLexer):
  613. name = 'Python console session'
  614. aliases = ['pycon', 'python-console']
  615. mimetypes = ['text/x-python-doctest']
  616. """Auxiliary lexer for `PythonConsoleLexer`.
  617. Code tokens are output as ``Token.Other.Code``, traceback tokens as
  618. ``Token.Other.Traceback``.
  619. """
  620. tokens = {
  621. 'root': [
  622. (r'(>>> )(.*\n)', bygroups(Generic.Prompt, Other.Code), 'continuations'),
  623. # This happens, e.g., when tracebacks are embedded in documentation;
  624. # trailing whitespaces are often stripped in such contexts.
  625. (r'(>>>)(\n)', bygroups(Generic.Prompt, Whitespace)),
  626. (r'(\^C)?Traceback \(most recent call last\):\n', Other.Traceback, 'traceback'),
  627. # SyntaxError starts with this
  628. (r' File "[^"]+", line \d+', Other.Traceback, 'traceback'),
  629. (r'.*\n', Generic.Output),
  630. ],
  631. 'continuations': [
  632. (r'(\.\.\. )(.*\n)', bygroups(Generic.Prompt, Other.Code)),
  633. # See above.
  634. (r'(\.\.\.)(\n)', bygroups(Generic.Prompt, Whitespace)),
  635. default('#pop'),
  636. ],
  637. 'traceback': [
  638. # As soon as we see a traceback, consume everything until the next
  639. # >>> prompt.
  640. (r'(?=>>>( |$))', Text, '#pop'),
  641. (r'(KeyboardInterrupt)(\n)', bygroups(Name.Class, Whitespace)),
  642. (r'.*\n', Other.Traceback),
  643. ],
  644. }
  645. class PythonConsoleLexer(DelegatingLexer):
  646. """
  647. For Python console output or doctests, such as:
  648. .. sourcecode:: pycon
  649. >>> a = 'foo'
  650. >>> print(a)
  651. foo
  652. >>> 1 / 0
  653. Traceback (most recent call last):
  654. File "<stdin>", line 1, in <module>
  655. ZeroDivisionError: integer division or modulo by zero
  656. Additional options:
  657. `python3`
  658. Use Python 3 lexer for code. Default is ``True``.
  659. .. versionadded:: 1.0
  660. .. versionchanged:: 2.5
  661. Now defaults to ``True``.
  662. """
  663. name = 'Python console session'
  664. aliases = ['pycon', 'python-console']
  665. mimetypes = ['text/x-python-doctest']
  666. url = 'https://python.org'
  667. version_added = ''
  668. def __init__(self, **options):
  669. python3 = get_bool_opt(options, 'python3', True)
  670. if python3:
  671. pylexer = PythonLexer
  672. tblexer = PythonTracebackLexer
  673. else:
  674. pylexer = Python2Lexer
  675. tblexer = Python2TracebackLexer
  676. # We have two auxiliary lexers. Use DelegatingLexer twice with
  677. # different tokens. TODO: DelegatingLexer should support this
  678. # directly, by accepting a tuplet of auxiliary lexers and a tuple of
  679. # distinguishing tokens. Then we wouldn't need this intermediary
  680. # class.
  681. class _ReplaceInnerCode(DelegatingLexer):
  682. def __init__(self, **options):
  683. super().__init__(pylexer, _PythonConsoleLexerBase, Other.Code, **options)
  684. super().__init__(tblexer, _ReplaceInnerCode, Other.Traceback, **options)
  685. class PythonTracebackLexer(RegexLexer):
  686. """
  687. For Python 3.x tracebacks, with support for chained exceptions.
  688. .. versionchanged:: 2.5
  689. This is now the default ``PythonTracebackLexer``. It is still available
  690. as the alias ``Python3TracebackLexer``.
  691. """
  692. name = 'Python Traceback'
  693. aliases = ['pytb', 'py3tb']
  694. filenames = ['*.pytb', '*.py3tb']
  695. mimetypes = ['text/x-python-traceback', 'text/x-python3-traceback']
  696. url = 'https://python.org'
  697. version_added = '1.0'
  698. tokens = {
  699. 'root': [
  700. (r'\n', Whitespace),
  701. (r'^(\^C)?Traceback \(most recent call last\):\n', Generic.Traceback, 'intb'),
  702. (r'^During handling of the above exception, another '
  703. r'exception occurred:\n\n', Generic.Traceback),
  704. (r'^The above exception was the direct cause of the '
  705. r'following exception:\n\n', Generic.Traceback),
  706. (r'^(?= File "[^"]+", line \d+)', Generic.Traceback, 'intb'),
  707. (r'^.*\n', Other),
  708. ],
  709. 'intb': [
  710. (r'^( File )("[^"]+")(, line )(\d+)(, in )(.+)(\n)',
  711. bygroups(Text, Name.Builtin, Text, Number, Text, Name, Whitespace)),
  712. (r'^( File )("[^"]+")(, line )(\d+)(\n)',
  713. bygroups(Text, Name.Builtin, Text, Number, Whitespace)),
  714. (r'^( )(.+)(\n)',
  715. bygroups(Whitespace, using(PythonLexer), Whitespace), 'markers'),
  716. (r'^([ \t]*)(\.\.\.)(\n)',
  717. bygroups(Whitespace, Comment, Whitespace)), # for doctests...
  718. (r'^([^:]+)(: )(.+)(\n)',
  719. bygroups(Generic.Error, Text, Name, Whitespace), '#pop'),
  720. (r'^([a-zA-Z_][\w.]*)(:?\n)',
  721. bygroups(Generic.Error, Whitespace), '#pop'),
  722. default('#pop'),
  723. ],
  724. 'markers': [
  725. # Either `PEP 657 <https://www.python.org/dev/peps/pep-0657/>`
  726. # error locations in Python 3.11+, or single-caret markers
  727. # for syntax errors before that.
  728. (r'^( {4,})([~^]+)(\n)',
  729. bygroups(Whitespace, Punctuation.Marker, Whitespace),
  730. '#pop'),
  731. default('#pop'),
  732. ],
  733. }
  734. Python3TracebackLexer = PythonTracebackLexer
  735. class Python2TracebackLexer(RegexLexer):
  736. """
  737. For Python tracebacks.
  738. .. versionchanged:: 2.5
  739. This class has been renamed from ``PythonTracebackLexer``.
  740. ``PythonTracebackLexer`` now refers to the Python 3 variant.
  741. """
  742. name = 'Python 2.x Traceback'
  743. aliases = ['py2tb']
  744. filenames = ['*.py2tb']
  745. mimetypes = ['text/x-python2-traceback']
  746. url = 'https://python.org'
  747. version_added = '0.7'
  748. tokens = {
  749. 'root': [
  750. # Cover both (most recent call last) and (innermost last)
  751. # The optional ^C allows us to catch keyboard interrupt signals.
  752. (r'^(\^C)?(Traceback.*\n)',
  753. bygroups(Text, Generic.Traceback), 'intb'),
  754. # SyntaxError starts with this.
  755. (r'^(?= File "[^"]+", line \d+)', Generic.Traceback, 'intb'),
  756. (r'^.*\n', Other),
  757. ],
  758. 'intb': [
  759. (r'^( File )("[^"]+")(, line )(\d+)(, in )(.+)(\n)',
  760. bygroups(Text, Name.Builtin, Text, Number, Text, Name, Whitespace)),
  761. (r'^( File )("[^"]+")(, line )(\d+)(\n)',
  762. bygroups(Text, Name.Builtin, Text, Number, Whitespace)),
  763. (r'^( )(.+)(\n)',
  764. bygroups(Text, using(Python2Lexer), Whitespace), 'marker'),
  765. (r'^([ \t]*)(\.\.\.)(\n)',
  766. bygroups(Text, Comment, Whitespace)), # for doctests...
  767. (r'^([^:]+)(: )(.+)(\n)',
  768. bygroups(Generic.Error, Text, Name, Whitespace), '#pop'),
  769. (r'^([a-zA-Z_]\w*)(:?\n)',
  770. bygroups(Generic.Error, Whitespace), '#pop')
  771. ],
  772. 'marker': [
  773. # For syntax errors.
  774. (r'( {4,})(\^)', bygroups(Text, Punctuation.Marker), '#pop'),
  775. default('#pop'),
  776. ],
  777. }
  778. class CythonLexer(RegexLexer):
  779. """
  780. For Pyrex and Cython source code.
  781. """
  782. name = 'Cython'
  783. url = 'https://cython.org'
  784. aliases = ['cython', 'pyx', 'pyrex']
  785. filenames = ['*.pyx', '*.pxd', '*.pxi']
  786. mimetypes = ['text/x-cython', 'application/x-cython']
  787. version_added = '1.1'
  788. tokens = {
  789. 'root': [
  790. (r'\n', Whitespace),
  791. (r'^(\s*)("""(?:.|\n)*?""")', bygroups(Whitespace, String.Doc)),
  792. (r"^(\s*)('''(?:.|\n)*?''')", bygroups(Whitespace, String.Doc)),
  793. (r'[^\S\n]+', Text),
  794. (r'#.*$', Comment),
  795. (r'[]{}:(),;[]', Punctuation),
  796. (r'\\\n', Whitespace),
  797. (r'\\', Text),
  798. (r'(in|is|and|or|not)\b', Operator.Word),
  799. (r'(<)([a-zA-Z0-9.?]+)(>)',
  800. bygroups(Punctuation, Keyword.Type, Punctuation)),
  801. (r'!=|==|<<|>>|[-~+/*%=<>&^|.?]', Operator),
  802. (r'(from)(\d+)(<=)(\s+)(<)(\d+)(:)',
  803. bygroups(Keyword, Number.Integer, Operator, Name, Operator,
  804. Name, Punctuation)),
  805. include('keywords'),
  806. (r'(def|property)(\s+)', bygroups(Keyword, Text), 'funcname'),
  807. (r'(cp?def)(\s+)', bygroups(Keyword, Text), 'cdef'),
  808. # (should actually start a block with only cdefs)
  809. (r'(cdef)(:)', bygroups(Keyword, Punctuation)),
  810. (r'(class|struct)(\s+)', bygroups(Keyword, Text), 'classname'),
  811. (r'(from)(\s+)', bygroups(Keyword, Text), 'fromimport'),
  812. (r'(c?import)(\s+)', bygroups(Keyword, Text), 'import'),
  813. include('builtins'),
  814. include('backtick'),
  815. ('(?:[rR]|[uU][rR]|[rR][uU])"""', String, 'tdqs'),
  816. ("(?:[rR]|[uU][rR]|[rR][uU])'''", String, 'tsqs'),
  817. ('(?:[rR]|[uU][rR]|[rR][uU])"', String, 'dqs'),
  818. ("(?:[rR]|[uU][rR]|[rR][uU])'", String, 'sqs'),
  819. ('[uU]?"""', String, combined('stringescape', 'tdqs')),
  820. ("[uU]?'''", String, combined('stringescape', 'tsqs')),
  821. ('[uU]?"', String, combined('stringescape', 'dqs')),
  822. ("[uU]?'", String, combined('stringescape', 'sqs')),
  823. include('name'),
  824. include('numbers'),
  825. ],
  826. 'keywords': [
  827. (words((
  828. 'assert', 'async', 'await', 'break', 'by', 'continue', 'ctypedef', 'del', 'elif',
  829. 'else', 'except', 'except?', 'exec', 'finally', 'for', 'fused', 'gil',
  830. 'global', 'if', 'include', 'lambda', 'nogil', 'pass', 'print',
  831. 'raise', 'return', 'try', 'while', 'yield', 'as', 'with'), suffix=r'\b'),
  832. Keyword),
  833. (r'(DEF|IF|ELIF|ELSE)\b', Comment.Preproc),
  834. ],
  835. 'builtins': [
  836. (words((
  837. '__import__', 'abs', 'all', 'any', 'apply', 'basestring', 'bin', 'bint',
  838. 'bool', 'buffer', 'bytearray', 'bytes', 'callable', 'chr',
  839. 'classmethod', 'cmp', 'coerce', 'compile', 'complex', 'delattr',
  840. 'dict', 'dir', 'divmod', 'enumerate', 'eval', 'execfile', 'exit',
  841. 'file', 'filter', 'float', 'frozenset', 'getattr', 'globals',
  842. 'hasattr', 'hash', 'hex', 'id', 'input', 'int', 'intern', 'isinstance',
  843. 'issubclass', 'iter', 'len', 'list', 'locals', 'long', 'map', 'max',
  844. 'min', 'next', 'object', 'oct', 'open', 'ord', 'pow', 'property', 'Py_ssize_t',
  845. 'range', 'raw_input', 'reduce', 'reload', 'repr', 'reversed',
  846. 'round', 'set', 'setattr', 'slice', 'sorted', 'staticmethod',
  847. 'str', 'sum', 'super', 'tuple', 'type', 'unichr', 'unicode', 'unsigned',
  848. 'vars', 'xrange', 'zip'), prefix=r'(?<!\.)', suffix=r'\b'),
  849. Name.Builtin),
  850. (r'(?<!\.)(self|None|Ellipsis|NotImplemented|False|True|NULL'
  851. r')\b', Name.Builtin.Pseudo),
  852. (words((
  853. 'ArithmeticError', 'AssertionError', 'AttributeError',
  854. 'BaseException', 'DeprecationWarning', 'EOFError', 'EnvironmentError',
  855. 'Exception', 'FloatingPointError', 'FutureWarning', 'GeneratorExit',
  856. 'IOError', 'ImportError', 'ImportWarning', 'IndentationError',
  857. 'IndexError', 'KeyError', 'KeyboardInterrupt', 'LookupError',
  858. 'MemoryError', 'NameError', 'NotImplemented', 'NotImplementedError',
  859. 'OSError', 'OverflowError', 'OverflowWarning',
  860. 'PendingDeprecationWarning', 'ReferenceError', 'RuntimeError',
  861. 'RuntimeWarning', 'StandardError', 'StopIteration', 'SyntaxError',
  862. 'SyntaxWarning', 'SystemError', 'SystemExit', 'TabError',
  863. 'TypeError', 'UnboundLocalError', 'UnicodeDecodeError',
  864. 'UnicodeEncodeError', 'UnicodeError', 'UnicodeTranslateError',
  865. 'UnicodeWarning', 'UserWarning', 'ValueError', 'Warning',
  866. 'ZeroDivisionError'), prefix=r'(?<!\.)', suffix=r'\b'),
  867. Name.Exception),
  868. ],
  869. 'numbers': [
  870. (r'(\d+\.?\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float),
  871. (r'0\d+', Number.Oct),
  872. (r'0[xX][a-fA-F0-9]+', Number.Hex),
  873. (r'\d+L', Number.Integer.Long),
  874. (r'\d+', Number.Integer)
  875. ],
  876. 'backtick': [
  877. ('`.*?`', String.Backtick),
  878. ],
  879. 'name': [
  880. (r'@\w+', Name.Decorator),
  881. (r'[a-zA-Z_]\w*', Name),
  882. ],
  883. 'funcname': [
  884. (r'[a-zA-Z_]\w*', Name.Function, '#pop')
  885. ],
  886. 'cdef': [
  887. (r'(public|readonly|extern|api|inline)\b', Keyword.Reserved),
  888. (r'(struct|enum|union|class)\b', Keyword),
  889. (r'([a-zA-Z_]\w*)(\s*)(?=[(:#=]|$)',
  890. bygroups(Name.Function, Text), '#pop'),
  891. (r'([a-zA-Z_]\w*)(\s*)(,)',
  892. bygroups(Name.Function, Text, Punctuation)),
  893. (r'from\b', Keyword, '#pop'),
  894. (r'as\b', Keyword),
  895. (r':', Punctuation, '#pop'),
  896. (r'(?=["\'])', Text, '#pop'),
  897. (r'[a-zA-Z_]\w*', Keyword.Type),
  898. (r'.', Text),
  899. ],
  900. 'classname': [
  901. (r'[a-zA-Z_]\w*', Name.Class, '#pop')
  902. ],
  903. 'import': [
  904. (r'(\s+)(as)(\s+)', bygroups(Text, Keyword, Text)),
  905. (r'[a-zA-Z_][\w.]*', Name.Namespace),
  906. (r'(\s*)(,)(\s*)', bygroups(Text, Operator, Text)),
  907. default('#pop') # all else: go back
  908. ],
  909. 'fromimport': [
  910. (r'(\s+)(c?import)\b', bygroups(Text, Keyword), '#pop'),
  911. (r'[a-zA-Z_.][\w.]*', Name.Namespace),
  912. # ``cdef foo from "header"``, or ``for foo from 0 < i < 10``
  913. default('#pop'),
  914. ],
  915. 'stringescape': [
  916. (r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|'
  917. r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
  918. ],
  919. 'strings': [
  920. (r'%(\([a-zA-Z0-9]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
  921. '[hlL]?[E-GXc-giorsux%]', String.Interpol),
  922. (r'[^\\\'"%\n]+', String),
  923. # quotes, percents and backslashes must be parsed one at a time
  924. (r'[\'"\\]', String),
  925. # unhandled string formatting sign
  926. (r'%', String)
  927. # newlines are an error (use "nl" state)
  928. ],
  929. 'nl': [
  930. (r'\n', String)
  931. ],
  932. 'dqs': [
  933. (r'"', String, '#pop'),
  934. (r'\\\\|\\"|\\\n', String.Escape), # included here again for raw strings
  935. include('strings')
  936. ],
  937. 'sqs': [
  938. (r"'", String, '#pop'),
  939. (r"\\\\|\\'|\\\n", String.Escape), # included here again for raw strings
  940. include('strings')
  941. ],
  942. 'tdqs': [
  943. (r'"""', String, '#pop'),
  944. include('strings'),
  945. include('nl')
  946. ],
  947. 'tsqs': [
  948. (r"'''", String, '#pop'),
  949. include('strings'),
  950. include('nl')
  951. ],
  952. }
  953. class DgLexer(RegexLexer):
  954. """
  955. Lexer for dg,
  956. a functional and object-oriented programming language
  957. running on the CPython 3 VM.
  958. """
  959. name = 'dg'
  960. aliases = ['dg']
  961. filenames = ['*.dg']
  962. mimetypes = ['text/x-dg']
  963. url = 'http://pyos.github.io/dg'
  964. version_added = '1.6'
  965. tokens = {
  966. 'root': [
  967. (r'\s+', Text),
  968. (r'#.*?$', Comment.Single),
  969. (r'(?i)0b[01]+', Number.Bin),
  970. (r'(?i)0o[0-7]+', Number.Oct),
  971. (r'(?i)0x[0-9a-f]+', Number.Hex),
  972. (r'(?i)[+-]?[0-9]+\.[0-9]+(e[+-]?[0-9]+)?j?', Number.Float),
  973. (r'(?i)[+-]?[0-9]+e[+-]?\d+j?', Number.Float),
  974. (r'(?i)[+-]?[0-9]+j?', Number.Integer),
  975. (r"(?i)(br|r?b?)'''", String, combined('stringescape', 'tsqs', 'string')),
  976. (r'(?i)(br|r?b?)"""', String, combined('stringescape', 'tdqs', 'string')),
  977. (r"(?i)(br|r?b?)'", String, combined('stringescape', 'sqs', 'string')),
  978. (r'(?i)(br|r?b?)"', String, combined('stringescape', 'dqs', 'string')),
  979. (r"`\w+'*`", Operator),
  980. (r'\b(and|in|is|or|where)\b', Operator.Word),
  981. (r'[!$%&*+\-./:<-@\\^|~;,]+', Operator),
  982. (words((
  983. 'bool', 'bytearray', 'bytes', 'classmethod', 'complex', 'dict', 'dict\'',
  984. 'float', 'frozenset', 'int', 'list', 'list\'', 'memoryview', 'object',
  985. 'property', 'range', 'set', 'set\'', 'slice', 'staticmethod', 'str',
  986. 'super', 'tuple', 'tuple\'', 'type'),
  987. prefix=r'(?<!\.)', suffix=r'(?![\'\w])'),
  988. Name.Builtin),
  989. (words((
  990. '__import__', 'abs', 'all', 'any', 'bin', 'bind', 'chr', 'cmp', 'compile',
  991. 'complex', 'delattr', 'dir', 'divmod', 'drop', 'dropwhile', 'enumerate',
  992. 'eval', 'exhaust', 'filter', 'flip', 'foldl1?', 'format', 'fst',
  993. 'getattr', 'globals', 'hasattr', 'hash', 'head', 'hex', 'id', 'init',
  994. 'input', 'isinstance', 'issubclass', 'iter', 'iterate', 'last', 'len',
  995. 'locals', 'map', 'max', 'min', 'next', 'oct', 'open', 'ord', 'pow',
  996. 'print', 'repr', 'reversed', 'round', 'setattr', 'scanl1?', 'snd',
  997. 'sorted', 'sum', 'tail', 'take', 'takewhile', 'vars', 'zip'),
  998. prefix=r'(?<!\.)', suffix=r'(?![\'\w])'),
  999. Name.Builtin),
  1000. (r"(?<!\.)(self|Ellipsis|NotImplemented|None|True|False)(?!['\w])",
  1001. Name.Builtin.Pseudo),
  1002. (r"(?<!\.)[A-Z]\w*(Error|Exception|Warning)'*(?!['\w])",
  1003. Name.Exception),
  1004. (r"(?<!\.)(Exception|GeneratorExit|KeyboardInterrupt|StopIteration|"
  1005. r"SystemExit)(?!['\w])", Name.Exception),
  1006. (r"(?<![\w.])(except|finally|for|if|import|not|otherwise|raise|"
  1007. r"subclass|while|with|yield)(?!['\w])", Keyword.Reserved),
  1008. (r"[A-Z_]+'*(?!['\w])", Name),
  1009. (r"[A-Z]\w+'*(?!['\w])", Keyword.Type),
  1010. (r"\w+'*", Name),
  1011. (r'[()]', Punctuation),
  1012. (r'.', Error),
  1013. ],
  1014. 'stringescape': [
  1015. (r'\\([\\abfnrtv"\']|\n|N\{.*?\}|u[a-fA-F0-9]{4}|'
  1016. r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape)
  1017. ],
  1018. 'string': [
  1019. (r'%(\(\w+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?'
  1020. '[hlL]?[E-GXc-giorsux%]', String.Interpol),
  1021. (r'[^\\\'"%\n]+', String),
  1022. # quotes, percents and backslashes must be parsed one at a time
  1023. (r'[\'"\\]', String),
  1024. # unhandled string formatting sign
  1025. (r'%', String),
  1026. (r'\n', String)
  1027. ],
  1028. 'dqs': [
  1029. (r'"', String, '#pop')
  1030. ],
  1031. 'sqs': [
  1032. (r"'", String, '#pop')
  1033. ],
  1034. 'tdqs': [
  1035. (r'"""', String, '#pop')
  1036. ],
  1037. 'tsqs': [
  1038. (r"'''", String, '#pop')
  1039. ],
  1040. }
  1041. class NumPyLexer(PythonLexer):
  1042. """
  1043. A Python lexer recognizing Numerical Python builtins.
  1044. """
  1045. name = 'NumPy'
  1046. url = 'https://numpy.org/'
  1047. aliases = ['numpy']
  1048. version_added = '0.10'
  1049. # override the mimetypes to not inherit them from python
  1050. mimetypes = []
  1051. filenames = []
  1052. EXTRA_KEYWORDS = {
  1053. 'abs', 'absolute', 'accumulate', 'add', 'alen', 'all', 'allclose',
  1054. 'alltrue', 'alterdot', 'amax', 'amin', 'angle', 'any', 'append',
  1055. 'apply_along_axis', 'apply_over_axes', 'arange', 'arccos', 'arccosh',
  1056. 'arcsin', 'arcsinh', 'arctan', 'arctan2', 'arctanh', 'argmax', 'argmin',
  1057. 'argsort', 'argwhere', 'around', 'array', 'array2string', 'array_equal',
  1058. 'array_equiv', 'array_repr', 'array_split', 'array_str', 'arrayrange',
  1059. 'asanyarray', 'asarray', 'asarray_chkfinite', 'ascontiguousarray',
  1060. 'asfarray', 'asfortranarray', 'asmatrix', 'asscalar', 'astype',
  1061. 'atleast_1d', 'atleast_2d', 'atleast_3d', 'average', 'bartlett',
  1062. 'base_repr', 'beta', 'binary_repr', 'bincount', 'binomial',
  1063. 'bitwise_and', 'bitwise_not', 'bitwise_or', 'bitwise_xor', 'blackman',
  1064. 'bmat', 'broadcast', 'byte_bounds', 'bytes', 'byteswap', 'c_',
  1065. 'can_cast', 'ceil', 'choose', 'clip', 'column_stack', 'common_type',
  1066. 'compare_chararrays', 'compress', 'concatenate', 'conj', 'conjugate',
  1067. 'convolve', 'copy', 'corrcoef', 'correlate', 'cos', 'cosh', 'cov',
  1068. 'cross', 'cumprod', 'cumproduct', 'cumsum', 'delete', 'deprecate',
  1069. 'diag', 'diagflat', 'diagonal', 'diff', 'digitize', 'disp', 'divide',
  1070. 'dot', 'dsplit', 'dstack', 'dtype', 'dump', 'dumps', 'ediff1d', 'empty',
  1071. 'empty_like', 'equal', 'exp', 'expand_dims', 'expm1', 'extract', 'eye',
  1072. 'fabs', 'fastCopyAndTranspose', 'fft', 'fftfreq', 'fftshift', 'fill',
  1073. 'finfo', 'fix', 'flat', 'flatnonzero', 'flatten', 'fliplr', 'flipud',
  1074. 'floor', 'floor_divide', 'fmod', 'frexp', 'fromarrays', 'frombuffer',
  1075. 'fromfile', 'fromfunction', 'fromiter', 'frompyfunc', 'fromstring',
  1076. 'generic', 'get_array_wrap', 'get_include', 'get_numarray_include',
  1077. 'get_numpy_include', 'get_printoptions', 'getbuffer', 'getbufsize',
  1078. 'geterr', 'geterrcall', 'geterrobj', 'getfield', 'gradient', 'greater',
  1079. 'greater_equal', 'gumbel', 'hamming', 'hanning', 'histogram',
  1080. 'histogram2d', 'histogramdd', 'hsplit', 'hstack', 'hypot', 'i0',
  1081. 'identity', 'ifft', 'imag', 'index_exp', 'indices', 'inf', 'info',
  1082. 'inner', 'insert', 'int_asbuffer', 'interp', 'intersect1d',
  1083. 'intersect1d_nu', 'inv', 'invert', 'iscomplex', 'iscomplexobj',
  1084. 'isfinite', 'isfortran', 'isinf', 'isnan', 'isneginf', 'isposinf',
  1085. 'isreal', 'isrealobj', 'isscalar', 'issctype', 'issubclass_',
  1086. 'issubdtype', 'issubsctype', 'item', 'itemset', 'iterable', 'ix_',
  1087. 'kaiser', 'kron', 'ldexp', 'left_shift', 'less', 'less_equal', 'lexsort',
  1088. 'linspace', 'load', 'loads', 'loadtxt', 'log', 'log10', 'log1p', 'log2',
  1089. 'logical_and', 'logical_not', 'logical_or', 'logical_xor', 'logspace',
  1090. 'lstsq', 'mat', 'matrix', 'max', 'maximum', 'maximum_sctype',
  1091. 'may_share_memory', 'mean', 'median', 'meshgrid', 'mgrid', 'min',
  1092. 'minimum', 'mintypecode', 'mod', 'modf', 'msort', 'multiply', 'nan',
  1093. 'nan_to_num', 'nanargmax', 'nanargmin', 'nanmax', 'nanmin', 'nansum',
  1094. 'ndenumerate', 'ndim', 'ndindex', 'negative', 'newaxis', 'newbuffer',
  1095. 'newbyteorder', 'nonzero', 'not_equal', 'obj2sctype', 'ogrid', 'ones',
  1096. 'ones_like', 'outer', 'permutation', 'piecewise', 'pinv', 'pkgload',
  1097. 'place', 'poisson', 'poly', 'poly1d', 'polyadd', 'polyder', 'polydiv',
  1098. 'polyfit', 'polyint', 'polymul', 'polysub', 'polyval', 'power', 'prod',
  1099. 'product', 'ptp', 'put', 'putmask', 'r_', 'randint', 'random_integers',
  1100. 'random_sample', 'ranf', 'rank', 'ravel', 'real', 'real_if_close',
  1101. 'recarray', 'reciprocal', 'reduce', 'remainder', 'repeat', 'require',
  1102. 'reshape', 'resize', 'restoredot', 'right_shift', 'rint', 'roll',
  1103. 'rollaxis', 'roots', 'rot90', 'round', 'round_', 'row_stack', 's_',
  1104. 'sample', 'savetxt', 'sctype2char', 'searchsorted', 'seed', 'select',
  1105. 'set_numeric_ops', 'set_printoptions', 'set_string_function',
  1106. 'setbufsize', 'setdiff1d', 'seterr', 'seterrcall', 'seterrobj',
  1107. 'setfield', 'setflags', 'setmember1d', 'setxor1d', 'shape',
  1108. 'show_config', 'shuffle', 'sign', 'signbit', 'sin', 'sinc', 'sinh',
  1109. 'size', 'slice', 'solve', 'sometrue', 'sort', 'sort_complex', 'source',
  1110. 'split', 'sqrt', 'square', 'squeeze', 'standard_normal', 'std',
  1111. 'subtract', 'sum', 'svd', 'swapaxes', 'take', 'tan', 'tanh', 'tensordot',
  1112. 'test', 'tile', 'tofile', 'tolist', 'tostring', 'trace', 'transpose',
  1113. 'trapz', 'tri', 'tril', 'trim_zeros', 'triu', 'true_divide', 'typeDict',
  1114. 'typename', 'uniform', 'union1d', 'unique', 'unique1d', 'unravel_index',
  1115. 'unwrap', 'vander', 'var', 'vdot', 'vectorize', 'view', 'vonmises',
  1116. 'vsplit', 'vstack', 'weibull', 'where', 'who', 'zeros', 'zeros_like'
  1117. }
  1118. def get_tokens_unprocessed(self, text):
  1119. for index, token, value in \
  1120. PythonLexer.get_tokens_unprocessed(self, text):
  1121. if token is Name and value in self.EXTRA_KEYWORDS:
  1122. yield index, Keyword.Pseudo, value
  1123. else:
  1124. yield index, token, value
  1125. def analyse_text(text):
  1126. ltext = text[:1000]
  1127. return (shebang_matches(text, r'pythonw?(3(\.\d)?)?') or
  1128. 'import ' in ltext) \
  1129. and ('import numpy' in ltext or 'from numpy import' in ltext)