pascal.py 32 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644
  1. # -*- coding: utf-8 -*-
  2. """
  3. pygments.lexers.pascal
  4. ~~~~~~~~~~~~~~~~~~~~~~
  5. Lexers for Pascal family languages.
  6. :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
  7. :license: BSD, see LICENSE for details.
  8. """
  9. import re
  10. from pygments.lexer import Lexer, RegexLexer, include, bygroups, words, \
  11. using, this, default
  12. from pygments.util import get_bool_opt, get_list_opt
  13. from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
  14. Number, Punctuation, Error
  15. from pygments.scanner import Scanner
  16. # compatibility import
  17. from pygments.lexers.modula2 import Modula2Lexer
  18. __all__ = ['DelphiLexer', 'AdaLexer']
  19. class DelphiLexer(Lexer):
  20. """
  21. For `Delphi <http://www.borland.com/delphi/>`_ (Borland Object Pascal),
  22. Turbo Pascal and Free Pascal source code.
  23. Additional options accepted:
  24. `turbopascal`
  25. Highlight Turbo Pascal specific keywords (default: ``True``).
  26. `delphi`
  27. Highlight Borland Delphi specific keywords (default: ``True``).
  28. `freepascal`
  29. Highlight Free Pascal specific keywords (default: ``True``).
  30. `units`
  31. A list of units that should be considered builtin, supported are
  32. ``System``, ``SysUtils``, ``Classes`` and ``Math``.
  33. Default is to consider all of them builtin.
  34. """
  35. name = 'Delphi'
  36. aliases = ['delphi', 'pas', 'pascal', 'objectpascal']
  37. filenames = ['*.pas', '*.dpr']
  38. mimetypes = ['text/x-pascal']
  39. TURBO_PASCAL_KEYWORDS = (
  40. 'absolute', 'and', 'array', 'asm', 'begin', 'break', 'case',
  41. 'const', 'constructor', 'continue', 'destructor', 'div', 'do',
  42. 'downto', 'else', 'end', 'file', 'for', 'function', 'goto',
  43. 'if', 'implementation', 'in', 'inherited', 'inline', 'interface',
  44. 'label', 'mod', 'nil', 'not', 'object', 'of', 'on', 'operator',
  45. 'or', 'packed', 'procedure', 'program', 'record', 'reintroduce',
  46. 'repeat', 'self', 'set', 'shl', 'shr', 'string', 'then', 'to',
  47. 'type', 'unit', 'until', 'uses', 'var', 'while', 'with', 'xor'
  48. )
  49. DELPHI_KEYWORDS = (
  50. 'as', 'class', 'except', 'exports', 'finalization', 'finally',
  51. 'initialization', 'is', 'library', 'on', 'property', 'raise',
  52. 'threadvar', 'try'
  53. )
  54. FREE_PASCAL_KEYWORDS = (
  55. 'dispose', 'exit', 'false', 'new', 'true'
  56. )
  57. BLOCK_KEYWORDS = {
  58. 'begin', 'class', 'const', 'constructor', 'destructor', 'end',
  59. 'finalization', 'function', 'implementation', 'initialization',
  60. 'label', 'library', 'operator', 'procedure', 'program', 'property',
  61. 'record', 'threadvar', 'type', 'unit', 'uses', 'var'
  62. }
  63. FUNCTION_MODIFIERS = {
  64. 'alias', 'cdecl', 'export', 'inline', 'interrupt', 'nostackframe',
  65. 'pascal', 'register', 'safecall', 'softfloat', 'stdcall',
  66. 'varargs', 'name', 'dynamic', 'near', 'virtual', 'external',
  67. 'override', 'assembler'
  68. }
  69. # XXX: those aren't global. but currently we know no way for defining
  70. # them just for the type context.
  71. DIRECTIVES = {
  72. 'absolute', 'abstract', 'assembler', 'cppdecl', 'default', 'far',
  73. 'far16', 'forward', 'index', 'oldfpccall', 'private', 'protected',
  74. 'published', 'public'
  75. }
  76. BUILTIN_TYPES = {
  77. 'ansichar', 'ansistring', 'bool', 'boolean', 'byte', 'bytebool',
  78. 'cardinal', 'char', 'comp', 'currency', 'double', 'dword',
  79. 'extended', 'int64', 'integer', 'iunknown', 'longbool', 'longint',
  80. 'longword', 'pansichar', 'pansistring', 'pbool', 'pboolean',
  81. 'pbyte', 'pbytearray', 'pcardinal', 'pchar', 'pcomp', 'pcurrency',
  82. 'pdate', 'pdatetime', 'pdouble', 'pdword', 'pextended', 'phandle',
  83. 'pint64', 'pinteger', 'plongint', 'plongword', 'pointer',
  84. 'ppointer', 'pshortint', 'pshortstring', 'psingle', 'psmallint',
  85. 'pstring', 'pvariant', 'pwidechar', 'pwidestring', 'pword',
  86. 'pwordarray', 'pwordbool', 'real', 'real48', 'shortint',
  87. 'shortstring', 'single', 'smallint', 'string', 'tclass', 'tdate',
  88. 'tdatetime', 'textfile', 'thandle', 'tobject', 'ttime', 'variant',
  89. 'widechar', 'widestring', 'word', 'wordbool'
  90. }
  91. BUILTIN_UNITS = {
  92. 'System': (
  93. 'abs', 'acquireexceptionobject', 'addr', 'ansitoutf8',
  94. 'append', 'arctan', 'assert', 'assigned', 'assignfile',
  95. 'beginthread', 'blockread', 'blockwrite', 'break', 'chdir',
  96. 'chr', 'close', 'closefile', 'comptocurrency', 'comptodouble',
  97. 'concat', 'continue', 'copy', 'cos', 'dec', 'delete',
  98. 'dispose', 'doubletocomp', 'endthread', 'enummodules',
  99. 'enumresourcemodules', 'eof', 'eoln', 'erase', 'exceptaddr',
  100. 'exceptobject', 'exclude', 'exit', 'exp', 'filepos', 'filesize',
  101. 'fillchar', 'finalize', 'findclasshinstance', 'findhinstance',
  102. 'findresourcehinstance', 'flush', 'frac', 'freemem',
  103. 'get8087cw', 'getdir', 'getlasterror', 'getmem',
  104. 'getmemorymanager', 'getmodulefilename', 'getvariantmanager',
  105. 'halt', 'hi', 'high', 'inc', 'include', 'initialize', 'insert',
  106. 'int', 'ioresult', 'ismemorymanagerset', 'isvariantmanagerset',
  107. 'length', 'ln', 'lo', 'low', 'mkdir', 'move', 'new', 'odd',
  108. 'olestrtostring', 'olestrtostrvar', 'ord', 'paramcount',
  109. 'paramstr', 'pi', 'pos', 'pred', 'ptr', 'pucs4chars', 'random',
  110. 'randomize', 'read', 'readln', 'reallocmem',
  111. 'releaseexceptionobject', 'rename', 'reset', 'rewrite', 'rmdir',
  112. 'round', 'runerror', 'seek', 'seekeof', 'seekeoln',
  113. 'set8087cw', 'setlength', 'setlinebreakstyle',
  114. 'setmemorymanager', 'setstring', 'settextbuf',
  115. 'setvariantmanager', 'sin', 'sizeof', 'slice', 'sqr', 'sqrt',
  116. 'str', 'stringofchar', 'stringtoolestr', 'stringtowidechar',
  117. 'succ', 'swap', 'trunc', 'truncate', 'typeinfo',
  118. 'ucs4stringtowidestring', 'unicodetoutf8', 'uniquestring',
  119. 'upcase', 'utf8decode', 'utf8encode', 'utf8toansi',
  120. 'utf8tounicode', 'val', 'vararrayredim', 'varclear',
  121. 'widecharlentostring', 'widecharlentostrvar',
  122. 'widechartostring', 'widechartostrvar',
  123. 'widestringtoucs4string', 'write', 'writeln'
  124. ),
  125. 'SysUtils': (
  126. 'abort', 'addexitproc', 'addterminateproc', 'adjustlinebreaks',
  127. 'allocmem', 'ansicomparefilename', 'ansicomparestr',
  128. 'ansicomparetext', 'ansidequotedstr', 'ansiextractquotedstr',
  129. 'ansilastchar', 'ansilowercase', 'ansilowercasefilename',
  130. 'ansipos', 'ansiquotedstr', 'ansisamestr', 'ansisametext',
  131. 'ansistrcomp', 'ansistricomp', 'ansistrlastchar', 'ansistrlcomp',
  132. 'ansistrlicomp', 'ansistrlower', 'ansistrpos', 'ansistrrscan',
  133. 'ansistrscan', 'ansistrupper', 'ansiuppercase',
  134. 'ansiuppercasefilename', 'appendstr', 'assignstr', 'beep',
  135. 'booltostr', 'bytetocharindex', 'bytetocharlen', 'bytetype',
  136. 'callterminateprocs', 'changefileext', 'charlength',
  137. 'chartobyteindex', 'chartobytelen', 'comparemem', 'comparestr',
  138. 'comparetext', 'createdir', 'createguid', 'currentyear',
  139. 'currtostr', 'currtostrf', 'date', 'datetimetofiledate',
  140. 'datetimetostr', 'datetimetostring', 'datetimetosystemtime',
  141. 'datetimetotimestamp', 'datetostr', 'dayofweek', 'decodedate',
  142. 'decodedatefully', 'decodetime', 'deletefile', 'directoryexists',
  143. 'diskfree', 'disksize', 'disposestr', 'encodedate', 'encodetime',
  144. 'exceptionerrormessage', 'excludetrailingbackslash',
  145. 'excludetrailingpathdelimiter', 'expandfilename',
  146. 'expandfilenamecase', 'expanduncfilename', 'extractfiledir',
  147. 'extractfiledrive', 'extractfileext', 'extractfilename',
  148. 'extractfilepath', 'extractrelativepath', 'extractshortpathname',
  149. 'fileage', 'fileclose', 'filecreate', 'filedatetodatetime',
  150. 'fileexists', 'filegetattr', 'filegetdate', 'fileisreadonly',
  151. 'fileopen', 'fileread', 'filesearch', 'fileseek', 'filesetattr',
  152. 'filesetdate', 'filesetreadonly', 'filewrite', 'finalizepackage',
  153. 'findclose', 'findcmdlineswitch', 'findfirst', 'findnext',
  154. 'floattocurr', 'floattodatetime', 'floattodecimal', 'floattostr',
  155. 'floattostrf', 'floattotext', 'floattotextfmt', 'fmtloadstr',
  156. 'fmtstr', 'forcedirectories', 'format', 'formatbuf', 'formatcurr',
  157. 'formatdatetime', 'formatfloat', 'freeandnil', 'getcurrentdir',
  158. 'getenvironmentvariable', 'getfileversion', 'getformatsettings',
  159. 'getlocaleformatsettings', 'getmodulename', 'getpackagedescription',
  160. 'getpackageinfo', 'gettime', 'guidtostring', 'incamonth',
  161. 'includetrailingbackslash', 'includetrailingpathdelimiter',
  162. 'incmonth', 'initializepackage', 'interlockeddecrement',
  163. 'interlockedexchange', 'interlockedexchangeadd',
  164. 'interlockedincrement', 'inttohex', 'inttostr', 'isdelimiter',
  165. 'isequalguid', 'isleapyear', 'ispathdelimiter', 'isvalidident',
  166. 'languages', 'lastdelimiter', 'loadpackage', 'loadstr',
  167. 'lowercase', 'msecstotimestamp', 'newstr', 'nextcharindex', 'now',
  168. 'outofmemoryerror', 'quotedstr', 'raiselastoserror',
  169. 'raiselastwin32error', 'removedir', 'renamefile', 'replacedate',
  170. 'replacetime', 'safeloadlibrary', 'samefilename', 'sametext',
  171. 'setcurrentdir', 'showexception', 'sleep', 'stralloc', 'strbufsize',
  172. 'strbytetype', 'strcat', 'strcharlength', 'strcomp', 'strcopy',
  173. 'strdispose', 'strecopy', 'strend', 'strfmt', 'stricomp',
  174. 'stringreplace', 'stringtoguid', 'strlcat', 'strlcomp', 'strlcopy',
  175. 'strlen', 'strlfmt', 'strlicomp', 'strlower', 'strmove', 'strnew',
  176. 'strnextchar', 'strpas', 'strpcopy', 'strplcopy', 'strpos',
  177. 'strrscan', 'strscan', 'strtobool', 'strtobooldef', 'strtocurr',
  178. 'strtocurrdef', 'strtodate', 'strtodatedef', 'strtodatetime',
  179. 'strtodatetimedef', 'strtofloat', 'strtofloatdef', 'strtoint',
  180. 'strtoint64', 'strtoint64def', 'strtointdef', 'strtotime',
  181. 'strtotimedef', 'strupper', 'supports', 'syserrormessage',
  182. 'systemtimetodatetime', 'texttofloat', 'time', 'timestamptodatetime',
  183. 'timestamptomsecs', 'timetostr', 'trim', 'trimleft', 'trimright',
  184. 'tryencodedate', 'tryencodetime', 'tryfloattocurr', 'tryfloattodatetime',
  185. 'trystrtobool', 'trystrtocurr', 'trystrtodate', 'trystrtodatetime',
  186. 'trystrtofloat', 'trystrtoint', 'trystrtoint64', 'trystrtotime',
  187. 'unloadpackage', 'uppercase', 'widecomparestr', 'widecomparetext',
  188. 'widefmtstr', 'wideformat', 'wideformatbuf', 'widelowercase',
  189. 'widesamestr', 'widesametext', 'wideuppercase', 'win32check',
  190. 'wraptext'
  191. ),
  192. 'Classes': (
  193. 'activateclassgroup', 'allocatehwnd', 'bintohex', 'checksynchronize',
  194. 'collectionsequal', 'countgenerations', 'deallocatehwnd', 'equalrect',
  195. 'extractstrings', 'findclass', 'findglobalcomponent', 'getclass',
  196. 'groupdescendantswith', 'hextobin', 'identtoint',
  197. 'initinheritedcomponent', 'inttoident', 'invalidpoint',
  198. 'isuniqueglobalcomponentname', 'linestart', 'objectbinarytotext',
  199. 'objectresourcetotext', 'objecttexttobinary', 'objecttexttoresource',
  200. 'pointsequal', 'readcomponentres', 'readcomponentresex',
  201. 'readcomponentresfile', 'rect', 'registerclass', 'registerclassalias',
  202. 'registerclasses', 'registercomponents', 'registerintegerconsts',
  203. 'registernoicon', 'registernonactivex', 'smallpoint', 'startclassgroup',
  204. 'teststreamformat', 'unregisterclass', 'unregisterclasses',
  205. 'unregisterintegerconsts', 'unregistermoduleclasses',
  206. 'writecomponentresfile'
  207. ),
  208. 'Math': (
  209. 'arccos', 'arccosh', 'arccot', 'arccoth', 'arccsc', 'arccsch', 'arcsec',
  210. 'arcsech', 'arcsin', 'arcsinh', 'arctan2', 'arctanh', 'ceil',
  211. 'comparevalue', 'cosecant', 'cosh', 'cot', 'cotan', 'coth', 'csc',
  212. 'csch', 'cycletodeg', 'cycletograd', 'cycletorad', 'degtocycle',
  213. 'degtograd', 'degtorad', 'divmod', 'doubledecliningbalance',
  214. 'ensurerange', 'floor', 'frexp', 'futurevalue', 'getexceptionmask',
  215. 'getprecisionmode', 'getroundmode', 'gradtocycle', 'gradtodeg',
  216. 'gradtorad', 'hypot', 'inrange', 'interestpayment', 'interestrate',
  217. 'internalrateofreturn', 'intpower', 'isinfinite', 'isnan', 'iszero',
  218. 'ldexp', 'lnxp1', 'log10', 'log2', 'logn', 'max', 'maxintvalue',
  219. 'maxvalue', 'mean', 'meanandstddev', 'min', 'minintvalue', 'minvalue',
  220. 'momentskewkurtosis', 'netpresentvalue', 'norm', 'numberofperiods',
  221. 'payment', 'periodpayment', 'poly', 'popnstddev', 'popnvariance',
  222. 'power', 'presentvalue', 'radtocycle', 'radtodeg', 'radtograd',
  223. 'randg', 'randomrange', 'roundto', 'samevalue', 'sec', 'secant',
  224. 'sech', 'setexceptionmask', 'setprecisionmode', 'setroundmode',
  225. 'sign', 'simpleroundto', 'sincos', 'sinh', 'slndepreciation', 'stddev',
  226. 'sum', 'sumint', 'sumofsquares', 'sumsandsquares', 'syddepreciation',
  227. 'tan', 'tanh', 'totalvariance', 'variance'
  228. )
  229. }
  230. ASM_REGISTERS = {
  231. 'ah', 'al', 'ax', 'bh', 'bl', 'bp', 'bx', 'ch', 'cl', 'cr0',
  232. 'cr1', 'cr2', 'cr3', 'cr4', 'cs', 'cx', 'dh', 'di', 'dl', 'dr0',
  233. 'dr1', 'dr2', 'dr3', 'dr4', 'dr5', 'dr6', 'dr7', 'ds', 'dx',
  234. 'eax', 'ebp', 'ebx', 'ecx', 'edi', 'edx', 'es', 'esi', 'esp',
  235. 'fs', 'gs', 'mm0', 'mm1', 'mm2', 'mm3', 'mm4', 'mm5', 'mm6',
  236. 'mm7', 'si', 'sp', 'ss', 'st0', 'st1', 'st2', 'st3', 'st4', 'st5',
  237. 'st6', 'st7', 'xmm0', 'xmm1', 'xmm2', 'xmm3', 'xmm4', 'xmm5',
  238. 'xmm6', 'xmm7'
  239. }
  240. ASM_INSTRUCTIONS = {
  241. 'aaa', 'aad', 'aam', 'aas', 'adc', 'add', 'and', 'arpl', 'bound',
  242. 'bsf', 'bsr', 'bswap', 'bt', 'btc', 'btr', 'bts', 'call', 'cbw',
  243. 'cdq', 'clc', 'cld', 'cli', 'clts', 'cmc', 'cmova', 'cmovae',
  244. 'cmovb', 'cmovbe', 'cmovc', 'cmovcxz', 'cmove', 'cmovg',
  245. 'cmovge', 'cmovl', 'cmovle', 'cmovna', 'cmovnae', 'cmovnb',
  246. 'cmovnbe', 'cmovnc', 'cmovne', 'cmovng', 'cmovnge', 'cmovnl',
  247. 'cmovnle', 'cmovno', 'cmovnp', 'cmovns', 'cmovnz', 'cmovo',
  248. 'cmovp', 'cmovpe', 'cmovpo', 'cmovs', 'cmovz', 'cmp', 'cmpsb',
  249. 'cmpsd', 'cmpsw', 'cmpxchg', 'cmpxchg486', 'cmpxchg8b', 'cpuid',
  250. 'cwd', 'cwde', 'daa', 'das', 'dec', 'div', 'emms', 'enter', 'hlt',
  251. 'ibts', 'icebp', 'idiv', 'imul', 'in', 'inc', 'insb', 'insd',
  252. 'insw', 'int', 'int01', 'int03', 'int1', 'int3', 'into', 'invd',
  253. 'invlpg', 'iret', 'iretd', 'iretw', 'ja', 'jae', 'jb', 'jbe',
  254. 'jc', 'jcxz', 'jcxz', 'je', 'jecxz', 'jg', 'jge', 'jl', 'jle',
  255. 'jmp', 'jna', 'jnae', 'jnb', 'jnbe', 'jnc', 'jne', 'jng', 'jnge',
  256. 'jnl', 'jnle', 'jno', 'jnp', 'jns', 'jnz', 'jo', 'jp', 'jpe',
  257. 'jpo', 'js', 'jz', 'lahf', 'lar', 'lcall', 'lds', 'lea', 'leave',
  258. 'les', 'lfs', 'lgdt', 'lgs', 'lidt', 'ljmp', 'lldt', 'lmsw',
  259. 'loadall', 'loadall286', 'lock', 'lodsb', 'lodsd', 'lodsw',
  260. 'loop', 'loope', 'loopne', 'loopnz', 'loopz', 'lsl', 'lss', 'ltr',
  261. 'mov', 'movd', 'movq', 'movsb', 'movsd', 'movsw', 'movsx',
  262. 'movzx', 'mul', 'neg', 'nop', 'not', 'or', 'out', 'outsb', 'outsd',
  263. 'outsw', 'pop', 'popa', 'popad', 'popaw', 'popf', 'popfd', 'popfw',
  264. 'push', 'pusha', 'pushad', 'pushaw', 'pushf', 'pushfd', 'pushfw',
  265. 'rcl', 'rcr', 'rdmsr', 'rdpmc', 'rdshr', 'rdtsc', 'rep', 'repe',
  266. 'repne', 'repnz', 'repz', 'ret', 'retf', 'retn', 'rol', 'ror',
  267. 'rsdc', 'rsldt', 'rsm', 'sahf', 'sal', 'salc', 'sar', 'sbb',
  268. 'scasb', 'scasd', 'scasw', 'seta', 'setae', 'setb', 'setbe',
  269. 'setc', 'setcxz', 'sete', 'setg', 'setge', 'setl', 'setle',
  270. 'setna', 'setnae', 'setnb', 'setnbe', 'setnc', 'setne', 'setng',
  271. 'setnge', 'setnl', 'setnle', 'setno', 'setnp', 'setns', 'setnz',
  272. 'seto', 'setp', 'setpe', 'setpo', 'sets', 'setz', 'sgdt', 'shl',
  273. 'shld', 'shr', 'shrd', 'sidt', 'sldt', 'smi', 'smint', 'smintold',
  274. 'smsw', 'stc', 'std', 'sti', 'stosb', 'stosd', 'stosw', 'str',
  275. 'sub', 'svdc', 'svldt', 'svts', 'syscall', 'sysenter', 'sysexit',
  276. 'sysret', 'test', 'ud1', 'ud2', 'umov', 'verr', 'verw', 'wait',
  277. 'wbinvd', 'wrmsr', 'wrshr', 'xadd', 'xbts', 'xchg', 'xlat',
  278. 'xlatb', 'xor'
  279. }
  280. def __init__(self, **options):
  281. Lexer.__init__(self, **options)
  282. self.keywords = set()
  283. if get_bool_opt(options, 'turbopascal', True):
  284. self.keywords.update(self.TURBO_PASCAL_KEYWORDS)
  285. if get_bool_opt(options, 'delphi', True):
  286. self.keywords.update(self.DELPHI_KEYWORDS)
  287. if get_bool_opt(options, 'freepascal', True):
  288. self.keywords.update(self.FREE_PASCAL_KEYWORDS)
  289. self.builtins = set()
  290. for unit in get_list_opt(options, 'units', list(self.BUILTIN_UNITS)):
  291. self.builtins.update(self.BUILTIN_UNITS[unit])
  292. def get_tokens_unprocessed(self, text):
  293. scanner = Scanner(text, re.DOTALL | re.MULTILINE | re.IGNORECASE)
  294. stack = ['initial']
  295. in_function_block = False
  296. in_property_block = False
  297. was_dot = False
  298. next_token_is_function = False
  299. next_token_is_property = False
  300. collect_labels = False
  301. block_labels = set()
  302. brace_balance = [0, 0]
  303. while not scanner.eos:
  304. token = Error
  305. if stack[-1] == 'initial':
  306. if scanner.scan(r'\s+'):
  307. token = Text
  308. elif scanner.scan(r'\{.*?\}|\(\*.*?\*\)'):
  309. if scanner.match.startswith('$'):
  310. token = Comment.Preproc
  311. else:
  312. token = Comment.Multiline
  313. elif scanner.scan(r'//.*?$'):
  314. token = Comment.Single
  315. elif scanner.scan(r'[-+*\/=<>:;,.@\^]'):
  316. token = Operator
  317. # stop label highlighting on next ";"
  318. if collect_labels and scanner.match == ';':
  319. collect_labels = False
  320. elif scanner.scan(r'[\(\)\[\]]+'):
  321. token = Punctuation
  322. # abort function naming ``foo = Function(...)``
  323. next_token_is_function = False
  324. # if we are in a function block we count the open
  325. # braces because ootherwise it's impossible to
  326. # determine the end of the modifier context
  327. if in_function_block or in_property_block:
  328. if scanner.match == '(':
  329. brace_balance[0] += 1
  330. elif scanner.match == ')':
  331. brace_balance[0] -= 1
  332. elif scanner.match == '[':
  333. brace_balance[1] += 1
  334. elif scanner.match == ']':
  335. brace_balance[1] -= 1
  336. elif scanner.scan(r'[A-Za-z_][A-Za-z_0-9]*'):
  337. lowercase_name = scanner.match.lower()
  338. if lowercase_name == 'result':
  339. token = Name.Builtin.Pseudo
  340. elif lowercase_name in self.keywords:
  341. token = Keyword
  342. # if we are in a special block and a
  343. # block ending keyword occours (and the parenthesis
  344. # is balanced) we end the current block context
  345. if (in_function_block or in_property_block) and \
  346. lowercase_name in self.BLOCK_KEYWORDS and \
  347. brace_balance[0] <= 0 and \
  348. brace_balance[1] <= 0:
  349. in_function_block = False
  350. in_property_block = False
  351. brace_balance = [0, 0]
  352. block_labels = set()
  353. if lowercase_name in ('label', 'goto'):
  354. collect_labels = True
  355. elif lowercase_name == 'asm':
  356. stack.append('asm')
  357. elif lowercase_name == 'property':
  358. in_property_block = True
  359. next_token_is_property = True
  360. elif lowercase_name in ('procedure', 'operator',
  361. 'function', 'constructor',
  362. 'destructor'):
  363. in_function_block = True
  364. next_token_is_function = True
  365. # we are in a function block and the current name
  366. # is in the set of registered modifiers. highlight
  367. # it as pseudo keyword
  368. elif in_function_block and \
  369. lowercase_name in self.FUNCTION_MODIFIERS:
  370. token = Keyword.Pseudo
  371. # if we are in a property highlight some more
  372. # modifiers
  373. elif in_property_block and \
  374. lowercase_name in ('read', 'write'):
  375. token = Keyword.Pseudo
  376. next_token_is_function = True
  377. # if the last iteration set next_token_is_function
  378. # to true we now want this name highlighted as
  379. # function. so do that and reset the state
  380. elif next_token_is_function:
  381. # Look if the next token is a dot. If yes it's
  382. # not a function, but a class name and the
  383. # part after the dot a function name
  384. if scanner.test(r'\s*\.\s*'):
  385. token = Name.Class
  386. # it's not a dot, our job is done
  387. else:
  388. token = Name.Function
  389. next_token_is_function = False
  390. # same for properties
  391. elif next_token_is_property:
  392. token = Name.Property
  393. next_token_is_property = False
  394. # Highlight this token as label and add it
  395. # to the list of known labels
  396. elif collect_labels:
  397. token = Name.Label
  398. block_labels.add(scanner.match.lower())
  399. # name is in list of known labels
  400. elif lowercase_name in block_labels:
  401. token = Name.Label
  402. elif lowercase_name in self.BUILTIN_TYPES:
  403. token = Keyword.Type
  404. elif lowercase_name in self.DIRECTIVES:
  405. token = Keyword.Pseudo
  406. # builtins are just builtins if the token
  407. # before isn't a dot
  408. elif not was_dot and lowercase_name in self.builtins:
  409. token = Name.Builtin
  410. else:
  411. token = Name
  412. elif scanner.scan(r"'"):
  413. token = String
  414. stack.append('string')
  415. elif scanner.scan(r'\#(\d+|\$[0-9A-Fa-f]+)'):
  416. token = String.Char
  417. elif scanner.scan(r'\$[0-9A-Fa-f]+'):
  418. token = Number.Hex
  419. elif scanner.scan(r'\d+(?![eE]|\.[^.])'):
  420. token = Number.Integer
  421. elif scanner.scan(r'\d+(\.\d+([eE][+-]?\d+)?|[eE][+-]?\d+)'):
  422. token = Number.Float
  423. else:
  424. # if the stack depth is deeper than once, pop
  425. if len(stack) > 1:
  426. stack.pop()
  427. scanner.get_char()
  428. elif stack[-1] == 'string':
  429. if scanner.scan(r"''"):
  430. token = String.Escape
  431. elif scanner.scan(r"'"):
  432. token = String
  433. stack.pop()
  434. elif scanner.scan(r"[^']*"):
  435. token = String
  436. else:
  437. scanner.get_char()
  438. stack.pop()
  439. elif stack[-1] == 'asm':
  440. if scanner.scan(r'\s+'):
  441. token = Text
  442. elif scanner.scan(r'end'):
  443. token = Keyword
  444. stack.pop()
  445. elif scanner.scan(r'\{.*?\}|\(\*.*?\*\)'):
  446. if scanner.match.startswith('$'):
  447. token = Comment.Preproc
  448. else:
  449. token = Comment.Multiline
  450. elif scanner.scan(r'//.*?$'):
  451. token = Comment.Single
  452. elif scanner.scan(r"'"):
  453. token = String
  454. stack.append('string')
  455. elif scanner.scan(r'@@[A-Za-z_][A-Za-z_0-9]*'):
  456. token = Name.Label
  457. elif scanner.scan(r'[A-Za-z_][A-Za-z_0-9]*'):
  458. lowercase_name = scanner.match.lower()
  459. if lowercase_name in self.ASM_INSTRUCTIONS:
  460. token = Keyword
  461. elif lowercase_name in self.ASM_REGISTERS:
  462. token = Name.Builtin
  463. else:
  464. token = Name
  465. elif scanner.scan(r'[-+*\/=<>:;,.@\^]+'):
  466. token = Operator
  467. elif scanner.scan(r'[\(\)\[\]]+'):
  468. token = Punctuation
  469. elif scanner.scan(r'\$[0-9A-Fa-f]+'):
  470. token = Number.Hex
  471. elif scanner.scan(r'\d+(?![eE]|\.[^.])'):
  472. token = Number.Integer
  473. elif scanner.scan(r'\d+(\.\d+([eE][+-]?\d+)?|[eE][+-]?\d+)'):
  474. token = Number.Float
  475. else:
  476. scanner.get_char()
  477. stack.pop()
  478. # save the dot!!!11
  479. if scanner.match.strip():
  480. was_dot = scanner.match == '.'
  481. yield scanner.start_pos, token, scanner.match or ''
  482. class AdaLexer(RegexLexer):
  483. """
  484. For Ada source code.
  485. .. versionadded:: 1.3
  486. """
  487. name = 'Ada'
  488. aliases = ['ada', 'ada95', 'ada2005']
  489. filenames = ['*.adb', '*.ads', '*.ada']
  490. mimetypes = ['text/x-ada']
  491. flags = re.MULTILINE | re.IGNORECASE
  492. tokens = {
  493. 'root': [
  494. (r'[^\S\n]+', Text),
  495. (r'--.*?\n', Comment.Single),
  496. (r'[^\S\n]+', Text),
  497. (r'function|procedure|entry', Keyword.Declaration, 'subprogram'),
  498. (r'(subtype|type)(\s+)(\w+)',
  499. bygroups(Keyword.Declaration, Text, Keyword.Type), 'type_def'),
  500. (r'task|protected', Keyword.Declaration),
  501. (r'(subtype)(\s+)', bygroups(Keyword.Declaration, Text)),
  502. (r'(end)(\s+)', bygroups(Keyword.Reserved, Text), 'end'),
  503. (r'(pragma)(\s+)(\w+)', bygroups(Keyword.Reserved, Text,
  504. Comment.Preproc)),
  505. (r'(true|false|null)\b', Keyword.Constant),
  506. (words((
  507. 'Address', 'Byte', 'Boolean', 'Character', 'Controlled', 'Count',
  508. 'Cursor', 'Duration', 'File_Mode', 'File_Type', 'Float', 'Generator',
  509. 'Integer', 'Long_Float', 'Long_Integer', 'Long_Long_Float',
  510. 'Long_Long_Integer', 'Natural', 'Positive', 'Reference_Type',
  511. 'Short_Float', 'Short_Integer', 'Short_Short_Float',
  512. 'Short_Short_Integer', 'String', 'Wide_Character', 'Wide_String'),
  513. suffix=r'\b'),
  514. Keyword.Type),
  515. (r'(and(\s+then)?|in|mod|not|or(\s+else)|rem)\b', Operator.Word),
  516. (r'generic|private', Keyword.Declaration),
  517. (r'package', Keyword.Declaration, 'package'),
  518. (r'array\b', Keyword.Reserved, 'array_def'),
  519. (r'(with|use)(\s+)', bygroups(Keyword.Namespace, Text), 'import'),
  520. (r'(\w+)(\s*)(:)(\s*)(constant)',
  521. bygroups(Name.Constant, Text, Punctuation, Text,
  522. Keyword.Reserved)),
  523. (r'<<\w+>>', Name.Label),
  524. (r'(\w+)(\s*)(:)(\s*)(declare|begin|loop|for|while)',
  525. bygroups(Name.Label, Text, Punctuation, Text, Keyword.Reserved)),
  526. (words((
  527. 'abort', 'abs', 'abstract', 'accept', 'access', 'aliased', 'all',
  528. 'array', 'at', 'begin', 'body', 'case', 'constant', 'declare',
  529. 'delay', 'delta', 'digits', 'do', 'else', 'elsif', 'end', 'entry',
  530. 'exception', 'exit', 'interface', 'for', 'goto', 'if', 'is', 'limited',
  531. 'loop', 'new', 'null', 'of', 'or', 'others', 'out', 'overriding',
  532. 'pragma', 'protected', 'raise', 'range', 'record', 'renames', 'requeue',
  533. 'return', 'reverse', 'select', 'separate', 'subtype', 'synchronized',
  534. 'task', 'tagged', 'terminate', 'then', 'type', 'until', 'when',
  535. 'while', 'xor'), prefix=r'\b', suffix=r'\b'),
  536. Keyword.Reserved),
  537. (r'"[^"]*"', String),
  538. include('attribute'),
  539. include('numbers'),
  540. (r"'[^']'", String.Character),
  541. (r'(\w+)(\s*|[(,])', bygroups(Name, using(this))),
  542. (r"(<>|=>|:=|[()|:;,.'])", Punctuation),
  543. (r'[*<>+=/&-]', Operator),
  544. (r'\n+', Text),
  545. ],
  546. 'numbers': [
  547. (r'[0-9_]+#[0-9a-f]+#', Number.Hex),
  548. (r'[0-9_]+\.[0-9_]*', Number.Float),
  549. (r'[0-9_]+', Number.Integer),
  550. ],
  551. 'attribute': [
  552. (r"(')(\w+)", bygroups(Punctuation, Name.Attribute)),
  553. ],
  554. 'subprogram': [
  555. (r'\(', Punctuation, ('#pop', 'formal_part')),
  556. (r';', Punctuation, '#pop'),
  557. (r'is\b', Keyword.Reserved, '#pop'),
  558. (r'"[^"]+"|\w+', Name.Function),
  559. include('root'),
  560. ],
  561. 'end': [
  562. ('(if|case|record|loop|select)', Keyword.Reserved),
  563. (r'"[^"]+"|[\w.]+', Name.Function),
  564. (r'\s+', Text),
  565. (';', Punctuation, '#pop'),
  566. ],
  567. 'type_def': [
  568. (r';', Punctuation, '#pop'),
  569. (r'\(', Punctuation, 'formal_part'),
  570. (r'with|and|use', Keyword.Reserved),
  571. (r'array\b', Keyword.Reserved, ('#pop', 'array_def')),
  572. (r'record\b', Keyword.Reserved, ('record_def')),
  573. (r'(null record)(;)', bygroups(Keyword.Reserved, Punctuation), '#pop'),
  574. include('root'),
  575. ],
  576. 'array_def': [
  577. (r';', Punctuation, '#pop'),
  578. (r'(\w+)(\s+)(range)', bygroups(Keyword.Type, Text, Keyword.Reserved)),
  579. include('root'),
  580. ],
  581. 'record_def': [
  582. (r'end record', Keyword.Reserved, '#pop'),
  583. include('root'),
  584. ],
  585. 'import': [
  586. (r'[\w.]+', Name.Namespace, '#pop'),
  587. default('#pop'),
  588. ],
  589. 'formal_part': [
  590. (r'\)', Punctuation, '#pop'),
  591. (r'\w+', Name.Variable),
  592. (r',|:[^=]', Punctuation),
  593. (r'(in|not|null|out|access)\b', Keyword.Reserved),
  594. include('root'),
  595. ],
  596. 'package': [
  597. ('body', Keyword.Declaration),
  598. (r'is\s+new|renames', Keyword.Reserved),
  599. ('is', Keyword.Reserved, '#pop'),
  600. (';', Punctuation, '#pop'),
  601. (r'\(', Punctuation, 'package_instantiation'),
  602. (r'([\w.]+)', Name.Class),
  603. include('root'),
  604. ],
  605. 'package_instantiation': [
  606. (r'("[^"]+"|\w+)(\s+)(=>)', bygroups(Name.Variable, Text, Punctuation)),
  607. (r'[\w.\'"]', Text),
  608. (r'\)', Punctuation, '#pop'),
  609. include('root'),
  610. ],
  611. }