asm.py 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050
  1. """
  2. pygments.lexers.asm
  3. ~~~~~~~~~~~~~~~~~~~
  4. Lexers for assembly languages.
  5. :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import re
  9. from pygments.lexer import RegexLexer, include, bygroups, using, words, \
  10. DelegatingLexer, default
  11. from pygments.lexers.c_cpp import CppLexer, CLexer
  12. from pygments.lexers.d import DLexer
  13. from pygments.token import Text, Name, Number, String, Comment, Punctuation, \
  14. Other, Keyword, Operator, Whitespace
  15. __all__ = ['GasLexer', 'ObjdumpLexer', 'DObjdumpLexer', 'CppObjdumpLexer',
  16. 'CObjdumpLexer', 'HsailLexer', 'LlvmLexer', 'LlvmMirBodyLexer',
  17. 'LlvmMirLexer', 'NasmLexer', 'NasmObjdumpLexer', 'TasmLexer',
  18. 'Ca65Lexer', 'Dasm16Lexer']
  19. class GasLexer(RegexLexer):
  20. """
  21. For Gas (AT&T) assembly code.
  22. """
  23. name = 'GAS'
  24. aliases = ['gas', 'asm']
  25. filenames = ['*.s', '*.S']
  26. mimetypes = ['text/x-gas']
  27. url = 'https://www.gnu.org/software/binutils'
  28. version_added = ''
  29. #: optional Comment or Whitespace
  30. string = r'"(\\"|[^"])*"'
  31. char = r'[\w$.@-]'
  32. identifier = r'(?:[a-zA-Z$_]' + char + r'*|\.' + char + '+)'
  33. number = r'(?:0[xX][a-fA-F0-9]+|#?-?\d+)'
  34. register = '%' + identifier + r'\b'
  35. tokens = {
  36. 'root': [
  37. include('whitespace'),
  38. (identifier + ':', Name.Label),
  39. (r'\.' + identifier, Name.Attribute, 'directive-args'),
  40. (r'lock|rep(n?z)?|data\d+', Name.Attribute),
  41. (identifier, Name.Function, 'instruction-args'),
  42. (r'[\r\n]+', Text)
  43. ],
  44. 'directive-args': [
  45. (identifier, Name.Constant),
  46. (string, String),
  47. ('@' + identifier, Name.Attribute),
  48. (number, Number.Integer),
  49. (register, Name.Variable),
  50. (r'[\r\n]+', Whitespace, '#pop'),
  51. (r'([;#]|//).*?\n', Comment.Single, '#pop'),
  52. (r'/[*].*?[*]/', Comment.Multiline),
  53. (r'/[*].*?\n[\w\W]*?[*]/', Comment.Multiline, '#pop'),
  54. include('punctuation'),
  55. include('whitespace')
  56. ],
  57. 'instruction-args': [
  58. # For objdump-disassembled code, shouldn't occur in
  59. # actual assembler input
  60. ('([a-z0-9]+)( )(<)('+identifier+')(>)',
  61. bygroups(Number.Hex, Text, Punctuation, Name.Constant,
  62. Punctuation)),
  63. ('([a-z0-9]+)( )(<)('+identifier+')([-+])('+number+')(>)',
  64. bygroups(Number.Hex, Text, Punctuation, Name.Constant,
  65. Punctuation, Number.Integer, Punctuation)),
  66. # Address constants
  67. (identifier, Name.Constant),
  68. (number, Number.Integer),
  69. # Registers
  70. (register, Name.Variable),
  71. # Numeric constants
  72. ('$'+number, Number.Integer),
  73. (r"$'(.|\\')'", String.Char),
  74. (r'[\r\n]+', Whitespace, '#pop'),
  75. (r'([;#]|//).*?\n', Comment.Single, '#pop'),
  76. (r'/[*].*?[*]/', Comment.Multiline),
  77. (r'/[*].*?\n[\w\W]*?[*]/', Comment.Multiline, '#pop'),
  78. include('punctuation'),
  79. include('whitespace')
  80. ],
  81. 'whitespace': [
  82. (r'\n', Whitespace),
  83. (r'\s+', Whitespace),
  84. (r'([;#]|//).*?\n', Comment.Single),
  85. (r'/[*][\w\W]*?[*]/', Comment.Multiline)
  86. ],
  87. 'punctuation': [
  88. (r'[-*,.()\[\]!:{}]+', Punctuation)
  89. ]
  90. }
  91. def analyse_text(text):
  92. if re.search(r'^\.(text|data|section)', text, re.M):
  93. return True
  94. elif re.search(r'^\.\w+', text, re.M):
  95. return 0.1
  96. def _objdump_lexer_tokens(asm_lexer):
  97. """
  98. Common objdump lexer tokens to wrap an ASM lexer.
  99. """
  100. hex_re = r'[0-9A-Za-z]'
  101. return {
  102. 'root': [
  103. # File name & format:
  104. ('(.*?)(:)( +file format )(.*?)$',
  105. bygroups(Name.Label, Punctuation, Text, String)),
  106. # Section header
  107. ('(Disassembly of section )(.*?)(:)$',
  108. bygroups(Text, Name.Label, Punctuation)),
  109. # Function labels
  110. # (With offset)
  111. ('('+hex_re+'+)( )(<)(.*?)([-+])(0[xX][A-Za-z0-9]+)(>:)$',
  112. bygroups(Number.Hex, Whitespace, Punctuation, Name.Function,
  113. Punctuation, Number.Hex, Punctuation)),
  114. # (Without offset)
  115. ('('+hex_re+'+)( )(<)(.*?)(>:)$',
  116. bygroups(Number.Hex, Whitespace, Punctuation, Name.Function,
  117. Punctuation)),
  118. # Code line with disassembled instructions
  119. ('( *)('+hex_re+r'+:)(\t)((?:'+hex_re+hex_re+' )+)( *\t)([a-zA-Z].*?)$',
  120. bygroups(Whitespace, Name.Label, Whitespace, Number.Hex, Whitespace,
  121. using(asm_lexer))),
  122. # Code line without raw instructions (objdump --no-show-raw-insn)
  123. ('( *)('+hex_re+r'+:)( *\t)([a-zA-Z].*?)$',
  124. bygroups(Whitespace, Name.Label, Whitespace,
  125. using(asm_lexer))),
  126. # Code line with ascii
  127. ('( *)('+hex_re+r'+:)(\t)((?:'+hex_re+hex_re+' )+)( *)(.*?)$',
  128. bygroups(Whitespace, Name.Label, Whitespace, Number.Hex, Whitespace, String)),
  129. # Continued code line, only raw opcodes without disassembled
  130. # instruction
  131. ('( *)('+hex_re+r'+:)(\t)((?:'+hex_re+hex_re+' )+)$',
  132. bygroups(Whitespace, Name.Label, Whitespace, Number.Hex)),
  133. # Skipped a few bytes
  134. (r'\t\.\.\.$', Text),
  135. # Relocation line
  136. # (With offset)
  137. (r'(\t\t\t)('+hex_re+r'+:)( )([^\t]+)(\t)(.*?)([-+])(0x'+hex_re+'+)$',
  138. bygroups(Whitespace, Name.Label, Whitespace, Name.Property, Whitespace,
  139. Name.Constant, Punctuation, Number.Hex)),
  140. # (Without offset)
  141. (r'(\t\t\t)('+hex_re+r'+:)( )([^\t]+)(\t)(.*?)$',
  142. bygroups(Whitespace, Name.Label, Whitespace, Name.Property, Whitespace,
  143. Name.Constant)),
  144. (r'[^\n]+\n', Other)
  145. ]
  146. }
  147. class ObjdumpLexer(RegexLexer):
  148. """
  149. For the output of ``objdump -dr``.
  150. """
  151. name = 'objdump'
  152. aliases = ['objdump']
  153. filenames = ['*.objdump']
  154. mimetypes = ['text/x-objdump']
  155. url = 'https://www.gnu.org/software/binutils'
  156. version_added = ''
  157. tokens = _objdump_lexer_tokens(GasLexer)
  158. class DObjdumpLexer(DelegatingLexer):
  159. """
  160. For the output of ``objdump -Sr`` on compiled D files.
  161. """
  162. name = 'd-objdump'
  163. aliases = ['d-objdump']
  164. filenames = ['*.d-objdump']
  165. mimetypes = ['text/x-d-objdump']
  166. url = 'https://www.gnu.org/software/binutils'
  167. version_added = ''
  168. def __init__(self, **options):
  169. super().__init__(DLexer, ObjdumpLexer, **options)
  170. class CppObjdumpLexer(DelegatingLexer):
  171. """
  172. For the output of ``objdump -Sr`` on compiled C++ files.
  173. """
  174. name = 'cpp-objdump'
  175. aliases = ['cpp-objdump', 'c++-objdumb', 'cxx-objdump']
  176. filenames = ['*.cpp-objdump', '*.c++-objdump', '*.cxx-objdump']
  177. mimetypes = ['text/x-cpp-objdump']
  178. url = 'https://www.gnu.org/software/binutils'
  179. version_added = ''
  180. def __init__(self, **options):
  181. super().__init__(CppLexer, ObjdumpLexer, **options)
  182. class CObjdumpLexer(DelegatingLexer):
  183. """
  184. For the output of ``objdump -Sr`` on compiled C files.
  185. """
  186. name = 'c-objdump'
  187. aliases = ['c-objdump']
  188. filenames = ['*.c-objdump']
  189. mimetypes = ['text/x-c-objdump']
  190. url = 'https://www.gnu.org/software/binutils'
  191. version_added = ''
  192. def __init__(self, **options):
  193. super().__init__(CLexer, ObjdumpLexer, **options)
  194. class HsailLexer(RegexLexer):
  195. """
  196. For HSAIL assembly code.
  197. """
  198. name = 'HSAIL'
  199. aliases = ['hsail', 'hsa']
  200. filenames = ['*.hsail']
  201. mimetypes = ['text/x-hsail']
  202. url = 'https://en.wikipedia.org/wiki/Heterogeneous_System_Architecture#HSA_Intermediate_Layer'
  203. version_added = '2.2'
  204. string = r'"[^"]*?"'
  205. identifier = r'[a-zA-Z_][\w.]*'
  206. # Registers
  207. register_number = r'[0-9]+'
  208. register = r'(\$(c|s|d|q)' + register_number + r')\b'
  209. # Qualifiers
  210. alignQual = r'(align\(\d+\))'
  211. widthQual = r'(width\((\d+|all)\))'
  212. allocQual = r'(alloc\(agent\))'
  213. # Instruction Modifiers
  214. roundingMod = (r'((_ftz)?(_up|_down|_zero|_near))')
  215. datatypeMod = (r'_('
  216. # packedTypes
  217. r'u8x4|s8x4|u16x2|s16x2|u8x8|s8x8|u16x4|s16x4|u32x2|s32x2|'
  218. r'u8x16|s8x16|u16x8|s16x8|u32x4|s32x4|u64x2|s64x2|'
  219. r'f16x2|f16x4|f16x8|f32x2|f32x4|f64x2|'
  220. # baseTypes
  221. r'u8|s8|u16|s16|u32|s32|u64|s64|'
  222. r'b128|b8|b16|b32|b64|b1|'
  223. r'f16|f32|f64|'
  224. # opaqueType
  225. r'roimg|woimg|rwimg|samp|sig32|sig64)')
  226. # Numeric Constant
  227. float = r'((\d+\.)|(\d*\.\d+))[eE][+-]?\d+'
  228. hexfloat = r'0[xX](([0-9a-fA-F]+\.[0-9a-fA-F]*)|([0-9a-fA-F]*\.[0-9a-fA-F]+))[pP][+-]?\d+'
  229. ieeefloat = r'0((h|H)[0-9a-fA-F]{4}|(f|F)[0-9a-fA-F]{8}|(d|D)[0-9a-fA-F]{16})'
  230. tokens = {
  231. 'root': [
  232. include('whitespace'),
  233. include('comments'),
  234. (string, String),
  235. (r'@' + identifier + ':?', Name.Label),
  236. (register, Name.Variable.Anonymous),
  237. include('keyword'),
  238. (r'&' + identifier, Name.Variable.Global),
  239. (r'%' + identifier, Name.Variable),
  240. (hexfloat, Number.Hex),
  241. (r'0[xX][a-fA-F0-9]+', Number.Hex),
  242. (ieeefloat, Number.Float),
  243. (float, Number.Float),
  244. (r'\d+', Number.Integer),
  245. (r'[=<>{}\[\]()*.,:;!]|x\b', Punctuation)
  246. ],
  247. 'whitespace': [
  248. (r'(\n|\s)+', Whitespace),
  249. ],
  250. 'comments': [
  251. (r'/\*.*?\*/', Comment.Multiline),
  252. (r'//.*?\n', Comment.Single),
  253. ],
  254. 'keyword': [
  255. # Types
  256. (r'kernarg' + datatypeMod, Keyword.Type),
  257. # Regular keywords
  258. (r'\$(full|base|small|large|default|zero|near)', Keyword),
  259. (words((
  260. 'module', 'extension', 'pragma', 'prog', 'indirect', 'signature',
  261. 'decl', 'kernel', 'function', 'enablebreakexceptions',
  262. 'enabledetectexceptions', 'maxdynamicgroupsize', 'maxflatgridsize',
  263. 'maxflatworkgroupsize', 'requireddim', 'requiredgridsize',
  264. 'requiredworkgroupsize', 'requirenopartialworkgroups'),
  265. suffix=r'\b'), Keyword),
  266. # instructions
  267. (roundingMod, Keyword),
  268. (datatypeMod, Keyword),
  269. (r'_(' + alignQual + '|' + widthQual + ')', Keyword),
  270. (r'_kernarg', Keyword),
  271. (r'(nop|imagefence)\b', Keyword),
  272. (words((
  273. 'cleardetectexcept', 'clock', 'cuid', 'debugtrap', 'dim',
  274. 'getdetectexcept', 'groupbaseptr', 'kernargbaseptr', 'laneid',
  275. 'maxcuid', 'maxwaveid', 'packetid', 'setdetectexcept', 'waveid',
  276. 'workitemflatabsid', 'workitemflatid', 'nullptr', 'abs', 'bitrev',
  277. 'currentworkgroupsize', 'currentworkitemflatid', 'fract', 'ncos',
  278. 'neg', 'nexp2', 'nlog2', 'nrcp', 'nrsqrt', 'nsin', 'nsqrt',
  279. 'gridgroups', 'gridsize', 'not', 'sqrt', 'workgroupid',
  280. 'workgroupsize', 'workitemabsid', 'workitemid', 'ceil', 'floor',
  281. 'rint', 'trunc', 'add', 'bitmask', 'borrow', 'carry', 'copysign',
  282. 'div', 'rem', 'sub', 'shl', 'shr', 'and', 'or', 'xor', 'unpackhi',
  283. 'unpacklo', 'max', 'min', 'fma', 'mad', 'bitextract', 'bitselect',
  284. 'shuffle', 'cmov', 'bitalign', 'bytealign', 'lerp', 'nfma', 'mul',
  285. 'mulhi', 'mul24hi', 'mul24', 'mad24', 'mad24hi', 'bitinsert',
  286. 'combine', 'expand', 'lda', 'mov', 'pack', 'unpack', 'packcvt',
  287. 'unpackcvt', 'sad', 'sementp', 'ftos', 'stof', 'cmp', 'ld', 'st',
  288. '_eq', '_ne', '_lt', '_le', '_gt', '_ge', '_equ', '_neu', '_ltu',
  289. '_leu', '_gtu', '_geu', '_num', '_nan', '_seq', '_sne', '_slt',
  290. '_sle', '_sgt', '_sge', '_snum', '_snan', '_sequ', '_sneu', '_sltu',
  291. '_sleu', '_sgtu', '_sgeu', 'atomic', '_ld', '_st', '_cas', '_add',
  292. '_and', '_exch', '_max', '_min', '_or', '_sub', '_wrapdec',
  293. '_wrapinc', '_xor', 'ret', 'cvt', '_readonly', '_kernarg', '_global',
  294. 'br', 'cbr', 'sbr', '_scacq', '_screl', '_scar', '_rlx', '_wave',
  295. '_wg', '_agent', '_system', 'ldimage', 'stimage', '_v2', '_v3', '_v4',
  296. '_1d', '_2d', '_3d', '_1da', '_2da', '_1db', '_2ddepth', '_2dadepth',
  297. '_width', '_height', '_depth', '_array', '_channelorder',
  298. '_channeltype', 'querysampler', '_coord', '_filter', '_addressing',
  299. 'barrier', 'wavebarrier', 'initfbar', 'joinfbar', 'waitfbar',
  300. 'arrivefbar', 'leavefbar', 'releasefbar', 'ldf', 'activelaneid',
  301. 'activelanecount', 'activelanemask', 'activelanepermute', 'call',
  302. 'scall', 'icall', 'alloca', 'packetcompletionsig',
  303. 'addqueuewriteindex', 'casqueuewriteindex', 'ldqueuereadindex',
  304. 'stqueuereadindex', 'readonly', 'global', 'private', 'group',
  305. 'spill', 'arg', '_upi', '_downi', '_zeroi', '_neari', '_upi_sat',
  306. '_downi_sat', '_zeroi_sat', '_neari_sat', '_supi', '_sdowni',
  307. '_szeroi', '_sneari', '_supi_sat', '_sdowni_sat', '_szeroi_sat',
  308. '_sneari_sat', '_pp', '_ps', '_sp', '_ss', '_s', '_p', '_pp_sat',
  309. '_ps_sat', '_sp_sat', '_ss_sat', '_s_sat', '_p_sat')), Keyword),
  310. # Integer types
  311. (r'i[1-9]\d*', Keyword)
  312. ]
  313. }
class LlvmLexer(RegexLexer):
    """
    For LLVM assembly code.

    The ``root`` state tries block labels before keywords, then the various
    ``%``/``@``/``#``/``!`` prefixed names, strings, numbers and punctuation.
    Rule order is significant.
    """
    name = 'LLVM'
    url = 'https://llvm.org/docs/LangRef.html'
    aliases = ['llvm']
    filenames = ['*.ll']
    mimetypes = ['text/x-llvm']
    version_added = ''

    # Regex fragments shared by the rules below.
    # A double-quoted string without embedded quotes.
    string = r'"[^"]*?"'
    # A bare name or a quoted name.
    identifier = r'([-a-zA-Z$._][\w\-$.]*|' + string + ')'
    # A block label is either an identifier or a plain number.
    block_label = r'(' + identifier + r'|(\d+))'

    tokens = {
        'root': [
            include('whitespace'),

            # Before keywords, because keywords are valid label names :(...
            (block_label + r'\s*:', Name.Label),

            include('keyword'),

            (r'%' + identifier, Name.Variable),
            (r'@' + identifier, Name.Variable.Global),
            (r'%\d+', Name.Variable.Anonymous),
            (r'@\d+', Name.Variable.Global),
            (r'#\d+', Name.Variable.Global),
            (r'!' + identifier, Name.Variable),
            (r'!\d+', Name.Variable.Anonymous),
            # Plain strings and c"..." strings
            (r'c?' + string, String),

            (r'0[xX][a-fA-F0-9]+', Number),
            (r'-?\d+(?:[.]\d+)?(?:[eE][-+]?\d+(?:[.]\d+)?)?', Number),

            (r'[=<>{}\[\]()*.,!]|x\b', Punctuation)
        ],
        'whitespace': [
            (r'(\n|\s+)+', Whitespace),
            # Comments run from ';' to the end of the line
            (r';.*?\n', Comment)
        ],
        'keyword': [
            # Regular keywords
            (words((
                'aarch64_sve_vector_pcs', 'aarch64_vector_pcs', 'acq_rel',
                'acquire', 'add', 'addrspace', 'addrspacecast', 'afn', 'alias',
                'aliasee', 'align', 'alignLog2', 'alignstack', 'alloca',
                'allocsize', 'allOnes', 'alwaysinline', 'alwaysInline',
                'amdgpu_cs', 'amdgpu_es', 'amdgpu_gfx', 'amdgpu_gs',
                'amdgpu_hs', 'amdgpu_kernel', 'amdgpu_ls', 'amdgpu_ps',
                'amdgpu_vs', 'and', 'any', 'anyregcc', 'appending', 'arcp',
                'argmemonly', 'args', 'arm_aapcs_vfpcc', 'arm_aapcscc',
                'arm_apcscc', 'ashr', 'asm', 'atomic', 'atomicrmw',
                'attributes', 'available_externally', 'avr_intrcc',
                'avr_signalcc', 'bit', 'bitcast', 'bitMask', 'blockaddress',
                'blockcount', 'br', 'branchFunnel', 'builtin', 'byArg',
                'byref', 'byte', 'byteArray', 'byval', 'c', 'call', 'callbr',
                'callee', 'caller', 'calls', 'canAutoHide', 'catch',
                'catchpad', 'catchret', 'catchswitch', 'cc', 'ccc',
                'cfguard_checkcc', 'cleanup', 'cleanuppad', 'cleanupret',
                'cmpxchg', 'cold', 'coldcc', 'comdat', 'common', 'constant',
                'contract', 'convergent', 'critical', 'cxx_fast_tlscc',
                'datalayout', 'declare', 'default', 'define', 'deplibs',
                'dereferenceable', 'dereferenceable_or_null', 'distinct',
                'dllexport', 'dllimport', 'dso_local', 'dso_local_equivalent',
                'dso_preemptable', 'dsoLocal', 'eq', 'exact', 'exactmatch',
                'extern_weak', 'external', 'externally_initialized',
                'extractelement', 'extractvalue', 'fadd', 'false', 'fast',
                'fastcc', 'fcmp', 'fdiv', 'fence', 'filter', 'flags', 'fmul',
                'fneg', 'fpext', 'fptosi', 'fptoui', 'fptrunc', 'freeze',
                'frem', 'from', 'fsub', 'funcFlags', 'function', 'gc',
                'getelementptr', 'ghccc', 'global', 'guid', 'gv', 'hash',
                'hhvm_ccc', 'hhvmcc', 'hidden', 'hot', 'hotness', 'icmp',
                'ifunc', 'inaccessiblemem_or_argmemonly',
                'inaccessiblememonly', 'inalloca', 'inbounds', 'indir',
                'indirectbr', 'info', 'initialexec', 'inline', 'inlineBits',
                'inlinehint', 'inrange', 'inreg', 'insertelement',
                'insertvalue', 'insts', 'intel_ocl_bicc', 'inteldialect',
                'internal', 'inttoptr', 'invoke', 'jumptable', 'kind',
                'landingpad', 'largest', 'linkage', 'linkonce', 'linkonce_odr',
                'live', 'load', 'local_unnamed_addr', 'localdynamic',
                'localexec', 'lshr', 'max', 'metadata', 'min', 'minsize',
                'module', 'monotonic', 'msp430_intrcc', 'mul', 'mustprogress',
                'musttail', 'naked', 'name', 'nand', 'ne', 'nest', 'ninf',
                'nnan', 'noalias', 'nobuiltin', 'nocallback', 'nocapture',
                'nocf_check', 'noduplicate', 'noduplicates', 'nofree',
                'noimplicitfloat', 'noinline', 'noInline', 'nomerge', 'none',
                'nonlazybind', 'nonnull', 'noprofile', 'norecurse',
                'noRecurse', 'noredzone', 'noreturn', 'nosync', 'notail',
                'notEligibleToImport', 'noundef', 'nounwind', 'nsw',
                'nsz', 'null', 'null_pointer_is_valid', 'nuw', 'oeq', 'offset',
                'oge', 'ogt', 'ole', 'olt', 'one', 'opaque', 'optforfuzzing',
                'optnone', 'optsize', 'or', 'ord', 'param', 'params',
                'partition', 'path', 'personality', 'phi', 'poison',
                'preallocated', 'prefix', 'preserve_allcc', 'preserve_mostcc',
                'private', 'prologue', 'protected', 'ptrtoint', 'ptx_device',
                'ptx_kernel', 'readnone', 'readNone', 'readonly', 'readOnly',
                'reassoc', 'refs', 'relbf', 'release', 'resByArg', 'resume',
                'ret', 'returnDoesNotAlias', 'returned', 'returns_twice',
                'safestack', 'samesize', 'sanitize_address',
                'sanitize_hwaddress', 'sanitize_memory', 'sanitize_memtag',
                'sanitize_thread', 'sdiv', 'section', 'select', 'seq_cst',
                'sext', 'sge', 'sgt', 'shadowcallstack', 'shl',
                'shufflevector', 'sideeffect', 'signext', 'single',
                'singleImpl', 'singleImplName', 'sitofp', 'sizeM1',
                'sizeM1BitWidth', 'sle', 'slt', 'source_filename',
                'speculatable', 'speculative_load_hardening', 'spir_func',
                'spir_kernel', 'srem', 'sret', 'ssp', 'sspreq', 'sspstrong',
                'store', 'strictfp', 'sub', 'summaries', 'summary', 'swiftcc',
                'swifterror', 'swiftself', 'switch', 'syncscope', 'tail',
                'tailcc', 'target', 'thread_local', 'to', 'token', 'triple',
                'true', 'trunc', 'type', 'typeCheckedLoadConstVCalls',
                'typeCheckedLoadVCalls', 'typeid', 'typeidCompatibleVTable',
                'typeIdInfo', 'typeTestAssumeConstVCalls',
                'typeTestAssumeVCalls', 'typeTestRes', 'typeTests', 'udiv',
                'ueq', 'uge', 'ugt', 'uitofp', 'ule', 'ult', 'umax', 'umin',
                'undef', 'une', 'uniformRetVal', 'uniqueRetVal', 'unknown',
                'unnamed_addr', 'uno', 'unordered', 'unreachable', 'unsat',
                'unwind', 'urem', 'uselistorder', 'uselistorder_bb', 'uwtable',
                'va_arg', 'varFlags', 'variable', 'vcall_visibility',
                'vFuncId', 'virtFunc', 'virtualConstProp', 'void', 'volatile',
                'vscale', 'vTableFuncs', 'weak', 'weak_odr', 'webkit_jscc',
                'win64cc', 'within', 'wpdRes', 'wpdResolutions', 'writeonly',
                'x', 'x86_64_sysvcc', 'x86_fastcallcc', 'x86_intrcc',
                'x86_mmx', 'x86_regcallcc', 'x86_stdcallcc', 'x86_thiscallcc',
                'x86_vectorcallcc', 'xchg', 'xor', 'zeroext',
                'zeroinitializer', 'zext', 'immarg', 'willreturn'),
                suffix=r'\b'), Keyword),

            # Types
            # NOTE(review): unlike the keyword list above, this list has no
            # \b suffix, and several entries also appear in the keyword list,
            # which is tried first.
            (words(('void', 'half', 'bfloat', 'float', 'double', 'fp128',
                    'x86_fp80', 'ppc_fp128', 'label', 'metadata', 'x86_mmx',
                    'x86_amx', 'token', 'ptr')),
             Keyword.Type),

            # Integer types
            (r'i[1-9]\d*', Keyword.Type)
        ]
    }
  446. class LlvmMirBodyLexer(RegexLexer):
  447. """
  448. For LLVM MIR examples without the YAML wrapper.
  449. """
  450. name = 'LLVM-MIR Body'
  451. url = 'https://llvm.org/docs/MIRLangRef.html'
  452. aliases = ['llvm-mir-body']
  453. filenames = []
  454. mimetypes = []
  455. version_added = '2.6'
  456. tokens = {
  457. 'root': [
  458. # Attributes on basic blocks
  459. (words(('liveins', 'successors'), suffix=':'), Keyword),
  460. # Basic Block Labels
  461. (r'bb\.[0-9]+(\.[a-zA-Z0-9_.-]+)?( \(address-taken\))?:', Name.Label),
  462. (r'bb\.[0-9]+ \(%[a-zA-Z0-9_.-]+\)( \(address-taken\))?:', Name.Label),
  463. (r'%bb\.[0-9]+(\.\w+)?', Name.Label),
  464. # Stack references
  465. (r'%stack\.[0-9]+(\.\w+\.addr)?', Name),
  466. # Subreg indices
  467. (r'%subreg\.\w+', Name),
  468. # Virtual registers
  469. (r'%[a-zA-Z0-9_]+ *', Name.Variable, 'vreg'),
  470. # Reference to LLVM-IR global
  471. include('global'),
  472. # Reference to Intrinsic
  473. (r'intrinsic\(\@[a-zA-Z0-9_.]+\)', Name.Variable.Global),
  474. # Comparison predicates
  475. (words(('eq', 'ne', 'sgt', 'sge', 'slt', 'sle', 'ugt', 'uge', 'ult',
  476. 'ule'), prefix=r'intpred\(', suffix=r'\)'), Name.Builtin),
  477. (words(('oeq', 'one', 'ogt', 'oge', 'olt', 'ole', 'ugt', 'uge',
  478. 'ult', 'ule'), prefix=r'floatpred\(', suffix=r'\)'),
  479. Name.Builtin),
  480. # Physical registers
  481. (r'\$\w+', String.Single),
  482. # Assignment operator
  483. (r'=', Operator),
  484. # gMIR Opcodes
  485. (r'(G_ANYEXT|G_[SZ]EXT|G_SEXT_INREG|G_TRUNC|G_IMPLICIT_DEF|G_PHI|'
  486. r'G_FRAME_INDEX|G_GLOBAL_VALUE|G_INTTOPTR|G_PTRTOINT|G_BITCAST|'
  487. r'G_CONSTANT|G_FCONSTANT|G_VASTART|G_VAARG|G_CTLZ|G_CTLZ_ZERO_UNDEF|'
  488. r'G_CTTZ|G_CTTZ_ZERO_UNDEF|G_CTPOP|G_BSWAP|G_BITREVERSE|'
  489. r'G_ADDRSPACE_CAST|G_BLOCK_ADDR|G_JUMP_TABLE|G_DYN_STACKALLOC|'
  490. r'G_ADD|G_SUB|G_MUL|G_[SU]DIV|G_[SU]REM|G_AND|G_OR|G_XOR|G_SHL|'
  491. r'G_[LA]SHR|G_[IF]CMP|G_SELECT|G_GEP|G_PTR_MASK|G_SMIN|G_SMAX|'
  492. r'G_UMIN|G_UMAX|G_[US]ADDO|G_[US]ADDE|G_[US]SUBO|G_[US]SUBE|'
  493. r'G_[US]MULO|G_[US]MULH|G_FNEG|G_FPEXT|G_FPTRUNC|G_FPTO[US]I|'
  494. r'G_[US]ITOFP|G_FABS|G_FCOPYSIGN|G_FCANONICALIZE|G_FMINNUM|'
  495. r'G_FMAXNUM|G_FMINNUM_IEEE|G_FMAXNUM_IEEE|G_FMINIMUM|G_FMAXIMUM|'
  496. r'G_FADD|G_FSUB|G_FMUL|G_FMA|G_FMAD|G_FDIV|G_FREM|G_FPOW|G_FEXP|'
  497. r'G_FEXP2|G_FLOG|G_FLOG2|G_FLOG10|G_FCEIL|G_FCOS|G_FSIN|G_FSQRT|'
  498. r'G_FFLOOR|G_FRINT|G_FNEARBYINT|G_INTRINSIC_TRUNC|'
  499. r'G_INTRINSIC_ROUND|G_LOAD|G_[ZS]EXTLOAD|G_INDEXED_LOAD|'
  500. r'G_INDEXED_[ZS]EXTLOAD|G_STORE|G_INDEXED_STORE|'
  501. r'G_ATOMIC_CMPXCHG_WITH_SUCCESS|G_ATOMIC_CMPXCHG|'
  502. r'G_ATOMICRMW_(XCHG|ADD|SUB|AND|NAND|OR|XOR|MAX|MIN|UMAX|UMIN|FADD|'
  503. r'FSUB)'
  504. r'|G_FENCE|G_EXTRACT|G_UNMERGE_VALUES|G_INSERT|G_MERGE_VALUES|'
  505. r'G_BUILD_VECTOR|G_BUILD_VECTOR_TRUNC|G_CONCAT_VECTORS|'
  506. r'G_INTRINSIC|G_INTRINSIC_W_SIDE_EFFECTS|G_BR|G_BRCOND|'
  507. r'G_BRINDIRECT|G_BRJT|G_INSERT_VECTOR_ELT|G_EXTRACT_VECTOR_ELT|'
  508. r'G_SHUFFLE_VECTOR)\b',
  509. Name.Builtin),
  510. # Target independent opcodes
  511. (r'(COPY|PHI|INSERT_SUBREG|EXTRACT_SUBREG|REG_SEQUENCE)\b',
  512. Name.Builtin),
  513. # Flags
  514. (words(('killed', 'implicit')), Keyword),
  515. # ConstantInt values
  516. (r'(i[0-9]+)( +)', bygroups(Keyword.Type, Whitespace), 'constantint'),
  517. # ConstantFloat values
  518. (r'(half|float|double) +', Keyword.Type, 'constantfloat'),
  519. # Bare immediates
  520. include('integer'),
  521. # MMO's
  522. (r'(::)( *)', bygroups(Operator, Whitespace), 'mmo'),
  523. # MIR Comments
  524. (r';.*', Comment),
  525. # If we get here, assume it's a target instruction
  526. (r'[a-zA-Z0-9_]+', Name),
  527. # Everything else that isn't highlighted
  528. (r'[(), \n]+', Text),
  529. ],
  530. # The integer constant from a ConstantInt value
  531. 'constantint': [
  532. include('integer'),
  533. (r'(?=.)', Text, '#pop'),
  534. ],
  535. # The floating point constant from a ConstantFloat value
  536. 'constantfloat': [
  537. include('float'),
  538. (r'(?=.)', Text, '#pop'),
  539. ],
  540. 'vreg': [
  541. # The bank or class if there is one
  542. (r'( *)(:(?!:))', bygroups(Whitespace, Keyword), ('#pop', 'vreg_bank_or_class')),
  543. # The LLT if there is one
  544. (r'( *)(\()', bygroups(Whitespace, Text), 'vreg_type'),
  545. (r'(?=.)', Text, '#pop'),
  546. ],
  547. 'vreg_bank_or_class': [
  548. # The unassigned bank/class
  549. (r'( *)(_)', bygroups(Whitespace, Name.Variable.Magic)),
  550. (r'( *)([a-zA-Z0-9_]+)', bygroups(Whitespace, Name.Variable)),
  551. # The LLT if there is one
  552. (r'( *)(\()', bygroups(Whitespace, Text), 'vreg_type'),
  553. (r'(?=.)', Text, '#pop'),
  554. ],
  555. 'vreg_type': [
  556. # Scalar and pointer types
  557. (r'( *)([sp][0-9]+)', bygroups(Whitespace, Keyword.Type)),
  558. (r'( *)(<[0-9]+ *x *[sp][0-9]+>)', bygroups(Whitespace, Keyword.Type)),
  559. (r'\)', Text, '#pop'),
  560. (r'(?=.)', Text, '#pop'),
  561. ],
  562. 'mmo': [
  563. (r'\(', Text),
  564. (r' +', Whitespace),
  565. (words(('load', 'store', 'on', 'into', 'from', 'align', 'monotonic',
  566. 'acquire', 'release', 'acq_rel', 'seq_cst')),
  567. Keyword),
  568. # IR references
  569. (r'%ir\.[a-zA-Z0-9_.-]+', Name),
  570. (r'%ir-block\.[a-zA-Z0-9_.-]+', Name),
  571. (r'[-+]', Operator),
  572. include('integer'),
  573. include('global'),
  574. (r',', Punctuation),
  575. (r'\), \(', Text),
  576. (r'\)', Text, '#pop'),
  577. ],
  578. 'integer': [(r'-?[0-9]+', Number.Integer),],
  579. 'float': [(r'-?[0-9]+\.[0-9]+(e[+-][0-9]+)?', Number.Float)],
  580. 'global': [(r'\@[a-zA-Z0-9_.]+', Name.Variable.Global)],
  581. }
  582. class LlvmMirLexer(RegexLexer):
  583. """
  584. Lexer for the overall LLVM MIR document format.
  585. MIR is a human readable serialization format that's used to represent LLVM's
  586. machine specific intermediate representation. It allows LLVM's developers to
  587. see the state of the compilation process at various points, as well as test
  588. individual pieces of the compiler.
  589. """
  590. name = 'LLVM-MIR'
  591. url = 'https://llvm.org/docs/MIRLangRef.html'
  592. aliases = ['llvm-mir']
  593. filenames = ['*.mir']
  594. version_added = '2.6'
  595. tokens = {
  596. 'root': [
  597. # Comments are hashes at the YAML level
  598. (r'#.*', Comment),
  599. # Documents starting with | are LLVM-IR
  600. (r'--- \|$', Keyword, 'llvm_ir'),
  601. # Other documents are MIR
  602. (r'---', Keyword, 'llvm_mir'),
  603. # Consume everything else in one token for efficiency
  604. (r'[^-#]+|.', Text),
  605. ],
  606. 'llvm_ir': [
  607. # Documents end with '...' or '---'
  608. (r'(\.\.\.|(?=---))', Keyword, '#pop'),
  609. # Delegate to the LlvmLexer
  610. (r'((?:.|\n)+?)(?=(\.\.\.|---))', bygroups(using(LlvmLexer))),
  611. ],
  612. 'llvm_mir': [
  613. # Comments are hashes at the YAML level
  614. (r'#.*', Comment),
  615. # Documents end with '...' or '---'
  616. (r'(\.\.\.|(?=---))', Keyword, '#pop'),
  617. # Handle the simple attributes
  618. (r'name:', Keyword, 'name'),
  619. (words(('alignment', ),
  620. suffix=':'), Keyword, 'number'),
  621. (words(('legalized', 'regBankSelected', 'tracksRegLiveness',
  622. 'selected', 'exposesReturnsTwice'),
  623. suffix=':'), Keyword, 'boolean'),
  624. # Handle the attributes don't highlight inside
  625. (words(('registers', 'stack', 'fixedStack', 'liveins', 'frameInfo',
  626. 'machineFunctionInfo'),
  627. suffix=':'), Keyword),
  628. # Delegate the body block to the LlvmMirBodyLexer
  629. (r'body: *\|', Keyword, 'llvm_mir_body'),
  630. # Consume everything else
  631. (r'.+', Text),
  632. (r'\n', Whitespace),
  633. ],
  634. 'name': [
  635. (r'[^\n]+', Name),
  636. default('#pop'),
  637. ],
  638. 'boolean': [
  639. (r' *(true|false)', Name.Builtin),
  640. default('#pop'),
  641. ],
  642. 'number': [
  643. (r' *[0-9]+', Number),
  644. default('#pop'),
  645. ],
  646. 'llvm_mir_body': [
  647. # Documents end with '...' or '---'.
  648. # We have to pop llvm_mir_body and llvm_mir
  649. (r'(\.\.\.|(?=---))', Keyword, '#pop:2'),
  650. # Delegate the body block to the LlvmMirBodyLexer
  651. (r'((?:.|\n)+?)(?=\.\.\.|---)', bygroups(using(LlvmMirBodyLexer))),
  652. # The '...' is optional. If we didn't already find it then it isn't
  653. # there. There might be a '---' instead though.
  654. (r'(?!\.\.\.|---)((?:.|\n)+)', bygroups(using(LlvmMirBodyLexer))),
  655. ],
  656. }
  657. class NasmLexer(RegexLexer):
  658. """
  659. For Nasm (Intel) assembly code.
  660. """
  661. name = 'NASM'
  662. aliases = ['nasm']
  663. filenames = ['*.asm', '*.ASM', '*.nasm']
  664. mimetypes = ['text/x-nasm']
  665. url = 'https://nasm.us'
  666. version_added = ''
  667. # Tasm uses the same file endings, but TASM is not as common as NASM, so
  668. # we prioritize NASM higher by default
  669. priority = 1.0
  670. identifier = r'[a-z$._?][\w$.?#@~]*'
  671. hexn = r'(?:0x[0-9a-f]+|$0[0-9a-f]*|[0-9]+[0-9a-f]*h)'
  672. octn = r'[0-7]+q'
  673. binn = r'[01]+b'
  674. decn = r'[0-9]+'
  675. floatn = decn + r'\.e?' + decn
  676. string = r'"(\\"|[^"\n])*"|' + r"'(\\'|[^'\n])*'|" + r"`(\\`|[^`\n])*`"
  677. declkw = r'(?:res|d)[bwdqt]|times'
  678. register = (r'(r[0-9][0-5]?[bwd]?|'
  679. r'[a-d][lh]|[er]?[a-d]x|[er]?[sb]p|[er]?[sd]i|[c-gs]s|st[0-7]|'
  680. r'mm[0-7]|cr[0-4]|dr[0-367]|tr[3-7]|k[0-7]|'
  681. r'[xyz]mm(?:[12][0-9]?|3[01]?|[04-9]))\b')
  682. wordop = r'seg|wrt|strict|rel|abs'
  683. type = r'byte|[dq]?word'
  684. # Directives must be followed by whitespace, otherwise CPU will match
  685. # cpuid for instance.
  686. directives = (r'(?:BITS|USE16|USE32|SECTION|SEGMENT|ABSOLUTE|EXTERN|GLOBAL|'
  687. r'ORG|ALIGN|STRUC|ENDSTRUC|COMMON|CPU|GROUP|UPPERCASE|IMPORT|'
  688. r'EXPORT|LIBRARY|MODULE)(?=\s)')
  689. flags = re.IGNORECASE | re.MULTILINE
  690. tokens = {
  691. 'root': [
  692. (r'^\s*%', Comment.Preproc, 'preproc'),
  693. include('whitespace'),
  694. (identifier + ':', Name.Label),
  695. (rf'({identifier})(\s+)(equ)',
  696. bygroups(Name.Constant, Whitespace, Keyword.Declaration),
  697. 'instruction-args'),
  698. (directives, Keyword, 'instruction-args'),
  699. (declkw, Keyword.Declaration, 'instruction-args'),
  700. (identifier, Name.Function, 'instruction-args'),
  701. (r'[\r\n]+', Whitespace)
  702. ],
  703. 'instruction-args': [
  704. (string, String),
  705. (hexn, Number.Hex),
  706. (octn, Number.Oct),
  707. (binn, Number.Bin),
  708. (floatn, Number.Float),
  709. (decn, Number.Integer),
  710. include('punctuation'),
  711. (register, Name.Builtin),
  712. (identifier, Name.Variable),
  713. (r'[\r\n]+', Whitespace, '#pop'),
  714. include('whitespace')
  715. ],
  716. 'preproc': [
  717. (r'[^;\n]+', Comment.Preproc),
  718. (r';.*?\n', Comment.Single, '#pop'),
  719. (r'\n', Comment.Preproc, '#pop'),
  720. ],
  721. 'whitespace': [
  722. (r'\n', Whitespace),
  723. (r'[ \t]+', Whitespace),
  724. (r';.*', Comment.Single),
  725. (r'#.*', Comment.Single)
  726. ],
  727. 'punctuation': [
  728. (r'[,{}():\[\]]+', Punctuation),
  729. (r'[&|^<>+*/%~-]+', Operator),
  730. (r'[$]+', Keyword.Constant),
  731. (wordop, Operator.Word),
  732. (type, Keyword.Type)
  733. ],
  734. }
  735. def analyse_text(text):
  736. # Probably TASM
  737. if re.match(r'PROC', text, re.IGNORECASE):
  738. return False
  739. class NasmObjdumpLexer(ObjdumpLexer):
  740. """
  741. For the output of ``objdump -d -M intel``.
  742. """
  743. name = 'objdump-nasm'
  744. aliases = ['objdump-nasm']
  745. filenames = ['*.objdump-intel']
  746. mimetypes = ['text/x-nasm-objdump']
  747. url = 'https://www.gnu.org/software/binutils'
  748. version_added = '2.0'
  749. tokens = _objdump_lexer_tokens(NasmLexer)
  750. class TasmLexer(RegexLexer):
  751. """
  752. For Tasm (Turbo Assembler) assembly code.
  753. """
  754. name = 'TASM'
  755. aliases = ['tasm']
  756. filenames = ['*.asm', '*.ASM', '*.tasm']
  757. mimetypes = ['text/x-tasm']
  758. url = 'https://en.wikipedia.org/wiki/Turbo_Assembler'
  759. version_added = ''
  760. identifier = r'[@a-z$._?][\w$.?#@~]*'
  761. hexn = r'(?:0x[0-9a-f]+|$0[0-9a-f]*|[0-9]+[0-9a-f]*h)'
  762. octn = r'[0-7]+q'
  763. binn = r'[01]+b'
  764. decn = r'[0-9]+'
  765. floatn = decn + r'\.e?' + decn
  766. string = r'"(\\"|[^"\n])*"|' + r"'(\\'|[^'\n])*'|" + r"`(\\`|[^`\n])*`"
  767. declkw = r'(?:res|d)[bwdqt]|times'
  768. register = (r'(r[0-9][0-5]?[bwd]|'
  769. r'[a-d][lh]|[er]?[a-d]x|[er]?[sb]p|[er]?[sd]i|[c-gs]s|st[0-7]|'
  770. r'mm[0-7]|cr[0-4]|dr[0-367]|tr[3-7])\b')
  771. wordop = r'seg|wrt|strict'
  772. type = r'byte|[dq]?word'
  773. directives = (r'BITS|USE16|USE32|SECTION|SEGMENT|ABSOLUTE|EXTERN|GLOBAL|'
  774. r'ORG|ALIGN|STRUC|ENDSTRUC|ENDS|COMMON|CPU|GROUP|UPPERCASE|INCLUDE|'
  775. r'EXPORT|LIBRARY|MODULE|PROC|ENDP|USES|ARG|DATASEG|UDATASEG|END|IDEAL|'
  776. r'P386|MODEL|ASSUME|CODESEG|SIZE')
  777. # T[A-Z][a-z] is more of a convention. Lexer should filter out STRUC definitions
  778. # and then 'add' them to datatype somehow.
  779. datatype = (r'db|dd|dw|T[A-Z][a-z]+')
  780. flags = re.IGNORECASE | re.MULTILINE
  781. tokens = {
  782. 'root': [
  783. (r'^\s*%', Comment.Preproc, 'preproc'),
  784. include('whitespace'),
  785. (identifier + ':', Name.Label),
  786. (directives, Keyword, 'instruction-args'),
  787. (rf'({identifier})(\s+)({datatype})',
  788. bygroups(Name.Constant, Whitespace, Keyword.Declaration),
  789. 'instruction-args'),
  790. (declkw, Keyword.Declaration, 'instruction-args'),
  791. (identifier, Name.Function, 'instruction-args'),
  792. (r'[\r\n]+', Whitespace)
  793. ],
  794. 'instruction-args': [
  795. (string, String),
  796. (hexn, Number.Hex),
  797. (octn, Number.Oct),
  798. (binn, Number.Bin),
  799. (floatn, Number.Float),
  800. (decn, Number.Integer),
  801. include('punctuation'),
  802. (register, Name.Builtin),
  803. (identifier, Name.Variable),
  804. # Do not match newline when it's preceded by a backslash
  805. (r'(\\)(\s*)(;.*)([\r\n])',
  806. bygroups(Text, Whitespace, Comment.Single, Whitespace)),
  807. (r'[\r\n]+', Whitespace, '#pop'),
  808. include('whitespace')
  809. ],
  810. 'preproc': [
  811. (r'[^;\n]+', Comment.Preproc),
  812. (r';.*?\n', Comment.Single, '#pop'),
  813. (r'\n', Comment.Preproc, '#pop'),
  814. ],
  815. 'whitespace': [
  816. (r'[\n\r]', Whitespace),
  817. (r'(\\)([\n\r])', bygroups(Text, Whitespace)),
  818. (r'[ \t]+', Whitespace),
  819. (r';.*', Comment.Single)
  820. ],
  821. 'punctuation': [
  822. (r'[,():\[\]]+', Punctuation),
  823. (r'[&|^<>+*=/%~-]+', Operator),
  824. (r'[$]+', Keyword.Constant),
  825. (wordop, Operator.Word),
  826. (type, Keyword.Type)
  827. ],
  828. }
  829. def analyse_text(text):
  830. # See above
  831. if re.match(r'PROC', text, re.I):
  832. return True
  833. class Ca65Lexer(RegexLexer):
  834. """
  835. For ca65 assembler sources.
  836. """
  837. name = 'ca65 assembler'
  838. aliases = ['ca65']
  839. filenames = ['*.s']
  840. url = 'https://cc65.github.io'
  841. version_added = '1.6'
  842. flags = re.IGNORECASE
  843. tokens = {
  844. 'root': [
  845. (r';.*', Comment.Single),
  846. (r'\s+', Whitespace),
  847. (r'[a-z_.@$][\w.@$]*:', Name.Label),
  848. (r'((ld|st)[axy]|(in|de)[cxy]|asl|lsr|ro[lr]|adc|sbc|cmp|cp[xy]'
  849. r'|cl[cvdi]|se[cdi]|jmp|jsr|bne|beq|bpl|bmi|bvc|bvs|bcc|bcs'
  850. r'|p[lh][ap]|rt[is]|brk|nop|ta[xy]|t[xy]a|txs|tsx|and|ora|eor'
  851. r'|bit)\b', Keyword),
  852. (r'\.\w+', Keyword.Pseudo),
  853. (r'[-+~*/^&|!<>=]', Operator),
  854. (r'"[^"\n]*.', String),
  855. (r"'[^'\n]*.", String.Char),
  856. (r'\$[0-9a-f]+|[0-9a-f]+h\b', Number.Hex),
  857. (r'\d+', Number.Integer),
  858. (r'%[01]+', Number.Bin),
  859. (r'[#,.:()=\[\]]', Punctuation),
  860. (r'[a-z_.@$][\w.@$]*', Name),
  861. ]
  862. }
  863. def analyse_text(self, text):
  864. # comments in GAS start with "#"
  865. if re.search(r'^\s*;', text, re.MULTILINE):
  866. return 0.9
  867. class Dasm16Lexer(RegexLexer):
  868. """
  869. For DCPU-16 Assembly.
  870. """
  871. name = 'DASM16'
  872. url = 'http://0x10c.com/doc/dcpu-16.txt'
  873. aliases = ['dasm16']
  874. filenames = ['*.dasm16', '*.dasm']
  875. mimetypes = ['text/x-dasm16']
  876. version_added = '2.4'
  877. INSTRUCTIONS = [
  878. 'SET',
  879. 'ADD', 'SUB',
  880. 'MUL', 'MLI',
  881. 'DIV', 'DVI',
  882. 'MOD', 'MDI',
  883. 'AND', 'BOR', 'XOR',
  884. 'SHR', 'ASR', 'SHL',
  885. 'IFB', 'IFC', 'IFE', 'IFN', 'IFG', 'IFA', 'IFL', 'IFU',
  886. 'ADX', 'SBX',
  887. 'STI', 'STD',
  888. 'JSR',
  889. 'INT', 'IAG', 'IAS', 'RFI', 'IAQ', 'HWN', 'HWQ', 'HWI',
  890. ]
  891. REGISTERS = [
  892. 'A', 'B', 'C',
  893. 'X', 'Y', 'Z',
  894. 'I', 'J',
  895. 'SP', 'PC', 'EX',
  896. 'POP', 'PEEK', 'PUSH'
  897. ]
  898. # Regexes yo
  899. char = r'[a-zA-Z0-9_$@.]'
  900. identifier = r'(?:[a-zA-Z$_]' + char + r'*|\.' + char + '+)'
  901. number = r'[+-]?(?:0[xX][a-zA-Z0-9]+|\d+)'
  902. binary_number = r'0b[01_]+'
  903. instruction = r'(?i)(' + '|'.join(INSTRUCTIONS) + ')'
  904. single_char = r"'\\?" + char + "'"
  905. string = r'"(\\"|[^"])*"'
  906. def guess_identifier(lexer, match):
  907. ident = match.group(0)
  908. klass = Name.Variable if ident.upper() in lexer.REGISTERS else Name.Label
  909. yield match.start(), klass, ident
  910. tokens = {
  911. 'root': [
  912. include('whitespace'),
  913. (':' + identifier, Name.Label),
  914. (identifier + ':', Name.Label),
  915. (instruction, Name.Function, 'instruction-args'),
  916. (r'\.' + identifier, Name.Function, 'data-args'),
  917. (r'[\r\n]+', Whitespace)
  918. ],
  919. 'numeric' : [
  920. (binary_number, Number.Integer),
  921. (number, Number.Integer),
  922. (single_char, String),
  923. ],
  924. 'arg' : [
  925. (identifier, guess_identifier),
  926. include('numeric')
  927. ],
  928. 'deref' : [
  929. (r'\+', Punctuation),
  930. (r'\]', Punctuation, '#pop'),
  931. include('arg'),
  932. include('whitespace')
  933. ],
  934. 'instruction-line' : [
  935. (r'[\r\n]+', Whitespace, '#pop'),
  936. (r';.*?$', Comment, '#pop'),
  937. include('whitespace')
  938. ],
  939. 'instruction-args': [
  940. (r',', Punctuation),
  941. (r'\[', Punctuation, 'deref'),
  942. include('arg'),
  943. include('instruction-line')
  944. ],
  945. 'data-args' : [
  946. (r',', Punctuation),
  947. include('numeric'),
  948. (string, String),
  949. include('instruction-line')
  950. ],
  951. 'whitespace': [
  952. (r'\n', Whitespace),
  953. (r'\s+', Whitespace),
  954. (r';.*?\n', Comment)
  955. ],
  956. }