asm.py 40 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037
  1. """
  2. pygments.lexers.asm
  3. ~~~~~~~~~~~~~~~~~~~
  4. Lexers for assembly languages.
  5. :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import re
  9. from pygments.lexer import RegexLexer, include, bygroups, using, words, \
  10. DelegatingLexer, default
  11. from pygments.lexers.c_cpp import CppLexer, CLexer
  12. from pygments.lexers.d import DLexer
  13. from pygments.token import Text, Name, Number, String, Comment, Punctuation, \
  14. Other, Keyword, Operator, Whitespace
  15. __all__ = ['GasLexer', 'ObjdumpLexer', 'DObjdumpLexer', 'CppObjdumpLexer',
  16. 'CObjdumpLexer', 'HsailLexer', 'LlvmLexer', 'LlvmMirBodyLexer',
  17. 'LlvmMirLexer', 'NasmLexer', 'NasmObjdumpLexer', 'TasmLexer',
  18. 'Ca65Lexer', 'Dasm16Lexer']
  19. class GasLexer(RegexLexer):
  20. """
  21. For Gas (AT&T) assembly code.
  22. """
  23. name = 'GAS'
  24. aliases = ['gas', 'asm']
  25. filenames = ['*.s', '*.S']
  26. mimetypes = ['text/x-gas']
  27. #: optional Comment or Whitespace
  28. string = r'"(\\"|[^"])*"'
  29. char = r'[\w$.@-]'
  30. identifier = r'(?:[a-zA-Z$_]' + char + r'*|\.' + char + '+)'
  31. number = r'(?:0[xX][a-fA-F0-9]+|#?-?\d+)'
  32. register = '%' + identifier + r'\b'
  33. tokens = {
  34. 'root': [
  35. include('whitespace'),
  36. (identifier + ':', Name.Label),
  37. (r'\.' + identifier, Name.Attribute, 'directive-args'),
  38. (r'lock|rep(n?z)?|data\d+', Name.Attribute),
  39. (identifier, Name.Function, 'instruction-args'),
  40. (r'[\r\n]+', Text)
  41. ],
  42. 'directive-args': [
  43. (identifier, Name.Constant),
  44. (string, String),
  45. ('@' + identifier, Name.Attribute),
  46. (number, Number.Integer),
  47. (register, Name.Variable),
  48. (r'[\r\n]+', Whitespace, '#pop'),
  49. (r'([;#]|//).*?\n', Comment.Single, '#pop'),
  50. (r'/[*].*?[*]/', Comment.Multiline),
  51. (r'/[*].*?\n[\w\W]*?[*]/', Comment.Multiline, '#pop'),
  52. include('punctuation'),
  53. include('whitespace')
  54. ],
  55. 'instruction-args': [
  56. # For objdump-disassembled code, shouldn't occur in
  57. # actual assembler input
  58. ('([a-z0-9]+)( )(<)('+identifier+')(>)',
  59. bygroups(Number.Hex, Text, Punctuation, Name.Constant,
  60. Punctuation)),
  61. ('([a-z0-9]+)( )(<)('+identifier+')([-+])('+number+')(>)',
  62. bygroups(Number.Hex, Text, Punctuation, Name.Constant,
  63. Punctuation, Number.Integer, Punctuation)),
  64. # Address constants
  65. (identifier, Name.Constant),
  66. (number, Number.Integer),
  67. # Registers
  68. (register, Name.Variable),
  69. # Numeric constants
  70. ('$'+number, Number.Integer),
  71. (r"$'(.|\\')'", String.Char),
  72. (r'[\r\n]+', Whitespace, '#pop'),
  73. (r'([;#]|//).*?\n', Comment.Single, '#pop'),
  74. (r'/[*].*?[*]/', Comment.Multiline),
  75. (r'/[*].*?\n[\w\W]*?[*]/', Comment.Multiline, '#pop'),
  76. include('punctuation'),
  77. include('whitespace')
  78. ],
  79. 'whitespace': [
  80. (r'\n', Whitespace),
  81. (r'\s+', Whitespace),
  82. (r'([;#]|//).*?\n', Comment.Single),
  83. (r'/[*][\w\W]*?[*]/', Comment.Multiline)
  84. ],
  85. 'punctuation': [
  86. (r'[-*,.()\[\]!:{}]+', Punctuation)
  87. ]
  88. }
  89. def analyse_text(text):
  90. if re.search(r'^\.(text|data|section)', text, re.M):
  91. return True
  92. elif re.search(r'^\.\w+', text, re.M):
  93. return 0.1
  94. def _objdump_lexer_tokens(asm_lexer):
  95. """
  96. Common objdump lexer tokens to wrap an ASM lexer.
  97. """
  98. hex_re = r'[0-9A-Za-z]'
  99. return {
  100. 'root': [
  101. # File name & format:
  102. ('(.*?)(:)( +file format )(.*?)$',
  103. bygroups(Name.Label, Punctuation, Text, String)),
  104. # Section header
  105. ('(Disassembly of section )(.*?)(:)$',
  106. bygroups(Text, Name.Label, Punctuation)),
  107. # Function labels
  108. # (With offset)
  109. ('('+hex_re+'+)( )(<)(.*?)([-+])(0[xX][A-Za-z0-9]+)(>:)$',
  110. bygroups(Number.Hex, Whitespace, Punctuation, Name.Function,
  111. Punctuation, Number.Hex, Punctuation)),
  112. # (Without offset)
  113. ('('+hex_re+'+)( )(<)(.*?)(>:)$',
  114. bygroups(Number.Hex, Whitespace, Punctuation, Name.Function,
  115. Punctuation)),
  116. # Code line with disassembled instructions
  117. ('( *)('+hex_re+r'+:)(\t)((?:'+hex_re+hex_re+' )+)( *\t)([a-zA-Z].*?)$',
  118. bygroups(Whitespace, Name.Label, Whitespace, Number.Hex, Whitespace,
  119. using(asm_lexer))),
  120. # Code line without raw instructions (objdump --no-show-raw-insn)
  121. ('( *)('+hex_re+r'+:)( *\t)([a-zA-Z].*?)$',
  122. bygroups(Whitespace, Name.Label, Whitespace,
  123. using(asm_lexer))),
  124. # Code line with ascii
  125. ('( *)('+hex_re+r'+:)(\t)((?:'+hex_re+hex_re+' )+)( *)(.*?)$',
  126. bygroups(Whitespace, Name.Label, Whitespace, Number.Hex, Whitespace, String)),
  127. # Continued code line, only raw opcodes without disassembled
  128. # instruction
  129. ('( *)('+hex_re+r'+:)(\t)((?:'+hex_re+hex_re+' )+)$',
  130. bygroups(Whitespace, Name.Label, Whitespace, Number.Hex)),
  131. # Skipped a few bytes
  132. (r'\t\.\.\.$', Text),
  133. # Relocation line
  134. # (With offset)
  135. (r'(\t\t\t)('+hex_re+r'+:)( )([^\t]+)(\t)(.*?)([-+])(0x'+hex_re+'+)$',
  136. bygroups(Whitespace, Name.Label, Whitespace, Name.Property, Whitespace,
  137. Name.Constant, Punctuation, Number.Hex)),
  138. # (Without offset)
  139. (r'(\t\t\t)('+hex_re+r'+:)( )([^\t]+)(\t)(.*?)$',
  140. bygroups(Whitespace, Name.Label, Whitespace, Name.Property, Whitespace,
  141. Name.Constant)),
  142. (r'[^\n]+\n', Other)
  143. ]
  144. }
  145. class ObjdumpLexer(RegexLexer):
  146. """
  147. For the output of ``objdump -dr``.
  148. """
  149. name = 'objdump'
  150. aliases = ['objdump']
  151. filenames = ['*.objdump']
  152. mimetypes = ['text/x-objdump']
  153. tokens = _objdump_lexer_tokens(GasLexer)
  154. class DObjdumpLexer(DelegatingLexer):
  155. """
  156. For the output of ``objdump -Sr`` on compiled D files.
  157. """
  158. name = 'd-objdump'
  159. aliases = ['d-objdump']
  160. filenames = ['*.d-objdump']
  161. mimetypes = ['text/x-d-objdump']
  162. def __init__(self, **options):
  163. super().__init__(DLexer, ObjdumpLexer, **options)
  164. class CppObjdumpLexer(DelegatingLexer):
  165. """
  166. For the output of ``objdump -Sr`` on compiled C++ files.
  167. """
  168. name = 'cpp-objdump'
  169. aliases = ['cpp-objdump', 'c++-objdumb', 'cxx-objdump']
  170. filenames = ['*.cpp-objdump', '*.c++-objdump', '*.cxx-objdump']
  171. mimetypes = ['text/x-cpp-objdump']
  172. def __init__(self, **options):
  173. super().__init__(CppLexer, ObjdumpLexer, **options)
  174. class CObjdumpLexer(DelegatingLexer):
  175. """
  176. For the output of ``objdump -Sr`` on compiled C files.
  177. """
  178. name = 'c-objdump'
  179. aliases = ['c-objdump']
  180. filenames = ['*.c-objdump']
  181. mimetypes = ['text/x-c-objdump']
  182. def __init__(self, **options):
  183. super().__init__(CLexer, ObjdumpLexer, **options)
  184. class HsailLexer(RegexLexer):
  185. """
  186. For HSAIL assembly code.
  187. .. versionadded:: 2.2
  188. """
  189. name = 'HSAIL'
  190. aliases = ['hsail', 'hsa']
  191. filenames = ['*.hsail']
  192. mimetypes = ['text/x-hsail']
  193. string = r'"[^"]*?"'
  194. identifier = r'[a-zA-Z_][\w.]*'
  195. # Registers
  196. register_number = r'[0-9]+'
  197. register = r'(\$(c|s|d|q)' + register_number + r')\b'
  198. # Qualifiers
  199. alignQual = r'(align\(\d+\))'
  200. widthQual = r'(width\((\d+|all)\))'
  201. allocQual = r'(alloc\(agent\))'
  202. # Instruction Modifiers
  203. roundingMod = (r'((_ftz)?(_up|_down|_zero|_near))')
  204. datatypeMod = (r'_('
  205. # packedTypes
  206. r'u8x4|s8x4|u16x2|s16x2|u8x8|s8x8|u16x4|s16x4|u32x2|s32x2|'
  207. r'u8x16|s8x16|u16x8|s16x8|u32x4|s32x4|u64x2|s64x2|'
  208. r'f16x2|f16x4|f16x8|f32x2|f32x4|f64x2|'
  209. # baseTypes
  210. r'u8|s8|u16|s16|u32|s32|u64|s64|'
  211. r'b128|b8|b16|b32|b64|b1|'
  212. r'f16|f32|f64|'
  213. # opaqueType
  214. r'roimg|woimg|rwimg|samp|sig32|sig64)')
  215. # Numeric Constant
  216. float = r'((\d+\.)|(\d*\.\d+))[eE][+-]?\d+'
  217. hexfloat = r'0[xX](([0-9a-fA-F]+\.[0-9a-fA-F]*)|([0-9a-fA-F]*\.[0-9a-fA-F]+))[pP][+-]?\d+'
  218. ieeefloat = r'0((h|H)[0-9a-fA-F]{4}|(f|F)[0-9a-fA-F]{8}|(d|D)[0-9a-fA-F]{16})'
  219. tokens = {
  220. 'root': [
  221. include('whitespace'),
  222. include('comments'),
  223. (string, String),
  224. (r'@' + identifier + ':?', Name.Label),
  225. (register, Name.Variable.Anonymous),
  226. include('keyword'),
  227. (r'&' + identifier, Name.Variable.Global),
  228. (r'%' + identifier, Name.Variable),
  229. (hexfloat, Number.Hex),
  230. (r'0[xX][a-fA-F0-9]+', Number.Hex),
  231. (ieeefloat, Number.Float),
  232. (float, Number.Float),
  233. (r'\d+', Number.Integer),
  234. (r'[=<>{}\[\]()*.,:;!]|x\b', Punctuation)
  235. ],
  236. 'whitespace': [
  237. (r'(\n|\s)+', Whitespace),
  238. ],
  239. 'comments': [
  240. (r'/\*.*?\*/', Comment.Multiline),
  241. (r'//.*?\n', Comment.Single),
  242. ],
  243. 'keyword': [
  244. # Types
  245. (r'kernarg' + datatypeMod, Keyword.Type),
  246. # Regular keywords
  247. (r'\$(full|base|small|large|default|zero|near)', Keyword),
  248. (words((
  249. 'module', 'extension', 'pragma', 'prog', 'indirect', 'signature',
  250. 'decl', 'kernel', 'function', 'enablebreakexceptions',
  251. 'enabledetectexceptions', 'maxdynamicgroupsize', 'maxflatgridsize',
  252. 'maxflatworkgroupsize', 'requireddim', 'requiredgridsize',
  253. 'requiredworkgroupsize', 'requirenopartialworkgroups'),
  254. suffix=r'\b'), Keyword),
  255. # instructions
  256. (roundingMod, Keyword),
  257. (datatypeMod, Keyword),
  258. (r'_(' + alignQual + '|' + widthQual + ')', Keyword),
  259. (r'_kernarg', Keyword),
  260. (r'(nop|imagefence)\b', Keyword),
  261. (words((
  262. 'cleardetectexcept', 'clock', 'cuid', 'debugtrap', 'dim',
  263. 'getdetectexcept', 'groupbaseptr', 'kernargbaseptr', 'laneid',
  264. 'maxcuid', 'maxwaveid', 'packetid', 'setdetectexcept', 'waveid',
  265. 'workitemflatabsid', 'workitemflatid', 'nullptr', 'abs', 'bitrev',
  266. 'currentworkgroupsize', 'currentworkitemflatid', 'fract', 'ncos',
  267. 'neg', 'nexp2', 'nlog2', 'nrcp', 'nrsqrt', 'nsin', 'nsqrt',
  268. 'gridgroups', 'gridsize', 'not', 'sqrt', 'workgroupid',
  269. 'workgroupsize', 'workitemabsid', 'workitemid', 'ceil', 'floor',
  270. 'rint', 'trunc', 'add', 'bitmask', 'borrow', 'carry', 'copysign',
  271. 'div', 'rem', 'sub', 'shl', 'shr', 'and', 'or', 'xor', 'unpackhi',
  272. 'unpacklo', 'max', 'min', 'fma', 'mad', 'bitextract', 'bitselect',
  273. 'shuffle', 'cmov', 'bitalign', 'bytealign', 'lerp', 'nfma', 'mul',
  274. 'mulhi', 'mul24hi', 'mul24', 'mad24', 'mad24hi', 'bitinsert',
  275. 'combine', 'expand', 'lda', 'mov', 'pack', 'unpack', 'packcvt',
  276. 'unpackcvt', 'sad', 'sementp', 'ftos', 'stof', 'cmp', 'ld', 'st',
  277. '_eq', '_ne', '_lt', '_le', '_gt', '_ge', '_equ', '_neu', '_ltu',
  278. '_leu', '_gtu', '_geu', '_num', '_nan', '_seq', '_sne', '_slt',
  279. '_sle', '_sgt', '_sge', '_snum', '_snan', '_sequ', '_sneu', '_sltu',
  280. '_sleu', '_sgtu', '_sgeu', 'atomic', '_ld', '_st', '_cas', '_add',
  281. '_and', '_exch', '_max', '_min', '_or', '_sub', '_wrapdec',
  282. '_wrapinc', '_xor', 'ret', 'cvt', '_readonly', '_kernarg', '_global',
  283. 'br', 'cbr', 'sbr', '_scacq', '_screl', '_scar', '_rlx', '_wave',
  284. '_wg', '_agent', '_system', 'ldimage', 'stimage', '_v2', '_v3', '_v4',
  285. '_1d', '_2d', '_3d', '_1da', '_2da', '_1db', '_2ddepth', '_2dadepth',
  286. '_width', '_height', '_depth', '_array', '_channelorder',
  287. '_channeltype', 'querysampler', '_coord', '_filter', '_addressing',
  288. 'barrier', 'wavebarrier', 'initfbar', 'joinfbar', 'waitfbar',
  289. 'arrivefbar', 'leavefbar', 'releasefbar', 'ldf', 'activelaneid',
  290. 'activelanecount', 'activelanemask', 'activelanepermute', 'call',
  291. 'scall', 'icall', 'alloca', 'packetcompletionsig',
  292. 'addqueuewriteindex', 'casqueuewriteindex', 'ldqueuereadindex',
  293. 'stqueuereadindex', 'readonly', 'global', 'private', 'group',
  294. 'spill', 'arg', '_upi', '_downi', '_zeroi', '_neari', '_upi_sat',
  295. '_downi_sat', '_zeroi_sat', '_neari_sat', '_supi', '_sdowni',
  296. '_szeroi', '_sneari', '_supi_sat', '_sdowni_sat', '_szeroi_sat',
  297. '_sneari_sat', '_pp', '_ps', '_sp', '_ss', '_s', '_p', '_pp_sat',
  298. '_ps_sat', '_sp_sat', '_ss_sat', '_s_sat', '_p_sat')), Keyword),
  299. # Integer types
  300. (r'i[1-9]\d*', Keyword)
  301. ]
  302. }
  303. class LlvmLexer(RegexLexer):
  304. """
  305. For LLVM assembly code.
  306. """
  307. name = 'LLVM'
  308. url = 'https://llvm.org/docs/LangRef.html'
  309. aliases = ['llvm']
  310. filenames = ['*.ll']
  311. mimetypes = ['text/x-llvm']
  312. #: optional Comment or Whitespace
  313. string = r'"[^"]*?"'
  314. identifier = r'([-a-zA-Z$._][\w\-$.]*|' + string + ')'
  315. block_label = r'(' + identifier + r'|(\d+))'
  316. tokens = {
  317. 'root': [
  318. include('whitespace'),
  319. # Before keywords, because keywords are valid label names :(...
  320. (block_label + r'\s*:', Name.Label),
  321. include('keyword'),
  322. (r'%' + identifier, Name.Variable),
  323. (r'@' + identifier, Name.Variable.Global),
  324. (r'%\d+', Name.Variable.Anonymous),
  325. (r'@\d+', Name.Variable.Global),
  326. (r'#\d+', Name.Variable.Global),
  327. (r'!' + identifier, Name.Variable),
  328. (r'!\d+', Name.Variable.Anonymous),
  329. (r'c?' + string, String),
  330. (r'0[xX][a-fA-F0-9]+', Number),
  331. (r'-?\d+(?:[.]\d+)?(?:[eE][-+]?\d+(?:[.]\d+)?)?', Number),
  332. (r'[=<>{}\[\]()*.,!]|x\b', Punctuation)
  333. ],
  334. 'whitespace': [
  335. (r'(\n|\s+)+', Whitespace),
  336. (r';.*?\n', Comment)
  337. ],
  338. 'keyword': [
  339. # Regular keywords
  340. (words((
  341. 'aarch64_sve_vector_pcs', 'aarch64_vector_pcs', 'acq_rel',
  342. 'acquire', 'add', 'addrspace', 'addrspacecast', 'afn', 'alias',
  343. 'aliasee', 'align', 'alignLog2', 'alignstack', 'alloca',
  344. 'allocsize', 'allOnes', 'alwaysinline', 'alwaysInline',
  345. 'amdgpu_cs', 'amdgpu_es', 'amdgpu_gfx', 'amdgpu_gs',
  346. 'amdgpu_hs', 'amdgpu_kernel', 'amdgpu_ls', 'amdgpu_ps',
  347. 'amdgpu_vs', 'and', 'any', 'anyregcc', 'appending', 'arcp',
  348. 'argmemonly', 'args', 'arm_aapcs_vfpcc', 'arm_aapcscc',
  349. 'arm_apcscc', 'ashr', 'asm', 'atomic', 'atomicrmw',
  350. 'attributes', 'available_externally', 'avr_intrcc',
  351. 'avr_signalcc', 'bit', 'bitcast', 'bitMask', 'blockaddress',
  352. 'blockcount', 'br', 'branchFunnel', 'builtin', 'byArg',
  353. 'byref', 'byte', 'byteArray', 'byval', 'c', 'call', 'callbr',
  354. 'callee', 'caller', 'calls', 'canAutoHide', 'catch',
  355. 'catchpad', 'catchret', 'catchswitch', 'cc', 'ccc',
  356. 'cfguard_checkcc', 'cleanup', 'cleanuppad', 'cleanupret',
  357. 'cmpxchg', 'cold', 'coldcc', 'comdat', 'common', 'constant',
  358. 'contract', 'convergent', 'critical', 'cxx_fast_tlscc',
  359. 'datalayout', 'declare', 'default', 'define', 'deplibs',
  360. 'dereferenceable', 'dereferenceable_or_null', 'distinct',
  361. 'dllexport', 'dllimport', 'dso_local', 'dso_local_equivalent',
  362. 'dso_preemptable', 'dsoLocal', 'eq', 'exact', 'exactmatch',
  363. 'extern_weak', 'external', 'externally_initialized',
  364. 'extractelement', 'extractvalue', 'fadd', 'false', 'fast',
  365. 'fastcc', 'fcmp', 'fdiv', 'fence', 'filter', 'flags', 'fmul',
  366. 'fneg', 'fpext', 'fptosi', 'fptoui', 'fptrunc', 'freeze',
  367. 'frem', 'from', 'fsub', 'funcFlags', 'function', 'gc',
  368. 'getelementptr', 'ghccc', 'global', 'guid', 'gv', 'hash',
  369. 'hhvm_ccc', 'hhvmcc', 'hidden', 'hot', 'hotness', 'icmp',
  370. 'ifunc', 'inaccessiblemem_or_argmemonly',
  371. 'inaccessiblememonly', 'inalloca', 'inbounds', 'indir',
  372. 'indirectbr', 'info', 'initialexec', 'inline', 'inlineBits',
  373. 'inlinehint', 'inrange', 'inreg', 'insertelement',
  374. 'insertvalue', 'insts', 'intel_ocl_bicc', 'inteldialect',
  375. 'internal', 'inttoptr', 'invoke', 'jumptable', 'kind',
  376. 'landingpad', 'largest', 'linkage', 'linkonce', 'linkonce_odr',
  377. 'live', 'load', 'local_unnamed_addr', 'localdynamic',
  378. 'localexec', 'lshr', 'max', 'metadata', 'min', 'minsize',
  379. 'module', 'monotonic', 'msp430_intrcc', 'mul', 'mustprogress',
  380. 'musttail', 'naked', 'name', 'nand', 'ne', 'nest', 'ninf',
  381. 'nnan', 'noalias', 'nobuiltin', 'nocallback', 'nocapture',
  382. 'nocf_check', 'noduplicate', 'noduplicates', 'nofree',
  383. 'noimplicitfloat', 'noinline', 'noInline', 'nomerge', 'none',
  384. 'nonlazybind', 'nonnull', 'noprofile', 'norecurse',
  385. 'noRecurse', 'noredzone', 'noreturn', 'nosync', 'notail',
  386. 'notEligibleToImport', 'noundef', 'nounwind', 'nsw',
  387. 'nsz', 'null', 'null_pointer_is_valid', 'nuw', 'oeq', 'offset',
  388. 'oge', 'ogt', 'ole', 'olt', 'one', 'opaque', 'optforfuzzing',
  389. 'optnone', 'optsize', 'or', 'ord', 'param', 'params',
  390. 'partition', 'path', 'personality', 'phi', 'poison',
  391. 'preallocated', 'prefix', 'preserve_allcc', 'preserve_mostcc',
  392. 'private', 'prologue', 'protected', 'ptrtoint', 'ptx_device',
  393. 'ptx_kernel', 'readnone', 'readNone', 'readonly', 'readOnly',
  394. 'reassoc', 'refs', 'relbf', 'release', 'resByArg', 'resume',
  395. 'ret', 'returnDoesNotAlias', 'returned', 'returns_twice',
  396. 'safestack', 'samesize', 'sanitize_address',
  397. 'sanitize_hwaddress', 'sanitize_memory', 'sanitize_memtag',
  398. 'sanitize_thread', 'sdiv', 'section', 'select', 'seq_cst',
  399. 'sext', 'sge', 'sgt', 'shadowcallstack', 'shl',
  400. 'shufflevector', 'sideeffect', 'signext', 'single',
  401. 'singleImpl', 'singleImplName', 'sitofp', 'sizeM1',
  402. 'sizeM1BitWidth', 'sle', 'slt', 'source_filename',
  403. 'speculatable', 'speculative_load_hardening', 'spir_func',
  404. 'spir_kernel', 'srem', 'sret', 'ssp', 'sspreq', 'sspstrong',
  405. 'store', 'strictfp', 'sub', 'summaries', 'summary', 'swiftcc',
  406. 'swifterror', 'swiftself', 'switch', 'syncscope', 'tail',
  407. 'tailcc', 'target', 'thread_local', 'to', 'token', 'triple',
  408. 'true', 'trunc', 'type', 'typeCheckedLoadConstVCalls',
  409. 'typeCheckedLoadVCalls', 'typeid', 'typeidCompatibleVTable',
  410. 'typeIdInfo', 'typeTestAssumeConstVCalls',
  411. 'typeTestAssumeVCalls', 'typeTestRes', 'typeTests', 'udiv',
  412. 'ueq', 'uge', 'ugt', 'uitofp', 'ule', 'ult', 'umax', 'umin',
  413. 'undef', 'une', 'uniformRetVal', 'uniqueRetVal', 'unknown',
  414. 'unnamed_addr', 'uno', 'unordered', 'unreachable', 'unsat',
  415. 'unwind', 'urem', 'uselistorder', 'uselistorder_bb', 'uwtable',
  416. 'va_arg', 'varFlags', 'variable', 'vcall_visibility',
  417. 'vFuncId', 'virtFunc', 'virtualConstProp', 'void', 'volatile',
  418. 'vscale', 'vTableFuncs', 'weak', 'weak_odr', 'webkit_jscc',
  419. 'win64cc', 'within', 'wpdRes', 'wpdResolutions', 'writeonly',
  420. 'x', 'x86_64_sysvcc', 'x86_fastcallcc', 'x86_intrcc',
  421. 'x86_mmx', 'x86_regcallcc', 'x86_stdcallcc', 'x86_thiscallcc',
  422. 'x86_vectorcallcc', 'xchg', 'xor', 'zeroext',
  423. 'zeroinitializer', 'zext', 'immarg', 'willreturn'),
  424. suffix=r'\b'), Keyword),
  425. # Types
  426. (words(('void', 'half', 'bfloat', 'float', 'double', 'fp128',
  427. 'x86_fp80', 'ppc_fp128', 'label', 'metadata', 'x86_mmx',
  428. 'x86_amx', 'token', 'ptr')),
  429. Keyword.Type),
  430. # Integer types
  431. (r'i[1-9]\d*', Keyword.Type)
  432. ]
  433. }
  434. class LlvmMirBodyLexer(RegexLexer):
  435. """
  436. For LLVM MIR examples without the YAML wrapper.
  437. .. versionadded:: 2.6
  438. """
  439. name = 'LLVM-MIR Body'
  440. url = 'https://llvm.org/docs/MIRLangRef.html'
  441. aliases = ['llvm-mir-body']
  442. filenames = []
  443. mimetypes = []
  444. tokens = {
  445. 'root': [
  446. # Attributes on basic blocks
  447. (words(('liveins', 'successors'), suffix=':'), Keyword),
  448. # Basic Block Labels
  449. (r'bb\.[0-9]+(\.[a-zA-Z0-9_.-]+)?( \(address-taken\))?:', Name.Label),
  450. (r'bb\.[0-9]+ \(%[a-zA-Z0-9_.-]+\)( \(address-taken\))?:', Name.Label),
  451. (r'%bb\.[0-9]+(\.\w+)?', Name.Label),
  452. # Stack references
  453. (r'%stack\.[0-9]+(\.\w+\.addr)?', Name),
  454. # Subreg indices
  455. (r'%subreg\.\w+', Name),
  456. # Virtual registers
  457. (r'%[a-zA-Z0-9_]+ *', Name.Variable, 'vreg'),
  458. # Reference to LLVM-IR global
  459. include('global'),
  460. # Reference to Intrinsic
  461. (r'intrinsic\(\@[a-zA-Z0-9_.]+\)', Name.Variable.Global),
  462. # Comparison predicates
  463. (words(('eq', 'ne', 'sgt', 'sge', 'slt', 'sle', 'ugt', 'uge', 'ult',
  464. 'ule'), prefix=r'intpred\(', suffix=r'\)'), Name.Builtin),
  465. (words(('oeq', 'one', 'ogt', 'oge', 'olt', 'ole', 'ugt', 'uge',
  466. 'ult', 'ule'), prefix=r'floatpred\(', suffix=r'\)'),
  467. Name.Builtin),
  468. # Physical registers
  469. (r'\$\w+', String.Single),
  470. # Assignment operator
  471. (r'=', Operator),
  472. # gMIR Opcodes
  473. (r'(G_ANYEXT|G_[SZ]EXT|G_SEXT_INREG|G_TRUNC|G_IMPLICIT_DEF|G_PHI|'
  474. r'G_FRAME_INDEX|G_GLOBAL_VALUE|G_INTTOPTR|G_PTRTOINT|G_BITCAST|'
  475. r'G_CONSTANT|G_FCONSTANT|G_VASTART|G_VAARG|G_CTLZ|G_CTLZ_ZERO_UNDEF|'
  476. r'G_CTTZ|G_CTTZ_ZERO_UNDEF|G_CTPOP|G_BSWAP|G_BITREVERSE|'
  477. r'G_ADDRSPACE_CAST|G_BLOCK_ADDR|G_JUMP_TABLE|G_DYN_STACKALLOC|'
  478. r'G_ADD|G_SUB|G_MUL|G_[SU]DIV|G_[SU]REM|G_AND|G_OR|G_XOR|G_SHL|'
  479. r'G_[LA]SHR|G_[IF]CMP|G_SELECT|G_GEP|G_PTR_MASK|G_SMIN|G_SMAX|'
  480. r'G_UMIN|G_UMAX|G_[US]ADDO|G_[US]ADDE|G_[US]SUBO|G_[US]SUBE|'
  481. r'G_[US]MULO|G_[US]MULH|G_FNEG|G_FPEXT|G_FPTRUNC|G_FPTO[US]I|'
  482. r'G_[US]ITOFP|G_FABS|G_FCOPYSIGN|G_FCANONICALIZE|G_FMINNUM|'
  483. r'G_FMAXNUM|G_FMINNUM_IEEE|G_FMAXNUM_IEEE|G_FMINIMUM|G_FMAXIMUM|'
  484. r'G_FADD|G_FSUB|G_FMUL|G_FMA|G_FMAD|G_FDIV|G_FREM|G_FPOW|G_FEXP|'
  485. r'G_FEXP2|G_FLOG|G_FLOG2|G_FLOG10|G_FCEIL|G_FCOS|G_FSIN|G_FSQRT|'
  486. r'G_FFLOOR|G_FRINT|G_FNEARBYINT|G_INTRINSIC_TRUNC|'
  487. r'G_INTRINSIC_ROUND|G_LOAD|G_[ZS]EXTLOAD|G_INDEXED_LOAD|'
  488. r'G_INDEXED_[ZS]EXTLOAD|G_STORE|G_INDEXED_STORE|'
  489. r'G_ATOMIC_CMPXCHG_WITH_SUCCESS|G_ATOMIC_CMPXCHG|'
  490. r'G_ATOMICRMW_(XCHG|ADD|SUB|AND|NAND|OR|XOR|MAX|MIN|UMAX|UMIN|FADD|'
  491. r'FSUB)'
  492. r'|G_FENCE|G_EXTRACT|G_UNMERGE_VALUES|G_INSERT|G_MERGE_VALUES|'
  493. r'G_BUILD_VECTOR|G_BUILD_VECTOR_TRUNC|G_CONCAT_VECTORS|'
  494. r'G_INTRINSIC|G_INTRINSIC_W_SIDE_EFFECTS|G_BR|G_BRCOND|'
  495. r'G_BRINDIRECT|G_BRJT|G_INSERT_VECTOR_ELT|G_EXTRACT_VECTOR_ELT|'
  496. r'G_SHUFFLE_VECTOR)\b',
  497. Name.Builtin),
  498. # Target independent opcodes
  499. (r'(COPY|PHI|INSERT_SUBREG|EXTRACT_SUBREG|REG_SEQUENCE)\b',
  500. Name.Builtin),
  501. # Flags
  502. (words(('killed', 'implicit')), Keyword),
  503. # ConstantInt values
  504. (r'(i[0-9]+)( +)', bygroups(Keyword.Type, Whitespace), 'constantint'),
  505. # ConstantFloat values
  506. (r'(half|float|double) +', Keyword.Type, 'constantfloat'),
  507. # Bare immediates
  508. include('integer'),
  509. # MMO's
  510. (r'(::)( *)', bygroups(Operator, Whitespace), 'mmo'),
  511. # MIR Comments
  512. (r';.*', Comment),
  513. # If we get here, assume it's a target instruction
  514. (r'[a-zA-Z0-9_]+', Name),
  515. # Everything else that isn't highlighted
  516. (r'[(), \n]+', Text),
  517. ],
  518. # The integer constant from a ConstantInt value
  519. 'constantint': [
  520. include('integer'),
  521. (r'(?=.)', Text, '#pop'),
  522. ],
  523. # The floating point constant from a ConstantFloat value
  524. 'constantfloat': [
  525. include('float'),
  526. (r'(?=.)', Text, '#pop'),
  527. ],
  528. 'vreg': [
  529. # The bank or class if there is one
  530. (r'( *)(:(?!:))', bygroups(Whitespace, Keyword), ('#pop', 'vreg_bank_or_class')),
  531. # The LLT if there is one
  532. (r'( *)(\()', bygroups(Whitespace, Text), 'vreg_type'),
  533. (r'(?=.)', Text, '#pop'),
  534. ],
  535. 'vreg_bank_or_class': [
  536. # The unassigned bank/class
  537. (r'( *)(_)', bygroups(Whitespace, Name.Variable.Magic)),
  538. (r'( *)([a-zA-Z0-9_]+)', bygroups(Whitespace, Name.Variable)),
  539. # The LLT if there is one
  540. (r'( *)(\()', bygroups(Whitespace, Text), 'vreg_type'),
  541. (r'(?=.)', Text, '#pop'),
  542. ],
  543. 'vreg_type': [
  544. # Scalar and pointer types
  545. (r'( *)([sp][0-9]+)', bygroups(Whitespace, Keyword.Type)),
  546. (r'( *)(<[0-9]+ *x *[sp][0-9]+>)', bygroups(Whitespace, Keyword.Type)),
  547. (r'\)', Text, '#pop'),
  548. (r'(?=.)', Text, '#pop'),
  549. ],
  550. 'mmo': [
  551. (r'\(', Text),
  552. (r' +', Whitespace),
  553. (words(('load', 'store', 'on', 'into', 'from', 'align', 'monotonic',
  554. 'acquire', 'release', 'acq_rel', 'seq_cst')),
  555. Keyword),
  556. # IR references
  557. (r'%ir\.[a-zA-Z0-9_.-]+', Name),
  558. (r'%ir-block\.[a-zA-Z0-9_.-]+', Name),
  559. (r'[-+]', Operator),
  560. include('integer'),
  561. include('global'),
  562. (r',', Punctuation),
  563. (r'\), \(', Text),
  564. (r'\)', Text, '#pop'),
  565. ],
  566. 'integer': [(r'-?[0-9]+', Number.Integer),],
  567. 'float': [(r'-?[0-9]+\.[0-9]+(e[+-][0-9]+)?', Number.Float)],
  568. 'global': [(r'\@[a-zA-Z0-9_.]+', Name.Variable.Global)],
  569. }
  570. class LlvmMirLexer(RegexLexer):
  571. """
  572. Lexer for the overall LLVM MIR document format.
  573. MIR is a human readable serialization format that's used to represent LLVM's
  574. machine specific intermediate representation. It allows LLVM's developers to
  575. see the state of the compilation process at various points, as well as test
  576. individual pieces of the compiler.
  577. .. versionadded:: 2.6
  578. """
  579. name = 'LLVM-MIR'
  580. url = 'https://llvm.org/docs/MIRLangRef.html'
  581. aliases = ['llvm-mir']
  582. filenames = ['*.mir']
  583. tokens = {
  584. 'root': [
  585. # Comments are hashes at the YAML level
  586. (r'#.*', Comment),
  587. # Documents starting with | are LLVM-IR
  588. (r'--- \|$', Keyword, 'llvm_ir'),
  589. # Other documents are MIR
  590. (r'---', Keyword, 'llvm_mir'),
  591. # Consume everything else in one token for efficiency
  592. (r'[^-#]+|.', Text),
  593. ],
  594. 'llvm_ir': [
  595. # Documents end with '...' or '---'
  596. (r'(\.\.\.|(?=---))', Keyword, '#pop'),
  597. # Delegate to the LlvmLexer
  598. (r'((?:.|\n)+?)(?=(\.\.\.|---))', bygroups(using(LlvmLexer))),
  599. ],
  600. 'llvm_mir': [
  601. # Comments are hashes at the YAML level
  602. (r'#.*', Comment),
  603. # Documents end with '...' or '---'
  604. (r'(\.\.\.|(?=---))', Keyword, '#pop'),
  605. # Handle the simple attributes
  606. (r'name:', Keyword, 'name'),
  607. (words(('alignment', ),
  608. suffix=':'), Keyword, 'number'),
  609. (words(('legalized', 'regBankSelected', 'tracksRegLiveness',
  610. 'selected', 'exposesReturnsTwice'),
  611. suffix=':'), Keyword, 'boolean'),
  612. # Handle the attributes don't highlight inside
  613. (words(('registers', 'stack', 'fixedStack', 'liveins', 'frameInfo',
  614. 'machineFunctionInfo'),
  615. suffix=':'), Keyword),
  616. # Delegate the body block to the LlvmMirBodyLexer
  617. (r'body: *\|', Keyword, 'llvm_mir_body'),
  618. # Consume everything else
  619. (r'.+', Text),
  620. (r'\n', Whitespace),
  621. ],
  622. 'name': [
  623. (r'[^\n]+', Name),
  624. default('#pop'),
  625. ],
  626. 'boolean': [
  627. (r' *(true|false)', Name.Builtin),
  628. default('#pop'),
  629. ],
  630. 'number': [
  631. (r' *[0-9]+', Number),
  632. default('#pop'),
  633. ],
  634. 'llvm_mir_body': [
  635. # Documents end with '...' or '---'.
  636. # We have to pop llvm_mir_body and llvm_mir
  637. (r'(\.\.\.|(?=---))', Keyword, '#pop:2'),
  638. # Delegate the body block to the LlvmMirBodyLexer
  639. (r'((?:.|\n)+?)(?=\.\.\.|---)', bygroups(using(LlvmMirBodyLexer))),
  640. # The '...' is optional. If we didn't already find it then it isn't
  641. # there. There might be a '---' instead though.
  642. (r'(?!\.\.\.|---)((?:.|\n)+)', bygroups(using(LlvmMirBodyLexer))),
  643. ],
  644. }
  645. class NasmLexer(RegexLexer):
  646. """
  647. For Nasm (Intel) assembly code.
  648. """
  649. name = 'NASM'
  650. aliases = ['nasm']
  651. filenames = ['*.asm', '*.ASM', '*.nasm']
  652. mimetypes = ['text/x-nasm']
  653. # Tasm uses the same file endings, but TASM is not as common as NASM, so
  654. # we prioritize NASM higher by default
  655. priority = 1.0
  656. identifier = r'[a-z$._?][\w$.?#@~]*'
  657. hexn = r'(?:0x[0-9a-f]+|$0[0-9a-f]*|[0-9]+[0-9a-f]*h)'
  658. octn = r'[0-7]+q'
  659. binn = r'[01]+b'
  660. decn = r'[0-9]+'
  661. floatn = decn + r'\.e?' + decn
  662. string = r'"(\\"|[^"\n])*"|' + r"'(\\'|[^'\n])*'|" + r"`(\\`|[^`\n])*`"
  663. declkw = r'(?:res|d)[bwdqt]|times'
  664. register = (r'(r[0-9][0-5]?[bwd]?|'
  665. r'[a-d][lh]|[er]?[a-d]x|[er]?[sb]p|[er]?[sd]i|[c-gs]s|st[0-7]|'
  666. r'mm[0-7]|cr[0-4]|dr[0-367]|tr[3-7]|k[0-7]|'
  667. r'[xyz]mm(?:[12][0-9]?|3[01]?|[04-9]))\b')
  668. wordop = r'seg|wrt|strict|rel|abs'
  669. type = r'byte|[dq]?word'
  670. # Directives must be followed by whitespace, otherwise CPU will match
  671. # cpuid for instance.
  672. directives = (r'(?:BITS|USE16|USE32|SECTION|SEGMENT|ABSOLUTE|EXTERN|GLOBAL|'
  673. r'ORG|ALIGN|STRUC|ENDSTRUC|COMMON|CPU|GROUP|UPPERCASE|IMPORT|'
  674. r'EXPORT|LIBRARY|MODULE)(?=\s)')
  675. flags = re.IGNORECASE | re.MULTILINE
  676. tokens = {
  677. 'root': [
  678. (r'^\s*%', Comment.Preproc, 'preproc'),
  679. include('whitespace'),
  680. (identifier + ':', Name.Label),
  681. (r'(%s)(\s+)(equ)' % identifier,
  682. bygroups(Name.Constant, Whitespace, Keyword.Declaration),
  683. 'instruction-args'),
  684. (directives, Keyword, 'instruction-args'),
  685. (declkw, Keyword.Declaration, 'instruction-args'),
  686. (identifier, Name.Function, 'instruction-args'),
  687. (r'[\r\n]+', Whitespace)
  688. ],
  689. 'instruction-args': [
  690. (string, String),
  691. (hexn, Number.Hex),
  692. (octn, Number.Oct),
  693. (binn, Number.Bin),
  694. (floatn, Number.Float),
  695. (decn, Number.Integer),
  696. include('punctuation'),
  697. (register, Name.Builtin),
  698. (identifier, Name.Variable),
  699. (r'[\r\n]+', Whitespace, '#pop'),
  700. include('whitespace')
  701. ],
  702. 'preproc': [
  703. (r'[^;\n]+', Comment.Preproc),
  704. (r';.*?\n', Comment.Single, '#pop'),
  705. (r'\n', Comment.Preproc, '#pop'),
  706. ],
  707. 'whitespace': [
  708. (r'\n', Whitespace),
  709. (r'[ \t]+', Whitespace),
  710. (r';.*', Comment.Single),
  711. (r'#.*', Comment.Single)
  712. ],
  713. 'punctuation': [
  714. (r'[,{}():\[\]]+', Punctuation),
  715. (r'[&|^<>+*/%~-]+', Operator),
  716. (r'[$]+', Keyword.Constant),
  717. (wordop, Operator.Word),
  718. (type, Keyword.Type)
  719. ],
  720. }
  721. def analyse_text(text):
  722. # Probably TASM
  723. if re.match(r'PROC', text, re.IGNORECASE):
  724. return False
  725. class NasmObjdumpLexer(ObjdumpLexer):
  726. """
  727. For the output of ``objdump -d -M intel``.
  728. .. versionadded:: 2.0
  729. """
  730. name = 'objdump-nasm'
  731. aliases = ['objdump-nasm']
  732. filenames = ['*.objdump-intel']
  733. mimetypes = ['text/x-nasm-objdump']
  734. tokens = _objdump_lexer_tokens(NasmLexer)
  735. class TasmLexer(RegexLexer):
  736. """
  737. For Tasm (Turbo Assembler) assembly code.
  738. """
  739. name = 'TASM'
  740. aliases = ['tasm']
  741. filenames = ['*.asm', '*.ASM', '*.tasm']
  742. mimetypes = ['text/x-tasm']
  743. identifier = r'[@a-z$._?][\w$.?#@~]*'
  744. hexn = r'(?:0x[0-9a-f]+|$0[0-9a-f]*|[0-9]+[0-9a-f]*h)'
  745. octn = r'[0-7]+q'
  746. binn = r'[01]+b'
  747. decn = r'[0-9]+'
  748. floatn = decn + r'\.e?' + decn
  749. string = r'"(\\"|[^"\n])*"|' + r"'(\\'|[^'\n])*'|" + r"`(\\`|[^`\n])*`"
  750. declkw = r'(?:res|d)[bwdqt]|times'
  751. register = (r'(r[0-9][0-5]?[bwd]|'
  752. r'[a-d][lh]|[er]?[a-d]x|[er]?[sb]p|[er]?[sd]i|[c-gs]s|st[0-7]|'
  753. r'mm[0-7]|cr[0-4]|dr[0-367]|tr[3-7])\b')
  754. wordop = r'seg|wrt|strict'
  755. type = r'byte|[dq]?word'
  756. directives = (r'BITS|USE16|USE32|SECTION|SEGMENT|ABSOLUTE|EXTERN|GLOBAL|'
  757. r'ORG|ALIGN|STRUC|ENDSTRUC|ENDS|COMMON|CPU|GROUP|UPPERCASE|INCLUDE|'
  758. r'EXPORT|LIBRARY|MODULE|PROC|ENDP|USES|ARG|DATASEG|UDATASEG|END|IDEAL|'
  759. r'P386|MODEL|ASSUME|CODESEG|SIZE')
  760. # T[A-Z][a-z] is more of a convention. Lexer should filter out STRUC definitions
  761. # and then 'add' them to datatype somehow.
  762. datatype = (r'db|dd|dw|T[A-Z][a-z]+')
  763. flags = re.IGNORECASE | re.MULTILINE
  764. tokens = {
  765. 'root': [
  766. (r'^\s*%', Comment.Preproc, 'preproc'),
  767. include('whitespace'),
  768. (identifier + ':', Name.Label),
  769. (directives, Keyword, 'instruction-args'),
  770. (r'(%s)(\s+)(%s)' % (identifier, datatype),
  771. bygroups(Name.Constant, Whitespace, Keyword.Declaration),
  772. 'instruction-args'),
  773. (declkw, Keyword.Declaration, 'instruction-args'),
  774. (identifier, Name.Function, 'instruction-args'),
  775. (r'[\r\n]+', Whitespace)
  776. ],
  777. 'instruction-args': [
  778. (string, String),
  779. (hexn, Number.Hex),
  780. (octn, Number.Oct),
  781. (binn, Number.Bin),
  782. (floatn, Number.Float),
  783. (decn, Number.Integer),
  784. include('punctuation'),
  785. (register, Name.Builtin),
  786. (identifier, Name.Variable),
  787. # Do not match newline when it's preceded by a backslash
  788. (r'(\\)(\s*)(;.*)([\r\n])',
  789. bygroups(Text, Whitespace, Comment.Single, Whitespace)),
  790. (r'[\r\n]+', Whitespace, '#pop'),
  791. include('whitespace')
  792. ],
  793. 'preproc': [
  794. (r'[^;\n]+', Comment.Preproc),
  795. (r';.*?\n', Comment.Single, '#pop'),
  796. (r'\n', Comment.Preproc, '#pop'),
  797. ],
  798. 'whitespace': [
  799. (r'[\n\r]', Whitespace),
  800. (r'(\\)([\n\r])', bygroups(Text, Whitespace)),
  801. (r'[ \t]+', Whitespace),
  802. (r';.*', Comment.Single)
  803. ],
  804. 'punctuation': [
  805. (r'[,():\[\]]+', Punctuation),
  806. (r'[&|^<>+*=/%~-]+', Operator),
  807. (r'[$]+', Keyword.Constant),
  808. (wordop, Operator.Word),
  809. (type, Keyword.Type)
  810. ],
  811. }
  812. def analyse_text(text):
  813. # See above
  814. if re.match(r'PROC', text, re.I):
  815. return True
  816. class Ca65Lexer(RegexLexer):
  817. """
  818. For ca65 assembler sources.
  819. .. versionadded:: 1.6
  820. """
  821. name = 'ca65 assembler'
  822. aliases = ['ca65']
  823. filenames = ['*.s']
  824. flags = re.IGNORECASE
  825. tokens = {
  826. 'root': [
  827. (r';.*', Comment.Single),
  828. (r'\s+', Whitespace),
  829. (r'[a-z_.@$][\w.@$]*:', Name.Label),
  830. (r'((ld|st)[axy]|(in|de)[cxy]|asl|lsr|ro[lr]|adc|sbc|cmp|cp[xy]'
  831. r'|cl[cvdi]|se[cdi]|jmp|jsr|bne|beq|bpl|bmi|bvc|bvs|bcc|bcs'
  832. r'|p[lh][ap]|rt[is]|brk|nop|ta[xy]|t[xy]a|txs|tsx|and|ora|eor'
  833. r'|bit)\b', Keyword),
  834. (r'\.\w+', Keyword.Pseudo),
  835. (r'[-+~*/^&|!<>=]', Operator),
  836. (r'"[^"\n]*.', String),
  837. (r"'[^'\n]*.", String.Char),
  838. (r'\$[0-9a-f]+|[0-9a-f]+h\b', Number.Hex),
  839. (r'\d+', Number.Integer),
  840. (r'%[01]+', Number.Bin),
  841. (r'[#,.:()=\[\]]', Punctuation),
  842. (r'[a-z_.@$][\w.@$]*', Name),
  843. ]
  844. }
  845. def analyse_text(self, text):
  846. # comments in GAS start with "#"
  847. if re.search(r'^\s*;', text, re.MULTILINE):
  848. return 0.9
  849. class Dasm16Lexer(RegexLexer):
  850. """
  851. For DCPU-16 Assembly.
  852. .. versionadded:: 2.4
  853. """
  854. name = 'DASM16'
  855. url = 'http://0x10c.com/doc/dcpu-16.txt'
  856. aliases = ['dasm16']
  857. filenames = ['*.dasm16', '*.dasm']
  858. mimetypes = ['text/x-dasm16']
  859. INSTRUCTIONS = [
  860. 'SET',
  861. 'ADD', 'SUB',
  862. 'MUL', 'MLI',
  863. 'DIV', 'DVI',
  864. 'MOD', 'MDI',
  865. 'AND', 'BOR', 'XOR',
  866. 'SHR', 'ASR', 'SHL',
  867. 'IFB', 'IFC', 'IFE', 'IFN', 'IFG', 'IFA', 'IFL', 'IFU',
  868. 'ADX', 'SBX',
  869. 'STI', 'STD',
  870. 'JSR',
  871. 'INT', 'IAG', 'IAS', 'RFI', 'IAQ', 'HWN', 'HWQ', 'HWI',
  872. ]
  873. REGISTERS = [
  874. 'A', 'B', 'C',
  875. 'X', 'Y', 'Z',
  876. 'I', 'J',
  877. 'SP', 'PC', 'EX',
  878. 'POP', 'PEEK', 'PUSH'
  879. ]
  880. # Regexes yo
  881. char = r'[a-zA-Z0-9_$@.]'
  882. identifier = r'(?:[a-zA-Z$_]' + char + r'*|\.' + char + '+)'
  883. number = r'[+-]?(?:0[xX][a-zA-Z0-9]+|\d+)'
  884. binary_number = r'0b[01_]+'
  885. instruction = r'(?i)(' + '|'.join(INSTRUCTIONS) + ')'
  886. single_char = r"'\\?" + char + "'"
  887. string = r'"(\\"|[^"])*"'
  888. def guess_identifier(lexer, match):
  889. ident = match.group(0)
  890. klass = Name.Variable if ident.upper() in lexer.REGISTERS else Name.Label
  891. yield match.start(), klass, ident
  892. tokens = {
  893. 'root': [
  894. include('whitespace'),
  895. (':' + identifier, Name.Label),
  896. (identifier + ':', Name.Label),
  897. (instruction, Name.Function, 'instruction-args'),
  898. (r'\.' + identifier, Name.Function, 'data-args'),
  899. (r'[\r\n]+', Whitespace)
  900. ],
  901. 'numeric' : [
  902. (binary_number, Number.Integer),
  903. (number, Number.Integer),
  904. (single_char, String),
  905. ],
  906. 'arg' : [
  907. (identifier, guess_identifier),
  908. include('numeric')
  909. ],
  910. 'deref' : [
  911. (r'\+', Punctuation),
  912. (r'\]', Punctuation, '#pop'),
  913. include('arg'),
  914. include('whitespace')
  915. ],
  916. 'instruction-line' : [
  917. (r'[\r\n]+', Whitespace, '#pop'),
  918. (r';.*?$', Comment, '#pop'),
  919. include('whitespace')
  920. ],
  921. 'instruction-args': [
  922. (r',', Punctuation),
  923. (r'\[', Punctuation, 'deref'),
  924. include('arg'),
  925. include('instruction-line')
  926. ],
  927. 'data-args' : [
  928. (r',', Punctuation),
  929. include('numeric'),
  930. (string, String),
  931. include('instruction-line')
  932. ],
  933. 'whitespace': [
  934. (r'\n', Whitespace),
  935. (r'\s+', Whitespace),
  936. (r';.*?\n', Comment)
  937. ],
  938. }