# esoteric.py (10 KB)
  1. """
  2. pygments.lexers.esoteric
  3. ~~~~~~~~~~~~~~~~~~~~~~~~
  4. Lexers for esoteric languages.
  5. :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. from pygments.lexer import RegexLexer, include, words, bygroups
  9. from pygments.token import Comment, Operator, Keyword, Name, String, Number, \
  10. Punctuation, Error, Whitespace
  11. __all__ = ['BrainfuckLexer', 'BefungeLexer', 'RedcodeLexer', 'CAmkESLexer',
  12. 'CapDLLexer', 'AheuiLexer']
  13. class BrainfuckLexer(RegexLexer):
  14. """
  15. Lexer for the esoteric BrainFuck language.
  16. """
  17. name = 'Brainfuck'
  18. url = 'http://www.muppetlabs.com/~breadbox/bf/'
  19. aliases = ['brainfuck', 'bf']
  20. filenames = ['*.bf', '*.b']
  21. mimetypes = ['application/x-brainfuck']
  22. version_added = ''
  23. tokens = {
  24. 'common': [
  25. # use different colors for different instruction types
  26. (r'[.,]+', Name.Tag),
  27. (r'[+-]+', Name.Builtin),
  28. (r'[<>]+', Name.Variable),
  29. (r'[^.,+\-<>\[\]]+', Comment),
  30. ],
  31. 'root': [
  32. (r'\[', Keyword, 'loop'),
  33. (r'\]', Error),
  34. include('common'),
  35. ],
  36. 'loop': [
  37. (r'\[', Keyword, '#push'),
  38. (r'\]', Keyword, '#pop'),
  39. include('common'),
  40. ]
  41. }
  42. def analyse_text(text):
  43. """It's safe to assume that a program which mostly consists of + -
  44. and < > is brainfuck."""
  45. plus_minus_count = 0
  46. greater_less_count = 0
  47. range_to_check = max(256, len(text))
  48. for c in text[:range_to_check]:
  49. if c == '+' or c == '-':
  50. plus_minus_count += 1
  51. if c == '<' or c == '>':
  52. greater_less_count += 1
  53. if plus_minus_count > (0.25 * range_to_check):
  54. return 1.0
  55. if greater_less_count > (0.25 * range_to_check):
  56. return 1.0
  57. result = 0
  58. if '[-]' in text:
  59. result += 0.5
  60. return result
  61. class BefungeLexer(RegexLexer):
  62. """
  63. Lexer for the esoteric Befunge language.
  64. """
  65. name = 'Befunge'
  66. url = 'http://en.wikipedia.org/wiki/Befunge'
  67. aliases = ['befunge']
  68. filenames = ['*.befunge']
  69. mimetypes = ['application/x-befunge']
  70. version_added = '0.7'
  71. tokens = {
  72. 'root': [
  73. (r'[0-9a-f]', Number),
  74. (r'[+*/%!`-]', Operator), # Traditional math
  75. (r'[<>^v?\[\]rxjk]', Name.Variable), # Move, imperatives
  76. (r'[:\\$.,n]', Name.Builtin), # Stack ops, imperatives
  77. (r'[|_mw]', Keyword),
  78. (r'[{}]', Name.Tag), # Befunge-98 stack ops
  79. (r'".*?"', String.Double), # Strings don't appear to allow escapes
  80. (r'\'.', String.Single), # Single character
  81. (r'[#;]', Comment), # Trampoline... depends on direction hit
  82. (r'[pg&~=@iotsy]', Keyword), # Misc
  83. (r'[()A-Z]', Comment), # Fingerprints
  84. (r'\s+', Whitespace), # Whitespace doesn't matter
  85. ],
  86. }
  87. class CAmkESLexer(RegexLexer):
  88. """
  89. Basic lexer for the input language for the CAmkES component platform.
  90. """
  91. name = 'CAmkES'
  92. url = 'https://sel4.systems/CAmkES/'
  93. aliases = ['camkes', 'idl4']
  94. filenames = ['*.camkes', '*.idl4']
  95. version_added = '2.1'
  96. tokens = {
  97. 'root': [
  98. # C pre-processor directive
  99. (r'^(\s*)(#.*)(\n)', bygroups(Whitespace, Comment.Preproc,
  100. Whitespace)),
  101. # Whitespace, comments
  102. (r'\s+', Whitespace),
  103. (r'/\*(.|\n)*?\*/', Comment),
  104. (r'//.*$', Comment),
  105. (r'[\[(){},.;\]]', Punctuation),
  106. (r'[~!%^&*+=|?:<>/-]', Operator),
  107. (words(('assembly', 'attribute', 'component', 'composition',
  108. 'configuration', 'connection', 'connector', 'consumes',
  109. 'control', 'dataport', 'Dataport', 'Dataports', 'emits',
  110. 'event', 'Event', 'Events', 'export', 'from', 'group',
  111. 'hardware', 'has', 'interface', 'Interface', 'maybe',
  112. 'procedure', 'Procedure', 'Procedures', 'provides',
  113. 'template', 'thread', 'threads', 'to', 'uses', 'with'),
  114. suffix=r'\b'), Keyword),
  115. (words(('bool', 'boolean', 'Buf', 'char', 'character', 'double',
  116. 'float', 'in', 'inout', 'int', 'int16_6', 'int32_t',
  117. 'int64_t', 'int8_t', 'integer', 'mutex', 'out', 'real',
  118. 'refin', 'semaphore', 'signed', 'string', 'struct',
  119. 'uint16_t', 'uint32_t', 'uint64_t', 'uint8_t', 'uintptr_t',
  120. 'unsigned', 'void'),
  121. suffix=r'\b'), Keyword.Type),
  122. # Recognised attributes
  123. (r'[a-zA-Z_]\w*_(priority|domain|buffer)', Keyword.Reserved),
  124. (words(('dma_pool', 'from_access', 'to_access'), suffix=r'\b'),
  125. Keyword.Reserved),
  126. # CAmkES-level include
  127. (r'(import)(\s+)((?:<[^>]*>|"[^"]*");)',
  128. bygroups(Comment.Preproc, Whitespace, Comment.Preproc)),
  129. # C-level include
  130. (r'(include)(\s+)((?:<[^>]*>|"[^"]*");)',
  131. bygroups(Comment.Preproc, Whitespace, Comment.Preproc)),
  132. # Literals
  133. (r'0[xX][\da-fA-F]+', Number.Hex),
  134. (r'-?[\d]+', Number),
  135. (r'-?[\d]+\.[\d]+', Number.Float),
  136. (r'"[^"]*"', String),
  137. (r'[Tt]rue|[Ff]alse', Name.Builtin),
  138. # Identifiers
  139. (r'[a-zA-Z_]\w*', Name),
  140. ],
  141. }
  142. class CapDLLexer(RegexLexer):
  143. """
  144. Basic lexer for CapDL.
  145. The source of the primary tool that reads such specifications is available
  146. at https://github.com/seL4/capdl/tree/master/capDL-tool. Note that this
  147. lexer only supports a subset of the grammar. For example, identifiers can
  148. shadow type names, but these instances are currently incorrectly
  149. highlighted as types. Supporting this would need a stateful lexer that is
  150. considered unnecessarily complex for now.
  151. """
  152. name = 'CapDL'
  153. url = 'https://ssrg.nicta.com.au/publications/nictaabstracts/Kuz_KLW_10.abstract.pml'
  154. aliases = ['capdl']
  155. filenames = ['*.cdl']
  156. version_added = '2.2'
  157. tokens = {
  158. 'root': [
  159. # C pre-processor directive
  160. (r'^(\s*)(#.*)(\n)',
  161. bygroups(Whitespace, Comment.Preproc, Whitespace)),
  162. # Whitespace, comments
  163. (r'\s+', Whitespace),
  164. (r'/\*(.|\n)*?\*/', Comment),
  165. (r'(//|--).*$', Comment),
  166. (r'[<>\[(){},:;=\]]', Punctuation),
  167. (r'\.\.', Punctuation),
  168. (words(('arch', 'arm11', 'caps', 'child_of', 'ia32', 'irq', 'maps',
  169. 'objects'), suffix=r'\b'), Keyword),
  170. (words(('aep', 'asid_pool', 'cnode', 'ep', 'frame', 'io_device',
  171. 'io_ports', 'io_pt', 'notification', 'pd', 'pt', 'tcb',
  172. 'ut', 'vcpu'), suffix=r'\b'), Keyword.Type),
  173. # Properties
  174. (words(('asid', 'addr', 'badge', 'cached', 'dom', 'domainID', 'elf',
  175. 'fault_ep', 'G', 'guard', 'guard_size', 'init', 'ip',
  176. 'prio', 'sp', 'R', 'RG', 'RX', 'RW', 'RWG', 'RWX', 'W',
  177. 'WG', 'WX', 'level', 'masked', 'master_reply', 'paddr',
  178. 'ports', 'reply', 'uncached'), suffix=r'\b'),
  179. Keyword.Reserved),
  180. # Literals
  181. (r'0[xX][\da-fA-F]+', Number.Hex),
  182. (r'\d+(\.\d+)?(k|M)?', Number),
  183. (words(('bits',), suffix=r'\b'), Number),
  184. (words(('cspace', 'vspace', 'reply_slot', 'caller_slot',
  185. 'ipc_buffer_slot'), suffix=r'\b'), Number),
  186. # Identifiers
  187. (r'[a-zA-Z_][-@\.\w]*', Name),
  188. ],
  189. }
  190. class RedcodeLexer(RegexLexer):
  191. """
  192. A simple Redcode lexer based on ICWS'94.
  193. Contributed by Adam Blinkinsop <blinks@acm.org>.
  194. """
  195. name = 'Redcode'
  196. aliases = ['redcode']
  197. filenames = ['*.cw']
  198. url = 'https://en.wikipedia.org/wiki/Core_War'
  199. version_added = '0.8'
  200. opcodes = ('DAT', 'MOV', 'ADD', 'SUB', 'MUL', 'DIV', 'MOD',
  201. 'JMP', 'JMZ', 'JMN', 'DJN', 'CMP', 'SLT', 'SPL',
  202. 'ORG', 'EQU', 'END')
  203. modifiers = ('A', 'B', 'AB', 'BA', 'F', 'X', 'I')
  204. tokens = {
  205. 'root': [
  206. # Whitespace:
  207. (r'\s+', Whitespace),
  208. (r';.*$', Comment.Single),
  209. # Lexemes:
  210. # Identifiers
  211. (r'\b({})\b'.format('|'.join(opcodes)), Name.Function),
  212. (r'\b({})\b'.format('|'.join(modifiers)), Name.Decorator),
  213. (r'[A-Za-z_]\w+', Name),
  214. # Operators
  215. (r'[-+*/%]', Operator),
  216. (r'[#$@<>]', Operator), # mode
  217. (r'[.,]', Punctuation), # mode
  218. # Numbers
  219. (r'[-+]?\d+', Number.Integer),
  220. ],
  221. }
  222. class AheuiLexer(RegexLexer):
  223. """
  224. Aheui is esoteric language based on Korean alphabets.
  225. """
  226. name = 'Aheui'
  227. url = 'http://aheui.github.io/'
  228. aliases = ['aheui']
  229. filenames = ['*.aheui']
  230. version_added = ''
  231. tokens = {
  232. 'root': [
  233. ('['
  234. '나-낳냐-냫너-넣녀-녛노-놓뇨-눟뉴-닇'
  235. '다-닿댜-댷더-덯뎌-뎧도-돟됴-둫듀-딓'
  236. '따-땋땨-떃떠-떻뗘-뗳또-똫뚀-뚷뜌-띟'
  237. '라-랗랴-럏러-렇려-렿로-롷료-뤃류-릫'
  238. '마-맣먀-먛머-멓며-몋모-뫃묘-뭏뮤-믷'
  239. '바-밯뱌-뱧버-벟벼-볗보-봏뵤-붛뷰-빃'
  240. '빠-빻뺘-뺳뻐-뻫뼈-뼣뽀-뽛뾰-뿧쀼-삏'
  241. '사-샇샤-샿서-섷셔-셯소-솧쇼-숳슈-싛'
  242. '싸-쌓쌰-썋써-쎃쎠-쎻쏘-쏳쑈-쑿쓔-씧'
  243. '자-잫쟈-쟣저-젛져-졓조-좋죠-줗쥬-즿'
  244. '차-챃챠-챻처-첳쳐-쳫초-촣쵸-춯츄-칗'
  245. '카-캏캬-컇커-컿켜-켷코-콯쿄-쿻큐-킣'
  246. '타-탛탸-턓터-텋텨-톃토-톻툐-퉇튜-틯'
  247. '파-팧퍄-퍟퍼-펗펴-폏포-퐇표-풓퓨-픻'
  248. '하-핳햐-햫허-헣혀-혛호-홓효-훟휴-힇'
  249. ']', Operator),
  250. ('.', Comment),
  251. ],
  252. }