unicon.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413
  1. """
  2. pygments.lexers.unicon
  3. ~~~~~~~~~~~~~~~~~~~~~~
  4. Lexers for the Icon and Unicon languages, including ucode VM.
  5. :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import re
  9. from pygments.lexer import RegexLexer, include, bygroups, words, using, this
  10. from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
  11. Number, Punctuation
# Lexer classes exported by this module.
__all__ = ['IconLexer', 'UcodeLexer', 'UniconLexer']
  13. class UniconLexer(RegexLexer):
  14. """
  15. For Unicon source code.
  16. """
  17. name = 'Unicon'
  18. aliases = ['unicon']
  19. filenames = ['*.icn']
  20. mimetypes = ['text/unicon']
  21. url = 'https://www.unicon.org'
  22. version_added = '2.4'
  23. flags = re.MULTILINE
  24. tokens = {
  25. 'root': [
  26. (r'[^\S\n]+', Text),
  27. (r'#.*?\n', Comment.Single),
  28. (r'[^\S\n]+', Text),
  29. (r'class|method|procedure', Keyword.Declaration, 'subprogram'),
  30. (r'(record)(\s+)(\w+)',
  31. bygroups(Keyword.Declaration, Text, Keyword.Type), 'type_def'),
  32. (r'(#line|\$C|\$Cend|\$define|\$else|\$endif|\$error|\$ifdef|'
  33. r'\$ifndef|\$include|\$line|\$undef)\b', Keyword.PreProc),
  34. (r'(&null|&fail)\b', Keyword.Constant),
  35. (r'&allocated|&ascii|&clock|&collections|&column|&col|&control|'
  36. r'&cset|&current|&dateline|&date|&digits|&dump|'
  37. r'&errno|&errornumber|&errortext|&errorvalue|&error|&errout|'
  38. r'&eventcode|&eventvalue|&eventsource|&e|'
  39. r'&features|&file|&host|&input|&interval|&lcase|&letters|'
  40. r'&level|&line|&ldrag|&lpress|&lrelease|'
  41. r'&main|&mdrag|&meta|&mpress|&mrelease|&now|&output|'
  42. r'&phi|&pick|&pi|&pos|&progname|'
  43. r'&random|&rdrag|&regions|&resize|&row|&rpress|&rrelease|'
  44. r'&shift|&source|&storage|&subject|'
  45. r'&time|&trace|&ucase|&version|'
  46. r'&window|&x|&y', Keyword.Reserved),
  47. (r'(by|of|not|to)\b', Keyword.Reserved),
  48. (r'(global|local|static|abstract)\b', Keyword.Reserved),
  49. (r'package|link|import', Keyword.Declaration),
  50. (words((
  51. 'break', 'case', 'create', 'critical', 'default', 'end', 'all',
  52. 'do', 'else', 'every', 'fail', 'if', 'import', 'initial',
  53. 'initially', 'invocable', 'next',
  54. 'repeat', 'return', 'suspend',
  55. 'then', 'thread', 'until', 'while'), prefix=r'\b', suffix=r'\b'),
  56. Keyword.Reserved),
  57. (words((
  58. 'Abort', 'abs', 'acos', 'Active', 'Alert', 'any', 'Any', 'Arb',
  59. 'Arbno', 'args', 'array', 'asin', 'atan', 'atanh', 'Attrib',
  60. 'Bal', 'bal', 'Bg', 'Break', 'Breakx',
  61. 'callout', 'center', 'char', 'chdir', 'chmod', 'chown', 'chroot',
  62. 'classname', 'Clip', 'Clone', 'close', 'cofail', 'collect',
  63. 'Color', 'ColorValue', 'condvar', 'constructor', 'copy',
  64. 'CopyArea', 'cos', 'Couple', 'crypt', 'cset', 'ctime',
  65. 'dbcolumns', 'dbdriver', 'dbkeys', 'dblimits', 'dbproduct',
  66. 'dbtables', 'delay', 'delete', 'detab', 'display', 'DrawArc',
  67. 'DrawCircle', 'DrawCube', 'DrawCurve', 'DrawCylinder',
  68. 'DrawDisk', 'DrawImage', 'DrawLine', 'DrawPoint', 'DrawPolygon',
  69. 'DrawRectangle', 'DrawSegment', 'DrawSphere', 'DrawString',
  70. 'DrawTorus', 'dtor',
  71. 'entab', 'EraseArea', 'errorclear', 'Event', 'eventmask',
  72. 'EvGet', 'EvSend', 'exec', 'exit', 'exp', 'Eye',
  73. 'Fail', 'fcntl', 'fdup', 'Fence', 'fetch', 'Fg', 'fieldnames',
  74. 'filepair', 'FillArc', 'FillCircle', 'FillPolygon',
  75. 'FillRectangle', 'find', 'flock', 'flush', 'Font', 'fork',
  76. 'FreeColor', 'FreeSpace', 'function',
  77. 'get', 'getch', 'getche', 'getegid', 'getenv', 'geteuid',
  78. 'getgid', 'getgr', 'gethost', 'getpgrp', 'getpid', 'getppid',
  79. 'getpw', 'getrusage', 'getserv', 'GetSpace', 'gettimeofday',
  80. 'getuid', 'globalnames', 'GotoRC', 'GotoXY', 'gtime', 'hardlink',
  81. 'iand', 'icom', 'IdentityMatrix', 'image', 'InPort', 'insert',
  82. 'Int86', 'integer', 'ioctl', 'ior', 'ishift', 'istate', 'ixor',
  83. 'kbhit', 'key', 'keyword', 'kill',
  84. 'left', 'Len', 'list', 'load', 'loadfunc', 'localnames',
  85. 'lock', 'log', 'Lower', 'lstat',
  86. 'many', 'map', 'match', 'MatrixMode', 'max', 'member',
  87. 'membernames', 'methodnames', 'methods', 'min', 'mkdir', 'move',
  88. 'MultMatrix', 'mutex',
  89. 'name', 'NewColor', 'Normals', 'NotAny', 'numeric',
  90. 'open', 'opencl', 'oprec', 'ord', 'OutPort',
  91. 'PaletteChars', 'PaletteColor', 'PaletteKey', 'paramnames',
  92. 'parent', 'Pattern', 'Peek', 'Pending', 'pipe', 'Pixel',
  93. 'PlayAudio', 'Poke', 'pop', 'PopMatrix', 'Pos', 'pos',
  94. 'proc', 'pull', 'push', 'PushMatrix', 'PushRotate', 'PushScale',
  95. 'PushTranslate', 'put',
  96. 'QueryPointer',
  97. 'Raise', 'read', 'ReadImage', 'readlink', 'reads', 'ready',
  98. 'real', 'receive', 'Refresh', 'Rem', 'remove', 'rename',
  99. 'repl', 'reverse', 'right', 'rmdir', 'Rotate', 'Rpos',
  100. 'Rtab', 'rtod', 'runerr',
  101. 'save', 'Scale', 'seek', 'select', 'send', 'seq',
  102. 'serial', 'set', 'setenv', 'setgid', 'setgrent',
  103. 'sethostent', 'setpgrp', 'setpwent', 'setservent',
  104. 'setuid', 'signal', 'sin', 'sort', 'sortf', 'Span',
  105. 'spawn', 'sql', 'sqrt', 'stat', 'staticnames', 'stop',
  106. 'StopAudio', 'string', 'structure', 'Succeed', 'Swi',
  107. 'symlink', 'sys_errstr', 'system', 'syswrite',
  108. 'Tab', 'tab', 'table', 'tan',
  109. 'Texcoord', 'Texture', 'TextWidth', 'Translate',
  110. 'trap', 'trim', 'truncate', 'trylock', 'type',
  111. 'umask', 'Uncouple', 'unlock', 'upto', 'utime',
  112. 'variable', 'VAttrib',
  113. 'wait', 'WAttrib', 'WDefault', 'WFlush', 'where',
  114. 'WinAssociate', 'WinButton', 'WinColorDialog', 'WindowContents',
  115. 'WinEditRegion', 'WinFontDialog', 'WinMenuBar', 'WinOpenDialog',
  116. 'WinPlayMedia', 'WinSaveDialog', 'WinScrollBar', 'WinSelectDialog',
  117. 'write', 'WriteImage', 'writes', 'WSection',
  118. 'WSync'), prefix=r'\b', suffix=r'\b'),
  119. Name.Function),
  120. include('numbers'),
  121. (r'<@|<<@|>@|>>@|\.>|->|===|~===|\*\*|\+\+|--|\.|~==|~=|<=|>=|==|'
  122. r'=|<<=|<<|>>=|>>|:=:|:=|->|<->|\+:=|\|', Operator),
  123. (r'"(?:[^\\"]|\\.)*"', String),
  124. (r"'(?:[^\\']|\\.)*'", String.Character),
  125. (r'[*<>+=/&!?@~\\-]', Operator),
  126. (r'\^', Operator),
  127. (r'(\w+)(\s*|[(,])', bygroups(Name, using(this))),
  128. (r"[\[\]]", Punctuation),
  129. (r"<>|=>|[()|:;,.'`{}%&?]", Punctuation),
  130. (r'\n+', Text),
  131. ],
  132. 'numbers': [
  133. (r'\b([+-]?([2-9]|[12][0-9]|3[0-6])[rR][0-9a-zA-Z]+)\b', Number.Hex),
  134. (r'[+-]?[0-9]*\.([0-9]*)([Ee][+-]?[0-9]*)?', Number.Float),
  135. (r'\b([+-]?[0-9]+[KMGTPkmgtp]?)\b', Number.Integer),
  136. ],
  137. 'subprogram': [
  138. (r'\(', Punctuation, ('#pop', 'formal_part')),
  139. (r';', Punctuation, '#pop'),
  140. (r'"[^"]+"|\w+', Name.Function),
  141. include('root'),
  142. ],
  143. 'type_def': [
  144. (r'\(', Punctuation, 'formal_part'),
  145. ],
  146. 'formal_part': [
  147. (r'\)', Punctuation, '#pop'),
  148. (r'\w+', Name.Variable),
  149. (r',', Punctuation),
  150. (r'(:string|:integer|:real)\b', Keyword.Reserved),
  151. include('root'),
  152. ],
  153. }
  154. class IconLexer(RegexLexer):
  155. """
  156. Lexer for Icon.
  157. """
  158. name = 'Icon'
  159. aliases = ['icon']
  160. filenames = ['*.icon', '*.ICON']
  161. mimetypes = []
  162. url = 'https://www2.cs.arizona.edu/icon'
  163. version_added = '1.6'
  164. flags = re.MULTILINE
  165. tokens = {
  166. 'root': [
  167. (r'[^\S\n]+', Text),
  168. (r'#.*?\n', Comment.Single),
  169. (r'[^\S\n]+', Text),
  170. (r'class|method|procedure', Keyword.Declaration, 'subprogram'),
  171. (r'(record)(\s+)(\w+)',
  172. bygroups(Keyword.Declaration, Text, Keyword.Type), 'type_def'),
  173. (r'(#line|\$C|\$Cend|\$define|\$else|\$endif|\$error|\$ifdef|'
  174. r'\$ifndef|\$include|\$line|\$undef)\b', Keyword.PreProc),
  175. (r'(&null|&fail)\b', Keyword.Constant),
  176. (r'&allocated|&ascii|&clock|&collections|&column|&col|&control|'
  177. r'&cset|&current|&dateline|&date|&digits|&dump|'
  178. r'&errno|&errornumber|&errortext|&errorvalue|&error|&errout|'
  179. r'&eventcode|&eventvalue|&eventsource|&e|'
  180. r'&features|&file|&host|&input|&interval|&lcase|&letters|'
  181. r'&level|&line|&ldrag|&lpress|&lrelease|'
  182. r'&main|&mdrag|&meta|&mpress|&mrelease|&now|&output|'
  183. r'&phi|&pick|&pi|&pos|&progname|'
  184. r'&random|&rdrag|&regions|&resize|&row|&rpress|&rrelease|'
  185. r'&shift|&source|&storage|&subject|'
  186. r'&time|&trace|&ucase|&version|'
  187. r'&window|&x|&y', Keyword.Reserved),
  188. (r'(by|of|not|to)\b', Keyword.Reserved),
  189. (r'(global|local|static)\b', Keyword.Reserved),
  190. (r'link', Keyword.Declaration),
  191. (words((
  192. 'break', 'case', 'create', 'default', 'end', 'all',
  193. 'do', 'else', 'every', 'fail', 'if', 'initial',
  194. 'invocable', 'next',
  195. 'repeat', 'return', 'suspend',
  196. 'then', 'until', 'while'), prefix=r'\b', suffix=r'\b'),
  197. Keyword.Reserved),
  198. (words((
  199. 'abs', 'acos', 'Active', 'Alert', 'any',
  200. 'args', 'array', 'asin', 'atan', 'atanh', 'Attrib',
  201. 'bal', 'Bg',
  202. 'callout', 'center', 'char', 'chdir', 'chmod', 'chown', 'chroot',
  203. 'Clip', 'Clone', 'close', 'cofail', 'collect',
  204. 'Color', 'ColorValue', 'condvar', 'copy',
  205. 'CopyArea', 'cos', 'Couple', 'crypt', 'cset', 'ctime',
  206. 'delay', 'delete', 'detab', 'display', 'DrawArc',
  207. 'DrawCircle', 'DrawCube', 'DrawCurve', 'DrawCylinder',
  208. 'DrawDisk', 'DrawImage', 'DrawLine', 'DrawPoint', 'DrawPolygon',
  209. 'DrawRectangle', 'DrawSegment', 'DrawSphere', 'DrawString',
  210. 'DrawTorus', 'dtor',
  211. 'entab', 'EraseArea', 'errorclear', 'Event', 'eventmask',
  212. 'EvGet', 'EvSend', 'exec', 'exit', 'exp', 'Eye',
  213. 'fcntl', 'fdup', 'fetch', 'Fg', 'fieldnames',
  214. 'FillArc', 'FillCircle', 'FillPolygon',
  215. 'FillRectangle', 'find', 'flock', 'flush', 'Font',
  216. 'FreeColor', 'FreeSpace', 'function',
  217. 'get', 'getch', 'getche', 'getenv',
  218. 'GetSpace', 'gettimeofday',
  219. 'getuid', 'globalnames', 'GotoRC', 'GotoXY', 'gtime', 'hardlink',
  220. 'iand', 'icom', 'IdentityMatrix', 'image', 'InPort', 'insert',
  221. 'Int86', 'integer', 'ioctl', 'ior', 'ishift', 'istate', 'ixor',
  222. 'kbhit', 'key', 'keyword', 'kill',
  223. 'left', 'Len', 'list', 'load', 'loadfunc', 'localnames',
  224. 'lock', 'log', 'Lower', 'lstat',
  225. 'many', 'map', 'match', 'MatrixMode', 'max', 'member',
  226. 'membernames', 'methodnames', 'methods', 'min', 'mkdir', 'move',
  227. 'MultMatrix', 'mutex',
  228. 'name', 'NewColor', 'Normals', 'numeric',
  229. 'open', 'opencl', 'oprec', 'ord', 'OutPort',
  230. 'PaletteChars', 'PaletteColor', 'PaletteKey', 'paramnames',
  231. 'parent', 'Pattern', 'Peek', 'Pending', 'pipe', 'Pixel',
  232. 'Poke', 'pop', 'PopMatrix', 'Pos', 'pos',
  233. 'proc', 'pull', 'push', 'PushMatrix', 'PushRotate', 'PushScale',
  234. 'PushTranslate', 'put',
  235. 'QueryPointer',
  236. 'Raise', 'read', 'ReadImage', 'readlink', 'reads', 'ready',
  237. 'real', 'receive', 'Refresh', 'Rem', 'remove', 'rename',
  238. 'repl', 'reverse', 'right', 'rmdir', 'Rotate', 'Rpos',
  239. 'rtod', 'runerr',
  240. 'save', 'Scale', 'seek', 'select', 'send', 'seq',
  241. 'serial', 'set', 'setenv',
  242. 'setuid', 'signal', 'sin', 'sort', 'sortf',
  243. 'spawn', 'sql', 'sqrt', 'stat', 'staticnames', 'stop',
  244. 'string', 'structure', 'Swi',
  245. 'symlink', 'sys_errstr', 'system', 'syswrite',
  246. 'tab', 'table', 'tan',
  247. 'Texcoord', 'Texture', 'TextWidth', 'Translate',
  248. 'trap', 'trim', 'truncate', 'trylock', 'type',
  249. 'umask', 'Uncouple', 'unlock', 'upto', 'utime',
  250. 'variable',
  251. 'wait', 'WAttrib', 'WDefault', 'WFlush', 'where',
  252. 'WinAssociate', 'WinButton', 'WinColorDialog', 'WindowContents',
  253. 'WinEditRegion', 'WinFontDialog', 'WinMenuBar', 'WinOpenDialog',
  254. 'WinPlayMedia', 'WinSaveDialog', 'WinScrollBar', 'WinSelectDialog',
  255. 'write', 'WriteImage', 'writes', 'WSection',
  256. 'WSync'), prefix=r'\b', suffix=r'\b'),
  257. Name.Function),
  258. include('numbers'),
  259. (r'===|~===|\*\*|\+\+|--|\.|==|~==|<=|>=|=|~=|<<=|<<|>>=|>>|'
  260. r':=:|:=|<->|<-|\+:=|\|\||\|', Operator),
  261. (r'"(?:[^\\"]|\\.)*"', String),
  262. (r"'(?:[^\\']|\\.)*'", String.Character),
  263. (r'[*<>+=/&!?@~\\-]', Operator),
  264. (r'(\w+)(\s*|[(,])', bygroups(Name, using(this))),
  265. (r"[\[\]]", Punctuation),
  266. (r"<>|=>|[()|:;,.'`{}%\^&?]", Punctuation),
  267. (r'\n+', Text),
  268. ],
  269. 'numbers': [
  270. (r'\b([+-]?([2-9]|[12][0-9]|3[0-6])[rR][0-9a-zA-Z]+)\b', Number.Hex),
  271. (r'[+-]?[0-9]*\.([0-9]*)([Ee][+-]?[0-9]*)?', Number.Float),
  272. (r'\b([+-]?[0-9]+[KMGTPkmgtp]?)\b', Number.Integer),
  273. ],
  274. 'subprogram': [
  275. (r'\(', Punctuation, ('#pop', 'formal_part')),
  276. (r';', Punctuation, '#pop'),
  277. (r'"[^"]+"|\w+', Name.Function),
  278. include('root'),
  279. ],
  280. 'type_def': [
  281. (r'\(', Punctuation, 'formal_part'),
  282. ],
  283. 'formal_part': [
  284. (r'\)', Punctuation, '#pop'),
  285. (r'\w+', Name.Variable),
  286. (r',', Punctuation),
  287. (r'(:string|:integer|:real)\b', Keyword.Reserved),
  288. include('root'),
  289. ],
  290. }
  291. class UcodeLexer(RegexLexer):
  292. """
  293. Lexer for Icon ucode files.
  294. """
  295. name = 'ucode'
  296. aliases = ['ucode']
  297. filenames = ['*.u', '*.u1', '*.u2']
  298. mimetypes = []
  299. url = 'http://www.unicon.org'
  300. version_added = '2.4'
  301. flags = re.MULTILINE
  302. tokens = {
  303. 'root': [
  304. (r'(#.*\n)', Comment),
  305. (words((
  306. 'con', 'declend', 'end',
  307. 'global',
  308. 'impl', 'invocable',
  309. 'lab', 'link', 'local',
  310. 'record',
  311. 'uid', 'unions',
  312. 'version'),
  313. prefix=r'\b', suffix=r'\b'),
  314. Name.Function),
  315. (words((
  316. 'colm', 'filen', 'line', 'synt'),
  317. prefix=r'\b', suffix=r'\b'),
  318. Comment),
  319. (words((
  320. 'asgn',
  321. 'bang', 'bscan',
  322. 'cat', 'ccase', 'chfail',
  323. 'coact', 'cofail', 'compl',
  324. 'coret', 'create', 'cset',
  325. 'diff', 'div', 'dup',
  326. 'efail', 'einit', 'end', 'eqv', 'eret',
  327. 'error', 'escan', 'esusp',
  328. 'field',
  329. 'goto',
  330. 'init', 'int', 'inter',
  331. 'invoke',
  332. 'keywd',
  333. 'lconcat', 'lexeq', 'lexge',
  334. 'lexgt', 'lexle', 'lexlt', 'lexne',
  335. 'limit', 'llist', 'lsusp',
  336. 'mark', 'mark0', 'minus', 'mod', 'mult',
  337. 'neg', 'neqv', 'nonnull', 'noop', 'null',
  338. 'number', 'numeq', 'numge', 'numgt',
  339. 'numle', 'numlt', 'numne',
  340. 'pfail', 'plus', 'pnull', 'pop', 'power',
  341. 'pret', 'proc', 'psusp', 'push1', 'pushn1',
  342. 'random', 'rasgn', 'rcv', 'rcvbk', 'real',
  343. 'refresh', 'rswap',
  344. 'sdup', 'sect', 'size', 'snd', 'sndbk',
  345. 'str', 'subsc', 'swap',
  346. 'tabmat', 'tally', 'toby', 'trace',
  347. 'unmark',
  348. 'value', 'var'), prefix=r'\b', suffix=r'\b'),
  349. Keyword.Declaration),
  350. (words((
  351. 'any',
  352. 'case',
  353. 'endcase', 'endevery', 'endif',
  354. 'endifelse', 'endrepeat', 'endsuspend',
  355. 'enduntil', 'endwhile', 'every',
  356. 'if', 'ifelse',
  357. 'repeat',
  358. 'suspend',
  359. 'until',
  360. 'while'),
  361. prefix=r'\b', suffix=r'\b'),
  362. Name.Constant),
  363. (r'\d+(\s*|\.$|$)', Number.Integer),
  364. (r'[+-]?\d*\.\d+(E[-+]?\d+)?', Number.Float),
  365. (r'[+-]?\d+\.\d*(E[-+]?\d+)?', Number.Float),
  366. (r"(<>|=>|[()|:;,.'`]|[{}]|[%^]|[&?])", Punctuation),
  367. (r'\s+\b', Text),
  368. (r'[\w-]+', Text),
  369. ],
  370. }
  371. def analyse_text(text):
  372. """endsuspend and endrepeat are unique to this language, and
  373. \\self, /self doesn't seem to get used anywhere else either."""
  374. result = 0
  375. if 'endsuspend' in text:
  376. result += 0.1
  377. if 'endrepeat' in text:
  378. result += 0.1
  379. if ':=' in text:
  380. result += 0.01
  381. if 'procedure' in text and 'end' in text:
  382. result += 0.01
  383. # This seems quite unique to unicon -- doesn't appear in any other
  384. # example source we have (A quick search reveals that \SELF appears in
  385. # Perl/Raku code)
  386. if r'\self' in text and r'/self' in text:
  387. result += 0.5
  388. return result