modula2.py 51 KB


  1. # -*- coding: utf-8 -*-
  2. """
  3. pygments.lexers.modula2
  4. ~~~~~~~~~~~~~~~~~~~~~~~
  5. Multi-Dialect Lexer for Modula-2.
  6. :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
  7. :license: BSD, see LICENSE for details.
  8. """
  9. import re
  10. from pygments.lexer import RegexLexer, include
  11. from pygments.util import get_bool_opt, get_list_opt
  12. from pygments.token import Text, Comment, Operator, Keyword, Name, \
  13. String, Number, Punctuation, Error
  14. __all__ = ['Modula2Lexer']
  15. # Multi-Dialect Modula-2 Lexer
  16. class Modula2Lexer(RegexLexer):
  17. """
  18. For `Modula-2 <http://www.modula2.org/>`_ source code.
  19. The Modula-2 lexer supports several dialects. By default, it operates in
  20. fallback mode, recognising the *combined* literals, punctuation symbols
  21. and operators of all supported dialects, and the *combined* reserved words
  22. and builtins of PIM Modula-2, ISO Modula-2 and Modula-2 R10, while not
  23. differentiating between library defined identifiers.
  24. To select a specific dialect, a dialect option may be passed
  25. or a dialect tag may be embedded into a source file.
  26. Dialect Options:
  27. `m2pim`
  28. Select PIM Modula-2 dialect.
  29. `m2iso`
  30. Select ISO Modula-2 dialect.
  31. `m2r10`
  32. Select Modula-2 R10 dialect.
  33. `objm2`
  34. Select Objective Modula-2 dialect.
  35. The PIM and ISO dialect options may be qualified with a language extension.
  36. Language Extensions:
  37. `+aglet`
  38. Select Aglet Modula-2 extensions, available with m2iso.
  39. `+gm2`
  40. Select GNU Modula-2 extensions, available with m2pim.
  41. `+p1`
  42. Select p1 Modula-2 extensions, available with m2iso.
  43. `+xds`
  44. Select XDS Modula-2 extensions, available with m2iso.
  45. Passing a Dialect Option via Unix Commandline Interface
  46. Dialect options may be passed to the lexer using the `dialect` key.
  47. Only one such option should be passed. If multiple dialect options are
  48. passed, the first valid option is used, any subsequent options are ignored.
  49. Examples:
  50. `$ pygmentize -O full,dialect=m2iso -f html -o /path/to/output /path/to/input`
  51. Use ISO dialect to render input to HTML output
  52. `$ pygmentize -O full,dialect=m2iso+p1 -f rtf -o /path/to/output /path/to/input`
  53. Use ISO dialect with p1 extensions to render input to RTF output
  54. Embedding a Dialect Option within a source file
  55. A dialect option may be embedded in a source file in form of a dialect
  56. tag, a specially formatted comment that specifies a dialect option.
  57. Dialect Tag EBNF::
  58. dialectTag :
  59. OpeningCommentDelim Prefix dialectOption ClosingCommentDelim ;
  60. dialectOption :
  61. 'm2pim' | 'm2iso' | 'm2r10' | 'objm2' |
  62. 'm2iso+aglet' | 'm2pim+gm2' | 'm2iso+p1' | 'm2iso+xds' ;
  63. Prefix : '!' ;
  64. OpeningCommentDelim : '(*' ;
  65. ClosingCommentDelim : '*)' ;
  66. No whitespace is permitted between the tokens of a dialect tag.
  67. In the event that a source file contains multiple dialect tags, the first
  68. tag that contains a valid dialect option will be used and any subsequent
  69. dialect tags will be ignored. Ideally, a dialect tag should be placed
  70. at the beginning of a source file.
  71. An embedded dialect tag overrides a dialect option set via command line.
  72. Examples:
  73. ``(*!m2r10*) DEFINITION MODULE Foobar; ...``
  74. Use Modula-2 R10 dialect to render this source file.
  75. ``(*!m2pim+gm2*) DEFINITION MODULE Bazbam; ...``
  76. Use PIM dialect with GNU extensions to render this source file.
  77. Algol Publication Mode:
  78. In Algol publication mode, source text is rendered for publication of
  79. algorithms in scientific papers and academic texts, following the format
  80. of the Revised Algol-60 Language Report. It is activated by passing
  81. one of two corresponding styles as an option:
  82. `algol`
  83. render reserved words lowercase underline boldface
  84. and builtins lowercase boldface italic
  85. `algol_nu`
  86. render reserved words lowercase boldface (no underlining)
  87. and builtins lowercase boldface italic
  88. The lexer automatically performs the required lowercase conversion when
  89. this mode is activated.
  90. Example:
  91. ``$ pygmentize -O full,style=algol -f latex -o /path/to/output /path/to/input``
  92. Render input file in Algol publication mode to LaTeX output.
  93. Rendering Mode of First Class ADT Identifiers:
  94. The rendering of standard library first class ADT identifiers is controlled
  95. by option flag "treat_stdlib_adts_as_builtins".
  96. When this option is turned on, standard library ADT identifiers are rendered
  97. as builtins. When it is turned off, they are rendered as ordinary library
  98. identifiers.
  99. `treat_stdlib_adts_as_builtins` (default: On)
  100. The option is useful for dialects that support ADTs as first class objects
  101. and provide ADTs in the standard library that would otherwise be built-in.
  102. At present, only Modula-2 R10 supports library ADTs as first class objects
  103. and therefore, no ADT identifiers are defined for any other dialects.
  104. Example:
  105. ``$ pygmentize -O full,dialect=m2r10,treat_stdlib_adts_as_builtins=Off ...``
  106. Render standard library ADTs as ordinary library types.
  107. .. versionadded:: 1.3
  108. .. versionchanged:: 2.1
  109. Added multi-dialect support.
  110. """
  111. name = 'Modula-2'
  112. aliases = ['modula2', 'm2']
  113. filenames = ['*.def', '*.mod']
  114. mimetypes = ['text/x-modula2']
  115. flags = re.MULTILINE | re.DOTALL
  116. tokens = {
  117. 'whitespace': [
  118. (r'\n+', Text), # blank lines
  119. (r'\s+', Text), # whitespace
  120. ],
  121. 'dialecttags': [
  122. # PIM Dialect Tag
  123. (r'\(\*!m2pim\*\)', Comment.Special),
  124. # ISO Dialect Tag
  125. (r'\(\*!m2iso\*\)', Comment.Special),
  126. # M2R10 Dialect Tag
  127. (r'\(\*!m2r10\*\)', Comment.Special),
  128. # ObjM2 Dialect Tag
  129. (r'\(\*!objm2\*\)', Comment.Special),
  130. # Aglet Extensions Dialect Tag
  131. (r'\(\*!m2iso\+aglet\*\)', Comment.Special),
  132. # GNU Extensions Dialect Tag
  133. (r'\(\*!m2pim\+gm2\*\)', Comment.Special),
  134. # p1 Extensions Dialect Tag
  135. (r'\(\*!m2iso\+p1\*\)', Comment.Special),
  136. # XDS Extensions Dialect Tag
  137. (r'\(\*!m2iso\+xds\*\)', Comment.Special),
  138. ],
  139. 'identifiers': [
  140. (r'([a-zA-Z_$][\w$]*)', Name),
  141. ],
  142. 'prefixed_number_literals': [
  143. #
  144. # Base-2, whole number
  145. (r'0b[01]+(\'[01]+)*', Number.Bin),
  146. #
  147. # Base-16, whole number
  148. (r'0[ux][0-9A-F]+(\'[0-9A-F]+)*', Number.Hex),
  149. ],
  150. 'plain_number_literals': [
  151. #
  152. # Base-10, real number with exponent
  153. (r'[0-9]+(\'[0-9]+)*' # integral part
  154. r'\.[0-9]+(\'[0-9]+)*' # fractional part
  155. r'[eE][+-]?[0-9]+(\'[0-9]+)*', # exponent
  156. Number.Float),
  157. #
  158. # Base-10, real number without exponent
  159. (r'[0-9]+(\'[0-9]+)*' # integral part
  160. r'\.[0-9]+(\'[0-9]+)*', # fractional part
  161. Number.Float),
  162. #
  163. # Base-10, whole number
  164. (r'[0-9]+(\'[0-9]+)*', Number.Integer),
  165. ],
  166. 'suffixed_number_literals': [
  167. #
  168. # Base-8, whole number
  169. (r'[0-7]+B', Number.Oct),
  170. #
  171. # Base-8, character code
  172. (r'[0-7]+C', Number.Oct),
  173. #
  174. # Base-16, number
  175. (r'[0-9A-F]+H', Number.Hex),
  176. ],
  177. 'string_literals': [
  178. (r"'(\\\\|\\'|[^'])*'", String), # single quoted string
  179. (r'"(\\\\|\\"|[^"])*"', String), # double quoted string
  180. ],
  181. 'digraph_operators': [
  182. # Dot Product Operator
  183. (r'\*\.', Operator),
  184. # Array Concatenation Operator
  185. (r'\+>', Operator), # M2R10 + ObjM2
  186. # Inequality Operator
  187. (r'<>', Operator), # ISO + PIM
  188. # Less-Or-Equal, Subset
  189. (r'<=', Operator),
  190. # Greater-Or-Equal, Superset
  191. (r'>=', Operator),
  192. # Identity Operator
  193. (r'==', Operator), # M2R10 + ObjM2
  194. # Type Conversion Operator
  195. (r'::', Operator), # M2R10 + ObjM2
  196. # Assignment Symbol
  197. (r':=', Operator),
  198. # Postfix Increment Mutator
  199. (r'\+\+', Operator), # M2R10 + ObjM2
  200. # Postfix Decrement Mutator
  201. (r'--', Operator), # M2R10 + ObjM2
  202. ],
  203. 'unigraph_operators': [
  204. # Arithmetic Operators
  205. (r'[+-]', Operator),
  206. (r'[*/]', Operator),
  207. # ISO 80000-2 compliant Set Difference Operator
  208. (r'\\', Operator), # M2R10 + ObjM2
  209. # Relational Operators
  210. (r'[=#<>]', Operator),
  211. # Dereferencing Operator
  212. (r'\^', Operator),
  213. # Dereferencing Operator Synonym
  214. (r'@', Operator), # ISO
  215. # Logical AND Operator Synonym
  216. (r'&', Operator), # PIM + ISO
  217. # Logical NOT Operator Synonym
  218. (r'~', Operator), # PIM + ISO
  219. # Smalltalk Message Prefix
  220. (r'`', Operator), # ObjM2
  221. ],
  222. 'digraph_punctuation': [
  223. # Range Constructor
  224. (r'\.\.', Punctuation),
  225. # Opening Chevron Bracket
  226. (r'<<', Punctuation), # M2R10 + ISO
  227. # Closing Chevron Bracket
  228. (r'>>', Punctuation), # M2R10 + ISO
  229. # Blueprint Punctuation
  230. (r'->', Punctuation), # M2R10 + ISO
  231. # Distinguish |# and # in M2 R10
  232. (r'\|#', Punctuation),
  233. # Distinguish ## and # in M2 R10
  234. (r'##', Punctuation),
  235. # Distinguish |* and * in M2 R10
  236. (r'\|\*', Punctuation),
  237. ],
  238. 'unigraph_punctuation': [
  239. # Common Punctuation
  240. (r'[()\[\]{},.:;|]', Punctuation),
  241. # Case Label Separator Synonym
  242. (r'!', Punctuation), # ISO
  243. # Blueprint Punctuation
  244. (r'\?', Punctuation), # M2R10 + ObjM2
  245. ],
  246. 'comments': [
  247. # Single Line Comment
  248. (r'^//.*?\n', Comment.Single), # M2R10 + ObjM2
  249. # Block Comment
  250. (r'\(\*([^$].*?)\*\)', Comment.Multiline),
  251. # Template Block Comment
  252. (r'/\*(.*?)\*/', Comment.Multiline), # M2R10 + ObjM2
  253. ],
  254. 'pragmas': [
  255. # ISO Style Pragmas
  256. (r'<\*.*?\*>', Comment.Preproc), # ISO, M2R10 + ObjM2
  257. # Pascal Style Pragmas
  258. (r'\(\*\$.*?\*\)', Comment.Preproc), # PIM
  259. ],
  260. 'root': [
  261. include('whitespace'),
  262. include('dialecttags'),
  263. include('pragmas'),
  264. include('comments'),
  265. include('identifiers'),
  266. include('suffixed_number_literals'), # PIM + ISO
  267. include('prefixed_number_literals'), # M2R10 + ObjM2
  268. include('plain_number_literals'),
  269. include('string_literals'),
  270. include('digraph_punctuation'),
  271. include('digraph_operators'),
  272. include('unigraph_punctuation'),
  273. include('unigraph_operators'),
  274. ]
  275. }
  276. # C o m m o n D a t a s e t s
  277. # Common Reserved Words Dataset
  278. common_reserved_words = (
  279. # 37 common reserved words
  280. 'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION', 'DIV',
  281. 'DO', 'ELSE', 'ELSIF', 'END', 'EXIT', 'FOR', 'FROM', 'IF',
  282. 'IMPLEMENTATION', 'IMPORT', 'IN', 'LOOP', 'MOD', 'MODULE', 'NOT',
  283. 'OF', 'OR', 'POINTER', 'PROCEDURE', 'RECORD', 'REPEAT', 'RETURN',
  284. 'SET', 'THEN', 'TO', 'TYPE', 'UNTIL', 'VAR', 'WHILE',
  285. )
  286. # Common Builtins Dataset
  287. common_builtins = (
  288. # 16 common builtins
  289. 'ABS', 'BOOLEAN', 'CARDINAL', 'CHAR', 'CHR', 'FALSE', 'INTEGER',
  290. 'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NIL', 'ODD', 'ORD', 'REAL',
  291. 'TRUE',
  292. )
  293. # Common Pseudo-Module Builtins Dataset
  294. common_pseudo_builtins = (
  295. # 4 common pseudo builtins
  296. 'ADDRESS', 'BYTE', 'WORD', 'ADR'
  297. )
  298. # P I M M o d u l a - 2 D a t a s e t s
  299. # Lexemes to Mark as Error Tokens for PIM Modula-2
  300. pim_lexemes_to_reject = (
  301. '!', '`', '@', '$', '%', '?', '\\', '==', '++', '--', '::', '*.',
  302. '+>', '->', '<<', '>>', '|#', '##',
  303. )
  304. # PIM Modula-2 Additional Reserved Words Dataset
  305. pim_additional_reserved_words = (
  306. # 3 additional reserved words
  307. 'EXPORT', 'QUALIFIED', 'WITH',
  308. )
  309. # PIM Modula-2 Additional Builtins Dataset
  310. pim_additional_builtins = (
  311. # 16 additional builtins
  312. 'BITSET', 'CAP', 'DEC', 'DISPOSE', 'EXCL', 'FLOAT', 'HALT', 'HIGH',
  313. 'INC', 'INCL', 'NEW', 'NIL', 'PROC', 'SIZE', 'TRUNC', 'VAL',
  314. )
  315. # PIM Modula-2 Additional Pseudo-Module Builtins Dataset
  316. pim_additional_pseudo_builtins = (
  317. # 5 additional pseudo builtins
  318. 'SYSTEM', 'PROCESS', 'TSIZE', 'NEWPROCESS', 'TRANSFER',
  319. )
  320. # I S O M o d u l a - 2 D a t a s e t s
  321. # Lexemes to Mark as Error Tokens for ISO Modula-2
  322. iso_lexemes_to_reject = (
  323. '`', '$', '%', '?', '\\', '==', '++', '--', '::', '*.', '+>', '->',
  324. '<<', '>>', '|#', '##',
  325. )
  326. # ISO Modula-2 Additional Reserved Words Dataset
  327. iso_additional_reserved_words = (
  328. # 9 additional reserved words (ISO 10514-1)
  329. 'EXCEPT', 'EXPORT', 'FINALLY', 'FORWARD', 'PACKEDSET', 'QUALIFIED',
  330. 'REM', 'RETRY', 'WITH',
  331. # 10 additional reserved words (ISO 10514-2 & ISO 10514-3)
  332. 'ABSTRACT', 'AS', 'CLASS', 'GUARD', 'INHERIT', 'OVERRIDE', 'READONLY',
  333. 'REVEAL', 'TRACED', 'UNSAFEGUARDED',
  334. )
  335. # ISO Modula-2 Additional Builtins Dataset
  336. iso_additional_builtins = (
  337. # 26 additional builtins (ISO 10514-1)
  338. 'BITSET', 'CAP', 'CMPLX', 'COMPLEX', 'DEC', 'DISPOSE', 'EXCL', 'FLOAT',
  339. 'HALT', 'HIGH', 'IM', 'INC', 'INCL', 'INT', 'INTERRUPTIBLE', 'LENGTH',
  340. 'LFLOAT', 'LONGCOMPLEX', 'NEW', 'PROC', 'PROTECTION', 'RE', 'SIZE',
  341. 'TRUNC', 'UNINTERRUBTIBLE', 'VAL',
  342. # 5 additional builtins (ISO 10514-2 & ISO 10514-3)
  343. 'CREATE', 'DESTROY', 'EMPTY', 'ISMEMBER', 'SELF',
  344. )
  345. # ISO Modula-2 Additional Pseudo-Module Builtins Dataset
  346. iso_additional_pseudo_builtins = (
  347. # 14 additional builtins (SYSTEM)
  348. 'SYSTEM', 'BITSPERLOC', 'LOCSPERBYTE', 'LOCSPERWORD', 'LOC',
  349. 'ADDADR', 'SUBADR', 'DIFADR', 'MAKEADR', 'ADR',
  350. 'ROTATE', 'SHIFT', 'CAST', 'TSIZE',
  351. # 13 additional builtins (COROUTINES)
  352. 'COROUTINES', 'ATTACH', 'COROUTINE', 'CURRENT', 'DETACH', 'HANDLER',
  353. 'INTERRUPTSOURCE', 'IOTRANSFER', 'IsATTACHED', 'LISTEN',
  354. 'NEWCOROUTINE', 'PROT', 'TRANSFER',
  355. # 9 additional builtins (EXCEPTIONS)
  356. 'EXCEPTIONS', 'AllocateSource', 'CurrentNumber', 'ExceptionNumber',
  357. 'ExceptionSource', 'GetMessage', 'IsCurrentSource',
  358. 'IsExceptionalExecution', 'RAISE',
  359. # 3 additional builtins (TERMINATION)
  360. 'TERMINATION', 'IsTerminating', 'HasHalted',
  361. # 19 additional builtins (M2EXCEPTION)
  362. 'M2EXCEPTION', 'M2Exceptions', 'M2Exception', 'IsM2Exception',
  363. 'indexException', 'rangeException', 'caseSelectException',
  364. 'invalidLocation', 'functionException', 'wholeValueException',
  365. 'wholeDivException', 'realValueException', 'realDivException',
  366. 'complexValueException', 'complexDivException', 'protException',
  367. 'sysException', 'coException', 'exException',
  368. )
  369. # M o d u l a - 2 R 1 0 D a t a s e t s
  370. # Lexemes to Mark as Error Tokens for Modula-2 R10
  371. m2r10_lexemes_to_reject = (
  372. '!', '`', '@', '$', '%', '&', '<>',
  373. )
  374. # Modula-2 R10 reserved words in addition to the common set
  375. m2r10_additional_reserved_words = (
  376. # 12 additional reserved words
  377. 'ALIAS', 'ARGLIST', 'BLUEPRINT', 'COPY', 'GENLIB', 'INDETERMINATE',
  378. 'NEW', 'NONE', 'OPAQUE', 'REFERENTIAL', 'RELEASE', 'RETAIN',
  379. # 2 additional reserved words with symbolic assembly option
  380. 'ASM', 'REG',
  381. )
  382. # Modula-2 R10 builtins in addition to the common set
  383. m2r10_additional_builtins = (
  384. # 26 additional builtins
  385. 'CARDINAL', 'COUNT', 'EMPTY', 'EXISTS', 'INSERT', 'LENGTH', 'LONGCARD',
  386. 'OCTET', 'PTR', 'PRED', 'READ', 'READNEW', 'REMOVE', 'RETRIEVE', 'SORT',
  387. 'STORE', 'SUBSET', 'SUCC', 'TLIMIT', 'TMAX', 'TMIN', 'TRUE', 'TSIZE',
  388. 'UNICHAR', 'WRITE', 'WRITEF',
  389. )
  390. # Modula-2 R10 Additional Pseudo-Module Builtins Dataset
  391. m2r10_additional_pseudo_builtins = (
  392. # 13 additional builtins (TPROPERTIES)
  393. 'TPROPERTIES', 'PROPERTY', 'LITERAL', 'TPROPERTY', 'TLITERAL',
  394. 'TBUILTIN', 'TDYN', 'TREFC', 'TNIL', 'TBASE', 'TPRECISION',
  395. 'TMAXEXP', 'TMINEXP',
  396. # 4 additional builtins (CONVERSION)
  397. 'CONVERSION', 'TSXFSIZE', 'SXF', 'VAL',
  398. # 35 additional builtins (UNSAFE)
  399. 'UNSAFE', 'CAST', 'INTRINSIC', 'AVAIL', 'ADD', 'SUB', 'ADDC', 'SUBC',
  400. 'FETCHADD', 'FETCHSUB', 'SHL', 'SHR', 'ASHR', 'ROTL', 'ROTR', 'ROTLC',
  401. 'ROTRC', 'BWNOT', 'BWAND', 'BWOR', 'BWXOR', 'BWNAND', 'BWNOR',
  402. 'SETBIT', 'TESTBIT', 'LSBIT', 'MSBIT', 'CSBITS', 'BAIL', 'HALT',
  403. 'TODO', 'FFI', 'ADDR', 'VARGLIST', 'VARGC',
  404. # 11 additional builtins (ATOMIC)
  405. 'ATOMIC', 'INTRINSIC', 'AVAIL', 'SWAP', 'CAS', 'INC', 'DEC', 'BWAND',
  406. 'BWNAND', 'BWOR', 'BWXOR',
  407. # 7 additional builtins (COMPILER)
  408. 'COMPILER', 'DEBUG', 'MODNAME', 'PROCNAME', 'LINENUM', 'DEFAULT',
  409. 'HASH',
  410. # 5 additional builtins (ASSEMBLER)
  411. 'ASSEMBLER', 'REGISTER', 'SETREG', 'GETREG', 'CODE',
  412. )
  413. # O b j e c t i v e M o d u l a - 2 D a t a s e t s
  414. # Lexemes to Mark as Error Tokens for Objective Modula-2
  415. objm2_lexemes_to_reject = (
  416. '!', '$', '%', '&', '<>',
  417. )
  418. # Objective Modula-2 Extensions
  419. # reserved words in addition to Modula-2 R10
  420. objm2_additional_reserved_words = (
  421. # 16 additional reserved words
  422. 'BYCOPY', 'BYREF', 'CLASS', 'CONTINUE', 'CRITICAL', 'INOUT', 'METHOD',
  423. 'ON', 'OPTIONAL', 'OUT', 'PRIVATE', 'PROTECTED', 'PROTOCOL', 'PUBLIC',
  424. 'SUPER', 'TRY',
  425. )
  426. # Objective Modula-2 Extensions
  427. # builtins in addition to Modula-2 R10
  428. objm2_additional_builtins = (
  429. # 3 additional builtins
  430. 'OBJECT', 'NO', 'YES',
  431. )
  432. # Objective Modula-2 Extensions
  433. # pseudo-module builtins in addition to Modula-2 R10
  434. objm2_additional_pseudo_builtins = (
  435. # None
  436. )
  437. # A g l e t M o d u l a - 2 D a t a s e t s
  438. # Aglet Extensions
  439. # reserved words in addition to ISO Modula-2
  440. aglet_additional_reserved_words = (
  441. # None
  442. )
  443. # Aglet Extensions
  444. # builtins in addition to ISO Modula-2
  445. aglet_additional_builtins = (
  446. # 9 additional builtins
  447. 'BITSET8', 'BITSET16', 'BITSET32', 'CARDINAL8', 'CARDINAL16',
  448. 'CARDINAL32', 'INTEGER8', 'INTEGER16', 'INTEGER32',
  449. )
  450. # Aglet Modula-2 Extensions
  451. # pseudo-module builtins in addition to ISO Modula-2
  452. aglet_additional_pseudo_builtins = (
  453. # None
  454. )
  455. # G N U M o d u l a - 2 D a t a s e t s
  456. # GNU Extensions
  457. # reserved words in addition to PIM Modula-2
  458. gm2_additional_reserved_words = (
  459. # 10 additional reserved words
  460. 'ASM', '__ATTRIBUTE__', '__BUILTIN__', '__COLUMN__', '__DATE__',
  461. '__FILE__', '__FUNCTION__', '__LINE__', '__MODULE__', 'VOLATILE',
  462. )
  463. # GNU Extensions
  464. # builtins in addition to PIM Modula-2
  465. gm2_additional_builtins = (
  466. # 21 additional builtins
  467. 'BITSET8', 'BITSET16', 'BITSET32', 'CARDINAL8', 'CARDINAL16',
  468. 'CARDINAL32', 'CARDINAL64', 'COMPLEX32', 'COMPLEX64', 'COMPLEX96',
  469. 'COMPLEX128', 'INTEGER8', 'INTEGER16', 'INTEGER32', 'INTEGER64',
  470. 'REAL8', 'REAL16', 'REAL32', 'REAL96', 'REAL128', 'THROW',
  471. )
  472. # GNU Extensions
  473. # pseudo-module builtins in addition to PIM Modula-2
  474. gm2_additional_pseudo_builtins = (
  475. # None
  476. )
  477. # p 1 M o d u l a - 2 D a t a s e t s
  478. # p1 Extensions
  479. # reserved words in addition to ISO Modula-2
  480. p1_additional_reserved_words = (
  481. # None
  482. )
  483. # p1 Extensions
  484. # builtins in addition to ISO Modula-2
  485. p1_additional_builtins = (
  486. # None
  487. )
  488. # p1 Modula-2 Extensions
  489. # pseudo-module builtins in addition to ISO Modula-2
  490. p1_additional_pseudo_builtins = (
  491. # 1 additional builtin
  492. 'BCD',
  493. )
  494. # X D S M o d u l a - 2 D a t a s e t s
  495. # XDS Extensions
  496. # reserved words in addition to ISO Modula-2
  497. xds_additional_reserved_words = (
  498. # 1 additional reserved word
  499. 'SEQ',
  500. )
  501. # XDS Extensions
  502. # builtins in addition to ISO Modula-2
  503. xds_additional_builtins = (
  504. # 9 additional builtins
  505. 'ASH', 'ASSERT', 'DIFFADR_TYPE', 'ENTIER', 'INDEX', 'LEN',
  506. 'LONGCARD', 'SHORTCARD', 'SHORTINT',
  507. )
  508. # XDS Modula-2 Extensions
  509. # pseudo-module builtins in addition to ISO Modula-2
  510. xds_additional_pseudo_builtins = (
  511. # 22 additional builtins (SYSTEM)
  512. 'PROCESS', 'NEWPROCESS', 'BOOL8', 'BOOL16', 'BOOL32', 'CARD8',
  513. 'CARD16', 'CARD32', 'INT8', 'INT16', 'INT32', 'REF', 'MOVE',
  514. 'FILL', 'GET', 'PUT', 'CC', 'int', 'unsigned', 'size_t', 'void'
  515. # 3 additional builtins (COMPILER)
  516. 'COMPILER', 'OPTION', 'EQUATION'
  517. )
  518. # P I M S t a n d a r d L i b r a r y D a t a s e t s
  519. # PIM Modula-2 Standard Library Modules Dataset
  520. pim_stdlib_module_identifiers = (
  521. 'Terminal', 'FileSystem', 'InOut', 'RealInOut', 'MathLib0', 'Storage',
  522. )
  523. # PIM Modula-2 Standard Library Types Dataset
  524. pim_stdlib_type_identifiers = (
  525. 'Flag', 'FlagSet', 'Response', 'Command', 'Lock', 'Permission',
  526. 'MediumType', 'File', 'FileProc', 'DirectoryProc', 'FileCommand',
  527. 'DirectoryCommand',
  528. )
  529. # PIM Modula-2 Standard Library Procedures Dataset
  530. pim_stdlib_proc_identifiers = (
  531. 'Read', 'BusyRead', 'ReadAgain', 'Write', 'WriteString', 'WriteLn',
  532. 'Create', 'Lookup', 'Close', 'Delete', 'Rename', 'SetRead', 'SetWrite',
  533. 'SetModify', 'SetOpen', 'Doio', 'SetPos', 'GetPos', 'Length', 'Reset',
  534. 'Again', 'ReadWord', 'WriteWord', 'ReadChar', 'WriteChar',
  535. 'CreateMedium', 'DeleteMedium', 'AssignName', 'DeassignName',
  536. 'ReadMedium', 'LookupMedium', 'OpenInput', 'OpenOutput', 'CloseInput',
  537. 'CloseOutput', 'ReadString', 'ReadInt', 'ReadCard', 'ReadWrd',
  538. 'WriteInt', 'WriteCard', 'WriteOct', 'WriteHex', 'WriteWrd',
  539. 'ReadReal', 'WriteReal', 'WriteFixPt', 'WriteRealOct', 'sqrt', 'exp',
  540. 'ln', 'sin', 'cos', 'arctan', 'entier', 'ALLOCATE', 'DEALLOCATE',
  541. )
  542. # PIM Modula-2 Standard Library Variables Dataset
  543. pim_stdlib_var_identifiers = (
  544. 'Done', 'termCH', 'in', 'out'
  545. )
  546. # PIM Modula-2 Standard Library Constants Dataset
  547. pim_stdlib_const_identifiers = (
  548. 'EOL',
  549. )
  550. # I S O S t a n d a r d L i b r a r y D a t a s e t s
  551. # ISO Modula-2 Standard Library Modules Dataset
  552. iso_stdlib_module_identifiers = (
  553. # TO DO
  554. )
  555. # ISO Modula-2 Standard Library Types Dataset
  556. iso_stdlib_type_identifiers = (
  557. # TO DO
  558. )
  559. # ISO Modula-2 Standard Library Procedures Dataset
  560. iso_stdlib_proc_identifiers = (
  561. # TO DO
  562. )
  563. # ISO Modula-2 Standard Library Variables Dataset
  564. iso_stdlib_var_identifiers = (
  565. # TO DO
  566. )
  567. # ISO Modula-2 Standard Library Constants Dataset
  568. iso_stdlib_const_identifiers = (
  569. # TO DO
  570. )
  571. # M 2 R 1 0 S t a n d a r d L i b r a r y D a t a s e t s
  572. # Modula-2 R10 Standard Library ADTs Dataset
  573. m2r10_stdlib_adt_identifiers = (
  574. 'BCD', 'LONGBCD', 'BITSET', 'SHORTBITSET', 'LONGBITSET',
  575. 'LONGLONGBITSET', 'COMPLEX', 'LONGCOMPLEX', 'SHORTCARD', 'LONGLONGCARD',
  576. 'SHORTINT', 'LONGLONGINT', 'POSINT', 'SHORTPOSINT', 'LONGPOSINT',
  577. 'LONGLONGPOSINT', 'BITSET8', 'BITSET16', 'BITSET32', 'BITSET64',
  578. 'BITSET128', 'BS8', 'BS16', 'BS32', 'BS64', 'BS128', 'CARDINAL8',
  579. 'CARDINAL16', 'CARDINAL32', 'CARDINAL64', 'CARDINAL128', 'CARD8',
  580. 'CARD16', 'CARD32', 'CARD64', 'CARD128', 'INTEGER8', 'INTEGER16',
  581. 'INTEGER32', 'INTEGER64', 'INTEGER128', 'INT8', 'INT16', 'INT32',
  582. 'INT64', 'INT128', 'STRING', 'UNISTRING',
  583. )
  584. # Modula-2 R10 Standard Library Blueprints Dataset
  585. m2r10_stdlib_blueprint_identifiers = (
  586. 'ProtoRoot', 'ProtoComputational', 'ProtoNumeric', 'ProtoScalar',
  587. 'ProtoNonScalar', 'ProtoCardinal', 'ProtoInteger', 'ProtoReal',
  588. 'ProtoComplex', 'ProtoVector', 'ProtoTuple', 'ProtoCompArray',
  589. 'ProtoCollection', 'ProtoStaticArray', 'ProtoStaticSet',
  590. 'ProtoStaticString', 'ProtoArray', 'ProtoString', 'ProtoSet',
  591. 'ProtoMultiSet', 'ProtoDictionary', 'ProtoMultiDict', 'ProtoExtension',
  592. 'ProtoIO', 'ProtoCardMath', 'ProtoIntMath', 'ProtoRealMath',
  593. )
  594. # Modula-2 R10 Standard Library Modules Dataset
  595. m2r10_stdlib_module_identifiers = (
  596. 'ASCII', 'BooleanIO', 'CharIO', 'UnicharIO', 'OctetIO',
  597. 'CardinalIO', 'LongCardIO', 'IntegerIO', 'LongIntIO', 'RealIO',
  598. 'LongRealIO', 'BCDIO', 'LongBCDIO', 'CardMath', 'LongCardMath',
  599. 'IntMath', 'LongIntMath', 'RealMath', 'LongRealMath', 'BCDMath',
  600. 'LongBCDMath', 'FileIO', 'FileSystem', 'Storage', 'IOSupport',
  601. )
  602. # Modula-2 R10 Standard Library Types Dataset
  603. m2r10_stdlib_type_identifiers = (
  604. 'File', 'Status',
  605. # TO BE COMPLETED
  606. )
  607. # Modula-2 R10 Standard Library Procedures Dataset
  608. m2r10_stdlib_proc_identifiers = (
  609. 'ALLOCATE', 'DEALLOCATE', 'SIZE',
  610. # TO BE COMPLETED
  611. )
  612. # Modula-2 R10 Standard Library Variables Dataset
  613. m2r10_stdlib_var_identifiers = (
  614. 'stdIn', 'stdOut', 'stdErr',
  615. )
  616. # Modula-2 R10 Standard Library Constants Dataset
  617. m2r10_stdlib_const_identifiers = (
  618. 'pi', 'tau',
  619. )
  620. # D i a l e c t s
  621. # Dialect modes
  622. dialects = (
  623. 'unknown',
  624. 'm2pim', 'm2iso', 'm2r10', 'objm2',
  625. 'm2iso+aglet', 'm2pim+gm2', 'm2iso+p1', 'm2iso+xds',
  626. )
  627. # D a t a b a s e s
  628. # Lexemes to Mark as Errors Database
  629. lexemes_to_reject_db = {
  630. # Lexemes to reject for unknown dialect
  631. 'unknown': (
  632. # LEAVE THIS EMPTY
  633. ),
  634. # Lexemes to reject for PIM Modula-2
  635. 'm2pim': (
  636. pim_lexemes_to_reject,
  637. ),
  638. # Lexemes to reject for ISO Modula-2
  639. 'm2iso': (
  640. iso_lexemes_to_reject,
  641. ),
  642. # Lexemes to reject for Modula-2 R10
  643. 'm2r10': (
  644. m2r10_lexemes_to_reject,
  645. ),
  646. # Lexemes to reject for Objective Modula-2
  647. 'objm2': (
  648. objm2_lexemes_to_reject,
  649. ),
  650. # Lexemes to reject for Aglet Modula-2
  651. 'm2iso+aglet': (
  652. iso_lexemes_to_reject,
  653. ),
  654. # Lexemes to reject for GNU Modula-2
  655. 'm2pim+gm2': (
  656. pim_lexemes_to_reject,
  657. ),
  658. # Lexemes to reject for p1 Modula-2
  659. 'm2iso+p1': (
  660. iso_lexemes_to_reject,
  661. ),
  662. # Lexemes to reject for XDS Modula-2
  663. 'm2iso+xds': (
  664. iso_lexemes_to_reject,
  665. ),
  666. }
  667. # Reserved Words Database
  668. reserved_words_db = {
  669. # Reserved words for unknown dialect
  670. 'unknown': (
  671. common_reserved_words,
  672. pim_additional_reserved_words,
  673. iso_additional_reserved_words,
  674. m2r10_additional_reserved_words,
  675. ),
  676. # Reserved words for PIM Modula-2
  677. 'm2pim': (
  678. common_reserved_words,
  679. pim_additional_reserved_words,
  680. ),
  681. # Reserved words for ISO Modula-2
  682. 'm2iso': (
  683. common_reserved_words,
  684. iso_additional_reserved_words,
  685. ),
  686. # Reserved words for Modula-2 R10
  687. 'm2r10': (
  688. common_reserved_words,
  689. m2r10_additional_reserved_words,
  690. ),
  691. # Reserved words for Objective Modula-2
  692. 'objm2': (
  693. common_reserved_words,
  694. m2r10_additional_reserved_words,
  695. objm2_additional_reserved_words,
  696. ),
  697. # Reserved words for Aglet Modula-2 Extensions
  698. 'm2iso+aglet': (
  699. common_reserved_words,
  700. iso_additional_reserved_words,
  701. aglet_additional_reserved_words,
  702. ),
  703. # Reserved words for GNU Modula-2 Extensions
  704. 'm2pim+gm2': (
  705. common_reserved_words,
  706. pim_additional_reserved_words,
  707. gm2_additional_reserved_words,
  708. ),
  709. # Reserved words for p1 Modula-2 Extensions
  710. 'm2iso+p1': (
  711. common_reserved_words,
  712. iso_additional_reserved_words,
  713. p1_additional_reserved_words,
  714. ),
  715. # Reserved words for XDS Modula-2 Extensions
  716. 'm2iso+xds': (
  717. common_reserved_words,
  718. iso_additional_reserved_words,
  719. xds_additional_reserved_words,
  720. ),
  721. }
  722. # Builtins Database
  723. builtins_db = {
  724. # Builtins for unknown dialect
  725. 'unknown': (
  726. common_builtins,
  727. pim_additional_builtins,
  728. iso_additional_builtins,
  729. m2r10_additional_builtins,
  730. ),
  731. # Builtins for PIM Modula-2
  732. 'm2pim': (
  733. common_builtins,
  734. pim_additional_builtins,
  735. ),
  736. # Builtins for ISO Modula-2
  737. 'm2iso': (
  738. common_builtins,
  739. iso_additional_builtins,
  740. ),
  741. # Builtins for Modula-2 R10
  742. 'm2r10': (
  743. common_builtins,
  744. m2r10_additional_builtins,
  745. ),
  746. # Builtins for Objective Modula-2
  747. 'objm2': (
  748. common_builtins,
  749. m2r10_additional_builtins,
  750. objm2_additional_builtins,
  751. ),
  752. # Builtins for Aglet Modula-2 Extensions
  753. 'm2iso+aglet': (
  754. common_builtins,
  755. iso_additional_builtins,
  756. aglet_additional_builtins,
  757. ),
  758. # Builtins for GNU Modula-2 Extensions
  759. 'm2pim+gm2': (
  760. common_builtins,
  761. pim_additional_builtins,
  762. gm2_additional_builtins,
  763. ),
  764. # Builtins for p1 Modula-2 Extensions
  765. 'm2iso+p1': (
  766. common_builtins,
  767. iso_additional_builtins,
  768. p1_additional_builtins,
  769. ),
  770. # Builtins for XDS Modula-2 Extensions
  771. 'm2iso+xds': (
  772. common_builtins,
  773. iso_additional_builtins,
  774. xds_additional_builtins,
  775. ),
  776. }
  777. # Pseudo-Module Builtins Database
  778. pseudo_builtins_db = {
  779. # Builtins for unknown dialect
  780. 'unknown': (
  781. common_pseudo_builtins,
  782. pim_additional_pseudo_builtins,
  783. iso_additional_pseudo_builtins,
  784. m2r10_additional_pseudo_builtins,
  785. ),
  786. # Builtins for PIM Modula-2
  787. 'm2pim': (
  788. common_pseudo_builtins,
  789. pim_additional_pseudo_builtins,
  790. ),
  791. # Builtins for ISO Modula-2
  792. 'm2iso': (
  793. common_pseudo_builtins,
  794. iso_additional_pseudo_builtins,
  795. ),
  796. # Builtins for Modula-2 R10
  797. 'm2r10': (
  798. common_pseudo_builtins,
  799. m2r10_additional_pseudo_builtins,
  800. ),
  801. # Builtins for Objective Modula-2
  802. 'objm2': (
  803. common_pseudo_builtins,
  804. m2r10_additional_pseudo_builtins,
  805. objm2_additional_pseudo_builtins,
  806. ),
  807. # Builtins for Aglet Modula-2 Extensions
  808. 'm2iso+aglet': (
  809. common_pseudo_builtins,
  810. iso_additional_pseudo_builtins,
  811. aglet_additional_pseudo_builtins,
  812. ),
  813. # Builtins for GNU Modula-2 Extensions
  814. 'm2pim+gm2': (
  815. common_pseudo_builtins,
  816. pim_additional_pseudo_builtins,
  817. gm2_additional_pseudo_builtins,
  818. ),
  819. # Builtins for p1 Modula-2 Extensions
  820. 'm2iso+p1': (
  821. common_pseudo_builtins,
  822. iso_additional_pseudo_builtins,
  823. p1_additional_pseudo_builtins,
  824. ),
  825. # Builtins for XDS Modula-2 Extensions
  826. 'm2iso+xds': (
  827. common_pseudo_builtins,
  828. iso_additional_pseudo_builtins,
  829. xds_additional_pseudo_builtins,
  830. ),
  831. }
  # Standard Library ADTs Database
  #
  # Maps each dialect id to the tuple of standard library ADT identifier
  # collections.  set_dialect() merges them into one set, leaving out
  # reserved words.  An empty tuple means the dialect has no such identifiers.
  stdlib_adts_db = {
      # Empty entry for unknown dialect
      'unknown': (
          # LEAVE THIS EMPTY
      ),
      # Standard Library ADTs for PIM Modula-2
      'm2pim': (
          # No first class library types
      ),
      # Standard Library ADTs for ISO Modula-2
      'm2iso': (
          # No first class library types
      ),
      # Standard Library ADTs for Modula-2 R10
      'm2r10': (
          m2r10_stdlib_adt_identifiers,
      ),
      # Standard Library ADTs for Objective Modula-2
      'objm2': (
          m2r10_stdlib_adt_identifiers,
      ),
      # Standard Library ADTs for Aglet Modula-2
      'm2iso+aglet': (
          # No first class library types
      ),
      # Standard Library ADTs for GNU Modula-2
      'm2pim+gm2': (
          # No first class library types
      ),
      # Standard Library ADTs for p1 Modula-2
      'm2iso+p1': (
          # No first class library types
      ),
      # Standard Library ADTs for XDS Modula-2
      'm2iso+xds': (
          # No first class library types
      ),
  }
  # Standard Library Modules Database
  #
  # Maps each dialect id to the tuple of standard library module identifier
  # collections.  set_dialect() merges them into one set, leaving out
  # identifiers that are already builtins of the dialect.
  stdlib_modules_db = {
      # Empty entry for unknown dialect
      'unknown': (
          # LEAVE THIS EMPTY
      ),
      # Standard Library Modules for PIM Modula-2
      'm2pim': (
          pim_stdlib_module_identifiers,
      ),
      # Standard Library Modules for ISO Modula-2
      'm2iso': (
          iso_stdlib_module_identifiers,
      ),
      # Standard Library Modules for Modula-2 R10
      'm2r10': (
          m2r10_stdlib_blueprint_identifiers,
          m2r10_stdlib_module_identifiers,
          m2r10_stdlib_adt_identifiers,
      ),
      # Standard Library Modules for Objective Modula-2
      # NOTE(review): unlike 'm2r10', this entry does not list
      # m2r10_stdlib_adt_identifiers -- confirm whether that is intentional.
      'objm2': (
          m2r10_stdlib_blueprint_identifiers,
          m2r10_stdlib_module_identifiers,
      ),
      # Standard Library Modules for Aglet Modula-2
      'm2iso+aglet': (
          iso_stdlib_module_identifiers,
      ),
      # Standard Library Modules for GNU Modula-2
      'm2pim+gm2': (
          pim_stdlib_module_identifiers,
      ),
      # Standard Library Modules for p1 Modula-2
      'm2iso+p1': (
          iso_stdlib_module_identifiers,
      ),
      # Standard Library Modules for XDS Modula-2
      'm2iso+xds': (
          iso_stdlib_module_identifiers,
      ),
  }
  # Standard Library Types Database
  #
  # Maps each dialect id to the tuple of standard library type identifier
  # collections.  set_dialect() merges them into one set, leaving out
  # identifiers that are already builtins of the dialect.
  stdlib_types_db = {
      # Empty entry for unknown dialect
      'unknown': (
          # LEAVE THIS EMPTY
      ),
      # Standard Library Types for PIM Modula-2
      'm2pim': (
          pim_stdlib_type_identifiers,
      ),
      # Standard Library Types for ISO Modula-2
      'm2iso': (
          iso_stdlib_type_identifiers,
      ),
      # Standard Library Types for Modula-2 R10
      'm2r10': (
          m2r10_stdlib_type_identifiers,
      ),
      # Standard Library Types for Objective Modula-2
      'objm2': (
          m2r10_stdlib_type_identifiers,
      ),
      # Standard Library Types for Aglet Modula-2
      'm2iso+aglet': (
          iso_stdlib_type_identifiers,
      ),
      # Standard Library Types for GNU Modula-2
      'm2pim+gm2': (
          pim_stdlib_type_identifiers,
      ),
      # Standard Library Types for p1 Modula-2
      'm2iso+p1': (
          iso_stdlib_type_identifiers,
      ),
      # Standard Library Types for XDS Modula-2
      'm2iso+xds': (
          iso_stdlib_type_identifiers,
      ),
  }
  # Standard Library Procedures Database
  #
  # Maps each dialect id to the tuple of standard library procedure
  # identifier collections.  set_dialect() merges them into one set, leaving
  # out identifiers that are already builtins of the dialect.
  stdlib_procedures_db = {
      # Empty entry for unknown dialect
      'unknown': (
          # LEAVE THIS EMPTY
      ),
      # Standard Library Procedures for PIM Modula-2
      'm2pim': (
          pim_stdlib_proc_identifiers,
      ),
      # Standard Library Procedures for ISO Modula-2
      'm2iso': (
          iso_stdlib_proc_identifiers,
      ),
      # Standard Library Procedures for Modula-2 R10
      'm2r10': (
          m2r10_stdlib_proc_identifiers,
      ),
      # Standard Library Procedures for Objective Modula-2
      'objm2': (
          m2r10_stdlib_proc_identifiers,
      ),
      # Standard Library Procedures for Aglet Modula-2
      'm2iso+aglet': (
          iso_stdlib_proc_identifiers,
      ),
      # Standard Library Procedures for GNU Modula-2
      'm2pim+gm2': (
          pim_stdlib_proc_identifiers,
      ),
      # Standard Library Procedures for p1 Modula-2
      'm2iso+p1': (
          iso_stdlib_proc_identifiers,
      ),
      # Standard Library Procedures for XDS Modula-2
      'm2iso+xds': (
          iso_stdlib_proc_identifiers,
      ),
  }
  # Standard Library Variables Database
  #
  # Maps each dialect id to the tuple of standard library variable identifier
  # collections.  set_dialect() merges them into one set, leaving out
  # identifiers that are already builtins of the dialect.
  stdlib_variables_db = {
      # Empty entry for unknown dialect
      'unknown': (
          # LEAVE THIS EMPTY
      ),
      # Standard Library Variables for PIM Modula-2
      'm2pim': (
          pim_stdlib_var_identifiers,
      ),
      # Standard Library Variables for ISO Modula-2
      'm2iso': (
          iso_stdlib_var_identifiers,
      ),
      # Standard Library Variables for Modula-2 R10
      'm2r10': (
          m2r10_stdlib_var_identifiers,
      ),
      # Standard Library Variables for Objective Modula-2
      'objm2': (
          m2r10_stdlib_var_identifiers,
      ),
      # Standard Library Variables for Aglet Modula-2
      'm2iso+aglet': (
          iso_stdlib_var_identifiers,
      ),
      # Standard Library Variables for GNU Modula-2
      'm2pim+gm2': (
          pim_stdlib_var_identifiers,
      ),
      # Standard Library Variables for p1 Modula-2
      'm2iso+p1': (
          iso_stdlib_var_identifiers,
      ),
      # Standard Library Variables for XDS Modula-2
      'm2iso+xds': (
          iso_stdlib_var_identifiers,
      ),
  }
  # Standard Library Constants Database
  #
  # Maps each dialect id to the tuple of standard library constant identifier
  # collections.  set_dialect() merges them into one set, leaving out
  # identifiers that are already builtins of the dialect.
  stdlib_constants_db = {
      # Empty entry for unknown dialect
      'unknown': (
          # LEAVE THIS EMPTY
      ),
      # Standard Library Constants for PIM Modula-2
      'm2pim': (
          pim_stdlib_const_identifiers,
      ),
      # Standard Library Constants for ISO Modula-2
      'm2iso': (
          iso_stdlib_const_identifiers,
      ),
      # Standard Library Constants for Modula-2 R10
      'm2r10': (
          m2r10_stdlib_const_identifiers,
      ),
      # Standard Library Constants for Objective Modula-2
      'objm2': (
          m2r10_stdlib_const_identifiers,
      ),
      # Standard Library Constants for Aglet Modula-2
      'm2iso+aglet': (
          iso_stdlib_const_identifiers,
      ),
      # Standard Library Constants for GNU Modula-2
      'm2pim+gm2': (
          pim_stdlib_const_identifiers,
      ),
      # Standard Library Constants for p1 Modula-2
      'm2iso+p1': (
          iso_stdlib_const_identifiers,
      ),
      # Standard Library Constants for XDS Modula-2
      'm2iso+xds': (
          iso_stdlib_const_identifiers,
      ),
  }
  1069. # M e t h o d s
  1070. # initialise a lexer instance
  def __init__(self, **options):
      """Initialise a lexer instance from the given lexer options.

      Options read here (all others are passed on to ``RegexLexer``):

      `dialect`
          List of dialect ids. The first id that names a selectable dialect
          is used; if none does, the lexer runs in fallback mode
          (dialect ``'unknown'``).
      `style`
          List of style names. If it contains ``'algol'`` or ``'algol_nu'``,
          Algol publication mode is enabled.
      `treat_stdlib_adts_as_builtins`
          Boolean flag, defaults to ``True``.
      """
      #
      # check dialect options
      #
      # Every entry after index 0 is selectable.  The previous slice
      # ``[1:-1]`` also dropped the last entry, so that dialect could be
      # chosen by an embedded dialect tag but never by the `dialect` option.
      # (index 0 is presumably the 'unknown' fallback id, as it is likewise
      # skipped by get_dialect_from_dialect_tag)
      selectable_dialects = self.dialects[1:]
      dialects = get_list_opt(options, 'dialect', [])
      for dialect_option in dialects:
          if dialect_option in selectable_dialects:
              # valid dialect option found
              self.set_dialect(dialect_option)
              break
      else:
          # Fallback Mode (DEFAULT): no valid dialect option
          self.set_dialect('unknown')
      #
      # a dialect tag embedded in the source may still override the option
      self.dialect_set_by_tag = False
      #
      # check style options
      #
      styles = get_list_opt(options, 'style', [])
      # use lowercase mode for Algol style
      self.algol_publication_mode = 'algol' in styles or 'algol_nu' in styles
      #
      # check option flags
      #
      self.treat_stdlib_adts_as_builtins = get_bool_opt(
          options, 'treat_stdlib_adts_as_builtins', True)
      #
      # call superclass initialiser
      RegexLexer.__init__(self, **options)
  1107. # Set lexer to a specified dialect
  def set_dialect(self, dialect_id):
      """Set the lexer to the dialect identified by `dialect_id`.

      If `dialect_id` is not listed in ``self.dialects``, the dialect
      ``'unknown'`` is selected instead.  The identifier sets of the
      instance are then recomposed from the class-level databases:

      * ``lexemes_to_reject`` and ``reserved_words`` are taken as they are,
      * ``builtins``, ``pseudo_builtins`` and ``adts`` leave out reserved
        words,
      * ``modules``, ``types``, ``procedures``, ``variables`` and
        ``constants`` leave out builtins.

      Instance state is only updated after all sets have been composed.
      """
      # check dialect name against known dialects
      dialect = dialect_id if dialect_id in self.dialects else 'unknown'

      def compose(database, excluded=frozenset()):
          # union of every identifier collection the database lists for
          # the dialect, minus the identifiers in `excluded`
          return set().union(*database[dialect]).difference(excluded)

      lexemes_to_reject_set = compose(self.lexemes_to_reject_db)
      reswords_set = compose(self.reserved_words_db)
      #
      # predefined identifiers must not clash with reserved words
      builtins_set = compose(self.builtins_db, reswords_set)
      pseudo_builtins_set = compose(self.pseudo_builtins_db, reswords_set)
      adts_set = compose(self.stdlib_adts_db, reswords_set)
      #
      # library identifiers must not clash with builtins
      modules_set = compose(self.stdlib_modules_db, builtins_set)
      types_set = compose(self.stdlib_types_db, builtins_set)
      procedures_set = compose(self.stdlib_procedures_db, builtins_set)
      variables_set = compose(self.stdlib_variables_db, builtins_set)
      constants_set = compose(self.stdlib_constants_db, builtins_set)
      #
      # update lexer state
      self.dialect = dialect
      self.lexemes_to_reject = lexemes_to_reject_set
      self.reserved_words = reswords_set
      self.builtins = builtins_set
      self.pseudo_builtins = pseudo_builtins_set
      self.adts = adts_set
      self.modules = modules_set
      self.types = types_set
      self.procedures = procedures_set
      self.variables = variables_set
      self.constants = constants_set
  1206. # Extracts a dialect name from a dialect tag comment string and checks
  1207. # the extracted name against known dialects. If a match is found, the
  1208. # matching name is returned, otherwise dialect id 'unknown' is returned
  def get_dialect_from_dialect_tag(self, dialect_tag):
      """Return the dialect id named by a dialect tag comment string.

      A dialect tag is a comment of the form ``(*!<indicator>*)``.  The
      indicator between the delimiters is returned if it equals any entry
      of ``self.dialects`` other than the first one.  In every other case
      (malformed tag, empty indicator, no matching dialect) the id
      ``'unknown'`` is returned.
      """
      opening, closing = '(*!', '*)'
      #
      # a tag must hold at least one character between its delimiters
      if len(dialect_tag) <= len(opening) + len(closing):
          return 'unknown'  # default
      #
      # both delimiters must be present
      if not (dialect_tag.startswith(opening)
              and dialect_tag.endswith(closing)):
          return 'unknown'  # default
      #
      # extract the indicator and check it against the known dialects,
      # skipping the entry at index 0
      candidate = dialect_tag[len(opening):-len(closing)]
      if candidate in self.dialects[1:]:
          return candidate
      #
      # indicator does not match any dialect
      return 'unknown'  # default
  1255. # intercept the token stream, modify token attributes and return them
  def get_tokens_unprocessed(self, text):
      """Yield ``(index, token, value)`` triples for `text`.

      The triples come from ``RegexLexer.get_tokens_unprocessed`` and are
      adjusted before being yielded:

      * the first ``Comment.Special`` token that is a recognised dialect
        tag switches the lexer to that dialect (once per instance, guarded
        by ``self.dialect_set_by_tag``),
      * ``Name`` tokens are reclassified as reserved words, builtins or
        standard library identifiers of the current dialect,
      * number literals and comments that the current dialect does not
        allow are turned into ``Error`` tokens,
      * in Algol publication mode certain values are lowercased or
        replaced by Unicode symbols.

      NOTE(review): the block nesting below was restored from a copy of the
      source that had lost its indentation -- confirm against the canonical
      file, in particular the lowercase conversion in the ADT branch.
      """
      for index, token, value in RegexLexer.get_tokens_unprocessed(self, text):
          #
          # check for dialect tag if dialect has not been set by tag
          if not self.dialect_set_by_tag and token == Comment.Special:
              indicated_dialect = self.get_dialect_from_dialect_tag(value)
              if indicated_dialect != 'unknown':
                  # token is a dialect indicator
                  # reset reserved words and builtins
                  self.set_dialect(indicated_dialect)
                  self.dialect_set_by_tag = True
          #
          # check for reserved words, predefined and stdlib identifiers;
          # the order of the branches defines the precedence of the classes
          if token is Name:
              if value in self.reserved_words:
                  token = Keyword.Reserved
                  if self.algol_publication_mode:
                      value = value.lower()
              #
              elif value in self.builtins:
                  token = Name.Builtin
                  if self.algol_publication_mode:
                      value = value.lower()
              #
              elif value in self.pseudo_builtins:
                  token = Name.Builtin.Pseudo
                  if self.algol_publication_mode:
                      value = value.lower()
              #
              elif value in self.adts:
                  # ADTs are shown either as library namespaces or as
                  # pseudo builtins, depending on the option flag
                  if not self.treat_stdlib_adts_as_builtins:
                      token = Name.Namespace
                  else:
                      token = Name.Builtin.Pseudo
                      if self.algol_publication_mode:
                          value = value.lower()
              #
              elif value in self.modules:
                  token = Name.Namespace
              #
              elif value in self.types:
                  token = Name.Class
              #
              elif value in self.procedures:
                  token = Name.Function
              #
              elif value in self.variables:
                  token = Name.Variable
              #
              elif value in self.constants:
                  token = Name.Constant
          #
          elif token in Number:
              #
              # mark prefix number literals as error for PIM and ISO dialects
              if self.dialect not in ('unknown', 'm2r10', 'objm2'):
                  if "'" in value or value[0:2] in ('0b', '0x', '0u'):
                      token = Error
              #
              elif self.dialect in ('m2r10', 'objm2'):
                  # mark base-8 number literals as errors for M2 R10 and ObjM2
                  if token is Number.Oct:
                      token = Error
                  # mark suffix base-16 literals as errors for M2 R10 and ObjM2
                  elif token is Number.Hex and 'H' in value:
                      token = Error
                  # mark real numbers with E as errors for M2 R10 and ObjM2
                  elif token is Number.Float and 'E' in value:
                      token = Error
          #
          elif token in Comment:
              #
              # mark single line comment as error for PIM and ISO dialects
              if token is Comment.Single:
                  if self.dialect not in ('unknown', 'm2r10', 'objm2'):
                      token = Error
              #
              if token is Comment.Preproc:
                  # mark ISO pragma as error for PIM dialects
                  if value.startswith('<*') and \
                          self.dialect.startswith('m2pim'):
                      token = Error
                  # mark PIM pragma as comment for other dialects
                  elif value.startswith('(*$') and \
                          self.dialect != 'unknown' and \
                          not self.dialect.startswith('m2pim'):
                      token = Comment.Multiline
          #
          else:  # token is neither Name, Number nor Comment
              #
              # mark lexemes matching the dialect's error token set as errors
              if value in self.lexemes_to_reject:
                  token = Error
              #
              # substitute lexemes when in Algol mode
              if self.algol_publication_mode:
                  if value == '#':
                      value = u'≠'
                  elif value == '<=':
                      value = u'≤'
                  elif value == '>=':
                      value = u'≥'
                  elif value == '==':
                      value = u'≡'
                  elif value == '*.':
                      value = u'•'
          # return result
          yield index, token, value