1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561 |
- # -*- coding: utf-8 -*-
- """
- pygments.lexers.modula2
- ~~~~~~~~~~~~~~~~~~~~~~~
- Multi-Dialect Lexer for Modula-2.
- :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
- :license: BSD, see LICENSE for details.
- """
- import re
- from pygments.lexer import RegexLexer, include
- from pygments.util import get_bool_opt, get_list_opt
- from pygments.token import Text, Comment, Operator, Keyword, Name, \
- String, Number, Punctuation, Error
- __all__ = ['Modula2Lexer']
- # Multi-Dialect Modula-2 Lexer
- class Modula2Lexer(RegexLexer):
- """
- For `Modula-2 <http://www.modula2.org/>`_ source code.
- The Modula-2 lexer supports several dialects. By default, it operates in
- fallback mode, recognising the *combined* literals, punctuation symbols
- and operators of all supported dialects, and the *combined* reserved words
- and builtins of PIM Modula-2, ISO Modula-2 and Modula-2 R10, while not
- differentiating between library defined identifiers.
- To select a specific dialect, a dialect option may be passed
- or a dialect tag may be embedded into a source file.
- Dialect Options:
- `m2pim`
- Select PIM Modula-2 dialect.
- `m2iso`
- Select ISO Modula-2 dialect.
- `m2r10`
- Select Modula-2 R10 dialect.
- `objm2`
- Select Objective Modula-2 dialect.
- The PIM and ISO dialect options may be qualified with a language extension.
- Language Extensions:
- `+aglet`
- Select Aglet Modula-2 extensions, available with m2iso.
- `+gm2`
- Select GNU Modula-2 extensions, available with m2pim.
- `+p1`
- Select p1 Modula-2 extensions, available with m2iso.
- `+xds`
- Select XDS Modula-2 extensions, available with m2iso.
- Passing a Dialect Option via Unix Commandline Interface
- Dialect options may be passed to the lexer using the `dialect` key.
- Only one such option should be passed. If multiple dialect options are
- passed, the first valid option is used, any subsequent options are ignored.
- Examples:
- `$ pygmentize -O full,dialect=m2iso -f html -o /path/to/output /path/to/input`
- Use ISO dialect to render input to HTML output
- `$ pygmentize -O full,dialect=m2iso+p1 -f rtf -o /path/to/output /path/to/input`
- Use ISO dialect with p1 extensions to render input to RTF output
- Embedding a Dialect Option within a source file
- A dialect option may be embedded in a source file in form of a dialect
- tag, a specially formatted comment that specifies a dialect option.
- Dialect Tag EBNF::
- dialectTag :
- OpeningCommentDelim Prefix dialectOption ClosingCommentDelim ;
- dialectOption :
- 'm2pim' | 'm2iso' | 'm2r10' | 'objm2' |
- 'm2iso+aglet' | 'm2pim+gm2' | 'm2iso+p1' | 'm2iso+xds' ;
- Prefix : '!' ;
- OpeningCommentDelim : '(*' ;
- ClosingCommentDelim : '*)' ;
- No whitespace is permitted between the tokens of a dialect tag.
- In the event that a source file contains multiple dialect tags, the first
- tag that contains a valid dialect option will be used and any subsequent
- dialect tags will be ignored. Ideally, a dialect tag should be placed
- at the beginning of a source file.
- An embedded dialect tag overrides a dialect option set via command line.
- Examples:
- ``(*!m2r10*) DEFINITION MODULE Foobar; ...``
- Use Modula2 R10 dialect to render this source file.
- ``(*!m2pim+gm2*) DEFINITION MODULE Bazbam; ...``
- Use PIM dialect with GNU extensions to render this source file.
- Algol Publication Mode:
- In Algol publication mode, source text is rendered for publication of
- algorithms in scientific papers and academic texts, following the format
- of the Revised Algol-60 Language Report. It is activated by passing
- one of two corresponding styles as an option:
- `algol`
- render reserved words lowercase underline boldface
- and builtins lowercase boldface italic
- `algol_nu`
- render reserved words lowercase boldface (no underlining)
- and builtins lowercase boldface italic
- The lexer automatically performs the required lowercase conversion when
- this mode is activated.
- Example:
- ``$ pygmentize -O full,style=algol -f latex -o /path/to/output /path/to/input``
- Render input file in Algol publication mode to LaTeX output.
- Rendering Mode of First Class ADT Identifiers:
- The rendering of standard library first class ADT identifiers is controlled
- by option flag "treat_stdlib_adts_as_builtins".
- When this option is turned on, standard library ADT identifiers are rendered
- as builtins. When it is turned off, they are rendered as ordinary library
- identifiers.
- `treat_stdlib_adts_as_builtins` (default: On)
- The option is useful for dialects that support ADTs as first class objects
- and provide ADTs in the standard library that would otherwise be built-in.
- At present, only Modula-2 R10 supports library ADTs as first class objects
- and therefore, no ADT identifiers are defined for any other dialects.
- Example:
- ``$ pygmentize -O full,dialect=m2r10,treat_stdlib_adts_as_builtins=Off ...``
- Render standard library ADTs as ordinary library types.
- .. versionadded:: 1.3
- .. versionchanged:: 2.1
- Added multi-dialect support.
- """
- name = 'Modula-2'
- aliases = ['modula2', 'm2']
- filenames = ['*.def', '*.mod']
- mimetypes = ['text/x-modula2']
- flags = re.MULTILINE | re.DOTALL
- tokens = {
- 'whitespace': [
- (r'\n+', Text), # blank lines
- (r'\s+', Text), # whitespace
- ],
- 'dialecttags': [
- # PIM Dialect Tag
- (r'\(\*!m2pim\*\)', Comment.Special),
- # ISO Dialect Tag
- (r'\(\*!m2iso\*\)', Comment.Special),
- # M2R10 Dialect Tag
- (r'\(\*!m2r10\*\)', Comment.Special),
- # ObjM2 Dialect Tag
- (r'\(\*!objm2\*\)', Comment.Special),
- # Aglet Extensions Dialect Tag
- (r'\(\*!m2iso\+aglet\*\)', Comment.Special),
- # GNU Extensions Dialect Tag
- (r'\(\*!m2pim\+gm2\*\)', Comment.Special),
- # p1 Extensions Dialect Tag
- (r'\(\*!m2iso\+p1\*\)', Comment.Special),
- # XDS Extensions Dialect Tag
- (r'\(\*!m2iso\+xds\*\)', Comment.Special),
- ],
- 'identifiers': [
- (r'([a-zA-Z_$][\w$]*)', Name),
- ],
- 'prefixed_number_literals': [
- #
- # Base-2, whole number
- (r'0b[01]+(\'[01]+)*', Number.Bin),
- #
- # Base-16, whole number
- (r'0[ux][0-9A-F]+(\'[0-9A-F]+)*', Number.Hex),
- ],
- 'plain_number_literals': [
- #
- # Base-10, real number with exponent
- (r'[0-9]+(\'[0-9]+)*' # integral part
- r'\.[0-9]+(\'[0-9]+)*' # fractional part
- r'[eE][+-]?[0-9]+(\'[0-9]+)*', # exponent
- Number.Float),
- #
- # Base-10, real number without exponent
- (r'[0-9]+(\'[0-9]+)*' # integral part
- r'\.[0-9]+(\'[0-9]+)*', # fractional part
- Number.Float),
- #
- # Base-10, whole number
- (r'[0-9]+(\'[0-9]+)*', Number.Integer),
- ],
- 'suffixed_number_literals': [
- #
- # Base-8, whole number
- (r'[0-7]+B', Number.Oct),
- #
- # Base-8, character code
- (r'[0-7]+C', Number.Oct),
- #
- # Base-16, number
- (r'[0-9A-F]+H', Number.Hex),
- ],
- 'string_literals': [
- (r"'(\\\\|\\'|[^'])*'", String), # single quoted string
- (r'"(\\\\|\\"|[^"])*"', String), # double quoted string
- ],
- 'digraph_operators': [
- # Dot Product Operator
- (r'\*\.', Operator),
- # Array Concatenation Operator
- (r'\+>', Operator), # M2R10 + ObjM2
- # Inequality Operator
- (r'<>', Operator), # ISO + PIM
- # Less-Or-Equal, Subset
- (r'<=', Operator),
- # Greater-Or-Equal, Superset
- (r'>=', Operator),
- # Identity Operator
- (r'==', Operator), # M2R10 + ObjM2
- # Type Conversion Operator
- (r'::', Operator), # M2R10 + ObjM2
- # Assignment Symbol
- (r':=', Operator),
- # Postfix Increment Mutator
- (r'\+\+', Operator), # M2R10 + ObjM2
- # Postfix Decrement Mutator
- (r'--', Operator), # M2R10 + ObjM2
- ],
- 'unigraph_operators': [
- # Arithmetic Operators
- (r'[+-]', Operator),
- (r'[*/]', Operator),
- # ISO 80000-2 compliant Set Difference Operator
- (r'\\', Operator), # M2R10 + ObjM2
- # Relational Operators
- (r'[=#<>]', Operator),
- # Dereferencing Operator
- (r'\^', Operator),
- # Dereferencing Operator Synonym
- (r'@', Operator), # ISO
- # Logical AND Operator Synonym
- (r'&', Operator), # PIM + ISO
- # Logical NOT Operator Synonym
- (r'~', Operator), # PIM + ISO
- # Smalltalk Message Prefix
- (r'`', Operator), # ObjM2
- ],
- 'digraph_punctuation': [
- # Range Constructor
- (r'\.\.', Punctuation),
- # Opening Chevron Bracket
- (r'<<', Punctuation), # M2R10 + ISO
- # Closing Chevron Bracket
- (r'>>', Punctuation), # M2R10 + ISO
- # Blueprint Punctuation
- (r'->', Punctuation), # M2R10 + ISO
- # Distinguish |# and # in M2 R10
- (r'\|#', Punctuation),
- # Distinguish ## and # in M2 R10
- (r'##', Punctuation),
- # Distinguish |* and * in M2 R10
- (r'\|\*', Punctuation),
- ],
- 'unigraph_punctuation': [
- # Common Punctuation
- (r'[()\[\]{},.:;|]', Punctuation),
- # Case Label Separator Synonym
- (r'!', Punctuation), # ISO
- # Blueprint Punctuation
- (r'\?', Punctuation), # M2R10 + ObjM2
- ],
- 'comments': [
- # Single Line Comment
- (r'^//.*?\n', Comment.Single), # M2R10 + ObjM2
- # Block Comment
- (r'\(\*([^$].*?)\*\)', Comment.Multiline),
- # Template Block Comment
- (r'/\*(.*?)\*/', Comment.Multiline), # M2R10 + ObjM2
- ],
- 'pragmas': [
- # ISO Style Pragmas
- (r'<\*.*?\*>', Comment.Preproc), # ISO, M2R10 + ObjM2
- # Pascal Style Pragmas
- (r'\(\*\$.*?\*\)', Comment.Preproc), # PIM
- ],
- 'root': [
- include('whitespace'),
- include('dialecttags'),
- include('pragmas'),
- include('comments'),
- include('identifiers'),
- include('suffixed_number_literals'), # PIM + ISO
- include('prefixed_number_literals'), # M2R10 + ObjM2
- include('plain_number_literals'),
- include('string_literals'),
- include('digraph_punctuation'),
- include('digraph_operators'),
- include('unigraph_punctuation'),
- include('unigraph_operators'),
- ]
- }
- # C o m m o n D a t a s e t s
- # Common Reserved Words Dataset
- common_reserved_words = (
- # 37 common reserved words
- 'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION', 'DIV',
- 'DO', 'ELSE', 'ELSIF', 'END', 'EXIT', 'FOR', 'FROM', 'IF',
- 'IMPLEMENTATION', 'IMPORT', 'IN', 'LOOP', 'MOD', 'MODULE', 'NOT',
- 'OF', 'OR', 'POINTER', 'PROCEDURE', 'RECORD', 'REPEAT', 'RETURN',
- 'SET', 'THEN', 'TO', 'TYPE', 'UNTIL', 'VAR', 'WHILE',
- )
- # Common Builtins Dataset
- common_builtins = (
- # 16 common builtins
- 'ABS', 'BOOLEAN', 'CARDINAL', 'CHAR', 'CHR', 'FALSE', 'INTEGER',
- 'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NIL', 'ODD', 'ORD', 'REAL',
- 'TRUE',
- )
- # Common Pseudo-Module Builtins Dataset
- common_pseudo_builtins = (
- # 4 common pseudo builtins
- 'ADDRESS', 'BYTE', 'WORD', 'ADR'
- )
- # P I M M o d u l a - 2 D a t a s e t s
- # Lexemes to Mark as Error Tokens for PIM Modula-2
- pim_lexemes_to_reject = (
- '!', '`', '@', '$', '%', '?', '\\', '==', '++', '--', '::', '*.',
- '+>', '->', '<<', '>>', '|#', '##',
- )
- # PIM Modula-2 Additional Reserved Words Dataset
- pim_additional_reserved_words = (
- # 3 additional reserved words
- 'EXPORT', 'QUALIFIED', 'WITH',
- )
- # PIM Modula-2 Additional Builtins Dataset
- pim_additional_builtins = (
- # 16 additional builtins
- 'BITSET', 'CAP', 'DEC', 'DISPOSE', 'EXCL', 'FLOAT', 'HALT', 'HIGH',
- 'INC', 'INCL', 'NEW', 'NIL', 'PROC', 'SIZE', 'TRUNC', 'VAL',
- )
- # PIM Modula-2 Additional Pseudo-Module Builtins Dataset
- pim_additional_pseudo_builtins = (
- # 5 additional pseudo builtins
- 'SYSTEM', 'PROCESS', 'TSIZE', 'NEWPROCESS', 'TRANSFER',
- )
- # I S O M o d u l a - 2 D a t a s e t s
- # Lexemes to Mark as Error Tokens for ISO Modula-2
- iso_lexemes_to_reject = (
- '`', '$', '%', '?', '\\', '==', '++', '--', '::', '*.', '+>', '->',
- '<<', '>>', '|#', '##',
- )
- # ISO Modula-2 Additional Reserved Words Dataset
- iso_additional_reserved_words = (
- # 9 additional reserved words (ISO 10514-1)
- 'EXCEPT', 'EXPORT', 'FINALLY', 'FORWARD', 'PACKEDSET', 'QUALIFIED',
- 'REM', 'RETRY', 'WITH',
- # 10 additional reserved words (ISO 10514-2 & ISO 10514-3)
- 'ABSTRACT', 'AS', 'CLASS', 'GUARD', 'INHERIT', 'OVERRIDE', 'READONLY',
- 'REVEAL', 'TRACED', 'UNSAFEGUARDED',
- )
- # ISO Modula-2 Additional Builtins Dataset
- iso_additional_builtins = (
- # 26 additional builtins (ISO 10514-1)
- 'BITSET', 'CAP', 'CMPLX', 'COMPLEX', 'DEC', 'DISPOSE', 'EXCL', 'FLOAT',
- 'HALT', 'HIGH', 'IM', 'INC', 'INCL', 'INT', 'INTERRUPTIBLE', 'LENGTH',
- 'LFLOAT', 'LONGCOMPLEX', 'NEW', 'PROC', 'PROTECTION', 'RE', 'SIZE',
- 'TRUNC', 'UNINTERRUBTIBLE', 'VAL',
- # 5 additional builtins (ISO 10514-2 & ISO 10514-3)
- 'CREATE', 'DESTROY', 'EMPTY', 'ISMEMBER', 'SELF',
- )
- # ISO Modula-2 Additional Pseudo-Module Builtins Dataset
- iso_additional_pseudo_builtins = (
- # 14 additional builtins (SYSTEM)
- 'SYSTEM', 'BITSPERLOC', 'LOCSPERBYTE', 'LOCSPERWORD', 'LOC',
- 'ADDADR', 'SUBADR', 'DIFADR', 'MAKEADR', 'ADR',
- 'ROTATE', 'SHIFT', 'CAST', 'TSIZE',
- # 13 additional builtins (COROUTINES)
- 'COROUTINES', 'ATTACH', 'COROUTINE', 'CURRENT', 'DETACH', 'HANDLER',
- 'INTERRUPTSOURCE', 'IOTRANSFER', 'IsATTACHED', 'LISTEN',
- 'NEWCOROUTINE', 'PROT', 'TRANSFER',
- # 9 additional builtins (EXCEPTIONS)
- 'EXCEPTIONS', 'AllocateSource', 'CurrentNumber', 'ExceptionNumber',
- 'ExceptionSource', 'GetMessage', 'IsCurrentSource',
- 'IsExceptionalExecution', 'RAISE',
- # 3 additional builtins (TERMINATION)
- 'TERMINATION', 'IsTerminating', 'HasHalted',
- # 4 additional builtins (M2EXCEPTION)
- 'M2EXCEPTION', 'M2Exceptions', 'M2Exception', 'IsM2Exception',
- 'indexException', 'rangeException', 'caseSelectException',
- 'invalidLocation', 'functionException', 'wholeValueException',
- 'wholeDivException', 'realValueException', 'realDivException',
- 'complexValueException', 'complexDivException', 'protException',
- 'sysException', 'coException', 'exException',
- )
- # M o d u l a - 2 R 1 0 D a t a s e t s
- # Lexemes to Mark as Error Tokens for Modula-2 R10
- m2r10_lexemes_to_reject = (
- '!', '`', '@', '$', '%', '&', '<>',
- )
- # Modula-2 R10 reserved words in addition to the common set
- m2r10_additional_reserved_words = (
- # 12 additional reserved words
- 'ALIAS', 'ARGLIST', 'BLUEPRINT', 'COPY', 'GENLIB', 'INDETERMINATE',
- 'NEW', 'NONE', 'OPAQUE', 'REFERENTIAL', 'RELEASE', 'RETAIN',
- # 2 additional reserved words with symbolic assembly option
- 'ASM', 'REG',
- )
- # Modula-2 R10 builtins in addition to the common set
- m2r10_additional_builtins = (
- # 26 additional builtins
- 'CARDINAL', 'COUNT', 'EMPTY', 'EXISTS', 'INSERT', 'LENGTH', 'LONGCARD',
- 'OCTET', 'PTR', 'PRED', 'READ', 'READNEW', 'REMOVE', 'RETRIEVE', 'SORT',
- 'STORE', 'SUBSET', 'SUCC', 'TLIMIT', 'TMAX', 'TMIN', 'TRUE', 'TSIZE',
- 'UNICHAR', 'WRITE', 'WRITEF',
- )
- # Modula-2 R10 Additional Pseudo-Module Builtins Dataset
- m2r10_additional_pseudo_builtins = (
- # 13 additional builtins (TPROPERTIES)
- 'TPROPERTIES', 'PROPERTY', 'LITERAL', 'TPROPERTY', 'TLITERAL',
- 'TBUILTIN', 'TDYN', 'TREFC', 'TNIL', 'TBASE', 'TPRECISION',
- 'TMAXEXP', 'TMINEXP',
- # 4 additional builtins (CONVERSION)
- 'CONVERSION', 'TSXFSIZE', 'SXF', 'VAL',
- # 35 additional builtins (UNSAFE)
- 'UNSAFE', 'CAST', 'INTRINSIC', 'AVAIL', 'ADD', 'SUB', 'ADDC', 'SUBC',
- 'FETCHADD', 'FETCHSUB', 'SHL', 'SHR', 'ASHR', 'ROTL', 'ROTR', 'ROTLC',
- 'ROTRC', 'BWNOT', 'BWAND', 'BWOR', 'BWXOR', 'BWNAND', 'BWNOR',
- 'SETBIT', 'TESTBIT', 'LSBIT', 'MSBIT', 'CSBITS', 'BAIL', 'HALT',
- 'TODO', 'FFI', 'ADDR', 'VARGLIST', 'VARGC',
- # 11 additional builtins (ATOMIC)
- 'ATOMIC', 'INTRINSIC', 'AVAIL', 'SWAP', 'CAS', 'INC', 'DEC', 'BWAND',
- 'BWNAND', 'BWOR', 'BWXOR',
- # 7 additional builtins (COMPILER)
- 'COMPILER', 'DEBUG', 'MODNAME', 'PROCNAME', 'LINENUM', 'DEFAULT',
- 'HASH',
- # 5 additional builtins (ASSEMBLER)
- 'ASSEMBLER', 'REGISTER', 'SETREG', 'GETREG', 'CODE',
- )
- # O b j e c t i v e M o d u l a - 2 D a t a s e t s
- # Lexemes to Mark as Error Tokens for Objective Modula-2
- objm2_lexemes_to_reject = (
- '!', '$', '%', '&', '<>',
- )
- # Objective Modula-2 Extensions
- # reserved words in addition to Modula-2 R10
- objm2_additional_reserved_words = (
- # 16 additional reserved words
- 'BYCOPY', 'BYREF', 'CLASS', 'CONTINUE', 'CRITICAL', 'INOUT', 'METHOD',
- 'ON', 'OPTIONAL', 'OUT', 'PRIVATE', 'PROTECTED', 'PROTOCOL', 'PUBLIC',
- 'SUPER', 'TRY',
- )
- # Objective Modula-2 Extensions
- # builtins in addition to Modula-2 R10
- objm2_additional_builtins = (
- # 3 additional builtins
- 'OBJECT', 'NO', 'YES',
- )
- # Objective Modula-2 Extensions
- # pseudo-module builtins in addition to Modula-2 R10
- objm2_additional_pseudo_builtins = (
- # None
- )
- # A g l e t M o d u l a - 2 D a t a s e t s
- # Aglet Extensions
- # reserved words in addition to ISO Modula-2
- aglet_additional_reserved_words = (
- # None
- )
- # Aglet Extensions
- # builtins in addition to ISO Modula-2
- aglet_additional_builtins = (
- # 9 additional builtins
- 'BITSET8', 'BITSET16', 'BITSET32', 'CARDINAL8', 'CARDINAL16',
- 'CARDINAL32', 'INTEGER8', 'INTEGER16', 'INTEGER32',
- )
- # Aglet Modula-2 Extensions
- # pseudo-module builtins in addition to ISO Modula-2
- aglet_additional_pseudo_builtins = (
- # None
- )
- # G N U M o d u l a - 2 D a t a s e t s
- # GNU Extensions
- # reserved words in addition to PIM Modula-2
- gm2_additional_reserved_words = (
- # 10 additional reserved words
- 'ASM', '__ATTRIBUTE__', '__BUILTIN__', '__COLUMN__', '__DATE__',
- '__FILE__', '__FUNCTION__', '__LINE__', '__MODULE__', 'VOLATILE',
- )
- # GNU Extensions
- # builtins in addition to PIM Modula-2
- gm2_additional_builtins = (
- # 21 additional builtins
- 'BITSET8', 'BITSET16', 'BITSET32', 'CARDINAL8', 'CARDINAL16',
- 'CARDINAL32', 'CARDINAL64', 'COMPLEX32', 'COMPLEX64', 'COMPLEX96',
- 'COMPLEX128', 'INTEGER8', 'INTEGER16', 'INTEGER32', 'INTEGER64',
- 'REAL8', 'REAL16', 'REAL32', 'REAL96', 'REAL128', 'THROW',
- )
- # GNU Extensions
- # pseudo-module builtins in addition to PIM Modula-2
- gm2_additional_pseudo_builtins = (
- # None
- )
- # p 1 M o d u l a - 2 D a t a s e t s
- # p1 Extensions
- # reserved words in addition to ISO Modula-2
- p1_additional_reserved_words = (
- # None
- )
- # p1 Extensions
- # builtins in addition to ISO Modula-2
- p1_additional_builtins = (
- # None
- )
- # p1 Modula-2 Extensions
- # pseudo-module builtins in addition to ISO Modula-2
- p1_additional_pseudo_builtins = (
- # 1 additional builtin
- 'BCD',
- )
- # X D S M o d u l a - 2 D a t a s e t s
- # XDS Extensions
- # reserved words in addition to ISO Modula-2
- xds_additional_reserved_words = (
- # 1 additional reserved word
- 'SEQ',
- )
- # XDS Extensions
- # builtins in addition to ISO Modula-2
- xds_additional_builtins = (
- # 9 additional builtins
- 'ASH', 'ASSERT', 'DIFFADR_TYPE', 'ENTIER', 'INDEX', 'LEN',
- 'LONGCARD', 'SHORTCARD', 'SHORTINT',
- )
- # XDS Modula-2 Extensions
- # pseudo-module builtins in addition to ISO Modula-2
- xds_additional_pseudo_builtins = (
- # 22 additional builtins (SYSTEM)
- 'PROCESS', 'NEWPROCESS', 'BOOL8', 'BOOL16', 'BOOL32', 'CARD8',
- 'CARD16', 'CARD32', 'INT8', 'INT16', 'INT32', 'REF', 'MOVE',
- 'FILL', 'GET', 'PUT', 'CC', 'int', 'unsigned', 'size_t', 'void'
- # 3 additional builtins (COMPILER)
- 'COMPILER', 'OPTION', 'EQUATION'
- )
- # P I M S t a n d a r d L i b r a r y D a t a s e t s
- # PIM Modula-2 Standard Library Modules Dataset
- pim_stdlib_module_identifiers = (
- 'Terminal', 'FileSystem', 'InOut', 'RealInOut', 'MathLib0', 'Storage',
- )
- # PIM Modula-2 Standard Library Types Dataset
- pim_stdlib_type_identifiers = (
- 'Flag', 'FlagSet', 'Response', 'Command', 'Lock', 'Permission',
- 'MediumType', 'File', 'FileProc', 'DirectoryProc', 'FileCommand',
- 'DirectoryCommand',
- )
- # PIM Modula-2 Standard Library Procedures Dataset
- pim_stdlib_proc_identifiers = (
- 'Read', 'BusyRead', 'ReadAgain', 'Write', 'WriteString', 'WriteLn',
- 'Create', 'Lookup', 'Close', 'Delete', 'Rename', 'SetRead', 'SetWrite',
- 'SetModify', 'SetOpen', 'Doio', 'SetPos', 'GetPos', 'Length', 'Reset',
- 'Again', 'ReadWord', 'WriteWord', 'ReadChar', 'WriteChar',
- 'CreateMedium', 'DeleteMedium', 'AssignName', 'DeassignName',
- 'ReadMedium', 'LookupMedium', 'OpenInput', 'OpenOutput', 'CloseInput',
- 'CloseOutput', 'ReadString', 'ReadInt', 'ReadCard', 'ReadWrd',
- 'WriteInt', 'WriteCard', 'WriteOct', 'WriteHex', 'WriteWrd',
- 'ReadReal', 'WriteReal', 'WriteFixPt', 'WriteRealOct', 'sqrt', 'exp',
- 'ln', 'sin', 'cos', 'arctan', 'entier', 'ALLOCATE', 'DEALLOCATE',
- )
- # PIM Modula-2 Standard Library Variables Dataset
- pim_stdlib_var_identifiers = (
- 'Done', 'termCH', 'in', 'out'
- )
- # PIM Modula-2 Standard Library Constants Dataset
- pim_stdlib_const_identifiers = (
- 'EOL',
- )
- # I S O S t a n d a r d L i b r a r y D a t a s e t s
- # ISO Modula-2 Standard Library Modules Dataset
- iso_stdlib_module_identifiers = (
- # TO DO
- )
- # ISO Modula-2 Standard Library Types Dataset
- iso_stdlib_type_identifiers = (
- # TO DO
- )
- # ISO Modula-2 Standard Library Procedures Dataset
- iso_stdlib_proc_identifiers = (
- # TO DO
- )
- # ISO Modula-2 Standard Library Variables Dataset
- iso_stdlib_var_identifiers = (
- # TO DO
- )
- # ISO Modula-2 Standard Library Constants Dataset
- iso_stdlib_const_identifiers = (
- # TO DO
- )
- # M 2 R 1 0 S t a n d a r d L i b r a r y D a t a s e t s
- # Modula-2 R10 Standard Library ADTs Dataset
- m2r10_stdlib_adt_identifiers = (
- 'BCD', 'LONGBCD', 'BITSET', 'SHORTBITSET', 'LONGBITSET',
- 'LONGLONGBITSET', 'COMPLEX', 'LONGCOMPLEX', 'SHORTCARD', 'LONGLONGCARD',
- 'SHORTINT', 'LONGLONGINT', 'POSINT', 'SHORTPOSINT', 'LONGPOSINT',
- 'LONGLONGPOSINT', 'BITSET8', 'BITSET16', 'BITSET32', 'BITSET64',
- 'BITSET128', 'BS8', 'BS16', 'BS32', 'BS64', 'BS128', 'CARDINAL8',
- 'CARDINAL16', 'CARDINAL32', 'CARDINAL64', 'CARDINAL128', 'CARD8',
- 'CARD16', 'CARD32', 'CARD64', 'CARD128', 'INTEGER8', 'INTEGER16',
- 'INTEGER32', 'INTEGER64', 'INTEGER128', 'INT8', 'INT16', 'INT32',
- 'INT64', 'INT128', 'STRING', 'UNISTRING',
- )
- # Modula-2 R10 Standard Library Blueprints Dataset
- m2r10_stdlib_blueprint_identifiers = (
- 'ProtoRoot', 'ProtoComputational', 'ProtoNumeric', 'ProtoScalar',
- 'ProtoNonScalar', 'ProtoCardinal', 'ProtoInteger', 'ProtoReal',
- 'ProtoComplex', 'ProtoVector', 'ProtoTuple', 'ProtoCompArray',
- 'ProtoCollection', 'ProtoStaticArray', 'ProtoStaticSet',
- 'ProtoStaticString', 'ProtoArray', 'ProtoString', 'ProtoSet',
- 'ProtoMultiSet', 'ProtoDictionary', 'ProtoMultiDict', 'ProtoExtension',
- 'ProtoIO', 'ProtoCardMath', 'ProtoIntMath', 'ProtoRealMath',
- )
- # Modula-2 R10 Standard Library Modules Dataset
- m2r10_stdlib_module_identifiers = (
- 'ASCII', 'BooleanIO', 'CharIO', 'UnicharIO', 'OctetIO',
- 'CardinalIO', 'LongCardIO', 'IntegerIO', 'LongIntIO', 'RealIO',
- 'LongRealIO', 'BCDIO', 'LongBCDIO', 'CardMath', 'LongCardMath',
- 'IntMath', 'LongIntMath', 'RealMath', 'LongRealMath', 'BCDMath',
- 'LongBCDMath', 'FileIO', 'FileSystem', 'Storage', 'IOSupport',
- )
- # Modula-2 R10 Standard Library Types Dataset
- m2r10_stdlib_type_identifiers = (
- 'File', 'Status',
- # TO BE COMPLETED
- )
- # Modula-2 R10 Standard Library Procedures Dataset
- m2r10_stdlib_proc_identifiers = (
- 'ALLOCATE', 'DEALLOCATE', 'SIZE',
- # TO BE COMPLETED
- )
- # Modula-2 R10 Standard Library Variables Dataset
- m2r10_stdlib_var_identifiers = (
- 'stdIn', 'stdOut', 'stdErr',
- )
- # Modula-2 R10 Standard Library Constants Dataset
- m2r10_stdlib_const_identifiers = (
- 'pi', 'tau',
- )
- # D i a l e c t s
- # Dialect modes
- dialects = (
- 'unknown',
- 'm2pim', 'm2iso', 'm2r10', 'objm2',
- 'm2iso+aglet', 'm2pim+gm2', 'm2iso+p1', 'm2iso+xds',
- )
- # D a t a b a s e s
- # Lexemes to Mark as Errors Database
- lexemes_to_reject_db = {
- # Lexemes to reject for unknown dialect
- 'unknown': (
- # LEAVE THIS EMPTY
- ),
- # Lexemes to reject for PIM Modula-2
- 'm2pim': (
- pim_lexemes_to_reject,
- ),
- # Lexemes to reject for ISO Modula-2
- 'm2iso': (
- iso_lexemes_to_reject,
- ),
- # Lexemes to reject for Modula-2 R10
- 'm2r10': (
- m2r10_lexemes_to_reject,
- ),
- # Lexemes to reject for Objective Modula-2
- 'objm2': (
- objm2_lexemes_to_reject,
- ),
- # Lexemes to reject for Aglet Modula-2
- 'm2iso+aglet': (
- iso_lexemes_to_reject,
- ),
- # Lexemes to reject for GNU Modula-2
- 'm2pim+gm2': (
- pim_lexemes_to_reject,
- ),
- # Lexemes to reject for p1 Modula-2
- 'm2iso+p1': (
- iso_lexemes_to_reject,
- ),
- # Lexemes to reject for XDS Modula-2
- 'm2iso+xds': (
- iso_lexemes_to_reject,
- ),
- }
- # Reserved Words Database
- reserved_words_db = {
- # Reserved words for unknown dialect
- 'unknown': (
- common_reserved_words,
- pim_additional_reserved_words,
- iso_additional_reserved_words,
- m2r10_additional_reserved_words,
- ),
- # Reserved words for PIM Modula-2
- 'm2pim': (
- common_reserved_words,
- pim_additional_reserved_words,
- ),
- # Reserved words for Modula-2 R10
- 'm2iso': (
- common_reserved_words,
- iso_additional_reserved_words,
- ),
- # Reserved words for ISO Modula-2
- 'm2r10': (
- common_reserved_words,
- m2r10_additional_reserved_words,
- ),
- # Reserved words for Objective Modula-2
- 'objm2': (
- common_reserved_words,
- m2r10_additional_reserved_words,
- objm2_additional_reserved_words,
- ),
- # Reserved words for Aglet Modula-2 Extensions
- 'm2iso+aglet': (
- common_reserved_words,
- iso_additional_reserved_words,
- aglet_additional_reserved_words,
- ),
- # Reserved words for GNU Modula-2 Extensions
- 'm2pim+gm2': (
- common_reserved_words,
- pim_additional_reserved_words,
- gm2_additional_reserved_words,
- ),
- # Reserved words for p1 Modula-2 Extensions
- 'm2iso+p1': (
- common_reserved_words,
- iso_additional_reserved_words,
- p1_additional_reserved_words,
- ),
- # Reserved words for XDS Modula-2 Extensions
- 'm2iso+xds': (
- common_reserved_words,
- iso_additional_reserved_words,
- xds_additional_reserved_words,
- ),
- }
- # Builtins Database
- builtins_db = {
- # Builtins for unknown dialect
- 'unknown': (
- common_builtins,
- pim_additional_builtins,
- iso_additional_builtins,
- m2r10_additional_builtins,
- ),
- # Builtins for PIM Modula-2
- 'm2pim': (
- common_builtins,
- pim_additional_builtins,
- ),
- # Builtins for ISO Modula-2
- 'm2iso': (
- common_builtins,
- iso_additional_builtins,
- ),
- # Builtins for ISO Modula-2
- 'm2r10': (
- common_builtins,
- m2r10_additional_builtins,
- ),
- # Builtins for Objective Modula-2
- 'objm2': (
- common_builtins,
- m2r10_additional_builtins,
- objm2_additional_builtins,
- ),
- # Builtins for Aglet Modula-2 Extensions
- 'm2iso+aglet': (
- common_builtins,
- iso_additional_builtins,
- aglet_additional_builtins,
- ),
- # Builtins for GNU Modula-2 Extensions
- 'm2pim+gm2': (
- common_builtins,
- pim_additional_builtins,
- gm2_additional_builtins,
- ),
- # Builtins for p1 Modula-2 Extensions
- 'm2iso+p1': (
- common_builtins,
- iso_additional_builtins,
- p1_additional_builtins,
- ),
- # Builtins for XDS Modula-2 Extensions
- 'm2iso+xds': (
- common_builtins,
- iso_additional_builtins,
- xds_additional_builtins,
- ),
- }
- # Pseudo-Module Builtins Database
- pseudo_builtins_db = {
- # Builtins for unknown dialect
- 'unknown': (
- common_pseudo_builtins,
- pim_additional_pseudo_builtins,
- iso_additional_pseudo_builtins,
- m2r10_additional_pseudo_builtins,
- ),
- # Builtins for PIM Modula-2
- 'm2pim': (
- common_pseudo_builtins,
- pim_additional_pseudo_builtins,
- ),
- # Builtins for ISO Modula-2
- 'm2iso': (
- common_pseudo_builtins,
- iso_additional_pseudo_builtins,
- ),
- # Builtins for ISO Modula-2
- 'm2r10': (
- common_pseudo_builtins,
- m2r10_additional_pseudo_builtins,
- ),
- # Builtins for Objective Modula-2
- 'objm2': (
- common_pseudo_builtins,
- m2r10_additional_pseudo_builtins,
- objm2_additional_pseudo_builtins,
- ),
- # Builtins for Aglet Modula-2 Extensions
- 'm2iso+aglet': (
- common_pseudo_builtins,
- iso_additional_pseudo_builtins,
- aglet_additional_pseudo_builtins,
- ),
- # Builtins for GNU Modula-2 Extensions
- 'm2pim+gm2': (
- common_pseudo_builtins,
- pim_additional_pseudo_builtins,
- gm2_additional_pseudo_builtins,
- ),
- # Builtins for p1 Modula-2 Extensions
- 'm2iso+p1': (
- common_pseudo_builtins,
- iso_additional_pseudo_builtins,
- p1_additional_pseudo_builtins,
- ),
- # Builtins for XDS Modula-2 Extensions
- 'm2iso+xds': (
- common_pseudo_builtins,
- iso_additional_pseudo_builtins,
- xds_additional_pseudo_builtins,
- ),
- }
- # Standard Library ADTs Database
- stdlib_adts_db = {
- # Empty entry for unknown dialect
- 'unknown': (
- # LEAVE THIS EMPTY
- ),
- # Standard Library ADTs for PIM Modula-2
- 'm2pim': (
- # No first class library types
- ),
- # Standard Library ADTs for ISO Modula-2
- 'm2iso': (
- # No first class library types
- ),
- # Standard Library ADTs for Modula-2 R10
- 'm2r10': (
- m2r10_stdlib_adt_identifiers,
- ),
- # Standard Library ADTs for Objective Modula-2
- 'objm2': (
- m2r10_stdlib_adt_identifiers,
- ),
- # Standard Library ADTs for Aglet Modula-2
- 'm2iso+aglet': (
- # No first class library types
- ),
- # Standard Library ADTs for GNU Modula-2
- 'm2pim+gm2': (
- # No first class library types
- ),
- # Standard Library ADTs for p1 Modula-2
- 'm2iso+p1': (
- # No first class library types
- ),
- # Standard Library ADTs for XDS Modula-2
- 'm2iso+xds': (
- # No first class library types
- ),
- }
- # Standard Library Modules Database
- stdlib_modules_db = {
- # Empty entry for unknown dialect
- 'unknown': (
- # LEAVE THIS EMPTY
- ),
- # Standard Library Modules for PIM Modula-2
- 'm2pim': (
- pim_stdlib_module_identifiers,
- ),
- # Standard Library Modules for ISO Modula-2
- 'm2iso': (
- iso_stdlib_module_identifiers,
- ),
- # Standard Library Modules for Modula-2 R10
- 'm2r10': (
- m2r10_stdlib_blueprint_identifiers,
- m2r10_stdlib_module_identifiers,
- m2r10_stdlib_adt_identifiers,
- ),
- # Standard Library Modules for Objective Modula-2
- 'objm2': (
- m2r10_stdlib_blueprint_identifiers,
- m2r10_stdlib_module_identifiers,
- ),
- # Standard Library Modules for Aglet Modula-2
- 'm2iso+aglet': (
- iso_stdlib_module_identifiers,
- ),
- # Standard Library Modules for GNU Modula-2
- 'm2pim+gm2': (
- pim_stdlib_module_identifiers,
- ),
- # Standard Library Modules for p1 Modula-2
- 'm2iso+p1': (
- iso_stdlib_module_identifiers,
- ),
- # Standard Library Modules for XDS Modula-2
- 'm2iso+xds': (
- iso_stdlib_module_identifiers,
- ),
- }
- # Standard Library Types Database
- stdlib_types_db = {
- # Empty entry for unknown dialect
- 'unknown': (
- # LEAVE THIS EMPTY
- ),
- # Standard Library Types for PIM Modula-2
- 'm2pim': (
- pim_stdlib_type_identifiers,
- ),
- # Standard Library Types for ISO Modula-2
- 'm2iso': (
- iso_stdlib_type_identifiers,
- ),
- # Standard Library Types for Modula-2 R10
- 'm2r10': (
- m2r10_stdlib_type_identifiers,
- ),
- # Standard Library Types for Objective Modula-2
- 'objm2': (
- m2r10_stdlib_type_identifiers,
- ),
- # Standard Library Types for Aglet Modula-2
- 'm2iso+aglet': (
- iso_stdlib_type_identifiers,
- ),
- # Standard Library Types for GNU Modula-2
- 'm2pim+gm2': (
- pim_stdlib_type_identifiers,
- ),
- # Standard Library Types for p1 Modula-2
- 'm2iso+p1': (
- iso_stdlib_type_identifiers,
- ),
- # Standard Library Types for XDS Modula-2
- 'm2iso+xds': (
- iso_stdlib_type_identifiers,
- ),
- }
- # Standard Library Procedures Database
- stdlib_procedures_db = {
- # Empty entry for unknown dialect
- 'unknown': (
- # LEAVE THIS EMPTY
- ),
- # Standard Library Procedures for PIM Modula-2
- 'm2pim': (
- pim_stdlib_proc_identifiers,
- ),
- # Standard Library Procedures for ISO Modula-2
- 'm2iso': (
- iso_stdlib_proc_identifiers,
- ),
- # Standard Library Procedures for Modula-2 R10
- 'm2r10': (
- m2r10_stdlib_proc_identifiers,
- ),
- # Standard Library Procedures for Objective Modula-2
- 'objm2': (
- m2r10_stdlib_proc_identifiers,
- ),
- # Standard Library Procedures for Aglet Modula-2
- 'm2iso+aglet': (
- iso_stdlib_proc_identifiers,
- ),
- # Standard Library Procedures for GNU Modula-2
- 'm2pim+gm2': (
- pim_stdlib_proc_identifiers,
- ),
- # Standard Library Procedures for p1 Modula-2
- 'm2iso+p1': (
- iso_stdlib_proc_identifiers,
- ),
- # Standard Library Procedures for XDS Modula-2
- 'm2iso+xds': (
- iso_stdlib_proc_identifiers,
- ),
- }
- # Standard Library Variables Database
- stdlib_variables_db = {
- # Empty entry for unknown dialect
- 'unknown': (
- # LEAVE THIS EMPTY
- ),
- # Standard Library Variables for PIM Modula-2
- 'm2pim': (
- pim_stdlib_var_identifiers,
- ),
- # Standard Library Variables for ISO Modula-2
- 'm2iso': (
- iso_stdlib_var_identifiers,
- ),
- # Standard Library Variables for Modula-2 R10
- 'm2r10': (
- m2r10_stdlib_var_identifiers,
- ),
- # Standard Library Variables for Objective Modula-2
- 'objm2': (
- m2r10_stdlib_var_identifiers,
- ),
- # Standard Library Variables for Aglet Modula-2
- 'm2iso+aglet': (
- iso_stdlib_var_identifiers,
- ),
- # Standard Library Variables for GNU Modula-2
- 'm2pim+gm2': (
- pim_stdlib_var_identifiers,
- ),
- # Standard Library Variables for p1 Modula-2
- 'm2iso+p1': (
- iso_stdlib_var_identifiers,
- ),
- # Standard Library Variables for XDS Modula-2
- 'm2iso+xds': (
- iso_stdlib_var_identifiers,
- ),
- }
- # Standard Library Constants Database
- stdlib_constants_db = {
- # Empty entry for unknown dialect
- 'unknown': (
- # LEAVE THIS EMPTY
- ),
- # Standard Library Constants for PIM Modula-2
- 'm2pim': (
- pim_stdlib_const_identifiers,
- ),
- # Standard Library Constants for ISO Modula-2
- 'm2iso': (
- iso_stdlib_const_identifiers,
- ),
- # Standard Library Constants for Modula-2 R10
- 'm2r10': (
- m2r10_stdlib_const_identifiers,
- ),
- # Standard Library Constants for Objective Modula-2
- 'objm2': (
- m2r10_stdlib_const_identifiers,
- ),
- # Standard Library Constants for Aglet Modula-2
- 'm2iso+aglet': (
- iso_stdlib_const_identifiers,
- ),
- # Standard Library Constants for GNU Modula-2
- 'm2pim+gm2': (
- pim_stdlib_const_identifiers,
- ),
- # Standard Library Constants for p1 Modula-2
- 'm2iso+p1': (
- iso_stdlib_const_identifiers,
- ),
- # Standard Library Constants for XDS Modula-2
- 'm2iso+xds': (
- iso_stdlib_const_identifiers,
- ),
- }
- # M e t h o d s
- # initialise a lexer instance
- def __init__(self, **options):
- #
- # check dialect options
- #
- dialects = get_list_opt(options, 'dialect', [])
- #
- for dialect_option in dialects:
- if dialect_option in self.dialects[1:-1]:
- # valid dialect option found
- self.set_dialect(dialect_option)
- break
- #
- # Fallback Mode (DEFAULT)
- else:
- # no valid dialect option
- self.set_dialect('unknown')
- #
- self.dialect_set_by_tag = False
- #
- # check style options
- #
- styles = get_list_opt(options, 'style', [])
- #
- # use lowercase mode for Algol style
- if 'algol' in styles or 'algol_nu' in styles:
- self.algol_publication_mode = True
- else:
- self.algol_publication_mode = False
- #
- # Check option flags
- #
- self.treat_stdlib_adts_as_builtins = get_bool_opt(
- options, 'treat_stdlib_adts_as_builtins', True)
- #
- # call superclass initialiser
- RegexLexer.__init__(self, **options)
- # Set lexer to a specified dialect
- def set_dialect(self, dialect_id):
- #
- # if __debug__:
- # print 'entered set_dialect with arg: ', dialect_id
- #
- # check dialect name against known dialects
- if dialect_id not in self.dialects:
- dialect = 'unknown' # default
- else:
- dialect = dialect_id
- #
- # compose lexemes to reject set
- lexemes_to_reject_set = set()
- # add each list of reject lexemes for this dialect
- for list in self.lexemes_to_reject_db[dialect]:
- lexemes_to_reject_set.update(set(list))
- #
- # compose reserved words set
- reswords_set = set()
- # add each list of reserved words for this dialect
- for list in self.reserved_words_db[dialect]:
- reswords_set.update(set(list))
- #
- # compose builtins set
- builtins_set = set()
- # add each list of builtins for this dialect excluding reserved words
- for list in self.builtins_db[dialect]:
- builtins_set.update(set(list).difference(reswords_set))
- #
- # compose pseudo-builtins set
- pseudo_builtins_set = set()
- # add each list of builtins for this dialect excluding reserved words
- for list in self.pseudo_builtins_db[dialect]:
- pseudo_builtins_set.update(set(list).difference(reswords_set))
- #
- # compose ADTs set
- adts_set = set()
- # add each list of ADTs for this dialect excluding reserved words
- for list in self.stdlib_adts_db[dialect]:
- adts_set.update(set(list).difference(reswords_set))
- #
- # compose modules set
- modules_set = set()
- # add each list of builtins for this dialect excluding builtins
- for list in self.stdlib_modules_db[dialect]:
- modules_set.update(set(list).difference(builtins_set))
- #
- # compose types set
- types_set = set()
- # add each list of types for this dialect excluding builtins
- for list in self.stdlib_types_db[dialect]:
- types_set.update(set(list).difference(builtins_set))
- #
- # compose procedures set
- procedures_set = set()
- # add each list of procedures for this dialect excluding builtins
- for list in self.stdlib_procedures_db[dialect]:
- procedures_set.update(set(list).difference(builtins_set))
- #
- # compose variables set
- variables_set = set()
- # add each list of variables for this dialect excluding builtins
- for list in self.stdlib_variables_db[dialect]:
- variables_set.update(set(list).difference(builtins_set))
- #
- # compose constants set
- constants_set = set()
- # add each list of constants for this dialect excluding builtins
- for list in self.stdlib_constants_db[dialect]:
- constants_set.update(set(list).difference(builtins_set))
- #
- # update lexer state
- self.dialect = dialect
- self.lexemes_to_reject = lexemes_to_reject_set
- self.reserved_words = reswords_set
- self.builtins = builtins_set
- self.pseudo_builtins = pseudo_builtins_set
- self.adts = adts_set
- self.modules = modules_set
- self.types = types_set
- self.procedures = procedures_set
- self.variables = variables_set
- self.constants = constants_set
- #
- # if __debug__:
- # print 'exiting set_dialect'
- # print ' self.dialect: ', self.dialect
- # print ' self.lexemes_to_reject: ', self.lexemes_to_reject
- # print ' self.reserved_words: ', self.reserved_words
- # print ' self.builtins: ', self.builtins
- # print ' self.pseudo_builtins: ', self.pseudo_builtins
- # print ' self.adts: ', self.adts
- # print ' self.modules: ', self.modules
- # print ' self.types: ', self.types
- # print ' self.procedures: ', self.procedures
- # print ' self.variables: ', self.variables
- # print ' self.types: ', self.types
- # print ' self.constants: ', self.constants
- # Extracts a dialect name from a dialect tag comment string and checks
- # the extracted name against known dialects. If a match is found, the
- # matching name is returned, otherwise dialect id 'unknown' is returned
- def get_dialect_from_dialect_tag(self, dialect_tag):
- #
- # if __debug__:
- # print 'entered get_dialect_from_dialect_tag with arg: ', dialect_tag
- #
- # constants
- left_tag_delim = '(*!'
- right_tag_delim = '*)'
- left_tag_delim_len = len(left_tag_delim)
- right_tag_delim_len = len(right_tag_delim)
- indicator_start = left_tag_delim_len
- indicator_end = -(right_tag_delim_len)
- #
- # check comment string for dialect indicator
- if len(dialect_tag) > (left_tag_delim_len + right_tag_delim_len) \
- and dialect_tag.startswith(left_tag_delim) \
- and dialect_tag.endswith(right_tag_delim):
- #
- # if __debug__:
- # print 'dialect tag found'
- #
- # extract dialect indicator
- indicator = dialect_tag[indicator_start:indicator_end]
- #
- # if __debug__:
- # print 'extracted: ', indicator
- #
- # check against known dialects
- for index in range(1, len(self.dialects)):
- #
- # if __debug__:
- # print 'dialects[', index, ']: ', self.dialects[index]
- #
- if indicator == self.dialects[index]:
- #
- # if __debug__:
- # print 'matching dialect found'
- #
- # indicator matches known dialect
- return indicator
- else:
- # indicator does not match any dialect
- return 'unknown' # default
- else:
- # invalid indicator string
- return 'unknown' # default
- # intercept the token stream, modify token attributes and return them
- def get_tokens_unprocessed(self, text):
- for index, token, value in RegexLexer.get_tokens_unprocessed(self, text):
- #
- # check for dialect tag if dialect has not been set by tag
- if not self.dialect_set_by_tag and token == Comment.Special:
- indicated_dialect = self.get_dialect_from_dialect_tag(value)
- if indicated_dialect != 'unknown':
- # token is a dialect indicator
- # reset reserved words and builtins
- self.set_dialect(indicated_dialect)
- self.dialect_set_by_tag = True
- #
- # check for reserved words, predefined and stdlib identifiers
- if token is Name:
- if value in self.reserved_words:
- token = Keyword.Reserved
- if self.algol_publication_mode:
- value = value.lower()
- #
- elif value in self.builtins:
- token = Name.Builtin
- if self.algol_publication_mode:
- value = value.lower()
- #
- elif value in self.pseudo_builtins:
- token = Name.Builtin.Pseudo
- if self.algol_publication_mode:
- value = value.lower()
- #
- elif value in self.adts:
- if not self.treat_stdlib_adts_as_builtins:
- token = Name.Namespace
- else:
- token = Name.Builtin.Pseudo
- if self.algol_publication_mode:
- value = value.lower()
- #
- elif value in self.modules:
- token = Name.Namespace
- #
- elif value in self.types:
- token = Name.Class
- #
- elif value in self.procedures:
- token = Name.Function
- #
- elif value in self.variables:
- token = Name.Variable
- #
- elif value in self.constants:
- token = Name.Constant
- #
- elif token in Number:
- #
- # mark prefix number literals as error for PIM and ISO dialects
- if self.dialect not in ('unknown', 'm2r10', 'objm2'):
- if "'" in value or value[0:2] in ('0b', '0x', '0u'):
- token = Error
- #
- elif self.dialect in ('m2r10', 'objm2'):
- # mark base-8 number literals as errors for M2 R10 and ObjM2
- if token is Number.Oct:
- token = Error
- # mark suffix base-16 literals as errors for M2 R10 and ObjM2
- elif token is Number.Hex and 'H' in value:
- token = Error
- # mark real numbers with E as errors for M2 R10 and ObjM2
- elif token is Number.Float and 'E' in value:
- token = Error
- #
- elif token in Comment:
- #
- # mark single line comment as error for PIM and ISO dialects
- if token is Comment.Single:
- if self.dialect not in ('unknown', 'm2r10', 'objm2'):
- token = Error
- #
- if token is Comment.Preproc:
- # mark ISO pragma as error for PIM dialects
- if value.startswith('<*') and \
- self.dialect.startswith('m2pim'):
- token = Error
- # mark PIM pragma as comment for other dialects
- elif value.startswith('(*$') and \
- self.dialect != 'unknown' and \
- not self.dialect.startswith('m2pim'):
- token = Comment.Multiline
- #
- else: # token is neither Name nor Comment
- #
- # mark lexemes matching the dialect's error token set as errors
- if value in self.lexemes_to_reject:
- token = Error
- #
- # substitute lexemes when in Algol mode
- if self.algol_publication_mode:
- if value == '#':
- value = u'≠'
- elif value == '<=':
- value = u'≤'
- elif value == '>=':
- value = u'≥'
- elif value == '==':
- value = u'≡'
- elif value == '*.':
- value = u'•'
- # return result
- yield index, token, value
|