ecl.py 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144
  1. """
  2. pygments.lexers.ecl
  3. ~~~~~~~~~~~~~~~~~~~
  4. Lexers for the ECL language.
  5. :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import re
  9. from pygments.lexer import RegexLexer, include, bygroups, words
  10. from pygments.token import Comment, Operator, Keyword, Name, String, \
  11. Number, Punctuation, Whitespace
  12. __all__ = ['ECLLexer']
  13. class ECLLexer(RegexLexer):
  14. """
  15. Lexer for the declarative big-data ECL language.
  16. """
  17. name = 'ECL'
  18. url = 'https://hpccsystems.com/training/documentation/ecl-language-reference/html'
  19. aliases = ['ecl']
  20. filenames = ['*.ecl']
  21. mimetypes = ['application/x-ecl']
  22. version_added = '1.5'
  23. flags = re.IGNORECASE | re.MULTILINE
  24. tokens = {
  25. 'root': [
  26. include('whitespace'),
  27. include('statements'),
  28. ],
  29. 'whitespace': [
  30. (r'\s+', Whitespace),
  31. (r'\/\/.*', Comment.Single),
  32. (r'/(\\\n)?\*(.|\n)*?\*(\\\n)?/', Comment.Multiline),
  33. ],
  34. 'statements': [
  35. include('types'),
  36. include('keywords'),
  37. include('functions'),
  38. include('hash'),
  39. (r'"', String, 'string'),
  40. (r'\'', String, 'string'),
  41. (r'(\d+\.\d*|\.\d+|\d+)e[+-]?\d+[lu]*', Number.Float),
  42. (r'(\d+\.\d*|\.\d+|\d+f)f?', Number.Float),
  43. (r'0x[0-9a-f]+[lu]*', Number.Hex),
  44. (r'0[0-7]+[lu]*', Number.Oct),
  45. (r'\d+[lu]*', Number.Integer),
  46. (r'[~!%^&*+=|?:<>/-]+', Operator),
  47. (r'[{}()\[\],.;]', Punctuation),
  48. (r'[a-z_]\w*', Name),
  49. ],
  50. 'hash': [
  51. (r'^#.*$', Comment.Preproc),
  52. ],
  53. 'types': [
  54. (r'(RECORD|END)\D', Keyword.Declaration),
  55. (r'((?:ASCII|BIG_ENDIAN|BOOLEAN|DATA|DECIMAL|EBCDIC|INTEGER|PATTERN|'
  56. r'QSTRING|REAL|RECORD|RULE|SET OF|STRING|TOKEN|UDECIMAL|UNICODE|'
  57. r'UNSIGNED|VARSTRING|VARUNICODE)\d*)(\s+)',
  58. bygroups(Keyword.Type, Whitespace)),
  59. ],
  60. 'keywords': [
  61. (words((
  62. 'APPLY', 'ASSERT', 'BUILD', 'BUILDINDEX', 'EVALUATE', 'FAIL',
  63. 'KEYDIFF', 'KEYPATCH', 'LOADXML', 'NOTHOR', 'NOTIFY', 'OUTPUT',
  64. 'PARALLEL', 'SEQUENTIAL', 'SOAPCALL', 'CHECKPOINT', 'DEPRECATED',
  65. 'FAILCODE', 'FAILMESSAGE', 'FAILURE', 'GLOBAL', 'INDEPENDENT',
  66. 'ONWARNING', 'PERSIST', 'PRIORITY', 'RECOVERY', 'STORED', 'SUCCESS',
  67. 'WAIT', 'WHEN'), suffix=r'\b'),
  68. Keyword.Reserved),
  69. # These are classed differently, check later
  70. (words((
  71. 'ALL', 'AND', 'ANY', 'AS', 'ATMOST', 'BEFORE', 'BEGINC++', 'BEST',
  72. 'BETWEEN', 'CASE', 'CONST', 'COUNTER', 'CSV', 'DESCEND', 'ENCRYPT',
  73. 'ENDC++', 'ENDMACRO', 'EXCEPT', 'EXCLUSIVE', 'EXPIRE', 'EXPORT',
  74. 'EXTEND', 'FALSE', 'FEW', 'FIRST', 'FLAT', 'FULL', 'FUNCTION',
  75. 'GROUP', 'HEADER', 'HEADING', 'HOLE', 'IFBLOCK', 'IMPORT', 'IN',
  76. 'JOINED', 'KEEP', 'KEYED', 'LAST', 'LEFT', 'LIMIT', 'LOAD', 'LOCAL',
  77. 'LOCALE', 'LOOKUP', 'MACRO', 'MANY', 'MAXCOUNT', 'MAXLENGTH',
  78. 'MIN SKEW', 'MODULE', 'INTERFACE', 'NAMED', 'NOCASE', 'NOROOT',
  79. 'NOSCAN', 'NOSORT', 'NOT', 'OF', 'ONLY', 'OPT', 'OR', 'OUTER',
  80. 'OVERWRITE', 'PACKED', 'PARTITION', 'PENALTY', 'PHYSICALLENGTH',
  81. 'PIPE', 'QUOTE', 'RELATIONSHIP', 'REPEAT', 'RETURN', 'RIGHT',
  82. 'SCAN', 'SELF', 'SEPARATOR', 'SERVICE', 'SHARED', 'SKEW', 'SKIP',
  83. 'SQL', 'STORE', 'TERMINATOR', 'THOR', 'THRESHOLD', 'TOKEN',
  84. 'TRANSFORM', 'TRIM', 'TRUE', 'TYPE', 'UNICODEORDER', 'UNSORTED',
  85. 'VALIDATE', 'VIRTUAL', 'WHOLE', 'WILD', 'WITHIN', 'XML', 'XPATH',
  86. '__COMPRESSED__'), suffix=r'\b'),
  87. Keyword.Reserved),
  88. ],
  89. 'functions': [
  90. (words((
  91. 'ABS', 'ACOS', 'ALLNODES', 'ASCII', 'ASIN', 'ASSTRING', 'ATAN',
  92. 'ATAN2', 'AVE', 'CASE', 'CHOOSE', 'CHOOSEN', 'CHOOSESETS',
  93. 'CLUSTERSIZE', 'COMBINE', 'CORRELATION', 'COS', 'COSH', 'COUNT',
  94. 'COVARIANCE', 'CRON', 'DATASET', 'DEDUP', 'DEFINE', 'DENORMALIZE',
  95. 'DISTRIBUTE', 'DISTRIBUTED', 'DISTRIBUTION', 'EBCDIC', 'ENTH',
  96. 'ERROR', 'EVALUATE', 'EVENT', 'EVENTEXTRA', 'EVENTNAME', 'EXISTS',
  97. 'EXP', 'FAILCODE', 'FAILMESSAGE', 'FETCH', 'FROMUNICODE',
  98. 'GETISVALID', 'GLOBAL', 'GRAPH', 'GROUP', 'HASH', 'HASH32',
  99. 'HASH64', 'HASHCRC', 'HASHMD5', 'HAVING', 'IF', 'INDEX',
  100. 'INTFORMAT', 'ISVALID', 'ITERATE', 'JOIN', 'KEYUNICODE', 'LENGTH',
  101. 'LIBRARY', 'LIMIT', 'LN', 'LOCAL', 'LOG', 'LOOP', 'MAP', 'MATCHED',
  102. 'MATCHLENGTH', 'MATCHPOSITION', 'MATCHTEXT', 'MATCHUNICODE', 'MAX',
  103. 'MERGE', 'MERGEJOIN', 'MIN', 'NOLOCAL', 'NONEMPTY', 'NORMALIZE',
  104. 'PARSE', 'PIPE', 'POWER', 'PRELOAD', 'PROCESS', 'PROJECT', 'PULL',
  105. 'RANDOM', 'RANGE', 'RANK', 'RANKED', 'REALFORMAT', 'RECORDOF',
  106. 'REGEXFIND', 'REGEXREPLACE', 'REGROUP', 'REJECTED', 'ROLLUP',
  107. 'ROUND', 'ROUNDUP', 'ROW', 'ROWDIFF', 'SAMPLE', 'SET', 'SIN',
  108. 'SINH', 'SIZEOF', 'SOAPCALL', 'SORT', 'SORTED', 'SQRT', 'STEPPED',
  109. 'STORED', 'SUM', 'TABLE', 'TAN', 'TANH', 'THISNODE', 'TOPN',
  110. 'TOUNICODE', 'TRANSFER', 'TRIM', 'TRUNCATE', 'TYPEOF', 'UNGROUP',
  111. 'UNICODEORDER', 'VARIANCE', 'WHICH', 'WORKUNIT', 'XMLDECODE',
  112. 'XMLENCODE', 'XMLTEXT', 'XMLUNICODE'), suffix=r'\b'),
  113. Name.Function),
  114. ],
  115. 'string': [
  116. (r'"', String, '#pop'),
  117. (r'\'', String, '#pop'),
  118. (r'[^"\']+', String),
  119. ],
  120. }
  121. def analyse_text(text):
  122. """This is very difficult to guess relative to other business languages.
  123. -> in conjunction with BEGIN/END seems relatively rare though."""
  124. result = 0
  125. if '->' in text:
  126. result += 0.01
  127. if 'BEGIN' in text:
  128. result += 0.01
  129. if 'END' in text:
  130. result += 0.01
  131. return result