ezhil.py 3.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576
  1. """
  2. pygments.lexers.ezhil
  3. ~~~~~~~~~~~~~~~~~~~~~
  4. Pygments lexers for Ezhil language.
  5. :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import re
  9. from pygments.lexer import RegexLexer, include, words
  10. from pygments.token import Keyword, Comment, Name, String, Number, \
  11. Punctuation, Operator, Whitespace
  12. __all__ = ['EzhilLexer']
  13. class EzhilLexer(RegexLexer):
  14. """
  15. Lexer for Ezhil, a Tamil script-based programming language.
  16. """
  17. name = 'Ezhil'
  18. url = 'http://ezhillang.org'
  19. aliases = ['ezhil']
  20. filenames = ['*.n']
  21. mimetypes = ['text/x-ezhil']
  22. version_added = '2.1'
  23. # Refer to tamil.utf8.tamil_letters from open-tamil for a stricter version of this.
  24. # This much simpler version is close enough, and includes combining marks.
  25. _TALETTERS = '[a-zA-Z_]|[\u0b80-\u0bff]'
  26. tokens = {
  27. 'root': [
  28. include('keywords'),
  29. (r'#.*$', Comment.Single),
  30. (r'[@+/*,^\-%]|[!<>=]=?|&&?|\|\|?', Operator),
  31. ('இல்', Operator.Word),
  32. (words(('assert', 'max', 'min',
  33. 'நீளம்', 'சரம்_இடமாற்று', 'சரம்_கண்டுபிடி',
  34. 'பட்டியல்', 'பின்இணை', 'வரிசைப்படுத்து',
  35. 'எடு', 'தலைகீழ்', 'நீட்டிக்க', 'நுழைக்க', 'வை',
  36. 'கோப்பை_திற', 'கோப்பை_எழுது', 'கோப்பை_மூடு',
  37. 'pi', 'sin', 'cos', 'tan', 'sqrt', 'hypot', 'pow',
  38. 'exp', 'log', 'log10', 'exit',
  39. ), suffix=r'\b'), Name.Builtin),
  40. (r'(True|False)\b', Keyword.Constant),
  41. (r'[^\S\n]+', Whitespace),
  42. include('identifier'),
  43. include('literal'),
  44. (r'[(){}\[\]:;.]', Punctuation),
  45. ],
  46. 'keywords': [
  47. ('பதிப்பி|தேர்ந்தெடு|தேர்வு|ஏதேனில்|ஆனால்|இல்லைஆனால்|இல்லை|ஆக|ஒவ்வொன்றாக|இல்|வரை|செய்|முடியேனில்|பின்கொடு|முடி|நிரல்பாகம்|தொடர்|நிறுத்து|நிரல்பாகம்', Keyword),
  48. ],
  49. 'identifier': [
  50. ('(?:'+_TALETTERS+')(?:[0-9]|'+_TALETTERS+')*', Name),
  51. ],
  52. 'literal': [
  53. (r'".*?"', String),
  54. (r'\d+((\.\d*)?[eE][+-]?\d+|\.\d*)', Number.Float),
  55. (r'\d+', Number.Integer),
  56. ]
  57. }
  58. def analyse_text(text):
  59. """This language uses Tamil-script. We'll assume that if there's a
  60. decent amount of Tamil-characters, it's this language. This assumption
  61. is obviously horribly off if someone uses string literals in tamil
  62. in another language."""
  63. if len(re.findall(r'[\u0b80-\u0bff]', text)) > 10:
  64. return 0.25
  65. def __init__(self, **options):
  66. super().__init__(**options)
  67. self.encoding = options.get('encoding', 'utf-8')