cddl.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172
  1. """
  2. pygments.lexers.cddl
  3. ~~~~~~~~~~~~~~~~~~~~
    Lexer for the Concise Data Definition Language (CDDL), a notational
    convention to express CBOR and JSON data structures.
  6. More information:
  7. https://datatracker.ietf.org/doc/rfc8610/
  8. :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
  9. :license: BSD, see LICENSE for details.
  10. """
  11. from pygments.lexer import RegexLexer, bygroups, include, words
  12. from pygments.token import Comment, Error, Keyword, Name, Number, Operator, \
  13. Punctuation, String, Whitespace
# Names exported by a star-import of this module.
__all__ = ['CddlLexer']
  15. class CddlLexer(RegexLexer):
  16. """
  17. Lexer for CDDL definitions.
  18. """
  19. name = "CDDL"
  20. url = 'https://datatracker.ietf.org/doc/rfc8610/'
  21. aliases = ["cddl"]
  22. filenames = ["*.cddl"]
  23. mimetypes = ["text/x-cddl"]
  24. version_added = '2.8'
  25. _prelude_types = [
  26. "any",
  27. "b64legacy",
  28. "b64url",
  29. "bigfloat",
  30. "bigint",
  31. "bignint",
  32. "biguint",
  33. "bool",
  34. "bstr",
  35. "bytes",
  36. "cbor-any",
  37. "decfrac",
  38. "eb16",
  39. "eb64legacy",
  40. "eb64url",
  41. "encoded-cbor",
  42. "false",
  43. "float",
  44. "float16",
  45. "float16-32",
  46. "float32",
  47. "float32-64",
  48. "float64",
  49. "int",
  50. "integer",
  51. "mime-message",
  52. "nil",
  53. "nint",
  54. "null",
  55. "number",
  56. "regexp",
  57. "tdate",
  58. "text",
  59. "time",
  60. "true",
  61. "tstr",
  62. "uint",
  63. "undefined",
  64. "unsigned",
  65. "uri",
  66. ]
  67. _controls = [
  68. ".and",
  69. ".bits",
  70. ".cbor",
  71. ".cborseq",
  72. ".default",
  73. ".eq",
  74. ".ge",
  75. ".gt",
  76. ".le",
  77. ".lt",
  78. ".ne",
  79. ".regexp",
  80. ".size",
  81. ".within",
  82. ]
  83. _re_id = (
  84. r"[$@A-Z_a-z]"
  85. r"(?:[\-\.]+(?=[$@0-9A-Z_a-z])|[$@0-9A-Z_a-z])*"
  86. )
  87. # While the spec reads more like "an int must not start with 0" we use a
  88. # lookahead here that says "after a 0 there must be no digit". This makes the
  89. # '0' the invalid character in '01', which looks nicer when highlighted.
  90. _re_uint = r"(?:0b[01]+|0x[0-9a-fA-F]+|[1-9]\d*|0(?!\d))"
  91. _re_int = r"-?" + _re_uint
  92. tokens = {
  93. "commentsandwhitespace": [(r"\s+", Whitespace), (r";.+$", Comment.Single)],
  94. "root": [
  95. include("commentsandwhitespace"),
  96. # tag types
  97. (rf"#(\d\.{_re_uint})?", Keyword.Type), # type or any
  98. # occurrence
  99. (
  100. rf"({_re_uint})?(\*)({_re_uint})?",
  101. bygroups(Number, Operator, Number),
  102. ),
  103. (r"\?|\+", Operator), # occurrence
  104. (r"\^", Operator), # cuts
  105. (r"(\.\.\.|\.\.)", Operator), # rangeop
  106. (words(_controls, suffix=r"\b"), Operator.Word), # ctlops
  107. # into choice op
  108. (rf"&(?=\s*({_re_id}|\())", Operator),
  109. (rf"~(?=\s*{_re_id})", Operator), # unwrap op
  110. (r"//|/(?!/)", Operator), # double und single slash
  111. (r"=>|/==|/=|=", Operator),
  112. (r"[\[\]{}\(\),<>:]", Punctuation),
  113. # Bytestrings
  114. (r"(b64)(')", bygroups(String.Affix, String.Single), "bstrb64url"),
  115. (r"(h)(')", bygroups(String.Affix, String.Single), "bstrh"),
  116. (r"'", String.Single, "bstr"),
  117. # Barewords as member keys (must be matched before values, types, typenames,
  118. # groupnames).
  119. # Token type is String as barewords are always interpreted as such.
  120. (rf"({_re_id})(\s*)(:)",
  121. bygroups(String, Whitespace, Punctuation)),
  122. # predefined types
  123. (words(_prelude_types, prefix=r"(?![\-_$@])\b", suffix=r"\b(?![\-_$@])"),
  124. Name.Builtin),
  125. # user-defined groupnames, typenames
  126. (_re_id, Name.Class),
  127. # values
  128. (r"0b[01]+", Number.Bin),
  129. (r"0o[0-7]+", Number.Oct),
  130. (r"0x[0-9a-fA-F]+(\.[0-9a-fA-F]+)?p[+-]?\d+", Number.Hex), # hexfloat
  131. (r"0x[0-9a-fA-F]+", Number.Hex), # hex
  132. # Float
  133. (rf"{_re_int}(?=(\.\d|e[+-]?\d))(?:\.\d+)?(?:e[+-]?\d+)?",
  134. Number.Float),
  135. # Int
  136. (_re_int, Number.Integer),
  137. (r'"(\\\\|\\"|[^"])*"', String.Double),
  138. ],
  139. "bstrb64url": [
  140. (r"'", String.Single, "#pop"),
  141. include("commentsandwhitespace"),
  142. (r"\\.", String.Escape),
  143. (r"[0-9a-zA-Z\-_=]+", String.Single),
  144. (r".", Error),
  145. # (r";.+$", Token.Other),
  146. ],
  147. "bstrh": [
  148. (r"'", String.Single, "#pop"),
  149. include("commentsandwhitespace"),
  150. (r"\\.", String.Escape),
  151. (r"[0-9a-fA-F]+", String.Single),
  152. (r".", Error),
  153. ],
  154. "bstr": [
  155. (r"'", String.Single, "#pop"),
  156. (r"\\.", String.Escape),
  157. (r"[^'\\]+", String.Single),
  158. ],
  159. }