lilypond.py 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225
  1. """
  2. pygments.lexers.lilypond
  3. ~~~~~~~~~~~~~~~~~~~~~~~~
  4. Lexer for LilyPond.
  5. :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import re
  9. from pygments.lexer import bygroups, default, inherit, words
  10. from pygments.lexers.lisp import SchemeLexer
  11. from pygments.lexers._lilypond_builtins import (
  12. keywords, pitch_language_names, clefs, scales, repeat_types, units,
  13. chord_modifiers, pitches, music_functions, dynamics, articulations,
  14. music_commands, markup_commands, grobs, translators, contexts,
  15. context_properties, grob_properties, scheme_functions, paper_variables,
  16. header_variables
  17. )
  18. from pygments.token import Token
  __all__ = ["LilyPondLexer"]

  # In LilyPond, (unquoted) name tokens only contain letters, hyphens,
  # and underscores, where hyphens and underscores must not start or end
  # a name token.
  #
  # Note that many of the entities listed as LilyPond built-in keywords
  # (in file `_lilypond_builtins.py`) are only valid if surrounded by
  # double quotes, for example, 'hufnagel-fa1'. This means that
  # `NAME_END_RE` doesn't apply to such entities in valid LilyPond code.
  #
  # `NAME_END_RE` is a zero-width lookahead, so it consumes no input. It
  # succeeds when the text that follows is one of:
  #   - a digit,
  #   - any character that is neither a word character nor a hyphen,
  #   - a hyphen or underscore that is itself followed by a non-word
  #     character or a digit.
  NAME_END_RE = r"(?=\d|[^\w\-]|[\-_][\W\d])"
  def builtin_words(names, backslash, suffix=NAME_END_RE):
      """Return a ``words()`` matcher for a list of builtin names.

      Every generated pattern starts with an optional direction
      specifier (one of ``-``, ``_`` or ``^``).  What follows depends on
      *backslash*:

      * ``"mandatory"``  -- a backslash must precede the name;
      * ``"optional"``   -- a backslash may precede the name;
      * ``"disallowed"`` -- the name is matched without a backslash.

      *suffix* is appended after the name; by default it is the
      ``NAME_END_RE`` lookahead.
      """
      direction = r"[\-_^]?"
      if backslash == "mandatory":
          return words(names, direction + r"\\", suffix)
      if backslash == "optional":
          return words(names, direction + r"\\?", suffix)
      # Only one mode remains; anything else is a programming error.
      assert backslash == "disallowed"
      return words(names, direction, suffix)
  class LilyPondLexer(SchemeLexer):
      """
      Lexer for input to LilyPond, a text-based music typesetter.

      .. important::

         This lexer is meant to be used in conjunction with the ``lilypond`` style.
      """
      name = 'LilyPond'
      url = 'https://lilypond.org'
      aliases = ['lilypond']
      filenames = ['*.ly']
      mimetypes = []
      version_added = '2.11'

      flags = re.DOTALL | re.MULTILINE

      # Parsing LilyPond input is very tricky (and in fact impossible
      # without executing LilyPond when there is Scheme code in the
      # file), so this lexer does not try to recognize lexical modes.
      # Instead, it catches the most frequent pieces of syntax and,
      # above all, knows about many kinds of builtins.

      # Embedded Scheme is handled by subclassing the SchemeLexer.  The
      # 'root' state is redefined entirely, and a rule for #{ #} is added
      # to the 'value' state.  The latter is used to parse a Scheme
      # expression after #.

      def get_tokens_unprocessed(self, text):
          """Retag Scheme tokens coming from the parent lexer.

          Tokens typed exactly ``Token.Name.Builtin`` become
          ``Token.Name.Builtin.SchemeBuiltin``.  Tokens typed exactly
          ``Token.Name.Function`` or ``Token.Name.Variable`` become
          ``Token.Name.Builtin.SchemeFunction`` when their text is listed
          in ``scheme_functions``.  Everything else is passed through
          unchanged.
          """
          for position, kind, lexeme in super().get_tokens_unprocessed(text):
              if kind is Token.Name.Builtin:
                  kind = Token.Name.Builtin.SchemeBuiltin
              elif kind is Token.Name.Function or kind is Token.Name.Variable:
                  if lexeme in scheme_functions:
                      kind = Token.Name.Builtin.SchemeFunction
              yield position, kind, lexeme

      tokens = {
          "root": [
              # Whitespace.
              (r"\s+", Token.Text.Whitespace),

              # Multi-line comments.  These are non-nestable.
              (r"%\{.*?%\}", Token.Comment.Multiline),

              # Simple comments.
              (r"%.*?$", Token.Comment.Single),

              # End of embedded LilyPond in Scheme.
              (r"#\}", Token.Punctuation, "#pop"),

              # Embedded Scheme, starting with # ("delayed"),
              # or $ (immediate).  #@ and $@ are the lesser known
              # "list splicing operators".
              (r"[#$]@?", Token.Punctuation, "value"),

              # Any kind of punctuation:
              # - sequential music: { },
              # - parallel music: << >>,
              # - voice separator: << \\ >>,
              # - chord: < >,
              # - bar check: |,
              # - dot in nested properties: \revert NoteHead.color,
              # - equals sign in assignments and lists for various commands:
              #   \override Stem.color = red,
              # - comma as alternative syntax for lists: \time 3,3,2 4/4,
              # - colon in tremolos: c:32,
              # - double hyphen and underscore in lyrics: li -- ly -- pond __
              #   (which must be preceded by ASCII whitespace)
              (r"""(?x)
                 \\\\
                 | (?<= \s ) (?: -- | __ )
                 | [{}<>=.,:|]
                """, Token.Punctuation),

              # Pitches, with optional octavation marks, octave check,
              # and forced or cautionary accidental.
              (words(pitches, suffix=r"=?[',]*!?\??" + NAME_END_RE), Token.Pitch),

              # Strings, optionally with direction specifier.
              (r'[\-_^]?"', Token.String, "string"),

              # Numbers.
              (r"-?\d+\.\d+", Token.Number.Float),  # 5. and .5 are not allowed
              (r"-?\d+/\d+", Token.Number.Fraction),

              # Integers, or durations with optional augmentation dots.
              # We have no way to distinguish these, so we highlight
              # them all as numbers.
              #
              # Normally, there is a space before the integer (being an
              # argument to a music function), which we check here.  The
              # case without a space is handled below (as a fingering
              # number).
              (r"""(?x)
                 (?<= \s ) -\d+
                 | (?: (?: \d+ | \\breve | \\longa | \\maxima )
                       \.* )
                """, Token.Number),

              # Separates duration and duration multiplier highlighted as fraction.
              (r"\*", Token.Number),

              # Ties, slurs, manual beams.
              (r"[~()[\]]", Token.Name.Builtin.Articulation),

              # Predefined articulation shortcuts.  A direction specifier is
              # required here.
              (r"[\-_^][>^_!.\-+]", Token.Name.Builtin.Articulation),

              # Fingering numbers, string numbers.
              (r"[\-_^]?\\?\d+", Token.Name.Builtin.Articulation),

              # Builtins.
              (builtin_words(keywords, "mandatory"), Token.Keyword),
              (builtin_words(pitch_language_names, "disallowed"), Token.Name.PitchLanguage),
              (builtin_words(clefs, "disallowed"), Token.Name.Builtin.Clef),
              (builtin_words(scales, "mandatory"), Token.Name.Builtin.Scale),
              (builtin_words(repeat_types, "disallowed"), Token.Name.Builtin.RepeatType),
              (builtin_words(units, "mandatory"), Token.Number),
              (builtin_words(chord_modifiers, "disallowed"), Token.ChordModifier),
              (builtin_words(music_functions, "mandatory"), Token.Name.Builtin.MusicFunction),
              (builtin_words(dynamics, "mandatory"), Token.Name.Builtin.Dynamic),
              # Those like slurs that don't take a backslash are covered above.
              (builtin_words(articulations, "mandatory"), Token.Name.Builtin.Articulation),
              (builtin_words(music_commands, "mandatory"), Token.Name.Builtin.MusicCommand),
              (builtin_words(markup_commands, "mandatory"), Token.Name.Builtin.MarkupCommand),
              (builtin_words(grobs, "disallowed"), Token.Name.Builtin.Grob),
              (builtin_words(translators, "disallowed"), Token.Name.Builtin.Translator),
              # Optional backslash because of \layout { \context { \Score ... } }.
              (builtin_words(contexts, "optional"), Token.Name.Builtin.Context),
              (builtin_words(context_properties, "disallowed"), Token.Name.Builtin.ContextProperty),
              # A known grob property may be followed by dotted
              # subproperties, which the pushed state picks up.
              (builtin_words(grob_properties, "disallowed"),
               Token.Name.Builtin.GrobProperty,
               "maybe-subproperties"),
              # Optional backslashes here because output definitions are wrappers
              # around modules.  Concretely, you can do, e.g.,
              # \paper { oddHeaderMarkup = \evenHeaderMarkup }
              (builtin_words(paper_variables, "optional"), Token.Name.Builtin.PaperVariable),
              (builtin_words(header_variables, "optional"), Token.Name.Builtin.HeaderVariable),

              # Other backslashed-escaped names (like dereferencing a
              # music variable), possibly with a direction specifier.
              (r"[\-_^]?\\.+?" + NAME_END_RE, Token.Name.BackslashReference),

              # Definition of a variable.  Support assignments to alist keys
              # (myAlist.my-key.my-nested-key = \markup \spam \eggs).
              (r"""(?x)
                 (?: [^\W\d] | - )+
                 (?= (?: [^\W\d] | [\-.] )* \s* = )
                """, Token.Name.Lvalue),

              # Virtually everything can appear in markup mode, so we highlight
              # as text.  Try to get a complete word, or we might wrongly lex
              # a suffix that happens to be a builtin as a builtin (e.g., "myStaff").
              (r"([^\W\d]|-)+?" + NAME_END_RE, Token.Text),
              (r".", Token.Text),
          ],
          "string": [
              # Closing quote ends the string; a backslash escapes the
              # character that follows it.
              (r'"', Token.String, "#pop"),
              (r'\\.', Token.String.Escape),
              (r'[^\\"]+', Token.String),
          ],
          "value": [
              # Scan a LilyPond value, then pop back since we had a
              # complete expression.
              (r"#\{", Token.Punctuation, ("#pop", "root")),
              inherit,
          ],
          # Grob subproperties are undeclared and it would be tedious
          # to maintain them by hand.  Instead, this state allows recognizing
          # everything that looks like a-known-property.foo.bar-baz as
          # one single property name.
          "maybe-subproperties": [
              (r"\s+", Token.Text.Whitespace),
              (r"(\.)((?:[^\W\d]|-)+?)" + NAME_END_RE,
               bygroups(Token.Punctuation, Token.Name.Builtin.GrobProperty)),
              default("#pop"),
          ]
      }