bibtex.py 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159
  1. """
  2. pygments.lexers.bibtex
  3. ~~~~~~~~~~~~~~~~~~~~~~
  4. Lexers for BibTeX bibliography data and styles
  5. :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import re
  9. from pygments.lexer import RegexLexer, ExtendedRegexLexer, include, default, \
  10. words
  11. from pygments.token import Name, Comment, String, Error, Number, Keyword, \
  12. Punctuation, Whitespace
  13. __all__ = ['BibTeXLexer', 'BSTLexer']
  14. class BibTeXLexer(ExtendedRegexLexer):
  15. """
  16. A lexer for BibTeX bibliography data format.
  17. """
  18. name = 'BibTeX'
  19. aliases = ['bibtex', 'bib']
  20. filenames = ['*.bib']
  21. mimetypes = ["text/x-bibtex"]
  22. version_added = '2.2'
  23. flags = re.IGNORECASE
  24. url = 'https://texfaq.org/FAQ-BibTeXing'
  25. ALLOWED_CHARS = r'@!$&*+\-./:;<>?\[\\\]^`|~'
  26. IDENTIFIER = '[{}][{}]*'.format('a-z_' + ALLOWED_CHARS, r'\w' + ALLOWED_CHARS)
  27. def open_brace_callback(self, match, ctx):
  28. opening_brace = match.group()
  29. ctx.opening_brace = opening_brace
  30. yield match.start(), Punctuation, opening_brace
  31. ctx.pos = match.end()
  32. def close_brace_callback(self, match, ctx):
  33. closing_brace = match.group()
  34. if (
  35. ctx.opening_brace == '{' and closing_brace != '}' or
  36. ctx.opening_brace == '(' and closing_brace != ')'
  37. ):
  38. yield match.start(), Error, closing_brace
  39. else:
  40. yield match.start(), Punctuation, closing_brace
  41. del ctx.opening_brace
  42. ctx.pos = match.end()
  43. tokens = {
  44. 'root': [
  45. include('whitespace'),
  46. (r'@comment(?!ary)', Comment),
  47. ('@preamble', Name.Class, ('closing-brace', 'value', 'opening-brace')),
  48. ('@string', Name.Class, ('closing-brace', 'field', 'opening-brace')),
  49. ('@' + IDENTIFIER, Name.Class,
  50. ('closing-brace', 'command-body', 'opening-brace')),
  51. ('.+', Comment),
  52. ],
  53. 'opening-brace': [
  54. include('whitespace'),
  55. (r'[{(]', open_brace_callback, '#pop'),
  56. ],
  57. 'closing-brace': [
  58. include('whitespace'),
  59. (r'[})]', close_brace_callback, '#pop'),
  60. ],
  61. 'command-body': [
  62. include('whitespace'),
  63. (r'[^\s\,\}]+', Name.Label, ('#pop', 'fields')),
  64. ],
  65. 'fields': [
  66. include('whitespace'),
  67. (',', Punctuation, 'field'),
  68. default('#pop'),
  69. ],
  70. 'field': [
  71. include('whitespace'),
  72. (IDENTIFIER, Name.Attribute, ('value', '=')),
  73. default('#pop'),
  74. ],
  75. '=': [
  76. include('whitespace'),
  77. ('=', Punctuation, '#pop'),
  78. ],
  79. 'value': [
  80. include('whitespace'),
  81. (IDENTIFIER, Name.Variable),
  82. ('"', String, 'quoted-string'),
  83. (r'\{', String, 'braced-string'),
  84. (r'[\d]+', Number),
  85. ('#', Punctuation),
  86. default('#pop'),
  87. ],
  88. 'quoted-string': [
  89. (r'\{', String, 'braced-string'),
  90. ('"', String, '#pop'),
  91. (r'[^\{\"]+', String),
  92. ],
  93. 'braced-string': [
  94. (r'\{', String, '#push'),
  95. (r'\}', String, '#pop'),
  96. (r'[^\{\}]+', String),
  97. ],
  98. 'whitespace': [
  99. (r'\s+', Whitespace),
  100. ],
  101. }
  102. class BSTLexer(RegexLexer):
  103. """
  104. A lexer for BibTeX bibliography styles.
  105. """
  106. name = 'BST'
  107. aliases = ['bst', 'bst-pybtex']
  108. filenames = ['*.bst']
  109. version_added = '2.2'
  110. flags = re.IGNORECASE | re.MULTILINE
  111. url = 'https://texfaq.org/FAQ-BibTeXing'
  112. tokens = {
  113. 'root': [
  114. include('whitespace'),
  115. (words(['read', 'sort']), Keyword),
  116. (words(['execute', 'integers', 'iterate', 'reverse', 'strings']),
  117. Keyword, ('group')),
  118. (words(['function', 'macro']), Keyword, ('group', 'group')),
  119. (words(['entry']), Keyword, ('group', 'group', 'group')),
  120. ],
  121. 'group': [
  122. include('whitespace'),
  123. (r'\{', Punctuation, ('#pop', 'group-end', 'body')),
  124. ],
  125. 'group-end': [
  126. include('whitespace'),
  127. (r'\}', Punctuation, '#pop'),
  128. ],
  129. 'body': [
  130. include('whitespace'),
  131. (r"\'[^#\"\{\}\s]+", Name.Function),
  132. (r'[^#\"\{\}\s]+\$', Name.Builtin),
  133. (r'[^#\"\{\}\s]+', Name.Variable),
  134. (r'"[^\"]*"', String),
  135. (r'#-?\d+', Number),
  136. (r'\{', Punctuation, ('group-end', 'body')),
  137. default('#pop'),
  138. ],
  139. 'whitespace': [
  140. (r'\s+', Whitespace),
  141. ('%.*?$', Comment.Single),
  142. ],
  143. }