bibtex.py 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160
  1. # -*- coding: utf-8 -*-
  2. """
  3. pygments.lexers.bibtex
  4. ~~~~~~~~~~~~~~~~~~~~~~
  5. Lexers for BibTeX bibliography data and styles
  6. :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
  7. :license: BSD, see LICENSE for details.
  8. """
  9. import re
  10. from pygments.lexer import RegexLexer, ExtendedRegexLexer, include, default, \
  11. words
  12. from pygments.token import Name, Comment, String, Error, Number, Text, \
  13. Keyword, Punctuation
  14. __all__ = ['BibTeXLexer', 'BSTLexer']
  15. class BibTeXLexer(ExtendedRegexLexer):
  16. """
  17. A lexer for BibTeX bibliography data format.
  18. .. versionadded:: 2.2
  19. """
  20. name = 'BibTeX'
  21. aliases = ['bib', 'bibtex']
  22. filenames = ['*.bib']
  23. mimetypes = ["text/x-bibtex"]
  24. flags = re.IGNORECASE
  25. ALLOWED_CHARS = r'@!$&*+\-./:;<>?\[\\\]^`|~'
  26. IDENTIFIER = '[{}][{}]*'.format('a-z_' + ALLOWED_CHARS, r'\w' + ALLOWED_CHARS)
  27. def open_brace_callback(self, match, ctx):
  28. opening_brace = match.group()
  29. ctx.opening_brace = opening_brace
  30. yield match.start(), Punctuation, opening_brace
  31. ctx.pos = match.end()
  32. def close_brace_callback(self, match, ctx):
  33. closing_brace = match.group()
  34. if (
  35. ctx.opening_brace == '{' and closing_brace != '}' or
  36. ctx.opening_brace == '(' and closing_brace != ')'
  37. ):
  38. yield match.start(), Error, closing_brace
  39. else:
  40. yield match.start(), Punctuation, closing_brace
  41. del ctx.opening_brace
  42. ctx.pos = match.end()
  43. tokens = {
  44. 'root': [
  45. include('whitespace'),
  46. ('@comment', Comment),
  47. ('@preamble', Name.Class, ('closing-brace', 'value', 'opening-brace')),
  48. ('@string', Name.Class, ('closing-brace', 'field', 'opening-brace')),
  49. ('@' + IDENTIFIER, Name.Class,
  50. ('closing-brace', 'command-body', 'opening-brace')),
  51. ('.+', Comment),
  52. ],
  53. 'opening-brace': [
  54. include('whitespace'),
  55. (r'[{(]', open_brace_callback, '#pop'),
  56. ],
  57. 'closing-brace': [
  58. include('whitespace'),
  59. (r'[})]', close_brace_callback, '#pop'),
  60. ],
  61. 'command-body': [
  62. include('whitespace'),
  63. (r'[^\s\,\}]+', Name.Label, ('#pop', 'fields')),
  64. ],
  65. 'fields': [
  66. include('whitespace'),
  67. (',', Punctuation, 'field'),
  68. default('#pop'),
  69. ],
  70. 'field': [
  71. include('whitespace'),
  72. (IDENTIFIER, Name.Attribute, ('value', '=')),
  73. default('#pop'),
  74. ],
  75. '=': [
  76. include('whitespace'),
  77. ('=', Punctuation, '#pop'),
  78. ],
  79. 'value': [
  80. include('whitespace'),
  81. (IDENTIFIER, Name.Variable),
  82. ('"', String, 'quoted-string'),
  83. (r'\{', String, 'braced-string'),
  84. (r'[\d]+', Number),
  85. ('#', Punctuation),
  86. default('#pop'),
  87. ],
  88. 'quoted-string': [
  89. (r'\{', String, 'braced-string'),
  90. ('"', String, '#pop'),
  91. (r'[^\{\"]+', String),
  92. ],
  93. 'braced-string': [
  94. (r'\{', String, '#push'),
  95. (r'\}', String, '#pop'),
  96. (r'[^\{\}]+', String),
  97. ],
  98. 'whitespace': [
  99. (r'\s+', Text),
  100. ],
  101. }
  102. class BSTLexer(RegexLexer):
  103. """
  104. A lexer for BibTeX bibliography styles.
  105. .. versionadded:: 2.2
  106. """
  107. name = 'BST'
  108. aliases = ['bst', 'bst-pybtex']
  109. filenames = ['*.bst']
  110. flags = re.IGNORECASE | re.MULTILINE
  111. tokens = {
  112. 'root': [
  113. include('whitespace'),
  114. (words(['read', 'sort']), Keyword),
  115. (words(['execute', 'integers', 'iterate', 'reverse', 'strings']),
  116. Keyword, ('group')),
  117. (words(['function', 'macro']), Keyword, ('group', 'group')),
  118. (words(['entry']), Keyword, ('group', 'group', 'group')),
  119. ],
  120. 'group': [
  121. include('whitespace'),
  122. (r'\{', Punctuation, ('#pop', 'group-end', 'body')),
  123. ],
  124. 'group-end': [
  125. include('whitespace'),
  126. (r'\}', Punctuation, '#pop'),
  127. ],
  128. 'body': [
  129. include('whitespace'),
  130. (r"\'[^#\"\{\}\s]+", Name.Function),
  131. (r'[^#\"\{\}\s]+\$', Name.Builtin),
  132. (r'[^#\"\{\}\s]+', Name.Variable),
  133. (r'"[^\"]*"', String),
  134. (r'#-?\d+', Number),
  135. (r'\{', Punctuation, ('group-end', 'body')),
  136. default('#pop'),
  137. ],
  138. 'whitespace': [
  139. (r'\s+', Text),
  140. ('%.*?$', Comment.SingleLine),
  141. ],
  142. }