php.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334
  1. """
  2. pygments.lexers.php
  3. ~~~~~~~~~~~~~~~~~~~
  4. Lexers for PHP and related languages.
  5. :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
  6. :license: BSD, see LICENSE for details.
  7. """
  8. import re
  9. from pygments.lexer import Lexer, RegexLexer, include, bygroups, default, \
  10. using, this, words, do_insertions, line_re
  11. from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
  12. Number, Punctuation, Other, Generic
  13. from pygments.util import get_bool_opt, get_list_opt, shebang_matches
  14. __all__ = ['ZephirLexer', 'PsyshConsoleLexer', 'PhpLexer']
class ZephirLexer(RegexLexer):
    """
    For Zephir language source code.

    Zephir is a compiled high level language aimed
    to the creation of C-extensions for PHP.

    The lexer is a plain regex state machine.  Its only non-trivial part
    is deciding whether a ``/`` starts a regex literal or is an operator:
    rules in ``root`` that can be followed by a regex literal push the
    ``slashstartsregex`` state, which looks for one and then pops.
    """

    name = 'Zephir'
    url = 'http://zephir-lang.com/'
    aliases = ['zephir']
    filenames = ['*.zep']
    version_added = '2.0'

    # NOTE(review): neither list is referenced by any rule in this class;
    # the keyword rules below spell their alternatives out inline.
    zephir_keywords = ['fetch', 'echo', 'isset', 'empty']
    zephir_type = ['bit', 'bits', 'string']

    # DOTALL lets ``.*?`` in the block-comment rule span newlines;
    # MULTILINE makes ``^`` in the first ``root`` rule match at every line.
    flags = re.DOTALL | re.MULTILINE

    tokens = {
        # Shared by ``root`` and ``slashstartsregex`` via include().
        'commentsandwhitespace': [
            (r'\s+', Text),
            (r'//.*?\n', Comment.Single),
            (r'/\*.*?\*/', Comment.Multiline)
        ],
        # Pushed after operators, opening punctuation and keywords.  A
        # slash-delimited pattern (with optional g/i/m flags) is emitted as
        # String.Regex, a lone ``/`` as Operator; anything else pops the
        # state without consuming input.
        'slashstartsregex': [
            include('commentsandwhitespace'),
            (r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
             r'([gim]+\b|\B)', String.Regex, '#pop'),
            (r'/', Operator, '#pop'),
            default('#pop')
        ],
        # NOTE(review): no rule visible in this class pushes this state.
        'badregex': [
            (r'\n', Text, '#pop')
        ],
        'root': [
            # Zero-width match at a line start that is followed by
            # whitespace or a slash: enter regex-detection mode.
            (r'^(?=\s|/)', Text, 'slashstartsregex'),
            include('commentsandwhitespace'),
            (r'\+\+|--|~|&&|\?|:|\|\||\\(?=\n)|'
             r'(<<|>>>?|==?|!=?|->|[-<>+*%&|^/])=?', Operator, 'slashstartsregex'),
            (r'[{(\[;,]', Punctuation, 'slashstartsregex'),
            # Closing punctuation and ``.`` do not push the regex state.
            (r'[})\].]', Punctuation),
            # NOTE(review): 'fetch' appears twice in this alternation; the
            # duplicate has no effect on matching.
            (r'(for|in|while|do|break|return|continue|switch|case|default|if|else|loop|'
             r'require|inline|throw|try|catch|finally|new|delete|typeof|instanceof|void|'
             r'namespace|use|extends|this|fetch|isset|unset|echo|fetch|likely|unlikely|'
             r'empty)\b', Keyword, 'slashstartsregex'),
            (r'(var|let|with|function)\b', Keyword.Declaration, 'slashstartsregex'),
            # NOTE(review): 'char' is listed twice here, and 'extends' is
            # already matched by the Keyword rule above, which is tried first.
            (r'(abstract|boolean|bool|char|class|const|double|enum|export|extends|final|'
             r'native|goto|implements|import|int|string|interface|long|ulong|char|uchar|'
             r'float|unsigned|private|protected|public|short|static|self|throws|reverse|'
             r'transient|volatile|readonly)\b', Keyword.Reserved),
            (r'(true|false|null|undefined)\b', Keyword.Constant),
            # NOTE(review): 'this' is already matched by the Keyword rule
            # above, so it should never be emitted as Name.Builtin.
            (r'(Array|Boolean|Date|_REQUEST|_COOKIE|_SESSION|'
             r'_GET|_POST|_SERVER|this|stdClass|range|count|iterator|'
             r'window)\b', Name.Builtin),
            # Identifiers may start with ``$`` and may contain backslashes.
            (r'[$a-zA-Z_][\w\\]*', Name.Other),
            # Float and hex are tried before the plain integer rule so the
            # integer rule cannot consume their leading digits.
            (r'[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'0x[0-9a-fA-F]+', Number.Hex),
            (r'[0-9]+', Number.Integer),
            (r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
            (r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
        ]
    }
  73. class PsyshConsoleLexer(Lexer):
  74. """
  75. For PsySH console output, such as:
  76. .. sourcecode:: psysh
  77. >>> $greeting = function($name): string {
  78. ... return "Hello, {$name}";
  79. ... };
  80. => Closure($name): string {#2371 …3}
  81. >>> $greeting('World')
  82. => "Hello, World"
  83. """
  84. name = 'PsySH console session for PHP'
  85. url = 'https://psysh.org/'
  86. aliases = ['psysh']
  87. version_added = '2.7'
  88. def __init__(self, **options):
  89. options['startinline'] = True
  90. Lexer.__init__(self, **options)
  91. def get_tokens_unprocessed(self, text):
  92. phplexer = PhpLexer(**self.options)
  93. curcode = ''
  94. insertions = []
  95. for match in line_re.finditer(text):
  96. line = match.group()
  97. if line.startswith('>>> ') or line.startswith('... '):
  98. insertions.append((len(curcode),
  99. [(0, Generic.Prompt, line[:4])]))
  100. curcode += line[4:]
  101. elif line.rstrip() == '...':
  102. insertions.append((len(curcode),
  103. [(0, Generic.Prompt, '...')]))
  104. curcode += line[3:]
  105. else:
  106. if curcode:
  107. yield from do_insertions(
  108. insertions, phplexer.get_tokens_unprocessed(curcode))
  109. curcode = ''
  110. insertions = []
  111. yield match.start(), Generic.Output, line
  112. if curcode:
  113. yield from do_insertions(insertions,
  114. phplexer.get_tokens_unprocessed(curcode))
  115. class PhpLexer(RegexLexer):
  116. """
  117. For PHP source code.
  118. For PHP embedded in HTML, use the `HtmlPhpLexer`.
  119. Additional options accepted:
  120. `startinline`
  121. If given and ``True`` the lexer starts highlighting with
  122. php code (i.e.: no starting ``<?php`` required). The default
  123. is ``False``.
  124. `funcnamehighlighting`
  125. If given and ``True``, highlight builtin function names
  126. (default: ``True``).
  127. `disabledmodules`
  128. If given, must be a list of module names whose function names
  129. should not be highlighted. By default all modules are highlighted
  130. except the special ``'unknown'`` module that includes functions
  131. that are known to php but are undocumented.
  132. To get a list of allowed modules have a look into the
  133. `_php_builtins` module:
  134. .. sourcecode:: pycon
  135. >>> from pygments.lexers._php_builtins import MODULES
  136. >>> MODULES.keys()
  137. ['PHP Options/Info', 'Zip', 'dba', ...]
  138. In fact the names of those modules match the module names from
  139. the php documentation.
  140. """
  141. name = 'PHP'
  142. url = 'https://www.php.net/'
  143. aliases = ['php', 'php3', 'php4', 'php5']
  144. filenames = ['*.php', '*.php[345]', '*.inc']
  145. mimetypes = ['text/x-php']
  146. version_added = ''
  147. # Note that a backslash is included, PHP uses a backslash as a namespace
  148. # separator.
  149. _ident_inner = r'(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*'
  150. # But not inside strings.
  151. _ident_nons = r'(?:[_a-z]|[^\x00-\x7f])(?:\w|[^\x00-\x7f])*'
  152. flags = re.IGNORECASE | re.DOTALL | re.MULTILINE
  153. tokens = {
  154. 'root': [
  155. (r'<\?(php)?', Comment.Preproc, 'php'),
  156. (r'[^<]+', Other),
  157. (r'<', Other)
  158. ],
  159. 'php': [
  160. (r'\?>', Comment.Preproc, '#pop'),
  161. (r'(<<<)([\'"]?)(' + _ident_nons + r')(\2\n.*?\n\s*)(\3)(;?)(\n)',
  162. bygroups(String, String, String.Delimiter, String, String.Delimiter,
  163. Punctuation, Text)),
  164. (r'\s+', Text),
  165. (r'#\[', Punctuation, 'attribute'),
  166. (r'#.*?\n', Comment.Single),
  167. (r'//.*?\n', Comment.Single),
  168. # put the empty comment here, it is otherwise seen as
  169. # the start of a docstring
  170. (r'/\*\*/', Comment.Multiline),
  171. (r'/\*\*.*?\*/', String.Doc),
  172. (r'/\*.*?\*/', Comment.Multiline),
  173. (r'(->|::)(\s*)(' + _ident_nons + ')',
  174. bygroups(Operator, Text, Name.Attribute)),
  175. (r'[~!%^&*+=|:.<>/@-]+', Operator),
  176. (r'\?', Operator), # don't add to the charclass above!
  177. (r'[\[\]{}();,]+', Punctuation),
  178. (r'(new)(\s+)(class)\b', bygroups(Keyword, Text, Keyword)),
  179. (r'(class)(\s+)', bygroups(Keyword, Text), 'classname'),
  180. (r'(function)(\s*)(?=\()', bygroups(Keyword, Text)),
  181. (r'(function)(\s+)(&?)(\s*)',
  182. bygroups(Keyword, Text, Operator, Text), 'functionname'),
  183. (r'(const)(\s+)(' + _ident_inner + ')',
  184. bygroups(Keyword, Text, Name.Constant)),
  185. (r'(and|E_PARSE|old_function|E_ERROR|or|as|E_WARNING|parent|'
  186. r'eval|PHP_OS|break|exit|case|extends|PHP_VERSION|cfunction|'
  187. r'FALSE|print|for|require|continue|foreach|require_once|'
  188. r'declare|return|default|static|do|switch|die|stdClass|'
  189. r'echo|else|TRUE|elseif|var|empty|if|xor|enddeclare|include|'
  190. r'virtual|endfor|include_once|while|endforeach|global|'
  191. r'endif|list|endswitch|new|endwhile|not|'
  192. r'array|E_ALL|NULL|final|php_user_filter|interface|'
  193. r'implements|public|private|protected|abstract|clone|try|'
  194. r'catch|throw|this|use|namespace|trait|yield|'
  195. r'finally|match)\b', Keyword),
  196. (r'(true|false|null)\b', Keyword.Constant),
  197. include('magicconstants'),
  198. (r'\$\{', Name.Variable, 'variablevariable'),
  199. (r'\$+' + _ident_inner, Name.Variable),
  200. (_ident_inner, Name.Other),
  201. (r'(\d+\.\d*|\d*\.\d+)(e[+-]?[0-9]+)?', Number.Float),
  202. (r'\d+e[+-]?[0-9]+', Number.Float),
  203. (r'0[0-7]+', Number.Oct),
  204. (r'0x[a-f0-9]+', Number.Hex),
  205. (r'\d+', Number.Integer),
  206. (r'0b[01]+', Number.Bin),
  207. (r"'([^'\\]*(?:\\.[^'\\]*)*)'", String.Single),
  208. (r'`([^`\\]*(?:\\.[^`\\]*)*)`', String.Backtick),
  209. (r'"', String.Double, 'string'),
  210. ],
  211. 'variablevariable': [
  212. (r'\}', Name.Variable, '#pop'),
  213. include('php')
  214. ],
  215. 'magicfuncs': [
  216. # source: http://php.net/manual/en/language.oop5.magic.php
  217. (words((
  218. '__construct', '__destruct', '__call', '__callStatic', '__get', '__set',
  219. '__isset', '__unset', '__sleep', '__wakeup', '__toString', '__invoke',
  220. '__set_state', '__clone', '__debugInfo',), suffix=r'\b'),
  221. Name.Function.Magic),
  222. ],
  223. 'magicconstants': [
  224. # source: http://php.net/manual/en/language.constants.predefined.php
  225. (words((
  226. '__LINE__', '__FILE__', '__DIR__', '__FUNCTION__', '__CLASS__',
  227. '__TRAIT__', '__METHOD__', '__NAMESPACE__',),
  228. suffix=r'\b'),
  229. Name.Constant),
  230. ],
  231. 'classname': [
  232. (_ident_inner, Name.Class, '#pop')
  233. ],
  234. 'functionname': [
  235. include('magicfuncs'),
  236. (_ident_inner, Name.Function, '#pop'),
  237. default('#pop')
  238. ],
  239. 'string': [
  240. (r'"', String.Double, '#pop'),
  241. (r'[^{$"\\]+', String.Double),
  242. (r'\\([nrt"$\\]|[0-7]{1,3}|x[0-9a-f]{1,2})', String.Escape),
  243. (r'\$' + _ident_nons + r'(\[\S+?\]|->' + _ident_nons + ')?',
  244. String.Interpol),
  245. (r'(\{\$\{)(.*?)(\}\})',
  246. bygroups(String.Interpol, using(this, _startinline=True),
  247. String.Interpol)),
  248. (r'(\{)(\$.*?)(\})',
  249. bygroups(String.Interpol, using(this, _startinline=True),
  250. String.Interpol)),
  251. (r'(\$\{)(\S+)(\})',
  252. bygroups(String.Interpol, Name.Variable, String.Interpol)),
  253. (r'[${\\]', String.Double)
  254. ],
  255. 'attribute': [
  256. (r'\]', Punctuation, '#pop'),
  257. (r'\(', Punctuation, 'attributeparams'),
  258. (_ident_inner, Name.Decorator),
  259. include('php')
  260. ],
  261. 'attributeparams': [
  262. (r'\)', Punctuation, '#pop'),
  263. include('php')
  264. ],
  265. }
  266. def __init__(self, **options):
  267. self.funcnamehighlighting = get_bool_opt(
  268. options, 'funcnamehighlighting', True)
  269. self.disabledmodules = get_list_opt(
  270. options, 'disabledmodules', ['unknown'])
  271. self.startinline = get_bool_opt(options, 'startinline', False)
  272. # private option argument for the lexer itself
  273. if '_startinline' in options:
  274. self.startinline = options.pop('_startinline')
  275. # collect activated functions in a set
  276. self._functions = set()
  277. if self.funcnamehighlighting:
  278. from pygments.lexers._php_builtins import MODULES
  279. for key, value in MODULES.items():
  280. if key not in self.disabledmodules:
  281. self._functions.update(value)
  282. RegexLexer.__init__(self, **options)
  283. def get_tokens_unprocessed(self, text):
  284. stack = ['root']
  285. if self.startinline:
  286. stack.append('php')
  287. for index, token, value in \
  288. RegexLexer.get_tokens_unprocessed(self, text, stack):
  289. if token is Name.Other:
  290. if value in self._functions:
  291. yield index, Name.Builtin, value
  292. continue
  293. yield index, token, value
  294. def analyse_text(text):
  295. if shebang_matches(text, r'php'):
  296. return True
  297. rv = 0.0
  298. if re.search(r'<\?(?!xml)', text):
  299. rv += 0.3
  300. return rv