EregToPregFixer.php 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205
  1. <?php
  2. declare(strict_types=1);
  3. /*
  4. * This file is part of PHP CS Fixer.
  5. *
  6. * (c) Fabien Potencier <fabien@symfony.com>
  7. * Dariusz Rumiński <dariusz.ruminski@gmail.com>
  8. *
  9. * This source file is subject to the MIT license that is bundled
  10. * with this source code in the file LICENSE.
  11. */
  12. namespace PhpCsFixer\Fixer\Alias;
  13. use PhpCsFixer\AbstractFixer;
  14. use PhpCsFixer\FixerDefinition\CodeSample;
  15. use PhpCsFixer\FixerDefinition\FixerDefinition;
  16. use PhpCsFixer\FixerDefinition\FixerDefinitionInterface;
  17. use PhpCsFixer\Preg;
  18. use PhpCsFixer\PregException;
  19. use PhpCsFixer\Tokenizer\Analyzer\FunctionsAnalyzer;
  20. use PhpCsFixer\Tokenizer\Token;
  21. use PhpCsFixer\Tokenizer\Tokens;
  /**
   * Fixer that rewrites calls to the deprecated `ereg` family of functions into
   * their `preg` counterparts, converting the pattern literal along the way.
   *
   * @author Matteo Beccati <matteo@beccati.com>
   */
  final class EregToPregFixer extends AbstractFixer
  {
      /**
       * Conversion table. Every entry holds, in this order: the ext/ereg function
       * name, the preg function replacing it, and the preg modifier(s) to append
       * to the converted pattern (empty string when there are none).
       *
       * @var list<array<int, string>>
       */
      private static array $functions = [
          ['ereg', 'preg_match', ''],
          ['eregi', 'preg_match', 'i'],
          ['ereg_replace', 'preg_replace', ''],
          ['eregi_replace', 'preg_replace', 'i'],
          ['split', 'preg_split', ''],
          ['spliti', 'preg_split', 'i'],
      ];

      /**
       * Candidate preg delimiters, most preferred first.
       *
       * @var list<string>
       */
      private static array $delimiters = ['/', '#', '!'];

      /**
       * {@inheritdoc}
       *
       * Describes the rule, ships one code sample and states why the rule is risky.
       */
      public function getDefinition(): FixerDefinitionInterface
      {
          $samples = [new CodeSample("<?php \$x = ereg('[A-Z]');\n")];

          return new FixerDefinition(
              'Replace deprecated `ereg` regular expression functions with `preg`.',
              $samples,
              null,
              'Risky if the `ereg` function is overridden.'
          );
      }

      /**
       * {@inheritdoc}
       *
       * Must run after NoUselessConcatOperatorFixer.
       */
      public function getPriority(): int
      {
          return 0;
      }

      /**
       * {@inheritdoc}
       *
       * The token collection qualifies as soon as it contains any T_STRING token.
       */
      public function isCandidate(Tokens $tokens): bool
      {
          return $tokens->isTokenKindFound(T_STRING);
      }

      /**
       * {@inheritdoc}
       *
       * Always true for this fixer.
       */
      public function isRisky(): bool
      {
          return true;
      }

      /**
       * {@inheritdoc}
       *
       * For every entry of the conversion table, finds each occurrence of
       * `<function name> ( <constant string>`, and - when the occurrence is a global
       * function call whose first argument is nothing but that string - swaps the
       * function name and replaces the string with a delimited preg pattern.
       * Occurrences whose converted pattern does not pass checkPreg() are left untouched.
       */
      protected function applyFix(\SplFileInfo $file, Tokens $tokens): void
      {
          $analyzer = new FunctionsAnalyzer();
          $lastIndex = $tokens->count() - 1;

          foreach (self::$functions as [$eregName, $pregName, $modifiers]) {
              // what we search for: the function name, then "(", then a quoted string
              $sequence = [[T_STRING, $eregName], '(', [T_CONSTANT_ENCAPSED_STRING]];
              $searchFrom = 0;

              while (null !== ($found = $tokens->findSequence($sequence, $searchFrom, $lastIndex, false))) {
                  // the result is keyed by token index; only the indices matter here:
                  // first => function name, second => "(", third => quoted string (1st argument)
                  [$nameIndex, , $patternIndex] = array_keys($found);

                  // the next search resumes from the string that was just found
                  $searchFrom = $patternIndex;

                  if (!$analyzer->isGlobalFunctionCall($tokens, $nameIndex)) {
                      continue;
                  }

                  // the string must be the entire first argument, i.e. directly followed by "," or ")"
                  $afterPattern = $tokens->getNextMeaningfulToken($patternIndex);

                  if (null === $afterPattern || !$tokens[$afterPattern]->equalsAny([',', ')'])) {
                      continue;
                  }

                  // take the literal apart: optional leading b/B, the quote character, the text in between
                  $literal = $tokens[$patternIndex]->getContent();
                  $prefix = ('b' === $literal[0] || 'B' === $literal[0]) ? $literal[0] : '';
                  $quoteOffset = '' === $prefix ? 0 : 1;
                  $quote = $literal[$quoteOffset];
                  $regex = substr($literal, $quoteOffset + 1, -1);

                  // build the PCRE: escaped pattern wrapped in the delimiter, followed by
                  // the "D" modifier and the modifier(s) taken from the conversion table
                  $delimiter = $this->getBestDelimiter($regex);
                  $pcre = $delimiter.addcslashes($regex, $delimiter).$delimiter.'D'.$modifiers;

                  // only touch the code when the resulting pattern is usable
                  if ($this->checkPreg($pcre)) {
                      $tokens[$nameIndex] = new Token([T_STRING, $pregName]);
                      $tokens[$patternIndex] = new Token([T_CONSTANT_ENCAPSED_STRING, $prefix.$quote.$pcre.$quote]);
                  }
              }
          }
      }

      /**
       * Tell whether a PCRE is usable.
       *
       * The pattern is run against an empty subject through Preg::match(); it is
       * reported as invalid when that call throws a PregException.
       *
       * @param string $pattern the regular expression, delimiters and modifiers included
       */
      private function checkPreg(string $pattern): bool
      {
          try {
              Preg::match($pattern, '');
          } catch (PregException $e) {
              return false;
          }

          return true;
      }

      /**
       * Pick the delimiter that needs the least escaping inside the given pattern.
       *
       * The first delimiter of the preference list that does not occur in the
       * pattern wins outright. When all of them occur, the one occurring the
       * fewest times is chosen; on equal counts the more preferred one is kept.
       *
       * @param string $pattern the regular expression, without delimiters
       *
       * @return string the preg delimiter
       */
      private function getBestDelimiter(string $pattern): string
      {
          $leastUsed = null;
          $lowestCount = null;

          foreach (self::$delimiters as $candidate) {
              $occurrences = substr_count($pattern, $candidate);

              // unused delimiter: nothing to escape, take it right away
              if (0 === $occurrences) {
                  return $candidate;
              }

              // strict comparison keeps the earlier (more preferred) delimiter on ties
              if (null === $lowestCount || $occurrences < $lowestCount) {
                  $leastUsed = $candidate;
                  $lowestCount = $occurrences;
              }
          }

          return $leastUsed;
      }
  }