EregToPregFixer.php 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193
  1. <?php
  2. declare(strict_types=1);
  3. /*
  4. * This file is part of PHP CS Fixer.
  5. *
  6. * (c) Fabien Potencier <fabien@symfony.com>
  7. * Dariusz Rumiński <dariusz.ruminski@gmail.com>
  8. *
  9. * This source file is subject to the MIT license that is bundled
  10. * with this source code in the file LICENSE.
  11. */
  12. namespace PhpCsFixer\Fixer\Alias;
  13. use PhpCsFixer\AbstractFixer;
  14. use PhpCsFixer\FixerDefinition\CodeSample;
  15. use PhpCsFixer\FixerDefinition\FixerDefinition;
  16. use PhpCsFixer\FixerDefinition\FixerDefinitionInterface;
  17. use PhpCsFixer\Preg;
  18. use PhpCsFixer\PregException;
  19. use PhpCsFixer\Tokenizer\Analyzer\FunctionsAnalyzer;
  20. use PhpCsFixer\Tokenizer\Token;
  21. use PhpCsFixer\Tokenizer\Tokens;
  /**
   * Fixer that rewrites calls to the deprecated ext/ereg functions (`ereg`, `eregi`,
   * `ereg_replace`, `eregi_replace`, `split`, `spliti`) into the equivalent preg
   * function, converting the pattern literal passed as first argument into a
   * delimited PCRE pattern.
   *
   * @author Matteo Beccati <matteo@beccati.com>
   */
  final class EregToPregFixer extends AbstractFixer
  {
      /**
       * Conversion table. Each entry is a tuple of:
       * the ext/ereg function name, its preg equivalent and the preg modifier(s) to append, if any.
       *
       * @var list<array{string, string, string}>
       */
      private const FUNCTIONS = [
          ['ereg', 'preg_match', ''],
          ['eregi', 'preg_match', 'i'],
          ['ereg_replace', 'preg_replace', ''],
          ['eregi_replace', 'preg_replace', 'i'],
          ['split', 'preg_split', ''],
          ['spliti', 'preg_split', 'i'],
      ];

      /**
       * @var list<string> the list of preg delimiters, in order of preference
       */
      private const DELIMITERS = ['/', '#', '!'];

      public function getDefinition(): FixerDefinitionInterface
      {
          return new FixerDefinition(
              'Replace deprecated `ereg` regular expression functions with `preg`.',
              [new CodeSample("<?php \$x = ereg('[A-Z]');\n")],
              null,
              'Risky if the `ereg` function is overridden.'
          );
      }

      /**
       * {@inheritdoc}
       *
       * Must run after NoUselessConcatOperatorFixer.
       */
      public function getPriority(): int
      {
          return 0;
      }

      /**
       * Any file containing at least one T_STRING token may hold a call worth inspecting.
       */
      public function isCandidate(Tokens $tokens): bool
      {
          return $tokens->isTokenKindFound(T_STRING);
      }

      public function isRisky(): bool
      {
          return true;
      }

      /**
       * Replace every convertible ext/ereg call found in the token collection.
       *
       * A call is converted only when all of the following hold:
       *  - the function name is directly followed by "(" and a constant string literal,
       *  - FunctionsAnalyzer reports the name as a global function call,
       *  - the literal is the whole first argument (next meaningful token is "," or ")"),
       *  - the generated pattern is accepted by checkPreg().
       *
       * Anything else is left untouched.
       */
      protected function applyFix(\SplFileInfo $file, Tokens $tokens): void
      {
          $end = $tokens->count() - 1;
          $functionsAnalyzer = new FunctionsAnalyzer();

          foreach (self::FUNCTIONS as [$eregName, $pregName, $modifiers]) {
              // the sequence is the function name, followed by "(" and a quoted string
              $seq = [[T_STRING, $eregName], '(', [T_CONSTANT_ENCAPSED_STRING]];
              $currIndex = 0;

              while (true) {
                  // function names are matched case-insensitively
                  $match = $tokens->findSequence($seq, $currIndex, $end, false);

                  // did we find a match?
                  if (null === $match) {
                      break;
                  }

                  // findSequence also returns the tokens, but we're only interested in the indices, i.e.:
                  // 0 => function name,
                  // 1 => parenthesis "("
                  // 2 => quoted string passed as 1st parameter
                  $match = array_keys($match);
                  $nameIndex = $match[0];
                  $patternIndex = $match[2];

                  // advance tokenizer cursor, so the next search starts past this candidate
                  $currIndex = $patternIndex;

                  if (!$functionsAnalyzer->isGlobalFunctionCall($tokens, $nameIndex)) {
                      continue;
                  }

                  // ensure the first parameter is just a string (e.g. has nothing appended)
                  $next = $tokens->getNextMeaningfulToken($patternIndex);

                  if (null === $next || !$tokens[$next]->equalsAny([',', ')'])) {
                      continue;
                  }

                  // convert to PCRE: split the literal into optional binary prefix, quote character and body
                  $regexTokenContent = $tokens[$patternIndex]->getContent();
                  $hasBinaryPrefix = 'b' === $regexTokenContent[0] || 'B' === $regexTokenContent[0];
                  $prefix = $hasBinaryPrefix ? $regexTokenContent[0] : '';
                  $quoteOffset = $hasBinaryPrefix ? 1 : 0;
                  $quote = $regexTokenContent[$quoteOffset];
                  $string = substr($regexTokenContent, $quoteOffset + 1, -1);

                  // wrap the body in delimiters, escaping occurrences of the delimiter inside it;
                  // "D" (PCRE_DOLLAR_ENDONLY) is always added, followed by the per-function modifier(s)
                  $delim = $this->getBestDelimiter($string);
                  $preg = $delim.addcslashes($string, $delim).$delim.'D'.$modifiers;

                  // check if the preg is valid
                  if (!$this->checkPreg($preg)) {
                      continue;
                  }

                  // modify function and argument
                  $tokens[$nameIndex] = new Token([T_STRING, $pregName]);
                  $tokens[$patternIndex] = new Token([T_CONSTANT_ENCAPSED_STRING, $prefix.$quote.$preg.$quote]);
              }
          }
      }

      /**
       * Check the validity of a PCRE.
       *
       * The pattern is tried against an empty subject; it is considered invalid
       * when Preg::match() throws a PregException.
       *
       * @param string $pattern the regular expression
       */
      private function checkPreg(string $pattern): bool
      {
          try {
              Preg::match($pattern, '');

              return true;
          } catch (PregException $e) {
              return false;
          }
      }

      /**
       * Get the delimiter that would require the least escaping in a regular expression.
       *
       * The first delimiter (in order of preference) that does not occur in the pattern
       * wins outright. If all of them occur, the least used one is returned, using the
       * position in the list as a tiebreaker.
       *
       * @param string $pattern the regular expression
       *
       * @return string the preg delimiter
       */
      private function getBestDelimiter(string $pattern): string
      {
          $best = self::DELIMITERS[0];
          $bestCount = PHP_INT_MAX;

          foreach (self::DELIMITERS as $delimiter) {
              $count = substr_count($pattern, $delimiter);

              // try to find something that's not used
              if (0 === $count) {
                  return $delimiter;
              }

              // strict "<" keeps the earlier (preferred) delimiter when counts are equal
              if ($count < $bestCount) {
                  $best = $delimiter;
                  $bestCount = $count;
              }
          }

          return $best;
      }
  }