EregToPregFixer.php 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170
  1. <?php
  2. /*
  3. * This file is part of the PHP CS utility.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * This source file is subject to the MIT license that is bundled
  8. * with this source code in the file LICENSE.
  9. */
  10. namespace Symfony\CS\Fixer\Contrib;
  11. use Symfony\CS\AbstractFixer;
  12. use Symfony\CS\Tokenizer\Tokens;
  13. use Symfony\CS\Utils;
  /**
   * Rewrites calls to the deprecated ext/ereg functions (ereg, eregi, ereg_replace,
   * eregi_replace, split, spliti) into calls to their preg_* counterparts.
   *
   * Only calls whose first argument is a single quoted string literal are touched,
   * and only when the generated PCRE pattern compiles successfully.
   *
   * @author Matteo Beccati <matteo@beccati.com>
   */
  class EregToPregFixer extends AbstractFixer
  {
      /**
       * @var array the list of the ext/ereg function names, their preg equivalent and the preg modifier(s), if any
       *            all condensed in an array of arrays.
       */
      private static $functions = array(
          array('ereg', 'preg_match', ''),
          array('eregi', 'preg_match', 'i'),
          array('ereg_replace', 'preg_replace', ''),
          array('eregi_replace', 'preg_replace', 'i'),
          array('split', 'preg_split', ''),
          array('spliti', 'preg_split', 'i'),
      );

      /**
       * @var array the list of preg delimiters, in order of preference.
       */
      private static $delimiters = array('/', '#', '!');

      /**
       * {@inheritdoc}
       *
       * @param \SplFileInfo $file    the file being fixed (not inspected by this fixer)
       * @param string       $content the PHP source code to fix
       *
       * @return string the source code with the eligible ext/ereg calls rewritten
       */
      public function fix(\SplFileInfo $file, $content)
      {
          // skip the (comparatively expensive) tokenization when no candidate name appears at all
          if (!$this->cursoryMatch($content)) {
              return $content;
          }

          $tokens = Tokens::fromCode($content);
          $end = $tokens->count() - 1;

          foreach (self::$functions as $map) {
              // the sequence is the function name, followed by "(" and a quoted string
              $seq = array(array(T_STRING, $map[0]), '(', array(T_CONSTANT_ENCAPSED_STRING));

              $currIndex = 0;
              while (null !== $currIndex) {
                  $match = $tokens->findSequence($seq, $currIndex, $end, false);

                  // did we find a match?
                  if (null === $match) {
                      break;
                  }

                  // findSequence also returns the tokens, but we're only interested in the indexes, i.e.:
                  // 0 => function name,
                  // 1 => bracket "("
                  // 2 => quoted string passed as 1st parameter
                  $match = array_keys($match);

                  // advance tokenizer cursor
                  $currIndex = $match[2];

                  // ensure it's a plain function call: not a method call, a static call,
                  // or a class instantiation such as "new split('...')"
                  $prev = $tokens->getPrevMeaningfulToken($match[0]);
                  if (null === $prev || $tokens[$prev]->isGivenKind(array(T_OBJECT_OPERATOR, T_DOUBLE_COLON, T_NEW))) {
                      continue;
                  }

                  // ensure the first parameter is just a string (e.g. has nothing appended)
                  $next = $tokens->getNextMeaningfulToken($match[2]);
                  if (null === $next || !$tokens[$next]->equalsAny(array(',', ')'))) {
                      continue;
                  }

                  // convert to PCRE: split the literal into its quote character and its raw inner text
                  $literal = $tokens[$match[2]]->getContent();
                  $quote = substr($literal, 0, 1);
                  $string = substr($literal, 1, -1);

                  // wrap the pattern in the chosen delimiter (escaping its occurrences) and append
                  // the 'D' (PCRE_DOLLAR_ENDONLY) modifier followed by the per-function modifier(s)
                  $delim = $this->getBestDelimiter($string);
                  $preg = $delim.addcslashes($string, $delim).$delim.'D'.$map[2];

                  // check if the preg is valid; leave the call untouched otherwise
                  if (!$this->checkPreg($preg)) {
                      continue;
                  }

                  // modify function and argument
                  $tokens[$match[2]]->setContent($quote.$preg.$quote);
                  $tokens[$match[0]]->setContent($map[1]);
              }
          }

          return $tokens->generateCode();
      }

      /**
       * {@inheritdoc}
       */
      public function getDescription()
      {
          return 'Replace deprecated ereg regular expression functions with preg. Warning! This could change code behavior.';
      }

      /**
       * Check the validity of a PCRE.
       *
       * The pattern is run against an empty subject; warnings are suppressed on purpose,
       * as only the "false" return value of preg_match() is of interest here.
       *
       * @param string $pattern the regular expression
       *
       * @return bool
       */
      private function checkPreg($pattern)
      {
          return false !== @preg_match($pattern, '');
      }

      /**
       * Get the delimiter that would require the least escaping in a regular expression.
       *
       * The first delimiter (in order of preference) that does not occur in the pattern wins.
       * When all of them occur, the one with the fewest occurrences is returned, the order
       * of preference being used as a tie breaker.
       *
       * @param string $pattern the regular expression
       *
       * @return string the preg delimiter
       */
      private function getBestDelimiter($pattern)
      {
          // try to find something that's not used
          $delimiters = array();
          foreach (self::$delimiters as $k => $d) {
              if (false === strpos($pattern, $d)) {
                  return $d;
              }

              // remember how often the delimiter occurs and its position in the preference list
              $delimiters[$d] = array(substr_count($pattern, $d), $k);
          }

          // return the least used delimiter, using the position in the list as a tie breaker
          uasort($delimiters, function ($a, $b) {
              if ($a[0] === $b[0]) {
                  // same number of occurrences: compare the positions, not the whole pairs
                  return Utils::cmpInt($a[1], $b[1]);
              }

              return $a[0] < $b[0] ? -1 : 1;
          });

          return key($delimiters);
      }

      /**
       * Perform a quick search to see if any ext/ereg functions are used.
       *
       * @param string $content the content itself
       *
       * @return bool
       */
      private function cursoryMatch($content)
      {
          // just searching for "ereg" or "split" will do, since all the function names start with either of them
          return false !== stripos($content, 'ereg') || false !== stripos($content, 'split');
      }
  }