Browse Source

feat: Introduce `multiline_string_to_heredoc` fixer (#7665)

Michael Voříšek 1 year ago
parent
commit
3705f919d4

+ 3 - 0
doc/rules/index.rst

@@ -861,6 +861,9 @@ String Notation
 - `heredoc_to_nowdoc <./string_notation/heredoc_to_nowdoc.rst>`_
 
   Convert ``heredoc`` to ``nowdoc`` where possible.
+- `multiline_string_to_heredoc <./string_notation/multiline_string_to_heredoc.rst>`_
+
+  Convert multiline string to ``heredoc`` or ``nowdoc``.
 - `no_binary_string <./string_notation/no_binary_string.rst>`_
 
   There should not be a binary flag before strings.

+ 45 - 0
doc/rules/string_notation/multiline_string_to_heredoc.rst

@@ -0,0 +1,45 @@
+====================================
+Rule ``multiline_string_to_heredoc``
+====================================
+
+Convert multiline string to ``heredoc`` or ``nowdoc``.
+
+Examples
+--------
+
+Example #1
+~~~~~~~~~~
+
+.. code-block:: diff
+
+   --- Original
+   +++ New
+    <?php
+   -$a = 'line1
+   -line2';
+   +$a = <<<'EOD'
+   +line1
+   +line2
+   +EOD;
+
+Example #2
+~~~~~~~~~~
+
+.. code-block:: diff
+
+   --- Original
+   +++ New
+    <?php
+   -$a = "line1
+   -{$obj->getName()}";
+   +$a = <<<EOD
+   +line1
+   +{$obj->getName()}
+   +EOD;
+References
+----------
+
+- Fixer class: `PhpCsFixer\\Fixer\\StringNotation\\MultilineStringToHeredocFixer <./../../../src/Fixer/StringNotation/MultilineStringToHeredocFixer.php>`_
+- Test class: `PhpCsFixer\\Tests\\Fixer\\StringNotation\\MultilineStringToHeredocFixerTest <./../../../tests/Fixer/StringNotation/MultilineStringToHeredocFixerTest.php>`_
+
+The test class defines officially supported behaviour. Each test case is a part of our backward compatibility promise.

+ 1 - 1
src/Fixer/StringNotation/EscapeImplicitBackslashesFixer.php

@@ -82,7 +82,7 @@ final class EscapeImplicitBackslashesFixer extends AbstractFixer implements Conf
      * {@inheritdoc}
      *
      * Must run before HeredocToNowdocFixer, SingleQuoteFixer.
-     * Must run after BacktickToShellExecFixer.
+     * Must run after BacktickToShellExecFixer, MultilineStringToHeredocFixer.
      */
     public function getPriority(): int
     {

+ 166 - 0
src/Fixer/StringNotation/MultilineStringToHeredocFixer.php

@@ -0,0 +1,166 @@
+<?php
+
+declare(strict_types=1);
+
+/*
+ * This file is part of PHP CS Fixer.
+ *
+ * (c) Fabien Potencier <fabien@symfony.com>
+ *     Dariusz Rumiński <dariusz.ruminski@gmail.com>
+ *
+ * This source file is subject to the MIT license that is bundled
+ * with this source code in the file LICENSE.
+ */
+
+namespace PhpCsFixer\Fixer\StringNotation;
+
+use PhpCsFixer\AbstractFixer;
+use PhpCsFixer\FixerDefinition\CodeSample;
+use PhpCsFixer\FixerDefinition\FixerDefinition;
+use PhpCsFixer\FixerDefinition\FixerDefinitionInterface;
+use PhpCsFixer\Preg;
+use PhpCsFixer\Tokenizer\Token;
+use PhpCsFixer\Tokenizer\Tokens;
+
+/**
+ * Rewrites single- and double-quoted strings that contain a line break as
+ * `nowdoc` and `heredoc` respectively.
+ *
+ * @author Michael Vorisek <https://github.com/mvorisek>
+ */
+final class MultilineStringToHeredocFixer extends AbstractFixer
+{
+    public function getDefinition(): FixerDefinitionInterface
+    {
+        return new FixerDefinition(
+            'Convert multiline string to `heredoc` or `nowdoc`.',
+            [
+                new CodeSample(
+                    <<<'EOD'
+                        <?php
+                        $a = 'line1
+                        line2';
+                        EOD."\n"
+                ),
+                new CodeSample(
+                    <<<'EOD'
+                        <?php
+                        $a = "line1
+                        {$obj->getName()}";
+                        EOD."\n"
+                ),
+            ]
+        );
+    }
+
+    public function isCandidate(Tokens $tokens): bool
+    {
+        return $tokens->isAnyTokenKindsFound([T_CONSTANT_ENCAPSED_STRING, T_ENCAPSED_AND_WHITESPACE]);
+    }
+
+    /**
+     * {@inheritdoc}
+     *
+     * Must run before EscapeImplicitBackslashesFixer, HeredocIndentationFixer.
+     */
+    public function getPriority(): int
+    {
+        return 16;
+    }
+
+    protected function applyFix(\SplFileInfo $file, Tokens $tokens): void
+    {
+        // index of the opening quote of the string with interpolation being scanned, null when outside of one
+        $complexStringStartIndex = null;
+        foreach ($tokens as $index => $token) {
+            if (null === $complexStringStartIndex) {
+                if ($token->isGivenKind(T_CONSTANT_ENCAPSED_STRING)) {
+                    // the whole string is this single token; the heredoc tokens that may
+                    // replace it match none of the conditions of this loop when visited later
+                    $this->convertStringToHeredoc($tokens, $index, $index);
+                } elseif ($token->equalsAny(['"', 'b"', 'B"'])) {
+                    // the string spans several tokens; it is converted once its closing quote is reached
+                    $complexStringStartIndex = $index;
+                }
+            } elseif ($token->equals('"')) {
+                $this->convertStringToHeredoc($tokens, $complexStringStartIndex, $index);
+
+                $complexStringStartIndex = null;
+            }
+        }
+    }
+
+    /**
+     * Replaces the string spanning the given token range with a heredoc, or with
+     * a nowdoc when the string is single-quoted.
+     *
+     * A string whose content has no line break (`\n` or `\r`) is left untouched.
+     *
+     * @param int $stringStartIndex index of the T_CONSTANT_ENCAPSED_STRING token, or of the opening quote token
+     * @param int $stringEndIndex   the start index for a T_CONSTANT_ENCAPSED_STRING token, otherwise the index of the closing quote token
+     */
+    private function convertStringToHeredoc(Tokens $tokens, int $stringStartIndex, int $stringEndIndex): void
+    {
+        $closingMarker = 'EOD';
+
+        if ($tokens[$stringStartIndex]->isGivenKind(T_CONSTANT_ENCAPSED_STRING)) {
+            $content = $tokens[$stringStartIndex]->getContent();
+            // drop the optional binary prefix (`b` / `B`)
+            if ('b' === strtolower(substr($content, 0, 1))) {
+                $content = substr($content, 1);
+            }
+            $isSingleQuoted = str_starts_with($content, '\'');
+            // strip the surrounding quotes
+            $content = substr($content, 1, -1);
+
+            if ($isSingleQuoted) {
+                // `\\` and `\'` are unescaped, the content is going to be placed in a nowdoc
+                $content = Preg::replace('~\\\\([\\\\\'])~', '$1', $content);
+            } else {
+                $content = $this->unescapeDoubleQuotes($content);
+            }
+
+            // the added line break puts the closing marker on a line of its own
+            $constantStringToken = new Token([T_ENCAPSED_AND_WHITESPACE, $content."\n"]);
+        } else {
+            $content = $tokens->generatePartialCode($stringStartIndex + 1, $stringEndIndex - 1);
+            $isSingleQuoted = false;
+            $constantStringToken = null;
+        }
+
+        if (!str_contains($content, "\n") && !str_contains($content, "\r")) {
+            return;
+        }
+
+        // extend the marker with `_` as long as some line of the content starts with it (after optional whitespace)
+        while (Preg::match('~(^|[\r\n])\s*'.preg_quote($closingMarker, '~').'(?!\w)~', $content)) {
+            $closingMarker .= '_';
+        }
+
+        // quoting the marker is what makes the heredoc a nowdoc
+        $quoting = $isSingleQuoted ? '\'' : '';
+        $heredocStartToken = new Token([T_START_HEREDOC, '<<<'.$quoting.$closingMarker.$quoting."\n"]);
+        $heredocEndToken = new Token([T_END_HEREDOC, $closingMarker]);
+
+        if (null !== $constantStringToken) {
+            $tokens->overrideRange($stringStartIndex, $stringEndIndex, [
+                $heredocStartToken,
+                $constantStringToken,
+                $heredocEndToken,
+            ]);
+        } else {
+            // only the literal parts are unescaped, tokens of the interpolated expressions are kept as is
+            for ($i = $stringStartIndex + 1; $i < $stringEndIndex; ++$i) {
+                if ($tokens[$i]->isGivenKind(T_ENCAPSED_AND_WHITESPACE)) {
+                    $tokens[$i] = new Token([
+                        $tokens[$i]->getId(),
+                        $this->unescapeDoubleQuotes($tokens[$i]->getContent()),
+                    ]);
+                }
+            }
+
+            $tokens[$stringStartIndex] = $heredocStartToken;
+            $tokens[$stringEndIndex] = $heredocEndToken;
+            // put the closing marker on a line of its own: extend the trailing literal part,
+            // or insert a new one when the string ends with an interpolated expression
+            if ($tokens[$stringEndIndex - 1]->isGivenKind(T_ENCAPSED_AND_WHITESPACE)) {
+                $tokens[$stringEndIndex - 1] = new Token([
+                    $tokens[$stringEndIndex - 1]->getId(),
+                    $tokens[$stringEndIndex - 1]->getContent()."\n",
+                ]);
+            } else {
+                $tokens->insertAt($stringEndIndex, new Token([
+                    T_ENCAPSED_AND_WHITESPACE,
+                    "\n",
+                ]));
+            }
+        }
+    }
+
+    /**
+     * Removes the escaping of `"` from the content of a double-quoted string.
+     *
+     * `\\` is matched first and kept as is, so that its second backslash is never
+     * taken as the escape character of a following quote.
+     */
+    private function unescapeDoubleQuotes(string $content): string
+    {
+        return Preg::replace('~(\\\\\\\\)|\\\\(")~', '$1$2', $content);
+    }
+}

+ 1 - 1
src/Fixer/Whitespace/HeredocIndentationFixer.php

@@ -72,7 +72,7 @@ final class HeredocIndentationFixer extends AbstractFixer implements Configurabl
     /**
      * {@inheritdoc}
      *
-     * Must run after BracesFixer, StatementIndentationFixer.
+     * Must run after BracesFixer, MultilineStringToHeredocFixer, StatementIndentationFixer.
      */
     public function getPriority(): int
     {

+ 4 - 0
tests/AutoReview/FixerFactoryTest.php

@@ -532,6 +532,10 @@ final class FixerFactoryTest extends TestCase
             'modernize_types_casting' => [
                 'no_unneeded_control_parentheses',
             ],
+            'multiline_string_to_heredoc' => [
+                'escape_implicit_backslashes',
+                'heredoc_indentation',
+            ],
             'multiline_whitespace_before_semicolons' => [
                 'space_after_semicolon',
             ],

+ 362 - 0
tests/Fixer/StringNotation/MultilineStringToHeredocFixerTest.php

@@ -0,0 +1,362 @@
+<?php
+
+declare(strict_types=1);
+
+/*
+ * This file is part of PHP CS Fixer.
+ *
+ * (c) Fabien Potencier <fabien@symfony.com>
+ *     Dariusz Rumiński <dariusz.ruminski@gmail.com>
+ *
+ * This source file is subject to the MIT license that is bundled
+ * with this source code in the file LICENSE.
+ */
+
+namespace PhpCsFixer\Tests\Fixer\StringNotation;
+
+use PhpCsFixer\Tests\Test\AbstractFixerTestCase;
+
+/**
+ * Tests for the `multiline_string_to_heredoc` fixer.
+ *
+ * @internal
+ *
+ * @covers \PhpCsFixer\Fixer\StringNotation\MultilineStringToHeredocFixer
+ */
+final class MultilineStringToHeredocFixerTest extends AbstractFixerTestCase
+{
+    /**
+     * Passes a single case of the data provider to `doTest()`.
+     *
+     * @dataProvider provideFixCases
+     */
+    public function testFix(string $expected, ?string $input = null): void
+    {
+        $this->doTest($expected, $input);
+    }
+
+    /**
+     * Yields named cases made of the expected code and, optionally, the input code.
+     *
+     * NOTE(review): cases without the input element presumably assert that the code
+     * is left unchanged - confirm against AbstractFixerTestCase::doTest().
+     *
+     * @return iterable<array{0: string, 1?: null|string}>
+     */
+    public static function provideFixCases(): iterable
+    {
+        yield 'empty string' => [
+            '<?php $a = \'\';',
+        ];
+
+        yield 'single line string' => [
+            '<?php $a = \'a b\';',
+        ];
+
+        yield 'single line string with "\n"' => [
+            '<?php $a = \'a\nb\';',
+        ];
+
+        yield 'simple single quoted' => [
+            <<<'EOF'
+                <?php
+                $a = <<<'EOD'
+                line1
+                line2
+                EOD;
+                EOF,
+            <<<'EOD'
+                <?php
+                $a = 'line1
+                line2';
+                EOD,
+        ];
+
+        yield 'simple double quoted' => [
+            <<<'EOF'
+                <?php
+                $a = <<<EOD
+                line1
+                line2
+                EOD;
+                EOF,
+            <<<'EOD'
+                <?php
+                $a = "line1
+                line2";
+                EOD,
+        ];
+
+        yield 'colliding closing marker - one' => [
+            <<<'EOF'
+                <?php
+                $a = <<<'EOD_'
+                line1
+                EOD
+                line2
+                EOD_;
+                EOF,
+            <<<'EOF'
+                <?php
+                $a = 'line1
+                EOD
+                line2';
+                EOF,
+        ];
+
+        yield 'colliding closing marker - two' => [
+            <<<'EOF'
+                <?php
+                $a = <<<'EOD__'
+                line1
+                EOD
+                EOD_
+                line2
+                EOD__;
+                EOF,
+            <<<'EOF'
+                <?php
+                $a = 'line1
+                EOD
+                EOD_
+                line2';
+                EOF,
+        ];
+
+        yield 'single quoted unescape' => [
+            <<<'EOF'
+                <?php
+                $a = <<<'EOD'
+                line1
+                \
+                \n
+                '
+                \\'
+                \"
+                \

+                EOD;
+                EOF,
+            <<<'EOD'
+                <?php
+                $a = 'line1
+                \\
+                \n
+                \'
+                \\\\\'
+                \"
+                \
+                ';
+                EOD,
+        ];
+
+        yield 'double quoted unescape' => [
+            <<<'EOF'
+                <?php
+                $a = <<<EOD
+                line1
+                \\
+                \n
+                "
+                \\\\"
+                \'
+                \
+                "{$rawPath}"

+                EOD;
+                EOF,
+            <<<'EOD'
+                <?php
+                $a = "line1
+                \\
+                \n
+                \"
+                \\\\\"
+                \'
+                \
+                \"{$rawPath}\"
+                ";
+                EOD,
+        ];
+
+        yield 'single quoted /w variable' => [
+            <<<'EOF'
+                <?php
+                $a = <<<'EOD'
+                line1$var
+                line2
+                EOD;
+                EOF,
+            <<<'EOD'
+                <?php
+                $a = 'line1$var
+                line2';
+                EOD,
+        ];
+
+        yield 'double quoted /w simple variable' => [
+            <<<'EOF'
+                <?php
+                $a = <<<EOD
+                line1$var
+                line2
+                EOD;
+                EOF,
+            <<<'EOD'
+                <?php
+                $a = "line1$var
+                line2";
+                EOD,
+        ];
+
+        yield 'double quoted /w simple curly variable' => [
+            <<<'EOF'
+                <?php
+                $a = <<<EOD
+                line1{$var}
+                line2
+                EOD;
+                EOF,
+            <<<'EOD'
+                <?php
+                $a = "line1{$var}
+                line2";
+                EOD,
+        ];
+
+        yield 'double quoted /w complex curly variable' => [
+            <<<'EOF'
+                <?php
+                $a = <<<EOD
+                {$arr['foo'][3]}
+                { $obj->values[3]->name }
+                {${getName()}}
+                EOD;
+                EOF,
+            <<<'EOD'
+                <?php
+                $a = "{$arr['foo'][3]}
+                { $obj->values[3]->name }
+                {${getName()}}";
+                EOD,
+        ];
+
+        yield 'test stateful fixing loop' => [
+            <<<'EOF'
+                <?php
+                <<<EOD
+                $a
+                {$b['x']}
+                EOD;
+                <<<'EOD'
+                c
+                d
+                EOD;
+
+                <<<EOD
+                $a
+                $b
+                EOD;
+                <<<EOD
+                $c
+                $d
+                EOD;
+
+                'a';
+                <<<'EOD'
+                b
+                c
+                EOD;
+
+                <<<'EOD'
+                EOD;
+                <<<EOD
+                $a $b
+                EOD;
+                <<<'EOD'
+                c d
+                EOD;
+                <<<EOD
+                $a $b
+                EOD;
+                <<<EOD
+                $a
+                $b
+                EOD;
+                <<<'EOD'
+                $c
+                $d
+                EOD;
+                EOF,
+            <<<'EOF'
+                <?php
+                "$a
+                {$b['x']}";
+                'c
+                d';
+
+                "$a
+                $b";
+                "$c
+                $d";
+
+                'a';
+                'b
+                c';
+
+                <<<'EOD'
+                EOD;
+                <<<EOD
+                $a $b
+                EOD;
+                <<<'EOD'
+                c d
+                EOD;
+                <<<EOD
+                $a $b
+                EOD;
+                <<<EOD
+                $a
+                $b
+                EOD;
+                <<<'EOD'
+                $c
+                $d
+                EOD;
+                EOF,
+        ];
+
+        yield 'simple strings prefixed with b/B' => [
+            <<<'EOF'
+                <?php
+                $a = <<<'EOD'
+                line1
+                line2
+                EOD;
+                $b = <<<EOD
+                line1
+                line2
+                EOD;
+                EOF,
+            <<<'EOD'
+                <?php
+                $a = b'line1
+                line2';
+                $b = B"line1
+                line2";
+                EOD,
+        ];
+
+        yield 'double quoted /w simple variable prefixed with b/B' => [
+            <<<'EOF'
+                <?php
+                $a = <<<EOD
+                line1$var
+                line2
+                EOD;
+                $b = <<<EOD
+                line1$var
+                line2
+                EOD;
+                EOF,
+            <<<'EOD'
+                <?php
+                $a = b"line1$var
+                line2";
+                $b = B"line1$var
+                line2";
+                EOD,
+        ];
+    }
+}

+ 20 - 0
tests/Fixtures/Integration/priority/multiline_string_to_heredoc,escape_implicit_backslashes.test

@@ -0,0 +1,20 @@
+--TEST--
+Integration of fixers: multiline_string_to_heredoc,escape_implicit_backslashes.
+--RULESET--
+{
+    "multiline_string_to_heredoc": true,
+    "escape_implicit_backslashes": {"double_quoted": false}
+}
+--EXPECT--
+<?php
+$a = " \ ";
+$b = <<<EOD
+ \\
+line2
+EOD;
+
+--INPUT--
+<?php
+$a = " \ ";
+$b = " \
+line2";

+ 30 - 0
tests/Fixtures/Integration/priority/multiline_string_to_heredoc,heredoc_indentation.test

@@ -0,0 +1,30 @@
+--TEST--
+Integration of fixers: multiline_string_to_heredoc,heredoc_indentation.
+--RULESET--
+{
+    "multiline_string_to_heredoc": true,
+    "heredoc_indentation": true
+}
+--EXPECT--
+<?php
+$a = " x ";
+$b = <<<EOD
+     x
+
+    EOD;
+$c = <<<'EOD'
+
+     x
+
+
+    EOD;
+
+--INPUT--
+<?php
+$a = " x ";
+$b = " x
+";
+$c = '
+ x
+
+';