Browse Source

minor: Parse all phpdoc types using full grammar (#7010)

Michael Voříšek 1 year ago
parent
commit
d17f722548

+ 7 - 2
src/AbstractPhpdocToTypeDeclarationFixer.php

@@ -16,6 +16,7 @@ namespace PhpCsFixer;
 
 use PhpCsFixer\DocBlock\Annotation;
 use PhpCsFixer\DocBlock\DocBlock;
+use PhpCsFixer\DocBlock\TypeExpression;
 use PhpCsFixer\Fixer\ConfigurableFixerInterface;
 use PhpCsFixer\FixerConfiguration\FixerConfigurationResolver;
 use PhpCsFixer\FixerConfiguration\FixerConfigurationResolverInterface;
@@ -31,7 +32,8 @@ use PhpCsFixer\Tokenizer\Tokens;
  */
 abstract class AbstractPhpdocToTypeDeclarationFixer extends AbstractFixer implements ConfigurableFixerInterface
 {
-    private const CLASS_REGEX = '/^\\\\?[a-zA-Z_\\x7f-\\xff](?:\\\\?[a-zA-Z0-9_\\x7f-\\xff]+)*$/';
+    private const REGEX_CLASS = '(?:\\\\?+'.TypeExpression::REGEX_IDENTIFIER
+        .'(\\\\'.TypeExpression::REGEX_IDENTIFIER.')*+)';
 
     /**
      * @var array<string, int>
@@ -168,6 +170,9 @@ abstract class AbstractPhpdocToTypeDeclarationFixer extends AbstractFixer implem
     protected function getCommonTypeFromAnnotation(Annotation $annotation, bool $isReturnType): ?array
     {
         $typesExpression = $annotation->getTypeExpression();
+        if (null === $typesExpression) {
+            return null;
+        }
 
         $commonType = $typesExpression->getCommonType();
         $isNullable = $typesExpression->allowsNull();
@@ -196,7 +201,7 @@ abstract class AbstractPhpdocToTypeDeclarationFixer extends AbstractFixer implem
             if (false === $this->configuration['scalar_types']) {
                 return null;
             }
-        } elseif (1 !== Preg::match(self::CLASS_REGEX, $commonType)) {
+        } elseif (1 !== Preg::match('/^'.self::REGEX_CLASS.'$/', $commonType)) {
             return null;
         }
 

+ 14 - 7
src/DocBlock/Annotation.php

@@ -163,9 +163,13 @@ final class Annotation
     /**
      * @internal
      */
-    public function getTypeExpression(): TypeExpression
+    public function getTypeExpression(): ?TypeExpression
     {
-        return new TypeExpression($this->getTypesContent(), $this->namespace, $this->namespaceUses);
+        $typesContent = $this->getTypesContent();
+
+        return null === $typesContent
+            ? null
+            : new TypeExpression($typesContent, $this->namespace, $this->namespaceUses);
     }
 
     /**
@@ -175,7 +179,7 @@ final class Annotation
      */
     public function getVariableName()
     {
-        $type = preg_quote($this->getTypesContent(), '/');
+        $type = preg_quote($this->getTypesContent() ?? '', '/');
         $regex = "/@{$this->tag->getName()}\\s+({$type}\\s*)?(&\\s*)?(\\.{3}\\s*)?(?<variable>\\$.+?)(?:[\\s*]|$)/";
 
         if (Preg::match($regex, $this->lines[0]->getContent(), $matches)) {
@@ -193,7 +197,10 @@ final class Annotation
     public function getTypes(): array
     {
         if (null === $this->types) {
-            $this->types = $this->getTypeExpression()->getTypes();
+            $typeExpression = $this->getTypeExpression();
+            $this->types = null === $typeExpression
+                ? []
+                : $typeExpression->getTypes();
         }
 
         return $this->types;
@@ -275,7 +282,7 @@ final class Annotation
      *
      * Be careful modifying the underlying line as that won't flush the cache.
      */
-    private function getTypesContent(): string
+    private function getTypesContent(): ?string
     {
         if (null === $this->typesContent) {
             $name = $this->getTag()->getName();
@@ -285,14 +292,14 @@ final class Annotation
             }
 
             $matchingResult = Preg::match(
-                '{^(?:\s*\*|/\*\*)\s*@'.$name.'\s+'.TypeExpression::REGEX_TYPES.'(?:(?:[*\h\v]|\&?[\.\$]).*)?\r?$}isx',
+                '{^(?:\s*\*|/\*\*)\s*@'.$name.'\s+'.TypeExpression::REGEX_TYPES.'(?:(?:[*\h\v]|\&?[\.\$]).*)?\r?$}is',
                 $this->lines[0]->getContent(),
                 $matches
             );
 
             $this->typesContent = 1 === $matchingResult
                 ? $matches['types']
-                : '';
+                : null;
         }
 
         return $this->typesContent;

+ 63 - 56
src/DocBlock/TypeExpression.php

@@ -25,12 +25,18 @@ use PhpCsFixer\Utils;
 final class TypeExpression
 {
     /**
-     * Regex to match any types, shall be used with `x` modifier.
+     * Regex to match any PHP identifier.
      *
      * @internal
      */
-    public const REGEX_TYPES = '
-    (?<types> # several types separated by `|` or `&`
+    public const REGEX_IDENTIFIER = '(?:(?!(?<!\*)\d)[^\x00-\x2f\x3a-\x40\x5b-\x5e\x60\x7b-\x7f]++)';
+
+    /**
+     * Regex to match any PHPDoc type.
+     *
+     * @internal
+     */
+    public const REGEX_TYPES = '(?<types>(?x) # one or several types separated by `|` or `&`
 '.self::REGEX_TYPE.'
         (?:
             \h*(?<glue>[|&])\h*
@@ -38,36 +44,35 @@ final class TypeExpression
         )*+
     )';
 
-    private const REGEX_TYPE = '
-        (?<type> # single type
+    private const REGEX_TYPE = '(?<type>(?x) # single type
             (?<nullable>\??\h*)
             (?:
-                (?<object_like_array>
-                    (?<object_like_array_start>(?i)(?:array|list|object)(?-i)\h*\{\h*)
-                        (?<object_like_array_inners>
-                            (?<object_like_array_inner>
-                                (?<object_like_array_inner_key>(?:(?&constant)|(?&name))\h*\??\h*:\h*)?
-                                (?<object_like_array_inner_value>(?&types_inner))
-                            )
-                            (?:
-                                \h*,\h*
-                                (?&object_like_array_inner)
-                            )*
-                            (?:\h*,\h*)?
-                        )?
+                (?<array_shape>
+                    (?<array_shape_start>(?i)(?:array|list|object)(?-i)\h*\{\h*)
+                    (?<array_shape_inners>
+                        (?<array_shape_inner>
+                            (?<array_shape_inner_key>(?:(?&constant)|(?&identifier))\h*\??\h*:\h*|)
+                            (?<array_shape_inner_value>(?&types_inner))
+                        )
+                        (?:
+                            \h*,\h*
+                            (?&array_shape_inner)
+                        )*
+                        (?:\h*,\h*)?
+                    |)
                     \h*\}
                 )
                 |
                 (?<callable> # callable syntax, e.g. `callable(string): bool`
                     (?<callable_start>(?i)(?:callable|\\\\?Closure)(?-i)\h*\(\h*)
-                        (?<callable_arguments>
+                    (?<callable_arguments>
+                        (?&types_inner)
+                        (?:
+                            \h*,\h*
                             (?&types_inner)
-                            (?:
-                                \h*,\h*
-                                (?&types_inner)
-                            )*
-                            (?:\h*,\h*)?
-                        )?
+                        )*
+                        (?:\h*,\h*)?
+                    |)
                     \h*\)
                     (?:
                         \h*\:\h*
@@ -80,13 +85,13 @@ final class TypeExpression
                         (?&name)+
                         \h*<\h*
                     )
-                        (?<generic_types>
+                    (?<generic_types>
+                        (?&types_inner)
+                        (?:
+                            \h*,\h*
                             (?&types_inner)
-                            (?:
-                                \h*,\h*
-                                (?&types_inner)
-                            )*
-                        )
+                        )*
+                    )
                     \h*>
                 )
                 |
@@ -96,10 +101,9 @@ final class TypeExpression
                 |
                 (?<constant> # single constant value (case insensitive), e.g.: 1, -1.8E+6, `\'a\'`
                     (?i)
-                    null | true | false
                     # all sorts of numbers: with or without sign, supports literal separator and several numeric systems,
                     # e.g.: 1, +1.1, 1., .1, -1, 123E+8, 123_456_789, 0x7Fb4, 0b0110, 0o777
-                    | [+-]?(?:
+                    [+-]?(?:
                         (?:0b[01]++(?:_[01]++)*+)
                         | (?:0o[0-7]++(?:_[0-7]++)*+)
                         | (?:0x[\da-f]++(?:_[\da-f]++)*+)
@@ -109,13 +113,18 @@ final class TypeExpression
                     )
                     | \'(?:[^\'\\\\]|\\\\.)*+\'
                     | "(?:[^"\\\\]|\\\\.)*+"
-                    | [@$]?(?:this | self | static)
                     (?-i)
                 )
                 |
-                (?<name> # full name, e.g.: `int`, `\DateTime`, `\Foo\Bar`
+                (?<this> # self reference, e.g.: $this, $self, @static
+                    (?i)
+                    [@$](?:this | self | static)
+                    (?-i)
+                )
+                |
+                (?<name> # full name, e.g.: `int`, `\DateTime`, `\Foo\Bar`, `positive-int`
                     \\\\?+
-                    (?<identifier>(?!(?<!\*)\d)[^\x00-\x2f\x3a-\x40\x5b-\x5e\x60\x7b-\x7f]++)
+                    (?<identifier>'.self::REGEX_IDENTIFIER.')
                     (?:[\\\\\-](?&identifier))*+
                 )
                 |
@@ -165,13 +174,13 @@ final class TypeExpression
 
     private bool $isUnionType = false;
 
+    private string $typesGlue = '|';
+
     /**
      * @var list<array{start_index: int, expression: self}>
      */
     private array $innerTypeExpressions = [];
 
-    private string $typesGlue = '|';
-
     private ?NamespaceAnalysis $namespace;
 
     /**
@@ -305,17 +314,15 @@ final class TypeExpression
         $index = 0;
         while (true) {
             Preg::match(
-                '{\G'.self::REGEX_TYPE.'(?:\h*(?<glue>[|&])\h*|$)}x',
+                '{\G'.self::REGEX_TYPE.'(?:\h*(?<glue>[|&])\h*|$)}',
                 $this->value,
                 $matches,
                 PREG_OFFSET_CAPTURE,
                 $index
             );
 
-            if ([] === $matches) { // invalid phpdoc type
-                // TODO once all phpdoc types are parsed strictly using self::REGEX_TYPES,
-                // the parse cannot fail and we can throw here safely
-                return;
+            if ([] === $matches) {
+                throw new \Exception('Unable to parse phpdoc type '.var_export($this->value, true));
             }
 
             if (!$this->isUnionType) {
@@ -351,19 +358,19 @@ final class TypeExpression
         } elseif ('' !== ($matches['callable'][0] ?? '') && $matches['callable'][1] === $nullableLength) {
             $this->parseCommaSeparatedInnerTypes(
                 $index + \strlen($matches['callable_start'][0]),
-                $matches['callable_arguments'][0] ?? ''
+                $matches['callable_arguments'][0]
             );
 
-            if ('' !== ($matches['callable_return'] ?? '')) {
+            if ('' !== ($matches['callable_return'][0] ?? '')) {
                 $this->innerTypeExpressions[] = [
                     'start_index' => \strlen($this->value) - \strlen($matches['callable_return'][0]),
                     'expression' => $this->inner($matches['callable_return'][0]),
                 ];
             }
-        } elseif ('' !== ($matches['object_like_array'][0] ?? '') && $matches['object_like_array'][1] === $nullableLength) {
-            $this->parseObjectLikeArrayInnerTypes(
-                $index + \strlen($matches['object_like_array_start'][0]),
-                $matches['object_like_array_inners'][0] ?? ''
+        } elseif ('' !== ($matches['array_shape'][0] ?? '') && $matches['array_shape'][1] === $nullableLength) {
+            $this->parseArrayShapeInnerTypes(
+                $index + \strlen($matches['array_shape_start'][0]),
+                $matches['array_shape_inners'][0]
             );
         } elseif ('' !== ($matches['parenthesized'][0] ?? '') && $matches['parenthesized'][1] === $nullableLength) {
             $index += \strlen($matches['parenthesized_start'][0]);
@@ -410,7 +417,7 @@ final class TypeExpression
         $index = 0;
         while (\strlen($value) !== $index) {
             Preg::match(
-                '{\G'.self::REGEX_TYPES.'(?:\h*,\h*|$)}x',
+                '{\G'.self::REGEX_TYPES.'(?:\h*,\h*|$)}',
                 $value,
                 $matches,
                 0,
@@ -426,32 +433,32 @@ final class TypeExpression
         }
     }
 
-    private function parseObjectLikeArrayInnerTypes(int $startIndex, string $value): void
+    private function parseArrayShapeInnerTypes(int $startIndex, string $value): void
     {
         $index = 0;
         while (\strlen($value) !== $index) {
             Preg::match(
-                '{\G(?:(?=1)0'.self::REGEX_TYPES.'|(?<_object_like_array_inner>(?&object_like_array_inner))(?:\h*,\h*|$))}x',
+                '{\G(?:(?=1)0'.self::REGEX_TYPES.'|(?<_array_shape_inner>(?&array_shape_inner))(?:\h*,\h*|$))}',
                 $value,
                 $prematches,
                 0,
                 $index
             );
-            $consumedValue = $prematches['_object_like_array_inner'];
+            $consumedValue = $prematches['_array_shape_inner'];
             $consumedValueLength = \strlen($consumedValue);
             $consumedCommaLength = \strlen($prematches[0]) - $consumedValueLength;
 
             $addedPrefix = 'array{';
             Preg::match(
-                '{^'.self::REGEX_TYPES.'$}x',
+                '{^'.self::REGEX_TYPES.'$}',
                 $addedPrefix.$consumedValue.'}',
                 $matches,
                 PREG_OFFSET_CAPTURE
             );
 
             $this->innerTypeExpressions[] = [
-                'start_index' => $startIndex + $index + $matches['object_like_array_inner_value'][1] - \strlen($addedPrefix),
-                'expression' => $this->inner($matches['object_like_array_inner_value'][0]),
+                'start_index' => $startIndex + $index + $matches['array_shape_inner_value'][1] - \strlen($addedPrefix),
+                'expression' => $this->inner($matches['array_shape_inner_value'][0]),
             ];
 
             $index += $consumedValueLength + $consumedCommaLength;

+ 2 - 1
src/Fixer/Basic/PsrAutoloadingFixer.php

@@ -15,6 +15,7 @@ declare(strict_types=1);
 namespace PhpCsFixer\Fixer\Basic;
 
 use PhpCsFixer\AbstractFixer;
+use PhpCsFixer\DocBlock\TypeExpression;
 use PhpCsFixer\Fixer\ConfigurableFixerInterface;
 use PhpCsFixer\FixerConfiguration\FixerConfigurationResolver;
 use PhpCsFixer\FixerConfiguration\FixerConfigurationResolverInterface;
@@ -103,7 +104,7 @@ class InvalidName {}
             // ignore file with extension other than php
             ('php' !== $file->getExtension())
             // ignore file with name that cannot be a class name
-            || 0 === Preg::match('/^[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*$/', $file->getBasename('.php'))
+            || 0 === Preg::match('/^'.TypeExpression::REGEX_IDENTIFIER.'$/', $file->getBasename('.php'))
         ) {
             return false;
         }

+ 3 - 3
src/Fixer/Phpdoc/NoSuperfluousPhpdocTagsFixer.php

@@ -507,11 +507,11 @@ class Foo {
         array $symbolShortNames
     ): bool {
         if ('param' === $annotation->getTag()->getName()) {
-            $regex = '{@param(?:\s+'.TypeExpression::REGEX_TYPES.')?(?:\s+(?:\&\s*)?(?:\.{3}\s*)?\$\S+)?(?:\s+(?<description>(?!\*+\/)\S+))?}sx';
+            $regex = '{@param(?:\s+'.TypeExpression::REGEX_TYPES.')?(?:\s+(?:\&\s*)?(?:\.{3}\s*)?\$\S+)?(?:\s+(?<description>(?!\*+\/)\S+))?}s';
         } elseif ('var' === $annotation->getTag()->getName()) {
-            $regex = '{@var(?:\s+'.TypeExpression::REGEX_TYPES.')?(?:\s+\$\S+)?(?:\s+(?<description>(?!\*\/)\S+))?}sx';
+            $regex = '{@var(?:\s+'.TypeExpression::REGEX_TYPES.')?(?:\s+\$\S+)?(?:\s+(?<description>(?!\*\/)\S+))?}s';
         } else {
-            $regex = '{@return(?:\s+'.TypeExpression::REGEX_TYPES.')?(?:\s+(?<description>(?!\*\/)\S+))?}sx';
+            $regex = '{@return(?:\s+'.TypeExpression::REGEX_TYPES.')?(?:\s+(?<description>(?!\*\/)\S+))?}s';
         }
 
         if (1 !== Preg::match($regex, $annotation->getContent(), $matches)) {

+ 1 - 1
src/Fixer/Phpdoc/PhpdocAlignFixer.php

@@ -125,7 +125,7 @@ final class PhpdocAlignFixer extends AbstractFixer implements ConfigurableFixerI
         // optional <desc>
         $desc = '(?:\s+(?P<desc>\V*))';
 
-        $this->regex = '/'.$indentRegex.'\*\h*@(?J)(?:'.implode('|', $types).')'.$desc.'\h*\r?$/x';
+        $this->regex = '/'.$indentRegex.'\*\h*@(?J)(?:'.implode('|', $types).')'.$desc.'\h*\r?$/';
         $this->regexCommentLine = '/'.$indentRegex.'\*(?!\h?+@)(?:\s+(?P<desc>\V+))(?<!\*\/)\r?$/';
         $this->align = $this->configuration['align'];
     }

+ 2 - 6
src/Fixer/Phpdoc/PhpdocParamOrderFixer.php

@@ -17,6 +17,7 @@ namespace PhpCsFixer\Fixer\Phpdoc;
 use PhpCsFixer\AbstractFixer;
 use PhpCsFixer\DocBlock\Annotation;
 use PhpCsFixer\DocBlock\DocBlock;
+use PhpCsFixer\DocBlock\TypeExpression;
 use PhpCsFixer\FixerDefinition\CodeSample;
 use PhpCsFixer\FixerDefinition\FixerDefinition;
 use PhpCsFixer\FixerDefinition\FixerDefinitionInterface;
@@ -236,12 +237,7 @@ function m($a, array $b, Foo $c) {}
         $blockMatch = false;
         $blockIndices = [];
 
-        $typeDeclaration = sprintf('[\w\s<>,%s]*', preg_quote('\[]|?'));
-        $paramRegex = sprintf(
-            '/\*\s*@param\s*%s\s*&?\$\b%s\b/',
-            $typeDeclaration,
-            substr($identifier, 1) // Remove starting `$` from variable name
-        );
+        $paramRegex = '/\*\s*@param\s*(?:|'.TypeExpression::REGEX_TYPES.'\s*)&?(?=\$\b)'.preg_quote($identifier).'\b/';
 
         foreach ($paramAnnotations as $i => $param) {
             $blockStart = Preg::match('/\s*{\s*/', $param->getContent());

+ 11 - 15
src/Fixer/Phpdoc/PhpdocTypesFixer.php

@@ -15,6 +15,7 @@ declare(strict_types=1);
 namespace PhpCsFixer\Fixer\Phpdoc;
 
 use PhpCsFixer\AbstractPhpdocTypesFixer;
+use PhpCsFixer\DocBlock\TypeExpression;
 use PhpCsFixer\Fixer\ConfigurableFixerInterface;
 use PhpCsFixer\FixerConfiguration\AllowedValueSubset;
 use PhpCsFixer\FixerConfiguration\FixerConfigurationResolver;
@@ -67,7 +68,8 @@ final class PhpdocTypesFixer extends AbstractPhpdocTypesFixer implements Configu
         ],
     ];
 
-    private string $patternToFix = '';
+    /** @var array<string, true> */
+    private array $typesSetToFix;
 
     public function configure(array $configuration): void
     {
@@ -77,18 +79,7 @@ final class PhpdocTypesFixer extends AbstractPhpdocTypesFixer implements Configu
             return self::POSSIBLE_TYPES[$group];
         }, $this->configuration['groups']));
 
-        $this->patternToFix = sprintf(
-            '/(?<![a-zA-Z0-9_\x80-\xff]\\\\)(\b|.(?=\$))(%s)\b(?!(\\\\|:))/i',
-            implode(
-                '|',
-                array_map(
-                    static function (string $type): string {
-                        return preg_quote($type, '/');
-                    },
-                    $typesToFix
-                )
-            )
-        );
+        $this->typesSetToFix = array_combine($typesToFix, array_fill(0, \count($typesToFix), true));
     }
 
     public function getDefinition(): FixerDefinitionInterface
@@ -141,9 +132,14 @@ final class PhpdocTypesFixer extends AbstractPhpdocTypesFixer implements Configu
     protected function normalize(string $type): string
     {
         return Preg::replaceCallback(
-            $this->patternToFix,
+            '/(\b|(?=\$|\\\\))(\$|\\\\)?'.TypeExpression::REGEX_IDENTIFIER.'(?!\\\\|\h*:)/',
             function (array $matches): string {
-                return strtolower($matches[0]);
+                $valueLower = strtolower($matches[0]);
+                if (isset($this->typesSetToFix[$valueLower])) {
+                    return $valueLower;
+                }
+
+                return $matches[0];
             },
             $type
         );

+ 7 - 5
src/Fixer/Phpdoc/PhpdocTypesOrderFixer.php

@@ -131,11 +131,13 @@ final class PhpdocTypesOrderFixer extends AbstractFixer implements ConfigurableF
 
             foreach ($annotations as $annotation) {
                 // fix main types
-                $annotation->setTypes(
-                    $this->sortTypes(
-                        $annotation->getTypeExpression()
-                    )
-                );
+                if (null !== $annotation->getTypeExpression()) {
+                    $annotation->setTypes(
+                        $this->sortTypes(
+                            $annotation->getTypeExpression()
+                        )
+                    );
+                }
 
                 // fix @method parameters types
                 $line = $doc->getLine($annotation->getStart());

+ 2 - 1
src/Fixer/Phpdoc/PhpdocVarWithoutNameFixer.php

@@ -17,6 +17,7 @@ namespace PhpCsFixer\Fixer\Phpdoc;
 use PhpCsFixer\AbstractFixer;
 use PhpCsFixer\DocBlock\DocBlock;
 use PhpCsFixer\DocBlock\Line;
+use PhpCsFixer\DocBlock\TypeExpression;
 use PhpCsFixer\FixerDefinition\CodeSample;
 use PhpCsFixer\FixerDefinition\FixerDefinition;
 use PhpCsFixer\FixerDefinition\FixerDefinitionInterface;
@@ -115,7 +116,7 @@ final class Foo
     {
         $content = $line->getContent();
 
-        Preg::matchAll('/ (?!\$this(?![a-zA-Z0-9_\x7f-\xff]))\$(?!\d)[a-zA-Z0-9_\x7f-\xff]+/', $content, $matches);
+        Preg::matchAll('/ \$'.TypeExpression::REGEX_IDENTIFIER.'(?<!\$this)/', $content, $matches);
 
         if (isset($matches[0][0])) {
             $line->setContent(str_replace($matches[0][0], '', $content));

Some files were not shown because too many files changed in this diff