From 720c38746083b0578ca8f7deee96f30378edb3d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20Andr=C3=A9?= Date: Wed, 27 Nov 2024 08:42:39 +0100 Subject: [PATCH] [AssetMapper] Detect import with a sequence parser --- .../Compiler/JavaScriptImportPathCompiler.php | 40 +-- .../Parser/JavascriptSequenceParser.php | 187 ++++++++++++++ .../JavaScriptImportPathCompilerTest.php | 9 - .../Parser/JavascriptSequenceParserTest.php | 234 ++++++++++++++++++ 4 files changed, 427 insertions(+), 43 deletions(-) create mode 100644 src/Symfony/Component/AssetMapper/Compiler/Parser/JavascriptSequenceParser.php create mode 100644 src/Symfony/Component/AssetMapper/Tests/Compiler/Parser/JavascriptSequenceParserTest.php diff --git a/src/Symfony/Component/AssetMapper/Compiler/JavaScriptImportPathCompiler.php b/src/Symfony/Component/AssetMapper/Compiler/JavaScriptImportPathCompiler.php index ef78cad44e8fc..413d8d6d67cd8 100644 --- a/src/Symfony/Component/AssetMapper/Compiler/JavaScriptImportPathCompiler.php +++ b/src/Symfony/Component/AssetMapper/Compiler/JavaScriptImportPathCompiler.php @@ -13,6 +13,7 @@ use Psr\Log\LoggerInterface; use Symfony\Component\AssetMapper\AssetMapperInterface; +use Symfony\Component\AssetMapper\Compiler\Parser\JavascriptSequenceParser; use Symfony\Component\AssetMapper\Exception\CircularAssetsException; use Symfony\Component\AssetMapper\Exception\RuntimeException; use Symfony\Component\AssetMapper\ImportMap\ImportMapConfigReader; @@ -61,15 +62,13 @@ public function __construct( public function compile(string $content, MappedAsset $asset, AssetMapperInterface $assetMapper): string { - return preg_replace_callback(self::IMPORT_PATTERN, function ($matches) use ($asset, $assetMapper, $content) { - $fullImportString = $matches[0][0]; + $jsParser = new JavascriptSequenceParser($content); - // Ignore matches that did not capture import statements - if (!isset($matches[1][0])) { - return $fullImportString; - } + return preg_replace_callback(self::IMPORT_PATTERN, 
function ($matches) use ($asset, $assetMapper, $jsParser) { + $fullImportString = $matches[0][0]; - if ($this->isCommentedOut($matches[0][1], $content)) { + $jsParser->parseUntil($matches[0][1]); + if (!$jsParser->isExecutable()) { return $fullImportString; } @@ -146,33 +145,6 @@ private function handleMissingImport(string $message, ?\Throwable $e = null): vo }; } - /** - * Simple check for the most common types of comments. - * - * This is not a full parser, but should be good enough for most cases. - */ - private function isCommentedOut(mixed $offsetStart, string $fullContent): bool - { - $lineStart = strrpos($fullContent, "\n", $offsetStart - \strlen($fullContent)); - $lineContentBeforeImport = substr($fullContent, $lineStart, $offsetStart - $lineStart); - $firstTwoChars = substr(ltrim($lineContentBeforeImport), 0, 2); - if ('//' === $firstTwoChars) { - return true; - } - - if ('/*' === $firstTwoChars) { - $commentEnd = strpos($fullContent, '*/', $lineStart); - // if we can't find the end comment, be cautious: assume this is not a comment - if (false === $commentEnd) { - return false; - } - - return $offsetStart < $commentEnd; - } - - return false; - } - private function findAssetForBareImport(string $importedModule, AssetMapperInterface $assetMapper): ?MappedAsset { if (!$importMapEntry = $this->importMapConfigReader->findRootImportMapEntry($importedModule)) { diff --git a/src/Symfony/Component/AssetMapper/Compiler/Parser/JavascriptSequenceParser.php b/src/Symfony/Component/AssetMapper/Compiler/Parser/JavascriptSequenceParser.php new file mode 100644 index 0000000000000..943c0eea14f51 --- /dev/null +++ b/src/Symfony/Component/AssetMapper/Compiler/Parser/JavascriptSequenceParser.php @@ -0,0 +1,187 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. 
/**
 * Parses JavaScript content to identify sequences of strings, comments, etc.
 *
 * The parser walks the content forward only. After parseUntil($position),
 * the isString() / isComment() / isExecutable() methods describe the kind of
 * sequence found at that offset.
 *
 * This is a lightweight scanner, not a JavaScript parser: it only looks for
 * the five delimiters listed in SEQUENCE_DELIMITERS. Constructs it does not
 * look for (e.g. regular expression literals or `${...}` expressions inside
 * template literals) are not recognized.
 *
 * @author Simon André
 *
 * @internal
 */
final class JavascriptSequenceParser
{
    private const STATE_DEFAULT = 0;
    private const STATE_COMMENT = 1;
    private const STATE_STRING = 2;

    /**
     * Tokens that open a non-executable sequence.
     */
    private const SEQUENCE_DELIMITERS = [
        '/*', // Multi-line comment
        '//', // Single-line comment
        '"', // Double quote
        '\'', // Single quote
        '`', // Backtick
    ];

    /**
     * Matches the next opening delimiter listed in SEQUENCE_DELIMITERS.
     */
    private const DELIMITER_PATTERN = '~/\*|//|["\'`]~';

    /** Offset up to which the content has been analyzed. */
    private int $cursor = 0;

    /** Length of the content, in bytes. */
    private int $contentEnd;

    /** Type (one of the STATE_* constants) of the sequence at the cursor. */
    private int $currentSequenceType = self::STATE_DEFAULT;

    /** Offset of the first byte after the current sequence, null when unknown. */
    private ?int $currentSequenceEnd = null;

    public function __construct(
        private readonly string $content,
    ) {
        $this->contentEnd = \strlen($content);
    }

    /**
     * Whether the last parsed offset is inside a string or template literal.
     */
    public function isString(): bool
    {
        return self::STATE_STRING === $this->currentSequenceType;
    }

    /**
     * Whether the last parsed offset is outside any comment or string.
     */
    public function isExecutable(): bool
    {
        return self::STATE_DEFAULT === $this->currentSequenceType;
    }

    /**
     * Whether the last parsed offset is inside a comment.
     */
    public function isComment(): bool
    {
        return self::STATE_COMMENT === $this->currentSequenceType;
    }

    /**
     * Advances the parser to the given byte offset.
     *
     * @param int $position Byte offset in the content; must not be lower than
     *                      the offset reached by a previous call
     *
     * @throws \RuntimeException When $position is past the end of the content
     *                           or before the current cursor
     */
    public function parseUntil(int $position): void
    {
        if ($position > $this->contentEnd) {
            throw new \RuntimeException('Cannot parse beyond the end of the content.');
        }
        if ($position < $this->cursor) {
            throw new \RuntimeException('Cannot parse backwards.');
        }

        while ($this->cursor <= $position) {
            if (null !== $this->currentSequenceEnd) {
                // The target is still inside the sequence already identified.
                if ($this->currentSequenceEnd > $position) {
                    $this->cursor = $position;

                    return;
                }

                // Step out of the finished sequence and look for the next one.
                $this->cursor = $this->currentSequenceEnd;
                $this->setSequence(self::STATE_DEFAULT, null);
            }

            if (1 !== preg_match(self::DELIMITER_PATTERN, $this->content, $matches, \PREG_OFFSET_CAPTURE, $this->cursor)) {
                // No more comments or strings: everything left is executable.
                $this->endsWithSequence(self::STATE_DEFAULT, $position);

                return;
            }

            [$delimiter, $matchPos] = $matches[0];

            if ($matchPos > $position) {
                // The next delimiter opens after the target offset.
                $this->setSequence(self::STATE_DEFAULT, $matchPos - 1);
                $this->cursor = $position;

                return;
            }

            [$type, $sequenceEnd] = match ($delimiter) {
                '/*' => [self::STATE_COMMENT, $this->findSequenceEnd('*/', $matchPos + 2)],
                '//' => [self::STATE_COMMENT, $this->findSequenceEnd("\n", $matchPos + 2)],
                default => [self::STATE_STRING, $this->findClosingQuote($delimiter, $matchPos + 1)],
            };

            if (null === $sequenceEnd) {
                // Unterminated sequence: it runs until the end of the content.
                $this->endsWithSequence($type, $position);

                return;
            }

            $this->cursor = min($sequenceEnd, $position);
            $this->setSequence($type, $sequenceEnd);
        }
    }

    /**
     * Returns the offset right after the first $terminator found at or after
     * $offset, or null when the terminator is missing.
     */
    private function findSequenceEnd(string $terminator, int $offset): ?int
    {
        $terminatorPos = strpos($this->content, $terminator, $offset);

        return false === $terminatorPos ? null : $terminatorPos + \strlen($terminator);
    }

    /**
     * Returns the offset right after the first unescaped $quote found at or
     * after $offset, or null when the string is never closed.
     *
     * A quote is escaped when it is preceded by an odd number of consecutive
     * backslashes: in "a\\" the backslash itself is escaped, so the quote
     * closes the string.
     */
    private function findClosingQuote(string $quote, int $offset): ?int
    {
        $bodyStart = $offset;

        while (false !== $quotePos = strpos($this->content, $quote, $offset)) {
            $backslashes = 0;
            for ($i = $quotePos - 1; $i >= $bodyStart && '\\' === $this->content[$i]; --$i) {
                ++$backslashes;
            }

            if (0 === $backslashes % 2) {
                return $quotePos + 1;
            }

            $offset = $quotePos + 1;
        }

        return null;
    }

    /**
     * Marks the rest of the content as a single sequence of the given type.
     */
    private function endsWithSequence(int $type, int $cursor): void
    {
        $this->cursor = $cursor;
        $this->currentSequenceType = $type;
        $this->currentSequenceEnd = $this->contentEnd;
    }

    /**
     * Records the type of the current sequence and the offset where it stops.
     */
    private function setSequence(int $type, ?int $end = null): void
    {
        $this->currentSequenceType = $type;
        $this->currentSequenceEnd = $end;
    }
}
/**
 * Checks that JavascriptSequenceParser reports the right kind of sequence
 * (executable code, comment or string) at a given offset.
 */
class JavascriptSequenceParserTest extends TestCase
{
    public function testParseEmptyContent()
    {
        $parser = new JavascriptSequenceParser('');

        $this->assertTrue($parser->isExecutable());
    }

    public function testItThrowsWhenOutOfBounds()
    {
        $parser = new JavascriptSequenceParser('');

        $this->expectException(\RuntimeException::class);
        $this->expectExceptionMessage('Cannot parse beyond the end of the content.');

        $parser->parseUntil(1);
    }

    public function testItThrowsWhenBackward()
    {
        $parser = new JavascriptSequenceParser('  ');

        $parser->parseUntil(2);

        $this->expectException(\RuntimeException::class);
        $this->expectExceptionMessage('Cannot parse backwards.');

        $parser->parseUntil(1);
    }

    public function testParseToTheEnd()
    {
        $parser = new JavascriptSequenceParser('123');
        $parser->parseUntil(3);

        $this->assertTrue($parser->isExecutable());
    }

    /**
     * @dataProvider provideSequenceCases
     */
    public function testParseSequence(string $content, int $position, bool $isExecutable)
    {
        $parser = new JavascriptSequenceParser($content);
        $parser->parseUntil($position);

        $this->assertSame($isExecutable, $parser->isExecutable());
    }

    /**
     * Each case is: content, offset to parse until, expected isExecutable().
     *
     * @return iterable<string, array{string, int, bool}>
     */
    public static function provideSequenceCases(): iterable
    {
        yield 'empty' => [
            '',
            0,
            true,
        ];
        yield 'inline comment' => [
            '//',
            2,
            false,
        ];
        yield 'comment' => [
            '/* */',
            2,
            false,
        ];
        yield 'after comment' => [
            '/* */',
            5,
            true,
        ];
        yield 'multi-line comment' => [
            "/**\n    abc\n */",
            2,
            false,
        ];
        yield 'after multi-line comment' => [
            "/** \n */ abc",
            8,
            true,
        ];
    }

    /**
     * @dataProvider provideCommentCases
     */
    public function testIdentifyComment(string $content, int $position, bool $isComment)
    {
        $parser = new JavascriptSequenceParser($content);
        $parser->parseUntil($position);

        $this->assertSame($isComment, $parser->isComment());
        // None of the comment cases contains a string, so "not a comment" means executable.
        $this->assertSame(!$isComment, $parser->isExecutable());
    }

    /**
     * Each case is: content, offset to parse until, expected isComment().
     *
     * @return iterable<string, array{string, int, bool}>
     */
    public static function provideCommentCases(): iterable
    {
        yield 'empty' => [
            '',
            0,
            false,
        ];
        yield 'inline comment' => [
            '//',
            2,
            true,
        ];
        yield 'comment' => [
            '/* */',
            2,
            true,
        ];
        yield 'multi-line comment' => [
            "/**\n    abc\n */",
            2,
            true,
        ];
        yield 'after multi-line comment' => [
            "/** \n */ abc",
            8,
            false,
        ];
        yield 'after comment' => [
            '/* */',
            5,
            false,
        ];
        yield 'comment after comment' => [
            '/* */ //',
            7,
            true,
        ];
        yield 'comment after multi-line comment' => [
            '/* */ /**/',
            8,
            true,
        ];
        yield 'multi-line comment after comment' => [
            '// /* */',
            8,
            true,
        ];
    }

    /**
     * @dataProvider provideStringCases
     */
    public function testIdentifyStrings(string $content, int $position, bool $isString)
    {
        $parser = new JavascriptSequenceParser($content);
        $parser->parseUntil($position);

        $this->assertSame($isString, $parser->isString());
    }

    /**
     * Each case is: content, offset to parse until, expected isString().
     *
     * @return iterable<string, array{string, int, bool}>
     */
    public static function provideStringCases(): iterable
    {
        yield 'empty' => [
            '',
            0,
            false,
        ];
        yield 'before single quote' => [
            " '",
            0,
            false,
        ];
        yield 'on single quote' => [
            "'",
            0,
            true,
        ];
        yield 'between single quotes' => [
            "' '",
            2,
            true,
        ];
        yield 'after single quote' => [
            "'' ",
            3,
            false,
        ];
        yield 'after escaped single quote' => [
            "'a\\'b'",
            4,
            true,
        ];
        yield 'before double quote' => [
            ' "',
            0,
            false,
        ];
        yield 'on double quote' => [
            '"',
            0,
            true,
        ];
        yield 'between double quotes' => [
            '" "',
            2,
            true,
        ];
        yield 'after double quote' => [
            '"" ',
            3,
            false,
        ];
        yield 'on backtick' => [
            '`',
            0,
            true,
        ];
        yield 'between backticks' => [
            '` `',
            2,
            true,
        ];
        yield 'after backticks' => [
            '`` ',
            3,
            false,
        ];
    }
}