From f8b3564594ead596c7a2d848984b0967a5aa23c2 Mon Sep 17 00:00:00 2001 From: Josh Sommers Date: Sun, 2 Jun 2024 11:10:28 -1000 Subject: [PATCH 1/8] Just a quick typo fix. --- src/HTML5/Parser/CharacterReference.php | 2 +- src/HTML5/Parser/Tokenizer.php | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/HTML5/Parser/CharacterReference.php b/src/HTML5/Parser/CharacterReference.php index 490b5487..56a206cb 100644 --- a/src/HTML5/Parser/CharacterReference.php +++ b/src/HTML5/Parser/CharacterReference.php @@ -48,7 +48,7 @@ public static function lookupDecimal($int) } /** - * Given a hexidecimal number, return the UTF-8 character. + * Given a hexadecimal number, return the UTF-8 character. * * @param $hexdec * diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php index e8b4aa09..0699cf1b 100644 --- a/src/HTML5/Parser/Tokenizer.php +++ b/src/HTML5/Parser/Tokenizer.php @@ -1127,7 +1127,7 @@ protected function decodeCharacterReference($inAttribute = false) return '&'; } - // Hexidecimal encoding. + // Hexadecimal encoding. // X[0-9a-fA-F]+; // x[0-9a-fA-F]+; if ('x' === $tok || 'X' === $tok) { From 9c30da5a9312a7636ddc38e739b9a702ef21f542 Mon Sep 17 00:00:00 2001 From: W0rma Date: Thu, 31 Oct 2024 20:36:42 +0100 Subject: [PATCH 2/8] Bump actions/checkout version --- .github/workflows/benchmark.yaml | 2 +- .github/workflows/ci.yaml | 2 +- .github/workflows/cs.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/benchmark.yaml b/.github/workflows/benchmark.yaml index 17a38b87..dd84591a 100644 --- a/.github/workflows/benchmark.yaml +++ b/.github/workflows/benchmark.yaml @@ -21,7 +21,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup PHP uses: shivammathur/setup-php@v2 diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 4efa2ff5..8d353177 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -25,7 +25,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup PHP uses: shivammathur/setup-php@v2 diff --git a/.github/workflows/cs.yaml b/.github/workflows/cs.yaml index c84c4dd3..38d6364d 100644 --- a/.github/workflows/cs.yaml +++ b/.github/workflows/cs.yaml @@ -19,7 +19,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup PHP uses: shivammathur/setup-php@v2 From dc8be11969aa8e06caa5322501b6ca18e949dd44 Mon Sep 17 00:00:00 2001 From: W0rma Date: Thu, 31 Oct 2024 20:36:46 +0100 Subject: [PATCH 3/8] Add PHP 8.4 to CI config --- .github/workflows/ci.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 8d353177..1ae2ed54 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -22,6 +22,7 @@ jobs: - "8.1" - "8.2" - "8.3" + - "8.4" steps: - name: Checkout code From c245c143cce48769f8c8a19dffa74ad0bc68059f Mon Sep 17 00:00:00 2001 From: W0rma Date: Thu, 31 Oct 2024 20:36:42 +0100 Subject: [PATCH 4/8] Bump actions/checkout version --- .github/workflows/benchmark.yaml | 2 +- .github/workflows/ci.yaml | 2 +- .github/workflows/cs.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/benchmark.yaml b/.github/workflows/benchmark.yaml index 17a38b87..dd84591a 100644 --- a/.github/workflows/benchmark.yaml +++ b/.github/workflows/benchmark.yaml @@ -21,7 +21,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup PHP uses: shivammathur/setup-php@v2 diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 4efa2ff5..8d353177 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -25,7 +25,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup PHP uses: shivammathur/setup-php@v2 diff --git a/.github/workflows/cs.yaml b/.github/workflows/cs.yaml index c84c4dd3..38d6364d 100644 --- a/.github/workflows/cs.yaml +++ b/.github/workflows/cs.yaml @@ -19,7 +19,7 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup PHP uses: shivammathur/setup-php@v2 From 69627855e33549939659121da16127119ab63bd2 Mon Sep 17 00:00:00 2001 From: W0rma Date: Thu, 31 Oct 2024 20:38:02 +0100 Subject: [PATCH 5/8] Apply cs fixer v2.19.3 --- .github/workflows/cs.yaml | 2 +- .php_cs.dist | 1 + src/HTML5.php | 1 - src/HTML5/Parser/DOMTreeBuilder.php | 2 -- src/HTML5/Parser/Tokenizer.php | 1 + src/HTML5/Parser/TreeBuildingRules.php | 1 - test/HTML5/Parser/DOMTreeBuilderTest.php | 2 +- test/HTML5/Parser/ScannerTest.php | 2 +- test/HTML5/Parser/TokenizerTest.php | 2 +- test/HTML5/Parser/TreeBuildingRulesTest.php | 6 +++--- test/HTML5/Serializer/OutputRulesTest.php | 2 +- 11 files changed, 10 insertions(+), 12 deletions(-) diff --git a/.github/workflows/cs.yaml b/.github/workflows/cs.yaml index 38d6364d..266666de 100644 --- a/.github/workflows/cs.yaml +++ b/.github/workflows/cs.yaml @@ -28,5 +28,5 @@ jobs: - name: cs fix run: | - wget -q https://github.com/FriendsOfPHP/PHP-CS-Fixer/releases/download/v2.13.1/php-cs-fixer.phar + wget -q https://github.com/FriendsOfPHP/PHP-CS-Fixer/releases/download/v2.19.3/php-cs-fixer.phar php php-cs-fixer.phar fix --dry-run --diff diff --git a/.php_cs.dist b/.php_cs.dist index d5e49183..f6abb593 100644 --- a/.php_cs.dist +++ b/.php_cs.dist @@ -7,6 +7,7 @@ $finder = PhpCsFixer\Finder::create() return PhpCsFixer\Config::create() ->setRules(array( '@Symfony' => true, + 'array_syntax' => false, 'concat_space' => array('spacing' => 'one'), 'phpdoc_annotation_without_dot' => false, )) diff --git a/src/HTML5.php b/src/HTML5.php index c857145f..49a90daf 100644 --- a/src/HTML5.php +++ b/src/HTML5.php @@ -146,7 +146,6 @@ public function hasErrors() * Parse an input string. * * @param string $input - * @param array $options * * @return \DOMDocument */ diff --git a/src/HTML5/Parser/DOMTreeBuilder.php b/src/HTML5/Parser/DOMTreeBuilder.php index d165b66b..85651179 100644 --- a/src/HTML5/Parser/DOMTreeBuilder.php +++ b/src/HTML5/Parser/DOMTreeBuilder.php @@ -231,8 +231,6 @@ public function fragment() * * This is used for handling Processor Instructions as they are * inserted. If omitted, PI's are inserted directly into the DOM tree. - * - * @param InstructionProcessor $proc */ public function setInstructionProcessor(InstructionProcessor $proc) { diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php index 0699cf1b..feb9a66e 100644 --- a/src/HTML5/Parser/Tokenizer.php +++ b/src/HTML5/Parser/Tokenizer.php @@ -729,6 +729,7 @@ protected function isCommentEnd() // Test for '!>' if ('!' == $this->scanner->current() && '>' == $this->scanner->peek()) { $this->scanner->consume(); // Consume the last '>' + return true; } // Unread '-' and one of '!' or '>'; diff --git a/src/HTML5/Parser/TreeBuildingRules.php b/src/HTML5/Parser/TreeBuildingRules.php index 00d3951f..4c6983b2 100644 --- a/src/HTML5/Parser/TreeBuildingRules.php +++ b/src/HTML5/Parser/TreeBuildingRules.php @@ -80,7 +80,6 @@ public function evaluate($new, $current) case 'thead': case 'tfoot': case 'table': // Spec isn't explicit about this, but it's necessary. - return $this->closeIfCurrentMatches($new, $current, array( 'thead', 'tfoot', diff --git a/test/HTML5/Parser/DOMTreeBuilderTest.php b/test/HTML5/Parser/DOMTreeBuilderTest.php index 659378c6..1e1898c9 100644 --- a/test/HTML5/Parser/DOMTreeBuilderTest.php +++ b/test/HTML5/Parser/DOMTreeBuilderTest.php @@ -6,9 +6,9 @@ namespace Masterminds\HTML5\Tests\Parser; +use Masterminds\HTML5\Parser\DOMTreeBuilder; use Masterminds\HTML5\Parser\Scanner; use Masterminds\HTML5\Parser\Tokenizer; -use Masterminds\HTML5\Parser\DOMTreeBuilder; /** * These tests are functional, not necessarily unit tests. diff --git a/test/HTML5/Parser/ScannerTest.php b/test/HTML5/Parser/ScannerTest.php index 9f75c4db..a181c1ba 100644 --- a/test/HTML5/Parser/ScannerTest.php +++ b/test/HTML5/Parser/ScannerTest.php @@ -6,8 +6,8 @@ namespace Masterminds\HTML5\Tests\Parser; -use Masterminds\HTML5\Parser\StringInputStream; use Masterminds\HTML5\Parser\Scanner; +use Masterminds\HTML5\Parser\StringInputStream; class ScannerTest extends \Masterminds\HTML5\Tests\TestCase { diff --git a/test/HTML5/Parser/TokenizerTest.php b/test/HTML5/Parser/TokenizerTest.php index c9643c52..374896f6 100644 --- a/test/HTML5/Parser/TokenizerTest.php +++ b/test/HTML5/Parser/TokenizerTest.php @@ -2,9 +2,9 @@ namespace Masterminds\HTML5\Tests\Parser; -use Masterminds\HTML5\Parser\UTF8Utils; use Masterminds\HTML5\Parser\Scanner; use Masterminds\HTML5\Parser\Tokenizer; +use Masterminds\HTML5\Parser\UTF8Utils; class TokenizerTest extends \Masterminds\HTML5\Tests\TestCase { diff --git a/test/HTML5/Parser/TreeBuildingRulesTest.php b/test/HTML5/Parser/TreeBuildingRulesTest.php index 45c68bcf..ae17a349 100644 --- a/test/HTML5/Parser/TreeBuildingRulesTest.php +++ b/test/HTML5/Parser/TreeBuildingRulesTest.php @@ -6,10 +6,10 @@ namespace Masterminds\HTML5\Tests\Parser; -use Masterminds\HTML5\Parser\TreeBuildingRules; -use Masterminds\HTML5\Parser\Tokenizer; -use Masterminds\HTML5\Parser\Scanner; use Masterminds\HTML5\Parser\DOMTreeBuilder; +use Masterminds\HTML5\Parser\Scanner; +use Masterminds\HTML5\Parser\Tokenizer; +use Masterminds\HTML5\Parser\TreeBuildingRules; /** * These tests are functional, not necessarily unit tests. diff --git a/test/HTML5/Serializer/OutputRulesTest.php b/test/HTML5/Serializer/OutputRulesTest.php index 57415797..bb098533 100644 --- a/test/HTML5/Serializer/OutputRulesTest.php +++ b/test/HTML5/Serializer/OutputRulesTest.php @@ -2,9 +2,9 @@ namespace Masterminds\HTML5\Tests\Serializer; +use Masterminds\HTML5; use Masterminds\HTML5\Serializer\OutputRules; use Masterminds\HTML5\Serializer\Traverser; -use Masterminds\HTML5; class OutputRulesTest extends \Masterminds\HTML5\Tests\TestCase { From 45a61078b6cd0fa356983c0ff9b8e441bc76ca54 Mon Sep 17 00:00:00 2001 From: Link1515 Date: Tue, 17 Dec 2024 16:22:27 +0800 Subject: [PATCH 6/8] fix(Tokenizer): ignore duplicate attributes --- src/HTML5/Parser/Tokenizer.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php index feb9a66e..c196d1aa 100644 --- a/src/HTML5/Parser/Tokenizer.php +++ b/src/HTML5/Parser/Tokenizer.php @@ -507,7 +507,7 @@ protected function attribute(&$attributes) $this->scanner->whitespace(); $val = $this->attributeValue(); - if ($isValidAttribute) { + if ($isValidAttribute && !array_key_exists($name, $attributes)) { $attributes[$name] = $val; } From 1660321f8b40ca25a74334b67170c62d944216aa Mon Sep 17 00:00:00 2001 From: Link1515 Date: Tue, 17 Dec 2024 17:07:36 +0800 Subject: [PATCH 7/8] test(Tokenizer): add tests for duplicate attribute validation --- test/HTML5/Parser/TokenizerTest.php | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test/HTML5/Parser/TokenizerTest.php b/test/HTML5/Parser/TokenizerTest.php index 374896f6..86401c73 100644 --- a/test/HTML5/Parser/TokenizerTest.php +++ b/test/HTML5/Parser/TokenizerTest.php @@ -705,6 +705,14 @@ public function testTagAttributes() ), false, ), + // duplicate attributes must be ignored + '' => array( + 'foo', + array( + 'bar' => 'first', + ), + false, + ), ); $this->isAllGood('startTag', 2, $good); From 9e650079de1526d4fcb2f03c5235f21f02b3a242 Mon Sep 17 00:00:00 2001 From: Oliver Hader Date: Thu, 6 Mar 2025 22:29:17 +0100 Subject: [PATCH 8/8] Address php-cs-fixer remarks --- src/HTML5/Parser/Tokenizer.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php index c196d1aa..77e268fb 100644 --- a/src/HTML5/Parser/Tokenizer.php +++ b/src/HTML5/Parser/Tokenizer.php @@ -507,7 +507,7 @@ protected function attribute(&$attributes) $this->scanner->whitespace(); $val = $this->attributeValue(); - if ($isValidAttribute && !array_key_exists($name, $attributes)) { + if ($isValidAttribute && !array_key_exists($name, $attributes)) { $attributes[$name] = $val; }