diff --git a/src/Symfony/Component/DomCrawler/Crawler.php b/src/Symfony/Component/DomCrawler/Crawler.php index 36c7a41e5fbc7..aacb94ad37f2f 100644 --- a/src/Symfony/Component/DomCrawler/Crawler.php +++ b/src/Symfony/Component/DomCrawler/Crawler.php @@ -156,24 +156,17 @@ public function addContent($content, $type = null) return; } - $charset = null; - if (false !== $pos = stripos($type, 'charset=')) { - $charset = substr($type, $pos + 8); - if (false !== $pos = strpos($charset, ';')) { - $charset = substr($charset, 0, $pos); - } - } + $charset = preg_match('//u', $content) ? 'UTF-8' : 'ISO-8859-1'; // http://www.w3.org/TR/encoding/#encodings // http://www.w3.org/TR/REC-xml/#NT-EncName - if (null === $charset && - preg_match('/\]+charset *= *["\']?([a-zA-Z\-0-9_:.]+)/i', $content, $matches)) { - $charset = $matches[1]; - } + $content = preg_replace_callback('/(charset *= *["\']?)([a-zA-Z\-0-9_:.]+)/i', function ($m) use (&$charset) { + if ('charset=' === $this->convertToHtmlEntities('charset=', $m[2])) { + $charset = $m[2]; + } - if (null === $charset) { - $charset = preg_match('//u', $content) ? 'UTF-8' : 'ISO-8859-1'; - } + return $m[1].$charset; + }, $content, 1); if ('x' === $xmlMatches[1]) { $this->addXmlContent($content, $charset); diff --git a/src/Symfony/Component/DomCrawler/Tests/AbstractCrawlerTest.php b/src/Symfony/Component/DomCrawler/Tests/AbstractCrawlerTest.php index 697306c53ee28..96d9177673c25 100644 --- a/src/Symfony/Component/DomCrawler/Tests/AbstractCrawlerTest.php +++ b/src/Symfony/Component/DomCrawler/Tests/AbstractCrawlerTest.php @@ -187,6 +187,10 @@ public function testAddContent() $crawler = $this->createCrawler(); $crawler->addContent($this->getDoctype().'中文'); $this->assertEquals('中文', $crawler->filterXPath('//span')->text(), '->addContent() guess wrong charset'); + + $crawler = $this->createCrawler(); + $crawler->addContent($this->getDoctype().'
'); + $this->assertEquals('foo', $crawler->filterXPath('//div')->attr('class'), '->addContent() ignores bad charset'); } /**