diff --git a/src/Symfony/Component/Serializer/CHANGELOG.md b/src/Symfony/Component/Serializer/CHANGELOG.md
index 524c775d5c6e2..0958f5054062a 100644
--- a/src/Symfony/Component/Serializer/CHANGELOG.md
+++ b/src/Symfony/Component/Serializer/CHANGELOG.md
@@ -5,6 +5,7 @@ CHANGELOG
 -----
 
  * deprecated the `XmlEncoder::TYPE_CASE_ATTRIBUTES` constant, use `XmlEncoder::TYPE_CAST_ATTRIBUTES` instead
+ * added option to output a UTF-8 BOM in CSV encoder via `CsvEncoder::OUTPUT_UTF8_BOM_KEY` context option
 
 4.3.0
 -----
diff --git a/src/Symfony/Component/Serializer/Encoder/CsvEncoder.php b/src/Symfony/Component/Serializer/Encoder/CsvEncoder.php
index 3b3ac1decc8bc..59fe793a3fe67 100644
--- a/src/Symfony/Component/Serializer/Encoder/CsvEncoder.php
+++ b/src/Symfony/Component/Serializer/Encoder/CsvEncoder.php
@@ -12,6 +12,7 @@
 namespace Symfony\Component\Serializer\Encoder;
 
 use Symfony\Component\Serializer\Exception\InvalidArgumentException;
+use Symfony\Component\Serializer\Exception\UnexpectedValueException;
 
 /**
  * Encodes CSV data.
@@ -30,6 +31,9 @@ class CsvEncoder implements EncoderInterface, DecoderInterface
     const ESCAPE_FORMULAS_KEY = 'csv_escape_formulas';
     const AS_COLLECTION_KEY = 'as_collection';
     const NO_HEADERS_KEY = 'no_headers';
+    const OUTPUT_UTF8_BOM_KEY = 'output_utf8_bom';
+
+    private const UTF8_BOM = "\xEF\xBB\xBF";
 
     private $formulasStartCharacters = ['=', '-', '+', '@'];
     private $defaultContext = [
@@ -40,6 +44,7 @@ class CsvEncoder implements EncoderInterface, DecoderInterface
         self::HEADERS_KEY => [],
         self::KEY_SEPARATOR_KEY => '.',
         self::NO_HEADERS_KEY => false,
+        self::OUTPUT_UTF8_BOM_KEY => false,
     ];
 
     /**
@@ -90,7 +95,7 @@ public function encode($data, $format, array $context = [])
             }
         }
 
-        list($delimiter, $enclosure, $escapeChar, $keySeparator, $headers, $escapeFormulas) = $this->getCsvOptions($context);
+        list($delimiter, $enclosure, $escapeChar, $keySeparator, $headers, $escapeFormulas, $outputBom) = $this->getCsvOptions($context);
 
         foreach ($data as &$value) {
             $flattened = [];
@@ -114,6 +119,15 @@ public function encode($data, $format, array $context = [])
         $value = stream_get_contents($handle);
         fclose($handle);
 
+        if ($outputBom) {
+            // An empty pattern with the "u" modifier only matches when the subject is valid UTF-8.
+            if (!preg_match('//u', $value)) {
+                throw new UnexpectedValueException('You are trying to add a UTF-8 BOM to a non UTF-8 text.');
+            }
+
+            $value = self::UTF8_BOM.$value;
+        }
+
         return $value;
     }
 
@@ -134,6 +148,11 @@ public function decode($data, $format, array $context = [])
         fwrite($handle, $data);
         rewind($handle);
 
+        // Skip a leading UTF-8 BOM so that it is not parsed as part of the first cell.
+        if (0 === strpos($data, self::UTF8_BOM)) {
+            fseek($handle, \strlen(self::UTF8_BOM));
+        }
+
         $headers = null;
         $nbHeaders = 0;
         $headerCount = [];
@@ -238,12 +257,13 @@ private function getCsvOptions(array $context): array
         $keySeparator = $context[self::KEY_SEPARATOR_KEY] ?? $this->defaultContext[self::KEY_SEPARATOR_KEY];
         $headers = $context[self::HEADERS_KEY] ?? $this->defaultContext[self::HEADERS_KEY];
         $escapeFormulas = $context[self::ESCAPE_FORMULAS_KEY] ?? $this->defaultContext[self::ESCAPE_FORMULAS_KEY];
+        $outputBom = $context[self::OUTPUT_UTF8_BOM_KEY] ?? $this->defaultContext[self::OUTPUT_UTF8_BOM_KEY];
 
         if (!\is_array($headers)) {
             throw new InvalidArgumentException(sprintf('The "%s" context variable must be an array or null, given "%s".', self::HEADERS_KEY, \gettype($headers)));
         }
 
-        return [$delimiter, $enclosure, $escapeChar, $keySeparator, $headers, $escapeFormulas];
+        return [$delimiter, $enclosure, $escapeChar, $keySeparator, $headers, $escapeFormulas, $outputBom];
     }
 
     /**
diff --git a/src/Symfony/Component/Serializer/Tests/Encoder/CsvEncoderTest.php b/src/Symfony/Component/Serializer/Tests/Encoder/CsvEncoderTest.php
index f859cd76679ef..52a181cba9d87 100644
--- a/src/Symfony/Component/Serializer/Tests/Encoder/CsvEncoderTest.php
+++ b/src/Symfony/Component/Serializer/Tests/Encoder/CsvEncoderTest.php
@@ -13,6 +13,7 @@
 
 use PHPUnit\Framework\TestCase;
 use Symfony\Component\Serializer\Encoder\CsvEncoder;
+use Symfony\Component\Serializer\Exception\UnexpectedValueException;
 
 /**
  * @author Kévin Dunglas
@@ -595,4 +596,38 @@ public function testDecodeWithoutHeader()
             CsvEncoder::NO_HEADERS_KEY => true,
         ]));
     }
+
+    public function testBOMIsAddedOnDemand()
+    {
+        $value = ['foo' => 'hello', 'bar' => 'hey ho'];
+
+        $this->assertEquals("\xEF\xBB\xBF".<<<'CSV'
+foo,bar
+hello,"hey ho"
+
+CSV
+            , $this->encoder->encode($value, 'csv', [CsvEncoder::OUTPUT_UTF8_BOM_KEY => true]));
+    }
+
+    public function testBOMCanNotBeAddedToNonUtf8Csv()
+    {
+        $value = [mb_convert_encoding('ÄÖÜ', 'ISO-8859-1', 'UTF-8')];
+
+        $this->expectException(UnexpectedValueException::class);
+        $this->expectExceptionMessage('You are trying to add a UTF-8 BOM to a non UTF-8 text.');
+        $this->encoder->encode($value, 'csv', [CsvEncoder::OUTPUT_UTF8_BOM_KEY => true]);
+    }
+
+    public function testBOMIsStripped()
+    {
+        $csv = "\xEF\xBB\xBF".<<<'CSV'
+foo,bar
+hello,"hey ho"
+
+CSV;
+        $this->assertEquals(
+            ['foo' => 'hello', 'bar' => 'hey ho'],
+            $this->encoder->decode($csv, 'csv', [CsvEncoder::AS_COLLECTION_KEY => false])
+        );
+    }
 }