Skip to content

Commit 618b90d

Browse files
committed
Add context options to handle BOM
1 parent e3b513b commit 618b90d

File tree

4 files changed

+89
-2
lines changed

4 files changed

+89
-2
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
<?php
2+
3+
namespace Symfony\Component\Serializer;
4+
5+
/**
 * Byte order mark (BOM) byte sequences for the common Unicode encodings.
 *
 * Every constant is the raw byte string found at the very start of a stream
 * encoded with the corresponding encoding.
 *
 * Note that BOM_UTF32_LE begins with the same two bytes as BOM_UTF16_LE, so
 * code that detects a BOM by prefix comparison must test the longer
 * sequences before the shorter ones.
 */
final class ByteOrderMark
{
    /**
     * UTF-8 BOM sequence (bytes EF BB BF).
     */
    public const BOM_UTF8 = "\xEF\xBB\xBF";

    /**
     * UTF-16 BE BOM sequence (bytes FE FF).
     */
    public const BOM_UTF16_BE = "\xFE\xFF";

    /**
     * UTF-16 LE BOM sequence (bytes FF FE).
     */
    public const BOM_UTF16_LE = "\xFF\xFE";

    /**
     * UTF-32 BE BOM sequence (bytes 00 00 FE FF).
     */
    public const BOM_UTF32_BE = "\x00\x00\xFE\xFF";

    /**
     * UTF-32 LE BOM sequence (bytes FF FE 00 00).
     */
    public const BOM_UTF32_LE = "\xFF\xFE\x00\x00";
}

src/Symfony/Component/Serializer/CHANGELOG.md

+3
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@ CHANGELOG
55
-----
66

77
* deprecated the `XmlEncoder::TYPE_CASE_ATTRIBUTES` constant, use `XmlEncoder::TYPE_CAST_ATTRIBUTES` instead
8+
* added common BOMs in `ByteOrderMark`
9+
* added option to output BOM in CSV encoder via `CsvEncoder::OUTPUT_BOM` context option
10+
* added option to skip the BOM when decoding CSV via the `CsvEncoder::SKIP_INPUT_BOM` context option (defaults to `true`)
811

912
4.3.0
1013
-----

src/Symfony/Component/Serializer/Encoder/CsvEncoder.php

+29-2
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
namespace Symfony\Component\Serializer\Encoder;
1313

14+
use Symfony\Component\Serializer\ByteOrderMark;
1415
use Symfony\Component\Serializer\Exception\InvalidArgumentException;
1516

1617
/**
@@ -30,6 +31,8 @@ class CsvEncoder implements EncoderInterface, DecoderInterface
3031
const ESCAPE_FORMULAS_KEY = 'csv_escape_formulas';
3132
const AS_COLLECTION_KEY = 'as_collection';
3233
const NO_HEADERS_KEY = 'no_headers';
34+
const OUTPUT_BOM = 'output_bom';
35+
const SKIP_INPUT_BOM = 'skip_input_bom';
3336

3437
private $formulasStartCharacters = ['=', '-', '+', '@'];
3538
private $defaultContext = [
@@ -40,6 +43,8 @@ class CsvEncoder implements EncoderInterface, DecoderInterface
4043
self::HEADERS_KEY => [],
4144
self::KEY_SEPARATOR_KEY => '.',
4245
self::NO_HEADERS_KEY => false,
46+
self::OUTPUT_BOM => '',
47+
self::SKIP_INPUT_BOM => true,
4348
];
4449

4550
/**
@@ -90,7 +95,7 @@ public function encode($data, $format, array $context = [])
9095
}
9196
}
9297

93-
list($delimiter, $enclosure, $escapeChar, $keySeparator, $headers, $escapeFormulas) = $this->getCsvOptions($context);
98+
list($delimiter, $enclosure, $escapeChar, $keySeparator, $headers, $escapeFormulas, $outputBOM) = $this->getCsvOptions($context);
9499

95100
foreach ($data as &$value) {
96101
$flattened = [];
@@ -101,6 +106,8 @@ public function encode($data, $format, array $context = [])
101106

102107
$headers = array_merge(array_values($headers), array_diff($this->extractHeaders($data), $headers));
103108

109+
fwrite($handle, $outputBOM);
110+
104111
if (!($context[self::NO_HEADERS_KEY] ?? false)) {
105112
fputcsv($handle, $headers, $delimiter, $enclosure, $escapeChar);
106113
}
@@ -134,6 +141,11 @@ public function decode($data, $format, array $context = [])
134141
fwrite($handle, $data);
135142
rewind($handle);
136143

144+
if (($context[self::SKIP_INPUT_BOM] ?? true) === false) {
145+
$inputBom = $this->determineBom(substr($data, 0, 4));
146+
fseek($handle, strlen($inputBom));
147+
}
148+
137149
$headers = null;
138150
$nbHeaders = 0;
139151
$headerCount = [];
@@ -238,12 +250,13 @@ private function getCsvOptions(array $context): array
238250
$keySeparator = $context[self::KEY_SEPARATOR_KEY] ?? $this->defaultContext[self::KEY_SEPARATOR_KEY];
239251
$headers = $context[self::HEADERS_KEY] ?? $this->defaultContext[self::HEADERS_KEY];
240252
$escapeFormulas = $context[self::ESCAPE_FORMULAS_KEY] ?? $this->defaultContext[self::ESCAPE_FORMULAS_KEY];
253+
$outputBOM = $context[self::OUTPUT_BOM] ?? $this->defaultContext[self::OUTPUT_BOM];
241254

242255
if (!\is_array($headers)) {
243256
throw new InvalidArgumentException(sprintf('The "%s" context variable must be an array or null, given "%s".', self::HEADERS_KEY, \gettype($headers)));
244257
}
245258

246-
return [$delimiter, $enclosure, $escapeChar, $keySeparator, $headers, $escapeFormulas];
259+
return [$delimiter, $enclosure, $escapeChar, $keySeparator, $headers, $escapeFormulas, $outputBOM];
247260
}
248261

249262
/**
@@ -281,4 +294,18 @@ private function extractHeaders(iterable $data): array
281294

282295
return $headers;
283296
}
297+
298+
/**
 * Returns the byte order mark that $text starts with.
 *
 * The candidate sequences are the constants declared on ByteOrderMark. They
 * are tried from the longest to the shortest: the UTF-32 LE mark
 * ("\xFF\xFE\x00\x00") starts with the UTF-16 LE mark ("\xFF\xFE"), so trying
 * them in declaration order would report a UTF-32 LE mark as the two-byte
 * UTF-16 LE one and leave two NUL bytes behind.
 *
 * @param string $text Leading bytes of the input; the longest known mark is
 *                     4 bytes long, so 4 bytes are enough
 *
 * @return string The matching BOM sequence, or an empty string if $text does
 *                not start with any known mark
 */
private function determineBom(string $text): string
{
    // Built once per process: the reflection lookup and the sort are invariant.
    static $list;

    if (null === $list) {
        $list = array_values((new \ReflectionClass(ByteOrderMark::class))->getConstants());

        // Longest sequence first, so a mark is never mistaken for its own prefix.
        usort($list, static function (string $a, string $b): int {
            return \strlen($b) <=> \strlen($a);
        });
    }

    foreach ($list as $sequence) {
        // Binary-safe prefix comparison limited to the length of the candidate.
        if (0 === strncmp($text, $sequence, \strlen($sequence))) {
            return $sequence;
        }
    }

    return '';
}
284311
}

src/Symfony/Component/Serializer/Tests/Encoder/CsvEncoderTest.php

+26
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
namespace Symfony\Component\Serializer\Tests\Encoder;
1313

1414
use PHPUnit\Framework\TestCase;
15+
use Symfony\Component\Serializer\ByteOrderMark;
1516
use Symfony\Component\Serializer\Encoder\CsvEncoder;
1617

1718
/**
@@ -595,4 +596,29 @@ public function testDecodeWithoutHeader()
595596
CsvEncoder::NO_HEADERS_KEY => true,
596597
]));
597598
}
599+
600+
/**
 * Encoding with the OUTPUT_BOM context option set to the UTF-8 mark must
 * yield CSV output that starts with that mark, followed by the header row
 * and the data row.
 */
public function testBOMIsAddedOnDemand()
{
    $row = ['foo' => 'hello', 'bar' => 'hey ho'];
    $context = [CsvEncoder::OUTPUT_BOM => ByteOrderMark::BOM_UTF8];

    $expected = ByteOrderMark::BOM_UTF8.<<<'CSV'
foo,bar
hello,"hey ho"

CSV;

    $this->assertEquals($expected, $this->encoder->encode($row, 'csv', $context));
}
611+
612+
/**
 * Decoding CSV input that starts with a UTF-8 BOM must return the row
 * without the mark leaking into the first header name.
 *
 * NOTE(review): the stripping is requested here by passing
 * SKIP_INPUT_BOM => false; confirm that this polarity of the option is the
 * intended one.
 */
public function testBOMIsStripped()
{
    $body = <<<'CSV'
foo,bar
hello,"hey ho"

CSV;
    $input = ByteOrderMark::BOM_UTF8.$body;

    $decoded = $this->encoder->decode($input, 'csv', [CsvEncoder::SKIP_INPUT_BOM => false]);

    $this->assertEquals(['foo' => 'hello', 'bar' => 'hey ho'], $decoded);
}
598624
}

0 commit comments

Comments
 (0)