-
-
Notifications
You must be signed in to change notification settings - Fork 9.6k
CsvEncoder handling variable structures and custom header order #24256
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,6 +17,7 @@ | |
* Encodes CSV data. | ||
* | ||
* @author Kévin Dunglas <dunglas@gmail.com> | ||
* @author Oliver Hoff <oliver@hofff.com> | ||
*/ | ||
class CsvEncoder implements EncoderInterface, DecoderInterface | ||
{ | ||
|
@@ -25,6 +26,7 @@ class CsvEncoder implements EncoderInterface, DecoderInterface | |
const ENCLOSURE_KEY = 'csv_enclosure'; | ||
const ESCAPE_CHAR_KEY = 'csv_escape_char'; | ||
const KEY_SEPARATOR_KEY = 'csv_key_separator'; | ||
const HEADERS_KEY = 'csv_headers'; | ||
|
||
private $delimiter; | ||
private $enclosure; | ||
|
@@ -69,21 +71,22 @@ public function encode($data, $format, array $context = array()) | |
} | ||
} | ||
|
||
list($delimiter, $enclosure, $escapeChar, $keySeparator) = $this->getCsvOptions($context); | ||
list($delimiter, $enclosure, $escapeChar, $keySeparator, $headers) = $this->getCsvOptions($context); | ||
|
||
$headers = null; | ||
foreach ($data as $value) { | ||
$result = array(); | ||
$this->flatten($value, $result, $keySeparator); | ||
foreach ($data as &$value) { | ||
$flattened = array(); | ||
$this->flatten($value, $flattened, $keySeparator); | ||
$value = $flattened; | ||
} | ||
unset($value); | ||
|
||
if (null === $headers) { | ||
$headers = array_keys($result); | ||
fputcsv($handle, $headers, $delimiter, $enclosure, $escapeChar); | ||
} elseif (array_keys($result) !== $headers) { | ||
throw new InvalidArgumentException('To use the CSV encoder, each line in the data array must have the same structure. You may want to use a custom normalizer class to normalize the data format before passing it to the CSV encoder.'); | ||
} | ||
$headers = array_merge(array_values($headers), array_diff($this->extractHeaders($data), $headers)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Computing the header list dynamically like you've done is very interesting but it can be very costly on large datasets. Shouldn't it introduced as an option? It would be nice to be able to choose between the previous behavior and your implementation. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it should be in the same complexity class as the rest of the encoder. we can reduce the cost a little bit by removing the |
||
|
||
fputcsv($handle, $headers, $delimiter, $enclosure, $escapeChar); | ||
|
||
fputcsv($handle, $result, $delimiter, $enclosure, $escapeChar); | ||
$headers = array_fill_keys($headers, ''); | ||
foreach ($data as $row) { | ||
fputcsv($handle, array_replace($headers, $row), $delimiter, $enclosure, $escapeChar); | ||
} | ||
|
||
rewind($handle); | ||
|
@@ -194,7 +197,50 @@ private function getCsvOptions(array $context) | |
$enclosure = isset($context[self::ENCLOSURE_KEY]) ? $context[self::ENCLOSURE_KEY] : $this->enclosure; | ||
$escapeChar = isset($context[self::ESCAPE_CHAR_KEY]) ? $context[self::ESCAPE_CHAR_KEY] : $this->escapeChar; | ||
$keySeparator = isset($context[self::KEY_SEPARATOR_KEY]) ? $context[self::KEY_SEPARATOR_KEY] : $this->keySeparator; | ||
$headers = isset($context[self::HEADERS_KEY]) ? $context[self::HEADERS_KEY] : array(); | ||
|
||
if (!is_array($headers)) { | ||
throw new InvalidArgumentException(sprintf('The "%s" context variable must be an array or null, given "%s".', self::HEADERS_KEY, gettype($headers))); | ||
} | ||
|
||
return array($delimiter, $enclosure, $escapeChar, $keySeparator, $headers); | ||
} | ||
|
||
/** | ||
* @param array $data | ||
* | ||
* @return string[] | ||
*/ | ||
private function extractHeaders(array $data) | ||
{ | ||
$headers = array(); | ||
$flippedHeaders = array(); | ||
|
||
foreach ($data as $row) { | ||
$previousHeader = null; | ||
|
||
foreach ($row as $header => $_) { | ||
if (isset($flippedHeaders[$header])) { | ||
$previousHeader = $header; | ||
continue; | ||
} | ||
|
||
if (null === $previousHeader) { | ||
$n = count($headers); | ||
} else { | ||
$n = $flippedHeaders[$previousHeader] + 1; | ||
|
||
for ($j = count($headers); $j > $n; --$j) { | ||
++$flippedHeaders[$headers[$j] = $headers[$j - 1]]; | ||
} | ||
} | ||
|
||
$headers[$n] = $header; | ||
$flippedHeaders[$header] = $n; | ||
$previousHeader = $header; | ||
} | ||
} | ||
|
||
return array($delimiter, $enclosure, $escapeChar, $keySeparator); | ||
return $headers; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -135,12 +135,42 @@ public function testEncodeEmptyArray() | |
$this->assertEquals("\n\n", $this->encoder->encode(array(array()), 'csv')); | ||
} | ||
|
||
/** | ||
* @expectedException \Symfony\Component\Serializer\Exception\InvalidArgumentException | ||
*/ | ||
public function testEncodeNonFlattenableStructure() | ||
public function testEncodeVariableStructure() | ||
{ | ||
$value = array( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What about: array(
'a' => array('a', 'b'),
'a' => 'foo',
) This case looks not easy to handle, it's one of the reasons of the current behavior. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. actually this one gets handled just fine, but the result may be a little bit unexpected:
i dont think we need to handle this differently in the encoder, because that one is indeed a custom normalizers job There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The output is a bit weird but why not. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. btw this is more a problem of the flattening algorithm, which is untouched by the PR. [
['a' => ['foo', 'bar'], 'a.0' => 'baz'],
] |
||
array('a' => array('foo', 'bar')), | ||
array('a' => array(), 'b' => 'baz'), | ||
array('a' => array('bar', 'foo'), 'c' => 'pong'), | ||
); | ||
$csv = <<<CSV | ||
a.0,a.1,c,b | ||
foo,bar,, | ||
,,,baz | ||
bar,foo,pong, | ||
|
||
CSV; | ||
|
||
$this->assertEquals($csv, $this->encoder->encode($value, 'csv')); | ||
} | ||
|
||
public function testEncodeCustomHeaders() | ||
{ | ||
$this->encoder->encode(array(array('a' => array('foo', 'bar')), array('a' => array())), 'csv'); | ||
$context = array( | ||
CsvEncoder::HEADERS_KEY => array( | ||
'b', | ||
'c', | ||
), | ||
); | ||
$value = array( | ||
array('a' => 'foo', 'b' => 'bar'), | ||
); | ||
$csv = <<<CSV | ||
b,c,a | ||
bar,,foo | ||
|
||
CSV; | ||
|
||
$this->assertEquals($csv, $this->encoder->encode($value, 'csv', $context)); | ||
} | ||
|
||
public function testSupportsDecoding() | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What's the benefit of unsetting this var explicitly?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
its a cleanup of the dangling reference var. if one would do
$value = 'foo'
after the loop, he would overwrite the last iterated item in the array.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes but
$value
is not accessed after.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
its more of a safety for futher modifications (i do it always as a best practice), this gets lost fast and could cause confusion (though in this case is covered by testcases)