Skip to content

[Translator][Loader] added XLIFF 2.0 support. #15717

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Sep 16, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/Symfony/Component/Translation/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ CHANGELOG

* deprecated Translator::getMessages(), rely on TranslatorBagInterface::getCatalogue() instead.
* added options `as_tree`, `inline` to YamlFileDumper
* added support for XLIFF 2.0.
* added support for XLIFF target and tool attributes.
* added message parameters to DataCollectorTranslator.
* [DEPRECATION] The `DiffOperation` class has been deprecated and
Expand Down
83 changes: 71 additions & 12 deletions src/Symfony/Component/Translation/Dumper/XliffFileDumper.php
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,45 @@ class XliffFileDumper extends FileDumper
*/
protected function formatCatalogue(MessageCatalogue $messages, $domain, array $options = array())
{
$xliffVersion = '1.2';
if (array_key_exists('xliff_version', $options)) {
$xliffVersion = $options['xliff_version'];
}

if (array_key_exists('default_locale', $options)) {
$defaultLocale = $options['default_locale'];
} else {
$defaultLocale = \Locale::getDefault();
}

if ('1.2' === $xliffVersion) {
return $this->dumpXliff1($defaultLocale, $messages, $domain, $options);
}
if ('2.0' === $xliffVersion) {
return $this->dumpXliff2($defaultLocale, $messages, $domain, $options);
}

throw new \InvalidArgumentException(sprintf('No support implemented for dumping XLIFF version "%s".', $xliffVersion));
}

/**
* {@inheritdoc}
*/
protected function format(MessageCatalogue $messages, $domain)
{
return $this->formatCatalogue($messages, $domain);
}

/**
* {@inheritdoc}
*/
protected function getExtension()
{
return 'xlf';
}

private function dumpXliff1($defaultLocale, MessageCatalogue $messages, $domain, array $options = array())
{
$toolInfo = array('tool-id' => 'symfony', 'tool-name' => 'Symfony');
if (array_key_exists('tool_info', $options)) {
$toolInfo = array_merge($toolInfo, $options['tool_info']);
Expand Down Expand Up @@ -103,20 +136,46 @@ protected function formatCatalogue(MessageCatalogue $messages, $domain, array $o
return $dom->saveXML();
}

/**
* {@inheritdoc}
*/
protected function format(MessageCatalogue $messages, $domain)
private function dumpXliff2($defaultLocale, MessageCatalogue $messages, $domain, array $options = array())
{
return $this->formatCatalogue($messages, $domain);
}
$dom = new \DOMDocument('1.0', 'utf-8');
$dom->formatOutput = true;

/**
* {@inheritdoc}
*/
protected function getExtension()
{
return 'xlf';
$xliff = $dom->appendChild($dom->createElement('xliff'));
$xliff->setAttribute('xmlns', 'urn:oasis:names:tc:xliff:document:2.0');
$xliff->setAttribute('version', '2.0');
$xliff->setAttribute('srcLang', str_replace('_', '-', $defaultLocale));
$xliff->setAttribute('trgLang', str_replace('_', '-', $messages->getLocale()));

$xliffFile = $xliff->appendChild($dom->createElement('file'));
$xliffFile->setAttribute('id', $domain.'.'.$messages->getLocale());

foreach ($messages->all($domain) as $source => $target) {
$translation = $dom->createElement('unit');
$translation->setAttribute('id', md5($source));

$segment = $translation->appendChild($dom->createElement('segment'));

$s = $segment->appendChild($dom->createElement('source'));
$s->appendChild($dom->createTextNode($source));

// Does the target contain characters requiring a CDATA section?
$text = 1 === preg_match('/[&<>]/', $target) ? $dom->createCDATASection($target) : $dom->createTextNode($target);

$targetElement = $dom->createElement('target');
$metadata = $messages->getMetadata($source, $domain);
if ($this->hasMetadataArrayInfo('target-attributes', $metadata)) {
foreach ($metadata['target-attributes'] as $name => $value) {
$targetElement->setAttribute($name, $value);
}
}
$t = $segment->appendChild($targetElement);
$t->appendChild($text);

$xliffFile->appendChild($translation);
}

return $dom->saveXML();
}

/**
Expand Down
193 changes: 154 additions & 39 deletions src/Symfony/Component/Translation/Loader/XliffFileLoader.php
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,49 @@ public function load($resource, $locale, $domain = 'messages')
throw new NotFoundResourceException(sprintf('File "%s" not found.', $resource));
}

list($xml, $encoding) = $this->parseFile($resource);
$xml->registerXPathNamespace('xliff', 'urn:oasis:names:tc:xliff:document:1.2');

$catalogue = new MessageCatalogue($locale);
$this->extract($resource, $catalogue, $domain);

if (class_exists('Symfony\Component\Config\Resource\FileResource')) {
$catalogue->addResource(new FileResource($resource));
}

return $catalogue;
}

private function extract($resource, MessageCatalogue $catalogue, $domain)
{
try {
$dom = XmlUtils::loadFile($resource);
} catch (\InvalidArgumentException $e) {
throw new InvalidResourceException(sprintf('Unable to load "%s": %s', $resource, $e->getMessage()), $e->getCode(), $e);
}

$xliffVersion = $this->getVersionNumber($dom);
$this->validateSchema($xliffVersion, $dom, $this->getSchema($xliffVersion));

if ('1.2' === $xliffVersion) {
$this->extractXliff1($dom, $catalogue, $domain);
}

if ('2.0' === $xliffVersion) {
$this->extractXliff2($dom, $catalogue, $domain);
}
}

/**
* Extract messages and metadata from DOMDocument into a MessageCatalogue.
*
* @param \DOMDocument $dom Source to extract messages and metadata
* @param MessageCatalogue $catalogue Catalogue where we'll collect messages and metadata
* @param string $domain The domain
*/
private function extractXliff1(\DOMDocument $dom, MessageCatalogue $catalogue, $domain)
{
$xml = simplexml_import_dom($dom);
$encoding = strtoupper($dom->encoding);

$xml->registerXPathNamespace('xliff', 'urn:oasis:names:tc:xliff:document:1.2');
foreach ($xml->xpath('//xliff:trans-unit') as $translation) {
$attributes = $translation->attributes();

Expand All @@ -64,17 +103,47 @@ public function load($resource, $locale, $domain = 'messages')
$metadata['notes'] = $notes;
}
if (isset($translation->target) && $translation->target->attributes()) {
$metadata['target-attributes'] = $translation->target->attributes();
$metadata['target-attributes'] = array();
foreach ($translation->target->attributes() as $key => $value) {
$metadata['target-attributes'][$key] = (string) $value;
}
}

$catalogue->setMetadata((string) $source, $metadata, $domain);
}
}

if (class_exists('Symfony\Component\Config\Resource\FileResource')) {
$catalogue->addResource(new FileResource($resource));
}
/**
* @param \DOMDocument $dom
* @param MessageCatalogue $catalogue
* @param string $domain
*/
private function extractXliff2(\DOMDocument $dom, MessageCatalogue $catalogue, $domain)
{
$xml = simplexml_import_dom($dom);
$encoding = strtoupper($dom->encoding);

return $catalogue;
$xml->registerXPathNamespace('xliff', 'urn:oasis:names:tc:xliff:document:2.0');

foreach ($xml->xpath('//xliff:unit/xliff:segment') as $segment) {
$source = $segment->source;

// If the xlf file has another encoding specified, try to convert it because
// simple_xml will always return utf-8 encoded values
$target = $this->utf8ToCharset((string) (isset($segment->target) ? $segment->target : $source), $encoding);

$catalogue->set((string) $source, $target, $domain);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

support for notes and target attributes should be added for XLiff 2 for the equivalent features

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok I'll look into it

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I only added support for target attributes
but for notes there's no way to do a mapping because aren't assigned to specific segments, but to whole units.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@stof what do you think to take account of notes only when a unit contains one segment and ignore other cases ?

<unit id="1">
    <notes>
        <note priority="1" appliesTo="target">foo</note>
    </notes>
    <segment>
        <source>foo</source>
        <target>foo</target>
    </segment>
</unit>


$metadata = array();
if (isset($segment->target) && $segment->target->attributes()) {
$metadata['target-attributes'] = array();
foreach ($segment->target->attributes() as $key => $value) {
$metadata['target-attributes'][$key] = (string) $value;
}
}

$catalogue->setMetadata((string) $source, $metadata, $domain);
}
}

/**
Expand Down Expand Up @@ -103,51 +172,64 @@ private function utf8ToCharset($content, $encoding = null)
}

/**
* Validates and parses the given file into a SimpleXMLElement.
*
* @param string $file
*
* @throws \RuntimeException
*
* @return \SimpleXMLElement
* @param string $file
* @param \DOMDocument $dom
* @param string $schema source of the schema
*
* @throws InvalidResourceException
*/
private function parseFile($file)
private function validateSchema($file, \DOMDocument $dom, $schema)
{
try {
$dom = XmlUtils::loadFile($file);
} catch (\InvalidArgumentException $e) {
throw new InvalidResourceException(sprintf('Unable to load "%s": %s', $file, $e->getMessage()), $e->getCode(), $e);
}

$internalErrors = libxml_use_internal_errors(true);

$location = str_replace('\\', '/', __DIR__).'/schema/dic/xliff-core/xml.xsd';
$parts = explode('/', $location);
if (0 === stripos($location, 'phar://')) {
$tmpfile = tempnam(sys_get_temp_dir(), 'sf2');
if ($tmpfile) {
copy($location, $tmpfile);
$parts = explode('/', str_replace('\\', '/', $tmpfile));
}
}
$drive = '\\' === DIRECTORY_SEPARATOR ? array_shift($parts).'/' : '';
$location = 'file:///'.$drive.implode('/', array_map('rawurlencode', $parts));

$source = file_get_contents(__DIR__.'/schema/dic/xliff-core/xliff-core-1.2-strict.xsd');
$source = str_replace('http://www.w3.org/2001/xml.xsd', $location, $source);

if (!@$dom->schemaValidateSource($source)) {
if (!@$dom->schemaValidateSource($schema)) {
throw new InvalidResourceException(sprintf('Invalid resource provided: "%s"; Errors: %s', $file, implode("\n", $this->getXmlErrors($internalErrors))));
}

$dom->normalizeDocument();

libxml_clear_errors();
libxml_use_internal_errors($internalErrors);
}

private function getSchema($xliffVersion)
{
if ('1.2' === $xliffVersion) {
$schemaSource = file_get_contents(__DIR__.'/schema/dic/xliff-core/xliff-core-1.2-strict.xsd');
$xmlUri = 'http://www.w3.org/2001/xml.xsd';
} elseif ('2.0' === $xliffVersion) {
$schemaSource = file_get_contents(__DIR__.'/schema/dic/xliff-core/xliff-core-2.0.xsd');
$xmlUri = 'informativeCopiesOf3rdPartySchemas/w3c/xml.xsd';
} else {
throw new \InvalidArgumentException(sprintf('No support implemented for loading XLIFF version "%s".', $xliffVersion));
}

return array(simplexml_import_dom($dom), strtoupper($dom->encoding));
return $this->fixXmlLocation($schemaSource, $xmlUri);
}

/**
* Internally changes the URI of a dependent xsd to be loaded locally.
*
* @param string $schemaSource Current content of schema file
* @param string $xmlUri External URI of XML to convert to local
*
* @return string
*/
private function fixXmlLocation($schemaSource, $xmlUri)
{
$newPath = str_replace('\\', '/', __DIR__).'/schema/dic/xliff-core/xml.xsd';
$parts = explode('/', $newPath);
if (0 === stripos($newPath, 'phar://')) {
$tmpfile = tempnam(sys_get_temp_dir(), 'sf2');
if ($tmpfile) {
copy($newPath, $tmpfile);
$parts = explode('/', str_replace('\\', '/', $tmpfile));
}
}
$drive = '\\' === DIRECTORY_SEPARATOR ? array_shift($parts).'/' : '';
$newPath = 'file:///'.$drive.implode('/', array_map('rawurlencode', $parts));

return str_replace($xmlUri, $newPath, $schemaSource);
}

/**
Expand Down Expand Up @@ -178,6 +260,39 @@ private function getXmlErrors($internalErrors)
}

/**
* Gets xliff file version based on the root "version" attribute.
* Defaults to 1.2 for backwards compatibility.
*
* @param \DOMDocument $dom
*
* @throws \InvalidArgumentException
*
* @return string
*/
private function getVersionNumber(\DOMDocument $dom)
{
/** @var \DOMNode $xliff */
foreach ($dom->getElementsByTagName('xliff') as $xliff) {
$version = $xliff->attributes->getNamedItem('version');
if ($version) {
return $version->nodeValue;
}

$namespace = $xliff->attributes->getNamedItem('xmlns');
if ($namespace) {
if (substr_compare('urn:oasis:names:tc:xliff:document:', $namespace->nodeValue, 0, 34) !== 0) {
throw new \InvalidArgumentException(sprintf('Not a valid XLIFF namespace "%s"', $namespace));
}

return substr($namespace, 34);
}
}

// Falls back to v1.2
return '1.2';
}

/*
* @param \SimpleXMLElement|null $noteElement
* @param string|null $encoding
*
Expand Down
Loading