Skip to content

[Routing] Add seamless support for unicode requirements #19604

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 25, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/Symfony/Component/Routing/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ CHANGELOG
-----

* Added support for `bool`, `int`, `float`, `string`, `list` and `map` defaults in XML configurations.
* Added support for UTF-8 requirements

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unicode is a proper noun, so it should be capitalized.

2.8.0
-----
Expand Down
4 changes: 2 additions & 2 deletions src/Symfony/Component/Routing/Generator/UrlGenerator.php
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ protected function doGenerate($variables, $defaults, $requirements, $tokens, $pa
if ('variable' === $token[0]) {
if (!$optional || !array_key_exists($token[3], $defaults) || null !== $mergedParams[$token[3]] && (string) $mergedParams[$token[3]] !== (string) $defaults[$token[3]]) {
// check requirement
if (null !== $this->strictRequirements && !preg_match('#^'.$token[2].'$#', $mergedParams[$token[3]])) {
if (null !== $this->strictRequirements && !preg_match('#^'.$token[2].'$#'.(empty($token[4]) ? '' : 'u'), $mergedParams[$token[3]])) {
if ($this->strictRequirements) {
throw new InvalidParameterException(strtr($message, array('{parameter}' => $token[3], '{route}' => $name, '{expected}' => $token[2], '{given}' => $mergedParams[$token[3]])));
}
Expand Down Expand Up @@ -212,7 +212,7 @@ protected function doGenerate($variables, $defaults, $requirements, $tokens, $pa
$routeHost = '';
foreach ($hostTokens as $token) {
if ('variable' === $token[0]) {
if (null !== $this->strictRequirements && !preg_match('#^'.$token[2].'$#i', $mergedParams[$token[3]])) {
if (null !== $this->strictRequirements && !preg_match('#^'.$token[2].'$#i'.(empty($token[4]) ? '' : 'u'), $mergedParams[$token[3]])) {
if ($this->strictRequirements) {
throw new InvalidParameterException(strtr($message, array('{parameter}' => $token[3], '{route}' => $name, '{expected}' => $token[2], '{given}' => $mergedParams[$token[3]])));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -223,8 +223,9 @@ private function compileRoute(Route $route, $name, $supportsRedirections, $paren
}

$supportsTrailingSlash = $supportsRedirections && (!$methods || in_array('HEAD', $methods));
$regex = $compiledRoute->getRegex();

if (!count($compiledRoute->getPathVariables()) && false !== preg_match('#^(.)\^(?P<url>.*?)\$\1#', $compiledRoute->getRegex(), $m)) {
if (!count($compiledRoute->getPathVariables()) && false !== preg_match('#^(.)\^(?P<url>.*?)\$\1#'.(substr($regex, -1) === 'u' ? 'u' : ''), $regex, $m)) {
if ($supportsTrailingSlash && substr($m['url'], -1) === '/') {
$conditions[] = sprintf("rtrim(\$pathinfo, '/') === %s", var_export(rtrim(str_replace('\\', '', $m['url']), '/'), true));
$hasTrailingSlash = true;
Expand All @@ -236,7 +237,6 @@ private function compileRoute(Route $route, $name, $supportsRedirections, $paren
$conditions[] = sprintf('0 === strpos($pathinfo, %s)', var_export($compiledRoute->getStaticPrefix(), true));
}

$regex = $compiledRoute->getRegex();
if ($supportsTrailingSlash && $pos = strpos($regex, '/$')) {
$regex = substr($regex, 0, $pos).'/?$'.substr($regex, $pos + 2);
$hasTrailingSlash = true;
Expand Down
1 change: 1 addition & 0 deletions src/Symfony/Component/Routing/Route.php
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ class Route implements \Serializable
* Available options:
*
* * compiler_class: A class name able to compile this route instance (RouteCompiler by default)
* * utf8: Whether UTF-8 matching is enforced ot not
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

utf8 is now an option on the route. ping @stof (I don't know were I've got this idea that it wasn't possible...)

*
* @param string $path The path pattern to match
* @param array $defaults An array of default parameter values
Expand Down
61 changes: 53 additions & 8 deletions src/Symfony/Component/Routing/RouteCompiler.php
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,16 @@ private static function compilePattern(Route $route, $pattern, $isHost)
$matches = array();
$pos = 0;
$defaultSeparator = $isHost ? '.' : '/';
$useUtf8 = preg_match('//u', $pattern);
$needsUtf8 = $route->getOption('utf8');

if (!$needsUtf8 && $useUtf8 && preg_match('/[\x80-\xFF]/', $pattern)) {
$needsUtf8 = true;
@trigger_error(sprintf('Using UTF-8 route patterns without setting the "utf8" option is deprecated since Symfony 3.2 and will throw a LogicException in 4.0. Turn on the "utf8" route option for pattern "%s".', $pattern), E_USER_DEPRECATED);
}
if (!$useUtf8 && $needsUtf8) {
throw new \LogicException(sprintf('Cannot mix UTF-8 requirements with non-UTF-8 pattern "%s".', $pattern));
}

// Match all variables enclosed in "{}" and iterate over them. But we only want to match the innermost variable
// in case of nested "{}", e.g. {foo{bar}}. This in ensured because \w does not match "{" or "}" itself.
Expand All @@ -100,7 +110,15 @@ private static function compilePattern(Route $route, $pattern, $isHost)
// get all static text preceding the current variable
$precedingText = substr($pattern, $pos, $match[0][1] - $pos);
$pos = $match[0][1] + strlen($match[0][0]);
$precedingChar = strlen($precedingText) > 0 ? substr($precedingText, -1) : '';

if (!strlen($precedingText)) {
$precedingChar = '';
} elseif ($useUtf8) {
preg_match('/.$/u', $precedingText, $precedingChar);
$precedingChar = $precedingChar[0];
} else {
$precedingChar = substr($precedingText, -1);
}
$isSeparator = '' !== $precedingChar && false !== strpos(static::SEPARATORS, $precedingChar);

if (is_numeric($varName)) {
Expand All @@ -110,8 +128,8 @@ private static function compilePattern(Route $route, $pattern, $isHost)
throw new \LogicException(sprintf('Route pattern "%s" cannot reference variable name "%s" more than once.', $pattern, $varName));
}

if ($isSeparator && strlen($precedingText) > 1) {
$tokens[] = array('text', substr($precedingText, 0, -1));
if ($isSeparator && $precedingText !== $precedingChar) {
$tokens[] = array('text', substr($precedingText, 0, -strlen($precedingChar)));
} elseif (!$isSeparator && strlen($precedingText) > 0) {
$tokens[] = array('text', $precedingText);
}
Expand All @@ -126,7 +144,7 @@ private static function compilePattern(Route $route, $pattern, $isHost)
// If {page} would also match the separating dot, {_format} would never match as {page} will eagerly consume everything.
// Also even if {_format} was not optional the requirement prevents that {page} matches something that was originally
// part of {_format} when generating the URL, e.g. _format = 'mobile.html'.
$nextSeparator = self::findNextSeparator($followingPattern);
$nextSeparator = self::findNextSeparator($followingPattern, $useUtf8);
$regexp = sprintf(
'[^%s%s]+',
preg_quote($defaultSeparator, self::REGEX_DELIMITER),
Expand All @@ -140,6 +158,16 @@ private static function compilePattern(Route $route, $pattern, $isHost)
// directly adjacent, e.g. '/{x}{y}'.
$regexp .= '+';
}
} else {
if (!preg_match('//u', $regexp)) {
$useUtf8 = false;
} elseif (!$needsUtf8 && preg_match('/[\x80-\xFF]|(?<!\\\\)\\\\(?:\\\\\\\\)*+(?-i:X|[pP][\{CLMNPSZ]|x\{[A-Fa-f0-9]{3})/', $regexp)) {
$needsUtf8 = true;
@trigger_error(sprintf('Using UTF-8 route requirements without setting the "utf8" option is deprecated since Symfony 3.2 and will throw a LogicException in 4.0. Turn on the "utf8" route option for variable "%s" in pattern "%s".', $varName, $pattern), E_USER_DEPRECATED);
}
if (!$useUtf8 && $needsUtf8) {
throw new \LogicException(sprintf('Cannot mix UTF-8 requirement with non-UTF-8 charset for variable "%s" in pattern "%s".', $varName, $pattern));
}
}

$tokens[] = array('variable', $isSeparator ? $precedingChar : '', $regexp, $varName);
Expand Down Expand Up @@ -168,10 +196,21 @@ private static function compilePattern(Route $route, $pattern, $isHost)
for ($i = 0, $nbToken = count($tokens); $i < $nbToken; ++$i) {
$regexp .= self::computeRegexp($tokens, $i, $firstOptional);
}
$regexp = self::REGEX_DELIMITER.'^'.$regexp.'$'.self::REGEX_DELIMITER.'s'.($isHost ? 'i' : '');

// enable Utf8 matching if really required
if ($needsUtf8) {
$regexp .= 'u';
for ($i = 0, $nbToken = count($tokens); $i < $nbToken; ++$i) {
if ('variable' === $tokens[$i][0]) {
$tokens[$i][] = true;
}
}
}

return array(
'staticPrefix' => 'text' === $tokens[0][0] ? $tokens[0][1] : '',
'regex' => self::REGEX_DELIMITER.'^'.$regexp.'$'.self::REGEX_DELIMITER.'s'.($isHost ? 'i' : ''),
'regex' => $regexp,
'tokens' => array_reverse($tokens),
'variables' => $variables,
);
Expand All @@ -181,19 +220,25 @@ private static function compilePattern(Route $route, $pattern, $isHost)
* Returns the next static character in the Route pattern that will serve as a separator.
*
* @param string $pattern The route pattern
* @param bool $useUtf8 Whether the character is encoded in UTF-8 or not
*
* @return string The next static character that functions as separator (or empty string when none available)
*/
private static function findNextSeparator($pattern)
private static function findNextSeparator($pattern, $useUtf8)
{
if ('' == $pattern) {
// return empty string if pattern is empty or false (false which can be returned by substr)
return '';
}
// first remove all placeholders from the pattern so we can find the next real static character
$pattern = preg_replace('#\{\w+\}#', '', $pattern);
if ('' === $pattern = preg_replace('#\{\w+\}#', '', $pattern)) {
return '';
}
if ($useUtf8) {
preg_match('/^./u', $pattern, $pattern);
}

return isset($pattern[0]) && false !== strpos(static::SEPARATORS, $pattern[0]) ? $pattern[0] : '';
return false !== strpos(static::SEPARATORS, $pattern[0]) ? $pattern[0] : '';
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,15 @@ public function testGenerateForRouteWithInvalidMandatoryParameter()
$this->getGenerator($routes)->generate('test', array('foo' => 'bar'), UrlGeneratorInterface::ABSOLUTE_URL);
}

/**
* @expectedException \Symfony\Component\Routing\Exception\InvalidParameterException
*/
public function testGenerateForRouteWithInvalidUtf8Parameter()
{
$routes = $this->getRoutes('test', new Route('/testing/{foo}', array(), array('foo' => '\pL+'), array('utf8' => true)));
$this->getGenerator($routes)->generate('test', array('foo' => 'abc123'), UrlGeneratorInterface::ABSOLUTE_URL);
}

/**
* @expectedException \Symfony\Component\Routing\Exception\InvalidParameterException
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ public function testMatchNonAlpha()
{
$collection = new RouteCollection();
$chars = '!"$%éà &\'()*+,./:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ\\[]^_`abcdefghijklmnopqrstuvwxyz{|}~-';
$collection->add('foo', new Route('/{foo}/bar', array(), array('foo' => '['.preg_quote($chars).']+')));
$collection->add('foo', new Route('/{foo}/bar', array(), array('foo' => '['.preg_quote($chars).']+'), array('utf8' => true)));

$matcher = new UrlMatcher($collection, new RequestContext());
$this->assertEquals(array('_route' => 'foo', 'foo' => $chars), $matcher->match('/'.rawurlencode($chars).'/bar'));
Expand Down
108 changes: 108 additions & 0 deletions src/Symfony/Component/Routing/Tests/RouteCompilerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@

namespace Symfony\Component\Routing\Tests;

use Symfony\Bridge\PhpUnit\ErrorAssert;
use Symfony\Component\Routing\Route;
use Symfony\Component\Routing\RouteCompiler;

class RouteCompilerTest extends \PHPUnit_Framework_TestCase
{
Expand Down Expand Up @@ -162,6 +164,87 @@ public function provideCompileData()
array('text', '/foo'),
),
),

array(
'Static non UTF-8 route',
array("/fo\xE9"),
"/fo\xE9", "#^/fo\xE9$#s", array(), array(
array('text', "/fo\xE9"),
),
),

array(
'Route with an explicit UTF-8 requirement',
array('/{bar}', array('bar' => null), array('bar' => '.'), array('utf8' => true)),
'', '#^/(?P<bar>.)?$#su', array('bar'), array(
array('variable', '/', '.', 'bar', true),
),
),
);
}

/**
* @dataProvider provideCompileImplicitUtf8Data
* @requires function Symfony\Bridge\PhpUnit\ErrorAssert::assertDeprecationsAreTriggered
*/
public function testCompileImplicitUtf8Data($name, $arguments, $prefix, $regex, $variables, $tokens, $deprecationType)
{
$deprecations = array(
sprintf('Using UTF-8 route %s without setting the "utf8" option is deprecated', $deprecationType),
);

ErrorAssert::assertDeprecationsAreTriggered($deprecations, function () use ($name, $arguments, $prefix, $regex, $variables, $tokens) {
$r = new \ReflectionClass('Symfony\\Component\\Routing\\Route');
$route = $r->newInstanceArgs($arguments);

$compiled = $route->compile();
$this->assertEquals($prefix, $compiled->getStaticPrefix(), $name.' (static prefix)');
$this->assertEquals($regex, $compiled->getRegex(), $name.' (regex)');
$this->assertEquals($variables, $compiled->getVariables(), $name.' (variables)');
$this->assertEquals($tokens, $compiled->getTokens(), $name.' (tokens)');
});
}

public function provideCompileImplicitUtf8Data()
{
return array(
array(
'Static UTF-8 route',
array('/foé'),
'/foé', '#^/foé$#su', array(), array(
array('text', '/foé'),
),
'patterns',
),

array(
'Route with an implicit UTF-8 requirement',
array('/{bar}', array('bar' => null), array('bar' => 'é')),
'', '#^/(?P<bar>é)?$#su', array('bar'), array(
array('variable', '/', 'é', 'bar', true),
),
'requirements',
),

array(
'Route with a UTF-8 class requirement',
array('/{bar}', array('bar' => null), array('bar' => '\pM')),
'', '#^/(?P<bar>\pM)?$#su', array('bar'), array(
array('variable', '/', '\pM', 'bar', true),
),
'requirements',
),

array(
'Route with a UTF-8 separator',
array('/foo/{bar}§{_format}', array(), array(), array('compiler_class' => Utf8RouteCompiler::class)),
'/foo', '#^/foo/(?P<bar>[^/§]++)§(?P<_format>[^/]++)$#su', array('bar', '_format'), array(
array('variable', '§', '[^/]++', '_format', true),
array('variable', '/', '[^/§]++', 'bar', true),
array('text', '/foo'),
),
'patterns',
),
);
}

Expand All @@ -175,6 +258,26 @@ public function testRouteWithSameVariableTwice()
$compiled = $route->compile();
}

/**
* @expectedException \LogicException
*/
public function testRouteCharsetMismatch()
{
$route = new Route("/\xE9/{bar}", array(), array('bar' => '.'), array('utf8' => true));

$compiled = $route->compile();
}

/**
* @expectedException \LogicException
*/
public function testRequirementCharsetMismatch()
{
$route = new Route('/foo/{bar}', array(), array('bar' => "\xE9"), array('utf8' => true));

$compiled = $route->compile();
}

/**
* @expectedException \InvalidArgumentException
*/
Expand Down Expand Up @@ -275,3 +378,8 @@ public function provideCompileWithHostData()
);
}
}

class Utf8RouteCompiler extends RouteCompiler
{
const SEPARATORS = '/§';
}