Skip to content

Commit 274edfc

Browse files
committed
[String] Add the wcswidth() method
1 parent aa9ccf8 commit 274edfc

13 files changed

+2759
-0
lines changed

src/Symfony/Component/String/AbstractString.php

+5
Original file line numberDiff line numberDiff line change
@@ -646,6 +646,11 @@ public function truncate(int $length, string $ellipsis = ''): self
646646
*/
647647
abstract public function upper(): self;
648648

649+
/**
 * Returns the printable length of the string on a terminal.
 *
 * NOTE(review): the Unicode implementation added in this change returns -1 when the
 * string contains a non-printable character — confirm that every implementation
 * follows the same convention.
 */
652+
abstract public function wcswidth(): int;
653+
649654
abstract public function width(bool $ignoreAnsiDecoration = true): int;
650655

651656
/**

src/Symfony/Component/String/AbstractUnicodeString.php

+82
Original file line numberDiff line numberDiff line change
@@ -425,6 +425,88 @@ public function upper(): parent
425425
return $str;
426426
}
427427

428+
/**
 * {@inheritdoc}
 *
 * If the string contains a non-printable character, -1 is returned.
 *
 * Code points from the short inline list below or from the generated zero-width table
 * count for 0 columns, code points from the generated wide table count for 2 columns
 * and every other code point counts for 1 column.
 *
 * Based on https://github.com/jquast/wcwidth that is a Python implementation of https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c.
 */
public function wcswidth(): int
{
    // Lookup tables are lists of [first, last] code point pairs, loaded lazily and kept for later calls.
    static $tableZero, $tableWide;

    // Binary search telling whether $codePoint falls in one of $ranges.
    // The search assumes the ranges are sorted by their first code point and do not overlap.
    $inRanges = static function (int $codePoint, array $ranges): bool {
        $lbound = 0;
        $ubound = \count($ranges) - 1;

        // Cheap rejection of anything outside the span covered by the whole table.
        if (0 > $ubound || $codePoint < $ranges[0][0] || $codePoint > $ranges[$ubound][1]) {
            return false;
        }

        while ($lbound <= $ubound) {
            // Integer division keeps the bounds (and the array offsets) as integers.
            $mid = intdiv($lbound + $ubound, 2);

            if ($codePoint > $ranges[$mid][1]) {
                $lbound = $mid + 1;
            } elseif ($codePoint < $ranges[$mid][0]) {
                $ubound = $mid - 1;
            } else {
                return true;
            }
        }

        return false;
    };

    $width = 0;
    $length = $this->length();

    // Valid offsets are 0 .. $length - 1.
    for ($i = 0; $i < $length; ++$i) {
        foreach ($this->codePointsAt($i) as $codePoint) {
            if (
                0 === $codePoint || // NULL
                0x034F === $codePoint || // COMBINING GRAPHEME JOINER
                (0x200B <= $codePoint && 0x200F >= $codePoint) || // ZERO WIDTH SPACE to RIGHT-TO-LEFT MARK
                0x2028 === $codePoint || // LINE SEPARATOR
                0x2029 === $codePoint || // PARAGRAPH SEPARATOR
                (0x202A <= $codePoint && 0x202E >= $codePoint) || // LEFT-TO-RIGHT EMBEDDING to RIGHT-TO-LEFT OVERRIDE
                (0x2060 <= $codePoint && 0x2063 >= $codePoint) // WORD JOINER to INVISIBLE SEPARATOR
            ) {
                // Takes no column at all.
                continue;
            }

            if (
                32 > $codePoint || // C0 control characters
                (0x07F <= $codePoint && 0x0A0 > $codePoint) // C1 control characters and DEL
            ) {
                // Non-printable character: the width of the whole string is undefined.
                return -1;
            }

            if (null === $tableZero) {
                $tableZero = require __DIR__.'/Resources/data/wcswidth_table_zero.php';
            }

            if ($inRanges($codePoint, $tableZero)) {
                continue;
            }

            if (null === $tableWide) {
                $tableWide = require __DIR__.'/Resources/data/wcswidth_table_wide.php';
            }

            $width += $inRanges($codePoint, $tableWide) ? 2 : 1;
        }
    }

    return $width;
}
509+
428510
public function width(bool $ignoreAnsiDecoration = true): int
429511
{
430512
$width = 0;

src/Symfony/Component/String/ByteString.php

+8
Original file line numberDiff line numberDiff line change
@@ -458,6 +458,14 @@ public function upper(): parent
458458
return $str;
459459
}
460460

461+
/**
 * {@inheritdoc}
 *
 * Delegates to width() called with its default arguments.
 */
464+
public function wcswidth(): int
465+
{
466+
return $this->width();
467+
}
468+
461469
public function width(bool $ignoreAnsiDecoration = true): int
462470
{
463471
$width = 0;

src/Symfony/Component/String/CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ CHANGELOG
55
-----
66

77
* Added the `AbstractString::reverse()` method.
8+
* Added the `AbstractString::wcswidth()` method.
89

910
5.0.0
1011
-----
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\Component\String\Data;
13+
14+
use Symfony\Component\HttpClient\HttpClient;
15+
use Symfony\Component\String\Exception\RuntimeException;
16+
use Symfony\Component\VarExporter\VarExporter;
17+
18+
/**
 * Generates the PHP lookup tables read by the wcswidth() implementation.
 *
 * The source files are downloaded from https://www.unicode.org/Public/UNIDATA/ and the
 * resulting tables are written as PHP files into the output directory given to the constructor.
 *
 * @internal
 */
final class WcswidthDataGenerator
{
    private $outDir;

    private $client;

    /**
     * @param string $outDir Directory that receives the generated table files
     */
    public function __construct(string $outDir)
    {
        $this->outDir = $outDir;

        $this->client = HttpClient::createForBaseUri('https://www.unicode.org/Public/UNIDATA/');
    }

    /**
     * Downloads the source data and (re)writes both tables.
     *
     * @throws RuntimeException When a source file cannot be parsed or a table cannot be written
     */
    public function generate(): void
    {
        $this->writeWideWidthData();

        $this->writeZeroWidthData();
    }

    /**
     * Writes "wcswidth_table_wide.php" from the W and F entries of EastAsianWidth.txt.
     */
    private function writeWideWidthData(): void
    {
        $content = $this->client->request('GET', 'EastAsianWidth.txt')->getContent();

        if (!preg_match('/^# EastAsianWidth-(\d+\.\d+\.\d+)\.txt/', $content, $matches)) {
            throw new RuntimeException('The Unicode version could not be determined.');
        }

        $version = $matches[1];

        // Code points are hexadecimal (A-F only). Spaces around the ";" separator are optional,
        // matching what the zero-width pattern already tolerates before its separator.
        if (!preg_match_all('/^([A-F\d]{4,})(?:\.\.([A-F\d]{4,}))? *; *[WF]/m', $content, $matches, PREG_SET_ORDER)) {
            throw new RuntimeException('The wide width pattern did not match anything.');
        }

        $this->write('wcswidth_table_wide.php', $version, $matches);
    }

    /**
     * Writes "wcswidth_table_zero.php" from the Me and Mn entries of DerivedGeneralCategory.txt.
     */
    private function writeZeroWidthData(): void
    {
        $content = $this->client->request('GET', 'extracted/DerivedGeneralCategory.txt')->getContent();

        if (!preg_match('/^# DerivedGeneralCategory-(\d+\.\d+\.\d+)\.txt/', $content, $matches)) {
            throw new RuntimeException('The Unicode version could not be determined.');
        }

        $version = $matches[1];

        if (!preg_match_all('/^([A-F\d]{4,})(?:\.\.([A-F\d]{4,}))? *; (?:Me|Mn)/m', $content, $matches, PREG_SET_ORDER)) {
            throw new RuntimeException('The zero width pattern did not match anything.');
        }

        $this->write('wcswidth_table_zero.php', $version, $matches);
    }

    /**
     * Exports the formatted ranges as a PHP file returning an array.
     *
     * @param array $rawData Matches produced by preg_match_all() with PREG_SET_ORDER
     *
     * @throws RuntimeException When the file cannot be written
     */
    private function write(string $fileName, string $version, array $rawData): void
    {
        $content = $this->getHeader($version).'return '.VarExporter::export($this->format($rawData)).";\n";

        // Strict comparison: only an actual failure (false) is an error, not a byte count of 0.
        if (false === file_put_contents($this->outDir.'/'.$fileName, $content)) {
            throw new RuntimeException(sprintf('The "%s" file could not be written.', $fileName));
        }
    }

    /**
     * Builds the PHP file header: license block, then the Unicode version and the generation date.
     */
    private function getHeader(string $version): string
    {
        $date = (new \DateTimeImmutable())->format('c');

        return <<<EOT
<?php

/*
 * This file is part of the Symfony package.
 *
 * (c) Fabien Potencier <fabien@symfony.com>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

/*
 * This file has been auto-generated by the Symfony String Component for internal use.
 *
 * Unicode version: $version
 * Date: $date
 */


EOT;
    }

    /**
     * Converts regex matches into a list of [first, last] integer code point pairs,
     * sorted by first code point.
     *
     * @param array $rawData Rows holding the range start at index 1 and, for real ranges, the end at index 2
     *
     * @return array[]
     */
    private function format(array $rawData): array
    {
        $data = array_map(static function (array $row): array {
            $start = $row[1];
            // A single code point has no second capture group: it is its own range end.
            $end = $row[2] ?? $start;

            return [hexdec($start), hexdec($end)];
        }, $rawData);

        usort($data, static function (array $a, array $b): int {
            return $a[0] <=> $b[0];
        });

        return $data;
    }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
// Path of the autoloader, resolved two directories above this script.
$autoload = __DIR__.'/../../vendor/autoload.php';
13+
14+
// Stop with an explanatory message when the autoloader file does not exist.
// bailout() is not defined in this script — presumably the including script loads it first; TODO confirm.
if (!file_exists($autoload)) {
15+
bailout('You should run "composer install" in the component before running this script.');
16+
}
17+
18+
require_once $autoload;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
// Column at which bailout() wraps its message.
define('LINE_WIDTH', 75);
13+
14+
/**
 * Prints the message wrapped at LINE_WIDTH columns, followed by " Aborting.",
 * then terminates the script with exit status 1.
 */
function bailout(string $message): void
15+
{
16+
echo wordwrap($message, LINE_WIDTH)." Aborting.\n";
17+
18+
exit(1);
19+
}
20+
21+
// Report every PHP error level.
error_reporting(E_ALL);
22+
23+
// Turn each error passed to the handler into an \ErrorException that keeps
// the original message, severity, file and line.
set_error_handler(static function (int $type, string $msg, string $file, int $line): void {
24+
throw new \ErrorException($msg, 0, $type, $file, $line);
25+
});
26+
27+
// Print an uncaught exception, then each of its previous exceptions in turn.
set_exception_handler(static function (\Throwable $exception): void {
28+
echo "\n";
29+
30+
$cause = $exception;
31+
// True only while the first exception of the chain is being printed.
$root = true;
32+
33+
while (null !== $cause) {
34+
// Every exception after the first one is introduced by a "Caused by" line.
if (!$root) {
35+
echo "Caused by\n";
36+
}
37+
38+
// Class and message, a blank line, the file:line location, then the stack trace.
echo get_class($cause).': '.$cause->getMessage()."\n";
39+
echo "\n";
40+
echo $cause->getFile().':'.$cause->getLine()."\n";
41+
echo $cause->getTraceAsString()."\n";
42+
43+
// Move on to the previous exception; the loop ends when there is none.
$cause = $cause->getPrevious();
44+
$root = false;
45+
}
46+
});
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
use Symfony\Component\String\Data\WcswidthDataGenerator;
13+
14+
// common.php is loaded before autoload.php.
// NOTE(review): the autoload check in this change calls bailout(), which appears to be
// defined in common.php — confirm before reordering these two lines.
require_once __DIR__.'/common.php';
15+
require_once __DIR__.'/autoload.php';
16+
17+
echo "Generating wcswidth tables data...\n";
18+
19+
// The tables are written to the "data" directory next to this script's own directory.
(new WcswidthDataGenerator(dirname(__DIR__).'/data'))->generate();
20+
21+
echo "Done.\n";

0 commit comments

Comments
 (0)