@@ -352,9 +352,6 @@ public function replaceMatches(string $fromRegexp, $to): parent
352
352
return $ str ;
353
353
}
354
354
355
- /**
356
- * {@inheritdoc}
357
- */
358
355
public function reverse (): parent
359
356
{
360
357
$ str = clone $ this ;
@@ -444,22 +441,21 @@ public function width(bool $ignoreAnsiDecoration = true): int
444
441
$ s = str_replace (["\r\n" , "\r" ], "\n" , $ s );
445
442
}
446
443
444
+ if (!$ ignoreAnsiDecoration ) {
445
+ $ s = preg_replace ('/[\p{Cc}\x7F]++/u ' , '' , $ s );
446
+ }
447
+
447
448
foreach (explode ("\n" , $ s ) as $ s ) {
448
449
if ($ ignoreAnsiDecoration ) {
449
- $ s = preg_replace ('/\x1B(?:
450
+ $ s = preg_replace ('/(?: \x1B(?:
450
451
\[ [\x30-\x3F]*+ [\x20-\x2F]*+ [0x40-\x7E]
451
452
| [P\]X^_] .*? \x1B \\\\
452
453
| [\x41-\x7E]
453
- )/x ' , '' , $ s );
454
+ )|[\p{Cc}\x7F]++)/xu ' , '' , $ s );
454
455
}
455
456
456
- $ w = substr_count ($ s , "\xAD" ) - substr_count ($ s , "\x08" );
457
- $ s = preg_replace ('/[\x00\x05\x07\p{Mn}\p{Me}\p{Cf}\x{1160}-\x{11FF}\x{200B}]+/u ' , '' , $ s );
458
- $ s = preg_replace ('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u ' , '' , $ s , -1 , $ wide );
459
-
460
- if ($ width < $ w += mb_strlen ($ s , 'UTF-8 ' ) + ($ wide << 1 )) {
461
- $ width = $ w ;
462
- }
457
+ // Non printable characters have been dropped, so wcswidth cannot logically return -1.
458
+ $ width += $ this ->wcswidth ($ s );
463
459
}
464
460
465
461
return $ width ;
@@ -503,4 +499,80 @@ private function pad(int $len, self $pad, int $type): parent
503
499
throw new InvalidArgumentException ('Invalid padding type. ' );
504
500
}
505
501
}
502
+
503
/**
 * Computes the number of terminal columns needed to display a UTF-8 string.
 *
 * Based on https://github.com/jquast/wcwidth, a Python implementation of https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c.
 *
 * Each code point contributes 0, 1 or 2 columns:
 *  - 0 for NULL, a fixed set of invisible formatting characters, and any code point found in the "zero" table;
 *  - 2 for any code point found in the "wide" table;
 *  - 1 otherwise.
 *
 * @param string $string the string to measure; NOTE(review): presumably valid UTF-8, since it is split with the "u" modifier - confirm against callers
 *
 * @return int the accumulated width, or -1 as soon as a C0/C1 control character or DEL is encountered
 */
private function wcswidth(string $string): int
{
    // Range tables are loaded on first use only and kept in static locals for subsequent calls.
    static $tableZero;
    static $tableWide;

    $width = 0;

    foreach (preg_split('//u', $string, -1, PREG_SPLIT_NO_EMPTY) as $c) {
        $codePoint = mb_ord($c, 'UTF-8');

        // Characters that never occupy a column.
        if (0 === $codePoint // NULL
            || 0x034F === $codePoint // COMBINING GRAPHEME JOINER
            || (0x200B <= $codePoint && 0x200F >= $codePoint) // ZERO WIDTH SPACE to RIGHT-TO-LEFT MARK
            || 0x2028 === $codePoint // LINE SEPARATOR
            || 0x2029 === $codePoint // PARAGRAPH SEPARATOR
            || (0x202A <= $codePoint && 0x202E >= $codePoint) // LEFT-TO-RIGHT EMBEDDING to RIGHT-TO-LEFT OVERRIDE
            || (0x2060 <= $codePoint && 0x2063 >= $codePoint) // WORD JOINER to INVISIBLE SEPARATOR
        ) {
            continue;
        }

        // Non printable characters
        if (32 > $codePoint // C0 control characters
            || (0x07F <= $codePoint && 0x0A0 > $codePoint) // C1 control characters and DEL
        ) {
            return -1;
        }

        if (null === $tableZero) {
            $tableZero = require __DIR__.'/Resources/data/wcswidth_table_zero.php';
        }

        // Binary search in the zero-width table.
        // NOTE(review): assumes the table is a sorted list of [first, last] pairs - the search relies on it.
        $lbound = 0;
        $ubound = \count($tableZero) - 1;

        if ($codePoint >= $tableZero[0][0] && $codePoint <= $tableZero[$ubound][1]) {
            while ($ubound >= $lbound) {
                // Integer midpoint: keeps array offsets as int instead of the float returned by floor().
                $mid = ($lbound + $ubound) >> 1;

                if ($codePoint > $tableZero[$mid][1]) {
                    $lbound = $mid + 1;
                } elseif ($codePoint < $tableZero[$mid][0]) {
                    $ubound = $mid - 1;
                } else {
                    // Zero-width code point: contributes nothing.
                    continue 2;
                }
            }
        }

        if (null === $tableWide) {
            $tableWide = require __DIR__.'/Resources/data/wcswidth_table_wide.php';
        }

        // Same binary search, this time in the double-width table.
        $lbound = 0;
        $ubound = \count($tableWide) - 1;

        if ($codePoint >= $tableWide[0][0] && $codePoint <= $tableWide[$ubound][1]) {
            while ($ubound >= $lbound) {
                $mid = ($lbound + $ubound) >> 1;

                if ($codePoint > $tableWide[$mid][1]) {
                    $lbound = $mid + 1;
                } elseif ($codePoint < $tableWide[$mid][0]) {
                    $ubound = $mid - 1;
                } else {
                    $width += 2;

                    continue 2;
                }
            }
        }

        // Found in neither table: a regular single-column character.
        ++$width;
    }

    return $width;
}
506
578
}
0 commit comments