Skip to content

Commit 5338313

Browse files
authored
StringUtils.wrapText no longer splits surrogate pairs. (#103)
Previously, `StringUtils.wrapText(".\uD800\uDC00.", 2)` (where index 2 falls between the high and low surrogate) would insert a `<br/>` between the two paired surrogates. Now, it shifts the insertion point to a whole code-point boundary.
1 parent 55d71fe commit 5338313

File tree

2 files changed

+15
-3
lines changed

2 files changed

+15
-3
lines changed

java-diff-utils/src/main/java/com/github/difflib/text/StringUtils.java

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,10 @@
2121
final class StringUtils {
2222

2323
/**
24-
* Replaces all opening an closing tags with <code>&lt;</code> or <code>&gt;</code>.
24+
* Replaces all opening and closing tags with <code>&lt;</code> or <code>&gt;</code>.
2525
*
2626
* @param str
27-
* @return
27+
* @return str with some HTML meta characters escaped.
2828
*/
2929
public static String htmlEntites(String str) {
3030
return str.replace("<", "&lt;").replace(">", "&gt;");
@@ -61,7 +61,17 @@ public static String wrapText(String line, int columnWidth) {
6161
StringBuilder b = new StringBuilder(line);
6262

6363
for (int count = 0; length > widthIndex; count++) {
64-
b.insert(widthIndex + delimiter * count, "<br/>");
64+
int breakPoint = widthIndex + delimiter * count;
65+
if (Character.isHighSurrogate(b.charAt(breakPoint - 1)) &&
66+
Character.isLowSurrogate(b.charAt(breakPoint))) {
67+
// Shift a breakpoint that would split a supplemental code-point.
68+
breakPoint += 1;
69+
if (breakPoint == b.length()) {
70+
// Break before instead of after if this is the last code-point.
71+
breakPoint -= 2;
72+
}
73+
}
74+
b.insert(breakPoint, "<br/>");
6575
widthIndex += columnWidth;
6676
}
6777

java-diff-utils/src/test/java/com/github/difflib/text/StringUtilsTest.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ public void testWrapText_String_int() {
4949
assertEquals("te<br/>st", StringUtils.wrapText("test", 2));
5050
assertEquals("tes<br/>t", StringUtils.wrapText("test", 3));
5151
assertEquals("test", StringUtils.wrapText("test", 10));
52+
assertEquals(".\uD800\uDC01<br/>.", StringUtils.wrapText(".\uD800\uDC01.", 2));
53+
assertEquals("..<br/>\uD800\uDC01", StringUtils.wrapText("..\uD800\uDC01", 3));
5254
}
5355

5456
@Test

0 commit comments

Comments
 (0)