Skip to content

Commit 719b342

Browse files
committed
Shrink Unicode category table.
Missing entries can implicitly be considered "unassigned".

Discussion: https://postgr.es/m/ff4c2f2f9c8fc7ca27c1c24ae37ecaeaeaff6b53.camel@j-davis.com
1 parent d16a0c1 commit 719b342

File tree

3 files changed

+15
-723
lines changed

3 files changed

+15
-723
lines changed

src/common/unicode/generate-unicode_category_table.pl

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -72,15 +72,21 @@
7272
# the current range, emit the current range and initialize a new
7373
# range representing the gap.
7474
if ($range_end + 1 != $code && $range_category ne $gap_category) {
75-
push(@category_ranges, {start => $range_start, end => $range_end, category => $range_category});
75+
if ($range_category ne $CATEGORY_UNASSIGNED) {
76+
push(@category_ranges, {start => $range_start, end => $range_end,
77+
category => $range_category});
78+
}
7679
$range_start = $range_end + 1;
7780
$range_end = $code - 1;
7881
$range_category = $gap_category;
7982
}
8083

8184
# different category; new range
8285
if ($range_category ne $category) {
83-
push(@category_ranges, {start => $range_start, end => $range_end, category => $range_category});
86+
if ($range_category ne $CATEGORY_UNASSIGNED) {
87+
push(@category_ranges, {start => $range_start, end => $range_end,
88+
category => $range_category});
89+
}
8490
$range_start = $code;
8591
$range_end = $code;
8692
$range_category = $category;
@@ -109,14 +115,9 @@
109115
if $gap_category ne $CATEGORY_UNASSIGNED;
110116

111117
# emit final range
112-
push(@category_ranges, {start => $range_start, end => $range_end, category => $range_category});
113-
114-
# emit range for any unassigned code points after last entry
115-
if ($range_end < 0x10FFFF) {
116-
$range_start = $range_end + 1;
117-
$range_end = 0x10FFFF;
118-
$range_category = $CATEGORY_UNASSIGNED;
119-
push(@category_ranges, {start => $range_start, end => $range_end, category => $range_category});
118+
if ($range_category ne $CATEGORY_UNASSIGNED) {
119+
push(@category_ranges, {start => $range_start, end => $range_end,
120+
category => $range_category});
120121
}
121122

122123
my $num_ranges = scalar @category_ranges;

src/common/unicode_category.c

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,7 @@ unicode_category(pg_wchar ucs)
2828
int mid;
2929
int max = lengthof(unicode_categories) - 1;
3030

31-
Assert(ucs >= unicode_categories[0].first &&
32-
ucs <= unicode_categories[max].last);
31+
Assert(ucs <= 0x10ffff);
3332

3433
while (max >= min)
3534
{
@@ -42,8 +41,7 @@ unicode_category(pg_wchar ucs)
4241
return unicode_categories[mid].category;
4342
}
4443

45-
Assert(false);
46-
return (pg_unicode_category) - 1;
44+
return PG_U_UNASSIGNED;
4745
}
4846

4947
/*

0 commit comments

Comments
 (0)