Skip to content

Commit f955dca

Browse files
committed
Fixed some case-folding and added Table A.1 for IDNA (ethers-io#42).
1 parent c09de16 commit f955dca

File tree

10 files changed

+10456
-87
lines changed

10 files changed

+10456
-87
lines changed

packages/strings/src.ts/idna.ts

Lines changed: 69 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,12 @@
22

33
import { toUtf8CodePoints, _toUtf8String, UnicodeNormalizationForm } from "./utf8";
44

5-
let _tmp = 0;
6-
75
type Ranged = {
8-
l: number,
9-
h: number,
10-
d?: number,
11-
s?: number,
12-
e?: Array<number>
6+
l: number, // Lo value
7+
h: number, // High value (less the lo)
8+
d?: number, // Delta/stride (default: 1)
9+
s?: number, // Shift (default: 1)
10+
e?: Array<number> // Exceptions to skip
1311
};
1412

1513
type Table = { [ src: number ]: Array<number> };
@@ -40,6 +38,37 @@ function createTable(data: string, func?: (value: string) => Array<number>): Tab
4038
return result;
4139
}
4240

41+
// Decodes a compact, comma-separated description of code point ranges
// into the relative Ranged entries consumed by matchMap.
//
// Each entry is "GAP", "GAP-" or "GAP-SPAN" (all hexadecimal):
//  - GAP  is the distance from the END of the previous range to the
//         start of this one (the first entry is measured from 0)
//  - SPAN is how many further code points follow the start; omitting
//         "-SPAN" means 0 (a single code point) and a bare trailing
//         "-" means 1 (two adjacent code points)
//
// The returned "l" is the previous entry's "h" plus GAP, i.e. the offset
// from the START of the previous range, which is what matchMap
// accumulates. Only "l" and "h" are populated (no stride, shift or
// exceptions).
//
// An empty string yields an empty table. A malformed entry throws, since
// a NaN bound would otherwise silently poison the running offset in
// matchMap and disable every range after it.
function createRangeTable(data: string): Array<Ranged> {
    if (data === "") { return [ ]; }

    const validEntry = /^[0-9a-f]+(-[0-9a-f]*)?$/i;

    // Span of the previous entry; added to the next GAP (see above)
    let hi = 0;

    return data.split(",").map((v) => {
        if (!validEntry.test(v)) {
            throw new Error("invalid range entry: " + JSON.stringify(v));
        }

        const comps = v.split("-");
        if (comps.length === 1) {
            // "GAP": a single code point
            comps[1] = "0";
        } else if (comps[1] === "") {
            // "GAP-": shorthand for a span of 1
            comps[1] = "1";
        }

        const lo = hi + parseInt(comps[0], 16);
        hi = parseInt(comps[1], 16);
        return { l: lo, h: hi };
    });
}
56+
57+
// Returns the first entry in ranges that contains value, or null if no
// entry matches.
//
// Entries are relative: each entry's "l" is added to a running offset,
// so it is measured from the start of the previous entry. An entry
// matches when:
//  - value lies within [start, start + h] (inclusive), and
//  - (value - start) is a multiple of the stride "d" (1 when absent), and
//  - (value - start) is not listed in the exceptions "e"
//
// A value excluded by "e" does not end the search; later entries are
// still considered.
function matchMap(value: number, ranges: Array<Ranged>): Ranged | null {
    // Absolute start of the current entry
    let lo = 0;

    for (let i = 0; i < ranges.length; i++) {
        const range = ranges[i];
        lo += range.l;

        if (value >= lo && value <= lo + range.h && ((value - lo) % (range.d || 1)) === 0) {
            if (range.e && range.e.indexOf(value - lo) !== -1) { continue; }
            return range;
        }
    }

    return null;
}
69+
70+
// Table A.1 (unassigned code points), used by _nameprepTableA1.
// The string is the compact form decoded by createRangeTable: a
// comma-separated list of hexadecimal "GAP", "GAP-" or "GAP-SPAN"
// entries, each measured from the end of the previous range.
const Table_A_1_ranges = createRangeTable("221,13-1b,5f-,40-10,51-f,11-3,3-3,2-2,2-4,8,2,15,2d,28-8,88,48,27-,3-5,11-20,27-,8,28,3-5,12,18,b-a,1c-4,6-16,2-d,2-2,2,1b-4,17-9,8f-,10,f,1f-2,1c-34,33-14e,4,36-,13-,6-2,1a-f,4,9-,3-,17,8,2-2,5-,2,8-,3-,4-8,2-3,3,6-,16-6,2-,7-3,3-,17,8,3,3,3-,2,6-3,3-,4-a,5,2-6,10-b,4,8,2,4,17,8,3,6-,b,4,4-,2-e,2-4,b-10,4,9-,3-,17,8,3-,5-,9-2,3-,4-7,3-3,3,4-3,c-10,3,7-2,4,5-2,3,2,3-2,3-2,4-2,9,4-3,6-2,4,5-8,2-e,d-d,4,9,4,18,b,6-3,8,4,5-6,3-8,3-3,b-11,3,9,4,18,b,6-3,8,4,5-6,3-6,2,3-3,b-11,3,9,4,18,11-3,7-,4,5-8,2-7,3-3,b-11,3,13-2,19,a,2-,8-2,2-3,7,2,9-11,4-b,3b-3,1e-24,3,2-,3,2-,2-5,5,8,4,2,2-,3,e,4-,6,2,7-,b-,3-21,49,23-5,1c-3,9,25,10-,2-2f,23,6,3,8-2,5-5,1b-45,27-9,2a-,2-3,5b-4,45-4,53-5,8,40,2,5-,8,2,5-,28,2,5-,20,2,5-,8,2,5-,8,8,18,20,2,5-,8,28,14-5,1d-22,56-b,277-8,1e-2,52-e,e,8-a,18-8,15-b,e,4,3-b,5e-2,b-15,10,b-5,59-7,2b-555,9d-3,5b-5,17-,7-,27-,7-,9,2,2,2,20-,36,10,f-,7,14-,4,a,54-3,2-6,6-5,9-,1c-10,13-1d,1c-14,3c-,10-6,32-b,240-30,28-18,c-14,a0,115-,3,66-,b-76,5,5-,1d,24,2,5-2,2,8-,35-2,19,f-10,1d-3,311-37f,1b,5a-b,d7-19,d-3,41,57-,68-4,29-3,5f,29-37,2e-2,25-c,2c-2,4e-3,30,78-3,64-,20,19b7-49,51a7-59,48e-2,38-738,2ba5-5b,222f-,3c-94,8-b,6-4,1b,6,2,3,3,6d-20,16e-f,41-,37-7,2e-2,11-f,5-b,18-,b,14,5-3,6,88-,2,bf-2,7-,7-,7-,4-2,8,8-9,8-2ff,20,5-b,1c-b4,27-,27-cbb1,f7-9,28-2,b5-221,56,48,3-,2-,3-,5,d,2,5,3,42,5-,9,8,1d,5,6,2-2,8,153-3,123-3,33-27fd,a6da-5128,21f-5df,3-fffd,3-fffd,3-fffd,3-fffd,3-fffd,3-fffd,3-fffd,3-fffd,3-fffd,3-fffd,3-fffd,3,2-1d,61-ff7d");
71+
4372
// @TODO: Make this relative...
4473
// Individual code points for Table B.1, listed as absolute hexadecimal
// values (not yet delta-encoded like the range tables).
const Table_B_1_flags: Array<number> = [ ];
for (const hex of "ad,34f,1806,180b,180c,180d,200b,200c,200d,2060,feff".split(",")) {
    Table_B_1_flags.push(parseInt(hex, 16));
}
4574

@@ -91,31 +120,8 @@ const Table_B_2_lut_abs = createTable("b5:3bc,c3:ff,7:73,2:253,5:254,3:256,1:257
91120
const Table_B_2_lut_rel = createTable("179:1,2:1,2:1,5:1,2:1,a:4f,a:1,8:1,2:1,2:1,3:1,5:1,3:1,4:1,2:1,3:1,4:1,8:2,1:1,2:2,1:1,2:2,27:2,195:26,2:25,1:25,1:25,2:40,2:3f,1:3f,33:1,11:-6,1:-9,1ac7:-3a,6d:-8,1:-8,1:-8,1:-8,1:-8,1:-8,1:-8,1:-8,9:-8,1:-8,1:-8,1:-8,1:-8,1:-8,b:-8,1:-8,1:-8,1:-8,1:-8,1:-8,1:-8,1:-8,9:-8,1:-8,1:-8,1:-8,1:-8,1:-8,1:-8,1:-8,9:-8,1:-8,1:-8,1:-8,1:-8,1:-8,c:-8,2:-8,2:-8,2:-8,9:-8,1:-8,1:-8,1:-8,1:-8,1:-8,1:-8,1:-8,49:-8,1:-8,1:-4a,1:-4a,d:-56,1:-56,1:-56,1:-56,d:-8,1:-8,f:-8,1:-8,3:-7");
92121
const Table_B_2_complex = createTable("df:00730073,51:00690307,19:02BC006E,a7:006A030C,18a:002003B9,16:03B903080301,20:03C503080301,1d7:05650582,190f:00680331,1:00740308,1:0077030A,1:0079030A,1:006102BE,b6:03C50313,2:03C503130300,2:03C503130301,2:03C503130342,2a:1F0003B9,1:1F0103B9,1:1F0203B9,1:1F0303B9,1:1F0403B9,1:1F0503B9,1:1F0603B9,1:1F0703B9,1:1F0003B9,1:1F0103B9,1:1F0203B9,1:1F0303B9,1:1F0403B9,1:1F0503B9,1:1F0603B9,1:1F0703B9,1:1F2003B9,1:1F2103B9,1:1F2203B9,1:1F2303B9,1:1F2403B9,1:1F2503B9,1:1F2603B9,1:1F2703B9,1:1F2003B9,1:1F2103B9,1:1F2203B9,1:1F2303B9,1:1F2403B9,1:1F2503B9,1:1F2603B9,1:1F2703B9,1:1F6003B9,1:1F6103B9,1:1F6203B9,1:1F6303B9,1:1F6403B9,1:1F6503B9,1:1F6603B9,1:1F6703B9,1:1F6003B9,1:1F6103B9,1:1F6203B9,1:1F6303B9,1:1F6403B9,1:1F6503B9,1:1F6603B9,1:1F6703B9,3:1F7003B9,1:03B103B9,1:03AC03B9,2:03B10342,1:03B1034203B9,5:03B103B9,6:1F7403B9,1:03B703B9,1:03AE03B9,2:03B70342,1:03B7034203B9,5:03B703B9,6:03B903080300,1:03B903080301,3:03B90342,1:03B903080342,b:03C503080300,1:03C503080301,1:03C10313,2:03C50342,1:03C503080342,b:1F7C03B9,1:03C903B9,1:03CE03B9,2:03C90342,1:03C9034203B9,5:03C903B9,ac:00720073,5b:00B00063,6:00B00066,d:006E006F,a:0073006D,1:00740065006C,1:0074006D,124f:006800700061,2:00610075,2:006F0076,b:00700061,1:006E0061,1:03BC0061,1:006D0061,1:006B0061,1:006B0062,1:006D0062,1:00670062,3:00700066,1:006E0066,1:03BC0066,4:0068007A,1:006B0068007A,1:006D0068007A,1:00670068007A,1:00740068007A,15:00700061,1:006B00700061,1:006D00700061,1:006700700061,8:00700076,1:006E0076,1:03BC0076,1:006D0076,1:006B0076,1:006D0076,1:00700077,1:006E0077,1:03BC0077,1:006D0077,1:006B0077,1:006D0077,1:006B03C9,1:006D03C9,2:00620071,3:00632215006B0067,1:0063006F002E,1:00640062,1:00670079,2:00680070,2:006B006B,1:006B006D,9:00700068,2:00700070006D,1:00700072,2:00730076,1:00770062,c723:00660066,1:00660069,1:0066006C,1:006600660069,1:00660066006C,1:00730074,1:00730074,d:05740576,1:05740565,1:0574056B,1:057E0576,1:0574056D", bytes2);
93122

94-
_tmp = 0;
95-
const Table_C_flags = "70f,f71,18e".split(",").map((v) => {
96-
_tmp += parseInt(v, 16);
97-
return _tmp;
98-
});
99-
_tmp = 0;
100-
const Table_C_ranges = "80-20,2c0,1cc0-f,28-7,37-4,b-5,f86-b,a810-20ff,25d0-1f,229-6,d17a-7,2e8b,10000,10000,10000,10000,10000,10000,10000,10000,10000,10000,10000,10000,22-5f".split(",").map((v) => {
101-
let comps = v.split("-");
102-
if (comps.length === 1) { comps[1] = "1"; }
103-
_tmp += parseInt(comps[0], 16);
104-
return { l: _tmp, h: parseInt(comps[1], 16) }
105-
});
123+
// Prohibited code points (the Table C entries checked by
// _nameprepTableC). Single code points and ranges share one table: the
// string is the compact form decoded by createRangeTable, a
// comma-separated list of hexadecimal "GAP", "GAP-" or "GAP-SPAN"
// entries, each measured from the end of the previous range.
const Table_C_ranges = createRangeTable("80-20,2a0-,39c,32,f71,18e,7f2-f,19-7,30-4,7-5,f81-b,5,a800-20ff,4d1-1f,110,fa-6,d174-7,2e84-,ffff-,ffff-,ffff-,ffff-,ffff-,ffff-,ffff-,ffff-,ffff-,ffff-,ffff-,ffff-,2,1f-5f,ff7f-20001");
106124

107-
function matchMap(value: number, ranges: Array<Ranged>): Ranged {
108-
let lo = 0;
109-
for (let i = 0; i < ranges.length; i++) {
110-
let range = ranges[i];
111-
lo += range.l;
112-
if (value >= lo && value <= lo + range.h && ((value - lo) % (range.d || 1)) === 0) {
113-
if (range.e && range.e.indexOf(value - lo) !== -1) { continue; }
114-
return range;
115-
}
116-
}
117-
return null;
118-
}
119125

120126
function flatten(values: Array<Array<number>>): Array<number> {
121127
return values.reduce((accum, value) => {
@@ -124,28 +130,36 @@ function flatten(values: Array<Array<number>>): Array<number> {
124130
}, [ ]);
125131
}
126132

133+
// Reports whether codepoint is covered by any range in Table A.1
// (the unassigned code points table).
export function _nameprepTableA1(codepoint: number): boolean {
    const found = matchMap(codepoint, Table_A_1_ranges);
    return Boolean(found);
}
136+
127137
// Looks up the Table B.2 mapping for codepoint, trying each encoding of
// the table in turn:
//  1. shifted ranges (the matched range's "s" is added to the code point)
//  2. the absolute lookup table (returned as stored)
//  3. the relative lookup table (its first value is added to the code point)
//  4. the complex lookup table (returned as stored)
//
// Returns the mapped code points, or null if codepoint has no mapping.
export function _nameprepTableB2(codepoint: number): Array<number> {
    const ranged = matchMap(codepoint, Table_B_2_ranges);
    if (ranged) { return [ codepoint + ranged.s ]; }

    const absolute = Table_B_2_lut_abs[codepoint];
    if (absolute) { return absolute; }

    const relative = Table_B_2_lut_rel[codepoint];
    if (relative) { return [ codepoint + relative[0] ]; }

    return Table_B_2_complex[codepoint] || null;
}
139152

140-
return null;
153+
// Reports whether codepoint is covered by any range in the combined
// Table C (prohibited code points) table.
export function _nameprepTableC(codepoint: number): boolean {
    const found = matchMap(codepoint, Table_C_ranges);
    return Boolean(found);
}
142156

143157
export function nameprep(value: string): string {
144158

145159
// This allows platforms with incomplete normalize to bypass
146160
// it for very basic names which the built-in toLowerCase
147161
// will certainly handle correctly
148-
if (value.match(/^[a-z0-9-]*$/i)) { return value.toLowerCase(); }
162+
if (value.match(/^[a-z0-9-]*$/i) && value.length <= 59) { return value.toLowerCase(); }
149163

150164
// Get the code points (keeping the current normalization)
151165
let codes = toUtf8CodePoints(value);
@@ -163,17 +177,21 @@ export function nameprep(value: string): string {
163177
return [ code ];
164178
}));
165179

166-
// Normalize using fomr KC
180+
// Normalize using form KC
167181
codes = toUtf8CodePoints(_toUtf8String(codes), UnicodeNormalizationForm.NFKC);
168182

169-
// Prohibit C.1.2, C.2.2, C.3, C.4, C.5, C.6, C.7, C.8, C.9
183+
// Prohibit Tables C.1.2, C.2.2, C.3, C.4, C.5, C.6, C.7, C.8, C.9
170184
codes.forEach((code) => {
171-
if (Table_C_flags.indexOf(code) >= 0) { throw new Error("invalid character code"); }
172-
Table_C_ranges.forEach((range) => {
173-
if (code >= range.l && code <= range.l + range.h) {
174-
throw new Error("STRINGPREP_CONTAINS_PROHIBITED");
175-
}
176-
});
185+
if (_nameprepTableC(code)) {
186+
throw new Error("STRINGPREP_CONTAINS_PROHIBITED");
187+
}
188+
});
189+
190+
// Prohibit Unassigned Code Points (Table A.1)
191+
codes.forEach((code) => {
192+
if (_nameprepTableA1(code)) {
193+
throw new Error("STRINGPREP_CONTAINS_UNASSIGNED");
194+
}
177195
});
178196

179197
// IDNA extras
@@ -187,6 +205,8 @@ export function nameprep(value: string): string {
187205
// IDNA: 4.2.4
188206
if (name.length > 63) { throw new Error("too long"); }
189207

208+
209+
190210
return name;
191211
}
192212

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
import json
import re


def get_byte(v):
    r"""Convert one token of a C-style string literal into a byte value.

    A single character maps to its ordinal; any longer token is treated
    as a ``\xHH`` escape and the two hex digits after ``\x`` are parsed.
    """
    if len(v) == 1:
        return ord(v)
    return int(v[2:4], 16)


def get_string(value):
    """Parse one field of a test entry.

    Returns a list of byte values for a double-quoted literal, or None
    for the (case-insensitive) keyword NULL. Raises Exception("unhandled")
    for anything else, including an empty field.
    """
    value = value.strip()
    # Slices (rather than indexing) so an empty field falls through to
    # the "unhandled" error instead of raising IndexError.
    if value[:1] == '"' and value[-1:] == '"':
        # Adjacent literals ("abc""def") are joined by dropping the
        # inner pair of quotes before tokenizing.
        body = value[1:-1].replace('""', '')
        # A list (not map) so the result is JSON-serializable on Python 3
        return [get_byte(token) for token in re.findall(r"\\x[0-9a-fA-F]{2}|.", body)]
    if value.lower() == "null":
        return None
    raise Exception("unhandled")


def read_source(filename):
    """Read the test-vector file and return its content as one string.

    Blank lines, lines starting with "#" and the page header/footer
    lines (starting with "Josefsson" or "Internet-Draft") are dropped;
    the remaining lines are stripped and concatenated.
    """
    chunks = []
    with open(filename) as handle:
        for line in handle:
            line = line.strip()
            if line == "" or line[0:1] == "#":
                continue
            if line.startswith("Josefsson") or line.startswith("Internet-Draft"):
                continue
            chunks.append(line)
    return "".join(chunks)


def parse_tests(source):
    """Extract every ``{ ... }`` entry from source as a test dictionary.

    Fields are split on commas: comment, input, output, then optionally
    profile, flags and rc.
    """
    tests = []
    for m in re.findall(r"(\{(?:.|\n)*?\})", source):
        comps = m[1:-1].split(",")
        test = dict(
            comment=comps[0].strip()[1:-1],
            input=get_string(comps[1]),
            output=get_string(comps[2])
        )
        if len(comps) >= 4:
            test["profile"] = get_string(comps[3])
        if len(comps) >= 5:
            test["flags"] = comps[4].strip()
        if len(comps) >= 6:
            test["rc"] = comps[5].strip()
        tests.append(test)
    return tests


if __name__ == "__main__":
    print(json.dumps(parse_tests(read_source("test-vectors-00.txt"))))

packages/testcases/input/nameprep/generate-b2.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -149,17 +149,28 @@ def add_simple_data(data):
149149
mappings.append(data)
150150
debug[data["l"]] = "MAP:" + str(data)
151151

152+
# Create complex table (sources that map to more than one code point)
152153
complex = { }
154+
complex_output = [ ];
153155
for (src, dst, reason) in weird:
154156
for word in dst.split(" "):
157+
complex_output.append(int(word, 16))
155158
if len(word) != 4: raise Exception("hmmm")
156159
complex[int(src, 16)] = dst.replace(" ", "")
160+
161+
# Experimenting: We can easily create a LUT for the individual
162+
# components, as there is substantial overlap.
163+
#complex_output = dict((x, True) for x in complex_output).keys()
164+
#complex_output.sort()
165+
#print "COM", complex_output, len(complex_output)
166+
167+
# Sort mappings by lo
157168
mappings.sort(lambda a, b: cmp(a["l"], b["l"]))
158169

159-
debug_keys = debug.keys()
160-
debug_keys.sort()
161-
for d in debug_keys:
162-
print d, debug[d]
170+
#debug_keys = debug.keys()
171+
#debug_keys.sort()
172+
#for d in debug_keys:
173+
# print d, debug[d]
163174

164175
#print mappings
165176

Lines changed: 22 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
def hexify(v):
    """Return v as lowercase hexadecimal digits without the "0x" prefix.

    format() is used instead of slicing hex(v): slicing corrupts
    negative values ("-0x5"[2:] == "x5") and, on Python 2, keeps the
    trailing "L" of long integers.
    """
    return format(v, "x")
3+
14
prohibit = [ ]
25

36
table = None
@@ -28,42 +31,27 @@
2831
prohibit = list(dict([(p, True) for p in prohibit]).keys())
2932
prohibit.sort()
3033

31-
prohibit_single = [ ]
32-
prohibit_range = [ ]
34+
output = [ dict(lo = prohibit[0], hi = prohibit[0]) ]
3335

34-
last_range_start = None
35-
last = 0
36-
for p in prohibit:
37-
if p - 1 == last:
38-
if last_range_start is None:
39-
last_range_start = last
40-
if len(prohibit_single) > 0 and prohibit_single[-1] == last:
41-
prohibit_single.pop()
36+
for p in prohibit[1:]:
37+
if p - 1 == output[-1]["hi"]:
38+
output[-1]["hi"] = p
4239
else:
43-
if last_range_start is not None:
44-
print "Range", last_range_start, last - last_range_start, hex(last_range_start)
45-
length = last - last_range_start
46-
if length == 1:
47-
length = ""
48-
else:
49-
length = "-" + hex(length)[2:]
50-
prohibit_range.append([ last_range_start, length ])
51-
last_range_start = None
52-
else:
53-
print "Single", p, hex(p)
54-
prohibit_single.append(p)
55-
last = p
40+
output.append(dict(lo = p, hi = p))
5641

57-
last = 0
58-
for i in xrange(0, len(prohibit_single)):
59-
v = prohibit_single[i]
60-
prohibit_single[i] -= last
61-
last = v
62-
print 'const Table_C_lut = "' + ",".join(hex(x)[2:] for x in prohibit_single) + '";'
42+
print output
6343

6444
last = 0
65-
for item in prohibit_range:
66-
v = item[0]
67-
item[0] -= last
68-
last = v
69-
print 'const Table_C_ranges = "' + ",".join(("%s%s" % (hex(p[0])[2:], p[1])) for p in prohibit_range) + '";';
45+
for r in output:
46+
r["h"] = r["hi"] - r["lo"]
47+
r["l"] = r["lo"] - last
48+
last = r["hi"]
49+
50+
r["range"] = hexify(r["l"])
51+
if r["h"] > 1:
52+
r["range"] += "-" + hexify(r["h"])
53+
elif r["h"] > 0:
54+
r["range"] += "-"
55+
56+
print 'const Table_C_ranges = "' + ",".join(x["range"] for x in output) + '";'
57+

0 commit comments

Comments
 (0)