Skip to content

Commit db57ffc

Browse files
committed
More lookups moved to perfect hashing
1 parent 3a4a8f6 commit db57ffc

File tree

3 files changed

+6678
-3468
lines changed

3 files changed

+6678
-3468
lines changed

scripts/unicode.py

Lines changed: 23 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -287,7 +287,6 @@ def _compute_stream_safe_tables(self):
287287
hexify = lambda c: '{:04X}'.format(c)
288288

289289
def gen_combining_class(combining_classes, out):
290-
out.write("#[inline]\n")
291290
(salt, keys) = minimal_perfect_hash(combining_classes)
292291
out.write("pub fn canonical_combining_class(c: char) -> u8 {\n")
293292
out.write(" mph_lookup(c.into(), &[\n")
@@ -299,7 +298,7 @@ def gen_combining_class(combining_classes, out):
299298
kv = int(combining_classes[k]) | (k << 8)
300299
out.write(" 0x{:x},\n".format(kv))
301300
out.write(" ],\n")
302-
out.write(" u8_lookup_fk, u8_lookup_fv, 0) as u8\n")
301+
out.write(" u8_lookup_fk, u8_lookup_fv, 0)\n")
303302
out.write("}\n")
304303

305304
def gen_composition_table(canon_comp, out):
@@ -376,16 +375,20 @@ def gen_nfkd_qc(prop_tables, out):
376375

377376
def gen_combining_mark(general_category_mark, out):
378377
out.write("#[inline]\n")
378+
(salt, keys) = minimal_perfect_hash(general_category_mark)
379379
out.write("pub fn is_combining_mark(c: char) -> bool {\n")
380-
out.write(" match c {\n")
381-
382-
for char in sorted(general_category_mark):
383-
out.write(" '\\u{%s}' => true,\n" % hexify(char))
384-
385-
out.write(" _ => false,\n")
386-
out.write(" }\n")
380+
out.write(" mph_lookup(c.into(), &[\n")
381+
for s in salt:
382+
out.write(" 0x{:x},\n".format(s))
383+
out.write(" ],\n")
384+
out.write(" &[\n")
385+
for k in keys:
386+
out.write(" 0x{:x},\n".format(k))
387+
out.write(" ],\n")
388+
out.write(" bool_lookup_fk, bool_lookup_fv, false)\n")
387389
out.write("}\n")
388390

391+
389392
def gen_stream_safe(leading, trailing, out):
390393
out.write("#[inline]\n")
391394
out.write("pub fn stream_safe_leading_nonstarters(c: char) -> usize {\n")
@@ -399,15 +402,18 @@ def gen_stream_safe(leading, trailing, out):
399402
out.write("}\n")
400403
out.write("\n")
401404

402-
out.write("#[inline]\n")
405+
(salt, keys) = minimal_perfect_hash(trailing)
403406
out.write("pub fn stream_safe_trailing_nonstarters(c: char) -> usize {\n")
404-
out.write(" match c {\n")
405-
406-
for char, num_trailing in sorted(trailing.items()):
407-
out.write(" '\\u{%s}' => %d,\n" % (hexify(char), num_trailing))
408-
409-
out.write(" _ => 0,\n")
410-
out.write(" }\n")
407+
out.write(" mph_lookup(c.into(), &[\n")
408+
for s in salt:
409+
out.write(" 0x{:x},\n".format(s))
410+
out.write(" ],\n")
411+
out.write(" &[\n")
412+
for k in keys:
413+
kv = int(trailing[k]) | (k << 8)
414+
out.write(" 0x{:x},\n".format(kv))
415+
out.write(" ],\n")
416+
out.write(" u8_lookup_fk, u8_lookup_fv, 0) as usize\n")
411417
out.write("}\n")
412418

413419
def gen_tests(tests, out):

src/perfect_hash.rs

Lines changed: 17 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
1313
// This function is based on multiplication being fast and is "good enough". Also
1414
// it can share some work between the unsalted and salted versions.
15+
#[inline]
1516
fn my_hash(key: u32, salt: u32, n: usize) -> usize {
1617
let y = key.wrapping_add(salt).wrapping_mul(2654435769);
1718
let y = y ^ key.wrapping_mul(0x31415926);
@@ -33,11 +34,25 @@ pub(crate) fn mph_lookup<KV, V, FK, FV>(x: u32, salt: &[u16], kv: &[KV], fk: FK,
3334
}
3435

3536
/// Extract the key in a 24 bit key and 8 bit value packed in a u32.
37+
#[inline]
3638
pub(crate) fn u8_lookup_fk(kv: u32) -> u32 {
3739
kv >> 8
3840
}
3941

4042
/// Extract the value in a 24 bit key and 8 bit value packed in a u32.
41-
pub(crate) fn u8_lookup_fv(kv: u32) -> u32 {
42-
kv & 0xff
43+
#[inline]
44+
pub(crate) fn u8_lookup_fv(kv: u32) -> u8 {
45+
(kv & 0xff) as u8
46+
}
47+
48+
/// Extract the key for a boolean lookup.
49+
#[inline]
50+
pub(crate) fn bool_lookup_fk(kv: u32) -> u32 {
51+
kv
52+
}
53+
54+
/// Extract the value for a boolean lookup.
55+
#[inline]
56+
pub(crate) fn bool_lookup_fv(_kv: u32) -> bool {
57+
true
4358
}

0 commit comments

Comments
 (0)