forked from foliojs/unicode-properties
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathindex.js
182 lines (159 loc) · 4.38 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
import UnicodeTrie from 'unicode-trie';
import pako from 'pako';
import * as base64 from 'base64-arraybuffer';
import base64DeflatedData from './data.json';
import base64DeflatedTrie from './trie.json';
/**
 * Converts a byte array into a string with one character per byte.
 *
 * The conversion is performed in fixed-size slices: handing an entire array to
 * `Function.prototype.apply` passes every element as a separate argument, which
 * can exceed the engine's argument limit and throw a RangeError on large input.
 *
 * @param {Uint8Array|Array<number>} bytes - Bytes to convert.
 * @returns {string} String whose char codes equal the input bytes, in order.
 */
const bytesToBinaryString = (bytes) => {
  const CHUNK_SIZE = 0x2000;
  let text = '';
  for (let offset = 0; offset < bytes.length; offset += CHUNK_SIZE) {
    const end = offset + CHUNK_SIZE;
    // Typed arrays expose `subarray` (a view, no copy); plain arrays only `slice`.
    const chunk = typeof bytes.subarray === 'function'
      ? bytes.subarray(offset, end)
      : bytes.slice(offset, end);
    text += String.fromCharCode.apply(String, chunk);
  }
  return text;
};

// The lookup tables are shipped as base64-encoded, deflated JSON text.
const data = JSON.parse(
  bytesToBinaryString(pako.inflate(base64.decode(base64DeflatedData))),
);

// Trie is serialized as a Buffer in node, but here
// we may be running in a browser so we make an Uint8Array
const trieData = pako.inflate(base64.decode(base64DeflatedTrie));
const trie = new UnicodeTrie(trieData);
// Base-2 logarithm; falls back to a natural-log quotient when Math.log2 is
// not provided by the runtime.
const log2 = Math.log2 || ((value) => Math.log(value) / Math.LN2);

/**
 * Number of bits needed to store the non-negative integer `n`.
 *
 * @param {number} n - Largest value the field must hold.
 * @returns {number} `log2(n) + 1` truncated to a 32-bit integer (0 for n = 0).
 */
const bits = (n) => {
  const width = log2(n) + 1;
  // `| 0` truncates toward zero and maps -Infinity (n === 0) to 0.
  return width | 0;
};
// Width, in bits, of each field packed into a trie value. Every width except
// the numeric one is derived from the size of the matching lookup table.
const NUMBER_BITS = 10;
const EAW_BITS = bits(data.eaw.length - 1);
const SCRIPT_BITS = bits(data.scripts.length - 1);
const COMBINING_BITS = bits(data.combiningClasses.length - 1);
const CATEGORY_BITS = bits(data.categories.length - 1);

// Fields are stacked from the least significant bit upwards in the order:
// number, East Asian width, script, combining class, category. Each shift is
// therefore the previous field's shift plus that field's width.
const EAW_SHIFT = NUMBER_BITS;
const SCRIPT_SHIFT = EAW_SHIFT + EAW_BITS;
const COMBINING_SHIFT = SCRIPT_SHIFT + SCRIPT_BITS;
const CATEGORY_SHIFT = COMBINING_SHIFT + COMBINING_BITS;

// Mask with the lowest `width` bits set, applied after shifting a field down.
const fieldMask = (width) => (1 << width) - 1;
const CATEGORY_MASK = fieldMask(CATEGORY_BITS);
const COMBINING_MASK = fieldMask(COMBINING_BITS);
const SCRIPT_MASK = fieldMask(SCRIPT_BITS);
const EAW_MASK = fieldMask(EAW_BITS);
const NUMBER_MASK = fieldMask(NUMBER_BITS);
/**
 * Looks up the category entry for a code point.
 *
 * @param {number} codePoint - Code point handed to the trie lookup.
 * @returns {*} The `data.categories` entry selected by the category field of
 *   the trie value (compared against codes such as 'Lu' elsewhere in this file).
 */
export const getCategory = (codePoint) => {
  const index = (trie.get(codePoint) >> CATEGORY_SHIFT) & CATEGORY_MASK;
  return data.categories[index];
};
/**
 * Looks up the combining class entry for a code point.
 *
 * @param {number} codePoint - Code point handed to the trie lookup.
 * @returns {*} The `data.combiningClasses` entry selected by the combining
 *   class field of the trie value.
 */
export const getCombiningClass = (codePoint) => {
  const index = (trie.get(codePoint) >> COMBINING_SHIFT) & COMBINING_MASK;
  return data.combiningClasses[index];
};
/**
 * Looks up the script entry for a code point.
 *
 * @param {number} codePoint - Code point handed to the trie lookup.
 * @returns {*} The `data.scripts` entry selected by the script field of the
 *   trie value.
 */
export const getScript = (codePoint) => {
  const index = (trie.get(codePoint) >> SCRIPT_SHIFT) & SCRIPT_MASK;
  return data.scripts[index];
};
/**
 * Looks up the East Asian width entry for a code point.
 *
 * @param {number} codePoint - Code point handed to the trie lookup.
 * @returns {*} The `data.eaw` entry selected by the East Asian width field of
 *   the trie value.
 */
export const getEastAsianWidth = (codePoint) => {
  const index = (trie.get(codePoint) >> EAW_SHIFT) & EAW_MASK;
  return data.eaw[index];
};
/**
 * Decodes the numeric value packed into the low NUMBER_BITS bits of the trie
 * entry for a code point.
 *
 * Encoding, by range of the packed field:
 *   - 0             : no numeric value
 *   - 1 .. 50       : the integer `field - 1`
 *   - 51 .. 0x1df   : a fraction; numerator in the high bits (biased by 12),
 *                     denominator in the low 4 bits (biased by -1)
 *   - 0x1e0 .. 0x2ff: mantissa (biased by 14) times 10^exponent, exponent in
 *                     the low 5 bits (biased by -2)
 *   - 0x300 and up  : mantissa (biased by 0xbf) times 60^exponent, exponent in
 *                     the low 2 bits (biased by -1)
 *
 * @param {number} codePoint - Code point handed to the trie lookup.
 * @returns {number|null} The decoded value, or null when the field is 0.
 */
export const getNumericValue = (codePoint) => {
  const encoded = trie.get(codePoint) & NUMBER_MASK;

  if (encoded === 0) {
    return null;
  }

  if (encoded <= 50) {
    return encoded - 1;
  }

  if (encoded < 0x1e0) {
    const numerator = (encoded >> 4) - 12;
    const denominator = (encoded & 0xf) + 1;
    return numerator / denominator;
  }

  // Remaining encodings are mantissa * radix^exponent; only the field layout
  // and the radix differ between the two ranges.
  let result;
  let exponent;
  let radix;
  if (encoded < 0x300) {
    result = (encoded >> 5) - 14;
    exponent = (encoded & 0x1f) + 2;
    radix = 10;
  } else {
    result = (encoded >> 2) - 0xbf;
    exponent = (encoded & 3) + 1;
    radix = 60;
  }

  for (let step = 0; step < exponent; step++) {
    result *= radix;
  }
  return result;
};
/**
 * Tests whether a code point's category is one of Lu, Ll, Lt, Lm, Lo or Nl.
 *
 * @param {number} codePoint - Code point to classify.
 * @returns {boolean} True when the category is in the list above.
 */
export const isAlphabetic = (codePoint) => {
  switch (getCategory(codePoint)) {
    case 'Lu':
    case 'Ll':
    case 'Lt':
    case 'Lm':
    case 'Lo':
    case 'Nl':
      return true;
    default:
      return false;
  }
};
/**
 * Tests whether a code point's category is Nd.
 *
 * @param {number} codePoint - Code point to classify.
 * @returns {boolean} True when the category equals 'Nd'.
 */
export const isDigit = (codePoint) => {
  const category = getCategory(codePoint);
  return category === 'Nd';
};
/**
 * Tests whether a code point's category is one of Pc, Pd, Pe, Pf, Pi, Po or Ps.
 *
 * @param {number} codePoint - Code point to classify.
 * @returns {boolean} True when the category is in the list above.
 */
export const isPunctuation = (codePoint) => {
  switch (getCategory(codePoint)) {
    case 'Pc':
    case 'Pd':
    case 'Pe':
    case 'Pf':
    case 'Pi':
    case 'Po':
    case 'Ps':
      return true;
    default:
      return false;
  }
};
/**
 * Tests whether a code point's category is Ll.
 *
 * @param {number} codePoint - Code point to classify.
 * @returns {boolean} True when the category equals 'Ll'.
 */
export const isLowerCase = (codePoint) => getCategory(codePoint) === 'Ll';
/**
 * Tests whether a code point's category is Lu.
 *
 * @param {number} codePoint - Code point to classify.
 * @returns {boolean} True when the category equals 'Lu'.
 */
export const isUpperCase = (codePoint) => {
  const category = getCategory(codePoint);
  return category === 'Lu';
};
/**
 * Tests whether a code point's category is Lt.
 *
 * @param {number} codePoint - Code point to classify.
 * @returns {boolean} True when the category equals 'Lt'.
 */
export const isTitleCase = (codePoint) => {
  const category = getCategory(codePoint);
  return category === 'Lt';
};
/**
 * Tests whether a code point's category is one of Zs, Zl or Zp.
 *
 * @param {number} codePoint - Code point to classify.
 * @returns {boolean} True when the category is in the list above.
 */
export const isWhiteSpace = (codePoint) => {
  switch (getCategory(codePoint)) {
    case 'Zs':
    case 'Zl':
    case 'Zp':
      return true;
    default:
      return false;
  }
};
/**
 * Tests whether a code point's category is one of Nd, No, Nl, Lu, Ll, Lt, Lm,
 * Lo, Me or Mc. Note that Mn is not in this list.
 *
 * @param {number} codePoint - Code point to classify.
 * @returns {boolean} True when the category is in the list above.
 */
export const isBaseForm = (codePoint) => {
  switch (getCategory(codePoint)) {
    case 'Nd':
    case 'No':
    case 'Nl':
    case 'Lu':
    case 'Ll':
    case 'Lt':
    case 'Lm':
    case 'Lo':
    case 'Me':
    case 'Mc':
      return true;
    default:
      return false;
  }
};
/**
 * Tests whether a code point's category is one of Mn, Me or Mc.
 *
 * @param {number} codePoint - Code point to classify.
 * @returns {boolean} True when the category is in the list above.
 */
export const isMark = (codePoint) => {
  switch (getCategory(codePoint)) {
    case 'Mn':
    case 'Me':
    case 'Mc':
      return true;
    default:
      return false;
  }
};
// Bundles every named export into one object for consumers that rely on the
// default import; key order matches the order of the definitions in this file.
const unicodeProperties = {
  getCategory,
  getCombiningClass,
  getScript,
  getEastAsianWidth,
  getNumericValue,
  isAlphabetic,
  isDigit,
  isPunctuation,
  isLowerCase,
  isUpperCase,
  isTitleCase,
  isWhiteSpace,
  isBaseForm,
  isMark,
};

export default unicodeProperties;