Skip to content

Commit fd4268a

Browse files
committed
Metaphone: added example tests
The current metaphone implementation passes all tests added to the bottom of the function. If anyone finds a better place for such things (I didn't want to add such a huge comment to the top of the file), feel free to move them.
1 parent 4d2ec3c commit fd4268a

File tree

1 file changed

+119
-0
lines changed

1 file changed

+119
-0
lines changed

functions/strings/metaphone.js

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,4 +218,123 @@ function metaphone(word, max_phonemes) {
218218
}
219219

220220
return meta;
221+
222+
/*
223+
" abc", "ABK", // skip leading whitespace
224+
"1234.678!@abc", "ABK", // skip leading non-alpha chars
225+
"aero", "ER", // leading 'a' followed by 'e' turns into 'e'
226+
"air", "AR", // leading 'a' not followed by 'e' is kept, other vowels ignored
227+
// leading vowels added to result
228+
"egg", "EK",
229+
"if", "IF",
230+
"of", "OF",
231+
"use", "US",
232+
// other vowels ignored
233+
"xAEIOU", "S",
234+
// GN, KN, PN become 'N'
235+
"gnome", "NM",
236+
"knight", "NFT",
237+
"pneumatic", "NMTK",
238+
// leading 'WR' becomes 'R'
239+
"wrong", "RNK",
240+
// leading 'WH+vowel' becomes 'W'
241+
"wheel", "WL",
242+
// leading 'X' becomes 'S', 'KS' otherwise
243+
"xerox", "SRKS",
244+
"exchange", "EKSXNJ",
245+
// duplicate chars, except 'C', are ignored
246+
"accuracy", "AKKRS",
247+
"blogger", "BLKR",
248+
"fffound", "FNT",
249+
// ignore 'B' if after 'M'
250+
"billboard", "BLBRT",
251+
"symbol", "SML",
252+
// 'CIA' -> 'X'
253+
"special", "SPXL",
254+
// 'SC[IEY]' -> 'C' ignored
255+
"science", "SNS",
256+
// '[^S]C' -> 'C' becomes 'S'
257+
"dance", "TNS",
258+
// 'CH' -> 'X'
259+
"change", "XNJ",
260+
"school", "SXL",
261+
// 'C' -> 'K'
262+
"micro", "MKR",
263+
// 'DGE', 'DGI', 'DGY' -> 'J'
264+
// 'T' otherwise
265+
"bridge", "BRJ",
266+
"pidgin", "PJN",
267+
"edgy", "EJ",
268+
"handgun", "HNTKN",
269+
"draw", "TR",
270+
// 'GN\b', 'GNED' -> ignore 'G'
271+
"sign", "SN",
272+
"signed", "SNT",
273+
"signs", "SKNS",
274+
// [^G]G[EIY] -> 'J'...
275+
"agency", "AJNS",
276+
// 'GH' -> 'F' if not b--gh, d--gh, h--gh
277+
"night", "NFT",
278+
"bright", "BRT",
279+
"height", "HT",
280+
"midnight", "MTNT",
281+
// 'K' otherwise
282+
"jogger", "JKR",
283+
// '[^CGPST]H[AEIOU]' -> 'H', ignore otherwise
284+
"horse", "HRS",
285+
"adhere", "ATHR",
286+
"mahjong", "MJNK",
287+
"fight", "FFT", // interesting
288+
"ghost", "FST",
289+
// 'K' -> 'K' if not after 'C'
290+
"ski", "SK",
291+
"brick", "BRK",
292+
// 'PH' -> 'F'
293+
"phrase", "FRS",
294+
// 'P.' -> 'P'
295+
"hypnotic", "PNTK",
296+
"topnotch", "TPNX",
297+
// 'Q' -> 'K'
298+
"quit", "KT",
299+
"squid", "SKT",
300+
// 'SIO', 'SIA', 'SH' -> 'X'
301+
"version", "FRXN",
302+
"silesia", "SLX",
303+
"enthusiasm", "EN0XSM",
304+
"shell", "XL",
305+
// 'S' -> 'S' in other cases
306+
"spy", "SP",
307+
"system", "SSTM",
308+
// 'TIO', 'TIA' -> 'X'
309+
"ratio", "RX",
310+
"nation", "NXN",
311+
"spatial", "SPXL",
312+
// 'TH' -> '0'
313+
"the", "0",
314+
"nth", "N0",
315+
"truth", "TR0",
316+
// 'TCH' -> ignore 'T'
317+
"watch", "WX",
318+
// 'T' otherwise
319+
"vote", "FT",
320+
"tweet", "TWT",
321+
// 'V' -> 'F'
322+
"evolve", "EFLF",
323+
// 'W' -> 'W' if followed by vowel
324+
"rewrite", "RRT",
325+
"outwrite", "OTRT",
326+
"artwork", "ARTWRK",
327+
// 'X' -> 'KS' if not first char
328+
"excel", "EKSSL",
329+
// 'Y' -> 'Y' if followed by vowel
330+
"cyan", "SYN",
331+
"way", "W",
332+
"hybrid", "BRT",
333+
// 'Z' -> 'S'
334+
"zip", "SP",
335+
"zoom", "SM",
336+
"jazz", "JS",
337+
"zigzag", "SKSK",
338+
"abc abc", "ABKBK" // even though there are two words, second 'a' is ignored
339+
*/
221340
}

0 commit comments

Comments
 (0)