|
| 1 | +# number_words.py |
| 2 | +# |
| 3 | +# Copyright 2020, Paul McGuire |
| 4 | +# |
| 5 | +# Parser/evaluator for expressions of numbers as written out in words: |
| 6 | +# - one |
| 7 | +# - seven |
| 8 | +# - twelve |
| 9 | +# - twenty six |
| 10 | +# - forty-two |
| 11 | +# - one hundred and seven |
| 12 | +# |
| 13 | +# |
| 14 | +# BNF: |
| 15 | +""" |
| 16 | + optional_and ::= ["and" | "-"] |
| 17 | + optional_dash ::= ["-"] |
| 18 | + units ::= one | two | three | ... | nine |
| 19 | + teens_only ::= eleven | twelve | ... | nineteen |
| 20 | + teens ::= ten | teens_only |
| 21 | + tens ::= twenty | thirty | ... | ninety |
| 22 | + hundreds ::= (units | teens_only | tens optional_dash units) "hundred" |
| 23 | + one_to_99 ::= units | teens | (tens [optional_dash units]) |
| 24 | + thousands ::= ([hundreds optional_and] one_to_99 | hundreds) "thousand" |
| 25 | +
|
| 26 | + number ::= [thousands optional_and] [hundreds optional_and] one_to_99 | [thousands optional_and] hundreds | thousands |
| 27 | +""" |
| 28 | +import pyparsing as pp |
| 29 | +from operator import mul |
| 30 | +import pyparsing.diagram |
| 31 | + |
def define_numeric_word(s, value):
    """Build a caseless keyword parser for the word *s* that yields *value*.

    The attached parse action takes no arguments and simply returns
    *value*, so the matched text is replaced by the number it stands for.
    """
    keyword = pp.CaselessKeyword(s)
    keyword.addParseAction(lambda: value)
    return keyword
| 34 | + |
def define_numeric_word_range(s, vals):
    """Build a parser that matches any one of several number words.

    *s* is either a whitespace-separated string of words or an already
    split sequence of words. *vals* supplies the value for each word; the
    two are paired up in order with ``zip``.
    """
    words = s.split() if isinstance(s, str) else s
    alternatives = [
        define_numeric_word(word, number) for word, number in zip(words, vals)
    ]
    return pp.MatchFirst(alternatives)
| 39 | + |
# Optional separators between number words. Both are suppressed, so they
# never add tokens to the parsed results.
opt_dash = pp.Optional(pp.Suppress("-")).setName("optional '-'")
opt_and = pp.Optional(
    (pp.CaselessKeyword("and") | "-").suppress()
).setName("optional 'and'")

# Basic vocabulary: each word parses to its integer value.
zero = define_numeric_word_range("zero oh", [0, 0])
one_to_9 = define_numeric_word_range(
    "one two three four five six seven eight nine",
    range(1, 10),
).setName("1-9")
eleven_to_19 = define_numeric_word_range(
    "eleven twelve thirteen fourteen fifteen sixteen seventeen eighteen nineteen",
    range(11, 20),
).setName("eleven_to_19")
ten_to_19 = (define_numeric_word("ten", 10) | eleven_to_19).setName("ten_to_19")
one_to_19 = (one_to_9 | ten_to_19).setName("1-19")
tens = define_numeric_word_range(
    "twenty thirty forty fifty sixty seventy eighty ninety",
    range(20, 100, 10),
)

# "<quantity> hundred", e.g. "two hundred" or "twelve hundred".
hundreds = (
    (one_to_9 | eleven_to_19 | (tens + opt_dash + one_to_9))
    + define_numeric_word("hundred", 100)
).setName("100s")

# A single 1-19 word, or a tens word with an optional units word; the
# tens/units pair is summed into a single value.
one_to_99 = (
    one_to_19
    | (tens + pp.Optional(opt_dash + one_to_9)).addParseAction(sum)
).setName("1-99")

# Optional hundreds part followed by 1-99, or a hundreds part on its own;
# the pieces are summed into a single value.
one_to_999 = (
    (pp.Optional(hundreds + opt_and) + one_to_99 | hundreds).addParseAction(sum)
).setName("1-999")

# "<quantity> thousand", where the quantity is any one_to_999 expression.
thousands = (
    one_to_999 + define_numeric_word("thousand", 1000)
).setName("1000s")
| 56 | + |
def multiply(t):
    """Parse action: scale a leading quantity by its trailing multiplier.

    The last token in *t* is the multiplier (for example 100 or 1000).
    Every token before it is part of the quantity being scaled, and those
    tokens are summed first. A quantity can arrive as more than one token,
    e.g. "twenty one hundred" yields ``[20, 1, 100]``; summing first gives
    2100, where a plain two-argument multiplication would raise.

    Raises:
        TypeError: if *t* holds fewer than two tokens.
    """
    tokens = list(t)
    if len(tokens) < 2:
        # Keep the exception type the two-argument ``mul`` call raised
        # for a wrong token count.
        raise TypeError(
            "multiply expects a quantity followed by a multiplier, "
            "got %d token(s)" % len(tokens)
        )
    *quantity, multiplier = tokens
    return sum(quantity) * multiplier
# Collapse each "<quantity> hundred" / "<quantity> thousand" token group
# into a single value.
hundreds.addParseAction(multiply)
thousands.addParseAction(multiply)

# Top-level expression. Alternatives are tried in order:
#   1. optional thousands, optional hundreds, then a required 1-99 part
#   2. optional thousands, then a required hundreds part
#   3. thousands alone
# The values of the matched parts are summed into the final number.
numeric_expression = (
    (
        pp.Optional(thousands + opt_and)
        + pp.Optional(hundreds + opt_and)
        + one_to_99
    )
    | (pp.Optional(thousands + opt_and) + hundreds)
    | thousands
).setName("numeric_words").addParseAction(sum)
| 70 | + |
| 71 | + |
if __name__ == "__main__":
    # Run the grammar against sample phrases, one test per line.
    numeric_expression.runTests(
        """
        one
        seven
        twelve
        twenty six
        forty-two
        two hundred
        twelve hundred
        one hundred and eleven
        ninety nine thousand nine hundred and ninety nine
        nine hundred thousand nine hundred and ninety nine
        nine hundred and ninety nine thousand nine hundred and ninety nine
        nineteen hundred thousand nineteen hundred and ninety nine
        """
    )

    # Write a railroad diagram of the grammar to an HTML file.
    numeric_expression.create_diagram("numeric_words_diagram.html")
0 commit comments