|
4 | 4 |
|
5 | 5 | import re
|
6 | 6 |
|
# Lookup table from kana (single characters and two-character combinations)
# to romaji. Built once at import time instead of on every call.
_KANA_TO_ROMAJI = {
    # Katakana characters
    'ア': 'a', 'イ': 'i', 'ウ': 'u', 'エ': 'e', 'オ': 'o',
    'カ': 'ka', 'キ': 'ki', 'ク': 'ku', 'ケ': 'ke', 'コ': 'ko',
    'サ': 'sa', 'シ': 'shi', 'ス': 'su', 'セ': 'se', 'ソ': 'so',
    'タ': 'ta', 'チ': 'chi', 'ツ': 'tsu', 'テ': 'te', 'ト': 'to',
    'ナ': 'na', 'ニ': 'ni', 'ヌ': 'nu', 'ネ': 'ne', 'ノ': 'no',
    'ハ': 'ha', 'ヒ': 'hi', 'フ': 'fu', 'ヘ': 'he', 'ホ': 'ho',
    'マ': 'ma', 'ミ': 'mi', 'ム': 'mu', 'メ': 'me', 'モ': 'mo',
    'ヤ': 'ya', 'ユ': 'yu', 'ヨ': 'yo',
    'ラ': 'ra', 'リ': 'ri', 'ル': 'ru', 'レ': 're', 'ロ': 'ro',
    'ワ': 'wa', 'ヲ': 'wo', 'ン': 'n',

    # Katakana voiced consonants
    'ガ': 'ga', 'ギ': 'gi', 'グ': 'gu', 'ゲ': 'ge', 'ゴ': 'go',
    'ザ': 'za', 'ジ': 'ji', 'ズ': 'zu', 'ゼ': 'ze', 'ゾ': 'zo',
    'ダ': 'da', 'ヂ': 'ji', 'ヅ': 'zu', 'デ': 'de', 'ド': 'do',
    'バ': 'ba', 'ビ': 'bi', 'ブ': 'bu', 'ベ': 'be', 'ボ': 'bo',
    'パ': 'pa', 'ピ': 'pi', 'プ': 'pu', 'ペ': 'pe', 'ポ': 'po',

    # Katakana combinations
    'キャ': 'kya', 'キュ': 'kyu', 'キョ': 'kyo',
    'シャ': 'sha', 'シュ': 'shu', 'ショ': 'sho',
    'チャ': 'cha', 'チュ': 'chu', 'チョ': 'cho',
    'ニャ': 'nya', 'ニュ': 'nyu', 'ニョ': 'nyo',
    'ヒャ': 'hya', 'ヒュ': 'hyu', 'ヒョ': 'hyo',
    'ミャ': 'mya', 'ミュ': 'myu', 'ミョ': 'myo',
    'リャ': 'rya', 'リュ': 'ryu', 'リョ': 'ryo',
    'ギャ': 'gya', 'ギュ': 'gyu', 'ギョ': 'gyo',
    'ジャ': 'ja', 'ジュ': 'ju', 'ジョ': 'jo',
    'ビャ': 'bya', 'ビュ': 'byu', 'ビョ': 'byo',
    'ピャ': 'pya', 'ピュ': 'pyu', 'ピョ': 'pyo',

    # Katakana small characters and special cases
    'ッ': '',  # Small tsu (doubles the following consonant)
    'ャ': 'ya', 'ュ': 'yu', 'ョ': 'yo',

    # Katakana extras
    'ヴ': 'vu', 'ファ': 'fa', 'フィ': 'fi', 'フェ': 'fe', 'フォ': 'fo',
    'ウィ': 'wi', 'ウェ': 'we', 'ウォ': 'wo',

    # Hiragana characters
    'あ': 'a', 'い': 'i', 'う': 'u', 'え': 'e', 'お': 'o',
    'か': 'ka', 'き': 'ki', 'く': 'ku', 'け': 'ke', 'こ': 'ko',
    'さ': 'sa', 'し': 'shi', 'す': 'su', 'せ': 'se', 'そ': 'so',
    'た': 'ta', 'ち': 'chi', 'つ': 'tsu', 'て': 'te', 'と': 'to',
    'な': 'na', 'に': 'ni', 'ぬ': 'nu', 'ね': 'ne', 'の': 'no',
    'は': 'ha', 'ひ': 'hi', 'ふ': 'fu', 'へ': 'he', 'ほ': 'ho',
    'ま': 'ma', 'み': 'mi', 'む': 'mu', 'め': 'me', 'も': 'mo',
    'や': 'ya', 'ゆ': 'yu', 'よ': 'yo',
    'ら': 'ra', 'り': 'ri', 'る': 'ru', 'れ': 're', 'ろ': 'ro',
    'わ': 'wa', 'を': 'wo', 'ん': 'n',

    # Hiragana voiced consonants
    'が': 'ga', 'ぎ': 'gi', 'ぐ': 'gu', 'げ': 'ge', 'ご': 'go',
    'ざ': 'za', 'じ': 'ji', 'ず': 'zu', 'ぜ': 'ze', 'ぞ': 'zo',
    'だ': 'da', 'ぢ': 'ji', 'づ': 'zu', 'で': 'de', 'ど': 'do',
    'ば': 'ba', 'び': 'bi', 'ぶ': 'bu', 'べ': 'be', 'ぼ': 'bo',
    'ぱ': 'pa', 'ぴ': 'pi', 'ぷ': 'pu', 'ぺ': 'pe', 'ぽ': 'po',

    # Hiragana combinations
    'きゃ': 'kya', 'きゅ': 'kyu', 'きょ': 'kyo',
    'しゃ': 'sha', 'しゅ': 'shu', 'しょ': 'sho',
    'ちゃ': 'cha', 'ちゅ': 'chu', 'ちょ': 'cho',
    'にゃ': 'nya', 'にゅ': 'nyu', 'にょ': 'nyo',
    'ひゃ': 'hya', 'ひゅ': 'hyu', 'ひょ': 'hyo',
    'みゃ': 'mya', 'みゅ': 'myu', 'みょ': 'myo',
    'りゃ': 'rya', 'りゅ': 'ryu', 'りょ': 'ryo',
    'ぎゃ': 'gya', 'ぎゅ': 'gyu', 'ぎょ': 'gyo',
    'じゃ': 'ja', 'じゅ': 'ju', 'じょ': 'jo',
    'びゃ': 'bya', 'びゅ': 'byu', 'びょ': 'byo',
    'ぴゃ': 'pya', 'ぴゅ': 'pyu', 'ぴょ': 'pyo',

    # Hiragana small characters and special cases
    'っ': '',  # Small tsu (doubles the following consonant)
    'ゃ': 'ya', 'ゅ': 'yu', 'ょ': 'yo',

    # Common punctuation and spaces
    '\u3000': ' ',  # Ideographic (full-width) space
    '、': ', ', '。': '. ',
}

# Small tsu in both scripts; it geminates (doubles) the next consonant.
_SOKUON = frozenset('っッ')

# Leading romaji letters that are never doubled after a small tsu:
# the five vowels, plus 'n' (kept from the original implementation).
_NON_DOUBLING_INITIALS = frozenset('aiueon')


def _match_kana(text, pos):
    """Return ``(romaji, consumed)`` for the longest kana unit at ``pos``.

    Two-character combinations are tried before single characters.
    ``romaji`` is None when nothing at ``pos`` is in the table (including
    when ``pos`` is past the end of ``text``); ``consumed`` is then 1.
    """
    pair = text[pos:pos + 2]
    if len(pair) == 2 and pair in _KANA_TO_ROMAJI:
        return _KANA_TO_ROMAJI[pair], 2
    return _KANA_TO_ROMAJI.get(text[pos:pos + 1]), 1


def japanese_to_romaji(japanese_text):
    """
    Convert Japanese hiragana and katakana to romaji (Latin alphabet representation).

    Two-character combinations in the table (e.g. 'きゃ', 'ファ', 'ウィ') are
    matched before single kana. A small tsu ('っ' / 'ッ') emits no text of its
    own; it repeats the first letter of the following kana's romaji when that
    letter is a consonant other than 'n'. Characters that are not in the table
    (kanji, Latin text, digits, the long-vowel mark, ...) are copied through
    unchanged.

    Args:
        japanese_text (str): Text containing hiragana and/or katakana characters

    Returns:
        str: The romaji (Latin alphabet) equivalent
    """
    result = []
    length = len(japanese_text)
    i = 0

    while i < length:
        char = japanese_text[i]

        if char in _SOKUON:
            # Look ahead (without consuming) at the unit that follows.
            following, _ = _match_kana(japanese_text, i + 1)
            if following:
                initial = following[0]
                # Require a letter so punctuation or spaces that happen to
                # be in the table are not duplicated (e.g. 'あっ、').
                if initial.isalpha() and initial not in _NON_DOUBLING_INITIALS:
                    result.append(initial)
            i += 1
            continue

        romaji, consumed = _match_kana(japanese_text, i)
        # Unknown characters pass through as-is.
        result.append(char if romaji is None else romaji)
        i += consumed

    return ''.join(result)

7 | 132 | def number_to_text(num, ordinal=False):
|
8 | 133 | """
|
9 | 134 | Convert a number (int or float) to its text representation.
|
|
0 commit comments