Skip to content

Commit 5d3cc85

Browse files
Make japanese hiragana and katakana characters work with ACE. (comfyanonymous#7997)
1 parent c7c025b commit 5d3cc85

File tree

3 files changed

+135
-2
lines changed

3 files changed

+135
-2
lines changed

comfy/sd.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -441,7 +441,7 @@ def __init__(self, sd=None, device=None, config=None, dtype=None, metadata=None)
441441
self.working_dtypes = [torch.float16, torch.bfloat16, torch.float32]
442442
elif "vocoder.backbone.channel_layers.0.0.bias" in sd: #Ace Step Audio
443443
self.first_stage_model = comfy.ldm.ace.vae.music_dcae_pipeline.MusicDCAE(source_sample_rate=44100)
444-
self.memory_used_encode = lambda shape, dtype: (shape[2] * 300) * model_management.dtype_size(dtype)
444+
self.memory_used_encode = lambda shape, dtype: (shape[2] * 330) * model_management.dtype_size(dtype)
445445
self.memory_used_decode = lambda shape, dtype: (shape[2] * shape[3] * 87000) * model_management.dtype_size(dtype)
446446
self.latent_channels = 8
447447
self.output_channels = 2

comfy/text_encoders/ace.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import logging
88

99
from tokenizers import Tokenizer
10-
from .ace_text_cleaners import multilingual_cleaners
10+
from .ace_text_cleaners import multilingual_cleaners, japanese_to_romaji
1111

1212
SUPPORT_LANGUAGES = {
1313
"en": 259, "de": 260, "fr": 262, "es": 284, "it": 285,
@@ -65,6 +65,14 @@ def __call__(self, string):
6565
if "spa" in lang:
6666
lang = "es"
6767

68+
try:
69+
line_out = japanese_to_romaji(line)
70+
if line_out != line:
71+
lang = "ja"
72+
line = line_out
73+
except:
74+
pass
75+
6876
try:
6977
if structure_pattern.match(line):
7078
token_idx = self.encode(line, "en")

comfy/text_encoders/ace_text_cleaners.py

+125
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,131 @@
44

55
import re
66

7+
# Romaji for every kana sequence (and the few punctuation marks) that
# japanese_to_romaji understands. Built once at import time instead of on
# every call. Keys are one or two characters long.
_KANA_TO_ROMAJI = {
    # Katakana characters
    'ア': 'a', 'イ': 'i', 'ウ': 'u', 'エ': 'e', 'オ': 'o',
    'カ': 'ka', 'キ': 'ki', 'ク': 'ku', 'ケ': 'ke', 'コ': 'ko',
    'サ': 'sa', 'シ': 'shi', 'ス': 'su', 'セ': 'se', 'ソ': 'so',
    'タ': 'ta', 'チ': 'chi', 'ツ': 'tsu', 'テ': 'te', 'ト': 'to',
    'ナ': 'na', 'ニ': 'ni', 'ヌ': 'nu', 'ネ': 'ne', 'ノ': 'no',
    'ハ': 'ha', 'ヒ': 'hi', 'フ': 'fu', 'ヘ': 'he', 'ホ': 'ho',
    'マ': 'ma', 'ミ': 'mi', 'ム': 'mu', 'メ': 'me', 'モ': 'mo',
    'ヤ': 'ya', 'ユ': 'yu', 'ヨ': 'yo',
    'ラ': 'ra', 'リ': 'ri', 'ル': 'ru', 'レ': 're', 'ロ': 'ro',
    'ワ': 'wa', 'ヲ': 'wo', 'ン': 'n',

    # Katakana voiced consonants
    'ガ': 'ga', 'ギ': 'gi', 'グ': 'gu', 'ゲ': 'ge', 'ゴ': 'go',
    'ザ': 'za', 'ジ': 'ji', 'ズ': 'zu', 'ゼ': 'ze', 'ゾ': 'zo',
    'ダ': 'da', 'ヂ': 'ji', 'ヅ': 'zu', 'デ': 'de', 'ド': 'do',
    'バ': 'ba', 'ビ': 'bi', 'ブ': 'bu', 'ベ': 'be', 'ボ': 'bo',
    'パ': 'pa', 'ピ': 'pi', 'プ': 'pu', 'ペ': 'pe', 'ポ': 'po',

    # Katakana combinations
    'キャ': 'kya', 'キュ': 'kyu', 'キョ': 'kyo',
    'シャ': 'sha', 'シュ': 'shu', 'ショ': 'sho',
    'チャ': 'cha', 'チュ': 'chu', 'チョ': 'cho',
    'ニャ': 'nya', 'ニュ': 'nyu', 'ニョ': 'nyo',
    'ヒャ': 'hya', 'ヒュ': 'hyu', 'ヒョ': 'hyo',
    'ミャ': 'mya', 'ミュ': 'myu', 'ミョ': 'myo',
    'リャ': 'rya', 'リュ': 'ryu', 'リョ': 'ryo',
    'ギャ': 'gya', 'ギュ': 'gyu', 'ギョ': 'gyo',
    'ジャ': 'ja', 'ジュ': 'ju', 'ジョ': 'jo',
    'ビャ': 'bya', 'ビュ': 'byu', 'ビョ': 'byo',
    'ピャ': 'pya', 'ピュ': 'pyu', 'ピョ': 'pyo',

    # Katakana small characters and special cases
    'ッ': '',  # Small tsu (doubles the following consonant)
    'ャ': 'ya', 'ュ': 'yu', 'ョ': 'yo',

    # Katakana extras
    'ヴ': 'vu', 'ファ': 'fa', 'フィ': 'fi', 'フェ': 'fe', 'フォ': 'fo',
    'ウィ': 'wi', 'ウェ': 'we', 'ウォ': 'wo',

    # Hiragana characters
    'あ': 'a', 'い': 'i', 'う': 'u', 'え': 'e', 'お': 'o',
    'か': 'ka', 'き': 'ki', 'く': 'ku', 'け': 'ke', 'こ': 'ko',
    'さ': 'sa', 'し': 'shi', 'す': 'su', 'せ': 'se', 'そ': 'so',
    'た': 'ta', 'ち': 'chi', 'つ': 'tsu', 'て': 'te', 'と': 'to',
    'な': 'na', 'に': 'ni', 'ぬ': 'nu', 'ね': 'ne', 'の': 'no',
    'は': 'ha', 'ひ': 'hi', 'ふ': 'fu', 'へ': 'he', 'ほ': 'ho',
    'ま': 'ma', 'み': 'mi', 'む': 'mu', 'め': 'me', 'も': 'mo',
    'や': 'ya', 'ゆ': 'yu', 'よ': 'yo',
    'ら': 'ra', 'り': 'ri', 'る': 'ru', 'れ': 're', 'ろ': 'ro',
    'わ': 'wa', 'を': 'wo', 'ん': 'n',

    # Hiragana voiced consonants
    'が': 'ga', 'ぎ': 'gi', 'ぐ': 'gu', 'げ': 'ge', 'ご': 'go',
    'ざ': 'za', 'じ': 'ji', 'ず': 'zu', 'ぜ': 'ze', 'ぞ': 'zo',
    'だ': 'da', 'ぢ': 'ji', 'づ': 'zu', 'で': 'de', 'ど': 'do',
    'ば': 'ba', 'び': 'bi', 'ぶ': 'bu', 'べ': 'be', 'ぼ': 'bo',
    'ぱ': 'pa', 'ぴ': 'pi', 'ぷ': 'pu', 'ぺ': 'pe', 'ぽ': 'po',

    # Hiragana combinations
    'きゃ': 'kya', 'きゅ': 'kyu', 'きょ': 'kyo',
    'しゃ': 'sha', 'しゅ': 'shu', 'しょ': 'sho',
    'ちゃ': 'cha', 'ちゅ': 'chu', 'ちょ': 'cho',
    'にゃ': 'nya', 'にゅ': 'nyu', 'にょ': 'nyo',
    'ひゃ': 'hya', 'ひゅ': 'hyu', 'ひょ': 'hyo',
    'みゃ': 'mya', 'みゅ': 'myu', 'みょ': 'myo',
    'りゃ': 'rya', 'りゅ': 'ryu', 'りょ': 'ryo',
    'ぎゃ': 'gya', 'ぎゅ': 'gyu', 'ぎょ': 'gyo',
    'じゃ': 'ja', 'じゅ': 'ju', 'じょ': 'jo',
    'びゃ': 'bya', 'びゅ': 'byu', 'びょ': 'byo',
    'ぴゃ': 'pya', 'ぴゅ': 'pyu', 'ぴょ': 'pyo',

    # Hiragana small characters and special cases
    'っ': '',  # Small tsu (doubles the following consonant)
    'ゃ': 'ya', 'ゅ': 'yu', 'ょ': 'yo',

    # Common punctuation and spaces
    '\u3000': ' ',  # Japanese (ideographic, full-width) space
    '、': ', ', '。': '. ',
}

# Small tsu in hiragana and katakana.
_SOKUON = ('っ', 'ッ')


def japanese_to_romaji(japanese_text):
    """
    Convert Japanese hiragana and katakana to romaji (Latin alphabet representation).

    The text is scanned left to right:

    * A small tsu is replaced by the first letter of the next kana's romaji
      when that letter is a consonant other than "n"; otherwise it is
      dropped.
    * Two-character sequences present in the table (e.g. "きゃ", "ファ")
      are converted as a unit.
    * Any other character is looked up on its own; characters that are not
      in the table (kanji, Latin letters, digits, ...) are kept unchanged.

    Args:
        japanese_text (str): Text containing hiragana and/or katakana characters

    Returns:
        str: The romaji (Latin alphabet) equivalent. The input is returned
        unchanged when it contains nothing from the table, which callers can
        use to detect whether any kana was present.
    """
    result = []
    length = len(japanese_text)
    i = 0

    while i < length:
        char = japanese_text[i]

        # Small tsu: double the consonant that starts the next kana.
        if char in _SOKUON:
            following = _KANA_TO_ROMAJI.get(japanese_text[i + 1]) if i + 1 < length else None
            # isalpha() keeps punctuation/space mappings ("。" -> ". ") from
            # being doubled; vowels and "n" are never geminated here.
            if following and following[0].isalpha() and following[0] not in 'aiueon':
                result.append(following[0])
            i += 1
            continue

        # Two-character sequences. Looking up every pair (not only those
        # ending in small ya/yu/yo) makes entries such as "ファ" and "ウィ"
        # reachable.
        pair = japanese_text[i:i + 2]
        if len(pair) == 2 and pair in _KANA_TO_ROMAJI:
            result.append(_KANA_TO_ROMAJI[pair])
            i += 2
            continue

        # Single character; anything not in the table passes through as is.
        result.append(_KANA_TO_ROMAJI.get(char, char))
        i += 1

    return ''.join(result)
131+
7132
def number_to_text(num, ordinal=False):
8133
"""
9134
Convert a number (int or float) to its text representation.

0 commit comments

Comments
 (0)