Skip to content

Commit 1e33460

Browse files
committed
vspace, hspaceを追加
1 parent 76275ce commit 1e33460

File tree

2 files changed

+2631
-1
lines changed

2 files changed

+2631
-1
lines changed

atcodertools/fmtprediction/tokenize_format.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,14 @@ def _is_ascii(s):
1212

1313

1414
# Substrings that mark a token as a "dots" placeholder; _is_noise() treats any
# token containing one of them as noise.
# NOTE(review): these look like LaTeX ellipsis command names - confirm.
DOTS_PATTERNS = [
    "ldots",
    "cdots",
    "vdots",
    "ddots",
    "dots",
]
15+
# Substrings that mark a token as a spacing directive; _is_noise() treats any
# token containing one of them as noise.
# NOTE(review): these look like LaTeX spacing command names - confirm.
SPACE_PATTERNS = [
    "hspace",
    "vspace",
]
1516

1617

1718
def _is_noise(s):
    """Return True when the token *s* is filler rather than a real token.

    A token counts as noise when it contains any entry of DOTS_PATTERNS or
    SPACE_PATTERNS as a substring, or when it is exactly a colon or a run
    of one to four periods.
    """
    # Substring match: the pattern may appear anywhere inside the token.
    for noise_patterns in (DOTS_PATTERNS, SPACE_PATTERNS):
        for fragment in noise_patterns:
            if fragment in s:
                return True

    # Exact match only: punctuation that carries no format information.
    return s in (":", "....", "...", "..", ".")
2225

@@ -68,6 +71,7 @@ def _remove_spaces_in_curly_brackets(input_format):
6871
def _sanitized_tokens(input_format: str) -> List[str]:
6972
input_format = input_format.replace("\n", " ").replace("…", " ").replace("...", " ").replace(
7073
"..", " ").replace("‥", " ").replace("\\ ", " ").replace("}", "} ").replace(" ", " ").replace(", ", ",")
74+
input_format = input_format.replace(" _ ", "_") # 空白の添字を削除
7175
input_format = _remove_spaces_in_curly_brackets(input_format)
7276
input_format = _divide_consecutive_vars(input_format)
7377
input_format = _normalize_index(input_format)

0 commit comments

Comments
 (0)