
Commit 8d57384 (parent: 34f1de6)

Minor revisions

2 files changed: +5 −2 lines


supar/utils/tokenizer.py

Lines changed: 2 additions & 2 deletions
@@ -6,10 +6,10 @@ class Tokenizer:
     def __init__(self, lang='en'):
         import stanza
         try:
-            self.pipeline = stanza.Pipeline(lang=lang, processors='tokenize', tokenize_no_ssplit=True)
+            self.pipeline = stanza.Pipeline(lang=lang, processors='tokenize', verbose=False, tokenize_no_ssplit=True)
         except Exception:
             stanza.download(lang=lang, resources_url='stanford')
-            self.pipeline = stanza.Pipeline(lang=lang, processors='tokenize', tokenize_no_ssplit=True)
+            self.pipeline = stanza.Pipeline(lang=lang, processors='tokenize', verbose=False, tokenize_no_ssplit=True)
 
     def __call__(self, text):
         return [i.text for i in self.pipeline(text).sentences[0].tokens]
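
The only functional change here is the added verbose=False flag, which silences Stanza's model-loading log output. A minimal usage sketch of the tokenizer, assuming the English Stanza models are already cached or can be downloaded:

from supar.utils.tokenizer import Tokenizer

# Builds a Stanza tokenization pipeline (downloading the English models on
# first use), then returns the surface tokens of the input's first sentence.
tokenizer = Tokenizer(lang='en')
print(tokenizer("She enjoys playing tennis."))
# expected output, roughly: ['She', 'enjoys', 'playing', 'tennis', '.']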

supar/utils/vocab.py

Lines changed: 3 additions & 0 deletions
@@ -63,6 +63,9 @@ def __setstate__(self, state):
         state['stoi'] = stoi
         self.__dict__.update(state)
 
+    def items(self):
+        return self.stoi.items()
+
     def extend(self, tokens):
         self.itos.extend(sorted(set(tokens).difference(self.stoi)))
         self.stoi.update({token: i for i, token in enumerate(self.itos)})
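
The new items() method simply exposes the internal token-to-index dict (stoi), mirroring dict.items(). A small sketch of how it could be used; the constructor call below is an assumption, since only stoi, itos, extend, and __setstate__ are visible in this diff:

from collections import Counter
from supar.utils.vocab import Vocab

# Assumption: Vocab can be built from a Counter of token frequencies; the
# exact constructor signature may differ from what is shown here.
vocab = Vocab(Counter(['the', 'cat', 'sat', 'on', 'the', 'mat']))
vocab.extend(['dog'])

# items() delegates to the internal `stoi` dict, yielding (token, index) pairs.
for token, index in vocab.items():
    print(token, index)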
