Commit 872202e

1 parent 76f6be6 commit 872202e

7 files changed: 32 additions, 33 deletions

setup.py

Lines changed: 1 addition & 1 deletion
@@ -4,7 +4,7 @@
 
 setup(
     name='supar',
-    version='1.1.0',
+    version='1.1.1',
     author='Yu Zhang',
     author_email='yzhang.cs@outlook.com',
     description='Syntactic/Semantic Parsing Models',

supar/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -16,7 +16,7 @@
            'VISemanticDependencyParser',
            'Parser']
 
-__version__ = '1.1.0'
+__version__ = '1.1.1'
 
 PARSER = {parser.NAME: parser for parser in [BiaffineDependencyParser,
                                              CRFDependencyParser,

supar/parsers/con.py

Lines changed: 5 additions & 5 deletions
@@ -7,7 +7,7 @@
 from supar.models import CRFConstituencyModel, VIConstituencyModel
 from supar.parsers.parser import Parser
 from supar.utils import Config, Dataset, Embedding
-from supar.utils.common import bos, eos, pad, unk
+from supar.utils.common import BOS, EOS, PAD, UNK
 from supar.utils.field import ChartField, Field, RawField, SubwordField
 from supar.utils.logging import get_logger, progress_bar
 from supar.utils.metric import SpanMetric
@@ -251,7 +251,7 @@ def build(cls, path, min_freq=2, fix_len=20, **kwargs):
             return parser
 
         logger.info("Building the fields")
-        WORD = Field('words', pad=pad, unk=unk, bos=bos, eos=eos, lower=True)
+        WORD = Field('words', pad=PAD, unk=UNK, bos=BOS, eos=EOS, lower=True)
         TAG, CHAR, BERT = None, None, None
         if args.encoder != 'lstm':
             from transformers import (AutoTokenizer, GPT2Tokenizer,
@@ -267,11 +267,11 @@ def build(cls, path, min_freq=2, fix_len=20, **kwargs):
                                 fn=None if not isinstance(t, (GPT2Tokenizer, GPT2TokenizerFast)) else lambda x: ' '+x)
             WORD.vocab = t.get_vocab()
         else:
-            WORD = Field('words', pad=pad, unk=unk, bos=bos, eos=eos, lower=True)
+            WORD = Field('words', pad=PAD, unk=UNK, bos=BOS, eos=EOS, lower=True)
             if 'tag' in args.feat:
-                TAG = Field('tags', bos=bos, eos=eos)
+                TAG = Field('tags', bos=BOS, eos=EOS)
             if 'char' in args.feat:
-                CHAR = SubwordField('chars', pad=pad, unk=unk, bos=bos, eos=eos, fix_len=args.fix_len)
+                CHAR = SubwordField('chars', pad=PAD, unk=UNK, bos=BOS, eos=EOS, fix_len=args.fix_len)
             if 'bert' in args.feat:
                 from transformers import (AutoTokenizer, GPT2Tokenizer,
                                           GPT2TokenizerFast)

supar/parsers/dep.py

Lines changed: 12 additions & 12 deletions
@@ -8,7 +8,7 @@
                           CRFDependencyModel, VIDependencyModel)
 from supar.parsers.parser import Parser
 from supar.utils import Config, Dataset, Embedding
-from supar.utils.common import bos, pad, unk
+from supar.utils.common import BOS, PAD, UNK
 from supar.utils.field import ChartField, Field, RawField, SubwordField
 from supar.utils.fn import ispunct
 from supar.utils.logging import get_logger, progress_bar
@@ -272,11 +272,11 @@ def build(cls, path, min_freq=2, fix_len=20, **kwargs):
                                 fn=None if not isinstance(t, (GPT2Tokenizer, GPT2TokenizerFast)) else lambda x: ' '+x)
             WORD.vocab = t.get_vocab()
         else:
-            WORD = Field('words', pad=pad, unk=unk, bos=bos, lower=True)
+            WORD = Field('words', pad=PAD, unk=UNK, bos=BOS, lower=True)
             if 'tag' in args.feat:
-                TAG = Field('tags', bos=bos)
+                TAG = Field('tags', bos=BOS)
             if 'char' in args.feat:
-                CHAR = SubwordField('chars', pad=pad, unk=unk, bos=bos, fix_len=args.fix_len)
+                CHAR = SubwordField('chars', pad=PAD, unk=UNK, bos=BOS, fix_len=args.fix_len)
             if 'bert' in args.feat:
                 from transformers import (AutoTokenizer, GPT2Tokenizer,
                                           GPT2TokenizerFast)
@@ -290,8 +290,8 @@ def build(cls, path, min_freq=2, fix_len=20, **kwargs):
                                     fn=None if not isinstance(t, (GPT2Tokenizer, GPT2TokenizerFast)) else lambda x: ' '+x)
                 BERT.vocab = t.get_vocab()
         TEXT = RawField('texts')
-        ARC = Field('arcs', bos=bos, use_vocab=False, fn=CoNLL.get_arcs)
-        REL = Field('rels', bos=bos)
+        ARC = Field('arcs', bos=BOS, use_vocab=False, fn=CoNLL.get_arcs)
+        REL = Field('rels', bos=BOS)
         transform = CoNLL(FORM=(WORD, TEXT, CHAR, BERT), CPOS=TAG, HEAD=ARC, DEPREL=REL)
 
         train = Dataset(transform, args.train)
@@ -795,11 +795,11 @@ def build(cls, path, min_freq=2, fix_len=20, **kwargs):
                                 fn=None if not isinstance(t, (GPT2Tokenizer, GPT2TokenizerFast)) else lambda x: ' '+x)
             WORD.vocab = t.get_vocab()
         else:
-            WORD = Field('words', pad=pad, unk=unk, bos=bos, lower=True)
+            WORD = Field('words', pad=PAD, unk=UNK, bos=BOS, lower=True)
             if 'tag' in args.feat:
-                TAG = Field('tags', bos=bos)
+                TAG = Field('tags', bos=BOS)
             if 'char' in args.feat:
-                CHAR = SubwordField('chars', pad=pad, unk=unk, bos=bos, fix_len=args.fix_len)
+                CHAR = SubwordField('chars', pad=PAD, unk=UNK, bos=BOS, fix_len=args.fix_len)
             if 'bert' in args.feat:
                 from transformers import (AutoTokenizer, GPT2Tokenizer,
                                           GPT2TokenizerFast)
@@ -813,9 +813,9 @@ def build(cls, path, min_freq=2, fix_len=20, **kwargs):
                                     fn=None if not isinstance(t, (GPT2Tokenizer, GPT2TokenizerFast)) else lambda x: ' '+x)
                 BERT.vocab = t.get_vocab()
         TEXT = RawField('texts')
-        ARC = Field('arcs', bos=bos, use_vocab=False, fn=CoNLL.get_arcs)
-        SIB = ChartField('sibs', bos=bos, use_vocab=False, fn=CoNLL.get_sibs)
-        REL = Field('rels', bos=bos)
+        ARC = Field('arcs', bos=BOS, use_vocab=False, fn=CoNLL.get_arcs)
+        SIB = ChartField('sibs', bos=BOS, use_vocab=False, fn=CoNLL.get_sibs)
+        REL = Field('rels', bos=BOS)
         transform = CoNLL(FORM=(WORD, TEXT, CHAR, BERT), CPOS=TAG, HEAD=(ARC, SIB), DEPREL=REL)
 
         train = Dataset(transform, args.train)

supar/parsers/sdp.py

Lines changed: 6 additions & 6 deletions
@@ -8,7 +8,7 @@
                           VISemanticDependencyModel)
 from supar.parsers.parser import Parser
 from supar.utils import Config, Dataset, Embedding
-from supar.utils.common import bos, pad, unk
+from supar.utils.common import BOS, PAD, UNK
 from supar.utils.field import ChartField, Field, SubwordField
 from supar.utils.logging import get_logger, progress_bar
 from supar.utils.metric import ChartMetric
@@ -223,7 +223,7 @@ def build(cls, path, min_freq=7, fix_len=20, **kwargs):
             return parser
 
         logger.info("Building the fields")
-        WORD = Field('words', pad=pad, unk=unk, bos=bos, lower=True)
+        WORD = Field('words', pad=PAD, unk=UNK, bos=BOS, lower=True)
         TAG, CHAR, LEMMA, BERT = None, None, None, None
         if args.encoder != 'lstm':
             from transformers import (AutoTokenizer, GPT2Tokenizer,
@@ -238,13 +238,13 @@ def build(cls, path, min_freq=7, fix_len=20, **kwargs):
                                 fn=None if not isinstance(t, (GPT2Tokenizer, GPT2TokenizerFast)) else lambda x: ' '+x)
             WORD.vocab = t.get_vocab()
         else:
-            WORD = Field('words', pad=pad, unk=unk, bos=bos, lower=True)
+            WORD = Field('words', pad=PAD, unk=UNK, bos=BOS, lower=True)
             if 'tag' in args.feat:
-                TAG = Field('tags', bos=bos)
+                TAG = Field('tags', bos=BOS)
             if 'char' in args.feat:
-                CHAR = SubwordField('chars', pad=pad, unk=unk, bos=bos, fix_len=args.fix_len)
+                CHAR = SubwordField('chars', pad=PAD, unk=UNK, bos=BOS, fix_len=args.fix_len)
             if 'lemma' in args.feat:
-                LEMMA = Field('lemmas', pad=pad, unk=unk, bos=bos, lower=True)
+                LEMMA = Field('lemmas', pad=PAD, unk=UNK, bos=BOS, lower=True)
             if 'bert' in args.feat:
                 from transformers import (AutoTokenizer, GPT2Tokenizer,
                                           GPT2TokenizerFast)

supar/utils/common.py

Lines changed: 4 additions & 4 deletions
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-pad = '<pad>'
-unk = '<unk>'
-bos = '<bos>'
-eos = '<eos>'
+PAD = '<pad>'
+UNK = '<unk>'
+BOS = '<bos>'
+EOS = '<eos>'
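
Not part of the commit, just a minimal usage sketch: the special-token strings themselves are unchanged, only the constant names move from lowercase to uppercase, so any code importing pad/unk/bos/eos from supar.utils.common has to switch to the new names. The Field call below mirrors the ones touched in the parsers above.

# Illustrative sketch only (not from this commit): using the renamed constants
# exactly as the updated parsers do.
from supar.utils.common import BOS, EOS, PAD, UNK
from supar.utils.field import Field

# Underlying token strings are unchanged: '<pad>', '<unk>', '<bos>', '<eos>'.
WORD = Field('words', pad=PAD, unk=UNK, bos=BOS, eos=EOS, lower=True)
print(PAD, UNK, BOS, EOS)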

supar/utils/transform.py

Lines changed: 3 additions & 4 deletions
@@ -6,7 +6,6 @@
 import nltk
 from supar.utils.logging import get_logger, progress_bar
 from supar.utils.tokenizer import Tokenizer
-from torch.distributions.utils import lazy_property
 
 logger = get_logger(__name__)
 
@@ -46,7 +45,7 @@ def __call__(self, sentences):
     def __getitem__(self, index):
         return getattr(self, self.fields[index])
 
-    @lazy_property
+    @property
     def flattened_fields(self):
         flattened = []
         for field in self:
@@ -138,7 +137,7 @@ def __setstate__(self, state):
 class CoNLL(Transform):
     r"""
     The CoNLL object holds ten fields required for CoNLL-X data format :cite:`buchholz-marsi-2006-conll`.
-    Each field can be binded with one or more :class:`~supar.utils.field.Field` objects. For example,
+    Each field can be bound to one or more :class:`~supar.utils.field.Field` objects. For example,
     ``FORM`` can contain both :class:`~supar.utils.field.Field` and :class:`~supar.utils.field.SubwordField`
     to produce tensors for words and subwords.
 
@@ -611,7 +610,7 @@ def factorize(cls, tree, delete_labels=None, equal_labels=None):
             delete_labels (set[str]):
                 A set of labels to be ignored. This is used for evaluation.
                 If it is a pre-terminal label, delete the word along with the brackets.
-                If it is a non-terminal label, just delete the brackets (don't delete childrens).
+                If it is a non-terminal label, just delete the brackets (don't delete children).
                 In `EVALB`_, the default set is:
                 {'TOP', 'S1', '-NONE-', ',', ':', '``', "''", '.', '?', '!', ''}
                 Default: ``None``.
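
Also not part of the commit: a hypothetical sketch of the behavioral difference behind swapping @lazy_property for @property. torch.distributions.utils.lazy_property computes the value once and caches it on the instance, while a plain property is re-evaluated on every access, so flattened_fields now tracks later changes to the object. The Example class below is invented purely for illustration.

# Hypothetical example (not from the repo) contrasting the two decorators.
from torch.distributions.utils import lazy_property


class Example:
    def __init__(self):
        self.items = [1]

    @lazy_property
    def cached(self):   # computed on first access, then stored on the instance
        return sum(self.items)

    @property
    def live(self):     # recomputed on every access
        return sum(self.items)


e = Example()
assert e.cached == 1 and e.live == 1
e.items.append(2)
assert e.cached == 1    # stale: value was cached at first access
assert e.live == 3      # fresh: recomputed from the current state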
