Skip to content

Commit 4cdeff1

Browse files
committed
SuPar v1.1.0
1 parent 7f36b10 commit 4cdeff1

25 files changed

+3393
-2839
lines changed

setup.py

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@
44

55
setup(
66
name='supar',
7-
version='1.0.1',
7+
version='1.1.0',
88
author='Yu Zhang',
99
author_email='yzhang.cs@outlook.com',
10-
description='Syntactic Parsing Models',
10+
description='Syntactic/Semantic Parsing Models',
1111
long_description=open('README.md', 'r').read(),
1212
long_description_content_type='text/markdown',
1313
url='https://github.com/yzhangcs/parser',
@@ -20,25 +20,24 @@
2020
'Topic :: Text Processing :: Linguistic'
2121
],
2222
setup_requires=[
23-
'setuptools>=18.0',
23+
'setuptools>=56.0',
2424
],
2525
install_requires=[
26-
'torch>=1.7.0',
27-
'transformers>=3.1.0',
26+
'torch>=1.7.1',
27+
'transformers>=4.0.0',
2828
'nltk',
2929
'stanza',
3030
'dill'],
3131
entry_points={
3232
'console_scripts': [
33-
'biaffine-dependency=supar.cmds.biaffine_dependency:main',
34-
'crfnp-dependency=supar.cmds.crfnp_dependency:main',
35-
'crf-dependency=supar.cmds.crf_dependency:main',
36-
'crf2o-dependency=supar.cmds.crf2o_dependency:main',
37-
'crf-constituency=supar.cmds.crf_constituency:main',
38-
'biaffine-semantic-dependency=supar.cmds.biaffine_semantic_dependency:main',
39-
'vi-semantic-dependency=supar.cmds.vi_semantic_dependency:main'
33+
'biaffine-dep=supar.cmds.biaffine_dep:main',
34+
'crf-dep=supar.cmds.crf_dep:main',
35+
'crf2o-dep=supar.cmds.crf2o_dep:main',
36+
'crf-con=supar.cmds.crf_con:main',
37+
'biaffine-sdp=supar.cmds.biaffine_sdp:main',
38+
'vi-sdp=supar.cmds.vi_sdp:main'
4039
]
4140
},
42-
python_requires='>=3.6',
41+
python_requires='>=3.7',
4342
zip_safe=False
4443
)

supar/__init__.py

Lines changed: 29 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2,42 +2,51 @@
22

33
from .parsers import (BiaffineDependencyParser,
44
BiaffineSemanticDependencyParser, CRF2oDependencyParser,
5-
CRFConstituencyParser, CRFDependencyParser,
6-
CRFNPDependencyParser, Parser,
5+
CRFConstituencyParser, CRFDependencyParser, Parser,
6+
VIConstituencyParser, VIDependencyParser,
77
VISemanticDependencyParser)
88

99
__all__ = ['BiaffineDependencyParser',
10-
'CRFNPDependencyParser',
1110
'CRFDependencyParser',
1211
'CRF2oDependencyParser',
12+
'VIDependencyParser',
1313
'CRFConstituencyParser',
14+
'VIConstituencyParser',
1415
'BiaffineSemanticDependencyParser',
1516
'VISemanticDependencyParser',
1617
'Parser']
1718

18-
__version__ = '1.0.1'
19+
__version__ = '1.1.0'
1920

2021
PARSER = {parser.NAME: parser for parser in [BiaffineDependencyParser,
21-
CRFNPDependencyParser,
2222
CRFDependencyParser,
2323
CRF2oDependencyParser,
24+
VIDependencyParser,
2425
CRFConstituencyParser,
26+
VIConstituencyParser,
2527
BiaffineSemanticDependencyParser,
2628
VISemanticDependencyParser]}
2729

28-
PRETRAINED = {
29-
'biaffine-dep-en': 'https://github.com/yzhangcs/parser/releases/download/v1.0.0/ptb.biaffine.dependency.char.zip',
30-
'biaffine-dep-zh': 'https://github.com/yzhangcs/parser/releases/download/v1.0.0/ctb7.biaffine.dependency.char.zip',
31-
'biaffine-dep-bert-en': 'https://github.com/yzhangcs/parser/releases/download/v1.0.0/ptb.biaffine.dependency.bert.zip',
32-
'biaffine-dep-bert-zh': 'https://github.com/yzhangcs/parser/releases/download/v1.0.0/ctb7.biaffine.dependency.bert.zip',
33-
'crfnp-dep-en': 'https://github.com/yzhangcs/parser/releases/download/v1.0.0/ptb.crfnp.dependency.char.zip',
34-
'crfnp-dep-zh': 'https://github.com/yzhangcs/parser/releases/download/v1.0.0/ctb7.crfnp.dependency.char.zip',
35-
'crf-dep-en': 'https://github.com/yzhangcs/parser/releases/download/v1.0.0/ptb.crf.dependency.char.zip',
36-
'crf-dep-zh': 'https://github.com/yzhangcs/parser/releases/download/v1.0.0/ctb7.crf.dependency.char.zip',
37-
'crf2o-dep-en': 'https://github.com/yzhangcs/parser/releases/download/v1.0.0/ptb.crf2o.dependency.char.zip',
38-
'crf2o-dep-zh': 'https://github.com/yzhangcs/parser/releases/download/v1.0.0/ctb7.crf2o.dependency.char.zip',
39-
'crf-con-en': 'https://github.com/yzhangcs/parser/releases/download/v1.0.0/ptb.crf.constituency.char.zip',
40-
'crf-con-zh': 'https://github.com/yzhangcs/parser/releases/download/v1.0.0/ctb7.crf.constituency.char.zip',
41-
'crf-con-bert-en': 'https://github.com/yzhangcs/parser/releases/download/v1.0.0/ptb.crf.constituency.bert.zip',
42-
'crf-con-bert-zh': 'https://github.com/yzhangcs/parser/releases/download/v1.0.0/ctb7.crf.constituency.bert.zip'
30+
SRC = 'https://github.com/yzhangcs/parser/releases/download'
31+
NAME = {
32+
'biaffine-dep-en': 'ptb.biaffine.dep.lstm.char',
33+
'biaffine-dep-zh': 'ctb7.biaffine.dep.lstm.char',
34+
'crf2o-dep-en': 'ptb.crf2o.dep.lstm.char',
35+
'crf2o-dep-zh': 'ctb7.crf2o.dep.lstm.char',
36+
'biaffine-dep-roberta-en': 'ptb.biaffine.dep.roberta',
37+
'biaffine-dep-electra-zh': 'ctb7.biaffine.dep.electra',
38+
'biaffine-dep-xlmr': 'ud.biaffine.dep.xlmr',
39+
'crf-con-en': 'ptb.crf.con.lstm.char',
40+
'crf-con-zh': 'ctb7.crf.con.lstm.char',
41+
'crf-con-roberta-en': 'ptb.crf.con.roberta',
42+
'crf-con-electra-zh': 'ctb7.crf.con.electra',
43+
'crf-con-xlmr': 'spmrl.crf.con.xlmr',
44+
'biaffine-sdp-en': 'dm.biaffine.sdp.lstm.tag-char-lemma',
45+
'biaffine-sdp-zh': 'semeval16.biaffine.sdp.lstm.tag-char-lemma',
46+
'vi-sdp-en': 'dm.vi.sdp.lstm.tag-char-lemma',
47+
'vi-sdp-zh': 'semeval16.vi.sdp.lstm.tag-char-lemma',
48+
'biaffine-sdp-roberta-en': 'dm.biaffine.sdp.roberta',
49+
'biaffine-sdp-electra-zh': 'semeval16.biaffine.sdp.electra'
4350
}
51+
MODEL = {n: f'{SRC}/v{__version__}/{m}.zip' for n, m in NAME.items()}
52+
CONFIG = {n: f'{SRC}/v{__version__}/{m}.ini' for n, m in NAME.items()}

supar/cmds/biaffine_dependency.py renamed to supar/cmds/biaffine_dep.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,15 @@
99
def main():
1010
parser = argparse.ArgumentParser(description='Create Biaffine Dependency Parser.')
1111
parser.add_argument('--tree', action='store_true', help='whether to ensure well-formedness')
12-
parser.add_argument('--proj', action='store_true', help='whether to projectivise the data')
12+
parser.add_argument('--proj', action='store_true', help='whether to projectivize the data')
1313
parser.add_argument('--partial', action='store_true', help='whether partial annotation is included')
1414
parser.set_defaults(Parser=BiaffineDependencyParser)
1515
subparsers = parser.add_subparsers(title='Commands', dest='mode')
1616
# train
1717
subparser = subparsers.add_parser('train', help='Train a parser.')
18-
subparser.add_argument('--feat', '-f', choices=['tag', 'char', 'bert'], help='choices of additional features')
18+
subparser.add_argument('--feat', '-f', choices=['tag', 'char', 'bert'], nargs='+', help='features to use')
1919
subparser.add_argument('--build', '-b', action='store_true', help='whether to build the model first')
20+
subparser.add_argument('--encoder', choices=['lstm', 'bert'], default='lstm', help='encoder to use')
2021
subparser.add_argument('--punct', action='store_true', help='whether to include punctuation')
2122
subparser.add_argument('--max-len', type=int, help='max length of the sentences')
2223
subparser.add_argument('--buckets', default=32, type=int, help='max num of buckets to use')
@@ -26,18 +27,18 @@ def main():
2627
subparser.add_argument('--embed', default='data/glove.6B.100d.txt', help='path to pretrained embeddings')
2728
subparser.add_argument('--unk', default='unk', help='unk token in pretrained embeddings')
2829
subparser.add_argument('--n-embed', default=100, type=int, help='dimension of embeddings')
29-
subparser.add_argument('--bert', default='bert-base-cased', help='which bert model to use')
30+
subparser.add_argument('--bert', default='bert-base-cased', help='which BERT model to use')
3031
# evaluate
3132
subparser = subparsers.add_parser('evaluate', help='Evaluate the specified parser and dataset.')
3233
subparser.add_argument('--punct', action='store_true', help='whether to include punctuation')
3334
subparser.add_argument('--buckets', default=8, type=int, help='max num of buckets to use')
3435
subparser.add_argument('--data', default='data/ptb/test.conllx', help='path to dataset')
3536
# predict
3637
subparser = subparsers.add_parser('predict', help='Use a trained parser to make predictions.')
37-
subparser.add_argument('--prob', action='store_true', help='whether to output probs')
3838
subparser.add_argument('--buckets', default=8, type=int, help='max num of buckets to use')
3939
subparser.add_argument('--data', default='data/ptb/test.conllx', help='path to dataset')
4040
subparser.add_argument('--pred', default='pred.conllx', help='path to predicted result')
41+
subparser.add_argument('--prob', action='store_true', help='whether to output probs')
4142
parse(parser)
4243

4344

supar/cmds/biaffine_semantic_dependency.py renamed to supar/cmds/biaffine_sdp.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,9 @@ def main():
1212
subparsers = parser.add_subparsers(title='Commands', dest='mode')
1313
# train
1414
subparser = subparsers.add_parser('train', help='Train a parser.')
15-
subparser.add_argument('--feat', '-f', default='tag,char,lemma', help='additional features to use,separated by commas.')
15+
subparser.add_argument('--feat', '-f', choices=['tag', 'char', 'lemma', 'bert'], nargs='+', help='features to use')
1616
subparser.add_argument('--build', '-b', action='store_true', help='whether to build the model first')
17+
subparser.add_argument('--encoder', choices=['lstm', 'bert'], default='lstm', help='encoder to use')
1718
subparser.add_argument('--max-len', type=int, help='max length of the sentences')
1819
subparser.add_argument('--buckets', default=32, type=int, help='max num of buckets to use')
1920
subparser.add_argument('--train', default='data/sdp/DM/train.conllu', help='path to train file')
@@ -22,17 +23,18 @@ def main():
2223
subparser.add_argument('--embed', default='data/glove.6B.100d.txt', help='path to pretrained embeddings')
2324
subparser.add_argument('--unk', default='unk', help='unk token in pretrained embeddings')
2425
subparser.add_argument('--n-embed', default=100, type=int, help='dimension of embeddings')
25-
subparser.add_argument('--bert', default='bert-base-cased', help='which bert model to use')
26+
subparser.add_argument('--n-embed-proj', default=125, type=int, help='dimension of projected embeddings')
27+
subparser.add_argument('--bert', default='bert-base-cased', help='which BERT model to use')
2628
# evaluate
2729
subparser = subparsers.add_parser('evaluate', help='Evaluate the specified parser and dataset.')
2830
subparser.add_argument('--buckets', default=8, type=int, help='max num of buckets to use')
2931
subparser.add_argument('--data', default='data/sdp/DM/test.conllu', help='path to dataset')
3032
# predict
3133
subparser = subparsers.add_parser('predict', help='Use a trained parser to make predictions.')
32-
subparser.add_argument('--prob', action='store_true', help='whether to output probs')
3334
subparser.add_argument('--buckets', default=8, type=int, help='max num of buckets to use')
3435
subparser.add_argument('--data', default='data/sdp/DM/test.conllu', help='path to dataset')
3536
subparser.add_argument('--pred', default='pred.conllu', help='path to predicted result')
37+
subparser.add_argument('--prob', action='store_true', help='whether to output probs')
3638
parse(parser)
3739

3840

supar/cmds/crf2o_dependency.py renamed to supar/cmds/crf2o_dep.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,9 @@ def main():
1616
subparsers = parser.add_subparsers(title='Commands', dest='mode')
1717
# train
1818
subparser = subparsers.add_parser('train', help='Train a parser.')
19-
subparser.add_argument('--feat', '-f', choices=['tag', 'char', 'bert'], help='choices of additional features')
19+
subparser.add_argument('--feat', '-f', choices=['tag', 'char', 'bert'], nargs='+', help='features to use')
2020
subparser.add_argument('--build', '-b', action='store_true', help='whether to build the model first')
21+
subparser.add_argument('--encoder', choices=['lstm', 'bert'], default='lstm', help='encoder to use')
2122
subparser.add_argument('--punct', action='store_true', help='whether to include punctuation')
2223
subparser.add_argument('--max-len', type=int, help='max length of the sentences')
2324
subparser.add_argument('--buckets', default=32, type=int, help='max num of buckets to use')
@@ -27,18 +28,18 @@ def main():
2728
subparser.add_argument('--embed', default='data/glove.6B.100d.txt', help='path to pretrained embeddings')
2829
subparser.add_argument('--unk', default='unk', help='unk token in pretrained embeddings')
2930
subparser.add_argument('--n-embed', default=100, type=int, help='dimension of embeddings')
30-
subparser.add_argument('--bert', default='bert-base-cased', help='which bert model to use')
31+
subparser.add_argument('--bert', default='bert-base-cased', help='which BERT model to use')
3132
# evaluate
3233
subparser = subparsers.add_parser('evaluate', help='Evaluate the specified parser and dataset.')
3334
subparser.add_argument('--punct', action='store_true', help='whether to include punctuation')
3435
subparser.add_argument('--buckets', default=8, type=int, help='max num of buckets to use')
3536
subparser.add_argument('--data', default='data/ptb/test.conllx', help='path to dataset')
3637
# predict
3738
subparser = subparsers.add_parser('predict', help='Use a trained parser to make predictions.')
38-
subparser.add_argument('--prob', action='store_true', help='whether to output probs')
3939
subparser.add_argument('--buckets', default=8, type=int, help='max num of buckets to use')
4040
subparser.add_argument('--data', default='data/ptb/test.conllx', help='path to dataset')
4141
subparser.add_argument('--pred', default='pred.conllx', help='path to predicted result')
42+
subparser.add_argument('--prob', action='store_true', help='whether to output probs')
4243
parse(parser)
4344

4445

supar/cmds/crf_constituency.py renamed to supar/cmds/crf_con.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,9 @@ def main():
1313
subparsers = parser.add_subparsers(title='Commands', dest='mode')
1414
# train
1515
subparser = subparsers.add_parser('train', help='Train a parser.')
16-
subparser.add_argument('--feat', '-f', choices=['tag', 'char', 'bert'], help='choices of additional features')
16+
subparser.add_argument('--feat', '-f', choices=['tag', 'char', 'bert'], nargs='+', help='features to use')
1717
subparser.add_argument('--build', '-b', action='store_true', help='whether to build the model first')
18+
subparser.add_argument('--encoder', choices=['lstm', 'bert'], default='lstm', help='encoder to use')
1819
subparser.add_argument('--max-len', type=int, help='max length of the sentences')
1920
subparser.add_argument('--buckets', default=32, type=int, help='max num of buckets to use')
2021
subparser.add_argument('--train', default='data/ptb/train.pid', help='path to train file')
@@ -23,17 +24,17 @@ def main():
2324
subparser.add_argument('--embed', default='data/glove.6B.100d.txt', help='path to pretrained embeddings')
2425
subparser.add_argument('--unk', default='unk', help='unk token in pretrained embeddings')
2526
subparser.add_argument('--n-embed', default=100, type=int, help='dimension of embeddings')
26-
subparser.add_argument('--bert', default='bert-base-cased', help='which bert model to use')
27+
subparser.add_argument('--bert', default='bert-base-cased', help='which BERT model to use')
2728
# evaluate
2829
subparser = subparsers.add_parser('evaluate', help='Evaluate the specified parser and dataset.')
2930
subparser.add_argument('--buckets', default=8, type=int, help='max num of buckets to use')
3031
subparser.add_argument('--data', default='data/ptb/test.pid', help='path to dataset')
3132
# predict
3233
subparser = subparsers.add_parser('predict', help='Use a trained parser to make predictions.')
33-
subparser.add_argument('--prob', action='store_true', help='whether to output probs')
3434
subparser.add_argument('--buckets', default=8, type=int, help='max num of buckets to use')
3535
subparser.add_argument('--data', default='data/ptb/test.pid', help='path to dataset')
3636
subparser.add_argument('--pred', default='pred.pid', help='path to predicted result')
37+
subparser.add_argument('--prob', action='store_true', help='whether to output probs')
3738
parse(parser)
3839

3940

supar/cmds/crf_dependency.py renamed to supar/cmds/crf_dep.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,9 @@ def main():
1616
subparsers = parser.add_subparsers(title='Commands', dest='mode')
1717
# train
1818
subparser = subparsers.add_parser('train', help='Train a parser.')
19-
subparser.add_argument('--feat', '-f', choices=['tag', 'char', 'bert'], help='choices of additional features')
19+
subparser.add_argument('--feat', '-f', choices=['tag', 'char', 'bert'], nargs='+', help='features to use')
2020
subparser.add_argument('--build', '-b', action='store_true', help='whether to build the model first')
21+
subparser.add_argument('--encoder', choices=['lstm', 'bert'], default='lstm', help='encoder to use')
2122
subparser.add_argument('--punct', action='store_true', help='whether to include punctuation')
2223
subparser.add_argument('--max-len', type=int, help='max length of the sentences')
2324
subparser.add_argument('--buckets', default=32, type=int, help='max num of buckets to use')
@@ -27,18 +28,18 @@ def main():
2728
subparser.add_argument('--embed', default='data/glove.6B.100d.txt', help='path to pretrained embeddings')
2829
subparser.add_argument('--unk', default='unk', help='unk token in pretrained embeddings')
2930
subparser.add_argument('--n-embed', default=100, type=int, help='dimension of embeddings')
30-
subparser.add_argument('--bert', default='bert-base-cased', help='which bert model to use')
31+
subparser.add_argument('--bert', default='bert-base-cased', help='which BERT model to use')
3132
# evaluate
3233
subparser = subparsers.add_parser('evaluate', help='Evaluate the specified parser and dataset.')
3334
subparser.add_argument('--punct', action='store_true', help='whether to include punctuation')
3435
subparser.add_argument('--buckets', default=8, type=int, help='max num of buckets to use')
3536
subparser.add_argument('--data', default='data/ptb/test.conllx', help='path to dataset')
3637
# predict
3738
subparser = subparsers.add_parser('predict', help='Use a trained parser to make predictions.')
38-
subparser.add_argument('--prob', action='store_true', help='whether to output probs')
3939
subparser.add_argument('--buckets', default=8, type=int, help='max num of buckets to use')
4040
subparser.add_argument('--data', default='data/ptb/test.conllx', help='path to dataset')
4141
subparser.add_argument('--pred', default='pred.conllx', help='path to predicted result')
42+
subparser.add_argument('--prob', action='store_true', help='whether to output probs')
4243
parse(parser)
4344

4445

0 commit comments

Comments
 (0)