Commit a4befaa

ELMo support

1 parent 62f660c · commit a4befaa

File tree

20 files changed: +203 -30 lines changed

supar/cmds/biaffine_dep.py

Lines changed: 1 addition & 1 deletion
@@ -15,7 +15,7 @@ def main():
     subparsers = parser.add_subparsers(title='Commands', dest='mode')
     # train
     subparser = subparsers.add_parser('train', help='Train a parser.')
-    subparser.add_argument('--feat', '-f', choices=['tag', 'char', 'bert'], nargs='+', help='features to use')
+    subparser.add_argument('--feat', '-f', choices=['tag', 'char', 'elmo', 'bert'], nargs='+', help='features to use')
     subparser.add_argument('--build', '-b', action='store_true', help='whether to build the model first')
     subparser.add_argument('--checkpoint', action='store_true', help='whether to load a checkpoint to restore training')
     subparser.add_argument('--encoder', choices=['lstm', 'bert'], default='lstm', help='encoder to use')

supar/cmds/biaffine_sdp.py

Lines changed: 1 addition & 1 deletion
@@ -12,7 +12,7 @@ def main():
     subparsers = parser.add_subparsers(title='Commands', dest='mode')
     # train
     subparser = subparsers.add_parser('train', help='Train a parser.')
-    subparser.add_argument('--feat', '-f', choices=['tag', 'char', 'lemma', 'bert'], nargs='+', help='features to use')
+    subparser.add_argument('--feat', '-f', choices=['tag', 'char', 'lemma', 'elmo', 'bert'], nargs='+', help='features to use')
     subparser.add_argument('--build', '-b', action='store_true', help='whether to build the model first')
     subparser.add_argument('--checkpoint', action='store_true', help='whether to load a checkpoint to restore training')
     subparser.add_argument('--encoder', choices=['lstm', 'bert'], default='lstm', help='encoder to use')

supar/cmds/crf2o_dep.py

Lines changed: 1 addition & 1 deletion
@@ -16,7 +16,7 @@ def main():
     subparsers = parser.add_subparsers(title='Commands', dest='mode')
     # train
     subparser = subparsers.add_parser('train', help='Train a parser.')
-    subparser.add_argument('--feat', '-f', choices=['tag', 'char', 'bert'], nargs='+', help='features to use')
+    subparser.add_argument('--feat', '-f', choices=['tag', 'char', 'elmo', 'bert'], nargs='+', help='features to use')
     subparser.add_argument('--build', '-b', action='store_true', help='whether to build the model first')
     subparser.add_argument('--checkpoint', action='store_true', help='whether to load a checkpoint to restore training')
     subparser.add_argument('--encoder', choices=['lstm', 'bert'], default='lstm', help='encoder to use')

supar/cmds/crf_con.py

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@ def main():
     subparsers = parser.add_subparsers(title='Commands', dest='mode')
     # train
     subparser = subparsers.add_parser('train', help='Train a parser.')
-    subparser.add_argument('--feat', '-f', choices=['tag', 'char', 'bert'], nargs='+', help='features to use')
+    subparser.add_argument('--feat', '-f', choices=['tag', 'char', 'elmo', 'bert'], nargs='+', help='features to use')
     subparser.add_argument('--build', '-b', action='store_true', help='whether to build the model first')
     subparser.add_argument('--checkpoint', action='store_true', help='whether to load a checkpoint to restore training')
     subparser.add_argument('--encoder', choices=['lstm', 'bert'], default='lstm', help='encoder to use')

supar/cmds/crf_dep.py

Lines changed: 1 addition & 1 deletion
@@ -16,7 +16,7 @@ def main():
     subparsers = parser.add_subparsers(title='Commands', dest='mode')
     # train
     subparser = subparsers.add_parser('train', help='Train a parser.')
-    subparser.add_argument('--feat', '-f', choices=['tag', 'char', 'bert'], nargs='+', help='features to use')
+    subparser.add_argument('--feat', '-f', choices=['tag', 'char', 'elmo', 'bert'], nargs='+', help='features to use')
     subparser.add_argument('--build', '-b', action='store_true', help='whether to build the model first')
     subparser.add_argument('--checkpoint', action='store_true', help='whether to load a checkpoint to restore training')
     subparser.add_argument('--encoder', choices=['lstm', 'bert'], default='lstm', help='encoder to use')

supar/cmds/vi_con.py

Lines changed: 1 addition & 1 deletion
@@ -12,7 +12,7 @@ def main():
     subparsers = parser.add_subparsers(title='Commands', dest='mode')
     # train
     subparser = subparsers.add_parser('train', help='Train a parser.')
-    subparser.add_argument('--feat', '-f', choices=['tag', 'char', 'bert'], nargs='+', help='features to use')
+    subparser.add_argument('--feat', '-f', choices=['tag', 'char', 'elmo', 'bert'], nargs='+', help='features to use')
     subparser.add_argument('--build', '-b', action='store_true', help='whether to build the model first')
     subparser.add_argument('--checkpoint', action='store_true', help='whether to load a checkpoint to restore training')
     subparser.add_argument('--encoder', choices=['lstm', 'bert'], default='lstm', help='encoder to use')

supar/cmds/vi_dep.py

Lines changed: 1 addition & 1 deletion
@@ -15,7 +15,7 @@ def main():
     subparsers = parser.add_subparsers(title='Commands', dest='mode')
     # train
     subparser = subparsers.add_parser('train', help='Train a parser.')
-    subparser.add_argument('--feat', '-f', choices=['tag', 'char', 'bert'], nargs='+', help='features to use')
+    subparser.add_argument('--feat', '-f', choices=['tag', 'char', 'elmo', 'bert'], nargs='+', help='features to use')
     subparser.add_argument('--build', '-b', action='store_true', help='whether to build the model first')
     subparser.add_argument('--checkpoint', action='store_true', help='whether to load a checkpoint to restore training')
     subparser.add_argument('--encoder', choices=['lstm', 'bert'], default='lstm', help='encoder to use')

supar/cmds/vi_sdp.py

Lines changed: 1 addition & 1 deletion
@@ -12,7 +12,7 @@ def main():
     subparsers = parser.add_subparsers(title='Commands', dest='mode')
     # train
     subparser = subparsers.add_parser('train', help='Train a parser.')
-    subparser.add_argument('--feat', '-f', choices=['tag', 'char', 'lemma', 'bert'], nargs='+', help='features to use')
+    subparser.add_argument('--feat', '-f', choices=['tag', 'char', 'lemma', 'elmo', 'bert'], nargs='+', help='features to use')
     subparser.add_argument('--build', '-b', action='store_true', help='whether to build the model first')
     subparser.add_argument('--checkpoint', action='store_true', help='whether to load a checkpoint to restore training')
     subparser.add_argument('--encoder', choices=['lstm', 'bert'], default='lstm', help='encoder to use')

supar/cmds/vi_srl.py

Lines changed: 1 addition & 1 deletion
@@ -12,7 +12,7 @@ def main():
     subparsers = parser.add_subparsers(title='Commands', dest='mode')
     # train
     subparser = subparsers.add_parser('train', help='Train a parser.')
-    subparser.add_argument('--feat', '-f', choices=['tag', 'char', 'lemma', 'bert'], nargs='+', help='features to use')
+    subparser.add_argument('--feat', '-f', choices=['tag', 'char', 'lemma', 'elmo', 'bert'], nargs='+', help='features to use')
     subparser.add_argument('--build', '-b', action='store_true', help='whether to build the model first')
     subparser.add_argument('--checkpoint', action='store_true', help='whether to load a checkpoint to restore training')
     subparser.add_argument('--encoder', choices=['lstm', 'bert'], default='lstm', help='encoder to use')
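
With this change, all nine training entry points above (biaffine_dep, biaffine_sdp, crf2o_dep, crf_con, crf_dep, vi_con, vi_dep, vi_sdp and vi_srl) accept elmo as an additional value for --feat/-f, next to the existing tag, char, lemma and bert choices; since the option is declared with nargs='+', ELMo can be combined with other features in the same run.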

supar/models/con.py

Lines changed: 14 additions & 0 deletions
@@ -45,6 +45,11 @@ class CRFConstituencyModel(Model):
             The size of hidden states of CharLSTM, required if using CharLSTM. Default: 100.
         char_pad_index (int):
             The index of the padding token in the character vocabulary, required if using CharLSTM. Default: 0.
+        elmo (str):
+            Name of the pretrained ELMo registered in `ELMoEmbedding.OPTION`. Default: ``'original_5b'``.
+        elmo_bos_eos (tuple[bool]):
+            A tuple of two boolean values indicating whether to keep start/end boundaries of elmo outputs.
+            Default: ``(True, False)``.
         bert (str):
             Specifies which kind of language model to use, e.g., ``'bert-base-cased'``.
             This is required if ``encoder='bert'`` or using BERT features. The full list can be found in `transformers`_.
@@ -100,6 +105,8 @@ def __init__(self,
                  n_char_embed=50,
                  n_char_hidden=100,
                  char_pad_index=0,
+                 elmo='original_5b',
+                 elmo_bos_eos=(True, True),
                  bert=None,
                  n_bert_layers=4,
                  mix_dropout=.0,
@@ -247,6 +254,11 @@ class VIConstituencyModel(CRFConstituencyModel):
             The size of hidden states of CharLSTM, required if using CharLSTM. Default: 100.
         char_pad_index (int):
             The index of the padding token in the character vocabulary, required if using CharLSTM. Default: 0.
+        elmo (str):
+            Name of the pretrained ELMo registered in `ELMoEmbedding.OPTION`. Default: ``'original_5b'``.
+        elmo_bos_eos (tuple[bool]):
+            A tuple of two boolean values indicating whether to keep start/end boundaries of elmo outputs.
+            Default: ``(True, False)``.
         bert (str):
             Specifies which kind of language model to use, e.g., ``'bert-base-cased'``.
             This is required if ``encoder='bert'`` or using BERT features. The full list can be found in `transformers`_.
@@ -310,6 +322,8 @@ def __init__(self,
                  n_char_embed=50,
                  n_char_hidden=100,
                  char_pad_index=0,
+                 elmo='original_5b',
+                 elmo_bos_eos=(True, True),
                  bert=None,
                  n_bert_layers=4,
                  mix_dropout=.0,

supar/models/dep.py

Lines changed: 26 additions & 0 deletions
@@ -47,6 +47,11 @@ class BiaffineDependencyModel(Model):
             The size of hidden states of CharLSTM, required if using CharLSTM. Default: 100.
         char_pad_index (int):
             The index of the padding token in the character vocabulary, required if using CharLSTM. Default: 0.
+        elmo (str):
+            Name of the pretrained ELMo registered in `ELMoEmbedding.OPTION`. Default: ``'original_5b'``.
+        elmo_bos_eos (tuple[bool]):
+            A tuple of two boolean values indicating whether to keep start/end boundaries of elmo outputs.
+            Default: ``(True, False)``.
         bert (str):
             Specifies which kind of language model to use, e.g., ``'bert-base-cased'``.
             This is required if ``encoder='bert'`` or using BERT features. The full list can be found in `transformers`_.
@@ -104,6 +109,8 @@ def __init__(self,
                  n_char_embed=50,
                  n_char_hidden=100,
                  char_pad_index=0,
+                 elmo='original_5b',
+                 elmo_bos_eos=(True, False),
                  bert=None,
                  n_bert_layers=4,
                  mix_dropout=.0,
@@ -262,6 +269,11 @@ class CRFDependencyModel(BiaffineDependencyModel):
             The size of hidden states of CharLSTM, required if using CharLSTM. Default: 100.
         char_pad_index (int):
             The index of the padding token in the character vocabulary, required if using CharLSTM. Default: 0.
+        elmo (str):
+            Name of the pretrained ELMo registered in `ELMoEmbedding.OPTION`. Default: ``'original_5b'``.
+        elmo_bos_eos (tuple[bool]):
+            A tuple of two boolean values indicating whether to keep start/end boundaries of elmo outputs.
+            Default: ``(True, False)``.
         bert (str):
             Specifies which kind of language model to use, e.g., ``'bert-base-cased'``.
             This is required if ``encoder='bert'`` or using BERT features. The full list can be found in `transformers`_.
@@ -379,6 +391,11 @@ class CRF2oDependencyModel(BiaffineDependencyModel):
             The size of hidden states of CharLSTM, required if using CharLSTM. Default: 100.
         char_pad_index (int):
             The index of the padding token in the character vocabulary, required if using CharLSTM. Default: 0.
+        elmo (str):
+            Name of the pretrained ELMo registered in `ELMoEmbedding.OPTION`. Default: ``'original_5b'``.
+        elmo_bos_eos (tuple[bool]):
+            A tuple of two boolean values indicating whether to keep start/end boundaries of elmo outputs.
+            Default: ``(True, False)``.
         bert (str):
             Specifies which kind of language model to use, e.g., ``'bert-base-cased'``.
             This is required if ``encoder='bert'`` or using BERT features. The full list can be found in `transformers`_.
@@ -435,6 +452,8 @@ def __init__(self,
                  n_char_embed=50,
                  n_char_hidden=100,
                  char_pad_index=0,
+                 elmo='original_5b',
+                 elmo_bos_eos=(True, False),
                  bert=None,
                  n_bert_layers=4,
                  mix_dropout=.0,
@@ -618,6 +637,11 @@ class VIDependencyModel(BiaffineDependencyModel):
             The size of hidden states of CharLSTM, required if using CharLSTM. Default: 100.
         char_pad_index (int):
             The index of the padding token in the character vocabulary, required if using CharLSTM. Default: 0.
+        elmo (str):
+            Name of the pretrained ELMo registered in `ELMoEmbedding.OPTION`. Default: ``'original_5b'``.
+        elmo_bos_eos (tuple[bool]):
+            A tuple of two boolean values indicating whether to keep start/end boundaries of elmo outputs.
+            Default: ``(True, False)``.
         bert (str):
             Specifies which kind of language model to use, e.g., ``'bert-base-cased'``.
             This is required if ``encoder='bert'`` or using BERT features. The full list can be found in `transformers`_.
@@ -683,6 +707,8 @@ def __init__(self,
                  n_char_embed=50,
                  n_char_hidden=100,
                  char_pad_index=0,
+                 elmo='original_5b',
+                 elmo_bos_eos=(True, False),
                  bert=None,
                  n_bert_layers=4,
                  mix_dropout=.0,

supar/models/model.py

Lines changed: 14 additions & 2 deletions
@@ -2,8 +2,9 @@
 
 import torch
 import torch.nn as nn
-from supar.modules import (CharLSTM, IndependentDropout, SharedDropout,
-                           TransformerEmbedding, VariationalLSTM)
+from supar.modules import (CharLSTM, ELMoEmbedding, IndependentDropout,
+                           SharedDropout, TransformerEmbedding,
+                           VariationalLSTM)
 from supar.utils import Config
 from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
 
@@ -24,6 +25,8 @@ def __init__(self,
                  n_char_hidden=100,
                  char_pad_index=0,
                  char_dropout=0,
+                 elmo_bos_eos=(True, True),
+                 elmo_dropout=0.5,
                  bert=None,
                  n_bert_layers=4,
                  mix_dropout=.0,
@@ -63,6 +66,13 @@ def __init__(self,
             self.lemma_embed = nn.Embedding(num_embeddings=n_lemmas,
                                             embedding_dim=n_feat_embed)
             n_input += n_feat_embed
+        if 'elmo' in feat:
+            self.elmo_embed = ELMoEmbedding(n_out=n_feat_embed,
+                                            bos_eos=elmo_bos_eos,
+                                            dropout=elmo_dropout,
+                                            requires_grad=(not freeze))
+            n_input += self.elmo_embed.n_out
+
         if 'bert' in feat:
             self.bert_embed = TransformerEmbedding(model=bert,
                                                    n_layers=n_bert_layers,
@@ -126,6 +136,8 @@ def embed(self, words, feats):
             feat_embeds.append(self.tag_embed(feats.pop()))
         if 'char' in self.args.feat:
             feat_embeds.append(self.char_embed(feats.pop(0)))
+        if 'elmo' in self.args.feat:
+            feat_embeds.append(self.elmo_embed(feats.pop(0)))
         if 'bert' in self.args.feat:
             feat_embeds.append(self.bert_embed(feats.pop(0)))
         if 'lemma' in self.args.feat:
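
As a reading aid for the hunks above, here is a minimal, self-contained sketch of the feature-concatenation pattern the ELMo branch plugs into: an optional per-token feature embedding is built in the constructor, its size is added to n_input, and its output is concatenated with the word embeddings in embed(). TinyModel and the linear stand-in are illustrative assumptions, not supar's actual Model or ELMoEmbedding.

import torch
import torch.nn as nn


class TinyModel(nn.Module):
    # Illustrative sketch only: mirrors the feature-concatenation pattern of supar's
    # Model, with a linear projection standing in for the real ELMoEmbedding.
    def __init__(self, n_words, n_embed=100, n_feat_embed=100, n_elmo=1024,
                 feat=('elmo',), elmo_dropout=0.5):
        super().__init__()
        self.feat = feat
        self.word_embed = nn.Embedding(n_words, n_embed)
        n_input = n_embed
        if 'elmo' in feat:
            # stand-in for ELMoEmbedding(n_out=n_feat_embed, bos_eos=..., dropout=...,
            # requires_grad=...): any module yielding one n_feat_embed-dim vector per token
            self.elmo_embed = nn.Sequential(nn.Linear(n_elmo, n_feat_embed),
                                            nn.Dropout(elmo_dropout))
            n_input += n_feat_embed
        self.n_input = n_input

    def embed(self, words, feats):
        # feats holds one tensor per requested feature, consumed in a fixed order
        word_embed = self.word_embed(words)                    # [batch, seq, n_embed]
        feat_embeds = []
        if 'elmo' in self.feat:
            feat_embeds.append(self.elmo_embed(feats.pop(0)))  # [batch, seq, n_feat_embed]
        return torch.cat([word_embed] + feat_embeds, -1)       # [batch, seq, n_input]


# toy usage with random stand-in "ELMo" vectors
model = TinyModel(n_words=100)
words = torch.randint(0, 100, (2, 5))
feats = [torch.randn(2, 5, 1024)]
print(model.embed(words, feats).shape)  # torch.Size([2, 5, 200])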

supar/models/sdp.py

Lines changed: 14 additions & 0 deletions
@@ -47,6 +47,11 @@ class BiaffineSemanticDependencyModel(Model):
             The size of hidden states of CharLSTM, required if using CharLSTM. Default: 100.
         char_pad_index (int):
             The index of the padding token in the character vocabulary, required if using CharLSTM. Default: 0.
+        elmo (str):
+            Name of the pretrained ELMo registered in `ELMoEmbedding.OPTION`. Default: ``'original_5b'``.
+        elmo_bos_eos (tuple[bool]):
+            A tuple of two boolean values indicating whether to keep start/end boundaries of elmo outputs.
+            Default: ``(True, False)``.
         bert (str):
             Specifies which kind of language model to use, e.g., ``'bert-base-cased'``.
             This is required if ``encoder='bert'`` or using BERT features. The full list can be found in `transformers`_.
@@ -108,6 +113,8 @@ def __init__(self,
                  n_char_hidden=400,
                  char_pad_index=0,
                  char_dropout=0.33,
+                 elmo='original_5b',
+                 elmo_bos_eos=(True, False),
                  bert=None,
                  n_bert_layers=4,
                  mix_dropout=.0,
@@ -253,6 +260,11 @@ class VISemanticDependencyModel(BiaffineSemanticDependencyModel):
             The size of hidden states of CharLSTM, required if using CharLSTM. Default: 100.
         char_pad_index (int):
             The index of the padding token in the character vocabulary, required if using CharLSTM. Default: 0.
+        elmo (str):
+            Name of the pretrained ELMo registered in `ELMoEmbedding.OPTION`. Default: ``'original_5b'``.
+        elmo_bos_eos (tuple[bool]):
+            A tuple of two boolean values indicating whether to keep start/end boundaries of elmo outputs.
+            Default: ``(True, False)``.
         bert (str):
             Specifies which kind of language model to use, e.g., ``'bert-base-cased'``.
             This is required if ``encoder='bert'`` or using BERT features. The full list can be found in `transformers`_.
@@ -322,6 +334,8 @@ def __init__(self,
                  n_char_hidden=100,
                  char_pad_index=0,
                  char_dropout=0,
+                 elmo='original_5b',
+                 elmo_bos_eos=(True, False),
                  bert=None,
                  n_bert_layers=4,
                  mix_dropout=.0,

supar/models/srl.py

Lines changed: 7 additions & 0 deletions
@@ -47,6 +47,11 @@ class VISemanticRoleLabelingModel(Model):
             The size of hidden states of CharLSTM, required if using CharLSTM. Default: 100.
         char_pad_index (int):
             The index of the padding token in the character vocabulary, required if using CharLSTM. Default: 0.
+        elmo (str):
+            Name of the pretrained ELMo registered in `ELMoEmbedding.OPTION`. Default: ``'original_5b'``.
+        elmo_bos_eos (tuple[bool]):
+            A tuple of two boolean values indicating whether to keep start/end boundaries of elmo outputs.
+            Default: ``(True, False)``.
         bert (str):
             Specifies which kind of language model to use, e.g., ``'bert-base-cased'``.
             This is required if ``encoder='bert'`` or using BERT features. The full list can be found in `transformers`_.
@@ -116,6 +121,8 @@ def __init__(self,
                  n_char_hidden=100,
                  char_pad_index=0,
                  char_dropout=0,
+                 elmo='original_5b',
+                 elmo_bos_eos=(True, False),
                  bert=None,
                  n_bert_layers=4,
                  mix_dropout=.0,

supar/modules/__init__.py

Lines changed: 2 additions & 2 deletions
@@ -4,8 +4,8 @@
 from .dropout import IndependentDropout, SharedDropout
 from .lstm import CharLSTM, VariationalLSTM
 from .mlp import MLP
+from .pretrained import ELMoEmbedding, TransformerEmbedding
 from .scalar_mix import ScalarMix
-from .transformer import TransformerEmbedding
 
-__all__ = ['MLP', 'TransformerEmbedding', 'Biaffine', 'CharLSTM',
+__all__ = ['MLP', 'TransformerEmbedding', 'Biaffine', 'CharLSTM', 'ELMoEmbedding',
            'IndependentDropout', 'ScalarMix', 'SharedDropout', 'Triaffine', 'VariationalLSTM']
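
With the re-export above, ELMoEmbedding becomes importable from supar.modules alongside TransformerEmbedding. The sketch below is hedged: the keyword names mirror the constructor call added to supar/models/model.py in this commit, while the inline comments and everything about input preparation are assumptions rather than a confirmed API.

# a usage sketch; assumes a supar checkout at this commit with its ELMo dependencies installed
from supar.modules import ELMoEmbedding

elmo_embed = ELMoEmbedding(n_out=100,              # per-token output size (n_feat_embed in Model)
                           bos_eos=(True, False),  # keep the start boundary, drop the end one
                           dropout=0.5,
                           requires_grad=False)    # keep the pretrained ELMo weights frozen

# inside Model.embed the module is applied to the batched ELMo feature tensor:
#     feat_embeds.append(self.elmo_embed(feats.pop(0)))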
