Commit 01e3776

Merge commit with 2 parents: b28ff54 + 1ae02e9

File tree

- README.md
- neural_ner/config.py
- neural_ner/crf.py
- neural_ner/model.py
- neural_ner/process_training.py

5 files changed: +36 -21 lines

README.md (+9, -4)

````diff
@@ -5,17 +5,20 @@
 - - [x] conll2003
 - - [ ] atis
 ### Neural NER
-- - [ ] CharLSTM+WordLSTM+CRF: [Lample .etc, NAACL16](http://www.aclweb.org/anthology/N/N16/N16-1030.pdf)
+- - [x] CharLSTM+WordLSTM+CRF: [Lample .etc, NAACL16](http://www.aclweb.org/anthology/N/N16/N16-1030.pdf)
 - - [x] Make a CoNLL-2003 batcher
 - - [x] Implement trainer
 - - [x] Implement WordLSTM + softmax
 - - [x] Implement CharLSTM + WordLSTM + softmax
-- - [ ] Implement WordLSTM + CRF
-- - [ ] Implement CharLSTM + WordLSTM + CRF
+- - [x] Implement CharLSTM + WordLSTM + CRF
 
 ### Slot Filling + intent prediciton
 - - [ ] [Attention-Based Recurrent Neural Network Models for Joint Intent Detection and Slot Filling](https://arxiv.org/abs/1609.01454)
-
+- - [ ] Make a ATIS batcher
+- - [ ] Implement trainer
+- - [ ] Implement slot filler
+- - [ ] Implement intent
+
 ### Tree VAE
 - - [ ] [STRUCTVAE: Tree-structured Latent Variable Models for Semi-supervised Semantic Parsing](https://arxiv.org/abs/1806.07832)
 
@@ -25,3 +28,5 @@ conda install pytorch -c pytorch
 
 ```
 CoNLL-2003 can be downloaded from https://www.clips.uantwerpen.be/conll2003/ner/
+
+ATIS dataset can be downloaded from [split 0](http://lisaweb.iro.umontreal.ca/transfert/lisa/users/mesnilgr/atis/atis.fold0.pkl.gz) [split 1](http://lisaweb.iro.umontreal.ca/transfert/lisa/users/mesnilgr/atis/atis.fold1.pkl.gz) [split 2](http://lisaweb.iro.umontreal.ca/transfert/lisa/users/mesnilgr/atis/atis.fold2.pkl.gz) [split 3](http://lisaweb.iro.umontreal.ca/transfert/lisa/users/mesnilgr/atis/atis.fold3.pkl.gz) [split 4](http://lisaweb.iro.umontreal.ca/transfert/lisa/users/mesnilgr/atis/atis.fold4.pkl.gz)
````
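The ATIS links added above point to gzip-compressed pickle archives. A minimal loading sketch for one fold is below; the file name and the latin1 encoding (needed for pickles written under Python 2) are assumptions for illustration, not part of this commit:

```python
import gzip
import pickle

# Load one downloaded ATIS fold. The archive is a gzip-compressed pickle;
# encoding='latin1' lets Python 3 read a pickle written by Python 2.
with gzip.open('atis.fold0.pkl.gz', 'rb') as f:
    data = pickle.load(f, encoding='latin1')

# Inspect the structure before wiring it into a batcher.
print(type(data))
```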

neural_ner/config.py (+3, -3)

```diff
@@ -4,7 +4,7 @@ class Config(object):
     pass
 
 config = Config()
-root_dir = os.path.join(os.path.expanduser('~'), 'Downloads/sequence_prediction')
+root_dir = os.path.join(os.path.expanduser('~'), 'sequence_prediction')
 config.data_dir = os.path.join(root_dir, 'CoNLL-2003')
 config.log_root = os.path.join(root_dir, 'log')
 
@@ -40,6 +40,6 @@ class Config(object):
 
 config.vocab_size = int(4e5)
 
-config.is_cuda = False
+config.is_cuda = True
 
-config.is_l2_loss = False
+config.is_l2_loss = False
```
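Besides moving root_dir out of Downloads, this commit flips config.is_cuda to True; the CRF code below consults that flag before moving tensors to the GPU. A minimal sketch of the pattern, using a hypothetical to_device helper and stand-in config that are not part of the repository:

```python
import torch

def to_device(tensor, config):
    # Tensors go to the GPU only when config.is_cuda is set, mirroring the
    # hand-written checks in crf.py; CPU-only machines keep working.
    return tensor.cuda() if config.is_cuda else tensor

class DummyConfig:
    # Stand-in for the repository's Config object; is_cuda is the only field used here.
    is_cuda = torch.cuda.is_available()

padding_tensor = to_device(torch.tensor(0).long(), DummyConfig)
```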

neural_ner/crf.py (+20, -10)

```diff
@@ -5,16 +5,9 @@
 from data_utils.sentence_utils import Constants
 import numpy as np
 
-def get_mask(lengths):
-    seq_lens = lengths.view(-1, 1)
-    max_len = torch.max(seq_lens)
-    range_tensor = torch.arange(max_len).unsqueeze(0)
-    range_tensor = range_tensor.expand(seq_lens.size(0), range_tensor.size(1))
-    mask = (range_tensor < seq_lens).float()
-    return mask
 
 class CRF_Loss(nn.Module):
-    def __init__(self, tagset_size):
+    def __init__(self, tagset_size, config):
         super(CRF_Loss, self).__init__()
         self.start_tag = tagset_size
         self.end_tag = tagset_size + 1
@@ -25,6 +18,7 @@ def __init__(self, tagset_size):
 
         self.transitions.data[self.end_tag, :] = -10000
         self.transitions.data[:, self.start_tag] = -10000
+        self.config = config
 
     def get_log_p_z(self, emissions, mask, seq_len):
         log_alpha = emissions[:, 0].clone()
@@ -76,7 +70,7 @@ def forward(self, emissions, tags):
         return self.log_likelihood(emissions, tags)
 
     def viterbi_decode(self, emissions, lengths):
-        mask = get_mask(lengths)
+        mask = self.get_mask(lengths)
         seq_len = emissions.shape[1]
 
         log_prob = emissions[:, 0].clone()
@@ -87,7 +81,10 @@ def viterbi_decode(self, emissions, lengths):
         best_scores_list = []
         best_scores_list.append(end_scores.unsqueeze(1))
 
-        best_paths_list = [torch.Tensor().long()]
+        best_paths_0 = torch.Tensor().long()
+        if self.config.is_cuda:
+            best_paths_0 = best_paths_0.cuda()
+        best_paths_list = [best_paths_0]
 
         for idx in range(1, seq_len):
             broadcast_emissions = emissions[:, idx].unsqueeze(1)
@@ -108,6 +105,10 @@ def viterbi_decode(self, emissions, lengths):
 
         valid_index_tensor = torch.tensor(0).long()
         padding_tensor = torch.tensor(Constants.TAG_PAD_ID).long()
+
+        if self.config.is_cuda:
+            valid_index_tensor = valid_index_tensor.cuda()
+            padding_tensor = padding_tensor.cuda()
 
         labels = max_indices_from_scores[:, seq_len - 1]
         labels = torch.where(mask[:, seq_len - 1] != 1.0, padding_tensor, labels)
@@ -133,3 +134,12 @@ def viterbi_decode(self, emissions, lengths):
 
         return best_scores, torch.flip(all_labels, [1])
 
+    def get_mask(self, lengths):
+        seq_lens = lengths.view(-1, 1)
+        max_len = torch.max(seq_lens)
+        range_tensor = torch.arange(max_len).unsqueeze(0)
+        range_tensor = range_tensor.expand(seq_lens.size(0), range_tensor.size(1))
+        if self.config.is_cuda:
+            range_tensor = range_tensor.cuda()
+        mask = (range_tensor < seq_lens).float()
+        return mask
```
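get_mask, now a method so it can see config.is_cuda, builds a padding mask by broadcasting an arange over the batch and comparing it with each sequence length. A CPU-only sketch of the same computation (using .item() rather than passing a 0-dim tensor to torch.arange):

```python
import torch

# Two sequences of length 3 and 1, as get_mask would receive them.
lengths = torch.tensor([3, 1])
seq_lens = lengths.view(-1, 1)                      # shape (batch, 1)
max_len = torch.max(seq_lens).item()                # longest sequence: 3
range_tensor = torch.arange(max_len).unsqueeze(0)   # [[0, 1, 2]]
range_tensor = range_tensor.expand(seq_lens.size(0), range_tensor.size(1))
mask = (range_tensor < seq_lens).float()
# tensor([[1., 1., 1.],
#         [1., 0., 0.]])
```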

neural_ner/model.py (+2, -2)

```diff
@@ -187,7 +187,7 @@ def __init__(self, vocab, config):
         super(NER_SOFTMAX_CHAR_CRF, self).__init__()
 
         self.featurizer = NER_SOFTMAX_CHAR(vocab, config)
-        self.crf = CRF_Loss(len(vocab.id_to_tag))
+        self.crf = CRF_Loss(len(vocab.id_to_tag), config)
         self.config = config
 
     def forward(self, batch):
@@ -208,4 +208,4 @@ def get_loss(self, logits, y, s_lens):
 
     def predict(self, emissions, lengths):
         best_scores, pred = self.crf.viterbi_decode(emissions, lengths)
-        return pred
+        return pred
```

neural_ner/process_training.py (+2, -2)

```diff
@@ -44,7 +44,7 @@ def train(self):
         train_dir, summary_writer = setup_train_dir(self.config)
 
         params = list(filter(lambda p: p.requires_grad, self.model.parameters()))
-        optimizer = Adam(params, lr=0.001, amsgrad=True)
+        optimizer = Adam(params, amsgrad=True)
 
         num_params = sum(p.numel() for p in params)
         logging.info("Number of params: %d" % num_params)
@@ -73,7 +73,7 @@ def train(self):
                 logging.info(
                     'epoch %d, iter %d, loss %.5f, smoothed loss %.5f, grad norm %.5f, param norm %.5f, batch time %.3f' %
                     (epoch, global_step, train_loss, exp_loss, grad_norm, param_norm, iter_time))
-
+
             if pre_epoch < epoch:
                 epoch_toc = time.time()
                 logging.info("End of epoch %i. Time for epoch: %f" % (epoch, epoch_toc - epoch_tic))
```
