Skip to content

Commit 0a703c3

Browse files
committed
2 parents 91e8420 + 661e58f commit 0a703c3

File tree

1 file changed

+7
-19
lines changed

1 file changed

+7
-19
lines changed

neural_ner/data_utils/utils.py

+7 -19
Original file line number | Diff line number | Diff line change
@@ -21,26 +21,14 @@ def load_sentences_json(path, tag_scheme):
2121

2222
json_data = json.loads(line)
2323
entities = json_data['entities']
24-
sentence = [[t] for t in json_data['tokens']]
25-
curr = 0
26-
for e in entities:
27-
name = e['name']
28-
end = e['end']
29-
begin = e['begin']
30-
31-
while curr < begin:
32-
sentence[curr].append(Constants.ENTITY_OTHER_TAG)
33-
curr += 1
24+
sentence = [[t, Constants.ENTITY_OTHER_TAG] for t in json_data['tokens']]
3425

35-
sentence[curr].append(Constants.ENTITY_BEGIN + name)
36-
curr += 1
37-
while curr <= end:
38-
sentence[curr].append(Constants.ENTITY_INSIDE + name)
39-
curr += 1
26+
for e in entities:
27+
name, end, begin = e['name'], e['end'], e['begin']
4028

41-
while curr < len(sentence):
42-
sentence[curr].append('O')
43-
curr += 1
29+
sentence[begin][1] = Constants.ENTITY_BEGIN + name
30+
for i in range(begin+1, end+1):
31+
sentence[i][1] = Constants.ENTITY_INSIDE + name
4432

4533
sentences.append(sentence)
4634

@@ -82,7 +70,7 @@ def prepare_dataset(sentences, vocab, config):
8270
return data
8371

8472
def get_chunks(seq):
85-
col_names = ['name', 'end', 'begin']
73+
col_names = ['name', 'begin', 'end']
8674
chunks = []
8775
chunk_type, chunk_start = None, None
8876
for i, tok in enumerate(seq):

0 commit comments

Comments
 (0)