@@ -21,26 +21,14 @@ def load_sentences_json(path, tag_scheme):
 
             json_data = json.loads(line)
             entities = json_data['entities']
-            sentence = [[t] for t in json_data['tokens']]
-            curr = 0
-            for e in entities:
-                name = e['name']
-                end = e['end']
-                begin = e['begin']
-
-                while curr < begin:
-                    sentence[curr].append(Constants.ENTITY_OTHER_TAG)
-                    curr += 1
+            sentence = [[t, Constants.ENTITY_OTHER_TAG] for t in json_data['tokens']]
 
-                sentence[curr].append(Constants.ENTITY_BEGIN + name)
-                curr += 1
-                while curr <= end:
-                    sentence[curr].append(Constants.ENTITY_INSIDE + name)
-                    curr += 1
+            for e in entities:
+                name, end, begin = e['name'], e['end'], e['begin']
 
-            while curr < len(sentence):
-                sentence[curr].append('O')
-                curr += 1
+                sentence[begin][1] = Constants.ENTITY_BEGIN + name
+                for i in range(begin + 1, end + 1):
+                    sentence[i][1] = Constants.ENTITY_INSIDE + name
 
             sentences.append(sentence)
 
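The rewrite above replaces the cursor walk with a two-pass scheme: every token is initialized with the outside tag, then each entity span overwrites its own slice with begin/inside tags. A minimal standalone sketch of that logic, assuming Constants.ENTITY_BEGIN = 'B-', Constants.ENTITY_INSIDE = 'I-', Constants.ENTITY_OTHER_TAG = 'O', and inclusive 'end' offsets (none of these values appear in this diff, and tag_sentence is a hypothetical name):

# Hypothetical stand-ins for the Constants module; the real values
# are not part of this commit.
ENTITY_OTHER_TAG = 'O'
ENTITY_BEGIN = 'B-'
ENTITY_INSIDE = 'I-'

def tag_sentence(tokens, entities):
    # Pass 1: every token starts tagged as outside any entity.
    sentence = [[t, ENTITY_OTHER_TAG] for t in tokens]
    # Pass 2: each entity overwrites its span; 'end' is inclusive,
    # matching range(begin + 1, end + 1) in the new code.
    for e in entities:
        name, end, begin = e['name'], e['end'], e['begin']
        sentence[begin][1] = ENTITY_BEGIN + name
        for i in range(begin + 1, end + 1):
            sentence[i][1] = ENTITY_INSIDE + name
    return sentence

tokens = ['John', 'Smith', 'lives', 'in', 'Paris']
entities = [{'name': 'PER', 'begin': 0, 'end': 1},
            {'name': 'LOC', 'begin': 4, 'end': 4}]
print(tag_sentence(tokens, entities))
# [['John', 'B-PER'], ['Smith', 'I-PER'], ['lives', 'O'],
#  ['in', 'O'], ['Paris', 'B-LOC']]

A side benefit visible in the diff itself: the old version tagged trailing tokens with a literal 'O' while earlier tokens got Constants.ENTITY_OTHER_TAG; initializing every slot up front makes the tagging uniform and removes that inconsistency.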
@@ -82,7 +70,7 @@ def prepare_dataset(sentences, vocab, config):
     return data
 
 def get_chunks(seq):
-    col_names = ['name', 'end', 'begin']
+    col_names = ['name', 'begin', 'end']
     chunks = []
     chunk_type, chunk_start = None, None
     for i, tok in enumerate(seq):
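The column reorder matches the tuple order a BIO chunker naturally produces: judging from the chunk_type/chunk_start bookkeeping visible above, get_chunks emits one (name, begin, end) triple per entity, so 'begin' must precede 'end' in the labels. A sketch of typical BIO chunk extraction under that assumption (get_chunks_sketch is a hypothetical stand-in, not necessarily this repo's exact implementation):

def get_chunks_sketch(seq):
    # seq is a list of BIO tags, e.g. ['B-PER', 'I-PER', 'O', 'B-LOC'].
    chunks = []
    chunk_type, chunk_start = None, None
    for i, tok in enumerate(seq):
        if tok == 'O':
            # An outside tag closes any open chunk.
            if chunk_type is not None:
                chunks.append((chunk_type, chunk_start, i - 1))
                chunk_type, chunk_start = None, None
        elif tok.startswith('B-') or tok[2:] != chunk_type:
            # A begin tag (or a type change) closes the open chunk
            # and starts a new one.
            if chunk_type is not None:
                chunks.append((chunk_type, chunk_start, i - 1))
            chunk_type, chunk_start = tok[2:], i
    if chunk_type is not None:
        chunks.append((chunk_type, chunk_start, len(seq) - 1))
    return chunks

print(get_chunks_sketch(['B-PER', 'I-PER', 'O', 'B-LOC']))
# [('PER', 0, 1), ('LOC', 3, 3)] -> order matches ['name', 'begin', 'end']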