
Commit bca2cfa

Author: zhuwei12
Commit message: add util.py
1 parent ce97a09 commit bca2cfa

File tree

  • cv_actions/Homework/homework2

1 file changed: +244 -0 lines changed

cv_actions/Homework/homework2/util.py

Lines changed: 244 additions & 0 deletions
@@ -0,0 +1,244 @@
from pickle import load
from numpy import array
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
def load_doc(filename):
    """Read a text file into a string.
    Args:
        filename: the text file
    Returns:
        string, the contents of the text file
    """
    # open the file as read only
    file = open(filename, 'r')
    # read all text
    text = file.read()
    # close the file
    file.close()
    return text

def to_list(captions):
    """Convert a dict (key: file name, value: list of image captions) into one flat list of captions.

    Args:
        captions: a dict, key: file name, value: list of image captions

    Returns:
        a list of image captions

    """
    all_desc = list()
    # iterate over every image
    for key in captions.keys():
        all_desc.extend(captions[key])
    return all_desc

def get_max_length(captions):
    """Compute the length of the longest caption in a caption dict.
    Args:
        captions: a dict, key: file name (without the .jpg suffix), value: list of image captions
    Returns:
        the length of the longest caption
    """
    lines = to_list(captions)
    return max(len(d.split()) for d in lines)
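
# A quick illustration (hypothetical captions dict, not part of this commit):
#   get_max_length({'1235345': ['startseq one bird on tree endseq']})
# returns 6, since the longest caption has six space-separated words.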

def load_set(filename):
    """Load a set of image names from a text file.
    Args:
        filename: a text file, every line of which contains one image file name (with the .jpg suffix)
    Returns:
        set of file names, with the .jpg suffix removed
    """

    doc = load_doc(filename)
    dataset = list()
    # process line by line
    for line in doc.split('\n'):
        # skip empty lines
        if len(line) < 1:
            continue
        # get the image identifier
        identifier = line.split('.')[0]
        dataset.append(identifier)
    return set(dataset)

def load_image_names(filename):
    """
    Load a set of image names from a text file.
    :param filename: a text file, every line of which contains one image file name (with the .jpg suffix)
    :return: set of file names, with the .jpg suffix removed
    """
    doc = load_doc(filename)
    data_set = list()
    # process line by line
    for line in doc.split('\n'):
        # skip empty lines
        if len(line) < 1:
            continue
        # get the image identifier
        identifier = line.split('.')[0]
        data_set.append(identifier)
    return set(data_set)

def load_clean_captions(filename, dataset):
    """Wrap each image caption with 'startseq ' and ' endseq' as the start and end markers for automatic caption generation.
    Args:
        filename: a text file, every line of which consists of an image name and an image caption; the captions have already been cleaned
        dataset: a list of image names
    Returns:
        dict, key: image name, value: list of captions with 'startseq' and 'endseq' added
    """
    # load document
    doc = load_doc(filename)
    descriptions = dict()
    for line in doc.split('\n'):
        # skip empty lines, which would otherwise make tokens[0] below raise an IndexError
        if len(line) < 1:
            continue
        # split line by white space
        tokens = line.split()
        # split id from description
        image_id, image_desc = tokens[0], tokens[1:]
        # skip images not in the set
        if image_id in dataset:
            # create list
            if image_id not in descriptions:
                descriptions[image_id] = list()
            # wrap description in tokens
            desc = 'startseq ' + ' '.join(image_desc) + ' endseq'
            # store
            descriptions[image_id].append(desc)
    return descriptions
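
# Example (hypothetical input line, not part of this commit):
#   '1000268201_693b08cb0e child in pink dress is climbing up set of stairs'
# becomes, when '1000268201_693b08cb0e' is in dataset:
#   {'1000268201_693b08cb0e': ['startseq child in pink dress is climbing up set of stairs endseq']}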

def load_photo_features(filename, dataset):
    """Load the image features for the given list of image names from an image-feature file.

    Args:
        filename: name of the file containing the image features; once loaded it is a dict,
            key: 'Flicker8k_Dataset/' + file name,
            value: the features of the corresponding image
        dataset: a list of image file names

    Returns:
        a dict of image features, key: file name,
        value: the features of the corresponding image

    """
    # load all features
    all_features = load(open(filename, 'rb'))
    # filter features
    features = {k: all_features[k] for k in dataset}
    return features
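
# Example (hypothetical pickle contents, not part of this commit): if the feature
# file stores {'img1': feat1, 'img2': feat2, 'img3': feat3}, then
#   load_photo_features(filename, ['img1', 'img3'])
# returns {'img1': feat1, 'img3': feat3}.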

# Train the model on the data
# Read a set of image ids
def load_ids(fn):
    doc = load_doc(fn)
    ret = list()
    for line in doc.split('\n'):
        if len(line) < 1:
            continue
        # renamed from `id` to avoid shadowing the Python builtin
        image_id = line.split('.')[0]
        ret.append(image_id)
    return set(ret)

def create_sequences(tokenizer, max_length, descriptions, photos_features, vocab_size):
    """
    Build one set of LSTM inputs from the input caption lists and image features.

    Args:
        :param tokenizer: a keras.preprocessing.text.Tokenizer for converting between English words and integers
        :param max_length: the length of the longest caption in the training set
        :param descriptions: dict, key: image name (without the .jpg suffix), value: list of several different descriptions of the image
        :param photos_features: dict, key: image name (without the .jpg suffix), value: numpy array of the image's features
        :param vocab_size: the number of words in the training-set vocabulary
    :return: tuple:
        the first element is a numpy array whose elements are the image features, each itself a numpy.array
        the second element is a numpy array whose elements are caption prefixes, each itself a numpy.array
        the third element is a numpy array whose elements are the next word of each caption (to be generated from the image features and the caption prefix), each also a numpy.array

    Examples:
        from pickle import load
        tokenizer = load(open('tokenizer.pkl', 'rb'))
        max_length = 6
        descriptions = {'1235345':['startseq one bird on tree endseq', "startseq red bird on tree endseq"],
                        '1234546':['startseq one boy play water endseq', "startseq one boy run across water endseq"]}
        photo_features = {'1235345':[ 0.434, 0.534, 0.212, 0.98 ],
                          '1234546':[ 0.534, 0.634, 0.712, 0.28 ]}
        vocab_size = 7378
        print(create_sequences(tokenizer, max_length, descriptions, photo_features, vocab_size))
        (array([[ 0.434,  0.534,  0.212,  0.98 ],
                [ 0.434,  0.534,  0.212,  0.98 ],
                [ 0.434,  0.534,  0.212,  0.98 ],
                [ 0.434,  0.534,  0.212,  0.98 ],
                [ 0.434,  0.534,  0.212,  0.98 ],
                [ 0.434,  0.534,  0.212,  0.98 ],
                [ 0.434,  0.534,  0.212,  0.98 ],
                [ 0.434,  0.534,  0.212,  0.98 ],
                [ 0.434,  0.534,  0.212,  0.98 ],
                [ 0.434,  0.534,  0.212,  0.98 ],
                [ 0.534,  0.634,  0.712,  0.28 ],
                [ 0.534,  0.634,  0.712,  0.28 ],
                [ 0.534,  0.634,  0.712,  0.28 ],
                [ 0.534,  0.634,  0.712,  0.28 ],
                [ 0.534,  0.634,  0.712,  0.28 ],
                [ 0.534,  0.634,  0.712,  0.28 ],
                [ 0.534,  0.634,  0.712,  0.28 ],
                [ 0.534,  0.634,  0.712,  0.28 ],
                [ 0.534,  0.634,  0.712,  0.28 ],
                [ 0.534,  0.634,  0.712,  0.28 ],
                [ 0.534,  0.634,  0.712,  0.28 ]]),
         array([[  0,   0,   0,   0,   0,   2],
                [  0,   0,   0,   0,   2,  59],
                [  0,   0,   0,   2,  59, 254],
                [  0,   0,   2,  59, 254,   6],
                [  0,   2,  59, 254,   6, 134],
                [  0,   0,   0,   0,   0,   2],
                [  0,   0,   0,   0,   2,  26],
                [  0,   0,   0,   2,  26, 254],
                [  0,   0,   2,  26, 254,   6],
                [  0,   2,  26, 254,   6, 134],
                [  0,   0,   0,   0,   0,   2],
                [  0,   0,   0,   0,   2,  59],
                [  0,   0,   0,   2,  59,  16],
                [  0,   0,   2,  59,  16,  82],
                [  0,   2,  59,  16,  82,  24],
                [  0,   0,   0,   0,   0,   2],
                [  0,   0,   0,   0,   2,  59],
                [  0,   0,   0,   2,  59,  16],
                [  0,   0,   2,  59,  16, 165],
                [  0,   2,  59,  16, 165, 127],
                [  2,  59,  16, 165, 127,  24]]),
         array([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
                [ 0.,  0.,  0., ...,  0.,  0.,  0.],
                [ 0.,  0.,  0., ...,  0.,  0.,  0.],
                ...,
                [ 0.,  0.,  0., ...,  0.,  0.,  0.],
                [ 0.,  0.,  0., ...,  0.,  0.,  0.],
                [ 0.,  0.,  0., ...,  0.,  0.,  0.]]))
    """
    X1, X2, y = list(), list(), list()
    for key, desc_list in descriptions.items():
        for desc in desc_list:
            # convert the caption into a sequence of integer word ids
            seq = tokenizer.texts_to_sequences([desc])[0]
            for i in range(1, len(seq)):
                # the prefix is the input, the next word is the target
                in_seq, out_seq = seq[:i], seq[i]
                # pad in_seq so that its length equals max_length
                in_seq = pad_sequences([in_seq], maxlen=max_length)[0]
                # one-hot encode the target word
                out_seq = to_categorical([out_seq], num_classes=vocab_size)[0]
                X1.append(photos_features[key][0])
                X2.append(in_seq)
                y.append(out_seq)
    return array(X1), array(X2), array(y)

if __name__ == '__main__':
    path = 'D:/learningDoc/CVSource/CVSource/cv_action_codes/Homework/homework2/'
    train_image_names = load_image_names(path + 'task4/Flickr_8k.trainImages.txt')
    print("train_image_names=%d" % len(train_image_names))
    descriptions = load_clean_captions(path + 'task5/descriptions.txt', train_image_names)
    print(descriptions)
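    # A minimal sketch of the next step, under loud assumptions: a fitted keras
    # Tokenizer pickled at 'tokenizer.pkl' and photo features pickled at
    # 'features.pkl'. Both file names are hypothetical and not part of this
    # commit, so the sketch is left commented out.
    # tokenizer = load(open(path + 'tokenizer.pkl', 'rb'))
    # vocab_size = len(tokenizer.word_index) + 1
    # max_length = get_max_length(descriptions)
    # features = load_photo_features(path + 'features.pkl', train_image_names)
    # X1, X2, y = create_sequences(tokenizer, max_length, descriptions, features, vocab_size)
    # print(X1.shape, X2.shape, y.shape)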
