Skip to content

Commit 9f58c3d

Browse files
committed
convert text in textsum/data_convert_example.py
1 parent e060553 commit 9f58c3d

File tree

1 file changed

+12
-1
lines changed

1 file changed

+12
-1
lines changed

research/textsum/data_convert_example.py

+12 -1
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
import struct
1010
import sys
1111

12+
import six
1213
import tensorflow as tf
1314
from tensorflow.core.example import example_pb2
1415

@@ -32,7 +33,15 @@ def _binary_to_text():
3233
tf_example = example_pb2.Example.FromString(tf_example_str)
3334
examples = []
3435
for key in tf_example.features.feature:
35-
examples.append('%s=%s' % (key, tf_example.features.feature[key].bytes_list.value[0]))
36+
value = tf_example.features.feature[key].bytes_list.value[0]
37+
38+
# Convert to byte strings (PY2) or unicode strings (PY3)
39+
if six.PY2:
40+
key = key.encode('utf-8')
41+
else:
42+
value = value.decode('utf-8')
43+
44+
examples.append('%s=%s' % (key, value))
3645
writer.write('%s\n' % '\t'.join(examples))
3746
reader.close()
3847
writer.close()
@@ -45,6 +54,8 @@ def _text_to_binary():
4554
tf_example = example_pb2.Example()
4655
for feature in inp.strip().split('\t'):
4756
(k, v) = feature.split('=')
57+
if six.PY3:
58+
v = v.encode('utf-8')
4859
tf_example.features.feature[k].bytes_list.value.extend([v])
4960
tf_example_str = tf_example.SerializeToString()
5061
str_len = len(tf_example_str)

0 commit comments

Comments
 (0)