# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains code for loading and preprocessing image data."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function


import numpy as np
import tensorflow as tf


def normalize_image(image):
  """Rescale from range [0, 255] to [-1, 1]."""
  return (tf.to_float(image) - 127.5) / 127.5


def undo_normalize_image(normalized_image):
  """Convert a normalized image back to a uint8 numpy array readable by PIL.

  Expects a single image in NHWC format with a batch dimension of 1; the
  `np.squeeze` below raises a ValueError for larger batches.
  """
  # Convert from NHWC to HWC by dropping the batch dimension.
  normalized_image = np.squeeze(normalized_image, axis=0)
  return np.uint8(normalized_image * 127.5 + 127.5)


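# Illustrative round trip through the two helpers above (a sketch, not part
# of the pipeline; assumes TF1-style graph mode and a hypothetical `sess`):
#
#   raw = tf.zeros([1, 64, 64, 3], dtype=tf.uint8)
#   scaled = normalize_image(raw)                       # float32 in [-1, 1]
#   pil_array = undo_normalize_image(sess.run(scaled))  # uint8, HWC

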
def _sample_patch(image, patch_size):
  """Crop image to a square shape and resize it to `patch_size`.

  Args:
    image: A 3D `Tensor` of HWC format.
    patch_size: A Python scalar. The output image size.

  Returns:
    A 3D `Tensor` of HWC format with shape [patch_size, patch_size, 3].
  """
  image_shape = tf.shape(image)
  height, width = image_shape[0], image_shape[1]
  target_size = tf.minimum(height, width)
  # Center-crop to the largest square that fits in the image.
  image = tf.image.resize_image_with_crop_or_pad(image, target_size,
                                                 target_size)
  # Resize through a temporary batch dimension, then squeeze it back out.
  image = tf.expand_dims(image, axis=0)
  image = tf.image.resize_images(image, [patch_size, patch_size])
  image = tf.squeeze(image, axis=0)
  # Force num_channels = 3: tile grayscale up to at least 3 channels, then
  # slice off any extra (e.g. alpha) channels.
  image = tf.tile(image, [1, 1, tf.maximum(1, 4 - tf.shape(image)[2])])
  image = tf.slice(image, [0, 0, 0], [patch_size, patch_size, 3])
  return image


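# A minimal sketch of the channel handling above (illustrative only):
# a 1-channel image is tiled to 3 channels, a 4-channel image loses its
# alpha channel, and a 3-channel image passes through unchanged.
#
#   gray = tf.zeros([40, 60, 1])
#   patch = _sample_patch(gray, 32)   # -> shape [32, 32, 3]
#   rgba = tf.zeros([40, 60, 4])
#   patch = _sample_patch(rgba, 32)   # -> shape [32, 32, 3]

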
def full_image_to_patch(image, patch_size):
  """Normalize an image and extract a fixed-size square patch from it.

  Args:
    image: A 3D `Tensor` of HWC format with values in [0, 255].
    patch_size: A Python scalar. The output image size.

  Returns:
    A 3D `Tensor` of HWC format with shape [patch_size, patch_size, 3] and
    values in [-1, 1].
  """
  image = normalize_image(image)
  # Sample a patch of fixed size.
  image_patch = _sample_patch(image, patch_size)
  image_patch.shape.assert_is_compatible_with([patch_size, patch_size, 3])
  return image_patch


def _provide_custom_dataset(image_file_pattern,
                            batch_size,
                            shuffle=True,
                            num_threads=1,
                            patch_size=128):
  """Provides batches of custom image data.

  Args:
    image_file_pattern: A string glob pattern for matching image files.
    batch_size: The number of images in each batch.
    shuffle: Whether to shuffle the read images. Defaults to True.
    num_threads: Number of prefetching threads. Defaults to 1.
    patch_size: Size of the patch to extract from the image. Defaults to 128.

  Returns:
    A float `Tensor` of shape [batch_size, patch_size, patch_size, 3]
    representing a batch of images.
  """
  filename_queue = tf.train.string_input_producer(
      tf.train.match_filenames_once(image_file_pattern),
      shuffle=shuffle,
      capacity=5 * batch_size)
  image_reader = tf.WholeFileReader()

  _, image_bytes = image_reader.read(filename_queue)
  image = tf.image.decode_image(image_bytes)
  image_patch = full_image_to_patch(image, patch_size)

  if shuffle:
    return tf.train.shuffle_batch(
        [image_patch],
        batch_size=batch_size,
        num_threads=num_threads,
        capacity=5 * batch_size,
        min_after_dequeue=batch_size)
  else:
    return tf.train.batch(
        [image_patch],
        batch_size=batch_size,
        num_threads=1,  # A single thread keeps the output deterministic.
        capacity=5 * batch_size)


def provide_custom_datasets(image_file_patterns,
                            batch_size,
                            shuffle=True,
                            num_threads=1,
                            patch_size=128):
  """Provides multiple batches of custom image data.

  Args:
    image_file_patterns: A list of glob patterns of image files.
    batch_size: The number of images in each batch.
    shuffle: Whether to shuffle the read images. Defaults to True.
    num_threads: Number of prefetching threads. Defaults to 1.
    patch_size: Size of the patch to extract from the image. Defaults to 128.

  Returns:
    A list of float `Tensor`s of the same length as `image_file_patterns`.
    Each `Tensor` in the list has shape
    [batch_size, patch_size, patch_size, 3] and represents a batch of images.

  Raises:
    ValueError: If image_file_patterns is not a list or tuple.
  """
  if not isinstance(image_file_patterns, (list, tuple)):
    raise ValueError(
        '`image_file_patterns` should be either list or tuple, but was {}.'.
        format(type(image_file_patterns)))
  custom_datasets = []
  for pattern in image_file_patterns:
    custom_datasets.append(
        _provide_custom_dataset(
            pattern,
            batch_size=batch_size,
            shuffle=shuffle,
            num_threads=num_threads,
            patch_size=patch_size))
  return custom_datasets
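

# Minimal usage sketch (illustrative; the glob patterns below are
# hypothetical). The queue-based readers above require queue runners, which
# `tf.train.MonitoredTrainingSession` starts automatically, along with
# initializing the local variable created by `tf.train.match_filenames_once`:
#
#   images_x, images_y = provide_custom_datasets(
#       ['/tmp/x/*.jpg', '/tmp/y/*.jpg'], batch_size=4)
#   with tf.train.MonitoredTrainingSession() as sess:
#     batch_x, batch_y = sess.run([images_x, images_y])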