ipcoder
diff --git a/‎.gitignore
Lines changed: 0 additions & 2 deletions b/‎.gitignore
Lines changed: 0 additions & 2 deletions
diff --git a/‎CODEOWNERS
Lines changed: 2 additions & 0 deletions b/‎CODEOWNERS
Lines changed: 2 additions & 0 deletions
diff --git a/‎official/datasets/movielens.py
Lines changed: 6 additions & 2 deletions b/‎official/datasets/movielens.py
Lines changed: 6 additions & 2 deletions
diff --git a/‎official/keras_application_models/dataset.py
Lines changed: 4 additions & 5 deletions b/‎official/keras_application_models/dataset.py
Lines changed: 4 additions & 5 deletions
diff --git a/‎official/mnist/mnist_eager.py
Lines changed: 2 additions & 2 deletions b/‎official/mnist/mnist_eager.py
Lines changed: 2 additions & 2 deletions
diff --git a/‎official/recommendation/README.md
Lines changed: 3 additions & 2 deletions b/‎official/recommendation/README.md
Lines changed: 3 additions & 2 deletions
diff --git a/‎official/recommendation/constants.py
Lines changed: 67 additions & 0 deletions b/‎official/recommendation/constants.py
Lines changed: 67 additions & 0 deletions
@@ -90,5 +90,3 @@ ENV/
 
 # PyCharm
 .idea/
-
-samples/outreach/blogs/serving_blogpost/data/
@@ -10,6 +10,7 @@
 /research/brain_coder/ @danabo
 /research/cognitive_mapping_and_planning/ @s-gupta
 /research/compression/ @nmjohn
+/research/deep_contextual_bandits/ @rikel
 /research/deeplab/ @aquariusjay @yknzhu @gpapan
 /research/delf/ @andrefaraujo
 /research/differential_privacy/ @ilyamironov @ananthr
@@ -19,6 +20,7 @@
 /research/global_objectives/ @mackeya-google
 /research/im2txt/ @cshallue
 /research/inception/ @shlens @vincentvanhoucke
+/research/keypointnet/ @mnorouzi
 /research/learned_optimizer/ @olganw @nirum
 /research/learning_to_remember_rare_events/ @lukaszkaiser @ofirnachum
 /research/learning_unsupervised_learning/ @lukemetz @nirum
 
@@ -133,8 +133,12 @@ def _progress(count, block_size, total_size):
       _regularize_20m_dataset(temp_dir)
 
     for fname in tf.gfile.ListDirectory(temp_dir):
-      tf.gfile.Copy(os.path.join(temp_dir, fname),
-                    os.path.join(data_subdir, fname))
+      if not tf.gfile.Exists(os.path.join(data_subdir, fname)):
+        tf.gfile.Copy(os.path.join(temp_dir, fname),
+                      os.path.join(data_subdir, fname))
+      else:
+        tf.logging.info("Skipping copy of {}, as it already exists in the "
+                        "destination folder.".format(fname))
 
   finally:
     tf.gfile.DeleteRecursively(temp_dir)
 
@@ -18,7 +18,6 @@
 from __future__ import print_function
 
 import tensorflow as tf
-
 from official.utils.misc import model_helpers  # pylint: disable=g-bad-import-order
 
 # Default values for dataset.
@@ -29,7 +28,7 @@
 def _get_default_image_size(model):
   """Provide default image size for each model."""
   image_size = (224, 224)
-  if model in ["inception", "xception", "inceptionresnet"]:
+  if model in ["inceptionv3", "xception", "inceptionresnetv2"]:
     image_size = (299, 299)
   elif model in ["nasnetlarge"]:
     image_size = (331, 331)
@@ -42,8 +41,8 @@ def generate_synthetic_input_dataset(model, batch_size):
   image_shape = (batch_size,) + image_size + (_NUM_CHANNELS,)
   label_shape = (batch_size, _NUM_CLASSES)
 
-  return model_helpers.generate_synthetic_data(
+  dataset = model_helpers.generate_synthetic_data(
       input_shape=tf.TensorShape(image_shape),
-      input_dtype=tf.float32,
       label_shape=tf.TensorShape(label_shape),
-      label_dtype=tf.float32)
+  )
+  return dataset
@@ -83,8 +83,8 @@ def train(model, optimizer, dataset, step_counter, log_interval=None):
 
 def test(model, dataset):
   """Perform an evaluation of `model` on the examples from `dataset`."""
-  avg_loss = tfe.metrics.Mean('loss')
-  accuracy = tfe.metrics.Accuracy('accuracy')
+  avg_loss = tfe.metrics.Mean('loss', dtype=tf.float32)
+  accuracy = tfe.metrics.Accuracy('accuracy', dtype=tf.float32)
 
   for (images, labels) in dataset:
     logits = model(images, training=False)
 
@@ -43,15 +43,16 @@ In both datasets, the timestamp is represented in seconds since midnight Coordin
 ### Download and preprocess dataset
 To download the dataset, please install Pandas package first. Then issue the following command:
 ```
-python movielens_dataset.py
+python ../datasets/movielens.py
 ```
 Arguments:
   * `--data_dir`: Directory where to download and save the preprocessed data. By default, it is `/tmp/movielens-data/`.
   * `--dataset`: The dataset name to be downloaded and preprocessed. By default, it is `ml-1m`.
 
 Use the `--help` or `-h` flag to get a full list of possible arguments.
 
-Note the ml-20m dataset is large (the rating file is ~500 MB), and it may take several minutes (~10 mins) for data preprocessing.
+Note that the ml-20m dataset is large (the rating file is ~500 MB), so data preprocessing may take several minutes (~2 mins).
+Both the ml-1m and ml-20m datasets will be coerced into a common format when downloaded.
 
 ### Train and evaluate model
 To train and evaluate the model, issue the following command:
 
@@ -0,0 +1,67 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Central location for NCF specific values."""
+
+import os
+import time
+
+
+# ==============================================================================
+# == Main Thread Data Processing ===============================================
+# ==============================================================================
+class Paths(object):
+  """Container for various path information used while training NCF."""
+
+  def __init__(self, data_dir, cache_id=None):
+    self.cache_id = cache_id or int(time.time())  # Falsy id -> Unix seconds.
+    self.data_dir = data_dir
+    self.cache_root = os.path.join(  # Every path below lives under this dir.
+        self.data_dir, "{}_ncf_recommendation_cache".format(self.cache_id))
+    self.train_shard_subdir = os.path.join(self.cache_root,
+                                           "raw_training_shards")
+    self.train_shard_template = os.path.join(self.train_shard_subdir,
+                                             "positive_shard_{}.pickle")
+    self.train_epoch_dir = os.path.join(self.cache_root, "training_epochs")
+    self.eval_data_subdir = os.path.join(self.cache_root, "eval_data")
+    self.eval_raw_file = os.path.join(self.eval_data_subdir, "raw.pickle")
+    self.eval_record_template_temp = os.path.join(self.eval_data_subdir,
+                                                  "eval_records.temp")
+    self.eval_record_template = os.path.join(
+        self.eval_data_subdir, "padded_eval_batch_size_{}.tfrecords")
+    self.subproc_alive = os.path.join(self.cache_root, "subproc.alive")
+
+
+APPROX_PTS_PER_TRAIN_SHARD = 128000
+
+# In both datasets, each user has at least 20 ratings.
+MIN_NUM_RATINGS = 20
+
+# The number of negative examples paired with each positive example
+# when performing evaluation.
+NUM_EVAL_NEGATIVES = 999
+
+# ==============================================================================
+# == Subprocess Data Generation ================================================
+# ==============================================================================
+CYCLES_TO_BUFFER = 3  # The number of train cycles worth of data to "run ahead"
+                      # of the main training loop.
+
+READY_FILE = "ready.json"
+TRAIN_RECORD_TEMPLATE = "train_{}.tfrecords"
+
+TIMEOUT_SECONDS = 3600 * 2  # If the train loop goes more than two hours without
+                            # consuming an epoch of data, this is a good
+                            # indicator that the main thread is dead and the
+                            # subprocess is orphaned.