Commit 384fed7

Merge pull request tensorflow#2831 from martinwicke/r0.9
Fixes to TensorArray and Functional ops
2 parents: 2f0b20f + a1a6659

10 files changed: 191 additions, 30 deletions

tensorflow/core/kernels/tensor_array.cc
Lines changed: 2 additions & 0 deletions

@@ -75,6 +75,8 @@ TF_CALL_GPU_NUMBER_TYPES(TENSOR_ARRAY_SET_ZERO_GPU);
 
 }  // namespace tensor_array
 
+std::atomic<int64> TensorArray::tensor_array_counter{0};
+
 Status TensorArray::CopyShapesFrom(TensorArray* rhs) {
   mutex_lock l(mu_);
   mutex_lock l_rhs(*rhs->mu());

tensorflow/core/kernels/tensor_array.h
Lines changed: 2 additions & 0 deletions

@@ -124,6 +124,8 @@ TF_CALL_GPU_NUMBER_TYPES(TENSOR_ARRAY_SET_ZERO_GPU);
 //
 class TensorArray : public ResourceBase {
  public:
+  static std::atomic<int64> tensor_array_counter;
+
   // Construct a TensorArray for holding Tensors of type 'dtype' with
   // 'N' elements. While the underlying storage is a std::vector and
   // can hold more than MAX_INT entries, in practice we do not expect

tensorflow/core/kernels/tensor_array_ops.cc
Lines changed: 6 additions & 2 deletions

@@ -147,14 +147,18 @@ class TensorArrayOp : public TensorArrayCreationOp {
     const int32 size = tensor_size->scalar<int32>()();
 
     auto handle = tensor_array_output_handle->flat<string>();
+    string unique_tensor_array_name =
+        strings::StrCat(tensor_array_name_, "_",
+                        TensorArray::tensor_array_counter.fetch_add(1));
     handle(0) = "_tensor_arrays";
-    handle(1) = tensor_array_name_;
+    handle(1) = unique_tensor_array_name;
 
     TensorArray* tensor_array = new TensorArray(
         dtype_, *tensor_array_output_handle, size, dynamic_size_,
         false /* multiple_writes_aggregate */, clear_after_read_);
 
-    TF_RETURN_IF_ERROR(rm->Create(handle(0), tensor_array_name_, tensor_array));
+    TF_RETURN_IF_ERROR(
+        rm->Create(handle(0), unique_tensor_array_name, tensor_array));
 
     *output_tensor_array = tensor_array;
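The net effect: each created TensorArray resource is now registered under a name of the form <tensor_array_name>_<counter>, where the counter is a process-wide atomic incremented once per creation, so two TensorArrays that share a user-supplied name no longer collide in the ResourceMgr. A rough Python sketch of the naming behaviour (illustration only, not TensorFlow code; the helper name is hypothetical):

    import itertools

    _tensor_array_counter = itertools.count()  # stands in for the C++ std::atomic<int64>

    def unique_tensor_array_name(tensor_array_name):
      # Mirrors strings::StrCat(tensor_array_name_, "_", counter.fetch_add(1)).
      return "%s_%d" % (tensor_array_name, next(_tensor_array_counter))

    print(unique_tensor_array_name("foo"))  # foo_0
    print(unique_tensor_array_name("foo"))  # foo_1: same user name, distinct resource key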

tensorflow/python/kernel_tests/functional_ops_test.py
Lines changed: 115 additions & 0 deletions

@@ -23,6 +23,15 @@
 import tensorflow as tf
 
 
+def simple_scoped_fn(a, x):
+  """Simple function: (a, x) -> 2(x+a), but with "2" as a variable in scope."""
+  with tf.variable_scope("body"):
+    # Dummy variable, just to check that scoping works as intended.
+    two = tf.get_variable("two", [], dtype=tf.int32,
+                          initializer=tf.constant_initializer(2))
+    return tf.mul(tf.add(a, x), two)
+
+
 class FunctionalOpsTest(tf.test.TestCase):
 
   def testFoldl_Simple(self):
@@ -36,6 +45,24 @@ def testFoldl_Simple(self):
           lambda a, x: tf.mul(tf.add(a, x), 2), elems, initializer=10)
       self.assertAllEqual(880, r.eval())
 
+  def testFoldl_Scoped(self):
+    with self.test_session() as sess:
+      with tf.variable_scope("root") as varscope:
+        elems = tf.constant([1, 2, 3, 4, 5, 6], name="data")
+
+        r = tf.foldl(simple_scoped_fn, elems)
+        # Check that we have the one variable we asked for here.
+        self.assertEqual(len(tf.trainable_variables()), 1)
+        self.assertEqual(tf.trainable_variables()[0].name, "root/body/two:0")
+        sess.run([tf.initialize_all_variables()])
+        self.assertAllEqual(208, r.eval())
+
+        # Now let's reuse our single variable.
+        varscope.reuse_variables()
+        r = tf.foldl(simple_scoped_fn, elems, initializer=10)
+        self.assertEqual(len(tf.trainable_variables()), 1)
+        self.assertAllEqual(880, r.eval())
+
   def testFoldr_Simple(self):
     with self.test_session():
       elems = tf.constant([1, 2, 3, 4, 5, 6], name="data")
@@ -47,6 +74,24 @@ def testFoldr_Simple(self):
           lambda a, x: tf.mul(tf.add(a, x), 2), elems, initializer=10)
       self.assertAllEqual(1282, r.eval())
 
+  def testFoldr_Scoped(self):
+    with self.test_session() as sess:
+      with tf.variable_scope("root") as varscope:
+        elems = tf.constant([1, 2, 3, 4, 5, 6], name="data")
+
+        r = tf.foldr(simple_scoped_fn, elems)
+        # Check that we have the one variable we asked for here.
+        self.assertEqual(len(tf.trainable_variables()), 1)
+        self.assertEqual(tf.trainable_variables()[0].name, "root/body/two:0")
+        sess.run([tf.initialize_all_variables()])
+        self.assertAllEqual(450, r.eval())
+
+        # Now let's reuse our single variable.
+        varscope.reuse_variables()
+        r = tf.foldr(simple_scoped_fn, elems, initializer=10)
+        self.assertEqual(len(tf.trainable_variables()), 1)
+        self.assertAllEqual(1282, r.eval())
+
   def testFold_Grad(self):
     with self.test_session():
       elems = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], name="data")
@@ -69,6 +114,34 @@ def testMap_Simple(self):
       r = tf.map_fn(lambda x: tf.mul(tf.add(x, 3), 2), elems)
       self.assertAllEqual(np.array([(x + 3) * 2 for x in nums]), r.eval())
 
+  def testMap_Scoped(self):
+    with self.test_session() as sess:
+
+      def double_scoped(x):
+        """2x with a dummy 2 that is scoped."""
+        with tf.variable_scope("body"):
+          # Dummy variable, just to check that scoping works as intended.
+          two = tf.get_variable("two", [], dtype=tf.int32,
+                                initializer=tf.constant_initializer(2))
+          return tf.mul(x, two)
+
+      with tf.variable_scope("root") as varscope:
+        elems = tf.constant([1, 2, 3, 4, 5, 6], name="data")
+        doubles = np.array([2*x for x in [1, 2, 3, 4, 5, 6]])
+
+        r = tf.map_fn(double_scoped, elems)
+        # Check that we have the one variable we asked for here.
+        self.assertEqual(len(tf.trainable_variables()), 1)
+        self.assertEqual(tf.trainable_variables()[0].name, "root/body/two:0")
+        sess.run([tf.initialize_all_variables()])
+        self.assertAllEqual(doubles, r.eval())
+
+        # Now let's reuse our single variable.
+        varscope.reuse_variables()
+        r = tf.map_fn(double_scoped, elems)
+        self.assertEqual(len(tf.trainable_variables()), 1)
+        self.assertAllEqual(doubles, r.eval())
+
   def testMap_SimpleNotTensor(self):
     with self.test_session():
       nums = [1, 2, 3, 4, 5, 6]
@@ -87,6 +160,48 @@ def testScan_Simple(self):
           lambda a, x: tf.mul(a, x), elems, initializer=v)
       self.assertAllEqual([2., 4., 12., 48., 240., 1440.], r.eval())
 
+  def testScan_Scoped(self):
+    with self.test_session() as sess:
+      with tf.variable_scope("root") as varscope:
+        elems = tf.constant([1, 2, 3, 4, 5, 6], name="data")
+
+        r = tf.scan(simple_scoped_fn, elems)
+        # Check that we have the one variable we asked for here.
+        self.assertEqual(len(tf.trainable_variables()), 1)
+        self.assertEqual(tf.trainable_variables()[0].name, "root/body/two:0")
+        sess.run([tf.initialize_all_variables()])
+        results = np.array([1, 6, 18, 44, 98, 208])
+        self.assertAllEqual(results, r.eval())
+
+        # Now let's reuse our single variable.
+        varscope.reuse_variables()
+        r = tf.scan(simple_scoped_fn, elems, initializer=2)
+        self.assertEqual(len(tf.trainable_variables()), 1)
+        results = np.array([6, 16, 38, 84, 178, 368])
+        self.assertAllEqual(results, r.eval())
+
+  def testScanFoldl_Nested(self):
+    with self.test_session():
+      elems = tf.constant([1.0, 2.0, 3.0, 4.0], name="data")
+      inner_elems = tf.constant([0.5, 0.5], name="data")
+
+      def r_inner(a, x):
+        return tf.foldl(lambda b, y: b * y * x, inner_elems, initializer=a)
+
+      r = tf.scan(r_inner, elems)
+
+      # t == 0 (returns 1)
+      # t == 1, a == 1, x == 2 (returns 1)
+      #   t_0 == 0, b == a == 1, y == 0.5, returns b * y * x = 1
+      #   t_1 == 1, b == 1, y == 0.5, returns b * y * x = 1
+      # t == 2, a == 1, x == 3 (returns 1.5*1.5 == 2.25)
+      #   t_0 == 0, b == a == 1, y == 0.5, returns b * y * x = 1.5
+      #   t_1 == 1, b == 1.5, y == 0.5, returns b * y * x = 1.5*1.5
+      # t == 3, a == 2.25, x == 4 (returns 9)
+      #   t_0 == 0, b == a == 2.25, y == 0.5, returns b * y * x = 4.5
+      #   t_1 == 1, b == 4.5, y == 0.5, returns b * y * x = 9
+      self.assertAllClose([1., 1., 2.25, 9.], r.eval())
+
   def testScan_Control(self):
     with self.test_session() as sess:
       s = tf.placeholder(tf.float32, shape=[None])
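The expected values in the new scoped tests follow directly from the accumulator recurrence (a + x) * 2 over [1, 2, 3, 4, 5, 6]. A plain-Python sanity check of those numbers (no TensorFlow involved, just the documented foldl/foldr/scan semantics):

    from functools import reduce

    elems = [1, 2, 3, 4, 5, 6]
    fn = lambda a, x: (a + x) * 2

    # foldl: with no initializer, the first element seeds the accumulator.
    assert reduce(fn, elems) == 208
    assert reduce(fn, elems, 10) == 880

    # foldr: the same fold, taken from the right.
    assert reduce(fn, reversed(elems)) == 450
    assert reduce(fn, reversed(elems), 10) == 1282

    # scan without an initializer emits every intermediate foldl accumulator.
    acc, out = elems[0], [elems[0]]
    for x in elems[1:]:
      acc = fn(acc, x)
      out.append(acc)
    assert out == [1, 6, 18, 44, 98, 208]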

tensorflow/python/kernel_tests/rnn_cell_test.py
Lines changed: 4 additions & 2 deletions

@@ -251,9 +251,11 @@ def testEmbeddingWrapper(self):
       with tf.variable_scope("root", initializer=tf.constant_initializer(0.5)):
         x = tf.zeros([1, 1], dtype=tf.int32)
         m = tf.zeros([1, 2])
-        g, new_m = tf.nn.rnn_cell.EmbeddingWrapper(
+        embedding_cell = tf.nn.rnn_cell.EmbeddingWrapper(
             tf.nn.rnn_cell.GRUCell(2),
-            embedding_classes=3, embedding_size=2)(x, m)
+            embedding_classes=3, embedding_size=2)
+        self.assertEqual(embedding_cell.output_size, 2)
+        g, new_m = embedding_cell(x, m)
         sess.run([tf.initialize_all_variables()])
         res = sess.run([g, new_m], {x.name: np.array([[1]]),
                                     m.name: np.array([[0.1, 0.1]])})

tensorflow/python/kernel_tests/tensor_array_ops_test.py
Lines changed: 10 additions & 12 deletions

@@ -22,7 +22,6 @@
 import numpy as np
 import tensorflow as tf
 
-from tensorflow.python.framework import errors
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.ops import gen_data_flow_ops
 from tensorflow.python.ops import tensor_array_grad
@@ -462,7 +461,7 @@ def _testTensorArrayWriteGradientAddMultipleAdds(self, dtype):
       # Assert that if multiple_writes_aggregate is not enabled,
       # multiple writes raise an exception.
       with self.assertRaisesOpError(
-          r"TensorArray foo: Could not write to TensorArray index 2 because "
+          r"TensorArray foo_.*: Could not write to TensorArray index 2 because "
          r"it has already been written to."):
        w1.flow.eval()
 
@@ -495,16 +494,22 @@ def testMultiTensorArray(self):
       r = r1 + r2
       self.assertAllClose(9.0, r.eval())
 
-  def testDuplicateTensorArrayFails(self):
+  def testDuplicateTensorArrayHasDifferentName(self):
    with self.test_session(use_gpu=self._use_gpu) as session:
      h1 = tensor_array_ops.TensorArray(
          size=1, dtype=tf.float32, tensor_array_name="foo")
      c1 = h1.write(0, 4.0)
      h2 = tensor_array_ops.TensorArray(
          size=1, dtype=tf.float32, tensor_array_name="foo")
      c2 = h2.write(0, 5.0)
-      with self.assertRaises(errors.AlreadyExistsError):
-        session.run([c1.flow, c2.flow])
+      _, _, c1h, c2h = session.run([c1.flow, c2.flow, c1.handle, c2.handle])
+      c1h = [x.decode("ascii") for x in c1h]
+      c2h = [x.decode("ascii") for x in c2h]
+      self.assertEqual(c1h[0], "_tensor_arrays")
+      self.assertEqual(c2h[0], "_tensor_arrays")
+      self.assertTrue(c1h[1].startswith("foo_"))
+      self.assertTrue(c2h[1].startswith("foo_"))
+      self.assertNotEqual(c1h[1], c2h[1])
 
   def _testTensorArrayGradientWriteReadType(self, dtype):
     with self.test_session(use_gpu=self._use_gpu) as session:
@@ -692,13 +697,6 @@ def testWriteCloseTensorArray(self):
       w1 = w0.write(1, [3.0])
       w1.close().run()  # Expected to run without problems
 
-      ta = tensor_array_ops.TensorArray(
-          dtype=tf.float32, tensor_array_name="foo", size=3)
-      with self.assertRaisesOpError(
-          r"TensorArray foo has already been closed."):
-        with tf.control_dependencies([w1.close()]):
-          w1.write(2, 3.0).flow.eval()
-
   def _testWhileLoopWritePackGradients(self, dynamic_size, dtype):
     np_dtype = dtype.as_numpy_dtype
     with self.test_session(use_gpu=self._use_gpu) as session:

tensorflow/python/ops/functional_ops.py
Lines changed: 36 additions & 9 deletions

@@ -87,12 +87,15 @@ def foldl(fn, elems, initializer=None, parallel_iterations=10, back_prop=True,
   if not callable(fn):
     raise TypeError("fn must be callable.")
 
-  # TODO(ebrevdo): Change to using colocate_with here and in other methods.
-  with vs.variable_op_scope([elems], name, "foldl") as varscope:
-    # Any get_variable calls fn will cache the first call locally
+  with ops.op_scope([elems], name, "foldl"):
+    # Any get_variable calls in fn will cache the first call locally
     # and not issue repeated network I/O requests for each iteration.
+    varscope = vs.get_variable_scope()
+    varscope_caching_device_was_none = False
     if varscope.caching_device is None:
+      # TODO(ebrevdo): Change to using colocate_with here and in other methods.
       varscope.set_caching_device(lambda op: op.device)
+      varscope_caching_device_was_none = True
 
     # Convert elems to tensor array.
     elems = ops.convert_to_tensor(elems, name="elems")
@@ -117,6 +120,9 @@ def compute(i, a):
         parallel_iterations=parallel_iterations,
         back_prop=back_prop,
         swap_memory=swap_memory)
+
+    if varscope_caching_device_was_none:
+      varscope.set_caching_device(None)
     return r_a
 
 
@@ -161,11 +167,15 @@ def foldr(fn, elems, initializer=None, parallel_iterations=10, back_prop=True,
   if not callable(fn):
     raise TypeError("fn must be callable.")
 
-  with vs.variable_op_scope([elems], name, "foldr") as varscope:
-    # Any get_variable calls fn will cache the first call locally
+  with ops.op_scope([elems], name, "foldr"):
+    # Any get_variable calls in fn will cache the first call locally
     # and not issue repeated network I/O requests for each iteration.
+    varscope = vs.get_variable_scope()
+    varscope_caching_device_was_none = False
     if varscope.caching_device is None:
+      # TODO(ebrevdo): Change to using colocate_with here and in other methods.
      varscope.set_caching_device(lambda op: op.device)
+      varscope_caching_device_was_none = True
 
     # Convert elems to tensor array.
     elems = ops.convert_to_tensor(elems, name="elems")
@@ -190,6 +200,9 @@ def compute(i, a):
        parallel_iterations=parallel_iterations,
        back_prop=back_prop,
        swap_memory=swap_memory)
+
+    if varscope_caching_device_was_none:
+      varscope.set_caching_device(None)
     return r_a
 
 
@@ -232,11 +245,15 @@ def map_fn(fn, elems, dtype=None, parallel_iterations=10, back_prop=True,
   if not callable(fn):
     raise TypeError("fn must be callable.")
 
-  with vs.variable_op_scope([elems], name, "map") as varscope:
-    # Any get_variable calls fn will cache the first call locally
+  with ops.op_scope([elems], name, "map"):
+    # Any get_variable calls in fn will cache the first call locally
     # and not issue repeated network I/O requests for each iteration.
+    varscope = vs.get_variable_scope()
+    varscope_caching_device_was_none = False
     if varscope.caching_device is None:
+      # TODO(ebrevdo): Change to using colocate_with here and in other methods.
      varscope.set_caching_device(lambda op: op.device)
+      varscope_caching_device_was_none = True
 
     elems = ops.convert_to_tensor(elems, name="elems")
     dtype = dtype if dtype else elems.dtype
@@ -263,6 +280,9 @@ def compute(i, ta):
     result = r_a.pack()
     result.set_shape(elems.get_shape().with_rank_at_least(1)[0:1].concatenate(
         result.get_shape()[1:]))
+
+    if varscope_caching_device_was_none:
+      varscope.set_caching_device(None)
     return result
 
 
@@ -307,11 +327,15 @@ def scan(fn, elems, initializer=None, parallel_iterations=10, back_prop=True,
   if not callable(fn):
     raise TypeError("fn must be callable.")
 
-  with vs.variable_op_scope([elems], name, "scan") as varscope:
-    # Any get_variable calls fn will cache the first call locally
+  with ops.op_scope([elems], name, "scan"):
+    # Any get_variable calls in fn will cache the first call locally
    # and not issue repeated network I/O requests for each iteration.
+    varscope = vs.get_variable_scope()
+    varscope_caching_device_was_none = False
     if varscope.caching_device is None:
+      # TODO(ebrevdo): Change to using colocate_with here and in other methods.
      varscope.set_caching_device(lambda op: op.device)
+      varscope_caching_device_was_none = True
 
     # Convert elems to tensor array.
     elems = ops.convert_to_tensor(elems, name="elems")
@@ -346,6 +370,9 @@ def compute(i, a, ta):
     result = r_a.pack()
     result.set_shape(elems.get_shape().with_rank_at_least(1)[0:1].concatenate(
        result.get_shape()[1:]))
+
+    if varscope_caching_device_was_none:
+      varscope.set_caching_device(None)
     return result
 
 
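All four functions (foldl, foldr, map_fn, scan) now share the same pattern: enter a plain op_scope, set a caching device on the caller's current variable scope only if none was set, and put it back to None on the way out so the caller's scope is left exactly as it was found; this matters because get_variable_scope() now returns the surrounding scope rather than a fresh one created by variable_op_scope. A minimal sketch of that pattern as a standalone context manager (temporary_caching_device is a hypothetical helper name; the real code inlines the logic in each function):

    import contextlib

    from tensorflow.python.ops import variable_scope as vs


    @contextlib.contextmanager
    def temporary_caching_device():
      varscope = vs.get_variable_scope()
      caching_device_was_none = varscope.caching_device is None
      if caching_device_was_none:
        # Cache each variable alongside the op that reads it, so the while_loop
        # body does not re-fetch it over the network on every iteration.
        varscope.set_caching_device(lambda op: op.device)
      try:
        yield varscope
      finally:
        if caching_device_was_none:
          # Restore the caller's variable scope to its original state.
          varscope.set_caching_device(None)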

tensorflow/python/ops/rnn.py
Lines changed: 1 addition & 1 deletion

@@ -140,7 +140,7 @@ def rnn(cell, inputs, initial_state=None, dtype=None,
       max_sequence_length = math_ops.reduce_max(sequence_length)
 
     for time, input_ in enumerate(inputs):
-      if time > 0: vs.get_variable_scope().reuse_variables()
+      if time > 0: varscope.reuse_variables()
       # pylint: disable=cell-var-from-loop
       call_cell = lambda: cell(input_, state)
       # pylint: enable=cell-var-from-loop

tensorflow/python/ops/rnn_cell.py
Lines changed: 4 additions & 0 deletions

@@ -769,6 +769,10 @@ def __init__(self, cell, embedding_classes, embedding_size, initializer=None):
   def state_size(self):
     return self._cell.state_size
 
+  @property
+  def output_size(self):
+    return self._cell.output_size
+
   def __call__(self, inputs, state, scope=None):
     """Run the cell on embedded inputs."""
     with vs.variable_scope(scope or type(self).__name__):  # "EmbeddingWrapper"
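With the property in place, callers can ask an EmbeddingWrapper for its output width before ever invoking it, which is exactly what the updated rnn_cell_test.py checks above. A small usage sketch against the 0.9-era API:

    import tensorflow as tf

    cell = tf.nn.rnn_cell.EmbeddingWrapper(
        tf.nn.rnn_cell.GRUCell(2), embedding_classes=3, embedding_size=2)
    # output_size simply forwards to the wrapped cell, here GRUCell(2).
    assert cell.output_size == 2
    assert cell.state_size == 2  # state_size was already forwarded the same way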
