Skip to content

Commit f9ae897

Browse files
d0k authored and tensorflower-gardener committed
[XLA:GPU] Check the reduce input shape when multi-output fusing reduces
Otherwise we can end up in a situation where incompatible reduces that happen to have the same output shape are fused.

PiperOrigin-RevId: 200180013
1 parent 51f2b9e commit f9ae897

File tree

2 files changed

+39
-2
lines changed

2 files changed

+39
-2
lines changed

tensorflow/compiler/xla/service/gpu/multi_output_fusion.cc

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,12 +47,16 @@ bool GpuMultiOutputFusion::ShapesCompatibleForFusion(HloInstruction* instr1,
4747
element_instr = fused_expression_root;
4848
}
4949
}
50+
// Special handling of kReduce instructions -- the fusion
51+
// applies to the first operand.
52+
if (element_instr->opcode() == HloOpcode::kReduce) {
53+
return element_instr->operand(0)->shape();
54+
}
5055
return element_instr->shape();
5156
};
5257

5358
// The elementwise output shapes must be the same (including layout)
54-
return ShapeUtil::ShapeUtil::Equal(get_element_shape(instr1),
55-
get_element_shape(instr2));
59+
return ShapeUtil::Equal(get_element_shape(instr1), get_element_shape(instr2));
5660
}
5761

5862
bool GpuMultiOutputFusion::IsProfitableOperand(HloInstruction* instr) {

tensorflow/compiler/xla/service/gpu/multi_output_fusion_test.cc

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,11 @@ const char kModulePrefix[] = R"(
3636
scalar_lhs = f32[] parameter(0)
3737
scalar_rhs = f32[] parameter(1)
3838
ROOT add = f32[] add(scalar_lhs, scalar_rhs)
39+
}
40+
scalar_mul_computation {
41+
scalar_lhs = f32[] parameter(0)
42+
scalar_rhs = f32[] parameter(1)
43+
ROOT mul = f32[] add(scalar_lhs, scalar_rhs)
3944
})";
4045

4146
TEST_F(InstructionFusionTest, MultiOutputFusionSiblingReduceAndReduceFusion) {
@@ -67,6 +72,34 @@ TEST_F(InstructionFusionTest, MultiOutputFusionSiblingReduceAndReduceFusion) {
6772
op::Tuple(op::Reduce(), op::Reduce()));
6873
}
6974

75+
TEST_F(InstructionFusionTest, MultiOutputFusionDifferentReduceInputShapes) {
76+
auto module = ParseHloString(tensorflow::strings::StrCat(kModulePrefix, R"(
77+
fused_computation_1 {
78+
p1.1 = f32[6400]{0} parameter(1)
79+
mul = f32[6400]{0} multiply(p1.1, p1.1)
80+
const.1 = f32[] parameter(0)
81+
ROOT reduce.1 = f32[] reduce(p1.1, const.1), dimensions={0}, to_apply=scalar_add_computation
82+
}
83+
84+
fused_computation_2 {
85+
p1.2 = f32[6400]{0} parameter(1)
86+
r1 = f32[64,100]{0,1} reshape(p1.2)
87+
const.2 = f32[] parameter(0)
88+
ROOT reduce.2 = f32[] reduce(r1, const.2), dimensions={1,0}, to_apply=scalar_mul_computation
89+
}
90+
91+
ENTRY entry {
92+
p0 = f32[] parameter(0)
93+
p1 = f32[6400]{0} parameter(1)
94+
const.2 = f32[] constant(1)
95+
fusion.1 = f32[] fusion(p0, p1), kind=kInput, calls=fused_computation_1
96+
fusion.2 = f32[] fusion(p0, p1), kind=kInput, calls=fused_computation_2
97+
ROOT root = (f32[], f32[]) tuple(fusion.1, fusion.2)
98+
})"))
99+
.ValueOrDie();
100+
ASSERT_FALSE(GpuMultiOutputFusion().Run(module.get()).ValueOrDie());
101+
}
102+
70103
TEST_F(InstructionFusionTest, MultiOutputFusionSiblingReduceFusions) {
71104
// Two sibling fusions with reduce instruction roots sharing the same input
72105
// param.

0 commit comments

Comments (0)