Skip to content

Conversation

ergawy
Copy link
Member

@ergawy ergawy commented Sep 4, 2025

… GPU

Fixes a bug related to insertion points when inlining multi-block
combiner reduction regions. The IP at the end of the inlined region was
not used resulting in emitting BBs with multiple terminators.
@llvmbot
Copy link
Member

llvmbot commented Sep 4, 2025

@llvm/pr-subscribers-mlir-llvm
@llvm/pr-subscribers-mlir

@llvm/pr-subscribers-flang-openmp

Author: Kareem Ergawy (ergawy)

Changes

Fixes a bug related to insertion points when inlining multi-block combiner reduction regions. The IP at the end of the inlined region was not used resulting in emitting BBs with multiple terminators.


Full diff: https://github.com/llvm/llvm-project/pull/156837.diff

2 Files Affected:

  • (modified) llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp (+3)
  • (added) mlir/test/Target/LLVMIR/omptarget-multi-block-reduction.mlir (+85)
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 3d5e487c8990f..fe00a2a5696dc 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -3506,6 +3506,8 @@ Expected<Function *> OpenMPIRBuilder::createReductionFunction(
         return AfterIP.takeError();
       if (!Builder.GetInsertBlock())
         return ReductionFunc;
+
+      Builder.SetInsertPoint(AfterIP->getBlock(), AfterIP->getPoint());
       Builder.CreateStore(Reduced, LHSPtr);
     }
   }
@@ -3750,6 +3752,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductionsGPU(
           RI.ReductionGen(Builder.saveIP(), RHSValue, LHSValue, Reduced);
       if (!AfterIP)
         return AfterIP.takeError();
+      Builder.SetInsertPoint(AfterIP->getBlock(), AfterIP->getPoint());
       Builder.CreateStore(Reduced, LHS, false);
     }
   }
diff --git a/mlir/test/Target/LLVMIR/omptarget-multi-block-reduction.mlir b/mlir/test/Target/LLVMIR/omptarget-multi-block-reduction.mlir
new file mode 100644
index 0000000000000..aaf06d2d0e0c2
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/omptarget-multi-block-reduction.mlir
@@ -0,0 +1,85 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// Verifies that the IR builder can handle reductions with multi-block combiner
+// regions on the GPU.
+
+module attributes {dlti.dl_spec = #dlti.dl_spec<"dlti.alloca_memory_space" = 5 : ui64, "dlti.global_memory_space" = 1 : ui64>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true} {
+  llvm.func @bar() {}
+  llvm.func @baz() {}
+
+  omp.declare_reduction @add_reduction_byref_box_5xf32 : !llvm.ptr alloc {
+    %0 = llvm.mlir.constant(1 : i64) : i64
+    %1 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> : (i64) -> !llvm.ptr<5>
+    %2 = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr
+    omp.yield(%2 : !llvm.ptr)
+  } init {
+  ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+    omp.yield(%arg1 : !llvm.ptr)
+  } combiner {
+  ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
+    llvm.call @bar() : () -> ()
+    llvm.br ^bb3
+
+  ^bb3:  // pred: ^bb1
+    llvm.call @baz() : () -> ()
+    omp.yield(%arg0 : !llvm.ptr)
+  }
+  llvm.func @foo_() {
+    %c1 = llvm.mlir.constant(1 : i64) : i64
+    %10 = llvm.alloca %c1 x !llvm.array<5 x f32> {bindc_name = "x"} : (i64) -> !llvm.ptr<5>
+    %11 = llvm.addrspacecast %10 : !llvm.ptr<5> to !llvm.ptr
+    %74 = omp.map.info var_ptr(%11 : !llvm.ptr, !llvm.array<5 x f32>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "x"}
+    omp.target map_entries(%74 -> %arg0 : !llvm.ptr) {
+      %c1_2 = llvm.mlir.constant(1 : i32) : i32
+      %c10 = llvm.mlir.constant(10 : i32) : i32
+      omp.teams reduction(byref @add_reduction_byref_box_5xf32 %arg0 -> %arg2 : !llvm.ptr) {
+        omp.parallel {
+          omp.distribute {
+            omp.wsloop {
+              omp.loop_nest (%arg5) : i32 = (%c1_2) to (%c10) inclusive step (%c1_2) {
+                omp.yield
+              }
+            } {omp.composite}
+          } {omp.composite}
+          omp.terminator
+        } {omp.composite}
+        omp.terminator
+      }
+      omp.terminator
+    }
+    llvm.return
+  }
+}
+
+// CHECK:      call void @__kmpc_parallel_51({{.*}}, i32 1, i32 -1, i32 -1,
+// CHECK-SAME:   ptr @[[PAR_OUTLINED:.*]], ptr null, ptr %2, i64 1)
+
+// CHECK: define internal void @[[PAR_OUTLINED]]{{.*}} {
+// CHECK:   .omp.reduction.then:
+// CHECK:     br label %omp.reduction.nonatomic.body
+
+// CHECK:   omp.reduction.nonatomic.body:
+// CHECK:     call void @bar()
+// CHECK:     br label %[[BODY_2ND_BB:.*]]
+
+// CHECK:   [[BODY_2ND_BB]]:
+// CHECK:     call void @baz()
+// CHECK:     br label %[[CONT_BB:.*]]
+
+// CHECK:   [[CONT_BB]]:
+// CHECK:     br label %.omp.reduction.done
+// CHECK: }
+
+// CHECK: define internal void @"{{.*}}$reduction$reduction_func"(ptr noundef %0, ptr noundef %1) #0 {
+// CHECK:     br label %omp.reduction.nonatomic.body
+
+// CHECK:   [[BODY_2ND_BB:.*]]:
+// CHECK:     call void @baz()
+// CHECK:     br label %omp.region.cont
+
+
+// CHECK: omp.reduction.nonatomic.body:
+// CHECK:   call void @bar()
+// CHECK:     br label %[[BODY_2ND_BB]]
+
+// CHECK: }

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants