Skip to content

Commit 664d2f5

Browse files
committed
Add tanh lowering from Standard dialect to NVVM and ROCDL.
Summary: The tanh lowering from the Standard dialect to NVVM and ROCDL was not working. The conversion patterns are now inserted in the lowering files, and test cases for the lowerings were added to the test files. Reviewers: nicolasvasilache, ftynse, herhut Reviewed By: ftynse, herhut Subscribers: merge_guards_bot, ftynse, jholewinski, mehdi_amini, rriddle, jpienaar, burmako, shauheen, antiagainst, csigg, arpith-jacob, mgester, lucyrfox, herhut, liufengdb, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D73471
1 parent 92600c2 commit 664d2f5

File tree

4 files changed

+32
-0
lines changed

4 files changed

+32
-0
lines changed

mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -746,6 +746,8 @@ void mlir::populateGpuToNVVMConversionPatterns(
746746
"__nv_cos");
747747
patterns.insert<OpToFuncCallLowering<ExpOp>>(converter, "__nv_expf",
748748
"__nv_exp");
749+
patterns.insert<OpToFuncCallLowering<TanhOp>>(converter, "__nv_tanhf",
750+
"__nv_tanh");
749751
}
750752

751753
std::unique_ptr<OpPassBase<gpu::GPUModuleOp>>

mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ class LowerGpuOpsToROCDLOpsPass
5858
"_ocml_cos_f64");
5959
patterns.insert<OpToFuncCallLowering<ExpOp>>(converter, "_ocml_exp_f32",
6060
"_ocml_exp_f64");
61+
patterns.insert<OpToFuncCallLowering<TanhOp>>(converter, "_ocml_tanh_f32",
62+
"_ocml_tanh_f64");
6163

6264
ConversionTarget target(getContext());
6365
target.addLegalDialect<LLVM::LLVMDialect, ROCDL::ROCDLDialect>();

mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,20 @@ gpu.module @test_module {
156156

157157
// -----
158158

159+
gpu.module @test_module {
160+
// CHECK: llvm.func @__nv_tanhf(!llvm.float) -> !llvm.float
161+
// CHECK: llvm.func @__nv_tanh(!llvm.double) -> !llvm.double
162+
// CHECK-LABEL: func @gpu_tanh
163+
func @gpu_tanh(%arg_f32 : f32, %arg_f64 : f64) {
164+
%result32 = std.tanh %arg_f32 : f32
165+
// CHECK: llvm.call @__nv_tanhf(%{{.*}}) : (!llvm.float) -> !llvm.float
166+
%result64 = std.tanh %arg_f64 : f64
167+
// CHECK: llvm.call @__nv_tanh(%{{.*}}) : (!llvm.double) -> !llvm.double
168+
std.return
169+
}
170+
}
171+
172+
// -----
159173
gpu.module @test_module {
160174
// CHECK: llvm.func @__nv_expf(!llvm.float) -> !llvm.float
161175
// CHECK: llvm.func @__nv_exp(!llvm.double) -> !llvm.double

mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,20 @@ gpu.module @kernel_module {
8383

8484
// -----
8585

86+
gpu.module @kernel_module {
87+
// CHECK: llvm.func @_ocml_tanh_f32(!llvm.float) -> !llvm.float
88+
// CHECK: llvm.func @_ocml_tanh_f64(!llvm.double) -> !llvm.double
89+
// CHECK-LABEL: func @gpu_tanh
90+
func @gpu_tanh(%arg_f32 : f32, %arg_f64 : f64) {
91+
%result32 = std.tanh %arg_f32 : f32
92+
// CHECK: llvm.call @_ocml_tanh_f32(%{{.*}}) : (!llvm.float) -> !llvm.float
93+
%result64 = std.tanh %arg_f64 : f64
94+
// CHECK: llvm.call @_ocml_tanh_f64(%{{.*}}) : (!llvm.double) -> !llvm.double
95+
std.return
96+
}
97+
}
98+
99+
// -----
86100
gpu.module @kernel_module {
87101
// CHECK: llvm.func @_ocml_exp_f32(!llvm.float) -> !llvm.float
88102
// CHECK: llvm.func @_ocml_exp_f64(!llvm.double) -> !llvm.double

0 commit comments

Comments
 (0)