Skip to content

Commit 202ab27

Browse files
committed
[mlir] Added missing GPU lowering ops.
Summary: This diff adds missing GPU lowering ops to MLIR. Reviewers: herhut, pifon2a, ftynse Tags: #pre-merge_beta_testing, #llvm Differential Revision: https://reviews.llvm.org/D72439
1 parent 6b68670 commit 202ab27

File tree

4 files changed

+106
-2
lines changed

4 files changed

+106
-2
lines changed

mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -712,7 +712,8 @@ class LowerGpuOpsToNVVMOpsPass : public ModulePass<LowerGpuOpsToNVVMOpsPass> {
712712
populateGpuToNVVMConversionPatterns(converter, patterns);
713713
ConversionTarget target(getContext());
714714
target.addIllegalDialect<gpu::GPUDialect>();
715-
target.addIllegalOp<LLVM::ExpOp>();
715+
target.addIllegalOp<LLVM::FAbsOp, LLVM::FCeilOp, LLVM::CosOp,
716+
LLVM::ExpOp>();
716717
target.addIllegalOp<FuncOp>();
717718
target.addLegalDialect<LLVM::LLVMDialect>();
718719
target.addLegalDialect<NVVM::NVVMDialect>();
@@ -739,6 +740,12 @@ void mlir::populateGpuToNVVMConversionPatterns(
739740
NVVM::GridDimYOp, NVVM::GridDimZOp>,
740741
GPUAllReduceOpLowering, GPUShuffleOpLowering, GPUFuncOpLowering,
741742
GPUReturnOpLowering>(converter);
743+
patterns.insert<OpToFuncCallLowering<AbsFOp>>(converter, "__nv_fabsf",
744+
"__nv_fabs");
745+
patterns.insert<OpToFuncCallLowering<CeilFOp>>(converter, "__nv_ceilf",
746+
"__nv_ceil");
747+
patterns.insert<OpToFuncCallLowering<CosOp>>(converter, "__nv_cosf",
748+
"__nv_cos");
742749
patterns.insert<OpToFuncCallLowering<ExpOp>>(converter, "__nv_expf",
743750
"__nv_exp");
744751
}

mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,12 +51,19 @@ class LowerGpuOpsToROCDLOpsPass : public ModulePass<LowerGpuOpsToROCDLOpsPass> {
5151
GPUIndexIntrinsicOpLowering<gpu::GridDimOp, ROCDL::GridDimXOp,
5252
ROCDL::GridDimYOp, ROCDL::GridDimZOp>>(
5353
converter);
54+
patterns.insert<OpToFuncCallLowering<AbsFOp>>(converter, "_ocml_fabs_f32",
55+
"_ocml_fabs_f64");
56+
patterns.insert<OpToFuncCallLowering<CeilFOp>>(converter, "_ocml_ceil_f32",
57+
"_ocml_ceil_f64");
58+
patterns.insert<OpToFuncCallLowering<CosOp>>(converter, "_ocml_cos_f32",
59+
"_ocml_cos_f64");
5460
patterns.insert<OpToFuncCallLowering<ExpOp>>(converter, "_ocml_exp_f32",
5561
"_ocml_exp_f64");
5662

5763
ConversionTarget target(getContext());
5864
target.addLegalDialect<LLVM::LLVMDialect, ROCDL::ROCDLDialect>();
59-
target.addIllegalOp<LLVM::ExpOp>();
65+
target.addIllegalOp<LLVM::FAbsOp, LLVM::FCeilOp, LLVM::CosOp,
66+
LLVM::ExpOp>();
6067
target.addDynamicallyLegalOp<FuncOp>(
6168
[&](FuncOp op) { return converter.isSignatureLegal(op.getType()); });
6269
if (failed(applyPartialConversion(m, target, patterns, &converter)))

mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,51 @@ module attributes {gpu.kernel_module} {
111111

112112
// -----
113113

114+
module attributes {gpu.kernel_module} {
115+
// CHECK: llvm.func @__nv_fabsf(!llvm.float) -> !llvm.float
116+
// CHECK: llvm.func @__nv_fabs(!llvm.double) -> !llvm.double
117+
// CHECK-LABEL: func @gpu_fabs
118+
func @gpu_fabs(%arg_f32 : f32, %arg_f64 : f64) {
119+
%result32 = std.absf %arg_f32 : f32
120+
// CHECK: llvm.call @__nv_fabsf(%{{.*}}) : (!llvm.float) -> !llvm.float
121+
%result64 = std.absf %arg_f64 : f64
122+
// CHECK: llvm.call @__nv_fabs(%{{.*}}) : (!llvm.double) -> !llvm.double
123+
std.return
124+
}
125+
}
126+
127+
// -----
128+
129+
module attributes {gpu.kernel_module} {
130+
// CHECK: llvm.func @__nv_ceilf(!llvm.float) -> !llvm.float
131+
// CHECK: llvm.func @__nv_ceil(!llvm.double) -> !llvm.double
132+
// CHECK-LABEL: func @gpu_ceil
133+
func @gpu_ceil(%arg_f32 : f32, %arg_f64 : f64) {
134+
%result32 = std.ceilf %arg_f32 : f32
135+
// CHECK: llvm.call @__nv_ceilf(%{{.*}}) : (!llvm.float) -> !llvm.float
136+
%result64 = std.ceilf %arg_f64 : f64
137+
// CHECK: llvm.call @__nv_ceil(%{{.*}}) : (!llvm.double) -> !llvm.double
138+
std.return
139+
}
140+
}
141+
142+
// -----
143+
144+
module attributes {gpu.kernel_module} {
145+
// CHECK: llvm.func @__nv_cosf(!llvm.float) -> !llvm.float
146+
// CHECK: llvm.func @__nv_cos(!llvm.double) -> !llvm.double
147+
// CHECK-LABEL: func @gpu_cos
148+
func @gpu_cos(%arg_f32 : f32, %arg_f64 : f64) {
149+
%result32 = std.cos %arg_f32 : f32
150+
// CHECK: llvm.call @__nv_cosf(%{{.*}}) : (!llvm.float) -> !llvm.float
151+
%result64 = std.cos %arg_f64 : f64
152+
// CHECK: llvm.call @__nv_cos(%{{.*}}) : (!llvm.double) -> !llvm.double
153+
std.return
154+
}
155+
}
156+
157+
// -----
158+
114159
module attributes {gpu.kernel_module} {
115160
// CHECK: llvm.func @__nv_expf(!llvm.float) -> !llvm.float
116161
// CHECK: llvm.func @__nv_exp(!llvm.double) -> !llvm.double

mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,51 @@ module attributes {gpu.kernel_module} {
3838

3939
// -----
4040

41+
module attributes {gpu.kernel_module} {
42+
// CHECK: llvm.func @_ocml_fabs_f32(!llvm.float) -> !llvm.float
43+
// CHECK: llvm.func @_ocml_fabs_f64(!llvm.double) -> !llvm.double
44+
// CHECK-LABEL: func @gpu_fabs
45+
func @gpu_fabs(%arg_f32 : f32, %arg_f64 : f64) {
46+
%result32 = std.absf %arg_f32 : f32
47+
// CHECK: llvm.call @_ocml_fabs_f32(%{{.*}}) : (!llvm.float) -> !llvm.float
48+
%result64 = std.absf %arg_f64 : f64
49+
// CHECK: llvm.call @_ocml_fabs_f64(%{{.*}}) : (!llvm.double) -> !llvm.double
50+
std.return
51+
}
52+
}
53+
54+
// -----
55+
56+
module attributes {gpu.kernel_module} {
57+
// CHECK: llvm.func @_ocml_ceil_f32(!llvm.float) -> !llvm.float
58+
// CHECK: llvm.func @_ocml_ceil_f64(!llvm.double) -> !llvm.double
59+
// CHECK-LABEL: func @gpu_ceil
60+
func @gpu_ceil(%arg_f32 : f32, %arg_f64 : f64) {
61+
%result32 = std.ceilf %arg_f32 : f32
62+
// CHECK: llvm.call @_ocml_ceil_f32(%{{.*}}) : (!llvm.float) -> !llvm.float
63+
%result64 = std.ceilf %arg_f64 : f64
64+
// CHECK: llvm.call @_ocml_ceil_f64(%{{.*}}) : (!llvm.double) -> !llvm.double
65+
std.return
66+
}
67+
}
68+
69+
// -----
70+
71+
module attributes {gpu.kernel_module} {
72+
// CHECK: llvm.func @_ocml_cos_f32(!llvm.float) -> !llvm.float
73+
// CHECK: llvm.func @_ocml_cos_f64(!llvm.double) -> !llvm.double
74+
// CHECK-LABEL: func @gpu_cos
75+
func @gpu_cos(%arg_f32 : f32, %arg_f64 : f64) {
76+
%result32 = std.cos %arg_f32 : f32
77+
// CHECK: llvm.call @_ocml_cos_f32(%{{.*}}) : (!llvm.float) -> !llvm.float
78+
%result64 = std.cos %arg_f64 : f64
79+
// CHECK: llvm.call @_ocml_cos_f64(%{{.*}}) : (!llvm.double) -> !llvm.double
80+
std.return
81+
}
82+
}
83+
84+
// -----
85+
4186
module attributes {gpu.kernel_module} {
4287
// CHECK: llvm.func @_ocml_exp_f32(!llvm.float) -> !llvm.float
4388
// CHECK: llvm.func @_ocml_exp_f64(!llvm.double) -> !llvm.double

0 commit comments

Comments
 (0)