Skip to content

Commit 2e667d0

Browse files
committed
[FPEnv][SystemZ] Platform-specific builtin constrained FP enablement
When constrained floating point is enabled, the SystemZ-specific builtins don't use constrained intrinsics in some cases. Fix that. Differential Revision: https://reviews.llvm.org/D72722
1 parent 24d7a09 commit 2e667d0

6 files changed

+1171
-26
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 78 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -13310,45 +13310,71 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
1331013310
case SystemZ::BI__builtin_s390_vfsqdb: {
// Square root of the single builtin argument X.
// Diff view: lines marked '-' are the old unconditional llvm.sqrt call;
// lines marked '+' pick the constrained sqrt intrinsic when the builder
// reports constrained-FP mode and keep the plain intrinsic otherwise.
1331113311
llvm::Type *ResultType = ConvertType(E->getType());
1331213312
Value *X = EmitScalarExpr(E->getArg(0));
13313-
Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
13314-
return Builder.CreateCall(F, X);
13313+
if (Builder.getIsFPConstrained()) {
13314+
Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, ResultType);
13315+
return Builder.CreateConstrainedFPCall(F, { X });
13316+
} else {
13317+
Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
13318+
return Builder.CreateCall(F, X);
13319+
}
1331513320
}
1331613321
case SystemZ::BI__builtin_s390_vfmasb:
1331713322
case SystemZ::BI__builtin_s390_vfmadb: {
// Fused multiply-add of the three builtin arguments: fma(X, Y, Z).
// Diff view: the '-' lines always called llvm.fma; the '+' lines call the
// constrained fma intrinsic when the builder is in constrained-FP mode
// and fall back to llvm.fma otherwise.
1331813323
llvm::Type *ResultType = ConvertType(E->getType());
1331913324
Value *X = EmitScalarExpr(E->getArg(0));
1332013325
Value *Y = EmitScalarExpr(E->getArg(1));
1332113326
Value *Z = EmitScalarExpr(E->getArg(2));
13322-
Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
13323-
return Builder.CreateCall(F, {X, Y, Z});
13327+
if (Builder.getIsFPConstrained()) {
13328+
Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
13329+
return Builder.CreateConstrainedFPCall(F, {X, Y, Z});
13330+
} else {
13331+
Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
13332+
return Builder.CreateCall(F, {X, Y, Z});
13333+
}
1332413334
}
1332513335
case SystemZ::BI__builtin_s390_vfmssb:
1332613336
case SystemZ::BI__builtin_s390_vfmsdb: {
// Fused multiply-subtract, expressed as fma(X, Y, -Z): the third operand
// is negated with an fneg named "neg" before the fma call.
// Diff view: the '-' lines always called llvm.fma; the '+' lines use the
// constrained fma intrinsic in constrained-FP mode, llvm.fma otherwise.
1332713337
llvm::Type *ResultType = ConvertType(E->getType());
1332813338
Value *X = EmitScalarExpr(E->getArg(0));
1332913339
Value *Y = EmitScalarExpr(E->getArg(1));
1333013340
Value *Z = EmitScalarExpr(E->getArg(2));
13331-
Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
13332-
return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
13341+
if (Builder.getIsFPConstrained()) {
13342+
Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
13343+
return Builder.CreateConstrainedFPCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
13344+
} else {
13345+
Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
13346+
return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
13347+
}
1333313348
}
1333413349
case SystemZ::BI__builtin_s390_vfnmasb:
1333513350
case SystemZ::BI__builtin_s390_vfnmadb: {
// Negated fused multiply-add: the fma(X, Y, Z) result is wrapped in an
// fneg named "neg".
// Diff view: the '-' lines always called llvm.fma; the '+' lines use the
// constrained fma intrinsic in constrained-FP mode, llvm.fma otherwise.
// In both branches the outer negation is a plain fneg.
1333613351
llvm::Type *ResultType = ConvertType(E->getType());
1333713352
Value *X = EmitScalarExpr(E->getArg(0));
1333813353
Value *Y = EmitScalarExpr(E->getArg(1));
1333913354
Value *Z = EmitScalarExpr(E->getArg(2));
13340-
Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
13341-
return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
13355+
if (Builder.getIsFPConstrained()) {
13356+
Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
13357+
return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");
13358+
} else {
13359+
Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
13360+
return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
13361+
}
1334213362
}
1334313363
case SystemZ::BI__builtin_s390_vfnmssb:
1334413364
case SystemZ::BI__builtin_s390_vfnmsdb: {
// Negated fused multiply-subtract, expressed as -(fma(X, Y, -Z)): the
// third operand is negated first, then the fma result is negated again.
// Diff view: the '-' lines always called llvm.fma; the '+' lines use the
// constrained fma intrinsic in constrained-FP mode, llvm.fma otherwise.
1334513365
llvm::Type *ResultType = ConvertType(E->getType());
1334613366
Value *X = EmitScalarExpr(E->getArg(0));
1334713367
Value *Y = EmitScalarExpr(E->getArg(1));
1334813368
Value *Z = EmitScalarExpr(E->getArg(2));
13349-
Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
13350-
Value *NegZ = Builder.CreateFNeg(Z, "neg");
13351-
return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, NegZ}));
13369+
if (Builder.getIsFPConstrained()) {
13370+
Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);
13371+
// The operand is produced by an fneg, so it carries the same "neg" value
// name as in the non-constrained branch below.
Value *NegZ = Builder.CreateFNeg(Z, "neg");
13372+
return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, NegZ}));
13373+
} else {
13374+
Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
13375+
Value *NegZ = Builder.CreateFNeg(Z, "neg");
13376+
return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, NegZ}));
13377+
}
1335213378
}
1335313379
case SystemZ::BI__builtin_s390_vflpsb:
1335413380
case SystemZ::BI__builtin_s390_vflpdb: {
@@ -13377,30 +13403,42 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
1337713403
// Check whether this instance can be represented via a LLVM standard
1337813404
// intrinsic. We only support some combinations of M4 and M5.
1337913405
Intrinsic::ID ID = Intrinsic::not_intrinsic;
13406+
Intrinsic::ID CI;
1338013407
switch (M4.getZExtValue()) {
1338113408
default: break;
1338213409
case 0: // IEEE-inexact exception allowed
1338313410
switch (M5.getZExtValue()) {
1338413411
default: break;
13385-
case 0: ID = Intrinsic::rint; break;
13412+
case 0: ID = Intrinsic::rint;
13413+
CI = Intrinsic::experimental_constrained_rint; break;
1338613414
}
1338713415
break;
1338813416
case 4: // IEEE-inexact exception suppressed
1338913417
switch (M5.getZExtValue()) {
1339013418
default: break;
13391-
case 0: ID = Intrinsic::nearbyint; break;
13392-
case 1: ID = Intrinsic::round; break;
13393-
case 5: ID = Intrinsic::trunc; break;
13394-
case 6: ID = Intrinsic::ceil; break;
13395-
case 7: ID = Intrinsic::floor; break;
13419+
case 0: ID = Intrinsic::nearbyint;
13420+
CI = Intrinsic::experimental_constrained_nearbyint; break;
13421+
case 1: ID = Intrinsic::round;
13422+
CI = Intrinsic::experimental_constrained_round; break;
13423+
case 5: ID = Intrinsic::trunc;
13424+
CI = Intrinsic::experimental_constrained_trunc; break;
13425+
case 6: ID = Intrinsic::ceil;
13426+
CI = Intrinsic::experimental_constrained_ceil; break;
13427+
case 7: ID = Intrinsic::floor;
13428+
CI = Intrinsic::experimental_constrained_floor; break;
1339613429
}
1339713430
break;
1339813431
}
1339913432
if (ID != Intrinsic::not_intrinsic) {
13400-
Function *F = CGM.getIntrinsic(ID, ResultType);
13401-
return Builder.CreateCall(F, X);
13433+
if (Builder.getIsFPConstrained()) {
13434+
Function *F = CGM.getIntrinsic(CI, ResultType);
13435+
return Builder.CreateConstrainedFPCall(F, X);
13436+
} else {
13437+
Function *F = CGM.getIntrinsic(ID, ResultType);
13438+
return Builder.CreateCall(F, X);
13439+
}
1340213440
}
13403-
switch (BuiltinID) {
13441+
switch (BuiltinID) { // FIXME: constrained version?
1340413442
case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
1340513443
case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
1340613444
default: llvm_unreachable("Unknown BuiltinID");
@@ -13423,13 +13461,20 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
1342313461
// Check whether this instance can be represented via a LLVM standard
1342413462
// intrinsic. We only support some values of M4.
1342513463
Intrinsic::ID ID = Intrinsic::not_intrinsic;
13464+
Intrinsic::ID CI;
1342613465
switch (M4.getZExtValue()) {
1342713466
default: break;
13428-
case 4: ID = Intrinsic::maxnum; break;
13467+
case 4: ID = Intrinsic::maxnum;
13468+
CI = Intrinsic::experimental_constrained_maxnum; break;
1342913469
}
1343013470
if (ID != Intrinsic::not_intrinsic) {
13431-
Function *F = CGM.getIntrinsic(ID, ResultType);
13432-
return Builder.CreateCall(F, {X, Y});
13471+
if (Builder.getIsFPConstrained()) {
13472+
Function *F = CGM.getIntrinsic(CI, ResultType);
13473+
return Builder.CreateConstrainedFPCall(F, {X, Y});
13474+
} else {
13475+
Function *F = CGM.getIntrinsic(ID, ResultType);
13476+
return Builder.CreateCall(F, {X, Y});
13477+
}
1343313478
}
1343413479
switch (BuiltinID) {
1343513480
case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
@@ -13453,13 +13498,20 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
1345313498
// Check whether this instance can be represented via a LLVM standard
1345413499
// intrinsic. We only support some values of M4.
1345513500
Intrinsic::ID ID = Intrinsic::not_intrinsic;
13501+
Intrinsic::ID CI;
1345613502
switch (M4.getZExtValue()) {
1345713503
default: break;
13458-
case 4: ID = Intrinsic::minnum; break;
13504+
case 4: ID = Intrinsic::minnum;
13505+
CI = Intrinsic::experimental_constrained_minnum; break;
1345913506
}
1346013507
if (ID != Intrinsic::not_intrinsic) {
13461-
Function *F = CGM.getIntrinsic(ID, ResultType);
13462-
return Builder.CreateCall(F, {X, Y});
13508+
if (Builder.getIsFPConstrained()) {
13509+
Function *F = CGM.getIntrinsic(CI, ResultType);
13510+
return Builder.CreateConstrainedFPCall(F, {X, Y});
13511+
} else {
13512+
Function *F = CGM.getIntrinsic(ID, ResultType);
13513+
return Builder.CreateCall(F, {X, Y});
13514+
}
1346313515
}
1346413516
switch (BuiltinID) {
1346513517
case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
// REQUIRES: systemz-registered-target
2+
// RUN: %clang_cc1 -target-cpu z13 -triple s390x-ibm-linux -flax-vector-conversions=none \
3+
// RUN: -ffp-exception-behavior=strict -Wall -Wno-unused -Werror -emit-llvm %s -o - | FileCheck %s
4+
// Codegen test: this file is compiled for z13 with strict FP exception
// behavior, and the emitted LLVM IR is matched against the FileCheck
// directives that follow each builtin call below.
5+
// 16-byte vector types used as operands/results of the builtins.
typedef __attribute__((vector_size(16))) signed long long vec_slong;
6+
typedef __attribute__((vector_size(16))) double vec_double;
7+
8+
// File-scope volatile operands/results (presumably volatile so that every
// access stays visible in the emitted IR -- confirm).
volatile vec_slong vsl;
9+
volatile vec_double vd;
10+
11+
// Passed by address as the last argument of the compare/test builtins.
int cc;
12+
13+
// Exercises the double-precision vector FP builtins one by one; each call
// is followed by the IR pattern expected for it.
void test_float(void) {
14+
// Compare builtins: expected to remain calls to the target-specific
// llvm.s390.* intrinsics.
vsl = __builtin_s390_vfcedbs(vd, vd, &cc);
15+
// CHECK: call { <2 x i64>, i32 } @llvm.s390.vfcedbs(<2 x double> %{{.*}}, <2 x double> %{{.*}})
16+
vsl = __builtin_s390_vfchdbs(vd, vd, &cc);
17+
// CHECK: call { <2 x i64>, i32 } @llvm.s390.vfchdbs(<2 x double> %{{.*}}, <2 x double> %{{.*}})
18+
vsl = __builtin_s390_vfchedbs(vd, vd, &cc);
19+
// CHECK: call { <2 x i64>, i32 } @llvm.s390.vfchedbs(<2 x double> %{{.*}}, <2 x double> %{{.*}})
20+
21+
// vftcidb with the two immediates 0 and 4095: the immediate is expected to
// appear unchanged as the i32 operand of the target intrinsic.
vsl = __builtin_s390_vftcidb(vd, 0, &cc);
22+
// CHECK: call { <2 x i64>, i32 } @llvm.s390.vftcidb(<2 x double> %{{.*}}, i32 0)
23+
vsl = __builtin_s390_vftcidb(vd, 4095, &cc);
24+
// CHECK: call { <2 x i64>, i32 } @llvm.s390.vftcidb(<2 x double> %{{.*}}, i32 4095)
25+
26+
// Square root: expected to use the constrained sqrt intrinsic.
vd = __builtin_s390_vfsqdb(vd);
27+
// CHECK: call <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double> %{{.*}})
28+
29+
// Multiply-add / multiply-subtract: expected to use the constrained fma
// intrinsic; the subtract form feeds an fneg result as the third operand.
vd = __builtin_s390_vfmadb(vd, vd, vd);
30+
// CHECK: call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
31+
vd = __builtin_s390_vfmsdb(vd, vd, vd);
32+
// CHECK: [[NEG:%[^ ]+]] = fneg <2 x double> {{.*}}
33+
// CHECK: call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]], {{.*}})
34+
35+
// vflpdb / vflndb: expected as plain llvm.fabs (plus fneg for vflndb),
// i.e. no constrained intrinsic is expected here.
vd = __builtin_s390_vflpdb(vd);
36+
// CHECK: call <2 x double> @llvm.fabs.v2f64(<2 x double> %{{.*}})
37+
vd = __builtin_s390_vflndb(vd);
38+
// CHECK: [[ABS:%[^ ]+]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> %{{.*}})
39+
// CHECK: fneg <2 x double> [[ABS]]
40+
41+
// vfidb with different (second, third) immediate pairs: each supported
// pair is expected to map to a constrained rounding intrinsic; the final
// pair (4, 4) is expected to stay a call to llvm.s390.vfidb.
vd = __builtin_s390_vfidb(vd, 0, 0);
42+
// CHECK: call <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double> %{{.*}})
43+
vd = __builtin_s390_vfidb(vd, 4, 0);
44+
// CHECK: call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double> %{{.*}})
45+
vd = __builtin_s390_vfidb(vd, 4, 1);
46+
// CHECK: call <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double> %{{.*}})
47+
vd = __builtin_s390_vfidb(vd, 4, 5);
48+
// CHECK: call <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double> %{{.*}})
49+
vd = __builtin_s390_vfidb(vd, 4, 6);
50+
// CHECK: call <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double> %{{.*}})
51+
vd = __builtin_s390_vfidb(vd, 4, 7);
52+
// CHECK: call <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double> %{{.*}})
53+
vd = __builtin_s390_vfidb(vd, 4, 4);
54+
// CHECK: call <2 x double> @llvm.s390.vfidb(<2 x double> %{{.*}}, i32 4, i32 4)
55+
}
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
// REQUIRES: systemz-registered-target
2+
// RUN: %clang_cc1 -target-cpu z14 -triple s390x-ibm-linux -flax-vector-conversions=none \
3+
// RUN: -ffp-exception-behavior=strict -Wall -Wno-unused -Werror -emit-llvm %s -o - | FileCheck %s
4+
// Codegen test: this file is compiled for z14 with strict FP exception
// behavior, and the emitted LLVM IR is matched against the FileCheck
// directives that follow each builtin call below.
5+
// 16-byte vector types used as operands/results of the builtins.
typedef __attribute__((vector_size(16))) double vec_double;
6+
typedef __attribute__((vector_size(16))) float vec_float;
7+
8+
// File-scope volatile operands/results (presumably volatile so that every
// access stays visible in the emitted IR -- confirm).
volatile vec_double vd;
9+
volatile vec_float vf;
10+
11+
// Exercises double- and single-precision vector FP builtins one by one;
// each call is followed by the IR pattern expected for it.
void test_float(void) {
12+
// Max/min with last argument 4: expected to use the constrained
// maxnum/minnum intrinsics.
vd = __builtin_s390_vfmaxdb(vd, vd, 4);
13+
// CHECK: call <2 x double> @llvm.experimental.constrained.maxnum.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}})
14+
15+
vd = __builtin_s390_vfmindb(vd, vd, 4);
16+
// CHECK: call <2 x double> @llvm.experimental.constrained.minnum.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}})
17+
// NOTE(review): the next call (last argument 0) has no FileCheck directive
// after it, so only successful compilation is exercised. Presumably it is
// meant to cover the fallback to the target-specific vfmindb intrinsic --
// confirm and add a matching directive if so.
vd = __builtin_s390_vfmindb(vd, vd, 0);
18+
19+
// Negated multiply-add: constrained fma whose result is fed to an fneg.
vd = __builtin_s390_vfnmadb(vd, vd, vd);
20+
// CHECK: [[RES:%[^ ]+]] = call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
21+
// CHECK: fneg <2 x double> [[RES]]
22+
23+
// Negated multiply-subtract: fneg of the third operand, constrained fma,
// then fneg of the fma result.
vd = __builtin_s390_vfnmsdb(vd, vd, vd);
24+
// CHECK: [[NEG:%[^ ]+]] = fneg <2 x double> {{.*}}
25+
// CHECK: [[RES:%[^ ]+]] = call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]], metadata !{{.*}})
26+
// CHECK: fneg <2 x double> [[RES]]
27+
28+
// Single-precision variants of the same builtins follow; the expected
// intrinsics carry the v4f32 suffix.
vf = __builtin_s390_vfmaxsb(vf, vf, 4);
29+
// CHECK: call <4 x float> @llvm.experimental.constrained.maxnum.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !{{.*}})
30+
31+
vf = __builtin_s390_vfminsb(vf, vf, 4);
32+
// CHECK: call <4 x float> @llvm.experimental.constrained.minnum.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !{{.*}})
33+
34+
vf = __builtin_s390_vfsqsb(vf);
35+
// CHECK: call <4 x float> @llvm.experimental.constrained.sqrt.v4f32(<4 x float> %{{.*}}, metadata !{{.*}})
36+
37+
vf = __builtin_s390_vfmasb(vf, vf, vf);
38+
// CHECK: call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !{{.*}})
39+
vf = __builtin_s390_vfmssb(vf, vf, vf);
40+
// CHECK: [[NEG:%[^ ]+]] = fneg <4 x float> %{{.*}}
41+
// CHECK: call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]], metadata !{{.*}})
42+
vf = __builtin_s390_vfnmasb(vf, vf, vf);
43+
// CHECK: [[RES:%[^ ]+]] = call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !{{.*}})
44+
// CHECK: fneg <4 x float> [[RES]]
45+
vf = __builtin_s390_vfnmssb(vf, vf, vf);
46+
// CHECK: [[NEG:%[^ ]+]] = fneg <4 x float> %{{.*}}
47+
// CHECK: [[RES:%[^ ]+]] = call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]], metadata !{{.*}})
48+
// CHECK: fneg <4 x float> [[RES]]
49+
50+
// vflpsb / vflnsb: expected as plain llvm.fabs (plus fneg for vflnsb),
// i.e. no constrained intrinsic is expected here.
vf = __builtin_s390_vflpsb(vf);
51+
// CHECK: call <4 x float> @llvm.fabs.v4f32(<4 x float> %{{.*}})
52+
vf = __builtin_s390_vflnsb(vf);
53+
// CHECK: [[ABS:%[^ ]+]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> %{{.*}})
54+
// CHECK: fneg <4 x float> [[ABS]]
55+
56+
// vfisb with different (second, third) immediate pairs: each pair below
// is expected to map to a constrained rounding intrinsic.
vf = __builtin_s390_vfisb(vf, 0, 0);
57+
// CHECK: call <4 x float> @llvm.experimental.constrained.rint.v4f32(<4 x float> %{{.*}}, metadata !{{.*}})
58+
vf = __builtin_s390_vfisb(vf, 4, 0);
59+
// CHECK: call <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(<4 x float> %{{.*}}, metadata !{{.*}})
60+
vf = __builtin_s390_vfisb(vf, 4, 1);
61+
// CHECK: call <4 x float> @llvm.experimental.constrained.round.v4f32(<4 x float> %{{.*}}, metadata !{{.*}})
62+
vf = __builtin_s390_vfisb(vf, 4, 5);
63+
// CHECK: call <4 x float> @llvm.experimental.constrained.trunc.v4f32(<4 x float> %{{.*}}, metadata !{{.*}})
64+
vf = __builtin_s390_vfisb(vf, 4, 6);
65+
// CHECK: call <4 x float> @llvm.experimental.constrained.ceil.v4f32(<4 x float> %{{.*}}, metadata !{{.*}})
66+
vf = __builtin_s390_vfisb(vf, 4, 7);
67+
// CHECK: call <4 x float> @llvm.experimental.constrained.floor.v4f32(<4 x float> %{{.*}}, metadata !{{.*}})
68+
}
69+

0 commit comments

Comments
 (0)