-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[RISCV] Use XORI/SLLI/ADDI when materializing select of constants #155845
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISCV] Use XORI/SLLI/ADDI when materializing select of constants #155845
Conversation
This case is the inverse of the one introduced in llvm#155644. The complexity with the inversion is that we need to also invert the condition before shifting it. I had originally planned to only do so when the condition was "cheaply" invertible (i.e. didn't require the xori), but when looking more closely at the diffs I noticed that while the XORI prevents this from being an icount improvement, and actually lengthens slightly the critical path, it does still reduce the number of registers needed.
@llvm/pr-subscribers-backend-risc-v Author: Philip Reames (preames) ChangesThis case is the inverse of the one introduced in #155644. The complexity with the inversion is that we need to also invert the condition before shifting it. I had originally planned to only do so when the condition was "cheaply" invertible (i.e. didn't require the xori), but when looking more closely at the diffs I noticed that while the XORI prevents this from being an icount improvement, and actually lengthens slightly the critical path, it does still reduce the number of registers needed. (This still doesn't tackle merging the code into the non-zicond path - that's still a future change.) Full diff: https://github.com/llvm/llvm-project/pull/155845.diff 3 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 4c39bcf8494a4..9c268012b24d2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -9287,13 +9287,19 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
}
}
- // Use SHL/ADDI to avoid having to materialize a constant in register
- // TODO: Handle the inverse case when the condition can be cheaply flipped
+ // Use SHL/ADDI (and possible XORI) to avoid having to materialize
+ // a constant in register
if ((TrueVal - FalseVal).isPowerOf2() && FalseVal.isSignedIntN(12)) {
SDValue Log2 = DAG.getConstant((TrueVal - FalseVal).logBase2(), DL, VT);
SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2);
return DAG.getNode(ISD::ADD, DL, VT, FalseV, BitDiff);
}
+ if ((FalseVal - TrueVal).isPowerOf2() && TrueVal.isSignedIntN(12)) {
+ SDValue Log2 = DAG.getConstant((FalseVal - TrueVal).logBase2(), DL, VT);
+ CondV = DAG.getLogicalNOT(DL, CondV, CondV->getValueType(0));
+ SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2);
+ return DAG.getNode(ISD::ADD, DL, VT, TrueV, BitDiff);
+ }
auto getCost = [&](const APInt &Delta, const APInt &Addend) {
const int DeltaCost = RISCVMatInt::getIntMatCost(
diff --git a/llvm/test/CodeGen/RISCV/select-const.ll b/llvm/test/CodeGen/RISCV/select-const.ll
index e838710878d68..3d87b7d18ff56 100644
--- a/llvm/test/CodeGen/RISCV/select-const.ll
+++ b/llvm/test/CodeGen/RISCV/select-const.ll
@@ -85,8 +85,8 @@ define signext i32 @select_const_int_harder(i1 zeroext %a) nounwind {
;
; RV32ZICOND-LABEL: select_const_int_harder:
; RV32ZICOND: # %bb.0:
-; RV32ZICOND-NEXT: li a1, 32
-; RV32ZICOND-NEXT: czero.nez a0, a1, a0
+; RV32ZICOND-NEXT: xori a0, a0, 1
+; RV32ZICOND-NEXT: slli a0, a0, 5
; RV32ZICOND-NEXT: addi a0, a0, 6
; RV32ZICOND-NEXT: ret
;
@@ -112,8 +112,8 @@ define signext i32 @select_const_int_harder(i1 zeroext %a) nounwind {
;
; RV64ZICOND-LABEL: select_const_int_harder:
; RV64ZICOND: # %bb.0:
-; RV64ZICOND-NEXT: li a1, 32
-; RV64ZICOND-NEXT: czero.nez a0, a1, a0
+; RV64ZICOND-NEXT: xori a0, a0, 1
+; RV64ZICOND-NEXT: slli a0, a0, 5
; RV64ZICOND-NEXT: addiw a0, a0, 6
; RV64ZICOND-NEXT: ret
%1 = select i1 %a, i32 6, i32 38
@@ -636,8 +636,7 @@ define i32 @diff_shl_addi(i32 signext %x) {
; RV32ZICOND-LABEL: diff_shl_addi:
; RV32ZICOND: # %bb.0:
; RV32ZICOND-NEXT: srli a0, a0, 31
-; RV32ZICOND-NEXT: lui a1, 4
-; RV32ZICOND-NEXT: czero.eqz a0, a1, a0
+; RV32ZICOND-NEXT: slli a0, a0, 14
; RV32ZICOND-NEXT: addi a0, a0, 25
; RV32ZICOND-NEXT: ret
;
@@ -666,8 +665,7 @@ define i32 @diff_shl_addi(i32 signext %x) {
; RV64ZICOND-LABEL: diff_shl_addi:
; RV64ZICOND: # %bb.0:
; RV64ZICOND-NEXT: srli a0, a0, 63
-; RV64ZICOND-NEXT: lui a1, 4
-; RV64ZICOND-NEXT: czero.eqz a0, a1, a0
+; RV64ZICOND-NEXT: slli a0, a0, 14
; RV64ZICOND-NEXT: addiw a0, a0, 25
; RV64ZICOND-NEXT: ret
%cmp = icmp sgt i32 %x, -1
diff --git a/llvm/test/CodeGen/RISCV/select.ll b/llvm/test/CodeGen/RISCV/select.ll
index c897dd9368248..1e7bb4295938b 100644
--- a/llvm/test/CodeGen/RISCV/select.ll
+++ b/llvm/test/CodeGen/RISCV/select.ll
@@ -1750,15 +1750,15 @@ define i32 @select_cst5(i1 zeroext %cond) {
;
; RV64IMXVTCONDOPS-LABEL: select_cst5:
; RV64IMXVTCONDOPS: # %bb.0:
-; RV64IMXVTCONDOPS-NEXT: li a1, 2
-; RV64IMXVTCONDOPS-NEXT: vt.maskcn a0, a1, a0
+; RV64IMXVTCONDOPS-NEXT: xori a0, a0, 1
+; RV64IMXVTCONDOPS-NEXT: slli a0, a0, 1
; RV64IMXVTCONDOPS-NEXT: addi a0, a0, 2047
; RV64IMXVTCONDOPS-NEXT: ret
;
; CHECKZICOND-LABEL: select_cst5:
; CHECKZICOND: # %bb.0:
-; CHECKZICOND-NEXT: li a1, 2
-; CHECKZICOND-NEXT: czero.nez a0, a1, a0
+; CHECKZICOND-NEXT: xori a0, a0, 1
+; CHECKZICOND-NEXT: slli a0, a0, 1
; CHECKZICOND-NEXT: addi a0, a0, 2047
; CHECKZICOND-NEXT: ret
%ret = select i1 %cond, i32 2047, i32 2049
@@ -1826,22 +1826,22 @@ define i32 @select_cst_diff2(i1 zeroext %cond) {
;
; RV64IMXVTCONDOPS-LABEL: select_cst_diff2:
; RV64IMXVTCONDOPS: # %bb.0:
-; RV64IMXVTCONDOPS-NEXT: li a1, 2
-; RV64IMXVTCONDOPS-NEXT: vt.maskcn a0, a1, a0
+; RV64IMXVTCONDOPS-NEXT: xori a0, a0, 1
+; RV64IMXVTCONDOPS-NEXT: slli a0, a0, 1
; RV64IMXVTCONDOPS-NEXT: addiw a0, a0, 120
; RV64IMXVTCONDOPS-NEXT: ret
;
; RV32IMZICOND-LABEL: select_cst_diff2:
; RV32IMZICOND: # %bb.0:
-; RV32IMZICOND-NEXT: li a1, 2
-; RV32IMZICOND-NEXT: czero.nez a0, a1, a0
+; RV32IMZICOND-NEXT: xori a0, a0, 1
+; RV32IMZICOND-NEXT: slli a0, a0, 1
; RV32IMZICOND-NEXT: addi a0, a0, 120
; RV32IMZICOND-NEXT: ret
;
; RV64IMZICOND-LABEL: select_cst_diff2:
; RV64IMZICOND: # %bb.0:
-; RV64IMZICOND-NEXT: li a1, 2
-; RV64IMZICOND-NEXT: czero.nez a0, a1, a0
+; RV64IMZICOND-NEXT: xori a0, a0, 1
+; RV64IMZICOND-NEXT: slli a0, a0, 1
; RV64IMZICOND-NEXT: addiw a0, a0, 120
; RV64IMZICOND-NEXT: ret
%ret = select i1 %cond, i32 120, i32 122
@@ -1949,15 +1949,15 @@ define i32 @select_cst_diff4_invert(i1 zeroext %cond) {
;
; RV64IMXVTCONDOPS-LABEL: select_cst_diff4_invert:
; RV64IMXVTCONDOPS: # %bb.0:
-; RV64IMXVTCONDOPS-NEXT: li a1, 4
-; RV64IMXVTCONDOPS-NEXT: vt.maskcn a0, a1, a0
+; RV64IMXVTCONDOPS-NEXT: xori a0, a0, 1
+; RV64IMXVTCONDOPS-NEXT: slli a0, a0, 2
; RV64IMXVTCONDOPS-NEXT: addi a0, a0, 6
; RV64IMXVTCONDOPS-NEXT: ret
;
; CHECKZICOND-LABEL: select_cst_diff4_invert:
; CHECKZICOND: # %bb.0:
-; CHECKZICOND-NEXT: li a1, 4
-; CHECKZICOND-NEXT: czero.nez a0, a1, a0
+; CHECKZICOND-NEXT: xori a0, a0, 1
+; CHECKZICOND-NEXT: slli a0, a0, 2
; CHECKZICOND-NEXT: addi a0, a0, 6
; CHECKZICOND-NEXT: ret
%ret = select i1 %cond, i32 6, i32 10
@@ -2029,22 +2029,22 @@ define i32 @select_cst_diff8_invert(i1 zeroext %cond) {
;
; RV64IMXVTCONDOPS-LABEL: select_cst_diff8_invert:
; RV64IMXVTCONDOPS: # %bb.0:
-; RV64IMXVTCONDOPS-NEXT: li a1, 8
-; RV64IMXVTCONDOPS-NEXT: vt.maskcn a0, a1, a0
+; RV64IMXVTCONDOPS-NEXT: xori a0, a0, 1
+; RV64IMXVTCONDOPS-NEXT: slli a0, a0, 3
; RV64IMXVTCONDOPS-NEXT: addiw a0, a0, 6
; RV64IMXVTCONDOPS-NEXT: ret
;
; RV32IMZICOND-LABEL: select_cst_diff8_invert:
; RV32IMZICOND: # %bb.0:
-; RV32IMZICOND-NEXT: li a1, 8
-; RV32IMZICOND-NEXT: czero.nez a0, a1, a0
+; RV32IMZICOND-NEXT: xori a0, a0, 1
+; RV32IMZICOND-NEXT: slli a0, a0, 3
; RV32IMZICOND-NEXT: addi a0, a0, 6
; RV32IMZICOND-NEXT: ret
;
; RV64IMZICOND-LABEL: select_cst_diff8_invert:
; RV64IMZICOND: # %bb.0:
-; RV64IMZICOND-NEXT: li a1, 8
-; RV64IMZICOND-NEXT: czero.nez a0, a1, a0
+; RV64IMZICOND-NEXT: xori a0, a0, 1
+; RV64IMZICOND-NEXT: slli a0, a0, 3
; RV64IMZICOND-NEXT: addiw a0, a0, 6
; RV64IMZICOND-NEXT: ret
%ret = select i1 %cond, i32 6, i32 14
@@ -2117,22 +2117,22 @@ define i32 @select_cst_diff1024_invert(i1 zeroext %cond) {
;
; RV64IMXVTCONDOPS-LABEL: select_cst_diff1024_invert:
; RV64IMXVTCONDOPS: # %bb.0:
-; RV64IMXVTCONDOPS-NEXT: li a1, 1024
-; RV64IMXVTCONDOPS-NEXT: vt.maskcn a0, a1, a0
+; RV64IMXVTCONDOPS-NEXT: xori a0, a0, 1
+; RV64IMXVTCONDOPS-NEXT: slli a0, a0, 10
; RV64IMXVTCONDOPS-NEXT: addiw a0, a0, 6
; RV64IMXVTCONDOPS-NEXT: ret
;
; RV32IMZICOND-LABEL: select_cst_diff1024_invert:
; RV32IMZICOND: # %bb.0:
-; RV32IMZICOND-NEXT: li a1, 1024
-; RV32IMZICOND-NEXT: czero.nez a0, a1, a0
+; RV32IMZICOND-NEXT: xori a0, a0, 1
+; RV32IMZICOND-NEXT: slli a0, a0, 10
; RV32IMZICOND-NEXT: addi a0, a0, 6
; RV32IMZICOND-NEXT: ret
;
; RV64IMZICOND-LABEL: select_cst_diff1024_invert:
; RV64IMZICOND: # %bb.0:
-; RV64IMZICOND-NEXT: li a1, 1024
-; RV64IMZICOND-NEXT: czero.nez a0, a1, a0
+; RV64IMZICOND-NEXT: xori a0, a0, 1
+; RV64IMZICOND-NEXT: slli a0, a0, 10
; RV64IMZICOND-NEXT: addiw a0, a0, 6
; RV64IMZICOND-NEXT: ret
%ret = select i1 %cond, i32 6, i32 1030
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
This case is the inverse of the one introduced in #155644. The complexity with the inversion is that we need to also invert the condition before shifting it. I had originally planned to only do so when the condition was "cheaply" invertible (i.e. didn't require the xori), but when looking more closely at the diffs I noticed that while the XORI prevents this from being an icount improvement, and actually lengthens slightly the critical path, it does still reduce the number of registers needed.
(This still doesn't tackle merging the code into the non-zicond path - that's still a future change.)