Skip to content

Conversation

AZero13
Copy link
Contributor

@AZero13 AZero13 commented Aug 14, 2025

This should be the peephole's job. Because and sets V flag to 0, this is why signed comparisons with 0 are okay to replace with tst. Note this is only for AArch64, because ANDS on ARM leaves the V flag the same.

Fixes: #154387

@llvmbot
Copy link
Member

llvmbot commented Aug 14, 2025

@llvm/pr-subscribers-backend-aarch64

Author: AZero13 (AZero13)

Changes

This should be the peephole's job. Because and sets V flag to 0, this is why signed comparisons with 0 are okay to replace with tst. Note this is only for AArch64, because ANDS on ARM leaves the V flag the same.


Full diff: https://github.com/llvm/llvm-project/pull/153608.diff

3 Files Affected:

  • (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.cpp (+75-1)
  • (modified) llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp (+20)
  • (added) llvm/test/CodeGen/AArch64/arm64-regress-opt-cmp-signed.mir (+41)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index a55f103bff385..1d909600413aa 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1433,6 +1433,34 @@ static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
     return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
   case AArch64::SUBSXrx:
     return AArch64::SUBXrx;
+  case AArch64::ANDSWri:
+    return AArch64::ANDWri;
+  case AArch64::ANDSWrr:
+    return AArch64::ANDWrr;
+  case AArch64::ANDSWrs:
+    return AArch64::ANDWrs;
+  case AArch64::ANDSXri:
+    return AArch64::ANDXri;
+  case AArch64::ANDSXrr:
+    return AArch64::ANDXrr;
+  case AArch64::ANDSXrs:
+    return AArch64::ANDXrs;
+  case AArch64::BICSWrr:
+    return AArch64::BICWrr;
+  case AArch64::BICSXrr:
+    return AArch64::BICXrr;
+  case AArch64::BICSWrs:
+    return AArch64::BICWrs;
+  case AArch64::BICSXrs:
+    return AArch64::BICXrs;
+  case AArch64::SBCSXr:
+    return AArch64::SBCXr;
+  case AArch64::SBCSWr:
+    return AArch64::SBCWr;
+  case AArch64::ADCSXr:
+    return AArch64::ADCXr;
+  case AArch64::ADCSWr:
+    return AArch64::ADCWr;
   }
 }
 
@@ -1717,6 +1745,16 @@ static unsigned sForm(MachineInstr &Instr) {
   case AArch64::SUBSWri:
   case AArch64::SUBSXrr:
   case AArch64::SUBSXri:
+  case AArch64::ANDSWri:
+  case AArch64::ANDSWrr:
+  case AArch64::ANDSWrs:
+  case AArch64::ANDSXri:
+  case AArch64::ANDSXrr:
+  case AArch64::ANDSXrs:
+  case AArch64::BICSWrr:
+  case AArch64::BICSXrr:
+  case AArch64::BICSWrs:
+  case AArch64::BICSXrs:
     return Instr.getOpcode();
 
   case AArch64::ADDWrr:
@@ -1747,6 +1785,22 @@ static unsigned sForm(MachineInstr &Instr) {
     return AArch64::ANDSWri;
   case AArch64::ANDXri:
     return AArch64::ANDSXri;
+  case AArch64::ANDWrr:
+    return AArch64::ANDSWrr;
+  case AArch64::ANDWrs:
+    return AArch64::ANDSWrs;
+  case AArch64::ANDXrr:
+    return AArch64::ANDSXrr;
+  case AArch64::ANDXrs:
+    return AArch64::ANDSXrs;
+  case AArch64::BICWrr:
+    return AArch64::BICSWrr;
+  case AArch64::BICXrr:
+    return AArch64::BICSXrr;
+  case AArch64::BICWrs:
+    return AArch64::BICSWrs;
+  case AArch64::BICXrs:
+    return AArch64::BICSXrs;
   }
 }
 
@@ -1884,6 +1938,26 @@ static bool isSUBSRegImm(unsigned Opcode) {
   return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
 }
 
+
+static bool isANDOpcode(MachineInstr &MI) {
+  unsigned Opc = sForm(MI);
+  switch (Opc) {
+  case AArch64::ANDSWri:
+  case AArch64::ANDSWrr:
+  case AArch64::ANDSWrs:
+  case AArch64::ANDSXri:
+  case AArch64::ANDSXrr:
+  case AArch64::ANDSXrs:
+  case AArch64::BICSWrr:
+  case AArch64::BICSXrr:
+  case AArch64::BICSWrs:
+  case AArch64::BICSXrs:
+    return true;
+  default:
+    return false;
+  }
+}
+
 /// Check if CmpInstr can be substituted by MI.
 ///
 /// CmpInstr can be substituted:
@@ -1921,7 +1995,7 @@ static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
   // 1) MI and CmpInstr set N and V to the same value.
   // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
   //    signed overflow occurs, so CmpInstr could still be simplified away.
-  if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap))
+  if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap) && !isANDOpcode(MI))
     return false;
 
   AccessKind AccessToCheck = AK_Write;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
index 4bd025da636ca..318346c2c8aec 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
@@ -87,6 +87,26 @@ unsigned getNonFlagSettingVariant(unsigned Opc) {
     return AArch64::ADDXri;
   case AArch64::ADDSWri:
     return AArch64::ADDWri;
+  case AArch64::ANDSWri:
+    return AArch64::ANDWri;
+  case AArch64::ANDSWrr:
+    return AArch64::ANDWrr;
+  case AArch64::ANDSWrs:
+    return AArch64::ANDWrs;
+  case AArch64::ANDSXri:
+    return AArch64::ANDXri;
+  case AArch64::ANDSXrr:
+    return AArch64::ANDXrr;
+  case AArch64::ANDSXrs:
+    return AArch64::ANDXrs;
+  case AArch64::BICSWrr:
+    return AArch64::BICWrr;
+  case AArch64::BICSXrr:
+    return AArch64::BICXrr;
+  case AArch64::BICSWrs:
+    return AArch64::BICWrs;
+  case AArch64::BICSXrs:
+    return AArch64::BICXrs;
   case AArch64::SBCSXr:
     return AArch64::SBCXr;
   case AArch64::SBCSWr:
diff --git a/llvm/test/CodeGen/AArch64/arm64-regress-opt-cmp-signed.mir b/llvm/test/CodeGen/AArch64/arm64-regress-opt-cmp-signed.mir
new file mode 100644
index 0000000000000..27a8f91f30a8b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-regress-opt-cmp-signed.mir
@@ -0,0 +1,41 @@
+# RUN: llc -mtriple=aarch64-linux-gnu -run-pass peephole-opt -o - %s | FileCheck %s
+# CHECK: %1:gpr32common = ANDSWri {{.*}}
+# CHECK-NOT: $wzr = SUBSWri {{.*}}
+--- |
+  define i32 @test01() nounwind {
+  entry:
+    %0 = select i1 true, i32 1, i32 0
+    %1 = and i32 %0, 65535
+    %2 = icmp sgt i32 %1, 0
+    br i1 %2, label %if.then, label %if.end
+
+  if.then:                                      ; preds = %entry
+    ret i32 1
+
+  if.end:                                       ; preds = %entry
+    ret i32 0
+  }
+...
+---
+name:            test01
+registers:
+  - { id: 0, class: gpr32 }
+  - { id: 1, class: gpr32common }
+body:             |
+  bb.0.entry:
+    successors: %bb.2.if.end, %bb.1.if.then
+
+    %0 = MOVi32imm 1
+    %1 = ANDWri killed %1, 15
+    $wzr = SUBSWri killed %1, 0, 0, implicit-def $nzcv
+    Bcc 12, %bb.2.if.end, implicit $nzcv
+
+  bb.1.if.then:
+    $w0 = MOVi32imm 1
+    RET_ReallyLR implicit $w0
+
+  bb.2.if.end:
+    $w0 = MOVi32imm 0
+    RET_ReallyLR implicit $w0
+
+...

Copy link

github-actions bot commented Aug 14, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

@AZero13 AZero13 force-pushed the ede branch 3 times, most recently from c1453eb to 38d4b65 Compare August 15, 2025 22:56
@AZero13
Copy link
Contributor Author

AZero13 commented Aug 18, 2025

@davemgreen what do you think about this?

@AZero13
Copy link
Contributor Author

AZero13 commented Aug 19, 2025

@MacDue Ping?

@MacDue
Copy link
Member

MacDue commented Aug 21, 2025

I think the utility of your change would be more apparent if you pre-comitted some tests. IIUC this is folding things like:

        and     w8, w9, w8
        cmp     w8, #0

to

tst w9, w8

?

@AZero13
Copy link
Contributor Author

AZero13 commented Aug 21, 2025

I think the utility of your change would be more apparent if you pre-comitted some tests. IIUC this is folding things like:


        and     w8, w9, w8

        cmp     w8, #0

to


tst w9, w8

?

Yes exactly that's the point

@AZero13
Copy link
Contributor Author

AZero13 commented Aug 22, 2025

@MacDue Done!

This should be the peephole's job. Because and sets V flag to 0, this is why signed comparisons with 0 are okay to replace with tst. Note this is only for AArch64, because ANDS on ARM leaves the V flag the same.

Fixes: llvm#154387
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

[AArch64] AND optimization does not work if there is a freeze between AND and the cmp
3 participants