[HashRecognize] Track visited in ValueEvolution #147812

artagnon · 2025-07-09T19:45:10Z

Require that all Instructions in the Loop are visited by ValueEvolution, as any stray instructions would complicate life for the optimization.

llvmbot · 2025-07-09T19:45:39Z

@llvm/pr-subscribers-llvm-analysis

Author: Ramkumar Ramachandra (artagnon)

Changes

Require that all Instructions in the Loop are visited by ValueEvolution, as any stray instructions would complicate life for the optimization.

Full diff: https://github.com/llvm/llvm-project/pull/147812.diff

2 Files Affected:

(modified) llvm/lib/Analysis/HashRecognize.cpp (+43-9)
(modified) llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll (+29-3)

diff --git a/llvm/lib/Analysis/HashRecognize.cpp b/llvm/lib/Analysis/HashRecognize.cpp
index 2cc3ad5f18482..f032593492287 100644
--- a/llvm/lib/Analysis/HashRecognize.cpp
+++ b/llvm/lib/Analysis/HashRecognize.cpp
@@ -91,6 +91,10 @@ class ValueEvolution {
   APInt GenPoly;
   StringRef ErrStr;
 
+  // A set of instructions visited by ValueEvolution. Anything that's not in the
+  // use-def chain of the PHIs' evolution will be reported as unvisited.
+  SmallPtrSet<const Instruction *, 16> Visited;
+
   // Compute the KnownBits of a BinaryOperator.
   KnownBits computeBinOp(const BinaryOperator *I);
 
@@ -102,15 +106,19 @@ class ValueEvolution {
 
 public:
   // ValueEvolution is meant to be constructed with the TripCount of the loop,
-  // and whether the polynomial algorithm is big-endian, for the significant-bit
-  // check.
-  ValueEvolution(unsigned TripCount, bool ByteOrderSwapped);
+  // whether the polynomial algorithm is big-endian for the significant-bit
+  // check, and an initial value for the Visited set.
+  ValueEvolution(unsigned TripCount, bool ByteOrderSwapped,
+                 ArrayRef<const Instruction *> InitVisited);
 
   // Given a list of PHI nodes along with their incoming value from within the
   // loop, computeEvolutions computes the KnownBits of each of the PHI nodes on
   // the final iteration. Returns true on success and false on error.
   bool computeEvolutions(ArrayRef<PhiStepPair> PhiEvolutions);
 
+  // Query the Visited set.
+  bool isVisited(const Instruction *I) const { return Visited.contains(I); }
+
   // In case ValueEvolution encounters an error, this is meant to be used for a
   // precise error message.
   StringRef getError() const { return ErrStr; }
@@ -120,8 +128,11 @@ class ValueEvolution {
   KnownPhiMap KnownPhis;
 };
 
-ValueEvolution::ValueEvolution(unsigned TripCount, bool ByteOrderSwapped)
-    : TripCount(TripCount), ByteOrderSwapped(ByteOrderSwapped) {}
+ValueEvolution::ValueEvolution(unsigned TripCount, bool ByteOrderSwapped,
+                               ArrayRef<const Instruction *> InitVisited)
+    : TripCount(TripCount), ByteOrderSwapped(ByteOrderSwapped) {
+  Visited.insert_range(InitVisited);
+}
 
 KnownBits ValueEvolution::computeBinOp(const BinaryOperator *I) {
   KnownBits KnownL(compute(I->getOperand(0)));
@@ -177,6 +188,9 @@ KnownBits ValueEvolution::computeBinOp(const BinaryOperator *I) {
 KnownBits ValueEvolution::computeInstr(const Instruction *I) {
   unsigned BitWidth = I->getType()->getScalarSizeInBits();
 
+  // computeInstr is the only entry-point that needs to update the Visited set.
+  Visited.insert(I);
+
   // We look up in the map that contains the KnownBits of the PHI from the
   // previous iteration.
   if (const PHINode *P = dyn_cast<PHINode>(I))
@@ -185,9 +199,14 @@ KnownBits ValueEvolution::computeInstr(const Instruction *I) {
   // Compute the KnownBits for a Select(Cmp()), forcing it to take the branch
   // that is predicated on the (least|most)-significant-bit check.
   CmpPredicate Pred;
-  Value *L, *R, *TV, *FV;
-  if (match(I, m_Select(m_ICmp(Pred, m_Value(L), m_Value(R)), m_Value(TV),
-                        m_Value(FV)))) {
+  Value *L, *R;
+  Instruction *TV, *FV;
+  if (match(I, m_Select(m_ICmp(Pred, m_Value(L), m_Value(R)), m_Instruction(TV),
+                        m_Instruction(FV)))) {
+    Visited.insert(cast<Instruction>(I->getOperand(0)));
+    Visited.insert(TV);
+    Visited.insert(FV);
+
     // We need to check LCR against [0, 2) in the little-endian case, because
     // the RCR check is insufficient: it is simply [0, 1).
     if (!ByteOrderSwapped) {
@@ -209,6 +228,9 @@ KnownBits ValueEvolution::computeInstr(const Instruction *I) {
     ConstantRange CheckRCR(APInt::getZero(ICmpBW),
                            ByteOrderSwapped ? APInt::getSignedMinValue(ICmpBW)
                                             : APInt(ICmpBW, 1));
+
+    // We only compute KnownBits of either TV or FV, as the other value would
+    // just be a bit-shift as checked by isBigEndianBitShift.
     if (AllowedR == CheckRCR)
       return compute(TV);
     if (AllowedR.inverse() == CheckRCR)
@@ -629,11 +651,23 @@ HashRecognize::recognizeCRC() const {
   if (SimpleRecurrence)
     PhiEvolutions.emplace_back(SimpleRecurrence.Phi, SimpleRecurrence.BO);
 
-  ValueEvolution VE(TC, *ByteOrderSwapped);
+  // Initialize the Visited set in ValueEvolution with the IndVar-related
+  // instructions.
+  std::initializer_list<const Instruction *> InitVisited = {
+      IndVar, Latch->getTerminator(), L.getLatchCmpInst(),
+      cast<Instruction>(IndVar->getIncomingValueForBlock(Latch))};
+
+  ValueEvolution VE(TC, *ByteOrderSwapped, InitVisited);
   if (!VE.computeEvolutions(PhiEvolutions))
     return VE.getError();
   KnownBits ResultBits = VE.KnownPhis.at(ConditionalRecurrence.Phi);
 
+  // Any unvisited instructions from the KnownBits propagation can complicate
+  // the optimization, which would just replace the entire loop with the
+  // table-lookup version of the hash algorithm.
+  if (any_of(*Latch, [VE](const Instruction &I) { return !VE.isVisited(&I); }))
+    return "Found stray unvisited instructions";
+
   unsigned N = std::min(TC, ResultBits.getBitWidth());
   auto IsZero = [](const KnownBits &K) { return K.isZero(); };
   if (!checkExtractBits(ResultBits, N, IsZero, *ByteOrderSwapped))
diff --git a/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll b/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll
index 247a105940e6e..3926c467375ed 100644
--- a/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll
+++ b/llvm/test/Analysis/HashRecognize/cyclic-redundancy-check.ll
@@ -909,10 +909,10 @@ exit:                                              ; preds = %loop
   ret i16 %crc.next
 }
 
-define i16 @not.crc.bad.cast(i8 %msg, i16 %checksum) {
-; CHECK-LABEL: 'not.crc.bad.cast'
+define i16 @not.crc.bad.endian.swapped.sb.check(i8 %msg, i16 %checksum) {
+; CHECK-LABEL: 'not.crc.bad.endian.swapped.sb.check'
 ; CHECK-NEXT:  Did not find a hash algorithm
-; CHECK-NEXT:  Reason: Expected bottom 8 bits zero (????????00001011)
+; CHECK-NEXT:  Reason: Found stray unvisited instructions
 ;
 entry:
   br label %loop
@@ -1189,3 +1189,29 @@ loop:                                              ; preds = %loop, %entry
 exit:                                              ; preds = %loop
   ret i16 %crc.next
 }
+
+define i16 @not.crc.stray.unvisited.call(i16 %crc.init) {
+; CHECK-LABEL: 'not.crc.stray.unvisited.call'
+; CHECK-NEXT:  Did not find a hash algorithm
+; CHECK-NEXT:  Reason: Found stray unvisited instructions
+;
+entry:
+  br label %loop
+
+loop:                                              ; preds = %loop, %entry
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+  %crc = phi i16 [ %crc.init, %entry ], [ %crc.next, %loop ]
+  %crc.shl = shl i16 %crc, 1
+  %crc.xor = xor i16 %crc.shl, 4129
+  %check.sb = icmp slt i16 %crc, 0
+  %crc.next = select i1 %check.sb, i16 %crc.xor, i16 %crc.shl
+  call void @print(i16 %crc.next)
+  %iv.next = add nuw nsw i32 %iv, 1
+  %exit.cond = icmp samesign ult i32 %iv, 7
+  br i1 %exit.cond, label %loop, label %exit
+
+exit:                                              ; preds = %loop
+  ret i16 %crc.next
+}
+
+declare void @print(i16)

pfusik

How about simpler approaches:

- Instead of the Visited set, have a ToVisit set initialized with all the loop instructions, erase while visiting, and at the end just check if the set is empty.
- Instead of a set, only track the number of instructions. This would work if the instructions are visited once - is that the case?

llvm/lib/Analysis/HashRecognize.cpp

artagnon

> Instead of the Visited set, have a ToVisit set initialized with all the loop instructions, erase while visiting and at the end just check if the set is empty.

I tried it, and it's exactly equivalent, with a minor regression to the elegance of the code (due to missing erase_range, SmallPtrSetImpl stuff), and the same computational complexity. I've simplified the check at the end.

> Instead of a set, only track the number of instructions. This would work if the instructions are visited once - is that the case?

No, the instructions are visited trip-count times.

llvm/lib/Analysis/HashRecognize.cpp

Require that all Instructions in the Loop are visited by ValueEvolution, as any stray instructions would complicate life for the optimization.

llvm/lib/Analysis/HashRecognize.cpp

pfusik · 2025-07-14T08:56:30Z

> Instead of the Visited set, have a ToVisit set initialized with all the loop instructions, erase while visiting and at the end just check if the set is empty.

> I tried it, and it's exactly equivalent, with a minor regression to the elegance of the code (due to missing erase_range, SmallPtrSetImpl stuff), and the same computational complexity. I've simplified the check at the end.

Okay. I wasn't sure which is simpler.

> Instead of a set, only track the number of instructions. This would work if the instructions are visited once - is that the case?

> No, the instructions are visited trip-count times.

If that's exactly trip-count times per instruction, we could compare against the number multiplied by TC.

llvm/lib/Analysis/HashRecognize.cpp

artagnon · 2025-07-14T09:01:49Z

> Instead of a set, only track the number of instructions. This would work if the instructions are visited once - is that the case?

> No, the instructions are visited trip-count times.

> If that's exactly trip-count times per instruction, we could compare against the number multiplied by TC.

True, but the code would become a lot more cryptic, with various ++NumVisited statements.

pfusik · 2025-07-14T09:06:27Z

> If that's exactly trip-count times per instruction, we could compare against the number multiplied by TC.

> True, but the code would become a lot more cryptic, with various ++NumVisited statements.

I agree. Let's keep it as-is.

pfusik

This is a step in a good direction to make sure the loop calculates a CRC and is free from side effects.

Require that all Instructions in the Loop are visited by ValueEvolution, as any stray instructions would complicate life for the optimization.

artagnon requested review from nikic and pfusik July 9, 2025 19:45

llvmbot added the llvm:analysis Includes value tracking, cost tables and constant folding label Jul 9, 2025

pfusik requested changes Jul 10, 2025

View reviewed changes

artagnon commented Jul 10, 2025

View reviewed changes

llvm/lib/Analysis/HashRecognize.cpp Outdated Show resolved Hide resolved

llvm/lib/Analysis/HashRecognize.cpp Show resolved Hide resolved

llvm/lib/Analysis/HashRecognize.cpp Outdated Show resolved Hide resolved

artagnon added 4 commits July 10, 2025 23:50

[HashRecognize] Pre-commit test for stray-unvisited

c13d2ca

[HashRecognize] Track visited in ValueEvolution

bc7936e

Require that all Instructions in the Loop are visited by ValueEvolution, as any stray instructions would complicate life for the optimization.

[HashRecognize] Minor simplification (NFC)

3a96e7f

[HashRecognize] NFC simplification

1a4cda3

artagnon force-pushed the hr-visitedset branch from 6ec6823 to 1a4cda3 Compare July 10, 2025 23:24

pfusik reviewed Jul 14, 2025

View reviewed changes

llvm/lib/Analysis/HashRecognize.cpp Outdated Show resolved Hide resolved

pfusik reviewed Jul 14, 2025

View reviewed changes

llvm/lib/Analysis/HashRecognize.cpp Outdated Show resolved Hide resolved

llvm/lib/Analysis/HashRecognize.cpp Show resolved Hide resolved

llvm/lib/Analysis/HashRecognize.cpp Outdated Show resolved Hide resolved

artagnon added 2 commits July 14, 2025 10:11

[HashRecognize] Address review

886b494

[HashRecognize] Add call.sb.check test

30e2e1b

artagnon mentioned this pull request Jul 14, 2025

[HashRecognize] Strip ValueEvolution #148620

Merged

artagnon force-pushed the hr-visitedset branch from c4de450 to 30e2e1b Compare July 14, 2025 12:39

pfusik approved these changes Jul 16, 2025

View reviewed changes

artagnon merged commit 8ef1a0e into llvm:main Jul 16, 2025
9 checks passed

artagnon deleted the hr-visitedset branch July 16, 2025 14:28

This was referenced Jul 23, 2025

test abhinavgaba/llvm-project#2

Closed

Add dataFence plugin interface abhinavgaba/llvm-project#3

Closed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[HashRecognize] Track visited in ValueEvolution #147812

[HashRecognize] Track visited in ValueEvolution #147812

Uh oh!

artagnon commented Jul 9, 2025 •

edited

Loading

Uh oh!

llvmbot commented Jul 9, 2025

Uh oh!

pfusik left a comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

artagnon left a comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

pfusik commented Jul 14, 2025

Uh oh!

Uh oh!

Uh oh!

Uh oh!

artagnon commented Jul 14, 2025

Uh oh!

pfusik commented Jul 14, 2025

Uh oh!

pfusik left a comment

Uh oh!

Uh oh!

Uh oh!

[HashRecognize] Track visited in ValueEvolution #147812

[HashRecognize] Track visited in ValueEvolution #147812

Uh oh!

Conversation

artagnon commented Jul 9, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Jul 9, 2025

Uh oh!

pfusik left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

artagnon left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

pfusik commented Jul 14, 2025

Uh oh!

Uh oh!

Uh oh!

Uh oh!

artagnon commented Jul 14, 2025

Uh oh!

pfusik commented Jul 14, 2025

Uh oh!

pfusik left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

artagnon commented Jul 9, 2025 •

edited

Loading