@@ -6732,7 +6732,7 @@ static bool getTargetShuffleAndZeroables(SDValue N, SmallVectorImpl<int> &Mask,
6732
6732
V1 = peekThroughBitcasts(V1);
6733
6733
V2 = peekThroughBitcasts(V2);
6734
6734
6735
- assert((VT.getSizeInBits() % Mask.size() ) == 0 &&
6735
+ assert((VT.getSizeInBits() % Size ) == 0 &&
6736
6736
"Illegal split of shuffle value type");
6737
6737
unsigned EltSizeInBits = VT.getSizeInBits() / Size;
6738
6738
@@ -10423,24 +10423,31 @@ static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, const SDLoc &DL,
10423
10423
/// zero. Many x86 shuffles can zero lanes cheaply and we often want to handle
10424
10424
/// as many lanes with this technique as possible to simplify the remaining
10425
10425
/// shuffle.
10426
- static APInt computeZeroableShuffleElements(ArrayRef<int> Mask,
10427
- SDValue V1, SDValue V2) {
10428
- APInt Zeroable(Mask.size(), 0);
10426
+ static void computeZeroableShuffleElements(ArrayRef<int> Mask,
10427
+ SDValue V1, SDValue V2,
10428
+ APInt &KnownUndef, APInt &KnownZero) {
10429
+ int Size = Mask.size();
10430
+ KnownUndef = KnownZero = APInt::getNullValue(Size);
10431
+
10429
10432
V1 = peekThroughBitcasts(V1);
10430
10433
V2 = peekThroughBitcasts(V2);
10431
10434
10432
10435
bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
10433
10436
bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
10434
10437
10435
10438
int VectorSizeInBits = V1.getValueSizeInBits();
10436
- int ScalarSizeInBits = VectorSizeInBits / Mask.size() ;
10439
+ int ScalarSizeInBits = VectorSizeInBits / Size ;
10437
10440
assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
10438
10441
10439
- for (int i = 0, Size = Mask.size() ; i < Size; ++i) {
10442
+ for (int i = 0; i < Size; ++i) {
10440
10443
int M = Mask[i];
10441
10444
// Handle the easy cases.
10442
- if (M < 0 || (M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
10443
- Zeroable.setBit(i);
10445
+ if (M < 0) {
10446
+ KnownUndef.setBit(i);
10447
+ continue;
10448
+ }
10449
+ if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
10450
+ KnownZero.setBit(i);
10444
10451
continue;
10445
10452
}
10446
10453
@@ -10457,20 +10464,20 @@ static APInt computeZeroableShuffleElements(ArrayRef<int> Mask,
10457
10464
if ((Size % V.getNumOperands()) == 0) {
10458
10465
int Scale = Size / V->getNumOperands();
10459
10466
SDValue Op = V.getOperand(M / Scale);
10460
- if (Op.isUndef() || X86::isZeroNode(Op))
10461
- Zeroable.setBit(i);
10467
+ if (Op.isUndef())
10468
+ KnownUndef.setBit(i);
10469
+ if (X86::isZeroNode(Op))
10470
+ KnownZero.setBit(i);
10462
10471
else if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
10463
10472
APInt Val = Cst->getAPIntValue();
10464
- Val.lshrInPlace((M % Scale) * ScalarSizeInBits);
10465
- Val = Val.getLoBits(ScalarSizeInBits);
10473
+ Val = Val.extractBits(ScalarSizeInBits, (M % Scale) * ScalarSizeInBits);
10466
10474
if (Val == 0)
10467
- Zeroable .setBit(i);
10475
+ KnownZero .setBit(i);
10468
10476
} else if (ConstantFPSDNode *Cst = dyn_cast<ConstantFPSDNode>(Op)) {
10469
10477
APInt Val = Cst->getValueAPF().bitcastToAPInt();
10470
- Val.lshrInPlace((M % Scale) * ScalarSizeInBits);
10471
- Val = Val.getLoBits(ScalarSizeInBits);
10478
+ Val = Val.extractBits(ScalarSizeInBits, (M % Scale) * ScalarSizeInBits);
10472
10479
if (Val == 0)
10473
- Zeroable .setBit(i);
10480
+ KnownZero .setBit(i);
10474
10481
}
10475
10482
continue;
10476
10483
}
@@ -10479,18 +10486,20 @@ static APInt computeZeroableShuffleElements(ArrayRef<int> Mask,
10479
10486
// elements must be UNDEF or ZERO.
10480
10487
if ((V.getNumOperands() % Size) == 0) {
10481
10488
int Scale = V->getNumOperands() / Size;
10482
- bool AllZeroable = true;
10489
+ bool AllUndef = true;
10490
+ bool AllZero = true;
10483
10491
for (int j = 0; j < Scale; ++j) {
10484
10492
SDValue Op = V.getOperand((M * Scale) + j);
10485
- AllZeroable &= (Op.isUndef() || X86::isZeroNode(Op));
10493
+ AllUndef &= Op.isUndef();
10494
+ AllZero &= X86::isZeroNode(Op);
10486
10495
}
10487
- if (AllZeroable)
10488
- Zeroable.setBit(i);
10496
+ if (AllUndef)
10497
+ KnownUndef.setBit(i);
10498
+ if (AllZero)
10499
+ KnownZero.setBit(i);
10489
10500
continue;
10490
10501
}
10491
10502
}
10492
-
10493
- return Zeroable;
10494
10503
}
10495
10504
10496
10505
// The Shuffle result is as follows:
@@ -17077,7 +17086,10 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget,
17077
17086
// We actually see shuffles that are entirely re-arrangements of a set of
17078
17087
// zero inputs. This mostly happens while decomposing complex shuffles into
17079
17088
// simple ones. Directly lower these as a buildvector of zeros.
17080
- APInt Zeroable = computeZeroableShuffleElements(OrigMask, V1, V2);
17089
+ APInt KnownUndef, KnownZero;
17090
+ computeZeroableShuffleElements(OrigMask, V1, V2, KnownUndef, KnownZero);
17091
+
17092
+ APInt Zeroable = KnownUndef | KnownZero;
17081
17093
if (Zeroable.isAllOnesValue())
17082
17094
return getZeroVector(VT, Subtarget, DAG, DL);
17083
17095
0 commit comments