release/21.x: [LoongArch] Fix broadcast load with extension. (#155960) #156384
Conversation
@heiher What do you think about merging this PR to the release branch?
@llvm/pr-subscribers-backend-loongarch

Author: None (llvmbot)

Changes

Backport 2320529

Requested by: @tangaac

Full diff: https://github.com/llvm/llvm-project/pull/156384.diff

3 Files Affected:
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 12cf04bbbab56..d6adcf37f06f9 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2430,11 +2430,14 @@ static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
}
// make sure that this load is valid and only has one user.
- if (!IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
+ if (!IsIdeneity || !IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
return SDValue();
- if (IsIdeneity) {
- auto *LN = cast<LoadSDNode>(IdentitySrc);
+ auto *LN = cast<LoadSDNode>(IdentitySrc);
+ auto ExtType = LN->getExtensionType();
+
+ if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
+ VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
SDVTList Tys =
LN->isIndexed()
? DAG.getVTList(VT, LN->getBasePtr().getValueType(), MVT::Other)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll b/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll
index 976924bdca686..6035b8822cef7 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll
@@ -18,6 +18,32 @@ define <4 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst) {
ret <4 x i64> %tmp2
}
+define <16 x i16> @should_not_be_optimized_sext_load(ptr %ptr) {
+; CHECK-LABEL: should_not_be_optimized_sext_load:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld.b $a0, $a0, 0
+; CHECK-NEXT: xvreplgr2vr.h $xr0, $a0
+; CHECK-NEXT: ret
+ %tmp = load i8, ptr %ptr
+ %tmp1 = sext i8 %tmp to i16
+ %tmp2 = insertelement <16 x i16> zeroinitializer, i16 %tmp1, i32 0
+ %tmp3 = shufflevector <16 x i16> %tmp2, <16 x i16> poison, <16 x i32> zeroinitializer
+ ret <16 x i16> %tmp3
+}
+
+define <16 x i16> @should_not_be_optimized_zext_load(ptr %ptr) {
+; CHECK-LABEL: should_not_be_optimized_zext_load:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld.bu $a0, $a0, 0
+; CHECK-NEXT: xvreplgr2vr.h $xr0, $a0
+; CHECK-NEXT: ret
+ %tmp = load i8, ptr %ptr
+ %tmp1 = zext i8 %tmp to i16
+ %tmp2 = insertelement <16 x i16> zeroinitializer, i16 %tmp1, i32 0
+ %tmp3 = shufflevector <16 x i16> %tmp2, <16 x i16> poison, <16 x i32> zeroinitializer
+ ret <16 x i16> %tmp3
+}
+
define <4 x i64> @xvldrepl_d_unaligned_offset(ptr %ptr) {
; CHECK-LABEL: xvldrepl_d_unaligned_offset:
; CHECK: # %bb.0:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll b/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll
index c46747ef30509..f058acf5e45e5 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll
@@ -1,8 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc --mtriple=loongarch64 -mattr=+lsx < %s | FileCheck %s
-; TODO: Load a element and splat it to a vector could be lowerd to vldrepl
-
; A load has more than one user shouldn't be lowered to vldrepl
define <2 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst){
; CHECK-LABEL: should_not_be_optimized:
@@ -18,6 +16,32 @@ define <2 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst){
ret <2 x i64> %tmp2
}
+define <8 x i16> @should_not_be_optimized_sext_load(ptr %ptr) {
+; CHECK-LABEL: should_not_be_optimized_sext_load:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld.b $a0, $a0, 0
+; CHECK-NEXT: vreplgr2vr.h $vr0, $a0
+; CHECK-NEXT: ret
+ %tmp = load i8, ptr %ptr
+ %tmp1 = sext i8 %tmp to i16
+ %tmp2 = insertelement <8 x i16> zeroinitializer, i16 %tmp1, i32 0
+ %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> poison, <8 x i32> zeroinitializer
+ ret <8 x i16> %tmp3
+}
+
+define <8 x i16> @should_not_be_optimized_zext_load(ptr %ptr) {
+; CHECK-LABEL: should_not_be_optimized_zext_load:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld.bu $a0, $a0, 0
+; CHECK-NEXT: vreplgr2vr.h $vr0, $a0
+; CHECK-NEXT: ret
+ %tmp = load i8, ptr %ptr
+ %tmp1 = zext i8 %tmp to i16
+ %tmp2 = insertelement <8 x i16> zeroinitializer, i16 %tmp1, i32 0
+ %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> poison, <8 x i32> zeroinitializer
+ ret <8 x i16> %tmp3
+}
+
define <2 x i64> @vldrepl_d_unaligned_offset(ptr %ptr) {
; CHECK-LABEL: vldrepl_d_unaligned_offset:
; CHECK: # %bb.0:
@tangaac (or anyone else): if you would like to add a note about this fix to the release notes (completely optional), please reply to this comment with a one- or two-sentence description of the fix. When you are done, please add the release:note label to this PR.
PR llvm#135896 introduced the [x]vldrepl instructions but did not handle extending loads; this patch fixes that. (cherry picked from commit 2320529)
Backport 2320529
Requested by: @tangaac
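
For context, the new guard only keeps the broadcast-load path when the load is non-extending (or an any-extend whose memory width matches the vector element width), and rejects sign- and zero-extending loads. Below is a minimal standalone sketch in the same style as the tests above; the function name is made up for illustration, and the expected vldrepl.h lowering is an assumption based on the existing vldrepl tests rather than output captured from this patch:

; Hypothetical reproducer, not part of the patch.
; Assumption: with llc --mtriple=loongarch64 -mattr=+lsx, this plain
; (non-extending) i16 load + splat should still fold into a single
; broadcast load, e.g. "vldrepl.h $vr0, $a0, 0".
define <8 x i16> @splat_plain_load(ptr %ptr) {
  %v = load i16, ptr %ptr
  %ins = insertelement <8 x i16> zeroinitializer, i16 %v, i32 0
  %splat = shufflevector <8 x i16> %ins, <8 x i16> poison, <8 x i32> zeroinitializer
  ret <8 x i16> %splat
}

By contrast, the should_not_be_optimized_sext_load and should_not_be_optimized_zext_load tests in the diff load an i8 and extend it to i16, so the element in memory is narrower than the vector element; with this fix those cases fall back to a scalar ld.b/ld.bu plus [x]vreplgr2vr.h instead of an incorrect vldrepl.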