Skip to content

Conversation

llvmbot
Copy link
Member

@llvmbot llvmbot commented Sep 2, 2025

Backport 2320529

Requested by: @tangaac

@llvmbot
Copy link
Member Author

llvmbot commented Sep 2, 2025

@heiher What do you think about merging this PR to the release branch?

@llvmbot
Copy link
Member Author

llvmbot commented Sep 2, 2025

@llvm/pr-subscribers-backend-loongarch

Author: None (llvmbot)

Changes

Backport 2320529

Requested by: @tangaac


Full diff: https://github.com/llvm/llvm-project/pull/156384.diff

3 Files Affected:

  • (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+6-3)
  • (modified) llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll (+26)
  • (modified) llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll (+26-2)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 12cf04bbbab56..d6adcf37f06f9 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2430,11 +2430,14 @@ static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
   }
 
   // make sure that this load is valid and only has one user.
-  if (!IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
+  if (!IsIdeneity || !IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
     return SDValue();
 
-  if (IsIdeneity) {
-    auto *LN = cast<LoadSDNode>(IdentitySrc);
+  auto *LN = cast<LoadSDNode>(IdentitySrc);
+  auto ExtType = LN->getExtensionType();
+
+  if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
+      VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
     SDVTList Tys =
         LN->isIndexed()
             ? DAG.getVTList(VT, LN->getBasePtr().getValueType(), MVT::Other)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll b/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll
index 976924bdca686..6035b8822cef7 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/broadcast-load.ll
@@ -18,6 +18,32 @@ define <4 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst) {
   ret <4 x i64> %tmp2
 }
 
+define <16 x i16> @should_not_be_optimized_sext_load(ptr %ptr) {
+; CHECK-LABEL: should_not_be_optimized_sext_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld.b $a0, $a0, 0
+; CHECK-NEXT:    xvreplgr2vr.h $xr0, $a0
+; CHECK-NEXT:    ret
+  %tmp = load i8, ptr %ptr
+  %tmp1 = sext i8 %tmp to i16
+  %tmp2 = insertelement <16 x i16> zeroinitializer, i16 %tmp1, i32 0
+  %tmp3 = shufflevector <16 x i16> %tmp2, <16 x i16> poison, <16 x i32> zeroinitializer
+  ret <16 x i16> %tmp3
+}
+
+define <16 x i16> @should_not_be_optimized_zext_load(ptr %ptr) {
+; CHECK-LABEL: should_not_be_optimized_zext_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld.bu $a0, $a0, 0
+; CHECK-NEXT:    xvreplgr2vr.h $xr0, $a0
+; CHECK-NEXT:    ret
+  %tmp = load i8, ptr %ptr
+  %tmp1 = zext i8 %tmp to i16
+  %tmp2 = insertelement <16 x i16> zeroinitializer, i16 %tmp1, i32 0
+  %tmp3 = shufflevector <16 x i16> %tmp2, <16 x i16> poison, <16 x i32> zeroinitializer
+  ret <16 x i16> %tmp3
+}
+
 define <4 x i64> @xvldrepl_d_unaligned_offset(ptr %ptr) {
 ; CHECK-LABEL: xvldrepl_d_unaligned_offset:
 ; CHECK:       # %bb.0:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll b/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll
index c46747ef30509..f058acf5e45e5 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/broadcast-load.ll
@@ -1,8 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc --mtriple=loongarch64 -mattr=+lsx < %s | FileCheck %s
 
-; TODO: Load a element and splat it to a vector could be lowerd to vldrepl
-
 ; A load has more than one user shouldn't be lowered to vldrepl
 define <2 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst){
 ; CHECK-LABEL: should_not_be_optimized:
@@ -18,6 +16,32 @@ define <2 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst){
   ret <2 x i64> %tmp2
 }
 
+define <8 x i16> @should_not_be_optimized_sext_load(ptr %ptr) {
+; CHECK-LABEL: should_not_be_optimized_sext_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld.b $a0, $a0, 0
+; CHECK-NEXT:    vreplgr2vr.h $vr0, $a0
+; CHECK-NEXT:    ret
+  %tmp = load i8, ptr %ptr
+  %tmp1 = sext i8 %tmp to i16
+  %tmp2 = insertelement <8 x i16> zeroinitializer, i16 %tmp1, i32 0
+  %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> poison, <8 x i32> zeroinitializer
+  ret <8 x i16> %tmp3
+}
+
+define <8 x i16> @should_not_be_optimized_zext_load(ptr %ptr) {
+; CHECK-LABEL: should_not_be_optimized_zext_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld.bu $a0, $a0, 0
+; CHECK-NEXT:    vreplgr2vr.h $vr0, $a0
+; CHECK-NEXT:    ret
+  %tmp = load i8, ptr %ptr
+  %tmp1 = zext i8 %tmp to i16
+  %tmp2 = insertelement <8 x i16> zeroinitializer, i16 %tmp1, i32 0
+  %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> poison, <8 x i32> zeroinitializer
+  ret <8 x i16> %tmp3
+}
+
 define <2 x i64> @vldrepl_d_unaligned_offset(ptr %ptr) {
 ; CHECK-LABEL: vldrepl_d_unaligned_offset:
 ; CHECK:       # %bb.0:

@github-project-automation github-project-automation bot moved this from Needs Triage to Needs Merge in LLVM Release Status Sep 2, 2025
@tru tru merged commit 9560864 into llvm:release/21.x Sep 3, 2025
1 check was pending
@github-project-automation github-project-automation bot moved this from Needs Merge to Done in LLVM Release Status Sep 3, 2025
Copy link

github-actions bot commented Sep 3, 2025

@tangaac (or anyone else): if you would like to add a note about this fix to the release notes (completely optional), please reply to this comment with a one- or two-sentence description of the fix. When you are done, please add the release:note label to this PR.

PR llvm#135896 introduced [x]vldrepl instructions without handling
extending loads.
This patch fixes that.

(cherry picked from commit 2320529)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
Development

Successfully merging this pull request may close these issues.

4 participants