Skip to content

Conversation

jhuber6
Copy link
Contributor

@jhuber6 jhuber6 commented Aug 29, 2025

Summary:
The masked load / store LLVM intrinsics take an argument for the
alignment. If the user is pessimistic about alignment they can provide a
value of 1 for an unaligned load. This patch updates infer-alignment to
increase the alignment value of the alignment argument if it is known
greater than the provided one.

Ignoring the gather / scatter versions for now since they contain many
pointers.

@jhuber6 jhuber6 requested a review from nikic as a code owner August 29, 2025 16:36
@llvmbot llvmbot added llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms labels Aug 29, 2025
@llvmbot
Copy link
Member

llvmbot commented Aug 29, 2025

@llvm/pr-subscribers-llvm-transforms

Author: Joseph Huber (jhuber6)

Changes

Summary:
The masked load / store LLVM intrinsics take an argument for the
alignment. If the user is pessimistic about alignment they can provide a
value of 1 for an unaligned load. This patch updates instcombine to
increase the alignment value of the alignment argument if it is known
greater than the provided one.

Ignoring the gather / scatter versions for now since they contain many
pointers.


Full diff: https://github.com/llvm/llvm-project/pull/156057.diff

3 Files Affected:

  • (modified) llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp (+34-14)
  • (modified) llvm/test/Transforms/InstCombine/load-store-masked-constant-array.ll (+1-1)
  • (modified) llvm/test/Transforms/InstCombine/masked_intrinsics.ll (+31)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 42b65dde67255..7e50e55ae24c8 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -288,8 +288,11 @@ Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) {
 // * Narrow width by halfs excluding zero/undef lanes
 Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
   Value *LoadPtr = II.getArgOperand(0);
-  const Align Alignment =
-      cast<ConstantInt>(II.getArgOperand(1))->getAlignValue();
+  Align Alignment = cast<ConstantInt>(II.getArgOperand(1))->getAlignValue();
+
+  Align KnownAlign = getKnownAlignment(LoadPtr, DL, &II, &AC, &DT);
+  if (Alignment < KnownAlign)
+    Alignment = KnownAlign;
 
   // If the mask is all ones or undefs, this is a plain vector load of the 1st
   // argument.
@@ -310,6 +313,15 @@ Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
     return Builder.CreateSelect(II.getArgOperand(2), LI, II.getArgOperand(3));
   }
 
+  // Update the alignment if the known value is greater than the provided one.
+  if (cast<ConstantInt>(II.getArgOperand(1))->getAlignValue() < Alignment) {
+    SmallVector<Value *> Args(II.arg_begin(), II.arg_end());
+    Args[1] = Builder.getInt32(Alignment.value());
+    CallInst *CI = Builder.CreateCall(II.getCalledFunction(), Args);
+    CI->copyMetadata(II);
+    return CI;
+  }
+
   return nullptr;
 }
 
@@ -317,33 +329,41 @@ Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
 // * Single constant active lane -> store
 // * Narrow width by halfs excluding zero/undef lanes
 Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
-  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
-  if (!ConstMask)
-    return nullptr;
+  Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
+
+  Align KnownAlign = getKnownAlignment(II.getArgOperand(1), DL, &II, &AC, &DT);
+  if (Alignment < KnownAlign)
+    Alignment = KnownAlign;
 
   // If the mask is all zeros, this instruction does nothing.
-  if (ConstMask->isNullValue())
+  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
+  if (ConstMask && ConstMask->isNullValue())
     return eraseInstFromFunction(II);
 
   // If the mask is all ones, this is a plain vector store of the 1st argument.
-  if (ConstMask->isAllOnesValue()) {
+  if (ConstMask && ConstMask->isAllOnesValue()) {
     Value *StorePtr = II.getArgOperand(1);
-    Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
     StoreInst *S =
         new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
     S->copyMetadata(II);
     return S;
   }
 
-  if (isa<ScalableVectorType>(ConstMask->getType()))
+  if (ConstMask && isa<ScalableVectorType>(ConstMask->getType()))
     return nullptr;
 
   // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
-  APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
-  APInt PoisonElts(DemandedElts.getBitWidth(), 0);
-  if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts,
-                                            PoisonElts))
-    return replaceOperand(II, 0, V);
+  if (ConstMask) {
+    APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
+    APInt PoisonElts(DemandedElts.getBitWidth(), 0);
+    if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts,
+                                              PoisonElts))
+      return replaceOperand(II, 0, V);
+  }
+
+  // Update the alignment if the known value is greater than the provided one.
+  if (cast<ConstantInt>(II.getArgOperand(2))->getAlignValue() < Alignment)
+    return replaceOperand(II, 2, Builder.getInt32(Alignment.value()));
 
   return nullptr;
 }
diff --git a/llvm/test/Transforms/InstCombine/load-store-masked-constant-array.ll b/llvm/test/Transforms/InstCombine/load-store-masked-constant-array.ll
index 918ea605a10bf..6ba52c178b8d4 100644
--- a/llvm/test/Transforms/InstCombine/load-store-masked-constant-array.ll
+++ b/llvm/test/Transforms/InstCombine/load-store-masked-constant-array.ll
@@ -7,7 +7,7 @@
 define void @combine_masked_load_store_from_constant_array(ptr %ptr) {
 ; CHECK-LABEL: @combine_masked_load_store_from_constant_array(
 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i32(i32 0, i32 10)
-; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr nonnull @contant_int_array, i32 8, <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr nonnull @contant_int_array, i32 16, <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> zeroinitializer)
 ; CHECK-NEXT:    call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP2]], ptr [[PTR:%.*]], i32 1, <vscale x 2 x i1> [[TMP1]])
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll
index 8f7683419a82a..6c1168e6c1c70 100644
--- a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll
@@ -439,3 +439,34 @@ define <2 x i64> @negative_gather_v2i64_uniform_ptrs_no_all_active_mask(ptr %src
 declare <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
 declare <2 x i64> @llvm.masked.gather.v2i64(<2 x ptr>, i32, <2 x i1>, <2 x i64>)
 
+; Alignment tests
+
+define <2 x i32> @unaligned_load(<2 x i1> %mask, ptr %ptr) {
+; CHECK-LABEL: @unaligned_load(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 64) ]
+; CHECK-NEXT:    [[MASKED_LOAD:%.*]] = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr [[PTR]], i32 64, <2 x i1> [[MASK:%.*]], <2 x i32> poison)
+; CHECK-NEXT:    ret <2 x i32> [[MASKED_LOAD]]
+;
+entry:
+  call void @llvm.assume(i1 true) [ "align"(ptr %ptr, i64 64) ]
+  %masked_load = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %ptr, i32 1, <2 x i1> %mask, <2 x i32> poison)
+  ret <2 x i32> %masked_load
+}
+
+define void @unaligned_store(<2 x i1> %mask, <2 x i32> %val, ptr %ptr) {
+; CHECK-LABEL: @unaligned_store(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 64) ]
+; CHECK-NEXT:    tail call void @llvm.masked.store.v2i32.p0(<2 x i32> [[VAL:%.*]], ptr [[PTR]], i32 64, <2 x i1> [[MASK:%.*]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  call void @llvm.assume(i1 true) [ "align"(ptr %ptr, i64 64) ]
+  tail call void @llvm.masked.store.v2i32.p0(<2 x i32> %val, ptr %ptr, i32 1, <2 x i1> %mask)
+  ret void
+}
+
+declare void @llvm.assume(i1)
+declare <2 x i32> @llvm.masked.load.v2i32.p0(ptr, i32, <2 x i1>, <2 x i32>)
+declare void @llvm.masked.store.v2i32.p0(<2 x i32>, ptr, i32, <2 x i1>)

@jhuber6 jhuber6 requested a review from arsenm August 29, 2025 17:07
jhuber6 added a commit to jhuber6/llvm-project that referenced this pull request Aug 29, 2025
Summary:
Right now these enforce alignment, which isn't convenient for the user
on platforms that support unaligned accesses. The options are to either
permit passing the alignment manually, or just assume it's unaligned
unless the user specifies it.

I've added llvm#156057 which should
make the requested alignment show up on the intrinsic if the user
passed `__builtin_assume_aligned`, however that's only with
optimizations. This shouldn't cause issues unless the backend
categorically decides to reject an unaligned access.
Copy link
Contributor

@nikic nikic left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should happen in the InferAlignment pass instead.

@nikic
Copy link
Contributor

nikic commented Aug 29, 2025

As a side note, we should really replace this argument with an align attribute on the pointer parameter...

@jhuber6
Copy link
Contributor Author

jhuber6 commented Aug 29, 2025

As a side note, we should really replace this argument with an align attribute on the pointer parameter...

Yeah, I was wondering why it was needed since the compress and expand variants don't use it. I'm guessing that would need some auto-upgrade handling or something.

…s if known

Summary:
The masked load / store LLVM intrinsics take an argument for the
alignment. If the user is pessimistic about alignment they can provide a
value of `1` for an unaligned load. This patch updates infer-alignment to
increase the alignment value of the alignment argument if it is known
greater than the provided one.

Ignoring the gather / scatter versions for now since they contain many
pointers.
@jhuber6 jhuber6 changed the title [InstCombine] Increase alignment in masked load / store intrinsics if known [InferAlignment] Increase alignment in masked load / store intrinsics if known Aug 29, 2025
@jhuber6
Copy link
Contributor Author

jhuber6 commented Aug 29, 2025

This should happen in the InferAlignment pass instead.

Done.

@nikic
Copy link
Contributor

nikic commented Aug 30, 2025

As a side note, we should really replace this argument with an align attribute on the pointer parameter...

Yeah, I was wondering why it was needed since the compress and expand variants don't use it. I'm guessing that would need some auto-upgrade handling or something.

Right. We've done this change for the memcpy etc intrinsics in the past:

case Intrinsic::memcpy:
case Intrinsic::memmove:
case Intrinsic::memset: {
// We have to make sure that the call signature is what we're expecting.
// We only want to change the old signatures by removing the alignment arg:
// @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
// -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
// @llvm.memset...(i8*, i8, i[32|64], i32, i1)
// -> @llvm.memset...(i8*, i8, i[32|64], i1)
// Note: i8*'s in the above can be any pointer type
if (CI->arg_size() != 5) {
DefaultCase();
return;
}
// Remove alignment argument (3), and add alignment attributes to the
// dest/src pointers.
Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2), CI->getArgOperand(4)};
NewCall = Builder.CreateCall(NewFn, Args);
AttributeList OldAttrs = CI->getAttributes();
AttributeList NewAttrs = AttributeList::get(
C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
{OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
NewCall->setAttributes(NewAttrs);
auto *MemCI = cast<MemIntrinsic>(NewCall);
// All mem intrinsics support dest alignment.
const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
MemCI->setDestAlignment(Align->getMaybeAlignValue());
// Memcpy/Memmove also support source alignment.
if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
MTI->setSourceAlignment(Align->getMaybeAlignValue());
break;
}

Copy link
Contributor

@nikic nikic left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

Copy link
Collaborator

@RKSimon RKSimon left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM with one very petty minor - cheers!

// TODO: Also handle memory intrinsics.
return false;

IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(style) auto *II = dyn_cast<IntrinsicInst>(I);

@jhuber6 jhuber6 merged commit abda8be into llvm:main Sep 2, 2025
9 checks passed
@llvm-ci
Copy link
Collaborator

llvm-ci commented Sep 2, 2025

LLVM Buildbot has detected a new failure on builder cross-project-tests-sie-ubuntu-dwarf5 running on doug-worker-1b while building llvm at step 6 "test-build-unified-tree-check-cross-project".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/163/builds/25722

Here is the relevant piece of the build log for the reference
Step 6 (test-build-unified-tree-check-cross-project) failure: test (failure)
******************** TEST 'cross-project-tests :: debuginfo-tests/dexter/feature_tests/commands/perfect/float_range_watch/float_range_no_arg.cpp' FAILED ********************
Exit Code: 2

Command Output (stderr):
--
clang++ -O0 -glldb -std=gnu++11 /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/llvm-project/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/float_range_watch/float_range_no_arg.cpp -o /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/build/projects/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/float_range_watch/Output/float_range_no_arg.cpp.tmp # RUN: at line 10
+ clang++ -O0 -glldb -std=gnu++11 /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/llvm-project/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/float_range_watch/float_range_no_arg.cpp -o /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/build/projects/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/float_range_watch/Output/float_range_no_arg.cpp.tmp
"/usr/bin/python3.10" "/home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/llvm-project/cross-project-tests/debuginfo-tests/dexter/dexter.py" test --fail-lt 1.0 -w --debugger lldb-dap --lldb-executable "/home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/build/bin/lldb-dap" --binary /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/build/projects/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/float_range_watch/Output/float_range_no_arg.cpp.tmp -- /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/llvm-project/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/float_range_watch/float_range_no_arg.cpp | /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/build/bin/FileCheck /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/llvm-project/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/float_range_watch/float_range_no_arg.cpp # RUN: at line 11
+ /usr/bin/python3.10 /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/llvm-project/cross-project-tests/debuginfo-tests/dexter/dexter.py test --fail-lt 1.0 -w --debugger lldb-dap --lldb-executable /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/build/bin/lldb-dap --binary /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/build/projects/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/float_range_watch/Output/float_range_no_arg.cpp.tmp -- /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/llvm-project/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/float_range_watch/float_range_no_arg.cpp
+ /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/build/bin/FileCheck /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/llvm-project/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/float_range_watch/float_range_no_arg.cpp


****************************************


Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms
Projects
None yet
Development

Successfully merging this pull request may close these issues.

6 participants