Skip to content

Commit 879c825

Browse files
committed
[intrinsics] Add @llvm.memcpy.inline intrinsics
Summary: This is a follow up on D61634. It adds an LLVM IR intrinsic to allow better implementation of memcpy from C++. A follow up CL will add the intrinsics in Clang. Reviewers: courbet, theraven, t.p.northover, jdoerfert, tejohnson Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D71710
1 parent 2533bc2 commit 879c825

File tree

10 files changed

+215
-3
lines changed

10 files changed

+215
-3
lines changed

llvm/docs/LangRef.rst

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11719,6 +11719,65 @@ the argument.
1171911719
If "len" is 0, the pointers may be NULL or dangling. However, they must still
1172011720
be appropriately aligned.
1172111721

11722+
.. _int_memcpy_inline:
11723+
11724+
'``llvm.memcpy.inline``' Intrinsic
11725+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
11726+
11727+
Syntax:
11728+
"""""""
11729+
11730+
This is an overloaded intrinsic. You can use ``llvm.memcpy.inline`` on any
11731+
integer bit width and for different address spaces. Not all targets
11732+
support all bit widths however.
11733+
11734+
::
11735+
11736+
declare void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* <dest>, i8* <src>,
11737+
i32 <len>, i1 <isvolatile>)
11738+
declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* <dest>, i8* <src>,
11739+
i64 <len>, i1 <isvolatile>)
11740+
11741+
Overview:
11742+
"""""""""
11743+
11744+
The '``llvm.memcpy.inline.*``' intrinsics copy a block of memory from the
11745+
source location to the destination location and guarantee that no external
11746+
functions are called.
11747+
11748+
Note that, unlike the standard libc function, the ``llvm.memcpy.inline.*``
11749+
intrinsics do not return a value, take an extra isvolatile
11750+
argument, and the pointers can be in specified address spaces.
11751+
11752+
Arguments:
11753+
""""""""""
11754+
11755+
The first argument is a pointer to the destination, the second is a
11756+
pointer to the source. The third argument is a constant integer argument
11757+
specifying the number of bytes to copy, and the fourth is a
11758+
boolean indicating a volatile access.
11759+
11760+
The :ref:`align <attr_align>` parameter attribute can be provided
11761+
for the first and second arguments.
11762+
11763+
If the ``isvolatile`` parameter is ``true``, the ``llvm.memcpy.inline`` call is
11764+
a :ref:`volatile operation <volatile>`. The detailed access behavior is not
11765+
very cleanly specified and it is unwise to depend on it.
11766+
11767+
Semantics:
11768+
""""""""""
11769+
11770+
The '``llvm.memcpy.inline.*``' intrinsics copy a block of memory from the
11771+
source location to the destination location, which are not allowed to
11772+
overlap. It copies "len" bytes of memory over. If the argument is known
11773+
to be aligned to some boundary, this can be specified as an attribute on
11774+
the argument.
11775+
11776+
If "len" is 0, the pointers may be NULL or dangling. However, they must still
11777+
be appropriately aligned.
11778+
11779+
The generated code is guaranteed not to call any external functions.
11780+
1172211781
.. _int_memmove:
1172311782

1172411783
'``llvm.memmove``' Intrinsic

llvm/include/llvm/IR/IntrinsicInst.h

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -582,6 +582,7 @@ namespace llvm {
582582
case Intrinsic::memcpy:
583583
case Intrinsic::memmove:
584584
case Intrinsic::memset:
585+
case Intrinsic::memcpy_inline:
585586
return true;
586587
default: return false;
587588
}
@@ -608,8 +609,14 @@ namespace llvm {
608609
public:
609610
// Methods for support type inquiry through isa, cast, and dyn_cast:
610611
static bool classof(const IntrinsicInst *I) {
611-
return I->getIntrinsicID() == Intrinsic::memcpy ||
612-
I->getIntrinsicID() == Intrinsic::memmove;
612+
switch (I->getIntrinsicID()) {
613+
case Intrinsic::memcpy:
614+
case Intrinsic::memmove:
615+
case Intrinsic::memcpy_inline:
616+
return true;
617+
default:
618+
return false;
619+
}
613620
}
614621
static bool classof(const Value *V) {
615622
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
@@ -640,6 +647,21 @@ namespace llvm {
640647
}
641648
};
642649

650+
/// This class wraps the llvm.memcpy.inline intrinsic.
651+
class MemCpyInlineInst : public MemTransferInst {
652+
public:
653+
ConstantInt *getLength() const {
654+
return cast<ConstantInt>(MemTransferInst::getLength());
655+
}
656+
// Methods for support type inquiry through isa, cast, and dyn_cast:
657+
static bool classof(const IntrinsicInst *I) {
658+
return I->getIntrinsicID() == Intrinsic::memcpy_inline;
659+
}
660+
static bool classof(const Value *V) {
661+
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
662+
}
663+
};
664+
643665
// The common base class for any memset/memmove/memcpy intrinsics;
644666
// whether they be atomic or non-atomic.
645667
// i.e. llvm.element.unordered.atomic.memset/memcpy/memmove
@@ -656,6 +678,7 @@ namespace llvm {
656678
static bool classof(const IntrinsicInst *I) {
657679
switch (I->getIntrinsicID()) {
658680
case Intrinsic::memcpy:
681+
case Intrinsic::memcpy_inline:
659682
case Intrinsic::memmove:
660683
case Intrinsic::memset:
661684
case Intrinsic::memcpy_element_unordered_atomic:
@@ -698,6 +721,7 @@ namespace llvm {
698721
static bool classof(const IntrinsicInst *I) {
699722
switch (I->getIntrinsicID()) {
700723
case Intrinsic::memcpy:
724+
case Intrinsic::memcpy_inline:
701725
case Intrinsic::memmove:
702726
case Intrinsic::memcpy_element_unordered_atomic:
703727
case Intrinsic::memmove_element_unordered_atomic:
@@ -719,6 +743,7 @@ namespace llvm {
719743
static bool classof(const IntrinsicInst *I) {
720744
switch (I->getIntrinsicID()) {
721745
case Intrinsic::memcpy:
746+
case Intrinsic::memcpy_inline:
722747
case Intrinsic::memcpy_element_unordered_atomic:
723748
return true;
724749
default:

llvm/include/llvm/IR/Intrinsics.td

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -511,6 +511,20 @@ def int_memcpy : Intrinsic<[],
511511
llvm_i1_ty],
512512
[IntrArgMemOnly, IntrWillReturn, NoCapture<0>, NoCapture<1>,
513513
NoAlias<0>, NoAlias<1>, WriteOnly<0>, ReadOnly<1>, ImmArg<3>]>;
514+
515+
// Memcpy semantics that are guaranteed to be inlined.
516+
// In particular this means that the generated code is not allowed to call any
517+
// external function.
518+
// The third argument (specifying the size) must be a constant.
519+
def int_memcpy_inline
520+
: Intrinsic<[],
521+
[ llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i1_ty ],
522+
[ IntrArgMemOnly, IntrWillReturn,
523+
NoCapture<0>, NoCapture<1>,
524+
NoAlias<0>, NoAlias<1>,
525+
WriteOnly<0>, ReadOnly<1>,
526+
ImmArg<2>, ImmArg<3> ]>;
527+
514528
def int_memmove : Intrinsic<[],
515529
[llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty,
516530
llvm_i1_ty],

llvm/lib/Analysis/Lint.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,22 @@ void Lint::visitCallSite(CallSite CS) {
345345
"Undefined behavior: memcpy source and destination overlap", &I);
346346
break;
347347
}
348+
case Intrinsic::memcpy_inline: {
349+
MemCpyInlineInst *MCII = cast<MemCpyInlineInst>(&I);
350+
const uint64_t Size = MCII->getLength()->getValue().getLimitedValue();
351+
visitMemoryReference(I, MCII->getDest(), Size, MCII->getDestAlignment(),
352+
nullptr, MemRef::Write);
353+
visitMemoryReference(I, MCII->getSource(), Size,
354+
MCII->getSourceAlignment(), nullptr, MemRef::Read);
355+
356+
// Check that the memcpy arguments don't overlap. The AliasAnalysis API
357+
// isn't expressive enough for what we really want to do. Known partial
358+
// overlap is not distinguished from the case where nothing is known.
359+
const LocationSize LS = LocationSize::precise(Size);
360+
Assert(AA->alias(MCII->getSource(), LS, MCII->getDest(), LS) != MustAlias,
361+
"Undefined behavior: memcpy source and destination overlap", &I);
362+
break;
363+
}
348364
case Intrinsic::memmove: {
349365
MemMoveInst *MMI = cast<MemMoveInst>(&I);
350366
// TODO: If the size is known, use it.

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5840,12 +5840,33 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
58405840
// node.
58415841
SDValue Root = isVol ? getRoot() : getMemoryRoot();
58425842
SDValue MC = DAG.getMemcpy(Root, sdl, Op1, Op2, Op3, Align, isVol,
5843-
false, isTC,
5843+
/* AlwaysInline */ false, isTC,
58445844
MachinePointerInfo(I.getArgOperand(0)),
58455845
MachinePointerInfo(I.getArgOperand(1)));
58465846
updateDAGForMaybeTailCall(MC);
58475847
return;
58485848
}
5849+
case Intrinsic::memcpy_inline: {
5850+
const auto &MCI = cast<MemCpyInlineInst>(I);
5851+
SDValue Dst = getValue(I.getArgOperand(0));
5852+
SDValue Src = getValue(I.getArgOperand(1));
5853+
SDValue Size = getValue(I.getArgOperand(2));
5854+
assert(isa<ConstantSDNode>(Size) && "memcpy_inline needs constant size");
5855+
// @llvm.memcpy.inline defines 0 and 1 to both mean no alignment.
5856+
Align DstAlign = MCI.getDestAlign().valueOrOne();
5857+
Align SrcAlign = MCI.getSourceAlign().valueOrOne();
5858+
Align Alignment = commonAlignment(DstAlign, SrcAlign);
5859+
bool isVol = MCI.isVolatile();
5860+
bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
5861+
// FIXME: Support passing different dest/src alignments to the memcpy DAG
5862+
// node.
5863+
SDValue MC = DAG.getMemcpy(
5864+
getRoot(), sdl, Dst, Src, Size, Alignment.value(), isVol,
5865+
/* AlwaysInline */ true, isTC, MachinePointerInfo(I.getArgOperand(0)),
5866+
MachinePointerInfo(I.getArgOperand(1)));
5867+
updateDAGForMaybeTailCall(MC);
5868+
return;
5869+
}
58495870
case Intrinsic::memset: {
58505871
const auto &MSI = cast<MemSetInst>(I);
58515872
SDValue Op1 = getValue(I.getArgOperand(0));

llvm/lib/IR/Verifier.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4347,6 +4347,7 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
43474347
visitDbgLabelIntrinsic("label", cast<DbgLabelInst>(Call));
43484348
break;
43494349
case Intrinsic::memcpy:
4350+
case Intrinsic::memcpy_inline:
43504351
case Intrinsic::memmove:
43514352
case Intrinsic::memset: {
43524353
const auto *MI = cast<MemIntrinsic>(&Call);
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core2 | FileCheck -check-prefix=X64 %s
3+
4+
; NOTE: This is expected to fail on targets that do not support memcpy.
5+
; RUN: llc < %s -mtriple=r600-unknown-linux-gnu 2> %t.err || true
6+
; RUN: FileCheck --input-file %t.err -check-prefix=R600 %s
7+
8+
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
9+
declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
10+
11+
define void @test1(i8* %a, i8* %b) nounwind {
12+
; X64-LABEL: test1:
13+
; X64: # %bb.0:
14+
; X64-NEXT: movq (%rsi), %rax
15+
; X64-NEXT: movq %rax, (%rdi)
16+
; X64-NEXT: retq
17+
; R600: LLVM ERROR
18+
tail call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* %a, i8* %b, i64 8, i1 0 )
19+
ret void
20+
}
21+
22+
define void @regular_memcpy_calls_external_function(i8* %a, i8* %b) nounwind {
23+
; X64-LABEL: regular_memcpy_calls_external_function:
24+
; X64: # %bb.0:
25+
; X64-NEXT: movl $128, %edx
26+
; X64-NEXT: jmp memcpy # TAILCALL
27+
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 128, i1 0 )
28+
ret void
29+
}
30+
31+
define void @inlined_copy_doesnt_call_external_function(i8* %a, i8* %b) nounwind {
32+
; X64-LABEL: inlined_copy_doesnt_call_external_function:
33+
; X64: # %bb.0:
34+
; X64-NEXT: movl $128, %ecx
35+
; X64-NEXT: rep;movsb (%rsi), %es:(%rdi)
36+
; X64-NEXT: retq
37+
tail call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* %a, i8* %b, i64 128, i1 0 )
38+
ret void
39+
}

llvm/test/Other/lint.ll

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ target datalayout = "e-p:64:64:64"
44
declare fastcc void @bar()
55
declare void @llvm.stackrestore(i8*)
66
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
7+
declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
78
declare void @has_sret(i8* sret %p)
89
declare void @has_noaliases(i32* noalias %p, i32* %q)
910
declare void @one_arg(i32)
@@ -80,6 +81,8 @@ define i32 @foo() noreturn {
8081

8182
; CHECK: Write to read-only memory
8283
call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (i32* @CG to i8*), i8* bitcast (i32* @CG2 to i8*), i64 1, i1 0)
84+
; CHECK: Write to read-only memory
85+
call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* bitcast (i32* @CG to i8*), i8* bitcast (i32* @CG2 to i8*), i64 1, i1 0)
8386
; CHECK: Unusual: noalias argument aliases another argument
8487
call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (i32* @CG to i8*), i8* bitcast (i32* @CG to i8*), i64 1, i1 0)
8588

@@ -189,3 +192,11 @@ entry:
189192
; CHECK: Undefined behavior: indirectbr with no destinations
190193
indirectbr i8* null, []
191194
}
195+
196+
define i32 @memcpy_inline_same_address() noreturn {
197+
%buf = alloca i64, align 1
198+
%ptr = bitcast i64* %buf to i8*
199+
; CHECK: Unusual: noalias argument aliases another argument
200+
call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* %ptr, i8* %ptr, i64 1, i1 false)
201+
unreachable
202+
}

llvm/test/Verifier/intrinsic-immarg.ll

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,23 @@ define void @memcpy(i8* %dest, i8* %src, i1 %is.volatile) {
2727
ret void
2828
}
2929

30+
declare void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1)
31+
define void @memcpy_inline_is_volatile(i8* %dest, i8* %src, i1 %is.volatile) {
32+
; CHECK: immarg operand has non-immediate parameter
33+
; CHECK-NEXT: i1 %is.volatile
34+
; CHECK-NEXT: call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 8, i1 %is.volatile)
35+
call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 8, i1 %is.volatile)
36+
ret void
37+
}
38+
39+
define void @memcpy_inline_variable_size(i8* %dest, i8* %src, i32 %size) {
40+
; CHECK: immarg operand has non-immediate parameter
41+
; CHECK-NEXT: i32 %size
42+
; CHECK-NEXT: call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %size, i1 true)
43+
call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %size, i1 true)
44+
ret void
45+
}
46+
3047
declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1)
3148
define void @memmove(i8* %dest, i8* %src, i1 %is.volatile) {
3249
; CHECK: immarg operand has non-immediate parameter

llvm/test/Verifier/memcpy-inline.ll

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
; RUN: not opt -verify < %s 2>&1 | FileCheck %s
2+
3+
; CHECK: alignment is not a power of two
4+
5+
define void @foo(i8* %P, i8* %Q) {
6+
call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* align 3 %P, i8* %Q, i32 4, i1 false)
7+
ret void
8+
}
9+
declare void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind

0 commit comments

Comments
 (0)