[X86] Set MaxAtomicSizeInBitsSupported. #75112
Conversation
This will result in larger atomic operations getting expanded to __atomic_* libcalls via AtomicExpandPass, which matches what Clang already does in the frontend.
@llvm/pr-subscribers-github-workflow @llvm/pr-subscribers-backend-x86

Author: James Y Knight (jyknight)

Changes: This will result in larger atomic operations getting expanded to __atomic_* libcalls via AtomicExpandPass, which matches what Clang already does in the frontend.

Patch is 25.11 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/75112.diff

6 Files Affected:
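For context, a minimal sketch (not part of the patch) of the libcall shape these expanded operations take. The fixed-size `__atomic_fetch_or_16` entry point from the GNU atomic library ABI carries an explicit C11 memory-order argument, which is why the updated CHECK lines below materialize an ordering value in `%ecx` before the call; the function and caller names here are illustrative only.

```c++
// libatomic entry point (declaration per the GNU atomic library ABI);
// the final int is the C11 memory order: 0 = relaxed, 5 = seq_cst.
extern "C" unsigned __int128 __atomic_fetch_or_16(void *ptr,
                                                  unsigned __int128 val,
                                                  int memorder);

// Hypothetical caller, mirroring what an i128 `atomicrmw or ... seq_cst`
// turns into once it exceeds the supported inline atomic width.
unsigned __int128 or128_expanded(unsigned __int128 *p, unsigned __int128 v) {
  return __atomic_fetch_or_16(p, v, /*memorder=*/5);
}
```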
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d69976342fcbd0..b284378a5fb292 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -142,11 +142,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setLibcallName(RTLIB::POWI_F64, nullptr);
}
- // If we don't have cmpxchg8b(meaing this is a 386/486), limit atomic size to
- // 32 bits so the AtomicExpandPass will expand it so we don't need cmpxchg8b.
- // FIXME: Should we be limiting the atomic size on other configs? Default is
- // 1024.
- if (!Subtarget.canUseCMPXCHG8B())
+ if (Subtarget.canUseCMPXCHG16B())
+ setMaxAtomicSizeInBitsSupported(128);
+ else if (Subtarget.canUseCMPXCHG8B())
+ setMaxAtomicSizeInBitsSupported(64);
+ else
setMaxAtomicSizeInBitsSupported(32);
setMaxDivRemBitWidthSupported(Subtarget.is64Bit() ? 128 : 64);
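The new logic ties the supported inline atomic width to the widest available compare-exchange instruction. As a hedged restatement (a hypothetical helper, not an LLVM API):

```c++
// Restates the tiers chosen above: any atomic wider than the returned
// width is routed through AtomicExpandPass to __atomic_* libcalls.
unsigned maxAtomicBits(bool hasCMPXCHG16B, bool hasCMPXCHG8B) {
  if (hasCMPXCHG16B) return 128; // lock cmpxchg16b available
  if (hasCMPXCHG8B)  return 64;  // lock cmpxchg8b available
  return 32;                     // 386/486-class: at most 32-bit atomics inline
}
```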
diff --git a/llvm/test/CodeGen/X86/atomic-idempotent.ll b/llvm/test/CodeGen/X86/atomic-idempotent.ll
index 3a9648bd1fbb50..d5c46485068a64 100644
--- a/llvm/test/CodeGen/X86/atomic-idempotent.ll
+++ b/llvm/test/CodeGen/X86/atomic-idempotent.ll
@@ -170,117 +170,130 @@ define i128 @or128(ptr %p) {
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: xorl %esi, %esi
; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: callq __sync_fetch_and_or_16@PLT
+; X64-NEXT: xorl %ecx, %ecx
+; X64-NEXT: callq __atomic_fetch_or_16@PLT
; X64-NEXT: popq %rcx
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
;
-; X86-SSE2-LABEL: or128:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pushl %ebp
-; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
-; X86-SSE2-NEXT: .cfi_offset %ebp, -8
-; X86-SSE2-NEXT: movl %esp, %ebp
-; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp
-; X86-SSE2-NEXT: pushl %esi
-; X86-SSE2-NEXT: andl $-16, %esp
-; X86-SSE2-NEXT: subl $32, %esp
-; X86-SSE2-NEXT: .cfi_offset %esi, -12
-; X86-SSE2-NEXT: movl 8(%ebp), %esi
-; X86-SSE2-NEXT: movl %esp, %eax
-; X86-SSE2-NEXT: pushl $0
-; X86-SSE2-NEXT: pushl $0
-; X86-SSE2-NEXT: pushl $0
-; X86-SSE2-NEXT: pushl $0
-; X86-SSE2-NEXT: pushl 12(%ebp)
-; X86-SSE2-NEXT: pushl %eax
-; X86-SSE2-NEXT: calll __sync_fetch_and_or_16
-; X86-SSE2-NEXT: addl $20, %esp
-; X86-SSE2-NEXT: movaps (%esp), %xmm0
-; X86-SSE2-NEXT: movaps %xmm0, (%esi)
-; X86-SSE2-NEXT: movl %esi, %eax
-; X86-SSE2-NEXT: leal -4(%ebp), %esp
-; X86-SSE2-NEXT: popl %esi
-; X86-SSE2-NEXT: popl %ebp
-; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4
-; X86-SSE2-NEXT: retl $4
-;
-; X86-SLM-LABEL: or128:
-; X86-SLM: # %bb.0:
-; X86-SLM-NEXT: pushl %ebp
-; X86-SLM-NEXT: .cfi_def_cfa_offset 8
-; X86-SLM-NEXT: .cfi_offset %ebp, -8
-; X86-SLM-NEXT: movl %esp, %ebp
-; X86-SLM-NEXT: .cfi_def_cfa_register %ebp
-; X86-SLM-NEXT: pushl %edi
-; X86-SLM-NEXT: pushl %esi
-; X86-SLM-NEXT: andl $-16, %esp
-; X86-SLM-NEXT: subl $16, %esp
-; X86-SLM-NEXT: .cfi_offset %esi, -16
-; X86-SLM-NEXT: .cfi_offset %edi, -12
-; X86-SLM-NEXT: movl 8(%ebp), %esi
-; X86-SLM-NEXT: movl 12(%ebp), %eax
-; X86-SLM-NEXT: movl %esp, %ecx
-; X86-SLM-NEXT: pushl $0
-; X86-SLM-NEXT: pushl $0
-; X86-SLM-NEXT: pushl $0
-; X86-SLM-NEXT: pushl $0
-; X86-SLM-NEXT: pushl %eax
-; X86-SLM-NEXT: pushl %ecx
-; X86-SLM-NEXT: calll __sync_fetch_and_or_16
-; X86-SLM-NEXT: addl $20, %esp
-; X86-SLM-NEXT: movl (%esp), %eax
-; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-SLM-NEXT: movl %edi, 8(%esi)
-; X86-SLM-NEXT: movl %edx, 12(%esi)
-; X86-SLM-NEXT: movl %eax, (%esi)
-; X86-SLM-NEXT: movl %ecx, 4(%esi)
-; X86-SLM-NEXT: movl %esi, %eax
-; X86-SLM-NEXT: leal -8(%ebp), %esp
-; X86-SLM-NEXT: popl %esi
-; X86-SLM-NEXT: popl %edi
-; X86-SLM-NEXT: popl %ebp
-; X86-SLM-NEXT: .cfi_def_cfa %esp, 4
-; X86-SLM-NEXT: retl $4
+; X86-GENERIC-LABEL: or128:
+; X86-GENERIC: # %bb.0:
+; X86-GENERIC-NEXT: pushl %ebp
+; X86-GENERIC-NEXT: .cfi_def_cfa_offset 8
+; X86-GENERIC-NEXT: .cfi_offset %ebp, -8
+; X86-GENERIC-NEXT: movl %esp, %ebp
+; X86-GENERIC-NEXT: .cfi_def_cfa_register %ebp
+; X86-GENERIC-NEXT: pushl %ebx
+; X86-GENERIC-NEXT: pushl %edi
+; X86-GENERIC-NEXT: pushl %esi
+; X86-GENERIC-NEXT: andl $-16, %esp
+; X86-GENERIC-NEXT: subl $48, %esp
+; X86-GENERIC-NEXT: .cfi_offset %esi, -20
+; X86-GENERIC-NEXT: .cfi_offset %edi, -16
+; X86-GENERIC-NEXT: .cfi_offset %ebx, -12
+; X86-GENERIC-NEXT: movl 12(%ebp), %edi
+; X86-GENERIC-NEXT: movl 12(%edi), %ecx
+; X86-GENERIC-NEXT: movl 8(%edi), %edx
+; X86-GENERIC-NEXT: movl (%edi), %ebx
+; X86-GENERIC-NEXT: movl 4(%edi), %esi
+; X86-GENERIC-NEXT: .p2align 4, 0x90
+; X86-GENERIC-NEXT: .LBB4_1: # %atomicrmw.start
+; X86-GENERIC-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-GENERIC-NEXT: movl %ebx, (%esp)
+; X86-GENERIC-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-GENERIC-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-GENERIC-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-GENERIC-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-GENERIC-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-GENERIC-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-GENERIC-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-GENERIC-NEXT: pushl $0
+; X86-GENERIC-NEXT: pushl $0
+; X86-GENERIC-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-GENERIC-NEXT: pushl %eax
+; X86-GENERIC-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-GENERIC-NEXT: pushl %eax
+; X86-GENERIC-NEXT: pushl %edi
+; X86-GENERIC-NEXT: pushl $16
+; X86-GENERIC-NEXT: calll __atomic_compare_exchange@PLT
+; X86-GENERIC-NEXT: addl $24, %esp
+; X86-GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-GENERIC-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-GENERIC-NEXT: movl (%esp), %ebx
+; X86-GENERIC-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-GENERIC-NEXT: testb %al, %al
+; X86-GENERIC-NEXT: je .LBB4_1
+; X86-GENERIC-NEXT: # %bb.2: # %atomicrmw.end
+; X86-GENERIC-NEXT: movl 8(%ebp), %eax
+; X86-GENERIC-NEXT: movl %ebx, (%eax)
+; X86-GENERIC-NEXT: movl %esi, 4(%eax)
+; X86-GENERIC-NEXT: movl %edx, 8(%eax)
+; X86-GENERIC-NEXT: movl %ecx, 12(%eax)
+; X86-GENERIC-NEXT: leal -12(%ebp), %esp
+; X86-GENERIC-NEXT: popl %esi
+; X86-GENERIC-NEXT: popl %edi
+; X86-GENERIC-NEXT: popl %ebx
+; X86-GENERIC-NEXT: popl %ebp
+; X86-GENERIC-NEXT: .cfi_def_cfa %esp, 4
+; X86-GENERIC-NEXT: retl $4
;
; X86-ATOM-LABEL: or128:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: pushl %ebp
; X86-ATOM-NEXT: .cfi_def_cfa_offset 8
; X86-ATOM-NEXT: .cfi_offset %ebp, -8
-; X86-ATOM-NEXT: leal (%esp), %ebp
+; X86-ATOM-NEXT: movl %esp, %ebp
; X86-ATOM-NEXT: .cfi_def_cfa_register %ebp
+; X86-ATOM-NEXT: pushl %ebx
; X86-ATOM-NEXT: pushl %edi
; X86-ATOM-NEXT: pushl %esi
; X86-ATOM-NEXT: andl $-16, %esp
; X86-ATOM-NEXT: leal -{{[0-9]+}}(%esp), %esp
-; X86-ATOM-NEXT: .cfi_offset %esi, -16
-; X86-ATOM-NEXT: .cfi_offset %edi, -12
-; X86-ATOM-NEXT: movl 8(%ebp), %esi
-; X86-ATOM-NEXT: movl 12(%ebp), %eax
-; X86-ATOM-NEXT: movl %esp, %ecx
-; X86-ATOM-NEXT: pushl $0
-; X86-ATOM-NEXT: pushl $0
+; X86-ATOM-NEXT: .cfi_offset %esi, -20
+; X86-ATOM-NEXT: .cfi_offset %edi, -16
+; X86-ATOM-NEXT: .cfi_offset %ebx, -12
+; X86-ATOM-NEXT: movl 12(%ebp), %edi
+; X86-ATOM-NEXT: movl 12(%edi), %ecx
+; X86-ATOM-NEXT: movl 8(%edi), %edx
+; X86-ATOM-NEXT: movl (%edi), %esi
+; X86-ATOM-NEXT: movl 4(%edi), %ebx
+; X86-ATOM-NEXT: .p2align 4, 0x90
+; X86-ATOM-NEXT: .LBB4_1: # %atomicrmw.start
+; X86-ATOM-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-ATOM-NEXT: movl %esi, (%esp)
+; X86-ATOM-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-ATOM-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-ATOM-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-ATOM-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-ATOM-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-ATOM-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-ATOM-NEXT: movl %esi, {{[0-9]+}}(%esp)
; X86-ATOM-NEXT: pushl $0
; X86-ATOM-NEXT: pushl $0
+; X86-ATOM-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-ATOM-NEXT: pushl %eax
+; X86-ATOM-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-ATOM-NEXT: pushl %eax
-; X86-ATOM-NEXT: pushl %ecx
-; X86-ATOM-NEXT: calll __sync_fetch_and_or_16
+; X86-ATOM-NEXT: pushl %edi
+; X86-ATOM-NEXT: pushl $16
+; X86-ATOM-NEXT: calll __atomic_compare_exchange@PLT
; X86-ATOM-NEXT: leal {{[0-9]+}}(%esp), %esp
-; X86-ATOM-NEXT: movl (%esp), %ecx
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-ATOM-NEXT: movl %eax, 8(%esi)
-; X86-ATOM-NEXT: movl %edi, 12(%esi)
-; X86-ATOM-NEXT: movl %ecx, (%esi)
-; X86-ATOM-NEXT: movl %esi, %eax
-; X86-ATOM-NEXT: movl %edx, 4(%esi)
-; X86-ATOM-NEXT: leal -8(%ebp), %esp
+; X86-ATOM-NEXT: testb %al, %al
+; X86-ATOM-NEXT: movl (%esp), %esi
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-ATOM-NEXT: je .LBB4_1
+; X86-ATOM-NEXT: # %bb.2: # %atomicrmw.end
+; X86-ATOM-NEXT: movl 8(%ebp), %eax
+; X86-ATOM-NEXT: movl %esi, (%eax)
+; X86-ATOM-NEXT: movl %ebx, 4(%eax)
+; X86-ATOM-NEXT: movl %edx, 8(%eax)
+; X86-ATOM-NEXT: movl %ecx, 12(%eax)
+; X86-ATOM-NEXT: leal -12(%ebp), %esp
; X86-ATOM-NEXT: popl %esi
; X86-ATOM-NEXT: popl %edi
+; X86-ATOM-NEXT: popl %ebx
; X86-ATOM-NEXT: popl %ebp
; X86-ATOM-NEXT: .cfi_def_cfa %esp, 4
; X86-ATOM-NEXT: retl $4
@@ -507,78 +520,120 @@ define void @or128_nouse_seq_cst(ptr %p) {
; X64-NEXT: .cfi_def_cfa_offset 16
; X64-NEXT: xorl %esi, %esi
; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: callq __sync_fetch_and_or_16@PLT
+; X64-NEXT: movl $5, %ecx
+; X64-NEXT: callq __atomic_fetch_or_16@PLT
; X64-NEXT: popq %rax
; X64-NEXT: .cfi_def_cfa_offset 8
; X64-NEXT: retq
;
-; X86-SSE2-LABEL: or128_nouse_seq_cst:
-; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: pushl %ebp
-; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
-; X86-SSE2-NEXT: .cfi_offset %ebp, -8
-; X86-SSE2-NEXT: movl %esp, %ebp
-; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp
-; X86-SSE2-NEXT: andl $-16, %esp
-; X86-SSE2-NEXT: subl $32, %esp
-; X86-SSE2-NEXT: movl %esp, %eax
-; X86-SSE2-NEXT: pushl $0
-; X86-SSE2-NEXT: pushl $0
-; X86-SSE2-NEXT: pushl $0
-; X86-SSE2-NEXT: pushl $0
-; X86-SSE2-NEXT: pushl 8(%ebp)
-; X86-SSE2-NEXT: pushl %eax
-; X86-SSE2-NEXT: calll __sync_fetch_and_or_16
-; X86-SSE2-NEXT: addl $20, %esp
-; X86-SSE2-NEXT: movl %ebp, %esp
-; X86-SSE2-NEXT: popl %ebp
-; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4
-; X86-SSE2-NEXT: retl
-;
-; X86-SLM-LABEL: or128_nouse_seq_cst:
-; X86-SLM: # %bb.0:
-; X86-SLM-NEXT: pushl %ebp
-; X86-SLM-NEXT: .cfi_def_cfa_offset 8
-; X86-SLM-NEXT: .cfi_offset %ebp, -8
-; X86-SLM-NEXT: movl %esp, %ebp
-; X86-SLM-NEXT: .cfi_def_cfa_register %ebp
-; X86-SLM-NEXT: andl $-16, %esp
-; X86-SLM-NEXT: subl $32, %esp
-; X86-SLM-NEXT: movl 8(%ebp), %eax
-; X86-SLM-NEXT: movl %esp, %ecx
-; X86-SLM-NEXT: pushl $0
-; X86-SLM-NEXT: pushl $0
-; X86-SLM-NEXT: pushl $0
-; X86-SLM-NEXT: pushl $0
-; X86-SLM-NEXT: pushl %eax
-; X86-SLM-NEXT: pushl %ecx
-; X86-SLM-NEXT: calll __sync_fetch_and_or_16
-; X86-SLM-NEXT: addl $20, %esp
-; X86-SLM-NEXT: movl %ebp, %esp
-; X86-SLM-NEXT: popl %ebp
-; X86-SLM-NEXT: .cfi_def_cfa %esp, 4
-; X86-SLM-NEXT: retl
+; X86-GENERIC-LABEL: or128_nouse_seq_cst:
+; X86-GENERIC: # %bb.0:
+; X86-GENERIC-NEXT: pushl %ebp
+; X86-GENERIC-NEXT: .cfi_def_cfa_offset 8
+; X86-GENERIC-NEXT: .cfi_offset %ebp, -8
+; X86-GENERIC-NEXT: movl %esp, %ebp
+; X86-GENERIC-NEXT: .cfi_def_cfa_register %ebp
+; X86-GENERIC-NEXT: pushl %ebx
+; X86-GENERIC-NEXT: pushl %edi
+; X86-GENERIC-NEXT: pushl %esi
+; X86-GENERIC-NEXT: andl $-16, %esp
+; X86-GENERIC-NEXT: subl $48, %esp
+; X86-GENERIC-NEXT: .cfi_offset %esi, -20
+; X86-GENERIC-NEXT: .cfi_offset %edi, -16
+; X86-GENERIC-NEXT: .cfi_offset %ebx, -12
+; X86-GENERIC-NEXT: movl 8(%ebp), %esi
+; X86-GENERIC-NEXT: movl 12(%esi), %ecx
+; X86-GENERIC-NEXT: movl 8(%esi), %edi
+; X86-GENERIC-NEXT: movl (%esi), %edx
+; X86-GENERIC-NEXT: movl 4(%esi), %ebx
+; X86-GENERIC-NEXT: .p2align 4, 0x90
+; X86-GENERIC-NEXT: .LBB12_1: # %atomicrmw.start
+; X86-GENERIC-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-GENERIC-NEXT: movl %edx, (%esp)
+; X86-GENERIC-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-GENERIC-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-GENERIC-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-GENERIC-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-GENERIC-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-GENERIC-NEXT: movl %ebx, {{[0-9]+}}(%esp)
+; X86-GENERIC-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-GENERIC-NEXT: pushl $5
+; X86-GENERIC-NEXT: pushl $5
+; X86-GENERIC-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-GENERIC-NEXT: pushl %eax
+; X86-GENERIC-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-GENERIC-NEXT: pushl %eax
+; X86-GENERIC-NEXT: pushl %esi
+; X86-GENERIC-NEXT: pushl $16
+; X86-GENERIC-NEXT: calll __atomic_compare_exchange@PLT
+; X86-GENERIC-NEXT: addl $24, %esp
+; X86-GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-GENERIC-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-GENERIC-NEXT: movl (%esp), %edx
+; X86-GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-GENERIC-NEXT: testb %al, %al
+; X86-GENERIC-NEXT: je .LBB12_1
+; X86-GENERIC-NEXT: # %bb.2: # %atomicrmw.end
+; X86-GENERIC-NEXT: leal -12(%ebp), %esp
+; X86-GENERIC-NEXT: popl %esi
+; X86-GENERIC-NEXT: popl %edi
+; X86-GENERIC-NEXT: popl %ebx
+; X86-GENERIC-NEXT: popl %ebp
+; X86-GENERIC-NEXT: .cfi_def_cfa %esp, 4
+; X86-GENERIC-NEXT: retl
;
; X86-ATOM-LABEL: or128_nouse_seq_cst:
; X86-ATOM: # %bb.0:
; X86-ATOM-NEXT: pushl %ebp
; X86-ATOM-NEXT: .cfi_def_cfa_offset 8
; X86-ATOM-NEXT: .cfi_offset %ebp, -8
-; X86-ATOM-NEXT: leal (%esp), %ebp
+; X86-ATOM-NEXT: movl %esp, %ebp
; X86-ATOM-NEXT: .cfi_def_cfa_register %ebp
+; X86-ATOM-NEXT: pushl %ebx
+; X86-ATOM-NEXT: pushl %edi
+; X86-ATOM-NEXT: pushl %esi
; X86-ATOM-NEXT: andl $-16, %esp
; X86-ATOM-NEXT: leal -{{[0-9]+}}(%esp), %esp
-; X86-ATOM-NEXT: movl 8(%ebp), %eax
-; X86-ATOM-NEXT: movl %esp, %ecx
-; X86-ATOM-NEXT: pushl $0
-; X86-ATOM-NEXT: pushl $0
-; X86-ATOM-NEXT: pushl $0
-; X86-ATOM-NEXT: pushl $0
+; X86-ATOM-NEXT: .cfi_offset %esi, -20
+; X86-ATOM-NEXT: .cfi_offset %edi, -16
+; X86-ATOM-NEXT: .cfi_offset %ebx, -12
+; X86-ATOM-NEXT: movl 8(%ebp), %esi
+; X86-ATOM-NEXT: movl %esp, %ebx
+; X86-ATOM-NEXT: movl 12(%esi), %ecx
+; X86-ATOM-NEXT: movl 8(%esi), %edx
+; X86-ATOM-NEXT: movl (%esi), %eax
+; X86-ATOM-NEXT: movl 4(%esi), %edi
+; X86-ATOM-NEXT: .p2align 4, 0x90
+; X86-ATOM-NEXT: .LBB12_1: # %atomicrmw.start
+; X86-ATOM-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-ATOM-NEXT: movl %eax, (%esp)
+; X86-ATOM-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-ATOM-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-ATOM-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-ATOM-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-ATOM-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-ATOM-NEXT: movl %edi, {{[0-9]+}}(%esp)
+; X86-ATOM-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-ATOM-NEXT: pushl $5
+; X86-ATOM-NEXT: pushl $5
+; X86-ATOM-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-ATOM-NEXT: pushl %eax
-; X86-ATOM-NEXT: pushl %ecx
-; X86-ATOM-NEXT: calll __sync_fetch_and_or_16
+; X86-ATOM-NEXT: pushl %ebx
+; X86-ATOM-NEXT: pushl %esi
+; X86-ATOM-NEXT: pushl $16
+; X86-ATOM-NEXT: calll __atomic_compare_exchange@PLT
; X86-ATOM-NEXT: leal {{[0-9]+}}(%esp), %esp
-; X86-ATOM-NEXT: movl %ebp, %esp
+; X86-ATOM-NEXT: testb %al, %al
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-ATOM-NEXT: movl (%esp), %eax
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-ATOM-NEXT: je .LBB12_1
+; X86-ATOM-NEXT: # %bb.2: # %atomicrmw.end
+; X86-ATOM-NEXT: leal -12(%ebp), %esp
+; X86-ATOM-NEXT: popl %esi
+; X86-ATOM-NEXT: popl %edi
+; X86-ATOM-NEXT: popl %ebx
; X86-ATOM-NEXT: popl %ebp
; X86-ATOM-NEXT: .cfi_def_cfa %esp, 4
; X86-ATOM-NEXT: retl
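Worth noting about the 32-bit (X86-GENERIC / X86-ATOM) output above: there is no 16-byte fetch-or lowering taken here, so the i128 RMW becomes a loop around the generic, size-parameterized `__atomic_compare_exchange` — visible as the `.LBB*_1: # %atomicrmw.start` blocks. A hedged C++ sketch of that loop follows (hypothetical function name, and using `__int128` for readability even though i386 itself has no such C type; the real expansion works on in-memory buffers word by word).

```c++
#include <cstddef>

// Generic libatomic entry point: the first argument is the object size in
// bytes; on failure it rewrites *expected with the value currently in memory.
extern "C" bool __atomic_compare_exchange(size_t size, void *ptr,
                                          void *expected, void *desired,
                                          int success_order, int failure_order);

unsigned __int128 fetch_or_128(unsigned __int128 *p, unsigned __int128 v) {
  unsigned __int128 expected = *p; // initial snapshot; refreshed on CAS failure
  unsigned __int128 desired;
  do {
    desired = expected | v;        // the RMW body
  } while (!__atomic_compare_exchange(sizeof *p, p, &expected, &desired,
                                      /*success=*/0, /*failure=*/0));
  return expected;                 // value observed before the successful OR
}
```

The paired `pushl $0` arguments in the or128 body above correspond to the two order parameters (relaxed there), while the seq_cst variant pushes `$5` twice.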
diff --git a/llvm/test/CodeGen/X86/atomic-nocx16.ll b/llvm/test/CodeGen/X86/atomic-nocx16.ll
index 5677541242a249..a014da80f189be 100644
--- a/llvm/test/CodeGen/X86/atomic-nocx16.ll
+++ b/llvm/test/CodeGen/X86/atomic-nocx16.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=corei7 -mattr=-cx16 | FileCheck %s
-; RUN: llc < %s -mtriple=i386-linux-gnu -verify-machineinstrs -mattr=cx16 | FileCheck -check-prefix=CHECK %s
+; RUN: llc < %s -mtriple=i386-linux-gnu -verify-machineinstrs -mattr=cx16 | FileCheck -check-prefix=CHECK32 %s
;; Verify that 128-bit atomics emit a libcall without cx16
;; available.
@@ -10,25 +10,35 @@
; CHECK-LABEL: test:
define void @test(ptr %a) nounwind {
entry:
-; CHECK: __sync_val_compare_and_swap_16
+; CHECK: __atomic_compare_exchange_16
+; CHECK32: __atomic_compare_exchange
%0 = cmpxchg ptr %a, i128 1, i128 1 seq_cst seq_cst
-; CHECK: __sync_lock_test_and_set_16
+; CHECK: __atomic_exchange_16
+; CHECK32: __atomic_exchange
%1 = atomicrmw xchg ptr %a, i128 1 seq_cst
-; CHECK: __sync_fetch_and_add_16
+; CHECK: __atomic_fetch_add_16
+; CHECK32: __atomic_compare_exchange
%2 = atomicrmw add ptr %a, i128 1 seq_cst
-; CHECK: __sync_fetch_and_sub_16
+; CHECK: __atomic_fetch_sub_16
+; CHECK32: __atomic_compare_exchange
%3 = atomicrmw sub ptr %a, i128 1 seq_cst
-; CHECK: __sync_fetch_and_and_16
+; CHECK: __atomic_fetch_and_16
+; CHECK32: __atomic_compare_exchange
%4 = atomicrmw and ptr %a, i128 1 seq_cst
-; CHECK: __sync_fetch_and_nand_16
+; CHECK: __atomic_fetch_nand_16
+; CHECK32: __atomic_compare_exchange
%5 = atomicrmw nand ptr %a, i128 1 seq_cst
-; CHECK: __sync_fetch_and_or_16
+; CHECK: __atomic_fetch_or_16
+; CHECK32: __atomic_compare_exchange
%6 = atomicrmw or ptr %a, i128 1 seq_cst
-; CHECK: __sync_fetch_and_xor_16
+; CHECK: __atomic_fetch_xor_16
+; CHECK32: __atomic_compare_exchange
%7 = atomicrmw xor ptr %a, i128 1 seq_cst
-; CHECK: __sync_val_compare_and_swap_16
+; CHECK: __atomic_load_16
+; CHECK32: __atomic_load
%8 = load atomic i128, ptr %a seq_cst, align 16
-; CHECK: __sync_lock_test_and_set_16
+; CHECK: __atomic_store_16
+; CHECK32: __atomic_store
store atomic i128 %8, ptr %a seq_cst, align 16
ret void
}
@@ -36,14 +46,20 @@ entry:
; CHECK-LABEL: test_fp:
define void @test_fp(fp128* %a) nounwind {
entry:
-; CHECK: __sync_lock_test_and_set_16
+; CHECK: __atomic_exchange_16
+; CHECK32: __atomic_exchange
%0 = atomicrmw xchg fp128* %a, fp128 0xL00000000000000004000900000000000 seq_cst
-; Currently fails to compile:
-; %1 = atomicrmw fadd fp128* %a, fp128 0xL00000000000000004000900000000000 seq_cst
-; %2 = atomicrmw fsub fp128* %a, fp128 0xL00000000000000004000900000000000 seq_cst
-; CHECK: __sync_val_compare_and_swap_16
- %1 = load atomic fp128, fp128* %a seq_cst, align 16
-; CHECK: __sync_lock_test_and_set_16
- store atomic fp128 %1, fp128* %a seq_cst, align 16
+; CHECK: __atomic_compare_exchange_16
+; CHECK32: __atomic_compare_exchange
+ %1 = atomicrmw fadd fp128* %a, fp128 0xL00000000000000004000900000000000 seq_cst
+; CHECK: __atomic_compare_exchange_16
+; CHECK32: __atomic_compare_exchange
+ %2 = atomicrmw fsub fp128* %a, fp128 0xL00000000000000004000900000000000 seq_cst
+; CHECK: __atomic_load_16
+; CHECK32: __atomic_load
+ %3 = load atomic fp128, fp128* %a seq_cst, align 16
+; CHECK: __atomic_store_16
+; CHECK32: __atomic_store
+ store atomic fp128 %3, fp128* %a seq_cst, align 16
ret void
}
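The fadd/fsub lines are new coverage: those operations previously failed to compile (per the deleted comment), and since libatomic has no fetch-style entry point for floating-point RMW, they expand to compare-exchange loops, as the `__atomic_compare_exchange_16` CHECK lines show. A hedged sketch of that shape, using an integer payload and an illustrative name for brevity (the real expansion bitcasts the fp128 value in and out):

```c++
// Fixed-size CAS entry point (GNU atomic library ABI); on failure the
// current memory contents are written back into *expected.
extern "C" bool __atomic_compare_exchange_16(void *mem, void *expected,
                                             unsigned __int128 desired,
                                             int success_order,
                                             int failure_order);

unsigned __int128 fetch_add_16(unsigned __int128 *p, unsigned __int128 v) {
  unsigned __int128 expected = *p;  // seed; the libcall refreshes it on failure
  while (!__atomic_compare_exchange_16(p, &expected, expected + v,
                                       /*success=*/5, /*failure=*/5)) {
    // retry with the freshly observed value now held in `expected`
  }
  return expected;                  // value before the add, like atomicrmw
}
```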
diff --git a/llvm/test/CodeGen/X86/atomic-ops-ancient-64.ll b/llvm/test/CodeGen/X86/atomic-ops-ancient-64.ll
index 493c9a897f06b3..bc99caeea12b6c 100644
--- a/llvm/test/CodeGen/X86/atomic-ops-ancient-64.ll
+++ b/llvm/test/CodeGen/X86/atomic-ops-ancient-64.ll
@@ -1,44 +1,43 @@
-; RUN: llc -mtriple...
[truncated]
You can test this locally with the following command:

git-clang-format --diff a37fa2a8e1c827f1ff04b0b13b83cf97eefe74c0 6742c9847c94c07438bdce23944bbdb0b43d3a95 -- llvm/lib/Target/X86/X86ISelLowering.cpp

View the diff from clang-format here:

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b284378a5f..6534ca343e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -55007,8 +55007,8 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
APInt OpUndefElts;
SmallVector<APInt> OpEltBits;
if (!getTargetConstantBitsFromNode(Ops[I], EltSizeInBits, OpUndefElts,
- OpEltBits, true, false))
- break;
+ OpEltBits, true, false))
+ break;
EltBits.append(OpEltBits);
UndefElts.insertBits(OpUndefElts, I * OpUndefElts.getBitWidth());
}
LGTM (if you rebase, the clang-format warnings should go away)