-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[PowerPC] ppc64-P9-vabsd.ll - update v16i8 abdu test now that it vectorizes in the middle-end #154712
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
…orizes in the middle-end. The scalarized IR was written before improvements to SLP / cost models ensured that the abs intrinsic was easily vectorizable (opt -O3: https://zig.godbolt.org/z/39T65vh8M). Now that it is, we need a more useful llc test. We could add the old test to Transforms/PhaseOrdering if there's any concern about future coverage?
@llvm/pr-subscribers-backend-powerpc Author: Simon Pilgrim (RKSimon). Changes: The scalarized IR was written before improvements to SLP / cost models ensured that the abs intrinsic was easily vectorizable (opt -O3: https://zig.godbolt.org/z/39T65vh8M). Now that it is, we need a more useful llc test. We could add the old test to Transforms/PhaseOrdering if there's any concern about future coverage? Patch is 43.87 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/154712.diff. 1 file affected:
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll b/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
index b540948b20f75..eaab932c41df7 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
@@ -190,1000 +190,25 @@ entry:
ret <8 x i16> %6
}
-; FIXME: This does not produce ISD::ABS. This does not even vectorize correctly!
-; This function should look like sub_absv_32 and sub_absv_16 except that the type is v16i8.
-; Function Attrs: norecurse nounwind readnone
define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr {
-; CHECK-PWR9-LE-LABEL: sub_absv_8_ext:
-; CHECK-PWR9-LE: # %bb.0: # %entry
-; CHECK-PWR9-LE-NEXT: li r3, 0
-; CHECK-PWR9-LE-NEXT: li r5, 2
-; CHECK-PWR9-LE-NEXT: li r4, 1
-; CHECK-PWR9-LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-PWR9-LE-NEXT: vextubrx r6, r3, v2
-; CHECK-PWR9-LE-NEXT: vextubrx r3, r3, v3
-; CHECK-PWR9-LE-NEXT: vextubrx r8, r5, v2
-; CHECK-PWR9-LE-NEXT: vextubrx r5, r5, v3
-; CHECK-PWR9-LE-NEXT: std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-PWR9-LE-NEXT: std r28, -32(r1) # 8-byte Folded Spill
-; CHECK-PWR9-LE-NEXT: std r27, -40(r1) # 8-byte Folded Spill
-; CHECK-PWR9-LE-NEXT: std r26, -48(r1) # 8-byte Folded Spill
-; CHECK-PWR9-LE-NEXT: std r25, -56(r1) # 8-byte Folded Spill
-; CHECK-PWR9-LE-NEXT: clrlwi r6, r6, 24
-; CHECK-PWR9-LE-NEXT: clrlwi r3, r3, 24
-; CHECK-PWR9-LE-NEXT: clrlwi r8, r8, 24
-; CHECK-PWR9-LE-NEXT: clrlwi r5, r5, 24
-; CHECK-PWR9-LE-NEXT: vextubrx r7, r4, v2
-; CHECK-PWR9-LE-NEXT: vextubrx r4, r4, v3
-; CHECK-PWR9-LE-NEXT: sub r3, r6, r3
-; CHECK-PWR9-LE-NEXT: sub r6, r8, r5
-; CHECK-PWR9-LE-NEXT: clrlwi r7, r7, 24
-; CHECK-PWR9-LE-NEXT: clrlwi r4, r4, 24
-; CHECK-PWR9-LE-NEXT: sub r4, r7, r4
-; CHECK-PWR9-LE-NEXT: srawi r5, r3, 31
-; CHECK-PWR9-LE-NEXT: srawi r7, r4, 31
-; CHECK-PWR9-LE-NEXT: xor r3, r3, r5
-; CHECK-PWR9-LE-NEXT: xor r4, r4, r7
-; CHECK-PWR9-LE-NEXT: sub r5, r3, r5
-; CHECK-PWR9-LE-NEXT: srawi r3, r6, 31
-; CHECK-PWR9-LE-NEXT: sub r4, r4, r7
-; CHECK-PWR9-LE-NEXT: xor r6, r6, r3
-; CHECK-PWR9-LE-NEXT: sub r3, r6, r3
-; CHECK-PWR9-LE-NEXT: li r6, 3
-; CHECK-PWR9-LE-NEXT: vextubrx r7, r6, v2
-; CHECK-PWR9-LE-NEXT: vextubrx r6, r6, v3
-; CHECK-PWR9-LE-NEXT: clrlwi r7, r7, 24
-; CHECK-PWR9-LE-NEXT: clrlwi r6, r6, 24
-; CHECK-PWR9-LE-NEXT: sub r6, r7, r6
-; CHECK-PWR9-LE-NEXT: srawi r7, r6, 31
-; CHECK-PWR9-LE-NEXT: xor r6, r6, r7
-; CHECK-PWR9-LE-NEXT: sub r6, r6, r7
-; CHECK-PWR9-LE-NEXT: li r7, 4
-; CHECK-PWR9-LE-NEXT: vextubrx r8, r7, v2
-; CHECK-PWR9-LE-NEXT: vextubrx r7, r7, v3
-; CHECK-PWR9-LE-NEXT: mtvsrd v4, r6
-; CHECK-PWR9-LE-NEXT: clrlwi r8, r8, 24
-; CHECK-PWR9-LE-NEXT: clrlwi r7, r7, 24
-; CHECK-PWR9-LE-NEXT: sub r7, r8, r7
-; CHECK-PWR9-LE-NEXT: srawi r8, r7, 31
-; CHECK-PWR9-LE-NEXT: xor r7, r7, r8
-; CHECK-PWR9-LE-NEXT: sub r7, r7, r8
-; CHECK-PWR9-LE-NEXT: li r8, 5
-; CHECK-PWR9-LE-NEXT: vextubrx r9, r8, v2
-; CHECK-PWR9-LE-NEXT: vextubrx r8, r8, v3
-; CHECK-PWR9-LE-NEXT: clrlwi r9, r9, 24
-; CHECK-PWR9-LE-NEXT: clrlwi r8, r8, 24
-; CHECK-PWR9-LE-NEXT: sub r8, r9, r8
-; CHECK-PWR9-LE-NEXT: srawi r9, r8, 31
-; CHECK-PWR9-LE-NEXT: xor r8, r8, r9
-; CHECK-PWR9-LE-NEXT: sub r8, r8, r9
-; CHECK-PWR9-LE-NEXT: li r9, 6
-; CHECK-PWR9-LE-NEXT: vextubrx r10, r9, v2
-; CHECK-PWR9-LE-NEXT: vextubrx r9, r9, v3
-; CHECK-PWR9-LE-NEXT: clrlwi r10, r10, 24
-; CHECK-PWR9-LE-NEXT: clrlwi r9, r9, 24
-; CHECK-PWR9-LE-NEXT: sub r9, r10, r9
-; CHECK-PWR9-LE-NEXT: srawi r10, r9, 31
-; CHECK-PWR9-LE-NEXT: xor r9, r9, r10
-; CHECK-PWR9-LE-NEXT: sub r9, r9, r10
-; CHECK-PWR9-LE-NEXT: li r10, 7
-; CHECK-PWR9-LE-NEXT: vextubrx r11, r10, v2
-; CHECK-PWR9-LE-NEXT: vextubrx r10, r10, v3
-; CHECK-PWR9-LE-NEXT: clrlwi r11, r11, 24
-; CHECK-PWR9-LE-NEXT: clrlwi r10, r10, 24
-; CHECK-PWR9-LE-NEXT: sub r10, r11, r10
-; CHECK-PWR9-LE-NEXT: srawi r11, r10, 31
-; CHECK-PWR9-LE-NEXT: xor r10, r10, r11
-; CHECK-PWR9-LE-NEXT: sub r10, r10, r11
-; CHECK-PWR9-LE-NEXT: li r11, 8
-; CHECK-PWR9-LE-NEXT: vextubrx r12, r11, v2
-; CHECK-PWR9-LE-NEXT: vextubrx r11, r11, v3
-; CHECK-PWR9-LE-NEXT: mtvsrd v5, r10
-; CHECK-PWR9-LE-NEXT: clrlwi r12, r12, 24
-; CHECK-PWR9-LE-NEXT: clrlwi r11, r11, 24
-; CHECK-PWR9-LE-NEXT: sub r11, r12, r11
-; CHECK-PWR9-LE-NEXT: srawi r12, r11, 31
-; CHECK-PWR9-LE-NEXT: xor r11, r11, r12
-; CHECK-PWR9-LE-NEXT: sub r11, r11, r12
-; CHECK-PWR9-LE-NEXT: li r12, 9
-; CHECK-PWR9-LE-NEXT: vextubrx r0, r12, v2
-; CHECK-PWR9-LE-NEXT: vextubrx r12, r12, v3
-; CHECK-PWR9-LE-NEXT: clrlwi r0, r0, 24
-; CHECK-PWR9-LE-NEXT: clrlwi r12, r12, 24
-; CHECK-PWR9-LE-NEXT: sub r12, r0, r12
-; CHECK-PWR9-LE-NEXT: srawi r0, r12, 31
-; CHECK-PWR9-LE-NEXT: xor r12, r12, r0
-; CHECK-PWR9-LE-NEXT: sub r12, r12, r0
-; CHECK-PWR9-LE-NEXT: li r0, 10
-; CHECK-PWR9-LE-NEXT: vextubrx r30, r0, v2
-; CHECK-PWR9-LE-NEXT: vextubrx r0, r0, v3
-; CHECK-PWR9-LE-NEXT: clrlwi r30, r30, 24
-; CHECK-PWR9-LE-NEXT: clrlwi r0, r0, 24
-; CHECK-PWR9-LE-NEXT: sub r0, r30, r0
-; CHECK-PWR9-LE-NEXT: srawi r30, r0, 31
-; CHECK-PWR9-LE-NEXT: xor r0, r0, r30
-; CHECK-PWR9-LE-NEXT: sub r0, r0, r30
-; CHECK-PWR9-LE-NEXT: li r30, 11
-; CHECK-PWR9-LE-NEXT: vextubrx r29, r30, v2
-; CHECK-PWR9-LE-NEXT: vextubrx r30, r30, v3
-; CHECK-PWR9-LE-NEXT: clrlwi r29, r29, 24
-; CHECK-PWR9-LE-NEXT: clrlwi r30, r30, 24
-; CHECK-PWR9-LE-NEXT: sub r30, r29, r30
-; CHECK-PWR9-LE-NEXT: srawi r29, r30, 31
-; CHECK-PWR9-LE-NEXT: xor r30, r30, r29
-; CHECK-PWR9-LE-NEXT: sub r30, r30, r29
-; CHECK-PWR9-LE-NEXT: li r29, 12
-; CHECK-PWR9-LE-NEXT: vextubrx r28, r29, v2
-; CHECK-PWR9-LE-NEXT: vextubrx r29, r29, v3
-; CHECK-PWR9-LE-NEXT: clrlwi r28, r28, 24
-; CHECK-PWR9-LE-NEXT: clrlwi r29, r29, 24
-; CHECK-PWR9-LE-NEXT: sub r29, r28, r29
-; CHECK-PWR9-LE-NEXT: srawi r28, r29, 31
-; CHECK-PWR9-LE-NEXT: xor r29, r29, r28
-; CHECK-PWR9-LE-NEXT: sub r29, r29, r28
-; CHECK-PWR9-LE-NEXT: li r28, 13
-; CHECK-PWR9-LE-NEXT: vextubrx r27, r28, v2
-; CHECK-PWR9-LE-NEXT: vextubrx r28, r28, v3
-; CHECK-PWR9-LE-NEXT: clrlwi r27, r27, 24
-; CHECK-PWR9-LE-NEXT: clrlwi r28, r28, 24
-; CHECK-PWR9-LE-NEXT: sub r28, r27, r28
-; CHECK-PWR9-LE-NEXT: srawi r27, r28, 31
-; CHECK-PWR9-LE-NEXT: xor r28, r28, r27
-; CHECK-PWR9-LE-NEXT: sub r28, r28, r27
-; CHECK-PWR9-LE-NEXT: li r27, 14
-; CHECK-PWR9-LE-NEXT: vextubrx r26, r27, v2
-; CHECK-PWR9-LE-NEXT: vextubrx r27, r27, v3
-; CHECK-PWR9-LE-NEXT: clrlwi r26, r26, 24
-; CHECK-PWR9-LE-NEXT: clrlwi r27, r27, 24
-; CHECK-PWR9-LE-NEXT: sub r27, r26, r27
-; CHECK-PWR9-LE-NEXT: srawi r26, r27, 31
-; CHECK-PWR9-LE-NEXT: xor r27, r27, r26
-; CHECK-PWR9-LE-NEXT: sub r27, r27, r26
-; CHECK-PWR9-LE-NEXT: li r26, 15
-; CHECK-PWR9-LE-NEXT: vextubrx r25, r26, v2
-; CHECK-PWR9-LE-NEXT: vextubrx r26, r26, v3
-; CHECK-PWR9-LE-NEXT: mtvsrd v2, r5
-; CHECK-PWR9-LE-NEXT: mtvsrd v3, r4
-; CHECK-PWR9-LE-NEXT: vmrghb v2, v3, v2
-; CHECK-PWR9-LE-NEXT: mtvsrd v3, r3
-; CHECK-PWR9-LE-NEXT: clrlwi r25, r25, 24
-; CHECK-PWR9-LE-NEXT: clrlwi r26, r26, 24
-; CHECK-PWR9-LE-NEXT: vmrghb v3, v4, v3
-; CHECK-PWR9-LE-NEXT: mtvsrd v4, r8
-; CHECK-PWR9-LE-NEXT: sub r26, r25, r26
-; CHECK-PWR9-LE-NEXT: vmrglh v2, v3, v2
-; CHECK-PWR9-LE-NEXT: mtvsrd v3, r7
-; CHECK-PWR9-LE-NEXT: srawi r25, r26, 31
-; CHECK-PWR9-LE-NEXT: vmrghb v3, v4, v3
-; CHECK-PWR9-LE-NEXT: mtvsrd v4, r9
-; CHECK-PWR9-LE-NEXT: xor r26, r26, r25
-; CHECK-PWR9-LE-NEXT: vmrghb v4, v5, v4
-; CHECK-PWR9-LE-NEXT: sub r26, r26, r25
-; CHECK-PWR9-LE-NEXT: ld r25, -56(r1) # 8-byte Folded Reload
-; CHECK-PWR9-LE-NEXT: mtvsrd v5, r26
-; CHECK-PWR9-LE-NEXT: ld r26, -48(r1) # 8-byte Folded Reload
-; CHECK-PWR9-LE-NEXT: vmrglh v3, v4, v3
-; CHECK-PWR9-LE-NEXT: mtvsrd v4, r30
-; CHECK-PWR9-LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-PWR9-LE-NEXT: xxmrglw vs0, v3, v2
-; CHECK-PWR9-LE-NEXT: mtvsrd v2, r11
-; CHECK-PWR9-LE-NEXT: mtvsrd v3, r12
-; CHECK-PWR9-LE-NEXT: vmrghb v2, v3, v2
-; CHECK-PWR9-LE-NEXT: mtvsrd v3, r0
-; CHECK-PWR9-LE-NEXT: vmrghb v3, v4, v3
-; CHECK-PWR9-LE-NEXT: mtvsrd v4, r28
-; CHECK-PWR9-LE-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
-; CHECK-PWR9-LE-NEXT: vmrglh v2, v3, v2
-; CHECK-PWR9-LE-NEXT: mtvsrd v3, r29
-; CHECK-PWR9-LE-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
-; CHECK-PWR9-LE-NEXT: vmrghb v3, v4, v3
-; CHECK-PWR9-LE-NEXT: mtvsrd v4, r27
-; CHECK-PWR9-LE-NEXT: ld r27, -40(r1) # 8-byte Folded Reload
-; CHECK-PWR9-LE-NEXT: vmrghb v4, v5, v4
-; CHECK-PWR9-LE-NEXT: vmrglh v3, v4, v3
-; CHECK-PWR9-LE-NEXT: xxmrglw vs1, v3, v2
-; CHECK-PWR9-LE-NEXT: xxmrgld v2, vs1, vs0
-; CHECK-PWR9-LE-NEXT: blr
-;
-; CHECK-PWR9-BE-LABEL: sub_absv_8_ext:
-; CHECK-PWR9-BE: # %bb.0: # %entry
-; CHECK-PWR9-BE-NEXT: li r3, 0
-; CHECK-PWR9-BE-NEXT: li r4, 1
-; CHECK-PWR9-BE-NEXT: li r5, 2
-; CHECK-PWR9-BE-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-PWR9-BE-NEXT: vextublx r6, r3, v2
-; CHECK-PWR9-BE-NEXT: vextublx r3, r3, v3
-; CHECK-PWR9-BE-NEXT: vextublx r7, r4, v2
-; CHECK-PWR9-BE-NEXT: vextublx r4, r4, v3
-; CHECK-PWR9-BE-NEXT: std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-PWR9-BE-NEXT: std r28, -32(r1) # 8-byte Folded Spill
-; CHECK-PWR9-BE-NEXT: std r27, -40(r1) # 8-byte Folded Spill
-; CHECK-PWR9-BE-NEXT: std r26, -48(r1) # 8-byte Folded Spill
-; CHECK-PWR9-BE-NEXT: std r25, -56(r1) # 8-byte Folded Spill
-; CHECK-PWR9-BE-NEXT: clrlwi r6, r6, 24
-; CHECK-PWR9-BE-NEXT: clrlwi r3, r3, 24
-; CHECK-PWR9-BE-NEXT: clrlwi r7, r7, 24
-; CHECK-PWR9-BE-NEXT: clrlwi r4, r4, 24
-; CHECK-PWR9-BE-NEXT: vextublx r8, r5, v2
-; CHECK-PWR9-BE-NEXT: vextublx r5, r5, v3
-; CHECK-PWR9-BE-NEXT: sub r3, r6, r3
-; CHECK-PWR9-BE-NEXT: sub r4, r7, r4
-; CHECK-PWR9-BE-NEXT: clrlwi r8, r8, 24
-; CHECK-PWR9-BE-NEXT: clrlwi r5, r5, 24
-; CHECK-PWR9-BE-NEXT: sub r5, r8, r5
-; CHECK-PWR9-BE-NEXT: srawi r6, r3, 31
-; CHECK-PWR9-BE-NEXT: srawi r7, r4, 31
-; CHECK-PWR9-BE-NEXT: srawi r8, r5, 31
-; CHECK-PWR9-BE-NEXT: xor r3, r3, r6
-; CHECK-PWR9-BE-NEXT: xor r4, r4, r7
-; CHECK-PWR9-BE-NEXT: xor r5, r5, r8
-; CHECK-PWR9-BE-NEXT: sub r3, r3, r6
-; CHECK-PWR9-BE-NEXT: li r6, 3
-; CHECK-PWR9-BE-NEXT: sub r4, r4, r7
-; CHECK-PWR9-BE-NEXT: sub r5, r5, r8
-; CHECK-PWR9-BE-NEXT: vextublx r7, r6, v2
-; CHECK-PWR9-BE-NEXT: vextublx r6, r6, v3
-; CHECK-PWR9-BE-NEXT: clrlwi r7, r7, 24
-; CHECK-PWR9-BE-NEXT: clrlwi r6, r6, 24
-; CHECK-PWR9-BE-NEXT: sub r6, r7, r6
-; CHECK-PWR9-BE-NEXT: srawi r7, r6, 31
-; CHECK-PWR9-BE-NEXT: xor r6, r6, r7
-; CHECK-PWR9-BE-NEXT: sub r6, r6, r7
-; CHECK-PWR9-BE-NEXT: li r7, 4
-; CHECK-PWR9-BE-NEXT: vextublx r8, r7, v2
-; CHECK-PWR9-BE-NEXT: vextublx r7, r7, v3
-; CHECK-PWR9-BE-NEXT: clrlwi r8, r8, 24
-; CHECK-PWR9-BE-NEXT: clrlwi r7, r7, 24
-; CHECK-PWR9-BE-NEXT: sub r7, r8, r7
-; CHECK-PWR9-BE-NEXT: srawi r8, r7, 31
-; CHECK-PWR9-BE-NEXT: xor r7, r7, r8
-; CHECK-PWR9-BE-NEXT: sub r7, r7, r8
-; CHECK-PWR9-BE-NEXT: li r8, 5
-; CHECK-PWR9-BE-NEXT: vextublx r9, r8, v2
-; CHECK-PWR9-BE-NEXT: vextublx r8, r8, v3
-; CHECK-PWR9-BE-NEXT: clrlwi r9, r9, 24
-; CHECK-PWR9-BE-NEXT: clrlwi r8, r8, 24
-; CHECK-PWR9-BE-NEXT: sub r8, r9, r8
-; CHECK-PWR9-BE-NEXT: srawi r9, r8, 31
-; CHECK-PWR9-BE-NEXT: xor r8, r8, r9
-; CHECK-PWR9-BE-NEXT: sub r8, r8, r9
-; CHECK-PWR9-BE-NEXT: li r9, 6
-; CHECK-PWR9-BE-NEXT: vextublx r10, r9, v2
-; CHECK-PWR9-BE-NEXT: vextublx r9, r9, v3
-; CHECK-PWR9-BE-NEXT: clrlwi r10, r10, 24
-; CHECK-PWR9-BE-NEXT: clrlwi r9, r9, 24
-; CHECK-PWR9-BE-NEXT: sub r9, r10, r9
-; CHECK-PWR9-BE-NEXT: srawi r10, r9, 31
-; CHECK-PWR9-BE-NEXT: xor r9, r9, r10
-; CHECK-PWR9-BE-NEXT: sub r9, r9, r10
-; CHECK-PWR9-BE-NEXT: li r10, 7
-; CHECK-PWR9-BE-NEXT: vextublx r11, r10, v2
-; CHECK-PWR9-BE-NEXT: vextublx r10, r10, v3
-; CHECK-PWR9-BE-NEXT: mtfprwz f2, r9
-; CHECK-PWR9-BE-NEXT: clrlwi r11, r11, 24
-; CHECK-PWR9-BE-NEXT: clrlwi r10, r10, 24
-; CHECK-PWR9-BE-NEXT: sub r10, r11, r10
-; CHECK-PWR9-BE-NEXT: srawi r11, r10, 31
-; CHECK-PWR9-BE-NEXT: xor r10, r10, r11
-; CHECK-PWR9-BE-NEXT: sub r10, r10, r11
-; CHECK-PWR9-BE-NEXT: li r11, 8
-; CHECK-PWR9-BE-NEXT: vextublx r12, r11, v2
-; CHECK-PWR9-BE-NEXT: vextublx r11, r11, v3
-; CHECK-PWR9-BE-NEXT: clrlwi r12, r12, 24
-; CHECK-PWR9-BE-NEXT: clrlwi r11, r11, 24
-; CHECK-PWR9-BE-NEXT: sub r11, r12, r11
-; CHECK-PWR9-BE-NEXT: srawi r12, r11, 31
-; CHECK-PWR9-BE-NEXT: xor r11, r11, r12
-; CHECK-PWR9-BE-NEXT: sub r11, r11, r12
-; CHECK-PWR9-BE-NEXT: li r12, 9
-; CHECK-PWR9-BE-NEXT: vextublx r0, r12, v2
-; CHECK-PWR9-BE-NEXT: vextublx r12, r12, v3
-; CHECK-PWR9-BE-NEXT: clrlwi r0, r0, 24
-; CHECK-PWR9-BE-NEXT: clrlwi r12, r12, 24
-; CHECK-PWR9-BE-NEXT: sub r12, r0, r12
-; CHECK-PWR9-BE-NEXT: srawi r0, r12, 31
-; CHECK-PWR9-BE-NEXT: xor r12, r12, r0
-; CHECK-PWR9-BE-NEXT: sub r12, r12, r0
-; CHECK-PWR9-BE-NEXT: li r0, 10
-; CHECK-PWR9-BE-NEXT: vextublx r30, r0, v2
-; CHECK-PWR9-BE-NEXT: vextublx r0, r0, v3
-; CHECK-PWR9-BE-NEXT: mtvsrwz v4, r12
-; CHECK-PWR9-BE-NEXT: clrlwi r30, r30, 24
-; CHECK-PWR9-BE-NEXT: clrlwi r0, r0, 24
-; CHECK-PWR9-BE-NEXT: sub r0, r30, r0
-; CHECK-PWR9-BE-NEXT: srawi r30, r0, 31
-; CHECK-PWR9-BE-NEXT: xor r0, r0, r30
-; CHECK-PWR9-BE-NEXT: sub r0, r0, r30
-; CHECK-PWR9-BE-NEXT: li r30, 11
-; CHECK-PWR9-BE-NEXT: vextublx r29, r30, v2
-; CHECK-PWR9-BE-NEXT: vextublx r30, r30, v3
-; CHECK-PWR9-BE-NEXT: clrlwi r29, r29, 24
-; CHECK-PWR9-BE-NEXT: clrlwi r30, r30, 24
-; CHECK-PWR9-BE-NEXT: sub r30, r29, r30
-; CHECK-PWR9-BE-NEXT: srawi r29, r30, 31
-; CHECK-PWR9-BE-NEXT: xor r30, r30, r29
-; CHECK-PWR9-BE-NEXT: sub r30, r30, r29
-; CHECK-PWR9-BE-NEXT: li r29, 12
-; CHECK-PWR9-BE-NEXT: vextublx r28, r29, v2
-; CHECK-PWR9-BE-NEXT: vextublx r29, r29, v3
-; CHECK-PWR9-BE-NEXT: clrlwi r28, r28, 24
-; CHECK-PWR9-BE-NEXT: clrlwi r29, r29, 24
-; CHECK-PWR9-BE-NEXT: sub r29, r28, r29
-; CHECK-PWR9-BE-NEXT: srawi r28, r29, 31
-; CHECK-PWR9-BE-NEXT: xor r29, r29, r28
-; CHECK-PWR9-BE-NEXT: sub r29, r29, r28
-; CHECK-PWR9-BE-NEXT: li r28, 13
-; CHECK-PWR9-BE-NEXT: vextublx r27, r28, v2
-; CHECK-PWR9-BE-NEXT: vextublx r28, r28, v3
-; CHECK-PWR9-BE-NEXT: clrlwi r27, r27, 24
-; CHECK-PWR9-BE-NEXT: clrlwi r28, r28, 24
-; CHECK-PWR9-BE-NEXT: sub r28, r27, r28
-; CHECK-PWR9-BE-NEXT: srawi r27, r28, 31
-; CHECK-PWR9-BE-NEXT: xor r28, r28, r27
-; CHECK-PWR9-BE-NEXT: sub r28, r28, r27
-; CHECK-PWR9-BE-NEXT: li r27, 14
-; CHECK-PWR9-BE-NEXT: vextublx r26, r27, v2
-; CHECK-PWR9-BE-NEXT: vextublx r27, r27, v3
-; CHECK-PWR9-BE-NEXT: clrlwi r26, r26, 24
-; CHECK-PWR9-BE-NEXT: clrlwi r27, r27, 24
-; CHECK-PWR9-BE-NEXT: sub r27, r26, r27
-; CHECK-PWR9-BE-NEXT: srawi r26, r27, 31
-; CHECK-PWR9-BE-NEXT: xor r27, r27, r26
-; CHECK-PWR9-BE-NEXT: sub r27, r27, r26
-; CHECK-PWR9-BE-NEXT: li r26, 15
-; CHECK-PWR9-BE-NEXT: vextublx r25, r26, v2
-; CHECK-PWR9-BE-NEXT: vextublx r26, r26, v3
-; CHECK-PWR9-BE-NEXT: mtfprwz f0, r27
-; CHECK-PWR9-BE-NEXT: addis r27, r2, .LCPI9_0@toc@ha
-; CHECK-PWR9-BE-NEXT: mtvsrwz v3, r28
-; CHECK-PWR9-BE-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
-; CHECK-PWR9-BE-NEXT: addi r27, r27, .LCPI9_0@toc@l
-; CHECK-PWR9-BE-NEXT: clrlwi r25, r25, 24
-; CHECK-PWR9-BE-NEXT: clrlwi r26, r26, 24
-; CHECK-PWR9-BE-NEXT: lxv vs1, 0(r27)
-; CHECK-PWR9-BE-NEXT: ld r27, -40(r1) # 8-byte Folded Reload
-; CHECK-PWR9-BE-NEXT: sub r26, r25, r26
-; CHECK-PWR9-BE-NEXT: srawi r25, r26, 31
-; CHECK-PWR9-BE-NEXT: xor r26, r26, r25
-; CHECK-PWR9-BE-NEXT: sub r26, r26, r25
-; CHECK-PWR9-BE-NEXT: ld r25, -56(r1) # 8-byte Folded Reload
-; CHECK-PWR9-BE-NEXT: mtvsrwz v2, r26
-; CHECK-PWR9-BE-NEXT: ld r26, -48(r1) # 8-byte Folded Reload
-; CHECK-PWR9-BE-NEXT: xxperm v2, vs0, vs1
-; CHECK-PWR9-BE-NEXT: mtfprwz f0, r29
-; CHECK-PWR9-BE-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
-; CHECK-PWR9-BE-NEXT: xxperm v3, vs0, vs1
-; CHECK-PWR9-BE-NEXT: mtfprwz f0, r0
-; CHECK-PWR9-BE-NEXT: vmrghh v2, v3, v2
-; CHECK-PWR9-BE-NEXT: mtvsrwz v3, r30
-; CHECK-PWR9-BE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-PWR9-BE-NEXT: xxperm v3, vs0, vs1
-; CHECK-PWR9-BE-NEXT: mtfprwz f0, r11
-; CHECK-PWR9-BE-NEXT: xxperm v4, vs0, vs1
-; CHECK-PWR9-BE-NEXT: vmrghh v3, v4, v3
-; CHECK-PWR9-BE-NEXT: mtvsrwz v4, r4
-; CHECK-PWR9-BE-NEXT: xxmrghw vs0, v3, v2
-; CHECK-PWR9-BE-NEXT: mtvsrwz v2, r10
-; CHECK-PWR9-BE-NEXT: mtvsrwz v3, r8
-; CHECK-PWR9-BE-NEXT: xxperm v2, vs2, vs1
-; CHECK-PWR9-BE-NEXT: mtfprwz f2, r7
-; CHECK-PWR9-BE-NEXT: xxperm v3, vs2, vs1
-; CHECK-PWR9-BE-NEXT: mtfprwz f2, r5
-; CHECK-PWR9-BE-NEXT: vmrghh v2, v3, v2
-; CHECK-PWR9-BE-NEXT: mtvsrwz v3, r6
-; CHECK-PWR9-BE-NEXT: xxperm v3, vs2, vs1
-; CHECK-PWR9-BE-NEXT: mtfprwz f2, r3
-; CHECK-PWR9-BE-NEXT: xxperm v4, vs2, vs1
-; CHECK-PWR9-BE-NEXT: vmrghh v3, v4, v3
-; CHECK-PWR9-BE-NEXT: xxmrghw vs1, v3, v2
-; CHECK-PWR9-BE-NEXT: xxmrghd v2, vs1, vs0
-; CHECK-PWR9-BE-NEXT: blr
-;
-; CHECK-PWR8-LABEL: sub_absv_8_ext:
-; CHECK-PWR8: # %bb.0: # %entry
-; CHECK-PWR8-NEXT: xxswapd vs0, v2
-; CHECK-PWR8-NEXT: xxswapd vs1, v3
-; CHECK-PWR8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
-; CHECK-PWR8-NEXT: std r28, -32(r1) # 8-byte Folded Spill
-; CHECK-PWR8-NEXT: std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-PWR8-NEXT: std r26, -48(r1) # 8-byte Folded Spill
-; CHECK-PWR8-NEXT: mffprd r11, f0
-; CHECK-PWR8-NEXT: mffprd r8, f1
-; CHECK-PWR8-NEXT: std r27, -40(r1) # 8-byte Folded Spill
-; CHECK-PWR8-NEXT: std r25, -56(r1) # 8-byte Folded Spill
-; CHECK-PWR8-NEXT: clrldi r3, r11, 56
-; CHECK-PWR8-NEXT: clrldi r4, r8, 56
-; CHECK-PWR8-NEXT: rldicl r5, r11, 56, 56
-; CHECK-PWR8-NEXT: rldicl r6, r8, 56, 56
-; CHECK-PWR8-NEXT: rldicl r7, r11, 48, 56
-; CHECK-PWR8-NEXT: rldicl r9, r8, 48, 56
-; CHECK-PWR8-NEXT: rldicl r0, r11, 32, 56
-; CHECK-PWR8-NEXT: rldicl r30, r8, 32, 56
-; CHECK-PWR8-NEXT: rldicl r29, r11, 24, 56
-; CHECK-PWR8-NEXT: rldicl r28, r8, 24, 56
-; CHECK-PWR8-NEXT: rldicl r10, r11, 40, 56
-; CHECK-PWR8-NEXT: rldicl r12, r8, 40, 56
-; CHECK-PWR8-NEXT: rldicl r27, r11, 16, 56
-; CHECK-PWR8-NEXT: rldicl r11, r11, 8, 56
-; CHECK-PWR8-NEXT: std r24, -64(r1) # 8-byte Folded Spill
-; CHECK-PWR8-NEXT: clrlwi r3, r3, 24
-; CHECK-PWR8-NEXT: clrlwi r4, r4, 24
-; CHECK-PWR8-NEXT: clrlwi r5, r5, 24
-; CHECK-PWR8-NEXT: clrlwi r6, r6, 24
-; CHECK-PWR8-NEXT: clrlwi r7, r7, 24
-; CHECK-PWR8-NEXT: clrlwi r9, r9, 24
-; CHECK-PWR8-NEXT: sub r3, r3, r4
-; CHECK-PWR8-NEXT: clrlwi r0, r0, 24
-; CHECK-PWR8-NEXT: clrlwi r30, r30, 24
-; CHECK-PWR8-NEXT: sub r4, r5, r6
-; CHECK-PWR8-NEXT: sub r5, r7, r9
-; CHECK-PWR8-NEXT: clrlwi r29, r29, 24
-; CHECK-PWR8-NEXT: clrlwi r28, r28, 24
-; CHECK-PWR8-NEXT: sub r7, r0, r30
-; CHECK-PWR8-NEXT: sub r9, r29, r28
-; CHECK-PWR8-NEXT: clrlwi r10, r10, 24
-; CHECK-PWR8-NEXT: clrlwi r12, r12, 24
-; CHECK-PWR8-NEXT: sub r6, r10, r12
-; CHECK-PWR8-NEXT: clrlwi r27, r27, 24
-; CHECK-PWR8-NEXT: clrlwi r11, r11, 24
-; CHECK-PWR8-NEXT: srawi r0, r5, 31
-; CHECK-PWR8-NEXT: srawi r29, r7, 31
-; CHECK-PWR8-NEXT: srawi r12, r4, 31
-; CHECK-PWR8-NEXT: srawi r28, r9, 31
-; CHECK-PWR8-NEXT: srawi r30, r6, 31
-; CHECK-PWR8-NEXT: srawi r10, r3, 31
-; CHECK-PWR8-NEXT: xor r5, r5, r0
-; CHECK-PWR8-NEXT: xor r26, r7, r29
-; CHECK-PWR8-NEXT: sub r7, r5, r0
-; CHECK-PWR8-NEXT: rldicl r5, r8, 16, 56
-; CHECK-PWR8-NEXT: rldicl r8, r8, 8, 56
-; CHECK-PWR8-NEXT: xor r4, r4, r12
-; CHECK-PWR8-NEXT: xor r25, r9, r28
-; CHECK-PWR8-NEXT: sub r9, r4, r12
-; CHECK-PWR8-NEXT: sub r4, r26, r29
-...
[truncated]
|
ping? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. I don't think preserving the old test is necessary.
The scalarized IR was written before improvements to SLP / cost models ensured that the abs intrinsic was easily vectorizable.
opt -O3: https://zig.godbolt.org/z/39T65vh8M
Now that it is, we need a more useful llc test.