define arm_aapcs_vfpcc <8 x i16> @zext_unscaled_i8_i16(i8* %base, <8 x i16>* %offptr) {
; CHECK-LABEL: zext_unscaled_i8_i16:
; CHECK: @ %bb.0: @ %entry
- ; CHECK-NEXT: .save {r4, r5, r7, lr}
- ; CHECK-NEXT: push {r4, r5, r7, lr}
- ; CHECK-NEXT: vldrh.u32 q0, [r1]
- ; CHECK-NEXT: vldrh.u32 q1, [r1, #8]
- ; CHECK-NEXT: vadd.i32 q0, q0, r0
- ; CHECK-NEXT: vadd.i32 q1, q1, r0
- ; CHECK-NEXT: vmov r2, s3
- ; CHECK-NEXT: vmov r3, s1
- ; CHECK-NEXT: vmov r5, s0
- ; CHECK-NEXT: vmov r0, s4
- ; CHECK-NEXT: vmov r1, s5
- ; CHECK-NEXT: vmov r4, s7
- ; CHECK-NEXT: ldrb.w r12, [r2]
- ; CHECK-NEXT: vmov r2, s2
- ; CHECK-NEXT: ldrb.w lr, [r3]
- ; CHECK-NEXT: vmov r3, s6
- ; CHECK-NEXT: ldrb r5, [r5]
- ; CHECK-NEXT: ldrb r0, [r0]
- ; CHECK-NEXT: vmov.16 q0[0], r5
- ; CHECK-NEXT: ldrb r1, [r1]
- ; CHECK-NEXT: vmov.16 q0[1], lr
- ; CHECK-NEXT: ldrb r4, [r4]
- ; CHECK-NEXT: ldrb r2, [r2]
- ; CHECK-NEXT: ldrb r3, [r3]
- ; CHECK-NEXT: vmov.16 q0[2], r2
- ; CHECK-NEXT: vmov.16 q0[3], r12
- ; CHECK-NEXT: vmov.16 q0[4], r0
- ; CHECK-NEXT: vmov.16 q0[5], r1
- ; CHECK-NEXT: vmov.16 q0[6], r3
- ; CHECK-NEXT: vmov.16 q0[7], r4
- ; CHECK-NEXT: vmovlb.u8 q0, q0
- ; CHECK-NEXT: pop {r4, r5, r7, pc}
+ ; CHECK-NEXT: vldrh.u16 q1, [r1]
+ ; CHECK-NEXT: vldrb.u16 q0, [r0, q1]
+ ; CHECK-NEXT: bx lr
entry:
  %offs = load <8 x i16>, <8 x i16>* %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
@@ -48,38 +19,9 @@ entry:
define arm_aapcs_vfpcc <8 x i16> @sext_unscaled_i8_i16(i8* %base, <8 x i16>* %offptr) {
; CHECK-LABEL: sext_unscaled_i8_i16:
; CHECK: @ %bb.0: @ %entry
- ; CHECK-NEXT: .save {r4, r5, r7, lr}
- ; CHECK-NEXT: push {r4, r5, r7, lr}
- ; CHECK-NEXT: vldrh.u32 q0, [r1]
- ; CHECK-NEXT: vldrh.u32 q1, [r1, #8]
- ; CHECK-NEXT: vadd.i32 q0, q0, r0
- ; CHECK-NEXT: vadd.i32 q1, q1, r0
- ; CHECK-NEXT: vmov r2, s3
- ; CHECK-NEXT: vmov r3, s1
- ; CHECK-NEXT: vmov r5, s0
- ; CHECK-NEXT: vmov r0, s4
- ; CHECK-NEXT: vmov r1, s5
- ; CHECK-NEXT: vmov r4, s7
- ; CHECK-NEXT: ldrb.w r12, [r2]
- ; CHECK-NEXT: vmov r2, s2
- ; CHECK-NEXT: ldrb.w lr, [r3]
- ; CHECK-NEXT: vmov r3, s6
- ; CHECK-NEXT: ldrb r5, [r5]
- ; CHECK-NEXT: ldrb r0, [r0]
- ; CHECK-NEXT: vmov.16 q0[0], r5
- ; CHECK-NEXT: ldrb r1, [r1]
- ; CHECK-NEXT: vmov.16 q0[1], lr
- ; CHECK-NEXT: ldrb r4, [r4]
- ; CHECK-NEXT: ldrb r2, [r2]
- ; CHECK-NEXT: ldrb r3, [r3]
- ; CHECK-NEXT: vmov.16 q0[2], r2
- ; CHECK-NEXT: vmov.16 q0[3], r12
- ; CHECK-NEXT: vmov.16 q0[4], r0
- ; CHECK-NEXT: vmov.16 q0[5], r1
- ; CHECK-NEXT: vmov.16 q0[6], r3
- ; CHECK-NEXT: vmov.16 q0[7], r4
- ; CHECK-NEXT: vmovlb.s8 q0, q0
- ; CHECK-NEXT: pop {r4, r5, r7, pc}
+ ; CHECK-NEXT: vldrh.u16 q1, [r1]
+ ; CHECK-NEXT: vldrb.s16 q0, [r0, q1]
+ ; CHECK-NEXT: bx lr
entry:
  %offs = load <8 x i16>, <8 x i16>* %offptr, align 2
  %offs.zext = zext <8 x i16> %offs to <8 x i32>
@@ -122,38 +64,9 @@ entry:
define arm_aapcs_vfpcc <8 x i16> @zext_unsigned_unscaled_i8_i8(i8* %base, <8 x i8>* %offptr) {
; CHECK-LABEL: zext_unsigned_unscaled_i8_i8:
; CHECK: @ %bb.0: @ %entry
- ; CHECK-NEXT: .save {r4, r5, r7, lr}
- ; CHECK-NEXT: push {r4, r5, r7, lr}
- ; CHECK-NEXT: vldrb.u32 q0, [r1]
- ; CHECK-NEXT: vldrb.u32 q1, [r1, #4]
- ; CHECK-NEXT: vadd.i32 q0, q0, r0
- ; CHECK-NEXT: vadd.i32 q1, q1, r0
- ; CHECK-NEXT: vmov r2, s3
- ; CHECK-NEXT: vmov r3, s1
- ; CHECK-NEXT: vmov r5, s0
- ; CHECK-NEXT: vmov r0, s4
- ; CHECK-NEXT: vmov r1, s5
- ; CHECK-NEXT: vmov r4, s7
- ; CHECK-NEXT: ldrb.w r12, [r2]
- ; CHECK-NEXT: vmov r2, s2
- ; CHECK-NEXT: ldrb.w lr, [r3]
- ; CHECK-NEXT: vmov r3, s6
- ; CHECK-NEXT: ldrb r5, [r5]
- ; CHECK-NEXT: ldrb r0, [r0]
- ; CHECK-NEXT: vmov.16 q0[0], r5
- ; CHECK-NEXT: ldrb r1, [r1]
- ; CHECK-NEXT: vmov.16 q0[1], lr
- ; CHECK-NEXT: ldrb r4, [r4]
- ; CHECK-NEXT: ldrb r2, [r2]
- ; CHECK-NEXT: ldrb r3, [r3]
- ; CHECK-NEXT: vmov.16 q0[2], r2
- ; CHECK-NEXT: vmov.16 q0[3], r12
- ; CHECK-NEXT: vmov.16 q0[4], r0
- ; CHECK-NEXT: vmov.16 q0[5], r1
- ; CHECK-NEXT: vmov.16 q0[6], r3
- ; CHECK-NEXT: vmov.16 q0[7], r4
- ; CHECK-NEXT: vmovlb.u8 q0, q0
- ; CHECK-NEXT: pop {r4, r5, r7, pc}
+ ; CHECK-NEXT: vldrb.u16 q1, [r1]
+ ; CHECK-NEXT: vldrb.u16 q0, [r0, q1]
+ ; CHECK-NEXT: bx lr
entry:
  %offs = load <8 x i8>, <8 x i8>* %offptr, align 1
  %offs.zext = zext <8 x i8> %offs to <8 x i32>
@@ -166,38 +79,9 @@ entry:
define arm_aapcs_vfpcc <8 x i16> @sext_unsigned_unscaled_i8_i8(i8* %base, <8 x i8>* %offptr) {
; CHECK-LABEL: sext_unsigned_unscaled_i8_i8:
; CHECK: @ %bb.0: @ %entry
- ; CHECK-NEXT: .save {r4, r5, r7, lr}
- ; CHECK-NEXT: push {r4, r5, r7, lr}
- ; CHECK-NEXT: vldrb.u32 q0, [r1]
- ; CHECK-NEXT: vldrb.u32 q1, [r1, #4]
- ; CHECK-NEXT: vadd.i32 q0, q0, r0
- ; CHECK-NEXT: vadd.i32 q1, q1, r0
- ; CHECK-NEXT: vmov r2, s3
- ; CHECK-NEXT: vmov r3, s1
- ; CHECK-NEXT: vmov r5, s0
- ; CHECK-NEXT: vmov r0, s4
- ; CHECK-NEXT: vmov r1, s5
- ; CHECK-NEXT: vmov r4, s7
- ; CHECK-NEXT: ldrb.w r12, [r2]
- ; CHECK-NEXT: vmov r2, s2
- ; CHECK-NEXT: ldrb.w lr, [r3]
- ; CHECK-NEXT: vmov r3, s6
- ; CHECK-NEXT: ldrb r5, [r5]
- ; CHECK-NEXT: ldrb r0, [r0]
- ; CHECK-NEXT: vmov.16 q0[0], r5
- ; CHECK-NEXT: ldrb r1, [r1]
- ; CHECK-NEXT: vmov.16 q0[1], lr
- ; CHECK-NEXT: ldrb r4, [r4]
- ; CHECK-NEXT: ldrb r2, [r2]
- ; CHECK-NEXT: ldrb r3, [r3]
- ; CHECK-NEXT: vmov.16 q0[2], r2
- ; CHECK-NEXT: vmov.16 q0[3], r12
- ; CHECK-NEXT: vmov.16 q0[4], r0
- ; CHECK-NEXT: vmov.16 q0[5], r1
- ; CHECK-NEXT: vmov.16 q0[6], r3
- ; CHECK-NEXT: vmov.16 q0[7], r4
- ; CHECK-NEXT: vmovlb.s8 q0, q0
- ; CHECK-NEXT: pop {r4, r5, r7, pc}
+ ; CHECK-NEXT: vldrb.u16 q1, [r1]
+ ; CHECK-NEXT: vldrb.s16 q0, [r0, q1]
+ ; CHECK-NEXT: bx lr
entry:
  %offs = load <8 x i8>, <8 x i8>* %offptr, align 1
  %offs.zext = zext <8 x i8> %offs to <8 x i32>