@@ -193,15 +193,16 @@ AES_ENTRY(aes_cbc_encrypt)
193
193
cbz w6 , .Lcbcencloop
194
194
195
195
ld1 {v0.16b} , [ x5 ] / * get iv * /
196
- enc_prepare w3 , x2 , x5
196
+ enc_prepare w3 , x2 , x6
197
197
198
198
.Lcbcencloop:
199
199
ld1 {v1.16b} , [ x1 ], # 16 / * get next pt block * /
200
200
eor v0.16b , v0.16b , v1.16b / * .. and xor with iv * /
201
- encrypt_block v0 , w3 , x2 , x5 , w6
201
+ encrypt_block v0 , w3 , x2 , x6 , w7
202
202
st1 {v0.16b} , [ x0 ], # 16
203
203
subs w4 , w4 , # 1
204
204
bne .Lcbcencloop
205
+ st1 {v0.16b} , [ x5 ] / * return iv * /
205
206
ret
206
207
AES_ENDPROC(aes_cbc_encrypt)
207
208
@@ -211,7 +212,7 @@ AES_ENTRY(aes_cbc_decrypt)
211
212
cbz w6 , .LcbcdecloopNx
212
213
213
214
ld1 {v7.16b} , [ x5 ] / * get iv * /
214
- dec_prepare w3 , x2 , x5
215
+ dec_prepare w3 , x2 , x6
215
216
216
217
.LcbcdecloopNx:
217
218
#if INTERLEAVE >= 2
@@ -248,14 +249,15 @@ AES_ENTRY(aes_cbc_decrypt)
248
249
.Lcbcdecloop:
249
250
ld1 {v1.16b} , [ x1 ], # 16 / * get next ct block * /
250
251
mov v0.16b , v1.16b / * ... and copy to v0 * /
251
- decrypt_block v0 , w3 , x2 , x5 , w6
252
+ decrypt_block v0 , w3 , x2 , x6 , w7
252
253
eor v0.16b , v0.16b , v7.16b / * xor with iv => pt * /
253
254
mov v7.16b , v1.16b / * ct is next iv * /
254
255
st1 {v0.16b} , [ x0 ], # 16
255
256
subs w4 , w4 , # 1
256
257
bne .Lcbcdecloop
257
258
.Lcbcdecout:
258
259
FRAME_POP
260
+ st1 {v7.16b} , [ x5 ] / * return iv * /
259
261
ret
260
262
AES_ENDPROC(aes_cbc_decrypt)
261
263
@@ -267,36 +269,27 @@ AES_ENDPROC(aes_cbc_decrypt)
267
269
268
270
AES_ENTRY(aes_ctr_encrypt)
269
271
FRAME_PUSH
270
- cbnz w6 , .Lctrfirst / * 1st time around? * /
271
- umov x5 , v4.d [ 1 ] / * keep swabbed ctr in reg * /
272
- rev x5 , x5
273
- #if INTERLEAVE >= 2
274
- cmn w5 , w4 / * 32 bit overflow? * /
275
- bcs .Lctrinc
276
- add x5 , x5 , # 1 / * increment BE ctr * /
277
- b .LctrincNx
278
- #else
279
- b .Lctrinc
280
- #endif
281
- .Lctrfirst:
272
+ cbz w6 , .Lctrnotfirst / * 1st time around? * /
282
273
enc_prepare w3 , x2 , x6
283
274
ld1 {v4.16b} , [ x5 ]
284
- umov x5 , v4.d [ 1 ] / * keep swabbed ctr in reg * /
285
- rev x5 , x5
275
+
276
+ .Lctrnotfirst:
277
+ umov x8 , v4.d [ 1 ] / * keep swabbed ctr in reg * /
278
+ rev x8 , x8
286
279
#if INTERLEAVE >= 2
287
- cmn w5 , w4 / * 32 bit overflow? * /
280
+ cmn w8 , w4 / * 32 bit overflow? * /
288
281
bcs .Lctrloop
289
282
.LctrloopNx:
290
283
subs w4 , w4 , #INTERLEAVE
291
284
bmi .Lctr1x
292
285
#if INTERLEAVE == 2
293
286
mov v0.8b , v4.8b
294
287
mov v1.8b , v4.8b
295
- rev x7 , x5
296
- add x5 , x5 , # 1
288
+ rev x7 , x8
289
+ add x8 , x8 , # 1
297
290
ins v0.d [ 1 ], x7
298
- rev x7 , x5
299
- add x5 , x5 , # 1
291
+ rev x7 , x8
292
+ add x8 , x8 , # 1
300
293
ins v1.d [ 1 ], x7
301
294
ld1 {v2.16b - v3.16b} , [ x1 ], # 32 / * get 2 input blocks * /
302
295
do_encrypt_block2x
@@ -305,7 +298,7 @@ AES_ENTRY(aes_ctr_encrypt)
305
298
st1 {v0.16b - v1.16b} , [ x0 ], # 32
306
299
#else
307
300
ldr q8 , = 0x30000000200000001 / * addends 1 , 2 , 3 [, 0 ] * /
308
- dup v7.4s , w5
301
+ dup v7.4s , w8
309
302
mov v0.16b , v4.16b
310
303
add v7.4s , v7.4s , v8.4s
311
304
mov v1.16b , v4.16b
@@ -323,49 +316,52 @@ AES_ENTRY(aes_ctr_encrypt)
323
316
eor v2.16b , v7.16b , v2.16b
324
317
eor v3.16b , v5.16b , v3.16b
325
318
st1 {v0.16b - v3.16b} , [ x0 ], # 64
326
- add x5 , x5 , #INTERLEAVE
319
+ add x8 , x8 , #INTERLEAVE
327
320
#endif
328
- cbz w4 , .LctroutNx
329
- .LctrincNx:
330
- rev x7 , x5
321
+ rev x7 , x8
331
322
ins v4.d [ 1 ], x7
323
+ cbz w4 , .Lctrout
332
324
b .LctrloopNx
333
- .LctroutNx:
334
- sub x5 , x5 , # 1
335
- rev x7 , x5
336
- ins v4.d [ 1 ], x7
337
- b .Lctrout
338
325
.Lctr1x:
339
326
adds w4 , w4 , #INTERLEAVE
340
327
beq .Lctrout
341
328
#endif
342
329
.Lctrloop:
343
330
mov v0.16b , v4.16b
344
331
encrypt_block v0 , w3 , x2 , x6 , w7
332
+
333
+ adds x8 , x8 , # 1 / * increment BE ctr * /
334
+ rev x7 , x8
335
+ ins v4.d [ 1 ], x7
336
+ bcs .Lctrcarry / * overflow? * /
337
+
338
+ .Lctrcarrydone:
345
339
subs w4 , w4 , # 1
346
340
bmi .Lctrhalfblock / * blocks < 0 means 1 / 2 block * /
347
341
ld1 {v3.16b} , [ x1 ], # 16
348
342
eor v3.16b , v0.16b , v3.16b
349
343
st1 {v3.16b} , [ x0 ], # 16
350
- beq .Lctrout
351
- .Lctrinc:
352
- adds x5 , x5 , # 1 / * increment BE ctr * /
353
- rev x7 , x5
354
- ins v4.d [ 1 ], x7
355
- bcc .Lctrloop / * no overflow? * /
356
- umov x7 , v4.d [ 0 ] / * load upper word of ctr * /
357
- rev x7 , x7 / * ... to handle the carry * /
358
- add x7 , x7 , # 1
359
- rev x7 , x7
360
- ins v4.d [ 0 ], x7
361
- b .Lctrloop
344
+ bne .Lctrloop
345
+
346
+ .Lctrout:
347
+ st1 {v4.16b} , [ x5 ] / * return next CTR value * /
348
+ FRAME_POP
349
+ ret
350
+
362
351
.Lctrhalfblock:
363
352
ld1 {v3.8b} , [ x1 ]
364
353
eor v3.8b , v0.8b , v3.8b
365
354
st1 {v3.8b} , [ x0 ]
366
- .Lctrout:
367
355
FRAME_POP
368
356
ret
357
+
358
+ .Lctrcarry:
359
+ umov x7 , v4.d [ 0 ] / * load upper word of ctr * /
360
+ rev x7 , x7 / * ... to handle the carry * /
361
+ add x7 , x7 , # 1
362
+ rev x7 , x7
363
+ ins v4.d [ 0 ], x7
364
+ b .Lctrcarrydone
369
365
AES_ENDPROC(aes_ctr_encrypt)
370
366
.ltorg
371
367
0 commit comments