
Commit dd597fb

Ard Biesheuvel authored and herbertx committed
crypto: arm64/aes-blk - add support for CTS-CBC mode
Currently, we rely on the generic CTS chaining mode wrapper to instantiate the cts(cbc(aes)) skcipher. Due to the high performance of the ARMv8 Crypto Extensions AES instructions (~1 cycle per byte), any overhead in the chaining mode layers is amplified, and so it pays off considerably to fold the CTS handling into the SIMD routines.

On Cortex-A53, this results in a ~50% speedup for smaller input sizes.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
1 parent 6e7de6a commit dd597fb
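[Editor's note] For context, kernel users never call the ce/neon routines below directly; they request the mode by name through the skcipher API, and the accelerated implementation added here is selected over the generic cts() template when its priority wins. A minimal synchronous sketch, not part of this patch — cts_demo() and its parameters are illustrative only:

#include <crypto/skcipher.h>
#include <linux/scatterlist.h>

/* Illustrative only: encrypt one contiguous buffer with cts(cbc(aes)).
 * Masking off CRYPTO_ALG_ASYNC requests a synchronous tfm, which keeps
 * the example simple; error handling is minimal. */
static int cts_demo(const u8 *key, unsigned int keylen,
		    u8 *buf, unsigned int len, u8 *iv)
{
	struct crypto_skcipher *tfm;
	struct skcipher_request *req;
	struct scatterlist sg;
	int err;

	tfm = crypto_alloc_skcipher("cts(cbc(aes))", 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_skcipher_setkey(tfm, key, keylen);
	if (err)
		goto out;

	req = skcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out;
	}

	sg_init_one(&sg, buf, len);
	skcipher_request_set_callback(req, 0, NULL, NULL);
	skcipher_request_set_crypt(req, &sg, &sg, len, iv);
	err = crypto_skcipher_encrypt(req);

	skcipher_request_free(req);
out:
	crypto_free_skcipher(tfm);
	return err;
}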

File tree: 2 files changed (+243, −1)


arch/arm64/crypto/aes-glue.c

Lines changed: 165 additions & 0 deletions
@@ -15,6 +15,7 @@
 #include <crypto/internal/hash.h>
 #include <crypto/internal/simd.h>
 #include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
 #include <linux/module.h>
 #include <linux/cpufeature.h>
 #include <crypto/xts.h>
@@ -31,6 +32,8 @@
 #define aes_ecb_decrypt		ce_aes_ecb_decrypt
 #define aes_cbc_encrypt		ce_aes_cbc_encrypt
 #define aes_cbc_decrypt		ce_aes_cbc_decrypt
+#define aes_cbc_cts_encrypt	ce_aes_cbc_cts_encrypt
+#define aes_cbc_cts_decrypt	ce_aes_cbc_cts_decrypt
 #define aes_ctr_encrypt		ce_aes_ctr_encrypt
 #define aes_xts_encrypt		ce_aes_xts_encrypt
 #define aes_xts_decrypt		ce_aes_xts_decrypt
@@ -45,6 +48,8 @@ MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
 #define aes_ecb_decrypt		neon_aes_ecb_decrypt
 #define aes_cbc_encrypt		neon_aes_cbc_encrypt
 #define aes_cbc_decrypt		neon_aes_cbc_decrypt
+#define aes_cbc_cts_encrypt	neon_aes_cbc_cts_encrypt
+#define aes_cbc_cts_decrypt	neon_aes_cbc_cts_decrypt
 #define aes_ctr_encrypt		neon_aes_ctr_encrypt
 #define aes_xts_encrypt		neon_aes_xts_encrypt
 #define aes_xts_decrypt		neon_aes_xts_decrypt
@@ -73,6 +78,11 @@ asmlinkage void aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[],
 asmlinkage void aes_cbc_decrypt(u8 out[], u8 const in[], u32 const rk[],
 				int rounds, int blocks, u8 iv[]);
 
+asmlinkage void aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
+				int rounds, int bytes, u8 const iv[]);
+asmlinkage void aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
+				int rounds, int bytes, u8 const iv[]);
+
 asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[],
 				int rounds, int blocks, u8 ctr[]);
 
@@ -87,6 +97,12 @@ asmlinkage void aes_mac_update(u8 const in[], u32 const rk[], int rounds,
 			       int blocks, u8 dg[], int enc_before,
 			       int enc_after);
 
+struct cts_cbc_req_ctx {
+	struct scatterlist sg_src[2];
+	struct scatterlist sg_dst[2];
+	struct skcipher_request subreq;
+};
+
 struct crypto_aes_xts_ctx {
 	struct crypto_aes_ctx key1;
 	struct crypto_aes_ctx __aligned(8) key2;
@@ -209,6 +225,136 @@ static int cbc_decrypt(struct skcipher_request *req)
 	return err;
 }
 
+static int cts_cbc_init_tfm(struct crypto_skcipher *tfm)
+{
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct cts_cbc_req_ctx));
+	return 0;
+}
+
+static int cts_cbc_encrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct cts_cbc_req_ctx *rctx = skcipher_request_ctx(req);
+	int err, rounds = 6 + ctx->key_length / 4;
+	int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
+	struct scatterlist *src = req->src, *dst = req->dst;
+	struct skcipher_walk walk;
+
+	skcipher_request_set_tfm(&rctx->subreq, tfm);
+
+	if (req->cryptlen == AES_BLOCK_SIZE)
+		cbc_blocks = 1;
+
+	if (cbc_blocks > 0) {
+		unsigned int blocks;
+
+		skcipher_request_set_crypt(&rctx->subreq, req->src, req->dst,
+					   cbc_blocks * AES_BLOCK_SIZE,
+					   req->iv);
+
+		err = skcipher_walk_virt(&walk, &rctx->subreq, false);
+
+		while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+			kernel_neon_begin();
+			aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+					ctx->key_enc, rounds, blocks, walk.iv);
+			kernel_neon_end();
+			err = skcipher_walk_done(&walk,
+						 walk.nbytes % AES_BLOCK_SIZE);
+		}
+		if (err)
+			return err;
+
+		if (req->cryptlen == AES_BLOCK_SIZE)
+			return 0;
+
+		dst = src = scatterwalk_ffwd(rctx->sg_src, req->src,
+					     rctx->subreq.cryptlen);
+		if (req->dst != req->src)
+			dst = scatterwalk_ffwd(rctx->sg_dst, req->dst,
+					       rctx->subreq.cryptlen);
+	}
+
+	/* handle ciphertext stealing */
+	skcipher_request_set_crypt(&rctx->subreq, src, dst,
+				   req->cryptlen - cbc_blocks * AES_BLOCK_SIZE,
+				   req->iv);
+
+	err = skcipher_walk_virt(&walk, &rctx->subreq, false);
+	if (err)
+		return err;
+
+	kernel_neon_begin();
+	aes_cbc_cts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+			    ctx->key_enc, rounds, walk.nbytes, walk.iv);
+	kernel_neon_end();
+
+	return skcipher_walk_done(&walk, 0);
+}
+
+static int cts_cbc_decrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct cts_cbc_req_ctx *rctx = skcipher_request_ctx(req);
+	int err, rounds = 6 + ctx->key_length / 4;
+	int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
+	struct scatterlist *src = req->src, *dst = req->dst;
+	struct skcipher_walk walk;
+
+	skcipher_request_set_tfm(&rctx->subreq, tfm);
+
+	if (req->cryptlen == AES_BLOCK_SIZE)
+		cbc_blocks = 1;
+
+	if (cbc_blocks > 0) {
+		unsigned int blocks;
+
+		skcipher_request_set_crypt(&rctx->subreq, req->src, req->dst,
+					   cbc_blocks * AES_BLOCK_SIZE,
+					   req->iv);
+
+		err = skcipher_walk_virt(&walk, &rctx->subreq, false);
+
+		while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+			kernel_neon_begin();
+			aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+					ctx->key_dec, rounds, blocks, walk.iv);
+			kernel_neon_end();
+			err = skcipher_walk_done(&walk,
+						 walk.nbytes % AES_BLOCK_SIZE);
+		}
+		if (err)
+			return err;
+
+		if (req->cryptlen == AES_BLOCK_SIZE)
+			return 0;
+
+		dst = src = scatterwalk_ffwd(rctx->sg_src, req->src,
+					     rctx->subreq.cryptlen);
+		if (req->dst != req->src)
+			dst = scatterwalk_ffwd(rctx->sg_dst, req->dst,
+					       rctx->subreq.cryptlen);
+	}
+
+	/* handle ciphertext stealing */
+	skcipher_request_set_crypt(&rctx->subreq, src, dst,
+				   req->cryptlen - cbc_blocks * AES_BLOCK_SIZE,
+				   req->iv);
+
+	err = skcipher_walk_virt(&walk, &rctx->subreq, false);
+	if (err)
+		return err;
+
+	kernel_neon_begin();
+	aes_cbc_cts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+			    ctx->key_dec, rounds, walk.nbytes, walk.iv);
+	kernel_neon_end();
+
+	return skcipher_walk_done(&walk, 0);
+}
+
 static int ctr_encrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
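[Editor's note] A note on the split logic above: cbc_blocks reserves the final two (possibly partial) blocks for the stealing routine, and the single-block case is special-cased to plain CBC. A standalone restatement of that arithmetic (userspace sketch, illustrative only):

#include <stdio.h>

#define AES_BLOCK_SIZE		16
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	/* mirrors the cbc_blocks computation in cts_cbc_encrypt() */
	static const unsigned int lens[] = { 16, 17, 32, 33, 49, 64 };

	for (int i = 0; i < 6; i++) {
		unsigned int cryptlen = lens[i];
		int cbc_blocks = DIV_ROUND_UP(cryptlen, AES_BLOCK_SIZE) - 2;
		unsigned int cbc_bytes;

		if (cryptlen == AES_BLOCK_SIZE)
			cbc_blocks = 1;	/* one block: plain CBC, no stealing */

		cbc_bytes = (cbc_blocks > 0 ? cbc_blocks : 0) * AES_BLOCK_SIZE;
		printf("cryptlen=%2u: %2u CBC bytes, %2u byte CTS tail\n",
		       cryptlen, cbc_bytes, cryptlen - cbc_bytes);
	}
	return 0;
}

For every length above one block, the tail handed to aes_cbc_cts_{en,de}crypt() is between 17 and 32 bytes, which is why the algorithm registered below declares .walksize = 2 * AES_BLOCK_SIZE: the walk then presents the whole tail in a single step.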
@@ -334,6 +480,25 @@ static struct skcipher_alg aes_algs[] = { {
 	.setkey		= skcipher_aes_setkey,
 	.encrypt	= cbc_encrypt,
 	.decrypt	= cbc_decrypt,
+}, {
+	.base = {
+		.cra_name		= "__cts(cbc(aes))",
+		.cra_driver_name	= "__cts-cbc-aes-" MODE,
+		.cra_priority		= PRIO,
+		.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.cra_blocksize		= 1,
+		.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
+		.cra_module		= THIS_MODULE,
+	},
+	.min_keysize	= AES_MIN_KEY_SIZE,
+	.max_keysize	= AES_MAX_KEY_SIZE,
+	.ivsize		= AES_BLOCK_SIZE,
+	.chunksize	= AES_BLOCK_SIZE,
+	.walksize	= 2 * AES_BLOCK_SIZE,
+	.setkey		= skcipher_aes_setkey,
+	.encrypt	= cts_cbc_encrypt,
+	.decrypt	= cts_cbc_decrypt,
+	.init		= cts_cbc_init_tfm,
 }, {
 	.base = {
 		.cra_name		= "__ctr(aes)",
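[Editor's note] The CRYPTO_ALG_INTERNAL flag and the "__" name prefix mark this implementation as unusable directly: like the other modes in this file, it is exposed through the crypto SIMD helper, which defers to cryptd when kernel-mode NEON is unavailable. A hedged sketch of that wrapping step, following the aes_init() pattern already in this file (names assume MODE == "ce"; this snippet is not part of the diff):

#include <crypto/internal/simd.h>

/* wrap the internal alg: users then allocate "cts(cbc(aes))" and may get
 * the "cts-cbc-aes-ce" driver, which forwards to __cts-cbc-aes-ce when
 * the NEON registers are usable */
static struct simd_skcipher_alg *cts_simd_alg;

static int __init cts_wrap(void)
{
	cts_simd_alg = simd_skcipher_create_compat("cts(cbc(aes))",
						   "cts-cbc-aes-ce",
						   "__cts-cbc-aes-ce");
	return PTR_ERR_OR_ZERO(cts_simd_alg);
}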

arch/arm64/crypto/aes-modes.S

Lines changed: 78 additions & 1 deletion
@@ -170,6 +170,84 @@ AES_ENTRY(aes_cbc_decrypt)
 AES_ENDPROC(aes_cbc_decrypt)
 
 
+	/*
+	 * aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
+	 *		       int rounds, int bytes, u8 const iv[])
+	 * aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
+	 *		       int rounds, int bytes, u8 const iv[])
+	 */
+
+AES_ENTRY(aes_cbc_cts_encrypt)
+	adr_l		x8, .Lcts_permute_table
+	sub		x4, x4, #16
+	add		x9, x8, #32
+	add		x8, x8, x4
+	sub		x9, x9, x4
+	ld1		{v3.16b}, [x8]
+	ld1		{v4.16b}, [x9]
+
+	ld1		{v0.16b}, [x1], x4		/* overlapping loads */
+	ld1		{v1.16b}, [x1]
+
+	ld1		{v5.16b}, [x5]			/* get iv */
+	enc_prepare	w3, x2, x6
+
+	eor		v0.16b, v0.16b, v5.16b		/* xor with iv */
+	tbl		v1.16b, {v1.16b}, v4.16b
+	encrypt_block	v0, w3, x2, x6, w7
+
+	eor		v1.16b, v1.16b, v0.16b
+	tbl		v0.16b, {v0.16b}, v3.16b
+	encrypt_block	v1, w3, x2, x6, w7
+
+	add		x4, x0, x4
+	st1		{v0.16b}, [x4]			/* overlapping stores */
+	st1		{v1.16b}, [x0]
+	ret
+AES_ENDPROC(aes_cbc_cts_encrypt)
+
+AES_ENTRY(aes_cbc_cts_decrypt)
+	adr_l		x8, .Lcts_permute_table
+	sub		x4, x4, #16
+	add		x9, x8, #32
+	add		x8, x8, x4
+	sub		x9, x9, x4
+	ld1		{v3.16b}, [x8]
+	ld1		{v4.16b}, [x9]
+
+	ld1		{v0.16b}, [x1], x4		/* overlapping loads */
+	ld1		{v1.16b}, [x1]
+
+	ld1		{v5.16b}, [x5]			/* get iv */
+	dec_prepare	w3, x2, x6
+
+	tbl		v2.16b, {v1.16b}, v4.16b
+	decrypt_block	v0, w3, x2, x6, w7
+	eor		v2.16b, v2.16b, v0.16b
+
+	tbx		v0.16b, {v1.16b}, v4.16b
+	tbl		v2.16b, {v2.16b}, v3.16b
+	decrypt_block	v0, w3, x2, x6, w7
+	eor		v0.16b, v0.16b, v5.16b		/* xor with iv */
+
+	add		x4, x0, x4
+	st1		{v2.16b}, [x4]			/* overlapping stores */
+	st1		{v0.16b}, [x0]
+	ret
+AES_ENDPROC(aes_cbc_cts_decrypt)
+
+	.section	".rodata", "a"
+	.align		6
+.Lcts_permute_table:
+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
+	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+	.previous
+
 /*
  * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
  *		   int blocks, u8 ctr[])
@@ -253,7 +331,6 @@ AES_ENTRY(aes_ctr_encrypt)
 	ins		v4.d[0], x7
 	b		.Lctrcarrydone
 AES_ENDPROC(aes_ctr_encrypt)
-	.ltorg
 
 
 /*
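[Editor's note] The .Lcts_permute_table above drives the tbl/tbx byte shuffles: with d = bytes - 16 (the length of the final partial block), the code loads v3 = table[d..d+15] and v4 = table[32-d..47-d], and tbl yields a zero byte for any 0xff index. A small userspace sketch (illustrative) that prints the two effective shuffle masks:

#include <stdio.h>

int main(void)
{
	unsigned char table[48];
	int d = 5;	/* example: 5-byte final partial block */

	/* same layout as .Lcts_permute_table: 16 x 0xff, 0..15, 16 x 0xff */
	for (int i = 0; i < 48; i++)
		table[i] = (i >= 16 && i < 32) ? i - 16 : 0xff;

	/* v3 = table[d..d+15]: shifts the stolen head of the encrypted
	 * penultimate block to the end of the register, zeroing the rest */
	printf("v3:");
	for (int i = 0; i < 16; i++)
		printf(" %02x", table[d + i]);

	/* v4 = table[32-d..47-d]: moves the d tail bytes of the final
	 * overlapping 16-byte load to the front and zero-pads the rest */
	printf("\nv4:");
	for (int i = 0; i < 16; i++)
		printf(" %02x", table[32 - d + i]);
	printf("\n");
	return 0;
}

For d = 5, v4 pulls the five plaintext tail bytes to the front of the register with zero padding, while v3 positions the stolen ciphertext head for the overlapping store.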

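[Editor's note] Semantically, aes_cbc_cts_encrypt() computes the final two blocks of CBC with ciphertext stealing in the CS3 variant (last two blocks unconditionally swapped): encrypt the penultimate block, zero-pad the final partial block and XOR it with that result (the XOR against the tail is the "steal"), encrypt again, and store the two outputs swapped via overlapping stores. A byte-level model of the encrypt path (userspace sketch; aes_enc_block() is a hypothetical stand-in for one AES block encryption with the expanded key):

#include <stdint.h>
#include <string.h>

#define AES_BLOCK_SIZE	16

/* hypothetical single-block AES primitive; stands in for the
 * encrypt_block macro used by the assembly */
void aes_enc_block(uint8_t out[AES_BLOCK_SIZE],
		   const uint8_t in[AES_BLOCK_SIZE],
		   const uint32_t *rk, int rounds);

/* model of aes_cbc_cts_encrypt() for a 17..32 byte tail (CBC-CS3) */
static void cts_tail_encrypt(uint8_t *out, const uint8_t *in,
			     const uint32_t *rk, int rounds,
			     int bytes, const uint8_t *iv)
{
	uint8_t e[AES_BLOCK_SIZE];		/* C'' = E(P[n-1] ^ IV) */
	uint8_t last[AES_BLOCK_SIZE] = { 0 };
	int tail = bytes - AES_BLOCK_SIZE;	/* 1..16 */
	int i;

	for (i = 0; i < AES_BLOCK_SIZE; i++)
		e[i] = in[i] ^ iv[i];
	aes_enc_block(e, e, rk, rounds);

	/* zero-padded P[n] ^ C'': XORing the zero padding against the
	 * tail of C'' is exactly the ciphertext-stealing step */
	memcpy(last, in + AES_BLOCK_SIZE, tail);
	for (i = 0; i < AES_BLOCK_SIZE; i++)
		last[i] ^= e[i];
	aes_enc_block(last, last, rk, rounds);

	/* CS3 swap: full final block first, truncated C'' after it */
	memcpy(out, last, AES_BLOCK_SIZE);
	memcpy(out + AES_BLOCK_SIZE, e, tail);
}

The decrypt routine inverts this with the same permute table, using tbx to merge the retained ciphertext bytes back into the block before the second decryption.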