
Commit db91af0

Ard Biesheuvel authored and Herbert Xu (herbertx) committed
crypto: algapi - make crypto_xor() and crypto_inc() alignment agnostic
Instead of unconditionally forcing 4 byte alignment for all generic chaining modes that rely on crypto_xor() or crypto_inc() (which may result in unnecessary copying of data when the underlying hardware can perform unaligned accesses efficiently), make those functions deal with unaligned input explicitly, but only if the Kconfig symbol HAVE_EFFICIENT_UNALIGNED_ACCESS is set. This will allow us to drop the alignmasks from the CBC, CMAC, CTR, CTS, PCBC and SEQIV drivers.

For crypto_inc(), this simply involves making the 4-byte stride conditional on HAVE_EFFICIENT_UNALIGNED_ACCESS being set, given that it typically operates on 16 byte buffers.

For crypto_xor(), an algorithm is implemented that simply runs through the input using the largest strides possible if unaligned accesses are allowed. If they are not, an optimal sequence of memory accesses is emitted that takes the relative alignment of the input buffers into account, e.g., if the relative misalignment of dst and src is 4 bytes, the entire xor operation will be completed using 4 byte loads and stores (modulo unaligned bits at the start and end). Note that all expressions involving misalign are simply eliminated by the compiler when HAVE_EFFICIENT_UNALIGNED_ACCESS is defined.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
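To make the stride selection concrete (this example is not part of the patch): if the low address bits of dst and src differ only in bit 2, then dst ^ src has 4 as its lowest set bit, so after a few leading byte operations the whole xor can proceed in 4-byte loads and stores. A small user-space sketch of that computation, using __builtin_ctzl() in place of the kernel's __ffs():

/*
 * User-space sketch (not part of the patch) of the relative-alignment
 * computation performed by __crypto_xor() when unaligned accesses are slow.
 */
#include <stdio.h>

static unsigned long relative_alignment(unsigned long dst, unsigned long src)
{
        unsigned long size = sizeof(unsigned long);
        unsigned long d = (dst ^ src) & (size - 1);

        /* lowest set bit of the misalignment, or a full word if none */
        return d ? 1UL << __builtin_ctzl(d) : size;
}

int main(void)
{
        /* low address bits differ by 4 -> 4-byte loads/stores suffice */
        printf("%lu\n", relative_alignment(0x1003, 0x2007)); /* prints 4 */

        /* identical low bits -> full unsigned long stride */
        printf("%lu\n", relative_alignment(0x1008, 0x3000)); /* prints 8 on LP64 */
        return 0;
}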
1 parent 7d6e910 commit db91af0

8 files changed: +70, -34 lines changed


crypto/algapi.c

Lines changed: 50 additions & 18 deletions
@@ -962,34 +962,66 @@ void crypto_inc(u8 *a, unsigned int size)
 	__be32 *b = (__be32 *)(a + size);
 	u32 c;
 
-	for (; size >= 4; size -= 4) {
-		c = be32_to_cpu(*--b) + 1;
-		*b = cpu_to_be32(c);
-		if (c)
-			return;
-	}
+	if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
+	    !((unsigned long)b & (__alignof__(*b) - 1)))
+		for (; size >= 4; size -= 4) {
+			c = be32_to_cpu(*--b) + 1;
+			*b = cpu_to_be32(c);
+			if (c)
+				return;
+		}
 
 	crypto_inc_byte(a, size);
 }
 EXPORT_SYMBOL_GPL(crypto_inc);
 
-static inline void crypto_xor_byte(u8 *a, const u8 *b, unsigned int size)
+void __crypto_xor(u8 *dst, const u8 *src, unsigned int len)
 {
-	for (; size; size--)
-		*a++ ^= *b++;
-}
+	int relalign = 0;
+
+	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
+		int size = sizeof(unsigned long);
+		int d = ((unsigned long)dst ^ (unsigned long)src) & (size - 1);
+
+		relalign = d ? 1 << __ffs(d) : size;
+
+		/*
+		 * If we care about alignment, process as many bytes as
+		 * needed to advance dst and src to values whose alignments
+		 * equal their relative alignment. This will allow us to
+		 * process the remainder of the input using optimal strides.
+		 */
+		while (((unsigned long)dst & (relalign - 1)) && len > 0) {
+			*dst++ ^= *src++;
+			len--;
+		}
+	}
 
-void crypto_xor(u8 *dst, const u8 *src, unsigned int size)
-{
-	u32 *a = (u32 *)dst;
-	u32 *b = (u32 *)src;
+	while (IS_ENABLED(CONFIG_64BIT) && len >= 8 && !(relalign & 7)) {
+		*(u64 *)dst ^= *(u64 *)src;
+		dst += 8;
+		src += 8;
+		len -= 8;
+	}
 
-	for (; size >= 4; size -= 4)
-		*a++ ^= *b++;
+	while (len >= 4 && !(relalign & 3)) {
+		*(u32 *)dst ^= *(u32 *)src;
+		dst += 4;
+		src += 4;
+		len -= 4;
+	}
+
+	while (len >= 2 && !(relalign & 1)) {
+		*(u16 *)dst ^= *(u16 *)src;
+		dst += 2;
+		src += 2;
+		len -= 2;
+	}
 
-	crypto_xor_byte((u8 *)a, (u8 *)b, size);
+	while (len--)
+		*dst++ ^= *src++;
 }
-EXPORT_SYMBOL_GPL(crypto_xor);
+EXPORT_SYMBOL_GPL(__crypto_xor);
 
 unsigned int crypto_alg_extsize(struct crypto_alg *alg)
 {
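For context on how these helpers are consumed, here is a simplified CTR-style loop (a sketch only, not the actual crypto/ctr.c code; encrypt_block() is a hypothetical stand-in for the underlying block cipher and a 16-byte block size is assumed). With crypto_xor() and crypto_inc() now alignment agnostic, a loop like this no longer needs the extra u32 alignmask that the drivers below drop:

/* Sketch: walk nbytes of input in 16-byte blocks, kernel context assumed. */
static void ctr_like_crypt(u8 *dst, const u8 *src, unsigned int nbytes,
			   u8 ctrblk[16])
{
	while (nbytes >= 16) {
		encrypt_block(dst, ctrblk);	/* hypothetical: dst = E_k(counter) */
		crypto_xor(dst, src, 16);	/* dst ^= plaintext, any alignment */
		crypto_inc(ctrblk, 16);		/* big-endian counter increment */
		dst += 16;
		src += 16;
		nbytes -= 16;
	}
}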

crypto/cbc.c

Lines changed: 0 additions & 3 deletions
@@ -145,9 +145,6 @@ static int crypto_cbc_create(struct crypto_template *tmpl, struct rtattr **tb)
 	inst->alg.base.cra_blocksize = alg->cra_blocksize;
 	inst->alg.base.cra_alignmask = alg->cra_alignmask;
 
-	/* We access the data as u32s when xoring. */
-	inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
-
 	inst->alg.ivsize = alg->cra_blocksize;
 	inst->alg.min_keysize = alg->cra_cipher.cia_min_keysize;
 	inst->alg.max_keysize = alg->cra_cipher.cia_max_keysize;

crypto/cmac.c

Lines changed: 1 addition & 2 deletions
@@ -260,8 +260,7 @@ static int cmac_create(struct crypto_template *tmpl, struct rtattr **tb)
 	if (err)
 		goto out_free_inst;
 
-	/* We access the data as u32s when xoring. */
-	alignmask = alg->cra_alignmask | (__alignof__(u32) - 1);
+	alignmask = alg->cra_alignmask;
 	inst->alg.base.cra_alignmask = alignmask;
 	inst->alg.base.cra_priority = alg->cra_priority;
 	inst->alg.base.cra_blocksize = alg->cra_blocksize;

crypto/ctr.c

Lines changed: 1 addition & 1 deletion
@@ -209,7 +209,7 @@ static struct crypto_instance *crypto_ctr_alloc(struct rtattr **tb)
 	inst->alg.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER;
 	inst->alg.cra_priority = alg->cra_priority;
 	inst->alg.cra_blocksize = 1;
-	inst->alg.cra_alignmask = alg->cra_alignmask | (__alignof__(u32) - 1);
+	inst->alg.cra_alignmask = alg->cra_alignmask;
 	inst->alg.cra_type = &crypto_blkcipher_type;
 
 	inst->alg.cra_blkcipher.ivsize = alg->cra_blocksize;

crypto/cts.c

Lines changed: 0 additions & 3 deletions
@@ -374,9 +374,6 @@ static int crypto_cts_create(struct crypto_template *tmpl, struct rtattr **tb)
 	inst->alg.base.cra_blocksize = alg->base.cra_blocksize;
 	inst->alg.base.cra_alignmask = alg->base.cra_alignmask;
 
-	/* We access the data as u32s when xoring. */
-	inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
-
 	inst->alg.ivsize = alg->base.cra_blocksize;
 	inst->alg.chunksize = crypto_skcipher_alg_chunksize(alg);
 	inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg);

crypto/pcbc.c

Lines changed: 0 additions & 3 deletions
@@ -260,9 +260,6 @@ static int crypto_pcbc_create(struct crypto_template *tmpl, struct rtattr **tb)
 	inst->alg.base.cra_blocksize = alg->cra_blocksize;
 	inst->alg.base.cra_alignmask = alg->cra_alignmask;
 
-	/* We access the data as u32s when xoring. */
-	inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
-
 	inst->alg.ivsize = alg->cra_blocksize;
 	inst->alg.min_keysize = alg->cra_cipher.cia_min_keysize;
 	inst->alg.max_keysize = alg->cra_cipher.cia_max_keysize;

crypto/seqiv.c

Lines changed: 0 additions & 2 deletions
@@ -153,8 +153,6 @@ static int seqiv_aead_create(struct crypto_template *tmpl, struct rtattr **tb)
 	if (IS_ERR(inst))
 		return PTR_ERR(inst);
 
-	inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
-
 	spawn = aead_instance_ctx(inst);
 	alg = crypto_spawn_aead_alg(spawn);

include/crypto/algapi.h

Lines changed: 18 additions & 2 deletions
@@ -191,9 +191,25 @@ static inline unsigned int crypto_queue_len(struct crypto_queue *queue)
 	return queue->qlen;
 }
 
-/* These functions require the input/output to be aligned as u32. */
 void crypto_inc(u8 *a, unsigned int size);
-void crypto_xor(u8 *dst, const u8 *src, unsigned int size);
+void __crypto_xor(u8 *dst, const u8 *src, unsigned int size);
+
+static inline void crypto_xor(u8 *dst, const u8 *src, unsigned int size)
+{
+	if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
+	    __builtin_constant_p(size) &&
+	    (size % sizeof(unsigned long)) == 0) {
+		unsigned long *d = (unsigned long *)dst;
+		unsigned long *s = (unsigned long *)src;
+
+		while (size > 0) {
+			*d++ ^= *s++;
+			size -= sizeof(unsigned long);
+		}
+	} else {
+		__crypto_xor(dst, src, size);
+	}
+}
 
 int blkcipher_walk_done(struct blkcipher_desc *desc,
 			struct blkcipher_walk *walk, int err);
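The new inline wrapper means a caller passing a compile-time-constant, word-multiple length gets fully unrolled word-sized XORs on architectures with HAVE_EFFICIENT_UNALIGNED_ACCESS, while every other call falls back to the out-of-line __crypto_xor(). A brief illustration (kernel context assumed; these callers are hypothetical, not part of the patch):

/* Constant, word-multiple size: the compiler emits two u64 XORs on a
 * 64-bit arch with efficient unaligned access, with no function call. */
static void xor_block(u8 *dst, const u8 *src)
{
	crypto_xor(dst, src, 16);
}

/* Runtime size: the wrapper falls through to __crypto_xor(). */
static void xor_tail(u8 *dst, const u8 *src, unsigned int len)
{
	crypto_xor(dst, src, len);
}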
