|
3 | 3 | *
|
4 | 4 | * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
|
5 | 5 | *
|
6 |
| - * Based on arch/x86/crypto/ghash-pmullni-intel_asm.S |
7 |
| - * |
8 |
| - * Copyright (c) 2009 Intel Corp. |
9 |
| - * Author: Huang Ying <ying.huang@intel.com> |
10 |
| - * Vinodh Gopal |
11 |
| - * Erdinc Ozturk |
12 |
| - * Deniz Karakoyunlu |
13 |
| - * |
14 | 6 | * This program is free software; you can redistribute it and/or modify it
|
15 | 7 | * under the terms of the GNU General Public License version 2 as published
|
16 | 8 | * by the Free Software Foundation.
|
|
19 | 11 | #include <linux/linkage.h>
|
20 | 12 | #include <asm/assembler.h>
|
21 | 13 |
|
22 |
| - DATA .req v0 |
23 |
| - SHASH .req v1 |
24 |
| - IN1 .req v2 |
| 14 | + SHASH .req v0 |
| 15 | + SHASH2 .req v1 |
25 | 16 | T1 .req v2
|
26 | 17 | T2 .req v3
|
27 |
| - T3 .req v4 |
28 |
| - VZR .req v5 |
| 18 | + MASK .req v4 |
| 19 | + XL .req v5 |
| 20 | + XM .req v6 |
| 21 | + XH .req v7 |
| 22 | + IN1 .req v7 |
29 | 23 |
|
30 | 24 | .text
|
31 | 25 | .arch armv8-a+crypto
|
|
35 | 29 | * struct ghash_key const *k, const char *head)
|
36 | 30 | */
|
37 | 31 | ENTRY(pmull_ghash_update)
|
38 |
| - ld1 {DATA.16b}, [x1] |
39 | 32 | ld1 {SHASH.16b}, [x3]
|
40 |
| - eor VZR.16b, VZR.16b, VZR.16b |
| 33 | + ld1 {XL.16b}, [x1] |
| 34 | + movi MASK.16b, #0xe1 |
| 35 | + ext SHASH2.16b, SHASH.16b, SHASH.16b, #8 |
| 36 | + shl MASK.2d, MASK.2d, #57 |
| 37 | + eor SHASH2.16b, SHASH2.16b, SHASH.16b |
41 | 38 |
|
42 | 39 | /* do the head block first, if supplied */
|
43 | 40 | cbz x4, 0f
|
44 |
| - ld1 {IN1.2d}, [x4] |
| 41 | + ld1 {T1.2d}, [x4] |
45 | 42 | b 1f
|
46 | 43 |
|
47 |
| -0: ld1 {IN1.2d}, [x2], #16 |
| 44 | +0: ld1 {T1.2d}, [x2], #16 |
48 | 45 | sub w0, w0, #1
|
49 |
| -1: ext IN1.16b, IN1.16b, IN1.16b, #8 |
50 |
| -CPU_LE( rev64 IN1.16b, IN1.16b ) |
51 |
| - eor DATA.16b, DATA.16b, IN1.16b |
52 | 46 |
|
53 |
| - /* multiply DATA by SHASH in GF(2^128) */ |
54 |
| - ext T2.16b, DATA.16b, DATA.16b, #8 |
55 |
| - ext T3.16b, SHASH.16b, SHASH.16b, #8 |
56 |
| - eor T2.16b, T2.16b, DATA.16b |
57 |
| - eor T3.16b, T3.16b, SHASH.16b |
| 47 | +1: /* multiply XL by SHASH in GF(2^128) */ |
| 48 | +CPU_LE( rev64 T1.16b, T1.16b ) |
58 | 49 |
|
59 |
| - pmull2 T1.1q, SHASH.2d, DATA.2d // a1 * b1 |
60 |
| - pmull DATA.1q, SHASH.1d, DATA.1d // a0 * b0 |
61 |
| - pmull T2.1q, T2.1d, T3.1d // (a1 + a0)(b1 + b0) |
62 |
| - eor T2.16b, T2.16b, T1.16b // (a0 * b1) + (a1 * b0) |
63 |
| - eor T2.16b, T2.16b, DATA.16b |
| 50 | + ext T2.16b, XL.16b, XL.16b, #8 |
| 51 | + ext IN1.16b, T1.16b, T1.16b, #8 |
| 52 | + eor T1.16b, T1.16b, T2.16b |
| 53 | + eor XL.16b, XL.16b, IN1.16b |
64 | 54 |
|
65 |
| - ext T3.16b, VZR.16b, T2.16b, #8 |
66 |
| - ext T2.16b, T2.16b, VZR.16b, #8 |
67 |
| - eor DATA.16b, DATA.16b, T3.16b |
68 |
| - eor T1.16b, T1.16b, T2.16b // <T1:DATA> is result of |
69 |
| - // carry-less multiplication |
| 55 | + pmull2 XH.1q, SHASH.2d, XL.2d // a1 * b1 |
| 56 | + eor T1.16b, T1.16b, XL.16b |
| 57 | + pmull XL.1q, SHASH.1d, XL.1d // a0 * b0 |
| 58 | + pmull XM.1q, SHASH2.1d, T1.1d // (a1 + a0)(b1 + b0) |
70 | 59 |
|
71 |
| - /* first phase of the reduction */ |
72 |
| - shl T3.2d, DATA.2d, #1 |
73 |
| - eor T3.16b, T3.16b, DATA.16b |
74 |
| - shl T3.2d, T3.2d, #5 |
75 |
| - eor T3.16b, T3.16b, DATA.16b |
76 |
| - shl T3.2d, T3.2d, #57 |
77 |
| - ext T2.16b, VZR.16b, T3.16b, #8 |
78 |
| - ext T3.16b, T3.16b, VZR.16b, #8 |
79 |
| - eor DATA.16b, DATA.16b, T2.16b |
80 |
| - eor T1.16b, T1.16b, T3.16b |
| 60 | + ext T1.16b, XL.16b, XH.16b, #8 |
| 61 | + eor T2.16b, XL.16b, XH.16b |
| 62 | + eor XM.16b, XM.16b, T1.16b |
| 63 | + eor XM.16b, XM.16b, T2.16b |
| 64 | + pmull T2.1q, XL.1d, MASK.1d |
81 | 65 |
|
82 |
| - /* second phase of the reduction */ |
83 |
| - ushr T2.2d, DATA.2d, #5 |
84 |
| - eor T2.16b, T2.16b, DATA.16b |
85 |
| - ushr T2.2d, T2.2d, #1 |
86 |
| - eor T2.16b, T2.16b, DATA.16b |
87 |
| - ushr T2.2d, T2.2d, #1 |
88 |
| - eor T1.16b, T1.16b, T2.16b |
89 |
| - eor DATA.16b, DATA.16b, T1.16b |
| 66 | + mov XH.d[0], XM.d[1] |
| 67 | + mov XM.d[1], XL.d[0] |
| 68 | + |
| 69 | + eor XL.16b, XM.16b, T2.16b |
| 70 | + ext T2.16b, XL.16b, XL.16b, #8 |
| 71 | + pmull XL.1q, XL.1d, MASK.1d |
| 72 | + eor T2.16b, T2.16b, XH.16b |
| 73 | + eor XL.16b, XL.16b, T2.16b |
90 | 74 |
|
91 | 75 | cbnz w0, 0b
|
92 | 76 |
|
93 |
| - st1 {DATA.16b}, [x1] |
| 77 | + st1 {XL.16b}, [x1] |
94 | 78 | ret
|
95 | 79 | ENDPROC(pmull_ghash_update)
|
0 commit comments