Gentoo Archives: gentoo-commits

From: Mike Pagano <mpagano@g.o>
To: gentoo-commits@l.g.o
Subject: [gentoo-commits] proj/linux-patches:5.4 commit in: /
Date: Thu, 30 Jul 2020 14:59:02
Message-Id: 1596121070.f0bdbaca29aa5d7622ee6d92358f5c0d624b31da.mpagano@gentoo
1 commit: f0bdbaca29aa5d7622ee6d92358f5c0d624b31da
2 Author: Mike Pagano <mpagano <AT> gentoo <DOT> org>
3 AuthorDate: Thu Jul 30 14:57:50 2020 +0000
4 Commit: Mike Pagano <mpagano <AT> gentoo <DOT> org>
5 CommitDate: Thu Jul 30 14:57:50 2020 +0000
6 URL: https://gitweb.gentoo.org/proj/linux-patches.git/commit/?id=f0bdbaca
7
8 Add wireguard to genpatches
9
10 Signed-off-by: Mike Pagano <mpagano <AT> gentoo.org>
11
12 0000_README | 4 +
13 2400_wireguard-backport-v5.4.53.patch | 62233 ++++++++++++++++++++++++++++++++
14 2 files changed, 62237 insertions(+)
15
16 diff --git a/0000_README b/0000_README
17 index d3d1536..fb63537 100644
18 --- a/0000_README
19 +++ b/0000_README
20 @@ -271,6 +271,10 @@ Patch: 2000_BT-Check-key-sizes-only-if-Secure-Simple-Pairing-enabled.patch
21 From: https://lore.kernel.org/linux-bluetooth/20190522070540.48895-1-marcel@××××××××.org/raw
22 Desc: Bluetooth: Check key sizes only when Secure Simple Pairing is enabled. See bug #686758
23
24 +Patch: 2400_wireguard-backport-v5.4.53.patch
25 +From: https://git.zx2c4.com/wireguard-linux/
26 +Desc: Extremely simple yet fast and modern VPN that utilizes state-of-the-art cryptography
27 +
28 Patch: 2600_enable-key-swapping-for-apple-mac.patch
29 From: https://github.com/free5lot/hid-apple-patched
30 Desc: This hid-apple patch enables swapping of the FN and left Control keys and some additional on some apple keyboards. See bug #622902
31
32 diff --git a/2400_wireguard-backport-v5.4.53.patch b/2400_wireguard-backport-v5.4.53.patch
33 new file mode 100644
34 index 0000000..f2297bd
35 --- /dev/null
36 +++ b/2400_wireguard-backport-v5.4.53.patch
37 @@ -0,0 +1,62233 @@
38 +From 8cec11dad447e163a1752a349c0bc01ca5dcf9b5 Mon Sep 17 00:00:00 2001
39 +From: Ard Biesheuvel <ardb@××××××.org>
40 +Date: Fri, 8 Nov 2019 13:22:07 +0100
41 +Subject: crypto: lib - tidy up lib/crypto Kconfig and Makefile
42 +
43 +commit 746b2e024c67aa605ac12d135cd7085a49cf9dc4 upstream.
44 +
45 +In preparation of introducing a set of crypto library interfaces, tidy
46 +up the Makefile and split off the Kconfig symbols into a separate file.
47 +
48 +Signed-off-by: Ard Biesheuvel <ardb@××××××.org>
49 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
50 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
51 +---
52 + crypto/Kconfig | 13 +------------
53 + lib/crypto/Kconfig | 15 +++++++++++++++
54 + lib/crypto/Makefile | 16 ++++++++--------
55 + 3 files changed, 24 insertions(+), 20 deletions(-)
56 + create mode 100644 lib/crypto/Kconfig
57 +
58 +diff --git a/crypto/Kconfig b/crypto/Kconfig
59 +index b2cc0ad3792a..7d19b46a7ef7 100644
60 +--- a/crypto/Kconfig
61 ++++ b/crypto/Kconfig
62 +@@ -878,9 +878,6 @@ config CRYPTO_SHA1_PPC_SPE
63 + SHA-1 secure hash standard (DFIPS 180-4) implemented
64 + using powerpc SPE SIMD instruction set.
65 +
66 +-config CRYPTO_LIB_SHA256
67 +- tristate
68 +-
69 + config CRYPTO_SHA256
70 + tristate "SHA224 and SHA256 digest algorithm"
71 + select CRYPTO_HASH
72 +@@ -1019,9 +1016,6 @@ config CRYPTO_GHASH_CLMUL_NI_INTEL
73 +
74 + comment "Ciphers"
75 +
76 +-config CRYPTO_LIB_AES
77 +- tristate
78 +-
79 + config CRYPTO_AES
80 + tristate "AES cipher algorithms"
81 + select CRYPTO_ALGAPI
82 +@@ -1150,9 +1144,6 @@ config CRYPTO_ANUBIS
83 + <https://www.cosic.esat.kuleuven.be/nessie/reports/>
84 + <http://www.larc.usp.br/~pbarreto/AnubisPage.html>
85 +
86 +-config CRYPTO_LIB_ARC4
87 +- tristate
88 +-
89 + config CRYPTO_ARC4
90 + tristate "ARC4 cipher algorithm"
91 + select CRYPTO_BLKCIPHER
92 +@@ -1339,9 +1330,6 @@ config CRYPTO_CAST6_AVX_X86_64
93 + This module provides the Cast6 cipher algorithm that processes
94 + eight blocks parallel using the AVX instruction set.
95 +
96 +-config CRYPTO_LIB_DES
97 +- tristate
98 +-
99 + config CRYPTO_DES
100 + tristate "DES and Triple DES EDE cipher algorithms"
101 + select CRYPTO_ALGAPI
102 +@@ -1845,6 +1833,7 @@ config CRYPTO_STATS
103 + config CRYPTO_HASH_INFO
104 + bool
105 +
106 ++source "lib/crypto/Kconfig"
107 + source "drivers/crypto/Kconfig"
108 + source "crypto/asymmetric_keys/Kconfig"
109 + source "certs/Kconfig"
110 +diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
111 +new file mode 100644
112 +index 000000000000..261430051595
113 +--- /dev/null
114 ++++ b/lib/crypto/Kconfig
115 +@@ -0,0 +1,15 @@
116 ++# SPDX-License-Identifier: GPL-2.0
117 ++
118 ++comment "Crypto library routines"
119 ++
120 ++config CRYPTO_LIB_AES
121 ++ tristate
122 ++
123 ++config CRYPTO_LIB_ARC4
124 ++ tristate
125 ++
126 ++config CRYPTO_LIB_DES
127 ++ tristate
128 ++
129 ++config CRYPTO_LIB_SHA256
130 ++ tristate
131 +diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
132 +index cbe0b6a6450d..63de4cb3fcf8 100644
133 +--- a/lib/crypto/Makefile
134 ++++ b/lib/crypto/Makefile
135 +@@ -1,13 +1,13 @@
136 + # SPDX-License-Identifier: GPL-2.0
137 +
138 +-obj-$(CONFIG_CRYPTO_LIB_AES) += libaes.o
139 +-libaes-y := aes.o
140 ++obj-$(CONFIG_CRYPTO_LIB_AES) += libaes.o
141 ++libaes-y := aes.o
142 +
143 +-obj-$(CONFIG_CRYPTO_LIB_ARC4) += libarc4.o
144 +-libarc4-y := arc4.o
145 ++obj-$(CONFIG_CRYPTO_LIB_ARC4) += libarc4.o
146 ++libarc4-y := arc4.o
147 +
148 +-obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o
149 +-libdes-y := des.o
150 ++obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o
151 ++libdes-y := des.o
152 +
153 +-obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o
154 +-libsha256-y := sha256.o
155 ++obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o
156 ++libsha256-y := sha256.o
157 +--
158 +cgit v1.2.3-4-ga26e
159 +
160 +
161 +From 6d1d95d130c320738d92c335188c6f695ab2e9c3 Mon Sep 17 00:00:00 2001
162 +From: Ard Biesheuvel <ardb@××××××.org>
163 +Date: Fri, 8 Nov 2019 13:22:08 +0100
164 +Subject: crypto: chacha - move existing library code into lib/crypto
165 +
166 +commit 5fb8ef25803ef33e2eb60b626435828b937bed75 upstream.
167 +
168 +Currently, our generic ChaCha implementation consists of a permute
169 +function in lib/chacha.c that operates on the 64-byte ChaCha state
170 +directly [and which is always included into the core kernel since it
171 +is used by the /dev/random driver], and the crypto API plumbing to
172 +expose it as a skcipher.
173 +
174 +In order to support in-kernel users that need the ChaCha streamcipher
175 +but have no need [or tolerance] for going through the abstractions of
176 +the crypto API, let's expose the streamcipher bits via a library API
177 +as well, in a way that permits the implementation to be superseded by
178 +an architecture specific one if provided.
179 +
180 +So move the streamcipher code into a separate module in lib/crypto,
181 +and expose the init() and crypt() routines to users of the library.
182 +
183 +Signed-off-by: Ard Biesheuvel <ardb@××××××.org>
184 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
185 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
186 +---
187 + arch/arm/crypto/chacha-neon-glue.c | 2 +-
188 + arch/arm64/crypto/chacha-neon-glue.c | 2 +-
189 + arch/x86/crypto/chacha_glue.c | 2 +-
190 + crypto/Kconfig | 1 +
191 + crypto/chacha_generic.c | 60 ++----------------
192 + include/crypto/chacha.h | 77 +++++++++++++++++------
193 + include/crypto/internal/chacha.h | 53 ++++++++++++++++
194 + lib/Makefile | 3 +-
195 + lib/chacha.c | 113 ----------------------------------
196 + lib/crypto/Kconfig | 26 ++++++++
197 + lib/crypto/Makefile | 4 ++
198 + lib/crypto/chacha.c | 115 +++++++++++++++++++++++++++++++++++
199 + lib/crypto/libchacha.c | 35 +++++++++++
200 + 13 files changed, 303 insertions(+), 190 deletions(-)
201 + create mode 100644 include/crypto/internal/chacha.h
202 + delete mode 100644 lib/chacha.c
203 + create mode 100644 lib/crypto/chacha.c
204 + create mode 100644 lib/crypto/libchacha.c
205 +
206 +diff --git a/arch/arm/crypto/chacha-neon-glue.c b/arch/arm/crypto/chacha-neon-glue.c
207 +index a8e9b534c8da..26576772f18b 100644
208 +--- a/arch/arm/crypto/chacha-neon-glue.c
209 ++++ b/arch/arm/crypto/chacha-neon-glue.c
210 +@@ -20,7 +20,7 @@
211 + */
212 +
213 + #include <crypto/algapi.h>
214 +-#include <crypto/chacha.h>
215 ++#include <crypto/internal/chacha.h>
216 + #include <crypto/internal/simd.h>
217 + #include <crypto/internal/skcipher.h>
218 + #include <linux/kernel.h>
219 +diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c
220 +index 1495d2b18518..d4cc61bfe79d 100644
221 +--- a/arch/arm64/crypto/chacha-neon-glue.c
222 ++++ b/arch/arm64/crypto/chacha-neon-glue.c
223 +@@ -20,7 +20,7 @@
224 + */
225 +
226 + #include <crypto/algapi.h>
227 +-#include <crypto/chacha.h>
228 ++#include <crypto/internal/chacha.h>
229 + #include <crypto/internal/simd.h>
230 + #include <crypto/internal/skcipher.h>
231 + #include <linux/kernel.h>
232 +diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c
233 +index 388f95a4ec24..bc62daa8dafd 100644
234 +--- a/arch/x86/crypto/chacha_glue.c
235 ++++ b/arch/x86/crypto/chacha_glue.c
236 +@@ -7,7 +7,7 @@
237 + */
238 +
239 + #include <crypto/algapi.h>
240 +-#include <crypto/chacha.h>
241 ++#include <crypto/internal/chacha.h>
242 + #include <crypto/internal/simd.h>
243 + #include <crypto/internal/skcipher.h>
244 + #include <linux/kernel.h>
245 +diff --git a/crypto/Kconfig b/crypto/Kconfig
246 +index 7d19b46a7ef7..f29bf10c0462 100644
247 +--- a/crypto/Kconfig
248 ++++ b/crypto/Kconfig
249 +@@ -1393,6 +1393,7 @@ config CRYPTO_SALSA20
250 +
251 + config CRYPTO_CHACHA20
252 + tristate "ChaCha stream cipher algorithms"
253 ++ select CRYPTO_LIB_CHACHA_GENERIC
254 + select CRYPTO_BLKCIPHER
255 + help
256 + The ChaCha20, XChaCha20, and XChaCha12 stream cipher algorithms.
257 +diff --git a/crypto/chacha_generic.c b/crypto/chacha_generic.c
258 +index 085d8d219987..ebae6d9d9b32 100644
259 +--- a/crypto/chacha_generic.c
260 ++++ b/crypto/chacha_generic.c
261 +@@ -8,29 +8,10 @@
262 +
263 + #include <asm/unaligned.h>
264 + #include <crypto/algapi.h>
265 +-#include <crypto/chacha.h>
266 ++#include <crypto/internal/chacha.h>
267 + #include <crypto/internal/skcipher.h>
268 + #include <linux/module.h>
269 +
270 +-static void chacha_docrypt(u32 *state, u8 *dst, const u8 *src,
271 +- unsigned int bytes, int nrounds)
272 +-{
273 +- /* aligned to potentially speed up crypto_xor() */
274 +- u8 stream[CHACHA_BLOCK_SIZE] __aligned(sizeof(long));
275 +-
276 +- while (bytes >= CHACHA_BLOCK_SIZE) {
277 +- chacha_block(state, stream, nrounds);
278 +- crypto_xor_cpy(dst, src, stream, CHACHA_BLOCK_SIZE);
279 +- bytes -= CHACHA_BLOCK_SIZE;
280 +- dst += CHACHA_BLOCK_SIZE;
281 +- src += CHACHA_BLOCK_SIZE;
282 +- }
283 +- if (bytes) {
284 +- chacha_block(state, stream, nrounds);
285 +- crypto_xor_cpy(dst, src, stream, bytes);
286 +- }
287 +-}
288 +-
289 + static int chacha_stream_xor(struct skcipher_request *req,
290 + const struct chacha_ctx *ctx, const u8 *iv)
291 + {
292 +@@ -48,8 +29,8 @@ static int chacha_stream_xor(struct skcipher_request *req,
293 + if (nbytes < walk.total)
294 + nbytes = round_down(nbytes, CHACHA_BLOCK_SIZE);
295 +
296 +- chacha_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr,
297 +- nbytes, ctx->nrounds);
298 ++ chacha_crypt_generic(state, walk.dst.virt.addr,
299 ++ walk.src.virt.addr, nbytes, ctx->nrounds);
300 + err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
301 + }
302 +
303 +@@ -58,41 +39,10 @@ static int chacha_stream_xor(struct skcipher_request *req,
304 +
305 + void crypto_chacha_init(u32 *state, const struct chacha_ctx *ctx, const u8 *iv)
306 + {
307 +- state[0] = 0x61707865; /* "expa" */
308 +- state[1] = 0x3320646e; /* "nd 3" */
309 +- state[2] = 0x79622d32; /* "2-by" */
310 +- state[3] = 0x6b206574; /* "te k" */
311 +- state[4] = ctx->key[0];
312 +- state[5] = ctx->key[1];
313 +- state[6] = ctx->key[2];
314 +- state[7] = ctx->key[3];
315 +- state[8] = ctx->key[4];
316 +- state[9] = ctx->key[5];
317 +- state[10] = ctx->key[6];
318 +- state[11] = ctx->key[7];
319 +- state[12] = get_unaligned_le32(iv + 0);
320 +- state[13] = get_unaligned_le32(iv + 4);
321 +- state[14] = get_unaligned_le32(iv + 8);
322 +- state[15] = get_unaligned_le32(iv + 12);
323 ++ chacha_init_generic(state, ctx->key, iv);
324 + }
325 + EXPORT_SYMBOL_GPL(crypto_chacha_init);
326 +
327 +-static int chacha_setkey(struct crypto_skcipher *tfm, const u8 *key,
328 +- unsigned int keysize, int nrounds)
329 +-{
330 +- struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
331 +- int i;
332 +-
333 +- if (keysize != CHACHA_KEY_SIZE)
334 +- return -EINVAL;
335 +-
336 +- for (i = 0; i < ARRAY_SIZE(ctx->key); i++)
337 +- ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32));
338 +-
339 +- ctx->nrounds = nrounds;
340 +- return 0;
341 +-}
342 +-
343 + int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
344 + unsigned int keysize)
345 + {
346 +@@ -126,7 +76,7 @@ int crypto_xchacha_crypt(struct skcipher_request *req)
347 +
348 + /* Compute the subkey given the original key and first 128 nonce bits */
349 + crypto_chacha_init(state, ctx, req->iv);
350 +- hchacha_block(state, subctx.key, ctx->nrounds);
351 ++ hchacha_block_generic(state, subctx.key, ctx->nrounds);
352 + subctx.nrounds = ctx->nrounds;
353 +
354 + /* Build the real IV */
355 +diff --git a/include/crypto/chacha.h b/include/crypto/chacha.h
356 +index d1e723c6a37d..5c662f8fecac 100644
357 +--- a/include/crypto/chacha.h
358 ++++ b/include/crypto/chacha.h
359 +@@ -15,9 +15,8 @@
360 + #ifndef _CRYPTO_CHACHA_H
361 + #define _CRYPTO_CHACHA_H
362 +
363 +-#include <crypto/skcipher.h>
364 ++#include <asm/unaligned.h>
365 + #include <linux/types.h>
366 +-#include <linux/crypto.h>
367 +
368 + /* 32-bit stream position, then 96-bit nonce (RFC7539 convention) */
369 + #define CHACHA_IV_SIZE 16
370 +@@ -29,26 +28,70 @@
371 + /* 192-bit nonce, then 64-bit stream position */
372 + #define XCHACHA_IV_SIZE 32
373 +
374 +-struct chacha_ctx {
375 +- u32 key[8];
376 +- int nrounds;
377 +-};
378 +-
379 +-void chacha_block(u32 *state, u8 *stream, int nrounds);
380 ++void chacha_block_generic(u32 *state, u8 *stream, int nrounds);
381 + static inline void chacha20_block(u32 *state, u8 *stream)
382 + {
383 +- chacha_block(state, stream, 20);
384 ++ chacha_block_generic(state, stream, 20);
385 + }
386 +-void hchacha_block(const u32 *in, u32 *out, int nrounds);
387 +
388 +-void crypto_chacha_init(u32 *state, const struct chacha_ctx *ctx, const u8 *iv);
389 ++void hchacha_block_arch(const u32 *state, u32 *out, int nrounds);
390 ++void hchacha_block_generic(const u32 *state, u32 *out, int nrounds);
391 ++
392 ++static inline void hchacha_block(const u32 *state, u32 *out, int nrounds)
393 ++{
394 ++ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA))
395 ++ hchacha_block_arch(state, out, nrounds);
396 ++ else
397 ++ hchacha_block_generic(state, out, nrounds);
398 ++}
399 +
400 +-int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
401 +- unsigned int keysize);
402 +-int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key,
403 +- unsigned int keysize);
404 ++void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv);
405 ++static inline void chacha_init_generic(u32 *state, const u32 *key, const u8 *iv)
406 ++{
407 ++ state[0] = 0x61707865; /* "expa" */
408 ++ state[1] = 0x3320646e; /* "nd 3" */
409 ++ state[2] = 0x79622d32; /* "2-by" */
410 ++ state[3] = 0x6b206574; /* "te k" */
411 ++ state[4] = key[0];
412 ++ state[5] = key[1];
413 ++ state[6] = key[2];
414 ++ state[7] = key[3];
415 ++ state[8] = key[4];
416 ++ state[9] = key[5];
417 ++ state[10] = key[6];
418 ++ state[11] = key[7];
419 ++ state[12] = get_unaligned_le32(iv + 0);
420 ++ state[13] = get_unaligned_le32(iv + 4);
421 ++ state[14] = get_unaligned_le32(iv + 8);
422 ++ state[15] = get_unaligned_le32(iv + 12);
423 ++}
424 +
425 +-int crypto_chacha_crypt(struct skcipher_request *req);
426 +-int crypto_xchacha_crypt(struct skcipher_request *req);
427 ++static inline void chacha_init(u32 *state, const u32 *key, const u8 *iv)
428 ++{
429 ++ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA))
430 ++ chacha_init_arch(state, key, iv);
431 ++ else
432 ++ chacha_init_generic(state, key, iv);
433 ++}
434 ++
435 ++void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src,
436 ++ unsigned int bytes, int nrounds);
437 ++void chacha_crypt_generic(u32 *state, u8 *dst, const u8 *src,
438 ++ unsigned int bytes, int nrounds);
439 ++
440 ++static inline void chacha_crypt(u32 *state, u8 *dst, const u8 *src,
441 ++ unsigned int bytes, int nrounds)
442 ++{
443 ++ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CHACHA))
444 ++ chacha_crypt_arch(state, dst, src, bytes, nrounds);
445 ++ else
446 ++ chacha_crypt_generic(state, dst, src, bytes, nrounds);
447 ++}
448 ++
449 ++static inline void chacha20_crypt(u32 *state, u8 *dst, const u8 *src,
450 ++ unsigned int bytes)
451 ++{
452 ++ chacha_crypt(state, dst, src, bytes, 20);
453 ++}
454 +
455 + #endif /* _CRYPTO_CHACHA_H */
456 +diff --git a/include/crypto/internal/chacha.h b/include/crypto/internal/chacha.h
457 +new file mode 100644
458 +index 000000000000..c0e40b245431
459 +--- /dev/null
460 ++++ b/include/crypto/internal/chacha.h
461 +@@ -0,0 +1,53 @@
462 ++/* SPDX-License-Identifier: GPL-2.0 */
463 ++
464 ++#ifndef _CRYPTO_INTERNAL_CHACHA_H
465 ++#define _CRYPTO_INTERNAL_CHACHA_H
466 ++
467 ++#include <crypto/chacha.h>
468 ++#include <crypto/internal/skcipher.h>
469 ++#include <linux/crypto.h>
470 ++
471 ++struct chacha_ctx {
472 ++ u32 key[8];
473 ++ int nrounds;
474 ++};
475 ++
476 ++void crypto_chacha_init(u32 *state, const struct chacha_ctx *ctx, const u8 *iv);
477 ++
478 ++static inline int chacha_setkey(struct crypto_skcipher *tfm, const u8 *key,
479 ++ unsigned int keysize, int nrounds)
480 ++{
481 ++ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
482 ++ int i;
483 ++
484 ++ if (keysize != CHACHA_KEY_SIZE)
485 ++ return -EINVAL;
486 ++
487 ++ for (i = 0; i < ARRAY_SIZE(ctx->key); i++)
488 ++ ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32));
489 ++
490 ++ ctx->nrounds = nrounds;
491 ++ return 0;
492 ++}
493 ++
494 ++static inline int chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
495 ++ unsigned int keysize)
496 ++{
497 ++ return chacha_setkey(tfm, key, keysize, 20);
498 ++}
499 ++
500 ++static int inline chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key,
501 ++ unsigned int keysize)
502 ++{
503 ++ return chacha_setkey(tfm, key, keysize, 12);
504 ++}
505 ++
506 ++int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
507 ++ unsigned int keysize);
508 ++int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key,
509 ++ unsigned int keysize);
510 ++
511 ++int crypto_chacha_crypt(struct skcipher_request *req);
512 ++int crypto_xchacha_crypt(struct skcipher_request *req);
513 ++
514 +#endif /* _CRYPTO_INTERNAL_CHACHA_H */
515 +diff --git a/lib/Makefile b/lib/Makefile
516 +index c5892807e06f..5af38fd5cc60 100644
517 +--- a/lib/Makefile
518 ++++ b/lib/Makefile
519 +@@ -26,8 +26,7 @@ endif
520 +
521 + lib-y := ctype.o string.o vsprintf.o cmdline.o \
522 + rbtree.o radix-tree.o timerqueue.o xarray.o \
523 +- idr.o extable.o \
524 +- sha1.o chacha.o irq_regs.o argv_split.o \
525 ++ idr.o extable.o sha1.o irq_regs.o argv_split.o \
526 + flex_proportions.o ratelimit.o show_mem.o \
527 + is_single_threaded.o plist.o decompress.o kobject_uevent.o \
528 + earlycpio.o seq_buf.o siphash.o dec_and_lock.o \
529 +diff --git a/lib/chacha.c b/lib/chacha.c
530 +deleted file mode 100644
531 +index c7c9826564d3..000000000000
532 +--- a/lib/chacha.c
533 ++++ /dev/null
534 +@@ -1,113 +0,0 @@
535 +-// SPDX-License-Identifier: GPL-2.0-or-later
536 +-/*
537 +- * The "hash function" used as the core of the ChaCha stream cipher (RFC7539)
538 +- *
539 +- * Copyright (C) 2015 Martin Willi
540 +- */
541 +-
542 +-#include <linux/kernel.h>
543 +-#include <linux/export.h>
544 +-#include <linux/bitops.h>
545 +-#include <linux/cryptohash.h>
546 +-#include <asm/unaligned.h>
547 +-#include <crypto/chacha.h>
548 +-
549 +-static void chacha_permute(u32 *x, int nrounds)
550 +-{
551 +- int i;
552 +-
553 +- /* whitelist the allowed round counts */
554 +- WARN_ON_ONCE(nrounds != 20 && nrounds != 12);
555 +-
556 +- for (i = 0; i < nrounds; i += 2) {
557 +- x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 16);
558 +- x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 16);
559 +- x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 16);
560 +- x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 16);
561 +-
562 +- x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 12);
563 +- x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 12);
564 +- x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 12);
565 +- x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 12);
566 +-
567 +- x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 8);
568 +- x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 8);
569 +- x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 8);
570 +- x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 8);
571 +-
572 +- x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 7);
573 +- x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 7);
574 +- x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 7);
575 +- x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 7);
576 +-
577 +- x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 16);
578 +- x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 16);
579 +- x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 16);
580 +- x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 16);
581 +-
582 +- x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 12);
583 +- x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 12);
584 +- x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 12);
585 +- x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 12);
586 +-
587 +- x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 8);
588 +- x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 8);
589 +- x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 8);
590 +- x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 8);
591 +-
592 +- x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 7);
593 +- x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 7);
594 +- x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 7);
595 +- x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 7);
596 +- }
597 +-}
598 +-
599 +-/**
600 +- * chacha_block - generate one keystream block and increment block counter
601 +- * @state: input state matrix (16 32-bit words)
602 +- * @stream: output keystream block (64 bytes)
603 +- * @nrounds: number of rounds (20 or 12; 20 is recommended)
604 +- *
605 +- * This is the ChaCha core, a function from 64-byte strings to 64-byte strings.
606 +- * The caller has already converted the endianness of the input. This function
607 +- * also handles incrementing the block counter in the input matrix.
608 +- */
609 +-void chacha_block(u32 *state, u8 *stream, int nrounds)
610 +-{
611 +- u32 x[16];
612 +- int i;
613 +-
614 +- memcpy(x, state, 64);
615 +-
616 +- chacha_permute(x, nrounds);
617 +-
618 +- for (i = 0; i < ARRAY_SIZE(x); i++)
619 +- put_unaligned_le32(x[i] + state[i], &stream[i * sizeof(u32)]);
620 +-
621 +- state[12]++;
622 +-}
623 +-EXPORT_SYMBOL(chacha_block);
624 +-
625 +-/**
626 +- * hchacha_block - abbreviated ChaCha core, for XChaCha
627 +- * @in: input state matrix (16 32-bit words)
628 +- * @out: output (8 32-bit words)
629 +- * @nrounds: number of rounds (20 or 12; 20 is recommended)
630 +- *
631 +- * HChaCha is the ChaCha equivalent of HSalsa and is an intermediate step
632 +- * towards XChaCha (see https://cr.yp.to/snuffle/xsalsa-20081128.pdf). HChaCha
633 +- * skips the final addition of the initial state, and outputs only certain words
634 +- * of the state. It should not be used for streaming directly.
635 +- */
636 +-void hchacha_block(const u32 *in, u32 *out, int nrounds)
637 +-{
638 +- u32 x[16];
639 +-
640 +- memcpy(x, in, 64);
641 +-
642 +- chacha_permute(x, nrounds);
643 +-
644 +- memcpy(&out[0], &x[0], 16);
645 +- memcpy(&out[4], &x[12], 16);
646 +-}
647 +-EXPORT_SYMBOL(hchacha_block);
648 +diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
649 +index 261430051595..6a11931ae105 100644
650 +--- a/lib/crypto/Kconfig
651 ++++ b/lib/crypto/Kconfig
652 +@@ -8,6 +8,32 @@ config CRYPTO_LIB_AES
653 + config CRYPTO_LIB_ARC4
654 + tristate
655 +
656 ++config CRYPTO_ARCH_HAVE_LIB_CHACHA
657 ++ tristate
658 ++ help
659 ++ Declares whether the architecture provides an arch-specific
660 ++ accelerated implementation of the ChaCha library interface,
661 ++ either builtin or as a module.
662 ++
663 ++config CRYPTO_LIB_CHACHA_GENERIC
664 ++ tristate
665 ++ select CRYPTO_ALGAPI
666 ++ help
667 ++ This symbol can be depended upon by arch implementations of the
668 ++ ChaCha library interface that require the generic code as a
669 ++ fallback, e.g., for SIMD implementations. If no arch specific
670 ++ implementation is enabled, this implementation serves the users
671 ++ of CRYPTO_LIB_CHACHA.
672 ++
673 ++config CRYPTO_LIB_CHACHA
674 ++ tristate "ChaCha library interface"
675 ++ depends on CRYPTO_ARCH_HAVE_LIB_CHACHA || !CRYPTO_ARCH_HAVE_LIB_CHACHA
676 ++ select CRYPTO_LIB_CHACHA_GENERIC if CRYPTO_ARCH_HAVE_LIB_CHACHA=n
677 ++ help
678 ++ Enable the ChaCha library interface. This interface may be fulfilled
679 ++ by either the generic implementation or an arch-specific one, if one
680 ++ is available and enabled.
681 ++
682 + config CRYPTO_LIB_DES
683 + tristate
684 +
685 +diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
686 +index 63de4cb3fcf8..0ce40604e104 100644
687 +--- a/lib/crypto/Makefile
688 ++++ b/lib/crypto/Makefile
689 +@@ -1,5 +1,9 @@
690 + # SPDX-License-Identifier: GPL-2.0
691 +
692 ++# chacha is used by the /dev/random driver which is always builtin
693 ++obj-y += chacha.o
694 ++obj-$(CONFIG_CRYPTO_LIB_CHACHA_GENERIC) += libchacha.o
695 ++
696 + obj-$(CONFIG_CRYPTO_LIB_AES) += libaes.o
697 + libaes-y := aes.o
698 +
699 +diff --git a/lib/crypto/chacha.c b/lib/crypto/chacha.c
700 +new file mode 100644
701 +index 000000000000..65ead6b0c7e0
702 +--- /dev/null
703 ++++ b/lib/crypto/chacha.c
704 +@@ -0,0 +1,115 @@
705 ++// SPDX-License-Identifier: GPL-2.0-or-later
706 ++/*
707 ++ * The "hash function" used as the core of the ChaCha stream cipher (RFC7539)
708 ++ *
709 ++ * Copyright (C) 2015 Martin Willi
710 ++ */
711 ++
712 ++#include <linux/bug.h>
713 ++#include <linux/kernel.h>
714 ++#include <linux/export.h>
715 ++#include <linux/bitops.h>
716 ++#include <linux/string.h>
717 ++#include <linux/cryptohash.h>
718 ++#include <asm/unaligned.h>
719 ++#include <crypto/chacha.h>
720 ++
721 ++static void chacha_permute(u32 *x, int nrounds)
722 ++{
723 ++ int i;
724 ++
725 ++ /* whitelist the allowed round counts */
726 ++ WARN_ON_ONCE(nrounds != 20 && nrounds != 12);
727 ++
728 ++ for (i = 0; i < nrounds; i += 2) {
729 ++ x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 16);
730 ++ x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 16);
731 ++ x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 16);
732 ++ x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 16);
733 ++
734 ++ x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 12);
735 ++ x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 12);
736 ++ x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 12);
737 ++ x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 12);
738 ++
739 ++ x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 8);
740 ++ x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 8);
741 ++ x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 8);
742 ++ x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 8);
743 ++
744 ++ x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 7);
745 ++ x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 7);
746 ++ x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 7);
747 ++ x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 7);
748 ++
749 ++ x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 16);
750 ++ x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 16);
751 ++ x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 16);
752 ++ x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 16);
753 ++
754 ++ x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 12);
755 ++ x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 12);
756 ++ x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 12);
757 ++ x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 12);
758 ++
759 ++ x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 8);
760 ++ x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 8);
761 ++ x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 8);
762 ++ x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 8);
763 ++
764 ++ x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 7);
765 ++ x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 7);
766 ++ x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 7);
767 ++ x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 7);
768 ++ }
769 ++}
770 ++
771 ++/**
772 + * chacha_block_generic - generate one keystream block and increment block counter
773 ++ * @state: input state matrix (16 32-bit words)
774 ++ * @stream: output keystream block (64 bytes)
775 ++ * @nrounds: number of rounds (20 or 12; 20 is recommended)
776 ++ *
777 ++ * This is the ChaCha core, a function from 64-byte strings to 64-byte strings.
778 ++ * The caller has already converted the endianness of the input. This function
779 ++ * also handles incrementing the block counter in the input matrix.
780 ++ */
781 ++void chacha_block_generic(u32 *state, u8 *stream, int nrounds)
782 ++{
783 ++ u32 x[16];
784 ++ int i;
785 ++
786 ++ memcpy(x, state, 64);
787 ++
788 ++ chacha_permute(x, nrounds);
789 ++
790 ++ for (i = 0; i < ARRAY_SIZE(x); i++)
791 ++ put_unaligned_le32(x[i] + state[i], &stream[i * sizeof(u32)]);
792 ++
793 ++ state[12]++;
794 ++}
795 ++EXPORT_SYMBOL(chacha_block_generic);
796 ++
797 ++/**
798 ++ * hchacha_block_generic - abbreviated ChaCha core, for XChaCha
799 ++ * @state: input state matrix (16 32-bit words)
800 + * @stream: output (8 32-bit words)
801 ++ * @nrounds: number of rounds (20 or 12; 20 is recommended)
802 ++ *
803 ++ * HChaCha is the ChaCha equivalent of HSalsa and is an intermediate step
804 ++ * towards XChaCha (see https://cr.yp.to/snuffle/xsalsa-20081128.pdf). HChaCha
805 ++ * skips the final addition of the initial state, and outputs only certain words
806 ++ * of the state. It should not be used for streaming directly.
807 ++ */
808 ++void hchacha_block_generic(const u32 *state, u32 *stream, int nrounds)
809 ++{
810 ++ u32 x[16];
811 ++
812 ++ memcpy(x, state, 64);
813 ++
814 ++ chacha_permute(x, nrounds);
815 ++
816 ++ memcpy(&stream[0], &x[0], 16);
817 ++ memcpy(&stream[4], &x[12], 16);
818 ++}
819 ++EXPORT_SYMBOL(hchacha_block_generic);
820 +diff --git a/lib/crypto/libchacha.c b/lib/crypto/libchacha.c
821 +new file mode 100644
822 +index 000000000000..dabc3accae05
823 +--- /dev/null
824 ++++ b/lib/crypto/libchacha.c
825 +@@ -0,0 +1,35 @@
826 ++// SPDX-License-Identifier: GPL-2.0-or-later
827 ++/*
828 ++ * The ChaCha stream cipher (RFC7539)
829 ++ *
830 ++ * Copyright (C) 2015 Martin Willi
831 ++ */
832 ++
833 ++#include <linux/kernel.h>
834 ++#include <linux/export.h>
835 ++#include <linux/module.h>
836 ++
837 ++#include <crypto/algapi.h> // for crypto_xor_cpy
838 ++#include <crypto/chacha.h>
839 ++
840 ++void chacha_crypt_generic(u32 *state, u8 *dst, const u8 *src,
841 ++ unsigned int bytes, int nrounds)
842 ++{
843 ++ /* aligned to potentially speed up crypto_xor() */
844 ++ u8 stream[CHACHA_BLOCK_SIZE] __aligned(sizeof(long));
845 ++
846 ++ while (bytes >= CHACHA_BLOCK_SIZE) {
847 ++ chacha_block_generic(state, stream, nrounds);
848 ++ crypto_xor_cpy(dst, src, stream, CHACHA_BLOCK_SIZE);
849 ++ bytes -= CHACHA_BLOCK_SIZE;
850 ++ dst += CHACHA_BLOCK_SIZE;
851 ++ src += CHACHA_BLOCK_SIZE;
852 ++ }
853 ++ if (bytes) {
854 ++ chacha_block_generic(state, stream, nrounds);
855 ++ crypto_xor_cpy(dst, src, stream, bytes);
856 ++ }
857 ++}
858 ++EXPORT_SYMBOL(chacha_crypt_generic);
859 ++
860 ++MODULE_LICENSE("GPL");
861 +--
862 +cgit v1.2.3-4-ga26e
863 +
864 +
865 +From 90d7bb75ca11b255dda5db615979ed182dc32f73 Mon Sep 17 00:00:00 2001
866 +From: Ard Biesheuvel <ardb@××××××.org>
867 +Date: Fri, 8 Nov 2019 13:22:09 +0100
868 +Subject: crypto: x86/chacha - depend on generic chacha library instead of
869 + crypto driver
870 +
871 +commit 28e8d89b1ce8d2e7badfb5f69971dd635acb8863 upstream.
872 +
873 +In preparation of extending the x86 ChaCha driver to also expose the ChaCha
874 +library interface, drop the dependency on the chacha_generic crypto driver
875 +as a non-SIMD fallback, and depend on the generic ChaCha library directly.
876 +This way, we only pull in the code we actually need, without registering
877 +a set of ChaCha skciphers that we will never use.
878 +
879 +Since turning the FPU on and off is cheap these days, simplify the SIMD
880 +routine by dropping the per-page yield, which makes for a cleaner switch
881 +to the library API as well. This also allows us to invoke the skcipher
882 +walk routines in non-atomic mode.
883 +
884 +Signed-off-by: Ard Biesheuvel <ardb@××××××.org>
885 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
886 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
887 +---
888 + arch/x86/crypto/chacha_glue.c | 90 +++++++++++++++++--------------------------
889 + crypto/Kconfig | 2 +-
890 + 2 files changed, 36 insertions(+), 56 deletions(-)
891 +
892 +diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c
893 +index bc62daa8dafd..0aabb382edce 100644
894 +--- a/arch/x86/crypto/chacha_glue.c
895 ++++ b/arch/x86/crypto/chacha_glue.c
896 +@@ -123,37 +123,38 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
897 + }
898 + }
899 +
900 +-static int chacha_simd_stream_xor(struct skcipher_walk *walk,
901 ++static int chacha_simd_stream_xor(struct skcipher_request *req,
902 + const struct chacha_ctx *ctx, const u8 *iv)
903 + {
904 + u32 *state, state_buf[16 + 2] __aligned(8);
905 +- int next_yield = 4096; /* bytes until next FPU yield */
906 +- int err = 0;
907 ++ struct skcipher_walk walk;
908 ++ int err;
909 ++
910 ++ err = skcipher_walk_virt(&walk, req, false);
911 +
912 + BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16);
913 + state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN);
914 +
915 +- crypto_chacha_init(state, ctx, iv);
916 ++ chacha_init_generic(state, ctx->key, iv);
917 +
918 +- while (walk->nbytes > 0) {
919 +- unsigned int nbytes = walk->nbytes;
920 ++ while (walk.nbytes > 0) {
921 ++ unsigned int nbytes = walk.nbytes;
922 +
923 +- if (nbytes < walk->total) {
924 +- nbytes = round_down(nbytes, walk->stride);
925 +- next_yield -= nbytes;
926 +- }
927 ++ if (nbytes < walk.total)
928 ++ nbytes = round_down(nbytes, walk.stride);
929 +
930 +- chacha_dosimd(state, walk->dst.virt.addr, walk->src.virt.addr,
931 +- nbytes, ctx->nrounds);
932 +-
933 +- if (next_yield <= 0) {
934 +- /* temporarily allow preemption */
935 +- kernel_fpu_end();
936 ++ if (!crypto_simd_usable()) {
937 ++ chacha_crypt_generic(state, walk.dst.virt.addr,
938 ++ walk.src.virt.addr, nbytes,
939 ++ ctx->nrounds);
940 ++ } else {
941 + kernel_fpu_begin();
942 +- next_yield = 4096;
943 ++ chacha_dosimd(state, walk.dst.virt.addr,
944 ++ walk.src.virt.addr, nbytes,
945 ++ ctx->nrounds);
946 ++ kernel_fpu_end();
947 + }
948 +-
949 +- err = skcipher_walk_done(walk, walk->nbytes - nbytes);
950 ++ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
951 + }
952 +
953 + return err;
954 +@@ -163,55 +164,34 @@ static int chacha_simd(struct skcipher_request *req)
955 + {
956 + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
957 + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
958 +- struct skcipher_walk walk;
959 +- int err;
960 +
961 +- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
962 +- return crypto_chacha_crypt(req);
963 +-
964 +- err = skcipher_walk_virt(&walk, req, true);
965 +- if (err)
966 +- return err;
967 +-
968 +- kernel_fpu_begin();
969 +- err = chacha_simd_stream_xor(&walk, ctx, req->iv);
970 +- kernel_fpu_end();
971 +- return err;
972 ++ return chacha_simd_stream_xor(req, ctx, req->iv);
973 + }
974 +
975 + static int xchacha_simd(struct skcipher_request *req)
976 + {
977 + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
978 + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
979 +- struct skcipher_walk walk;
980 +- struct chacha_ctx subctx;
981 + u32 *state, state_buf[16 + 2] __aligned(8);
982 ++ struct chacha_ctx subctx;
983 + u8 real_iv[16];
984 +- int err;
985 +-
986 +- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
987 +- return crypto_xchacha_crypt(req);
988 +-
989 +- err = skcipher_walk_virt(&walk, req, true);
990 +- if (err)
991 +- return err;
992 +
993 + BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16);
994 + state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN);
995 +- crypto_chacha_init(state, ctx, req->iv);
996 +-
997 +- kernel_fpu_begin();
998 +-
999 +- hchacha_block_ssse3(state, subctx.key, ctx->nrounds);
1000 ++ chacha_init_generic(state, ctx->key, req->iv);
1001 ++
1002 ++ if (req->cryptlen > CHACHA_BLOCK_SIZE && crypto_simd_usable()) {
1003 ++ kernel_fpu_begin();
1004 ++ hchacha_block_ssse3(state, subctx.key, ctx->nrounds);
1005 ++ kernel_fpu_end();
1006 ++ } else {
1007 ++ hchacha_block_generic(state, subctx.key, ctx->nrounds);
1008 ++ }
1009 + subctx.nrounds = ctx->nrounds;
1010 +
1011 + memcpy(&real_iv[0], req->iv + 24, 8);
1012 + memcpy(&real_iv[8], req->iv + 16, 8);
1013 +- err = chacha_simd_stream_xor(&walk, &subctx, real_iv);
1014 +-
1015 +- kernel_fpu_end();
1016 +-
1017 +- return err;
1018 ++ return chacha_simd_stream_xor(req, &subctx, real_iv);
1019 + }
1020 +
1021 + static struct skcipher_alg algs[] = {
1022 +@@ -227,7 +207,7 @@ static struct skcipher_alg algs[] = {
1023 + .max_keysize = CHACHA_KEY_SIZE,
1024 + .ivsize = CHACHA_IV_SIZE,
1025 + .chunksize = CHACHA_BLOCK_SIZE,
1026 +- .setkey = crypto_chacha20_setkey,
1027 ++ .setkey = chacha20_setkey,
1028 + .encrypt = chacha_simd,
1029 + .decrypt = chacha_simd,
1030 + }, {
1031 +@@ -242,7 +222,7 @@ static struct skcipher_alg algs[] = {
1032 + .max_keysize = CHACHA_KEY_SIZE,
1033 + .ivsize = XCHACHA_IV_SIZE,
1034 + .chunksize = CHACHA_BLOCK_SIZE,
1035 +- .setkey = crypto_chacha20_setkey,
1036 ++ .setkey = chacha20_setkey,
1037 + .encrypt = xchacha_simd,
1038 + .decrypt = xchacha_simd,
1039 + }, {
1040 +@@ -257,7 +237,7 @@ static struct skcipher_alg algs[] = {
1041 + .max_keysize = CHACHA_KEY_SIZE,
1042 + .ivsize = XCHACHA_IV_SIZE,
1043 + .chunksize = CHACHA_BLOCK_SIZE,
1044 +- .setkey = crypto_chacha12_setkey,
1045 ++ .setkey = chacha12_setkey,
1046 + .encrypt = xchacha_simd,
1047 + .decrypt = xchacha_simd,
1048 + },
1049 +diff --git a/crypto/Kconfig b/crypto/Kconfig
1050 +index f29bf10c0462..564a3f7b40b8 100644
1051 +--- a/crypto/Kconfig
1052 ++++ b/crypto/Kconfig
1053 +@@ -1417,7 +1417,7 @@ config CRYPTO_CHACHA20_X86_64
1054 + tristate "ChaCha stream cipher algorithms (x86_64/SSSE3/AVX2/AVX-512VL)"
1055 + depends on X86 && 64BIT
1056 + select CRYPTO_BLKCIPHER
1057 +- select CRYPTO_CHACHA20
1058 ++ select CRYPTO_LIB_CHACHA_GENERIC
1059 + help
1060 + SSSE3, AVX2, and AVX-512VL optimized implementations of the ChaCha20,
1061 + XChaCha20, and XChaCha12 stream ciphers.
1062 +--
1063 +cgit v1.2.3-4-ga26e
1064 +
1065 +
1066 +From 1ba2280e0873bcedc02e33b55c2250159da3b717 Mon Sep 17 00:00:00 2001
1067 +From: Ard Biesheuvel <ardb@××××××.org>
1068 +Date: Fri, 8 Nov 2019 13:22:10 +0100
1069 +Subject: crypto: x86/chacha - expose SIMD ChaCha routine as library function
1070 +
1071 +commit 84e03fa39fbe95a5567d43bff458c6d3b3a23ad1 upstream.
1072 +
1073 +Wire the existing x86 SIMD ChaCha code into the new ChaCha library
1074 +interface, so that users of the library interface will get the
1075 +accelerated version when available.
1076 +
1077 +Given that calls into the library API will always go through the
1078 +routines in this module if it is enabled, switch to static keys
1079 +to select the optimal implementation available (which may be none
1080 +at all, in which case we defer to the generic implementation for
1081 +all invocations).
1082 +
1083 +Signed-off-by: Ard Biesheuvel <ardb@××××××.org>
1084 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
1085 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
1086 +---
1087 + arch/x86/crypto/chacha_glue.c | 91 +++++++++++++++++++++++++++++++------------
1088 + crypto/Kconfig | 1 +
1089 + include/crypto/chacha.h | 6 +++
1090 + 3 files changed, 73 insertions(+), 25 deletions(-)
1091 +
1092 +diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c
1093 +index 0aabb382edce..b391e13a9e41 100644
1094 +--- a/arch/x86/crypto/chacha_glue.c
1095 ++++ b/arch/x86/crypto/chacha_glue.c
1096 +@@ -21,24 +21,24 @@ asmlinkage void chacha_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
1097 + asmlinkage void chacha_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
1098 + unsigned int len, int nrounds);
1099 + asmlinkage void hchacha_block_ssse3(const u32 *state, u32 *out, int nrounds);
1100 +-#ifdef CONFIG_AS_AVX2
1101 ++
1102 + asmlinkage void chacha_2block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
1103 + unsigned int len, int nrounds);
1104 + asmlinkage void chacha_4block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
1105 + unsigned int len, int nrounds);
1106 + asmlinkage void chacha_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
1107 + unsigned int len, int nrounds);
1108 +-static bool chacha_use_avx2;
1109 +-#ifdef CONFIG_AS_AVX512
1110 ++
1111 + asmlinkage void chacha_2block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
1112 + unsigned int len, int nrounds);
1113 + asmlinkage void chacha_4block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
1114 + unsigned int len, int nrounds);
1115 + asmlinkage void chacha_8block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
1116 + unsigned int len, int nrounds);
1117 +-static bool chacha_use_avx512vl;
1118 +-#endif
1119 +-#endif
1120 ++
1121 ++static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_simd);
1122 ++static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx2);
1123 ++static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx512vl);
1124 +
1125 + static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks)
1126 + {
1127 +@@ -49,9 +49,8 @@ static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks)
1128 + static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
1129 + unsigned int bytes, int nrounds)
1130 + {
1131 +-#ifdef CONFIG_AS_AVX2
1132 +-#ifdef CONFIG_AS_AVX512
1133 +- if (chacha_use_avx512vl) {
1134 ++ if (IS_ENABLED(CONFIG_AS_AVX512) &&
1135 ++ static_branch_likely(&chacha_use_avx512vl)) {
1136 + while (bytes >= CHACHA_BLOCK_SIZE * 8) {
1137 + chacha_8block_xor_avx512vl(state, dst, src, bytes,
1138 + nrounds);
1139 +@@ -79,8 +78,9 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
1140 + return;
1141 + }
1142 + }
1143 +-#endif
1144 +- if (chacha_use_avx2) {
1145 ++
1146 ++ if (IS_ENABLED(CONFIG_AS_AVX2) &&
1147 ++ static_branch_likely(&chacha_use_avx2)) {
1148 + while (bytes >= CHACHA_BLOCK_SIZE * 8) {
1149 + chacha_8block_xor_avx2(state, dst, src, bytes, nrounds);
1150 + bytes -= CHACHA_BLOCK_SIZE * 8;
1151 +@@ -104,7 +104,7 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
1152 + return;
1153 + }
1154 + }
1155 +-#endif
1156 ++
1157 + while (bytes >= CHACHA_BLOCK_SIZE * 4) {
1158 + chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds);
1159 + bytes -= CHACHA_BLOCK_SIZE * 4;
1160 +@@ -123,6 +123,43 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
1161 + }
1162 + }
1163 +
1164 ++void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
1165 ++{
1166 ++ state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);
1167 ++
1168 ++ if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable()) {
1169 ++ hchacha_block_generic(state, stream, nrounds);
1170 ++ } else {
1171 ++ kernel_fpu_begin();
1172 ++ hchacha_block_ssse3(state, stream, nrounds);
1173 ++ kernel_fpu_end();
1174 ++ }
1175 ++}
1176 ++EXPORT_SYMBOL(hchacha_block_arch);
1177 ++
1178 ++void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
1179 ++{
1180 ++ state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);
1181 ++
1182 ++ chacha_init_generic(state, key, iv);
1183 ++}
1184 ++EXPORT_SYMBOL(chacha_init_arch);
1185 ++
1186 ++void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
1187 ++ int nrounds)
1188 ++{
1189 ++ state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);
1190 ++
1191 ++ if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable() ||
1192 ++ bytes <= CHACHA_BLOCK_SIZE)
1193 ++ return chacha_crypt_generic(state, dst, src, bytes, nrounds);
1194 ++
1195 ++ kernel_fpu_begin();
1196 ++ chacha_dosimd(state, dst, src, bytes, nrounds);
1197 ++ kernel_fpu_end();
1198 ++}
1199 ++EXPORT_SYMBOL(chacha_crypt_arch);
1200 ++
1201 + static int chacha_simd_stream_xor(struct skcipher_request *req,
1202 + const struct chacha_ctx *ctx, const u8 *iv)
1203 + {
1204 +@@ -143,7 +180,8 @@ static int chacha_simd_stream_xor(struct skcipher_request *req,
1205 + if (nbytes < walk.total)
1206 + nbytes = round_down(nbytes, walk.stride);
1207 +
1208 +- if (!crypto_simd_usable()) {
1209 ++ if (!static_branch_likely(&chacha_use_simd) ||
1210 ++ !crypto_simd_usable()) {
1211 + chacha_crypt_generic(state, walk.dst.virt.addr,
1212 + walk.src.virt.addr, nbytes,
1213 + ctx->nrounds);
1214 +@@ -246,18 +284,21 @@ static struct skcipher_alg algs[] = {
1215 + static int __init chacha_simd_mod_init(void)
1216 + {
1217 + if (!boot_cpu_has(X86_FEATURE_SSSE3))
1218 +- return -ENODEV;
1219 +-
1220 +-#ifdef CONFIG_AS_AVX2
1221 +- chacha_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
1222 +- boot_cpu_has(X86_FEATURE_AVX2) &&
1223 +- cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
1224 +-#ifdef CONFIG_AS_AVX512
1225 +- chacha_use_avx512vl = chacha_use_avx2 &&
1226 +- boot_cpu_has(X86_FEATURE_AVX512VL) &&
1227 +- boot_cpu_has(X86_FEATURE_AVX512BW); /* kmovq */
1228 +-#endif
1229 +-#endif
1230 ++ return 0;
1231 ++
1232 ++ static_branch_enable(&chacha_use_simd);
1233 ++
1234 ++ if (IS_ENABLED(CONFIG_AS_AVX2) &&
1235 ++ boot_cpu_has(X86_FEATURE_AVX) &&
1236 ++ boot_cpu_has(X86_FEATURE_AVX2) &&
1237 ++ cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
1238 ++ static_branch_enable(&chacha_use_avx2);
1239 ++
1240 ++ if (IS_ENABLED(CONFIG_AS_AVX512) &&
1241 ++ boot_cpu_has(X86_FEATURE_AVX512VL) &&
1242 ++ boot_cpu_has(X86_FEATURE_AVX512BW)) /* kmovq */
1243 ++ static_branch_enable(&chacha_use_avx512vl);
1244 ++ }
1245 + return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
1246 + }
1247 +
1248 +diff --git a/crypto/Kconfig b/crypto/Kconfig
1249 +index 564a3f7b40b8..649dc564f242 100644
1250 +--- a/crypto/Kconfig
1251 ++++ b/crypto/Kconfig
1252 +@@ -1418,6 +1418,7 @@ config CRYPTO_CHACHA20_X86_64
1253 + depends on X86 && 64BIT
1254 + select CRYPTO_BLKCIPHER
1255 + select CRYPTO_LIB_CHACHA_GENERIC
1256 ++ select CRYPTO_ARCH_HAVE_LIB_CHACHA
1257 + help
1258 + SSSE3, AVX2, and AVX-512VL optimized implementations of the ChaCha20,
1259 + XChaCha20, and XChaCha12 stream ciphers.
1260 +diff --git a/include/crypto/chacha.h b/include/crypto/chacha.h
1261 +index 5c662f8fecac..2676f4fbd4c1 100644
1262 +--- a/include/crypto/chacha.h
1263 ++++ b/include/crypto/chacha.h
1264 +@@ -25,6 +25,12 @@
1265 + #define CHACHA_BLOCK_SIZE 64
1266 + #define CHACHAPOLY_IV_SIZE 12
1267 +
1268 ++#ifdef CONFIG_X86_64
1269 ++#define CHACHA_STATE_WORDS ((CHACHA_BLOCK_SIZE + 12) / sizeof(u32))
1270 ++#else
1271 ++#define CHACHA_STATE_WORDS (CHACHA_BLOCK_SIZE / sizeof(u32))
1272 ++#endif
1273 ++
1274 + /* 192-bit nonce, then 64-bit stream position */
1275 + #define XCHACHA_IV_SIZE 32
1276 +
1277 +--
1278 +cgit v1.2.3-4-ga26e
1279 +
1280 +
1281 +From 29299e881847d16429a35622d000b8704bcc3802 Mon Sep 17 00:00:00 2001
1282 +From: Ard Biesheuvel <ardb@××××××.org>
1283 +Date: Fri, 8 Nov 2019 13:22:11 +0100
1284 +Subject: crypto: arm64/chacha - depend on generic chacha library instead of
1285 + crypto driver
1286 +
1287 +commit c77da4867cbb7841177275dbb250f5c09679fae4 upstream.
1288 +
1289 +Depend on the generic ChaCha library routines instead of pulling in the
1290 +generic ChaCha skcipher driver, which is more than we need, and makes
1291 +managing the dependencies between the generic library, generic driver,
1292 +accelerated library and driver more complicated.
1293 +
1294 +While at it, drop the logic to prefer the scalar code on short inputs.
1295 +Turning the NEON on and off is cheap these days, and one major use case
1296 +for ChaCha20 is ChaCha20-Poly1305, which is guaranteed to hit the scalar
1297 +path upon every invocation (when doing the Poly1305 nonce generation)
1298 +
1299 +Signed-off-by: Ard Biesheuvel <ardb@××××××.org>
1300 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
1301 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
1302 +---
1303 + arch/arm64/crypto/Kconfig | 2 +-
1304 + arch/arm64/crypto/chacha-neon-glue.c | 40 ++++++++++++++++++++----------------
1305 + 2 files changed, 23 insertions(+), 19 deletions(-)
1306 +
1307 +diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
1308 +index 4922c4451e7c..fdf52d5f18f9 100644
1309 +--- a/arch/arm64/crypto/Kconfig
1310 ++++ b/arch/arm64/crypto/Kconfig
1311 +@@ -103,7 +103,7 @@ config CRYPTO_CHACHA20_NEON
1312 + tristate "ChaCha20, XChaCha20, and XChaCha12 stream ciphers using NEON instructions"
1313 + depends on KERNEL_MODE_NEON
1314 + select CRYPTO_BLKCIPHER
1315 +- select CRYPTO_CHACHA20
1316 ++ select CRYPTO_LIB_CHACHA_GENERIC
1317 +
1318 + config CRYPTO_NHPOLY1305_NEON
1319 + tristate "NHPoly1305 hash function using NEON instructions (for Adiantum)"
1320 +diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c
1321 +index d4cc61bfe79d..cae2cb92eca8 100644
1322 +--- a/arch/arm64/crypto/chacha-neon-glue.c
1323 ++++ b/arch/arm64/crypto/chacha-neon-glue.c
1324 +@@ -68,7 +68,7 @@ static int chacha_neon_stream_xor(struct skcipher_request *req,
1325 +
1326 + err = skcipher_walk_virt(&walk, req, false);
1327 +
1328 +- crypto_chacha_init(state, ctx, iv);
1329 ++ chacha_init_generic(state, ctx->key, iv);
1330 +
1331 + while (walk.nbytes > 0) {
1332 + unsigned int nbytes = walk.nbytes;
1333 +@@ -76,10 +76,16 @@ static int chacha_neon_stream_xor(struct skcipher_request *req,
1334 + if (nbytes < walk.total)
1335 + nbytes = rounddown(nbytes, walk.stride);
1336 +
1337 +- kernel_neon_begin();
1338 +- chacha_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
1339 +- nbytes, ctx->nrounds);
1340 +- kernel_neon_end();
1341 ++ if (!crypto_simd_usable()) {
1342 ++ chacha_crypt_generic(state, walk.dst.virt.addr,
1343 ++ walk.src.virt.addr, nbytes,
1344 ++ ctx->nrounds);
1345 ++ } else {
1346 ++ kernel_neon_begin();
1347 ++ chacha_doneon(state, walk.dst.virt.addr,
1348 ++ walk.src.virt.addr, nbytes, ctx->nrounds);
1349 ++ kernel_neon_end();
1350 ++ }
1351 + err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
1352 + }
1353 +
1354 +@@ -91,9 +97,6 @@ static int chacha_neon(struct skcipher_request *req)
1355 + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
1356 + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
1357 +
1358 +- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
1359 +- return crypto_chacha_crypt(req);
1360 +-
1361 + return chacha_neon_stream_xor(req, ctx, req->iv);
1362 + }
1363 +
1364 +@@ -105,14 +108,15 @@ static int xchacha_neon(struct skcipher_request *req)
1365 + u32 state[16];
1366 + u8 real_iv[16];
1367 +
1368 +- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
1369 +- return crypto_xchacha_crypt(req);
1370 ++ chacha_init_generic(state, ctx->key, req->iv);
1371 +
1372 +- crypto_chacha_init(state, ctx, req->iv);
1373 +-
1374 +- kernel_neon_begin();
1375 +- hchacha_block_neon(state, subctx.key, ctx->nrounds);
1376 +- kernel_neon_end();
1377 ++ if (crypto_simd_usable()) {
1378 ++ kernel_neon_begin();
1379 ++ hchacha_block_neon(state, subctx.key, ctx->nrounds);
1380 ++ kernel_neon_end();
1381 ++ } else {
1382 ++ hchacha_block_generic(state, subctx.key, ctx->nrounds);
1383 ++ }
1384 + subctx.nrounds = ctx->nrounds;
1385 +
1386 + memcpy(&real_iv[0], req->iv + 24, 8);
1387 +@@ -134,7 +138,7 @@ static struct skcipher_alg algs[] = {
1388 + .ivsize = CHACHA_IV_SIZE,
1389 + .chunksize = CHACHA_BLOCK_SIZE,
1390 + .walksize = 5 * CHACHA_BLOCK_SIZE,
1391 +- .setkey = crypto_chacha20_setkey,
1392 ++ .setkey = chacha20_setkey,
1393 + .encrypt = chacha_neon,
1394 + .decrypt = chacha_neon,
1395 + }, {
1396 +@@ -150,7 +154,7 @@ static struct skcipher_alg algs[] = {
1397 + .ivsize = XCHACHA_IV_SIZE,
1398 + .chunksize = CHACHA_BLOCK_SIZE,
1399 + .walksize = 5 * CHACHA_BLOCK_SIZE,
1400 +- .setkey = crypto_chacha20_setkey,
1401 ++ .setkey = chacha20_setkey,
1402 + .encrypt = xchacha_neon,
1403 + .decrypt = xchacha_neon,
1404 + }, {
1405 +@@ -166,7 +170,7 @@ static struct skcipher_alg algs[] = {
1406 + .ivsize = XCHACHA_IV_SIZE,
1407 + .chunksize = CHACHA_BLOCK_SIZE,
1408 + .walksize = 5 * CHACHA_BLOCK_SIZE,
1409 +- .setkey = crypto_chacha12_setkey,
1410 ++ .setkey = chacha12_setkey,
1411 + .encrypt = xchacha_neon,
1412 + .decrypt = xchacha_neon,
1413 + }
1414 +--
1415 +cgit v1.2.3-4-ga26e
1416 +
1417 +
1418 +From 2308e3e99a94041960a21741bc7b418ceae1976d Mon Sep 17 00:00:00 2001
1419 +From: Ard Biesheuvel <ardb@××××××.org>
1420 +Date: Fri, 8 Nov 2019 13:22:12 +0100
1421 +Subject: crypto: arm64/chacha - expose arm64 ChaCha routine as library
1422 + function
1423 +
1424 +commit b3aad5bad26a01a4bd8c49a5c5f52aec665f3b7c upstream.
1425 +
1426 +Expose the accelerated NEON ChaCha routine directly as a symbol
1427 +export so that users of the ChaCha library API can use it directly.
1428 +
1429 +Given that calls into the library API will always go through the
1430 +routines in this module if it is enabled, switch to static keys
1431 +to select the optimal implementation available (which may be none
1432 +at all, in which case we defer to the generic implementation for
1433 +all invocations).
1434 +
1435 +Signed-off-by: Ard Biesheuvel <ardb@××××××.org>
1436 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
1437 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
1438 +---
1439 + arch/arm64/crypto/Kconfig | 1 +
1440 + arch/arm64/crypto/chacha-neon-glue.c | 53 ++++++++++++++++++++++++++++--------
1441 + 2 files changed, 43 insertions(+), 11 deletions(-)
1442 +
1443 +diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
1444 +index fdf52d5f18f9..17bada4b9dd2 100644
1445 +--- a/arch/arm64/crypto/Kconfig
1446 ++++ b/arch/arm64/crypto/Kconfig
1447 +@@ -104,6 +104,7 @@ config CRYPTO_CHACHA20_NEON
1448 + depends on KERNEL_MODE_NEON
1449 + select CRYPTO_BLKCIPHER
1450 + select CRYPTO_LIB_CHACHA_GENERIC
1451 ++ select CRYPTO_ARCH_HAVE_LIB_CHACHA
1452 +
1453 + config CRYPTO_NHPOLY1305_NEON
1454 + tristate "NHPoly1305 hash function using NEON instructions (for Adiantum)"
1455 +diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c
1456 +index cae2cb92eca8..46cd4297761c 100644
1457 +--- a/arch/arm64/crypto/chacha-neon-glue.c
1458 ++++ b/arch/arm64/crypto/chacha-neon-glue.c
1459 +@@ -23,6 +23,7 @@
1460 + #include <crypto/internal/chacha.h>
1461 + #include <crypto/internal/simd.h>
1462 + #include <crypto/internal/skcipher.h>
1463 ++#include <linux/jump_label.h>
1464 + #include <linux/kernel.h>
1465 + #include <linux/module.h>
1466 +
1467 +@@ -36,6 +37,8 @@ asmlinkage void chacha_4block_xor_neon(u32 *state, u8 *dst, const u8 *src,
1468 + int nrounds, int bytes);
1469 + asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
1470 +
1471 ++static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
1472 ++
1473 + static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
1474 + int bytes, int nrounds)
1475 + {
1476 +@@ -59,6 +62,37 @@ static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
1477 + }
1478 + }
1479 +
1480 ++void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
1481 ++{
1482 ++ if (!static_branch_likely(&have_neon) || !crypto_simd_usable()) {
1483 ++ hchacha_block_generic(state, stream, nrounds);
1484 ++ } else {
1485 ++ kernel_neon_begin();
1486 ++ hchacha_block_neon(state, stream, nrounds);
1487 ++ kernel_neon_end();
1488 ++ }
1489 ++}
1490 ++EXPORT_SYMBOL(hchacha_block_arch);
1491 ++
1492 ++void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
1493 ++{
1494 ++ chacha_init_generic(state, key, iv);
1495 ++}
1496 ++EXPORT_SYMBOL(chacha_init_arch);
1497 ++
1498 ++void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
1499 ++ int nrounds)
1500 ++{
1501 ++ if (!static_branch_likely(&have_neon) || bytes <= CHACHA_BLOCK_SIZE ||
1502 ++ !crypto_simd_usable())
1503 ++ return chacha_crypt_generic(state, dst, src, bytes, nrounds);
1504 ++
1505 ++ kernel_neon_begin();
1506 ++ chacha_doneon(state, dst, src, bytes, nrounds);
1507 ++ kernel_neon_end();
1508 ++}
1509 ++EXPORT_SYMBOL(chacha_crypt_arch);
1510 ++
1511 + static int chacha_neon_stream_xor(struct skcipher_request *req,
1512 + const struct chacha_ctx *ctx, const u8 *iv)
1513 + {
1514 +@@ -76,7 +110,8 @@ static int chacha_neon_stream_xor(struct skcipher_request *req,
1515 + if (nbytes < walk.total)
1516 + nbytes = rounddown(nbytes, walk.stride);
1517 +
1518 +- if (!crypto_simd_usable()) {
1519 ++ if (!static_branch_likely(&have_neon) ||
1520 ++ !crypto_simd_usable()) {
1521 + chacha_crypt_generic(state, walk.dst.virt.addr,
1522 + walk.src.virt.addr, nbytes,
1523 + ctx->nrounds);
1524 +@@ -109,14 +144,7 @@ static int xchacha_neon(struct skcipher_request *req)
1525 + u8 real_iv[16];
1526 +
1527 + chacha_init_generic(state, ctx->key, req->iv);
1528 +-
1529 +- if (crypto_simd_usable()) {
1530 +- kernel_neon_begin();
1531 +- hchacha_block_neon(state, subctx.key, ctx->nrounds);
1532 +- kernel_neon_end();
1533 +- } else {
1534 +- hchacha_block_generic(state, subctx.key, ctx->nrounds);
1535 +- }
1536 ++ hchacha_block_arch(state, subctx.key, ctx->nrounds);
1537 + subctx.nrounds = ctx->nrounds;
1538 +
1539 + memcpy(&real_iv[0], req->iv + 24, 8);
1540 +@@ -179,14 +207,17 @@ static struct skcipher_alg algs[] = {
1541 + static int __init chacha_simd_mod_init(void)
1542 + {
1543 + if (!cpu_have_named_feature(ASIMD))
1544 +- return -ENODEV;
1545 ++ return 0;
1546 ++
1547 ++ static_branch_enable(&have_neon);
1548 +
1549 + return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
1550 + }
1551 +
1552 + static void __exit chacha_simd_mod_fini(void)
1553 + {
1554 +- crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
1555 ++ if (cpu_have_named_feature(ASIMD))
1556 ++ crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
1557 + }
1558 +
1559 + module_init(chacha_simd_mod_init);
1560 +--
1561 +cgit v1.2.3-4-ga26e
1562 +
1563 +
1564 +From c3c9c99ab0a65e008cceac6ec6b6aed4a1e71321 Mon Sep 17 00:00:00 2001
1565 +From: Ard Biesheuvel <ardb@××××××.org>
1566 +Date: Fri, 8 Nov 2019 13:22:13 +0100
1567 +Subject: crypto: arm/chacha - import Eric Biggers's scalar accelerated ChaCha
1568 + code
1569 +
1570 +commit 29621d099f9c642b22a69dc8e7e20c108473a392 upstream.
1571 +
1572 +Signed-off-by: Ard Biesheuvel <ardb@××××××.org>
1573 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
1574 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
1575 +---
1576 + arch/arm/crypto/chacha-scalar-core.S | 461 +++++++++++++++++++++++++++++++++++
1577 + 1 file changed, 461 insertions(+)
1578 + create mode 100644 arch/arm/crypto/chacha-scalar-core.S
1579 +
1580 +diff --git a/arch/arm/crypto/chacha-scalar-core.S b/arch/arm/crypto/chacha-scalar-core.S
1581 +new file mode 100644
1582 +index 000000000000..2140319b64a0
1583 +--- /dev/null
1584 ++++ b/arch/arm/crypto/chacha-scalar-core.S
1585 +@@ -0,0 +1,461 @@
1586 ++/* SPDX-License-Identifier: GPL-2.0 */
1587 ++/*
1588 ++ * Copyright (C) 2018 Google, Inc.
1589 ++ */
1590 ++
1591 ++#include <linux/linkage.h>
1592 ++#include <asm/assembler.h>
1593 ++
1594 ++/*
1595 ++ * Design notes:
1596 ++ *
1597 ++ * 16 registers would be needed to hold the state matrix, but only 14 are
1598 ++ * available because 'sp' and 'pc' cannot be used. So we spill the elements
1599 ++ * (x8, x9) to the stack and swap them out with (x10, x11). This adds one
1600 ++ * 'ldrd' and one 'strd' instruction per round.
1601 ++ *
1602 ++ * All rotates are performed using the implicit rotate operand accepted by the
1603 ++ * 'add' and 'eor' instructions. This is faster than using explicit rotate
1604 ++ * instructions. To make this work, we allow the values in the second and last
1605 ++ * rows of the ChaCha state matrix (rows 'b' and 'd') to temporarily have the
1606 ++ * wrong rotation amount. The rotation amount is then fixed up just in time
1607 ++ * when the values are used. 'brot' is the number of bits the values in row 'b'
1608 ++ * need to be rotated right to arrive at the correct values, and 'drot'
1609 ++ * similarly for row 'd'. (brot, drot) start out as (0, 0) but we make it such
1610 ++ * that they end up as (25, 24) after every round.
1611 ++ */
1612 ++
1613 ++ // ChaCha state registers
1614 ++ X0 .req r0
1615 ++ X1 .req r1
1616 ++ X2 .req r2
1617 ++ X3 .req r3
1618 ++ X4 .req r4
1619 ++ X5 .req r5
1620 ++ X6 .req r6
1621 ++ X7 .req r7
1622 ++ X8_X10 .req r8 // shared by x8 and x10
1623 ++ X9_X11 .req r9 // shared by x9 and x11
1624 ++ X12 .req r10
1625 ++ X13 .req r11
1626 ++ X14 .req r12
1627 ++ X15 .req r14
1628 ++
1629 ++.Lexpand_32byte_k:
1630 ++ // "expand 32-byte k"
1631 ++ .word 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
1632 ++
1633 ++#ifdef __thumb2__
1634 ++# define adrl adr
1635 ++#endif
1636 ++
1637 ++.macro __rev out, in, t0, t1, t2
1638 ++.if __LINUX_ARM_ARCH__ >= 6
1639 ++ rev \out, \in
1640 ++.else
1641 ++ lsl \t0, \in, #24
1642 ++ and \t1, \in, #0xff00
1643 ++ and \t2, \in, #0xff0000
1644 ++ orr \out, \t0, \in, lsr #24
1645 ++ orr \out, \out, \t1, lsl #8
1646 ++ orr \out, \out, \t2, lsr #8
1647 ++.endif
1648 ++.endm
1649 ++
1650 ++.macro _le32_bswap x, t0, t1, t2
1651 ++#ifdef __ARMEB__
1652 ++ __rev \x, \x, \t0, \t1, \t2
1653 ++#endif
1654 ++.endm
1655 ++
1656 ++.macro _le32_bswap_4x a, b, c, d, t0, t1, t2
1657 ++ _le32_bswap \a, \t0, \t1, \t2
1658 ++ _le32_bswap \b, \t0, \t1, \t2
1659 ++ _le32_bswap \c, \t0, \t1, \t2
1660 ++ _le32_bswap \d, \t0, \t1, \t2
1661 ++.endm
1662 ++
1663 ++.macro __ldrd a, b, src, offset
1664 ++#if __LINUX_ARM_ARCH__ >= 6
1665 ++ ldrd \a, \b, [\src, #\offset]
1666 ++#else
1667 ++ ldr \a, [\src, #\offset]
1668 ++ ldr \b, [\src, #\offset + 4]
1669 ++#endif
1670 ++.endm
1671 ++
1672 ++.macro __strd a, b, dst, offset
1673 ++#if __LINUX_ARM_ARCH__ >= 6
1674 ++ strd \a, \b, [\dst, #\offset]
1675 ++#else
1676 ++ str \a, [\dst, #\offset]
1677 ++ str \b, [\dst, #\offset + 4]
1678 ++#endif
1679 ++.endm
1680 ++
1681 ++.macro _halfround a1, b1, c1, d1, a2, b2, c2, d2
1682 ++
1683 ++ // a += b; d ^= a; d = rol(d, 16);
1684 ++ add \a1, \a1, \b1, ror #brot
1685 ++ add \a2, \a2, \b2, ror #brot
1686 ++ eor \d1, \a1, \d1, ror #drot
1687 ++ eor \d2, \a2, \d2, ror #drot
1688 ++ // drot == 32 - 16 == 16
1689 ++
1690 ++ // c += d; b ^= c; b = rol(b, 12);
1691 ++ add \c1, \c1, \d1, ror #16
1692 ++ add \c2, \c2, \d2, ror #16
1693 ++ eor \b1, \c1, \b1, ror #brot
1694 ++ eor \b2, \c2, \b2, ror #brot
1695 ++ // brot == 32 - 12 == 20
1696 ++
1697 ++ // a += b; d ^= a; d = rol(d, 8);
1698 ++ add \a1, \a1, \b1, ror #20
1699 ++ add \a2, \a2, \b2, ror #20
1700 ++ eor \d1, \a1, \d1, ror #16
1701 ++ eor \d2, \a2, \d2, ror #16
1702 ++ // drot == 32 - 8 == 24
1703 ++
1704 ++ // c += d; b ^= c; b = rol(b, 7);
1705 ++ add \c1, \c1, \d1, ror #24
1706 ++ add \c2, \c2, \d2, ror #24
1707 ++ eor \b1, \c1, \b1, ror #20
1708 ++ eor \b2, \c2, \b2, ror #20
1709 ++ // brot == 32 - 7 == 25
1710 ++.endm
1711 ++
1712 ++.macro _doubleround
1713 ++
1714 ++ // column round
1715 ++
1716 ++ // quarterrounds: (x0, x4, x8, x12) and (x1, x5, x9, x13)
1717 ++ _halfround X0, X4, X8_X10, X12, X1, X5, X9_X11, X13
1718 ++
1719 ++ // save (x8, x9); restore (x10, x11)
1720 ++ __strd X8_X10, X9_X11, sp, 0
1721 ++ __ldrd X8_X10, X9_X11, sp, 8
1722 ++
1723 ++ // quarterrounds: (x2, x6, x10, x14) and (x3, x7, x11, x15)
1724 ++ _halfround X2, X6, X8_X10, X14, X3, X7, X9_X11, X15
1725 ++
1726 ++ .set brot, 25
1727 ++ .set drot, 24
1728 ++
1729 ++ // diagonal round
1730 ++
1731 ++ // quarterrounds: (x0, x5, x10, x15) and (x1, x6, x11, x12)
1732 ++ _halfround X0, X5, X8_X10, X15, X1, X6, X9_X11, X12
1733 ++
1734 ++ // save (x10, x11); restore (x8, x9)
1735 ++ __strd X8_X10, X9_X11, sp, 8
1736 ++ __ldrd X8_X10, X9_X11, sp, 0
1737 ++
1738 ++ // quarterrounds: (x2, x7, x8, x13) and (x3, x4, x9, x14)
1739 ++ _halfround X2, X7, X8_X10, X13, X3, X4, X9_X11, X14
1740 ++.endm
1741 ++
1742 ++.macro _chacha_permute nrounds
1743 ++ .set brot, 0
1744 ++ .set drot, 0
1745 ++ .rept \nrounds / 2
1746 ++ _doubleround
1747 ++ .endr
1748 ++.endm
1749 ++
1750 ++.macro _chacha nrounds
1751 ++
1752 ++.Lnext_block\@:
1753 ++ // Stack: unused0-unused1 x10-x11 x0-x15 OUT IN LEN
1754 ++ // Registers contain x0-x9,x12-x15.
1755 ++
1756 ++ // Do the core ChaCha permutation to update x0-x15.
1757 ++ _chacha_permute \nrounds
1758 ++
1759 ++ add sp, #8
1760 ++ // Stack: x10-x11 orig_x0-orig_x15 OUT IN LEN
1761 ++ // Registers contain x0-x9,x12-x15.
1762 ++ // x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'.
1763 ++
1764 ++ // Free up some registers (r8-r12,r14) by pushing (x8-x9,x12-x15).
1765 ++ push {X8_X10, X9_X11, X12, X13, X14, X15}
1766 ++
1767 ++ // Load (OUT, IN, LEN).
1768 ++ ldr r14, [sp, #96]
1769 ++ ldr r12, [sp, #100]
1770 ++ ldr r11, [sp, #104]
1771 ++
1772 ++ orr r10, r14, r12
1773 ++
1774 ++ // Use slow path if fewer than 64 bytes remain.
1775 ++ cmp r11, #64
1776 ++ blt .Lxor_slowpath\@
1777 ++
1778 ++ // Use slow path if IN and/or OUT isn't 4-byte aligned. Needed even on
1779 ++ // ARMv6+, since ldmia and stmia (used below) still require alignment.
1780 ++ tst r10, #3
1781 ++ bne .Lxor_slowpath\@
1782 ++
1783 ++ // Fast path: XOR 64 bytes of aligned data.
1784 ++
1785 ++ // Stack: x8-x9 x12-x15 x10-x11 orig_x0-orig_x15 OUT IN LEN
1786 ++ // Registers: r0-r7 are x0-x7; r8-r11 are free; r12 is IN; r14 is OUT.
1787 ++ // x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'.
1788 ++
1789 ++ // x0-x3
1790 ++ __ldrd r8, r9, sp, 32
1791 ++ __ldrd r10, r11, sp, 40
1792 ++ add X0, X0, r8
1793 ++ add X1, X1, r9
1794 ++ add X2, X2, r10
1795 ++ add X3, X3, r11
1796 ++ _le32_bswap_4x X0, X1, X2, X3, r8, r9, r10
1797 ++ ldmia r12!, {r8-r11}
1798 ++ eor X0, X0, r8
1799 ++ eor X1, X1, r9
1800 ++ eor X2, X2, r10
1801 ++ eor X3, X3, r11
1802 ++ stmia r14!, {X0-X3}
1803 ++
1804 ++ // x4-x7
1805 ++ __ldrd r8, r9, sp, 48
1806 ++ __ldrd r10, r11, sp, 56
1807 ++ add X4, r8, X4, ror #brot
1808 ++ add X5, r9, X5, ror #brot
1809 ++ ldmia r12!, {X0-X3}
1810 ++ add X6, r10, X6, ror #brot
1811 ++ add X7, r11, X7, ror #brot
1812 ++ _le32_bswap_4x X4, X5, X6, X7, r8, r9, r10
1813 ++ eor X4, X4, X0
1814 ++ eor X5, X5, X1
1815 ++ eor X6, X6, X2
1816 ++ eor X7, X7, X3
1817 ++ stmia r14!, {X4-X7}
1818 ++
1819 ++ // x8-x15
1820 ++ pop {r0-r7} // (x8-x9,x12-x15,x10-x11)
1821 ++ __ldrd r8, r9, sp, 32
1822 ++ __ldrd r10, r11, sp, 40
1823 ++ add r0, r0, r8 // x8
1824 ++ add r1, r1, r9 // x9
1825 ++ add r6, r6, r10 // x10
1826 ++ add r7, r7, r11 // x11
1827 ++ _le32_bswap_4x r0, r1, r6, r7, r8, r9, r10
1828 ++ ldmia r12!, {r8-r11}
1829 ++ eor r0, r0, r8 // x8
1830 ++ eor r1, r1, r9 // x9
1831 ++ eor r6, r6, r10 // x10
1832 ++ eor r7, r7, r11 // x11
1833 ++ stmia r14!, {r0,r1,r6,r7}
1834 ++ ldmia r12!, {r0,r1,r6,r7}
1835 ++ __ldrd r8, r9, sp, 48
1836 ++ __ldrd r10, r11, sp, 56
1837 ++ add r2, r8, r2, ror #drot // x12
1838 ++ add r3, r9, r3, ror #drot // x13
1839 ++ add r4, r10, r4, ror #drot // x14
1840 ++ add r5, r11, r5, ror #drot // x15
1841 ++ _le32_bswap_4x r2, r3, r4, r5, r9, r10, r11
1842 ++ ldr r9, [sp, #72] // load LEN
1843 ++ eor r2, r2, r0 // x12
1844 ++ eor r3, r3, r1 // x13
1845 ++ eor r4, r4, r6 // x14
1846 ++ eor r5, r5, r7 // x15
1847 ++ subs r9, #64 // decrement and check LEN
1848 ++ stmia r14!, {r2-r5}
1849 ++
1850 ++ beq .Ldone\@
1851 ++
1852 ++.Lprepare_for_next_block\@:
1853 ++
1854 ++ // Stack: x0-x15 OUT IN LEN
1855 ++
1856 ++ // Increment block counter (x12)
1857 ++ add r8, #1
1858 ++
1859 ++ // Store updated (OUT, IN, LEN)
1860 ++ str r14, [sp, #64]
1861 ++ str r12, [sp, #68]
1862 ++ str r9, [sp, #72]
1863 ++
1864 ++ mov r14, sp
1865 ++
1866 ++ // Store updated block counter (x12)
1867 ++ str r8, [sp, #48]
1868 ++
1869 ++ sub sp, #16
1870 ++
1871 ++ // Reload state and do next block
1872 ++ ldmia r14!, {r0-r11} // load x0-x11
1873 ++ __strd r10, r11, sp, 8 // store x10-x11 before state
1874 ++ ldmia r14, {r10-r12,r14} // load x12-x15
1875 ++ b .Lnext_block\@
1876 ++
1877 ++.Lxor_slowpath\@:
1878 ++ // Slow path: < 64 bytes remaining, or unaligned input or output buffer.
1879 ++ // We handle it by storing the 64 bytes of keystream to the stack, then
1880 ++ // XOR-ing the needed portion with the data.
1881 ++
1882 ++ // Allocate keystream buffer
1883 ++ sub sp, #64
1884 ++ mov r14, sp
1885 ++
1886 ++ // Stack: ks0-ks15 x8-x9 x12-x15 x10-x11 orig_x0-orig_x15 OUT IN LEN
1887 ++ // Registers: r0-r7 are x0-x7; r8-r11 are free; r12 is IN; r14 is &ks0.
1888 ++ // x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'.
1889 ++
1890 ++ // Save keystream for x0-x3
1891 ++ __ldrd r8, r9, sp, 96
1892 ++ __ldrd r10, r11, sp, 104
1893 ++ add X0, X0, r8
1894 ++ add X1, X1, r9
1895 ++ add X2, X2, r10
1896 ++ add X3, X3, r11
1897 ++ _le32_bswap_4x X0, X1, X2, X3, r8, r9, r10
1898 ++ stmia r14!, {X0-X3}
1899 ++
1900 ++ // Save keystream for x4-x7
1901 ++ __ldrd r8, r9, sp, 112
1902 ++ __ldrd r10, r11, sp, 120
1903 ++ add X4, r8, X4, ror #brot
1904 ++ add X5, r9, X5, ror #brot
1905 ++ add X6, r10, X6, ror #brot
1906 ++ add X7, r11, X7, ror #brot
1907 ++ _le32_bswap_4x X4, X5, X6, X7, r8, r9, r10
1908 ++ add r8, sp, #64
1909 ++ stmia r14!, {X4-X7}
1910 ++
1911 ++ // Save keystream for x8-x15
1912 ++ ldm r8, {r0-r7} // (x8-x9,x12-x15,x10-x11)
1913 ++ __ldrd r8, r9, sp, 128
1914 ++ __ldrd r10, r11, sp, 136
1915 ++ add r0, r0, r8 // x8
1916 ++ add r1, r1, r9 // x9
1917 ++ add r6, r6, r10 // x10
1918 ++ add r7, r7, r11 // x11
1919 ++ _le32_bswap_4x r0, r1, r6, r7, r8, r9, r10
1920 ++ stmia r14!, {r0,r1,r6,r7}
1921 ++ __ldrd r8, r9, sp, 144
1922 ++ __ldrd r10, r11, sp, 152
1923 ++ add r2, r8, r2, ror #drot // x12
1924 ++ add r3, r9, r3, ror #drot // x13
1925 ++ add r4, r10, r4, ror #drot // x14
1926 ++ add r5, r11, r5, ror #drot // x15
1927 ++ _le32_bswap_4x r2, r3, r4, r5, r9, r10, r11
1928 ++ stmia r14, {r2-r5}
1929 ++
1930 ++ // Stack: ks0-ks15 unused0-unused7 x0-x15 OUT IN LEN
1931 ++ // Registers: r8 is block counter, r12 is IN.
1932 ++
1933 ++ ldr r9, [sp, #168] // LEN
1934 ++ ldr r14, [sp, #160] // OUT
1935 ++ cmp r9, #64
1936 ++ mov r0, sp
1937 ++ movle r1, r9
1938 ++ movgt r1, #64
1939 ++ // r1 is number of bytes to XOR, in range [1, 64]
1940 ++
1941 ++.if __LINUX_ARM_ARCH__ < 6
1942 ++ orr r2, r12, r14
1943 ++ tst r2, #3 // IN or OUT misaligned?
1944 ++ bne .Lxor_next_byte\@
1945 ++.endif
1946 ++
1947 ++ // XOR a word at a time
1948 ++.rept 16
1949 ++ subs r1, #4
1950 ++ blt .Lxor_words_done\@
1951 ++ ldr r2, [r12], #4
1952 ++ ldr r3, [r0], #4
1953 ++ eor r2, r2, r3
1954 ++ str r2, [r14], #4
1955 ++.endr
1956 ++ b .Lxor_slowpath_done\@
1957 ++.Lxor_words_done\@:
1958 ++ ands r1, r1, #3
1959 ++ beq .Lxor_slowpath_done\@
1960 ++
1961 ++ // XOR a byte at a time
1962 ++.Lxor_next_byte\@:
1963 ++ ldrb r2, [r12], #1
1964 ++ ldrb r3, [r0], #1
1965 ++ eor r2, r2, r3
1966 ++ strb r2, [r14], #1
1967 ++ subs r1, #1
1968 ++ bne .Lxor_next_byte\@
1969 ++
1970 ++.Lxor_slowpath_done\@:
1971 ++ subs r9, #64
1972 ++ add sp, #96
1973 ++ bgt .Lprepare_for_next_block\@
1974 ++
1975 ++.Ldone\@:
1976 ++.endm // _chacha
1977 ++
1978 ++/*
1979 ++ * void chacha20_arm(u8 *out, const u8 *in, size_t len, const u32 key[8],
1980 ++ * const u32 iv[4]);
1981 ++ */
1982 ++ENTRY(chacha20_arm)
1983 ++ cmp r2, #0 // len == 0?
1984 ++ reteq lr
1985 ++
1986 ++ push {r0-r2,r4-r11,lr}
1987 ++
1988 ++ // Push state x0-x15 onto stack.
1989 ++ // Also store an extra copy of x10-x11 just before the state.
1990 ++
1991 ++ ldr r4, [sp, #48] // iv
1992 ++ mov r0, sp
1993 ++ sub sp, #80
1994 ++
1995 ++ // iv: x12-x15
1996 ++ ldm r4, {X12,X13,X14,X15}
1997 ++ stmdb r0!, {X12,X13,X14,X15}
1998 ++
1999 ++ // key: x4-x11
2000 ++ __ldrd X8_X10, X9_X11, r3, 24
2001 ++ __strd X8_X10, X9_X11, sp, 8
2002 ++ stmdb r0!, {X8_X10, X9_X11}
2003 ++ ldm r3, {X4-X9_X11}
2004 ++ stmdb r0!, {X4-X9_X11}
2005 ++
2006 ++ // constants: x0-x3
2007 ++ adrl X3, .Lexpand_32byte_k
2008 ++ ldm X3, {X0-X3}
2009 ++ __strd X0, X1, sp, 16
2010 ++ __strd X2, X3, sp, 24
2011 ++
2012 ++ _chacha 20
2013 ++
2014 ++ add sp, #76
2015 ++ pop {r4-r11, pc}
2016 ++ENDPROC(chacha20_arm)
2017 ++
2018 ++/*
2019 ++ * void hchacha20_arm(const u32 state[16], u32 out[8]);
2020 ++ */
2021 ++ENTRY(hchacha20_arm)
2022 ++ push {r1,r4-r11,lr}
2023 ++
2024 ++ mov r14, r0
2025 ++ ldmia r14!, {r0-r11} // load x0-x11
2026 ++ push {r10-r11} // store x10-x11 to stack
2027 ++ ldm r14, {r10-r12,r14} // load x12-x15
2028 ++ sub sp, #8
2029 ++
2030 ++ _chacha_permute 20
2031 ++
2032 ++ // Skip over (unused0-unused1, x10-x11)
2033 ++ add sp, #16
2034 ++
2035 ++ // Fix up rotations of x12-x15
2036 ++ ror X12, X12, #drot
2037 ++ ror X13, X13, #drot
2038 ++ pop {r4} // load 'out'
2039 ++ ror X14, X14, #drot
2040 ++ ror X15, X15, #drot
2041 ++
2042 ++ // Store (x0-x3,x12-x15) to 'out'
2043 ++ stm r4, {X0,X1,X2,X3,X12,X13,X14,X15}
2044 ++
2045 ++ pop {r4-r11,pc}
2046 ++ENDPROC(hchacha20_arm)
2047 +--
2048 +cgit v1.2.3-4-ga26e
2049 +
2050 +
2051 +From f619db1e2d2969576e7b1754542d26f6119fc17e Mon Sep 17 00:00:00 2001
2052 +From: Ard Biesheuvel <ardb@××××××.org>
2053 +Date: Fri, 8 Nov 2019 13:22:14 +0100
2054 +Subject: crypto: arm/chacha - remove dependency on generic ChaCha driver
2055 +
2056 +commit b36d8c09e710c71f6a9690b6586fea2d1c9e1e27 upstream.
2057 +
2058 +Instead of falling back to the generic ChaCha skcipher driver for
2059 +non-SIMD cases, use a fast scalar implementation for ARM authored
2060 +by Eric Biggers. This removes the module dependency on chacha-generic
2061 +altogether, which also simplifies things when we expose the ChaCha
2062 +library interface from this module.
2063 +
2064 +Signed-off-by: Ard Biesheuvel <ardb@××××××.org>
2065 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
2066 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
2067 +---
2068 + arch/arm/crypto/Kconfig | 4 +-
2069 + arch/arm/crypto/Makefile | 3 +-
2070 + arch/arm/crypto/chacha-glue.c | 304 +++++++++++++++++++++++++++++++++++
2071 + arch/arm/crypto/chacha-neon-glue.c | 202 -----------------------
2072 + arch/arm/crypto/chacha-scalar-core.S | 65 ++++----
2073 + arch/arm64/crypto/chacha-neon-glue.c | 2 +-
2074 + 6 files changed, 340 insertions(+), 240 deletions(-)
2075 + create mode 100644 arch/arm/crypto/chacha-glue.c
2076 + delete mode 100644 arch/arm/crypto/chacha-neon-glue.c
2077 +
2078 +diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
2079 +index 043b0b18bf7e..cee414afeabc 100644
2080 +--- a/arch/arm/crypto/Kconfig
2081 ++++ b/arch/arm/crypto/Kconfig
2082 +@@ -127,10 +127,8 @@ config CRYPTO_CRC32_ARM_CE
2083 + select CRYPTO_HASH
2084 +
2085 + config CRYPTO_CHACHA20_NEON
2086 +- tristate "NEON accelerated ChaCha stream cipher algorithms"
2087 +- depends on KERNEL_MODE_NEON
2088 ++ tristate "NEON and scalar accelerated ChaCha stream cipher algorithms"
2089 + select CRYPTO_BLKCIPHER
2090 +- select CRYPTO_CHACHA20
2091 +
2092 + config CRYPTO_NHPOLY1305_NEON
2093 + tristate "NEON accelerated NHPoly1305 hash function (for Adiantum)"
2094 +diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
2095 +index 4180f3a13512..6b97dffcf90f 100644
2096 +--- a/arch/arm/crypto/Makefile
2097 ++++ b/arch/arm/crypto/Makefile
2098 +@@ -53,7 +53,8 @@ aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o
2099 + ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o
2100 + crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o
2101 + crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o
2102 +-chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o
2103 ++chacha-neon-y := chacha-scalar-core.o chacha-glue.o
2104 ++chacha-neon-$(CONFIG_KERNEL_MODE_NEON) += chacha-neon-core.o
2105 + nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o
2106 +
2107 + ifdef REGENERATE_ARM_CRYPTO
2108 +diff --git a/arch/arm/crypto/chacha-glue.c b/arch/arm/crypto/chacha-glue.c
2109 +new file mode 100644
2110 +index 000000000000..eb40efb3eb34
2111 +--- /dev/null
2112 ++++ b/arch/arm/crypto/chacha-glue.c
2113 +@@ -0,0 +1,304 @@
2114 ++// SPDX-License-Identifier: GPL-2.0
2115 ++/*
2116 ++ * ARM NEON and scalar accelerated ChaCha and XChaCha stream ciphers,
2117 ++ * including ChaCha20 (RFC7539)
2118 ++ *
2119 ++ * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@××××××.org>
2120 ++ * Copyright (C) 2015 Martin Willi
2121 ++ */
2122 ++
2123 ++#include <crypto/algapi.h>
2124 ++#include <crypto/internal/chacha.h>
2125 ++#include <crypto/internal/simd.h>
2126 ++#include <crypto/internal/skcipher.h>
2127 ++#include <linux/kernel.h>
2128 ++#include <linux/module.h>
2129 ++
2130 ++#include <asm/cputype.h>
2131 ++#include <asm/hwcap.h>
2132 ++#include <asm/neon.h>
2133 ++#include <asm/simd.h>
2134 ++
2135 ++asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
2136 ++ int nrounds);
2137 ++asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
2138 ++ int nrounds);
2139 ++asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds);
2140 ++asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
2141 ++
2142 ++asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
2143 ++ const u32 *state, int nrounds);
2144 ++
2145 ++static inline bool neon_usable(void)
2146 ++{
2147 ++ return crypto_simd_usable();
2148 ++}
2149 ++
2150 ++static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
2151 ++ unsigned int bytes, int nrounds)
2152 ++{
2153 ++ u8 buf[CHACHA_BLOCK_SIZE];
2154 ++
2155 ++ while (bytes >= CHACHA_BLOCK_SIZE * 4) {
2156 ++ chacha_4block_xor_neon(state, dst, src, nrounds);
2157 ++ bytes -= CHACHA_BLOCK_SIZE * 4;
2158 ++ src += CHACHA_BLOCK_SIZE * 4;
2159 ++ dst += CHACHA_BLOCK_SIZE * 4;
2160 ++ state[12] += 4;
2161 ++ }
2162 ++ while (bytes >= CHACHA_BLOCK_SIZE) {
2163 ++ chacha_block_xor_neon(state, dst, src, nrounds);
2164 ++ bytes -= CHACHA_BLOCK_SIZE;
2165 ++ src += CHACHA_BLOCK_SIZE;
2166 ++ dst += CHACHA_BLOCK_SIZE;
2167 ++ state[12]++;
2168 ++ }
2169 ++ if (bytes) {
2170 ++ memcpy(buf, src, bytes);
2171 ++ chacha_block_xor_neon(state, buf, buf, nrounds);
2172 ++ memcpy(dst, buf, bytes);
2173 ++ }
2174 ++}
2175 ++
2176 ++static int chacha_stream_xor(struct skcipher_request *req,
2177 ++ const struct chacha_ctx *ctx, const u8 *iv,
2178 ++ bool neon)
2179 ++{
2180 ++ struct skcipher_walk walk;
2181 ++ u32 state[16];
2182 ++ int err;
2183 ++
2184 ++ err = skcipher_walk_virt(&walk, req, false);
2185 ++
2186 ++ chacha_init_generic(state, ctx->key, iv);
2187 ++
2188 ++ while (walk.nbytes > 0) {
2189 ++ unsigned int nbytes = walk.nbytes;
2190 ++
2191 ++ if (nbytes < walk.total)
2192 ++ nbytes = round_down(nbytes, walk.stride);
2193 ++
2194 ++ if (!neon) {
2195 ++ chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr,
2196 ++ nbytes, state, ctx->nrounds);
2197 ++ state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE);
2198 ++ } else {
2199 ++ kernel_neon_begin();
2200 ++ chacha_doneon(state, walk.dst.virt.addr,
2201 ++ walk.src.virt.addr, nbytes, ctx->nrounds);
2202 ++ kernel_neon_end();
2203 ++ }
2204 ++ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
2205 ++ }
2206 ++
2207 ++ return err;
2208 ++}
2209 ++
2210 ++static int do_chacha(struct skcipher_request *req, bool neon)
2211 ++{
2212 ++ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
2213 ++ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
2214 ++
2215 ++ return chacha_stream_xor(req, ctx, req->iv, neon);
2216 ++}
2217 ++
2218 ++static int chacha_arm(struct skcipher_request *req)
2219 ++{
2220 ++ return do_chacha(req, false);
2221 ++}
2222 ++
2223 ++static int chacha_neon(struct skcipher_request *req)
2224 ++{
2225 ++ return do_chacha(req, neon_usable());
2226 ++}
2227 ++
2228 ++static int do_xchacha(struct skcipher_request *req, bool neon)
2229 ++{
2230 ++ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
2231 ++ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
2232 ++ struct chacha_ctx subctx;
2233 ++ u32 state[16];
2234 ++ u8 real_iv[16];
2235 ++
2236 ++ chacha_init_generic(state, ctx->key, req->iv);
2237 ++
2238 ++ if (!neon) {
2239 ++ hchacha_block_arm(state, subctx.key, ctx->nrounds);
2240 ++ } else {
2241 ++ kernel_neon_begin();
2242 ++ hchacha_block_neon(state, subctx.key, ctx->nrounds);
2243 ++ kernel_neon_end();
2244 ++ }
2245 ++ subctx.nrounds = ctx->nrounds;
2246 ++
2247 ++ memcpy(&real_iv[0], req->iv + 24, 8);
2248 ++ memcpy(&real_iv[8], req->iv + 16, 8);
2249 ++ return chacha_stream_xor(req, &subctx, real_iv, neon);
2250 ++}
2251 ++
2252 ++static int xchacha_arm(struct skcipher_request *req)
2253 ++{
2254 ++ return do_xchacha(req, false);
2255 ++}
2256 ++
2257 ++static int xchacha_neon(struct skcipher_request *req)
2258 ++{
2259 ++ return do_xchacha(req, neon_usable());
2260 ++}
2261 ++
2262 ++static struct skcipher_alg arm_algs[] = {
2263 ++ {
2264 ++ .base.cra_name = "chacha20",
2265 ++ .base.cra_driver_name = "chacha20-arm",
2266 ++ .base.cra_priority = 200,
2267 ++ .base.cra_blocksize = 1,
2268 ++ .base.cra_ctxsize = sizeof(struct chacha_ctx),
2269 ++ .base.cra_module = THIS_MODULE,
2270 ++
2271 ++ .min_keysize = CHACHA_KEY_SIZE,
2272 ++ .max_keysize = CHACHA_KEY_SIZE,
2273 ++ .ivsize = CHACHA_IV_SIZE,
2274 ++ .chunksize = CHACHA_BLOCK_SIZE,
2275 ++ .setkey = chacha20_setkey,
2276 ++ .encrypt = chacha_arm,
2277 ++ .decrypt = chacha_arm,
2278 ++ }, {
2279 ++ .base.cra_name = "xchacha20",
2280 ++ .base.cra_driver_name = "xchacha20-arm",
2281 ++ .base.cra_priority = 200,
2282 ++ .base.cra_blocksize = 1,
2283 ++ .base.cra_ctxsize = sizeof(struct chacha_ctx),
2284 ++ .base.cra_module = THIS_MODULE,
2285 ++
2286 ++ .min_keysize = CHACHA_KEY_SIZE,
2287 ++ .max_keysize = CHACHA_KEY_SIZE,
2288 ++ .ivsize = XCHACHA_IV_SIZE,
2289 ++ .chunksize = CHACHA_BLOCK_SIZE,
2290 ++ .setkey = chacha20_setkey,
2291 ++ .encrypt = xchacha_arm,
2292 ++ .decrypt = xchacha_arm,
2293 ++ }, {
2294 ++ .base.cra_name = "xchacha12",
2295 ++ .base.cra_driver_name = "xchacha12-arm",
2296 ++ .base.cra_priority = 200,
2297 ++ .base.cra_blocksize = 1,
2298 ++ .base.cra_ctxsize = sizeof(struct chacha_ctx),
2299 ++ .base.cra_module = THIS_MODULE,
2300 ++
2301 ++ .min_keysize = CHACHA_KEY_SIZE,
2302 ++ .max_keysize = CHACHA_KEY_SIZE,
2303 ++ .ivsize = XCHACHA_IV_SIZE,
2304 ++ .chunksize = CHACHA_BLOCK_SIZE,
2305 ++ .setkey = chacha12_setkey,
2306 ++ .encrypt = xchacha_arm,
2307 ++ .decrypt = xchacha_arm,
2308 ++ },
2309 ++};
2310 ++
2311 ++static struct skcipher_alg neon_algs[] = {
2312 ++ {
2313 ++ .base.cra_name = "chacha20",
2314 ++ .base.cra_driver_name = "chacha20-neon",
2315 ++ .base.cra_priority = 300,
2316 ++ .base.cra_blocksize = 1,
2317 ++ .base.cra_ctxsize = sizeof(struct chacha_ctx),
2318 ++ .base.cra_module = THIS_MODULE,
2319 ++
2320 ++ .min_keysize = CHACHA_KEY_SIZE,
2321 ++ .max_keysize = CHACHA_KEY_SIZE,
2322 ++ .ivsize = CHACHA_IV_SIZE,
2323 ++ .chunksize = CHACHA_BLOCK_SIZE,
2324 ++ .walksize = 4 * CHACHA_BLOCK_SIZE,
2325 ++ .setkey = chacha20_setkey,
2326 ++ .encrypt = chacha_neon,
2327 ++ .decrypt = chacha_neon,
2328 ++ }, {
2329 ++ .base.cra_name = "xchacha20",
2330 ++ .base.cra_driver_name = "xchacha20-neon",
2331 ++ .base.cra_priority = 300,
2332 ++ .base.cra_blocksize = 1,
2333 ++ .base.cra_ctxsize = sizeof(struct chacha_ctx),
2334 ++ .base.cra_module = THIS_MODULE,
2335 ++
2336 ++ .min_keysize = CHACHA_KEY_SIZE,
2337 ++ .max_keysize = CHACHA_KEY_SIZE,
2338 ++ .ivsize = XCHACHA_IV_SIZE,
2339 ++ .chunksize = CHACHA_BLOCK_SIZE,
2340 ++ .walksize = 4 * CHACHA_BLOCK_SIZE,
2341 ++ .setkey = chacha20_setkey,
2342 ++ .encrypt = xchacha_neon,
2343 ++ .decrypt = xchacha_neon,
2344 ++ }, {
2345 ++ .base.cra_name = "xchacha12",
2346 ++ .base.cra_driver_name = "xchacha12-neon",
2347 ++ .base.cra_priority = 300,
2348 ++ .base.cra_blocksize = 1,
2349 ++ .base.cra_ctxsize = sizeof(struct chacha_ctx),
2350 ++ .base.cra_module = THIS_MODULE,
2351 ++
2352 ++ .min_keysize = CHACHA_KEY_SIZE,
2353 ++ .max_keysize = CHACHA_KEY_SIZE,
2354 ++ .ivsize = XCHACHA_IV_SIZE,
2355 ++ .chunksize = CHACHA_BLOCK_SIZE,
2356 ++ .walksize = 4 * CHACHA_BLOCK_SIZE,
2357 ++ .setkey = chacha12_setkey,
2358 ++ .encrypt = xchacha_neon,
2359 ++ .decrypt = xchacha_neon,
2360 ++ }
2361 ++};
2362 ++
2363 ++static int __init chacha_simd_mod_init(void)
2364 ++{
2365 ++ int err;
2366 ++
2367 ++ err = crypto_register_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
2368 ++ if (err)
2369 ++ return err;
2370 ++
2371 ++ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) {
2372 ++ int i;
2373 ++
2374 ++ switch (read_cpuid_part()) {
2375 ++ case ARM_CPU_PART_CORTEX_A7:
2376 ++ case ARM_CPU_PART_CORTEX_A5:
2377 ++ /*
2378 ++ * The Cortex-A7 and Cortex-A5 do not perform well with
2379 ++ * the NEON implementation but do incredibly well with the
2380 ++ * scalar one and use less power.
2381 ++ */
2382 ++ for (i = 0; i < ARRAY_SIZE(neon_algs); i++)
2383 ++ neon_algs[i].base.cra_priority = 0;
2384 ++ break;
2385 ++ }
2386 ++
2387 ++ err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
2388 ++ if (err)
2389 ++ crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
2390 ++ }
2391 ++ return err;
2392 ++}
2393 ++
2394 ++static void __exit chacha_simd_mod_fini(void)
2395 ++{
2396 ++ crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
2397 ++ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON))
2398 ++ crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
2399 ++}
2400 ++
2401 ++module_init(chacha_simd_mod_init);
2402 ++module_exit(chacha_simd_mod_fini);
2403 ++
2404 ++MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (scalar and NEON accelerated)");
2405 ++MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@××××××.org>");
2406 ++MODULE_LICENSE("GPL v2");
2407 ++MODULE_ALIAS_CRYPTO("chacha20");
2408 ++MODULE_ALIAS_CRYPTO("chacha20-arm");
2409 ++MODULE_ALIAS_CRYPTO("xchacha20");
2410 ++MODULE_ALIAS_CRYPTO("xchacha20-arm");
2411 ++MODULE_ALIAS_CRYPTO("xchacha12");
2412 ++MODULE_ALIAS_CRYPTO("xchacha12-arm");
2413 ++#ifdef CONFIG_KERNEL_MODE_NEON
2414 ++MODULE_ALIAS_CRYPTO("chacha20-neon");
2415 ++MODULE_ALIAS_CRYPTO("xchacha20-neon");
2416 ++MODULE_ALIAS_CRYPTO("xchacha12-neon");
2417 ++#endif
2418 +diff --git a/arch/arm/crypto/chacha-neon-glue.c b/arch/arm/crypto/chacha-neon-glue.c
2419 +deleted file mode 100644
2420 +index 26576772f18b..000000000000
2421 +--- a/arch/arm/crypto/chacha-neon-glue.c
2422 ++++ /dev/null
2423 +@@ -1,202 +0,0 @@
2424 +-/*
2425 +- * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
2426 +- * including ChaCha20 (RFC7539)
2427 +- *
2428 +- * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@××××××.org>
2429 +- *
2430 +- * This program is free software; you can redistribute it and/or modify
2431 +- * it under the terms of the GNU General Public License version 2 as
2432 +- * published by the Free Software Foundation.
2433 +- *
2434 +- * Based on:
2435 +- * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
2436 +- *
2437 +- * Copyright (C) 2015 Martin Willi
2438 +- *
2439 +- * This program is free software; you can redistribute it and/or modify
2440 +- * it under the terms of the GNU General Public License as published by
2441 +- * the Free Software Foundation; either version 2 of the License, or
2442 +- * (at your option) any later version.
2443 +- */
2444 +-
2445 +-#include <crypto/algapi.h>
2446 +-#include <crypto/internal/chacha.h>
2447 +-#include <crypto/internal/simd.h>
2448 +-#include <crypto/internal/skcipher.h>
2449 +-#include <linux/kernel.h>
2450 +-#include <linux/module.h>
2451 +-
2452 +-#include <asm/hwcap.h>
2453 +-#include <asm/neon.h>
2454 +-#include <asm/simd.h>
2455 +-
2456 +-asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
2457 +- int nrounds);
2458 +-asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
2459 +- int nrounds);
2460 +-asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
2461 +-
2462 +-static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
2463 +- unsigned int bytes, int nrounds)
2464 +-{
2465 +- u8 buf[CHACHA_BLOCK_SIZE];
2466 +-
2467 +- while (bytes >= CHACHA_BLOCK_SIZE * 4) {
2468 +- chacha_4block_xor_neon(state, dst, src, nrounds);
2469 +- bytes -= CHACHA_BLOCK_SIZE * 4;
2470 +- src += CHACHA_BLOCK_SIZE * 4;
2471 +- dst += CHACHA_BLOCK_SIZE * 4;
2472 +- state[12] += 4;
2473 +- }
2474 +- while (bytes >= CHACHA_BLOCK_SIZE) {
2475 +- chacha_block_xor_neon(state, dst, src, nrounds);
2476 +- bytes -= CHACHA_BLOCK_SIZE;
2477 +- src += CHACHA_BLOCK_SIZE;
2478 +- dst += CHACHA_BLOCK_SIZE;
2479 +- state[12]++;
2480 +- }
2481 +- if (bytes) {
2482 +- memcpy(buf, src, bytes);
2483 +- chacha_block_xor_neon(state, buf, buf, nrounds);
2484 +- memcpy(dst, buf, bytes);
2485 +- }
2486 +-}
2487 +-
2488 +-static int chacha_neon_stream_xor(struct skcipher_request *req,
2489 +- const struct chacha_ctx *ctx, const u8 *iv)
2490 +-{
2491 +- struct skcipher_walk walk;
2492 +- u32 state[16];
2493 +- int err;
2494 +-
2495 +- err = skcipher_walk_virt(&walk, req, false);
2496 +-
2497 +- crypto_chacha_init(state, ctx, iv);
2498 +-
2499 +- while (walk.nbytes > 0) {
2500 +- unsigned int nbytes = walk.nbytes;
2501 +-
2502 +- if (nbytes < walk.total)
2503 +- nbytes = round_down(nbytes, walk.stride);
2504 +-
2505 +- kernel_neon_begin();
2506 +- chacha_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
2507 +- nbytes, ctx->nrounds);
2508 +- kernel_neon_end();
2509 +- err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
2510 +- }
2511 +-
2512 +- return err;
2513 +-}
2514 +-
2515 +-static int chacha_neon(struct skcipher_request *req)
2516 +-{
2517 +- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
2518 +- struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
2519 +-
2520 +- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
2521 +- return crypto_chacha_crypt(req);
2522 +-
2523 +- return chacha_neon_stream_xor(req, ctx, req->iv);
2524 +-}
2525 +-
2526 +-static int xchacha_neon(struct skcipher_request *req)
2527 +-{
2528 +- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
2529 +- struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
2530 +- struct chacha_ctx subctx;
2531 +- u32 state[16];
2532 +- u8 real_iv[16];
2533 +-
2534 +- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
2535 +- return crypto_xchacha_crypt(req);
2536 +-
2537 +- crypto_chacha_init(state, ctx, req->iv);
2538 +-
2539 +- kernel_neon_begin();
2540 +- hchacha_block_neon(state, subctx.key, ctx->nrounds);
2541 +- kernel_neon_end();
2542 +- subctx.nrounds = ctx->nrounds;
2543 +-
2544 +- memcpy(&real_iv[0], req->iv + 24, 8);
2545 +- memcpy(&real_iv[8], req->iv + 16, 8);
2546 +- return chacha_neon_stream_xor(req, &subctx, real_iv);
2547 +-}
2548 +-
2549 +-static struct skcipher_alg algs[] = {
2550 +- {
2551 +- .base.cra_name = "chacha20",
2552 +- .base.cra_driver_name = "chacha20-neon",
2553 +- .base.cra_priority = 300,
2554 +- .base.cra_blocksize = 1,
2555 +- .base.cra_ctxsize = sizeof(struct chacha_ctx),
2556 +- .base.cra_module = THIS_MODULE,
2557 +-
2558 +- .min_keysize = CHACHA_KEY_SIZE,
2559 +- .max_keysize = CHACHA_KEY_SIZE,
2560 +- .ivsize = CHACHA_IV_SIZE,
2561 +- .chunksize = CHACHA_BLOCK_SIZE,
2562 +- .walksize = 4 * CHACHA_BLOCK_SIZE,
2563 +- .setkey = crypto_chacha20_setkey,
2564 +- .encrypt = chacha_neon,
2565 +- .decrypt = chacha_neon,
2566 +- }, {
2567 +- .base.cra_name = "xchacha20",
2568 +- .base.cra_driver_name = "xchacha20-neon",
2569 +- .base.cra_priority = 300,
2570 +- .base.cra_blocksize = 1,
2571 +- .base.cra_ctxsize = sizeof(struct chacha_ctx),
2572 +- .base.cra_module = THIS_MODULE,
2573 +-
2574 +- .min_keysize = CHACHA_KEY_SIZE,
2575 +- .max_keysize = CHACHA_KEY_SIZE,
2576 +- .ivsize = XCHACHA_IV_SIZE,
2577 +- .chunksize = CHACHA_BLOCK_SIZE,
2578 +- .walksize = 4 * CHACHA_BLOCK_SIZE,
2579 +- .setkey = crypto_chacha20_setkey,
2580 +- .encrypt = xchacha_neon,
2581 +- .decrypt = xchacha_neon,
2582 +- }, {
2583 +- .base.cra_name = "xchacha12",
2584 +- .base.cra_driver_name = "xchacha12-neon",
2585 +- .base.cra_priority = 300,
2586 +- .base.cra_blocksize = 1,
2587 +- .base.cra_ctxsize = sizeof(struct chacha_ctx),
2588 +- .base.cra_module = THIS_MODULE,
2589 +-
2590 +- .min_keysize = CHACHA_KEY_SIZE,
2591 +- .max_keysize = CHACHA_KEY_SIZE,
2592 +- .ivsize = XCHACHA_IV_SIZE,
2593 +- .chunksize = CHACHA_BLOCK_SIZE,
2594 +- .walksize = 4 * CHACHA_BLOCK_SIZE,
2595 +- .setkey = crypto_chacha12_setkey,
2596 +- .encrypt = xchacha_neon,
2597 +- .decrypt = xchacha_neon,
2598 +- }
2599 +-};
2600 +-
2601 +-static int __init chacha_simd_mod_init(void)
2602 +-{
2603 +- if (!(elf_hwcap & HWCAP_NEON))
2604 +- return -ENODEV;
2605 +-
2606 +- return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
2607 +-}
2608 +-
2609 +-static void __exit chacha_simd_mod_fini(void)
2610 +-{
2611 +- crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
2612 +-}
2613 +-
2614 +-module_init(chacha_simd_mod_init);
2615 +-module_exit(chacha_simd_mod_fini);
2616 +-
2617 +-MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (NEON accelerated)");
2618 +-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@××××××.org>");
2619 +-MODULE_LICENSE("GPL v2");
2620 +-MODULE_ALIAS_CRYPTO("chacha20");
2621 +-MODULE_ALIAS_CRYPTO("chacha20-neon");
2622 +-MODULE_ALIAS_CRYPTO("xchacha20");
2623 +-MODULE_ALIAS_CRYPTO("xchacha20-neon");
2624 +-MODULE_ALIAS_CRYPTO("xchacha12");
2625 +-MODULE_ALIAS_CRYPTO("xchacha12-neon");
2626 +diff --git a/arch/arm/crypto/chacha-scalar-core.S b/arch/arm/crypto/chacha-scalar-core.S
2627 +index 2140319b64a0..2985b80a45b5 100644
2628 +--- a/arch/arm/crypto/chacha-scalar-core.S
2629 ++++ b/arch/arm/crypto/chacha-scalar-core.S
2630 +@@ -41,14 +41,6 @@
2631 + X14 .req r12
2632 + X15 .req r14
2633 +
2634 +-.Lexpand_32byte_k:
2635 +- // "expand 32-byte k"
2636 +- .word 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
2637 +-
2638 +-#ifdef __thumb2__
2639 +-# define adrl adr
2640 +-#endif
2641 +-
2642 + .macro __rev out, in, t0, t1, t2
2643 + .if __LINUX_ARM_ARCH__ >= 6
2644 + rev \out, \in
2645 +@@ -391,61 +383,65 @@
2646 + .endm // _chacha
2647 +
2648 + /*
2649 +- * void chacha20_arm(u8 *out, const u8 *in, size_t len, const u32 key[8],
2650 +- * const u32 iv[4]);
2651 ++ * void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
2652 ++ * const u32 *state, int nrounds);
2653 + */
2654 +-ENTRY(chacha20_arm)
2655 ++ENTRY(chacha_doarm)
2656 + cmp r2, #0 // len == 0?
2657 + reteq lr
2658 +
2659 ++ ldr ip, [sp]
2660 ++ cmp ip, #12
2661 ++
2662 + push {r0-r2,r4-r11,lr}
2663 +
2664 + // Push state x0-x15 onto stack.
2665 + // Also store an extra copy of x10-x11 just before the state.
2666 +
2667 +- ldr r4, [sp, #48] // iv
2668 +- mov r0, sp
2669 +- sub sp, #80
2670 +-
2671 +- // iv: x12-x15
2672 +- ldm r4, {X12,X13,X14,X15}
2673 +- stmdb r0!, {X12,X13,X14,X15}
2674 ++ add X12, r3, #48
2675 ++ ldm X12, {X12,X13,X14,X15}
2676 ++ push {X12,X13,X14,X15}
2677 ++ sub sp, sp, #64
2678 +
2679 +- // key: x4-x11
2680 +- __ldrd X8_X10, X9_X11, r3, 24
2681 ++ __ldrd X8_X10, X9_X11, r3, 40
2682 + __strd X8_X10, X9_X11, sp, 8
2683 +- stmdb r0!, {X8_X10, X9_X11}
2684 +- ldm r3, {X4-X9_X11}
2685 +- stmdb r0!, {X4-X9_X11}
2686 +-
2687 +- // constants: x0-x3
2688 +- adrl X3, .Lexpand_32byte_k
2689 +- ldm X3, {X0-X3}
2690 ++ __strd X8_X10, X9_X11, sp, 56
2691 ++ ldm r3, {X0-X9_X11}
2692 + __strd X0, X1, sp, 16
2693 + __strd X2, X3, sp, 24
2694 ++ __strd X4, X5, sp, 32
2695 ++ __strd X6, X7, sp, 40
2696 ++ __strd X8_X10, X9_X11, sp, 48
2697 +
2698 ++ beq 1f
2699 + _chacha 20
2700 +
2701 +- add sp, #76
2702 ++0: add sp, #76
2703 + pop {r4-r11, pc}
2704 +-ENDPROC(chacha20_arm)
2705 ++
2706 ++1: _chacha 12
2707 ++ b 0b
2708 ++ENDPROC(chacha_doarm)
2709 +
2710 + /*
2711 +- * void hchacha20_arm(const u32 state[16], u32 out[8]);
2712 ++ * void hchacha_block_arm(const u32 state[16], u32 out[8], int nrounds);
2713 + */
2714 +-ENTRY(hchacha20_arm)
2715 ++ENTRY(hchacha_block_arm)
2716 + push {r1,r4-r11,lr}
2717 +
2718 ++ cmp r2, #12 // ChaCha12 ?
2719 ++
2720 + mov r14, r0
2721 + ldmia r14!, {r0-r11} // load x0-x11
2722 + push {r10-r11} // store x10-x11 to stack
2723 + ldm r14, {r10-r12,r14} // load x12-x15
2724 + sub sp, #8
2725 +
2726 ++ beq 1f
2727 + _chacha_permute 20
2728 +
2729 + // Skip over (unused0-unused1, x10-x11)
2730 +- add sp, #16
2731 ++0: add sp, #16
2732 +
2733 + // Fix up rotations of x12-x15
2734 + ror X12, X12, #drot
2735 +@@ -458,4 +454,7 @@ ENTRY(hchacha20_arm)
2736 + stm r4, {X0,X1,X2,X3,X12,X13,X14,X15}
2737 +
2738 + pop {r4-r11,pc}
2739 +-ENDPROC(hchacha20_arm)
2740 ++
2741 ++1: _chacha_permute 12
2742 ++ b 0b
2743 ++ENDPROC(hchacha_block_arm)
2744 +diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c
2745 +index 46cd4297761c..b08029d7bde6 100644
2746 +--- a/arch/arm64/crypto/chacha-neon-glue.c
2747 ++++ b/arch/arm64/crypto/chacha-neon-glue.c
2748 +@@ -1,5 +1,5 @@
2749 + /*
2750 +- * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
2751 ++ * ARM NEON and scalar accelerated ChaCha and XChaCha stream ciphers,
2752 + * including ChaCha20 (RFC7539)
2753 + *
2754 + * Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@××××××.org>
2755 +--
2756 +cgit v1.2.3-4-ga26e
2757 +
2758 +
2759 +From 5d8b7bd04e0c51d8f402e47f9e34587f089c0a66 Mon Sep 17 00:00:00 2001
2760 +From: Ard Biesheuvel <ardb@××××××.org>
2761 +Date: Fri, 8 Nov 2019 13:22:15 +0100
2762 +Subject: crypto: arm/chacha - expose ARM ChaCha routine as library function
2763 +
2764 +commit a44a3430d71bad4ee56788a59fff099b291ea54c upstream.
2765 +
2766 +Expose the accelerated NEON ChaCha routine directly as a symbol
2767 +export so that users of the ChaCha library API can use it directly.
2768 +
2769 +Given that calls into the library API will always go through the
2770 +routines in this module if it is enabled, switch to static keys
2771 +to select the optimal implementation available (which may be none
2772 +at all, in which case we defer to the generic implementation for
2773 +all invocations).
2774 +
2775 +Signed-off-by: Ard Biesheuvel <ardb@××××××.org>
2776 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
2777 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
2778 +---
2779 + arch/arm/crypto/Kconfig | 1 +
2780 + arch/arm/crypto/chacha-glue.c | 41 ++++++++++++++++++++++++++++++++++++++++-
2781 + 2 files changed, 41 insertions(+), 1 deletion(-)
2782 +
2783 +diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
2784 +index cee414afeabc..b25ffec04417 100644
2785 +--- a/arch/arm/crypto/Kconfig
2786 ++++ b/arch/arm/crypto/Kconfig
2787 +@@ -129,6 +129,7 @@ config CRYPTO_CRC32_ARM_CE
2788 + config CRYPTO_CHACHA20_NEON
2789 + tristate "NEON and scalar accelerated ChaCha stream cipher algorithms"
2790 + select CRYPTO_BLKCIPHER
2791 ++ select CRYPTO_ARCH_HAVE_LIB_CHACHA
2792 +
2793 + config CRYPTO_NHPOLY1305_NEON
2794 + tristate "NEON accelerated NHPoly1305 hash function (for Adiantum)"
2795 +diff --git a/arch/arm/crypto/chacha-glue.c b/arch/arm/crypto/chacha-glue.c
2796 +index eb40efb3eb34..3f0c057aa050 100644
2797 +--- a/arch/arm/crypto/chacha-glue.c
2798 ++++ b/arch/arm/crypto/chacha-glue.c
2799 +@@ -11,6 +11,7 @@
2800 + #include <crypto/internal/chacha.h>
2801 + #include <crypto/internal/simd.h>
2802 + #include <crypto/internal/skcipher.h>
2803 ++#include <linux/jump_label.h>
2804 + #include <linux/kernel.h>
2805 + #include <linux/module.h>
2806 +
2807 +@@ -29,9 +30,11 @@ asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
2808 + asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
2809 + const u32 *state, int nrounds);
2810 +
2811 ++static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon);
2812 ++
2813 + static inline bool neon_usable(void)
2814 + {
2815 +- return crypto_simd_usable();
2816 ++ return static_branch_likely(&use_neon) && crypto_simd_usable();
2817 + }
2818 +
2819 + static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
2820 +@@ -60,6 +63,40 @@ static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
2821 + }
2822 + }
2823 +
2824 ++void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
2825 ++{
2826 ++ if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) {
2827 ++ hchacha_block_arm(state, stream, nrounds);
2828 ++ } else {
2829 ++ kernel_neon_begin();
2830 ++ hchacha_block_neon(state, stream, nrounds);
2831 ++ kernel_neon_end();
2832 ++ }
2833 ++}
2834 ++EXPORT_SYMBOL(hchacha_block_arch);
2835 ++
2836 ++void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
2837 ++{
2838 ++ chacha_init_generic(state, key, iv);
2839 ++}
2840 ++EXPORT_SYMBOL(chacha_init_arch);
2841 ++
2842 ++void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
2843 ++ int nrounds)
2844 ++{
2845 ++ if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() ||
2846 ++ bytes <= CHACHA_BLOCK_SIZE) {
2847 ++ chacha_doarm(dst, src, bytes, state, nrounds);
2848 ++ state[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE);
2849 ++ return;
2850 ++ }
2851 ++
2852 ++ kernel_neon_begin();
2853 ++ chacha_doneon(state, dst, src, bytes, nrounds);
2854 ++ kernel_neon_end();
2855 ++}
2856 ++EXPORT_SYMBOL(chacha_crypt_arch);
2857 ++
2858 + static int chacha_stream_xor(struct skcipher_request *req,
2859 + const struct chacha_ctx *ctx, const u8 *iv,
2860 + bool neon)
2861 +@@ -269,6 +306,8 @@ static int __init chacha_simd_mod_init(void)
2862 + for (i = 0; i < ARRAY_SIZE(neon_algs); i++)
2863 + neon_algs[i].base.cra_priority = 0;
2864 + break;
2865 ++ default:
2866 ++ static_branch_enable(&use_neon);
2867 + }
2868 +
2869 + err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
2870 +--
2871 +cgit v1.2.3-4-ga26e
2872 +
2873 +
2874 +From 9845c5f8d99afa2466300a71f346caa74581bdd9 Mon Sep 17 00:00:00 2001
2875 +From: "Jason A. Donenfeld" <Jason@×××××.com>
2876 +Date: Fri, 8 Nov 2019 13:22:16 +0100
2877 +Subject: crypto: mips/chacha - import 32r2 ChaCha code from Zinc
2878 +MIME-Version: 1.0
2879 +Content-Type: text/plain; charset=UTF-8
2880 +Content-Transfer-Encoding: 8bit
2881 +
2882 +commit 49aa7c00eddf8d8f462b0256bd82e81762d7b0c6 upstream.
2883 +
2884 +This imports the accelerated MIPS 32r2 ChaCha20 implementation from the
2885 +Zinc patch set.
2886 +
2887 +Co-developed-by: René van Dorst <opensource@××××××.com>
2888 +Signed-off-by: René van Dorst <opensource@××××××.com>
2889 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
2890 +Signed-off-by: Ard Biesheuvel <ardb@××××××.org>
2891 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
2892 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
2893 +---
2894 + arch/mips/crypto/chacha-core.S | 424 +++++++++++++++++++++++++++++++++++++++++
2895 + 1 file changed, 424 insertions(+)
2896 + create mode 100644 arch/mips/crypto/chacha-core.S
2897 +
2898 +diff --git a/arch/mips/crypto/chacha-core.S b/arch/mips/crypto/chacha-core.S
2899 +new file mode 100644
2900 +index 000000000000..a81e02db95e7
2901 +--- /dev/null
2902 ++++ b/arch/mips/crypto/chacha-core.S
2903 +@@ -0,0 +1,424 @@
2904 ++/* SPDX-License-Identifier: GPL-2.0 OR MIT */
2905 ++/*
2906 ++ * Copyright (C) 2016-2018 René van Dorst <opensource@××××××.com>. All Rights Reserved.
2907 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
2908 ++ */
2909 ++
2910 ++#define MASK_U32 0x3c
2911 ++#define CHACHA20_BLOCK_SIZE 64
2912 ++#define STACK_SIZE 32
2913 ++
2914 ++#define X0 $t0
2915 ++#define X1 $t1
2916 ++#define X2 $t2
2917 ++#define X3 $t3
2918 ++#define X4 $t4
2919 ++#define X5 $t5
2920 ++#define X6 $t6
2921 ++#define X7 $t7
2922 ++#define X8 $t8
2923 ++#define X9 $t9
2924 ++#define X10 $v1
2925 ++#define X11 $s6
2926 ++#define X12 $s5
2927 ++#define X13 $s4
2928 ++#define X14 $s3
2929 ++#define X15 $s2
2930 ++/* Use regs which are overwritten on exit for Tx so we don't leak clear data. */
2931 ++#define T0 $s1
2932 ++#define T1 $s0
2933 ++#define T(n) T ## n
2934 ++#define X(n) X ## n
2935 ++
2936 ++/* Input arguments */
2937 ++#define STATE $a0
2938 ++#define OUT $a1
2939 ++#define IN $a2
2940 ++#define BYTES $a3
2941 ++
2942 ++/* Output argument */
2943 ++/* NONCE[0] is kept in a register and not in memory.
2944 ++ * We don't want to touch original value in memory.
2945 ++ * Must be incremented every loop iteration.
2946 ++ */
2947 ++#define NONCE_0 $v0
2948 ++
2949 ++/* SAVED_X and SAVED_CA are set in the jump table.
2950 ++ * Use regs which are overwritten on exit so we don't leak clear data.
2951 ++ * They are used to handle the last bytes, which are not a multiple of 4.
2952 ++ */
2953 ++#define SAVED_X X15
2954 ++#define SAVED_CA $s7
2955 ++
2956 ++#define IS_UNALIGNED $s7
2957 ++
2958 ++#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
2959 ++#define MSB 0
2960 ++#define LSB 3
2961 ++#define ROTx rotl
2962 ++#define ROTR(n) rotr n, 24
2963 ++#define CPU_TO_LE32(n) \
2964 ++ wsbh n; \
2965 ++ rotr n, 16;
2966 ++#else
2967 ++#define MSB 3
2968 ++#define LSB 0
2969 ++#define ROTx rotr
2970 ++#define CPU_TO_LE32(n)
2971 ++#define ROTR(n)
2972 ++#endif
2973 ++
2974 ++#define FOR_EACH_WORD(x) \
2975 ++ x( 0); \
2976 ++ x( 1); \
2977 ++ x( 2); \
2978 ++ x( 3); \
2979 ++ x( 4); \
2980 ++ x( 5); \
2981 ++ x( 6); \
2982 ++ x( 7); \
2983 ++ x( 8); \
2984 ++ x( 9); \
2985 ++ x(10); \
2986 ++ x(11); \
2987 ++ x(12); \
2988 ++ x(13); \
2989 ++ x(14); \
2990 ++ x(15);
2991 ++
2992 ++#define FOR_EACH_WORD_REV(x) \
2993 ++ x(15); \
2994 ++ x(14); \
2995 ++ x(13); \
2996 ++ x(12); \
2997 ++ x(11); \
2998 ++ x(10); \
2999 ++ x( 9); \
3000 ++ x( 8); \
3001 ++ x( 7); \
3002 ++ x( 6); \
3003 ++ x( 5); \
3004 ++ x( 4); \
3005 ++ x( 3); \
3006 ++ x( 2); \
3007 ++ x( 1); \
3008 ++ x( 0);
3009 ++
3010 ++#define PLUS_ONE_0 1
3011 ++#define PLUS_ONE_1 2
3012 ++#define PLUS_ONE_2 3
3013 ++#define PLUS_ONE_3 4
3014 ++#define PLUS_ONE_4 5
3015 ++#define PLUS_ONE_5 6
3016 ++#define PLUS_ONE_6 7
3017 ++#define PLUS_ONE_7 8
3018 ++#define PLUS_ONE_8 9
3019 ++#define PLUS_ONE_9 10
3020 ++#define PLUS_ONE_10 11
3021 ++#define PLUS_ONE_11 12
3022 ++#define PLUS_ONE_12 13
3023 ++#define PLUS_ONE_13 14
3024 ++#define PLUS_ONE_14 15
3025 ++#define PLUS_ONE_15 16
3026 ++#define PLUS_ONE(x) PLUS_ONE_ ## x
3027 ++#define _CONCAT3(a,b,c) a ## b ## c
3028 ++#define CONCAT3(a,b,c) _CONCAT3(a,b,c)
3029 ++
3030 ++#define STORE_UNALIGNED(x) \
3031 ++CONCAT3(.Lchacha20_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \
3032 ++ .if (x != 12); \
3033 ++ lw T0, (x*4)(STATE); \
3034 ++ .endif; \
3035 ++ lwl T1, (x*4)+MSB ## (IN); \
3036 ++ lwr T1, (x*4)+LSB ## (IN); \
3037 ++ .if (x == 12); \
3038 ++ addu X ## x, NONCE_0; \
3039 ++ .else; \
3040 ++ addu X ## x, T0; \
3041 ++ .endif; \
3042 ++ CPU_TO_LE32(X ## x); \
3043 ++ xor X ## x, T1; \
3044 ++ swl X ## x, (x*4)+MSB ## (OUT); \
3045 ++ swr X ## x, (x*4)+LSB ## (OUT);
3046 ++
3047 ++#define STORE_ALIGNED(x) \
3048 ++CONCAT3(.Lchacha20_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \
3049 ++ .if (x != 12); \
3050 ++ lw T0, (x*4)(STATE); \
3051 ++ .endif; \
3052 ++ lw T1, (x*4) ## (IN); \
3053 ++ .if (x == 12); \
3054 ++ addu X ## x, NONCE_0; \
3055 ++ .else; \
3056 ++ addu X ## x, T0; \
3057 ++ .endif; \
3058 ++ CPU_TO_LE32(X ## x); \
3059 ++ xor X ## x, T1; \
3060 ++ sw X ## x, (x*4) ## (OUT);
3061 ++
3062 ++/* Jump table macro.
3063 ++ * Used for setup and handling the last bytes, which are not a multiple of 4.
3064 ++ * X15 is free to store Xn
3065 ++ * Every jumptable entry must be equal in size.
3066 ++ */
3067 ++#define JMPTBL_ALIGNED(x) \
3068 ++.Lchacha20_mips_jmptbl_aligned_ ## x: ; \
3069 ++ .set noreorder; \
3070 ++ b .Lchacha20_mips_xor_aligned_ ## x ## _b; \
3071 ++ .if (x == 12); \
3072 ++ addu SAVED_X, X ## x, NONCE_0; \
3073 ++ .else; \
3074 ++ addu SAVED_X, X ## x, SAVED_CA; \
3075 ++ .endif; \
3076 ++ .set reorder
3077 ++
3078 ++#define JMPTBL_UNALIGNED(x) \
3079 ++.Lchacha20_mips_jmptbl_unaligned_ ## x: ; \
3080 ++ .set noreorder; \
3081 ++ b .Lchacha20_mips_xor_unaligned_ ## x ## _b; \
3082 ++ .if (x == 12); \
3083 ++ addu SAVED_X, X ## x, NONCE_0; \
3084 ++ .else; \
3085 ++ addu SAVED_X, X ## x, SAVED_CA; \
3086 ++ .endif; \
3087 ++ .set reorder
3088 ++
3089 ++#define AXR(A, B, C, D, K, L, M, N, V, W, Y, Z, S) \
3090 ++ addu X(A), X(K); \
3091 ++ addu X(B), X(L); \
3092 ++ addu X(C), X(M); \
3093 ++ addu X(D), X(N); \
3094 ++ xor X(V), X(A); \
3095 ++ xor X(W), X(B); \
3096 ++ xor X(Y), X(C); \
3097 ++ xor X(Z), X(D); \
3098 ++ rotl X(V), S; \
3099 ++ rotl X(W), S; \
3100 ++ rotl X(Y), S; \
3101 ++ rotl X(Z), S;
3102 ++
3103 ++.text
3104 ++.set reorder
3105 ++.set noat
3106 ++.globl chacha20_mips
3107 ++.ent chacha20_mips
3108 ++chacha20_mips:
3109 ++ .frame $sp, STACK_SIZE, $ra
3110 ++
3111 ++ addiu $sp, -STACK_SIZE
3112 ++
3113 ++ /* Return bytes = 0. */
3114 ++ beqz BYTES, .Lchacha20_mips_end
3115 ++
3116 ++ lw NONCE_0, 48(STATE)
3117 ++
3118 ++ /* Save s0-s7 */
3119 ++ sw $s0, 0($sp)
3120 ++ sw $s1, 4($sp)
3121 ++ sw $s2, 8($sp)
3122 ++ sw $s3, 12($sp)
3123 ++ sw $s4, 16($sp)
3124 ++ sw $s5, 20($sp)
3125 ++ sw $s6, 24($sp)
3126 ++ sw $s7, 28($sp)
3127 ++
3128 ++ /* Test IN or OUT is unaligned.
3129 ++ * IS_UNALIGNED = ( IN | OUT ) & 0x00000003
3130 ++ */
3131 ++ or IS_UNALIGNED, IN, OUT
3132 ++ andi IS_UNALIGNED, 0x3
3133 ++
3134 ++ /* Set number of rounds */
3135 ++ li $at, 20
3136 ++
3137 ++ b .Lchacha20_rounds_start
3138 ++
3139 ++.align 4
3140 ++.Loop_chacha20_rounds:
3141 ++ addiu IN, CHACHA20_BLOCK_SIZE
3142 ++ addiu OUT, CHACHA20_BLOCK_SIZE
3143 ++ addiu NONCE_0, 1
3144 ++
3145 ++.Lchacha20_rounds_start:
3146 ++ lw X0, 0(STATE)
3147 ++ lw X1, 4(STATE)
3148 ++ lw X2, 8(STATE)
3149 ++ lw X3, 12(STATE)
3150 ++
3151 ++ lw X4, 16(STATE)
3152 ++ lw X5, 20(STATE)
3153 ++ lw X6, 24(STATE)
3154 ++ lw X7, 28(STATE)
3155 ++ lw X8, 32(STATE)
3156 ++ lw X9, 36(STATE)
3157 ++ lw X10, 40(STATE)
3158 ++ lw X11, 44(STATE)
3159 ++
3160 ++ move X12, NONCE_0
3161 ++ lw X13, 52(STATE)
3162 ++ lw X14, 56(STATE)
3163 ++ lw X15, 60(STATE)
3164 ++
3165 ++.Loop_chacha20_xor_rounds:
3166 ++ addiu $at, -2
3167 ++ AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 16);
3168 ++ AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 12);
3169 ++ AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 8);
3170 ++ AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 7);
3171 ++ AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 16);
3172 ++ AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 12);
3173 ++ AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 8);
3174 ++ AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 7);
3175 ++ bnez $at, .Loop_chacha20_xor_rounds
3176 ++
3177 ++ addiu BYTES, -(CHACHA20_BLOCK_SIZE)
3178 ++
3179 ++ /* Is data src/dst unaligned? Jump */
3180 ++ bnez IS_UNALIGNED, .Loop_chacha20_unaligned
3181 ++
3182 ++ /* Set number rounds here to fill delayslot. */
3183 ++ li $at, 20
3184 ++
3185 ++ /* BYTES < 0, it has no full block. */
3186 ++ bltz BYTES, .Lchacha20_mips_no_full_block_aligned
3187 ++
3188 ++ FOR_EACH_WORD_REV(STORE_ALIGNED)
3189 ++
3190 ++ /* BYTES > 0? Loop again. */
3191 ++ bgtz BYTES, .Loop_chacha20_rounds
3192 ++
3193 ++ /* Place this here to fill delay slot */
3194 ++ addiu NONCE_0, 1
3195 ++
3196 ++ /* BYTES < 0? Handle last bytes */
3197 ++ bltz BYTES, .Lchacha20_mips_xor_bytes
3198 ++
3199 ++.Lchacha20_mips_xor_done:
3200 ++ /* Restore used registers */
3201 ++ lw $s0, 0($sp)
3202 ++ lw $s1, 4($sp)
3203 ++ lw $s2, 8($sp)
3204 ++ lw $s3, 12($sp)
3205 ++ lw $s4, 16($sp)
3206 ++ lw $s5, 20($sp)
3207 ++ lw $s6, 24($sp)
3208 ++ lw $s7, 28($sp)
3209 ++
3210 ++ /* Write NONCE_0 back to right location in state */
3211 ++ sw NONCE_0, 48(STATE)
3212 ++
3213 ++.Lchacha20_mips_end:
3214 ++ addiu $sp, STACK_SIZE
3215 ++ jr $ra
3216 ++
3217 ++.Lchacha20_mips_no_full_block_aligned:
3218 ++ /* Restore the offset on BYTES */
3219 ++ addiu BYTES, CHACHA20_BLOCK_SIZE
3220 ++
3221 ++ /* Get number of full WORDS */
3222 ++ andi $at, BYTES, MASK_U32
3223 ++
3224 ++ /* Load upper half of jump table addr */
3225 ++ lui T0, %hi(.Lchacha20_mips_jmptbl_aligned_0)
3226 ++
3227 ++ /* Calculate lower half jump table offset */
3228 ++ ins T0, $at, 1, 6
3229 ++
3230 ++ /* Add offset to STATE */
3231 ++ addu T1, STATE, $at
3232 ++
3233 ++ /* Add lower half jump table addr */
3234 ++ addiu T0, %lo(.Lchacha20_mips_jmptbl_aligned_0)
3235 ++
3236 ++ /* Read value from STATE */
3237 ++ lw SAVED_CA, 0(T1)
3238 ++
3239 ++ /* Store remaining bytecounter as negative value */
3240 ++ subu BYTES, $at, BYTES
3241 ++
3242 ++ jr T0
3243 ++
3244 ++ /* Jump table */
3245 ++ FOR_EACH_WORD(JMPTBL_ALIGNED)
3246 ++
3247 ++
3248 ++.Loop_chacha20_unaligned:
3249 ++ /* Set number rounds here to fill delayslot. */
3250 ++ li $at, 20
3251 ++
3252 ++ /* BYTES > 0, it has no full block. */
3253 ++ bltz BYTES, .Lchacha20_mips_no_full_block_unaligned
3254 ++
3255 ++ FOR_EACH_WORD_REV(STORE_UNALIGNED)
3256 ++
3257 ++ /* BYTES > 0? Loop again. */
3258 ++ bgtz BYTES, .Loop_chacha20_rounds
3259 ++
3260 ++ /* Write NONCE_0 back to right location in state */
3261 ++ sw NONCE_0, 48(STATE)
3262 ++
3263 ++ .set noreorder
3264 ++ /* Fall through to byte handling */
3265 ++ bgez BYTES, .Lchacha20_mips_xor_done
3266 ++.Lchacha20_mips_xor_unaligned_0_b:
3267 ++.Lchacha20_mips_xor_aligned_0_b:
3268 ++ /* Place this here to fill delay slot */
3269 ++ addiu NONCE_0, 1
3270 ++ .set reorder
3271 ++
3272 ++.Lchacha20_mips_xor_bytes:
3273 ++ addu IN, $at
3274 ++ addu OUT, $at
3275 ++ /* First byte */
3276 ++ lbu T1, 0(IN)
3277 ++ addiu $at, BYTES, 1
3278 ++ CPU_TO_LE32(SAVED_X)
3279 ++ ROTR(SAVED_X)
3280 ++ xor T1, SAVED_X
3281 ++ sb T1, 0(OUT)
3282 ++ beqz $at, .Lchacha20_mips_xor_done
3283 ++ /* Second byte */
3284 ++ lbu T1, 1(IN)
3285 ++ addiu $at, BYTES, 2
3286 ++ ROTx SAVED_X, 8
3287 ++ xor T1, SAVED_X
3288 ++ sb T1, 1(OUT)
3289 ++ beqz $at, .Lchacha20_mips_xor_done
3290 ++ /* Third byte */
3291 ++ lbu T1, 2(IN)
3292 ++ ROTx SAVED_X, 8
3293 ++ xor T1, SAVED_X
3294 ++ sb T1, 2(OUT)
3295 ++ b .Lchacha20_mips_xor_done
3296 ++
3297 ++.Lchacha20_mips_no_full_block_unaligned:
3298 ++ /* Restore the offset on BYTES */
3299 ++ addiu BYTES, CHACHA20_BLOCK_SIZE
3300 ++
3301 ++ /* Get number of full WORDS */
3302 ++ andi $at, BYTES, MASK_U32
3303 ++
3304 ++ /* Load upper half of jump table addr */
3305 ++ lui T0, %hi(.Lchacha20_mips_jmptbl_unaligned_0)
3306 ++
3307 ++ /* Calculate lower half jump table offset */
3308 ++ ins T0, $at, 1, 6
3309 ++
3310 ++ /* Add offset to STATE */
3311 ++ addu T1, STATE, $at
3312 ++
3313 ++ /* Add lower half jump table addr */
3314 ++ addiu T0, %lo(.Lchacha20_mips_jmptbl_unaligned_0)
3315 ++
3316 ++ /* Read value from STATE */
3317 ++ lw SAVED_CA, 0(T1)
3318 ++
3319 ++ /* Store remaining bytecounter as negative value */
3320 ++ subu BYTES, $at, BYTES
3321 ++
3322 ++ jr T0
3323 ++
3324 ++ /* Jump table */
3325 ++ FOR_EACH_WORD(JMPTBL_UNALIGNED)
3326 ++.end chacha20_mips
3327 ++.set at
3328 +--
3329 +cgit v1.2.3-4-ga26e
3330 +
3331 +
3332 +From d2e98f07f007404e2b4bfe60b4dfe124364eb848 Mon Sep 17 00:00:00 2001
3333 +From: Ard Biesheuvel <ardb@××××××.org>
3334 +Date: Fri, 8 Nov 2019 13:22:17 +0100
3335 +Subject: crypto: mips/chacha - wire up accelerated 32r2 code from Zinc
3336 +MIME-Version: 1.0
3337 +Content-Type: text/plain; charset=UTF-8
3338 +Content-Transfer-Encoding: 8bit
3339 +
3340 +commit 3a2f58f3ba4f6f44e33d1a48240d5eadb882cb59 upstream.
3341 +
3342 +This integrates the accelerated MIPS 32r2 implementation of ChaCha
3343 +into both the API and library interfaces of the kernel crypto stack.
3344 +
3345 +The significance of this is that, in addition to becoming available
3346 +as an accelerated library implementation, it can also be used by
3347 +existing crypto API code such as Adiantum (for block encryption on
3348 +ultra low performance cores) or IPsec using chacha20poly1305. These
3349 +are use cases that have already opted into using the abstract crypto
3350 +API. In order to support Adiantum, the core assembler routine has
3351 +been adapted to take the round count as a function argument rather
3352 +than hardcoding it to 20.
3353 +
3354 +Co-developed-by: René van Dorst <opensource@××××××.com>
3355 +Signed-off-by: René van Dorst <opensource@××××××.com>
3356 +Signed-off-by: Ard Biesheuvel <ardb@××××××.org>
3357 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
3358 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
3359 +---
3360 + arch/mips/Makefile | 2 +-
3361 + arch/mips/crypto/Makefile | 4 ++
3362 + arch/mips/crypto/chacha-core.S | 159 ++++++++++++++++++++++++++++++-----------
3363 + arch/mips/crypto/chacha-glue.c | 150 ++++++++++++++++++++++++++++++++++++++
3364 + crypto/Kconfig | 6 ++
3365 + 5 files changed, 277 insertions(+), 44 deletions(-)
3366 + create mode 100644 arch/mips/crypto/chacha-glue.c
3367 +
3368 +diff --git a/arch/mips/Makefile b/arch/mips/Makefile
3369 +index 5403a91ce098..573409c85c81 100644
3370 +--- a/arch/mips/Makefile
3371 ++++ b/arch/mips/Makefile
3372 +@@ -334,7 +334,7 @@ libs-$(CONFIG_MIPS_FP_SUPPORT) += arch/mips/math-emu/
3373 + # See arch/mips/Kbuild for content of core part of the kernel
3374 + core-y += arch/mips/
3375 +
3376 +-drivers-$(CONFIG_MIPS_CRC_SUPPORT) += arch/mips/crypto/
3377 ++drivers-y += arch/mips/crypto/
3378 + drivers-$(CONFIG_OPROFILE) += arch/mips/oprofile/
3379 +
3380 + # suspend and hibernation support
3381 +diff --git a/arch/mips/crypto/Makefile b/arch/mips/crypto/Makefile
3382 +index e07aca572c2e..b528b9d300f1 100644
3383 +--- a/arch/mips/crypto/Makefile
3384 ++++ b/arch/mips/crypto/Makefile
3385 +@@ -4,3 +4,7 @@
3386 + #
3387 +
3388 + obj-$(CONFIG_CRYPTO_CRC32_MIPS) += crc32-mips.o
3389 ++
3390 ++obj-$(CONFIG_CRYPTO_CHACHA_MIPS) += chacha-mips.o
3391 ++chacha-mips-y := chacha-core.o chacha-glue.o
3392 ++AFLAGS_chacha-core.o += -O2 # needed to fill branch delay slots
3393 +diff --git a/arch/mips/crypto/chacha-core.S b/arch/mips/crypto/chacha-core.S
3394 +index a81e02db95e7..5755f69cfe00 100644
3395 +--- a/arch/mips/crypto/chacha-core.S
3396 ++++ b/arch/mips/crypto/chacha-core.S
3397 +@@ -125,7 +125,7 @@
3398 + #define CONCAT3(a,b,c) _CONCAT3(a,b,c)
3399 +
3400 + #define STORE_UNALIGNED(x) \
3401 +-CONCAT3(.Lchacha20_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \
3402 ++CONCAT3(.Lchacha_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \
3403 + .if (x != 12); \
3404 + lw T0, (x*4)(STATE); \
3405 + .endif; \
3406 +@@ -142,7 +142,7 @@ CONCAT3(.Lchacha20_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \
3407 + swr X ## x, (x*4)+LSB ## (OUT);
3408 +
3409 + #define STORE_ALIGNED(x) \
3410 +-CONCAT3(.Lchacha20_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \
3411 ++CONCAT3(.Lchacha_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \
3412 + .if (x != 12); \
3413 + lw T0, (x*4)(STATE); \
3414 + .endif; \
3415 +@@ -162,9 +162,9 @@ CONCAT3(.Lchacha20_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \
3416 + * Every jumptable entry must be equal in size.
3417 + */
3418 + #define JMPTBL_ALIGNED(x) \
3419 +-.Lchacha20_mips_jmptbl_aligned_ ## x: ; \
3420 ++.Lchacha_mips_jmptbl_aligned_ ## x: ; \
3421 + .set noreorder; \
3422 +- b .Lchacha20_mips_xor_aligned_ ## x ## _b; \
3423 ++ b .Lchacha_mips_xor_aligned_ ## x ## _b; \
3424 + .if (x == 12); \
3425 + addu SAVED_X, X ## x, NONCE_0; \
3426 + .else; \
3427 +@@ -173,9 +173,9 @@ CONCAT3(.Lchacha20_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \
3428 + .set reorder
3429 +
3430 + #define JMPTBL_UNALIGNED(x) \
3431 +-.Lchacha20_mips_jmptbl_unaligned_ ## x: ; \
3432 ++.Lchacha_mips_jmptbl_unaligned_ ## x: ; \
3433 + .set noreorder; \
3434 +- b .Lchacha20_mips_xor_unaligned_ ## x ## _b; \
3435 ++ b .Lchacha_mips_xor_unaligned_ ## x ## _b; \
3436 + .if (x == 12); \
3437 + addu SAVED_X, X ## x, NONCE_0; \
3438 + .else; \
3439 +@@ -200,15 +200,18 @@ CONCAT3(.Lchacha20_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \
3440 + .text
3441 + .set reorder
3442 + .set noat
3443 +-.globl chacha20_mips
3444 +-.ent chacha20_mips
3445 +-chacha20_mips:
3446 ++.globl chacha_crypt_arch
3447 ++.ent chacha_crypt_arch
3448 ++chacha_crypt_arch:
3449 + .frame $sp, STACK_SIZE, $ra
3450 +
3451 ++ /* Load number of rounds */
3452 ++ lw $at, 16($sp)
3453 ++
3454 + addiu $sp, -STACK_SIZE
3455 +
3456 + /* Return bytes = 0. */
3457 +- beqz BYTES, .Lchacha20_mips_end
3458 ++ beqz BYTES, .Lchacha_mips_end
3459 +
3460 + lw NONCE_0, 48(STATE)
3461 +
3462 +@@ -228,18 +231,15 @@ chacha20_mips:
3463 + or IS_UNALIGNED, IN, OUT
3464 + andi IS_UNALIGNED, 0x3
3465 +
3466 +- /* Set number of rounds */
3467 +- li $at, 20
3468 +-
3469 +- b .Lchacha20_rounds_start
3470 ++ b .Lchacha_rounds_start
3471 +
3472 + .align 4
3473 +-.Loop_chacha20_rounds:
3474 ++.Loop_chacha_rounds:
3475 + addiu IN, CHACHA20_BLOCK_SIZE
3476 + addiu OUT, CHACHA20_BLOCK_SIZE
3477 + addiu NONCE_0, 1
3478 +
3479 +-.Lchacha20_rounds_start:
3480 ++.Lchacha_rounds_start:
3481 + lw X0, 0(STATE)
3482 + lw X1, 4(STATE)
3483 + lw X2, 8(STATE)
3484 +@@ -259,7 +259,7 @@ chacha20_mips:
3485 + lw X14, 56(STATE)
3486 + lw X15, 60(STATE)
3487 +
3488 +-.Loop_chacha20_xor_rounds:
3489 ++.Loop_chacha_xor_rounds:
3490 + addiu $at, -2
3491 + AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 16);
3492 + AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 12);
3493 +@@ -269,31 +269,31 @@ chacha20_mips:
3494 + AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 12);
3495 + AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 8);
3496 + AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 7);
3497 +- bnez $at, .Loop_chacha20_xor_rounds
3498 ++ bnez $at, .Loop_chacha_xor_rounds
3499 +
3500 + addiu BYTES, -(CHACHA20_BLOCK_SIZE)
3501 +
3502 + /* Is data src/dst unaligned? Jump */
3503 +- bnez IS_UNALIGNED, .Loop_chacha20_unaligned
3504 ++ bnez IS_UNALIGNED, .Loop_chacha_unaligned
3505 +
3506 + /* Set number rounds here to fill delayslot. */
3507 +- li $at, 20
3508 ++ lw $at, (STACK_SIZE+16)($sp)
3509 +
3510 + /* BYTES < 0, it has no full block. */
3511 +- bltz BYTES, .Lchacha20_mips_no_full_block_aligned
3512 ++ bltz BYTES, .Lchacha_mips_no_full_block_aligned
3513 +
3514 + FOR_EACH_WORD_REV(STORE_ALIGNED)
3515 +
3516 + /* BYTES > 0? Loop again. */
3517 +- bgtz BYTES, .Loop_chacha20_rounds
3518 ++ bgtz BYTES, .Loop_chacha_rounds
3519 +
3520 + /* Place this here to fill delay slot */
3521 + addiu NONCE_0, 1
3522 +
3523 + /* BYTES < 0? Handle last bytes */
3524 +- bltz BYTES, .Lchacha20_mips_xor_bytes
3525 ++ bltz BYTES, .Lchacha_mips_xor_bytes
3526 +
3527 +-.Lchacha20_mips_xor_done:
3528 ++.Lchacha_mips_xor_done:
3529 + /* Restore used registers */
3530 + lw $s0, 0($sp)
3531 + lw $s1, 4($sp)
3532 +@@ -307,11 +307,11 @@ chacha20_mips:
3533 + /* Write NONCE_0 back to right location in state */
3534 + sw NONCE_0, 48(STATE)
3535 +
3536 +-.Lchacha20_mips_end:
3537 ++.Lchacha_mips_end:
3538 + addiu $sp, STACK_SIZE
3539 + jr $ra
3540 +
3541 +-.Lchacha20_mips_no_full_block_aligned:
3542 ++.Lchacha_mips_no_full_block_aligned:
3543 + /* Restore the offset on BYTES */
3544 + addiu BYTES, CHACHA20_BLOCK_SIZE
3545 +
3546 +@@ -319,7 +319,7 @@ chacha20_mips:
3547 + andi $at, BYTES, MASK_U32
3548 +
3549 + /* Load upper half of jump table addr */
3550 +- lui T0, %hi(.Lchacha20_mips_jmptbl_aligned_0)
3551 ++ lui T0, %hi(.Lchacha_mips_jmptbl_aligned_0)
3552 +
3553 + /* Calculate lower half jump table offset */
3554 + ins T0, $at, 1, 6
3555 +@@ -328,7 +328,7 @@ chacha20_mips:
3556 + addu T1, STATE, $at
3557 +
3558 + /* Add lower half jump table addr */
3559 +- addiu T0, %lo(.Lchacha20_mips_jmptbl_aligned_0)
3560 ++ addiu T0, %lo(.Lchacha_mips_jmptbl_aligned_0)
3561 +
3562 + /* Read value from STATE */
3563 + lw SAVED_CA, 0(T1)
3564 +@@ -342,31 +342,31 @@ chacha20_mips:
3565 + FOR_EACH_WORD(JMPTBL_ALIGNED)
3566 +
3567 +
3568 +-.Loop_chacha20_unaligned:
3569 ++.Loop_chacha_unaligned:
3570 + /* Set number rounds here to fill delayslot. */
3571 +- li $at, 20
3572 ++ lw $at, (STACK_SIZE+16)($sp)
3573 +
3574 + /* BYTES > 0, it has no full block. */
3575 +- bltz BYTES, .Lchacha20_mips_no_full_block_unaligned
3576 ++ bltz BYTES, .Lchacha_mips_no_full_block_unaligned
3577 +
3578 + FOR_EACH_WORD_REV(STORE_UNALIGNED)
3579 +
3580 + /* BYTES > 0? Loop again. */
3581 +- bgtz BYTES, .Loop_chacha20_rounds
3582 ++ bgtz BYTES, .Loop_chacha_rounds
3583 +
3584 + /* Write NONCE_0 back to right location in state */
3585 + sw NONCE_0, 48(STATE)
3586 +
3587 + .set noreorder
3588 + /* Fall through to byte handling */
3589 +- bgez BYTES, .Lchacha20_mips_xor_done
3590 +-.Lchacha20_mips_xor_unaligned_0_b:
3591 +-.Lchacha20_mips_xor_aligned_0_b:
3592 ++ bgez BYTES, .Lchacha_mips_xor_done
3593 ++.Lchacha_mips_xor_unaligned_0_b:
3594 ++.Lchacha_mips_xor_aligned_0_b:
3595 + /* Place this here to fill delay slot */
3596 + addiu NONCE_0, 1
3597 + .set reorder
3598 +
3599 +-.Lchacha20_mips_xor_bytes:
3600 ++.Lchacha_mips_xor_bytes:
3601 + addu IN, $at
3602 + addu OUT, $at
3603 + /* First byte */
3604 +@@ -376,22 +376,22 @@ chacha20_mips:
3605 + ROTR(SAVED_X)
3606 + xor T1, SAVED_X
3607 + sb T1, 0(OUT)
3608 +- beqz $at, .Lchacha20_mips_xor_done
3609 ++ beqz $at, .Lchacha_mips_xor_done
3610 + /* Second byte */
3611 + lbu T1, 1(IN)
3612 + addiu $at, BYTES, 2
3613 + ROTx SAVED_X, 8
3614 + xor T1, SAVED_X
3615 + sb T1, 1(OUT)
3616 +- beqz $at, .Lchacha20_mips_xor_done
3617 ++ beqz $at, .Lchacha_mips_xor_done
3618 + /* Third byte */
3619 + lbu T1, 2(IN)
3620 + ROTx SAVED_X, 8
3621 + xor T1, SAVED_X
3622 + sb T1, 2(OUT)
3623 +- b .Lchacha20_mips_xor_done
3624 ++ b .Lchacha_mips_xor_done
3625 +
3626 +-.Lchacha20_mips_no_full_block_unaligned:
3627 ++.Lchacha_mips_no_full_block_unaligned:
3628 + /* Restore the offset on BYTES */
3629 + addiu BYTES, CHACHA20_BLOCK_SIZE
3630 +
3631 +@@ -399,7 +399,7 @@ chacha20_mips:
3632 + andi $at, BYTES, MASK_U32
3633 +
3634 + /* Load upper half of jump table addr */
3635 +- lui T0, %hi(.Lchacha20_mips_jmptbl_unaligned_0)
3636 ++ lui T0, %hi(.Lchacha_mips_jmptbl_unaligned_0)
3637 +
3638 + /* Calculate lower half jump table offset */
3639 + ins T0, $at, 1, 6
3640 +@@ -408,7 +408,7 @@ chacha20_mips:
3641 + addu T1, STATE, $at
3642 +
3643 + /* Add lower half jump table addr */
3644 +- addiu T0, %lo(.Lchacha20_mips_jmptbl_unaligned_0)
3645 ++ addiu T0, %lo(.Lchacha_mips_jmptbl_unaligned_0)
3646 +
3647 + /* Read value from STATE */
3648 + lw SAVED_CA, 0(T1)
3649 +@@ -420,5 +420,78 @@ chacha20_mips:
3650 +
3651 + /* Jump table */
3652 + FOR_EACH_WORD(JMPTBL_UNALIGNED)
3653 +-.end chacha20_mips
3654 ++.end chacha_crypt_arch
3655 ++.set at
3656 ++
3657 ++/* Input arguments
3658 ++ * STATE $a0
3659 ++ * OUT $a1
3660 ++ * NROUND $a2
3661 ++ */
3662 ++
3663 ++#undef X12
3664 ++#undef X13
3665 ++#undef X14
3666 ++#undef X15
3667 ++
3668 ++#define X12 $a3
3669 ++#define X13 $at
3670 ++#define X14 $v0
3671 ++#define X15 STATE
3672 ++
3673 ++.set noat
3674 ++.globl hchacha_block_arch
3675 ++.ent hchacha_block_arch
3676 ++hchacha_block_arch:
3677 ++ .frame $sp, STACK_SIZE, $ra
3678 ++
3679 ++ addiu $sp, -STACK_SIZE
3680 ++
3681 ++ /* Save X11(s6) */
3682 ++ sw X11, 0($sp)
3683 ++
3684 ++ lw X0, 0(STATE)
3685 ++ lw X1, 4(STATE)
3686 ++ lw X2, 8(STATE)
3687 ++ lw X3, 12(STATE)
3688 ++ lw X4, 16(STATE)
3689 ++ lw X5, 20(STATE)
3690 ++ lw X6, 24(STATE)
3691 ++ lw X7, 28(STATE)
3692 ++ lw X8, 32(STATE)
3693 ++ lw X9, 36(STATE)
3694 ++ lw X10, 40(STATE)
3695 ++ lw X11, 44(STATE)
3696 ++ lw X12, 48(STATE)
3697 ++ lw X13, 52(STATE)
3698 ++ lw X14, 56(STATE)
3699 ++ lw X15, 60(STATE)
3700 ++
3701 ++.Loop_hchacha_xor_rounds:
3702 ++ addiu $a2, -2
3703 ++ AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 16);
3704 ++ AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 12);
3705 ++ AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 8);
3706 ++ AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 7);
3707 ++ AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 16);
3708 ++ AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 12);
3709 ++ AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 8);
3710 ++ AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 7);
3711 ++ bnez $a2, .Loop_hchacha_xor_rounds
3712 ++
3713 ++ /* Restore used register */
3714 ++ lw X11, 0($sp)
3715 ++
3716 ++ sw X0, 0(OUT)
3717 ++ sw X1, 4(OUT)
3718 ++ sw X2, 8(OUT)
3719 ++ sw X3, 12(OUT)
3720 ++ sw X12, 16(OUT)
3721 ++ sw X13, 20(OUT)
3722 ++ sw X14, 24(OUT)
3723 ++ sw X15, 28(OUT)
3724 ++
3725 ++ addiu $sp, STACK_SIZE
3726 ++ jr $ra
3727 ++.end hchacha_block_arch
3728 + .set at
3729 +diff --git a/arch/mips/crypto/chacha-glue.c b/arch/mips/crypto/chacha-glue.c
3730 +new file mode 100644
3731 +index 000000000000..779e399c9bef
3732 +--- /dev/null
3733 ++++ b/arch/mips/crypto/chacha-glue.c
3734 +@@ -0,0 +1,150 @@
3735 ++// SPDX-License-Identifier: GPL-2.0
3736 ++/*
3737 ++ * MIPS accelerated ChaCha and XChaCha stream ciphers,
3738 ++ * including ChaCha20 (RFC7539)
3739 ++ *
3740 ++ * Copyright (C) 2019 Linaro, Ltd. <ard.biesheuvel@××××××.org>
3741 ++ */
3742 ++
3743 ++#include <asm/byteorder.h>
3744 ++#include <crypto/algapi.h>
3745 ++#include <crypto/internal/chacha.h>
3746 ++#include <crypto/internal/skcipher.h>
3747 ++#include <linux/kernel.h>
3748 ++#include <linux/module.h>
3749 ++
3750 ++asmlinkage void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src,
3751 ++ unsigned int bytes, int nrounds);
3752 ++EXPORT_SYMBOL(chacha_crypt_arch);
3753 ++
3754 ++asmlinkage void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds);
3755 ++EXPORT_SYMBOL(hchacha_block_arch);
3756 ++
3757 ++void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
3758 ++{
3759 ++ chacha_init_generic(state, key, iv);
3760 ++}
3761 ++EXPORT_SYMBOL(chacha_init_arch);
3762 ++
3763 ++static int chacha_mips_stream_xor(struct skcipher_request *req,
3764 ++ const struct chacha_ctx *ctx, const u8 *iv)
3765 ++{
3766 ++ struct skcipher_walk walk;
3767 ++ u32 state[16];
3768 ++ int err;
3769 ++
3770 ++ err = skcipher_walk_virt(&walk, req, false);
3771 ++
3772 ++ chacha_init_generic(state, ctx->key, iv);
3773 ++
3774 ++ while (walk.nbytes > 0) {
3775 ++ unsigned int nbytes = walk.nbytes;
3776 ++
3777 ++ if (nbytes < walk.total)
3778 ++ nbytes = round_down(nbytes, walk.stride);
3779 ++
3780 ++ chacha_crypt(state, walk.dst.virt.addr, walk.src.virt.addr,
3781 ++ nbytes, ctx->nrounds);
3782 ++ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
3783 ++ }
3784 ++
3785 ++ return err;
3786 ++}
3787 ++
3788 ++static int chacha_mips(struct skcipher_request *req)
3789 ++{
3790 ++ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
3791 ++ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
3792 ++
3793 ++ return chacha_mips_stream_xor(req, ctx, req->iv);
3794 ++}
3795 ++
3796 ++static int xchacha_mips(struct skcipher_request *req)
3797 ++{
3798 ++ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
3799 ++ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
3800 ++ struct chacha_ctx subctx;
3801 ++ u32 state[16];
3802 ++ u8 real_iv[16];
3803 ++
3804 ++ chacha_init_generic(state, ctx->key, req->iv);
3805 ++
3806 ++ hchacha_block(state, subctx.key, ctx->nrounds);
3807 ++ subctx.nrounds = ctx->nrounds;
3808 ++
3809 ++ memcpy(&real_iv[0], req->iv + 24, 8);
3810 ++ memcpy(&real_iv[8], req->iv + 16, 8);
3811 ++ return chacha_mips_stream_xor(req, &subctx, real_iv);
3812 ++}
3813 ++
3814 ++static struct skcipher_alg algs[] = {
3815 ++ {
3816 ++ .base.cra_name = "chacha20",
3817 ++ .base.cra_driver_name = "chacha20-mips",
3818 ++ .base.cra_priority = 200,
3819 ++ .base.cra_blocksize = 1,
3820 ++ .base.cra_ctxsize = sizeof(struct chacha_ctx),
3821 ++ .base.cra_module = THIS_MODULE,
3822 ++
3823 ++ .min_keysize = CHACHA_KEY_SIZE,
3824 ++ .max_keysize = CHACHA_KEY_SIZE,
3825 ++ .ivsize = CHACHA_IV_SIZE,
3826 ++ .chunksize = CHACHA_BLOCK_SIZE,
3827 ++ .setkey = chacha20_setkey,
3828 ++ .encrypt = chacha_mips,
3829 ++ .decrypt = chacha_mips,
3830 ++ }, {
3831 ++ .base.cra_name = "xchacha20",
3832 ++ .base.cra_driver_name = "xchacha20-mips",
3833 ++ .base.cra_priority = 200,
3834 ++ .base.cra_blocksize = 1,
3835 ++ .base.cra_ctxsize = sizeof(struct chacha_ctx),
3836 ++ .base.cra_module = THIS_MODULE,
3837 ++
3838 ++ .min_keysize = CHACHA_KEY_SIZE,
3839 ++ .max_keysize = CHACHA_KEY_SIZE,
3840 ++ .ivsize = XCHACHA_IV_SIZE,
3841 ++ .chunksize = CHACHA_BLOCK_SIZE,
3842 ++ .setkey = chacha20_setkey,
3843 ++ .encrypt = xchacha_mips,
3844 ++ .decrypt = xchacha_mips,
3845 ++ }, {
3846 ++ .base.cra_name = "xchacha12",
3847 ++ .base.cra_driver_name = "xchacha12-mips",
3848 ++ .base.cra_priority = 200,
3849 ++ .base.cra_blocksize = 1,
3850 ++ .base.cra_ctxsize = sizeof(struct chacha_ctx),
3851 ++ .base.cra_module = THIS_MODULE,
3852 ++
3853 ++ .min_keysize = CHACHA_KEY_SIZE,
3854 ++ .max_keysize = CHACHA_KEY_SIZE,
3855 ++ .ivsize = XCHACHA_IV_SIZE,
3856 ++ .chunksize = CHACHA_BLOCK_SIZE,
3857 ++ .setkey = chacha12_setkey,
3858 ++ .encrypt = xchacha_mips,
3859 ++ .decrypt = xchacha_mips,
3860 ++ }
3861 ++};
3862 ++
3863 ++static int __init chacha_simd_mod_init(void)
3864 ++{
3865 ++ return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
3866 ++}
3867 ++
3868 ++static void __exit chacha_simd_mod_fini(void)
3869 ++{
3870 ++ crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
3871 ++}
3872 ++
3873 ++module_init(chacha_simd_mod_init);
3874 ++module_exit(chacha_simd_mod_fini);
3875 ++
3876 ++MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (MIPS accelerated)");
3877 ++MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@××××××.org>");
3878 ++MODULE_LICENSE("GPL v2");
3879 ++MODULE_ALIAS_CRYPTO("chacha20");
3880 ++MODULE_ALIAS_CRYPTO("chacha20-mips");
3881 ++MODULE_ALIAS_CRYPTO("xchacha20");
3882 ++MODULE_ALIAS_CRYPTO("xchacha20-mips");
3883 ++MODULE_ALIAS_CRYPTO("xchacha12");
3884 ++MODULE_ALIAS_CRYPTO("xchacha12-mips");
3885 +diff --git a/crypto/Kconfig b/crypto/Kconfig
3886 +index 649dc564f242..6b5e14cee475 100644
3887 +--- a/crypto/Kconfig
3888 ++++ b/crypto/Kconfig
3889 +@@ -1423,6 +1423,12 @@ config CRYPTO_CHACHA20_X86_64
3890 + SSSE3, AVX2, and AVX-512VL optimized implementations of the ChaCha20,
3891 + XChaCha20, and XChaCha12 stream ciphers.
3892 +
3893 ++config CRYPTO_CHACHA_MIPS
3894 ++ tristate "ChaCha stream cipher algorithms (MIPS 32r2 optimized)"
3895 ++ depends on CPU_MIPS32_R2
3896 ++ select CRYPTO_BLKCIPHER
3897 ++ select CRYPTO_ARCH_HAVE_LIB_CHACHA
3898 ++
3899 + config CRYPTO_SEED
3900 + tristate "SEED cipher algorithm"
3901 + select CRYPTO_ALGAPI
3902 +--
3903 +cgit v1.2.3-4-ga26e
3904 +
3905 +
3906 +From 1051837587fe3d6bcb1534628b62ffe9ede1b023 Mon Sep 17 00:00:00 2001
3907 +From: Ard Biesheuvel <ardb@××××××.org>
3908 +Date: Fri, 8 Nov 2019 13:22:18 +0100
3909 +Subject: crypto: chacha - unexport chacha_generic routines
3910 +
3911 +commit 22cf705360707ced15f9fe5423938f313c7df536 upstream.
3912 +
3913 +Now that all users of generic ChaCha code have moved to the core library,
3914 +there is no longer a need for the generic ChaCha skcipher driver to
3915 +export parts of its implementation for reuse by other drivers. So drop
3916 +the exports, and make the symbols static.
3917 +
3918 +Signed-off-by: Ard Biesheuvel <ardb@××××××.org>
3919 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
3920 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
3921 +---
3922 + crypto/chacha_generic.c | 26 ++++++++------------------
3923 + include/crypto/internal/chacha.h | 10 ----------
3924 + 2 files changed, 8 insertions(+), 28 deletions(-)
3925 +
3926 +diff --git a/crypto/chacha_generic.c b/crypto/chacha_generic.c
3927 +index ebae6d9d9b32..c1b147318393 100644
3928 +--- a/crypto/chacha_generic.c
3929 ++++ b/crypto/chacha_generic.c
3930 +@@ -21,7 +21,7 @@ static int chacha_stream_xor(struct skcipher_request *req,
3931 +
3932 + err = skcipher_walk_virt(&walk, req, false);
3933 +
3934 +- crypto_chacha_init(state, ctx, iv);
3935 ++ chacha_init_generic(state, ctx->key, iv);
3936 +
3937 + while (walk.nbytes > 0) {
3938 + unsigned int nbytes = walk.nbytes;
3939 +@@ -37,36 +37,27 @@ static int chacha_stream_xor(struct skcipher_request *req,
3940 + return err;
3941 + }
3942 +
3943 +-void crypto_chacha_init(u32 *state, const struct chacha_ctx *ctx, const u8 *iv)
3944 +-{
3945 +- chacha_init_generic(state, ctx->key, iv);
3946 +-}
3947 +-EXPORT_SYMBOL_GPL(crypto_chacha_init);
3948 +-
3949 +-int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
3950 +- unsigned int keysize)
3951 ++static int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
3952 ++ unsigned int keysize)
3953 + {
3954 + return chacha_setkey(tfm, key, keysize, 20);
3955 + }
3956 +-EXPORT_SYMBOL_GPL(crypto_chacha20_setkey);
3957 +
3958 +-int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key,
3959 +- unsigned int keysize)
3960 ++static int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key,
3961 ++ unsigned int keysize)
3962 + {
3963 + return chacha_setkey(tfm, key, keysize, 12);
3964 + }
3965 +-EXPORT_SYMBOL_GPL(crypto_chacha12_setkey);
3966 +
3967 +-int crypto_chacha_crypt(struct skcipher_request *req)
3968 ++static int crypto_chacha_crypt(struct skcipher_request *req)
3969 + {
3970 + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
3971 + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
3972 +
3973 + return chacha_stream_xor(req, ctx, req->iv);
3974 + }
3975 +-EXPORT_SYMBOL_GPL(crypto_chacha_crypt);
3976 +
3977 +-int crypto_xchacha_crypt(struct skcipher_request *req)
3978 ++static int crypto_xchacha_crypt(struct skcipher_request *req)
3979 + {
3980 + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
3981 + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
3982 +@@ -75,7 +66,7 @@ int crypto_xchacha_crypt(struct skcipher_request *req)
3983 + u8 real_iv[16];
3984 +
3985 + /* Compute the subkey given the original key and first 128 nonce bits */
3986 +- crypto_chacha_init(state, ctx, req->iv);
3987 ++ chacha_init_generic(state, ctx->key, req->iv);
3988 + hchacha_block_generic(state, subctx.key, ctx->nrounds);
3989 + subctx.nrounds = ctx->nrounds;
3990 +
3991 +@@ -86,7 +77,6 @@ int crypto_xchacha_crypt(struct skcipher_request *req)
3992 + /* Generate the stream and XOR it with the data */
3993 + return chacha_stream_xor(req, &subctx, real_iv);
3994 + }
3995 +-EXPORT_SYMBOL_GPL(crypto_xchacha_crypt);
3996 +
3997 + static struct skcipher_alg algs[] = {
3998 + {
3999 +diff --git a/include/crypto/internal/chacha.h b/include/crypto/internal/chacha.h
4000 +index c0e40b245431..aa5d4a16aac5 100644
4001 +--- a/include/crypto/internal/chacha.h
4002 ++++ b/include/crypto/internal/chacha.h
4003 +@@ -12,8 +12,6 @@ struct chacha_ctx {
4004 + int nrounds;
4005 + };
4006 +
4007 +-void crypto_chacha_init(u32 *state, const struct chacha_ctx *ctx, const u8 *iv);
4008 +-
4009 + static inline int chacha_setkey(struct crypto_skcipher *tfm, const u8 *key,
4010 + unsigned int keysize, int nrounds)
4011 + {
4012 +@@ -42,12 +40,4 @@ static int inline chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key,
4013 + return chacha_setkey(tfm, key, keysize, 12);
4014 + }
4015 +
4016 +-int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
4017 +- unsigned int keysize);
4018 +-int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key,
4019 +- unsigned int keysize);
4020 +-
4021 +-int crypto_chacha_crypt(struct skcipher_request *req);
4022 +-int crypto_xchacha_crypt(struct skcipher_request *req);
4023 +-
4024 + #endif /* _CRYPTO_CHACHA_H */
4025 +--
4026 +cgit v1.2.3-4-ga26e
4027 +
4028 +
4029 +From d0eff888076fb2a385b05d8276216e66d2ae5923 Mon Sep 17 00:00:00 2001
4030 +From: Ard Biesheuvel <ardb@××××××.org>
4031 +Date: Fri, 8 Nov 2019 13:22:19 +0100
4032 +Subject: crypto: poly1305 - move core routines into a separate library
4033 +
4034 +commit 48ea8c6ebc96bc0990e12ee1c43d0832c23576bb upstream.
4035 +
4036 +Move the core Poly1305 routines shared between the generic Poly1305
4037 +shash driver and the Adiantum and NHPoly1305 drivers into a separate
4038 +library so that using just these pieces does not pull in the crypto
4039 +API pieces of the generic Poly1305 routine.
4040 +
4041 +In a subsequent patch, we will augment this generic library with
4042 +init/update/final routines so that the Poly1305 algorithm can be used
4043 +directly without the need for using the crypto API's shash abstraction.
4044 +
4045 +Signed-off-by: Ard Biesheuvel <ardb@××××××.org>
4046 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
4047 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
4048 +---
4049 + arch/x86/crypto/poly1305_glue.c | 2 +-
4050 + crypto/Kconfig | 5 +-
4051 + crypto/adiantum.c | 5 +-
4052 + crypto/nhpoly1305.c | 3 +-
4053 + crypto/poly1305_generic.c | 195 ++-----------------------------------
4054 + include/crypto/internal/poly1305.h | 67 +++++++++++++
4055 + include/crypto/poly1305.h | 23 -----
4056 + lib/crypto/Kconfig | 3 +
4057 + lib/crypto/Makefile | 3 +
4058 + lib/crypto/poly1305.c | 158 ++++++++++++++++++++++++++++++
4059 + 10 files changed, 248 insertions(+), 216 deletions(-)
4060 + create mode 100644 include/crypto/internal/poly1305.h
4061 + create mode 100644 lib/crypto/poly1305.c
4062 +
4063 +diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
4064 +index 4a1c05dce950..6ccf8eb26324 100644
4065 +--- a/arch/x86/crypto/poly1305_glue.c
4066 ++++ b/arch/x86/crypto/poly1305_glue.c
4067 +@@ -7,8 +7,8 @@
4068 +
4069 + #include <crypto/algapi.h>
4070 + #include <crypto/internal/hash.h>
4071 ++#include <crypto/internal/poly1305.h>
4072 + #include <crypto/internal/simd.h>
4073 +-#include <crypto/poly1305.h>
4074 + #include <linux/crypto.h>
4075 + #include <linux/kernel.h>
4076 + #include <linux/module.h>
4077 +diff --git a/crypto/Kconfig b/crypto/Kconfig
4078 +index 6b5e14cee475..b70b9d7c6e2f 100644
4079 +--- a/crypto/Kconfig
4080 ++++ b/crypto/Kconfig
4081 +@@ -446,7 +446,7 @@ config CRYPTO_KEYWRAP
4082 + config CRYPTO_NHPOLY1305
4083 + tristate
4084 + select CRYPTO_HASH
4085 +- select CRYPTO_POLY1305
4086 ++ select CRYPTO_LIB_POLY1305_GENERIC
4087 +
4088 + config CRYPTO_NHPOLY1305_SSE2
4089 + tristate "NHPoly1305 hash function (x86_64 SSE2 implementation)"
4090 +@@ -467,7 +467,7 @@ config CRYPTO_NHPOLY1305_AVX2
4091 + config CRYPTO_ADIANTUM
4092 + tristate "Adiantum support"
4093 + select CRYPTO_CHACHA20
4094 +- select CRYPTO_POLY1305
4095 ++ select CRYPTO_LIB_POLY1305_GENERIC
4096 + select CRYPTO_NHPOLY1305
4097 + select CRYPTO_MANAGER
4098 + help
4099 +@@ -686,6 +686,7 @@ config CRYPTO_GHASH
4100 + config CRYPTO_POLY1305
4101 + tristate "Poly1305 authenticator algorithm"
4102 + select CRYPTO_HASH
4103 ++ select CRYPTO_LIB_POLY1305_GENERIC
4104 + help
4105 + Poly1305 authenticator algorithm, RFC7539.
4106 +
4107 +diff --git a/crypto/adiantum.c b/crypto/adiantum.c
4108 +index 395a3ddd3707..aded26092268 100644
4109 +--- a/crypto/adiantum.c
4110 ++++ b/crypto/adiantum.c
4111 +@@ -33,6 +33,7 @@
4112 + #include <crypto/b128ops.h>
4113 + #include <crypto/chacha.h>
4114 + #include <crypto/internal/hash.h>
4115 ++#include <crypto/internal/poly1305.h>
4116 + #include <crypto/internal/skcipher.h>
4117 + #include <crypto/nhpoly1305.h>
4118 + #include <crypto/scatterwalk.h>
4119 +@@ -242,11 +243,11 @@ static void adiantum_hash_header(struct skcipher_request *req)
4120 +
4121 + BUILD_BUG_ON(sizeof(header) % POLY1305_BLOCK_SIZE != 0);
4122 + poly1305_core_blocks(&state, &tctx->header_hash_key,
4123 +- &header, sizeof(header) / POLY1305_BLOCK_SIZE);
4124 ++ &header, sizeof(header) / POLY1305_BLOCK_SIZE, 1);
4125 +
4126 + BUILD_BUG_ON(TWEAK_SIZE % POLY1305_BLOCK_SIZE != 0);
4127 + poly1305_core_blocks(&state, &tctx->header_hash_key, req->iv,
4128 +- TWEAK_SIZE / POLY1305_BLOCK_SIZE);
4129 ++ TWEAK_SIZE / POLY1305_BLOCK_SIZE, 1);
4130 +
4131 + poly1305_core_emit(&state, &rctx->header_hash);
4132 + }
4133 +diff --git a/crypto/nhpoly1305.c b/crypto/nhpoly1305.c
4134 +index 9ab4e07cde4d..f6b6a52092b4 100644
4135 +--- a/crypto/nhpoly1305.c
4136 ++++ b/crypto/nhpoly1305.c
4137 +@@ -33,6 +33,7 @@
4138 + #include <asm/unaligned.h>
4139 + #include <crypto/algapi.h>
4140 + #include <crypto/internal/hash.h>
4141 ++#include <crypto/internal/poly1305.h>
4142 + #include <crypto/nhpoly1305.h>
4143 + #include <linux/crypto.h>
4144 + #include <linux/kernel.h>
4145 +@@ -78,7 +79,7 @@ static void process_nh_hash_value(struct nhpoly1305_state *state,
4146 + BUILD_BUG_ON(NH_HASH_BYTES % POLY1305_BLOCK_SIZE != 0);
4147 +
4148 + poly1305_core_blocks(&state->poly_state, &key->poly_key, state->nh_hash,
4149 +- NH_HASH_BYTES / POLY1305_BLOCK_SIZE);
4150 ++ NH_HASH_BYTES / POLY1305_BLOCK_SIZE, 1);
4151 + }
4152 +
4153 + /*
4154 +diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c
4155 +index adc40298c749..067f493c2504 100644
4156 +--- a/crypto/poly1305_generic.c
4157 ++++ b/crypto/poly1305_generic.c
4158 +@@ -13,27 +13,12 @@
4159 +
4160 + #include <crypto/algapi.h>
4161 + #include <crypto/internal/hash.h>
4162 +-#include <crypto/poly1305.h>
4163 ++#include <crypto/internal/poly1305.h>
4164 + #include <linux/crypto.h>
4165 + #include <linux/kernel.h>
4166 + #include <linux/module.h>
4167 + #include <asm/unaligned.h>
4168 +
4169 +-static inline u64 mlt(u64 a, u64 b)
4170 +-{
4171 +- return a * b;
4172 +-}
4173 +-
4174 +-static inline u32 sr(u64 v, u_char n)
4175 +-{
4176 +- return v >> n;
4177 +-}
4178 +-
4179 +-static inline u32 and(u32 v, u32 mask)
4180 +-{
4181 +- return v & mask;
4182 +-}
4183 +-
4184 + int crypto_poly1305_init(struct shash_desc *desc)
4185 + {
4186 + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
4187 +@@ -47,124 +32,8 @@ int crypto_poly1305_init(struct shash_desc *desc)
4188 + }
4189 + EXPORT_SYMBOL_GPL(crypto_poly1305_init);
4190 +
4191 +-void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key)
4192 +-{
4193 +- /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
4194 +- key->r[0] = (get_unaligned_le32(raw_key + 0) >> 0) & 0x3ffffff;
4195 +- key->r[1] = (get_unaligned_le32(raw_key + 3) >> 2) & 0x3ffff03;
4196 +- key->r[2] = (get_unaligned_le32(raw_key + 6) >> 4) & 0x3ffc0ff;
4197 +- key->r[3] = (get_unaligned_le32(raw_key + 9) >> 6) & 0x3f03fff;
4198 +- key->r[4] = (get_unaligned_le32(raw_key + 12) >> 8) & 0x00fffff;
4199 +-}
4200 +-EXPORT_SYMBOL_GPL(poly1305_core_setkey);
4201 +-
4202 +-/*
4203 +- * Poly1305 requires a unique key for each tag, which implies that we can't set
4204 +- * it on the tfm that gets accessed by multiple users simultaneously. Instead we
4205 +- * expect the key as the first 32 bytes in the update() call.
4206 +- */
4207 +-unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
4208 +- const u8 *src, unsigned int srclen)
4209 +-{
4210 +- if (!dctx->sset) {
4211 +- if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) {
4212 +- poly1305_core_setkey(&dctx->r, src);
4213 +- src += POLY1305_BLOCK_SIZE;
4214 +- srclen -= POLY1305_BLOCK_SIZE;
4215 +- dctx->rset = true;
4216 +- }
4217 +- if (srclen >= POLY1305_BLOCK_SIZE) {
4218 +- dctx->s[0] = get_unaligned_le32(src + 0);
4219 +- dctx->s[1] = get_unaligned_le32(src + 4);
4220 +- dctx->s[2] = get_unaligned_le32(src + 8);
4221 +- dctx->s[3] = get_unaligned_le32(src + 12);
4222 +- src += POLY1305_BLOCK_SIZE;
4223 +- srclen -= POLY1305_BLOCK_SIZE;
4224 +- dctx->sset = true;
4225 +- }
4226 +- }
4227 +- return srclen;
4228 +-}
4229 +-EXPORT_SYMBOL_GPL(crypto_poly1305_setdesckey);
4230 +-
4231 +-static void poly1305_blocks_internal(struct poly1305_state *state,
4232 +- const struct poly1305_key *key,
4233 +- const void *src, unsigned int nblocks,
4234 +- u32 hibit)
4235 +-{
4236 +- u32 r0, r1, r2, r3, r4;
4237 +- u32 s1, s2, s3, s4;
4238 +- u32 h0, h1, h2, h3, h4;
4239 +- u64 d0, d1, d2, d3, d4;
4240 +-
4241 +- if (!nblocks)
4242 +- return;
4243 +-
4244 +- r0 = key->r[0];
4245 +- r1 = key->r[1];
4246 +- r2 = key->r[2];
4247 +- r3 = key->r[3];
4248 +- r4 = key->r[4];
4249 +-
4250 +- s1 = r1 * 5;
4251 +- s2 = r2 * 5;
4252 +- s3 = r3 * 5;
4253 +- s4 = r4 * 5;
4254 +-
4255 +- h0 = state->h[0];
4256 +- h1 = state->h[1];
4257 +- h2 = state->h[2];
4258 +- h3 = state->h[3];
4259 +- h4 = state->h[4];
4260 +-
4261 +- do {
4262 +- /* h += m[i] */
4263 +- h0 += (get_unaligned_le32(src + 0) >> 0) & 0x3ffffff;
4264 +- h1 += (get_unaligned_le32(src + 3) >> 2) & 0x3ffffff;
4265 +- h2 += (get_unaligned_le32(src + 6) >> 4) & 0x3ffffff;
4266 +- h3 += (get_unaligned_le32(src + 9) >> 6) & 0x3ffffff;
4267 +- h4 += (get_unaligned_le32(src + 12) >> 8) | hibit;
4268 +-
4269 +- /* h *= r */
4270 +- d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) +
4271 +- mlt(h3, s2) + mlt(h4, s1);
4272 +- d1 = mlt(h0, r1) + mlt(h1, r0) + mlt(h2, s4) +
4273 +- mlt(h3, s3) + mlt(h4, s2);
4274 +- d2 = mlt(h0, r2) + mlt(h1, r1) + mlt(h2, r0) +
4275 +- mlt(h3, s4) + mlt(h4, s3);
4276 +- d3 = mlt(h0, r3) + mlt(h1, r2) + mlt(h2, r1) +
4277 +- mlt(h3, r0) + mlt(h4, s4);
4278 +- d4 = mlt(h0, r4) + mlt(h1, r3) + mlt(h2, r2) +
4279 +- mlt(h3, r1) + mlt(h4, r0);
4280 +-
4281 +- /* (partial) h %= p */
4282 +- d1 += sr(d0, 26); h0 = and(d0, 0x3ffffff);
4283 +- d2 += sr(d1, 26); h1 = and(d1, 0x3ffffff);
4284 +- d3 += sr(d2, 26); h2 = and(d2, 0x3ffffff);
4285 +- d4 += sr(d3, 26); h3 = and(d3, 0x3ffffff);
4286 +- h0 += sr(d4, 26) * 5; h4 = and(d4, 0x3ffffff);
4287 +- h1 += h0 >> 26; h0 = h0 & 0x3ffffff;
4288 +-
4289 +- src += POLY1305_BLOCK_SIZE;
4290 +- } while (--nblocks);
4291 +-
4292 +- state->h[0] = h0;
4293 +- state->h[1] = h1;
4294 +- state->h[2] = h2;
4295 +- state->h[3] = h3;
4296 +- state->h[4] = h4;
4297 +-}
4298 +-
4299 +-void poly1305_core_blocks(struct poly1305_state *state,
4300 +- const struct poly1305_key *key,
4301 +- const void *src, unsigned int nblocks)
4302 +-{
4303 +- poly1305_blocks_internal(state, key, src, nblocks, 1 << 24);
4304 +-}
4305 +-EXPORT_SYMBOL_GPL(poly1305_core_blocks);
4306 +-
4307 +-static void poly1305_blocks(struct poly1305_desc_ctx *dctx,
4308 +- const u8 *src, unsigned int srclen, u32 hibit)
4309 ++static void poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
4310 ++ unsigned int srclen)
4311 + {
4312 + unsigned int datalen;
4313 +
4314 +@@ -174,8 +43,8 @@ static void poly1305_blocks(struct poly1305_desc_ctx *dctx,
4315 + srclen = datalen;
4316 + }
4317 +
4318 +- poly1305_blocks_internal(&dctx->h, &dctx->r,
4319 +- src, srclen / POLY1305_BLOCK_SIZE, hibit);
4320 ++ poly1305_core_blocks(&dctx->h, &dctx->r, src,
4321 ++ srclen / POLY1305_BLOCK_SIZE, 1);
4322 + }
4323 +
4324 + int crypto_poly1305_update(struct shash_desc *desc,
4325 +@@ -193,13 +62,13 @@ int crypto_poly1305_update(struct shash_desc *desc,
4326 +
4327 + if (dctx->buflen == POLY1305_BLOCK_SIZE) {
4328 + poly1305_blocks(dctx, dctx->buf,
4329 +- POLY1305_BLOCK_SIZE, 1 << 24);
4330 ++ POLY1305_BLOCK_SIZE);
4331 + dctx->buflen = 0;
4332 + }
4333 + }
4334 +
4335 + if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
4336 +- poly1305_blocks(dctx, src, srclen, 1 << 24);
4337 ++ poly1305_blocks(dctx, src, srclen);
4338 + src += srclen - (srclen % POLY1305_BLOCK_SIZE);
4339 + srclen %= POLY1305_BLOCK_SIZE;
4340 + }
4341 +@@ -213,54 +82,6 @@ int crypto_poly1305_update(struct shash_desc *desc,
4342 + }
4343 + EXPORT_SYMBOL_GPL(crypto_poly1305_update);
4344 +
4345 +-void poly1305_core_emit(const struct poly1305_state *state, void *dst)
4346 +-{
4347 +- u32 h0, h1, h2, h3, h4;
4348 +- u32 g0, g1, g2, g3, g4;
4349 +- u32 mask;
4350 +-
4351 +- /* fully carry h */
4352 +- h0 = state->h[0];
4353 +- h1 = state->h[1];
4354 +- h2 = state->h[2];
4355 +- h3 = state->h[3];
4356 +- h4 = state->h[4];
4357 +-
4358 +- h2 += (h1 >> 26); h1 = h1 & 0x3ffffff;
4359 +- h3 += (h2 >> 26); h2 = h2 & 0x3ffffff;
4360 +- h4 += (h3 >> 26); h3 = h3 & 0x3ffffff;
4361 +- h0 += (h4 >> 26) * 5; h4 = h4 & 0x3ffffff;
4362 +- h1 += (h0 >> 26); h0 = h0 & 0x3ffffff;
4363 +-
4364 +- /* compute h + -p */
4365 +- g0 = h0 + 5;
4366 +- g1 = h1 + (g0 >> 26); g0 &= 0x3ffffff;
4367 +- g2 = h2 + (g1 >> 26); g1 &= 0x3ffffff;
4368 +- g3 = h3 + (g2 >> 26); g2 &= 0x3ffffff;
4369 +- g4 = h4 + (g3 >> 26) - (1 << 26); g3 &= 0x3ffffff;
4370 +-
4371 +- /* select h if h < p, or h + -p if h >= p */
4372 +- mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1;
4373 +- g0 &= mask;
4374 +- g1 &= mask;
4375 +- g2 &= mask;
4376 +- g3 &= mask;
4377 +- g4 &= mask;
4378 +- mask = ~mask;
4379 +- h0 = (h0 & mask) | g0;
4380 +- h1 = (h1 & mask) | g1;
4381 +- h2 = (h2 & mask) | g2;
4382 +- h3 = (h3 & mask) | g3;
4383 +- h4 = (h4 & mask) | g4;
4384 +-
4385 +- /* h = h % (2^128) */
4386 +- put_unaligned_le32((h0 >> 0) | (h1 << 26), dst + 0);
4387 +- put_unaligned_le32((h1 >> 6) | (h2 << 20), dst + 4);
4388 +- put_unaligned_le32((h2 >> 12) | (h3 << 14), dst + 8);
4389 +- put_unaligned_le32((h3 >> 18) | (h4 << 8), dst + 12);
4390 +-}
4391 +-EXPORT_SYMBOL_GPL(poly1305_core_emit);
4392 +-
4393 + int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
4394 + {
4395 + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
4396 +@@ -274,7 +95,7 @@ int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
4397 + dctx->buf[dctx->buflen++] = 1;
4398 + memset(dctx->buf + dctx->buflen, 0,
4399 + POLY1305_BLOCK_SIZE - dctx->buflen);
4400 +- poly1305_blocks(dctx, dctx->buf, POLY1305_BLOCK_SIZE, 0);
4401 ++ poly1305_core_blocks(&dctx->h, &dctx->r, dctx->buf, 1, 0);
4402 + }
4403 +
4404 + poly1305_core_emit(&dctx->h, digest);
4405 +diff --git a/include/crypto/internal/poly1305.h b/include/crypto/internal/poly1305.h
4406 +new file mode 100644
4407 +index 000000000000..cb58e61f73a7
4408 +--- /dev/null
4409 ++++ b/include/crypto/internal/poly1305.h
4410 +@@ -0,0 +1,67 @@
4411 ++/* SPDX-License-Identifier: GPL-2.0 */
4412 ++/*
4413 ++ * Common values for the Poly1305 algorithm
4414 ++ */
4415 ++
4416 ++#ifndef _CRYPTO_INTERNAL_POLY1305_H
4417 ++#define _CRYPTO_INTERNAL_POLY1305_H
4418 ++
4419 ++#include <asm/unaligned.h>
4420 ++#include <linux/types.h>
4421 ++#include <crypto/poly1305.h>
4422 ++
4423 ++struct shash_desc;
4424 ++
4425 ++/*
4426 ++ * Poly1305 core functions. These implement the ε-almost-∆-universal hash
4427 ++ * function underlying the Poly1305 MAC, i.e. they don't add an encrypted nonce
4428 ++ * ("s key") at the end. They also only support block-aligned inputs.
4429 ++ */
4430 ++void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key);
4431 ++static inline void poly1305_core_init(struct poly1305_state *state)
4432 ++{
4433 ++ *state = (struct poly1305_state){};
4434 ++}
4435 ++
4436 ++void poly1305_core_blocks(struct poly1305_state *state,
4437 ++ const struct poly1305_key *key, const void *src,
4438 ++ unsigned int nblocks, u32 hibit);
4439 ++void poly1305_core_emit(const struct poly1305_state *state, void *dst);
4440 ++
4441 ++/* Crypto API helper functions for the Poly1305 MAC */
4442 ++int crypto_poly1305_init(struct shash_desc *desc);
4443 ++
4444 ++int crypto_poly1305_update(struct shash_desc *desc,
4445 ++ const u8 *src, unsigned int srclen);
4446 ++int crypto_poly1305_final(struct shash_desc *desc, u8 *dst);
4447 ++
4448 ++/*
4449 ++ * Poly1305 requires a unique key for each tag, which implies that we can't set
4450 ++ * it on the tfm that gets accessed by multiple users simultaneously. Instead we
4451 ++ * expect the key as the first 32 bytes in the update() call.
4452 ++ */
4453 ++static inline
4454 ++unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
4455 ++ const u8 *src, unsigned int srclen)
4456 ++{
4457 ++ if (!dctx->sset) {
4458 ++ if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) {
4459 ++ poly1305_core_setkey(&dctx->r, src);
4460 ++ src += POLY1305_BLOCK_SIZE;
4461 ++ srclen -= POLY1305_BLOCK_SIZE;
4462 ++ dctx->rset = true;
4463 ++ }
4464 ++ if (srclen >= POLY1305_BLOCK_SIZE) {
4465 ++ dctx->s[0] = get_unaligned_le32(src + 0);
4466 ++ dctx->s[1] = get_unaligned_le32(src + 4);
4467 ++ dctx->s[2] = get_unaligned_le32(src + 8);
4468 ++ dctx->s[3] = get_unaligned_le32(src + 12);
4469 ++ src += POLY1305_BLOCK_SIZE;
4470 ++ srclen -= POLY1305_BLOCK_SIZE;
4471 ++ dctx->sset = true;
4472 ++ }
4473 ++ }
4474 ++ return srclen;
4475 ++}
4476 ++
4477 ++#endif
4478 +diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h
4479 +index 34317ed2071e..f5a4319c2a1f 100644
4480 +--- a/include/crypto/poly1305.h
4481 ++++ b/include/crypto/poly1305.h
4482 +@@ -38,27 +38,4 @@ struct poly1305_desc_ctx {
4483 + bool sset;
4484 + };
4485 +
4486 +-/*
4487 +- * Poly1305 core functions. These implement the ε-almost-∆-universal hash
4488 +- * function underlying the Poly1305 MAC, i.e. they don't add an encrypted nonce
4489 +- * ("s key") at the end. They also only support block-aligned inputs.
4490 +- */
4491 +-void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key);
4492 +-static inline void poly1305_core_init(struct poly1305_state *state)
4493 +-{
4494 +- memset(state->h, 0, sizeof(state->h));
4495 +-}
4496 +-void poly1305_core_blocks(struct poly1305_state *state,
4497 +- const struct poly1305_key *key,
4498 +- const void *src, unsigned int nblocks);
4499 +-void poly1305_core_emit(const struct poly1305_state *state, void *dst);
4500 +-
4501 +-/* Crypto API helper functions for the Poly1305 MAC */
4502 +-int crypto_poly1305_init(struct shash_desc *desc);
4503 +-unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
4504 +- const u8 *src, unsigned int srclen);
4505 +-int crypto_poly1305_update(struct shash_desc *desc,
4506 +- const u8 *src, unsigned int srclen);
4507 +-int crypto_poly1305_final(struct shash_desc *desc, u8 *dst);
4508 +-
4509 + #endif
4510 +diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
4511 +index 6a11931ae105..c4882d29879e 100644
4512 +--- a/lib/crypto/Kconfig
4513 ++++ b/lib/crypto/Kconfig
4514 +@@ -37,5 +37,8 @@ config CRYPTO_LIB_CHACHA
4515 + config CRYPTO_LIB_DES
4516 + tristate
4517 +
4518 ++config CRYPTO_LIB_POLY1305_GENERIC
4519 ++ tristate
4520 ++
4521 + config CRYPTO_LIB_SHA256
4522 + tristate
4523 +diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
4524 +index 0ce40604e104..b58ab6843a9d 100644
4525 +--- a/lib/crypto/Makefile
4526 ++++ b/lib/crypto/Makefile
4527 +@@ -13,5 +13,8 @@ libarc4-y := arc4.o
4528 + obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o
4529 + libdes-y := des.o
4530 +
4531 ++obj-$(CONFIG_CRYPTO_LIB_POLY1305_GENERIC) += libpoly1305.o
4532 ++libpoly1305-y := poly1305.o
4533 ++
4534 + obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o
4535 + libsha256-y := sha256.o
4536 +diff --git a/lib/crypto/poly1305.c b/lib/crypto/poly1305.c
4537 +new file mode 100644
4538 +index 000000000000..f019a57dbc1b
4539 +--- /dev/null
4540 ++++ b/lib/crypto/poly1305.c
4541 +@@ -0,0 +1,158 @@
4542 ++// SPDX-License-Identifier: GPL-2.0-or-later
4543 ++/*
4544 ++ * Poly1305 authenticator algorithm, RFC7539
4545 ++ *
4546 ++ * Copyright (C) 2015 Martin Willi
4547 ++ *
4548 ++ * Based on public domain code by Andrew Moon and Daniel J. Bernstein.
4549 ++ */
4550 ++
4551 ++#include <crypto/internal/poly1305.h>
4552 ++#include <linux/kernel.h>
4553 ++#include <linux/module.h>
4554 ++#include <asm/unaligned.h>
4555 ++
4556 ++static inline u64 mlt(u64 a, u64 b)
4557 ++{
4558 ++ return a * b;
4559 ++}
4560 ++
4561 ++static inline u32 sr(u64 v, u_char n)
4562 ++{
4563 ++ return v >> n;
4564 ++}
4565 ++
4566 ++static inline u32 and(u32 v, u32 mask)
4567 ++{
4568 ++ return v & mask;
4569 ++}
4570 ++
4571 ++void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key)
4572 ++{
4573 ++ /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
4574 ++ key->r[0] = (get_unaligned_le32(raw_key + 0) >> 0) & 0x3ffffff;
4575 ++ key->r[1] = (get_unaligned_le32(raw_key + 3) >> 2) & 0x3ffff03;
4576 ++ key->r[2] = (get_unaligned_le32(raw_key + 6) >> 4) & 0x3ffc0ff;
4577 ++ key->r[3] = (get_unaligned_le32(raw_key + 9) >> 6) & 0x3f03fff;
4578 ++ key->r[4] = (get_unaligned_le32(raw_key + 12) >> 8) & 0x00fffff;
4579 ++}
4580 ++EXPORT_SYMBOL_GPL(poly1305_core_setkey);
4581 ++
4582 ++void poly1305_core_blocks(struct poly1305_state *state,
4583 ++ const struct poly1305_key *key, const void *src,
4584 ++ unsigned int nblocks, u32 hibit)
4585 ++{
4586 ++ u32 r0, r1, r2, r3, r4;
4587 ++ u32 s1, s2, s3, s4;
4588 ++ u32 h0, h1, h2, h3, h4;
4589 ++ u64 d0, d1, d2, d3, d4;
4590 ++
4591 ++ if (!nblocks)
4592 ++ return;
4593 ++
4594 ++ r0 = key->r[0];
4595 ++ r1 = key->r[1];
4596 ++ r2 = key->r[2];
4597 ++ r3 = key->r[3];
4598 ++ r4 = key->r[4];
4599 ++
4600 ++ s1 = r1 * 5;
4601 ++ s2 = r2 * 5;
4602 ++ s3 = r3 * 5;
4603 ++ s4 = r4 * 5;
4604 ++
4605 ++ h0 = state->h[0];
4606 ++ h1 = state->h[1];
4607 ++ h2 = state->h[2];
4608 ++ h3 = state->h[3];
4609 ++ h4 = state->h[4];
4610 ++
4611 ++ do {
4612 ++ /* h += m[i] */
4613 ++ h0 += (get_unaligned_le32(src + 0) >> 0) & 0x3ffffff;
4614 ++ h1 += (get_unaligned_le32(src + 3) >> 2) & 0x3ffffff;
4615 ++ h2 += (get_unaligned_le32(src + 6) >> 4) & 0x3ffffff;
4616 ++ h3 += (get_unaligned_le32(src + 9) >> 6) & 0x3ffffff;
4617 ++ h4 += (get_unaligned_le32(src + 12) >> 8) | (hibit << 24);
4618 ++
4619 ++ /* h *= r */
4620 ++ d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) +
4621 ++ mlt(h3, s2) + mlt(h4, s1);
4622 ++ d1 = mlt(h0, r1) + mlt(h1, r0) + mlt(h2, s4) +
4623 ++ mlt(h3, s3) + mlt(h4, s2);
4624 ++ d2 = mlt(h0, r2) + mlt(h1, r1) + mlt(h2, r0) +
4625 ++ mlt(h3, s4) + mlt(h4, s3);
4626 ++ d3 = mlt(h0, r3) + mlt(h1, r2) + mlt(h2, r1) +
4627 ++ mlt(h3, r0) + mlt(h4, s4);
4628 ++ d4 = mlt(h0, r4) + mlt(h1, r3) + mlt(h2, r2) +
4629 ++ mlt(h3, r1) + mlt(h4, r0);
4630 ++
4631 ++ /* (partial) h %= p */
4632 ++ d1 += sr(d0, 26); h0 = and(d0, 0x3ffffff);
4633 ++ d2 += sr(d1, 26); h1 = and(d1, 0x3ffffff);
4634 ++ d3 += sr(d2, 26); h2 = and(d2, 0x3ffffff);
4635 ++ d4 += sr(d3, 26); h3 = and(d3, 0x3ffffff);
4636 ++ h0 += sr(d4, 26) * 5; h4 = and(d4, 0x3ffffff);
4637 ++ h1 += h0 >> 26; h0 = h0 & 0x3ffffff;
4638 ++
4639 ++ src += POLY1305_BLOCK_SIZE;
4640 ++ } while (--nblocks);
4641 ++
4642 ++ state->h[0] = h0;
4643 ++ state->h[1] = h1;
4644 ++ state->h[2] = h2;
4645 ++ state->h[3] = h3;
4646 ++ state->h[4] = h4;
4647 ++}
4648 ++EXPORT_SYMBOL_GPL(poly1305_core_blocks);
4649 ++
4650 ++void poly1305_core_emit(const struct poly1305_state *state, void *dst)
4651 ++{
4652 ++ u32 h0, h1, h2, h3, h4;
4653 ++ u32 g0, g1, g2, g3, g4;
4654 ++ u32 mask;
4655 ++
4656 ++ /* fully carry h */
4657 ++ h0 = state->h[0];
4658 ++ h1 = state->h[1];
4659 ++ h2 = state->h[2];
4660 ++ h3 = state->h[3];
4661 ++ h4 = state->h[4];
4662 ++
4663 ++ h2 += (h1 >> 26); h1 = h1 & 0x3ffffff;
4664 ++ h3 += (h2 >> 26); h2 = h2 & 0x3ffffff;
4665 ++ h4 += (h3 >> 26); h3 = h3 & 0x3ffffff;
4666 ++ h0 += (h4 >> 26) * 5; h4 = h4 & 0x3ffffff;
4667 ++ h1 += (h0 >> 26); h0 = h0 & 0x3ffffff;
4668 ++
4669 ++ /* compute h + -p */
4670 ++ g0 = h0 + 5;
4671 ++ g1 = h1 + (g0 >> 26); g0 &= 0x3ffffff;
4672 ++ g2 = h2 + (g1 >> 26); g1 &= 0x3ffffff;
4673 ++ g3 = h3 + (g2 >> 26); g2 &= 0x3ffffff;
4674 ++ g4 = h4 + (g3 >> 26) - (1 << 26); g3 &= 0x3ffffff;
4675 ++
4676 ++ /* select h if h < p, or h + -p if h >= p */
4677 ++ mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1;
4678 ++ g0 &= mask;
4679 ++ g1 &= mask;
4680 ++ g2 &= mask;
4681 ++ g3 &= mask;
4682 ++ g4 &= mask;
4683 ++ mask = ~mask;
4684 ++ h0 = (h0 & mask) | g0;
4685 ++ h1 = (h1 & mask) | g1;
4686 ++ h2 = (h2 & mask) | g2;
4687 ++ h3 = (h3 & mask) | g3;
4688 ++ h4 = (h4 & mask) | g4;
4689 ++
4690 ++ /* h = h % (2^128) */
4691 ++ put_unaligned_le32((h0 >> 0) | (h1 << 26), dst + 0);
4692 ++ put_unaligned_le32((h1 >> 6) | (h2 << 20), dst + 4);
4693 ++ put_unaligned_le32((h2 >> 12) | (h3 << 14), dst + 8);
4694 ++ put_unaligned_le32((h3 >> 18) | (h4 << 8), dst + 12);
4695 ++}
4696 ++EXPORT_SYMBOL_GPL(poly1305_core_emit);
4697 ++
4698 ++MODULE_LICENSE("GPL");
4699 ++MODULE_AUTHOR("Martin Willi <martin@××××××××××.org>");
4700 +--
4701 +cgit v1.2.3-4-ga26e
4702 +
4703 +
4704 +From 8fcde4632d851b3cfd29d7de883d1803a50a55b1 Mon Sep 17 00:00:00 2001
4705 +From: Ard Biesheuvel <ardb@××××××.org>
4706 +Date: Fri, 8 Nov 2019 13:22:20 +0100
4707 +Subject: crypto: x86/poly1305 - unify Poly1305 state struct with generic code
4708 +
4709 +commit ad8f5b88383ea685f2b8df2a12ee3e08089a1287 upstream.
4710 +
4711 +In preparation of exposing a Poly1305 library interface directly from
4712 +the accelerated x86 driver, align the state descriptor of the x86 code
4713 +with the one used by the generic driver. This is needed to make the
4714 +library interface unified between all implementations.
4715 +
4716 +Signed-off-by: Ard Biesheuvel <ardb@××××××.org>
4717 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
4718 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
4719 +---
4720 + arch/x86/crypto/poly1305_glue.c | 88 +++++++++++++-------------------------
4721 + crypto/poly1305_generic.c | 6 +--
4722 + include/crypto/internal/poly1305.h | 4 +-
4723 + include/crypto/poly1305.h | 18 ++++----
4724 + 4 files changed, 43 insertions(+), 73 deletions(-)
4725 +
4726 +diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
4727 +index 6ccf8eb26324..b43b93c95e79 100644
4728 +--- a/arch/x86/crypto/poly1305_glue.c
4729 ++++ b/arch/x86/crypto/poly1305_glue.c
4730 +@@ -14,40 +14,14 @@
4731 + #include <linux/module.h>
4732 + #include <asm/simd.h>
4733 +
4734 +-struct poly1305_simd_desc_ctx {
4735 +- struct poly1305_desc_ctx base;
4736 +- /* derived key u set? */
4737 +- bool uset;
4738 +-#ifdef CONFIG_AS_AVX2
4739 +- /* derived keys r^3, r^4 set? */
4740 +- bool wset;
4741 +-#endif
4742 +- /* derived Poly1305 key r^2 */
4743 +- u32 u[5];
4744 +- /* ... silently appended r^3 and r^4 when using AVX2 */
4745 +-};
4746 +-
4747 + asmlinkage void poly1305_block_sse2(u32 *h, const u8 *src,
4748 + const u32 *r, unsigned int blocks);
4749 + asmlinkage void poly1305_2block_sse2(u32 *h, const u8 *src, const u32 *r,
4750 + unsigned int blocks, const u32 *u);
4751 +-#ifdef CONFIG_AS_AVX2
4752 + asmlinkage void poly1305_4block_avx2(u32 *h, const u8 *src, const u32 *r,
4753 + unsigned int blocks, const u32 *u);
4754 +-static bool poly1305_use_avx2;
4755 +-#endif
4756 +
4757 +-static int poly1305_simd_init(struct shash_desc *desc)
4758 +-{
4759 +- struct poly1305_simd_desc_ctx *sctx = shash_desc_ctx(desc);
4760 +-
4761 +- sctx->uset = false;
4762 +-#ifdef CONFIG_AS_AVX2
4763 +- sctx->wset = false;
4764 +-#endif
4765 +-
4766 +- return crypto_poly1305_init(desc);
4767 +-}
4768 ++static bool poly1305_use_avx2 __ro_after_init;
4769 +
4770 + static void poly1305_simd_mult(u32 *a, const u32 *b)
4771 + {
4772 +@@ -63,53 +37,49 @@ static void poly1305_simd_mult(u32 *a, const u32 *b)
4773 + static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx,
4774 + const u8 *src, unsigned int srclen)
4775 + {
4776 +- struct poly1305_simd_desc_ctx *sctx;
4777 + unsigned int blocks, datalen;
4778 +
4779 +- BUILD_BUG_ON(offsetof(struct poly1305_simd_desc_ctx, base));
4780 +- sctx = container_of(dctx, struct poly1305_simd_desc_ctx, base);
4781 +-
4782 + if (unlikely(!dctx->sset)) {
4783 + datalen = crypto_poly1305_setdesckey(dctx, src, srclen);
4784 + src += srclen - datalen;
4785 + srclen = datalen;
4786 + }
4787 +
4788 +-#ifdef CONFIG_AS_AVX2
4789 +- if (poly1305_use_avx2 && srclen >= POLY1305_BLOCK_SIZE * 4) {
4790 +- if (unlikely(!sctx->wset)) {
4791 +- if (!sctx->uset) {
4792 +- memcpy(sctx->u, dctx->r.r, sizeof(sctx->u));
4793 +- poly1305_simd_mult(sctx->u, dctx->r.r);
4794 +- sctx->uset = true;
4795 ++ if (IS_ENABLED(CONFIG_AS_AVX2) &&
4796 ++ poly1305_use_avx2 &&
4797 ++ srclen >= POLY1305_BLOCK_SIZE * 4) {
4798 ++ if (unlikely(dctx->rset < 4)) {
4799 ++ if (dctx->rset < 2) {
4800 ++ dctx->r[1] = dctx->r[0];
4801 ++ poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r);
4802 + }
4803 +- memcpy(sctx->u + 5, sctx->u, sizeof(sctx->u));
4804 +- poly1305_simd_mult(sctx->u + 5, dctx->r.r);
4805 +- memcpy(sctx->u + 10, sctx->u + 5, sizeof(sctx->u));
4806 +- poly1305_simd_mult(sctx->u + 10, dctx->r.r);
4807 +- sctx->wset = true;
4808 ++ dctx->r[2] = dctx->r[1];
4809 ++ poly1305_simd_mult(dctx->r[2].r, dctx->r[0].r);
4810 ++ dctx->r[3] = dctx->r[2];
4811 ++ poly1305_simd_mult(dctx->r[3].r, dctx->r[0].r);
4812 ++ dctx->rset = 4;
4813 + }
4814 + blocks = srclen / (POLY1305_BLOCK_SIZE * 4);
4815 +- poly1305_4block_avx2(dctx->h.h, src, dctx->r.r, blocks,
4816 +- sctx->u);
4817 ++ poly1305_4block_avx2(dctx->h.h, src, dctx->r[0].r, blocks,
4818 ++ dctx->r[1].r);
4819 + src += POLY1305_BLOCK_SIZE * 4 * blocks;
4820 + srclen -= POLY1305_BLOCK_SIZE * 4 * blocks;
4821 + }
4822 +-#endif
4823 ++
4824 + if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) {
4825 +- if (unlikely(!sctx->uset)) {
4826 +- memcpy(sctx->u, dctx->r.r, sizeof(sctx->u));
4827 +- poly1305_simd_mult(sctx->u, dctx->r.r);
4828 +- sctx->uset = true;
4829 ++ if (unlikely(dctx->rset < 2)) {
4830 ++ dctx->r[1] = dctx->r[0];
4831 ++ poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r);
4832 ++ dctx->rset = 2;
4833 + }
4834 + blocks = srclen / (POLY1305_BLOCK_SIZE * 2);
4835 +- poly1305_2block_sse2(dctx->h.h, src, dctx->r.r, blocks,
4836 +- sctx->u);
4837 ++ poly1305_2block_sse2(dctx->h.h, src, dctx->r[0].r,
4838 ++ blocks, dctx->r[1].r);
4839 + src += POLY1305_BLOCK_SIZE * 2 * blocks;
4840 + srclen -= POLY1305_BLOCK_SIZE * 2 * blocks;
4841 + }
4842 + if (srclen >= POLY1305_BLOCK_SIZE) {
4843 +- poly1305_block_sse2(dctx->h.h, src, dctx->r.r, 1);
4844 ++ poly1305_block_sse2(dctx->h.h, src, dctx->r[0].r, 1);
4845 + srclen -= POLY1305_BLOCK_SIZE;
4846 + }
4847 + return srclen;
4848 +@@ -159,10 +129,10 @@ static int poly1305_simd_update(struct shash_desc *desc,
4849 +
4850 + static struct shash_alg alg = {
4851 + .digestsize = POLY1305_DIGEST_SIZE,
4852 +- .init = poly1305_simd_init,
4853 ++ .init = crypto_poly1305_init,
4854 + .update = poly1305_simd_update,
4855 + .final = crypto_poly1305_final,
4856 +- .descsize = sizeof(struct poly1305_simd_desc_ctx),
4857 ++ .descsize = sizeof(struct poly1305_desc_ctx),
4858 + .base = {
4859 + .cra_name = "poly1305",
4860 + .cra_driver_name = "poly1305-simd",
4861 +@@ -177,14 +147,14 @@ static int __init poly1305_simd_mod_init(void)
4862 + if (!boot_cpu_has(X86_FEATURE_XMM2))
4863 + return -ENODEV;
4864 +
4865 +-#ifdef CONFIG_AS_AVX2
4866 +- poly1305_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
4867 ++ poly1305_use_avx2 = IS_ENABLED(CONFIG_AS_AVX2) &&
4868 ++ boot_cpu_has(X86_FEATURE_AVX) &&
4869 + boot_cpu_has(X86_FEATURE_AVX2) &&
4870 + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
4871 +- alg.descsize = sizeof(struct poly1305_simd_desc_ctx);
4872 ++ alg.descsize = sizeof(struct poly1305_desc_ctx) + 5 * sizeof(u32);
4873 + if (poly1305_use_avx2)
4874 + alg.descsize += 10 * sizeof(u32);
4875 +-#endif
4876 ++
4877 + return crypto_register_shash(&alg);
4878 + }
4879 +
4880 +diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c
4881 +index 067f493c2504..f3fcd9578a47 100644
4882 +--- a/crypto/poly1305_generic.c
4883 ++++ b/crypto/poly1305_generic.c
4884 +@@ -25,7 +25,7 @@ int crypto_poly1305_init(struct shash_desc *desc)
4885 +
4886 + poly1305_core_init(&dctx->h);
4887 + dctx->buflen = 0;
4888 +- dctx->rset = false;
4889 ++ dctx->rset = 0;
4890 + dctx->sset = false;
4891 +
4892 + return 0;
4893 +@@ -43,7 +43,7 @@ static void poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
4894 + srclen = datalen;
4895 + }
4896 +
4897 +- poly1305_core_blocks(&dctx->h, &dctx->r, src,
4898 ++ poly1305_core_blocks(&dctx->h, dctx->r, src,
4899 + srclen / POLY1305_BLOCK_SIZE, 1);
4900 + }
4901 +
4902 +@@ -95,7 +95,7 @@ int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
4903 + dctx->buf[dctx->buflen++] = 1;
4904 + memset(dctx->buf + dctx->buflen, 0,
4905 + POLY1305_BLOCK_SIZE - dctx->buflen);
4906 +- poly1305_core_blocks(&dctx->h, &dctx->r, dctx->buf, 1, 0);
4907 ++ poly1305_core_blocks(&dctx->h, dctx->r, dctx->buf, 1, 0);
4908 + }
4909 +
4910 + poly1305_core_emit(&dctx->h, digest);
4911 +diff --git a/include/crypto/internal/poly1305.h b/include/crypto/internal/poly1305.h
4912 +index cb58e61f73a7..04fa269e5534 100644
4913 +--- a/include/crypto/internal/poly1305.h
4914 ++++ b/include/crypto/internal/poly1305.h
4915 +@@ -46,10 +46,10 @@ unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
4916 + {
4917 + if (!dctx->sset) {
4918 + if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) {
4919 +- poly1305_core_setkey(&dctx->r, src);
4920 ++ poly1305_core_setkey(dctx->r, src);
4921 + src += POLY1305_BLOCK_SIZE;
4922 + srclen -= POLY1305_BLOCK_SIZE;
4923 +- dctx->rset = true;
4924 ++ dctx->rset = 1;
4925 + }
4926 + if (srclen >= POLY1305_BLOCK_SIZE) {
4927 + dctx->s[0] = get_unaligned_le32(src + 0);
4928 +diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h
4929 +index f5a4319c2a1f..36b5886cb50c 100644
4930 +--- a/include/crypto/poly1305.h
4931 ++++ b/include/crypto/poly1305.h
4932 +@@ -22,20 +22,20 @@ struct poly1305_state {
4933 + };
4934 +
4935 + struct poly1305_desc_ctx {
4936 +- /* key */
4937 +- struct poly1305_key r;
4938 +- /* finalize key */
4939 +- u32 s[4];
4940 +- /* accumulator */
4941 +- struct poly1305_state h;
4942 + /* partial buffer */
4943 + u8 buf[POLY1305_BLOCK_SIZE];
4944 + /* bytes used in partial buffer */
4945 + unsigned int buflen;
4946 +- /* r key has been set */
4947 +- bool rset;
4948 +- /* s key has been set */
4949 ++ /* how many keys have been set in r[] */
4950 ++ unsigned short rset;
4951 ++ /* whether s[] has been set */
4952 + bool sset;
4953 ++ /* finalize key */
4954 ++ u32 s[4];
4955 ++ /* accumulator */
4956 ++ struct poly1305_state h;
4957 ++ /* key */
4958 ++ struct poly1305_key r[1];
4959 + };
4960 +
4961 + #endif
4962 +--
4963 +cgit v1.2.3-4-ga26e
4964 +
4965 +
4966 +From e2a2a1222fddbb59e30814425f55a1e86a1aac30 Mon Sep 17 00:00:00 2001
4967 +From: Ard Biesheuvel <ardb@××××××.org>
4968 +Date: Fri, 8 Nov 2019 13:22:21 +0100
4969 +Subject: crypto: poly1305 - expose init/update/final library interface
4970 +
4971 +commit a1d93064094cc5e24d64e35cf093e7191d0c9344 upstream.
4972 +
4973 +Expose the existing generic Poly1305 code via an init/update/final
4974 +library interface so that callers are not required to go through
4975 +the crypto API's shash abstraction to access it. At the same time,
4976 +make some preparations so that the library implementation can be
4977 +superseded by an accelerated arch-specific version in the future.
4978 +
4979 +Signed-off-by: Ard Biesheuvel <ardb@××××××.org>
4980 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
4981 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
4982 +---
4983 + crypto/poly1305_generic.c | 22 +-------------
4984 + include/crypto/poly1305.h | 38 +++++++++++++++++++++++-
4985 + lib/crypto/Kconfig | 26 +++++++++++++++++
4986 + lib/crypto/poly1305.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++
4987 + 4 files changed, 138 insertions(+), 22 deletions(-)
4988 +
4989 +diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c
4990 +index f3fcd9578a47..afe9a9e576dd 100644
4991 +--- a/crypto/poly1305_generic.c
4992 ++++ b/crypto/poly1305_generic.c
4993 +@@ -85,31 +85,11 @@ EXPORT_SYMBOL_GPL(crypto_poly1305_update);
4994 + int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
4995 + {
4996 + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
4997 +- __le32 digest[4];
4998 +- u64 f = 0;
4999 +
5000 + if (unlikely(!dctx->sset))
5001 + return -ENOKEY;
5002 +
5003 +- if (unlikely(dctx->buflen)) {
5004 +- dctx->buf[dctx->buflen++] = 1;
5005 +- memset(dctx->buf + dctx->buflen, 0,
5006 +- POLY1305_BLOCK_SIZE - dctx->buflen);
5007 +- poly1305_core_blocks(&dctx->h, dctx->r, dctx->buf, 1, 0);
5008 +- }
5009 +-
5010 +- poly1305_core_emit(&dctx->h, digest);
5011 +-
5012 +- /* mac = (h + s) % (2^128) */
5013 +- f = (f >> 32) + le32_to_cpu(digest[0]) + dctx->s[0];
5014 +- put_unaligned_le32(f, dst + 0);
5015 +- f = (f >> 32) + le32_to_cpu(digest[1]) + dctx->s[1];
5016 +- put_unaligned_le32(f, dst + 4);
5017 +- f = (f >> 32) + le32_to_cpu(digest[2]) + dctx->s[2];
5018 +- put_unaligned_le32(f, dst + 8);
5019 +- f = (f >> 32) + le32_to_cpu(digest[3]) + dctx->s[3];
5020 +- put_unaligned_le32(f, dst + 12);
5021 +-
5022 ++ poly1305_final_generic(dctx, dst);
5023 + return 0;
5024 + }
5025 + EXPORT_SYMBOL_GPL(crypto_poly1305_final);
5026 +diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h
5027 +index 36b5886cb50c..74c6e1cd73ee 100644
5028 +--- a/include/crypto/poly1305.h
5029 ++++ b/include/crypto/poly1305.h
5030 +@@ -35,7 +35,43 @@ struct poly1305_desc_ctx {
5031 + /* accumulator */
5032 + struct poly1305_state h;
5033 + /* key */
5034 +- struct poly1305_key r[1];
5035 ++ struct poly1305_key r[CONFIG_CRYPTO_LIB_POLY1305_RSIZE];
5036 + };
5037 +
5038 ++void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key);
5039 ++void poly1305_init_generic(struct poly1305_desc_ctx *desc, const u8 *key);
5040 ++
5041 ++static inline void poly1305_init(struct poly1305_desc_ctx *desc, const u8 *key)
5042 ++{
5043 ++ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305))
5044 ++ poly1305_init_arch(desc, key);
5045 ++ else
5046 ++ poly1305_init_generic(desc, key);
5047 ++}
5048 ++
5049 ++void poly1305_update_arch(struct poly1305_desc_ctx *desc, const u8 *src,
5050 ++ unsigned int nbytes);
5051 ++void poly1305_update_generic(struct poly1305_desc_ctx *desc, const u8 *src,
5052 ++ unsigned int nbytes);
5053 ++
5054 ++static inline void poly1305_update(struct poly1305_desc_ctx *desc,
5055 ++ const u8 *src, unsigned int nbytes)
5056 ++{
5057 ++ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305))
5058 ++ poly1305_update_arch(desc, src, nbytes);
5059 ++ else
5060 ++ poly1305_update_generic(desc, src, nbytes);
5061 ++}
5062 ++
5063 ++void poly1305_final_arch(struct poly1305_desc_ctx *desc, u8 *digest);
5064 ++void poly1305_final_generic(struct poly1305_desc_ctx *desc, u8 *digest);
5065 ++
5066 ++static inline void poly1305_final(struct poly1305_desc_ctx *desc, u8 *digest)
5067 ++{
5068 ++ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305))
5069 ++ poly1305_final_arch(desc, digest);
5070 ++ else
5071 ++ poly1305_final_generic(desc, digest);
5072 ++}
5073 ++
5074 + #endif
5075 +diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
5076 +index c4882d29879e..a731ea36bd5c 100644
5077 +--- a/lib/crypto/Kconfig
5078 ++++ b/lib/crypto/Kconfig
5079 +@@ -37,8 +37,34 @@ config CRYPTO_LIB_CHACHA
5080 + config CRYPTO_LIB_DES
5081 + tristate
5082 +
5083 ++config CRYPTO_LIB_POLY1305_RSIZE
5084 ++ int
5085 ++ default 1
5086 ++
5087 ++config CRYPTO_ARCH_HAVE_LIB_POLY1305
5088 ++ tristate
5089 ++ help
5090 ++ Declares whether the architecture provides an arch-specific
5091 ++ accelerated implementation of the Poly1305 library interface,
5092 ++ either builtin or as a module.
5093 ++
5094 + config CRYPTO_LIB_POLY1305_GENERIC
5095 + tristate
5096 ++ help
5097 ++ This symbol can be depended upon by arch implementations of the
5098 ++ Poly1305 library interface that require the generic code as a
5099 ++ fallback, e.g., for SIMD implementations. If no arch specific
5100 ++ implementation is enabled, this implementation serves the users
5101 ++ of CRYPTO_LIB_POLY1305.
5102 ++
5103 ++config CRYPTO_LIB_POLY1305
5104 ++ tristate "Poly1305 library interface"
5105 ++ depends on CRYPTO_ARCH_HAVE_LIB_POLY1305 || !CRYPTO_ARCH_HAVE_LIB_POLY1305
5106 ++ select CRYPTO_LIB_POLY1305_GENERIC if CRYPTO_ARCH_HAVE_LIB_POLY1305=n
5107 ++ help
5108 ++ Enable the Poly1305 library interface. This interface may be fulfilled
5109 ++ by either the generic implementation or an arch-specific one, if one
5110 ++ is available and enabled.
5111 +
5112 + config CRYPTO_LIB_SHA256
5113 + tristate
5114 +diff --git a/lib/crypto/poly1305.c b/lib/crypto/poly1305.c
5115 +index f019a57dbc1b..32ec293c65ae 100644
5116 +--- a/lib/crypto/poly1305.c
5117 ++++ b/lib/crypto/poly1305.c
5118 +@@ -154,5 +154,79 @@ void poly1305_core_emit(const struct poly1305_state *state, void *dst)
5119 + }
5120 + EXPORT_SYMBOL_GPL(poly1305_core_emit);
5121 +
5122 ++void poly1305_init_generic(struct poly1305_desc_ctx *desc, const u8 *key)
5123 ++{
5124 ++ poly1305_core_setkey(desc->r, key);
5125 ++ desc->s[0] = get_unaligned_le32(key + 16);
5126 ++ desc->s[1] = get_unaligned_le32(key + 20);
5127 ++ desc->s[2] = get_unaligned_le32(key + 24);
5128 ++ desc->s[3] = get_unaligned_le32(key + 28);
5129 ++ poly1305_core_init(&desc->h);
5130 ++ desc->buflen = 0;
5131 ++ desc->sset = true;
5132 ++ desc->rset = 1;
5133 ++}
5134 ++EXPORT_SYMBOL_GPL(poly1305_init_generic);
5135 ++
5136 ++void poly1305_update_generic(struct poly1305_desc_ctx *desc, const u8 *src,
5137 ++ unsigned int nbytes)
5138 ++{
5139 ++ unsigned int bytes;
5140 ++
5141 ++ if (unlikely(desc->buflen)) {
5142 ++ bytes = min(nbytes, POLY1305_BLOCK_SIZE - desc->buflen);
5143 ++ memcpy(desc->buf + desc->buflen, src, bytes);
5144 ++ src += bytes;
5145 ++ nbytes -= bytes;
5146 ++ desc->buflen += bytes;
5147 ++
5148 ++ if (desc->buflen == POLY1305_BLOCK_SIZE) {
5149 ++ poly1305_core_blocks(&desc->h, desc->r, desc->buf, 1, 1);
5150 ++ desc->buflen = 0;
5151 ++ }
5152 ++ }
5153 ++
5154 ++ if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
5155 ++ poly1305_core_blocks(&desc->h, desc->r, src,
5156 ++ nbytes / POLY1305_BLOCK_SIZE, 1);
5157 ++ src += nbytes - (nbytes % POLY1305_BLOCK_SIZE);
5158 ++ nbytes %= POLY1305_BLOCK_SIZE;
5159 ++ }
5160 ++
5161 ++ if (unlikely(nbytes)) {
5162 ++ desc->buflen = nbytes;
5163 ++ memcpy(desc->buf, src, nbytes);
5164 ++ }
5165 ++}
5166 ++EXPORT_SYMBOL_GPL(poly1305_update_generic);
5167 ++
5168 ++void poly1305_final_generic(struct poly1305_desc_ctx *desc, u8 *dst)
5169 ++{
5170 ++ __le32 digest[4];
5171 ++ u64 f = 0;
5172 ++
5173 ++ if (unlikely(desc->buflen)) {
5174 ++ desc->buf[desc->buflen++] = 1;
5175 ++ memset(desc->buf + desc->buflen, 0,
5176 ++ POLY1305_BLOCK_SIZE - desc->buflen);
5177 ++ poly1305_core_blocks(&desc->h, desc->r, desc->buf, 1, 0);
5178 ++ }
5179 ++
5180 ++ poly1305_core_emit(&desc->h, digest);
5181 ++
5182 ++ /* mac = (h + s) % (2^128) */
5183 ++ f = (f >> 32) + le32_to_cpu(digest[0]) + desc->s[0];
5184 ++ put_unaligned_le32(f, dst + 0);
5185 ++ f = (f >> 32) + le32_to_cpu(digest[1]) + desc->s[1];
5186 ++ put_unaligned_le32(f, dst + 4);
5187 ++ f = (f >> 32) + le32_to_cpu(digest[2]) + desc->s[2];
5188 ++ put_unaligned_le32(f, dst + 8);
5189 ++ f = (f >> 32) + le32_to_cpu(digest[3]) + desc->s[3];
5190 ++ put_unaligned_le32(f, dst + 12);
5191 ++
5192 ++ *desc = (struct poly1305_desc_ctx){};
5193 ++}
5194 ++EXPORT_SYMBOL_GPL(poly1305_final_generic);
5195 ++
5196 + MODULE_LICENSE("GPL");
5197 + MODULE_AUTHOR("Martin Willi <martin@××××××××××.org>");
5198 +--
5199 +cgit v1.2.3-4-ga26e
5200 +
5201 +
5202 +From e9691178ff9689501674a2f66d69fa10ad298e98 Mon Sep 17 00:00:00 2001
5203 +From: Ard Biesheuvel <ardb@××××××.org>
5204 +Date: Fri, 8 Nov 2019 13:22:22 +0100
5205 +Subject: crypto: x86/poly1305 - depend on generic library not generic shash
5206 +
5207 +commit 1b2c6a5120489d41c8ea3b8dacd0b4586289b158 upstream.
5208 +
5209 +Remove the dependency on the generic Poly1305 driver. Instead, depend
5210 +on the generic library so that we only reuse code without pulling in
5211 +the generic shash implementation as well.
5212 +
5213 +While at it, remove the logic that prefers the non-SIMD path for short
5214 +inputs - this is no longer necessary after recent FPU handling changes
5215 +on x86.
5216 +
5217 +Since this removes the last remaining user of the routines exported
5218 +by the generic shash driver, unexport them and make them static.
5219 +
5220 +Signed-off-by: Ard Biesheuvel <ardb@××××××.org>
5221 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
5222 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
5223 +---
5224 + arch/x86/crypto/poly1305_glue.c | 66 +++++++++++++++++++++++++++++++-------
5225 + crypto/Kconfig | 2 +-
5226 + crypto/poly1305_generic.c | 11 +++----
5227 + include/crypto/internal/poly1305.h | 9 ------
5228 + 4 files changed, 60 insertions(+), 28 deletions(-)
5229 +
5230 +diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
5231 +index b43b93c95e79..a5b3a054604c 100644
5232 +--- a/arch/x86/crypto/poly1305_glue.c
5233 ++++ b/arch/x86/crypto/poly1305_glue.c
5234 +@@ -34,6 +34,24 @@ static void poly1305_simd_mult(u32 *a, const u32 *b)
5235 + poly1305_block_sse2(a, m, b, 1);
5236 + }
5237 +
5238 ++static unsigned int poly1305_scalar_blocks(struct poly1305_desc_ctx *dctx,
5239 ++ const u8 *src, unsigned int srclen)
5240 ++{
5241 ++ unsigned int datalen;
5242 ++
5243 ++ if (unlikely(!dctx->sset)) {
5244 ++ datalen = crypto_poly1305_setdesckey(dctx, src, srclen);
5245 ++ src += srclen - datalen;
5246 ++ srclen = datalen;
5247 ++ }
5248 ++ if (srclen >= POLY1305_BLOCK_SIZE) {
5249 ++ poly1305_core_blocks(&dctx->h, dctx->r, src,
5250 ++ srclen / POLY1305_BLOCK_SIZE, 1);
5251 ++ srclen %= POLY1305_BLOCK_SIZE;
5252 ++ }
5253 ++ return srclen;
5254 ++}
5255 ++
5256 + static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx,
5257 + const u8 *src, unsigned int srclen)
5258 + {
5259 +@@ -91,12 +109,6 @@ static int poly1305_simd_update(struct shash_desc *desc,
5260 + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
5261 + unsigned int bytes;
5262 +
5263 +- /* kernel_fpu_begin/end is costly, use fallback for small updates */
5264 +- if (srclen <= 288 || !crypto_simd_usable())
5265 +- return crypto_poly1305_update(desc, src, srclen);
5266 +-
5267 +- kernel_fpu_begin();
5268 +-
5269 + if (unlikely(dctx->buflen)) {
5270 + bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen);
5271 + memcpy(dctx->buf + dctx->buflen, src, bytes);
5272 +@@ -105,25 +117,57 @@ static int poly1305_simd_update(struct shash_desc *desc,
5273 + dctx->buflen += bytes;
5274 +
5275 + if (dctx->buflen == POLY1305_BLOCK_SIZE) {
5276 +- poly1305_simd_blocks(dctx, dctx->buf,
5277 +- POLY1305_BLOCK_SIZE);
5278 ++ if (likely(crypto_simd_usable())) {
5279 ++ kernel_fpu_begin();
5280 ++ poly1305_simd_blocks(dctx, dctx->buf,
5281 ++ POLY1305_BLOCK_SIZE);
5282 ++ kernel_fpu_end();
5283 ++ } else {
5284 ++ poly1305_scalar_blocks(dctx, dctx->buf,
5285 ++ POLY1305_BLOCK_SIZE);
5286 ++ }
5287 + dctx->buflen = 0;
5288 + }
5289 + }
5290 +
5291 + if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
5292 +- bytes = poly1305_simd_blocks(dctx, src, srclen);
5293 ++ if (likely(crypto_simd_usable())) {
5294 ++ kernel_fpu_begin();
5295 ++ bytes = poly1305_simd_blocks(dctx, src, srclen);
5296 ++ kernel_fpu_end();
5297 ++ } else {
5298 ++ bytes = poly1305_scalar_blocks(dctx, src, srclen);
5299 ++ }
5300 + src += srclen - bytes;
5301 + srclen = bytes;
5302 + }
5303 +
5304 +- kernel_fpu_end();
5305 +-
5306 + if (unlikely(srclen)) {
5307 + dctx->buflen = srclen;
5308 + memcpy(dctx->buf, src, srclen);
5309 + }
5310 ++}
5311 ++
5312 ++static int crypto_poly1305_init(struct shash_desc *desc)
5313 ++{
5314 ++ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
5315 ++
5316 ++ poly1305_core_init(&dctx->h);
5317 ++ dctx->buflen = 0;
5318 ++ dctx->rset = 0;
5319 ++ dctx->sset = false;
5320 ++
5321 ++ return 0;
5322 ++}
5323 ++
5324 ++static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
5325 ++{
5326 ++ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
5327 ++
5328 ++ if (unlikely(!dctx->sset))
5329 ++ return -ENOKEY;
5330 +
5331 ++ poly1305_final_generic(dctx, dst);
5332 + return 0;
5333 + }
5334 +
5335 +diff --git a/crypto/Kconfig b/crypto/Kconfig
5336 +index b70b9d7c6e2f..6178aa627141 100644
5337 +--- a/crypto/Kconfig
5338 ++++ b/crypto/Kconfig
5339 +@@ -697,7 +697,7 @@ config CRYPTO_POLY1305
5340 + config CRYPTO_POLY1305_X86_64
5341 + tristate "Poly1305 authenticator algorithm (x86_64/SSE2/AVX2)"
5342 + depends on X86 && 64BIT
5343 +- select CRYPTO_POLY1305
5344 ++ select CRYPTO_LIB_POLY1305_GENERIC
5345 + help
5346 + Poly1305 authenticator algorithm, RFC7539.
5347 +
5348 +diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c
5349 +index afe9a9e576dd..21edbd8c99fb 100644
5350 +--- a/crypto/poly1305_generic.c
5351 ++++ b/crypto/poly1305_generic.c
5352 +@@ -19,7 +19,7 @@
5353 + #include <linux/module.h>
5354 + #include <asm/unaligned.h>
5355 +
5356 +-int crypto_poly1305_init(struct shash_desc *desc)
5357 ++static int crypto_poly1305_init(struct shash_desc *desc)
5358 + {
5359 + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
5360 +
5361 +@@ -30,7 +30,6 @@ int crypto_poly1305_init(struct shash_desc *desc)
5362 +
5363 + return 0;
5364 + }
5365 +-EXPORT_SYMBOL_GPL(crypto_poly1305_init);
5366 +
5367 + static void poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
5368 + unsigned int srclen)
5369 +@@ -47,8 +46,8 @@ static void poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
5370 + srclen / POLY1305_BLOCK_SIZE, 1);
5371 + }
5372 +
5373 +-int crypto_poly1305_update(struct shash_desc *desc,
5374 +- const u8 *src, unsigned int srclen)
5375 ++static int crypto_poly1305_update(struct shash_desc *desc,
5376 ++ const u8 *src, unsigned int srclen)
5377 + {
5378 + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
5379 + unsigned int bytes;
5380 +@@ -80,9 +79,8 @@ int crypto_poly1305_update(struct shash_desc *desc,
5381 +
5382 + return 0;
5383 + }
5384 +-EXPORT_SYMBOL_GPL(crypto_poly1305_update);
5385 +
5386 +-int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
5387 ++static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
5388 + {
5389 + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
5390 +
5391 +@@ -92,7 +90,6 @@ int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
5392 + poly1305_final_generic(dctx, dst);
5393 + return 0;
5394 + }
5395 +-EXPORT_SYMBOL_GPL(crypto_poly1305_final);
5396 +
5397 + static struct shash_alg poly1305_alg = {
5398 + .digestsize = POLY1305_DIGEST_SIZE,
5399 +diff --git a/include/crypto/internal/poly1305.h b/include/crypto/internal/poly1305.h
5400 +index 04fa269e5534..479b0cab2a1a 100644
5401 +--- a/include/crypto/internal/poly1305.h
5402 ++++ b/include/crypto/internal/poly1305.h
5403 +@@ -10,8 +10,6 @@
5404 + #include <linux/types.h>
5405 + #include <crypto/poly1305.h>
5406 +
5407 +-struct shash_desc;
5408 +-
5409 + /*
5410 + * Poly1305 core functions. These implement the ε-almost-∆-universal hash
5411 + * function underlying the Poly1305 MAC, i.e. they don't add an encrypted nonce
5412 +@@ -28,13 +26,6 @@ void poly1305_core_blocks(struct poly1305_state *state,
5413 + unsigned int nblocks, u32 hibit);
5414 + void poly1305_core_emit(const struct poly1305_state *state, void *dst);
5415 +
5416 +-/* Crypto API helper functions for the Poly1305 MAC */
5417 +-int crypto_poly1305_init(struct shash_desc *desc);
5418 +-
5419 +-int crypto_poly1305_update(struct shash_desc *desc,
5420 +- const u8 *src, unsigned int srclen);
5421 +-int crypto_poly1305_final(struct shash_desc *desc, u8 *dst);
5422 +-
5423 + /*
5424 + * Poly1305 requires a unique key for each tag, which implies that we can't set
5425 + * it on the tfm that gets accessed by multiple users simultaneously. Instead we
5426 +--
5427 +cgit v1.2.3-4-ga26e
5428 +
5429 +
5430 +From e3931599c0fc83536f70d9e787a87625fb219717 Mon Sep 17 00:00:00 2001
5431 +From: Ard Biesheuvel <ardb@××××××.org>
5432 +Date: Fri, 8 Nov 2019 13:22:23 +0100
5433 +Subject: crypto: x86/poly1305 - expose existing driver as poly1305 library
5434 +
5435 +commit f0e89bcfbb894e5844cd1bbf6b3cf7c63cb0f5ac upstream.
5436 +
5437 +Implement the arch init/update/final Poly1305 library routines in the
5438 +accelerated SIMD driver for x86 so they are accessible to users of
5439 +the Poly1305 library interface as well.
5440 +
5441 +Signed-off-by: Ard Biesheuvel <ardb@××××××.org>
5442 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
5443 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
5444 +---
5445 + arch/x86/crypto/poly1305_glue.c | 57 +++++++++++++++++++++++++++++------------
5446 + crypto/Kconfig | 1 +
5447 + lib/crypto/Kconfig | 1 +
5448 + 3 files changed, 43 insertions(+), 16 deletions(-)
5449 +
5450 +diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
5451 +index a5b3a054604c..370cd88068ec 100644
5452 +--- a/arch/x86/crypto/poly1305_glue.c
5453 ++++ b/arch/x86/crypto/poly1305_glue.c
5454 +@@ -10,6 +10,7 @@
5455 + #include <crypto/internal/poly1305.h>
5456 + #include <crypto/internal/simd.h>
5457 + #include <linux/crypto.h>
5458 ++#include <linux/jump_label.h>
5459 + #include <linux/kernel.h>
5460 + #include <linux/module.h>
5461 + #include <asm/simd.h>
5462 +@@ -21,7 +22,8 @@ asmlinkage void poly1305_2block_sse2(u32 *h, const u8 *src, const u32 *r,
5463 + asmlinkage void poly1305_4block_avx2(u32 *h, const u8 *src, const u32 *r,
5464 + unsigned int blocks, const u32 *u);
5465 +
5466 +-static bool poly1305_use_avx2 __ro_after_init;
5467 ++static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_simd);
5468 ++static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx2);
5469 +
5470 + static void poly1305_simd_mult(u32 *a, const u32 *b)
5471 + {
5472 +@@ -64,7 +66,7 @@ static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx,
5473 + }
5474 +
5475 + if (IS_ENABLED(CONFIG_AS_AVX2) &&
5476 +- poly1305_use_avx2 &&
5477 ++ static_branch_likely(&poly1305_use_avx2) &&
5478 + srclen >= POLY1305_BLOCK_SIZE * 4) {
5479 + if (unlikely(dctx->rset < 4)) {
5480 + if (dctx->rset < 2) {
5481 +@@ -103,10 +105,15 @@ static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx,
5482 + return srclen;
5483 + }
5484 +
5485 +-static int poly1305_simd_update(struct shash_desc *desc,
5486 +- const u8 *src, unsigned int srclen)
5487 ++void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key)
5488 ++{
5489 ++ poly1305_init_generic(desc, key);
5490 ++}
5491 ++EXPORT_SYMBOL(poly1305_init_arch);
5492 ++
5493 ++void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
5494 ++ unsigned int srclen)
5495 + {
5496 +- struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
5497 + unsigned int bytes;
5498 +
5499 + if (unlikely(dctx->buflen)) {
5500 +@@ -117,7 +124,8 @@ static int poly1305_simd_update(struct shash_desc *desc,
5501 + dctx->buflen += bytes;
5502 +
5503 + if (dctx->buflen == POLY1305_BLOCK_SIZE) {
5504 +- if (likely(crypto_simd_usable())) {
5505 ++ if (static_branch_likely(&poly1305_use_simd) &&
5506 ++ likely(crypto_simd_usable())) {
5507 + kernel_fpu_begin();
5508 + poly1305_simd_blocks(dctx, dctx->buf,
5509 + POLY1305_BLOCK_SIZE);
5510 +@@ -131,7 +139,8 @@ static int poly1305_simd_update(struct shash_desc *desc,
5511 + }
5512 +
5513 + if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
5514 +- if (likely(crypto_simd_usable())) {
5515 ++ if (static_branch_likely(&poly1305_use_simd) &&
5516 ++ likely(crypto_simd_usable())) {
5517 + kernel_fpu_begin();
5518 + bytes = poly1305_simd_blocks(dctx, src, srclen);
5519 + kernel_fpu_end();
5520 +@@ -147,6 +156,13 @@ static int poly1305_simd_update(struct shash_desc *desc,
5521 + memcpy(dctx->buf, src, srclen);
5522 + }
5523 + }
5524 ++EXPORT_SYMBOL(poly1305_update_arch);
5525 ++
5526 ++void poly1305_final_arch(struct poly1305_desc_ctx *desc, u8 *digest)
5527 ++{
5528 ++ poly1305_final_generic(desc, digest);
5529 ++}
5530 ++EXPORT_SYMBOL(poly1305_final_arch);
5531 +
5532 + static int crypto_poly1305_init(struct shash_desc *desc)
5533 + {
5534 +@@ -171,6 +187,15 @@ static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
5535 + return 0;
5536 + }
5537 +
5538 ++static int poly1305_simd_update(struct shash_desc *desc,
5539 ++ const u8 *src, unsigned int srclen)
5540 ++{
5541 ++ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
5542 ++
5543 ++ poly1305_update_arch(dctx, src, srclen);
5544 ++ return 0;
5545 ++}
5546 ++
5547 + static struct shash_alg alg = {
5548 + .digestsize = POLY1305_DIGEST_SIZE,
5549 + .init = crypto_poly1305_init,
5550 +@@ -189,15 +214,15 @@ static struct shash_alg alg = {
5551 + static int __init poly1305_simd_mod_init(void)
5552 + {
5553 + if (!boot_cpu_has(X86_FEATURE_XMM2))
5554 +- return -ENODEV;
5555 +-
5556 +- poly1305_use_avx2 = IS_ENABLED(CONFIG_AS_AVX2) &&
5557 +- boot_cpu_has(X86_FEATURE_AVX) &&
5558 +- boot_cpu_has(X86_FEATURE_AVX2) &&
5559 +- cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
5560 +- alg.descsize = sizeof(struct poly1305_desc_ctx) + 5 * sizeof(u32);
5561 +- if (poly1305_use_avx2)
5562 +- alg.descsize += 10 * sizeof(u32);
5563 ++ return 0;
5564 ++
5565 ++ static_branch_enable(&poly1305_use_simd);
5566 ++
5567 ++ if (IS_ENABLED(CONFIG_AS_AVX2) &&
5568 ++ boot_cpu_has(X86_FEATURE_AVX) &&
5569 ++ boot_cpu_has(X86_FEATURE_AVX2) &&
5570 ++ cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
5571 ++ static_branch_enable(&poly1305_use_avx2);
5572 +
5573 + return crypto_register_shash(&alg);
5574 + }
5575 +diff --git a/crypto/Kconfig b/crypto/Kconfig
5576 +index 6178aa627141..15cfb02c3e49 100644
5577 +--- a/crypto/Kconfig
5578 ++++ b/crypto/Kconfig
5579 +@@ -698,6 +698,7 @@ config CRYPTO_POLY1305_X86_64
5580 + tristate "Poly1305 authenticator algorithm (x86_64/SSE2/AVX2)"
5581 + depends on X86 && 64BIT
5582 + select CRYPTO_LIB_POLY1305_GENERIC
5583 ++ select CRYPTO_ARCH_HAVE_LIB_POLY1305
5584 + help
5585 + Poly1305 authenticator algorithm, RFC7539.
5586 +
5587 +diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
5588 +index a731ea36bd5c..181754615f73 100644
5589 +--- a/lib/crypto/Kconfig
5590 ++++ b/lib/crypto/Kconfig
5591 +@@ -39,6 +39,7 @@ config CRYPTO_LIB_DES
5592 +
5593 + config CRYPTO_LIB_POLY1305_RSIZE
5594 + int
5595 ++ default 4 if X86_64
5596 + default 1
5597 +
5598 + config CRYPTO_ARCH_HAVE_LIB_POLY1305
5599 +--
5600 +cgit v1.2.3-4-ga26e
5601 +
5602 +
5603 +From f144abe4f57bc0e7a3eda77d7d8f9f4128a9e40a Mon Sep 17 00:00:00 2001
5604 +From: Ard Biesheuvel <ardb@××××××.org>
5605 +Date: Fri, 8 Nov 2019 13:22:24 +0100
5606 +Subject: crypto: arm64/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON
5607 + implementation
5608 +
5609 +commit f569ca16475155013525686d0f73bc379c67e635 upstream.
5610 +
5611 +This is a straight import of the OpenSSL/CRYPTOGAMS Poly1305 implementation
5612 +for NEON authored by Andy Polyakov, and contributed by him to the OpenSSL
5613 +project. The file 'poly1305-armv8.pl' is taken straight from this upstream
5614 +GitHub repository [0] at commit ec55a08dc0244ce570c4fc7cade330c60798952f,
5615 +and already contains all the changes required to build it as part of a
5616 +Linux kernel module.
5617 +
5618 +[0] https://github.com/dot-asm/cryptogams
5619 +
5620 +Co-developed-by: Andy Polyakov <appro@××××××××××.org>
5621 +Signed-off-by: Andy Polyakov <appro@××××××××××.org>
5622 +Signed-off-by: Ard Biesheuvel <ardb@××××××.org>
5623 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
5624 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
5625 +---
5626 + arch/arm64/crypto/Kconfig | 6 +
5627 + arch/arm64/crypto/Makefile | 10 +-
5628 + arch/arm64/crypto/poly1305-armv8.pl | 913 ++++++++++++++++++++++++++++++
5629 + arch/arm64/crypto/poly1305-core.S_shipped | 835 +++++++++++++++++++++++++++
5630 + arch/arm64/crypto/poly1305-glue.c | 237 ++++++++
5631 + lib/crypto/Kconfig | 1 +
5632 + 6 files changed, 2001 insertions(+), 1 deletion(-)
5633 + create mode 100644 arch/arm64/crypto/poly1305-armv8.pl
5634 + create mode 100644 arch/arm64/crypto/poly1305-core.S_shipped
5635 + create mode 100644 arch/arm64/crypto/poly1305-glue.c
5636 +
5637 +diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
5638 +index 17bada4b9dd2..30d9b24ee86e 100644
5639 +--- a/arch/arm64/crypto/Kconfig
5640 ++++ b/arch/arm64/crypto/Kconfig
5641 +@@ -106,6 +106,12 @@ config CRYPTO_CHACHA20_NEON
5642 + select CRYPTO_LIB_CHACHA_GENERIC
5643 + select CRYPTO_ARCH_HAVE_LIB_CHACHA
5644 +
5645 ++config CRYPTO_POLY1305_NEON
5646 ++ tristate "Poly1305 hash function using scalar or NEON instructions"
5647 ++ depends on KERNEL_MODE_NEON
5648 ++ select CRYPTO_HASH
5649 ++ select CRYPTO_ARCH_HAVE_LIB_POLY1305
5650 ++
5651 + config CRYPTO_NHPOLY1305_NEON
5652 + tristate "NHPoly1305 hash function using NEON instructions (for Adiantum)"
5653 + depends on KERNEL_MODE_NEON
5654 +diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
5655 +index 0435f2a0610e..d0901e610df3 100644
5656 +--- a/arch/arm64/crypto/Makefile
5657 ++++ b/arch/arm64/crypto/Makefile
5658 +@@ -50,6 +50,10 @@ sha512-arm64-y := sha512-glue.o sha512-core.o
5659 + obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o
5660 + chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o
5661 +
5662 ++obj-$(CONFIG_CRYPTO_POLY1305_NEON) += poly1305-neon.o
5663 ++poly1305-neon-y := poly1305-core.o poly1305-glue.o
5664 ++AFLAGS_poly1305-core.o += -Dpoly1305_init=poly1305_init_arm64
5665 ++
5666 + obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o
5667 + nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o
5668 +
5669 +@@ -68,11 +72,15 @@ ifdef REGENERATE_ARM64_CRYPTO
5670 + quiet_cmd_perlasm = PERLASM $@
5671 + cmd_perlasm = $(PERL) $(<) void $(@)
5672 +
5673 ++$(src)/poly1305-core.S_shipped: $(src)/poly1305-armv8.pl
5674 ++ $(call cmd,perlasm)
5675 ++
5676 + $(src)/sha256-core.S_shipped: $(src)/sha512-armv8.pl
5677 + $(call cmd,perlasm)
5678 +
5679 + $(src)/sha512-core.S_shipped: $(src)/sha512-armv8.pl
5680 + $(call cmd,perlasm)
5681 ++
5682 + endif
5683 +
5684 +-clean-files += sha256-core.S sha512-core.S
5685 ++clean-files += poly1305-core.S sha256-core.S sha512-core.S
5686 +diff --git a/arch/arm64/crypto/poly1305-armv8.pl b/arch/arm64/crypto/poly1305-armv8.pl
5687 +new file mode 100644
5688 +index 000000000000..6e5576d19af8
5689 +--- /dev/null
5690 ++++ b/arch/arm64/crypto/poly1305-armv8.pl
5691 +@@ -0,0 +1,913 @@
5692 ++#!/usr/bin/env perl
5693 ++# SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause
5694 ++#
5695 ++# ====================================================================
5696 ++# Written by Andy Polyakov, @dot-asm, initially for the OpenSSL
5697 ++# project.
5698 ++# ====================================================================
5699 ++#
5700 ++# This module implements Poly1305 hash for ARMv8.
5701 ++#
5702 ++# June 2015
5703 ++#
5704 ++# Numbers are cycles per processed byte with poly1305_blocks alone.
5705 ++#
5706 ++# IALU/gcc-4.9 NEON
5707 ++#
5708 ++# Apple A7 1.86/+5% 0.72
5709 ++# Cortex-A53 2.69/+58% 1.47
5710 ++# Cortex-A57 2.70/+7% 1.14
5711 ++# Denver 1.64/+50% 1.18(*)
5712 ++# X-Gene 2.13/+68% 2.27
5713 ++# Mongoose 1.77/+75% 1.12
5714 ++# Kryo 2.70/+55% 1.13
5715 ++# ThunderX2 1.17/+95% 1.36
5716 ++#
5717 ++# (*) estimate based on resources availability is less than 1.0,
5718 ++# i.e. measured result is worse than expected, presumably binary
5719 ++# translator is not almighty;
5720 ++
5721 ++$flavour=shift;
5722 ++$output=shift;
5723 ++
5724 ++if ($flavour && $flavour ne "void") {
5725 ++ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
5726 ++ ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
5727 ++ ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
5728 ++ die "can't locate arm-xlate.pl";
5729 ++
5730 ++ open STDOUT,"| \"$^X\" $xlate $flavour $output";
5731 ++} else {
5732 ++ open STDOUT,">$output";
5733 ++}
5734 ++
5735 ++my ($ctx,$inp,$len,$padbit) = map("x$_",(0..3));
5736 ++my ($mac,$nonce)=($inp,$len);
5737 ++
5738 ++my ($h0,$h1,$h2,$r0,$r1,$s1,$t0,$t1,$d0,$d1,$d2) = map("x$_",(4..14));
5739 ++
5740 ++$code.=<<___;
5741 ++#ifndef __KERNEL__
5742 ++# include "arm_arch.h"
5743 ++.extern OPENSSL_armcap_P
5744 ++#endif
5745 ++
5746 ++.text
5747 ++
5748 ++// forward "declarations" are required for Apple
5749 ++.globl poly1305_blocks
5750 ++.globl poly1305_emit
5751 ++
5752 ++.globl poly1305_init
5753 ++.type poly1305_init,%function
5754 ++.align 5
5755 ++poly1305_init:
5756 ++ cmp $inp,xzr
5757 ++ stp xzr,xzr,[$ctx] // zero hash value
5758 ++ stp xzr,xzr,[$ctx,#16] // [along with is_base2_26]
5759 ++
5760 ++ csel x0,xzr,x0,eq
5761 ++ b.eq .Lno_key
5762 ++
5763 ++#ifndef __KERNEL__
5764 ++ adrp x17,OPENSSL_armcap_P
5765 ++ ldr w17,[x17,#:lo12:OPENSSL_armcap_P]
5766 ++#endif
5767 ++
5768 ++ ldp $r0,$r1,[$inp] // load key
5769 ++ mov $s1,#0xfffffffc0fffffff
5770 ++ movk $s1,#0x0fff,lsl#48
5771 ++#ifdef __AARCH64EB__
5772 ++ rev $r0,$r0 // flip bytes
5773 ++ rev $r1,$r1
5774 ++#endif
5775 ++ and $r0,$r0,$s1 // &=0ffffffc0fffffff
5776 ++ and $s1,$s1,#-4
5777 ++ and $r1,$r1,$s1 // &=0ffffffc0ffffffc
5778 ++ mov w#$s1,#-1
5779 ++ stp $r0,$r1,[$ctx,#32] // save key value
5780 ++ str w#$s1,[$ctx,#48] // impossible key power value
5781 ++
5782 ++#ifndef __KERNEL__
5783 ++ tst w17,#ARMV7_NEON
5784 ++
5785 ++ adr $d0,.Lpoly1305_blocks
5786 ++ adr $r0,.Lpoly1305_blocks_neon
5787 ++ adr $d1,.Lpoly1305_emit
5788 ++
5789 ++ csel $d0,$d0,$r0,eq
5790 ++
5791 ++# ifdef __ILP32__
5792 ++ stp w#$d0,w#$d1,[$len]
5793 ++# else
5794 ++ stp $d0,$d1,[$len]
5795 ++# endif
5796 ++#endif
5797 ++ mov x0,#1
5798 ++.Lno_key:
5799 ++ ret
5800 ++.size poly1305_init,.-poly1305_init
5801 ++
5802 ++.type poly1305_blocks,%function
5803 ++.align 5
5804 ++poly1305_blocks:
5805 ++.Lpoly1305_blocks:
5806 ++ ands $len,$len,#-16
5807 ++ b.eq .Lno_data
5808 ++
5809 ++ ldp $h0,$h1,[$ctx] // load hash value
5810 ++ ldp $h2,x17,[$ctx,#16] // [along with is_base2_26]
5811 ++ ldp $r0,$r1,[$ctx,#32] // load key value
5812 ++
5813 ++#ifdef __AARCH64EB__
5814 ++ lsr $d0,$h0,#32
5815 ++ mov w#$d1,w#$h0
5816 ++ lsr $d2,$h1,#32
5817 ++ mov w15,w#$h1
5818 ++ lsr x16,$h2,#32
5819 ++#else
5820 ++ mov w#$d0,w#$h0
5821 ++ lsr $d1,$h0,#32
5822 ++ mov w#$d2,w#$h1
5823 ++ lsr x15,$h1,#32
5824 ++ mov w16,w#$h2
5825 ++#endif
5826 ++
5827 ++ add $d0,$d0,$d1,lsl#26 // base 2^26 -> base 2^64
5828 ++ lsr $d1,$d2,#12
5829 ++ adds $d0,$d0,$d2,lsl#52
5830 ++ add $d1,$d1,x15,lsl#14
5831 ++ adc $d1,$d1,xzr
5832 ++ lsr $d2,x16,#24
5833 ++ adds $d1,$d1,x16,lsl#40
5834 ++ adc $d2,$d2,xzr
5835 ++
5836 ++ cmp x17,#0 // is_base2_26?
5837 ++ add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2)
5838 ++ csel $h0,$h0,$d0,eq // choose between radixes
5839 ++ csel $h1,$h1,$d1,eq
5840 ++ csel $h2,$h2,$d2,eq
5841 ++
5842 ++.Loop:
5843 ++ ldp $t0,$t1,[$inp],#16 // load input
5844 ++ sub $len,$len,#16
5845 ++#ifdef __AARCH64EB__
5846 ++ rev $t0,$t0
5847 ++ rev $t1,$t1
5848 ++#endif
5849 ++ adds $h0,$h0,$t0 // accumulate input
5850 ++ adcs $h1,$h1,$t1
5851 ++
5852 ++ mul $d0,$h0,$r0 // h0*r0
5853 ++ adc $h2,$h2,$padbit
5854 ++ umulh $d1,$h0,$r0
5855 ++
5856 ++ mul $t0,$h1,$s1 // h1*5*r1
5857 ++ umulh $t1,$h1,$s1
5858 ++
5859 ++ adds $d0,$d0,$t0
5860 ++ mul $t0,$h0,$r1 // h0*r1
5861 ++ adc $d1,$d1,$t1
5862 ++ umulh $d2,$h0,$r1
5863 ++
5864 ++ adds $d1,$d1,$t0
5865 ++ mul $t0,$h1,$r0 // h1*r0
5866 ++ adc $d2,$d2,xzr
5867 ++ umulh $t1,$h1,$r0
5868 ++
5869 ++ adds $d1,$d1,$t0
5870 ++ mul $t0,$h2,$s1 // h2*5*r1
5871 ++ adc $d2,$d2,$t1
5872 ++ mul $t1,$h2,$r0 // h2*r0
5873 ++
5874 ++ adds $d1,$d1,$t0
5875 ++ adc $d2,$d2,$t1
5876 ++
5877 ++ and $t0,$d2,#-4 // final reduction
5878 ++ and $h2,$d2,#3
5879 ++ add $t0,$t0,$d2,lsr#2
5880 ++ adds $h0,$d0,$t0
5881 ++ adcs $h1,$d1,xzr
5882 ++ adc $h2,$h2,xzr
5883 ++
5884 ++ cbnz $len,.Loop
5885 ++
5886 ++ stp $h0,$h1,[$ctx] // store hash value
5887 ++ stp $h2,xzr,[$ctx,#16] // [and clear is_base2_26]
5888 ++
5889 ++.Lno_data:
5890 ++ ret
5891 ++.size poly1305_blocks,.-poly1305_blocks
5892 ++
5893 ++.type poly1305_emit,%function
5894 ++.align 5
5895 ++poly1305_emit:
5896 ++.Lpoly1305_emit:
5897 ++ ldp $h0,$h1,[$ctx] // load hash base 2^64
5898 ++ ldp $h2,$r0,[$ctx,#16] // [along with is_base2_26]
5899 ++ ldp $t0,$t1,[$nonce] // load nonce
5900 ++
5901 ++#ifdef __AARCH64EB__
5902 ++ lsr $d0,$h0,#32
5903 ++ mov w#$d1,w#$h0
5904 ++ lsr $d2,$h1,#32
5905 ++ mov w15,w#$h1
5906 ++ lsr x16,$h2,#32
5907 ++#else
5908 ++ mov w#$d0,w#$h0
5909 ++ lsr $d1,$h0,#32
5910 ++ mov w#$d2,w#$h1
5911 ++ lsr x15,$h1,#32
5912 ++ mov w16,w#$h2
5913 ++#endif
5914 ++
5915 ++ add $d0,$d0,$d1,lsl#26 // base 2^26 -> base 2^64
5916 ++ lsr $d1,$d2,#12
5917 ++ adds $d0,$d0,$d2,lsl#52
5918 ++ add $d1,$d1,x15,lsl#14
5919 ++ adc $d1,$d1,xzr
5920 ++ lsr $d2,x16,#24
5921 ++ adds $d1,$d1,x16,lsl#40
5922 ++ adc $d2,$d2,xzr
5923 ++
5924 ++ cmp $r0,#0 // is_base2_26?
5925 ++ csel $h0,$h0,$d0,eq // choose between radixes
5926 ++ csel $h1,$h1,$d1,eq
5927 ++ csel $h2,$h2,$d2,eq
5928 ++
5929 ++ adds $d0,$h0,#5 // compare to modulus
5930 ++ adcs $d1,$h1,xzr
5931 ++ adc $d2,$h2,xzr
5932 ++
5933 ++ tst $d2,#-4 // see if it's carried/borrowed
5934 ++
5935 ++ csel $h0,$h0,$d0,eq
5936 ++ csel $h1,$h1,$d1,eq
5937 ++
5938 ++#ifdef __AARCH64EB__
5939 ++ ror $t0,$t0,#32 // flip nonce words
5940 ++ ror $t1,$t1,#32
5941 ++#endif
5942 ++ adds $h0,$h0,$t0 // accumulate nonce
5943 ++ adc $h1,$h1,$t1
5944 ++#ifdef __AARCH64EB__
5945 ++ rev $h0,$h0 // flip output bytes
5946 ++ rev $h1,$h1
5947 ++#endif
5948 ++ stp $h0,$h1,[$mac] // write result
5949 ++
5950 ++ ret
5951 ++.size poly1305_emit,.-poly1305_emit
5952 ++___
5953 ++my ($R0,$R1,$S1,$R2,$S2,$R3,$S3,$R4,$S4) = map("v$_.4s",(0..8));
5954 ++my ($IN01_0,$IN01_1,$IN01_2,$IN01_3,$IN01_4) = map("v$_.2s",(9..13));
5955 ++my ($IN23_0,$IN23_1,$IN23_2,$IN23_3,$IN23_4) = map("v$_.2s",(14..18));
5956 ++my ($ACC0,$ACC1,$ACC2,$ACC3,$ACC4) = map("v$_.2d",(19..23));
5957 ++my ($H0,$H1,$H2,$H3,$H4) = map("v$_.2s",(24..28));
5958 ++my ($T0,$T1,$MASK) = map("v$_",(29..31));
5959 ++
5960 ++my ($in2,$zeros)=("x16","x17");
5961 ++my $is_base2_26 = $zeros; # borrow
5962 ++
5963 ++$code.=<<___;
5964 ++.type poly1305_mult,%function
5965 ++.align 5
5966 ++poly1305_mult:
5967 ++ mul $d0,$h0,$r0 // h0*r0
5968 ++ umulh $d1,$h0,$r0
5969 ++
5970 ++ mul $t0,$h1,$s1 // h1*5*r1
5971 ++ umulh $t1,$h1,$s1
5972 ++
5973 ++ adds $d0,$d0,$t0
5974 ++ mul $t0,$h0,$r1 // h0*r1
5975 ++ adc $d1,$d1,$t1
5976 ++ umulh $d2,$h0,$r1
5977 ++
5978 ++ adds $d1,$d1,$t0
5979 ++ mul $t0,$h1,$r0 // h1*r0
5980 ++ adc $d2,$d2,xzr
5981 ++ umulh $t1,$h1,$r0
5982 ++
5983 ++ adds $d1,$d1,$t0
5984 ++ mul $t0,$h2,$s1 // h2*5*r1
5985 ++ adc $d2,$d2,$t1
5986 ++ mul $t1,$h2,$r0 // h2*r0
5987 ++
5988 ++ adds $d1,$d1,$t0
5989 ++ adc $d2,$d2,$t1
5990 ++
5991 ++ and $t0,$d2,#-4 // final reduction
5992 ++ and $h2,$d2,#3
5993 ++ add $t0,$t0,$d2,lsr#2
5994 ++ adds $h0,$d0,$t0
5995 ++ adcs $h1,$d1,xzr
5996 ++ adc $h2,$h2,xzr
5997 ++
5998 ++ ret
5999 ++.size poly1305_mult,.-poly1305_mult
6000 ++
6001 ++.type poly1305_splat,%function
6002 ++.align 4
6003 ++poly1305_splat:
6004 ++ and x12,$h0,#0x03ffffff // base 2^64 -> base 2^26
6005 ++ ubfx x13,$h0,#26,#26
6006 ++ extr x14,$h1,$h0,#52
6007 ++ and x14,x14,#0x03ffffff
6008 ++ ubfx x15,$h1,#14,#26
6009 ++ extr x16,$h2,$h1,#40
6010 ++
6011 ++ str w12,[$ctx,#16*0] // r0
6012 ++ add w12,w13,w13,lsl#2 // r1*5
6013 ++ str w13,[$ctx,#16*1] // r1
6014 ++ add w13,w14,w14,lsl#2 // r2*5
6015 ++ str w12,[$ctx,#16*2] // s1
6016 ++ str w14,[$ctx,#16*3] // r2
6017 ++ add w14,w15,w15,lsl#2 // r3*5
6018 ++ str w13,[$ctx,#16*4] // s2
6019 ++ str w15,[$ctx,#16*5] // r3
6020 ++ add w15,w16,w16,lsl#2 // r4*5
6021 ++ str w14,[$ctx,#16*6] // s3
6022 ++ str w16,[$ctx,#16*7] // r4
6023 ++ str w15,[$ctx,#16*8] // s4
6024 ++
6025 ++ ret
6026 ++.size poly1305_splat,.-poly1305_splat
6027 ++
6028 ++#ifdef __KERNEL__
6029 ++.globl poly1305_blocks_neon
6030 ++#endif
6031 ++.type poly1305_blocks_neon,%function
6032 ++.align 5
6033 ++poly1305_blocks_neon:
6034 ++.Lpoly1305_blocks_neon:
6035 ++ ldr $is_base2_26,[$ctx,#24]
6036 ++ cmp $len,#128
6037 ++ b.lo .Lpoly1305_blocks
6038 ++
6039 ++ .inst 0xd503233f // paciasp
6040 ++ stp x29,x30,[sp,#-80]!
6041 ++ add x29,sp,#0
6042 ++
6043 ++ stp d8,d9,[sp,#16] // meet ABI requirements
6044 ++ stp d10,d11,[sp,#32]
6045 ++ stp d12,d13,[sp,#48]
6046 ++ stp d14,d15,[sp,#64]
6047 ++
6048 ++ cbz $is_base2_26,.Lbase2_64_neon
6049 ++
6050 ++ ldp w10,w11,[$ctx] // load hash value base 2^26
6051 ++ ldp w12,w13,[$ctx,#8]
6052 ++ ldr w14,[$ctx,#16]
6053 ++
6054 ++ tst $len,#31
6055 ++ b.eq .Leven_neon
6056 ++
6057 ++ ldp $r0,$r1,[$ctx,#32] // load key value
6058 ++
6059 ++ add $h0,x10,x11,lsl#26 // base 2^26 -> base 2^64
6060 ++ lsr $h1,x12,#12
6061 ++ adds $h0,$h0,x12,lsl#52
6062 ++ add $h1,$h1,x13,lsl#14
6063 ++ adc $h1,$h1,xzr
6064 ++ lsr $h2,x14,#24
6065 ++ adds $h1,$h1,x14,lsl#40
6066 ++ adc $d2,$h2,xzr // can be partially reduced...
6067 ++
6068 ++ ldp $d0,$d1,[$inp],#16 // load input
6069 ++ sub $len,$len,#16
6070 ++ add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2)
6071 ++
6072 ++#ifdef __AARCH64EB__
6073 ++ rev $d0,$d0
6074 ++ rev $d1,$d1
6075 ++#endif
6076 ++ adds $h0,$h0,$d0 // accumulate input
6077 ++ adcs $h1,$h1,$d1
6078 ++ adc $h2,$h2,$padbit
6079 ++
6080 ++ bl poly1305_mult
6081 ++
6082 ++ and x10,$h0,#0x03ffffff // base 2^64 -> base 2^26
6083 ++ ubfx x11,$h0,#26,#26
6084 ++ extr x12,$h1,$h0,#52
6085 ++ and x12,x12,#0x03ffffff
6086 ++ ubfx x13,$h1,#14,#26
6087 ++ extr x14,$h2,$h1,#40
6088 ++
6089 ++ b .Leven_neon
6090 ++
6091 ++.align 4
6092 ++.Lbase2_64_neon:
6093 ++ ldp $r0,$r1,[$ctx,#32] // load key value
6094 ++
6095 ++ ldp $h0,$h1,[$ctx] // load hash value base 2^64
6096 ++ ldr $h2,[$ctx,#16]
6097 ++
6098 ++ tst $len,#31
6099 ++ b.eq .Linit_neon
6100 ++
6101 ++ ldp $d0,$d1,[$inp],#16 // load input
6102 ++ sub $len,$len,#16
6103 ++ add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2)
6104 ++#ifdef __AARCH64EB__
6105 ++ rev $d0,$d0
6106 ++ rev $d1,$d1
6107 ++#endif
6108 ++ adds $h0,$h0,$d0 // accumulate input
6109 ++ adcs $h1,$h1,$d1
6110 ++ adc $h2,$h2,$padbit
6111 ++
6112 ++ bl poly1305_mult
6113 ++
6114 ++.Linit_neon:
6115 ++ ldr w17,[$ctx,#48] // first table element
6116 ++ and x10,$h0,#0x03ffffff // base 2^64 -> base 2^26
6117 ++ ubfx x11,$h0,#26,#26
6118 ++ extr x12,$h1,$h0,#52
6119 ++ and x12,x12,#0x03ffffff
6120 ++ ubfx x13,$h1,#14,#26
6121 ++ extr x14,$h2,$h1,#40
6122 ++
6123 ++ cmp w17,#-1 // is value impossible?
6124 ++ b.ne .Leven_neon
6125 ++
6126 ++ fmov ${H0},x10
6127 ++ fmov ${H1},x11
6128 ++ fmov ${H2},x12
6129 ++ fmov ${H3},x13
6130 ++ fmov ${H4},x14
6131 ++
6132 ++ ////////////////////////////////// initialize r^n table
6133 ++ mov $h0,$r0 // r^1
6134 ++ add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2)
6135 ++ mov $h1,$r1
6136 ++ mov $h2,xzr
6137 ++ add $ctx,$ctx,#48+12
6138 ++ bl poly1305_splat
6139 ++
6140 ++ bl poly1305_mult // r^2
6141 ++ sub $ctx,$ctx,#4
6142 ++ bl poly1305_splat
6143 ++
6144 ++ bl poly1305_mult // r^3
6145 ++ sub $ctx,$ctx,#4
6146 ++ bl poly1305_splat
6147 ++
6148 ++ bl poly1305_mult // r^4
6149 ++ sub $ctx,$ctx,#4
6150 ++ bl poly1305_splat
6151 ++ sub $ctx,$ctx,#48 // restore original $ctx
6152 ++ b .Ldo_neon
6153 ++
6154 ++.align 4
6155 ++.Leven_neon:
6156 ++ fmov ${H0},x10
6157 ++ fmov ${H1},x11
6158 ++ fmov ${H2},x12
6159 ++ fmov ${H3},x13
6160 ++ fmov ${H4},x14
6161 ++
6162 ++.Ldo_neon:
6163 ++ ldp x8,x12,[$inp,#32] // inp[2:3]
6164 ++ subs $len,$len,#64
6165 ++ ldp x9,x13,[$inp,#48]
6166 ++ add $in2,$inp,#96
6167 ++ adr $zeros,.Lzeros
6168 ++
6169 ++ lsl $padbit,$padbit,#24
6170 ++ add x15,$ctx,#48
6171 ++
6172 ++#ifdef __AARCH64EB__
6173 ++ rev x8,x8
6174 ++ rev x12,x12
6175 ++ rev x9,x9
6176 ++ rev x13,x13
6177 ++#endif
6178 ++ and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
6179 ++ and x5,x9,#0x03ffffff
6180 ++ ubfx x6,x8,#26,#26
6181 ++ ubfx x7,x9,#26,#26
6182 ++ add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
6183 ++ extr x8,x12,x8,#52
6184 ++ extr x9,x13,x9,#52
6185 ++ add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
6186 ++ fmov $IN23_0,x4
6187 ++ and x8,x8,#0x03ffffff
6188 ++ and x9,x9,#0x03ffffff
6189 ++ ubfx x10,x12,#14,#26
6190 ++ ubfx x11,x13,#14,#26
6191 ++ add x12,$padbit,x12,lsr#40
6192 ++ add x13,$padbit,x13,lsr#40
6193 ++ add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
6194 ++ fmov $IN23_1,x6
6195 ++ add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
6196 ++ add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
6197 ++ fmov $IN23_2,x8
6198 ++ fmov $IN23_3,x10
6199 ++ fmov $IN23_4,x12
6200 ++
6201 ++ ldp x8,x12,[$inp],#16 // inp[0:1]
6202 ++ ldp x9,x13,[$inp],#48
6203 ++
6204 ++ ld1 {$R0,$R1,$S1,$R2},[x15],#64
6205 ++ ld1 {$S2,$R3,$S3,$R4},[x15],#64
6206 ++ ld1 {$S4},[x15]
6207 ++
6208 ++#ifdef __AARCH64EB__
6209 ++ rev x8,x8
6210 ++ rev x12,x12
6211 ++ rev x9,x9
6212 ++ rev x13,x13
6213 ++#endif
6214 ++ and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
6215 ++ and x5,x9,#0x03ffffff
6216 ++ ubfx x6,x8,#26,#26
6217 ++ ubfx x7,x9,#26,#26
6218 ++ add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
6219 ++ extr x8,x12,x8,#52
6220 ++ extr x9,x13,x9,#52
6221 ++ add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
6222 ++ fmov $IN01_0,x4
6223 ++ and x8,x8,#0x03ffffff
6224 ++ and x9,x9,#0x03ffffff
6225 ++ ubfx x10,x12,#14,#26
6226 ++ ubfx x11,x13,#14,#26
6227 ++ add x12,$padbit,x12,lsr#40
6228 ++ add x13,$padbit,x13,lsr#40
6229 ++ add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
6230 ++ fmov $IN01_1,x6
6231 ++ add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
6232 ++ add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
6233 ++ movi $MASK.2d,#-1
6234 ++ fmov $IN01_2,x8
6235 ++ fmov $IN01_3,x10
6236 ++ fmov $IN01_4,x12
6237 ++ ushr $MASK.2d,$MASK.2d,#38
6238 ++
6239 ++ b.ls .Lskip_loop
6240 ++
6241 ++.align 4
6242 ++.Loop_neon:
6243 ++ ////////////////////////////////////////////////////////////////
6244 ++ // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
6245 ++ // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
6246 ++ // \___________________/
6247 ++ // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
6248 ++ // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
6249 ++ // \___________________/ \____________________/
6250 ++ //
6251 ++ // Note that we start with inp[2:3]*r^2. This is because it
6252 ++ // doesn't depend on reduction in previous iteration.
6253 ++ ////////////////////////////////////////////////////////////////
6254 ++ // d4 = h0*r4 + h1*r3 + h2*r2 + h3*r1 + h4*r0
6255 ++ // d3 = h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*5*r4
6256 ++ // d2 = h0*r2 + h1*r1 + h2*r0 + h3*5*r4 + h4*5*r3
6257 ++ // d1 = h0*r1 + h1*r0 + h2*5*r4 + h3*5*r3 + h4*5*r2
6258 ++ // d0 = h0*r0 + h1*5*r4 + h2*5*r3 + h3*5*r2 + h4*5*r1
6259 ++
6260 ++ subs $len,$len,#64
6261 ++ umull $ACC4,$IN23_0,${R4}[2]
6262 ++ csel $in2,$zeros,$in2,lo
6263 ++ umull $ACC3,$IN23_0,${R3}[2]
6264 ++ umull $ACC2,$IN23_0,${R2}[2]
6265 ++ ldp x8,x12,[$in2],#16 // inp[2:3] (or zero)
6266 ++ umull $ACC1,$IN23_0,${R1}[2]
6267 ++ ldp x9,x13,[$in2],#48
6268 ++ umull $ACC0,$IN23_0,${R0}[2]
6269 ++#ifdef __AARCH64EB__
6270 ++ rev x8,x8
6271 ++ rev x12,x12
6272 ++ rev x9,x9
6273 ++ rev x13,x13
6274 ++#endif
6275 ++
6276 ++ umlal $ACC4,$IN23_1,${R3}[2]
6277 ++ and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
6278 ++ umlal $ACC3,$IN23_1,${R2}[2]
6279 ++ and x5,x9,#0x03ffffff
6280 ++ umlal $ACC2,$IN23_1,${R1}[2]
6281 ++ ubfx x6,x8,#26,#26
6282 ++ umlal $ACC1,$IN23_1,${R0}[2]
6283 ++ ubfx x7,x9,#26,#26
6284 ++ umlal $ACC0,$IN23_1,${S4}[2]
6285 ++ add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
6286 ++
6287 ++ umlal $ACC4,$IN23_2,${R2}[2]
6288 ++ extr x8,x12,x8,#52
6289 ++ umlal $ACC3,$IN23_2,${R1}[2]
6290 ++ extr x9,x13,x9,#52
6291 ++ umlal $ACC2,$IN23_2,${R0}[2]
6292 ++ add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
6293 ++ umlal $ACC1,$IN23_2,${S4}[2]
6294 ++ fmov $IN23_0,x4
6295 ++ umlal $ACC0,$IN23_2,${S3}[2]
6296 ++ and x8,x8,#0x03ffffff
6297 ++
6298 ++ umlal $ACC4,$IN23_3,${R1}[2]
6299 ++ and x9,x9,#0x03ffffff
6300 ++ umlal $ACC3,$IN23_3,${R0}[2]
6301 ++ ubfx x10,x12,#14,#26
6302 ++ umlal $ACC2,$IN23_3,${S4}[2]
6303 ++ ubfx x11,x13,#14,#26
6304 ++ umlal $ACC1,$IN23_3,${S3}[2]
6305 ++ add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
6306 ++ umlal $ACC0,$IN23_3,${S2}[2]
6307 ++ fmov $IN23_1,x6
6308 ++
6309 ++ add $IN01_2,$IN01_2,$H2
6310 ++ add x12,$padbit,x12,lsr#40
6311 ++ umlal $ACC4,$IN23_4,${R0}[2]
6312 ++ add x13,$padbit,x13,lsr#40
6313 ++ umlal $ACC3,$IN23_4,${S4}[2]
6314 ++ add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
6315 ++ umlal $ACC2,$IN23_4,${S3}[2]
6316 ++ add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
6317 ++ umlal $ACC1,$IN23_4,${S2}[2]
6318 ++ fmov $IN23_2,x8
6319 ++ umlal $ACC0,$IN23_4,${S1}[2]
6320 ++ fmov $IN23_3,x10
6321 ++
6322 ++ ////////////////////////////////////////////////////////////////
6323 ++ // (hash+inp[0:1])*r^4 and accumulate
6324 ++
6325 ++ add $IN01_0,$IN01_0,$H0
6326 ++ fmov $IN23_4,x12
6327 ++ umlal $ACC3,$IN01_2,${R1}[0]
6328 ++ ldp x8,x12,[$inp],#16 // inp[0:1]
6329 ++ umlal $ACC0,$IN01_2,${S3}[0]
6330 ++ ldp x9,x13,[$inp],#48
6331 ++ umlal $ACC4,$IN01_2,${R2}[0]
6332 ++ umlal $ACC1,$IN01_2,${S4}[0]
6333 ++ umlal $ACC2,$IN01_2,${R0}[0]
6334 ++#ifdef __AARCH64EB__
6335 ++ rev x8,x8
6336 ++ rev x12,x12
6337 ++ rev x9,x9
6338 ++ rev x13,x13
6339 ++#endif
6340 ++
6341 ++ add $IN01_1,$IN01_1,$H1
6342 ++ umlal $ACC3,$IN01_0,${R3}[0]
6343 ++ umlal $ACC4,$IN01_0,${R4}[0]
6344 ++ and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
6345 ++ umlal $ACC2,$IN01_0,${R2}[0]
6346 ++ and x5,x9,#0x03ffffff
6347 ++ umlal $ACC0,$IN01_0,${R0}[0]
6348 ++ ubfx x6,x8,#26,#26
6349 ++ umlal $ACC1,$IN01_0,${R1}[0]
6350 ++ ubfx x7,x9,#26,#26
6351 ++
6352 ++ add $IN01_3,$IN01_3,$H3
6353 ++ add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
6354 ++ umlal $ACC3,$IN01_1,${R2}[0]
6355 ++ extr x8,x12,x8,#52
6356 ++ umlal $ACC4,$IN01_1,${R3}[0]
6357 ++ extr x9,x13,x9,#52
6358 ++ umlal $ACC0,$IN01_1,${S4}[0]
6359 ++ add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
6360 ++ umlal $ACC2,$IN01_1,${R1}[0]
6361 ++ fmov $IN01_0,x4
6362 ++ umlal $ACC1,$IN01_1,${R0}[0]
6363 ++ and x8,x8,#0x03ffffff
6364 ++
6365 ++ add $IN01_4,$IN01_4,$H4
6366 ++ and x9,x9,#0x03ffffff
6367 ++ umlal $ACC3,$IN01_3,${R0}[0]
6368 ++ ubfx x10,x12,#14,#26
6369 ++ umlal $ACC0,$IN01_3,${S2}[0]
6370 ++ ubfx x11,x13,#14,#26
6371 ++ umlal $ACC4,$IN01_3,${R1}[0]
6372 ++ add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
6373 ++ umlal $ACC1,$IN01_3,${S3}[0]
6374 ++ fmov $IN01_1,x6
6375 ++ umlal $ACC2,$IN01_3,${S4}[0]
6376 ++ add x12,$padbit,x12,lsr#40
6377 ++
6378 ++ umlal $ACC3,$IN01_4,${S4}[0]
6379 ++ add x13,$padbit,x13,lsr#40
6380 ++ umlal $ACC0,$IN01_4,${S1}[0]
6381 ++ add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
6382 ++ umlal $ACC4,$IN01_4,${R0}[0]
6383 ++ add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
6384 ++ umlal $ACC1,$IN01_4,${S2}[0]
6385 ++ fmov $IN01_2,x8
6386 ++ umlal $ACC2,$IN01_4,${S3}[0]
6387 ++ fmov $IN01_3,x10
6388 ++ fmov $IN01_4,x12
6389 ++
6390 ++ /////////////////////////////////////////////////////////////////
6391 ++ // lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
6392 ++ // and P. Schwabe
6393 ++ //
6394 ++ // [see discussion in poly1305-armv4 module]
6395 ++
6396 ++ ushr $T0.2d,$ACC3,#26
6397 ++ xtn $H3,$ACC3
6398 ++ ushr $T1.2d,$ACC0,#26
6399 ++ and $ACC0,$ACC0,$MASK.2d
6400 ++ add $ACC4,$ACC4,$T0.2d // h3 -> h4
6401 ++ bic $H3,#0xfc,lsl#24 // &=0x03ffffff
6402 ++ add $ACC1,$ACC1,$T1.2d // h0 -> h1
6403 ++
6404 ++ ushr $T0.2d,$ACC4,#26
6405 ++ xtn $H4,$ACC4
6406 ++ ushr $T1.2d,$ACC1,#26
6407 ++ xtn $H1,$ACC1
6408 ++ bic $H4,#0xfc,lsl#24
6409 ++ add $ACC2,$ACC2,$T1.2d // h1 -> h2
6410 ++
6411 ++ add $ACC0,$ACC0,$T0.2d
6412 ++ shl $T0.2d,$T0.2d,#2
6413 ++ shrn $T1.2s,$ACC2,#26
6414 ++ xtn $H2,$ACC2
6415 ++ add $ACC0,$ACC0,$T0.2d // h4 -> h0
6416 ++ bic $H1,#0xfc,lsl#24
6417 ++ add $H3,$H3,$T1.2s // h2 -> h3
6418 ++ bic $H2,#0xfc,lsl#24
6419 ++
6420 ++ shrn $T0.2s,$ACC0,#26
6421 ++ xtn $H0,$ACC0
6422 ++ ushr $T1.2s,$H3,#26
6423 ++ bic $H3,#0xfc,lsl#24
6424 ++ bic $H0,#0xfc,lsl#24
6425 ++ add $H1,$H1,$T0.2s // h0 -> h1
6426 ++ add $H4,$H4,$T1.2s // h3 -> h4
6427 ++
6428 ++ b.hi .Loop_neon
6429 ++
6430 ++.Lskip_loop:
6431 ++ dup $IN23_2,${IN23_2}[0]
6432 ++ add $IN01_2,$IN01_2,$H2
6433 ++
6434 ++ ////////////////////////////////////////////////////////////////
6435 ++ // multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
6436 ++
6437 ++ adds $len,$len,#32
6438 ++ b.ne .Long_tail
6439 ++
6440 ++ dup $IN23_2,${IN01_2}[0]
6441 ++ add $IN23_0,$IN01_0,$H0
6442 ++ add $IN23_3,$IN01_3,$H3
6443 ++ add $IN23_1,$IN01_1,$H1
6444 ++ add $IN23_4,$IN01_4,$H4
6445 ++
6446 ++.Long_tail:
6447 ++ dup $IN23_0,${IN23_0}[0]
6448 ++ umull2 $ACC0,$IN23_2,${S3}
6449 ++ umull2 $ACC3,$IN23_2,${R1}
6450 ++ umull2 $ACC4,$IN23_2,${R2}
6451 ++ umull2 $ACC2,$IN23_2,${R0}
6452 ++ umull2 $ACC1,$IN23_2,${S4}
6453 ++
6454 ++ dup $IN23_1,${IN23_1}[0]
6455 ++ umlal2 $ACC0,$IN23_0,${R0}
6456 ++ umlal2 $ACC2,$IN23_0,${R2}
6457 ++ umlal2 $ACC3,$IN23_0,${R3}
6458 ++ umlal2 $ACC4,$IN23_0,${R4}
6459 ++ umlal2 $ACC1,$IN23_0,${R1}
6460 ++
6461 ++ dup $IN23_3,${IN23_3}[0]
6462 ++ umlal2 $ACC0,$IN23_1,${S4}
6463 ++ umlal2 $ACC3,$IN23_1,${R2}
6464 ++ umlal2 $ACC2,$IN23_1,${R1}
6465 ++ umlal2 $ACC4,$IN23_1,${R3}
6466 ++ umlal2 $ACC1,$IN23_1,${R0}
6467 ++
6468 ++ dup $IN23_4,${IN23_4}[0]
6469 ++ umlal2 $ACC3,$IN23_3,${R0}
6470 ++ umlal2 $ACC4,$IN23_3,${R1}
6471 ++ umlal2 $ACC0,$IN23_3,${S2}
6472 ++ umlal2 $ACC1,$IN23_3,${S3}
6473 ++ umlal2 $ACC2,$IN23_3,${S4}
6474 ++
6475 ++ umlal2 $ACC3,$IN23_4,${S4}
6476 ++ umlal2 $ACC0,$IN23_4,${S1}
6477 ++ umlal2 $ACC4,$IN23_4,${R0}
6478 ++ umlal2 $ACC1,$IN23_4,${S2}
6479 ++ umlal2 $ACC2,$IN23_4,${S3}
6480 ++
6481 ++ b.eq .Lshort_tail
6482 ++
6483 ++ ////////////////////////////////////////////////////////////////
6484 ++ // (hash+inp[0:1])*r^4:r^3 and accumulate
6485 ++
6486 ++ add $IN01_0,$IN01_0,$H0
6487 ++ umlal $ACC3,$IN01_2,${R1}
6488 ++ umlal $ACC0,$IN01_2,${S3}
6489 ++ umlal $ACC4,$IN01_2,${R2}
6490 ++ umlal $ACC1,$IN01_2,${S4}
6491 ++ umlal $ACC2,$IN01_2,${R0}
6492 ++
6493 ++ add $IN01_1,$IN01_1,$H1
6494 ++ umlal $ACC3,$IN01_0,${R3}
6495 ++ umlal $ACC0,$IN01_0,${R0}
6496 ++ umlal $ACC4,$IN01_0,${R4}
6497 ++ umlal $ACC1,$IN01_0,${R1}
6498 ++ umlal $ACC2,$IN01_0,${R2}
6499 ++
6500 ++ add $IN01_3,$IN01_3,$H3
6501 ++ umlal $ACC3,$IN01_1,${R2}
6502 ++ umlal $ACC0,$IN01_1,${S4}
6503 ++ umlal $ACC4,$IN01_1,${R3}
6504 ++ umlal $ACC1,$IN01_1,${R0}
6505 ++ umlal $ACC2,$IN01_1,${R1}
6506 ++
6507 ++ add $IN01_4,$IN01_4,$H4
6508 ++ umlal $ACC3,$IN01_3,${R0}
6509 ++ umlal $ACC0,$IN01_3,${S2}
6510 ++ umlal $ACC4,$IN01_3,${R1}
6511 ++ umlal $ACC1,$IN01_3,${S3}
6512 ++ umlal $ACC2,$IN01_3,${S4}
6513 ++
6514 ++ umlal $ACC3,$IN01_4,${S4}
6515 ++ umlal $ACC0,$IN01_4,${S1}
6516 ++ umlal $ACC4,$IN01_4,${R0}
6517 ++ umlal $ACC1,$IN01_4,${S2}
6518 ++ umlal $ACC2,$IN01_4,${S3}
6519 ++
6520 ++.Lshort_tail:
6521 ++ ////////////////////////////////////////////////////////////////
6522 ++ // horizontal add
6523 ++
6524 ++ addp $ACC3,$ACC3,$ACC3
6525 ++ ldp d8,d9,[sp,#16] // meet ABI requirements
6526 ++ addp $ACC0,$ACC0,$ACC0
6527 ++ ldp d10,d11,[sp,#32]
6528 ++ addp $ACC4,$ACC4,$ACC4
6529 ++ ldp d12,d13,[sp,#48]
6530 ++ addp $ACC1,$ACC1,$ACC1
6531 ++ ldp d14,d15,[sp,#64]
6532 ++ addp $ACC2,$ACC2,$ACC2
6533 ++ ldr x30,[sp,#8]
6534 ++ .inst 0xd50323bf // autiasp
6535 ++
6536 ++ ////////////////////////////////////////////////////////////////
6537 ++ // lazy reduction, but without narrowing
6538 ++
6539 ++ ushr $T0.2d,$ACC3,#26
6540 ++ and $ACC3,$ACC3,$MASK.2d
6541 ++ ushr $T1.2d,$ACC0,#26
6542 ++ and $ACC0,$ACC0,$MASK.2d
6543 ++
6544 ++ add $ACC4,$ACC4,$T0.2d // h3 -> h4
6545 ++ add $ACC1,$ACC1,$T1.2d // h0 -> h1
6546 ++
6547 ++ ushr $T0.2d,$ACC4,#26
6548 ++ and $ACC4,$ACC4,$MASK.2d
6549 ++ ushr $T1.2d,$ACC1,#26
6550 ++ and $ACC1,$ACC1,$MASK.2d
6551 ++ add $ACC2,$ACC2,$T1.2d // h1 -> h2
6552 ++
6553 ++ add $ACC0,$ACC0,$T0.2d
6554 ++ shl $T0.2d,$T0.2d,#2
6555 ++ ushr $T1.2d,$ACC2,#26
6556 ++ and $ACC2,$ACC2,$MASK.2d
6557 ++ add $ACC0,$ACC0,$T0.2d // h4 -> h0
6558 ++ add $ACC3,$ACC3,$T1.2d // h2 -> h3
6559 ++
6560 ++ ushr $T0.2d,$ACC0,#26
6561 ++ and $ACC0,$ACC0,$MASK.2d
6562 ++ ushr $T1.2d,$ACC3,#26
6563 ++ and $ACC3,$ACC3,$MASK.2d
6564 ++ add $ACC1,$ACC1,$T0.2d // h0 -> h1
6565 ++ add $ACC4,$ACC4,$T1.2d // h3 -> h4
6566 ++
6567 ++ ////////////////////////////////////////////////////////////////
6568 ++ // write the result, can be partially reduced
6569 ++
6570 ++ st4 {$ACC0,$ACC1,$ACC2,$ACC3}[0],[$ctx],#16
6571 ++ mov x4,#1
6572 ++ st1 {$ACC4}[0],[$ctx]
6573 ++ str x4,[$ctx,#8] // set is_base2_26
6574 ++
6575 ++ ldr x29,[sp],#80
6576 ++ ret
6577 ++.size poly1305_blocks_neon,.-poly1305_blocks_neon
6578 ++
6579 ++.align 5
6580 ++.Lzeros:
6581 ++.long 0,0,0,0,0,0,0,0
6582 ++.asciz "Poly1305 for ARMv8, CRYPTOGAMS by \@dot-asm"
6583 ++.align 2
6584 ++#if !defined(__KERNEL__) && !defined(_WIN64)
6585 ++.comm OPENSSL_armcap_P,4,4
6586 ++.hidden OPENSSL_armcap_P
6587 ++#endif
6588 ++___
6589 ++
6590 ++ foreach (split("\n",$code)) { # post-process the generated text one line at a time
6591 ++ s/\b(shrn\s+v[0-9]+)\.[24]d/$1.2s/ or # shrn: first operand .2d/.4d -> .2s
6592 ++ s/\b(fmov\s+)v([0-9]+)[^,]*,\s*x([0-9]+)/$1d$2,x$3/ or # fmov vN...,xM -> fmov dN,xM
6593 ++ (m/\bdup\b/ and (s/\.[24]s/.2d/g or 1)) or # dup lines: .2s/.4s -> .2d
6594 ++ (m/\b(eor|and)/ and (s/\.[248][sdh]/.16b/g or 1)) or # eor/and lines: vector arrangement -> .16b
6595 ++ (m/\bum(ul|la)l\b/ and (s/\.4s/.2s/g or 1)) or # umull/umlal: .4s -> .2s
6596 ++ (m/\bum(ul|la)l2\b/ and (s/\.2s/.4s/g or 1)) or # umull2/umlal2: .2s -> .4s
6597 ++ (m/\bst[1-4]\s+{[^}]+}\[/ and (s/\.[24]d/.s/g or 1)); # lane-indexed st1..st4: .2d/.4d -> .s
6598 ++
6599 ++ s/\.[124]([sd])\[/.$1\[/; # drop the element count before a lane index (first match only), e.g. ".2d[" -> ".d["
6600 ++ s/w#x([0-9]+)/w$1/g;
6601 ++
6602 ++ print $_,"\n";
6603 ++ }
6604 ++ close STDOUT;
6605 +diff --git a/arch/arm64/crypto/poly1305-core.S_shipped b/arch/arm64/crypto/poly1305-core.S_shipped
6606 +new file mode 100644
6607 +index 000000000000..8d1c4e420ccd
6608 +--- /dev/null
6609 ++++ b/arch/arm64/crypto/poly1305-core.S_shipped
6610 +@@ -0,0 +1,835 @@
6611 ++#ifndef __KERNEL__
6612 ++# include "arm_arch.h"
6613 ++.extern OPENSSL_armcap_P
6614 ++#endif
6615 ++
6616 ++.text
6617 ++
6618 ++// forward "declarations" are required for Apple
6619 ++.globl poly1305_blocks
6620 ++.globl poly1305_emit
6621 ++
6622 ++ .globl poly1305_init
6623 ++ .type poly1305_init,%function
6624 ++ .align 5
6625 ++ poly1305_init: // x0 = state, x1 = key (may be NULL); x2 is only used in non-kernel builds
6626 ++ cmp x1,xzr // was a key pointer supplied?
6627 ++ stp xzr,xzr,[x0] // zero hash value
6628 ++ stp xzr,xzr,[x0,#16] // [along with is_base2_26]
6629 ++
6630 ++ csel x0,xzr,x0,eq // return value becomes 0 when key == NULL
6631 ++ b.eq .Lno_key
6632 ++
6633 ++ #ifndef __KERNEL__
6634 ++ adrp x17,OPENSSL_armcap_P
6635 ++ ldr w17,[x17,#:lo12:OPENSSL_armcap_P]
6636 ++ #endif
6637 ++
6638 ++ ldp x7,x8,[x1] // load key
6639 ++ mov x9,#0xfffffffc0fffffff
6640 ++ movk x9,#0x0fff,lsl#48 // x9 = 0x0ffffffc0fffffff
6641 ++ #ifdef __AARCH64EB__
6642 ++ rev x7,x7 // flip bytes
6643 ++ rev x8,x8
6644 ++ #endif
6645 ++ and x7,x7,x9 // &=0ffffffc0fffffff
6646 ++ and x9,x9,#-4 // also clear the two low bits for the upper key word
6647 ++ and x8,x8,x9 // &=0ffffffc0ffffffc
6648 ++ mov w9,#-1
6649 ++ stp x7,x8,[x0,#32] // save key value
6650 ++ str w9,[x0,#48] // impossible key power value
6651 ++
6652 ++ #ifndef __KERNEL__
6653 ++ tst w17,#ARMV7_NEON
6654 ++
6655 ++ adr x12,.Lpoly1305_blocks
6656 ++ adr x7,.Lpoly1305_blocks_neon
6657 ++ adr x13,.Lpoly1305_emit
6658 ++
6659 ++ csel x12,x12,x7,eq // scalar blocks routine when the NEON bit is clear, NEON one otherwise
6660 ++
6661 ++ # ifdef __ILP32__
6662 ++ stp w12,w13,[x2]
6663 ++ # else
6664 ++ stp x12,x13,[x2]
6665 ++ # endif
6666 ++ #endif
6667 ++ mov x0,#1 // return 1 once the key has been stored
6668 ++ .Lno_key:
6669 ++ ret
6670 ++ .size poly1305_init,.-poly1305_init
6671 ++
6672 ++ .type poly1305_blocks,%function
6673 ++ .align 5
6674 ++ poly1305_blocks: // x0 = state, x1 = input, x2 = byte length, x3 = hibit (added into h2 with every block)
6675 ++ .Lpoly1305_blocks:
6676 ++ ands x2,x2,#-16 // round the length down to a multiple of 16
6677 ++ b.eq .Lno_data
6678 ++
6679 ++ ldp x4,x5,[x0] // load hash value
6680 ++ ldp x6,x17,[x0,#16] // [along with is_base2_26]
6681 ++ ldp x7,x8,[x0,#32] // load key value
6682 ++
6683 ++ #ifdef __AARCH64EB__
6684 ++ lsr x12,x4,#32
6685 ++ mov w13,w4
6686 ++ lsr x14,x5,#32
6687 ++ mov w15,w5
6688 ++ lsr x16,x6,#32
6689 ++ #else
6690 ++ mov w12,w4 // split the stored words into five 32-bit limbs x12..x16
6691 ++ lsr x13,x4,#32
6692 ++ mov w14,w5
6693 ++ lsr x15,x5,#32
6694 ++ mov w16,w6
6695 ++ #endif
6696 ++
6697 ++ add x12,x12,x13,lsl#26 // base 2^26 -> base 2^64
6698 ++ lsr x13,x14,#12
6699 ++ adds x12,x12,x14,lsl#52
6700 ++ add x13,x13,x15,lsl#14
6701 ++ adc x13,x13,xzr
6702 ++ lsr x14,x16,#24
6703 ++ adds x13,x13,x16,lsl#40
6704 ++ adc x14,x14,xzr
6705 ++
6706 ++ cmp x17,#0 // is_base2_26?
6707 ++ add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
6708 ++ csel x4,x4,x12,eq // choose between radixes
6709 ++ csel x5,x5,x13,eq
6710 ++ csel x6,x6,x14,eq
6711 ++
6712 ++ .Loop:
6713 ++ ldp x10,x11,[x1],#16 // load input
6714 ++ sub x2,x2,#16
6715 ++ #ifdef __AARCH64EB__
6716 ++ rev x10,x10
6717 ++ rev x11,x11
6718 ++ #endif
6719 ++ adds x4,x4,x10 // accumulate input
6720 ++ adcs x5,x5,x11
6721 ++
6722 ++ mul x12,x4,x7 // h0*r0
6723 ++ adc x6,x6,x3 // h2 += hibit + carry
6724 ++ umulh x13,x4,x7
6725 ++
6726 ++ mul x10,x5,x9 // h1*5*r1
6727 ++ umulh x11,x5,x9
6728 ++
6729 ++ adds x12,x12,x10
6730 ++ mul x10,x4,x8 // h0*r1
6731 ++ adc x13,x13,x11
6732 ++ umulh x14,x4,x8
6733 ++
6734 ++ adds x13,x13,x10
6735 ++ mul x10,x5,x7 // h1*r0
6736 ++ adc x14,x14,xzr
6737 ++ umulh x11,x5,x7
6738 ++
6739 ++ adds x13,x13,x10
6740 ++ mul x10,x6,x9 // h2*5*r1
6741 ++ adc x14,x14,x11
6742 ++ mul x11,x6,x7 // h2*r0
6743 ++
6744 ++ adds x13,x13,x10
6745 ++ adc x14,x14,x11
6746 ++
6747 ++ and x10,x14,#-4 // final reduction
6748 ++ and x6,x14,#3 // h2 keeps only the low two bits
6749 ++ add x10,x10,x14,lsr#2 // (x14 & ~3) + (x14 >> 2) = 5 * (x14 >> 2)
6750 ++ adds x4,x12,x10
6751 ++ adcs x5,x13,xzr
6752 ++ adc x6,x6,xzr
6753 ++
6754 ++ cbnz x2,.Loop // more 16-byte blocks left
6755 ++
6756 ++ stp x4,x5,[x0] // store hash value
6757 ++ stp x6,xzr,[x0,#16] // [and clear is_base2_26]
6758 ++
6759 ++ .Lno_data:
6760 ++ ret
6761 ++ .size poly1305_blocks,.-poly1305_blocks
6762 ++
6763 ++ .type poly1305_emit,%function
6764 ++ .align 5
6765 ++ poly1305_emit: // x0 = state, x1 = 16-byte output, x2 = nonce (16 bytes are read)
6766 ++ .Lpoly1305_emit:
6767 ++ ldp x4,x5,[x0] // load hash base 2^64
6768 ++ ldp x6,x7,[x0,#16] // [along with is_base2_26]
6769 ++ ldp x10,x11,[x2] // load nonce
6770 ++
6771 ++ #ifdef __AARCH64EB__
6772 ++ lsr x12,x4,#32
6773 ++ mov w13,w4
6774 ++ lsr x14,x5,#32
6775 ++ mov w15,w5
6776 ++ lsr x16,x6,#32
6777 ++ #else
6778 ++ mov w12,w4 // split the stored words into five 32-bit limbs x12..x16
6779 ++ lsr x13,x4,#32
6780 ++ mov w14,w5
6781 ++ lsr x15,x5,#32
6782 ++ mov w16,w6
6783 ++ #endif
6784 ++
6785 ++ add x12,x12,x13,lsl#26 // base 2^26 -> base 2^64
6786 ++ lsr x13,x14,#12
6787 ++ adds x12,x12,x14,lsl#52
6788 ++ add x13,x13,x15,lsl#14
6789 ++ adc x13,x13,xzr
6790 ++ lsr x14,x16,#24
6791 ++ adds x13,x13,x16,lsl#40
6792 ++ adc x14,x14,xzr
6793 ++
6794 ++ cmp x7,#0 // is_base2_26?
6795 ++ csel x4,x4,x12,eq // choose between radixes
6796 ++ csel x5,x5,x13,eq
6797 ++ csel x6,x6,x14,eq
6798 ++
6799 ++ adds x12,x4,#5 // compare to modulus
6800 ++ adcs x13,x5,xzr
6801 ++ adc x14,x6,xzr
6802 ++
6803 ++ tst x14,#-4 // see if it's carried/borrowed
6804 ++
6805 ++ csel x4,x4,x12,eq // keep h when h+5 has no bits at or above 2^130, else take the low 128 bits of h+5
6806 ++ csel x5,x5,x13,eq
6807 ++
6808 ++ #ifdef __AARCH64EB__
6809 ++ ror x10,x10,#32 // flip nonce words
6810 ++ ror x11,x11,#32
6811 ++ #endif
6812 ++ adds x4,x4,x10 // accumulate nonce
6813 ++ adc x5,x5,x11 // carry out of bit 127 is discarded
6814 ++ #ifdef __AARCH64EB__
6815 ++ rev x4,x4 // flip output bytes
6816 ++ rev x5,x5
6817 ++ #endif
6818 ++ stp x4,x5,[x1] // write result
6819 ++
6820 ++ ret
6821 ++ .size poly1305_emit,.-poly1305_emit
6822 ++ .type poly1305_mult,%function
6823 ++ .align 5
6824 ++ poly1305_mult: // in: h = x6:x5:x4, r0 = x7, r1 = x8, s1 = x9; out: updated h in x6:x5:x4; writes x10-x14
6825 ++ mul x12,x4,x7 // h0*r0
6826 ++ umulh x13,x4,x7
6827 ++
6828 ++ mul x10,x5,x9 // h1*5*r1
6829 ++ umulh x11,x5,x9
6830 ++
6831 ++ adds x12,x12,x10
6832 ++ mul x10,x4,x8 // h0*r1
6833 ++ adc x13,x13,x11
6834 ++ umulh x14,x4,x8
6835 ++
6836 ++ adds x13,x13,x10
6837 ++ mul x10,x5,x7 // h1*r0
6838 ++ adc x14,x14,xzr
6839 ++ umulh x11,x5,x7
6840 ++
6841 ++ adds x13,x13,x10
6842 ++ mul x10,x6,x9 // h2*5*r1
6843 ++ adc x14,x14,x11
6844 ++ mul x11,x6,x7 // h2*r0
6845 ++
6846 ++ adds x13,x13,x10
6847 ++ adc x14,x14,x11
6848 ++
6849 ++ and x10,x14,#-4 // final reduction
6850 ++ and x6,x14,#3 // h2 keeps only the low two bits
6851 ++ add x10,x10,x14,lsr#2 // (x14 & ~3) + (x14 >> 2) = 5 * (x14 >> 2)
6852 ++ adds x4,x12,x10
6853 ++ adcs x5,x13,xzr
6854 ++ adc x6,x6,xzr
6855 ++
6856 ++ ret
6857 ++ .size poly1305_mult,.-poly1305_mult
6858 ++
6859 ++ .type poly1305_splat,%function
6860 ++ .align 4
6861 ++ poly1305_splat: // in: value x6:x5:x4, x0 = destination; stores nine 32-bit words at a 16-byte stride; writes x12-x16
6862 ++ and x12,x4,#0x03ffffff // base 2^64 -> base 2^26
6863 ++ ubfx x13,x4,#26,#26
6864 ++ extr x14,x5,x4,#52
6865 ++ and x14,x14,#0x03ffffff
6866 ++ ubfx x15,x5,#14,#26
6867 ++ extr x16,x6,x5,#40 // top limb is not masked
6868 ++
6869 ++ str w12,[x0,#16*0] // r0
6870 ++ add w12,w13,w13,lsl#2 // r1*5
6871 ++ str w13,[x0,#16*1] // r1
6872 ++ add w13,w14,w14,lsl#2 // r2*5
6873 ++ str w12,[x0,#16*2] // s1
6874 ++ str w14,[x0,#16*3] // r2
6875 ++ add w14,w15,w15,lsl#2 // r3*5
6876 ++ str w13,[x0,#16*4] // s2
6877 ++ str w15,[x0,#16*5] // r3
6878 ++ add w15,w16,w16,lsl#2 // r4*5
6879 ++ str w14,[x0,#16*6] // s3
6880 ++ str w16,[x0,#16*7] // r4
6881 ++ str w15,[x0,#16*8] // s4
6882 ++
6883 ++ ret
6884 ++ .size poly1305_splat,.-poly1305_splat
6885 ++
6886 ++ #ifdef __KERNEL__
6887 ++ .globl poly1305_blocks_neon
6888 ++ #endif
6889 ++ .type poly1305_blocks_neon,%function
6890 ++ .align 5
6891 ++ poly1305_blocks_neon: // same register arguments as poly1305_blocks: x0 = state, x1 = input, x2 = byte length, x3 = hibit
6892 ++ .Lpoly1305_blocks_neon:
6893 ++ ldr x17,[x0,#24] // x17 = is_base2_26 flag
6894 ++ cmp x2,#128
6895 ++ b.lo .Lpoly1305_blocks // fewer than 128 bytes: use the scalar routine
6896 ++
6897 ++ .inst 0xd503233f // paciasp
6898 ++ stp x29,x30,[sp,#-80]!
6899 ++ add x29,sp,#0
6900 ++
6901 ++ stp d8,d9,[sp,#16] // meet ABI requirements
6902 ++ stp d10,d11,[sp,#32]
6903 ++ stp d12,d13,[sp,#48]
6904 ++ stp d14,d15,[sp,#64]
6905 ++
6906 ++ cbz x17,.Lbase2_64_neon // hash is currently stored in base 2^64
6907 ++
6908 ++ ldp w10,w11,[x0] // load hash value base 2^26
6909 ++ ldp w12,w13,[x0,#8]
6910 ++ ldr w14,[x0,#16]
6911 ++
6912 ++ tst x2,#31 // length a multiple of 32?
6913 ++ b.eq .Leven_neon
6914 ++
6915 ++ ldp x7,x8,[x0,#32] // load key value
6916 ++
6917 ++ add x4,x10,x11,lsl#26 // base 2^26 -> base 2^64
6918 ++ lsr x5,x12,#12
6919 ++ adds x4,x4,x12,lsl#52
6920 ++ add x5,x5,x13,lsl#14
6921 ++ adc x5,x5,xzr
6922 ++ lsr x6,x14,#24
6923 ++ adds x5,x5,x14,lsl#40
6924 ++ adc x14,x6,xzr // can be partially reduced... NOTE(review): this sum lands in x14, which is overwritten before being read; the code below continues with x6 - confirm the carry can be ignored
6925 ++
6926 ++ ldp x12,x13,[x1],#16 // load input
6927 ++ sub x2,x2,#16 // one block is handled by the scalar multiply below
6928 ++ add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
6929 ++
6930 ++ #ifdef __AARCH64EB__
6931 ++ rev x12,x12
6932 ++ rev x13,x13
6933 ++ #endif
6934 ++ adds x4,x4,x12 // accumulate input
6935 ++ adcs x5,x5,x13
6936 ++ adc x6,x6,x3
6937 ++
6938 ++ bl poly1305_mult
6939 ++
6940 ++ and x10,x4,#0x03ffffff // base 2^64 -> base 2^26
6941 ++ ubfx x11,x4,#26,#26
6942 ++ extr x12,x5,x4,#52
6943 ++ and x12,x12,#0x03ffffff
6944 ++ ubfx x13,x5,#14,#26
6945 ++ extr x14,x6,x5,#40
6946 ++
6947 ++ b .Leven_neon
6948 ++
6949 ++ .align 4
6950 ++ .Lbase2_64_neon: // reached from the entry code when the is_base2_26 flag is zero
6951 ++ ldp x7,x8,[x0,#32] // load key value
6952 ++
6953 ++ ldp x4,x5,[x0] // load hash value base 2^64
6954 ++ ldr x6,[x0,#16]
6955 ++
6956 ++ tst x2,#31 // length a multiple of 32?
6957 ++ b.eq .Linit_neon
6958 ++
6959 ++ ldp x12,x13,[x1],#16 // load input
6960 ++ sub x2,x2,#16 // one block is handled by the scalar multiply below
6961 ++ add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
6962 ++ #ifdef __AARCH64EB__
6963 ++ rev x12,x12
6964 ++ rev x13,x13
6965 ++ #endif
6966 ++ adds x4,x4,x12 // accumulate input
6967 ++ adcs x5,x5,x13
6968 ++ adc x6,x6,x3
6969 ++
6970 ++ bl poly1305_mult
6971 ++
6972 ++ .Linit_neon:
6973 ++ ldr w17,[x0,#48] // first table element
6974 ++ and x10,x4,#0x03ffffff // base 2^64 -> base 2^26
6975 ++ ubfx x11,x4,#26,#26
6976 ++ extr x12,x5,x4,#52
6977 ++ and x12,x12,#0x03ffffff
6978 ++ ubfx x13,x5,#14,#26
6979 ++ extr x14,x6,x5,#40
6980 ++
6981 ++ cmp w17,#-1 // is value impossible?
6982 ++ b.ne .Leven_neon // table already filled in: skip the setup below
6983 ++
6984 ++ fmov d24,x10 // hash limbs go to v24..v28 before x4-x6 are reused for the key powers
6985 ++ fmov d25,x11
6986 ++ fmov d26,x12
6987 ++ fmov d27,x13
6988 ++ fmov d28,x14
6989 ++
6990 ++ ////////////////////////////////// initialize r^n table
6991 ++ mov x4,x7 // r^1
6992 ++ add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
6993 ++ mov x5,x8
6994 ++ mov x6,xzr
6995 ++ add x0,x0,#48+12 // r^1 goes to byte offset 12 of each 16-byte table row
6996 ++ bl poly1305_splat
6997 ++
6998 ++ bl poly1305_mult // r^2
6999 ++ sub x0,x0,#4 // each further power is stored 4 bytes lower in the row
7000 ++ bl poly1305_splat
7001 ++
7002 ++ bl poly1305_mult // r^3
7003 ++ sub x0,x0,#4
7004 ++ bl poly1305_splat
7005 ++
7006 ++ bl poly1305_mult // r^4
7007 ++ sub x0,x0,#4
7008 ++ bl poly1305_splat
7009 ++ sub x0,x0,#48 // restore original x0
7010 ++ b .Ldo_neon
7011 ++
7012 ++ .align 4
7013 ++ .Leven_neon:
7014 ++ fmov d24,x10 // hash limbs x10..x14 (base 2^26) into v24..v28
7015 ++ fmov d25,x11
7016 ++ fmov d26,x12
7017 ++ fmov d27,x13
7018 ++ fmov d28,x14
7019 ++
7020 ++ .Ldo_neon:
7021 ++ ldp x8,x12,[x1,#32] // inp[2:3]
7022 ++ subs x2,x2,#64 // flags are consumed by b.ls at the end of this section
7023 ++ ldp x9,x13,[x1,#48]
7024 ++ add x16,x1,#96 // x16 = pointer for the inp[2:3] loads inside the loop
7025 ++ adr x17,.Lzeros // zero block substituted in the loop once input runs out
7026 ++
7027 ++ lsl x3,x3,#24 // hibit moved to bit 24 of the top limb (limb weight 2^104, so 2^128)
7028 ++ add x15,x0,#48 // x15 = start of the r^n table
7029 ++
7030 ++ #ifdef __AARCH64EB__
7031 ++ rev x8,x8
7032 ++ rev x12,x12
7033 ++ rev x9,x9
7034 ++ rev x13,x13
7035 ++ #endif
7036 ++ and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
7037 ++ and x5,x9,#0x03ffffff
7038 ++ ubfx x6,x8,#26,#26
7039 ++ ubfx x7,x9,#26,#26
7040 ++ add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
7041 ++ extr x8,x12,x8,#52
7042 ++ extr x9,x13,x9,#52
7043 ++ add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
7044 ++ fmov d14,x4
7045 ++ and x8,x8,#0x03ffffff
7046 ++ and x9,x9,#0x03ffffff
7047 ++ ubfx x10,x12,#14,#26
7048 ++ ubfx x11,x13,#14,#26
7049 ++ add x12,x3,x12,lsr#40 // top limb plus hibit
7050 ++ add x13,x3,x13,lsr#40
7051 ++ add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
7052 ++ fmov d15,x6
7053 ++ add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
7054 ++ add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
7055 ++ fmov d16,x8
7056 ++ fmov d17,x10
7057 ++ fmov d18,x12
7058 ++
7059 ++ ldp x8,x12,[x1],#16 // inp[0:1]
7060 ++ ldp x9,x13,[x1],#48
7061 ++
7062 ++ ld1 {v0.4s,v1.4s,v2.4s,v3.4s},[x15],#64 // table rows in poly1305_splat order: r0,r1,s1,r2 ...
7063 ++ ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x15],#64 // ... s2,r3,s3,r4 ...
7064 ++ ld1 {v8.4s},[x15] // ... s4
7065 ++
7066 ++ #ifdef __AARCH64EB__
7067 ++ rev x8,x8
7068 ++ rev x12,x12
7069 ++ rev x9,x9
7070 ++ rev x13,x13
7071 ++ #endif
7072 ++ and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
7073 ++ and x5,x9,#0x03ffffff
7074 ++ ubfx x6,x8,#26,#26
7075 ++ ubfx x7,x9,#26,#26
7076 ++ add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
7077 ++ extr x8,x12,x8,#52
7078 ++ extr x9,x13,x9,#52
7079 ++ add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
7080 ++ fmov d9,x4
7081 ++ and x8,x8,#0x03ffffff
7082 ++ and x9,x9,#0x03ffffff
7083 ++ ubfx x10,x12,#14,#26
7084 ++ ubfx x11,x13,#14,#26
7085 ++ add x12,x3,x12,lsr#40
7086 ++ add x13,x3,x13,lsr#40
7087 ++ add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
7088 ++ fmov d10,x6
7089 ++ add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
7090 ++ add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
7091 ++ movi v31.2d,#-1
7092 ++ fmov d11,x8
7093 ++ fmov d12,x10
7094 ++ fmov d13,x12
7095 ++ ushr v31.2d,v31.2d,#38 // v31 = 0x03ffffff in each 64-bit lane (26-bit mask)
7096 ++
7097 ++ b.ls .Lskip_loop // flags from "subs x2,x2,#64" above: at most 64 bytes were left
7098 ++
7099 ++ .align 4
7100 ++ .Loop_neon:
7101 ++ ////////////////////////////////////////////////////////////////
7102 ++ // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
7103 ++ // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
7104 ++ // ___________________/
7105 ++ // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
7106 ++ // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
7107 ++ // ___________________/ ____________________/
7108 ++ //
7109 ++ // Note that we start with inp[2:3]*r^2. This is because it
7110 ++ // doesn't depend on reduction in previous iteration.
7111 ++ ////////////////////////////////////////////////////////////////
7112 ++ // d4 = h0*r4 + h1*r3 + h2*r2 + h3*r1 + h4*r0
7113 ++ // d3 = h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*5*r4
7114 ++ // d2 = h0*r2 + h1*r1 + h2*r0 + h3*5*r4 + h4*5*r3
7115 ++ // d1 = h0*r1 + h1*r0 + h2*5*r4 + h3*5*r3 + h4*5*r2
7116 ++ // d0 = h0*r0 + h1*5*r4 + h2*5*r3 + h3*5*r2 + h4*5*r1
7117 ++
7118 ++ subs x2,x2,#64 // flags are used by the csel below and by b.hi at the bottom of the loop
7119 ++ umull v23.2d,v14.2s,v7.s[2]
7120 ++ csel x16,x17,x16,lo // input exhausted: load from .Lzeros instead
7121 ++ umull v22.2d,v14.2s,v5.s[2]
7122 ++ umull v21.2d,v14.2s,v3.s[2]
7123 ++ ldp x8,x12,[x16],#16 // inp[2:3] (or zero)
7124 ++ umull v20.2d,v14.2s,v1.s[2]
7125 ++ ldp x9,x13,[x16],#48
7126 ++ umull v19.2d,v14.2s,v0.s[2]
7127 ++ #ifdef __AARCH64EB__
7128 ++ rev x8,x8
7129 ++ rev x12,x12
7130 ++ rev x9,x9
7131 ++ rev x13,x13
7132 ++ #endif
7133 ++
7134 ++ umlal v23.2d,v15.2s,v5.s[2]
7135 ++ and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
7136 ++ umlal v22.2d,v15.2s,v3.s[2]
7137 ++ and x5,x9,#0x03ffffff
7138 ++ umlal v21.2d,v15.2s,v1.s[2]
7139 ++ ubfx x6,x8,#26,#26
7140 ++ umlal v20.2d,v15.2s,v0.s[2]
7141 ++ ubfx x7,x9,#26,#26
7142 ++ umlal v19.2d,v15.2s,v8.s[2]
7143 ++ add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
7144 ++
7145 ++ umlal v23.2d,v16.2s,v3.s[2]
7146 ++ extr x8,x12,x8,#52
7147 ++ umlal v22.2d,v16.2s,v1.s[2]
7148 ++ extr x9,x13,x9,#52
7149 ++ umlal v21.2d,v16.2s,v0.s[2]
7150 ++ add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
7151 ++ umlal v20.2d,v16.2s,v8.s[2]
7152 ++ fmov d14,x4
7153 ++ umlal v19.2d,v16.2s,v6.s[2]
7154 ++ and x8,x8,#0x03ffffff
7155 ++
7156 ++ umlal v23.2d,v17.2s,v1.s[2]
7157 ++ and x9,x9,#0x03ffffff
7158 ++ umlal v22.2d,v17.2s,v0.s[2]
7159 ++ ubfx x10,x12,#14,#26
7160 ++ umlal v21.2d,v17.2s,v8.s[2]
7161 ++ ubfx x11,x13,#14,#26
7162 ++ umlal v20.2d,v17.2s,v6.s[2]
7163 ++ add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
7164 ++ umlal v19.2d,v17.2s,v4.s[2]
7165 ++ fmov d15,x6
7166 ++
7167 ++ add v11.2s,v11.2s,v26.2s
7168 ++ add x12,x3,x12,lsr#40
7169 ++ umlal v23.2d,v18.2s,v0.s[2]
7170 ++ add x13,x3,x13,lsr#40
7171 ++ umlal v22.2d,v18.2s,v8.s[2]
7172 ++ add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
7173 ++ umlal v21.2d,v18.2s,v6.s[2]
7174 ++ add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
7175 ++ umlal v20.2d,v18.2s,v4.s[2]
7176 ++ fmov d16,x8
7177 ++ umlal v19.2d,v18.2s,v2.s[2]
7178 ++ fmov d17,x10
7179 ++
7180 ++ ////////////////////////////////////////////////////////////////
7181 ++ // (hash+inp[0:1])*r^4 and accumulate
7182 ++
7183 ++ add v9.2s,v9.2s,v24.2s
7184 ++ fmov d18,x12
7185 ++ umlal v22.2d,v11.2s,v1.s[0]
7186 ++ ldp x8,x12,[x1],#16 // inp[0:1]
7187 ++ umlal v19.2d,v11.2s,v6.s[0]
7188 ++ ldp x9,x13,[x1],#48
7189 ++ umlal v23.2d,v11.2s,v3.s[0]
7190 ++ umlal v20.2d,v11.2s,v8.s[0]
7191 ++ umlal v21.2d,v11.2s,v0.s[0]
7192 ++ #ifdef __AARCH64EB__
7193 ++ rev x8,x8
7194 ++ rev x12,x12
7195 ++ rev x9,x9
7196 ++ rev x13,x13
7197 ++ #endif
7198 ++
7199 ++ add v10.2s,v10.2s,v25.2s
7200 ++ umlal v22.2d,v9.2s,v5.s[0]
7201 ++ umlal v23.2d,v9.2s,v7.s[0]
7202 ++ and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
7203 ++ umlal v21.2d,v9.2s,v3.s[0]
7204 ++ and x5,x9,#0x03ffffff
7205 ++ umlal v19.2d,v9.2s,v0.s[0]
7206 ++ ubfx x6,x8,#26,#26
7207 ++ umlal v20.2d,v9.2s,v1.s[0]
7208 ++ ubfx x7,x9,#26,#26
7209 ++
7210 ++ add v12.2s,v12.2s,v27.2s
7211 ++ add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
7212 ++ umlal v22.2d,v10.2s,v3.s[0]
7213 ++ extr x8,x12,x8,#52
7214 ++ umlal v23.2d,v10.2s,v5.s[0]
7215 ++ extr x9,x13,x9,#52
7216 ++ umlal v19.2d,v10.2s,v8.s[0]
7217 ++ add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
7218 ++ umlal v21.2d,v10.2s,v1.s[0]
7219 ++ fmov d9,x4
7220 ++ umlal v20.2d,v10.2s,v0.s[0]
7221 ++ and x8,x8,#0x03ffffff
7222 ++
7223 ++ add v13.2s,v13.2s,v28.2s
7224 ++ and x9,x9,#0x03ffffff
7225 ++ umlal v22.2d,v12.2s,v0.s[0]
7226 ++ ubfx x10,x12,#14,#26
7227 ++ umlal v19.2d,v12.2s,v4.s[0]
7228 ++ ubfx x11,x13,#14,#26
7229 ++ umlal v23.2d,v12.2s,v1.s[0]
7230 ++ add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
7231 ++ umlal v20.2d,v12.2s,v6.s[0]
7232 ++ fmov d10,x6
7233 ++ umlal v21.2d,v12.2s,v8.s[0]
7234 ++ add x12,x3,x12,lsr#40
7235 ++
7236 ++ umlal v22.2d,v13.2s,v8.s[0]
7237 ++ add x13,x3,x13,lsr#40
7238 ++ umlal v19.2d,v13.2s,v2.s[0]
7239 ++ add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
7240 ++ umlal v23.2d,v13.2s,v0.s[0]
7241 ++ add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
7242 ++ umlal v20.2d,v13.2s,v4.s[0]
7243 ++ fmov d11,x8
7244 ++ umlal v21.2d,v13.2s,v6.s[0]
7245 ++ fmov d12,x10
7246 ++ fmov d13,x12
7247 ++
7248 ++ /////////////////////////////////////////////////////////////////
7249 ++ // lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
7250 ++ // and P. Schwabe
7251 ++ //
7252 ++ // [see discussion in poly1305-armv4 module]
7253 ++
7254 ++ ushr v29.2d,v22.2d,#26
7255 ++ xtn v27.2s,v22.2d
7256 ++ ushr v30.2d,v19.2d,#26
7257 ++ and v19.16b,v19.16b,v31.16b
7258 ++ add v23.2d,v23.2d,v29.2d // h3 -> h4
7259 ++ bic v27.2s,#0xfc,lsl#24 // &=0x03ffffff
7260 ++ add v20.2d,v20.2d,v30.2d // h0 -> h1
7261 ++
7262 ++ ushr v29.2d,v23.2d,#26
7263 ++ xtn v28.2s,v23.2d
7264 ++ ushr v30.2d,v20.2d,#26
7265 ++ xtn v25.2s,v20.2d
7266 ++ bic v28.2s,#0xfc,lsl#24
7267 ++ add v21.2d,v21.2d,v30.2d // h1 -> h2
7268 ++
7269 ++ add v19.2d,v19.2d,v29.2d
7270 ++ shl v29.2d,v29.2d,#2
7271 ++ shrn v30.2s,v21.2d,#26
7272 ++ xtn v26.2s,v21.2d
7273 ++ add v19.2d,v19.2d,v29.2d // h4 -> h0
7274 ++ bic v25.2s,#0xfc,lsl#24
7275 ++ add v27.2s,v27.2s,v30.2s // h2 -> h3
7276 ++ bic v26.2s,#0xfc,lsl#24
7277 ++
7278 ++ shrn v29.2s,v19.2d,#26
7279 ++ xtn v24.2s,v19.2d
7280 ++ ushr v30.2s,v27.2s,#26
7281 ++ bic v27.2s,#0xfc,lsl#24
7282 ++ bic v24.2s,#0xfc,lsl#24
7283 ++ add v25.2s,v25.2s,v29.2s // h0 -> h1
7284 ++ add v28.2s,v28.2s,v30.2s // h3 -> h4
7285 ++
7286 ++ b.hi .Loop_neon // flags still come from "subs x2,x2,#64" at the top of the loop
7287 ++
7288 ++ .Lskip_loop:
7289 ++ dup v16.2d,v16.d[0]
7290 ++ add v11.2s,v11.2s,v26.2s
7291 ++
7292 ++ ////////////////////////////////////////////////////////////////
7293 ++ // multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
7294 ++
7295 ++ adds x2,x2,#32 // flags feed b.ne here and b.eq .Lshort_tail further down
7296 ++ b.ne .Long_tail
7297 ++
7298 ++ dup v16.2d,v11.d[0] // use the (hash+inp[0:1]) limbs as the operands multiplied below
7299 ++ add v14.2s,v9.2s,v24.2s
7300 ++ add v17.2s,v12.2s,v27.2s
7301 ++ add v15.2s,v10.2s,v25.2s
7302 ++ add v18.2s,v13.2s,v28.2s
7303 ++
7304 ++ .Long_tail:
7305 ++ dup v14.2d,v14.d[0]
7306 ++ umull2 v19.2d,v16.4s,v6.4s
7307 ++ umull2 v22.2d,v16.4s,v1.4s
7308 ++ umull2 v23.2d,v16.4s,v3.4s
7309 ++ umull2 v21.2d,v16.4s,v0.4s
7310 ++ umull2 v20.2d,v16.4s,v8.4s
7311 ++
7312 ++ dup v15.2d,v15.d[0]
7313 ++ umlal2 v19.2d,v14.4s,v0.4s
7314 ++ umlal2 v21.2d,v14.4s,v3.4s
7315 ++ umlal2 v22.2d,v14.4s,v5.4s
7316 ++ umlal2 v23.2d,v14.4s,v7.4s
7317 ++ umlal2 v20.2d,v14.4s,v1.4s
7318 ++
7319 ++ dup v17.2d,v17.d[0]
7320 ++ umlal2 v19.2d,v15.4s,v8.4s
7321 ++ umlal2 v22.2d,v15.4s,v3.4s
7322 ++ umlal2 v21.2d,v15.4s,v1.4s
7323 ++ umlal2 v23.2d,v15.4s,v5.4s
7324 ++ umlal2 v20.2d,v15.4s,v0.4s
7325 ++
7326 ++ dup v18.2d,v18.d[0]
7327 ++ umlal2 v22.2d,v17.4s,v0.4s
7328 ++ umlal2 v23.2d,v17.4s,v1.4s
7329 ++ umlal2 v19.2d,v17.4s,v4.4s
7330 ++ umlal2 v20.2d,v17.4s,v6.4s
7331 ++ umlal2 v21.2d,v17.4s,v8.4s
7332 ++
7333 ++ umlal2 v22.2d,v18.4s,v8.4s
7334 ++ umlal2 v19.2d,v18.4s,v2.4s
7335 ++ umlal2 v23.2d,v18.4s,v0.4s
7336 ++ umlal2 v20.2d,v18.4s,v4.4s
7337 ++ umlal2 v21.2d,v18.4s,v6.4s
7338 ++
7339 ++ b.eq .Lshort_tail // flags still come from "adds x2,x2,#32" in .Lskip_loop
7340 ++
7341 ++ ////////////////////////////////////////////////////////////////
7342 ++ // (hash+inp[0:1])*r^4:r^3 and accumulate
7343 ++
7344 ++ add v9.2s,v9.2s,v24.2s
7345 ++ umlal v22.2d,v11.2s,v1.2s
7346 ++ umlal v19.2d,v11.2s,v6.2s
7347 ++ umlal v23.2d,v11.2s,v3.2s
7348 ++ umlal v20.2d,v11.2s,v8.2s
7349 ++ umlal v21.2d,v11.2s,v0.2s
7350 ++
7351 ++ add v10.2s,v10.2s,v25.2s
7352 ++ umlal v22.2d,v9.2s,v5.2s
7353 ++ umlal v19.2d,v9.2s,v0.2s
7354 ++ umlal v23.2d,v9.2s,v7.2s
7355 ++ umlal v20.2d,v9.2s,v1.2s
7356 ++ umlal v21.2d,v9.2s,v3.2s
7357 ++
7358 ++ add v12.2s,v12.2s,v27.2s
7359 ++ umlal v22.2d,v10.2s,v3.2s
7360 ++ umlal v19.2d,v10.2s,v8.2s
7361 ++ umlal v23.2d,v10.2s,v5.2s
7362 ++ umlal v20.2d,v10.2s,v0.2s
7363 ++ umlal v21.2d,v10.2s,v1.2s
7364 ++
7365 ++ add v13.2s,v13.2s,v28.2s
7366 ++ umlal v22.2d,v12.2s,v0.2s
7367 ++ umlal v19.2d,v12.2s,v4.2s
7368 ++ umlal v23.2d,v12.2s,v1.2s
7369 ++ umlal v20.2d,v12.2s,v6.2s
7370 ++ umlal v21.2d,v12.2s,v8.2s
7371 ++
7372 ++ umlal v22.2d,v13.2s,v8.2s
7373 ++ umlal v19.2d,v13.2s,v2.2s
7374 ++ umlal v23.2d,v13.2s,v0.2s
7375 ++ umlal v20.2d,v13.2s,v4.2s
7376 ++ umlal v21.2d,v13.2s,v6.2s
7377 ++
7378 ++ .Lshort_tail:
7379 ++ ////////////////////////////////////////////////////////////////
7380 ++ // horizontal add
7381 ++
7382 ++ addp v22.2d,v22.2d,v22.2d // sum the two 64-bit lanes of each accumulator
7383 ++ ldp d8,d9,[sp,#16] // meet ABI requirements
7384 ++ addp v19.2d,v19.2d,v19.2d
7385 ++ ldp d10,d11,[sp,#32]
7386 ++ addp v23.2d,v23.2d,v23.2d
7387 ++ ldp d12,d13,[sp,#48]
7388 ++ addp v20.2d,v20.2d,v20.2d
7389 ++ ldp d14,d15,[sp,#64]
7390 ++ addp v21.2d,v21.2d,v21.2d
7391 ++ ldr x30,[sp,#8] // reload the return address saved by the prologue
7392 ++ .inst 0xd50323bf // autiasp
7393 ++
7394 ++ ////////////////////////////////////////////////////////////////
7395 ++ // lazy reduction, but without narrowing
7396 ++
7397 ++ ushr v29.2d,v22.2d,#26
7398 ++ and v22.16b,v22.16b,v31.16b
7399 ++ ushr v30.2d,v19.2d,#26
7400 ++ and v19.16b,v19.16b,v31.16b
7401 ++
7402 ++ add v23.2d,v23.2d,v29.2d // h3 -> h4
7403 ++ add v20.2d,v20.2d,v30.2d // h0 -> h1
7404 ++
7405 ++ ushr v29.2d,v23.2d,#26
7406 ++ and v23.16b,v23.16b,v31.16b
7407 ++ ushr v30.2d,v20.2d,#26
7408 ++ and v20.16b,v20.16b,v31.16b
7409 ++ add v21.2d,v21.2d,v30.2d // h1 -> h2
7410 ++
7411 ++ add v19.2d,v19.2d,v29.2d
7412 ++ shl v29.2d,v29.2d,#2
7413 ++ ushr v30.2d,v21.2d,#26
7414 ++ and v21.16b,v21.16b,v31.16b
7415 ++ add v19.2d,v19.2d,v29.2d // h4 -> h0
7416 ++ add v22.2d,v22.2d,v30.2d // h2 -> h3
7417 ++
7418 ++ ushr v29.2d,v19.2d,#26
7419 ++ and v19.16b,v19.16b,v31.16b
7420 ++ ushr v30.2d,v22.2d,#26
7421 ++ and v22.16b,v22.16b,v31.16b
7422 ++ add v20.2d,v20.2d,v29.2d // h0 -> h1
7423 ++ add v23.2d,v23.2d,v30.2d // h3 -> h4
7424 ++
7425 ++ ////////////////////////////////////////////////////////////////
7426 ++ // write the result, can be partially reduced
7427 ++
7428 ++ st4 {v19.s,v20.s,v21.s,v22.s}[0],[x0],#16 // h0..h3 as 32-bit words; x0 advances by 16
7429 ++ mov x4,#1
7430 ++ st1 {v23.s}[0],[x0] // h4 at original x0 + 16
7431 ++ str x4,[x0,#8] // set is_base2_26
7432 ++
7433 ++ ldr x29,[sp],#80 // restore the frame pointer and release the 80-byte frame
7434 ++ ret
7435 ++ .size poly1305_blocks_neon,.-poly1305_blocks_neon
7436 ++
7437 ++.align 5
7438 ++.Lzeros:
7439 ++.long 0,0,0,0,0,0,0,0
7440 ++.asciz "Poly1305 for ARMv8, CRYPTOGAMS by @dot-asm"
7441 ++.align 2
7442 ++#if !defined(__KERNEL__) && !defined(_WIN64)
7443 ++.comm OPENSSL_armcap_P,4,4
7444 ++.hidden OPENSSL_armcap_P
7445 ++#endif
7446 +diff --git a/arch/arm64/crypto/poly1305-glue.c b/arch/arm64/crypto/poly1305-glue.c
7447 +new file mode 100644
7448 +index 000000000000..dd843d0ee83a
7449 +--- /dev/null
7450 ++++ b/arch/arm64/crypto/poly1305-glue.c
7451 +@@ -0,0 +1,237 @@
7452 ++// SPDX-License-Identifier: GPL-2.0
7453 ++/*
7454 ++ * OpenSSL/Cryptogams accelerated Poly1305 transform for arm64
7455 ++ *
7456 ++ * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@××××××.org>
7457 ++ */
7458 ++
7459 ++#include <asm/hwcap.h>
7460 ++#include <asm/neon.h>
7461 ++#include <asm/simd.h>
7462 ++#include <asm/unaligned.h>
7463 ++#include <crypto/algapi.h>
7464 ++#include <crypto/internal/hash.h>
7465 ++#include <crypto/internal/poly1305.h>
7466 ++#include <crypto/internal/simd.h>
7467 ++#include <linux/cpufeature.h>
7468 ++#include <linux/crypto.h>
7469 ++#include <linux/jump_label.h>
7470 ++#include <linux/module.h>
7471 ++
7472 ++asmlinkage void poly1305_init_arm64(void *state, const u8 *key);
7473 ++asmlinkage void poly1305_blocks(void *state, const u8 *src, u32 len, u32 hibit);
7474 ++asmlinkage void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit);
7475 ++asmlinkage void poly1305_emit(void *state, __le32 *digest, const u32 *nonce);
7476 ++
7477 ++static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
7478 ++
7479 ++ void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key) /* reads key[0..31] */
7480 ++ {
7481 ++ poly1305_init_arm64(&dctx->h, key); /* asm helper; presumably poly1305_init from poly1305-core.S, which loads key[0..15] */
7482 ++ dctx->s[0] = get_unaligned_le32(key + 16); /* s[] = key[16..31] as four little-endian 32-bit words */
7483 ++ dctx->s[1] = get_unaligned_le32(key + 20);
7484 ++ dctx->s[2] = get_unaligned_le32(key + 24);
7485 ++ dctx->s[3] = get_unaligned_le32(key + 28);
7486 ++ dctx->buflen = 0; /* no partial block buffered yet */
7487 ++ }
7488 ++ EXPORT_SYMBOL(poly1305_init_arch);
7489 ++
7490 ++ static int neon_poly1305_init(struct shash_desc *desc) /* reset the per-request context; always returns 0 */
7491 ++ {
7492 ++ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
7493 ++
7494 ++ dctx->buflen = 0; /* nothing buffered */
7495 ++ dctx->rset = 0; /* r not set yet: neon_poly1305_blocks() takes it from the data */
7496 ++ dctx->sset = false; /* s not set yet: likewise taken from the data */
7497 ++
7498 ++ return 0;
7499 ++ }
7500 ++
7501 ++ static void neon_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, /* process whole blocks; until r and s are set, leading blocks are consumed as r then s */
7502 ++ u32 len, u32 hibit, bool do_neon) /* len is in bytes; callers in this file pass at least one block */
7503 ++ {
7504 ++ if (unlikely(!dctx->sset)) {
7505 ++ if (!dctx->rset) {
7506 ++ poly1305_init_arm64(&dctx->h, src); /* set r only: poly1305_init_arch() would also read src[16..31], past a 16-byte input */
7507 ++ src += POLY1305_BLOCK_SIZE;
7508 ++ len -= POLY1305_BLOCK_SIZE;
7509 ++ dctx->rset = 1;
7510 ++ }
7511 ++ if (len >= POLY1305_BLOCK_SIZE) { /* a further block is available: it carries s */
7512 ++ dctx->s[0] = get_unaligned_le32(src + 0);
7513 ++ dctx->s[1] = get_unaligned_le32(src + 4);
7514 ++ dctx->s[2] = get_unaligned_le32(src + 8);
7515 ++ dctx->s[3] = get_unaligned_le32(src + 12);
7516 ++ src += POLY1305_BLOCK_SIZE;
7517 ++ len -= POLY1305_BLOCK_SIZE;
7518 ++ dctx->sset = true;
7519 ++ }
7520 ++ if (len < POLY1305_BLOCK_SIZE) /* no full message block left after key setup */
7521 ++ return;
7522 ++ }
7523 ++
7524 ++ len &= ~(POLY1305_BLOCK_SIZE - 1); /* whole blocks only; the caller keeps the remainder */
7525 ++
7526 ++ if (static_branch_likely(&have_neon) && likely(do_neon))
7527 ++ poly1305_blocks_neon(&dctx->h, src, len, hibit);
7528 ++ else
7529 ++ poly1305_blocks(&dctx->h, src, len, hibit);
7530 ++ }
7531 ++
7532 ++ static void neon_poly1305_do_update(struct poly1305_desc_ctx *dctx, /* feed len bytes: top up the partial-block buffer, process whole blocks, buffer the tail */
7533 ++ const u8 *src, u32 len, bool do_neon)
7534 ++ {
7535 ++ if (unlikely(dctx->buflen)) { /* a partial block is pending from an earlier call */
7536 ++ u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);
7537 ++
7538 ++ memcpy(dctx->buf + dctx->buflen, src, bytes);
7539 ++ src += bytes;
7540 ++ len -= bytes;
7541 ++ dctx->buflen += bytes;
7542 ++
7543 ++ if (dctx->buflen == POLY1305_BLOCK_SIZE) {
7544 ++ neon_poly1305_blocks(dctx, dctx->buf, /* the single buffered block always takes the non-NEON path (do_neon = false) */
7545 ++ POLY1305_BLOCK_SIZE, 1, false);
7546 ++ dctx->buflen = 0;
7547 ++ }
7548 ++ }
7549 ++
7550 ++ if (likely(len >= POLY1305_BLOCK_SIZE)) {
7551 ++ neon_poly1305_blocks(dctx, src, len, 1, do_neon); /* the callee rounds len down to whole blocks */
7552 ++ src += round_down(len, POLY1305_BLOCK_SIZE);
7553 ++ len %= POLY1305_BLOCK_SIZE;
7554 ++ }
7555 ++
7556 ++ if (unlikely(len)) { /* keep the sub-block tail for the next call */
7557 ++ dctx->buflen = len;
7558 ++ memcpy(dctx->buf, src, len);
7559 ++ }
7560 ++ }
7561 ++
7562 ++ static int neon_poly1305_update(struct shash_desc *desc, /* hash srclen bytes from src; always returns 0 */
7563 ++ const u8 *src, unsigned int srclen)
7564 ++ {
7565 ++ bool do_neon = crypto_simd_usable() && srclen > 128; /* NEON only when SIMD is usable and the input exceeds 128 bytes */
7566 ++ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
7567 ++
7568 ++ if (static_branch_likely(&have_neon) && do_neon)
7569 ++ kernel_neon_begin();
7570 ++ neon_poly1305_do_update(dctx, src, srclen, do_neon);
7571 ++ if (static_branch_likely(&have_neon) && do_neon) /* same condition as above, so begin/end stay paired */
7572 ++ kernel_neon_end();
7573 ++ return 0;
7574 ++ }
7575 ++
7576 ++void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
7577 ++ unsigned int nbytes)
7578 ++{ /* Exported update entry point: buffers partial blocks, hashes whole ones. */
7579 ++ if (unlikely(dctx->buflen)) { /* complete a previously buffered partial block first */
7580 ++ u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);
7581 ++
7582 ++ memcpy(dctx->buf + dctx->buflen, src, bytes);
7583 ++ src += bytes;
7584 ++ nbytes -= bytes;
7585 ++ dctx->buflen += bytes;
7586 ++
7587 ++ if (dctx->buflen == POLY1305_BLOCK_SIZE) {
7588 ++ poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1);
7589 ++ dctx->buflen = 0;
7590 ++ }
7591 ++ }
7592 ++
7593 ++ if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
7594 ++ unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
7595 ++
7596 ++ if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
7597 ++ kernel_neon_begin(); /* the whole len is processed inside one begin/end section */
7598 ++ poly1305_blocks_neon(&dctx->h, src, len, 1);
7599 ++ kernel_neon_end();
7600 ++ } else {
7601 ++ poly1305_blocks(&dctx->h, src, len, 1);
7602 ++ }
7603 ++ src += len;
7604 ++ nbytes %= POLY1305_BLOCK_SIZE;
7605 ++ }
7606 ++
7607 ++ if (unlikely(nbytes)) { /* stash the sub-block tail for the next call */
7608 ++ dctx->buflen = nbytes;
7609 ++ memcpy(dctx->buf, src, nbytes);
7610 ++ }
7611 ++}
7612 ++EXPORT_SYMBOL(poly1305_update_arch);
7613 ++
7614 ++void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
7615 ++{ /* Hash any buffered tail, write the 16-byte tag to dst, then clear the context. */
7616 ++ __le32 digest[4];
7617 ++ u64 f = 0;
7618 ++
7619 ++ if (unlikely(dctx->buflen)) { /* final partial block: append a 1 byte, zero-fill, hash with hibit 0 */
7620 ++ dctx->buf[dctx->buflen++] = 1;
7621 ++ memset(dctx->buf + dctx->buflen, 0,
7622 ++ POLY1305_BLOCK_SIZE - dctx->buflen);
7623 ++ poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
7624 ++ }
7625 ++
7626 ++ poly1305_emit(&dctx->h, digest, dctx->s); /* dctx->s is handed to the emit routine here */
7627 ++
7628 ++ /* mac = (h + s) % (2^128) */
7629 ++ f = (f >> 32) + le32_to_cpu(digest[0]); /* below, only the digest words and the running carry are summed */
7630 ++ put_unaligned_le32(f, dst);
7631 ++ f = (f >> 32) + le32_to_cpu(digest[1]);
7632 ++ put_unaligned_le32(f, dst + 4);
7633 ++ f = (f >> 32) + le32_to_cpu(digest[2]);
7634 ++ put_unaligned_le32(f, dst + 8);
7635 ++ f = (f >> 32) + le32_to_cpu(digest[3]);
7636 ++ put_unaligned_le32(f, dst + 12);
7637 ++
7638 ++ *dctx = (struct poly1305_desc_ctx){}; /* reset every field of the context */
7639 ++}
7640 ++EXPORT_SYMBOL(poly1305_final_arch);
7641 ++
7642 ++static int neon_poly1305_final(struct shash_desc *desc, u8 *dst)
7643 ++{ /* shash .final hook: returns -ENOKEY if s was never set, else writes the tag to dst. */
7644 ++ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
7645 ++
7646 ++ if (unlikely(!dctx->sset)) /* sset is only set once neon_poly1305_blocks() has read s */
7647 ++ return -ENOKEY;
7648 ++
7649 ++ poly1305_final_arch(dctx, dst);
7650 ++ return 0;
7651 ++}
7652 ++
7653 ++static struct shash_alg neon_poly1305_alg = { /* registered by neon_poly1305_mod_init() */
7654 ++ .init = neon_poly1305_init,
7655 ++ .update = neon_poly1305_update,
7656 ++ .final = neon_poly1305_final,
7657 ++ .digestsize = POLY1305_DIGEST_SIZE,
7658 ++ .descsize = sizeof(struct poly1305_desc_ctx), /* the hooks obtain this context via shash_desc_ctx() */
7659 ++
7660 ++ .base.cra_name = "poly1305",
7661 ++ .base.cra_driver_name = "poly1305-neon",
7662 ++ .base.cra_priority = 200,
7663 ++ .base.cra_blocksize = POLY1305_BLOCK_SIZE,
7664 ++ .base.cra_module = THIS_MODULE,
7665 ++};
7666 ++
7667 ++static int __init neon_poly1305_mod_init(void)
7668 ++{ /* Module init: enable the NEON static branch and register the shash when ASIMD is present. */
7669 ++ if (!cpu_have_named_feature(ASIMD)) /* no ASIMD: succeed, but leave have_neon off and register nothing */
7670 ++ return 0;
7671 ++
7672 ++ static_branch_enable(&have_neon);
7673 ++
7674 ++ return crypto_register_shash(&neon_poly1305_alg); /* the registration result becomes the init result */
7675 ++}
7676 ++
7677 ++static void __exit neon_poly1305_mod_exit(void)
7678 ++{ /* Module exit: undo the registration made in neon_poly1305_mod_init(). */
7679 ++ if (cpu_have_named_feature(ASIMD)) /* same feature test that gated registration in init */
7680 ++ crypto_unregister_shash(&neon_poly1305_alg);
7681 ++}
7682 ++
7683 ++module_init(neon_poly1305_mod_init); /* module entry point */
7684 ++module_exit(neon_poly1305_mod_exit); /* module exit point */
7685 ++
7686 ++MODULE_LICENSE("GPL v2");
7687 ++MODULE_ALIAS_CRYPTO("poly1305"); /* matches .base.cra_name of neon_poly1305_alg */
7688 ++MODULE_ALIAS_CRYPTO("poly1305-neon"); /* matches .base.cra_driver_name of neon_poly1305_alg */
7689 +diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
7690 +index 181754615f73..9923445e8225 100644
7691 +--- a/lib/crypto/Kconfig
7692 ++++ b/lib/crypto/Kconfig
7693 +@@ -40,6 +40,7 @@ config CRYPTO_LIB_DES
7694 + config CRYPTO_LIB_POLY1305_RSIZE
7695 + int
7696 + default 4 if X86_64
7697 ++ default 9 if ARM64
7698 + default 1
7699 +
7700 + config CRYPTO_ARCH_HAVE_LIB_POLY1305
7701 +--
7702 +cgit v1.2.3-4-ga26e
7703 +
7704 +
7705 +From 005319eb6225817831b578c7e9a665001b34906d Mon Sep 17 00:00:00 2001
7706 +From: Ard Biesheuvel <ardb@××××××.org>
7707 +Date: Fri, 8 Nov 2019 13:22:25 +0100
7708 +Subject: crypto: arm/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON
7709 + implementation
7710 +
7711 +commit a6b803b3ddc793d6db0c16f12fc12d30d20fa9cc upstream.
7712 +
7713 +This is a straight import of the OpenSSL/CRYPTOGAMS Poly1305 implementation
7714 +for NEON authored by Andy Polyakov, and contributed by him to the OpenSSL
7715 +project. The file 'poly1305-armv4.pl' is taken straight from this upstream
7716 +GitHub repository [0] at commit ec55a08dc0244ce570c4fc7cade330c60798952f,
7717 +and already contains all the changes required to build it as part of a
7718 +Linux kernel module.
7719 +
7720 +[0] https://github.com/dot-asm/cryptogams
7721 +
7722 +Co-developed-by: Andy Polyakov <appro@××××××××××.org>
7723 +Signed-off-by: Andy Polyakov <appro@××××××××××.org>
7724 +Signed-off-by: Ard Biesheuvel <ardb@××××××.org>
7725 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
7726 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
7727 +---
7728 + arch/arm/crypto/Kconfig | 5 +
7729 + arch/arm/crypto/Makefile | 12 +-
7730 + arch/arm/crypto/poly1305-armv4.pl | 1236 +++++++++++++++++++++++++++++++
7731 + arch/arm/crypto/poly1305-core.S_shipped | 1158 +++++++++++++++++++++++++++++
7732 + arch/arm/crypto/poly1305-glue.c | 276 +++++++
7733 + lib/crypto/Kconfig | 2 +-
7734 + 6 files changed, 2687 insertions(+), 2 deletions(-)
7735 + create mode 100644 arch/arm/crypto/poly1305-armv4.pl
7736 + create mode 100644 arch/arm/crypto/poly1305-core.S_shipped
7737 + create mode 100644 arch/arm/crypto/poly1305-glue.c
7738 +
7739 +diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
7740 +index b25ffec04417..2e8a9289bded 100644
7741 +--- a/arch/arm/crypto/Kconfig
7742 ++++ b/arch/arm/crypto/Kconfig
7743 +@@ -131,6 +131,11 @@ config CRYPTO_CHACHA20_NEON
7744 + select CRYPTO_BLKCIPHER
7745 + select CRYPTO_ARCH_HAVE_LIB_CHACHA
7746 +
7747 ++config CRYPTO_POLY1305_ARM
7748 ++ tristate "Accelerated scalar and SIMD Poly1305 hash implementations"
7749 ++ select CRYPTO_HASH
7750 ++ select CRYPTO_ARCH_HAVE_LIB_POLY1305
7751 ++
7752 + config CRYPTO_NHPOLY1305_NEON
7753 + tristate "NEON accelerated NHPoly1305 hash function (for Adiantum)"
7754 + depends on KERNEL_MODE_NEON
7755 +diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
7756 +index 6b97dffcf90f..4f6a8a81dabc 100644
7757 +--- a/arch/arm/crypto/Makefile
7758 ++++ b/arch/arm/crypto/Makefile
7759 +@@ -10,6 +10,7 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
7760 + obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
7761 + obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o
7762 + obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o
7763 ++obj-$(CONFIG_CRYPTO_POLY1305_ARM) += poly1305-arm.o
7764 + obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o
7765 +
7766 + ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
7767 +@@ -55,12 +56,16 @@ crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o
7768 + crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o
7769 + chacha-neon-y := chacha-scalar-core.o chacha-glue.o
7770 + chacha-neon-$(CONFIG_KERNEL_MODE_NEON) += chacha-neon-core.o
7771 ++poly1305-arm-y := poly1305-core.o poly1305-glue.o
7772 + nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o
7773 +
7774 + ifdef REGENERATE_ARM_CRYPTO
7775 + quiet_cmd_perl = PERL $@
7776 + cmd_perl = $(PERL) $(<) > $(@)
7777 +
7778 ++$(src)/poly1305-core.S_shipped: $(src)/poly1305-armv4.pl
7779 ++ $(call cmd,perl)
7780 ++
7781 + $(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl
7782 + $(call cmd,perl)
7783 +
7784 +@@ -68,4 +73,9 @@ $(src)/sha512-core.S_shipped: $(src)/sha512-armv4.pl
7785 + $(call cmd,perl)
7786 + endif
7787 +
7788 +-clean-files += sha256-core.S sha512-core.S
7789 ++clean-files += poly1305-core.S sha256-core.S sha512-core.S
7790 ++
7791 ++# massage the perlasm code a bit so we only get the NEON routine if we need it
7792 ++poly1305-aflags-$(CONFIG_CPU_V7) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=5
7793 ++poly1305-aflags-$(CONFIG_KERNEL_MODE_NEON) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=7
7794 ++AFLAGS_poly1305-core.o += $(poly1305-aflags-y)
7795 +diff --git a/arch/arm/crypto/poly1305-armv4.pl b/arch/arm/crypto/poly1305-armv4.pl
7796 +new file mode 100644
7797 +index 000000000000..6d79498d3115
7798 +--- /dev/null
7799 ++++ b/arch/arm/crypto/poly1305-armv4.pl
7800 +@@ -0,0 +1,1236 @@
7801 ++#!/usr/bin/env perl
7802 ++# SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause
7803 ++#
7804 ++# ====================================================================
7805 ++# Written by Andy Polyakov, @dot-asm, initially for the OpenSSL
7806 ++# project.
7807 ++# ====================================================================
7808 ++#
7809 ++# IALU(*)/gcc-4.4 NEON
7810 ++#
7811 ++# ARM11xx(ARMv6) 7.78/+100% -
7812 ++# Cortex-A5 6.35/+130% 3.00
7813 ++# Cortex-A8 6.25/+115% 2.36
7814 ++# Cortex-A9 5.10/+95% 2.55
7815 ++# Cortex-A15 3.85/+85% 1.25(**)
7816 ++# Snapdragon S4 5.70/+100% 1.48(**)
7817 ++#
7818 ++# (*) this is for -march=armv6, i.e. with bunch of ldrb loading data;
7819 ++# (**) these are trade-off results, they can be improved by ~8% but at
7820 ++# the cost of 15/12% regression on Cortex-A5/A7, it's even possible
7821 ++# to improve Cortex-A9 result, but then A5/A7 lose more than 20%;
7822 ++
7823 ++$flavour = shift; # first command-line argument
7824 ++if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; } # it looks like a file name: treat it as the output, no flavour
7825 ++else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} } # otherwise skip arguments until one looks like a file name
7826 ++
7827 ++if ($flavour && $flavour ne "void") { # a real flavour was given: pipe the output through arm-xlate.pl
7828 ++ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; # directory part of this script's path
7829 ++ ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
7830 ++ ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
7831 ++ die "can't locate arm-xlate.pl";
7832 ++
7833 ++ open STDOUT,"| \"$^X\" $xlate $flavour $output";
7834 ++} else { # no flavour or "void": write straight to $output
7835 ++ open STDOUT,">$output";
7836 ++}
7837 ++
7838 ++($ctx,$inp,$len,$padbit)=map("r$_",(0..3)); # symbolic names for registers r0..r3
7839 ++
7840 ++$code.=<<___;
7841 ++#ifndef __KERNEL__
7842 ++# include "arm_arch.h"
7843 ++#else
7844 ++# define __ARM_ARCH__ __LINUX_ARM_ARCH__
7845 ++# define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__
7846 ++# define poly1305_init poly1305_init_arm
7847 ++# define poly1305_blocks poly1305_blocks_arm
7848 ++# define poly1305_emit poly1305_emit_arm
7849 ++.globl poly1305_blocks_neon
7850 ++#endif
7851 ++
7852 ++#if defined(__thumb2__)
7853 ++.syntax unified
7854 ++.thumb
7855 ++#else
7856 ++.code 32
7857 ++#endif
7858 ++
7859 ++.text
7860 ++
7861 ++.globl poly1305_emit
7862 ++.globl poly1305_blocks
7863 ++.globl poly1305_init
7864 ++.type poly1305_init,%function
7865 ++.align 5
7866 ++poly1305_init:
7867 ++.Lpoly1305_init:
7868 ++ stmdb sp!,{r4-r11}
7869 ++
7870 ++ eor r3,r3,r3
7871 ++ cmp $inp,#0
7872 ++ str r3,[$ctx,#0] @ zero hash value
7873 ++ str r3,[$ctx,#4]
7874 ++ str r3,[$ctx,#8]
7875 ++ str r3,[$ctx,#12]
7876 ++ str r3,[$ctx,#16]
7877 ++ str r3,[$ctx,#36] @ clear is_base2_26
7878 ++ add $ctx,$ctx,#20
7879 ++
7880 ++#ifdef __thumb2__
7881 ++ it eq
7882 ++#endif
7883 ++ moveq r0,#0
7884 ++ beq .Lno_key
7885 ++
7886 ++#if __ARM_MAX_ARCH__>=7
7887 ++ mov r3,#-1
7888 ++ str r3,[$ctx,#28] @ impossible key power value
7889 ++# ifndef __KERNEL__
7890 ++ adr r11,.Lpoly1305_init
7891 ++ ldr r12,.LOPENSSL_armcap
7892 ++# endif
7893 ++#endif
7894 ++ ldrb r4,[$inp,#0]
7895 ++ mov r10,#0x0fffffff
7896 ++ ldrb r5,[$inp,#1]
7897 ++ and r3,r10,#-4 @ 0x0ffffffc
7898 ++ ldrb r6,[$inp,#2]
7899 ++ ldrb r7,[$inp,#3]
7900 ++ orr r4,r4,r5,lsl#8
7901 ++ ldrb r5,[$inp,#4]
7902 ++ orr r4,r4,r6,lsl#16
7903 ++ ldrb r6,[$inp,#5]
7904 ++ orr r4,r4,r7,lsl#24
7905 ++ ldrb r7,[$inp,#6]
7906 ++ and r4,r4,r10
7907 ++
7908 ++#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
7909 ++# if !defined(_WIN32)
7910 ++ ldr r12,[r11,r12] @ OPENSSL_armcap_P
7911 ++# endif
7912 ++# if defined(__APPLE__) || defined(_WIN32)
7913 ++ ldr r12,[r12]
7914 ++# endif
7915 ++#endif
7916 ++ ldrb r8,[$inp,#7]
7917 ++ orr r5,r5,r6,lsl#8
7918 ++ ldrb r6,[$inp,#8]
7919 ++ orr r5,r5,r7,lsl#16
7920 ++ ldrb r7,[$inp,#9]
7921 ++ orr r5,r5,r8,lsl#24
7922 ++ ldrb r8,[$inp,#10]
7923 ++ and r5,r5,r3
7924 ++
7925 ++#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
7926 ++ tst r12,#ARMV7_NEON @ check for NEON
7927 ++# ifdef __thumb2__
7928 ++ adr r9,.Lpoly1305_blocks_neon
7929 ++ adr r11,.Lpoly1305_blocks
7930 ++ it ne
7931 ++ movne r11,r9
7932 ++ adr r12,.Lpoly1305_emit
7933 ++ orr r11,r11,#1 @ thumb-ify addresses
7934 ++ orr r12,r12,#1
7935 ++# else
7936 ++ add r12,r11,#(.Lpoly1305_emit-.Lpoly1305_init)
7937 ++ ite eq
7938 ++ addeq r11,r11,#(.Lpoly1305_blocks-.Lpoly1305_init)
7939 ++ addne r11,r11,#(.Lpoly1305_blocks_neon-.Lpoly1305_init)
7940 ++# endif
7941 ++#endif
7942 ++ ldrb r9,[$inp,#11]
7943 ++ orr r6,r6,r7,lsl#8
7944 ++ ldrb r7,[$inp,#12]
7945 ++ orr r6,r6,r8,lsl#16
7946 ++ ldrb r8,[$inp,#13]
7947 ++ orr r6,r6,r9,lsl#24
7948 ++ ldrb r9,[$inp,#14]
7949 ++ and r6,r6,r3
7950 ++
7951 ++ ldrb r10,[$inp,#15]
7952 ++ orr r7,r7,r8,lsl#8
7953 ++ str r4,[$ctx,#0]
7954 ++ orr r7,r7,r9,lsl#16
7955 ++ str r5,[$ctx,#4]
7956 ++ orr r7,r7,r10,lsl#24
7957 ++ str r6,[$ctx,#8]
7958 ++ and r7,r7,r3
7959 ++ str r7,[$ctx,#12]
7960 ++#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
7961 ++ stmia r2,{r11,r12} @ fill functions table
7962 ++ mov r0,#1
7963 ++#else
7964 ++ mov r0,#0
7965 ++#endif
7966 ++.Lno_key:
7967 ++ ldmia sp!,{r4-r11}
7968 ++#if __ARM_ARCH__>=5
7969 ++ ret @ bx lr
7970 ++#else
7971 ++ tst lr,#1
7972 ++ moveq pc,lr @ be binary compatible with V4, yet
7973 ++ bx lr @ interoperable with Thumb ISA:-)
7974 ++#endif
7975 ++.size poly1305_init,.-poly1305_init
7976 ++___
7977 ++{
7978 ++my ($h0,$h1,$h2,$h3,$h4,$r0,$r1,$r2,$r3)=map("r$_",(4..12));
7979 ++my ($s1,$s2,$s3)=($r1,$r2,$r3);
7980 ++
7981 ++$code.=<<___;
7982 ++.type poly1305_blocks,%function
7983 ++.align 5
7984 ++poly1305_blocks:
7985 ++.Lpoly1305_blocks:
7986 ++ stmdb sp!,{r3-r11,lr}
7987 ++
7988 ++ ands $len,$len,#-16
7989 ++ beq .Lno_data
7990 ++
7991 ++ add $len,$len,$inp @ end pointer
7992 ++ sub sp,sp,#32
7993 ++
7994 ++#if __ARM_ARCH__<7
7995 ++ ldmia $ctx,{$h0-$r3} @ load context
7996 ++ add $ctx,$ctx,#20
7997 ++ str $len,[sp,#16] @ offload stuff
7998 ++ str $ctx,[sp,#12]
7999 ++#else
8000 ++ ldr lr,[$ctx,#36] @ is_base2_26
8001 ++ ldmia $ctx!,{$h0-$h4} @ load hash value
8002 ++ str $len,[sp,#16] @ offload stuff
8003 ++ str $ctx,[sp,#12]
8004 ++
8005 ++ adds $r0,$h0,$h1,lsl#26 @ base 2^26 -> base 2^32
8006 ++ mov $r1,$h1,lsr#6
8007 ++ adcs $r1,$r1,$h2,lsl#20
8008 ++ mov $r2,$h2,lsr#12
8009 ++ adcs $r2,$r2,$h3,lsl#14
8010 ++ mov $r3,$h3,lsr#18
8011 ++ adcs $r3,$r3,$h4,lsl#8
8012 ++ mov $len,#0
8013 ++ teq lr,#0
8014 ++ str $len,[$ctx,#16] @ clear is_base2_26
8015 ++ adc $len,$len,$h4,lsr#24
8016 ++
8017 ++ itttt ne
8018 ++ movne $h0,$r0 @ choose between radixes
8019 ++ movne $h1,$r1
8020 ++ movne $h2,$r2
8021 ++ movne $h3,$r3
8022 ++ ldmia $ctx,{$r0-$r3} @ load key
8023 ++ it ne
8024 ++ movne $h4,$len
8025 ++#endif
8026 ++
8027 ++ mov lr,$inp
8028 ++ cmp $padbit,#0
8029 ++ str $r1,[sp,#20]
8030 ++ str $r2,[sp,#24]
8031 ++ str $r3,[sp,#28]
8032 ++ b .Loop
8033 ++
8034 ++.align 4
8035 ++.Loop:
8036 ++#if __ARM_ARCH__<7
8037 ++ ldrb r0,[lr],#16 @ load input
8038 ++# ifdef __thumb2__
8039 ++ it hi
8040 ++# endif
8041 ++ addhi $h4,$h4,#1 @ 1<<128
8042 ++ ldrb r1,[lr,#-15]
8043 ++ ldrb r2,[lr,#-14]
8044 ++ ldrb r3,[lr,#-13]
8045 ++ orr r1,r0,r1,lsl#8
8046 ++ ldrb r0,[lr,#-12]
8047 ++ orr r2,r1,r2,lsl#16
8048 ++ ldrb r1,[lr,#-11]
8049 ++ orr r3,r2,r3,lsl#24
8050 ++ ldrb r2,[lr,#-10]
8051 ++ adds $h0,$h0,r3 @ accumulate input
8052 ++
8053 ++ ldrb r3,[lr,#-9]
8054 ++ orr r1,r0,r1,lsl#8
8055 ++ ldrb r0,[lr,#-8]
8056 ++ orr r2,r1,r2,lsl#16
8057 ++ ldrb r1,[lr,#-7]
8058 ++ orr r3,r2,r3,lsl#24
8059 ++ ldrb r2,[lr,#-6]
8060 ++ adcs $h1,$h1,r3
8061 ++
8062 ++ ldrb r3,[lr,#-5]
8063 ++ orr r1,r0,r1,lsl#8
8064 ++ ldrb r0,[lr,#-4]
8065 ++ orr r2,r1,r2,lsl#16
8066 ++ ldrb r1,[lr,#-3]
8067 ++ orr r3,r2,r3,lsl#24
8068 ++ ldrb r2,[lr,#-2]
8069 ++ adcs $h2,$h2,r3
8070 ++
8071 ++ ldrb r3,[lr,#-1]
8072 ++ orr r1,r0,r1,lsl#8
8073 ++ str lr,[sp,#8] @ offload input pointer
8074 ++ orr r2,r1,r2,lsl#16
8075 ++ add $s1,$r1,$r1,lsr#2
8076 ++ orr r3,r2,r3,lsl#24
8077 ++#else
8078 ++ ldr r0,[lr],#16 @ load input
8079 ++ it hi
8080 ++ addhi $h4,$h4,#1 @ padbit
8081 ++ ldr r1,[lr,#-12]
8082 ++ ldr r2,[lr,#-8]
8083 ++ ldr r3,[lr,#-4]
8084 ++# ifdef __ARMEB__
8085 ++ rev r0,r0
8086 ++ rev r1,r1
8087 ++ rev r2,r2
8088 ++ rev r3,r3
8089 ++# endif
8090 ++ adds $h0,$h0,r0 @ accumulate input
8091 ++ str lr,[sp,#8] @ offload input pointer
8092 ++ adcs $h1,$h1,r1
8093 ++ add $s1,$r1,$r1,lsr#2
8094 ++ adcs $h2,$h2,r2
8095 ++#endif
8096 ++ add $s2,$r2,$r2,lsr#2
8097 ++ adcs $h3,$h3,r3
8098 ++ add $s3,$r3,$r3,lsr#2
8099 ++
8100 ++ umull r2,r3,$h1,$r0
8101 ++ adc $h4,$h4,#0
8102 ++ umull r0,r1,$h0,$r0
8103 ++ umlal r2,r3,$h4,$s1
8104 ++ umlal r0,r1,$h3,$s1
8105 ++ ldr $r1,[sp,#20] @ reload $r1
8106 ++ umlal r2,r3,$h2,$s3
8107 ++ umlal r0,r1,$h1,$s3
8108 ++ umlal r2,r3,$h3,$s2
8109 ++ umlal r0,r1,$h2,$s2
8110 ++ umlal r2,r3,$h0,$r1
8111 ++ str r0,[sp,#0] @ future $h0
8112 ++ mul r0,$s2,$h4
8113 ++ ldr $r2,[sp,#24] @ reload $r2
8114 ++ adds r2,r2,r1 @ d1+=d0>>32
8115 ++ eor r1,r1,r1
8116 ++ adc lr,r3,#0 @ future $h2
8117 ++ str r2,[sp,#4] @ future $h1
8118 ++
8119 ++ mul r2,$s3,$h4
8120 ++ eor r3,r3,r3
8121 ++ umlal r0,r1,$h3,$s3
8122 ++ ldr $r3,[sp,#28] @ reload $r3
8123 ++ umlal r2,r3,$h3,$r0
8124 ++ umlal r0,r1,$h2,$r0
8125 ++ umlal r2,r3,$h2,$r1
8126 ++ umlal r0,r1,$h1,$r1
8127 ++ umlal r2,r3,$h1,$r2
8128 ++ umlal r0,r1,$h0,$r2
8129 ++ umlal r2,r3,$h0,$r3
8130 ++ ldr $h0,[sp,#0]
8131 ++ mul $h4,$r0,$h4
8132 ++ ldr $h1,[sp,#4]
8133 ++
8134 ++ adds $h2,lr,r0 @ d2+=d1>>32
8135 ++ ldr lr,[sp,#8] @ reload input pointer
8136 ++ adc r1,r1,#0
8137 ++ adds $h3,r2,r1 @ d3+=d2>>32
8138 ++ ldr r0,[sp,#16] @ reload end pointer
8139 ++ adc r3,r3,#0
8140 ++ add $h4,$h4,r3 @ h4+=d3>>32
8141 ++
8142 ++ and r1,$h4,#-4
8143 ++ and $h4,$h4,#3
8144 ++ add r1,r1,r1,lsr#2 @ *=5
8145 ++ adds $h0,$h0,r1
8146 ++ adcs $h1,$h1,#0
8147 ++ adcs $h2,$h2,#0
8148 ++ adcs $h3,$h3,#0
8149 ++ adc $h4,$h4,#0
8150 ++
8151 ++ cmp r0,lr @ done yet?
8152 ++ bhi .Loop
8153 ++
8154 ++ ldr $ctx,[sp,#12]
8155 ++ add sp,sp,#32
8156 ++ stmdb $ctx,{$h0-$h4} @ store the result
8157 ++
8158 ++.Lno_data:
8159 ++#if __ARM_ARCH__>=5
8160 ++ ldmia sp!,{r3-r11,pc}
8161 ++#else
8162 ++ ldmia sp!,{r3-r11,lr}
8163 ++ tst lr,#1
8164 ++ moveq pc,lr @ be binary compatible with V4, yet
8165 ++ bx lr @ interoperable with Thumb ISA:-)
8166 ++#endif
8167 ++.size poly1305_blocks,.-poly1305_blocks
8168 ++___
8169 ++}
8170 ++{
8171 ++my ($ctx,$mac,$nonce)=map("r$_",(0..2));
8172 ++my ($h0,$h1,$h2,$h3,$h4,$g0,$g1,$g2,$g3)=map("r$_",(3..11));
8173 ++my $g4=$ctx;
8174 ++
8175 ++$code.=<<___;
8176 ++.type poly1305_emit,%function
8177 ++.align 5
8178 ++poly1305_emit:
8179 ++.Lpoly1305_emit:
8180 ++ stmdb sp!,{r4-r11}
8181 ++
8182 ++ ldmia $ctx,{$h0-$h4}
8183 ++
8184 ++#if __ARM_ARCH__>=7
8185 ++ ldr ip,[$ctx,#36] @ is_base2_26
8186 ++
8187 ++ adds $g0,$h0,$h1,lsl#26 @ base 2^26 -> base 2^32
8188 ++ mov $g1,$h1,lsr#6
8189 ++ adcs $g1,$g1,$h2,lsl#20
8190 ++ mov $g2,$h2,lsr#12
8191 ++ adcs $g2,$g2,$h3,lsl#14
8192 ++ mov $g3,$h3,lsr#18
8193 ++ adcs $g3,$g3,$h4,lsl#8
8194 ++ mov $g4,#0
8195 ++ adc $g4,$g4,$h4,lsr#24
8196 ++
8197 ++ tst ip,ip
8198 ++ itttt ne
8199 ++ movne $h0,$g0
8200 ++ movne $h1,$g1
8201 ++ movne $h2,$g2
8202 ++ movne $h3,$g3
8203 ++ it ne
8204 ++ movne $h4,$g4
8205 ++#endif
8206 ++
8207 ++ adds $g0,$h0,#5 @ compare to modulus
8208 ++ adcs $g1,$h1,#0
8209 ++ adcs $g2,$h2,#0
8210 ++ adcs $g3,$h3,#0
8211 ++ adc $g4,$h4,#0
8212 ++ tst $g4,#4 @ did it carry/borrow?
8213 ++
8214 ++#ifdef __thumb2__
8215 ++ it ne
8216 ++#endif
8217 ++ movne $h0,$g0
8218 ++ ldr $g0,[$nonce,#0]
8219 ++#ifdef __thumb2__
8220 ++ it ne
8221 ++#endif
8222 ++ movne $h1,$g1
8223 ++ ldr $g1,[$nonce,#4]
8224 ++#ifdef __thumb2__
8225 ++ it ne
8226 ++#endif
8227 ++ movne $h2,$g2
8228 ++ ldr $g2,[$nonce,#8]
8229 ++#ifdef __thumb2__
8230 ++ it ne
8231 ++#endif
8232 ++ movne $h3,$g3
8233 ++ ldr $g3,[$nonce,#12]
8234 ++
8235 ++ adds $h0,$h0,$g0
8236 ++ adcs $h1,$h1,$g1
8237 ++ adcs $h2,$h2,$g2
8238 ++ adc $h3,$h3,$g3
8239 ++
8240 ++#if __ARM_ARCH__>=7
8241 ++# ifdef __ARMEB__
8242 ++ rev $h0,$h0
8243 ++ rev $h1,$h1
8244 ++ rev $h2,$h2
8245 ++ rev $h3,$h3
8246 ++# endif
8247 ++ str $h0,[$mac,#0]
8248 ++ str $h1,[$mac,#4]
8249 ++ str $h2,[$mac,#8]
8250 ++ str $h3,[$mac,#12]
8251 ++#else
8252 ++ strb $h0,[$mac,#0]
8253 ++ mov $h0,$h0,lsr#8
8254 ++ strb $h1,[$mac,#4]
8255 ++ mov $h1,$h1,lsr#8
8256 ++ strb $h2,[$mac,#8]
8257 ++ mov $h2,$h2,lsr#8
8258 ++ strb $h3,[$mac,#12]
8259 ++ mov $h3,$h3,lsr#8
8260 ++
8261 ++ strb $h0,[$mac,#1]
8262 ++ mov $h0,$h0,lsr#8
8263 ++ strb $h1,[$mac,#5]
8264 ++ mov $h1,$h1,lsr#8
8265 ++ strb $h2,[$mac,#9]
8266 ++ mov $h2,$h2,lsr#8
8267 ++ strb $h3,[$mac,#13]
8268 ++ mov $h3,$h3,lsr#8
8269 ++
8270 ++ strb $h0,[$mac,#2]
8271 ++ mov $h0,$h0,lsr#8
8272 ++ strb $h1,[$mac,#6]
8273 ++ mov $h1,$h1,lsr#8
8274 ++ strb $h2,[$mac,#10]
8275 ++ mov $h2,$h2,lsr#8
8276 ++ strb $h3,[$mac,#14]
8277 ++ mov $h3,$h3,lsr#8
8278 ++
8279 ++ strb $h0,[$mac,#3]
8280 ++ strb $h1,[$mac,#7]
8281 ++ strb $h2,[$mac,#11]
8282 ++ strb $h3,[$mac,#15]
8283 ++#endif
8284 ++ ldmia sp!,{r4-r11}
8285 ++#if __ARM_ARCH__>=5
8286 ++ ret @ bx lr
8287 ++#else
8288 ++ tst lr,#1
8289 ++ moveq pc,lr @ be binary compatible with V4, yet
8290 ++ bx lr @ interoperable with Thumb ISA:-)
8291 ++#endif
8292 ++.size poly1305_emit,.-poly1305_emit
8293 ++___
8294 ++{
8295 ++my ($R0,$R1,$S1,$R2,$S2,$R3,$S3,$R4,$S4) = map("d$_",(0..9));
8296 ++my ($D0,$D1,$D2,$D3,$D4, $H0,$H1,$H2,$H3,$H4) = map("q$_",(5..14));
8297 ++my ($T0,$T1,$MASK) = map("q$_",(15,4,0));
8298 ++
8299 ++my ($in2,$zeros,$tbl0,$tbl1) = map("r$_",(4..7));
8300 ++
8301 ++$code.=<<___;
8302 ++#if __ARM_MAX_ARCH__>=7
8303 ++.fpu neon
8304 ++
8305 ++.type poly1305_init_neon,%function
8306 ++.align 5
8307 ++poly1305_init_neon:
8308 ++.Lpoly1305_init_neon:
8309 ++ ldr r3,[$ctx,#48] @ first table element
8310 ++ cmp r3,#-1 @ is value impossible?
8311 ++ bne .Lno_init_neon
8312 ++
8313 ++ ldr r4,[$ctx,#20] @ load key base 2^32
8314 ++ ldr r5,[$ctx,#24]
8315 ++ ldr r6,[$ctx,#28]
8316 ++ ldr r7,[$ctx,#32]
8317 ++
8318 ++ and r2,r4,#0x03ffffff @ base 2^32 -> base 2^26
8319 ++ mov r3,r4,lsr#26
8320 ++ mov r4,r5,lsr#20
8321 ++ orr r3,r3,r5,lsl#6
8322 ++ mov r5,r6,lsr#14
8323 ++ orr r4,r4,r6,lsl#12
8324 ++ mov r6,r7,lsr#8
8325 ++ orr r5,r5,r7,lsl#18
8326 ++ and r3,r3,#0x03ffffff
8327 ++ and r4,r4,#0x03ffffff
8328 ++ and r5,r5,#0x03ffffff
8329 ++
8330 ++ vdup.32 $R0,r2 @ r^1 in both lanes
8331 ++ add r2,r3,r3,lsl#2 @ *5
8332 ++ vdup.32 $R1,r3
8333 ++ add r3,r4,r4,lsl#2
8334 ++ vdup.32 $S1,r2
8335 ++ vdup.32 $R2,r4
8336 ++ add r4,r5,r5,lsl#2
8337 ++ vdup.32 $S2,r3
8338 ++ vdup.32 $R3,r5
8339 ++ add r5,r6,r6,lsl#2
8340 ++ vdup.32 $S3,r4
8341 ++ vdup.32 $R4,r6
8342 ++ vdup.32 $S4,r5
8343 ++
8344 ++ mov $zeros,#2 @ counter
8345 ++
8346 ++.Lsquare_neon:
8347 ++ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
8348 ++ @ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
8349 ++ @ d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4
8350 ++ @ d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4
8351 ++ @ d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4
8352 ++ @ d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4
8353 ++
8354 ++ vmull.u32 $D0,$R0,${R0}[1]
8355 ++ vmull.u32 $D1,$R1,${R0}[1]
8356 ++ vmull.u32 $D2,$R2,${R0}[1]
8357 ++ vmull.u32 $D3,$R3,${R0}[1]
8358 ++ vmull.u32 $D4,$R4,${R0}[1]
8359 ++
8360 ++ vmlal.u32 $D0,$R4,${S1}[1]
8361 ++ vmlal.u32 $D1,$R0,${R1}[1]
8362 ++ vmlal.u32 $D2,$R1,${R1}[1]
8363 ++ vmlal.u32 $D3,$R2,${R1}[1]
8364 ++ vmlal.u32 $D4,$R3,${R1}[1]
8365 ++
8366 ++ vmlal.u32 $D0,$R3,${S2}[1]
8367 ++ vmlal.u32 $D1,$R4,${S2}[1]
8368 ++ vmlal.u32 $D3,$R1,${R2}[1]
8369 ++ vmlal.u32 $D2,$R0,${R2}[1]
8370 ++ vmlal.u32 $D4,$R2,${R2}[1]
8371 ++
8372 ++ vmlal.u32 $D0,$R2,${S3}[1]
8373 ++ vmlal.u32 $D3,$R0,${R3}[1]
8374 ++ vmlal.u32 $D1,$R3,${S3}[1]
8375 ++ vmlal.u32 $D2,$R4,${S3}[1]
8376 ++ vmlal.u32 $D4,$R1,${R3}[1]
8377 ++
8378 ++ vmlal.u32 $D3,$R4,${S4}[1]
8379 ++ vmlal.u32 $D0,$R1,${S4}[1]
8380 ++ vmlal.u32 $D1,$R2,${S4}[1]
8381 ++ vmlal.u32 $D2,$R3,${S4}[1]
8382 ++ vmlal.u32 $D4,$R0,${R4}[1]
8383 ++
8384 ++ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
8385 ++ @ lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
8386 ++ @ and P. Schwabe
8387 ++ @
8388 ++ @ H0>>+H1>>+H2>>+H3>>+H4
8389 ++ @ H3>>+H4>>*5+H0>>+H1
8390 ++ @
8391 ++ @ Trivia.
8392 ++ @
8393 ++ @ Result of multiplication of n-bit number by m-bit number is
8394 ++ @ n+m bits wide. However! Even though 2^n is a n+1-bit number,
8395 ++ @ m-bit number multiplied by 2^n is still n+m bits wide.
8396 ++ @
8397 ++ @ Sum of two n-bit numbers is n+1 bits wide, sum of three - n+2,
8398 ++ @ and so is sum of four. Sum of 2^m n-m-bit numbers and n-bit
8399 ++ @ one is n+1 bits wide.
8400 ++ @
8401 ++ @ >>+ denotes Hnext += Hn>>26, Hn &= 0x3ffffff. This means that
8402 ++ @ H0, H2, H3 are guaranteed to be 26 bits wide, while H1 and H4
8403 ++ @ can be 27. However! In cases when their width exceeds 26 bits
8404 ++ @ they are limited by 2^26+2^6. This in turn means that *sum*
8405 ++ @ of the products with these values can still be viewed as sum
8406 ++ @ of 52-bit numbers as long as the amount of addends is not a
8407 ++ @ power of 2. For example,
8408 ++ @
8409 ++ @ H4 = H4*R0 + H3*R1 + H2*R2 + H1*R3 + H0 * R4,
8410 ++ @
8411 ++ @ which can't be larger than 5 * (2^26 + 2^6) * (2^26 + 2^6), or
8412 ++ @ 5 * (2^52 + 2*2^32 + 2^12), which in turn is smaller than
8413 ++ @ 8 * (2^52) or 2^55. However, the value is then multiplied by
8414 ++ @ 5, so we should be looking at 5 * 5 * (2^52 + 2^33 + 2^12),
8415 ++ @ which is less than 32 * (2^52) or 2^57. And when processing
8416 ++ @ data we are looking at triple as many addends...
8417 ++ @
8418 ++ @ In key setup procedure pre-reduced H0 is limited by 5*4+1 and
8419 ++ @ 5*H4 - by 5*5 52-bit addends, or 57 bits. But when hashing the
8420 ++ @ input H0 is limited by (5*4+1)*3 addends, or 58 bits, while
8421 ++ @ 5*H4 by 5*5*3, or 59[!] bits. How is this relevant? vmlal.u32
8422 ++ @ instruction accepts 2x32-bit input and writes 2x64-bit result.
8423 ++ @ This means that result of reduction have to be compressed upon
8424 ++ @ loop wrap-around. This can be done in the process of reduction
8425 ++ @ to minimize amount of instructions [as well as amount of
8426 ++ @ 128-bit instructions, which benefits low-end processors], but
8427 ++ @ one has to watch for H2 (which is narrower than H0) and 5*H4
8428 ++ @ not being wider than 58 bits, so that result of right shift
8429 ++ @ by 26 bits fits in 32 bits. This is also useful on x86,
8430 ++ @ because it allows to use paddd in place for paddq, which
8431 ++ @ benefits Atom, where paddq is ridiculously slow.
8432 ++
8433 ++ vshr.u64 $T0,$D3,#26
8434 ++ vmovn.i64 $D3#lo,$D3
8435 ++ vshr.u64 $T1,$D0,#26
8436 ++ vmovn.i64 $D0#lo,$D0
8437 ++ vadd.i64 $D4,$D4,$T0 @ h3 -> h4
8438 ++ vbic.i32 $D3#lo,#0xfc000000 @ &=0x03ffffff
8439 ++ vadd.i64 $D1,$D1,$T1 @ h0 -> h1
8440 ++ vbic.i32 $D0#lo,#0xfc000000
8441 ++
8442 ++ vshrn.u64 $T0#lo,$D4,#26
8443 ++ vmovn.i64 $D4#lo,$D4
8444 ++ vshr.u64 $T1,$D1,#26
8445 ++ vmovn.i64 $D1#lo,$D1
8446 ++ vadd.i64 $D2,$D2,$T1 @ h1 -> h2
8447 ++ vbic.i32 $D4#lo,#0xfc000000
8448 ++ vbic.i32 $D1#lo,#0xfc000000
8449 ++
8450 ++ vadd.i32 $D0#lo,$D0#lo,$T0#lo
8451 ++ vshl.u32 $T0#lo,$T0#lo,#2
8452 ++ vshrn.u64 $T1#lo,$D2,#26
8453 ++ vmovn.i64 $D2#lo,$D2
8454 ++ vadd.i32 $D0#lo,$D0#lo,$T0#lo @ h4 -> h0
8455 ++ vadd.i32 $D3#lo,$D3#lo,$T1#lo @ h2 -> h3
8456 ++ vbic.i32 $D2#lo,#0xfc000000
8457 ++
8458 ++ vshr.u32 $T0#lo,$D0#lo,#26
8459 ++ vbic.i32 $D0#lo,#0xfc000000
8460 ++ vshr.u32 $T1#lo,$D3#lo,#26
8461 ++ vbic.i32 $D3#lo,#0xfc000000
8462 ++ vadd.i32 $D1#lo,$D1#lo,$T0#lo @ h0 -> h1
8463 ++ vadd.i32 $D4#lo,$D4#lo,$T1#lo @ h3 -> h4
8464 ++
8465 ++ subs $zeros,$zeros,#1
8466 ++ beq .Lsquare_break_neon
8467 ++
8468 ++ add $tbl0,$ctx,#(48+0*9*4)
8469 ++ add $tbl1,$ctx,#(48+1*9*4)
8470 ++
8471 ++ vtrn.32 $R0,$D0#lo @ r^2:r^1
8472 ++ vtrn.32 $R2,$D2#lo
8473 ++ vtrn.32 $R3,$D3#lo
8474 ++ vtrn.32 $R1,$D1#lo
8475 ++ vtrn.32 $R4,$D4#lo
8476 ++
8477 ++ vshl.u32 $S2,$R2,#2 @ *5
8478 ++ vshl.u32 $S3,$R3,#2
8479 ++ vshl.u32 $S1,$R1,#2
8480 ++ vshl.u32 $S4,$R4,#2
8481 ++ vadd.i32 $S2,$S2,$R2
8482 ++ vadd.i32 $S1,$S1,$R1
8483 ++ vadd.i32 $S3,$S3,$R3
8484 ++ vadd.i32 $S4,$S4,$R4
8485 ++
8486 ++ vst4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]!
8487 ++ vst4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]!
8488 ++ vst4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]!
8489 ++ vst4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]!
8490 ++ vst1.32 {${S4}[0]},[$tbl0,:32]
8491 ++ vst1.32 {${S4}[1]},[$tbl1,:32]
8492 ++
8493 ++ b .Lsquare_neon
8494 ++
8495 ++.align 4
8496 ++.Lsquare_break_neon:
8497 ++ add $tbl0,$ctx,#(48+2*4*9)
8498 ++ add $tbl1,$ctx,#(48+3*4*9)
8499 ++
8500 ++ vmov $R0,$D0#lo @ r^4:r^3
8501 ++ vshl.u32 $S1,$D1#lo,#2 @ *5
8502 ++ vmov $R1,$D1#lo
8503 ++ vshl.u32 $S2,$D2#lo,#2
8504 ++ vmov $R2,$D2#lo
8505 ++ vshl.u32 $S3,$D3#lo,#2
8506 ++ vmov $R3,$D3#lo
8507 ++ vshl.u32 $S4,$D4#lo,#2
8508 ++ vmov $R4,$D4#lo
8509 ++ vadd.i32 $S1,$S1,$D1#lo
8510 ++ vadd.i32 $S2,$S2,$D2#lo
8511 ++ vadd.i32 $S3,$S3,$D3#lo
8512 ++ vadd.i32 $S4,$S4,$D4#lo
8513 ++
8514 ++ vst4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]!
8515 ++ vst4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]!
8516 ++ vst4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]!
8517 ++ vst4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]!
8518 ++ vst1.32 {${S4}[0]},[$tbl0]
8519 ++ vst1.32 {${S4}[1]},[$tbl1]
8520 ++
8521 ++.Lno_init_neon:
8522 ++ ret @ bx lr
8523 ++.size poly1305_init_neon,.-poly1305_init_neon
8524 ++
8525 ++.type poly1305_blocks_neon,%function
8526 ++.align 5
8527 ++poly1305_blocks_neon:
8528 ++.Lpoly1305_blocks_neon:
8529 ++ ldr ip,[$ctx,#36] @ is_base2_26
8530 ++
8531 ++ cmp $len,#64
8532 ++ blo .Lpoly1305_blocks
8533 ++
8534 ++ stmdb sp!,{r4-r7}
8535 ++ vstmdb sp!,{d8-d15} @ ABI specification says so
8536 ++
8537 ++ tst ip,ip @ is_base2_26?
8538 ++ bne .Lbase2_26_neon
8539 ++
8540 ++ stmdb sp!,{r1-r3,lr}
8541 ++ bl .Lpoly1305_init_neon
8542 ++
8543 ++ ldr r4,[$ctx,#0] @ load hash value base 2^32
8544 ++ ldr r5,[$ctx,#4]
8545 ++ ldr r6,[$ctx,#8]
8546 ++ ldr r7,[$ctx,#12]
8547 ++ ldr ip,[$ctx,#16]
8548 ++
8549 ++ and r2,r4,#0x03ffffff @ base 2^32 -> base 2^26
8550 ++ mov r3,r4,lsr#26
8551 ++ veor $D0#lo,$D0#lo,$D0#lo
8552 ++ mov r4,r5,lsr#20
8553 ++ orr r3,r3,r5,lsl#6
8554 ++ veor $D1#lo,$D1#lo,$D1#lo
8555 ++ mov r5,r6,lsr#14
8556 ++ orr r4,r4,r6,lsl#12
8557 ++ veor $D2#lo,$D2#lo,$D2#lo
8558 ++ mov r6,r7,lsr#8
8559 ++ orr r5,r5,r7,lsl#18
8560 ++ veor $D3#lo,$D3#lo,$D3#lo
8561 ++ and r3,r3,#0x03ffffff
8562 ++ orr r6,r6,ip,lsl#24
8563 ++ veor $D4#lo,$D4#lo,$D4#lo
8564 ++ and r4,r4,#0x03ffffff
8565 ++ mov r1,#1
8566 ++ and r5,r5,#0x03ffffff
8567 ++ str r1,[$ctx,#36] @ set is_base2_26
8568 ++
8569 ++ vmov.32 $D0#lo[0],r2
8570 ++ vmov.32 $D1#lo[0],r3
8571 ++ vmov.32 $D2#lo[0],r4
8572 ++ vmov.32 $D3#lo[0],r5
8573 ++ vmov.32 $D4#lo[0],r6
8574 ++ adr $zeros,.Lzeros
8575 ++
8576 ++ ldmia sp!,{r1-r3,lr}
8577 ++ b .Lhash_loaded
8578 ++
8579 ++.align 4
8580 ++.Lbase2_26_neon:
8581 ++ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
8582 ++ @ load hash value
8583 ++
8584 ++ veor $D0#lo,$D0#lo,$D0#lo
8585 ++ veor $D1#lo,$D1#lo,$D1#lo
8586 ++ veor $D2#lo,$D2#lo,$D2#lo
8587 ++ veor $D3#lo,$D3#lo,$D3#lo
8588 ++ veor $D4#lo,$D4#lo,$D4#lo
8589 ++ vld4.32 {$D0#lo[0],$D1#lo[0],$D2#lo[0],$D3#lo[0]},[$ctx]!
8590 ++ adr $zeros,.Lzeros
8591 ++ vld1.32 {$D4#lo[0]},[$ctx]
8592 ++ sub $ctx,$ctx,#16 @ rewind
8593 ++
8594 ++.Lhash_loaded:
8595 ++ add $in2,$inp,#32
8596 ++ mov $padbit,$padbit,lsl#24
8597 ++ tst $len,#31
8598 ++ beq .Leven
8599 ++
8600 ++ vld4.32 {$H0#lo[0],$H1#lo[0],$H2#lo[0],$H3#lo[0]},[$inp]!
8601 ++ vmov.32 $H4#lo[0],$padbit
8602 ++ sub $len,$len,#16
8603 ++ add $in2,$inp,#32
8604 ++
8605 ++# ifdef __ARMEB__
8606 ++ vrev32.8 $H0,$H0
8607 ++ vrev32.8 $H3,$H3
8608 ++ vrev32.8 $H1,$H1
8609 ++ vrev32.8 $H2,$H2
8610 ++# endif
8611 ++ vsri.u32 $H4#lo,$H3#lo,#8 @ base 2^32 -> base 2^26
8612 ++ vshl.u32 $H3#lo,$H3#lo,#18
8613 ++
8614 ++ vsri.u32 $H3#lo,$H2#lo,#14
8615 ++ vshl.u32 $H2#lo,$H2#lo,#12
8616 ++ vadd.i32 $H4#hi,$H4#lo,$D4#lo @ add hash value and move to #hi
8617 ++
8618 ++ vbic.i32 $H3#lo,#0xfc000000
8619 ++ vsri.u32 $H2#lo,$H1#lo,#20
8620 ++ vshl.u32 $H1#lo,$H1#lo,#6
8621 ++
8622 ++ vbic.i32 $H2#lo,#0xfc000000
8623 ++ vsri.u32 $H1#lo,$H0#lo,#26
8624 ++ vadd.i32 $H3#hi,$H3#lo,$D3#lo
8625 ++
8626 ++ vbic.i32 $H0#lo,#0xfc000000
8627 ++ vbic.i32 $H1#lo,#0xfc000000
8628 ++ vadd.i32 $H2#hi,$H2#lo,$D2#lo
8629 ++
8630 ++ vadd.i32 $H0#hi,$H0#lo,$D0#lo
8631 ++ vadd.i32 $H1#hi,$H1#lo,$D1#lo
8632 ++
8633 ++ mov $tbl1,$zeros
8634 ++ add $tbl0,$ctx,#48
8635 ++
8636 ++ cmp $len,$len
8637 ++ b .Long_tail
8638 ++
8639 ++.align 4
8640 ++.Leven:
8641 ++ subs $len,$len,#64
8642 ++ it lo
8643 ++ movlo $in2,$zeros
8644 ++
8645 ++ vmov.i32 $H4,#1<<24 @ padbit, yes, always
8646 ++ vld4.32 {$H0#lo,$H1#lo,$H2#lo,$H3#lo},[$inp] @ inp[0:1]
8647 ++ add $inp,$inp,#64
8648 ++ vld4.32 {$H0#hi,$H1#hi,$H2#hi,$H3#hi},[$in2] @ inp[2:3] (or 0)
8649 ++ add $in2,$in2,#64
8650 ++ itt hi
8651 ++ addhi $tbl1,$ctx,#(48+1*9*4)
8652 ++ addhi $tbl0,$ctx,#(48+3*9*4)
8653 ++
8654 ++# ifdef __ARMEB__
8655 ++ vrev32.8 $H0,$H0
8656 ++ vrev32.8 $H3,$H3
8657 ++ vrev32.8 $H1,$H1
8658 ++ vrev32.8 $H2,$H2
8659 ++# endif
8660 ++ vsri.u32 $H4,$H3,#8 @ base 2^32 -> base 2^26
8661 ++ vshl.u32 $H3,$H3,#18
8662 ++
8663 ++ vsri.u32 $H3,$H2,#14
8664 ++ vshl.u32 $H2,$H2,#12
8665 ++
8666 ++ vbic.i32 $H3,#0xfc000000
8667 ++ vsri.u32 $H2,$H1,#20
8668 ++ vshl.u32 $H1,$H1,#6
8669 ++
8670 ++ vbic.i32 $H2,#0xfc000000
8671 ++ vsri.u32 $H1,$H0,#26
8672 ++
8673 ++ vbic.i32 $H0,#0xfc000000
8674 ++ vbic.i32 $H1,#0xfc000000
8675 ++
8676 ++ bls .Lskip_loop
8677 ++
8678 ++ vld4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]! @ load r^2
8679 ++ vld4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]! @ load r^4
8680 ++ vld4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]!
8681 ++ vld4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]!
8682 ++ b .Loop_neon
8683 ++
8684 ++.align 5
8685 ++.Loop_neon:
8686 ++ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
8687 ++ @ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
8688 ++ @ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
8689 ++ @ \___________________/
8690 ++ @ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
8691 ++ @ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
8692 ++ @ \___________________/ \____________________/
8693 ++ @
8694 ++ @ Note that we start with inp[2:3]*r^2. This is because it
8695 ++ @ doesn't depend on reduction in previous iteration.
8696 ++ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
8697 ++ @ d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4
8698 ++ @ d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4
8699 ++ @ d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4
8700 ++ @ d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4
8701 ++ @ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
8702 ++
8703 ++ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
8704 ++ @ inp[2:3]*r^2
8705 ++
8706 ++ vadd.i32 $H2#lo,$H2#lo,$D2#lo @ accumulate inp[0:1]
8707 ++ vmull.u32 $D2,$H2#hi,${R0}[1]
8708 ++ vadd.i32 $H0#lo,$H0#lo,$D0#lo
8709 ++ vmull.u32 $D0,$H0#hi,${R0}[1]
8710 ++ vadd.i32 $H3#lo,$H3#lo,$D3#lo
8711 ++ vmull.u32 $D3,$H3#hi,${R0}[1]
8712 ++ vmlal.u32 $D2,$H1#hi,${R1}[1]
8713 ++ vadd.i32 $H1#lo,$H1#lo,$D1#lo
8714 ++ vmull.u32 $D1,$H1#hi,${R0}[1]
8715 ++
8716 ++ vadd.i32 $H4#lo,$H4#lo,$D4#lo
8717 ++ vmull.u32 $D4,$H4#hi,${R0}[1]
8718 ++ subs $len,$len,#64
8719 ++ vmlal.u32 $D0,$H4#hi,${S1}[1]
8720 ++ it lo
8721 ++ movlo $in2,$zeros
8722 ++ vmlal.u32 $D3,$H2#hi,${R1}[1]
8723 ++ vld1.32 ${S4}[1],[$tbl1,:32]
8724 ++ vmlal.u32 $D1,$H0#hi,${R1}[1]
8725 ++ vmlal.u32 $D4,$H3#hi,${R1}[1]
8726 ++
8727 ++ vmlal.u32 $D0,$H3#hi,${S2}[1]
8728 ++ vmlal.u32 $D3,$H1#hi,${R2}[1]
8729 ++ vmlal.u32 $D4,$H2#hi,${R2}[1]
8730 ++ vmlal.u32 $D1,$H4#hi,${S2}[1]
8731 ++ vmlal.u32 $D2,$H0#hi,${R2}[1]
8732 ++
8733 ++ vmlal.u32 $D3,$H0#hi,${R3}[1]
8734 ++ vmlal.u32 $D0,$H2#hi,${S3}[1]
8735 ++ vmlal.u32 $D4,$H1#hi,${R3}[1]
8736 ++ vmlal.u32 $D1,$H3#hi,${S3}[1]
8737 ++ vmlal.u32 $D2,$H4#hi,${S3}[1]
8738 ++
8739 ++ vmlal.u32 $D3,$H4#hi,${S4}[1]
8740 ++ vmlal.u32 $D0,$H1#hi,${S4}[1]
8741 ++ vmlal.u32 $D4,$H0#hi,${R4}[1]
8742 ++ vmlal.u32 $D1,$H2#hi,${S4}[1]
8743 ++ vmlal.u32 $D2,$H3#hi,${S4}[1]
8744 ++
8745 ++ vld4.32 {$H0#hi,$H1#hi,$H2#hi,$H3#hi},[$in2] @ inp[2:3] (or 0)
8746 ++ add $in2,$in2,#64
8747 ++
8748 ++ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
8749 ++ @ (hash+inp[0:1])*r^4 and accumulate
8750 ++
8751 ++ vmlal.u32 $D3,$H3#lo,${R0}[0]
8752 ++ vmlal.u32 $D0,$H0#lo,${R0}[0]
8753 ++ vmlal.u32 $D4,$H4#lo,${R0}[0]
8754 ++ vmlal.u32 $D1,$H1#lo,${R0}[0]
8755 ++ vmlal.u32 $D2,$H2#lo,${R0}[0]
8756 ++ vld1.32 ${S4}[0],[$tbl0,:32]
8757 ++
8758 ++ vmlal.u32 $D3,$H2#lo,${R1}[0]
8759 ++ vmlal.u32 $D0,$H4#lo,${S1}[0]
8760 ++ vmlal.u32 $D4,$H3#lo,${R1}[0]
8761 ++ vmlal.u32 $D1,$H0#lo,${R1}[0]
8762 ++ vmlal.u32 $D2,$H1#lo,${R1}[0]
8763 ++
8764 ++ vmlal.u32 $D3,$H1#lo,${R2}[0]
8765 ++ vmlal.u32 $D0,$H3#lo,${S2}[0]
8766 ++ vmlal.u32 $D4,$H2#lo,${R2}[0]
8767 ++ vmlal.u32 $D1,$H4#lo,${S2}[0]
8768 ++ vmlal.u32 $D2,$H0#lo,${R2}[0]
8769 ++
8770 ++ vmlal.u32 $D3,$H0#lo,${R3}[0]
8771 ++ vmlal.u32 $D0,$H2#lo,${S3}[0]
8772 ++ vmlal.u32 $D4,$H1#lo,${R3}[0]
8773 ++ vmlal.u32 $D1,$H3#lo,${S3}[0]
8774 ++ vmlal.u32 $D3,$H4#lo,${S4}[0]
8775 ++
8776 ++ vmlal.u32 $D2,$H4#lo,${S3}[0]
8777 ++ vmlal.u32 $D0,$H1#lo,${S4}[0]
8778 ++ vmlal.u32 $D4,$H0#lo,${R4}[0]
8779 ++ vmov.i32 $H4,#1<<24 @ padbit, yes, always
8780 ++ vmlal.u32 $D1,$H2#lo,${S4}[0]
8781 ++ vmlal.u32 $D2,$H3#lo,${S4}[0]
8782 ++
8783 ++ vld4.32 {$H0#lo,$H1#lo,$H2#lo,$H3#lo},[$inp] @ inp[0:1]
8784 ++ add $inp,$inp,#64
8785 ++# ifdef __ARMEB__
8786 ++ vrev32.8 $H0,$H0
8787 ++ vrev32.8 $H1,$H1
8788 ++ vrev32.8 $H2,$H2
8789 ++ vrev32.8 $H3,$H3
8790 ++# endif
8791 ++
8792 ++ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
8793 ++ @ lazy reduction interleaved with base 2^32 -> base 2^26 of
8794 ++ @ inp[0:3] previously loaded to $H0-$H3 and smashed to $H0-$H4.
8795 ++
8796 ++ vshr.u64 $T0,$D3,#26
8797 ++ vmovn.i64 $D3#lo,$D3
8798 ++ vshr.u64 $T1,$D0,#26
8799 ++ vmovn.i64 $D0#lo,$D0
8800 ++ vadd.i64 $D4,$D4,$T0 @ h3 -> h4
8801 ++ vbic.i32 $D3#lo,#0xfc000000
8802 ++ vsri.u32 $H4,$H3,#8 @ base 2^32 -> base 2^26
8803 ++ vadd.i64 $D1,$D1,$T1 @ h0 -> h1
8804 ++ vshl.u32 $H3,$H3,#18
8805 ++ vbic.i32 $D0#lo,#0xfc000000
8806 ++
8807 ++ vshrn.u64 $T0#lo,$D4,#26
8808 ++ vmovn.i64 $D4#lo,$D4
8809 ++ vshr.u64 $T1,$D1,#26
8810 ++ vmovn.i64 $D1#lo,$D1
8811 ++ vadd.i64 $D2,$D2,$T1 @ h1 -> h2
8812 ++ vsri.u32 $H3,$H2,#14
8813 ++ vbic.i32 $D4#lo,#0xfc000000
8814 ++ vshl.u32 $H2,$H2,#12
8815 ++ vbic.i32 $D1#lo,#0xfc000000
8816 ++
8817 ++ vadd.i32 $D0#lo,$D0#lo,$T0#lo
8818 ++ vshl.u32 $T0#lo,$T0#lo,#2
8819 ++ vbic.i32 $H3,#0xfc000000
8820 ++ vshrn.u64 $T1#lo,$D2,#26
8821 ++ vmovn.i64 $D2#lo,$D2
8822 ++ vaddl.u32 $D0,$D0#lo,$T0#lo @ h4 -> h0 [widen for a sec]
8823 ++ vsri.u32 $H2,$H1,#20
8824 ++ vadd.i32 $D3#lo,$D3#lo,$T1#lo @ h2 -> h3
8825 ++ vshl.u32 $H1,$H1,#6
8826 ++ vbic.i32 $D2#lo,#0xfc000000
8827 ++ vbic.i32 $H2,#0xfc000000
8828 ++
8829 ++ vshrn.u64 $T0#lo,$D0,#26 @ re-narrow
8830 ++ vmovn.i64 $D0#lo,$D0
8831 ++ vsri.u32 $H1,$H0,#26
8832 ++ vbic.i32 $H0,#0xfc000000
8833 ++ vshr.u32 $T1#lo,$D3#lo,#26
8834 ++ vbic.i32 $D3#lo,#0xfc000000
8835 ++ vbic.i32 $D0#lo,#0xfc000000
8836 ++ vadd.i32 $D1#lo,$D1#lo,$T0#lo @ h0 -> h1
8837 ++ vadd.i32 $D4#lo,$D4#lo,$T1#lo @ h3 -> h4
8838 ++ vbic.i32 $H1,#0xfc000000
8839 ++
8840 ++ bhi .Loop_neon
8841 ++
8842 ++.Lskip_loop:
8843 ++ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
8844 ++ @ multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
8845 ++
8846 ++ add $tbl1,$ctx,#(48+0*9*4)
8847 ++ add $tbl0,$ctx,#(48+1*9*4)
8848 ++ adds $len,$len,#32
8849 ++ it ne
8850 ++ movne $len,#0
8851 ++ bne .Long_tail
8852 ++
8853 ++ vadd.i32 $H2#hi,$H2#lo,$D2#lo @ add hash value and move to #hi
8854 ++ vadd.i32 $H0#hi,$H0#lo,$D0#lo
8855 ++ vadd.i32 $H3#hi,$H3#lo,$D3#lo
8856 ++ vadd.i32 $H1#hi,$H1#lo,$D1#lo
8857 ++ vadd.i32 $H4#hi,$H4#lo,$D4#lo
8858 ++
8859 ++.Long_tail:
8860 ++ vld4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]! @ load r^1
8861 ++ vld4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]! @ load r^2
8862 ++
8863 ++ vadd.i32 $H2#lo,$H2#lo,$D2#lo @ can be redundant
8864 ++ vmull.u32 $D2,$H2#hi,$R0
8865 ++ vadd.i32 $H0#lo,$H0#lo,$D0#lo
8866 ++ vmull.u32 $D0,$H0#hi,$R0
8867 ++ vadd.i32 $H3#lo,$H3#lo,$D3#lo
8868 ++ vmull.u32 $D3,$H3#hi,$R0
8869 ++ vadd.i32 $H1#lo,$H1#lo,$D1#lo
8870 ++ vmull.u32 $D1,$H1#hi,$R0
8871 ++ vadd.i32 $H4#lo,$H4#lo,$D4#lo
8872 ++ vmull.u32 $D4,$H4#hi,$R0
8873 ++
8874 ++ vmlal.u32 $D0,$H4#hi,$S1
8875 ++ vld4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]!
8876 ++ vmlal.u32 $D3,$H2#hi,$R1
8877 ++ vld4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]!
8878 ++ vmlal.u32 $D1,$H0#hi,$R1
8879 ++ vmlal.u32 $D4,$H3#hi,$R1
8880 ++ vmlal.u32 $D2,$H1#hi,$R1
8881 ++
8882 ++ vmlal.u32 $D3,$H1#hi,$R2
8883 ++ vld1.32 ${S4}[1],[$tbl1,:32]
8884 ++ vmlal.u32 $D0,$H3#hi,$S2
8885 ++ vld1.32 ${S4}[0],[$tbl0,:32]
8886 ++ vmlal.u32 $D4,$H2#hi,$R2
8887 ++ vmlal.u32 $D1,$H4#hi,$S2
8888 ++ vmlal.u32 $D2,$H0#hi,$R2
8889 ++
8890 ++ vmlal.u32 $D3,$H0#hi,$R3
8891 ++ it ne
8892 ++ addne $tbl1,$ctx,#(48+2*9*4)
8893 ++ vmlal.u32 $D0,$H2#hi,$S3
8894 ++ it ne
8895 ++ addne $tbl0,$ctx,#(48+3*9*4)
8896 ++ vmlal.u32 $D4,$H1#hi,$R3
8897 ++ vmlal.u32 $D1,$H3#hi,$S3
8898 ++ vmlal.u32 $D2,$H4#hi,$S3
8899 ++
8900 ++ vmlal.u32 $D3,$H4#hi,$S4
8901 ++ vorn $MASK,$MASK,$MASK @ all-ones, can be redundant
8902 ++ vmlal.u32 $D0,$H1#hi,$S4
8903 ++ vshr.u64 $MASK,$MASK,#38
8904 ++ vmlal.u32 $D4,$H0#hi,$R4
8905 ++ vmlal.u32 $D1,$H2#hi,$S4
8906 ++ vmlal.u32 $D2,$H3#hi,$S4
8907 ++
8908 ++ beq .Lshort_tail
8909 ++
8910 ++ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
8911 ++ @ (hash+inp[0:1])*r^4:r^3 and accumulate
8912 ++
8913 ++ vld4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]! @ load r^3
8914 ++ vld4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]! @ load r^4
8915 ++
8916 ++ vmlal.u32 $D2,$H2#lo,$R0
8917 ++ vmlal.u32 $D0,$H0#lo,$R0
8918 ++ vmlal.u32 $D3,$H3#lo,$R0
8919 ++ vmlal.u32 $D1,$H1#lo,$R0
8920 ++ vmlal.u32 $D4,$H4#lo,$R0
8921 ++
8922 ++ vmlal.u32 $D0,$H4#lo,$S1
8923 ++ vld4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]!
8924 ++ vmlal.u32 $D3,$H2#lo,$R1
8925 ++ vld4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]!
8926 ++ vmlal.u32 $D1,$H0#lo,$R1
8927 ++ vmlal.u32 $D4,$H3#lo,$R1
8928 ++ vmlal.u32 $D2,$H1#lo,$R1
8929 ++
8930 ++ vmlal.u32 $D3,$H1#lo,$R2
8931 ++ vld1.32 ${S4}[1],[$tbl1,:32]
8932 ++ vmlal.u32 $D0,$H3#lo,$S2
8933 ++ vld1.32 ${S4}[0],[$tbl0,:32]
8934 ++ vmlal.u32 $D4,$H2#lo,$R2
8935 ++ vmlal.u32 $D1,$H4#lo,$S2
8936 ++ vmlal.u32 $D2,$H0#lo,$R2
8937 ++
8938 ++ vmlal.u32 $D3,$H0#lo,$R3
8939 ++ vmlal.u32 $D0,$H2#lo,$S3
8940 ++ vmlal.u32 $D4,$H1#lo,$R3
8941 ++ vmlal.u32 $D1,$H3#lo,$S3
8942 ++ vmlal.u32 $D2,$H4#lo,$S3
8943 ++
8944 ++ vmlal.u32 $D3,$H4#lo,$S4
8945 ++ vorn $MASK,$MASK,$MASK @ all-ones
8946 ++ vmlal.u32 $D0,$H1#lo,$S4
8947 ++ vshr.u64 $MASK,$MASK,#38
8948 ++ vmlal.u32 $D4,$H0#lo,$R4
8949 ++ vmlal.u32 $D1,$H2#lo,$S4
8950 ++ vmlal.u32 $D2,$H3#lo,$S4
8951 ++
8952 ++.Lshort_tail:
8953 ++ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
8954 ++ @ horizontal addition
8955 ++
8956 ++ vadd.i64 $D3#lo,$D3#lo,$D3#hi
8957 ++ vadd.i64 $D0#lo,$D0#lo,$D0#hi
8958 ++ vadd.i64 $D4#lo,$D4#lo,$D4#hi
8959 ++ vadd.i64 $D1#lo,$D1#lo,$D1#hi
8960 ++ vadd.i64 $D2#lo,$D2#lo,$D2#hi
8961 ++
8962 ++ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
8963 ++ @ lazy reduction, but without narrowing
8964 ++
8965 ++ vshr.u64 $T0,$D3,#26
8966 ++ vand.i64 $D3,$D3,$MASK
8967 ++ vshr.u64 $T1,$D0,#26
8968 ++ vand.i64 $D0,$D0,$MASK
8969 ++ vadd.i64 $D4,$D4,$T0 @ h3 -> h4
8970 ++ vadd.i64 $D1,$D1,$T1 @ h0 -> h1
8971 ++
8972 ++ vshr.u64 $T0,$D4,#26
8973 ++ vand.i64 $D4,$D4,$MASK
8974 ++ vshr.u64 $T1,$D1,#26
8975 ++ vand.i64 $D1,$D1,$MASK
8976 ++ vadd.i64 $D2,$D2,$T1 @ h1 -> h2
8977 ++
8978 ++ vadd.i64 $D0,$D0,$T0
8979 ++ vshl.u64 $T0,$T0,#2
8980 ++ vshr.u64 $T1,$D2,#26
8981 ++ vand.i64 $D2,$D2,$MASK
8982 ++ vadd.i64 $D0,$D0,$T0 @ h4 -> h0
8983 ++ vadd.i64 $D3,$D3,$T1 @ h2 -> h3
8984 ++
8985 ++ vshr.u64 $T0,$D0,#26
8986 ++ vand.i64 $D0,$D0,$MASK
8987 ++ vshr.u64 $T1,$D3,#26
8988 ++ vand.i64 $D3,$D3,$MASK
8989 ++ vadd.i64 $D1,$D1,$T0 @ h0 -> h1
8990 ++ vadd.i64 $D4,$D4,$T1 @ h3 -> h4
8991 ++
8992 ++ cmp $len,#0
8993 ++ bne .Leven
8994 ++
8995 ++ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
8996 ++ @ store hash value
8997 ++
8998 ++ vst4.32 {$D0#lo[0],$D1#lo[0],$D2#lo[0],$D3#lo[0]},[$ctx]!
8999 ++ vst1.32 {$D4#lo[0]},[$ctx]
9000 ++
9001 ++ vldmia sp!,{d8-d15} @ epilogue
9002 ++ ldmia sp!,{r4-r7}
9003 ++ ret @ bx lr
9004 ++.size poly1305_blocks_neon,.-poly1305_blocks_neon
9005 ++
9006 ++.align 5
9007 ++.Lzeros:
9008 ++.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9009 ++#ifndef __KERNEL__
9010 ++.LOPENSSL_armcap:
9011 ++# ifdef _WIN32
9012 ++.word OPENSSL_armcap_P
9013 ++# else
9014 ++.word OPENSSL_armcap_P-.Lpoly1305_init
9015 ++# endif
9016 ++.comm OPENSSL_armcap_P,4,4
9017 ++.hidden OPENSSL_armcap_P
9018 ++#endif
9019 ++#endif
9020 ++___
9021 ++} }
9022 ++$code.=<<___;
9023 ++.asciz "Poly1305 for ARMv4/NEON, CRYPTOGAMS by \@dot-asm"
9024 ++.align 2
9025 ++___
9026 ++
9027 ++foreach (split("\n",$code)) { # post-process the generated assembly one line at a time
9028 ++ s/\`([^\`]*)\`/eval $1/geo; # replace each backtick-quoted span with the result of evaluating it as Perl
9029 ++
9030 ++ s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo or # qN#lo -> d(2N), qN#hi -> d(2N+1); the "or" chain stops at the first rule that matched
9031 ++ s/\bret\b/bx lr/go or # "ret" -> "bx lr" (such a line is not rewritten again by the rule below)
9032 ++ s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4
9033 ++
9034 ++ print $_,"\n";
9035 ++}
9036 ++close STDOUT or die "error closing STDOUT: $!"; # enforce flush, and fail loudly instead of leaving a truncated output file
9037 +diff --git a/arch/arm/crypto/poly1305-core.S_shipped b/arch/arm/crypto/poly1305-core.S_shipped
9038 +new file mode 100644
9039 +index 000000000000..37b71d990293
9040 +--- /dev/null
9041 ++++ b/arch/arm/crypto/poly1305-core.S_shipped
9042 +@@ -0,0 +1,1158 @@
9043 ++#ifndef __KERNEL__
9044 ++# include "arm_arch.h"
9045 ++#else
9046 ++# define __ARM_ARCH__ __LINUX_ARM_ARCH__
9047 ++# define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__
9048 ++# define poly1305_init poly1305_init_arm
9049 ++# define poly1305_blocks poly1305_blocks_arm
9050 ++# define poly1305_emit poly1305_emit_arm
9051 ++.globl poly1305_blocks_neon
9052 ++#endif
9053 ++
9054 ++#if defined(__thumb2__)
9055 ++.syntax unified
9056 ++.thumb
9057 ++#else
9058 ++.code 32
9059 ++#endif
9060 ++
9061 ++.text
9062 ++
9063 ++.globl poly1305_emit
9064 ++.globl poly1305_blocks
9065 ++.globl poly1305_init
9066 ++.type poly1305_init,%function @ r0 = state, r1 = key bytes (or 0), r2 = function table (written only when !__KERNEL__)
9067 ++.align 5
9068 ++poly1305_init:
9069 ++.Lpoly1305_init:
9070 ++ stmdb sp!,{r4-r11} @ preserve r4-r11, restored at .Lno_key
9071 ++
9072 ++ eor r3,r3,r3 @ r3 = 0
9073 ++ cmp r1,#0 @ null key pointer? flags are consumed by moveq/beq below
9074 ++ str r3,[r0,#0] @ zero hash value
9075 ++ str r3,[r0,#4]
9076 ++ str r3,[r0,#8]
9077 ++ str r3,[r0,#12]
9078 ++ str r3,[r0,#16]
9079 ++ str r3,[r0,#36] @ clear is_base2_26
9080 ++ add r0,r0,#20 @ r0 = state+20, where the four key words are stored below
9081 ++
9082 ++#ifdef __thumb2__
9083 ++ it eq
9084 ++#endif
9085 ++ moveq r0,#0 @ no key: return 0 with only the hash and flag cleared
9086 ++ beq .Lno_key
9087 ++
9088 ++#if __ARM_MAX_ARCH__>=7
9089 ++ mov r3,#-1
9090 ++ str r3,[r0,#28] @ impossible key power value (state+48, tested against -1 by poly1305_init_neon)
9091 ++# ifndef __KERNEL__
9092 ++ adr r11,.Lpoly1305_init
9093 ++ ldr r12,.LOPENSSL_armcap
9094 ++# endif
9095 ++#endif
9096 ++ ldrb r4,[r1,#0] @ key bytes 0-3 are merged into r4, byte 0 least significant
9097 ++ mov r10,#0x0fffffff @ mask for key word 0
9098 ++ ldrb r5,[r1,#1]
9099 ++ and r3,r10,#-4 @ 0x0ffffffc, mask for key words 1-3
9100 ++ ldrb r6,[r1,#2]
9101 ++ ldrb r7,[r1,#3]
9102 ++ orr r4,r4,r5,lsl#8
9103 ++ ldrb r5,[r1,#4] @ key bytes 4-7 are merged into r5
9104 ++ orr r4,r4,r6,lsl#16
9105 ++ ldrb r6,[r1,#5]
9106 ++ orr r4,r4,r7,lsl#24
9107 ++ ldrb r7,[r1,#6]
9108 ++ and r4,r4,r10 @ key word 0 &= 0x0fffffff
9109 ++
9110 ++#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
9111 ++# if !defined(_WIN32)
9112 ++ ldr r12,[r11,r12] @ OPENSSL_armcap_P
9113 ++# endif
9114 ++# if defined(__APPLE__) || defined(_WIN32)
9115 ++ ldr r12,[r12]
9116 ++# endif
9117 ++#endif
9118 ++ ldrb r8,[r1,#7]
9119 ++ orr r5,r5,r6,lsl#8
9120 ++ ldrb r6,[r1,#8] @ key bytes 8-11 are merged into r6
9121 ++ orr r5,r5,r7,lsl#16
9122 ++ ldrb r7,[r1,#9]
9123 ++ orr r5,r5,r8,lsl#24
9124 ++ ldrb r8,[r1,#10]
9125 ++ and r5,r5,r3 @ key word 1 &= 0x0ffffffc
9126 ++
9127 ++#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
9128 ++ tst r12,#ARMV7_NEON @ check for NEON
9129 ++# ifdef __thumb2__
9130 ++ adr r9,.Lpoly1305_blocks_neon
9131 ++ adr r11,.Lpoly1305_blocks
9132 ++ it ne
9133 ++ movne r11,r9 @ NEON bit set: use the NEON blocks routine
9134 ++ adr r12,.Lpoly1305_emit
9135 ++ orr r11,r11,#1 @ thumb-ify addresses
9136 ++ orr r12,r12,#1
9137 ++# else
9138 ++ add r12,r11,#(.Lpoly1305_emit-.Lpoly1305_init)
9139 ++ ite eq
9140 ++ addeq r11,r11,#(.Lpoly1305_blocks-.Lpoly1305_init)
9141 ++ addne r11,r11,#(.Lpoly1305_blocks_neon-.Lpoly1305_init)
9142 ++# endif
9143 ++#endif
9144 ++ ldrb r9,[r1,#11]
9145 ++ orr r6,r6,r7,lsl#8
9146 ++ ldrb r7,[r1,#12] @ key bytes 12-15 are merged into r7
9147 ++ orr r6,r6,r8,lsl#16
9148 ++ ldrb r8,[r1,#13]
9149 ++ orr r6,r6,r9,lsl#24
9150 ++ ldrb r9,[r1,#14]
9151 ++ and r6,r6,r3 @ key word 2 &= 0x0ffffffc
9152 ++
9153 ++ ldrb r10,[r1,#15]
9154 ++ orr r7,r7,r8,lsl#8
9155 ++ str r4,[r0,#0] @ store masked key words at state+20..state+32
9156 ++ orr r7,r7,r9,lsl#16
9157 ++ str r5,[r0,#4]
9158 ++ orr r7,r7,r10,lsl#24
9159 ++ str r6,[r0,#8]
9160 ++ and r7,r7,r3 @ key word 3 &= 0x0ffffffc
9161 ++ str r7,[r0,#12]
9162 ++#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
9163 ++ stmia r2,{r11,r12} @ fill functions table
9164 ++ mov r0,#1 @ return 1: function table was written
9165 ++#else
9166 ++ mov r0,#0 @ return 0: function table left untouched
9167 ++#endif
9168 ++.Lno_key:
9169 ++ ldmia sp!,{r4-r11}
9170 ++#if __ARM_ARCH__>=5
9171 ++ bx lr @ bx lr
9172 ++#else
9173 ++ tst lr,#1
9174 ++ moveq pc,lr @ be binary compatible with V4, yet
9175 ++ .word 0xe12fff1e @ interoperable with Thumb ISA:-)
9176 ++#endif
9177 ++.size poly1305_init,.-poly1305_init
9178 ++.type poly1305_blocks,%function @ r0 = state, r1 = input, r2 = byte count, r3 = padbit
9179 ++.align 5
9180 ++poly1305_blocks:
9181 ++.Lpoly1305_blocks:
9182 ++ stmdb sp!,{r3-r11,lr}
9183 ++
9184 ++ ands r2,r2,#-16 @ round the byte count down to a multiple of 16
9185 ++ beq .Lno_data @ less than one full block: nothing to do
9186 ++
9187 ++ add r2,r2,r1 @ end pointer
9188 ++ sub sp,sp,#32 @ scratch: [0],[4] partial result, [8] input ptr, [12] r0, [16] end ptr, [20..28] r10-r12
9189 ++
9190 ++#if __ARM_ARCH__<7
9191 ++ ldmia r0,{r4-r12} @ load context (r4-r8 hash, r9-r12 key)
9192 ++ add r0,r0,#20 @ r0 = state+20, same value the other branch gets from ldmia r0!
9193 ++ str r2,[sp,#16] @ offload stuff
9194 ++ str r0,[sp,#12]
9195 ++#else
9196 ++ ldr lr,[r0,#36] @ is_base2_26
9197 ++ ldmia r0!,{r4-r8} @ load hash value
9198 ++ str r2,[sp,#16] @ offload stuff
9199 ++ str r0,[sp,#12]
9200 ++
9201 ++ adds r9,r4,r5,lsl#26 @ base 2^26 -> base 2^32
9202 ++ mov r10,r5,lsr#6
9203 ++ adcs r10,r10,r6,lsl#20
9204 ++ mov r11,r6,lsr#12
9205 ++ adcs r11,r11,r7,lsl#14
9206 ++ mov r12,r7,lsr#18
9207 ++ adcs r12,r12,r8,lsl#8
9208 ++ mov r2,#0
9209 ++ teq lr,#0 @ was the hash stored in base 2^26? (carry from adcs is kept for the adc below)
9210 ++ str r2,[r0,#16] @ clear is_base2_26
9211 ++ adc r2,r2,r8,lsr#24
9212 ++
9213 ++ itttt ne
9214 ++ movne r4,r9 @ choose between radixes
9215 ++ movne r5,r10
9216 ++ movne r6,r11
9217 ++ movne r7,r12
9218 ++ ldmia r0,{r9-r12} @ load key
9219 ++ it ne
9220 ++ movne r8,r2
9221 ++#endif
9222 ++
9223 ++ mov lr,r1 @ lr = running input pointer
9224 ++ cmp r3,#0 @ padbit nonzero? "hi" is consumed by addhi at the top of .Loop
9225 ++ str r10,[sp,#20] @ keep key words 1-3 on the stack; r10-r12 get overwritten inside the loop
9226 ++ str r11,[sp,#24]
9227 ++ str r12,[sp,#28]
9228 ++ b .Loop
9229 ++
9230 ++.align 4
9231 ++.Loop: @ one 16-byte block per iteration
9232 ++#if __ARM_ARCH__<7
9233 ++ ldrb r0,[lr],#16 @ load input
9234 ++# ifdef __thumb2__
9235 ++ it hi
9236 ++# endif
9237 ++ addhi r8,r8,#1 @ 1<<128
9238 ++ ldrb r1,[lr,#-15]
9239 ++ ldrb r2,[lr,#-14]
9240 ++ ldrb r3,[lr,#-13]
9241 ++ orr r1,r0,r1,lsl#8
9242 ++ ldrb r0,[lr,#-12]
9243 ++ orr r2,r1,r2,lsl#16
9244 ++ ldrb r1,[lr,#-11]
9245 ++ orr r3,r2,r3,lsl#24
9246 ++ ldrb r2,[lr,#-10]
9247 ++ adds r4,r4,r3 @ accumulate input
9248 ++
9249 ++ ldrb r3,[lr,#-9]
9250 ++ orr r1,r0,r1,lsl#8
9251 ++ ldrb r0,[lr,#-8]
9252 ++ orr r2,r1,r2,lsl#16
9253 ++ ldrb r1,[lr,#-7]
9254 ++ orr r3,r2,r3,lsl#24
9255 ++ ldrb r2,[lr,#-6]
9256 ++ adcs r5,r5,r3
9257 ++
9258 ++ ldrb r3,[lr,#-5]
9259 ++ orr r1,r0,r1,lsl#8
9260 ++ ldrb r0,[lr,#-4]
9261 ++ orr r2,r1,r2,lsl#16
9262 ++ ldrb r1,[lr,#-3]
9263 ++ orr r3,r2,r3,lsl#24
9264 ++ ldrb r2,[lr,#-2]
9265 ++ adcs r6,r6,r3
9266 ++
9267 ++ ldrb r3,[lr,#-1]
9268 ++ orr r1,r0,r1,lsl#8
9269 ++ str lr,[sp,#8] @ offload input pointer
9270 ++ orr r2,r1,r2,lsl#16
9271 ++ add r10,r10,r10,lsr#2 @ r10 = key1 + (key1>>2)
9272 ++ orr r3,r2,r3,lsl#24
9273 ++#else
9274 ++ ldr r0,[lr],#16 @ load input
9275 ++ it hi
9276 ++ addhi r8,r8,#1 @ padbit
9277 ++ ldr r1,[lr,#-12]
9278 ++ ldr r2,[lr,#-8]
9279 ++ ldr r3,[lr,#-4]
9280 ++# ifdef __ARMEB__
9281 ++ rev r0,r0
9282 ++ rev r1,r1
9283 ++ rev r2,r2
9284 ++ rev r3,r3
9285 ++# endif
9286 ++ adds r4,r4,r0 @ accumulate input
9287 ++ str lr,[sp,#8] @ offload input pointer
9288 ++ adcs r5,r5,r1
9289 ++ add r10,r10,r10,lsr#2 @ r10 = key1 + (key1>>2)
9290 ++ adcs r6,r6,r2
9291 ++#endif
9292 ++ add r11,r11,r11,lsr#2 @ r11 = key2 + (key2>>2)
9293 ++ adcs r7,r7,r3
9294 ++ add r12,r12,r12,lsr#2 @ r12 = key3 + (key3>>2)
9295 ++
9296 ++ umull r2,r3,r5,r9 @ r3:r2 (d1) = h1*key0
9297 ++ adc r8,r8,#0 @ fold the last input carry into h4
9298 ++ umull r0,r1,r4,r9 @ r1:r0 (d0) = h0*key0
9299 ++ umlal r2,r3,r8,r10
9300 ++ umlal r0,r1,r7,r10
9301 ++ ldr r10,[sp,#20] @ reload r10
9302 ++ umlal r2,r3,r6,r12
9303 ++ umlal r0,r1,r5,r12
9304 ++ umlal r2,r3,r7,r11
9305 ++ umlal r0,r1,r6,r11
9306 ++ umlal r2,r3,r4,r10
9307 ++ str r0,[sp,#0] @ future r4
9308 ++ mul r0,r11,r8 @ start d2 with (key2 + key2>>2)*h4, low word only
9309 ++ ldr r11,[sp,#24] @ reload r11
9310 ++ adds r2,r2,r1 @ d1+=d0>>32
9311 ++ eor r1,r1,r1
9312 ++ adc lr,r3,#0 @ future r6
9313 ++ str r2,[sp,#4] @ future r5
9314 ++
9315 ++ mul r2,r12,r8 @ start d3 with (key3 + key3>>2)*h4, low word only
9316 ++ eor r3,r3,r3
9317 ++ umlal r0,r1,r7,r12
9318 ++ ldr r12,[sp,#28] @ reload r12
9319 ++ umlal r2,r3,r7,r9
9320 ++ umlal r0,r1,r6,r9
9321 ++ umlal r2,r3,r6,r10
9322 ++ umlal r0,r1,r5,r10
9323 ++ umlal r2,r3,r5,r11
9324 ++ umlal r0,r1,r4,r11
9325 ++ umlal r2,r3,r4,r12
9326 ++ ldr r4,[sp,#0]
9327 ++ mul r8,r9,r8 @ h4 = key0*h4, low word only
9328 ++ ldr r5,[sp,#4]
9329 ++
9330 ++ adds r6,lr,r0 @ d2+=d1>>32
9331 ++ ldr lr,[sp,#8] @ reload input pointer
9332 ++ adc r1,r1,#0
9333 ++ adds r7,r2,r1 @ d3+=d2>>32
9334 ++ ldr r0,[sp,#16] @ reload end pointer
9335 ++ adc r3,r3,#0
9336 ++ add r8,r8,r3 @ h4+=d3>>32
9337 ++
9338 ++ and r1,r8,#-4 @ r1 = h4 with its low two bits cleared
9339 ++ and r8,r8,#3 @ keep only the low two bits in h4
9340 ++ add r1,r1,r1,lsr#2 @ *=5
9341 ++ adds r4,r4,r1 @ add the folded high part back into h0 and ripple the carry up
9342 ++ adcs r5,r5,#0
9343 ++ adcs r6,r6,#0
9344 ++ adcs r7,r7,#0
9345 ++ adc r8,r8,#0
9346 ++
9347 ++ cmp r0,lr @ done yet?
9348 ++ bhi .Loop @ NOTE(review): loops with "hi" set, so addhi fires on every later block whatever r3 was - confirm padbit 0 is only passed with one block
9349 ++
9350 ++ ldr r0,[sp,#12] @ r0 = state+20 saved on entry
9351 ++ add sp,sp,#32
9352 ++ stmdb r0,{r4-r8} @ store the result
9353 ++
9354 ++.Lno_data:
9355 ++#if __ARM_ARCH__>=5
9356 ++ ldmia sp!,{r3-r11,pc}
9357 ++#else
9358 ++ ldmia sp!,{r3-r11,lr}
9359 ++ tst lr,#1
9360 ++ moveq pc,lr @ be binary compatible with V4, yet
9361 ++ .word 0xe12fff1e @ interoperable with Thumb ISA:-)
9362 ++#endif
9363 ++.size poly1305_blocks,.-poly1305_blocks
9364 ++.type poly1305_emit,%function @ r0 = state, r1 = 16-byte output, r2 = four 32-bit words added to the hash
9365 ++.align 5
9366 ++poly1305_emit:
9367 ++.Lpoly1305_emit:
9368 ++ stmdb sp!,{r4-r11}
9369 ++
9370 ++ ldmia r0,{r3-r7} @ load the five hash words from state+0..state+16
9371 ++
9372 ++#if __ARM_ARCH__>=7
9373 ++ ldr ip,[r0,#36] @ is_base2_26
9374 ++
9375 ++ adds r8,r3,r4,lsl#26 @ base 2^26 -> base 2^32
9376 ++ mov r9,r4,lsr#6
9377 ++ adcs r9,r9,r5,lsl#20
9378 ++ mov r10,r5,lsr#12
9379 ++ adcs r10,r10,r6,lsl#14
9380 ++ mov r11,r6,lsr#18
9381 ++ adcs r11,r11,r7,lsl#8
9382 ++ mov r0,#0
9383 ++ adc r0,r0,r7,lsr#24
9384 ++
9385 ++ tst ip,ip @ use the converted words only if is_base2_26 is nonzero
9386 ++ itttt ne
9387 ++ movne r3,r8
9388 ++ movne r4,r9
9389 ++ movne r5,r10
9390 ++ movne r6,r11
9391 ++ it ne
9392 ++ movne r7,r0
9393 ++#endif
9394 ++
9395 ++ adds r8,r3,#5 @ compare to modulus
9396 ++ adcs r9,r4,#0
9397 ++ adcs r10,r5,#0
9398 ++ adcs r11,r6,#0
9399 ++ adc r0,r7,#0
9400 ++ tst r0,#4 @ did it carry/borrow?
9401 ++
9402 ++#ifdef __thumb2__
9403 ++ it ne
9404 ++#endif
9405 ++ movne r3,r8 @ bit 2 of the top word set: take hash+5 instead of hash
9406 ++ ldr r8,[r2,#0] @ loads of the r2 words are interleaved with the selects
9407 ++#ifdef __thumb2__
9408 ++ it ne
9409 ++#endif
9410 ++ movne r4,r9
9411 ++ ldr r9,[r2,#4]
9412 ++#ifdef __thumb2__
9413 ++ it ne
9414 ++#endif
9415 ++ movne r5,r10
9416 ++ ldr r10,[r2,#8]
9417 ++#ifdef __thumb2__
9418 ++ it ne
9419 ++#endif
9420 ++ movne r6,r11
9421 ++ ldr r11,[r2,#12]
9422 ++
9423 ++ adds r3,r3,r8 @ add the four r2 words with carry propagation
9424 ++ adcs r4,r4,r9
9425 ++ adcs r5,r5,r10
9426 ++ adc r6,r6,r11 @ carry out of the top word is dropped
9427 ++
9428 ++#if __ARM_ARCH__>=7
9429 ++# ifdef __ARMEB__
9430 ++ rev r3,r3
9431 ++ rev r4,r4
9432 ++ rev r5,r5
9433 ++ rev r6,r6
9434 ++# endif
9435 ++ str r3,[r1,#0] @ word stores to the output buffer
9436 ++ str r4,[r1,#4]
9437 ++ str r5,[r1,#8]
9438 ++ str r6,[r1,#12]
9439 ++#else
9440 ++ strb r3,[r1,#0] @ byte stores, least significant byte of each word first
9441 ++ mov r3,r3,lsr#8
9442 ++ strb r4,[r1,#4]
9443 ++ mov r4,r4,lsr#8
9444 ++ strb r5,[r1,#8]
9445 ++ mov r5,r5,lsr#8
9446 ++ strb r6,[r1,#12]
9447 ++ mov r6,r6,lsr#8
9448 ++
9449 ++ strb r3,[r1,#1]
9450 ++ mov r3,r3,lsr#8
9451 ++ strb r4,[r1,#5]
9452 ++ mov r4,r4,lsr#8
9453 ++ strb r5,[r1,#9]
9454 ++ mov r5,r5,lsr#8
9455 ++ strb r6,[r1,#13]
9456 ++ mov r6,r6,lsr#8
9457 ++
9458 ++ strb r3,[r1,#2]
9459 ++ mov r3,r3,lsr#8
9460 ++ strb r4,[r1,#6]
9461 ++ mov r4,r4,lsr#8
9462 ++ strb r5,[r1,#10]
9463 ++ mov r5,r5,lsr#8
9464 ++ strb r6,[r1,#14]
9465 ++ mov r6,r6,lsr#8
9466 ++
9467 ++ strb r3,[r1,#3]
9468 ++ strb r4,[r1,#7]
9469 ++ strb r5,[r1,#11]
9470 ++ strb r6,[r1,#15]
9471 ++#endif
9472 ++ ldmia sp!,{r4-r11}
9473 ++#if __ARM_ARCH__>=5
9474 ++ bx lr @ bx lr
9475 ++#else
9476 ++ tst lr,#1
9477 ++ moveq pc,lr @ be binary compatible with V4, yet
9478 ++ .word 0xe12fff1e @ interoperable with Thumb ISA:-)
9479 ++#endif
9480 ++.size poly1305_emit,.-poly1305_emit
9481 ++#if __ARM_MAX_ARCH__>=7
9482 ++.fpu neon
9483 ++
9484 ++.type poly1305_init_neon,%function @ r0 = state; fills the four 9-word key power tables at state+48 unless already present
9485 ++.align 5
9486 ++poly1305_init_neon:
9487 ++.Lpoly1305_init_neon: @ saves nothing itself: overwrites r2-r7 and NEON registers d0-d8, q4-q9, q15
9488 ++ ldr r3,[r0,#48] @ first table element
9489 ++ cmp r3,#-1 @ is value impossible?
9490 ++ bne .Lno_init_neon @ not the -1 marker: tables were already computed
9491 ++
9492 ++ ldr r4,[r0,#20] @ load key base 2^32
9493 ++ ldr r5,[r0,#24]
9494 ++ ldr r6,[r0,#28]
9495 ++ ldr r7,[r0,#32]
9496 ++
9497 ++ and r2,r4,#0x03ffffff @ base 2^32 -> base 2^26
9498 ++ mov r3,r4,lsr#26
9499 ++ mov r4,r5,lsr#20
9500 ++ orr r3,r3,r5,lsl#6
9501 ++ mov r5,r6,lsr#14
9502 ++ orr r4,r4,r6,lsl#12
9503 ++ mov r6,r7,lsr#8
9504 ++ orr r5,r5,r7,lsl#18
9505 ++ and r3,r3,#0x03ffffff
9506 ++ and r4,r4,#0x03ffffff
9507 ++ and r5,r5,#0x03ffffff
9508 ++
9509 ++ vdup.32 d0,r2 @ r^1 in both lanes
9510 ++ add r2,r3,r3,lsl#2 @ *5
9511 ++ vdup.32 d1,r3 @ layout: d0,d1,d3,d5,d7 = limbs 0-4; d2,d4,d6,d8 = 5 * limbs 1-4
9512 ++ add r3,r4,r4,lsl#2
9513 ++ vdup.32 d2,r2
9514 ++ vdup.32 d3,r4
9515 ++ add r4,r5,r5,lsl#2
9516 ++ vdup.32 d4,r3
9517 ++ vdup.32 d5,r5
9518 ++ add r5,r6,r6,lsl#2
9519 ++ vdup.32 d6,r4
9520 ++ vdup.32 d7,r6
9521 ++ vdup.32 d8,r5
9522 ++
9523 ++ mov r5,#2 @ counter
9524 ++
9525 ++.Lsquare_neon: @ multiply both lanes by the lane-1 value; the results land in q5-q9
9526 ++ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
9527 ++ @ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
9528 ++ @ d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4
9529 ++ @ d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4
9530 ++ @ d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4
9531 ++ @ d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4
9532 ++
9533 ++ vmull.u32 q5,d0,d0[1]
9534 ++ vmull.u32 q6,d1,d0[1]
9535 ++ vmull.u32 q7,d3,d0[1]
9536 ++ vmull.u32 q8,d5,d0[1]
9537 ++ vmull.u32 q9,d7,d0[1]
9538 ++
9539 ++ vmlal.u32 q5,d7,d2[1]
9540 ++ vmlal.u32 q6,d0,d1[1]
9541 ++ vmlal.u32 q7,d1,d1[1]
9542 ++ vmlal.u32 q8,d3,d1[1]
9543 ++ vmlal.u32 q9,d5,d1[1]
9544 ++
9545 ++ vmlal.u32 q5,d5,d4[1]
9546 ++ vmlal.u32 q6,d7,d4[1]
9547 ++ vmlal.u32 q8,d1,d3[1]
9548 ++ vmlal.u32 q7,d0,d3[1]
9549 ++ vmlal.u32 q9,d3,d3[1]
9550 ++
9551 ++ vmlal.u32 q5,d3,d6[1]
9552 ++ vmlal.u32 q8,d0,d5[1]
9553 ++ vmlal.u32 q6,d5,d6[1]
9554 ++ vmlal.u32 q7,d7,d6[1]
9555 ++ vmlal.u32 q9,d1,d5[1]
9556 ++
9557 ++ vmlal.u32 q8,d7,d8[1]
9558 ++ vmlal.u32 q5,d1,d8[1]
9559 ++ vmlal.u32 q6,d3,d8[1]
9560 ++ vmlal.u32 q7,d5,d8[1]
9561 ++ vmlal.u32 q9,d0,d7[1]
9562 ++
9563 ++ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
9564 ++ @ lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
9565 ++ @ and P. Schwabe
9566 ++ @
9567 ++ @ H0>>+H1>>+H2>>+H3>>+H4
9568 ++ @ H3>>+H4>>*5+H0>>+H1
9569 ++ @
9570 ++ @ Trivia.
9571 ++ @
9572 ++ @ Result of multiplication of n-bit number by m-bit number is
9573 ++ @ n+m bits wide. However! Even though 2^n is a n+1-bit number,
9574 ++ @ m-bit number multiplied by 2^n is still n+m bits wide.
9575 ++ @
9576 ++ @ Sum of two n-bit numbers is n+1 bits wide, sum of three - n+2,
9577 ++ @ and so is sum of four. Sum of 2^m n-m-bit numbers and n-bit
9578 ++ @ one is n+1 bits wide.
9579 ++ @
9580 ++ @ >>+ denotes Hnext += Hn>>26, Hn &= 0x3ffffff. This means that
9581 ++ @ H0, H2, H3 are guaranteed to be 26 bits wide, while H1 and H4
9582 ++ @ can be 27. However! In cases when their width exceeds 26 bits
9583 ++ @ they are limited by 2^26+2^6. This in turn means that *sum*
9584 ++ @ of the products with these values can still be viewed as sum
9585 ++ @ of 52-bit numbers as long as the amount of addends is not a
9586 ++ @ power of 2. For example,
9587 ++ @
9588 ++ @ H4 = H4*R0 + H3*R1 + H2*R2 + H1*R3 + H0 * R4,
9589 ++ @
9590 ++ @ which can't be larger than 5 * (2^26 + 2^6) * (2^26 + 2^6), or
9591 ++ @ 5 * (2^52 + 2*2^32 + 2^12), which in turn is smaller than
9592 ++ @ 8 * (2^52) or 2^55. However, the value is then multiplied by
9593 ++ @ 5, so we should be looking at 5 * 5 * (2^52 + 2^33 + 2^12),
9594 ++ @ which is less than 32 * (2^52) or 2^57. And when processing
9595 ++ @ data we are looking at triple as many addends...
9596 ++ @
9597 ++ @ In key setup procedure pre-reduced H0 is limited by 5*4+1 and
9598 ++ @ 5*H4 - by 5*5 52-bit addends, or 57 bits. But when hashing the
9599 ++ @ input H0 is limited by (5*4+1)*3 addends, or 58 bits, while
9600 ++ @ 5*H4 by 5*5*3, or 59[!] bits. How is this relevant? vmlal.u32
9601 ++ @ instruction accepts 2x32-bit input and writes 2x64-bit result.
9602 ++ @ This means that result of reduction has to be compressed upon
9603 ++ @ loop wrap-around. This can be done in the process of reduction
9604 ++ @ to minimize amount of instructions [as well as amount of
9605 ++ @ 128-bit instructions, which benefits low-end processors], but
9606 ++ @ one has to watch for H2 (which is narrower than H0) and 5*H4
9607 ++ @ not being wider than 58 bits, so that result of right shift
9608 ++ @ by 26 bits fits in 32 bits. This is also useful on x86,
9609 ++ @ because it allows using paddd in place of paddq, which
9610 ++ @ benefits Atom, where paddq is ridiculously slow.
9611 ++
9612 ++ vshr.u64 q15,q8,#26 @ carry out of h3
9613 ++ vmovn.i64 d16,q8 @ narrow h3 to 32 bits per lane
9614 ++ vshr.u64 q4,q5,#26 @ carry out of h0
9615 ++ vmovn.i64 d10,q5
9616 ++ vadd.i64 q9,q9,q15 @ h3 -> h4
9617 ++ vbic.i32 d16,#0xfc000000 @ &=0x03ffffff
9618 ++ vadd.i64 q6,q6,q4 @ h0 -> h1
9619 ++ vbic.i32 d10,#0xfc000000
9620 ++
9621 ++ vshrn.u64 d30,q9,#26 @ carry out of h4, narrowed
9622 ++ vmovn.i64 d18,q9
9623 ++ vshr.u64 q4,q6,#26 @ carry out of h1
9624 ++ vmovn.i64 d12,q6
9625 ++ vadd.i64 q7,q7,q4 @ h1 -> h2
9626 ++ vbic.i32 d18,#0xfc000000
9627 ++ vbic.i32 d12,#0xfc000000
9628 ++
9629 ++ vadd.i32 d10,d10,d30 @ h0 += carry, and 4*carry two lines below: 5*carry in total
9630 ++ vshl.u32 d30,d30,#2
9631 ++ vshrn.u64 d8,q7,#26 @ carry out of h2, narrowed
9632 ++ vmovn.i64 d14,q7
9633 ++ vadd.i32 d10,d10,d30 @ h4 -> h0
9634 ++ vadd.i32 d16,d16,d8 @ h2 -> h3
9635 ++ vbic.i32 d14,#0xfc000000
9636 ++
9637 ++ vshr.u32 d30,d10,#26
9638 ++ vbic.i32 d10,#0xfc000000
9639 ++ vshr.u32 d8,d16,#26
9640 ++ vbic.i32 d16,#0xfc000000
9641 ++ vadd.i32 d12,d12,d30 @ h0 -> h1
9642 ++ vadd.i32 d18,d18,d8 @ h3 -> h4
9643 ++
9644 ++ subs r5,r5,#1 @ counter starts at 2: first pass falls through, second pass breaks out
9645 ++ beq .Lsquare_break_neon
9646 ++
9647 ++ add r6,r0,#(48+0*9*4) @ r6 = first table (lane 0 is stored here)
9648 ++ add r7,r0,#(48+1*9*4) @ r7 = second table (lane 1 is stored here)
9649 ++
9650 ++ vtrn.32 d0,d10 @ r^2:r^1
9651 ++ vtrn.32 d3,d14
9652 ++ vtrn.32 d5,d16
9653 ++ vtrn.32 d1,d12
9654 ++ vtrn.32 d7,d18
9655 ++
9656 ++ vshl.u32 d4,d3,#2 @ *5
9657 ++ vshl.u32 d6,d5,#2
9658 ++ vshl.u32 d2,d1,#2
9659 ++ vshl.u32 d8,d7,#2
9660 ++ vadd.i32 d4,d4,d3
9661 ++ vadd.i32 d2,d2,d1
9662 ++ vadd.i32 d6,d6,d5
9663 ++ vadd.i32 d8,d8,d7
9664 ++
9665 ++ vst4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! @ nine words per table, in register order d0..d8
9666 ++ vst4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]!
9667 ++ vst4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]!
9668 ++ vst4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]!
9669 ++ vst1.32 {d8[0]},[r6,:32]
9670 ++ vst1.32 {d8[1]},[r7,:32]
9671 ++
9672 ++ b .Lsquare_neon @ second pass multiplies the r^1 and r^2 lanes by r^2
9673 ++
9674 ++.align 4
9675 ++.Lsquare_break_neon:
9676 ++ add r6,r0,#(48+2*4*9) @ r6 = third table
9677 ++ add r7,r0,#(48+3*4*9) @ r7 = fourth table
9678 ++
9679 ++ vmov d0,d10 @ r^4:r^3
9680 ++ vshl.u32 d2,d12,#2 @ *5
9681 ++ vmov d1,d12
9682 ++ vshl.u32 d4,d14,#2
9683 ++ vmov d3,d14
9684 ++ vshl.u32 d6,d16,#2
9685 ++ vmov d5,d16
9686 ++ vshl.u32 d8,d18,#2
9687 ++ vmov d7,d18
9688 ++ vadd.i32 d2,d2,d12
9689 ++ vadd.i32 d4,d4,d14
9690 ++ vadd.i32 d6,d6,d16
9691 ++ vadd.i32 d8,d8,d18
9692 ++
9693 ++ vst4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]!
9694 ++ vst4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]!
9695 ++ vst4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]!
9696 ++ vst4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]!
9697 ++ vst1.32 {d8[0]},[r6]
9698 ++ vst1.32 {d8[1]},[r7]
9699 ++
9700 ++.Lno_init_neon:
9701 ++ bx lr @ bx lr
9702 ++.size poly1305_init_neon,.-poly1305_init_neon
9703 ++
9704 ++.type poly1305_blocks_neon,%function
9705 ++.align 5
9706 ++poly1305_blocks_neon:
9707 ++.Lpoly1305_blocks_neon:
9708 ++ ldr ip,[r0,#36] @ is_base2_26
9709 ++
9710 ++ cmp r2,#64
9711 ++ blo .Lpoly1305_blocks
9712 ++
9713 ++ stmdb sp!,{r4-r7}
9714 ++ vstmdb sp!,{d8-d15} @ ABI specification says so
9715 ++
9716 ++ tst ip,ip @ is_base2_26?
9717 ++ bne .Lbase2_26_neon
9718 ++
9719 ++ stmdb sp!,{r1-r3,lr}
9720 ++ bl .Lpoly1305_init_neon
9721 ++
9722 ++ ldr r4,[r0,#0] @ load hash value base 2^32
9723 ++ ldr r5,[r0,#4]
9724 ++ ldr r6,[r0,#8]
9725 ++ ldr r7,[r0,#12]
9726 ++ ldr ip,[r0,#16]
9727 ++
9728 ++ and r2,r4,#0x03ffffff @ base 2^32 -> base 2^26
9729 ++ mov r3,r4,lsr#26
9730 ++ veor d10,d10,d10
9731 ++ mov r4,r5,lsr#20
9732 ++ orr r3,r3,r5,lsl#6
9733 ++ veor d12,d12,d12
9734 ++ mov r5,r6,lsr#14
9735 ++ orr r4,r4,r6,lsl#12
9736 ++ veor d14,d14,d14
9737 ++ mov r6,r7,lsr#8
9738 ++ orr r5,r5,r7,lsl#18
9739 ++ veor d16,d16,d16
9740 ++ and r3,r3,#0x03ffffff
9741 ++ orr r6,r6,ip,lsl#24
9742 ++ veor d18,d18,d18
9743 ++ and r4,r4,#0x03ffffff
9744 ++ mov r1,#1
9745 ++ and r5,r5,#0x03ffffff
9746 ++ str r1,[r0,#36] @ set is_base2_26
9747 ++
9748 ++ vmov.32 d10[0],r2
9749 ++ vmov.32 d12[0],r3
9750 ++ vmov.32 d14[0],r4
9751 ++ vmov.32 d16[0],r5
9752 ++ vmov.32 d18[0],r6
9753 ++ adr r5,.Lzeros
9754 ++
9755 ++ ldmia sp!,{r1-r3,lr}
9756 ++ b .Lhash_loaded
9757 ++
9758 ++.align 4
9759 ++.Lbase2_26_neon:
9760 ++ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
9761 ++ @ load hash value
9762 ++
9763 ++ veor d10,d10,d10
9764 ++ veor d12,d12,d12
9765 ++ veor d14,d14,d14
9766 ++ veor d16,d16,d16
9767 ++ veor d18,d18,d18
9768 ++ vld4.32 {d10[0],d12[0],d14[0],d16[0]},[r0]!
9769 ++ adr r5,.Lzeros
9770 ++ vld1.32 {d18[0]},[r0]
9771 ++ sub r0,r0,#16 @ rewind
9772 ++
9773 ++.Lhash_loaded:
9774 ++ add r4,r1,#32
9775 ++ mov r3,r3,lsl#24
9776 ++ tst r2,#31
9777 ++ beq .Leven
9778 ++
9779 ++ vld4.32 {d20[0],d22[0],d24[0],d26[0]},[r1]!
9780 ++ vmov.32 d28[0],r3
9781 ++ sub r2,r2,#16
9782 ++ add r4,r1,#32
9783 ++
9784 ++# ifdef __ARMEB__
9785 ++ vrev32.8 q10,q10
9786 ++ vrev32.8 q13,q13
9787 ++ vrev32.8 q11,q11
9788 ++ vrev32.8 q12,q12
9789 ++# endif
9790 ++ vsri.u32 d28,d26,#8 @ base 2^32 -> base 2^26
9791 ++ vshl.u32 d26,d26,#18
9792 ++
9793 ++ vsri.u32 d26,d24,#14
9794 ++ vshl.u32 d24,d24,#12
9795 ++ vadd.i32 d29,d28,d18 @ add hash value and move to #hi
9796 ++
9797 ++ vbic.i32 d26,#0xfc000000
9798 ++ vsri.u32 d24,d22,#20
9799 ++ vshl.u32 d22,d22,#6
9800 ++
9801 ++ vbic.i32 d24,#0xfc000000
9802 ++ vsri.u32 d22,d20,#26
9803 ++ vadd.i32 d27,d26,d16
9804 ++
9805 ++ vbic.i32 d20,#0xfc000000
9806 ++ vbic.i32 d22,#0xfc000000
9807 ++ vadd.i32 d25,d24,d14
9808 ++
9809 ++ vadd.i32 d21,d20,d10
9810 ++ vadd.i32 d23,d22,d12
9811 ++
9812 ++ mov r7,r5
9813 ++ add r6,r0,#48
9814 ++
9815 ++ cmp r2,r2
9816 ++ b .Long_tail
9817 ++
9818 ++.align 4
9819 ++.Leven:
9820 ++ subs r2,r2,#64
9821 ++ it lo
9822 ++ movlo r4,r5
9823 ++
9824 ++ vmov.i32 q14,#1<<24 @ padbit, yes, always
9825 ++ vld4.32 {d20,d22,d24,d26},[r1] @ inp[0:1]
9826 ++ add r1,r1,#64
9827 ++ vld4.32 {d21,d23,d25,d27},[r4] @ inp[2:3] (or 0)
9828 ++ add r4,r4,#64
9829 ++ itt hi
9830 ++ addhi r7,r0,#(48+1*9*4)
9831 ++ addhi r6,r0,#(48+3*9*4)
9832 ++
9833 ++# ifdef __ARMEB__
9834 ++ vrev32.8 q10,q10
9835 ++ vrev32.8 q13,q13
9836 ++ vrev32.8 q11,q11
9837 ++ vrev32.8 q12,q12
9838 ++# endif
9839 ++ vsri.u32 q14,q13,#8 @ base 2^32 -> base 2^26
9840 ++ vshl.u32 q13,q13,#18
9841 ++
9842 ++ vsri.u32 q13,q12,#14
9843 ++ vshl.u32 q12,q12,#12
9844 ++
9845 ++ vbic.i32 q13,#0xfc000000
9846 ++ vsri.u32 q12,q11,#20
9847 ++ vshl.u32 q11,q11,#6
9848 ++
9849 ++ vbic.i32 q12,#0xfc000000
9850 ++ vsri.u32 q11,q10,#26
9851 ++
9852 ++ vbic.i32 q10,#0xfc000000
9853 ++ vbic.i32 q11,#0xfc000000
9854 ++
9855 ++ bls .Lskip_loop
9856 ++
9857 ++ vld4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! @ load r^2
9858 ++ vld4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! @ load r^4
9859 ++ vld4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]!
9860 ++ vld4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]!
9861 ++ b .Loop_neon
9862 ++
9863 ++.align 5
9864 ++.Loop_neon:
9865 ++ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
9866 ++ @ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
9867 ++ @ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
9868 ++ @ ___________________/
9869 ++ @ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
9870 ++ @ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
9871 ++ @ ___________________/ ____________________/
9872 ++ @
9873 ++ @ Note that we start with inp[2:3]*r^2. This is because it
9874 ++ @ doesn't depend on reduction in previous iteration.
9875 ++ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
9876 ++ @ d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4
9877 ++ @ d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4
9878 ++ @ d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4
9879 ++ @ d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4
9880 ++ @ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
9881 ++
9882 ++ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
9883 ++ @ inp[2:3]*r^2
9884 ++
9885 ++ vadd.i32 d24,d24,d14 @ accumulate inp[0:1]
9886 ++ vmull.u32 q7,d25,d0[1]
9887 ++ vadd.i32 d20,d20,d10
9888 ++ vmull.u32 q5,d21,d0[1]
9889 ++ vadd.i32 d26,d26,d16
9890 ++ vmull.u32 q8,d27,d0[1]
9891 ++ vmlal.u32 q7,d23,d1[1]
9892 ++ vadd.i32 d22,d22,d12
9893 ++ vmull.u32 q6,d23,d0[1]
9894 ++
9895 ++ vadd.i32 d28,d28,d18
9896 ++ vmull.u32 q9,d29,d0[1]
9897 ++ subs r2,r2,#64
9898 ++ vmlal.u32 q5,d29,d2[1]
9899 ++ it lo
9900 ++ movlo r4,r5
9901 ++ vmlal.u32 q8,d25,d1[1]
9902 ++ vld1.32 d8[1],[r7,:32]
9903 ++ vmlal.u32 q6,d21,d1[1]
9904 ++ vmlal.u32 q9,d27,d1[1]
9905 ++
9906 ++ vmlal.u32 q5,d27,d4[1]
9907 ++ vmlal.u32 q8,d23,d3[1]
9908 ++ vmlal.u32 q9,d25,d3[1]
9909 ++ vmlal.u32 q6,d29,d4[1]
9910 ++ vmlal.u32 q7,d21,d3[1]
9911 ++
9912 ++ vmlal.u32 q8,d21,d5[1]
9913 ++ vmlal.u32 q5,d25,d6[1]
9914 ++ vmlal.u32 q9,d23,d5[1]
9915 ++ vmlal.u32 q6,d27,d6[1]
9916 ++ vmlal.u32 q7,d29,d6[1]
9917 ++
9918 ++ vmlal.u32 q8,d29,d8[1]
9919 ++ vmlal.u32 q5,d23,d8[1]
9920 ++ vmlal.u32 q9,d21,d7[1]
9921 ++ vmlal.u32 q6,d25,d8[1]
9922 ++ vmlal.u32 q7,d27,d8[1]
9923 ++
9924 ++ vld4.32 {d21,d23,d25,d27},[r4] @ inp[2:3] (or 0)
9925 ++ add r4,r4,#64
9926 ++
9927 ++ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
9928 ++ @ (hash+inp[0:1])*r^4 and accumulate
9929 ++
9930 ++ vmlal.u32 q8,d26,d0[0]
9931 ++ vmlal.u32 q5,d20,d0[0]
9932 ++ vmlal.u32 q9,d28,d0[0]
9933 ++ vmlal.u32 q6,d22,d0[0]
9934 ++ vmlal.u32 q7,d24,d0[0]
9935 ++ vld1.32 d8[0],[r6,:32]
9936 ++
9937 ++ vmlal.u32 q8,d24,d1[0]
9938 ++ vmlal.u32 q5,d28,d2[0]
9939 ++ vmlal.u32 q9,d26,d1[0]
9940 ++ vmlal.u32 q6,d20,d1[0]
9941 ++ vmlal.u32 q7,d22,d1[0]
9942 ++
9943 ++ vmlal.u32 q8,d22,d3[0]
9944 ++ vmlal.u32 q5,d26,d4[0]
9945 ++ vmlal.u32 q9,d24,d3[0]
9946 ++ vmlal.u32 q6,d28,d4[0]
9947 ++ vmlal.u32 q7,d20,d3[0]
9948 ++
9949 ++ vmlal.u32 q8,d20,d5[0]
9950 ++ vmlal.u32 q5,d24,d6[0]
9951 ++ vmlal.u32 q9,d22,d5[0]
9952 ++ vmlal.u32 q6,d26,d6[0]
9953 ++ vmlal.u32 q8,d28,d8[0]
9954 ++
9955 ++ vmlal.u32 q7,d28,d6[0]
9956 ++ vmlal.u32 q5,d22,d8[0]
9957 ++ vmlal.u32 q9,d20,d7[0]
9958 ++ vmov.i32 q14,#1<<24 @ padbit, yes, always
9959 ++ vmlal.u32 q6,d24,d8[0]
9960 ++ vmlal.u32 q7,d26,d8[0]
9961 ++
9962 ++ vld4.32 {d20,d22,d24,d26},[r1] @ inp[0:1]
9963 ++ add r1,r1,#64
9964 ++# ifdef __ARMEB__
9965 ++ vrev32.8 q10,q10
9966 ++ vrev32.8 q11,q11
9967 ++ vrev32.8 q12,q12
9968 ++ vrev32.8 q13,q13
9969 ++# endif
9970 ++
9971 ++ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
9972 ++ @ lazy reduction interleaved with base 2^32 -> base 2^26 of
9973 ++ @ inp[0:3] previously loaded to q10-q13 and smashed to q10-q14.
9974 ++
9975 ++ vshr.u64 q15,q8,#26
9976 ++ vmovn.i64 d16,q8
9977 ++ vshr.u64 q4,q5,#26
9978 ++ vmovn.i64 d10,q5
9979 ++ vadd.i64 q9,q9,q15 @ h3 -> h4
9980 ++ vbic.i32 d16,#0xfc000000
9981 ++ vsri.u32 q14,q13,#8 @ base 2^32 -> base 2^26
9982 ++ vadd.i64 q6,q6,q4 @ h0 -> h1
9983 ++ vshl.u32 q13,q13,#18
9984 ++ vbic.i32 d10,#0xfc000000
9985 ++
9986 ++ vshrn.u64 d30,q9,#26
9987 ++ vmovn.i64 d18,q9
9988 ++ vshr.u64 q4,q6,#26
9989 ++ vmovn.i64 d12,q6
9990 ++ vadd.i64 q7,q7,q4 @ h1 -> h2
9991 ++ vsri.u32 q13,q12,#14
9992 ++ vbic.i32 d18,#0xfc000000
9993 ++ vshl.u32 q12,q12,#12
9994 ++ vbic.i32 d12,#0xfc000000
9995 ++
9996 ++ vadd.i32 d10,d10,d30
9997 ++ vshl.u32 d30,d30,#2
9998 ++ vbic.i32 q13,#0xfc000000
9999 ++ vshrn.u64 d8,q7,#26
10000 ++ vmovn.i64 d14,q7
10001 ++ vaddl.u32 q5,d10,d30 @ h4 -> h0 [widen for a sec]
10002 ++ vsri.u32 q12,q11,#20
10003 ++ vadd.i32 d16,d16,d8 @ h2 -> h3
10004 ++ vshl.u32 q11,q11,#6
10005 ++ vbic.i32 d14,#0xfc000000
10006 ++ vbic.i32 q12,#0xfc000000
10007 ++
10008 ++ vshrn.u64 d30,q5,#26 @ re-narrow
10009 ++ vmovn.i64 d10,q5
10010 ++ vsri.u32 q11,q10,#26
10011 ++ vbic.i32 q10,#0xfc000000
10012 ++ vshr.u32 d8,d16,#26
10013 ++ vbic.i32 d16,#0xfc000000
10014 ++ vbic.i32 d10,#0xfc000000
10015 ++ vadd.i32 d12,d12,d30 @ h0 -> h1
10016 ++ vadd.i32 d18,d18,d8 @ h3 -> h4
10017 ++ vbic.i32 q11,#0xfc000000
10018 ++
10019 ++ bhi .Loop_neon
10020 ++
10021 ++.Lskip_loop:
10022 ++ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
10023 ++ @ multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
10024 ++
10025 ++ add r7,r0,#(48+0*9*4)
10026 ++ add r6,r0,#(48+1*9*4)
10027 ++ adds r2,r2,#32
10028 ++ it ne
10029 ++ movne r2,#0
10030 ++ bne .Long_tail
10031 ++
10032 ++ vadd.i32 d25,d24,d14 @ add hash value and move to #hi
10033 ++ vadd.i32 d21,d20,d10
10034 ++ vadd.i32 d27,d26,d16
10035 ++ vadd.i32 d23,d22,d12
10036 ++ vadd.i32 d29,d28,d18
10037 ++
10038 ++.Long_tail:
10039 ++ vld4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! @ load r^1
10040 ++ vld4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! @ load r^2
10041 ++
10042 ++ vadd.i32 d24,d24,d14 @ can be redundant
10043 ++ vmull.u32 q7,d25,d0
10044 ++ vadd.i32 d20,d20,d10
10045 ++ vmull.u32 q5,d21,d0
10046 ++ vadd.i32 d26,d26,d16
10047 ++ vmull.u32 q8,d27,d0
10048 ++ vadd.i32 d22,d22,d12
10049 ++ vmull.u32 q6,d23,d0
10050 ++ vadd.i32 d28,d28,d18
10051 ++ vmull.u32 q9,d29,d0
10052 ++
10053 ++ vmlal.u32 q5,d29,d2
10054 ++ vld4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]!
10055 ++ vmlal.u32 q8,d25,d1
10056 ++ vld4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]!
10057 ++ vmlal.u32 q6,d21,d1
10058 ++ vmlal.u32 q9,d27,d1
10059 ++ vmlal.u32 q7,d23,d1
10060 ++
10061 ++ vmlal.u32 q8,d23,d3
10062 ++ vld1.32 d8[1],[r7,:32]
10063 ++ vmlal.u32 q5,d27,d4
10064 ++ vld1.32 d8[0],[r6,:32]
10065 ++ vmlal.u32 q9,d25,d3
10066 ++ vmlal.u32 q6,d29,d4
10067 ++ vmlal.u32 q7,d21,d3
10068 ++
10069 ++ vmlal.u32 q8,d21,d5
10070 ++ it ne
10071 ++ addne r7,r0,#(48+2*9*4)
10072 ++ vmlal.u32 q5,d25,d6
10073 ++ it ne
10074 ++ addne r6,r0,#(48+3*9*4)
10075 ++ vmlal.u32 q9,d23,d5
10076 ++ vmlal.u32 q6,d27,d6
10077 ++ vmlal.u32 q7,d29,d6
10078 ++
10079 ++ vmlal.u32 q8,d29,d8
10080 ++ vorn q0,q0,q0 @ all-ones, can be redundant
10081 ++ vmlal.u32 q5,d23,d8
10082 ++ vshr.u64 q0,q0,#38
10083 ++ vmlal.u32 q9,d21,d7
10084 ++ vmlal.u32 q6,d25,d8
10085 ++ vmlal.u32 q7,d27,d8
10086 ++
10087 ++ beq .Lshort_tail
10088 ++
10089 ++ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
10090 ++ @ (hash+inp[0:1])*r^4:r^3 and accumulate
10091 ++
10092 ++ vld4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! @ load r^3
10093 ++ vld4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! @ load r^4
10094 ++
10095 ++ vmlal.u32 q7,d24,d0
10096 ++ vmlal.u32 q5,d20,d0
10097 ++ vmlal.u32 q8,d26,d0
10098 ++ vmlal.u32 q6,d22,d0
10099 ++ vmlal.u32 q9,d28,d0
10100 ++
10101 ++ vmlal.u32 q5,d28,d2
10102 ++ vld4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]!
10103 ++ vmlal.u32 q8,d24,d1
10104 ++ vld4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]!
10105 ++ vmlal.u32 q6,d20,d1
10106 ++ vmlal.u32 q9,d26,d1
10107 ++ vmlal.u32 q7,d22,d1
10108 ++
10109 ++ vmlal.u32 q8,d22,d3
10110 ++ vld1.32 d8[1],[r7,:32]
10111 ++ vmlal.u32 q5,d26,d4
10112 ++ vld1.32 d8[0],[r6,:32]
10113 ++ vmlal.u32 q9,d24,d3
10114 ++ vmlal.u32 q6,d28,d4
10115 ++ vmlal.u32 q7,d20,d3
10116 ++
10117 ++ vmlal.u32 q8,d20,d5
10118 ++ vmlal.u32 q5,d24,d6
10119 ++ vmlal.u32 q9,d22,d5
10120 ++ vmlal.u32 q6,d26,d6
10121 ++ vmlal.u32 q7,d28,d6
10122 ++
10123 ++ vmlal.u32 q8,d28,d8
10124 ++ vorn q0,q0,q0 @ all-ones
10125 ++ vmlal.u32 q5,d22,d8
10126 ++ vshr.u64 q0,q0,#38
10127 ++ vmlal.u32 q9,d20,d7
10128 ++ vmlal.u32 q6,d24,d8
10129 ++ vmlal.u32 q7,d26,d8
10130 ++
10131 ++.Lshort_tail:
10132 ++ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
10133 ++ @ horizontal addition
10134 ++
10135 ++ vadd.i64 d16,d16,d17
10136 ++ vadd.i64 d10,d10,d11
10137 ++ vadd.i64 d18,d18,d19
10138 ++ vadd.i64 d12,d12,d13
10139 ++ vadd.i64 d14,d14,d15
10140 ++
10141 ++ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
10142 ++ @ lazy reduction, but without narrowing
10143 ++
10144 ++ vshr.u64 q15,q8,#26
10145 ++ vand.i64 q8,q8,q0
10146 ++ vshr.u64 q4,q5,#26
10147 ++ vand.i64 q5,q5,q0
10148 ++ vadd.i64 q9,q9,q15 @ h3 -> h4
10149 ++ vadd.i64 q6,q6,q4 @ h0 -> h1
10150 ++
10151 ++ vshr.u64 q15,q9,#26
10152 ++ vand.i64 q9,q9,q0
10153 ++ vshr.u64 q4,q6,#26
10154 ++ vand.i64 q6,q6,q0
10155 ++ vadd.i64 q7,q7,q4 @ h1 -> h2
10156 ++
10157 ++ vadd.i64 q5,q5,q15
10158 ++ vshl.u64 q15,q15,#2
10159 ++ vshr.u64 q4,q7,#26
10160 ++ vand.i64 q7,q7,q0
10161 ++ vadd.i64 q5,q5,q15 @ h4 -> h0
10162 ++ vadd.i64 q8,q8,q4 @ h2 -> h3
10163 ++
10164 ++ vshr.u64 q15,q5,#26
10165 ++ vand.i64 q5,q5,q0
10166 ++ vshr.u64 q4,q8,#26
10167 ++ vand.i64 q8,q8,q0
10168 ++ vadd.i64 q6,q6,q15 @ h0 -> h1
10169 ++ vadd.i64 q9,q9,q4 @ h3 -> h4
10170 ++
10171 ++ cmp r2,#0
10172 ++ bne .Leven
10173 ++
10174 ++ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
10175 ++ @ store hash value
10176 ++
10177 ++ vst4.32 {d10[0],d12[0],d14[0],d16[0]},[r0]!
10178 ++ vst1.32 {d18[0]},[r0]
10179 ++
10180 ++ vldmia sp!,{d8-d15} @ epilogue
10181 ++ ldmia sp!,{r4-r7}
10182 ++ bx lr @ bx lr
10183 ++.size poly1305_blocks_neon,.-poly1305_blocks_neon
10184 ++
10185 ++.align 5
10186 ++.Lzeros:
10187 ++.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
10188 ++#ifndef __KERNEL__
10189 ++.LOPENSSL_armcap:
10190 ++# ifdef _WIN32
10191 ++.word OPENSSL_armcap_P
10192 ++# else
10193 ++.word OPENSSL_armcap_P-.Lpoly1305_init
10194 ++# endif
10195 ++.comm OPENSSL_armcap_P,4,4
10196 ++.hidden OPENSSL_armcap_P
10197 ++#endif
10198 ++#endif
10199 ++.asciz "Poly1305 for ARMv4/NEON, CRYPTOGAMS by @dot-asm"
10200 ++.align 2
10201 +diff --git a/arch/arm/crypto/poly1305-glue.c b/arch/arm/crypto/poly1305-glue.c
10202 +new file mode 100644
10203 +index 000000000000..74a725ac89c9
10204 +--- /dev/null
10205 ++++ b/arch/arm/crypto/poly1305-glue.c
10206 +@@ -0,0 +1,276 @@
10207 ++// SPDX-License-Identifier: GPL-2.0
10208 ++/*
10209 ++ * OpenSSL/Cryptogams accelerated Poly1305 transform for ARM
10210 ++ *
10211 ++ * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@××××××.org>
10212 ++ */
10213 ++
10214 ++#include <asm/hwcap.h>
10215 ++#include <asm/neon.h>
10216 ++#include <asm/simd.h>
10217 ++#include <asm/unaligned.h>
10218 ++#include <crypto/algapi.h>
10219 ++#include <crypto/internal/hash.h>
10220 ++#include <crypto/internal/poly1305.h>
10221 ++#include <crypto/internal/simd.h>
10222 ++#include <linux/cpufeature.h>
10223 ++#include <linux/crypto.h>
10224 ++#include <linux/jump_label.h>
10225 ++#include <linux/module.h>
10226 ++
10227 ++void poly1305_init_arm(void *state, const u8 *key);
10228 ++void poly1305_blocks_arm(void *state, const u8 *src, u32 len, u32 hibit);
10229 ++void poly1305_emit_arm(void *state, __le32 *digest, const u32 *nonce);
10230 ++
10231 ++void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit)
10232 ++{
10233 ++}
10234 ++
10235 ++static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
10236 ++
10237 ++void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
10238 ++{
10239 ++ poly1305_init_arm(&dctx->h, key);
10240 ++ dctx->s[0] = get_unaligned_le32(key + 16);
10241 ++ dctx->s[1] = get_unaligned_le32(key + 20);
10242 ++ dctx->s[2] = get_unaligned_le32(key + 24);
10243 ++ dctx->s[3] = get_unaligned_le32(key + 28);
10244 ++ dctx->buflen = 0;
10245 ++}
10246 ++EXPORT_SYMBOL(poly1305_init_arch);
10247 ++
10248 ++static int arm_poly1305_init(struct shash_desc *desc)
10249 ++{
10250 ++ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
10251 ++
10252 ++ dctx->buflen = 0;
10253 ++ dctx->rset = 0;
10254 ++ dctx->sset = false;
10255 ++
10256 ++ return 0;
10257 ++}
10258 ++
10259 ++static void arm_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
10260 ++ u32 len, u32 hibit, bool do_neon)
10261 ++{
10262 ++ if (unlikely(!dctx->sset)) {
10263 ++ if (!dctx->rset) {
10264 ++ poly1305_init_arm(&dctx->h, src);
10265 ++ src += POLY1305_BLOCK_SIZE;
10266 ++ len -= POLY1305_BLOCK_SIZE;
10267 ++ dctx->rset = 1;
10268 ++ }
10269 ++ if (len >= POLY1305_BLOCK_SIZE) {
10270 ++ dctx->s[0] = get_unaligned_le32(src + 0);
10271 ++ dctx->s[1] = get_unaligned_le32(src + 4);
10272 ++ dctx->s[2] = get_unaligned_le32(src + 8);
10273 ++ dctx->s[3] = get_unaligned_le32(src + 12);
10274 ++ src += POLY1305_BLOCK_SIZE;
10275 ++ len -= POLY1305_BLOCK_SIZE;
10276 ++ dctx->sset = true;
10277 ++ }
10278 ++ if (len < POLY1305_BLOCK_SIZE)
10279 ++ return;
10280 ++ }
10281 ++
10282 ++ len &= ~(POLY1305_BLOCK_SIZE - 1);
10283 ++
10284 ++ if (static_branch_likely(&have_neon) && likely(do_neon))
10285 ++ poly1305_blocks_neon(&dctx->h, src, len, hibit);
10286 ++ else
10287 ++ poly1305_blocks_arm(&dctx->h, src, len, hibit);
10288 ++}
10289 ++
10290 ++static void arm_poly1305_do_update(struct poly1305_desc_ctx *dctx,
10291 ++ const u8 *src, u32 len, bool do_neon)
10292 ++{
10293 ++ if (unlikely(dctx->buflen)) {
10294 ++ u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);
10295 ++
10296 ++ memcpy(dctx->buf + dctx->buflen, src, bytes);
10297 ++ src += bytes;
10298 ++ len -= bytes;
10299 ++ dctx->buflen += bytes;
10300 ++
10301 ++ if (dctx->buflen == POLY1305_BLOCK_SIZE) {
10302 ++ arm_poly1305_blocks(dctx, dctx->buf,
10303 ++ POLY1305_BLOCK_SIZE, 1, false);
10304 ++ dctx->buflen = 0;
10305 ++ }
10306 ++ }
10307 ++
10308 ++ if (likely(len >= POLY1305_BLOCK_SIZE)) {
10309 ++ arm_poly1305_blocks(dctx, src, len, 1, do_neon);
10310 ++ src += round_down(len, POLY1305_BLOCK_SIZE);
10311 ++ len %= POLY1305_BLOCK_SIZE;
10312 ++ }
10313 ++
10314 ++ if (unlikely(len)) {
10315 ++ dctx->buflen = len;
10316 ++ memcpy(dctx->buf, src, len);
10317 ++ }
10318 ++}
10319 ++
10320 ++static int arm_poly1305_update(struct shash_desc *desc,
10321 ++ const u8 *src, unsigned int srclen)
10322 ++{
10323 ++ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
10324 ++
10325 ++ arm_poly1305_do_update(dctx, src, srclen, false);
10326 ++ return 0;
10327 ++}
10328 ++
10329 ++static int __maybe_unused arm_poly1305_update_neon(struct shash_desc *desc,
10330 ++ const u8 *src,
10331 ++ unsigned int srclen)
10332 ++{
10333 ++ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
10334 ++ bool do_neon = crypto_simd_usable() && srclen > 128;
10335 ++
10336 ++ if (static_branch_likely(&have_neon) && do_neon)
10337 ++ kernel_neon_begin();
10338 ++ arm_poly1305_do_update(dctx, src, srclen, do_neon);
10339 ++ if (static_branch_likely(&have_neon) && do_neon)
10340 ++ kernel_neon_end();
10341 ++ return 0;
10342 ++}
10343 ++
10344 ++void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
10345 ++ unsigned int nbytes)
10346 ++{
10347 ++ bool do_neon = IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
10348 ++ crypto_simd_usable();
10349 ++
10350 ++ if (unlikely(dctx->buflen)) {
10351 ++ u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);
10352 ++
10353 ++ memcpy(dctx->buf + dctx->buflen, src, bytes);
10354 ++ src += bytes;
10355 ++ nbytes -= bytes;
10356 ++ dctx->buflen += bytes;
10357 ++
10358 ++ if (dctx->buflen == POLY1305_BLOCK_SIZE) {
10359 ++ poly1305_blocks_arm(&dctx->h, dctx->buf,
10360 ++ POLY1305_BLOCK_SIZE, 1);
10361 ++ dctx->buflen = 0;
10362 ++ }
10363 ++ }
10364 ++
10365 ++ if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
10366 ++ unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
10367 ++
10368 ++ if (static_branch_likely(&have_neon) && do_neon) {
10369 ++ kernel_neon_begin();
10370 ++ poly1305_blocks_neon(&dctx->h, src, len, 1);
10371 ++ kernel_neon_end();
10372 ++ } else {
10373 ++ poly1305_blocks_arm(&dctx->h, src, len, 1);
10374 ++ }
10375 ++ src += len;
10376 ++ nbytes %= POLY1305_BLOCK_SIZE;
10377 ++ }
10378 ++
10379 ++ if (unlikely(nbytes)) {
10380 ++ dctx->buflen = nbytes;
10381 ++ memcpy(dctx->buf, src, nbytes);
10382 ++ }
10383 ++}
10384 ++EXPORT_SYMBOL(poly1305_update_arch);
10385 ++
10386 ++void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
10387 ++{
10388 ++ __le32 digest[4];
10389 ++ u64 f = 0;
10390 ++
10391 ++ if (unlikely(dctx->buflen)) {
10392 ++ dctx->buf[dctx->buflen++] = 1;
10393 ++ memset(dctx->buf + dctx->buflen, 0,
10394 ++ POLY1305_BLOCK_SIZE - dctx->buflen);
10395 ++ poly1305_blocks_arm(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
10396 ++ }
10397 ++
10398 ++ poly1305_emit_arm(&dctx->h, digest, dctx->s);
10399 ++
10400 ++ /* mac = (h + s) % (2^128) */
10401 ++ f = (f >> 32) + le32_to_cpu(digest[0]);
10402 ++ put_unaligned_le32(f, dst);
10403 ++ f = (f >> 32) + le32_to_cpu(digest[1]);
10404 ++ put_unaligned_le32(f, dst + 4);
10405 ++ f = (f >> 32) + le32_to_cpu(digest[2]);
10406 ++ put_unaligned_le32(f, dst + 8);
10407 ++ f = (f >> 32) + le32_to_cpu(digest[3]);
10408 ++ put_unaligned_le32(f, dst + 12);
10409 ++
10410 ++ *dctx = (struct poly1305_desc_ctx){};
10411 ++}
10412 ++EXPORT_SYMBOL(poly1305_final_arch);
10413 ++
10414 ++static int arm_poly1305_final(struct shash_desc *desc, u8 *dst)
10415 ++{
10416 ++ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
10417 ++
10418 ++ if (unlikely(!dctx->sset))
10419 ++ return -ENOKEY;
10420 ++
10421 ++ poly1305_final_arch(dctx, dst);
10422 ++ return 0;
10423 ++}
10424 ++
10425 ++static struct shash_alg arm_poly1305_algs[] = {{
10426 ++ .init = arm_poly1305_init,
10427 ++ .update = arm_poly1305_update,
10428 ++ .final = arm_poly1305_final,
10429 ++ .digestsize = POLY1305_DIGEST_SIZE,
10430 ++ .descsize = sizeof(struct poly1305_desc_ctx),
10431 ++
10432 ++ .base.cra_name = "poly1305",
10433 ++ .base.cra_driver_name = "poly1305-arm",
10434 ++ .base.cra_priority = 150,
10435 ++ .base.cra_blocksize = POLY1305_BLOCK_SIZE,
10436 ++ .base.cra_module = THIS_MODULE,
10437 ++#ifdef CONFIG_KERNEL_MODE_NEON
10438 ++}, {
10439 ++ .init = arm_poly1305_init,
10440 ++ .update = arm_poly1305_update_neon,
10441 ++ .final = arm_poly1305_final,
10442 ++ .digestsize = POLY1305_DIGEST_SIZE,
10443 ++ .descsize = sizeof(struct poly1305_desc_ctx),
10444 ++
10445 ++ .base.cra_name = "poly1305",
10446 ++ .base.cra_driver_name = "poly1305-neon",
10447 ++ .base.cra_priority = 200,
10448 ++ .base.cra_blocksize = POLY1305_BLOCK_SIZE,
10449 ++ .base.cra_module = THIS_MODULE,
10450 ++#endif
10451 ++}};
10452 ++
10453 ++static int __init arm_poly1305_mod_init(void)
10454 ++{
10455 ++ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
10456 ++ (elf_hwcap & HWCAP_NEON))
10457 ++ static_branch_enable(&have_neon);
10458 ++ else
10459 ++ /* register only the first entry */
10460 ++ return crypto_register_shash(&arm_poly1305_algs[0]);
10461 ++
10462 ++ return crypto_register_shashes(arm_poly1305_algs,
10463 ++ ARRAY_SIZE(arm_poly1305_algs));
10464 ++}
10465 ++
10466 ++static void __exit arm_poly1305_mod_exit(void)
10467 ++{
10468 ++ if (!static_branch_likely(&have_neon)) {
10469 ++ crypto_unregister_shash(&arm_poly1305_algs[0]);
10470 ++ return;
10471 ++ }
10472 ++ crypto_unregister_shashes(arm_poly1305_algs,
10473 ++ ARRAY_SIZE(arm_poly1305_algs));
10474 ++}
10475 ++
10476 ++module_init(arm_poly1305_mod_init);
10477 ++module_exit(arm_poly1305_mod_exit);
10478 ++
10479 ++MODULE_LICENSE("GPL v2");
10480 ++MODULE_ALIAS_CRYPTO("poly1305");
10481 ++MODULE_ALIAS_CRYPTO("poly1305-arm");
10482 ++MODULE_ALIAS_CRYPTO("poly1305-neon");
10483 +diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
10484 +index 9923445e8225..9bd15b227e78 100644
10485 +--- a/lib/crypto/Kconfig
10486 ++++ b/lib/crypto/Kconfig
10487 +@@ -40,7 +40,7 @@ config CRYPTO_LIB_DES
10488 + config CRYPTO_LIB_POLY1305_RSIZE
10489 + int
10490 + default 4 if X86_64
10491 +- default 9 if ARM64
10492 ++ default 9 if ARM || ARM64
10493 + default 1
10494 +
10495 + config CRYPTO_ARCH_HAVE_LIB_POLY1305
10496 +--
10497 +cgit v1.2.3-4-ga26e
10498 +
10499 +
10500 +From de8968cf9c923fbc432c267a15e742ae11d3ccff Mon Sep 17 00:00:00 2001
10501 +From: Ard Biesheuvel <ardb@××××××.org>
10502 +Date: Fri, 8 Nov 2019 13:22:26 +0100
10503 +Subject: crypto: mips/poly1305 - incorporate OpenSSL/CRYPTOGAMS optimized
10504 + implementation
10505 +MIME-Version: 1.0
10506 +Content-Type: text/plain; charset=UTF-8
10507 +Content-Transfer-Encoding: 8bit
10508 +
10509 +commit a11d055e7a64ac34a5e99b6fe731299449cbcd58 upstream.
10510 +
10511 +This is a straight import of the OpenSSL/CRYPTOGAMS Poly1305 implementation for
10512 +MIPS authored by Andy Polyakov, a prior 64-bit only version of which has been
10513 +contributed by him to the OpenSSL project. The file 'poly1305-mips.pl' is taken
10514 +straight from this upstream GitHub repository [0] at commit
10515 +d22ade312a7af958ec955620b0d241cf42c37feb, and already contains all the changes
10516 +required to build it as part of a Linux kernel module.
10517 +
10518 +[0] https://github.com/dot-asm/cryptogams
10519 +
10520 +Co-developed-by: Andy Polyakov <appro@××××××××××.org>
10521 +Signed-off-by: Andy Polyakov <appro@××××××××××.org>
10522 +Co-developed-by: René van Dorst <opensource@××××××.com>
10523 +Signed-off-by: René van Dorst <opensource@××××××.com>
10524 +Signed-off-by: Ard Biesheuvel <ardb@××××××.org>
10525 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
10526 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
10527 +---
10528 + arch/mips/crypto/Makefile | 14 +
10529 + arch/mips/crypto/poly1305-glue.c | 203 ++++++
10530 + arch/mips/crypto/poly1305-mips.pl | 1273 +++++++++++++++++++++++++++++++++++++
10531 + crypto/Kconfig | 5 +
10532 + lib/crypto/Kconfig | 1 +
10533 + 5 files changed, 1496 insertions(+)
10534 + create mode 100644 arch/mips/crypto/poly1305-glue.c
10535 + create mode 100644 arch/mips/crypto/poly1305-mips.pl
10536 +
10537 +diff --git a/arch/mips/crypto/Makefile b/arch/mips/crypto/Makefile
10538 +index b528b9d300f1..8e1deaf00e0c 100644
10539 +--- a/arch/mips/crypto/Makefile
10540 ++++ b/arch/mips/crypto/Makefile
10541 +@@ -8,3 +8,17 @@ obj-$(CONFIG_CRYPTO_CRC32_MIPS) += crc32-mips.o
10542 + obj-$(CONFIG_CRYPTO_CHACHA_MIPS) += chacha-mips.o
10543 + chacha-mips-y := chacha-core.o chacha-glue.o
10544 + AFLAGS_chacha-core.o += -O2 # needed to fill branch delay slots
10545 ++
10546 ++obj-$(CONFIG_CRYPTO_POLY1305_MIPS) += poly1305-mips.o
10547 ++poly1305-mips-y := poly1305-core.o poly1305-glue.o
10548 ++
10549 ++perlasm-flavour-$(CONFIG_CPU_MIPS32) := o32
10550 ++perlasm-flavour-$(CONFIG_CPU_MIPS64) := 64
10551 ++
10552 ++quiet_cmd_perlasm = PERLASM $@
10553 ++ cmd_perlasm = $(PERL) $(<) $(perlasm-flavour-y) $(@)
10554 ++
10555 ++$(obj)/poly1305-core.S: $(src)/poly1305-mips.pl FORCE
10556 ++ $(call if_changed,perlasm)
10557 ++
10558 ++targets += poly1305-core.S
10559 +diff --git a/arch/mips/crypto/poly1305-glue.c b/arch/mips/crypto/poly1305-glue.c
10560 +new file mode 100644
10561 +index 000000000000..b759b6ccc361
10562 +--- /dev/null
10563 ++++ b/arch/mips/crypto/poly1305-glue.c
10564 +@@ -0,0 +1,203 @@
10565 ++// SPDX-License-Identifier: GPL-2.0
10566 ++/*
10567 ++ * OpenSSL/Cryptogams accelerated Poly1305 transform for MIPS
10568 ++ *
10569 ++ * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@××××××.org>
10570 ++ */
10571 ++
10572 ++#include <asm/unaligned.h>
10573 ++#include <crypto/algapi.h>
10574 ++#include <crypto/internal/hash.h>
10575 ++#include <crypto/internal/poly1305.h>
10576 ++#include <linux/cpufeature.h>
10577 ++#include <linux/crypto.h>
10578 ++#include <linux/module.h>
10579 ++
10580 ++asmlinkage void poly1305_init_mips(void *state, const u8 *key);
10581 ++asmlinkage void poly1305_blocks_mips(void *state, const u8 *src, u32 len, u32 hibit);
10582 ++asmlinkage void poly1305_emit_mips(void *state, __le32 *digest, const u32 *nonce);
10583 ++
10584 ++void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
10585 ++{
10586 ++ poly1305_init_mips(&dctx->h, key);
10587 ++ dctx->s[0] = get_unaligned_le32(key + 16);
10588 ++ dctx->s[1] = get_unaligned_le32(key + 20);
10589 ++ dctx->s[2] = get_unaligned_le32(key + 24);
10590 ++ dctx->s[3] = get_unaligned_le32(key + 28);
10591 ++ dctx->buflen = 0;
10592 ++}
10593 ++EXPORT_SYMBOL(poly1305_init_arch);
10594 ++
10595 ++static int mips_poly1305_init(struct shash_desc *desc)
10596 ++{
10597 ++ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
10598 ++
10599 ++ dctx->buflen = 0;
10600 ++ dctx->rset = 0;
10601 ++ dctx->sset = false;
10602 ++
10603 ++ return 0;
10604 ++}
10605 ++
10606 ++static void mips_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
10607 ++ u32 len, u32 hibit)
10608 ++{
10609 ++ if (unlikely(!dctx->sset)) {
10610 ++ if (!dctx->rset) {
10611 ++ poly1305_init_mips(&dctx->h, src);
10612 ++ src += POLY1305_BLOCK_SIZE;
10613 ++ len -= POLY1305_BLOCK_SIZE;
10614 ++ dctx->rset = 1;
10615 ++ }
10616 ++ if (len >= POLY1305_BLOCK_SIZE) {
10617 ++ dctx->s[0] = get_unaligned_le32(src + 0);
10618 ++ dctx->s[1] = get_unaligned_le32(src + 4);
10619 ++ dctx->s[2] = get_unaligned_le32(src + 8);
10620 ++ dctx->s[3] = get_unaligned_le32(src + 12);
10621 ++ src += POLY1305_BLOCK_SIZE;
10622 ++ len -= POLY1305_BLOCK_SIZE;
10623 ++ dctx->sset = true;
10624 ++ }
10625 ++ if (len < POLY1305_BLOCK_SIZE)
10626 ++ return;
10627 ++ }
10628 ++
10629 ++ len &= ~(POLY1305_BLOCK_SIZE - 1);
10630 ++
10631 ++ poly1305_blocks_mips(&dctx->h, src, len, hibit);
10632 ++}
10633 ++/* shash .update: top up and flush dctx->buf, pass the whole blocks of src on, then keep the tail in dctx->buf. Always returns 0. */
10634 ++static int mips_poly1305_update(struct shash_desc *desc, const u8 *src,
10635 ++ unsigned int len)
10636 ++{
10637 ++ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
10638 ++
10639 ++ if (unlikely(dctx->buflen)) { /* a partial block is pending from an earlier call */
10640 ++ u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen); /* free space in buf, capped at len */
10641 ++
10642 ++ memcpy(dctx->buf + dctx->buflen, src, bytes);
10643 ++ src += bytes;
10644 ++ len -= bytes;
10645 ++ dctx->buflen += bytes;
10646 ++
10647 ++ if (dctx->buflen == POLY1305_BLOCK_SIZE) { /* buffer now holds one full block */
10648 ++ mips_poly1305_blocks(dctx, dctx->buf, POLY1305_BLOCK_SIZE, 1);
10649 ++ dctx->buflen = 0;
10650 ++ }
10651 ++ }
10652 ++
10653 ++ if (likely(len >= POLY1305_BLOCK_SIZE)) {
10654 ++ mips_poly1305_blocks(dctx, src, len, 1);
10655 ++ src += round_down(len, POLY1305_BLOCK_SIZE); /* step over the whole blocks just handed over */
10656 ++ len %= POLY1305_BLOCK_SIZE;
10657 ++ }
10658 ++
10659 ++ if (unlikely(len)) { /* stash the remaining tail for the next call */
10660 ++ dctx->buflen = len;
10661 ++ memcpy(dctx->buf, src, len);
10662 ++ }
10663 ++ return 0;
10664 ++}
10665 ++/* Exported update: same buffering as mips_poly1305_update() but calls poly1305_blocks_mips() directly, without the dctx->sset key-setup step. */
10666 ++void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
10667 ++ unsigned int nbytes)
10668 ++{
10669 ++ if (unlikely(dctx->buflen)) { /* a partial block is pending from an earlier call */
10670 ++ u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen); /* free space in buf, capped at nbytes */
10671 ++
10672 ++ memcpy(dctx->buf + dctx->buflen, src, bytes);
10673 ++ src += bytes;
10674 ++ nbytes -= bytes;
10675 ++ dctx->buflen += bytes;
10676 ++
10677 ++ if (dctx->buflen == POLY1305_BLOCK_SIZE) { /* buffer now holds one full block */
10678 ++ poly1305_blocks_mips(&dctx->h, dctx->buf,
10679 ++ POLY1305_BLOCK_SIZE, 1);
10680 ++ dctx->buflen = 0;
10681 ++ }
10682 ++ }
10683 ++
10684 ++ if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
10685 ++ unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); /* whole blocks only */
10686 ++
10687 ++ poly1305_blocks_mips(&dctx->h, src, len, 1);
10688 ++ src += len;
10689 ++ nbytes %= POLY1305_BLOCK_SIZE;
10690 ++ }
10691 ++
10692 ++ if (unlikely(nbytes)) { /* stash the remaining tail for the next call */
10693 ++ dctx->buflen = nbytes;
10694 ++ memcpy(dctx->buf, src, nbytes);
10695 ++ }
10696 ++}
10697 ++EXPORT_SYMBOL(poly1305_update_arch);
10698 ++/* Exported finalisation: hash any buffered tail as one padded block, write the 16-byte MAC to dst, then zero *dctx. */
10699 ++void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
10700 ++{
10701 ++ __le32 digest[4];
10702 ++ u64 f = 0;
10703 ++
10704 ++ if (unlikely(dctx->buflen)) {
10705 ++ dctx->buf[dctx->buflen++] = 1; /* append a 1 byte, then zero-fill up to a full block */
10706 ++ memset(dctx->buf + dctx->buflen, 0,
10707 ++ POLY1305_BLOCK_SIZE - dctx->buflen);
10708 ++ poly1305_blocks_mips(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); /* last argument (pad bit) is 0 here, unlike the update paths */
10709 ++ }
10710 ++
10711 ++ poly1305_emit_mips(&dctx->h, digest, dctx->s);
10712 ++
10713 ++ /* mac = (h + s) % (2^128); NOTE(review): f never exceeds 32 bits below, so (f >> 32) is always 0 and this chain only copies digest[] to dst - the 64-bit poly1305_emit in poly1305-mips.pl already accumulates the nonce; confirm for the 32-bit path */
10714 ++ f = (f >> 32) + le32_to_cpu(digest[0]);
10715 ++ put_unaligned_le32(f, dst);
10716 ++ f = (f >> 32) + le32_to_cpu(digest[1]);
10717 ++ put_unaligned_le32(f, dst + 4);
10718 ++ f = (f >> 32) + le32_to_cpu(digest[2]);
10719 ++ put_unaligned_le32(f, dst + 8);
10720 ++ f = (f >> 32) + le32_to_cpu(digest[3]);
10721 ++ put_unaligned_le32(f, dst + 12);
10722 ++
10723 ++ *dctx = (struct poly1305_desc_ctx){}; /* zero the whole context, including h, s and buf */
10724 ++}
10725 ++EXPORT_SYMBOL(poly1305_final_arch);
10726 ++/* shash .final: return -ENOKEY unless dctx->sset was set by mips_poly1305_blocks(), otherwise emit the MAC via poly1305_final_arch() and return 0. */
10727 ++static int mips_poly1305_final(struct shash_desc *desc, u8 *dst)
10728 ++{
10729 ++ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
10730 ++
10731 ++ if (unlikely(!dctx->sset)) /* key blocks were never fully supplied */
10732 ++ return -ENOKEY;
10733 ++
10734 ++ poly1305_final_arch(dctx, dst);
10735 ++ return 0;
10736 ++}
10737 ++/* shash descriptor registered as "poly1305" (driver name "poly1305-mips", priority 200), wired to the three callbacks above. */
10738 ++static struct shash_alg mips_poly1305_alg = {
10739 ++ .init = mips_poly1305_init,
10740 ++ .update = mips_poly1305_update,
10741 ++ .final = mips_poly1305_final,
10742 ++ .digestsize = POLY1305_DIGEST_SIZE,
10743 ++ .descsize = sizeof(struct poly1305_desc_ctx), /* per-request state is a bare poly1305_desc_ctx */
10744 ++
10745 ++ .base.cra_name = "poly1305",
10746 ++ .base.cra_driver_name = "poly1305-mips",
10747 ++ .base.cra_priority = 200,
10748 ++ .base.cra_blocksize = POLY1305_BLOCK_SIZE,
10749 ++ .base.cra_module = THIS_MODULE,
10750 ++};
10751 ++/* Module load/unload hooks: register and unregister mips_poly1305_alg with the crypto API. */
10752 ++static int __init mips_poly1305_mod_init(void)
10753 ++{
10754 ++ return crypto_register_shash(&mips_poly1305_alg); /* registration result is the module init result */
10755 ++}
10756 ++
10757 ++static void __exit mips_poly1305_mod_exit(void)
10758 ++{
10759 ++ crypto_unregister_shash(&mips_poly1305_alg);
10760 ++}
10761 ++
10762 ++module_init(mips_poly1305_mod_init);
10763 ++module_exit(mips_poly1305_mod_exit);
10764 ++
10765 ++MODULE_LICENSE("GPL v2");
10766 ++MODULE_ALIAS_CRYPTO("poly1305");
10767 ++MODULE_ALIAS_CRYPTO("poly1305-mips");
10767 ++MODULE_ALIAS_CRYPTO("poly1305-mips");
10768 +diff --git a/arch/mips/crypto/poly1305-mips.pl b/arch/mips/crypto/poly1305-mips.pl
10769 +new file mode 100644
10770 +index 000000000000..b05bab884ed2
10771 +--- /dev/null
10772 ++++ b/arch/mips/crypto/poly1305-mips.pl
10773 +@@ -0,0 +1,1273 @@
10774 ++#!/usr/bin/env perl
10775 ++# SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause
10776 ++#
10777 ++# ====================================================================
10778 ++# Written by Andy Polyakov, @dot-asm, originally for the OpenSSL
10779 ++# project.
10780 ++# ====================================================================
10781 ++
10782 ++# Poly1305 hash for MIPS.
10783 ++#
10784 ++# May 2016
10785 ++#
10786 ++# Numbers are cycles per processed byte with poly1305_blocks alone.
10787 ++#
10788 ++# IALU/gcc
10789 ++# R1x000 ~5.5/+130% (big-endian)
10790 ++# Octeon II 2.50/+70% (little-endian)
10791 ++#
10792 ++# March 2019
10793 ++#
10794 ++# Add 32-bit code path.
10795 ++#
10796 ++# October 2019
10797 ++#
10798 ++# Modulo-scheduling the reduction allows omitting the dependency chain
10799 ++# at the end of the inner loop and improves performance. Also optimize
10800 ++# the MIPS32R2 code path for the MIPS 1004K core. Per René von Dorst's suggestions.
10801 ++#
10802 ++# IALU/gcc
10803 ++# R1x000 ~9.8/? (big-endian)
10804 ++# Octeon II 3.65/+140% (little-endian)
10805 ++# MT7621/1004K 4.75/? (little-endian)
10806 ++#
10807 ++######################################################################
10808 ++# A number of MIPS ABIs are in use; O32 and N32/64 are the most
10809 ++# widely used. Then there is a new contender: NUBI. It appears that if
10810 ++# one picks the latter, it's possible to arrange code in an ABI-neutral
10811 ++# manner. Therefore let's stick to the NUBI register layout:
10812 ++#
10813 ++($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
10814 ++($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
10815 ++($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
10816 ++($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
10817 ++#
10818 ++# The return value is placed in $a0. The following coding rules facilitate
10819 ++# interoperability:
10820 ++#
10821 ++# - never ever touch $tp, "thread pointer", former $gp [o32 can be
10822 ++# excluded from the rule, because it's specified volatile];
10823 ++# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
10824 ++# old code];
10825 ++# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
10826 ++#
10827 ++# For reference, here is the register layout for N32/64 MIPS ABIs:
10828 ++#
10829 ++# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
10830 ++# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
10831 ++# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
10832 ++# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
10833 ++# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
10834 ++#
10835 ++# <appro@×××××××.org>
10836 ++#
10837 ++######################################################################
10838 ++
10839 ++$flavour = shift || "64"; # supported flavours are o32,n32,64,nubi32,nubi64
10840 ++# Return-value register: $a0 for NUBI flavours, otherwise $t0 (mapped to $2 above).
10841 ++$v0 = ($flavour =~ /nubi/i) ? $a0 : $t0;
10842 ++
10843 ++if ($flavour =~ /64|n32/i) {{{
10844 ++######################################################################
10845 ++# 64-bit code path
10846 ++#
10847 ++
10848 ++my ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3);
10849 ++my ($in0,$in1,$tmp0,$tmp1,$tmp2,$tmp3,$tmp4) = ($a4,$a5,$a6,$a7,$at,$t0,$t1);
10850 ++
10851 ++$code.=<<___;
10852 ++#if (defined(_MIPS_ARCH_MIPS64R3) || defined(_MIPS_ARCH_MIPS64R5) || \\
10853 ++ defined(_MIPS_ARCH_MIPS64R6)) \\
10854 ++ && !defined(_MIPS_ARCH_MIPS64R2)
10855 ++# define _MIPS_ARCH_MIPS64R2
10856 ++#endif
10857 ++
10858 ++#if defined(_MIPS_ARCH_MIPS64R6)
10859 ++# define dmultu(rs,rt)
10860 ++# define mflo(rd,rs,rt) dmulu rd,rs,rt
10861 ++# define mfhi(rd,rs,rt) dmuhu rd,rs,rt
10862 ++#else
10863 ++# define dmultu(rs,rt) dmultu rs,rt
10864 ++# define mflo(rd,rs,rt) mflo rd
10865 ++# define mfhi(rd,rs,rt) mfhi rd
10866 ++#endif
10867 ++
10868 ++#ifdef __KERNEL__
10869 ++# define poly1305_init poly1305_init_mips
10870 ++# define poly1305_blocks poly1305_blocks_mips
10871 ++# define poly1305_emit poly1305_emit_mips
10872 ++#endif
10873 ++
10874 ++#if defined(__MIPSEB__) && !defined(MIPSEB)
10875 ++# define MIPSEB
10876 ++#endif
10877 ++
10878 ++#ifdef MIPSEB
10879 ++# define MSB 0
10880 ++# define LSB 7
10881 ++#else
10882 ++# define MSB 7
10883 ++# define LSB 0
10884 ++#endif
10885 ++
10886 ++.text
10887 ++.set noat
10888 ++.set noreorder
10889 ++
10890 ++.align 5
10891 ++.globl poly1305_init
10892 ++.ent poly1305_init
10893 ++poly1305_init:
10894 ++ .frame $sp,0,$ra
10895 ++ .set reorder
10896 ++
10897 ++ sd $zero,0($ctx)
10898 ++ sd $zero,8($ctx)
10899 ++ sd $zero,16($ctx)
10900 ++
10901 ++ beqz $inp,.Lno_key
10902 ++
10903 ++#if defined(_MIPS_ARCH_MIPS64R6)
10904 ++ andi $tmp0,$inp,7 # $inp % 8
10905 ++ dsubu $inp,$inp,$tmp0 # align $inp
10906 ++ sll $tmp0,$tmp0,3 # byte to bit offset
10907 ++ ld $in0,0($inp)
10908 ++ ld $in1,8($inp)
10909 ++ beqz $tmp0,.Laligned_key
10910 ++ ld $tmp2,16($inp)
10911 ++
10912 ++ subu $tmp1,$zero,$tmp0
10913 ++# ifdef MIPSEB
10914 ++ dsllv $in0,$in0,$tmp0
10915 ++ dsrlv $tmp3,$in1,$tmp1
10916 ++ dsllv $in1,$in1,$tmp0
10917 ++ dsrlv $tmp2,$tmp2,$tmp1
10918 ++# else
10919 ++ dsrlv $in0,$in0,$tmp0
10920 ++ dsllv $tmp3,$in1,$tmp1
10921 ++ dsrlv $in1,$in1,$tmp0
10922 ++ dsllv $tmp2,$tmp2,$tmp1
10923 ++# endif
10924 ++ or $in0,$in0,$tmp3
10925 ++ or $in1,$in1,$tmp2
10926 ++.Laligned_key:
10927 ++#else
10928 ++ ldl $in0,0+MSB($inp)
10929 ++ ldl $in1,8+MSB($inp)
10930 ++ ldr $in0,0+LSB($inp)
10931 ++ ldr $in1,8+LSB($inp)
10932 ++#endif
10933 ++#ifdef MIPSEB
10934 ++# if defined(_MIPS_ARCH_MIPS64R2)
10935 ++ dsbh $in0,$in0 # byte swap
10936 ++ dsbh $in1,$in1
10937 ++ dshd $in0,$in0
10938 ++ dshd $in1,$in1
10939 ++# else
10940 ++ ori $tmp0,$zero,0xFF
10941 ++ dsll $tmp2,$tmp0,32
10942 ++ or $tmp0,$tmp2 # 0x000000FF000000FF
10943 ++
10944 ++ and $tmp1,$in0,$tmp0 # byte swap
10945 ++ and $tmp3,$in1,$tmp0
10946 ++ dsrl $tmp2,$in0,24
10947 ++ dsrl $tmp4,$in1,24
10948 ++ dsll $tmp1,24
10949 ++ dsll $tmp3,24
10950 ++ and $tmp2,$tmp0
10951 ++ and $tmp4,$tmp0
10952 ++ dsll $tmp0,8 # 0x0000FF000000FF00
10953 ++ or $tmp1,$tmp2
10954 ++ or $tmp3,$tmp4
10955 ++ and $tmp2,$in0,$tmp0
10956 ++ and $tmp4,$in1,$tmp0
10957 ++ dsrl $in0,8
10958 ++ dsrl $in1,8
10959 ++ dsll $tmp2,8
10960 ++ dsll $tmp4,8
10961 ++ and $in0,$tmp0
10962 ++ and $in1,$tmp0
10963 ++ or $tmp1,$tmp2
10964 ++ or $tmp3,$tmp4
10965 ++ or $in0,$tmp1
10966 ++ or $in1,$tmp3
10967 ++ dsrl $tmp1,$in0,32
10968 ++ dsrl $tmp3,$in1,32
10969 ++ dsll $in0,32
10970 ++ dsll $in1,32
10971 ++ or $in0,$tmp1
10972 ++ or $in1,$tmp3
10973 ++# endif
10974 ++#endif
10975 ++ li $tmp0,1
10976 ++ dsll $tmp0,32 # 0x0000000100000000
10977 ++ daddiu $tmp0,-63 # 0x00000000ffffffc1
10978 ++ dsll $tmp0,28 # 0x0ffffffc10000000
10979 ++ daddiu $tmp0,-1 # 0x0ffffffc0fffffff
10980 ++
10981 ++ and $in0,$tmp0
10982 ++ daddiu $tmp0,-3 # 0x0ffffffc0ffffffc
10983 ++ and $in1,$tmp0
10984 ++
10985 ++ sd $in0,24($ctx)
10986 ++ dsrl $tmp0,$in1,2
10987 ++ sd $in1,32($ctx)
10988 ++ daddu $tmp0,$in1 # s1 = r1 + (r1 >> 2)
10989 ++ sd $tmp0,40($ctx)
10990 ++
10991 ++.Lno_key:
10992 ++ li $v0,0 # return 0
10993 ++ jr $ra
10994 ++.end poly1305_init
10995 ++___
10996 ++{
10997 ++my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0x0003f000" : "0x00030000";
10998 ++
10999 ++my ($h0,$h1,$h2,$r0,$r1,$rs1,$d0,$d1,$d2) =
11000 ++ ($s0,$s1,$s2,$s3,$s4,$s5,$in0,$in1,$t2);
11001 ++my ($shr,$shl) = ($s6,$s7); # used on R6
11002 ++
11003 ++$code.=<<___;
11004 ++.align 5
11005 ++.globl poly1305_blocks
11006 ++.ent poly1305_blocks
11007 ++poly1305_blocks:
11008 ++ .set noreorder
11009 ++ dsrl $len,4 # number of complete blocks
11010 ++ bnez $len,poly1305_blocks_internal
11011 ++ nop
11012 ++ jr $ra
11013 ++ nop
11014 ++.end poly1305_blocks
11015 ++
11016 ++.align 5
11017 ++.ent poly1305_blocks_internal
11018 ++poly1305_blocks_internal:
11019 ++ .set noreorder
11020 ++#if defined(_MIPS_ARCH_MIPS64R6)
11021 ++ .frame $sp,8*8,$ra
11022 ++ .mask $SAVED_REGS_MASK|0x000c0000,-8
11023 ++ dsubu $sp,8*8
11024 ++ sd $s7,56($sp)
11025 ++ sd $s6,48($sp)
11026 ++#else
11027 ++ .frame $sp,6*8,$ra
11028 ++ .mask $SAVED_REGS_MASK,-8
11029 ++ dsubu $sp,6*8
11030 ++#endif
11031 ++ sd $s5,40($sp)
11032 ++ sd $s4,32($sp)
11033 ++___
11034 ++$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
11035 ++ sd $s3,24($sp)
11036 ++ sd $s2,16($sp)
11037 ++ sd $s1,8($sp)
11038 ++ sd $s0,0($sp)
11039 ++___
11040 ++$code.=<<___;
11041 ++ .set reorder
11042 ++
11043 ++#if defined(_MIPS_ARCH_MIPS64R6)
11044 ++ andi $shr,$inp,7
11045 ++ dsubu $inp,$inp,$shr # align $inp
11046 ++ sll $shr,$shr,3 # byte to bit offset
11047 ++ subu $shl,$zero,$shr
11048 ++#endif
11049 ++
11050 ++ ld $h0,0($ctx) # load hash value
11051 ++ ld $h1,8($ctx)
11052 ++ ld $h2,16($ctx)
11053 ++
11054 ++ ld $r0,24($ctx) # load key
11055 ++ ld $r1,32($ctx)
11056 ++ ld $rs1,40($ctx)
11057 ++
11058 ++ dsll $len,4
11059 ++ daddu $len,$inp # end of buffer
11060 ++ b .Loop
11061 ++
11062 ++.align 4
11063 ++.Loop:
11064 ++#if defined(_MIPS_ARCH_MIPS64R6)
11065 ++ ld $in0,0($inp) # load input
11066 ++ ld $in1,8($inp)
11067 ++ beqz $shr,.Laligned_inp
11068 ++
11069 ++ ld $tmp2,16($inp)
11070 ++# ifdef MIPSEB
11071 ++ dsllv $in0,$in0,$shr
11072 ++ dsrlv $tmp3,$in1,$shl
11073 ++ dsllv $in1,$in1,$shr
11074 ++ dsrlv $tmp2,$tmp2,$shl
11075 ++# else
11076 ++ dsrlv $in0,$in0,$shr
11077 ++ dsllv $tmp3,$in1,$shl
11078 ++ dsrlv $in1,$in1,$shr
11079 ++ dsllv $tmp2,$tmp2,$shl
11080 ++# endif
11081 ++ or $in0,$in0,$tmp3
11082 ++ or $in1,$in1,$tmp2
11083 ++.Laligned_inp:
11084 ++#else
11085 ++ ldl $in0,0+MSB($inp) # load input
11086 ++ ldl $in1,8+MSB($inp)
11087 ++ ldr $in0,0+LSB($inp)
11088 ++ ldr $in1,8+LSB($inp)
11089 ++#endif
11090 ++ daddiu $inp,16
11091 ++#ifdef MIPSEB
11092 ++# if defined(_MIPS_ARCH_MIPS64R2)
11093 ++ dsbh $in0,$in0 # byte swap
11094 ++ dsbh $in1,$in1
11095 ++ dshd $in0,$in0
11096 ++ dshd $in1,$in1
11097 ++# else
11098 ++ ori $tmp0,$zero,0xFF
11099 ++ dsll $tmp2,$tmp0,32
11100 ++ or $tmp0,$tmp2 # 0x000000FF000000FF
11101 ++
11102 ++ and $tmp1,$in0,$tmp0 # byte swap
11103 ++ and $tmp3,$in1,$tmp0
11104 ++ dsrl $tmp2,$in0,24
11105 ++ dsrl $tmp4,$in1,24
11106 ++ dsll $tmp1,24
11107 ++ dsll $tmp3,24
11108 ++ and $tmp2,$tmp0
11109 ++ and $tmp4,$tmp0
11110 ++ dsll $tmp0,8 # 0x0000FF000000FF00
11111 ++ or $tmp1,$tmp2
11112 ++ or $tmp3,$tmp4
11113 ++ and $tmp2,$in0,$tmp0
11114 ++ and $tmp4,$in1,$tmp0
11115 ++ dsrl $in0,8
11116 ++ dsrl $in1,8
11117 ++ dsll $tmp2,8
11118 ++ dsll $tmp4,8
11119 ++ and $in0,$tmp0
11120 ++ and $in1,$tmp0
11121 ++ or $tmp1,$tmp2
11122 ++ or $tmp3,$tmp4
11123 ++ or $in0,$tmp1
11124 ++ or $in1,$tmp3
11125 ++ dsrl $tmp1,$in0,32
11126 ++ dsrl $tmp3,$in1,32
11127 ++ dsll $in0,32
11128 ++ dsll $in1,32
11129 ++ or $in0,$tmp1
11130 ++ or $in1,$tmp3
11131 ++# endif
11132 ++#endif
11133 ++ dsrl $tmp1,$h2,2 # modulo-scheduled reduction
11134 ++ andi $h2,$h2,3
11135 ++ dsll $tmp0,$tmp1,2
11136 ++
11137 ++ daddu $d0,$h0,$in0 # accumulate input
11138 ++ daddu $tmp1,$tmp0
11139 ++ sltu $tmp0,$d0,$h0
11140 ++ daddu $d0,$d0,$tmp1 # ... and residue
11141 ++ sltu $tmp1,$d0,$tmp1
11142 ++ daddu $d1,$h1,$in1
11143 ++ daddu $tmp0,$tmp1
11144 ++ sltu $tmp1,$d1,$h1
11145 ++ daddu $d1,$tmp0
11146 ++
11147 ++ dmultu ($r0,$d0) # h0*r0
11148 ++ daddu $d2,$h2,$padbit
11149 ++ sltu $tmp0,$d1,$tmp0
11150 ++ mflo ($h0,$r0,$d0)
11151 ++ mfhi ($h1,$r0,$d0)
11152 ++
11153 ++ dmultu ($rs1,$d1) # h1*5*r1
11154 ++ daddu $d2,$tmp1
11155 ++ daddu $d2,$tmp0
11156 ++ mflo ($tmp0,$rs1,$d1)
11157 ++ mfhi ($tmp1,$rs1,$d1)
11158 ++
11159 ++ dmultu ($r1,$d0) # h0*r1
11160 ++ mflo ($tmp2,$r1,$d0)
11161 ++ mfhi ($h2,$r1,$d0)
11162 ++ daddu $h0,$tmp0
11163 ++ daddu $h1,$tmp1
11164 ++ sltu $tmp0,$h0,$tmp0
11165 ++
11166 ++ dmultu ($r0,$d1) # h1*r0
11167 ++ daddu $h1,$tmp0
11168 ++ daddu $h1,$tmp2
11169 ++ mflo ($tmp0,$r0,$d1)
11170 ++ mfhi ($tmp1,$r0,$d1)
11171 ++
11172 ++ dmultu ($rs1,$d2) # h2*5*r1
11173 ++ sltu $tmp2,$h1,$tmp2
11174 ++ daddu $h2,$tmp2
11175 ++ mflo ($tmp2,$rs1,$d2)
11176 ++
11177 ++ dmultu ($r0,$d2) # h2*r0
11178 ++ daddu $h1,$tmp0
11179 ++ daddu $h2,$tmp1
11180 ++ mflo ($tmp3,$r0,$d2)
11181 ++ sltu $tmp0,$h1,$tmp0
11182 ++ daddu $h2,$tmp0
11183 ++
11184 ++ daddu $h1,$tmp2
11185 ++ sltu $tmp2,$h1,$tmp2
11186 ++ daddu $h2,$tmp2
11187 ++ daddu $h2,$tmp3
11188 ++
11189 ++ bne $inp,$len,.Loop
11190 ++
11191 ++ sd $h0,0($ctx) # store hash value
11192 ++ sd $h1,8($ctx)
11193 ++ sd $h2,16($ctx)
11194 ++
11195 ++ .set noreorder
11196 ++#if defined(_MIPS_ARCH_MIPS64R6)
11197 ++ ld $s7,56($sp)
11198 ++ ld $s6,48($sp)
11199 ++#endif
11200 ++ ld $s5,40($sp) # epilogue
11201 ++ ld $s4,32($sp)
11202 ++___
11203 ++$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi epilogue
11204 ++ ld $s3,24($sp)
11205 ++ ld $s2,16($sp)
11206 ++ ld $s1,8($sp)
11207 ++ ld $s0,0($sp)
11208 ++___
11209 ++$code.=<<___;
11210 ++ jr $ra
11211 ++#if defined(_MIPS_ARCH_MIPS64R6)
11212 ++ daddu $sp,8*8
11213 ++#else
11214 ++ daddu $sp,6*8
11215 ++#endif
11216 ++.end poly1305_blocks_internal
11217 ++___
11218 ++}
11219 ++{
11220 ++my ($ctx,$mac,$nonce) = ($a0,$a1,$a2);
11221 ++
11222 ++$code.=<<___;
11223 ++.align 5
11224 ++.globl poly1305_emit
11225 ++.ent poly1305_emit
11226 ++poly1305_emit:
11227 ++ .frame $sp,0,$ra
11228 ++ .set reorder
11229 ++
11230 ++ ld $tmp2,16($ctx)
11231 ++ ld $tmp0,0($ctx)
11232 ++ ld $tmp1,8($ctx)
11233 ++
11234 ++ li $in0,-4 # final reduction
11235 ++ dsrl $in1,$tmp2,2
11236 ++ and $in0,$tmp2
11237 ++ andi $tmp2,$tmp2,3
11238 ++ daddu $in0,$in1
11239 ++
11240 ++ daddu $tmp0,$tmp0,$in0
11241 ++ sltu $in1,$tmp0,$in0
11242 ++ daddiu $in0,$tmp0,5 # compare to modulus
11243 ++ daddu $tmp1,$tmp1,$in1
11244 ++ sltiu $tmp3,$in0,5
11245 ++ sltu $tmp4,$tmp1,$in1
11246 ++ daddu $in1,$tmp1,$tmp3
11247 ++ daddu $tmp2,$tmp2,$tmp4
11248 ++ sltu $tmp3,$in1,$tmp3
11249 ++ daddu $tmp2,$tmp2,$tmp3
11250 ++
11251 ++ dsrl $tmp2,2 # see if it carried/borrowed
11252 ++ dsubu $tmp2,$zero,$tmp2
11253 ++
11254 ++ xor $in0,$tmp0
11255 ++ xor $in1,$tmp1
11256 ++ and $in0,$tmp2
11257 ++ and $in1,$tmp2
11258 ++ xor $in0,$tmp0
11259 ++ xor $in1,$tmp1
11260 ++
11261 ++ lwu $tmp0,0($nonce) # load nonce
11262 ++ lwu $tmp1,4($nonce)
11263 ++ lwu $tmp2,8($nonce)
11264 ++ lwu $tmp3,12($nonce)
11265 ++ dsll $tmp1,32
11266 ++ dsll $tmp3,32
11267 ++ or $tmp0,$tmp1
11268 ++ or $tmp2,$tmp3
11269 ++
11270 ++ daddu $in0,$tmp0 # accumulate nonce
11271 ++ daddu $in1,$tmp2
11272 ++ sltu $tmp0,$in0,$tmp0
11273 ++ daddu $in1,$tmp0
11274 ++
11275 ++ dsrl $tmp0,$in0,8 # write mac value
11276 ++ dsrl $tmp1,$in0,16
11277 ++ dsrl $tmp2,$in0,24
11278 ++ sb $in0,0($mac)
11279 ++ dsrl $tmp3,$in0,32
11280 ++ sb $tmp0,1($mac)
11281 ++ dsrl $tmp0,$in0,40
11282 ++ sb $tmp1,2($mac)
11283 ++ dsrl $tmp1,$in0,48
11284 ++ sb $tmp2,3($mac)
11285 ++ dsrl $tmp2,$in0,56
11286 ++ sb $tmp3,4($mac)
11287 ++ dsrl $tmp3,$in1,8
11288 ++ sb $tmp0,5($mac)
11289 ++ dsrl $tmp0,$in1,16
11290 ++ sb $tmp1,6($mac)
11291 ++ dsrl $tmp1,$in1,24
11292 ++ sb $tmp2,7($mac)
11293 ++
11294 ++ sb $in1,8($mac)
11295 ++ dsrl $tmp2,$in1,32
11296 ++ sb $tmp3,9($mac)
11297 ++ dsrl $tmp3,$in1,40
11298 ++ sb $tmp0,10($mac)
11299 ++ dsrl $tmp0,$in1,48
11300 ++ sb $tmp1,11($mac)
11301 ++ dsrl $tmp1,$in1,56
11302 ++ sb $tmp2,12($mac)
11303 ++ sb $tmp3,13($mac)
11304 ++ sb $tmp0,14($mac)
11305 ++ sb $tmp1,15($mac)
11306 ++
11307 ++ jr $ra
11308 ++.end poly1305_emit
11309 ++.rdata
11310 ++.asciiz "Poly1305 for MIPS64, CRYPTOGAMS by \@dot-asm"
11311 ++.align 2
11312 ++___
11313 ++}
11314 ++}}} else {{{
11315 ++######################################################################
11316 ++# 32-bit code path
11317 ++#
11318 ++
11319 ++my ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3);
11320 ++my ($in0,$in1,$in2,$in3,$tmp0,$tmp1,$tmp2,$tmp3) =
11321 ++ ($a4,$a5,$a6,$a7,$at,$t0,$t1,$t2);
11322 ++
11323 ++$code.=<<___;
11324 ++#if (defined(_MIPS_ARCH_MIPS32R3) || defined(_MIPS_ARCH_MIPS32R5) || \\
11325 ++ defined(_MIPS_ARCH_MIPS32R6)) \\
11326 ++ && !defined(_MIPS_ARCH_MIPS32R2)
11327 ++# define _MIPS_ARCH_MIPS32R2
11328 ++#endif
11329 ++
11330 ++#if defined(_MIPS_ARCH_MIPS32R6)
11331 ++# define multu(rs,rt)
11332 ++# define mflo(rd,rs,rt) mulu rd,rs,rt
11333 ++# define mfhi(rd,rs,rt) muhu rd,rs,rt
11334 ++#else
11335 ++# define multu(rs,rt) multu rs,rt
11336 ++# define mflo(rd,rs,rt) mflo rd
11337 ++# define mfhi(rd,rs,rt) mfhi rd
11338 ++#endif
11339 ++
11340 ++#ifdef __KERNEL__
11341 ++# define poly1305_init poly1305_init_mips
11342 ++# define poly1305_blocks poly1305_blocks_mips
11343 ++# define poly1305_emit poly1305_emit_mips
11344 ++#endif
11345 ++
11346 ++#if defined(__MIPSEB__) && !defined(MIPSEB)
11347 ++# define MIPSEB
11348 ++#endif
11349 ++
11350 ++#ifdef MIPSEB
11351 ++# define MSB 0
11352 ++# define LSB 3
11353 ++#else
11354 ++# define MSB 3
11355 ++# define LSB 0
11356 ++#endif
11357 ++
11358 ++.text
11359 ++.set noat
11360 ++.set noreorder
11361 ++
11362 ++.align 5
11363 ++.globl poly1305_init
11364 ++.ent poly1305_init
11365 ++poly1305_init:
11366 ++ .frame $sp,0,$ra
11367 ++ .set reorder
11368 ++
11369 ++ sw $zero,0($ctx)
11370 ++ sw $zero,4($ctx)
11371 ++ sw $zero,8($ctx)
11372 ++ sw $zero,12($ctx)
11373 ++ sw $zero,16($ctx)
11374 ++
11375 ++ beqz $inp,.Lno_key
11376 ++
11377 ++#if defined(_MIPS_ARCH_MIPS32R6)
11378 ++ andi $tmp0,$inp,3 # $inp % 4
11379 ++ subu $inp,$inp,$tmp0 # align $inp
11380 ++ sll $tmp0,$tmp0,3 # byte to bit offset
11381 ++ lw $in0,0($inp)
11382 ++ lw $in1,4($inp)
11383 ++ lw $in2,8($inp)
11384 ++ lw $in3,12($inp)
11385 ++ beqz $tmp0,.Laligned_key
11386 ++
11387 ++ lw $tmp2,16($inp)
11388 ++ subu $tmp1,$zero,$tmp0
11389 ++# ifdef MIPSEB
11390 ++ sllv $in0,$in0,$tmp0
11391 ++ srlv $tmp3,$in1,$tmp1
11392 ++ sllv $in1,$in1,$tmp0
11393 ++ or $in0,$in0,$tmp3
11394 ++ srlv $tmp3,$in2,$tmp1
11395 ++ sllv $in2,$in2,$tmp0
11396 ++ or $in1,$in1,$tmp3
11397 ++ srlv $tmp3,$in3,$tmp1
11398 ++ sllv $in3,$in3,$tmp0
11399 ++ or $in2,$in2,$tmp3
11400 ++ srlv $tmp2,$tmp2,$tmp1
11401 ++ or $in3,$in3,$tmp2
11402 ++# else
11403 ++ srlv $in0,$in0,$tmp0
11404 ++ sllv $tmp3,$in1,$tmp1
11405 ++ srlv $in1,$in1,$tmp0
11406 ++ or $in0,$in0,$tmp3
11407 ++ sllv $tmp3,$in2,$tmp1
11408 ++ srlv $in2,$in2,$tmp0
11409 ++ or $in1,$in1,$tmp3
11410 ++ sllv $tmp3,$in3,$tmp1
11411 ++ srlv $in3,$in3,$tmp0
11412 ++ or $in2,$in2,$tmp3
11413 ++ sllv $tmp2,$tmp2,$tmp1
11414 ++ or $in3,$in3,$tmp2
11415 ++# endif
11416 ++.Laligned_key:
11417 ++#else
11418 ++ lwl $in0,0+MSB($inp)
11419 ++ lwl $in1,4+MSB($inp)
11420 ++ lwl $in2,8+MSB($inp)
11421 ++ lwl $in3,12+MSB($inp)
11422 ++ lwr $in0,0+LSB($inp)
11423 ++ lwr $in1,4+LSB($inp)
11424 ++ lwr $in2,8+LSB($inp)
11425 ++ lwr $in3,12+LSB($inp)
11426 ++#endif
11427 ++#ifdef MIPSEB
11428 ++# if defined(_MIPS_ARCH_MIPS32R2)
11429 ++ wsbh $in0,$in0 # byte swap
11430 ++ wsbh $in1,$in1
11431 ++ wsbh $in2,$in2
11432 ++ wsbh $in3,$in3
11433 ++ rotr $in0,$in0,16
11434 ++ rotr $in1,$in1,16
11435 ++ rotr $in2,$in2,16
11436 ++ rotr $in3,$in3,16
11437 ++# else
11438 ++ srl $tmp0,$in0,24 # byte swap
11439 ++ srl $tmp1,$in0,8
11440 ++ andi $tmp2,$in0,0xFF00
11441 ++ sll $in0,$in0,24
11442 ++ andi $tmp1,0xFF00
11443 ++ sll $tmp2,$tmp2,8
11444 ++ or $in0,$tmp0
11445 ++ srl $tmp0,$in1,24
11446 ++ or $tmp1,$tmp2
11447 ++ srl $tmp2,$in1,8
11448 ++ or $in0,$tmp1
11449 ++ andi $tmp1,$in1,0xFF00
11450 ++ sll $in1,$in1,24
11451 ++ andi $tmp2,0xFF00
11452 ++ sll $tmp1,$tmp1,8
11453 ++ or $in1,$tmp0
11454 ++ srl $tmp0,$in2,24
11455 ++ or $tmp2,$tmp1
11456 ++ srl $tmp1,$in2,8
11457 ++ or $in1,$tmp2
11458 ++ andi $tmp2,$in2,0xFF00
11459 ++ sll $in2,$in2,24
11460 ++ andi $tmp1,0xFF00
11461 ++ sll $tmp2,$tmp2,8
11462 ++ or $in2,$tmp0
11463 ++ srl $tmp0,$in3,24
11464 ++ or $tmp1,$tmp2
11465 ++ srl $tmp2,$in3,8
11466 ++ or $in2,$tmp1
11467 ++ andi $tmp1,$in3,0xFF00
11468 ++ sll $in3,$in3,24
11469 ++ andi $tmp2,0xFF00
11470 ++ sll $tmp1,$tmp1,8
11471 ++ or $in3,$tmp0
11472 ++ or $tmp2,$tmp1
11473 ++ or $in3,$tmp2
11474 ++# endif
11475 ++#endif
11476 ++ lui $tmp0,0x0fff
11477 ++ ori $tmp0,0xffff # 0x0fffffff
11478 ++ and $in0,$in0,$tmp0
11479 ++ subu $tmp0,3 # 0x0ffffffc
11480 ++ and $in1,$in1,$tmp0
11481 ++ and $in2,$in2,$tmp0
11482 ++ and $in3,$in3,$tmp0
11483 ++
11484 ++ sw $in0,20($ctx)
11485 ++ sw $in1,24($ctx)
11486 ++ sw $in2,28($ctx)
11487 ++ sw $in3,32($ctx)
11488 ++
11489 ++ srl $tmp1,$in1,2
11490 ++ srl $tmp2,$in2,2
11491 ++ srl $tmp3,$in3,2
11492 ++ addu $in1,$in1,$tmp1 # s1 = r1 + (r1 >> 2)
11493 ++ addu $in2,$in2,$tmp2
11494 ++ addu $in3,$in3,$tmp3
11495 ++ sw $in1,36($ctx)
11496 ++ sw $in2,40($ctx)
11497 ++ sw $in3,44($ctx)
11498 ++.Lno_key:
11499 ++ li $v0,0
11500 ++ jr $ra
11501 ++.end poly1305_init
11502 ++___
11503 ++{
11504 ++my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0x00fff000" : "0x00ff0000";
11505 ++
11506 ++my ($h0,$h1,$h2,$h3,$h4, $r0,$r1,$r2,$r3, $rs1,$rs2,$rs3) =
11507 ++ ($s0,$s1,$s2,$s3,$s4, $s5,$s6,$s7,$s8, $s9,$s10,$s11);
11508 ++my ($d0,$d1,$d2,$d3) =
11509 ++ ($a4,$a5,$a6,$a7);
11510 ++my $shr = $t2; # used on R6
11511 ++my $one = $t2; # used on R2
11512 ++
11513 ++$code.=<<___;
11514 ++.globl poly1305_blocks
11515 ++.align 5
11516 ++.ent poly1305_blocks
11517 ++poly1305_blocks:
11518 ++ .frame $sp,16*4,$ra
11519 ++ .mask $SAVED_REGS_MASK,-4
11520 ++ .set noreorder
11521 ++ subu $sp, $sp,4*12
11522 ++ sw $s11,4*11($sp)
11523 ++ sw $s10,4*10($sp)
11524 ++ sw $s9, 4*9($sp)
11525 ++ sw $s8, 4*8($sp)
11526 ++ sw $s7, 4*7($sp)
11527 ++ sw $s6, 4*6($sp)
11528 ++ sw $s5, 4*5($sp)
11529 ++ sw $s4, 4*4($sp)
11530 ++___
11531 ++$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
11532 ++ sw $s3, 4*3($sp)
11533 ++ sw $s2, 4*2($sp)
11534 ++ sw $s1, 4*1($sp)
11535 ++ sw $s0, 4*0($sp)
11536 ++___
11537 ++$code.=<<___;
11538 ++ .set reorder
11539 ++
11540 ++ srl $len,4 # number of complete blocks
11541 ++ li $one,1
11542 ++ beqz $len,.Labort
11543 ++
11544 ++#if defined(_MIPS_ARCH_MIPS32R6)
11545 ++ andi $shr,$inp,3
11546 ++ subu $inp,$inp,$shr # align $inp
11547 ++ sll $shr,$shr,3 # byte to bit offset
11548 ++#endif
11549 ++
11550 ++ lw $h0,0($ctx) # load hash value
11551 ++ lw $h1,4($ctx)
11552 ++ lw $h2,8($ctx)
11553 ++ lw $h3,12($ctx)
11554 ++ lw $h4,16($ctx)
11555 ++
11556 ++ lw $r0,20($ctx) # load key
11557 ++ lw $r1,24($ctx)
11558 ++ lw $r2,28($ctx)
11559 ++ lw $r3,32($ctx)
11560 ++ lw $rs1,36($ctx)
11561 ++ lw $rs2,40($ctx)
11562 ++ lw $rs3,44($ctx)
11563 ++
11564 ++ sll $len,4
11565 ++ addu $len,$len,$inp # end of buffer
11566 ++ b .Loop
11567 ++
11568 ++.align 4
11569 ++.Loop:
11570 ++#if defined(_MIPS_ARCH_MIPS32R6)
11571 ++ lw $d0,0($inp) # load input
11572 ++ lw $d1,4($inp)
11573 ++ lw $d2,8($inp)
11574 ++ lw $d3,12($inp)
11575 ++ beqz $shr,.Laligned_inp
11576 ++
11577 ++ lw $t0,16($inp)
11578 ++ subu $t1,$zero,$shr
11579 ++# ifdef MIPSEB
11580 ++ sllv $d0,$d0,$shr
11581 ++ srlv $at,$d1,$t1
11582 ++ sllv $d1,$d1,$shr
11583 ++ or $d0,$d0,$at
11584 ++ srlv $at,$d2,$t1
11585 ++ sllv $d2,$d2,$shr
11586 ++ or $d1,$d1,$at
11587 ++ srlv $at,$d3,$t1
11588 ++ sllv $d3,$d3,$shr
11589 ++ or $d2,$d2,$at
11590 ++ srlv $t0,$t0,$t1
11591 ++ or $d3,$d3,$t0
11592 ++# else
11593 ++ srlv $d0,$d0,$shr
11594 ++ sllv $at,$d1,$t1
11595 ++ srlv $d1,$d1,$shr
11596 ++ or $d0,$d0,$at
11597 ++ sllv $at,$d2,$t1
11598 ++ srlv $d2,$d2,$shr
11599 ++ or $d1,$d1,$at
11600 ++ sllv $at,$d3,$t1
11601 ++ srlv $d3,$d3,$shr
11602 ++ or $d2,$d2,$at
11603 ++ sllv $t0,$t0,$t1
11604 ++ or $d3,$d3,$t0
11605 ++# endif
11606 ++.Laligned_inp:
11607 ++#else
11608 ++ lwl $d0,0+MSB($inp) # load input
11609 ++ lwl $d1,4+MSB($inp)
11610 ++ lwl $d2,8+MSB($inp)
11611 ++ lwl $d3,12+MSB($inp)
11612 ++ lwr $d0,0+LSB($inp)
11613 ++ lwr $d1,4+LSB($inp)
11614 ++ lwr $d2,8+LSB($inp)
11615 ++ lwr $d3,12+LSB($inp)
11616 ++#endif
11617 ++#ifdef MIPSEB
11618 ++# if defined(_MIPS_ARCH_MIPS32R2)
11619 ++ wsbh $d0,$d0 # byte swap
11620 ++ wsbh $d1,$d1
11621 ++ wsbh $d2,$d2
11622 ++ wsbh $d3,$d3
11623 ++ rotr $d0,$d0,16
11624 ++ rotr $d1,$d1,16
11625 ++ rotr $d2,$d2,16
11626 ++ rotr $d3,$d3,16
11627 ++# else
11628 ++ srl $at,$d0,24 # byte swap
11629 ++ srl $t0,$d0,8
11630 ++ andi $t1,$d0,0xFF00
11631 ++ sll $d0,$d0,24
11632 ++ andi $t0,0xFF00
11633 ++ sll $t1,$t1,8
11634 ++ or $d0,$at
11635 ++ srl $at,$d1,24
11636 ++ or $t0,$t1
11637 ++ srl $t1,$d1,8
11638 ++ or $d0,$t0
11639 ++ andi $t0,$d1,0xFF00
11640 ++ sll $d1,$d1,24
11641 ++ andi $t1,0xFF00
11642 ++ sll $t0,$t0,8
11643 ++ or $d1,$at
11644 ++ srl $at,$d2,24
11645 ++ or $t1,$t0
11646 ++ srl $t0,$d2,8
11647 ++ or $d1,$t1
11648 ++ andi $t1,$d2,0xFF00
11649 ++ sll $d2,$d2,24
11650 ++ andi $t0,0xFF00
11651 ++ sll $t1,$t1,8
11652 ++ or $d2,$at
11653 ++ srl $at,$d3,24
11654 ++ or $t0,$t1
11655 ++ srl $t1,$d3,8
11656 ++ or $d2,$t0
11657 ++ andi $t0,$d3,0xFF00
11658 ++ sll $d3,$d3,24
11659 ++ andi $t1,0xFF00
11660 ++ sll $t0,$t0,8
11661 ++ or $d3,$at
11662 ++ or $t1,$t0
11663 ++ or $d3,$t1
11664 ++# endif
11665 ++#endif
11666 ++ srl $t0,$h4,2 # modulo-scheduled reduction
11667 ++ andi $h4,$h4,3
11668 ++ sll $at,$t0,2
11669 ++
11670 ++ addu $d0,$d0,$h0 # accumulate input
11671 ++ addu $t0,$t0,$at
11672 ++ sltu $h0,$d0,$h0
11673 ++ addu $d0,$d0,$t0 # ... and residue
11674 ++ sltu $at,$d0,$t0
11675 ++
11676 ++ addu $d1,$d1,$h1
11677 ++ addu $h0,$h0,$at # carry
11678 ++ sltu $h1,$d1,$h1
11679 ++ addu $d1,$d1,$h0
11680 ++ sltu $h0,$d1,$h0
11681 ++
11682 ++ addu $d2,$d2,$h2
11683 ++ addu $h1,$h1,$h0 # carry
11684 ++ sltu $h2,$d2,$h2
11685 ++ addu $d2,$d2,$h1
11686 ++ sltu $h1,$d2,$h1
11687 ++
11688 ++ addu $d3,$d3,$h3
11689 ++ addu $h2,$h2,$h1 # carry
11690 ++ sltu $h3,$d3,$h3
11691 ++ addu $d3,$d3,$h2
11692 ++
11693 ++#if defined(_MIPS_ARCH_MIPS32R2) && !defined(_MIPS_ARCH_MIPS32R6)
11694 ++ multu $r0,$d0 # d0*r0
11695 ++ sltu $h2,$d3,$h2
11696 ++ maddu $rs3,$d1 # d1*s3
11697 ++ addu $h3,$h3,$h2 # carry
11698 ++ maddu $rs2,$d2 # d2*s2
11699 ++ addu $h4,$h4,$padbit
11700 ++ maddu $rs1,$d3 # d3*s1
11701 ++ addu $h4,$h4,$h3
11702 ++ mfhi $at
11703 ++ mflo $h0
11704 ++
11705 ++ multu $r1,$d0 # d0*r1
11706 ++ maddu $r0,$d1 # d1*r0
11707 ++ maddu $rs3,$d2 # d2*s3
11708 ++ maddu $rs2,$d3 # d3*s2
11709 ++ maddu $rs1,$h4 # h4*s1
11710 ++ maddu $at,$one # hi*1
11711 ++ mfhi $at
11712 ++ mflo $h1
11713 ++
11714 ++ multu $r2,$d0 # d0*r2
11715 ++ maddu $r1,$d1 # d1*r1
11716 ++ maddu $r0,$d2 # d2*r0
11717 ++ maddu $rs3,$d3 # d3*s3
11718 ++ maddu $rs2,$h4 # h4*s2
11719 ++ maddu $at,$one # hi*1
11720 ++ mfhi $at
11721 ++ mflo $h2
11722 ++
11723 ++ mul $t0,$r0,$h4 # h4*r0
11724 ++
11725 ++ multu $r3,$d0 # d0*r3
11726 ++ maddu $r2,$d1 # d1*r2
11727 ++ maddu $r1,$d2 # d2*r1
11728 ++ maddu $r0,$d3 # d3*r0
11729 ++ maddu $rs3,$h4 # h4*s3
11730 ++ maddu $at,$one # hi*1
11731 ++ mfhi $at
11732 ++ mflo $h3
11733 ++
11734 ++ addiu $inp,$inp,16
11735 ++
11736 ++ addu $h4,$t0,$at
11737 ++#else
11738 ++ multu ($r0,$d0) # d0*r0
11739 ++ mflo ($h0,$r0,$d0)
11740 ++ mfhi ($h1,$r0,$d0)
11741 ++
11742 ++ sltu $h2,$d3,$h2
11743 ++ addu $h3,$h3,$h2 # carry
11744 ++
11745 ++ multu ($rs3,$d1) # d1*s3
11746 ++ mflo ($at,$rs3,$d1)
11747 ++ mfhi ($t0,$rs3,$d1)
11748 ++
11749 ++ addu $h4,$h4,$padbit
11750 ++ addiu $inp,$inp,16
11751 ++ addu $h4,$h4,$h3
11752 ++
11753 ++ multu ($rs2,$d2) # d2*s2
11754 ++ mflo ($a3,$rs2,$d2)
11755 ++ mfhi ($t1,$rs2,$d2)
11756 ++ addu $h0,$h0,$at
11757 ++ addu $h1,$h1,$t0
11758 ++ multu ($rs1,$d3) # d3*s1
11759 ++ sltu $at,$h0,$at
11760 ++ addu $h1,$h1,$at
11761 ++
11762 ++ mflo ($at,$rs1,$d3)
11763 ++ mfhi ($t0,$rs1,$d3)
11764 ++ addu $h0,$h0,$a3
11765 ++ addu $h1,$h1,$t1
11766 ++ multu ($r1,$d0) # d0*r1
11767 ++ sltu $a3,$h0,$a3
11768 ++ addu $h1,$h1,$a3
11769 ++
11770 ++
11771 ++ mflo ($a3,$r1,$d0)
11772 ++ mfhi ($h2,$r1,$d0)
11773 ++ addu $h0,$h0,$at
11774 ++ addu $h1,$h1,$t0
11775 ++ multu ($r0,$d1) # d1*r0
11776 ++ sltu $at,$h0,$at
11777 ++ addu $h1,$h1,$at
11778 ++
11779 ++ mflo ($at,$r0,$d1)
11780 ++ mfhi ($t0,$r0,$d1)
11781 ++ addu $h1,$h1,$a3
11782 ++ sltu $a3,$h1,$a3
11783 ++ multu ($rs3,$d2) # d2*s3
11784 ++ addu $h2,$h2,$a3
11785 ++
11786 ++ mflo ($a3,$rs3,$d2)
11787 ++ mfhi ($t1,$rs3,$d2)
11788 ++ addu $h1,$h1,$at
11789 ++ addu $h2,$h2,$t0
11790 ++ multu ($rs2,$d3) # d3*s2
11791 ++ sltu $at,$h1,$at
11792 ++ addu $h2,$h2,$at
11793 ++
11794 ++ mflo ($at,$rs2,$d3)
11795 ++ mfhi ($t0,$rs2,$d3)
11796 ++ addu $h1,$h1,$a3
11797 ++ addu $h2,$h2,$t1
11798 ++ multu ($rs1,$h4) # h4*s1
11799 ++ sltu $a3,$h1,$a3
11800 ++ addu $h2,$h2,$a3
11801 ++
11802 ++ mflo ($a3,$rs1,$h4)
11803 ++ addu $h1,$h1,$at
11804 ++ addu $h2,$h2,$t0
11805 ++ multu ($r2,$d0) # d0*r2
11806 ++ sltu $at,$h1,$at
11807 ++ addu $h2,$h2,$at
11808 ++
11809 ++
11810 ++ mflo ($at,$r2,$d0)
11811 ++ mfhi ($h3,$r2,$d0)
11812 ++ addu $h1,$h1,$a3
11813 ++ sltu $a3,$h1,$a3
11814 ++ multu ($r1,$d1) # d1*r1
11815 ++ addu $h2,$h2,$a3
11816 ++
11817 ++ mflo ($a3,$r1,$d1)
11818 ++ mfhi ($t1,$r1,$d1)
11819 ++ addu $h2,$h2,$at
11820 ++ sltu $at,$h2,$at
11821 ++ multu ($r0,$d2) # d2*r0
11822 ++ addu $h3,$h3,$at
11823 ++
11824 ++ mflo ($at,$r0,$d2)
11825 ++ mfhi ($t0,$r0,$d2)
11826 ++ addu $h2,$h2,$a3
11827 ++ addu $h3,$h3,$t1
11828 ++ multu ($rs3,$d3) # d3*s3
11829 ++ sltu $a3,$h2,$a3
11830 ++ addu $h3,$h3,$a3
11831 ++
11832 ++ mflo ($a3,$rs3,$d3)
11833 ++ mfhi ($t1,$rs3,$d3)
11834 ++ addu $h2,$h2,$at
11835 ++ addu $h3,$h3,$t0
11836 ++ multu ($rs2,$h4) # h4*s2
11837 ++ sltu $at,$h2,$at
11838 ++ addu $h3,$h3,$at
11839 ++
11840 ++ mflo ($at,$rs2,$h4)
11841 ++ addu $h2,$h2,$a3
11842 ++ addu $h3,$h3,$t1
11843 ++ multu ($r3,$d0) # d0*r3
11844 ++ sltu $a3,$h2,$a3
11845 ++ addu $h3,$h3,$a3
11846 ++
11847 ++
11848 ++ mflo ($a3,$r3,$d0)
11849 ++ mfhi ($t1,$r3,$d0)
11850 ++ addu $h2,$h2,$at
11851 ++ sltu $at,$h2,$at
11852 ++ multu ($r2,$d1) # d1*r2
11853 ++ addu $h3,$h3,$at
11854 ++
11855 ++ mflo ($at,$r2,$d1)
11856 ++ mfhi ($t0,$r2,$d1)
11857 ++ addu $h3,$h3,$a3
11858 ++ sltu $a3,$h3,$a3
11859 ++ multu ($r0,$d3) # d3*r0
11860 ++ addu $t1,$t1,$a3
11861 ++
11862 ++ mflo ($a3,$r0,$d3)
11863 ++ mfhi ($d3,$r0,$d3)
11864 ++ addu $h3,$h3,$at
11865 ++ addu $t1,$t1,$t0
11866 ++ multu ($r1,$d2) # d2*r1
11867 ++ sltu $at,$h3,$at
11868 ++ addu $t1,$t1,$at
11869 ++
11870 ++ mflo ($at,$r1,$d2)
11871 ++ mfhi ($t0,$r1,$d2)
11872 ++ addu $h3,$h3,$a3
11873 ++ addu $t1,$t1,$d3
11874 ++ multu ($rs3,$h4) # h4*s3
11875 ++ sltu $a3,$h3,$a3
11876 ++ addu $t1,$t1,$a3
11877 ++
11878 ++ mflo ($a3,$rs3,$h4)
11879 ++ addu $h3,$h3,$at
11880 ++ addu $t1,$t1,$t0
11881 ++ multu ($r0,$h4) # h4*r0
11882 ++ sltu $at,$h3,$at
11883 ++ addu $t1,$t1,$at
11884 ++
11885 ++
11886 ++ mflo ($h4,$r0,$h4)
11887 ++ addu $h3,$h3,$a3
11888 ++ sltu $a3,$h3,$a3
11889 ++ addu $t1,$t1,$a3
11890 ++ addu $h4,$h4,$t1
11891 ++
11892 ++ li $padbit,1 # if we loop, padbit is 1
11893 ++#endif
11894 ++ bne $inp,$len,.Loop
11895 ++
11896 ++ sw $h0,0($ctx) # store hash value
11897 ++ sw $h1,4($ctx)
11898 ++ sw $h2,8($ctx)
11899 ++ sw $h3,12($ctx)
11900 ++ sw $h4,16($ctx)
11901 ++
11902 ++ .set noreorder
11903 ++.Labort:
11904 ++ lw $s11,4*11($sp)
11905 ++ lw $s10,4*10($sp)
11906 ++ lw $s9, 4*9($sp)
11907 ++ lw $s8, 4*8($sp)
11908 ++ lw $s7, 4*7($sp)
11909 ++ lw $s6, 4*6($sp)
11910 ++ lw $s5, 4*5($sp)
11911 ++ lw $s4, 4*4($sp)
11912 ++___
11913 ++$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi epilogue
11914 ++ lw $s3, 4*3($sp)
11915 ++ lw $s2, 4*2($sp)
11916 ++ lw $s1, 4*1($sp)
11917 ++ lw $s0, 4*0($sp)
11918 ++___
11919 ++$code.=<<___;
11920 ++ jr $ra
11921 ++ addu $sp,$sp,4*12
11922 ++.end poly1305_blocks
11923 ++___
11924 ++}
11925 ++{
11926 ++my ($ctx,$mac,$nonce,$tmp4) = ($a0,$a1,$a2,$a3); # poly1305_emit operands: state pointer, 16-byte MAC output, nonce pointer; $a3 is an extra temporary that holds the fifth hash word
11927 ++
11928 ++$code.=<<___;
11929 ++.align 5
11930 ++.globl poly1305_emit
11931 ++.ent poly1305_emit
11932 ++poly1305_emit:
11933 ++ .frame $sp,0,$ra
11934 ++ .set reorder
11935 ++
11936 ++ lw $tmp4,16($ctx)
11937 ++ lw $tmp0,0($ctx)
11938 ++ lw $tmp1,4($ctx)
11939 ++ lw $tmp2,8($ctx)
11940 ++ lw $tmp3,12($ctx)
11941 ++
11942 ++ li $in0,-4 # final reduction
11943 ++ srl $ctx,$tmp4,2
11944 ++ and $in0,$in0,$tmp4
11945 ++ andi $tmp4,$tmp4,3
11946 ++ addu $ctx,$ctx,$in0
11947 ++
11948 ++ addu $tmp0,$tmp0,$ctx
11949 ++ sltu $ctx,$tmp0,$ctx
11950 ++ addiu $in0,$tmp0,5 # compare to modulus
11951 ++ addu $tmp1,$tmp1,$ctx
11952 ++ sltiu $in1,$in0,5
11953 ++ sltu $ctx,$tmp1,$ctx
11954 ++ addu $in1,$in1,$tmp1
11955 ++ addu $tmp2,$tmp2,$ctx
11956 ++ sltu $in2,$in1,$tmp1
11957 ++ sltu $ctx,$tmp2,$ctx
11958 ++ addu $in2,$in2,$tmp2
11959 ++ addu $tmp3,$tmp3,$ctx
11960 ++ sltu $in3,$in2,$tmp2
11961 ++ sltu $ctx,$tmp3,$ctx
11962 ++ addu $in3,$in3,$tmp3
11963 ++ addu $tmp4,$tmp4,$ctx
11964 ++ sltu $ctx,$in3,$tmp3
11965 ++ addu $ctx,$tmp4
11966 ++
11967 ++ srl $ctx,2 # see if it carried/borrowed
11968 ++ subu $ctx,$zero,$ctx
11969 ++
11970 ++ xor $in0,$tmp0
11971 ++ xor $in1,$tmp1
11972 ++ xor $in2,$tmp2
11973 ++ xor $in3,$tmp3
11974 ++ and $in0,$ctx
11975 ++ and $in1,$ctx
11976 ++ and $in2,$ctx
11977 ++ and $in3,$ctx
11978 ++ xor $in0,$tmp0
11979 ++ xor $in1,$tmp1
11980 ++ xor $in2,$tmp2
11981 ++ xor $in3,$tmp3
11982 ++
11983 ++ lw $tmp0,0($nonce) # load nonce
11984 ++ lw $tmp1,4($nonce)
11985 ++ lw $tmp2,8($nonce)
11986 ++ lw $tmp3,12($nonce)
11987 ++
11988 ++ addu $in0,$tmp0 # accumulate nonce
11989 ++ sltu $ctx,$in0,$tmp0
11990 ++
11991 ++ addu $in1,$tmp1
11992 ++ sltu $tmp1,$in1,$tmp1
11993 ++ addu $in1,$ctx
11994 ++ sltu $ctx,$in1,$ctx
11995 ++ addu $ctx,$tmp1
11996 ++
11997 ++ addu $in2,$tmp2
11998 ++ sltu $tmp2,$in2,$tmp2
11999 ++ addu $in2,$ctx
12000 ++ sltu $ctx,$in2,$ctx
12001 ++ addu $ctx,$tmp2
12002 ++
12003 ++ addu $in3,$tmp3
12004 ++ addu $in3,$ctx
12005 ++
12006 ++ srl $tmp0,$in0,8 # write mac value
12007 ++ srl $tmp1,$in0,16
12008 ++ srl $tmp2,$in0,24
12009 ++ sb $in0, 0($mac)
12010 ++ sb $tmp0,1($mac)
12011 ++ srl $tmp0,$in1,8
12012 ++ sb $tmp1,2($mac)
12013 ++ srl $tmp1,$in1,16
12014 ++ sb $tmp2,3($mac)
12015 ++ srl $tmp2,$in1,24
12016 ++ sb $in1, 4($mac)
12017 ++ sb $tmp0,5($mac)
12018 ++ srl $tmp0,$in2,8
12019 ++ sb $tmp1,6($mac)
12020 ++ srl $tmp1,$in2,16
12021 ++ sb $tmp2,7($mac)
12022 ++ srl $tmp2,$in2,24
12023 ++ sb $in2, 8($mac)
12024 ++ sb $tmp0,9($mac)
12025 ++ srl $tmp0,$in3,8
12026 ++ sb $tmp1,10($mac)
12027 ++ srl $tmp1,$in3,16
12028 ++ sb $tmp2,11($mac)
12029 ++ srl $tmp2,$in3,24
12030 ++ sb $in3, 12($mac)
12031 ++ sb $tmp0,13($mac)
12032 ++ sb $tmp1,14($mac)
12033 ++ sb $tmp2,15($mac)
12034 ++
12035 ++ jr $ra
12036 ++.end poly1305_emit
12037 ++.rdata
12038 ++.asciiz "Poly1305 for MIPS32, CRYPTOGAMS by \@dot-asm"
12039 ++.align 2
12040 ++___
12041 ++}
12042 ++}}}
12043 ++
12044 ++$output=pop and open STDOUT,">$output";
12045 ++print $code;
12046 ++close STDOUT;
12047 +diff --git a/crypto/Kconfig b/crypto/Kconfig
12048 +index 15cfb02c3e49..0be5b4092f18 100644
12049 +--- a/crypto/Kconfig
12050 ++++ b/crypto/Kconfig
12051 +@@ -707,6 +707,11 @@ config CRYPTO_POLY1305_X86_64
12052 + in IETF protocols. This is the x86_64 assembler implementation using SIMD
12053 + instructions.
12054 +
12055 ++config CRYPTO_POLY1305_MIPS
12056 ++ tristate "Poly1305 authenticator algorithm (MIPS optimized)"
12057 ++ depends on CPU_MIPS32 || (CPU_MIPS64 && 64BIT)
12058 ++ select CRYPTO_ARCH_HAVE_LIB_POLY1305
12059 ++
12060 + config CRYPTO_MD4
12061 + tristate "MD4 digest algorithm"
12062 + select CRYPTO_HASH
12063 +diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
12064 +index 9bd15b227e78..d15ec5382986 100644
12065 +--- a/lib/crypto/Kconfig
12066 ++++ b/lib/crypto/Kconfig
12067 +@@ -39,6 +39,7 @@ config CRYPTO_LIB_DES
12068 +
12069 + config CRYPTO_LIB_POLY1305_RSIZE
12070 + int
12071 ++ default 2 if MIPS
12072 + default 4 if X86_64
12073 + default 9 if ARM || ARM64
12074 + default 1
12075 +--
12076 +cgit v1.2.3-4-ga26e
12077 +
12078 +
12079 +From 10df52cf5fa5d09d6d1d0788d1f670a927a15929 Mon Sep 17 00:00:00 2001
12080 +From: "Jason A. Donenfeld" <Jason@×××××.com>
12081 +Date: Fri, 8 Nov 2019 13:22:28 +0100
12082 +Subject: crypto: blake2s - generic C library implementation and selftest
12083 +
12084 +commit 66d7fb94e4ffe5acc589e0b2b4710aecc1f07a28 upstream.
12085 +
12086 +The C implementation was originally based on Samuel Neves' public
12087 +domain reference implementation but has since been heavily modified
12088 +for the kernel. We're able to do compile-time optimizations by moving
12089 +some scaffolding around the final function into the header file.
12090 +
12091 +Information: https://blake2.net/
12092 +
12093 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
12094 +Signed-off-by: Samuel Neves <sneves@××××××.pt>
12095 +Co-developed-by: Samuel Neves <sneves@××××××.pt>
12096 +[ardb: - move from lib/zinc to lib/crypto
12097 + - remove simd handling
12098 + - rewrote selftest for better coverage
12099 + - use fixed digest length for blake2s_hmac() and rename to
12100 + blake2s256_hmac() ]
12101 +Signed-off-by: Ard Biesheuvel <ardb@××××××.org>
12102 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
12103 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
12104 +---
12105 + include/crypto/blake2s.h | 106 +++++++
12106 + include/crypto/internal/blake2s.h | 19 ++
12107 + lib/crypto/Kconfig | 25 ++
12108 + lib/crypto/Makefile | 10 +
12109 + lib/crypto/blake2s-generic.c | 111 +++++++
12110 + lib/crypto/blake2s-selftest.c | 622 ++++++++++++++++++++++++++++++++++++++
12111 + lib/crypto/blake2s.c | 126 ++++++++
12112 + 7 files changed, 1019 insertions(+)
12113 + create mode 100644 include/crypto/blake2s.h
12114 + create mode 100644 include/crypto/internal/blake2s.h
12115 + create mode 100644 lib/crypto/blake2s-generic.c
12116 + create mode 100644 lib/crypto/blake2s-selftest.c
12117 + create mode 100644 lib/crypto/blake2s.c
12118 +
12119 +diff --git a/include/crypto/blake2s.h b/include/crypto/blake2s.h
12120 +new file mode 100644
12121 +index 000000000000..b471deac28ff
12122 +--- /dev/null
12123 ++++ b/include/crypto/blake2s.h
12124 +@@ -0,0 +1,106 @@
12125 ++/* SPDX-License-Identifier: GPL-2.0 OR MIT */
12126 ++/*
12127 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
12128 ++ */
12129 ++
12130 ++#ifndef BLAKE2S_H
12131 ++#define BLAKE2S_H
12132 ++
12133 ++#include <linux/types.h>
12134 ++#include <linux/kernel.h>
12135 ++#include <linux/string.h>
12136 ++
12137 ++#include <asm/bug.h>
12138 ++
12139 ++enum blake2s_lengths { /* BLAKE2s size constants, all in bytes */
12140 ++ BLAKE2S_BLOCK_SIZE = 64, /* size of one input block and of the state buffer */
12141 ++ BLAKE2S_HASH_SIZE = 32, /* upper bound on outlen checked by the DEBUG warnings */
12142 ++ BLAKE2S_KEY_SIZE = 32, /* upper bound on keylen checked by the DEBUG warnings */
12143 ++
12144 ++ BLAKE2S_128_HASH_SIZE = 16, /* named digest lengths for the truncated variants */
12145 ++ BLAKE2S_160_HASH_SIZE = 20,
12146 ++ BLAKE2S_224_HASH_SIZE = 28,
12147 ++ BLAKE2S_256_HASH_SIZE = 32,
12148 ++};
12149 ++
12150 ++struct blake2s_state { /* streaming BLAKE2s hash state */
12151 ++ u32 h[8]; /* chaining value; seeded from the IV words by blake2s_init_param() */
12152 ++ u32 t[2]; /* counter words mixed into each compression; NOTE(review): presumably t[0] is the low word of a byte count - confirm */
12153 ++ u32 f[2]; /* finalization flag words, zero after init */
12154 ++ u8 buf[BLAKE2S_BLOCK_SIZE]; /* pending input; blake2s_init_key() stores the key here */
12155 ++ unsigned int buflen; /* presumably the number of bytes of buf in use - confirm against blake2s_update() */
12156 ++ unsigned int outlen; /* digest length requested at init time */
12157 ++};
12158 ++
12159 ++enum blake2s_iv { /* initialization vector words: starting values for h[0..7] in blake2s_init_param() */
12160 ++ BLAKE2S_IV0 = 0x6A09E667UL,
12161 ++ BLAKE2S_IV1 = 0xBB67AE85UL,
12162 ++ BLAKE2S_IV2 = 0x3C6EF372UL,
12163 ++ BLAKE2S_IV3 = 0xA54FF53AUL,
12164 ++ BLAKE2S_IV4 = 0x510E527FUL,
12165 ++ BLAKE2S_IV5 = 0x9B05688CUL,
12166 ++ BLAKE2S_IV6 = 0x1F83D9ABUL,
12167 ++ BLAKE2S_IV7 = 0x5BE0CD19UL,
12168 ++};
12169 ++
12170 ++void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen);
12171 ++void blake2s_final(struct blake2s_state *state, u8 *out);
12172 ++
12173 ++static inline void blake2s_init_param(struct blake2s_state *state, /* reset *state for a new hash configured by param */
12174 ++ const u32 param)
12175 ++{
12176 ++ *state = (struct blake2s_state){{ /* inner braces initialize h[]; all other members are zero-initialized */
12177 ++ BLAKE2S_IV0 ^ param, /* only the first word carries the parameter word */
12178 ++ BLAKE2S_IV1,
12179 ++ BLAKE2S_IV2,
12180 ++ BLAKE2S_IV3,
12181 ++ BLAKE2S_IV4,
12182 ++ BLAKE2S_IV5,
12183 ++ BLAKE2S_IV6,
12184 ++ BLAKE2S_IV7,
12185 ++ }};
12186 ++}
12187 ++
12188 ++static inline void blake2s_init(struct blake2s_state *state, /* begin an unkeyed hash with an outlen-byte digest; outlen is not validated here */
12189 ++ const size_t outlen)
12190 ++{
12191 ++ blake2s_init_param(state, 0x01010000 | outlen); /* parameter word has the form 0x0101kknn (RFC 7693): kk = key length (0 here), nn = outlen */
12192 ++ state->outlen = outlen; /* set after the reset above, which cleared it */
12193 ++}
12194 ++
12195 ++static inline void blake2s_init_key(struct blake2s_state *state, /* begin a keyed hash (MAC) with an outlen-byte digest */
12196 ++ const size_t outlen, const void *key,
12197 ++ const size_t keylen)
12198 ++{
12199 ++ WARN_ON(IS_ENABLED(DEBUG) && (!outlen || outlen > BLAKE2S_HASH_SIZE || /* diagnostics only: bad arguments warn when DEBUG is enabled, nothing is rejected */
12200 ++ !key || !keylen || keylen > BLAKE2S_KEY_SIZE));
12201 ++
12202 ++ blake2s_init_param(state, 0x01010000 | keylen << 8 | outlen); /* keylen occupies bits 8..15 of the parameter word, outlen bits 0..7 */
12203 ++ memcpy(state->buf, key, keylen); /* the remainder of buf stays zero from the reset above */
12204 ++ state->buflen = BLAKE2S_BLOCK_SIZE; /* buffer is marked as one full block: the key followed by zero padding */
12205 ++ state->outlen = outlen;
12206 ++}
12207 ++
12208 ++static inline void blake2s(u8 *out, const u8 *in, const u8 *key, /* one-shot helper: init (keyed when keylen != 0), absorb in[0..inlen), finalize into out */
12209 ++ const size_t outlen, const size_t inlen,
12210 ++ const size_t keylen)
12211 ++{
12212 ++ struct blake2s_state state;
12213 ++
12214 ++ WARN_ON(IS_ENABLED(DEBUG) && ((!in && inlen > 0) || !out || !outlen || /* diagnostics only (DEBUG builds); execution continues regardless */
12215 ++ outlen > BLAKE2S_HASH_SIZE || keylen > BLAKE2S_KEY_SIZE ||
12216 ++ (!key && keylen)));
12217 ++
12218 ++ if (keylen) /* keylen == 0 selects the unkeyed variant and key is not read */
12219 ++ blake2s_init_key(&state, outlen, key, keylen);
12220 ++ else
12221 ++ blake2s_init(&state, outlen);
12222 ++
12223 ++ blake2s_update(&state, in, inlen);
12224 ++ blake2s_final(&state, out);
12225 ++}
12226 ++
12227 ++void blake2s256_hmac(u8 *out, const u8 *in, const u8 *key, const size_t inlen,
12228 ++ const size_t keylen);
12229 ++
12230 ++#endif /* BLAKE2S_H */
12231 +diff --git a/include/crypto/internal/blake2s.h b/include/crypto/internal/blake2s.h
12232 +new file mode 100644
12233 +index 000000000000..941693effc7d
12234 +--- /dev/null
12235 ++++ b/include/crypto/internal/blake2s.h
12236 +@@ -0,0 +1,19 @@
12237 ++/* SPDX-License-Identifier: GPL-2.0 OR MIT */
12238 ++
12239 ++#ifndef BLAKE2S_INTERNAL_H
12240 ++#define BLAKE2S_INTERNAL_H
12241 ++
12242 ++#include <crypto/blake2s.h>
12243 ++
12244 ++void blake2s_compress_generic(struct blake2s_state *state,const u8 *block,
12245 ++ size_t nblocks, const u32 inc);
12246 ++
12247 ++void blake2s_compress_arch(struct blake2s_state *state,const u8 *block,
12248 ++ size_t nblocks, const u32 inc);
12249 ++
12250 ++static inline void blake2s_set_lastblock(struct blake2s_state *state) /* flag the state so the next compression is treated as the final one */
12251 ++{
12252 ++ state->f[0] = -1; /* -1 stored in a u32 sets every bit of f[0] */
12253 ++}
12254 ++
12255 ++#endif /* BLAKE2S_INTERNAL_H */
12256 +diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
12257 +index d15ec5382986..7ad98b624e55 100644
12258 +--- a/lib/crypto/Kconfig
12259 ++++ b/lib/crypto/Kconfig
12260 +@@ -8,6 +8,31 @@ config CRYPTO_LIB_AES
12261 + config CRYPTO_LIB_ARC4
12262 + tristate
12263 +
12264 ++config CRYPTO_ARCH_HAVE_LIB_BLAKE2S
12265 ++ tristate
12266 ++ help
12267 ++ Declares whether the architecture provides an arch-specific
12268 ++ accelerated implementation of the Blake2s library interface,
12269 ++ either builtin or as a module.
12270 ++
12271 ++config CRYPTO_LIB_BLAKE2S_GENERIC
12272 ++ tristate
12273 ++ help
12274 ++ This symbol can be depended upon by arch implementations of the
12275 ++ Blake2s library interface that require the generic code as a
12276 ++ fallback, e.g., for SIMD implementations. If no arch specific
12277 ++ implementation is enabled, this implementation serves the users
12278 ++ of CRYPTO_LIB_BLAKE2S.
12279 ++
12280 ++config CRYPTO_LIB_BLAKE2S
12281 ++ tristate "BLAKE2s hash function library"
12282 ++ depends on CRYPTO_ARCH_HAVE_LIB_BLAKE2S || !CRYPTO_ARCH_HAVE_LIB_BLAKE2S
12283 ++ select CRYPTO_LIB_BLAKE2S_GENERIC if CRYPTO_ARCH_HAVE_LIB_BLAKE2S=n
12284 ++ help
12285 ++ Enable the Blake2s library interface. This interface may be fulfilled
12286 ++ by either the generic implementation or an arch-specific one, if one
12287 ++ is available and enabled.
12288 ++
12289 + config CRYPTO_ARCH_HAVE_LIB_CHACHA
12290 + tristate
12291 + help
12292 +diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
12293 +index b58ab6843a9d..8ca66b5f9807 100644
12294 +--- a/lib/crypto/Makefile
12295 ++++ b/lib/crypto/Makefile
12296 +@@ -10,6 +10,12 @@ libaes-y := aes.o
12297 + obj-$(CONFIG_CRYPTO_LIB_ARC4) += libarc4.o
12298 + libarc4-y := arc4.o
12299 +
12300 ++obj-$(CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC) += libblake2s-generic.o
12301 ++libblake2s-generic-y += blake2s-generic.o
12302 ++
12303 ++obj-$(CONFIG_CRYPTO_LIB_BLAKE2S) += libblake2s.o
12304 ++libblake2s-y += blake2s.o
12305 ++
12306 + obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o
12307 + libdes-y := des.o
12308 +
12309 +@@ -18,3 +24,7 @@ libpoly1305-y := poly1305.o
12310 +
12311 + obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o
12312 + libsha256-y := sha256.o
12313 ++
12314 ++ifneq ($(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS),y)
12315 ++libblake2s-y += blake2s-selftest.o
12316 ++endif
12317 +diff --git a/lib/crypto/blake2s-generic.c b/lib/crypto/blake2s-generic.c
12318 +new file mode 100644
12319 +index 000000000000..04ff8df24513
12320 +--- /dev/null
12321 ++++ b/lib/crypto/blake2s-generic.c
12322 +@@ -0,0 +1,111 @@
12323 ++// SPDX-License-Identifier: GPL-2.0 OR MIT
12324 ++/*
12325 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
12326 ++ *
12327 ++ * This is an implementation of the BLAKE2s hash and PRF functions.
12328 ++ *
12329 ++ * Information: https://blake2.net/
12330 ++ *
12331 ++ */
12332 ++
12333 ++#include <crypto/internal/blake2s.h>
12334 ++#include <linux/types.h>
12335 ++#include <linux/string.h>
12336 ++#include <linux/kernel.h>
12337 ++#include <linux/module.h>
12338 ++#include <linux/init.h>
12339 ++#include <linux/bug.h>
12340 ++#include <asm/unaligned.h>
12341 ++
12342 ++static const u8 blake2s_sigma[10][16] = {
12343 ++ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
12344 ++ { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
12345 ++ { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
12346 ++ { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
12347 ++ { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
12348 ++ { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
12349 ++ { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
12350 ++ { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
12351 ++ { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
12352 ++ { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 },
12353 ++};
12354 ++
12355 ++static inline void blake2s_increment_counter(struct blake2s_state *state, /* add inc to the 64-bit counter kept as t[1]:t[0] */
12356 ++ const u32 inc)
12357 ++{
12358 ++ state->t[0] += inc; /* low word; may wrap */
12359 ++ state->t[1] += (state->t[0] < inc); /* a sum smaller than inc means the low word wrapped: carry 1 into the high word */
12360 ++}
12361 ++
12362 ++void blake2s_compress_generic(struct blake2s_state *state,const u8 *block, /* compress nblocks consecutive 64-byte blocks at block into state->h, advancing the counter by inc per block */
12363 ++ size_t nblocks, const u32 inc)
12364 ++{
12365 ++ u32 m[16]; /* message words of the current block */
12366 ++ u32 v[16]; /* working vector for the rounds */
12367 ++ int i;
12368 ++
12369 ++ WARN_ON(IS_ENABLED(DEBUG) && /* multi-block calls are expected to advance by a full block each; only warned about in DEBUG builds */
12370 ++ (nblocks > 1 && inc != BLAKE2S_BLOCK_SIZE));
12371 ++
12372 ++ while (nblocks > 0) {
12373 ++ blake2s_increment_counter(state, inc); /* counter is bumped before it is mixed into v[12]/v[13] below */
12374 ++ memcpy(m, block, BLAKE2S_BLOCK_SIZE); /* copy into a u32 array; presumably so block needs no particular alignment */
12375 ++ le32_to_cpu_array(m, ARRAY_SIZE(m)); /* convert the 16 words from little-endian to CPU byte order */
12376 ++ memcpy(v, state->h, 32); /* v[0..7] = current chaining value (8 words = 32 bytes) */
12377 ++ v[ 8] = BLAKE2S_IV0;
12378 ++ v[ 9] = BLAKE2S_IV1;
12379 ++ v[10] = BLAKE2S_IV2;
12380 ++ v[11] = BLAKE2S_IV3;
12381 ++ v[12] = BLAKE2S_IV4 ^ state->t[0]; /* counter, low word */
12382 ++ v[13] = BLAKE2S_IV5 ^ state->t[1]; /* counter, high word */
12383 ++ v[14] = BLAKE2S_IV6 ^ state->f[0]; /* finalization flag words */
12384 ++ v[15] = BLAKE2S_IV7 ^ state->f[1];
12385 ++ /* G mixes four working words using the two message words that blake2s_sigma[r] selects for step i */
12386 ++#define G(r, i, a, b, c, d) do { \
12387 ++ a += b + m[blake2s_sigma[r][2 * i + 0]]; \
12388 ++ d = ror32(d ^ a, 16); \
12389 ++ c += d; \
12390 ++ b = ror32(b ^ c, 12); \
12391 ++ a += b + m[blake2s_sigma[r][2 * i + 1]]; \
12392 ++ d = ror32(d ^ a, 8); \
12393 ++ c += d; \
12394 ++ b = ror32(b ^ c, 7); \
12395 ++} while (0)
12396 ++ /* ROUND applies G to the four columns (steps 0-3) and then the four diagonals (steps 4-7) of v viewed as a 4x4 matrix */
12397 ++#define ROUND(r) do { \
12398 ++ G(r, 0, v[0], v[ 4], v[ 8], v[12]); \
12399 ++ G(r, 1, v[1], v[ 5], v[ 9], v[13]); \
12400 ++ G(r, 2, v[2], v[ 6], v[10], v[14]); \
12401 ++ G(r, 3, v[3], v[ 7], v[11], v[15]); \
12402 ++ G(r, 4, v[0], v[ 5], v[10], v[15]); \
12403 ++ G(r, 5, v[1], v[ 6], v[11], v[12]); \
12404 ++ G(r, 6, v[2], v[ 7], v[ 8], v[13]); \
12405 ++ G(r, 7, v[3], v[ 4], v[ 9], v[14]); \
12406 ++} while (0)
12407 ++ ROUND(0); /* ten rounds, one per row of blake2s_sigma */
12408 ++ ROUND(1);
12409 ++ ROUND(2);
12410 ++ ROUND(3);
12411 ++ ROUND(4);
12412 ++ ROUND(5);
12413 ++ ROUND(6);
12414 ++ ROUND(7);
12415 ++ ROUND(8);
12416 ++ ROUND(9);
12417 ++
12418 ++#undef G
12419 ++#undef ROUND
12420 ++
12421 ++ for (i = 0; i < 8; ++i) /* fold both halves of the working vector back into the chaining value */
12422 ++ state->h[i] ^= v[i] ^ v[i + 8];
12423 ++
12424 ++ block += BLAKE2S_BLOCK_SIZE; /* step to the next input block */
12425 ++ --nblocks;
12426 ++ }
12427 ++}
12428 ++
12429 ++EXPORT_SYMBOL(blake2s_compress_generic);
12430 ++
12431 ++MODULE_LICENSE("GPL v2");
12432 ++MODULE_DESCRIPTION("BLAKE2s hash function");
12433 ++MODULE_AUTHOR("Jason A. Donenfeld <Jason@×××××.com>");
12434 +diff --git a/lib/crypto/blake2s-selftest.c b/lib/crypto/blake2s-selftest.c
12435 +new file mode 100644
12436 +index 000000000000..79ef404a990d
12437 +--- /dev/null
12438 ++++ b/lib/crypto/blake2s-selftest.c
12439 +@@ -0,0 +1,622 @@
12440 ++// SPDX-License-Identifier: GPL-2.0 OR MIT
12441 ++/*
12442 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
12443 ++ */
12444 ++
12445 ++#include <crypto/blake2s.h>
12446 ++#include <linux/string.h>
12447 ++
12448 ++/*
12449 ++ * blake2s_testvecs[] generated with the program below (using libb2-dev and
12450 ++ * libssl-dev [OpenSSL])
12451 ++ *
12452 ++ * #include <blake2.h>
12453 ++ * #include <stdint.h>
12454 ++ * #include <stdio.h>
12455 ++ *
12456 ++ * #include <openssl/evp.h>
12457 ++ * #include <openssl/hmac.h>
12458 ++ *
12459 ++ * #define BLAKE2S_TESTVEC_COUNT 256
12460 ++ *
12461 ++ * static void print_vec(const uint8_t vec[], int len)
12462 ++ * {
12463 ++ * int i;
12464 ++ *
12465 ++ * printf(" { ");
12466 ++ * for (i = 0; i < len; i++) {
12467 ++ * if (i && (i % 12) == 0)
12468 ++ * printf("\n ");
12469 ++ * printf("0x%02x, ", vec[i]);
12470 ++ * }
12471 ++ * printf("},\n");
12472 ++ * }
12473 ++ *
12474 ++ * int main(void)
12475 ++ * {
12476 ++ * uint8_t key[BLAKE2S_KEYBYTES];
12477 ++ * uint8_t buf[BLAKE2S_TESTVEC_COUNT];
12478 ++ * uint8_t hash[BLAKE2S_OUTBYTES];
12479 ++ * int i, j;
12480 ++ *
12481 ++ * key[0] = key[1] = 1;
12482 ++ * for (i = 2; i < BLAKE2S_KEYBYTES; ++i)
12483 ++ * key[i] = key[i - 2] + key[i - 1];
12484 ++ *
12485 ++ * for (i = 0; i < BLAKE2S_TESTVEC_COUNT; ++i)
12486 ++ * buf[i] = (uint8_t)i;
12487 ++ *
12488 ++ * printf("static const u8 blake2s_testvecs[][BLAKE2S_HASH_SIZE] __initconst = {\n");
12489 ++ *
12490 ++ * for (i = 0; i < BLAKE2S_TESTVEC_COUNT; ++i) {
12491 ++ * int outlen = 1 + i % BLAKE2S_OUTBYTES;
12492 ++ * int keylen = (13 * i) % (BLAKE2S_KEYBYTES + 1);
12493 ++ *
12494 ++ * blake2s(hash, buf, key + BLAKE2S_KEYBYTES - keylen, outlen, i,
12495 ++ * keylen);
12496 ++ * print_vec(hash, outlen);
12497 ++ * }
12498 ++ * printf("};\n\n");
12499 ++ *
12500 ++ * printf("static const u8 blake2s_hmac_testvecs[][BLAKE2S_HASH_SIZE] __initconst = {\n");
12501 ++ *
12502 ++ * HMAC(EVP_blake2s256(), key, sizeof(key), buf, sizeof(buf), hash, NULL);
12503 ++ * print_vec(hash, BLAKE2S_OUTBYTES);
12504 ++ *
12505 ++ * HMAC(EVP_blake2s256(), buf, sizeof(buf), key, sizeof(key), hash, NULL);
12506 ++ * print_vec(hash, BLAKE2S_OUTBYTES);
12507 ++ *
12508 ++ * printf("};\n");
12509 ++ *
12510 ++ * return 0;
12511 ++ * }
12512 ++ */
12513 ++static const u8 blake2s_testvecs[][BLAKE2S_HASH_SIZE] __initconst = {
12514 ++ { 0xa1, },
12515 ++ { 0x7c, 0x89, },
12516 ++ { 0x74, 0x0e, 0xd4, },
12517 ++ { 0x47, 0x0c, 0x21, 0x15, },
12518 ++ { 0x18, 0xd6, 0x9c, 0xa6, 0xc4, },
12519 ++ { 0x13, 0x5d, 0x16, 0x63, 0x2e, 0xf9, },
12520 ++ { 0x2c, 0xb5, 0x04, 0xb7, 0x99, 0xe2, 0x73, },
12521 ++ { 0x9a, 0x0f, 0xd2, 0x39, 0xd6, 0x68, 0x1b, 0x92, },
12522 ++ { 0xc8, 0xde, 0x7a, 0xea, 0x2f, 0xf4, 0xd2, 0xe3, 0x2b, },
12523 ++ { 0x5b, 0xf9, 0x43, 0x52, 0x0c, 0x12, 0xba, 0xb5, 0x93, 0x9f, },
12524 ++ { 0xc6, 0x2c, 0x4e, 0x80, 0xfc, 0x32, 0x5b, 0x33, 0xb8, 0xb8, 0x0a, },
12525 ++ { 0xa7, 0x5c, 0xfd, 0x3a, 0xcc, 0xbf, 0x90, 0xca, 0xb7, 0x97, 0xde, 0xd8, },
12526 ++ { 0x66, 0xca, 0x3c, 0xc4, 0x19, 0xef, 0x92, 0x66, 0x3f, 0x21, 0x8f, 0xda,
12527 ++ 0xb7, },
12528 ++ { 0xba, 0xe5, 0xbb, 0x30, 0x25, 0x94, 0x6d, 0xc3, 0x89, 0x09, 0xc4, 0x25,
12529 ++ 0x52, 0x3e, },
12530 ++ { 0xa2, 0xef, 0x0e, 0x52, 0x0b, 0x5f, 0xa2, 0x01, 0x6d, 0x0a, 0x25, 0xbc,
12531 ++ 0x57, 0xe2, 0x27, },
12532 ++ { 0x4f, 0xe0, 0xf9, 0x52, 0x12, 0xda, 0x84, 0xb7, 0xab, 0xae, 0xb0, 0xa6,
12533 ++ 0x47, 0x2a, 0xc7, 0xf5, },
12534 ++ { 0x56, 0xe7, 0xa8, 0x1c, 0x4c, 0xca, 0xed, 0x90, 0x31, 0xec, 0x87, 0x43,
12535 ++ 0xe7, 0x72, 0x08, 0xec, 0xbe, },
12536 ++ { 0x7e, 0xdf, 0x80, 0x1c, 0x93, 0x33, 0xfd, 0x53, 0x44, 0xba, 0xfd, 0x96,
12537 ++ 0xe1, 0xbb, 0xb5, 0x65, 0xa5, 0x00, },
12538 ++ { 0xec, 0x6b, 0xed, 0xf7, 0x7b, 0x62, 0x1d, 0x7d, 0xf4, 0x82, 0xf3, 0x1e,
12539 ++ 0x18, 0xff, 0x2b, 0xc4, 0x06, 0x20, 0x2a, },
12540 ++ { 0x74, 0x98, 0xd7, 0x68, 0x63, 0xed, 0x87, 0xe4, 0x5d, 0x8d, 0x9e, 0x1d,
12541 ++ 0xfd, 0x2a, 0xbb, 0x86, 0xac, 0xe9, 0x2a, 0x89, },
12542 ++ { 0x89, 0xc3, 0x88, 0xce, 0x2b, 0x33, 0x1e, 0x10, 0xd1, 0x37, 0x20, 0x86,
12543 ++ 0x28, 0x43, 0x70, 0xd9, 0xfb, 0x96, 0xd9, 0xb5, 0xd3, },
12544 ++ { 0xcb, 0x56, 0x74, 0x41, 0x8d, 0x80, 0x01, 0x9a, 0x6b, 0x38, 0xe1, 0x41,
12545 ++ 0xad, 0x9c, 0x62, 0x74, 0xce, 0x35, 0xd5, 0x6c, 0x89, 0x6e, },
12546 ++ { 0x79, 0xaf, 0x94, 0x59, 0x99, 0x26, 0xe1, 0xc9, 0x34, 0xfe, 0x7c, 0x22,
12547 ++ 0xf7, 0x43, 0xd7, 0x65, 0xd4, 0x48, 0x18, 0xac, 0x3d, 0xfd, 0x93, },
12548 ++ { 0x85, 0x0d, 0xff, 0xb8, 0x3e, 0x87, 0x41, 0xb0, 0x95, 0xd3, 0x3d, 0x00,
12549 ++ 0x47, 0x55, 0x9e, 0xd2, 0x69, 0xea, 0xbf, 0xe9, 0x7a, 0x2d, 0x61, 0x45, },
12550 ++ { 0x03, 0xe0, 0x85, 0xec, 0x54, 0xb5, 0x16, 0x53, 0xa8, 0xc4, 0x71, 0xe9,
12551 ++ 0x6a, 0xe7, 0xcb, 0xc4, 0x15, 0x02, 0xfc, 0x34, 0xa4, 0xa4, 0x28, 0x13,
12552 ++ 0xd1, },
12553 ++ { 0xe3, 0x34, 0x4b, 0xe1, 0xd0, 0x4b, 0x55, 0x61, 0x8f, 0xc0, 0x24, 0x05,
12554 ++ 0xe6, 0xe0, 0x3d, 0x70, 0x24, 0x4d, 0xda, 0xb8, 0x91, 0x05, 0x29, 0x07,
12555 ++ 0x01, 0x3e, },
12556 ++ { 0x61, 0xff, 0x01, 0x72, 0xb1, 0x4d, 0xf6, 0xfe, 0xd1, 0xd1, 0x08, 0x74,
12557 ++ 0xe6, 0x91, 0x44, 0xeb, 0x61, 0xda, 0x40, 0xaf, 0xfc, 0x8c, 0x91, 0x6b,
12558 ++ 0xec, 0x13, 0xed, },
12559 ++ { 0xd4, 0x40, 0xd2, 0xa0, 0x7f, 0xc1, 0x58, 0x0c, 0x85, 0xa0, 0x86, 0xc7,
12560 ++ 0x86, 0xb9, 0x61, 0xc9, 0xea, 0x19, 0x86, 0x1f, 0xab, 0x07, 0xce, 0x37,
12561 ++ 0x72, 0x67, 0x09, 0xfc, },
12562 ++ { 0x9e, 0xf8, 0x18, 0x67, 0x93, 0x10, 0x9b, 0x39, 0x75, 0xe8, 0x8b, 0x38,
12563 ++ 0x82, 0x7d, 0xb8, 0xb7, 0xa5, 0xaf, 0xe6, 0x6a, 0x22, 0x5e, 0x1f, 0x9c,
12564 ++ 0x95, 0x29, 0x19, 0xf2, 0x4b, },
12565 ++ { 0xc8, 0x62, 0x25, 0xf5, 0x98, 0xc9, 0xea, 0xe5, 0x29, 0x3a, 0xd3, 0x22,
12566 ++ 0xeb, 0xeb, 0x07, 0x7c, 0x15, 0x07, 0xee, 0x15, 0x61, 0xbb, 0x05, 0x30,
12567 ++ 0x99, 0x7f, 0x11, 0xf6, 0x0a, 0x1d, },
12568 ++ { 0x68, 0x70, 0xf7, 0x90, 0xa1, 0x8b, 0x1f, 0x0f, 0xbb, 0xce, 0xd2, 0x0e,
12569 ++ 0x33, 0x1f, 0x7f, 0xa9, 0x78, 0xa8, 0xa6, 0x81, 0x66, 0xab, 0x8d, 0xcd,
12570 ++ 0x58, 0x55, 0x3a, 0x0b, 0x7a, 0xdb, 0xb5, },
12571 ++ { 0xdd, 0x35, 0xd2, 0xb4, 0xf6, 0xc7, 0xea, 0xab, 0x64, 0x24, 0x4e, 0xfe,
12572 ++ 0xe5, 0x3d, 0x4e, 0x95, 0x8b, 0x6d, 0x6c, 0xbc, 0xb0, 0xf8, 0x88, 0x61,
12573 ++ 0x09, 0xb7, 0x78, 0xa3, 0x31, 0xfe, 0xd9, 0x2f, },
12574 ++ { 0x0a, },
12575 ++ { 0x6e, 0xd4, },
12576 ++ { 0x64, 0xe9, 0xd1, },
12577 ++ { 0x30, 0xdd, 0x71, 0xef, },
12578 ++ { 0x11, 0xb5, 0x0c, 0x87, 0xc9, },
12579 ++ { 0x06, 0x1c, 0x6d, 0x04, 0x82, 0xd0, },
12580 ++ { 0x5c, 0x42, 0x0b, 0xee, 0xc5, 0x9c, 0xb2, },
12581 ++ { 0xe8, 0x29, 0xd6, 0xb4, 0x5d, 0xf7, 0x2b, 0x93, },
12582 ++ { 0x18, 0xca, 0x27, 0x72, 0x43, 0x39, 0x16, 0xbc, 0x6a, },
12583 ++ { 0x39, 0x8f, 0xfd, 0x64, 0xf5, 0x57, 0x23, 0xb0, 0x45, 0xf8, },
12584 ++ { 0xbb, 0x3a, 0x78, 0x6b, 0x02, 0x1d, 0x0b, 0x16, 0xe3, 0xb2, 0x9a, },
12585 ++ { 0xb8, 0xb4, 0x0b, 0xe5, 0xd4, 0x1d, 0x0d, 0x85, 0x49, 0x91, 0x35, 0xfa, },
12586 ++ { 0x6d, 0x48, 0x2a, 0x0c, 0x42, 0x08, 0xbd, 0xa9, 0x78, 0x6f, 0x18, 0xaf,
12587 ++ 0xe2, },
12588 ++ { 0x10, 0x45, 0xd4, 0x58, 0x88, 0xec, 0x4e, 0x1e, 0xf6, 0x14, 0x92, 0x64,
12589 ++ 0x7e, 0xb0, },
12590 ++ { 0x8b, 0x0b, 0x95, 0xee, 0x92, 0xc6, 0x3b, 0x91, 0xf1, 0x1e, 0xeb, 0x51,
12591 ++ 0x98, 0x0a, 0x8d, },
12592 ++ { 0xa3, 0x50, 0x4d, 0xa5, 0x1d, 0x03, 0x68, 0xe9, 0x57, 0x78, 0xd6, 0x04,
12593 ++ 0xf1, 0xc3, 0x94, 0xd8, },
12594 ++ { 0xb8, 0x66, 0x6e, 0xdd, 0x46, 0x15, 0xae, 0x3d, 0x83, 0x7e, 0xcf, 0xe7,
12595 ++ 0x2c, 0xe8, 0x8f, 0xc7, 0x34, },
12596 ++ { 0x2e, 0xc0, 0x1f, 0x29, 0xea, 0xf6, 0xb9, 0xe2, 0xc2, 0x93, 0xeb, 0x41,
12597 ++ 0x0d, 0xf0, 0x0a, 0x13, 0x0e, 0xa2, },
12598 ++ { 0x71, 0xb8, 0x33, 0xa9, 0x1b, 0xac, 0xf1, 0xb5, 0x42, 0x8f, 0x5e, 0x81,
12599 ++ 0x34, 0x43, 0xb7, 0xa4, 0x18, 0x5c, 0x47, },
12600 ++ { 0xda, 0x45, 0xb8, 0x2e, 0x82, 0x1e, 0xc0, 0x59, 0x77, 0x9d, 0xfa, 0xb4,
12601 ++ 0x1c, 0x5e, 0xa0, 0x2b, 0x33, 0x96, 0x5a, 0x58, },
12602 ++ { 0xe3, 0x09, 0x05, 0xa9, 0xeb, 0x48, 0x13, 0xad, 0x71, 0x88, 0x81, 0x9a,
12603 ++ 0x3e, 0x2c, 0xe1, 0x23, 0x99, 0x13, 0x35, 0x9f, 0xb5, },
12604 ++ { 0xb7, 0x86, 0x2d, 0x16, 0xe1, 0x04, 0x00, 0x47, 0x47, 0x61, 0x31, 0xfb,
12605 ++ 0x14, 0xac, 0xd8, 0xe9, 0xe3, 0x49, 0xbd, 0xf7, 0x9c, 0x3f, },
12606 ++ { 0x7f, 0xd9, 0x95, 0xa8, 0xa7, 0xa0, 0xcc, 0xba, 0xef, 0xb1, 0x0a, 0xa9,
12607 ++ 0x21, 0x62, 0x08, 0x0f, 0x1b, 0xff, 0x7b, 0x9d, 0xae, 0xb2, 0x95, },
12608 ++ { 0x85, 0x99, 0xea, 0x33, 0xe0, 0x56, 0xff, 0x13, 0xc6, 0x61, 0x8c, 0xf9,
12609 ++ 0x57, 0x05, 0x03, 0x11, 0xf9, 0xfb, 0x3a, 0xf7, 0xce, 0xbb, 0x52, 0x30, },
12610 ++ { 0xb2, 0x72, 0x9c, 0xf8, 0x77, 0x4e, 0x8f, 0x6b, 0x01, 0x6c, 0xff, 0x4e,
12611 ++ 0x4f, 0x02, 0xd2, 0xbc, 0xeb, 0x51, 0x28, 0x99, 0x50, 0xab, 0xc4, 0x42,
12612 ++ 0xe3, },
12613 ++ { 0x8b, 0x0a, 0xb5, 0x90, 0x8f, 0xf5, 0x7b, 0xdd, 0xba, 0x47, 0x37, 0xc9,
12614 ++ 0x2a, 0xd5, 0x4b, 0x25, 0x08, 0x8b, 0x02, 0x17, 0xa7, 0x9e, 0x6b, 0x6e,
12615 ++ 0xe3, 0x90, },
12616 ++ { 0x90, 0xdd, 0xf7, 0x75, 0xa7, 0xa3, 0x99, 0x5e, 0x5b, 0x7d, 0x75, 0xc3,
12617 ++ 0x39, 0x6b, 0xa0, 0xe2, 0x44, 0x53, 0xb1, 0x9e, 0xc8, 0xf1, 0x77, 0x10,
12618 ++ 0x58, 0x06, 0x9a, },
12619 ++ { 0x99, 0x52, 0xf0, 0x49, 0xa8, 0x8c, 0xec, 0xa6, 0x97, 0x32, 0x13, 0xb5,
12620 ++ 0xf7, 0xa3, 0x8e, 0xfb, 0x4b, 0x59, 0x31, 0x3d, 0x01, 0x59, 0x98, 0x5d,
12621 ++ 0x53, 0x03, 0x1a, 0x39, },
12622 ++ { 0x9f, 0xe0, 0xc2, 0xe5, 0x5d, 0x93, 0xd6, 0x9b, 0x47, 0x8f, 0x9b, 0xe0,
12623 ++ 0x26, 0x35, 0x84, 0x20, 0x1d, 0xc5, 0x53, 0x10, 0x0f, 0x22, 0xb9, 0xb5,
12624 ++ 0xd4, 0x36, 0xb1, 0xac, 0x73, },
12625 ++ { 0x30, 0x32, 0x20, 0x3b, 0x10, 0x28, 0xec, 0x1f, 0x4f, 0x9b, 0x47, 0x59,
12626 ++ 0xeb, 0x7b, 0xee, 0x45, 0xfb, 0x0c, 0x49, 0xd8, 0x3d, 0x69, 0xbd, 0x90,
12627 ++ 0x2c, 0xf0, 0x9e, 0x8d, 0xbf, 0xd5, },
12628 ++ { 0x2a, 0x37, 0x73, 0x7f, 0xf9, 0x96, 0x19, 0xaa, 0x25, 0xd8, 0x13, 0x28,
12629 ++ 0x01, 0x29, 0x89, 0xdf, 0x6e, 0x0c, 0x9b, 0x43, 0x44, 0x51, 0xe9, 0x75,
12630 ++ 0x26, 0x0c, 0xb7, 0x87, 0x66, 0x0b, 0x5f, },
12631 ++ { 0x23, 0xdf, 0x96, 0x68, 0x91, 0x86, 0xd0, 0x93, 0x55, 0x33, 0x24, 0xf6,
12632 ++ 0xba, 0x08, 0x75, 0x5b, 0x59, 0x11, 0x69, 0xb8, 0xb9, 0xe5, 0x2c, 0x77,
12633 ++ 0x02, 0xf6, 0x47, 0xee, 0x81, 0xdd, 0xb9, 0x06, },
12634 ++ { 0x9d, },
12635 ++ { 0x9d, 0x7d, },
12636 ++ { 0xfd, 0xc3, 0xda, },
12637 ++ { 0xe8, 0x82, 0xcd, 0x21, },
12638 ++ { 0xc3, 0x1d, 0x42, 0x4c, 0x74, },
12639 ++ { 0xe9, 0xda, 0xf1, 0xa2, 0xe5, 0x7c, },
12640 ++ { 0x52, 0xb8, 0x6f, 0x81, 0x5c, 0x3a, 0x4c, },
12641 ++ { 0x5b, 0x39, 0x26, 0xfc, 0x92, 0x5e, 0xe0, 0x49, },
12642 ++ { 0x59, 0xe4, 0x7c, 0x93, 0x1c, 0xf9, 0x28, 0x93, 0xde, },
12643 ++ { 0xde, 0xdf, 0xb2, 0x43, 0x61, 0x0b, 0x86, 0x16, 0x4c, 0x2e, },
12644 ++ { 0x14, 0x8f, 0x75, 0x51, 0xaf, 0xb9, 0xee, 0x51, 0x5a, 0xae, 0x23, },
12645 ++ { 0x43, 0x5f, 0x50, 0xd5, 0x70, 0xb0, 0x5b, 0x87, 0xf5, 0xd9, 0xb3, 0x6d, },
12646 ++ { 0x66, 0x0a, 0x64, 0x93, 0x79, 0x71, 0x94, 0x40, 0xb7, 0x68, 0x2d, 0xd3,
12647 ++ 0x63, },
12648 ++ { 0x15, 0x00, 0xc4, 0x0c, 0x7d, 0x1b, 0x10, 0xa9, 0x73, 0x1b, 0x90, 0x6f,
12649 ++ 0xe6, 0xa9, },
12650 ++ { 0x34, 0x75, 0xf3, 0x86, 0x8f, 0x56, 0xcf, 0x2a, 0x0a, 0xf2, 0x62, 0x0a,
12651 ++ 0xf6, 0x0e, 0x20, },
12652 ++ { 0xb1, 0xde, 0xc9, 0xf5, 0xdb, 0xf3, 0x2f, 0x4c, 0xd6, 0x41, 0x7d, 0x39,
12653 ++ 0x18, 0x3e, 0xc7, 0xc3, },
12654 ++ { 0xc5, 0x89, 0xb2, 0xf8, 0xb8, 0xc0, 0xa3, 0xb9, 0x3b, 0x10, 0x6d, 0x7c,
12655 ++ 0x92, 0xfc, 0x7f, 0x34, 0x41, },
12656 ++ { 0xc4, 0xd8, 0xef, 0xba, 0xef, 0xd2, 0xaa, 0xc5, 0x6c, 0x8e, 0x3e, 0xbb,
12657 ++ 0x12, 0xfc, 0x0f, 0x72, 0xbf, 0x0f, },
12658 ++ { 0xdd, 0x91, 0xd1, 0x15, 0x9e, 0x7d, 0xf8, 0xc1, 0xb9, 0x14, 0x63, 0x96,
12659 ++ 0xb5, 0xcb, 0x83, 0x1d, 0x35, 0x1c, 0xec, },
12660 ++ { 0xa9, 0xf8, 0x52, 0xc9, 0x67, 0x76, 0x2b, 0xad, 0xfb, 0xd8, 0x3a, 0xa6,
12661 ++ 0x74, 0x02, 0xae, 0xb8, 0x25, 0x2c, 0x63, 0x49, },
12662 ++ { 0x77, 0x1f, 0x66, 0x70, 0xfd, 0x50, 0x29, 0xaa, 0xeb, 0xdc, 0xee, 0xba,
12663 ++ 0x75, 0x98, 0xdc, 0x93, 0x12, 0x3f, 0xdc, 0x7c, 0x38, },
12664 ++ { 0xe2, 0xe1, 0x89, 0x5c, 0x37, 0x38, 0x6a, 0xa3, 0x40, 0xac, 0x3f, 0xb0,
12665 ++ 0xca, 0xfc, 0xa7, 0xf3, 0xea, 0xf9, 0x0f, 0x5d, 0x8e, 0x39, },
12666 ++ { 0x0f, 0x67, 0xc8, 0x38, 0x01, 0xb1, 0xb7, 0xb8, 0xa2, 0xe7, 0x0a, 0x6d,
12667 ++ 0xd2, 0x63, 0x69, 0x9e, 0xcc, 0xf0, 0xf2, 0xbe, 0x9b, 0x98, 0xdd, },
12668 ++ { 0x13, 0xe1, 0x36, 0x30, 0xfe, 0xc6, 0x01, 0x8a, 0xa1, 0x63, 0x96, 0x59,
12669 ++ 0xc2, 0xa9, 0x68, 0x3f, 0x58, 0xd4, 0x19, 0x0c, 0x40, 0xf3, 0xde, 0x02, },
12670 ++ { 0xa3, 0x9e, 0xce, 0xda, 0x42, 0xee, 0x8c, 0x6c, 0x5a, 0x7d, 0xdc, 0x89,
12671 ++ 0x02, 0x77, 0xdd, 0xe7, 0x95, 0xbb, 0xff, 0x0d, 0xa4, 0xb5, 0x38, 0x1e,
12672 ++ 0xaf, },
12673 ++ { 0x9a, 0xf6, 0xb5, 0x9a, 0x4f, 0xa9, 0x4f, 0x2c, 0x35, 0x3c, 0x24, 0xdc,
12674 ++ 0x97, 0x6f, 0xd9, 0xa1, 0x7d, 0x1a, 0x85, 0x0b, 0xf5, 0xda, 0x2e, 0xe7,
12675 ++ 0xb1, 0x1d, },
12676 ++ { 0x84, 0x1e, 0x8e, 0x3d, 0x45, 0xa5, 0xf2, 0x27, 0xf3, 0x31, 0xfe, 0xb9,
12677 ++ 0xfb, 0xc5, 0x45, 0x99, 0x99, 0xdd, 0x93, 0x43, 0x02, 0xee, 0x58, 0xaf,
12678 ++ 0xee, 0x6a, 0xbe, },
12679 ++ { 0x07, 0x2f, 0xc0, 0xa2, 0x04, 0xc4, 0xab, 0x7c, 0x26, 0xbb, 0xa8, 0xd8,
12680 ++ 0xe3, 0x1c, 0x75, 0x15, 0x64, 0x5d, 0x02, 0x6a, 0xf0, 0x86, 0xe9, 0xcd,
12681 ++ 0x5c, 0xef, 0xa3, 0x25, },
12682 ++ { 0x2f, 0x3b, 0x1f, 0xb5, 0x91, 0x8f, 0x86, 0xe0, 0xdc, 0x31, 0x48, 0xb6,
12683 ++ 0xa1, 0x8c, 0xfd, 0x75, 0xbb, 0x7d, 0x3d, 0xc1, 0xf0, 0x10, 0x9a, 0xd8,
12684 ++ 0x4b, 0x0e, 0xe3, 0x94, 0x9f, },
12685 ++ { 0x29, 0xbb, 0x8f, 0x6c, 0xd1, 0xf2, 0xb6, 0xaf, 0xe5, 0xe3, 0x2d, 0xdc,
12686 ++ 0x6f, 0xa4, 0x53, 0x88, 0xd8, 0xcf, 0x4d, 0x45, 0x42, 0x62, 0xdb, 0xdf,
12687 ++ 0xf8, 0x45, 0xc2, 0x13, 0xec, 0x35, },
12688 ++ { 0x06, 0x3c, 0xe3, 0x2c, 0x15, 0xc6, 0x43, 0x03, 0x81, 0xfb, 0x08, 0x76,
12689 ++ 0x33, 0xcb, 0x02, 0xc1, 0xba, 0x33, 0xe5, 0xe0, 0xd1, 0x92, 0xa8, 0x46,
12690 ++ 0x28, 0x3f, 0x3e, 0x9d, 0x2c, 0x44, 0x54, },
12691 ++ { 0xea, 0xbb, 0x96, 0xf8, 0xd1, 0x8b, 0x04, 0x11, 0x40, 0x78, 0x42, 0x02,
12692 ++ 0x19, 0xd1, 0xbc, 0x65, 0x92, 0xd3, 0xc3, 0xd6, 0xd9, 0x19, 0xe7, 0xc3,
12693 ++ 0x40, 0x97, 0xbd, 0xd4, 0xed, 0xfa, 0x5e, 0x28, },
12694 ++ { 0x02, },
12695 ++ { 0x52, 0xa8, },
12696 ++ { 0x38, 0x25, 0x0d, },
12697 ++ { 0xe3, 0x04, 0xd4, 0x92, },
12698 ++ { 0x97, 0xdb, 0xf7, 0x81, 0xca, },
12699 ++ { 0x8a, 0x56, 0x9d, 0x62, 0x56, 0xcc, },
12700 ++ { 0xa1, 0x8e, 0x3c, 0x72, 0x8f, 0x63, 0x03, },
12701 ++ { 0xf7, 0xf3, 0x39, 0x09, 0x0a, 0xa1, 0xbb, 0x23, },
12702 ++ { 0x6b, 0x03, 0xc0, 0xe9, 0xd9, 0x83, 0x05, 0x22, 0x01, },
12703 ++ { 0x1b, 0x4b, 0xf5, 0xd6, 0x4f, 0x05, 0x75, 0x91, 0x4c, 0x7f, },
12704 ++ { 0x4c, 0x8c, 0x25, 0x20, 0x21, 0xcb, 0xc2, 0x4b, 0x3a, 0x5b, 0x8d, },
12705 ++ { 0x56, 0xe2, 0x77, 0xa0, 0xb6, 0x9f, 0x81, 0xec, 0x83, 0x75, 0xc4, 0xf9, },
12706 ++ { 0x71, 0x70, 0x0f, 0xad, 0x4d, 0x35, 0x81, 0x9d, 0x88, 0x69, 0xf9, 0xaa,
12707 ++ 0xd3, },
12708 ++ { 0x50, 0x6e, 0x86, 0x6e, 0x43, 0xc0, 0xc2, 0x44, 0xc2, 0xe2, 0xa0, 0x1c,
12709 ++ 0xb7, 0x9a, },
12710 ++ { 0xe4, 0x7e, 0x72, 0xc6, 0x12, 0x8e, 0x7c, 0xfc, 0xbd, 0xe2, 0x08, 0x31,
12711 ++ 0x3d, 0x47, 0x3d, },
12712 ++ { 0x08, 0x97, 0x5b, 0x80, 0xae, 0xc4, 0x1d, 0x50, 0x77, 0xdf, 0x1f, 0xd0,
12713 ++ 0x24, 0xf0, 0x17, 0xc0, },
12714 ++ { 0x01, 0xb6, 0x29, 0xf4, 0xaf, 0x78, 0x5f, 0xb6, 0x91, 0xdd, 0x76, 0x76,
12715 ++ 0xd2, 0xfd, 0x0c, 0x47, 0x40, },
12716 ++ { 0xa1, 0xd8, 0x09, 0x97, 0x7a, 0xa6, 0xc8, 0x94, 0xf6, 0x91, 0x7b, 0xae,
12717 ++ 0x2b, 0x9f, 0x0d, 0x83, 0x48, 0xf7, },
12718 ++ { 0x12, 0xd5, 0x53, 0x7d, 0x9a, 0xb0, 0xbe, 0xd9, 0xed, 0xe9, 0x9e, 0xee,
12719 ++ 0x61, 0x5b, 0x42, 0xf2, 0xc0, 0x73, 0xc0, },
12720 ++ { 0xd5, 0x77, 0xd6, 0x5c, 0x6e, 0xa5, 0x69, 0x2b, 0x3b, 0x8c, 0xd6, 0x7d,
12721 ++ 0x1d, 0xbe, 0x2c, 0xa1, 0x02, 0x21, 0xcd, 0x29, },
12722 ++ { 0xa4, 0x98, 0x80, 0xca, 0x22, 0xcf, 0x6a, 0xab, 0x5e, 0x40, 0x0d, 0x61,
12723 ++ 0x08, 0x21, 0xef, 0xc0, 0x6c, 0x52, 0xb4, 0xb0, 0x53, },
12724 ++ { 0xbf, 0xaf, 0x8f, 0x3b, 0x7a, 0x97, 0x33, 0xe5, 0xca, 0x07, 0x37, 0xfd,
12725 ++ 0x15, 0xdf, 0xce, 0x26, 0x2a, 0xb1, 0xa7, 0x0b, 0xb3, 0xac, },
12726 ++ { 0x16, 0x22, 0xe1, 0xbc, 0x99, 0x4e, 0x01, 0xf0, 0xfa, 0xff, 0x8f, 0xa5,
12727 ++ 0x0c, 0x61, 0xb0, 0xad, 0xcc, 0xb1, 0xe1, 0x21, 0x46, 0xfa, 0x2e, },
12728 ++ { 0x11, 0x5b, 0x0b, 0x2b, 0xe6, 0x14, 0xc1, 0xd5, 0x4d, 0x71, 0x5e, 0x17,
12729 ++ 0xea, 0x23, 0xdd, 0x6c, 0xbd, 0x1d, 0xbe, 0x12, 0x1b, 0xee, 0x4c, 0x1a, },
12730 ++ { 0x40, 0x88, 0x22, 0xf3, 0x20, 0x6c, 0xed, 0xe1, 0x36, 0x34, 0x62, 0x2c,
12731 ++ 0x98, 0x83, 0x52, 0xe2, 0x25, 0xee, 0xe9, 0xf5, 0xe1, 0x17, 0xf0, 0x5c,
12732 ++ 0xae, },
12733 ++ { 0xc3, 0x76, 0x37, 0xde, 0x95, 0x8c, 0xca, 0x2b, 0x0c, 0x23, 0xe7, 0xb5,
12734 ++ 0x38, 0x70, 0x61, 0xcc, 0xff, 0xd3, 0x95, 0x7b, 0xf3, 0xff, 0x1f, 0x9d,
12735 ++ 0x59, 0x00, },
12736 ++ { 0x0c, 0x19, 0x52, 0x05, 0x22, 0x53, 0xcb, 0x48, 0xd7, 0x10, 0x0e, 0x7e,
12737 ++ 0x14, 0x69, 0xb5, 0xa2, 0x92, 0x43, 0xa3, 0x9e, 0x4b, 0x8f, 0x51, 0x2c,
12738 ++ 0x5a, 0x2c, 0x3b, },
12739 ++ { 0xe1, 0x9d, 0x70, 0x70, 0x28, 0xec, 0x86, 0x40, 0x55, 0x33, 0x56, 0xda,
12740 ++ 0x88, 0xca, 0xee, 0xc8, 0x6a, 0x20, 0xb1, 0xe5, 0x3d, 0x57, 0xf8, 0x3c,
12741 ++ 0x10, 0x07, 0x2a, 0xc4, },
12742 ++ { 0x0b, 0xae, 0xf1, 0xc4, 0x79, 0xee, 0x1b, 0x3d, 0x27, 0x35, 0x8d, 0x14,
12743 ++ 0xd6, 0xae, 0x4e, 0x3c, 0xe9, 0x53, 0x50, 0xb5, 0xcc, 0x0c, 0xf7, 0xdf,
12744 ++ 0xee, 0xa1, 0x74, 0xd6, 0x71, },
12745 ++ { 0xe6, 0xa4, 0xf4, 0x99, 0x98, 0xb9, 0x80, 0xea, 0x96, 0x7f, 0x4f, 0x33,
12746 ++ 0xcf, 0x74, 0x25, 0x6f, 0x17, 0x6c, 0xbf, 0xf5, 0x5c, 0x38, 0xd0, 0xff,
12747 ++ 0x96, 0xcb, 0x13, 0xf9, 0xdf, 0xfd, },
12748 ++ { 0xbe, 0x92, 0xeb, 0xba, 0x44, 0x2c, 0x24, 0x74, 0xd4, 0x03, 0x27, 0x3c,
12749 ++ 0x5d, 0x5b, 0x03, 0x30, 0x87, 0x63, 0x69, 0xe0, 0xb8, 0x94, 0xf4, 0x44,
12750 ++ 0x7e, 0xad, 0xcd, 0x20, 0x12, 0x16, 0x79, },
12751 ++ { 0x30, 0xf1, 0xc4, 0x8e, 0x05, 0x90, 0x2a, 0x97, 0x63, 0x94, 0x46, 0xff,
12752 ++ 0xce, 0xd8, 0x67, 0xa7, 0xac, 0x33, 0x8c, 0x95, 0xb7, 0xcd, 0xa3, 0x23,
12753 ++ 0x98, 0x9d, 0x76, 0x6c, 0x9d, 0xa8, 0xd6, 0x8a, },
12754 ++ { 0xbe, },
12755 ++ { 0x17, 0x6c, },
12756 ++ { 0x1a, 0x42, 0x4f, },
12757 ++ { 0xba, 0xaf, 0xb7, 0x65, },
12758 ++ { 0xc2, 0x63, 0x43, 0x6a, 0xea, },
12759 ++ { 0xe4, 0x4d, 0xad, 0xf2, 0x0b, 0x02, },
12760 ++ { 0x04, 0xc7, 0xc4, 0x7f, 0xa9, 0x2b, 0xce, },
12761 ++ { 0x66, 0xf6, 0x67, 0xcb, 0x03, 0x53, 0xc8, 0xf1, },
12762 ++ { 0x56, 0xa3, 0x60, 0x78, 0xc9, 0x5f, 0x70, 0x1b, 0x5e, },
12763 ++ { 0x99, 0xff, 0x81, 0x7c, 0x13, 0x3c, 0x29, 0x79, 0x4b, 0x65, },
12764 ++ { 0x51, 0x10, 0x50, 0x93, 0x01, 0x93, 0xb7, 0x01, 0xc9, 0x18, 0xb7, },
12765 ++ { 0x8e, 0x3c, 0x42, 0x1e, 0x5e, 0x7d, 0xc1, 0x50, 0x70, 0x1f, 0x00, 0x98, },
12766 ++ { 0x5f, 0xd9, 0x9b, 0xc8, 0xd7, 0xb2, 0x72, 0x62, 0x1a, 0x1e, 0xba, 0x92,
12767 ++ 0xe9, },
12768 ++ { 0x70, 0x2b, 0xba, 0xfe, 0xad, 0x5d, 0x96, 0x3f, 0x27, 0xc2, 0x41, 0x6d,
12769 ++ 0xc4, 0xb3, },
12770 ++ { 0xae, 0xe0, 0xd5, 0xd4, 0xc7, 0xae, 0x15, 0x5e, 0xdc, 0xdd, 0x33, 0x60,
12771 ++ 0xd7, 0xd3, 0x5e, },
12772 ++ { 0x79, 0x8e, 0xbc, 0x9e, 0x20, 0xb9, 0x19, 0x4b, 0x63, 0x80, 0xf3, 0x16,
12773 ++ 0xaf, 0x39, 0xbd, 0x92, },
12774 ++ { 0xc2, 0x0e, 0x85, 0xa0, 0x0b, 0x9a, 0xb0, 0xec, 0xde, 0x38, 0xd3, 0x10,
12775 ++ 0xd9, 0xa7, 0x66, 0x27, 0xcf, },
12776 ++ { 0x0e, 0x3b, 0x75, 0x80, 0x67, 0x14, 0x0c, 0x02, 0x90, 0xd6, 0xb3, 0x02,
12777 ++ 0x81, 0xf6, 0xa6, 0x87, 0xce, 0x58, },
12778 ++ { 0x79, 0xb5, 0xe9, 0x5d, 0x52, 0x4d, 0xf7, 0x59, 0xf4, 0x2e, 0x27, 0xdd,
12779 ++ 0xb3, 0xed, 0x57, 0x5b, 0x82, 0xea, 0x6f, },
12780 ++ { 0xa2, 0x97, 0xf5, 0x80, 0x02, 0x3d, 0xde, 0xa3, 0xf9, 0xf6, 0xab, 0xe3,
12781 ++ 0x57, 0x63, 0x7b, 0x9b, 0x10, 0x42, 0x6f, 0xf2, },
12782 ++ { 0x12, 0x7a, 0xfc, 0xb7, 0x67, 0x06, 0x0c, 0x78, 0x1a, 0xfe, 0x88, 0x4f,
12783 ++ 0xc6, 0xac, 0x52, 0x96, 0x64, 0x28, 0x97, 0x84, 0x06, },
12784 ++ { 0xc5, 0x04, 0x44, 0x6b, 0xb2, 0xa5, 0xa4, 0x66, 0xe1, 0x76, 0xa2, 0x51,
12785 ++ 0xf9, 0x59, 0x69, 0x97, 0x56, 0x0b, 0xbf, 0x50, 0xb3, 0x34, },
12786 ++ { 0x21, 0x32, 0x6b, 0x42, 0xb5, 0xed, 0x71, 0x8d, 0xf7, 0x5a, 0x35, 0xe3,
12787 ++ 0x90, 0xe2, 0xee, 0xaa, 0x89, 0xf6, 0xc9, 0x9c, 0x4d, 0x73, 0xf4, },
12788 ++ { 0x4c, 0xa6, 0x09, 0xf4, 0x48, 0xe7, 0x46, 0xbc, 0x49, 0xfc, 0xe5, 0xda,
12789 ++ 0xd1, 0x87, 0x13, 0x17, 0x4c, 0x59, 0x71, 0x26, 0x5b, 0x2c, 0x42, 0xb7, },
12790 ++ { 0x13, 0x63, 0xf3, 0x40, 0x02, 0xe5, 0xa3, 0x3a, 0x5e, 0x8e, 0xf8, 0xb6,
12791 ++ 0x8a, 0x49, 0x60, 0x76, 0x34, 0x72, 0x94, 0x73, 0xf6, 0xd9, 0x21, 0x6a,
12792 ++ 0x26, },
12793 ++ { 0xdf, 0x75, 0x16, 0x10, 0x1b, 0x5e, 0x81, 0xc3, 0xc8, 0xde, 0x34, 0x24,
12794 ++ 0xb0, 0x98, 0xeb, 0x1b, 0x8f, 0xa1, 0x9b, 0x05, 0xee, 0xa5, 0xe9, 0x35,
12795 ++ 0xf4, 0x1d, },
12796 ++ { 0xcd, 0x21, 0x93, 0x6e, 0x5b, 0xa0, 0x26, 0x2b, 0x21, 0x0e, 0xa0, 0xb9,
12797 ++ 0x1c, 0xb5, 0xbb, 0xb8, 0xf8, 0x1e, 0xff, 0x5c, 0xa8, 0xf9, 0x39, 0x46,
12798 ++ 0x4e, 0x29, 0x26, },
12799 ++ { 0x73, 0x7f, 0x0e, 0x3b, 0x0b, 0x5c, 0xf9, 0x60, 0xaa, 0x88, 0xa1, 0x09,
12800 ++ 0xb1, 0x5d, 0x38, 0x7b, 0x86, 0x8f, 0x13, 0x7a, 0x8d, 0x72, 0x7a, 0x98,
12801 ++ 0x1a, 0x5b, 0xff, 0xc9, },
12802 ++ { 0xd3, 0x3c, 0x61, 0x71, 0x44, 0x7e, 0x31, 0x74, 0x98, 0x9d, 0x9a, 0xd2,
12803 ++ 0x27, 0xf3, 0x46, 0x43, 0x42, 0x51, 0xd0, 0x5f, 0xe9, 0x1c, 0x5c, 0x69,
12804 ++ 0xbf, 0xf6, 0xbe, 0x3c, 0x40, },
12805 ++ { 0x31, 0x99, 0x31, 0x9f, 0xaa, 0x43, 0x2e, 0x77, 0x3e, 0x74, 0x26, 0x31,
12806 ++ 0x5e, 0x61, 0xf1, 0x87, 0xe2, 0xeb, 0x9b, 0xcd, 0xd0, 0x3a, 0xee, 0x20,
12807 ++ 0x7e, 0x10, 0x0a, 0x0b, 0x7e, 0xfa, },
12808 ++ { 0xa4, 0x27, 0x80, 0x67, 0x81, 0x2a, 0xa7, 0x62, 0xf7, 0x6e, 0xda, 0xd4,
12809 ++ 0x5c, 0x39, 0x74, 0xad, 0x7e, 0xbe, 0xad, 0xa5, 0x84, 0x7f, 0xa9, 0x30,
12810 ++ 0x5d, 0xdb, 0xe2, 0x05, 0x43, 0xf7, 0x1b, },
12811 ++ { 0x0b, 0x37, 0xd8, 0x02, 0xe1, 0x83, 0xd6, 0x80, 0xf2, 0x35, 0xc2, 0xb0,
12812 ++ 0x37, 0xef, 0xef, 0x5e, 0x43, 0x93, 0xf0, 0x49, 0x45, 0x0a, 0xef, 0xb5,
12813 ++ 0x76, 0x70, 0x12, 0x44, 0xc4, 0xdb, 0xf5, 0x7a, },
12814 ++ { 0x1f, },
12815 ++ { 0x82, 0x60, },
12816 ++ { 0xcc, 0xe3, 0x08, },
12817 ++ { 0x56, 0x17, 0xe4, 0x59, },
12818 ++ { 0xe2, 0xd7, 0x9e, 0xc4, 0x4c, },
12819 ++ { 0xb2, 0xad, 0xd3, 0x78, 0x58, 0x5a, },
12820 ++ { 0xce, 0x43, 0xb4, 0x02, 0x96, 0xab, 0x3c, },
12821 ++ { 0xe6, 0x05, 0x1a, 0x73, 0x22, 0x32, 0xbb, 0x77, },
12822 ++ { 0x23, 0xe7, 0xda, 0xfe, 0x2c, 0xef, 0x8c, 0x22, 0xec, },
12823 ++ { 0xe9, 0x8e, 0x55, 0x38, 0xd1, 0xd7, 0x35, 0x23, 0x98, 0xc7, },
12824 ++ { 0xb5, 0x81, 0x1a, 0xe5, 0xb5, 0xa5, 0xd9, 0x4d, 0xca, 0x41, 0xe7, },
12825 ++ { 0x41, 0x16, 0x16, 0x95, 0x8d, 0x9e, 0x0c, 0xea, 0x8c, 0x71, 0x9a, 0xc1, },
12826 ++ { 0x7c, 0x33, 0xc0, 0xa4, 0x00, 0x62, 0xea, 0x60, 0x67, 0xe4, 0x20, 0xbc,
12827 ++ 0x5b, },
12828 ++ { 0xdb, 0xb1, 0xdc, 0xfd, 0x08, 0xc0, 0xde, 0x82, 0xd1, 0xde, 0x38, 0xc0,
12829 ++ 0x90, 0x48, },
12830 ++ { 0x37, 0x18, 0x2e, 0x0d, 0x61, 0xaa, 0x61, 0xd7, 0x86, 0x20, 0x16, 0x60,
12831 ++ 0x04, 0xd9, 0xd5, },
12832 ++ { 0xb0, 0xcf, 0x2c, 0x4c, 0x5e, 0x5b, 0x4f, 0x2a, 0x23, 0x25, 0x58, 0x47,
12833 ++ 0xe5, 0x31, 0x06, 0x70, },
12834 ++ { 0x91, 0xa0, 0xa3, 0x86, 0x4e, 0xe0, 0x72, 0x38, 0x06, 0x67, 0x59, 0x5c,
12835 ++ 0x70, 0x25, 0xdb, 0x33, 0x27, },
12836 ++ { 0x44, 0x58, 0x66, 0xb8, 0x58, 0xc7, 0x13, 0xed, 0x4c, 0xc0, 0xf4, 0x9a,
12837 ++ 0x1e, 0x67, 0x75, 0x33, 0xb6, 0xb8, },
12838 ++ { 0x7f, 0x98, 0x4a, 0x8e, 0x50, 0xa2, 0x5c, 0xcd, 0x59, 0xde, 0x72, 0xb3,
12839 ++ 0x9d, 0xc3, 0x09, 0x8a, 0xab, 0x56, 0xf1, },
12840 ++ { 0x80, 0x96, 0x49, 0x1a, 0x59, 0xa2, 0xc5, 0xd5, 0xa7, 0x20, 0x8a, 0xb7,
12841 ++ 0x27, 0x62, 0x84, 0x43, 0xc6, 0xe1, 0x1b, 0x5d, },
12842 ++ { 0x6b, 0xb7, 0x2b, 0x26, 0x62, 0x14, 0x70, 0x19, 0x3d, 0x4d, 0xac, 0xac,
12843 ++ 0x63, 0x58, 0x5e, 0x94, 0xb5, 0xb7, 0xe8, 0xe8, 0xa2, },
12844 ++ { 0x20, 0xa8, 0xc0, 0xfd, 0x63, 0x3d, 0x6e, 0x98, 0xcf, 0x0c, 0x49, 0x98,
12845 ++ 0xe4, 0x5a, 0xfe, 0x8c, 0xaa, 0x70, 0x82, 0x1c, 0x7b, 0x74, },
12846 ++ { 0xc8, 0xe8, 0xdd, 0xdf, 0x69, 0x30, 0x01, 0xc2, 0x0f, 0x7e, 0x2f, 0x11,
12847 ++ 0xcc, 0x3e, 0x17, 0xa5, 0x69, 0x40, 0x3f, 0x0e, 0x79, 0x7f, 0xcf, },
12848 ++ { 0xdb, 0x61, 0xc0, 0xe2, 0x2e, 0x49, 0x07, 0x31, 0x1d, 0x91, 0x42, 0x8a,
12849 ++ 0xfc, 0x5e, 0xd3, 0xf8, 0x56, 0x1f, 0x2b, 0x73, 0xfd, 0x9f, 0xb2, 0x8e, },
12850 ++ { 0x0c, 0x89, 0x55, 0x0c, 0x1f, 0x59, 0x2c, 0x9d, 0x1b, 0x29, 0x1d, 0x41,
12851 ++ 0x1d, 0xe6, 0x47, 0x8f, 0x8c, 0x2b, 0xea, 0x8f, 0xf0, 0xff, 0x21, 0x70,
12852 ++ 0x88, },
12853 ++ { 0x12, 0x18, 0x95, 0xa6, 0x59, 0xb1, 0x31, 0x24, 0x45, 0x67, 0x55, 0xa4,
12854 ++ 0x1a, 0x2d, 0x48, 0x67, 0x1b, 0x43, 0x88, 0x2d, 0x8e, 0xa0, 0x70, 0xb3,
12855 ++ 0xc6, 0xbb, },
12856 ++ { 0xe7, 0xb1, 0x1d, 0xb2, 0x76, 0x4d, 0x68, 0x68, 0x68, 0x23, 0x02, 0x55,
12857 ++ 0x3a, 0xe2, 0xe5, 0xd5, 0x4b, 0x43, 0xf9, 0x34, 0x77, 0x5c, 0xa1, 0xf5,
12858 ++ 0x55, 0xfd, 0x4f, },
12859 ++ { 0x8c, 0x87, 0x5a, 0x08, 0x3a, 0x73, 0xad, 0x61, 0xe1, 0xe7, 0x99, 0x7e,
12860 ++ 0xf0, 0x5d, 0xe9, 0x5d, 0x16, 0x43, 0x80, 0x2f, 0xd0, 0x66, 0x34, 0xe2,
12861 ++ 0x42, 0x64, 0x3b, 0x1a, },
12862 ++ { 0x39, 0xc1, 0x99, 0xcf, 0x22, 0xbf, 0x16, 0x8f, 0x9f, 0x80, 0x7f, 0x95,
12863 ++ 0x0a, 0x05, 0x67, 0x27, 0xe7, 0x15, 0xdf, 0x9d, 0xb2, 0xfe, 0x1c, 0xb5,
12864 ++ 0x1d, 0x60, 0x8f, 0x8a, 0x1d, },
12865 ++ { 0x9b, 0x6e, 0x08, 0x09, 0x06, 0x73, 0xab, 0x68, 0x02, 0x62, 0x1a, 0xe4,
12866 ++ 0xd4, 0xdf, 0xc7, 0x02, 0x4c, 0x6a, 0x5f, 0xfd, 0x23, 0xac, 0xae, 0x6d,
12867 ++ 0x43, 0xa4, 0x7a, 0x50, 0x60, 0x3c, },
12868 ++ { 0x1d, 0xb4, 0xc6, 0xe1, 0xb1, 0x4b, 0xe3, 0xf2, 0xe2, 0x1a, 0x73, 0x1b,
12869 ++ 0xa0, 0x92, 0xa7, 0xf5, 0xff, 0x8f, 0x8b, 0x5d, 0xdf, 0xa8, 0x04, 0xb3,
12870 ++ 0xb0, 0xf7, 0xcc, 0x12, 0xfa, 0x35, 0x46, },
12871 ++ { 0x49, 0x45, 0x97, 0x11, 0x0f, 0x1c, 0x60, 0x8e, 0xe8, 0x47, 0x30, 0xcf,
12872 ++ 0x60, 0xa8, 0x71, 0xc5, 0x1b, 0xe9, 0x39, 0x4d, 0x49, 0xb6, 0x12, 0x1f,
12873 ++ 0x24, 0xab, 0x37, 0xff, 0x83, 0xc2, 0xe1, 0x3a, },
12874 ++ { 0x60, },
12875 ++ { 0x24, 0x26, },
12876 ++ { 0x47, 0xeb, 0xc9, },
12877 ++ { 0x4a, 0xd0, 0xbc, 0xf0, },
12878 ++ { 0x8e, 0x2b, 0xc9, 0x85, 0x3c, },
12879 ++ { 0xa2, 0x07, 0x15, 0xb8, 0x12, 0x74, },
12880 ++ { 0x0f, 0xdb, 0x5b, 0x33, 0x69, 0xfe, 0x4b, },
12881 ++ { 0xa2, 0x86, 0x54, 0xf4, 0xfd, 0xb2, 0xd4, 0xe6, },
12882 ++ { 0xbb, 0x84, 0x78, 0x49, 0x27, 0x8e, 0x61, 0xda, 0x60, },
12883 ++ { 0x04, 0xc3, 0xcd, 0xaa, 0x8f, 0xa7, 0x03, 0xc9, 0xf9, 0xb6, },
12884 ++ { 0xf8, 0x27, 0x1d, 0x61, 0xdc, 0x21, 0x42, 0xdd, 0xad, 0x92, 0x40, },
12885 ++ { 0x12, 0x87, 0xdf, 0xc2, 0x41, 0x45, 0x5a, 0x36, 0x48, 0x5b, 0x51, 0x2b, },
12886 ++ { 0xbb, 0x37, 0x5d, 0x1f, 0xf1, 0x68, 0x7a, 0xc4, 0xa5, 0xd2, 0xa4, 0x91,
12887 ++ 0x8d, },
12888 ++ { 0x5b, 0x27, 0xd1, 0x04, 0x54, 0x52, 0x9f, 0xa3, 0x47, 0x86, 0x33, 0x33,
12889 ++ 0xbf, 0xa0, },
12890 ++ { 0xcf, 0x04, 0xea, 0xf8, 0x03, 0x2a, 0x43, 0xff, 0xa6, 0x68, 0x21, 0x4c,
12891 ++ 0xd5, 0x4b, 0xed, },
12892 ++ { 0xaf, 0xb8, 0xbc, 0x63, 0x0f, 0x18, 0x4d, 0xe2, 0x7a, 0xdd, 0x46, 0x44,
12893 ++ 0xc8, 0x24, 0x0a, 0xb7, },
12894 ++ { 0x3e, 0xdc, 0x36, 0xe4, 0x89, 0xb1, 0xfa, 0xc6, 0x40, 0x93, 0x2e, 0x75,
12895 ++ 0xb2, 0x15, 0xd1, 0xb1, 0x10, },
12896 ++ { 0x6c, 0xd8, 0x20, 0x3b, 0x82, 0x79, 0xf9, 0xc8, 0xbc, 0x9d, 0xe0, 0x35,
12897 ++ 0xbe, 0x1b, 0x49, 0x1a, 0xbc, 0x3a, },
12898 ++ { 0x78, 0x65, 0x2c, 0xbe, 0x35, 0x67, 0xdc, 0x78, 0xd4, 0x41, 0xf6, 0xc9,
12899 ++ 0xde, 0xde, 0x1f, 0x18, 0x13, 0x31, 0x11, },
12900 ++ { 0x8a, 0x7f, 0xb1, 0x33, 0x8f, 0x0c, 0x3c, 0x0a, 0x06, 0x61, 0xf0, 0x47,
12901 ++ 0x29, 0x1b, 0x29, 0xbc, 0x1c, 0x47, 0xef, 0x7a, },
12902 ++ { 0x65, 0x91, 0xf1, 0xe6, 0xb3, 0x96, 0xd3, 0x8c, 0xc2, 0x4a, 0x59, 0x35,
12903 ++ 0x72, 0x8e, 0x0b, 0x9a, 0x87, 0xca, 0x34, 0x7b, 0x63, },
12904 ++ { 0x5f, 0x08, 0x87, 0x80, 0x56, 0x25, 0x89, 0x77, 0x61, 0x8c, 0x64, 0xa1,
12905 ++ 0x59, 0x6d, 0x59, 0x62, 0xe8, 0x4a, 0xc8, 0x58, 0x99, 0xd1, },
12906 ++ { 0x23, 0x87, 0x1d, 0xed, 0x6f, 0xf2, 0x91, 0x90, 0xe2, 0xfe, 0x43, 0x21,
12907 ++ 0xaf, 0x97, 0xc6, 0xbc, 0xd7, 0x15, 0xc7, 0x2d, 0x08, 0x77, 0x91, },
12908 ++ { 0x90, 0x47, 0x9a, 0x9e, 0x3a, 0xdf, 0xf3, 0xc9, 0x4c, 0x1e, 0xa7, 0xd4,
12909 ++ 0x6a, 0x32, 0x90, 0xfe, 0xb7, 0xb6, 0x7b, 0xfa, 0x96, 0x61, 0xfb, 0xa4, },
12910 ++ { 0xb1, 0x67, 0x60, 0x45, 0xb0, 0x96, 0xc5, 0x15, 0x9f, 0x4d, 0x26, 0xd7,
12911 ++ 0x9d, 0xf1, 0xf5, 0x6d, 0x21, 0x00, 0x94, 0x31, 0x64, 0x94, 0xd3, 0xa7,
12912 ++ 0xd3, },
12913 ++ { 0x02, 0x3e, 0xaf, 0xf3, 0x79, 0x73, 0xa5, 0xf5, 0xcc, 0x7a, 0x7f, 0xfb,
12914 ++ 0x79, 0x2b, 0x85, 0x8c, 0x88, 0x72, 0x06, 0xbe, 0xfe, 0xaf, 0xc1, 0x16,
12915 ++ 0xa6, 0xd6, },
12916 ++ { 0x2a, 0xb0, 0x1a, 0xe5, 0xaa, 0x6e, 0xb3, 0xae, 0x53, 0x85, 0x33, 0x80,
12917 ++ 0x75, 0xae, 0x30, 0xe6, 0xb8, 0x72, 0x42, 0xf6, 0x25, 0x4f, 0x38, 0x88,
12918 ++ 0x55, 0xd1, 0xa9, },
12919 ++ { 0x90, 0xd8, 0x0c, 0xc0, 0x93, 0x4b, 0x4f, 0x9e, 0x65, 0x6c, 0xa1, 0x54,
12920 ++ 0xa6, 0xf6, 0x6e, 0xca, 0xd2, 0xbb, 0x7e, 0x6a, 0x1c, 0xd3, 0xce, 0x46,
12921 ++ 0xef, 0xb0, 0x00, 0x8d, },
12922 ++ { 0xed, 0x9c, 0x49, 0xcd, 0xc2, 0xde, 0x38, 0x0e, 0xe9, 0x98, 0x6c, 0xc8,
12923 ++ 0x90, 0x9e, 0x3c, 0xd4, 0xd3, 0xeb, 0x88, 0x32, 0xc7, 0x28, 0xe3, 0x94,
12924 ++ 0x1c, 0x9f, 0x8b, 0xf3, 0xcb, },
12925 ++ { 0xac, 0xe7, 0x92, 0x16, 0xb4, 0x14, 0xa0, 0xe4, 0x04, 0x79, 0xa2, 0xf4,
12926 ++ 0x31, 0xe6, 0x0c, 0x26, 0xdc, 0xbf, 0x2f, 0x69, 0x1b, 0x55, 0x94, 0x67,
12927 ++ 0xda, 0x0c, 0xd7, 0x32, 0x1f, 0xef, },
12928 ++ { 0x68, 0x63, 0x85, 0x57, 0x95, 0x9e, 0x42, 0x27, 0x41, 0x43, 0x42, 0x02,
12929 ++ 0xa5, 0x78, 0xa7, 0xc6, 0x43, 0xc1, 0x6a, 0xba, 0x70, 0x80, 0xcd, 0x04,
12930 ++ 0xb6, 0x78, 0x76, 0x29, 0xf3, 0xe8, 0xa0, },
12931 ++ { 0xe6, 0xac, 0x8d, 0x9d, 0xf0, 0xc0, 0xf7, 0xf7, 0xe3, 0x3e, 0x4e, 0x28,
12932 ++ 0x0f, 0x59, 0xb2, 0x67, 0x9e, 0x84, 0x34, 0x42, 0x96, 0x30, 0x2b, 0xca,
12933 ++ 0x49, 0xb6, 0xc5, 0x9a, 0x84, 0x59, 0xa7, 0x81, },
12934 ++ { 0x7e, },
12935 ++ { 0x1e, 0x21, },
12936 ++ { 0x26, 0xd3, 0xdd, },
12937 ++ { 0x2c, 0xd4, 0xb3, 0x3d, },
12938 ++ { 0x86, 0x7b, 0x76, 0x3c, 0xf0, },
12939 ++ { 0x12, 0xc3, 0x70, 0x1d, 0x55, 0x18, },
12940 ++ { 0x96, 0xc2, 0xbd, 0x61, 0x55, 0xf4, 0x24, },
12941 ++ { 0x20, 0x51, 0xf7, 0x86, 0x58, 0x8f, 0x07, 0x2a, },
12942 ++ { 0x93, 0x15, 0xa8, 0x1d, 0xda, 0x97, 0xee, 0x0e, 0x6c, },
12943 ++ { 0x39, 0x93, 0xdf, 0xd5, 0x0e, 0xca, 0xdc, 0x7a, 0x92, 0xce, },
12944 ++ { 0x60, 0xd5, 0xfd, 0xf5, 0x1b, 0x26, 0x82, 0x26, 0x73, 0x02, 0xbc, },
12945 ++ { 0x98, 0xf2, 0x34, 0xe1, 0xf5, 0xfb, 0x00, 0xac, 0x10, 0x4a, 0x38, 0x9f, },
12946 ++ { 0xda, 0x3a, 0x92, 0x8a, 0xd0, 0xcd, 0x12, 0xcd, 0x15, 0xbb, 0xab, 0x77,
12947 ++ 0x66, },
12948 ++ { 0xa2, 0x92, 0x1a, 0xe5, 0xca, 0x0c, 0x30, 0x75, 0xeb, 0xaf, 0x00, 0x31,
12949 ++ 0x55, 0x66, },
12950 ++ { 0x06, 0xea, 0xfd, 0x3e, 0x86, 0x38, 0x62, 0x4e, 0xa9, 0x12, 0xa4, 0x12,
12951 ++ 0x43, 0xbf, 0xa1, },
12952 ++ { 0xe4, 0x71, 0x7b, 0x94, 0xdb, 0xa0, 0xd2, 0xff, 0x9b, 0xeb, 0xad, 0x8e,
12953 ++ 0x95, 0x8a, 0xc5, 0xed, },
12954 ++ { 0x25, 0x5a, 0x77, 0x71, 0x41, 0x0e, 0x7a, 0xe9, 0xed, 0x0c, 0x10, 0xef,
12955 ++ 0xf6, 0x2b, 0x3a, 0xba, 0x60, },
12956 ++ { 0xee, 0xe2, 0xa3, 0x67, 0x64, 0x1d, 0xc6, 0x04, 0xc4, 0xe1, 0x68, 0xd2,
12957 ++ 0x6e, 0xd2, 0x91, 0x75, 0x53, 0x07, },
12958 ++ { 0xe0, 0xf6, 0x4d, 0x8f, 0x68, 0xfc, 0x06, 0x7e, 0x18, 0x79, 0x7f, 0x2b,
12959 ++ 0x6d, 0xef, 0x46, 0x7f, 0xab, 0xb2, 0xad, },
12960 ++ { 0x3d, 0x35, 0x88, 0x9f, 0x2e, 0xcf, 0x96, 0x45, 0x07, 0x60, 0x71, 0x94,
12961 ++ 0x00, 0x8d, 0xbf, 0xf4, 0xef, 0x46, 0x2e, 0x3c, },
12962 ++ { 0x43, 0xcf, 0x98, 0xf7, 0x2d, 0xf4, 0x17, 0xe7, 0x8c, 0x05, 0x2d, 0x9b,
12963 ++ 0x24, 0xfb, 0x4d, 0xea, 0x4a, 0xec, 0x01, 0x25, 0x29, },
12964 ++ { 0x8e, 0x73, 0x9a, 0x78, 0x11, 0xfe, 0x48, 0xa0, 0x3b, 0x1a, 0x26, 0xdf,
12965 ++ 0x25, 0xe9, 0x59, 0x1c, 0x70, 0x07, 0x9f, 0xdc, 0xa0, 0xa6, },
12966 ++ { 0xe8, 0x47, 0x71, 0xc7, 0x3e, 0xdf, 0xb5, 0x13, 0xb9, 0x85, 0x13, 0xa8,
12967 ++ 0x54, 0x47, 0x6e, 0x59, 0x96, 0x09, 0x13, 0x5f, 0x82, 0x16, 0x0b, },
12968 ++ { 0xfb, 0xc0, 0x8c, 0x03, 0x21, 0xb3, 0xc4, 0xb5, 0x43, 0x32, 0x6c, 0xea,
12969 ++ 0x7f, 0xa8, 0x43, 0x91, 0xe8, 0x4e, 0x3f, 0xbf, 0x45, 0x58, 0x6a, 0xa3, },
12970 ++ { 0x55, 0xf8, 0xf3, 0x00, 0x76, 0x09, 0xef, 0x69, 0x5d, 0xd2, 0x8a, 0xf2,
12971 ++ 0x65, 0xc3, 0xcb, 0x9b, 0x43, 0xfd, 0xb1, 0x7e, 0x7f, 0xa1, 0x94, 0xb0,
12972 ++ 0xd7, },
12973 ++ { 0xaa, 0x13, 0xc1, 0x51, 0x40, 0x6d, 0x8d, 0x4c, 0x0a, 0x95, 0x64, 0x7b,
12974 ++ 0xd1, 0x96, 0xb6, 0x56, 0xb4, 0x5b, 0xcf, 0xd6, 0xd9, 0x15, 0x97, 0xdd,
12975 ++ 0xb6, 0xef, },
12976 ++ { 0xaf, 0xb7, 0x36, 0xb0, 0x04, 0xdb, 0xd7, 0x9c, 0x9a, 0x44, 0xc4, 0xf6,
12977 ++ 0x1f, 0x12, 0x21, 0x2d, 0x59, 0x30, 0x54, 0xab, 0x27, 0x61, 0xa3, 0x57,
12978 ++ 0xef, 0xf8, 0x53, },
12979 ++ { 0x97, 0x34, 0x45, 0x3e, 0xce, 0x7c, 0x35, 0xa2, 0xda, 0x9f, 0x4b, 0x46,
12980 ++ 0x6c, 0x11, 0x67, 0xff, 0x2f, 0x76, 0x58, 0x15, 0x71, 0xfa, 0x44, 0x89,
12981 ++ 0x89, 0xfd, 0xf7, 0x99, },
12982 ++ { 0x1f, 0xb1, 0x62, 0xeb, 0x83, 0xc5, 0x9c, 0x89, 0xf9, 0x2c, 0xd2, 0x03,
12983 ++ 0x61, 0xbc, 0xbb, 0xa5, 0x74, 0x0e, 0x9b, 0x7e, 0x82, 0x3e, 0x70, 0x0a,
12984 ++ 0xa9, 0x8f, 0x2b, 0x59, 0xfb, },
12985 ++ { 0xf8, 0xca, 0x5e, 0x3a, 0x4f, 0x9e, 0x10, 0x69, 0x10, 0xd5, 0x4c, 0xeb,
12986 ++ 0x1a, 0x0f, 0x3c, 0x6a, 0x98, 0xf5, 0xb0, 0x97, 0x5b, 0x37, 0x2f, 0x0d,
12987 ++ 0xbd, 0x42, 0x4b, 0x69, 0xa1, 0x82, },
12988 ++ { 0x12, 0x8c, 0x6d, 0x52, 0x08, 0xef, 0x74, 0xb2, 0xe6, 0xaa, 0xd3, 0xb0,
12989 ++ 0x26, 0xb0, 0xd9, 0x94, 0xb6, 0x11, 0x45, 0x0e, 0x36, 0x71, 0x14, 0x2d,
12990 ++ 0x41, 0x8c, 0x21, 0x53, 0x31, 0xe9, 0x68, },
12991 ++ { 0xee, 0xea, 0x0d, 0x89, 0x47, 0x7e, 0x72, 0xd1, 0xd8, 0xce, 0x58, 0x4c,
12992 ++ 0x94, 0x1f, 0x0d, 0x51, 0x08, 0xa3, 0xb6, 0x3d, 0xe7, 0x82, 0x46, 0x92,
12993 ++ 0xd6, 0x98, 0x6b, 0x07, 0x10, 0x65, 0x52, 0x65, },
12994 ++};
12995 ++
12996 ++static const u8 blake2s_hmac_testvecs[][BLAKE2S_HASH_SIZE] __initconst = {
12997 ++ { 0xce, 0xe1, 0x57, 0x69, 0x82, 0xdc, 0xbf, 0x43, 0xad, 0x56, 0x4c, 0x70,
12998 ++ 0xed, 0x68, 0x16, 0x96, 0xcf, 0xa4, 0x73, 0xe8, 0xe8, 0xfc, 0x32, 0x79,
12999 ++ 0x08, 0x0a, 0x75, 0x82, 0xda, 0x3f, 0x05, 0x11, },
13000 ++ { 0x77, 0x2f, 0x0c, 0x71, 0x41, 0xf4, 0x4b, 0x2b, 0xb3, 0xc6, 0xb6, 0xf9,
13001 ++ 0x60, 0xde, 0xe4, 0x52, 0x38, 0x66, 0xe8, 0xbf, 0x9b, 0x96, 0xc4, 0x9f,
13002 ++ 0x60, 0xd9, 0x24, 0x37, 0x99, 0xd6, 0xec, 0x31, },
13003 ++};
13004 ++
13005 ++bool __init blake2s_selftest(void)
13006 ++{
13007 ++ u8 key[BLAKE2S_KEY_SIZE];
13008 ++ u8 buf[ARRAY_SIZE(blake2s_testvecs)];
13009 ++ u8 hash[BLAKE2S_HASH_SIZE];
13010 ++ struct blake2s_state state;
13011 ++ bool success = true;
13012 ++ int i, l;
13013 ++
13014 ++ key[0] = key[1] = 1;
13015 ++ for (i = 2; i < sizeof(key); ++i)
13016 ++ key[i] = key[i - 2] + key[i - 1];
13017 ++
13018 ++ for (i = 0; i < sizeof(buf); ++i)
13019 ++ buf[i] = (u8)i;
13020 ++
13021 ++ for (i = l = 0; i < ARRAY_SIZE(blake2s_testvecs); l = (l + 37) % ++i) {
13022 ++ int outlen = 1 + i % BLAKE2S_HASH_SIZE;
13023 ++ int keylen = (13 * i) % (BLAKE2S_KEY_SIZE + 1);
13024 ++
13025 ++ blake2s(hash, buf, key + BLAKE2S_KEY_SIZE - keylen, outlen, i,
13026 ++ keylen);
13027 ++ if (memcmp(hash, blake2s_testvecs[i], outlen)) {
13028 ++ pr_err("blake2s self-test %d: FAIL\n", i + 1);
13029 ++ success = false;
13030 ++ }
13031 ++
13032 ++ if (!keylen)
13033 ++ blake2s_init(&state, outlen);
13034 ++ else
13035 ++ blake2s_init_key(&state, outlen,
13036 ++ key + BLAKE2S_KEY_SIZE - keylen,
13037 ++ keylen);
13038 ++
13039 ++ blake2s_update(&state, buf, l);
13040 ++ blake2s_update(&state, buf + l, i - l);
13041 ++ blake2s_final(&state, hash);
13042 ++ if (memcmp(hash, blake2s_testvecs[i], outlen)) {
13043 ++ pr_err("blake2s init/update/final self-test %d: FAIL\n",
13044 ++ i + 1);
13045 ++ success = false;
13046 ++ }
13047 ++ }
13048 ++
13049 ++ if (success) {
13050 ++ blake2s256_hmac(hash, buf, key, sizeof(buf), sizeof(key));
13051 ++ success &= !memcmp(hash, blake2s_hmac_testvecs[0], BLAKE2S_HASH_SIZE);
13052 ++
13053 ++ blake2s256_hmac(hash, key, buf, sizeof(key), sizeof(buf));
13054 ++ success &= !memcmp(hash, blake2s_hmac_testvecs[1], BLAKE2S_HASH_SIZE);
13055 ++
13056 ++ if (!success)
13057 ++ pr_err("blake2s256_hmac self-test: FAIL\n");
13058 ++ }
13059 ++
13060 ++ return success;
13061 ++}
13062 +diff --git a/lib/crypto/blake2s.c b/lib/crypto/blake2s.c
13063 +new file mode 100644
13064 +index 000000000000..41025a30c524
13065 +--- /dev/null
13066 ++++ b/lib/crypto/blake2s.c
13067 +@@ -0,0 +1,126 @@
13068 ++// SPDX-License-Identifier: GPL-2.0 OR MIT
13069 ++/*
13070 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
13071 ++ *
13072 ++ * This is an implementation of the BLAKE2s hash and PRF functions.
13073 ++ *
13074 ++ * Information: https://blake2.net/
13075 ++ *
13076 ++ */
13077 ++
13078 ++#include <crypto/internal/blake2s.h>
13079 ++#include <linux/types.h>
13080 ++#include <linux/string.h>
13081 ++#include <linux/kernel.h>
13082 ++#include <linux/module.h>
13083 ++#include <linux/init.h>
13084 ++#include <linux/bug.h>
13085 ++#include <asm/unaligned.h>
13086 ++
13087 ++bool blake2s_selftest(void);
13088 ++
13089 ++void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen)
13090 ++{
13091 ++ const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen;
13092 ++
13093 ++ if (unlikely(!inlen))
13094 ++ return;
13095 ++ if (inlen > fill) {
13096 ++ memcpy(state->buf + state->buflen, in, fill);
13097 ++ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_BLAKE2S))
13098 ++ blake2s_compress_arch(state, state->buf, 1,
13099 ++ BLAKE2S_BLOCK_SIZE);
13100 ++ else
13101 ++ blake2s_compress_generic(state, state->buf, 1,
13102 ++ BLAKE2S_BLOCK_SIZE);
13103 ++ state->buflen = 0;
13104 ++ in += fill;
13105 ++ inlen -= fill;
13106 ++ }
13107 ++ if (inlen > BLAKE2S_BLOCK_SIZE) {
13108 ++ const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE);
13109 ++ /* Hash one less (full) block than strictly possible */
13110 ++ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_BLAKE2S))
13111 ++ blake2s_compress_arch(state, in, nblocks - 1,
13112 ++ BLAKE2S_BLOCK_SIZE);
13113 ++ else
13114 ++ blake2s_compress_generic(state, in, nblocks - 1,
13115 ++ BLAKE2S_BLOCK_SIZE);
13116 ++ in += BLAKE2S_BLOCK_SIZE * (nblocks - 1);
13117 ++ inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1);
13118 ++ }
13119 ++ memcpy(state->buf + state->buflen, in, inlen);
13120 ++ state->buflen += inlen;
13121 ++}
13122 ++EXPORT_SYMBOL(blake2s_update);
13123 ++
13124 ++void blake2s_final(struct blake2s_state *state, u8 *out)
13125 ++{
13126 ++ WARN_ON(IS_ENABLED(DEBUG) && !out);
13127 ++ blake2s_set_lastblock(state);
13128 ++ memset(state->buf + state->buflen, 0,
13129 ++ BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */
13130 ++ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_BLAKE2S))
13131 ++ blake2s_compress_arch(state, state->buf, 1, state->buflen);
13132 ++ else
13133 ++ blake2s_compress_generic(state, state->buf, 1, state->buflen);
13134 ++ cpu_to_le32_array(state->h, ARRAY_SIZE(state->h));
13135 ++ memcpy(out, state->h, state->outlen);
13136 ++ memzero_explicit(state, sizeof(*state));
13137 ++}
13138 ++EXPORT_SYMBOL(blake2s_final);
13139 ++
13140 ++void blake2s256_hmac(u8 *out, const u8 *in, const u8 *key, const size_t inlen,
13141 ++ const size_t keylen)
13142 ++{
13143 ++ struct blake2s_state state;
13144 ++ u8 x_key[BLAKE2S_BLOCK_SIZE] __aligned(__alignof__(u32)) = { 0 };
13145 ++ u8 i_hash[BLAKE2S_HASH_SIZE] __aligned(__alignof__(u32));
13146 ++ int i;
13147 ++
13148 ++ if (keylen > BLAKE2S_BLOCK_SIZE) {
13149 ++ blake2s_init(&state, BLAKE2S_HASH_SIZE);
13150 ++ blake2s_update(&state, key, keylen);
13151 ++ blake2s_final(&state, x_key);
13152 ++ } else
13153 ++ memcpy(x_key, key, keylen);
13154 ++
13155 ++ for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i)
13156 ++ x_key[i] ^= 0x36;
13157 ++
13158 ++ blake2s_init(&state, BLAKE2S_HASH_SIZE);
13159 ++ blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE);
13160 ++ blake2s_update(&state, in, inlen);
13161 ++ blake2s_final(&state, i_hash);
13162 ++
13163 ++ for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i)
13164 ++ x_key[i] ^= 0x5c ^ 0x36;
13165 ++
13166 ++ blake2s_init(&state, BLAKE2S_HASH_SIZE);
13167 ++ blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE);
13168 ++ blake2s_update(&state, i_hash, BLAKE2S_HASH_SIZE);
13169 ++ blake2s_final(&state, i_hash);
13170 ++
13171 ++ memcpy(out, i_hash, BLAKE2S_HASH_SIZE);
13172 ++ memzero_explicit(x_key, BLAKE2S_BLOCK_SIZE);
13173 ++ memzero_explicit(i_hash, BLAKE2S_HASH_SIZE);
13174 ++}
13175 ++EXPORT_SYMBOL(blake2s256_hmac);
13176 ++
13177 ++static int __init mod_init(void)
13178 ++{
13179 ++ if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) &&
13180 ++ WARN_ON(!blake2s_selftest()))
13181 ++ return -ENODEV;
13182 ++ return 0;
13183 ++}
13184 ++
13185 ++static void __exit mod_exit(void)
13186 ++{
13187 ++}
13188 ++
13189 ++module_init(mod_init);
13190 ++module_exit(mod_exit);
13191 ++MODULE_LICENSE("GPL v2");
13192 ++MODULE_DESCRIPTION("BLAKE2s hash function");
13193 ++MODULE_AUTHOR("Jason A. Donenfeld <Jason@×××××.com>");
13194 +--
13195 +cgit v1.2.3-4-ga26e
13196 +
13197 +
13198 +From fd696e96e5f8f313e0cc68b3ec2d21642c553c50 Mon Sep 17 00:00:00 2001
13199 +From: Ard Biesheuvel <ardb@××××××.org>
13200 +Date: Fri, 8 Nov 2019 13:22:29 +0100
13201 +Subject: crypto: testmgr - add test cases for Blake2s
13202 +
13203 +commit 17e1df67023a5c9ccaeb5de8bf5b88f63127ecf7 upstream.
13204 +
13205 +As suggested by Eric for the Blake2b implementation contributed by
13206 +David, introduce a set of test vectors for Blake2s covering different
13207 +digest and key sizes.
13208 +
13209 + blake2s-128 blake2s-160 blake2s-224 blake2s-256
13210 + ---------------------------------------------------
13211 +len=0 | klen=0 klen=1 klen=16 klen=32
13212 +len=1 | klen=16 klen=32 klen=0 klen=1
13213 +len=7 | klen=32 klen=0 klen=1 klen=16
13214 +len=15 | klen=1 klen=16 klen=32 klen=0
13215 +len=64 | klen=0 klen=1 klen=16 klen=32
13216 +len=247 | klen=16 klen=32 klen=0 klen=1
13217 +len=256 | klen=32 klen=0 klen=1 klen=16
13218 +
13219 +Cc: David Sterba <dsterba@××××.com>
13220 +Cc: Eric Biggers <ebiggers@××××××.com>
13221 +Signed-off-by: Ard Biesheuvel <ardb@××××××.org>
13222 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
13223 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
13224 +---
13225 + crypto/testmgr.c | 24 ++++++
13226 + crypto/testmgr.h | 251 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
13227 + 2 files changed, 275 insertions(+)
13228 +
13229 +diff --git a/crypto/testmgr.c b/crypto/testmgr.c
13230 +index 7473c5bc06b1..711390861f71 100644
13231 +--- a/crypto/testmgr.c
13232 ++++ b/crypto/testmgr.c
13233 +@@ -4034,6 +4034,30 @@ static const struct alg_test_desc alg_test_descs[] = {
13234 + .alg = "authenc(hmac(sha512),rfc3686(ctr(aes)))",
13235 + .test = alg_test_null,
13236 + .fips_allowed = 1,
13237 ++ }, {
13238 ++ .alg = "blake2s-128",
13239 ++ .test = alg_test_hash,
13240 ++ .suite = {
13241 ++ .hash = __VECS(blakes2s_128_tv_template)
13242 ++ }
13243 ++ }, {
13244 ++ .alg = "blake2s-160",
13245 ++ .test = alg_test_hash,
13246 ++ .suite = {
13247 ++ .hash = __VECS(blakes2s_160_tv_template)
13248 ++ }
13249 ++ }, {
13250 ++ .alg = "blake2s-224",
13251 ++ .test = alg_test_hash,
13252 ++ .suite = {
13253 ++ .hash = __VECS(blakes2s_224_tv_template)
13254 ++ }
13255 ++ }, {
13256 ++ .alg = "blake2s-256",
13257 ++ .test = alg_test_hash,
13258 ++ .suite = {
13259 ++ .hash = __VECS(blakes2s_256_tv_template)
13260 ++ }
13261 + }, {
13262 + .alg = "cbc(aes)",
13263 + .test = alg_test_skcipher,
13264 +diff --git a/crypto/testmgr.h b/crypto/testmgr.h
13265 +index ef7d21f39d4a..102fcad54966 100644
13266 +--- a/crypto/testmgr.h
13267 ++++ b/crypto/testmgr.h
13268 +@@ -31567,4 +31567,255 @@ static const struct aead_testvec essiv_hmac_sha256_aes_cbc_tv_temp[] = {
13269 + },
13270 + };
13271 +
13272 ++static const char blake2_ordered_sequence[] =
13273 ++ "\x00\x01\x02\x03\x04\x05\x06\x07"
13274 ++ "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
13275 ++ "\x10\x11\x12\x13\x14\x15\x16\x17"
13276 ++ "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
13277 ++ "\x20\x21\x22\x23\x24\x25\x26\x27"
13278 ++ "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
13279 ++ "\x30\x31\x32\x33\x34\x35\x36\x37"
13280 ++ "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
13281 ++ "\x40\x41\x42\x43\x44\x45\x46\x47"
13282 ++ "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
13283 ++ "\x50\x51\x52\x53\x54\x55\x56\x57"
13284 ++ "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
13285 ++ "\x60\x61\x62\x63\x64\x65\x66\x67"
13286 ++ "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
13287 ++ "\x70\x71\x72\x73\x74\x75\x76\x77"
13288 ++ "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
13289 ++ "\x80\x81\x82\x83\x84\x85\x86\x87"
13290 ++ "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
13291 ++ "\x90\x91\x92\x93\x94\x95\x96\x97"
13292 ++ "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
13293 ++ "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
13294 ++ "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
13295 ++ "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
13296 ++ "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
13297 ++ "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
13298 ++ "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
13299 ++ "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
13300 ++ "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
13301 ++ "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
13302 ++ "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
13303 ++ "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
13304 ++ "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff";
13305 ++
13306 ++static const struct hash_testvec blakes2s_128_tv_template[] = {{
13307 ++ .digest = (u8[]){ 0x64, 0x55, 0x0d, 0x6f, 0xfe, 0x2c, 0x0a, 0x01,
13308 ++ 0xa1, 0x4a, 0xba, 0x1e, 0xad, 0xe0, 0x20, 0x0c, },
13309 ++}, {
13310 ++ .plaintext = blake2_ordered_sequence,
13311 ++ .psize = 64,
13312 ++ .digest = (u8[]){ 0xdc, 0x66, 0xca, 0x8f, 0x03, 0x86, 0x58, 0x01,
13313 ++ 0xb0, 0xff, 0xe0, 0x6e, 0xd8, 0xa1, 0xa9, 0x0e, },
13314 ++}, {
13315 ++ .ksize = 16,
13316 ++ .key = blake2_ordered_sequence,
13317 ++ .plaintext = blake2_ordered_sequence,
13318 ++ .psize = 1,
13319 ++ .digest = (u8[]){ 0x88, 0x1e, 0x42, 0xe7, 0xbb, 0x35, 0x80, 0x82,
13320 ++ 0x63, 0x7c, 0x0a, 0x0f, 0xd7, 0xec, 0x6c, 0x2f, },
13321 ++}, {
13322 ++ .ksize = 32,
13323 ++ .key = blake2_ordered_sequence,
13324 ++ .plaintext = blake2_ordered_sequence,
13325 ++ .psize = 7,
13326 ++ .digest = (u8[]){ 0xcf, 0x9e, 0x07, 0x2a, 0xd5, 0x22, 0xf2, 0xcd,
13327 ++ 0xa2, 0xd8, 0x25, 0x21, 0x80, 0x86, 0x73, 0x1c, },
13328 ++}, {
13329 ++ .ksize = 1,
13330 ++ .key = "B",
13331 ++ .plaintext = blake2_ordered_sequence,
13332 ++ .psize = 15,
13333 ++ .digest = (u8[]){ 0xf6, 0x33, 0x5a, 0x2c, 0x22, 0xa0, 0x64, 0xb2,
13334 ++ 0xb6, 0x3f, 0xeb, 0xbc, 0xd1, 0xc3, 0xe5, 0xb2, },
13335 ++}, {
13336 ++ .ksize = 16,
13337 ++ .key = blake2_ordered_sequence,
13338 ++ .plaintext = blake2_ordered_sequence,
13339 ++ .psize = 247,
13340 ++ .digest = (u8[]){ 0x72, 0x66, 0x49, 0x60, 0xf9, 0x4a, 0xea, 0xbe,
13341 ++ 0x1f, 0xf4, 0x60, 0xce, 0xb7, 0x81, 0xcb, 0x09, },
13342 ++}, {
13343 ++ .ksize = 32,
13344 ++ .key = blake2_ordered_sequence,
13345 ++ .plaintext = blake2_ordered_sequence,
13346 ++ .psize = 256,
13347 ++ .digest = (u8[]){ 0xd5, 0xa4, 0x0e, 0xc3, 0x16, 0xc7, 0x51, 0xa6,
13348 ++ 0x3c, 0xd0, 0xd9, 0x11, 0x57, 0xfa, 0x1e, 0xbb, },
13349 ++}};
13350 ++
13351 ++static const struct hash_testvec blakes2s_160_tv_template[] = {{
13352 ++ .plaintext = blake2_ordered_sequence,
13353 ++ .psize = 7,
13354 ++ .digest = (u8[]){ 0xb4, 0xf2, 0x03, 0x49, 0x37, 0xed, 0xb1, 0x3e,
13355 ++ 0x5b, 0x2a, 0xca, 0x64, 0x82, 0x74, 0xf6, 0x62,
13356 ++ 0xe3, 0xf2, 0x84, 0xff, },
13357 ++}, {
13358 ++ .plaintext = blake2_ordered_sequence,
13359 ++ .psize = 256,
13360 ++ .digest = (u8[]){ 0xaa, 0x56, 0x9b, 0xdc, 0x98, 0x17, 0x75, 0xf2,
13361 ++ 0xb3, 0x68, 0x83, 0xb7, 0x9b, 0x8d, 0x48, 0xb1,
13362 ++ 0x9b, 0x2d, 0x35, 0x05, },
13363 ++}, {
13364 ++ .ksize = 1,
13365 ++ .key = "B",
13366 ++ .digest = (u8[]){ 0x50, 0x16, 0xe7, 0x0c, 0x01, 0xd0, 0xd3, 0xc3,
13367 ++ 0xf4, 0x3e, 0xb1, 0x6e, 0x97, 0xa9, 0x4e, 0xd1,
13368 ++ 0x79, 0x65, 0x32, 0x93, },
13369 ++}, {
13370 ++ .ksize = 32,
13371 ++ .key = blake2_ordered_sequence,
13372 ++ .plaintext = blake2_ordered_sequence,
13373 ++ .psize = 1,
13374 ++ .digest = (u8[]){ 0x1c, 0x2b, 0xcd, 0x9a, 0x68, 0xca, 0x8c, 0x71,
13375 ++ 0x90, 0x29, 0x6c, 0x54, 0xfa, 0x56, 0x4a, 0xef,
13376 ++ 0xa2, 0x3a, 0x56, 0x9c, },
13377 ++}, {
13378 ++ .ksize = 16,
13379 ++ .key = blake2_ordered_sequence,
13380 ++ .plaintext = blake2_ordered_sequence,
13381 ++ .psize = 15,
13382 ++ .digest = (u8[]){ 0x36, 0xc3, 0x5f, 0x9a, 0xdc, 0x7e, 0xbf, 0x19,
13383 ++ 0x68, 0xaa, 0xca, 0xd8, 0x81, 0xbf, 0x09, 0x34,
13384 ++ 0x83, 0x39, 0x0f, 0x30, },
13385 ++}, {
13386 ++ .ksize = 1,
13387 ++ .key = "B",
13388 ++ .plaintext = blake2_ordered_sequence,
13389 ++ .psize = 64,
13390 ++ .digest = (u8[]){ 0x86, 0x80, 0x78, 0xa4, 0x14, 0xec, 0x03, 0xe5,
13391 ++ 0xb6, 0x9a, 0x52, 0x0e, 0x42, 0xee, 0x39, 0x9d,
13392 ++ 0xac, 0xa6, 0x81, 0x63, },
13393 ++}, {
13394 ++ .ksize = 32,
13395 ++ .key = blake2_ordered_sequence,
13396 ++ .plaintext = blake2_ordered_sequence,
13397 ++ .psize = 247,
13398 ++ .digest = (u8[]){ 0x2d, 0xd8, 0xd2, 0x53, 0x66, 0xfa, 0xa9, 0x01,
13399 ++ 0x1c, 0x9c, 0xaf, 0xa3, 0xe2, 0x9d, 0x9b, 0x10,
13400 ++ 0x0a, 0xf6, 0x73, 0xe8, },
13401 ++}};
13402 ++
13403 ++static const struct hash_testvec blakes2s_224_tv_template[] = {{
13404 ++ .plaintext = blake2_ordered_sequence,
13405 ++ .psize = 1,
13406 ++ .digest = (u8[]){ 0x61, 0xb9, 0x4e, 0xc9, 0x46, 0x22, 0xa3, 0x91,
13407 ++ 0xd2, 0xae, 0x42, 0xe6, 0x45, 0x6c, 0x90, 0x12,
13408 ++ 0xd5, 0x80, 0x07, 0x97, 0xb8, 0x86, 0x5a, 0xfc,
13409 ++ 0x48, 0x21, 0x97, 0xbb, },
13410 ++}, {
13411 ++ .plaintext = blake2_ordered_sequence,
13412 ++ .psize = 247,
13413 ++ .digest = (u8[]){ 0x9e, 0xda, 0xc7, 0x20, 0x2c, 0xd8, 0x48, 0x2e,
13414 ++ 0x31, 0x94, 0xab, 0x46, 0x6d, 0x94, 0xd8, 0xb4,
13415 ++ 0x69, 0xcd, 0xae, 0x19, 0x6d, 0x9e, 0x41, 0xcc,
13416 ++ 0x2b, 0xa4, 0xd5, 0xf6, },
13417 ++}, {
13418 ++ .ksize = 16,
13419 ++ .key = blake2_ordered_sequence,
13420 ++ .digest = (u8[]){ 0x32, 0xc0, 0xac, 0xf4, 0x3b, 0xd3, 0x07, 0x9f,
13421 ++ 0xbe, 0xfb, 0xfa, 0x4d, 0x6b, 0x4e, 0x56, 0xb3,
13422 ++ 0xaa, 0xd3, 0x27, 0xf6, 0x14, 0xbf, 0xb9, 0x32,
13423 ++ 0xa7, 0x19, 0xfc, 0xb8, },
13424 ++}, {
13425 ++ .ksize = 1,
13426 ++ .key = "B",
13427 ++ .plaintext = blake2_ordered_sequence,
13428 ++ .psize = 7,
13429 ++ .digest = (u8[]){ 0x73, 0xad, 0x5e, 0x6d, 0xb9, 0x02, 0x8e, 0x76,
13430 ++ 0xf2, 0x66, 0x42, 0x4b, 0x4c, 0xfa, 0x1f, 0xe6,
13431 ++ 0x2e, 0x56, 0x40, 0xe5, 0xa2, 0xb0, 0x3c, 0xe8,
13432 ++ 0x7b, 0x45, 0xfe, 0x05, },
13433 ++}, {
13434 ++ .ksize = 32,
13435 ++ .key = blake2_ordered_sequence,
13436 ++ .plaintext = blake2_ordered_sequence,
13437 ++ .psize = 15,
13438 ++ .digest = (u8[]){ 0x16, 0x60, 0xfb, 0x92, 0x54, 0xb3, 0x6e, 0x36,
13439 ++ 0x81, 0xf4, 0x16, 0x41, 0xc3, 0x3d, 0xd3, 0x43,
13440 ++ 0x84, 0xed, 0x10, 0x6f, 0x65, 0x80, 0x7a, 0x3e,
13441 ++ 0x25, 0xab, 0xc5, 0x02, },
13442 ++}, {
13443 ++ .ksize = 16,
13444 ++ .key = blake2_ordered_sequence,
13445 ++ .plaintext = blake2_ordered_sequence,
13446 ++ .psize = 64,
13447 ++ .digest = (u8[]){ 0xca, 0xaa, 0x39, 0x67, 0x9c, 0xf7, 0x6b, 0xc7,
13448 ++ 0xb6, 0x82, 0xca, 0x0e, 0x65, 0x36, 0x5b, 0x7c,
13449 ++ 0x24, 0x00, 0xfa, 0x5f, 0xda, 0x06, 0x91, 0x93,
13450 ++ 0x6a, 0x31, 0x83, 0xb5, },
13451 ++}, {
13452 ++ .ksize = 1,
13453 ++ .key = "B",
13454 ++ .plaintext = blake2_ordered_sequence,
13455 ++ .psize = 256,
13456 ++ .digest = (u8[]){ 0x90, 0x02, 0x26, 0xb5, 0x06, 0x9c, 0x36, 0x86,
13457 ++ 0x94, 0x91, 0x90, 0x1e, 0x7d, 0x2a, 0x71, 0xb2,
13458 ++ 0x48, 0xb5, 0xe8, 0x16, 0xfd, 0x64, 0x33, 0x45,
13459 ++ 0xb3, 0xd7, 0xec, 0xcc, },
13460 ++}};
13461 ++
13462 ++static const struct hash_testvec blakes2s_256_tv_template[] = {{
13463 ++ .plaintext = blake2_ordered_sequence,
13464 ++ .psize = 15,
13465 ++ .digest = (u8[]){ 0xd9, 0x7c, 0x82, 0x8d, 0x81, 0x82, 0xa7, 0x21,
13466 ++ 0x80, 0xa0, 0x6a, 0x78, 0x26, 0x83, 0x30, 0x67,
13467 ++ 0x3f, 0x7c, 0x4e, 0x06, 0x35, 0x94, 0x7c, 0x04,
13468 ++ 0xc0, 0x23, 0x23, 0xfd, 0x45, 0xc0, 0xa5, 0x2d, },
13469 ++}, {
13470 ++ .ksize = 32,
13471 ++ .key = blake2_ordered_sequence,
13472 ++ .digest = (u8[]){ 0x48, 0xa8, 0x99, 0x7d, 0xa4, 0x07, 0x87, 0x6b,
13473 ++ 0x3d, 0x79, 0xc0, 0xd9, 0x23, 0x25, 0xad, 0x3b,
13474 ++ 0x89, 0xcb, 0xb7, 0x54, 0xd8, 0x6a, 0xb7, 0x1a,
13475 ++ 0xee, 0x04, 0x7a, 0xd3, 0x45, 0xfd, 0x2c, 0x49, },
13476 ++}, {
13477 ++ .ksize = 1,
13478 ++ .key = "B",
13479 ++ .plaintext = blake2_ordered_sequence,
13480 ++ .psize = 1,
13481 ++ .digest = (u8[]){ 0x22, 0x27, 0xae, 0xaa, 0x6e, 0x81, 0x56, 0x03,
13482 ++ 0xa7, 0xe3, 0xa1, 0x18, 0xa5, 0x9a, 0x2c, 0x18,
13483 ++ 0xf4, 0x63, 0xbc, 0x16, 0x70, 0xf1, 0xe7, 0x4b,
13484 ++ 0x00, 0x6d, 0x66, 0x16, 0xae, 0x9e, 0x74, 0x4e, },
13485 ++}, {
13486 ++ .ksize = 16,
13487 ++ .key = blake2_ordered_sequence,
13488 ++ .plaintext = blake2_ordered_sequence,
13489 ++ .psize = 7,
13490 ++ .digest = (u8[]){ 0x58, 0x5d, 0xa8, 0x60, 0x1c, 0xa4, 0xd8, 0x03,
13491 ++ 0x86, 0x86, 0x84, 0x64, 0xd7, 0xa0, 0x8e, 0x15,
13492 ++ 0x2f, 0x05, 0xa2, 0x1b, 0xbc, 0xef, 0x7a, 0x34,
13493 ++ 0xb3, 0xc5, 0xbc, 0x4b, 0xf0, 0x32, 0xeb, 0x12, },
13494 ++}, {
13495 ++ .ksize = 32,
13496 ++ .key = blake2_ordered_sequence,
13497 ++ .plaintext = blake2_ordered_sequence,
13498 ++ .psize = 64,
13499 ++ .digest = (u8[]){ 0x89, 0x75, 0xb0, 0x57, 0x7f, 0xd3, 0x55, 0x66,
13500 ++ 0xd7, 0x50, 0xb3, 0x62, 0xb0, 0x89, 0x7a, 0x26,
13501 ++ 0xc3, 0x99, 0x13, 0x6d, 0xf0, 0x7b, 0xab, 0xab,
13502 ++ 0xbd, 0xe6, 0x20, 0x3f, 0xf2, 0x95, 0x4e, 0xd4, },
13503 ++}, {
13504 ++ .ksize = 1,
13505 ++ .key = "B",
13506 ++ .plaintext = blake2_ordered_sequence,
13507 ++ .psize = 247,
13508 ++ .digest = (u8[]){ 0x2e, 0x74, 0x1c, 0x1d, 0x03, 0xf4, 0x9d, 0x84,
13509 ++ 0x6f, 0xfc, 0x86, 0x32, 0x92, 0x49, 0x7e, 0x66,
13510 ++ 0xd7, 0xc3, 0x10, 0x88, 0xfe, 0x28, 0xb3, 0xe0,
13511 ++ 0xbf, 0x50, 0x75, 0xad, 0x8e, 0xa4, 0xe6, 0xb2, },
13512 ++}, {
13513 ++ .ksize = 16,
13514 ++ .key = blake2_ordered_sequence,
13515 ++ .plaintext = blake2_ordered_sequence,
13516 ++ .psize = 256,
13517 ++ .digest = (u8[]){ 0xb9, 0xd2, 0x81, 0x0e, 0x3a, 0xb1, 0x62, 0x9b,
13518 ++ 0xad, 0x44, 0x05, 0xf4, 0x92, 0x2e, 0x99, 0xc1,
13519 ++ 0x4a, 0x47, 0xbb, 0x5b, 0x6f, 0xb2, 0x96, 0xed,
13520 ++ 0xd5, 0x06, 0xb5, 0x3a, 0x7c, 0x7a, 0x65, 0x1d, },
13521 ++}};
13522 ++
13523 + #endif /* _CRYPTO_TESTMGR_H */
13524 +--
13525 +cgit v1.2.3-4-ga26e
13526 +
13527 +
13528 +From 6df25388ce7b87cc82381b9114f4eb3e39df6ff3 Mon Sep 17 00:00:00 2001
13529 +From: Ard Biesheuvel <ardb@××××××.org>
13530 +Date: Fri, 8 Nov 2019 13:22:30 +0100
13531 +Subject: crypto: blake2s - implement generic shash driver
13532 +
13533 +commit 7f9b0880925f1f9d7d59504ea0892d2ae9cfc233 upstream.
13534 +
13535 +Wire up our newly added Blake2s implementation via the shash API.
13536 +
13537 +Signed-off-by: Ard Biesheuvel <ardb@××××××.org>
13538 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
13539 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
13540 +---
13541 + crypto/Kconfig | 18 ++++
13542 + crypto/Makefile | 1 +
13543 + crypto/blake2s_generic.c | 171 ++++++++++++++++++++++++++++++++++++++
13544 + include/crypto/internal/blake2s.h | 5 ++
13545 + 4 files changed, 195 insertions(+)
13546 + create mode 100644 crypto/blake2s_generic.c
13547 +
13548 +diff --git a/crypto/Kconfig b/crypto/Kconfig
13549 +index 0be5b4092f18..81c8a4059afc 100644
13550 +--- a/crypto/Kconfig
13551 ++++ b/crypto/Kconfig
13552 +@@ -639,6 +639,24 @@ config CRYPTO_XXHASH
13553 + xxHash non-cryptographic hash algorithm. Extremely fast, working at
13554 + speeds close to RAM limits.
13555 +
13556 ++config CRYPTO_BLAKE2S
13557 ++ tristate "BLAKE2s digest algorithm"
13558 ++ select CRYPTO_LIB_BLAKE2S_GENERIC
13559 ++ select CRYPTO_HASH
13560 ++ help
13561 ++ Implementation of cryptographic hash function BLAKE2s
13562 ++ optimized for 8-32bit platforms and can produce digests of any size
13563 ++ between 1 and 32 bytes. The keyed hash is also implemented.
13564 ++
13565 ++ This module provides the following algorithms:
13566 ++
13567 ++ - blake2s-128
13568 ++ - blake2s-160
13569 ++ - blake2s-224
13570 ++ - blake2s-256
13571 ++
13572 ++ See https://blake2.net for further information.
13573 ++
13574 + config CRYPTO_CRCT10DIF
13575 + tristate "CRCT10DIF algorithm"
13576 + select CRYPTO_HASH
13577 +diff --git a/crypto/Makefile b/crypto/Makefile
13578 +index aa740c8492b9..fd27edea7c8e 100644
13579 +--- a/crypto/Makefile
13580 ++++ b/crypto/Makefile
13581 +@@ -74,6 +74,7 @@ obj-$(CONFIG_CRYPTO_STREEBOG) += streebog_generic.o
13582 + obj-$(CONFIG_CRYPTO_WP512) += wp512.o
13583 + CFLAGS_wp512.o := $(call cc-option,-fno-schedule-insns) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149
13584 + obj-$(CONFIG_CRYPTO_TGR192) += tgr192.o
13585 ++obj-$(CONFIG_CRYPTO_BLAKE2S) += blake2s_generic.o
13586 + obj-$(CONFIG_CRYPTO_GF128MUL) += gf128mul.o
13587 + obj-$(CONFIG_CRYPTO_ECB) += ecb.o
13588 + obj-$(CONFIG_CRYPTO_CBC) += cbc.o
13589 +diff --git a/crypto/blake2s_generic.c b/crypto/blake2s_generic.c
13590 +new file mode 100644
13591 +index 000000000000..ed0c74640470
13592 +--- /dev/null
13593 ++++ b/crypto/blake2s_generic.c
13594 +@@ -0,0 +1,171 @@
13595 ++// SPDX-License-Identifier: GPL-2.0 OR MIT
13596 ++/*
13597 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
13598 ++ */
13599 ++
13600 ++#include <crypto/internal/blake2s.h>
13601 ++#include <crypto/internal/simd.h>
13602 ++#include <crypto/internal/hash.h>
13603 ++
13604 ++#include <linux/types.h>
13605 ++#include <linux/jump_label.h>
13606 ++#include <linux/kernel.h>
13607 ++#include <linux/module.h>
13608 ++
13609 ++static int crypto_blake2s_setkey(struct crypto_shash *tfm, const u8 *key,
13610 ++ unsigned int keylen)
13611 ++{
13612 ++ struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(tfm);
13613 ++
13614 ++ if (keylen == 0 || keylen > BLAKE2S_KEY_SIZE) {
13615 ++ crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
13616 ++ return -EINVAL;
13617 ++ }
13618 ++
13619 ++ memcpy(tctx->key, key, keylen);
13620 ++ tctx->keylen = keylen;
13621 ++
13622 ++ return 0;
13623 ++}
13624 ++
13625 ++static int crypto_blake2s_init(struct shash_desc *desc)
13626 ++{
13627 ++ struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
13628 ++ struct blake2s_state *state = shash_desc_ctx(desc);
13629 ++ const int outlen = crypto_shash_digestsize(desc->tfm);
13630 ++
13631 ++ if (tctx->keylen)
13632 ++ blake2s_init_key(state, outlen, tctx->key, tctx->keylen);
13633 ++ else
13634 ++ blake2s_init(state, outlen);
13635 ++
13636 ++ return 0;
13637 ++}
13638 ++
13639 ++static int crypto_blake2s_update(struct shash_desc *desc, const u8 *in,
13640 ++ unsigned int inlen)
13641 ++{
13642 ++ struct blake2s_state *state = shash_desc_ctx(desc);
13643 ++ const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen;
13644 ++
13645 ++ if (unlikely(!inlen))
13646 ++ return 0;
13647 ++ if (inlen > fill) {
13648 ++ memcpy(state->buf + state->buflen, in, fill);
13649 ++ blake2s_compress_generic(state, state->buf, 1, BLAKE2S_BLOCK_SIZE);
13650 ++ state->buflen = 0;
13651 ++ in += fill;
13652 ++ inlen -= fill;
13653 ++ }
13654 ++ if (inlen > BLAKE2S_BLOCK_SIZE) {
13655 ++ const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE);
13656 ++ /* Hash one less (full) block than strictly possible */
13657 ++ blake2s_compress_generic(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE);
13658 ++ in += BLAKE2S_BLOCK_SIZE * (nblocks - 1);
13659 ++ inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1);
13660 ++ }
13661 ++ memcpy(state->buf + state->buflen, in, inlen);
13662 ++ state->buflen += inlen;
13663 ++
13664 ++ return 0;
13665 ++}
13666 ++
13667 ++static int crypto_blake2s_final(struct shash_desc *desc, u8 *out)
13668 ++{
13669 ++ struct blake2s_state *state = shash_desc_ctx(desc);
13670 ++
13671 ++ blake2s_set_lastblock(state);
13672 ++ memset(state->buf + state->buflen, 0,
13673 ++ BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */
13674 ++ blake2s_compress_generic(state, state->buf, 1, state->buflen);
13675 ++ cpu_to_le32_array(state->h, ARRAY_SIZE(state->h));
13676 ++ memcpy(out, state->h, state->outlen);
13677 ++ memzero_explicit(state, sizeof(*state));
13678 ++
13679 ++ return 0;
13680 ++}
13681 ++
13682 ++static struct shash_alg blake2s_algs[] = {{
13683 ++ .base.cra_name = "blake2s-128",
13684 ++ .base.cra_driver_name = "blake2s-128-generic",
13685 ++ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
13686 ++ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx),
13687 ++ .base.cra_priority = 200,
13688 ++ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE,
13689 ++ .base.cra_module = THIS_MODULE,
13690 ++
13691 ++ .digestsize = BLAKE2S_128_HASH_SIZE,
13692 ++ .setkey = crypto_blake2s_setkey,
13693 ++ .init = crypto_blake2s_init,
13694 ++ .update = crypto_blake2s_update,
13695 ++ .final = crypto_blake2s_final,
13696 ++ .descsize = sizeof(struct blake2s_state),
13697 ++}, {
13698 ++ .base.cra_name = "blake2s-160",
13699 ++ .base.cra_driver_name = "blake2s-160-generic",
13700 ++ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
13701 ++ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx),
13702 ++ .base.cra_priority = 200,
13703 ++ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE,
13704 ++ .base.cra_module = THIS_MODULE,
13705 ++
13706 ++ .digestsize = BLAKE2S_160_HASH_SIZE,
13707 ++ .setkey = crypto_blake2s_setkey,
13708 ++ .init = crypto_blake2s_init,
13709 ++ .update = crypto_blake2s_update,
13710 ++ .final = crypto_blake2s_final,
13711 ++ .descsize = sizeof(struct blake2s_state),
13712 ++}, {
13713 ++ .base.cra_name = "blake2s-224",
13714 ++ .base.cra_driver_name = "blake2s-224-generic",
13715 ++ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
13716 ++ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx),
13717 ++ .base.cra_priority = 200,
13718 ++ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE,
13719 ++ .base.cra_module = THIS_MODULE,
13720 ++
13721 ++ .digestsize = BLAKE2S_224_HASH_SIZE,
13722 ++ .setkey = crypto_blake2s_setkey,
13723 ++ .init = crypto_blake2s_init,
13724 ++ .update = crypto_blake2s_update,
13725 ++ .final = crypto_blake2s_final,
13726 ++ .descsize = sizeof(struct blake2s_state),
13727 ++}, {
13728 ++ .base.cra_name = "blake2s-256",
13729 ++ .base.cra_driver_name = "blake2s-256-generic",
13730 ++ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
13731 ++ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx),
13732 ++ .base.cra_priority = 200,
13733 ++ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE,
13734 ++ .base.cra_module = THIS_MODULE,
13735 ++
13736 ++ .digestsize = BLAKE2S_256_HASH_SIZE,
13737 ++ .setkey = crypto_blake2s_setkey,
13738 ++ .init = crypto_blake2s_init,
13739 ++ .update = crypto_blake2s_update,
13740 ++ .final = crypto_blake2s_final,
13741 ++ .descsize = sizeof(struct blake2s_state),
13742 ++}};
13743 ++
13744 ++static int __init blake2s_mod_init(void)
13745 ++{
13746 ++ return crypto_register_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
13747 ++}
13748 ++
13749 ++static void __exit blake2s_mod_exit(void)
13750 ++{
13751 ++ crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
13752 ++}
13753 ++
13754 ++subsys_initcall(blake2s_mod_init);
13755 ++module_exit(blake2s_mod_exit);
13756 ++
13757 ++MODULE_ALIAS_CRYPTO("blake2s-128");
13758 ++MODULE_ALIAS_CRYPTO("blake2s-128-generic");
13759 ++MODULE_ALIAS_CRYPTO("blake2s-160");
13760 ++MODULE_ALIAS_CRYPTO("blake2s-160-generic");
13761 ++MODULE_ALIAS_CRYPTO("blake2s-224");
13762 ++MODULE_ALIAS_CRYPTO("blake2s-224-generic");
13763 ++MODULE_ALIAS_CRYPTO("blake2s-256");
13764 ++MODULE_ALIAS_CRYPTO("blake2s-256-generic");
13765 ++MODULE_LICENSE("GPL v2");
13766 +diff --git a/include/crypto/internal/blake2s.h b/include/crypto/internal/blake2s.h
13767 +index 941693effc7d..74ff77032e52 100644
13768 +--- a/include/crypto/internal/blake2s.h
13769 ++++ b/include/crypto/internal/blake2s.h
13770 +@@ -5,6 +5,11 @@
13771 +
13772 + #include <crypto/blake2s.h>
13773 +
13774 ++struct blake2s_tfm_ctx {
13775 ++ u8 key[BLAKE2S_KEY_SIZE];
13776 ++ unsigned int keylen;
13777 ++};
13778 ++
13779 + void blake2s_compress_generic(struct blake2s_state *state,const u8 *block,
13780 + size_t nblocks, const u32 inc);
13781 +
13782 +--
13783 +cgit v1.2.3-4-ga26e
13784 +
13785 +
13786 +From c0b10abc6a1cadcee5cfafc04ebcc34d3b02cdab Mon Sep 17 00:00:00 2001
13787 +From: "Jason A. Donenfeld" <Jason@×××××.com>
13788 +Date: Fri, 8 Nov 2019 13:22:31 +0100
13789 +Subject: crypto: blake2s - x86_64 SIMD implementation
13790 +
13791 +commit ed0356eda153f6a95649e11feb7b07083caf9e20 upstream.
13792 +
13793 +These implementations from Samuel Neves support AVX and AVX-512VL.
13794 +Originally this used AVX-512F, but Skylake thermal throttling made
13795 +AVX-512VL more attractive and possible to do with negligible difference.
13796 +
13797 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
13798 +Signed-off-by: Samuel Neves <sneves@××××××.pt>
13799 +Co-developed-by: Samuel Neves <sneves@××××××.pt>
13800 +[ardb: move to arch/x86/crypto, wire into lib/crypto framework]
13801 +Signed-off-by: Ard Biesheuvel <ardb@××××××.org>
13802 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
13803 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
13804 +---
13805 + arch/x86/crypto/Makefile | 2 +
13806 + arch/x86/crypto/blake2s-core.S | 258 +++++++++++++++++++++++++++++++++++++++++
13807 + arch/x86/crypto/blake2s-glue.c | 233 +++++++++++++++++++++++++++++++++++++
13808 + crypto/Kconfig | 6 +
13809 + 4 files changed, 499 insertions(+)
13810 + create mode 100644 arch/x86/crypto/blake2s-core.S
13811 + create mode 100644 arch/x86/crypto/blake2s-glue.c
13812 +
13813 +diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
13814 +index 759b1a927826..922c8ecfa00f 100644
13815 +--- a/arch/x86/crypto/Makefile
13816 ++++ b/arch/x86/crypto/Makefile
13817 +@@ -48,6 +48,7 @@ ifeq ($(avx_supported),yes)
13818 + obj-$(CONFIG_CRYPTO_CAST6_AVX_X86_64) += cast6-avx-x86_64.o
13819 + obj-$(CONFIG_CRYPTO_TWOFISH_AVX_X86_64) += twofish-avx-x86_64.o
13820 + obj-$(CONFIG_CRYPTO_SERPENT_AVX_X86_64) += serpent-avx-x86_64.o
13821 ++ obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += blake2s-x86_64.o
13822 + endif
13823 +
13824 + # These modules require assembler to support AVX2.
13825 +@@ -70,6 +71,7 @@ serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
13826 + aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o
13827 +
13828 + nhpoly1305-sse2-y := nh-sse2-x86_64.o nhpoly1305-sse2-glue.o
13829 ++blake2s-x86_64-y := blake2s-core.o blake2s-glue.o
13830 +
13831 + ifeq ($(avx_supported),yes)
13832 + camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o \
13833 +diff --git a/arch/x86/crypto/blake2s-core.S b/arch/x86/crypto/blake2s-core.S
13834 +new file mode 100644
13835 +index 000000000000..8591938eee26
13836 +--- /dev/null
13837 ++++ b/arch/x86/crypto/blake2s-core.S
13838 +@@ -0,0 +1,258 @@
13839 ++/* SPDX-License-Identifier: GPL-2.0 OR MIT */
13840 ++/*
13841 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
13842 ++ * Copyright (C) 2017-2019 Samuel Neves <sneves@××××××.pt>. All Rights Reserved.
13843 ++ */
13844 ++
13845 ++#include <linux/linkage.h>
13846 ++
13847 ++.section .rodata.cst32.BLAKE2S_IV, "aM", @progbits, 32
13848 ++.align 32
13849 ++IV: .octa 0xA54FF53A3C6EF372BB67AE856A09E667
13850 ++ .octa 0x5BE0CD191F83D9AB9B05688C510E527F
13851 ++.section .rodata.cst16.ROT16, "aM", @progbits, 16
13852 ++.align 16
13853 ++ROT16: .octa 0x0D0C0F0E09080B0A0504070601000302
13854 ++.section .rodata.cst16.ROR328, "aM", @progbits, 16
13855 ++.align 16
13856 ++ROR328: .octa 0x0C0F0E0D080B0A090407060500030201
13857 ++.section .rodata.cst64.BLAKE2S_SIGMA, "aM", @progbits, 160
13858 ++.align 64
13859 ++SIGMA:
13860 ++.byte 0, 2, 4, 6, 1, 3, 5, 7, 14, 8, 10, 12, 15, 9, 11, 13
13861 ++.byte 14, 4, 9, 13, 10, 8, 15, 6, 5, 1, 0, 11, 3, 12, 2, 7
13862 ++.byte 11, 12, 5, 15, 8, 0, 2, 13, 9, 10, 3, 7, 4, 14, 6, 1
13863 ++.byte 7, 3, 13, 11, 9, 1, 12, 14, 15, 2, 5, 4, 8, 6, 10, 0
13864 ++.byte 9, 5, 2, 10, 0, 7, 4, 15, 3, 14, 11, 6, 13, 1, 12, 8
13865 ++.byte 2, 6, 0, 8, 12, 10, 11, 3, 1, 4, 7, 15, 9, 13, 5, 14
13866 ++.byte 12, 1, 14, 4, 5, 15, 13, 10, 8, 0, 6, 9, 11, 7, 3, 2
13867 ++.byte 13, 7, 12, 3, 11, 14, 1, 9, 2, 5, 15, 8, 10, 0, 4, 6
13868 ++.byte 6, 14, 11, 0, 15, 9, 3, 8, 10, 12, 13, 1, 5, 2, 7, 4
13869 ++.byte 10, 8, 7, 1, 2, 4, 6, 5, 13, 15, 9, 3, 0, 11, 14, 12
13870 ++#ifdef CONFIG_AS_AVX512
13871 ++.section .rodata.cst64.BLAKE2S_SIGMA2, "aM", @progbits, 640
13872 ++.align 64
13873 ++SIGMA2:
13874 ++.long 0, 2, 4, 6, 1, 3, 5, 7, 14, 8, 10, 12, 15, 9, 11, 13
13875 ++.long 8, 2, 13, 15, 10, 9, 12, 3, 6, 4, 0, 14, 5, 11, 1, 7
13876 ++.long 11, 13, 8, 6, 5, 10, 14, 3, 2, 4, 12, 15, 1, 0, 7, 9
13877 ++.long 11, 10, 7, 0, 8, 15, 1, 13, 3, 6, 2, 12, 4, 14, 9, 5
13878 ++.long 4, 10, 9, 14, 15, 0, 11, 8, 1, 7, 3, 13, 2, 5, 6, 12
13879 ++.long 2, 11, 4, 15, 14, 3, 10, 8, 13, 6, 5, 7, 0, 12, 1, 9
13880 ++.long 4, 8, 15, 9, 14, 11, 13, 5, 3, 2, 1, 12, 6, 10, 7, 0
13881 ++.long 6, 13, 0, 14, 12, 2, 1, 11, 15, 4, 5, 8, 7, 9, 3, 10
13882 ++.long 15, 5, 4, 13, 10, 7, 3, 11, 12, 2, 0, 6, 9, 8, 1, 14
13883 ++.long 8, 7, 14, 11, 13, 15, 0, 12, 10, 4, 5, 6, 3, 2, 1, 9
13884 ++#endif /* CONFIG_AS_AVX512 */
13885 ++
13886 ++.text
13887 ++#ifdef CONFIG_AS_SSSE3
13888 ++ENTRY(blake2s_compress_ssse3)
13889 ++ testq %rdx,%rdx
13890 ++ je .Lendofloop
13891 ++ movdqu (%rdi),%xmm0
13892 ++ movdqu 0x10(%rdi),%xmm1
13893 ++ movdqa ROT16(%rip),%xmm12
13894 ++ movdqa ROR328(%rip),%xmm13
13895 ++ movdqu 0x20(%rdi),%xmm14
13896 ++ movq %rcx,%xmm15
13897 ++ leaq SIGMA+0xa0(%rip),%r8
13898 ++ jmp .Lbeginofloop
13899 ++ .align 32
13900 ++.Lbeginofloop:
13901 ++ movdqa %xmm0,%xmm10
13902 ++ movdqa %xmm1,%xmm11
13903 ++ paddq %xmm15,%xmm14
13904 ++ movdqa IV(%rip),%xmm2
13905 ++ movdqa %xmm14,%xmm3
13906 ++ pxor IV+0x10(%rip),%xmm3
13907 ++ leaq SIGMA(%rip),%rcx
13908 ++.Lroundloop:
13909 ++ movzbl (%rcx),%eax
13910 ++ movd (%rsi,%rax,4),%xmm4
13911 ++ movzbl 0x1(%rcx),%eax
13912 ++ movd (%rsi,%rax,4),%xmm5
13913 ++ movzbl 0x2(%rcx),%eax
13914 ++ movd (%rsi,%rax,4),%xmm6
13915 ++ movzbl 0x3(%rcx),%eax
13916 ++ movd (%rsi,%rax,4),%xmm7
13917 ++ punpckldq %xmm5,%xmm4
13918 ++ punpckldq %xmm7,%xmm6
13919 ++ punpcklqdq %xmm6,%xmm4
13920 ++ paddd %xmm4,%xmm0
13921 ++ paddd %xmm1,%xmm0
13922 ++ pxor %xmm0,%xmm3
13923 ++ pshufb %xmm12,%xmm3
13924 ++ paddd %xmm3,%xmm2
13925 ++ pxor %xmm2,%xmm1
13926 ++ movdqa %xmm1,%xmm8
13927 ++ psrld $0xc,%xmm1
13928 ++ pslld $0x14,%xmm8
13929 ++ por %xmm8,%xmm1
13930 ++ movzbl 0x4(%rcx),%eax
13931 ++ movd (%rsi,%rax,4),%xmm5
13932 ++ movzbl 0x5(%rcx),%eax
13933 ++ movd (%rsi,%rax,4),%xmm6
13934 ++ movzbl 0x6(%rcx),%eax
13935 ++ movd (%rsi,%rax,4),%xmm7
13936 ++ movzbl 0x7(%rcx),%eax
13937 ++ movd (%rsi,%rax,4),%xmm4
13938 ++ punpckldq %xmm6,%xmm5
13939 ++ punpckldq %xmm4,%xmm7
13940 ++ punpcklqdq %xmm7,%xmm5
13941 ++ paddd %xmm5,%xmm0
13942 ++ paddd %xmm1,%xmm0
13943 ++ pxor %xmm0,%xmm3
13944 ++ pshufb %xmm13,%xmm3
13945 ++ paddd %xmm3,%xmm2
13946 ++ pxor %xmm2,%xmm1
13947 ++ movdqa %xmm1,%xmm8
13948 ++ psrld $0x7,%xmm1
13949 ++ pslld $0x19,%xmm8
13950 ++ por %xmm8,%xmm1
13951 ++ pshufd $0x93,%xmm0,%xmm0
13952 ++ pshufd $0x4e,%xmm3,%xmm3
13953 ++ pshufd $0x39,%xmm2,%xmm2
13954 ++ movzbl 0x8(%rcx),%eax
13955 ++ movd (%rsi,%rax,4),%xmm6
13956 ++ movzbl 0x9(%rcx),%eax
13957 ++ movd (%rsi,%rax,4),%xmm7
13958 ++ movzbl 0xa(%rcx),%eax
13959 ++ movd (%rsi,%rax,4),%xmm4
13960 ++ movzbl 0xb(%rcx),%eax
13961 ++ movd (%rsi,%rax,4),%xmm5
13962 ++ punpckldq %xmm7,%xmm6
13963 ++ punpckldq %xmm5,%xmm4
13964 ++ punpcklqdq %xmm4,%xmm6
13965 ++ paddd %xmm6,%xmm0
13966 ++ paddd %xmm1,%xmm0
13967 ++ pxor %xmm0,%xmm3
13968 ++ pshufb %xmm12,%xmm3
13969 ++ paddd %xmm3,%xmm2
13970 ++ pxor %xmm2,%xmm1
13971 ++ movdqa %xmm1,%xmm8
13972 ++ psrld $0xc,%xmm1
13973 ++ pslld $0x14,%xmm8
13974 ++ por %xmm8,%xmm1
13975 ++ movzbl 0xc(%rcx),%eax
13976 ++ movd (%rsi,%rax,4),%xmm7
13977 ++ movzbl 0xd(%rcx),%eax
13978 ++ movd (%rsi,%rax,4),%xmm4
13979 ++ movzbl 0xe(%rcx),%eax
13980 ++ movd (%rsi,%rax,4),%xmm5
13981 ++ movzbl 0xf(%rcx),%eax
13982 ++ movd (%rsi,%rax,4),%xmm6
13983 ++ punpckldq %xmm4,%xmm7
13984 ++ punpckldq %xmm6,%xmm5
13985 ++ punpcklqdq %xmm5,%xmm7
13986 ++ paddd %xmm7,%xmm0
13987 ++ paddd %xmm1,%xmm0
13988 ++ pxor %xmm0,%xmm3
13989 ++ pshufb %xmm13,%xmm3
13990 ++ paddd %xmm3,%xmm2
13991 ++ pxor %xmm2,%xmm1
13992 ++ movdqa %xmm1,%xmm8
13993 ++ psrld $0x7,%xmm1
13994 ++ pslld $0x19,%xmm8
13995 ++ por %xmm8,%xmm1
13996 ++ pshufd $0x39,%xmm0,%xmm0
13997 ++ pshufd $0x4e,%xmm3,%xmm3
13998 ++ pshufd $0x93,%xmm2,%xmm2
13999 ++ addq $0x10,%rcx
14000 ++ cmpq %r8,%rcx
14001 ++ jnz .Lroundloop
14002 ++ pxor %xmm2,%xmm0
14003 ++ pxor %xmm3,%xmm1
14004 ++ pxor %xmm10,%xmm0
14005 ++ pxor %xmm11,%xmm1
14006 ++ addq $0x40,%rsi
14007 ++ decq %rdx
14008 ++ jnz .Lbeginofloop
14009 ++ movdqu %xmm0,(%rdi)
14010 ++ movdqu %xmm1,0x10(%rdi)
14011 ++ movdqu %xmm14,0x20(%rdi)
14012 ++.Lendofloop:
14013 ++ ret
14014 ++ENDPROC(blake2s_compress_ssse3)
14015 ++#endif /* CONFIG_AS_SSSE3 */
14016 ++
14017 ++#ifdef CONFIG_AS_AVX512
14018 ++ENTRY(blake2s_compress_avx512)
14019 ++ vmovdqu (%rdi),%xmm0
14020 ++ vmovdqu 0x10(%rdi),%xmm1
14021 ++ vmovdqu 0x20(%rdi),%xmm4
14022 ++ vmovq %rcx,%xmm5
14023 ++ vmovdqa IV(%rip),%xmm14
14024 ++ vmovdqa IV+16(%rip),%xmm15
14025 ++ jmp .Lblake2s_compress_avx512_mainloop
14026 ++.align 32
14027 ++.Lblake2s_compress_avx512_mainloop:
14028 ++ vmovdqa %xmm0,%xmm10
14029 ++ vmovdqa %xmm1,%xmm11
14030 ++ vpaddq %xmm5,%xmm4,%xmm4
14031 ++ vmovdqa %xmm14,%xmm2
14032 ++ vpxor %xmm15,%xmm4,%xmm3
14033 ++ vmovdqu (%rsi),%ymm6
14034 ++ vmovdqu 0x20(%rsi),%ymm7
14035 ++ addq $0x40,%rsi
14036 ++ leaq SIGMA2(%rip),%rax
14037 ++ movb $0xa,%cl
14038 ++.Lblake2s_compress_avx512_roundloop:
14039 ++ addq $0x40,%rax
14040 ++ vmovdqa -0x40(%rax),%ymm8
14041 ++ vmovdqa -0x20(%rax),%ymm9
14042 ++ vpermi2d %ymm7,%ymm6,%ymm8
14043 ++ vpermi2d %ymm7,%ymm6,%ymm9
14044 ++ vmovdqa %ymm8,%ymm6
14045 ++ vmovdqa %ymm9,%ymm7
14046 ++ vpaddd %xmm8,%xmm0,%xmm0
14047 ++ vpaddd %xmm1,%xmm0,%xmm0
14048 ++ vpxor %xmm0,%xmm3,%xmm3
14049 ++ vprord $0x10,%xmm3,%xmm3
14050 ++ vpaddd %xmm3,%xmm2,%xmm2
14051 ++ vpxor %xmm2,%xmm1,%xmm1
14052 ++ vprord $0xc,%xmm1,%xmm1
14053 ++ vextracti128 $0x1,%ymm8,%xmm8
14054 ++ vpaddd %xmm8,%xmm0,%xmm0
14055 ++ vpaddd %xmm1,%xmm0,%xmm0
14056 ++ vpxor %xmm0,%xmm3,%xmm3
14057 ++ vprord $0x8,%xmm3,%xmm3
14058 ++ vpaddd %xmm3,%xmm2,%xmm2
14059 ++ vpxor %xmm2,%xmm1,%xmm1
14060 ++ vprord $0x7,%xmm1,%xmm1
14061 ++ vpshufd $0x93,%xmm0,%xmm0
14062 ++ vpshufd $0x4e,%xmm3,%xmm3
14063 ++ vpshufd $0x39,%xmm2,%xmm2
14064 ++ vpaddd %xmm9,%xmm0,%xmm0
14065 ++ vpaddd %xmm1,%xmm0,%xmm0
14066 ++ vpxor %xmm0,%xmm3,%xmm3
14067 ++ vprord $0x10,%xmm3,%xmm3
14068 ++ vpaddd %xmm3,%xmm2,%xmm2
14069 ++ vpxor %xmm2,%xmm1,%xmm1
14070 ++ vprord $0xc,%xmm1,%xmm1
14071 ++ vextracti128 $0x1,%ymm9,%xmm9
14072 ++ vpaddd %xmm9,%xmm0,%xmm0
14073 ++ vpaddd %xmm1,%xmm0,%xmm0
14074 ++ vpxor %xmm0,%xmm3,%xmm3
14075 ++ vprord $0x8,%xmm3,%xmm3
14076 ++ vpaddd %xmm3,%xmm2,%xmm2
14077 ++ vpxor %xmm2,%xmm1,%xmm1
14078 ++ vprord $0x7,%xmm1,%xmm1
14079 ++ vpshufd $0x39,%xmm0,%xmm0
14080 ++ vpshufd $0x4e,%xmm3,%xmm3
14081 ++ vpshufd $0x93,%xmm2,%xmm2
14082 ++ decb %cl
14083 ++ jne .Lblake2s_compress_avx512_roundloop
14084 ++ vpxor %xmm10,%xmm0,%xmm0
14085 ++ vpxor %xmm11,%xmm1,%xmm1
14086 ++ vpxor %xmm2,%xmm0,%xmm0
14087 ++ vpxor %xmm3,%xmm1,%xmm1
14088 ++ decq %rdx
14089 ++ jne .Lblake2s_compress_avx512_mainloop
14090 ++ vmovdqu %xmm0,(%rdi)
14091 ++ vmovdqu %xmm1,0x10(%rdi)
14092 ++ vmovdqu %xmm4,0x20(%rdi)
14093 ++ vzeroupper
14094 ++ retq
14095 ++ENDPROC(blake2s_compress_avx512)
14096 ++#endif /* CONFIG_AS_AVX512 */
14097 +diff --git a/arch/x86/crypto/blake2s-glue.c b/arch/x86/crypto/blake2s-glue.c
14098 +new file mode 100644
14099 +index 000000000000..4a37ba7cdbe5
14100 +--- /dev/null
14101 ++++ b/arch/x86/crypto/blake2s-glue.c
14102 +@@ -0,0 +1,233 @@
14103 ++// SPDX-License-Identifier: GPL-2.0 OR MIT
14104 ++/*
14105 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
14106 ++ */
14107 ++
14108 ++#include <crypto/internal/blake2s.h>
14109 ++#include <crypto/internal/simd.h>
14110 ++#include <crypto/internal/hash.h>
14111 ++
14112 ++#include <linux/types.h>
14113 ++#include <linux/jump_label.h>
14114 ++#include <linux/kernel.h>
14115 ++#include <linux/module.h>
14116 ++
14117 ++#include <asm/cpufeature.h>
14118 ++#include <asm/fpu/api.h>
14119 ++#include <asm/processor.h>
14120 ++#include <asm/simd.h>
14121 ++
14122 ++asmlinkage void blake2s_compress_ssse3(struct blake2s_state *state,
14123 ++ const u8 *block, const size_t nblocks,
14124 ++ const u32 inc);
14125 ++asmlinkage void blake2s_compress_avx512(struct blake2s_state *state,
14126 ++ const u8 *block, const size_t nblocks,
14127 ++ const u32 inc);
14128 ++
14129 ++static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_ssse3);
14130 ++static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_avx512);
14131 ++
14132 ++void blake2s_compress_arch(struct blake2s_state *state,
14133 ++ const u8 *block, size_t nblocks,
14134 ++ const u32 inc)
14135 ++{ /* Compress nblocks blocks starting at block into state, using the SSSE3 or AVX-512 routine when usable and the generic code otherwise. */
14136 ++ /* SIMD disables preemption, so at most one page worth of blocks is processed per kernel_fpu_begin()/kernel_fpu_end() section. */
14137 ++ BUILD_BUG_ON(PAGE_SIZE / BLAKE2S_BLOCK_SIZE < 8); /* build fails unless a page holds at least 8 blocks */
14138 ++
14139 ++ if (!static_branch_likely(&blake2s_use_ssse3) || !crypto_simd_usable()) { /* SSSE3 key not enabled, or SIMD not usable right now */
14140 ++ blake2s_compress_generic(state, block, nblocks, inc);
14141 ++ return;
14142 ++ }
14143 ++
14144 ++ for (;;) { /* one FPU section per chunk of at most PAGE_SIZE / BLAKE2S_BLOCK_SIZE blocks */
14145 ++ const size_t blocks = min_t(size_t, nblocks,
14146 ++ PAGE_SIZE / BLAKE2S_BLOCK_SIZE);
14147 ++
14148 ++ kernel_fpu_begin();
14149 ++ if (IS_ENABLED(CONFIG_AS_AVX512) &&
14150 ++ static_branch_likely(&blake2s_use_avx512))
14151 ++ blake2s_compress_avx512(state, block, blocks, inc);
14152 ++ else
14153 ++ blake2s_compress_ssse3(state, block, blocks, inc);
14154 ++ kernel_fpu_end();
14155 ++
14156 ++ nblocks -= blocks;
14157 ++ if (!nblocks) /* all requested blocks consumed */
14158 ++ break;
14159 ++ block += blocks * BLAKE2S_BLOCK_SIZE;
14160 ++ }
14161 ++}
14162 ++EXPORT_SYMBOL(blake2s_compress_arch);
14163 ++
14164 ++static int crypto_blake2s_setkey(struct crypto_shash *tfm, const u8 *key,
14165 ++ unsigned int keylen)
14166 ++{ /* Store a key of 1..BLAKE2S_KEY_SIZE bytes in the transform context; returns 0 or -EINVAL. */
14167 ++ struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(tfm);
14168 ++
14169 ++ if (keylen == 0 || keylen > BLAKE2S_KEY_SIZE) { /* reject empty and oversized keys */
14170 ++ crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
14171 ++ return -EINVAL;
14172 ++ }
14173 ++
14174 ++ memcpy(tctx->key, key, keylen);
14175 ++ tctx->keylen = keylen; /* a nonzero keylen later selects the keyed init path */
14176 ++
14177 ++ return 0;
14178 ++}
14179 ++
14180 ++static int crypto_blake2s_init(struct shash_desc *desc)
14181 ++{ /* Initialise the per-request state: keyed when the tfm context holds a key, unkeyed otherwise. Always returns 0. */
14182 ++ struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
14183 ++ struct blake2s_state *state = shash_desc_ctx(desc);
14184 ++ const int outlen = crypto_shash_digestsize(desc->tfm); /* output length follows the registered digest size */
14185 ++
14186 ++ if (tctx->keylen)
14187 ++ blake2s_init_key(state, outlen, tctx->key, tctx->keylen);
14188 ++ else
14189 ++ blake2s_init(state, outlen);
14190 ++
14191 ++ return 0;
14192 ++}
14193 ++
14194 ++static int crypto_blake2s_update(struct shash_desc *desc, const u8 *in,
14195 ++ unsigned int inlen)
14196 ++{ /* Absorb inlen bytes from in: compress what can be compressed and buffer the remainder in state->buf. Always returns 0. */
14197 ++ struct blake2s_state *state = shash_desc_ctx(desc);
14198 ++ const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen; /* free space left in state->buf */
14199 ++
14200 ++ if (unlikely(!inlen))
14201 ++ return 0;
14202 ++ if (inlen > fill) { /* more input than fits: top up the buffer and compress it as one block */
14203 ++ memcpy(state->buf + state->buflen, in, fill);
14204 ++ blake2s_compress_arch(state, state->buf, 1, BLAKE2S_BLOCK_SIZE);
14205 ++ state->buflen = 0;
14206 ++ in += fill;
14207 ++ inlen -= fill;
14208 ++ }
14209 ++ if (inlen > BLAKE2S_BLOCK_SIZE) {
14210 ++ const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE);
14211 ++ /* Hash one fewer (full) block than strictly possible, so 1..BLAKE2S_BLOCK_SIZE bytes always remain to be buffered below */
14212 ++ blake2s_compress_arch(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE);
14213 ++ in += BLAKE2S_BLOCK_SIZE * (nblocks - 1);
14214 ++ inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1);
14215 ++ }
14216 ++ memcpy(state->buf + state->buflen, in, inlen); /* keep the tail for the next update or for final */
14217 ++ state->buflen += inlen;
14218 ++
14219 ++ return 0;
14220 ++}
14221 ++
14222 ++static int crypto_blake2s_final(struct shash_desc *desc, u8 *out)
14223 ++{ /* Compress the buffered data as the last block, write state->outlen digest bytes to out, then wipe the state. Always returns 0. */
14224 ++ struct blake2s_state *state = shash_desc_ctx(desc);
14225 ++
14226 ++ blake2s_set_lastblock(state);
14227 ++ memset(state->buf + state->buflen, 0,
14228 ++ BLAKE2S_BLOCK_SIZE - state->buflen); /* Zero-pad the unused rest of the buffer */
14229 ++ blake2s_compress_arch(state, state->buf, 1, state->buflen); /* inc is the number of buffered bytes, not the full block size */
14230 ++ cpu_to_le32_array(state->h, ARRAY_SIZE(state->h)); /* convert the hash words to little-endian in place */
14231 ++ memcpy(out, state->h, state->outlen);
14232 ++ memzero_explicit(state, sizeof(*state)); /* clear the whole state once the digest has been copied out */
14233 ++
14234 ++ return 0;
14235 ++}
14236 ++
14237 ++static struct shash_alg blake2s_algs[] = {{ /* One entry per digest size; the four entries differ only in cra_name, cra_driver_name and .digestsize. */
14238 ++ .base.cra_name = "blake2s-128",
14239 ++ .base.cra_driver_name = "blake2s-128-x86",
14240 ++ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
14241 ++ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx),
14242 ++ .base.cra_priority = 200,
14243 ++ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE,
14244 ++ .base.cra_module = THIS_MODULE,
14245 ++
14246 ++ .digestsize = BLAKE2S_128_HASH_SIZE,
14247 ++ .setkey = crypto_blake2s_setkey,
14248 ++ .init = crypto_blake2s_init,
14249 ++ .update = crypto_blake2s_update,
14250 ++ .final = crypto_blake2s_final,
14251 ++ .descsize = sizeof(struct blake2s_state),
14252 ++}, {
14253 ++ .base.cra_name = "blake2s-160",
14254 ++ .base.cra_driver_name = "blake2s-160-x86",
14255 ++ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
14256 ++ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx),
14257 ++ .base.cra_priority = 200,
14258 ++ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE,
14259 ++ .base.cra_module = THIS_MODULE,
14260 ++
14261 ++ .digestsize = BLAKE2S_160_HASH_SIZE,
14262 ++ .setkey = crypto_blake2s_setkey,
14263 ++ .init = crypto_blake2s_init,
14264 ++ .update = crypto_blake2s_update,
14265 ++ .final = crypto_blake2s_final,
14266 ++ .descsize = sizeof(struct blake2s_state),
14267 ++}, {
14268 ++ .base.cra_name = "blake2s-224",
14269 ++ .base.cra_driver_name = "blake2s-224-x86",
14270 ++ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
14271 ++ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx),
14272 ++ .base.cra_priority = 200,
14273 ++ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE,
14274 ++ .base.cra_module = THIS_MODULE,
14275 ++
14276 ++ .digestsize = BLAKE2S_224_HASH_SIZE,
14277 ++ .setkey = crypto_blake2s_setkey,
14278 ++ .init = crypto_blake2s_init,
14279 ++ .update = crypto_blake2s_update,
14280 ++ .final = crypto_blake2s_final,
14281 ++ .descsize = sizeof(struct blake2s_state),
14282 ++}, {
14283 ++ .base.cra_name = "blake2s-256",
14284 ++ .base.cra_driver_name = "blake2s-256-x86",
14285 ++ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
14286 ++ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx),
14287 ++ .base.cra_priority = 200,
14288 ++ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE,
14289 ++ .base.cra_module = THIS_MODULE,
14290 ++
14291 ++ .digestsize = BLAKE2S_256_HASH_SIZE,
14292 ++ .setkey = crypto_blake2s_setkey,
14293 ++ .init = crypto_blake2s_init,
14294 ++ .update = crypto_blake2s_update,
14295 ++ .final = crypto_blake2s_final,
14296 ++ .descsize = sizeof(struct blake2s_state),
14297 ++}};
14298 ++
14299 ++static int __init blake2s_mod_init(void)
14300 ++{ /* Enable the static keys that match the CPU and register the shash algorithms. */
14301 ++ if (!boot_cpu_has(X86_FEATURE_SSSE3)) /* without SSSE3 nothing is registered and both static keys stay off */
14302 ++ return 0;
14303 ++
14304 ++ static_branch_enable(&blake2s_use_ssse3);
14305 ++
14306 ++ if (IS_ENABLED(CONFIG_AS_AVX512) && /* the AVX-512 path needs CONFIG_AS_AVX512 plus every CPU/xfeature check below */
14307 ++ boot_cpu_has(X86_FEATURE_AVX) &&
14308 ++ boot_cpu_has(X86_FEATURE_AVX2) &&
14309 ++ boot_cpu_has(X86_FEATURE_AVX512F) &&
14310 ++ boot_cpu_has(X86_FEATURE_AVX512VL) &&
14311 ++ cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM |
14312 ++ XFEATURE_MASK_AVX512, NULL))
14313 ++ static_branch_enable(&blake2s_use_avx512);
14314 ++
14315 ++ return crypto_register_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
14316 ++}
14317 ++
14318 ++static void __exit blake2s_mod_exit(void)
14319 ++{ /* Mirrors blake2s_mod_init(): the algorithms are only registered when SSSE3 is present. */
14320 ++ if (boot_cpu_has(X86_FEATURE_SSSE3))
14321 ++ crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
14322 ++}
14323 ++
14324 ++module_init(blake2s_mod_init);
14325 ++module_exit(blake2s_mod_exit);
14326 ++
14327 ++MODULE_ALIAS_CRYPTO("blake2s-128");
14328 ++MODULE_ALIAS_CRYPTO("blake2s-128-x86");
14329 ++MODULE_ALIAS_CRYPTO("blake2s-160");
14330 ++MODULE_ALIAS_CRYPTO("blake2s-160-x86");
14331 ++MODULE_ALIAS_CRYPTO("blake2s-224");
14332 ++MODULE_ALIAS_CRYPTO("blake2s-224-x86");
14333 ++MODULE_ALIAS_CRYPTO("blake2s-256");
14334 ++MODULE_ALIAS_CRYPTO("blake2s-256-x86");
14335 ++MODULE_LICENSE("GPL v2");
14336 +diff --git a/crypto/Kconfig b/crypto/Kconfig
14337 +index 81c8a4059afc..8fd3954bf64c 100644
14338 +--- a/crypto/Kconfig
14339 ++++ b/crypto/Kconfig
14340 +@@ -657,6 +657,12 @@ config CRYPTO_BLAKE2S
14341 +
14342 + See https://blake2.net for further information.
14343 +
14344 ++config CRYPTO_BLAKE2S_X86
14345 ++ tristate "BLAKE2s digest algorithm (x86 accelerated version)"
14346 ++ depends on X86 && 64BIT
14347 ++ select CRYPTO_LIB_BLAKE2S_GENERIC
14348 ++ select CRYPTO_ARCH_HAVE_LIB_BLAKE2S
14349 ++
14350 + config CRYPTO_CRCT10DIF
14351 + tristate "CRCT10DIF algorithm"
14352 + select CRYPTO_HASH
14353 +--
14354 +cgit v1.2.3-4-ga26e
14355 +
14356 +
14357 +From 2356d44a2973966510d5a0cf2fbccf057ccc77e8 Mon Sep 17 00:00:00 2001
14358 +From: "Jason A. Donenfeld" <Jason@×××××.com>
14359 +Date: Fri, 8 Nov 2019 13:22:32 +0100
14360 +Subject: crypto: curve25519 - generic C library implementations
14361 +
14362 +commit 0ed42a6f431e930b2e8fae21955406e09fe75d70 upstream.
14363 +
14364 +This contains two formally verified C implementations of the Curve25519
14365 +scalar multiplication function, one for 32-bit systems, and one for
14366 +64-bit systems whose compiler supports efficient 128-bit integer types.
14367 +Not only are these implementations formally verified, but they are also
14368 +the fastest available C implementations. They have been modified to be
14369 +friendly to kernel space and to be generally less horrendous looking,
14370 +but still an effort has been made to retain their formally verified
14371 +characteristic, and so the C might look slightly unidiomatic.
14372 +
14373 +The 64-bit version comes from HACL*: https://github.com/project-everest/hacl-star
14374 +The 32-bit version comes from Fiat: https://github.com/mit-plv/fiat-crypto
14375 +
14376 +Information: https://cr.yp.to/ecdh.html
14377 +
14378 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
14379 +[ardb: - move from lib/zinc to lib/crypto
14380 + - replace .c #includes with Kconfig based object selection
14381 + - drop simd handling and simplify support for per-arch versions ]
14382 +Signed-off-by: Ard Biesheuvel <ardb@××××××.org>
14383 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
14384 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
14385 +---
14386 + include/crypto/curve25519.h | 71 ++++
14387 + lib/crypto/Kconfig | 25 ++
14388 + lib/crypto/Makefile | 5 +
14389 + lib/crypto/curve25519-fiat32.c | 864 +++++++++++++++++++++++++++++++++++++++++
14390 + lib/crypto/curve25519-hacl64.c | 788 +++++++++++++++++++++++++++++++++++++
14391 + lib/crypto/curve25519.c | 25 ++
14392 + 6 files changed, 1778 insertions(+)
14393 + create mode 100644 include/crypto/curve25519.h
14394 + create mode 100644 lib/crypto/curve25519-fiat32.c
14395 + create mode 100644 lib/crypto/curve25519-hacl64.c
14396 + create mode 100644 lib/crypto/curve25519.c
14397 +
14398 +diff --git a/include/crypto/curve25519.h b/include/crypto/curve25519.h
14399 +new file mode 100644
14400 +index 000000000000..4e6dc840b159
14401 +--- /dev/null
14402 ++++ b/include/crypto/curve25519.h
14403 +@@ -0,0 +1,71 @@
14404 ++/* SPDX-License-Identifier: GPL-2.0 OR MIT */
14405 ++/*
14406 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
14407 ++ */
14408 ++
14409 ++#ifndef CURVE25519_H
14410 ++#define CURVE25519_H
14411 ++
14412 ++#include <crypto/algapi.h> // For crypto_memneq.
14413 ++#include <linux/types.h>
14414 ++#include <linux/random.h>
14415 ++
14416 ++enum curve25519_lengths {
14417 ++ CURVE25519_KEY_SIZE = 32
14418 ++};
14419 ++
14420 ++extern const u8 curve25519_null_point[];
14421 ++extern const u8 curve25519_base_point[];
14422 ++
14423 ++void curve25519_generic(u8 out[CURVE25519_KEY_SIZE],
14424 ++ const u8 scalar[CURVE25519_KEY_SIZE],
14425 ++ const u8 point[CURVE25519_KEY_SIZE]);
14426 ++
14427 ++void curve25519_arch(u8 out[CURVE25519_KEY_SIZE],
14428 ++ const u8 scalar[CURVE25519_KEY_SIZE],
14429 ++ const u8 point[CURVE25519_KEY_SIZE]);
14430 ++
14431 ++void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE],
14432 ++ const u8 secret[CURVE25519_KEY_SIZE]);
14433 ++
14434 ++static inline
14435 ++bool __must_check curve25519(u8 mypublic[CURVE25519_KEY_SIZE],
14436 ++ const u8 secret[CURVE25519_KEY_SIZE],
14437 ++ const u8 basepoint[CURVE25519_KEY_SIZE])
14438 ++{ /* Scalar multiplication of basepoint by secret into mypublic; the result of the comparison with curve25519_null_point is returned. */
14439 ++ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519)) /* compile-time choice between arch and generic code */
14440 ++ curve25519_arch(mypublic, secret, basepoint);
14441 ++ else
14442 ++ curve25519_generic(mypublic, secret, basepoint);
14443 ++ return crypto_memneq(mypublic, curve25519_null_point, /* false when the output equals curve25519_null_point */
14444 ++ CURVE25519_KEY_SIZE);
14445 ++}
14446 ++
14447 ++static inline bool
14448 ++__must_check curve25519_generate_public(u8 pub[CURVE25519_KEY_SIZE],
14449 ++ const u8 secret[CURVE25519_KEY_SIZE])
14450 ++{ /* Derive pub from secret using the base point; returns false for a rejected secret or a null-point result. */
14451 ++ if (unlikely(!crypto_memneq(secret, curve25519_null_point,
14452 ++ CURVE25519_KEY_SIZE)))
14453 ++ return false; /* a secret equal to curve25519_null_point is rejected before any computation */
14454 ++
14455 ++ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519))
14456 ++ curve25519_base_arch(pub, secret);
14457 ++ else
14458 ++ curve25519_generic(pub, secret, curve25519_base_point);
14459 ++ return crypto_memneq(pub, curve25519_null_point, CURVE25519_KEY_SIZE); /* false when pub equals curve25519_null_point */
14460 ++}
14461 ++
14462 ++static inline void curve25519_clamp_secret(u8 secret[CURVE25519_KEY_SIZE])
14463 ++{ /* Clamp the secret in place by fixing bits in its first and last byte. */
14464 ++ secret[0] &= 248; /* clear the three lowest bits of byte 0 */
14465 ++ secret[31] = (secret[31] & 127) | 64; /* clear bit 7 and set bit 6 of byte 31 */
14466 ++}
14467 ++
14468 ++static inline void curve25519_generate_secret(u8 secret[CURVE25519_KEY_SIZE])
14469 ++{ /* Fill secret with CURVE25519_KEY_SIZE random bytes, then clamp it. */
14470 ++ get_random_bytes_wait(secret, CURVE25519_KEY_SIZE); /* NOTE(review): the return value is not checked here */
14471 ++ curve25519_clamp_secret(secret);
14472 ++}
14473 ++
14474 ++#endif /* CURVE25519_H */
14475 +diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
14476 +index 7ad98b624e55..b1d830dc1c9e 100644
14477 +--- a/lib/crypto/Kconfig
14478 ++++ b/lib/crypto/Kconfig
14479 +@@ -59,6 +59,31 @@ config CRYPTO_LIB_CHACHA
14480 + by either the generic implementation or an arch-specific one, if one
14481 + is available and enabled.
14482 +
14483 ++config CRYPTO_ARCH_HAVE_LIB_CURVE25519
14484 ++ tristate
14485 ++ help
14486 ++ Declares whether the architecture provides an arch-specific
14487 ++ accelerated implementation of the Curve25519 library interface,
14488 ++ either builtin or as a module.
14489 ++
14490 ++config CRYPTO_LIB_CURVE25519_GENERIC
14491 ++ tristate
14492 ++ help
14493 ++ This symbol can be depended upon by arch implementations of the
14494 ++ Curve25519 library interface that require the generic code as a
14495 ++ fallback, e.g., for SIMD implementations. If no arch specific
14496 ++ implementation is enabled, this implementation serves the users
14497 ++ of CRYPTO_LIB_CURVE25519.
14498 ++
14499 ++config CRYPTO_LIB_CURVE25519
14500 ++ tristate "Curve25519 scalar multiplication library"
14501 ++ depends on CRYPTO_ARCH_HAVE_LIB_CURVE25519 || !CRYPTO_ARCH_HAVE_LIB_CURVE25519
14502 ++ select CRYPTO_LIB_CURVE25519_GENERIC if CRYPTO_ARCH_HAVE_LIB_CURVE25519=n
14503 ++ help
14504 ++ Enable the Curve25519 library interface. This interface may be
14505 ++ fulfilled by either the generic implementation or an arch-specific
14506 ++ one, if one is available and enabled.
14507 ++
14508 + config CRYPTO_LIB_DES
14509 + tristate
14510 +
14511 +diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
14512 +index 8ca66b5f9807..273c55d5e147 100644
14513 +--- a/lib/crypto/Makefile
14514 ++++ b/lib/crypto/Makefile
14515 +@@ -16,6 +16,11 @@ libblake2s-generic-y += blake2s-generic.o
14516 + obj-$(CONFIG_CRYPTO_LIB_BLAKE2S) += libblake2s.o
14517 + libblake2s-y += blake2s.o
14518 +
14519 ++obj-$(CONFIG_CRYPTO_LIB_CURVE25519_GENERIC) += libcurve25519.o
14520 ++libcurve25519-y := curve25519-fiat32.o
14521 ++libcurve25519-$(CONFIG_ARCH_SUPPORTS_INT128) := curve25519-hacl64.o
14522 ++libcurve25519-y += curve25519.o
14523 ++
14524 + obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o
14525 + libdes-y := des.o
14526 +
14527 +diff --git a/lib/crypto/curve25519-fiat32.c b/lib/crypto/curve25519-fiat32.c
14528 +new file mode 100644
14529 +index 000000000000..1c455207341d
14530 +--- /dev/null
14531 ++++ b/lib/crypto/curve25519-fiat32.c
14532 +@@ -0,0 +1,864 @@
14533 ++// SPDX-License-Identifier: GPL-2.0 OR MIT
14534 ++/*
14535 ++ * Copyright (C) 2015-2016 The fiat-crypto Authors.
14536 ++ * Copyright (C) 2018-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
14537 ++ *
14538 ++ * This is a machine-generated formally verified implementation of Curve25519
14539 ++ * ECDH from: <https://github.com/mit-plv/fiat-crypto>. Though originally
14540 ++ * machine generated, it has been tweaked to be suitable for use in the kernel.
14541 ++ * It is optimized for 32-bit machines and machines that cannot work efficiently
14542 ++ * with 128-bit integer types.
14543 ++ */
14544 ++
14545 ++#include <asm/unaligned.h>
14546 ++#include <crypto/curve25519.h>
14547 ++#include <linux/string.h>
14548 ++
14549 ++/* fe means field element. Here the field is \Z/(2^255-19). An element t,
14550 ++ * entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
14551 ++ * t[3]+2^102 t[4]+...+2^230 t[9].
14552 ++ * fe limbs are bounded by 1.125*2^26,1.125*2^25,1.125*2^26,1.125*2^25,etc.
14553 ++ * Multiplication and carrying produce fe from fe_loose.
14554 ++ */
14555 ++typedef struct fe { u32 v[10]; } fe;
14556 ++
14557 ++/* fe_loose limbs are bounded by 3.375*2^26,3.375*2^25,3.375*2^26,3.375*2^25,etc
14558 ++ * Addition and subtraction produce fe_loose from (fe, fe).
14559 ++ */
14560 ++typedef struct fe_loose { u32 v[10]; } fe_loose;
14561 ++
14562 ++static __always_inline void fe_frombytes_impl(u32 h[10], const u8 *s)
14563 ++{
14564 ++ /* Unpacks 32 little-endian bytes into ten limbs of alternating 26 and 25 bits. Ignores top bit of s. */
14565 ++ u32 a0 = get_unaligned_le32(s);
14566 ++ u32 a1 = get_unaligned_le32(s+4);
14567 ++ u32 a2 = get_unaligned_le32(s+8);
14568 ++ u32 a3 = get_unaligned_le32(s+12);
14569 ++ u32 a4 = get_unaligned_le32(s+16);
14570 ++ u32 a5 = get_unaligned_le32(s+20);
14571 ++ u32 a6 = get_unaligned_le32(s+24);
14572 ++ u32 a7 = get_unaligned_le32(s+28);
14573 ++ h[0] = a0&((1<<26)-1); /* 26 used, 32-26 left. 26 */
14574 ++ h[1] = (a0>>26) | ((a1&((1<<19)-1))<< 6); /* (32-26) + 19 = 6+19 = 25 */
14575 ++ h[2] = (a1>>19) | ((a2&((1<<13)-1))<<13); /* (32-19) + 13 = 13+13 = 26 */
14576 ++ h[3] = (a2>>13) | ((a3&((1<< 6)-1))<<19); /* (32-13) + 6 = 19+ 6 = 25 */
14577 ++ h[4] = (a3>> 6); /* (32- 6) = 26 */
14578 ++ h[5] = a4&((1<<25)-1); /* 25 */
14579 ++ h[6] = (a4>>25) | ((a5&((1<<19)-1))<< 7); /* (32-25) + 19 = 7+19 = 26 */
14580 ++ h[7] = (a5>>19) | ((a6&((1<<12)-1))<<13); /* (32-19) + 12 = 13+12 = 25 */
14581 ++ h[8] = (a6>>12) | ((a7&((1<< 6)-1))<<20); /* (32-12) + 6 = 20+ 6 = 26 */
14582 ++ h[9] = (a7>> 6)&((1<<25)-1); /* 25 */
14583 ++}
14584 ++
14585 ++static __always_inline void fe_frombytes(fe *h, const u8 *s)
14586 ++{ /* Typed wrapper: unpack s into the limbs of h via fe_frombytes_impl(). */
14587 ++ fe_frombytes_impl(h->v, s);
14588 ++}
14589 ++
14590 ++static __always_inline u8 /*bool*/
14591 ++addcarryx_u25(u8 /*bool*/ c, u32 a, u32 b, u32 *low)
14592 ++{
14593 ++ /* This function extracts 25 bits of result and 1 bit of carry
14594 ++ * (26 total), so a 32-bit intermediate is sufficient.
14595 ++ */
14596 ++ u32 x = a + b + c;
14597 ++ *low = x & ((1 << 25) - 1); /* low 25 bits of a + b + c */
14598 ++ return (x >> 25) & 1; /* carry out = bit 25 of the sum */
14599 ++}
14600 ++
14601 ++static __always_inline u8 /*bool*/
14602 ++addcarryx_u26(u8 /*bool*/ c, u32 a, u32 b, u32 *low)
14603 ++{
14604 ++ /* This function extracts 26 bits of result and 1 bit of carry
14605 ++ * (27 total), so a 32-bit intermediate is sufficient.
14606 ++ */
14607 ++ u32 x = a + b + c;
14608 ++ *low = x & ((1 << 26) - 1); /* low 26 bits of a + b + c */
14609 ++ return (x >> 26) & 1; /* carry out = bit 26 of the sum */
14610 ++}
14611 ++
14612 ++static __always_inline u8 /*bool*/
14613 ++subborrow_u25(u8 /*bool*/ c, u32 a, u32 b, u32 *low)
14614 ++{
14615 ++ /* This function extracts 25 bits of result and 1 bit of borrow
14616 ++ * (26 total), so a 32-bit intermediate is sufficient.
14617 ++ */
14618 ++ u32 x = a - b - c;
14619 ++ *low = x & ((1 << 25) - 1); /* low 25 bits of a - b - c */
14620 ++ return x >> 31; /* borrow out = top bit of the 32-bit difference */
14621 ++}
14622 ++
14623 ++static __always_inline u8 /*bool*/
14624 ++subborrow_u26(u8 /*bool*/ c, u32 a, u32 b, u32 *low)
14625 ++{
14626 ++ /* This function extracts 26 bits of result and 1 bit of borrow
14627 ++ * (27 total), so a 32-bit intermediate is sufficient.
14628 ++ */
14629 ++ u32 x = a - b - c;
14630 ++ *low = x & ((1 << 26) - 1); /* low 26 bits of a - b - c */
14631 ++ return x >> 31; /* borrow out = top bit of the 32-bit difference */
14632 ++}
14633 ++
14634 ++static __always_inline u32 cmovznz32(u32 t, u32 z, u32 nz)
14635 ++{ /* Select without branching on t: returns nz when t is nonzero and z when t is zero. */
14636 ++ t = -!!t; /* all set if nonzero, 0 if 0 */
14637 ++ return (t&nz) | ((~t)&z); /* the mask keeps exactly one of the two operands */
14638 ++}
14639 ++
14640 ++static __always_inline void fe_freeze(u32 out[10], const u32 in1[10])
14641 ++{ /* Computes in1 - p limb by limb (p = 2^255-19, limbs 0x3ffffed, 0x1ffffff, 0x3ffffff, ...) and adds p back if that subtraction borrowed. */
14642 ++ { const u32 x17 = in1[9];
14643 ++ { const u32 x18 = in1[8];
14644 ++ { const u32 x16 = in1[7];
14645 ++ { const u32 x14 = in1[6];
14646 ++ { const u32 x12 = in1[5];
14647 ++ { const u32 x10 = in1[4];
14648 ++ { const u32 x8 = in1[3];
14649 ++ { const u32 x6 = in1[2];
14650 ++ { const u32 x4 = in1[1];
14651 ++ { const u32 x2 = in1[0];
14652 ++ { u32 x20; u8/*bool*/ x21 = subborrow_u26(0x0, x2, 0x3ffffed, &x20); /* start of the borrow chain for in1 - p */
14653 ++ { u32 x23; u8/*bool*/ x24 = subborrow_u25(x21, x4, 0x1ffffff, &x23);
14654 ++ { u32 x26; u8/*bool*/ x27 = subborrow_u26(x24, x6, 0x3ffffff, &x26);
14655 ++ { u32 x29; u8/*bool*/ x30 = subborrow_u25(x27, x8, 0x1ffffff, &x29);
14656 ++ { u32 x32; u8/*bool*/ x33 = subborrow_u26(x30, x10, 0x3ffffff, &x32);
14657 ++ { u32 x35; u8/*bool*/ x36 = subborrow_u25(x33, x12, 0x1ffffff, &x35);
14658 ++ { u32 x38; u8/*bool*/ x39 = subborrow_u26(x36, x14, 0x3ffffff, &x38);
14659 ++ { u32 x41; u8/*bool*/ x42 = subborrow_u25(x39, x16, 0x1ffffff, &x41);
14660 ++ { u32 x44; u8/*bool*/ x45 = subborrow_u26(x42, x18, 0x3ffffff, &x44);
14661 ++ { u32 x47; u8/*bool*/ x48 = subborrow_u25(x45, x17, 0x1ffffff, &x47);
14662 ++ { u32 x49 = cmovznz32(x48, 0x0, 0xffffffff); /* all-ones mask if the final borrow is set, else 0 */
14663 ++ { u32 x50 = (x49 & 0x3ffffed); /* start of the carry chain that adds (mask & p) back */
14664 ++ { u32 x52; u8/*bool*/ x53 = addcarryx_u26(0x0, x20, x50, &x52);
14665 ++ { u32 x54 = (x49 & 0x1ffffff);
14666 ++ { u32 x56; u8/*bool*/ x57 = addcarryx_u25(x53, x23, x54, &x56);
14667 ++ { u32 x58 = (x49 & 0x3ffffff);
14668 ++ { u32 x60; u8/*bool*/ x61 = addcarryx_u26(x57, x26, x58, &x60);
14669 ++ { u32 x62 = (x49 & 0x1ffffff);
14670 ++ { u32 x64; u8/*bool*/ x65 = addcarryx_u25(x61, x29, x62, &x64);
14671 ++ { u32 x66 = (x49 & 0x3ffffff);
14672 ++ { u32 x68; u8/*bool*/ x69 = addcarryx_u26(x65, x32, x66, &x68);
14673 ++ { u32 x70 = (x49 & 0x1ffffff);
14674 ++ { u32 x72; u8/*bool*/ x73 = addcarryx_u25(x69, x35, x70, &x72);
14675 ++ { u32 x74 = (x49 & 0x3ffffff);
14676 ++ { u32 x76; u8/*bool*/ x77 = addcarryx_u26(x73, x38, x74, &x76);
14677 ++ { u32 x78 = (x49 & 0x1ffffff);
14678 ++ { u32 x80; u8/*bool*/ x81 = addcarryx_u25(x77, x41, x78, &x80);
14679 ++ { u32 x82 = (x49 & 0x3ffffff);
14680 ++ { u32 x84; u8/*bool*/ x85 = addcarryx_u26(x81, x44, x82, &x84);
14681 ++ { u32 x86 = (x49 & 0x1ffffff);
14682 ++ { u32 x88; addcarryx_u25(x85, x47, x86, &x88); /* the last carry out is discarded */
14683 ++ out[0] = x52;
14684 ++ out[1] = x56;
14685 ++ out[2] = x60;
14686 ++ out[3] = x64;
14687 ++ out[4] = x68;
14688 ++ out[5] = x72;
14689 ++ out[6] = x76;
14690 ++ out[7] = x80;
14691 ++ out[8] = x84;
14692 ++ out[9] = x88;
14693 ++ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
14694 ++}
14695 ++
14696 ++static __always_inline void fe_tobytes(u8 s[32], const fe *f)
14697 ++{ /* Serialise f into 32 bytes, least significant byte first, from the limbs produced by fe_freeze(). */
14698 ++ u32 h[10];
14699 ++ fe_freeze(h, f->v); /* all bytes below are packed from the frozen limbs in h, not from f directly */
14700 ++ s[0] = h[0] >> 0;
14701 ++ s[1] = h[0] >> 8;
14702 ++ s[2] = h[0] >> 16;
14703 ++ s[3] = (h[0] >> 24) | (h[1] << 2);
14704 ++ s[4] = h[1] >> 6;
14705 ++ s[5] = h[1] >> 14;
14706 ++ s[6] = (h[1] >> 22) | (h[2] << 3);
14707 ++ s[7] = h[2] >> 5;
14708 ++ s[8] = h[2] >> 13;
14709 ++ s[9] = (h[2] >> 21) | (h[3] << 5);
14710 ++ s[10] = h[3] >> 3;
14711 ++ s[11] = h[3] >> 11;
14712 ++ s[12] = (h[3] >> 19) | (h[4] << 6);
14713 ++ s[13] = h[4] >> 2;
14714 ++ s[14] = h[4] >> 10;
14715 ++ s[15] = h[4] >> 18;
14716 ++ s[16] = h[5] >> 0;
14717 ++ s[17] = h[5] >> 8;
14718 ++ s[18] = h[5] >> 16;
14719 ++ s[19] = (h[5] >> 24) | (h[6] << 1);
14720 ++ s[20] = h[6] >> 7;
14721 ++ s[21] = h[6] >> 15;
14722 ++ s[22] = (h[6] >> 23) | (h[7] << 3);
14723 ++ s[23] = h[7] >> 5;
14724 ++ s[24] = h[7] >> 13;
14725 ++ s[25] = (h[7] >> 21) | (h[8] << 4);
14726 ++ s[26] = h[8] >> 4;
14727 ++ s[27] = h[8] >> 12;
14728 ++ s[28] = (h[8] >> 20) | (h[9] << 6);
14729 ++ s[29] = h[9] >> 2;
14730 ++ s[30] = h[9] >> 10;
14731 ++ s[31] = h[9] >> 18;
14732 ++}
14733 ++
14734 ++/* h = f; copies all ten limbs with memmove, so h and f may be the same object */
14735 ++static __always_inline void fe_copy(fe *h, const fe *f)
14736 ++{
14737 ++ memmove(h, f, sizeof(u32) * 10);
14738 ++}
14739 ++
14740 ++static __always_inline void fe_copy_lt(fe_loose *h, const fe *f)
14741 ++{ /* h = f, copying the ten limbs of an fe into an fe_loose. */
14742 ++ memmove(h, f, sizeof(u32) * 10);
14743 ++}
14744 ++
14745 ++/* h = 0 (all ten limbs cleared) */
14746 ++static __always_inline void fe_0(fe *h)
14747 ++{
14748 ++ memset(h, 0, sizeof(u32) * 10);
14749 ++}
14750 ++
14751 ++/* h = 1 (lowest limb set to 1, the other nine cleared) */
14752 ++static __always_inline void fe_1(fe *h)
14753 ++{
14754 ++ memset(h, 0, sizeof(u32) * 10);
14755 ++ h->v[0] = 1;
14756 ++}
14757 ++
14758 ++static void fe_add_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
14759 ++{ /* Limb-wise addition with no carry propagation: out[i] = in1[i] + in2[i]. All inputs are read before any output is written. */
14760 ++ { const u32 x20 = in1[9];
14761 ++ { const u32 x21 = in1[8];
14762 ++ { const u32 x19 = in1[7];
14763 ++ { const u32 x17 = in1[6];
14764 ++ { const u32 x15 = in1[5];
14765 ++ { const u32 x13 = in1[4];
14766 ++ { const u32 x11 = in1[3];
14767 ++ { const u32 x9 = in1[2];
14768 ++ { const u32 x7 = in1[1];
14769 ++ { const u32 x5 = in1[0];
14770 ++ { const u32 x38 = in2[9];
14771 ++ { const u32 x39 = in2[8];
14772 ++ { const u32 x37 = in2[7];
14773 ++ { const u32 x35 = in2[6];
14774 ++ { const u32 x33 = in2[5];
14775 ++ { const u32 x31 = in2[4];
14776 ++ { const u32 x29 = in2[3];
14777 ++ { const u32 x27 = in2[2];
14778 ++ { const u32 x25 = in2[1];
14779 ++ { const u32 x23 = in2[0];
14780 ++ out[0] = (x5 + x23);
14781 ++ out[1] = (x7 + x25);
14782 ++ out[2] = (x9 + x27);
14783 ++ out[3] = (x11 + x29);
14784 ++ out[4] = (x13 + x31);
14785 ++ out[5] = (x15 + x33);
14786 ++ out[6] = (x17 + x35);
14787 ++ out[7] = (x19 + x37);
14788 ++ out[8] = (x21 + x39);
14789 ++ out[9] = (x20 + x38);
14790 ++ }}}}}}}}}}}}}}}}}}}}
14791 ++}
14792 ++
14793 ++/* h = f + g, limb by limb without carrying; the sum is returned as an fe_loose.
14794 ++ * Can overlap h with f or g.
14795 ++ */
14796 ++static __always_inline void fe_add(fe_loose *h, const fe *f, const fe *g)
14797 ++{
14798 ++ fe_add_impl(h->v, f->v, g->v);
14799 ++}
14800 ++
14801 ++static void fe_sub_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
14802 ++{ /* Limb-wise out[i] = (c[i] + in1[i]) - in2[i] with no carrying; the constants c[i] are twice the limbs of p = 2^255-19 (0x7ffffda = 2*0x3ffffed, 0x3fffffe = 2*0x1ffffff, 0x7fffffe = 2*0x3ffffff), presumably so that no limb underflows. */
14803 ++ { const u32 x20 = in1[9];
14804 ++ { const u32 x21 = in1[8];
14805 ++ { const u32 x19 = in1[7];
14806 ++ { const u32 x17 = in1[6];
14807 ++ { const u32 x15 = in1[5];
14808 ++ { const u32 x13 = in1[4];
14809 ++ { const u32 x11 = in1[3];
14810 ++ { const u32 x9 = in1[2];
14811 ++ { const u32 x7 = in1[1];
14812 ++ { const u32 x5 = in1[0];
14813 ++ { const u32 x38 = in2[9];
14814 ++ { const u32 x39 = in2[8];
14815 ++ { const u32 x37 = in2[7];
14816 ++ { const u32 x35 = in2[6];
14817 ++ { const u32 x33 = in2[5];
14818 ++ { const u32 x31 = in2[4];
14819 ++ { const u32 x29 = in2[3];
14820 ++ { const u32 x27 = in2[2];
14821 ++ { const u32 x25 = in2[1];
14822 ++ { const u32 x23 = in2[0];
14823 ++ out[0] = ((0x7ffffda + x5) - x23);
14824 ++ out[1] = ((0x3fffffe + x7) - x25);
14825 ++ out[2] = ((0x7fffffe + x9) - x27);
14826 ++ out[3] = ((0x3fffffe + x11) - x29);
14827 ++ out[4] = ((0x7fffffe + x13) - x31);
14828 ++ out[5] = ((0x3fffffe + x15) - x33);
14829 ++ out[6] = ((0x7fffffe + x17) - x35);
14830 ++ out[7] = ((0x3fffffe + x19) - x37);
14831 ++ out[8] = ((0x7fffffe + x21) - x39);
14832 ++ out[9] = ((0x3fffffe + x20) - x38);
14833 ++ }}}}}}}}}}}}}}}}}}}}
14834 ++}
14835 ++
14836 ++/* h = f - g, limb by limb without carrying; the difference is returned as an fe_loose.
14837 ++ * Can overlap h with f or g.
14838 ++ */
14839 ++static __always_inline void fe_sub(fe_loose *h, const fe *f, const fe *g)
14840 ++{
14841 ++ fe_sub_impl(h->v, f->v, g->v);
14842 ++}
14843 ++
14844 ++static void fe_mul_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
14845 ++{
14846 ++ { const u32 x20 = in1[9];
14847 ++ { const u32 x21 = in1[8];
14848 ++ { const u32 x19 = in1[7];
14849 ++ { const u32 x17 = in1[6];
14850 ++ { const u32 x15 = in1[5];
14851 ++ { const u32 x13 = in1[4];
14852 ++ { const u32 x11 = in1[3];
14853 ++ { const u32 x9 = in1[2];
14854 ++ { const u32 x7 = in1[1];
14855 ++ { const u32 x5 = in1[0];
14856 ++ { const u32 x38 = in2[9];
14857 ++ { const u32 x39 = in2[8];
14858 ++ { const u32 x37 = in2[7];
14859 ++ { const u32 x35 = in2[6];
14860 ++ { const u32 x33 = in2[5];
14861 ++ { const u32 x31 = in2[4];
14862 ++ { const u32 x29 = in2[3];
14863 ++ { const u32 x27 = in2[2];
14864 ++ { const u32 x25 = in2[1];
14865 ++ { const u32 x23 = in2[0];
14866 ++ { u64 x40 = ((u64)x23 * x5);
14867 ++ { u64 x41 = (((u64)x23 * x7) + ((u64)x25 * x5));
14868 ++ { u64 x42 = ((((u64)(0x2 * x25) * x7) + ((u64)x23 * x9)) + ((u64)x27 * x5));
14869 ++ { u64 x43 = (((((u64)x25 * x9) + ((u64)x27 * x7)) + ((u64)x23 * x11)) + ((u64)x29 * x5));
14870 ++ { u64 x44 = (((((u64)x27 * x9) + (0x2 * (((u64)x25 * x11) + ((u64)x29 * x7)))) + ((u64)x23 * x13)) + ((u64)x31 * x5));
14871 ++ { u64 x45 = (((((((u64)x27 * x11) + ((u64)x29 * x9)) + ((u64)x25 * x13)) + ((u64)x31 * x7)) + ((u64)x23 * x15)) + ((u64)x33 * x5));
14872 ++ { u64 x46 = (((((0x2 * ((((u64)x29 * x11) + ((u64)x25 * x15)) + ((u64)x33 * x7))) + ((u64)x27 * x13)) + ((u64)x31 * x9)) + ((u64)x23 * x17)) + ((u64)x35 * x5));
14873 ++ { u64 x47 = (((((((((u64)x29 * x13) + ((u64)x31 * x11)) + ((u64)x27 * x15)) + ((u64)x33 * x9)) + ((u64)x25 * x17)) + ((u64)x35 * x7)) + ((u64)x23 * x19)) + ((u64)x37 * x5));
14874 ++ { u64 x48 = (((((((u64)x31 * x13) + (0x2 * (((((u64)x29 * x15) + ((u64)x33 * x11)) + ((u64)x25 * x19)) + ((u64)x37 * x7)))) + ((u64)x27 * x17)) + ((u64)x35 * x9)) + ((u64)x23 * x21)) + ((u64)x39 * x5));
14875 ++ { u64 x49 = (((((((((((u64)x31 * x15) + ((u64)x33 * x13)) + ((u64)x29 * x17)) + ((u64)x35 * x11)) + ((u64)x27 * x19)) + ((u64)x37 * x9)) + ((u64)x25 * x21)) + ((u64)x39 * x7)) + ((u64)x23 * x20)) + ((u64)x38 * x5));
14876 ++ { u64 x50 = (((((0x2 * ((((((u64)x33 * x15) + ((u64)x29 * x19)) + ((u64)x37 * x11)) + ((u64)x25 * x20)) + ((u64)x38 * x7))) + ((u64)x31 * x17)) + ((u64)x35 * x13)) + ((u64)x27 * x21)) + ((u64)x39 * x9));
14877 ++ { u64 x51 = (((((((((u64)x33 * x17) + ((u64)x35 * x15)) + ((u64)x31 * x19)) + ((u64)x37 * x13)) + ((u64)x29 * x21)) + ((u64)x39 * x11)) + ((u64)x27 * x20)) + ((u64)x38 * x9));
14878 ++ { u64 x52 = (((((u64)x35 * x17) + (0x2 * (((((u64)x33 * x19) + ((u64)x37 * x15)) + ((u64)x29 * x20)) + ((u64)x38 * x11)))) + ((u64)x31 * x21)) + ((u64)x39 * x13));
14879 ++ { u64 x53 = (((((((u64)x35 * x19) + ((u64)x37 * x17)) + ((u64)x33 * x21)) + ((u64)x39 * x15)) + ((u64)x31 * x20)) + ((u64)x38 * x13));
14880 ++ { u64 x54 = (((0x2 * ((((u64)x37 * x19) + ((u64)x33 * x20)) + ((u64)x38 * x15))) + ((u64)x35 * x21)) + ((u64)x39 * x17));
14881 ++ { u64 x55 = (((((u64)x37 * x21) + ((u64)x39 * x19)) + ((u64)x35 * x20)) + ((u64)x38 * x17));
14882 ++ { u64 x56 = (((u64)x39 * x21) + (0x2 * (((u64)x37 * x20) + ((u64)x38 * x19))));
14883 ++ { u64 x57 = (((u64)x39 * x20) + ((u64)x38 * x21));
14884 ++ { u64 x58 = ((u64)(0x2 * x38) * x20);
14885 ++ { u64 x59 = (x48 + (x58 << 0x4));
14886 ++ { u64 x60 = (x59 + (x58 << 0x1));
14887 ++ { u64 x61 = (x60 + x58);
14888 ++ { u64 x62 = (x47 + (x57 << 0x4));
14889 ++ { u64 x63 = (x62 + (x57 << 0x1));
14890 ++ { u64 x64 = (x63 + x57);
14891 ++ { u64 x65 = (x46 + (x56 << 0x4));
14892 ++ { u64 x66 = (x65 + (x56 << 0x1));
14893 ++ { u64 x67 = (x66 + x56);
14894 ++ { u64 x68 = (x45 + (x55 << 0x4));
14895 ++ { u64 x69 = (x68 + (x55 << 0x1));
14896 ++ { u64 x70 = (x69 + x55);
14897 ++ { u64 x71 = (x44 + (x54 << 0x4));
14898 ++ { u64 x72 = (x71 + (x54 << 0x1));
14899 ++ { u64 x73 = (x72 + x54);
14900 ++ { u64 x74 = (x43 + (x53 << 0x4));
14901 ++ { u64 x75 = (x74 + (x53 << 0x1));
14902 ++ { u64 x76 = (x75 + x53);
14903 ++ { u64 x77 = (x42 + (x52 << 0x4));
14904 ++ { u64 x78 = (x77 + (x52 << 0x1));
14905 ++ { u64 x79 = (x78 + x52);
14906 ++ { u64 x80 = (x41 + (x51 << 0x4));
14907 ++ { u64 x81 = (x80 + (x51 << 0x1));
14908 ++ { u64 x82 = (x81 + x51);
14909 ++ { u64 x83 = (x40 + (x50 << 0x4));
14910 ++ { u64 x84 = (x83 + (x50 << 0x1));
14911 ++ { u64 x85 = (x84 + x50);
14912 ++ { u64 x86 = (x85 >> 0x1a);
14913 ++ { u32 x87 = ((u32)x85 & 0x3ffffff);
14914 ++ { u64 x88 = (x86 + x82);
14915 ++ { u64 x89 = (x88 >> 0x19);
14916 ++ { u32 x90 = ((u32)x88 & 0x1ffffff);
14917 ++ { u64 x91 = (x89 + x79);
14918 ++ { u64 x92 = (x91 >> 0x1a);
14919 ++ { u32 x93 = ((u32)x91 & 0x3ffffff);
14920 ++ { u64 x94 = (x92 + x76);
14921 ++ { u64 x95 = (x94 >> 0x19);
14922 ++ { u32 x96 = ((u32)x94 & 0x1ffffff);
14923 ++ { u64 x97 = (x95 + x73);
14924 ++ { u64 x98 = (x97 >> 0x1a);
14925 ++ { u32 x99 = ((u32)x97 & 0x3ffffff);
14926 ++ { u64 x100 = (x98 + x70);
14927 ++ { u64 x101 = (x100 >> 0x19);
14928 ++ { u32 x102 = ((u32)x100 & 0x1ffffff);
14929 ++ { u64 x103 = (x101 + x67);
14930 ++ { u64 x104 = (x103 >> 0x1a);
14931 ++ { u32 x105 = ((u32)x103 & 0x3ffffff);
14932 ++ { u64 x106 = (x104 + x64);
14933 ++ { u64 x107 = (x106 >> 0x19);
14934 ++ { u32 x108 = ((u32)x106 & 0x1ffffff);
14935 ++ { u64 x109 = (x107 + x61);
14936 ++ { u64 x110 = (x109 >> 0x1a);
14937 ++ { u32 x111 = ((u32)x109 & 0x3ffffff);
14938 ++ { u64 x112 = (x110 + x49);
14939 ++ { u64 x113 = (x112 >> 0x19);
14940 ++ { u32 x114 = ((u32)x112 & 0x1ffffff);
14941 ++ { u64 x115 = (x87 + (0x13 * x113));
14942 ++ { u32 x116 = (u32) (x115 >> 0x1a);
14943 ++ { u32 x117 = ((u32)x115 & 0x3ffffff);
14944 ++ { u32 x118 = (x116 + x90);
14945 ++ { u32 x119 = (x118 >> 0x19);
14946 ++ { u32 x120 = (x118 & 0x1ffffff);
14947 ++ out[0] = x117;
14948 ++ out[1] = x120;
14949 ++ out[2] = (x119 + x93);
14950 ++ out[3] = x96;
14951 ++ out[4] = x99;
14952 ++ out[5] = x102;
14953 ++ out[6] = x105;
14954 ++ out[7] = x108;
14955 ++ out[8] = x111;
14956 ++ out[9] = x114;
14957 ++ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
14958 ++}
14959 ++
 14960 ++static __always_inline void fe_mul_ttt(fe *h, const fe *f, const fe *g)
 14961 ++{ /* Wrapper: runs fe_mul_impl on the ->v arrays of the fe inputs f and g; the result is stored in h->v. */
 14962 ++ fe_mul_impl(h->v, f->v, g->v);
 14963 ++}
14964 ++
 14965 ++static __always_inline void fe_mul_tlt(fe *h, const fe_loose *f, const fe *g)
 14966 ++{ /* Wrapper: same call as fe_mul_ttt, but the first operand f is an fe_loose. */
 14967 ++ fe_mul_impl(h->v, f->v, g->v);
 14968 ++}
14969 ++
 14970 ++static __always_inline void
 14971 ++fe_mul_tll(fe *h, const fe_loose *f, const fe_loose *g)
 14972 ++{ /* Wrapper: runs fe_mul_impl with both operands given as fe_loose; the result is stored in the fe h. */
 14973 ++ fe_mul_impl(h->v, f->v, g->v);
 14974 ++}
14975 ++
 14976 ++static void fe_sqr_impl(u32 out[10], const u32 in1[10])
 14977 ++{ /* Squares the 10-word value in1 into out; straight-line code with no branches or loops. */
 14978 ++ { const u32 x17 = in1[9];
 14979 ++ { const u32 x18 = in1[8];
 14980 ++ { const u32 x16 = in1[7];
 14981 ++ { const u32 x14 = in1[6];
 14982 ++ { const u32 x12 = in1[5];
 14983 ++ { const u32 x10 = in1[4];
 14984 ++ { const u32 x8 = in1[3];
 14985 ++ { const u32 x6 = in1[2];
 14986 ++ { const u32 x4 = in1[1];
 14987 ++ { const u32 x2 = in1[0];
 14988 ++ { u64 x19 = ((u64)x2 * x2); /* x19..x37: the 19 product columns of in1 * in1, each accumulated in 64 bits */
 14989 ++ { u64 x20 = ((u64)(0x2 * x2) * x4);
 14990 ++ { u64 x21 = (0x2 * (((u64)x4 * x4) + ((u64)x2 * x6)));
 14991 ++ { u64 x22 = (0x2 * (((u64)x4 * x6) + ((u64)x2 * x8)));
 14992 ++ { u64 x23 = ((((u64)x6 * x6) + ((u64)(0x4 * x4) * x8)) + ((u64)(0x2 * x2) * x10));
 14993 ++ { u64 x24 = (0x2 * ((((u64)x6 * x8) + ((u64)x4 * x10)) + ((u64)x2 * x12)));
 14994 ++ { u64 x25 = (0x2 * (((((u64)x8 * x8) + ((u64)x6 * x10)) + ((u64)x2 * x14)) + ((u64)(0x2 * x4) * x12)));
 14995 ++ { u64 x26 = (0x2 * (((((u64)x8 * x10) + ((u64)x6 * x12)) + ((u64)x4 * x14)) + ((u64)x2 * x16)));
 14996 ++ { u64 x27 = (((u64)x10 * x10) + (0x2 * ((((u64)x6 * x14) + ((u64)x2 * x18)) + (0x2 * (((u64)x4 * x16) + ((u64)x8 * x12))))));
 14997 ++ { u64 x28 = (0x2 * ((((((u64)x10 * x12) + ((u64)x8 * x14)) + ((u64)x6 * x16)) + ((u64)x4 * x18)) + ((u64)x2 * x17)));
 14998 ++ { u64 x29 = (0x2 * (((((u64)x12 * x12) + ((u64)x10 * x14)) + ((u64)x6 * x18)) + (0x2 * (((u64)x8 * x16) + ((u64)x4 * x17)))));
 14999 ++ { u64 x30 = (0x2 * (((((u64)x12 * x14) + ((u64)x10 * x16)) + ((u64)x8 * x18)) + ((u64)x6 * x17)));
 15000 ++ { u64 x31 = (((u64)x14 * x14) + (0x2 * (((u64)x10 * x18) + (0x2 * (((u64)x12 * x16) + ((u64)x8 * x17))))));
 15001 ++ { u64 x32 = (0x2 * ((((u64)x14 * x16) + ((u64)x12 * x18)) + ((u64)x10 * x17)));
 15002 ++ { u64 x33 = (0x2 * ((((u64)x16 * x16) + ((u64)x14 * x18)) + ((u64)(0x2 * x12) * x17)));
 15003 ++ { u64 x34 = (0x2 * (((u64)x16 * x18) + ((u64)x14 * x17)));
 15004 ++ { u64 x35 = (((u64)x18 * x18) + ((u64)(0x4 * x16) * x17));
 15005 ++ { u64 x36 = ((u64)(0x2 * x18) * x17);
 15006 ++ { u64 x37 = ((u64)(0x2 * x17) * x17);
 15007 ++ { u64 x38 = (x27 + (x37 << 0x4)); /* x38..x64: each high column v is folded into a low column as 19 * v, built as (v << 4) + (v << 1) + v */
 15008 ++ { u64 x39 = (x38 + (x37 << 0x1));
 15009 ++ { u64 x40 = (x39 + x37);
 15010 ++ { u64 x41 = (x26 + (x36 << 0x4));
 15011 ++ { u64 x42 = (x41 + (x36 << 0x1));
 15012 ++ { u64 x43 = (x42 + x36);
 15013 ++ { u64 x44 = (x25 + (x35 << 0x4));
 15014 ++ { u64 x45 = (x44 + (x35 << 0x1));
 15015 ++ { u64 x46 = (x45 + x35);
 15016 ++ { u64 x47 = (x24 + (x34 << 0x4));
 15017 ++ { u64 x48 = (x47 + (x34 << 0x1));
 15018 ++ { u64 x49 = (x48 + x34);
 15019 ++ { u64 x50 = (x23 + (x33 << 0x4));
 15020 ++ { u64 x51 = (x50 + (x33 << 0x1));
 15021 ++ { u64 x52 = (x51 + x33);
 15022 ++ { u64 x53 = (x22 + (x32 << 0x4));
 15023 ++ { u64 x54 = (x53 + (x32 << 0x1));
 15024 ++ { u64 x55 = (x54 + x32);
 15025 ++ { u64 x56 = (x21 + (x31 << 0x4));
 15026 ++ { u64 x57 = (x56 + (x31 << 0x1));
 15027 ++ { u64 x58 = (x57 + x31);
 15028 ++ { u64 x59 = (x20 + (x30 << 0x4));
 15029 ++ { u64 x60 = (x59 + (x30 << 0x1));
 15030 ++ { u64 x61 = (x60 + x30);
 15031 ++ { u64 x62 = (x19 + (x29 << 0x4));
 15032 ++ { u64 x63 = (x62 + (x29 << 0x1));
 15033 ++ { u64 x64 = (x63 + x29);
 15034 ++ { u64 x65 = (x64 >> 0x1a); /* x65..x93: carry chain over the ten words, alternating 26-bit (0x3ffffff) and 25-bit (0x1ffffff) masks */
 15035 ++ { u32 x66 = ((u32)x64 & 0x3ffffff);
 15036 ++ { u64 x67 = (x65 + x61);
 15037 ++ { u64 x68 = (x67 >> 0x19);
 15038 ++ { u32 x69 = ((u32)x67 & 0x1ffffff);
 15039 ++ { u64 x70 = (x68 + x58);
 15040 ++ { u64 x71 = (x70 >> 0x1a);
 15041 ++ { u32 x72 = ((u32)x70 & 0x3ffffff);
 15042 ++ { u64 x73 = (x71 + x55);
 15043 ++ { u64 x74 = (x73 >> 0x19);
 15044 ++ { u32 x75 = ((u32)x73 & 0x1ffffff);
 15045 ++ { u64 x76 = (x74 + x52);
 15046 ++ { u64 x77 = (x76 >> 0x1a);
 15047 ++ { u32 x78 = ((u32)x76 & 0x3ffffff);
 15048 ++ { u64 x79 = (x77 + x49);
 15049 ++ { u64 x80 = (x79 >> 0x19);
 15050 ++ { u32 x81 = ((u32)x79 & 0x1ffffff);
 15051 ++ { u64 x82 = (x80 + x46);
 15052 ++ { u64 x83 = (x82 >> 0x1a);
 15053 ++ { u32 x84 = ((u32)x82 & 0x3ffffff);
 15054 ++ { u64 x85 = (x83 + x43);
 15055 ++ { u64 x86 = (x85 >> 0x19);
 15056 ++ { u32 x87 = ((u32)x85 & 0x1ffffff);
 15057 ++ { u64 x88 = (x86 + x40);
 15058 ++ { u64 x89 = (x88 >> 0x1a);
 15059 ++ { u32 x90 = ((u32)x88 & 0x3ffffff);
 15060 ++ { u64 x91 = (x89 + x28);
 15061 ++ { u64 x92 = (x91 >> 0x19);
 15062 ++ { u32 x93 = ((u32)x91 & 0x1ffffff);
 15063 ++ { u64 x94 = (x66 + (0x13 * x92)); /* the carry out of word 9 (x92) re-enters word 0 multiplied by 0x13 (19) */
 15064 ++ { u32 x95 = (u32) (x94 >> 0x1a);
 15065 ++ { u32 x96 = ((u32)x94 & 0x3ffffff);
 15066 ++ { u32 x97 = (x95 + x69);
 15067 ++ { u32 x98 = (x97 >> 0x19);
 15068 ++ { u32 x99 = (x97 & 0x1ffffff);
 15069 ++ out[0] = x96;
 15070 ++ out[1] = x99;
 15071 ++ out[2] = (x98 + x72); /* the last carry x98 is added into word 2 without a further mask */
 15072 ++ out[3] = x75;
 15073 ++ out[4] = x78;
 15074 ++ out[5] = x81;
 15075 ++ out[6] = x84;
 15076 ++ out[7] = x87;
 15077 ++ out[8] = x90;
 15078 ++ out[9] = x93;
 15079 ++ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
 15080 ++}
15081 ++
 15082 ++static __always_inline void fe_sq_tl(fe *h, const fe_loose *f)
 15083 ++{ /* Wrapper: squares the fe_loose input f via fe_sqr_impl and stores the result in the fe h. */
 15084 ++ fe_sqr_impl(h->v, f->v);
 15085 ++}
15086 ++
 15087 ++static __always_inline void fe_sq_tt(fe *h, const fe *f)
 15088 ++{ /* Wrapper: squares the fe input f via fe_sqr_impl; callers in this file also pass h == f for in-place squaring. */
 15089 ++ fe_sqr_impl(h->v, f->v);
 15090 ++}
15091 ++
 15092 ++static __always_inline void fe_loose_invert(fe *out, const fe_loose *z)
 15093 ++{ /* Fixed square-and-multiply chain: out = z^(2^255 - 21); the trailing comments track the exponent of z. */
 15094 ++ fe t0;
 15095 ++ fe t1;
 15096 ++ fe t2;
 15097 ++ fe t3;
 15098 ++ int i;
 15099 ++
 15100 ++ fe_sq_tl(&t0, z); /* t0 = z^2 */
 15101 ++ fe_sq_tt(&t1, &t0); /* t1 = z^4 */
 15102 ++ for (i = 1; i < 2; ++i) /* runs once */
 15103 ++ fe_sq_tt(&t1, &t1); /* t1 = z^8 */
 15104 ++ fe_mul_tlt(&t1, z, &t1); /* t1 = z^9 */
 15105 ++ fe_mul_ttt(&t0, &t0, &t1); /* t0 = z^11 */
 15106 ++ fe_sq_tt(&t2, &t0); /* t2 = z^22 */
 15107 ++ fe_mul_ttt(&t1, &t1, &t2); /* t1 = z^31 = z^(2^5 - 1) */
 15108 ++ fe_sq_tt(&t2, &t1);
 15109 ++ for (i = 1; i < 5; ++i) /* 5 squarings in total, counting the one above */
 15110 ++ fe_sq_tt(&t2, &t2);
 15111 ++ fe_mul_ttt(&t1, &t2, &t1); /* t1 = z^(2^10 - 1) */
 15112 ++ fe_sq_tt(&t2, &t1);
 15113 ++ for (i = 1; i < 10; ++i) /* 10 squarings in total */
 15114 ++ fe_sq_tt(&t2, &t2);
 15115 ++ fe_mul_ttt(&t2, &t2, &t1); /* t2 = z^(2^20 - 1) */
 15116 ++ fe_sq_tt(&t3, &t2);
 15117 ++ for (i = 1; i < 20; ++i) /* 20 squarings in total */
 15118 ++ fe_sq_tt(&t3, &t3);
 15119 ++ fe_mul_ttt(&t2, &t3, &t2); /* t2 = z^(2^40 - 1) */
 15120 ++ fe_sq_tt(&t2, &t2);
 15121 ++ for (i = 1; i < 10; ++i) /* 10 squarings in total */
 15122 ++ fe_sq_tt(&t2, &t2);
 15123 ++ fe_mul_ttt(&t1, &t2, &t1); /* t1 = z^(2^50 - 1) */
 15124 ++ fe_sq_tt(&t2, &t1);
 15125 ++ for (i = 1; i < 50; ++i) /* 50 squarings in total */
 15126 ++ fe_sq_tt(&t2, &t2);
 15127 ++ fe_mul_ttt(&t2, &t2, &t1); /* t2 = z^(2^100 - 1) */
 15128 ++ fe_sq_tt(&t3, &t2);
 15129 ++ for (i = 1; i < 100; ++i) /* 100 squarings in total */
 15130 ++ fe_sq_tt(&t3, &t3);
 15131 ++ fe_mul_ttt(&t2, &t3, &t2); /* t2 = z^(2^200 - 1) */
 15132 ++ fe_sq_tt(&t2, &t2);
 15133 ++ for (i = 1; i < 50; ++i) /* 50 squarings in total */
 15134 ++ fe_sq_tt(&t2, &t2);
 15135 ++ fe_mul_ttt(&t1, &t2, &t1); /* t1 = z^(2^250 - 1) */
 15136 ++ fe_sq_tt(&t1, &t1);
 15137 ++ for (i = 1; i < 5; ++i) /* 5 squarings in total: t1 = z^(2^255 - 32) */
 15138 ++ fe_sq_tt(&t1, &t1);
 15139 ++ fe_mul_ttt(out, &t1, &t0); /* out = z^(2^255 - 32 + 11) = z^(2^255 - 21) */
 15140 ++}
15141 ++
 15142 ++static __always_inline void fe_invert(fe *out, const fe *z)
 15143 ++{ /* Inverts an fe by first converting it to an fe_loose and then calling fe_loose_invert. */
 15144 ++ fe_loose l;
 15145 ++ fe_copy_lt(&l, z); /* fe -> fe_loose copy; fe_copy_lt is defined outside this excerpt */
 15146 ++ fe_loose_invert(out, &l);
 15147 ++}
15148 ++
 15149 ++/* Replace (f,g) with (g,f) if b == 1;
 15150 ++ * replace (f,g) with (f,g) if b == 0.
 15151 ++ *
 15152 ++ * Preconditions: b in {0,1}
 15153 ++ */
 15154 ++static __always_inline void fe_cswap(fe *f, fe *g, unsigned int b)
 15155 ++{ /* The swap is done with a mask and XORs; there is no branch on b. */
 15156 ++ unsigned i;
 15157 ++ b = 0 - b; /* unsigned wrap-around: 0 stays 0, 1 becomes an all-ones mask */
 15158 ++ for (i = 0; i < 10; i++) {
 15159 ++ u32 x = f->v[i] ^ g->v[i];
 15160 ++ x &= b; /* x keeps the XOR difference when b was 1, and is 0 when b was 0 */
 15161 ++ f->v[i] ^= x; /* with x = f ^ g this turns f into g and g into f; with x = 0 both are unchanged */
 15162 ++ g->v[i] ^= x;
 15163 ++ }
 15164 ++}
15165 ++
 15166 ++/* NOTE: based on fiat-crypto fe_mul, edited for in2=121666, 0, 0.*/
 15167 ++static __always_inline void fe_mul_121666_impl(u32 out[10], const u32 in1[10])
 15168 ++{ /* Same straight-line structure as a general multiply, with the second operand replaced by constants. */
 15169 ++ { const u32 x20 = in1[9];
 15170 ++ { const u32 x21 = in1[8];
 15171 ++ { const u32 x19 = in1[7];
 15172 ++ { const u32 x17 = in1[6];
 15173 ++ { const u32 x15 = in1[5];
 15174 ++ { const u32 x13 = in1[4];
 15175 ++ { const u32 x11 = in1[3];
 15176 ++ { const u32 x9 = in1[2];
 15177 ++ { const u32 x7 = in1[1];
 15178 ++ { const u32 x5 = in1[0];
 15179 ++ { const u32 x38 = 0; /* x38, x39, x37, ..., x25, x23 stand in for the second operand: 121666 in x23, zero in the other nine */
 15180 ++ { const u32 x39 = 0;
 15181 ++ { const u32 x37 = 0;
 15182 ++ { const u32 x35 = 0;
 15183 ++ { const u32 x33 = 0;
 15184 ++ { const u32 x31 = 0;
 15185 ++ { const u32 x29 = 0;
 15186 ++ { const u32 x27 = 0;
 15187 ++ { const u32 x25 = 0;
 15188 ++ { const u32 x23 = 121666;
 15189 ++ { u64 x40 = ((u64)x23 * x5); /* x40..x58: product columns; every term with a zero constant factor evaluates to 0 */
 15190 ++ { u64 x41 = (((u64)x23 * x7) + ((u64)x25 * x5));
 15191 ++ { u64 x42 = ((((u64)(0x2 * x25) * x7) + ((u64)x23 * x9)) + ((u64)x27 * x5));
 15192 ++ { u64 x43 = (((((u64)x25 * x9) + ((u64)x27 * x7)) + ((u64)x23 * x11)) + ((u64)x29 * x5));
 15193 ++ { u64 x44 = (((((u64)x27 * x9) + (0x2 * (((u64)x25 * x11) + ((u64)x29 * x7)))) + ((u64)x23 * x13)) + ((u64)x31 * x5));
 15194 ++ { u64 x45 = (((((((u64)x27 * x11) + ((u64)x29 * x9)) + ((u64)x25 * x13)) + ((u64)x31 * x7)) + ((u64)x23 * x15)) + ((u64)x33 * x5));
 15195 ++ { u64 x46 = (((((0x2 * ((((u64)x29 * x11) + ((u64)x25 * x15)) + ((u64)x33 * x7))) + ((u64)x27 * x13)) + ((u64)x31 * x9)) + ((u64)x23 * x17)) + ((u64)x35 * x5));
 15196 ++ { u64 x47 = (((((((((u64)x29 * x13) + ((u64)x31 * x11)) + ((u64)x27 * x15)) + ((u64)x33 * x9)) + ((u64)x25 * x17)) + ((u64)x35 * x7)) + ((u64)x23 * x19)) + ((u64)x37 * x5));
 15197 ++ { u64 x48 = (((((((u64)x31 * x13) + (0x2 * (((((u64)x29 * x15) + ((u64)x33 * x11)) + ((u64)x25 * x19)) + ((u64)x37 * x7)))) + ((u64)x27 * x17)) + ((u64)x35 * x9)) + ((u64)x23 * x21)) + ((u64)x39 * x5));
 15198 ++ { u64 x49 = (((((((((((u64)x31 * x15) + ((u64)x33 * x13)) + ((u64)x29 * x17)) + ((u64)x35 * x11)) + ((u64)x27 * x19)) + ((u64)x37 * x9)) + ((u64)x25 * x21)) + ((u64)x39 * x7)) + ((u64)x23 * x20)) + ((u64)x38 * x5));
 15199 ++ { u64 x50 = (((((0x2 * ((((((u64)x33 * x15) + ((u64)x29 * x19)) + ((u64)x37 * x11)) + ((u64)x25 * x20)) + ((u64)x38 * x7))) + ((u64)x31 * x17)) + ((u64)x35 * x13)) + ((u64)x27 * x21)) + ((u64)x39 * x9));
 15200 ++ { u64 x51 = (((((((((u64)x33 * x17) + ((u64)x35 * x15)) + ((u64)x31 * x19)) + ((u64)x37 * x13)) + ((u64)x29 * x21)) + ((u64)x39 * x11)) + ((u64)x27 * x20)) + ((u64)x38 * x9));
 15201 ++ { u64 x52 = (((((u64)x35 * x17) + (0x2 * (((((u64)x33 * x19) + ((u64)x37 * x15)) + ((u64)x29 * x20)) + ((u64)x38 * x11)))) + ((u64)x31 * x21)) + ((u64)x39 * x13));
 15202 ++ { u64 x53 = (((((((u64)x35 * x19) + ((u64)x37 * x17)) + ((u64)x33 * x21)) + ((u64)x39 * x15)) + ((u64)x31 * x20)) + ((u64)x38 * x13));
 15203 ++ { u64 x54 = (((0x2 * ((((u64)x37 * x19) + ((u64)x33 * x20)) + ((u64)x38 * x15))) + ((u64)x35 * x21)) + ((u64)x39 * x17));
 15204 ++ { u64 x55 = (((((u64)x37 * x21) + ((u64)x39 * x19)) + ((u64)x35 * x20)) + ((u64)x38 * x17));
 15205 ++ { u64 x56 = (((u64)x39 * x21) + (0x2 * (((u64)x37 * x20) + ((u64)x38 * x19))));
 15206 ++ { u64 x57 = (((u64)x39 * x20) + ((u64)x38 * x21));
 15207 ++ { u64 x58 = ((u64)(0x2 * x38) * x20);
 15208 ++ { u64 x59 = (x48 + (x58 << 0x4)); /* x59..x85: each high column v is folded into a low column as 19 * v, built as (v << 4) + (v << 1) + v */
 15209 ++ { u64 x60 = (x59 + (x58 << 0x1));
 15210 ++ { u64 x61 = (x60 + x58);
 15211 ++ { u64 x62 = (x47 + (x57 << 0x4));
 15212 ++ { u64 x63 = (x62 + (x57 << 0x1));
 15213 ++ { u64 x64 = (x63 + x57);
 15214 ++ { u64 x65 = (x46 + (x56 << 0x4));
 15215 ++ { u64 x66 = (x65 + (x56 << 0x1));
 15216 ++ { u64 x67 = (x66 + x56);
 15217 ++ { u64 x68 = (x45 + (x55 << 0x4));
 15218 ++ { u64 x69 = (x68 + (x55 << 0x1));
 15219 ++ { u64 x70 = (x69 + x55);
 15220 ++ { u64 x71 = (x44 + (x54 << 0x4));
 15221 ++ { u64 x72 = (x71 + (x54 << 0x1));
 15222 ++ { u64 x73 = (x72 + x54);
 15223 ++ { u64 x74 = (x43 + (x53 << 0x4));
 15224 ++ { u64 x75 = (x74 + (x53 << 0x1));
 15225 ++ { u64 x76 = (x75 + x53);
 15226 ++ { u64 x77 = (x42 + (x52 << 0x4));
 15227 ++ { u64 x78 = (x77 + (x52 << 0x1));
 15228 ++ { u64 x79 = (x78 + x52);
 15229 ++ { u64 x80 = (x41 + (x51 << 0x4));
 15230 ++ { u64 x81 = (x80 + (x51 << 0x1));
 15231 ++ { u64 x82 = (x81 + x51);
 15232 ++ { u64 x83 = (x40 + (x50 << 0x4));
 15233 ++ { u64 x84 = (x83 + (x50 << 0x1));
 15234 ++ { u64 x85 = (x84 + x50);
 15235 ++ { u64 x86 = (x85 >> 0x1a); /* x86..x114: carry chain over the ten words, alternating 26-bit (0x3ffffff) and 25-bit (0x1ffffff) masks */
 15236 ++ { u32 x87 = ((u32)x85 & 0x3ffffff);
 15237 ++ { u64 x88 = (x86 + x82);
 15238 ++ { u64 x89 = (x88 >> 0x19);
 15239 ++ { u32 x90 = ((u32)x88 & 0x1ffffff);
 15240 ++ { u64 x91 = (x89 + x79);
 15241 ++ { u64 x92 = (x91 >> 0x1a);
 15242 ++ { u32 x93 = ((u32)x91 & 0x3ffffff);
 15243 ++ { u64 x94 = (x92 + x76);
 15244 ++ { u64 x95 = (x94 >> 0x19);
 15245 ++ { u32 x96 = ((u32)x94 & 0x1ffffff);
 15246 ++ { u64 x97 = (x95 + x73);
 15247 ++ { u64 x98 = (x97 >> 0x1a);
 15248 ++ { u32 x99 = ((u32)x97 & 0x3ffffff);
 15249 ++ { u64 x100 = (x98 + x70);
 15250 ++ { u64 x101 = (x100 >> 0x19);
 15251 ++ { u32 x102 = ((u32)x100 & 0x1ffffff);
 15252 ++ { u64 x103 = (x101 + x67);
 15253 ++ { u64 x104 = (x103 >> 0x1a);
 15254 ++ { u32 x105 = ((u32)x103 & 0x3ffffff);
 15255 ++ { u64 x106 = (x104 + x64);
 15256 ++ { u64 x107 = (x106 >> 0x19);
 15257 ++ { u32 x108 = ((u32)x106 & 0x1ffffff);
 15258 ++ { u64 x109 = (x107 + x61);
 15259 ++ { u64 x110 = (x109 >> 0x1a);
 15260 ++ { u32 x111 = ((u32)x109 & 0x3ffffff);
 15261 ++ { u64 x112 = (x110 + x49);
 15262 ++ { u64 x113 = (x112 >> 0x19);
 15263 ++ { u32 x114 = ((u32)x112 & 0x1ffffff);
 15264 ++ { u64 x115 = (x87 + (0x13 * x113)); /* the carry out of word 9 (x113) re-enters word 0 multiplied by 0x13 (19) */
 15265 ++ { u32 x116 = (u32) (x115 >> 0x1a);
 15266 ++ { u32 x117 = ((u32)x115 & 0x3ffffff);
 15267 ++ { u32 x118 = (x116 + x90);
 15268 ++ { u32 x119 = (x118 >> 0x19);
 15269 ++ { u32 x120 = (x118 & 0x1ffffff);
 15270 ++ out[0] = x117;
 15271 ++ out[1] = x120;
 15272 ++ out[2] = (x119 + x93); /* the last carry x119 is added into word 2 without a further mask */
 15273 ++ out[3] = x96;
 15274 ++ out[4] = x99;
 15275 ++ out[5] = x102;
 15276 ++ out[6] = x105;
 15277 ++ out[7] = x108;
 15278 ++ out[8] = x111;
 15279 ++ out[9] = x114;
 15280 ++ }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
 15281 ++}
15282 ++
 15283 ++static __always_inline void fe_mul121666(fe *h, const fe_loose *f)
 15284 ++{ /* Wrapper: multiplies the fe_loose input f by the constant 121666 via fe_mul_121666_impl; the result is stored in the fe h. */
 15285 ++ fe_mul_121666_impl(h->v, f->v);
 15286 ++}
15287 ++
 15288 ++void curve25519_generic(u8 out[CURVE25519_KEY_SIZE],
 15289 ++ const u8 scalar[CURVE25519_KEY_SIZE],
 15290 ++ const u8 point[CURVE25519_KEY_SIZE])
 15291 ++{ /* Writes to out the result of a 255-step ladder over (x2, z2) and (x3, z3), driven by the bits of the clamped scalar and starting from point. */
 15292 ++ fe x1, x2, z2, x3, z3;
 15293 ++ fe_loose x2l, z2l, x3l;
 15294 ++ unsigned swap = 0;
 15295 ++ int pos;
 15296 ++ u8 e[32];
 15297 ++
 15298 ++ memcpy(e, scalar, 32); /* clamp a private copy; the caller's scalar is const */
 15299 ++ curve25519_clamp_secret(e);
 15300 ++
 15301 ++ /* The following implementation was transcribed to Coq and proven to
 15302 ++ * correspond to unary scalar multiplication in affine coordinates given
 15303 ++ * that x1 != 0 is the x coordinate of some point on the curve. It was
 15304 ++ * also checked in Coq that doing a ladderstep with x1 = x3 = 0 gives
 15305 ++ * z2' = z3' = 0, and z2 = z3 = 0 gives z2' = z3' = 0. The statement was
 15306 ++ * quantified over the underlying field, so it applies to Curve25519
 15307 ++ * itself and the quadratic twist of Curve25519. It was not proven in
 15308 ++ * Coq that prime-field arithmetic correctly simulates extension-field
 15309 ++ * arithmetic on prime-field values. The decoding of the byte array
 15310 ++ * representation of e was not considered.
 15311 ++ *
 15312 ++ * Specification of Montgomery curves in affine coordinates:
 15313 ++ * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Spec/MontgomeryCurve.v#L27>
 15314 ++ *
 15315 ++ * Proof that these form a group that is isomorphic to a Weierstrass
 15316 ++ * curve:
 15317 ++ * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/AffineProofs.v#L35>
 15318 ++ *
 15319 ++ * Coq transcription and correctness proof of the loop
 15320 ++ * (where scalarbits=255):
 15321 ++ * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZ.v#L118>
 15322 ++ * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L278>
 15323 ++ * preconditions: 0 <= e < 2^255 (not necessarily e < order),
 15324 ++ * fe_invert(0) = 0
 15325 ++ */
 15326 ++ fe_frombytes(&x1, point);
 15327 ++ fe_1(&x2);
 15328 ++ fe_0(&z2);
 15329 ++ fe_copy(&x3, &x1);
 15330 ++ fe_1(&z3);
 15331 ++
 15332 ++ for (pos = 254; pos >= 0; --pos) { /* one ladder step per bit of e, from bit 254 down to bit 0 */
 15333 ++ fe tmp0, tmp1;
 15334 ++ fe_loose tmp0l, tmp1l;
 15335 ++ /* loop invariant as of right before the test, for the case
 15336 ++ * where x1 != 0:
 15337 ++ * pos >= -1; if z2 = 0 then x2 is nonzero; if z3 = 0 then x3
 15338 ++ * is nonzero
 15339 ++ * let r := e >> (pos+1) in the following equalities of
 15340 ++ * projective points:
 15341 ++ * to_xz (r*P) === if swap then (x3, z3) else (x2, z2)
 15342 ++ * to_xz ((r+1)*P) === if swap then (x2, z2) else (x3, z3)
 15343 ++ * x1 is the nonzero x coordinate of the nonzero
 15344 ++ * point (r*P-(r+1)*P)
 15345 ++ */
 15346 ++ unsigned b = 1 & (e[pos / 8] >> (pos & 7)); /* bit number pos of e */
 15347 ++ swap ^= b; /* 1 only when this bit differs from the bit of the previous iteration (swap holds that bit; 0 on entry) */
 15348 ++ fe_cswap(&x2, &x3, swap);
 15349 ++ fe_cswap(&z2, &z3, swap);
 15350 ++ swap = b; /* remember the current bit for the next iteration and for the final swap after the loop */
 15351 ++ /* Coq transcription of ladderstep formula (called from
 15352 ++ * transcribed loop):
 15353 ++ * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZ.v#L89>
 15354 ++ * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L131>
 15355 ++ * x1 != 0 <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L217>
 15356 ++ * x1 = 0 <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L147>
 15357 ++ */
 15358 ++ fe_sub(&tmp0l, &x3, &z3);
 15359 ++ fe_sub(&tmp1l, &x2, &z2);
 15360 ++ fe_add(&x2l, &x2, &z2);
 15361 ++ fe_add(&z2l, &x3, &z3);
 15362 ++ fe_mul_tll(&z3, &tmp0l, &x2l);
 15363 ++ fe_mul_tll(&z2, &z2l, &tmp1l);
 15364 ++ fe_sq_tl(&tmp0, &tmp1l);
 15365 ++ fe_sq_tl(&tmp1, &x2l);
 15366 ++ fe_add(&x3l, &z3, &z2);
 15367 ++ fe_sub(&z2l, &z3, &z2);
 15368 ++ fe_mul_ttt(&x2, &tmp1, &tmp0);
 15369 ++ fe_sub(&tmp1l, &tmp1, &tmp0);
 15370 ++ fe_sq_tl(&z2, &z2l);
 15371 ++ fe_mul121666(&z3, &tmp1l);
 15372 ++ fe_sq_tl(&x3, &x3l);
 15373 ++ fe_add(&tmp0l, &tmp0, &z3);
 15374 ++ fe_mul_ttt(&z3, &x1, &z2);
 15375 ++ fe_mul_tll(&z2, &tmp1l, &tmp0l);
 15376 ++ }
 15377 ++ /* here pos=-1, so r=e, so to_xz (e*P) === if swap then (x3, z3)
 15378 ++ * else (x2, z2)
 15379 ++ */
 15380 ++ fe_cswap(&x2, &x3, swap);
 15381 ++ fe_cswap(&z2, &z3, swap);
 15382 ++
 15383 ++ fe_invert(&z2, &z2); /* z2 is replaced by fe_invert(z2); the next line multiplies x2 by it */
 15384 ++ fe_mul_ttt(&x2, &x2, &z2);
 15385 ++ fe_tobytes(out, &x2); /* the product is written to the caller's out buffer by fe_tobytes (defined outside this excerpt) */
 15386 ++
 15387 ++ memzero_explicit(&x1, sizeof(x1)); /* wipe the function-scope field elements and the scalar copy; NOTE(review): the loop-local tmp0/tmp1/tmp0l/tmp1l are not wiped here */
 15388 ++ memzero_explicit(&x2, sizeof(x2));
 15389 ++ memzero_explicit(&z2, sizeof(z2));
 15390 ++ memzero_explicit(&x3, sizeof(x3));
 15391 ++ memzero_explicit(&z3, sizeof(z3));
 15392 ++ memzero_explicit(&x2l, sizeof(x2l));
 15393 ++ memzero_explicit(&z2l, sizeof(z2l));
 15394 ++ memzero_explicit(&x3l, sizeof(x3l));
 15395 ++ memzero_explicit(&e, sizeof(e));
 15396 ++}
15397 +diff --git a/lib/crypto/curve25519-hacl64.c b/lib/crypto/curve25519-hacl64.c
15398 +new file mode 100644
15399 +index 000000000000..771d82dc5f14
15400 +--- /dev/null
15401 ++++ b/lib/crypto/curve25519-hacl64.c
15402 +@@ -0,0 +1,788 @@
15403 ++// SPDX-License-Identifier: GPL-2.0 OR MIT
15404 ++/*
15405 ++ * Copyright (C) 2016-2017 INRIA and Microsoft Corporation.
15406 ++ * Copyright (C) 2018-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
15407 ++ *
15408 ++ * This is a machine-generated formally verified implementation of Curve25519
15409 ++ * ECDH from: <https://github.com/mitls/hacl-star>. Though originally machine
15410 ++ * generated, it has been tweaked to be suitable for use in the kernel. It is
15411 ++ * optimized for 64-bit machines that can efficiently work with 128-bit
15412 ++ * integer types.
15413 ++ */
15414 ++
15415 ++#include <asm/unaligned.h>
15416 ++#include <crypto/curve25519.h>
15417 ++#include <linux/string.h>
15418 ++
15419 ++typedef __uint128_t u128;
15420 ++
 15421 ++static __always_inline u64 u64_eq_mask(u64 a, u64 b)
 15422 ++{ /* Returns all-ones when a == b and 0 otherwise, computed without any branch. */
 15423 ++ u64 x = a ^ b; /* 0 exactly when a == b */
 15424 ++ u64 minus_x = ~x + (u64)1U; /* two's-complement negation of x */
 15425 ++ u64 x_or_minus_x = x | minus_x; /* top bit is set exactly when x != 0 */
 15426 ++ u64 xnx = x_or_minus_x >> (u32)63U; /* 1 when a != b, 0 when a == b */
 15427 ++ u64 c = xnx - (u64)1U; /* 1 - 1 = 0; 0 - 1 wraps to all-ones */
 15428 ++ return c;
 15429 ++}
15430 ++
 15431 ++static __always_inline u64 u64_gte_mask(u64 a, u64 b)
 15432 ++{ /* Returns all-ones when a >= b (unsigned) and 0 when a < b, computed without any branch. */
 15433 ++ u64 x = a;
 15434 ++ u64 y = b;
 15435 ++ u64 x_xor_y = x ^ y;
 15436 ++ u64 x_sub_y = x - y; /* wraps modulo 2^64 when x < y */
 15437 ++ u64 x_sub_y_xor_y = x_sub_y ^ y;
 15438 ++ u64 q = x_xor_y | x_sub_y_xor_y;
 15439 ++ u64 x_xor_q = x ^ q; /* top bit is 1 exactly when x < y */
 15440 ++ u64 x_xor_q_ = x_xor_q >> (u32)63U; /* 1 when a < b, else 0 */
 15441 ++ u64 c = x_xor_q_ - (u64)1U; /* 1 - 1 = 0; 0 - 1 wraps to all-ones */
 15442 ++ return c;
 15443 ++}
15444 ++
 15445 ++static __always_inline void modulo_carry_top(u64 *b)
 15446 ++{ /* Moves the bits of b[4] above bit 50 into b[0], multiplied by 19; reads and writes only b[0] and b[4]. */
 15447 ++ u64 b4 = b[4];
 15448 ++ u64 b0 = b[0];
 15449 ++ u64 b4_ = b4 & 0x7ffffffffffffLLU; /* keep the low 51 bits (mask is 2^51 - 1) */
 15450 ++ u64 b0_ = b0 + 19 * (b4 >> 51); /* overflow of the top word re-enters the bottom word times 19 */
 15451 ++ b[4] = b4_;
 15452 ++ b[0] = b0_;
 15453 ++}
15454 ++
 15455 ++static __always_inline void fproduct_copy_from_wide_(u64 *output, u128 *input)
 15456 ++{ /* Copies five u128 words into five u64 words, keeping only the low 64 bits of each (unrolled by hand). */
 15457 ++ {
 15458 ++ u128 xi = input[0];
 15459 ++ output[0] = ((u64)(xi)); /* truncating cast: the upper 64 bits are dropped */
 15460 ++ }
 15461 ++ {
 15462 ++ u128 xi = input[1];
 15463 ++ output[1] = ((u64)(xi));
 15464 ++ }
 15465 ++ {
 15466 ++ u128 xi = input[2];
 15467 ++ output[2] = ((u64)(xi));
 15468 ++ }
 15469 ++ {
 15470 ++ u128 xi = input[3];
 15471 ++ output[3] = ((u64)(xi));
 15472 ++ }
 15473 ++ {
 15474 ++ u128 xi = input[4];
 15475 ++ output[4] = ((u64)(xi));
 15476 ++ }
 15477 ++}
15478 ++
 15479 ++static __always_inline void
 15480 ++fproduct_sum_scalar_multiplication_(u128 *output, u64 *input, u64 s)
 15481 ++{ /* Multiply-accumulate: output[i] += input[i] * s for i = 0..4, with each product formed in 128 bits. */
 15482 ++ output[0] += (u128)input[0] * s; /* the cast widens before the multiply, so the full 64x64 product is kept */
 15483 ++ output[1] += (u128)input[1] * s;
 15484 ++ output[2] += (u128)input[2] * s;
 15485 ++ output[3] += (u128)input[3] * s;
 15486 ++ output[4] += (u128)input[4] * s;
 15487 ++}
15488 ++
 15489 ++static __always_inline void fproduct_carry_wide_(u128 *tmp)
 15490 ++{ /* Carry pass over tmp[0..3]: each word keeps its low 51 bits and the rest is added to the next word; tmp[4] only receives a carry. */
 15491 ++ {
 15492 ++ u32 ctr = 0;
 15493 ++ u128 tctr = tmp[ctr];
 15494 ++ u128 tctrp1 = tmp[ctr + 1];
 15495 ++ u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU; /* low 51 bits stay in this word */
 15496 ++ u128 c = ((tctr) >> (51)); /* everything above bit 50 is the carry */
 15497 ++ tmp[ctr] = ((u128)(r0));
 15498 ++ tmp[ctr + 1] = ((tctrp1) + (c));
 15499 ++ }
 15500 ++ {
 15501 ++ u32 ctr = 1;
 15502 ++ u128 tctr = tmp[ctr];
 15503 ++ u128 tctrp1 = tmp[ctr + 1];
 15504 ++ u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU;
 15505 ++ u128 c = ((tctr) >> (51));
 15506 ++ tmp[ctr] = ((u128)(r0));
 15507 ++ tmp[ctr + 1] = ((tctrp1) + (c));
 15508 ++ }
 15509 ++
 15510 ++ {
 15511 ++ u32 ctr = 2;
 15512 ++ u128 tctr = tmp[ctr];
 15513 ++ u128 tctrp1 = tmp[ctr + 1];
 15514 ++ u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU;
 15515 ++ u128 c = ((tctr) >> (51));
 15516 ++ tmp[ctr] = ((u128)(r0));
 15517 ++ tmp[ctr + 1] = ((tctrp1) + (c));
 15518 ++ }
 15519 ++ {
 15520 ++ u32 ctr = 3;
 15521 ++ u128 tctr = tmp[ctr];
 15522 ++ u128 tctrp1 = tmp[ctr + 1];
 15523 ++ u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU;
 15524 ++ u128 c = ((tctr) >> (51));
 15525 ++ tmp[ctr] = ((u128)(r0));
 15526 ++ tmp[ctr + 1] = ((tctrp1) + (c));
 15527 ++ }
 15528 ++}
15529 ++
 15530 ++static __always_inline void fmul_shift_reduce(u64 *output)
 15531 ++{ /* In place: each word moves up one index (output[i] = old output[i - 1]) and output[0] becomes 19 * old output[4]. */
 15532 ++ u64 tmp = output[4]; /* saved before it is overwritten by the shift below */
 15533 ++ u64 b0;
 15534 ++ {
 15535 ++ u32 ctr = 5 - 0 - 1; /* ctr = 4; the blocks below run downwards so no word is overwritten before it is read */
 15536 ++ u64 z = output[ctr - 1];
 15537 ++ output[ctr] = z;
 15538 ++ }
 15539 ++ {
 15540 ++ u32 ctr = 5 - 1 - 1;
 15541 ++ u64 z = output[ctr - 1];
 15542 ++ output[ctr] = z;
 15543 ++ }
 15544 ++ {
 15545 ++ u32 ctr = 5 - 2 - 1;
 15546 ++ u64 z = output[ctr - 1];
 15547 ++ output[ctr] = z;
 15548 ++ }
 15549 ++ {
 15550 ++ u32 ctr = 5 - 3 - 1;
 15551 ++ u64 z = output[ctr - 1];
 15552 ++ output[ctr] = z;
 15553 ++ }
 15554 ++ output[0] = tmp;
 15555 ++ b0 = output[0];
 15556 ++ output[0] = 19 * b0; /* the word that wrapped around from the top is scaled by 19 */
 15557 ++}
15558 ++
 15559 ++static __always_inline void fmul_mul_shift_reduce_(u128 *output, u64 *input,
 15560 ++ u64 *input21)
 15561 ++{ /* Accumulates input * input21[i] into output for i = 0..4, rotating input with fmul_shift_reduce between steps; input is modified. */
 15562 ++ u32 i;
 15563 ++ u64 input2i; /* used only for the final step; the four blocks below declare their own input2i, which shadows this one */
 15564 ++ {
 15565 ++ u64 input2i = input21[0];
 15566 ++ fproduct_sum_scalar_multiplication_(output, input, input2i);
 15567 ++ fmul_shift_reduce(input);
 15568 ++ }
 15569 ++ {
 15570 ++ u64 input2i = input21[1];
 15571 ++ fproduct_sum_scalar_multiplication_(output, input, input2i);
 15572 ++ fmul_shift_reduce(input);
 15573 ++ }
 15574 ++ {
 15575 ++ u64 input2i = input21[2];
 15576 ++ fproduct_sum_scalar_multiplication_(output, input, input2i);
 15577 ++ fmul_shift_reduce(input);
 15578 ++ }
 15579 ++ {
 15580 ++ u64 input2i = input21[3];
 15581 ++ fproduct_sum_scalar_multiplication_(output, input, input2i);
 15582 ++ fmul_shift_reduce(input);
 15583 ++ }
 15584 ++ i = 4;
 15585 ++ input2i = input21[i];
 15586 ++ fproduct_sum_scalar_multiplication_(output, input, input2i); /* last step: no rotation of input afterwards */
 15587 ++}
15588 ++
 15589 ++static __always_inline void fmul_fmul(u64 *output, u64 *input, u64 *input21)
 15590 ++{ /* output = input * input21 on five-word values; callers in this file also pass output equal to one of the inputs. */
 15591 ++ u64 tmp[5] = { input[0], input[1], input[2], input[3], input[4] }; /* private copy: fmul_mul_shift_reduce_ modifies its second argument */
 15592 ++ {
 15593 ++ u128 b4;
 15594 ++ u128 b0;
 15595 ++ u128 b4_;
 15596 ++ u128 b0_;
 15597 ++ u64 i0;
 15598 ++ u64 i1;
 15599 ++ u64 i0_;
 15600 ++ u64 i1_;
 15601 ++ u128 t[5] = { 0 }; /* accumulators must start at zero because the helper only adds into them */
 15602 ++ fmul_mul_shift_reduce_(t, tmp, input21);
 15603 ++ fproduct_carry_wide_(t);
 15604 ++ b4 = t[4];
 15605 ++ b0 = t[0];
 15606 ++ b4_ = ((b4) & (((u128)(0x7ffffffffffffLLU)))); /* top word keeps its low 51 bits */
 15607 ++ b0_ = ((b0) + (((u128)(19) * (((u64)(((b4) >> (51)))))))); /* the bits above re-enter word 0 multiplied by 19 */
 15608 ++ t[4] = b4_;
 15609 ++ t[0] = b0_;
 15610 ++ fproduct_copy_from_wide_(output, t);
 15611 ++ i0 = output[0];
 15612 ++ i1 = output[1];
 15613 ++ i0_ = i0 & 0x7ffffffffffffLLU;
 15614 ++ i1_ = i1 + (i0 >> 51); /* one more carry from word 0 into word 1 after the fold above */
 15615 ++ output[0] = i0_;
 15616 ++ output[1] = i1_;
 15617 ++ }
 15618 ++}
15619 ++
 15620 ++static __always_inline void fsquare_fsquare__(u128 *tmp, u64 *output)
 15621 ++{ /* Writes to tmp[0..4] the five 128-bit column sums of output * output; output itself is only read. */
 15622 ++ u64 r0 = output[0];
 15623 ++ u64 r1 = output[1];
 15624 ++ u64 r2 = output[2];
 15625 ++ u64 r3 = output[3];
 15626 ++ u64 r4 = output[4];
 15627 ++ u64 d0 = r0 * 2; /* doubled words: each cross product r_i * r_j with i != j occurs twice in a square */
 15628 ++ u64 d1 = r1 * 2;
 15629 ++ u64 d2 = r2 * 2 * 19; /* doubled and pre-multiplied by 19, the factor applied to wrapped-around terms */
 15630 ++ u64 d419 = r4 * 19;
 15631 ++ u64 d4 = d419 * 2;
 15632 ++ u128 s0 = ((((((u128)(r0) * (r0))) + (((u128)(d4) * (r1))))) +
 15633 ++ (((u128)(d2) * (r3))));
 15634 ++ u128 s1 = ((((((u128)(d0) * (r1))) + (((u128)(d4) * (r2))))) +
 15635 ++ (((u128)(r3 * 19) * (r3))));
 15636 ++ u128 s2 = ((((((u128)(d0) * (r2))) + (((u128)(r1) * (r1))))) +
 15637 ++ (((u128)(d4) * (r3))));
 15638 ++ u128 s3 = ((((((u128)(d0) * (r3))) + (((u128)(d1) * (r2))))) +
 15639 ++ (((u128)(r4) * (d419))));
 15640 ++ u128 s4 = ((((((u128)(d0) * (r4))) + (((u128)(d1) * (r3))))) +
 15641 ++ (((u128)(r2) * (r2))));
 15642 ++ tmp[0] = s0;
 15643 ++ tmp[1] = s1;
 15644 ++ tmp[2] = s2;
 15645 ++ tmp[3] = s3;
 15646 ++ tmp[4] = s4;
 15647 ++}
15648 ++
 15649 ++static __always_inline void fsquare_fsquare_(u128 *tmp, u64 *output)
 15650 ++{ /* Squares output in place; tmp is a caller-provided five-word u128 scratch array that is fully overwritten. */
 15651 ++ u128 b4;
 15652 ++ u128 b0;
 15653 ++ u128 b4_;
 15654 ++ u128 b0_;
 15655 ++ u64 i0;
 15656 ++ u64 i1;
 15657 ++ u64 i0_;
 15658 ++ u64 i1_;
 15659 ++ fsquare_fsquare__(tmp, output); /* wide column sums of the square */
 15660 ++ fproduct_carry_wide_(tmp);
 15661 ++ b4 = tmp[4];
 15662 ++ b0 = tmp[0];
 15663 ++ b4_ = ((b4) & (((u128)(0x7ffffffffffffLLU)))); /* top word keeps its low 51 bits */
 15664 ++ b0_ = ((b0) + (((u128)(19) * (((u64)(((b4) >> (51)))))))); /* the bits above re-enter word 0 multiplied by 19 */
 15665 ++ tmp[4] = b4_;
 15666 ++ tmp[0] = b0_;
 15667 ++ fproduct_copy_from_wide_(output, tmp);
 15668 ++ i0 = output[0];
 15669 ++ i1 = output[1];
 15670 ++ i0_ = i0 & 0x7ffffffffffffLLU;
 15671 ++ i1_ = i1 + (i0 >> 51); /* one more carry from word 0 into word 1 after the fold above */
 15672 ++ output[0] = i0_;
 15673 ++ output[1] = i1_;
 15674 ++}
15675 ++
 15676 ++static __always_inline void fsquare_fsquare_times_(u64 *output, u128 *tmp,
 15677 ++ u32 count1)
 15678 ++{ /* Squares output in place count1 times, using tmp as scratch. */
 15679 ++ u32 i;
 15680 ++ fsquare_fsquare_(tmp, output); /* unconditional first squaring: count1 == 0 still squares once */
 15681 ++ for (i = 1; i < count1; ++i)
 15682 ++ fsquare_fsquare_(tmp, output);
 15683 ++}
15684 ++
 15685 ++static __always_inline void fsquare_fsquare_times(u64 *output, u64 *input,
 15686 ++ u32 count1)
 15687 ++{ /* output = input squared count1 times in succession; input is left untouched. */
 15688 ++ u128 t[5]; /* scratch; fully written by the first squaring before it is read */
 15689 ++ memcpy(output, input, 5 * sizeof(*input)); /* copy the five words, then square the copy in place */
 15690 ++ fsquare_fsquare_times_(output, t, count1);
 15691 ++}
15692 ++
 15693 ++static __always_inline void fsquare_fsquare_times_inplace(u64 *output,
 15694 ++ u32 count1)
 15695 ++{ /* Squares output in place count1 times, providing the scratch array itself. */
 15696 ++ u128 t[5]; /* scratch; fully written by the first squaring before it is read */
 15697 ++ fsquare_fsquare_times_(output, t, count1);
 15698 ++}
15699 ++
 15700 ++static __always_inline void crecip_crecip(u64 *out, u64 *z)
 15701 ++{ /* Fixed square-and-multiply chain: out = z^(2^255 - 21); the trailing comments track the exponent of z. */
 15702 ++ u64 buf[20] = { 0 }; /* four five-word temporaries at offsets 0, 5, 10 and 15 */
 15703 ++ u64 *a0 = buf;
 15704 ++ u64 *t00 = buf + 5;
 15705 ++ u64 *b0 = buf + 10;
 15706 ++ u64 *t01;
 15707 ++ u64 *b1;
 15708 ++ u64 *c0;
 15709 ++ u64 *a;
 15710 ++ u64 *t0;
 15711 ++ u64 *b;
 15712 ++ u64 *c;
 15713 ++ fsquare_fsquare_times(a0, z, 1); /* a0 = z^2 */
 15714 ++ fsquare_fsquare_times(t00, a0, 2); /* t00 = z^8 */
 15715 ++ fmul_fmul(b0, t00, z); /* b0 = z^9 */
 15716 ++ fmul_fmul(a0, b0, a0); /* a0 = z^11 */
 15717 ++ fsquare_fsquare_times(t00, a0, 1); /* t00 = z^22 */
 15718 ++ fmul_fmul(b0, t00, b0); /* b0 = z^31 = z^(2^5 - 1) */
 15719 ++ fsquare_fsquare_times(t00, b0, 5); /* t00 = z^(2^10 - 2^5) */
 15720 ++ t01 = buf + 5; /* t01, b1 and c0 are new names for the regions at offsets 5, 10 and 15 (t01 == t00, b1 == b0) */
 15721 ++ b1 = buf + 10;
 15722 ++ c0 = buf + 15;
 15723 ++ fmul_fmul(b1, t01, b1); /* b1 = z^(2^10 - 1) */
 15724 ++ fsquare_fsquare_times(t01, b1, 10);
 15725 ++ fmul_fmul(c0, t01, b1); /* c0 = z^(2^20 - 1) */
 15726 ++ fsquare_fsquare_times(t01, c0, 20);
 15727 ++ fmul_fmul(t01, t01, c0); /* t01 = z^(2^40 - 1) */
 15728 ++ fsquare_fsquare_times_inplace(t01, 10);
 15729 ++ fmul_fmul(b1, t01, b1); /* b1 = z^(2^50 - 1) */
 15730 ++ fsquare_fsquare_times(t01, b1, 50);
 15731 ++ a = buf; /* a, t0, b and c again name the regions at offsets 0, 5, 10 and 15 */
 15732 ++ t0 = buf + 5;
 15733 ++ b = buf + 10;
 15734 ++ c = buf + 15;
 15735 ++ fmul_fmul(c, t0, b); /* c = z^(2^100 - 1) */
 15736 ++ fsquare_fsquare_times(t0, c, 100);
 15737 ++ fmul_fmul(t0, t0, c); /* t0 = z^(2^200 - 1) */
 15738 ++ fsquare_fsquare_times_inplace(t0, 50);
 15739 ++ fmul_fmul(t0, t0, b); /* t0 = z^(2^250 - 1) */
 15740 ++ fsquare_fsquare_times_inplace(t0, 5); /* t0 = z^(2^255 - 32) */
 15741 ++ fmul_fmul(out, t0, a); /* out = z^(2^255 - 32 + 11) = z^(2^255 - 21) */
 15742 ++}
15743 ++
 15744 ++static __always_inline void fsum(u64 *a, u64 *b)
 15745 ++{ /* Word-wise in-place addition: a[i] += b[i] for i = 0..4; no carry or reduction is performed here. */
 15746 ++ a[0] += b[0];
 15747 ++ a[1] += b[1];
 15748 ++ a[2] += b[2];
 15749 ++ a[3] += b[3];
 15750 ++ a[4] += b[4];
 15751 ++}
15752 ++
 15753 ++static __always_inline void fdifference(u64 *a, u64 *b)
 15754 ++{ /* In place: a[i] = (b[i] + K[i]) - a[i], so a receives b minus the old a, offset by the constants K added below. */
 15755 ++ u64 tmp[5] = { 0 };
 15756 ++ u64 b0;
 15757 ++ u64 b1;
 15758 ++ u64 b2;
 15759 ++ u64 b3;
 15760 ++ u64 b4;
 15761 ++ memcpy(tmp, b, 5 * sizeof(*b)); /* work on a copy so b itself is not modified */
 15762 ++ b0 = tmp[0];
 15763 ++ b1 = tmp[1];
 15764 ++ b2 = tmp[2];
 15765 ++ b3 = tmp[3];
 15766 ++ b4 = tmp[4];
 15767 ++ tmp[0] = b0 + 0x3fffffffffff68LLU; /* 8 * (2^51 - 19) */
 15768 ++ tmp[1] = b1 + 0x3ffffffffffff8LLU; /* 8 * (2^51 - 1); with 51-bit words the five constants together equal 8 * (2^255 - 19) */
 15769 ++ tmp[2] = b2 + 0x3ffffffffffff8LLU;
 15770 ++ tmp[3] = b3 + 0x3ffffffffffff8LLU;
 15771 ++ tmp[4] = b4 + 0x3ffffffffffff8LLU;
 15772 ++ {
 15773 ++ u64 xi = a[0];
 15774 ++ u64 yi = tmp[0];
 15775 ++ a[0] = yi - xi; /* presumably the added offset keeps this word-wise subtraction from wrapping; depends on caller-side bounds not visible here */
 15776 ++ }
 15777 ++ {
 15778 ++ u64 xi = a[1];
 15779 ++ u64 yi = tmp[1];
 15780 ++ a[1] = yi - xi;
 15781 ++ }
 15782 ++ {
 15783 ++ u64 xi = a[2];
 15784 ++ u64 yi = tmp[2];
 15785 ++ a[2] = yi - xi;
 15786 ++ }
 15787 ++ {
 15788 ++ u64 xi = a[3];
 15789 ++ u64 yi = tmp[3];
 15790 ++ a[3] = yi - xi;
 15791 ++ }
 15792 ++ {
 15793 ++ u64 xi = a[4];
 15794 ++ u64 yi = tmp[4];
 15795 ++ a[4] = yi - xi;
 15796 ++ }
 15797 ++}
15798 ++
15799 ++static __always_inline void fscalar(u64 *output, u64 *b, u64 s)
15800 ++{ /* Multiplies each of the 5 limbs of b by the scalar s in 128-bit precision, then carries and narrows into output. */
15801 ++ u128 tmp[5]; /* wide per-limb products */
15802 ++ u128 b4;
15803 ++ u128 b0;
15804 ++ u128 b4_;
15805 ++ u128 b0_;
15806 ++ {
15807 ++ u64 xi = b[0];
15808 ++ tmp[0] = ((u128)(xi) * (s));
15809 ++ }
15810 ++ {
15811 ++ u64 xi = b[1];
15812 ++ tmp[1] = ((u128)(xi) * (s));
15813 ++ }
15814 ++ {
15815 ++ u64 xi = b[2];
15816 ++ tmp[2] = ((u128)(xi) * (s));
15817 ++ }
15818 ++ {
15819 ++ u64 xi = b[3];
15820 ++ tmp[3] = ((u128)(xi) * (s));
15821 ++ }
15822 ++ {
15823 ++ u64 xi = b[4];
15824 ++ tmp[4] = ((u128)(xi) * (s));
15825 ++ }
15826 ++ fproduct_carry_wide_(tmp); /* helper not visible here; presumably propagates carries between the wide limbs - confirm */
15827 ++ b4 = tmp[4];
15828 ++ b0 = tmp[0];
15829 ++ b4_ = ((b4) & (((u128)(0x7ffffffffffffLLU)))); /* keep the low 51 bits of the top limb */
15830 ++ b0_ = ((b0) + (((u128)(19) * (((u64)(((b4) >> (51)))))))); /* fold the bits above 51 back into limb 0, multiplied by 19 */
15831 ++ tmp[4] = b4_;
15832 ++ tmp[0] = b0_;
15833 ++ fproduct_copy_from_wide_(output, tmp); /* helper not visible here; presumably narrows the u128 limbs into output - confirm */
15834 ++}
15835 ++
15836 ++static __always_inline void fmul(u64 *output, u64 *a, u64 *b)
15837 ++{ /* Thin wrapper that forwards all three arguments unchanged to fmul_fmul(). */
15838 ++ fmul_fmul(output, a, b);
15839 ++}
15840 ++
15841 ++static __always_inline void crecip(u64 *output, u64 *input)
15842 ++{ /* Thin wrapper that forwards both arguments unchanged to crecip_crecip(). */
15843 ++ crecip_crecip(output, input);
15844 ++}
15845 ++
15846 ++static __always_inline void point_swap_conditional_step(u64 *a, u64 *b,
15847 ++ u64 swap1, u32 ctr)
15848 ++{ /* Branch-free conditional swap of the single limb at index ctr - 1; swap1 acts as a bit mask. */
15849 ++ u32 i = ctr - 1; /* ctr is 1-based; callers in this file pass 5 down to 1 */
15850 ++ u64 ai = a[i];
15851 ++ u64 bi = b[i];
15852 ++ u64 x = swap1 & (ai ^ bi); /* 0 when swap1 is 0; ai ^ bi when swap1 is all ones */
15853 ++ u64 ai1 = ai ^ x; /* becomes bi when x == ai ^ bi, stays ai when x == 0 */
15854 ++ u64 bi1 = bi ^ x; /* becomes ai when x == ai ^ bi, stays bi when x == 0 */
15855 ++ a[i] = ai1;
15856 ++ b[i] = bi1;
15857 ++}
15858 ++
15859 ++static __always_inline void point_swap_conditional5(u64 *a, u64 *b, u64 swap1)
15860 ++{ /* Applies the masked swap step to all five limbs, index 4 down to index 0. */
15861 ++ point_swap_conditional_step(a, b, swap1, 5);
15862 ++ point_swap_conditional_step(a, b, swap1, 4);
15863 ++ point_swap_conditional_step(a, b, swap1, 3);
15864 ++ point_swap_conditional_step(a, b, swap1, 2);
15865 ++ point_swap_conditional_step(a, b, swap1, 1);
15866 ++}
15867 ++
15868 ++static __always_inline void point_swap_conditional(u64 *a, u64 *b, u64 iswap)
15869 ++{ /* Masked swap of two 10-limb points (two 5-limb halves each), controlled by iswap. */
15870 ++ u64 swap1 = 0 - iswap; /* unsigned wrap: iswap 0 gives mask 0, iswap 1 gives an all-ones mask */
15871 ++ point_swap_conditional5(a, b, swap1); /* limbs 0..4 */
15872 ++ point_swap_conditional5(a + 5, b + 5, swap1); /* limbs 5..9 */
15873 ++}
15874 ++
15875 ++static __always_inline void point_copy(u64 *output, u64 *input)
15876 ++{ /* Copies 10 limbs from input to output as two 5-limb memcpy() calls. */
15877 ++ memcpy(output, input, 5 * sizeof(*input)); /* limbs 0..4 */
15878 ++ memcpy(output + 5, input + 5, 5 * sizeof(*input)); /* limbs 5..9 */
15879 ++}
15880 ++
15881 ++static __always_inline void addanddouble_fmonty(u64 *pp, u64 *ppq, u64 *p,
15882 ++ u64 *pq, u64 *qmqp)
15883 ++{ /* One combined ladder step: writes pp and ppq from p, pq and qmqp. NOTE: p and pq are overwritten in place. Formula notes assume fmul() multiplies and fsquare_fsquare_times(o, i, 1) squares (helpers not visible here - confirm). */
15884 ++ u64 *qx = qmqp; /* only the first 5 limbs of qmqp are referenced in this function */
15885 ++ u64 *x2 = pp;
15886 ++ u64 *z2 = pp + 5;
15887 ++ u64 *x3 = ppq;
15888 ++ u64 *z3 = ppq + 5;
15889 ++ u64 *x = p;
15890 ++ u64 *z = p + 5;
15891 ++ u64 *xprime = pq;
15892 ++ u64 *zprime = pq + 5;
15893 ++ u64 buf[40] = { 0 }; /* eight 5-limb slots; offsets 0, 5, 10, 15, 20, 25, 30, 35 are used below */
15894 ++ u64 *origx = buf;
15895 ++ u64 *origxprime0 = buf + 5;
15896 ++ u64 *xxprime0;
15897 ++ u64 *zzprime0;
15898 ++ u64 *origxprime;
15899 ++ xxprime0 = buf + 25;
15900 ++ zzprime0 = buf + 30;
15901 ++ memcpy(origx, x, 5 * sizeof(*x)); /* save x before it is overwritten */
15902 ++ fsum(x, z); /* x = x + z */
15903 ++ fdifference(z, origx); /* z = saved x - z (fdifference stores second minus first) */
15904 ++ memcpy(origxprime0, xprime, 5 * sizeof(*xprime)); /* save xprime before it is overwritten */
15905 ++ fsum(xprime, zprime); /* xprime = xprime + zprime */
15906 ++ fdifference(zprime, origxprime0); /* zprime = saved xprime - zprime */
15907 ++ fmul(xxprime0, xprime, z); /* (x' + z') * (x - z) in terms of the original inputs */
15908 ++ fmul(zzprime0, x, zprime); /* (x + z) * (x' - z') in terms of the original inputs */
15909 ++ origxprime = buf + 5; /* same slot as origxprime0, reused as a save area */
15910 ++ {
15911 ++ u64 *xx0;
15912 ++ u64 *zz0;
15913 ++ u64 *xxprime;
15914 ++ u64 *zzprime;
15915 ++ u64 *zzzprime;
15916 ++ xx0 = buf + 15;
15917 ++ zz0 = buf + 20;
15918 ++ xxprime = buf + 25; /* same slot as xxprime0 */
15919 ++ zzprime = buf + 30; /* same slot as zzprime0 */
15920 ++ zzzprime = buf + 35;
15921 ++ memcpy(origxprime, xxprime, 5 * sizeof(*xxprime)); /* save xxprime before it is overwritten */
15922 ++ fsum(xxprime, zzprime); /* xxprime = xxprime + zzprime */
15923 ++ fdifference(zzprime, origxprime); /* zzprime = saved xxprime - zzprime */
15924 ++ fsquare_fsquare_times(x3, xxprime, 1); /* first half of ppq = (sum)^2 */
15925 ++ fsquare_fsquare_times(zzzprime, zzprime, 1); /* (difference)^2 */
15926 ++ fmul(z3, zzzprime, qx); /* second half of ppq = (difference)^2 * qx */
15927 ++ fsquare_fsquare_times(xx0, x, 1); /* x here already holds x + z */
15928 ++ fsquare_fsquare_times(zz0, z, 1); /* z here already holds x - z */
15929 ++ {
15930 ++ u64 *zzz;
15931 ++ u64 *xx;
15932 ++ u64 *zz;
15933 ++ u64 scalar;
15934 ++ zzz = buf + 10;
15935 ++ xx = buf + 15; /* same slot as xx0 */
15936 ++ zz = buf + 20; /* same slot as zz0 */
15937 ++ fmul(x2, xx, zz); /* first half of pp = xx * zz */
15938 ++ fdifference(zz, xx); /* zz = xx - zz */
15939 ++ scalar = 121665; /* NOTE(review): presumably the curve constant (A - 2) / 4 - not derivable from this file, confirm */
15940 ++ fscalar(zzz, zz, scalar); /* zzz = scalar * (xx - zz) */
15941 ++ fsum(zzz, xx); /* zzz = zzz + xx */
15942 ++ fmul(z2, zzz, zz); /* second half of pp = zzz * (xx - zz) */
15943 ++ }
15944 ++ }
15945 ++}
15946 ++
15947 ++static __always_inline void
15948 ++ladder_smallloop_cmult_small_loop_step(u64 *nq, u64 *nqpq, u64 *nq2, u64 *nqpq2,
15949 ++ u64 *q, u8 byt)
15950 ++{ /* One ladder step driven by the most significant bit of byt: masked swap, combined step into nq2/nqpq2, masked swap back. */
15951 ++ u64 bit0 = (u64)(byt >> 7); /* top bit of byt, 0 or 1 */
15952 ++ u64 bit;
15953 ++ point_swap_conditional(nq, nqpq, bit0); /* swap the inputs when the bit is 1 */
15954 ++ addanddouble_fmonty(nq2, nqpq2, nq, nqpq, q); /* results go to nq2/nqpq2; nq/nqpq are clobbered by the callee */
15955 ++ bit = (u64)(byt >> 7); /* same value as bit0; byt is unchanged in between */
15956 ++ point_swap_conditional(nq2, nqpq2, bit); /* swap the outputs with the same condition */
15957 ++}
15958 ++
15959 ++static __always_inline void
15960 ++ladder_smallloop_cmult_small_loop_double_step(u64 *nq, u64 *nqpq, u64 *nq2,
15961 ++ u64 *nqpq2, u64 *q, u8 byt)
15962 ++{ /* Two ladder steps for the top two bits of byt; the buffer pairs trade roles so the result ends up back in nq/nqpq. */
15963 ++ u8 byt1;
15964 ++ ladder_smallloop_cmult_small_loop_step(nq, nqpq, nq2, nqpq2, q, byt); /* bit 7: nq/nqpq in, nq2/nqpq2 out */
15965 ++ byt1 = byt << 1; /* move the next bit into the top position */
15966 ++ ladder_smallloop_cmult_small_loop_step(nq2, nqpq2, nq, nqpq, q, byt1); /* nq2/nqpq2 in, nq/nqpq out */
15967 ++}
15968 ++
15969 ++static __always_inline void
15970 ++ladder_smallloop_cmult_small_loop(u64 *nq, u64 *nqpq, u64 *nq2, u64 *nqpq2,
15971 ++ u64 *q, u8 byt, u32 i)
15972 ++{ /* Runs i double steps, consuming two bits of byt per iteration from the most significant end. */
15973 ++ while (i--) {
15974 ++ ladder_smallloop_cmult_small_loop_double_step(nq, nqpq, nq2,
15975 ++ nqpq2, q, byt);
15976 ++ byt <<= 2; /* drop the two bits just processed */
15977 ++ }
15978 ++}
15979 ++
15980 ++static __always_inline void ladder_bigloop_cmult_big_loop(u8 *n1, u64 *nq,
15981 ++ u64 *nqpq, u64 *nq2,
15982 ++ u64 *nqpq2, u64 *q,
15983 ++ u32 i)
15984 ++{ /* Walks the scalar bytes from n1[i - 1] down to n1[0], processing all 8 bits of each byte. */
15985 ++ while (i--) {
15986 ++ u8 byte = n1[i]; /* i was already decremented by the loop condition */
15987 ++ ladder_smallloop_cmult_small_loop(nq, nqpq, nq2, nqpq2, q,
15988 ++ byte, 4); /* 4 double steps = 8 bits */
15989 ++ }
15990 ++}
15991 ++
15992 ++static void ladder_cmult(u64 *result, u8 *n1, u64 *q)
15993 ++{ /* Runs the ladder over the 32 bytes at n1 starting from point q and copies the final nq (10 limbs) into result. */
15994 ++ u64 point_buf[40] = { 0 }; /* four 10-limb points: nq, nqpq, nq2, nqpq2 */
15995 ++ u64 *nq = point_buf;
15996 ++ u64 *nqpq = point_buf + 10;
15997 ++ u64 *nq2 = point_buf + 20;
15998 ++ u64 *nqpq2 = point_buf + 30;
15999 ++ point_copy(nqpq, q); /* nqpq starts as a copy of q */
16000 ++ nq[0] = 1; /* nq starts with first limb 1 and the other nine limbs zero */
16001 ++ ladder_bigloop_cmult_big_loop(n1, nq, nqpq, nq2, nqpq2, q, 32); /* 32 bytes = 256 scalar bits */
16002 ++ point_copy(result, nq); /* point_buf is not wiped here */
16003 ++}
16004 ++
16005 ++static __always_inline void format_fexpand(u64 *output, const u8 *input)
16006 ++{ /* Unpacks 32 little-endian input bytes into five 51-bit limbs using overlapping unaligned 64-bit loads. */
16007 ++ const u8 *x00 = input + 6; /* bit 51 = byte 6, bit 3 */
16008 ++ const u8 *x01 = input + 12; /* bit 102 = byte 12, bit 6 */
16009 ++ const u8 *x02 = input + 19; /* bit 153 = byte 19, bit 1 */
16010 ++ const u8 *x0 = input + 24; /* bit 204 = byte 24, bit 12; this load covers bytes 24..31 */
16011 ++ u64 i0, i1, i2, i3, i4, output0, output1, output2, output3, output4;
16012 ++ i0 = get_unaligned_le64(input);
16013 ++ i1 = get_unaligned_le64(x00);
16014 ++ i2 = get_unaligned_le64(x01);
16015 ++ i3 = get_unaligned_le64(x02);
16016 ++ i4 = get_unaligned_le64(x0);
16017 ++ output0 = i0 & 0x7ffffffffffffLLU; /* bits 0..50 */
16018 ++ output1 = i1 >> 3 & 0x7ffffffffffffLLU; /* bits 51..101 */
16019 ++ output2 = i2 >> 6 & 0x7ffffffffffffLLU; /* bits 102..152 */
16020 ++ output3 = i3 >> 1 & 0x7ffffffffffffLLU; /* bits 153..203 */
16021 ++ output4 = i4 >> 12 & 0x7ffffffffffffLLU; /* bits 204..254; the mask discards input bit 255 */
16022 ++ output[0] = output0;
16023 ++ output[1] = output1;
16024 ++ output[2] = output2;
16025 ++ output[3] = output3;
16026 ++ output[4] = output4;
16027 ++}
16028 ++
16029 ++static __always_inline void format_fcontract_first_carry_pass(u64 *input)
16030 ++{ /* Propagates carries above bit 51 upward from limb 0 to limb 4, in place; limbs 0..3 end up masked to 51 bits. */
16031 ++ u64 t0 = input[0];
16032 ++ u64 t1 = input[1];
16033 ++ u64 t2 = input[2];
16034 ++ u64 t3 = input[3];
16035 ++ u64 t4 = input[4];
16036 ++ u64 t1_ = t1 + (t0 >> 51); /* carry out of limb 0 */
16037 ++ u64 t0_ = t0 & 0x7ffffffffffffLLU;
16038 ++ u64 t2_ = t2 + (t1_ >> 51); /* carry out of limb 1 */
16039 ++ u64 t1__ = t1_ & 0x7ffffffffffffLLU;
16040 ++ u64 t3_ = t3 + (t2_ >> 51); /* carry out of limb 2 */
16041 ++ u64 t2__ = t2_ & 0x7ffffffffffffLLU;
16042 ++ u64 t4_ = t4 + (t3_ >> 51); /* carry out of limb 3 */
16043 ++ u64 t3__ = t3_ & 0x7ffffffffffffLLU;
16044 ++ input[0] = t0_;
16045 ++ input[1] = t1__;
16046 ++ input[2] = t2__;
16047 ++ input[3] = t3__;
16048 ++ input[4] = t4_; /* limb 4 is stored unmasked and may still exceed 51 bits */
16049 ++}
16050 ++
16051 ++static __always_inline void format_fcontract_first_carry_full(u64 *input)
16052 ++{ /* First normalization round: the carry pass above followed by modulo_carry_top(). */
16053 ++ format_fcontract_first_carry_pass(input);
16054 ++ modulo_carry_top(input); /* helper not visible here; presumably folds the overflow of limb 4 back into limb 0 - confirm */
16055 ++}
16056 ++
16057 ++static __always_inline void format_fcontract_second_carry_pass(u64 *input)
16058 ++{ /* Same statements as format_fcontract_first_carry_pass(): carries above bit 51 move upward from limb 0 to limb 4, in place. */
16059 ++ u64 t0 = input[0];
16060 ++ u64 t1 = input[1];
16061 ++ u64 t2 = input[2];
16062 ++ u64 t3 = input[3];
16063 ++ u64 t4 = input[4];
16064 ++ u64 t1_ = t1 + (t0 >> 51); /* carry out of limb 0 */
16065 ++ u64 t0_ = t0 & 0x7ffffffffffffLLU;
16066 ++ u64 t2_ = t2 + (t1_ >> 51); /* carry out of limb 1 */
16067 ++ u64 t1__ = t1_ & 0x7ffffffffffffLLU;
16068 ++ u64 t3_ = t3 + (t2_ >> 51); /* carry out of limb 2 */
16069 ++ u64 t2__ = t2_ & 0x7ffffffffffffLLU;
16070 ++ u64 t4_ = t4 + (t3_ >> 51); /* carry out of limb 3 */
16071 ++ u64 t3__ = t3_ & 0x7ffffffffffffLLU;
16072 ++ input[0] = t0_;
16073 ++ input[1] = t1__;
16074 ++ input[2] = t2__;
16075 ++ input[3] = t3__;
16076 ++ input[4] = t4_; /* limb 4 is stored unmasked */
16077 ++}
16078 ++
16079 ++static __always_inline void format_fcontract_second_carry_full(u64 *input)
16080 ++{ /* Second normalization round: carry pass, modulo_carry_top(), then one extra carry from limb 0 into limb 1. */
16081 ++ u64 i0;
16082 ++ u64 i1;
16083 ++ u64 i0_;
16084 ++ u64 i1_;
16085 ++ format_fcontract_second_carry_pass(input);
16086 ++ modulo_carry_top(input); /* helper not visible here; presumably folds the overflow of limb 4 back into limb 0 - confirm */
16087 ++ i0 = input[0];
16088 ++ i1 = input[1];
16089 ++ i0_ = i0 & 0x7ffffffffffffLLU; /* limb 0 back to 51 bits */
16090 ++ i1_ = i1 + (i0 >> 51); /* push any excess from limb 0 into limb 1 */
16091 ++ input[0] = i0_;
16092 ++ input[1] = i1_;
16093 ++}
16094 ++
16095 ++static __always_inline void format_fcontract_trim(u64 *input)
16096 ++{ /* Branch-free conditional subtraction of the limb pattern (2^51 - 19, 2^51 - 1 x4), i.e. 2^255 - 19, from input. Assumes u64_gte_mask/u64_eq_mask return all ones when true and 0 otherwise (helpers not visible here - confirm). */
16097 ++ u64 a0 = input[0];
16098 ++ u64 a1 = input[1];
16099 ++ u64 a2 = input[2];
16100 ++ u64 a3 = input[3];
16101 ++ u64 a4 = input[4];
16102 ++ u64 mask0 = u64_gte_mask(a0, 0x7ffffffffffedLLU); /* limb 0 >= 2^51 - 19 */
16103 ++ u64 mask1 = u64_eq_mask(a1, 0x7ffffffffffffLLU); /* limb 1 == 2^51 - 1 */
16104 ++ u64 mask2 = u64_eq_mask(a2, 0x7ffffffffffffLLU);
16105 ++ u64 mask3 = u64_eq_mask(a3, 0x7ffffffffffffLLU);
16106 ++ u64 mask4 = u64_eq_mask(a4, 0x7ffffffffffffLLU);
16107 ++ u64 mask = (((mask0 & mask1) & mask2) & mask3) & mask4; /* set only when all five conditions hold */
16108 ++ u64 a0_ = a0 - (0x7ffffffffffedLLU & mask); /* subtracts the constant when mask is all ones, 0 when mask is 0 */
16109 ++ u64 a1_ = a1 - (0x7ffffffffffffLLU & mask);
16110 ++ u64 a2_ = a2 - (0x7ffffffffffffLLU & mask);
16111 ++ u64 a3_ = a3 - (0x7ffffffffffffLLU & mask);
16112 ++ u64 a4_ = a4 - (0x7ffffffffffffLLU & mask);
16113 ++ input[0] = a0_;
16114 ++ input[1] = a1_;
16115 ++ input[2] = a2_;
16116 ++ input[3] = a3_;
16117 ++ input[4] = a4_;
16118 ++}
16119 ++
16120 ++static __always_inline void format_fcontract_store(u8 *output, u64 *input)
16121 ++{ /* Packs five limbs into four 64-bit words and stores them little-endian into 32 output bytes. The ORed fields only stay disjoint if every limb is below 2^51. */
16122 ++ u64 t0 = input[0];
16123 ++ u64 t1 = input[1];
16124 ++ u64 t2 = input[2];
16125 ++ u64 t3 = input[3];
16126 ++ u64 t4 = input[4];
16127 ++ u64 o0 = t1 << 51 | t0; /* 51 bits of t0, then the low 13 bits of t1 */
16128 ++ u64 o1 = t2 << 38 | t1 >> 13; /* upper 38 bits of t1, then the low 26 bits of t2 */
16129 ++ u64 o2 = t3 << 25 | t2 >> 26; /* upper 25 bits of t2, then the low 39 bits of t3 */
16130 ++ u64 o3 = t4 << 12 | t3 >> 39; /* upper 12 bits of t3, then t4 */
16131 ++ u8 *b0 = output;
16132 ++ u8 *b1 = output + 8;
16133 ++ u8 *b2 = output + 16;
16134 ++ u8 *b3 = output + 24;
16135 ++ put_unaligned_le64(o0, b0);
16136 ++ put_unaligned_le64(o1, b1);
16137 ++ put_unaligned_le64(o2, b2);
16138 ++ put_unaligned_le64(o3, b3);
16139 ++}
16140 ++
16141 ++static __always_inline void format_fcontract(u8 *output, u64 *input)
16142 ++{ /* Serializes limbs to 32 bytes: two carry rounds, conditional trim, then store. NOTE: input is modified in place. */
16143 ++ format_fcontract_first_carry_full(input);
16144 ++ format_fcontract_second_carry_full(input);
16145 ++ format_fcontract_trim(input);
16146 ++ format_fcontract_store(output, input);
16147 ++}
16148 ++
16149 ++static __always_inline void format_scalar_of_point(u8 *scalar, u64 *point)
16150 ++{ /* Takes a 10-limb point (x in limbs 0..4, z in limbs 5..9), multiplies x by crecip(z) and serializes the product into scalar. */
16151 ++ u64 *x = point;
16152 ++ u64 *z = point + 5;
16153 ++ u64 buf[10] __aligned(32) = { 0 }; /* two 5-limb temporaries; not wiped before returning */
16154 ++ u64 *zmone = buf;
16155 ++ u64 *sc = buf + 5;
16156 ++ crecip(zmone, z); /* zmone = result of the crecip chain applied to z */
16157 ++ fmul(sc, x, zmone); /* sc = x * zmone, assuming fmul multiplies (helper body not visible here) */
16158 ++ format_fcontract(scalar, sc); /* writes 32 bytes to scalar */
16159 ++}
16160 ++
16161 ++void curve25519_generic(u8 mypublic[CURVE25519_KEY_SIZE],
16162 ++ const u8 secret[CURVE25519_KEY_SIZE],
16163 ++ const u8 basepoint[CURVE25519_KEY_SIZE])
16164 ++{ /* Entry point: expands basepoint to limbs, runs the ladder with a clamped copy of secret, writes the serialized result to mypublic and wipes the local buffers. */
16165 ++ u64 buf0[10] __aligned(32) = { 0 }; /* input point: x0 in limbs 0..4, z in limbs 5..9 */
16166 ++ u64 *x0 = buf0;
16167 ++ u64 *z = buf0 + 5;
16168 ++ u64 *q;
16169 ++ format_fexpand(x0, basepoint);
16170 ++ z[0] = 1; /* z limbs become 1, 0, 0, 0, 0 */
16171 ++ q = buf0;
16172 ++ {
16173 ++ u8 e[32] __aligned(32) = { 0 }; /* private working copy of the secret */
16174 ++ u8 *scalar;
16175 ++ memcpy(e, secret, 32);
16176 ++ curve25519_clamp_secret(e); /* helper not visible here; modifies the copy, never the caller's const secret */
16177 ++ scalar = e;
16178 ++ {
16179 ++ u64 buf[15] = { 0 }; /* 15 limbs allocated; the code visible here writes and reads only the first 10 */
16180 ++ u64 *nq = buf;
16181 ++ u64 *x = nq;
16182 ++ x[0] = 1; /* overwritten when ladder_cmult() copies its result into nq */
16183 ++ ladder_cmult(nq, scalar, q);
16184 ++ format_scalar_of_point(mypublic, nq);
16185 ++ memzero_explicit(buf, sizeof(buf)); /* wipe the ladder result */
16186 ++ }
16187 ++ memzero_explicit(e, sizeof(e)); /* wipe the secret copy */
16188 ++ }
16189 ++ memzero_explicit(buf0, sizeof(buf0)); /* wipe the expanded input point */
16190 ++}
16191 +diff --git a/lib/crypto/curve25519.c b/lib/crypto/curve25519.c
16192 +new file mode 100644
16193 +index 000000000000..0106bebe6900
16194 +--- /dev/null
16195 ++++ b/lib/crypto/curve25519.c
16196 +@@ -0,0 +1,25 @@
16197 ++// SPDX-License-Identifier: GPL-2.0 OR MIT
16198 ++/*
16199 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
16200 ++ *
16201 ++ * This is an implementation of the Curve25519 ECDH algorithm, using either
16202 ++ * a 32-bit implementation or a 64-bit implementation with 128-bit integers,
16203 ++ * depending on what is supported by the target compiler.
16204 ++ *
16205 ++ * Information: https://cr.yp.to/ecdh.html
16206 ++ */
16207 ++
16208 ++#include <crypto/curve25519.h>
16209 ++#include <linux/module.h>
16210 ++#include <linux/init.h>
16211 ++
16212 ++const u8 curve25519_null_point[CURVE25519_KEY_SIZE] __aligned(32) = { 0 }; /* every byte is zero */
16213 ++const u8 curve25519_base_point[CURVE25519_KEY_SIZE] __aligned(32) = { 9 }; /* first byte is 9; the initializer zero-fills the remaining bytes */
16214 ++
16215 ++EXPORT_SYMBOL(curve25519_null_point);
16216 ++EXPORT_SYMBOL(curve25519_base_point);
16217 ++EXPORT_SYMBOL(curve25519_generic);
16218 ++
16219 ++MODULE_LICENSE("GPL v2");
16220 ++MODULE_DESCRIPTION("Curve25519 scalar multiplication");
16221 ++MODULE_AUTHOR("Jason A. Donenfeld <Jason@×××××.com>");
16222 +--
16223 +cgit v1.2.3-4-ga26e
16224 +
16225 +
16226 +From 150e58c0bd71ae69fcb9d2dfe50d2211083d9b9e Mon Sep 17 00:00:00 2001
16227 +From: Ard Biesheuvel <ardb@××××××.org>
16228 +Date: Fri, 8 Nov 2019 13:22:33 +0100
16229 +Subject: crypto: curve25519 - add kpp selftest
16230 +
16231 +commit f613457a7af085728297bef71233c37faf3c01b1 upstream.
16232 +
16233 +In preparation of introducing KPP implementations of Curve25519, import
16234 +the set of test cases proposed by the Zinc patch set, but converted to
16235 +the KPP format.
16236 +
16237 +Signed-off-by: Ard Biesheuvel <ardb@××××××.org>
16238 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
16239 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
16240 +---
16241 + crypto/testmgr.c | 6 +
16242 + crypto/testmgr.h | 1225 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
16243 + 2 files changed, 1231 insertions(+)
16244 +
16245 +diff --git a/crypto/testmgr.c b/crypto/testmgr.c
16246 +index 711390861f71..57ab993b9ad2 100644
16247 +--- a/crypto/testmgr.c
16248 ++++ b/crypto/testmgr.c
16249 +@@ -4295,6 +4295,12 @@ static const struct alg_test_desc alg_test_descs[] = {
16250 + .alg = "cts(cbc(paes))",
16251 + .test = alg_test_null,
16252 + .fips_allowed = 1,
16253 ++ }, {
16254 ++ .alg = "curve25519",
16255 ++ .test = alg_test_kpp,
16256 ++ .suite = {
16257 ++ .kpp = __VECS(curve25519_tv_template)
16258 ++ }
16259 + }, {
16260 + .alg = "deflate",
16261 + .test = alg_test_comp,
16262 +diff --git a/crypto/testmgr.h b/crypto/testmgr.h
16263 +index 102fcad54966..5d132ae996b4 100644
16264 +--- a/crypto/testmgr.h
16265 ++++ b/crypto/testmgr.h
16266 +@@ -1030,6 +1030,1231 @@ static const struct kpp_testvec dh_tv_template[] = {
16267 + }
16268 + };
16269 +
16270 ++static const struct kpp_testvec curve25519_tv_template[] = {
16271 ++{
16272 ++ .secret = (u8[32]){ 0x77, 0x07, 0x6d, 0x0a, 0x73, 0x18, 0xa5, 0x7d,
16273 ++ 0x3c, 0x16, 0xc1, 0x72, 0x51, 0xb2, 0x66, 0x45,
16274 ++ 0xdf, 0x4c, 0x2f, 0x87, 0xeb, 0xc0, 0x99, 0x2a,
16275 ++ 0xb1, 0x77, 0xfb, 0xa5, 0x1d, 0xb9, 0x2c, 0x2a },
16276 ++ .b_public = (u8[32]){ 0xde, 0x9e, 0xdb, 0x7d, 0x7b, 0x7d, 0xc1, 0xb4,
16277 ++ 0xd3, 0x5b, 0x61, 0xc2, 0xec, 0xe4, 0x35, 0x37,
16278 ++ 0x3f, 0x83, 0x43, 0xc8, 0x5b, 0x78, 0x67, 0x4d,
16279 ++ 0xad, 0xfc, 0x7e, 0x14, 0x6f, 0x88, 0x2b, 0x4f },
16280 ++ .expected_ss = (u8[32]){ 0x4a, 0x5d, 0x9d, 0x5b, 0xa4, 0xce, 0x2d, 0xe1,
16281 ++ 0x72, 0x8e, 0x3b, 0xf4, 0x80, 0x35, 0x0f, 0x25,
16282 ++ 0xe0, 0x7e, 0x21, 0xc9, 0x47, 0xd1, 0x9e, 0x33,
16283 ++ 0x76, 0xf0, 0x9b, 0x3c, 0x1e, 0x16, 0x17, 0x42 },
16284 ++ .secret_size = 32,
16285 ++ .b_public_size = 32,
16286 ++ .expected_ss_size = 32,
16287 ++
16288 ++},
16289 ++{
16290 ++ .secret = (u8[32]){ 0x5d, 0xab, 0x08, 0x7e, 0x62, 0x4a, 0x8a, 0x4b,
16291 ++ 0x79, 0xe1, 0x7f, 0x8b, 0x83, 0x80, 0x0e, 0xe6,
16292 ++ 0x6f, 0x3b, 0xb1, 0x29, 0x26, 0x18, 0xb6, 0xfd,
16293 ++ 0x1c, 0x2f, 0x8b, 0x27, 0xff, 0x88, 0xe0, 0xeb },
16294 ++ .b_public = (u8[32]){ 0x85, 0x20, 0xf0, 0x09, 0x89, 0x30, 0xa7, 0x54,
16295 ++ 0x74, 0x8b, 0x7d, 0xdc, 0xb4, 0x3e, 0xf7, 0x5a,
16296 ++ 0x0d, 0xbf, 0x3a, 0x0d, 0x26, 0x38, 0x1a, 0xf4,
16297 ++ 0xeb, 0xa4, 0xa9, 0x8e, 0xaa, 0x9b, 0x4e, 0x6a },
16298 ++ .expected_ss = (u8[32]){ 0x4a, 0x5d, 0x9d, 0x5b, 0xa4, 0xce, 0x2d, 0xe1,
16299 ++ 0x72, 0x8e, 0x3b, 0xf4, 0x80, 0x35, 0x0f, 0x25,
16300 ++ 0xe0, 0x7e, 0x21, 0xc9, 0x47, 0xd1, 0x9e, 0x33,
16301 ++ 0x76, 0xf0, 0x9b, 0x3c, 0x1e, 0x16, 0x17, 0x42 },
16302 ++ .secret_size = 32,
16303 ++ .b_public_size = 32,
16304 ++ .expected_ss_size = 32,
16305 ++
16306 ++},
16307 ++{
16308 ++ .secret = (u8[32]){ 1 },
16309 ++ .b_public = (u8[32]){ 0x25, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
16310 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
16311 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
16312 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
16313 ++ .expected_ss = (u8[32]){ 0x3c, 0x77, 0x77, 0xca, 0xf9, 0x97, 0xb2, 0x64,
16314 ++ 0x41, 0x60, 0x77, 0x66, 0x5b, 0x4e, 0x22, 0x9d,
16315 ++ 0x0b, 0x95, 0x48, 0xdc, 0x0c, 0xd8, 0x19, 0x98,
16316 ++ 0xdd, 0xcd, 0xc5, 0xc8, 0x53, 0x3c, 0x79, 0x7f },
16317 ++ .secret_size = 32,
16318 ++ .b_public_size = 32,
16319 ++ .expected_ss_size = 32,
16320 ++
16321 ++},
16322 ++{
16323 ++ .secret = (u8[32]){ 1 },
16324 ++ .b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16325 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16326 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16327 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
16328 ++ .expected_ss = (u8[32]){ 0xb3, 0x2d, 0x13, 0x62, 0xc2, 0x48, 0xd6, 0x2f,
16329 ++ 0xe6, 0x26, 0x19, 0xcf, 0xf0, 0x4d, 0xd4, 0x3d,
16330 ++ 0xb7, 0x3f, 0xfc, 0x1b, 0x63, 0x08, 0xed, 0xe3,
16331 ++ 0x0b, 0x78, 0xd8, 0x73, 0x80, 0xf1, 0xe8, 0x34 },
16332 ++ .secret_size = 32,
16333 ++ .b_public_size = 32,
16334 ++ .expected_ss_size = 32,
16335 ++
16336 ++},
16337 ++{
16338 ++ .secret = (u8[32]){ 0xa5, 0x46, 0xe3, 0x6b, 0xf0, 0x52, 0x7c, 0x9d,
16339 ++ 0x3b, 0x16, 0x15, 0x4b, 0x82, 0x46, 0x5e, 0xdd,
16340 ++ 0x62, 0x14, 0x4c, 0x0a, 0xc1, 0xfc, 0x5a, 0x18,
16341 ++ 0x50, 0x6a, 0x22, 0x44, 0xba, 0x44, 0x9a, 0xc4 },
16342 ++ .b_public = (u8[32]){ 0xe6, 0xdb, 0x68, 0x67, 0x58, 0x30, 0x30, 0xdb,
16343 ++ 0x35, 0x94, 0xc1, 0xa4, 0x24, 0xb1, 0x5f, 0x7c,
16344 ++ 0x72, 0x66, 0x24, 0xec, 0x26, 0xb3, 0x35, 0x3b,
16345 ++ 0x10, 0xa9, 0x03, 0xa6, 0xd0, 0xab, 0x1c, 0x4c },
16346 ++ .expected_ss = (u8[32]){ 0xc3, 0xda, 0x55, 0x37, 0x9d, 0xe9, 0xc6, 0x90,
16347 ++ 0x8e, 0x94, 0xea, 0x4d, 0xf2, 0x8d, 0x08, 0x4f,
16348 ++ 0x32, 0xec, 0xcf, 0x03, 0x49, 0x1c, 0x71, 0xf7,
16349 ++ 0x54, 0xb4, 0x07, 0x55, 0x77, 0xa2, 0x85, 0x52 },
16350 ++ .secret_size = 32,
16351 ++ .b_public_size = 32,
16352 ++ .expected_ss_size = 32,
16353 ++
16354 ++},
16355 ++{
16356 ++ .secret = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0x0a, 0xff, 0xff, 0xff,
16357 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16358 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16359 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
16360 ++ .b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16361 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16362 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16363 ++ 0xff, 0xff, 0xff, 0xff, 0x0a, 0x00, 0xfb, 0x9f },
16364 ++ .expected_ss = (u8[32]){ 0x77, 0x52, 0xb6, 0x18, 0xc1, 0x2d, 0x48, 0xd2,
16365 ++ 0xc6, 0x93, 0x46, 0x83, 0x81, 0x7c, 0xc6, 0x57,
16366 ++ 0xf3, 0x31, 0x03, 0x19, 0x49, 0x48, 0x20, 0x05,
16367 ++ 0x42, 0x2b, 0x4e, 0xae, 0x8d, 0x1d, 0x43, 0x23 },
16368 ++ .secret_size = 32,
16369 ++ .b_public_size = 32,
16370 ++ .expected_ss_size = 32,
16371 ++
16372 ++},
16373 ++{
16374 ++ .secret = (u8[32]){ 0x8e, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
16375 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
16376 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
16377 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
16378 ++ .b_public = (u8[32]){ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
16379 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
16380 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
16381 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8e, 0x06 },
16382 ++ .expected_ss = (u8[32]){ 0x5a, 0xdf, 0xaa, 0x25, 0x86, 0x8e, 0x32, 0x3d,
16383 ++ 0xae, 0x49, 0x62, 0xc1, 0x01, 0x5c, 0xb3, 0x12,
16384 ++ 0xe1, 0xc5, 0xc7, 0x9e, 0x95, 0x3f, 0x03, 0x99,
16385 ++ 0xb0, 0xba, 0x16, 0x22, 0xf3, 0xb6, 0xf7, 0x0c },
16386 ++ .secret_size = 32,
16387 ++ .b_public_size = 32,
16388 ++ .expected_ss_size = 32,
16389 ++
16390 ++},
16391 ++/* wycheproof - normal case */
16392 ++{
16393 ++ .secret = (u8[32]){ 0x48, 0x52, 0x83, 0x4d, 0x9d, 0x6b, 0x77, 0xda,
16394 ++ 0xde, 0xab, 0xaa, 0xf2, 0xe1, 0x1d, 0xca, 0x66,
16395 ++ 0xd1, 0x9f, 0xe7, 0x49, 0x93, 0xa7, 0xbe, 0xc3,
16396 ++ 0x6c, 0x6e, 0x16, 0xa0, 0x98, 0x3f, 0xea, 0xba },
16397 ++ .b_public = (u8[32]){ 0x9c, 0x64, 0x7d, 0x9a, 0xe5, 0x89, 0xb9, 0xf5,
16398 ++ 0x8f, 0xdc, 0x3c, 0xa4, 0x94, 0x7e, 0xfb, 0xc9,
16399 ++ 0x15, 0xc4, 0xb2, 0xe0, 0x8e, 0x74, 0x4a, 0x0e,
16400 ++ 0xdf, 0x46, 0x9d, 0xac, 0x59, 0xc8, 0xf8, 0x5a },
16401 ++ .expected_ss = (u8[32]){ 0x87, 0xb7, 0xf2, 0x12, 0xb6, 0x27, 0xf7, 0xa5,
16402 ++ 0x4c, 0xa5, 0xe0, 0xbc, 0xda, 0xdd, 0xd5, 0x38,
16403 ++ 0x9d, 0x9d, 0xe6, 0x15, 0x6c, 0xdb, 0xcf, 0x8e,
16404 ++ 0xbe, 0x14, 0xff, 0xbc, 0xfb, 0x43, 0x65, 0x51 },
16405 ++ .secret_size = 32,
16406 ++ .b_public_size = 32,
16407 ++ .expected_ss_size = 32,
16408 ++
16409 ++},
16410 ++/* wycheproof - public key on twist */
16411 ++{
16412 ++ .secret = (u8[32]){ 0x58, 0x8c, 0x06, 0x1a, 0x50, 0x80, 0x4a, 0xc4,
16413 ++ 0x88, 0xad, 0x77, 0x4a, 0xc7, 0x16, 0xc3, 0xf5,
16414 ++ 0xba, 0x71, 0x4b, 0x27, 0x12, 0xe0, 0x48, 0x49,
16415 ++ 0x13, 0x79, 0xa5, 0x00, 0x21, 0x19, 0x98, 0xa8 },
16416 ++ .b_public = (u8[32]){ 0x63, 0xaa, 0x40, 0xc6, 0xe3, 0x83, 0x46, 0xc5,
16417 ++ 0xca, 0xf2, 0x3a, 0x6d, 0xf0, 0xa5, 0xe6, 0xc8,
16418 ++ 0x08, 0x89, 0xa0, 0x86, 0x47, 0xe5, 0x51, 0xb3,
16419 ++ 0x56, 0x34, 0x49, 0xbe, 0xfc, 0xfc, 0x97, 0x33 },
16420 ++ .expected_ss = (u8[32]){ 0xb1, 0xa7, 0x07, 0x51, 0x94, 0x95, 0xff, 0xff,
16421 ++ 0xb2, 0x98, 0xff, 0x94, 0x17, 0x16, 0xb0, 0x6d,
16422 ++ 0xfa, 0xb8, 0x7c, 0xf8, 0xd9, 0x11, 0x23, 0xfe,
16423 ++ 0x2b, 0xe9, 0xa2, 0x33, 0xdd, 0xa2, 0x22, 0x12 },
16424 ++ .secret_size = 32,
16425 ++ .b_public_size = 32,
16426 ++ .expected_ss_size = 32,
16427 ++
16428 ++},
16429 ++/* wycheproof - public key on twist */
16430 ++{
16431 ++ .secret = (u8[32]){ 0xb0, 0x5b, 0xfd, 0x32, 0xe5, 0x53, 0x25, 0xd9,
16432 ++ 0xfd, 0x64, 0x8c, 0xb3, 0x02, 0x84, 0x80, 0x39,
16433 ++ 0x00, 0x0b, 0x39, 0x0e, 0x44, 0xd5, 0x21, 0xe5,
16434 ++ 0x8a, 0xab, 0x3b, 0x29, 0xa6, 0x96, 0x0b, 0xa8 },
16435 ++ .b_public = (u8[32]){ 0x0f, 0x83, 0xc3, 0x6f, 0xde, 0xd9, 0xd3, 0x2f,
16436 ++ 0xad, 0xf4, 0xef, 0xa3, 0xae, 0x93, 0xa9, 0x0b,
16437 ++ 0xb5, 0xcf, 0xa6, 0x68, 0x93, 0xbc, 0x41, 0x2c,
16438 ++ 0x43, 0xfa, 0x72, 0x87, 0xdb, 0xb9, 0x97, 0x79 },
16439 ++ .expected_ss = (u8[32]){ 0x67, 0xdd, 0x4a, 0x6e, 0x16, 0x55, 0x33, 0x53,
16440 ++ 0x4c, 0x0e, 0x3f, 0x17, 0x2e, 0x4a, 0xb8, 0x57,
16441 ++ 0x6b, 0xca, 0x92, 0x3a, 0x5f, 0x07, 0xb2, 0xc0,
16442 ++ 0x69, 0xb4, 0xc3, 0x10, 0xff, 0x2e, 0x93, 0x5b },
16443 ++ .secret_size = 32,
16444 ++ .b_public_size = 32,
16445 ++ .expected_ss_size = 32,
16446 ++
16447 ++},
16448 ++/* wycheproof - public key on twist */
16449 ++{
16450 ++ .secret = (u8[32]){ 0x70, 0xe3, 0x4b, 0xcb, 0xe1, 0xf4, 0x7f, 0xbc,
16451 ++ 0x0f, 0xdd, 0xfd, 0x7c, 0x1e, 0x1a, 0xa5, 0x3d,
16452 ++ 0x57, 0xbf, 0xe0, 0xf6, 0x6d, 0x24, 0x30, 0x67,
16453 ++ 0xb4, 0x24, 0xbb, 0x62, 0x10, 0xbe, 0xd1, 0x9c },
16454 ++ .b_public = (u8[32]){ 0x0b, 0x82, 0x11, 0xa2, 0xb6, 0x04, 0x90, 0x97,
16455 ++ 0xf6, 0x87, 0x1c, 0x6c, 0x05, 0x2d, 0x3c, 0x5f,
16456 ++ 0xc1, 0xba, 0x17, 0xda, 0x9e, 0x32, 0xae, 0x45,
16457 ++ 0x84, 0x03, 0xb0, 0x5b, 0xb2, 0x83, 0x09, 0x2a },
16458 ++ .expected_ss = (u8[32]){ 0x4a, 0x06, 0x38, 0xcf, 0xaa, 0x9e, 0xf1, 0x93,
16459 ++ 0x3b, 0x47, 0xf8, 0x93, 0x92, 0x96, 0xa6, 0xb2,
16460 ++ 0x5b, 0xe5, 0x41, 0xef, 0x7f, 0x70, 0xe8, 0x44,
16461 ++ 0xc0, 0xbc, 0xc0, 0x0b, 0x13, 0x4d, 0xe6, 0x4a },
16462 ++ .secret_size = 32,
16463 ++ .b_public_size = 32,
16464 ++ .expected_ss_size = 32,
16465 ++
16466 ++},
16467 ++/* wycheproof - public key on twist */
16468 ++{
16469 ++ .secret = (u8[32]){ 0x68, 0xc1, 0xf3, 0xa6, 0x53, 0xa4, 0xcd, 0xb1,
16470 ++ 0xd3, 0x7b, 0xba, 0x94, 0x73, 0x8f, 0x8b, 0x95,
16471 ++ 0x7a, 0x57, 0xbe, 0xb2, 0x4d, 0x64, 0x6e, 0x99,
16472 ++ 0x4d, 0xc2, 0x9a, 0x27, 0x6a, 0xad, 0x45, 0x8d },
16473 ++ .b_public = (u8[32]){ 0x34, 0x3a, 0xc2, 0x0a, 0x3b, 0x9c, 0x6a, 0x27,
16474 ++ 0xb1, 0x00, 0x81, 0x76, 0x50, 0x9a, 0xd3, 0x07,
16475 ++ 0x35, 0x85, 0x6e, 0xc1, 0xc8, 0xd8, 0xfc, 0xae,
16476 ++ 0x13, 0x91, 0x2d, 0x08, 0xd1, 0x52, 0xf4, 0x6c },
16477 ++ .expected_ss = (u8[32]){ 0x39, 0x94, 0x91, 0xfc, 0xe8, 0xdf, 0xab, 0x73,
16478 ++ 0xb4, 0xf9, 0xf6, 0x11, 0xde, 0x8e, 0xa0, 0xb2,
16479 ++ 0x7b, 0x28, 0xf8, 0x59, 0x94, 0x25, 0x0b, 0x0f,
16480 ++ 0x47, 0x5d, 0x58, 0x5d, 0x04, 0x2a, 0xc2, 0x07 },
16481 ++ .secret_size = 32,
16482 ++ .b_public_size = 32,
16483 ++ .expected_ss_size = 32,
16484 ++
16485 ++},
16486 ++/* wycheproof - public key on twist */
16487 ++{
16488 ++ .secret = (u8[32]){ 0xd8, 0x77, 0xb2, 0x6d, 0x06, 0xdf, 0xf9, 0xd9,
16489 ++ 0xf7, 0xfd, 0x4c, 0x5b, 0x37, 0x69, 0xf8, 0xcd,
16490 ++ 0xd5, 0xb3, 0x05, 0x16, 0xa5, 0xab, 0x80, 0x6b,
16491 ++ 0xe3, 0x24, 0xff, 0x3e, 0xb6, 0x9e, 0xa0, 0xb2 },
16492 ++ .b_public = (u8[32]){ 0xfa, 0x69, 0x5f, 0xc7, 0xbe, 0x8d, 0x1b, 0xe5,
16493 ++ 0xbf, 0x70, 0x48, 0x98, 0xf3, 0x88, 0xc4, 0x52,
16494 ++ 0xba, 0xfd, 0xd3, 0xb8, 0xea, 0xe8, 0x05, 0xf8,
16495 ++ 0x68, 0x1a, 0x8d, 0x15, 0xc2, 0xd4, 0xe1, 0x42 },
16496 ++ .expected_ss = (u8[32]){ 0x2c, 0x4f, 0xe1, 0x1d, 0x49, 0x0a, 0x53, 0x86,
16497 ++ 0x17, 0x76, 0xb1, 0x3b, 0x43, 0x54, 0xab, 0xd4,
16498 ++ 0xcf, 0x5a, 0x97, 0x69, 0x9d, 0xb6, 0xe6, 0xc6,
16499 ++ 0x8c, 0x16, 0x26, 0xd0, 0x76, 0x62, 0xf7, 0x58 },
16500 ++ .secret_size = 32,
16501 ++ .b_public_size = 32,
16502 ++ .expected_ss_size = 32,
16503 ++
16504 ++},
16505 ++/* wycheproof - edge case on twist */
16506 ++{
16507 ++ .secret = (u8[32]){ 0x38, 0xdd, 0xe9, 0xf3, 0xe7, 0xb7, 0x99, 0x04,
16508 ++ 0x5f, 0x9a, 0xc3, 0x79, 0x3d, 0x4a, 0x92, 0x77,
16509 ++ 0xda, 0xde, 0xad, 0xc4, 0x1b, 0xec, 0x02, 0x90,
16510 ++ 0xf8, 0x1f, 0x74, 0x4f, 0x73, 0x77, 0x5f, 0x84 },
16511 ++ .b_public = (u8[32]){ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
16512 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
16513 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
16514 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
16515 ++ .expected_ss = (u8[32]){ 0x9a, 0x2c, 0xfe, 0x84, 0xff, 0x9c, 0x4a, 0x97,
16516 ++ 0x39, 0x62, 0x5c, 0xae, 0x4a, 0x3b, 0x82, 0xa9,
16517 ++ 0x06, 0x87, 0x7a, 0x44, 0x19, 0x46, 0xf8, 0xd7,
16518 ++ 0xb3, 0xd7, 0x95, 0xfe, 0x8f, 0x5d, 0x16, 0x39 },
16519 ++ .secret_size = 32,
16520 ++ .b_public_size = 32,
16521 ++ .expected_ss_size = 32,
16522 ++
16523 ++},
16524 ++/* wycheproof - edge case on twist */
16525 ++{
16526 ++ .secret = (u8[32]){ 0x98, 0x57, 0xa9, 0x14, 0xe3, 0xc2, 0x90, 0x36,
16527 ++ 0xfd, 0x9a, 0x44, 0x2b, 0xa5, 0x26, 0xb5, 0xcd,
16528 ++ 0xcd, 0xf2, 0x82, 0x16, 0x15, 0x3e, 0x63, 0x6c,
16529 ++ 0x10, 0x67, 0x7a, 0xca, 0xb6, 0xbd, 0x6a, 0xa5 },
16530 ++ .b_public = (u8[32]){ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
16531 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
16532 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
16533 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
16534 ++ .expected_ss = (u8[32]){ 0x4d, 0xa4, 0xe0, 0xaa, 0x07, 0x2c, 0x23, 0x2e,
16535 ++ 0xe2, 0xf0, 0xfa, 0x4e, 0x51, 0x9a, 0xe5, 0x0b,
16536 ++ 0x52, 0xc1, 0xed, 0xd0, 0x8a, 0x53, 0x4d, 0x4e,
16537 ++ 0xf3, 0x46, 0xc2, 0xe1, 0x06, 0xd2, 0x1d, 0x60 },
16538 ++ .secret_size = 32,
16539 ++ .b_public_size = 32,
16540 ++ .expected_ss_size = 32,
16541 ++
16542 ++},
16543 ++/* wycheproof - edge case on twist */
16544 ++{
16545 ++ .secret = (u8[32]){ 0x48, 0xe2, 0x13, 0x0d, 0x72, 0x33, 0x05, 0xed,
16546 ++ 0x05, 0xe6, 0xe5, 0x89, 0x4d, 0x39, 0x8a, 0x5e,
16547 ++ 0x33, 0x36, 0x7a, 0x8c, 0x6a, 0xac, 0x8f, 0xcd,
16548 ++ 0xf0, 0xa8, 0x8e, 0x4b, 0x42, 0x82, 0x0d, 0xb7 },
16549 ++ .b_public = (u8[32]){ 0xff, 0xff, 0xff, 0x03, 0x00, 0x00, 0xf8, 0xff,
16550 ++ 0xff, 0x1f, 0x00, 0x00, 0xc0, 0xff, 0xff, 0xff,
16551 ++ 0x00, 0x00, 0x00, 0xfe, 0xff, 0xff, 0x07, 0x00,
16552 ++ 0x00, 0xf0, 0xff, 0xff, 0x3f, 0x00, 0x00, 0x00 },
16553 ++ .expected_ss = (u8[32]){ 0x9e, 0xd1, 0x0c, 0x53, 0x74, 0x7f, 0x64, 0x7f,
16554 ++ 0x82, 0xf4, 0x51, 0x25, 0xd3, 0xde, 0x15, 0xa1,
16555 ++ 0xe6, 0xb8, 0x24, 0x49, 0x6a, 0xb4, 0x04, 0x10,
16556 ++ 0xff, 0xcc, 0x3c, 0xfe, 0x95, 0x76, 0x0f, 0x3b },
16557 ++ .secret_size = 32,
16558 ++ .b_public_size = 32,
16559 ++ .expected_ss_size = 32,
16560 ++
16561 ++},
16562 ++/* wycheproof - edge case on twist */
16563 ++{
16564 ++ .secret = (u8[32]){ 0x28, 0xf4, 0x10, 0x11, 0x69, 0x18, 0x51, 0xb3,
16565 ++ 0xa6, 0x2b, 0x64, 0x15, 0x53, 0xb3, 0x0d, 0x0d,
16566 ++ 0xfd, 0xdc, 0xb8, 0xff, 0xfc, 0xf5, 0x37, 0x00,
16567 ++ 0xa7, 0xbe, 0x2f, 0x6a, 0x87, 0x2e, 0x9f, 0xb0 },
16568 ++ .b_public = (u8[32]){ 0x00, 0x00, 0x00, 0xfc, 0xff, 0xff, 0x07, 0x00,
16569 ++ 0x00, 0xe0, 0xff, 0xff, 0x3f, 0x00, 0x00, 0x00,
16570 ++ 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0xf8, 0xff,
16571 ++ 0xff, 0x0f, 0x00, 0x00, 0xc0, 0xff, 0xff, 0x7f },
16572 ++ .expected_ss = (u8[32]){ 0xcf, 0x72, 0xb4, 0xaa, 0x6a, 0xa1, 0xc9, 0xf8,
16573 ++ 0x94, 0xf4, 0x16, 0x5b, 0x86, 0x10, 0x9a, 0xa4,
16574 ++ 0x68, 0x51, 0x76, 0x48, 0xe1, 0xf0, 0xcc, 0x70,
16575 ++ 0xe1, 0xab, 0x08, 0x46, 0x01, 0x76, 0x50, 0x6b },
16576 ++ .secret_size = 32,
16577 ++ .b_public_size = 32,
16578 ++ .expected_ss_size = 32,
16579 ++
16580 ++},
16581 ++/* wycheproof - edge case on twist */
16582 ++{
16583 ++ .secret = (u8[32]){ 0x18, 0xa9, 0x3b, 0x64, 0x99, 0xb9, 0xf6, 0xb3,
16584 ++ 0x22, 0x5c, 0xa0, 0x2f, 0xef, 0x41, 0x0e, 0x0a,
16585 ++ 0xde, 0xc2, 0x35, 0x32, 0x32, 0x1d, 0x2d, 0x8e,
16586 ++ 0xf1, 0xa6, 0xd6, 0x02, 0xa8, 0xc6, 0x5b, 0x83 },
16587 ++ .b_public = (u8[32]){ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
16588 ++ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
16589 ++ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
16590 ++ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x7f },
16591 ++ .expected_ss = (u8[32]){ 0x5d, 0x50, 0xb6, 0x28, 0x36, 0xbb, 0x69, 0x57,
16592 ++ 0x94, 0x10, 0x38, 0x6c, 0xf7, 0xbb, 0x81, 0x1c,
16593 ++ 0x14, 0xbf, 0x85, 0xb1, 0xc7, 0xb1, 0x7e, 0x59,
16594 ++ 0x24, 0xc7, 0xff, 0xea, 0x91, 0xef, 0x9e, 0x12 },
16595 ++ .secret_size = 32,
16596 ++ .b_public_size = 32,
16597 ++ .expected_ss_size = 32,
16598 ++
16599 ++},
16600 ++/* wycheproof - edge case on twist */
16601 ++{
16602 ++ .secret = (u8[32]){ 0xc0, 0x1d, 0x13, 0x05, 0xa1, 0x33, 0x8a, 0x1f,
16603 ++ 0xca, 0xc2, 0xba, 0x7e, 0x2e, 0x03, 0x2b, 0x42,
16604 ++ 0x7e, 0x0b, 0x04, 0x90, 0x31, 0x65, 0xac, 0xa9,
16605 ++ 0x57, 0xd8, 0xd0, 0x55, 0x3d, 0x87, 0x17, 0xb0 },
16606 ++ .b_public = (u8[32]){ 0xea, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16607 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16608 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16609 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
16610 ++ .expected_ss = (u8[32]){ 0x19, 0x23, 0x0e, 0xb1, 0x48, 0xd5, 0xd6, 0x7c,
16611 ++ 0x3c, 0x22, 0xab, 0x1d, 0xae, 0xff, 0x80, 0xa5,
16612 ++ 0x7e, 0xae, 0x42, 0x65, 0xce, 0x28, 0x72, 0x65,
16613 ++ 0x7b, 0x2c, 0x80, 0x99, 0xfc, 0x69, 0x8e, 0x50 },
16614 ++ .secret_size = 32,
16615 ++ .b_public_size = 32,
16616 ++ .expected_ss_size = 32,
16617 ++
16618 ++},
16619 ++/* wycheproof - edge case for public key */
16620 ++{
16621 ++ .secret = (u8[32]){ 0x38, 0x6f, 0x7f, 0x16, 0xc5, 0x07, 0x31, 0xd6,
16622 ++ 0x4f, 0x82, 0xe6, 0xa1, 0x70, 0xb1, 0x42, 0xa4,
16623 ++ 0xe3, 0x4f, 0x31, 0xfd, 0x77, 0x68, 0xfc, 0xb8,
16624 ++ 0x90, 0x29, 0x25, 0xe7, 0xd1, 0xe2, 0x1a, 0xbe },
16625 ++ .b_public = (u8[32]){ 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
16626 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
16627 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
16628 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
16629 ++ .expected_ss = (u8[32]){ 0x0f, 0xca, 0xb5, 0xd8, 0x42, 0xa0, 0x78, 0xd7,
16630 ++ 0xa7, 0x1f, 0xc5, 0x9b, 0x57, 0xbf, 0xb4, 0xca,
16631 ++ 0x0b, 0xe6, 0x87, 0x3b, 0x49, 0xdc, 0xdb, 0x9f,
16632 ++ 0x44, 0xe1, 0x4a, 0xe8, 0xfb, 0xdf, 0xa5, 0x42 },
16633 ++ .secret_size = 32,
16634 ++ .b_public_size = 32,
16635 ++ .expected_ss_size = 32,
16636 ++
16637 ++},
16638 ++/* wycheproof - edge case for public key */
16639 ++{
16640 ++ .secret = (u8[32]){ 0xe0, 0x23, 0xa2, 0x89, 0xbd, 0x5e, 0x90, 0xfa,
16641 ++ 0x28, 0x04, 0xdd, 0xc0, 0x19, 0xa0, 0x5e, 0xf3,
16642 ++ 0xe7, 0x9d, 0x43, 0x4b, 0xb6, 0xea, 0x2f, 0x52,
16643 ++ 0x2e, 0xcb, 0x64, 0x3a, 0x75, 0x29, 0x6e, 0x95 },
16644 ++ .b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
16645 ++ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
16646 ++ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
16647 ++ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 },
16648 ++ .expected_ss = (u8[32]){ 0x54, 0xce, 0x8f, 0x22, 0x75, 0xc0, 0x77, 0xe3,
16649 ++ 0xb1, 0x30, 0x6a, 0x39, 0x39, 0xc5, 0xe0, 0x3e,
16650 ++ 0xef, 0x6b, 0xbb, 0x88, 0x06, 0x05, 0x44, 0x75,
16651 ++ 0x8d, 0x9f, 0xef, 0x59, 0xb0, 0xbc, 0x3e, 0x4f },
16652 ++ .secret_size = 32,
16653 ++ .b_public_size = 32,
16654 ++ .expected_ss_size = 32,
16655 ++
16656 ++},
16657 ++/* wycheproof - edge case for public key */
16658 ++{
16659 ++ .secret = (u8[32]){ 0x68, 0xf0, 0x10, 0xd6, 0x2e, 0xe8, 0xd9, 0x26,
16660 ++ 0x05, 0x3a, 0x36, 0x1c, 0x3a, 0x75, 0xc6, 0xea,
16661 ++ 0x4e, 0xbd, 0xc8, 0x60, 0x6a, 0xb2, 0x85, 0x00,
16662 ++ 0x3a, 0x6f, 0x8f, 0x40, 0x76, 0xb0, 0x1e, 0x83 },
16663 ++ .b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16664 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16665 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16666 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x03 },
16667 ++ .expected_ss = (u8[32]){ 0xf1, 0x36, 0x77, 0x5c, 0x5b, 0xeb, 0x0a, 0xf8,
16668 ++ 0x11, 0x0a, 0xf1, 0x0b, 0x20, 0x37, 0x23, 0x32,
16669 ++ 0x04, 0x3c, 0xab, 0x75, 0x24, 0x19, 0x67, 0x87,
16670 ++ 0x75, 0xa2, 0x23, 0xdf, 0x57, 0xc9, 0xd3, 0x0d },
16671 ++ .secret_size = 32,
16672 ++ .b_public_size = 32,
16673 ++ .expected_ss_size = 32,
16674 ++
16675 ++},
16676 ++/* wycheproof - edge case for public key */
16677 ++{
16678 ++ .secret = (u8[32]){ 0x58, 0xeb, 0xcb, 0x35, 0xb0, 0xf8, 0x84, 0x5c,
16679 ++ 0xaf, 0x1e, 0xc6, 0x30, 0xf9, 0x65, 0x76, 0xb6,
16680 ++ 0x2c, 0x4b, 0x7b, 0x6c, 0x36, 0xb2, 0x9d, 0xeb,
16681 ++ 0x2c, 0xb0, 0x08, 0x46, 0x51, 0x75, 0x5c, 0x96 },
16682 ++ .b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xfb, 0xff,
16683 ++ 0xff, 0xdf, 0xff, 0xff, 0xdf, 0xff, 0xff, 0xff,
16684 ++ 0xfe, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xf7, 0xff,
16685 ++ 0xff, 0xf7, 0xff, 0xff, 0xbf, 0xff, 0xff, 0x3f },
16686 ++ .expected_ss = (u8[32]){ 0xbf, 0x9a, 0xff, 0xd0, 0x6b, 0x84, 0x40, 0x85,
16687 ++ 0x58, 0x64, 0x60, 0x96, 0x2e, 0xf2, 0x14, 0x6f,
16688 ++ 0xf3, 0xd4, 0x53, 0x3d, 0x94, 0x44, 0xaa, 0xb0,
16689 ++ 0x06, 0xeb, 0x88, 0xcc, 0x30, 0x54, 0x40, 0x7d },
16690 ++ .secret_size = 32,
16691 ++ .b_public_size = 32,
16692 ++ .expected_ss_size = 32,
16693 ++
16694 ++},
16695 ++/* wycheproof - edge case for public key */
16696 ++{
16697 ++ .secret = (u8[32]){ 0x18, 0x8c, 0x4b, 0xc5, 0xb9, 0xc4, 0x4b, 0x38,
16698 ++ 0xbb, 0x65, 0x8b, 0x9b, 0x2a, 0xe8, 0x2d, 0x5b,
16699 ++ 0x01, 0x01, 0x5e, 0x09, 0x31, 0x84, 0xb1, 0x7c,
16700 ++ 0xb7, 0x86, 0x35, 0x03, 0xa7, 0x83, 0xe1, 0xbb },
16701 ++ .b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16702 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16703 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16704 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f },
16705 ++ .expected_ss = (u8[32]){ 0xd4, 0x80, 0xde, 0x04, 0xf6, 0x99, 0xcb, 0x3b,
16706 ++ 0xe0, 0x68, 0x4a, 0x9c, 0xc2, 0xe3, 0x12, 0x81,
16707 ++ 0xea, 0x0b, 0xc5, 0xa9, 0xdc, 0xc1, 0x57, 0xd3,
16708 ++ 0xd2, 0x01, 0x58, 0xd4, 0x6c, 0xa5, 0x24, 0x6d },
16709 ++ .secret_size = 32,
16710 ++ .b_public_size = 32,
16711 ++ .expected_ss_size = 32,
16712 ++
16713 ++},
16714 ++/* wycheproof - edge case for public key */
16715 ++{
16716 ++ .secret = (u8[32]){ 0xe0, 0x6c, 0x11, 0xbb, 0x2e, 0x13, 0xce, 0x3d,
16717 ++ 0xc7, 0x67, 0x3f, 0x67, 0xf5, 0x48, 0x22, 0x42,
16718 ++ 0x90, 0x94, 0x23, 0xa9, 0xae, 0x95, 0xee, 0x98,
16719 ++ 0x6a, 0x98, 0x8d, 0x98, 0xfa, 0xee, 0x23, 0xa2 },
16720 ++ .b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f,
16721 ++ 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f,
16722 ++ 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f,
16723 ++ 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f },
16724 ++ .expected_ss = (u8[32]){ 0x4c, 0x44, 0x01, 0xcc, 0xe6, 0xb5, 0x1e, 0x4c,
16725 ++ 0xb1, 0x8f, 0x27, 0x90, 0x24, 0x6c, 0x9b, 0xf9,
16726 ++ 0x14, 0xdb, 0x66, 0x77, 0x50, 0xa1, 0xcb, 0x89,
16727 ++ 0x06, 0x90, 0x92, 0xaf, 0x07, 0x29, 0x22, 0x76 },
16728 ++ .secret_size = 32,
16729 ++ .b_public_size = 32,
16730 ++ .expected_ss_size = 32,
16731 ++
16732 ++},
16733 ++/* wycheproof - edge case for public key */
16734 ++{
16735 ++ .secret = (u8[32]){ 0xc0, 0x65, 0x8c, 0x46, 0xdd, 0xe1, 0x81, 0x29,
16736 ++ 0x29, 0x38, 0x77, 0x53, 0x5b, 0x11, 0x62, 0xb6,
16737 ++ 0xf9, 0xf5, 0x41, 0x4a, 0x23, 0xcf, 0x4d, 0x2c,
16738 ++ 0xbc, 0x14, 0x0a, 0x4d, 0x99, 0xda, 0x2b, 0x8f },
16739 ++ .b_public = (u8[32]){ 0xeb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16740 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16741 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16742 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
16743 ++ .expected_ss = (u8[32]){ 0x57, 0x8b, 0xa8, 0xcc, 0x2d, 0xbd, 0xc5, 0x75,
16744 ++ 0xaf, 0xcf, 0x9d, 0xf2, 0xb3, 0xee, 0x61, 0x89,
16745 ++ 0xf5, 0x33, 0x7d, 0x68, 0x54, 0xc7, 0x9b, 0x4c,
16746 ++ 0xe1, 0x65, 0xea, 0x12, 0x29, 0x3b, 0x3a, 0x0f },
16747 ++ .secret_size = 32,
16748 ++ .b_public_size = 32,
16749 ++ .expected_ss_size = 32,
16750 ++
16751 ++},
16752 ++/* wycheproof - public key >= p */
16753 ++{
16754 ++ .secret = (u8[32]){ 0xf0, 0x1e, 0x48, 0xda, 0xfa, 0xc9, 0xd7, 0xbc,
16755 ++ 0xf5, 0x89, 0xcb, 0xc3, 0x82, 0xc8, 0x78, 0xd1,
16756 ++ 0x8b, 0xda, 0x35, 0x50, 0x58, 0x9f, 0xfb, 0x5d,
16757 ++ 0x50, 0xb5, 0x23, 0xbe, 0xbe, 0x32, 0x9d, 0xae },
16758 ++ .b_public = (u8[32]){ 0xef, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16759 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16760 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16761 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
16762 ++ .expected_ss = (u8[32]){ 0xbd, 0x36, 0xa0, 0x79, 0x0e, 0xb8, 0x83, 0x09,
16763 ++ 0x8c, 0x98, 0x8b, 0x21, 0x78, 0x67, 0x73, 0xde,
16764 ++ 0x0b, 0x3a, 0x4d, 0xf1, 0x62, 0x28, 0x2c, 0xf1,
16765 ++ 0x10, 0xde, 0x18, 0xdd, 0x48, 0x4c, 0xe7, 0x4b },
16766 ++ .secret_size = 32,
16767 ++ .b_public_size = 32,
16768 ++ .expected_ss_size = 32,
16769 ++
16770 ++},
16771 ++/* wycheproof - public key >= p */
16772 ++{
16773 ++ .secret = (u8[32]){ 0x28, 0x87, 0x96, 0xbc, 0x5a, 0xff, 0x4b, 0x81,
16774 ++ 0xa3, 0x75, 0x01, 0x75, 0x7b, 0xc0, 0x75, 0x3a,
16775 ++ 0x3c, 0x21, 0x96, 0x47, 0x90, 0xd3, 0x86, 0x99,
16776 ++ 0x30, 0x8d, 0xeb, 0xc1, 0x7a, 0x6e, 0xaf, 0x8d },
16777 ++ .b_public = (u8[32]){ 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16778 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16779 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16780 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
16781 ++ .expected_ss = (u8[32]){ 0xb4, 0xe0, 0xdd, 0x76, 0xda, 0x7b, 0x07, 0x17,
16782 ++ 0x28, 0xb6, 0x1f, 0x85, 0x67, 0x71, 0xaa, 0x35,
16783 ++ 0x6e, 0x57, 0xed, 0xa7, 0x8a, 0x5b, 0x16, 0x55,
16784 ++ 0xcc, 0x38, 0x20, 0xfb, 0x5f, 0x85, 0x4c, 0x5c },
16785 ++ .secret_size = 32,
16786 ++ .b_public_size = 32,
16787 ++ .expected_ss_size = 32,
16788 ++
16789 ++},
16790 ++/* wycheproof - public key >= p */
16791 ++{
16792 ++ .secret = (u8[32]){ 0x98, 0xdf, 0x84, 0x5f, 0x66, 0x51, 0xbf, 0x11,
16793 ++ 0x38, 0x22, 0x1f, 0x11, 0x90, 0x41, 0xf7, 0x2b,
16794 ++ 0x6d, 0xbc, 0x3c, 0x4a, 0xce, 0x71, 0x43, 0xd9,
16795 ++ 0x9f, 0xd5, 0x5a, 0xd8, 0x67, 0x48, 0x0d, 0xa8 },
16796 ++ .b_public = (u8[32]){ 0xf1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16797 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16798 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16799 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
16800 ++ .expected_ss = (u8[32]){ 0x6f, 0xdf, 0x6c, 0x37, 0x61, 0x1d, 0xbd, 0x53,
16801 ++ 0x04, 0xdc, 0x0f, 0x2e, 0xb7, 0xc9, 0x51, 0x7e,
16802 ++ 0xb3, 0xc5, 0x0e, 0x12, 0xfd, 0x05, 0x0a, 0xc6,
16803 ++ 0xde, 0xc2, 0x70, 0x71, 0xd4, 0xbf, 0xc0, 0x34 },
16804 ++ .secret_size = 32,
16805 ++ .b_public_size = 32,
16806 ++ .expected_ss_size = 32,
16807 ++
16808 ++},
16809 ++/* wycheproof - public key >= p */
16810 ++{
16811 ++ .secret = (u8[32]){ 0xf0, 0x94, 0x98, 0xe4, 0x6f, 0x02, 0xf8, 0x78,
16812 ++ 0x82, 0x9e, 0x78, 0xb8, 0x03, 0xd3, 0x16, 0xa2,
16813 ++ 0xed, 0x69, 0x5d, 0x04, 0x98, 0xa0, 0x8a, 0xbd,
16814 ++ 0xf8, 0x27, 0x69, 0x30, 0xe2, 0x4e, 0xdc, 0xb0 },
16815 ++ .b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16816 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16817 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16818 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
16819 ++ .expected_ss = (u8[32]){ 0x4c, 0x8f, 0xc4, 0xb1, 0xc6, 0xab, 0x88, 0xfb,
16820 ++ 0x21, 0xf1, 0x8f, 0x6d, 0x4c, 0x81, 0x02, 0x40,
16821 ++ 0xd4, 0xe9, 0x46, 0x51, 0xba, 0x44, 0xf7, 0xa2,
16822 ++ 0xc8, 0x63, 0xce, 0xc7, 0xdc, 0x56, 0x60, 0x2d },
16823 ++ .secret_size = 32,
16824 ++ .b_public_size = 32,
16825 ++ .expected_ss_size = 32,
16826 ++
16827 ++},
16828 ++/* wycheproof - public key >= p */
16829 ++{
16830 ++ .secret = (u8[32]){ 0x18, 0x13, 0xc1, 0x0a, 0x5c, 0x7f, 0x21, 0xf9,
16831 ++ 0x6e, 0x17, 0xf2, 0x88, 0xc0, 0xcc, 0x37, 0x60,
16832 ++ 0x7c, 0x04, 0xc5, 0xf5, 0xae, 0xa2, 0xdb, 0x13,
16833 ++ 0x4f, 0x9e, 0x2f, 0xfc, 0x66, 0xbd, 0x9d, 0xb8 },
16834 ++ .b_public = (u8[32]){ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
16835 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
16836 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
16837 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 },
16838 ++ .expected_ss = (u8[32]){ 0x1c, 0xd0, 0xb2, 0x82, 0x67, 0xdc, 0x54, 0x1c,
16839 ++ 0x64, 0x2d, 0x6d, 0x7d, 0xca, 0x44, 0xa8, 0xb3,
16840 ++ 0x8a, 0x63, 0x73, 0x6e, 0xef, 0x5c, 0x4e, 0x65,
16841 ++ 0x01, 0xff, 0xbb, 0xb1, 0x78, 0x0c, 0x03, 0x3c },
16842 ++ .secret_size = 32,
16843 ++ .b_public_size = 32,
16844 ++ .expected_ss_size = 32,
16845 ++
16846 ++},
16847 ++/* wycheproof - public key >= p */
16848 ++{
16849 ++ .secret = (u8[32]){ 0x78, 0x57, 0xfb, 0x80, 0x86, 0x53, 0x64, 0x5a,
16850 ++ 0x0b, 0xeb, 0x13, 0x8a, 0x64, 0xf5, 0xf4, 0xd7,
16851 ++ 0x33, 0xa4, 0x5e, 0xa8, 0x4c, 0x3c, 0xda, 0x11,
16852 ++ 0xa9, 0xc0, 0x6f, 0x7e, 0x71, 0x39, 0x14, 0x9e },
16853 ++ .b_public = (u8[32]){ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
16854 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
16855 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
16856 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 },
16857 ++ .expected_ss = (u8[32]){ 0x87, 0x55, 0xbe, 0x01, 0xc6, 0x0a, 0x7e, 0x82,
16858 ++ 0x5c, 0xff, 0x3e, 0x0e, 0x78, 0xcb, 0x3a, 0xa4,
16859 ++ 0x33, 0x38, 0x61, 0x51, 0x6a, 0xa5, 0x9b, 0x1c,
16860 ++ 0x51, 0xa8, 0xb2, 0xa5, 0x43, 0xdf, 0xa8, 0x22 },
16861 ++ .secret_size = 32,
16862 ++ .b_public_size = 32,
16863 ++ .expected_ss_size = 32,
16864 ++
16865 ++},
16866 ++/* wycheproof - public key >= p */
16867 ++{
16868 ++ .secret = (u8[32]){ 0xe0, 0x3a, 0xa8, 0x42, 0xe2, 0xab, 0xc5, 0x6e,
16869 ++ 0x81, 0xe8, 0x7b, 0x8b, 0x9f, 0x41, 0x7b, 0x2a,
16870 ++ 0x1e, 0x59, 0x13, 0xc7, 0x23, 0xee, 0xd2, 0x8d,
16871 ++ 0x75, 0x2f, 0x8d, 0x47, 0xa5, 0x9f, 0x49, 0x8f },
16872 ++ .b_public = (u8[32]){ 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
16873 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
16874 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
16875 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 },
16876 ++ .expected_ss = (u8[32]){ 0x54, 0xc9, 0xa1, 0xed, 0x95, 0xe5, 0x46, 0xd2,
16877 ++ 0x78, 0x22, 0xa3, 0x60, 0x93, 0x1d, 0xda, 0x60,
16878 ++ 0xa1, 0xdf, 0x04, 0x9d, 0xa6, 0xf9, 0x04, 0x25,
16879 ++ 0x3c, 0x06, 0x12, 0xbb, 0xdc, 0x08, 0x74, 0x76 },
16880 ++ .secret_size = 32,
16881 ++ .b_public_size = 32,
16882 ++ .expected_ss_size = 32,
16883 ++
16884 ++},
16885 ++/* wycheproof - public key >= p */
16886 ++{
16887 ++ .secret = (u8[32]){ 0xf8, 0xf7, 0x07, 0xb7, 0x99, 0x9b, 0x18, 0xcb,
16888 ++ 0x0d, 0x6b, 0x96, 0x12, 0x4f, 0x20, 0x45, 0x97,
16889 ++ 0x2c, 0xa2, 0x74, 0xbf, 0xc1, 0x54, 0xad, 0x0c,
16890 ++ 0x87, 0x03, 0x8c, 0x24, 0xc6, 0xd0, 0xd4, 0xb2 },
16891 ++ .b_public = (u8[32]){ 0xda, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16892 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16893 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16894 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
16895 ++ .expected_ss = (u8[32]){ 0xcc, 0x1f, 0x40, 0xd7, 0x43, 0xcd, 0xc2, 0x23,
16896 ++ 0x0e, 0x10, 0x43, 0xda, 0xba, 0x8b, 0x75, 0xe8,
16897 ++ 0x10, 0xf1, 0xfb, 0xab, 0x7f, 0x25, 0x52, 0x69,
16898 ++ 0xbd, 0x9e, 0xbb, 0x29, 0xe6, 0xbf, 0x49, 0x4f },
16899 ++ .secret_size = 32,
16900 ++ .b_public_size = 32,
16901 ++ .expected_ss_size = 32,
16902 ++
16903 ++},
16904 ++/* wycheproof - public key >= p */
16905 ++{
16906 ++ .secret = (u8[32]){ 0xa0, 0x34, 0xf6, 0x84, 0xfa, 0x63, 0x1e, 0x1a,
16907 ++ 0x34, 0x81, 0x18, 0xc1, 0xce, 0x4c, 0x98, 0x23,
16908 ++ 0x1f, 0x2d, 0x9e, 0xec, 0x9b, 0xa5, 0x36, 0x5b,
16909 ++ 0x4a, 0x05, 0xd6, 0x9a, 0x78, 0x5b, 0x07, 0x96 },
16910 ++ .b_public = (u8[32]){ 0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16911 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16912 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16913 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
16914 ++ .expected_ss = (u8[32]){ 0x54, 0x99, 0x8e, 0xe4, 0x3a, 0x5b, 0x00, 0x7b,
16915 ++ 0xf4, 0x99, 0xf0, 0x78, 0xe7, 0x36, 0x52, 0x44,
16916 ++ 0x00, 0xa8, 0xb5, 0xc7, 0xe9, 0xb9, 0xb4, 0x37,
16917 ++ 0x71, 0x74, 0x8c, 0x7c, 0xdf, 0x88, 0x04, 0x12 },
16918 ++ .secret_size = 32,
16919 ++ .b_public_size = 32,
16920 ++ .expected_ss_size = 32,
16921 ++
16922 ++},
16923 ++/* wycheproof - public key >= p */
16924 ++{
16925 ++ .secret = (u8[32]){ 0x30, 0xb6, 0xc6, 0xa0, 0xf2, 0xff, 0xa6, 0x80,
16926 ++ 0x76, 0x8f, 0x99, 0x2b, 0xa8, 0x9e, 0x15, 0x2d,
16927 ++ 0x5b, 0xc9, 0x89, 0x3d, 0x38, 0xc9, 0x11, 0x9b,
16928 ++ 0xe4, 0xf7, 0x67, 0xbf, 0xab, 0x6e, 0x0c, 0xa5 },
16929 ++ .b_public = (u8[32]){ 0xdc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16930 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16931 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16932 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
16933 ++ .expected_ss = (u8[32]){ 0xea, 0xd9, 0xb3, 0x8e, 0xfd, 0xd7, 0x23, 0x63,
16934 ++ 0x79, 0x34, 0xe5, 0x5a, 0xb7, 0x17, 0xa7, 0xae,
16935 ++ 0x09, 0xeb, 0x86, 0xa2, 0x1d, 0xc3, 0x6a, 0x3f,
16936 ++ 0xee, 0xb8, 0x8b, 0x75, 0x9e, 0x39, 0x1e, 0x09 },
16937 ++ .secret_size = 32,
16938 ++ .b_public_size = 32,
16939 ++ .expected_ss_size = 32,
16940 ++
16941 ++},
16942 ++/* wycheproof - public key >= p */
16943 ++{
16944 ++ .secret = (u8[32]){ 0x90, 0x1b, 0x9d, 0xcf, 0x88, 0x1e, 0x01, 0xe0,
16945 ++ 0x27, 0x57, 0x50, 0x35, 0xd4, 0x0b, 0x43, 0xbd,
16946 ++ 0xc1, 0xc5, 0x24, 0x2e, 0x03, 0x08, 0x47, 0x49,
16947 ++ 0x5b, 0x0c, 0x72, 0x86, 0x46, 0x9b, 0x65, 0x91 },
16948 ++ .b_public = (u8[32]){ 0xea, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16949 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16950 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16951 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
16952 ++ .expected_ss = (u8[32]){ 0x60, 0x2f, 0xf4, 0x07, 0x89, 0xb5, 0x4b, 0x41,
16953 ++ 0x80, 0x59, 0x15, 0xfe, 0x2a, 0x62, 0x21, 0xf0,
16954 ++ 0x7a, 0x50, 0xff, 0xc2, 0xc3, 0xfc, 0x94, 0xcf,
16955 ++ 0x61, 0xf1, 0x3d, 0x79, 0x04, 0xe8, 0x8e, 0x0e },
16956 ++ .secret_size = 32,
16957 ++ .b_public_size = 32,
16958 ++ .expected_ss_size = 32,
16959 ++
16960 ++},
16961 ++/* wycheproof - public key >= p */
16962 ++{
16963 ++ .secret = (u8[32]){ 0x80, 0x46, 0x67, 0x7c, 0x28, 0xfd, 0x82, 0xc9,
16964 ++ 0xa1, 0xbd, 0xb7, 0x1a, 0x1a, 0x1a, 0x34, 0xfa,
16965 ++ 0xba, 0x12, 0x25, 0xe2, 0x50, 0x7f, 0xe3, 0xf5,
16966 ++ 0x4d, 0x10, 0xbd, 0x5b, 0x0d, 0x86, 0x5f, 0x8e },
16967 ++ .b_public = (u8[32]){ 0xeb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16968 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16969 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16970 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
16971 ++ .expected_ss = (u8[32]){ 0xe0, 0x0a, 0xe8, 0xb1, 0x43, 0x47, 0x12, 0x47,
16972 ++ 0xba, 0x24, 0xf1, 0x2c, 0x88, 0x55, 0x36, 0xc3,
16973 ++ 0xcb, 0x98, 0x1b, 0x58, 0xe1, 0xe5, 0x6b, 0x2b,
16974 ++ 0xaf, 0x35, 0xc1, 0x2a, 0xe1, 0xf7, 0x9c, 0x26 },
16975 ++ .secret_size = 32,
16976 ++ .b_public_size = 32,
16977 ++ .expected_ss_size = 32,
16978 ++
16979 ++},
16980 ++/* wycheproof - public key >= p */
16981 ++{
16982 ++ .secret = (u8[32]){ 0x60, 0x2f, 0x7e, 0x2f, 0x68, 0xa8, 0x46, 0xb8,
16983 ++ 0x2c, 0xc2, 0x69, 0xb1, 0xd4, 0x8e, 0x93, 0x98,
16984 ++ 0x86, 0xae, 0x54, 0xfd, 0x63, 0x6c, 0x1f, 0xe0,
16985 ++ 0x74, 0xd7, 0x10, 0x12, 0x7d, 0x47, 0x24, 0x91 },
16986 ++ .b_public = (u8[32]){ 0xef, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16987 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16988 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
16989 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
16990 ++ .expected_ss = (u8[32]){ 0x98, 0xcb, 0x9b, 0x50, 0xdd, 0x3f, 0xc2, 0xb0,
16991 ++ 0xd4, 0xf2, 0xd2, 0xbf, 0x7c, 0x5c, 0xfd, 0xd1,
16992 ++ 0x0c, 0x8f, 0xcd, 0x31, 0xfc, 0x40, 0xaf, 0x1a,
16993 ++ 0xd4, 0x4f, 0x47, 0xc1, 0x31, 0x37, 0x63, 0x62 },
16994 ++ .secret_size = 32,
16995 ++ .b_public_size = 32,
16996 ++ .expected_ss_size = 32,
16997 ++
16998 ++},
16999 ++/* wycheproof - public key >= p */
17000 ++{
17001 ++ .secret = (u8[32]){ 0x60, 0x88, 0x7b, 0x3d, 0xc7, 0x24, 0x43, 0x02,
17002 ++ 0x6e, 0xbe, 0xdb, 0xbb, 0xb7, 0x06, 0x65, 0xf4,
17003 ++ 0x2b, 0x87, 0xad, 0xd1, 0x44, 0x0e, 0x77, 0x68,
17004 ++ 0xfb, 0xd7, 0xe8, 0xe2, 0xce, 0x5f, 0x63, 0x9d },
17005 ++ .b_public = (u8[32]){ 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
17006 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
17007 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
17008 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
17009 ++ .expected_ss = (u8[32]){ 0x38, 0xd6, 0x30, 0x4c, 0x4a, 0x7e, 0x6d, 0x9f,
17010 ++ 0x79, 0x59, 0x33, 0x4f, 0xb5, 0x24, 0x5b, 0xd2,
17011 ++ 0xc7, 0x54, 0x52, 0x5d, 0x4c, 0x91, 0xdb, 0x95,
17012 ++ 0x02, 0x06, 0x92, 0x62, 0x34, 0xc1, 0xf6, 0x33 },
17013 ++ .secret_size = 32,
17014 ++ .b_public_size = 32,
17015 ++ .expected_ss_size = 32,
17016 ++
17017 ++},
17018 ++/* wycheproof - public key >= p */
17019 ++{
17020 ++ .secret = (u8[32]){ 0x78, 0xd3, 0x1d, 0xfa, 0x85, 0x44, 0x97, 0xd7,
17021 ++ 0x2d, 0x8d, 0xef, 0x8a, 0x1b, 0x7f, 0xb0, 0x06,
17022 ++ 0xce, 0xc2, 0xd8, 0xc4, 0x92, 0x46, 0x47, 0xc9,
17023 ++ 0x38, 0x14, 0xae, 0x56, 0xfa, 0xed, 0xa4, 0x95 },
17024 ++ .b_public = (u8[32]){ 0xf1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
17025 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
17026 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
17027 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
17028 ++ .expected_ss = (u8[32]){ 0x78, 0x6c, 0xd5, 0x49, 0x96, 0xf0, 0x14, 0xa5,
17029 ++ 0xa0, 0x31, 0xec, 0x14, 0xdb, 0x81, 0x2e, 0xd0,
17030 ++ 0x83, 0x55, 0x06, 0x1f, 0xdb, 0x5d, 0xe6, 0x80,
17031 ++ 0xa8, 0x00, 0xac, 0x52, 0x1f, 0x31, 0x8e, 0x23 },
17032 ++ .secret_size = 32,
17033 ++ .b_public_size = 32,
17034 ++ .expected_ss_size = 32,
17035 ++
17036 ++},
17037 ++/* wycheproof - public key >= p */
17038 ++{
17039 ++ .secret = (u8[32]){ 0xc0, 0x4c, 0x5b, 0xae, 0xfa, 0x83, 0x02, 0xdd,
17040 ++ 0xde, 0xd6, 0xa4, 0xbb, 0x95, 0x77, 0x61, 0xb4,
17041 ++ 0xeb, 0x97, 0xae, 0xfa, 0x4f, 0xc3, 0xb8, 0x04,
17042 ++ 0x30, 0x85, 0xf9, 0x6a, 0x56, 0x59, 0xb3, 0xa5 },
17043 ++ .b_public = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
17044 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
17045 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
17046 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
17047 ++ .expected_ss = (u8[32]){ 0x29, 0xae, 0x8b, 0xc7, 0x3e, 0x9b, 0x10, 0xa0,
17048 ++ 0x8b, 0x4f, 0x68, 0x1c, 0x43, 0xc3, 0xe0, 0xac,
17049 ++ 0x1a, 0x17, 0x1d, 0x31, 0xb3, 0x8f, 0x1a, 0x48,
17050 ++ 0xef, 0xba, 0x29, 0xae, 0x63, 0x9e, 0xa1, 0x34 },
17051 ++ .secret_size = 32,
17052 ++ .b_public_size = 32,
17053 ++ .expected_ss_size = 32,
17054 ++
17055 ++},
17056 ++/* wycheproof - RFC 7748 */
17057 ++{
17058 ++ .secret = (u8[32]){ 0xa0, 0x46, 0xe3, 0x6b, 0xf0, 0x52, 0x7c, 0x9d,
17059 ++ 0x3b, 0x16, 0x15, 0x4b, 0x82, 0x46, 0x5e, 0xdd,
17060 ++ 0x62, 0x14, 0x4c, 0x0a, 0xc1, 0xfc, 0x5a, 0x18,
17061 ++ 0x50, 0x6a, 0x22, 0x44, 0xba, 0x44, 0x9a, 0x44 },
17062 ++ .b_public = (u8[32]){ 0xe6, 0xdb, 0x68, 0x67, 0x58, 0x30, 0x30, 0xdb,
17063 ++ 0x35, 0x94, 0xc1, 0xa4, 0x24, 0xb1, 0x5f, 0x7c,
17064 ++ 0x72, 0x66, 0x24, 0xec, 0x26, 0xb3, 0x35, 0x3b,
17065 ++ 0x10, 0xa9, 0x03, 0xa6, 0xd0, 0xab, 0x1c, 0x4c },
17066 ++ .expected_ss = (u8[32]){ 0xc3, 0xda, 0x55, 0x37, 0x9d, 0xe9, 0xc6, 0x90,
17067 ++ 0x8e, 0x94, 0xea, 0x4d, 0xf2, 0x8d, 0x08, 0x4f,
17068 ++ 0x32, 0xec, 0xcf, 0x03, 0x49, 0x1c, 0x71, 0xf7,
17069 ++ 0x54, 0xb4, 0x07, 0x55, 0x77, 0xa2, 0x85, 0x52 },
17070 ++ .secret_size = 32,
17071 ++ .b_public_size = 32,
17072 ++ .expected_ss_size = 32,
17073 ++
17074 ++},
17075 ++/* wycheproof - RFC 7748 */
17076 ++{
17077 ++ .secret = (u8[32]){ 0x48, 0x66, 0xe9, 0xd4, 0xd1, 0xb4, 0x67, 0x3c,
17078 ++ 0x5a, 0xd2, 0x26, 0x91, 0x95, 0x7d, 0x6a, 0xf5,
17079 ++ 0xc1, 0x1b, 0x64, 0x21, 0xe0, 0xea, 0x01, 0xd4,
17080 ++ 0x2c, 0xa4, 0x16, 0x9e, 0x79, 0x18, 0xba, 0x4d },
17081 ++ .b_public = (u8[32]){ 0xe5, 0x21, 0x0f, 0x12, 0x78, 0x68, 0x11, 0xd3,
17082 ++ 0xf4, 0xb7, 0x95, 0x9d, 0x05, 0x38, 0xae, 0x2c,
17083 ++ 0x31, 0xdb, 0xe7, 0x10, 0x6f, 0xc0, 0x3c, 0x3e,
17084 ++ 0xfc, 0x4c, 0xd5, 0x49, 0xc7, 0x15, 0xa4, 0x13 },
17085 ++ .expected_ss = (u8[32]){ 0x95, 0xcb, 0xde, 0x94, 0x76, 0xe8, 0x90, 0x7d,
17086 ++ 0x7a, 0xad, 0xe4, 0x5c, 0xb4, 0xb8, 0x73, 0xf8,
17087 ++ 0x8b, 0x59, 0x5a, 0x68, 0x79, 0x9f, 0xa1, 0x52,
17088 ++ 0xe6, 0xf8, 0xf7, 0x64, 0x7a, 0xac, 0x79, 0x57 },
17089 ++ .secret_size = 32,
17090 ++ .b_public_size = 32,
17091 ++ .expected_ss_size = 32,
17092 ++
17093 ++},
17094 ++/* wycheproof - edge case for shared secret */
17095 ++{
17096 ++ .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
17097 ++ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
17098 ++ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
17099 ++ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
17100 ++ .b_public = (u8[32]){ 0x0a, 0xb4, 0xe7, 0x63, 0x80, 0xd8, 0x4d, 0xde,
17101 ++ 0x4f, 0x68, 0x33, 0xc5, 0x8f, 0x2a, 0x9f, 0xb8,
17102 ++ 0xf8, 0x3b, 0xb0, 0x16, 0x9b, 0x17, 0x2b, 0xe4,
17103 ++ 0xb6, 0xe0, 0x59, 0x28, 0x87, 0x74, 0x1a, 0x36 },
17104 ++ .expected_ss = (u8[32]){ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
17105 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
17106 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
17107 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
17108 ++ .secret_size = 32,
17109 ++ .b_public_size = 32,
17110 ++ .expected_ss_size = 32,
17111 ++
17112 ++},
17113 ++/* wycheproof - edge case for shared secret */
17114 ++{
17115 ++ .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
17116 ++ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
17117 ++ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
17118 ++ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
17119 ++ .b_public = (u8[32]){ 0x89, 0xe1, 0x0d, 0x57, 0x01, 0xb4, 0x33, 0x7d,
17120 ++ 0x2d, 0x03, 0x21, 0x81, 0x53, 0x8b, 0x10, 0x64,
17121 ++ 0xbd, 0x40, 0x84, 0x40, 0x1c, 0xec, 0xa1, 0xfd,
17122 ++ 0x12, 0x66, 0x3a, 0x19, 0x59, 0x38, 0x80, 0x00 },
17123 ++ .expected_ss = (u8[32]){ 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
17124 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
17125 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
17126 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
17127 ++ .secret_size = 32,
17128 ++ .b_public_size = 32,
17129 ++ .expected_ss_size = 32,
17130 ++
17131 ++},
17132 ++/* wycheproof - edge case for shared secret */
17133 ++{
17134 ++ .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
17135 ++ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
17136 ++ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
17137 ++ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
17138 ++ .b_public = (u8[32]){ 0x2b, 0x55, 0xd3, 0xaa, 0x4a, 0x8f, 0x80, 0xc8,
17139 ++ 0xc0, 0xb2, 0xae, 0x5f, 0x93, 0x3e, 0x85, 0xaf,
17140 ++ 0x49, 0xbe, 0xac, 0x36, 0xc2, 0xfa, 0x73, 0x94,
17141 ++ 0xba, 0xb7, 0x6c, 0x89, 0x33, 0xf8, 0xf8, 0x1d },
17142 ++ .expected_ss = (u8[32]){ 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
17143 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
17144 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
17145 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
17146 ++ .secret_size = 32,
17147 ++ .b_public_size = 32,
17148 ++ .expected_ss_size = 32,
17149 ++
17150 ++},
17151 ++/* wycheproof - edge case for shared secret */
17152 ++{
17153 ++ .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
17154 ++ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
17155 ++ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
17156 ++ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
17157 ++ .b_public = (u8[32]){ 0x63, 0xe5, 0xb1, 0xfe, 0x96, 0x01, 0xfe, 0x84,
17158 ++ 0x38, 0x5d, 0x88, 0x66, 0xb0, 0x42, 0x12, 0x62,
17159 ++ 0xf7, 0x8f, 0xbf, 0xa5, 0xaf, 0xf9, 0x58, 0x5e,
17160 ++ 0x62, 0x66, 0x79, 0xb1, 0x85, 0x47, 0xd9, 0x59 },
17161 ++ .expected_ss = (u8[32]){ 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
17162 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
17163 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
17164 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f },
17165 ++ .secret_size = 32,
17166 ++ .b_public_size = 32,
17167 ++ .expected_ss_size = 32,
17168 ++
17169 ++},
17170 ++/* wycheproof - edge case for shared secret */
17171 ++{
17172 ++ .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
17173 ++ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
17174 ++ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
17175 ++ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
17176 ++ .b_public = (u8[32]){ 0xe4, 0x28, 0xf3, 0xda, 0xc1, 0x78, 0x09, 0xf8,
17177 ++ 0x27, 0xa5, 0x22, 0xce, 0x32, 0x35, 0x50, 0x58,
17178 ++ 0xd0, 0x73, 0x69, 0x36, 0x4a, 0xa7, 0x89, 0x02,
17179 ++ 0xee, 0x10, 0x13, 0x9b, 0x9f, 0x9d, 0xd6, 0x53 },
17180 ++ .expected_ss = (u8[32]){ 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
17181 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
17182 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
17183 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f },
17184 ++ .secret_size = 32,
17185 ++ .b_public_size = 32,
17186 ++ .expected_ss_size = 32,
17187 ++
17188 ++},
17189 ++/* wycheproof - edge case for shared secret */
17190 ++{
17191 ++ .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
17192 ++ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
17193 ++ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
17194 ++ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
17195 ++ .b_public = (u8[32]){ 0xb3, 0xb5, 0x0e, 0x3e, 0xd3, 0xa4, 0x07, 0xb9,
17196 ++ 0x5d, 0xe9, 0x42, 0xef, 0x74, 0x57, 0x5b, 0x5a,
17197 ++ 0xb8, 0xa1, 0x0c, 0x09, 0xee, 0x10, 0x35, 0x44,
17198 ++ 0xd6, 0x0b, 0xdf, 0xed, 0x81, 0x38, 0xab, 0x2b },
17199 ++ .expected_ss = (u8[32]){ 0xf9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
17200 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
17201 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
17202 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f },
17203 ++ .secret_size = 32,
17204 ++ .b_public_size = 32,
17205 ++ .expected_ss_size = 32,
17206 ++
17207 ++},
17208 ++/* wycheproof - edge case for shared secret */
17209 ++{
17210 ++ .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
17211 ++ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
17212 ++ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
17213 ++ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
17214 ++ .b_public = (u8[32]){ 0x21, 0x3f, 0xff, 0xe9, 0x3d, 0x5e, 0xa8, 0xcd,
17215 ++ 0x24, 0x2e, 0x46, 0x28, 0x44, 0x02, 0x99, 0x22,
17216 ++ 0xc4, 0x3c, 0x77, 0xc9, 0xe3, 0xe4, 0x2f, 0x56,
17217 ++ 0x2f, 0x48, 0x5d, 0x24, 0xc5, 0x01, 0xa2, 0x0b },
17218 ++ .expected_ss = (u8[32]){ 0xf3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
17219 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
17220 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
17221 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f },
17222 ++ .secret_size = 32,
17223 ++ .b_public_size = 32,
17224 ++ .expected_ss_size = 32,
17225 ++
17226 ++},
17227 ++/* wycheproof - edge case for shared secret */
17228 ++{
17229 ++ .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
17230 ++ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
17231 ++ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
17232 ++ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
17233 ++ .b_public = (u8[32]){ 0x91, 0xb2, 0x32, 0xa1, 0x78, 0xb3, 0xcd, 0x53,
17234 ++ 0x09, 0x32, 0x44, 0x1e, 0x61, 0x39, 0x41, 0x8f,
17235 ++ 0x72, 0x17, 0x22, 0x92, 0xf1, 0xda, 0x4c, 0x18,
17236 ++ 0x34, 0xfc, 0x5e, 0xbf, 0xef, 0xb5, 0x1e, 0x3f },
17237 ++ .expected_ss = (u8[32]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
17238 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
17239 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
17240 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x03 },
17241 ++ .secret_size = 32,
17242 ++ .b_public_size = 32,
17243 ++ .expected_ss_size = 32,
17244 ++
17245 ++},
17246 ++/* wycheproof - edge case for shared secret */
17247 ++{
17248 ++ .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
17249 ++ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
17250 ++ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
17251 ++ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
17252 ++ .b_public = (u8[32]){ 0x04, 0x5c, 0x6e, 0x11, 0xc5, 0xd3, 0x32, 0x55,
17253 ++ 0x6c, 0x78, 0x22, 0xfe, 0x94, 0xeb, 0xf8, 0x9b,
17254 ++ 0x56, 0xa3, 0x87, 0x8d, 0xc2, 0x7c, 0xa0, 0x79,
17255 ++ 0x10, 0x30, 0x58, 0x84, 0x9f, 0xab, 0xcb, 0x4f },
17256 ++ .expected_ss = (u8[32]){ 0xe5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
17257 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
17258 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
17259 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
17260 ++ .secret_size = 32,
17261 ++ .b_public_size = 32,
17262 ++ .expected_ss_size = 32,
17263 ++
17264 ++},
17265 ++/* wycheproof - edge case for shared secret */
17266 ++{
17267 ++ .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
17268 ++ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
17269 ++ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
17270 ++ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
17271 ++ .b_public = (u8[32]){ 0x1c, 0xa2, 0x19, 0x0b, 0x71, 0x16, 0x35, 0x39,
17272 ++ 0x06, 0x3c, 0x35, 0x77, 0x3b, 0xda, 0x0c, 0x9c,
17273 ++ 0x92, 0x8e, 0x91, 0x36, 0xf0, 0x62, 0x0a, 0xeb,
17274 ++ 0x09, 0x3f, 0x09, 0x91, 0x97, 0xb7, 0xf7, 0x4e },
17275 ++ .expected_ss = (u8[32]){ 0xe3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
17276 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
17277 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
17278 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
17279 ++ .secret_size = 32,
17280 ++ .b_public_size = 32,
17281 ++ .expected_ss_size = 32,
17282 ++
17283 ++},
17284 ++/* wycheproof - edge case for shared secret */
17285 ++{
17286 ++ .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
17287 ++ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
17288 ++ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
17289 ++ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
17290 ++ .b_public = (u8[32]){ 0xf7, 0x6e, 0x90, 0x10, 0xac, 0x33, 0xc5, 0x04,
17291 ++ 0x3b, 0x2d, 0x3b, 0x76, 0xa8, 0x42, 0x17, 0x10,
17292 ++ 0x00, 0xc4, 0x91, 0x62, 0x22, 0xe9, 0xe8, 0x58,
17293 ++ 0x97, 0xa0, 0xae, 0xc7, 0xf6, 0x35, 0x0b, 0x3c },
17294 ++ .expected_ss = (u8[32]){ 0xdd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
17295 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
17296 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
17297 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
17298 ++ .secret_size = 32,
17299 ++ .b_public_size = 32,
17300 ++ .expected_ss_size = 32,
17301 ++
17302 ++},
17303 ++/* wycheproof - edge case for shared secret */
17304 ++{
17305 ++ .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
17306 ++ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
17307 ++ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
17308 ++ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
17309 ++ .b_public = (u8[32]){ 0xbb, 0x72, 0x68, 0x8d, 0x8f, 0x8a, 0xa7, 0xa3,
17310 ++ 0x9c, 0xd6, 0x06, 0x0c, 0xd5, 0xc8, 0x09, 0x3c,
17311 ++ 0xde, 0xc6, 0xfe, 0x34, 0x19, 0x37, 0xc3, 0x88,
17312 ++ 0x6a, 0x99, 0x34, 0x6c, 0xd0, 0x7f, 0xaa, 0x55 },
17313 ++ .expected_ss = (u8[32]){ 0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
17314 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
17315 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
17316 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
17317 ++ .secret_size = 32,
17318 ++ .b_public_size = 32,
17319 ++ .expected_ss_size = 32,
17320 ++
17321 ++},
17322 ++/* wycheproof - edge case for shared secret */
17323 ++{
17324 ++ .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
17325 ++ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
17326 ++ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
17327 ++ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
17328 ++ .b_public = (u8[32]){ 0x88, 0xfd, 0xde, 0xa1, 0x93, 0x39, 0x1c, 0x6a,
17329 ++ 0x59, 0x33, 0xef, 0x9b, 0x71, 0x90, 0x15, 0x49,
17330 ++ 0x44, 0x72, 0x05, 0xaa, 0xe9, 0xda, 0x92, 0x8a,
17331 ++ 0x6b, 0x91, 0xa3, 0x52, 0xba, 0x10, 0xf4, 0x1f },
17332 ++ .expected_ss = (u8[32]){ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
17333 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
17334 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
17335 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 },
17336 ++ .secret_size = 32,
17337 ++ .b_public_size = 32,
17338 ++ .expected_ss_size = 32,
17339 ++
17340 ++},
17341 ++/* wycheproof - edge case for shared secret */
17342 ++{
17343 ++ .secret = (u8[32]){ 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
17344 ++ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
17345 ++ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
17346 ++ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
17347 ++ .b_public = (u8[32]){ 0x30, 0x3b, 0x39, 0x2f, 0x15, 0x31, 0x16, 0xca,
17348 ++ 0xd9, 0xcc, 0x68, 0x2a, 0x00, 0xcc, 0xc4, 0x4c,
17349 ++ 0x95, 0xff, 0x0d, 0x3b, 0xbe, 0x56, 0x8b, 0xeb,
17350 ++ 0x6c, 0x4e, 0x73, 0x9b, 0xaf, 0xdc, 0x2c, 0x68 },
17351 ++ .expected_ss = (u8[32]){ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
17352 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
17353 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
17354 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00 },
17355 ++ .secret_size = 32,
17356 ++ .b_public_size = 32,
17357 ++ .expected_ss_size = 32,
17358 ++
17359 ++},
17360 ++/* wycheproof - checking for overflow */
17361 ++{
17362 ++ .secret = (u8[32]){ 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d,
17363 ++ 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d,
17364 ++ 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c,
17365 ++ 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 },
17366 ++ .b_public = (u8[32]){ 0xfd, 0x30, 0x0a, 0xeb, 0x40, 0xe1, 0xfa, 0x58,
17367 ++ 0x25, 0x18, 0x41, 0x2b, 0x49, 0xb2, 0x08, 0xa7,
17368 ++ 0x84, 0x2b, 0x1e, 0x1f, 0x05, 0x6a, 0x04, 0x01,
17369 ++ 0x78, 0xea, 0x41, 0x41, 0x53, 0x4f, 0x65, 0x2d },
17370 ++ .expected_ss = (u8[32]){ 0xb7, 0x34, 0x10, 0x5d, 0xc2, 0x57, 0x58, 0x5d,
17371 ++ 0x73, 0xb5, 0x66, 0xcc, 0xb7, 0x6f, 0x06, 0x27,
17372 ++ 0x95, 0xcc, 0xbe, 0xc8, 0x91, 0x28, 0xe5, 0x2b,
17373 ++ 0x02, 0xf3, 0xe5, 0x96, 0x39, 0xf1, 0x3c, 0x46 },
17374 ++ .secret_size = 32,
17375 ++ .b_public_size = 32,
17376 ++ .expected_ss_size = 32,
17377 ++
17378 ++},
17379 ++/* wycheproof - checking for overflow */
17380 ++{
17381 ++ .secret = (u8[32]){ 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d,
17382 ++ 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d,
17383 ++ 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c,
17384 ++ 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 },
17385 ++ .b_public = (u8[32]){ 0xc8, 0xef, 0x79, 0xb5, 0x14, 0xd7, 0x68, 0x26,
17386 ++ 0x77, 0xbc, 0x79, 0x31, 0xe0, 0x6e, 0xe5, 0xc2,
17387 ++ 0x7c, 0x9b, 0x39, 0x2b, 0x4a, 0xe9, 0x48, 0x44,
17388 ++ 0x73, 0xf5, 0x54, 0xe6, 0x67, 0x8e, 0xcc, 0x2e },
17389 ++ .expected_ss = (u8[32]){ 0x64, 0x7a, 0x46, 0xb6, 0xfc, 0x3f, 0x40, 0xd6,
17390 ++ 0x21, 0x41, 0xee, 0x3c, 0xee, 0x70, 0x6b, 0x4d,
17391 ++ 0x7a, 0x92, 0x71, 0x59, 0x3a, 0x7b, 0x14, 0x3e,
17392 ++ 0x8e, 0x2e, 0x22, 0x79, 0x88, 0x3e, 0x45, 0x50 },
17393 ++ .secret_size = 32,
17394 ++ .b_public_size = 32,
17395 ++ .expected_ss_size = 32,
17396 ++
17397 ++},
17398 ++/* wycheproof - checking for overflow */
17399 ++{
17400 ++ .secret = (u8[32]){ 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d,
17401 ++ 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d,
17402 ++ 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c,
17403 ++ 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 },
17404 ++ .b_public = (u8[32]){ 0x64, 0xae, 0xac, 0x25, 0x04, 0x14, 0x48, 0x61,
17405 ++ 0x53, 0x2b, 0x7b, 0xbc, 0xb6, 0xc8, 0x7d, 0x67,
17406 ++ 0xdd, 0x4c, 0x1f, 0x07, 0xeb, 0xc2, 0xe0, 0x6e,
17407 ++ 0xff, 0xb9, 0x5a, 0xec, 0xc6, 0x17, 0x0b, 0x2c },
17408 ++ .expected_ss = (u8[32]){ 0x4f, 0xf0, 0x3d, 0x5f, 0xb4, 0x3c, 0xd8, 0x65,
17409 ++ 0x7a, 0x3c, 0xf3, 0x7c, 0x13, 0x8c, 0xad, 0xce,
17410 ++ 0xcc, 0xe5, 0x09, 0xe4, 0xeb, 0xa0, 0x89, 0xd0,
17411 ++ 0xef, 0x40, 0xb4, 0xe4, 0xfb, 0x94, 0x61, 0x55 },
17412 ++ .secret_size = 32,
17413 ++ .b_public_size = 32,
17414 ++ .expected_ss_size = 32,
17415 ++
17416 ++},
17417 ++/* wycheproof - checking for overflow */
17418 ++{
17419 ++ .secret = (u8[32]){ 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d,
17420 ++ 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d,
17421 ++ 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c,
17422 ++ 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 },
17423 ++ .b_public = (u8[32]){ 0xbf, 0x68, 0xe3, 0x5e, 0x9b, 0xdb, 0x7e, 0xee,
17424 ++ 0x1b, 0x50, 0x57, 0x02, 0x21, 0x86, 0x0f, 0x5d,
17425 ++ 0xcd, 0xad, 0x8a, 0xcb, 0xab, 0x03, 0x1b, 0x14,
17426 ++ 0x97, 0x4c, 0xc4, 0x90, 0x13, 0xc4, 0x98, 0x31 },
17427 ++ .expected_ss = (u8[32]){ 0x21, 0xce, 0xe5, 0x2e, 0xfd, 0xbc, 0x81, 0x2e,
17428 ++ 0x1d, 0x02, 0x1a, 0x4a, 0xf1, 0xe1, 0xd8, 0xbc,
17429 ++ 0x4d, 0xb3, 0xc4, 0x00, 0xe4, 0xd2, 0xa2, 0xc5,
17430 ++ 0x6a, 0x39, 0x26, 0xdb, 0x4d, 0x99, 0xc6, 0x5b },
17431 ++ .secret_size = 32,
17432 ++ .b_public_size = 32,
17433 ++ .expected_ss_size = 32,
17434 ++
17435 ++},
17436 ++/* wycheproof - checking for overflow */
17437 ++{
17438 ++ .secret = (u8[32]){ 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d,
17439 ++ 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d,
17440 ++ 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c,
17441 ++ 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 },
17442 ++ .b_public = (u8[32]){ 0x53, 0x47, 0xc4, 0x91, 0x33, 0x1a, 0x64, 0xb4,
17443 ++ 0x3d, 0xdc, 0x68, 0x30, 0x34, 0xe6, 0x77, 0xf5,
17444 ++ 0x3d, 0xc3, 0x2b, 0x52, 0xa5, 0x2a, 0x57, 0x7c,
17445 ++ 0x15, 0xa8, 0x3b, 0xf2, 0x98, 0xe9, 0x9f, 0x19 },
17446 ++ .expected_ss = (u8[32]){ 0x18, 0xcb, 0x89, 0xe4, 0xe2, 0x0c, 0x0c, 0x2b,
17447 ++ 0xd3, 0x24, 0x30, 0x52, 0x45, 0x26, 0x6c, 0x93,
17448 ++ 0x27, 0x69, 0x0b, 0xbe, 0x79, 0xac, 0xb8, 0x8f,
17449 ++ 0x5b, 0x8f, 0xb3, 0xf7, 0x4e, 0xca, 0x3e, 0x52 },
17450 ++ .secret_size = 32,
17451 ++ .b_public_size = 32,
17452 ++ .expected_ss_size = 32,
17453 ++
17454 ++},
17455 ++/* wycheproof - private key == -1 (mod order) */
17456 ++{
17457 ++ .secret = (u8[32]){ 0xa0, 0x23, 0xcd, 0xd0, 0x83, 0xef, 0x5b, 0xb8,
17458 ++ 0x2f, 0x10, 0xd6, 0x2e, 0x59, 0xe1, 0x5a, 0x68,
17459 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
17460 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50 },
17461 ++ .b_public = (u8[32]){ 0x25, 0x8e, 0x04, 0x52, 0x3b, 0x8d, 0x25, 0x3e,
17462 ++ 0xe6, 0x57, 0x19, 0xfc, 0x69, 0x06, 0xc6, 0x57,
17463 ++ 0x19, 0x2d, 0x80, 0x71, 0x7e, 0xdc, 0x82, 0x8f,
17464 ++ 0xa0, 0xaf, 0x21, 0x68, 0x6e, 0x2f, 0xaa, 0x75 },
17465 ++ .expected_ss = (u8[32]){ 0x25, 0x8e, 0x04, 0x52, 0x3b, 0x8d, 0x25, 0x3e,
17466 ++ 0xe6, 0x57, 0x19, 0xfc, 0x69, 0x06, 0xc6, 0x57,
17467 ++ 0x19, 0x2d, 0x80, 0x71, 0x7e, 0xdc, 0x82, 0x8f,
17468 ++ 0xa0, 0xaf, 0x21, 0x68, 0x6e, 0x2f, 0xaa, 0x75 },
17469 ++ .secret_size = 32,
17470 ++ .b_public_size = 32,
17471 ++ .expected_ss_size = 32,
17472 ++
17473 ++},
17474 ++/* wycheproof - private key == 1 (mod order) on twist */
17475 ++{
17476 ++ .secret = (u8[32]){ 0x58, 0x08, 0x3d, 0xd2, 0x61, 0xad, 0x91, 0xef,
17477 ++ 0xf9, 0x52, 0x32, 0x2e, 0xc8, 0x24, 0xc6, 0x82,
17478 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
17479 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5f },
17480 ++ .b_public = (u8[32]){ 0x2e, 0xae, 0x5e, 0xc3, 0xdd, 0x49, 0x4e, 0x9f,
17481 ++ 0x2d, 0x37, 0xd2, 0x58, 0xf8, 0x73, 0xa8, 0xe6,
17482 ++ 0xe9, 0xd0, 0xdb, 0xd1, 0xe3, 0x83, 0xef, 0x64,
17483 ++ 0xd9, 0x8b, 0xb9, 0x1b, 0x3e, 0x0b, 0xe0, 0x35 },
17484 ++ .expected_ss = (u8[32]){ 0x2e, 0xae, 0x5e, 0xc3, 0xdd, 0x49, 0x4e, 0x9f,
17485 ++ 0x2d, 0x37, 0xd2, 0x58, 0xf8, 0x73, 0xa8, 0xe6,
17486 ++ 0xe9, 0xd0, 0xdb, 0xd1, 0xe3, 0x83, 0xef, 0x64,
17487 ++ 0xd9, 0x8b, 0xb9, 0x1b, 0x3e, 0x0b, 0xe0, 0x35 },
17488 ++ .secret_size = 32,
17489 ++ .b_public_size = 32,
17490 ++ .expected_ss_size = 32,
17491 ++
17492 ++}
17493 ++};
17494 ++
17495 + static const struct kpp_testvec ecdh_tv_template[] = {
17496 + {
17497 + #ifndef CONFIG_CRYPTO_FIPS
17498 +--
17499 +cgit v1.2.3-4-ga26e
17500 +
17501 +
17502 +From 5d982068bc78704dfbb0c1fca5834b7561129858 Mon Sep 17 00:00:00 2001
17503 +From: Ard Biesheuvel <ardb@××××××.org>
17504 +Date: Fri, 8 Nov 2019 13:22:34 +0100
17505 +Subject: crypto: curve25519 - implement generic KPP driver
17506 +
17507 +commit ee772cb641135739c1530647391d5a04c39db192 upstream.
17508 +
17509 +Expose the generic Curve25519 library via the crypto API KPP interface.
17510 +
17511 +Signed-off-by: Ard Biesheuvel <ardb@××××××.org>
17512 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
17513 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
17514 +---
17515 + crypto/Kconfig | 5 +++
17516 + crypto/Makefile | 1 +
17517 + crypto/curve25519-generic.c | 90 +++++++++++++++++++++++++++++++++++++++++++++
17518 + 3 files changed, 96 insertions(+)
17519 + create mode 100644 crypto/curve25519-generic.c
17520 +
17521 +diff --git a/crypto/Kconfig b/crypto/Kconfig
17522 +index 8fd3954bf64c..a3fc859830c1 100644
17523 +--- a/crypto/Kconfig
17524 ++++ b/crypto/Kconfig
17525 +@@ -264,6 +264,11 @@ config CRYPTO_ECRDSA
17526 + standard algorithms (called GOST algorithms). Only signature verification
17527 + is implemented.
17528 +
17529 ++config CRYPTO_CURVE25519
17530 ++ tristate "Curve25519 algorithm"
17531 ++ select CRYPTO_KPP
17532 ++ select CRYPTO_LIB_CURVE25519_GENERIC
17533 ++
17534 + comment "Authenticated Encryption with Associated Data"
17535 +
17536 + config CRYPTO_CCM
17537 +diff --git a/crypto/Makefile b/crypto/Makefile
17538 +index fd27edea7c8e..4e7a0a8f7e35 100644
17539 +--- a/crypto/Makefile
17540 ++++ b/crypto/Makefile
17541 +@@ -167,6 +167,7 @@ obj-$(CONFIG_CRYPTO_ZSTD) += zstd.o
17542 + obj-$(CONFIG_CRYPTO_OFB) += ofb.o
17543 + obj-$(CONFIG_CRYPTO_ECC) += ecc.o
17544 + obj-$(CONFIG_CRYPTO_ESSIV) += essiv.o
17545 ++obj-$(CONFIG_CRYPTO_CURVE25519) += curve25519-generic.o
17546 +
17547 + ecdh_generic-y += ecdh.o
17548 + ecdh_generic-y += ecdh_helper.o
17549 +diff --git a/crypto/curve25519-generic.c b/crypto/curve25519-generic.c
17550 +new file mode 100644
17551 +index 000000000000..bd88fd571393
17552 +--- /dev/null
17553 ++++ b/crypto/curve25519-generic.c
17554 +@@ -0,0 +1,90 @@
17555 ++// SPDX-License-Identifier: GPL-2.0-or-later
17556 ++
17557 ++#include <crypto/curve25519.h>
17558 ++#include <crypto/internal/kpp.h>
17559 ++#include <crypto/kpp.h>
17560 ++#include <linux/module.h>
17561 ++#include <linux/scatterlist.h>
17562 ++
17563 ++static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf,
17564 ++ unsigned int len)
17565 ++{
17566 ++ u8 *secret = kpp_tfm_ctx(tfm);
17567 ++
17568 ++ if (!len)
17569 ++ curve25519_generate_secret(secret);
17570 ++ else if (len == CURVE25519_KEY_SIZE &&
17571 ++ crypto_memneq(buf, curve25519_null_point, CURVE25519_KEY_SIZE))
17572 ++ memcpy(secret, buf, CURVE25519_KEY_SIZE);
17573 ++ else
17574 ++ return -EINVAL;
17575 ++ return 0;
17576 ++}
17577 ++
17578 ++static int curve25519_compute_value(struct kpp_request *req)
17579 ++{
17580 ++ struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
17581 ++ const u8 *secret = kpp_tfm_ctx(tfm);
17582 ++ u8 public_key[CURVE25519_KEY_SIZE];
17583 ++ u8 buf[CURVE25519_KEY_SIZE];
17584 ++ int copied, nbytes;
17585 ++ u8 const *bp;
17586 ++
17587 ++ if (req->src) {
17588 ++ copied = sg_copy_to_buffer(req->src,
17589 ++ sg_nents_for_len(req->src,
17590 ++ CURVE25519_KEY_SIZE),
17591 ++ public_key, CURVE25519_KEY_SIZE);
17592 ++ if (copied != CURVE25519_KEY_SIZE)
17593 ++ return -EINVAL;
17594 ++ bp = public_key;
17595 ++ } else {
17596 ++ bp = curve25519_base_point;
17597 ++ }
17598 ++
17599 ++ curve25519_generic(buf, secret, bp);
17600 ++
17601 ++ /* might want less than we've got */
17602 ++ nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len);
17603 ++ copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst,
17604 ++ nbytes),
17605 ++ buf, nbytes);
17606 ++ if (copied != nbytes)
17607 ++ return -EINVAL;
17608 ++ return 0;
17609 ++}
17610 ++
17611 ++static unsigned int curve25519_max_size(struct crypto_kpp *tfm)
17612 ++{
17613 ++ return CURVE25519_KEY_SIZE;
17614 ++}
17615 ++
17616 ++static struct kpp_alg curve25519_alg = {
17617 ++ .base.cra_name = "curve25519",
17618 ++ .base.cra_driver_name = "curve25519-generic",
17619 ++ .base.cra_priority = 100,
17620 ++ .base.cra_module = THIS_MODULE,
17621 ++ .base.cra_ctxsize = CURVE25519_KEY_SIZE,
17622 ++
17623 ++ .set_secret = curve25519_set_secret,
17624 ++ .generate_public_key = curve25519_compute_value,
17625 ++ .compute_shared_secret = curve25519_compute_value,
17626 ++ .max_size = curve25519_max_size,
17627 ++};
17628 ++
17629 ++static int curve25519_init(void)
17630 ++{
17631 ++ return crypto_register_kpp(&curve25519_alg);
17632 ++}
17633 ++
17634 ++static void curve25519_exit(void)
17635 ++{
17636 ++ crypto_unregister_kpp(&curve25519_alg);
17637 ++}
17638 ++
17639 ++subsys_initcall(curve25519_init);
17640 ++module_exit(curve25519_exit);
17641 ++
17642 ++MODULE_ALIAS_CRYPTO("curve25519");
17643 ++MODULE_ALIAS_CRYPTO("curve25519-generic");
17644 ++MODULE_LICENSE("GPL");
17645 +--
17646 +cgit v1.2.3-4-ga26e
17647 +
17648 +
17649 +From c85906ff99bf760f7f02d488aff57dc8095b394b Mon Sep 17 00:00:00 2001
17650 +From: Ard Biesheuvel <ardb@××××××.org>
17651 +Date: Fri, 8 Nov 2019 13:22:35 +0100
17652 +Subject: crypto: lib/curve25519 - work around Clang stack spilling issue
17653 +
17654 +commit 660bb8e1f833ea63185fe80fde847e3e42f18e3b upstream.
17655 +
17656 +Arnd reports that the 32-bit generic library code for Curve25519 ends
17657 +up using an excessive amount of stack space when built with Clang:
17658 +
17659 + lib/crypto/curve25519-fiat32.c:756:6: error: stack frame size
17660 + of 1384 bytes in function 'curve25519_generic'
17661 + [-Werror,-Wframe-larger-than=]
17662 +
17663 +Let's give some hints to the compiler regarding which routines should
17664 +not be inlined, to prevent it from running out of registers and spilling
17665 +to the stack. The resulting code performs identically under both GCC
17666 +and Clang, and makes the warning go away.
17667 +
17668 +Suggested-by: Arnd Bergmann <arnd@×××××.de>
17669 +Signed-off-by: Ard Biesheuvel <ardb@××××××.org>
17670 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
17671 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
17672 +---
17673 + lib/crypto/curve25519-fiat32.c | 10 +++++-----
17674 + 1 file changed, 5 insertions(+), 5 deletions(-)
17675 +
17676 +diff --git a/lib/crypto/curve25519-fiat32.c b/lib/crypto/curve25519-fiat32.c
17677 +index 1c455207341d..2fde0ec33dbd 100644
17678 +--- a/lib/crypto/curve25519-fiat32.c
17679 ++++ b/lib/crypto/curve25519-fiat32.c
17680 +@@ -223,7 +223,7 @@ static __always_inline void fe_1(fe *h)
17681 + h->v[0] = 1;
17682 + }
17683 +
17684 +-static void fe_add_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
17685 ++static noinline void fe_add_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
17686 + {
17687 + { const u32 x20 = in1[9];
17688 + { const u32 x21 = in1[8];
17689 +@@ -266,7 +266,7 @@ static __always_inline void fe_add(fe_loose *h, const fe *f, const fe *g)
17690 + fe_add_impl(h->v, f->v, g->v);
17691 + }
17692 +
17693 +-static void fe_sub_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
17694 ++static noinline void fe_sub_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
17695 + {
17696 + { const u32 x20 = in1[9];
17697 + { const u32 x21 = in1[8];
17698 +@@ -309,7 +309,7 @@ static __always_inline void fe_sub(fe_loose *h, const fe *f, const fe *g)
17699 + fe_sub_impl(h->v, f->v, g->v);
17700 + }
17701 +
17702 +-static void fe_mul_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
17703 ++static noinline void fe_mul_impl(u32 out[10], const u32 in1[10], const u32 in2[10])
17704 + {
17705 + { const u32 x20 = in1[9];
17706 + { const u32 x21 = in1[8];
17707 +@@ -441,7 +441,7 @@ fe_mul_tll(fe *h, const fe_loose *f, const fe_loose *g)
17708 + fe_mul_impl(h->v, f->v, g->v);
17709 + }
17710 +
17711 +-static void fe_sqr_impl(u32 out[10], const u32 in1[10])
17712 ++static noinline void fe_sqr_impl(u32 out[10], const u32 in1[10])
17713 + {
17714 + { const u32 x17 = in1[9];
17715 + { const u32 x18 = in1[8];
17716 +@@ -619,7 +619,7 @@ static __always_inline void fe_invert(fe *out, const fe *z)
17717 + *
17718 + * Preconditions: b in {0,1}
17719 + */
17720 +-static __always_inline void fe_cswap(fe *f, fe *g, unsigned int b)
17721 ++static noinline void fe_cswap(fe *f, fe *g, unsigned int b)
17722 + {
17723 + unsigned i;
17724 + b = 0 - b;
17725 +--
17726 +cgit v1.2.3-4-ga26e
17727 +
17728 +
17729 +From 5feb2b9fbcb42f5317266c859cd5af9662817ab7 Mon Sep 17 00:00:00 2001
17730 +From: "Jason A. Donenfeld" <Jason@×××××.com>
17731 +Date: Fri, 8 Nov 2019 13:22:36 +0100
17732 +Subject: crypto: curve25519 - x86_64 library and KPP implementations
17733 +MIME-Version: 1.0
17734 +Content-Type: text/plain; charset=UTF-8
17735 +Content-Transfer-Encoding: 8bit
17736 +
17737 +commit bb611bdfd6be34d9f822c73305fcc83720499d38 upstream.
17738 +
17739 +This implementation is the fastest available x86_64 implementation, and
17740 +unlike Sandy2x, it doesn't require use of the floating point registers at
17741 +all. Instead it makes use of BMI2 and ADX, available on recent
17742 +microarchitectures. The implementation was written by Armando
17743 +Faz-Hernández with contributions (upstream) from Samuel Neves and me,
17744 +in addition to further changes in the kernel implementation from us.
17745 +
17746 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
17747 +Signed-off-by: Samuel Neves <sneves@××××××.pt>
17748 +Co-developed-by: Samuel Neves <sneves@××××××.pt>
17749 +[ardb: - move to arch/x86/crypto
17750 + - wire into lib/crypto framework
17751 + - implement crypto API KPP hooks ]
17752 +Signed-off-by: Ard Biesheuvel <ardb@××××××.org>
17753 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
17754 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
17755 +---
17756 + arch/x86/crypto/Makefile | 1 +
17757 + arch/x86/crypto/curve25519-x86_64.c | 2475 +++++++++++++++++++++++++++++++++++
17758 + crypto/Kconfig | 6 +
17759 + 3 files changed, 2482 insertions(+)
17760 + create mode 100644 arch/x86/crypto/curve25519-x86_64.c
17761 +
17762 +diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
17763 +index 922c8ecfa00f..958440eae27e 100644
17764 +--- a/arch/x86/crypto/Makefile
17765 ++++ b/arch/x86/crypto/Makefile
17766 +@@ -39,6 +39,7 @@ obj-$(CONFIG_CRYPTO_AEGIS128_AESNI_SSE2) += aegis128-aesni.o
17767 +
17768 + obj-$(CONFIG_CRYPTO_NHPOLY1305_SSE2) += nhpoly1305-sse2.o
17769 + obj-$(CONFIG_CRYPTO_NHPOLY1305_AVX2) += nhpoly1305-avx2.o
17770 ++obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o
17771 +
17772 + # These modules require assembler to support AVX.
17773 + ifeq ($(avx_supported),yes)
17774 +diff --git a/arch/x86/crypto/curve25519-x86_64.c b/arch/x86/crypto/curve25519-x86_64.c
17775 +new file mode 100644
17776 +index 000000000000..a52a3fb15727
17777 +--- /dev/null
17778 ++++ b/arch/x86/crypto/curve25519-x86_64.c
17779 +@@ -0,0 +1,2475 @@
17780 ++// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
17781 ++/*
17782 ++ * Copyright (c) 2017 Armando Faz <armfazh@××××××××××.br>. All Rights Reserved.
17783 ++ * Copyright (C) 2018-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
17784 ++ * Copyright (C) 2018 Samuel Neves <sneves@××××××.pt>. All Rights Reserved.
17785 ++ */
17786 ++
17787 ++#include <crypto/curve25519.h>
17788 ++#include <crypto/internal/kpp.h>
17789 ++
17790 ++#include <linux/types.h>
17791 ++#include <linux/jump_label.h>
17792 ++#include <linux/kernel.h>
17793 ++#include <linux/module.h>
17794 ++
17795 ++#include <asm/cpufeature.h>
17796 ++#include <asm/processor.h>
17797 ++
17798 ++static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_bmi2);
17799 ++static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_adx);
17800 ++
17801 ++enum { NUM_WORDS_ELTFP25519 = 4 };
17802 ++typedef __aligned(32) u64 eltfp25519_1w[NUM_WORDS_ELTFP25519];
17803 ++typedef __aligned(32) u64 eltfp25519_1w_buffer[2 * NUM_WORDS_ELTFP25519];
17804 ++
17805 ++#define mul_eltfp25519_1w_adx(c, a, b) do { \
17806 ++ mul_256x256_integer_adx(m.buffer, a, b); \
17807 ++ red_eltfp25519_1w_adx(c, m.buffer); \
17808 ++} while (0)
17809 ++
17810 ++#define mul_eltfp25519_1w_bmi2(c, a, b) do { \
17811 ++ mul_256x256_integer_bmi2(m.buffer, a, b); \
17812 ++ red_eltfp25519_1w_bmi2(c, m.buffer); \
17813 ++} while (0)
17814 ++
17815 ++#define sqr_eltfp25519_1w_adx(a) do { \
17816 ++ sqr_256x256_integer_adx(m.buffer, a); \
17817 ++ red_eltfp25519_1w_adx(a, m.buffer); \
17818 ++} while (0)
17819 ++
17820 ++#define sqr_eltfp25519_1w_bmi2(a) do { \
17821 ++ sqr_256x256_integer_bmi2(m.buffer, a); \
17822 ++ red_eltfp25519_1w_bmi2(a, m.buffer); \
17823 ++} while (0)
17824 ++
17825 ++#define mul_eltfp25519_2w_adx(c, a, b) do { \
17826 ++ mul2_256x256_integer_adx(m.buffer, a, b); \
17827 ++ red_eltfp25519_2w_adx(c, m.buffer); \
17828 ++} while (0)
17829 ++
17830 ++#define mul_eltfp25519_2w_bmi2(c, a, b) do { \
17831 ++ mul2_256x256_integer_bmi2(m.buffer, a, b); \
17832 ++ red_eltfp25519_2w_bmi2(c, m.buffer); \
17833 ++} while (0)
17834 ++
17835 ++#define sqr_eltfp25519_2w_adx(a) do { \
17836 ++ sqr2_256x256_integer_adx(m.buffer, a); \
17837 ++ red_eltfp25519_2w_adx(a, m.buffer); \
17838 ++} while (0)
17839 ++
17840 ++#define sqr_eltfp25519_2w_bmi2(a) do { \
17841 ++ sqr2_256x256_integer_bmi2(m.buffer, a); \
17842 ++ red_eltfp25519_2w_bmi2(a, m.buffer); \
17843 ++} while (0)
17844 ++
17845 ++#define sqrn_eltfp25519_1w_adx(a, times) do { \
17846 ++ int ____counter = (times); \
17847 ++ while (____counter-- > 0) \
17848 ++ sqr_eltfp25519_1w_adx(a); \
17849 ++} while (0)
17850 ++
17851 ++#define sqrn_eltfp25519_1w_bmi2(a, times) do { \
17852 ++ int ____counter = (times); \
17853 ++ while (____counter-- > 0) \
17854 ++ sqr_eltfp25519_1w_bmi2(a); \
17855 ++} while (0)
17856 ++
17857 ++#define copy_eltfp25519_1w(C, A) do { \
17858 ++ (C)[0] = (A)[0]; \
17859 ++ (C)[1] = (A)[1]; \
17860 ++ (C)[2] = (A)[2]; \
17861 ++ (C)[3] = (A)[3]; \
17862 ++} while (0)
17863 ++
17864 ++#define setzero_eltfp25519_1w(C) do { \
17865 ++ (C)[0] = 0; \
17866 ++ (C)[1] = 0; \
17867 ++ (C)[2] = 0; \
17868 ++ (C)[3] = 0; \
17869 ++} while (0)
17870 ++
17871 ++__aligned(32) static const u64 table_ladder_8k[252 * NUM_WORDS_ELTFP25519] = {
17872 ++ /* 1 */ 0xfffffffffffffff3UL, 0xffffffffffffffffUL,
17873 ++ 0xffffffffffffffffUL, 0x5fffffffffffffffUL,
17874 ++ /* 2 */ 0x6b8220f416aafe96UL, 0x82ebeb2b4f566a34UL,
17875 ++ 0xd5a9a5b075a5950fUL, 0x5142b2cf4b2488f4UL,
17876 ++ /* 3 */ 0x6aaebc750069680cUL, 0x89cf7820a0f99c41UL,
17877 ++ 0x2a58d9183b56d0f4UL, 0x4b5aca80e36011a4UL,
17878 ++ /* 4 */ 0x329132348c29745dUL, 0xf4a2e616e1642fd7UL,
17879 ++ 0x1e45bb03ff67bc34UL, 0x306912d0f42a9b4aUL,
17880 ++ /* 5 */ 0xff886507e6af7154UL, 0x04f50e13dfeec82fUL,
17881 ++ 0xaa512fe82abab5ceUL, 0x174e251a68d5f222UL,
17882 ++ /* 6 */ 0xcf96700d82028898UL, 0x1743e3370a2c02c5UL,
17883 ++ 0x379eec98b4e86eaaUL, 0x0c59888a51e0482eUL,
17884 ++ /* 7 */ 0xfbcbf1d699b5d189UL, 0xacaef0d58e9fdc84UL,
17885 ++ 0xc1c20d06231f7614UL, 0x2938218da274f972UL,
17886 ++ /* 8 */ 0xf6af49beff1d7f18UL, 0xcc541c22387ac9c2UL,
17887 ++ 0x96fcc9ef4015c56bUL, 0x69c1627c690913a9UL,
17888 ++ /* 9 */ 0x7a86fd2f4733db0eUL, 0xfdb8c4f29e087de9UL,
17889 ++ 0x095e4b1a8ea2a229UL, 0x1ad7a7c829b37a79UL,
17890 ++ /* 10 */ 0x342d89cad17ea0c0UL, 0x67bedda6cced2051UL,
17891 ++ 0x19ca31bf2bb42f74UL, 0x3df7b4c84980acbbUL,
17892 ++ /* 11 */ 0xa8c6444dc80ad883UL, 0xb91e440366e3ab85UL,
17893 ++ 0xc215cda00164f6d8UL, 0x3d867c6ef247e668UL,
17894 ++ /* 12 */ 0xc7dd582bcc3e658cUL, 0xfd2c4748ee0e5528UL,
17895 ++ 0xa0fd9b95cc9f4f71UL, 0x7529d871b0675ddfUL,
17896 ++ /* 13 */ 0xb8f568b42d3cbd78UL, 0x1233011b91f3da82UL,
17897 ++ 0x2dce6ccd4a7c3b62UL, 0x75e7fc8e9e498603UL,
17898 ++ /* 14 */ 0x2f4f13f1fcd0b6ecUL, 0xf1a8ca1f29ff7a45UL,
17899 ++ 0xc249c1a72981e29bUL, 0x6ebe0dbb8c83b56aUL,
17900 ++ /* 15 */ 0x7114fa8d170bb222UL, 0x65a2dcd5bf93935fUL,
17901 ++ 0xbdc41f68b59c979aUL, 0x2f0eef79a2ce9289UL,
17902 ++ /* 16 */ 0x42ecbf0c083c37ceUL, 0x2930bc09ec496322UL,
17903 ++ 0xf294b0c19cfeac0dUL, 0x3780aa4bedfabb80UL,
17904 ++ /* 17 */ 0x56c17d3e7cead929UL, 0xe7cb4beb2e5722c5UL,
17905 ++ 0x0ce931732dbfe15aUL, 0x41b883c7621052f8UL,
17906 ++ /* 18 */ 0xdbf75ca0c3d25350UL, 0x2936be086eb1e351UL,
17907 ++ 0xc936e03cb4a9b212UL, 0x1d45bf82322225aaUL,
17908 ++ /* 19 */ 0xe81ab1036a024cc5UL, 0xe212201c304c9a72UL,
17909 ++ 0xc5d73fba6832b1fcUL, 0x20ffdb5a4d839581UL,
17910 ++ /* 20 */ 0xa283d367be5d0fadUL, 0x6c2b25ca8b164475UL,
17911 ++ 0x9d4935467caaf22eUL, 0x5166408eee85ff49UL,
17912 ++ /* 21 */ 0x3c67baa2fab4e361UL, 0xb3e433c67ef35cefUL,
17913 ++ 0x5259729241159b1cUL, 0x6a621892d5b0ab33UL,
17914 ++ /* 22 */ 0x20b74a387555cdcbUL, 0x532aa10e1208923fUL,
17915 ++ 0xeaa17b7762281dd1UL, 0x61ab3443f05c44bfUL,
17916 ++ /* 23 */ 0x257a6c422324def8UL, 0x131c6c1017e3cf7fUL,
17917 ++ 0x23758739f630a257UL, 0x295a407a01a78580UL,
17918 ++ /* 24 */ 0xf8c443246d5da8d9UL, 0x19d775450c52fa5dUL,
17919 ++ 0x2afcfc92731bf83dUL, 0x7d10c8e81b2b4700UL,
17920 ++ /* 25 */ 0xc8e0271f70baa20bUL, 0x993748867ca63957UL,
17921 ++ 0x5412efb3cb7ed4bbUL, 0x3196d36173e62975UL,
17922 ++ /* 26 */ 0xde5bcad141c7dffcUL, 0x47cc8cd2b395c848UL,
17923 ++ 0xa34cd942e11af3cbUL, 0x0256dbf2d04ecec2UL,
17924 ++ /* 27 */ 0x875ab7e94b0e667fUL, 0xcad4dd83c0850d10UL,
17925 ++ 0x47f12e8f4e72c79fUL, 0x5f1a87bb8c85b19bUL,
17926 ++ /* 28 */ 0x7ae9d0b6437f51b8UL, 0x12c7ce5518879065UL,
17927 ++ 0x2ade09fe5cf77aeeUL, 0x23a05a2f7d2c5627UL,
17928 ++ /* 29 */ 0x5908e128f17c169aUL, 0xf77498dd8ad0852dUL,
17929 ++ 0x74b4c4ceab102f64UL, 0x183abadd10139845UL,
17930 ++ /* 30 */ 0xb165ba8daa92aaacUL, 0xd5c5ef9599386705UL,
17931 ++ 0xbe2f8f0cf8fc40d1UL, 0x2701e635ee204514UL,
17932 ++ /* 31 */ 0x629fa80020156514UL, 0xf223868764a8c1ceUL,
17933 ++ 0x5b894fff0b3f060eUL, 0x60d9944cf708a3faUL,
17934 ++ /* 32 */ 0xaeea001a1c7a201fUL, 0xebf16a633ee2ce63UL,
17935 ++ 0x6f7709594c7a07e1UL, 0x79b958150d0208cbUL,
17936 ++ /* 33 */ 0x24b55e5301d410e7UL, 0xe3a34edff3fdc84dUL,
17937 ++ 0xd88768e4904032d8UL, 0x131384427b3aaeecUL,
17938 ++ /* 34 */ 0x8405e51286234f14UL, 0x14dc4739adb4c529UL,
17939 ++ 0xb8a2b5b250634ffdUL, 0x2fe2a94ad8a7ff93UL,
17940 ++ /* 35 */ 0xec5c57efe843faddUL, 0x2843ce40f0bb9918UL,
17941 ++ 0xa4b561d6cf3d6305UL, 0x743629bde8fb777eUL,
17942 ++ /* 36 */ 0x343edd46bbaf738fUL, 0xed981828b101a651UL,
17943 ++ 0xa401760b882c797aUL, 0x1fc223e28dc88730UL,
17944 ++ /* 37 */ 0x48604e91fc0fba0eUL, 0xb637f78f052c6fa4UL,
17945 ++ 0x91ccac3d09e9239cUL, 0x23f7eed4437a687cUL,
17946 ++ /* 38 */ 0x5173b1118d9bd800UL, 0x29d641b63189d4a7UL,
17947 ++ 0xfdbf177988bbc586UL, 0x2959894fcad81df5UL,
17948 ++ /* 39 */ 0xaebc8ef3b4bbc899UL, 0x4148995ab26992b9UL,
17949 ++ 0x24e20b0134f92cfbUL, 0x40d158894a05dee8UL,
17950 ++ /* 40 */ 0x46b00b1185af76f6UL, 0x26bac77873187a79UL,
17951 ++ 0x3dc0bf95ab8fff5fUL, 0x2a608bd8945524d7UL,
17952 ++ /* 41 */ 0x26449588bd446302UL, 0x7c4bc21c0388439cUL,
17953 ++ 0x8e98a4f383bd11b2UL, 0x26218d7bc9d876b9UL,
17954 ++ /* 42 */ 0xe3081542997c178aUL, 0x3c2d29a86fb6606fUL,
17955 ++ 0x5c217736fa279374UL, 0x7dde05734afeb1faUL,
17956 ++ /* 43 */ 0x3bf10e3906d42babUL, 0xe4f7803e1980649cUL,
17957 ++ 0xe6053bf89595bf7aUL, 0x394faf38da245530UL,
17958 ++ /* 44 */ 0x7a8efb58896928f4UL, 0xfbc778e9cc6a113cUL,
17959 ++ 0x72670ce330af596fUL, 0x48f222a81d3d6cf7UL,
17960 ++ /* 45 */ 0xf01fce410d72caa7UL, 0x5a20ecc7213b5595UL,
17961 ++ 0x7bc21165c1fa1483UL, 0x07f89ae31da8a741UL,
17962 ++ /* 46 */ 0x05d2c2b4c6830ff9UL, 0xd43e330fc6316293UL,
17963 ++ 0xa5a5590a96d3a904UL, 0x705edb91a65333b6UL,
17964 ++ /* 47 */ 0x048ee15e0bb9a5f7UL, 0x3240cfca9e0aaf5dUL,
17965 ++ 0x8f4b71ceedc4a40bUL, 0x621c0da3de544a6dUL,
17966 ++ /* 48 */ 0x92872836a08c4091UL, 0xce8375b010c91445UL,
17967 ++ 0x8a72eb524f276394UL, 0x2667fcfa7ec83635UL,
17968 ++ /* 49 */ 0x7f4c173345e8752aUL, 0x061b47feee7079a5UL,
17969 ++ 0x25dd9afa9f86ff34UL, 0x3780cef5425dc89cUL,
17970 ++ /* 50 */ 0x1a46035a513bb4e9UL, 0x3e1ef379ac575adaUL,
17971 ++ 0xc78c5f1c5fa24b50UL, 0x321a967634fd9f22UL,
17972 ++ /* 51 */ 0x946707b8826e27faUL, 0x3dca84d64c506fd0UL,
17973 ++ 0xc189218075e91436UL, 0x6d9284169b3b8484UL,
17974 ++ /* 52 */ 0x3a67e840383f2ddfUL, 0x33eec9a30c4f9b75UL,
17975 ++ 0x3ec7c86fa783ef47UL, 0x26ec449fbac9fbc4UL,
17976 ++ /* 53 */ 0x5c0f38cba09b9e7dUL, 0x81168cc762a3478cUL,
17977 ++ 0x3e23b0d306fc121cUL, 0x5a238aa0a5efdcddUL,
17978 ++ /* 54 */ 0x1ba26121c4ea43ffUL, 0x36f8c77f7c8832b5UL,
17979 ++ 0x88fbea0b0adcf99aUL, 0x5ca9938ec25bebf9UL,
17980 ++ /* 55 */ 0xd5436a5e51fccda0UL, 0x1dbc4797c2cd893bUL,
17981 ++ 0x19346a65d3224a08UL, 0x0f5034e49b9af466UL,
17982 ++ /* 56 */ 0xf23c3967a1e0b96eUL, 0xe58b08fa867a4d88UL,
17983 ++ 0xfb2fabc6a7341679UL, 0x2a75381eb6026946UL,
17984 ++ /* 57 */ 0xc80a3be4c19420acUL, 0x66b1f6c681f2b6dcUL,
17985 ++ 0x7cf7036761e93388UL, 0x25abbbd8a660a4c4UL,
17986 ++ /* 58 */ 0x91ea12ba14fd5198UL, 0x684950fc4a3cffa9UL,
17987 ++ 0xf826842130f5ad28UL, 0x3ea988f75301a441UL,
17988 ++ /* 59 */ 0xc978109a695f8c6fUL, 0x1746eb4a0530c3f3UL,
17989 ++ 0x444d6d77b4459995UL, 0x75952b8c054e5cc7UL,
17990 ++ /* 60 */ 0xa3703f7915f4d6aaUL, 0x66c346202f2647d8UL,
17991 ++ 0xd01469df811d644bUL, 0x77fea47d81a5d71fUL,
17992 ++ /* 61 */ 0xc5e9529ef57ca381UL, 0x6eeeb4b9ce2f881aUL,
17993 ++ 0xb6e91a28e8009bd6UL, 0x4b80be3e9afc3fecUL,
17994 ++ /* 62 */ 0x7e3773c526aed2c5UL, 0x1b4afcb453c9a49dUL,
17995 ++ 0xa920bdd7baffb24dUL, 0x7c54699f122d400eUL,
17996 ++ /* 63 */ 0xef46c8e14fa94bc8UL, 0xe0b074ce2952ed5eUL,
17997 ++ 0xbea450e1dbd885d5UL, 0x61b68649320f712cUL,
17998 ++ /* 64 */ 0x8a485f7309ccbdd1UL, 0xbd06320d7d4d1a2dUL,
17999 ++ 0x25232973322dbef4UL, 0x445dc4758c17f770UL,
18000 ++ /* 65 */ 0xdb0434177cc8933cUL, 0xed6fe82175ea059fUL,
18001 ++ 0x1efebefdc053db34UL, 0x4adbe867c65daf99UL,
18002 ++ /* 66 */ 0x3acd71a2a90609dfUL, 0xe5e991856dd04050UL,
18003 ++ 0x1ec69b688157c23cUL, 0x697427f6885cfe4dUL,
18004 ++ /* 67 */ 0xd7be7b9b65e1a851UL, 0xa03d28d522c536ddUL,
18005 ++ 0x28399d658fd2b645UL, 0x49e5b7e17c2641e1UL,
18006 ++ /* 68 */ 0x6f8c3a98700457a4UL, 0x5078f0a25ebb6778UL,
18007 ++ 0xd13c3ccbc382960fUL, 0x2e003258a7df84b1UL,
18008 ++ /* 69 */ 0x8ad1f39be6296a1cUL, 0xc1eeaa652a5fbfb2UL,
18009 ++ 0x33ee0673fd26f3cbUL, 0x59256173a69d2cccUL,
18010 ++ /* 70 */ 0x41ea07aa4e18fc41UL, 0xd9fc19527c87a51eUL,
18011 ++ 0xbdaacb805831ca6fUL, 0x445b652dc916694fUL,
18012 ++ /* 71 */ 0xce92a3a7f2172315UL, 0x1edc282de11b9964UL,
18013 ++ 0xa1823aafe04c314aUL, 0x790a2d94437cf586UL,
18014 ++ /* 72 */ 0x71c447fb93f6e009UL, 0x8922a56722845276UL,
18015 ++ 0xbf70903b204f5169UL, 0x2f7a89891ba319feUL,
18016 ++ /* 73 */ 0x02a08eb577e2140cUL, 0xed9a4ed4427bdcf4UL,
18017 ++ 0x5253ec44e4323cd1UL, 0x3e88363c14e9355bUL,
18018 ++ /* 74 */ 0xaa66c14277110b8cUL, 0x1ae0391610a23390UL,
18019 ++ 0x2030bd12c93fc2a2UL, 0x3ee141579555c7abUL,
18020 ++ /* 75 */ 0x9214de3a6d6e7d41UL, 0x3ccdd88607f17efeUL,
18021 ++ 0x674f1288f8e11217UL, 0x5682250f329f93d0UL,
18022 ++ /* 76 */ 0x6cf00b136d2e396eUL, 0x6e4cf86f1014debfUL,
18023 ++ 0x5930b1b5bfcc4e83UL, 0x047069b48aba16b6UL,
18024 ++ /* 77 */ 0x0d4ce4ab69b20793UL, 0xb24db91a97d0fb9eUL,
18025 ++ 0xcdfa50f54e00d01dUL, 0x221b1085368bddb5UL,
18026 ++ /* 78 */ 0xe7e59468b1e3d8d2UL, 0x53c56563bd122f93UL,
18027 ++ 0xeee8a903e0663f09UL, 0x61efa662cbbe3d42UL,
18028 ++ /* 79 */ 0x2cf8ddddde6eab2aUL, 0x9bf80ad51435f231UL,
18029 ++ 0x5deadacec9f04973UL, 0x29275b5d41d29b27UL,
18030 ++ /* 80 */ 0xcfde0f0895ebf14fUL, 0xb9aab96b054905a7UL,
18031 ++ 0xcae80dd9a1c420fdUL, 0x0a63bf2f1673bbc7UL,
18032 ++ /* 81 */ 0x092f6e11958fbc8cUL, 0x672a81e804822fadUL,
18033 ++ 0xcac8351560d52517UL, 0x6f3f7722c8f192f8UL,
18034 ++ /* 82 */ 0xf8ba90ccc2e894b7UL, 0x2c7557a438ff9f0dUL,
18035 ++ 0x894d1d855ae52359UL, 0x68e122157b743d69UL,
18036 ++ /* 83 */ 0xd87e5570cfb919f3UL, 0x3f2cdecd95798db9UL,
18037 ++ 0x2121154710c0a2ceUL, 0x3c66a115246dc5b2UL,
18038 ++ /* 84 */ 0xcbedc562294ecb72UL, 0xba7143c36a280b16UL,
18039 ++ 0x9610c2efd4078b67UL, 0x6144735d946a4b1eUL,
18040 ++ /* 85 */ 0x536f111ed75b3350UL, 0x0211db8c2041d81bUL,
18041 ++ 0xf93cb1000e10413cUL, 0x149dfd3c039e8876UL,
18042 ++ /* 86 */ 0xd479dde46b63155bUL, 0xb66e15e93c837976UL,
18043 ++ 0xdafde43b1f13e038UL, 0x5fafda1a2e4b0b35UL,
18044 ++ /* 87 */ 0x3600bbdf17197581UL, 0x3972050bbe3cd2c2UL,
18045 ++ 0x5938906dbdd5be86UL, 0x34fce5e43f9b860fUL,
18046 ++ /* 88 */ 0x75a8a4cd42d14d02UL, 0x828dabc53441df65UL,
18047 ++ 0x33dcabedd2e131d3UL, 0x3ebad76fb814d25fUL,
18048 ++ /* 89 */ 0xd4906f566f70e10fUL, 0x5d12f7aa51690f5aUL,
18049 ++ 0x45adb16e76cefcf2UL, 0x01f768aead232999UL,
18050 ++ /* 90 */ 0x2b6cc77b6248febdUL, 0x3cd30628ec3aaffdUL,
18051 ++ 0xce1c0b80d4ef486aUL, 0x4c3bff2ea6f66c23UL,
18052 ++ /* 91 */ 0x3f2ec4094aeaeb5fUL, 0x61b19b286e372ca7UL,
18053 ++ 0x5eefa966de2a701dUL, 0x23b20565de55e3efUL,
18054 ++ /* 92 */ 0xe301ca5279d58557UL, 0x07b2d4ce27c2874fUL,
18055 ++ 0xa532cd8a9dcf1d67UL, 0x2a52fee23f2bff56UL,
18056 ++ /* 93 */ 0x8624efb37cd8663dUL, 0xbbc7ac20ffbd7594UL,
18057 ++ 0x57b85e9c82d37445UL, 0x7b3052cb86a6ec66UL,
18058 ++ /* 94 */ 0x3482f0ad2525e91eUL, 0x2cb68043d28edca0UL,
18059 ++ 0xaf4f6d052e1b003aUL, 0x185f8c2529781b0aUL,
18060 ++ /* 95 */ 0xaa41de5bd80ce0d6UL, 0x9407b2416853e9d6UL,
18061 ++ 0x563ec36e357f4c3aUL, 0x4cc4b8dd0e297bceUL,
18062 ++ /* 96 */ 0xa2fc1a52ffb8730eUL, 0x1811f16e67058e37UL,
18063 ++ 0x10f9a366cddf4ee1UL, 0x72f4a0c4a0b9f099UL,
18064 ++ /* 97 */ 0x8c16c06f663f4ea7UL, 0x693b3af74e970fbaUL,
18065 ++ 0x2102e7f1d69ec345UL, 0x0ba53cbc968a8089UL,
18066 ++ /* 98 */ 0xca3d9dc7fea15537UL, 0x4c6824bb51536493UL,
18067 ++ 0xb9886314844006b1UL, 0x40d2a72ab454cc60UL,
18068 ++ /* 99 */ 0x5936a1b712570975UL, 0x91b9d648debda657UL,
18069 ++ 0x3344094bb64330eaUL, 0x006ba10d12ee51d0UL,
18070 ++ /* 100 */ 0x19228468f5de5d58UL, 0x0eb12f4c38cc05b0UL,
18071 ++ 0xa1039f9dd5601990UL, 0x4502d4ce4fff0e0bUL,
18072 ++ /* 101 */ 0xeb2054106837c189UL, 0xd0f6544c6dd3b93cUL,
18073 ++ 0x40727064c416d74fUL, 0x6e15c6114b502ef0UL,
18074 ++ /* 102 */ 0x4df2a398cfb1a76bUL, 0x11256c7419f2f6b1UL,
18075 ++ 0x4a497962066e6043UL, 0x705b3aab41355b44UL,
18076 ++ /* 103 */ 0x365ef536d797b1d8UL, 0x00076bd622ddf0dbUL,
18077 ++ 0x3bbf33b0e0575a88UL, 0x3777aa05c8e4ca4dUL,
18078 ++ /* 104 */ 0x392745c85578db5fUL, 0x6fda4149dbae5ae2UL,
18079 ++ 0xb1f0b00b8adc9867UL, 0x09963437d36f1da3UL,
18080 ++ /* 105 */ 0x7e824e90a5dc3853UL, 0xccb5f6641f135cbdUL,
18081 ++ 0x6736d86c87ce8fccUL, 0x625f3ce26604249fUL,
18082 ++ /* 106 */ 0xaf8ac8059502f63fUL, 0x0c05e70a2e351469UL,
18083 ++ 0x35292e9c764b6305UL, 0x1a394360c7e23ac3UL,
18084 ++ /* 107 */ 0xd5c6d53251183264UL, 0x62065abd43c2b74fUL,
18085 ++ 0xb5fbf5d03b973f9bUL, 0x13a3da3661206e5eUL,
18086 ++ /* 108 */ 0xc6bd5837725d94e5UL, 0x18e30912205016c5UL,
18087 ++ 0x2088ce1570033c68UL, 0x7fba1f495c837987UL,
18088 ++ /* 109 */ 0x5a8c7423f2f9079dUL, 0x1735157b34023fc5UL,
18089 ++ 0xe4f9b49ad2fab351UL, 0x6691ff72c878e33cUL,
18090 ++ /* 110 */ 0x122c2adedc5eff3eUL, 0xf8dd4bf1d8956cf4UL,
18091 ++ 0xeb86205d9e9e5bdaUL, 0x049b92b9d975c743UL,
18092 ++ /* 111 */ 0xa5379730b0f6c05aUL, 0x72a0ffacc6f3a553UL,
18093 ++ 0xb0032c34b20dcd6dUL, 0x470e9dbc88d5164aUL,
18094 ++ /* 112 */ 0xb19cf10ca237c047UL, 0xb65466711f6c81a2UL,
18095 ++ 0xb3321bd16dd80b43UL, 0x48c14f600c5fbe8eUL,
18096 ++ /* 113 */ 0x66451c264aa6c803UL, 0xb66e3904a4fa7da6UL,
18097 ++ 0xd45f19b0b3128395UL, 0x31602627c3c9bc10UL,
18098 ++ /* 114 */ 0x3120dc4832e4e10dUL, 0xeb20c46756c717f7UL,
18099 ++ 0x00f52e3f67280294UL, 0x566d4fc14730c509UL,
18100 ++ /* 115 */ 0x7e3a5d40fd837206UL, 0xc1e926dc7159547aUL,
18101 ++ 0x216730fba68d6095UL, 0x22e8c3843f69cea7UL,
18102 ++ /* 116 */ 0x33d074e8930e4b2bUL, 0xb6e4350e84d15816UL,
18103 ++ 0x5534c26ad6ba2365UL, 0x7773c12f89f1f3f3UL,
18104 ++ /* 117 */ 0x8cba404da57962aaUL, 0x5b9897a81999ce56UL,
18105 ++ 0x508e862f121692fcUL, 0x3a81907fa093c291UL,
18106 ++ /* 118 */ 0x0dded0ff4725a510UL, 0x10d8cc10673fc503UL,
18107 ++ 0x5b9d151c9f1f4e89UL, 0x32a5c1d5cb09a44cUL,
18108 ++ /* 119 */ 0x1e0aa442b90541fbUL, 0x5f85eb7cc1b485dbUL,
18109 ++ 0xbee595ce8a9df2e5UL, 0x25e496c722422236UL,
18110 ++ /* 120 */ 0x5edf3c46cd0fe5b9UL, 0x34e75a7ed2a43388UL,
18111 ++ 0xe488de11d761e352UL, 0x0e878a01a085545cUL,
18112 ++ /* 121 */ 0xba493c77e021bb04UL, 0x2b4d1843c7df899aUL,
18113 ++ 0x9ea37a487ae80d67UL, 0x67a9958011e41794UL,
18114 ++ /* 122 */ 0x4b58051a6697b065UL, 0x47e33f7d8d6ba6d4UL,
18115 ++ 0xbb4da8d483ca46c1UL, 0x68becaa181c2db0dUL,
18116 ++ /* 123 */ 0x8d8980e90b989aa5UL, 0xf95eb14a2c93c99bUL,
18117 ++ 0x51c6c7c4796e73a2UL, 0x6e228363b5efb569UL,
18118 ++ /* 124 */ 0xc6bbc0b02dd624c8UL, 0x777eb47dec8170eeUL,
18119 ++ 0x3cde15a004cfafa9UL, 0x1dc6bc087160bf9bUL,
18120 ++ /* 125 */ 0x2e07e043eec34002UL, 0x18e9fc677a68dc7fUL,
18121 ++ 0xd8da03188bd15b9aUL, 0x48fbc3bb00568253UL,
18122 ++ /* 126 */ 0x57547d4cfb654ce1UL, 0xd3565b82a058e2adUL,
18123 ++ 0xf63eaf0bbf154478UL, 0x47531ef114dfbb18UL,
18124 ++ /* 127 */ 0xe1ec630a4278c587UL, 0x5507d546ca8e83f3UL,
18125 ++ 0x85e135c63adc0c2bUL, 0x0aa7efa85682844eUL,
18126 ++ /* 128 */ 0x72691ba8b3e1f615UL, 0x32b4e9701fbe3ffaUL,
18127 ++ 0x97b6d92e39bb7868UL, 0x2cfe53dea02e39e8UL,
18128 ++ /* 129 */ 0x687392cd85cd52b0UL, 0x27ff66c910e29831UL,
18129 ++ 0x97134556a9832d06UL, 0x269bb0360a84f8a0UL,
18130 ++ /* 130 */ 0x706e55457643f85cUL, 0x3734a48c9b597d1bUL,
18131 ++ 0x7aee91e8c6efa472UL, 0x5cd6abc198a9d9e0UL,
18132 ++ /* 131 */ 0x0e04de06cb3ce41aUL, 0xd8c6eb893402e138UL,
18133 ++ 0x904659bb686e3772UL, 0x7215c371746ba8c8UL,
18134 ++ /* 132 */ 0xfd12a97eeae4a2d9UL, 0x9514b7516394f2c5UL,
18135 ++ 0x266fd5809208f294UL, 0x5c847085619a26b9UL,
18136 ++ /* 133 */ 0x52985410fed694eaUL, 0x3c905b934a2ed254UL,
18137 ++ 0x10bb47692d3be467UL, 0x063b3d2d69e5e9e1UL,
18138 ++ /* 134 */ 0x472726eedda57debUL, 0xefb6c4ae10f41891UL,
18139 ++ 0x2b1641917b307614UL, 0x117c554fc4f45b7cUL,
18140 ++ /* 135 */ 0xc07cf3118f9d8812UL, 0x01dbd82050017939UL,
18141 ++ 0xd7e803f4171b2827UL, 0x1015e87487d225eaUL,
18142 ++ /* 136 */ 0xc58de3fed23acc4dUL, 0x50db91c294a7be2dUL,
18143 ++ 0x0b94d43d1c9cf457UL, 0x6b1640fa6e37524aUL,
18144 ++ /* 137 */ 0x692f346c5fda0d09UL, 0x200b1c59fa4d3151UL,
18145 ++ 0xb8c46f760777a296UL, 0x4b38395f3ffdfbcfUL,
18146 ++ /* 138 */ 0x18d25e00be54d671UL, 0x60d50582bec8aba6UL,
18147 ++ 0x87ad8f263b78b982UL, 0x50fdf64e9cda0432UL,
18148 ++ /* 139 */ 0x90f567aac578dcf0UL, 0xef1e9b0ef2a3133bUL,
18149 ++ 0x0eebba9242d9de71UL, 0x15473c9bf03101c7UL,
18150 ++ /* 140 */ 0x7c77e8ae56b78095UL, 0xb678e7666e6f078eUL,
18151 ++ 0x2da0b9615348ba1fUL, 0x7cf931c1ff733f0bUL,
18152 ++ /* 141 */ 0x26b357f50a0a366cUL, 0xe9708cf42b87d732UL,
18153 ++ 0xc13aeea5f91cb2c0UL, 0x35d90c991143bb4cUL,
18154 ++ /* 142 */ 0x47c1c404a9a0d9dcUL, 0x659e58451972d251UL,
18155 ++ 0x3875a8c473b38c31UL, 0x1fbd9ed379561f24UL,
18156 ++ /* 143 */ 0x11fabc6fd41ec28dUL, 0x7ef8dfe3cd2a2dcaUL,
18157 ++ 0x72e73b5d8c404595UL, 0x6135fa4954b72f27UL,
18158 ++ /* 144 */ 0xccfc32a2de24b69cUL, 0x3f55698c1f095d88UL,
18159 ++ 0xbe3350ed5ac3f929UL, 0x5e9bf806ca477eebUL,
18160 ++ /* 145 */ 0xe9ce8fb63c309f68UL, 0x5376f63565e1f9f4UL,
18161 ++ 0xd1afcfb35a6393f1UL, 0x6632a1ede5623506UL,
18162 ++ /* 146 */ 0x0b7d6c390c2ded4cUL, 0x56cb3281df04cb1fUL,
18163 ++ 0x66305a1249ecc3c7UL, 0x5d588b60a38ca72aUL,
18164 ++ /* 147 */ 0xa6ecbf78e8e5f42dUL, 0x86eeb44b3c8a3eecUL,
18165 ++ 0xec219c48fbd21604UL, 0x1aaf1af517c36731UL,
18166 ++ /* 148 */ 0xc306a2836769bde7UL, 0x208280622b1e2adbUL,
18167 ++ 0x8027f51ffbff94a6UL, 0x76cfa1ce1124f26bUL,
18168 ++ /* 149 */ 0x18eb00562422abb6UL, 0xf377c4d58f8c29c3UL,
18169 ++ 0x4dbbc207f531561aUL, 0x0253b7f082128a27UL,
18170 ++ /* 150 */ 0x3d1f091cb62c17e0UL, 0x4860e1abd64628a9UL,
18171 ++ 0x52d17436309d4253UL, 0x356f97e13efae576UL,
18172 ++ /* 151 */ 0xd351e11aa150535bUL, 0x3e6b45bb1dd878ccUL,
18173 ++ 0x0c776128bed92c98UL, 0x1d34ae93032885b8UL,
18174 ++ /* 152 */ 0x4ba0488ca85ba4c3UL, 0x985348c33c9ce6ceUL,
18175 ++ 0x66124c6f97bda770UL, 0x0f81a0290654124aUL,
18176 ++ /* 153 */ 0x9ed09ca6569b86fdUL, 0x811009fd18af9a2dUL,
18177 ++ 0xff08d03f93d8c20aUL, 0x52a148199faef26bUL,
18178 ++ /* 154 */ 0x3e03f9dc2d8d1b73UL, 0x4205801873961a70UL,
18179 ++ 0xc0d987f041a35970UL, 0x07aa1f15a1c0d549UL,
18180 ++ /* 155 */ 0xdfd46ce08cd27224UL, 0x6d0a024f934e4239UL,
18181 ++ 0x808a7a6399897b59UL, 0x0a4556e9e13d95a2UL,
18182 ++ /* 156 */ 0xd21a991fe9c13045UL, 0x9b0e8548fe7751b8UL,
18183 ++ 0x5da643cb4bf30035UL, 0x77db28d63940f721UL,
18184 ++ /* 157 */ 0xfc5eeb614adc9011UL, 0x5229419ae8c411ebUL,
18185 ++ 0x9ec3e7787d1dcf74UL, 0x340d053e216e4cb5UL,
18186 ++ /* 158 */ 0xcac7af39b48df2b4UL, 0xc0faec2871a10a94UL,
18187 ++ 0x140a69245ca575edUL, 0x0cf1c37134273a4cUL,
18188 ++ /* 159 */ 0xc8ee306ac224b8a5UL, 0x57eaee7ccb4930b0UL,
18189 ++ 0xa1e806bdaacbe74fUL, 0x7d9a62742eeb657dUL,
18190 ++ /* 160 */ 0x9eb6b6ef546c4830UL, 0x885cca1fddb36e2eUL,
18191 ++ 0xe6b9f383ef0d7105UL, 0x58654fef9d2e0412UL,
18192 ++ /* 161 */ 0xa905c4ffbe0e8e26UL, 0x942de5df9b31816eUL,
18193 ++ 0x497d723f802e88e1UL, 0x30684dea602f408dUL,
18194 ++ /* 162 */ 0x21e5a278a3e6cb34UL, 0xaefb6e6f5b151dc4UL,
18195 ++ 0xb30b8e049d77ca15UL, 0x28c3c9cf53b98981UL,
18196 ++ /* 163 */ 0x287fb721556cdd2aUL, 0x0d317ca897022274UL,
18197 ++ 0x7468c7423a543258UL, 0x4a7f11464eb5642fUL,
18198 ++ /* 164 */ 0xa237a4774d193aa6UL, 0xd865986ea92129a1UL,
18199 ++ 0x24c515ecf87c1a88UL, 0x604003575f39f5ebUL,
18200 ++ /* 165 */ 0x47b9f189570a9b27UL, 0x2b98cede465e4b78UL,
18201 ++ 0x026df551dbb85c20UL, 0x74fcd91047e21901UL,
18202 ++ /* 166 */ 0x13e2a90a23c1bfa3UL, 0x0cb0074e478519f6UL,
18203 ++ 0x5ff1cbbe3af6cf44UL, 0x67fe5438be812dbeUL,
18204 ++ /* 167 */ 0xd13cf64fa40f05b0UL, 0x054dfb2f32283787UL,
18205 ++ 0x4173915b7f0d2aeaUL, 0x482f144f1f610d4eUL,
18206 ++ /* 168 */ 0xf6210201b47f8234UL, 0x5d0ae1929e70b990UL,
18207 ++ 0xdcd7f455b049567cUL, 0x7e93d0f1f0916f01UL,
18208 ++ /* 169 */ 0xdd79cbf18a7db4faUL, 0xbe8391bf6f74c62fUL,
18209 ++ 0x027145d14b8291bdUL, 0x585a73ea2cbf1705UL,
18210 ++ /* 170 */ 0x485ca03e928a0db2UL, 0x10fc01a5742857e7UL,
18211 ++ 0x2f482edbd6d551a7UL, 0x0f0433b5048fdb8aUL,
18212 ++ /* 171 */ 0x60da2e8dd7dc6247UL, 0x88b4c9d38cd4819aUL,
18213 ++ 0x13033ac001f66697UL, 0x273b24fe3b367d75UL,
18214 ++ /* 172 */ 0xc6e8f66a31b3b9d4UL, 0x281514a494df49d5UL,
18215 ++ 0xd1726fdfc8b23da7UL, 0x4b3ae7d103dee548UL,
18216 ++ /* 173 */ 0xc6256e19ce4b9d7eUL, 0xff5c5cf186e3c61cUL,
18217 ++ 0xacc63ca34b8ec145UL, 0x74621888fee66574UL,
18218 ++ /* 174 */ 0x956f409645290a1eUL, 0xef0bf8e3263a962eUL,
18219 ++ 0xed6a50eb5ec2647bUL, 0x0694283a9dca7502UL,
18220 ++ /* 175 */ 0x769b963643a2dcd1UL, 0x42b7c8ea09fc5353UL,
18221 ++ 0x4f002aee13397eabUL, 0x63005e2c19b7d63aUL,
18222 ++ /* 176 */ 0xca6736da63023beaUL, 0x966c7f6db12a99b7UL,
18223 ++ 0xace09390c537c5e1UL, 0x0b696063a1aa89eeUL,
18224 ++ /* 177 */ 0xebb03e97288c56e5UL, 0x432a9f9f938c8be8UL,
18225 ++ 0xa6a5a93d5b717f71UL, 0x1a5fb4c3e18f9d97UL,
18226 ++ /* 178 */ 0x1c94e7ad1c60cdceUL, 0xee202a43fc02c4a0UL,
18227 ++ 0x8dafe4d867c46a20UL, 0x0a10263c8ac27b58UL,
18228 ++ /* 179 */ 0xd0dea9dfe4432a4aUL, 0x856af87bbe9277c5UL,
18229 ++ 0xce8472acc212c71aUL, 0x6f151b6d9bbb1e91UL,
18230 ++ /* 180 */ 0x26776c527ceed56aUL, 0x7d211cb7fbf8faecUL,
18231 ++ 0x37ae66a6fd4609ccUL, 0x1f81b702d2770c42UL,
18232 ++ /* 181 */ 0x2fb0b057eac58392UL, 0xe1dd89fe29744e9dUL,
18233 ++ 0xc964f8eb17beb4f8UL, 0x29571073c9a2d41eUL,
18234 ++ /* 182 */ 0xa948a18981c0e254UL, 0x2df6369b65b22830UL,
18235 ++ 0xa33eb2d75fcfd3c6UL, 0x078cd6ec4199a01fUL,
18236 ++ /* 183 */ 0x4a584a41ad900d2fUL, 0x32142b78e2c74c52UL,
18237 ++ 0x68c4e8338431c978UL, 0x7f69ea9008689fc2UL,
18238 ++ /* 184 */ 0x52f2c81e46a38265UL, 0xfd78072d04a832fdUL,
18239 ++ 0x8cd7d5fa25359e94UL, 0x4de71b7454cc29d2UL,
18240 ++ /* 185 */ 0x42eb60ad1eda6ac9UL, 0x0aad37dfdbc09c3aUL,
18241 ++ 0x81004b71e33cc191UL, 0x44e6be345122803cUL,
18242 ++ /* 186 */ 0x03fe8388ba1920dbUL, 0xf5d57c32150db008UL,
18243 ++ 0x49c8c4281af60c29UL, 0x21edb518de701aeeUL,
18244 ++ /* 187 */ 0x7fb63e418f06dc99UL, 0xa4460d99c166d7b8UL,
18245 ++ 0x24dd5248ce520a83UL, 0x5ec3ad712b928358UL,
18246 ++ /* 188 */ 0x15022a5fbd17930fUL, 0xa4f64a77d82570e3UL,
18247 ++ 0x12bc8d6915783712UL, 0x498194c0fc620abbUL,
18248 ++ /* 189 */ 0x38a2d9d255686c82UL, 0x785c6bd9193e21f0UL,
18249 ++ 0xe4d5c81ab24a5484UL, 0x56307860b2e20989UL,
18250 ++ /* 190 */ 0x429d55f78b4d74c4UL, 0x22f1834643350131UL,
18251 ++ 0x1e60c24598c71fffUL, 0x59f2f014979983efUL,
18252 ++ /* 191 */ 0x46a47d56eb494a44UL, 0x3e22a854d636a18eUL,
18253 ++ 0xb346e15274491c3bUL, 0x2ceafd4e5390cde7UL,
18254 ++ /* 192 */ 0xba8a8538be0d6675UL, 0x4b9074bb50818e23UL,
18255 ++ 0xcbdab89085d304c3UL, 0x61a24fe0e56192c4UL,
18256 ++ /* 193 */ 0xcb7615e6db525bcbUL, 0xdd7d8c35a567e4caUL,
18257 ++ 0xe6b4153acafcdd69UL, 0x2d668e097f3c9766UL,
18258 ++ /* 194 */ 0xa57e7e265ce55ef0UL, 0x5d9f4e527cd4b967UL,
18259 ++ 0xfbc83606492fd1e5UL, 0x090d52beb7c3f7aeUL,
18260 ++ /* 195 */ 0x09b9515a1e7b4d7cUL, 0x1f266a2599da44c0UL,
18261 ++ 0xa1c49548e2c55504UL, 0x7ef04287126f15ccUL,
18262 ++ /* 196 */ 0xfed1659dbd30ef15UL, 0x8b4ab9eec4e0277bUL,
18263 ++ 0x884d6236a5df3291UL, 0x1fd96ea6bf5cf788UL,
18264 ++ /* 197 */ 0x42a161981f190d9aUL, 0x61d849507e6052c1UL,
18265 ++ 0x9fe113bf285a2cd5UL, 0x7c22d676dbad85d8UL,
18266 ++ /* 198 */ 0x82e770ed2bfbd27dUL, 0x4c05b2ece996f5a5UL,
18267 ++ 0xcd40a9c2b0900150UL, 0x5895319213d9bf64UL,
18268 ++ /* 199 */ 0xe7cc5d703fea2e08UL, 0xb50c491258e2188cUL,
18269 ++ 0xcce30baa48205bf0UL, 0x537c659ccfa32d62UL,
18270 ++ /* 200 */ 0x37b6623a98cfc088UL, 0xfe9bed1fa4d6aca4UL,
18271 ++ 0x04d29b8e56a8d1b0UL, 0x725f71c40b519575UL,
18272 ++ /* 201 */ 0x28c7f89cd0339ce6UL, 0x8367b14469ddc18bUL,
18273 ++ 0x883ada83a6a1652cUL, 0x585f1974034d6c17UL,
18274 ++ /* 202 */ 0x89cfb266f1b19188UL, 0xe63b4863e7c35217UL,
18275 ++ 0xd88c9da6b4c0526aUL, 0x3e035c9df0954635UL,
18276 ++ /* 203 */ 0xdd9d5412fb45de9dUL, 0xdd684532e4cff40dUL,
18277 ++ 0x4b5c999b151d671cUL, 0x2d8c2cc811e7f690UL,
18278 ++ /* 204 */ 0x7f54be1d90055d40UL, 0xa464c5df464aaf40UL,
18279 ++ 0x33979624f0e917beUL, 0x2c018dc527356b30UL,
18280 ++ /* 205 */ 0xa5415024e330b3d4UL, 0x73ff3d96691652d3UL,
18281 ++ 0x94ec42c4ef9b59f1UL, 0x0747201618d08e5aUL,
18282 ++ /* 206 */ 0x4d6ca48aca411c53UL, 0x66415f2fcfa66119UL,
18283 ++ 0x9c4dd40051e227ffUL, 0x59810bc09a02f7ebUL,
18284 ++ /* 207 */ 0x2a7eb171b3dc101dUL, 0x441c5ab99ffef68eUL,
18285 ++ 0x32025c9b93b359eaUL, 0x5e8ce0a71e9d112fUL,
18286 ++ /* 208 */ 0xbfcccb92429503fdUL, 0xd271ba752f095d55UL,
18287 ++ 0x345ead5e972d091eUL, 0x18c8df11a83103baUL,
18288 ++ /* 209 */ 0x90cd949a9aed0f4cUL, 0xc5d1f4cb6660e37eUL,
18289 ++ 0xb8cac52d56c52e0bUL, 0x6e42e400c5808e0dUL,
18290 ++ /* 210 */ 0xa3b46966eeaefd23UL, 0x0c4f1f0be39ecdcaUL,
18291 ++ 0x189dc8c9d683a51dUL, 0x51f27f054c09351bUL,
18292 ++ /* 211 */ 0x4c487ccd2a320682UL, 0x587ea95bb3df1c96UL,
18293 ++ 0xc8ccf79e555cb8e8UL, 0x547dc829a206d73dUL,
18294 ++ /* 212 */ 0xb822a6cd80c39b06UL, 0xe96d54732000d4c6UL,
18295 ++ 0x28535b6f91463b4dUL, 0x228f4660e2486e1dUL,
18296 ++ /* 213 */ 0x98799538de8d3abfUL, 0x8cd8330045ebca6eUL,
18297 ++ 0x79952a008221e738UL, 0x4322e1a7535cd2bbUL,
18298 ++ /* 214 */ 0xb114c11819d1801cUL, 0x2016e4d84f3f5ec7UL,
18299 ++ 0xdd0e2df409260f4cUL, 0x5ec362c0ae5f7266UL,
18300 ++ /* 215 */ 0xc0462b18b8b2b4eeUL, 0x7cc8d950274d1afbUL,
18301 ++ 0xf25f7105436b02d2UL, 0x43bbf8dcbff9ccd3UL,
18302 ++ /* 216 */ 0xb6ad1767a039e9dfUL, 0xb0714da8f69d3583UL,
18303 ++ 0x5e55fa18b42931f5UL, 0x4ed5558f33c60961UL,
18304 ++ /* 217 */ 0x1fe37901c647a5ddUL, 0x593ddf1f8081d357UL,
18305 ++ 0x0249a4fd813fd7a6UL, 0x69acca274e9caf61UL,
18306 ++ /* 218 */ 0x047ba3ea330721c9UL, 0x83423fc20e7e1ea0UL,
18307 ++ 0x1df4c0af01314a60UL, 0x09a62dab89289527UL,
18308 ++ /* 219 */ 0xa5b325a49cc6cb00UL, 0xe94b5dc654b56cb6UL,
18309 ++ 0x3be28779adc994a0UL, 0x4296e8f8ba3a4aadUL,
18310 ++ /* 220 */ 0x328689761e451eabUL, 0x2e4d598bff59594aUL,
18311 ++ 0x49b96853d7a7084aUL, 0x4980a319601420a8UL,
18312 ++ /* 221 */ 0x9565b9e12f552c42UL, 0x8a5318db7100fe96UL,
18313 ++ 0x05c90b4d43add0d7UL, 0x538b4cd66a5d4edaUL,
18314 ++ /* 222 */ 0xf4e94fc3e89f039fUL, 0x592c9af26f618045UL,
18315 ++ 0x08a36eb5fd4b9550UL, 0x25fffaf6c2ed1419UL,
18316 ++ /* 223 */ 0x34434459cc79d354UL, 0xeeecbfb4b1d5476bUL,
18317 ++ 0xddeb34a061615d99UL, 0x5129cecceb64b773UL,
18318 ++ /* 224 */ 0xee43215894993520UL, 0x772f9c7cf14c0b3bUL,
18319 ++ 0xd2e2fce306bedad5UL, 0x715f42b546f06a97UL,
18320 ++ /* 225 */ 0x434ecdceda5b5f1aUL, 0x0da17115a49741a9UL,
18321 ++ 0x680bd77c73edad2eUL, 0x487c02354edd9041UL,
18322 ++ /* 226 */ 0xb8efeff3a70ed9c4UL, 0x56a32aa3e857e302UL,
18323 ++ 0xdf3a68bd48a2a5a0UL, 0x07f650b73176c444UL,
18324 ++ /* 227 */ 0xe38b9b1626e0ccb1UL, 0x79e053c18b09fb36UL,
18325 ++ 0x56d90319c9f94964UL, 0x1ca941e7ac9ff5c4UL,
18326 ++ /* 228 */ 0x49c4df29162fa0bbUL, 0x8488cf3282b33305UL,
18327 ++ 0x95dfda14cabb437dUL, 0x3391f78264d5ad86UL,
18328 ++ /* 229 */ 0x729ae06ae2b5095dUL, 0xd58a58d73259a946UL,
18329 ++ 0xe9834262d13921edUL, 0x27fedafaa54bb592UL,
18330 ++ /* 230 */ 0xa99dc5b829ad48bbUL, 0x5f025742499ee260UL,
18331 ++ 0x802c8ecd5d7513fdUL, 0x78ceb3ef3f6dd938UL,
18332 ++ /* 231 */ 0xc342f44f8a135d94UL, 0x7b9edb44828cdda3UL,
18333 ++ 0x9436d11a0537cfe7UL, 0x5064b164ec1ab4c8UL,
18334 ++ /* 232 */ 0x7020eccfd37eb2fcUL, 0x1f31ea3ed90d25fcUL,
18335 ++ 0x1b930d7bdfa1bb34UL, 0x5344467a48113044UL,
18336 ++ /* 233 */ 0x70073170f25e6dfbUL, 0xe385dc1a50114cc8UL,
18337 ++ 0x2348698ac8fc4f00UL, 0x2a77a55284dd40d8UL,
18338 ++ /* 234 */ 0xfe06afe0c98c6ce4UL, 0xc235df96dddfd6e4UL,
18339 ++ 0x1428d01e33bf1ed3UL, 0x785768ec9300bdafUL,
18340 ++ /* 235 */ 0x9702e57a91deb63bUL, 0x61bdb8bfe5ce8b80UL,
18341 ++ 0x645b426f3d1d58acUL, 0x4804a82227a557bcUL,
18342 ++ /* 236 */ 0x8e57048ab44d2601UL, 0x68d6501a4b3a6935UL,
18343 ++ 0xc39c9ec3f9e1c293UL, 0x4172f257d4de63e2UL,
18344 ++ /* 237 */ 0xd368b450330c6401UL, 0x040d3017418f2391UL,
18345 ++ 0x2c34bb6090b7d90dUL, 0x16f649228fdfd51fUL,
18346 ++ /* 238 */ 0xbea6818e2b928ef5UL, 0xe28ccf91cdc11e72UL,
18347 ++ 0x594aaa68e77a36cdUL, 0x313034806c7ffd0fUL,
18348 ++ /* 239 */ 0x8a9d27ac2249bd65UL, 0x19a3b464018e9512UL,
18349 ++ 0xc26ccff352b37ec7UL, 0x056f68341d797b21UL,
18350 ++ /* 240 */ 0x5e79d6757efd2327UL, 0xfabdbcb6553afe15UL,
18351 ++ 0xd3e7222c6eaf5a60UL, 0x7046c76d4dae743bUL,
18352 ++ /* 241 */ 0x660be872b18d4a55UL, 0x19992518574e1496UL,
18353 ++ 0xc103053a302bdcbbUL, 0x3ed8e9800b218e8eUL,
18354 ++ /* 242 */ 0x7b0b9239fa75e03eUL, 0xefe9fb684633c083UL,
18355 ++ 0x98a35fbe391a7793UL, 0x6065510fe2d0fe34UL,
18356 ++ /* 243 */ 0x55cb668548abad0cUL, 0xb4584548da87e527UL,
18357 ++ 0x2c43ecea0107c1ddUL, 0x526028809372de35UL,
18358 ++ /* 244 */ 0x3415c56af9213b1fUL, 0x5bee1a4d017e98dbUL,
18359 ++ 0x13f6b105b5cf709bUL, 0x5ff20e3482b29ab6UL,
18360 ++ /* 245 */ 0x0aa29c75cc2e6c90UL, 0xfc7d73ca3a70e206UL,
18361 ++ 0x899fc38fc4b5c515UL, 0x250386b124ffc207UL,
18362 ++ /* 246 */ 0x54ea28d5ae3d2b56UL, 0x9913149dd6de60ceUL,
18363 ++ 0x16694fc58f06d6c1UL, 0x46b23975eb018fc7UL,
18364 ++ /* 247 */ 0x470a6a0fb4b7b4e2UL, 0x5d92475a8f7253deUL,
18365 ++ 0xabeee5b52fbd3adbUL, 0x7fa20801a0806968UL,
18366 ++ /* 248 */ 0x76f3faf19f7714d2UL, 0xb3e840c12f4660c3UL,
18367 ++ 0x0fb4cd8df212744eUL, 0x4b065a251d3a2dd2UL,
18368 ++ /* 249 */ 0x5cebde383d77cd4aUL, 0x6adf39df882c9cb1UL,
18369 ++ 0xa2dd242eb09af759UL, 0x3147c0e50e5f6422UL,
18370 ++ /* 250 */ 0x164ca5101d1350dbUL, 0xf8d13479c33fc962UL,
18371 ++ 0xe640ce4d13e5da08UL, 0x4bdee0c45061f8baUL,
18372 ++ /* 251 */ 0xd7c46dc1a4edb1c9UL, 0x5514d7b6437fd98aUL,
18373 ++ 0x58942f6bb2a1c00bUL, 0x2dffb2ab1d70710eUL,
18374 ++ /* 252 */ 0xccdfcf2fc18b6d68UL, 0xa8ebcba8b7806167UL,
18375 ++ 0x980697f95e2937e3UL, 0x02fbba1cd0126e8cUL
18376 ++};
18377 ++
18378 ++/* c is two 512-bit products: c[0:7] = a[0:3] * b[0:3] and c[8:15] = a[4:7] * b[4:7], computed with mulx and the adox/adcx carry chains.
18379 ++ * a is two 256-bit integers, a[0:3] and a[4:7], stored as 64-bit limbs with the least significant limb first (a is only read).
18380 ++ * b is two 256-bit integers, b[0:3] and b[4:7], same layout (only read). c limbs are stored while input limbs are still unread, so c overlapping a or b would change the result.
18381 ++ */
18382 ++static void mul2_256x256_integer_adx(u64 *const c, const u64 *const a,
18383 ++ const u64 *const b)
18384 ++{
18385 ++ asm volatile(
18386 ++ "xorl %%r14d, %%r14d ;" /* r14 = 0, the zero addend for the carry folds below; xor also clears CF and OF */
18387 ++ "movq (%1), %%rdx; " /* rdx = A[0] (implicit mulx source) */
18388 ++ "mulx (%2), %%r8, %%r15; " /* r15:r8 = A[0]*B[0] */
18389 ++ "xorl %%r10d, %%r10d ;" /* clears CF and OF; r10 itself is overwritten by the next mulx */
18390 ++ "movq %%r8, (%0) ;" /* c[0] */
18391 ++ "mulx 8(%2), %%r10, %%rax; " /* rax:r10 = A[0]*B[1] */
18392 ++ "adox %%r10, %%r15 ;"
18393 ++ "mulx 16(%2), %%r8, %%rbx; " /* rbx:r8 = A[0]*B[2] */
18394 ++ "adox %%r8, %%rax ;"
18395 ++ "mulx 24(%2), %%r10, %%rcx; " /* rcx:r10 = A[0]*B[3] */
18396 ++ "adox %%r10, %%rbx ;"
18397 ++ /* row A[0] done: fold the last OF carry into the top limb */
18398 ++ "adox %%r14, %%rcx ;"
18399 ++
18400 ++ "movq 8(%1), %%rdx; " /* rdx = A[1]; NOTE(review): this row has no flag-clearing xorl, unlike the other rows -- presumably CF/OF are already 0 here; confirm */
18401 ++ "mulx (%2), %%r8, %%r9; " /* r9:r8 = A[1]*B[0] */
18402 ++ "adox %%r15, %%r8 ;"
18403 ++ "movq %%r8, 8(%0) ;" /* c[1] */
18404 ++ "mulx 8(%2), %%r10, %%r11; " /* r11:r10 = A[1]*B[1] */
18405 ++ "adox %%r10, %%r9 ;" /* OF chain: low half of this product + high half of the previous one */
18406 ++ "adcx %%r9, %%rax ;" /* CF chain: accumulate that sum into the running total */
18407 ++ "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */
18408 ++ "adox %%r8, %%r11 ;"
18409 ++ "adcx %%r11, %%rbx ;"
18410 ++ "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */
18411 ++ "adox %%r10, %%r13 ;"
18412 ++ "adcx %%r13, %%rcx ;"
18413 ++ /* row A[1] done: fold the remaining OF and CF carries into the top limb */
18414 ++ "adox %%r14, %%r15 ;"
18415 ++ "adcx %%r14, %%r15 ;"
18416 ++
18417 ++ "movq 16(%1), %%rdx; " /* rdx = A[2] */
18418 ++ "xorl %%r10d, %%r10d ;" /* clear CF and OF for this row */
18419 ++ "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */
18420 ++ "adox %%rax, %%r8 ;"
18421 ++ "movq %%r8, 16(%0) ;" /* c[2] */
18422 ++ "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */
18423 ++ "adox %%r10, %%r9 ;"
18424 ++ "adcx %%r9, %%rbx ;"
18425 ++ "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */
18426 ++ "adox %%r8, %%r11 ;"
18427 ++ "adcx %%r11, %%rcx ;"
18428 ++ "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */
18429 ++ "adox %%r10, %%r13 ;"
18430 ++ "adcx %%r13, %%r15 ;"
18431 ++ /* row A[2] done: fold the remaining OF and CF carries into the top limb */
18432 ++ "adox %%r14, %%rax ;"
18433 ++ "adcx %%r14, %%rax ;"
18434 ++
18435 ++ "movq 24(%1), %%rdx; " /* rdx = A[3] */
18436 ++ "xorl %%r10d, %%r10d ;" /* clear CF and OF for this row */
18437 ++ "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */
18438 ++ "adox %%rbx, %%r8 ;"
18439 ++ "movq %%r8, 24(%0) ;" /* c[3] */
18440 ++ "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */
18441 ++ "adox %%r10, %%r9 ;"
18442 ++ "adcx %%r9, %%rcx ;"
18443 ++ "movq %%rcx, 32(%0) ;" /* c[4] */
18444 ++ "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */
18445 ++ "adox %%r8, %%r11 ;"
18446 ++ "adcx %%r11, %%r15 ;"
18447 ++ "movq %%r15, 40(%0) ;" /* c[5] */
18448 ++ "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */
18449 ++ "adox %%r10, %%r13 ;"
18450 ++ "adcx %%r13, %%rax ;"
18451 ++ "movq %%rax, 48(%0) ;" /* c[6] */
18452 ++ /* row A[3] done: fold the remaining OF and CF carries into the top limb */
18453 ++ "adox %%r14, %%rbx ;"
18454 ++ "adcx %%r14, %%rbx ;"
18455 ++ "movq %%rbx, 56(%0) ;" /* c[7]: first product complete */
18456 ++
18457 ++ "movq 32(%1), %%rdx; " /* C[0] = a[4]; C and D denote the second operand pair a[4:7] and b[4:7] */
18458 ++ "mulx 32(%2), %%r8, %%r15; " /* C[0]*D[0] */
18459 ++ "xorl %%r10d, %%r10d ;" /* clears CF and OF; r10 itself is overwritten by the next mulx */
18460 ++ "movq %%r8, 64(%0);" /* c[8] */
18461 ++ "mulx 40(%2), %%r10, %%rax; " /* C[0]*D[1] */
18462 ++ "adox %%r10, %%r15 ;"
18463 ++ "mulx 48(%2), %%r8, %%rbx; " /* C[0]*D[2] */
18464 ++ "adox %%r8, %%rax ;"
18465 ++ "mulx 56(%2), %%r10, %%rcx; " /* C[0]*D[3] */
18466 ++ "adox %%r10, %%rbx ;"
18467 ++ /* row C[0] done: fold the last OF carry into the top limb */
18468 ++ "adox %%r14, %%rcx ;"
18469 ++
18470 ++ "movq 40(%1), %%rdx; " /* C[1] = a[5] */
18471 ++ "xorl %%r10d, %%r10d ;" /* clear CF and OF for this row */
18472 ++ "mulx 32(%2), %%r8, %%r9; " /* C[1]*D[0] */
18473 ++ "adox %%r15, %%r8 ;"
18474 ++ "movq %%r8, 72(%0);" /* c[9] */
18475 ++ "mulx 40(%2), %%r10, %%r11; " /* C[1]*D[1] */
18476 ++ "adox %%r10, %%r9 ;"
18477 ++ "adcx %%r9, %%rax ;"
18478 ++ "mulx 48(%2), %%r8, %%r13; " /* C[1]*D[2] */
18479 ++ "adox %%r8, %%r11 ;"
18480 ++ "adcx %%r11, %%rbx ;"
18481 ++ "mulx 56(%2), %%r10, %%r15; " /* C[1]*D[3] */
18482 ++ "adox %%r10, %%r13 ;"
18483 ++ "adcx %%r13, %%rcx ;"
18484 ++ /* row C[1] done: fold the remaining OF and CF carries into the top limb */
18485 ++ "adox %%r14, %%r15 ;"
18486 ++ "adcx %%r14, %%r15 ;"
18487 ++
18488 ++ "movq 48(%1), %%rdx; " /* C[2] = a[6] */
18489 ++ "xorl %%r10d, %%r10d ;" /* clear CF and OF for this row */
18490 ++ "mulx 32(%2), %%r8, %%r9; " /* C[2]*D[0] */
18491 ++ "adox %%rax, %%r8 ;"
18492 ++ "movq %%r8, 80(%0);" /* c[10] */
18493 ++ "mulx 40(%2), %%r10, %%r11; " /* C[2]*D[1] */
18494 ++ "adox %%r10, %%r9 ;"
18495 ++ "adcx %%r9, %%rbx ;"
18496 ++ "mulx 48(%2), %%r8, %%r13; " /* C[2]*D[2] */
18497 ++ "adox %%r8, %%r11 ;"
18498 ++ "adcx %%r11, %%rcx ;"
18499 ++ "mulx 56(%2), %%r10, %%rax; " /* C[2]*D[3] */
18500 ++ "adox %%r10, %%r13 ;"
18501 ++ "adcx %%r13, %%r15 ;"
18502 ++ /* row C[2] done: fold the remaining OF and CF carries into the top limb */
18503 ++ "adox %%r14, %%rax ;"
18504 ++ "adcx %%r14, %%rax ;"
18505 ++
18506 ++ "movq 56(%1), %%rdx; " /* C[3] = a[7] */
18507 ++ "xorl %%r10d, %%r10d ;" /* clear CF and OF for this row */
18508 ++ "mulx 32(%2), %%r8, %%r9; " /* C[3]*D[0] */
18509 ++ "adox %%rbx, %%r8 ;"
18510 ++ "movq %%r8, 88(%0);" /* c[11] */
18511 ++ "mulx 40(%2), %%r10, %%r11; " /* C[3]*D[1] */
18512 ++ "adox %%r10, %%r9 ;"
18513 ++ "adcx %%r9, %%rcx ;"
18514 ++ "movq %%rcx, 96(%0) ;" /* c[12] */
18515 ++ "mulx 48(%2), %%r8, %%r13; " /* C[3]*D[2] */
18516 ++ "adox %%r8, %%r11 ;"
18517 ++ "adcx %%r11, %%r15 ;"
18518 ++ "movq %%r15, 104(%0) ;" /* c[13] */
18519 ++ "mulx 56(%2), %%r10, %%rbx; " /* C[3]*D[3] */
18520 ++ "adox %%r10, %%r13 ;"
18521 ++ "adcx %%r13, %%rax ;"
18522 ++ "movq %%rax, 112(%0) ;" /* c[14] */
18523 ++ /* row C[3] done: fold the remaining OF and CF carries into the top limb */
18524 ++ "adox %%r14, %%rbx ;"
18525 ++ "adcx %%r14, %%rbx ;"
18526 ++ "movq %%rbx, 120(%0) ;" /* c[15]: second product complete */
18527 ++ : /* no output operands: results are stored to memory through %0 */
18528 ++ : "r"(c), "r"(a), "r"(b) /* %0 = c, %1 = a, %2 = b */
18529 ++ : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
18530 ++ "%r10", "%r11", "%r13", "%r14", "%r15");
18531 ++}
18532 ++
18533 ++static void mul2_256x256_integer_bmi2(u64 *const c, const u64 *const a,
18534 ++ const u64 *const b)
18535 ++{
18536 ++ asm volatile(
18537 ++ "movq (%1), %%rdx; " /* A[0] */
18538 ++ "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */
18539 ++ "movq %%r8, (%0) ;"
18540 ++ "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */
18541 ++ "addq %%r10, %%r15 ;"
18542 ++ "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */
18543 ++ "adcq %%r8, %%rax ;"
18544 ++ "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */
18545 ++ "adcq %%r10, %%rbx ;"
18546 ++ /******************************************/
18547 ++ "adcq $0, %%rcx ;"
18548 ++
18549 ++ "movq 8(%1), %%rdx; " /* A[1] */
18550 ++ "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */
18551 ++ "addq %%r15, %%r8 ;"
18552 ++ "movq %%r8, 8(%0) ;"
18553 ++ "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */
18554 ++ "adcq %%r10, %%r9 ;"
18555 ++ "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */
18556 ++ "adcq %%r8, %%r11 ;"
18557 ++ "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */
18558 ++ "adcq %%r10, %%r13 ;"
18559 ++ /******************************************/
18560 ++ "adcq $0, %%r15 ;"
18561 ++
18562 ++ "addq %%r9, %%rax ;"
18563 ++ "adcq %%r11, %%rbx ;"
18564 ++ "adcq %%r13, %%rcx ;"
18565 ++ "adcq $0, %%r15 ;"
18566 ++
18567 ++ "movq 16(%1), %%rdx; " /* A[2] */
18568 ++ "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */
18569 ++ "addq %%rax, %%r8 ;"
18570 ++ "movq %%r8, 16(%0) ;"
18571 ++ "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */
18572 ++ "adcq %%r10, %%r9 ;"
18573 ++ "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */
18574 ++ "adcq %%r8, %%r11 ;"
18575 ++ "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */
18576 ++ "adcq %%r10, %%r13 ;"
18577 ++ /******************************************/
18578 ++ "adcq $0, %%rax ;"
18579 ++
18580 ++ "addq %%r9, %%rbx ;"
18581 ++ "adcq %%r11, %%rcx ;"
18582 ++ "adcq %%r13, %%r15 ;"
18583 ++ "adcq $0, %%rax ;"
18584 ++
18585 ++ "movq 24(%1), %%rdx; " /* A[3] */
18586 ++ "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */
18587 ++ "addq %%rbx, %%r8 ;"
18588 ++ "movq %%r8, 24(%0) ;"
18589 ++ "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */
18590 ++ "adcq %%r10, %%r9 ;"
18591 ++ "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */
18592 ++ "adcq %%r8, %%r11 ;"
18593 ++ "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */
18594 ++ "adcq %%r10, %%r13 ;"
18595 ++ /******************************************/
18596 ++ "adcq $0, %%rbx ;"
18597 ++
18598 ++ "addq %%r9, %%rcx ;"
18599 ++ "movq %%rcx, 32(%0) ;"
18600 ++ "adcq %%r11, %%r15 ;"
18601 ++ "movq %%r15, 40(%0) ;"
18602 ++ "adcq %%r13, %%rax ;"
18603 ++ "movq %%rax, 48(%0) ;"
18604 ++ "adcq $0, %%rbx ;"
18605 ++ "movq %%rbx, 56(%0) ;"
18606 ++
18607 ++ "movq 32(%1), %%rdx; " /* C[0] */
18608 ++ "mulx 32(%2), %%r8, %%r15; " /* C[0]*D[0] */
18609 ++ "movq %%r8, 64(%0) ;"
18610 ++ "mulx 40(%2), %%r10, %%rax; " /* C[0]*D[1] */
18611 ++ "addq %%r10, %%r15 ;"
18612 ++ "mulx 48(%2), %%r8, %%rbx; " /* C[0]*D[2] */
18613 ++ "adcq %%r8, %%rax ;"
18614 ++ "mulx 56(%2), %%r10, %%rcx; " /* C[0]*D[3] */
18615 ++ "adcq %%r10, %%rbx ;"
18616 ++ /******************************************/
18617 ++ "adcq $0, %%rcx ;"
18618 ++
18619 ++ "movq 40(%1), %%rdx; " /* C[1] */
18620 ++ "mulx 32(%2), %%r8, %%r9; " /* C[1]*D[0] */
18621 ++ "addq %%r15, %%r8 ;"
18622 ++ "movq %%r8, 72(%0) ;"
18623 ++ "mulx 40(%2), %%r10, %%r11; " /* C[1]*D[1] */
18624 ++ "adcq %%r10, %%r9 ;"
18625 ++ "mulx 48(%2), %%r8, %%r13; " /* C[1]*D[2] */
18626 ++ "adcq %%r8, %%r11 ;"
18627 ++ "mulx 56(%2), %%r10, %%r15; " /* C[1]*D[3] */
18628 ++ "adcq %%r10, %%r13 ;"
18629 ++ /******************************************/
18630 ++ "adcq $0, %%r15 ;"
18631 ++
18632 ++ "addq %%r9, %%rax ;"
18633 ++ "adcq %%r11, %%rbx ;"
18634 ++ "adcq %%r13, %%rcx ;"
18635 ++ "adcq $0, %%r15 ;"
18636 ++
18637 ++ "movq 48(%1), %%rdx; " /* C[2] */
18638 ++ "mulx 32(%2), %%r8, %%r9; " /* C[2]*D[0] */
18639 ++ "addq %%rax, %%r8 ;"
18640 ++ "movq %%r8, 80(%0) ;"
18641 ++ "mulx 40(%2), %%r10, %%r11; " /* C[2]*D[1] */
18642 ++ "adcq %%r10, %%r9 ;"
18643 ++ "mulx 48(%2), %%r8, %%r13; " /* C[2]*D[2] */
18644 ++ "adcq %%r8, %%r11 ;"
18645 ++ "mulx 56(%2), %%r10, %%rax; " /* C[2]*D[3] */
18646 ++ "adcq %%r10, %%r13 ;"
18647 ++ /******************************************/
18648 ++ "adcq $0, %%rax ;"
18649 ++
18650 ++ "addq %%r9, %%rbx ;"
18651 ++ "adcq %%r11, %%rcx ;"
18652 ++ "adcq %%r13, %%r15 ;"
18653 ++ "adcq $0, %%rax ;"
18654 ++
18655 ++ "movq 56(%1), %%rdx; " /* C[3] */
18656 ++ "mulx 32(%2), %%r8, %%r9; " /* C[3]*D[0] */
18657 ++ "addq %%rbx, %%r8 ;"
18658 ++ "movq %%r8, 88(%0) ;"
18659 ++ "mulx 40(%2), %%r10, %%r11; " /* C[3]*D[1] */
18660 ++ "adcq %%r10, %%r9 ;"
18661 ++ "mulx 48(%2), %%r8, %%r13; " /* C[3]*D[2] */
18662 ++ "adcq %%r8, %%r11 ;"
18663 ++ "mulx 56(%2), %%r10, %%rbx; " /* C[3]*D[3] */
18664 ++ "adcq %%r10, %%r13 ;"
18665 ++ /******************************************/
18666 ++ "adcq $0, %%rbx ;"
18667 ++
18668 ++ "addq %%r9, %%rcx ;"
18669 ++ "movq %%rcx, 96(%0) ;"
18670 ++ "adcq %%r11, %%r15 ;"
18671 ++ "movq %%r15, 104(%0) ;"
18672 ++ "adcq %%r13, %%rax ;"
18673 ++ "movq %%rax, 112(%0) ;"
18674 ++ "adcq $0, %%rbx ;"
18675 ++ "movq %%rbx, 120(%0) ;"
18676 ++ :
18677 ++ : "r"(c), "r"(a), "r"(b)
18678 ++ : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
18679 ++ "%r10", "%r11", "%r13", "%r15");
18680 ++}
18681 ++/* Squares two 256-bit integers stored back to back in a (u64 words 0-3 and 4-7); the 512-bit squares go to c words 0-7 and 8-15. c is stored to while a is still being read. */
18682 ++static void sqr2_256x256_integer_adx(u64 *const c, const u64 *const a)
18683 ++{
18684 ++ asm volatile(
18685 ++ "movq (%1), %%rdx ;" /* A[0] */
18686 ++ "mulx 8(%1), %%r8, %%r14 ;" /* A[1]*A[0] */
18687 ++ "xorl %%r15d, %%r15d;" /* r15 = 0; also clears CF and OF */
18688 ++ "mulx 16(%1), %%r9, %%r10 ;" /* A[2]*A[0] */
18689 ++ "adcx %%r14, %%r9 ;"
18690 ++ "mulx 24(%1), %%rax, %%rcx ;" /* A[3]*A[0] */
18691 ++ "adcx %%rax, %%r10 ;"
18692 ++ "movq 24(%1), %%rdx ;" /* A[3] */
18693 ++ "mulx 8(%1), %%r11, %%rbx ;" /* A[1]*A[3] */
18694 ++ "adcx %%rcx, %%r11 ;"
18695 ++ "mulx 16(%1), %%rax, %%r13 ;" /* A[2]*A[3] */
18696 ++ "adcx %%rax, %%rbx ;"
18697 ++ "movq 8(%1), %%rdx ;" /* A[1] */
18698 ++ "adcx %%r15, %%r13 ;"
18699 ++ "mulx 16(%1), %%rax, %%rcx ;" /* A[2]*A[1] */
18700 ++ "movq $0, %%r14 ;" /* mov, not xor, so the pending CF survives */
18701 ++ /* r14 = last carry of the cross-product sum */
18702 ++ "adcx %%r15, %%r14 ;"
18703 ++ /* OF chain (adox) adds A[2]*A[1]; CF chain (adcx x,x) doubles every cross-product word */
18704 ++ "xorl %%r15d, %%r15d;"
18705 ++ "adox %%rax, %%r10 ;"
18706 ++ "adcx %%r8, %%r8 ;"
18707 ++ "adox %%rcx, %%r11 ;"
18708 ++ "adcx %%r9, %%r9 ;"
18709 ++ "adox %%r15, %%rbx ;"
18710 ++ "adcx %%r10, %%r10 ;"
18711 ++ "adox %%r15, %%r13 ;"
18712 ++ "adcx %%r11, %%r11 ;"
18713 ++ "adox %%r15, %%r14 ;"
18714 ++ "adcx %%rbx, %%rbx ;"
18715 ++ "adcx %%r13, %%r13 ;"
18716 ++ "adcx %%r14, %%r14 ;"
18717 ++ /* Add the squares A[i]^2 with one addq/adcq chain, storing each result word as it becomes final */
18718 ++ "movq (%1), %%rdx ;"
18719 ++ "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */
18720 ++ /*******************/
18721 ++ "movq %%rax, 0(%0) ;"
18722 ++ "addq %%rcx, %%r8 ;"
18723 ++ "movq %%r8, 8(%0) ;"
18724 ++ "movq 8(%1), %%rdx ;"
18725 ++ "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */
18726 ++ "adcq %%rax, %%r9 ;"
18727 ++ "movq %%r9, 16(%0) ;"
18728 ++ "adcq %%rcx, %%r10 ;"
18729 ++ "movq %%r10, 24(%0) ;"
18730 ++ "movq 16(%1), %%rdx ;"
18731 ++ "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */
18732 ++ "adcq %%rax, %%r11 ;"
18733 ++ "movq %%r11, 32(%0) ;"
18734 ++ "adcq %%rcx, %%rbx ;"
18735 ++ "movq %%rbx, 40(%0) ;"
18736 ++ "movq 24(%1), %%rdx ;"
18737 ++ "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */
18738 ++ "adcq %%rax, %%r13 ;"
18739 ++ "movq %%r13, 48(%0) ;"
18740 ++ "adcq %%rcx, %%r14 ;"
18741 ++ "movq %%r14, 56(%0) ;"
18742 ++ /* Second integer B: same sequence on byte offsets 32..56 of %1, result at byte offsets 64..120 of %0 */
18743 ++
18744 ++ "movq 32(%1), %%rdx ;" /* B[0] */
18745 ++ "mulx 40(%1), %%r8, %%r14 ;" /* B[1]*B[0] */
18746 ++ "xorl %%r15d, %%r15d;" /* r15 = 0; also clears CF and OF */
18747 ++ "mulx 48(%1), %%r9, %%r10 ;" /* B[2]*B[0] */
18748 ++ "adcx %%r14, %%r9 ;"
18749 ++ "mulx 56(%1), %%rax, %%rcx ;" /* B[3]*B[0] */
18750 ++ "adcx %%rax, %%r10 ;"
18751 ++ "movq 56(%1), %%rdx ;" /* B[3] */
18752 ++ "mulx 40(%1), %%r11, %%rbx ;" /* B[1]*B[3] */
18753 ++ "adcx %%rcx, %%r11 ;"
18754 ++ "mulx 48(%1), %%rax, %%r13 ;" /* B[2]*B[3] */
18755 ++ "adcx %%rax, %%rbx ;"
18756 ++ "movq 40(%1), %%rdx ;" /* B[1] */
18757 ++ "adcx %%r15, %%r13 ;"
18758 ++ "mulx 48(%1), %%rax, %%rcx ;" /* B[2]*B[1] */
18759 ++ "movq $0, %%r14 ;" /* mov, not xor, so the pending CF survives */
18760 ++ /* r14 = last carry of the cross-product sum */
18761 ++ "adcx %%r15, %%r14 ;"
18762 ++ /* OF chain adds B[2]*B[1]; CF chain doubles every cross-product word */
18763 ++ "xorl %%r15d, %%r15d;"
18764 ++ "adox %%rax, %%r10 ;"
18765 ++ "adcx %%r8, %%r8 ;"
18766 ++ "adox %%rcx, %%r11 ;"
18767 ++ "adcx %%r9, %%r9 ;"
18768 ++ "adox %%r15, %%rbx ;"
18769 ++ "adcx %%r10, %%r10 ;"
18770 ++ "adox %%r15, %%r13 ;"
18771 ++ "adcx %%r11, %%r11 ;"
18772 ++ "adox %%r15, %%r14 ;"
18773 ++ "adcx %%rbx, %%rbx ;"
18774 ++ "adcx %%r13, %%r13 ;"
18775 ++ "adcx %%r14, %%r14 ;"
18776 ++ /* Add the squares B[i]^2 and store the second result */
18777 ++ "movq 32(%1), %%rdx ;"
18778 ++ "mulx %%rdx, %%rax, %%rcx ;" /* B[0]^2 */
18779 ++ /*******************/
18780 ++ "movq %%rax, 64(%0) ;"
18781 ++ "addq %%rcx, %%r8 ;"
18782 ++ "movq %%r8, 72(%0) ;"
18783 ++ "movq 40(%1), %%rdx ;"
18784 ++ "mulx %%rdx, %%rax, %%rcx ;" /* B[1]^2 */
18785 ++ "adcq %%rax, %%r9 ;"
18786 ++ "movq %%r9, 80(%0) ;"
18787 ++ "adcq %%rcx, %%r10 ;"
18788 ++ "movq %%r10, 88(%0) ;"
18789 ++ "movq 48(%1), %%rdx ;"
18790 ++ "mulx %%rdx, %%rax, %%rcx ;" /* B[2]^2 */
18791 ++ "adcq %%rax, %%r11 ;"
18792 ++ "movq %%r11, 96(%0) ;"
18793 ++ "adcq %%rcx, %%rbx ;"
18794 ++ "movq %%rbx, 104(%0) ;"
18795 ++ "movq 56(%1), %%rdx ;"
18796 ++ "mulx %%rdx, %%rax, %%rcx ;" /* B[3]^2 */
18797 ++ "adcq %%rax, %%r13 ;"
18798 ++ "movq %%r13, 112(%0) ;"
18799 ++ "adcq %%rcx, %%r14 ;"
18800 ++ "movq %%r14, 120(%0) ;"
18801 ++ :
18802 ++ : "r"(c), "r"(a)
18803 ++ : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
18804 ++ "%r10", "%r11", "%r13", "%r14", "%r15");
18805 ++}
18806 ++/* Same contract as the _adx variant (two 256-bit squares: a words 0-3 and 4-7 into c words 0-7 and 8-15) using only mulx with add/adc and shld. */
18807 ++static void sqr2_256x256_integer_bmi2(u64 *const c, const u64 *const a)
18808 ++{
18809 ++ asm volatile(
18810 ++ "movq 8(%1), %%rdx ;" /* A[1] */
18811 ++ "mulx (%1), %%r8, %%r9 ;" /* A[0]*A[1] */
18812 ++ "mulx 16(%1), %%r10, %%r11 ;" /* A[2]*A[1] */
18813 ++ "mulx 24(%1), %%rcx, %%r14 ;" /* A[3]*A[1] */
18814 ++
18815 ++ "movq 16(%1), %%rdx ;" /* A[2] */
18816 ++ "mulx 24(%1), %%r15, %%r13 ;" /* A[3]*A[2] */
18817 ++ "mulx (%1), %%rax, %%rdx ;" /* A[0]*A[2] */
18818 ++ /* Sum the cross products; r14 is reused to catch the final carry */
18819 ++ "addq %%rax, %%r9 ;"
18820 ++ "adcq %%rdx, %%r10 ;"
18821 ++ "adcq %%rcx, %%r11 ;"
18822 ++ "adcq %%r14, %%r15 ;"
18823 ++ "adcq $0, %%r13 ;"
18824 ++ "movq $0, %%r14 ;" /* mov, not xor, so the pending CF survives */
18825 ++ "adcq $0, %%r14 ;"
18826 ++
18827 ++ "movq (%1), %%rdx ;" /* A[0] */
18828 ++ "mulx 24(%1), %%rax, %%rcx ;" /* A[0]*A[3] */
18829 ++
18830 ++ "addq %%rax, %%r10 ;"
18831 ++ "adcq %%rcx, %%r11 ;"
18832 ++ "adcq $0, %%r15 ;"
18833 ++ "adcq $0, %%r13 ;"
18834 ++ "adcq $0, %%r14 ;"
18835 ++ /* Double the cross-product sum: shift r14:r13:r15:r11:r10:r9:r8 left by one bit */
18836 ++ "shldq $1, %%r13, %%r14 ;"
18837 ++ "shldq $1, %%r15, %%r13 ;"
18838 ++ "shldq $1, %%r11, %%r15 ;"
18839 ++ "shldq $1, %%r10, %%r11 ;"
18840 ++ "shldq $1, %%r9, %%r10 ;"
18841 ++ "shldq $1, %%r8, %%r9 ;"
18842 ++ "shlq $1, %%r8 ;"
18843 ++
18844 ++ /* rdx still holds A[0] from the load above; now add the squares A[i]^2 */
18845 ++ "mulx %%rdx, %%rax, %%rcx ; " /* A[0]^2 */
18846 ++ /*******************/
18847 ++ "movq %%rax, 0(%0) ;"
18848 ++ "addq %%rcx, %%r8 ;"
18849 ++ "movq %%r8, 8(%0) ;"
18850 ++ "movq 8(%1), %%rdx ;"
18851 ++ "mulx %%rdx, %%rax, %%rcx ; " /* A[1]^2 */
18852 ++ "adcq %%rax, %%r9 ;"
18853 ++ "movq %%r9, 16(%0) ;"
18854 ++ "adcq %%rcx, %%r10 ;"
18855 ++ "movq %%r10, 24(%0) ;"
18856 ++ "movq 16(%1), %%rdx ;"
18857 ++ "mulx %%rdx, %%rax, %%rcx ; " /* A[2]^2 */
18858 ++ "adcq %%rax, %%r11 ;"
18859 ++ "movq %%r11, 32(%0) ;"
18860 ++ "adcq %%rcx, %%r15 ;"
18861 ++ "movq %%r15, 40(%0) ;"
18862 ++ "movq 24(%1), %%rdx ;"
18863 ++ "mulx %%rdx, %%rax, %%rcx ; " /* A[3]^2 */
18864 ++ "adcq %%rax, %%r13 ;"
18865 ++ "movq %%r13, 48(%0) ;"
18866 ++ "adcq %%rcx, %%r14 ;"
18867 ++ "movq %%r14, 56(%0) ;"
18868 ++ /* Second integer B: same sequence on byte offsets 32..56 of %1, result at byte offsets 64..120 of %0 */
18869 ++ "movq 40(%1), %%rdx ;" /* B[1] */
18870 ++ "mulx 32(%1), %%r8, %%r9 ;" /* B[0]*B[1] */
18871 ++ "mulx 48(%1), %%r10, %%r11 ;" /* B[2]*B[1] */
18872 ++ "mulx 56(%1), %%rcx, %%r14 ;" /* B[3]*B[1] */
18873 ++
18874 ++ "movq 48(%1), %%rdx ;" /* B[2] */
18875 ++ "mulx 56(%1), %%r15, %%r13 ;" /* B[3]*B[2] */
18876 ++ "mulx 32(%1), %%rax, %%rdx ;" /* B[0]*B[2] */
18877 ++ /* Sum the cross products; r14 is reused to catch the final carry */
18878 ++ "addq %%rax, %%r9 ;"
18879 ++ "adcq %%rdx, %%r10 ;"
18880 ++ "adcq %%rcx, %%r11 ;"
18881 ++ "adcq %%r14, %%r15 ;"
18882 ++ "adcq $0, %%r13 ;"
18883 ++ "movq $0, %%r14 ;" /* mov, not xor, so the pending CF survives */
18884 ++ "adcq $0, %%r14 ;"
18885 ++
18886 ++ "movq 32(%1), %%rdx ;" /* B[0] */
18887 ++ "mulx 56(%1), %%rax, %%rcx ;" /* B[0]*B[3] */
18888 ++
18889 ++ "addq %%rax, %%r10 ;"
18890 ++ "adcq %%rcx, %%r11 ;"
18891 ++ "adcq $0, %%r15 ;"
18892 ++ "adcq $0, %%r13 ;"
18893 ++ "adcq $0, %%r14 ;"
18894 ++ /* Double the cross-product sum: shift r14:r13:r15:r11:r10:r9:r8 left by one bit */
18895 ++ "shldq $1, %%r13, %%r14 ;"
18896 ++ "shldq $1, %%r15, %%r13 ;"
18897 ++ "shldq $1, %%r11, %%r15 ;"
18898 ++ "shldq $1, %%r10, %%r11 ;"
18899 ++ "shldq $1, %%r9, %%r10 ;"
18900 ++ "shldq $1, %%r8, %%r9 ;"
18901 ++ "shlq $1, %%r8 ;"
18902 ++
18903 ++ /* rdx still holds B[0] from the load above; now add the squares B[i]^2 */
18904 ++ "mulx %%rdx, %%rax, %%rcx ; " /* B[0]^2 */
18905 ++ /*******************/
18906 ++ "movq %%rax, 64(%0) ;"
18907 ++ "addq %%rcx, %%r8 ;"
18908 ++ "movq %%r8, 72(%0) ;"
18909 ++ "movq 40(%1), %%rdx ;"
18910 ++ "mulx %%rdx, %%rax, %%rcx ; " /* B[1]^2 */
18911 ++ "adcq %%rax, %%r9 ;"
18912 ++ "movq %%r9, 80(%0) ;"
18913 ++ "adcq %%rcx, %%r10 ;"
18914 ++ "movq %%r10, 88(%0) ;"
18915 ++ "movq 48(%1), %%rdx ;"
18916 ++ "mulx %%rdx, %%rax, %%rcx ; " /* B[2]^2 */
18917 ++ "adcq %%rax, %%r11 ;"
18918 ++ "movq %%r11, 96(%0) ;"
18919 ++ "adcq %%rcx, %%r15 ;"
18920 ++ "movq %%r15, 104(%0) ;"
18921 ++ "movq 56(%1), %%rdx ;"
18922 ++ "mulx %%rdx, %%rax, %%rcx ; " /* B[3]^2 */
18923 ++ "adcq %%rax, %%r13 ;"
18924 ++ "movq %%r13, 112(%0) ;"
18925 ++ "adcq %%rcx, %%r14 ;"
18926 ++ "movq %%r14, 120(%0) ;"
18927 ++ :
18928 ++ : "r"(c), "r"(a)
18929 ++ : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
18930 ++ "%r11", "%r13", "%r14", "%r15");
18931 ++}
18932 ++/* Folds two 8-word values in a (words 0-7 and 8-15) down to 4 words each, using 2^256 = 38 mod (2^255-19); results go to c words 0-3 and 4-7. */
18933 ++static void red_eltfp25519_2w_adx(u64 *const c, const u64 *const a)
18934 ++{
18935 ++ asm volatile(
18936 ++ "movl $38, %%edx; " /* 2*19 = 38 = 2^256 mod (2^255-19) */
18937 ++ "mulx 32(%1), %%r8, %%r10; " /* 38*C[4] */
18938 ++ "xorl %%ebx, %%ebx ;" /* rbx = 0; also clears CF and OF */
18939 ++ "adox (%1), %%r8 ;"
18940 ++ "mulx 40(%1), %%r9, %%r11; " /* 38*C[5] */
18941 ++ "adcx %%r10, %%r9 ;"
18942 ++ "adox 8(%1), %%r9 ;"
18943 ++ "mulx 48(%1), %%r10, %%rax; " /* 38*C[6] */
18944 ++ "adcx %%r11, %%r10 ;"
18945 ++ "adox 16(%1), %%r10 ;"
18946 ++ "mulx 56(%1), %%r11, %%rcx; " /* 38*C[7] */
18947 ++ "adcx %%rax, %%r11 ;"
18948 ++ "adox 24(%1), %%r11 ;"
18949 ++ /* rcx gathers the carries of both chains: the word above 2^256 */
18950 ++ "adcx %%rbx, %%rcx ;"
18951 ++ "adox %%rbx, %%rcx ;"
18952 ++ "imul %%rdx, %%rcx ;" /* 38 * (word above 2^256), cf=0, of=0 */
18953 ++ "adcx %%rcx, %%r8 ;"
18954 ++ "adcx %%rbx, %%r9 ;"
18955 ++ "movq %%r9, 8(%0) ;"
18956 ++ "adcx %%rbx, %%r10 ;"
18957 ++ "movq %%r10, 16(%0) ;"
18958 ++ "adcx %%rbx, %%r11 ;"
18959 ++ "movq %%r11, 24(%0) ;"
18960 ++ "mov $0, %%ecx ;" /* mov leaves CF from the adcx chain intact */
18961 ++ "cmovc %%edx, %%ecx ;" /* ecx = 38 if that chain carried out, else 0 */
18962 ++ "addq %%rcx, %%r8 ;"
18963 ++ "movq %%r8, (%0) ;"
18964 ++ /* Second value: words at byte offsets 64..120 of %1 into byte offsets 32..56 of %0; rdx still holds 38 */
18965 ++ "mulx 96(%1), %%r8, %%r10; " /* 38*C[4] of the second value */
18966 ++ "xorl %%ebx, %%ebx ;" /* rbx = 0; also clears CF and OF */
18967 ++ "adox 64(%1), %%r8 ;"
18968 ++ "mulx 104(%1), %%r9, %%r11; " /* 38*C[5] of the second value */
18969 ++ "adcx %%r10, %%r9 ;"
18970 ++ "adox 72(%1), %%r9 ;"
18971 ++ "mulx 112(%1), %%r10, %%rax; " /* 38*C[6] of the second value */
18972 ++ "adcx %%r11, %%r10 ;"
18973 ++ "adox 80(%1), %%r10 ;"
18974 ++ "mulx 120(%1), %%r11, %%rcx; " /* 38*C[7] of the second value */
18975 ++ "adcx %%rax, %%r11 ;"
18976 ++ "adox 88(%1), %%r11 ;"
18977 ++ /* rcx gathers the carries of both chains: the word above 2^256 */
18978 ++ "adcx %%rbx, %%rcx ;"
18979 ++ "adox %%rbx, %%rcx ;"
18980 ++ "imul %%rdx, %%rcx ;" /* 38 * (word above 2^256), cf=0, of=0 */
18981 ++ "adcx %%rcx, %%r8 ;"
18982 ++ "adcx %%rbx, %%r9 ;"
18983 ++ "movq %%r9, 40(%0) ;"
18984 ++ "adcx %%rbx, %%r10 ;"
18985 ++ "movq %%r10, 48(%0) ;"
18986 ++ "adcx %%rbx, %%r11 ;"
18987 ++ "movq %%r11, 56(%0) ;"
18988 ++ "mov $0, %%ecx ;" /* mov leaves CF from the adcx chain intact */
18989 ++ "cmovc %%edx, %%ecx ;" /* ecx = 38 if that chain carried out, else 0 */
18990 ++ "addq %%rcx, %%r8 ;"
18991 ++ "movq %%r8, 32(%0) ;"
18992 ++ :
18993 ++ : "r"(c), "r"(a)
18994 ++ : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
18995 ++ "%r10", "%r11");
18996 ++}
18997 ++/* Same contract as the _adx variant (two 8-word values in a folded to 4 words each in c, with 2^256 = 38 mod (2^255-19)) using a single add/adc carry chain. */
18998 ++static void red_eltfp25519_2w_bmi2(u64 *const c, const u64 *const a)
18999 ++{
19000 ++ asm volatile(
19001 ++ "movl $38, %%edx ; " /* 2*19 = 38 = 2^256 mod (2^255-19) */
19002 ++ "mulx 32(%1), %%r8, %%r10 ;" /* 38*C[4] */
19003 ++ "mulx 40(%1), %%r9, %%r11 ;" /* 38*C[5] */
19004 ++ "addq %%r10, %%r9 ;"
19005 ++ "mulx 48(%1), %%r10, %%rax ;" /* 38*C[6] */
19006 ++ "adcq %%r11, %%r10 ;"
19007 ++ "mulx 56(%1), %%r11, %%rcx ;" /* 38*C[7] */
19008 ++ "adcq %%rax, %%r11 ;"
19009 ++ /* rcx becomes the word above 2^256 once the low half C[0..3] is added in */
19010 ++ "adcq $0, %%rcx ;"
19011 ++ "addq (%1), %%r8 ;"
19012 ++ "adcq 8(%1), %%r9 ;"
19013 ++ "adcq 16(%1), %%r10 ;"
19014 ++ "adcq 24(%1), %%r11 ;"
19015 ++ "adcq $0, %%rcx ;"
19016 ++ "imul %%rdx, %%rcx ;" /* 38 * (word above 2^256), cf=0 */
19017 ++ "addq %%rcx, %%r8 ;"
19018 ++ "adcq $0, %%r9 ;"
19019 ++ "movq %%r9, 8(%0) ;"
19020 ++ "adcq $0, %%r10 ;"
19021 ++ "movq %%r10, 16(%0) ;"
19022 ++ "adcq $0, %%r11 ;"
19023 ++ "movq %%r11, 24(%0) ;"
19024 ++ "mov $0, %%ecx ;" /* mov leaves CF from the adc chain intact */
19025 ++ "cmovc %%edx, %%ecx ;" /* ecx = 38 if that chain carried out, else 0 */
19026 ++ "addq %%rcx, %%r8 ;"
19027 ++ "movq %%r8, (%0) ;"
19028 ++ /* Second value: words at byte offsets 64..120 of %1 into byte offsets 32..56 of %0; rdx still holds 38 */
19029 ++ "mulx 96(%1), %%r8, %%r10 ;" /* 38*C[4] of the second value */
19030 ++ "mulx 104(%1), %%r9, %%r11 ;" /* 38*C[5] of the second value */
19031 ++ "addq %%r10, %%r9 ;"
19032 ++ "mulx 112(%1), %%r10, %%rax ;" /* 38*C[6] of the second value */
19033 ++ "adcq %%r11, %%r10 ;"
19034 ++ "mulx 120(%1), %%r11, %%rcx ;" /* 38*C[7] of the second value */
19035 ++ "adcq %%rax, %%r11 ;"
19036 ++ /* rcx becomes the word above 2^256 once the low half is added in */
19037 ++ "adcq $0, %%rcx ;"
19038 ++ "addq 64(%1), %%r8 ;"
19039 ++ "adcq 72(%1), %%r9 ;"
19040 ++ "adcq 80(%1), %%r10 ;"
19041 ++ "adcq 88(%1), %%r11 ;"
19042 ++ "adcq $0, %%rcx ;"
19043 ++ "imul %%rdx, %%rcx ;" /* 38 * (word above 2^256), cf=0 */
19044 ++ "addq %%rcx, %%r8 ;"
19045 ++ "adcq $0, %%r9 ;"
19046 ++ "movq %%r9, 40(%0) ;"
19047 ++ "adcq $0, %%r10 ;"
19048 ++ "movq %%r10, 48(%0) ;"
19049 ++ "adcq $0, %%r11 ;"
19050 ++ "movq %%r11, 56(%0) ;"
19051 ++ "mov $0, %%ecx ;" /* mov leaves CF from the adc chain intact */
19052 ++ "cmovc %%edx, %%ecx ;" /* ecx = 38 if that chain carried out, else 0 */
19053 ++ "addq %%rcx, %%r8 ;"
19054 ++ "movq %%r8, 32(%0) ;"
19055 ++ :
19056 ++ : "r"(c), "r"(a)
19057 ++ : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
19058 ++ "%r11");
19059 ++}
19060 ++/* Schoolbook 4x4-word multiply: c words 0-7 = (a words 0-3) * (b words 0-3). One row per word of a; partial words are stored to c and re-read by the next row, so c is written while a and b are still being read. */
19061 ++static void mul_256x256_integer_adx(u64 *const c, const u64 *const a,
19062 ++ const u64 *const b)
19063 ++{
19064 ++ asm volatile(
19065 ++ "movq (%1), %%rdx; " /* A[0] */
19066 ++ "mulx (%2), %%r8, %%r9; " /* A[0]*B[0] */
19067 ++ "xorl %%r10d, %%r10d ;" /* clears CF and OF for the carry chains */
19068 ++ "movq %%r8, (%0) ;"
19069 ++ "mulx 8(%2), %%r10, %%r11; " /* A[0]*B[1] */
19070 ++ "adox %%r9, %%r10 ;"
19071 ++ "movq %%r10, 8(%0) ;"
19072 ++ "mulx 16(%2), %%r15, %%r13; " /* A[0]*B[2] */
19073 ++ "adox %%r11, %%r15 ;"
19074 ++ "mulx 24(%2), %%r14, %%rdx; " /* A[0]*B[3] */
19075 ++ "adox %%r13, %%r14 ;"
19076 ++ "movq $0, %%rax ;" /* mov, not xor, so the pending OF survives */
19077 ++ /* rax = top word of row 0; the running words are now r15, r14, rax */
19078 ++ "adox %%rdx, %%rax ;"
19079 ++ /* Row 1: the OF chain (adox) links this row's products, the CF chain (adcx) adds the previous row */
19080 ++ "movq 8(%1), %%rdx; " /* A[1] */
19081 ++ "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */
19082 ++ "xorl %%r10d, %%r10d ;" /* clears CF and OF for the carry chains */
19083 ++ "adcx 8(%0), %%r8 ;"
19084 ++ "movq %%r8, 8(%0) ;"
19085 ++ "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */
19086 ++ "adox %%r9, %%r10 ;"
19087 ++ "adcx %%r15, %%r10 ;"
19088 ++ "movq %%r10, 16(%0) ;"
19089 ++ "mulx 16(%2), %%r15, %%r13; " /* A[1]*B[2] */
19090 ++ "adox %%r11, %%r15 ;"
19091 ++ "adcx %%r14, %%r15 ;"
19092 ++ "movq $0, %%r8 ;" /* zero operand for the final adcx */
19093 ++ "mulx 24(%2), %%r14, %%rdx; " /* A[1]*B[3] */
19094 ++ "adox %%r13, %%r14 ;"
19095 ++ "adcx %%rax, %%r14 ;"
19096 ++ "movq $0, %%rax ;"
19097 ++ /* rax = high word of the last product plus the final carries of both chains */
19098 ++ "adox %%rdx, %%rax ;"
19099 ++ "adcx %%r8, %%rax ;"
19100 ++ /* Row 2 */
19101 ++ "movq 16(%1), %%rdx; " /* A[2] */
19102 ++ "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */
19103 ++ "xorl %%r10d, %%r10d ;" /* clears CF and OF for the carry chains */
19104 ++ "adcx 16(%0), %%r8 ;"
19105 ++ "movq %%r8, 16(%0) ;"
19106 ++ "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */
19107 ++ "adox %%r9, %%r10 ;"
19108 ++ "adcx %%r15, %%r10 ;"
19109 ++ "movq %%r10, 24(%0) ;"
19110 ++ "mulx 16(%2), %%r15, %%r13; " /* A[2]*B[2] */
19111 ++ "adox %%r11, %%r15 ;"
19112 ++ "adcx %%r14, %%r15 ;"
19113 ++ "movq $0, %%r8 ;" /* zero operand for the final adcx */
19114 ++ "mulx 24(%2), %%r14, %%rdx; " /* A[2]*B[3] */
19115 ++ "adox %%r13, %%r14 ;"
19116 ++ "adcx %%rax, %%r14 ;"
19117 ++ "movq $0, %%rax ;"
19118 ++ /* rax = high word of the last product plus the final carries of both chains */
19119 ++ "adox %%rdx, %%rax ;"
19120 ++ "adcx %%r8, %%rax ;"
19121 ++ /* Row 3: the remaining words are stored directly as they become final */
19122 ++ "movq 24(%1), %%rdx; " /* A[3] */
19123 ++ "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */
19124 ++ "xorl %%r10d, %%r10d ;" /* clears CF and OF for the carry chains */
19125 ++ "adcx 24(%0), %%r8 ;"
19126 ++ "movq %%r8, 24(%0) ;"
19127 ++ "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */
19128 ++ "adox %%r9, %%r10 ;"
19129 ++ "adcx %%r15, %%r10 ;"
19130 ++ "movq %%r10, 32(%0) ;"
19131 ++ "mulx 16(%2), %%r15, %%r13; " /* A[3]*B[2] */
19132 ++ "adox %%r11, %%r15 ;"
19133 ++ "adcx %%r14, %%r15 ;"
19134 ++ "movq %%r15, 40(%0) ;"
19135 ++ "movq $0, %%r8 ;" /* zero operand for the final adcx */
19136 ++ "mulx 24(%2), %%r14, %%rdx; " /* A[3]*B[3] */
19137 ++ "adox %%r13, %%r14 ;"
19138 ++ "adcx %%rax, %%r14 ;"
19139 ++ "movq %%r14, 48(%0) ;"
19140 ++ "movq $0, %%rax ;"
19141 ++ /* rax = most significant word of the product */
19142 ++ "adox %%rdx, %%rax ;"
19143 ++ "adcx %%r8, %%rax ;"
19144 ++ "movq %%rax, 56(%0) ;"
19145 ++ :
19146 ++ : "r"(c), "r"(a), "r"(b)
19147 ++ : "memory", "cc", "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11",
19148 ++ "%r13", "%r14", "%r15");
19149 ++}
19150 ++/* Schoolbook 4x4-word multiply with mulx and add/adc: c words 0-7 = (a words 0-3) * (b words 0-3). The running upper words rotate through r15, rax, rbx, rcx; c is written while a and b are still being read. */
19151 ++static void mul_256x256_integer_bmi2(u64 *const c, const u64 *const a,
19152 ++ const u64 *const b)
19153 ++{
19154 ++ asm volatile(
19155 ++ "movq (%1), %%rdx; " /* A[0] */
19156 ++ "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */
19157 ++ "movq %%r8, (%0) ;"
19158 ++ "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */
19159 ++ "addq %%r10, %%r15 ;"
19160 ++ "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */
19161 ++ "adcq %%r8, %%rax ;"
19162 ++ "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */
19163 ++ "adcq %%r10, %%rbx ;"
19164 ++ /* Row 0 done: running words are r15, rax, rbx, rcx (low to high) */
19165 ++ "adcq $0, %%rcx ;"
19166 ++ /* Row 1: build the row in r8, r9, r11, r13, r15, then add it to the running words */
19167 ++ "movq 8(%1), %%rdx; " /* A[1] */
19168 ++ "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */
19169 ++ "addq %%r15, %%r8 ;"
19170 ++ "movq %%r8, 8(%0) ;"
19171 ++ "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */
19172 ++ "adcq %%r10, %%r9 ;"
19173 ++ "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */
19174 ++ "adcq %%r8, %%r11 ;"
19175 ++ "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */
19176 ++ "adcq %%r10, %%r13 ;"
19177 ++ /* r15 = top word of row 1 */
19178 ++ "adcq $0, %%r15 ;"
19179 ++ /* Running words become rax, rbx, rcx, r15 */
19180 ++ "addq %%r9, %%rax ;"
19181 ++ "adcq %%r11, %%rbx ;"
19182 ++ "adcq %%r13, %%rcx ;"
19183 ++ "adcq $0, %%r15 ;"
19184 ++ /* Row 2 */
19185 ++ "movq 16(%1), %%rdx; " /* A[2] */
19186 ++ "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */
19187 ++ "addq %%rax, %%r8 ;"
19188 ++ "movq %%r8, 16(%0) ;"
19189 ++ "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */
19190 ++ "adcq %%r10, %%r9 ;"
19191 ++ "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */
19192 ++ "adcq %%r8, %%r11 ;"
19193 ++ "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */
19194 ++ "adcq %%r10, %%r13 ;"
19195 ++ /* rax = top word of row 2 */
19196 ++ "adcq $0, %%rax ;"
19197 ++ /* Running words become rbx, rcx, r15, rax */
19198 ++ "addq %%r9, %%rbx ;"
19199 ++ "adcq %%r11, %%rcx ;"
19200 ++ "adcq %%r13, %%r15 ;"
19201 ++ "adcq $0, %%rax ;"
19202 ++ /* Row 3 */
19203 ++ "movq 24(%1), %%rdx; " /* A[3] */
19204 ++ "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */
19205 ++ "addq %%rbx, %%r8 ;"
19206 ++ "movq %%r8, 24(%0) ;"
19207 ++ "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */
19208 ++ "adcq %%r10, %%r9 ;"
19209 ++ "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */
19210 ++ "adcq %%r8, %%r11 ;"
19211 ++ "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */
19212 ++ "adcq %%r10, %%r13 ;"
19213 ++ /* rbx = top word of row 3 */
19214 ++ "adcq $0, %%rbx ;"
19215 ++ /* Final add: store the upper four words of the product */
19216 ++ "addq %%r9, %%rcx ;"
19217 ++ "movq %%rcx, 32(%0) ;"
19218 ++ "adcq %%r11, %%r15 ;"
19219 ++ "movq %%r15, 40(%0) ;"
19220 ++ "adcq %%r13, %%rax ;"
19221 ++ "movq %%rax, 48(%0) ;"
19222 ++ "adcq $0, %%rbx ;"
19223 ++ "movq %%rbx, 56(%0) ;"
19224 ++ :
19225 ++ : "r"(c), "r"(a), "r"(b)
19226 ++ : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
19227 ++ "%r10", "%r11", "%r13", "%r15");
19228 ++}
19229 ++/* Squares one 256-bit integer: c words 0-7 = (a words 0-3)^2. Cross products are summed, doubled, then the squares A[i]^2 are added. c is stored to while a is still being read. */
19230 ++static void sqr_256x256_integer_adx(u64 *const c, const u64 *const a)
19231 ++{
19232 ++ asm volatile(
19233 ++ "movq (%1), %%rdx ;" /* A[0] */
19234 ++ "mulx 8(%1), %%r8, %%r14 ;" /* A[1]*A[0] */
19235 ++ "xorl %%r15d, %%r15d;" /* r15 = 0; also clears CF and OF */
19236 ++ "mulx 16(%1), %%r9, %%r10 ;" /* A[2]*A[0] */
19237 ++ "adcx %%r14, %%r9 ;"
19238 ++ "mulx 24(%1), %%rax, %%rcx ;" /* A[3]*A[0] */
19239 ++ "adcx %%rax, %%r10 ;"
19240 ++ "movq 24(%1), %%rdx ;" /* A[3] */
19241 ++ "mulx 8(%1), %%r11, %%rbx ;" /* A[1]*A[3] */
19242 ++ "adcx %%rcx, %%r11 ;"
19243 ++ "mulx 16(%1), %%rax, %%r13 ;" /* A[2]*A[3] */
19244 ++ "adcx %%rax, %%rbx ;"
19245 ++ "movq 8(%1), %%rdx ;" /* A[1] */
19246 ++ "adcx %%r15, %%r13 ;"
19247 ++ "mulx 16(%1), %%rax, %%rcx ;" /* A[2]*A[1] */
19248 ++ "movq $0, %%r14 ;" /* mov, not xor, so the pending CF survives */
19249 ++ /* r14 = last carry of the cross-product sum */
19250 ++ "adcx %%r15, %%r14 ;"
19251 ++ /* OF chain (adox) adds A[2]*A[1]; CF chain (adcx x,x) doubles every cross-product word */
19252 ++ "xorl %%r15d, %%r15d;"
19253 ++ "adox %%rax, %%r10 ;"
19254 ++ "adcx %%r8, %%r8 ;"
19255 ++ "adox %%rcx, %%r11 ;"
19256 ++ "adcx %%r9, %%r9 ;"
19257 ++ "adox %%r15, %%rbx ;"
19258 ++ "adcx %%r10, %%r10 ;"
19259 ++ "adox %%r15, %%r13 ;"
19260 ++ "adcx %%r11, %%r11 ;"
19261 ++ "adox %%r15, %%r14 ;"
19262 ++ "adcx %%rbx, %%rbx ;"
19263 ++ "adcx %%r13, %%r13 ;"
19264 ++ "adcx %%r14, %%r14 ;"
19265 ++ /* Add the squares A[i]^2 with one addq/adcq chain, storing each result word as it becomes final */
19266 ++ "movq (%1), %%rdx ;"
19267 ++ "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */
19268 ++ /*******************/
19269 ++ "movq %%rax, 0(%0) ;"
19270 ++ "addq %%rcx, %%r8 ;"
19271 ++ "movq %%r8, 8(%0) ;"
19272 ++ "movq 8(%1), %%rdx ;"
19273 ++ "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */
19274 ++ "adcq %%rax, %%r9 ;"
19275 ++ "movq %%r9, 16(%0) ;"
19276 ++ "adcq %%rcx, %%r10 ;"
19277 ++ "movq %%r10, 24(%0) ;"
19278 ++ "movq 16(%1), %%rdx ;"
19279 ++ "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */
19280 ++ "adcq %%rax, %%r11 ;"
19281 ++ "movq %%r11, 32(%0) ;"
19282 ++ "adcq %%rcx, %%rbx ;"
19283 ++ "movq %%rbx, 40(%0) ;"
19284 ++ "movq 24(%1), %%rdx ;"
19285 ++ "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */
19286 ++ "adcq %%rax, %%r13 ;"
19287 ++ "movq %%r13, 48(%0) ;"
19288 ++ "adcq %%rcx, %%r14 ;"
19289 ++ "movq %%r14, 56(%0) ;"
19290 ++ :
19291 ++ : "r"(c), "r"(a)
19292 ++ : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
19293 ++ "%r10", "%r11", "%r13", "%r14", "%r15");
19294 ++}
19295 ++/* Squares one 256-bit integer with mulx, add/adc and shld: c words 0-7 = (a words 0-3)^2. c is stored to while a is still being read. */
19296 ++static void sqr_256x256_integer_bmi2(u64 *const c, const u64 *const a)
19297 ++{
19298 ++ asm volatile(
19299 ++ "movq 8(%1), %%rdx ;" /* A[1] */
19300 ++ "mulx (%1), %%r8, %%r9 ;" /* A[0]*A[1] */
19301 ++ "mulx 16(%1), %%r10, %%r11 ;" /* A[2]*A[1] */
19302 ++ "mulx 24(%1), %%rcx, %%r14 ;" /* A[3]*A[1] */
19303 ++
19304 ++ "movq 16(%1), %%rdx ;" /* A[2] */
19305 ++ "mulx 24(%1), %%r15, %%r13 ;" /* A[3]*A[2] */
19306 ++ "mulx (%1), %%rax, %%rdx ;" /* A[0]*A[2] */
19307 ++ /* Sum the cross products; r14 is reused to catch the final carry */
19308 ++ "addq %%rax, %%r9 ;"
19309 ++ "adcq %%rdx, %%r10 ;"
19310 ++ "adcq %%rcx, %%r11 ;"
19311 ++ "adcq %%r14, %%r15 ;"
19312 ++ "adcq $0, %%r13 ;"
19313 ++ "movq $0, %%r14 ;" /* mov, not xor, so the pending CF survives */
19314 ++ "adcq $0, %%r14 ;"
19315 ++
19316 ++ "movq (%1), %%rdx ;" /* A[0] */
19317 ++ "mulx 24(%1), %%rax, %%rcx ;" /* A[0]*A[3] */
19318 ++
19319 ++ "addq %%rax, %%r10 ;"
19320 ++ "adcq %%rcx, %%r11 ;"
19321 ++ "adcq $0, %%r15 ;"
19322 ++ "adcq $0, %%r13 ;"
19323 ++ "adcq $0, %%r14 ;"
19324 ++ /* Double the cross-product sum: shift r14:r13:r15:r11:r10:r9:r8 left by one bit */
19325 ++ "shldq $1, %%r13, %%r14 ;"
19326 ++ "shldq $1, %%r15, %%r13 ;"
19327 ++ "shldq $1, %%r11, %%r15 ;"
19328 ++ "shldq $1, %%r10, %%r11 ;"
19329 ++ "shldq $1, %%r9, %%r10 ;"
19330 ++ "shldq $1, %%r8, %%r9 ;"
19331 ++ "shlq $1, %%r8 ;"
19332 ++
19333 ++ /* rdx still holds A[0] from the load above; now add the squares A[i]^2 */
19334 ++ "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */
19335 ++ /*******************/
19336 ++ "movq %%rax, 0(%0) ;"
19337 ++ "addq %%rcx, %%r8 ;"
19338 ++ "movq %%r8, 8(%0) ;"
19339 ++ "movq 8(%1), %%rdx ;"
19340 ++ "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */
19341 ++ "adcq %%rax, %%r9 ;"
19342 ++ "movq %%r9, 16(%0) ;"
19343 ++ "adcq %%rcx, %%r10 ;"
19344 ++ "movq %%r10, 24(%0) ;"
19345 ++ "movq 16(%1), %%rdx ;"
19346 ++ "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */
19347 ++ "adcq %%rax, %%r11 ;"
19348 ++ "movq %%r11, 32(%0) ;"
19349 ++ "adcq %%rcx, %%r15 ;"
19350 ++ "movq %%r15, 40(%0) ;"
19351 ++ "movq 24(%1), %%rdx ;"
19352 ++ "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */
19353 ++ "adcq %%rax, %%r13 ;"
19354 ++ "movq %%r13, 48(%0) ;"
19355 ++ "adcq %%rcx, %%r14 ;"
19356 ++ "movq %%r14, 56(%0) ;"
19357 ++ :
19358 ++ : "r"(c), "r"(a)
19359 ++ : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
19360 ++ "%r11", "%r13", "%r14", "%r15");
19361 ++}
19362 ++/* Folds one 8-word value in a down to 4 words in c, using 2^256 = 38 mod (2^255-19): c = a[0..3] + 38*a[4..7], with any overflow above 2^256 folded back as a multiple of 38. */
19363 ++static void red_eltfp25519_1w_adx(u64 *const c, const u64 *const a)
19364 ++{
19365 ++ asm volatile(
19366 ++ "movl $38, %%edx ;" /* 2*19 = 38 = 2^256 mod (2^255-19) */
19367 ++ "mulx 32(%1), %%r8, %%r10 ;" /* 38*C[4] */
19368 ++ "xorl %%ebx, %%ebx ;" /* rbx = 0; also clears CF and OF */
19369 ++ "adox (%1), %%r8 ;"
19370 ++ "mulx 40(%1), %%r9, %%r11 ;" /* 38*C[5] */
19371 ++ "adcx %%r10, %%r9 ;"
19372 ++ "adox 8(%1), %%r9 ;"
19373 ++ "mulx 48(%1), %%r10, %%rax ;" /* 38*C[6] */
19374 ++ "adcx %%r11, %%r10 ;"
19375 ++ "adox 16(%1), %%r10 ;"
19376 ++ "mulx 56(%1), %%r11, %%rcx ;" /* 38*C[7] */
19377 ++ "adcx %%rax, %%r11 ;"
19378 ++ "adox 24(%1), %%r11 ;"
19379 ++ /* rcx gathers the carries of both chains: the word above 2^256 */
19380 ++ "adcx %%rbx, %%rcx ;"
19381 ++ "adox %%rbx, %%rcx ;"
19382 ++ "imul %%rdx, %%rcx ;" /* 38 * (word above 2^256), cf=0, of=0 */
19383 ++ "adcx %%rcx, %%r8 ;"
19384 ++ "adcx %%rbx, %%r9 ;"
19385 ++ "movq %%r9, 8(%0) ;"
19386 ++ "adcx %%rbx, %%r10 ;"
19387 ++ "movq %%r10, 16(%0) ;"
19388 ++ "adcx %%rbx, %%r11 ;"
19389 ++ "movq %%r11, 24(%0) ;"
19390 ++ "mov $0, %%ecx ;" /* mov leaves CF from the adcx chain intact */
19391 ++ "cmovc %%edx, %%ecx ;" /* ecx = 38 if that chain carried out, else 0 */
19392 ++ "addq %%rcx, %%r8 ;"
19393 ++ "movq %%r8, (%0) ;"
19394 ++ :
19395 ++ : "r"(c), "r"(a)
19396 ++ : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
19397 ++ "%r10", "%r11");
19398 ++}
19399 ++/* Same contract as the _adx variant (8-word a folded to 4-word c with 2^256 = 38 mod (2^255-19)) using a single add/adc carry chain. */
19400 ++static void red_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a)
19401 ++{
19402 ++ asm volatile(
19403 ++ "movl $38, %%edx ;" /* 2*19 = 38 = 2^256 mod (2^255-19) */
19404 ++ "mulx 32(%1), %%r8, %%r10 ;" /* 38*C[4] */
19405 ++ "mulx 40(%1), %%r9, %%r11 ;" /* 38*C[5] */
19406 ++ "addq %%r10, %%r9 ;"
19407 ++ "mulx 48(%1), %%r10, %%rax ;" /* 38*C[6] */
19408 ++ "adcq %%r11, %%r10 ;"
19409 ++ "mulx 56(%1), %%r11, %%rcx ;" /* 38*C[7] */
19410 ++ "adcq %%rax, %%r11 ;"
19411 ++ /* rcx becomes the word above 2^256 once the low half C[0..3] is added in */
19412 ++ "adcq $0, %%rcx ;"
19413 ++ "addq (%1), %%r8 ;"
19414 ++ "adcq 8(%1), %%r9 ;"
19415 ++ "adcq 16(%1), %%r10 ;"
19416 ++ "adcq 24(%1), %%r11 ;"
19417 ++ "adcq $0, %%rcx ;"
19418 ++ "imul %%rdx, %%rcx ;" /* 38 * (word above 2^256), cf=0 */
19419 ++ "addq %%rcx, %%r8 ;"
19420 ++ "adcq $0, %%r9 ;"
19421 ++ "movq %%r9, 8(%0) ;"
19422 ++ "adcq $0, %%r10 ;"
19423 ++ "movq %%r10, 16(%0) ;"
19424 ++ "adcq $0, %%r11 ;"
19425 ++ "movq %%r11, 24(%0) ;"
19426 ++ "mov $0, %%ecx ;" /* mov leaves CF from the adc chain intact */
19427 ++ "cmovc %%edx, %%ecx ;" /* ecx = 38 if that chain carried out, else 0 */
19428 ++ "addq %%rcx, %%r8 ;"
19429 ++ "movq %%r8, (%0) ;"
19430 ++ :
19431 ++ : "r"(c), "r"(a)
19432 ++ : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
19433 ++ "%r11");
19434 ++}
19435 ++/* c = a + b on 4-word values; a carry out of 2^256 is folded back by adding 38 (done twice, since the first fold can carry again). All loads happen before the first store to c. */
19436 ++static __always_inline void
19437 ++add_eltfp25519_1w_adx(u64 *const c, const u64 *const a, const u64 *const b)
19438 ++{
19439 ++ asm volatile(
19440 ++ "mov $38, %%eax ;"
19441 ++ "xorl %%ecx, %%ecx ;" /* rcx = 0; also clears CF for the adcx chain */
19442 ++ "movq (%2), %%r8 ;"
19443 ++ "adcx (%1), %%r8 ;"
19444 ++ "movq 8(%2), %%r9 ;"
19445 ++ "adcx 8(%1), %%r9 ;"
19446 ++ "movq 16(%2), %%r10 ;"
19447 ++ "adcx 16(%1), %%r10 ;"
19448 ++ "movq 24(%2), %%r11 ;"
19449 ++ "adcx 24(%1), %%r11 ;"
19450 ++ "cmovc %%eax, %%ecx ;" /* ecx = 38 if the sum carried out of 2^256 */
19451 ++ "xorl %%eax, %%eax ;" /* rax = 0; also clears CF for the next chain */
19452 ++ "adcx %%rcx, %%r8 ;"
19453 ++ "adcx %%rax, %%r9 ;"
19454 ++ "movq %%r9, 8(%0) ;"
19455 ++ "adcx %%rax, %%r10 ;"
19456 ++ "movq %%r10, 16(%0) ;"
19457 ++ "adcx %%rax, %%r11 ;"
19458 ++ "movq %%r11, 24(%0) ;"
19459 ++ "mov $38, %%ecx ;" /* mov leaves CF from the adcx chain intact */
19460 ++ "cmovc %%ecx, %%eax ;" /* eax = 38 if the fold itself carried out */
19461 ++ "addq %%rax, %%r8 ;"
19462 ++ "movq %%r8, (%0) ;"
19463 ++ :
19464 ++ : "r"(c), "r"(a), "r"(b)
19465 ++ : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11");
19466 ++}
19467 ++/* c = a + b on 4-word values using add/adc; a carry out of 2^256 is folded back by adding 38 (done twice, since the first fold can carry again). All loads happen before the first store to c. */
19468 ++static __always_inline void
19469 ++add_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a, const u64 *const b)
19470 ++{
19471 ++ asm volatile(
19472 ++ "mov $38, %%eax ;"
19473 ++ "movq (%2), %%r8 ;"
19474 ++ "addq (%1), %%r8 ;"
19475 ++ "movq 8(%2), %%r9 ;"
19476 ++ "adcq 8(%1), %%r9 ;"
19477 ++ "movq 16(%2), %%r10 ;"
19478 ++ "adcq 16(%1), %%r10 ;"
19479 ++ "movq 24(%2), %%r11 ;"
19480 ++ "adcq 24(%1), %%r11 ;"
19481 ++ "mov $0, %%ecx ;" /* mov leaves CF from the adc chain intact */
19482 ++ "cmovc %%eax, %%ecx ;" /* ecx = 38 if the sum carried out of 2^256 */
19483 ++ "addq %%rcx, %%r8 ;"
19484 ++ "adcq $0, %%r9 ;"
19485 ++ "movq %%r9, 8(%0) ;"
19486 ++ "adcq $0, %%r10 ;"
19487 ++ "movq %%r10, 16(%0) ;"
19488 ++ "adcq $0, %%r11 ;"
19489 ++ "movq %%r11, 24(%0) ;"
19490 ++ "mov $0, %%ecx ;" /* mov leaves CF from the adc chain intact */
19491 ++ "cmovc %%eax, %%ecx ;" /* ecx = 38 if the fold itself carried out */
19492 ++ "addq %%rcx, %%r8 ;"
19493 ++ "movq %%r8, (%0) ;"
19494 ++ :
19495 ++ : "r"(c), "r"(a), "r"(b)
19496 ++ : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11");
19497 ++}
19498 ++/* c = a - b on 4-word values; a borrow means the result wrapped by 2^256, which is corrected by subtracting 38 (done twice, since the first correction can borrow again). All loads happen before the first store to c. */
19499 ++static __always_inline void
19500 ++sub_eltfp25519_1w(u64 *const c, const u64 *const a, const u64 *const b)
19501 ++{
19502 ++ asm volatile(
19503 ++ "mov $38, %%eax ;"
19504 ++ "movq (%1), %%r8 ;"
19505 ++ "subq (%2), %%r8 ;"
19506 ++ "movq 8(%1), %%r9 ;"
19507 ++ "sbbq 8(%2), %%r9 ;"
19508 ++ "movq 16(%1), %%r10 ;"
19509 ++ "sbbq 16(%2), %%r10 ;"
19510 ++ "movq 24(%1), %%r11 ;"
19511 ++ "sbbq 24(%2), %%r11 ;"
19512 ++ "mov $0, %%ecx ;" /* mov leaves CF (the borrow) intact */
19513 ++ "cmovc %%eax, %%ecx ;" /* ecx = 38 if the subtraction borrowed */
19514 ++ "subq %%rcx, %%r8 ;"
19515 ++ "sbbq $0, %%r9 ;"
19516 ++ "movq %%r9, 8(%0) ;"
19517 ++ "sbbq $0, %%r10 ;"
19518 ++ "movq %%r10, 16(%0) ;"
19519 ++ "sbbq $0, %%r11 ;"
19520 ++ "movq %%r11, 24(%0) ;"
19521 ++ "mov $0, %%ecx ;" /* mov leaves CF (the borrow) intact */
19522 ++ "cmovc %%eax, %%ecx ;" /* ecx = 38 if the correction itself borrowed */
19523 ++ "subq %%rcx, %%r8 ;"
19524 ++ "movq %%r8, (%0) ;"
19525 ++ :
19526 ++ : "r"(c), "r"(a), "r"(b)
19527 ++ : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11");
19528 ++}
19529 ++
19530 ++/* Multiplication by a24 = (A+2)/4 = (486662+2)/4 = 121666: c = a24 * a on 4-word values, with the word above 2^256 folded back as a multiple of 38. */
19531 ++static __always_inline void
19532 ++mul_a24_eltfp25519_1w(u64 *const c, const u64 *const a)
19533 ++{
19534 ++ const u64 a24 = 121666;
19535 ++ asm volatile(
19536 ++ "movq %2, %%rdx ;" /* rdx = a24, the implicit mulx operand */
19537 ++ "mulx (%1), %%r8, %%r10 ;"
19538 ++ "mulx 8(%1), %%r9, %%r11 ;"
19539 ++ "addq %%r10, %%r9 ;"
19540 ++ "mulx 16(%1), %%r10, %%rax ;"
19541 ++ "adcq %%r11, %%r10 ;"
19542 ++ "mulx 24(%1), %%r11, %%rcx ;"
19543 ++ "adcq %%rax, %%r11 ;"
19544 ++ /* rcx = fifth word of the product, i.e. the part above 2^256 */
19545 ++ "adcq $0, %%rcx ;"
19546 ++ "movl $38, %%edx ;" /* 2*19 = 38 = 2^256 mod (2^255-19) */
19547 ++ "imul %%rdx, %%rcx ;" /* 38 * (word above 2^256) */
19548 ++ "addq %%rcx, %%r8 ;"
19549 ++ "adcq $0, %%r9 ;"
19550 ++ "movq %%r9, 8(%0) ;"
19551 ++ "adcq $0, %%r10 ;"
19552 ++ "movq %%r10, 16(%0) ;"
19553 ++ "adcq $0, %%r11 ;"
19554 ++ "movq %%r11, 24(%0) ;"
19555 ++ "mov $0, %%ecx ;" /* mov leaves CF from the adc chain intact */
19556 ++ "cmovc %%edx, %%ecx ;" /* ecx = 38 if that chain carried out, else 0 */
19557 ++ "addq %%rcx, %%r8 ;"
19558 ++ "movq %%r8, (%0) ;"
19559 ++ :
19560 ++ : "r"(c), "r"(a), "r"(a24)
19561 ++ : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
19562 ++ "%r11");
19563 ++}
19564 ++
19565 ++static void inv_eltfp25519_1w_adx(u64 *const c, const u64 *const a)
19566 ++{ /* c = a^(-1) (see T[1]) computed with a fixed chain of squarings and multiplications */
19567 ++ struct { /* scratch field elements, wiped before returning */
19568 ++ eltfp25519_1w_buffer buffer; /* not referenced in this function */
19569 ++ eltfp25519_1w x0, x1, x2;
19570 ++ } __aligned(32) m;
19571 ++ u64 *T[4]; /* the a^e notes below assume sqrn(x, n) squares x in place n times and mul(z, x, y) sets z = x*y -- TODO confirm */
19572 ++
19573 ++ T[0] = m.x0;
19574 ++ T[1] = c; /* x^(-1) */
19575 ++ T[2] = m.x1;
19576 ++ T[3] = m.x2;
19577 ++
19578 ++ copy_eltfp25519_1w(T[1], a); /* c is overwritten here but a is read again below, so c must not alias a */
19579 ++ sqrn_eltfp25519_1w_adx(T[1], 1); /* T1 = a^2 */
19580 ++ copy_eltfp25519_1w(T[2], T[1]);
19581 ++ sqrn_eltfp25519_1w_adx(T[2], 2); /* T2 = a^8 */
19582 ++ mul_eltfp25519_1w_adx(T[0], a, T[2]); /* T0 = a^9 */
19583 ++ mul_eltfp25519_1w_adx(T[1], T[1], T[0]); /* T1 = a^11 */
19584 ++ copy_eltfp25519_1w(T[2], T[1]);
19585 ++ sqrn_eltfp25519_1w_adx(T[2], 1); /* T2 = a^22 */
19586 ++ mul_eltfp25519_1w_adx(T[0], T[0], T[2]); /* T0 = a^31 = a^(2^5-1) */
19587 ++ copy_eltfp25519_1w(T[2], T[0]);
19588 ++ sqrn_eltfp25519_1w_adx(T[2], 5); /* T2 = a^(2^10-2^5) */
19589 ++ mul_eltfp25519_1w_adx(T[0], T[0], T[2]); /* T0 = a^(2^10-1) */
19590 ++ copy_eltfp25519_1w(T[2], T[0]);
19591 ++ sqrn_eltfp25519_1w_adx(T[2], 10); /* T2 = a^(2^20-2^10) */
19592 ++ mul_eltfp25519_1w_adx(T[2], T[2], T[0]); /* T2 = a^(2^20-1) */
19593 ++ copy_eltfp25519_1w(T[3], T[2]);
19594 ++ sqrn_eltfp25519_1w_adx(T[3], 20); /* T3 = a^(2^40-2^20) */
19595 ++ mul_eltfp25519_1w_adx(T[3], T[3], T[2]); /* T3 = a^(2^40-1) */
19596 ++ sqrn_eltfp25519_1w_adx(T[3], 10); /* T3 = a^(2^50-2^10) */
19597 ++ mul_eltfp25519_1w_adx(T[3], T[3], T[0]); /* T3 = a^(2^50-1) */
19598 ++ copy_eltfp25519_1w(T[0], T[3]);
19599 ++ sqrn_eltfp25519_1w_adx(T[0], 50); /* T0 = a^(2^100-2^50) */
19600 ++ mul_eltfp25519_1w_adx(T[0], T[0], T[3]); /* T0 = a^(2^100-1) */
19601 ++ copy_eltfp25519_1w(T[2], T[0]);
19602 ++ sqrn_eltfp25519_1w_adx(T[2], 100); /* T2 = a^(2^200-2^100) */
19603 ++ mul_eltfp25519_1w_adx(T[2], T[2], T[0]); /* T2 = a^(2^200-1) */
19604 ++ sqrn_eltfp25519_1w_adx(T[2], 50); /* T2 = a^(2^250-2^50) */
19605 ++ mul_eltfp25519_1w_adx(T[2], T[2], T[3]); /* T2 = a^(2^250-1) */
19606 ++ sqrn_eltfp25519_1w_adx(T[2], 5); /* T2 = a^(2^255-2^5) */
19607 ++ mul_eltfp25519_1w_adx(T[1], T[1], T[2]); /* c = a^(2^255-32+11) = a^(2^255-21) */
19608 ++
19609 ++ memzero_explicit(&m, sizeof(m)); /* wipe the intermediates held in m */
19610 ++}
19611 ++
19612 ++static void inv_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a)
19613 ++{ /* c = a^(-1) (see T[1]) computed with a fixed chain of squarings and multiplications */
19614 ++ struct { /* scratch field elements, wiped before returning */
19615 ++ eltfp25519_1w_buffer buffer; /* not referenced in this function */
19616 ++ eltfp25519_1w x0, x1, x2;
19617 ++ } __aligned(32) m;
19618 ++ u64 *T[4]; /* four slots, as in the _adx twin; a^e notes assume sqrn(x, n) squares x in place n times and mul(z, x, y) sets z = x*y -- TODO confirm */
19619 ++
19620 ++ T[0] = m.x0;
19621 ++ T[1] = c; /* x^(-1) */
19622 ++ T[2] = m.x1;
19623 ++ T[3] = m.x2;
19624 ++
19625 ++ copy_eltfp25519_1w(T[1], a); /* c is overwritten here but a is read again below, so c must not alias a */
19626 ++ sqrn_eltfp25519_1w_bmi2(T[1], 1); /* T1 = a^2 */
19627 ++ copy_eltfp25519_1w(T[2], T[1]);
19628 ++ sqrn_eltfp25519_1w_bmi2(T[2], 2); /* T2 = a^8 */
19629 ++ mul_eltfp25519_1w_bmi2(T[0], a, T[2]); /* T0 = a^9 */
19630 ++ mul_eltfp25519_1w_bmi2(T[1], T[1], T[0]); /* T1 = a^11 */
19631 ++ copy_eltfp25519_1w(T[2], T[1]);
19632 ++ sqrn_eltfp25519_1w_bmi2(T[2], 1); /* T2 = a^22 */
19633 ++ mul_eltfp25519_1w_bmi2(T[0], T[0], T[2]); /* T0 = a^31 = a^(2^5-1) */
19634 ++ copy_eltfp25519_1w(T[2], T[0]);
19635 ++ sqrn_eltfp25519_1w_bmi2(T[2], 5); /* T2 = a^(2^10-2^5) */
19636 ++ mul_eltfp25519_1w_bmi2(T[0], T[0], T[2]); /* T0 = a^(2^10-1) */
19637 ++ copy_eltfp25519_1w(T[2], T[0]);
19638 ++ sqrn_eltfp25519_1w_bmi2(T[2], 10); /* T2 = a^(2^20-2^10) */
19639 ++ mul_eltfp25519_1w_bmi2(T[2], T[2], T[0]); /* T2 = a^(2^20-1) */
19640 ++ copy_eltfp25519_1w(T[3], T[2]);
19641 ++ sqrn_eltfp25519_1w_bmi2(T[3], 20); /* T3 = a^(2^40-2^20) */
19642 ++ mul_eltfp25519_1w_bmi2(T[3], T[3], T[2]); /* T3 = a^(2^40-1) */
19643 ++ sqrn_eltfp25519_1w_bmi2(T[3], 10); /* T3 = a^(2^50-2^10) */
19644 ++ mul_eltfp25519_1w_bmi2(T[3], T[3], T[0]); /* T3 = a^(2^50-1) */
19645 ++ copy_eltfp25519_1w(T[0], T[3]);
19646 ++ sqrn_eltfp25519_1w_bmi2(T[0], 50); /* T0 = a^(2^100-2^50) */
19647 ++ mul_eltfp25519_1w_bmi2(T[0], T[0], T[3]); /* T0 = a^(2^100-1) */
19648 ++ copy_eltfp25519_1w(T[2], T[0]);
19649 ++ sqrn_eltfp25519_1w_bmi2(T[2], 100); /* T2 = a^(2^200-2^100) */
19650 ++ mul_eltfp25519_1w_bmi2(T[2], T[2], T[0]); /* T2 = a^(2^200-1) */
19651 ++ sqrn_eltfp25519_1w_bmi2(T[2], 50); /* T2 = a^(2^250-2^50) */
19652 ++ mul_eltfp25519_1w_bmi2(T[2], T[2], T[3]); /* T2 = a^(2^250-1) */
19653 ++ sqrn_eltfp25519_1w_bmi2(T[2], 5); /* T2 = a^(2^255-2^5) */
19654 ++ mul_eltfp25519_1w_bmi2(T[1], T[1], T[2]); /* c = a^(2^255-32+11) = a^(2^255-21) */
19655 ++
19656 ++ memzero_explicit(&m, sizeof(m)); /* wipe the intermediates held in m */
19657 ++}
19658 ++
19659 ++/* Given c, a 256-bit number, fred_eltfp25519_1w updates c in place
19660 ++ * with a number such that 0 <= C < 2**255-19; only conditional moves are used, no branches.
19661 ++ */
19662 ++static __always_inline void fred_eltfp25519_1w(u64 *const c)
19663 ++{
19664 ++ u64 tmp0 = 38, tmp1 = 19; /* tmp0 becomes the amount to add, then to subtract; tmp1 stays 19 */
19665 ++ asm volatile(
19666 ++ "btrq $63, %3 ;" /* Put bit 255 in carry flag and clear */
19667 ++ "cmovncl %k5, %k4 ;" /* c[255] ? 38 : 19 */
19668 ++
19669 ++ /* Add either 19 or 38 to c */
19670 ++ "addq %4, %0 ;"
19671 ++ "adcq $0, %1 ;"
19672 ++ "adcq $0, %2 ;"
19673 ++ "adcq $0, %3 ;"
19674 ++
19675 ++ /* Test for bit 255 again; only triggered on overflow modulo 2^255-19 */
19676 ++ "movl $0, %k4 ;" /* movl does not alter flags: SF still comes from the adcq on %3 */
19677 ++ "cmovnsl %k5, %k4 ;" /* c[255] ? 0 : 19 */
19678 ++ "btrq $63, %3 ;" /* Clear bit 255 */
19679 ++
19680 ++ /* Subtract 19 if necessary */
19681 ++ "subq %4, %0 ;"
19682 ++ "sbbq $0, %1 ;"
19683 ++ "sbbq $0, %2 ;"
19684 ++ "sbbq $0, %3 ;"
19685 ++
19686 ++ : "+r"(c[0]), "+r"(c[1]), "+r"(c[2]), "+r"(c[3]), "+r"(tmp0), /* %0-%3 = c[0..3], %4 = tmp0 */
19687 ++ "+r"(tmp1) /* %5 = tmp1 */
19688 ++ :
19689 ++ : "memory", "cc");
19690 ++}
19691 ++
19692 ++static __always_inline void cswap(u8 bit, u64 *const px, u64 *const py)
19693 ++{ /* Exchange px[0..3] with py[0..3] when bit is non-zero; conditional moves only, no branches */
19694 ++ u64 temp; /* scratch register for the exchange (%8) */
19695 ++ asm volatile(
19696 ++ "test %9, %9 ;" /* ZF = (bit == 0); the mov/cmov below leave the flags untouched */
19697 ++ "movq %0, %8 ;" /* temp = px[0] */
19698 ++ "cmovnzq %4, %0 ;" /* px[0] = bit ? py[0] : px[0] */
19699 ++ "cmovnzq %8, %4 ;" /* py[0] = bit ? temp : py[0] */
19700 ++ "movq %1, %8 ;" /* same exchange for limbs 1..3 */
19701 ++ "cmovnzq %5, %1 ;"
19702 ++ "cmovnzq %8, %5 ;"
19703 ++ "movq %2, %8 ;"
19704 ++ "cmovnzq %6, %2 ;"
19705 ++ "cmovnzq %8, %6 ;"
19706 ++ "movq %3, %8 ;"
19707 ++ "cmovnzq %7, %3 ;"
19708 ++ "cmovnzq %8, %7 ;"
19709 ++ : "+r"(px[0]), "+r"(px[1]), "+r"(px[2]), "+r"(px[3]), /* %0-%3 */
19710 ++ "+r"(py[0]), "+r"(py[1]), "+r"(py[2]), "+r"(py[3]), /* %4-%7 */
19711 ++ "=r"(temp) /* %8 */
19712 ++ : "r"(bit) /* %9 */
19713 ++ : "cc"
19714 ++ );
19715 ++}
19716 ++
19717 ++static __always_inline void cselect(u8 bit, u64 *const px, const u64 *const py)
19718 ++{ /* px[0..3] = bit ? py[0..3] : px[0..3]; conditional moves only, no branches */
19719 ++ asm volatile(
19720 ++ "test %4, %4 ;" /* ZF = (bit == 0) */
19721 ++ "cmovnzq %5, %0 ;" /* px[i] = py[i] when bit is non-zero, limb by limb */
19722 ++ "cmovnzq %6, %1 ;"
19723 ++ "cmovnzq %7, %2 ;"
19724 ++ "cmovnzq %8, %3 ;"
19725 ++ : "+r"(px[0]), "+r"(px[1]), "+r"(px[2]), "+r"(px[3]) /* %0-%3 */
19726 ++ : "r"(bit), "rm"(py[0]), "rm"(py[1]), "rm"(py[2]), "rm"(py[3]) /* %4 = bit, %5-%8 = py[0..3] (register or memory) */
19727 ++ : "cc"
19728 ++ );
19729 ++}
19730 ++
19731 ++static void curve25519_adx(u8 shared[CURVE25519_KEY_SIZE],
19732 ++ const u8 private_key[CURVE25519_KEY_SIZE],
19733 ++ const u8 session_key[CURVE25519_KEY_SIZE])
19734 ++{ /* Ladder over the bits of private_key starting from the point session_key; writes Qx/Qz, reduced, to shared */
19735 ++ struct { /* all scratch state lives here and is wiped at the end */
19736 ++ u64 buffer[4 * NUM_WORDS_ELTFP25519]; /* not referenced in this function */
19737 ++ u64 coordinates[4 * NUM_WORDS_ELTFP25519]; /* Px | Pz | Qx | Qz */
19738 ++ u64 workspace[6 * NUM_WORDS_ELTFP25519]; /* A | B | D | C | DA | CB */
19739 ++ u8 session[CURVE25519_KEY_SIZE]; /* local copy of session_key, top bit masked below */
19740 ++ u8 private[CURVE25519_KEY_SIZE]; /* local copy of private_key, clamped below */
19741 ++ } __aligned(32) m;
19742 ++
19743 ++ int i = 0, j = 0;
19744 ++ u64 prev = 0;
19745 ++ u64 *const X1 = (u64 *)m.session; /* input coordinate viewed as four u64 limbs */
19746 ++ u64 *const key = (u64 *)m.private; /* scalar viewed as four u64 words */
19747 ++ u64 *const Px = m.coordinates + 0;
19748 ++ u64 *const Pz = m.coordinates + 4;
19749 ++ u64 *const Qx = m.coordinates + 8;
19750 ++ u64 *const Qz = m.coordinates + 12;
19751 ++ u64 *const X2 = Qx; /* (X2, Z2) and (X3, Z3) are other names for Q and P */
19752 ++ u64 *const Z2 = Qz;
19753 ++ u64 *const X3 = Px;
19754 ++ u64 *const Z3 = Pz;
19755 ++ u64 *const X2Z2 = Qx; /* two-element views: X2|Z2 and X3|Z3 are adjacent in m.coordinates */
19756 ++ u64 *const X3Z3 = Px;
19757 ++
19758 ++ u64 *const A = m.workspace + 0;
19759 ++ u64 *const B = m.workspace + 4;
19760 ++ u64 *const D = m.workspace + 8; /* D is placed before C so that DC below is the contiguous pair D|C */
19761 ++ u64 *const C = m.workspace + 12;
19762 ++ u64 *const DA = m.workspace + 16;
19763 ++ u64 *const CB = m.workspace + 20;
19764 ++ u64 *const AB = A; /* pair views passed to the 2w routines: A|B, D|C, DA|CB */
19765 ++ u64 *const DC = D;
19766 ++ u64 *const DACB = DA;
19767 ++
19768 ++ memcpy(m.private, private_key, sizeof(m.private));
19769 ++ memcpy(m.session, session_key, sizeof(m.session));
19770 ++
19771 ++ curve25519_clamp_secret(m.private);
19772 ++
19773 ++ /* As in the draft:
19774 ++ * When receiving such an array, implementations of curve25519
19775 ++ * MUST mask the most-significant bit in the final byte. This
19776 ++ * is done to preserve compatibility with point formats which
19777 ++ * reserve the sign bit for use in other protocols and to
19778 ++ * increase resistance to implementation fingerprinting
19779 ++ */
19780 ++ m.session[CURVE25519_KEY_SIZE - 1] &= (1 << (255 % 8)) - 1; /* i.e. &= 0x7f */
19781 ++
19782 ++ copy_eltfp25519_1w(Px, X1); /* P = (X1 : 1) */
19783 ++ setzero_eltfp25519_1w(Pz);
19784 ++ setzero_eltfp25519_1w(Qx); /* Q = (1 : 0) */
19785 ++ setzero_eltfp25519_1w(Qz);
19786 ++
19787 ++ Pz[0] = 1;
19788 ++ Qx[0] = 1;
19789 ++
19790 ++ /* main-loop */
19791 ++ prev = 0;
19792 ++ j = 62; /* start at scalar bit 254 (word 3, bit 62) */
19793 ++ for (i = 3; i >= 0; --i) { /* words from most to least significant */
19794 ++ while (j >= 0) {
19795 ++ u64 bit = (key[i] >> j) & 0x1;
19796 ++ u64 swap = bit ^ prev; /* 1 when this bit differs from the previous one */
19797 ++ prev = bit;
19798 ++
19799 ++ add_eltfp25519_1w_adx(A, X2, Z2); /* A = (X2+Z2) */
19800 ++ sub_eltfp25519_1w(B, X2, Z2); /* B = (X2-Z2) */
19801 ++ add_eltfp25519_1w_adx(C, X3, Z3); /* C = (X3+Z3) */
19802 ++ sub_eltfp25519_1w(D, X3, Z3); /* D = (X3-Z3) */
19803 ++ mul_eltfp25519_2w_adx(DACB, AB, DC); /* [DA|CB] = [A|B]*[D|C] */
19804 ++
19805 ++ cselect(swap, A, C); /* A = swap ? C : A */
19806 ++ cselect(swap, B, D); /* B = swap ? D : B */
19807 ++
19808 ++ sqr_eltfp25519_2w_adx(AB); /* [AA|BB] = [A^2|B^2] */
19809 ++ add_eltfp25519_1w_adx(X3, DA, CB); /* X3 = (DA+CB) */
19810 ++ sub_eltfp25519_1w(Z3, DA, CB); /* Z3 = (DA-CB) */
19811 ++ sqr_eltfp25519_2w_adx(X3Z3); /* [X3|Z3] = [(DA+CB)|(DA-CB)]^2 */
19812 ++
19813 ++ copy_eltfp25519_1w(X2, B); /* X2 = B^2 */
19814 ++ sub_eltfp25519_1w(Z2, A, B); /* Z2 = E = AA-BB */
19815 ++
19816 ++ mul_a24_eltfp25519_1w(B, Z2); /* B = a24*E */
19817 ++ add_eltfp25519_1w_adx(B, B, X2); /* B = a24*E+BB (X2 holds BB) */
19818 ++ mul_eltfp25519_2w_adx(X2Z2, X2Z2, AB); /* [X2|Z2] = [B|E]*[A|a24*E+B] */
19819 ++ mul_eltfp25519_1w_adx(Z3, Z3, X1); /* Z3 = Z3*X1 */
19820 ++ --j;
19821 ++ }
19822 ++ j = 63; /* the remaining words use all 64 bits */
19823 ++ }
19824 ++
19825 ++ inv_eltfp25519_1w_adx(A, Qz); /* A = 1/Qz */
19826 ++ mul_eltfp25519_1w_adx((u64 *)shared, Qx, A); /* shared = Qx/Qz */
19827 ++ fred_eltfp25519_1w((u64 *)shared); /* bring the result into [0, 2^255-19) */
19828 ++
19829 ++ memzero_explicit(&m, sizeof(m)); /* wipe key copies and intermediates */
19830 ++}
19831 ++
19832 ++static void curve25519_adx_base(u8 session_key[CURVE25519_KEY_SIZE],
19833 ++ const u8 private_key[CURVE25519_KEY_SIZE])
19834 ++{ /* Fixed-base variant: walks the bits of private_key against table_ladder_8k and writes Ur1/Zr1, reduced, to session_key */
19835 ++ struct { /* all scratch state lives here and is wiped at the end */
19836 ++ u64 buffer[4 * NUM_WORDS_ELTFP25519]; /* not referenced in this function */
19837 ++ u64 coordinates[4 * NUM_WORDS_ELTFP25519]; /* Ur1 | Zr1 | Ur2 | Zr2 */
19838 ++ u64 workspace[4 * NUM_WORDS_ELTFP25519]; /* A | B | C | D */
19839 ++ u8 private[CURVE25519_KEY_SIZE]; /* local copy of private_key, clamped below */
19840 ++ } __aligned(32) m;
19841 ++
19842 ++ const int ite[4] = { 64, 64, 64, 63 }; /* exclusive upper bound for j in each word: word 3 stops at bit 62 */
19843 ++ const int q = 3; /* first scalar bit fed to the table loop; also the number of doublings at the end */
19844 ++ u64 swap = 1; /* conditional-swap state carried between iterations; starts at 1 */
19845 ++
19846 ++ int i = 0, j = 0, k = 0;
19847 ++ u64 *const key = (u64 *)m.private; /* scalar viewed as four u64 words */
19848 ++ u64 *const Ur1 = m.coordinates + 0;
19849 ++ u64 *const Zr1 = m.coordinates + 4;
19850 ++ u64 *const Ur2 = m.coordinates + 8;
19851 ++ u64 *const Zr2 = m.coordinates + 12;
19852 ++
19853 ++ u64 *const UZr1 = m.coordinates + 0; /* pair view Ur1|Zr1 */
19854 ++ u64 *const ZUr2 = m.coordinates + 8; /* pair view starting at Ur2, followed by Zr2 */
19855 ++
19856 ++ u64 *const A = m.workspace + 0;
19857 ++ u64 *const B = m.workspace + 4;
19858 ++ u64 *const C = m.workspace + 8;
19859 ++ u64 *const D = m.workspace + 12;
19860 ++
19861 ++ u64 *const AB = m.workspace + 0; /* pair view A|B */
19862 ++ u64 *const CD = m.workspace + 8; /* pair view C|D */
19863 ++
19864 ++ const u64 *const P = table_ladder_8k; /* precomputed table, read four u64 words per entry (P[4 * k] below) */
19865 ++
19866 ++ memcpy(m.private, private_key, sizeof(m.private));
19867 ++
19868 ++ curve25519_clamp_secret(m.private);
19869 ++
19870 ++ setzero_eltfp25519_1w(Ur1);
19871 ++ setzero_eltfp25519_1w(Zr1);
19872 ++ setzero_eltfp25519_1w(Zr2);
19873 ++ Ur1[0] = 1;
19874 ++ Zr1[0] = 1;
19875 ++ Zr2[0] = 1;
19876 ++
19877 ++ /* G-S */
19878 ++ Ur2[3] = 0x1eaecdeee27cab34UL;
19879 ++ Ur2[2] = 0xadc7a0b9235d48e2UL;
19880 ++ Ur2[1] = 0xbbf095ae14b2edf8UL;
19881 ++ Ur2[0] = 0x7e94e1fec82faabdUL;
19882 ++
19883 ++ /* main-loop */
19884 ++ j = q; /* scalar bits 0..q-1 are not consumed by this loop */
19885 ++ for (i = 0; i < NUM_WORDS_ELTFP25519; ++i) { /* words from least to most significant */
19886 ++ while (j < ite[i]) {
19887 ++ u64 bit = (key[i] >> j) & 0x1;
19888 ++ k = (64 * i + j - q); /* table index: 0 for scalar bit q, then one entry per bit */
19889 ++ swap = swap ^ bit; /* swap only when the carried state differs from this bit */
19890 ++ cswap(swap, Ur1, Ur2);
19891 ++ cswap(swap, Zr1, Zr2);
19892 ++ swap = bit; /* carry this bit into the next iteration */
19893 ++ /* Addition */
19894 ++ sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */
19895 ++ add_eltfp25519_1w_adx(A, Ur1, Zr1); /* A = Ur1+Zr1 */
19896 ++ mul_eltfp25519_1w_adx(C, &P[4 * k], B); /* C = M*B, with M the table entry P[4*k..4*k+3] */
19897 ++ sub_eltfp25519_1w(B, A, C); /* B = (Ur1+Zr1) - M*(Ur1-Zr1) */
19898 ++ add_eltfp25519_1w_adx(A, A, C); /* A = (Ur1+Zr1) + M*(Ur1-Zr1) */
19899 ++ sqr_eltfp25519_2w_adx(AB); /* A = A^2 | B = B^2 */
19900 ++ mul_eltfp25519_2w_adx(UZr1, ZUr2, AB); /* Ur1 = Zr2*A | Zr1 = Ur2*B */ /* NOTE(review): ZUr2 points at Ur2 with Zr2 after it -- confirm the operand pairing */
19901 ++ ++j;
19902 ++ }
19903 ++ j = 0; /* later words start from bit 0 */
19904 ++ }
19905 ++
19906 ++ /* Doubling */
19907 ++ for (i = 0; i < q; ++i) {
19908 ++ add_eltfp25519_1w_adx(A, Ur1, Zr1); /* A = Ur1+Zr1 */
19909 ++ sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */
19910 ++ sqr_eltfp25519_2w_adx(AB); /* A = A**2 B = B**2 */
19911 ++ copy_eltfp25519_1w(C, B); /* C = B */
19912 ++ sub_eltfp25519_1w(B, A, B); /* B = A-B */
19913 ++ mul_a24_eltfp25519_1w(D, B); /* D = my_a24*B */
19914 ++ add_eltfp25519_1w_adx(D, D, C); /* D = D+C */
19915 ++ mul_eltfp25519_2w_adx(UZr1, AB, CD); /* [Ur1|Zr1] = [A|B]*[C|D] */
19916 ++ }
19917 ++
19918 ++ /* Convert to affine coordinates */
19919 ++ inv_eltfp25519_1w_adx(A, Zr1); /* A = 1/Zr1 */
19920 ++ mul_eltfp25519_1w_adx((u64 *)session_key, Ur1, A); /* session_key = Ur1/Zr1 */
19921 ++ fred_eltfp25519_1w((u64 *)session_key); /* bring the result into [0, 2^255-19) */
19922 ++
19923 ++ memzero_explicit(&m, sizeof(m)); /* wipe the key copy and intermediates */
19924 ++}
19925 ++
19926 ++static void curve25519_bmi2(u8 shared[CURVE25519_KEY_SIZE],
19927 ++ const u8 private_key[CURVE25519_KEY_SIZE],
19928 ++ const u8 session_key[CURVE25519_KEY_SIZE])
19929 ++{ /* Ladder over the bits of private_key starting from the point session_key; writes Qx/Qz, reduced, to shared */
19930 ++ struct { /* all scratch state lives here and is wiped at the end */
19931 ++ u64 buffer[4 * NUM_WORDS_ELTFP25519]; /* not referenced in this function */
19932 ++ u64 coordinates[4 * NUM_WORDS_ELTFP25519]; /* Px | Pz | Qx | Qz */
19933 ++ u64 workspace[6 * NUM_WORDS_ELTFP25519]; /* A | B | D | C | DA | CB */
19934 ++ u8 session[CURVE25519_KEY_SIZE]; /* local copy of session_key, top bit masked below */
19935 ++ u8 private[CURVE25519_KEY_SIZE]; /* local copy of private_key, clamped below */
19936 ++ } __aligned(32) m;
19937 ++
19938 ++ int i = 0, j = 0;
19939 ++ u64 prev = 0;
19940 ++ u64 *const X1 = (u64 *)m.session; /* input coordinate viewed as four u64 limbs */
19941 ++ u64 *const key = (u64 *)m.private; /* scalar viewed as four u64 words */
19942 ++ u64 *const Px = m.coordinates + 0;
19943 ++ u64 *const Pz = m.coordinates + 4;
19944 ++ u64 *const Qx = m.coordinates + 8;
19945 ++ u64 *const Qz = m.coordinates + 12;
19946 ++ u64 *const X2 = Qx; /* (X2, Z2) and (X3, Z3) are other names for Q and P */
19947 ++ u64 *const Z2 = Qz;
19948 ++ u64 *const X3 = Px;
19949 ++ u64 *const Z3 = Pz;
19950 ++ u64 *const X2Z2 = Qx; /* two-element views: X2|Z2 and X3|Z3 are adjacent in m.coordinates */
19951 ++ u64 *const X3Z3 = Px;
19952 ++
19953 ++ u64 *const A = m.workspace + 0;
19954 ++ u64 *const B = m.workspace + 4;
19955 ++ u64 *const D = m.workspace + 8; /* D is placed before C so that DC below is the contiguous pair D|C */
19956 ++ u64 *const C = m.workspace + 12;
19957 ++ u64 *const DA = m.workspace + 16;
19958 ++ u64 *const CB = m.workspace + 20;
19959 ++ u64 *const AB = A; /* pair views passed to the 2w routines: A|B, D|C, DA|CB */
19960 ++ u64 *const DC = D;
19961 ++ u64 *const DACB = DA;
19962 ++
19963 ++ memcpy(m.private, private_key, sizeof(m.private));
19964 ++ memcpy(m.session, session_key, sizeof(m.session));
19965 ++
19966 ++ curve25519_clamp_secret(m.private);
19967 ++
19968 ++ /* As in the draft:
19969 ++ * When receiving such an array, implementations of curve25519
19970 ++ * MUST mask the most-significant bit in the final byte. This
19971 ++ * is done to preserve compatibility with point formats which
19972 ++ * reserve the sign bit for use in other protocols and to
19973 ++ * increase resistance to implementation fingerprinting
19974 ++ */
19975 ++ m.session[CURVE25519_KEY_SIZE - 1] &= (1 << (255 % 8)) - 1; /* i.e. &= 0x7f */
19976 ++
19977 ++ copy_eltfp25519_1w(Px, X1); /* P = (X1 : 1) */
19978 ++ setzero_eltfp25519_1w(Pz);
19979 ++ setzero_eltfp25519_1w(Qx); /* Q = (1 : 0) */
19980 ++ setzero_eltfp25519_1w(Qz);
19981 ++
19982 ++ Pz[0] = 1;
19983 ++ Qx[0] = 1;
19984 ++
19985 ++ /* main-loop */
19986 ++ prev = 0;
19987 ++ j = 62; /* start at scalar bit 254 (word 3, bit 62) */
19988 ++ for (i = 3; i >= 0; --i) { /* words from most to least significant */
19989 ++ while (j >= 0) {
19990 ++ u64 bit = (key[i] >> j) & 0x1;
19991 ++ u64 swap = bit ^ prev; /* 1 when this bit differs from the previous one */
19992 ++ prev = bit;
19993 ++
19994 ++ add_eltfp25519_1w_bmi2(A, X2, Z2); /* A = (X2+Z2) */
19995 ++ sub_eltfp25519_1w(B, X2, Z2); /* B = (X2-Z2) */
19996 ++ add_eltfp25519_1w_bmi2(C, X3, Z3); /* C = (X3+Z3) */
19997 ++ sub_eltfp25519_1w(D, X3, Z3); /* D = (X3-Z3) */
19998 ++ mul_eltfp25519_2w_bmi2(DACB, AB, DC); /* [DA|CB] = [A|B]*[D|C] */
19999 ++
20000 ++ cselect(swap, A, C); /* A = swap ? C : A */
20001 ++ cselect(swap, B, D); /* B = swap ? D : B */
20002 ++
20003 ++ sqr_eltfp25519_2w_bmi2(AB); /* [AA|BB] = [A^2|B^2] */
20004 ++ add_eltfp25519_1w_bmi2(X3, DA, CB); /* X3 = (DA+CB) */
20005 ++ sub_eltfp25519_1w(Z3, DA, CB); /* Z3 = (DA-CB) */
20006 ++ sqr_eltfp25519_2w_bmi2(X3Z3); /* [X3|Z3] = [(DA+CB)|(DA-CB)]^2 */
20007 ++
20008 ++ copy_eltfp25519_1w(X2, B); /* X2 = B^2 */
20009 ++ sub_eltfp25519_1w(Z2, A, B); /* Z2 = E = AA-BB */
20010 ++
20011 ++ mul_a24_eltfp25519_1w(B, Z2); /* B = a24*E */
20012 ++ add_eltfp25519_1w_bmi2(B, B, X2); /* B = a24*E+BB (X2 holds BB) */
20013 ++ mul_eltfp25519_2w_bmi2(X2Z2, X2Z2, AB); /* [X2|Z2] = [B|E]*[A|a24*E+B] */
20014 ++ mul_eltfp25519_1w_bmi2(Z3, Z3, X1); /* Z3 = Z3*X1 */
20015 ++ --j;
20016 ++ }
20017 ++ j = 63; /* the remaining words use all 64 bits */
20018 ++ }
20019 ++
20020 ++ inv_eltfp25519_1w_bmi2(A, Qz); /* A = 1/Qz */
20021 ++ mul_eltfp25519_1w_bmi2((u64 *)shared, Qx, A); /* shared = Qx/Qz */
20022 ++ fred_eltfp25519_1w((u64 *)shared); /* bring the result into [0, 2^255-19) */
20023 ++
20024 ++ memzero_explicit(&m, sizeof(m)); /* wipe key copies and intermediates */
20025 ++}
20026 ++
20027 ++static void curve25519_bmi2_base(u8 session_key[CURVE25519_KEY_SIZE],
20028 ++ const u8 private_key[CURVE25519_KEY_SIZE])
20029 ++{ /* Fixed-base variant: walks the bits of private_key against table_ladder_8k and writes Ur1/Zr1, reduced, to session_key */
20030 ++ struct { /* all scratch state lives here and is wiped at the end */
20031 ++ u64 buffer[4 * NUM_WORDS_ELTFP25519]; /* not referenced in this function */
20032 ++ u64 coordinates[4 * NUM_WORDS_ELTFP25519]; /* Ur1 | Zr1 | Ur2 | Zr2 */
20033 ++ u64 workspace[4 * NUM_WORDS_ELTFP25519]; /* A | B | C | D */
20034 ++ u8 private[CURVE25519_KEY_SIZE]; /* local copy of private_key, clamped below */
20035 ++ } __aligned(32) m;
20036 ++
20037 ++ const int ite[4] = { 64, 64, 64, 63 }; /* exclusive upper bound for j in each word: word 3 stops at bit 62 */
20038 ++ const int q = 3; /* first scalar bit fed to the table loop; also the number of doublings at the end */
20039 ++ u64 swap = 1; /* conditional-swap state carried between iterations; starts at 1 */
20040 ++
20041 ++ int i = 0, j = 0, k = 0;
20042 ++ u64 *const key = (u64 *)m.private; /* scalar viewed as four u64 words */
20043 ++ u64 *const Ur1 = m.coordinates + 0;
20044 ++ u64 *const Zr1 = m.coordinates + 4;
20045 ++ u64 *const Ur2 = m.coordinates + 8;
20046 ++ u64 *const Zr2 = m.coordinates + 12;
20047 ++
20048 ++ u64 *const UZr1 = m.coordinates + 0; /* pair view Ur1|Zr1 */
20049 ++ u64 *const ZUr2 = m.coordinates + 8; /* pair view starting at Ur2, followed by Zr2 */
20050 ++
20051 ++ u64 *const A = m.workspace + 0;
20052 ++ u64 *const B = m.workspace + 4;
20053 ++ u64 *const C = m.workspace + 8;
20054 ++ u64 *const D = m.workspace + 12;
20055 ++
20056 ++ u64 *const AB = m.workspace + 0; /* pair view A|B */
20057 ++ u64 *const CD = m.workspace + 8; /* pair view C|D */
20058 ++
20059 ++ const u64 *const P = table_ladder_8k; /* precomputed table, read four u64 words per entry (P[4 * k] below) */
20060 ++
20061 ++ memcpy(m.private, private_key, sizeof(m.private));
20062 ++
20063 ++ curve25519_clamp_secret(m.private);
20064 ++
20065 ++ setzero_eltfp25519_1w(Ur1);
20066 ++ setzero_eltfp25519_1w(Zr1);
20067 ++ setzero_eltfp25519_1w(Zr2);
20068 ++ Ur1[0] = 1;
20069 ++ Zr1[0] = 1;
20070 ++ Zr2[0] = 1;
20071 ++
20072 ++ /* G-S */
20073 ++ Ur2[3] = 0x1eaecdeee27cab34UL;
20074 ++ Ur2[2] = 0xadc7a0b9235d48e2UL;
20075 ++ Ur2[1] = 0xbbf095ae14b2edf8UL;
20076 ++ Ur2[0] = 0x7e94e1fec82faabdUL;
20077 ++
20078 ++ /* main-loop */
20079 ++ j = q; /* scalar bits 0..q-1 are not consumed by this loop */
20080 ++ for (i = 0; i < NUM_WORDS_ELTFP25519; ++i) { /* words from least to most significant */
20081 ++ while (j < ite[i]) {
20082 ++ u64 bit = (key[i] >> j) & 0x1;
20083 ++ k = (64 * i + j - q); /* table index: 0 for scalar bit q, then one entry per bit */
20084 ++ swap = swap ^ bit; /* swap only when the carried state differs from this bit */
20085 ++ cswap(swap, Ur1, Ur2);
20086 ++ cswap(swap, Zr1, Zr2);
20087 ++ swap = bit; /* carry this bit into the next iteration */
20088 ++ /* Addition */
20089 ++ sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */
20090 ++ add_eltfp25519_1w_bmi2(A, Ur1, Zr1); /* A = Ur1+Zr1 */
20091 ++ mul_eltfp25519_1w_bmi2(C, &P[4 * k], B);/* C = M*B, with M the table entry P[4*k..4*k+3] */
20092 ++ sub_eltfp25519_1w(B, A, C); /* B = (Ur1+Zr1) - M*(Ur1-Zr1) */
20093 ++ add_eltfp25519_1w_bmi2(A, A, C); /* A = (Ur1+Zr1) + M*(Ur1-Zr1) */
20094 ++ sqr_eltfp25519_2w_bmi2(AB); /* A = A^2 | B = B^2 */
20095 ++ mul_eltfp25519_2w_bmi2(UZr1, ZUr2, AB); /* Ur1 = Zr2*A | Zr1 = Ur2*B */ /* NOTE(review): ZUr2 points at Ur2 with Zr2 after it -- confirm the operand pairing */
20096 ++ ++j;
20097 ++ }
20098 ++ j = 0; /* later words start from bit 0 */
20099 ++ }
20100 ++
20101 ++ /* Doubling */
20102 ++ for (i = 0; i < q; ++i) {
20103 ++ add_eltfp25519_1w_bmi2(A, Ur1, Zr1); /* A = Ur1+Zr1 */
20104 ++ sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */
20105 ++ sqr_eltfp25519_2w_bmi2(AB); /* A = A**2 B = B**2 */
20106 ++ copy_eltfp25519_1w(C, B); /* C = B */
20107 ++ sub_eltfp25519_1w(B, A, B); /* B = A-B */
20108 ++ mul_a24_eltfp25519_1w(D, B); /* D = my_a24*B */
20109 ++ add_eltfp25519_1w_bmi2(D, D, C); /* D = D+C */
20110 ++ mul_eltfp25519_2w_bmi2(UZr1, AB, CD); /* [Ur1|Zr1] = [A|B]*[C|D] */
20111 ++ }
20112 ++
20113 ++ /* Convert to affine coordinates */
20114 ++ inv_eltfp25519_1w_bmi2(A, Zr1); /* A = 1/Zr1 */
20115 ++ mul_eltfp25519_1w_bmi2((u64 *)session_key, Ur1, A); /* session_key = Ur1/Zr1 */
20116 ++ fred_eltfp25519_1w((u64 *)session_key); /* bring the result into [0, 2^255-19) */
20117 ++
20118 ++ memzero_explicit(&m, sizeof(m)); /* wipe the key copy and intermediates */
20119 ++}
20120 ++
20121 ++void curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE], /* out: result buffer */
20122 ++ const u8 secret[CURVE25519_KEY_SIZE], /* in: private scalar */
20123 ++ const u8 basepoint[CURVE25519_KEY_SIZE]) /* in: point to multiply */
20124 ++{ /* Dispatch on the static keys set at module init: adx first, then bmi2, else the generic code */
20125 ++ if (static_branch_likely(&curve25519_use_adx))
20126 ++ curve25519_adx(mypublic, secret, basepoint);
20127 ++ else if (static_branch_likely(&curve25519_use_bmi2))
20128 ++ curve25519_bmi2(mypublic, secret, basepoint);
20129 ++ else
20130 ++ curve25519_generic(mypublic, secret, basepoint); /* neither key enabled */
20131 ++}
20132 ++EXPORT_SYMBOL(curve25519_arch);
20133 ++
20134 ++void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE], /* out: result buffer */
20135 ++ const u8 secret[CURVE25519_KEY_SIZE]) /* in: private scalar */
20136 ++{ /* Fixed-base counterpart of curve25519_arch(): same static-key dispatch, table-driven asm variants */
20137 ++ if (static_branch_likely(&curve25519_use_adx))
20138 ++ curve25519_adx_base(pub, secret);
20139 ++ else if (static_branch_likely(&curve25519_use_bmi2))
20140 ++ curve25519_bmi2_base(pub, secret);
20141 ++ else
20142 ++ curve25519_generic(pub, secret, curve25519_base_point); /* generic code takes the base point explicitly */
20143 ++}
20144 ++EXPORT_SYMBOL(curve25519_base_arch);
20145 ++
20146 ++static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf,
20147 ++ unsigned int len)
20148 ++{ /* Store the private key in the tfm context; returns 0 on success, -EINVAL on a rejected buf/len */
20149 ++ u8 *secret = kpp_tfm_ctx(tfm); /* context area, CURVE25519_KEY_SIZE bytes per .base.cra_ctxsize */
20150 ++
20151 ++ if (!len) /* no key supplied: generate one in place */
20152 ++ curve25519_generate_secret(secret);
20153 ++ else if (len == CURVE25519_KEY_SIZE && /* otherwise the length must match exactly ... */
20154 ++ crypto_memneq(buf, curve25519_null_point, CURVE25519_KEY_SIZE)) /* ... and buf must differ from curve25519_null_point */
20155 ++ memcpy(secret, buf, CURVE25519_KEY_SIZE);
20156 ++ else
20157 ++ return -EINVAL;
20158 ++ return 0;
20159 ++}
20160 ++
20161 ++static int curve25519_generate_public_key(struct kpp_request *req)
20162 ++{ /* Compute the fixed-base result for the stored secret and copy it to req->dst; returns 0 or -EINVAL */
20163 ++ struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
20164 ++ const u8 *secret = kpp_tfm_ctx(tfm); /* key stored by curve25519_set_secret() */
20165 ++ u8 buf[CURVE25519_KEY_SIZE];
20166 ++ int copied, nbytes;
20167 ++
20168 ++ if (req->src) /* this operation takes no input; a source list is rejected */
20169 ++ return -EINVAL;
20170 ++
20171 ++ curve25519_base_arch(buf, secret);
20172 ++
20173 ++ /* might want less than we've got */
20174 ++ nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len);
20175 ++ copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst,
20176 ++ nbytes),
20177 ++ buf, nbytes);
20178 ++ if (copied != nbytes) /* short copy into the destination list */
20179 ++ return -EINVAL;
20180 ++ return 0;
20181 ++}
20182 ++
20183 ++static int curve25519_compute_shared_secret(struct kpp_request *req)
20184 ++{ /* Read the peer key from req->src, multiply it by the stored secret, copy the result to req->dst; returns 0 or -EINVAL */
20185 ++ struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
20186 ++ const u8 *secret = kpp_tfm_ctx(tfm); /* key stored by curve25519_set_secret() */
20187 ++ u8 public_key[CURVE25519_KEY_SIZE];
20188 ++ u8 buf[CURVE25519_KEY_SIZE];
20189 ++ int copied, nbytes;
20190 ++
20191 ++ if (!req->src) /* the peer's public key is mandatory */
20192 ++ return -EINVAL;
20193 ++
20194 ++ copied = sg_copy_to_buffer(req->src,
20195 ++ sg_nents_for_len(req->src,
20196 ++ CURVE25519_KEY_SIZE),
20197 ++ public_key, CURVE25519_KEY_SIZE);
20198 ++ if (copied != CURVE25519_KEY_SIZE) /* source shorter than a full key */
20199 ++ return -EINVAL;
20200 ++
20201 ++ curve25519_arch(buf, secret, public_key);
20202 ++
20203 ++ /* might want less than we've got */
20204 ++ nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len);
20205 ++ copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst,
20206 ++ nbytes),
20207 ++ buf, nbytes);
20208 ++ /* buf holds the shared secret: wipe the stack copy on success and failure alike */
20209 ++ memzero_explicit(buf, sizeof(buf));
20210 ++ return copied != nbytes ? -EINVAL : 0;
20211 ++}
20212 ++
20213 ++static unsigned int curve25519_max_size(struct crypto_kpp *tfm)
20214 ++{ /* .max_size callback: always CURVE25519_KEY_SIZE, tfm is not consulted */
20215 ++ return CURVE25519_KEY_SIZE;
20216 ++}
20217 ++
20218 ++static struct kpp_alg curve25519_alg = { /* KPP algorithm descriptor registered in curve25519_mod_init() */
20219 ++ .base.cra_name = "curve25519",
20220 ++ .base.cra_driver_name = "curve25519-x86",
20221 ++ .base.cra_priority = 200, /* NOTE(review): presumably chosen to outrank the generic driver -- confirm */
20222 ++ .base.cra_module = THIS_MODULE,
20223 ++ .base.cra_ctxsize = CURVE25519_KEY_SIZE, /* the context holds just the private key */
20224 ++
20225 ++ .set_secret = curve25519_set_secret,
20226 ++ .generate_public_key = curve25519_generate_public_key,
20227 ++ .compute_shared_secret = curve25519_compute_shared_secret,
20228 ++ .max_size = curve25519_max_size,
20229 ++};
20230 ++
20231 ++static int __init curve25519_mod_init(void)
20232 ++{ /* Enable the fastest usable ladder and register the KPP algorithm; returns 0 or the crypto_register_kpp() result */
20233 ++ if (!boot_cpu_has(X86_FEATURE_BMI2)) /* both asm variants execute mulx (mul_a24_eltfp25519_1w), a BMI2 instruction */
20234 ++ return 0; /* no key enabled, nothing registered: the *_arch entry points fall back to curve25519_generic */
20235 ++ if (boot_cpu_has(X86_FEATURE_ADX)) /* ADX on top of BMI2: use the adx ladder, which the dispatchers test first */
20236 ++ static_branch_enable(&curve25519_use_adx);
20237 ++ else
20238 ++ static_branch_enable(&curve25519_use_bmi2);
20239 ++ return crypto_register_kpp(&curve25519_alg);
20240 ++}
20241 ++
20242 ++static void __exit curve25519_mod_exit(void)
20243 ++{ /* Undo the registration made by curve25519_mod_init() */
20244 ++ /* The algorithm is registered exactly when BMI2 is present, */
20245 ++ if (boot_cpu_has(X86_FEATURE_BMI2)) /* so the same condition guards the unregister. */
20246 ++ crypto_unregister_kpp(&curve25519_alg);
20247 ++}
20248 ++
20249 ++module_init(curve25519_mod_init);
20250 ++module_exit(curve25519_mod_exit);
20251 ++
20252 ++MODULE_ALIAS_CRYPTO("curve25519"); /* same string as .base.cra_name */
20253 ++MODULE_ALIAS_CRYPTO("curve25519-x86"); /* same string as .base.cra_driver_name */
20254 ++MODULE_LICENSE("GPL v2");
20255 +diff --git a/crypto/Kconfig b/crypto/Kconfig
20256 +index a3fc859830c1..b8b738bcc312 100644
20257 +--- a/crypto/Kconfig
20258 ++++ b/crypto/Kconfig
20259 +@@ -269,6 +269,12 @@ config CRYPTO_CURVE25519
20260 + select CRYPTO_KPP
20261 + select CRYPTO_LIB_CURVE25519_GENERIC
20262 +
20263 ++config CRYPTO_CURVE25519_X86
20264 ++ tristate "x86_64 accelerated Curve25519 scalar multiplication library"
20265 ++ depends on X86 && 64BIT
20266 ++ select CRYPTO_LIB_CURVE25519_GENERIC
20267 ++ select CRYPTO_ARCH_HAVE_LIB_CURVE25519
20268 ++
20269 + comment "Authenticated Encryption with Associated Data"
20270 +
20271 + config CRYPTO_CCM
20272 +--
20273 +cgit v1.2.3-4-ga26e
20274 +
20275 +
20276 +From 163090e6de9d77d38ccfc0ef1ca1e861def9587f Mon Sep 17 00:00:00 2001
20277 +From: "Jason A. Donenfeld" <Jason@×××××.com>
20278 +Date: Fri, 8 Nov 2019 13:22:37 +0100
20279 +Subject: crypto: arm/curve25519 - import Bernstein and Schwabe's Curve25519
20280 + ARM implementation
20281 +
20282 +commit f0fb006b604f98e2309a30f34ef455ac734f7c1c upstream.
20283 +
20284 +This comes from Dan Bernstein and Peter Schwabe's public domain NEON
20285 +code, and is included here in raw form so that subsequent commits that
20286 +fix these up for the kernel can see how it has changed. This code does
20287 +have some entirely cosmetic formatting differences, adding indentation
20288 +and so forth, so that when we actually port it for use in the kernel in
20289 +the subsequent commit, it's obvious what's changed in the process.
20290 +
20291 +This code originates from SUPERCOP 20180818, available at
20292 +<https://bench.cr.yp.to/supercop.html>.
20293 +
20294 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
20295 +Signed-off-by: Ard Biesheuvel <ardb@××××××.org>
20296 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
20297 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
20298 +---
20299 + arch/arm/crypto/curve25519-core.S | 2105 +++++++++++++++++++++++++++++++++++++
20300 + 1 file changed, 2105 insertions(+)
20301 + create mode 100644 arch/arm/crypto/curve25519-core.S
20302 +
20303 +diff --git a/arch/arm/crypto/curve25519-core.S b/arch/arm/crypto/curve25519-core.S
20304 +new file mode 100644
20305 +index 000000000000..f33b85fef382
20306 +--- /dev/null
20307 ++++ b/arch/arm/crypto/curve25519-core.S
20308 +@@ -0,0 +1,2105 @@
20309 ++/*
20310 ++ * Public domain code from Daniel J. Bernstein and Peter Schwabe, from
20311 ++ * SUPERCOP's curve25519/neon2/scalarmult.s.
20312 ++ */
20313 ++
20314 ++.fpu neon
20315 ++.text
20316 ++.align 4
20317 ++.global _crypto_scalarmult_curve25519_neon2
20318 ++.global crypto_scalarmult_curve25519_neon2
20319 ++.type _crypto_scalarmult_curve25519_neon2 STT_FUNC
20320 ++.type crypto_scalarmult_curve25519_neon2 STT_FUNC
20321 ++ _crypto_scalarmult_curve25519_neon2:
20322 ++ crypto_scalarmult_curve25519_neon2:
20323 ++ vpush {q4, q5, q6, q7}
20324 ++ mov r12, sp
20325 ++ sub sp, sp, #736
20326 ++ and sp, sp, #0xffffffe0
20327 ++ strd r4, [sp, #0]
20328 ++ strd r6, [sp, #8]
20329 ++ strd r8, [sp, #16]
20330 ++ strd r10, [sp, #24]
20331 ++ str r12, [sp, #480]
20332 ++ str r14, [sp, #484]
20333 ++ mov r0, r0
20334 ++ mov r1, r1
20335 ++ mov r2, r2
20336 ++ add r3, sp, #32
20337 ++ ldr r4, =0
20338 ++ ldr r5, =254
20339 ++ vmov.i32 q0, #1
20340 ++ vshr.u64 q1, q0, #7
20341 ++ vshr.u64 q0, q0, #8
20342 ++ vmov.i32 d4, #19
20343 ++ vmov.i32 d5, #38
20344 ++ add r6, sp, #512
20345 ++ vst1.8 {d2-d3}, [r6, : 128]
20346 ++ add r6, sp, #528
20347 ++ vst1.8 {d0-d1}, [r6, : 128]
20348 ++ add r6, sp, #544
20349 ++ vst1.8 {d4-d5}, [r6, : 128]
20350 ++ add r6, r3, #0
20351 ++ vmov.i32 q2, #0
20352 ++ vst1.8 {d4-d5}, [r6, : 128]!
20353 ++ vst1.8 {d4-d5}, [r6, : 128]!
20354 ++ vst1.8 d4, [r6, : 64]
20355 ++ add r6, r3, #0
20356 ++ ldr r7, =960
20357 ++ sub r7, r7, #2
20358 ++ neg r7, r7
20359 ++ sub r7, r7, r7, LSL #7
20360 ++ str r7, [r6]
20361 ++ add r6, sp, #704
20362 ++ vld1.8 {d4-d5}, [r1]!
20363 ++ vld1.8 {d6-d7}, [r1]
20364 ++ vst1.8 {d4-d5}, [r6, : 128]!
20365 ++ vst1.8 {d6-d7}, [r6, : 128]
20366 ++ sub r1, r6, #16
20367 ++ ldrb r6, [r1]
20368 ++ and r6, r6, #248
20369 ++ strb r6, [r1]
20370 ++ ldrb r6, [r1, #31]
20371 ++ and r6, r6, #127
20372 ++ orr r6, r6, #64
20373 ++ strb r6, [r1, #31]
20374 ++ vmov.i64 q2, #0xffffffff
20375 ++ vshr.u64 q3, q2, #7
20376 ++ vshr.u64 q2, q2, #6
20377 ++ vld1.8 {d8}, [r2]
20378 ++ vld1.8 {d10}, [r2]
20379 ++ add r2, r2, #6
20380 ++ vld1.8 {d12}, [r2]
20381 ++ vld1.8 {d14}, [r2]
20382 ++ add r2, r2, #6
20383 ++ vld1.8 {d16}, [r2]
20384 ++ add r2, r2, #4
20385 ++ vld1.8 {d18}, [r2]
20386 ++ vld1.8 {d20}, [r2]
20387 ++ add r2, r2, #6
20388 ++ vld1.8 {d22}, [r2]
20389 ++ add r2, r2, #2
20390 ++ vld1.8 {d24}, [r2]
20391 ++ vld1.8 {d26}, [r2]
20392 ++ vshr.u64 q5, q5, #26
20393 ++ vshr.u64 q6, q6, #3
20394 ++ vshr.u64 q7, q7, #29
20395 ++ vshr.u64 q8, q8, #6
20396 ++ vshr.u64 q10, q10, #25
20397 ++ vshr.u64 q11, q11, #3
20398 ++ vshr.u64 q12, q12, #12
20399 ++ vshr.u64 q13, q13, #38
20400 ++ vand q4, q4, q2
20401 ++ vand q6, q6, q2
20402 ++ vand q8, q8, q2
20403 ++ vand q10, q10, q2
20404 ++ vand q2, q12, q2
20405 ++ vand q5, q5, q3
20406 ++ vand q7, q7, q3
20407 ++ vand q9, q9, q3
20408 ++ vand q11, q11, q3
20409 ++ vand q3, q13, q3
20410 ++ add r2, r3, #48
20411 ++ vadd.i64 q12, q4, q1
20412 ++ vadd.i64 q13, q10, q1
20413 ++ vshr.s64 q12, q12, #26
20414 ++ vshr.s64 q13, q13, #26
20415 ++ vadd.i64 q5, q5, q12
20416 ++ vshl.i64 q12, q12, #26
20417 ++ vadd.i64 q14, q5, q0
20418 ++ vadd.i64 q11, q11, q13
20419 ++ vshl.i64 q13, q13, #26
20420 ++ vadd.i64 q15, q11, q0
20421 ++ vsub.i64 q4, q4, q12
20422 ++ vshr.s64 q12, q14, #25
20423 ++ vsub.i64 q10, q10, q13
20424 ++ vshr.s64 q13, q15, #25
20425 ++ vadd.i64 q6, q6, q12
20426 ++ vshl.i64 q12, q12, #25
20427 ++ vadd.i64 q14, q6, q1
20428 ++ vadd.i64 q2, q2, q13
20429 ++ vsub.i64 q5, q5, q12
20430 ++ vshr.s64 q12, q14, #26
20431 ++ vshl.i64 q13, q13, #25
20432 ++ vadd.i64 q14, q2, q1
20433 ++ vadd.i64 q7, q7, q12
20434 ++ vshl.i64 q12, q12, #26
20435 ++ vadd.i64 q15, q7, q0
20436 ++ vsub.i64 q11, q11, q13
20437 ++ vshr.s64 q13, q14, #26
20438 ++ vsub.i64 q6, q6, q12
20439 ++ vshr.s64 q12, q15, #25
20440 ++ vadd.i64 q3, q3, q13
20441 ++ vshl.i64 q13, q13, #26
20442 ++ vadd.i64 q14, q3, q0
20443 ++ vadd.i64 q8, q8, q12
20444 ++ vshl.i64 q12, q12, #25
20445 ++ vadd.i64 q15, q8, q1
20446 ++ add r2, r2, #8
20447 ++ vsub.i64 q2, q2, q13
20448 ++ vshr.s64 q13, q14, #25
20449 ++ vsub.i64 q7, q7, q12
20450 ++ vshr.s64 q12, q15, #26
20451 ++ vadd.i64 q14, q13, q13
20452 ++ vadd.i64 q9, q9, q12
20453 ++ vtrn.32 d12, d14
20454 ++ vshl.i64 q12, q12, #26
20455 ++ vtrn.32 d13, d15
20456 ++ vadd.i64 q0, q9, q0
20457 ++ vadd.i64 q4, q4, q14
20458 ++ vst1.8 d12, [r2, : 64]!
20459 ++ vshl.i64 q6, q13, #4
20460 ++ vsub.i64 q7, q8, q12
20461 ++ vshr.s64 q0, q0, #25
20462 ++ vadd.i64 q4, q4, q6
20463 ++ vadd.i64 q6, q10, q0
20464 ++ vshl.i64 q0, q0, #25
20465 ++ vadd.i64 q8, q6, q1
20466 ++ vadd.i64 q4, q4, q13
20467 ++ vshl.i64 q10, q13, #25
20468 ++ vadd.i64 q1, q4, q1
20469 ++ vsub.i64 q0, q9, q0
20470 ++ vshr.s64 q8, q8, #26
20471 ++ vsub.i64 q3, q3, q10
20472 ++ vtrn.32 d14, d0
20473 ++ vshr.s64 q1, q1, #26
20474 ++ vtrn.32 d15, d1
20475 ++ vadd.i64 q0, q11, q8
20476 ++ vst1.8 d14, [r2, : 64]
20477 ++ vshl.i64 q7, q8, #26
20478 ++ vadd.i64 q5, q5, q1
20479 ++ vtrn.32 d4, d6
20480 ++ vshl.i64 q1, q1, #26
20481 ++ vtrn.32 d5, d7
20482 ++ vsub.i64 q3, q6, q7
20483 ++ add r2, r2, #16
20484 ++ vsub.i64 q1, q4, q1
20485 ++ vst1.8 d4, [r2, : 64]
20486 ++ vtrn.32 d6, d0
20487 ++ vtrn.32 d7, d1
20488 ++ sub r2, r2, #8
20489 ++ vtrn.32 d2, d10
20490 ++ vtrn.32 d3, d11
20491 ++ vst1.8 d6, [r2, : 64]
20492 ++ sub r2, r2, #24
20493 ++ vst1.8 d2, [r2, : 64]
20494 ++ add r2, r3, #96
20495 ++ vmov.i32 q0, #0
20496 ++ vmov.i64 d2, #0xff
20497 ++ vmov.i64 d3, #0
20498 ++ vshr.u32 q1, q1, #7
20499 ++ vst1.8 {d2-d3}, [r2, : 128]!
20500 ++ vst1.8 {d0-d1}, [r2, : 128]!
20501 ++ vst1.8 d0, [r2, : 64]
20502 ++ add r2, r3, #144
20503 ++ vmov.i32 q0, #0
20504 ++ vst1.8 {d0-d1}, [r2, : 128]!
20505 ++ vst1.8 {d0-d1}, [r2, : 128]!
20506 ++ vst1.8 d0, [r2, : 64]
20507 ++ add r2, r3, #240
20508 ++ vmov.i32 q0, #0
20509 ++ vmov.i64 d2, #0xff
20510 ++ vmov.i64 d3, #0
20511 ++ vshr.u32 q1, q1, #7
20512 ++ vst1.8 {d2-d3}, [r2, : 128]!
20513 ++ vst1.8 {d0-d1}, [r2, : 128]!
20514 ++ vst1.8 d0, [r2, : 64]
20515 ++ add r2, r3, #48
20516 ++ add r6, r3, #192
20517 ++ vld1.8 {d0-d1}, [r2, : 128]!
20518 ++ vld1.8 {d2-d3}, [r2, : 128]!
20519 ++ vld1.8 {d4}, [r2, : 64]
20520 ++ vst1.8 {d0-d1}, [r6, : 128]!
20521 ++ vst1.8 {d2-d3}, [r6, : 128]!
20522 ++ vst1.8 d4, [r6, : 64]
20523 ++._mainloop:
20524 ++ mov r2, r5, LSR #3
20525 ++ and r6, r5, #7
20526 ++ ldrb r2, [r1, r2]
20527 ++ mov r2, r2, LSR r6
20528 ++ and r2, r2, #1
20529 ++ str r5, [sp, #488]
20530 ++ eor r4, r4, r2
20531 ++ str r2, [sp, #492]
20532 ++ neg r2, r4
20533 ++ add r4, r3, #96
20534 ++ add r5, r3, #192
20535 ++ add r6, r3, #144
20536 ++ vld1.8 {d8-d9}, [r4, : 128]!
20537 ++ add r7, r3, #240
20538 ++ vld1.8 {d10-d11}, [r5, : 128]!
20539 ++ veor q6, q4, q5
20540 ++ vld1.8 {d14-d15}, [r6, : 128]!
20541 ++ vdup.i32 q8, r2
20542 ++ vld1.8 {d18-d19}, [r7, : 128]!
20543 ++ veor q10, q7, q9
20544 ++ vld1.8 {d22-d23}, [r4, : 128]!
20545 ++ vand q6, q6, q8
20546 ++ vld1.8 {d24-d25}, [r5, : 128]!
20547 ++ vand q10, q10, q8
20548 ++ vld1.8 {d26-d27}, [r6, : 128]!
20549 ++ veor q4, q4, q6
20550 ++ vld1.8 {d28-d29}, [r7, : 128]!
20551 ++ veor q5, q5, q6
20552 ++ vld1.8 {d0}, [r4, : 64]
20553 ++ veor q6, q7, q10
20554 ++ vld1.8 {d2}, [r5, : 64]
20555 ++ veor q7, q9, q10
20556 ++ vld1.8 {d4}, [r6, : 64]
20557 ++ veor q9, q11, q12
20558 ++ vld1.8 {d6}, [r7, : 64]
20559 ++ veor q10, q0, q1
20560 ++ sub r2, r4, #32
20561 ++ vand q9, q9, q8
20562 ++ sub r4, r5, #32
20563 ++ vand q10, q10, q8
20564 ++ sub r5, r6, #32
20565 ++ veor q11, q11, q9
20566 ++ sub r6, r7, #32
20567 ++ veor q0, q0, q10
20568 ++ veor q9, q12, q9
20569 ++ veor q1, q1, q10
20570 ++ veor q10, q13, q14
20571 ++ veor q12, q2, q3
20572 ++ vand q10, q10, q8
20573 ++ vand q8, q12, q8
20574 ++ veor q12, q13, q10
20575 ++ veor q2, q2, q8
20576 ++ veor q10, q14, q10
20577 ++ veor q3, q3, q8
20578 ++ vadd.i32 q8, q4, q6
20579 ++ vsub.i32 q4, q4, q6
20580 ++ vst1.8 {d16-d17}, [r2, : 128]!
20581 ++ vadd.i32 q6, q11, q12
20582 ++ vst1.8 {d8-d9}, [r5, : 128]!
20583 ++ vsub.i32 q4, q11, q12
20584 ++ vst1.8 {d12-d13}, [r2, : 128]!
20585 ++ vadd.i32 q6, q0, q2
20586 ++ vst1.8 {d8-d9}, [r5, : 128]!
20587 ++ vsub.i32 q0, q0, q2
20588 ++ vst1.8 d12, [r2, : 64]
20589 ++ vadd.i32 q2, q5, q7
20590 ++ vst1.8 d0, [r5, : 64]
20591 ++ vsub.i32 q0, q5, q7
20592 ++ vst1.8 {d4-d5}, [r4, : 128]!
20593 ++ vadd.i32 q2, q9, q10
20594 ++ vst1.8 {d0-d1}, [r6, : 128]!
20595 ++ vsub.i32 q0, q9, q10
20596 ++ vst1.8 {d4-d5}, [r4, : 128]!
20597 ++ vadd.i32 q2, q1, q3
20598 ++ vst1.8 {d0-d1}, [r6, : 128]!
20599 ++ vsub.i32 q0, q1, q3
20600 ++ vst1.8 d4, [r4, : 64]
20601 ++ vst1.8 d0, [r6, : 64]
20602 ++ add r2, sp, #544
20603 ++ add r4, r3, #96
20604 ++ add r5, r3, #144
20605 ++ vld1.8 {d0-d1}, [r2, : 128]
20606 ++ vld1.8 {d2-d3}, [r4, : 128]!
20607 ++ vld1.8 {d4-d5}, [r5, : 128]!
20608 ++ vzip.i32 q1, q2
20609 ++ vld1.8 {d6-d7}, [r4, : 128]!
20610 ++ vld1.8 {d8-d9}, [r5, : 128]!
20611 ++ vshl.i32 q5, q1, #1
20612 ++ vzip.i32 q3, q4
20613 ++ vshl.i32 q6, q2, #1
20614 ++ vld1.8 {d14}, [r4, : 64]
20615 ++ vshl.i32 q8, q3, #1
20616 ++ vld1.8 {d15}, [r5, : 64]
20617 ++ vshl.i32 q9, q4, #1
20618 ++ vmul.i32 d21, d7, d1
20619 ++ vtrn.32 d14, d15
20620 ++ vmul.i32 q11, q4, q0
20621 ++ vmul.i32 q0, q7, q0
20622 ++ vmull.s32 q12, d2, d2
20623 ++ vmlal.s32 q12, d11, d1
20624 ++ vmlal.s32 q12, d12, d0
20625 ++ vmlal.s32 q12, d13, d23
20626 ++ vmlal.s32 q12, d16, d22
20627 ++ vmlal.s32 q12, d7, d21
20628 ++ vmull.s32 q10, d2, d11
20629 ++ vmlal.s32 q10, d4, d1
20630 ++ vmlal.s32 q10, d13, d0
20631 ++ vmlal.s32 q10, d6, d23
20632 ++ vmlal.s32 q10, d17, d22
20633 ++ vmull.s32 q13, d10, d4
20634 ++ vmlal.s32 q13, d11, d3
20635 ++ vmlal.s32 q13, d13, d1
20636 ++ vmlal.s32 q13, d16, d0
20637 ++ vmlal.s32 q13, d17, d23
20638 ++ vmlal.s32 q13, d8, d22
20639 ++ vmull.s32 q1, d10, d5
20640 ++ vmlal.s32 q1, d11, d4
20641 ++ vmlal.s32 q1, d6, d1
20642 ++ vmlal.s32 q1, d17, d0
20643 ++ vmlal.s32 q1, d8, d23
20644 ++ vmull.s32 q14, d10, d6
20645 ++ vmlal.s32 q14, d11, d13
20646 ++ vmlal.s32 q14, d4, d4
20647 ++ vmlal.s32 q14, d17, d1
20648 ++ vmlal.s32 q14, d18, d0
20649 ++ vmlal.s32 q14, d9, d23
20650 ++ vmull.s32 q11, d10, d7
20651 ++ vmlal.s32 q11, d11, d6
20652 ++ vmlal.s32 q11, d12, d5
20653 ++ vmlal.s32 q11, d8, d1
20654 ++ vmlal.s32 q11, d19, d0
20655 ++ vmull.s32 q15, d10, d8
20656 ++ vmlal.s32 q15, d11, d17
20657 ++ vmlal.s32 q15, d12, d6
20658 ++ vmlal.s32 q15, d13, d5
20659 ++ vmlal.s32 q15, d19, d1
20660 ++ vmlal.s32 q15, d14, d0
20661 ++ vmull.s32 q2, d10, d9
20662 ++ vmlal.s32 q2, d11, d8
20663 ++ vmlal.s32 q2, d12, d7
20664 ++ vmlal.s32 q2, d13, d6
20665 ++ vmlal.s32 q2, d14, d1
20666 ++ vmull.s32 q0, d15, d1
20667 ++ vmlal.s32 q0, d10, d14
20668 ++ vmlal.s32 q0, d11, d19
20669 ++ vmlal.s32 q0, d12, d8
20670 ++ vmlal.s32 q0, d13, d17
20671 ++ vmlal.s32 q0, d6, d6
20672 ++ add r2, sp, #512
20673 ++ vld1.8 {d18-d19}, [r2, : 128]
20674 ++ vmull.s32 q3, d16, d7
20675 ++ vmlal.s32 q3, d10, d15
20676 ++ vmlal.s32 q3, d11, d14
20677 ++ vmlal.s32 q3, d12, d9
20678 ++ vmlal.s32 q3, d13, d8
20679 ++ add r2, sp, #528
20680 ++ vld1.8 {d8-d9}, [r2, : 128]
20681 ++ vadd.i64 q5, q12, q9
20682 ++ vadd.i64 q6, q15, q9
20683 ++ vshr.s64 q5, q5, #26
20684 ++ vshr.s64 q6, q6, #26
20685 ++ vadd.i64 q7, q10, q5
20686 ++ vshl.i64 q5, q5, #26
20687 ++ vadd.i64 q8, q7, q4
20688 ++ vadd.i64 q2, q2, q6
20689 ++ vshl.i64 q6, q6, #26
20690 ++ vadd.i64 q10, q2, q4
20691 ++ vsub.i64 q5, q12, q5
20692 ++ vshr.s64 q8, q8, #25
20693 ++ vsub.i64 q6, q15, q6
20694 ++ vshr.s64 q10, q10, #25
20695 ++ vadd.i64 q12, q13, q8
20696 ++ vshl.i64 q8, q8, #25
20697 ++ vadd.i64 q13, q12, q9
20698 ++ vadd.i64 q0, q0, q10
20699 ++ vsub.i64 q7, q7, q8
20700 ++ vshr.s64 q8, q13, #26
20701 ++ vshl.i64 q10, q10, #25
20702 ++ vadd.i64 q13, q0, q9
20703 ++ vadd.i64 q1, q1, q8
20704 ++ vshl.i64 q8, q8, #26
20705 ++ vadd.i64 q15, q1, q4
20706 ++ vsub.i64 q2, q2, q10
20707 ++ vshr.s64 q10, q13, #26
20708 ++ vsub.i64 q8, q12, q8
20709 ++ vshr.s64 q12, q15, #25
20710 ++ vadd.i64 q3, q3, q10
20711 ++ vshl.i64 q10, q10, #26
20712 ++ vadd.i64 q13, q3, q4
20713 ++ vadd.i64 q14, q14, q12
20714 ++ add r2, r3, #288
20715 ++ vshl.i64 q12, q12, #25
20716 ++ add r4, r3, #336
20717 ++ vadd.i64 q15, q14, q9
20718 ++ add r2, r2, #8
20719 ++ vsub.i64 q0, q0, q10
20720 ++ add r4, r4, #8
20721 ++ vshr.s64 q10, q13, #25
20722 ++ vsub.i64 q1, q1, q12
20723 ++ vshr.s64 q12, q15, #26
20724 ++ vadd.i64 q13, q10, q10
20725 ++ vadd.i64 q11, q11, q12
20726 ++ vtrn.32 d16, d2
20727 ++ vshl.i64 q12, q12, #26
20728 ++ vtrn.32 d17, d3
20729 ++ vadd.i64 q1, q11, q4
20730 ++ vadd.i64 q4, q5, q13
20731 ++ vst1.8 d16, [r2, : 64]!
20732 ++ vshl.i64 q5, q10, #4
20733 ++ vst1.8 d17, [r4, : 64]!
20734 ++ vsub.i64 q8, q14, q12
20735 ++ vshr.s64 q1, q1, #25
20736 ++ vadd.i64 q4, q4, q5
20737 ++ vadd.i64 q5, q6, q1
20738 ++ vshl.i64 q1, q1, #25
20739 ++ vadd.i64 q6, q5, q9
20740 ++ vadd.i64 q4, q4, q10
20741 ++ vshl.i64 q10, q10, #25
20742 ++ vadd.i64 q9, q4, q9
20743 ++ vsub.i64 q1, q11, q1
20744 ++ vshr.s64 q6, q6, #26
20745 ++ vsub.i64 q3, q3, q10
20746 ++ vtrn.32 d16, d2
20747 ++ vshr.s64 q9, q9, #26
20748 ++ vtrn.32 d17, d3
20749 ++ vadd.i64 q1, q2, q6
20750 ++ vst1.8 d16, [r2, : 64]
20751 ++ vshl.i64 q2, q6, #26
20752 ++ vst1.8 d17, [r4, : 64]
20753 ++ vadd.i64 q6, q7, q9
20754 ++ vtrn.32 d0, d6
20755 ++ vshl.i64 q7, q9, #26
20756 ++ vtrn.32 d1, d7
20757 ++ vsub.i64 q2, q5, q2
20758 ++ add r2, r2, #16
20759 ++ vsub.i64 q3, q4, q7
20760 ++ vst1.8 d0, [r2, : 64]
20761 ++ add r4, r4, #16
20762 ++ vst1.8 d1, [r4, : 64]
20763 ++ vtrn.32 d4, d2
20764 ++ vtrn.32 d5, d3
20765 ++ sub r2, r2, #8
20766 ++ sub r4, r4, #8
20767 ++ vtrn.32 d6, d12
20768 ++ vtrn.32 d7, d13
20769 ++ vst1.8 d4, [r2, : 64]
20770 ++ vst1.8 d5, [r4, : 64]
20771 ++ sub r2, r2, #24
20772 ++ sub r4, r4, #24
20773 ++ vst1.8 d6, [r2, : 64]
20774 ++ vst1.8 d7, [r4, : 64]
20775 ++ add r2, r3, #240
20776 ++ add r4, r3, #96
20777 ++ vld1.8 {d0-d1}, [r4, : 128]!
20778 ++ vld1.8 {d2-d3}, [r4, : 128]!
20779 ++ vld1.8 {d4}, [r4, : 64]
20780 ++ add r4, r3, #144
20781 ++ vld1.8 {d6-d7}, [r4, : 128]!
20782 ++ vtrn.32 q0, q3
20783 ++ vld1.8 {d8-d9}, [r4, : 128]!
20784 ++ vshl.i32 q5, q0, #4
20785 ++ vtrn.32 q1, q4
20786 ++ vshl.i32 q6, q3, #4
20787 ++ vadd.i32 q5, q5, q0
20788 ++ vadd.i32 q6, q6, q3
20789 ++ vshl.i32 q7, q1, #4
20790 ++ vld1.8 {d5}, [r4, : 64]
20791 ++ vshl.i32 q8, q4, #4
20792 ++ vtrn.32 d4, d5
20793 ++ vadd.i32 q7, q7, q1
20794 ++ vadd.i32 q8, q8, q4
20795 ++ vld1.8 {d18-d19}, [r2, : 128]!
20796 ++ vshl.i32 q10, q2, #4
20797 ++ vld1.8 {d22-d23}, [r2, : 128]!
20798 ++ vadd.i32 q10, q10, q2
20799 ++ vld1.8 {d24}, [r2, : 64]
20800 ++ vadd.i32 q5, q5, q0
20801 ++ add r2, r3, #192
20802 ++ vld1.8 {d26-d27}, [r2, : 128]!
20803 ++ vadd.i32 q6, q6, q3
20804 ++ vld1.8 {d28-d29}, [r2, : 128]!
20805 ++ vadd.i32 q8, q8, q4
20806 ++ vld1.8 {d25}, [r2, : 64]
20807 ++ vadd.i32 q10, q10, q2
20808 ++ vtrn.32 q9, q13
20809 ++ vadd.i32 q7, q7, q1
20810 ++ vadd.i32 q5, q5, q0
20811 ++ vtrn.32 q11, q14
20812 ++ vadd.i32 q6, q6, q3
20813 ++ add r2, sp, #560
20814 ++ vadd.i32 q10, q10, q2
20815 ++ vtrn.32 d24, d25
20816 ++ vst1.8 {d12-d13}, [r2, : 128]
20817 ++ vshl.i32 q6, q13, #1
20818 ++ add r2, sp, #576
20819 ++ vst1.8 {d20-d21}, [r2, : 128]
20820 ++ vshl.i32 q10, q14, #1
20821 ++ add r2, sp, #592
20822 ++ vst1.8 {d12-d13}, [r2, : 128]
20823 ++ vshl.i32 q15, q12, #1
20824 ++ vadd.i32 q8, q8, q4
20825 ++ vext.32 d10, d31, d30, #0
20826 ++ vadd.i32 q7, q7, q1
20827 ++ add r2, sp, #608
20828 ++ vst1.8 {d16-d17}, [r2, : 128]
20829 ++ vmull.s32 q8, d18, d5
20830 ++ vmlal.s32 q8, d26, d4
20831 ++ vmlal.s32 q8, d19, d9
20832 ++ vmlal.s32 q8, d27, d3
20833 ++ vmlal.s32 q8, d22, d8
20834 ++ vmlal.s32 q8, d28, d2
20835 ++ vmlal.s32 q8, d23, d7
20836 ++ vmlal.s32 q8, d29, d1
20837 ++ vmlal.s32 q8, d24, d6
20838 ++ vmlal.s32 q8, d25, d0
20839 ++ add r2, sp, #624
20840 ++ vst1.8 {d14-d15}, [r2, : 128]
20841 ++ vmull.s32 q2, d18, d4
20842 ++ vmlal.s32 q2, d12, d9
20843 ++ vmlal.s32 q2, d13, d8
20844 ++ vmlal.s32 q2, d19, d3
20845 ++ vmlal.s32 q2, d22, d2
20846 ++ vmlal.s32 q2, d23, d1
20847 ++ vmlal.s32 q2, d24, d0
20848 ++ add r2, sp, #640
20849 ++ vst1.8 {d20-d21}, [r2, : 128]
20850 ++ vmull.s32 q7, d18, d9
20851 ++ vmlal.s32 q7, d26, d3
20852 ++ vmlal.s32 q7, d19, d8
20853 ++ vmlal.s32 q7, d27, d2
20854 ++ vmlal.s32 q7, d22, d7
20855 ++ vmlal.s32 q7, d28, d1
20856 ++ vmlal.s32 q7, d23, d6
20857 ++ vmlal.s32 q7, d29, d0
20858 ++ add r2, sp, #656
20859 ++ vst1.8 {d10-d11}, [r2, : 128]
20860 ++ vmull.s32 q5, d18, d3
20861 ++ vmlal.s32 q5, d19, d2
20862 ++ vmlal.s32 q5, d22, d1
20863 ++ vmlal.s32 q5, d23, d0
20864 ++ vmlal.s32 q5, d12, d8
20865 ++ add r2, sp, #672
20866 ++ vst1.8 {d16-d17}, [r2, : 128]
20867 ++ vmull.s32 q4, d18, d8
20868 ++ vmlal.s32 q4, d26, d2
20869 ++ vmlal.s32 q4, d19, d7
20870 ++ vmlal.s32 q4, d27, d1
20871 ++ vmlal.s32 q4, d22, d6
20872 ++ vmlal.s32 q4, d28, d0
20873 ++ vmull.s32 q8, d18, d7
20874 ++ vmlal.s32 q8, d26, d1
20875 ++ vmlal.s32 q8, d19, d6
20876 ++ vmlal.s32 q8, d27, d0
20877 ++ add r2, sp, #576
20878 ++ vld1.8 {d20-d21}, [r2, : 128]
20879 ++ vmlal.s32 q7, d24, d21
20880 ++ vmlal.s32 q7, d25, d20
20881 ++ vmlal.s32 q4, d23, d21
20882 ++ vmlal.s32 q4, d29, d20
20883 ++ vmlal.s32 q8, d22, d21
20884 ++ vmlal.s32 q8, d28, d20
20885 ++ vmlal.s32 q5, d24, d20
20886 ++ add r2, sp, #576
20887 ++ vst1.8 {d14-d15}, [r2, : 128]
20888 ++ vmull.s32 q7, d18, d6
20889 ++ vmlal.s32 q7, d26, d0
20890 ++ add r2, sp, #656
20891 ++ vld1.8 {d30-d31}, [r2, : 128]
20892 ++ vmlal.s32 q2, d30, d21
20893 ++ vmlal.s32 q7, d19, d21
20894 ++ vmlal.s32 q7, d27, d20
20895 ++ add r2, sp, #624
20896 ++ vld1.8 {d26-d27}, [r2, : 128]
20897 ++ vmlal.s32 q4, d25, d27
20898 ++ vmlal.s32 q8, d29, d27
20899 ++ vmlal.s32 q8, d25, d26
20900 ++ vmlal.s32 q7, d28, d27
20901 ++ vmlal.s32 q7, d29, d26
20902 ++ add r2, sp, #608
20903 ++ vld1.8 {d28-d29}, [r2, : 128]
20904 ++ vmlal.s32 q4, d24, d29
20905 ++ vmlal.s32 q8, d23, d29
20906 ++ vmlal.s32 q8, d24, d28
20907 ++ vmlal.s32 q7, d22, d29
20908 ++ vmlal.s32 q7, d23, d28
20909 ++ add r2, sp, #608
20910 ++ vst1.8 {d8-d9}, [r2, : 128]
20911 ++ add r2, sp, #560
20912 ++ vld1.8 {d8-d9}, [r2, : 128]
20913 ++ vmlal.s32 q7, d24, d9
20914 ++ vmlal.s32 q7, d25, d31
20915 ++ vmull.s32 q1, d18, d2
20916 ++ vmlal.s32 q1, d19, d1
20917 ++ vmlal.s32 q1, d22, d0
20918 ++ vmlal.s32 q1, d24, d27
20919 ++ vmlal.s32 q1, d23, d20
20920 ++ vmlal.s32 q1, d12, d7
20921 ++ vmlal.s32 q1, d13, d6
20922 ++ vmull.s32 q6, d18, d1
20923 ++ vmlal.s32 q6, d19, d0
20924 ++ vmlal.s32 q6, d23, d27
20925 ++ vmlal.s32 q6, d22, d20
20926 ++ vmlal.s32 q6, d24, d26
20927 ++ vmull.s32 q0, d18, d0
20928 ++ vmlal.s32 q0, d22, d27
20929 ++ vmlal.s32 q0, d23, d26
20930 ++ vmlal.s32 q0, d24, d31
20931 ++ vmlal.s32 q0, d19, d20
20932 ++ add r2, sp, #640
20933 ++ vld1.8 {d18-d19}, [r2, : 128]
20934 ++ vmlal.s32 q2, d18, d7
20935 ++ vmlal.s32 q2, d19, d6
20936 ++ vmlal.s32 q5, d18, d6
20937 ++ vmlal.s32 q5, d19, d21
20938 ++ vmlal.s32 q1, d18, d21
20939 ++ vmlal.s32 q1, d19, d29
20940 ++ vmlal.s32 q0, d18, d28
20941 ++ vmlal.s32 q0, d19, d9
20942 ++ vmlal.s32 q6, d18, d29
20943 ++ vmlal.s32 q6, d19, d28
20944 ++ add r2, sp, #592
20945 ++ vld1.8 {d18-d19}, [r2, : 128]
20946 ++ add r2, sp, #512
20947 ++ vld1.8 {d22-d23}, [r2, : 128]
20948 ++ vmlal.s32 q5, d19, d7
20949 ++ vmlal.s32 q0, d18, d21
20950 ++ vmlal.s32 q0, d19, d29
20951 ++ vmlal.s32 q6, d18, d6
20952 ++ add r2, sp, #528
20953 ++ vld1.8 {d6-d7}, [r2, : 128]
20954 ++ vmlal.s32 q6, d19, d21
20955 ++ add r2, sp, #576
20956 ++ vld1.8 {d18-d19}, [r2, : 128]
20957 ++ vmlal.s32 q0, d30, d8
20958 ++ add r2, sp, #672
20959 ++ vld1.8 {d20-d21}, [r2, : 128]
20960 ++ vmlal.s32 q5, d30, d29
20961 ++ add r2, sp, #608
20962 ++ vld1.8 {d24-d25}, [r2, : 128]
20963 ++ vmlal.s32 q1, d30, d28
20964 ++ vadd.i64 q13, q0, q11
20965 ++ vadd.i64 q14, q5, q11
20966 ++ vmlal.s32 q6, d30, d9
20967 ++ vshr.s64 q4, q13, #26
20968 ++ vshr.s64 q13, q14, #26
20969 ++ vadd.i64 q7, q7, q4
20970 ++ vshl.i64 q4, q4, #26
20971 ++ vadd.i64 q14, q7, q3
20972 ++ vadd.i64 q9, q9, q13
20973 ++ vshl.i64 q13, q13, #26
20974 ++ vadd.i64 q15, q9, q3
20975 ++ vsub.i64 q0, q0, q4
20976 ++ vshr.s64 q4, q14, #25
20977 ++ vsub.i64 q5, q5, q13
20978 ++ vshr.s64 q13, q15, #25
20979 ++ vadd.i64 q6, q6, q4
20980 ++ vshl.i64 q4, q4, #25
20981 ++ vadd.i64 q14, q6, q11
20982 ++ vadd.i64 q2, q2, q13
20983 ++ vsub.i64 q4, q7, q4
20984 ++ vshr.s64 q7, q14, #26
20985 ++ vshl.i64 q13, q13, #25
20986 ++ vadd.i64 q14, q2, q11
20987 ++ vadd.i64 q8, q8, q7
20988 ++ vshl.i64 q7, q7, #26
20989 ++ vadd.i64 q15, q8, q3
20990 ++ vsub.i64 q9, q9, q13
20991 ++ vshr.s64 q13, q14, #26
20992 ++ vsub.i64 q6, q6, q7
20993 ++ vshr.s64 q7, q15, #25
20994 ++ vadd.i64 q10, q10, q13
20995 ++ vshl.i64 q13, q13, #26
20996 ++ vadd.i64 q14, q10, q3
20997 ++ vadd.i64 q1, q1, q7
20998 ++ add r2, r3, #144
20999 ++ vshl.i64 q7, q7, #25
21000 ++ add r4, r3, #96
21001 ++ vadd.i64 q15, q1, q11
21002 ++ add r2, r2, #8
21003 ++ vsub.i64 q2, q2, q13
21004 ++ add r4, r4, #8
21005 ++ vshr.s64 q13, q14, #25
21006 ++ vsub.i64 q7, q8, q7
21007 ++ vshr.s64 q8, q15, #26
21008 ++ vadd.i64 q14, q13, q13
21009 ++ vadd.i64 q12, q12, q8
21010 ++ vtrn.32 d12, d14
21011 ++ vshl.i64 q8, q8, #26
21012 ++ vtrn.32 d13, d15
21013 ++ vadd.i64 q3, q12, q3
21014 ++ vadd.i64 q0, q0, q14
21015 ++ vst1.8 d12, [r2, : 64]!
21016 ++ vshl.i64 q7, q13, #4
21017 ++ vst1.8 d13, [r4, : 64]!
21018 ++ vsub.i64 q1, q1, q8
21019 ++ vshr.s64 q3, q3, #25
21020 ++ vadd.i64 q0, q0, q7
21021 ++ vadd.i64 q5, q5, q3
21022 ++ vshl.i64 q3, q3, #25
21023 ++ vadd.i64 q6, q5, q11
21024 ++ vadd.i64 q0, q0, q13
21025 ++ vshl.i64 q7, q13, #25
21026 ++ vadd.i64 q8, q0, q11
21027 ++ vsub.i64 q3, q12, q3
21028 ++ vshr.s64 q6, q6, #26
21029 ++ vsub.i64 q7, q10, q7
21030 ++ vtrn.32 d2, d6
21031 ++ vshr.s64 q8, q8, #26
21032 ++ vtrn.32 d3, d7
21033 ++ vadd.i64 q3, q9, q6
21034 ++ vst1.8 d2, [r2, : 64]
21035 ++ vshl.i64 q6, q6, #26
21036 ++ vst1.8 d3, [r4, : 64]
21037 ++ vadd.i64 q1, q4, q8
21038 ++ vtrn.32 d4, d14
21039 ++ vshl.i64 q4, q8, #26
21040 ++ vtrn.32 d5, d15
21041 ++ vsub.i64 q5, q5, q6
21042 ++ add r2, r2, #16
21043 ++ vsub.i64 q0, q0, q4
21044 ++ vst1.8 d4, [r2, : 64]
21045 ++ add r4, r4, #16
21046 ++ vst1.8 d5, [r4, : 64]
21047 ++ vtrn.32 d10, d6
21048 ++ vtrn.32 d11, d7
21049 ++ sub r2, r2, #8
21050 ++ sub r4, r4, #8
21051 ++ vtrn.32 d0, d2
21052 ++ vtrn.32 d1, d3
21053 ++ vst1.8 d10, [r2, : 64]
21054 ++ vst1.8 d11, [r4, : 64]
21055 ++ sub r2, r2, #24
21056 ++ sub r4, r4, #24
21057 ++ vst1.8 d0, [r2, : 64]
21058 ++ vst1.8 d1, [r4, : 64]
21059 ++ add r2, r3, #288
21060 ++ add r4, r3, #336
21061 ++ vld1.8 {d0-d1}, [r2, : 128]!
21062 ++ vld1.8 {d2-d3}, [r4, : 128]!
21063 ++ vsub.i32 q0, q0, q1
21064 ++ vld1.8 {d2-d3}, [r2, : 128]!
21065 ++ vld1.8 {d4-d5}, [r4, : 128]!
21066 ++ vsub.i32 q1, q1, q2
21067 ++ add r5, r3, #240
21068 ++ vld1.8 {d4}, [r2, : 64]
21069 ++ vld1.8 {d6}, [r4, : 64]
21070 ++ vsub.i32 q2, q2, q3
21071 ++ vst1.8 {d0-d1}, [r5, : 128]!
21072 ++ vst1.8 {d2-d3}, [r5, : 128]!
21073 ++ vst1.8 d4, [r5, : 64]
21074 ++ add r2, r3, #144
21075 ++ add r4, r3, #96
21076 ++ add r5, r3, #144
21077 ++ add r6, r3, #192
21078 ++ vld1.8 {d0-d1}, [r2, : 128]!
21079 ++ vld1.8 {d2-d3}, [r4, : 128]!
21080 ++ vsub.i32 q2, q0, q1
21081 ++ vadd.i32 q0, q0, q1
21082 ++ vld1.8 {d2-d3}, [r2, : 128]!
21083 ++ vld1.8 {d6-d7}, [r4, : 128]!
21084 ++ vsub.i32 q4, q1, q3
21085 ++ vadd.i32 q1, q1, q3
21086 ++ vld1.8 {d6}, [r2, : 64]
21087 ++ vld1.8 {d10}, [r4, : 64]
21088 ++ vsub.i32 q6, q3, q5
21089 ++ vadd.i32 q3, q3, q5
21090 ++ vst1.8 {d4-d5}, [r5, : 128]!
21091 ++ vst1.8 {d0-d1}, [r6, : 128]!
21092 ++ vst1.8 {d8-d9}, [r5, : 128]!
21093 ++ vst1.8 {d2-d3}, [r6, : 128]!
21094 ++ vst1.8 d12, [r5, : 64]
21095 ++ vst1.8 d6, [r6, : 64]
21096 ++ add r2, r3, #0
21097 ++ add r4, r3, #240
21098 ++ vld1.8 {d0-d1}, [r4, : 128]!
21099 ++ vld1.8 {d2-d3}, [r4, : 128]!
21100 ++ vld1.8 {d4}, [r4, : 64]
21101 ++ add r4, r3, #336
21102 ++ vld1.8 {d6-d7}, [r4, : 128]!
21103 ++ vtrn.32 q0, q3
21104 ++ vld1.8 {d8-d9}, [r4, : 128]!
21105 ++ vshl.i32 q5, q0, #4
21106 ++ vtrn.32 q1, q4
21107 ++ vshl.i32 q6, q3, #4
21108 ++ vadd.i32 q5, q5, q0
21109 ++ vadd.i32 q6, q6, q3
21110 ++ vshl.i32 q7, q1, #4
21111 ++ vld1.8 {d5}, [r4, : 64]
21112 ++ vshl.i32 q8, q4, #4
21113 ++ vtrn.32 d4, d5
21114 ++ vadd.i32 q7, q7, q1
21115 ++ vadd.i32 q8, q8, q4
21116 ++ vld1.8 {d18-d19}, [r2, : 128]!
21117 ++ vshl.i32 q10, q2, #4
21118 ++ vld1.8 {d22-d23}, [r2, : 128]!
21119 ++ vadd.i32 q10, q10, q2
21120 ++ vld1.8 {d24}, [r2, : 64]
21121 ++ vadd.i32 q5, q5, q0
21122 ++ add r2, r3, #288
21123 ++ vld1.8 {d26-d27}, [r2, : 128]!
21124 ++ vadd.i32 q6, q6, q3
21125 ++ vld1.8 {d28-d29}, [r2, : 128]!
21126 ++ vadd.i32 q8, q8, q4
21127 ++ vld1.8 {d25}, [r2, : 64]
21128 ++ vadd.i32 q10, q10, q2
21129 ++ vtrn.32 q9, q13
21130 ++ vadd.i32 q7, q7, q1
21131 ++ vadd.i32 q5, q5, q0
21132 ++ vtrn.32 q11, q14
21133 ++ vadd.i32 q6, q6, q3
21134 ++ add r2, sp, #560
21135 ++ vadd.i32 q10, q10, q2
21136 ++ vtrn.32 d24, d25
21137 ++ vst1.8 {d12-d13}, [r2, : 128]
21138 ++ vshl.i32 q6, q13, #1
21139 ++ add r2, sp, #576
21140 ++ vst1.8 {d20-d21}, [r2, : 128]
21141 ++ vshl.i32 q10, q14, #1
21142 ++ add r2, sp, #592
21143 ++ vst1.8 {d12-d13}, [r2, : 128]
21144 ++ vshl.i32 q15, q12, #1
21145 ++ vadd.i32 q8, q8, q4
21146 ++ vext.32 d10, d31, d30, #0
21147 ++ vadd.i32 q7, q7, q1
21148 ++ add r2, sp, #608
21149 ++ vst1.8 {d16-d17}, [r2, : 128]
21150 ++ vmull.s32 q8, d18, d5
21151 ++ vmlal.s32 q8, d26, d4
21152 ++ vmlal.s32 q8, d19, d9
21153 ++ vmlal.s32 q8, d27, d3
21154 ++ vmlal.s32 q8, d22, d8
21155 ++ vmlal.s32 q8, d28, d2
21156 ++ vmlal.s32 q8, d23, d7
21157 ++ vmlal.s32 q8, d29, d1
21158 ++ vmlal.s32 q8, d24, d6
21159 ++ vmlal.s32 q8, d25, d0
21160 ++ add r2, sp, #624
21161 ++ vst1.8 {d14-d15}, [r2, : 128]
21162 ++ vmull.s32 q2, d18, d4
21163 ++ vmlal.s32 q2, d12, d9
21164 ++ vmlal.s32 q2, d13, d8
21165 ++ vmlal.s32 q2, d19, d3
21166 ++ vmlal.s32 q2, d22, d2
21167 ++ vmlal.s32 q2, d23, d1
21168 ++ vmlal.s32 q2, d24, d0
21169 ++ add r2, sp, #640
21170 ++ vst1.8 {d20-d21}, [r2, : 128]
21171 ++ vmull.s32 q7, d18, d9
21172 ++ vmlal.s32 q7, d26, d3
21173 ++ vmlal.s32 q7, d19, d8
21174 ++ vmlal.s32 q7, d27, d2
21175 ++ vmlal.s32 q7, d22, d7
21176 ++ vmlal.s32 q7, d28, d1
21177 ++ vmlal.s32 q7, d23, d6
21178 ++ vmlal.s32 q7, d29, d0
21179 ++ add r2, sp, #656
21180 ++ vst1.8 {d10-d11}, [r2, : 128]
21181 ++ vmull.s32 q5, d18, d3
21182 ++ vmlal.s32 q5, d19, d2
21183 ++ vmlal.s32 q5, d22, d1
21184 ++ vmlal.s32 q5, d23, d0
21185 ++ vmlal.s32 q5, d12, d8
21186 ++ add r2, sp, #672
21187 ++ vst1.8 {d16-d17}, [r2, : 128]
21188 ++ vmull.s32 q4, d18, d8
21189 ++ vmlal.s32 q4, d26, d2
21190 ++ vmlal.s32 q4, d19, d7
21191 ++ vmlal.s32 q4, d27, d1
21192 ++ vmlal.s32 q4, d22, d6
21193 ++ vmlal.s32 q4, d28, d0
21194 ++ vmull.s32 q8, d18, d7
21195 ++ vmlal.s32 q8, d26, d1
21196 ++ vmlal.s32 q8, d19, d6
21197 ++ vmlal.s32 q8, d27, d0
21198 ++ add r2, sp, #576
21199 ++ vld1.8 {d20-d21}, [r2, : 128]
21200 ++ vmlal.s32 q7, d24, d21
21201 ++ vmlal.s32 q7, d25, d20
21202 ++ vmlal.s32 q4, d23, d21
21203 ++ vmlal.s32 q4, d29, d20
21204 ++ vmlal.s32 q8, d22, d21
21205 ++ vmlal.s32 q8, d28, d20
21206 ++ vmlal.s32 q5, d24, d20
21207 ++ add r2, sp, #576
21208 ++ vst1.8 {d14-d15}, [r2, : 128]
21209 ++ vmull.s32 q7, d18, d6
21210 ++ vmlal.s32 q7, d26, d0
21211 ++ add r2, sp, #656
21212 ++ vld1.8 {d30-d31}, [r2, : 128]
21213 ++ vmlal.s32 q2, d30, d21
21214 ++ vmlal.s32 q7, d19, d21
21215 ++ vmlal.s32 q7, d27, d20
21216 ++ add r2, sp, #624
21217 ++ vld1.8 {d26-d27}, [r2, : 128]
21218 ++ vmlal.s32 q4, d25, d27
21219 ++ vmlal.s32 q8, d29, d27
21220 ++ vmlal.s32 q8, d25, d26
21221 ++ vmlal.s32 q7, d28, d27
21222 ++ vmlal.s32 q7, d29, d26
21223 ++ add r2, sp, #608
21224 ++ vld1.8 {d28-d29}, [r2, : 128]
21225 ++ vmlal.s32 q4, d24, d29
21226 ++ vmlal.s32 q8, d23, d29
21227 ++ vmlal.s32 q8, d24, d28
21228 ++ vmlal.s32 q7, d22, d29
21229 ++ vmlal.s32 q7, d23, d28
21230 ++ add r2, sp, #608
21231 ++ vst1.8 {d8-d9}, [r2, : 128]
21232 ++ add r2, sp, #560
21233 ++ vld1.8 {d8-d9}, [r2, : 128]
21234 ++ vmlal.s32 q7, d24, d9
21235 ++ vmlal.s32 q7, d25, d31
21236 ++ vmull.s32 q1, d18, d2
21237 ++ vmlal.s32 q1, d19, d1
21238 ++ vmlal.s32 q1, d22, d0
21239 ++ vmlal.s32 q1, d24, d27
21240 ++ vmlal.s32 q1, d23, d20
21241 ++ vmlal.s32 q1, d12, d7
21242 ++ vmlal.s32 q1, d13, d6
21243 ++ vmull.s32 q6, d18, d1
21244 ++ vmlal.s32 q6, d19, d0
21245 ++ vmlal.s32 q6, d23, d27
21246 ++ vmlal.s32 q6, d22, d20
21247 ++ vmlal.s32 q6, d24, d26
21248 ++ vmull.s32 q0, d18, d0
21249 ++ vmlal.s32 q0, d22, d27
21250 ++ vmlal.s32 q0, d23, d26
21251 ++ vmlal.s32 q0, d24, d31
21252 ++ vmlal.s32 q0, d19, d20
21253 ++ add r2, sp, #640
21254 ++ vld1.8 {d18-d19}, [r2, : 128]
21255 ++ vmlal.s32 q2, d18, d7
21256 ++ vmlal.s32 q2, d19, d6
21257 ++ vmlal.s32 q5, d18, d6
21258 ++ vmlal.s32 q5, d19, d21
21259 ++ vmlal.s32 q1, d18, d21
21260 ++ vmlal.s32 q1, d19, d29
21261 ++ vmlal.s32 q0, d18, d28
21262 ++ vmlal.s32 q0, d19, d9
21263 ++ vmlal.s32 q6, d18, d29
21264 ++ vmlal.s32 q6, d19, d28
21265 ++ add r2, sp, #592
21266 ++ vld1.8 {d18-d19}, [r2, : 128]
21267 ++ add r2, sp, #512
21268 ++ vld1.8 {d22-d23}, [r2, : 128]
21269 ++ vmlal.s32 q5, d19, d7
21270 ++ vmlal.s32 q0, d18, d21
21271 ++ vmlal.s32 q0, d19, d29
21272 ++ vmlal.s32 q6, d18, d6
21273 ++ add r2, sp, #528
21274 ++ vld1.8 {d6-d7}, [r2, : 128]
21275 ++ vmlal.s32 q6, d19, d21
21276 ++ add r2, sp, #576
21277 ++ vld1.8 {d18-d19}, [r2, : 128]
21278 ++ vmlal.s32 q0, d30, d8
21279 ++ add r2, sp, #672
21280 ++ vld1.8 {d20-d21}, [r2, : 128]
21281 ++ vmlal.s32 q5, d30, d29
21282 ++ add r2, sp, #608
21283 ++ vld1.8 {d24-d25}, [r2, : 128]
21284 ++ vmlal.s32 q1, d30, d28
21285 ++ vadd.i64 q13, q0, q11
21286 ++ vadd.i64 q14, q5, q11
21287 ++ vmlal.s32 q6, d30, d9
21288 ++ vshr.s64 q4, q13, #26
21289 ++ vshr.s64 q13, q14, #26
21290 ++ vadd.i64 q7, q7, q4
21291 ++ vshl.i64 q4, q4, #26
21292 ++ vadd.i64 q14, q7, q3
21293 ++ vadd.i64 q9, q9, q13
21294 ++ vshl.i64 q13, q13, #26
21295 ++ vadd.i64 q15, q9, q3
21296 ++ vsub.i64 q0, q0, q4
21297 ++ vshr.s64 q4, q14, #25
21298 ++ vsub.i64 q5, q5, q13
21299 ++ vshr.s64 q13, q15, #25
21300 ++ vadd.i64 q6, q6, q4
21301 ++ vshl.i64 q4, q4, #25
21302 ++ vadd.i64 q14, q6, q11
21303 ++ vadd.i64 q2, q2, q13
21304 ++ vsub.i64 q4, q7, q4
21305 ++ vshr.s64 q7, q14, #26
21306 ++ vshl.i64 q13, q13, #25
21307 ++ vadd.i64 q14, q2, q11
21308 ++ vadd.i64 q8, q8, q7
21309 ++ vshl.i64 q7, q7, #26
21310 ++ vadd.i64 q15, q8, q3
21311 ++ vsub.i64 q9, q9, q13
21312 ++ vshr.s64 q13, q14, #26
21313 ++ vsub.i64 q6, q6, q7
21314 ++ vshr.s64 q7, q15, #25
21315 ++ vadd.i64 q10, q10, q13
21316 ++ vshl.i64 q13, q13, #26
21317 ++ vadd.i64 q14, q10, q3
21318 ++ vadd.i64 q1, q1, q7
21319 ++ add r2, r3, #288
21320 ++ vshl.i64 q7, q7, #25
21321 ++ add r4, r3, #96
21322 ++ vadd.i64 q15, q1, q11
21323 ++ add r2, r2, #8
21324 ++ vsub.i64 q2, q2, q13
21325 ++ add r4, r4, #8
21326 ++ vshr.s64 q13, q14, #25
21327 ++ vsub.i64 q7, q8, q7
21328 ++ vshr.s64 q8, q15, #26
21329 ++ vadd.i64 q14, q13, q13
21330 ++ vadd.i64 q12, q12, q8
21331 ++ vtrn.32 d12, d14
21332 ++ vshl.i64 q8, q8, #26
21333 ++ vtrn.32 d13, d15
21334 ++ vadd.i64 q3, q12, q3
21335 ++ vadd.i64 q0, q0, q14
21336 ++ vst1.8 d12, [r2, : 64]!
21337 ++ vshl.i64 q7, q13, #4
21338 ++ vst1.8 d13, [r4, : 64]!
21339 ++ vsub.i64 q1, q1, q8
21340 ++ vshr.s64 q3, q3, #25
21341 ++ vadd.i64 q0, q0, q7
21342 ++ vadd.i64 q5, q5, q3
21343 ++ vshl.i64 q3, q3, #25
21344 ++ vadd.i64 q6, q5, q11
21345 ++ vadd.i64 q0, q0, q13
21346 ++ vshl.i64 q7, q13, #25
21347 ++ vadd.i64 q8, q0, q11
21348 ++ vsub.i64 q3, q12, q3
21349 ++ vshr.s64 q6, q6, #26
21350 ++ vsub.i64 q7, q10, q7
21351 ++ vtrn.32 d2, d6
21352 ++ vshr.s64 q8, q8, #26
21353 ++ vtrn.32 d3, d7
21354 ++ vadd.i64 q3, q9, q6
21355 ++ vst1.8 d2, [r2, : 64]
21356 ++ vshl.i64 q6, q6, #26
21357 ++ vst1.8 d3, [r4, : 64]
21358 ++ vadd.i64 q1, q4, q8
21359 ++ vtrn.32 d4, d14
21360 ++ vshl.i64 q4, q8, #26
21361 ++ vtrn.32 d5, d15
21362 ++ vsub.i64 q5, q5, q6
21363 ++ add r2, r2, #16
21364 ++ vsub.i64 q0, q0, q4
21365 ++ vst1.8 d4, [r2, : 64]
21366 ++ add r4, r4, #16
21367 ++ vst1.8 d5, [r4, : 64]
21368 ++ vtrn.32 d10, d6
21369 ++ vtrn.32 d11, d7
21370 ++ sub r2, r2, #8
21371 ++ sub r4, r4, #8
21372 ++ vtrn.32 d0, d2
21373 ++ vtrn.32 d1, d3
21374 ++ vst1.8 d10, [r2, : 64]
21375 ++ vst1.8 d11, [r4, : 64]
21376 ++ sub r2, r2, #24
21377 ++ sub r4, r4, #24
21378 ++ vst1.8 d0, [r2, : 64]
21379 ++ vst1.8 d1, [r4, : 64]
21380 ++ add r2, sp, #544
21381 ++ add r4, r3, #144
21382 ++ add r5, r3, #192
21383 ++ vld1.8 {d0-d1}, [r2, : 128]
21384 ++ vld1.8 {d2-d3}, [r4, : 128]!
21385 ++ vld1.8 {d4-d5}, [r5, : 128]!
21386 ++ vzip.i32 q1, q2
21387 ++ vld1.8 {d6-d7}, [r4, : 128]!
21388 ++ vld1.8 {d8-d9}, [r5, : 128]!
21389 ++ vshl.i32 q5, q1, #1
21390 ++ vzip.i32 q3, q4
21391 ++ vshl.i32 q6, q2, #1
21392 ++ vld1.8 {d14}, [r4, : 64]
21393 ++ vshl.i32 q8, q3, #1
21394 ++ vld1.8 {d15}, [r5, : 64]
21395 ++ vshl.i32 q9, q4, #1
21396 ++ vmul.i32 d21, d7, d1
21397 ++ vtrn.32 d14, d15
21398 ++ vmul.i32 q11, q4, q0
21399 ++ vmul.i32 q0, q7, q0
21400 ++ vmull.s32 q12, d2, d2
21401 ++ vmlal.s32 q12, d11, d1
21402 ++ vmlal.s32 q12, d12, d0
21403 ++ vmlal.s32 q12, d13, d23
21404 ++ vmlal.s32 q12, d16, d22
21405 ++ vmlal.s32 q12, d7, d21
21406 ++ vmull.s32 q10, d2, d11
21407 ++ vmlal.s32 q10, d4, d1
21408 ++ vmlal.s32 q10, d13, d0
21409 ++ vmlal.s32 q10, d6, d23
21410 ++ vmlal.s32 q10, d17, d22
21411 ++ vmull.s32 q13, d10, d4
21412 ++ vmlal.s32 q13, d11, d3
21413 ++ vmlal.s32 q13, d13, d1
21414 ++ vmlal.s32 q13, d16, d0
21415 ++ vmlal.s32 q13, d17, d23
21416 ++ vmlal.s32 q13, d8, d22
21417 ++ vmull.s32 q1, d10, d5
21418 ++ vmlal.s32 q1, d11, d4
21419 ++ vmlal.s32 q1, d6, d1
21420 ++ vmlal.s32 q1, d17, d0
21421 ++ vmlal.s32 q1, d8, d23
21422 ++ vmull.s32 q14, d10, d6
21423 ++ vmlal.s32 q14, d11, d13
21424 ++ vmlal.s32 q14, d4, d4
21425 ++ vmlal.s32 q14, d17, d1
21426 ++ vmlal.s32 q14, d18, d0
21427 ++ vmlal.s32 q14, d9, d23
21428 ++ vmull.s32 q11, d10, d7
21429 ++ vmlal.s32 q11, d11, d6
21430 ++ vmlal.s32 q11, d12, d5
21431 ++ vmlal.s32 q11, d8, d1
21432 ++ vmlal.s32 q11, d19, d0
21433 ++ vmull.s32 q15, d10, d8
21434 ++ vmlal.s32 q15, d11, d17
21435 ++ vmlal.s32 q15, d12, d6
21436 ++ vmlal.s32 q15, d13, d5
21437 ++ vmlal.s32 q15, d19, d1
21438 ++ vmlal.s32 q15, d14, d0
21439 ++ vmull.s32 q2, d10, d9
21440 ++ vmlal.s32 q2, d11, d8
21441 ++ vmlal.s32 q2, d12, d7
21442 ++ vmlal.s32 q2, d13, d6
21443 ++ vmlal.s32 q2, d14, d1
21444 ++ vmull.s32 q0, d15, d1
21445 ++ vmlal.s32 q0, d10, d14
21446 ++ vmlal.s32 q0, d11, d19
21447 ++ vmlal.s32 q0, d12, d8
21448 ++ vmlal.s32 q0, d13, d17
21449 ++ vmlal.s32 q0, d6, d6
21450 ++ add r2, sp, #512
21451 ++ vld1.8 {d18-d19}, [r2, : 128]
21452 ++ vmull.s32 q3, d16, d7
21453 ++ vmlal.s32 q3, d10, d15
21454 ++ vmlal.s32 q3, d11, d14
21455 ++ vmlal.s32 q3, d12, d9
21456 ++ vmlal.s32 q3, d13, d8
21457 ++ add r2, sp, #528
21458 ++ vld1.8 {d8-d9}, [r2, : 128]
21459 ++ vadd.i64 q5, q12, q9
21460 ++ vadd.i64 q6, q15, q9
21461 ++ vshr.s64 q5, q5, #26
21462 ++ vshr.s64 q6, q6, #26
21463 ++ vadd.i64 q7, q10, q5
21464 ++ vshl.i64 q5, q5, #26
21465 ++ vadd.i64 q8, q7, q4
21466 ++ vadd.i64 q2, q2, q6
21467 ++ vshl.i64 q6, q6, #26
21468 ++ vadd.i64 q10, q2, q4
21469 ++ vsub.i64 q5, q12, q5
21470 ++ vshr.s64 q8, q8, #25
21471 ++ vsub.i64 q6, q15, q6
21472 ++ vshr.s64 q10, q10, #25
21473 ++ vadd.i64 q12, q13, q8
21474 ++ vshl.i64 q8, q8, #25
21475 ++ vadd.i64 q13, q12, q9
21476 ++ vadd.i64 q0, q0, q10
21477 ++ vsub.i64 q7, q7, q8
21478 ++ vshr.s64 q8, q13, #26
21479 ++ vshl.i64 q10, q10, #25
21480 ++ vadd.i64 q13, q0, q9
21481 ++ vadd.i64 q1, q1, q8
21482 ++ vshl.i64 q8, q8, #26
21483 ++ vadd.i64 q15, q1, q4
21484 ++ vsub.i64 q2, q2, q10
21485 ++ vshr.s64 q10, q13, #26
21486 ++ vsub.i64 q8, q12, q8
21487 ++ vshr.s64 q12, q15, #25
21488 ++ vadd.i64 q3, q3, q10
21489 ++ vshl.i64 q10, q10, #26
21490 ++ vadd.i64 q13, q3, q4
21491 ++ vadd.i64 q14, q14, q12
21492 ++ add r2, r3, #144
21493 ++ vshl.i64 q12, q12, #25
21494 ++ add r4, r3, #192
21495 ++ vadd.i64 q15, q14, q9
21496 ++ add r2, r2, #8
21497 ++ vsub.i64 q0, q0, q10
21498 ++ add r4, r4, #8
21499 ++ vshr.s64 q10, q13, #25
21500 ++ vsub.i64 q1, q1, q12
21501 ++ vshr.s64 q12, q15, #26
21502 ++ vadd.i64 q13, q10, q10
21503 ++ vadd.i64 q11, q11, q12
21504 ++ vtrn.32 d16, d2
21505 ++ vshl.i64 q12, q12, #26
21506 ++ vtrn.32 d17, d3
21507 ++ vadd.i64 q1, q11, q4
21508 ++ vadd.i64 q4, q5, q13
21509 ++ vst1.8 d16, [r2, : 64]!
21510 ++ vshl.i64 q5, q10, #4
21511 ++ vst1.8 d17, [r4, : 64]!
21512 ++ vsub.i64 q8, q14, q12
21513 ++ vshr.s64 q1, q1, #25
21514 ++ vadd.i64 q4, q4, q5
21515 ++ vadd.i64 q5, q6, q1
21516 ++ vshl.i64 q1, q1, #25
21517 ++ vadd.i64 q6, q5, q9
21518 ++ vadd.i64 q4, q4, q10
21519 ++ vshl.i64 q10, q10, #25
21520 ++ vadd.i64 q9, q4, q9
21521 ++ vsub.i64 q1, q11, q1
21522 ++ vshr.s64 q6, q6, #26
21523 ++ vsub.i64 q3, q3, q10
21524 ++ vtrn.32 d16, d2
21525 ++ vshr.s64 q9, q9, #26
21526 ++ vtrn.32 d17, d3
21527 ++ vadd.i64 q1, q2, q6
21528 ++ vst1.8 d16, [r2, : 64]
21529 ++ vshl.i64 q2, q6, #26
21530 ++ vst1.8 d17, [r4, : 64]
21531 ++ vadd.i64 q6, q7, q9
21532 ++ vtrn.32 d0, d6
21533 ++ vshl.i64 q7, q9, #26
21534 ++ vtrn.32 d1, d7
21535 ++ vsub.i64 q2, q5, q2
21536 ++ add r2, r2, #16
21537 ++ vsub.i64 q3, q4, q7
21538 ++ vst1.8 d0, [r2, : 64]
21539 ++ add r4, r4, #16
21540 ++ vst1.8 d1, [r4, : 64]
21541 ++ vtrn.32 d4, d2
21542 ++ vtrn.32 d5, d3
21543 ++ sub r2, r2, #8
21544 ++ sub r4, r4, #8
21545 ++ vtrn.32 d6, d12
21546 ++ vtrn.32 d7, d13
21547 ++ vst1.8 d4, [r2, : 64]
21548 ++ vst1.8 d5, [r4, : 64]
21549 ++ sub r2, r2, #24
21550 ++ sub r4, r4, #24
21551 ++ vst1.8 d6, [r2, : 64]
21552 ++ vst1.8 d7, [r4, : 64]
21553 ++ add r2, r3, #336
21554 ++ add r4, r3, #288
21555 ++ vld1.8 {d0-d1}, [r2, : 128]!
21556 ++ vld1.8 {d2-d3}, [r4, : 128]!
21557 ++ vadd.i32 q0, q0, q1
21558 ++ vld1.8 {d2-d3}, [r2, : 128]!
21559 ++ vld1.8 {d4-d5}, [r4, : 128]!
21560 ++ vadd.i32 q1, q1, q2
21561 ++ add r5, r3, #288
21562 ++ vld1.8 {d4}, [r2, : 64]
21563 ++ vld1.8 {d6}, [r4, : 64]
21564 ++ vadd.i32 q2, q2, q3
21565 ++ vst1.8 {d0-d1}, [r5, : 128]!
21566 ++ vst1.8 {d2-d3}, [r5, : 128]!
21567 ++ vst1.8 d4, [r5, : 64]
21568 ++ add r2, r3, #48
21569 ++ add r4, r3, #144
21570 ++ vld1.8 {d0-d1}, [r4, : 128]!
21571 ++ vld1.8 {d2-d3}, [r4, : 128]!
21572 ++ vld1.8 {d4}, [r4, : 64]
21573 ++ add r4, r3, #288
21574 ++ vld1.8 {d6-d7}, [r4, : 128]!
21575 ++ vtrn.32 q0, q3
21576 ++ vld1.8 {d8-d9}, [r4, : 128]!
21577 ++ vshl.i32 q5, q0, #4
21578 ++ vtrn.32 q1, q4
21579 ++ vshl.i32 q6, q3, #4
21580 ++ vadd.i32 q5, q5, q0
21581 ++ vadd.i32 q6, q6, q3
21582 ++ vshl.i32 q7, q1, #4
21583 ++ vld1.8 {d5}, [r4, : 64]
21584 ++ vshl.i32 q8, q4, #4
21585 ++ vtrn.32 d4, d5
21586 ++ vadd.i32 q7, q7, q1
21587 ++ vadd.i32 q8, q8, q4
21588 ++ vld1.8 {d18-d19}, [r2, : 128]!
21589 ++ vshl.i32 q10, q2, #4
21590 ++ vld1.8 {d22-d23}, [r2, : 128]!
21591 ++ vadd.i32 q10, q10, q2
21592 ++ vld1.8 {d24}, [r2, : 64]
21593 ++ vadd.i32 q5, q5, q0
21594 ++ add r2, r3, #240
21595 ++ vld1.8 {d26-d27}, [r2, : 128]!
21596 ++ vadd.i32 q6, q6, q3
21597 ++ vld1.8 {d28-d29}, [r2, : 128]!
21598 ++ vadd.i32 q8, q8, q4
21599 ++ vld1.8 {d25}, [r2, : 64]
21600 ++ vadd.i32 q10, q10, q2
21601 ++ vtrn.32 q9, q13
21602 ++ vadd.i32 q7, q7, q1
21603 ++ vadd.i32 q5, q5, q0
21604 ++ vtrn.32 q11, q14
21605 ++ vadd.i32 q6, q6, q3
21606 ++ add r2, sp, #560
21607 ++ vadd.i32 q10, q10, q2
21608 ++ vtrn.32 d24, d25
21609 ++ vst1.8 {d12-d13}, [r2, : 128]
21610 ++ vshl.i32 q6, q13, #1
21611 ++ add r2, sp, #576
21612 ++ vst1.8 {d20-d21}, [r2, : 128]
21613 ++ vshl.i32 q10, q14, #1
21614 ++ add r2, sp, #592
21615 ++ vst1.8 {d12-d13}, [r2, : 128]
21616 ++ vshl.i32 q15, q12, #1
21617 ++ vadd.i32 q8, q8, q4
21618 ++ vext.32 d10, d31, d30, #0
21619 ++ vadd.i32 q7, q7, q1
21620 ++ add r2, sp, #608
21621 ++ vst1.8 {d16-d17}, [r2, : 128]
21622 ++ vmull.s32 q8, d18, d5
21623 ++ vmlal.s32 q8, d26, d4
21624 ++ vmlal.s32 q8, d19, d9
21625 ++ vmlal.s32 q8, d27, d3
21626 ++ vmlal.s32 q8, d22, d8
21627 ++ vmlal.s32 q8, d28, d2
21628 ++ vmlal.s32 q8, d23, d7
21629 ++ vmlal.s32 q8, d29, d1
21630 ++ vmlal.s32 q8, d24, d6
21631 ++ vmlal.s32 q8, d25, d0
21632 ++ add r2, sp, #624
21633 ++ vst1.8 {d14-d15}, [r2, : 128]
21634 ++ vmull.s32 q2, d18, d4
21635 ++ vmlal.s32 q2, d12, d9
21636 ++ vmlal.s32 q2, d13, d8
21637 ++ vmlal.s32 q2, d19, d3
21638 ++ vmlal.s32 q2, d22, d2
21639 ++ vmlal.s32 q2, d23, d1
21640 ++ vmlal.s32 q2, d24, d0
21641 ++ add r2, sp, #640
21642 ++ vst1.8 {d20-d21}, [r2, : 128]
21643 ++ vmull.s32 q7, d18, d9
21644 ++ vmlal.s32 q7, d26, d3
21645 ++ vmlal.s32 q7, d19, d8
21646 ++ vmlal.s32 q7, d27, d2
21647 ++ vmlal.s32 q7, d22, d7
21648 ++ vmlal.s32 q7, d28, d1
21649 ++ vmlal.s32 q7, d23, d6
21650 ++ vmlal.s32 q7, d29, d0
21651 ++ add r2, sp, #656
21652 ++ vst1.8 {d10-d11}, [r2, : 128]
21653 ++ vmull.s32 q5, d18, d3
21654 ++ vmlal.s32 q5, d19, d2
21655 ++ vmlal.s32 q5, d22, d1
21656 ++ vmlal.s32 q5, d23, d0
21657 ++ vmlal.s32 q5, d12, d8
21658 ++ add r2, sp, #672
21659 ++ vst1.8 {d16-d17}, [r2, : 128]
21660 ++ vmull.s32 q4, d18, d8
21661 ++ vmlal.s32 q4, d26, d2
21662 ++ vmlal.s32 q4, d19, d7
21663 ++ vmlal.s32 q4, d27, d1
21664 ++ vmlal.s32 q4, d22, d6
21665 ++ vmlal.s32 q4, d28, d0
21666 ++ vmull.s32 q8, d18, d7
21667 ++ vmlal.s32 q8, d26, d1
21668 ++ vmlal.s32 q8, d19, d6
21669 ++ vmlal.s32 q8, d27, d0
21670 ++ add r2, sp, #576
21671 ++ vld1.8 {d20-d21}, [r2, : 128]
21672 ++ vmlal.s32 q7, d24, d21
21673 ++ vmlal.s32 q7, d25, d20
21674 ++ vmlal.s32 q4, d23, d21
21675 ++ vmlal.s32 q4, d29, d20
21676 ++ vmlal.s32 q8, d22, d21
21677 ++ vmlal.s32 q8, d28, d20
21678 ++ vmlal.s32 q5, d24, d20
21679 ++ add r2, sp, #576
21680 ++ vst1.8 {d14-d15}, [r2, : 128]
21681 ++ vmull.s32 q7, d18, d6
21682 ++ vmlal.s32 q7, d26, d0
21683 ++ add r2, sp, #656
21684 ++ vld1.8 {d30-d31}, [r2, : 128]
21685 ++ vmlal.s32 q2, d30, d21
21686 ++ vmlal.s32 q7, d19, d21
21687 ++ vmlal.s32 q7, d27, d20
21688 ++ add r2, sp, #624
21689 ++ vld1.8 {d26-d27}, [r2, : 128]
21690 ++ vmlal.s32 q4, d25, d27
21691 ++ vmlal.s32 q8, d29, d27
21692 ++ vmlal.s32 q8, d25, d26
21693 ++ vmlal.s32 q7, d28, d27
21694 ++ vmlal.s32 q7, d29, d26
21695 ++ add r2, sp, #608
21696 ++ vld1.8 {d28-d29}, [r2, : 128]
21697 ++ vmlal.s32 q4, d24, d29
21698 ++ vmlal.s32 q8, d23, d29
21699 ++ vmlal.s32 q8, d24, d28
21700 ++ vmlal.s32 q7, d22, d29
21701 ++ vmlal.s32 q7, d23, d28
21702 ++ add r2, sp, #608
21703 ++ vst1.8 {d8-d9}, [r2, : 128]
21704 ++ add r2, sp, #560
21705 ++ vld1.8 {d8-d9}, [r2, : 128]
21706 ++ vmlal.s32 q7, d24, d9
21707 ++ vmlal.s32 q7, d25, d31
21708 ++ vmull.s32 q1, d18, d2
21709 ++ vmlal.s32 q1, d19, d1
21710 ++ vmlal.s32 q1, d22, d0
21711 ++ vmlal.s32 q1, d24, d27
21712 ++ vmlal.s32 q1, d23, d20
21713 ++ vmlal.s32 q1, d12, d7
21714 ++ vmlal.s32 q1, d13, d6
21715 ++ vmull.s32 q6, d18, d1
21716 ++ vmlal.s32 q6, d19, d0
21717 ++ vmlal.s32 q6, d23, d27
21718 ++ vmlal.s32 q6, d22, d20
21719 ++ vmlal.s32 q6, d24, d26
21720 ++ vmull.s32 q0, d18, d0
21721 ++ vmlal.s32 q0, d22, d27
21722 ++ vmlal.s32 q0, d23, d26
21723 ++ vmlal.s32 q0, d24, d31
21724 ++ vmlal.s32 q0, d19, d20
21725 ++ add r2, sp, #640
21726 ++ vld1.8 {d18-d19}, [r2, : 128]
21727 ++ vmlal.s32 q2, d18, d7
21728 ++ vmlal.s32 q2, d19, d6
21729 ++ vmlal.s32 q5, d18, d6
21730 ++ vmlal.s32 q5, d19, d21
21731 ++ vmlal.s32 q1, d18, d21
21732 ++ vmlal.s32 q1, d19, d29
21733 ++ vmlal.s32 q0, d18, d28
21734 ++ vmlal.s32 q0, d19, d9
21735 ++ vmlal.s32 q6, d18, d29
21736 ++ vmlal.s32 q6, d19, d28
21737 ++ add r2, sp, #592
21738 ++ vld1.8 {d18-d19}, [r2, : 128]
21739 ++ add r2, sp, #512
21740 ++ vld1.8 {d22-d23}, [r2, : 128]
21741 ++ vmlal.s32 q5, d19, d7
21742 ++ vmlal.s32 q0, d18, d21
21743 ++ vmlal.s32 q0, d19, d29
21744 ++ vmlal.s32 q6, d18, d6
21745 ++ add r2, sp, #528
21746 ++ vld1.8 {d6-d7}, [r2, : 128]
21747 ++ vmlal.s32 q6, d19, d21
21748 ++ add r2, sp, #576
21749 ++ vld1.8 {d18-d19}, [r2, : 128]
21750 ++ vmlal.s32 q0, d30, d8
21751 ++ add r2, sp, #672
21752 ++ vld1.8 {d20-d21}, [r2, : 128]
21753 ++ vmlal.s32 q5, d30, d29
21754 ++ add r2, sp, #608
21755 ++ vld1.8 {d24-d25}, [r2, : 128]
21756 ++ vmlal.s32 q1, d30, d28
21757 ++ vadd.i64 q13, q0, q11
21758 ++ vadd.i64 q14, q5, q11
21759 ++ vmlal.s32 q6, d30, d9
21760 ++ vshr.s64 q4, q13, #26
21761 ++ vshr.s64 q13, q14, #26
21762 ++ vadd.i64 q7, q7, q4
21763 ++ vshl.i64 q4, q4, #26
21764 ++ vadd.i64 q14, q7, q3
21765 ++ vadd.i64 q9, q9, q13
21766 ++ vshl.i64 q13, q13, #26
21767 ++ vadd.i64 q15, q9, q3
21768 ++ vsub.i64 q0, q0, q4
21769 ++ vshr.s64 q4, q14, #25
21770 ++ vsub.i64 q5, q5, q13
21771 ++ vshr.s64 q13, q15, #25
21772 ++ vadd.i64 q6, q6, q4
21773 ++ vshl.i64 q4, q4, #25
21774 ++ vadd.i64 q14, q6, q11
21775 ++ vadd.i64 q2, q2, q13
21776 ++ vsub.i64 q4, q7, q4
21777 ++ vshr.s64 q7, q14, #26
21778 ++ vshl.i64 q13, q13, #25
21779 ++ vadd.i64 q14, q2, q11
21780 ++ vadd.i64 q8, q8, q7
21781 ++ vshl.i64 q7, q7, #26
21782 ++ vadd.i64 q15, q8, q3
21783 ++ vsub.i64 q9, q9, q13
21784 ++ vshr.s64 q13, q14, #26
21785 ++ vsub.i64 q6, q6, q7
21786 ++ vshr.s64 q7, q15, #25
21787 ++ vadd.i64 q10, q10, q13
21788 ++ vshl.i64 q13, q13, #26
21789 ++ vadd.i64 q14, q10, q3
21790 ++ vadd.i64 q1, q1, q7
21791 ++ add r2, r3, #240
21792 ++ vshl.i64 q7, q7, #25
21793 ++ add r4, r3, #144
21794 ++ vadd.i64 q15, q1, q11
21795 ++ add r2, r2, #8
21796 ++ vsub.i64 q2, q2, q13
21797 ++ add r4, r4, #8
21798 ++ vshr.s64 q13, q14, #25
21799 ++ vsub.i64 q7, q8, q7
21800 ++ vshr.s64 q8, q15, #26
21801 ++ vadd.i64 q14, q13, q13
21802 ++ vadd.i64 q12, q12, q8
21803 ++ vtrn.32 d12, d14
21804 ++ vshl.i64 q8, q8, #26
21805 ++ vtrn.32 d13, d15
21806 ++ vadd.i64 q3, q12, q3
21807 ++ vadd.i64 q0, q0, q14
21808 ++ vst1.8 d12, [r2, : 64]!
21809 ++ vshl.i64 q7, q13, #4
21810 ++ vst1.8 d13, [r4, : 64]!
21811 ++ vsub.i64 q1, q1, q8
21812 ++ vshr.s64 q3, q3, #25
21813 ++ vadd.i64 q0, q0, q7
21814 ++ vadd.i64 q5, q5, q3
21815 ++ vshl.i64 q3, q3, #25
21816 ++ vadd.i64 q6, q5, q11
21817 ++ vadd.i64 q0, q0, q13
21818 ++ vshl.i64 q7, q13, #25
21819 ++ vadd.i64 q8, q0, q11
21820 ++ vsub.i64 q3, q12, q3
21821 ++ vshr.s64 q6, q6, #26
21822 ++ vsub.i64 q7, q10, q7
21823 ++ vtrn.32 d2, d6
21824 ++ vshr.s64 q8, q8, #26
21825 ++ vtrn.32 d3, d7
21826 ++ vadd.i64 q3, q9, q6
21827 ++ vst1.8 d2, [r2, : 64]
21828 ++ vshl.i64 q6, q6, #26
21829 ++ vst1.8 d3, [r4, : 64]
21830 ++ vadd.i64 q1, q4, q8
21831 ++ vtrn.32 d4, d14
21832 ++ vshl.i64 q4, q8, #26
21833 ++ vtrn.32 d5, d15
21834 ++ vsub.i64 q5, q5, q6
21835 ++ add r2, r2, #16
21836 ++ vsub.i64 q0, q0, q4
21837 ++ vst1.8 d4, [r2, : 64]
21838 ++ add r4, r4, #16
21839 ++ vst1.8 d5, [r4, : 64]
21840 ++ vtrn.32 d10, d6
21841 ++ vtrn.32 d11, d7
21842 ++ sub r2, r2, #8
21843 ++ sub r4, r4, #8
21844 ++ vtrn.32 d0, d2
21845 ++ vtrn.32 d1, d3
21846 ++ vst1.8 d10, [r2, : 64]
21847 ++ vst1.8 d11, [r4, : 64]
21848 ++ sub r2, r2, #24
21849 ++ sub r4, r4, #24
21850 ++ vst1.8 d0, [r2, : 64]
21851 ++ vst1.8 d1, [r4, : 64]
21852 ++ ldr r2, [sp, #488]
21853 ++ ldr r4, [sp, #492]
21854 ++ subs r5, r2, #1
21855 ++ bge ._mainloop
21856 ++ add r1, r3, #144
21857 ++ add r2, r3, #336
21858 ++ vld1.8 {d0-d1}, [r1, : 128]!
21859 ++ vld1.8 {d2-d3}, [r1, : 128]!
21860 ++ vld1.8 {d4}, [r1, : 64]
21861 ++ vst1.8 {d0-d1}, [r2, : 128]!
21862 ++ vst1.8 {d2-d3}, [r2, : 128]!
21863 ++ vst1.8 d4, [r2, : 64]
21864 ++ ldr r1, =0
21865 ++._invertloop:
21866 ++ add r2, r3, #144
21867 ++ ldr r4, =0
21868 ++ ldr r5, =2
21869 ++ cmp r1, #1
21870 ++ ldreq r5, =1
21871 ++ addeq r2, r3, #336
21872 ++ addeq r4, r3, #48
21873 ++ cmp r1, #2
21874 ++ ldreq r5, =1
21875 ++ addeq r2, r3, #48
21876 ++ cmp r1, #3
21877 ++ ldreq r5, =5
21878 ++ addeq r4, r3, #336
21879 ++ cmp r1, #4
21880 ++ ldreq r5, =10
21881 ++ cmp r1, #5
21882 ++ ldreq r5, =20
21883 ++ cmp r1, #6
21884 ++ ldreq r5, =10
21885 ++ addeq r2, r3, #336
21886 ++ addeq r4, r3, #336
21887 ++ cmp r1, #7
21888 ++ ldreq r5, =50
21889 ++ cmp r1, #8
21890 ++ ldreq r5, =100
21891 ++ cmp r1, #9
21892 ++ ldreq r5, =50
21893 ++ addeq r2, r3, #336
21894 ++ cmp r1, #10
21895 ++ ldreq r5, =5
21896 ++ addeq r2, r3, #48
21897 ++ cmp r1, #11
21898 ++ ldreq r5, =0
21899 ++ addeq r2, r3, #96
21900 ++ add r6, r3, #144
21901 ++ add r7, r3, #288
21902 ++ vld1.8 {d0-d1}, [r6, : 128]!
21903 ++ vld1.8 {d2-d3}, [r6, : 128]!
21904 ++ vld1.8 {d4}, [r6, : 64]
21905 ++ vst1.8 {d0-d1}, [r7, : 128]!
21906 ++ vst1.8 {d2-d3}, [r7, : 128]!
21907 ++ vst1.8 d4, [r7, : 64]
21908 ++ cmp r5, #0
21909 ++ beq ._skipsquaringloop
21910 ++._squaringloop:
21911 ++ add r6, r3, #288
21912 ++ add r7, r3, #288
21913 ++ add r8, r3, #288
21914 ++ vmov.i32 q0, #19
21915 ++ vmov.i32 q1, #0
21916 ++ vmov.i32 q2, #1
21917 ++ vzip.i32 q1, q2
21918 ++ vld1.8 {d4-d5}, [r7, : 128]!
21919 ++ vld1.8 {d6-d7}, [r7, : 128]!
21920 ++ vld1.8 {d9}, [r7, : 64]
21921 ++ vld1.8 {d10-d11}, [r6, : 128]!
21922 ++ add r7, sp, #416
21923 ++ vld1.8 {d12-d13}, [r6, : 128]!
21924 ++ vmul.i32 q7, q2, q0
21925 ++ vld1.8 {d8}, [r6, : 64]
21926 ++ vext.32 d17, d11, d10, #1
21927 ++ vmul.i32 q9, q3, q0
21928 ++ vext.32 d16, d10, d8, #1
21929 ++ vshl.u32 q10, q5, q1
21930 ++ vext.32 d22, d14, d4, #1
21931 ++ vext.32 d24, d18, d6, #1
21932 ++ vshl.u32 q13, q6, q1
21933 ++ vshl.u32 d28, d8, d2
21934 ++ vrev64.i32 d22, d22
21935 ++ vmul.i32 d1, d9, d1
21936 ++ vrev64.i32 d24, d24
21937 ++ vext.32 d29, d8, d13, #1
21938 ++ vext.32 d0, d1, d9, #1
21939 ++ vrev64.i32 d0, d0
21940 ++ vext.32 d2, d9, d1, #1
21941 ++ vext.32 d23, d15, d5, #1
21942 ++ vmull.s32 q4, d20, d4
21943 ++ vrev64.i32 d23, d23
21944 ++ vmlal.s32 q4, d21, d1
21945 ++ vrev64.i32 d2, d2
21946 ++ vmlal.s32 q4, d26, d19
21947 ++ vext.32 d3, d5, d15, #1
21948 ++ vmlal.s32 q4, d27, d18
21949 ++ vrev64.i32 d3, d3
21950 ++ vmlal.s32 q4, d28, d15
21951 ++ vext.32 d14, d12, d11, #1
21952 ++ vmull.s32 q5, d16, d23
21953 ++ vext.32 d15, d13, d12, #1
21954 ++ vmlal.s32 q5, d17, d4
21955 ++ vst1.8 d8, [r7, : 64]!
21956 ++ vmlal.s32 q5, d14, d1
21957 ++ vext.32 d12, d9, d8, #0
21958 ++ vmlal.s32 q5, d15, d19
21959 ++ vmov.i64 d13, #0
21960 ++ vmlal.s32 q5, d29, d18
21961 ++ vext.32 d25, d19, d7, #1
21962 ++ vmlal.s32 q6, d20, d5
21963 ++ vrev64.i32 d25, d25
21964 ++ vmlal.s32 q6, d21, d4
21965 ++ vst1.8 d11, [r7, : 64]!
21966 ++ vmlal.s32 q6, d26, d1
21967 ++ vext.32 d9, d10, d10, #0
21968 ++ vmlal.s32 q6, d27, d19
21969 ++ vmov.i64 d8, #0
21970 ++ vmlal.s32 q6, d28, d18
21971 ++ vmlal.s32 q4, d16, d24
21972 ++ vmlal.s32 q4, d17, d5
21973 ++ vmlal.s32 q4, d14, d4
21974 ++ vst1.8 d12, [r7, : 64]!
21975 ++ vmlal.s32 q4, d15, d1
21976 ++ vext.32 d10, d13, d12, #0
21977 ++ vmlal.s32 q4, d29, d19
21978 ++ vmov.i64 d11, #0
21979 ++ vmlal.s32 q5, d20, d6
21980 ++ vmlal.s32 q5, d21, d5
21981 ++ vmlal.s32 q5, d26, d4
21982 ++ vext.32 d13, d8, d8, #0
21983 ++ vmlal.s32 q5, d27, d1
21984 ++ vmov.i64 d12, #0
21985 ++ vmlal.s32 q5, d28, d19
21986 ++ vst1.8 d9, [r7, : 64]!
21987 ++ vmlal.s32 q6, d16, d25
21988 ++ vmlal.s32 q6, d17, d6
21989 ++ vst1.8 d10, [r7, : 64]
21990 ++ vmlal.s32 q6, d14, d5
21991 ++ vext.32 d8, d11, d10, #0
21992 ++ vmlal.s32 q6, d15, d4
21993 ++ vmov.i64 d9, #0
21994 ++ vmlal.s32 q6, d29, d1
21995 ++ vmlal.s32 q4, d20, d7
21996 ++ vmlal.s32 q4, d21, d6
21997 ++ vmlal.s32 q4, d26, d5
21998 ++ vext.32 d11, d12, d12, #0
21999 ++ vmlal.s32 q4, d27, d4
22000 ++ vmov.i64 d10, #0
22001 ++ vmlal.s32 q4, d28, d1
22002 ++ vmlal.s32 q5, d16, d0
22003 ++ sub r6, r7, #32
22004 ++ vmlal.s32 q5, d17, d7
22005 ++ vmlal.s32 q5, d14, d6
22006 ++ vext.32 d30, d9, d8, #0
22007 ++ vmlal.s32 q5, d15, d5
22008 ++ vld1.8 {d31}, [r6, : 64]!
22009 ++ vmlal.s32 q5, d29, d4
22010 ++ vmlal.s32 q15, d20, d0
22011 ++ vext.32 d0, d6, d18, #1
22012 ++ vmlal.s32 q15, d21, d25
22013 ++ vrev64.i32 d0, d0
22014 ++ vmlal.s32 q15, d26, d24
22015 ++ vext.32 d1, d7, d19, #1
22016 ++ vext.32 d7, d10, d10, #0
22017 ++ vmlal.s32 q15, d27, d23
22018 ++ vrev64.i32 d1, d1
22019 ++ vld1.8 {d6}, [r6, : 64]
22020 ++ vmlal.s32 q15, d28, d22
22021 ++ vmlal.s32 q3, d16, d4
22022 ++ add r6, r6, #24
22023 ++ vmlal.s32 q3, d17, d2
22024 ++ vext.32 d4, d31, d30, #0
22025 ++ vmov d17, d11
22026 ++ vmlal.s32 q3, d14, d1
22027 ++ vext.32 d11, d13, d13, #0
22028 ++ vext.32 d13, d30, d30, #0
22029 ++ vmlal.s32 q3, d15, d0
22030 ++ vext.32 d1, d8, d8, #0
22031 ++ vmlal.s32 q3, d29, d3
22032 ++ vld1.8 {d5}, [r6, : 64]
22033 ++ sub r6, r6, #16
22034 ++ vext.32 d10, d6, d6, #0
22035 ++ vmov.i32 q1, #0xffffffff
22036 ++ vshl.i64 q4, q1, #25
22037 ++ add r7, sp, #512
22038 ++ vld1.8 {d14-d15}, [r7, : 128]
22039 ++ vadd.i64 q9, q2, q7
22040 ++ vshl.i64 q1, q1, #26
22041 ++ vshr.s64 q10, q9, #26
22042 ++ vld1.8 {d0}, [r6, : 64]!
22043 ++ vadd.i64 q5, q5, q10
22044 ++ vand q9, q9, q1
22045 ++ vld1.8 {d16}, [r6, : 64]!
22046 ++ add r6, sp, #528
22047 ++ vld1.8 {d20-d21}, [r6, : 128]
22048 ++ vadd.i64 q11, q5, q10
22049 ++ vsub.i64 q2, q2, q9
22050 ++ vshr.s64 q9, q11, #25
22051 ++ vext.32 d12, d5, d4, #0
22052 ++ vand q11, q11, q4
22053 ++ vadd.i64 q0, q0, q9
22054 ++ vmov d19, d7
22055 ++ vadd.i64 q3, q0, q7
22056 ++ vsub.i64 q5, q5, q11
22057 ++ vshr.s64 q11, q3, #26
22058 ++ vext.32 d18, d11, d10, #0
22059 ++ vand q3, q3, q1
22060 ++ vadd.i64 q8, q8, q11
22061 ++ vadd.i64 q11, q8, q10
22062 ++ vsub.i64 q0, q0, q3
22063 ++ vshr.s64 q3, q11, #25
22064 ++ vand q11, q11, q4
22065 ++ vadd.i64 q3, q6, q3
22066 ++ vadd.i64 q6, q3, q7
22067 ++ vsub.i64 q8, q8, q11
22068 ++ vshr.s64 q11, q6, #26
22069 ++ vand q6, q6, q1
22070 ++ vadd.i64 q9, q9, q11
22071 ++ vadd.i64 d25, d19, d21
22072 ++ vsub.i64 q3, q3, q6
22073 ++ vshr.s64 d23, d25, #25
22074 ++ vand q4, q12, q4
22075 ++ vadd.i64 d21, d23, d23
22076 ++ vshl.i64 d25, d23, #4
22077 ++ vadd.i64 d21, d21, d23
22078 ++ vadd.i64 d25, d25, d21
22079 ++ vadd.i64 d4, d4, d25
22080 ++ vzip.i32 q0, q8
22081 ++ vadd.i64 d12, d4, d14
22082 ++ add r6, r8, #8
22083 ++ vst1.8 d0, [r6, : 64]
22084 ++ vsub.i64 d19, d19, d9
22085 ++ add r6, r6, #16
22086 ++ vst1.8 d16, [r6, : 64]
22087 ++ vshr.s64 d22, d12, #26
22088 ++ vand q0, q6, q1
22089 ++ vadd.i64 d10, d10, d22
22090 ++ vzip.i32 q3, q9
22091 ++ vsub.i64 d4, d4, d0
22092 ++ sub r6, r6, #8
22093 ++ vst1.8 d6, [r6, : 64]
22094 ++ add r6, r6, #16
22095 ++ vst1.8 d18, [r6, : 64]
22096 ++ vzip.i32 q2, q5
22097 ++ sub r6, r6, #32
22098 ++ vst1.8 d4, [r6, : 64]
22099 ++ subs r5, r5, #1
22100 ++ bhi ._squaringloop
22101 ++._skipsquaringloop:
22102 ++ mov r2, r2
22103 ++ add r5, r3, #288
22104 ++ add r6, r3, #144
22105 ++ vmov.i32 q0, #19
22106 ++ vmov.i32 q1, #0
22107 ++ vmov.i32 q2, #1
22108 ++ vzip.i32 q1, q2
22109 ++ vld1.8 {d4-d5}, [r5, : 128]!
22110 ++ vld1.8 {d6-d7}, [r5, : 128]!
22111 ++ vld1.8 {d9}, [r5, : 64]
22112 ++ vld1.8 {d10-d11}, [r2, : 128]!
22113 ++ add r5, sp, #416
22114 ++ vld1.8 {d12-d13}, [r2, : 128]!
22115 ++ vmul.i32 q7, q2, q0
22116 ++ vld1.8 {d8}, [r2, : 64]
22117 ++ vext.32 d17, d11, d10, #1
22118 ++ vmul.i32 q9, q3, q0
22119 ++ vext.32 d16, d10, d8, #1
22120 ++ vshl.u32 q10, q5, q1
22121 ++ vext.32 d22, d14, d4, #1
22122 ++ vext.32 d24, d18, d6, #1
22123 ++ vshl.u32 q13, q6, q1
22124 ++ vshl.u32 d28, d8, d2
22125 ++ vrev64.i32 d22, d22
22126 ++ vmul.i32 d1, d9, d1
22127 ++ vrev64.i32 d24, d24
22128 ++ vext.32 d29, d8, d13, #1
22129 ++ vext.32 d0, d1, d9, #1
22130 ++ vrev64.i32 d0, d0
22131 ++ vext.32 d2, d9, d1, #1
22132 ++ vext.32 d23, d15, d5, #1
22133 ++ vmull.s32 q4, d20, d4
22134 ++ vrev64.i32 d23, d23
22135 ++ vmlal.s32 q4, d21, d1
22136 ++ vrev64.i32 d2, d2
22137 ++ vmlal.s32 q4, d26, d19
22138 ++ vext.32 d3, d5, d15, #1
22139 ++ vmlal.s32 q4, d27, d18
22140 ++ vrev64.i32 d3, d3
22141 ++ vmlal.s32 q4, d28, d15
22142 ++ vext.32 d14, d12, d11, #1
22143 ++ vmull.s32 q5, d16, d23
22144 ++ vext.32 d15, d13, d12, #1
22145 ++ vmlal.s32 q5, d17, d4
22146 ++ vst1.8 d8, [r5, : 64]!
22147 ++ vmlal.s32 q5, d14, d1
22148 ++ vext.32 d12, d9, d8, #0
22149 ++ vmlal.s32 q5, d15, d19
22150 ++ vmov.i64 d13, #0
22151 ++ vmlal.s32 q5, d29, d18
22152 ++ vext.32 d25, d19, d7, #1
22153 ++ vmlal.s32 q6, d20, d5
22154 ++ vrev64.i32 d25, d25
22155 ++ vmlal.s32 q6, d21, d4
22156 ++ vst1.8 d11, [r5, : 64]!
22157 ++ vmlal.s32 q6, d26, d1
22158 ++ vext.32 d9, d10, d10, #0
22159 ++ vmlal.s32 q6, d27, d19
22160 ++ vmov.i64 d8, #0
22161 ++ vmlal.s32 q6, d28, d18
22162 ++ vmlal.s32 q4, d16, d24
22163 ++ vmlal.s32 q4, d17, d5
22164 ++ vmlal.s32 q4, d14, d4
22165 ++ vst1.8 d12, [r5, : 64]!
22166 ++ vmlal.s32 q4, d15, d1
22167 ++ vext.32 d10, d13, d12, #0
22168 ++ vmlal.s32 q4, d29, d19
22169 ++ vmov.i64 d11, #0
22170 ++ vmlal.s32 q5, d20, d6
22171 ++ vmlal.s32 q5, d21, d5
22172 ++ vmlal.s32 q5, d26, d4
22173 ++ vext.32 d13, d8, d8, #0
22174 ++ vmlal.s32 q5, d27, d1
22175 ++ vmov.i64 d12, #0
22176 ++ vmlal.s32 q5, d28, d19
22177 ++ vst1.8 d9, [r5, : 64]!
22178 ++ vmlal.s32 q6, d16, d25
22179 ++ vmlal.s32 q6, d17, d6
22180 ++ vst1.8 d10, [r5, : 64]
22181 ++ vmlal.s32 q6, d14, d5
22182 ++ vext.32 d8, d11, d10, #0
22183 ++ vmlal.s32 q6, d15, d4
22184 ++ vmov.i64 d9, #0
22185 ++ vmlal.s32 q6, d29, d1
22186 ++ vmlal.s32 q4, d20, d7
22187 ++ vmlal.s32 q4, d21, d6
22188 ++ vmlal.s32 q4, d26, d5
22189 ++ vext.32 d11, d12, d12, #0
22190 ++ vmlal.s32 q4, d27, d4
22191 ++ vmov.i64 d10, #0
22192 ++ vmlal.s32 q4, d28, d1
22193 ++ vmlal.s32 q5, d16, d0
22194 ++ sub r2, r5, #32
22195 ++ vmlal.s32 q5, d17, d7
22196 ++ vmlal.s32 q5, d14, d6
22197 ++ vext.32 d30, d9, d8, #0
22198 ++ vmlal.s32 q5, d15, d5
22199 ++ vld1.8 {d31}, [r2, : 64]!
22200 ++ vmlal.s32 q5, d29, d4
22201 ++ vmlal.s32 q15, d20, d0
22202 ++ vext.32 d0, d6, d18, #1
22203 ++ vmlal.s32 q15, d21, d25
22204 ++ vrev64.i32 d0, d0
22205 ++ vmlal.s32 q15, d26, d24
22206 ++ vext.32 d1, d7, d19, #1
22207 ++ vext.32 d7, d10, d10, #0
22208 ++ vmlal.s32 q15, d27, d23
22209 ++ vrev64.i32 d1, d1
22210 ++ vld1.8 {d6}, [r2, : 64]
22211 ++ vmlal.s32 q15, d28, d22
22212 ++ vmlal.s32 q3, d16, d4
22213 ++ add r2, r2, #24
22214 ++ vmlal.s32 q3, d17, d2
22215 ++ vext.32 d4, d31, d30, #0
22216 ++ vmov d17, d11
22217 ++ vmlal.s32 q3, d14, d1
22218 ++ vext.32 d11, d13, d13, #0
22219 ++ vext.32 d13, d30, d30, #0
22220 ++ vmlal.s32 q3, d15, d0
22221 ++ vext.32 d1, d8, d8, #0
22222 ++ vmlal.s32 q3, d29, d3
22223 ++ vld1.8 {d5}, [r2, : 64]
22224 ++ sub r2, r2, #16
22225 ++ vext.32 d10, d6, d6, #0
22226 ++ vmov.i32 q1, #0xffffffff
22227 ++ vshl.i64 q4, q1, #25
22228 ++ add r5, sp, #512
22229 ++ vld1.8 {d14-d15}, [r5, : 128]
22230 ++ vadd.i64 q9, q2, q7
22231 ++ vshl.i64 q1, q1, #26
22232 ++ vshr.s64 q10, q9, #26
22233 ++ vld1.8 {d0}, [r2, : 64]!
22234 ++ vadd.i64 q5, q5, q10
22235 ++ vand q9, q9, q1
22236 ++ vld1.8 {d16}, [r2, : 64]!
22237 ++ add r2, sp, #528
22238 ++ vld1.8 {d20-d21}, [r2, : 128]
22239 ++ vadd.i64 q11, q5, q10
22240 ++ vsub.i64 q2, q2, q9
22241 ++ vshr.s64 q9, q11, #25
22242 ++ vext.32 d12, d5, d4, #0
22243 ++ vand q11, q11, q4
22244 ++ vadd.i64 q0, q0, q9
22245 ++ vmov d19, d7
22246 ++ vadd.i64 q3, q0, q7
22247 ++ vsub.i64 q5, q5, q11
22248 ++ vshr.s64 q11, q3, #26
22249 ++ vext.32 d18, d11, d10, #0
22250 ++ vand q3, q3, q1
22251 ++ vadd.i64 q8, q8, q11
22252 ++ vadd.i64 q11, q8, q10
22253 ++ vsub.i64 q0, q0, q3
22254 ++ vshr.s64 q3, q11, #25
22255 ++ vand q11, q11, q4
22256 ++ vadd.i64 q3, q6, q3
22257 ++ vadd.i64 q6, q3, q7
22258 ++ vsub.i64 q8, q8, q11
22259 ++ vshr.s64 q11, q6, #26
22260 ++ vand q6, q6, q1
22261 ++ vadd.i64 q9, q9, q11
22262 ++ vadd.i64 d25, d19, d21
22263 ++ vsub.i64 q3, q3, q6
22264 ++ vshr.s64 d23, d25, #25
22265 ++ vand q4, q12, q4
22266 ++ vadd.i64 d21, d23, d23
22267 ++ vshl.i64 d25, d23, #4
22268 ++ vadd.i64 d21, d21, d23
22269 ++ vadd.i64 d25, d25, d21
22270 ++ vadd.i64 d4, d4, d25
22271 ++ vzip.i32 q0, q8
22272 ++ vadd.i64 d12, d4, d14
22273 ++ add r2, r6, #8
22274 ++ vst1.8 d0, [r2, : 64]
22275 ++ vsub.i64 d19, d19, d9
22276 ++ add r2, r2, #16
22277 ++ vst1.8 d16, [r2, : 64]
22278 ++ vshr.s64 d22, d12, #26
22279 ++ vand q0, q6, q1
22280 ++ vadd.i64 d10, d10, d22
22281 ++ vzip.i32 q3, q9
22282 ++ vsub.i64 d4, d4, d0
22283 ++ sub r2, r2, #8
22284 ++ vst1.8 d6, [r2, : 64]
22285 ++ add r2, r2, #16
22286 ++ vst1.8 d18, [r2, : 64]
22287 ++ vzip.i32 q2, q5
22288 ++ sub r2, r2, #32
22289 ++ vst1.8 d4, [r2, : 64]
22290 ++ cmp r4, #0
22291 ++ beq ._skippostcopy
22292 ++ add r2, r3, #144
22293 ++ mov r4, r4
22294 ++ vld1.8 {d0-d1}, [r2, : 128]!
22295 ++ vld1.8 {d2-d3}, [r2, : 128]!
22296 ++ vld1.8 {d4}, [r2, : 64]
22297 ++ vst1.8 {d0-d1}, [r4, : 128]!
22298 ++ vst1.8 {d2-d3}, [r4, : 128]!
22299 ++ vst1.8 d4, [r4, : 64]
22300 ++._skippostcopy:
22301 ++ cmp r1, #1
22302 ++ bne ._skipfinalcopy
22303 ++ add r2, r3, #288
22304 ++ add r4, r3, #144
22305 ++ vld1.8 {d0-d1}, [r2, : 128]!
22306 ++ vld1.8 {d2-d3}, [r2, : 128]!
22307 ++ vld1.8 {d4}, [r2, : 64]
22308 ++ vst1.8 {d0-d1}, [r4, : 128]!
22309 ++ vst1.8 {d2-d3}, [r4, : 128]!
22310 ++ vst1.8 d4, [r4, : 64]
22311 ++._skipfinalcopy:
22312 ++ add r1, r1, #1
22313 ++ cmp r1, #12
22314 ++ blo ._invertloop
22315 ++ add r1, r3, #144
22316 ++ ldr r2, [r1], #4
22317 ++ ldr r3, [r1], #4
22318 ++ ldr r4, [r1], #4
22319 ++ ldr r5, [r1], #4
22320 ++ ldr r6, [r1], #4
22321 ++ ldr r7, [r1], #4
22322 ++ ldr r8, [r1], #4
22323 ++ ldr r9, [r1], #4
22324 ++ ldr r10, [r1], #4
22325 ++ ldr r1, [r1]
22326 ++ add r11, r1, r1, LSL #4
22327 ++ add r11, r11, r1, LSL #1
22328 ++ add r11, r11, #16777216
22329 ++ mov r11, r11, ASR #25
22330 ++ add r11, r11, r2
22331 ++ mov r11, r11, ASR #26
22332 ++ add r11, r11, r3
22333 ++ mov r11, r11, ASR #25
22334 ++ add r11, r11, r4
22335 ++ mov r11, r11, ASR #26
22336 ++ add r11, r11, r5
22337 ++ mov r11, r11, ASR #25
22338 ++ add r11, r11, r6
22339 ++ mov r11, r11, ASR #26
22340 ++ add r11, r11, r7
22341 ++ mov r11, r11, ASR #25
22342 ++ add r11, r11, r8
22343 ++ mov r11, r11, ASR #26
22344 ++ add r11, r11, r9
22345 ++ mov r11, r11, ASR #25
22346 ++ add r11, r11, r10
22347 ++ mov r11, r11, ASR #26
22348 ++ add r11, r11, r1
22349 ++ mov r11, r11, ASR #25
22350 ++ add r2, r2, r11
22351 ++ add r2, r2, r11, LSL #1
22352 ++ add r2, r2, r11, LSL #4
22353 ++ mov r11, r2, ASR #26
22354 ++ add r3, r3, r11
22355 ++ sub r2, r2, r11, LSL #26
22356 ++ mov r11, r3, ASR #25
22357 ++ add r4, r4, r11
22358 ++ sub r3, r3, r11, LSL #25
22359 ++ mov r11, r4, ASR #26
22360 ++ add r5, r5, r11
22361 ++ sub r4, r4, r11, LSL #26
22362 ++ mov r11, r5, ASR #25
22363 ++ add r6, r6, r11
22364 ++ sub r5, r5, r11, LSL #25
22365 ++ mov r11, r6, ASR #26
22366 ++ add r7, r7, r11
22367 ++ sub r6, r6, r11, LSL #26
22368 ++ mov r11, r7, ASR #25
22369 ++ add r8, r8, r11
22370 ++ sub r7, r7, r11, LSL #25
22371 ++ mov r11, r8, ASR #26
22372 ++ add r9, r9, r11
22373 ++ sub r8, r8, r11, LSL #26
22374 ++ mov r11, r9, ASR #25
22375 ++ add r10, r10, r11
22376 ++ sub r9, r9, r11, LSL #25
22377 ++ mov r11, r10, ASR #26
22378 ++ add r1, r1, r11
22379 ++ sub r10, r10, r11, LSL #26
22380 ++ mov r11, r1, ASR #25
22381 ++ sub r1, r1, r11, LSL #25
22382 ++ add r2, r2, r3, LSL #26
22383 ++ mov r3, r3, LSR #6
22384 ++ add r3, r3, r4, LSL #19
22385 ++ mov r4, r4, LSR #13
22386 ++ add r4, r4, r5, LSL #13
22387 ++ mov r5, r5, LSR #19
22388 ++ add r5, r5, r6, LSL #6
22389 ++ add r6, r7, r8, LSL #25
22390 ++ mov r7, r8, LSR #7
22391 ++ add r7, r7, r9, LSL #19
22392 ++ mov r8, r9, LSR #13
22393 ++ add r8, r8, r10, LSL #12
22394 ++ mov r9, r10, LSR #20
22395 ++ add r1, r9, r1, LSL #6
22396 ++ str r2, [r0], #4
22397 ++ str r3, [r0], #4
22398 ++ str r4, [r0], #4
22399 ++ str r5, [r0], #4
22400 ++ str r6, [r0], #4
22401 ++ str r7, [r0], #4
22402 ++ str r8, [r0], #4
22403 ++ str r1, [r0]
22404 ++ ldrd r4, [sp, #0]
22405 ++ ldrd r6, [sp, #8]
22406 ++ ldrd r8, [sp, #16]
22407 ++ ldrd r10, [sp, #24]
22408 ++ ldr r12, [sp, #480]
22409 ++ ldr r14, [sp, #484]
22410 ++ ldr r0, =0
22411 ++ mov sp, r12
22412 ++ vpop {q4, q5, q6, q7}
22413 ++ bx lr
22414 +--
22415 +cgit v1.2.3-4-ga26e
22416 +
22417 +
22418 +From b1d9366402d85e078206e410f4d84d69b31a372c Mon Sep 17 00:00:00 2001
22419 +From: "Jason A. Donenfeld" <Jason@×××××.com>
22420 +Date: Fri, 8 Nov 2019 13:22:38 +0100
22421 +Subject: crypto: arm/curve25519 - wire up NEON implementation
22422 +
22423 +commit d8f1308a025fc7e00414194ed742d5f05a21e13c upstream.
22424 +
22425 +This ports the SUPERCOP implementation for usage in kernel space. In
22426 +addition to the usual header, macro, and style changes required for
22427 +kernel space, it makes a few small changes to the code:
22428 +
22429 + - The stack alignment is relaxed to 16 bytes.
22430 + - Superfluous mov statements have been removed.
22431 + - ldr for constants has been replaced with movw.
22432 + - ldreq has been replaced with moveq.
22433 + - The str epilogue has been made more idiomatic.
22434 + - SIMD registers are not pushed and popped at the beginning and end.
22435 + - The prologue and epilogue have been made idiomatic.
22436 + - A hole has been removed from the stack, saving 32 bytes.
22437 + - We write-back the base register whenever possible for vld1.8.
22438 + - Some multiplications have been reordered for better A7 performance.
22439 +
22440 +There are more opportunities for cleanup, since this code is from qhasm,
22441 +which doesn't always do the most opportune thing. But even prior to
22442 +extensive hand optimizations, this code delivers significant performance
22443 +improvements (given in get_cycles() per call):
22444 +
22445 + ----------- -------------
22446 + | generic C | this commit |
22447 + ------------ ----------- -------------
22448 + | Cortex-A7 | 49136 | 22395 |
22449 + ------------ ----------- -------------
22450 + | Cortex-A17 | 17326 | 4983 |
22451 + ------------ ----------- -------------
22452 +
22453 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
22454 +[ardb: - move to arch/arm/crypto
22455 + - wire into lib/crypto framework
22456 + - implement crypto API KPP hooks ]
22457 +Signed-off-by: Ard Biesheuvel <ardb@××××××.org>
22458 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
22459 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
22460 +---
22461 + arch/arm/crypto/Kconfig | 6 +
22462 + arch/arm/crypto/Makefile | 2 +
22463 + arch/arm/crypto/curve25519-core.S | 347 +++++++++++++++++---------------------
22464 + arch/arm/crypto/curve25519-glue.c | 127 ++++++++++++++
22465 + 4 files changed, 287 insertions(+), 195 deletions(-)
22466 + create mode 100644 arch/arm/crypto/curve25519-glue.c
22467 +
22468 +diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
22469 +index 2e8a9289bded..61fa7e4aa8f9 100644
22470 +--- a/arch/arm/crypto/Kconfig
22471 ++++ b/arch/arm/crypto/Kconfig
22472 +@@ -141,4 +141,10 @@ config CRYPTO_NHPOLY1305_NEON
22473 + depends on KERNEL_MODE_NEON
22474 + select CRYPTO_NHPOLY1305
22475 +
22476 ++config CRYPTO_CURVE25519_NEON
22477 ++ tristate "NEON accelerated Curve25519 scalar multiplication library"
22478 ++ depends on KERNEL_MODE_NEON
22479 ++ select CRYPTO_LIB_CURVE25519_GENERIC
22480 ++ select CRYPTO_ARCH_HAVE_LIB_CURVE25519
22481 ++
22482 + endif
22483 +diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
22484 +index 4f6a8a81dabc..7700385cec9f 100644
22485 +--- a/arch/arm/crypto/Makefile
22486 ++++ b/arch/arm/crypto/Makefile
22487 +@@ -12,6 +12,7 @@ obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o
22488 + obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o
22489 + obj-$(CONFIG_CRYPTO_POLY1305_ARM) += poly1305-arm.o
22490 + obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o
22491 ++obj-$(CONFIG_CRYPTO_CURVE25519_NEON) += curve25519-neon.o
22492 +
22493 + ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
22494 + ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
22495 +@@ -58,6 +59,7 @@ chacha-neon-y := chacha-scalar-core.o chacha-glue.o
22496 + chacha-neon-$(CONFIG_KERNEL_MODE_NEON) += chacha-neon-core.o
22497 + poly1305-arm-y := poly1305-core.o poly1305-glue.o
22498 + nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o
22499 ++curve25519-neon-y := curve25519-core.o curve25519-glue.o
22500 +
22501 + ifdef REGENERATE_ARM_CRYPTO
22502 + quiet_cmd_perl = PERL $@
22503 +diff --git a/arch/arm/crypto/curve25519-core.S b/arch/arm/crypto/curve25519-core.S
22504 +index f33b85fef382..be18af52e7dc 100644
22505 +--- a/arch/arm/crypto/curve25519-core.S
22506 ++++ b/arch/arm/crypto/curve25519-core.S
22507 +@@ -1,43 +1,35 @@
22508 ++/* SPDX-License-Identifier: GPL-2.0 OR MIT */
22509 + /*
22510 +- * Public domain code from Daniel J. Bernstein and Peter Schwabe, from
22511 +- * SUPERCOP's curve25519/neon2/scalarmult.s.
22512 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
22513 ++ *
22514 ++ * Based on public domain code from Daniel J. Bernstein and Peter Schwabe. This
22515 ++ * began from SUPERCOP's curve25519/neon2/scalarmult.s, but has subsequently been
22516 ++ * manually reworked for use in kernel space.
22517 + */
22518 +
22519 +-.fpu neon
22520 ++#include <linux/linkage.h>
22521 ++
22522 + .text
22523 ++.fpu neon
22524 ++.arch armv7-a
22525 + .align 4
22526 +-.global _crypto_scalarmult_curve25519_neon2
22527 +-.global crypto_scalarmult_curve25519_neon2
22528 +-.type _crypto_scalarmult_curve25519_neon2 STT_FUNC
22529 +-.type crypto_scalarmult_curve25519_neon2 STT_FUNC
22530 +- _crypto_scalarmult_curve25519_neon2:
22531 +- crypto_scalarmult_curve25519_neon2:
22532 +- vpush {q4, q5, q6, q7}
22533 +- mov r12, sp
22534 +- sub sp, sp, #736
22535 +- and sp, sp, #0xffffffe0
22536 +- strd r4, [sp, #0]
22537 +- strd r6, [sp, #8]
22538 +- strd r8, [sp, #16]
22539 +- strd r10, [sp, #24]
22540 +- str r12, [sp, #480]
22541 +- str r14, [sp, #484]
22542 +- mov r0, r0
22543 +- mov r1, r1
22544 +- mov r2, r2
22545 +- add r3, sp, #32
22546 +- ldr r4, =0
22547 +- ldr r5, =254
22548 ++
22549 ++ENTRY(curve25519_neon)
22550 ++ push {r4-r11, lr}
22551 ++ mov ip, sp
22552 ++ sub r3, sp, #704
22553 ++ and r3, r3, #0xfffffff0
22554 ++ mov sp, r3
22555 ++ movw r4, #0
22556 ++ movw r5, #254
22557 + vmov.i32 q0, #1
22558 + vshr.u64 q1, q0, #7
22559 + vshr.u64 q0, q0, #8
22560 + vmov.i32 d4, #19
22561 + vmov.i32 d5, #38
22562 +- add r6, sp, #512
22563 +- vst1.8 {d2-d3}, [r6, : 128]
22564 +- add r6, sp, #528
22565 +- vst1.8 {d0-d1}, [r6, : 128]
22566 +- add r6, sp, #544
22567 ++ add r6, sp, #480
22568 ++ vst1.8 {d2-d3}, [r6, : 128]!
22569 ++ vst1.8 {d0-d1}, [r6, : 128]!
22570 + vst1.8 {d4-d5}, [r6, : 128]
22571 + add r6, r3, #0
22572 + vmov.i32 q2, #0
22573 +@@ -45,12 +37,12 @@
22574 + vst1.8 {d4-d5}, [r6, : 128]!
22575 + vst1.8 d4, [r6, : 64]
22576 + add r6, r3, #0
22577 +- ldr r7, =960
22578 ++ movw r7, #960
22579 + sub r7, r7, #2
22580 + neg r7, r7
22581 + sub r7, r7, r7, LSL #7
22582 + str r7, [r6]
22583 +- add r6, sp, #704
22584 ++ add r6, sp, #672
22585 + vld1.8 {d4-d5}, [r1]!
22586 + vld1.8 {d6-d7}, [r1]
22587 + vst1.8 {d4-d5}, [r6, : 128]!
22588 +@@ -212,15 +204,15 @@
22589 + vst1.8 {d0-d1}, [r6, : 128]!
22590 + vst1.8 {d2-d3}, [r6, : 128]!
22591 + vst1.8 d4, [r6, : 64]
22592 +-._mainloop:
22593 ++.Lmainloop:
22594 + mov r2, r5, LSR #3
22595 + and r6, r5, #7
22596 + ldrb r2, [r1, r2]
22597 + mov r2, r2, LSR r6
22598 + and r2, r2, #1
22599 +- str r5, [sp, #488]
22600 ++ str r5, [sp, #456]
22601 + eor r4, r4, r2
22602 +- str r2, [sp, #492]
22603 ++ str r2, [sp, #460]
22604 + neg r2, r4
22605 + add r4, r3, #96
22606 + add r5, r3, #192
22607 +@@ -291,7 +283,7 @@
22608 + vsub.i32 q0, q1, q3
22609 + vst1.8 d4, [r4, : 64]
22610 + vst1.8 d0, [r6, : 64]
22611 +- add r2, sp, #544
22612 ++ add r2, sp, #512
22613 + add r4, r3, #96
22614 + add r5, r3, #144
22615 + vld1.8 {d0-d1}, [r2, : 128]
22616 +@@ -361,14 +353,13 @@
22617 + vmlal.s32 q0, d12, d8
22618 + vmlal.s32 q0, d13, d17
22619 + vmlal.s32 q0, d6, d6
22620 +- add r2, sp, #512
22621 +- vld1.8 {d18-d19}, [r2, : 128]
22622 ++ add r2, sp, #480
22623 ++ vld1.8 {d18-d19}, [r2, : 128]!
22624 + vmull.s32 q3, d16, d7
22625 + vmlal.s32 q3, d10, d15
22626 + vmlal.s32 q3, d11, d14
22627 + vmlal.s32 q3, d12, d9
22628 + vmlal.s32 q3, d13, d8
22629 +- add r2, sp, #528
22630 + vld1.8 {d8-d9}, [r2, : 128]
22631 + vadd.i64 q5, q12, q9
22632 + vadd.i64 q6, q15, q9
22633 +@@ -502,22 +493,19 @@
22634 + vadd.i32 q5, q5, q0
22635 + vtrn.32 q11, q14
22636 + vadd.i32 q6, q6, q3
22637 +- add r2, sp, #560
22638 ++ add r2, sp, #528
22639 + vadd.i32 q10, q10, q2
22640 + vtrn.32 d24, d25
22641 +- vst1.8 {d12-d13}, [r2, : 128]
22642 ++ vst1.8 {d12-d13}, [r2, : 128]!
22643 + vshl.i32 q6, q13, #1
22644 +- add r2, sp, #576
22645 +- vst1.8 {d20-d21}, [r2, : 128]
22646 ++ vst1.8 {d20-d21}, [r2, : 128]!
22647 + vshl.i32 q10, q14, #1
22648 +- add r2, sp, #592
22649 +- vst1.8 {d12-d13}, [r2, : 128]
22650 ++ vst1.8 {d12-d13}, [r2, : 128]!
22651 + vshl.i32 q15, q12, #1
22652 + vadd.i32 q8, q8, q4
22653 + vext.32 d10, d31, d30, #0
22654 + vadd.i32 q7, q7, q1
22655 +- add r2, sp, #608
22656 +- vst1.8 {d16-d17}, [r2, : 128]
22657 ++ vst1.8 {d16-d17}, [r2, : 128]!
22658 + vmull.s32 q8, d18, d5
22659 + vmlal.s32 q8, d26, d4
22660 + vmlal.s32 q8, d19, d9
22661 +@@ -528,8 +516,7 @@
22662 + vmlal.s32 q8, d29, d1
22663 + vmlal.s32 q8, d24, d6
22664 + vmlal.s32 q8, d25, d0
22665 +- add r2, sp, #624
22666 +- vst1.8 {d14-d15}, [r2, : 128]
22667 ++ vst1.8 {d14-d15}, [r2, : 128]!
22668 + vmull.s32 q2, d18, d4
22669 + vmlal.s32 q2, d12, d9
22670 + vmlal.s32 q2, d13, d8
22671 +@@ -537,8 +524,7 @@
22672 + vmlal.s32 q2, d22, d2
22673 + vmlal.s32 q2, d23, d1
22674 + vmlal.s32 q2, d24, d0
22675 +- add r2, sp, #640
22676 +- vst1.8 {d20-d21}, [r2, : 128]
22677 ++ vst1.8 {d20-d21}, [r2, : 128]!
22678 + vmull.s32 q7, d18, d9
22679 + vmlal.s32 q7, d26, d3
22680 + vmlal.s32 q7, d19, d8
22681 +@@ -547,14 +533,12 @@
22682 + vmlal.s32 q7, d28, d1
22683 + vmlal.s32 q7, d23, d6
22684 + vmlal.s32 q7, d29, d0
22685 +- add r2, sp, #656
22686 +- vst1.8 {d10-d11}, [r2, : 128]
22687 ++ vst1.8 {d10-d11}, [r2, : 128]!
22688 + vmull.s32 q5, d18, d3
22689 + vmlal.s32 q5, d19, d2
22690 + vmlal.s32 q5, d22, d1
22691 + vmlal.s32 q5, d23, d0
22692 + vmlal.s32 q5, d12, d8
22693 +- add r2, sp, #672
22694 + vst1.8 {d16-d17}, [r2, : 128]
22695 + vmull.s32 q4, d18, d8
22696 + vmlal.s32 q4, d26, d2
22697 +@@ -566,7 +550,7 @@
22698 + vmlal.s32 q8, d26, d1
22699 + vmlal.s32 q8, d19, d6
22700 + vmlal.s32 q8, d27, d0
22701 +- add r2, sp, #576
22702 ++ add r2, sp, #544
22703 + vld1.8 {d20-d21}, [r2, : 128]
22704 + vmlal.s32 q7, d24, d21
22705 + vmlal.s32 q7, d25, d20
22706 +@@ -575,32 +559,30 @@
22707 + vmlal.s32 q8, d22, d21
22708 + vmlal.s32 q8, d28, d20
22709 + vmlal.s32 q5, d24, d20
22710 +- add r2, sp, #576
22711 + vst1.8 {d14-d15}, [r2, : 128]
22712 + vmull.s32 q7, d18, d6
22713 + vmlal.s32 q7, d26, d0
22714 +- add r2, sp, #656
22715 ++ add r2, sp, #624
22716 + vld1.8 {d30-d31}, [r2, : 128]
22717 + vmlal.s32 q2, d30, d21
22718 + vmlal.s32 q7, d19, d21
22719 + vmlal.s32 q7, d27, d20
22720 +- add r2, sp, #624
22721 ++ add r2, sp, #592
22722 + vld1.8 {d26-d27}, [r2, : 128]
22723 + vmlal.s32 q4, d25, d27
22724 + vmlal.s32 q8, d29, d27
22725 + vmlal.s32 q8, d25, d26
22726 + vmlal.s32 q7, d28, d27
22727 + vmlal.s32 q7, d29, d26
22728 +- add r2, sp, #608
22729 ++ add r2, sp, #576
22730 + vld1.8 {d28-d29}, [r2, : 128]
22731 + vmlal.s32 q4, d24, d29
22732 + vmlal.s32 q8, d23, d29
22733 + vmlal.s32 q8, d24, d28
22734 + vmlal.s32 q7, d22, d29
22735 + vmlal.s32 q7, d23, d28
22736 +- add r2, sp, #608
22737 + vst1.8 {d8-d9}, [r2, : 128]
22738 +- add r2, sp, #560
22739 ++ add r2, sp, #528
22740 + vld1.8 {d8-d9}, [r2, : 128]
22741 + vmlal.s32 q7, d24, d9
22742 + vmlal.s32 q7, d25, d31
22743 +@@ -621,36 +603,36 @@
22744 + vmlal.s32 q0, d23, d26
22745 + vmlal.s32 q0, d24, d31
22746 + vmlal.s32 q0, d19, d20
22747 +- add r2, sp, #640
22748 ++ add r2, sp, #608
22749 + vld1.8 {d18-d19}, [r2, : 128]
22750 + vmlal.s32 q2, d18, d7
22751 +- vmlal.s32 q2, d19, d6
22752 + vmlal.s32 q5, d18, d6
22753 +- vmlal.s32 q5, d19, d21
22754 + vmlal.s32 q1, d18, d21
22755 +- vmlal.s32 q1, d19, d29
22756 + vmlal.s32 q0, d18, d28
22757 +- vmlal.s32 q0, d19, d9
22758 + vmlal.s32 q6, d18, d29
22759 ++ vmlal.s32 q2, d19, d6
22760 ++ vmlal.s32 q5, d19, d21
22761 ++ vmlal.s32 q1, d19, d29
22762 ++ vmlal.s32 q0, d19, d9
22763 + vmlal.s32 q6, d19, d28
22764 +- add r2, sp, #592
22765 ++ add r2, sp, #560
22766 + vld1.8 {d18-d19}, [r2, : 128]
22767 +- add r2, sp, #512
22768 ++ add r2, sp, #480
22769 + vld1.8 {d22-d23}, [r2, : 128]
22770 + vmlal.s32 q5, d19, d7
22771 + vmlal.s32 q0, d18, d21
22772 + vmlal.s32 q0, d19, d29
22773 + vmlal.s32 q6, d18, d6
22774 +- add r2, sp, #528
22775 ++ add r2, sp, #496
22776 + vld1.8 {d6-d7}, [r2, : 128]
22777 + vmlal.s32 q6, d19, d21
22778 +- add r2, sp, #576
22779 ++ add r2, sp, #544
22780 + vld1.8 {d18-d19}, [r2, : 128]
22781 + vmlal.s32 q0, d30, d8
22782 +- add r2, sp, #672
22783 ++ add r2, sp, #640
22784 + vld1.8 {d20-d21}, [r2, : 128]
22785 + vmlal.s32 q5, d30, d29
22786 +- add r2, sp, #608
22787 ++ add r2, sp, #576
22788 + vld1.8 {d24-d25}, [r2, : 128]
22789 + vmlal.s32 q1, d30, d28
22790 + vadd.i64 q13, q0, q11
22791 +@@ -823,22 +805,19 @@
22792 + vadd.i32 q5, q5, q0
22793 + vtrn.32 q11, q14
22794 + vadd.i32 q6, q6, q3
22795 +- add r2, sp, #560
22796 ++ add r2, sp, #528
22797 + vadd.i32 q10, q10, q2
22798 + vtrn.32 d24, d25
22799 +- vst1.8 {d12-d13}, [r2, : 128]
22800 ++ vst1.8 {d12-d13}, [r2, : 128]!
22801 + vshl.i32 q6, q13, #1
22802 +- add r2, sp, #576
22803 +- vst1.8 {d20-d21}, [r2, : 128]
22804 ++ vst1.8 {d20-d21}, [r2, : 128]!
22805 + vshl.i32 q10, q14, #1
22806 +- add r2, sp, #592
22807 +- vst1.8 {d12-d13}, [r2, : 128]
22808 ++ vst1.8 {d12-d13}, [r2, : 128]!
22809 + vshl.i32 q15, q12, #1
22810 + vadd.i32 q8, q8, q4
22811 + vext.32 d10, d31, d30, #0
22812 + vadd.i32 q7, q7, q1
22813 +- add r2, sp, #608
22814 +- vst1.8 {d16-d17}, [r2, : 128]
22815 ++ vst1.8 {d16-d17}, [r2, : 128]!
22816 + vmull.s32 q8, d18, d5
22817 + vmlal.s32 q8, d26, d4
22818 + vmlal.s32 q8, d19, d9
22819 +@@ -849,8 +828,7 @@
22820 + vmlal.s32 q8, d29, d1
22821 + vmlal.s32 q8, d24, d6
22822 + vmlal.s32 q8, d25, d0
22823 +- add r2, sp, #624
22824 +- vst1.8 {d14-d15}, [r2, : 128]
22825 ++ vst1.8 {d14-d15}, [r2, : 128]!
22826 + vmull.s32 q2, d18, d4
22827 + vmlal.s32 q2, d12, d9
22828 + vmlal.s32 q2, d13, d8
22829 +@@ -858,8 +836,7 @@
22830 + vmlal.s32 q2, d22, d2
22831 + vmlal.s32 q2, d23, d1
22832 + vmlal.s32 q2, d24, d0
22833 +- add r2, sp, #640
22834 +- vst1.8 {d20-d21}, [r2, : 128]
22835 ++ vst1.8 {d20-d21}, [r2, : 128]!
22836 + vmull.s32 q7, d18, d9
22837 + vmlal.s32 q7, d26, d3
22838 + vmlal.s32 q7, d19, d8
22839 +@@ -868,15 +845,13 @@
22840 + vmlal.s32 q7, d28, d1
22841 + vmlal.s32 q7, d23, d6
22842 + vmlal.s32 q7, d29, d0
22843 +- add r2, sp, #656
22844 +- vst1.8 {d10-d11}, [r2, : 128]
22845 ++ vst1.8 {d10-d11}, [r2, : 128]!
22846 + vmull.s32 q5, d18, d3
22847 + vmlal.s32 q5, d19, d2
22848 + vmlal.s32 q5, d22, d1
22849 + vmlal.s32 q5, d23, d0
22850 + vmlal.s32 q5, d12, d8
22851 +- add r2, sp, #672
22852 +- vst1.8 {d16-d17}, [r2, : 128]
22853 ++ vst1.8 {d16-d17}, [r2, : 128]!
22854 + vmull.s32 q4, d18, d8
22855 + vmlal.s32 q4, d26, d2
22856 + vmlal.s32 q4, d19, d7
22857 +@@ -887,7 +862,7 @@
22858 + vmlal.s32 q8, d26, d1
22859 + vmlal.s32 q8, d19, d6
22860 + vmlal.s32 q8, d27, d0
22861 +- add r2, sp, #576
22862 ++ add r2, sp, #544
22863 + vld1.8 {d20-d21}, [r2, : 128]
22864 + vmlal.s32 q7, d24, d21
22865 + vmlal.s32 q7, d25, d20
22866 +@@ -896,32 +871,30 @@
22867 + vmlal.s32 q8, d22, d21
22868 + vmlal.s32 q8, d28, d20
22869 + vmlal.s32 q5, d24, d20
22870 +- add r2, sp, #576
22871 + vst1.8 {d14-d15}, [r2, : 128]
22872 + vmull.s32 q7, d18, d6
22873 + vmlal.s32 q7, d26, d0
22874 +- add r2, sp, #656
22875 ++ add r2, sp, #624
22876 + vld1.8 {d30-d31}, [r2, : 128]
22877 + vmlal.s32 q2, d30, d21
22878 + vmlal.s32 q7, d19, d21
22879 + vmlal.s32 q7, d27, d20
22880 +- add r2, sp, #624
22881 ++ add r2, sp, #592
22882 + vld1.8 {d26-d27}, [r2, : 128]
22883 + vmlal.s32 q4, d25, d27
22884 + vmlal.s32 q8, d29, d27
22885 + vmlal.s32 q8, d25, d26
22886 + vmlal.s32 q7, d28, d27
22887 + vmlal.s32 q7, d29, d26
22888 +- add r2, sp, #608
22889 ++ add r2, sp, #576
22890 + vld1.8 {d28-d29}, [r2, : 128]
22891 + vmlal.s32 q4, d24, d29
22892 + vmlal.s32 q8, d23, d29
22893 + vmlal.s32 q8, d24, d28
22894 + vmlal.s32 q7, d22, d29
22895 + vmlal.s32 q7, d23, d28
22896 +- add r2, sp, #608
22897 + vst1.8 {d8-d9}, [r2, : 128]
22898 +- add r2, sp, #560
22899 ++ add r2, sp, #528
22900 + vld1.8 {d8-d9}, [r2, : 128]
22901 + vmlal.s32 q7, d24, d9
22902 + vmlal.s32 q7, d25, d31
22903 +@@ -942,36 +915,36 @@
22904 + vmlal.s32 q0, d23, d26
22905 + vmlal.s32 q0, d24, d31
22906 + vmlal.s32 q0, d19, d20
22907 +- add r2, sp, #640
22908 ++ add r2, sp, #608
22909 + vld1.8 {d18-d19}, [r2, : 128]
22910 + vmlal.s32 q2, d18, d7
22911 +- vmlal.s32 q2, d19, d6
22912 + vmlal.s32 q5, d18, d6
22913 +- vmlal.s32 q5, d19, d21
22914 + vmlal.s32 q1, d18, d21
22915 +- vmlal.s32 q1, d19, d29
22916 + vmlal.s32 q0, d18, d28
22917 +- vmlal.s32 q0, d19, d9
22918 + vmlal.s32 q6, d18, d29
22919 ++ vmlal.s32 q2, d19, d6
22920 ++ vmlal.s32 q5, d19, d21
22921 ++ vmlal.s32 q1, d19, d29
22922 ++ vmlal.s32 q0, d19, d9
22923 + vmlal.s32 q6, d19, d28
22924 +- add r2, sp, #592
22925 ++ add r2, sp, #560
22926 + vld1.8 {d18-d19}, [r2, : 128]
22927 +- add r2, sp, #512
22928 ++ add r2, sp, #480
22929 + vld1.8 {d22-d23}, [r2, : 128]
22930 + vmlal.s32 q5, d19, d7
22931 + vmlal.s32 q0, d18, d21
22932 + vmlal.s32 q0, d19, d29
22933 + vmlal.s32 q6, d18, d6
22934 +- add r2, sp, #528
22935 ++ add r2, sp, #496
22936 + vld1.8 {d6-d7}, [r2, : 128]
22937 + vmlal.s32 q6, d19, d21
22938 +- add r2, sp, #576
22939 ++ add r2, sp, #544
22940 + vld1.8 {d18-d19}, [r2, : 128]
22941 + vmlal.s32 q0, d30, d8
22942 +- add r2, sp, #672
22943 ++ add r2, sp, #640
22944 + vld1.8 {d20-d21}, [r2, : 128]
22945 + vmlal.s32 q5, d30, d29
22946 +- add r2, sp, #608
22947 ++ add r2, sp, #576
22948 + vld1.8 {d24-d25}, [r2, : 128]
22949 + vmlal.s32 q1, d30, d28
22950 + vadd.i64 q13, q0, q11
22951 +@@ -1069,7 +1042,7 @@
22952 + sub r4, r4, #24
22953 + vst1.8 d0, [r2, : 64]
22954 + vst1.8 d1, [r4, : 64]
22955 +- add r2, sp, #544
22956 ++ add r2, sp, #512
22957 + add r4, r3, #144
22958 + add r5, r3, #192
22959 + vld1.8 {d0-d1}, [r2, : 128]
22960 +@@ -1139,14 +1112,13 @@
22961 + vmlal.s32 q0, d12, d8
22962 + vmlal.s32 q0, d13, d17
22963 + vmlal.s32 q0, d6, d6
22964 +- add r2, sp, #512
22965 +- vld1.8 {d18-d19}, [r2, : 128]
22966 ++ add r2, sp, #480
22967 ++ vld1.8 {d18-d19}, [r2, : 128]!
22968 + vmull.s32 q3, d16, d7
22969 + vmlal.s32 q3, d10, d15
22970 + vmlal.s32 q3, d11, d14
22971 + vmlal.s32 q3, d12, d9
22972 + vmlal.s32 q3, d13, d8
22973 +- add r2, sp, #528
22974 + vld1.8 {d8-d9}, [r2, : 128]
22975 + vadd.i64 q5, q12, q9
22976 + vadd.i64 q6, q15, q9
22977 +@@ -1295,22 +1267,19 @@
22978 + vadd.i32 q5, q5, q0
22979 + vtrn.32 q11, q14
22980 + vadd.i32 q6, q6, q3
22981 +- add r2, sp, #560
22982 ++ add r2, sp, #528
22983 + vadd.i32 q10, q10, q2
22984 + vtrn.32 d24, d25
22985 +- vst1.8 {d12-d13}, [r2, : 128]
22986 ++ vst1.8 {d12-d13}, [r2, : 128]!
22987 + vshl.i32 q6, q13, #1
22988 +- add r2, sp, #576
22989 +- vst1.8 {d20-d21}, [r2, : 128]
22990 ++ vst1.8 {d20-d21}, [r2, : 128]!
22991 + vshl.i32 q10, q14, #1
22992 +- add r2, sp, #592
22993 +- vst1.8 {d12-d13}, [r2, : 128]
22994 ++ vst1.8 {d12-d13}, [r2, : 128]!
22995 + vshl.i32 q15, q12, #1
22996 + vadd.i32 q8, q8, q4
22997 + vext.32 d10, d31, d30, #0
22998 + vadd.i32 q7, q7, q1
22999 +- add r2, sp, #608
23000 +- vst1.8 {d16-d17}, [r2, : 128]
23001 ++ vst1.8 {d16-d17}, [r2, : 128]!
23002 + vmull.s32 q8, d18, d5
23003 + vmlal.s32 q8, d26, d4
23004 + vmlal.s32 q8, d19, d9
23005 +@@ -1321,8 +1290,7 @@
23006 + vmlal.s32 q8, d29, d1
23007 + vmlal.s32 q8, d24, d6
23008 + vmlal.s32 q8, d25, d0
23009 +- add r2, sp, #624
23010 +- vst1.8 {d14-d15}, [r2, : 128]
23011 ++ vst1.8 {d14-d15}, [r2, : 128]!
23012 + vmull.s32 q2, d18, d4
23013 + vmlal.s32 q2, d12, d9
23014 + vmlal.s32 q2, d13, d8
23015 +@@ -1330,8 +1298,7 @@
23016 + vmlal.s32 q2, d22, d2
23017 + vmlal.s32 q2, d23, d1
23018 + vmlal.s32 q2, d24, d0
23019 +- add r2, sp, #640
23020 +- vst1.8 {d20-d21}, [r2, : 128]
23021 ++ vst1.8 {d20-d21}, [r2, : 128]!
23022 + vmull.s32 q7, d18, d9
23023 + vmlal.s32 q7, d26, d3
23024 + vmlal.s32 q7, d19, d8
23025 +@@ -1340,15 +1307,13 @@
23026 + vmlal.s32 q7, d28, d1
23027 + vmlal.s32 q7, d23, d6
23028 + vmlal.s32 q7, d29, d0
23029 +- add r2, sp, #656
23030 +- vst1.8 {d10-d11}, [r2, : 128]
23031 ++ vst1.8 {d10-d11}, [r2, : 128]!
23032 + vmull.s32 q5, d18, d3
23033 + vmlal.s32 q5, d19, d2
23034 + vmlal.s32 q5, d22, d1
23035 + vmlal.s32 q5, d23, d0
23036 + vmlal.s32 q5, d12, d8
23037 +- add r2, sp, #672
23038 +- vst1.8 {d16-d17}, [r2, : 128]
23039 ++ vst1.8 {d16-d17}, [r2, : 128]!
23040 + vmull.s32 q4, d18, d8
23041 + vmlal.s32 q4, d26, d2
23042 + vmlal.s32 q4, d19, d7
23043 +@@ -1359,7 +1324,7 @@
23044 + vmlal.s32 q8, d26, d1
23045 + vmlal.s32 q8, d19, d6
23046 + vmlal.s32 q8, d27, d0
23047 +- add r2, sp, #576
23048 ++ add r2, sp, #544
23049 + vld1.8 {d20-d21}, [r2, : 128]
23050 + vmlal.s32 q7, d24, d21
23051 + vmlal.s32 q7, d25, d20
23052 +@@ -1368,32 +1333,30 @@
23053 + vmlal.s32 q8, d22, d21
23054 + vmlal.s32 q8, d28, d20
23055 + vmlal.s32 q5, d24, d20
23056 +- add r2, sp, #576
23057 + vst1.8 {d14-d15}, [r2, : 128]
23058 + vmull.s32 q7, d18, d6
23059 + vmlal.s32 q7, d26, d0
23060 +- add r2, sp, #656
23061 ++ add r2, sp, #624
23062 + vld1.8 {d30-d31}, [r2, : 128]
23063 + vmlal.s32 q2, d30, d21
23064 + vmlal.s32 q7, d19, d21
23065 + vmlal.s32 q7, d27, d20
23066 +- add r2, sp, #624
23067 ++ add r2, sp, #592
23068 + vld1.8 {d26-d27}, [r2, : 128]
23069 + vmlal.s32 q4, d25, d27
23070 + vmlal.s32 q8, d29, d27
23071 + vmlal.s32 q8, d25, d26
23072 + vmlal.s32 q7, d28, d27
23073 + vmlal.s32 q7, d29, d26
23074 +- add r2, sp, #608
23075 ++ add r2, sp, #576
23076 + vld1.8 {d28-d29}, [r2, : 128]
23077 + vmlal.s32 q4, d24, d29
23078 + vmlal.s32 q8, d23, d29
23079 + vmlal.s32 q8, d24, d28
23080 + vmlal.s32 q7, d22, d29
23081 + vmlal.s32 q7, d23, d28
23082 +- add r2, sp, #608
23083 + vst1.8 {d8-d9}, [r2, : 128]
23084 +- add r2, sp, #560
23085 ++ add r2, sp, #528
23086 + vld1.8 {d8-d9}, [r2, : 128]
23087 + vmlal.s32 q7, d24, d9
23088 + vmlal.s32 q7, d25, d31
23089 +@@ -1414,36 +1377,36 @@
23090 + vmlal.s32 q0, d23, d26
23091 + vmlal.s32 q0, d24, d31
23092 + vmlal.s32 q0, d19, d20
23093 +- add r2, sp, #640
23094 ++ add r2, sp, #608
23095 + vld1.8 {d18-d19}, [r2, : 128]
23096 + vmlal.s32 q2, d18, d7
23097 +- vmlal.s32 q2, d19, d6
23098 + vmlal.s32 q5, d18, d6
23099 +- vmlal.s32 q5, d19, d21
23100 + vmlal.s32 q1, d18, d21
23101 +- vmlal.s32 q1, d19, d29
23102 + vmlal.s32 q0, d18, d28
23103 +- vmlal.s32 q0, d19, d9
23104 + vmlal.s32 q6, d18, d29
23105 ++ vmlal.s32 q2, d19, d6
23106 ++ vmlal.s32 q5, d19, d21
23107 ++ vmlal.s32 q1, d19, d29
23108 ++ vmlal.s32 q0, d19, d9
23109 + vmlal.s32 q6, d19, d28
23110 +- add r2, sp, #592
23111 ++ add r2, sp, #560
23112 + vld1.8 {d18-d19}, [r2, : 128]
23113 +- add r2, sp, #512
23114 ++ add r2, sp, #480
23115 + vld1.8 {d22-d23}, [r2, : 128]
23116 + vmlal.s32 q5, d19, d7
23117 + vmlal.s32 q0, d18, d21
23118 + vmlal.s32 q0, d19, d29
23119 + vmlal.s32 q6, d18, d6
23120 +- add r2, sp, #528
23121 ++ add r2, sp, #496
23122 + vld1.8 {d6-d7}, [r2, : 128]
23123 + vmlal.s32 q6, d19, d21
23124 +- add r2, sp, #576
23125 ++ add r2, sp, #544
23126 + vld1.8 {d18-d19}, [r2, : 128]
23127 + vmlal.s32 q0, d30, d8
23128 +- add r2, sp, #672
23129 ++ add r2, sp, #640
23130 + vld1.8 {d20-d21}, [r2, : 128]
23131 + vmlal.s32 q5, d30, d29
23132 +- add r2, sp, #608
23133 ++ add r2, sp, #576
23134 + vld1.8 {d24-d25}, [r2, : 128]
23135 + vmlal.s32 q1, d30, d28
23136 + vadd.i64 q13, q0, q11
23137 +@@ -1541,10 +1504,10 @@
23138 + sub r4, r4, #24
23139 + vst1.8 d0, [r2, : 64]
23140 + vst1.8 d1, [r4, : 64]
23141 +- ldr r2, [sp, #488]
23142 +- ldr r4, [sp, #492]
23143 ++ ldr r2, [sp, #456]
23144 ++ ldr r4, [sp, #460]
23145 + subs r5, r2, #1
23146 +- bge ._mainloop
23147 ++ bge .Lmainloop
23148 + add r1, r3, #144
23149 + add r2, r3, #336
23150 + vld1.8 {d0-d1}, [r1, : 128]!
23151 +@@ -1553,41 +1516,41 @@
23152 + vst1.8 {d0-d1}, [r2, : 128]!
23153 + vst1.8 {d2-d3}, [r2, : 128]!
23154 + vst1.8 d4, [r2, : 64]
23155 +- ldr r1, =0
23156 +-._invertloop:
23157 ++ movw r1, #0
23158 ++.Linvertloop:
23159 + add r2, r3, #144
23160 +- ldr r4, =0
23161 +- ldr r5, =2
23162 ++ movw r4, #0
23163 ++ movw r5, #2
23164 + cmp r1, #1
23165 +- ldreq r5, =1
23166 ++ moveq r5, #1
23167 + addeq r2, r3, #336
23168 + addeq r4, r3, #48
23169 + cmp r1, #2
23170 +- ldreq r5, =1
23171 ++ moveq r5, #1
23172 + addeq r2, r3, #48
23173 + cmp r1, #3
23174 +- ldreq r5, =5
23175 ++ moveq r5, #5
23176 + addeq r4, r3, #336
23177 + cmp r1, #4
23178 +- ldreq r5, =10
23179 ++ moveq r5, #10
23180 + cmp r1, #5
23181 +- ldreq r5, =20
23182 ++ moveq r5, #20
23183 + cmp r1, #6
23184 +- ldreq r5, =10
23185 ++ moveq r5, #10
23186 + addeq r2, r3, #336
23187 + addeq r4, r3, #336
23188 + cmp r1, #7
23189 +- ldreq r5, =50
23190 ++ moveq r5, #50
23191 + cmp r1, #8
23192 +- ldreq r5, =100
23193 ++ moveq r5, #100
23194 + cmp r1, #9
23195 +- ldreq r5, =50
23196 ++ moveq r5, #50
23197 + addeq r2, r3, #336
23198 + cmp r1, #10
23199 +- ldreq r5, =5
23200 ++ moveq r5, #5
23201 + addeq r2, r3, #48
23202 + cmp r1, #11
23203 +- ldreq r5, =0
23204 ++ moveq r5, #0
23205 + addeq r2, r3, #96
23206 + add r6, r3, #144
23207 + add r7, r3, #288
23208 +@@ -1598,8 +1561,8 @@
23209 + vst1.8 {d2-d3}, [r7, : 128]!
23210 + vst1.8 d4, [r7, : 64]
23211 + cmp r5, #0
23212 +- beq ._skipsquaringloop
23213 +-._squaringloop:
23214 ++ beq .Lskipsquaringloop
23215 ++.Lsquaringloop:
23216 + add r6, r3, #288
23217 + add r7, r3, #288
23218 + add r8, r3, #288
23219 +@@ -1611,7 +1574,7 @@
23220 + vld1.8 {d6-d7}, [r7, : 128]!
23221 + vld1.8 {d9}, [r7, : 64]
23222 + vld1.8 {d10-d11}, [r6, : 128]!
23223 +- add r7, sp, #416
23224 ++ add r7, sp, #384
23225 + vld1.8 {d12-d13}, [r6, : 128]!
23226 + vmul.i32 q7, q2, q0
23227 + vld1.8 {d8}, [r6, : 64]
23228 +@@ -1726,7 +1689,7 @@
23229 + vext.32 d10, d6, d6, #0
23230 + vmov.i32 q1, #0xffffffff
23231 + vshl.i64 q4, q1, #25
23232 +- add r7, sp, #512
23233 ++ add r7, sp, #480
23234 + vld1.8 {d14-d15}, [r7, : 128]
23235 + vadd.i64 q9, q2, q7
23236 + vshl.i64 q1, q1, #26
23237 +@@ -1735,7 +1698,7 @@
23238 + vadd.i64 q5, q5, q10
23239 + vand q9, q9, q1
23240 + vld1.8 {d16}, [r6, : 64]!
23241 +- add r6, sp, #528
23242 ++ add r6, sp, #496
23243 + vld1.8 {d20-d21}, [r6, : 128]
23244 + vadd.i64 q11, q5, q10
23245 + vsub.i64 q2, q2, q9
23246 +@@ -1789,8 +1752,8 @@
23247 + sub r6, r6, #32
23248 + vst1.8 d4, [r6, : 64]
23249 + subs r5, r5, #1
23250 +- bhi ._squaringloop
23251 +-._skipsquaringloop:
23252 ++ bhi .Lsquaringloop
23253 ++.Lskipsquaringloop:
23254 + mov r2, r2
23255 + add r5, r3, #288
23256 + add r6, r3, #144
23257 +@@ -1802,7 +1765,7 @@
23258 + vld1.8 {d6-d7}, [r5, : 128]!
23259 + vld1.8 {d9}, [r5, : 64]
23260 + vld1.8 {d10-d11}, [r2, : 128]!
23261 +- add r5, sp, #416
23262 ++ add r5, sp, #384
23263 + vld1.8 {d12-d13}, [r2, : 128]!
23264 + vmul.i32 q7, q2, q0
23265 + vld1.8 {d8}, [r2, : 64]
23266 +@@ -1917,7 +1880,7 @@
23267 + vext.32 d10, d6, d6, #0
23268 + vmov.i32 q1, #0xffffffff
23269 + vshl.i64 q4, q1, #25
23270 +- add r5, sp, #512
23271 ++ add r5, sp, #480
23272 + vld1.8 {d14-d15}, [r5, : 128]
23273 + vadd.i64 q9, q2, q7
23274 + vshl.i64 q1, q1, #26
23275 +@@ -1926,7 +1889,7 @@
23276 + vadd.i64 q5, q5, q10
23277 + vand q9, q9, q1
23278 + vld1.8 {d16}, [r2, : 64]!
23279 +- add r2, sp, #528
23280 ++ add r2, sp, #496
23281 + vld1.8 {d20-d21}, [r2, : 128]
23282 + vadd.i64 q11, q5, q10
23283 + vsub.i64 q2, q2, q9
23284 +@@ -1980,7 +1943,7 @@
23285 + sub r2, r2, #32
23286 + vst1.8 d4, [r2, : 64]
23287 + cmp r4, #0
23288 +- beq ._skippostcopy
23289 ++ beq .Lskippostcopy
23290 + add r2, r3, #144
23291 + mov r4, r4
23292 + vld1.8 {d0-d1}, [r2, : 128]!
23293 +@@ -1989,9 +1952,9 @@
23294 + vst1.8 {d0-d1}, [r4, : 128]!
23295 + vst1.8 {d2-d3}, [r4, : 128]!
23296 + vst1.8 d4, [r4, : 64]
23297 +-._skippostcopy:
23298 ++.Lskippostcopy:
23299 + cmp r1, #1
23300 +- bne ._skipfinalcopy
23301 ++ bne .Lskipfinalcopy
23302 + add r2, r3, #288
23303 + add r4, r3, #144
23304 + vld1.8 {d0-d1}, [r2, : 128]!
23305 +@@ -2000,10 +1963,10 @@
23306 + vst1.8 {d0-d1}, [r4, : 128]!
23307 + vst1.8 {d2-d3}, [r4, : 128]!
23308 + vst1.8 d4, [r4, : 64]
23309 +-._skipfinalcopy:
23310 ++.Lskipfinalcopy:
23311 + add r1, r1, #1
23312 + cmp r1, #12
23313 +- blo ._invertloop
23314 ++ blo .Linvertloop
23315 + add r1, r3, #144
23316 + ldr r2, [r1], #4
23317 + ldr r3, [r1], #4
23318 +@@ -2085,21 +2048,15 @@
23319 + add r8, r8, r10, LSL #12
23320 + mov r9, r10, LSR #20
23321 + add r1, r9, r1, LSL #6
23322 +- str r2, [r0], #4
23323 +- str r3, [r0], #4
23324 +- str r4, [r0], #4
23325 +- str r5, [r0], #4
23326 +- str r6, [r0], #4
23327 +- str r7, [r0], #4
23328 +- str r8, [r0], #4
23329 +- str r1, [r0]
23330 +- ldrd r4, [sp, #0]
23331 +- ldrd r6, [sp, #8]
23332 +- ldrd r8, [sp, #16]
23333 +- ldrd r10, [sp, #24]
23334 +- ldr r12, [sp, #480]
23335 +- ldr r14, [sp, #484]
23336 +- ldr r0, =0
23337 +- mov sp, r12
23338 +- vpop {q4, q5, q6, q7}
23339 +- bx lr
23340 ++ str r2, [r0]
23341 ++ str r3, [r0, #4]
23342 ++ str r4, [r0, #8]
23343 ++ str r5, [r0, #12]
23344 ++ str r6, [r0, #16]
23345 ++ str r7, [r0, #20]
23346 ++ str r8, [r0, #24]
23347 ++ str r1, [r0, #28]
23348 ++ movw r0, #0
23349 ++ mov sp, ip
23350 ++ pop {r4-r11, pc}
23351 ++ENDPROC(curve25519_neon)
23352 +diff --git a/arch/arm/crypto/curve25519-glue.c b/arch/arm/crypto/curve25519-glue.c
23353 +new file mode 100644
23354 +index 000000000000..2e9e12d2f642
23355 +--- /dev/null
23356 ++++ b/arch/arm/crypto/curve25519-glue.c
23357 +@@ -0,0 +1,127 @@
23358 ++// SPDX-License-Identifier: GPL-2.0 OR MIT
23359 ++/*
23360 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
23361 ++ *
23362 ++ * Based on public domain code from Daniel J. Bernstein and Peter Schwabe. This
23363 ++ * began from SUPERCOP's curve25519/neon2/scalarmult.s, but has subsequently been
23364 ++ * manually reworked for use in kernel space.
23365 ++ */
23366 ++
23367 ++#include <asm/hwcap.h>
23368 ++#include <asm/neon.h>
23369 ++#include <asm/simd.h>
23370 ++#include <crypto/internal/kpp.h>
23371 ++#include <crypto/internal/simd.h>
23372 ++#include <linux/types.h>
23373 ++#include <linux/module.h>
23374 ++#include <linux/init.h>
23375 ++#include <linux/jump_label.h>
23376 ++#include <crypto/curve25519.h>
23377 ++
23378 ++asmlinkage void curve25519_neon(u8 mypublic[CURVE25519_KEY_SIZE],
23379 ++ const u8 secret[CURVE25519_KEY_SIZE],
23380 ++ const u8 basepoint[CURVE25519_KEY_SIZE]);
23381 ++
23382 ++static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
23383 ++
23384 ++void curve25519_arch(u8 out[CURVE25519_KEY_SIZE],
23385 ++ const u8 scalar[CURVE25519_KEY_SIZE],
23386 ++ const u8 point[CURVE25519_KEY_SIZE])
23387 ++{
23388 ++ if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
23389 ++ kernel_neon_begin();
23390 ++ curve25519_neon(out, scalar, point);
23391 ++ kernel_neon_end();
23392 ++ } else {
23393 ++ curve25519_generic(out, scalar, point);
23394 ++ }
23395 ++}
23396 ++EXPORT_SYMBOL(curve25519_arch);
23397 ++
23398 ++static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf,
23399 ++ unsigned int len)
23400 ++{
23401 ++ u8 *secret = kpp_tfm_ctx(tfm);
23402 ++
23403 ++ if (!len)
23404 ++ curve25519_generate_secret(secret);
23405 ++ else if (len == CURVE25519_KEY_SIZE &&
23406 ++ crypto_memneq(buf, curve25519_null_point, CURVE25519_KEY_SIZE))
23407 ++ memcpy(secret, buf, CURVE25519_KEY_SIZE);
23408 ++ else
23409 ++ return -EINVAL;
23410 ++ return 0;
23411 ++}
23412 ++
23413 ++static int curve25519_compute_value(struct kpp_request *req)
23414 ++{
23415 ++ struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
23416 ++ const u8 *secret = kpp_tfm_ctx(tfm);
23417 ++ u8 public_key[CURVE25519_KEY_SIZE];
23418 ++ u8 buf[CURVE25519_KEY_SIZE];
23419 ++ int copied, nbytes;
23420 ++ u8 const *bp;
23421 ++
23422 ++ if (req->src) {
23423 ++ copied = sg_copy_to_buffer(req->src,
23424 ++ sg_nents_for_len(req->src,
23425 ++ CURVE25519_KEY_SIZE),
23426 ++ public_key, CURVE25519_KEY_SIZE);
23427 ++ if (copied != CURVE25519_KEY_SIZE)
23428 ++ return -EINVAL;
23429 ++ bp = public_key;
23430 ++ } else {
23431 ++ bp = curve25519_base_point;
23432 ++ }
23433 ++
23434 ++ curve25519_arch(buf, secret, bp);
23435 ++
23436 ++ /* might want less than we've got */
23437 ++ nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len);
23438 ++ copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst,
23439 ++ nbytes),
23440 ++ buf, nbytes);
23441 ++ if (copied != nbytes)
23442 ++ return -EINVAL;
23443 ++ return 0;
23444 ++}
23445 ++
23446 ++static unsigned int curve25519_max_size(struct crypto_kpp *tfm)
23447 ++{
23448 ++ return CURVE25519_KEY_SIZE;
23449 ++}
23450 ++
23451 ++static struct kpp_alg curve25519_alg = {
23452 ++ .base.cra_name = "curve25519",
23453 ++ .base.cra_driver_name = "curve25519-neon",
23454 ++ .base.cra_priority = 200,
23455 ++ .base.cra_module = THIS_MODULE,
23456 ++ .base.cra_ctxsize = CURVE25519_KEY_SIZE,
23457 ++
23458 ++ .set_secret = curve25519_set_secret,
23459 ++ .generate_public_key = curve25519_compute_value,
23460 ++ .compute_shared_secret = curve25519_compute_value,
23461 ++ .max_size = curve25519_max_size,
23462 ++};
23463 ++
23464 ++static int __init mod_init(void)
23465 ++{
23466 ++ if (elf_hwcap & HWCAP_NEON) {
23467 ++ static_branch_enable(&have_neon);
23468 ++ return crypto_register_kpp(&curve25519_alg);
23469 ++ }
23470 ++ return 0;
23471 ++}
23472 ++
23473 ++static void __exit mod_exit(void)
23474 ++{
23475 ++ if (elf_hwcap & HWCAP_NEON)
23476 ++ crypto_unregister_kpp(&curve25519_alg);
23477 ++}
23478 ++
23479 ++module_init(mod_init);
23480 ++module_exit(mod_exit);
23481 ++
23482 ++MODULE_ALIAS_CRYPTO("curve25519");
23483 ++MODULE_ALIAS_CRYPTO("curve25519-neon");
23484 ++MODULE_LICENSE("GPL v2");
23485 +--
23486 +cgit v1.2.3-4-ga26e
23487 +
23488 +
23489 +From 4345ed7136e97915ad098fbfe165096a3e6d26c6 Mon Sep 17 00:00:00 2001
23490 +From: Ard Biesheuvel <ardb@××××××.org>
23491 +Date: Fri, 8 Nov 2019 13:22:39 +0100
23492 +Subject: crypto: chacha20poly1305 - import construction and selftest from Zinc
23493 +
23494 +commit ed20078b7e3331e82828be357147af6a3282e4ce upstream.
23495 +
23496 +This incorporates the chacha20poly1305 from the Zinc library, retaining
23497 +the library interface, but replacing the implementation with calls into
23498 +the code that already existed in the kernel's crypto API.
23499 +
23500 +Note that this library API does not implement RFC7539 fully, given that
23501 +it is limited to 64-bit nonces. (The 96-bit nonce version that was part
23502 +of the selftest only has been removed, along with the 96-bit nonce test
23503 +vectors that only tested the selftest but not the actual library itself)
23504 +
23505 +Signed-off-by: Ard Biesheuvel <ardb@××××××.org>
23506 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
23507 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
23508 +---
23509 + include/crypto/chacha20poly1305.h | 37 +
23510 + lib/crypto/Kconfig | 7 +
23511 + lib/crypto/Makefile | 4 +
23512 + lib/crypto/chacha20poly1305-selftest.c | 7348 ++++++++++++++++++++++++++++++++
23513 + lib/crypto/chacha20poly1305.c | 219 +
23514 + 5 files changed, 7615 insertions(+)
23515 + create mode 100644 include/crypto/chacha20poly1305.h
23516 + create mode 100644 lib/crypto/chacha20poly1305-selftest.c
23517 + create mode 100644 lib/crypto/chacha20poly1305.c
23518 +
23519 +diff --git a/include/crypto/chacha20poly1305.h b/include/crypto/chacha20poly1305.h
23520 +new file mode 100644
23521 +index 000000000000..ad3b1de58df8
23522 +--- /dev/null
23523 ++++ b/include/crypto/chacha20poly1305.h
23524 +@@ -0,0 +1,37 @@
23525 ++/* SPDX-License-Identifier: GPL-2.0 OR MIT */
23526 ++/*
23527 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
23528 ++ */
23529 ++
23530 ++#ifndef __CHACHA20POLY1305_H
23531 ++#define __CHACHA20POLY1305_H
23532 ++
23533 ++#include <linux/types.h>
23534 ++
23535 ++enum chacha20poly1305_lengths {
23536 ++ XCHACHA20POLY1305_NONCE_SIZE = 24,
23537 ++ CHACHA20POLY1305_KEY_SIZE = 32,
23538 ++ CHACHA20POLY1305_AUTHTAG_SIZE = 16
23539 ++};
23540 ++
23541 ++void chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
23542 ++ const u8 *ad, const size_t ad_len,
23543 ++ const u64 nonce,
23544 ++ const u8 key[CHACHA20POLY1305_KEY_SIZE]);
23545 ++
23546 ++bool __must_check
23547 ++chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
23548 ++ const u8 *ad, const size_t ad_len, const u64 nonce,
23549 ++ const u8 key[CHACHA20POLY1305_KEY_SIZE]);
23550 ++
23551 ++void xchacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
23552 ++ const u8 *ad, const size_t ad_len,
23553 ++ const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE],
23554 ++ const u8 key[CHACHA20POLY1305_KEY_SIZE]);
23555 ++
23556 ++bool __must_check xchacha20poly1305_decrypt(
23557 ++ u8 *dst, const u8 *src, const size_t src_len, const u8 *ad,
23558 ++ const size_t ad_len, const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE],
23559 ++ const u8 key[CHACHA20POLY1305_KEY_SIZE]);
23560 ++
23561 ++#endif /* __CHACHA20POLY1305_H */
23562 +diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
23563 +index b1d830dc1c9e..0b2c4fce26d9 100644
23564 +--- a/lib/crypto/Kconfig
23565 ++++ b/lib/crypto/Kconfig
23566 +@@ -119,5 +119,12 @@ config CRYPTO_LIB_POLY1305
23567 + by either the generic implementation or an arch-specific one, if one
23568 + is available and enabled.
23569 +
23570 ++config CRYPTO_LIB_CHACHA20POLY1305
23571 ++ tristate "ChaCha20-Poly1305 AEAD support (8-byte nonce library version)"
23572 ++ depends on CRYPTO_ARCH_HAVE_LIB_CHACHA || !CRYPTO_ARCH_HAVE_LIB_CHACHA
23573 ++ depends on CRYPTO_ARCH_HAVE_LIB_POLY1305 || !CRYPTO_ARCH_HAVE_LIB_POLY1305
23574 ++ select CRYPTO_LIB_CHACHA
23575 ++ select CRYPTO_LIB_POLY1305
23576 ++
23577 + config CRYPTO_LIB_SHA256
23578 + tristate
23579 +diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
23580 +index 273c55d5e147..34a701ab8b92 100644
23581 +--- a/lib/crypto/Makefile
23582 ++++ b/lib/crypto/Makefile
23583 +@@ -16,6 +16,9 @@ libblake2s-generic-y += blake2s-generic.o
23584 + obj-$(CONFIG_CRYPTO_LIB_BLAKE2S) += libblake2s.o
23585 + libblake2s-y += blake2s.o
23586 +
23587 ++obj-$(CONFIG_CRYPTO_LIB_CHACHA20POLY1305) += libchacha20poly1305.o
23588 ++libchacha20poly1305-y += chacha20poly1305.o
23589 ++
23590 + obj-$(CONFIG_CRYPTO_LIB_CURVE25519_GENERIC) += libcurve25519.o
23591 + libcurve25519-y := curve25519-fiat32.o
23592 + libcurve25519-$(CONFIG_ARCH_SUPPORTS_INT128) := curve25519-hacl64.o
23593 +@@ -32,4 +35,5 @@ libsha256-y := sha256.o
23594 +
23595 + ifneq ($(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS),y)
23596 + libblake2s-y += blake2s-selftest.o
23597 ++libchacha20poly1305-y += chacha20poly1305-selftest.o
23598 + endif
23599 +diff --git a/lib/crypto/chacha20poly1305-selftest.c b/lib/crypto/chacha20poly1305-selftest.c
23600 +new file mode 100644
23601 +index 000000000000..d1ed0f27cfdb
23602 +--- /dev/null
23603 ++++ b/lib/crypto/chacha20poly1305-selftest.c
23604 +@@ -0,0 +1,7348 @@
23605 ++// SPDX-License-Identifier: GPL-2.0 OR MIT
23606 ++/*
23607 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
23608 ++ */
23609 ++
23610 ++#include <crypto/chacha20poly1305.h>
23611 ++#include <crypto/poly1305.h>
23612 ++
23613 ++#include <asm/unaligned.h>
23614 ++#include <linux/bug.h>
23615 ++#include <linux/init.h>
23616 ++#include <linux/mm.h>
23617 ++#include <linux/kernel.h>
23618 ++#include <linux/slab.h>
23619 ++
23620 ++struct chacha20poly1305_testvec {
23621 ++ const u8 *input, *output, *assoc, *nonce, *key;
23622 ++ size_t ilen, alen, nlen;
23623 ++ bool failure;
23624 ++};
23625 ++
23626 ++/* The first of these are the ChaCha20-Poly1305 AEAD test vectors from RFC7539
23627 ++ * 2.8.2. The ones after them were generated by reference implementations. The
23628 ++ * final, marked ones are taken from Wycheproof, but we only run those on the
23629 ++ * encrypt side, because mostly we're stressing the primitives rather than the
23630 ++ * actual chapoly construction.
23631 ++ */
23632 ++
23633 ++static const u8 enc_input001[] __initconst = {
23634 ++ 0x49, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x65, 0x74,
23635 ++ 0x2d, 0x44, 0x72, 0x61, 0x66, 0x74, 0x73, 0x20,
23636 ++ 0x61, 0x72, 0x65, 0x20, 0x64, 0x72, 0x61, 0x66,
23637 ++ 0x74, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65,
23638 ++ 0x6e, 0x74, 0x73, 0x20, 0x76, 0x61, 0x6c, 0x69,
23639 ++ 0x64, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x20,
23640 ++ 0x6d, 0x61, 0x78, 0x69, 0x6d, 0x75, 0x6d, 0x20,
23641 ++ 0x6f, 0x66, 0x20, 0x73, 0x69, 0x78, 0x20, 0x6d,
23642 ++ 0x6f, 0x6e, 0x74, 0x68, 0x73, 0x20, 0x61, 0x6e,
23643 ++ 0x64, 0x20, 0x6d, 0x61, 0x79, 0x20, 0x62, 0x65,
23644 ++ 0x20, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x64,
23645 ++ 0x2c, 0x20, 0x72, 0x65, 0x70, 0x6c, 0x61, 0x63,
23646 ++ 0x65, 0x64, 0x2c, 0x20, 0x6f, 0x72, 0x20, 0x6f,
23647 ++ 0x62, 0x73, 0x6f, 0x6c, 0x65, 0x74, 0x65, 0x64,
23648 ++ 0x20, 0x62, 0x79, 0x20, 0x6f, 0x74, 0x68, 0x65,
23649 ++ 0x72, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65,
23650 ++ 0x6e, 0x74, 0x73, 0x20, 0x61, 0x74, 0x20, 0x61,
23651 ++ 0x6e, 0x79, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x2e,
23652 ++ 0x20, 0x49, 0x74, 0x20, 0x69, 0x73, 0x20, 0x69,
23653 ++ 0x6e, 0x61, 0x70, 0x70, 0x72, 0x6f, 0x70, 0x72,
23654 ++ 0x69, 0x61, 0x74, 0x65, 0x20, 0x74, 0x6f, 0x20,
23655 ++ 0x75, 0x73, 0x65, 0x20, 0x49, 0x6e, 0x74, 0x65,
23656 ++ 0x72, 0x6e, 0x65, 0x74, 0x2d, 0x44, 0x72, 0x61,
23657 ++ 0x66, 0x74, 0x73, 0x20, 0x61, 0x73, 0x20, 0x72,
23658 ++ 0x65, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65,
23659 ++ 0x20, 0x6d, 0x61, 0x74, 0x65, 0x72, 0x69, 0x61,
23660 ++ 0x6c, 0x20, 0x6f, 0x72, 0x20, 0x74, 0x6f, 0x20,
23661 ++ 0x63, 0x69, 0x74, 0x65, 0x20, 0x74, 0x68, 0x65,
23662 ++ 0x6d, 0x20, 0x6f, 0x74, 0x68, 0x65, 0x72, 0x20,
23663 ++ 0x74, 0x68, 0x61, 0x6e, 0x20, 0x61, 0x73, 0x20,
23664 ++ 0x2f, 0xe2, 0x80, 0x9c, 0x77, 0x6f, 0x72, 0x6b,
23665 ++ 0x20, 0x69, 0x6e, 0x20, 0x70, 0x72, 0x6f, 0x67,
23666 ++ 0x72, 0x65, 0x73, 0x73, 0x2e, 0x2f, 0xe2, 0x80,
23667 ++ 0x9d
23668 ++};
23669 ++static const u8 enc_output001[] __initconst = {
23670 ++ 0x64, 0xa0, 0x86, 0x15, 0x75, 0x86, 0x1a, 0xf4,
23671 ++ 0x60, 0xf0, 0x62, 0xc7, 0x9b, 0xe6, 0x43, 0xbd,
23672 ++ 0x5e, 0x80, 0x5c, 0xfd, 0x34, 0x5c, 0xf3, 0x89,
23673 ++ 0xf1, 0x08, 0x67, 0x0a, 0xc7, 0x6c, 0x8c, 0xb2,
23674 ++ 0x4c, 0x6c, 0xfc, 0x18, 0x75, 0x5d, 0x43, 0xee,
23675 ++ 0xa0, 0x9e, 0xe9, 0x4e, 0x38, 0x2d, 0x26, 0xb0,
23676 ++ 0xbd, 0xb7, 0xb7, 0x3c, 0x32, 0x1b, 0x01, 0x00,
23677 ++ 0xd4, 0xf0, 0x3b, 0x7f, 0x35, 0x58, 0x94, 0xcf,
23678 ++ 0x33, 0x2f, 0x83, 0x0e, 0x71, 0x0b, 0x97, 0xce,
23679 ++ 0x98, 0xc8, 0xa8, 0x4a, 0xbd, 0x0b, 0x94, 0x81,
23680 ++ 0x14, 0xad, 0x17, 0x6e, 0x00, 0x8d, 0x33, 0xbd,
23681 ++ 0x60, 0xf9, 0x82, 0xb1, 0xff, 0x37, 0xc8, 0x55,
23682 ++ 0x97, 0x97, 0xa0, 0x6e, 0xf4, 0xf0, 0xef, 0x61,
23683 ++ 0xc1, 0x86, 0x32, 0x4e, 0x2b, 0x35, 0x06, 0x38,
23684 ++ 0x36, 0x06, 0x90, 0x7b, 0x6a, 0x7c, 0x02, 0xb0,
23685 ++ 0xf9, 0xf6, 0x15, 0x7b, 0x53, 0xc8, 0x67, 0xe4,
23686 ++ 0xb9, 0x16, 0x6c, 0x76, 0x7b, 0x80, 0x4d, 0x46,
23687 ++ 0xa5, 0x9b, 0x52, 0x16, 0xcd, 0xe7, 0xa4, 0xe9,
23688 ++ 0x90, 0x40, 0xc5, 0xa4, 0x04, 0x33, 0x22, 0x5e,
23689 ++ 0xe2, 0x82, 0xa1, 0xb0, 0xa0, 0x6c, 0x52, 0x3e,
23690 ++ 0xaf, 0x45, 0x34, 0xd7, 0xf8, 0x3f, 0xa1, 0x15,
23691 ++ 0x5b, 0x00, 0x47, 0x71, 0x8c, 0xbc, 0x54, 0x6a,
23692 ++ 0x0d, 0x07, 0x2b, 0x04, 0xb3, 0x56, 0x4e, 0xea,
23693 ++ 0x1b, 0x42, 0x22, 0x73, 0xf5, 0x48, 0x27, 0x1a,
23694 ++ 0x0b, 0xb2, 0x31, 0x60, 0x53, 0xfa, 0x76, 0x99,
23695 ++ 0x19, 0x55, 0xeb, 0xd6, 0x31, 0x59, 0x43, 0x4e,
23696 ++ 0xce, 0xbb, 0x4e, 0x46, 0x6d, 0xae, 0x5a, 0x10,
23697 ++ 0x73, 0xa6, 0x72, 0x76, 0x27, 0x09, 0x7a, 0x10,
23698 ++ 0x49, 0xe6, 0x17, 0xd9, 0x1d, 0x36, 0x10, 0x94,
23699 ++ 0xfa, 0x68, 0xf0, 0xff, 0x77, 0x98, 0x71, 0x30,
23700 ++ 0x30, 0x5b, 0xea, 0xba, 0x2e, 0xda, 0x04, 0xdf,
23701 ++ 0x99, 0x7b, 0x71, 0x4d, 0x6c, 0x6f, 0x2c, 0x29,
23702 ++ 0xa6, 0xad, 0x5c, 0xb4, 0x02, 0x2b, 0x02, 0x70,
23703 ++ 0x9b, 0xee, 0xad, 0x9d, 0x67, 0x89, 0x0c, 0xbb,
23704 ++ 0x22, 0x39, 0x23, 0x36, 0xfe, 0xa1, 0x85, 0x1f,
23705 ++ 0x38
23706 ++};
23707 ++static const u8 enc_assoc001[] __initconst = {
23708 ++ 0xf3, 0x33, 0x88, 0x86, 0x00, 0x00, 0x00, 0x00,
23709 ++ 0x00, 0x00, 0x4e, 0x91
23710 ++};
23711 ++static const u8 enc_nonce001[] __initconst = {
23712 ++ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08
23713 ++};
23714 ++static const u8 enc_key001[] __initconst = {
23715 ++ 0x1c, 0x92, 0x40, 0xa5, 0xeb, 0x55, 0xd3, 0x8a,
23716 ++ 0xf3, 0x33, 0x88, 0x86, 0x04, 0xf6, 0xb5, 0xf0,
23717 ++ 0x47, 0x39, 0x17, 0xc1, 0x40, 0x2b, 0x80, 0x09,
23718 ++ 0x9d, 0xca, 0x5c, 0xbc, 0x20, 0x70, 0x75, 0xc0
23719 ++};
23720 ++
23721 ++static const u8 enc_input002[] __initconst = { };
23722 ++static const u8 enc_output002[] __initconst = {
23723 ++ 0xea, 0xe0, 0x1e, 0x9e, 0x2c, 0x91, 0xaa, 0xe1,
23724 ++ 0xdb, 0x5d, 0x99, 0x3f, 0x8a, 0xf7, 0x69, 0x92
23725 ++};
23726 ++static const u8 enc_assoc002[] __initconst = { };
23727 ++static const u8 enc_nonce002[] __initconst = {
23728 ++ 0xca, 0xbf, 0x33, 0x71, 0x32, 0x45, 0x77, 0x8e
23729 ++};
23730 ++static const u8 enc_key002[] __initconst = {
23731 ++ 0x4c, 0xf5, 0x96, 0x83, 0x38, 0xe6, 0xae, 0x7f,
23732 ++ 0x2d, 0x29, 0x25, 0x76, 0xd5, 0x75, 0x27, 0x86,
23733 ++ 0x91, 0x9a, 0x27, 0x7a, 0xfb, 0x46, 0xc5, 0xef,
23734 ++ 0x94, 0x81, 0x79, 0x57, 0x14, 0x59, 0x40, 0x68
23735 ++};
23736 ++
23737 ++static const u8 enc_input003[] __initconst = { };
23738 ++static const u8 enc_output003[] __initconst = {
23739 ++ 0xdd, 0x6b, 0x3b, 0x82, 0xce, 0x5a, 0xbd, 0xd6,
23740 ++ 0xa9, 0x35, 0x83, 0xd8, 0x8c, 0x3d, 0x85, 0x77
23741 ++};
23742 ++static const u8 enc_assoc003[] __initconst = {
23743 ++ 0x33, 0x10, 0x41, 0x12, 0x1f, 0xf3, 0xd2, 0x6b
23744 ++};
23745 ++static const u8 enc_nonce003[] __initconst = {
23746 ++ 0x3d, 0x86, 0xb5, 0x6b, 0xc8, 0xa3, 0x1f, 0x1d
23747 ++};
23748 ++static const u8 enc_key003[] __initconst = {
23749 ++ 0x2d, 0xb0, 0x5d, 0x40, 0xc8, 0xed, 0x44, 0x88,
23750 ++ 0x34, 0xd1, 0x13, 0xaf, 0x57, 0xa1, 0xeb, 0x3a,
23751 ++ 0x2a, 0x80, 0x51, 0x36, 0xec, 0x5b, 0xbc, 0x08,
23752 ++ 0x93, 0x84, 0x21, 0xb5, 0x13, 0x88, 0x3c, 0x0d
23753 ++};
23754 ++
23755 ++static const u8 enc_input004[] __initconst = {
23756 ++ 0xa4
23757 ++};
23758 ++static const u8 enc_output004[] __initconst = {
23759 ++ 0xb7, 0x1b, 0xb0, 0x73, 0x59, 0xb0, 0x84, 0xb2,
23760 ++ 0x6d, 0x8e, 0xab, 0x94, 0x31, 0xa1, 0xae, 0xac,
23761 ++ 0x89
23762 ++};
23763 ++static const u8 enc_assoc004[] __initconst = {
23764 ++ 0x6a, 0xe2, 0xad, 0x3f, 0x88, 0x39, 0x5a, 0x40
23765 ++};
23766 ++static const u8 enc_nonce004[] __initconst = {
23767 ++ 0xd2, 0x32, 0x1f, 0x29, 0x28, 0xc6, 0xc4, 0xc4
23768 ++};
23769 ++static const u8 enc_key004[] __initconst = {
23770 ++ 0x4b, 0x28, 0x4b, 0xa3, 0x7b, 0xbe, 0xe9, 0xf8,
23771 ++ 0x31, 0x80, 0x82, 0xd7, 0xd8, 0xe8, 0xb5, 0xa1,
23772 ++ 0xe2, 0x18, 0x18, 0x8a, 0x9c, 0xfa, 0xa3, 0x3d,
23773 ++ 0x25, 0x71, 0x3e, 0x40, 0xbc, 0x54, 0x7a, 0x3e
23774 ++};
23775 ++
23776 ++static const u8 enc_input005[] __initconst = {
23777 ++ 0x2d
23778 ++};
23779 ++static const u8 enc_output005[] __initconst = {
23780 ++ 0xbf, 0xe1, 0x5b, 0x0b, 0xdb, 0x6b, 0xf5, 0x5e,
23781 ++ 0x6c, 0x5d, 0x84, 0x44, 0x39, 0x81, 0xc1, 0x9c,
23782 ++ 0xac
23783 ++};
23784 ++static const u8 enc_assoc005[] __initconst = { };
23785 ++static const u8 enc_nonce005[] __initconst = {
23786 ++ 0x20, 0x1c, 0xaa, 0x5f, 0x9c, 0xbf, 0x92, 0x30
23787 ++};
23788 ++static const u8 enc_key005[] __initconst = {
23789 ++ 0x66, 0xca, 0x9c, 0x23, 0x2a, 0x4b, 0x4b, 0x31,
23790 ++ 0x0e, 0x92, 0x89, 0x8b, 0xf4, 0x93, 0xc7, 0x87,
23791 ++ 0x98, 0xa3, 0xd8, 0x39, 0xf8, 0xf4, 0xa7, 0x01,
23792 ++ 0xc0, 0x2e, 0x0a, 0xa6, 0x7e, 0x5a, 0x78, 0x87
23793 ++};
23794 ++
23795 ++static const u8 enc_input006[] __initconst = {
23796 ++ 0x33, 0x2f, 0x94, 0xc1, 0xa4, 0xef, 0xcc, 0x2a,
23797 ++ 0x5b, 0xa6, 0xe5, 0x8f, 0x1d, 0x40, 0xf0, 0x92,
23798 ++ 0x3c, 0xd9, 0x24, 0x11, 0xa9, 0x71, 0xf9, 0x37,
23799 ++ 0x14, 0x99, 0xfa, 0xbe, 0xe6, 0x80, 0xde, 0x50,
23800 ++ 0xc9, 0x96, 0xd4, 0xb0, 0xec, 0x9e, 0x17, 0xec,
23801 ++ 0xd2, 0x5e, 0x72, 0x99, 0xfc, 0x0a, 0xe1, 0xcb,
23802 ++ 0x48, 0xd2, 0x85, 0xdd, 0x2f, 0x90, 0xe0, 0x66,
23803 ++ 0x3b, 0xe6, 0x20, 0x74, 0xbe, 0x23, 0x8f, 0xcb,
23804 ++ 0xb4, 0xe4, 0xda, 0x48, 0x40, 0xa6, 0xd1, 0x1b,
23805 ++ 0xc7, 0x42, 0xce, 0x2f, 0x0c, 0xa6, 0x85, 0x6e,
23806 ++ 0x87, 0x37, 0x03, 0xb1, 0x7c, 0x25, 0x96, 0xa3,
23807 ++ 0x05, 0xd8, 0xb0, 0xf4, 0xed, 0xea, 0xc2, 0xf0,
23808 ++ 0x31, 0x98, 0x6c, 0xd1, 0x14, 0x25, 0xc0, 0xcb,
23809 ++ 0x01, 0x74, 0xd0, 0x82, 0xf4, 0x36, 0xf5, 0x41,
23810 ++ 0xd5, 0xdc, 0xca, 0xc5, 0xbb, 0x98, 0xfe, 0xfc,
23811 ++ 0x69, 0x21, 0x70, 0xd8, 0xa4, 0x4b, 0xc8, 0xde,
23812 ++ 0x8f
23813 ++};
23814 ++static const u8 enc_output006[] __initconst = {
23815 ++ 0x8b, 0x06, 0xd3, 0x31, 0xb0, 0x93, 0x45, 0xb1,
23816 ++ 0x75, 0x6e, 0x26, 0xf9, 0x67, 0xbc, 0x90, 0x15,
23817 ++ 0x81, 0x2c, 0xb5, 0xf0, 0xc6, 0x2b, 0xc7, 0x8c,
23818 ++ 0x56, 0xd1, 0xbf, 0x69, 0x6c, 0x07, 0xa0, 0xda,
23819 ++ 0x65, 0x27, 0xc9, 0x90, 0x3d, 0xef, 0x4b, 0x11,
23820 ++ 0x0f, 0x19, 0x07, 0xfd, 0x29, 0x92, 0xd9, 0xc8,
23821 ++ 0xf7, 0x99, 0x2e, 0x4a, 0xd0, 0xb8, 0x2c, 0xdc,
23822 ++ 0x93, 0xf5, 0x9e, 0x33, 0x78, 0xd1, 0x37, 0xc3,
23823 ++ 0x66, 0xd7, 0x5e, 0xbc, 0x44, 0xbf, 0x53, 0xa5,
23824 ++ 0xbc, 0xc4, 0xcb, 0x7b, 0x3a, 0x8e, 0x7f, 0x02,
23825 ++ 0xbd, 0xbb, 0xe7, 0xca, 0xa6, 0x6c, 0x6b, 0x93,
23826 ++ 0x21, 0x93, 0x10, 0x61, 0xe7, 0x69, 0xd0, 0x78,
23827 ++ 0xf3, 0x07, 0x5a, 0x1a, 0x8f, 0x73, 0xaa, 0xb1,
23828 ++ 0x4e, 0xd3, 0xda, 0x4f, 0xf3, 0x32, 0xe1, 0x66,
23829 ++ 0x3e, 0x6c, 0xc6, 0x13, 0xba, 0x06, 0x5b, 0xfc,
23830 ++ 0x6a, 0xe5, 0x6f, 0x60, 0xfb, 0x07, 0x40, 0xb0,
23831 ++ 0x8c, 0x9d, 0x84, 0x43, 0x6b, 0xc1, 0xf7, 0x8d,
23832 ++ 0x8d, 0x31, 0xf7, 0x7a, 0x39, 0x4d, 0x8f, 0x9a,
23833 ++ 0xeb
23834 ++};
23835 ++static const u8 enc_assoc006[] __initconst = {
23836 ++ 0x70, 0xd3, 0x33, 0xf3, 0x8b, 0x18, 0x0b
23837 ++};
23838 ++static const u8 enc_nonce006[] __initconst = {
23839 ++ 0xdf, 0x51, 0x84, 0x82, 0x42, 0x0c, 0x75, 0x9c
23840 ++};
23841 ++static const u8 enc_key006[] __initconst = {
23842 ++ 0x68, 0x7b, 0x8d, 0x8e, 0xe3, 0xc4, 0xdd, 0xae,
23843 ++ 0xdf, 0x72, 0x7f, 0x53, 0x72, 0x25, 0x1e, 0x78,
23844 ++ 0x91, 0xcb, 0x69, 0x76, 0x1f, 0x49, 0x93, 0xf9,
23845 ++ 0x6f, 0x21, 0xcc, 0x39, 0x9c, 0xad, 0xb1, 0x01
23846 ++};
23847 ++
23848 ++static const u8 enc_input007[] __initconst = {
23849 ++ 0x9b, 0x18, 0xdb, 0xdd, 0x9a, 0x0f, 0x3e, 0xa5,
23850 ++ 0x15, 0x17, 0xde, 0xdf, 0x08, 0x9d, 0x65, 0x0a,
23851 ++ 0x67, 0x30, 0x12, 0xe2, 0x34, 0x77, 0x4b, 0xc1,
23852 ++ 0xd9, 0xc6, 0x1f, 0xab, 0xc6, 0x18, 0x50, 0x17,
23853 ++ 0xa7, 0x9d, 0x3c, 0xa6, 0xc5, 0x35, 0x8c, 0x1c,
23854 ++ 0xc0, 0xa1, 0x7c, 0x9f, 0x03, 0x89, 0xca, 0xe1,
23855 ++ 0xe6, 0xe9, 0xd4, 0xd3, 0x88, 0xdb, 0xb4, 0x51,
23856 ++ 0x9d, 0xec, 0xb4, 0xfc, 0x52, 0xee, 0x6d, 0xf1,
23857 ++ 0x75, 0x42, 0xc6, 0xfd, 0xbd, 0x7a, 0x8e, 0x86,
23858 ++ 0xfc, 0x44, 0xb3, 0x4f, 0xf3, 0xea, 0x67, 0x5a,
23859 ++ 0x41, 0x13, 0xba, 0xb0, 0xdc, 0xe1, 0xd3, 0x2a,
23860 ++ 0x7c, 0x22, 0xb3, 0xca, 0xac, 0x6a, 0x37, 0x98,
23861 ++ 0x3e, 0x1d, 0x40, 0x97, 0xf7, 0x9b, 0x1d, 0x36,
23862 ++ 0x6b, 0xb3, 0x28, 0xbd, 0x60, 0x82, 0x47, 0x34,
23863 ++ 0xaa, 0x2f, 0x7d, 0xe9, 0xa8, 0x70, 0x81, 0x57,
23864 ++ 0xd4, 0xb9, 0x77, 0x0a, 0x9d, 0x29, 0xa7, 0x84,
23865 ++ 0x52, 0x4f, 0xc2, 0x4a, 0x40, 0x3b, 0x3c, 0xd4,
23866 ++ 0xc9, 0x2a, 0xdb, 0x4a, 0x53, 0xc4, 0xbe, 0x80,
23867 ++ 0xe9, 0x51, 0x7f, 0x8f, 0xc7, 0xa2, 0xce, 0x82,
23868 ++ 0x5c, 0x91, 0x1e, 0x74, 0xd9, 0xd0, 0xbd, 0xd5,
23869 ++ 0xf3, 0xfd, 0xda, 0x4d, 0x25, 0xb4, 0xbb, 0x2d,
23870 ++ 0xac, 0x2f, 0x3d, 0x71, 0x85, 0x7b, 0xcf, 0x3c,
23871 ++ 0x7b, 0x3e, 0x0e, 0x22, 0x78, 0x0c, 0x29, 0xbf,
23872 ++ 0xe4, 0xf4, 0x57, 0xb3, 0xcb, 0x49, 0xa0, 0xfc,
23873 ++ 0x1e, 0x05, 0x4e, 0x16, 0xbc, 0xd5, 0xa8, 0xa3,
23874 ++ 0xee, 0x05, 0x35, 0xc6, 0x7c, 0xab, 0x60, 0x14,
23875 ++ 0x55, 0x1a, 0x8e, 0xc5, 0x88, 0x5d, 0xd5, 0x81,
23876 ++ 0xc2, 0x81, 0xa5, 0xc4, 0x60, 0xdb, 0xaf, 0x77,
23877 ++ 0x91, 0xe1, 0xce, 0xa2, 0x7e, 0x7f, 0x42, 0xe3,
23878 ++ 0xb0, 0x13, 0x1c, 0x1f, 0x25, 0x60, 0x21, 0xe2,
23879 ++ 0x40, 0x5f, 0x99, 0xb7, 0x73, 0xec, 0x9b, 0x2b,
23880 ++ 0xf0, 0x65, 0x11, 0xc8, 0xd0, 0x0a, 0x9f, 0xd3
23881 ++};
23882 ++static const u8 enc_output007[] __initconst = {
23883 ++ 0x85, 0x04, 0xc2, 0xed, 0x8d, 0xfd, 0x97, 0x5c,
23884 ++ 0xd2, 0xb7, 0xe2, 0xc1, 0x6b, 0xa3, 0xba, 0xf8,
23885 ++ 0xc9, 0x50, 0xc3, 0xc6, 0xa5, 0xe3, 0xa4, 0x7c,
23886 ++ 0xc3, 0x23, 0x49, 0x5e, 0xa9, 0xb9, 0x32, 0xeb,
23887 ++ 0x8a, 0x7c, 0xca, 0xe5, 0xec, 0xfb, 0x7c, 0xc0,
23888 ++ 0xcb, 0x7d, 0xdc, 0x2c, 0x9d, 0x92, 0x55, 0x21,
23889 ++ 0x0a, 0xc8, 0x43, 0x63, 0x59, 0x0a, 0x31, 0x70,
23890 ++ 0x82, 0x67, 0x41, 0x03, 0xf8, 0xdf, 0xf2, 0xac,
23891 ++ 0xa7, 0x02, 0xd4, 0xd5, 0x8a, 0x2d, 0xc8, 0x99,
23892 ++ 0x19, 0x66, 0xd0, 0xf6, 0x88, 0x2c, 0x77, 0xd9,
23893 ++ 0xd4, 0x0d, 0x6c, 0xbd, 0x98, 0xde, 0xe7, 0x7f,
23894 ++ 0xad, 0x7e, 0x8a, 0xfb, 0xe9, 0x4b, 0xe5, 0xf7,
23895 ++ 0xe5, 0x50, 0xa0, 0x90, 0x3f, 0xd6, 0x22, 0x53,
23896 ++ 0xe3, 0xfe, 0x1b, 0xcc, 0x79, 0x3b, 0xec, 0x12,
23897 ++ 0x47, 0x52, 0xa7, 0xd6, 0x04, 0xe3, 0x52, 0xe6,
23898 ++ 0x93, 0x90, 0x91, 0x32, 0x73, 0x79, 0xb8, 0xd0,
23899 ++ 0x31, 0xde, 0x1f, 0x9f, 0x2f, 0x05, 0x38, 0x54,
23900 ++ 0x2f, 0x35, 0x04, 0x39, 0xe0, 0xa7, 0xba, 0xc6,
23901 ++ 0x52, 0xf6, 0x37, 0x65, 0x4c, 0x07, 0xa9, 0x7e,
23902 ++ 0xb3, 0x21, 0x6f, 0x74, 0x8c, 0xc9, 0xde, 0xdb,
23903 ++ 0x65, 0x1b, 0x9b, 0xaa, 0x60, 0xb1, 0x03, 0x30,
23904 ++ 0x6b, 0xb2, 0x03, 0xc4, 0x1c, 0x04, 0xf8, 0x0f,
23905 ++ 0x64, 0xaf, 0x46, 0xe4, 0x65, 0x99, 0x49, 0xe2,
23906 ++ 0xea, 0xce, 0x78, 0x00, 0xd8, 0x8b, 0xd5, 0x2e,
23907 ++ 0xcf, 0xfc, 0x40, 0x49, 0xe8, 0x58, 0xdc, 0x34,
23908 ++ 0x9c, 0x8c, 0x61, 0xbf, 0x0a, 0x8e, 0xec, 0x39,
23909 ++ 0xa9, 0x30, 0x05, 0x5a, 0xd2, 0x56, 0x01, 0xc7,
23910 ++ 0xda, 0x8f, 0x4e, 0xbb, 0x43, 0xa3, 0x3a, 0xf9,
23911 ++ 0x15, 0x2a, 0xd0, 0xa0, 0x7a, 0x87, 0x34, 0x82,
23912 ++ 0xfe, 0x8a, 0xd1, 0x2d, 0x5e, 0xc7, 0xbf, 0x04,
23913 ++ 0x53, 0x5f, 0x3b, 0x36, 0xd4, 0x25, 0x5c, 0x34,
23914 ++ 0x7a, 0x8d, 0xd5, 0x05, 0xce, 0x72, 0xca, 0xef,
23915 ++ 0x7a, 0x4b, 0xbc, 0xb0, 0x10, 0x5c, 0x96, 0x42,
23916 ++ 0x3a, 0x00, 0x98, 0xcd, 0x15, 0xe8, 0xb7, 0x53
23917 ++};
23918 ++static const u8 enc_assoc007[] __initconst = { };
23919 ++static const u8 enc_nonce007[] __initconst = {
23920 ++ 0xde, 0x7b, 0xef, 0xc3, 0x65, 0x1b, 0x68, 0xb0
23921 ++};
23922 ++static const u8 enc_key007[] __initconst = {
23923 ++ 0x8d, 0xb8, 0x91, 0x48, 0xf0, 0xe7, 0x0a, 0xbd,
23924 ++ 0xf9, 0x3f, 0xcd, 0xd9, 0xa0, 0x1e, 0x42, 0x4c,
23925 ++ 0xe7, 0xde, 0x25, 0x3d, 0xa3, 0xd7, 0x05, 0x80,
23926 ++ 0x8d, 0xf2, 0x82, 0xac, 0x44, 0x16, 0x51, 0x01
23927 ++};
23928 ++
23929 ++static const u8 enc_input008[] __initconst = {
23930 ++ 0xc3, 0x09, 0x94, 0x62, 0xe6, 0x46, 0x2e, 0x10,
23931 ++ 0xbe, 0x00, 0xe4, 0xfc, 0xf3, 0x40, 0xa3, 0xe2,
23932 ++ 0x0f, 0xc2, 0x8b, 0x28, 0xdc, 0xba, 0xb4, 0x3c,
23933 ++ 0xe4, 0x21, 0x58, 0x61, 0xcd, 0x8b, 0xcd, 0xfb,
23934 ++ 0xac, 0x94, 0xa1, 0x45, 0xf5, 0x1c, 0xe1, 0x12,
23935 ++ 0xe0, 0x3b, 0x67, 0x21, 0x54, 0x5e, 0x8c, 0xaa,
23936 ++ 0xcf, 0xdb, 0xb4, 0x51, 0xd4, 0x13, 0xda, 0xe6,
23937 ++ 0x83, 0x89, 0xb6, 0x92, 0xe9, 0x21, 0x76, 0xa4,
23938 ++ 0x93, 0x7d, 0x0e, 0xfd, 0x96, 0x36, 0x03, 0x91,
23939 ++ 0x43, 0x5c, 0x92, 0x49, 0x62, 0x61, 0x7b, 0xeb,
23940 ++ 0x43, 0x89, 0xb8, 0x12, 0x20, 0x43, 0xd4, 0x47,
23941 ++ 0x06, 0x84, 0xee, 0x47, 0xe9, 0x8a, 0x73, 0x15,
23942 ++ 0x0f, 0x72, 0xcf, 0xed, 0xce, 0x96, 0xb2, 0x7f,
23943 ++ 0x21, 0x45, 0x76, 0xeb, 0x26, 0x28, 0x83, 0x6a,
23944 ++ 0xad, 0xaa, 0xa6, 0x81, 0xd8, 0x55, 0xb1, 0xa3,
23945 ++ 0x85, 0xb3, 0x0c, 0xdf, 0xf1, 0x69, 0x2d, 0x97,
23946 ++ 0x05, 0x2a, 0xbc, 0x7c, 0x7b, 0x25, 0xf8, 0x80,
23947 ++ 0x9d, 0x39, 0x25, 0xf3, 0x62, 0xf0, 0x66, 0x5e,
23948 ++ 0xf4, 0xa0, 0xcf, 0xd8, 0xfd, 0x4f, 0xb1, 0x1f,
23949 ++ 0x60, 0x3a, 0x08, 0x47, 0xaf, 0xe1, 0xf6, 0x10,
23950 ++ 0x77, 0x09, 0xa7, 0x27, 0x8f, 0x9a, 0x97, 0x5a,
23951 ++ 0x26, 0xfa, 0xfe, 0x41, 0x32, 0x83, 0x10, 0xe0,
23952 ++ 0x1d, 0xbf, 0x64, 0x0d, 0xf4, 0x1c, 0x32, 0x35,
23953 ++ 0xe5, 0x1b, 0x36, 0xef, 0xd4, 0x4a, 0x93, 0x4d,
23954 ++ 0x00, 0x7c, 0xec, 0x02, 0x07, 0x8b, 0x5d, 0x7d,
23955 ++ 0x1b, 0x0e, 0xd1, 0xa6, 0xa5, 0x5d, 0x7d, 0x57,
23956 ++ 0x88, 0xa8, 0xcc, 0x81, 0xb4, 0x86, 0x4e, 0xb4,
23957 ++ 0x40, 0xe9, 0x1d, 0xc3, 0xb1, 0x24, 0x3e, 0x7f,
23958 ++ 0xcc, 0x8a, 0x24, 0x9b, 0xdf, 0x6d, 0xf0, 0x39,
23959 ++ 0x69, 0x3e, 0x4c, 0xc0, 0x96, 0xe4, 0x13, 0xda,
23960 ++ 0x90, 0xda, 0xf4, 0x95, 0x66, 0x8b, 0x17, 0x17,
23961 ++ 0xfe, 0x39, 0x43, 0x25, 0xaa, 0xda, 0xa0, 0x43,
23962 ++ 0x3c, 0xb1, 0x41, 0x02, 0xa3, 0xf0, 0xa7, 0x19,
23963 ++ 0x59, 0xbc, 0x1d, 0x7d, 0x6c, 0x6d, 0x91, 0x09,
23964 ++ 0x5c, 0xb7, 0x5b, 0x01, 0xd1, 0x6f, 0x17, 0x21,
23965 ++ 0x97, 0xbf, 0x89, 0x71, 0xa5, 0xb0, 0x6e, 0x07,
23966 ++ 0x45, 0xfd, 0x9d, 0xea, 0x07, 0xf6, 0x7a, 0x9f,
23967 ++ 0x10, 0x18, 0x22, 0x30, 0x73, 0xac, 0xd4, 0x6b,
23968 ++ 0x72, 0x44, 0xed, 0xd9, 0x19, 0x9b, 0x2d, 0x4a,
23969 ++ 0x41, 0xdd, 0xd1, 0x85, 0x5e, 0x37, 0x19, 0xed,
23970 ++ 0xd2, 0x15, 0x8f, 0x5e, 0x91, 0xdb, 0x33, 0xf2,
23971 ++ 0xe4, 0xdb, 0xff, 0x98, 0xfb, 0xa3, 0xb5, 0xca,
23972 ++ 0x21, 0x69, 0x08, 0xe7, 0x8a, 0xdf, 0x90, 0xff,
23973 ++ 0x3e, 0xe9, 0x20, 0x86, 0x3c, 0xe9, 0xfc, 0x0b,
23974 ++ 0xfe, 0x5c, 0x61, 0xaa, 0x13, 0x92, 0x7f, 0x7b,
23975 ++ 0xec, 0xe0, 0x6d, 0xa8, 0x23, 0x22, 0xf6, 0x6b,
23976 ++ 0x77, 0xc4, 0xfe, 0x40, 0x07, 0x3b, 0xb6, 0xf6,
23977 ++ 0x8e, 0x5f, 0xd4, 0xb9, 0xb7, 0x0f, 0x21, 0x04,
23978 ++ 0xef, 0x83, 0x63, 0x91, 0x69, 0x40, 0xa3, 0x48,
23979 ++ 0x5c, 0xd2, 0x60, 0xf9, 0x4f, 0x6c, 0x47, 0x8b,
23980 ++ 0x3b, 0xb1, 0x9f, 0x8e, 0xee, 0x16, 0x8a, 0x13,
23981 ++ 0xfc, 0x46, 0x17, 0xc3, 0xc3, 0x32, 0x56, 0xf8,
23982 ++ 0x3c, 0x85, 0x3a, 0xb6, 0x3e, 0xaa, 0x89, 0x4f,
23983 ++ 0xb3, 0xdf, 0x38, 0xfd, 0xf1, 0xe4, 0x3a, 0xc0,
23984 ++ 0xe6, 0x58, 0xb5, 0x8f, 0xc5, 0x29, 0xa2, 0x92,
23985 ++ 0x4a, 0xb6, 0xa0, 0x34, 0x7f, 0xab, 0xb5, 0x8a,
23986 ++ 0x90, 0xa1, 0xdb, 0x4d, 0xca, 0xb6, 0x2c, 0x41,
23987 ++ 0x3c, 0xf7, 0x2b, 0x21, 0xc3, 0xfd, 0xf4, 0x17,
23988 ++ 0x5c, 0xb5, 0x33, 0x17, 0x68, 0x2b, 0x08, 0x30,
23989 ++ 0xf3, 0xf7, 0x30, 0x3c, 0x96, 0xe6, 0x6a, 0x20,
23990 ++ 0x97, 0xe7, 0x4d, 0x10, 0x5f, 0x47, 0x5f, 0x49,
23991 ++ 0x96, 0x09, 0xf0, 0x27, 0x91, 0xc8, 0xf8, 0x5a,
23992 ++ 0x2e, 0x79, 0xb5, 0xe2, 0xb8, 0xe8, 0xb9, 0x7b,
23993 ++ 0xd5, 0x10, 0xcb, 0xff, 0x5d, 0x14, 0x73, 0xf3
23994 ++};
23995 ++static const u8 enc_output008[] __initconst = {
23996 ++ 0x14, 0xf6, 0x41, 0x37, 0xa6, 0xd4, 0x27, 0xcd,
23997 ++ 0xdb, 0x06, 0x3e, 0x9a, 0x4e, 0xab, 0xd5, 0xb1,
23998 ++ 0x1e, 0x6b, 0xd2, 0xbc, 0x11, 0xf4, 0x28, 0x93,
23999 ++ 0x63, 0x54, 0xef, 0xbb, 0x5e, 0x1d, 0x3a, 0x1d,
24000 ++ 0x37, 0x3c, 0x0a, 0x6c, 0x1e, 0xc2, 0xd1, 0x2c,
24001 ++ 0xb5, 0xa3, 0xb5, 0x7b, 0xb8, 0x8f, 0x25, 0xa6,
24002 ++ 0x1b, 0x61, 0x1c, 0xec, 0x28, 0x58, 0x26, 0xa4,
24003 ++ 0xa8, 0x33, 0x28, 0x25, 0x5c, 0x45, 0x05, 0xe5,
24004 ++ 0x6c, 0x99, 0xe5, 0x45, 0xc4, 0xa2, 0x03, 0x84,
24005 ++ 0x03, 0x73, 0x1e, 0x8c, 0x49, 0xac, 0x20, 0xdd,
24006 ++ 0x8d, 0xb3, 0xc4, 0xf5, 0xe7, 0x4f, 0xf1, 0xed,
24007 ++ 0xa1, 0x98, 0xde, 0xa4, 0x96, 0xdd, 0x2f, 0xab,
24008 ++ 0xab, 0x97, 0xcf, 0x3e, 0xd2, 0x9e, 0xb8, 0x13,
24009 ++ 0x07, 0x28, 0x29, 0x19, 0xaf, 0xfd, 0xf2, 0x49,
24010 ++ 0x43, 0xea, 0x49, 0x26, 0x91, 0xc1, 0x07, 0xd6,
24011 ++ 0xbb, 0x81, 0x75, 0x35, 0x0d, 0x24, 0x7f, 0xc8,
24012 ++ 0xda, 0xd4, 0xb7, 0xeb, 0xe8, 0x5c, 0x09, 0xa2,
24013 ++ 0x2f, 0xdc, 0x28, 0x7d, 0x3a, 0x03, 0xfa, 0x94,
24014 ++ 0xb5, 0x1d, 0x17, 0x99, 0x36, 0xc3, 0x1c, 0x18,
24015 ++ 0x34, 0xe3, 0x9f, 0xf5, 0x55, 0x7c, 0xb0, 0x60,
24016 ++ 0x9d, 0xff, 0xac, 0xd4, 0x61, 0xf2, 0xad, 0xf8,
24017 ++ 0xce, 0xc7, 0xbe, 0x5c, 0xd2, 0x95, 0xa8, 0x4b,
24018 ++ 0x77, 0x13, 0x19, 0x59, 0x26, 0xc9, 0xb7, 0x8f,
24019 ++ 0x6a, 0xcb, 0x2d, 0x37, 0x91, 0xea, 0x92, 0x9c,
24020 ++ 0x94, 0x5b, 0xda, 0x0b, 0xce, 0xfe, 0x30, 0x20,
24021 ++ 0xf8, 0x51, 0xad, 0xf2, 0xbe, 0xe7, 0xc7, 0xff,
24022 ++ 0xb3, 0x33, 0x91, 0x6a, 0xc9, 0x1a, 0x41, 0xc9,
24023 ++ 0x0f, 0xf3, 0x10, 0x0e, 0xfd, 0x53, 0xff, 0x6c,
24024 ++ 0x16, 0x52, 0xd9, 0xf3, 0xf7, 0x98, 0x2e, 0xc9,
24025 ++ 0x07, 0x31, 0x2c, 0x0c, 0x72, 0xd7, 0xc5, 0xc6,
24026 ++ 0x08, 0x2a, 0x7b, 0xda, 0xbd, 0x7e, 0x02, 0xea,
24027 ++ 0x1a, 0xbb, 0xf2, 0x04, 0x27, 0x61, 0x28, 0x8e,
24028 ++ 0xf5, 0x04, 0x03, 0x1f, 0x4c, 0x07, 0x55, 0x82,
24029 ++ 0xec, 0x1e, 0xd7, 0x8b, 0x2f, 0x65, 0x56, 0xd1,
24030 ++ 0xd9, 0x1e, 0x3c, 0xe9, 0x1f, 0x5e, 0x98, 0x70,
24031 ++ 0x38, 0x4a, 0x8c, 0x49, 0xc5, 0x43, 0xa0, 0xa1,
24032 ++ 0x8b, 0x74, 0x9d, 0x4c, 0x62, 0x0d, 0x10, 0x0c,
24033 ++ 0xf4, 0x6c, 0x8f, 0xe0, 0xaa, 0x9a, 0x8d, 0xb7,
24034 ++ 0xe0, 0xbe, 0x4c, 0x87, 0xf1, 0x98, 0x2f, 0xcc,
24035 ++ 0xed, 0xc0, 0x52, 0x29, 0xdc, 0x83, 0xf8, 0xfc,
24036 ++ 0x2c, 0x0e, 0xa8, 0x51, 0x4d, 0x80, 0x0d, 0xa3,
24037 ++ 0xfe, 0xd8, 0x37, 0xe7, 0x41, 0x24, 0xfc, 0xfb,
24038 ++ 0x75, 0xe3, 0x71, 0x7b, 0x57, 0x45, 0xf5, 0x97,
24039 ++ 0x73, 0x65, 0x63, 0x14, 0x74, 0xb8, 0x82, 0x9f,
24040 ++ 0xf8, 0x60, 0x2f, 0x8a, 0xf2, 0x4e, 0xf1, 0x39,
24041 ++ 0xda, 0x33, 0x91, 0xf8, 0x36, 0xe0, 0x8d, 0x3f,
24042 ++ 0x1f, 0x3b, 0x56, 0xdc, 0xa0, 0x8f, 0x3c, 0x9d,
24043 ++ 0x71, 0x52, 0xa7, 0xb8, 0xc0, 0xa5, 0xc6, 0xa2,
24044 ++ 0x73, 0xda, 0xf4, 0x4b, 0x74, 0x5b, 0x00, 0x3d,
24045 ++ 0x99, 0xd7, 0x96, 0xba, 0xe6, 0xe1, 0xa6, 0x96,
24046 ++ 0x38, 0xad, 0xb3, 0xc0, 0xd2, 0xba, 0x91, 0x6b,
24047 ++ 0xf9, 0x19, 0xdd, 0x3b, 0xbe, 0xbe, 0x9c, 0x20,
24048 ++ 0x50, 0xba, 0xa1, 0xd0, 0xce, 0x11, 0xbd, 0x95,
24049 ++ 0xd8, 0xd1, 0xdd, 0x33, 0x85, 0x74, 0xdc, 0xdb,
24050 ++ 0x66, 0x76, 0x44, 0xdc, 0x03, 0x74, 0x48, 0x35,
24051 ++ 0x98, 0xb1, 0x18, 0x47, 0x94, 0x7d, 0xff, 0x62,
24052 ++ 0xe4, 0x58, 0x78, 0xab, 0xed, 0x95, 0x36, 0xd9,
24053 ++ 0x84, 0x91, 0x82, 0x64, 0x41, 0xbb, 0x58, 0xe6,
24054 ++ 0x1c, 0x20, 0x6d, 0x15, 0x6b, 0x13, 0x96, 0xe8,
24055 ++ 0x35, 0x7f, 0xdc, 0x40, 0x2c, 0xe9, 0xbc, 0x8a,
24056 ++ 0x4f, 0x92, 0xec, 0x06, 0x2d, 0x50, 0xdf, 0x93,
24057 ++ 0x5d, 0x65, 0x5a, 0xa8, 0xfc, 0x20, 0x50, 0x14,
24058 ++ 0xa9, 0x8a, 0x7e, 0x1d, 0x08, 0x1f, 0xe2, 0x99,
24059 ++ 0xd0, 0xbe, 0xfb, 0x3a, 0x21, 0x9d, 0xad, 0x86,
24060 ++ 0x54, 0xfd, 0x0d, 0x98, 0x1c, 0x5a, 0x6f, 0x1f,
24061 ++ 0x9a, 0x40, 0xcd, 0xa2, 0xff, 0x6a, 0xf1, 0x54
24062 ++};
24063 ++static const u8 enc_assoc008[] __initconst = { };
24064 ++static const u8 enc_nonce008[] __initconst = {
24065 ++ 0x0e, 0x0d, 0x57, 0xbb, 0x7b, 0x40, 0x54, 0x02
24066 ++};
24067 ++static const u8 enc_key008[] __initconst = {
24068 ++ 0xf2, 0xaa, 0x4f, 0x99, 0xfd, 0x3e, 0xa8, 0x53,
24069 ++ 0xc1, 0x44, 0xe9, 0x81, 0x18, 0xdc, 0xf5, 0xf0,
24070 ++ 0x3e, 0x44, 0x15, 0x59, 0xe0, 0xc5, 0x44, 0x86,
24071 ++ 0xc3, 0x91, 0xa8, 0x75, 0xc0, 0x12, 0x46, 0xba
24072 ++};
24073 ++
24074 ++static const u8 enc_input009[] __initconst = {
24075 ++ 0xe6, 0xc3, 0xdb, 0x63, 0x55, 0x15, 0xe3, 0x5b,
24076 ++ 0xb7, 0x4b, 0x27, 0x8b, 0x5a, 0xdd, 0xc2, 0xe8,
24077 ++ 0x3a, 0x6b, 0xd7, 0x81, 0x96, 0x35, 0x97, 0xca,
24078 ++ 0xd7, 0x68, 0xe8, 0xef, 0xce, 0xab, 0xda, 0x09,
24079 ++ 0x6e, 0xd6, 0x8e, 0xcb, 0x55, 0xb5, 0xe1, 0xe5,
24080 ++ 0x57, 0xfd, 0xc4, 0xe3, 0xe0, 0x18, 0x4f, 0x85,
24081 ++ 0xf5, 0x3f, 0x7e, 0x4b, 0x88, 0xc9, 0x52, 0x44,
24082 ++ 0x0f, 0xea, 0xaf, 0x1f, 0x71, 0x48, 0x9f, 0x97,
24083 ++ 0x6d, 0xb9, 0x6f, 0x00, 0xa6, 0xde, 0x2b, 0x77,
24084 ++ 0x8b, 0x15, 0xad, 0x10, 0xa0, 0x2b, 0x7b, 0x41,
24085 ++ 0x90, 0x03, 0x2d, 0x69, 0xae, 0xcc, 0x77, 0x7c,
24086 ++ 0xa5, 0x9d, 0x29, 0x22, 0xc2, 0xea, 0xb4, 0x00,
24087 ++ 0x1a, 0xd2, 0x7a, 0x98, 0x8a, 0xf9, 0xf7, 0x82,
24088 ++ 0xb0, 0xab, 0xd8, 0xa6, 0x94, 0x8d, 0x58, 0x2f,
24089 ++ 0x01, 0x9e, 0x00, 0x20, 0xfc, 0x49, 0xdc, 0x0e,
24090 ++ 0x03, 0xe8, 0x45, 0x10, 0xd6, 0xa8, 0xda, 0x55,
24091 ++ 0x10, 0x9a, 0xdf, 0x67, 0x22, 0x8b, 0x43, 0xab,
24092 ++ 0x00, 0xbb, 0x02, 0xc8, 0xdd, 0x7b, 0x97, 0x17,
24093 ++ 0xd7, 0x1d, 0x9e, 0x02, 0x5e, 0x48, 0xde, 0x8e,
24094 ++ 0xcf, 0x99, 0x07, 0x95, 0x92, 0x3c, 0x5f, 0x9f,
24095 ++ 0xc5, 0x8a, 0xc0, 0x23, 0xaa, 0xd5, 0x8c, 0x82,
24096 ++ 0x6e, 0x16, 0x92, 0xb1, 0x12, 0x17, 0x07, 0xc3,
24097 ++ 0xfb, 0x36, 0xf5, 0x6c, 0x35, 0xd6, 0x06, 0x1f,
24098 ++ 0x9f, 0xa7, 0x94, 0xa2, 0x38, 0x63, 0x9c, 0xb0,
24099 ++ 0x71, 0xb3, 0xa5, 0xd2, 0xd8, 0xba, 0x9f, 0x08,
24100 ++ 0x01, 0xb3, 0xff, 0x04, 0x97, 0x73, 0x45, 0x1b,
24101 ++ 0xd5, 0xa9, 0x9c, 0x80, 0xaf, 0x04, 0x9a, 0x85,
24102 ++ 0xdb, 0x32, 0x5b, 0x5d, 0x1a, 0xc1, 0x36, 0x28,
24103 ++ 0x10, 0x79, 0xf1, 0x3c, 0xbf, 0x1a, 0x41, 0x5c,
24104 ++ 0x4e, 0xdf, 0xb2, 0x7c, 0x79, 0x3b, 0x7a, 0x62,
24105 ++ 0x3d, 0x4b, 0xc9, 0x9b, 0x2a, 0x2e, 0x7c, 0xa2,
24106 ++ 0xb1, 0x11, 0x98, 0xa7, 0x34, 0x1a, 0x00, 0xf3,
24107 ++ 0xd1, 0xbc, 0x18, 0x22, 0xba, 0x02, 0x56, 0x62,
24108 ++ 0x31, 0x10, 0x11, 0x6d, 0xe0, 0x54, 0x9d, 0x40,
24109 ++ 0x1f, 0x26, 0x80, 0x41, 0xca, 0x3f, 0x68, 0x0f,
24110 ++ 0x32, 0x1d, 0x0a, 0x8e, 0x79, 0xd8, 0xa4, 0x1b,
24111 ++ 0x29, 0x1c, 0x90, 0x8e, 0xc5, 0xe3, 0xb4, 0x91,
24112 ++ 0x37, 0x9a, 0x97, 0x86, 0x99, 0xd5, 0x09, 0xc5,
24113 ++ 0xbb, 0xa3, 0x3f, 0x21, 0x29, 0x82, 0x14, 0x5c,
24114 ++ 0xab, 0x25, 0xfb, 0xf2, 0x4f, 0x58, 0x26, 0xd4,
24115 ++ 0x83, 0xaa, 0x66, 0x89, 0x67, 0x7e, 0xc0, 0x49,
24116 ++ 0xe1, 0x11, 0x10, 0x7f, 0x7a, 0xda, 0x29, 0x04,
24117 ++ 0xff, 0xf0, 0xcb, 0x09, 0x7c, 0x9d, 0xfa, 0x03,
24118 ++ 0x6f, 0x81, 0x09, 0x31, 0x60, 0xfb, 0x08, 0xfa,
24119 ++ 0x74, 0xd3, 0x64, 0x44, 0x7c, 0x55, 0x85, 0xec,
24120 ++ 0x9c, 0x6e, 0x25, 0xb7, 0x6c, 0xc5, 0x37, 0xb6,
24121 ++ 0x83, 0x87, 0x72, 0x95, 0x8b, 0x9d, 0xe1, 0x69,
24122 ++ 0x5c, 0x31, 0x95, 0x42, 0xa6, 0x2c, 0xd1, 0x36,
24123 ++ 0x47, 0x1f, 0xec, 0x54, 0xab, 0xa2, 0x1c, 0xd8,
24124 ++ 0x00, 0xcc, 0xbc, 0x0d, 0x65, 0xe2, 0x67, 0xbf,
24125 ++ 0xbc, 0xea, 0xee, 0x9e, 0xe4, 0x36, 0x95, 0xbe,
24126 ++ 0x73, 0xd9, 0xa6, 0xd9, 0x0f, 0xa0, 0xcc, 0x82,
24127 ++ 0x76, 0x26, 0xad, 0x5b, 0x58, 0x6c, 0x4e, 0xab,
24128 ++ 0x29, 0x64, 0xd3, 0xd9, 0xa9, 0x08, 0x8c, 0x1d,
24129 ++ 0xa1, 0x4f, 0x80, 0xd8, 0x3f, 0x94, 0xfb, 0xd3,
24130 ++ 0x7b, 0xfc, 0xd1, 0x2b, 0xc3, 0x21, 0xeb, 0xe5,
24131 ++ 0x1c, 0x84, 0x23, 0x7f, 0x4b, 0xfa, 0xdb, 0x34,
24132 ++ 0x18, 0xa2, 0xc2, 0xe5, 0x13, 0xfe, 0x6c, 0x49,
24133 ++ 0x81, 0xd2, 0x73, 0xe7, 0xe2, 0xd7, 0xe4, 0x4f,
24134 ++ 0x4b, 0x08, 0x6e, 0xb1, 0x12, 0x22, 0x10, 0x9d,
24135 ++ 0xac, 0x51, 0x1e, 0x17, 0xd9, 0x8a, 0x0b, 0x42,
24136 ++ 0x88, 0x16, 0x81, 0x37, 0x7c, 0x6a, 0xf7, 0xef,
24137 ++ 0x2d, 0xe3, 0xd9, 0xf8, 0x5f, 0xe0, 0x53, 0x27,
24138 ++ 0x74, 0xb9, 0xe2, 0xd6, 0x1c, 0x80, 0x2c, 0x52,
24139 ++ 0x65
24140 ++};
24141 ++static const u8 enc_output009[] __initconst = {
24142 ++ 0xfd, 0x81, 0x8d, 0xd0, 0x3d, 0xb4, 0xd5, 0xdf,
24143 ++ 0xd3, 0x42, 0x47, 0x5a, 0x6d, 0x19, 0x27, 0x66,
24144 ++ 0x4b, 0x2e, 0x0c, 0x27, 0x9c, 0x96, 0x4c, 0x72,
24145 ++ 0x02, 0xa3, 0x65, 0xc3, 0xb3, 0x6f, 0x2e, 0xbd,
24146 ++ 0x63, 0x8a, 0x4a, 0x5d, 0x29, 0xa2, 0xd0, 0x28,
24147 ++ 0x48, 0xc5, 0x3d, 0x98, 0xa3, 0xbc, 0xe0, 0xbe,
24148 ++ 0x3b, 0x3f, 0xe6, 0x8a, 0xa4, 0x7f, 0x53, 0x06,
24149 ++ 0xfa, 0x7f, 0x27, 0x76, 0x72, 0x31, 0xa1, 0xf5,
24150 ++ 0xd6, 0x0c, 0x52, 0x47, 0xba, 0xcd, 0x4f, 0xd7,
24151 ++ 0xeb, 0x05, 0x48, 0x0d, 0x7c, 0x35, 0x4a, 0x09,
24152 ++ 0xc9, 0x76, 0x71, 0x02, 0xa3, 0xfb, 0xb7, 0x1a,
24153 ++ 0x65, 0xb7, 0xed, 0x98, 0xc6, 0x30, 0x8a, 0x00,
24154 ++ 0xae, 0xa1, 0x31, 0xe5, 0xb5, 0x9e, 0x6d, 0x62,
24155 ++ 0xda, 0xda, 0x07, 0x0f, 0x38, 0x38, 0xd3, 0xcb,
24156 ++ 0xc1, 0xb0, 0xad, 0xec, 0x72, 0xec, 0xb1, 0xa2,
24157 ++ 0x7b, 0x59, 0xf3, 0x3d, 0x2b, 0xef, 0xcd, 0x28,
24158 ++ 0x5b, 0x83, 0xcc, 0x18, 0x91, 0x88, 0xb0, 0x2e,
24159 ++ 0xf9, 0x29, 0x31, 0x18, 0xf9, 0x4e, 0xe9, 0x0a,
24160 ++ 0x91, 0x92, 0x9f, 0xae, 0x2d, 0xad, 0xf4, 0xe6,
24161 ++ 0x1a, 0xe2, 0xa4, 0xee, 0x47, 0x15, 0xbf, 0x83,
24162 ++ 0x6e, 0xd7, 0x72, 0x12, 0x3b, 0x2d, 0x24, 0xe9,
24163 ++ 0xb2, 0x55, 0xcb, 0x3c, 0x10, 0xf0, 0x24, 0x8a,
24164 ++ 0x4a, 0x02, 0xea, 0x90, 0x25, 0xf0, 0xb4, 0x79,
24165 ++ 0x3a, 0xef, 0x6e, 0xf5, 0x52, 0xdf, 0xb0, 0x0a,
24166 ++ 0xcd, 0x24, 0x1c, 0xd3, 0x2e, 0x22, 0x74, 0xea,
24167 ++ 0x21, 0x6f, 0xe9, 0xbd, 0xc8, 0x3e, 0x36, 0x5b,
24168 ++ 0x19, 0xf1, 0xca, 0x99, 0x0a, 0xb4, 0xa7, 0x52,
24169 ++ 0x1a, 0x4e, 0xf2, 0xad, 0x8d, 0x56, 0x85, 0xbb,
24170 ++ 0x64, 0x89, 0xba, 0x26, 0xf9, 0xc7, 0xe1, 0x89,
24171 ++ 0x19, 0x22, 0x77, 0xc3, 0xa8, 0xfc, 0xff, 0xad,
24172 ++ 0xfe, 0xb9, 0x48, 0xae, 0x12, 0x30, 0x9f, 0x19,
24173 ++ 0xfb, 0x1b, 0xef, 0x14, 0x87, 0x8a, 0x78, 0x71,
24174 ++ 0xf3, 0xf4, 0xb7, 0x00, 0x9c, 0x1d, 0xb5, 0x3d,
24175 ++ 0x49, 0x00, 0x0c, 0x06, 0xd4, 0x50, 0xf9, 0x54,
24176 ++ 0x45, 0xb2, 0x5b, 0x43, 0xdb, 0x6d, 0xcf, 0x1a,
24177 ++ 0xe9, 0x7a, 0x7a, 0xcf, 0xfc, 0x8a, 0x4e, 0x4d,
24178 ++ 0x0b, 0x07, 0x63, 0x28, 0xd8, 0xe7, 0x08, 0x95,
24179 ++ 0xdf, 0xa6, 0x72, 0x93, 0x2e, 0xbb, 0xa0, 0x42,
24180 ++ 0x89, 0x16, 0xf1, 0xd9, 0x0c, 0xf9, 0xa1, 0x16,
24181 ++ 0xfd, 0xd9, 0x03, 0xb4, 0x3b, 0x8a, 0xf5, 0xf6,
24182 ++ 0xe7, 0x6b, 0x2e, 0x8e, 0x4c, 0x3d, 0xe2, 0xaf,
24183 ++ 0x08, 0x45, 0x03, 0xff, 0x09, 0xb6, 0xeb, 0x2d,
24184 ++ 0xc6, 0x1b, 0x88, 0x94, 0xac, 0x3e, 0xf1, 0x9f,
24185 ++ 0x0e, 0x0e, 0x2b, 0xd5, 0x00, 0x4d, 0x3f, 0x3b,
24186 ++ 0x53, 0xae, 0xaf, 0x1c, 0x33, 0x5f, 0x55, 0x6e,
24187 ++ 0x8d, 0xaf, 0x05, 0x7a, 0x10, 0x34, 0xc9, 0xf4,
24188 ++ 0x66, 0xcb, 0x62, 0x12, 0xa6, 0xee, 0xe8, 0x1c,
24189 ++ 0x5d, 0x12, 0x86, 0xdb, 0x6f, 0x1c, 0x33, 0xc4,
24190 ++ 0x1c, 0xda, 0x82, 0x2d, 0x3b, 0x59, 0xfe, 0xb1,
24191 ++ 0xa4, 0x59, 0x41, 0x86, 0xd0, 0xef, 0xae, 0xfb,
24192 ++ 0xda, 0x6d, 0x11, 0xb8, 0xca, 0xe9, 0x6e, 0xff,
24193 ++ 0xf7, 0xa9, 0xd9, 0x70, 0x30, 0xfc, 0x53, 0xe2,
24194 ++ 0xd7, 0xa2, 0x4e, 0xc7, 0x91, 0xd9, 0x07, 0x06,
24195 ++ 0xaa, 0xdd, 0xb0, 0x59, 0x28, 0x1d, 0x00, 0x66,
24196 ++ 0xc5, 0x54, 0xc2, 0xfc, 0x06, 0xda, 0x05, 0x90,
24197 ++ 0x52, 0x1d, 0x37, 0x66, 0xee, 0xf0, 0xb2, 0x55,
24198 ++ 0x8a, 0x5d, 0xd2, 0x38, 0x86, 0x94, 0x9b, 0xfc,
24199 ++ 0x10, 0x4c, 0xa1, 0xb9, 0x64, 0x3e, 0x44, 0xb8,
24200 ++ 0x5f, 0xb0, 0x0c, 0xec, 0xe0, 0xc9, 0xe5, 0x62,
24201 ++ 0x75, 0x3f, 0x09, 0xd5, 0xf5, 0xd9, 0x26, 0xba,
24202 ++ 0x9e, 0xd2, 0xf4, 0xb9, 0x48, 0x0a, 0xbc, 0xa2,
24203 ++ 0xd6, 0x7c, 0x36, 0x11, 0x7d, 0x26, 0x81, 0x89,
24204 ++ 0xcf, 0xa4, 0xad, 0x73, 0x0e, 0xee, 0xcc, 0x06,
24205 ++ 0xa9, 0xdb, 0xb1, 0xfd, 0xfb, 0x09, 0x7f, 0x90,
24206 ++ 0x42, 0x37, 0x2f, 0xe1, 0x9c, 0x0f, 0x6f, 0xcf,
24207 ++ 0x43, 0xb5, 0xd9, 0x90, 0xe1, 0x85, 0xf5, 0xa8,
24208 ++ 0xae
24209 ++};
24210 ++static const u8 enc_assoc009[] __initconst = {
24211 ++ 0x5a, 0x27, 0xff, 0xeb, 0xdf, 0x84, 0xb2, 0x9e,
24212 ++ 0xef
24213 ++};
24214 ++static const u8 enc_nonce009[] __initconst = {
24215 ++ 0xef, 0x2d, 0x63, 0xee, 0x6b, 0x80, 0x8b, 0x78
24216 ++};
24217 ++static const u8 enc_key009[] __initconst = {
24218 ++ 0xea, 0xbc, 0x56, 0x99, 0xe3, 0x50, 0xff, 0xc5,
24219 ++ 0xcc, 0x1a, 0xd7, 0xc1, 0x57, 0x72, 0xea, 0x86,
24220 ++ 0x5b, 0x89, 0x88, 0x61, 0x3d, 0x2f, 0x9b, 0xb2,
24221 ++ 0xe7, 0x9c, 0xec, 0x74, 0x6e, 0x3e, 0xf4, 0x3b
24222 ++};
24223 ++
24224 ++static const u8 enc_input010[] __initconst = {
24225 ++ 0x42, 0x93, 0xe4, 0xeb, 0x97, 0xb0, 0x57, 0xbf,
24226 ++ 0x1a, 0x8b, 0x1f, 0xe4, 0x5f, 0x36, 0x20, 0x3c,
24227 ++ 0xef, 0x0a, 0xa9, 0x48, 0x5f, 0x5f, 0x37, 0x22,
24228 ++ 0x3a, 0xde, 0xe3, 0xae, 0xbe, 0xad, 0x07, 0xcc,
24229 ++ 0xb1, 0xf6, 0xf5, 0xf9, 0x56, 0xdd, 0xe7, 0x16,
24230 ++ 0x1e, 0x7f, 0xdf, 0x7a, 0x9e, 0x75, 0xb7, 0xc7,
24231 ++ 0xbe, 0xbe, 0x8a, 0x36, 0x04, 0xc0, 0x10, 0xf4,
24232 ++ 0x95, 0x20, 0x03, 0xec, 0xdc, 0x05, 0xa1, 0x7d,
24233 ++ 0xc4, 0xa9, 0x2c, 0x82, 0xd0, 0xbc, 0x8b, 0xc5,
24234 ++ 0xc7, 0x45, 0x50, 0xf6, 0xa2, 0x1a, 0xb5, 0x46,
24235 ++ 0x3b, 0x73, 0x02, 0xa6, 0x83, 0x4b, 0x73, 0x82,
24236 ++ 0x58, 0x5e, 0x3b, 0x65, 0x2f, 0x0e, 0xfd, 0x2b,
24237 ++ 0x59, 0x16, 0xce, 0xa1, 0x60, 0x9c, 0xe8, 0x3a,
24238 ++ 0x99, 0xed, 0x8d, 0x5a, 0xcf, 0xf6, 0x83, 0xaf,
24239 ++ 0xba, 0xd7, 0x73, 0x73, 0x40, 0x97, 0x3d, 0xca,
24240 ++ 0xef, 0x07, 0x57, 0xe6, 0xd9, 0x70, 0x0e, 0x95,
24241 ++ 0xae, 0xa6, 0x8d, 0x04, 0xcc, 0xee, 0xf7, 0x09,
24242 ++ 0x31, 0x77, 0x12, 0xa3, 0x23, 0x97, 0x62, 0xb3,
24243 ++ 0x7b, 0x32, 0xfb, 0x80, 0x14, 0x48, 0x81, 0xc3,
24244 ++ 0xe5, 0xea, 0x91, 0x39, 0x52, 0x81, 0xa2, 0x4f,
24245 ++ 0xe4, 0xb3, 0x09, 0xff, 0xde, 0x5e, 0xe9, 0x58,
24246 ++ 0x84, 0x6e, 0xf9, 0x3d, 0xdf, 0x25, 0xea, 0xad,
24247 ++ 0xae, 0xe6, 0x9a, 0xd1, 0x89, 0x55, 0xd3, 0xde,
24248 ++ 0x6c, 0x52, 0xdb, 0x70, 0xfe, 0x37, 0xce, 0x44,
24249 ++ 0x0a, 0xa8, 0x25, 0x5f, 0x92, 0xc1, 0x33, 0x4a,
24250 ++ 0x4f, 0x9b, 0x62, 0x35, 0xff, 0xce, 0xc0, 0xa9,
24251 ++ 0x60, 0xce, 0x52, 0x00, 0x97, 0x51, 0x35, 0x26,
24252 ++ 0x2e, 0xb9, 0x36, 0xa9, 0x87, 0x6e, 0x1e, 0xcc,
24253 ++ 0x91, 0x78, 0x53, 0x98, 0x86, 0x5b, 0x9c, 0x74,
24254 ++ 0x7d, 0x88, 0x33, 0xe1, 0xdf, 0x37, 0x69, 0x2b,
24255 ++ 0xbb, 0xf1, 0x4d, 0xf4, 0xd1, 0xf1, 0x39, 0x93,
24256 ++ 0x17, 0x51, 0x19, 0xe3, 0x19, 0x1e, 0x76, 0x37,
24257 ++ 0x25, 0xfb, 0x09, 0x27, 0x6a, 0xab, 0x67, 0x6f,
24258 ++ 0x14, 0x12, 0x64, 0xe7, 0xc4, 0x07, 0xdf, 0x4d,
24259 ++ 0x17, 0xbb, 0x6d, 0xe0, 0xe9, 0xb9, 0xab, 0xca,
24260 ++ 0x10, 0x68, 0xaf, 0x7e, 0xb7, 0x33, 0x54, 0x73,
24261 ++ 0x07, 0x6e, 0xf7, 0x81, 0x97, 0x9c, 0x05, 0x6f,
24262 ++ 0x84, 0x5f, 0xd2, 0x42, 0xfb, 0x38, 0xcf, 0xd1,
24263 ++ 0x2f, 0x14, 0x30, 0x88, 0x98, 0x4d, 0x5a, 0xa9,
24264 ++ 0x76, 0xd5, 0x4f, 0x3e, 0x70, 0x6c, 0x85, 0x76,
24265 ++ 0xd7, 0x01, 0xa0, 0x1a, 0xc8, 0x4e, 0xaa, 0xac,
24266 ++ 0x78, 0xfe, 0x46, 0xde, 0x6a, 0x05, 0x46, 0xa7,
24267 ++ 0x43, 0x0c, 0xb9, 0xde, 0xb9, 0x68, 0xfb, 0xce,
24268 ++ 0x42, 0x99, 0x07, 0x4d, 0x0b, 0x3b, 0x5a, 0x30,
24269 ++ 0x35, 0xa8, 0xf9, 0x3a, 0x73, 0xef, 0x0f, 0xdb,
24270 ++ 0x1e, 0x16, 0x42, 0xc4, 0xba, 0xae, 0x58, 0xaa,
24271 ++ 0xf8, 0xe5, 0x75, 0x2f, 0x1b, 0x15, 0x5c, 0xfd,
24272 ++ 0x0a, 0x97, 0xd0, 0xe4, 0x37, 0x83, 0x61, 0x5f,
24273 ++ 0x43, 0xa6, 0xc7, 0x3f, 0x38, 0x59, 0xe6, 0xeb,
24274 ++ 0xa3, 0x90, 0xc3, 0xaa, 0xaa, 0x5a, 0xd3, 0x34,
24275 ++ 0xd4, 0x17, 0xc8, 0x65, 0x3e, 0x57, 0xbc, 0x5e,
24276 ++ 0xdd, 0x9e, 0xb7, 0xf0, 0x2e, 0x5b, 0xb2, 0x1f,
24277 ++ 0x8a, 0x08, 0x0d, 0x45, 0x91, 0x0b, 0x29, 0x53,
24278 ++ 0x4f, 0x4c, 0x5a, 0x73, 0x56, 0xfe, 0xaf, 0x41,
24279 ++ 0x01, 0x39, 0x0a, 0x24, 0x3c, 0x7e, 0xbe, 0x4e,
24280 ++ 0x53, 0xf3, 0xeb, 0x06, 0x66, 0x51, 0x28, 0x1d,
24281 ++ 0xbd, 0x41, 0x0a, 0x01, 0xab, 0x16, 0x47, 0x27,
24282 ++ 0x47, 0x47, 0xf7, 0xcb, 0x46, 0x0a, 0x70, 0x9e,
24283 ++ 0x01, 0x9c, 0x09, 0xe1, 0x2a, 0x00, 0x1a, 0xd8,
24284 ++ 0xd4, 0x79, 0x9d, 0x80, 0x15, 0x8e, 0x53, 0x2a,
24285 ++ 0x65, 0x83, 0x78, 0x3e, 0x03, 0x00, 0x07, 0x12,
24286 ++ 0x1f, 0x33, 0x3e, 0x7b, 0x13, 0x37, 0xf1, 0xc3,
24287 ++ 0xef, 0xb7, 0xc1, 0x20, 0x3c, 0x3e, 0x67, 0x66,
24288 ++ 0x5d, 0x88, 0xa7, 0x7d, 0x33, 0x50, 0x77, 0xb0,
24289 ++ 0x28, 0x8e, 0xe7, 0x2c, 0x2e, 0x7a, 0xf4, 0x3c,
24290 ++ 0x8d, 0x74, 0x83, 0xaf, 0x8e, 0x87, 0x0f, 0xe4,
24291 ++ 0x50, 0xff, 0x84, 0x5c, 0x47, 0x0c, 0x6a, 0x49,
24292 ++ 0xbf, 0x42, 0x86, 0x77, 0x15, 0x48, 0xa5, 0x90,
24293 ++ 0x5d, 0x93, 0xd6, 0x2a, 0x11, 0xd5, 0xd5, 0x11,
24294 ++ 0xaa, 0xce, 0xe7, 0x6f, 0xa5, 0xb0, 0x09, 0x2c,
24295 ++ 0x8d, 0xd3, 0x92, 0xf0, 0x5a, 0x2a, 0xda, 0x5b,
24296 ++ 0x1e, 0xd5, 0x9a, 0xc4, 0xc4, 0xf3, 0x49, 0x74,
24297 ++ 0x41, 0xca, 0xe8, 0xc1, 0xf8, 0x44, 0xd6, 0x3c,
24298 ++ 0xae, 0x6c, 0x1d, 0x9a, 0x30, 0x04, 0x4d, 0x27,
24299 ++ 0x0e, 0xb1, 0x5f, 0x59, 0xa2, 0x24, 0xe8, 0xe1,
24300 ++ 0x98, 0xc5, 0x6a, 0x4c, 0xfe, 0x41, 0xd2, 0x27,
24301 ++ 0x42, 0x52, 0xe1, 0xe9, 0x7d, 0x62, 0xe4, 0x88,
24302 ++ 0x0f, 0xad, 0xb2, 0x70, 0xcb, 0x9d, 0x4c, 0x27,
24303 ++ 0x2e, 0x76, 0x1e, 0x1a, 0x63, 0x65, 0xf5, 0x3b,
24304 ++ 0xf8, 0x57, 0x69, 0xeb, 0x5b, 0x38, 0x26, 0x39,
24305 ++ 0x33, 0x25, 0x45, 0x3e, 0x91, 0xb8, 0xd8, 0xc7,
24306 ++ 0xd5, 0x42, 0xc0, 0x22, 0x31, 0x74, 0xf4, 0xbc,
24307 ++ 0x0c, 0x23, 0xf1, 0xca, 0xc1, 0x8d, 0xd7, 0xbe,
24308 ++ 0xc9, 0x62, 0xe4, 0x08, 0x1a, 0xcf, 0x36, 0xd5,
24309 ++ 0xfe, 0x55, 0x21, 0x59, 0x91, 0x87, 0x87, 0xdf,
24310 ++ 0x06, 0xdb, 0xdf, 0x96, 0x45, 0x58, 0xda, 0x05,
24311 ++ 0xcd, 0x50, 0x4d, 0xd2, 0x7d, 0x05, 0x18, 0x73,
24312 ++ 0x6a, 0x8d, 0x11, 0x85, 0xa6, 0x88, 0xe8, 0xda,
24313 ++ 0xe6, 0x30, 0x33, 0xa4, 0x89, 0x31, 0x75, 0xbe,
24314 ++ 0x69, 0x43, 0x84, 0x43, 0x50, 0x87, 0xdd, 0x71,
24315 ++ 0x36, 0x83, 0xc3, 0x78, 0x74, 0x24, 0x0a, 0xed,
24316 ++ 0x7b, 0xdb, 0xa4, 0x24, 0x0b, 0xb9, 0x7e, 0x5d,
24317 ++ 0xff, 0xde, 0xb1, 0xef, 0x61, 0x5a, 0x45, 0x33,
24318 ++ 0xf6, 0x17, 0x07, 0x08, 0x98, 0x83, 0x92, 0x0f,
24319 ++ 0x23, 0x6d, 0xe6, 0xaa, 0x17, 0x54, 0xad, 0x6a,
24320 ++ 0xc8, 0xdb, 0x26, 0xbe, 0xb8, 0xb6, 0x08, 0xfa,
24321 ++ 0x68, 0xf1, 0xd7, 0x79, 0x6f, 0x18, 0xb4, 0x9e,
24322 ++ 0x2d, 0x3f, 0x1b, 0x64, 0xaf, 0x8d, 0x06, 0x0e,
24323 ++ 0x49, 0x28, 0xe0, 0x5d, 0x45, 0x68, 0x13, 0x87,
24324 ++ 0xfa, 0xde, 0x40, 0x7b, 0xd2, 0xc3, 0x94, 0xd5,
24325 ++ 0xe1, 0xd9, 0xc2, 0xaf, 0x55, 0x89, 0xeb, 0xb4,
24326 ++ 0x12, 0x59, 0xa8, 0xd4, 0xc5, 0x29, 0x66, 0x38,
24327 ++ 0xe6, 0xac, 0x22, 0x22, 0xd9, 0x64, 0x9b, 0x34,
24328 ++ 0x0a, 0x32, 0x9f, 0xc2, 0xbf, 0x17, 0x6c, 0x3f,
24329 ++ 0x71, 0x7a, 0x38, 0x6b, 0x98, 0xfb, 0x49, 0x36,
24330 ++ 0x89, 0xc9, 0xe2, 0xd6, 0xc7, 0x5d, 0xd0, 0x69,
24331 ++ 0x5f, 0x23, 0x35, 0xc9, 0x30, 0xe2, 0xfd, 0x44,
24332 ++ 0x58, 0x39, 0xd7, 0x97, 0xfb, 0x5c, 0x00, 0xd5,
24333 ++ 0x4f, 0x7a, 0x1a, 0x95, 0x8b, 0x62, 0x4b, 0xce,
24334 ++ 0xe5, 0x91, 0x21, 0x7b, 0x30, 0x00, 0xd6, 0xdd,
24335 ++ 0x6d, 0x02, 0x86, 0x49, 0x0f, 0x3c, 0x1a, 0x27,
24336 ++ 0x3c, 0xd3, 0x0e, 0x71, 0xf2, 0xff, 0xf5, 0x2f,
24337 ++ 0x87, 0xac, 0x67, 0x59, 0x81, 0xa3, 0xf7, 0xf8,
24338 ++ 0xd6, 0x11, 0x0c, 0x84, 0xa9, 0x03, 0xee, 0x2a,
24339 ++ 0xc4, 0xf3, 0x22, 0xab, 0x7c, 0xe2, 0x25, 0xf5,
24340 ++ 0x67, 0xa3, 0xe4, 0x11, 0xe0, 0x59, 0xb3, 0xca,
24341 ++ 0x87, 0xa0, 0xae, 0xc9, 0xa6, 0x62, 0x1b, 0x6e,
24342 ++ 0x4d, 0x02, 0x6b, 0x07, 0x9d, 0xfd, 0xd0, 0x92,
24343 ++ 0x06, 0xe1, 0xb2, 0x9a, 0x4a, 0x1f, 0x1f, 0x13,
24344 ++ 0x49, 0x99, 0x97, 0x08, 0xde, 0x7f, 0x98, 0xaf,
24345 ++ 0x51, 0x98, 0xee, 0x2c, 0xcb, 0xf0, 0x0b, 0xc6,
24346 ++ 0xb6, 0xb7, 0x2d, 0x9a, 0xb1, 0xac, 0xa6, 0xe3,
24347 ++ 0x15, 0x77, 0x9d, 0x6b, 0x1a, 0xe4, 0xfc, 0x8b,
24348 ++ 0xf2, 0x17, 0x59, 0x08, 0x04, 0x58, 0x81, 0x9d,
24349 ++ 0x1b, 0x1b, 0x69, 0x55, 0xc2, 0xb4, 0x3c, 0x1f,
24350 ++ 0x50, 0xf1, 0x7f, 0x77, 0x90, 0x4c, 0x66, 0x40,
24351 ++ 0x5a, 0xc0, 0x33, 0x1f, 0xcb, 0x05, 0x6d, 0x5c,
24352 ++ 0x06, 0x87, 0x52, 0xa2, 0x8f, 0x26, 0xd5, 0x4f
24353 ++};
24354 ++static const u8 enc_output010[] __initconst = {
24355 ++ 0xe5, 0x26, 0xa4, 0x3d, 0xbd, 0x33, 0xd0, 0x4b,
24356 ++ 0x6f, 0x05, 0xa7, 0x6e, 0x12, 0x7a, 0xd2, 0x74,
24357 ++ 0xa6, 0xdd, 0xbd, 0x95, 0xeb, 0xf9, 0xa4, 0xf1,
24358 ++ 0x59, 0x93, 0x91, 0x70, 0xd9, 0xfe, 0x9a, 0xcd,
24359 ++ 0x53, 0x1f, 0x3a, 0xab, 0xa6, 0x7c, 0x9f, 0xa6,
24360 ++ 0x9e, 0xbd, 0x99, 0xd9, 0xb5, 0x97, 0x44, 0xd5,
24361 ++ 0x14, 0x48, 0x4d, 0x9d, 0xc0, 0xd0, 0x05, 0x96,
24362 ++ 0xeb, 0x4c, 0x78, 0x55, 0x09, 0x08, 0x01, 0x02,
24363 ++ 0x30, 0x90, 0x7b, 0x96, 0x7a, 0x7b, 0x5f, 0x30,
24364 ++ 0x41, 0x24, 0xce, 0x68, 0x61, 0x49, 0x86, 0x57,
24365 ++ 0x82, 0xdd, 0x53, 0x1c, 0x51, 0x28, 0x2b, 0x53,
24366 ++ 0x6e, 0x2d, 0xc2, 0x20, 0x4c, 0xdd, 0x8f, 0x65,
24367 ++ 0x10, 0x20, 0x50, 0xdd, 0x9d, 0x50, 0xe5, 0x71,
24368 ++ 0x40, 0x53, 0x69, 0xfc, 0x77, 0x48, 0x11, 0xb9,
24369 ++ 0xde, 0xa4, 0x8d, 0x58, 0xe4, 0xa6, 0x1a, 0x18,
24370 ++ 0x47, 0x81, 0x7e, 0xfc, 0xdd, 0xf6, 0xef, 0xce,
24371 ++ 0x2f, 0x43, 0x68, 0xd6, 0x06, 0xe2, 0x74, 0x6a,
24372 ++ 0xad, 0x90, 0xf5, 0x37, 0xf3, 0x3d, 0x82, 0x69,
24373 ++ 0x40, 0xe9, 0x6b, 0xa7, 0x3d, 0xa8, 0x1e, 0xd2,
24374 ++ 0x02, 0x7c, 0xb7, 0x9b, 0xe4, 0xda, 0x8f, 0x95,
24375 ++ 0x06, 0xc5, 0xdf, 0x73, 0xa3, 0x20, 0x9a, 0x49,
24376 ++ 0xde, 0x9c, 0xbc, 0xee, 0x14, 0x3f, 0x81, 0x5e,
24377 ++ 0xf8, 0x3b, 0x59, 0x3c, 0xe1, 0x68, 0x12, 0x5a,
24378 ++ 0x3a, 0x76, 0x3a, 0x3f, 0xf7, 0x87, 0x33, 0x0a,
24379 ++ 0x01, 0xb8, 0xd4, 0xed, 0xb6, 0xbe, 0x94, 0x5e,
24380 ++ 0x70, 0x40, 0x56, 0x67, 0x1f, 0x50, 0x44, 0x19,
24381 ++ 0xce, 0x82, 0x70, 0x10, 0x87, 0x13, 0x20, 0x0b,
24382 ++ 0x4c, 0x5a, 0xb6, 0xf6, 0xa7, 0xae, 0x81, 0x75,
24383 ++ 0x01, 0x81, 0xe6, 0x4b, 0x57, 0x7c, 0xdd, 0x6d,
24384 ++ 0xf8, 0x1c, 0x29, 0x32, 0xf7, 0xda, 0x3c, 0x2d,
24385 ++ 0xf8, 0x9b, 0x25, 0x6e, 0x00, 0xb4, 0xf7, 0x2f,
24386 ++ 0xf7, 0x04, 0xf7, 0xa1, 0x56, 0xac, 0x4f, 0x1a,
24387 ++ 0x64, 0xb8, 0x47, 0x55, 0x18, 0x7b, 0x07, 0x4d,
24388 ++ 0xbd, 0x47, 0x24, 0x80, 0x5d, 0xa2, 0x70, 0xc5,
24389 ++ 0xdd, 0x8e, 0x82, 0xd4, 0xeb, 0xec, 0xb2, 0x0c,
24390 ++ 0x39, 0xd2, 0x97, 0xc1, 0xcb, 0xeb, 0xf4, 0x77,
24391 ++ 0x59, 0xb4, 0x87, 0xef, 0xcb, 0x43, 0x2d, 0x46,
24392 ++ 0x54, 0xd1, 0xa7, 0xd7, 0x15, 0x99, 0x0a, 0x43,
24393 ++ 0xa1, 0xe0, 0x99, 0x33, 0x71, 0xc1, 0xed, 0xfe,
24394 ++ 0x72, 0x46, 0x33, 0x8e, 0x91, 0x08, 0x9f, 0xc8,
24395 ++ 0x2e, 0xca, 0xfa, 0xdc, 0x59, 0xd5, 0xc3, 0x76,
24396 ++ 0x84, 0x9f, 0xa3, 0x37, 0x68, 0xc3, 0xf0, 0x47,
24397 ++ 0x2c, 0x68, 0xdb, 0x5e, 0xc3, 0x49, 0x4c, 0xe8,
24398 ++ 0x92, 0x85, 0xe2, 0x23, 0xd3, 0x3f, 0xad, 0x32,
24399 ++ 0xe5, 0x2b, 0x82, 0xd7, 0x8f, 0x99, 0x0a, 0x59,
24400 ++ 0x5c, 0x45, 0xd9, 0xb4, 0x51, 0x52, 0xc2, 0xae,
24401 ++ 0xbf, 0x80, 0xcf, 0xc9, 0xc9, 0x51, 0x24, 0x2a,
24402 ++ 0x3b, 0x3a, 0x4d, 0xae, 0xeb, 0xbd, 0x22, 0xc3,
24403 ++ 0x0e, 0x0f, 0x59, 0x25, 0x92, 0x17, 0xe9, 0x74,
24404 ++ 0xc7, 0x8b, 0x70, 0x70, 0x36, 0x55, 0x95, 0x75,
24405 ++ 0x4b, 0xad, 0x61, 0x2b, 0x09, 0xbc, 0x82, 0xf2,
24406 ++ 0x6e, 0x94, 0x43, 0xae, 0xc3, 0xd5, 0xcd, 0x8e,
24407 ++ 0xfe, 0x5b, 0x9a, 0x88, 0x43, 0x01, 0x75, 0xb2,
24408 ++ 0x23, 0x09, 0xf7, 0x89, 0x83, 0xe7, 0xfa, 0xf9,
24409 ++ 0xb4, 0x9b, 0xf8, 0xef, 0xbd, 0x1c, 0x92, 0xc1,
24410 ++ 0xda, 0x7e, 0xfe, 0x05, 0xba, 0x5a, 0xcd, 0x07,
24411 ++ 0x6a, 0x78, 0x9e, 0x5d, 0xfb, 0x11, 0x2f, 0x79,
24412 ++ 0x38, 0xb6, 0xc2, 0x5b, 0x6b, 0x51, 0xb4, 0x71,
24413 ++ 0xdd, 0xf7, 0x2a, 0xe4, 0xf4, 0x72, 0x76, 0xad,
24414 ++ 0xc2, 0xdd, 0x64, 0x5d, 0x79, 0xb6, 0xf5, 0x7a,
24415 ++ 0x77, 0x20, 0x05, 0x3d, 0x30, 0x06, 0xd4, 0x4c,
24416 ++ 0x0a, 0x2c, 0x98, 0x5a, 0xb9, 0xd4, 0x98, 0xa9,
24417 ++ 0x3f, 0xc6, 0x12, 0xea, 0x3b, 0x4b, 0xc5, 0x79,
24418 ++ 0x64, 0x63, 0x6b, 0x09, 0x54, 0x3b, 0x14, 0x27,
24419 ++ 0xba, 0x99, 0x80, 0xc8, 0x72, 0xa8, 0x12, 0x90,
24420 ++ 0x29, 0xba, 0x40, 0x54, 0x97, 0x2b, 0x7b, 0xfe,
24421 ++ 0xeb, 0xcd, 0x01, 0x05, 0x44, 0x72, 0xdb, 0x99,
24422 ++ 0xe4, 0x61, 0xc9, 0x69, 0xd6, 0xb9, 0x28, 0xd1,
24423 ++ 0x05, 0x3e, 0xf9, 0x0b, 0x49, 0x0a, 0x49, 0xe9,
24424 ++ 0x8d, 0x0e, 0xa7, 0x4a, 0x0f, 0xaf, 0x32, 0xd0,
24425 ++ 0xe0, 0xb2, 0x3a, 0x55, 0x58, 0xfe, 0x5c, 0x28,
24426 ++ 0x70, 0x51, 0x23, 0xb0, 0x7b, 0x6a, 0x5f, 0x1e,
24427 ++ 0xb8, 0x17, 0xd7, 0x94, 0x15, 0x8f, 0xee, 0x20,
24428 ++ 0xc7, 0x42, 0x25, 0x3e, 0x9a, 0x14, 0xd7, 0x60,
24429 ++ 0x72, 0x39, 0x47, 0x48, 0xa9, 0xfe, 0xdd, 0x47,
24430 ++ 0x0a, 0xb1, 0xe6, 0x60, 0x28, 0x8c, 0x11, 0x68,
24431 ++ 0xe1, 0xff, 0xd7, 0xce, 0xc8, 0xbe, 0xb3, 0xfe,
24432 ++ 0x27, 0x30, 0x09, 0x70, 0xd7, 0xfa, 0x02, 0x33,
24433 ++ 0x3a, 0x61, 0x2e, 0xc7, 0xff, 0xa4, 0x2a, 0xa8,
24434 ++ 0x6e, 0xb4, 0x79, 0x35, 0x6d, 0x4c, 0x1e, 0x38,
24435 ++ 0xf8, 0xee, 0xd4, 0x84, 0x4e, 0x6e, 0x28, 0xa7,
24436 ++ 0xce, 0xc8, 0xc1, 0xcf, 0x80, 0x05, 0xf3, 0x04,
24437 ++ 0xef, 0xc8, 0x18, 0x28, 0x2e, 0x8d, 0x5e, 0x0c,
24438 ++ 0xdf, 0xb8, 0x5f, 0x96, 0xe8, 0xc6, 0x9c, 0x2f,
24439 ++ 0xe5, 0xa6, 0x44, 0xd7, 0xe7, 0x99, 0x44, 0x0c,
24440 ++ 0xec, 0xd7, 0x05, 0x60, 0x97, 0xbb, 0x74, 0x77,
24441 ++ 0x58, 0xd5, 0xbb, 0x48, 0xde, 0x5a, 0xb2, 0x54,
24442 ++ 0x7f, 0x0e, 0x46, 0x70, 0x6a, 0x6f, 0x78, 0xa5,
24443 ++ 0x08, 0x89, 0x05, 0x4e, 0x7e, 0xa0, 0x69, 0xb4,
24444 ++ 0x40, 0x60, 0x55, 0x77, 0x75, 0x9b, 0x19, 0xf2,
24445 ++ 0xd5, 0x13, 0x80, 0x77, 0xf9, 0x4b, 0x3f, 0x1e,
24446 ++ 0xee, 0xe6, 0x76, 0x84, 0x7b, 0x8c, 0xe5, 0x27,
24447 ++ 0xa8, 0x0a, 0x91, 0x01, 0x68, 0x71, 0x8a, 0x3f,
24448 ++ 0x06, 0xab, 0xf6, 0xa9, 0xa5, 0xe6, 0x72, 0x92,
24449 ++ 0xe4, 0x67, 0xe2, 0xa2, 0x46, 0x35, 0x84, 0x55,
24450 ++ 0x7d, 0xca, 0xa8, 0x85, 0xd0, 0xf1, 0x3f, 0xbe,
24451 ++ 0xd7, 0x34, 0x64, 0xfc, 0xae, 0xe3, 0xe4, 0x04,
24452 ++ 0x9f, 0x66, 0x02, 0xb9, 0x88, 0x10, 0xd9, 0xc4,
24453 ++ 0x4c, 0x31, 0x43, 0x7a, 0x93, 0xe2, 0x9b, 0x56,
24454 ++ 0x43, 0x84, 0xdc, 0xdc, 0xde, 0x1d, 0xa4, 0x02,
24455 ++ 0x0e, 0xc2, 0xef, 0xc3, 0xf8, 0x78, 0xd1, 0xb2,
24456 ++ 0x6b, 0x63, 0x18, 0xc9, 0xa9, 0xe5, 0x72, 0xd8,
24457 ++ 0xf3, 0xb9, 0xd1, 0x8a, 0xc7, 0x1a, 0x02, 0x27,
24458 ++ 0x20, 0x77, 0x10, 0xe5, 0xc8, 0xd4, 0x4a, 0x47,
24459 ++ 0xe5, 0xdf, 0x5f, 0x01, 0xaa, 0xb0, 0xd4, 0x10,
24460 ++ 0xbb, 0x69, 0xe3, 0x36, 0xc8, 0xe1, 0x3d, 0x43,
24461 ++ 0xfb, 0x86, 0xcd, 0xcc, 0xbf, 0xf4, 0x88, 0xe0,
24462 ++ 0x20, 0xca, 0xb7, 0x1b, 0xf1, 0x2f, 0x5c, 0xee,
24463 ++ 0xd4, 0xd3, 0xa3, 0xcc, 0xa4, 0x1e, 0x1c, 0x47,
24464 ++ 0xfb, 0xbf, 0xfc, 0xa2, 0x41, 0x55, 0x9d, 0xf6,
24465 ++ 0x5a, 0x5e, 0x65, 0x32, 0x34, 0x7b, 0x52, 0x8d,
24466 ++ 0xd5, 0xd0, 0x20, 0x60, 0x03, 0xab, 0x3f, 0x8c,
24467 ++ 0xd4, 0x21, 0xea, 0x2a, 0xd9, 0xc4, 0xd0, 0xd3,
24468 ++ 0x65, 0xd8, 0x7a, 0x13, 0x28, 0x62, 0x32, 0x4b,
24469 ++ 0x2c, 0x87, 0x93, 0xa8, 0xb4, 0x52, 0x45, 0x09,
24470 ++ 0x44, 0xec, 0xec, 0xc3, 0x17, 0xdb, 0x9a, 0x4d,
24471 ++ 0x5c, 0xa9, 0x11, 0xd4, 0x7d, 0xaf, 0x9e, 0xf1,
24472 ++ 0x2d, 0xb2, 0x66, 0xc5, 0x1d, 0xed, 0xb7, 0xcd,
24473 ++ 0x0b, 0x25, 0x5e, 0x30, 0x47, 0x3f, 0x40, 0xf4,
24474 ++ 0xa1, 0xa0, 0x00, 0x94, 0x10, 0xc5, 0x6a, 0x63,
24475 ++ 0x1a, 0xd5, 0x88, 0x92, 0x8e, 0x82, 0x39, 0x87,
24476 ++ 0x3c, 0x78, 0x65, 0x58, 0x42, 0x75, 0x5b, 0xdd,
24477 ++ 0x77, 0x3e, 0x09, 0x4e, 0x76, 0x5b, 0xe6, 0x0e,
24478 ++ 0x4d, 0x38, 0xb2, 0xc0, 0xb8, 0x95, 0x01, 0x7a,
24479 ++ 0x10, 0xe0, 0xfb, 0x07, 0xf2, 0xab, 0x2d, 0x8c,
24480 ++ 0x32, 0xed, 0x2b, 0xc0, 0x46, 0xc2, 0xf5, 0x38,
24481 ++ 0x83, 0xf0, 0x17, 0xec, 0xc1, 0x20, 0x6a, 0x9a,
24482 ++ 0x0b, 0x00, 0xa0, 0x98, 0x22, 0x50, 0x23, 0xd5,
24483 ++ 0x80, 0x6b, 0xf6, 0x1f, 0xc3, 0xcc, 0x97, 0xc9,
24484 ++ 0x24, 0x9f, 0xf3, 0xaf, 0x43, 0x14, 0xd5, 0xa0
24485 ++};
24486 ++static const u8 enc_assoc010[] __initconst = {
24487 ++ 0xd2, 0xa1, 0x70, 0xdb, 0x7a, 0xf8, 0xfa, 0x27,
24488 ++ 0xba, 0x73, 0x0f, 0xbf, 0x3d, 0x1e, 0x82, 0xb2
24489 ++};
24490 ++static const u8 enc_nonce010[] __initconst = {
24491 ++ 0xdb, 0x92, 0x0f, 0x7f, 0x17, 0x54, 0x0c, 0x30
24492 ++};
24493 ++static const u8 enc_key010[] __initconst = {
24494 ++ 0x47, 0x11, 0xeb, 0x86, 0x2b, 0x2c, 0xab, 0x44,
24495 ++ 0x34, 0xda, 0x7f, 0x57, 0x03, 0x39, 0x0c, 0xaf,
24496 ++ 0x2c, 0x14, 0xfd, 0x65, 0x23, 0xe9, 0x8e, 0x74,
24497 ++ 0xd5, 0x08, 0x68, 0x08, 0xe7, 0xb4, 0x72, 0xd7
24498 ++};
24499 ++
24500 ++static const u8 enc_input011[] __initconst = {
24501 ++ 0x7a, 0x57, 0xf2, 0xc7, 0x06, 0x3f, 0x50, 0x7b,
24502 ++ 0x36, 0x1a, 0x66, 0x5c, 0xb9, 0x0e, 0x5e, 0x3b,
24503 ++ 0x45, 0x60, 0xbe, 0x9a, 0x31, 0x9f, 0xff, 0x5d,
24504 ++ 0x66, 0x34, 0xb4, 0xdc, 0xfb, 0x9d, 0x8e, 0xee,
24505 ++ 0x6a, 0x33, 0xa4, 0x07, 0x3c, 0xf9, 0x4c, 0x30,
24506 ++ 0xa1, 0x24, 0x52, 0xf9, 0x50, 0x46, 0x88, 0x20,
24507 ++ 0x02, 0x32, 0x3a, 0x0e, 0x99, 0x63, 0xaf, 0x1f,
24508 ++ 0x15, 0x28, 0x2a, 0x05, 0xff, 0x57, 0x59, 0x5e,
24509 ++ 0x18, 0xa1, 0x1f, 0xd0, 0x92, 0x5c, 0x88, 0x66,
24510 ++ 0x1b, 0x00, 0x64, 0xa5, 0x93, 0x8d, 0x06, 0x46,
24511 ++ 0xb0, 0x64, 0x8b, 0x8b, 0xef, 0x99, 0x05, 0x35,
24512 ++ 0x85, 0xb3, 0xf3, 0x33, 0xbb, 0xec, 0x66, 0xb6,
24513 ++ 0x3d, 0x57, 0x42, 0xe3, 0xb4, 0xc6, 0xaa, 0xb0,
24514 ++ 0x41, 0x2a, 0xb9, 0x59, 0xa9, 0xf6, 0x3e, 0x15,
24515 ++ 0x26, 0x12, 0x03, 0x21, 0x4c, 0x74, 0x43, 0x13,
24516 ++ 0x2a, 0x03, 0x27, 0x09, 0xb4, 0xfb, 0xe7, 0xb7,
24517 ++ 0x40, 0xff, 0x5e, 0xce, 0x48, 0x9a, 0x60, 0xe3,
24518 ++ 0x8b, 0x80, 0x8c, 0x38, 0x2d, 0xcb, 0x93, 0x37,
24519 ++ 0x74, 0x05, 0x52, 0x6f, 0x73, 0x3e, 0xc3, 0xbc,
24520 ++ 0xca, 0x72, 0x0a, 0xeb, 0xf1, 0x3b, 0xa0, 0x95,
24521 ++ 0xdc, 0x8a, 0xc4, 0xa9, 0xdc, 0xca, 0x44, 0xd8,
24522 ++ 0x08, 0x63, 0x6a, 0x36, 0xd3, 0x3c, 0xb8, 0xac,
24523 ++ 0x46, 0x7d, 0xfd, 0xaa, 0xeb, 0x3e, 0x0f, 0x45,
24524 ++ 0x8f, 0x49, 0xda, 0x2b, 0xf2, 0x12, 0xbd, 0xaf,
24525 ++ 0x67, 0x8a, 0x63, 0x48, 0x4b, 0x55, 0x5f, 0x6d,
24526 ++ 0x8c, 0xb9, 0x76, 0x34, 0x84, 0xae, 0xc2, 0xfc,
24527 ++ 0x52, 0x64, 0x82, 0xf7, 0xb0, 0x06, 0xf0, 0x45,
24528 ++ 0x73, 0x12, 0x50, 0x30, 0x72, 0xea, 0x78, 0x9a,
24529 ++ 0xa8, 0xaf, 0xb5, 0xe3, 0xbb, 0x77, 0x52, 0xec,
24530 ++ 0x59, 0x84, 0xbf, 0x6b, 0x8f, 0xce, 0x86, 0x5e,
24531 ++ 0x1f, 0x23, 0xe9, 0xfb, 0x08, 0x86, 0xf7, 0x10,
24532 ++ 0xb9, 0xf2, 0x44, 0x96, 0x44, 0x63, 0xa9, 0xa8,
24533 ++ 0x78, 0x00, 0x23, 0xd6, 0xc7, 0xe7, 0x6e, 0x66,
24534 ++ 0x4f, 0xcc, 0xee, 0x15, 0xb3, 0xbd, 0x1d, 0xa0,
24535 ++ 0xe5, 0x9c, 0x1b, 0x24, 0x2c, 0x4d, 0x3c, 0x62,
24536 ++ 0x35, 0x9c, 0x88, 0x59, 0x09, 0xdd, 0x82, 0x1b,
24537 ++ 0xcf, 0x0a, 0x83, 0x6b, 0x3f, 0xae, 0x03, 0xc4,
24538 ++ 0xb4, 0xdd, 0x7e, 0x5b, 0x28, 0x76, 0x25, 0x96,
24539 ++ 0xd9, 0xc9, 0x9d, 0x5f, 0x86, 0xfa, 0xf6, 0xd7,
24540 ++ 0xd2, 0xe6, 0x76, 0x1d, 0x0f, 0xa1, 0xdc, 0x74,
24541 ++ 0x05, 0x1b, 0x1d, 0xe0, 0xcd, 0x16, 0xb0, 0xa8,
24542 ++ 0x8a, 0x34, 0x7b, 0x15, 0x11, 0x77, 0xe5, 0x7b,
24543 ++ 0x7e, 0x20, 0xf7, 0xda, 0x38, 0xda, 0xce, 0x70,
24544 ++ 0xe9, 0xf5, 0x6c, 0xd9, 0xbe, 0x0c, 0x4c, 0x95,
24545 ++ 0x4c, 0xc2, 0x9b, 0x34, 0x55, 0x55, 0xe1, 0xf3,
24546 ++ 0x46, 0x8e, 0x48, 0x74, 0x14, 0x4f, 0x9d, 0xc9,
24547 ++ 0xf5, 0xe8, 0x1a, 0xf0, 0x11, 0x4a, 0xc1, 0x8d,
24548 ++ 0xe0, 0x93, 0xa0, 0xbe, 0x09, 0x1c, 0x2b, 0x4e,
24549 ++ 0x0f, 0xb2, 0x87, 0x8b, 0x84, 0xfe, 0x92, 0x32,
24550 ++ 0x14, 0xd7, 0x93, 0xdf, 0xe7, 0x44, 0xbc, 0xc5,
24551 ++ 0xae, 0x53, 0x69, 0xd8, 0xb3, 0x79, 0x37, 0x80,
24552 ++ 0xe3, 0x17, 0x5c, 0xec, 0x53, 0x00, 0x9a, 0xe3,
24553 ++ 0x8e, 0xdc, 0x38, 0xb8, 0x66, 0xf0, 0xd3, 0xad,
24554 ++ 0x1d, 0x02, 0x96, 0x86, 0x3e, 0x9d, 0x3b, 0x5d,
24555 ++ 0xa5, 0x7f, 0x21, 0x10, 0xf1, 0x1f, 0x13, 0x20,
24556 ++ 0xf9, 0x57, 0x87, 0x20, 0xf5, 0x5f, 0xf1, 0x17,
24557 ++ 0x48, 0x0a, 0x51, 0x5a, 0xcd, 0x19, 0x03, 0xa6,
24558 ++ 0x5a, 0xd1, 0x12, 0x97, 0xe9, 0x48, 0xe2, 0x1d,
24559 ++ 0x83, 0x75, 0x50, 0xd9, 0x75, 0x7d, 0x6a, 0x82,
24560 ++ 0xa1, 0xf9, 0x4e, 0x54, 0x87, 0x89, 0xc9, 0x0c,
24561 ++ 0xb7, 0x5b, 0x6a, 0x91, 0xc1, 0x9c, 0xb2, 0xa9,
24562 ++ 0xdc, 0x9a, 0xa4, 0x49, 0x0a, 0x6d, 0x0d, 0xbb,
24563 ++ 0xde, 0x86, 0x44, 0xdd, 0x5d, 0x89, 0x2b, 0x96,
24564 ++ 0x0f, 0x23, 0x95, 0xad, 0xcc, 0xa2, 0xb3, 0xb9,
24565 ++ 0x7e, 0x74, 0x38, 0xba, 0x9f, 0x73, 0xae, 0x5f,
24566 ++ 0xf8, 0x68, 0xa2, 0xe0, 0xa9, 0xce, 0xbd, 0x40,
24567 ++ 0xd4, 0x4c, 0x6b, 0xd2, 0x56, 0x62, 0xb0, 0xcc,
24568 ++ 0x63, 0x7e, 0x5b, 0xd3, 0xae, 0xd1, 0x75, 0xce,
24569 ++ 0xbb, 0xb4, 0x5b, 0xa8, 0xf8, 0xb4, 0xac, 0x71,
24570 ++ 0x75, 0xaa, 0xc9, 0x9f, 0xbb, 0x6c, 0xad, 0x0f,
24571 ++ 0x55, 0x5d, 0xe8, 0x85, 0x7d, 0xf9, 0x21, 0x35,
24572 ++ 0xea, 0x92, 0x85, 0x2b, 0x00, 0xec, 0x84, 0x90,
24573 ++ 0x0a, 0x63, 0x96, 0xe4, 0x6b, 0xa9, 0x77, 0xb8,
24574 ++ 0x91, 0xf8, 0x46, 0x15, 0x72, 0x63, 0x70, 0x01,
24575 ++ 0x40, 0xa3, 0xa5, 0x76, 0x62, 0x2b, 0xbf, 0xf1,
24576 ++ 0xe5, 0x8d, 0x9f, 0xa3, 0xfa, 0x9b, 0x03, 0xbe,
24577 ++ 0xfe, 0x65, 0x6f, 0xa2, 0x29, 0x0d, 0x54, 0xb4,
24578 ++ 0x71, 0xce, 0xa9, 0xd6, 0x3d, 0x88, 0xf9, 0xaf,
24579 ++ 0x6b, 0xa8, 0x9e, 0xf4, 0x16, 0x96, 0x36, 0xb9,
24580 ++ 0x00, 0xdc, 0x10, 0xab, 0xb5, 0x08, 0x31, 0x1f,
24581 ++ 0x00, 0xb1, 0x3c, 0xd9, 0x38, 0x3e, 0xc6, 0x04,
24582 ++ 0xa7, 0x4e, 0xe8, 0xae, 0xed, 0x98, 0xc2, 0xf7,
24583 ++ 0xb9, 0x00, 0x5f, 0x8c, 0x60, 0xd1, 0xe5, 0x15,
24584 ++ 0xf7, 0xae, 0x1e, 0x84, 0x88, 0xd1, 0xf6, 0xbc,
24585 ++ 0x3a, 0x89, 0x35, 0x22, 0x83, 0x7c, 0xca, 0xf0,
24586 ++ 0x33, 0x82, 0x4c, 0x79, 0x3c, 0xfd, 0xb1, 0xae,
24587 ++ 0x52, 0x62, 0x55, 0xd2, 0x41, 0x60, 0xc6, 0xbb,
24588 ++ 0xfa, 0x0e, 0x59, 0xd6, 0xa8, 0xfe, 0x5d, 0xed,
24589 ++ 0x47, 0x3d, 0xe0, 0xea, 0x1f, 0x6e, 0x43, 0x51,
24590 ++ 0xec, 0x10, 0x52, 0x56, 0x77, 0x42, 0x6b, 0x52,
24591 ++ 0x87, 0xd8, 0xec, 0xe0, 0xaa, 0x76, 0xa5, 0x84,
24592 ++ 0x2a, 0x22, 0x24, 0xfd, 0x92, 0x40, 0x88, 0xd5,
24593 ++ 0x85, 0x1c, 0x1f, 0x6b, 0x47, 0xa0, 0xc4, 0xe4,
24594 ++ 0xef, 0xf4, 0xea, 0xd7, 0x59, 0xac, 0x2a, 0x9e,
24595 ++ 0x8c, 0xfa, 0x1f, 0x42, 0x08, 0xfe, 0x4f, 0x74,
24596 ++ 0xa0, 0x26, 0xf5, 0xb3, 0x84, 0xf6, 0x58, 0x5f,
24597 ++ 0x26, 0x66, 0x3e, 0xd7, 0xe4, 0x22, 0x91, 0x13,
24598 ++ 0xc8, 0xac, 0x25, 0x96, 0x23, 0xd8, 0x09, 0xea,
24599 ++ 0x45, 0x75, 0x23, 0xb8, 0x5f, 0xc2, 0x90, 0x8b,
24600 ++ 0x09, 0xc4, 0xfc, 0x47, 0x6c, 0x6d, 0x0a, 0xef,
24601 ++ 0x69, 0xa4, 0x38, 0x19, 0xcf, 0x7d, 0xf9, 0x09,
24602 ++ 0x73, 0x9b, 0x60, 0x5a, 0xf7, 0x37, 0xb5, 0xfe,
24603 ++ 0x9f, 0xe3, 0x2b, 0x4c, 0x0d, 0x6e, 0x19, 0xf1,
24604 ++ 0xd6, 0xc0, 0x70, 0xf3, 0x9d, 0x22, 0x3c, 0xf9,
24605 ++ 0x49, 0xce, 0x30, 0x8e, 0x44, 0xb5, 0x76, 0x15,
24606 ++ 0x8f, 0x52, 0xfd, 0xa5, 0x04, 0xb8, 0x55, 0x6a,
24607 ++ 0x36, 0x59, 0x7c, 0xc4, 0x48, 0xb8, 0xd7, 0xab,
24608 ++ 0x05, 0x66, 0xe9, 0x5e, 0x21, 0x6f, 0x6b, 0x36,
24609 ++ 0x29, 0xbb, 0xe9, 0xe3, 0xa2, 0x9a, 0xa8, 0xcd,
24610 ++ 0x55, 0x25, 0x11, 0xba, 0x5a, 0x58, 0xa0, 0xde,
24611 ++ 0xae, 0x19, 0x2a, 0x48, 0x5a, 0xff, 0x36, 0xcd,
24612 ++ 0x6d, 0x16, 0x7a, 0x73, 0x38, 0x46, 0xe5, 0x47,
24613 ++ 0x59, 0xc8, 0xa2, 0xf6, 0xe2, 0x6c, 0x83, 0xc5,
24614 ++ 0x36, 0x2c, 0x83, 0x7d, 0xb4, 0x01, 0x05, 0x69,
24615 ++ 0xe7, 0xaf, 0x5c, 0xc4, 0x64, 0x82, 0x12, 0x21,
24616 ++ 0xef, 0xf7, 0xd1, 0x7d, 0xb8, 0x8d, 0x8c, 0x98,
24617 ++ 0x7c, 0x5f, 0x7d, 0x92, 0x88, 0xb9, 0x94, 0x07,
24618 ++ 0x9c, 0xd8, 0xe9, 0x9c, 0x17, 0x38, 0xe3, 0x57,
24619 ++ 0x6c, 0xe0, 0xdc, 0xa5, 0x92, 0x42, 0xb3, 0xbd,
24620 ++ 0x50, 0xa2, 0x7e, 0xb5, 0xb1, 0x52, 0x72, 0x03,
24621 ++ 0x97, 0xd8, 0xaa, 0x9a, 0x1e, 0x75, 0x41, 0x11,
24622 ++ 0xa3, 0x4f, 0xcc, 0xd4, 0xe3, 0x73, 0xad, 0x96,
24623 ++ 0xdc, 0x47, 0x41, 0x9f, 0xb0, 0xbe, 0x79, 0x91,
24624 ++ 0xf5, 0xb6, 0x18, 0xfe, 0xc2, 0x83, 0x18, 0x7d,
24625 ++ 0x73, 0xd9, 0x4f, 0x83, 0x84, 0x03, 0xb3, 0xf0,
24626 ++ 0x77, 0x66, 0x3d, 0x83, 0x63, 0x2e, 0x2c, 0xf9,
24627 ++ 0xdd, 0xa6, 0x1f, 0x89, 0x82, 0xb8, 0x23, 0x42,
24628 ++ 0xeb, 0xe2, 0xca, 0x70, 0x82, 0x61, 0x41, 0x0a,
24629 ++ 0x6d, 0x5f, 0x75, 0xc5, 0xe2, 0xc4, 0x91, 0x18,
24630 ++ 0x44, 0x22, 0xfa, 0x34, 0x10, 0xf5, 0x20, 0xdc,
24631 ++ 0xb7, 0xdd, 0x2a, 0x20, 0x77, 0xf5, 0xf9, 0xce,
24632 ++ 0xdb, 0xa0, 0x0a, 0x52, 0x2a, 0x4e, 0xdd, 0xcc,
24633 ++ 0x97, 0xdf, 0x05, 0xe4, 0x5e, 0xb7, 0xaa, 0xf0,
24634 ++ 0xe2, 0x80, 0xff, 0xba, 0x1a, 0x0f, 0xac, 0xdf,
24635 ++ 0x02, 0x32, 0xe6, 0xf7, 0xc7, 0x17, 0x13, 0xb7,
24636 ++ 0xfc, 0x98, 0x48, 0x8c, 0x0d, 0x82, 0xc9, 0x80,
24637 ++ 0x7a, 0xe2, 0x0a, 0xc5, 0xb4, 0xde, 0x7c, 0x3c,
24638 ++ 0x79, 0x81, 0x0e, 0x28, 0x65, 0x79, 0x67, 0x82,
24639 ++ 0x69, 0x44, 0x66, 0x09, 0xf7, 0x16, 0x1a, 0xf9,
24640 ++ 0x7d, 0x80, 0xa1, 0x79, 0x14, 0xa9, 0xc8, 0x20,
24641 ++ 0xfb, 0xa2, 0x46, 0xbe, 0x08, 0x35, 0x17, 0x58,
24642 ++ 0xc1, 0x1a, 0xda, 0x2a, 0x6b, 0x2e, 0x1e, 0xe6,
24643 ++ 0x27, 0x55, 0x7b, 0x19, 0xe2, 0xfb, 0x64, 0xfc,
24644 ++ 0x5e, 0x15, 0x54, 0x3c, 0xe7, 0xc2, 0x11, 0x50,
24645 ++ 0x30, 0xb8, 0x72, 0x03, 0x0b, 0x1a, 0x9f, 0x86,
24646 ++ 0x27, 0x11, 0x5c, 0x06, 0x2b, 0xbd, 0x75, 0x1a,
24647 ++ 0x0a, 0xda, 0x01, 0xfa, 0x5c, 0x4a, 0xc1, 0x80,
24648 ++ 0x3a, 0x6e, 0x30, 0xc8, 0x2c, 0xeb, 0x56, 0xec,
24649 ++ 0x89, 0xfa, 0x35, 0x7b, 0xb2, 0xf0, 0x97, 0x08,
24650 ++ 0x86, 0x53, 0xbe, 0xbd, 0x40, 0x41, 0x38, 0x1c,
24651 ++ 0xb4, 0x8b, 0x79, 0x2e, 0x18, 0x96, 0x94, 0xde,
24652 ++ 0xe8, 0xca, 0xe5, 0x9f, 0x92, 0x9f, 0x15, 0x5d,
24653 ++ 0x56, 0x60, 0x5c, 0x09, 0xf9, 0x16, 0xf4, 0x17,
24654 ++ 0x0f, 0xf6, 0x4c, 0xda, 0xe6, 0x67, 0x89, 0x9f,
24655 ++ 0xca, 0x6c, 0xe7, 0x9b, 0x04, 0x62, 0x0e, 0x26,
24656 ++ 0xa6, 0x52, 0xbd, 0x29, 0xff, 0xc7, 0xa4, 0x96,
24657 ++ 0xe6, 0x6a, 0x02, 0xa5, 0x2e, 0x7b, 0xfe, 0x97,
24658 ++ 0x68, 0x3e, 0x2e, 0x5f, 0x3b, 0x0f, 0x36, 0xd6,
24659 ++ 0x98, 0x19, 0x59, 0x48, 0xd2, 0xc6, 0xe1, 0x55,
24660 ++ 0x1a, 0x6e, 0xd6, 0xed, 0x2c, 0xba, 0xc3, 0x9e,
24661 ++ 0x64, 0xc9, 0x95, 0x86, 0x35, 0x5e, 0x3e, 0x88,
24662 ++ 0x69, 0x99, 0x4b, 0xee, 0xbe, 0x9a, 0x99, 0xb5,
24663 ++ 0x6e, 0x58, 0xae, 0xdd, 0x22, 0xdb, 0xdd, 0x6b,
24664 ++ 0xfc, 0xaf, 0x90, 0xa3, 0x3d, 0xa4, 0xc1, 0x15,
24665 ++ 0x92, 0x18, 0x8d, 0xd2, 0x4b, 0x7b, 0x06, 0xd1,
24666 ++ 0x37, 0xb5, 0xe2, 0x7c, 0x2c, 0xf0, 0x25, 0xe4,
24667 ++ 0x94, 0x2a, 0xbd, 0xe3, 0x82, 0x70, 0x78, 0xa3,
24668 ++ 0x82, 0x10, 0x5a, 0x90, 0xd7, 0xa4, 0xfa, 0xaf,
24669 ++ 0x1a, 0x88, 0x59, 0xdc, 0x74, 0x12, 0xb4, 0x8e,
24670 ++ 0xd7, 0x19, 0x46, 0xf4, 0x84, 0x69, 0x9f, 0xbb,
24671 ++ 0x70, 0xa8, 0x4c, 0x52, 0x81, 0xa9, 0xff, 0x76,
24672 ++ 0x1c, 0xae, 0xd8, 0x11, 0x3d, 0x7f, 0x7d, 0xc5,
24673 ++ 0x12, 0x59, 0x28, 0x18, 0xc2, 0xa2, 0xb7, 0x1c,
24674 ++ 0x88, 0xf8, 0xd6, 0x1b, 0xa6, 0x7d, 0x9e, 0xde,
24675 ++ 0x29, 0xf8, 0xed, 0xff, 0xeb, 0x92, 0x24, 0x4f,
24676 ++ 0x05, 0xaa, 0xd9, 0x49, 0xba, 0x87, 0x59, 0x51,
24677 ++ 0xc9, 0x20, 0x5c, 0x9b, 0x74, 0xcf, 0x03, 0xd9,
24678 ++ 0x2d, 0x34, 0xc7, 0x5b, 0xa5, 0x40, 0xb2, 0x99,
24679 ++ 0xf5, 0xcb, 0xb4, 0xf6, 0xb7, 0x72, 0x4a, 0xd6,
24680 ++ 0xbd, 0xb0, 0xf3, 0x93, 0xe0, 0x1b, 0xa8, 0x04,
24681 ++ 0x1e, 0x35, 0xd4, 0x80, 0x20, 0xf4, 0x9c, 0x31,
24682 ++ 0x6b, 0x45, 0xb9, 0x15, 0xb0, 0x5e, 0xdd, 0x0a,
24683 ++ 0x33, 0x9c, 0x83, 0xcd, 0x58, 0x89, 0x50, 0x56,
24684 ++ 0xbb, 0x81, 0x00, 0x91, 0x32, 0xf3, 0x1b, 0x3e,
24685 ++ 0xcf, 0x45, 0xe1, 0xf9, 0xe1, 0x2c, 0x26, 0x78,
24686 ++ 0x93, 0x9a, 0x60, 0x46, 0xc9, 0xb5, 0x5e, 0x6a,
24687 ++ 0x28, 0x92, 0x87, 0x3f, 0x63, 0x7b, 0xdb, 0xf7,
24688 ++ 0xd0, 0x13, 0x9d, 0x32, 0x40, 0x5e, 0xcf, 0xfb,
24689 ++ 0x79, 0x68, 0x47, 0x4c, 0xfd, 0x01, 0x17, 0xe6,
24690 ++ 0x97, 0x93, 0x78, 0xbb, 0xa6, 0x27, 0xa3, 0xe8,
24691 ++ 0x1a, 0xe8, 0x94, 0x55, 0x7d, 0x08, 0xe5, 0xdc,
24692 ++ 0x66, 0xa3, 0x69, 0xc8, 0xca, 0xc5, 0xa1, 0x84,
24693 ++ 0x55, 0xde, 0x08, 0x91, 0x16, 0x3a, 0x0c, 0x86,
24694 ++ 0xab, 0x27, 0x2b, 0x64, 0x34, 0x02, 0x6c, 0x76,
24695 ++ 0x8b, 0xc6, 0xaf, 0xcc, 0xe1, 0xd6, 0x8c, 0x2a,
24696 ++ 0x18, 0x3d, 0xa6, 0x1b, 0x37, 0x75, 0x45, 0x73,
24697 ++ 0xc2, 0x75, 0xd7, 0x53, 0x78, 0x3a, 0xd6, 0xe8,
24698 ++ 0x29, 0xd2, 0x4a, 0xa8, 0x1e, 0x82, 0xf6, 0xb6,
24699 ++ 0x81, 0xde, 0x21, 0xed, 0x2b, 0x56, 0xbb, 0xf2,
24700 ++ 0xd0, 0x57, 0xc1, 0x7c, 0xd2, 0x6a, 0xd2, 0x56,
24701 ++ 0xf5, 0x13, 0x5f, 0x1c, 0x6a, 0x0b, 0x74, 0xfb,
24702 ++ 0xe9, 0xfe, 0x9e, 0xea, 0x95, 0xb2, 0x46, 0xab,
24703 ++ 0x0a, 0xfc, 0xfd, 0xf3, 0xbb, 0x04, 0x2b, 0x76,
24704 ++ 0x1b, 0xa4, 0x74, 0xb0, 0xc1, 0x78, 0xc3, 0x69,
24705 ++ 0xe2, 0xb0, 0x01, 0xe1, 0xde, 0x32, 0x4c, 0x8d,
24706 ++ 0x1a, 0xb3, 0x38, 0x08, 0xd5, 0xfc, 0x1f, 0xdc,
24707 ++ 0x0e, 0x2c, 0x9c, 0xb1, 0xa1, 0x63, 0x17, 0x22,
24708 ++ 0xf5, 0x6c, 0x93, 0x70, 0x74, 0x00, 0xf8, 0x39,
24709 ++ 0x01, 0x94, 0xd1, 0x32, 0x23, 0x56, 0x5d, 0xa6,
24710 ++ 0x02, 0x76, 0x76, 0x93, 0xce, 0x2f, 0x19, 0xe9,
24711 ++ 0x17, 0x52, 0xae, 0x6e, 0x2c, 0x6d, 0x61, 0x7f,
24712 ++ 0x3b, 0xaa, 0xe0, 0x52, 0x85, 0xc5, 0x65, 0xc1,
24713 ++ 0xbb, 0x8e, 0x5b, 0x21, 0xd5, 0xc9, 0x78, 0x83,
24714 ++ 0x07, 0x97, 0x4c, 0x62, 0x61, 0x41, 0xd4, 0xfc,
24715 ++ 0xc9, 0x39, 0xe3, 0x9b, 0xd0, 0xcc, 0x75, 0xc4,
24716 ++ 0x97, 0xe6, 0xdd, 0x2a, 0x5f, 0xa6, 0xe8, 0x59,
24717 ++ 0x6c, 0x98, 0xb9, 0x02, 0xe2, 0xa2, 0xd6, 0x68,
24718 ++ 0xee, 0x3b, 0x1d, 0xe3, 0x4d, 0x5b, 0x30, 0xef,
24719 ++ 0x03, 0xf2, 0xeb, 0x18, 0x57, 0x36, 0xe8, 0xa1,
24720 ++ 0xf4, 0x47, 0xfb, 0xcb, 0x8f, 0xcb, 0xc8, 0xf3,
24721 ++ 0x4f, 0x74, 0x9d, 0x9d, 0xb1, 0x8d, 0x14, 0x44,
24722 ++ 0xd9, 0x19, 0xb4, 0x54, 0x4f, 0x75, 0x19, 0x09,
24723 ++ 0xa0, 0x75, 0xbc, 0x3b, 0x82, 0xc6, 0x3f, 0xb8,
24724 ++ 0x83, 0x19, 0x6e, 0xd6, 0x37, 0xfe, 0x6e, 0x8a,
24725 ++ 0x4e, 0xe0, 0x4a, 0xab, 0x7b, 0xc8, 0xb4, 0x1d,
24726 ++ 0xf4, 0xed, 0x27, 0x03, 0x65, 0xa2, 0xa1, 0xae,
24727 ++ 0x11, 0xe7, 0x98, 0x78, 0x48, 0x91, 0xd2, 0xd2,
24728 ++ 0xd4, 0x23, 0x78, 0x50, 0xb1, 0x5b, 0x85, 0x10,
24729 ++ 0x8d, 0xca, 0x5f, 0x0f, 0x71, 0xae, 0x72, 0x9a,
24730 ++ 0xf6, 0x25, 0x19, 0x60, 0x06, 0xf7, 0x10, 0x34,
24731 ++ 0x18, 0x0d, 0xc9, 0x9f, 0x7b, 0x0c, 0x9b, 0x8f,
24732 ++ 0x91, 0x1b, 0x9f, 0xcd, 0x10, 0xee, 0x75, 0xf9,
24733 ++ 0x97, 0x66, 0xfc, 0x4d, 0x33, 0x6e, 0x28, 0x2b,
24734 ++ 0x92, 0x85, 0x4f, 0xab, 0x43, 0x8d, 0x8f, 0x7d,
24735 ++ 0x86, 0xa7, 0xc7, 0xd8, 0xd3, 0x0b, 0x8b, 0x57,
24736 ++ 0xb6, 0x1d, 0x95, 0x0d, 0xe9, 0xbc, 0xd9, 0x03,
24737 ++ 0xd9, 0x10, 0x19, 0xc3, 0x46, 0x63, 0x55, 0x87,
24738 ++ 0x61, 0x79, 0x6c, 0x95, 0x0e, 0x9c, 0xdd, 0xca,
24739 ++ 0xc3, 0xf3, 0x64, 0xf0, 0x7d, 0x76, 0xb7, 0x53,
24740 ++ 0x67, 0x2b, 0x1e, 0x44, 0x56, 0x81, 0xea, 0x8f,
24741 ++ 0x5c, 0x42, 0x16, 0xb8, 0x28, 0xeb, 0x1b, 0x61,
24742 ++ 0x10, 0x1e, 0xbf, 0xec, 0xa8
24743 ++};
24744 ++static const u8 enc_output011[] __initconst = {
24745 ++ 0x6a, 0xfc, 0x4b, 0x25, 0xdf, 0xc0, 0xe4, 0xe8,
24746 ++ 0x17, 0x4d, 0x4c, 0xc9, 0x7e, 0xde, 0x3a, 0xcc,
24747 ++ 0x3c, 0xba, 0x6a, 0x77, 0x47, 0xdb, 0xe3, 0x74,
24748 ++ 0x7a, 0x4d, 0x5f, 0x8d, 0x37, 0x55, 0x80, 0x73,
24749 ++ 0x90, 0x66, 0x5d, 0x3a, 0x7d, 0x5d, 0x86, 0x5e,
24750 ++ 0x8d, 0xfd, 0x83, 0xff, 0x4e, 0x74, 0x6f, 0xf9,
24751 ++ 0xe6, 0x70, 0x17, 0x70, 0x3e, 0x96, 0xa7, 0x7e,
24752 ++ 0xcb, 0xab, 0x8f, 0x58, 0x24, 0x9b, 0x01, 0xfd,
24753 ++ 0xcb, 0xe6, 0x4d, 0x9b, 0xf0, 0x88, 0x94, 0x57,
24754 ++ 0x66, 0xef, 0x72, 0x4c, 0x42, 0x6e, 0x16, 0x19,
24755 ++ 0x15, 0xea, 0x70, 0x5b, 0xac, 0x13, 0xdb, 0x9f,
24756 ++ 0x18, 0xe2, 0x3c, 0x26, 0x97, 0xbc, 0xdc, 0x45,
24757 ++ 0x8c, 0x6c, 0x24, 0x69, 0x9c, 0xf7, 0x65, 0x1e,
24758 ++ 0x18, 0x59, 0x31, 0x7c, 0xe4, 0x73, 0xbc, 0x39,
24759 ++ 0x62, 0xc6, 0x5c, 0x9f, 0xbf, 0xfa, 0x90, 0x03,
24760 ++ 0xc9, 0x72, 0x26, 0xb6, 0x1b, 0xc2, 0xb7, 0x3f,
24761 ++ 0xf2, 0x13, 0x77, 0xf2, 0x8d, 0xb9, 0x47, 0xd0,
24762 ++ 0x53, 0xdd, 0xc8, 0x91, 0x83, 0x8b, 0xb1, 0xce,
24763 ++ 0xa3, 0xfe, 0xcd, 0xd9, 0xdd, 0x92, 0x7b, 0xdb,
24764 ++ 0xb8, 0xfb, 0xc9, 0x2d, 0x01, 0x59, 0x39, 0x52,
24765 ++ 0xad, 0x1b, 0xec, 0xcf, 0xd7, 0x70, 0x13, 0x21,
24766 ++ 0xf5, 0x47, 0xaa, 0x18, 0x21, 0x5c, 0xc9, 0x9a,
24767 ++ 0xd2, 0x6b, 0x05, 0x9c, 0x01, 0xa1, 0xda, 0x35,
24768 ++ 0x5d, 0xb3, 0x70, 0xe6, 0xa9, 0x80, 0x8b, 0x91,
24769 ++ 0xb7, 0xb3, 0x5f, 0x24, 0x9a, 0xb7, 0xd1, 0x6b,
24770 ++ 0xa1, 0x1c, 0x50, 0xba, 0x49, 0xe0, 0xee, 0x2e,
24771 ++ 0x75, 0xac, 0x69, 0xc0, 0xeb, 0x03, 0xdd, 0x19,
24772 ++ 0xe5, 0xf6, 0x06, 0xdd, 0xc3, 0xd7, 0x2b, 0x07,
24773 ++ 0x07, 0x30, 0xa7, 0x19, 0x0c, 0xbf, 0xe6, 0x18,
24774 ++ 0xcc, 0xb1, 0x01, 0x11, 0x85, 0x77, 0x1d, 0x96,
24775 ++ 0xa7, 0xa3, 0x00, 0x84, 0x02, 0xa2, 0x83, 0x68,
24776 ++ 0xda, 0x17, 0x27, 0xc8, 0x7f, 0x23, 0xb7, 0xf4,
24777 ++ 0x13, 0x85, 0xcf, 0xdd, 0x7a, 0x7d, 0x24, 0x57,
24778 ++ 0xfe, 0x05, 0x93, 0xf5, 0x74, 0xce, 0xed, 0x0c,
24779 ++ 0x20, 0x98, 0x8d, 0x92, 0x30, 0xa1, 0x29, 0x23,
24780 ++ 0x1a, 0xa0, 0x4f, 0x69, 0x56, 0x4c, 0xe1, 0xc8,
24781 ++ 0xce, 0xf6, 0x9a, 0x0c, 0xa4, 0xfa, 0x04, 0xf6,
24782 ++ 0x62, 0x95, 0xf2, 0xfa, 0xc7, 0x40, 0x68, 0x40,
24783 ++ 0x8f, 0x41, 0xda, 0xb4, 0x26, 0x6f, 0x70, 0xab,
24784 ++ 0x40, 0x61, 0xa4, 0x0e, 0x75, 0xfb, 0x86, 0xeb,
24785 ++ 0x9d, 0x9a, 0x1f, 0xec, 0x76, 0x99, 0xe7, 0xea,
24786 ++ 0xaa, 0x1e, 0x2d, 0xb5, 0xd4, 0xa6, 0x1a, 0xb8,
24787 ++ 0x61, 0x0a, 0x1d, 0x16, 0x5b, 0x98, 0xc2, 0x31,
24788 ++ 0x40, 0xe7, 0x23, 0x1d, 0x66, 0x99, 0xc8, 0xc0,
24789 ++ 0xd7, 0xce, 0xf3, 0x57, 0x40, 0x04, 0x3f, 0xfc,
24790 ++ 0xea, 0xb3, 0xfc, 0xd2, 0xd3, 0x99, 0xa4, 0x94,
24791 ++ 0x69, 0xa0, 0xef, 0xd1, 0x85, 0xb3, 0xa6, 0xb1,
24792 ++ 0x28, 0xbf, 0x94, 0x67, 0x22, 0xc3, 0x36, 0x46,
24793 ++ 0xf8, 0xd2, 0x0f, 0x5f, 0xf4, 0x59, 0x80, 0xe6,
24794 ++ 0x2d, 0x43, 0x08, 0x7d, 0x19, 0x09, 0x97, 0xa7,
24795 ++ 0x4c, 0x3d, 0x8d, 0xba, 0x65, 0x62, 0xa3, 0x71,
24796 ++ 0x33, 0x29, 0x62, 0xdb, 0xc1, 0x33, 0x34, 0x1a,
24797 ++ 0x63, 0x33, 0x16, 0xb6, 0x64, 0x7e, 0xab, 0x33,
24798 ++ 0xf0, 0xe6, 0x26, 0x68, 0xba, 0x1d, 0x2e, 0x38,
24799 ++ 0x08, 0xe6, 0x02, 0xd3, 0x25, 0x2c, 0x47, 0x23,
24800 ++ 0x58, 0x34, 0x0f, 0x9d, 0x63, 0x4f, 0x63, 0xbb,
24801 ++ 0x7f, 0x3b, 0x34, 0x38, 0xa7, 0xb5, 0x8d, 0x65,
24802 ++ 0xd9, 0x9f, 0x79, 0x55, 0x3e, 0x4d, 0xe7, 0x73,
24803 ++ 0xd8, 0xf6, 0x98, 0x97, 0x84, 0x60, 0x9c, 0xc8,
24804 ++ 0xa9, 0x3c, 0xf6, 0xdc, 0x12, 0x5c, 0xe1, 0xbb,
24805 ++ 0x0b, 0x8b, 0x98, 0x9c, 0x9d, 0x26, 0x7c, 0x4a,
24806 ++ 0xe6, 0x46, 0x36, 0x58, 0x21, 0x4a, 0xee, 0xca,
24807 ++ 0xd7, 0x3b, 0xc2, 0x6c, 0x49, 0x2f, 0xe5, 0xd5,
24808 ++ 0x03, 0x59, 0x84, 0x53, 0xcb, 0xfe, 0x92, 0x71,
24809 ++ 0x2e, 0x7c, 0x21, 0xcc, 0x99, 0x85, 0x7f, 0xb8,
24810 ++ 0x74, 0x90, 0x13, 0x42, 0x3f, 0xe0, 0x6b, 0x1d,
24811 ++ 0xf2, 0x4d, 0x54, 0xd4, 0xfc, 0x3a, 0x05, 0xe6,
24812 ++ 0x74, 0xaf, 0xa6, 0xa0, 0x2a, 0x20, 0x23, 0x5d,
24813 ++ 0x34, 0x5c, 0xd9, 0x3e, 0x4e, 0xfa, 0x93, 0xe7,
24814 ++ 0xaa, 0xe9, 0x6f, 0x08, 0x43, 0x67, 0x41, 0xc5,
24815 ++ 0xad, 0xfb, 0x31, 0x95, 0x82, 0x73, 0x32, 0xd8,
24816 ++ 0xa6, 0xa3, 0xed, 0x0e, 0x2d, 0xf6, 0x5f, 0xfd,
24817 ++ 0x80, 0xa6, 0x7a, 0xe0, 0xdf, 0x78, 0x15, 0x29,
24818 ++ 0x74, 0x33, 0xd0, 0x9e, 0x83, 0x86, 0x72, 0x22,
24819 ++ 0x57, 0x29, 0xb9, 0x9e, 0x5d, 0xd3, 0x1a, 0xb5,
24820 ++ 0x96, 0x72, 0x41, 0x3d, 0xf1, 0x64, 0x43, 0x67,
24821 ++ 0xee, 0xaa, 0x5c, 0xd3, 0x9a, 0x96, 0x13, 0x11,
24822 ++ 0x5d, 0xf3, 0x0c, 0x87, 0x82, 0x1e, 0x41, 0x9e,
24823 ++ 0xd0, 0x27, 0xd7, 0x54, 0x3b, 0x67, 0x73, 0x09,
24824 ++ 0x91, 0xe9, 0xd5, 0x36, 0xa7, 0xb5, 0x55, 0xe4,
24825 ++ 0xf3, 0x21, 0x51, 0x49, 0x22, 0x07, 0x55, 0x4f,
24826 ++ 0x44, 0x4b, 0xd2, 0x15, 0x93, 0x17, 0x2a, 0xfa,
24827 ++ 0x4d, 0x4a, 0x57, 0xdb, 0x4c, 0xa6, 0xeb, 0xec,
24828 ++ 0x53, 0x25, 0x6c, 0x21, 0xed, 0x00, 0x4c, 0x3b,
24829 ++ 0xca, 0x14, 0x57, 0xa9, 0xd6, 0x6a, 0xcd, 0x8d,
24830 ++ 0x5e, 0x74, 0xac, 0x72, 0xc1, 0x97, 0xe5, 0x1b,
24831 ++ 0x45, 0x4e, 0xda, 0xfc, 0xcc, 0x40, 0xe8, 0x48,
24832 ++ 0x88, 0x0b, 0xa3, 0xe3, 0x8d, 0x83, 0x42, 0xc3,
24833 ++ 0x23, 0xfd, 0x68, 0xb5, 0x8e, 0xf1, 0x9d, 0x63,
24834 ++ 0x77, 0xe9, 0xa3, 0x8e, 0x8c, 0x26, 0x6b, 0xbd,
24835 ++ 0x72, 0x73, 0x35, 0x0c, 0x03, 0xf8, 0x43, 0x78,
24836 ++ 0x52, 0x71, 0x15, 0x1f, 0x71, 0x5d, 0x6e, 0xed,
24837 ++ 0xb9, 0xcc, 0x86, 0x30, 0xdb, 0x2b, 0xd3, 0x82,
24838 ++ 0x88, 0x23, 0x71, 0x90, 0x53, 0x5c, 0xa9, 0x2f,
24839 ++ 0x76, 0x01, 0xb7, 0x9a, 0xfe, 0x43, 0x55, 0xa3,
24840 ++ 0x04, 0x9b, 0x0e, 0xe4, 0x59, 0xdf, 0xc9, 0xe9,
24841 ++ 0xb1, 0xea, 0x29, 0x28, 0x3c, 0x5c, 0xae, 0x72,
24842 ++ 0x84, 0xb6, 0xc6, 0xeb, 0x0c, 0x27, 0x07, 0x74,
24843 ++ 0x90, 0x0d, 0x31, 0xb0, 0x00, 0x77, 0xe9, 0x40,
24844 ++ 0x70, 0x6f, 0x68, 0xa7, 0xfd, 0x06, 0xec, 0x4b,
24845 ++ 0xc0, 0xb7, 0xac, 0xbc, 0x33, 0xb7, 0x6d, 0x0a,
24846 ++ 0xbd, 0x12, 0x1b, 0x59, 0xcb, 0xdd, 0x32, 0xf5,
24847 ++ 0x1d, 0x94, 0x57, 0x76, 0x9e, 0x0c, 0x18, 0x98,
24848 ++ 0x71, 0xd7, 0x2a, 0xdb, 0x0b, 0x7b, 0xa7, 0x71,
24849 ++ 0xb7, 0x67, 0x81, 0x23, 0x96, 0xae, 0xb9, 0x7e,
24850 ++ 0x32, 0x43, 0x92, 0x8a, 0x19, 0xa0, 0xc4, 0xd4,
24851 ++ 0x3b, 0x57, 0xf9, 0x4a, 0x2c, 0xfb, 0x51, 0x46,
24852 ++ 0xbb, 0xcb, 0x5d, 0xb3, 0xef, 0x13, 0x93, 0x6e,
24853 ++ 0x68, 0x42, 0x54, 0x57, 0xd3, 0x6a, 0x3a, 0x8f,
24854 ++ 0x9d, 0x66, 0xbf, 0xbd, 0x36, 0x23, 0xf5, 0x93,
24855 ++ 0x83, 0x7b, 0x9c, 0xc0, 0xdd, 0xc5, 0x49, 0xc0,
24856 ++ 0x64, 0xed, 0x07, 0x12, 0xb3, 0xe6, 0xe4, 0xe5,
24857 ++ 0x38, 0x95, 0x23, 0xb1, 0xa0, 0x3b, 0x1a, 0x61,
24858 ++ 0xda, 0x17, 0xac, 0xc3, 0x58, 0xdd, 0x74, 0x64,
24859 ++ 0x22, 0x11, 0xe8, 0x32, 0x1d, 0x16, 0x93, 0x85,
24860 ++ 0x99, 0xa5, 0x9c, 0x34, 0x55, 0xb1, 0xe9, 0x20,
24861 ++ 0x72, 0xc9, 0x28, 0x7b, 0x79, 0x00, 0xa1, 0xa6,
24862 ++ 0xa3, 0x27, 0x40, 0x18, 0x8a, 0x54, 0xe0, 0xcc,
24863 ++ 0xe8, 0x4e, 0x8e, 0x43, 0x96, 0xe7, 0x3f, 0xc8,
24864 ++ 0xe9, 0xb2, 0xf9, 0xc9, 0xda, 0x04, 0x71, 0x50,
24865 ++ 0x47, 0xe4, 0xaa, 0xce, 0xa2, 0x30, 0xc8, 0xe4,
24866 ++ 0xac, 0xc7, 0x0d, 0x06, 0x2e, 0xe6, 0xe8, 0x80,
24867 ++ 0x36, 0x29, 0x9e, 0x01, 0xb8, 0xc3, 0xf0, 0xa0,
24868 ++ 0x5d, 0x7a, 0xca, 0x4d, 0xa0, 0x57, 0xbd, 0x2a,
24869 ++ 0x45, 0xa7, 0x7f, 0x9c, 0x93, 0x07, 0x8f, 0x35,
24870 ++ 0x67, 0x92, 0xe3, 0xe9, 0x7f, 0xa8, 0x61, 0x43,
24871 ++ 0x9e, 0x25, 0x4f, 0x33, 0x76, 0x13, 0x6e, 0x12,
24872 ++ 0xb9, 0xdd, 0xa4, 0x7c, 0x08, 0x9f, 0x7c, 0xe7,
24873 ++ 0x0a, 0x8d, 0x84, 0x06, 0xa4, 0x33, 0x17, 0x34,
24874 ++ 0x5e, 0x10, 0x7c, 0xc0, 0xa8, 0x3d, 0x1f, 0x42,
24875 ++ 0x20, 0x51, 0x65, 0x5d, 0x09, 0xc3, 0xaa, 0xc0,
24876 ++ 0xc8, 0x0d, 0xf0, 0x79, 0xbc, 0x20, 0x1b, 0x95,
24877 ++ 0xe7, 0x06, 0x7d, 0x47, 0x20, 0x03, 0x1a, 0x74,
24878 ++ 0xdd, 0xe2, 0xd4, 0xae, 0x38, 0x71, 0x9b, 0xf5,
24879 ++ 0x80, 0xec, 0x08, 0x4e, 0x56, 0xba, 0x76, 0x12,
24880 ++ 0x1a, 0xdf, 0x48, 0xf3, 0xae, 0xb3, 0xe6, 0xe6,
24881 ++ 0xbe, 0xc0, 0x91, 0x2e, 0x01, 0xb3, 0x01, 0x86,
24882 ++ 0xa2, 0xb9, 0x52, 0xd1, 0x21, 0xae, 0xd4, 0x97,
24883 ++ 0x1d, 0xef, 0x41, 0x12, 0x95, 0x3d, 0x48, 0x45,
24884 ++ 0x1c, 0x56, 0x32, 0x8f, 0xb8, 0x43, 0xbb, 0x19,
24885 ++ 0xf3, 0xca, 0xe9, 0xeb, 0x6d, 0x84, 0xbe, 0x86,
24886 ++ 0x06, 0xe2, 0x36, 0xb2, 0x62, 0x9d, 0xd3, 0x4c,
24887 ++ 0x48, 0x18, 0x54, 0x13, 0x4e, 0xcf, 0xfd, 0xba,
24888 ++ 0x84, 0xb9, 0x30, 0x53, 0xcf, 0xfb, 0xb9, 0x29,
24889 ++ 0x8f, 0xdc, 0x9f, 0xef, 0x60, 0x0b, 0x64, 0xf6,
24890 ++ 0x8b, 0xee, 0xa6, 0x91, 0xc2, 0x41, 0x6c, 0xf6,
24891 ++ 0xfa, 0x79, 0x67, 0x4b, 0xc1, 0x3f, 0xaf, 0x09,
24892 ++ 0x81, 0xd4, 0x5d, 0xcb, 0x09, 0xdf, 0x36, 0x31,
24893 ++ 0xc0, 0x14, 0x3c, 0x7c, 0x0e, 0x65, 0x95, 0x99,
24894 ++ 0x6d, 0xa3, 0xf4, 0xd7, 0x38, 0xee, 0x1a, 0x2b,
24895 ++ 0x37, 0xe2, 0xa4, 0x3b, 0x4b, 0xd0, 0x65, 0xca,
24896 ++ 0xf8, 0xc3, 0xe8, 0x15, 0x20, 0xef, 0xf2, 0x00,
24897 ++ 0xfd, 0x01, 0x09, 0xc5, 0xc8, 0x17, 0x04, 0x93,
24898 ++ 0xd0, 0x93, 0x03, 0x55, 0xc5, 0xfe, 0x32, 0xa3,
24899 ++ 0x3e, 0x28, 0x2d, 0x3b, 0x93, 0x8a, 0xcc, 0x07,
24900 ++ 0x72, 0x80, 0x8b, 0x74, 0x16, 0x24, 0xbb, 0xda,
24901 ++ 0x94, 0x39, 0x30, 0x8f, 0xb1, 0xcd, 0x4a, 0x90,
24902 ++ 0x92, 0x7c, 0x14, 0x8f, 0x95, 0x4e, 0xac, 0x9b,
24903 ++ 0xd8, 0x8f, 0x1a, 0x87, 0xa4, 0x32, 0x27, 0x8a,
24904 ++ 0xba, 0xf7, 0x41, 0xcf, 0x84, 0x37, 0x19, 0xe6,
24905 ++ 0x06, 0xf5, 0x0e, 0xcf, 0x36, 0xf5, 0x9e, 0x6c,
24906 ++ 0xde, 0xbc, 0xff, 0x64, 0x7e, 0x4e, 0x59, 0x57,
24907 ++ 0x48, 0xfe, 0x14, 0xf7, 0x9c, 0x93, 0x5d, 0x15,
24908 ++ 0xad, 0xcc, 0x11, 0xb1, 0x17, 0x18, 0xb2, 0x7e,
24909 ++ 0xcc, 0xab, 0xe9, 0xce, 0x7d, 0x77, 0x5b, 0x51,
24910 ++ 0x1b, 0x1e, 0x20, 0xa8, 0x32, 0x06, 0x0e, 0x75,
24911 ++ 0x93, 0xac, 0xdb, 0x35, 0x37, 0x1f, 0xe9, 0x19,
24912 ++ 0x1d, 0xb4, 0x71, 0x97, 0xd6, 0x4e, 0x2c, 0x08,
24913 ++ 0xa5, 0x13, 0xf9, 0x0e, 0x7e, 0x78, 0x6e, 0x14,
24914 ++ 0xe0, 0xa9, 0xb9, 0x96, 0x4c, 0x80, 0x82, 0xba,
24915 ++ 0x17, 0xb3, 0x9d, 0x69, 0xb0, 0x84, 0x46, 0xff,
24916 ++ 0xf9, 0x52, 0x79, 0x94, 0x58, 0x3a, 0x62, 0x90,
24917 ++ 0x15, 0x35, 0x71, 0x10, 0x37, 0xed, 0xa1, 0x8e,
24918 ++ 0x53, 0x6e, 0xf4, 0x26, 0x57, 0x93, 0x15, 0x93,
24919 ++ 0xf6, 0x81, 0x2c, 0x5a, 0x10, 0xda, 0x92, 0xad,
24920 ++ 0x2f, 0xdb, 0x28, 0x31, 0x2d, 0x55, 0x04, 0xd2,
24921 ++ 0x06, 0x28, 0x8c, 0x1e, 0xdc, 0xea, 0x54, 0xac,
24922 ++ 0xff, 0xb7, 0x6c, 0x30, 0x15, 0xd4, 0xb4, 0x0d,
24923 ++ 0x00, 0x93, 0x57, 0xdd, 0xd2, 0x07, 0x07, 0x06,
24924 ++ 0xd9, 0x43, 0x9b, 0xcd, 0x3a, 0xf4, 0x7d, 0x4c,
24925 ++ 0x36, 0x5d, 0x23, 0xa2, 0xcc, 0x57, 0x40, 0x91,
24926 ++ 0xe9, 0x2c, 0x2f, 0x2c, 0xd5, 0x30, 0x9b, 0x17,
24927 ++ 0xb0, 0xc9, 0xf7, 0xa7, 0x2f, 0xd1, 0x93, 0x20,
24928 ++ 0x6b, 0xc6, 0xc1, 0xe4, 0x6f, 0xcb, 0xd1, 0xe7,
24929 ++ 0x09, 0x0f, 0x9e, 0xdc, 0xaa, 0x9f, 0x2f, 0xdf,
24930 ++ 0x56, 0x9f, 0xd4, 0x33, 0x04, 0xaf, 0xd3, 0x6c,
24931 ++ 0x58, 0x61, 0xf0, 0x30, 0xec, 0xf2, 0x7f, 0xf2,
24932 ++ 0x9c, 0xdf, 0x39, 0xbb, 0x6f, 0xa2, 0x8c, 0x7e,
24933 ++ 0xc4, 0x22, 0x51, 0x71, 0xc0, 0x4d, 0x14, 0x1a,
24934 ++ 0xc4, 0xcd, 0x04, 0xd9, 0x87, 0x08, 0x50, 0x05,
24935 ++ 0xcc, 0xaf, 0xf6, 0xf0, 0x8f, 0x92, 0x54, 0x58,
24936 ++ 0xc2, 0xc7, 0x09, 0x7a, 0x59, 0x02, 0x05, 0xe8,
24937 ++ 0xb0, 0x86, 0xd9, 0xbf, 0x7b, 0x35, 0x51, 0x4d,
24938 ++ 0xaf, 0x08, 0x97, 0x2c, 0x65, 0xda, 0x2a, 0x71,
24939 ++ 0x3a, 0xa8, 0x51, 0xcc, 0xf2, 0x73, 0x27, 0xc3,
24940 ++ 0xfd, 0x62, 0xcf, 0xe3, 0xb2, 0xca, 0xcb, 0xbe,
24941 ++ 0x1a, 0x0a, 0xa1, 0x34, 0x7b, 0x77, 0xc4, 0x62,
24942 ++ 0x68, 0x78, 0x5f, 0x94, 0x07, 0x04, 0x65, 0x16,
24943 ++ 0x4b, 0x61, 0xcb, 0xff, 0x75, 0x26, 0x50, 0x66,
24944 ++ 0x1f, 0x6e, 0x93, 0xf8, 0xc5, 0x51, 0xeb, 0xa4,
24945 ++ 0x4a, 0x48, 0x68, 0x6b, 0xe2, 0x5e, 0x44, 0xb2,
24946 ++ 0x50, 0x2c, 0x6c, 0xae, 0x79, 0x4e, 0x66, 0x35,
24947 ++ 0x81, 0x50, 0xac, 0xbc, 0x3f, 0xb1, 0x0c, 0xf3,
24948 ++ 0x05, 0x3c, 0x4a, 0xa3, 0x6c, 0x2a, 0x79, 0xb4,
24949 ++ 0xb7, 0xab, 0xca, 0xc7, 0x9b, 0x8e, 0xcd, 0x5f,
24950 ++ 0x11, 0x03, 0xcb, 0x30, 0xa3, 0xab, 0xda, 0xfe,
24951 ++ 0x64, 0xb9, 0xbb, 0xd8, 0x5e, 0x3a, 0x1a, 0x56,
24952 ++ 0xe5, 0x05, 0x48, 0x90, 0x1e, 0x61, 0x69, 0x1b,
24953 ++ 0x22, 0xe6, 0x1a, 0x3c, 0x75, 0xad, 0x1f, 0x37,
24954 ++ 0x28, 0xdc, 0xe4, 0x6d, 0xbd, 0x42, 0xdc, 0xd3,
24955 ++ 0xc8, 0xb6, 0x1c, 0x48, 0xfe, 0x94, 0x77, 0x7f,
24956 ++ 0xbd, 0x62, 0xac, 0xa3, 0x47, 0x27, 0xcf, 0x5f,
24957 ++ 0xd9, 0xdb, 0xaf, 0xec, 0xf7, 0x5e, 0xc1, 0xb0,
24958 ++ 0x9d, 0x01, 0x26, 0x99, 0x7e, 0x8f, 0x03, 0x70,
24959 ++ 0xb5, 0x42, 0xbe, 0x67, 0x28, 0x1b, 0x7c, 0xbd,
24960 ++ 0x61, 0x21, 0x97, 0xcc, 0x5c, 0xe1, 0x97, 0x8f,
24961 ++ 0x8d, 0xde, 0x2b, 0xaa, 0xa7, 0x71, 0x1d, 0x1e,
24962 ++ 0x02, 0x73, 0x70, 0x58, 0x32, 0x5b, 0x1d, 0x67,
24963 ++ 0x3d, 0xe0, 0x74, 0x4f, 0x03, 0xf2, 0x70, 0x51,
24964 ++ 0x79, 0xf1, 0x61, 0x70, 0x15, 0x74, 0x9d, 0x23,
24965 ++ 0x89, 0xde, 0xac, 0xfd, 0xde, 0xd0, 0x1f, 0xc3,
24966 ++ 0x87, 0x44, 0x35, 0x4b, 0xe5, 0xb0, 0x60, 0xc5,
24967 ++ 0x22, 0xe4, 0x9e, 0xca, 0xeb, 0xd5, 0x3a, 0x09,
24968 ++ 0x45, 0xa4, 0xdb, 0xfa, 0x3f, 0xeb, 0x1b, 0xc7,
24969 ++ 0xc8, 0x14, 0x99, 0x51, 0x92, 0x10, 0xed, 0xed,
24970 ++ 0x28, 0xe0, 0xa1, 0xf8, 0x26, 0xcf, 0xcd, 0xcb,
24971 ++ 0x63, 0xa1, 0x3b, 0xe3, 0xdf, 0x7e, 0xfe, 0xa6,
24972 ++ 0xf0, 0x81, 0x9a, 0xbf, 0x55, 0xde, 0x54, 0xd5,
24973 ++ 0x56, 0x60, 0x98, 0x10, 0x68, 0xf4, 0x38, 0x96,
24974 ++ 0x8e, 0x6f, 0x1d, 0x44, 0x7f, 0xd6, 0x2f, 0xfe,
24975 ++ 0x55, 0xfb, 0x0c, 0x7e, 0x67, 0xe2, 0x61, 0x44,
24976 ++ 0xed, 0xf2, 0x35, 0x30, 0x5d, 0xe9, 0xc7, 0xd6,
24977 ++ 0x6d, 0xe0, 0xa0, 0xed, 0xf3, 0xfc, 0xd8, 0x3e,
24978 ++ 0x0a, 0x7b, 0xcd, 0xaf, 0x65, 0x68, 0x18, 0xc0,
24979 ++ 0xec, 0x04, 0x1c, 0x74, 0x6d, 0xe2, 0x6e, 0x79,
24980 ++ 0xd4, 0x11, 0x2b, 0x62, 0xd5, 0x27, 0xad, 0x4f,
24981 ++ 0x01, 0x59, 0x73, 0xcc, 0x6a, 0x53, 0xfb, 0x2d,
24982 ++ 0xd5, 0x4e, 0x99, 0x21, 0x65, 0x4d, 0xf5, 0x82,
24983 ++ 0xf7, 0xd8, 0x42, 0xce, 0x6f, 0x3d, 0x36, 0x47,
24984 ++ 0xf1, 0x05, 0x16, 0xe8, 0x1b, 0x6a, 0x8f, 0x93,
24985 ++ 0xf2, 0x8f, 0x37, 0x40, 0x12, 0x28, 0xa3, 0xe6,
24986 ++ 0xb9, 0x17, 0x4a, 0x1f, 0xb1, 0xd1, 0x66, 0x69,
24987 ++ 0x86, 0xc4, 0xfc, 0x97, 0xae, 0x3f, 0x8f, 0x1e,
24988 ++ 0x2b, 0xdf, 0xcd, 0xf9, 0x3c
24989 ++};
24990 ++static const u8 enc_assoc011[] __initconst = {
24991 ++ 0xd6, 0x31, 0xda, 0x5d, 0x42, 0x5e, 0xd7
24992 ++};
24993 ++static const u8 enc_nonce011[] __initconst = {
24994 ++ 0xfd, 0x87, 0xd4, 0xd8, 0x62, 0xfd, 0xec, 0xaa
24995 ++};
24996 ++static const u8 enc_key011[] __initconst = {
24997 ++ 0x35, 0x4e, 0xb5, 0x70, 0x50, 0x42, 0x8a, 0x85,
24998 ++ 0xf2, 0xfb, 0xed, 0x7b, 0xd0, 0x9e, 0x97, 0xca,
24999 ++ 0xfa, 0x98, 0x66, 0x63, 0xee, 0x37, 0xcc, 0x52,
25000 ++ 0xfe, 0xd1, 0xdf, 0x95, 0x15, 0x34, 0x29, 0x38
25001 ++};
25002 ++
25003 ++static const u8 enc_input012[] __initconst = {
25004 ++ 0x74, 0xa6, 0x3e, 0xe4, 0xb1, 0xcb, 0xaf, 0xb0,
25005 ++ 0x40, 0xe5, 0x0f, 0x9e, 0xf1, 0xf2, 0x89, 0xb5,
25006 ++ 0x42, 0x34, 0x8a, 0xa1, 0x03, 0xb7, 0xe9, 0x57,
25007 ++ 0x46, 0xbe, 0x20, 0xe4, 0x6e, 0xb0, 0xeb, 0xff,
25008 ++ 0xea, 0x07, 0x7e, 0xef, 0xe2, 0x55, 0x9f, 0xe5,
25009 ++ 0x78, 0x3a, 0xb7, 0x83, 0xc2, 0x18, 0x40, 0x7b,
25010 ++ 0xeb, 0xcd, 0x81, 0xfb, 0x90, 0x12, 0x9e, 0x46,
25011 ++ 0xa9, 0xd6, 0x4a, 0xba, 0xb0, 0x62, 0xdb, 0x6b,
25012 ++ 0x99, 0xc4, 0xdb, 0x54, 0x4b, 0xb8, 0xa5, 0x71,
25013 ++ 0xcb, 0xcd, 0x63, 0x32, 0x55, 0xfb, 0x31, 0xf0,
25014 ++ 0x38, 0xf5, 0xbe, 0x78, 0xe4, 0x45, 0xce, 0x1b,
25015 ++ 0x6a, 0x5b, 0x0e, 0xf4, 0x16, 0xe4, 0xb1, 0x3d,
25016 ++ 0xf6, 0x63, 0x7b, 0xa7, 0x0c, 0xde, 0x6f, 0x8f,
25017 ++ 0x74, 0xdf, 0xe0, 0x1e, 0x9d, 0xce, 0x8f, 0x24,
25018 ++ 0xef, 0x23, 0x35, 0x33, 0x7b, 0x83, 0x34, 0x23,
25019 ++ 0x58, 0x74, 0x14, 0x77, 0x1f, 0xc2, 0x4f, 0x4e,
25020 ++ 0xc6, 0x89, 0xf9, 0x52, 0x09, 0x37, 0x64, 0x14,
25021 ++ 0xc4, 0x01, 0x6b, 0x9d, 0x77, 0xe8, 0x90, 0x5d,
25022 ++ 0xa8, 0x4a, 0x2a, 0xef, 0x5c, 0x7f, 0xeb, 0xbb,
25023 ++ 0xb2, 0xc6, 0x93, 0x99, 0x66, 0xdc, 0x7f, 0xd4,
25024 ++ 0x9e, 0x2a, 0xca, 0x8d, 0xdb, 0xe7, 0x20, 0xcf,
25025 ++ 0xe4, 0x73, 0xae, 0x49, 0x7d, 0x64, 0x0f, 0x0e,
25026 ++ 0x28, 0x46, 0xa9, 0xa8, 0x32, 0xe4, 0x0e, 0xf6,
25027 ++ 0x51, 0x53, 0xb8, 0x3c, 0xb1, 0xff, 0xa3, 0x33,
25028 ++ 0x41, 0x75, 0xff, 0xf1, 0x6f, 0xf1, 0xfb, 0xbb,
25029 ++ 0x83, 0x7f, 0x06, 0x9b, 0xe7, 0x1b, 0x0a, 0xe0,
25030 ++ 0x5c, 0x33, 0x60, 0x5b, 0xdb, 0x5b, 0xed, 0xfe,
25031 ++ 0xa5, 0x16, 0x19, 0x72, 0xa3, 0x64, 0x23, 0x00,
25032 ++ 0x02, 0xc7, 0xf3, 0x6a, 0x81, 0x3e, 0x44, 0x1d,
25033 ++ 0x79, 0x15, 0x5f, 0x9a, 0xde, 0xe2, 0xfd, 0x1b,
25034 ++ 0x73, 0xc1, 0xbc, 0x23, 0xba, 0x31, 0xd2, 0x50,
25035 ++ 0xd5, 0xad, 0x7f, 0x74, 0xa7, 0xc9, 0xf8, 0x3e,
25036 ++ 0x2b, 0x26, 0x10, 0xf6, 0x03, 0x36, 0x74, 0xe4,
25037 ++ 0x0e, 0x6a, 0x72, 0xb7, 0x73, 0x0a, 0x42, 0x28,
25038 ++ 0xc2, 0xad, 0x5e, 0x03, 0xbe, 0xb8, 0x0b, 0xa8,
25039 ++ 0x5b, 0xd4, 0xb8, 0xba, 0x52, 0x89, 0xb1, 0x9b,
25040 ++ 0xc1, 0xc3, 0x65, 0x87, 0xed, 0xa5, 0xf4, 0x86,
25041 ++ 0xfd, 0x41, 0x80, 0x91, 0x27, 0x59, 0x53, 0x67,
25042 ++ 0x15, 0x78, 0x54, 0x8b, 0x2d, 0x3d, 0xc7, 0xff,
25043 ++ 0x02, 0x92, 0x07, 0x5f, 0x7a, 0x4b, 0x60, 0x59,
25044 ++ 0x3c, 0x6f, 0x5c, 0xd8, 0xec, 0x95, 0xd2, 0xfe,
25045 ++ 0xa0, 0x3b, 0xd8, 0x3f, 0xd1, 0x69, 0xa6, 0xd6,
25046 ++ 0x41, 0xb2, 0xf4, 0x4d, 0x12, 0xf4, 0x58, 0x3e,
25047 ++ 0x66, 0x64, 0x80, 0x31, 0x9b, 0xa8, 0x4c, 0x8b,
25048 ++ 0x07, 0xb2, 0xec, 0x66, 0x94, 0x66, 0x47, 0x50,
25049 ++ 0x50, 0x5f, 0x18, 0x0b, 0x0e, 0xd6, 0xc0, 0x39,
25050 ++ 0x21, 0x13, 0x9e, 0x33, 0xbc, 0x79, 0x36, 0x02,
25051 ++ 0x96, 0x70, 0xf0, 0x48, 0x67, 0x2f, 0x26, 0xe9,
25052 ++ 0x6d, 0x10, 0xbb, 0xd6, 0x3f, 0xd1, 0x64, 0x7a,
25053 ++ 0x2e, 0xbe, 0x0c, 0x61, 0xf0, 0x75, 0x42, 0x38,
25054 ++ 0x23, 0xb1, 0x9e, 0x9f, 0x7c, 0x67, 0x66, 0xd9,
25055 ++ 0x58, 0x9a, 0xf1, 0xbb, 0x41, 0x2a, 0x8d, 0x65,
25056 ++ 0x84, 0x94, 0xfc, 0xdc, 0x6a, 0x50, 0x64, 0xdb,
25057 ++ 0x56, 0x33, 0x76, 0x00, 0x10, 0xed, 0xbe, 0xd2,
25058 ++ 0x12, 0xf6, 0xf6, 0x1b, 0xa2, 0x16, 0xde, 0xae,
25059 ++ 0x31, 0x95, 0xdd, 0xb1, 0x08, 0x7e, 0x4e, 0xee,
25060 ++ 0xe7, 0xf9, 0xa5, 0xfb, 0x5b, 0x61, 0x43, 0x00,
25061 ++ 0x40, 0xf6, 0x7e, 0x02, 0x04, 0x32, 0x4e, 0x0c,
25062 ++ 0xe2, 0x66, 0x0d, 0xd7, 0x07, 0x98, 0x0e, 0xf8,
25063 ++ 0x72, 0x34, 0x6d, 0x95, 0x86, 0xd7, 0xcb, 0x31,
25064 ++ 0x54, 0x47, 0xd0, 0x38, 0x29, 0x9c, 0x5a, 0x68,
25065 ++ 0xd4, 0x87, 0x76, 0xc9, 0xe7, 0x7e, 0xe3, 0xf4,
25066 ++ 0x81, 0x6d, 0x18, 0xcb, 0xc9, 0x05, 0xaf, 0xa0,
25067 ++ 0xfb, 0x66, 0xf7, 0xf1, 0x1c, 0xc6, 0x14, 0x11,
25068 ++ 0x4f, 0x2b, 0x79, 0x42, 0x8b, 0xbc, 0xac, 0xe7,
25069 ++ 0x6c, 0xfe, 0x0f, 0x58, 0xe7, 0x7c, 0x78, 0x39,
25070 ++ 0x30, 0xb0, 0x66, 0x2c, 0x9b, 0x6d, 0x3a, 0xe1,
25071 ++ 0xcf, 0xc9, 0xa4, 0x0e, 0x6d, 0x6d, 0x8a, 0xa1,
25072 ++ 0x3a, 0xe7, 0x28, 0xd4, 0x78, 0x4c, 0xa6, 0xa2,
25073 ++ 0x2a, 0xa6, 0x03, 0x30, 0xd7, 0xa8, 0x25, 0x66,
25074 ++ 0x87, 0x2f, 0x69, 0x5c, 0x4e, 0xdd, 0xa5, 0x49,
25075 ++ 0x5d, 0x37, 0x4a, 0x59, 0xc4, 0xaf, 0x1f, 0xa2,
25076 ++ 0xe4, 0xf8, 0xa6, 0x12, 0x97, 0xd5, 0x79, 0xf5,
25077 ++ 0xe2, 0x4a, 0x2b, 0x5f, 0x61, 0xe4, 0x9e, 0xe3,
25078 ++ 0xee, 0xb8, 0xa7, 0x5b, 0x2f, 0xf4, 0x9e, 0x6c,
25079 ++ 0xfb, 0xd1, 0xc6, 0x56, 0x77, 0xba, 0x75, 0xaa,
25080 ++ 0x3d, 0x1a, 0xa8, 0x0b, 0xb3, 0x68, 0x24, 0x00,
25081 ++ 0x10, 0x7f, 0xfd, 0xd7, 0xa1, 0x8d, 0x83, 0x54,
25082 ++ 0x4f, 0x1f, 0xd8, 0x2a, 0xbe, 0x8a, 0x0c, 0x87,
25083 ++ 0xab, 0xa2, 0xde, 0xc3, 0x39, 0xbf, 0x09, 0x03,
25084 ++ 0xa5, 0xf3, 0x05, 0x28, 0xe1, 0xe1, 0xee, 0x39,
25085 ++ 0x70, 0x9c, 0xd8, 0x81, 0x12, 0x1e, 0x02, 0x40,
25086 ++ 0xd2, 0x6e, 0xf0, 0xeb, 0x1b, 0x3d, 0x22, 0xc6,
25087 ++ 0xe5, 0xe3, 0xb4, 0x5a, 0x98, 0xbb, 0xf0, 0x22,
25088 ++ 0x28, 0x8d, 0xe5, 0xd3, 0x16, 0x48, 0x24, 0xa5,
25089 ++ 0xe6, 0x66, 0x0c, 0xf9, 0x08, 0xf9, 0x7e, 0x1e,
25090 ++ 0xe1, 0x28, 0x26, 0x22, 0xc7, 0xc7, 0x0a, 0x32,
25091 ++ 0x47, 0xfa, 0xa3, 0xbe, 0x3c, 0xc4, 0xc5, 0x53,
25092 ++ 0x0a, 0xd5, 0x94, 0x4a, 0xd7, 0x93, 0xd8, 0x42,
25093 ++ 0x99, 0xb9, 0x0a, 0xdb, 0x56, 0xf7, 0xb9, 0x1c,
25094 ++ 0x53, 0x4f, 0xfa, 0xd3, 0x74, 0xad, 0xd9, 0x68,
25095 ++ 0xf1, 0x1b, 0xdf, 0x61, 0xc6, 0x5e, 0xa8, 0x48,
25096 ++ 0xfc, 0xd4, 0x4a, 0x4c, 0x3c, 0x32, 0xf7, 0x1c,
25097 ++ 0x96, 0x21, 0x9b, 0xf9, 0xa3, 0xcc, 0x5a, 0xce,
25098 ++ 0xd5, 0xd7, 0x08, 0x24, 0xf6, 0x1c, 0xfd, 0xdd,
25099 ++ 0x38, 0xc2, 0x32, 0xe9, 0xb8, 0xe7, 0xb6, 0xfa,
25100 ++ 0x9d, 0x45, 0x13, 0x2c, 0x83, 0xfd, 0x4a, 0x69,
25101 ++ 0x82, 0xcd, 0xdc, 0xb3, 0x76, 0x0c, 0x9e, 0xd8,
25102 ++ 0xf4, 0x1b, 0x45, 0x15, 0xb4, 0x97, 0xe7, 0x58,
25103 ++ 0x34, 0xe2, 0x03, 0x29, 0x5a, 0xbf, 0xb6, 0xe0,
25104 ++ 0x5d, 0x13, 0xd9, 0x2b, 0xb4, 0x80, 0xb2, 0x45,
25105 ++ 0x81, 0x6a, 0x2e, 0x6c, 0x89, 0x7d, 0xee, 0xbb,
25106 ++ 0x52, 0xdd, 0x1f, 0x18, 0xe7, 0x13, 0x6b, 0x33,
25107 ++ 0x0e, 0xea, 0x36, 0x92, 0x77, 0x7b, 0x6d, 0x9c,
25108 ++ 0x5a, 0x5f, 0x45, 0x7b, 0x7b, 0x35, 0x62, 0x23,
25109 ++ 0xd1, 0xbf, 0x0f, 0xd0, 0x08, 0x1b, 0x2b, 0x80,
25110 ++ 0x6b, 0x7e, 0xf1, 0x21, 0x47, 0xb0, 0x57, 0xd1,
25111 ++ 0x98, 0x72, 0x90, 0x34, 0x1c, 0x20, 0x04, 0xff,
25112 ++ 0x3d, 0x5c, 0xee, 0x0e, 0x57, 0x5f, 0x6f, 0x24,
25113 ++ 0x4e, 0x3c, 0xea, 0xfc, 0xa5, 0xa9, 0x83, 0xc9,
25114 ++ 0x61, 0xb4, 0x51, 0x24, 0xf8, 0x27, 0x5e, 0x46,
25115 ++ 0x8c, 0xb1, 0x53, 0x02, 0x96, 0x35, 0xba, 0xb8,
25116 ++ 0x4c, 0x71, 0xd3, 0x15, 0x59, 0x35, 0x22, 0x20,
25117 ++ 0xad, 0x03, 0x9f, 0x66, 0x44, 0x3b, 0x9c, 0x35,
25118 ++ 0x37, 0x1f, 0x9b, 0xbb, 0xf3, 0xdb, 0x35, 0x63,
25119 ++ 0x30, 0x64, 0xaa, 0xa2, 0x06, 0xa8, 0x5d, 0xbb,
25120 ++ 0xe1, 0x9f, 0x70, 0xec, 0x82, 0x11, 0x06, 0x36,
25121 ++ 0xec, 0x8b, 0x69, 0x66, 0x24, 0x44, 0xc9, 0x4a,
25122 ++ 0x57, 0xbb, 0x9b, 0x78, 0x13, 0xce, 0x9c, 0x0c,
25123 ++ 0xba, 0x92, 0x93, 0x63, 0xb8, 0xe2, 0x95, 0x0f,
25124 ++ 0x0f, 0x16, 0x39, 0x52, 0xfd, 0x3a, 0x6d, 0x02,
25125 ++ 0x4b, 0xdf, 0x13, 0xd3, 0x2a, 0x22, 0xb4, 0x03,
25126 ++ 0x7c, 0x54, 0x49, 0x96, 0x68, 0x54, 0x10, 0xfa,
25127 ++ 0xef, 0xaa, 0x6c, 0xe8, 0x22, 0xdc, 0x71, 0x16,
25128 ++ 0x13, 0x1a, 0xf6, 0x28, 0xe5, 0x6d, 0x77, 0x3d,
25129 ++ 0xcd, 0x30, 0x63, 0xb1, 0x70, 0x52, 0xa1, 0xc5,
25130 ++ 0x94, 0x5f, 0xcf, 0xe8, 0xb8, 0x26, 0x98, 0xf7,
25131 ++ 0x06, 0xa0, 0x0a, 0x70, 0xfa, 0x03, 0x80, 0xac,
25132 ++ 0xc1, 0xec, 0xd6, 0x4c, 0x54, 0xd7, 0xfe, 0x47,
25133 ++ 0xb6, 0x88, 0x4a, 0xf7, 0x71, 0x24, 0xee, 0xf3,
25134 ++ 0xd2, 0xc2, 0x4a, 0x7f, 0xfe, 0x61, 0xc7, 0x35,
25135 ++ 0xc9, 0x37, 0x67, 0xcb, 0x24, 0x35, 0xda, 0x7e,
25136 ++ 0xca, 0x5f, 0xf3, 0x8d, 0xd4, 0x13, 0x8e, 0xd6,
25137 ++ 0xcb, 0x4d, 0x53, 0x8f, 0x53, 0x1f, 0xc0, 0x74,
25138 ++ 0xf7, 0x53, 0xb9, 0x5e, 0x23, 0x37, 0xba, 0x6e,
25139 ++ 0xe3, 0x9d, 0x07, 0x55, 0x25, 0x7b, 0xe6, 0x2a,
25140 ++ 0x64, 0xd1, 0x32, 0xdd, 0x54, 0x1b, 0x4b, 0xc0,
25141 ++ 0xe1, 0xd7, 0x69, 0x58, 0xf8, 0x93, 0x29, 0xc4,
25142 ++ 0xdd, 0x23, 0x2f, 0xa5, 0xfc, 0x9d, 0x7e, 0xf8,
25143 ++ 0xd4, 0x90, 0xcd, 0x82, 0x55, 0xdc, 0x16, 0x16,
25144 ++ 0x9f, 0x07, 0x52, 0x9b, 0x9d, 0x25, 0xed, 0x32,
25145 ++ 0xc5, 0x7b, 0xdf, 0xf6, 0x83, 0x46, 0x3d, 0x65,
25146 ++ 0xb7, 0xef, 0x87, 0x7a, 0x12, 0x69, 0x8f, 0x06,
25147 ++ 0x7c, 0x51, 0x15, 0x4a, 0x08, 0xe8, 0xac, 0x9a,
25148 ++ 0x0c, 0x24, 0xa7, 0x27, 0xd8, 0x46, 0x2f, 0xe7,
25149 ++ 0x01, 0x0e, 0x1c, 0xc6, 0x91, 0xb0, 0x6e, 0x85,
25150 ++ 0x65, 0xf0, 0x29, 0x0d, 0x2e, 0x6b, 0x3b, 0xfb,
25151 ++ 0x4b, 0xdf, 0xe4, 0x80, 0x93, 0x03, 0x66, 0x46,
25152 ++ 0x3e, 0x8a, 0x6e, 0xf3, 0x5e, 0x4d, 0x62, 0x0e,
25153 ++ 0x49, 0x05, 0xaf, 0xd4, 0xf8, 0x21, 0x20, 0x61,
25154 ++ 0x1d, 0x39, 0x17, 0xf4, 0x61, 0x47, 0x95, 0xfb,
25155 ++ 0x15, 0x2e, 0xb3, 0x4f, 0xd0, 0x5d, 0xf5, 0x7d,
25156 ++ 0x40, 0xda, 0x90, 0x3c, 0x6b, 0xcb, 0x17, 0x00,
25157 ++ 0x13, 0x3b, 0x64, 0x34, 0x1b, 0xf0, 0xf2, 0xe5,
25158 ++ 0x3b, 0xb2, 0xc7, 0xd3, 0x5f, 0x3a, 0x44, 0xa6,
25159 ++ 0x9b, 0xb7, 0x78, 0x0e, 0x42, 0x5d, 0x4c, 0xc1,
25160 ++ 0xe9, 0xd2, 0xcb, 0xb7, 0x78, 0xd1, 0xfe, 0x9a,
25161 ++ 0xb5, 0x07, 0xe9, 0xe0, 0xbe, 0xe2, 0x8a, 0xa7,
25162 ++ 0x01, 0x83, 0x00, 0x8c, 0x5c, 0x08, 0xe6, 0x63,
25163 ++ 0x12, 0x92, 0xb7, 0xb7, 0xa6, 0x19, 0x7d, 0x38,
25164 ++ 0x13, 0x38, 0x92, 0x87, 0x24, 0xf9, 0x48, 0xb3,
25165 ++ 0x5e, 0x87, 0x6a, 0x40, 0x39, 0x5c, 0x3f, 0xed,
25166 ++ 0x8f, 0xee, 0xdb, 0x15, 0x82, 0x06, 0xda, 0x49,
25167 ++ 0x21, 0x2b, 0xb5, 0xbf, 0x32, 0x7c, 0x9f, 0x42,
25168 ++ 0x28, 0x63, 0xcf, 0xaf, 0x1e, 0xf8, 0xc6, 0xa0,
25169 ++ 0xd1, 0x02, 0x43, 0x57, 0x62, 0xec, 0x9b, 0x0f,
25170 ++ 0x01, 0x9e, 0x71, 0xd8, 0x87, 0x9d, 0x01, 0xc1,
25171 ++ 0x58, 0x77, 0xd9, 0xaf, 0xb1, 0x10, 0x7e, 0xdd,
25172 ++ 0xa6, 0x50, 0x96, 0xe5, 0xf0, 0x72, 0x00, 0x6d,
25173 ++ 0x4b, 0xf8, 0x2a, 0x8f, 0x19, 0xf3, 0x22, 0x88,
25174 ++ 0x11, 0x4a, 0x8b, 0x7c, 0xfd, 0xb7, 0xed, 0xe1,
25175 ++ 0xf6, 0x40, 0x39, 0xe0, 0xe9, 0xf6, 0x3d, 0x25,
25176 ++ 0xe6, 0x74, 0x3c, 0x58, 0x57, 0x7f, 0xe1, 0x22,
25177 ++ 0x96, 0x47, 0x31, 0x91, 0xba, 0x70, 0x85, 0x28,
25178 ++ 0x6b, 0x9f, 0x6e, 0x25, 0xac, 0x23, 0x66, 0x2f,
25179 ++ 0x29, 0x88, 0x28, 0xce, 0x8c, 0x5c, 0x88, 0x53,
25180 ++ 0xd1, 0x3b, 0xcc, 0x6a, 0x51, 0xb2, 0xe1, 0x28,
25181 ++ 0x3f, 0x91, 0xb4, 0x0d, 0x00, 0x3a, 0xe3, 0xf8,
25182 ++ 0xc3, 0x8f, 0xd7, 0x96, 0x62, 0x0e, 0x2e, 0xfc,
25183 ++ 0xc8, 0x6c, 0x77, 0xa6, 0x1d, 0x22, 0xc1, 0xb8,
25184 ++ 0xe6, 0x61, 0xd7, 0x67, 0x36, 0x13, 0x7b, 0xbb,
25185 ++ 0x9b, 0x59, 0x09, 0xa6, 0xdf, 0xf7, 0x6b, 0xa3,
25186 ++ 0x40, 0x1a, 0xf5, 0x4f, 0xb4, 0xda, 0xd3, 0xf3,
25187 ++ 0x81, 0x93, 0xc6, 0x18, 0xd9, 0x26, 0xee, 0xac,
25188 ++ 0xf0, 0xaa, 0xdf, 0xc5, 0x9c, 0xca, 0xc2, 0xa2,
25189 ++ 0xcc, 0x7b, 0x5c, 0x24, 0xb0, 0xbc, 0xd0, 0x6a,
25190 ++ 0x4d, 0x89, 0x09, 0xb8, 0x07, 0xfe, 0x87, 0xad,
25191 ++ 0x0a, 0xea, 0xb8, 0x42, 0xf9, 0x5e, 0xb3, 0x3e,
25192 ++ 0x36, 0x4c, 0xaf, 0x75, 0x9e, 0x1c, 0xeb, 0xbd,
25193 ++ 0xbc, 0xbb, 0x80, 0x40, 0xa7, 0x3a, 0x30, 0xbf,
25194 ++ 0xa8, 0x44, 0xf4, 0xeb, 0x38, 0xad, 0x29, 0xba,
25195 ++ 0x23, 0xed, 0x41, 0x0c, 0xea, 0xd2, 0xbb, 0x41,
25196 ++ 0x18, 0xd6, 0xb9, 0xba, 0x65, 0x2b, 0xa3, 0x91,
25197 ++ 0x6d, 0x1f, 0xa9, 0xf4, 0xd1, 0x25, 0x8d, 0x4d,
25198 ++ 0x38, 0xff, 0x64, 0xa0, 0xec, 0xde, 0xa6, 0xb6,
25199 ++ 0x79, 0xab, 0x8e, 0x33, 0x6c, 0x47, 0xde, 0xaf,
25200 ++ 0x94, 0xa4, 0xa5, 0x86, 0x77, 0x55, 0x09, 0x92,
25201 ++ 0x81, 0x31, 0x76, 0xc7, 0x34, 0x22, 0x89, 0x8e,
25202 ++ 0x3d, 0x26, 0x26, 0xd7, 0xfc, 0x1e, 0x16, 0x72,
25203 ++ 0x13, 0x33, 0x63, 0xd5, 0x22, 0xbe, 0xb8, 0x04,
25204 ++ 0x34, 0x84, 0x41, 0xbb, 0x80, 0xd0, 0x9f, 0x46,
25205 ++ 0x48, 0x07, 0xa7, 0xfc, 0x2b, 0x3a, 0x75, 0x55,
25206 ++ 0x8c, 0xc7, 0x6a, 0xbd, 0x7e, 0x46, 0x08, 0x84,
25207 ++ 0x0f, 0xd5, 0x74, 0xc0, 0x82, 0x8e, 0xaa, 0x61,
25208 ++ 0x05, 0x01, 0xb2, 0x47, 0x6e, 0x20, 0x6a, 0x2d,
25209 ++ 0x58, 0x70, 0x48, 0x32, 0xa7, 0x37, 0xd2, 0xb8,
25210 ++ 0x82, 0x1a, 0x51, 0xb9, 0x61, 0xdd, 0xfd, 0x9d,
25211 ++ 0x6b, 0x0e, 0x18, 0x97, 0xf8, 0x45, 0x5f, 0x87,
25212 ++ 0x10, 0xcf, 0x34, 0x72, 0x45, 0x26, 0x49, 0x70,
25213 ++ 0xe7, 0xa3, 0x78, 0xe0, 0x52, 0x89, 0x84, 0x94,
25214 ++ 0x83, 0x82, 0xc2, 0x69, 0x8f, 0xe3, 0xe1, 0x3f,
25215 ++ 0x60, 0x74, 0x88, 0xc4, 0xf7, 0x75, 0x2c, 0xfb,
25216 ++ 0xbd, 0xb6, 0xc4, 0x7e, 0x10, 0x0a, 0x6c, 0x90,
25217 ++ 0x04, 0x9e, 0xc3, 0x3f, 0x59, 0x7c, 0xce, 0x31,
25218 ++ 0x18, 0x60, 0x57, 0x73, 0x46, 0x94, 0x7d, 0x06,
25219 ++ 0xa0, 0x6d, 0x44, 0xec, 0xa2, 0x0a, 0x9e, 0x05,
25220 ++ 0x15, 0xef, 0xca, 0x5c, 0xbf, 0x00, 0xeb, 0xf7,
25221 ++ 0x3d, 0x32, 0xd4, 0xa5, 0xef, 0x49, 0x89, 0x5e,
25222 ++ 0x46, 0xb0, 0xa6, 0x63, 0x5b, 0x8a, 0x73, 0xae,
25223 ++ 0x6f, 0xd5, 0x9d, 0xf8, 0x4f, 0x40, 0xb5, 0xb2,
25224 ++ 0x6e, 0xd3, 0xb6, 0x01, 0xa9, 0x26, 0xa2, 0x21,
25225 ++ 0xcf, 0x33, 0x7a, 0x3a, 0xa4, 0x23, 0x13, 0xb0,
25226 ++ 0x69, 0x6a, 0xee, 0xce, 0xd8, 0x9d, 0x01, 0x1d,
25227 ++ 0x50, 0xc1, 0x30, 0x6c, 0xb1, 0xcd, 0xa0, 0xf0,
25228 ++ 0xf0, 0xa2, 0x64, 0x6f, 0xbb, 0xbf, 0x5e, 0xe6,
25229 ++ 0xab, 0x87, 0xb4, 0x0f, 0x4f, 0x15, 0xaf, 0xb5,
25230 ++ 0x25, 0xa1, 0xb2, 0xd0, 0x80, 0x2c, 0xfb, 0xf9,
25231 ++ 0xfe, 0xd2, 0x33, 0xbb, 0x76, 0xfe, 0x7c, 0xa8,
25232 ++ 0x66, 0xf7, 0xe7, 0x85, 0x9f, 0x1f, 0x85, 0x57,
25233 ++ 0x88, 0xe1, 0xe9, 0x63, 0xe4, 0xd8, 0x1c, 0xa1,
25234 ++ 0xfb, 0xda, 0x44, 0x05, 0x2e, 0x1d, 0x3a, 0x1c,
25235 ++ 0xff, 0xc8, 0x3b, 0xc0, 0xfe, 0xda, 0x22, 0x0b,
25236 ++ 0x43, 0xd6, 0x88, 0x39, 0x4c, 0x4a, 0xa6, 0x69,
25237 ++ 0x18, 0x93, 0x42, 0x4e, 0xb5, 0xcc, 0x66, 0x0d,
25238 ++ 0x09, 0xf8, 0x1e, 0x7c, 0xd3, 0x3c, 0x99, 0x0d,
25239 ++ 0x50, 0x1d, 0x62, 0xe9, 0x57, 0x06, 0xbf, 0x19,
25240 ++ 0x88, 0xdd, 0xad, 0x7b, 0x4f, 0xf9, 0xc7, 0x82,
25241 ++ 0x6d, 0x8d, 0xc8, 0xc4, 0xc5, 0x78, 0x17, 0x20,
25242 ++ 0x15, 0xc5, 0x52, 0x41, 0xcf, 0x5b, 0xd6, 0x7f,
25243 ++ 0x94, 0x02, 0x41, 0xe0, 0x40, 0x22, 0x03, 0x5e,
25244 ++ 0xd1, 0x53, 0xd4, 0x86, 0xd3, 0x2c, 0x9f, 0x0f,
25245 ++ 0x96, 0xe3, 0x6b, 0x9a, 0x76, 0x32, 0x06, 0x47,
25246 ++ 0x4b, 0x11, 0xb3, 0xdd, 0x03, 0x65, 0xbd, 0x9b,
25247 ++ 0x01, 0xda, 0x9c, 0xb9, 0x7e, 0x3f, 0x6a, 0xc4,
25248 ++ 0x7b, 0xea, 0xd4, 0x3c, 0xb9, 0xfb, 0x5c, 0x6b,
25249 ++ 0x64, 0x33, 0x52, 0xba, 0x64, 0x78, 0x8f, 0xa4,
25250 ++ 0xaf, 0x7a, 0x61, 0x8d, 0xbc, 0xc5, 0x73, 0xe9,
25251 ++ 0x6b, 0x58, 0x97, 0x4b, 0xbf, 0x63, 0x22, 0xd3,
25252 ++ 0x37, 0x02, 0x54, 0xc5, 0xb9, 0x16, 0x4a, 0xf0,
25253 ++ 0x19, 0xd8, 0x94, 0x57, 0xb8, 0x8a, 0xb3, 0x16,
25254 ++ 0x3b, 0xd0, 0x84, 0x8e, 0x67, 0xa6, 0xa3, 0x7d,
25255 ++ 0x78, 0xec, 0x00
25256 ++};
25257 ++static const u8 enc_output012[] __initconst = {
25258 ++ 0x52, 0x34, 0xb3, 0x65, 0x3b, 0xb7, 0xe5, 0xd3,
25259 ++ 0xab, 0x49, 0x17, 0x60, 0xd2, 0x52, 0x56, 0xdf,
25260 ++ 0xdf, 0x34, 0x56, 0x82, 0xe2, 0xbe, 0xe5, 0xe1,
25261 ++ 0x28, 0xd1, 0x4e, 0x5f, 0x4f, 0x01, 0x7d, 0x3f,
25262 ++ 0x99, 0x6b, 0x30, 0x6e, 0x1a, 0x7c, 0x4c, 0x8e,
25263 ++ 0x62, 0x81, 0xae, 0x86, 0x3f, 0x6b, 0xd0, 0xb5,
25264 ++ 0xa9, 0xcf, 0x50, 0xf1, 0x02, 0x12, 0xa0, 0x0b,
25265 ++ 0x24, 0xe9, 0xe6, 0x72, 0x89, 0x2c, 0x52, 0x1b,
25266 ++ 0x34, 0x38, 0xf8, 0x75, 0x5f, 0xa0, 0x74, 0xe2,
25267 ++ 0x99, 0xdd, 0xa6, 0x4b, 0x14, 0x50, 0x4e, 0xf1,
25268 ++ 0xbe, 0xd6, 0x9e, 0xdb, 0xb2, 0x24, 0x27, 0x74,
25269 ++ 0x12, 0x4a, 0x78, 0x78, 0x17, 0xa5, 0x58, 0x8e,
25270 ++ 0x2f, 0xf9, 0xf4, 0x8d, 0xee, 0x03, 0x88, 0xae,
25271 ++ 0xb8, 0x29, 0xa1, 0x2f, 0x4b, 0xee, 0x92, 0xbd,
25272 ++ 0x87, 0xb3, 0xce, 0x34, 0x21, 0x57, 0x46, 0x04,
25273 ++ 0x49, 0x0c, 0x80, 0xf2, 0x01, 0x13, 0xa1, 0x55,
25274 ++ 0xb3, 0xff, 0x44, 0x30, 0x3c, 0x1c, 0xd0, 0xef,
25275 ++ 0xbc, 0x18, 0x74, 0x26, 0xad, 0x41, 0x5b, 0x5b,
25276 ++ 0x3e, 0x9a, 0x7a, 0x46, 0x4f, 0x16, 0xd6, 0x74,
25277 ++ 0x5a, 0xb7, 0x3a, 0x28, 0x31, 0xd8, 0xae, 0x26,
25278 ++ 0xac, 0x50, 0x53, 0x86, 0xf2, 0x56, 0xd7, 0x3f,
25279 ++ 0x29, 0xbc, 0x45, 0x68, 0x8e, 0xcb, 0x98, 0x64,
25280 ++ 0xdd, 0xc9, 0xba, 0xb8, 0x4b, 0x7b, 0x82, 0xdd,
25281 ++ 0x14, 0xa7, 0xcb, 0x71, 0x72, 0x00, 0x5c, 0xad,
25282 ++ 0x7b, 0x6a, 0x89, 0xa4, 0x3d, 0xbf, 0xb5, 0x4b,
25283 ++ 0x3e, 0x7c, 0x5a, 0xcf, 0xb8, 0xa1, 0xc5, 0x6e,
25284 ++ 0xc8, 0xb6, 0x31, 0x57, 0x7b, 0xdf, 0xa5, 0x7e,
25285 ++ 0xb1, 0xd6, 0x42, 0x2a, 0x31, 0x36, 0xd1, 0xd0,
25286 ++ 0x3f, 0x7a, 0xe5, 0x94, 0xd6, 0x36, 0xa0, 0x6f,
25287 ++ 0xb7, 0x40, 0x7d, 0x37, 0xc6, 0x55, 0x7c, 0x50,
25288 ++ 0x40, 0x6d, 0x29, 0x89, 0xe3, 0x5a, 0xae, 0x97,
25289 ++ 0xe7, 0x44, 0x49, 0x6e, 0xbd, 0x81, 0x3d, 0x03,
25290 ++ 0x93, 0x06, 0x12, 0x06, 0xe2, 0x41, 0x12, 0x4a,
25291 ++ 0xf1, 0x6a, 0xa4, 0x58, 0xa2, 0xfb, 0xd2, 0x15,
25292 ++ 0xba, 0xc9, 0x79, 0xc9, 0xce, 0x5e, 0x13, 0xbb,
25293 ++ 0xf1, 0x09, 0x04, 0xcc, 0xfd, 0xe8, 0x51, 0x34,
25294 ++ 0x6a, 0xe8, 0x61, 0x88, 0xda, 0xed, 0x01, 0x47,
25295 ++ 0x84, 0xf5, 0x73, 0x25, 0xf9, 0x1c, 0x42, 0x86,
25296 ++ 0x07, 0xf3, 0x5b, 0x1a, 0x01, 0xb3, 0xeb, 0x24,
25297 ++ 0x32, 0x8d, 0xf6, 0xed, 0x7c, 0x4b, 0xeb, 0x3c,
25298 ++ 0x36, 0x42, 0x28, 0xdf, 0xdf, 0xb6, 0xbe, 0xd9,
25299 ++ 0x8c, 0x52, 0xd3, 0x2b, 0x08, 0x90, 0x8c, 0xe7,
25300 ++ 0x98, 0x31, 0xe2, 0x32, 0x8e, 0xfc, 0x11, 0x48,
25301 ++ 0x00, 0xa8, 0x6a, 0x42, 0x4a, 0x02, 0xc6, 0x4b,
25302 ++ 0x09, 0xf1, 0xe3, 0x49, 0xf3, 0x45, 0x1f, 0x0e,
25303 ++ 0xbc, 0x56, 0xe2, 0xe4, 0xdf, 0xfb, 0xeb, 0x61,
25304 ++ 0xfa, 0x24, 0xc1, 0x63, 0x75, 0xbb, 0x47, 0x75,
25305 ++ 0xaf, 0xe1, 0x53, 0x16, 0x96, 0x21, 0x85, 0x26,
25306 ++ 0x11, 0xb3, 0x76, 0xe3, 0x23, 0xa1, 0x6b, 0x74,
25307 ++ 0x37, 0xd0, 0xde, 0x06, 0x90, 0x71, 0x5d, 0x43,
25308 ++ 0x88, 0x9b, 0x00, 0x54, 0xa6, 0x75, 0x2f, 0xa1,
25309 ++ 0xc2, 0x0b, 0x73, 0x20, 0x1d, 0xb6, 0x21, 0x79,
25310 ++ 0x57, 0x3f, 0xfa, 0x09, 0xbe, 0x8a, 0x33, 0xc3,
25311 ++ 0x52, 0xf0, 0x1d, 0x82, 0x31, 0xd1, 0x55, 0xb5,
25312 ++ 0x6c, 0x99, 0x25, 0xcf, 0x5c, 0x32, 0xce, 0xe9,
25313 ++ 0x0d, 0xfa, 0x69, 0x2c, 0xd5, 0x0d, 0xc5, 0x6d,
25314 ++ 0x86, 0xd0, 0x0c, 0x3b, 0x06, 0x50, 0x79, 0xe8,
25315 ++ 0xc3, 0xae, 0x04, 0xe6, 0xcd, 0x51, 0xe4, 0x26,
25316 ++ 0x9b, 0x4f, 0x7e, 0xa6, 0x0f, 0xab, 0xd8, 0xe5,
25317 ++ 0xde, 0xa9, 0x00, 0x95, 0xbe, 0xa3, 0x9d, 0x5d,
25318 ++ 0xb2, 0x09, 0x70, 0x18, 0x1c, 0xf0, 0xac, 0x29,
25319 ++ 0x23, 0x02, 0x29, 0x28, 0xd2, 0x74, 0x35, 0x57,
25320 ++ 0x62, 0x0f, 0x24, 0xea, 0x5e, 0x33, 0xc2, 0x92,
25321 ++ 0xf3, 0x78, 0x4d, 0x30, 0x1e, 0xa1, 0x99, 0xa9,
25322 ++ 0x82, 0xb0, 0x42, 0x31, 0x8d, 0xad, 0x8a, 0xbc,
25323 ++ 0xfc, 0xd4, 0x57, 0x47, 0x3e, 0xb4, 0x50, 0xdd,
25324 ++ 0x6e, 0x2c, 0x80, 0x4d, 0x22, 0xf1, 0xfb, 0x57,
25325 ++ 0xc4, 0xdd, 0x17, 0xe1, 0x8a, 0x36, 0x4a, 0xb3,
25326 ++ 0x37, 0xca, 0xc9, 0x4e, 0xab, 0xd5, 0x69, 0xc4,
25327 ++ 0xf4, 0xbc, 0x0b, 0x3b, 0x44, 0x4b, 0x29, 0x9c,
25328 ++ 0xee, 0xd4, 0x35, 0x22, 0x21, 0xb0, 0x1f, 0x27,
25329 ++ 0x64, 0xa8, 0x51, 0x1b, 0xf0, 0x9f, 0x19, 0x5c,
25330 ++ 0xfb, 0x5a, 0x64, 0x74, 0x70, 0x45, 0x09, 0xf5,
25331 ++ 0x64, 0xfe, 0x1a, 0x2d, 0xc9, 0x14, 0x04, 0x14,
25332 ++ 0xcf, 0xd5, 0x7d, 0x60, 0xaf, 0x94, 0x39, 0x94,
25333 ++ 0xe2, 0x7d, 0x79, 0x82, 0xd0, 0x65, 0x3b, 0x6b,
25334 ++ 0x9c, 0x19, 0x84, 0xb4, 0x6d, 0xb3, 0x0c, 0x99,
25335 ++ 0xc0, 0x56, 0xa8, 0xbd, 0x73, 0xce, 0x05, 0x84,
25336 ++ 0x3e, 0x30, 0xaa, 0xc4, 0x9b, 0x1b, 0x04, 0x2a,
25337 ++ 0x9f, 0xd7, 0x43, 0x2b, 0x23, 0xdf, 0xbf, 0xaa,
25338 ++ 0xd5, 0xc2, 0x43, 0x2d, 0x70, 0xab, 0xdc, 0x75,
25339 ++ 0xad, 0xac, 0xf7, 0xc0, 0xbe, 0x67, 0xb2, 0x74,
25340 ++ 0xed, 0x67, 0x10, 0x4a, 0x92, 0x60, 0xc1, 0x40,
25341 ++ 0x50, 0x19, 0x8a, 0x8a, 0x8c, 0x09, 0x0e, 0x72,
25342 ++ 0xe1, 0x73, 0x5e, 0xe8, 0x41, 0x85, 0x63, 0x9f,
25343 ++ 0x3f, 0xd7, 0x7d, 0xc4, 0xfb, 0x22, 0x5d, 0x92,
25344 ++ 0x6c, 0xb3, 0x1e, 0xe2, 0x50, 0x2f, 0x82, 0xa8,
25345 ++ 0x28, 0xc0, 0xb5, 0xd7, 0x5f, 0x68, 0x0d, 0x2c,
25346 ++ 0x2d, 0xaf, 0x7e, 0xfa, 0x2e, 0x08, 0x0f, 0x1f,
25347 ++ 0x70, 0x9f, 0xe9, 0x19, 0x72, 0x55, 0xf8, 0xfb,
25348 ++ 0x51, 0xd2, 0x33, 0x5d, 0xa0, 0xd3, 0x2b, 0x0a,
25349 ++ 0x6c, 0xbc, 0x4e, 0xcf, 0x36, 0x4d, 0xdc, 0x3b,
25350 ++ 0xe9, 0x3e, 0x81, 0x7c, 0x61, 0xdb, 0x20, 0x2d,
25351 ++ 0x3a, 0xc3, 0xb3, 0x0c, 0x1e, 0x00, 0xb9, 0x7c,
25352 ++ 0xf5, 0xca, 0x10, 0x5f, 0x3a, 0x71, 0xb3, 0xe4,
25353 ++ 0x20, 0xdb, 0x0c, 0x2a, 0x98, 0x63, 0x45, 0x00,
25354 ++ 0x58, 0xf6, 0x68, 0xe4, 0x0b, 0xda, 0x13, 0x3b,
25355 ++ 0x60, 0x5c, 0x76, 0xdb, 0xb9, 0x97, 0x71, 0xe4,
25356 ++ 0xd9, 0xb7, 0xdb, 0xbd, 0x68, 0xc7, 0x84, 0x84,
25357 ++ 0xaa, 0x7c, 0x68, 0x62, 0x5e, 0x16, 0xfc, 0xba,
25358 ++ 0x72, 0xaa, 0x9a, 0xa9, 0xeb, 0x7c, 0x75, 0x47,
25359 ++ 0x97, 0x7e, 0xad, 0xe2, 0xd9, 0x91, 0xe8, 0xe4,
25360 ++ 0xa5, 0x31, 0xd7, 0x01, 0x8e, 0xa2, 0x11, 0x88,
25361 ++ 0x95, 0xb9, 0xf2, 0x9b, 0xd3, 0x7f, 0x1b, 0x81,
25362 ++ 0x22, 0xf7, 0x98, 0x60, 0x0a, 0x64, 0xa6, 0xc1,
25363 ++ 0xf6, 0x49, 0xc7, 0xe3, 0x07, 0x4d, 0x94, 0x7a,
25364 ++ 0xcf, 0x6e, 0x68, 0x0c, 0x1b, 0x3f, 0x6e, 0x2e,
25365 ++ 0xee, 0x92, 0xfa, 0x52, 0xb3, 0x59, 0xf8, 0xf1,
25366 ++ 0x8f, 0x6a, 0x66, 0xa3, 0x82, 0x76, 0x4a, 0x07,
25367 ++ 0x1a, 0xc7, 0xdd, 0xf5, 0xda, 0x9c, 0x3c, 0x24,
25368 ++ 0xbf, 0xfd, 0x42, 0xa1, 0x10, 0x64, 0x6a, 0x0f,
25369 ++ 0x89, 0xee, 0x36, 0xa5, 0xce, 0x99, 0x48, 0x6a,
25370 ++ 0xf0, 0x9f, 0x9e, 0x69, 0xa4, 0x40, 0x20, 0xe9,
25371 ++ 0x16, 0x15, 0xf7, 0xdb, 0x75, 0x02, 0xcb, 0xe9,
25372 ++ 0x73, 0x8b, 0x3b, 0x49, 0x2f, 0xf0, 0xaf, 0x51,
25373 ++ 0x06, 0x5c, 0xdf, 0x27, 0x27, 0x49, 0x6a, 0xd1,
25374 ++ 0xcc, 0xc7, 0xb5, 0x63, 0xb5, 0xfc, 0xb8, 0x5c,
25375 ++ 0x87, 0x7f, 0x84, 0xb4, 0xcc, 0x14, 0xa9, 0x53,
25376 ++ 0xda, 0xa4, 0x56, 0xf8, 0xb6, 0x1b, 0xcc, 0x40,
25377 ++ 0x27, 0x52, 0x06, 0x5a, 0x13, 0x81, 0xd7, 0x3a,
25378 ++ 0xd4, 0x3b, 0xfb, 0x49, 0x65, 0x31, 0x33, 0xb2,
25379 ++ 0xfa, 0xcd, 0xad, 0x58, 0x4e, 0x2b, 0xae, 0xd2,
25380 ++ 0x20, 0xfb, 0x1a, 0x48, 0xb4, 0x3f, 0x9a, 0xd8,
25381 ++ 0x7a, 0x35, 0x4a, 0xc8, 0xee, 0x88, 0x5e, 0x07,
25382 ++ 0x66, 0x54, 0xb9, 0xec, 0x9f, 0xa3, 0xe3, 0xb9,
25383 ++ 0x37, 0xaa, 0x49, 0x76, 0x31, 0xda, 0x74, 0x2d,
25384 ++ 0x3c, 0xa4, 0x65, 0x10, 0x32, 0x38, 0xf0, 0xde,
25385 ++ 0xd3, 0x99, 0x17, 0xaa, 0x71, 0xaa, 0x8f, 0x0f,
25386 ++ 0x8c, 0xaf, 0xa2, 0xf8, 0x5d, 0x64, 0xba, 0x1d,
25387 ++ 0xa3, 0xef, 0x96, 0x73, 0xe8, 0xa1, 0x02, 0x8d,
25388 ++ 0x0c, 0x6d, 0xb8, 0x06, 0x90, 0xb8, 0x08, 0x56,
25389 ++ 0x2c, 0xa7, 0x06, 0xc9, 0xc2, 0x38, 0xdb, 0x7c,
25390 ++ 0x63, 0xb1, 0x57, 0x8e, 0xea, 0x7c, 0x79, 0xf3,
25391 ++ 0x49, 0x1d, 0xfe, 0x9f, 0xf3, 0x6e, 0xb1, 0x1d,
25392 ++ 0xba, 0x19, 0x80, 0x1a, 0x0a, 0xd3, 0xb0, 0x26,
25393 ++ 0x21, 0x40, 0xb1, 0x7c, 0xf9, 0x4d, 0x8d, 0x10,
25394 ++ 0xc1, 0x7e, 0xf4, 0xf6, 0x3c, 0xa8, 0xfd, 0x7c,
25395 ++ 0xa3, 0x92, 0xb2, 0x0f, 0xaa, 0xcc, 0xa6, 0x11,
25396 ++ 0xfe, 0x04, 0xe3, 0xd1, 0x7a, 0x32, 0x89, 0xdf,
25397 ++ 0x0d, 0xc4, 0x8f, 0x79, 0x6b, 0xca, 0x16, 0x7c,
25398 ++ 0x6e, 0xf9, 0xad, 0x0f, 0xf6, 0xfe, 0x27, 0xdb,
25399 ++ 0xc4, 0x13, 0x70, 0xf1, 0x62, 0x1a, 0x4f, 0x79,
25400 ++ 0x40, 0xc9, 0x9b, 0x8b, 0x21, 0xea, 0x84, 0xfa,
25401 ++ 0xf5, 0xf1, 0x89, 0xce, 0xb7, 0x55, 0x0a, 0x80,
25402 ++ 0x39, 0x2f, 0x55, 0x36, 0x16, 0x9c, 0x7b, 0x08,
25403 ++ 0xbd, 0x87, 0x0d, 0xa5, 0x32, 0xf1, 0x52, 0x7c,
25404 ++ 0xe8, 0x55, 0x60, 0x5b, 0xd7, 0x69, 0xe4, 0xfc,
25405 ++ 0xfa, 0x12, 0x85, 0x96, 0xea, 0x50, 0x28, 0xab,
25406 ++ 0x8a, 0xf7, 0xbb, 0x0e, 0x53, 0x74, 0xca, 0xa6,
25407 ++ 0x27, 0x09, 0xc2, 0xb5, 0xde, 0x18, 0x14, 0xd9,
25408 ++ 0xea, 0xe5, 0x29, 0x1c, 0x40, 0x56, 0xcf, 0xd7,
25409 ++ 0xae, 0x05, 0x3f, 0x65, 0xaf, 0x05, 0x73, 0xe2,
25410 ++ 0x35, 0x96, 0x27, 0x07, 0x14, 0xc0, 0xad, 0x33,
25411 ++ 0xf1, 0xdc, 0x44, 0x7a, 0x89, 0x17, 0x77, 0xd2,
25412 ++ 0x9c, 0x58, 0x60, 0xf0, 0x3f, 0x7b, 0x2d, 0x2e,
25413 ++ 0x57, 0x95, 0x54, 0x87, 0xed, 0xf2, 0xc7, 0x4c,
25414 ++ 0xf0, 0xae, 0x56, 0x29, 0x19, 0x7d, 0x66, 0x4b,
25415 ++ 0x9b, 0x83, 0x84, 0x42, 0x3b, 0x01, 0x25, 0x66,
25416 ++ 0x8e, 0x02, 0xde, 0xb9, 0x83, 0x54, 0x19, 0xf6,
25417 ++ 0x9f, 0x79, 0x0d, 0x67, 0xc5, 0x1d, 0x7a, 0x44,
25418 ++ 0x02, 0x98, 0xa7, 0x16, 0x1c, 0x29, 0x0d, 0x74,
25419 ++ 0xff, 0x85, 0x40, 0x06, 0xef, 0x2c, 0xa9, 0xc6,
25420 ++ 0xf5, 0x53, 0x07, 0x06, 0xae, 0xe4, 0xfa, 0x5f,
25421 ++ 0xd8, 0x39, 0x4d, 0xf1, 0x9b, 0x6b, 0xd9, 0x24,
25422 ++ 0x84, 0xfe, 0x03, 0x4c, 0xb2, 0x3f, 0xdf, 0xa1,
25423 ++ 0x05, 0x9e, 0x50, 0x14, 0x5a, 0xd9, 0x1a, 0xa2,
25424 ++ 0xa7, 0xfa, 0xfa, 0x17, 0xf7, 0x78, 0xd6, 0xb5,
25425 ++ 0x92, 0x61, 0x91, 0xac, 0x36, 0xfa, 0x56, 0x0d,
25426 ++ 0x38, 0x32, 0x18, 0x85, 0x08, 0x58, 0x37, 0xf0,
25427 ++ 0x4b, 0xdb, 0x59, 0xe7, 0xa4, 0x34, 0xc0, 0x1b,
25428 ++ 0x01, 0xaf, 0x2d, 0xde, 0xa1, 0xaa, 0x5d, 0xd3,
25429 ++ 0xec, 0xe1, 0xd4, 0xf7, 0xe6, 0x54, 0x68, 0xf0,
25430 ++ 0x51, 0x97, 0xa7, 0x89, 0xea, 0x24, 0xad, 0xd3,
25431 ++ 0x6e, 0x47, 0x93, 0x8b, 0x4b, 0xb4, 0xf7, 0x1c,
25432 ++ 0x42, 0x06, 0x67, 0xe8, 0x99, 0xf6, 0xf5, 0x7b,
25433 ++ 0x85, 0xb5, 0x65, 0xb5, 0xb5, 0xd2, 0x37, 0xf5,
25434 ++ 0xf3, 0x02, 0xa6, 0x4d, 0x11, 0xa7, 0xdc, 0x51,
25435 ++ 0x09, 0x7f, 0xa0, 0xd8, 0x88, 0x1c, 0x13, 0x71,
25436 ++ 0xae, 0x9c, 0xb7, 0x7b, 0x34, 0xd6, 0x4e, 0x68,
25437 ++ 0x26, 0x83, 0x51, 0xaf, 0x1d, 0xee, 0x8b, 0xbb,
25438 ++ 0x69, 0x43, 0x2b, 0x9e, 0x8a, 0xbc, 0x02, 0x0e,
25439 ++ 0xa0, 0x1b, 0xe0, 0xa8, 0x5f, 0x6f, 0xaf, 0x1b,
25440 ++ 0x8f, 0xe7, 0x64, 0x71, 0x74, 0x11, 0x7e, 0xa8,
25441 ++ 0xd8, 0xf9, 0x97, 0x06, 0xc3, 0xb6, 0xfb, 0xfb,
25442 ++ 0xb7, 0x3d, 0x35, 0x9d, 0x3b, 0x52, 0xed, 0x54,
25443 ++ 0xca, 0xf4, 0x81, 0x01, 0x2d, 0x1b, 0xc3, 0xa7,
25444 ++ 0x00, 0x3d, 0x1a, 0x39, 0x54, 0xe1, 0xf6, 0xff,
25445 ++ 0xed, 0x6f, 0x0b, 0x5a, 0x68, 0xda, 0x58, 0xdd,
25446 ++ 0xa9, 0xcf, 0x5c, 0x4a, 0xe5, 0x09, 0x4e, 0xde,
25447 ++ 0x9d, 0xbc, 0x3e, 0xee, 0x5a, 0x00, 0x3b, 0x2c,
25448 ++ 0x87, 0x10, 0x65, 0x60, 0xdd, 0xd7, 0x56, 0xd1,
25449 ++ 0x4c, 0x64, 0x45, 0xe4, 0x21, 0xec, 0x78, 0xf8,
25450 ++ 0x25, 0x7a, 0x3e, 0x16, 0x5d, 0x09, 0x53, 0x14,
25451 ++ 0xbe, 0x4f, 0xae, 0x87, 0xd8, 0xd1, 0xaa, 0x3c,
25452 ++ 0xf6, 0x3e, 0xa4, 0x70, 0x8c, 0x5e, 0x70, 0xa4,
25453 ++ 0xb3, 0x6b, 0x66, 0x73, 0xd3, 0xbf, 0x31, 0x06,
25454 ++ 0x19, 0x62, 0x93, 0x15, 0xf2, 0x86, 0xe4, 0x52,
25455 ++ 0x7e, 0x53, 0x4c, 0x12, 0x38, 0xcc, 0x34, 0x7d,
25456 ++ 0x57, 0xf6, 0x42, 0x93, 0x8a, 0xc4, 0xee, 0x5c,
25457 ++ 0x8a, 0xe1, 0x52, 0x8f, 0x56, 0x64, 0xf6, 0xa6,
25458 ++ 0xd1, 0x91, 0x57, 0x70, 0xcd, 0x11, 0x76, 0xf5,
25459 ++ 0x59, 0x60, 0x60, 0x3c, 0xc1, 0xc3, 0x0b, 0x7f,
25460 ++ 0x58, 0x1a, 0x50, 0x91, 0xf1, 0x68, 0x8f, 0x6e,
25461 ++ 0x74, 0x74, 0xa8, 0x51, 0x0b, 0xf7, 0x7a, 0x98,
25462 ++ 0x37, 0xf2, 0x0a, 0x0e, 0xa4, 0x97, 0x04, 0xb8,
25463 ++ 0x9b, 0xfd, 0xa0, 0xea, 0xf7, 0x0d, 0xe1, 0xdb,
25464 ++ 0x03, 0xf0, 0x31, 0x29, 0xf8, 0xdd, 0x6b, 0x8b,
25465 ++ 0x5d, 0xd8, 0x59, 0xa9, 0x29, 0xcf, 0x9a, 0x79,
25466 ++ 0x89, 0x19, 0x63, 0x46, 0x09, 0x79, 0x6a, 0x11,
25467 ++ 0xda, 0x63, 0x68, 0x48, 0x77, 0x23, 0xfb, 0x7d,
25468 ++ 0x3a, 0x43, 0xcb, 0x02, 0x3b, 0x7a, 0x6d, 0x10,
25469 ++ 0x2a, 0x9e, 0xac, 0xf1, 0xd4, 0x19, 0xf8, 0x23,
25470 ++ 0x64, 0x1d, 0x2c, 0x5f, 0xf2, 0xb0, 0x5c, 0x23,
25471 ++ 0x27, 0xf7, 0x27, 0x30, 0x16, 0x37, 0xb1, 0x90,
25472 ++ 0xab, 0x38, 0xfb, 0x55, 0xcd, 0x78, 0x58, 0xd4,
25473 ++ 0x7d, 0x43, 0xf6, 0x45, 0x5e, 0x55, 0x8d, 0xb1,
25474 ++ 0x02, 0x65, 0x58, 0xb4, 0x13, 0x4b, 0x36, 0xf7,
25475 ++ 0xcc, 0xfe, 0x3d, 0x0b, 0x82, 0xe2, 0x12, 0x11,
25476 ++ 0xbb, 0xe6, 0xb8, 0x3a, 0x48, 0x71, 0xc7, 0x50,
25477 ++ 0x06, 0x16, 0x3a, 0xe6, 0x7c, 0x05, 0xc7, 0xc8,
25478 ++ 0x4d, 0x2f, 0x08, 0x6a, 0x17, 0x9a, 0x95, 0x97,
25479 ++ 0x50, 0x68, 0xdc, 0x28, 0x18, 0xc4, 0x61, 0x38,
25480 ++ 0xb9, 0xe0, 0x3e, 0x78, 0xdb, 0x29, 0xe0, 0x9f,
25481 ++ 0x52, 0xdd, 0xf8, 0x4f, 0x91, 0xc1, 0xd0, 0x33,
25482 ++ 0xa1, 0x7a, 0x8e, 0x30, 0x13, 0x82, 0x07, 0x9f,
25483 ++ 0xd3, 0x31, 0x0f, 0x23, 0xbe, 0x32, 0x5a, 0x75,
25484 ++ 0xcf, 0x96, 0xb2, 0xec, 0xb5, 0x32, 0xac, 0x21,
25485 ++ 0xd1, 0x82, 0x33, 0xd3, 0x15, 0x74, 0xbd, 0x90,
25486 ++ 0xf1, 0x2c, 0xe6, 0x5f, 0x8d, 0xe3, 0x02, 0xe8,
25487 ++ 0xe9, 0xc4, 0xca, 0x96, 0xeb, 0x0e, 0xbc, 0x91,
25488 ++ 0xf4, 0xb9, 0xea, 0xd9, 0x1b, 0x75, 0xbd, 0xe1,
25489 ++ 0xac, 0x2a, 0x05, 0x37, 0x52, 0x9b, 0x1b, 0x3f,
25490 ++ 0x5a, 0xdc, 0x21, 0xc3, 0x98, 0xbb, 0xaf, 0xa3,
25491 ++ 0xf2, 0x00, 0xbf, 0x0d, 0x30, 0x89, 0x05, 0xcc,
25492 ++ 0xa5, 0x76, 0xf5, 0x06, 0xf0, 0xc6, 0x54, 0x8a,
25493 ++ 0x5d, 0xd4, 0x1e, 0xc1, 0xf2, 0xce, 0xb0, 0x62,
25494 ++ 0xc8, 0xfc, 0x59, 0x42, 0x9a, 0x90, 0x60, 0x55,
25495 ++ 0xfe, 0x88, 0xa5, 0x8b, 0xb8, 0x33, 0x0c, 0x23,
25496 ++ 0x24, 0x0d, 0x15, 0x70, 0x37, 0x1e, 0x3d, 0xf6,
25497 ++ 0xd2, 0xea, 0x92, 0x10, 0xb2, 0xc4, 0x51, 0xac,
25498 ++ 0xf2, 0xac, 0xf3, 0x6b, 0x6c, 0xaa, 0xcf, 0x12,
25499 ++ 0xc5, 0x6c, 0x90, 0x50, 0xb5, 0x0c, 0xfc, 0x1a,
25500 ++ 0x15, 0x52, 0xe9, 0x26, 0xc6, 0x52, 0xa4, 0xe7,
25501 ++ 0x81, 0x69, 0xe1, 0xe7, 0x9e, 0x30, 0x01, 0xec,
25502 ++ 0x84, 0x89, 0xb2, 0x0d, 0x66, 0xdd, 0xce, 0x28,
25503 ++ 0x5c, 0xec, 0x98, 0x46, 0x68, 0x21, 0x9f, 0x88,
25504 ++ 0x3f, 0x1f, 0x42, 0x77, 0xce, 0xd0, 0x61, 0xd4,
25505 ++ 0x20, 0xa7, 0xff, 0x53, 0xad, 0x37, 0xd0, 0x17,
25506 ++ 0x35, 0xc9, 0xfc, 0xba, 0x0a, 0x78, 0x3f, 0xf2,
25507 ++ 0xcc, 0x86, 0x89, 0xe8, 0x4b, 0x3c, 0x48, 0x33,
25508 ++ 0x09, 0x7f, 0xc6, 0xc0, 0xdd, 0xb8, 0xfd, 0x7a,
25509 ++ 0x66, 0x66, 0x65, 0xeb, 0x47, 0xa7, 0x04, 0x28,
25510 ++ 0xa3, 0x19, 0x8e, 0xa9, 0xb1, 0x13, 0x67, 0x62,
25511 ++ 0x70, 0xcf, 0xd6
25512 ++};
25513 ++static const u8 enc_assoc012[] __initconst = {
25514 ++ 0xb1, 0x69, 0x83, 0x87, 0x30, 0xaa, 0x5d, 0xb8,
25515 ++ 0x77, 0xe8, 0x21, 0xff, 0x06, 0x59, 0x35, 0xce,
25516 ++ 0x75, 0xfe, 0x38, 0xef, 0xb8, 0x91, 0x43, 0x8c,
25517 ++ 0xcf, 0x70, 0xdd, 0x0a, 0x68, 0xbf, 0xd4, 0xbc,
25518 ++ 0x16, 0x76, 0x99, 0x36, 0x1e, 0x58, 0x79, 0x5e,
25519 ++ 0xd4, 0x29, 0xf7, 0x33, 0x93, 0x48, 0xdb, 0x5f,
25520 ++ 0x01, 0xae, 0x9c, 0xb6, 0xe4, 0x88, 0x6d, 0x2b,
25521 ++ 0x76, 0x75, 0xe0, 0xf3, 0x74, 0xe2, 0xc9
25522 ++};
25523 ++static const u8 enc_nonce012[] __initconst = {
25524 ++ 0x05, 0xa3, 0x93, 0xed, 0x30, 0xc5, 0xa2, 0x06
25525 ++};
25526 ++static const u8 enc_key012[] __initconst = {
25527 ++ 0xb3, 0x35, 0x50, 0x03, 0x54, 0x2e, 0x40, 0x5e,
25528 ++ 0x8f, 0x59, 0x8e, 0xc5, 0x90, 0xd5, 0x27, 0x2d,
25529 ++ 0xba, 0x29, 0x2e, 0xcb, 0x1b, 0x70, 0x44, 0x1e,
25530 ++ 0x65, 0x91, 0x6e, 0x2a, 0x79, 0x22, 0xda, 0x64
25531 ++};
25532 ++
25533 ++/* wycheproof - misc */
25534 ++static const u8 enc_input053[] __initconst = {
25535 ++ 0x25, 0x6d, 0x40, 0x88, 0x80, 0x94, 0x17, 0x83,
25536 ++ 0x55, 0xd3, 0x04, 0x84, 0x64, 0x43, 0xfe, 0xe8,
25537 ++ 0xdf, 0x99, 0x47, 0x03, 0x03, 0xfb, 0x3b, 0x7b,
25538 ++ 0x80, 0xe0, 0x30, 0xbe, 0xeb, 0xd3, 0x29, 0xbe
25539 ++};
25540 ++static const u8 enc_output053[] __initconst = {
25541 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
25542 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
25543 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
25544 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
25545 ++ 0xe6, 0xd3, 0xd7, 0x32, 0x4a, 0x1c, 0xbb, 0xa7,
25546 ++ 0x77, 0xbb, 0xb0, 0xec, 0xdd, 0xa3, 0x78, 0x07
25547 ++};
25548 ++static const u8 enc_assoc053[] __initconst = {
25549 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
25550 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
25551 ++};
25552 ++static const u8 enc_nonce053[] __initconst = {
25553 ++ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
25554 ++};
25555 ++static const u8 enc_key053[] __initconst = {
25556 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
25557 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
25558 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
25559 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
25560 ++};
25561 ++
25562 ++/* wycheproof - misc */
25563 ++static const u8 enc_input054[] __initconst = {
25564 ++ 0x25, 0x6d, 0x40, 0x88, 0x80, 0x94, 0x17, 0x83,
25565 ++ 0x55, 0xd3, 0x04, 0x84, 0x64, 0x43, 0xfe, 0xe8,
25566 ++ 0xdf, 0x99, 0x47, 0x03, 0x03, 0xfb, 0x3b, 0x7b,
25567 ++ 0x80, 0xe0, 0x30, 0xbe, 0xeb, 0xd3, 0x29, 0xbe,
25568 ++ 0xe3, 0xbc, 0xdb, 0x5b, 0x1e, 0xde, 0xfc, 0xfe,
25569 ++ 0x8b, 0xcd, 0xa1, 0xb6, 0xa1, 0x5c, 0x8c, 0x2b,
25570 ++ 0x08, 0x69, 0xff, 0xd2, 0xec, 0x5e, 0x26, 0xe5,
25571 ++ 0x53, 0xb7, 0xb2, 0x27, 0xfe, 0x87, 0xfd, 0xbd
25572 ++};
25573 ++static const u8 enc_output054[] __initconst = {
25574 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
25575 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
25576 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
25577 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
25578 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
25579 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
25580 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
25581 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
25582 ++ 0x06, 0x2d, 0xe6, 0x79, 0x5f, 0x27, 0x4f, 0xd2,
25583 ++ 0xa3, 0x05, 0xd7, 0x69, 0x80, 0xbc, 0x9c, 0xce
25584 ++};
25585 ++static const u8 enc_assoc054[] __initconst = {
25586 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
25587 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
25588 ++};
25589 ++static const u8 enc_nonce054[] __initconst = {
25590 ++ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
25591 ++};
25592 ++static const u8 enc_key054[] __initconst = {
25593 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
25594 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
25595 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
25596 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
25597 ++};
25598 ++
25599 ++/* wycheproof - misc */
25600 ++static const u8 enc_input055[] __initconst = {
25601 ++ 0x25, 0x6d, 0x40, 0x88, 0x80, 0x94, 0x17, 0x83,
25602 ++ 0x55, 0xd3, 0x04, 0x84, 0x64, 0x43, 0xfe, 0xe8,
25603 ++ 0xdf, 0x99, 0x47, 0x03, 0x03, 0xfb, 0x3b, 0x7b,
25604 ++ 0x80, 0xe0, 0x30, 0xbe, 0xeb, 0xd3, 0x29, 0xbe,
25605 ++ 0xe3, 0xbc, 0xdb, 0x5b, 0x1e, 0xde, 0xfc, 0xfe,
25606 ++ 0x8b, 0xcd, 0xa1, 0xb6, 0xa1, 0x5c, 0x8c, 0x2b,
25607 ++ 0x08, 0x69, 0xff, 0xd2, 0xec, 0x5e, 0x26, 0xe5,
25608 ++ 0x53, 0xb7, 0xb2, 0x27, 0xfe, 0x87, 0xfd, 0xbd,
25609 ++ 0x7a, 0xda, 0x44, 0x42, 0x42, 0x69, 0xbf, 0xfa,
25610 ++ 0x55, 0x27, 0xf2, 0x70, 0xac, 0xf6, 0x85, 0x02,
25611 ++ 0xb7, 0x4c, 0x5a, 0xe2, 0xe6, 0x0c, 0x05, 0x80,
25612 ++ 0x98, 0x1a, 0x49, 0x38, 0x45, 0x93, 0x92, 0xc4,
25613 ++ 0x9b, 0xb2, 0xf2, 0x84, 0xb6, 0x46, 0xef, 0xc7,
25614 ++ 0xf3, 0xf0, 0xb1, 0x36, 0x1d, 0xc3, 0x48, 0xed,
25615 ++ 0x77, 0xd3, 0x0b, 0xc5, 0x76, 0x92, 0xed, 0x38,
25616 ++ 0xfb, 0xac, 0x01, 0x88, 0x38, 0x04, 0x88, 0xc7
25617 ++};
25618 ++static const u8 enc_output055[] __initconst = {
25619 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
25620 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
25621 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
25622 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
25623 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
25624 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
25625 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
25626 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
25627 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
25628 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
25629 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
25630 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
25631 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
25632 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
25633 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
25634 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
25635 ++ 0xd8, 0xb4, 0x79, 0x02, 0xba, 0xae, 0xaf, 0xb3,
25636 ++ 0x42, 0x03, 0x05, 0x15, 0x29, 0xaf, 0x28, 0x2e
25637 ++};
25638 ++static const u8 enc_assoc055[] __initconst = {
25639 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
25640 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
25641 ++};
25642 ++static const u8 enc_nonce055[] __initconst = {
25643 ++ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
25644 ++};
25645 ++static const u8 enc_key055[] __initconst = {
25646 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
25647 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
25648 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
25649 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
25650 ++};
25651 ++
25652 ++/* wycheproof - misc */
25653 ++static const u8 enc_input056[] __initconst = {
25654 ++ 0xda, 0x92, 0xbf, 0x77, 0x7f, 0x6b, 0xe8, 0x7c,
25655 ++ 0xaa, 0x2c, 0xfb, 0x7b, 0x9b, 0xbc, 0x01, 0x17,
25656 ++ 0x20, 0x66, 0xb8, 0xfc, 0xfc, 0x04, 0xc4, 0x84,
25657 ++ 0x7f, 0x1f, 0xcf, 0x41, 0x14, 0x2c, 0xd6, 0x41
25658 ++};
25659 ++static const u8 enc_output056[] __initconst = {
25660 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
25661 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
25662 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
25663 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
25664 ++ 0xb3, 0x89, 0x1c, 0x84, 0x9c, 0xb5, 0x2c, 0x27,
25665 ++ 0x74, 0x7e, 0xdf, 0xcf, 0x31, 0x21, 0x3b, 0xb6
25666 ++};
25667 ++static const u8 enc_assoc056[] __initconst = {
25668 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
25669 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
25670 ++};
25671 ++static const u8 enc_nonce056[] __initconst = {
25672 ++ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
25673 ++};
25674 ++static const u8 enc_key056[] __initconst = {
25675 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
25676 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
25677 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
25678 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
25679 ++};
25680 ++
25681 ++/* wycheproof - misc */
25682 ++static const u8 enc_input057[] __initconst = {
25683 ++ 0xda, 0x92, 0xbf, 0x77, 0x7f, 0x6b, 0xe8, 0x7c,
25684 ++ 0xaa, 0x2c, 0xfb, 0x7b, 0x9b, 0xbc, 0x01, 0x17,
25685 ++ 0x20, 0x66, 0xb8, 0xfc, 0xfc, 0x04, 0xc4, 0x84,
25686 ++ 0x7f, 0x1f, 0xcf, 0x41, 0x14, 0x2c, 0xd6, 0x41,
25687 ++ 0x1c, 0x43, 0x24, 0xa4, 0xe1, 0x21, 0x03, 0x01,
25688 ++ 0x74, 0x32, 0x5e, 0x49, 0x5e, 0xa3, 0x73, 0xd4,
25689 ++ 0xf7, 0x96, 0x00, 0x2d, 0x13, 0xa1, 0xd9, 0x1a,
25690 ++ 0xac, 0x48, 0x4d, 0xd8, 0x01, 0x78, 0x02, 0x42
25691 ++};
25692 ++static const u8 enc_output057[] __initconst = {
25693 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
25694 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
25695 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
25696 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
25697 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
25698 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
25699 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
25700 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
25701 ++ 0xf0, 0xc1, 0x2d, 0x26, 0xef, 0x03, 0x02, 0x9b,
25702 ++ 0x62, 0xc0, 0x08, 0xda, 0x27, 0xc5, 0xdc, 0x68
25703 ++};
25704 ++static const u8 enc_assoc057[] __initconst = {
25705 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
25706 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
25707 ++};
25708 ++static const u8 enc_nonce057[] __initconst = {
25709 ++ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
25710 ++};
25711 ++static const u8 enc_key057[] __initconst = {
25712 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
25713 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
25714 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
25715 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
25716 ++};
25717 ++
25718 ++/* wycheproof - misc */
25719 ++static const u8 enc_input058[] __initconst = {
25720 ++ 0xda, 0x92, 0xbf, 0x77, 0x7f, 0x6b, 0xe8, 0x7c,
25721 ++ 0xaa, 0x2c, 0xfb, 0x7b, 0x9b, 0xbc, 0x01, 0x17,
25722 ++ 0x20, 0x66, 0xb8, 0xfc, 0xfc, 0x04, 0xc4, 0x84,
25723 ++ 0x7f, 0x1f, 0xcf, 0x41, 0x14, 0x2c, 0xd6, 0x41,
25724 ++ 0x1c, 0x43, 0x24, 0xa4, 0xe1, 0x21, 0x03, 0x01,
25725 ++ 0x74, 0x32, 0x5e, 0x49, 0x5e, 0xa3, 0x73, 0xd4,
25726 ++ 0xf7, 0x96, 0x00, 0x2d, 0x13, 0xa1, 0xd9, 0x1a,
25727 ++ 0xac, 0x48, 0x4d, 0xd8, 0x01, 0x78, 0x02, 0x42,
25728 ++ 0x85, 0x25, 0xbb, 0xbd, 0xbd, 0x96, 0x40, 0x05,
25729 ++ 0xaa, 0xd8, 0x0d, 0x8f, 0x53, 0x09, 0x7a, 0xfd,
25730 ++ 0x48, 0xb3, 0xa5, 0x1d, 0x19, 0xf3, 0xfa, 0x7f,
25731 ++ 0x67, 0xe5, 0xb6, 0xc7, 0xba, 0x6c, 0x6d, 0x3b,
25732 ++ 0x64, 0x4d, 0x0d, 0x7b, 0x49, 0xb9, 0x10, 0x38,
25733 ++ 0x0c, 0x0f, 0x4e, 0xc9, 0xe2, 0x3c, 0xb7, 0x12,
25734 ++ 0x88, 0x2c, 0xf4, 0x3a, 0x89, 0x6d, 0x12, 0xc7,
25735 ++ 0x04, 0x53, 0xfe, 0x77, 0xc7, 0xfb, 0x77, 0x38
25736 ++};
25737 ++static const u8 enc_output058[] __initconst = {
25738 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
25739 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
25740 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
25741 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
25742 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
25743 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
25744 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
25745 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
25746 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
25747 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
25748 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
25749 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
25750 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
25751 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
25752 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
25753 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
25754 ++ 0xee, 0x65, 0x78, 0x30, 0x01, 0xc2, 0x56, 0x91,
25755 ++ 0xfa, 0x28, 0xd0, 0xf5, 0xf1, 0xc1, 0xd7, 0x62
25756 ++};
25757 ++static const u8 enc_assoc058[] __initconst = {
25758 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
25759 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
25760 ++};
25761 ++static const u8 enc_nonce058[] __initconst = {
25762 ++ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
25763 ++};
25764 ++static const u8 enc_key058[] __initconst = {
25765 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
25766 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
25767 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
25768 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
25769 ++};
25770 ++
25771 ++/* wycheproof - misc */
25772 ++static const u8 enc_input059[] __initconst = {
25773 ++ 0x25, 0x6d, 0x40, 0x08, 0x80, 0x94, 0x17, 0x03,
25774 ++ 0x55, 0xd3, 0x04, 0x04, 0x64, 0x43, 0xfe, 0x68,
25775 ++ 0xdf, 0x99, 0x47, 0x83, 0x03, 0xfb, 0x3b, 0xfb,
25776 ++ 0x80, 0xe0, 0x30, 0x3e, 0xeb, 0xd3, 0x29, 0x3e
25777 ++};
25778 ++static const u8 enc_output059[] __initconst = {
25779 ++ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
25780 ++ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
25781 ++ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
25782 ++ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
25783 ++ 0x79, 0xba, 0x7a, 0x29, 0xf5, 0xa7, 0xbb, 0x75,
25784 ++ 0x79, 0x7a, 0xf8, 0x7a, 0x61, 0x01, 0x29, 0xa4
25785 ++};
25786 ++static const u8 enc_assoc059[] __initconst = {
25787 ++ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
25788 ++ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80
25789 ++};
25790 ++static const u8 enc_nonce059[] __initconst = {
25791 ++ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
25792 ++};
25793 ++static const u8 enc_key059[] __initconst = {
25794 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
25795 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
25796 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
25797 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
25798 ++};
25799 ++
25800 ++/* wycheproof - misc */
25801 ++static const u8 enc_input060[] __initconst = {
25802 ++ 0x25, 0x6d, 0x40, 0x08, 0x80, 0x94, 0x17, 0x03,
25803 ++ 0x55, 0xd3, 0x04, 0x04, 0x64, 0x43, 0xfe, 0x68,
25804 ++ 0xdf, 0x99, 0x47, 0x83, 0x03, 0xfb, 0x3b, 0xfb,
25805 ++ 0x80, 0xe0, 0x30, 0x3e, 0xeb, 0xd3, 0x29, 0x3e,
25806 ++ 0xe3, 0xbc, 0xdb, 0xdb, 0x1e, 0xde, 0xfc, 0x7e,
25807 ++ 0x8b, 0xcd, 0xa1, 0x36, 0xa1, 0x5c, 0x8c, 0xab,
25808 ++ 0x08, 0x69, 0xff, 0x52, 0xec, 0x5e, 0x26, 0x65,
25809 ++ 0x53, 0xb7, 0xb2, 0xa7, 0xfe, 0x87, 0xfd, 0x3d
25810 ++};
25811 ++static const u8 enc_output060[] __initconst = {
25812 ++ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
25813 ++ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
25814 ++ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
25815 ++ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
25816 ++ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
25817 ++ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
25818 ++ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
25819 ++ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
25820 ++ 0x36, 0xb1, 0x74, 0x38, 0x19, 0xe1, 0xb9, 0xba,
25821 ++ 0x15, 0x51, 0xe8, 0xed, 0x92, 0x2a, 0x95, 0x9a
25822 ++};
25823 ++static const u8 enc_assoc060[] __initconst = {
25824 ++ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
25825 ++ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80
25826 ++};
25827 ++static const u8 enc_nonce060[] __initconst = {
25828 ++ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
25829 ++};
25830 ++static const u8 enc_key060[] __initconst = {
25831 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
25832 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
25833 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
25834 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
25835 ++};
25836 ++
25837 ++/* wycheproof - misc */
25838 ++static const u8 enc_input061[] __initconst = {
25839 ++ 0x25, 0x6d, 0x40, 0x08, 0x80, 0x94, 0x17, 0x03,
25840 ++ 0x55, 0xd3, 0x04, 0x04, 0x64, 0x43, 0xfe, 0x68,
25841 ++ 0xdf, 0x99, 0x47, 0x83, 0x03, 0xfb, 0x3b, 0xfb,
25842 ++ 0x80, 0xe0, 0x30, 0x3e, 0xeb, 0xd3, 0x29, 0x3e,
25843 ++ 0xe3, 0xbc, 0xdb, 0xdb, 0x1e, 0xde, 0xfc, 0x7e,
25844 ++ 0x8b, 0xcd, 0xa1, 0x36, 0xa1, 0x5c, 0x8c, 0xab,
25845 ++ 0x08, 0x69, 0xff, 0x52, 0xec, 0x5e, 0x26, 0x65,
25846 ++ 0x53, 0xb7, 0xb2, 0xa7, 0xfe, 0x87, 0xfd, 0x3d,
25847 ++ 0x7a, 0xda, 0x44, 0xc2, 0x42, 0x69, 0xbf, 0x7a,
25848 ++ 0x55, 0x27, 0xf2, 0xf0, 0xac, 0xf6, 0x85, 0x82,
25849 ++ 0xb7, 0x4c, 0x5a, 0x62, 0xe6, 0x0c, 0x05, 0x00,
25850 ++ 0x98, 0x1a, 0x49, 0xb8, 0x45, 0x93, 0x92, 0x44,
25851 ++ 0x9b, 0xb2, 0xf2, 0x04, 0xb6, 0x46, 0xef, 0x47,
25852 ++ 0xf3, 0xf0, 0xb1, 0xb6, 0x1d, 0xc3, 0x48, 0x6d,
25853 ++ 0x77, 0xd3, 0x0b, 0x45, 0x76, 0x92, 0xed, 0xb8,
25854 ++ 0xfb, 0xac, 0x01, 0x08, 0x38, 0x04, 0x88, 0x47
25855 ++};
25856 ++static const u8 enc_output061[] __initconst = {
25857 ++ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
25858 ++ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
25859 ++ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
25860 ++ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
25861 ++ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
25862 ++ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
25863 ++ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
25864 ++ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
25865 ++ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
25866 ++ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
25867 ++ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
25868 ++ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
25869 ++ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
25870 ++ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
25871 ++ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
25872 ++ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
25873 ++ 0xfe, 0xac, 0x49, 0x55, 0x55, 0x4e, 0x80, 0x6f,
25874 ++ 0x3a, 0x19, 0x02, 0xe2, 0x44, 0x32, 0xc0, 0x8a
25875 ++};
25876 ++static const u8 enc_assoc061[] __initconst = {
25877 ++ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
25878 ++ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80
25879 ++};
25880 ++static const u8 enc_nonce061[] __initconst = {
25881 ++ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
25882 ++};
25883 ++static const u8 enc_key061[] __initconst = {
25884 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
25885 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
25886 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
25887 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
25888 ++};
25889 ++
25890 ++/* wycheproof - misc */
25891 ++static const u8 enc_input062[] __initconst = {
25892 ++ 0xda, 0x92, 0xbf, 0xf7, 0x7f, 0x6b, 0xe8, 0xfc,
25893 ++ 0xaa, 0x2c, 0xfb, 0xfb, 0x9b, 0xbc, 0x01, 0x97,
25894 ++ 0x20, 0x66, 0xb8, 0x7c, 0xfc, 0x04, 0xc4, 0x04,
25895 ++ 0x7f, 0x1f, 0xcf, 0xc1, 0x14, 0x2c, 0xd6, 0xc1
25896 ++};
25897 ++static const u8 enc_output062[] __initconst = {
25898 ++ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
25899 ++ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
25900 ++ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
25901 ++ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
25902 ++ 0x20, 0xa3, 0x79, 0x8d, 0xf1, 0x29, 0x2c, 0x59,
25903 ++ 0x72, 0xbf, 0x97, 0x41, 0xae, 0xc3, 0x8a, 0x19
25904 ++};
25905 ++static const u8 enc_assoc062[] __initconst = {
25906 ++ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
25907 ++ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f
25908 ++};
25909 ++static const u8 enc_nonce062[] __initconst = {
25910 ++ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
25911 ++};
25912 ++static const u8 enc_key062[] __initconst = {
25913 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
25914 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
25915 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
25916 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
25917 ++};
25918 ++
25919 ++/* wycheproof - misc */
25920 ++static const u8 enc_input063[] __initconst = {
25921 ++ 0xda, 0x92, 0xbf, 0xf7, 0x7f, 0x6b, 0xe8, 0xfc,
25922 ++ 0xaa, 0x2c, 0xfb, 0xfb, 0x9b, 0xbc, 0x01, 0x97,
25923 ++ 0x20, 0x66, 0xb8, 0x7c, 0xfc, 0x04, 0xc4, 0x04,
25924 ++ 0x7f, 0x1f, 0xcf, 0xc1, 0x14, 0x2c, 0xd6, 0xc1,
25925 ++ 0x1c, 0x43, 0x24, 0x24, 0xe1, 0x21, 0x03, 0x81,
25926 ++ 0x74, 0x32, 0x5e, 0xc9, 0x5e, 0xa3, 0x73, 0x54,
25927 ++ 0xf7, 0x96, 0x00, 0xad, 0x13, 0xa1, 0xd9, 0x9a,
25928 ++ 0xac, 0x48, 0x4d, 0x58, 0x01, 0x78, 0x02, 0xc2
25929 ++};
25930 ++static const u8 enc_output063[] __initconst = {
25931 ++ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
25932 ++ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
25933 ++ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
25934 ++ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
25935 ++ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
25936 ++ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
25937 ++ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
25938 ++ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
25939 ++ 0xc0, 0x3d, 0x9f, 0x67, 0x35, 0x4a, 0x97, 0xb2,
25940 ++ 0xf0, 0x74, 0xf7, 0x55, 0x15, 0x57, 0xe4, 0x9c
25941 ++};
25942 ++static const u8 enc_assoc063[] __initconst = {
25943 ++ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
25944 ++ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f
25945 ++};
25946 ++static const u8 enc_nonce063[] __initconst = {
25947 ++ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
25948 ++};
25949 ++static const u8 enc_key063[] __initconst = {
25950 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
25951 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
25952 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
25953 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
25954 ++};
25955 ++
25956 ++/* wycheproof - misc */
25957 ++static const u8 enc_input064[] __initconst = {
25958 ++ 0xda, 0x92, 0xbf, 0xf7, 0x7f, 0x6b, 0xe8, 0xfc,
25959 ++ 0xaa, 0x2c, 0xfb, 0xfb, 0x9b, 0xbc, 0x01, 0x97,
25960 ++ 0x20, 0x66, 0xb8, 0x7c, 0xfc, 0x04, 0xc4, 0x04,
25961 ++ 0x7f, 0x1f, 0xcf, 0xc1, 0x14, 0x2c, 0xd6, 0xc1,
25962 ++ 0x1c, 0x43, 0x24, 0x24, 0xe1, 0x21, 0x03, 0x81,
25963 ++ 0x74, 0x32, 0x5e, 0xc9, 0x5e, 0xa3, 0x73, 0x54,
25964 ++ 0xf7, 0x96, 0x00, 0xad, 0x13, 0xa1, 0xd9, 0x9a,
25965 ++ 0xac, 0x48, 0x4d, 0x58, 0x01, 0x78, 0x02, 0xc2,
25966 ++ 0x85, 0x25, 0xbb, 0x3d, 0xbd, 0x96, 0x40, 0x85,
25967 ++ 0xaa, 0xd8, 0x0d, 0x0f, 0x53, 0x09, 0x7a, 0x7d,
25968 ++ 0x48, 0xb3, 0xa5, 0x9d, 0x19, 0xf3, 0xfa, 0xff,
25969 ++ 0x67, 0xe5, 0xb6, 0x47, 0xba, 0x6c, 0x6d, 0xbb,
25970 ++ 0x64, 0x4d, 0x0d, 0xfb, 0x49, 0xb9, 0x10, 0xb8,
25971 ++ 0x0c, 0x0f, 0x4e, 0x49, 0xe2, 0x3c, 0xb7, 0x92,
25972 ++ 0x88, 0x2c, 0xf4, 0xba, 0x89, 0x6d, 0x12, 0x47,
25973 ++ 0x04, 0x53, 0xfe, 0xf7, 0xc7, 0xfb, 0x77, 0xb8
25974 ++};
25975 ++static const u8 enc_output064[] __initconst = {
25976 ++ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
25977 ++ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
25978 ++ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
25979 ++ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
25980 ++ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
25981 ++ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
25982 ++ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
25983 ++ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
25984 ++ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
25985 ++ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
25986 ++ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
25987 ++ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
25988 ++ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
25989 ++ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
25990 ++ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
25991 ++ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
25992 ++ 0xc8, 0x6d, 0xa8, 0xdd, 0x65, 0x22, 0x86, 0xd5,
25993 ++ 0x02, 0x13, 0xd3, 0x28, 0xd6, 0x3e, 0x40, 0x06
25994 ++};
25995 ++static const u8 enc_assoc064[] __initconst = {
25996 ++ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
25997 ++ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f
25998 ++};
25999 ++static const u8 enc_nonce064[] __initconst = {
26000 ++ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
26001 ++};
26002 ++static const u8 enc_key064[] __initconst = {
26003 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
26004 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
26005 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
26006 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
26007 ++};
26008 ++
26009 ++/* wycheproof - misc */
26010 ++static const u8 enc_input065[] __initconst = {
26011 ++ 0x5a, 0x92, 0xbf, 0x77, 0xff, 0x6b, 0xe8, 0x7c,
26012 ++ 0x2a, 0x2c, 0xfb, 0x7b, 0x1b, 0xbc, 0x01, 0x17,
26013 ++ 0xa0, 0x66, 0xb8, 0xfc, 0x7c, 0x04, 0xc4, 0x84,
26014 ++ 0xff, 0x1f, 0xcf, 0x41, 0x94, 0x2c, 0xd6, 0x41
26015 ++};
26016 ++static const u8 enc_output065[] __initconst = {
26017 ++ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
26018 ++ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
26019 ++ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
26020 ++ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
26021 ++ 0xbe, 0xde, 0x90, 0x83, 0xce, 0xb3, 0x6d, 0xdf,
26022 ++ 0xe5, 0xfa, 0x81, 0x1f, 0x95, 0x47, 0x1c, 0x67
26023 ++};
26024 ++static const u8 enc_assoc065[] __initconst = {
26025 ++ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
26026 ++ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff
26027 ++};
26028 ++static const u8 enc_nonce065[] __initconst = {
26029 ++ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
26030 ++};
26031 ++static const u8 enc_key065[] __initconst = {
26032 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
26033 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
26034 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
26035 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
26036 ++};
26037 ++
26038 ++/* wycheproof - misc */
26039 ++static const u8 enc_input066[] __initconst = {
26040 ++ 0x5a, 0x92, 0xbf, 0x77, 0xff, 0x6b, 0xe8, 0x7c,
26041 ++ 0x2a, 0x2c, 0xfb, 0x7b, 0x1b, 0xbc, 0x01, 0x17,
26042 ++ 0xa0, 0x66, 0xb8, 0xfc, 0x7c, 0x04, 0xc4, 0x84,
26043 ++ 0xff, 0x1f, 0xcf, 0x41, 0x94, 0x2c, 0xd6, 0x41,
26044 ++ 0x9c, 0x43, 0x24, 0xa4, 0x61, 0x21, 0x03, 0x01,
26045 ++ 0xf4, 0x32, 0x5e, 0x49, 0xde, 0xa3, 0x73, 0xd4,
26046 ++ 0x77, 0x96, 0x00, 0x2d, 0x93, 0xa1, 0xd9, 0x1a,
26047 ++ 0x2c, 0x48, 0x4d, 0xd8, 0x81, 0x78, 0x02, 0x42
26048 ++};
26049 ++static const u8 enc_output066[] __initconst = {
26050 ++ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
26051 ++ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
26052 ++ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
26053 ++ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
26054 ++ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
26055 ++ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
26056 ++ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
26057 ++ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
26058 ++ 0x30, 0x08, 0x74, 0xbb, 0x06, 0x92, 0xb6, 0x89,
26059 ++ 0xde, 0xad, 0x9a, 0xe1, 0x5b, 0x06, 0x73, 0x90
26060 ++};
26061 ++static const u8 enc_assoc066[] __initconst = {
26062 ++ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
26063 ++ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff
26064 ++};
26065 ++static const u8 enc_nonce066[] __initconst = {
26066 ++ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
26067 ++};
26068 ++static const u8 enc_key066[] __initconst = {
26069 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
26070 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
26071 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
26072 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
26073 ++};
26074 ++
26075 ++/* wycheproof - misc */
26076 ++static const u8 enc_input067[] __initconst = {
26077 ++ 0x5a, 0x92, 0xbf, 0x77, 0xff, 0x6b, 0xe8, 0x7c,
26078 ++ 0x2a, 0x2c, 0xfb, 0x7b, 0x1b, 0xbc, 0x01, 0x17,
26079 ++ 0xa0, 0x66, 0xb8, 0xfc, 0x7c, 0x04, 0xc4, 0x84,
26080 ++ 0xff, 0x1f, 0xcf, 0x41, 0x94, 0x2c, 0xd6, 0x41,
26081 ++ 0x9c, 0x43, 0x24, 0xa4, 0x61, 0x21, 0x03, 0x01,
26082 ++ 0xf4, 0x32, 0x5e, 0x49, 0xde, 0xa3, 0x73, 0xd4,
26083 ++ 0x77, 0x96, 0x00, 0x2d, 0x93, 0xa1, 0xd9, 0x1a,
26084 ++ 0x2c, 0x48, 0x4d, 0xd8, 0x81, 0x78, 0x02, 0x42,
26085 ++ 0x05, 0x25, 0xbb, 0xbd, 0x3d, 0x96, 0x40, 0x05,
26086 ++ 0x2a, 0xd8, 0x0d, 0x8f, 0xd3, 0x09, 0x7a, 0xfd,
26087 ++ 0xc8, 0xb3, 0xa5, 0x1d, 0x99, 0xf3, 0xfa, 0x7f,
26088 ++ 0xe7, 0xe5, 0xb6, 0xc7, 0x3a, 0x6c, 0x6d, 0x3b,
26089 ++ 0xe4, 0x4d, 0x0d, 0x7b, 0xc9, 0xb9, 0x10, 0x38,
26090 ++ 0x8c, 0x0f, 0x4e, 0xc9, 0x62, 0x3c, 0xb7, 0x12,
26091 ++ 0x08, 0x2c, 0xf4, 0x3a, 0x09, 0x6d, 0x12, 0xc7,
26092 ++ 0x84, 0x53, 0xfe, 0x77, 0x47, 0xfb, 0x77, 0x38
26093 ++};
26094 ++static const u8 enc_output067[] __initconst = {
26095 ++ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
26096 ++ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
26097 ++ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
26098 ++ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
26099 ++ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
26100 ++ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
26101 ++ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
26102 ++ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
26103 ++ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
26104 ++ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
26105 ++ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
26106 ++ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
26107 ++ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
26108 ++ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
26109 ++ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
26110 ++ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
26111 ++ 0x99, 0xca, 0xd8, 0x5f, 0x45, 0xca, 0x40, 0x94,
26112 ++ 0x2d, 0x0d, 0x4d, 0x5e, 0x95, 0x0a, 0xde, 0x22
26113 ++};
26114 ++static const u8 enc_assoc067[] __initconst = {
26115 ++ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff,
26116 ++ 0x7f, 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff
26117 ++};
26118 ++static const u8 enc_nonce067[] __initconst = {
26119 ++ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
26120 ++};
26121 ++static const u8 enc_key067[] __initconst = {
26122 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
26123 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
26124 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
26125 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
26126 ++};
26127 ++
26128 ++/* wycheproof - misc */
26129 ++static const u8 enc_input068[] __initconst = {
26130 ++ 0x25, 0x6d, 0x40, 0x88, 0x7f, 0x6b, 0xe8, 0x7c,
26131 ++ 0x55, 0xd3, 0x04, 0x84, 0x9b, 0xbc, 0x01, 0x17,
26132 ++ 0xdf, 0x99, 0x47, 0x03, 0xfc, 0x04, 0xc4, 0x84,
26133 ++ 0x80, 0xe0, 0x30, 0xbe, 0x14, 0x2c, 0xd6, 0x41
26134 ++};
26135 ++static const u8 enc_output068[] __initconst = {
26136 ++ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
26137 ++ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
26138 ++ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
26139 ++ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
26140 ++ 0x8b, 0xbe, 0x14, 0x52, 0x72, 0xe7, 0xc2, 0xd9,
26141 ++ 0xa1, 0x89, 0x1a, 0x3a, 0xb0, 0x98, 0x3d, 0x9d
26142 ++};
26143 ++static const u8 enc_assoc068[] __initconst = {
26144 ++ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
26145 ++ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff
26146 ++};
26147 ++static const u8 enc_nonce068[] __initconst = {
26148 ++ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
26149 ++};
26150 ++static const u8 enc_key068[] __initconst = {
26151 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
26152 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
26153 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
26154 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
26155 ++};
26156 ++
26157 ++/* wycheproof - misc */
26158 ++static const u8 enc_input069[] __initconst = {
26159 ++ 0x25, 0x6d, 0x40, 0x88, 0x7f, 0x6b, 0xe8, 0x7c,
26160 ++ 0x55, 0xd3, 0x04, 0x84, 0x9b, 0xbc, 0x01, 0x17,
26161 ++ 0xdf, 0x99, 0x47, 0x03, 0xfc, 0x04, 0xc4, 0x84,
26162 ++ 0x80, 0xe0, 0x30, 0xbe, 0x14, 0x2c, 0xd6, 0x41,
26163 ++ 0xe3, 0xbc, 0xdb, 0x5b, 0xe1, 0x21, 0x03, 0x01,
26164 ++ 0x8b, 0xcd, 0xa1, 0xb6, 0x5e, 0xa3, 0x73, 0xd4,
26165 ++ 0x08, 0x69, 0xff, 0xd2, 0x13, 0xa1, 0xd9, 0x1a,
26166 ++ 0x53, 0xb7, 0xb2, 0x27, 0x01, 0x78, 0x02, 0x42
26167 ++};
26168 ++static const u8 enc_output069[] __initconst = {
26169 ++ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
26170 ++ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
26171 ++ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
26172 ++ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
26173 ++ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
26174 ++ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
26175 ++ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
26176 ++ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
26177 ++ 0x3b, 0x41, 0x86, 0x19, 0x13, 0xa8, 0xf6, 0xde,
26178 ++ 0x7f, 0x61, 0xe2, 0x25, 0x63, 0x1b, 0xc3, 0x82
26179 ++};
26180 ++static const u8 enc_assoc069[] __initconst = {
26181 ++ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
26182 ++ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff
26183 ++};
26184 ++static const u8 enc_nonce069[] __initconst = {
26185 ++ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
26186 ++};
26187 ++static const u8 enc_key069[] __initconst = {
26188 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
26189 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
26190 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
26191 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
26192 ++};
26193 ++
26194 ++/* wycheproof - misc */
26195 ++static const u8 enc_input070[] __initconst = {
26196 ++ 0x25, 0x6d, 0x40, 0x88, 0x7f, 0x6b, 0xe8, 0x7c,
26197 ++ 0x55, 0xd3, 0x04, 0x84, 0x9b, 0xbc, 0x01, 0x17,
26198 ++ 0xdf, 0x99, 0x47, 0x03, 0xfc, 0x04, 0xc4, 0x84,
26199 ++ 0x80, 0xe0, 0x30, 0xbe, 0x14, 0x2c, 0xd6, 0x41,
26200 ++ 0xe3, 0xbc, 0xdb, 0x5b, 0xe1, 0x21, 0x03, 0x01,
26201 ++ 0x8b, 0xcd, 0xa1, 0xb6, 0x5e, 0xa3, 0x73, 0xd4,
26202 ++ 0x08, 0x69, 0xff, 0xd2, 0x13, 0xa1, 0xd9, 0x1a,
26203 ++ 0x53, 0xb7, 0xb2, 0x27, 0x01, 0x78, 0x02, 0x42,
26204 ++ 0x7a, 0xda, 0x44, 0x42, 0xbd, 0x96, 0x40, 0x05,
26205 ++ 0x55, 0x27, 0xf2, 0x70, 0x53, 0x09, 0x7a, 0xfd,
26206 ++ 0xb7, 0x4c, 0x5a, 0xe2, 0x19, 0xf3, 0xfa, 0x7f,
26207 ++ 0x98, 0x1a, 0x49, 0x38, 0xba, 0x6c, 0x6d, 0x3b,
26208 ++ 0x9b, 0xb2, 0xf2, 0x84, 0x49, 0xb9, 0x10, 0x38,
26209 ++ 0xf3, 0xf0, 0xb1, 0x36, 0xe2, 0x3c, 0xb7, 0x12,
26210 ++ 0x77, 0xd3, 0x0b, 0xc5, 0x89, 0x6d, 0x12, 0xc7,
26211 ++ 0xfb, 0xac, 0x01, 0x88, 0xc7, 0xfb, 0x77, 0x38
26212 ++};
26213 ++static const u8 enc_output070[] __initconst = {
26214 ++ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
26215 ++ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
26216 ++ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
26217 ++ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
26218 ++ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
26219 ++ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
26220 ++ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
26221 ++ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
26222 ++ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
26223 ++ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
26224 ++ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
26225 ++ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
26226 ++ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
26227 ++ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
26228 ++ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
26229 ++ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
26230 ++ 0x84, 0x28, 0xbc, 0xf0, 0x23, 0xec, 0x6b, 0xf3,
26231 ++ 0x1f, 0xd9, 0xef, 0xb2, 0x03, 0xff, 0x08, 0x71
26232 ++};
26233 ++static const u8 enc_assoc070[] __initconst = {
26234 ++ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
26235 ++ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff
26236 ++};
26237 ++static const u8 enc_nonce070[] __initconst = {
26238 ++ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
26239 ++};
26240 ++static const u8 enc_key070[] __initconst = {
26241 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
26242 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
26243 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
26244 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
26245 ++};
26246 ++
26247 ++/* wycheproof - misc */
26248 ++static const u8 enc_input071[] __initconst = {
26249 ++ 0xda, 0x92, 0xbf, 0x77, 0x80, 0x94, 0x17, 0x83,
26250 ++ 0xaa, 0x2c, 0xfb, 0x7b, 0x64, 0x43, 0xfe, 0xe8,
26251 ++ 0x20, 0x66, 0xb8, 0xfc, 0x03, 0xfb, 0x3b, 0x7b,
26252 ++ 0x7f, 0x1f, 0xcf, 0x41, 0xeb, 0xd3, 0x29, 0xbe
26253 ++};
26254 ++static const u8 enc_output071[] __initconst = {
26255 ++ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
26256 ++ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
26257 ++ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
26258 ++ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
26259 ++ 0x13, 0x9f, 0xdf, 0x64, 0x74, 0xea, 0x24, 0xf5,
26260 ++ 0x49, 0xb0, 0x75, 0x82, 0x5f, 0x2c, 0x76, 0x20
26261 ++};
26262 ++static const u8 enc_assoc071[] __initconst = {
26263 ++ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
26264 ++ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00
26265 ++};
26266 ++static const u8 enc_nonce071[] __initconst = {
26267 ++ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
26268 ++};
26269 ++static const u8 enc_key071[] __initconst = {
26270 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
26271 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
26272 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
26273 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
26274 ++};
26275 ++
26276 ++/* wycheproof - misc */
26277 ++static const u8 enc_input072[] __initconst = {
26278 ++ 0xda, 0x92, 0xbf, 0x77, 0x80, 0x94, 0x17, 0x83,
26279 ++ 0xaa, 0x2c, 0xfb, 0x7b, 0x64, 0x43, 0xfe, 0xe8,
26280 ++ 0x20, 0x66, 0xb8, 0xfc, 0x03, 0xfb, 0x3b, 0x7b,
26281 ++ 0x7f, 0x1f, 0xcf, 0x41, 0xeb, 0xd3, 0x29, 0xbe,
26282 ++ 0x1c, 0x43, 0x24, 0xa4, 0x1e, 0xde, 0xfc, 0xfe,
26283 ++ 0x74, 0x32, 0x5e, 0x49, 0xa1, 0x5c, 0x8c, 0x2b,
26284 ++ 0xf7, 0x96, 0x00, 0x2d, 0xec, 0x5e, 0x26, 0xe5,
26285 ++ 0xac, 0x48, 0x4d, 0xd8, 0xfe, 0x87, 0xfd, 0xbd
26286 ++};
26287 ++static const u8 enc_output072[] __initconst = {
26288 ++ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
26289 ++ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
26290 ++ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
26291 ++ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
26292 ++ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
26293 ++ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
26294 ++ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
26295 ++ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
26296 ++ 0xbb, 0xad, 0x8d, 0x86, 0x3b, 0x83, 0x5a, 0x8e,
26297 ++ 0x86, 0x64, 0xfd, 0x1d, 0x45, 0x66, 0xb6, 0xb4
26298 ++};
26299 ++static const u8 enc_assoc072[] __initconst = {
26300 ++ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
26301 ++ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00
26302 ++};
26303 ++static const u8 enc_nonce072[] __initconst = {
26304 ++ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
26305 ++};
26306 ++static const u8 enc_key072[] __initconst = {
26307 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
26308 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
26309 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
26310 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
26311 ++};
26312 ++
26313 ++/* wycheproof - misc */
26314 ++static const u8 enc_input073[] __initconst = {
26315 ++ 0xda, 0x92, 0xbf, 0x77, 0x80, 0x94, 0x17, 0x83,
26316 ++ 0xaa, 0x2c, 0xfb, 0x7b, 0x64, 0x43, 0xfe, 0xe8,
26317 ++ 0x20, 0x66, 0xb8, 0xfc, 0x03, 0xfb, 0x3b, 0x7b,
26318 ++ 0x7f, 0x1f, 0xcf, 0x41, 0xeb, 0xd3, 0x29, 0xbe,
26319 ++ 0x1c, 0x43, 0x24, 0xa4, 0x1e, 0xde, 0xfc, 0xfe,
26320 ++ 0x74, 0x32, 0x5e, 0x49, 0xa1, 0x5c, 0x8c, 0x2b,
26321 ++ 0xf7, 0x96, 0x00, 0x2d, 0xec, 0x5e, 0x26, 0xe5,
26322 ++ 0xac, 0x48, 0x4d, 0xd8, 0xfe, 0x87, 0xfd, 0xbd,
26323 ++ 0x85, 0x25, 0xbb, 0xbd, 0x42, 0x69, 0xbf, 0xfa,
26324 ++ 0xaa, 0xd8, 0x0d, 0x8f, 0xac, 0xf6, 0x85, 0x02,
26325 ++ 0x48, 0xb3, 0xa5, 0x1d, 0xe6, 0x0c, 0x05, 0x80,
26326 ++ 0x67, 0xe5, 0xb6, 0xc7, 0x45, 0x93, 0x92, 0xc4,
26327 ++ 0x64, 0x4d, 0x0d, 0x7b, 0xb6, 0x46, 0xef, 0xc7,
26328 ++ 0x0c, 0x0f, 0x4e, 0xc9, 0x1d, 0xc3, 0x48, 0xed,
26329 ++ 0x88, 0x2c, 0xf4, 0x3a, 0x76, 0x92, 0xed, 0x38,
26330 ++ 0x04, 0x53, 0xfe, 0x77, 0x38, 0x04, 0x88, 0xc7
26331 ++};
26332 ++static const u8 enc_output073[] __initconst = {
26333 ++ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
26334 ++ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
26335 ++ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
26336 ++ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
26337 ++ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
26338 ++ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
26339 ++ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
26340 ++ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
26341 ++ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
26342 ++ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
26343 ++ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
26344 ++ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
26345 ++ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
26346 ++ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
26347 ++ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
26348 ++ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
26349 ++ 0x42, 0xf2, 0x35, 0x42, 0x97, 0x84, 0x9a, 0x51,
26350 ++ 0x1d, 0x53, 0xe5, 0x57, 0x17, 0x72, 0xf7, 0x1f
26351 ++};
26352 ++static const u8 enc_assoc073[] __initconst = {
26353 ++ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
26354 ++ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00
26355 ++};
26356 ++static const u8 enc_nonce073[] __initconst = {
26357 ++ 0x00, 0x00, 0x00, 0x00, 0x01, 0xee, 0x32, 0x00
26358 ++};
26359 ++static const u8 enc_key073[] __initconst = {
26360 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
26361 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
26362 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
26363 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
26364 ++};
26365 ++
26366 ++/* wycheproof - checking for int overflows */
26367 ++static const u8 enc_input076[] __initconst = {
26368 ++ 0x1b, 0x99, 0x6f, 0x9a, 0x3c, 0xcc, 0x67, 0x85,
26369 ++ 0xde, 0x22, 0xff, 0x5b, 0x8a, 0xdd, 0x95, 0x02,
26370 ++ 0xce, 0x03, 0xa0, 0xfa, 0xf5, 0x99, 0x2a, 0x09,
26371 ++ 0x52, 0x2c, 0xdd, 0x12, 0x06, 0xd2, 0x20, 0xb8,
26372 ++ 0xf8, 0xbd, 0x07, 0xd1, 0xf1, 0xf5, 0xa1, 0xbd,
26373 ++ 0x9a, 0x71, 0xd1, 0x1c, 0x7f, 0x57, 0x9b, 0x85,
26374 ++ 0x58, 0x18, 0xc0, 0x8d, 0x4d, 0xe0, 0x36, 0x39,
26375 ++ 0x31, 0x83, 0xb7, 0xf5, 0x90, 0xb3, 0x35, 0xae,
26376 ++ 0xd8, 0xde, 0x5b, 0x57, 0xb1, 0x3c, 0x5f, 0xed,
26377 ++ 0xe2, 0x44, 0x1c, 0x3e, 0x18, 0x4a, 0xa9, 0xd4,
26378 ++ 0x6e, 0x61, 0x59, 0x85, 0x06, 0xb3, 0xe1, 0x1c,
26379 ++ 0x43, 0xc6, 0x2c, 0xbc, 0xac, 0xec, 0xed, 0x33,
26380 ++ 0x19, 0x08, 0x75, 0xb0, 0x12, 0x21, 0x8b, 0x19,
26381 ++ 0x30, 0xfb, 0x7c, 0x38, 0xec, 0x45, 0xac, 0x11,
26382 ++ 0xc3, 0x53, 0xd0, 0xcf, 0x93, 0x8d, 0xcc, 0xb9,
26383 ++ 0xef, 0xad, 0x8f, 0xed, 0xbe, 0x46, 0xda, 0xa5
26384 ++};
26385 ++static const u8 enc_output076[] __initconst = {
26386 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26387 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26388 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26389 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26390 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26391 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26392 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26393 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26394 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26395 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26396 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26397 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26398 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26399 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26400 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26401 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26402 ++ 0x4b, 0x0b, 0xda, 0x8a, 0xd0, 0x43, 0x83, 0x0d,
26403 ++ 0x83, 0x19, 0xab, 0x82, 0xc5, 0x0c, 0x76, 0x63
26404 ++};
26405 ++static const u8 enc_assoc076[] __initconst = {
26406 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26407 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26408 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26409 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26410 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26411 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26412 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26413 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
26414 ++};
26415 ++static const u8 enc_nonce076[] __initconst = {
26416 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0xb4, 0xf0
26417 ++};
26418 ++static const u8 enc_key076[] __initconst = {
26419 ++ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
26420 ++ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
26421 ++ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
26422 ++ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30
26423 ++};
26424 ++
26425 ++/* wycheproof - checking for int overflows */
26426 ++static const u8 enc_input077[] __initconst = {
26427 ++ 0x86, 0xcb, 0xac, 0xae, 0x4d, 0x3f, 0x74, 0xae,
26428 ++ 0x01, 0x21, 0x3e, 0x05, 0x51, 0xcc, 0x15, 0x16,
26429 ++ 0x0e, 0xa1, 0xbe, 0x84, 0x08, 0xe3, 0xd5, 0xd7,
26430 ++ 0x4f, 0x01, 0x46, 0x49, 0x95, 0xa6, 0x9e, 0x61,
26431 ++ 0x76, 0xcb, 0x9e, 0x02, 0xb2, 0x24, 0x7e, 0xd2,
26432 ++ 0x99, 0x89, 0x2f, 0x91, 0x82, 0xa4, 0x5c, 0xaf,
26433 ++ 0x4c, 0x69, 0x40, 0x56, 0x11, 0x76, 0x6e, 0xdf,
26434 ++ 0xaf, 0xdc, 0x28, 0x55, 0x19, 0xea, 0x30, 0x48,
26435 ++ 0x0c, 0x44, 0xf0, 0x5e, 0x78, 0x1e, 0xac, 0xf8,
26436 ++ 0xfc, 0xec, 0xc7, 0x09, 0x0a, 0xbb, 0x28, 0xfa,
26437 ++ 0x5f, 0xd5, 0x85, 0xac, 0x8c, 0xda, 0x7e, 0x87,
26438 ++ 0x72, 0xe5, 0x94, 0xe4, 0xce, 0x6c, 0x88, 0x32,
26439 ++ 0x81, 0x93, 0x2e, 0x0f, 0x89, 0xf8, 0x77, 0xa1,
26440 ++ 0xf0, 0x4d, 0x9c, 0x32, 0xb0, 0x6c, 0xf9, 0x0b,
26441 ++ 0x0e, 0x76, 0x2b, 0x43, 0x0c, 0x4d, 0x51, 0x7c,
26442 ++ 0x97, 0x10, 0x70, 0x68, 0xf4, 0x98, 0xef, 0x7f
26443 ++};
26444 ++static const u8 enc_output077[] __initconst = {
26445 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26446 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26447 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26448 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26449 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26450 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26451 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26452 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26453 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26454 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26455 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26456 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26457 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26458 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26459 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26460 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26461 ++ 0x4b, 0xc9, 0x8f, 0x72, 0xc4, 0x94, 0xc2, 0xa4,
26462 ++ 0x3c, 0x2b, 0x15, 0xa1, 0x04, 0x3f, 0x1c, 0xfa
26463 ++};
26464 ++static const u8 enc_assoc077[] __initconst = {
26465 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26466 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26467 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26468 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26469 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26470 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26471 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26472 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
26473 ++};
26474 ++static const u8 enc_nonce077[] __initconst = {
26475 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0xfb, 0x66
26476 ++};
26477 ++static const u8 enc_key077[] __initconst = {
26478 ++ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
26479 ++ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
26480 ++ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
26481 ++ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30
26482 ++};
26483 ++
26484 ++/* wycheproof - checking for int overflows */
26485 ++static const u8 enc_input078[] __initconst = {
26486 ++ 0xfa, 0xb1, 0xcd, 0xdf, 0x4f, 0xe1, 0x98, 0xef,
26487 ++ 0x63, 0xad, 0xd8, 0x81, 0xd6, 0xea, 0xd6, 0xc5,
26488 ++ 0x76, 0x37, 0xbb, 0xe9, 0x20, 0x18, 0xca, 0x7c,
26489 ++ 0x0b, 0x96, 0xfb, 0xa0, 0x87, 0x1e, 0x93, 0x2d,
26490 ++ 0xb1, 0xfb, 0xf9, 0x07, 0x61, 0xbe, 0x25, 0xdf,
26491 ++ 0x8d, 0xfa, 0xf9, 0x31, 0xce, 0x57, 0x57, 0xe6,
26492 ++ 0x17, 0xb3, 0xd7, 0xa9, 0xf0, 0xbf, 0x0f, 0xfe,
26493 ++ 0x5d, 0x59, 0x1a, 0x33, 0xc1, 0x43, 0xb8, 0xf5,
26494 ++ 0x3f, 0xd0, 0xb5, 0xa1, 0x96, 0x09, 0xfd, 0x62,
26495 ++ 0xe5, 0xc2, 0x51, 0xa4, 0x28, 0x1a, 0x20, 0x0c,
26496 ++ 0xfd, 0xc3, 0x4f, 0x28, 0x17, 0x10, 0x40, 0x6f,
26497 ++ 0x4e, 0x37, 0x62, 0x54, 0x46, 0xff, 0x6e, 0xf2,
26498 ++ 0x24, 0x91, 0x3d, 0xeb, 0x0d, 0x89, 0xaf, 0x33,
26499 ++ 0x71, 0x28, 0xe3, 0xd1, 0x55, 0xd1, 0x6d, 0x3e,
26500 ++ 0xc3, 0x24, 0x60, 0x41, 0x43, 0x21, 0x43, 0xe9,
26501 ++ 0xab, 0x3a, 0x6d, 0x2c, 0xcc, 0x2f, 0x4d, 0x62
26502 ++};
26503 ++static const u8 enc_output078[] __initconst = {
26504 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26505 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26506 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26507 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26508 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26509 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26510 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26511 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26512 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26513 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26514 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26515 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26516 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26517 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26518 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26519 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26520 ++ 0xf7, 0xe9, 0xe1, 0x51, 0xb0, 0x25, 0x33, 0xc7,
26521 ++ 0x46, 0x58, 0xbf, 0xc7, 0x73, 0x7c, 0x68, 0x0d
26522 ++};
26523 ++static const u8 enc_assoc078[] __initconst = {
26524 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26525 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26526 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26527 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26528 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26529 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26530 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26531 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
26532 ++};
26533 ++static const u8 enc_nonce078[] __initconst = {
26534 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0xbb, 0x90
26535 ++};
26536 ++static const u8 enc_key078[] __initconst = {
26537 ++ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
26538 ++ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
26539 ++ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
26540 ++ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30
26541 ++};
26542 ++
26543 ++/* wycheproof - checking for int overflows */
26544 ++static const u8 enc_input079[] __initconst = {
26545 ++ 0x22, 0x72, 0x02, 0xbe, 0x7f, 0x35, 0x15, 0xe9,
26546 ++ 0xd1, 0xc0, 0x2e, 0xea, 0x2f, 0x19, 0x50, 0xb6,
26547 ++ 0x48, 0x1b, 0x04, 0x8a, 0x4c, 0x91, 0x50, 0x6c,
26548 ++ 0xb4, 0x0d, 0x50, 0x4e, 0x6c, 0x94, 0x9f, 0x82,
26549 ++ 0xd1, 0x97, 0xc2, 0x5a, 0xd1, 0x7d, 0xc7, 0x21,
26550 ++ 0x65, 0x11, 0x25, 0x78, 0x2a, 0xc7, 0xa7, 0x12,
26551 ++ 0x47, 0xfe, 0xae, 0xf3, 0x2f, 0x1f, 0x25, 0x0c,
26552 ++ 0xe4, 0xbb, 0x8f, 0x79, 0xac, 0xaa, 0x17, 0x9d,
26553 ++ 0x45, 0xa7, 0xb0, 0x54, 0x5f, 0x09, 0x24, 0x32,
26554 ++ 0x5e, 0xfa, 0x87, 0xd5, 0xe4, 0x41, 0xd2, 0x84,
26555 ++ 0x78, 0xc6, 0x1f, 0x22, 0x23, 0xee, 0x67, 0xc3,
26556 ++ 0xb4, 0x1f, 0x43, 0x94, 0x53, 0x5e, 0x2a, 0x24,
26557 ++ 0x36, 0x9a, 0x2e, 0x16, 0x61, 0x3c, 0x45, 0x94,
26558 ++ 0x90, 0xc1, 0x4f, 0xb1, 0xd7, 0x55, 0xfe, 0x53,
26559 ++ 0xfb, 0xe1, 0xee, 0x45, 0xb1, 0xb2, 0x1f, 0x71,
26560 ++ 0x62, 0xe2, 0xfc, 0xaa, 0x74, 0x2a, 0xbe, 0xfd
26561 ++};
26562 ++static const u8 enc_output079[] __initconst = {
26563 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26564 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26565 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26566 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26567 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26568 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26569 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26570 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26571 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26572 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26573 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26574 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26575 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26576 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26577 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26578 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26579 ++ 0x79, 0x5b, 0xcf, 0xf6, 0x47, 0xc5, 0x53, 0xc2,
26580 ++ 0xe4, 0xeb, 0x6e, 0x0e, 0xaf, 0xd9, 0xe0, 0x4e
26581 ++};
26582 ++static const u8 enc_assoc079[] __initconst = {
26583 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26584 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26585 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26586 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26587 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26588 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26589 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26590 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
26591 ++};
26592 ++static const u8 enc_nonce079[] __initconst = {
26593 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0x48, 0x4a
26594 ++};
26595 ++static const u8 enc_key079[] __initconst = {
26596 ++ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
26597 ++ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
26598 ++ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
26599 ++ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30
26600 ++};
26601 ++
26602 ++/* wycheproof - checking for int overflows */
26603 ++static const u8 enc_input080[] __initconst = {
26604 ++ 0xfa, 0xe5, 0x83, 0x45, 0xc1, 0x6c, 0xb0, 0xf5,
26605 ++ 0xcc, 0x53, 0x7f, 0x2b, 0x1b, 0x34, 0x69, 0xc9,
26606 ++ 0x69, 0x46, 0x3b, 0x3e, 0xa7, 0x1b, 0xcf, 0x6b,
26607 ++ 0x98, 0xd6, 0x69, 0xa8, 0xe6, 0x0e, 0x04, 0xfc,
26608 ++ 0x08, 0xd5, 0xfd, 0x06, 0x9c, 0x36, 0x26, 0x38,
26609 ++ 0xe3, 0x40, 0x0e, 0xf4, 0xcb, 0x24, 0x2e, 0x27,
26610 ++ 0xe2, 0x24, 0x5e, 0x68, 0xcb, 0x9e, 0xc5, 0x83,
26611 ++ 0xda, 0x53, 0x40, 0xb1, 0x2e, 0xdf, 0x42, 0x3b,
26612 ++ 0x73, 0x26, 0xad, 0x20, 0xfe, 0xeb, 0x57, 0xda,
26613 ++ 0xca, 0x2e, 0x04, 0x67, 0xa3, 0x28, 0x99, 0xb4,
26614 ++ 0x2d, 0xf8, 0xe5, 0x6d, 0x84, 0xe0, 0x06, 0xbc,
26615 ++ 0x8a, 0x7a, 0xcc, 0x73, 0x1e, 0x7c, 0x1f, 0x6b,
26616 ++ 0xec, 0xb5, 0x71, 0x9f, 0x70, 0x77, 0xf0, 0xd4,
26617 ++ 0xf4, 0xc6, 0x1a, 0xb1, 0x1e, 0xba, 0xc1, 0x00,
26618 ++ 0x18, 0x01, 0xce, 0x33, 0xc4, 0xe4, 0xa7, 0x7d,
26619 ++ 0x83, 0x1d, 0x3c, 0xe3, 0x4e, 0x84, 0x10, 0xe1
26620 ++};
26621 ++static const u8 enc_output080[] __initconst = {
26622 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26623 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26624 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26625 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26626 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26627 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26628 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26629 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26630 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26631 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26632 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26633 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26634 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26635 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26636 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26637 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26638 ++ 0x19, 0x46, 0xd6, 0x53, 0x96, 0x0f, 0x94, 0x7a,
26639 ++ 0x74, 0xd3, 0xe8, 0x09, 0x3c, 0xf4, 0x85, 0x02
26640 ++};
26641 ++static const u8 enc_assoc080[] __initconst = {
26642 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26643 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26644 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26645 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26646 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26647 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26648 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26649 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
26650 ++};
26651 ++static const u8 enc_nonce080[] __initconst = {
26652 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x93, 0x2f, 0x40
26653 ++};
26654 ++static const u8 enc_key080[] __initconst = {
26655 ++ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
26656 ++ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
26657 ++ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
26658 ++ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30
26659 ++};
26660 ++
26661 ++/* wycheproof - checking for int overflows */
26662 ++static const u8 enc_input081[] __initconst = {
26663 ++ 0xeb, 0xb2, 0x16, 0xdd, 0xd7, 0xca, 0x70, 0x92,
26664 ++ 0x15, 0xf5, 0x03, 0xdf, 0x9c, 0xe6, 0x3c, 0x5c,
26665 ++ 0xd2, 0x19, 0x4e, 0x7d, 0x90, 0x99, 0xe8, 0xa9,
26666 ++ 0x0b, 0x2a, 0xfa, 0xad, 0x5e, 0xba, 0x35, 0x06,
26667 ++ 0x99, 0x25, 0xa6, 0x03, 0xfd, 0xbc, 0x34, 0x1a,
26668 ++ 0xae, 0xd4, 0x15, 0x05, 0xb1, 0x09, 0x41, 0xfa,
26669 ++ 0x38, 0x56, 0xa7, 0xe2, 0x47, 0xb1, 0x04, 0x07,
26670 ++ 0x09, 0x74, 0x6c, 0xfc, 0x20, 0x96, 0xca, 0xa6,
26671 ++ 0x31, 0xb2, 0xff, 0xf4, 0x1c, 0x25, 0x05, 0x06,
26672 ++ 0xd8, 0x89, 0xc1, 0xc9, 0x06, 0x71, 0xad, 0xe8,
26673 ++ 0x53, 0xee, 0x63, 0x94, 0xc1, 0x91, 0x92, 0xa5,
26674 ++ 0xcf, 0x37, 0x10, 0xd1, 0x07, 0x30, 0x99, 0xe5,
26675 ++ 0xbc, 0x94, 0x65, 0x82, 0xfc, 0x0f, 0xab, 0x9f,
26676 ++ 0x54, 0x3c, 0x71, 0x6a, 0xe2, 0x48, 0x6a, 0x86,
26677 ++ 0x83, 0xfd, 0xca, 0x39, 0xd2, 0xe1, 0x4f, 0x23,
26678 ++ 0xd0, 0x0a, 0x58, 0x26, 0x64, 0xf4, 0xec, 0xb1
26679 ++};
26680 ++static const u8 enc_output081[] __initconst = {
26681 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26682 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26683 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26684 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26685 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26686 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26687 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26688 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26689 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26690 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26691 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26692 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26693 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26694 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26695 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26696 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26697 ++ 0x36, 0xc3, 0x00, 0x29, 0x85, 0xdd, 0x21, 0xba,
26698 ++ 0xf8, 0x95, 0xd6, 0x33, 0x57, 0x3f, 0x12, 0xc0
26699 ++};
26700 ++static const u8 enc_assoc081[] __initconst = {
26701 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26702 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26703 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26704 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26705 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26706 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26707 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26708 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
26709 ++};
26710 ++static const u8 enc_nonce081[] __initconst = {
26711 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0xe2, 0x93, 0x35
26712 ++};
26713 ++static const u8 enc_key081[] __initconst = {
26714 ++ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
26715 ++ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
26716 ++ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
26717 ++ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30
26718 ++};
26719 ++
26720 ++/* wycheproof - checking for int overflows */
26721 ++static const u8 enc_input082[] __initconst = {
26722 ++ 0x40, 0x8a, 0xe6, 0xef, 0x1c, 0x7e, 0xf0, 0xfb,
26723 ++ 0x2c, 0x2d, 0x61, 0x08, 0x16, 0xfc, 0x78, 0x49,
26724 ++ 0xef, 0xa5, 0x8f, 0x78, 0x27, 0x3f, 0x5f, 0x16,
26725 ++ 0x6e, 0xa6, 0x5f, 0x81, 0xb5, 0x75, 0x74, 0x7d,
26726 ++ 0x03, 0x5b, 0x30, 0x40, 0xfe, 0xde, 0x1e, 0xb9,
26727 ++ 0x45, 0x97, 0x88, 0x66, 0x97, 0x88, 0x40, 0x8e,
26728 ++ 0x00, 0x41, 0x3b, 0x3e, 0x37, 0x6d, 0x15, 0x2d,
26729 ++ 0x20, 0x4a, 0xa2, 0xb7, 0xa8, 0x35, 0x58, 0xfc,
26730 ++ 0xd4, 0x8a, 0x0e, 0xf7, 0xa2, 0x6b, 0x1c, 0xd6,
26731 ++ 0xd3, 0x5d, 0x23, 0xb3, 0xf5, 0xdf, 0xe0, 0xca,
26732 ++ 0x77, 0xa4, 0xce, 0x32, 0xb9, 0x4a, 0xbf, 0x83,
26733 ++ 0xda, 0x2a, 0xef, 0xca, 0xf0, 0x68, 0x38, 0x08,
26734 ++ 0x79, 0xe8, 0x9f, 0xb0, 0xa3, 0x82, 0x95, 0x95,
26735 ++ 0xcf, 0x44, 0xc3, 0x85, 0x2a, 0xe2, 0xcc, 0x66,
26736 ++ 0x2b, 0x68, 0x9f, 0x93, 0x55, 0xd9, 0xc1, 0x83,
26737 ++ 0x80, 0x1f, 0x6a, 0xcc, 0x31, 0x3f, 0x89, 0x07
26738 ++};
26739 ++static const u8 enc_output082[] __initconst = {
26740 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26741 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26742 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26743 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26744 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26745 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26746 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26747 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26748 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26749 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26750 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26751 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26752 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26753 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26754 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26755 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26756 ++ 0x65, 0x14, 0x51, 0x8e, 0x0a, 0x26, 0x41, 0x42,
26757 ++ 0xe0, 0xb7, 0x35, 0x1f, 0x96, 0x7f, 0xc2, 0xae
26758 ++};
26759 ++static const u8 enc_assoc082[] __initconst = {
26760 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26761 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26762 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26763 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26764 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26765 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26766 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26767 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
26768 ++};
26769 ++static const u8 enc_nonce082[] __initconst = {
26770 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0xf7, 0xd5
26771 ++};
26772 ++static const u8 enc_key082[] __initconst = {
26773 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
26774 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
26775 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
26776 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
26777 ++};
26778 ++
26779 ++/* wycheproof - checking for int overflows */
26780 ++static const u8 enc_input083[] __initconst = {
26781 ++ 0x0a, 0x0a, 0x24, 0x49, 0x9b, 0xca, 0xde, 0x58,
26782 ++ 0xcf, 0x15, 0x76, 0xc3, 0x12, 0xac, 0xa9, 0x84,
26783 ++ 0x71, 0x8c, 0xb4, 0xcc, 0x7e, 0x01, 0x53, 0xf5,
26784 ++ 0xa9, 0x01, 0x58, 0x10, 0x85, 0x96, 0x44, 0xdf,
26785 ++ 0xc0, 0x21, 0x17, 0x4e, 0x0b, 0x06, 0x0a, 0x39,
26786 ++ 0x74, 0x48, 0xde, 0x8b, 0x48, 0x4a, 0x86, 0x03,
26787 ++ 0xbe, 0x68, 0x0a, 0x69, 0x34, 0xc0, 0x90, 0x6f,
26788 ++ 0x30, 0xdd, 0x17, 0xea, 0xe2, 0xd4, 0xc5, 0xfa,
26789 ++ 0xa7, 0x77, 0xf8, 0xca, 0x53, 0x37, 0x0e, 0x08,
26790 ++ 0x33, 0x1b, 0x88, 0xc3, 0x42, 0xba, 0xc9, 0x59,
26791 ++ 0x78, 0x7b, 0xbb, 0x33, 0x93, 0x0e, 0x3b, 0x56,
26792 ++ 0xbe, 0x86, 0xda, 0x7f, 0x2a, 0x6e, 0xb1, 0xf9,
26793 ++ 0x40, 0x89, 0xd1, 0xd1, 0x81, 0x07, 0x4d, 0x43,
26794 ++ 0x02, 0xf8, 0xe0, 0x55, 0x2d, 0x0d, 0xe1, 0xfa,
26795 ++ 0xb3, 0x06, 0xa2, 0x1b, 0x42, 0xd4, 0xc3, 0xba,
26796 ++ 0x6e, 0x6f, 0x0c, 0xbc, 0xc8, 0x1e, 0x87, 0x7a
26797 ++};
26798 ++static const u8 enc_output083[] __initconst = {
26799 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26800 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26801 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26802 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26803 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26804 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26805 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26806 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26807 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26808 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26809 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26810 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26811 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26812 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26813 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26814 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26815 ++ 0x4c, 0x19, 0x4d, 0xa6, 0xa9, 0x9f, 0xd6, 0x5b,
26816 ++ 0x40, 0xe9, 0xca, 0xd7, 0x98, 0xf4, 0x4b, 0x19
26817 ++};
26818 ++static const u8 enc_assoc083[] __initconst = {
26819 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26820 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26821 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26822 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26823 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26824 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26825 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26826 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
26827 ++};
26828 ++static const u8 enc_nonce083[] __initconst = {
26829 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x3d, 0xfc, 0xe4
26830 ++};
26831 ++static const u8 enc_key083[] __initconst = {
26832 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
26833 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
26834 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
26835 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
26836 ++};
26837 ++
26838 ++/* wycheproof - checking for int overflows */
26839 ++static const u8 enc_input084[] __initconst = {
26840 ++ 0x4a, 0x0a, 0xaf, 0xf8, 0x49, 0x47, 0x29, 0x18,
26841 ++ 0x86, 0x91, 0x70, 0x13, 0x40, 0xf3, 0xce, 0x2b,
26842 ++ 0x8a, 0x78, 0xee, 0xd3, 0xa0, 0xf0, 0x65, 0x99,
26843 ++ 0x4b, 0x72, 0x48, 0x4e, 0x79, 0x91, 0xd2, 0x5c,
26844 ++ 0x29, 0xaa, 0x07, 0x5e, 0xb1, 0xfc, 0x16, 0xde,
26845 ++ 0x93, 0xfe, 0x06, 0x90, 0x58, 0x11, 0x2a, 0xb2,
26846 ++ 0x84, 0xa3, 0xed, 0x18, 0x78, 0x03, 0x26, 0xd1,
26847 ++ 0x25, 0x8a, 0x47, 0x22, 0x2f, 0xa6, 0x33, 0xd8,
26848 ++ 0xb2, 0x9f, 0x3b, 0xd9, 0x15, 0x0b, 0x23, 0x9b,
26849 ++ 0x15, 0x46, 0xc2, 0xbb, 0x9b, 0x9f, 0x41, 0x0f,
26850 ++ 0xeb, 0xea, 0xd3, 0x96, 0x00, 0x0e, 0xe4, 0x77,
26851 ++ 0x70, 0x15, 0x32, 0xc3, 0xd0, 0xf5, 0xfb, 0xf8,
26852 ++ 0x95, 0xd2, 0x80, 0x19, 0x6d, 0x2f, 0x73, 0x7c,
26853 ++ 0x5e, 0x9f, 0xec, 0x50, 0xd9, 0x2b, 0xb0, 0xdf,
26854 ++ 0x5d, 0x7e, 0x51, 0x3b, 0xe5, 0xb8, 0xea, 0x97,
26855 ++ 0x13, 0x10, 0xd5, 0xbf, 0x16, 0xba, 0x7a, 0xee
26856 ++};
26857 ++static const u8 enc_output084[] __initconst = {
26858 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26859 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26860 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26861 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26862 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26863 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26864 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26865 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26866 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26867 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26868 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26869 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26870 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26871 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26872 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26873 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26874 ++ 0xc8, 0xae, 0x77, 0x88, 0xcd, 0x28, 0x74, 0xab,
26875 ++ 0xc1, 0x38, 0x54, 0x1e, 0x11, 0xfd, 0x05, 0x87
26876 ++};
26877 ++static const u8 enc_assoc084[] __initconst = {
26878 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26879 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26880 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26881 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26882 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26883 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26884 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26885 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
26886 ++};
26887 ++static const u8 enc_nonce084[] __initconst = {
26888 ++ 0x00, 0x00, 0x00, 0x00, 0x01, 0x84, 0x86, 0xa8
26889 ++};
26890 ++static const u8 enc_key084[] __initconst = {
26891 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
26892 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
26893 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
26894 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
26895 ++};
26896 ++
26897 ++/* wycheproof - checking for int overflows */
26898 ++static const u8 enc_input085[] __initconst = {
26899 ++ 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
26900 ++ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
26901 ++ 0x78, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09,
26902 ++ 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8,
26903 ++ 0x9f, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca,
26904 ++ 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64,
26905 ++ 0x9c, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39,
26906 ++ 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4,
26907 ++ 0x47, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2,
26908 ++ 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73,
26909 ++ 0xd4, 0xd2, 0x06, 0x61, 0x6f, 0x92, 0x93, 0xf6,
26910 ++ 0x5b, 0x45, 0xdb, 0xbc, 0x74, 0xe7, 0xc2, 0xed,
26911 ++ 0xfb, 0xcb, 0xbf, 0x1c, 0xfb, 0x67, 0x9b, 0xb7,
26912 ++ 0x39, 0xa5, 0x86, 0x2d, 0xe2, 0xbc, 0xb9, 0x37,
26913 ++ 0xf7, 0x4d, 0x5b, 0xf8, 0x67, 0x1c, 0x5a, 0x8a,
26914 ++ 0x50, 0x92, 0xf6, 0x1d, 0x54, 0xc9, 0xaa, 0x5b
26915 ++};
26916 ++static const u8 enc_output085[] __initconst = {
26917 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26918 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26919 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26920 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26921 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26922 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26923 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26924 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26925 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26926 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26927 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26928 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26929 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26930 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26931 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26932 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26933 ++ 0x93, 0x3a, 0x51, 0x63, 0xc7, 0xf6, 0x23, 0x68,
26934 ++ 0x32, 0x7b, 0x3f, 0xbc, 0x10, 0x36, 0xc9, 0x43
26935 ++};
26936 ++static const u8 enc_assoc085[] __initconst = {
26937 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26938 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26939 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26940 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26941 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26942 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26943 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26944 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
26945 ++};
26946 ++static const u8 enc_nonce085[] __initconst = {
26947 ++ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
26948 ++};
26949 ++static const u8 enc_key085[] __initconst = {
26950 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
26951 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
26952 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
26953 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
26954 ++};
26955 ++
26956 ++/* wycheproof - edge case intermediate sums in poly1305 */
26957 ++static const u8 enc_input093[] __initconst = {
26958 ++ 0x00, 0x52, 0x35, 0xd2, 0xa9, 0x19, 0xf2, 0x8d,
26959 ++ 0x3d, 0xb7, 0x66, 0x4a, 0x34, 0xae, 0x6b, 0x44,
26960 ++ 0x4d, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09,
26961 ++ 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8,
26962 ++ 0x5b, 0x8b, 0x94, 0x50, 0x9e, 0x2b, 0x74, 0xa3,
26963 ++ 0x6d, 0x34, 0x6e, 0x33, 0xd5, 0x72, 0x65, 0x9b,
26964 ++ 0xa9, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39,
26965 ++ 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4,
26966 ++ 0x83, 0xdc, 0xe9, 0xf3, 0x07, 0x3e, 0xfa, 0xdb,
26967 ++ 0x7d, 0x23, 0xb8, 0x7a, 0xce, 0x35, 0x16, 0x8c
26968 ++};
26969 ++static const u8 enc_output093[] __initconst = {
26970 ++ 0x00, 0x39, 0xe2, 0xfd, 0x2f, 0xd3, 0x12, 0x14,
26971 ++ 0x9e, 0x98, 0x98, 0x80, 0x88, 0x48, 0x13, 0xe7,
26972 ++ 0xca, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26973 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26974 ++ 0x3b, 0x0e, 0x86, 0x9a, 0xaa, 0x8e, 0xa4, 0x96,
26975 ++ 0x32, 0xff, 0xff, 0x37, 0xb9, 0xe8, 0xce, 0x00,
26976 ++ 0xca, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26977 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
26978 ++ 0x3b, 0x0e, 0x86, 0x9a, 0xaa, 0x8e, 0xa4, 0x96,
26979 ++ 0x32, 0xff, 0xff, 0x37, 0xb9, 0xe8, 0xce, 0x00,
26980 ++ 0xa5, 0x19, 0xac, 0x1a, 0x35, 0xb4, 0xa5, 0x77,
26981 ++ 0x87, 0x51, 0x0a, 0xf7, 0x8d, 0x8d, 0x20, 0x0a
26982 ++};
26983 ++static const u8 enc_assoc093[] __initconst = {
26984 ++ 0xff, 0xff, 0xff, 0xff
26985 ++};
26986 ++static const u8 enc_nonce093[] __initconst = {
26987 ++ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
26988 ++};
26989 ++static const u8 enc_key093[] __initconst = {
26990 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
26991 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
26992 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
26993 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
26994 ++};
26995 ++
26996 ++/* wycheproof - edge case intermediate sums in poly1305 */
26997 ++static const u8 enc_input094[] __initconst = {
26998 ++ 0xd3, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
26999 ++ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
27000 ++ 0xe5, 0xda, 0x78, 0x76, 0x6f, 0xa1, 0x92, 0x90,
27001 ++ 0xc0, 0x31, 0xf7, 0x52, 0x08, 0x50, 0x67, 0x45,
27002 ++ 0xae, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca,
27003 ++ 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64,
27004 ++ 0x49, 0x6d, 0xde, 0xb0, 0x55, 0x09, 0xc6, 0xef,
27005 ++ 0xff, 0xab, 0x75, 0xeb, 0x2d, 0xf4, 0xab, 0x09,
27006 ++ 0x76, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2,
27007 ++ 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73,
27008 ++ 0x01, 0x49, 0xef, 0x50, 0x4b, 0x71, 0xb1, 0x20,
27009 ++ 0xca, 0x4f, 0xf3, 0x95, 0x19, 0xc2, 0xc2, 0x10
27010 ++};
27011 ++static const u8 enc_output094[] __initconst = {
27012 ++ 0xd3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27013 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27014 ++ 0x62, 0x18, 0xb2, 0x7f, 0x83, 0xb8, 0xb4, 0x66,
27015 ++ 0x02, 0xf6, 0xe1, 0xd8, 0x34, 0x20, 0x7b, 0x02,
27016 ++ 0xce, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27017 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27018 ++ 0x2a, 0x64, 0x16, 0xce, 0xdb, 0x1c, 0xdd, 0x29,
27019 ++ 0x6e, 0xf5, 0xd7, 0xd6, 0x92, 0xda, 0xff, 0x02,
27020 ++ 0xce, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27021 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27022 ++ 0x2a, 0x64, 0x16, 0xce, 0xdb, 0x1c, 0xdd, 0x29,
27023 ++ 0x6e, 0xf5, 0xd7, 0xd6, 0x92, 0xda, 0xff, 0x02,
27024 ++ 0x30, 0x2f, 0xe8, 0x2a, 0xb0, 0xa0, 0x9a, 0xf6,
27025 ++ 0x44, 0x00, 0xd0, 0x15, 0xae, 0x83, 0xd9, 0xcc
27026 ++};
27027 ++static const u8 enc_assoc094[] __initconst = {
27028 ++ 0xff, 0xff, 0xff, 0xff
27029 ++};
27030 ++static const u8 enc_nonce094[] __initconst = {
27031 ++ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
27032 ++};
27033 ++static const u8 enc_key094[] __initconst = {
27034 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
27035 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
27036 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
27037 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
27038 ++};
27039 ++
27040 ++/* wycheproof - edge case intermediate sums in poly1305 */
27041 ++static const u8 enc_input095[] __initconst = {
27042 ++ 0xe9, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
27043 ++ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
27044 ++ 0x6d, 0xf1, 0x39, 0x4e, 0xdc, 0x53, 0x9b, 0x5b,
27045 ++ 0x3a, 0x09, 0x57, 0xbe, 0x0f, 0xb8, 0x59, 0x46,
27046 ++ 0x80, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca,
27047 ++ 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64,
27048 ++ 0xd1, 0x76, 0x9f, 0xe8, 0x06, 0xbb, 0xfe, 0xb6,
27049 ++ 0xf5, 0x90, 0x95, 0x0f, 0x2e, 0xac, 0x9e, 0x0a,
27050 ++ 0x58, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2,
27051 ++ 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73,
27052 ++ 0x99, 0x52, 0xae, 0x08, 0x18, 0xc3, 0x89, 0x79,
27053 ++ 0xc0, 0x74, 0x13, 0x71, 0x1a, 0x9a, 0xf7, 0x13
27054 ++};
27055 ++static const u8 enc_output095[] __initconst = {
27056 ++ 0xe9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27057 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27058 ++ 0xea, 0x33, 0xf3, 0x47, 0x30, 0x4a, 0xbd, 0xad,
27059 ++ 0xf8, 0xce, 0x41, 0x34, 0x33, 0xc8, 0x45, 0x01,
27060 ++ 0xe0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27061 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27062 ++ 0xb2, 0x7f, 0x57, 0x96, 0x88, 0xae, 0xe5, 0x70,
27063 ++ 0x64, 0xce, 0x37, 0x32, 0x91, 0x82, 0xca, 0x01,
27064 ++ 0xe0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27065 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27066 ++ 0xb2, 0x7f, 0x57, 0x96, 0x88, 0xae, 0xe5, 0x70,
27067 ++ 0x64, 0xce, 0x37, 0x32, 0x91, 0x82, 0xca, 0x01,
27068 ++ 0x98, 0xa7, 0xe8, 0x36, 0xe0, 0xee, 0x4d, 0x02,
27069 ++ 0x35, 0x00, 0xd0, 0x55, 0x7e, 0xc2, 0xcb, 0xe0
27070 ++};
27071 ++static const u8 enc_assoc095[] __initconst = {
27072 ++ 0xff, 0xff, 0xff, 0xff
27073 ++};
27074 ++static const u8 enc_nonce095[] __initconst = {
27075 ++ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
27076 ++};
27077 ++static const u8 enc_key095[] __initconst = {
27078 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
27079 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
27080 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
27081 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
27082 ++};
27083 ++
27084 ++/* wycheproof - edge case intermediate sums in poly1305 */
27085 ++static const u8 enc_input096[] __initconst = {
27086 ++ 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
27087 ++ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
27088 ++ 0x64, 0xf9, 0x0f, 0x5b, 0x26, 0x92, 0xb8, 0x60,
27089 ++ 0xd4, 0x59, 0x6f, 0xf4, 0xb3, 0x40, 0x2c, 0x5c,
27090 ++ 0x00, 0xb9, 0xbb, 0x53, 0x70, 0x7a, 0xa6, 0x67,
27091 ++ 0xd3, 0x56, 0xfe, 0x50, 0xc7, 0x19, 0x96, 0x94,
27092 ++ 0x03, 0x35, 0x61, 0xe7, 0xca, 0xca, 0x6d, 0x94,
27093 ++ 0x1d, 0xc3, 0xcd, 0x69, 0x14, 0xad, 0x69, 0x04
27094 ++};
27095 ++static const u8 enc_output096[] __initconst = {
27096 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27097 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27098 ++ 0xe3, 0x3b, 0xc5, 0x52, 0xca, 0x8b, 0x9e, 0x96,
27099 ++ 0x16, 0x9e, 0x79, 0x7e, 0x8f, 0x30, 0x30, 0x1b,
27100 ++ 0x60, 0x3c, 0xa9, 0x99, 0x44, 0xdf, 0x76, 0x52,
27101 ++ 0x8c, 0x9d, 0x6f, 0x54, 0xab, 0x83, 0x3d, 0x0f,
27102 ++ 0x60, 0x3c, 0xa9, 0x99, 0x44, 0xdf, 0x76, 0x52,
27103 ++ 0x8c, 0x9d, 0x6f, 0x54, 0xab, 0x83, 0x3d, 0x0f,
27104 ++ 0x6a, 0xb8, 0xdc, 0xe2, 0xc5, 0x9d, 0xa4, 0x73,
27105 ++ 0x71, 0x30, 0xb0, 0x25, 0x2f, 0x68, 0xa8, 0xd8
27106 ++};
27107 ++static const u8 enc_assoc096[] __initconst = {
27108 ++ 0xff, 0xff, 0xff, 0xff
27109 ++};
27110 ++static const u8 enc_nonce096[] __initconst = {
27111 ++ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
27112 ++};
27113 ++static const u8 enc_key096[] __initconst = {
27114 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
27115 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
27116 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
27117 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
27118 ++};
27119 ++
27120 ++/* wycheproof - edge case intermediate sums in poly1305 */
27121 ++static const u8 enc_input097[] __initconst = {
27122 ++ 0x68, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
27123 ++ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
27124 ++ 0xb0, 0x8f, 0x25, 0x67, 0x5b, 0x9b, 0xcb, 0xf6,
27125 ++ 0xe3, 0x84, 0x07, 0xde, 0x2e, 0xc7, 0x5a, 0x47,
27126 ++ 0x9f, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca,
27127 ++ 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64,
27128 ++ 0x2d, 0x2a, 0xf7, 0xcd, 0x6b, 0x08, 0x05, 0x01,
27129 ++ 0xd3, 0x1b, 0xa5, 0x4f, 0xb2, 0xeb, 0x75, 0x96,
27130 ++ 0x47, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2,
27131 ++ 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73,
27132 ++ 0x65, 0x0e, 0xc6, 0x2d, 0x75, 0x70, 0x72, 0xce,
27133 ++ 0xe6, 0xff, 0x23, 0x31, 0x86, 0xdd, 0x1c, 0x8f
27134 ++};
27135 ++static const u8 enc_output097[] __initconst = {
27136 ++ 0x68, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27137 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27138 ++ 0x37, 0x4d, 0xef, 0x6e, 0xb7, 0x82, 0xed, 0x00,
27139 ++ 0x21, 0x43, 0x11, 0x54, 0x12, 0xb7, 0x46, 0x00,
27140 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27141 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27142 ++ 0x4e, 0x23, 0x3f, 0xb3, 0xe5, 0x1d, 0x1e, 0xc7,
27143 ++ 0x42, 0x45, 0x07, 0x72, 0x0d, 0xc5, 0x21, 0x9d,
27144 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27145 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27146 ++ 0x4e, 0x23, 0x3f, 0xb3, 0xe5, 0x1d, 0x1e, 0xc7,
27147 ++ 0x42, 0x45, 0x07, 0x72, 0x0d, 0xc5, 0x21, 0x9d,
27148 ++ 0x04, 0x4d, 0xea, 0x60, 0x88, 0x80, 0x41, 0x2b,
27149 ++ 0xfd, 0xff, 0xcf, 0x35, 0x57, 0x9e, 0x9b, 0x26
27150 ++};
27151 ++static const u8 enc_assoc097[] __initconst = {
27152 ++ 0xff, 0xff, 0xff, 0xff
27153 ++};
27154 ++static const u8 enc_nonce097[] __initconst = {
27155 ++ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
27156 ++};
27157 ++static const u8 enc_key097[] __initconst = {
27158 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
27159 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
27160 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
27161 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
27162 ++};
27163 ++
27164 ++/* wycheproof - edge case intermediate sums in poly1305 */
27165 ++static const u8 enc_input098[] __initconst = {
27166 ++ 0x6d, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
27167 ++ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
27168 ++ 0xa1, 0x61, 0xb5, 0xab, 0x04, 0x09, 0x00, 0x62,
27169 ++ 0x9e, 0xfe, 0xff, 0x78, 0xd7, 0xd8, 0x6b, 0x45,
27170 ++ 0x9f, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca,
27171 ++ 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64,
27172 ++ 0xc6, 0xf8, 0x07, 0x8c, 0xc8, 0xef, 0x12, 0xa0,
27173 ++ 0xff, 0x65, 0x7d, 0x6d, 0x08, 0xdb, 0x10, 0xb8,
27174 ++ 0x47, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2,
27175 ++ 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73,
27176 ++ 0x8e, 0xdc, 0x36, 0x6c, 0xd6, 0x97, 0x65, 0x6f,
27177 ++ 0xca, 0x81, 0xfb, 0x13, 0x3c, 0xed, 0x79, 0xa1
27178 ++};
27179 ++static const u8 enc_output098[] __initconst = {
27180 ++ 0x6d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27181 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27182 ++ 0x26, 0xa3, 0x7f, 0xa2, 0xe8, 0x10, 0x26, 0x94,
27183 ++ 0x5c, 0x39, 0xe9, 0xf2, 0xeb, 0xa8, 0x77, 0x02,
27184 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27185 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27186 ++ 0xa5, 0xf1, 0xcf, 0xf2, 0x46, 0xfa, 0x09, 0x66,
27187 ++ 0x6e, 0x3b, 0xdf, 0x50, 0xb7, 0xf5, 0x44, 0xb3,
27188 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27189 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27190 ++ 0xa5, 0xf1, 0xcf, 0xf2, 0x46, 0xfa, 0x09, 0x66,
27191 ++ 0x6e, 0x3b, 0xdf, 0x50, 0xb7, 0xf5, 0x44, 0xb3,
27192 ++ 0x1e, 0x6b, 0xea, 0x63, 0x14, 0x54, 0x2e, 0x2e,
27193 ++ 0xf9, 0xff, 0xcf, 0x45, 0x0b, 0x2e, 0x98, 0x2b
27194 ++};
27195 ++static const u8 enc_assoc098[] __initconst = {
27196 ++ 0xff, 0xff, 0xff, 0xff
27197 ++};
27198 ++static const u8 enc_nonce098[] __initconst = {
27199 ++ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
27200 ++};
27201 ++static const u8 enc_key098[] __initconst = {
27202 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
27203 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
27204 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
27205 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
27206 ++};
27207 ++
27208 ++/* wycheproof - edge case intermediate sums in poly1305 */
27209 ++static const u8 enc_input099[] __initconst = {
27210 ++ 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
27211 ++ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
27212 ++ 0xfc, 0x01, 0xb8, 0x91, 0xe5, 0xf0, 0xf9, 0x12,
27213 ++ 0x8d, 0x7d, 0x1c, 0x57, 0x91, 0x92, 0xb6, 0x98,
27214 ++ 0x63, 0x41, 0x44, 0x15, 0xb6, 0x99, 0x68, 0x95,
27215 ++ 0x9a, 0x72, 0x91, 0xb7, 0xa5, 0xaf, 0x13, 0x48,
27216 ++ 0x60, 0xcd, 0x9e, 0xa1, 0x0c, 0x29, 0xa3, 0x66,
27217 ++ 0x54, 0xe7, 0xa2, 0x8e, 0x76, 0x1b, 0xec, 0xd8
27218 ++};
27219 ++static const u8 enc_output099[] __initconst = {
27220 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27221 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27222 ++ 0x7b, 0xc3, 0x72, 0x98, 0x09, 0xe9, 0xdf, 0xe4,
27223 ++ 0x4f, 0xba, 0x0a, 0xdd, 0xad, 0xe2, 0xaa, 0xdf,
27224 ++ 0x03, 0xc4, 0x56, 0xdf, 0x82, 0x3c, 0xb8, 0xa0,
27225 ++ 0xc5, 0xb9, 0x00, 0xb3, 0xc9, 0x35, 0xb8, 0xd3,
27226 ++ 0x03, 0xc4, 0x56, 0xdf, 0x82, 0x3c, 0xb8, 0xa0,
27227 ++ 0xc5, 0xb9, 0x00, 0xb3, 0xc9, 0x35, 0xb8, 0xd3,
27228 ++ 0xed, 0x20, 0x17, 0xc8, 0xdb, 0xa4, 0x77, 0x56,
27229 ++ 0x29, 0x04, 0x9d, 0x78, 0x6e, 0x3b, 0xce, 0xb1
27230 ++};
27231 ++static const u8 enc_assoc099[] __initconst = {
27232 ++ 0xff, 0xff, 0xff, 0xff
27233 ++};
27234 ++static const u8 enc_nonce099[] __initconst = {
27235 ++ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
27236 ++};
27237 ++static const u8 enc_key099[] __initconst = {
27238 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
27239 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
27240 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
27241 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
27242 ++};
27243 ++
27244 ++/* wycheproof - edge case intermediate sums in poly1305 */
27245 ++static const u8 enc_input100[] __initconst = {
27246 ++ 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
27247 ++ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
27248 ++ 0x6b, 0x6d, 0xc9, 0xd2, 0x1a, 0x81, 0x9e, 0x70,
27249 ++ 0xb5, 0x77, 0xf4, 0x41, 0x37, 0xd3, 0xd6, 0xbd,
27250 ++ 0x13, 0x35, 0xf5, 0xeb, 0x44, 0x49, 0x40, 0x77,
27251 ++ 0xb2, 0x64, 0x49, 0xa5, 0x4b, 0x6c, 0x7c, 0x75,
27252 ++ 0x10, 0xb9, 0x2f, 0x5f, 0xfe, 0xf9, 0x8b, 0x84,
27253 ++ 0x7c, 0xf1, 0x7a, 0x9c, 0x98, 0xd8, 0x83, 0xe5
27254 ++};
27255 ++static const u8 enc_output100[] __initconst = {
27256 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27257 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27258 ++ 0xec, 0xaf, 0x03, 0xdb, 0xf6, 0x98, 0xb8, 0x86,
27259 ++ 0x77, 0xb0, 0xe2, 0xcb, 0x0b, 0xa3, 0xca, 0xfa,
27260 ++ 0x73, 0xb0, 0xe7, 0x21, 0x70, 0xec, 0x90, 0x42,
27261 ++ 0xed, 0xaf, 0xd8, 0xa1, 0x27, 0xf6, 0xd7, 0xee,
27262 ++ 0x73, 0xb0, 0xe7, 0x21, 0x70, 0xec, 0x90, 0x42,
27263 ++ 0xed, 0xaf, 0xd8, 0xa1, 0x27, 0xf6, 0xd7, 0xee,
27264 ++ 0x07, 0x3f, 0x17, 0xcb, 0x67, 0x78, 0x64, 0x59,
27265 ++ 0x25, 0x04, 0x9d, 0x88, 0x22, 0xcb, 0xca, 0xb6
27266 ++};
27267 ++static const u8 enc_assoc100[] __initconst = {
27268 ++ 0xff, 0xff, 0xff, 0xff
27269 ++};
27270 ++static const u8 enc_nonce100[] __initconst = {
27271 ++ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
27272 ++};
27273 ++static const u8 enc_key100[] __initconst = {
27274 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
27275 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
27276 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
27277 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
27278 ++};
27279 ++
27280 ++/* wycheproof - edge case intermediate sums in poly1305 */
27281 ++static const u8 enc_input101[] __initconst = {
27282 ++ 0xff, 0xcb, 0x2b, 0x11, 0x06, 0xf8, 0x23, 0x4c,
27283 ++ 0x5e, 0x99, 0xd4, 0xdb, 0x4c, 0x70, 0x48, 0xde,
27284 ++ 0x32, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09,
27285 ++ 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8,
27286 ++ 0x16, 0xe9, 0x88, 0x4a, 0x11, 0x4f, 0x0e, 0x92,
27287 ++ 0x66, 0xce, 0xa3, 0x88, 0x5f, 0xe3, 0x6b, 0x9f,
27288 ++ 0xd6, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39,
27289 ++ 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4,
27290 ++ 0xce, 0xbe, 0xf5, 0xe9, 0x88, 0x5a, 0x80, 0xea,
27291 ++ 0x76, 0xd9, 0x75, 0xc1, 0x44, 0xa4, 0x18, 0x88
27292 ++};
27293 ++static const u8 enc_output101[] __initconst = {
27294 ++ 0xff, 0xa0, 0xfc, 0x3e, 0x80, 0x32, 0xc3, 0xd5,
27295 ++ 0xfd, 0xb6, 0x2a, 0x11, 0xf0, 0x96, 0x30, 0x7d,
27296 ++ 0xb5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27297 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27298 ++ 0x76, 0x6c, 0x9a, 0x80, 0x25, 0xea, 0xde, 0xa7,
27299 ++ 0x39, 0x05, 0x32, 0x8c, 0x33, 0x79, 0xc0, 0x04,
27300 ++ 0xb5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27301 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27302 ++ 0x76, 0x6c, 0x9a, 0x80, 0x25, 0xea, 0xde, 0xa7,
27303 ++ 0x39, 0x05, 0x32, 0x8c, 0x33, 0x79, 0xc0, 0x04,
27304 ++ 0x8b, 0x9b, 0xb4, 0xb4, 0x86, 0x12, 0x89, 0x65,
27305 ++ 0x8c, 0x69, 0x6a, 0x83, 0x40, 0x15, 0x04, 0x05
27306 ++};
27307 ++static const u8 enc_assoc101[] __initconst = {
27308 ++ 0xff, 0xff, 0xff, 0xff
27309 ++};
27310 ++static const u8 enc_nonce101[] __initconst = {
27311 ++ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
27312 ++};
27313 ++static const u8 enc_key101[] __initconst = {
27314 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
27315 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
27316 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
27317 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
27318 ++};
27319 ++
27320 ++/* wycheproof - edge case intermediate sums in poly1305 */
27321 ++static const u8 enc_input102[] __initconst = {
27322 ++ 0x6f, 0x9e, 0x70, 0xed, 0x3b, 0x8b, 0xac, 0xa0,
27323 ++ 0x26, 0xe4, 0x6a, 0x5a, 0x09, 0x43, 0x15, 0x8d,
27324 ++ 0x21, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09,
27325 ++ 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8,
27326 ++ 0x0c, 0x61, 0x2c, 0x5e, 0x8d, 0x89, 0xa8, 0x73,
27327 ++ 0xdb, 0xca, 0xad, 0x5b, 0x73, 0x46, 0x42, 0x9b,
27328 ++ 0xc5, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39,
27329 ++ 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4,
27330 ++ 0xd4, 0x36, 0x51, 0xfd, 0x14, 0x9c, 0x26, 0x0b,
27331 ++ 0xcb, 0xdd, 0x7b, 0x12, 0x68, 0x01, 0x31, 0x8c
27332 ++};
27333 ++static const u8 enc_output102[] __initconst = {
27334 ++ 0x6f, 0xf5, 0xa7, 0xc2, 0xbd, 0x41, 0x4c, 0x39,
27335 ++ 0x85, 0xcb, 0x94, 0x90, 0xb5, 0xa5, 0x6d, 0x2e,
27336 ++ 0xa6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27337 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27338 ++ 0x6c, 0xe4, 0x3e, 0x94, 0xb9, 0x2c, 0x78, 0x46,
27339 ++ 0x84, 0x01, 0x3c, 0x5f, 0x1f, 0xdc, 0xe9, 0x00,
27340 ++ 0xa6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27341 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27342 ++ 0x6c, 0xe4, 0x3e, 0x94, 0xb9, 0x2c, 0x78, 0x46,
27343 ++ 0x84, 0x01, 0x3c, 0x5f, 0x1f, 0xdc, 0xe9, 0x00,
27344 ++ 0x8b, 0x3b, 0xbd, 0x51, 0x64, 0x44, 0x59, 0x56,
27345 ++ 0x8d, 0x81, 0xca, 0x1f, 0xa7, 0x2c, 0xe4, 0x04
27346 ++};
27347 ++static const u8 enc_assoc102[] __initconst = {
27348 ++ 0xff, 0xff, 0xff, 0xff
27349 ++};
27350 ++static const u8 enc_nonce102[] __initconst = {
27351 ++ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
27352 ++};
27353 ++static const u8 enc_key102[] __initconst = {
27354 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
27355 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
27356 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
27357 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
27358 ++};
27359 ++
27360 ++/* wycheproof - edge case intermediate sums in poly1305 */
27361 ++static const u8 enc_input103[] __initconst = {
27362 ++ 0x41, 0x2b, 0x08, 0x0a, 0x3e, 0x19, 0xc1, 0x0d,
27363 ++ 0x44, 0xa1, 0xaf, 0x1e, 0xab, 0xde, 0xb4, 0xce,
27364 ++ 0x35, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09,
27365 ++ 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8,
27366 ++ 0x6b, 0x83, 0x94, 0x33, 0x09, 0x21, 0x48, 0x6c,
27367 ++ 0xa1, 0x1d, 0x29, 0x1c, 0x3e, 0x97, 0xee, 0x9a,
27368 ++ 0xd1, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39,
27369 ++ 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4,
27370 ++ 0xb3, 0xd4, 0xe9, 0x90, 0x90, 0x34, 0xc6, 0x14,
27371 ++ 0xb1, 0x0a, 0xff, 0x55, 0x25, 0xd0, 0x9d, 0x8d
27372 ++};
27373 ++static const u8 enc_output103[] __initconst = {
27374 ++ 0x41, 0x40, 0xdf, 0x25, 0xb8, 0xd3, 0x21, 0x94,
27375 ++ 0xe7, 0x8e, 0x51, 0xd4, 0x17, 0x38, 0xcc, 0x6d,
27376 ++ 0xb2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27377 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27378 ++ 0x0b, 0x06, 0x86, 0xf9, 0x3d, 0x84, 0x98, 0x59,
27379 ++ 0xfe, 0xd6, 0xb8, 0x18, 0x52, 0x0d, 0x45, 0x01,
27380 ++ 0xb2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27381 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27382 ++ 0x0b, 0x06, 0x86, 0xf9, 0x3d, 0x84, 0x98, 0x59,
27383 ++ 0xfe, 0xd6, 0xb8, 0x18, 0x52, 0x0d, 0x45, 0x01,
27384 ++ 0x86, 0xfb, 0xab, 0x2b, 0x4a, 0x94, 0xf4, 0x7a,
27385 ++ 0xa5, 0x6f, 0x0a, 0xea, 0x65, 0xd1, 0x10, 0x08
27386 ++};
27387 ++static const u8 enc_assoc103[] __initconst = {
27388 ++ 0xff, 0xff, 0xff, 0xff
27389 ++};
27390 ++static const u8 enc_nonce103[] __initconst = {
27391 ++ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
27392 ++};
27393 ++static const u8 enc_key103[] __initconst = {
27394 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
27395 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
27396 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
27397 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
27398 ++};
27399 ++
27400 ++/* wycheproof - edge case intermediate sums in poly1305 */
27401 ++static const u8 enc_input104[] __initconst = {
27402 ++ 0xb2, 0x47, 0xa7, 0x47, 0x23, 0x49, 0x1a, 0xac,
27403 ++ 0xac, 0xaa, 0xd7, 0x09, 0xc9, 0x1e, 0x93, 0x2b,
27404 ++ 0x31, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09,
27405 ++ 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8,
27406 ++ 0x9a, 0xde, 0x04, 0xe7, 0x5b, 0xb7, 0x01, 0xd9,
27407 ++ 0x66, 0x06, 0x01, 0xb3, 0x47, 0x65, 0xde, 0x98,
27408 ++ 0xd5, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39,
27409 ++ 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4,
27410 ++ 0x42, 0x89, 0x79, 0x44, 0xc2, 0xa2, 0x8f, 0xa1,
27411 ++ 0x76, 0x11, 0xd7, 0xfa, 0x5c, 0x22, 0xad, 0x8f
27412 ++};
27413 ++static const u8 enc_output104[] __initconst = {
27414 ++ 0xb2, 0x2c, 0x70, 0x68, 0xa5, 0x83, 0xfa, 0x35,
27415 ++ 0x0f, 0x85, 0x29, 0xc3, 0x75, 0xf8, 0xeb, 0x88,
27416 ++ 0xb6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27417 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27418 ++ 0xfa, 0x5b, 0x16, 0x2d, 0x6f, 0x12, 0xd1, 0xec,
27419 ++ 0x39, 0xcd, 0x90, 0xb7, 0x2b, 0xff, 0x75, 0x03,
27420 ++ 0xb6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27421 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27422 ++ 0xfa, 0x5b, 0x16, 0x2d, 0x6f, 0x12, 0xd1, 0xec,
27423 ++ 0x39, 0xcd, 0x90, 0xb7, 0x2b, 0xff, 0x75, 0x03,
27424 ++ 0xa0, 0x19, 0xac, 0x2e, 0xd6, 0x67, 0xe1, 0x7d,
27425 ++ 0xa1, 0x6f, 0x0a, 0xfa, 0x19, 0x61, 0x0d, 0x0d
27426 ++};
27427 ++static const u8 enc_assoc104[] __initconst = {
27428 ++ 0xff, 0xff, 0xff, 0xff
27429 ++};
27430 ++static const u8 enc_nonce104[] __initconst = {
27431 ++ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
27432 ++};
27433 ++static const u8 enc_key104[] __initconst = {
27434 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
27435 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
27436 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
27437 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
27438 ++};
27439 ++
27440 ++/* wycheproof - edge case intermediate sums in poly1305 */
27441 ++static const u8 enc_input105[] __initconst = {
27442 ++ 0x74, 0x0f, 0x9e, 0x49, 0xf6, 0x10, 0xef, 0xa5,
27443 ++ 0x85, 0xb6, 0x59, 0xca, 0x6e, 0xd8, 0xb4, 0x99,
27444 ++ 0x2d, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09,
27445 ++ 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8,
27446 ++ 0x41, 0x2d, 0x96, 0xaf, 0xbe, 0x80, 0xec, 0x3e,
27447 ++ 0x79, 0xd4, 0x51, 0xb0, 0x0a, 0x2d, 0xb2, 0x9a,
27448 ++ 0xc9, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39,
27449 ++ 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4,
27450 ++ 0x99, 0x7a, 0xeb, 0x0c, 0x27, 0x95, 0x62, 0x46,
27451 ++ 0x69, 0xc3, 0x87, 0xf9, 0x11, 0x6a, 0xc1, 0x8d
27452 ++};
27453 ++static const u8 enc_output105[] __initconst = {
27454 ++ 0x74, 0x64, 0x49, 0x66, 0x70, 0xda, 0x0f, 0x3c,
27455 ++ 0x26, 0x99, 0xa7, 0x00, 0xd2, 0x3e, 0xcc, 0x3a,
27456 ++ 0xaa, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27457 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27458 ++ 0x21, 0xa8, 0x84, 0x65, 0x8a, 0x25, 0x3c, 0x0b,
27459 ++ 0x26, 0x1f, 0xc0, 0xb4, 0x66, 0xb7, 0x19, 0x01,
27460 ++ 0xaa, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27461 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27462 ++ 0x21, 0xa8, 0x84, 0x65, 0x8a, 0x25, 0x3c, 0x0b,
27463 ++ 0x26, 0x1f, 0xc0, 0xb4, 0x66, 0xb7, 0x19, 0x01,
27464 ++ 0x73, 0x6e, 0x18, 0x18, 0x16, 0x96, 0xa5, 0x88,
27465 ++ 0x9c, 0x31, 0x59, 0xfa, 0xab, 0xab, 0x20, 0xfd
27466 ++};
27467 ++static const u8 enc_assoc105[] __initconst = {
27468 ++ 0xff, 0xff, 0xff, 0xff
27469 ++};
27470 ++static const u8 enc_nonce105[] __initconst = {
27471 ++ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
27472 ++};
27473 ++static const u8 enc_key105[] __initconst = {
27474 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
27475 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
27476 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
27477 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
27478 ++};
27479 ++
27480 ++/* wycheproof - edge case intermediate sums in poly1305 */
27481 ++static const u8 enc_input106[] __initconst = {
27482 ++ 0xad, 0xba, 0x5d, 0x10, 0x5b, 0xc8, 0xaa, 0x06,
27483 ++ 0x2c, 0x23, 0x36, 0xcb, 0x88, 0x9d, 0xdb, 0xd5,
27484 ++ 0x37, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09,
27485 ++ 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8,
27486 ++ 0x17, 0x7c, 0x5f, 0xfe, 0x28, 0x75, 0xf4, 0x68,
27487 ++ 0xf6, 0xc2, 0x96, 0x57, 0x48, 0xf3, 0x59, 0x9a,
27488 ++ 0xd3, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39,
27489 ++ 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4,
27490 ++ 0xcf, 0x2b, 0x22, 0x5d, 0xb1, 0x60, 0x7a, 0x10,
27491 ++ 0xe6, 0xd5, 0x40, 0x1e, 0x53, 0xb4, 0x2a, 0x8d
27492 ++};
27493 ++static const u8 enc_output106[] __initconst = {
27494 ++ 0xad, 0xd1, 0x8a, 0x3f, 0xdd, 0x02, 0x4a, 0x9f,
27495 ++ 0x8f, 0x0c, 0xc8, 0x01, 0x34, 0x7b, 0xa3, 0x76,
27496 ++ 0xb0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27497 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27498 ++ 0x77, 0xf9, 0x4d, 0x34, 0x1c, 0xd0, 0x24, 0x5d,
27499 ++ 0xa9, 0x09, 0x07, 0x53, 0x24, 0x69, 0xf2, 0x01,
27500 ++ 0xb0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27501 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27502 ++ 0x77, 0xf9, 0x4d, 0x34, 0x1c, 0xd0, 0x24, 0x5d,
27503 ++ 0xa9, 0x09, 0x07, 0x53, 0x24, 0x69, 0xf2, 0x01,
27504 ++ 0xba, 0xd5, 0x8f, 0x10, 0xa9, 0x1e, 0x6a, 0x88,
27505 ++ 0x9a, 0xba, 0x32, 0xfd, 0x17, 0xd8, 0x33, 0x1a
27506 ++};
27507 ++static const u8 enc_assoc106[] __initconst = {
27508 ++ 0xff, 0xff, 0xff, 0xff
27509 ++};
27510 ++static const u8 enc_nonce106[] __initconst = {
27511 ++ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
27512 ++};
27513 ++static const u8 enc_key106[] __initconst = {
27514 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
27515 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
27516 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
27517 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
27518 ++};
27519 ++
27520 ++/* wycheproof - edge case intermediate sums in poly1305 */
27521 ++static const u8 enc_input107[] __initconst = {
27522 ++ 0xfe, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
27523 ++ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
27524 ++ 0xc0, 0x01, 0xed, 0xc5, 0xda, 0x44, 0x2e, 0x71,
27525 ++ 0x9b, 0xce, 0x9a, 0xbe, 0x27, 0x3a, 0xf1, 0x44,
27526 ++ 0xb4, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca,
27527 ++ 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64,
27528 ++ 0x48, 0x02, 0x5f, 0x41, 0xfa, 0x4e, 0x33, 0x6c,
27529 ++ 0x78, 0x69, 0x57, 0xa2, 0xa7, 0xc4, 0x93, 0x0a,
27530 ++ 0x6c, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2,
27531 ++ 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73,
27532 ++ 0x00, 0x26, 0x6e, 0xa1, 0xe4, 0x36, 0x44, 0xa3,
27533 ++ 0x4d, 0x8d, 0xd1, 0xdc, 0x93, 0xf2, 0xfa, 0x13
27534 ++};
27535 ++static const u8 enc_output107[] __initconst = {
27536 ++ 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27537 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27538 ++ 0x47, 0xc3, 0x27, 0xcc, 0x36, 0x5d, 0x08, 0x87,
27539 ++ 0x59, 0x09, 0x8c, 0x34, 0x1b, 0x4a, 0xed, 0x03,
27540 ++ 0xd4, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27541 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27542 ++ 0x2b, 0x0b, 0x97, 0x3f, 0x74, 0x5b, 0x28, 0xaa,
27543 ++ 0xe9, 0x37, 0xf5, 0x9f, 0x18, 0xea, 0xc7, 0x01,
27544 ++ 0xd4, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27545 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27546 ++ 0x2b, 0x0b, 0x97, 0x3f, 0x74, 0x5b, 0x28, 0xaa,
27547 ++ 0xe9, 0x37, 0xf5, 0x9f, 0x18, 0xea, 0xc7, 0x01,
27548 ++ 0xd6, 0x8c, 0xe1, 0x74, 0x07, 0x9a, 0xdd, 0x02,
27549 ++ 0x8d, 0xd0, 0x5c, 0xf8, 0x14, 0x63, 0x04, 0x88
27550 ++};
27551 ++static const u8 enc_assoc107[] __initconst = {
27552 ++ 0xff, 0xff, 0xff, 0xff
27553 ++};
27554 ++static const u8 enc_nonce107[] __initconst = {
27555 ++ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
27556 ++};
27557 ++static const u8 enc_key107[] __initconst = {
27558 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
27559 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
27560 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
27561 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
27562 ++};
27563 ++
27564 ++/* wycheproof - edge case intermediate sums in poly1305 */
27565 ++static const u8 enc_input108[] __initconst = {
27566 ++ 0xb5, 0x13, 0xb0, 0x6a, 0xb9, 0xac, 0x14, 0x43,
27567 ++ 0x5a, 0xcb, 0x8a, 0xa3, 0xa3, 0x7a, 0xfd, 0xb6,
27568 ++ 0x54, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09,
27569 ++ 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8,
27570 ++ 0x61, 0x95, 0x01, 0x93, 0xb1, 0xbf, 0x03, 0x11,
27571 ++ 0xff, 0x11, 0x79, 0x89, 0xae, 0xd9, 0xa9, 0x99,
27572 ++ 0xb0, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39,
27573 ++ 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4,
27574 ++ 0xb9, 0xc2, 0x7c, 0x30, 0x28, 0xaa, 0x8d, 0x69,
27575 ++ 0xef, 0x06, 0xaf, 0xc0, 0xb5, 0x9e, 0xda, 0x8e
27576 ++};
27577 ++static const u8 enc_output108[] __initconst = {
27578 ++ 0xb5, 0x78, 0x67, 0x45, 0x3f, 0x66, 0xf4, 0xda,
27579 ++ 0xf9, 0xe4, 0x74, 0x69, 0x1f, 0x9c, 0x85, 0x15,
27580 ++ 0xd3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27581 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27582 ++ 0x01, 0x10, 0x13, 0x59, 0x85, 0x1a, 0xd3, 0x24,
27583 ++ 0xa0, 0xda, 0xe8, 0x8d, 0xc2, 0x43, 0x02, 0x02,
27584 ++ 0xd3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27585 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27586 ++ 0x01, 0x10, 0x13, 0x59, 0x85, 0x1a, 0xd3, 0x24,
27587 ++ 0xa0, 0xda, 0xe8, 0x8d, 0xc2, 0x43, 0x02, 0x02,
27588 ++ 0xaa, 0x48, 0xa3, 0x88, 0x7d, 0x4b, 0x05, 0x96,
27589 ++ 0x99, 0xc2, 0xfd, 0xf9, 0xc6, 0x78, 0x7e, 0x0a
27590 ++};
27591 ++static const u8 enc_assoc108[] __initconst = {
27592 ++ 0xff, 0xff, 0xff, 0xff
27593 ++};
27594 ++static const u8 enc_nonce108[] __initconst = {
27595 ++ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
27596 ++};
27597 ++static const u8 enc_key108[] __initconst = {
27598 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
27599 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
27600 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
27601 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
27602 ++};
27603 ++
27604 ++/* wycheproof - edge case intermediate sums in poly1305 */
27605 ++static const u8 enc_input109[] __initconst = {
27606 ++ 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
27607 ++ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
27608 ++ 0xd4, 0xf1, 0x09, 0xe8, 0x14, 0xce, 0xa8, 0x5a,
27609 ++ 0x08, 0xc0, 0x11, 0xd8, 0x50, 0xdd, 0x1d, 0xcb,
27610 ++ 0xcf, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca,
27611 ++ 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64,
27612 ++ 0x53, 0x40, 0xb8, 0x5a, 0x9a, 0xa0, 0x82, 0x96,
27613 ++ 0xb7, 0x7a, 0x5f, 0xc3, 0x96, 0x1f, 0x66, 0x0f,
27614 ++ 0x17, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2,
27615 ++ 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73,
27616 ++ 0x1b, 0x64, 0x89, 0xba, 0x84, 0xd8, 0xf5, 0x59,
27617 ++ 0x82, 0x9e, 0xd9, 0xbd, 0xa2, 0x29, 0x0f, 0x16
27618 ++};
27619 ++static const u8 enc_output109[] __initconst = {
27620 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27621 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27622 ++ 0x53, 0x33, 0xc3, 0xe1, 0xf8, 0xd7, 0x8e, 0xac,
27623 ++ 0xca, 0x07, 0x07, 0x52, 0x6c, 0xad, 0x01, 0x8c,
27624 ++ 0xaf, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27625 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27626 ++ 0x30, 0x49, 0x70, 0x24, 0x14, 0xb5, 0x99, 0x50,
27627 ++ 0x26, 0x24, 0xfd, 0xfe, 0x29, 0x31, 0x32, 0x04,
27628 ++ 0xaf, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27629 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27630 ++ 0x30, 0x49, 0x70, 0x24, 0x14, 0xb5, 0x99, 0x50,
27631 ++ 0x26, 0x24, 0xfd, 0xfe, 0x29, 0x31, 0x32, 0x04,
27632 ++ 0xb9, 0x36, 0xa8, 0x17, 0xf2, 0x21, 0x1a, 0xf1,
27633 ++ 0x29, 0xe2, 0xcf, 0x16, 0x0f, 0xd4, 0x2b, 0xcb
27634 ++};
27635 ++static const u8 enc_assoc109[] __initconst = {
27636 ++ 0xff, 0xff, 0xff, 0xff
27637 ++};
27638 ++static const u8 enc_nonce109[] __initconst = {
27639 ++ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
27640 ++};
27641 ++static const u8 enc_key109[] __initconst = {
27642 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
27643 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
27644 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
27645 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
27646 ++};
27647 ++
27648 ++/* wycheproof - edge case intermediate sums in poly1305 */
27649 ++static const u8 enc_input110[] __initconst = {
27650 ++ 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
27651 ++ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
27652 ++ 0xdf, 0x4c, 0x62, 0x03, 0x2d, 0x41, 0x19, 0xb5,
27653 ++ 0x88, 0x47, 0x7e, 0x99, 0x92, 0x5a, 0x56, 0xd9,
27654 ++ 0xd6, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca,
27655 ++ 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64,
27656 ++ 0xfa, 0x84, 0xf0, 0x64, 0x55, 0x36, 0x42, 0x1b,
27657 ++ 0x2b, 0xb9, 0x24, 0x6e, 0xc2, 0x19, 0xed, 0x0b,
27658 ++ 0x0e, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2,
27659 ++ 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73,
27660 ++ 0xb2, 0xa0, 0xc1, 0x84, 0x4b, 0x4e, 0x35, 0xd4,
27661 ++ 0x1e, 0x5d, 0xa2, 0x10, 0xf6, 0x2f, 0x84, 0x12
27662 ++};
27663 ++static const u8 enc_output110[] __initconst = {
27664 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27665 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27666 ++ 0x58, 0x8e, 0xa8, 0x0a, 0xc1, 0x58, 0x3f, 0x43,
27667 ++ 0x4a, 0x80, 0x68, 0x13, 0xae, 0x2a, 0x4a, 0x9e,
27668 ++ 0xb6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27669 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27670 ++ 0x99, 0x8d, 0x38, 0x1a, 0xdb, 0x23, 0x59, 0xdd,
27671 ++ 0xba, 0xe7, 0x86, 0x53, 0x7d, 0x37, 0xb9, 0x00,
27672 ++ 0xb6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27673 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27674 ++ 0x99, 0x8d, 0x38, 0x1a, 0xdb, 0x23, 0x59, 0xdd,
27675 ++ 0xba, 0xe7, 0x86, 0x53, 0x7d, 0x37, 0xb9, 0x00,
27676 ++ 0x9f, 0x7a, 0xc4, 0x35, 0x1f, 0x6b, 0x91, 0xe6,
27677 ++ 0x30, 0x97, 0xa7, 0x13, 0x11, 0x5d, 0x05, 0xbe
27678 ++};
27679 ++static const u8 enc_assoc110[] __initconst = {
27680 ++ 0xff, 0xff, 0xff, 0xff
27681 ++};
27682 ++static const u8 enc_nonce110[] __initconst = {
27683 ++ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
27684 ++};
27685 ++static const u8 enc_key110[] __initconst = {
27686 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
27687 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
27688 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
27689 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
27690 ++};
27691 ++
27692 ++/* wycheproof - edge case intermediate sums in poly1305 */
27693 ++static const u8 enc_input111[] __initconst = {
27694 ++ 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
27695 ++ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
27696 ++ 0x13, 0xf8, 0x0a, 0x00, 0x6d, 0xc1, 0xbb, 0xda,
27697 ++ 0xd6, 0x39, 0xa9, 0x2f, 0xc7, 0xec, 0xa6, 0x55,
27698 ++ 0xf7, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca,
27699 ++ 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64,
27700 ++ 0x63, 0x48, 0xb8, 0xfd, 0x29, 0xbf, 0x96, 0xd5,
27701 ++ 0x63, 0xa5, 0x17, 0xe2, 0x7d, 0x7b, 0xfc, 0x0f,
27702 ++ 0x2f, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2,
27703 ++ 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73,
27704 ++ 0x2b, 0x6c, 0x89, 0x1d, 0x37, 0xc7, 0xe1, 0x1a,
27705 ++ 0x56, 0x41, 0x91, 0x9c, 0x49, 0x4d, 0x95, 0x16
27706 ++};
27707 ++static const u8 enc_output111[] __initconst = {
27708 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27709 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27710 ++ 0x94, 0x3a, 0xc0, 0x09, 0x81, 0xd8, 0x9d, 0x2c,
27711 ++ 0x14, 0xfe, 0xbf, 0xa5, 0xfb, 0x9c, 0xba, 0x12,
27712 ++ 0x97, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27713 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27714 ++ 0x00, 0x41, 0x70, 0x83, 0xa7, 0xaa, 0x8d, 0x13,
27715 ++ 0xf2, 0xfb, 0xb5, 0xdf, 0xc2, 0x55, 0xa8, 0x04,
27716 ++ 0x97, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27717 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27718 ++ 0x00, 0x41, 0x70, 0x83, 0xa7, 0xaa, 0x8d, 0x13,
27719 ++ 0xf2, 0xfb, 0xb5, 0xdf, 0xc2, 0x55, 0xa8, 0x04,
27720 ++ 0x9a, 0x18, 0xa8, 0x28, 0x07, 0x02, 0x69, 0xf4,
27721 ++ 0x47, 0x00, 0xd0, 0x09, 0xe7, 0x17, 0x1c, 0xc9
27722 ++};
27723 ++static const u8 enc_assoc111[] __initconst = {
27724 ++ 0xff, 0xff, 0xff, 0xff
27725 ++};
27726 ++static const u8 enc_nonce111[] __initconst = {
27727 ++ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
27728 ++};
27729 ++static const u8 enc_key111[] __initconst = {
27730 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
27731 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
27732 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
27733 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
27734 ++};
27735 ++
27736 ++/* wycheproof - edge case intermediate sums in poly1305 */
27737 ++static const u8 enc_input112[] __initconst = {
27738 ++ 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
27739 ++ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
27740 ++ 0x82, 0xe5, 0x9b, 0x45, 0x82, 0x91, 0x50, 0x38,
27741 ++ 0xf9, 0x33, 0x81, 0x1e, 0x65, 0x2d, 0xc6, 0x6a,
27742 ++ 0xfc, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca,
27743 ++ 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64,
27744 ++ 0xb6, 0x71, 0xc8, 0xca, 0xc2, 0x70, 0xc2, 0x65,
27745 ++ 0xa0, 0xac, 0x2f, 0x53, 0x57, 0x99, 0x88, 0x0a,
27746 ++ 0x24, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2,
27747 ++ 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73,
27748 ++ 0xfe, 0x55, 0xf9, 0x2a, 0xdc, 0x08, 0xb5, 0xaa,
27749 ++ 0x95, 0x48, 0xa9, 0x2d, 0x63, 0xaf, 0xe1, 0x13
27750 ++};
27751 ++static const u8 enc_output112[] __initconst = {
27752 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27753 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27754 ++ 0x05, 0x27, 0x51, 0x4c, 0x6e, 0x88, 0x76, 0xce,
27755 ++ 0x3b, 0xf4, 0x97, 0x94, 0x59, 0x5d, 0xda, 0x2d,
27756 ++ 0x9c, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27757 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27758 ++ 0xd5, 0x78, 0x00, 0xb4, 0x4c, 0x65, 0xd9, 0xa3,
27759 ++ 0x31, 0xf2, 0x8d, 0x6e, 0xe8, 0xb7, 0xdc, 0x01,
27760 ++ 0x9c, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27761 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27762 ++ 0xd5, 0x78, 0x00, 0xb4, 0x4c, 0x65, 0xd9, 0xa3,
27763 ++ 0x31, 0xf2, 0x8d, 0x6e, 0xe8, 0xb7, 0xdc, 0x01,
27764 ++ 0xb4, 0x36, 0xa8, 0x2b, 0x93, 0xd5, 0x55, 0xf7,
27765 ++ 0x43, 0x00, 0xd0, 0x19, 0x9b, 0xa7, 0x18, 0xce
27766 ++};
27767 ++static const u8 enc_assoc112[] __initconst = {
27768 ++ 0xff, 0xff, 0xff, 0xff
27769 ++};
27770 ++static const u8 enc_nonce112[] __initconst = {
27771 ++ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
27772 ++};
27773 ++static const u8 enc_key112[] __initconst = {
27774 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
27775 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
27776 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
27777 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
27778 ++};
27779 ++
27780 ++/* wycheproof - edge case intermediate sums in poly1305 */
27781 ++static const u8 enc_input113[] __initconst = {
27782 ++ 0xff, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
27783 ++ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
27784 ++ 0xf1, 0xd1, 0x28, 0x87, 0xb7, 0x21, 0x69, 0x86,
27785 ++ 0xa1, 0x2d, 0x79, 0x09, 0x8b, 0x6d, 0xe6, 0x0f,
27786 ++ 0xc0, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca,
27787 ++ 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64,
27788 ++ 0xa7, 0xc7, 0x58, 0x99, 0xf3, 0xe6, 0x0a, 0xf1,
27789 ++ 0xfc, 0xb6, 0xc7, 0x30, 0x7d, 0x87, 0x59, 0x0f,
27790 ++ 0x18, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2,
27791 ++ 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73,
27792 ++ 0xef, 0xe3, 0x69, 0x79, 0xed, 0x9e, 0x7d, 0x3e,
27793 ++ 0xc9, 0x52, 0x41, 0x4e, 0x49, 0xb1, 0x30, 0x16
27794 ++};
27795 ++static const u8 enc_output113[] __initconst = {
27796 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27797 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27798 ++ 0x76, 0x13, 0xe2, 0x8e, 0x5b, 0x38, 0x4f, 0x70,
27799 ++ 0x63, 0xea, 0x6f, 0x83, 0xb7, 0x1d, 0xfa, 0x48,
27800 ++ 0xa0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27801 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27802 ++ 0xc4, 0xce, 0x90, 0xe7, 0x7d, 0xf3, 0x11, 0x37,
27803 ++ 0x6d, 0xe8, 0x65, 0x0d, 0xc2, 0xa9, 0x0d, 0x04,
27804 ++ 0xa0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27805 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27806 ++ 0xc4, 0xce, 0x90, 0xe7, 0x7d, 0xf3, 0x11, 0x37,
27807 ++ 0x6d, 0xe8, 0x65, 0x0d, 0xc2, 0xa9, 0x0d, 0x04,
27808 ++ 0xce, 0x54, 0xa8, 0x2e, 0x1f, 0xa9, 0x42, 0xfa,
27809 ++ 0x3f, 0x00, 0xd0, 0x29, 0x4f, 0x37, 0x15, 0xd3
27810 ++};
27811 ++static const u8 enc_assoc113[] __initconst = {
27812 ++ 0xff, 0xff, 0xff, 0xff
27813 ++};
27814 ++static const u8 enc_nonce113[] __initconst = {
27815 ++ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
27816 ++};
27817 ++static const u8 enc_key113[] __initconst = {
27818 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
27819 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
27820 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
27821 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
27822 ++};
27823 ++
27824 ++/* wycheproof - edge case intermediate sums in poly1305 */
27825 ++static const u8 enc_input114[] __initconst = {
27826 ++ 0xcb, 0xf1, 0xda, 0x9e, 0x0b, 0xa9, 0x37, 0x73,
27827 ++ 0x74, 0xe6, 0x9e, 0x1c, 0x0e, 0x60, 0x0c, 0xfc,
27828 ++ 0x34, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09,
27829 ++ 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8,
27830 ++ 0xbe, 0x3f, 0xa6, 0x6b, 0x6c, 0xe7, 0x80, 0x8a,
27831 ++ 0xa3, 0xe4, 0x59, 0x49, 0xf9, 0x44, 0x64, 0x9f,
27832 ++ 0xd0, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39,
27833 ++ 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4,
27834 ++ 0x66, 0x68, 0xdb, 0xc8, 0xf5, 0xf2, 0x0e, 0xf2,
27835 ++ 0xb3, 0xf3, 0x8f, 0x00, 0xe2, 0x03, 0x17, 0x88
27836 ++};
27837 ++static const u8 enc_output114[] __initconst = {
27838 ++ 0xcb, 0x9a, 0x0d, 0xb1, 0x8d, 0x63, 0xd7, 0xea,
27839 ++ 0xd7, 0xc9, 0x60, 0xd6, 0xb2, 0x86, 0x74, 0x5f,
27840 ++ 0xb3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27841 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27842 ++ 0xde, 0xba, 0xb4, 0xa1, 0x58, 0x42, 0x50, 0xbf,
27843 ++ 0xfc, 0x2f, 0xc8, 0x4d, 0x95, 0xde, 0xcf, 0x04,
27844 ++ 0xb3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27845 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27846 ++ 0xde, 0xba, 0xb4, 0xa1, 0x58, 0x42, 0x50, 0xbf,
27847 ++ 0xfc, 0x2f, 0xc8, 0x4d, 0x95, 0xde, 0xcf, 0x04,
27848 ++ 0x23, 0x83, 0xab, 0x0b, 0x79, 0x92, 0x05, 0x69,
27849 ++ 0x9b, 0x51, 0x0a, 0xa7, 0x09, 0xbf, 0x31, 0xf1
27850 ++};
27851 ++static const u8 enc_assoc114[] __initconst = {
27852 ++ 0xff, 0xff, 0xff, 0xff
27853 ++};
27854 ++static const u8 enc_nonce114[] __initconst = {
27855 ++ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
27856 ++};
27857 ++static const u8 enc_key114[] __initconst = {
27858 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
27859 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
27860 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
27861 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
27862 ++};
27863 ++
27864 ++/* wycheproof - edge case intermediate sums in poly1305 */
27865 ++static const u8 enc_input115[] __initconst = {
27866 ++ 0x8f, 0x27, 0x86, 0x94, 0xc4, 0xe9, 0xda, 0xeb,
27867 ++ 0xd5, 0x8d, 0x3e, 0x5b, 0x96, 0x6e, 0x8b, 0x68,
27868 ++ 0x42, 0x3d, 0x35, 0xf6, 0x13, 0xe6, 0xd9, 0x09,
27869 ++ 0x3d, 0x38, 0xe9, 0x75, 0xc3, 0x8f, 0xe3, 0xb8,
27870 ++ 0x06, 0x53, 0xe7, 0xa3, 0x31, 0x71, 0x88, 0x33,
27871 ++ 0xac, 0xc3, 0xb9, 0xad, 0xff, 0x1c, 0x31, 0x98,
27872 ++ 0xa6, 0xf6, 0x37, 0x81, 0x71, 0xea, 0xe4, 0x39,
27873 ++ 0x6e, 0xa1, 0x5d, 0xc2, 0x40, 0xd1, 0xab, 0xf4,
27874 ++ 0xde, 0x04, 0x9a, 0x00, 0xa8, 0x64, 0x06, 0x4b,
27875 ++ 0xbc, 0xd4, 0x6f, 0xe4, 0xe4, 0x5b, 0x42, 0x8f
27876 ++};
27877 ++static const u8 enc_output115[] __initconst = {
27878 ++ 0x8f, 0x4c, 0x51, 0xbb, 0x42, 0x23, 0x3a, 0x72,
27879 ++ 0x76, 0xa2, 0xc0, 0x91, 0x2a, 0x88, 0xf3, 0xcb,
27880 ++ 0xc5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27881 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27882 ++ 0x66, 0xd6, 0xf5, 0x69, 0x05, 0xd4, 0x58, 0x06,
27883 ++ 0xf3, 0x08, 0x28, 0xa9, 0x93, 0x86, 0x9a, 0x03,
27884 ++ 0xc5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27885 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27886 ++ 0x66, 0xd6, 0xf5, 0x69, 0x05, 0xd4, 0x58, 0x06,
27887 ++ 0xf3, 0x08, 0x28, 0xa9, 0x93, 0x86, 0x9a, 0x03,
27888 ++ 0x8b, 0xfb, 0xab, 0x17, 0xa9, 0xe0, 0xb8, 0x74,
27889 ++ 0x8b, 0x51, 0x0a, 0xe7, 0xd9, 0xfd, 0x23, 0x05
27890 ++};
27891 ++static const u8 enc_assoc115[] __initconst = {
27892 ++ 0xff, 0xff, 0xff, 0xff
27893 ++};
27894 ++static const u8 enc_nonce115[] __initconst = {
27895 ++ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
27896 ++};
27897 ++static const u8 enc_key115[] __initconst = {
27898 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
27899 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
27900 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
27901 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
27902 ++};
27903 ++
27904 ++/* wycheproof - edge case intermediate sums in poly1305 */
27905 ++static const u8 enc_input116[] __initconst = {
27906 ++ 0xd5, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
27907 ++ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
27908 ++ 0x9a, 0x22, 0xd7, 0x0a, 0x48, 0xe2, 0x4f, 0xdd,
27909 ++ 0xcd, 0xd4, 0x41, 0x9d, 0xe6, 0x4c, 0x8f, 0x44,
27910 ++ 0xfc, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca,
27911 ++ 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64,
27912 ++ 0x77, 0xb5, 0xc9, 0x07, 0xd9, 0xc9, 0xe1, 0xea,
27913 ++ 0x51, 0x85, 0x1a, 0x20, 0x4a, 0xad, 0x9f, 0x0a,
27914 ++ 0x24, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2,
27915 ++ 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73,
27916 ++ 0x3f, 0x91, 0xf8, 0xe7, 0xc7, 0xb1, 0x96, 0x25,
27917 ++ 0x64, 0x61, 0x9c, 0x5e, 0x7e, 0x9b, 0xf6, 0x13
27918 ++};
27919 ++static const u8 enc_output116[] __initconst = {
27920 ++ 0xd5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27921 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27922 ++ 0x1d, 0xe0, 0x1d, 0x03, 0xa4, 0xfb, 0x69, 0x2b,
27923 ++ 0x0f, 0x13, 0x57, 0x17, 0xda, 0x3c, 0x93, 0x03,
27924 ++ 0x9c, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27925 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27926 ++ 0x14, 0xbc, 0x01, 0x79, 0x57, 0xdc, 0xfa, 0x2c,
27927 ++ 0xc0, 0xdb, 0xb8, 0x1d, 0xf5, 0x83, 0xcb, 0x01,
27928 ++ 0x9c, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27929 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27930 ++ 0x14, 0xbc, 0x01, 0x79, 0x57, 0xdc, 0xfa, 0x2c,
27931 ++ 0xc0, 0xdb, 0xb8, 0x1d, 0xf5, 0x83, 0xcb, 0x01,
27932 ++ 0x49, 0xbc, 0x6e, 0x9f, 0xc5, 0x1c, 0x4d, 0x50,
27933 ++ 0x30, 0x36, 0x64, 0x4d, 0x84, 0x27, 0x73, 0xd2
27934 ++};
27935 ++static const u8 enc_assoc116[] __initconst = {
27936 ++ 0xff, 0xff, 0xff, 0xff
27937 ++};
27938 ++static const u8 enc_nonce116[] __initconst = {
27939 ++ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
27940 ++};
27941 ++static const u8 enc_key116[] __initconst = {
27942 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
27943 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
27944 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
27945 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
27946 ++};
27947 ++
27948 ++/* wycheproof - edge case intermediate sums in poly1305 */
27949 ++static const u8 enc_input117[] __initconst = {
27950 ++ 0xdb, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
27951 ++ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
27952 ++ 0x75, 0xd5, 0x64, 0x3a, 0xa5, 0xaf, 0x93, 0x4d,
27953 ++ 0x8c, 0xce, 0x39, 0x2c, 0xc3, 0xee, 0xdb, 0x47,
27954 ++ 0xc0, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca,
27955 ++ 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64,
27956 ++ 0x60, 0x1b, 0x5a, 0xd2, 0x06, 0x7f, 0x28, 0x06,
27957 ++ 0x6a, 0x8f, 0x32, 0x81, 0x71, 0x5b, 0xa8, 0x08,
27958 ++ 0x18, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2,
27959 ++ 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73,
27960 ++ 0x28, 0x3f, 0x6b, 0x32, 0x18, 0x07, 0x5f, 0xc9,
27961 ++ 0x5f, 0x6b, 0xb4, 0xff, 0x45, 0x6d, 0xc1, 0x11
27962 ++};
27963 ++static const u8 enc_output117[] __initconst = {
27964 ++ 0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27965 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27966 ++ 0xf2, 0x17, 0xae, 0x33, 0x49, 0xb6, 0xb5, 0xbb,
27967 ++ 0x4e, 0x09, 0x2f, 0xa6, 0xff, 0x9e, 0xc7, 0x00,
27968 ++ 0xa0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27969 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27970 ++ 0x03, 0x12, 0x92, 0xac, 0x88, 0x6a, 0x33, 0xc0,
27971 ++ 0xfb, 0xd1, 0x90, 0xbc, 0xce, 0x75, 0xfc, 0x03,
27972 ++ 0xa0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27973 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
27974 ++ 0x03, 0x12, 0x92, 0xac, 0x88, 0x6a, 0x33, 0xc0,
27975 ++ 0xfb, 0xd1, 0x90, 0xbc, 0xce, 0x75, 0xfc, 0x03,
27976 ++ 0x63, 0xda, 0x6e, 0xa2, 0x51, 0xf0, 0x39, 0x53,
27977 ++ 0x2c, 0x36, 0x64, 0x5d, 0x38, 0xb7, 0x6f, 0xd7
27978 ++};
27979 ++static const u8 enc_assoc117[] __initconst = {
27980 ++ 0xff, 0xff, 0xff, 0xff
27981 ++};
27982 ++static const u8 enc_nonce117[] __initconst = {
27983 ++ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
27984 ++};
27985 ++static const u8 enc_key117[] __initconst = {
27986 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
27987 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
27988 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
27989 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
27990 ++};
27991 ++
27992 ++/* wycheproof - edge case intermediate sums in poly1305 */
27993 ++static const u8 enc_input118[] __initconst = {
27994 ++ 0x93, 0x94, 0x28, 0xd0, 0x79, 0x35, 0x1f, 0x66,
27995 ++ 0x5c, 0xd0, 0x01, 0x35, 0x43, 0x19, 0x87, 0x5c,
27996 ++ 0x62, 0x48, 0x39, 0x60, 0x42, 0x16, 0xe4, 0x03,
27997 ++ 0xeb, 0xcc, 0x6a, 0xf5, 0x59, 0xec, 0x8b, 0x43,
27998 ++ 0x97, 0x7a, 0xed, 0x35, 0xcb, 0x5a, 0x2f, 0xca,
27999 ++ 0xa0, 0x34, 0x6e, 0xfb, 0x93, 0x65, 0x54, 0x64,
28000 ++ 0xd8, 0xc8, 0xc3, 0xfa, 0x1a, 0x9e, 0x47, 0x4a,
28001 ++ 0xbe, 0x52, 0xd0, 0x2c, 0x81, 0x87, 0xe9, 0x0f,
28002 ++ 0x4f, 0x2d, 0x90, 0x96, 0x52, 0x4f, 0xa1, 0xb2,
28003 ++ 0xb0, 0x23, 0xb8, 0xb2, 0x88, 0x22, 0x27, 0x73,
28004 ++ 0x90, 0xec, 0xf2, 0x1a, 0x04, 0xe6, 0x30, 0x85,
28005 ++ 0x8b, 0xb6, 0x56, 0x52, 0xb5, 0xb1, 0x80, 0x16
28006 ++};
28007 ++static const u8 enc_output118[] __initconst = {
28008 ++ 0x93, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
28009 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
28010 ++ 0xe5, 0x8a, 0xf3, 0x69, 0xae, 0x0f, 0xc2, 0xf5,
28011 ++ 0x29, 0x0b, 0x7c, 0x7f, 0x65, 0x9c, 0x97, 0x04,
28012 ++ 0xf7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
28013 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
28014 ++ 0xbb, 0xc1, 0x0b, 0x84, 0x94, 0x8b, 0x5c, 0x8c,
28015 ++ 0x2f, 0x0c, 0x72, 0x11, 0x3e, 0xa9, 0xbd, 0x04,
28016 ++ 0xf7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
28017 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
28018 ++ 0xbb, 0xc1, 0x0b, 0x84, 0x94, 0x8b, 0x5c, 0x8c,
28019 ++ 0x2f, 0x0c, 0x72, 0x11, 0x3e, 0xa9, 0xbd, 0x04,
28020 ++ 0x73, 0xeb, 0x27, 0x24, 0xb5, 0xc4, 0x05, 0xf0,
28021 ++ 0x4d, 0x00, 0xd0, 0xf1, 0x58, 0x40, 0xa1, 0xc1
28022 ++};
28023 ++static const u8 enc_assoc118[] __initconst = {
28024 ++ 0xff, 0xff, 0xff, 0xff
28025 ++};
28026 ++static const u8 enc_nonce118[] __initconst = {
28027 ++ 0x00, 0x00, 0x00, 0x00, 0x06, 0x4c, 0x2d, 0x52
28028 ++};
28029 ++static const u8 enc_key118[] __initconst = {
28030 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
28031 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
28032 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
28033 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
28034 ++};
28035 ++
28036 ++static const struct chacha20poly1305_testvec
28037 ++chacha20poly1305_enc_vectors[] __initconst = {
28038 ++ { enc_input001, enc_output001, enc_assoc001, enc_nonce001, enc_key001,
28039 ++ sizeof(enc_input001), sizeof(enc_assoc001), sizeof(enc_nonce001) },
28040 ++ { enc_input002, enc_output002, enc_assoc002, enc_nonce002, enc_key002,
28041 ++ sizeof(enc_input002), sizeof(enc_assoc002), sizeof(enc_nonce002) },
28042 ++ { enc_input003, enc_output003, enc_assoc003, enc_nonce003, enc_key003,
28043 ++ sizeof(enc_input003), sizeof(enc_assoc003), sizeof(enc_nonce003) },
28044 ++ { enc_input004, enc_output004, enc_assoc004, enc_nonce004, enc_key004,
28045 ++ sizeof(enc_input004), sizeof(enc_assoc004), sizeof(enc_nonce004) },
28046 ++ { enc_input005, enc_output005, enc_assoc005, enc_nonce005, enc_key005,
28047 ++ sizeof(enc_input005), sizeof(enc_assoc005), sizeof(enc_nonce005) },
28048 ++ { enc_input006, enc_output006, enc_assoc006, enc_nonce006, enc_key006,
28049 ++ sizeof(enc_input006), sizeof(enc_assoc006), sizeof(enc_nonce006) },
28050 ++ { enc_input007, enc_output007, enc_assoc007, enc_nonce007, enc_key007,
28051 ++ sizeof(enc_input007), sizeof(enc_assoc007), sizeof(enc_nonce007) },
28052 ++ { enc_input008, enc_output008, enc_assoc008, enc_nonce008, enc_key008,
28053 ++ sizeof(enc_input008), sizeof(enc_assoc008), sizeof(enc_nonce008) },
28054 ++ { enc_input009, enc_output009, enc_assoc009, enc_nonce009, enc_key009,
28055 ++ sizeof(enc_input009), sizeof(enc_assoc009), sizeof(enc_nonce009) },
28056 ++ { enc_input010, enc_output010, enc_assoc010, enc_nonce010, enc_key010,
28057 ++ sizeof(enc_input010), sizeof(enc_assoc010), sizeof(enc_nonce010) },
28058 ++ { enc_input011, enc_output011, enc_assoc011, enc_nonce011, enc_key011,
28059 ++ sizeof(enc_input011), sizeof(enc_assoc011), sizeof(enc_nonce011) },
28060 ++ { enc_input012, enc_output012, enc_assoc012, enc_nonce012, enc_key012,
28061 ++ sizeof(enc_input012), sizeof(enc_assoc012), sizeof(enc_nonce012) },
28062 ++ { enc_input053, enc_output053, enc_assoc053, enc_nonce053, enc_key053,
28063 ++ sizeof(enc_input053), sizeof(enc_assoc053), sizeof(enc_nonce053) },
28064 ++ { enc_input054, enc_output054, enc_assoc054, enc_nonce054, enc_key054,
28065 ++ sizeof(enc_input054), sizeof(enc_assoc054), sizeof(enc_nonce054) },
28066 ++ { enc_input055, enc_output055, enc_assoc055, enc_nonce055, enc_key055,
28067 ++ sizeof(enc_input055), sizeof(enc_assoc055), sizeof(enc_nonce055) },
28068 ++ { enc_input056, enc_output056, enc_assoc056, enc_nonce056, enc_key056,
28069 ++ sizeof(enc_input056), sizeof(enc_assoc056), sizeof(enc_nonce056) },
28070 ++ { enc_input057, enc_output057, enc_assoc057, enc_nonce057, enc_key057,
28071 ++ sizeof(enc_input057), sizeof(enc_assoc057), sizeof(enc_nonce057) },
28072 ++ { enc_input058, enc_output058, enc_assoc058, enc_nonce058, enc_key058,
28073 ++ sizeof(enc_input058), sizeof(enc_assoc058), sizeof(enc_nonce058) },
28074 ++ { enc_input059, enc_output059, enc_assoc059, enc_nonce059, enc_key059,
28075 ++ sizeof(enc_input059), sizeof(enc_assoc059), sizeof(enc_nonce059) },
28076 ++ { enc_input060, enc_output060, enc_assoc060, enc_nonce060, enc_key060,
28077 ++ sizeof(enc_input060), sizeof(enc_assoc060), sizeof(enc_nonce060) },
28078 ++ { enc_input061, enc_output061, enc_assoc061, enc_nonce061, enc_key061,
28079 ++ sizeof(enc_input061), sizeof(enc_assoc061), sizeof(enc_nonce061) },
28080 ++ { enc_input062, enc_output062, enc_assoc062, enc_nonce062, enc_key062,
28081 ++ sizeof(enc_input062), sizeof(enc_assoc062), sizeof(enc_nonce062) },
28082 ++ { enc_input063, enc_output063, enc_assoc063, enc_nonce063, enc_key063,
28083 ++ sizeof(enc_input063), sizeof(enc_assoc063), sizeof(enc_nonce063) },
28084 ++ { enc_input064, enc_output064, enc_assoc064, enc_nonce064, enc_key064,
28085 ++ sizeof(enc_input064), sizeof(enc_assoc064), sizeof(enc_nonce064) },
28086 ++ { enc_input065, enc_output065, enc_assoc065, enc_nonce065, enc_key065,
28087 ++ sizeof(enc_input065), sizeof(enc_assoc065), sizeof(enc_nonce065) },
28088 ++ { enc_input066, enc_output066, enc_assoc066, enc_nonce066, enc_key066,
28089 ++ sizeof(enc_input066), sizeof(enc_assoc066), sizeof(enc_nonce066) },
28090 ++ { enc_input067, enc_output067, enc_assoc067, enc_nonce067, enc_key067,
28091 ++ sizeof(enc_input067), sizeof(enc_assoc067), sizeof(enc_nonce067) },
28092 ++ { enc_input068, enc_output068, enc_assoc068, enc_nonce068, enc_key068,
28093 ++ sizeof(enc_input068), sizeof(enc_assoc068), sizeof(enc_nonce068) },
28094 ++ { enc_input069, enc_output069, enc_assoc069, enc_nonce069, enc_key069,
28095 ++ sizeof(enc_input069), sizeof(enc_assoc069), sizeof(enc_nonce069) },
28096 ++ { enc_input070, enc_output070, enc_assoc070, enc_nonce070, enc_key070,
28097 ++ sizeof(enc_input070), sizeof(enc_assoc070), sizeof(enc_nonce070) },
28098 ++ { enc_input071, enc_output071, enc_assoc071, enc_nonce071, enc_key071,
28099 ++ sizeof(enc_input071), sizeof(enc_assoc071), sizeof(enc_nonce071) },
28100 ++ { enc_input072, enc_output072, enc_assoc072, enc_nonce072, enc_key072,
28101 ++ sizeof(enc_input072), sizeof(enc_assoc072), sizeof(enc_nonce072) },
28102 ++ { enc_input073, enc_output073, enc_assoc073, enc_nonce073, enc_key073,
28103 ++ sizeof(enc_input073), sizeof(enc_assoc073), sizeof(enc_nonce073) },
28104 ++ { enc_input076, enc_output076, enc_assoc076, enc_nonce076, enc_key076,
28105 ++ sizeof(enc_input076), sizeof(enc_assoc076), sizeof(enc_nonce076) },
28106 ++ { enc_input077, enc_output077, enc_assoc077, enc_nonce077, enc_key077,
28107 ++ sizeof(enc_input077), sizeof(enc_assoc077), sizeof(enc_nonce077) },
28108 ++ { enc_input078, enc_output078, enc_assoc078, enc_nonce078, enc_key078,
28109 ++ sizeof(enc_input078), sizeof(enc_assoc078), sizeof(enc_nonce078) },
28110 ++ { enc_input079, enc_output079, enc_assoc079, enc_nonce079, enc_key079,
28111 ++ sizeof(enc_input079), sizeof(enc_assoc079), sizeof(enc_nonce079) },
28112 ++ { enc_input080, enc_output080, enc_assoc080, enc_nonce080, enc_key080,
28113 ++ sizeof(enc_input080), sizeof(enc_assoc080), sizeof(enc_nonce080) },
28114 ++ { enc_input081, enc_output081, enc_assoc081, enc_nonce081, enc_key081,
28115 ++ sizeof(enc_input081), sizeof(enc_assoc081), sizeof(enc_nonce081) },
28116 ++ { enc_input082, enc_output082, enc_assoc082, enc_nonce082, enc_key082,
28117 ++ sizeof(enc_input082), sizeof(enc_assoc082), sizeof(enc_nonce082) },
28118 ++ { enc_input083, enc_output083, enc_assoc083, enc_nonce083, enc_key083,
28119 ++ sizeof(enc_input083), sizeof(enc_assoc083), sizeof(enc_nonce083) },
28120 ++ { enc_input084, enc_output084, enc_assoc084, enc_nonce084, enc_key084,
28121 ++ sizeof(enc_input084), sizeof(enc_assoc084), sizeof(enc_nonce084) },
28122 ++ { enc_input085, enc_output085, enc_assoc085, enc_nonce085, enc_key085,
28123 ++ sizeof(enc_input085), sizeof(enc_assoc085), sizeof(enc_nonce085) },
28124 ++ { enc_input093, enc_output093, enc_assoc093, enc_nonce093, enc_key093,
28125 ++ sizeof(enc_input093), sizeof(enc_assoc093), sizeof(enc_nonce093) },
28126 ++ { enc_input094, enc_output094, enc_assoc094, enc_nonce094, enc_key094,
28127 ++ sizeof(enc_input094), sizeof(enc_assoc094), sizeof(enc_nonce094) },
28128 ++ { enc_input095, enc_output095, enc_assoc095, enc_nonce095, enc_key095,
28129 ++ sizeof(enc_input095), sizeof(enc_assoc095), sizeof(enc_nonce095) },
28130 ++ { enc_input096, enc_output096, enc_assoc096, enc_nonce096, enc_key096,
28131 ++ sizeof(enc_input096), sizeof(enc_assoc096), sizeof(enc_nonce096) },
28132 ++ { enc_input097, enc_output097, enc_assoc097, enc_nonce097, enc_key097,
28133 ++ sizeof(enc_input097), sizeof(enc_assoc097), sizeof(enc_nonce097) },
28134 ++ { enc_input098, enc_output098, enc_assoc098, enc_nonce098, enc_key098,
28135 ++ sizeof(enc_input098), sizeof(enc_assoc098), sizeof(enc_nonce098) },
28136 ++ { enc_input099, enc_output099, enc_assoc099, enc_nonce099, enc_key099,
28137 ++ sizeof(enc_input099), sizeof(enc_assoc099), sizeof(enc_nonce099) },
28138 ++ { enc_input100, enc_output100, enc_assoc100, enc_nonce100, enc_key100,
28139 ++ sizeof(enc_input100), sizeof(enc_assoc100), sizeof(enc_nonce100) },
28140 ++ { enc_input101, enc_output101, enc_assoc101, enc_nonce101, enc_key101,
28141 ++ sizeof(enc_input101), sizeof(enc_assoc101), sizeof(enc_nonce101) },
28142 ++ { enc_input102, enc_output102, enc_assoc102, enc_nonce102, enc_key102,
28143 ++ sizeof(enc_input102), sizeof(enc_assoc102), sizeof(enc_nonce102) },
28144 ++ { enc_input103, enc_output103, enc_assoc103, enc_nonce103, enc_key103,
28145 ++ sizeof(enc_input103), sizeof(enc_assoc103), sizeof(enc_nonce103) },
28146 ++ { enc_input104, enc_output104, enc_assoc104, enc_nonce104, enc_key104,
28147 ++ sizeof(enc_input104), sizeof(enc_assoc104), sizeof(enc_nonce104) },
28148 ++ { enc_input105, enc_output105, enc_assoc105, enc_nonce105, enc_key105,
28149 ++ sizeof(enc_input105), sizeof(enc_assoc105), sizeof(enc_nonce105) },
28150 ++ { enc_input106, enc_output106, enc_assoc106, enc_nonce106, enc_key106,
28151 ++ sizeof(enc_input106), sizeof(enc_assoc106), sizeof(enc_nonce106) },
28152 ++ { enc_input107, enc_output107, enc_assoc107, enc_nonce107, enc_key107,
28153 ++ sizeof(enc_input107), sizeof(enc_assoc107), sizeof(enc_nonce107) },
28154 ++ { enc_input108, enc_output108, enc_assoc108, enc_nonce108, enc_key108,
28155 ++ sizeof(enc_input108), sizeof(enc_assoc108), sizeof(enc_nonce108) },
28156 ++ { enc_input109, enc_output109, enc_assoc109, enc_nonce109, enc_key109,
28157 ++ sizeof(enc_input109), sizeof(enc_assoc109), sizeof(enc_nonce109) },
28158 ++ { enc_input110, enc_output110, enc_assoc110, enc_nonce110, enc_key110,
28159 ++ sizeof(enc_input110), sizeof(enc_assoc110), sizeof(enc_nonce110) },
28160 ++ { enc_input111, enc_output111, enc_assoc111, enc_nonce111, enc_key111,
28161 ++ sizeof(enc_input111), sizeof(enc_assoc111), sizeof(enc_nonce111) },
28162 ++ { enc_input112, enc_output112, enc_assoc112, enc_nonce112, enc_key112,
28163 ++ sizeof(enc_input112), sizeof(enc_assoc112), sizeof(enc_nonce112) },
28164 ++ { enc_input113, enc_output113, enc_assoc113, enc_nonce113, enc_key113,
28165 ++ sizeof(enc_input113), sizeof(enc_assoc113), sizeof(enc_nonce113) },
28166 ++ { enc_input114, enc_output114, enc_assoc114, enc_nonce114, enc_key114,
28167 ++ sizeof(enc_input114), sizeof(enc_assoc114), sizeof(enc_nonce114) },
28168 ++ { enc_input115, enc_output115, enc_assoc115, enc_nonce115, enc_key115,
28169 ++ sizeof(enc_input115), sizeof(enc_assoc115), sizeof(enc_nonce115) },
28170 ++ { enc_input116, enc_output116, enc_assoc116, enc_nonce116, enc_key116,
28171 ++ sizeof(enc_input116), sizeof(enc_assoc116), sizeof(enc_nonce116) },
28172 ++ { enc_input117, enc_output117, enc_assoc117, enc_nonce117, enc_key117,
28173 ++ sizeof(enc_input117), sizeof(enc_assoc117), sizeof(enc_nonce117) },
28174 ++ { enc_input118, enc_output118, enc_assoc118, enc_nonce118, enc_key118,
28175 ++ sizeof(enc_input118), sizeof(enc_assoc118), sizeof(enc_nonce118) }
28176 ++};
28177 ++
28178 ++static const u8 dec_input001[] __initconst = {
28179 ++ 0x64, 0xa0, 0x86, 0x15, 0x75, 0x86, 0x1a, 0xf4,
28180 ++ 0x60, 0xf0, 0x62, 0xc7, 0x9b, 0xe6, 0x43, 0xbd,
28181 ++ 0x5e, 0x80, 0x5c, 0xfd, 0x34, 0x5c, 0xf3, 0x89,
28182 ++ 0xf1, 0x08, 0x67, 0x0a, 0xc7, 0x6c, 0x8c, 0xb2,
28183 ++ 0x4c, 0x6c, 0xfc, 0x18, 0x75, 0x5d, 0x43, 0xee,
28184 ++ 0xa0, 0x9e, 0xe9, 0x4e, 0x38, 0x2d, 0x26, 0xb0,
28185 ++ 0xbd, 0xb7, 0xb7, 0x3c, 0x32, 0x1b, 0x01, 0x00,
28186 ++ 0xd4, 0xf0, 0x3b, 0x7f, 0x35, 0x58, 0x94, 0xcf,
28187 ++ 0x33, 0x2f, 0x83, 0x0e, 0x71, 0x0b, 0x97, 0xce,
28188 ++ 0x98, 0xc8, 0xa8, 0x4a, 0xbd, 0x0b, 0x94, 0x81,
28189 ++ 0x14, 0xad, 0x17, 0x6e, 0x00, 0x8d, 0x33, 0xbd,
28190 ++ 0x60, 0xf9, 0x82, 0xb1, 0xff, 0x37, 0xc8, 0x55,
28191 ++ 0x97, 0x97, 0xa0, 0x6e, 0xf4, 0xf0, 0xef, 0x61,
28192 ++ 0xc1, 0x86, 0x32, 0x4e, 0x2b, 0x35, 0x06, 0x38,
28193 ++ 0x36, 0x06, 0x90, 0x7b, 0x6a, 0x7c, 0x02, 0xb0,
28194 ++ 0xf9, 0xf6, 0x15, 0x7b, 0x53, 0xc8, 0x67, 0xe4,
28195 ++ 0xb9, 0x16, 0x6c, 0x76, 0x7b, 0x80, 0x4d, 0x46,
28196 ++ 0xa5, 0x9b, 0x52, 0x16, 0xcd, 0xe7, 0xa4, 0xe9,
28197 ++ 0x90, 0x40, 0xc5, 0xa4, 0x04, 0x33, 0x22, 0x5e,
28198 ++ 0xe2, 0x82, 0xa1, 0xb0, 0xa0, 0x6c, 0x52, 0x3e,
28199 ++ 0xaf, 0x45, 0x34, 0xd7, 0xf8, 0x3f, 0xa1, 0x15,
28200 ++ 0x5b, 0x00, 0x47, 0x71, 0x8c, 0xbc, 0x54, 0x6a,
28201 ++ 0x0d, 0x07, 0x2b, 0x04, 0xb3, 0x56, 0x4e, 0xea,
28202 ++ 0x1b, 0x42, 0x22, 0x73, 0xf5, 0x48, 0x27, 0x1a,
28203 ++ 0x0b, 0xb2, 0x31, 0x60, 0x53, 0xfa, 0x76, 0x99,
28204 ++ 0x19, 0x55, 0xeb, 0xd6, 0x31, 0x59, 0x43, 0x4e,
28205 ++ 0xce, 0xbb, 0x4e, 0x46, 0x6d, 0xae, 0x5a, 0x10,
28206 ++ 0x73, 0xa6, 0x72, 0x76, 0x27, 0x09, 0x7a, 0x10,
28207 ++ 0x49, 0xe6, 0x17, 0xd9, 0x1d, 0x36, 0x10, 0x94,
28208 ++ 0xfa, 0x68, 0xf0, 0xff, 0x77, 0x98, 0x71, 0x30,
28209 ++ 0x30, 0x5b, 0xea, 0xba, 0x2e, 0xda, 0x04, 0xdf,
28210 ++ 0x99, 0x7b, 0x71, 0x4d, 0x6c, 0x6f, 0x2c, 0x29,
28211 ++ 0xa6, 0xad, 0x5c, 0xb4, 0x02, 0x2b, 0x02, 0x70,
28212 ++ 0x9b, 0xee, 0xad, 0x9d, 0x67, 0x89, 0x0c, 0xbb,
28213 ++ 0x22, 0x39, 0x23, 0x36, 0xfe, 0xa1, 0x85, 0x1f,
28214 ++ 0x38
28215 ++};
28216 ++static const u8 dec_output001[] __initconst = {
28217 ++ 0x49, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x65, 0x74,
28218 ++ 0x2d, 0x44, 0x72, 0x61, 0x66, 0x74, 0x73, 0x20,
28219 ++ 0x61, 0x72, 0x65, 0x20, 0x64, 0x72, 0x61, 0x66,
28220 ++ 0x74, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65,
28221 ++ 0x6e, 0x74, 0x73, 0x20, 0x76, 0x61, 0x6c, 0x69,
28222 ++ 0x64, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x20,
28223 ++ 0x6d, 0x61, 0x78, 0x69, 0x6d, 0x75, 0x6d, 0x20,
28224 ++ 0x6f, 0x66, 0x20, 0x73, 0x69, 0x78, 0x20, 0x6d,
28225 ++ 0x6f, 0x6e, 0x74, 0x68, 0x73, 0x20, 0x61, 0x6e,
28226 ++ 0x64, 0x20, 0x6d, 0x61, 0x79, 0x20, 0x62, 0x65,
28227 ++ 0x20, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x64,
28228 ++ 0x2c, 0x20, 0x72, 0x65, 0x70, 0x6c, 0x61, 0x63,
28229 ++ 0x65, 0x64, 0x2c, 0x20, 0x6f, 0x72, 0x20, 0x6f,
28230 ++ 0x62, 0x73, 0x6f, 0x6c, 0x65, 0x74, 0x65, 0x64,
28231 ++ 0x20, 0x62, 0x79, 0x20, 0x6f, 0x74, 0x68, 0x65,
28232 ++ 0x72, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65,
28233 ++ 0x6e, 0x74, 0x73, 0x20, 0x61, 0x74, 0x20, 0x61,
28234 ++ 0x6e, 0x79, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x2e,
28235 ++ 0x20, 0x49, 0x74, 0x20, 0x69, 0x73, 0x20, 0x69,
28236 ++ 0x6e, 0x61, 0x70, 0x70, 0x72, 0x6f, 0x70, 0x72,
28237 ++ 0x69, 0x61, 0x74, 0x65, 0x20, 0x74, 0x6f, 0x20,
28238 ++ 0x75, 0x73, 0x65, 0x20, 0x49, 0x6e, 0x74, 0x65,
28239 ++ 0x72, 0x6e, 0x65, 0x74, 0x2d, 0x44, 0x72, 0x61,
28240 ++ 0x66, 0x74, 0x73, 0x20, 0x61, 0x73, 0x20, 0x72,
28241 ++ 0x65, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65,
28242 ++ 0x20, 0x6d, 0x61, 0x74, 0x65, 0x72, 0x69, 0x61,
28243 ++ 0x6c, 0x20, 0x6f, 0x72, 0x20, 0x74, 0x6f, 0x20,
28244 ++ 0x63, 0x69, 0x74, 0x65, 0x20, 0x74, 0x68, 0x65,
28245 ++ 0x6d, 0x20, 0x6f, 0x74, 0x68, 0x65, 0x72, 0x20,
28246 ++ 0x74, 0x68, 0x61, 0x6e, 0x20, 0x61, 0x73, 0x20,
28247 ++ 0x2f, 0xe2, 0x80, 0x9c, 0x77, 0x6f, 0x72, 0x6b,
28248 ++ 0x20, 0x69, 0x6e, 0x20, 0x70, 0x72, 0x6f, 0x67,
28249 ++ 0x72, 0x65, 0x73, 0x73, 0x2e, 0x2f, 0xe2, 0x80,
28250 ++ 0x9d
28251 ++};
28252 ++static const u8 dec_assoc001[] __initconst = {
28253 ++ 0xf3, 0x33, 0x88, 0x86, 0x00, 0x00, 0x00, 0x00,
28254 ++ 0x00, 0x00, 0x4e, 0x91
28255 ++};
28256 ++static const u8 dec_nonce001[] __initconst = {
28257 ++ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08
28258 ++};
28259 ++static const u8 dec_key001[] __initconst = {
28260 ++ 0x1c, 0x92, 0x40, 0xa5, 0xeb, 0x55, 0xd3, 0x8a,
28261 ++ 0xf3, 0x33, 0x88, 0x86, 0x04, 0xf6, 0xb5, 0xf0,
28262 ++ 0x47, 0x39, 0x17, 0xc1, 0x40, 0x2b, 0x80, 0x09,
28263 ++ 0x9d, 0xca, 0x5c, 0xbc, 0x20, 0x70, 0x75, 0xc0
28264 ++};
28265 ++
28266 ++static const u8 dec_input002[] __initconst = {
28267 ++ 0xea, 0xe0, 0x1e, 0x9e, 0x2c, 0x91, 0xaa, 0xe1,
28268 ++ 0xdb, 0x5d, 0x99, 0x3f, 0x8a, 0xf7, 0x69, 0x92
28269 ++};
28270 ++static const u8 dec_output002[] __initconst = { };
28271 ++static const u8 dec_assoc002[] __initconst = { };
28272 ++static const u8 dec_nonce002[] __initconst = {
28273 ++ 0xca, 0xbf, 0x33, 0x71, 0x32, 0x45, 0x77, 0x8e
28274 ++};
28275 ++static const u8 dec_key002[] __initconst = {
28276 ++ 0x4c, 0xf5, 0x96, 0x83, 0x38, 0xe6, 0xae, 0x7f,
28277 ++ 0x2d, 0x29, 0x25, 0x76, 0xd5, 0x75, 0x27, 0x86,
28278 ++ 0x91, 0x9a, 0x27, 0x7a, 0xfb, 0x46, 0xc5, 0xef,
28279 ++ 0x94, 0x81, 0x79, 0x57, 0x14, 0x59, 0x40, 0x68
28280 ++};
28281 ++
28282 ++static const u8 dec_input003[] __initconst = {
28283 ++ 0xdd, 0x6b, 0x3b, 0x82, 0xce, 0x5a, 0xbd, 0xd6,
28284 ++ 0xa9, 0x35, 0x83, 0xd8, 0x8c, 0x3d, 0x85, 0x77
28285 ++};
28286 ++static const u8 dec_output003[] __initconst = { };
28287 ++static const u8 dec_assoc003[] __initconst = {
28288 ++ 0x33, 0x10, 0x41, 0x12, 0x1f, 0xf3, 0xd2, 0x6b
28289 ++};
28290 ++static const u8 dec_nonce003[] __initconst = {
28291 ++ 0x3d, 0x86, 0xb5, 0x6b, 0xc8, 0xa3, 0x1f, 0x1d
28292 ++};
28293 ++static const u8 dec_key003[] __initconst = {
28294 ++ 0x2d, 0xb0, 0x5d, 0x40, 0xc8, 0xed, 0x44, 0x88,
28295 ++ 0x34, 0xd1, 0x13, 0xaf, 0x57, 0xa1, 0xeb, 0x3a,
28296 ++ 0x2a, 0x80, 0x51, 0x36, 0xec, 0x5b, 0xbc, 0x08,
28297 ++ 0x93, 0x84, 0x21, 0xb5, 0x13, 0x88, 0x3c, 0x0d
28298 ++};
28299 ++
28300 ++static const u8 dec_input004[] __initconst = {
28301 ++ 0xb7, 0x1b, 0xb0, 0x73, 0x59, 0xb0, 0x84, 0xb2,
28302 ++ 0x6d, 0x8e, 0xab, 0x94, 0x31, 0xa1, 0xae, 0xac,
28303 ++ 0x89
28304 ++};
28305 ++static const u8 dec_output004[] __initconst = {
28306 ++ 0xa4
28307 ++};
28308 ++static const u8 dec_assoc004[] __initconst = {
28309 ++ 0x6a, 0xe2, 0xad, 0x3f, 0x88, 0x39, 0x5a, 0x40
28310 ++};
28311 ++static const u8 dec_nonce004[] __initconst = {
28312 ++ 0xd2, 0x32, 0x1f, 0x29, 0x28, 0xc6, 0xc4, 0xc4
28313 ++};
28314 ++static const u8 dec_key004[] __initconst = {
28315 ++ 0x4b, 0x28, 0x4b, 0xa3, 0x7b, 0xbe, 0xe9, 0xf8,
28316 ++ 0x31, 0x80, 0x82, 0xd7, 0xd8, 0xe8, 0xb5, 0xa1,
28317 ++ 0xe2, 0x18, 0x18, 0x8a, 0x9c, 0xfa, 0xa3, 0x3d,
28318 ++ 0x25, 0x71, 0x3e, 0x40, 0xbc, 0x54, 0x7a, 0x3e
28319 ++};
28320 ++
28321 ++static const u8 dec_input005[] __initconst = {
28322 ++ 0xbf, 0xe1, 0x5b, 0x0b, 0xdb, 0x6b, 0xf5, 0x5e,
28323 ++ 0x6c, 0x5d, 0x84, 0x44, 0x39, 0x81, 0xc1, 0x9c,
28324 ++ 0xac
28325 ++};
28326 ++static const u8 dec_output005[] __initconst = {
28327 ++ 0x2d
28328 ++};
28329 ++static const u8 dec_assoc005[] __initconst = { };
28330 ++static const u8 dec_nonce005[] __initconst = {
28331 ++ 0x20, 0x1c, 0xaa, 0x5f, 0x9c, 0xbf, 0x92, 0x30
28332 ++};
28333 ++static const u8 dec_key005[] __initconst = {
28334 ++ 0x66, 0xca, 0x9c, 0x23, 0x2a, 0x4b, 0x4b, 0x31,
28335 ++ 0x0e, 0x92, 0x89, 0x8b, 0xf4, 0x93, 0xc7, 0x87,
28336 ++ 0x98, 0xa3, 0xd8, 0x39, 0xf8, 0xf4, 0xa7, 0x01,
28337 ++ 0xc0, 0x2e, 0x0a, 0xa6, 0x7e, 0x5a, 0x78, 0x87
28338 ++};
28339 ++
28340 ++static const u8 dec_input006[] __initconst = {
28341 ++ 0x8b, 0x06, 0xd3, 0x31, 0xb0, 0x93, 0x45, 0xb1,
28342 ++ 0x75, 0x6e, 0x26, 0xf9, 0x67, 0xbc, 0x90, 0x15,
28343 ++ 0x81, 0x2c, 0xb5, 0xf0, 0xc6, 0x2b, 0xc7, 0x8c,
28344 ++ 0x56, 0xd1, 0xbf, 0x69, 0x6c, 0x07, 0xa0, 0xda,
28345 ++ 0x65, 0x27, 0xc9, 0x90, 0x3d, 0xef, 0x4b, 0x11,
28346 ++ 0x0f, 0x19, 0x07, 0xfd, 0x29, 0x92, 0xd9, 0xc8,
28347 ++ 0xf7, 0x99, 0x2e, 0x4a, 0xd0, 0xb8, 0x2c, 0xdc,
28348 ++ 0x93, 0xf5, 0x9e, 0x33, 0x78, 0xd1, 0x37, 0xc3,
28349 ++ 0x66, 0xd7, 0x5e, 0xbc, 0x44, 0xbf, 0x53, 0xa5,
28350 ++ 0xbc, 0xc4, 0xcb, 0x7b, 0x3a, 0x8e, 0x7f, 0x02,
28351 ++ 0xbd, 0xbb, 0xe7, 0xca, 0xa6, 0x6c, 0x6b, 0x93,
28352 ++ 0x21, 0x93, 0x10, 0x61, 0xe7, 0x69, 0xd0, 0x78,
28353 ++ 0xf3, 0x07, 0x5a, 0x1a, 0x8f, 0x73, 0xaa, 0xb1,
28354 ++ 0x4e, 0xd3, 0xda, 0x4f, 0xf3, 0x32, 0xe1, 0x66,
28355 ++ 0x3e, 0x6c, 0xc6, 0x13, 0xba, 0x06, 0x5b, 0xfc,
28356 ++ 0x6a, 0xe5, 0x6f, 0x60, 0xfb, 0x07, 0x40, 0xb0,
28357 ++ 0x8c, 0x9d, 0x84, 0x43, 0x6b, 0xc1, 0xf7, 0x8d,
28358 ++ 0x8d, 0x31, 0xf7, 0x7a, 0x39, 0x4d, 0x8f, 0x9a,
28359 ++ 0xeb
28360 ++};
28361 ++static const u8 dec_output006[] __initconst = {
28362 ++ 0x33, 0x2f, 0x94, 0xc1, 0xa4, 0xef, 0xcc, 0x2a,
28363 ++ 0x5b, 0xa6, 0xe5, 0x8f, 0x1d, 0x40, 0xf0, 0x92,
28364 ++ 0x3c, 0xd9, 0x24, 0x11, 0xa9, 0x71, 0xf9, 0x37,
28365 ++ 0x14, 0x99, 0xfa, 0xbe, 0xe6, 0x80, 0xde, 0x50,
28366 ++ 0xc9, 0x96, 0xd4, 0xb0, 0xec, 0x9e, 0x17, 0xec,
28367 ++ 0xd2, 0x5e, 0x72, 0x99, 0xfc, 0x0a, 0xe1, 0xcb,
28368 ++ 0x48, 0xd2, 0x85, 0xdd, 0x2f, 0x90, 0xe0, 0x66,
28369 ++ 0x3b, 0xe6, 0x20, 0x74, 0xbe, 0x23, 0x8f, 0xcb,
28370 ++ 0xb4, 0xe4, 0xda, 0x48, 0x40, 0xa6, 0xd1, 0x1b,
28371 ++ 0xc7, 0x42, 0xce, 0x2f, 0x0c, 0xa6, 0x85, 0x6e,
28372 ++ 0x87, 0x37, 0x03, 0xb1, 0x7c, 0x25, 0x96, 0xa3,
28373 ++ 0x05, 0xd8, 0xb0, 0xf4, 0xed, 0xea, 0xc2, 0xf0,
28374 ++ 0x31, 0x98, 0x6c, 0xd1, 0x14, 0x25, 0xc0, 0xcb,
28375 ++ 0x01, 0x74, 0xd0, 0x82, 0xf4, 0x36, 0xf5, 0x41,
28376 ++ 0xd5, 0xdc, 0xca, 0xc5, 0xbb, 0x98, 0xfe, 0xfc,
28377 ++ 0x69, 0x21, 0x70, 0xd8, 0xa4, 0x4b, 0xc8, 0xde,
28378 ++ 0x8f
28379 ++};
28380 ++static const u8 dec_assoc006[] __initconst = {
28381 ++ 0x70, 0xd3, 0x33, 0xf3, 0x8b, 0x18, 0x0b
28382 ++};
28383 ++static const u8 dec_nonce006[] __initconst = {
28384 ++ 0xdf, 0x51, 0x84, 0x82, 0x42, 0x0c, 0x75, 0x9c
28385 ++};
28386 ++static const u8 dec_key006[] __initconst = {
28387 ++ 0x68, 0x7b, 0x8d, 0x8e, 0xe3, 0xc4, 0xdd, 0xae,
28388 ++ 0xdf, 0x72, 0x7f, 0x53, 0x72, 0x25, 0x1e, 0x78,
28389 ++ 0x91, 0xcb, 0x69, 0x76, 0x1f, 0x49, 0x93, 0xf9,
28390 ++ 0x6f, 0x21, 0xcc, 0x39, 0x9c, 0xad, 0xb1, 0x01
28391 ++};
28392 ++
28393 ++static const u8 dec_input007[] __initconst = {
28394 ++ 0x85, 0x04, 0xc2, 0xed, 0x8d, 0xfd, 0x97, 0x5c,
28395 ++ 0xd2, 0xb7, 0xe2, 0xc1, 0x6b, 0xa3, 0xba, 0xf8,
28396 ++ 0xc9, 0x50, 0xc3, 0xc6, 0xa5, 0xe3, 0xa4, 0x7c,
28397 ++ 0xc3, 0x23, 0x49, 0x5e, 0xa9, 0xb9, 0x32, 0xeb,
28398 ++ 0x8a, 0x7c, 0xca, 0xe5, 0xec, 0xfb, 0x7c, 0xc0,
28399 ++ 0xcb, 0x7d, 0xdc, 0x2c, 0x9d, 0x92, 0x55, 0x21,
28400 ++ 0x0a, 0xc8, 0x43, 0x63, 0x59, 0x0a, 0x31, 0x70,
28401 ++ 0x82, 0x67, 0x41, 0x03, 0xf8, 0xdf, 0xf2, 0xac,
28402 ++ 0xa7, 0x02, 0xd4, 0xd5, 0x8a, 0x2d, 0xc8, 0x99,
28403 ++ 0x19, 0x66, 0xd0, 0xf6, 0x88, 0x2c, 0x77, 0xd9,
28404 ++ 0xd4, 0x0d, 0x6c, 0xbd, 0x98, 0xde, 0xe7, 0x7f,
28405 ++ 0xad, 0x7e, 0x8a, 0xfb, 0xe9, 0x4b, 0xe5, 0xf7,
28406 ++ 0xe5, 0x50, 0xa0, 0x90, 0x3f, 0xd6, 0x22, 0x53,
28407 ++ 0xe3, 0xfe, 0x1b, 0xcc, 0x79, 0x3b, 0xec, 0x12,
28408 ++ 0x47, 0x52, 0xa7, 0xd6, 0x04, 0xe3, 0x52, 0xe6,
28409 ++ 0x93, 0x90, 0x91, 0x32, 0x73, 0x79, 0xb8, 0xd0,
28410 ++ 0x31, 0xde, 0x1f, 0x9f, 0x2f, 0x05, 0x38, 0x54,
28411 ++ 0x2f, 0x35, 0x04, 0x39, 0xe0, 0xa7, 0xba, 0xc6,
28412 ++ 0x52, 0xf6, 0x37, 0x65, 0x4c, 0x07, 0xa9, 0x7e,
28413 ++ 0xb3, 0x21, 0x6f, 0x74, 0x8c, 0xc9, 0xde, 0xdb,
28414 ++ 0x65, 0x1b, 0x9b, 0xaa, 0x60, 0xb1, 0x03, 0x30,
28415 ++ 0x6b, 0xb2, 0x03, 0xc4, 0x1c, 0x04, 0xf8, 0x0f,
28416 ++ 0x64, 0xaf, 0x46, 0xe4, 0x65, 0x99, 0x49, 0xe2,
28417 ++ 0xea, 0xce, 0x78, 0x00, 0xd8, 0x8b, 0xd5, 0x2e,
28418 ++ 0xcf, 0xfc, 0x40, 0x49, 0xe8, 0x58, 0xdc, 0x34,
28419 ++ 0x9c, 0x8c, 0x61, 0xbf, 0x0a, 0x8e, 0xec, 0x39,
28420 ++ 0xa9, 0x30, 0x05, 0x5a, 0xd2, 0x56, 0x01, 0xc7,
28421 ++ 0xda, 0x8f, 0x4e, 0xbb, 0x43, 0xa3, 0x3a, 0xf9,
28422 ++ 0x15, 0x2a, 0xd0, 0xa0, 0x7a, 0x87, 0x34, 0x82,
28423 ++ 0xfe, 0x8a, 0xd1, 0x2d, 0x5e, 0xc7, 0xbf, 0x04,
28424 ++ 0x53, 0x5f, 0x3b, 0x36, 0xd4, 0x25, 0x5c, 0x34,
28425 ++ 0x7a, 0x8d, 0xd5, 0x05, 0xce, 0x72, 0xca, 0xef,
28426 ++ 0x7a, 0x4b, 0xbc, 0xb0, 0x10, 0x5c, 0x96, 0x42,
28427 ++ 0x3a, 0x00, 0x98, 0xcd, 0x15, 0xe8, 0xb7, 0x53
28428 ++};
28429 ++static const u8 dec_output007[] __initconst = {
28430 ++ 0x9b, 0x18, 0xdb, 0xdd, 0x9a, 0x0f, 0x3e, 0xa5,
28431 ++ 0x15, 0x17, 0xde, 0xdf, 0x08, 0x9d, 0x65, 0x0a,
28432 ++ 0x67, 0x30, 0x12, 0xe2, 0x34, 0x77, 0x4b, 0xc1,
28433 ++ 0xd9, 0xc6, 0x1f, 0xab, 0xc6, 0x18, 0x50, 0x17,
28434 ++ 0xa7, 0x9d, 0x3c, 0xa6, 0xc5, 0x35, 0x8c, 0x1c,
28435 ++ 0xc0, 0xa1, 0x7c, 0x9f, 0x03, 0x89, 0xca, 0xe1,
28436 ++ 0xe6, 0xe9, 0xd4, 0xd3, 0x88, 0xdb, 0xb4, 0x51,
28437 ++ 0x9d, 0xec, 0xb4, 0xfc, 0x52, 0xee, 0x6d, 0xf1,
28438 ++ 0x75, 0x42, 0xc6, 0xfd, 0xbd, 0x7a, 0x8e, 0x86,
28439 ++ 0xfc, 0x44, 0xb3, 0x4f, 0xf3, 0xea, 0x67, 0x5a,
28440 ++ 0x41, 0x13, 0xba, 0xb0, 0xdc, 0xe1, 0xd3, 0x2a,
28441 ++ 0x7c, 0x22, 0xb3, 0xca, 0xac, 0x6a, 0x37, 0x98,
28442 ++ 0x3e, 0x1d, 0x40, 0x97, 0xf7, 0x9b, 0x1d, 0x36,
28443 ++ 0x6b, 0xb3, 0x28, 0xbd, 0x60, 0x82, 0x47, 0x34,
28444 ++ 0xaa, 0x2f, 0x7d, 0xe9, 0xa8, 0x70, 0x81, 0x57,
28445 ++ 0xd4, 0xb9, 0x77, 0x0a, 0x9d, 0x29, 0xa7, 0x84,
28446 ++ 0x52, 0x4f, 0xc2, 0x4a, 0x40, 0x3b, 0x3c, 0xd4,
28447 ++ 0xc9, 0x2a, 0xdb, 0x4a, 0x53, 0xc4, 0xbe, 0x80,
28448 ++ 0xe9, 0x51, 0x7f, 0x8f, 0xc7, 0xa2, 0xce, 0x82,
28449 ++ 0x5c, 0x91, 0x1e, 0x74, 0xd9, 0xd0, 0xbd, 0xd5,
28450 ++ 0xf3, 0xfd, 0xda, 0x4d, 0x25, 0xb4, 0xbb, 0x2d,
28451 ++ 0xac, 0x2f, 0x3d, 0x71, 0x85, 0x7b, 0xcf, 0x3c,
28452 ++ 0x7b, 0x3e, 0x0e, 0x22, 0x78, 0x0c, 0x29, 0xbf,
28453 ++ 0xe4, 0xf4, 0x57, 0xb3, 0xcb, 0x49, 0xa0, 0xfc,
28454 ++ 0x1e, 0x05, 0x4e, 0x16, 0xbc, 0xd5, 0xa8, 0xa3,
28455 ++ 0xee, 0x05, 0x35, 0xc6, 0x7c, 0xab, 0x60, 0x14,
28456 ++ 0x55, 0x1a, 0x8e, 0xc5, 0x88, 0x5d, 0xd5, 0x81,
28457 ++ 0xc2, 0x81, 0xa5, 0xc4, 0x60, 0xdb, 0xaf, 0x77,
28458 ++ 0x91, 0xe1, 0xce, 0xa2, 0x7e, 0x7f, 0x42, 0xe3,
28459 ++ 0xb0, 0x13, 0x1c, 0x1f, 0x25, 0x60, 0x21, 0xe2,
28460 ++ 0x40, 0x5f, 0x99, 0xb7, 0x73, 0xec, 0x9b, 0x2b,
28461 ++ 0xf0, 0x65, 0x11, 0xc8, 0xd0, 0x0a, 0x9f, 0xd3
28462 ++};
28463 ++static const u8 dec_assoc007[] __initconst = { };
28464 ++static const u8 dec_nonce007[] __initconst = {
28465 ++ 0xde, 0x7b, 0xef, 0xc3, 0x65, 0x1b, 0x68, 0xb0
28466 ++};
28467 ++static const u8 dec_key007[] __initconst = {
28468 ++ 0x8d, 0xb8, 0x91, 0x48, 0xf0, 0xe7, 0x0a, 0xbd,
28469 ++ 0xf9, 0x3f, 0xcd, 0xd9, 0xa0, 0x1e, 0x42, 0x4c,
28470 ++ 0xe7, 0xde, 0x25, 0x3d, 0xa3, 0xd7, 0x05, 0x80,
28471 ++ 0x8d, 0xf2, 0x82, 0xac, 0x44, 0x16, 0x51, 0x01
28472 ++};
28473 ++
28474 ++static const u8 dec_input008[] __initconst = {
28475 ++ 0x14, 0xf6, 0x41, 0x37, 0xa6, 0xd4, 0x27, 0xcd,
28476 ++ 0xdb, 0x06, 0x3e, 0x9a, 0x4e, 0xab, 0xd5, 0xb1,
28477 ++ 0x1e, 0x6b, 0xd2, 0xbc, 0x11, 0xf4, 0x28, 0x93,
28478 ++ 0x63, 0x54, 0xef, 0xbb, 0x5e, 0x1d, 0x3a, 0x1d,
28479 ++ 0x37, 0x3c, 0x0a, 0x6c, 0x1e, 0xc2, 0xd1, 0x2c,
28480 ++ 0xb5, 0xa3, 0xb5, 0x7b, 0xb8, 0x8f, 0x25, 0xa6,
28481 ++ 0x1b, 0x61, 0x1c, 0xec, 0x28, 0x58, 0x26, 0xa4,
28482 ++ 0xa8, 0x33, 0x28, 0x25, 0x5c, 0x45, 0x05, 0xe5,
28483 ++ 0x6c, 0x99, 0xe5, 0x45, 0xc4, 0xa2, 0x03, 0x84,
28484 ++ 0x03, 0x73, 0x1e, 0x8c, 0x49, 0xac, 0x20, 0xdd,
28485 ++ 0x8d, 0xb3, 0xc4, 0xf5, 0xe7, 0x4f, 0xf1, 0xed,
28486 ++ 0xa1, 0x98, 0xde, 0xa4, 0x96, 0xdd, 0x2f, 0xab,
28487 ++ 0xab, 0x97, 0xcf, 0x3e, 0xd2, 0x9e, 0xb8, 0x13,
28488 ++ 0x07, 0x28, 0x29, 0x19, 0xaf, 0xfd, 0xf2, 0x49,
28489 ++ 0x43, 0xea, 0x49, 0x26, 0x91, 0xc1, 0x07, 0xd6,
28490 ++ 0xbb, 0x81, 0x75, 0x35, 0x0d, 0x24, 0x7f, 0xc8,
28491 ++ 0xda, 0xd4, 0xb7, 0xeb, 0xe8, 0x5c, 0x09, 0xa2,
28492 ++ 0x2f, 0xdc, 0x28, 0x7d, 0x3a, 0x03, 0xfa, 0x94,
28493 ++ 0xb5, 0x1d, 0x17, 0x99, 0x36, 0xc3, 0x1c, 0x18,
28494 ++ 0x34, 0xe3, 0x9f, 0xf5, 0x55, 0x7c, 0xb0, 0x60,
28495 ++ 0x9d, 0xff, 0xac, 0xd4, 0x61, 0xf2, 0xad, 0xf8,
28496 ++ 0xce, 0xc7, 0xbe, 0x5c, 0xd2, 0x95, 0xa8, 0x4b,
28497 ++ 0x77, 0x13, 0x19, 0x59, 0x26, 0xc9, 0xb7, 0x8f,
28498 ++ 0x6a, 0xcb, 0x2d, 0x37, 0x91, 0xea, 0x92, 0x9c,
28499 ++ 0x94, 0x5b, 0xda, 0x0b, 0xce, 0xfe, 0x30, 0x20,
28500 ++ 0xf8, 0x51, 0xad, 0xf2, 0xbe, 0xe7, 0xc7, 0xff,
28501 ++ 0xb3, 0x33, 0x91, 0x6a, 0xc9, 0x1a, 0x41, 0xc9,
28502 ++ 0x0f, 0xf3, 0x10, 0x0e, 0xfd, 0x53, 0xff, 0x6c,
28503 ++ 0x16, 0x52, 0xd9, 0xf3, 0xf7, 0x98, 0x2e, 0xc9,
28504 ++ 0x07, 0x31, 0x2c, 0x0c, 0x72, 0xd7, 0xc5, 0xc6,
28505 ++ 0x08, 0x2a, 0x7b, 0xda, 0xbd, 0x7e, 0x02, 0xea,
28506 ++ 0x1a, 0xbb, 0xf2, 0x04, 0x27, 0x61, 0x28, 0x8e,
28507 ++ 0xf5, 0x04, 0x03, 0x1f, 0x4c, 0x07, 0x55, 0x82,
28508 ++ 0xec, 0x1e, 0xd7, 0x8b, 0x2f, 0x65, 0x56, 0xd1,
28509 ++ 0xd9, 0x1e, 0x3c, 0xe9, 0x1f, 0x5e, 0x98, 0x70,
28510 ++ 0x38, 0x4a, 0x8c, 0x49, 0xc5, 0x43, 0xa0, 0xa1,
28511 ++ 0x8b, 0x74, 0x9d, 0x4c, 0x62, 0x0d, 0x10, 0x0c,
28512 ++ 0xf4, 0x6c, 0x8f, 0xe0, 0xaa, 0x9a, 0x8d, 0xb7,
28513 ++ 0xe0, 0xbe, 0x4c, 0x87, 0xf1, 0x98, 0x2f, 0xcc,
28514 ++ 0xed, 0xc0, 0x52, 0x29, 0xdc, 0x83, 0xf8, 0xfc,
28515 ++ 0x2c, 0x0e, 0xa8, 0x51, 0x4d, 0x80, 0x0d, 0xa3,
28516 ++ 0xfe, 0xd8, 0x37, 0xe7, 0x41, 0x24, 0xfc, 0xfb,
28517 ++ 0x75, 0xe3, 0x71, 0x7b, 0x57, 0x45, 0xf5, 0x97,
28518 ++ 0x73, 0x65, 0x63, 0x14, 0x74, 0xb8, 0x82, 0x9f,
28519 ++ 0xf8, 0x60, 0x2f, 0x8a, 0xf2, 0x4e, 0xf1, 0x39,
28520 ++ 0xda, 0x33, 0x91, 0xf8, 0x36, 0xe0, 0x8d, 0x3f,
28521 ++ 0x1f, 0x3b, 0x56, 0xdc, 0xa0, 0x8f, 0x3c, 0x9d,
28522 ++ 0x71, 0x52, 0xa7, 0xb8, 0xc0, 0xa5, 0xc6, 0xa2,
28523 ++ 0x73, 0xda, 0xf4, 0x4b, 0x74, 0x5b, 0x00, 0x3d,
28524 ++ 0x99, 0xd7, 0x96, 0xba, 0xe6, 0xe1, 0xa6, 0x96,
28525 ++ 0x38, 0xad, 0xb3, 0xc0, 0xd2, 0xba, 0x91, 0x6b,
28526 ++ 0xf9, 0x19, 0xdd, 0x3b, 0xbe, 0xbe, 0x9c, 0x20,
28527 ++ 0x50, 0xba, 0xa1, 0xd0, 0xce, 0x11, 0xbd, 0x95,
28528 ++ 0xd8, 0xd1, 0xdd, 0x33, 0x85, 0x74, 0xdc, 0xdb,
28529 ++ 0x66, 0x76, 0x44, 0xdc, 0x03, 0x74, 0x48, 0x35,
28530 ++ 0x98, 0xb1, 0x18, 0x47, 0x94, 0x7d, 0xff, 0x62,
28531 ++ 0xe4, 0x58, 0x78, 0xab, 0xed, 0x95, 0x36, 0xd9,
28532 ++ 0x84, 0x91, 0x82, 0x64, 0x41, 0xbb, 0x58, 0xe6,
28533 ++ 0x1c, 0x20, 0x6d, 0x15, 0x6b, 0x13, 0x96, 0xe8,
28534 ++ 0x35, 0x7f, 0xdc, 0x40, 0x2c, 0xe9, 0xbc, 0x8a,
28535 ++ 0x4f, 0x92, 0xec, 0x06, 0x2d, 0x50, 0xdf, 0x93,
28536 ++ 0x5d, 0x65, 0x5a, 0xa8, 0xfc, 0x20, 0x50, 0x14,
28537 ++ 0xa9, 0x8a, 0x7e, 0x1d, 0x08, 0x1f, 0xe2, 0x99,
28538 ++ 0xd0, 0xbe, 0xfb, 0x3a, 0x21, 0x9d, 0xad, 0x86,
28539 ++ 0x54, 0xfd, 0x0d, 0x98, 0x1c, 0x5a, 0x6f, 0x1f,
28540 ++ 0x9a, 0x40, 0xcd, 0xa2, 0xff, 0x6a, 0xf1, 0x54
28541 ++};
28542 ++static const u8 dec_output008[] __initconst = {
28543 ++ 0xc3, 0x09, 0x94, 0x62, 0xe6, 0x46, 0x2e, 0x10,
28544 ++ 0xbe, 0x00, 0xe4, 0xfc, 0xf3, 0x40, 0xa3, 0xe2,
28545 ++ 0x0f, 0xc2, 0x8b, 0x28, 0xdc, 0xba, 0xb4, 0x3c,
28546 ++ 0xe4, 0x21, 0x58, 0x61, 0xcd, 0x8b, 0xcd, 0xfb,
28547 ++ 0xac, 0x94, 0xa1, 0x45, 0xf5, 0x1c, 0xe1, 0x12,
28548 ++ 0xe0, 0x3b, 0x67, 0x21, 0x54, 0x5e, 0x8c, 0xaa,
28549 ++ 0xcf, 0xdb, 0xb4, 0x51, 0xd4, 0x13, 0xda, 0xe6,
28550 ++ 0x83, 0x89, 0xb6, 0x92, 0xe9, 0x21, 0x76, 0xa4,
28551 ++ 0x93, 0x7d, 0x0e, 0xfd, 0x96, 0x36, 0x03, 0x91,
28552 ++ 0x43, 0x5c, 0x92, 0x49, 0x62, 0x61, 0x7b, 0xeb,
28553 ++ 0x43, 0x89, 0xb8, 0x12, 0x20, 0x43, 0xd4, 0x47,
28554 ++ 0x06, 0x84, 0xee, 0x47, 0xe9, 0x8a, 0x73, 0x15,
28555 ++ 0x0f, 0x72, 0xcf, 0xed, 0xce, 0x96, 0xb2, 0x7f,
28556 ++ 0x21, 0x45, 0x76, 0xeb, 0x26, 0x28, 0x83, 0x6a,
28557 ++ 0xad, 0xaa, 0xa6, 0x81, 0xd8, 0x55, 0xb1, 0xa3,
28558 ++ 0x85, 0xb3, 0x0c, 0xdf, 0xf1, 0x69, 0x2d, 0x97,
28559 ++ 0x05, 0x2a, 0xbc, 0x7c, 0x7b, 0x25, 0xf8, 0x80,
28560 ++ 0x9d, 0x39, 0x25, 0xf3, 0x62, 0xf0, 0x66, 0x5e,
28561 ++ 0xf4, 0xa0, 0xcf, 0xd8, 0xfd, 0x4f, 0xb1, 0x1f,
28562 ++ 0x60, 0x3a, 0x08, 0x47, 0xaf, 0xe1, 0xf6, 0x10,
28563 ++ 0x77, 0x09, 0xa7, 0x27, 0x8f, 0x9a, 0x97, 0x5a,
28564 ++ 0x26, 0xfa, 0xfe, 0x41, 0x32, 0x83, 0x10, 0xe0,
28565 ++ 0x1d, 0xbf, 0x64, 0x0d, 0xf4, 0x1c, 0x32, 0x35,
28566 ++ 0xe5, 0x1b, 0x36, 0xef, 0xd4, 0x4a, 0x93, 0x4d,
28567 ++ 0x00, 0x7c, 0xec, 0x02, 0x07, 0x8b, 0x5d, 0x7d,
28568 ++ 0x1b, 0x0e, 0xd1, 0xa6, 0xa5, 0x5d, 0x7d, 0x57,
28569 ++ 0x88, 0xa8, 0xcc, 0x81, 0xb4, 0x86, 0x4e, 0xb4,
28570 ++ 0x40, 0xe9, 0x1d, 0xc3, 0xb1, 0x24, 0x3e, 0x7f,
28571 ++ 0xcc, 0x8a, 0x24, 0x9b, 0xdf, 0x6d, 0xf0, 0x39,
28572 ++ 0x69, 0x3e, 0x4c, 0xc0, 0x96, 0xe4, 0x13, 0xda,
28573 ++ 0x90, 0xda, 0xf4, 0x95, 0x66, 0x8b, 0x17, 0x17,
28574 ++ 0xfe, 0x39, 0x43, 0x25, 0xaa, 0xda, 0xa0, 0x43,
28575 ++ 0x3c, 0xb1, 0x41, 0x02, 0xa3, 0xf0, 0xa7, 0x19,
28576 ++ 0x59, 0xbc, 0x1d, 0x7d, 0x6c, 0x6d, 0x91, 0x09,
28577 ++ 0x5c, 0xb7, 0x5b, 0x01, 0xd1, 0x6f, 0x17, 0x21,
28578 ++ 0x97, 0xbf, 0x89, 0x71, 0xa5, 0xb0, 0x6e, 0x07,
28579 ++ 0x45, 0xfd, 0x9d, 0xea, 0x07, 0xf6, 0x7a, 0x9f,
28580 ++ 0x10, 0x18, 0x22, 0x30, 0x73, 0xac, 0xd4, 0x6b,
28581 ++ 0x72, 0x44, 0xed, 0xd9, 0x19, 0x9b, 0x2d, 0x4a,
28582 ++ 0x41, 0xdd, 0xd1, 0x85, 0x5e, 0x37, 0x19, 0xed,
28583 ++ 0xd2, 0x15, 0x8f, 0x5e, 0x91, 0xdb, 0x33, 0xf2,
28584 ++ 0xe4, 0xdb, 0xff, 0x98, 0xfb, 0xa3, 0xb5, 0xca,
28585 ++ 0x21, 0x69, 0x08, 0xe7, 0x8a, 0xdf, 0x90, 0xff,
28586 ++ 0x3e, 0xe9, 0x20, 0x86, 0x3c, 0xe9, 0xfc, 0x0b,
28587 ++ 0xfe, 0x5c, 0x61, 0xaa, 0x13, 0x92, 0x7f, 0x7b,
28588 ++ 0xec, 0xe0, 0x6d, 0xa8, 0x23, 0x22, 0xf6, 0x6b,
28589 ++ 0x77, 0xc4, 0xfe, 0x40, 0x07, 0x3b, 0xb6, 0xf6,
28590 ++ 0x8e, 0x5f, 0xd4, 0xb9, 0xb7, 0x0f, 0x21, 0x04,
28591 ++ 0xef, 0x83, 0x63, 0x91, 0x69, 0x40, 0xa3, 0x48,
28592 ++ 0x5c, 0xd2, 0x60, 0xf9, 0x4f, 0x6c, 0x47, 0x8b,
28593 ++ 0x3b, 0xb1, 0x9f, 0x8e, 0xee, 0x16, 0x8a, 0x13,
28594 ++ 0xfc, 0x46, 0x17, 0xc3, 0xc3, 0x32, 0x56, 0xf8,
28595 ++ 0x3c, 0x85, 0x3a, 0xb6, 0x3e, 0xaa, 0x89, 0x4f,
28596 ++ 0xb3, 0xdf, 0x38, 0xfd, 0xf1, 0xe4, 0x3a, 0xc0,
28597 ++ 0xe6, 0x58, 0xb5, 0x8f, 0xc5, 0x29, 0xa2, 0x92,
28598 ++ 0x4a, 0xb6, 0xa0, 0x34, 0x7f, 0xab, 0xb5, 0x8a,
28599 ++ 0x90, 0xa1, 0xdb, 0x4d, 0xca, 0xb6, 0x2c, 0x41,
28600 ++ 0x3c, 0xf7, 0x2b, 0x21, 0xc3, 0xfd, 0xf4, 0x17,
28601 ++ 0x5c, 0xb5, 0x33, 0x17, 0x68, 0x2b, 0x08, 0x30,
28602 ++ 0xf3, 0xf7, 0x30, 0x3c, 0x96, 0xe6, 0x6a, 0x20,
28603 ++ 0x97, 0xe7, 0x4d, 0x10, 0x5f, 0x47, 0x5f, 0x49,
28604 ++ 0x96, 0x09, 0xf0, 0x27, 0x91, 0xc8, 0xf8, 0x5a,
28605 ++ 0x2e, 0x79, 0xb5, 0xe2, 0xb8, 0xe8, 0xb9, 0x7b,
28606 ++ 0xd5, 0x10, 0xcb, 0xff, 0x5d, 0x14, 0x73, 0xf3
28607 ++};
28608 ++static const u8 dec_assoc008[] __initconst = { };
28609 ++static const u8 dec_nonce008[] __initconst = {
28610 ++ 0x0e, 0x0d, 0x57, 0xbb, 0x7b, 0x40, 0x54, 0x02
28611 ++};
28612 ++static const u8 dec_key008[] __initconst = {
28613 ++ 0xf2, 0xaa, 0x4f, 0x99, 0xfd, 0x3e, 0xa8, 0x53,
28614 ++ 0xc1, 0x44, 0xe9, 0x81, 0x18, 0xdc, 0xf5, 0xf0,
28615 ++ 0x3e, 0x44, 0x15, 0x59, 0xe0, 0xc5, 0x44, 0x86,
28616 ++ 0xc3, 0x91, 0xa8, 0x75, 0xc0, 0x12, 0x46, 0xba
28617 ++};
28618 ++
28619 ++static const u8 dec_input009[] __initconst = {
28620 ++ 0xfd, 0x81, 0x8d, 0xd0, 0x3d, 0xb4, 0xd5, 0xdf,
28621 ++ 0xd3, 0x42, 0x47, 0x5a, 0x6d, 0x19, 0x27, 0x66,
28622 ++ 0x4b, 0x2e, 0x0c, 0x27, 0x9c, 0x96, 0x4c, 0x72,
28623 ++ 0x02, 0xa3, 0x65, 0xc3, 0xb3, 0x6f, 0x2e, 0xbd,
28624 ++ 0x63, 0x8a, 0x4a, 0x5d, 0x29, 0xa2, 0xd0, 0x28,
28625 ++ 0x48, 0xc5, 0x3d, 0x98, 0xa3, 0xbc, 0xe0, 0xbe,
28626 ++ 0x3b, 0x3f, 0xe6, 0x8a, 0xa4, 0x7f, 0x53, 0x06,
28627 ++ 0xfa, 0x7f, 0x27, 0x76, 0x72, 0x31, 0xa1, 0xf5,
28628 ++ 0xd6, 0x0c, 0x52, 0x47, 0xba, 0xcd, 0x4f, 0xd7,
28629 ++ 0xeb, 0x05, 0x48, 0x0d, 0x7c, 0x35, 0x4a, 0x09,
28630 ++ 0xc9, 0x76, 0x71, 0x02, 0xa3, 0xfb, 0xb7, 0x1a,
28631 ++ 0x65, 0xb7, 0xed, 0x98, 0xc6, 0x30, 0x8a, 0x00,
28632 ++ 0xae, 0xa1, 0x31, 0xe5, 0xb5, 0x9e, 0x6d, 0x62,
28633 ++ 0xda, 0xda, 0x07, 0x0f, 0x38, 0x38, 0xd3, 0xcb,
28634 ++ 0xc1, 0xb0, 0xad, 0xec, 0x72, 0xec, 0xb1, 0xa2,
28635 ++ 0x7b, 0x59, 0xf3, 0x3d, 0x2b, 0xef, 0xcd, 0x28,
28636 ++ 0x5b, 0x83, 0xcc, 0x18, 0x91, 0x88, 0xb0, 0x2e,
28637 ++ 0xf9, 0x29, 0x31, 0x18, 0xf9, 0x4e, 0xe9, 0x0a,
28638 ++ 0x91, 0x92, 0x9f, 0xae, 0x2d, 0xad, 0xf4, 0xe6,
28639 ++ 0x1a, 0xe2, 0xa4, 0xee, 0x47, 0x15, 0xbf, 0x83,
28640 ++ 0x6e, 0xd7, 0x72, 0x12, 0x3b, 0x2d, 0x24, 0xe9,
28641 ++ 0xb2, 0x55, 0xcb, 0x3c, 0x10, 0xf0, 0x24, 0x8a,
28642 ++ 0x4a, 0x02, 0xea, 0x90, 0x25, 0xf0, 0xb4, 0x79,
28643 ++ 0x3a, 0xef, 0x6e, 0xf5, 0x52, 0xdf, 0xb0, 0x0a,
28644 ++ 0xcd, 0x24, 0x1c, 0xd3, 0x2e, 0x22, 0x74, 0xea,
28645 ++ 0x21, 0x6f, 0xe9, 0xbd, 0xc8, 0x3e, 0x36, 0x5b,
28646 ++ 0x19, 0xf1, 0xca, 0x99, 0x0a, 0xb4, 0xa7, 0x52,
28647 ++ 0x1a, 0x4e, 0xf2, 0xad, 0x8d, 0x56, 0x85, 0xbb,
28648 ++ 0x64, 0x89, 0xba, 0x26, 0xf9, 0xc7, 0xe1, 0x89,
28649 ++ 0x19, 0x22, 0x77, 0xc3, 0xa8, 0xfc, 0xff, 0xad,
28650 ++ 0xfe, 0xb9, 0x48, 0xae, 0x12, 0x30, 0x9f, 0x19,
28651 ++ 0xfb, 0x1b, 0xef, 0x14, 0x87, 0x8a, 0x78, 0x71,
28652 ++ 0xf3, 0xf4, 0xb7, 0x00, 0x9c, 0x1d, 0xb5, 0x3d,
28653 ++ 0x49, 0x00, 0x0c, 0x06, 0xd4, 0x50, 0xf9, 0x54,
28654 ++ 0x45, 0xb2, 0x5b, 0x43, 0xdb, 0x6d, 0xcf, 0x1a,
28655 ++ 0xe9, 0x7a, 0x7a, 0xcf, 0xfc, 0x8a, 0x4e, 0x4d,
28656 ++ 0x0b, 0x07, 0x63, 0x28, 0xd8, 0xe7, 0x08, 0x95,
28657 ++ 0xdf, 0xa6, 0x72, 0x93, 0x2e, 0xbb, 0xa0, 0x42,
28658 ++ 0x89, 0x16, 0xf1, 0xd9, 0x0c, 0xf9, 0xa1, 0x16,
28659 ++ 0xfd, 0xd9, 0x03, 0xb4, 0x3b, 0x8a, 0xf5, 0xf6,
28660 ++ 0xe7, 0x6b, 0x2e, 0x8e, 0x4c, 0x3d, 0xe2, 0xaf,
28661 ++ 0x08, 0x45, 0x03, 0xff, 0x09, 0xb6, 0xeb, 0x2d,
28662 ++ 0xc6, 0x1b, 0x88, 0x94, 0xac, 0x3e, 0xf1, 0x9f,
28663 ++ 0x0e, 0x0e, 0x2b, 0xd5, 0x00, 0x4d, 0x3f, 0x3b,
28664 ++ 0x53, 0xae, 0xaf, 0x1c, 0x33, 0x5f, 0x55, 0x6e,
28665 ++ 0x8d, 0xaf, 0x05, 0x7a, 0x10, 0x34, 0xc9, 0xf4,
28666 ++ 0x66, 0xcb, 0x62, 0x12, 0xa6, 0xee, 0xe8, 0x1c,
28667 ++ 0x5d, 0x12, 0x86, 0xdb, 0x6f, 0x1c, 0x33, 0xc4,
28668 ++ 0x1c, 0xda, 0x82, 0x2d, 0x3b, 0x59, 0xfe, 0xb1,
28669 ++ 0xa4, 0x59, 0x41, 0x86, 0xd0, 0xef, 0xae, 0xfb,
28670 ++ 0xda, 0x6d, 0x11, 0xb8, 0xca, 0xe9, 0x6e, 0xff,
28671 ++ 0xf7, 0xa9, 0xd9, 0x70, 0x30, 0xfc, 0x53, 0xe2,
28672 ++ 0xd7, 0xa2, 0x4e, 0xc7, 0x91, 0xd9, 0x07, 0x06,
28673 ++ 0xaa, 0xdd, 0xb0, 0x59, 0x28, 0x1d, 0x00, 0x66,
28674 ++ 0xc5, 0x54, 0xc2, 0xfc, 0x06, 0xda, 0x05, 0x90,
28675 ++ 0x52, 0x1d, 0x37, 0x66, 0xee, 0xf0, 0xb2, 0x55,
28676 ++ 0x8a, 0x5d, 0xd2, 0x38, 0x86, 0x94, 0x9b, 0xfc,
28677 ++ 0x10, 0x4c, 0xa1, 0xb9, 0x64, 0x3e, 0x44, 0xb8,
28678 ++ 0x5f, 0xb0, 0x0c, 0xec, 0xe0, 0xc9, 0xe5, 0x62,
28679 ++ 0x75, 0x3f, 0x09, 0xd5, 0xf5, 0xd9, 0x26, 0xba,
28680 ++ 0x9e, 0xd2, 0xf4, 0xb9, 0x48, 0x0a, 0xbc, 0xa2,
28681 ++ 0xd6, 0x7c, 0x36, 0x11, 0x7d, 0x26, 0x81, 0x89,
28682 ++ 0xcf, 0xa4, 0xad, 0x73, 0x0e, 0xee, 0xcc, 0x06,
28683 ++ 0xa9, 0xdb, 0xb1, 0xfd, 0xfb, 0x09, 0x7f, 0x90,
28684 ++ 0x42, 0x37, 0x2f, 0xe1, 0x9c, 0x0f, 0x6f, 0xcf,
28685 ++ 0x43, 0xb5, 0xd9, 0x90, 0xe1, 0x85, 0xf5, 0xa8,
28686 ++ 0xae
28687 ++};
28688 ++static const u8 dec_output009[] __initconst = {
28689 ++ 0xe6, 0xc3, 0xdb, 0x63, 0x55, 0x15, 0xe3, 0x5b,
28690 ++ 0xb7, 0x4b, 0x27, 0x8b, 0x5a, 0xdd, 0xc2, 0xe8,
28691 ++ 0x3a, 0x6b, 0xd7, 0x81, 0x96, 0x35, 0x97, 0xca,
28692 ++ 0xd7, 0x68, 0xe8, 0xef, 0xce, 0xab, 0xda, 0x09,
28693 ++ 0x6e, 0xd6, 0x8e, 0xcb, 0x55, 0xb5, 0xe1, 0xe5,
28694 ++ 0x57, 0xfd, 0xc4, 0xe3, 0xe0, 0x18, 0x4f, 0x85,
28695 ++ 0xf5, 0x3f, 0x7e, 0x4b, 0x88, 0xc9, 0x52, 0x44,
28696 ++ 0x0f, 0xea, 0xaf, 0x1f, 0x71, 0x48, 0x9f, 0x97,
28697 ++ 0x6d, 0xb9, 0x6f, 0x00, 0xa6, 0xde, 0x2b, 0x77,
28698 ++ 0x8b, 0x15, 0xad, 0x10, 0xa0, 0x2b, 0x7b, 0x41,
28699 ++ 0x90, 0x03, 0x2d, 0x69, 0xae, 0xcc, 0x77, 0x7c,
28700 ++ 0xa5, 0x9d, 0x29, 0x22, 0xc2, 0xea, 0xb4, 0x00,
28701 ++ 0x1a, 0xd2, 0x7a, 0x98, 0x8a, 0xf9, 0xf7, 0x82,
28702 ++ 0xb0, 0xab, 0xd8, 0xa6, 0x94, 0x8d, 0x58, 0x2f,
28703 ++ 0x01, 0x9e, 0x00, 0x20, 0xfc, 0x49, 0xdc, 0x0e,
28704 ++ 0x03, 0xe8, 0x45, 0x10, 0xd6, 0xa8, 0xda, 0x55,
28705 ++ 0x10, 0x9a, 0xdf, 0x67, 0x22, 0x8b, 0x43, 0xab,
28706 ++ 0x00, 0xbb, 0x02, 0xc8, 0xdd, 0x7b, 0x97, 0x17,
28707 ++ 0xd7, 0x1d, 0x9e, 0x02, 0x5e, 0x48, 0xde, 0x8e,
28708 ++ 0xcf, 0x99, 0x07, 0x95, 0x92, 0x3c, 0x5f, 0x9f,
28709 ++ 0xc5, 0x8a, 0xc0, 0x23, 0xaa, 0xd5, 0x8c, 0x82,
28710 ++ 0x6e, 0x16, 0x92, 0xb1, 0x12, 0x17, 0x07, 0xc3,
28711 ++ 0xfb, 0x36, 0xf5, 0x6c, 0x35, 0xd6, 0x06, 0x1f,
28712 ++ 0x9f, 0xa7, 0x94, 0xa2, 0x38, 0x63, 0x9c, 0xb0,
28713 ++ 0x71, 0xb3, 0xa5, 0xd2, 0xd8, 0xba, 0x9f, 0x08,
28714 ++ 0x01, 0xb3, 0xff, 0x04, 0x97, 0x73, 0x45, 0x1b,
28715 ++ 0xd5, 0xa9, 0x9c, 0x80, 0xaf, 0x04, 0x9a, 0x85,
28716 ++ 0xdb, 0x32, 0x5b, 0x5d, 0x1a, 0xc1, 0x36, 0x28,
28717 ++ 0x10, 0x79, 0xf1, 0x3c, 0xbf, 0x1a, 0x41, 0x5c,
28718 ++ 0x4e, 0xdf, 0xb2, 0x7c, 0x79, 0x3b, 0x7a, 0x62,
28719 ++ 0x3d, 0x4b, 0xc9, 0x9b, 0x2a, 0x2e, 0x7c, 0xa2,
28720 ++ 0xb1, 0x11, 0x98, 0xa7, 0x34, 0x1a, 0x00, 0xf3,
28721 ++ 0xd1, 0xbc, 0x18, 0x22, 0xba, 0x02, 0x56, 0x62,
28722 ++ 0x31, 0x10, 0x11, 0x6d, 0xe0, 0x54, 0x9d, 0x40,
28723 ++ 0x1f, 0x26, 0x80, 0x41, 0xca, 0x3f, 0x68, 0x0f,
28724 ++ 0x32, 0x1d, 0x0a, 0x8e, 0x79, 0xd8, 0xa4, 0x1b,
28725 ++ 0x29, 0x1c, 0x90, 0x8e, 0xc5, 0xe3, 0xb4, 0x91,
28726 ++ 0x37, 0x9a, 0x97, 0x86, 0x99, 0xd5, 0x09, 0xc5,
28727 ++ 0xbb, 0xa3, 0x3f, 0x21, 0x29, 0x82, 0x14, 0x5c,
28728 ++ 0xab, 0x25, 0xfb, 0xf2, 0x4f, 0x58, 0x26, 0xd4,
28729 ++ 0x83, 0xaa, 0x66, 0x89, 0x67, 0x7e, 0xc0, 0x49,
28730 ++ 0xe1, 0x11, 0x10, 0x7f, 0x7a, 0xda, 0x29, 0x04,
28731 ++ 0xff, 0xf0, 0xcb, 0x09, 0x7c, 0x9d, 0xfa, 0x03,
28732 ++ 0x6f, 0x81, 0x09, 0x31, 0x60, 0xfb, 0x08, 0xfa,
28733 ++ 0x74, 0xd3, 0x64, 0x44, 0x7c, 0x55, 0x85, 0xec,
28734 ++ 0x9c, 0x6e, 0x25, 0xb7, 0x6c, 0xc5, 0x37, 0xb6,
28735 ++ 0x83, 0x87, 0x72, 0x95, 0x8b, 0x9d, 0xe1, 0x69,
28736 ++ 0x5c, 0x31, 0x95, 0x42, 0xa6, 0x2c, 0xd1, 0x36,
28737 ++ 0x47, 0x1f, 0xec, 0x54, 0xab, 0xa2, 0x1c, 0xd8,
28738 ++ 0x00, 0xcc, 0xbc, 0x0d, 0x65, 0xe2, 0x67, 0xbf,
28739 ++ 0xbc, 0xea, 0xee, 0x9e, 0xe4, 0x36, 0x95, 0xbe,
28740 ++ 0x73, 0xd9, 0xa6, 0xd9, 0x0f, 0xa0, 0xcc, 0x82,
28741 ++ 0x76, 0x26, 0xad, 0x5b, 0x58, 0x6c, 0x4e, 0xab,
28742 ++ 0x29, 0x64, 0xd3, 0xd9, 0xa9, 0x08, 0x8c, 0x1d,
28743 ++ 0xa1, 0x4f, 0x80, 0xd8, 0x3f, 0x94, 0xfb, 0xd3,
28744 ++ 0x7b, 0xfc, 0xd1, 0x2b, 0xc3, 0x21, 0xeb, 0xe5,
28745 ++ 0x1c, 0x84, 0x23, 0x7f, 0x4b, 0xfa, 0xdb, 0x34,
28746 ++ 0x18, 0xa2, 0xc2, 0xe5, 0x13, 0xfe, 0x6c, 0x49,
28747 ++ 0x81, 0xd2, 0x73, 0xe7, 0xe2, 0xd7, 0xe4, 0x4f,
28748 ++ 0x4b, 0x08, 0x6e, 0xb1, 0x12, 0x22, 0x10, 0x9d,
28749 ++ 0xac, 0x51, 0x1e, 0x17, 0xd9, 0x8a, 0x0b, 0x42,
28750 ++ 0x88, 0x16, 0x81, 0x37, 0x7c, 0x6a, 0xf7, 0xef,
28751 ++ 0x2d, 0xe3, 0xd9, 0xf8, 0x5f, 0xe0, 0x53, 0x27,
28752 ++ 0x74, 0xb9, 0xe2, 0xd6, 0x1c, 0x80, 0x2c, 0x52,
28753 ++ 0x65
28754 ++};
28755 ++static const u8 dec_assoc009[] __initconst = {
28756 ++ 0x5a, 0x27, 0xff, 0xeb, 0xdf, 0x84, 0xb2, 0x9e,
28757 ++ 0xef
28758 ++};
28759 ++static const u8 dec_nonce009[] __initconst = {
28760 ++ 0xef, 0x2d, 0x63, 0xee, 0x6b, 0x80, 0x8b, 0x78
28761 ++};
28762 ++static const u8 dec_key009[] __initconst = {
28763 ++ 0xea, 0xbc, 0x56, 0x99, 0xe3, 0x50, 0xff, 0xc5,
28764 ++ 0xcc, 0x1a, 0xd7, 0xc1, 0x57, 0x72, 0xea, 0x86,
28765 ++ 0x5b, 0x89, 0x88, 0x61, 0x3d, 0x2f, 0x9b, 0xb2,
28766 ++ 0xe7, 0x9c, 0xec, 0x74, 0x6e, 0x3e, 0xf4, 0x3b
28767 ++};
28768 ++
28769 ++static const u8 dec_input010[] __initconst = {
28770 ++ 0xe5, 0x26, 0xa4, 0x3d, 0xbd, 0x33, 0xd0, 0x4b,
28771 ++ 0x6f, 0x05, 0xa7, 0x6e, 0x12, 0x7a, 0xd2, 0x74,
28772 ++ 0xa6, 0xdd, 0xbd, 0x95, 0xeb, 0xf9, 0xa4, 0xf1,
28773 ++ 0x59, 0x93, 0x91, 0x70, 0xd9, 0xfe, 0x9a, 0xcd,
28774 ++ 0x53, 0x1f, 0x3a, 0xab, 0xa6, 0x7c, 0x9f, 0xa6,
28775 ++ 0x9e, 0xbd, 0x99, 0xd9, 0xb5, 0x97, 0x44, 0xd5,
28776 ++ 0x14, 0x48, 0x4d, 0x9d, 0xc0, 0xd0, 0x05, 0x96,
28777 ++ 0xeb, 0x4c, 0x78, 0x55, 0x09, 0x08, 0x01, 0x02,
28778 ++ 0x30, 0x90, 0x7b, 0x96, 0x7a, 0x7b, 0x5f, 0x30,
28779 ++ 0x41, 0x24, 0xce, 0x68, 0x61, 0x49, 0x86, 0x57,
28780 ++ 0x82, 0xdd, 0x53, 0x1c, 0x51, 0x28, 0x2b, 0x53,
28781 ++ 0x6e, 0x2d, 0xc2, 0x20, 0x4c, 0xdd, 0x8f, 0x65,
28782 ++ 0x10, 0x20, 0x50, 0xdd, 0x9d, 0x50, 0xe5, 0x71,
28783 ++ 0x40, 0x53, 0x69, 0xfc, 0x77, 0x48, 0x11, 0xb9,
28784 ++ 0xde, 0xa4, 0x8d, 0x58, 0xe4, 0xa6, 0x1a, 0x18,
28785 ++ 0x47, 0x81, 0x7e, 0xfc, 0xdd, 0xf6, 0xef, 0xce,
28786 ++ 0x2f, 0x43, 0x68, 0xd6, 0x06, 0xe2, 0x74, 0x6a,
28787 ++ 0xad, 0x90, 0xf5, 0x37, 0xf3, 0x3d, 0x82, 0x69,
28788 ++ 0x40, 0xe9, 0x6b, 0xa7, 0x3d, 0xa8, 0x1e, 0xd2,
28789 ++ 0x02, 0x7c, 0xb7, 0x9b, 0xe4, 0xda, 0x8f, 0x95,
28790 ++ 0x06, 0xc5, 0xdf, 0x73, 0xa3, 0x20, 0x9a, 0x49,
28791 ++ 0xde, 0x9c, 0xbc, 0xee, 0x14, 0x3f, 0x81, 0x5e,
28792 ++ 0xf8, 0x3b, 0x59, 0x3c, 0xe1, 0x68, 0x12, 0x5a,
28793 ++ 0x3a, 0x76, 0x3a, 0x3f, 0xf7, 0x87, 0x33, 0x0a,
28794 ++ 0x01, 0xb8, 0xd4, 0xed, 0xb6, 0xbe, 0x94, 0x5e,
28795 ++ 0x70, 0x40, 0x56, 0x67, 0x1f, 0x50, 0x44, 0x19,
28796 ++ 0xce, 0x82, 0x70, 0x10, 0x87, 0x13, 0x20, 0x0b,
28797 ++ 0x4c, 0x5a, 0xb6, 0xf6, 0xa7, 0xae, 0x81, 0x75,
28798 ++ 0x01, 0x81, 0xe6, 0x4b, 0x57, 0x7c, 0xdd, 0x6d,
28799 ++ 0xf8, 0x1c, 0x29, 0x32, 0xf7, 0xda, 0x3c, 0x2d,
28800 ++ 0xf8, 0x9b, 0x25, 0x6e, 0x00, 0xb4, 0xf7, 0x2f,
28801 ++ 0xf7, 0x04, 0xf7, 0xa1, 0x56, 0xac, 0x4f, 0x1a,
28802 ++ 0x64, 0xb8, 0x47, 0x55, 0x18, 0x7b, 0x07, 0x4d,
28803 ++ 0xbd, 0x47, 0x24, 0x80, 0x5d, 0xa2, 0x70, 0xc5,
28804 ++ 0xdd, 0x8e, 0x82, 0xd4, 0xeb, 0xec, 0xb2, 0x0c,
28805 ++ 0x39, 0xd2, 0x97, 0xc1, 0xcb, 0xeb, 0xf4, 0x77,
28806 ++ 0x59, 0xb4, 0x87, 0xef, 0xcb, 0x43, 0x2d, 0x46,
28807 ++ 0x54, 0xd1, 0xa7, 0xd7, 0x15, 0x99, 0x0a, 0x43,
28808 ++ 0xa1, 0xe0, 0x99, 0x33, 0x71, 0xc1, 0xed, 0xfe,
28809 ++ 0x72, 0x46, 0x33, 0x8e, 0x91, 0x08, 0x9f, 0xc8,
28810 ++ 0x2e, 0xca, 0xfa, 0xdc, 0x59, 0xd5, 0xc3, 0x76,
28811 ++ 0x84, 0x9f, 0xa3, 0x37, 0x68, 0xc3, 0xf0, 0x47,
28812 ++ 0x2c, 0x68, 0xdb, 0x5e, 0xc3, 0x49, 0x4c, 0xe8,
28813 ++ 0x92, 0x85, 0xe2, 0x23, 0xd3, 0x3f, 0xad, 0x32,
28814 ++ 0xe5, 0x2b, 0x82, 0xd7, 0x8f, 0x99, 0x0a, 0x59,
28815 ++ 0x5c, 0x45, 0xd9, 0xb4, 0x51, 0x52, 0xc2, 0xae,
28816 ++ 0xbf, 0x80, 0xcf, 0xc9, 0xc9, 0x51, 0x24, 0x2a,
28817 ++ 0x3b, 0x3a, 0x4d, 0xae, 0xeb, 0xbd, 0x22, 0xc3,
28818 ++ 0x0e, 0x0f, 0x59, 0x25, 0x92, 0x17, 0xe9, 0x74,
28819 ++ 0xc7, 0x8b, 0x70, 0x70, 0x36, 0x55, 0x95, 0x75,
28820 ++ 0x4b, 0xad, 0x61, 0x2b, 0x09, 0xbc, 0x82, 0xf2,
28821 ++ 0x6e, 0x94, 0x43, 0xae, 0xc3, 0xd5, 0xcd, 0x8e,
28822 ++ 0xfe, 0x5b, 0x9a, 0x88, 0x43, 0x01, 0x75, 0xb2,
28823 ++ 0x23, 0x09, 0xf7, 0x89, 0x83, 0xe7, 0xfa, 0xf9,
28824 ++ 0xb4, 0x9b, 0xf8, 0xef, 0xbd, 0x1c, 0x92, 0xc1,
28825 ++ 0xda, 0x7e, 0xfe, 0x05, 0xba, 0x5a, 0xcd, 0x07,
28826 ++ 0x6a, 0x78, 0x9e, 0x5d, 0xfb, 0x11, 0x2f, 0x79,
28827 ++ 0x38, 0xb6, 0xc2, 0x5b, 0x6b, 0x51, 0xb4, 0x71,
28828 ++ 0xdd, 0xf7, 0x2a, 0xe4, 0xf4, 0x72, 0x76, 0xad,
28829 ++ 0xc2, 0xdd, 0x64, 0x5d, 0x79, 0xb6, 0xf5, 0x7a,
28830 ++ 0x77, 0x20, 0x05, 0x3d, 0x30, 0x06, 0xd4, 0x4c,
28831 ++ 0x0a, 0x2c, 0x98, 0x5a, 0xb9, 0xd4, 0x98, 0xa9,
28832 ++ 0x3f, 0xc6, 0x12, 0xea, 0x3b, 0x4b, 0xc5, 0x79,
28833 ++ 0x64, 0x63, 0x6b, 0x09, 0x54, 0x3b, 0x14, 0x27,
28834 ++ 0xba, 0x99, 0x80, 0xc8, 0x72, 0xa8, 0x12, 0x90,
28835 ++ 0x29, 0xba, 0x40, 0x54, 0x97, 0x2b, 0x7b, 0xfe,
28836 ++ 0xeb, 0xcd, 0x01, 0x05, 0x44, 0x72, 0xdb, 0x99,
28837 ++ 0xe4, 0x61, 0xc9, 0x69, 0xd6, 0xb9, 0x28, 0xd1,
28838 ++ 0x05, 0x3e, 0xf9, 0x0b, 0x49, 0x0a, 0x49, 0xe9,
28839 ++ 0x8d, 0x0e, 0xa7, 0x4a, 0x0f, 0xaf, 0x32, 0xd0,
28840 ++ 0xe0, 0xb2, 0x3a, 0x55, 0x58, 0xfe, 0x5c, 0x28,
28841 ++ 0x70, 0x51, 0x23, 0xb0, 0x7b, 0x6a, 0x5f, 0x1e,
28842 ++ 0xb8, 0x17, 0xd7, 0x94, 0x15, 0x8f, 0xee, 0x20,
28843 ++ 0xc7, 0x42, 0x25, 0x3e, 0x9a, 0x14, 0xd7, 0x60,
28844 ++ 0x72, 0x39, 0x47, 0x48, 0xa9, 0xfe, 0xdd, 0x47,
28845 ++ 0x0a, 0xb1, 0xe6, 0x60, 0x28, 0x8c, 0x11, 0x68,
28846 ++ 0xe1, 0xff, 0xd7, 0xce, 0xc8, 0xbe, 0xb3, 0xfe,
28847 ++ 0x27, 0x30, 0x09, 0x70, 0xd7, 0xfa, 0x02, 0x33,
28848 ++ 0x3a, 0x61, 0x2e, 0xc7, 0xff, 0xa4, 0x2a, 0xa8,
28849 ++ 0x6e, 0xb4, 0x79, 0x35, 0x6d, 0x4c, 0x1e, 0x38,
28850 ++ 0xf8, 0xee, 0xd4, 0x84, 0x4e, 0x6e, 0x28, 0xa7,
28851 ++ 0xce, 0xc8, 0xc1, 0xcf, 0x80, 0x05, 0xf3, 0x04,
28852 ++ 0xef, 0xc8, 0x18, 0x28, 0x2e, 0x8d, 0x5e, 0x0c,
28853 ++ 0xdf, 0xb8, 0x5f, 0x96, 0xe8, 0xc6, 0x9c, 0x2f,
28854 ++ 0xe5, 0xa6, 0x44, 0xd7, 0xe7, 0x99, 0x44, 0x0c,
28855 ++ 0xec, 0xd7, 0x05, 0x60, 0x97, 0xbb, 0x74, 0x77,
28856 ++ 0x58, 0xd5, 0xbb, 0x48, 0xde, 0x5a, 0xb2, 0x54,
28857 ++ 0x7f, 0x0e, 0x46, 0x70, 0x6a, 0x6f, 0x78, 0xa5,
28858 ++ 0x08, 0x89, 0x05, 0x4e, 0x7e, 0xa0, 0x69, 0xb4,
28859 ++ 0x40, 0x60, 0x55, 0x77, 0x75, 0x9b, 0x19, 0xf2,
28860 ++ 0xd5, 0x13, 0x80, 0x77, 0xf9, 0x4b, 0x3f, 0x1e,
28861 ++ 0xee, 0xe6, 0x76, 0x84, 0x7b, 0x8c, 0xe5, 0x27,
28862 ++ 0xa8, 0x0a, 0x91, 0x01, 0x68, 0x71, 0x8a, 0x3f,
28863 ++ 0x06, 0xab, 0xf6, 0xa9, 0xa5, 0xe6, 0x72, 0x92,
28864 ++ 0xe4, 0x67, 0xe2, 0xa2, 0x46, 0x35, 0x84, 0x55,
28865 ++ 0x7d, 0xca, 0xa8, 0x85, 0xd0, 0xf1, 0x3f, 0xbe,
28866 ++ 0xd7, 0x34, 0x64, 0xfc, 0xae, 0xe3, 0xe4, 0x04,
28867 ++ 0x9f, 0x66, 0x02, 0xb9, 0x88, 0x10, 0xd9, 0xc4,
28868 ++ 0x4c, 0x31, 0x43, 0x7a, 0x93, 0xe2, 0x9b, 0x56,
28869 ++ 0x43, 0x84, 0xdc, 0xdc, 0xde, 0x1d, 0xa4, 0x02,
28870 ++ 0x0e, 0xc2, 0xef, 0xc3, 0xf8, 0x78, 0xd1, 0xb2,
28871 ++ 0x6b, 0x63, 0x18, 0xc9, 0xa9, 0xe5, 0x72, 0xd8,
28872 ++ 0xf3, 0xb9, 0xd1, 0x8a, 0xc7, 0x1a, 0x02, 0x27,
28873 ++ 0x20, 0x77, 0x10, 0xe5, 0xc8, 0xd4, 0x4a, 0x47,
28874 ++ 0xe5, 0xdf, 0x5f, 0x01, 0xaa, 0xb0, 0xd4, 0x10,
28875 ++ 0xbb, 0x69, 0xe3, 0x36, 0xc8, 0xe1, 0x3d, 0x43,
28876 ++ 0xfb, 0x86, 0xcd, 0xcc, 0xbf, 0xf4, 0x88, 0xe0,
28877 ++ 0x20, 0xca, 0xb7, 0x1b, 0xf1, 0x2f, 0x5c, 0xee,
28878 ++ 0xd4, 0xd3, 0xa3, 0xcc, 0xa4, 0x1e, 0x1c, 0x47,
28879 ++ 0xfb, 0xbf, 0xfc, 0xa2, 0x41, 0x55, 0x9d, 0xf6,
28880 ++ 0x5a, 0x5e, 0x65, 0x32, 0x34, 0x7b, 0x52, 0x8d,
28881 ++ 0xd5, 0xd0, 0x20, 0x60, 0x03, 0xab, 0x3f, 0x8c,
28882 ++ 0xd4, 0x21, 0xea, 0x2a, 0xd9, 0xc4, 0xd0, 0xd3,
28883 ++ 0x65, 0xd8, 0x7a, 0x13, 0x28, 0x62, 0x32, 0x4b,
28884 ++ 0x2c, 0x87, 0x93, 0xa8, 0xb4, 0x52, 0x45, 0x09,
28885 ++ 0x44, 0xec, 0xec, 0xc3, 0x17, 0xdb, 0x9a, 0x4d,
28886 ++ 0x5c, 0xa9, 0x11, 0xd4, 0x7d, 0xaf, 0x9e, 0xf1,
28887 ++ 0x2d, 0xb2, 0x66, 0xc5, 0x1d, 0xed, 0xb7, 0xcd,
28888 ++ 0x0b, 0x25, 0x5e, 0x30, 0x47, 0x3f, 0x40, 0xf4,
28889 ++ 0xa1, 0xa0, 0x00, 0x94, 0x10, 0xc5, 0x6a, 0x63,
28890 ++ 0x1a, 0xd5, 0x88, 0x92, 0x8e, 0x82, 0x39, 0x87,
28891 ++ 0x3c, 0x78, 0x65, 0x58, 0x42, 0x75, 0x5b, 0xdd,
28892 ++ 0x77, 0x3e, 0x09, 0x4e, 0x76, 0x5b, 0xe6, 0x0e,
28893 ++ 0x4d, 0x38, 0xb2, 0xc0, 0xb8, 0x95, 0x01, 0x7a,
28894 ++ 0x10, 0xe0, 0xfb, 0x07, 0xf2, 0xab, 0x2d, 0x8c,
28895 ++ 0x32, 0xed, 0x2b, 0xc0, 0x46, 0xc2, 0xf5, 0x38,
28896 ++ 0x83, 0xf0, 0x17, 0xec, 0xc1, 0x20, 0x6a, 0x9a,
28897 ++ 0x0b, 0x00, 0xa0, 0x98, 0x22, 0x50, 0x23, 0xd5,
28898 ++ 0x80, 0x6b, 0xf6, 0x1f, 0xc3, 0xcc, 0x97, 0xc9,
28899 ++ 0x24, 0x9f, 0xf3, 0xaf, 0x43, 0x14, 0xd5, 0xa0
28900 ++};
28901 ++static const u8 dec_output010[] __initconst = {
28902 ++ 0x42, 0x93, 0xe4, 0xeb, 0x97, 0xb0, 0x57, 0xbf,
28903 ++ 0x1a, 0x8b, 0x1f, 0xe4, 0x5f, 0x36, 0x20, 0x3c,
28904 ++ 0xef, 0x0a, 0xa9, 0x48, 0x5f, 0x5f, 0x37, 0x22,
28905 ++ 0x3a, 0xde, 0xe3, 0xae, 0xbe, 0xad, 0x07, 0xcc,
28906 ++ 0xb1, 0xf6, 0xf5, 0xf9, 0x56, 0xdd, 0xe7, 0x16,
28907 ++ 0x1e, 0x7f, 0xdf, 0x7a, 0x9e, 0x75, 0xb7, 0xc7,
28908 ++ 0xbe, 0xbe, 0x8a, 0x36, 0x04, 0xc0, 0x10, 0xf4,
28909 ++ 0x95, 0x20, 0x03, 0xec, 0xdc, 0x05, 0xa1, 0x7d,
28910 ++ 0xc4, 0xa9, 0x2c, 0x82, 0xd0, 0xbc, 0x8b, 0xc5,
28911 ++ 0xc7, 0x45, 0x50, 0xf6, 0xa2, 0x1a, 0xb5, 0x46,
28912 ++ 0x3b, 0x73, 0x02, 0xa6, 0x83, 0x4b, 0x73, 0x82,
28913 ++ 0x58, 0x5e, 0x3b, 0x65, 0x2f, 0x0e, 0xfd, 0x2b,
28914 ++ 0x59, 0x16, 0xce, 0xa1, 0x60, 0x9c, 0xe8, 0x3a,
28915 ++ 0x99, 0xed, 0x8d, 0x5a, 0xcf, 0xf6, 0x83, 0xaf,
28916 ++ 0xba, 0xd7, 0x73, 0x73, 0x40, 0x97, 0x3d, 0xca,
28917 ++ 0xef, 0x07, 0x57, 0xe6, 0xd9, 0x70, 0x0e, 0x95,
28918 ++ 0xae, 0xa6, 0x8d, 0x04, 0xcc, 0xee, 0xf7, 0x09,
28919 ++ 0x31, 0x77, 0x12, 0xa3, 0x23, 0x97, 0x62, 0xb3,
28920 ++ 0x7b, 0x32, 0xfb, 0x80, 0x14, 0x48, 0x81, 0xc3,
28921 ++ 0xe5, 0xea, 0x91, 0x39, 0x52, 0x81, 0xa2, 0x4f,
28922 ++ 0xe4, 0xb3, 0x09, 0xff, 0xde, 0x5e, 0xe9, 0x58,
28923 ++ 0x84, 0x6e, 0xf9, 0x3d, 0xdf, 0x25, 0xea, 0xad,
28924 ++ 0xae, 0xe6, 0x9a, 0xd1, 0x89, 0x55, 0xd3, 0xde,
28925 ++ 0x6c, 0x52, 0xdb, 0x70, 0xfe, 0x37, 0xce, 0x44,
28926 ++ 0x0a, 0xa8, 0x25, 0x5f, 0x92, 0xc1, 0x33, 0x4a,
28927 ++ 0x4f, 0x9b, 0x62, 0x35, 0xff, 0xce, 0xc0, 0xa9,
28928 ++ 0x60, 0xce, 0x52, 0x00, 0x97, 0x51, 0x35, 0x26,
28929 ++ 0x2e, 0xb9, 0x36, 0xa9, 0x87, 0x6e, 0x1e, 0xcc,
28930 ++ 0x91, 0x78, 0x53, 0x98, 0x86, 0x5b, 0x9c, 0x74,
28931 ++ 0x7d, 0x88, 0x33, 0xe1, 0xdf, 0x37, 0x69, 0x2b,
28932 ++ 0xbb, 0xf1, 0x4d, 0xf4, 0xd1, 0xf1, 0x39, 0x93,
28933 ++ 0x17, 0x51, 0x19, 0xe3, 0x19, 0x1e, 0x76, 0x37,
28934 ++ 0x25, 0xfb, 0x09, 0x27, 0x6a, 0xab, 0x67, 0x6f,
28935 ++ 0x14, 0x12, 0x64, 0xe7, 0xc4, 0x07, 0xdf, 0x4d,
28936 ++ 0x17, 0xbb, 0x6d, 0xe0, 0xe9, 0xb9, 0xab, 0xca,
28937 ++ 0x10, 0x68, 0xaf, 0x7e, 0xb7, 0x33, 0x54, 0x73,
28938 ++ 0x07, 0x6e, 0xf7, 0x81, 0x97, 0x9c, 0x05, 0x6f,
28939 ++ 0x84, 0x5f, 0xd2, 0x42, 0xfb, 0x38, 0xcf, 0xd1,
28940 ++ 0x2f, 0x14, 0x30, 0x88, 0x98, 0x4d, 0x5a, 0xa9,
28941 ++ 0x76, 0xd5, 0x4f, 0x3e, 0x70, 0x6c, 0x85, 0x76,
28942 ++ 0xd7, 0x01, 0xa0, 0x1a, 0xc8, 0x4e, 0xaa, 0xac,
28943 ++ 0x78, 0xfe, 0x46, 0xde, 0x6a, 0x05, 0x46, 0xa7,
28944 ++ 0x43, 0x0c, 0xb9, 0xde, 0xb9, 0x68, 0xfb, 0xce,
28945 ++ 0x42, 0x99, 0x07, 0x4d, 0x0b, 0x3b, 0x5a, 0x30,
28946 ++ 0x35, 0xa8, 0xf9, 0x3a, 0x73, 0xef, 0x0f, 0xdb,
28947 ++ 0x1e, 0x16, 0x42, 0xc4, 0xba, 0xae, 0x58, 0xaa,
28948 ++ 0xf8, 0xe5, 0x75, 0x2f, 0x1b, 0x15, 0x5c, 0xfd,
28949 ++ 0x0a, 0x97, 0xd0, 0xe4, 0x37, 0x83, 0x61, 0x5f,
28950 ++ 0x43, 0xa6, 0xc7, 0x3f, 0x38, 0x59, 0xe6, 0xeb,
28951 ++ 0xa3, 0x90, 0xc3, 0xaa, 0xaa, 0x5a, 0xd3, 0x34,
28952 ++ 0xd4, 0x17, 0xc8, 0x65, 0x3e, 0x57, 0xbc, 0x5e,
28953 ++ 0xdd, 0x9e, 0xb7, 0xf0, 0x2e, 0x5b, 0xb2, 0x1f,
28954 ++ 0x8a, 0x08, 0x0d, 0x45, 0x91, 0x0b, 0x29, 0x53,
28955 ++ 0x4f, 0x4c, 0x5a, 0x73, 0x56, 0xfe, 0xaf, 0x41,
28956 ++ 0x01, 0x39, 0x0a, 0x24, 0x3c, 0x7e, 0xbe, 0x4e,
28957 ++ 0x53, 0xf3, 0xeb, 0x06, 0x66, 0x51, 0x28, 0x1d,
28958 ++ 0xbd, 0x41, 0x0a, 0x01, 0xab, 0x16, 0x47, 0x27,
28959 ++ 0x47, 0x47, 0xf7, 0xcb, 0x46, 0x0a, 0x70, 0x9e,
28960 ++ 0x01, 0x9c, 0x09, 0xe1, 0x2a, 0x00, 0x1a, 0xd8,
28961 ++ 0xd4, 0x79, 0x9d, 0x80, 0x15, 0x8e, 0x53, 0x2a,
28962 ++ 0x65, 0x83, 0x78, 0x3e, 0x03, 0x00, 0x07, 0x12,
28963 ++ 0x1f, 0x33, 0x3e, 0x7b, 0x13, 0x37, 0xf1, 0xc3,
28964 ++ 0xef, 0xb7, 0xc1, 0x20, 0x3c, 0x3e, 0x67, 0x66,
28965 ++ 0x5d, 0x88, 0xa7, 0x7d, 0x33, 0x50, 0x77, 0xb0,
28966 ++ 0x28, 0x8e, 0xe7, 0x2c, 0x2e, 0x7a, 0xf4, 0x3c,
28967 ++ 0x8d, 0x74, 0x83, 0xaf, 0x8e, 0x87, 0x0f, 0xe4,
28968 ++ 0x50, 0xff, 0x84, 0x5c, 0x47, 0x0c, 0x6a, 0x49,
28969 ++ 0xbf, 0x42, 0x86, 0x77, 0x15, 0x48, 0xa5, 0x90,
28970 ++ 0x5d, 0x93, 0xd6, 0x2a, 0x11, 0xd5, 0xd5, 0x11,
28971 ++ 0xaa, 0xce, 0xe7, 0x6f, 0xa5, 0xb0, 0x09, 0x2c,
28972 ++ 0x8d, 0xd3, 0x92, 0xf0, 0x5a, 0x2a, 0xda, 0x5b,
28973 ++ 0x1e, 0xd5, 0x9a, 0xc4, 0xc4, 0xf3, 0x49, 0x74,
28974 ++ 0x41, 0xca, 0xe8, 0xc1, 0xf8, 0x44, 0xd6, 0x3c,
28975 ++ 0xae, 0x6c, 0x1d, 0x9a, 0x30, 0x04, 0x4d, 0x27,
28976 ++ 0x0e, 0xb1, 0x5f, 0x59, 0xa2, 0x24, 0xe8, 0xe1,
28977 ++ 0x98, 0xc5, 0x6a, 0x4c, 0xfe, 0x41, 0xd2, 0x27,
28978 ++ 0x42, 0x52, 0xe1, 0xe9, 0x7d, 0x62, 0xe4, 0x88,
28979 ++ 0x0f, 0xad, 0xb2, 0x70, 0xcb, 0x9d, 0x4c, 0x27,
28980 ++ 0x2e, 0x76, 0x1e, 0x1a, 0x63, 0x65, 0xf5, 0x3b,
28981 ++ 0xf8, 0x57, 0x69, 0xeb, 0x5b, 0x38, 0x26, 0x39,
28982 ++ 0x33, 0x25, 0x45, 0x3e, 0x91, 0xb8, 0xd8, 0xc7,
28983 ++ 0xd5, 0x42, 0xc0, 0x22, 0x31, 0x74, 0xf4, 0xbc,
28984 ++ 0x0c, 0x23, 0xf1, 0xca, 0xc1, 0x8d, 0xd7, 0xbe,
28985 ++ 0xc9, 0x62, 0xe4, 0x08, 0x1a, 0xcf, 0x36, 0xd5,
28986 ++ 0xfe, 0x55, 0x21, 0x59, 0x91, 0x87, 0x87, 0xdf,
28987 ++ 0x06, 0xdb, 0xdf, 0x96, 0x45, 0x58, 0xda, 0x05,
28988 ++ 0xcd, 0x50, 0x4d, 0xd2, 0x7d, 0x05, 0x18, 0x73,
28989 ++ 0x6a, 0x8d, 0x11, 0x85, 0xa6, 0x88, 0xe8, 0xda,
28990 ++ 0xe6, 0x30, 0x33, 0xa4, 0x89, 0x31, 0x75, 0xbe,
28991 ++ 0x69, 0x43, 0x84, 0x43, 0x50, 0x87, 0xdd, 0x71,
28992 ++ 0x36, 0x83, 0xc3, 0x78, 0x74, 0x24, 0x0a, 0xed,
28993 ++ 0x7b, 0xdb, 0xa4, 0x24, 0x0b, 0xb9, 0x7e, 0x5d,
28994 ++ 0xff, 0xde, 0xb1, 0xef, 0x61, 0x5a, 0x45, 0x33,
28995 ++ 0xf6, 0x17, 0x07, 0x08, 0x98, 0x83, 0x92, 0x0f,
28996 ++ 0x23, 0x6d, 0xe6, 0xaa, 0x17, 0x54, 0xad, 0x6a,
28997 ++ 0xc8, 0xdb, 0x26, 0xbe, 0xb8, 0xb6, 0x08, 0xfa,
28998 ++ 0x68, 0xf1, 0xd7, 0x79, 0x6f, 0x18, 0xb4, 0x9e,
28999 ++ 0x2d, 0x3f, 0x1b, 0x64, 0xaf, 0x8d, 0x06, 0x0e,
29000 ++ 0x49, 0x28, 0xe0, 0x5d, 0x45, 0x68, 0x13, 0x87,
29001 ++ 0xfa, 0xde, 0x40, 0x7b, 0xd2, 0xc3, 0x94, 0xd5,
29002 ++ 0xe1, 0xd9, 0xc2, 0xaf, 0x55, 0x89, 0xeb, 0xb4,
29003 ++ 0x12, 0x59, 0xa8, 0xd4, 0xc5, 0x29, 0x66, 0x38,
29004 ++ 0xe6, 0xac, 0x22, 0x22, 0xd9, 0x64, 0x9b, 0x34,
29005 ++ 0x0a, 0x32, 0x9f, 0xc2, 0xbf, 0x17, 0x6c, 0x3f,
29006 ++ 0x71, 0x7a, 0x38, 0x6b, 0x98, 0xfb, 0x49, 0x36,
29007 ++ 0x89, 0xc9, 0xe2, 0xd6, 0xc7, 0x5d, 0xd0, 0x69,
29008 ++ 0x5f, 0x23, 0x35, 0xc9, 0x30, 0xe2, 0xfd, 0x44,
29009 ++ 0x58, 0x39, 0xd7, 0x97, 0xfb, 0x5c, 0x00, 0xd5,
29010 ++ 0x4f, 0x7a, 0x1a, 0x95, 0x8b, 0x62, 0x4b, 0xce,
29011 ++ 0xe5, 0x91, 0x21, 0x7b, 0x30, 0x00, 0xd6, 0xdd,
29012 ++ 0x6d, 0x02, 0x86, 0x49, 0x0f, 0x3c, 0x1a, 0x27,
29013 ++ 0x3c, 0xd3, 0x0e, 0x71, 0xf2, 0xff, 0xf5, 0x2f,
29014 ++ 0x87, 0xac, 0x67, 0x59, 0x81, 0xa3, 0xf7, 0xf8,
29015 ++ 0xd6, 0x11, 0x0c, 0x84, 0xa9, 0x03, 0xee, 0x2a,
29016 ++ 0xc4, 0xf3, 0x22, 0xab, 0x7c, 0xe2, 0x25, 0xf5,
29017 ++ 0x67, 0xa3, 0xe4, 0x11, 0xe0, 0x59, 0xb3, 0xca,
29018 ++ 0x87, 0xa0, 0xae, 0xc9, 0xa6, 0x62, 0x1b, 0x6e,
29019 ++ 0x4d, 0x02, 0x6b, 0x07, 0x9d, 0xfd, 0xd0, 0x92,
29020 ++ 0x06, 0xe1, 0xb2, 0x9a, 0x4a, 0x1f, 0x1f, 0x13,
29021 ++ 0x49, 0x99, 0x97, 0x08, 0xde, 0x7f, 0x98, 0xaf,
29022 ++ 0x51, 0x98, 0xee, 0x2c, 0xcb, 0xf0, 0x0b, 0xc6,
29023 ++ 0xb6, 0xb7, 0x2d, 0x9a, 0xb1, 0xac, 0xa6, 0xe3,
29024 ++ 0x15, 0x77, 0x9d, 0x6b, 0x1a, 0xe4, 0xfc, 0x8b,
29025 ++ 0xf2, 0x17, 0x59, 0x08, 0x04, 0x58, 0x81, 0x9d,
29026 ++ 0x1b, 0x1b, 0x69, 0x55, 0xc2, 0xb4, 0x3c, 0x1f,
29027 ++ 0x50, 0xf1, 0x7f, 0x77, 0x90, 0x4c, 0x66, 0x40,
29028 ++ 0x5a, 0xc0, 0x33, 0x1f, 0xcb, 0x05, 0x6d, 0x5c,
29029 ++ 0x06, 0x87, 0x52, 0xa2, 0x8f, 0x26, 0xd5, 0x4f
29030 ++};
29031 ++static const u8 dec_assoc010[] __initconst = {
29032 ++ 0xd2, 0xa1, 0x70, 0xdb, 0x7a, 0xf8, 0xfa, 0x27,
29033 ++ 0xba, 0x73, 0x0f, 0xbf, 0x3d, 0x1e, 0x82, 0xb2
29034 ++};
29035 ++static const u8 dec_nonce010[] __initconst = {
29036 ++ 0xdb, 0x92, 0x0f, 0x7f, 0x17, 0x54, 0x0c, 0x30
29037 ++};
29038 ++static const u8 dec_key010[] __initconst = {
29039 ++ 0x47, 0x11, 0xeb, 0x86, 0x2b, 0x2c, 0xab, 0x44,
29040 ++ 0x34, 0xda, 0x7f, 0x57, 0x03, 0x39, 0x0c, 0xaf,
29041 ++ 0x2c, 0x14, 0xfd, 0x65, 0x23, 0xe9, 0x8e, 0x74,
29042 ++ 0xd5, 0x08, 0x68, 0x08, 0xe7, 0xb4, 0x72, 0xd7
29043 ++};
29044 ++
29045 ++static const u8 dec_input011[] __initconst = {
29046 ++ 0x6a, 0xfc, 0x4b, 0x25, 0xdf, 0xc0, 0xe4, 0xe8,
29047 ++ 0x17, 0x4d, 0x4c, 0xc9, 0x7e, 0xde, 0x3a, 0xcc,
29048 ++ 0x3c, 0xba, 0x6a, 0x77, 0x47, 0xdb, 0xe3, 0x74,
29049 ++ 0x7a, 0x4d, 0x5f, 0x8d, 0x37, 0x55, 0x80, 0x73,
29050 ++ 0x90, 0x66, 0x5d, 0x3a, 0x7d, 0x5d, 0x86, 0x5e,
29051 ++ 0x8d, 0xfd, 0x83, 0xff, 0x4e, 0x74, 0x6f, 0xf9,
29052 ++ 0xe6, 0x70, 0x17, 0x70, 0x3e, 0x96, 0xa7, 0x7e,
29053 ++ 0xcb, 0xab, 0x8f, 0x58, 0x24, 0x9b, 0x01, 0xfd,
29054 ++ 0xcb, 0xe6, 0x4d, 0x9b, 0xf0, 0x88, 0x94, 0x57,
29055 ++ 0x66, 0xef, 0x72, 0x4c, 0x42, 0x6e, 0x16, 0x19,
29056 ++ 0x15, 0xea, 0x70, 0x5b, 0xac, 0x13, 0xdb, 0x9f,
29057 ++ 0x18, 0xe2, 0x3c, 0x26, 0x97, 0xbc, 0xdc, 0x45,
29058 ++ 0x8c, 0x6c, 0x24, 0x69, 0x9c, 0xf7, 0x65, 0x1e,
29059 ++ 0x18, 0x59, 0x31, 0x7c, 0xe4, 0x73, 0xbc, 0x39,
29060 ++ 0x62, 0xc6, 0x5c, 0x9f, 0xbf, 0xfa, 0x90, 0x03,
29061 ++ 0xc9, 0x72, 0x26, 0xb6, 0x1b, 0xc2, 0xb7, 0x3f,
29062 ++ 0xf2, 0x13, 0x77, 0xf2, 0x8d, 0xb9, 0x47, 0xd0,
29063 ++ 0x53, 0xdd, 0xc8, 0x91, 0x83, 0x8b, 0xb1, 0xce,
29064 ++ 0xa3, 0xfe, 0xcd, 0xd9, 0xdd, 0x92, 0x7b, 0xdb,
29065 ++ 0xb8, 0xfb, 0xc9, 0x2d, 0x01, 0x59, 0x39, 0x52,
29066 ++ 0xad, 0x1b, 0xec, 0xcf, 0xd7, 0x70, 0x13, 0x21,
29067 ++ 0xf5, 0x47, 0xaa, 0x18, 0x21, 0x5c, 0xc9, 0x9a,
29068 ++ 0xd2, 0x6b, 0x05, 0x9c, 0x01, 0xa1, 0xda, 0x35,
29069 ++ 0x5d, 0xb3, 0x70, 0xe6, 0xa9, 0x80, 0x8b, 0x91,
29070 ++ 0xb7, 0xb3, 0x5f, 0x24, 0x9a, 0xb7, 0xd1, 0x6b,
29071 ++ 0xa1, 0x1c, 0x50, 0xba, 0x49, 0xe0, 0xee, 0x2e,
29072 ++ 0x75, 0xac, 0x69, 0xc0, 0xeb, 0x03, 0xdd, 0x19,
29073 ++ 0xe5, 0xf6, 0x06, 0xdd, 0xc3, 0xd7, 0x2b, 0x07,
29074 ++ 0x07, 0x30, 0xa7, 0x19, 0x0c, 0xbf, 0xe6, 0x18,
29075 ++ 0xcc, 0xb1, 0x01, 0x11, 0x85, 0x77, 0x1d, 0x96,
29076 ++ 0xa7, 0xa3, 0x00, 0x84, 0x02, 0xa2, 0x83, 0x68,
29077 ++ 0xda, 0x17, 0x27, 0xc8, 0x7f, 0x23, 0xb7, 0xf4,
29078 ++ 0x13, 0x85, 0xcf, 0xdd, 0x7a, 0x7d, 0x24, 0x57,
29079 ++ 0xfe, 0x05, 0x93, 0xf5, 0x74, 0xce, 0xed, 0x0c,
29080 ++ 0x20, 0x98, 0x8d, 0x92, 0x30, 0xa1, 0x29, 0x23,
29081 ++ 0x1a, 0xa0, 0x4f, 0x69, 0x56, 0x4c, 0xe1, 0xc8,
29082 ++ 0xce, 0xf6, 0x9a, 0x0c, 0xa4, 0xfa, 0x04, 0xf6,
29083 ++ 0x62, 0x95, 0xf2, 0xfa, 0xc7, 0x40, 0x68, 0x40,
29084 ++ 0x8f, 0x41, 0xda, 0xb4, 0x26, 0x6f, 0x70, 0xab,
29085 ++ 0x40, 0x61, 0xa4, 0x0e, 0x75, 0xfb, 0x86, 0xeb,
29086 ++ 0x9d, 0x9a, 0x1f, 0xec, 0x76, 0x99, 0xe7, 0xea,
29087 ++ 0xaa, 0x1e, 0x2d, 0xb5, 0xd4, 0xa6, 0x1a, 0xb8,
29088 ++ 0x61, 0x0a, 0x1d, 0x16, 0x5b, 0x98, 0xc2, 0x31,
29089 ++ 0x40, 0xe7, 0x23, 0x1d, 0x66, 0x99, 0xc8, 0xc0,
29090 ++ 0xd7, 0xce, 0xf3, 0x57, 0x40, 0x04, 0x3f, 0xfc,
29091 ++ 0xea, 0xb3, 0xfc, 0xd2, 0xd3, 0x99, 0xa4, 0x94,
29092 ++ 0x69, 0xa0, 0xef, 0xd1, 0x85, 0xb3, 0xa6, 0xb1,
29093 ++ 0x28, 0xbf, 0x94, 0x67, 0x22, 0xc3, 0x36, 0x46,
29094 ++ 0xf8, 0xd2, 0x0f, 0x5f, 0xf4, 0x59, 0x80, 0xe6,
29095 ++ 0x2d, 0x43, 0x08, 0x7d, 0x19, 0x09, 0x97, 0xa7,
29096 ++ 0x4c, 0x3d, 0x8d, 0xba, 0x65, 0x62, 0xa3, 0x71,
29097 ++ 0x33, 0x29, 0x62, 0xdb, 0xc1, 0x33, 0x34, 0x1a,
29098 ++ 0x63, 0x33, 0x16, 0xb6, 0x64, 0x7e, 0xab, 0x33,
29099 ++ 0xf0, 0xe6, 0x26, 0x68, 0xba, 0x1d, 0x2e, 0x38,
29100 ++ 0x08, 0xe6, 0x02, 0xd3, 0x25, 0x2c, 0x47, 0x23,
29101 ++ 0x58, 0x34, 0x0f, 0x9d, 0x63, 0x4f, 0x63, 0xbb,
29102 ++ 0x7f, 0x3b, 0x34, 0x38, 0xa7, 0xb5, 0x8d, 0x65,
29103 ++ 0xd9, 0x9f, 0x79, 0x55, 0x3e, 0x4d, 0xe7, 0x73,
29104 ++ 0xd8, 0xf6, 0x98, 0x97, 0x84, 0x60, 0x9c, 0xc8,
29105 ++ 0xa9, 0x3c, 0xf6, 0xdc, 0x12, 0x5c, 0xe1, 0xbb,
29106 ++ 0x0b, 0x8b, 0x98, 0x9c, 0x9d, 0x26, 0x7c, 0x4a,
29107 ++ 0xe6, 0x46, 0x36, 0x58, 0x21, 0x4a, 0xee, 0xca,
29108 ++ 0xd7, 0x3b, 0xc2, 0x6c, 0x49, 0x2f, 0xe5, 0xd5,
29109 ++ 0x03, 0x59, 0x84, 0x53, 0xcb, 0xfe, 0x92, 0x71,
29110 ++ 0x2e, 0x7c, 0x21, 0xcc, 0x99, 0x85, 0x7f, 0xb8,
29111 ++ 0x74, 0x90, 0x13, 0x42, 0x3f, 0xe0, 0x6b, 0x1d,
29112 ++ 0xf2, 0x4d, 0x54, 0xd4, 0xfc, 0x3a, 0x05, 0xe6,
29113 ++ 0x74, 0xaf, 0xa6, 0xa0, 0x2a, 0x20, 0x23, 0x5d,
29114 ++ 0x34, 0x5c, 0xd9, 0x3e, 0x4e, 0xfa, 0x93, 0xe7,
29115 ++ 0xaa, 0xe9, 0x6f, 0x08, 0x43, 0x67, 0x41, 0xc5,
29116 ++ 0xad, 0xfb, 0x31, 0x95, 0x82, 0x73, 0x32, 0xd8,
29117 ++ 0xa6, 0xa3, 0xed, 0x0e, 0x2d, 0xf6, 0x5f, 0xfd,
29118 ++ 0x80, 0xa6, 0x7a, 0xe0, 0xdf, 0x78, 0x15, 0x29,
29119 ++ 0x74, 0x33, 0xd0, 0x9e, 0x83, 0x86, 0x72, 0x22,
29120 ++ 0x57, 0x29, 0xb9, 0x9e, 0x5d, 0xd3, 0x1a, 0xb5,
29121 ++ 0x96, 0x72, 0x41, 0x3d, 0xf1, 0x64, 0x43, 0x67,
29122 ++ 0xee, 0xaa, 0x5c, 0xd3, 0x9a, 0x96, 0x13, 0x11,
29123 ++ 0x5d, 0xf3, 0x0c, 0x87, 0x82, 0x1e, 0x41, 0x9e,
29124 ++ 0xd0, 0x27, 0xd7, 0x54, 0x3b, 0x67, 0x73, 0x09,
29125 ++ 0x91, 0xe9, 0xd5, 0x36, 0xa7, 0xb5, 0x55, 0xe4,
29126 ++ 0xf3, 0x21, 0x51, 0x49, 0x22, 0x07, 0x55, 0x4f,
29127 ++ 0x44, 0x4b, 0xd2, 0x15, 0x93, 0x17, 0x2a, 0xfa,
29128 ++ 0x4d, 0x4a, 0x57, 0xdb, 0x4c, 0xa6, 0xeb, 0xec,
29129 ++ 0x53, 0x25, 0x6c, 0x21, 0xed, 0x00, 0x4c, 0x3b,
29130 ++ 0xca, 0x14, 0x57, 0xa9, 0xd6, 0x6a, 0xcd, 0x8d,
29131 ++ 0x5e, 0x74, 0xac, 0x72, 0xc1, 0x97, 0xe5, 0x1b,
29132 ++ 0x45, 0x4e, 0xda, 0xfc, 0xcc, 0x40, 0xe8, 0x48,
29133 ++ 0x88, 0x0b, 0xa3, 0xe3, 0x8d, 0x83, 0x42, 0xc3,
29134 ++ 0x23, 0xfd, 0x68, 0xb5, 0x8e, 0xf1, 0x9d, 0x63,
29135 ++ 0x77, 0xe9, 0xa3, 0x8e, 0x8c, 0x26, 0x6b, 0xbd,
29136 ++ 0x72, 0x73, 0x35, 0x0c, 0x03, 0xf8, 0x43, 0x78,
29137 ++ 0x52, 0x71, 0x15, 0x1f, 0x71, 0x5d, 0x6e, 0xed,
29138 ++ 0xb9, 0xcc, 0x86, 0x30, 0xdb, 0x2b, 0xd3, 0x82,
29139 ++ 0x88, 0x23, 0x71, 0x90, 0x53, 0x5c, 0xa9, 0x2f,
29140 ++ 0x76, 0x01, 0xb7, 0x9a, 0xfe, 0x43, 0x55, 0xa3,
29141 ++ 0x04, 0x9b, 0x0e, 0xe4, 0x59, 0xdf, 0xc9, 0xe9,
29142 ++ 0xb1, 0xea, 0x29, 0x28, 0x3c, 0x5c, 0xae, 0x72,
29143 ++ 0x84, 0xb6, 0xc6, 0xeb, 0x0c, 0x27, 0x07, 0x74,
29144 ++ 0x90, 0x0d, 0x31, 0xb0, 0x00, 0x77, 0xe9, 0x40,
29145 ++ 0x70, 0x6f, 0x68, 0xa7, 0xfd, 0x06, 0xec, 0x4b,
29146 ++ 0xc0, 0xb7, 0xac, 0xbc, 0x33, 0xb7, 0x6d, 0x0a,
29147 ++ 0xbd, 0x12, 0x1b, 0x59, 0xcb, 0xdd, 0x32, 0xf5,
29148 ++ 0x1d, 0x94, 0x57, 0x76, 0x9e, 0x0c, 0x18, 0x98,
29149 ++ 0x71, 0xd7, 0x2a, 0xdb, 0x0b, 0x7b, 0xa7, 0x71,
29150 ++ 0xb7, 0x67, 0x81, 0x23, 0x96, 0xae, 0xb9, 0x7e,
29151 ++ 0x32, 0x43, 0x92, 0x8a, 0x19, 0xa0, 0xc4, 0xd4,
29152 ++ 0x3b, 0x57, 0xf9, 0x4a, 0x2c, 0xfb, 0x51, 0x46,
29153 ++ 0xbb, 0xcb, 0x5d, 0xb3, 0xef, 0x13, 0x93, 0x6e,
29154 ++ 0x68, 0x42, 0x54, 0x57, 0xd3, 0x6a, 0x3a, 0x8f,
29155 ++ 0x9d, 0x66, 0xbf, 0xbd, 0x36, 0x23, 0xf5, 0x93,
29156 ++ 0x83, 0x7b, 0x9c, 0xc0, 0xdd, 0xc5, 0x49, 0xc0,
29157 ++ 0x64, 0xed, 0x07, 0x12, 0xb3, 0xe6, 0xe4, 0xe5,
29158 ++ 0x38, 0x95, 0x23, 0xb1, 0xa0, 0x3b, 0x1a, 0x61,
29159 ++ 0xda, 0x17, 0xac, 0xc3, 0x58, 0xdd, 0x74, 0x64,
29160 ++ 0x22, 0x11, 0xe8, 0x32, 0x1d, 0x16, 0x93, 0x85,
29161 ++ 0x99, 0xa5, 0x9c, 0x34, 0x55, 0xb1, 0xe9, 0x20,
29162 ++ 0x72, 0xc9, 0x28, 0x7b, 0x79, 0x00, 0xa1, 0xa6,
29163 ++ 0xa3, 0x27, 0x40, 0x18, 0x8a, 0x54, 0xe0, 0xcc,
29164 ++ 0xe8, 0x4e, 0x8e, 0x43, 0x96, 0xe7, 0x3f, 0xc8,
29165 ++ 0xe9, 0xb2, 0xf9, 0xc9, 0xda, 0x04, 0x71, 0x50,
29166 ++ 0x47, 0xe4, 0xaa, 0xce, 0xa2, 0x30, 0xc8, 0xe4,
29167 ++ 0xac, 0xc7, 0x0d, 0x06, 0x2e, 0xe6, 0xe8, 0x80,
29168 ++ 0x36, 0x29, 0x9e, 0x01, 0xb8, 0xc3, 0xf0, 0xa0,
29169 ++ 0x5d, 0x7a, 0xca, 0x4d, 0xa0, 0x57, 0xbd, 0x2a,
29170 ++ 0x45, 0xa7, 0x7f, 0x9c, 0x93, 0x07, 0x8f, 0x35,
29171 ++ 0x67, 0x92, 0xe3, 0xe9, 0x7f, 0xa8, 0x61, 0x43,
29172 ++ 0x9e, 0x25, 0x4f, 0x33, 0x76, 0x13, 0x6e, 0x12,
29173 ++ 0xb9, 0xdd, 0xa4, 0x7c, 0x08, 0x9f, 0x7c, 0xe7,
29174 ++ 0x0a, 0x8d, 0x84, 0x06, 0xa4, 0x33, 0x17, 0x34,
29175 ++ 0x5e, 0x10, 0x7c, 0xc0, 0xa8, 0x3d, 0x1f, 0x42,
29176 ++ 0x20, 0x51, 0x65, 0x5d, 0x09, 0xc3, 0xaa, 0xc0,
29177 ++ 0xc8, 0x0d, 0xf0, 0x79, 0xbc, 0x20, 0x1b, 0x95,
29178 ++ 0xe7, 0x06, 0x7d, 0x47, 0x20, 0x03, 0x1a, 0x74,
29179 ++ 0xdd, 0xe2, 0xd4, 0xae, 0x38, 0x71, 0x9b, 0xf5,
29180 ++ 0x80, 0xec, 0x08, 0x4e, 0x56, 0xba, 0x76, 0x12,
29181 ++ 0x1a, 0xdf, 0x48, 0xf3, 0xae, 0xb3, 0xe6, 0xe6,
29182 ++ 0xbe, 0xc0, 0x91, 0x2e, 0x01, 0xb3, 0x01, 0x86,
29183 ++ 0xa2, 0xb9, 0x52, 0xd1, 0x21, 0xae, 0xd4, 0x97,
29184 ++ 0x1d, 0xef, 0x41, 0x12, 0x95, 0x3d, 0x48, 0x45,
29185 ++ 0x1c, 0x56, 0x32, 0x8f, 0xb8, 0x43, 0xbb, 0x19,
29186 ++ 0xf3, 0xca, 0xe9, 0xeb, 0x6d, 0x84, 0xbe, 0x86,
29187 ++ 0x06, 0xe2, 0x36, 0xb2, 0x62, 0x9d, 0xd3, 0x4c,
29188 ++ 0x48, 0x18, 0x54, 0x13, 0x4e, 0xcf, 0xfd, 0xba,
29189 ++ 0x84, 0xb9, 0x30, 0x53, 0xcf, 0xfb, 0xb9, 0x29,
29190 ++ 0x8f, 0xdc, 0x9f, 0xef, 0x60, 0x0b, 0x64, 0xf6,
29191 ++ 0x8b, 0xee, 0xa6, 0x91, 0xc2, 0x41, 0x6c, 0xf6,
29192 ++ 0xfa, 0x79, 0x67, 0x4b, 0xc1, 0x3f, 0xaf, 0x09,
29193 ++ 0x81, 0xd4, 0x5d, 0xcb, 0x09, 0xdf, 0x36, 0x31,
29194 ++ 0xc0, 0x14, 0x3c, 0x7c, 0x0e, 0x65, 0x95, 0x99,
29195 ++ 0x6d, 0xa3, 0xf4, 0xd7, 0x38, 0xee, 0x1a, 0x2b,
29196 ++ 0x37, 0xe2, 0xa4, 0x3b, 0x4b, 0xd0, 0x65, 0xca,
29197 ++ 0xf8, 0xc3, 0xe8, 0x15, 0x20, 0xef, 0xf2, 0x00,
29198 ++ 0xfd, 0x01, 0x09, 0xc5, 0xc8, 0x17, 0x04, 0x93,
29199 ++ 0xd0, 0x93, 0x03, 0x55, 0xc5, 0xfe, 0x32, 0xa3,
29200 ++ 0x3e, 0x28, 0x2d, 0x3b, 0x93, 0x8a, 0xcc, 0x07,
29201 ++ 0x72, 0x80, 0x8b, 0x74, 0x16, 0x24, 0xbb, 0xda,
29202 ++ 0x94, 0x39, 0x30, 0x8f, 0xb1, 0xcd, 0x4a, 0x90,
29203 ++ 0x92, 0x7c, 0x14, 0x8f, 0x95, 0x4e, 0xac, 0x9b,
29204 ++ 0xd8, 0x8f, 0x1a, 0x87, 0xa4, 0x32, 0x27, 0x8a,
29205 ++ 0xba, 0xf7, 0x41, 0xcf, 0x84, 0x37, 0x19, 0xe6,
29206 ++ 0x06, 0xf5, 0x0e, 0xcf, 0x36, 0xf5, 0x9e, 0x6c,
29207 ++ 0xde, 0xbc, 0xff, 0x64, 0x7e, 0x4e, 0x59, 0x57,
29208 ++ 0x48, 0xfe, 0x14, 0xf7, 0x9c, 0x93, 0x5d, 0x15,
29209 ++ 0xad, 0xcc, 0x11, 0xb1, 0x17, 0x18, 0xb2, 0x7e,
29210 ++ 0xcc, 0xab, 0xe9, 0xce, 0x7d, 0x77, 0x5b, 0x51,
29211 ++ 0x1b, 0x1e, 0x20, 0xa8, 0x32, 0x06, 0x0e, 0x75,
29212 ++ 0x93, 0xac, 0xdb, 0x35, 0x37, 0x1f, 0xe9, 0x19,
29213 ++ 0x1d, 0xb4, 0x71, 0x97, 0xd6, 0x4e, 0x2c, 0x08,
29214 ++ 0xa5, 0x13, 0xf9, 0x0e, 0x7e, 0x78, 0x6e, 0x14,
29215 ++ 0xe0, 0xa9, 0xb9, 0x96, 0x4c, 0x80, 0x82, 0xba,
29216 ++ 0x17, 0xb3, 0x9d, 0x69, 0xb0, 0x84, 0x46, 0xff,
29217 ++ 0xf9, 0x52, 0x79, 0x94, 0x58, 0x3a, 0x62, 0x90,
29218 ++ 0x15, 0x35, 0x71, 0x10, 0x37, 0xed, 0xa1, 0x8e,
29219 ++ 0x53, 0x6e, 0xf4, 0x26, 0x57, 0x93, 0x15, 0x93,
29220 ++ 0xf6, 0x81, 0x2c, 0x5a, 0x10, 0xda, 0x92, 0xad,
29221 ++ 0x2f, 0xdb, 0x28, 0x31, 0x2d, 0x55, 0x04, 0xd2,
29222 ++ 0x06, 0x28, 0x8c, 0x1e, 0xdc, 0xea, 0x54, 0xac,
29223 ++ 0xff, 0xb7, 0x6c, 0x30, 0x15, 0xd4, 0xb4, 0x0d,
29224 ++ 0x00, 0x93, 0x57, 0xdd, 0xd2, 0x07, 0x07, 0x06,
29225 ++ 0xd9, 0x43, 0x9b, 0xcd, 0x3a, 0xf4, 0x7d, 0x4c,
29226 ++ 0x36, 0x5d, 0x23, 0xa2, 0xcc, 0x57, 0x40, 0x91,
29227 ++ 0xe9, 0x2c, 0x2f, 0x2c, 0xd5, 0x30, 0x9b, 0x17,
29228 ++ 0xb0, 0xc9, 0xf7, 0xa7, 0x2f, 0xd1, 0x93, 0x20,
29229 ++ 0x6b, 0xc6, 0xc1, 0xe4, 0x6f, 0xcb, 0xd1, 0xe7,
29230 ++ 0x09, 0x0f, 0x9e, 0xdc, 0xaa, 0x9f, 0x2f, 0xdf,
29231 ++ 0x56, 0x9f, 0xd4, 0x33, 0x04, 0xaf, 0xd3, 0x6c,
29232 ++ 0x58, 0x61, 0xf0, 0x30, 0xec, 0xf2, 0x7f, 0xf2,
29233 ++ 0x9c, 0xdf, 0x39, 0xbb, 0x6f, 0xa2, 0x8c, 0x7e,
29234 ++ 0xc4, 0x22, 0x51, 0x71, 0xc0, 0x4d, 0x14, 0x1a,
29235 ++ 0xc4, 0xcd, 0x04, 0xd9, 0x87, 0x08, 0x50, 0x05,
29236 ++ 0xcc, 0xaf, 0xf6, 0xf0, 0x8f, 0x92, 0x54, 0x58,
29237 ++ 0xc2, 0xc7, 0x09, 0x7a, 0x59, 0x02, 0x05, 0xe8,
29238 ++ 0xb0, 0x86, 0xd9, 0xbf, 0x7b, 0x35, 0x51, 0x4d,
29239 ++ 0xaf, 0x08, 0x97, 0x2c, 0x65, 0xda, 0x2a, 0x71,
29240 ++ 0x3a, 0xa8, 0x51, 0xcc, 0xf2, 0x73, 0x27, 0xc3,
29241 ++ 0xfd, 0x62, 0xcf, 0xe3, 0xb2, 0xca, 0xcb, 0xbe,
29242 ++ 0x1a, 0x0a, 0xa1, 0x34, 0x7b, 0x77, 0xc4, 0x62,
29243 ++ 0x68, 0x78, 0x5f, 0x94, 0x07, 0x04, 0x65, 0x16,
29244 ++ 0x4b, 0x61, 0xcb, 0xff, 0x75, 0x26, 0x50, 0x66,
29245 ++ 0x1f, 0x6e, 0x93, 0xf8, 0xc5, 0x51, 0xeb, 0xa4,
29246 ++ 0x4a, 0x48, 0x68, 0x6b, 0xe2, 0x5e, 0x44, 0xb2,
29247 ++ 0x50, 0x2c, 0x6c, 0xae, 0x79, 0x4e, 0x66, 0x35,
29248 ++ 0x81, 0x50, 0xac, 0xbc, 0x3f, 0xb1, 0x0c, 0xf3,
29249 ++ 0x05, 0x3c, 0x4a, 0xa3, 0x6c, 0x2a, 0x79, 0xb4,
29250 ++ 0xb7, 0xab, 0xca, 0xc7, 0x9b, 0x8e, 0xcd, 0x5f,
29251 ++ 0x11, 0x03, 0xcb, 0x30, 0xa3, 0xab, 0xda, 0xfe,
29252 ++ 0x64, 0xb9, 0xbb, 0xd8, 0x5e, 0x3a, 0x1a, 0x56,
29253 ++ 0xe5, 0x05, 0x48, 0x90, 0x1e, 0x61, 0x69, 0x1b,
29254 ++ 0x22, 0xe6, 0x1a, 0x3c, 0x75, 0xad, 0x1f, 0x37,
29255 ++ 0x28, 0xdc, 0xe4, 0x6d, 0xbd, 0x42, 0xdc, 0xd3,
29256 ++ 0xc8, 0xb6, 0x1c, 0x48, 0xfe, 0x94, 0x77, 0x7f,
29257 ++ 0xbd, 0x62, 0xac, 0xa3, 0x47, 0x27, 0xcf, 0x5f,
29258 ++ 0xd9, 0xdb, 0xaf, 0xec, 0xf7, 0x5e, 0xc1, 0xb0,
29259 ++ 0x9d, 0x01, 0x26, 0x99, 0x7e, 0x8f, 0x03, 0x70,
29260 ++ 0xb5, 0x42, 0xbe, 0x67, 0x28, 0x1b, 0x7c, 0xbd,
29261 ++ 0x61, 0x21, 0x97, 0xcc, 0x5c, 0xe1, 0x97, 0x8f,
29262 ++ 0x8d, 0xde, 0x2b, 0xaa, 0xa7, 0x71, 0x1d, 0x1e,
29263 ++ 0x02, 0x73, 0x70, 0x58, 0x32, 0x5b, 0x1d, 0x67,
29264 ++ 0x3d, 0xe0, 0x74, 0x4f, 0x03, 0xf2, 0x70, 0x51,
29265 ++ 0x79, 0xf1, 0x61, 0x70, 0x15, 0x74, 0x9d, 0x23,
29266 ++ 0x89, 0xde, 0xac, 0xfd, 0xde, 0xd0, 0x1f, 0xc3,
29267 ++ 0x87, 0x44, 0x35, 0x4b, 0xe5, 0xb0, 0x60, 0xc5,
29268 ++ 0x22, 0xe4, 0x9e, 0xca, 0xeb, 0xd5, 0x3a, 0x09,
29269 ++ 0x45, 0xa4, 0xdb, 0xfa, 0x3f, 0xeb, 0x1b, 0xc7,
29270 ++ 0xc8, 0x14, 0x99, 0x51, 0x92, 0x10, 0xed, 0xed,
29271 ++ 0x28, 0xe0, 0xa1, 0xf8, 0x26, 0xcf, 0xcd, 0xcb,
29272 ++ 0x63, 0xa1, 0x3b, 0xe3, 0xdf, 0x7e, 0xfe, 0xa6,
29273 ++ 0xf0, 0x81, 0x9a, 0xbf, 0x55, 0xde, 0x54, 0xd5,
29274 ++ 0x56, 0x60, 0x98, 0x10, 0x68, 0xf4, 0x38, 0x96,
29275 ++ 0x8e, 0x6f, 0x1d, 0x44, 0x7f, 0xd6, 0x2f, 0xfe,
29276 ++ 0x55, 0xfb, 0x0c, 0x7e, 0x67, 0xe2, 0x61, 0x44,
29277 ++ 0xed, 0xf2, 0x35, 0x30, 0x5d, 0xe9, 0xc7, 0xd6,
29278 ++ 0x6d, 0xe0, 0xa0, 0xed, 0xf3, 0xfc, 0xd8, 0x3e,
29279 ++ 0x0a, 0x7b, 0xcd, 0xaf, 0x65, 0x68, 0x18, 0xc0,
29280 ++ 0xec, 0x04, 0x1c, 0x74, 0x6d, 0xe2, 0x6e, 0x79,
29281 ++ 0xd4, 0x11, 0x2b, 0x62, 0xd5, 0x27, 0xad, 0x4f,
29282 ++ 0x01, 0x59, 0x73, 0xcc, 0x6a, 0x53, 0xfb, 0x2d,
29283 ++ 0xd5, 0x4e, 0x99, 0x21, 0x65, 0x4d, 0xf5, 0x82,
29284 ++ 0xf7, 0xd8, 0x42, 0xce, 0x6f, 0x3d, 0x36, 0x47,
29285 ++ 0xf1, 0x05, 0x16, 0xe8, 0x1b, 0x6a, 0x8f, 0x93,
29286 ++ 0xf2, 0x8f, 0x37, 0x40, 0x12, 0x28, 0xa3, 0xe6,
29287 ++ 0xb9, 0x17, 0x4a, 0x1f, 0xb1, 0xd1, 0x66, 0x69,
29288 ++ 0x86, 0xc4, 0xfc, 0x97, 0xae, 0x3f, 0x8f, 0x1e,
29289 ++ 0x2b, 0xdf, 0xcd, 0xf9, 0x3c
29290 ++};
29291 ++static const u8 dec_output011[] __initconst = {
29292 ++ 0x7a, 0x57, 0xf2, 0xc7, 0x06, 0x3f, 0x50, 0x7b,
29293 ++ 0x36, 0x1a, 0x66, 0x5c, 0xb9, 0x0e, 0x5e, 0x3b,
29294 ++ 0x45, 0x60, 0xbe, 0x9a, 0x31, 0x9f, 0xff, 0x5d,
29295 ++ 0x66, 0x34, 0xb4, 0xdc, 0xfb, 0x9d, 0x8e, 0xee,
29296 ++ 0x6a, 0x33, 0xa4, 0x07, 0x3c, 0xf9, 0x4c, 0x30,
29297 ++ 0xa1, 0x24, 0x52, 0xf9, 0x50, 0x46, 0x88, 0x20,
29298 ++ 0x02, 0x32, 0x3a, 0x0e, 0x99, 0x63, 0xaf, 0x1f,
29299 ++ 0x15, 0x28, 0x2a, 0x05, 0xff, 0x57, 0x59, 0x5e,
29300 ++ 0x18, 0xa1, 0x1f, 0xd0, 0x92, 0x5c, 0x88, 0x66,
29301 ++ 0x1b, 0x00, 0x64, 0xa5, 0x93, 0x8d, 0x06, 0x46,
29302 ++ 0xb0, 0x64, 0x8b, 0x8b, 0xef, 0x99, 0x05, 0x35,
29303 ++ 0x85, 0xb3, 0xf3, 0x33, 0xbb, 0xec, 0x66, 0xb6,
29304 ++ 0x3d, 0x57, 0x42, 0xe3, 0xb4, 0xc6, 0xaa, 0xb0,
29305 ++ 0x41, 0x2a, 0xb9, 0x59, 0xa9, 0xf6, 0x3e, 0x15,
29306 ++ 0x26, 0x12, 0x03, 0x21, 0x4c, 0x74, 0x43, 0x13,
29307 ++ 0x2a, 0x03, 0x27, 0x09, 0xb4, 0xfb, 0xe7, 0xb7,
29308 ++ 0x40, 0xff, 0x5e, 0xce, 0x48, 0x9a, 0x60, 0xe3,
29309 ++ 0x8b, 0x80, 0x8c, 0x38, 0x2d, 0xcb, 0x93, 0x37,
29310 ++ 0x74, 0x05, 0x52, 0x6f, 0x73, 0x3e, 0xc3, 0xbc,
29311 ++ 0xca, 0x72, 0x0a, 0xeb, 0xf1, 0x3b, 0xa0, 0x95,
29312 ++ 0xdc, 0x8a, 0xc4, 0xa9, 0xdc, 0xca, 0x44, 0xd8,
29313 ++ 0x08, 0x63, 0x6a, 0x36, 0xd3, 0x3c, 0xb8, 0xac,
29314 ++ 0x46, 0x7d, 0xfd, 0xaa, 0xeb, 0x3e, 0x0f, 0x45,
29315 ++ 0x8f, 0x49, 0xda, 0x2b, 0xf2, 0x12, 0xbd, 0xaf,
29316 ++ 0x67, 0x8a, 0x63, 0x48, 0x4b, 0x55, 0x5f, 0x6d,
29317 ++ 0x8c, 0xb9, 0x76, 0x34, 0x84, 0xae, 0xc2, 0xfc,
29318 ++ 0x52, 0x64, 0x82, 0xf7, 0xb0, 0x06, 0xf0, 0x45,
29319 ++ 0x73, 0x12, 0x50, 0x30, 0x72, 0xea, 0x78, 0x9a,
29320 ++ 0xa8, 0xaf, 0xb5, 0xe3, 0xbb, 0x77, 0x52, 0xec,
29321 ++ 0x59, 0x84, 0xbf, 0x6b, 0x8f, 0xce, 0x86, 0x5e,
29322 ++ 0x1f, 0x23, 0xe9, 0xfb, 0x08, 0x86, 0xf7, 0x10,
29323 ++ 0xb9, 0xf2, 0x44, 0x96, 0x44, 0x63, 0xa9, 0xa8,
29324 ++ 0x78, 0x00, 0x23, 0xd6, 0xc7, 0xe7, 0x6e, 0x66,
29325 ++ 0x4f, 0xcc, 0xee, 0x15, 0xb3, 0xbd, 0x1d, 0xa0,
29326 ++ 0xe5, 0x9c, 0x1b, 0x24, 0x2c, 0x4d, 0x3c, 0x62,
29327 ++ 0x35, 0x9c, 0x88, 0x59, 0x09, 0xdd, 0x82, 0x1b,
29328 ++ 0xcf, 0x0a, 0x83, 0x6b, 0x3f, 0xae, 0x03, 0xc4,
29329 ++ 0xb4, 0xdd, 0x7e, 0x5b, 0x28, 0x76, 0x25, 0x96,
29330 ++ 0xd9, 0xc9, 0x9d, 0x5f, 0x86, 0xfa, 0xf6, 0xd7,
29331 ++ 0xd2, 0xe6, 0x76, 0x1d, 0x0f, 0xa1, 0xdc, 0x74,
29332 ++ 0x05, 0x1b, 0x1d, 0xe0, 0xcd, 0x16, 0xb0, 0xa8,
29333 ++ 0x8a, 0x34, 0x7b, 0x15, 0x11, 0x77, 0xe5, 0x7b,
29334 ++ 0x7e, 0x20, 0xf7, 0xda, 0x38, 0xda, 0xce, 0x70,
29335 ++ 0xe9, 0xf5, 0x6c, 0xd9, 0xbe, 0x0c, 0x4c, 0x95,
29336 ++ 0x4c, 0xc2, 0x9b, 0x34, 0x55, 0x55, 0xe1, 0xf3,
29337 ++ 0x46, 0x8e, 0x48, 0x74, 0x14, 0x4f, 0x9d, 0xc9,
29338 ++ 0xf5, 0xe8, 0x1a, 0xf0, 0x11, 0x4a, 0xc1, 0x8d,
29339 ++ 0xe0, 0x93, 0xa0, 0xbe, 0x09, 0x1c, 0x2b, 0x4e,
29340 ++ 0x0f, 0xb2, 0x87, 0x8b, 0x84, 0xfe, 0x92, 0x32,
29341 ++ 0x14, 0xd7, 0x93, 0xdf, 0xe7, 0x44, 0xbc, 0xc5,
29342 ++ 0xae, 0x53, 0x69, 0xd8, 0xb3, 0x79, 0x37, 0x80,
29343 ++ 0xe3, 0x17, 0x5c, 0xec, 0x53, 0x00, 0x9a, 0xe3,
29344 ++ 0x8e, 0xdc, 0x38, 0xb8, 0x66, 0xf0, 0xd3, 0xad,
29345 ++ 0x1d, 0x02, 0x96, 0x86, 0x3e, 0x9d, 0x3b, 0x5d,
29346 ++ 0xa5, 0x7f, 0x21, 0x10, 0xf1, 0x1f, 0x13, 0x20,
29347 ++ 0xf9, 0x57, 0x87, 0x20, 0xf5, 0x5f, 0xf1, 0x17,
29348 ++ 0x48, 0x0a, 0x51, 0x5a, 0xcd, 0x19, 0x03, 0xa6,
29349 ++ 0x5a, 0xd1, 0x12, 0x97, 0xe9, 0x48, 0xe2, 0x1d,
29350 ++ 0x83, 0x75, 0x50, 0xd9, 0x75, 0x7d, 0x6a, 0x82,
29351 ++ 0xa1, 0xf9, 0x4e, 0x54, 0x87, 0x89, 0xc9, 0x0c,
29352 ++ 0xb7, 0x5b, 0x6a, 0x91, 0xc1, 0x9c, 0xb2, 0xa9,
29353 ++ 0xdc, 0x9a, 0xa4, 0x49, 0x0a, 0x6d, 0x0d, 0xbb,
29354 ++ 0xde, 0x86, 0x44, 0xdd, 0x5d, 0x89, 0x2b, 0x96,
29355 ++ 0x0f, 0x23, 0x95, 0xad, 0xcc, 0xa2, 0xb3, 0xb9,
29356 ++ 0x7e, 0x74, 0x38, 0xba, 0x9f, 0x73, 0xae, 0x5f,
29357 ++ 0xf8, 0x68, 0xa2, 0xe0, 0xa9, 0xce, 0xbd, 0x40,
29358 ++ 0xd4, 0x4c, 0x6b, 0xd2, 0x56, 0x62, 0xb0, 0xcc,
29359 ++ 0x63, 0x7e, 0x5b, 0xd3, 0xae, 0xd1, 0x75, 0xce,
29360 ++ 0xbb, 0xb4, 0x5b, 0xa8, 0xf8, 0xb4, 0xac, 0x71,
29361 ++ 0x75, 0xaa, 0xc9, 0x9f, 0xbb, 0x6c, 0xad, 0x0f,
29362 ++ 0x55, 0x5d, 0xe8, 0x85, 0x7d, 0xf9, 0x21, 0x35,
29363 ++ 0xea, 0x92, 0x85, 0x2b, 0x00, 0xec, 0x84, 0x90,
29364 ++ 0x0a, 0x63, 0x96, 0xe4, 0x6b, 0xa9, 0x77, 0xb8,
29365 ++ 0x91, 0xf8, 0x46, 0x15, 0x72, 0x63, 0x70, 0x01,
29366 ++ 0x40, 0xa3, 0xa5, 0x76, 0x62, 0x2b, 0xbf, 0xf1,
29367 ++ 0xe5, 0x8d, 0x9f, 0xa3, 0xfa, 0x9b, 0x03, 0xbe,
29368 ++ 0xfe, 0x65, 0x6f, 0xa2, 0x29, 0x0d, 0x54, 0xb4,
29369 ++ 0x71, 0xce, 0xa9, 0xd6, 0x3d, 0x88, 0xf9, 0xaf,
29370 ++ 0x6b, 0xa8, 0x9e, 0xf4, 0x16, 0x96, 0x36, 0xb9,
29371 ++ 0x00, 0xdc, 0x10, 0xab, 0xb5, 0x08, 0x31, 0x1f,
29372 ++ 0x00, 0xb1, 0x3c, 0xd9, 0x38, 0x3e, 0xc6, 0x04,
29373 ++ 0xa7, 0x4e, 0xe8, 0xae, 0xed, 0x98, 0xc2, 0xf7,
29374 ++ 0xb9, 0x00, 0x5f, 0x8c, 0x60, 0xd1, 0xe5, 0x15,
29375 ++ 0xf7, 0xae, 0x1e, 0x84, 0x88, 0xd1, 0xf6, 0xbc,
29376 ++ 0x3a, 0x89, 0x35, 0x22, 0x83, 0x7c, 0xca, 0xf0,
29377 ++ 0x33, 0x82, 0x4c, 0x79, 0x3c, 0xfd, 0xb1, 0xae,
29378 ++ 0x52, 0x62, 0x55, 0xd2, 0x41, 0x60, 0xc6, 0xbb,
29379 ++ 0xfa, 0x0e, 0x59, 0xd6, 0xa8, 0xfe, 0x5d, 0xed,
29380 ++ 0x47, 0x3d, 0xe0, 0xea, 0x1f, 0x6e, 0x43, 0x51,
29381 ++ 0xec, 0x10, 0x52, 0x56, 0x77, 0x42, 0x6b, 0x52,
29382 ++ 0x87, 0xd8, 0xec, 0xe0, 0xaa, 0x76, 0xa5, 0x84,
29383 ++ 0x2a, 0x22, 0x24, 0xfd, 0x92, 0x40, 0x88, 0xd5,
29384 ++ 0x85, 0x1c, 0x1f, 0x6b, 0x47, 0xa0, 0xc4, 0xe4,
29385 ++ 0xef, 0xf4, 0xea, 0xd7, 0x59, 0xac, 0x2a, 0x9e,
29386 ++ 0x8c, 0xfa, 0x1f, 0x42, 0x08, 0xfe, 0x4f, 0x74,
29387 ++ 0xa0, 0x26, 0xf5, 0xb3, 0x84, 0xf6, 0x58, 0x5f,
29388 ++ 0x26, 0x66, 0x3e, 0xd7, 0xe4, 0x22, 0x91, 0x13,
29389 ++ 0xc8, 0xac, 0x25, 0x96, 0x23, 0xd8, 0x09, 0xea,
29390 ++ 0x45, 0x75, 0x23, 0xb8, 0x5f, 0xc2, 0x90, 0x8b,
29391 ++ 0x09, 0xc4, 0xfc, 0x47, 0x6c, 0x6d, 0x0a, 0xef,
29392 ++ 0x69, 0xa4, 0x38, 0x19, 0xcf, 0x7d, 0xf9, 0x09,
29393 ++ 0x73, 0x9b, 0x60, 0x5a, 0xf7, 0x37, 0xb5, 0xfe,
29394 ++ 0x9f, 0xe3, 0x2b, 0x4c, 0x0d, 0x6e, 0x19, 0xf1,
29395 ++ 0xd6, 0xc0, 0x70, 0xf3, 0x9d, 0x22, 0x3c, 0xf9,
29396 ++ 0x49, 0xce, 0x30, 0x8e, 0x44, 0xb5, 0x76, 0x15,
29397 ++ 0x8f, 0x52, 0xfd, 0xa5, 0x04, 0xb8, 0x55, 0x6a,
29398 ++ 0x36, 0x59, 0x7c, 0xc4, 0x48, 0xb8, 0xd7, 0xab,
29399 ++ 0x05, 0x66, 0xe9, 0x5e, 0x21, 0x6f, 0x6b, 0x36,
29400 ++ 0x29, 0xbb, 0xe9, 0xe3, 0xa2, 0x9a, 0xa8, 0xcd,
29401 ++ 0x55, 0x25, 0x11, 0xba, 0x5a, 0x58, 0xa0, 0xde,
29402 ++ 0xae, 0x19, 0x2a, 0x48, 0x5a, 0xff, 0x36, 0xcd,
29403 ++ 0x6d, 0x16, 0x7a, 0x73, 0x38, 0x46, 0xe5, 0x47,
29404 ++ 0x59, 0xc8, 0xa2, 0xf6, 0xe2, 0x6c, 0x83, 0xc5,
29405 ++ 0x36, 0x2c, 0x83, 0x7d, 0xb4, 0x01, 0x05, 0x69,
29406 ++ 0xe7, 0xaf, 0x5c, 0xc4, 0x64, 0x82, 0x12, 0x21,
29407 ++ 0xef, 0xf7, 0xd1, 0x7d, 0xb8, 0x8d, 0x8c, 0x98,
29408 ++ 0x7c, 0x5f, 0x7d, 0x92, 0x88, 0xb9, 0x94, 0x07,
29409 ++ 0x9c, 0xd8, 0xe9, 0x9c, 0x17, 0x38, 0xe3, 0x57,
29410 ++ 0x6c, 0xe0, 0xdc, 0xa5, 0x92, 0x42, 0xb3, 0xbd,
29411 ++ 0x50, 0xa2, 0x7e, 0xb5, 0xb1, 0x52, 0x72, 0x03,
29412 ++ 0x97, 0xd8, 0xaa, 0x9a, 0x1e, 0x75, 0x41, 0x11,
29413 ++ 0xa3, 0x4f, 0xcc, 0xd4, 0xe3, 0x73, 0xad, 0x96,
29414 ++ 0xdc, 0x47, 0x41, 0x9f, 0xb0, 0xbe, 0x79, 0x91,
29415 ++ 0xf5, 0xb6, 0x18, 0xfe, 0xc2, 0x83, 0x18, 0x7d,
29416 ++ 0x73, 0xd9, 0x4f, 0x83, 0x84, 0x03, 0xb3, 0xf0,
29417 ++ 0x77, 0x66, 0x3d, 0x83, 0x63, 0x2e, 0x2c, 0xf9,
29418 ++ 0xdd, 0xa6, 0x1f, 0x89, 0x82, 0xb8, 0x23, 0x42,
29419 ++ 0xeb, 0xe2, 0xca, 0x70, 0x82, 0x61, 0x41, 0x0a,
29420 ++ 0x6d, 0x5f, 0x75, 0xc5, 0xe2, 0xc4, 0x91, 0x18,
29421 ++ 0x44, 0x22, 0xfa, 0x34, 0x10, 0xf5, 0x20, 0xdc,
29422 ++ 0xb7, 0xdd, 0x2a, 0x20, 0x77, 0xf5, 0xf9, 0xce,
29423 ++ 0xdb, 0xa0, 0x0a, 0x52, 0x2a, 0x4e, 0xdd, 0xcc,
29424 ++ 0x97, 0xdf, 0x05, 0xe4, 0x5e, 0xb7, 0xaa, 0xf0,
29425 ++ 0xe2, 0x80, 0xff, 0xba, 0x1a, 0x0f, 0xac, 0xdf,
29426 ++ 0x02, 0x32, 0xe6, 0xf7, 0xc7, 0x17, 0x13, 0xb7,
29427 ++ 0xfc, 0x98, 0x48, 0x8c, 0x0d, 0x82, 0xc9, 0x80,
29428 ++ 0x7a, 0xe2, 0x0a, 0xc5, 0xb4, 0xde, 0x7c, 0x3c,
29429 ++ 0x79, 0x81, 0x0e, 0x28, 0x65, 0x79, 0x67, 0x82,
29430 ++ 0x69, 0x44, 0x66, 0x09, 0xf7, 0x16, 0x1a, 0xf9,
29431 ++ 0x7d, 0x80, 0xa1, 0x79, 0x14, 0xa9, 0xc8, 0x20,
29432 ++ 0xfb, 0xa2, 0x46, 0xbe, 0x08, 0x35, 0x17, 0x58,
29433 ++ 0xc1, 0x1a, 0xda, 0x2a, 0x6b, 0x2e, 0x1e, 0xe6,
29434 ++ 0x27, 0x55, 0x7b, 0x19, 0xe2, 0xfb, 0x64, 0xfc,
29435 ++ 0x5e, 0x15, 0x54, 0x3c, 0xe7, 0xc2, 0x11, 0x50,
29436 ++ 0x30, 0xb8, 0x72, 0x03, 0x0b, 0x1a, 0x9f, 0x86,
29437 ++ 0x27, 0x11, 0x5c, 0x06, 0x2b, 0xbd, 0x75, 0x1a,
29438 ++ 0x0a, 0xda, 0x01, 0xfa, 0x5c, 0x4a, 0xc1, 0x80,
29439 ++ 0x3a, 0x6e, 0x30, 0xc8, 0x2c, 0xeb, 0x56, 0xec,
29440 ++ 0x89, 0xfa, 0x35, 0x7b, 0xb2, 0xf0, 0x97, 0x08,
29441 ++ 0x86, 0x53, 0xbe, 0xbd, 0x40, 0x41, 0x38, 0x1c,
29442 ++ 0xb4, 0x8b, 0x79, 0x2e, 0x18, 0x96, 0x94, 0xde,
29443 ++ 0xe8, 0xca, 0xe5, 0x9f, 0x92, 0x9f, 0x15, 0x5d,
29444 ++ 0x56, 0x60, 0x5c, 0x09, 0xf9, 0x16, 0xf4, 0x17,
29445 ++ 0x0f, 0xf6, 0x4c, 0xda, 0xe6, 0x67, 0x89, 0x9f,
29446 ++ 0xca, 0x6c, 0xe7, 0x9b, 0x04, 0x62, 0x0e, 0x26,
29447 ++ 0xa6, 0x52, 0xbd, 0x29, 0xff, 0xc7, 0xa4, 0x96,
29448 ++ 0xe6, 0x6a, 0x02, 0xa5, 0x2e, 0x7b, 0xfe, 0x97,
29449 ++ 0x68, 0x3e, 0x2e, 0x5f, 0x3b, 0x0f, 0x36, 0xd6,
29450 ++ 0x98, 0x19, 0x59, 0x48, 0xd2, 0xc6, 0xe1, 0x55,
29451 ++ 0x1a, 0x6e, 0xd6, 0xed, 0x2c, 0xba, 0xc3, 0x9e,
29452 ++ 0x64, 0xc9, 0x95, 0x86, 0x35, 0x5e, 0x3e, 0x88,
29453 ++ 0x69, 0x99, 0x4b, 0xee, 0xbe, 0x9a, 0x99, 0xb5,
29454 ++ 0x6e, 0x58, 0xae, 0xdd, 0x22, 0xdb, 0xdd, 0x6b,
29455 ++ 0xfc, 0xaf, 0x90, 0xa3, 0x3d, 0xa4, 0xc1, 0x15,
29456 ++ 0x92, 0x18, 0x8d, 0xd2, 0x4b, 0x7b, 0x06, 0xd1,
29457 ++ 0x37, 0xb5, 0xe2, 0x7c, 0x2c, 0xf0, 0x25, 0xe4,
29458 ++ 0x94, 0x2a, 0xbd, 0xe3, 0x82, 0x70, 0x78, 0xa3,
29459 ++ 0x82, 0x10, 0x5a, 0x90, 0xd7, 0xa4, 0xfa, 0xaf,
29460 ++ 0x1a, 0x88, 0x59, 0xdc, 0x74, 0x12, 0xb4, 0x8e,
29461 ++ 0xd7, 0x19, 0x46, 0xf4, 0x84, 0x69, 0x9f, 0xbb,
29462 ++ 0x70, 0xa8, 0x4c, 0x52, 0x81, 0xa9, 0xff, 0x76,
29463 ++ 0x1c, 0xae, 0xd8, 0x11, 0x3d, 0x7f, 0x7d, 0xc5,
29464 ++ 0x12, 0x59, 0x28, 0x18, 0xc2, 0xa2, 0xb7, 0x1c,
29465 ++ 0x88, 0xf8, 0xd6, 0x1b, 0xa6, 0x7d, 0x9e, 0xde,
29466 ++ 0x29, 0xf8, 0xed, 0xff, 0xeb, 0x92, 0x24, 0x4f,
29467 ++ 0x05, 0xaa, 0xd9, 0x49, 0xba, 0x87, 0x59, 0x51,
29468 ++ 0xc9, 0x20, 0x5c, 0x9b, 0x74, 0xcf, 0x03, 0xd9,
29469 ++ 0x2d, 0x34, 0xc7, 0x5b, 0xa5, 0x40, 0xb2, 0x99,
29470 ++ 0xf5, 0xcb, 0xb4, 0xf6, 0xb7, 0x72, 0x4a, 0xd6,
29471 ++ 0xbd, 0xb0, 0xf3, 0x93, 0xe0, 0x1b, 0xa8, 0x04,
29472 ++ 0x1e, 0x35, 0xd4, 0x80, 0x20, 0xf4, 0x9c, 0x31,
29473 ++ 0x6b, 0x45, 0xb9, 0x15, 0xb0, 0x5e, 0xdd, 0x0a,
29474 ++ 0x33, 0x9c, 0x83, 0xcd, 0x58, 0x89, 0x50, 0x56,
29475 ++ 0xbb, 0x81, 0x00, 0x91, 0x32, 0xf3, 0x1b, 0x3e,
29476 ++ 0xcf, 0x45, 0xe1, 0xf9, 0xe1, 0x2c, 0x26, 0x78,
29477 ++ 0x93, 0x9a, 0x60, 0x46, 0xc9, 0xb5, 0x5e, 0x6a,
29478 ++ 0x28, 0x92, 0x87, 0x3f, 0x63, 0x7b, 0xdb, 0xf7,
29479 ++ 0xd0, 0x13, 0x9d, 0x32, 0x40, 0x5e, 0xcf, 0xfb,
29480 ++ 0x79, 0x68, 0x47, 0x4c, 0xfd, 0x01, 0x17, 0xe6,
29481 ++ 0x97, 0x93, 0x78, 0xbb, 0xa6, 0x27, 0xa3, 0xe8,
29482 ++ 0x1a, 0xe8, 0x94, 0x55, 0x7d, 0x08, 0xe5, 0xdc,
29483 ++ 0x66, 0xa3, 0x69, 0xc8, 0xca, 0xc5, 0xa1, 0x84,
29484 ++ 0x55, 0xde, 0x08, 0x91, 0x16, 0x3a, 0x0c, 0x86,
29485 ++ 0xab, 0x27, 0x2b, 0x64, 0x34, 0x02, 0x6c, 0x76,
29486 ++ 0x8b, 0xc6, 0xaf, 0xcc, 0xe1, 0xd6, 0x8c, 0x2a,
29487 ++ 0x18, 0x3d, 0xa6, 0x1b, 0x37, 0x75, 0x45, 0x73,
29488 ++ 0xc2, 0x75, 0xd7, 0x53, 0x78, 0x3a, 0xd6, 0xe8,
29489 ++ 0x29, 0xd2, 0x4a, 0xa8, 0x1e, 0x82, 0xf6, 0xb6,
29490 ++ 0x81, 0xde, 0x21, 0xed, 0x2b, 0x56, 0xbb, 0xf2,
29491 ++ 0xd0, 0x57, 0xc1, 0x7c, 0xd2, 0x6a, 0xd2, 0x56,
29492 ++ 0xf5, 0x13, 0x5f, 0x1c, 0x6a, 0x0b, 0x74, 0xfb,
29493 ++ 0xe9, 0xfe, 0x9e, 0xea, 0x95, 0xb2, 0x46, 0xab,
29494 ++ 0x0a, 0xfc, 0xfd, 0xf3, 0xbb, 0x04, 0x2b, 0x76,
29495 ++ 0x1b, 0xa4, 0x74, 0xb0, 0xc1, 0x78, 0xc3, 0x69,
29496 ++ 0xe2, 0xb0, 0x01, 0xe1, 0xde, 0x32, 0x4c, 0x8d,
29497 ++ 0x1a, 0xb3, 0x38, 0x08, 0xd5, 0xfc, 0x1f, 0xdc,
29498 ++ 0x0e, 0x2c, 0x9c, 0xb1, 0xa1, 0x63, 0x17, 0x22,
29499 ++ 0xf5, 0x6c, 0x93, 0x70, 0x74, 0x00, 0xf8, 0x39,
29500 ++ 0x01, 0x94, 0xd1, 0x32, 0x23, 0x56, 0x5d, 0xa6,
29501 ++ 0x02, 0x76, 0x76, 0x93, 0xce, 0x2f, 0x19, 0xe9,
29502 ++ 0x17, 0x52, 0xae, 0x6e, 0x2c, 0x6d, 0x61, 0x7f,
29503 ++ 0x3b, 0xaa, 0xe0, 0x52, 0x85, 0xc5, 0x65, 0xc1,
29504 ++ 0xbb, 0x8e, 0x5b, 0x21, 0xd5, 0xc9, 0x78, 0x83,
29505 ++ 0x07, 0x97, 0x4c, 0x62, 0x61, 0x41, 0xd4, 0xfc,
29506 ++ 0xc9, 0x39, 0xe3, 0x9b, 0xd0, 0xcc, 0x75, 0xc4,
29507 ++ 0x97, 0xe6, 0xdd, 0x2a, 0x5f, 0xa6, 0xe8, 0x59,
29508 ++ 0x6c, 0x98, 0xb9, 0x02, 0xe2, 0xa2, 0xd6, 0x68,
29509 ++ 0xee, 0x3b, 0x1d, 0xe3, 0x4d, 0x5b, 0x30, 0xef,
29510 ++ 0x03, 0xf2, 0xeb, 0x18, 0x57, 0x36, 0xe8, 0xa1,
29511 ++ 0xf4, 0x47, 0xfb, 0xcb, 0x8f, 0xcb, 0xc8, 0xf3,
29512 ++ 0x4f, 0x74, 0x9d, 0x9d, 0xb1, 0x8d, 0x14, 0x44,
29513 ++ 0xd9, 0x19, 0xb4, 0x54, 0x4f, 0x75, 0x19, 0x09,
29514 ++ 0xa0, 0x75, 0xbc, 0x3b, 0x82, 0xc6, 0x3f, 0xb8,
29515 ++ 0x83, 0x19, 0x6e, 0xd6, 0x37, 0xfe, 0x6e, 0x8a,
29516 ++ 0x4e, 0xe0, 0x4a, 0xab, 0x7b, 0xc8, 0xb4, 0x1d,
29517 ++ 0xf4, 0xed, 0x27, 0x03, 0x65, 0xa2, 0xa1, 0xae,
29518 ++ 0x11, 0xe7, 0x98, 0x78, 0x48, 0x91, 0xd2, 0xd2,
29519 ++ 0xd4, 0x23, 0x78, 0x50, 0xb1, 0x5b, 0x85, 0x10,
29520 ++ 0x8d, 0xca, 0x5f, 0x0f, 0x71, 0xae, 0x72, 0x9a,
29521 ++ 0xf6, 0x25, 0x19, 0x60, 0x06, 0xf7, 0x10, 0x34,
29522 ++ 0x18, 0x0d, 0xc9, 0x9f, 0x7b, 0x0c, 0x9b, 0x8f,
29523 ++ 0x91, 0x1b, 0x9f, 0xcd, 0x10, 0xee, 0x75, 0xf9,
29524 ++ 0x97, 0x66, 0xfc, 0x4d, 0x33, 0x6e, 0x28, 0x2b,
29525 ++ 0x92, 0x85, 0x4f, 0xab, 0x43, 0x8d, 0x8f, 0x7d,
29526 ++ 0x86, 0xa7, 0xc7, 0xd8, 0xd3, 0x0b, 0x8b, 0x57,
29527 ++ 0xb6, 0x1d, 0x95, 0x0d, 0xe9, 0xbc, 0xd9, 0x03,
29528 ++ 0xd9, 0x10, 0x19, 0xc3, 0x46, 0x63, 0x55, 0x87,
29529 ++ 0x61, 0x79, 0x6c, 0x95, 0x0e, 0x9c, 0xdd, 0xca,
29530 ++ 0xc3, 0xf3, 0x64, 0xf0, 0x7d, 0x76, 0xb7, 0x53,
29531 ++ 0x67, 0x2b, 0x1e, 0x44, 0x56, 0x81, 0xea, 0x8f,
29532 ++ 0x5c, 0x42, 0x16, 0xb8, 0x28, 0xeb, 0x1b, 0x61,
29533 ++ 0x10, 0x1e, 0xbf, 0xec, 0xa8
29534 ++};
29535 ++static const u8 dec_assoc011[] __initconst = {
29536 ++ 0xd6, 0x31, 0xda, 0x5d, 0x42, 0x5e, 0xd7
29537 ++};
29538 ++static const u8 dec_nonce011[] __initconst = {
29539 ++ 0xfd, 0x87, 0xd4, 0xd8, 0x62, 0xfd, 0xec, 0xaa
29540 ++};
29541 ++static const u8 dec_key011[] __initconst = {
29542 ++ 0x35, 0x4e, 0xb5, 0x70, 0x50, 0x42, 0x8a, 0x85,
29543 ++ 0xf2, 0xfb, 0xed, 0x7b, 0xd0, 0x9e, 0x97, 0xca,
29544 ++ 0xfa, 0x98, 0x66, 0x63, 0xee, 0x37, 0xcc, 0x52,
29545 ++ 0xfe, 0xd1, 0xdf, 0x95, 0x15, 0x34, 0x29, 0x38
29546 ++};
29547 ++
29548 ++static const u8 dec_input012[] __initconst = {
29549 ++ 0x52, 0x34, 0xb3, 0x65, 0x3b, 0xb7, 0xe5, 0xd3,
29550 ++ 0xab, 0x49, 0x17, 0x60, 0xd2, 0x52, 0x56, 0xdf,
29551 ++ 0xdf, 0x34, 0x56, 0x82, 0xe2, 0xbe, 0xe5, 0xe1,
29552 ++ 0x28, 0xd1, 0x4e, 0x5f, 0x4f, 0x01, 0x7d, 0x3f,
29553 ++ 0x99, 0x6b, 0x30, 0x6e, 0x1a, 0x7c, 0x4c, 0x8e,
29554 ++ 0x62, 0x81, 0xae, 0x86, 0x3f, 0x6b, 0xd0, 0xb5,
29555 ++ 0xa9, 0xcf, 0x50, 0xf1, 0x02, 0x12, 0xa0, 0x0b,
29556 ++ 0x24, 0xe9, 0xe6, 0x72, 0x89, 0x2c, 0x52, 0x1b,
29557 ++ 0x34, 0x38, 0xf8, 0x75, 0x5f, 0xa0, 0x74, 0xe2,
29558 ++ 0x99, 0xdd, 0xa6, 0x4b, 0x14, 0x50, 0x4e, 0xf1,
29559 ++ 0xbe, 0xd6, 0x9e, 0xdb, 0xb2, 0x24, 0x27, 0x74,
29560 ++ 0x12, 0x4a, 0x78, 0x78, 0x17, 0xa5, 0x58, 0x8e,
29561 ++ 0x2f, 0xf9, 0xf4, 0x8d, 0xee, 0x03, 0x88, 0xae,
29562 ++ 0xb8, 0x29, 0xa1, 0x2f, 0x4b, 0xee, 0x92, 0xbd,
29563 ++ 0x87, 0xb3, 0xce, 0x34, 0x21, 0x57, 0x46, 0x04,
29564 ++ 0x49, 0x0c, 0x80, 0xf2, 0x01, 0x13, 0xa1, 0x55,
29565 ++ 0xb3, 0xff, 0x44, 0x30, 0x3c, 0x1c, 0xd0, 0xef,
29566 ++ 0xbc, 0x18, 0x74, 0x26, 0xad, 0x41, 0x5b, 0x5b,
29567 ++ 0x3e, 0x9a, 0x7a, 0x46, 0x4f, 0x16, 0xd6, 0x74,
29568 ++ 0x5a, 0xb7, 0x3a, 0x28, 0x31, 0xd8, 0xae, 0x26,
29569 ++ 0xac, 0x50, 0x53, 0x86, 0xf2, 0x56, 0xd7, 0x3f,
29570 ++ 0x29, 0xbc, 0x45, 0x68, 0x8e, 0xcb, 0x98, 0x64,
29571 ++ 0xdd, 0xc9, 0xba, 0xb8, 0x4b, 0x7b, 0x82, 0xdd,
29572 ++ 0x14, 0xa7, 0xcb, 0x71, 0x72, 0x00, 0x5c, 0xad,
29573 ++ 0x7b, 0x6a, 0x89, 0xa4, 0x3d, 0xbf, 0xb5, 0x4b,
29574 ++ 0x3e, 0x7c, 0x5a, 0xcf, 0xb8, 0xa1, 0xc5, 0x6e,
29575 ++ 0xc8, 0xb6, 0x31, 0x57, 0x7b, 0xdf, 0xa5, 0x7e,
29576 ++ 0xb1, 0xd6, 0x42, 0x2a, 0x31, 0x36, 0xd1, 0xd0,
29577 ++ 0x3f, 0x7a, 0xe5, 0x94, 0xd6, 0x36, 0xa0, 0x6f,
29578 ++ 0xb7, 0x40, 0x7d, 0x37, 0xc6, 0x55, 0x7c, 0x50,
29579 ++ 0x40, 0x6d, 0x29, 0x89, 0xe3, 0x5a, 0xae, 0x97,
29580 ++ 0xe7, 0x44, 0x49, 0x6e, 0xbd, 0x81, 0x3d, 0x03,
29581 ++ 0x93, 0x06, 0x12, 0x06, 0xe2, 0x41, 0x12, 0x4a,
29582 ++ 0xf1, 0x6a, 0xa4, 0x58, 0xa2, 0xfb, 0xd2, 0x15,
29583 ++ 0xba, 0xc9, 0x79, 0xc9, 0xce, 0x5e, 0x13, 0xbb,
29584 ++ 0xf1, 0x09, 0x04, 0xcc, 0xfd, 0xe8, 0x51, 0x34,
29585 ++ 0x6a, 0xe8, 0x61, 0x88, 0xda, 0xed, 0x01, 0x47,
29586 ++ 0x84, 0xf5, 0x73, 0x25, 0xf9, 0x1c, 0x42, 0x86,
29587 ++ 0x07, 0xf3, 0x5b, 0x1a, 0x01, 0xb3, 0xeb, 0x24,
29588 ++ 0x32, 0x8d, 0xf6, 0xed, 0x7c, 0x4b, 0xeb, 0x3c,
29589 ++ 0x36, 0x42, 0x28, 0xdf, 0xdf, 0xb6, 0xbe, 0xd9,
29590 ++ 0x8c, 0x52, 0xd3, 0x2b, 0x08, 0x90, 0x8c, 0xe7,
29591 ++ 0x98, 0x31, 0xe2, 0x32, 0x8e, 0xfc, 0x11, 0x48,
29592 ++ 0x00, 0xa8, 0x6a, 0x42, 0x4a, 0x02, 0xc6, 0x4b,
29593 ++ 0x09, 0xf1, 0xe3, 0x49, 0xf3, 0x45, 0x1f, 0x0e,
29594 ++ 0xbc, 0x56, 0xe2, 0xe4, 0xdf, 0xfb, 0xeb, 0x61,
29595 ++ 0xfa, 0x24, 0xc1, 0x63, 0x75, 0xbb, 0x47, 0x75,
29596 ++ 0xaf, 0xe1, 0x53, 0x16, 0x96, 0x21, 0x85, 0x26,
29597 ++ 0x11, 0xb3, 0x76, 0xe3, 0x23, 0xa1, 0x6b, 0x74,
29598 ++ 0x37, 0xd0, 0xde, 0x06, 0x90, 0x71, 0x5d, 0x43,
29599 ++ 0x88, 0x9b, 0x00, 0x54, 0xa6, 0x75, 0x2f, 0xa1,
29600 ++ 0xc2, 0x0b, 0x73, 0x20, 0x1d, 0xb6, 0x21, 0x79,
29601 ++ 0x57, 0x3f, 0xfa, 0x09, 0xbe, 0x8a, 0x33, 0xc3,
29602 ++ 0x52, 0xf0, 0x1d, 0x82, 0x31, 0xd1, 0x55, 0xb5,
29603 ++ 0x6c, 0x99, 0x25, 0xcf, 0x5c, 0x32, 0xce, 0xe9,
29604 ++ 0x0d, 0xfa, 0x69, 0x2c, 0xd5, 0x0d, 0xc5, 0x6d,
29605 ++ 0x86, 0xd0, 0x0c, 0x3b, 0x06, 0x50, 0x79, 0xe8,
29606 ++ 0xc3, 0xae, 0x04, 0xe6, 0xcd, 0x51, 0xe4, 0x26,
29607 ++ 0x9b, 0x4f, 0x7e, 0xa6, 0x0f, 0xab, 0xd8, 0xe5,
29608 ++ 0xde, 0xa9, 0x00, 0x95, 0xbe, 0xa3, 0x9d, 0x5d,
29609 ++ 0xb2, 0x09, 0x70, 0x18, 0x1c, 0xf0, 0xac, 0x29,
29610 ++ 0x23, 0x02, 0x29, 0x28, 0xd2, 0x74, 0x35, 0x57,
29611 ++ 0x62, 0x0f, 0x24, 0xea, 0x5e, 0x33, 0xc2, 0x92,
29612 ++ 0xf3, 0x78, 0x4d, 0x30, 0x1e, 0xa1, 0x99, 0xa9,
29613 ++ 0x82, 0xb0, 0x42, 0x31, 0x8d, 0xad, 0x8a, 0xbc,
29614 ++ 0xfc, 0xd4, 0x57, 0x47, 0x3e, 0xb4, 0x50, 0xdd,
29615 ++ 0x6e, 0x2c, 0x80, 0x4d, 0x22, 0xf1, 0xfb, 0x57,
29616 ++ 0xc4, 0xdd, 0x17, 0xe1, 0x8a, 0x36, 0x4a, 0xb3,
29617 ++ 0x37, 0xca, 0xc9, 0x4e, 0xab, 0xd5, 0x69, 0xc4,
29618 ++ 0xf4, 0xbc, 0x0b, 0x3b, 0x44, 0x4b, 0x29, 0x9c,
29619 ++ 0xee, 0xd4, 0x35, 0x22, 0x21, 0xb0, 0x1f, 0x27,
29620 ++ 0x64, 0xa8, 0x51, 0x1b, 0xf0, 0x9f, 0x19, 0x5c,
29621 ++ 0xfb, 0x5a, 0x64, 0x74, 0x70, 0x45, 0x09, 0xf5,
29622 ++ 0x64, 0xfe, 0x1a, 0x2d, 0xc9, 0x14, 0x04, 0x14,
29623 ++ 0xcf, 0xd5, 0x7d, 0x60, 0xaf, 0x94, 0x39, 0x94,
29624 ++ 0xe2, 0x7d, 0x79, 0x82, 0xd0, 0x65, 0x3b, 0x6b,
29625 ++ 0x9c, 0x19, 0x84, 0xb4, 0x6d, 0xb3, 0x0c, 0x99,
29626 ++ 0xc0, 0x56, 0xa8, 0xbd, 0x73, 0xce, 0x05, 0x84,
29627 ++ 0x3e, 0x30, 0xaa, 0xc4, 0x9b, 0x1b, 0x04, 0x2a,
29628 ++ 0x9f, 0xd7, 0x43, 0x2b, 0x23, 0xdf, 0xbf, 0xaa,
29629 ++ 0xd5, 0xc2, 0x43, 0x2d, 0x70, 0xab, 0xdc, 0x75,
29630 ++ 0xad, 0xac, 0xf7, 0xc0, 0xbe, 0x67, 0xb2, 0x74,
29631 ++ 0xed, 0x67, 0x10, 0x4a, 0x92, 0x60, 0xc1, 0x40,
29632 ++ 0x50, 0x19, 0x8a, 0x8a, 0x8c, 0x09, 0x0e, 0x72,
29633 ++ 0xe1, 0x73, 0x5e, 0xe8, 0x41, 0x85, 0x63, 0x9f,
29634 ++ 0x3f, 0xd7, 0x7d, 0xc4, 0xfb, 0x22, 0x5d, 0x92,
29635 ++ 0x6c, 0xb3, 0x1e, 0xe2, 0x50, 0x2f, 0x82, 0xa8,
29636 ++ 0x28, 0xc0, 0xb5, 0xd7, 0x5f, 0x68, 0x0d, 0x2c,
29637 ++ 0x2d, 0xaf, 0x7e, 0xfa, 0x2e, 0x08, 0x0f, 0x1f,
29638 ++ 0x70, 0x9f, 0xe9, 0x19, 0x72, 0x55, 0xf8, 0xfb,
29639 ++ 0x51, 0xd2, 0x33, 0x5d, 0xa0, 0xd3, 0x2b, 0x0a,
29640 ++ 0x6c, 0xbc, 0x4e, 0xcf, 0x36, 0x4d, 0xdc, 0x3b,
29641 ++ 0xe9, 0x3e, 0x81, 0x7c, 0x61, 0xdb, 0x20, 0x2d,
29642 ++ 0x3a, 0xc3, 0xb3, 0x0c, 0x1e, 0x00, 0xb9, 0x7c,
29643 ++ 0xf5, 0xca, 0x10, 0x5f, 0x3a, 0x71, 0xb3, 0xe4,
29644 ++ 0x20, 0xdb, 0x0c, 0x2a, 0x98, 0x63, 0x45, 0x00,
29645 ++ 0x58, 0xf6, 0x68, 0xe4, 0x0b, 0xda, 0x13, 0x3b,
29646 ++ 0x60, 0x5c, 0x76, 0xdb, 0xb9, 0x97, 0x71, 0xe4,
29647 ++ 0xd9, 0xb7, 0xdb, 0xbd, 0x68, 0xc7, 0x84, 0x84,
29648 ++ 0xaa, 0x7c, 0x68, 0x62, 0x5e, 0x16, 0xfc, 0xba,
29649 ++ 0x72, 0xaa, 0x9a, 0xa9, 0xeb, 0x7c, 0x75, 0x47,
29650 ++ 0x97, 0x7e, 0xad, 0xe2, 0xd9, 0x91, 0xe8, 0xe4,
29651 ++ 0xa5, 0x31, 0xd7, 0x01, 0x8e, 0xa2, 0x11, 0x88,
29652 ++ 0x95, 0xb9, 0xf2, 0x9b, 0xd3, 0x7f, 0x1b, 0x81,
29653 ++ 0x22, 0xf7, 0x98, 0x60, 0x0a, 0x64, 0xa6, 0xc1,
29654 ++ 0xf6, 0x49, 0xc7, 0xe3, 0x07, 0x4d, 0x94, 0x7a,
29655 ++ 0xcf, 0x6e, 0x68, 0x0c, 0x1b, 0x3f, 0x6e, 0x2e,
29656 ++ 0xee, 0x92, 0xfa, 0x52, 0xb3, 0x59, 0xf8, 0xf1,
29657 ++ 0x8f, 0x6a, 0x66, 0xa3, 0x82, 0x76, 0x4a, 0x07,
29658 ++ 0x1a, 0xc7, 0xdd, 0xf5, 0xda, 0x9c, 0x3c, 0x24,
29659 ++ 0xbf, 0xfd, 0x42, 0xa1, 0x10, 0x64, 0x6a, 0x0f,
29660 ++ 0x89, 0xee, 0x36, 0xa5, 0xce, 0x99, 0x48, 0x6a,
29661 ++ 0xf0, 0x9f, 0x9e, 0x69, 0xa4, 0x40, 0x20, 0xe9,
29662 ++ 0x16, 0x15, 0xf7, 0xdb, 0x75, 0x02, 0xcb, 0xe9,
29663 ++ 0x73, 0x8b, 0x3b, 0x49, 0x2f, 0xf0, 0xaf, 0x51,
29664 ++ 0x06, 0x5c, 0xdf, 0x27, 0x27, 0x49, 0x6a, 0xd1,
29665 ++ 0xcc, 0xc7, 0xb5, 0x63, 0xb5, 0xfc, 0xb8, 0x5c,
29666 ++ 0x87, 0x7f, 0x84, 0xb4, 0xcc, 0x14, 0xa9, 0x53,
29667 ++ 0xda, 0xa4, 0x56, 0xf8, 0xb6, 0x1b, 0xcc, 0x40,
29668 ++ 0x27, 0x52, 0x06, 0x5a, 0x13, 0x81, 0xd7, 0x3a,
29669 ++ 0xd4, 0x3b, 0xfb, 0x49, 0x65, 0x31, 0x33, 0xb2,
29670 ++ 0xfa, 0xcd, 0xad, 0x58, 0x4e, 0x2b, 0xae, 0xd2,
29671 ++ 0x20, 0xfb, 0x1a, 0x48, 0xb4, 0x3f, 0x9a, 0xd8,
29672 ++ 0x7a, 0x35, 0x4a, 0xc8, 0xee, 0x88, 0x5e, 0x07,
29673 ++ 0x66, 0x54, 0xb9, 0xec, 0x9f, 0xa3, 0xe3, 0xb9,
29674 ++ 0x37, 0xaa, 0x49, 0x76, 0x31, 0xda, 0x74, 0x2d,
29675 ++ 0x3c, 0xa4, 0x65, 0x10, 0x32, 0x38, 0xf0, 0xde,
29676 ++ 0xd3, 0x99, 0x17, 0xaa, 0x71, 0xaa, 0x8f, 0x0f,
29677 ++ 0x8c, 0xaf, 0xa2, 0xf8, 0x5d, 0x64, 0xba, 0x1d,
29678 ++ 0xa3, 0xef, 0x96, 0x73, 0xe8, 0xa1, 0x02, 0x8d,
29679 ++ 0x0c, 0x6d, 0xb8, 0x06, 0x90, 0xb8, 0x08, 0x56,
29680 ++ 0x2c, 0xa7, 0x06, 0xc9, 0xc2, 0x38, 0xdb, 0x7c,
29681 ++ 0x63, 0xb1, 0x57, 0x8e, 0xea, 0x7c, 0x79, 0xf3,
29682 ++ 0x49, 0x1d, 0xfe, 0x9f, 0xf3, 0x6e, 0xb1, 0x1d,
29683 ++ 0xba, 0x19, 0x80, 0x1a, 0x0a, 0xd3, 0xb0, 0x26,
29684 ++ 0x21, 0x40, 0xb1, 0x7c, 0xf9, 0x4d, 0x8d, 0x10,
29685 ++ 0xc1, 0x7e, 0xf4, 0xf6, 0x3c, 0xa8, 0xfd, 0x7c,
29686 ++ 0xa3, 0x92, 0xb2, 0x0f, 0xaa, 0xcc, 0xa6, 0x11,
29687 ++ 0xfe, 0x04, 0xe3, 0xd1, 0x7a, 0x32, 0x89, 0xdf,
29688 ++ 0x0d, 0xc4, 0x8f, 0x79, 0x6b, 0xca, 0x16, 0x7c,
29689 ++ 0x6e, 0xf9, 0xad, 0x0f, 0xf6, 0xfe, 0x27, 0xdb,
29690 ++ 0xc4, 0x13, 0x70, 0xf1, 0x62, 0x1a, 0x4f, 0x79,
29691 ++ 0x40, 0xc9, 0x9b, 0x8b, 0x21, 0xea, 0x84, 0xfa,
29692 ++ 0xf5, 0xf1, 0x89, 0xce, 0xb7, 0x55, 0x0a, 0x80,
29693 ++ 0x39, 0x2f, 0x55, 0x36, 0x16, 0x9c, 0x7b, 0x08,
29694 ++ 0xbd, 0x87, 0x0d, 0xa5, 0x32, 0xf1, 0x52, 0x7c,
29695 ++ 0xe8, 0x55, 0x60, 0x5b, 0xd7, 0x69, 0xe4, 0xfc,
29696 ++ 0xfa, 0x12, 0x85, 0x96, 0xea, 0x50, 0x28, 0xab,
29697 ++ 0x8a, 0xf7, 0xbb, 0x0e, 0x53, 0x74, 0xca, 0xa6,
29698 ++ 0x27, 0x09, 0xc2, 0xb5, 0xde, 0x18, 0x14, 0xd9,
29699 ++ 0xea, 0xe5, 0x29, 0x1c, 0x40, 0x56, 0xcf, 0xd7,
29700 ++ 0xae, 0x05, 0x3f, 0x65, 0xaf, 0x05, 0x73, 0xe2,
29701 ++ 0x35, 0x96, 0x27, 0x07, 0x14, 0xc0, 0xad, 0x33,
29702 ++ 0xf1, 0xdc, 0x44, 0x7a, 0x89, 0x17, 0x77, 0xd2,
29703 ++ 0x9c, 0x58, 0x60, 0xf0, 0x3f, 0x7b, 0x2d, 0x2e,
29704 ++ 0x57, 0x95, 0x54, 0x87, 0xed, 0xf2, 0xc7, 0x4c,
29705 ++ 0xf0, 0xae, 0x56, 0x29, 0x19, 0x7d, 0x66, 0x4b,
29706 ++ 0x9b, 0x83, 0x84, 0x42, 0x3b, 0x01, 0x25, 0x66,
29707 ++ 0x8e, 0x02, 0xde, 0xb9, 0x83, 0x54, 0x19, 0xf6,
29708 ++ 0x9f, 0x79, 0x0d, 0x67, 0xc5, 0x1d, 0x7a, 0x44,
29709 ++ 0x02, 0x98, 0xa7, 0x16, 0x1c, 0x29, 0x0d, 0x74,
29710 ++ 0xff, 0x85, 0x40, 0x06, 0xef, 0x2c, 0xa9, 0xc6,
29711 ++ 0xf5, 0x53, 0x07, 0x06, 0xae, 0xe4, 0xfa, 0x5f,
29712 ++ 0xd8, 0x39, 0x4d, 0xf1, 0x9b, 0x6b, 0xd9, 0x24,
29713 ++ 0x84, 0xfe, 0x03, 0x4c, 0xb2, 0x3f, 0xdf, 0xa1,
29714 ++ 0x05, 0x9e, 0x50, 0x14, 0x5a, 0xd9, 0x1a, 0xa2,
29715 ++ 0xa7, 0xfa, 0xfa, 0x17, 0xf7, 0x78, 0xd6, 0xb5,
29716 ++ 0x92, 0x61, 0x91, 0xac, 0x36, 0xfa, 0x56, 0x0d,
29717 ++ 0x38, 0x32, 0x18, 0x85, 0x08, 0x58, 0x37, 0xf0,
29718 ++ 0x4b, 0xdb, 0x59, 0xe7, 0xa4, 0x34, 0xc0, 0x1b,
29719 ++ 0x01, 0xaf, 0x2d, 0xde, 0xa1, 0xaa, 0x5d, 0xd3,
29720 ++ 0xec, 0xe1, 0xd4, 0xf7, 0xe6, 0x54, 0x68, 0xf0,
29721 ++ 0x51, 0x97, 0xa7, 0x89, 0xea, 0x24, 0xad, 0xd3,
29722 ++ 0x6e, 0x47, 0x93, 0x8b, 0x4b, 0xb4, 0xf7, 0x1c,
29723 ++ 0x42, 0x06, 0x67, 0xe8, 0x99, 0xf6, 0xf5, 0x7b,
29724 ++ 0x85, 0xb5, 0x65, 0xb5, 0xb5, 0xd2, 0x37, 0xf5,
29725 ++ 0xf3, 0x02, 0xa6, 0x4d, 0x11, 0xa7, 0xdc, 0x51,
29726 ++ 0x09, 0x7f, 0xa0, 0xd8, 0x88, 0x1c, 0x13, 0x71,
29727 ++ 0xae, 0x9c, 0xb7, 0x7b, 0x34, 0xd6, 0x4e, 0x68,
29728 ++ 0x26, 0x83, 0x51, 0xaf, 0x1d, 0xee, 0x8b, 0xbb,
29729 ++ 0x69, 0x43, 0x2b, 0x9e, 0x8a, 0xbc, 0x02, 0x0e,
29730 ++ 0xa0, 0x1b, 0xe0, 0xa8, 0x5f, 0x6f, 0xaf, 0x1b,
29731 ++ 0x8f, 0xe7, 0x64, 0x71, 0x74, 0x11, 0x7e, 0xa8,
29732 ++ 0xd8, 0xf9, 0x97, 0x06, 0xc3, 0xb6, 0xfb, 0xfb,
29733 ++ 0xb7, 0x3d, 0x35, 0x9d, 0x3b, 0x52, 0xed, 0x54,
29734 ++ 0xca, 0xf4, 0x81, 0x01, 0x2d, 0x1b, 0xc3, 0xa7,
29735 ++ 0x00, 0x3d, 0x1a, 0x39, 0x54, 0xe1, 0xf6, 0xff,
29736 ++ 0xed, 0x6f, 0x0b, 0x5a, 0x68, 0xda, 0x58, 0xdd,
29737 ++ 0xa9, 0xcf, 0x5c, 0x4a, 0xe5, 0x09, 0x4e, 0xde,
29738 ++ 0x9d, 0xbc, 0x3e, 0xee, 0x5a, 0x00, 0x3b, 0x2c,
29739 ++ 0x87, 0x10, 0x65, 0x60, 0xdd, 0xd7, 0x56, 0xd1,
29740 ++ 0x4c, 0x64, 0x45, 0xe4, 0x21, 0xec, 0x78, 0xf8,
29741 ++ 0x25, 0x7a, 0x3e, 0x16, 0x5d, 0x09, 0x53, 0x14,
29742 ++ 0xbe, 0x4f, 0xae, 0x87, 0xd8, 0xd1, 0xaa, 0x3c,
29743 ++ 0xf6, 0x3e, 0xa4, 0x70, 0x8c, 0x5e, 0x70, 0xa4,
29744 ++ 0xb3, 0x6b, 0x66, 0x73, 0xd3, 0xbf, 0x31, 0x06,
29745 ++ 0x19, 0x62, 0x93, 0x15, 0xf2, 0x86, 0xe4, 0x52,
29746 ++ 0x7e, 0x53, 0x4c, 0x12, 0x38, 0xcc, 0x34, 0x7d,
29747 ++ 0x57, 0xf6, 0x42, 0x93, 0x8a, 0xc4, 0xee, 0x5c,
29748 ++ 0x8a, 0xe1, 0x52, 0x8f, 0x56, 0x64, 0xf6, 0xa6,
29749 ++ 0xd1, 0x91, 0x57, 0x70, 0xcd, 0x11, 0x76, 0xf5,
29750 ++ 0x59, 0x60, 0x60, 0x3c, 0xc1, 0xc3, 0x0b, 0x7f,
29751 ++ 0x58, 0x1a, 0x50, 0x91, 0xf1, 0x68, 0x8f, 0x6e,
29752 ++ 0x74, 0x74, 0xa8, 0x51, 0x0b, 0xf7, 0x7a, 0x98,
29753 ++ 0x37, 0xf2, 0x0a, 0x0e, 0xa4, 0x97, 0x04, 0xb8,
29754 ++ 0x9b, 0xfd, 0xa0, 0xea, 0xf7, 0x0d, 0xe1, 0xdb,
29755 ++ 0x03, 0xf0, 0x31, 0x29, 0xf8, 0xdd, 0x6b, 0x8b,
29756 ++ 0x5d, 0xd8, 0x59, 0xa9, 0x29, 0xcf, 0x9a, 0x79,
29757 ++ 0x89, 0x19, 0x63, 0x46, 0x09, 0x79, 0x6a, 0x11,
29758 ++ 0xda, 0x63, 0x68, 0x48, 0x77, 0x23, 0xfb, 0x7d,
29759 ++ 0x3a, 0x43, 0xcb, 0x02, 0x3b, 0x7a, 0x6d, 0x10,
29760 ++ 0x2a, 0x9e, 0xac, 0xf1, 0xd4, 0x19, 0xf8, 0x23,
29761 ++ 0x64, 0x1d, 0x2c, 0x5f, 0xf2, 0xb0, 0x5c, 0x23,
29762 ++ 0x27, 0xf7, 0x27, 0x30, 0x16, 0x37, 0xb1, 0x90,
29763 ++ 0xab, 0x38, 0xfb, 0x55, 0xcd, 0x78, 0x58, 0xd4,
29764 ++ 0x7d, 0x43, 0xf6, 0x45, 0x5e, 0x55, 0x8d, 0xb1,
29765 ++ 0x02, 0x65, 0x58, 0xb4, 0x13, 0x4b, 0x36, 0xf7,
29766 ++ 0xcc, 0xfe, 0x3d, 0x0b, 0x82, 0xe2, 0x12, 0x11,
29767 ++ 0xbb, 0xe6, 0xb8, 0x3a, 0x48, 0x71, 0xc7, 0x50,
29768 ++ 0x06, 0x16, 0x3a, 0xe6, 0x7c, 0x05, 0xc7, 0xc8,
29769 ++ 0x4d, 0x2f, 0x08, 0x6a, 0x17, 0x9a, 0x95, 0x97,
29770 ++ 0x50, 0x68, 0xdc, 0x28, 0x18, 0xc4, 0x61, 0x38,
29771 ++ 0xb9, 0xe0, 0x3e, 0x78, 0xdb, 0x29, 0xe0, 0x9f,
29772 ++ 0x52, 0xdd, 0xf8, 0x4f, 0x91, 0xc1, 0xd0, 0x33,
29773 ++ 0xa1, 0x7a, 0x8e, 0x30, 0x13, 0x82, 0x07, 0x9f,
29774 ++ 0xd3, 0x31, 0x0f, 0x23, 0xbe, 0x32, 0x5a, 0x75,
29775 ++ 0xcf, 0x96, 0xb2, 0xec, 0xb5, 0x32, 0xac, 0x21,
29776 ++ 0xd1, 0x82, 0x33, 0xd3, 0x15, 0x74, 0xbd, 0x90,
29777 ++ 0xf1, 0x2c, 0xe6, 0x5f, 0x8d, 0xe3, 0x02, 0xe8,
29778 ++ 0xe9, 0xc4, 0xca, 0x96, 0xeb, 0x0e, 0xbc, 0x91,
29779 ++ 0xf4, 0xb9, 0xea, 0xd9, 0x1b, 0x75, 0xbd, 0xe1,
29780 ++ 0xac, 0x2a, 0x05, 0x37, 0x52, 0x9b, 0x1b, 0x3f,
29781 ++ 0x5a, 0xdc, 0x21, 0xc3, 0x98, 0xbb, 0xaf, 0xa3,
29782 ++ 0xf2, 0x00, 0xbf, 0x0d, 0x30, 0x89, 0x05, 0xcc,
29783 ++ 0xa5, 0x76, 0xf5, 0x06, 0xf0, 0xc6, 0x54, 0x8a,
29784 ++ 0x5d, 0xd4, 0x1e, 0xc1, 0xf2, 0xce, 0xb0, 0x62,
29785 ++ 0xc8, 0xfc, 0x59, 0x42, 0x9a, 0x90, 0x60, 0x55,
29786 ++ 0xfe, 0x88, 0xa5, 0x8b, 0xb8, 0x33, 0x0c, 0x23,
29787 ++ 0x24, 0x0d, 0x15, 0x70, 0x37, 0x1e, 0x3d, 0xf6,
29788 ++ 0xd2, 0xea, 0x92, 0x10, 0xb2, 0xc4, 0x51, 0xac,
29789 ++ 0xf2, 0xac, 0xf3, 0x6b, 0x6c, 0xaa, 0xcf, 0x12,
29790 ++ 0xc5, 0x6c, 0x90, 0x50, 0xb5, 0x0c, 0xfc, 0x1a,
29791 ++ 0x15, 0x52, 0xe9, 0x26, 0xc6, 0x52, 0xa4, 0xe7,
29792 ++ 0x81, 0x69, 0xe1, 0xe7, 0x9e, 0x30, 0x01, 0xec,
29793 ++ 0x84, 0x89, 0xb2, 0x0d, 0x66, 0xdd, 0xce, 0x28,
29794 ++ 0x5c, 0xec, 0x98, 0x46, 0x68, 0x21, 0x9f, 0x88,
29795 ++ 0x3f, 0x1f, 0x42, 0x77, 0xce, 0xd0, 0x61, 0xd4,
29796 ++ 0x20, 0xa7, 0xff, 0x53, 0xad, 0x37, 0xd0, 0x17,
29797 ++ 0x35, 0xc9, 0xfc, 0xba, 0x0a, 0x78, 0x3f, 0xf2,
29798 ++ 0xcc, 0x86, 0x89, 0xe8, 0x4b, 0x3c, 0x48, 0x33,
29799 ++ 0x09, 0x7f, 0xc6, 0xc0, 0xdd, 0xb8, 0xfd, 0x7a,
29800 ++ 0x66, 0x66, 0x65, 0xeb, 0x47, 0xa7, 0x04, 0x28,
29801 ++ 0xa3, 0x19, 0x8e, 0xa9, 0xb1, 0x13, 0x67, 0x62,
29802 ++ 0x70, 0xcf, 0xd6
29803 ++};
29804 ++static const u8 dec_output012[] __initconst = {
29805 ++ 0x74, 0xa6, 0x3e, 0xe4, 0xb1, 0xcb, 0xaf, 0xb0,
29806 ++ 0x40, 0xe5, 0x0f, 0x9e, 0xf1, 0xf2, 0x89, 0xb5,
29807 ++ 0x42, 0x34, 0x8a, 0xa1, 0x03, 0xb7, 0xe9, 0x57,
29808 ++ 0x46, 0xbe, 0x20, 0xe4, 0x6e, 0xb0, 0xeb, 0xff,
29809 ++ 0xea, 0x07, 0x7e, 0xef, 0xe2, 0x55, 0x9f, 0xe5,
29810 ++ 0x78, 0x3a, 0xb7, 0x83, 0xc2, 0x18, 0x40, 0x7b,
29811 ++ 0xeb, 0xcd, 0x81, 0xfb, 0x90, 0x12, 0x9e, 0x46,
29812 ++ 0xa9, 0xd6, 0x4a, 0xba, 0xb0, 0x62, 0xdb, 0x6b,
29813 ++ 0x99, 0xc4, 0xdb, 0x54, 0x4b, 0xb8, 0xa5, 0x71,
29814 ++ 0xcb, 0xcd, 0x63, 0x32, 0x55, 0xfb, 0x31, 0xf0,
29815 ++ 0x38, 0xf5, 0xbe, 0x78, 0xe4, 0x45, 0xce, 0x1b,
29816 ++ 0x6a, 0x5b, 0x0e, 0xf4, 0x16, 0xe4, 0xb1, 0x3d,
29817 ++ 0xf6, 0x63, 0x7b, 0xa7, 0x0c, 0xde, 0x6f, 0x8f,
29818 ++ 0x74, 0xdf, 0xe0, 0x1e, 0x9d, 0xce, 0x8f, 0x24,
29819 ++ 0xef, 0x23, 0x35, 0x33, 0x7b, 0x83, 0x34, 0x23,
29820 ++ 0x58, 0x74, 0x14, 0x77, 0x1f, 0xc2, 0x4f, 0x4e,
29821 ++ 0xc6, 0x89, 0xf9, 0x52, 0x09, 0x37, 0x64, 0x14,
29822 ++ 0xc4, 0x01, 0x6b, 0x9d, 0x77, 0xe8, 0x90, 0x5d,
29823 ++ 0xa8, 0x4a, 0x2a, 0xef, 0x5c, 0x7f, 0xeb, 0xbb,
29824 ++ 0xb2, 0xc6, 0x93, 0x99, 0x66, 0xdc, 0x7f, 0xd4,
29825 ++ 0x9e, 0x2a, 0xca, 0x8d, 0xdb, 0xe7, 0x20, 0xcf,
29826 ++ 0xe4, 0x73, 0xae, 0x49, 0x7d, 0x64, 0x0f, 0x0e,
29827 ++ 0x28, 0x46, 0xa9, 0xa8, 0x32, 0xe4, 0x0e, 0xf6,
29828 ++ 0x51, 0x53, 0xb8, 0x3c, 0xb1, 0xff, 0xa3, 0x33,
29829 ++ 0x41, 0x75, 0xff, 0xf1, 0x6f, 0xf1, 0xfb, 0xbb,
29830 ++ 0x83, 0x7f, 0x06, 0x9b, 0xe7, 0x1b, 0x0a, 0xe0,
29831 ++ 0x5c, 0x33, 0x60, 0x5b, 0xdb, 0x5b, 0xed, 0xfe,
29832 ++ 0xa5, 0x16, 0x19, 0x72, 0xa3, 0x64, 0x23, 0x00,
29833 ++ 0x02, 0xc7, 0xf3, 0x6a, 0x81, 0x3e, 0x44, 0x1d,
29834 ++ 0x79, 0x15, 0x5f, 0x9a, 0xde, 0xe2, 0xfd, 0x1b,
29835 ++ 0x73, 0xc1, 0xbc, 0x23, 0xba, 0x31, 0xd2, 0x50,
29836 ++ 0xd5, 0xad, 0x7f, 0x74, 0xa7, 0xc9, 0xf8, 0x3e,
29837 ++ 0x2b, 0x26, 0x10, 0xf6, 0x03, 0x36, 0x74, 0xe4,
29838 ++ 0x0e, 0x6a, 0x72, 0xb7, 0x73, 0x0a, 0x42, 0x28,
29839 ++ 0xc2, 0xad, 0x5e, 0x03, 0xbe, 0xb8, 0x0b, 0xa8,
29840 ++ 0x5b, 0xd4, 0xb8, 0xba, 0x52, 0x89, 0xb1, 0x9b,
29841 ++ 0xc1, 0xc3, 0x65, 0x87, 0xed, 0xa5, 0xf4, 0x86,
29842 ++ 0xfd, 0x41, 0x80, 0x91, 0x27, 0x59, 0x53, 0x67,
29843 ++ 0x15, 0x78, 0x54, 0x8b, 0x2d, 0x3d, 0xc7, 0xff,
29844 ++ 0x02, 0x92, 0x07, 0x5f, 0x7a, 0x4b, 0x60, 0x59,
29845 ++ 0x3c, 0x6f, 0x5c, 0xd8, 0xec, 0x95, 0xd2, 0xfe,
29846 ++ 0xa0, 0x3b, 0xd8, 0x3f, 0xd1, 0x69, 0xa6, 0xd6,
29847 ++ 0x41, 0xb2, 0xf4, 0x4d, 0x12, 0xf4, 0x58, 0x3e,
29848 ++ 0x66, 0x64, 0x80, 0x31, 0x9b, 0xa8, 0x4c, 0x8b,
29849 ++ 0x07, 0xb2, 0xec, 0x66, 0x94, 0x66, 0x47, 0x50,
29850 ++ 0x50, 0x5f, 0x18, 0x0b, 0x0e, 0xd6, 0xc0, 0x39,
29851 ++ 0x21, 0x13, 0x9e, 0x33, 0xbc, 0x79, 0x36, 0x02,
29852 ++ 0x96, 0x70, 0xf0, 0x48, 0x67, 0x2f, 0x26, 0xe9,
29853 ++ 0x6d, 0x10, 0xbb, 0xd6, 0x3f, 0xd1, 0x64, 0x7a,
29854 ++ 0x2e, 0xbe, 0x0c, 0x61, 0xf0, 0x75, 0x42, 0x38,
29855 ++ 0x23, 0xb1, 0x9e, 0x9f, 0x7c, 0x67, 0x66, 0xd9,
29856 ++ 0x58, 0x9a, 0xf1, 0xbb, 0x41, 0x2a, 0x8d, 0x65,
29857 ++ 0x84, 0x94, 0xfc, 0xdc, 0x6a, 0x50, 0x64, 0xdb,
29858 ++ 0x56, 0x33, 0x76, 0x00, 0x10, 0xed, 0xbe, 0xd2,
29859 ++ 0x12, 0xf6, 0xf6, 0x1b, 0xa2, 0x16, 0xde, 0xae,
29860 ++ 0x31, 0x95, 0xdd, 0xb1, 0x08, 0x7e, 0x4e, 0xee,
29861 ++ 0xe7, 0xf9, 0xa5, 0xfb, 0x5b, 0x61, 0x43, 0x00,
29862 ++ 0x40, 0xf6, 0x7e, 0x02, 0x04, 0x32, 0x4e, 0x0c,
29863 ++ 0xe2, 0x66, 0x0d, 0xd7, 0x07, 0x98, 0x0e, 0xf8,
29864 ++ 0x72, 0x34, 0x6d, 0x95, 0x86, 0xd7, 0xcb, 0x31,
29865 ++ 0x54, 0x47, 0xd0, 0x38, 0x29, 0x9c, 0x5a, 0x68,
29866 ++ 0xd4, 0x87, 0x76, 0xc9, 0xe7, 0x7e, 0xe3, 0xf4,
29867 ++ 0x81, 0x6d, 0x18, 0xcb, 0xc9, 0x05, 0xaf, 0xa0,
29868 ++ 0xfb, 0x66, 0xf7, 0xf1, 0x1c, 0xc6, 0x14, 0x11,
29869 ++ 0x4f, 0x2b, 0x79, 0x42, 0x8b, 0xbc, 0xac, 0xe7,
29870 ++ 0x6c, 0xfe, 0x0f, 0x58, 0xe7, 0x7c, 0x78, 0x39,
29871 ++ 0x30, 0xb0, 0x66, 0x2c, 0x9b, 0x6d, 0x3a, 0xe1,
29872 ++ 0xcf, 0xc9, 0xa4, 0x0e, 0x6d, 0x6d, 0x8a, 0xa1,
29873 ++ 0x3a, 0xe7, 0x28, 0xd4, 0x78, 0x4c, 0xa6, 0xa2,
29874 ++ 0x2a, 0xa6, 0x03, 0x30, 0xd7, 0xa8, 0x25, 0x66,
29875 ++ 0x87, 0x2f, 0x69, 0x5c, 0x4e, 0xdd, 0xa5, 0x49,
29876 ++ 0x5d, 0x37, 0x4a, 0x59, 0xc4, 0xaf, 0x1f, 0xa2,
29877 ++ 0xe4, 0xf8, 0xa6, 0x12, 0x97, 0xd5, 0x79, 0xf5,
29878 ++ 0xe2, 0x4a, 0x2b, 0x5f, 0x61, 0xe4, 0x9e, 0xe3,
29879 ++ 0xee, 0xb8, 0xa7, 0x5b, 0x2f, 0xf4, 0x9e, 0x6c,
29880 ++ 0xfb, 0xd1, 0xc6, 0x56, 0x77, 0xba, 0x75, 0xaa,
29881 ++ 0x3d, 0x1a, 0xa8, 0x0b, 0xb3, 0x68, 0x24, 0x00,
29882 ++ 0x10, 0x7f, 0xfd, 0xd7, 0xa1, 0x8d, 0x83, 0x54,
29883 ++ 0x4f, 0x1f, 0xd8, 0x2a, 0xbe, 0x8a, 0x0c, 0x87,
29884 ++ 0xab, 0xa2, 0xde, 0xc3, 0x39, 0xbf, 0x09, 0x03,
29885 ++ 0xa5, 0xf3, 0x05, 0x28, 0xe1, 0xe1, 0xee, 0x39,
29886 ++ 0x70, 0x9c, 0xd8, 0x81, 0x12, 0x1e, 0x02, 0x40,
29887 ++ 0xd2, 0x6e, 0xf0, 0xeb, 0x1b, 0x3d, 0x22, 0xc6,
29888 ++ 0xe5, 0xe3, 0xb4, 0x5a, 0x98, 0xbb, 0xf0, 0x22,
29889 ++ 0x28, 0x8d, 0xe5, 0xd3, 0x16, 0x48, 0x24, 0xa5,
29890 ++ 0xe6, 0x66, 0x0c, 0xf9, 0x08, 0xf9, 0x7e, 0x1e,
29891 ++ 0xe1, 0x28, 0x26, 0x22, 0xc7, 0xc7, 0x0a, 0x32,
29892 ++ 0x47, 0xfa, 0xa3, 0xbe, 0x3c, 0xc4, 0xc5, 0x53,
29893 ++ 0x0a, 0xd5, 0x94, 0x4a, 0xd7, 0x93, 0xd8, 0x42,
29894 ++ 0x99, 0xb9, 0x0a, 0xdb, 0x56, 0xf7, 0xb9, 0x1c,
29895 ++ 0x53, 0x4f, 0xfa, 0xd3, 0x74, 0xad, 0xd9, 0x68,
29896 ++ 0xf1, 0x1b, 0xdf, 0x61, 0xc6, 0x5e, 0xa8, 0x48,
29897 ++ 0xfc, 0xd4, 0x4a, 0x4c, 0x3c, 0x32, 0xf7, 0x1c,
29898 ++ 0x96, 0x21, 0x9b, 0xf9, 0xa3, 0xcc, 0x5a, 0xce,
29899 ++ 0xd5, 0xd7, 0x08, 0x24, 0xf6, 0x1c, 0xfd, 0xdd,
29900 ++ 0x38, 0xc2, 0x32, 0xe9, 0xb8, 0xe7, 0xb6, 0xfa,
29901 ++ 0x9d, 0x45, 0x13, 0x2c, 0x83, 0xfd, 0x4a, 0x69,
29902 ++ 0x82, 0xcd, 0xdc, 0xb3, 0x76, 0x0c, 0x9e, 0xd8,
29903 ++ 0xf4, 0x1b, 0x45, 0x15, 0xb4, 0x97, 0xe7, 0x58,
29904 ++ 0x34, 0xe2, 0x03, 0x29, 0x5a, 0xbf, 0xb6, 0xe0,
29905 ++ 0x5d, 0x13, 0xd9, 0x2b, 0xb4, 0x80, 0xb2, 0x45,
29906 ++ 0x81, 0x6a, 0x2e, 0x6c, 0x89, 0x7d, 0xee, 0xbb,
29907 ++ 0x52, 0xdd, 0x1f, 0x18, 0xe7, 0x13, 0x6b, 0x33,
29908 ++ 0x0e, 0xea, 0x36, 0x92, 0x77, 0x7b, 0x6d, 0x9c,
29909 ++ 0x5a, 0x5f, 0x45, 0x7b, 0x7b, 0x35, 0x62, 0x23,
29910 ++ 0xd1, 0xbf, 0x0f, 0xd0, 0x08, 0x1b, 0x2b, 0x80,
29911 ++ 0x6b, 0x7e, 0xf1, 0x21, 0x47, 0xb0, 0x57, 0xd1,
29912 ++ 0x98, 0x72, 0x90, 0x34, 0x1c, 0x20, 0x04, 0xff,
29913 ++ 0x3d, 0x5c, 0xee, 0x0e, 0x57, 0x5f, 0x6f, 0x24,
29914 ++ 0x4e, 0x3c, 0xea, 0xfc, 0xa5, 0xa9, 0x83, 0xc9,
29915 ++ 0x61, 0xb4, 0x51, 0x24, 0xf8, 0x27, 0x5e, 0x46,
29916 ++ 0x8c, 0xb1, 0x53, 0x02, 0x96, 0x35, 0xba, 0xb8,
29917 ++ 0x4c, 0x71, 0xd3, 0x15, 0x59, 0x35, 0x22, 0x20,
29918 ++ 0xad, 0x03, 0x9f, 0x66, 0x44, 0x3b, 0x9c, 0x35,
29919 ++ 0x37, 0x1f, 0x9b, 0xbb, 0xf3, 0xdb, 0x35, 0x63,
29920 ++ 0x30, 0x64, 0xaa, 0xa2, 0x06, 0xa8, 0x5d, 0xbb,
29921 ++ 0xe1, 0x9f, 0x70, 0xec, 0x82, 0x11, 0x06, 0x36,
29922 ++ 0xec, 0x8b, 0x69, 0x66, 0x24, 0x44, 0xc9, 0x4a,
29923 ++ 0x57, 0xbb, 0x9b, 0x78, 0x13, 0xce, 0x9c, 0x0c,
29924 ++ 0xba, 0x92, 0x93, 0x63, 0xb8, 0xe2, 0x95, 0x0f,
29925 ++ 0x0f, 0x16, 0x39, 0x52, 0xfd, 0x3a, 0x6d, 0x02,
29926 ++ 0x4b, 0xdf, 0x13, 0xd3, 0x2a, 0x22, 0xb4, 0x03,
29927 ++ 0x7c, 0x54, 0x49, 0x96, 0x68, 0x54, 0x10, 0xfa,
29928 ++ 0xef, 0xaa, 0x6c, 0xe8, 0x22, 0xdc, 0x71, 0x16,
29929 ++ 0x13, 0x1a, 0xf6, 0x28, 0xe5, 0x6d, 0x77, 0x3d,
29930 ++ 0xcd, 0x30, 0x63, 0xb1, 0x70, 0x52, 0xa1, 0xc5,
29931 ++ 0x94, 0x5f, 0xcf, 0xe8, 0xb8, 0x26, 0x98, 0xf7,
29932 ++ 0x06, 0xa0, 0x0a, 0x70, 0xfa, 0x03, 0x80, 0xac,
29933 ++ 0xc1, 0xec, 0xd6, 0x4c, 0x54, 0xd7, 0xfe, 0x47,
29934 ++ 0xb6, 0x88, 0x4a, 0xf7, 0x71, 0x24, 0xee, 0xf3,
29935 ++ 0xd2, 0xc2, 0x4a, 0x7f, 0xfe, 0x61, 0xc7, 0x35,
29936 ++ 0xc9, 0x37, 0x67, 0xcb, 0x24, 0x35, 0xda, 0x7e,
29937 ++ 0xca, 0x5f, 0xf3, 0x8d, 0xd4, 0x13, 0x8e, 0xd6,
29938 ++ 0xcb, 0x4d, 0x53, 0x8f, 0x53, 0x1f, 0xc0, 0x74,
29939 ++ 0xf7, 0x53, 0xb9, 0x5e, 0x23, 0x37, 0xba, 0x6e,
29940 ++ 0xe3, 0x9d, 0x07, 0x55, 0x25, 0x7b, 0xe6, 0x2a,
29941 ++ 0x64, 0xd1, 0x32, 0xdd, 0x54, 0x1b, 0x4b, 0xc0,
29942 ++ 0xe1, 0xd7, 0x69, 0x58, 0xf8, 0x93, 0x29, 0xc4,
29943 ++ 0xdd, 0x23, 0x2f, 0xa5, 0xfc, 0x9d, 0x7e, 0xf8,
29944 ++ 0xd4, 0x90, 0xcd, 0x82, 0x55, 0xdc, 0x16, 0x16,
29945 ++ 0x9f, 0x07, 0x52, 0x9b, 0x9d, 0x25, 0xed, 0x32,
29946 ++ 0xc5, 0x7b, 0xdf, 0xf6, 0x83, 0x46, 0x3d, 0x65,
29947 ++ 0xb7, 0xef, 0x87, 0x7a, 0x12, 0x69, 0x8f, 0x06,
29948 ++ 0x7c, 0x51, 0x15, 0x4a, 0x08, 0xe8, 0xac, 0x9a,
29949 ++ 0x0c, 0x24, 0xa7, 0x27, 0xd8, 0x46, 0x2f, 0xe7,
29950 ++ 0x01, 0x0e, 0x1c, 0xc6, 0x91, 0xb0, 0x6e, 0x85,
29951 ++ 0x65, 0xf0, 0x29, 0x0d, 0x2e, 0x6b, 0x3b, 0xfb,
29952 ++ 0x4b, 0xdf, 0xe4, 0x80, 0x93, 0x03, 0x66, 0x46,
29953 ++ 0x3e, 0x8a, 0x6e, 0xf3, 0x5e, 0x4d, 0x62, 0x0e,
29954 ++ 0x49, 0x05, 0xaf, 0xd4, 0xf8, 0x21, 0x20, 0x61,
29955 ++ 0x1d, 0x39, 0x17, 0xf4, 0x61, 0x47, 0x95, 0xfb,
29956 ++ 0x15, 0x2e, 0xb3, 0x4f, 0xd0, 0x5d, 0xf5, 0x7d,
29957 ++ 0x40, 0xda, 0x90, 0x3c, 0x6b, 0xcb, 0x17, 0x00,
29958 ++ 0x13, 0x3b, 0x64, 0x34, 0x1b, 0xf0, 0xf2, 0xe5,
29959 ++ 0x3b, 0xb2, 0xc7, 0xd3, 0x5f, 0x3a, 0x44, 0xa6,
29960 ++ 0x9b, 0xb7, 0x78, 0x0e, 0x42, 0x5d, 0x4c, 0xc1,
29961 ++ 0xe9, 0xd2, 0xcb, 0xb7, 0x78, 0xd1, 0xfe, 0x9a,
29962 ++ 0xb5, 0x07, 0xe9, 0xe0, 0xbe, 0xe2, 0x8a, 0xa7,
29963 ++ 0x01, 0x83, 0x00, 0x8c, 0x5c, 0x08, 0xe6, 0x63,
29964 ++ 0x12, 0x92, 0xb7, 0xb7, 0xa6, 0x19, 0x7d, 0x38,
29965 ++ 0x13, 0x38, 0x92, 0x87, 0x24, 0xf9, 0x48, 0xb3,
29966 ++ 0x5e, 0x87, 0x6a, 0x40, 0x39, 0x5c, 0x3f, 0xed,
29967 ++ 0x8f, 0xee, 0xdb, 0x15, 0x82, 0x06, 0xda, 0x49,
29968 ++ 0x21, 0x2b, 0xb5, 0xbf, 0x32, 0x7c, 0x9f, 0x42,
29969 ++ 0x28, 0x63, 0xcf, 0xaf, 0x1e, 0xf8, 0xc6, 0xa0,
29970 ++ 0xd1, 0x02, 0x43, 0x57, 0x62, 0xec, 0x9b, 0x0f,
29971 ++ 0x01, 0x9e, 0x71, 0xd8, 0x87, 0x9d, 0x01, 0xc1,
29972 ++ 0x58, 0x77, 0xd9, 0xaf, 0xb1, 0x10, 0x7e, 0xdd,
29973 ++ 0xa6, 0x50, 0x96, 0xe5, 0xf0, 0x72, 0x00, 0x6d,
29974 ++ 0x4b, 0xf8, 0x2a, 0x8f, 0x19, 0xf3, 0x22, 0x88,
29975 ++ 0x11, 0x4a, 0x8b, 0x7c, 0xfd, 0xb7, 0xed, 0xe1,
29976 ++ 0xf6, 0x40, 0x39, 0xe0, 0xe9, 0xf6, 0x3d, 0x25,
29977 ++ 0xe6, 0x74, 0x3c, 0x58, 0x57, 0x7f, 0xe1, 0x22,
29978 ++ 0x96, 0x47, 0x31, 0x91, 0xba, 0x70, 0x85, 0x28,
29979 ++ 0x6b, 0x9f, 0x6e, 0x25, 0xac, 0x23, 0x66, 0x2f,
29980 ++ 0x29, 0x88, 0x28, 0xce, 0x8c, 0x5c, 0x88, 0x53,
29981 ++ 0xd1, 0x3b, 0xcc, 0x6a, 0x51, 0xb2, 0xe1, 0x28,
29982 ++ 0x3f, 0x91, 0xb4, 0x0d, 0x00, 0x3a, 0xe3, 0xf8,
29983 ++ 0xc3, 0x8f, 0xd7, 0x96, 0x62, 0x0e, 0x2e, 0xfc,
29984 ++ 0xc8, 0x6c, 0x77, 0xa6, 0x1d, 0x22, 0xc1, 0xb8,
29985 ++ 0xe6, 0x61, 0xd7, 0x67, 0x36, 0x13, 0x7b, 0xbb,
29986 ++ 0x9b, 0x59, 0x09, 0xa6, 0xdf, 0xf7, 0x6b, 0xa3,
29987 ++ 0x40, 0x1a, 0xf5, 0x4f, 0xb4, 0xda, 0xd3, 0xf3,
29988 ++ 0x81, 0x93, 0xc6, 0x18, 0xd9, 0x26, 0xee, 0xac,
29989 ++ 0xf0, 0xaa, 0xdf, 0xc5, 0x9c, 0xca, 0xc2, 0xa2,
29990 ++ 0xcc, 0x7b, 0x5c, 0x24, 0xb0, 0xbc, 0xd0, 0x6a,
29991 ++ 0x4d, 0x89, 0x09, 0xb8, 0x07, 0xfe, 0x87, 0xad,
29992 ++ 0x0a, 0xea, 0xb8, 0x42, 0xf9, 0x5e, 0xb3, 0x3e,
29993 ++ 0x36, 0x4c, 0xaf, 0x75, 0x9e, 0x1c, 0xeb, 0xbd,
29994 ++ 0xbc, 0xbb, 0x80, 0x40, 0xa7, 0x3a, 0x30, 0xbf,
29995 ++ 0xa8, 0x44, 0xf4, 0xeb, 0x38, 0xad, 0x29, 0xba,
29996 ++ 0x23, 0xed, 0x41, 0x0c, 0xea, 0xd2, 0xbb, 0x41,
29997 ++ 0x18, 0xd6, 0xb9, 0xba, 0x65, 0x2b, 0xa3, 0x91,
29998 ++ 0x6d, 0x1f, 0xa9, 0xf4, 0xd1, 0x25, 0x8d, 0x4d,
29999 ++ 0x38, 0xff, 0x64, 0xa0, 0xec, 0xde, 0xa6, 0xb6,
30000 ++ 0x79, 0xab, 0x8e, 0x33, 0x6c, 0x47, 0xde, 0xaf,
30001 ++ 0x94, 0xa4, 0xa5, 0x86, 0x77, 0x55, 0x09, 0x92,
30002 ++ 0x81, 0x31, 0x76, 0xc7, 0x34, 0x22, 0x89, 0x8e,
30003 ++ 0x3d, 0x26, 0x26, 0xd7, 0xfc, 0x1e, 0x16, 0x72,
30004 ++ 0x13, 0x33, 0x63, 0xd5, 0x22, 0xbe, 0xb8, 0x04,
30005 ++ 0x34, 0x84, 0x41, 0xbb, 0x80, 0xd0, 0x9f, 0x46,
30006 ++ 0x48, 0x07, 0xa7, 0xfc, 0x2b, 0x3a, 0x75, 0x55,
30007 ++ 0x8c, 0xc7, 0x6a, 0xbd, 0x7e, 0x46, 0x08, 0x84,
30008 ++ 0x0f, 0xd5, 0x74, 0xc0, 0x82, 0x8e, 0xaa, 0x61,
30009 ++ 0x05, 0x01, 0xb2, 0x47, 0x6e, 0x20, 0x6a, 0x2d,
30010 ++ 0x58, 0x70, 0x48, 0x32, 0xa7, 0x37, 0xd2, 0xb8,
30011 ++ 0x82, 0x1a, 0x51, 0xb9, 0x61, 0xdd, 0xfd, 0x9d,
30012 ++ 0x6b, 0x0e, 0x18, 0x97, 0xf8, 0x45, 0x5f, 0x87,
30013 ++ 0x10, 0xcf, 0x34, 0x72, 0x45, 0x26, 0x49, 0x70,
30014 ++ 0xe7, 0xa3, 0x78, 0xe0, 0x52, 0x89, 0x84, 0x94,
30015 ++ 0x83, 0x82, 0xc2, 0x69, 0x8f, 0xe3, 0xe1, 0x3f,
30016 ++ 0x60, 0x74, 0x88, 0xc4, 0xf7, 0x75, 0x2c, 0xfb,
30017 ++ 0xbd, 0xb6, 0xc4, 0x7e, 0x10, 0x0a, 0x6c, 0x90,
30018 ++ 0x04, 0x9e, 0xc3, 0x3f, 0x59, 0x7c, 0xce, 0x31,
30019 ++ 0x18, 0x60, 0x57, 0x73, 0x46, 0x94, 0x7d, 0x06,
30020 ++ 0xa0, 0x6d, 0x44, 0xec, 0xa2, 0x0a, 0x9e, 0x05,
30021 ++ 0x15, 0xef, 0xca, 0x5c, 0xbf, 0x00, 0xeb, 0xf7,
30022 ++ 0x3d, 0x32, 0xd4, 0xa5, 0xef, 0x49, 0x89, 0x5e,
30023 ++ 0x46, 0xb0, 0xa6, 0x63, 0x5b, 0x8a, 0x73, 0xae,
30024 ++ 0x6f, 0xd5, 0x9d, 0xf8, 0x4f, 0x40, 0xb5, 0xb2,
30025 ++ 0x6e, 0xd3, 0xb6, 0x01, 0xa9, 0x26, 0xa2, 0x21,
30026 ++ 0xcf, 0x33, 0x7a, 0x3a, 0xa4, 0x23, 0x13, 0xb0,
30027 ++ 0x69, 0x6a, 0xee, 0xce, 0xd8, 0x9d, 0x01, 0x1d,
30028 ++ 0x50, 0xc1, 0x30, 0x6c, 0xb1, 0xcd, 0xa0, 0xf0,
30029 ++ 0xf0, 0xa2, 0x64, 0x6f, 0xbb, 0xbf, 0x5e, 0xe6,
30030 ++ 0xab, 0x87, 0xb4, 0x0f, 0x4f, 0x15, 0xaf, 0xb5,
30031 ++ 0x25, 0xa1, 0xb2, 0xd0, 0x80, 0x2c, 0xfb, 0xf9,
30032 ++ 0xfe, 0xd2, 0x33, 0xbb, 0x76, 0xfe, 0x7c, 0xa8,
30033 ++ 0x66, 0xf7, 0xe7, 0x85, 0x9f, 0x1f, 0x85, 0x57,
30034 ++ 0x88, 0xe1, 0xe9, 0x63, 0xe4, 0xd8, 0x1c, 0xa1,
30035 ++ 0xfb, 0xda, 0x44, 0x05, 0x2e, 0x1d, 0x3a, 0x1c,
30036 ++ 0xff, 0xc8, 0x3b, 0xc0, 0xfe, 0xda, 0x22, 0x0b,
30037 ++ 0x43, 0xd6, 0x88, 0x39, 0x4c, 0x4a, 0xa6, 0x69,
30038 ++ 0x18, 0x93, 0x42, 0x4e, 0xb5, 0xcc, 0x66, 0x0d,
30039 ++ 0x09, 0xf8, 0x1e, 0x7c, 0xd3, 0x3c, 0x99, 0x0d,
30040 ++ 0x50, 0x1d, 0x62, 0xe9, 0x57, 0x06, 0xbf, 0x19,
30041 ++ 0x88, 0xdd, 0xad, 0x7b, 0x4f, 0xf9, 0xc7, 0x82,
30042 ++ 0x6d, 0x8d, 0xc8, 0xc4, 0xc5, 0x78, 0x17, 0x20,
30043 ++ 0x15, 0xc5, 0x52, 0x41, 0xcf, 0x5b, 0xd6, 0x7f,
30044 ++ 0x94, 0x02, 0x41, 0xe0, 0x40, 0x22, 0x03, 0x5e,
30045 ++ 0xd1, 0x53, 0xd4, 0x86, 0xd3, 0x2c, 0x9f, 0x0f,
30046 ++ 0x96, 0xe3, 0x6b, 0x9a, 0x76, 0x32, 0x06, 0x47,
30047 ++ 0x4b, 0x11, 0xb3, 0xdd, 0x03, 0x65, 0xbd, 0x9b,
30048 ++ 0x01, 0xda, 0x9c, 0xb9, 0x7e, 0x3f, 0x6a, 0xc4,
30049 ++ 0x7b, 0xea, 0xd4, 0x3c, 0xb9, 0xfb, 0x5c, 0x6b,
30050 ++ 0x64, 0x33, 0x52, 0xba, 0x64, 0x78, 0x8f, 0xa4,
30051 ++ 0xaf, 0x7a, 0x61, 0x8d, 0xbc, 0xc5, 0x73, 0xe9,
30052 ++ 0x6b, 0x58, 0x97, 0x4b, 0xbf, 0x63, 0x22, 0xd3,
30053 ++ 0x37, 0x02, 0x54, 0xc5, 0xb9, 0x16, 0x4a, 0xf0,
30054 ++ 0x19, 0xd8, 0x94, 0x57, 0xb8, 0x8a, 0xb3, 0x16,
30055 ++ 0x3b, 0xd0, 0x84, 0x8e, 0x67, 0xa6, 0xa3, 0x7d,
30056 ++ 0x78, 0xec, 0x00
30057 ++};
30058 ++static const u8 dec_assoc012[] __initconst = {
30059 ++ 0xb1, 0x69, 0x83, 0x87, 0x30, 0xaa, 0x5d, 0xb8,
30060 ++ 0x77, 0xe8, 0x21, 0xff, 0x06, 0x59, 0x35, 0xce,
30061 ++ 0x75, 0xfe, 0x38, 0xef, 0xb8, 0x91, 0x43, 0x8c,
30062 ++ 0xcf, 0x70, 0xdd, 0x0a, 0x68, 0xbf, 0xd4, 0xbc,
30063 ++ 0x16, 0x76, 0x99, 0x36, 0x1e, 0x58, 0x79, 0x5e,
30064 ++ 0xd4, 0x29, 0xf7, 0x33, 0x93, 0x48, 0xdb, 0x5f,
30065 ++ 0x01, 0xae, 0x9c, 0xb6, 0xe4, 0x88, 0x6d, 0x2b,
30066 ++ 0x76, 0x75, 0xe0, 0xf3, 0x74, 0xe2, 0xc9
30067 ++};
30068 ++static const u8 dec_nonce012[] __initconst = {
30069 ++ 0x05, 0xa3, 0x93, 0xed, 0x30, 0xc5, 0xa2, 0x06
30070 ++};
30071 ++static const u8 dec_key012[] __initconst = {
30072 ++ 0xb3, 0x35, 0x50, 0x03, 0x54, 0x2e, 0x40, 0x5e,
30073 ++ 0x8f, 0x59, 0x8e, 0xc5, 0x90, 0xd5, 0x27, 0x2d,
30074 ++ 0xba, 0x29, 0x2e, 0xcb, 0x1b, 0x70, 0x44, 0x1e,
30075 ++ 0x65, 0x91, 0x6e, 0x2a, 0x79, 0x22, 0xda, 0x64
30076 ++};
30077 ++
30078 ++static const u8 dec_input013[] __initconst = {
30079 ++ 0x52, 0x34, 0xb3, 0x65, 0x3b, 0xb7, 0xe5, 0xd3,
30080 ++ 0xab, 0x49, 0x17, 0x60, 0xd2, 0x52, 0x56, 0xdf,
30081 ++ 0xdf, 0x34, 0x56, 0x82, 0xe2, 0xbe, 0xe5, 0xe1,
30082 ++ 0x28, 0xd1, 0x4e, 0x5f, 0x4f, 0x01, 0x7d, 0x3f,
30083 ++ 0x99, 0x6b, 0x30, 0x6e, 0x1a, 0x7c, 0x4c, 0x8e,
30084 ++ 0x62, 0x81, 0xae, 0x86, 0x3f, 0x6b, 0xd0, 0xb5,
30085 ++ 0xa9, 0xcf, 0x50, 0xf1, 0x02, 0x12, 0xa0, 0x0b,
30086 ++ 0x24, 0xe9, 0xe6, 0x72, 0x89, 0x2c, 0x52, 0x1b,
30087 ++ 0x34, 0x38, 0xf8, 0x75, 0x5f, 0xa0, 0x74, 0xe2,
30088 ++ 0x99, 0xdd, 0xa6, 0x4b, 0x14, 0x50, 0x4e, 0xf1,
30089 ++ 0xbe, 0xd6, 0x9e, 0xdb, 0xb2, 0x24, 0x27, 0x74,
30090 ++ 0x12, 0x4a, 0x78, 0x78, 0x17, 0xa5, 0x58, 0x8e,
30091 ++ 0x2f, 0xf9, 0xf4, 0x8d, 0xee, 0x03, 0x88, 0xae,
30092 ++ 0xb8, 0x29, 0xa1, 0x2f, 0x4b, 0xee, 0x92, 0xbd,
30093 ++ 0x87, 0xb3, 0xce, 0x34, 0x21, 0x57, 0x46, 0x04,
30094 ++ 0x49, 0x0c, 0x80, 0xf2, 0x01, 0x13, 0xa1, 0x55,
30095 ++ 0xb3, 0xff, 0x44, 0x30, 0x3c, 0x1c, 0xd0, 0xef,
30096 ++ 0xbc, 0x18, 0x74, 0x26, 0xad, 0x41, 0x5b, 0x5b,
30097 ++ 0x3e, 0x9a, 0x7a, 0x46, 0x4f, 0x16, 0xd6, 0x74,
30098 ++ 0x5a, 0xb7, 0x3a, 0x28, 0x31, 0xd8, 0xae, 0x26,
30099 ++ 0xac, 0x50, 0x53, 0x86, 0xf2, 0x56, 0xd7, 0x3f,
30100 ++ 0x29, 0xbc, 0x45, 0x68, 0x8e, 0xcb, 0x98, 0x64,
30101 ++ 0xdd, 0xc9, 0xba, 0xb8, 0x4b, 0x7b, 0x82, 0xdd,
30102 ++ 0x14, 0xa7, 0xcb, 0x71, 0x72, 0x00, 0x5c, 0xad,
30103 ++ 0x7b, 0x6a, 0x89, 0xa4, 0x3d, 0xbf, 0xb5, 0x4b,
30104 ++ 0x3e, 0x7c, 0x5a, 0xcf, 0xb8, 0xa1, 0xc5, 0x6e,
30105 ++ 0xc8, 0xb6, 0x31, 0x57, 0x7b, 0xdf, 0xa5, 0x7e,
30106 ++ 0xb1, 0xd6, 0x42, 0x2a, 0x31, 0x36, 0xd1, 0xd0,
30107 ++ 0x3f, 0x7a, 0xe5, 0x94, 0xd6, 0x36, 0xa0, 0x6f,
30108 ++ 0xb7, 0x40, 0x7d, 0x37, 0xc6, 0x55, 0x7c, 0x50,
30109 ++ 0x40, 0x6d, 0x29, 0x89, 0xe3, 0x5a, 0xae, 0x97,
30110 ++ 0xe7, 0x44, 0x49, 0x6e, 0xbd, 0x81, 0x3d, 0x03,
30111 ++ 0x93, 0x06, 0x12, 0x06, 0xe2, 0x41, 0x12, 0x4a,
30112 ++ 0xf1, 0x6a, 0xa4, 0x58, 0xa2, 0xfb, 0xd2, 0x15,
30113 ++ 0xba, 0xc9, 0x79, 0xc9, 0xce, 0x5e, 0x13, 0xbb,
30114 ++ 0xf1, 0x09, 0x04, 0xcc, 0xfd, 0xe8, 0x51, 0x34,
30115 ++ 0x6a, 0xe8, 0x61, 0x88, 0xda, 0xed, 0x01, 0x47,
30116 ++ 0x84, 0xf5, 0x73, 0x25, 0xf9, 0x1c, 0x42, 0x86,
30117 ++ 0x07, 0xf3, 0x5b, 0x1a, 0x01, 0xb3, 0xeb, 0x24,
30118 ++ 0x32, 0x8d, 0xf6, 0xed, 0x7c, 0x4b, 0xeb, 0x3c,
30119 ++ 0x36, 0x42, 0x28, 0xdf, 0xdf, 0xb6, 0xbe, 0xd9,
30120 ++ 0x8c, 0x52, 0xd3, 0x2b, 0x08, 0x90, 0x8c, 0xe7,
30121 ++ 0x98, 0x31, 0xe2, 0x32, 0x8e, 0xfc, 0x11, 0x48,
30122 ++ 0x00, 0xa8, 0x6a, 0x42, 0x4a, 0x02, 0xc6, 0x4b,
30123 ++ 0x09, 0xf1, 0xe3, 0x49, 0xf3, 0x45, 0x1f, 0x0e,
30124 ++ 0xbc, 0x56, 0xe2, 0xe4, 0xdf, 0xfb, 0xeb, 0x61,
30125 ++ 0xfa, 0x24, 0xc1, 0x63, 0x75, 0xbb, 0x47, 0x75,
30126 ++ 0xaf, 0xe1, 0x53, 0x16, 0x96, 0x21, 0x85, 0x26,
30127 ++ 0x11, 0xb3, 0x76, 0xe3, 0x23, 0xa1, 0x6b, 0x74,
30128 ++ 0x37, 0xd0, 0xde, 0x06, 0x90, 0x71, 0x5d, 0x43,
30129 ++ 0x88, 0x9b, 0x00, 0x54, 0xa6, 0x75, 0x2f, 0xa1,
30130 ++ 0xc2, 0x0b, 0x73, 0x20, 0x1d, 0xb6, 0x21, 0x79,
30131 ++ 0x57, 0x3f, 0xfa, 0x09, 0xbe, 0x8a, 0x33, 0xc3,
30132 ++ 0x52, 0xf0, 0x1d, 0x82, 0x31, 0xd1, 0x55, 0xb5,
30133 ++ 0x6c, 0x99, 0x25, 0xcf, 0x5c, 0x32, 0xce, 0xe9,
30134 ++ 0x0d, 0xfa, 0x69, 0x2c, 0xd5, 0x0d, 0xc5, 0x6d,
30135 ++ 0x86, 0xd0, 0x0c, 0x3b, 0x06, 0x50, 0x79, 0xe8,
30136 ++ 0xc3, 0xae, 0x04, 0xe6, 0xcd, 0x51, 0xe4, 0x26,
30137 ++ 0x9b, 0x4f, 0x7e, 0xa6, 0x0f, 0xab, 0xd8, 0xe5,
30138 ++ 0xde, 0xa9, 0x00, 0x95, 0xbe, 0xa3, 0x9d, 0x5d,
30139 ++ 0xb2, 0x09, 0x70, 0x18, 0x1c, 0xf0, 0xac, 0x29,
30140 ++ 0x23, 0x02, 0x29, 0x28, 0xd2, 0x74, 0x35, 0x57,
30141 ++ 0x62, 0x0f, 0x24, 0xea, 0x5e, 0x33, 0xc2, 0x92,
30142 ++ 0xf3, 0x78, 0x4d, 0x30, 0x1e, 0xa1, 0x99, 0xa9,
30143 ++ 0x82, 0xb0, 0x42, 0x31, 0x8d, 0xad, 0x8a, 0xbc,
30144 ++ 0xfc, 0xd4, 0x57, 0x47, 0x3e, 0xb4, 0x50, 0xdd,
30145 ++ 0x6e, 0x2c, 0x80, 0x4d, 0x22, 0xf1, 0xfb, 0x57,
30146 ++ 0xc4, 0xdd, 0x17, 0xe1, 0x8a, 0x36, 0x4a, 0xb3,
30147 ++ 0x37, 0xca, 0xc9, 0x4e, 0xab, 0xd5, 0x69, 0xc4,
30148 ++ 0xf4, 0xbc, 0x0b, 0x3b, 0x44, 0x4b, 0x29, 0x9c,
30149 ++ 0xee, 0xd4, 0x35, 0x22, 0x21, 0xb0, 0x1f, 0x27,
30150 ++ 0x64, 0xa8, 0x51, 0x1b, 0xf0, 0x9f, 0x19, 0x5c,
30151 ++ 0xfb, 0x5a, 0x64, 0x74, 0x70, 0x45, 0x09, 0xf5,
30152 ++ 0x64, 0xfe, 0x1a, 0x2d, 0xc9, 0x14, 0x04, 0x14,
30153 ++ 0xcf, 0xd5, 0x7d, 0x60, 0xaf, 0x94, 0x39, 0x94,
30154 ++ 0xe2, 0x7d, 0x79, 0x82, 0xd0, 0x65, 0x3b, 0x6b,
30155 ++ 0x9c, 0x19, 0x84, 0xb4, 0x6d, 0xb3, 0x0c, 0x99,
30156 ++ 0xc0, 0x56, 0xa8, 0xbd, 0x73, 0xce, 0x05, 0x84,
30157 ++ 0x3e, 0x30, 0xaa, 0xc4, 0x9b, 0x1b, 0x04, 0x2a,
30158 ++ 0x9f, 0xd7, 0x43, 0x2b, 0x23, 0xdf, 0xbf, 0xaa,
30159 ++ 0xd5, 0xc2, 0x43, 0x2d, 0x70, 0xab, 0xdc, 0x75,
30160 ++ 0xad, 0xac, 0xf7, 0xc0, 0xbe, 0x67, 0xb2, 0x74,
30161 ++ 0xed, 0x67, 0x10, 0x4a, 0x92, 0x60, 0xc1, 0x40,
30162 ++ 0x50, 0x19, 0x8a, 0x8a, 0x8c, 0x09, 0x0e, 0x72,
30163 ++ 0xe1, 0x73, 0x5e, 0xe8, 0x41, 0x85, 0x63, 0x9f,
30164 ++ 0x3f, 0xd7, 0x7d, 0xc4, 0xfb, 0x22, 0x5d, 0x92,
30165 ++ 0x6c, 0xb3, 0x1e, 0xe2, 0x50, 0x2f, 0x82, 0xa8,
30166 ++ 0x28, 0xc0, 0xb5, 0xd7, 0x5f, 0x68, 0x0d, 0x2c,
30167 ++ 0x2d, 0xaf, 0x7e, 0xfa, 0x2e, 0x08, 0x0f, 0x1f,
30168 ++ 0x70, 0x9f, 0xe9, 0x19, 0x72, 0x55, 0xf8, 0xfb,
30169 ++ 0x51, 0xd2, 0x33, 0x5d, 0xa0, 0xd3, 0x2b, 0x0a,
30170 ++ 0x6c, 0xbc, 0x4e, 0xcf, 0x36, 0x4d, 0xdc, 0x3b,
30171 ++ 0xe9, 0x3e, 0x81, 0x7c, 0x61, 0xdb, 0x20, 0x2d,
30172 ++ 0x3a, 0xc3, 0xb3, 0x0c, 0x1e, 0x00, 0xb9, 0x7c,
30173 ++ 0xf5, 0xca, 0x10, 0x5f, 0x3a, 0x71, 0xb3, 0xe4,
30174 ++ 0x20, 0xdb, 0x0c, 0x2a, 0x98, 0x63, 0x45, 0x00,
30175 ++ 0x58, 0xf6, 0x68, 0xe4, 0x0b, 0xda, 0x13, 0x3b,
30176 ++ 0x60, 0x5c, 0x76, 0xdb, 0xb9, 0x97, 0x71, 0xe4,
30177 ++ 0xd9, 0xb7, 0xdb, 0xbd, 0x68, 0xc7, 0x84, 0x84,
30178 ++ 0xaa, 0x7c, 0x68, 0x62, 0x5e, 0x16, 0xfc, 0xba,
30179 ++ 0x72, 0xaa, 0x9a, 0xa9, 0xeb, 0x7c, 0x75, 0x47,
30180 ++ 0x97, 0x7e, 0xad, 0xe2, 0xd9, 0x91, 0xe8, 0xe4,
30181 ++ 0xa5, 0x31, 0xd7, 0x01, 0x8e, 0xa2, 0x11, 0x88,
30182 ++ 0x95, 0xb9, 0xf2, 0x9b, 0xd3, 0x7f, 0x1b, 0x81,
30183 ++ 0x22, 0xf7, 0x98, 0x60, 0x0a, 0x64, 0xa6, 0xc1,
30184 ++ 0xf6, 0x49, 0xc7, 0xe3, 0x07, 0x4d, 0x94, 0x7a,
30185 ++ 0xcf, 0x6e, 0x68, 0x0c, 0x1b, 0x3f, 0x6e, 0x2e,
30186 ++ 0xee, 0x92, 0xfa, 0x52, 0xb3, 0x59, 0xf8, 0xf1,
30187 ++ 0x8f, 0x6a, 0x66, 0xa3, 0x82, 0x76, 0x4a, 0x07,
30188 ++ 0x1a, 0xc7, 0xdd, 0xf5, 0xda, 0x9c, 0x3c, 0x24,
30189 ++ 0xbf, 0xfd, 0x42, 0xa1, 0x10, 0x64, 0x6a, 0x0f,
30190 ++ 0x89, 0xee, 0x36, 0xa5, 0xce, 0x99, 0x48, 0x6a,
30191 ++ 0xf0, 0x9f, 0x9e, 0x69, 0xa4, 0x40, 0x20, 0xe9,
30192 ++ 0x16, 0x15, 0xf7, 0xdb, 0x75, 0x02, 0xcb, 0xe9,
30193 ++ 0x73, 0x8b, 0x3b, 0x49, 0x2f, 0xf0, 0xaf, 0x51,
30194 ++ 0x06, 0x5c, 0xdf, 0x27, 0x27, 0x49, 0x6a, 0xd1,
30195 ++ 0xcc, 0xc7, 0xb5, 0x63, 0xb5, 0xfc, 0xb8, 0x5c,
30196 ++ 0x87, 0x7f, 0x84, 0xb4, 0xcc, 0x14, 0xa9, 0x53,
30197 ++ 0xda, 0xa4, 0x56, 0xf8, 0xb6, 0x1b, 0xcc, 0x40,
30198 ++ 0x27, 0x52, 0x06, 0x5a, 0x13, 0x81, 0xd7, 0x3a,
30199 ++ 0xd4, 0x3b, 0xfb, 0x49, 0x65, 0x31, 0x33, 0xb2,
30200 ++ 0xfa, 0xcd, 0xad, 0x58, 0x4e, 0x2b, 0xae, 0xd2,
30201 ++ 0x20, 0xfb, 0x1a, 0x48, 0xb4, 0x3f, 0x9a, 0xd8,
30202 ++ 0x7a, 0x35, 0x4a, 0xc8, 0xee, 0x88, 0x5e, 0x07,
30203 ++ 0x66, 0x54, 0xb9, 0xec, 0x9f, 0xa3, 0xe3, 0xb9,
30204 ++ 0x37, 0xaa, 0x49, 0x76, 0x31, 0xda, 0x74, 0x2d,
30205 ++ 0x3c, 0xa4, 0x65, 0x10, 0x32, 0x38, 0xf0, 0xde,
30206 ++ 0xd3, 0x99, 0x17, 0xaa, 0x71, 0xaa, 0x8f, 0x0f,
30207 ++ 0x8c, 0xaf, 0xa2, 0xf8, 0x5d, 0x64, 0xba, 0x1d,
30208 ++ 0xa3, 0xef, 0x96, 0x73, 0xe8, 0xa1, 0x02, 0x8d,
30209 ++ 0x0c, 0x6d, 0xb8, 0x06, 0x90, 0xb8, 0x08, 0x56,
30210 ++ 0x2c, 0xa7, 0x06, 0xc9, 0xc2, 0x38, 0xdb, 0x7c,
30211 ++ 0x63, 0xb1, 0x57, 0x8e, 0xea, 0x7c, 0x79, 0xf3,
30212 ++ 0x49, 0x1d, 0xfe, 0x9f, 0xf3, 0x6e, 0xb1, 0x1d,
30213 ++ 0xba, 0x19, 0x80, 0x1a, 0x0a, 0xd3, 0xb0, 0x26,
30214 ++ 0x21, 0x40, 0xb1, 0x7c, 0xf9, 0x4d, 0x8d, 0x10,
30215 ++ 0xc1, 0x7e, 0xf4, 0xf6, 0x3c, 0xa8, 0xfd, 0x7c,
30216 ++ 0xa3, 0x92, 0xb2, 0x0f, 0xaa, 0xcc, 0xa6, 0x11,
30217 ++ 0xfe, 0x04, 0xe3, 0xd1, 0x7a, 0x32, 0x89, 0xdf,
30218 ++ 0x0d, 0xc4, 0x8f, 0x79, 0x6b, 0xca, 0x16, 0x7c,
30219 ++ 0x6e, 0xf9, 0xad, 0x0f, 0xf6, 0xfe, 0x27, 0xdb,
30220 ++ 0xc4, 0x13, 0x70, 0xf1, 0x62, 0x1a, 0x4f, 0x79,
30221 ++ 0x40, 0xc9, 0x9b, 0x8b, 0x21, 0xea, 0x84, 0xfa,
30222 ++ 0xf5, 0xf1, 0x89, 0xce, 0xb7, 0x55, 0x0a, 0x80,
30223 ++ 0x39, 0x2f, 0x55, 0x36, 0x16, 0x9c, 0x7b, 0x08,
30224 ++ 0xbd, 0x87, 0x0d, 0xa5, 0x32, 0xf1, 0x52, 0x7c,
30225 ++ 0xe8, 0x55, 0x60, 0x5b, 0xd7, 0x69, 0xe4, 0xfc,
30226 ++ 0xfa, 0x12, 0x85, 0x96, 0xea, 0x50, 0x28, 0xab,
30227 ++ 0x8a, 0xf7, 0xbb, 0x0e, 0x53, 0x74, 0xca, 0xa6,
30228 ++ 0x27, 0x09, 0xc2, 0xb5, 0xde, 0x18, 0x14, 0xd9,
30229 ++ 0xea, 0xe5, 0x29, 0x1c, 0x40, 0x56, 0xcf, 0xd7,
30230 ++ 0xae, 0x05, 0x3f, 0x65, 0xaf, 0x05, 0x73, 0xe2,
30231 ++ 0x35, 0x96, 0x27, 0x07, 0x14, 0xc0, 0xad, 0x33,
30232 ++ 0xf1, 0xdc, 0x44, 0x7a, 0x89, 0x17, 0x77, 0xd2,
30233 ++ 0x9c, 0x58, 0x60, 0xf0, 0x3f, 0x7b, 0x2d, 0x2e,
30234 ++ 0x57, 0x95, 0x54, 0x87, 0xed, 0xf2, 0xc7, 0x4c,
30235 ++ 0xf0, 0xae, 0x56, 0x29, 0x19, 0x7d, 0x66, 0x4b,
30236 ++ 0x9b, 0x83, 0x84, 0x42, 0x3b, 0x01, 0x25, 0x66,
30237 ++ 0x8e, 0x02, 0xde, 0xb9, 0x83, 0x54, 0x19, 0xf6,
30238 ++ 0x9f, 0x79, 0x0d, 0x67, 0xc5, 0x1d, 0x7a, 0x44,
30239 ++ 0x02, 0x98, 0xa7, 0x16, 0x1c, 0x29, 0x0d, 0x74,
30240 ++ 0xff, 0x85, 0x40, 0x06, 0xef, 0x2c, 0xa9, 0xc6,
30241 ++ 0xf5, 0x53, 0x07, 0x06, 0xae, 0xe4, 0xfa, 0x5f,
30242 ++ 0xd8, 0x39, 0x4d, 0xf1, 0x9b, 0x6b, 0xd9, 0x24,
30243 ++ 0x84, 0xfe, 0x03, 0x4c, 0xb2, 0x3f, 0xdf, 0xa1,
30244 ++ 0x05, 0x9e, 0x50, 0x14, 0x5a, 0xd9, 0x1a, 0xa2,
30245 ++ 0xa7, 0xfa, 0xfa, 0x17, 0xf7, 0x78, 0xd6, 0xb5,
30246 ++ 0x92, 0x61, 0x91, 0xac, 0x36, 0xfa, 0x56, 0x0d,
30247 ++ 0x38, 0x32, 0x18, 0x85, 0x08, 0x58, 0x37, 0xf0,
30248 ++ 0x4b, 0xdb, 0x59, 0xe7, 0xa4, 0x34, 0xc0, 0x1b,
30249 ++ 0x01, 0xaf, 0x2d, 0xde, 0xa1, 0xaa, 0x5d, 0xd3,
30250 ++ 0xec, 0xe1, 0xd4, 0xf7, 0xe6, 0x54, 0x68, 0xf0,
30251 ++ 0x51, 0x97, 0xa7, 0x89, 0xea, 0x24, 0xad, 0xd3,
30252 ++ 0x6e, 0x47, 0x93, 0x8b, 0x4b, 0xb4, 0xf7, 0x1c,
30253 ++ 0x42, 0x06, 0x67, 0xe8, 0x99, 0xf6, 0xf5, 0x7b,
30254 ++ 0x85, 0xb5, 0x65, 0xb5, 0xb5, 0xd2, 0x37, 0xf5,
30255 ++ 0xf3, 0x02, 0xa6, 0x4d, 0x11, 0xa7, 0xdc, 0x51,
30256 ++ 0x09, 0x7f, 0xa0, 0xd8, 0x88, 0x1c, 0x13, 0x71,
30257 ++ 0xae, 0x9c, 0xb7, 0x7b, 0x34, 0xd6, 0x4e, 0x68,
30258 ++ 0x26, 0x83, 0x51, 0xaf, 0x1d, 0xee, 0x8b, 0xbb,
30259 ++ 0x69, 0x43, 0x2b, 0x9e, 0x8a, 0xbc, 0x02, 0x0e,
30260 ++ 0xa0, 0x1b, 0xe0, 0xa8, 0x5f, 0x6f, 0xaf, 0x1b,
30261 ++ 0x8f, 0xe7, 0x64, 0x71, 0x74, 0x11, 0x7e, 0xa8,
30262 ++ 0xd8, 0xf9, 0x97, 0x06, 0xc3, 0xb6, 0xfb, 0xfb,
30263 ++ 0xb7, 0x3d, 0x35, 0x9d, 0x3b, 0x52, 0xed, 0x54,
30264 ++ 0xca, 0xf4, 0x81, 0x01, 0x2d, 0x1b, 0xc3, 0xa7,
30265 ++ 0x00, 0x3d, 0x1a, 0x39, 0x54, 0xe1, 0xf6, 0xff,
30266 ++ 0xed, 0x6f, 0x0b, 0x5a, 0x68, 0xda, 0x58, 0xdd,
30267 ++ 0xa9, 0xcf, 0x5c, 0x4a, 0xe5, 0x09, 0x4e, 0xde,
30268 ++ 0x9d, 0xbc, 0x3e, 0xee, 0x5a, 0x00, 0x3b, 0x2c,
30269 ++ 0x87, 0x10, 0x65, 0x60, 0xdd, 0xd7, 0x56, 0xd1,
30270 ++ 0x4c, 0x64, 0x45, 0xe4, 0x21, 0xec, 0x78, 0xf8,
30271 ++ 0x25, 0x7a, 0x3e, 0x16, 0x5d, 0x09, 0x53, 0x14,
30272 ++ 0xbe, 0x4f, 0xae, 0x87, 0xd8, 0xd1, 0xaa, 0x3c,
30273 ++ 0xf6, 0x3e, 0xa4, 0x70, 0x8c, 0x5e, 0x70, 0xa4,
30274 ++ 0xb3, 0x6b, 0x66, 0x73, 0xd3, 0xbf, 0x31, 0x06,
30275 ++ 0x19, 0x62, 0x93, 0x15, 0xf2, 0x86, 0xe4, 0x52,
30276 ++ 0x7e, 0x53, 0x4c, 0x12, 0x38, 0xcc, 0x34, 0x7d,
30277 ++ 0x57, 0xf6, 0x42, 0x93, 0x8a, 0xc4, 0xee, 0x5c,
30278 ++ 0x8a, 0xe1, 0x52, 0x8f, 0x56, 0x64, 0xf6, 0xa6,
30279 ++ 0xd1, 0x91, 0x57, 0x70, 0xcd, 0x11, 0x76, 0xf5,
30280 ++ 0x59, 0x60, 0x60, 0x3c, 0xc1, 0xc3, 0x0b, 0x7f,
30281 ++ 0x58, 0x1a, 0x50, 0x91, 0xf1, 0x68, 0x8f, 0x6e,
30282 ++ 0x74, 0x74, 0xa8, 0x51, 0x0b, 0xf7, 0x7a, 0x98,
30283 ++ 0x37, 0xf2, 0x0a, 0x0e, 0xa4, 0x97, 0x04, 0xb8,
30284 ++ 0x9b, 0xfd, 0xa0, 0xea, 0xf7, 0x0d, 0xe1, 0xdb,
30285 ++ 0x03, 0xf0, 0x31, 0x29, 0xf8, 0xdd, 0x6b, 0x8b,
30286 ++ 0x5d, 0xd8, 0x59, 0xa9, 0x29, 0xcf, 0x9a, 0x79,
30287 ++ 0x89, 0x19, 0x63, 0x46, 0x09, 0x79, 0x6a, 0x11,
30288 ++ 0xda, 0x63, 0x68, 0x48, 0x77, 0x23, 0xfb, 0x7d,
30289 ++ 0x3a, 0x43, 0xcb, 0x02, 0x3b, 0x7a, 0x6d, 0x10,
30290 ++ 0x2a, 0x9e, 0xac, 0xf1, 0xd4, 0x19, 0xf8, 0x23,
30291 ++ 0x64, 0x1d, 0x2c, 0x5f, 0xf2, 0xb0, 0x5c, 0x23,
30292 ++ 0x27, 0xf7, 0x27, 0x30, 0x16, 0x37, 0xb1, 0x90,
30293 ++ 0xab, 0x38, 0xfb, 0x55, 0xcd, 0x78, 0x58, 0xd4,
30294 ++ 0x7d, 0x43, 0xf6, 0x45, 0x5e, 0x55, 0x8d, 0xb1,
30295 ++ 0x02, 0x65, 0x58, 0xb4, 0x13, 0x4b, 0x36, 0xf7,
30296 ++ 0xcc, 0xfe, 0x3d, 0x0b, 0x82, 0xe2, 0x12, 0x11,
30297 ++ 0xbb, 0xe6, 0xb8, 0x3a, 0x48, 0x71, 0xc7, 0x50,
30298 ++ 0x06, 0x16, 0x3a, 0xe6, 0x7c, 0x05, 0xc7, 0xc8,
30299 ++ 0x4d, 0x2f, 0x08, 0x6a, 0x17, 0x9a, 0x95, 0x97,
30300 ++ 0x50, 0x68, 0xdc, 0x28, 0x18, 0xc4, 0x61, 0x38,
30301 ++ 0xb9, 0xe0, 0x3e, 0x78, 0xdb, 0x29, 0xe0, 0x9f,
30302 ++ 0x52, 0xdd, 0xf8, 0x4f, 0x91, 0xc1, 0xd0, 0x33,
30303 ++ 0xa1, 0x7a, 0x8e, 0x30, 0x13, 0x82, 0x07, 0x9f,
30304 ++ 0xd3, 0x31, 0x0f, 0x23, 0xbe, 0x32, 0x5a, 0x75,
30305 ++ 0xcf, 0x96, 0xb2, 0xec, 0xb5, 0x32, 0xac, 0x21,
30306 ++ 0xd1, 0x82, 0x33, 0xd3, 0x15, 0x74, 0xbd, 0x90,
30307 ++ 0xf1, 0x2c, 0xe6, 0x5f, 0x8d, 0xe3, 0x02, 0xe8,
30308 ++ 0xe9, 0xc4, 0xca, 0x96, 0xeb, 0x0e, 0xbc, 0x91,
30309 ++ 0xf4, 0xb9, 0xea, 0xd9, 0x1b, 0x75, 0xbd, 0xe1,
30310 ++ 0xac, 0x2a, 0x05, 0x37, 0x52, 0x9b, 0x1b, 0x3f,
30311 ++ 0x5a, 0xdc, 0x21, 0xc3, 0x98, 0xbb, 0xaf, 0xa3,
30312 ++ 0xf2, 0x00, 0xbf, 0x0d, 0x30, 0x89, 0x05, 0xcc,
30313 ++ 0xa5, 0x76, 0xf5, 0x06, 0xf0, 0xc6, 0x54, 0x8a,
30314 ++ 0x5d, 0xd4, 0x1e, 0xc1, 0xf2, 0xce, 0xb0, 0x62,
30315 ++ 0xc8, 0xfc, 0x59, 0x42, 0x9a, 0x90, 0x60, 0x55,
30316 ++ 0xfe, 0x88, 0xa5, 0x8b, 0xb8, 0x33, 0x0c, 0x23,
30317 ++ 0x24, 0x0d, 0x15, 0x70, 0x37, 0x1e, 0x3d, 0xf6,
30318 ++ 0xd2, 0xea, 0x92, 0x10, 0xb2, 0xc4, 0x51, 0xac,
30319 ++ 0xf2, 0xac, 0xf3, 0x6b, 0x6c, 0xaa, 0xcf, 0x12,
30320 ++ 0xc5, 0x6c, 0x90, 0x50, 0xb5, 0x0c, 0xfc, 0x1a,
30321 ++ 0x15, 0x52, 0xe9, 0x26, 0xc6, 0x52, 0xa4, 0xe7,
30322 ++ 0x81, 0x69, 0xe1, 0xe7, 0x9e, 0x30, 0x01, 0xec,
30323 ++ 0x84, 0x89, 0xb2, 0x0d, 0x66, 0xdd, 0xce, 0x28,
30324 ++ 0x5c, 0xec, 0x98, 0x46, 0x68, 0x21, 0x9f, 0x88,
30325 ++ 0x3f, 0x1f, 0x42, 0x77, 0xce, 0xd0, 0x61, 0xd4,
30326 ++ 0x20, 0xa7, 0xff, 0x53, 0xad, 0x37, 0xd0, 0x17,
30327 ++ 0x35, 0xc9, 0xfc, 0xba, 0x0a, 0x78, 0x3f, 0xf2,
30328 ++ 0xcc, 0x86, 0x89, 0xe8, 0x4b, 0x3c, 0x48, 0x33,
30329 ++ 0x09, 0x7f, 0xc6, 0xc0, 0xdd, 0xb8, 0xfd, 0x7a,
30330 ++ 0x66, 0x66, 0x65, 0xeb, 0x47, 0xa7, 0x04, 0x28,
30331 ++ 0xa3, 0x19, 0x8e, 0xa9, 0xb1, 0x13, 0x67, 0x62,
30332 ++ 0x70, 0xcf, 0xd7
30333 ++};
30334 ++static const u8 dec_output013[] __initconst = {
30335 ++ 0x74, 0xa6, 0x3e, 0xe4, 0xb1, 0xcb, 0xaf, 0xb0,
30336 ++ 0x40, 0xe5, 0x0f, 0x9e, 0xf1, 0xf2, 0x89, 0xb5,
30337 ++ 0x42, 0x34, 0x8a, 0xa1, 0x03, 0xb7, 0xe9, 0x57,
30338 ++ 0x46, 0xbe, 0x20, 0xe4, 0x6e, 0xb0, 0xeb, 0xff,
30339 ++ 0xea, 0x07, 0x7e, 0xef, 0xe2, 0x55, 0x9f, 0xe5,
30340 ++ 0x78, 0x3a, 0xb7, 0x83, 0xc2, 0x18, 0x40, 0x7b,
30341 ++ 0xeb, 0xcd, 0x81, 0xfb, 0x90, 0x12, 0x9e, 0x46,
30342 ++ 0xa9, 0xd6, 0x4a, 0xba, 0xb0, 0x62, 0xdb, 0x6b,
30343 ++ 0x99, 0xc4, 0xdb, 0x54, 0x4b, 0xb8, 0xa5, 0x71,
30344 ++ 0xcb, 0xcd, 0x63, 0x32, 0x55, 0xfb, 0x31, 0xf0,
30345 ++ 0x38, 0xf5, 0xbe, 0x78, 0xe4, 0x45, 0xce, 0x1b,
30346 ++ 0x6a, 0x5b, 0x0e, 0xf4, 0x16, 0xe4, 0xb1, 0x3d,
30347 ++ 0xf6, 0x63, 0x7b, 0xa7, 0x0c, 0xde, 0x6f, 0x8f,
30348 ++ 0x74, 0xdf, 0xe0, 0x1e, 0x9d, 0xce, 0x8f, 0x24,
30349 ++ 0xef, 0x23, 0x35, 0x33, 0x7b, 0x83, 0x34, 0x23,
30350 ++ 0x58, 0x74, 0x14, 0x77, 0x1f, 0xc2, 0x4f, 0x4e,
30351 ++ 0xc6, 0x89, 0xf9, 0x52, 0x09, 0x37, 0x64, 0x14,
30352 ++ 0xc4, 0x01, 0x6b, 0x9d, 0x77, 0xe8, 0x90, 0x5d,
30353 ++ 0xa8, 0x4a, 0x2a, 0xef, 0x5c, 0x7f, 0xeb, 0xbb,
30354 ++ 0xb2, 0xc6, 0x93, 0x99, 0x66, 0xdc, 0x7f, 0xd4,
30355 ++ 0x9e, 0x2a, 0xca, 0x8d, 0xdb, 0xe7, 0x20, 0xcf,
30356 ++ 0xe4, 0x73, 0xae, 0x49, 0x7d, 0x64, 0x0f, 0x0e,
30357 ++ 0x28, 0x46, 0xa9, 0xa8, 0x32, 0xe4, 0x0e, 0xf6,
30358 ++ 0x51, 0x53, 0xb8, 0x3c, 0xb1, 0xff, 0xa3, 0x33,
30359 ++ 0x41, 0x75, 0xff, 0xf1, 0x6f, 0xf1, 0xfb, 0xbb,
30360 ++ 0x83, 0x7f, 0x06, 0x9b, 0xe7, 0x1b, 0x0a, 0xe0,
30361 ++ 0x5c, 0x33, 0x60, 0x5b, 0xdb, 0x5b, 0xed, 0xfe,
30362 ++ 0xa5, 0x16, 0x19, 0x72, 0xa3, 0x64, 0x23, 0x00,
30363 ++ 0x02, 0xc7, 0xf3, 0x6a, 0x81, 0x3e, 0x44, 0x1d,
30364 ++ 0x79, 0x15, 0x5f, 0x9a, 0xde, 0xe2, 0xfd, 0x1b,
30365 ++ 0x73, 0xc1, 0xbc, 0x23, 0xba, 0x31, 0xd2, 0x50,
30366 ++ 0xd5, 0xad, 0x7f, 0x74, 0xa7, 0xc9, 0xf8, 0x3e,
30367 ++ 0x2b, 0x26, 0x10, 0xf6, 0x03, 0x36, 0x74, 0xe4,
30368 ++ 0x0e, 0x6a, 0x72, 0xb7, 0x73, 0x0a, 0x42, 0x28,
30369 ++ 0xc2, 0xad, 0x5e, 0x03, 0xbe, 0xb8, 0x0b, 0xa8,
30370 ++ 0x5b, 0xd4, 0xb8, 0xba, 0x52, 0x89, 0xb1, 0x9b,
30371 ++ 0xc1, 0xc3, 0x65, 0x87, 0xed, 0xa5, 0xf4, 0x86,
30372 ++ 0xfd, 0x41, 0x80, 0x91, 0x27, 0x59, 0x53, 0x67,
30373 ++ 0x15, 0x78, 0x54, 0x8b, 0x2d, 0x3d, 0xc7, 0xff,
30374 ++ 0x02, 0x92, 0x07, 0x5f, 0x7a, 0x4b, 0x60, 0x59,
30375 ++ 0x3c, 0x6f, 0x5c, 0xd8, 0xec, 0x95, 0xd2, 0xfe,
30376 ++ 0xa0, 0x3b, 0xd8, 0x3f, 0xd1, 0x69, 0xa6, 0xd6,
30377 ++ 0x41, 0xb2, 0xf4, 0x4d, 0x12, 0xf4, 0x58, 0x3e,
30378 ++ 0x66, 0x64, 0x80, 0x31, 0x9b, 0xa8, 0x4c, 0x8b,
30379 ++ 0x07, 0xb2, 0xec, 0x66, 0x94, 0x66, 0x47, 0x50,
30380 ++ 0x50, 0x5f, 0x18, 0x0b, 0x0e, 0xd6, 0xc0, 0x39,
30381 ++ 0x21, 0x13, 0x9e, 0x33, 0xbc, 0x79, 0x36, 0x02,
30382 ++ 0x96, 0x70, 0xf0, 0x48, 0x67, 0x2f, 0x26, 0xe9,
30383 ++ 0x6d, 0x10, 0xbb, 0xd6, 0x3f, 0xd1, 0x64, 0x7a,
30384 ++ 0x2e, 0xbe, 0x0c, 0x61, 0xf0, 0x75, 0x42, 0x38,
30385 ++ 0x23, 0xb1, 0x9e, 0x9f, 0x7c, 0x67, 0x66, 0xd9,
30386 ++ 0x58, 0x9a, 0xf1, 0xbb, 0x41, 0x2a, 0x8d, 0x65,
30387 ++ 0x84, 0x94, 0xfc, 0xdc, 0x6a, 0x50, 0x64, 0xdb,
30388 ++ 0x56, 0x33, 0x76, 0x00, 0x10, 0xed, 0xbe, 0xd2,
30389 ++ 0x12, 0xf6, 0xf6, 0x1b, 0xa2, 0x16, 0xde, 0xae,
30390 ++ 0x31, 0x95, 0xdd, 0xb1, 0x08, 0x7e, 0x4e, 0xee,
30391 ++ 0xe7, 0xf9, 0xa5, 0xfb, 0x5b, 0x61, 0x43, 0x00,
30392 ++ 0x40, 0xf6, 0x7e, 0x02, 0x04, 0x32, 0x4e, 0x0c,
30393 ++ 0xe2, 0x66, 0x0d, 0xd7, 0x07, 0x98, 0x0e, 0xf8,
30394 ++ 0x72, 0x34, 0x6d, 0x95, 0x86, 0xd7, 0xcb, 0x31,
30395 ++ 0x54, 0x47, 0xd0, 0x38, 0x29, 0x9c, 0x5a, 0x68,
30396 ++ 0xd4, 0x87, 0x76, 0xc9, 0xe7, 0x7e, 0xe3, 0xf4,
30397 ++ 0x81, 0x6d, 0x18, 0xcb, 0xc9, 0x05, 0xaf, 0xa0,
30398 ++ 0xfb, 0x66, 0xf7, 0xf1, 0x1c, 0xc6, 0x14, 0x11,
30399 ++ 0x4f, 0x2b, 0x79, 0x42, 0x8b, 0xbc, 0xac, 0xe7,
30400 ++ 0x6c, 0xfe, 0x0f, 0x58, 0xe7, 0x7c, 0x78, 0x39,
30401 ++ 0x30, 0xb0, 0x66, 0x2c, 0x9b, 0x6d, 0x3a, 0xe1,
30402 ++ 0xcf, 0xc9, 0xa4, 0x0e, 0x6d, 0x6d, 0x8a, 0xa1,
30403 ++ 0x3a, 0xe7, 0x28, 0xd4, 0x78, 0x4c, 0xa6, 0xa2,
30404 ++ 0x2a, 0xa6, 0x03, 0x30, 0xd7, 0xa8, 0x25, 0x66,
30405 ++ 0x87, 0x2f, 0x69, 0x5c, 0x4e, 0xdd, 0xa5, 0x49,
30406 ++ 0x5d, 0x37, 0x4a, 0x59, 0xc4, 0xaf, 0x1f, 0xa2,
30407 ++ 0xe4, 0xf8, 0xa6, 0x12, 0x97, 0xd5, 0x79, 0xf5,
30408 ++ 0xe2, 0x4a, 0x2b, 0x5f, 0x61, 0xe4, 0x9e, 0xe3,
30409 ++ 0xee, 0xb8, 0xa7, 0x5b, 0x2f, 0xf4, 0x9e, 0x6c,
30410 ++ 0xfb, 0xd1, 0xc6, 0x56, 0x77, 0xba, 0x75, 0xaa,
30411 ++ 0x3d, 0x1a, 0xa8, 0x0b, 0xb3, 0x68, 0x24, 0x00,
30412 ++ 0x10, 0x7f, 0xfd, 0xd7, 0xa1, 0x8d, 0x83, 0x54,
30413 ++ 0x4f, 0x1f, 0xd8, 0x2a, 0xbe, 0x8a, 0x0c, 0x87,
30414 ++ 0xab, 0xa2, 0xde, 0xc3, 0x39, 0xbf, 0x09, 0x03,
30415 ++ 0xa5, 0xf3, 0x05, 0x28, 0xe1, 0xe1, 0xee, 0x39,
30416 ++ 0x70, 0x9c, 0xd8, 0x81, 0x12, 0x1e, 0x02, 0x40,
30417 ++ 0xd2, 0x6e, 0xf0, 0xeb, 0x1b, 0x3d, 0x22, 0xc6,
30418 ++ 0xe5, 0xe3, 0xb4, 0x5a, 0x98, 0xbb, 0xf0, 0x22,
30419 ++ 0x28, 0x8d, 0xe5, 0xd3, 0x16, 0x48, 0x24, 0xa5,
30420 ++ 0xe6, 0x66, 0x0c, 0xf9, 0x08, 0xf9, 0x7e, 0x1e,
30421 ++ 0xe1, 0x28, 0x26, 0x22, 0xc7, 0xc7, 0x0a, 0x32,
30422 ++ 0x47, 0xfa, 0xa3, 0xbe, 0x3c, 0xc4, 0xc5, 0x53,
30423 ++ 0x0a, 0xd5, 0x94, 0x4a, 0xd7, 0x93, 0xd8, 0x42,
30424 ++ 0x99, 0xb9, 0x0a, 0xdb, 0x56, 0xf7, 0xb9, 0x1c,
30425 ++ 0x53, 0x4f, 0xfa, 0xd3, 0x74, 0xad, 0xd9, 0x68,
30426 ++ 0xf1, 0x1b, 0xdf, 0x61, 0xc6, 0x5e, 0xa8, 0x48,
30427 ++ 0xfc, 0xd4, 0x4a, 0x4c, 0x3c, 0x32, 0xf7, 0x1c,
30428 ++ 0x96, 0x21, 0x9b, 0xf9, 0xa3, 0xcc, 0x5a, 0xce,
30429 ++ 0xd5, 0xd7, 0x08, 0x24, 0xf6, 0x1c, 0xfd, 0xdd,
30430 ++ 0x38, 0xc2, 0x32, 0xe9, 0xb8, 0xe7, 0xb6, 0xfa,
30431 ++ 0x9d, 0x45, 0x13, 0x2c, 0x83, 0xfd, 0x4a, 0x69,
30432 ++ 0x82, 0xcd, 0xdc, 0xb3, 0x76, 0x0c, 0x9e, 0xd8,
30433 ++ 0xf4, 0x1b, 0x45, 0x15, 0xb4, 0x97, 0xe7, 0x58,
30434 ++ 0x34, 0xe2, 0x03, 0x29, 0x5a, 0xbf, 0xb6, 0xe0,
30435 ++ 0x5d, 0x13, 0xd9, 0x2b, 0xb4, 0x80, 0xb2, 0x45,
30436 ++ 0x81, 0x6a, 0x2e, 0x6c, 0x89, 0x7d, 0xee, 0xbb,
30437 ++ 0x52, 0xdd, 0x1f, 0x18, 0xe7, 0x13, 0x6b, 0x33,
30438 ++ 0x0e, 0xea, 0x36, 0x92, 0x77, 0x7b, 0x6d, 0x9c,
30439 ++ 0x5a, 0x5f, 0x45, 0x7b, 0x7b, 0x35, 0x62, 0x23,
30440 ++ 0xd1, 0xbf, 0x0f, 0xd0, 0x08, 0x1b, 0x2b, 0x80,
30441 ++ 0x6b, 0x7e, 0xf1, 0x21, 0x47, 0xb0, 0x57, 0xd1,
30442 ++ 0x98, 0x72, 0x90, 0x34, 0x1c, 0x20, 0x04, 0xff,
30443 ++ 0x3d, 0x5c, 0xee, 0x0e, 0x57, 0x5f, 0x6f, 0x24,
30444 ++ 0x4e, 0x3c, 0xea, 0xfc, 0xa5, 0xa9, 0x83, 0xc9,
30445 ++ 0x61, 0xb4, 0x51, 0x24, 0xf8, 0x27, 0x5e, 0x46,
30446 ++ 0x8c, 0xb1, 0x53, 0x02, 0x96, 0x35, 0xba, 0xb8,
30447 ++ 0x4c, 0x71, 0xd3, 0x15, 0x59, 0x35, 0x22, 0x20,
30448 ++ 0xad, 0x03, 0x9f, 0x66, 0x44, 0x3b, 0x9c, 0x35,
30449 ++ 0x37, 0x1f, 0x9b, 0xbb, 0xf3, 0xdb, 0x35, 0x63,
30450 ++ 0x30, 0x64, 0xaa, 0xa2, 0x06, 0xa8, 0x5d, 0xbb,
30451 ++ 0xe1, 0x9f, 0x70, 0xec, 0x82, 0x11, 0x06, 0x36,
30452 ++ 0xec, 0x8b, 0x69, 0x66, 0x24, 0x44, 0xc9, 0x4a,
30453 ++ 0x57, 0xbb, 0x9b, 0x78, 0x13, 0xce, 0x9c, 0x0c,
30454 ++ 0xba, 0x92, 0x93, 0x63, 0xb8, 0xe2, 0x95, 0x0f,
30455 ++ 0x0f, 0x16, 0x39, 0x52, 0xfd, 0x3a, 0x6d, 0x02,
30456 ++ 0x4b, 0xdf, 0x13, 0xd3, 0x2a, 0x22, 0xb4, 0x03,
30457 ++ 0x7c, 0x54, 0x49, 0x96, 0x68, 0x54, 0x10, 0xfa,
30458 ++ 0xef, 0xaa, 0x6c, 0xe8, 0x22, 0xdc, 0x71, 0x16,
30459 ++ 0x13, 0x1a, 0xf6, 0x28, 0xe5, 0x6d, 0x77, 0x3d,
30460 ++ 0xcd, 0x30, 0x63, 0xb1, 0x70, 0x52, 0xa1, 0xc5,
30461 ++ 0x94, 0x5f, 0xcf, 0xe8, 0xb8, 0x26, 0x98, 0xf7,
30462 ++ 0x06, 0xa0, 0x0a, 0x70, 0xfa, 0x03, 0x80, 0xac,
30463 ++ 0xc1, 0xec, 0xd6, 0x4c, 0x54, 0xd7, 0xfe, 0x47,
30464 ++ 0xb6, 0x88, 0x4a, 0xf7, 0x71, 0x24, 0xee, 0xf3,
30465 ++ 0xd2, 0xc2, 0x4a, 0x7f, 0xfe, 0x61, 0xc7, 0x35,
30466 ++ 0xc9, 0x37, 0x67, 0xcb, 0x24, 0x35, 0xda, 0x7e,
30467 ++ 0xca, 0x5f, 0xf3, 0x8d, 0xd4, 0x13, 0x8e, 0xd6,
30468 ++ 0xcb, 0x4d, 0x53, 0x8f, 0x53, 0x1f, 0xc0, 0x74,
30469 ++ 0xf7, 0x53, 0xb9, 0x5e, 0x23, 0x37, 0xba, 0x6e,
30470 ++ 0xe3, 0x9d, 0x07, 0x55, 0x25, 0x7b, 0xe6, 0x2a,
30471 ++ 0x64, 0xd1, 0x32, 0xdd, 0x54, 0x1b, 0x4b, 0xc0,
30472 ++ 0xe1, 0xd7, 0x69, 0x58, 0xf8, 0x93, 0x29, 0xc4,
30473 ++ 0xdd, 0x23, 0x2f, 0xa5, 0xfc, 0x9d, 0x7e, 0xf8,
30474 ++ 0xd4, 0x90, 0xcd, 0x82, 0x55, 0xdc, 0x16, 0x16,
30475 ++ 0x9f, 0x07, 0x52, 0x9b, 0x9d, 0x25, 0xed, 0x32,
30476 ++ 0xc5, 0x7b, 0xdf, 0xf6, 0x83, 0x46, 0x3d, 0x65,
30477 ++ 0xb7, 0xef, 0x87, 0x7a, 0x12, 0x69, 0x8f, 0x06,
30478 ++ 0x7c, 0x51, 0x15, 0x4a, 0x08, 0xe8, 0xac, 0x9a,
30479 ++ 0x0c, 0x24, 0xa7, 0x27, 0xd8, 0x46, 0x2f, 0xe7,
30480 ++ 0x01, 0x0e, 0x1c, 0xc6, 0x91, 0xb0, 0x6e, 0x85,
30481 ++ 0x65, 0xf0, 0x29, 0x0d, 0x2e, 0x6b, 0x3b, 0xfb,
30482 ++ 0x4b, 0xdf, 0xe4, 0x80, 0x93, 0x03, 0x66, 0x46,
30483 ++ 0x3e, 0x8a, 0x6e, 0xf3, 0x5e, 0x4d, 0x62, 0x0e,
30484 ++ 0x49, 0x05, 0xaf, 0xd4, 0xf8, 0x21, 0x20, 0x61,
30485 ++ 0x1d, 0x39, 0x17, 0xf4, 0x61, 0x47, 0x95, 0xfb,
30486 ++ 0x15, 0x2e, 0xb3, 0x4f, 0xd0, 0x5d, 0xf5, 0x7d,
30487 ++ 0x40, 0xda, 0x90, 0x3c, 0x6b, 0xcb, 0x17, 0x00,
30488 ++ 0x13, 0x3b, 0x64, 0x34, 0x1b, 0xf0, 0xf2, 0xe5,
30489 ++ 0x3b, 0xb2, 0xc7, 0xd3, 0x5f, 0x3a, 0x44, 0xa6,
30490 ++ 0x9b, 0xb7, 0x78, 0x0e, 0x42, 0x5d, 0x4c, 0xc1,
30491 ++ 0xe9, 0xd2, 0xcb, 0xb7, 0x78, 0xd1, 0xfe, 0x9a,
30492 ++ 0xb5, 0x07, 0xe9, 0xe0, 0xbe, 0xe2, 0x8a, 0xa7,
30493 ++ 0x01, 0x83, 0x00, 0x8c, 0x5c, 0x08, 0xe6, 0x63,
30494 ++ 0x12, 0x92, 0xb7, 0xb7, 0xa6, 0x19, 0x7d, 0x38,
30495 ++ 0x13, 0x38, 0x92, 0x87, 0x24, 0xf9, 0x48, 0xb3,
30496 ++ 0x5e, 0x87, 0x6a, 0x40, 0x39, 0x5c, 0x3f, 0xed,
30497 ++ 0x8f, 0xee, 0xdb, 0x15, 0x82, 0x06, 0xda, 0x49,
30498 ++ 0x21, 0x2b, 0xb5, 0xbf, 0x32, 0x7c, 0x9f, 0x42,
30499 ++ 0x28, 0x63, 0xcf, 0xaf, 0x1e, 0xf8, 0xc6, 0xa0,
30500 ++ 0xd1, 0x02, 0x43, 0x57, 0x62, 0xec, 0x9b, 0x0f,
30501 ++ 0x01, 0x9e, 0x71, 0xd8, 0x87, 0x9d, 0x01, 0xc1,
30502 ++ 0x58, 0x77, 0xd9, 0xaf, 0xb1, 0x10, 0x7e, 0xdd,
30503 ++ 0xa6, 0x50, 0x96, 0xe5, 0xf0, 0x72, 0x00, 0x6d,
30504 ++ 0x4b, 0xf8, 0x2a, 0x8f, 0x19, 0xf3, 0x22, 0x88,
30505 ++ 0x11, 0x4a, 0x8b, 0x7c, 0xfd, 0xb7, 0xed, 0xe1,
30506 ++ 0xf6, 0x40, 0x39, 0xe0, 0xe9, 0xf6, 0x3d, 0x25,
30507 ++ 0xe6, 0x74, 0x3c, 0x58, 0x57, 0x7f, 0xe1, 0x22,
30508 ++ 0x96, 0x47, 0x31, 0x91, 0xba, 0x70, 0x85, 0x28,
30509 ++ 0x6b, 0x9f, 0x6e, 0x25, 0xac, 0x23, 0x66, 0x2f,
30510 ++ 0x29, 0x88, 0x28, 0xce, 0x8c, 0x5c, 0x88, 0x53,
30511 ++ 0xd1, 0x3b, 0xcc, 0x6a, 0x51, 0xb2, 0xe1, 0x28,
30512 ++ 0x3f, 0x91, 0xb4, 0x0d, 0x00, 0x3a, 0xe3, 0xf8,
30513 ++ 0xc3, 0x8f, 0xd7, 0x96, 0x62, 0x0e, 0x2e, 0xfc,
30514 ++ 0xc8, 0x6c, 0x77, 0xa6, 0x1d, 0x22, 0xc1, 0xb8,
30515 ++ 0xe6, 0x61, 0xd7, 0x67, 0x36, 0x13, 0x7b, 0xbb,
30516 ++ 0x9b, 0x59, 0x09, 0xa6, 0xdf, 0xf7, 0x6b, 0xa3,
30517 ++ 0x40, 0x1a, 0xf5, 0x4f, 0xb4, 0xda, 0xd3, 0xf3,
30518 ++ 0x81, 0x93, 0xc6, 0x18, 0xd9, 0x26, 0xee, 0xac,
30519 ++ 0xf0, 0xaa, 0xdf, 0xc5, 0x9c, 0xca, 0xc2, 0xa2,
30520 ++ 0xcc, 0x7b, 0x5c, 0x24, 0xb0, 0xbc, 0xd0, 0x6a,
30521 ++ 0x4d, 0x89, 0x09, 0xb8, 0x07, 0xfe, 0x87, 0xad,
30522 ++ 0x0a, 0xea, 0xb8, 0x42, 0xf9, 0x5e, 0xb3, 0x3e,
30523 ++ 0x36, 0x4c, 0xaf, 0x75, 0x9e, 0x1c, 0xeb, 0xbd,
30524 ++ 0xbc, 0xbb, 0x80, 0x40, 0xa7, 0x3a, 0x30, 0xbf,
30525 ++ 0xa8, 0x44, 0xf4, 0xeb, 0x38, 0xad, 0x29, 0xba,
30526 ++ 0x23, 0xed, 0x41, 0x0c, 0xea, 0xd2, 0xbb, 0x41,
30527 ++ 0x18, 0xd6, 0xb9, 0xba, 0x65, 0x2b, 0xa3, 0x91,
30528 ++ 0x6d, 0x1f, 0xa9, 0xf4, 0xd1, 0x25, 0x8d, 0x4d,
30529 ++ 0x38, 0xff, 0x64, 0xa0, 0xec, 0xde, 0xa6, 0xb6,
30530 ++ 0x79, 0xab, 0x8e, 0x33, 0x6c, 0x47, 0xde, 0xaf,
30531 ++ 0x94, 0xa4, 0xa5, 0x86, 0x77, 0x55, 0x09, 0x92,
30532 ++ 0x81, 0x31, 0x76, 0xc7, 0x34, 0x22, 0x89, 0x8e,
30533 ++ 0x3d, 0x26, 0x26, 0xd7, 0xfc, 0x1e, 0x16, 0x72,
30534 ++ 0x13, 0x33, 0x63, 0xd5, 0x22, 0xbe, 0xb8, 0x04,
30535 ++ 0x34, 0x84, 0x41, 0xbb, 0x80, 0xd0, 0x9f, 0x46,
30536 ++ 0x48, 0x07, 0xa7, 0xfc, 0x2b, 0x3a, 0x75, 0x55,
30537 ++ 0x8c, 0xc7, 0x6a, 0xbd, 0x7e, 0x46, 0x08, 0x84,
30538 ++ 0x0f, 0xd5, 0x74, 0xc0, 0x82, 0x8e, 0xaa, 0x61,
30539 ++ 0x05, 0x01, 0xb2, 0x47, 0x6e, 0x20, 0x6a, 0x2d,
30540 ++ 0x58, 0x70, 0x48, 0x32, 0xa7, 0x37, 0xd2, 0xb8,
30541 ++ 0x82, 0x1a, 0x51, 0xb9, 0x61, 0xdd, 0xfd, 0x9d,
30542 ++ 0x6b, 0x0e, 0x18, 0x97, 0xf8, 0x45, 0x5f, 0x87,
30543 ++ 0x10, 0xcf, 0x34, 0x72, 0x45, 0x26, 0x49, 0x70,
30544 ++ 0xe7, 0xa3, 0x78, 0xe0, 0x52, 0x89, 0x84, 0x94,
30545 ++ 0x83, 0x82, 0xc2, 0x69, 0x8f, 0xe3, 0xe1, 0x3f,
30546 ++ 0x60, 0x74, 0x88, 0xc4, 0xf7, 0x75, 0x2c, 0xfb,
30547 ++ 0xbd, 0xb6, 0xc4, 0x7e, 0x10, 0x0a, 0x6c, 0x90,
30548 ++ 0x04, 0x9e, 0xc3, 0x3f, 0x59, 0x7c, 0xce, 0x31,
30549 ++ 0x18, 0x60, 0x57, 0x73, 0x46, 0x94, 0x7d, 0x06,
30550 ++ 0xa0, 0x6d, 0x44, 0xec, 0xa2, 0x0a, 0x9e, 0x05,
30551 ++ 0x15, 0xef, 0xca, 0x5c, 0xbf, 0x00, 0xeb, 0xf7,
30552 ++ 0x3d, 0x32, 0xd4, 0xa5, 0xef, 0x49, 0x89, 0x5e,
30553 ++ 0x46, 0xb0, 0xa6, 0x63, 0x5b, 0x8a, 0x73, 0xae,
30554 ++ 0x6f, 0xd5, 0x9d, 0xf8, 0x4f, 0x40, 0xb5, 0xb2,
30555 ++ 0x6e, 0xd3, 0xb6, 0x01, 0xa9, 0x26, 0xa2, 0x21,
30556 ++ 0xcf, 0x33, 0x7a, 0x3a, 0xa4, 0x23, 0x13, 0xb0,
30557 ++ 0x69, 0x6a, 0xee, 0xce, 0xd8, 0x9d, 0x01, 0x1d,
30558 ++ 0x50, 0xc1, 0x30, 0x6c, 0xb1, 0xcd, 0xa0, 0xf0,
30559 ++ 0xf0, 0xa2, 0x64, 0x6f, 0xbb, 0xbf, 0x5e, 0xe6,
30560 ++ 0xab, 0x87, 0xb4, 0x0f, 0x4f, 0x15, 0xaf, 0xb5,
30561 ++ 0x25, 0xa1, 0xb2, 0xd0, 0x80, 0x2c, 0xfb, 0xf9,
30562 ++ 0xfe, 0xd2, 0x33, 0xbb, 0x76, 0xfe, 0x7c, 0xa8,
30563 ++ 0x66, 0xf7, 0xe7, 0x85, 0x9f, 0x1f, 0x85, 0x57,
30564 ++ 0x88, 0xe1, 0xe9, 0x63, 0xe4, 0xd8, 0x1c, 0xa1,
30565 ++ 0xfb, 0xda, 0x44, 0x05, 0x2e, 0x1d, 0x3a, 0x1c,
30566 ++ 0xff, 0xc8, 0x3b, 0xc0, 0xfe, 0xda, 0x22, 0x0b,
30567 ++ 0x43, 0xd6, 0x88, 0x39, 0x4c, 0x4a, 0xa6, 0x69,
30568 ++ 0x18, 0x93, 0x42, 0x4e, 0xb5, 0xcc, 0x66, 0x0d,
30569 ++ 0x09, 0xf8, 0x1e, 0x7c, 0xd3, 0x3c, 0x99, 0x0d,
30570 ++ 0x50, 0x1d, 0x62, 0xe9, 0x57, 0x06, 0xbf, 0x19,
30571 ++ 0x88, 0xdd, 0xad, 0x7b, 0x4f, 0xf9, 0xc7, 0x82,
30572 ++ 0x6d, 0x8d, 0xc8, 0xc4, 0xc5, 0x78, 0x17, 0x20,
30573 ++ 0x15, 0xc5, 0x52, 0x41, 0xcf, 0x5b, 0xd6, 0x7f,
30574 ++ 0x94, 0x02, 0x41, 0xe0, 0x40, 0x22, 0x03, 0x5e,
30575 ++ 0xd1, 0x53, 0xd4, 0x86, 0xd3, 0x2c, 0x9f, 0x0f,
30576 ++ 0x96, 0xe3, 0x6b, 0x9a, 0x76, 0x32, 0x06, 0x47,
30577 ++ 0x4b, 0x11, 0xb3, 0xdd, 0x03, 0x65, 0xbd, 0x9b,
30578 ++ 0x01, 0xda, 0x9c, 0xb9, 0x7e, 0x3f, 0x6a, 0xc4,
30579 ++ 0x7b, 0xea, 0xd4, 0x3c, 0xb9, 0xfb, 0x5c, 0x6b,
30580 ++ 0x64, 0x33, 0x52, 0xba, 0x64, 0x78, 0x8f, 0xa4,
30581 ++ 0xaf, 0x7a, 0x61, 0x8d, 0xbc, 0xc5, 0x73, 0xe9,
30582 ++ 0x6b, 0x58, 0x97, 0x4b, 0xbf, 0x63, 0x22, 0xd3,
30583 ++ 0x37, 0x02, 0x54, 0xc5, 0xb9, 0x16, 0x4a, 0xf0,
30584 ++ 0x19, 0xd8, 0x94, 0x57, 0xb8, 0x8a, 0xb3, 0x16,
30585 ++ 0x3b, 0xd0, 0x84, 0x8e, 0x67, 0xa6, 0xa3, 0x7d,
30586 ++ 0x78, 0xec, 0x00
30587 ++};
30588 ++static const u8 dec_assoc013[] __initconst = {
30589 ++ 0xb1, 0x69, 0x83, 0x87, 0x30, 0xaa, 0x5d, 0xb8,
30590 ++ 0x77, 0xe8, 0x21, 0xff, 0x06, 0x59, 0x35, 0xce,
30591 ++ 0x75, 0xfe, 0x38, 0xef, 0xb8, 0x91, 0x43, 0x8c,
30592 ++ 0xcf, 0x70, 0xdd, 0x0a, 0x68, 0xbf, 0xd4, 0xbc,
30593 ++ 0x16, 0x76, 0x99, 0x36, 0x1e, 0x58, 0x79, 0x5e,
30594 ++ 0xd4, 0x29, 0xf7, 0x33, 0x93, 0x48, 0xdb, 0x5f,
30595 ++ 0x01, 0xae, 0x9c, 0xb6, 0xe4, 0x88, 0x6d, 0x2b,
30596 ++ 0x76, 0x75, 0xe0, 0xf3, 0x74, 0xe2, 0xc9
30597 ++};
30598 ++static const u8 dec_nonce013[] __initconst = {
30599 ++ 0x05, 0xa3, 0x93, 0xed, 0x30, 0xc5, 0xa2, 0x06
30600 ++};
30601 ++static const u8 dec_key013[] __initconst = {
30602 ++ 0xb3, 0x35, 0x50, 0x03, 0x54, 0x2e, 0x40, 0x5e,
30603 ++ 0x8f, 0x59, 0x8e, 0xc5, 0x90, 0xd5, 0x27, 0x2d,
30604 ++ 0xba, 0x29, 0x2e, 0xcb, 0x1b, 0x70, 0x44, 0x1e,
30605 ++ 0x65, 0x91, 0x6e, 0x2a, 0x79, 0x22, 0xda, 0x64
30606 ++};
30607 ++
30608 ++static const struct chacha20poly1305_testvec
30609 ++chacha20poly1305_dec_vectors[] __initconst = {
30610 ++ { dec_input001, dec_output001, dec_assoc001, dec_nonce001, dec_key001,
30611 ++ sizeof(dec_input001), sizeof(dec_assoc001), sizeof(dec_nonce001) },
30612 ++ { dec_input002, dec_output002, dec_assoc002, dec_nonce002, dec_key002,
30613 ++ sizeof(dec_input002), sizeof(dec_assoc002), sizeof(dec_nonce002) },
30614 ++ { dec_input003, dec_output003, dec_assoc003, dec_nonce003, dec_key003,
30615 ++ sizeof(dec_input003), sizeof(dec_assoc003), sizeof(dec_nonce003) },
30616 ++ { dec_input004, dec_output004, dec_assoc004, dec_nonce004, dec_key004,
30617 ++ sizeof(dec_input004), sizeof(dec_assoc004), sizeof(dec_nonce004) },
30618 ++ { dec_input005, dec_output005, dec_assoc005, dec_nonce005, dec_key005,
30619 ++ sizeof(dec_input005), sizeof(dec_assoc005), sizeof(dec_nonce005) },
30620 ++ { dec_input006, dec_output006, dec_assoc006, dec_nonce006, dec_key006,
30621 ++ sizeof(dec_input006), sizeof(dec_assoc006), sizeof(dec_nonce006) },
30622 ++ { dec_input007, dec_output007, dec_assoc007, dec_nonce007, dec_key007,
30623 ++ sizeof(dec_input007), sizeof(dec_assoc007), sizeof(dec_nonce007) },
30624 ++ { dec_input008, dec_output008, dec_assoc008, dec_nonce008, dec_key008,
30625 ++ sizeof(dec_input008), sizeof(dec_assoc008), sizeof(dec_nonce008) },
30626 ++ { dec_input009, dec_output009, dec_assoc009, dec_nonce009, dec_key009,
30627 ++ sizeof(dec_input009), sizeof(dec_assoc009), sizeof(dec_nonce009) },
30628 ++ { dec_input010, dec_output010, dec_assoc010, dec_nonce010, dec_key010,
30629 ++ sizeof(dec_input010), sizeof(dec_assoc010), sizeof(dec_nonce010) },
30630 ++ { dec_input011, dec_output011, dec_assoc011, dec_nonce011, dec_key011,
30631 ++ sizeof(dec_input011), sizeof(dec_assoc011), sizeof(dec_nonce011) },
30632 ++ { dec_input012, dec_output012, dec_assoc012, dec_nonce012, dec_key012,
30633 ++ sizeof(dec_input012), sizeof(dec_assoc012), sizeof(dec_nonce012) },
30634 ++ { dec_input013, dec_output013, dec_assoc013, dec_nonce013, dec_key013,
30635 ++ sizeof(dec_input013), sizeof(dec_assoc013), sizeof(dec_nonce013),
30636 ++ true }
30637 ++};
30638 ++
30639 ++static const u8 xenc_input001[] __initconst = {
30640 ++ 0x49, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x65, 0x74,
30641 ++ 0x2d, 0x44, 0x72, 0x61, 0x66, 0x74, 0x73, 0x20,
30642 ++ 0x61, 0x72, 0x65, 0x20, 0x64, 0x72, 0x61, 0x66,
30643 ++ 0x74, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65,
30644 ++ 0x6e, 0x74, 0x73, 0x20, 0x76, 0x61, 0x6c, 0x69,
30645 ++ 0x64, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x20,
30646 ++ 0x6d, 0x61, 0x78, 0x69, 0x6d, 0x75, 0x6d, 0x20,
30647 ++ 0x6f, 0x66, 0x20, 0x73, 0x69, 0x78, 0x20, 0x6d,
30648 ++ 0x6f, 0x6e, 0x74, 0x68, 0x73, 0x20, 0x61, 0x6e,
30649 ++ 0x64, 0x20, 0x6d, 0x61, 0x79, 0x20, 0x62, 0x65,
30650 ++ 0x20, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x64,
30651 ++ 0x2c, 0x20, 0x72, 0x65, 0x70, 0x6c, 0x61, 0x63,
30652 ++ 0x65, 0x64, 0x2c, 0x20, 0x6f, 0x72, 0x20, 0x6f,
30653 ++ 0x62, 0x73, 0x6f, 0x6c, 0x65, 0x74, 0x65, 0x64,
30654 ++ 0x20, 0x62, 0x79, 0x20, 0x6f, 0x74, 0x68, 0x65,
30655 ++ 0x72, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65,
30656 ++ 0x6e, 0x74, 0x73, 0x20, 0x61, 0x74, 0x20, 0x61,
30657 ++ 0x6e, 0x79, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x2e,
30658 ++ 0x20, 0x49, 0x74, 0x20, 0x69, 0x73, 0x20, 0x69,
30659 ++ 0x6e, 0x61, 0x70, 0x70, 0x72, 0x6f, 0x70, 0x72,
30660 ++ 0x69, 0x61, 0x74, 0x65, 0x20, 0x74, 0x6f, 0x20,
30661 ++ 0x75, 0x73, 0x65, 0x20, 0x49, 0x6e, 0x74, 0x65,
30662 ++ 0x72, 0x6e, 0x65, 0x74, 0x2d, 0x44, 0x72, 0x61,
30663 ++ 0x66, 0x74, 0x73, 0x20, 0x61, 0x73, 0x20, 0x72,
30664 ++ 0x65, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65,
30665 ++ 0x20, 0x6d, 0x61, 0x74, 0x65, 0x72, 0x69, 0x61,
30666 ++ 0x6c, 0x20, 0x6f, 0x72, 0x20, 0x74, 0x6f, 0x20,
30667 ++ 0x63, 0x69, 0x74, 0x65, 0x20, 0x74, 0x68, 0x65,
30668 ++ 0x6d, 0x20, 0x6f, 0x74, 0x68, 0x65, 0x72, 0x20,
30669 ++ 0x74, 0x68, 0x61, 0x6e, 0x20, 0x61, 0x73, 0x20,
30670 ++ 0x2f, 0xe2, 0x80, 0x9c, 0x77, 0x6f, 0x72, 0x6b,
30671 ++ 0x20, 0x69, 0x6e, 0x20, 0x70, 0x72, 0x6f, 0x67,
30672 ++ 0x72, 0x65, 0x73, 0x73, 0x2e, 0x2f, 0xe2, 0x80,
30673 ++ 0x9d
30674 ++};
30675 ++static const u8 xenc_output001[] __initconst = {
30676 ++ 0x1a, 0x6e, 0x3a, 0xd9, 0xfd, 0x41, 0x3f, 0x77,
30677 ++ 0x54, 0x72, 0x0a, 0x70, 0x9a, 0xa0, 0x29, 0x92,
30678 ++ 0x2e, 0xed, 0x93, 0xcf, 0x0f, 0x71, 0x88, 0x18,
30679 ++ 0x7a, 0x9d, 0x2d, 0x24, 0xe0, 0xf5, 0xea, 0x3d,
30680 ++ 0x55, 0x64, 0xd7, 0xad, 0x2a, 0x1a, 0x1f, 0x7e,
30681 ++ 0x86, 0x6d, 0xb0, 0xce, 0x80, 0x41, 0x72, 0x86,
30682 ++ 0x26, 0xee, 0x84, 0xd7, 0xef, 0x82, 0x9e, 0xe2,
30683 ++ 0x60, 0x9d, 0x5a, 0xfc, 0xf0, 0xe4, 0x19, 0x85,
30684 ++ 0xea, 0x09, 0xc6, 0xfb, 0xb3, 0xa9, 0x50, 0x09,
30685 ++ 0xec, 0x5e, 0x11, 0x90, 0xa1, 0xc5, 0x4e, 0x49,
30686 ++ 0xef, 0x50, 0xd8, 0x8f, 0xe0, 0x78, 0xd7, 0xfd,
30687 ++ 0xb9, 0x3b, 0xc9, 0xf2, 0x91, 0xc8, 0x25, 0xc8,
30688 ++ 0xa7, 0x63, 0x60, 0xce, 0x10, 0xcd, 0xc6, 0x7f,
30689 ++ 0xf8, 0x16, 0xf8, 0xe1, 0x0a, 0xd9, 0xde, 0x79,
30690 ++ 0x50, 0x33, 0xf2, 0x16, 0x0f, 0x17, 0xba, 0xb8,
30691 ++ 0x5d, 0xd8, 0xdf, 0x4e, 0x51, 0xa8, 0x39, 0xd0,
30692 ++ 0x85, 0xca, 0x46, 0x6a, 0x10, 0xa7, 0xa3, 0x88,
30693 ++ 0xef, 0x79, 0xb9, 0xf8, 0x24, 0xf3, 0xe0, 0x71,
30694 ++ 0x7b, 0x76, 0x28, 0x46, 0x3a, 0x3a, 0x1b, 0x91,
30695 ++ 0xb6, 0xd4, 0x3e, 0x23, 0xe5, 0x44, 0x15, 0xbf,
30696 ++ 0x60, 0x43, 0x9d, 0xa4, 0xbb, 0xd5, 0x5f, 0x89,
30697 ++ 0xeb, 0xef, 0x8e, 0xfd, 0xdd, 0xb4, 0x0d, 0x46,
30698 ++ 0xf0, 0x69, 0x23, 0x63, 0xae, 0x94, 0xf5, 0x5e,
30699 ++ 0xa5, 0xad, 0x13, 0x1c, 0x41, 0x76, 0xe6, 0x90,
30700 ++ 0xd6, 0x6d, 0xa2, 0x8f, 0x97, 0x4c, 0xa8, 0x0b,
30701 ++ 0xcf, 0x8d, 0x43, 0x2b, 0x9c, 0x9b, 0xc5, 0x58,
30702 ++ 0xa5, 0xb6, 0x95, 0x9a, 0xbf, 0x81, 0xc6, 0x54,
30703 ++ 0xc9, 0x66, 0x0c, 0xe5, 0x4f, 0x6a, 0x53, 0xa1,
30704 ++ 0xe5, 0x0c, 0xba, 0x31, 0xde, 0x34, 0x64, 0x73,
30705 ++ 0x8a, 0x3b, 0xbd, 0x92, 0x01, 0xdb, 0x71, 0x69,
30706 ++ 0xf3, 0x58, 0x99, 0xbc, 0xd1, 0xcb, 0x4a, 0x05,
30707 ++ 0xe2, 0x58, 0x9c, 0x25, 0x17, 0xcd, 0xdc, 0x83,
30708 ++ 0xb7, 0xff, 0xfb, 0x09, 0x61, 0xad, 0xbf, 0x13,
30709 ++ 0x5b, 0x5e, 0xed, 0x46, 0x82, 0x6f, 0x22, 0xd8,
30710 ++ 0x93, 0xa6, 0x85, 0x5b, 0x40, 0x39, 0x5c, 0xc5,
30711 ++ 0x9c
30712 ++};
30713 ++static const u8 xenc_assoc001[] __initconst = {
30714 ++ 0xf3, 0x33, 0x88, 0x86, 0x00, 0x00, 0x00, 0x00,
30715 ++ 0x00, 0x00, 0x4e, 0x91
30716 ++};
30717 ++static const u8 xenc_nonce001[] __initconst = {
30718 ++ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
30719 ++ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
30720 ++ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
30721 ++};
30722 ++static const u8 xenc_key001[] __initconst = {
30723 ++ 0x1c, 0x92, 0x40, 0xa5, 0xeb, 0x55, 0xd3, 0x8a,
30724 ++ 0xf3, 0x33, 0x88, 0x86, 0x04, 0xf6, 0xb5, 0xf0,
30725 ++ 0x47, 0x39, 0x17, 0xc1, 0x40, 0x2b, 0x80, 0x09,
30726 ++ 0x9d, 0xca, 0x5c, 0xbc, 0x20, 0x70, 0x75, 0xc0
30727 ++};
30728 ++
30729 ++static const struct chacha20poly1305_testvec
30730 ++xchacha20poly1305_enc_vectors[] __initconst = {
30731 ++ { xenc_input001, xenc_output001, xenc_assoc001, xenc_nonce001, xenc_key001,
30732 ++ sizeof(xenc_input001), sizeof(xenc_assoc001), sizeof(xenc_nonce001) }
30733 ++};
30734 ++
30735 ++static const u8 xdec_input001[] __initconst = {
30736 ++ 0x1a, 0x6e, 0x3a, 0xd9, 0xfd, 0x41, 0x3f, 0x77,
30737 ++ 0x54, 0x72, 0x0a, 0x70, 0x9a, 0xa0, 0x29, 0x92,
30738 ++ 0x2e, 0xed, 0x93, 0xcf, 0x0f, 0x71, 0x88, 0x18,
30739 ++ 0x7a, 0x9d, 0x2d, 0x24, 0xe0, 0xf5, 0xea, 0x3d,
30740 ++ 0x55, 0x64, 0xd7, 0xad, 0x2a, 0x1a, 0x1f, 0x7e,
30741 ++ 0x86, 0x6d, 0xb0, 0xce, 0x80, 0x41, 0x72, 0x86,
30742 ++ 0x26, 0xee, 0x84, 0xd7, 0xef, 0x82, 0x9e, 0xe2,
30743 ++ 0x60, 0x9d, 0x5a, 0xfc, 0xf0, 0xe4, 0x19, 0x85,
30744 ++ 0xea, 0x09, 0xc6, 0xfb, 0xb3, 0xa9, 0x50, 0x09,
30745 ++ 0xec, 0x5e, 0x11, 0x90, 0xa1, 0xc5, 0x4e, 0x49,
30746 ++ 0xef, 0x50, 0xd8, 0x8f, 0xe0, 0x78, 0xd7, 0xfd,
30747 ++ 0xb9, 0x3b, 0xc9, 0xf2, 0x91, 0xc8, 0x25, 0xc8,
30748 ++ 0xa7, 0x63, 0x60, 0xce, 0x10, 0xcd, 0xc6, 0x7f,
30749 ++ 0xf8, 0x16, 0xf8, 0xe1, 0x0a, 0xd9, 0xde, 0x79,
30750 ++ 0x50, 0x33, 0xf2, 0x16, 0x0f, 0x17, 0xba, 0xb8,
30751 ++ 0x5d, 0xd8, 0xdf, 0x4e, 0x51, 0xa8, 0x39, 0xd0,
30752 ++ 0x85, 0xca, 0x46, 0x6a, 0x10, 0xa7, 0xa3, 0x88,
30753 ++ 0xef, 0x79, 0xb9, 0xf8, 0x24, 0xf3, 0xe0, 0x71,
30754 ++ 0x7b, 0x76, 0x28, 0x46, 0x3a, 0x3a, 0x1b, 0x91,
30755 ++ 0xb6, 0xd4, 0x3e, 0x23, 0xe5, 0x44, 0x15, 0xbf,
30756 ++ 0x60, 0x43, 0x9d, 0xa4, 0xbb, 0xd5, 0x5f, 0x89,
30757 ++ 0xeb, 0xef, 0x8e, 0xfd, 0xdd, 0xb4, 0x0d, 0x46,
30758 ++ 0xf0, 0x69, 0x23, 0x63, 0xae, 0x94, 0xf5, 0x5e,
30759 ++ 0xa5, 0xad, 0x13, 0x1c, 0x41, 0x76, 0xe6, 0x90,
30760 ++ 0xd6, 0x6d, 0xa2, 0x8f, 0x97, 0x4c, 0xa8, 0x0b,
30761 ++ 0xcf, 0x8d, 0x43, 0x2b, 0x9c, 0x9b, 0xc5, 0x58,
30762 ++ 0xa5, 0xb6, 0x95, 0x9a, 0xbf, 0x81, 0xc6, 0x54,
30763 ++ 0xc9, 0x66, 0x0c, 0xe5, 0x4f, 0x6a, 0x53, 0xa1,
30764 ++ 0xe5, 0x0c, 0xba, 0x31, 0xde, 0x34, 0x64, 0x73,
30765 ++ 0x8a, 0x3b, 0xbd, 0x92, 0x01, 0xdb, 0x71, 0x69,
30766 ++ 0xf3, 0x58, 0x99, 0xbc, 0xd1, 0xcb, 0x4a, 0x05,
30767 ++ 0xe2, 0x58, 0x9c, 0x25, 0x17, 0xcd, 0xdc, 0x83,
30768 ++ 0xb7, 0xff, 0xfb, 0x09, 0x61, 0xad, 0xbf, 0x13,
30769 ++ 0x5b, 0x5e, 0xed, 0x46, 0x82, 0x6f, 0x22, 0xd8,
30770 ++ 0x93, 0xa6, 0x85, 0x5b, 0x40, 0x39, 0x5c, 0xc5,
30771 ++ 0x9c
30772 ++};
30773 ++static const u8 xdec_output001[] __initconst = {
30774 ++ 0x49, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x65, 0x74,
30775 ++ 0x2d, 0x44, 0x72, 0x61, 0x66, 0x74, 0x73, 0x20,
30776 ++ 0x61, 0x72, 0x65, 0x20, 0x64, 0x72, 0x61, 0x66,
30777 ++ 0x74, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65,
30778 ++ 0x6e, 0x74, 0x73, 0x20, 0x76, 0x61, 0x6c, 0x69,
30779 ++ 0x64, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x20,
30780 ++ 0x6d, 0x61, 0x78, 0x69, 0x6d, 0x75, 0x6d, 0x20,
30781 ++ 0x6f, 0x66, 0x20, 0x73, 0x69, 0x78, 0x20, 0x6d,
30782 ++ 0x6f, 0x6e, 0x74, 0x68, 0x73, 0x20, 0x61, 0x6e,
30783 ++ 0x64, 0x20, 0x6d, 0x61, 0x79, 0x20, 0x62, 0x65,
30784 ++ 0x20, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x64,
30785 ++ 0x2c, 0x20, 0x72, 0x65, 0x70, 0x6c, 0x61, 0x63,
30786 ++ 0x65, 0x64, 0x2c, 0x20, 0x6f, 0x72, 0x20, 0x6f,
30787 ++ 0x62, 0x73, 0x6f, 0x6c, 0x65, 0x74, 0x65, 0x64,
30788 ++ 0x20, 0x62, 0x79, 0x20, 0x6f, 0x74, 0x68, 0x65,
30789 ++ 0x72, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65,
30790 ++ 0x6e, 0x74, 0x73, 0x20, 0x61, 0x74, 0x20, 0x61,
30791 ++ 0x6e, 0x79, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x2e,
30792 ++ 0x20, 0x49, 0x74, 0x20, 0x69, 0x73, 0x20, 0x69,
30793 ++ 0x6e, 0x61, 0x70, 0x70, 0x72, 0x6f, 0x70, 0x72,
30794 ++ 0x69, 0x61, 0x74, 0x65, 0x20, 0x74, 0x6f, 0x20,
30795 ++ 0x75, 0x73, 0x65, 0x20, 0x49, 0x6e, 0x74, 0x65,
30796 ++ 0x72, 0x6e, 0x65, 0x74, 0x2d, 0x44, 0x72, 0x61,
30797 ++ 0x66, 0x74, 0x73, 0x20, 0x61, 0x73, 0x20, 0x72,
30798 ++ 0x65, 0x66, 0x65, 0x72, 0x65, 0x6e, 0x63, 0x65,
30799 ++ 0x20, 0x6d, 0x61, 0x74, 0x65, 0x72, 0x69, 0x61,
30800 ++ 0x6c, 0x20, 0x6f, 0x72, 0x20, 0x74, 0x6f, 0x20,
30801 ++ 0x63, 0x69, 0x74, 0x65, 0x20, 0x74, 0x68, 0x65,
30802 ++ 0x6d, 0x20, 0x6f, 0x74, 0x68, 0x65, 0x72, 0x20,
30803 ++ 0x74, 0x68, 0x61, 0x6e, 0x20, 0x61, 0x73, 0x20,
30804 ++ 0x2f, 0xe2, 0x80, 0x9c, 0x77, 0x6f, 0x72, 0x6b,
30805 ++ 0x20, 0x69, 0x6e, 0x20, 0x70, 0x72, 0x6f, 0x67,
30806 ++ 0x72, 0x65, 0x73, 0x73, 0x2e, 0x2f, 0xe2, 0x80,
30807 ++ 0x9d
30808 ++};
30809 ++static const u8 xdec_assoc001[] __initconst = {
30810 ++ 0xf3, 0x33, 0x88, 0x86, 0x00, 0x00, 0x00, 0x00,
30811 ++ 0x00, 0x00, 0x4e, 0x91
30812 ++};
30813 ++static const u8 xdec_nonce001[] __initconst = {
30814 ++ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
30815 ++ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
30816 ++ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
30817 ++};
30818 ++static const u8 xdec_key001[] __initconst = {
30819 ++ 0x1c, 0x92, 0x40, 0xa5, 0xeb, 0x55, 0xd3, 0x8a,
30820 ++ 0xf3, 0x33, 0x88, 0x86, 0x04, 0xf6, 0xb5, 0xf0,
30821 ++ 0x47, 0x39, 0x17, 0xc1, 0x40, 0x2b, 0x80, 0x09,
30822 ++ 0x9d, 0xca, 0x5c, 0xbc, 0x20, 0x70, 0x75, 0xc0
30823 ++};
30824 ++
30825 ++static const struct chacha20poly1305_testvec
30826 ++xchacha20poly1305_dec_vectors[] __initconst = {
30827 ++ { xdec_input001, xdec_output001, xdec_assoc001, xdec_nonce001, xdec_key001,
30828 ++ sizeof(xdec_input001), sizeof(xdec_assoc001), sizeof(xdec_nonce001) }
30829 ++};
30830 ++
30831 ++static void __init
30832 ++chacha20poly1305_selftest_encrypt(u8 *dst, const u8 *src, const size_t src_len,
30833 ++ const u8 *ad, const size_t ad_len,
30834 ++ const u8 *nonce, const size_t nonce_len,
30835 ++ const u8 key[CHACHA20POLY1305_KEY_SIZE])
30836 ++{
30837 ++ if (nonce_len == 8)
30838 ++ chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len,
30839 ++ get_unaligned_le64(nonce), key);
30840 ++ else
30841 ++ BUG();
30842 ++}
30843 ++
30844 ++static bool __init
30845 ++decryption_success(bool func_ret, bool expect_failure, int memcmp_result)
30846 ++{
30847 ++ if (expect_failure)
30848 ++ return !func_ret;
30849 ++ return func_ret && !memcmp_result;
30850 ++}
30851 ++
30852 ++bool __init chacha20poly1305_selftest(void)
30853 ++{
30854 ++ enum { MAXIMUM_TEST_BUFFER_LEN = 1UL << 12 };
30855 ++ size_t i;
30856 ++ u8 *computed_output = NULL, *heap_src = NULL;
30857 ++ bool success = true, ret;
30858 ++
30859 ++ heap_src = kmalloc(MAXIMUM_TEST_BUFFER_LEN, GFP_KERNEL);
30860 ++ computed_output = kmalloc(MAXIMUM_TEST_BUFFER_LEN, GFP_KERNEL);
30861 ++ if (!heap_src || !computed_output) {
30862 ++ pr_err("chacha20poly1305 self-test malloc: FAIL\n");
30863 ++ success = false;
30864 ++ goto out;
30865 ++ }
30866 ++
30867 ++ for (i = 0; i < ARRAY_SIZE(chacha20poly1305_enc_vectors); ++i) {
30868 ++ memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN);
30869 ++ chacha20poly1305_selftest_encrypt(computed_output,
30870 ++ chacha20poly1305_enc_vectors[i].input,
30871 ++ chacha20poly1305_enc_vectors[i].ilen,
30872 ++ chacha20poly1305_enc_vectors[i].assoc,
30873 ++ chacha20poly1305_enc_vectors[i].alen,
30874 ++ chacha20poly1305_enc_vectors[i].nonce,
30875 ++ chacha20poly1305_enc_vectors[i].nlen,
30876 ++ chacha20poly1305_enc_vectors[i].key);
30877 ++ if (memcmp(computed_output,
30878 ++ chacha20poly1305_enc_vectors[i].output,
30879 ++ chacha20poly1305_enc_vectors[i].ilen +
30880 ++ POLY1305_DIGEST_SIZE)) {
30881 ++ pr_err("chacha20poly1305 encryption self-test %zu: FAIL\n",
30882 ++ i + 1);
30883 ++ success = false;
30884 ++ }
30885 ++ }
30886 ++
30887 ++ for (i = 0; i < ARRAY_SIZE(chacha20poly1305_dec_vectors); ++i) {
30888 ++ memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN);
30889 ++ ret = chacha20poly1305_decrypt(computed_output,
30890 ++ chacha20poly1305_dec_vectors[i].input,
30891 ++ chacha20poly1305_dec_vectors[i].ilen,
30892 ++ chacha20poly1305_dec_vectors[i].assoc,
30893 ++ chacha20poly1305_dec_vectors[i].alen,
30894 ++ get_unaligned_le64(chacha20poly1305_dec_vectors[i].nonce),
30895 ++ chacha20poly1305_dec_vectors[i].key);
30896 ++ if (!decryption_success(ret,
30897 ++ chacha20poly1305_dec_vectors[i].failure,
30898 ++ memcmp(computed_output,
30899 ++ chacha20poly1305_dec_vectors[i].output,
30900 ++ chacha20poly1305_dec_vectors[i].ilen -
30901 ++ POLY1305_DIGEST_SIZE))) {
30902 ++ pr_err("chacha20poly1305 decryption self-test %zu: FAIL\n",
30903 ++ i + 1);
30904 ++ success = false;
30905 ++ }
30906 ++ }
30907 ++
30908 ++
30909 ++ for (i = 0; i < ARRAY_SIZE(xchacha20poly1305_enc_vectors); ++i) {
30910 ++ memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN);
30911 ++ xchacha20poly1305_encrypt(computed_output,
30912 ++ xchacha20poly1305_enc_vectors[i].input,
30913 ++ xchacha20poly1305_enc_vectors[i].ilen,
30914 ++ xchacha20poly1305_enc_vectors[i].assoc,
30915 ++ xchacha20poly1305_enc_vectors[i].alen,
30916 ++ xchacha20poly1305_enc_vectors[i].nonce,
30917 ++ xchacha20poly1305_enc_vectors[i].key);
30918 ++ if (memcmp(computed_output,
30919 ++ xchacha20poly1305_enc_vectors[i].output,
30920 ++ xchacha20poly1305_enc_vectors[i].ilen +
30921 ++ POLY1305_DIGEST_SIZE)) {
30922 ++ pr_err("xchacha20poly1305 encryption self-test %zu: FAIL\n",
30923 ++ i + 1);
30924 ++ success = false;
30925 ++ }
30926 ++ }
30927 ++ for (i = 0; i < ARRAY_SIZE(xchacha20poly1305_dec_vectors); ++i) {
30928 ++ memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN);
30929 ++ ret = xchacha20poly1305_decrypt(computed_output,
30930 ++ xchacha20poly1305_dec_vectors[i].input,
30931 ++ xchacha20poly1305_dec_vectors[i].ilen,
30932 ++ xchacha20poly1305_dec_vectors[i].assoc,
30933 ++ xchacha20poly1305_dec_vectors[i].alen,
30934 ++ xchacha20poly1305_dec_vectors[i].nonce,
30935 ++ xchacha20poly1305_dec_vectors[i].key);
30936 ++ if (!decryption_success(ret,
30937 ++ xchacha20poly1305_dec_vectors[i].failure,
30938 ++ memcmp(computed_output,
30939 ++ xchacha20poly1305_dec_vectors[i].output,
30940 ++ xchacha20poly1305_dec_vectors[i].ilen -
30941 ++ POLY1305_DIGEST_SIZE))) {
30942 ++ pr_err("xchacha20poly1305 decryption self-test %zu: FAIL\n",
30943 ++ i + 1);
30944 ++ success = false;
30945 ++ }
30946 ++ }
30947 ++
30948 ++out:
30949 ++ kfree(heap_src);
30950 ++ kfree(computed_output);
30951 ++ return success;
30952 ++}
30953 +diff --git a/lib/crypto/chacha20poly1305.c b/lib/crypto/chacha20poly1305.c
30954 +new file mode 100644
30955 +index 000000000000..c12ddbe9eb92
30956 +--- /dev/null
30957 ++++ b/lib/crypto/chacha20poly1305.c
30958 +@@ -0,0 +1,219 @@
30959 ++// SPDX-License-Identifier: GPL-2.0 OR MIT
30960 ++/*
30961 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
30962 ++ *
30963 ++ * This is an implementation of the ChaCha20Poly1305 AEAD construction.
30964 ++ *
30965 ++ * Information: https://tools.ietf.org/html/rfc8439
30966 ++ */
30967 ++
30968 ++#include <crypto/algapi.h>
30969 ++#include <crypto/chacha20poly1305.h>
30970 ++#include <crypto/chacha.h>
30971 ++#include <crypto/poly1305.h>
30972 ++
30973 ++#include <asm/unaligned.h>
30974 ++#include <linux/kernel.h>
30975 ++#include <linux/init.h>
30976 ++#include <linux/mm.h>
30977 ++#include <linux/module.h>
30978 ++
30979 ++#define CHACHA_KEY_WORDS (CHACHA_KEY_SIZE / sizeof(u32))
30980 ++
30981 ++bool __init chacha20poly1305_selftest(void);
30982 ++
30983 ++static void chacha_load_key(u32 *k, const u8 *in)
30984 ++{
30985 ++ k[0] = get_unaligned_le32(in);
30986 ++ k[1] = get_unaligned_le32(in + 4);
30987 ++ k[2] = get_unaligned_le32(in + 8);
30988 ++ k[3] = get_unaligned_le32(in + 12);
30989 ++ k[4] = get_unaligned_le32(in + 16);
30990 ++ k[5] = get_unaligned_le32(in + 20);
30991 ++ k[6] = get_unaligned_le32(in + 24);
30992 ++ k[7] = get_unaligned_le32(in + 28);
30993 ++}
30994 ++
30995 ++static void xchacha_init(u32 *chacha_state, const u8 *key, const u8 *nonce)
30996 ++{
30997 ++ u32 k[CHACHA_KEY_WORDS];
30998 ++ u8 iv[CHACHA_IV_SIZE];
30999 ++
31000 ++ memset(iv, 0, 8);
31001 ++ memcpy(iv + 8, nonce + 16, 8);
31002 ++
31003 ++ chacha_load_key(k, key);
31004 ++
31005 ++ /* Compute the subkey given the original key and first 128 nonce bits */
31006 ++ chacha_init(chacha_state, k, nonce);
31007 ++ hchacha_block(chacha_state, k, 20);
31008 ++
31009 ++ chacha_init(chacha_state, k, iv);
31010 ++
31011 ++ memzero_explicit(k, sizeof(k));
31012 ++ memzero_explicit(iv, sizeof(iv));
31013 ++}
31014 ++
31015 ++static void
31016 ++__chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
31017 ++ const u8 *ad, const size_t ad_len, u32 *chacha_state)
31018 ++{
31019 ++ const u8 *pad0 = page_address(ZERO_PAGE(0));
31020 ++ struct poly1305_desc_ctx poly1305_state;
31021 ++ union {
31022 ++ u8 block0[POLY1305_KEY_SIZE];
31023 ++ __le64 lens[2];
31024 ++ } b;
31025 ++
31026 ++ chacha_crypt(chacha_state, b.block0, pad0, sizeof(b.block0), 20);
31027 ++ poly1305_init(&poly1305_state, b.block0);
31028 ++
31029 ++ poly1305_update(&poly1305_state, ad, ad_len);
31030 ++ if (ad_len & 0xf)
31031 ++ poly1305_update(&poly1305_state, pad0, 0x10 - (ad_len & 0xf));
31032 ++
31033 ++ chacha_crypt(chacha_state, dst, src, src_len, 20);
31034 ++
31035 ++ poly1305_update(&poly1305_state, dst, src_len);
31036 ++ if (src_len & 0xf)
31037 ++ poly1305_update(&poly1305_state, pad0, 0x10 - (src_len & 0xf));
31038 ++
31039 ++ b.lens[0] = cpu_to_le64(ad_len);
31040 ++ b.lens[1] = cpu_to_le64(src_len);
31041 ++ poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens));
31042 ++
31043 ++ poly1305_final(&poly1305_state, dst + src_len);
31044 ++
31045 ++ memzero_explicit(chacha_state, CHACHA_STATE_WORDS * sizeof(u32));
31046 ++ memzero_explicit(&b, sizeof(b));
31047 ++}
31048 ++
31049 ++void chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
31050 ++ const u8 *ad, const size_t ad_len,
31051 ++ const u64 nonce,
31052 ++ const u8 key[CHACHA20POLY1305_KEY_SIZE])
31053 ++{
31054 ++ u32 chacha_state[CHACHA_STATE_WORDS];
31055 ++ u32 k[CHACHA_KEY_WORDS];
31056 ++ __le64 iv[2];
31057 ++
31058 ++ chacha_load_key(k, key);
31059 ++
31060 ++ iv[0] = 0;
31061 ++ iv[1] = cpu_to_le64(nonce);
31062 ++
31063 ++ chacha_init(chacha_state, k, (u8 *)iv);
31064 ++ __chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, chacha_state);
31065 ++
31066 ++ memzero_explicit(iv, sizeof(iv));
31067 ++ memzero_explicit(k, sizeof(k));
31068 ++}
31069 ++EXPORT_SYMBOL(chacha20poly1305_encrypt);
31070 ++
31071 ++void xchacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
31072 ++ const u8 *ad, const size_t ad_len,
31073 ++ const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE],
31074 ++ const u8 key[CHACHA20POLY1305_KEY_SIZE])
31075 ++{
31076 ++ u32 chacha_state[CHACHA_STATE_WORDS];
31077 ++
31078 ++ xchacha_init(chacha_state, key, nonce);
31079 ++ __chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, chacha_state);
31080 ++}
31081 ++EXPORT_SYMBOL(xchacha20poly1305_encrypt);
31082 ++
31083 ++static bool
31084 ++__chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
31085 ++ const u8 *ad, const size_t ad_len, u32 *chacha_state)
31086 ++{
31087 ++ const u8 *pad0 = page_address(ZERO_PAGE(0));
31088 ++ struct poly1305_desc_ctx poly1305_state;
31089 ++ size_t dst_len;
31090 ++ int ret;
31091 ++ union {
31092 ++ u8 block0[POLY1305_KEY_SIZE];
31093 ++ u8 mac[POLY1305_DIGEST_SIZE];
31094 ++ __le64 lens[2];
31095 ++ } b;
31096 ++
31097 ++ if (unlikely(src_len < POLY1305_DIGEST_SIZE))
31098 ++ return false;
31099 ++
31100 ++ chacha_crypt(chacha_state, b.block0, pad0, sizeof(b.block0), 20);
31101 ++ poly1305_init(&poly1305_state, b.block0);
31102 ++
31103 ++ poly1305_update(&poly1305_state, ad, ad_len);
31104 ++ if (ad_len & 0xf)
31105 ++ poly1305_update(&poly1305_state, pad0, 0x10 - (ad_len & 0xf));
31106 ++
31107 ++ dst_len = src_len - POLY1305_DIGEST_SIZE;
31108 ++ poly1305_update(&poly1305_state, src, dst_len);
31109 ++ if (dst_len & 0xf)
31110 ++ poly1305_update(&poly1305_state, pad0, 0x10 - (dst_len & 0xf));
31111 ++
31112 ++ b.lens[0] = cpu_to_le64(ad_len);
31113 ++ b.lens[1] = cpu_to_le64(dst_len);
31114 ++ poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens));
31115 ++
31116 ++ poly1305_final(&poly1305_state, b.mac);
31117 ++
31118 ++ ret = crypto_memneq(b.mac, src + dst_len, POLY1305_DIGEST_SIZE);
31119 ++ if (likely(!ret))
31120 ++ chacha_crypt(chacha_state, dst, src, dst_len, 20);
31121 ++
31122 ++ memzero_explicit(&b, sizeof(b));
31123 ++
31124 ++ return !ret;
31125 ++}
31126 ++
31127 ++bool chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
31128 ++ const u8 *ad, const size_t ad_len,
31129 ++ const u64 nonce,
31130 ++ const u8 key[CHACHA20POLY1305_KEY_SIZE])
31131 ++{
31132 ++ u32 chacha_state[CHACHA_STATE_WORDS];
31133 ++ u32 k[CHACHA_KEY_WORDS];
31134 ++ __le64 iv[2];
31135 ++ bool ret;
31136 ++
31137 ++ chacha_load_key(k, key);
31138 ++
31139 ++ iv[0] = 0;
31140 ++ iv[1] = cpu_to_le64(nonce);
31141 ++
31142 ++ chacha_init(chacha_state, k, (u8 *)iv);
31143 ++ ret = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len,
31144 ++ chacha_state);
31145 ++
31146 ++ memzero_explicit(chacha_state, sizeof(chacha_state));
31147 ++ memzero_explicit(iv, sizeof(iv));
31148 ++ memzero_explicit(k, sizeof(k));
31149 ++ return ret;
31150 ++}
31151 ++EXPORT_SYMBOL(chacha20poly1305_decrypt);
31152 ++
31153 ++bool xchacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
31154 ++ const u8 *ad, const size_t ad_len,
31155 ++ const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE],
31156 ++ const u8 key[CHACHA20POLY1305_KEY_SIZE])
31157 ++{
31158 ++ u32 chacha_state[CHACHA_STATE_WORDS];
31159 ++
31160 ++ xchacha_init(chacha_state, key, nonce);
31161 ++ return __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len,
31162 ++ chacha_state);
31163 ++}
31164 ++EXPORT_SYMBOL(xchacha20poly1305_decrypt);
31165 ++
31166 ++static int __init mod_init(void)
31167 ++{
31168 ++ if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) &&
31169 ++ WARN_ON(!chacha20poly1305_selftest()))
31170 ++ return -ENODEV;
31171 ++ return 0;
31172 ++}
31173 ++
31174 ++module_init(mod_init);
31175 ++MODULE_LICENSE("GPL v2");
31176 ++MODULE_DESCRIPTION("ChaCha20Poly1305 AEAD construction");
31177 ++MODULE_AUTHOR("Jason A. Donenfeld <Jason@×××××.com>");
31178 +--
31179 +cgit v1.2.3-4-ga26e
31180 +
31181 +
31182 +From b026668637e28f20e8b33e6db74a6ff1de12f17f Mon Sep 17 00:00:00 2001
31183 +From: Ard Biesheuvel <ardb@××××××.org>
31184 +Date: Fri, 8 Nov 2019 13:22:40 +0100
31185 +Subject: crypto: lib/chacha20poly1305 - reimplement crypt_from_sg() routine
31186 +
31187 +commit d95312a3ccc0cd544d374be2fc45aeaa803e5fd9 upstream.
31188 +
31189 +Reimplement the library routines to perform chacha20poly1305 en/decryption
31190 +on scatterlists, without [ab]using the [deprecated] blkcipher interface,
31191 +which is rather heavyweight and does things we don't really need.
31192 +
31193 +Instead, we use the sg_miter API in a novel and clever way, to iterate
31194 +over the scatterlist in-place (i.e., source == destination, which is the
31195 +only way this library is expected to be used). That way, we don't have to
31196 +iterate over two scatterlists in parallel.
31197 +
31198 +Another optimization is that, instead of relying on the blkcipher walker
31199 +to present the input in suitable chunks, we recognize that ChaCha is a
31200 +stream cipher, and so we can simply deal with partial blocks by keeping a
31201 +block of keystream on the stack and using crypto_xor() to mix it with
31202 +the in/output.
31203 +
31204 +Finally, we omit the scatterwalk_map_and_copy() call if the last element of
31205 +the scatterlist covers the MAC as well (which is the common case),
31206 +avoiding the need to walk the scatterlist and kmap() the page twice.
31207 +
31208 +Signed-off-by: Ard Biesheuvel <ardb@××××××.org>
31209 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
31210 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
31211 +---
31212 + include/crypto/chacha20poly1305.h | 11 +++
31213 + lib/crypto/chacha20poly1305-selftest.c | 45 ++++++++++
31214 + lib/crypto/chacha20poly1305.c | 150 +++++++++++++++++++++++++++++++++
31215 + 3 files changed, 206 insertions(+)
31216 +
31217 +diff --git a/include/crypto/chacha20poly1305.h b/include/crypto/chacha20poly1305.h
31218 +index ad3b1de58df8..234ee28078ef 100644
31219 +--- a/include/crypto/chacha20poly1305.h
31220 ++++ b/include/crypto/chacha20poly1305.h
31221 +@@ -7,6 +7,7 @@
31222 + #define __CHACHA20POLY1305_H
31223 +
31224 + #include <linux/types.h>
31225 ++#include <linux/scatterlist.h>
31226 +
31227 + enum chacha20poly1305_lengths {
31228 + XCHACHA20POLY1305_NONCE_SIZE = 24,
31229 +@@ -34,4 +35,14 @@ bool __must_check xchacha20poly1305_decrypt(
31230 + const size_t ad_len, const u8 nonce[XCHACHA20POLY1305_NONCE_SIZE],
31231 + const u8 key[CHACHA20POLY1305_KEY_SIZE]);
31232 +
31233 ++bool chacha20poly1305_encrypt_sg_inplace(struct scatterlist *src, size_t src_len,
31234 ++ const u8 *ad, const size_t ad_len,
31235 ++ const u64 nonce,
31236 ++ const u8 key[CHACHA20POLY1305_KEY_SIZE]);
31237 ++
31238 ++bool chacha20poly1305_decrypt_sg_inplace(struct scatterlist *src, size_t src_len,
31239 ++ const u8 *ad, const size_t ad_len,
31240 ++ const u64 nonce,
31241 ++ const u8 key[CHACHA20POLY1305_KEY_SIZE]);
31242 ++
31243 + #endif /* __CHACHA20POLY1305_H */
31244 +diff --git a/lib/crypto/chacha20poly1305-selftest.c b/lib/crypto/chacha20poly1305-selftest.c
31245 +index d1ed0f27cfdb..465de46dbdef 100644
31246 +--- a/lib/crypto/chacha20poly1305-selftest.c
31247 ++++ b/lib/crypto/chacha20poly1305-selftest.c
31248 +@@ -7250,6 +7250,7 @@ bool __init chacha20poly1305_selftest(void)
31249 + enum { MAXIMUM_TEST_BUFFER_LEN = 1UL << 12 };
31250 + size_t i;
31251 + u8 *computed_output = NULL, *heap_src = NULL;
31252 ++ struct scatterlist sg_src;
31253 + bool success = true, ret;
31254 +
31255 + heap_src = kmalloc(MAXIMUM_TEST_BUFFER_LEN, GFP_KERNEL);
31256 +@@ -7280,6 +7281,29 @@ bool __init chacha20poly1305_selftest(void)
31257 + }
31258 + }
31259 +
31260 ++ for (i = 0; i < ARRAY_SIZE(chacha20poly1305_enc_vectors); ++i) {
31261 ++ if (chacha20poly1305_enc_vectors[i].nlen != 8)
31262 ++ continue;
31263 ++ memcpy(heap_src, chacha20poly1305_enc_vectors[i].input,
31264 ++ chacha20poly1305_enc_vectors[i].ilen);
31265 ++ sg_init_one(&sg_src, heap_src,
31266 ++ chacha20poly1305_enc_vectors[i].ilen + POLY1305_DIGEST_SIZE);
31267 ++ chacha20poly1305_encrypt_sg_inplace(&sg_src,
31268 ++ chacha20poly1305_enc_vectors[i].ilen,
31269 ++ chacha20poly1305_enc_vectors[i].assoc,
31270 ++ chacha20poly1305_enc_vectors[i].alen,
31271 ++ get_unaligned_le64(chacha20poly1305_enc_vectors[i].nonce),
31272 ++ chacha20poly1305_enc_vectors[i].key);
31273 ++ if (memcmp(heap_src,
31274 ++ chacha20poly1305_enc_vectors[i].output,
31275 ++ chacha20poly1305_enc_vectors[i].ilen +
31276 ++ POLY1305_DIGEST_SIZE)) {
31277 ++ pr_err("chacha20poly1305 sg encryption self-test %zu: FAIL\n",
31278 ++ i + 1);
31279 ++ success = false;
31280 ++ }
31281 ++ }
31282 ++
31283 + for (i = 0; i < ARRAY_SIZE(chacha20poly1305_dec_vectors); ++i) {
31284 + memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN);
31285 + ret = chacha20poly1305_decrypt(computed_output,
31286 +@@ -7301,6 +7325,27 @@ bool __init chacha20poly1305_selftest(void)
31287 + }
31288 + }
31289 +
31290 ++ for (i = 0; i < ARRAY_SIZE(chacha20poly1305_dec_vectors); ++i) {
31291 ++ memcpy(heap_src, chacha20poly1305_dec_vectors[i].input,
31292 ++ chacha20poly1305_dec_vectors[i].ilen);
31293 ++ sg_init_one(&sg_src, heap_src,
31294 ++ chacha20poly1305_dec_vectors[i].ilen);
31295 ++ ret = chacha20poly1305_decrypt_sg_inplace(&sg_src,
31296 ++ chacha20poly1305_dec_vectors[i].ilen,
31297 ++ chacha20poly1305_dec_vectors[i].assoc,
31298 ++ chacha20poly1305_dec_vectors[i].alen,
31299 ++ get_unaligned_le64(chacha20poly1305_dec_vectors[i].nonce),
31300 ++ chacha20poly1305_dec_vectors[i].key);
31301 ++ if (!decryption_success(ret,
31302 ++ chacha20poly1305_dec_vectors[i].failure,
31303 ++ memcmp(heap_src, chacha20poly1305_dec_vectors[i].output,
31304 ++ chacha20poly1305_dec_vectors[i].ilen -
31305 ++ POLY1305_DIGEST_SIZE))) {
31306 ++ pr_err("chacha20poly1305 sg decryption self-test %zu: FAIL\n",
31307 ++ i + 1);
31308 ++ success = false;
31309 ++ }
31310 ++ }
31311 +
31312 + for (i = 0; i < ARRAY_SIZE(xchacha20poly1305_enc_vectors); ++i) {
31313 + memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN);
31314 +diff --git a/lib/crypto/chacha20poly1305.c b/lib/crypto/chacha20poly1305.c
31315 +index c12ddbe9eb92..821e5cc9b14e 100644
31316 +--- a/lib/crypto/chacha20poly1305.c
31317 ++++ b/lib/crypto/chacha20poly1305.c
31318 +@@ -11,6 +11,7 @@
31319 + #include <crypto/chacha20poly1305.h>
31320 + #include <crypto/chacha.h>
31321 + #include <crypto/poly1305.h>
31322 ++#include <crypto/scatterwalk.h>
31323 +
31324 + #include <asm/unaligned.h>
31325 + #include <linux/kernel.h>
31326 +@@ -205,6 +206,155 @@ bool xchacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
31327 + }
31328 + EXPORT_SYMBOL(xchacha20poly1305_decrypt);
31329 +
31330 ++static
31331 ++bool chacha20poly1305_crypt_sg_inplace(struct scatterlist *src,
31332 ++ const size_t src_len,
31333 ++ const u8 *ad, const size_t ad_len,
31334 ++ const u64 nonce,
31335 ++ const u8 key[CHACHA20POLY1305_KEY_SIZE],
31336 ++ int encrypt)
31337 ++{
31338 ++ const u8 *pad0 = page_address(ZERO_PAGE(0));
31339 ++ struct poly1305_desc_ctx poly1305_state;
31340 ++ u32 chacha_state[CHACHA_STATE_WORDS];
31341 ++ struct sg_mapping_iter miter;
31342 ++ size_t partial = 0;
31343 ++ unsigned int flags;
31344 ++ bool ret = true;
31345 ++ int sl;
31346 ++ union {
31347 ++ struct {
31348 ++ u32 k[CHACHA_KEY_WORDS];
31349 ++ __le64 iv[2];
31350 ++ };
31351 ++ u8 block0[POLY1305_KEY_SIZE];
31352 ++ u8 chacha_stream[CHACHA_BLOCK_SIZE];
31353 ++ struct {
31354 ++ u8 mac[2][POLY1305_DIGEST_SIZE];
31355 ++ };
31356 ++ __le64 lens[2];
31357 ++ } b __aligned(16);
31358 ++
31359 ++ chacha_load_key(b.k, key);
31360 ++
31361 ++ b.iv[0] = 0;
31362 ++ b.iv[1] = cpu_to_le64(nonce);
31363 ++
31364 ++ chacha_init(chacha_state, b.k, (u8 *)b.iv);
31365 ++ chacha_crypt(chacha_state, b.block0, pad0, sizeof(b.block0), 20);
31366 ++ poly1305_init(&poly1305_state, b.block0);
31367 ++
31368 ++ if (unlikely(ad_len)) {
31369 ++ poly1305_update(&poly1305_state, ad, ad_len);
31370 ++ if (ad_len & 0xf)
31371 ++ poly1305_update(&poly1305_state, pad0, 0x10 - (ad_len & 0xf));
31372 ++ }
31373 ++
31374 ++ flags = SG_MITER_TO_SG;
31375 ++ if (!preemptible())
31376 ++ flags |= SG_MITER_ATOMIC;
31377 ++
31378 ++ sg_miter_start(&miter, src, sg_nents(src), flags);
31379 ++
31380 ++ for (sl = src_len; sl > 0 && sg_miter_next(&miter); sl -= miter.length) {
31381 ++ u8 *addr = miter.addr;
31382 ++ size_t length = min_t(size_t, sl, miter.length);
31383 ++
31384 ++ if (!encrypt)
31385 ++ poly1305_update(&poly1305_state, addr, length);
31386 ++
31387 ++ if (unlikely(partial)) {
31388 ++ size_t l = min(length, CHACHA_BLOCK_SIZE - partial);
31389 ++
31390 ++ crypto_xor(addr, b.chacha_stream + partial, l);
31391 ++ partial = (partial + l) & (CHACHA_BLOCK_SIZE - 1);
31392 ++
31393 ++ addr += l;
31394 ++ length -= l;
31395 ++ }
31396 ++
31397 ++ if (likely(length >= CHACHA_BLOCK_SIZE || length == sl)) {
31398 ++ size_t l = length;
31399 ++
31400 ++ if (unlikely(length < sl))
31401 ++ l &= ~(CHACHA_BLOCK_SIZE - 1);
31402 ++ chacha_crypt(chacha_state, addr, addr, l, 20);
31403 ++ addr += l;
31404 ++ length -= l;
31405 ++ }
31406 ++
31407 ++ if (unlikely(length > 0)) {
31408 ++ chacha_crypt(chacha_state, b.chacha_stream, pad0,
31409 ++ CHACHA_BLOCK_SIZE, 20);
31410 ++ crypto_xor(addr, b.chacha_stream, length);
31411 ++ partial = length;
31412 ++ }
31413 ++
31414 ++ if (encrypt)
31415 ++ poly1305_update(&poly1305_state, miter.addr,
31416 ++ min_t(size_t, sl, miter.length));
31417 ++ }
31418 ++
31419 ++ if (src_len & 0xf)
31420 ++ poly1305_update(&poly1305_state, pad0, 0x10 - (src_len & 0xf));
31421 ++
31422 ++ b.lens[0] = cpu_to_le64(ad_len);
31423 ++ b.lens[1] = cpu_to_le64(src_len);
31424 ++ poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens));
31425 ++
31426 ++ if (likely(sl <= -POLY1305_DIGEST_SIZE)) {
31427 ++ if (encrypt) {
31428 ++ poly1305_final(&poly1305_state,
31429 ++ miter.addr + miter.length + sl);
31430 ++ ret = true;
31431 ++ } else {
31432 ++ poly1305_final(&poly1305_state, b.mac[0]);
31433 ++ ret = !crypto_memneq(b.mac[0],
31434 ++ miter.addr + miter.length + sl,
31435 ++ POLY1305_DIGEST_SIZE);
31436 ++ }
31437 ++ }
31438 ++
31439 ++ sg_miter_stop(&miter);
31440 ++
31441 ++ if (unlikely(sl > -POLY1305_DIGEST_SIZE)) {
31442 ++ poly1305_final(&poly1305_state, b.mac[1]);
31443 ++ scatterwalk_map_and_copy(b.mac[encrypt], src, src_len,
31444 ++ sizeof(b.mac[1]), encrypt);
31445 ++ ret = encrypt ||
31446 ++ !crypto_memneq(b.mac[0], b.mac[1], POLY1305_DIGEST_SIZE);
31447 ++ }
31448 ++
31449 ++ memzero_explicit(chacha_state, sizeof(chacha_state));
31450 ++ memzero_explicit(&b, sizeof(b));
31451 ++
31452 ++ return ret;
31453 ++}
31454 ++
31455 ++bool chacha20poly1305_encrypt_sg_inplace(struct scatterlist *src, size_t src_len,
31456 ++ const u8 *ad, const size_t ad_len,
31457 ++ const u64 nonce,
31458 ++ const u8 key[CHACHA20POLY1305_KEY_SIZE])
31459 ++{
31460 ++ return chacha20poly1305_crypt_sg_inplace(src, src_len, ad, ad_len,
31461 ++ nonce, key, 1);
31462 ++}
31463 ++EXPORT_SYMBOL(chacha20poly1305_encrypt_sg_inplace);
31464 ++
31465 ++bool chacha20poly1305_decrypt_sg_inplace(struct scatterlist *src, size_t src_len,
31466 ++ const u8 *ad, const size_t ad_len,
31467 ++ const u64 nonce,
31468 ++ const u8 key[CHACHA20POLY1305_KEY_SIZE])
31469 ++{
31470 ++ if (unlikely(src_len < POLY1305_DIGEST_SIZE))
31471 ++ return false;
31472 ++
31473 ++ return chacha20poly1305_crypt_sg_inplace(src,
31474 ++ src_len - POLY1305_DIGEST_SIZE,
31475 ++ ad, ad_len, nonce, key, 0);
31476 ++}
31477 ++EXPORT_SYMBOL(chacha20poly1305_decrypt_sg_inplace);
31478 ++
31479 + static int __init mod_init(void)
31480 + {
31481 + if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) &&
31482 +--
31483 +cgit v1.2.3-4-ga26e
31484 +
31485 +
31486 +From 8484741aa871e6f92bc484bef0bf0655e5a31013 Mon Sep 17 00:00:00 2001
31487 +From: Eric Biggers <ebiggers@××××××.com>
31488 +Date: Sun, 17 Nov 2019 23:21:29 -0800
31489 +Subject: crypto: chacha_generic - remove unnecessary setkey() functions
31490 +
31491 +commit 2043323a799a660bc84bbee404cf7a2617ec6157 upstream.
31492 +
31493 +Use chacha20_setkey() and chacha12_setkey() from
31494 +<crypto/internal/chacha.h> instead of defining them again in
31495 +chacha_generic.c.
31496 +
31497 +Signed-off-by: Eric Biggers <ebiggers@××××××.com>
31498 +Acked-by: Ard Biesheuvel <ardb@××××××.org>
31499 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
31500 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
31501 +---
31502 + crypto/chacha_generic.c | 18 +++---------------
31503 + 1 file changed, 3 insertions(+), 15 deletions(-)
31504 +
31505 +diff --git a/crypto/chacha_generic.c b/crypto/chacha_generic.c
31506 +index c1b147318393..8beea79ab117 100644
31507 +--- a/crypto/chacha_generic.c
31508 ++++ b/crypto/chacha_generic.c
31509 +@@ -37,18 +37,6 @@ static int chacha_stream_xor(struct skcipher_request *req,
31510 + return err;
31511 + }
31512 +
31513 +-static int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
31514 +- unsigned int keysize)
31515 +-{
31516 +- return chacha_setkey(tfm, key, keysize, 20);
31517 +-}
31518 +-
31519 +-static int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key,
31520 +- unsigned int keysize)
31521 +-{
31522 +- return chacha_setkey(tfm, key, keysize, 12);
31523 +-}
31524 +-
31525 + static int crypto_chacha_crypt(struct skcipher_request *req)
31526 + {
31527 + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
31528 +@@ -91,7 +79,7 @@ static struct skcipher_alg algs[] = {
31529 + .max_keysize = CHACHA_KEY_SIZE,
31530 + .ivsize = CHACHA_IV_SIZE,
31531 + .chunksize = CHACHA_BLOCK_SIZE,
31532 +- .setkey = crypto_chacha20_setkey,
31533 ++ .setkey = chacha20_setkey,
31534 + .encrypt = crypto_chacha_crypt,
31535 + .decrypt = crypto_chacha_crypt,
31536 + }, {
31537 +@@ -106,7 +94,7 @@ static struct skcipher_alg algs[] = {
31538 + .max_keysize = CHACHA_KEY_SIZE,
31539 + .ivsize = XCHACHA_IV_SIZE,
31540 + .chunksize = CHACHA_BLOCK_SIZE,
31541 +- .setkey = crypto_chacha20_setkey,
31542 ++ .setkey = chacha20_setkey,
31543 + .encrypt = crypto_xchacha_crypt,
31544 + .decrypt = crypto_xchacha_crypt,
31545 + }, {
31546 +@@ -121,7 +109,7 @@ static struct skcipher_alg algs[] = {
31547 + .max_keysize = CHACHA_KEY_SIZE,
31548 + .ivsize = XCHACHA_IV_SIZE,
31549 + .chunksize = CHACHA_BLOCK_SIZE,
31550 +- .setkey = crypto_chacha12_setkey,
31551 ++ .setkey = chacha12_setkey,
31552 + .encrypt = crypto_xchacha_crypt,
31553 + .decrypt = crypto_xchacha_crypt,
31554 + }
31555 +--
31556 +cgit v1.2.3-4-ga26e
31557 +
31558 +
31559 +From 153c42adc8b12f09b0bc292e5d124f7d5b528a7a Mon Sep 17 00:00:00 2001
31560 +From: Eric Biggers <ebiggers@××××××.com>
31561 +Date: Sun, 17 Nov 2019 23:21:58 -0800
31562 +Subject: crypto: x86/chacha - only unregister algorithms if registered
31563 +
31564 +commit b62755aed3a3f5ca9edd2718339ccea3b6bbbe57 upstream.
31565 +
31566 +It's not valid to call crypto_unregister_skciphers() without a prior
31567 +call to crypto_register_skciphers().
31568 +
31569 +Fixes: 84e03fa39fbe ("crypto: x86/chacha - expose SIMD ChaCha routine as library function")
31570 +Signed-off-by: Eric Biggers <ebiggers@××××××.com>
31571 +Acked-by: Ard Biesheuvel <ardb@××××××.org>
31572 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
31573 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
31574 +---
31575 + arch/x86/crypto/chacha_glue.c | 3 ++-
31576 + 1 file changed, 2 insertions(+), 1 deletion(-)
31577 +
31578 +diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c
31579 +index b391e13a9e41..a94e30b6f941 100644
31580 +--- a/arch/x86/crypto/chacha_glue.c
31581 ++++ b/arch/x86/crypto/chacha_glue.c
31582 +@@ -304,7 +304,8 @@ static int __init chacha_simd_mod_init(void)
31583 +
31584 + static void __exit chacha_simd_mod_fini(void)
31585 + {
31586 +- crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
31587 ++ if (boot_cpu_has(X86_FEATURE_SSSE3))
31588 ++ crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
31589 + }
31590 +
31591 + module_init(chacha_simd_mod_init);
31592 +--
31593 +cgit v1.2.3-4-ga26e
31594 +
31595 +
31596 +From e1e6a56465fdd017b95c1905f4a43ee1206e7ff1 Mon Sep 17 00:00:00 2001
31597 +From: Eric Biggers <ebiggers@××××××.com>
31598 +Date: Sun, 17 Nov 2019 23:22:16 -0800
31599 +Subject: crypto: lib/chacha20poly1305 - use chacha20_crypt()
31600 +
31601 +commit 413808b71e6204b0cc1eeaa77960f7c3cd381d33 upstream.
31602 +
31603 +Use chacha20_crypt() instead of chacha_crypt(), since it's not really
31604 +appropriate for users of the ChaCha library API to be passing the number
31605 +of rounds as an argument.
31606 +
31607 +Signed-off-by: Eric Biggers <ebiggers@××××××.com>
31608 +Acked-by: Ard Biesheuvel <ardb@××××××.org>
31609 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
31610 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
31611 +---
31612 + lib/crypto/chacha20poly1305.c | 16 ++++++++--------
31613 + 1 file changed, 8 insertions(+), 8 deletions(-)
31614 +
31615 +diff --git a/lib/crypto/chacha20poly1305.c b/lib/crypto/chacha20poly1305.c
31616 +index 821e5cc9b14e..6d83cafebc69 100644
31617 +--- a/lib/crypto/chacha20poly1305.c
31618 ++++ b/lib/crypto/chacha20poly1305.c
31619 +@@ -66,14 +66,14 @@ __chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
31620 + __le64 lens[2];
31621 + } b;
31622 +
31623 +- chacha_crypt(chacha_state, b.block0, pad0, sizeof(b.block0), 20);
31624 ++ chacha20_crypt(chacha_state, b.block0, pad0, sizeof(b.block0));
31625 + poly1305_init(&poly1305_state, b.block0);
31626 +
31627 + poly1305_update(&poly1305_state, ad, ad_len);
31628 + if (ad_len & 0xf)
31629 + poly1305_update(&poly1305_state, pad0, 0x10 - (ad_len & 0xf));
31630 +
31631 +- chacha_crypt(chacha_state, dst, src, src_len, 20);
31632 ++ chacha20_crypt(chacha_state, dst, src, src_len);
31633 +
31634 + poly1305_update(&poly1305_state, dst, src_len);
31635 + if (src_len & 0xf)
31636 +@@ -140,7 +140,7 @@ __chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
31637 + if (unlikely(src_len < POLY1305_DIGEST_SIZE))
31638 + return false;
31639 +
31640 +- chacha_crypt(chacha_state, b.block0, pad0, sizeof(b.block0), 20);
31641 ++ chacha20_crypt(chacha_state, b.block0, pad0, sizeof(b.block0));
31642 + poly1305_init(&poly1305_state, b.block0);
31643 +
31644 + poly1305_update(&poly1305_state, ad, ad_len);
31645 +@@ -160,7 +160,7 @@ __chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
31646 +
31647 + ret = crypto_memneq(b.mac, src + dst_len, POLY1305_DIGEST_SIZE);
31648 + if (likely(!ret))
31649 +- chacha_crypt(chacha_state, dst, src, dst_len, 20);
31650 ++ chacha20_crypt(chacha_state, dst, src, dst_len);
31651 +
31652 + memzero_explicit(&b, sizeof(b));
31653 +
31654 +@@ -241,7 +241,7 @@ bool chacha20poly1305_crypt_sg_inplace(struct scatterlist *src,
31655 + b.iv[1] = cpu_to_le64(nonce);
31656 +
31657 + chacha_init(chacha_state, b.k, (u8 *)b.iv);
31658 +- chacha_crypt(chacha_state, b.block0, pad0, sizeof(b.block0), 20);
31659 ++ chacha20_crypt(chacha_state, b.block0, pad0, sizeof(b.block0));
31660 + poly1305_init(&poly1305_state, b.block0);
31661 +
31662 + if (unlikely(ad_len)) {
31663 +@@ -278,14 +278,14 @@ bool chacha20poly1305_crypt_sg_inplace(struct scatterlist *src,
31664 +
31665 + if (unlikely(length < sl))
31666 + l &= ~(CHACHA_BLOCK_SIZE - 1);
31667 +- chacha_crypt(chacha_state, addr, addr, l, 20);
31668 ++ chacha20_crypt(chacha_state, addr, addr, l);
31669 + addr += l;
31670 + length -= l;
31671 + }
31672 +
31673 + if (unlikely(length > 0)) {
31674 +- chacha_crypt(chacha_state, b.chacha_stream, pad0,
31675 +- CHACHA_BLOCK_SIZE, 20);
31676 ++ chacha20_crypt(chacha_state, b.chacha_stream, pad0,
31677 ++ CHACHA_BLOCK_SIZE);
31678 + crypto_xor(addr, b.chacha_stream, length);
31679 + partial = length;
31680 + }
31681 +--
31682 +cgit v1.2.3-4-ga26e
31683 +
31684 +
31685 +From 2abad66e27549e982cc021259a1ed15e7f271615 Mon Sep 17 00:00:00 2001
31686 +From: "Jason A. Donenfeld" <Jason@×××××.com>
31687 +Date: Mon, 25 Nov 2019 11:31:12 +0100
31688 +Subject: crypto: arch - conditionalize crypto api in arch glue for lib code
31689 +
31690 +commit 8394bfec51e0e565556101bcc4e2fe7551104cd8 upstream.
31691 +
31692 +For glue code that's used by Zinc, the actual Crypto API functions might
31693 +not necessarily exist, and don't need to exist either. Before this
31694 +patch, there are valid build configurations that lead to an unbuildable
31695 +kernel. This fixes it to conditionalize those symbols on the existence
31696 +of the proper config entry.
31697 +
31698 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
31699 +Acked-by: Ard Biesheuvel <ardb@××××××.org>
31700 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
31701 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
31702 +---
31703 + arch/arm/crypto/chacha-glue.c | 26 ++++++++++++++++----------
31704 + arch/arm/crypto/curve25519-glue.c | 5 +++--
31705 + arch/arm/crypto/poly1305-glue.c | 9 ++++++---
31706 + arch/arm64/crypto/chacha-neon-glue.c | 5 +++--
31707 + arch/arm64/crypto/poly1305-glue.c | 5 +++--
31708 + arch/mips/crypto/chacha-glue.c | 6 ++++--
31709 + arch/mips/crypto/poly1305-glue.c | 6 ++++--
31710 + arch/x86/crypto/blake2s-glue.c | 6 ++++--
31711 + arch/x86/crypto/chacha_glue.c | 5 +++--
31712 + arch/x86/crypto/curve25519-x86_64.c | 7 ++++---
31713 + arch/x86/crypto/poly1305_glue.c | 5 +++--
31714 + 11 files changed, 53 insertions(+), 32 deletions(-)
31715 +
31716 +diff --git a/arch/arm/crypto/chacha-glue.c b/arch/arm/crypto/chacha-glue.c
31717 +index 3f0c057aa050..7bdf8823066d 100644
31718 +--- a/arch/arm/crypto/chacha-glue.c
31719 ++++ b/arch/arm/crypto/chacha-glue.c
31720 +@@ -286,11 +286,13 @@ static struct skcipher_alg neon_algs[] = {
31721 +
31722 + static int __init chacha_simd_mod_init(void)
31723 + {
31724 +- int err;
31725 ++ int err = 0;
31726 +
31727 +- err = crypto_register_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
31728 +- if (err)
31729 +- return err;
31730 ++ if (IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER)) {
31731 ++ err = crypto_register_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
31732 ++ if (err)
31733 ++ return err;
31734 ++ }
31735 +
31736 + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) {
31737 + int i;
31738 +@@ -310,18 +312,22 @@ static int __init chacha_simd_mod_init(void)
31739 + static_branch_enable(&use_neon);
31740 + }
31741 +
31742 +- err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
31743 +- if (err)
31744 +- crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
31745 ++ if (IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER)) {
31746 ++ err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
31747 ++ if (err)
31748 ++ crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
31749 ++ }
31750 + }
31751 + return err;
31752 + }
31753 +
31754 + static void __exit chacha_simd_mod_fini(void)
31755 + {
31756 +- crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
31757 +- if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON))
31758 +- crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
31759 ++ if (IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER)) {
31760 ++ crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
31761 ++ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON))
31762 ++ crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
31763 ++ }
31764 + }
31765 +
31766 + module_init(chacha_simd_mod_init);
31767 +diff --git a/arch/arm/crypto/curve25519-glue.c b/arch/arm/crypto/curve25519-glue.c
31768 +index 2e9e12d2f642..f3f42cf3b893 100644
31769 +--- a/arch/arm/crypto/curve25519-glue.c
31770 ++++ b/arch/arm/crypto/curve25519-glue.c
31771 +@@ -108,14 +108,15 @@ static int __init mod_init(void)
31772 + {
31773 + if (elf_hwcap & HWCAP_NEON) {
31774 + static_branch_enable(&have_neon);
31775 +- return crypto_register_kpp(&curve25519_alg);
31776 ++ return IS_REACHABLE(CONFIG_CRYPTO_KPP) ?
31777 ++ crypto_register_kpp(&curve25519_alg) : 0;
31778 + }
31779 + return 0;
31780 + }
31781 +
31782 + static void __exit mod_exit(void)
31783 + {
31784 +- if (elf_hwcap & HWCAP_NEON)
31785 ++ if (IS_REACHABLE(CONFIG_CRYPTO_KPP) && elf_hwcap & HWCAP_NEON)
31786 + crypto_unregister_kpp(&curve25519_alg);
31787 + }
31788 +
31789 +diff --git a/arch/arm/crypto/poly1305-glue.c b/arch/arm/crypto/poly1305-glue.c
31790 +index 74a725ac89c9..abe3f2d587dc 100644
31791 +--- a/arch/arm/crypto/poly1305-glue.c
31792 ++++ b/arch/arm/crypto/poly1305-glue.c
31793 +@@ -249,16 +249,19 @@ static int __init arm_poly1305_mod_init(void)
31794 + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
31795 + (elf_hwcap & HWCAP_NEON))
31796 + static_branch_enable(&have_neon);
31797 +- else
31798 ++ else if (IS_REACHABLE(CONFIG_CRYPTO_HASH))
31799 + /* register only the first entry */
31800 + return crypto_register_shash(&arm_poly1305_algs[0]);
31801 +
31802 +- return crypto_register_shashes(arm_poly1305_algs,
31803 +- ARRAY_SIZE(arm_poly1305_algs));
31804 ++ return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
31805 ++ crypto_register_shashes(arm_poly1305_algs,
31806 ++ ARRAY_SIZE(arm_poly1305_algs)) : 0;
31807 + }
31808 +
31809 + static void __exit arm_poly1305_mod_exit(void)
31810 + {
31811 ++ if (!IS_REACHABLE(CONFIG_CRYPTO_HASH))
31812 ++ return;
31813 + if (!static_branch_likely(&have_neon)) {
31814 + crypto_unregister_shash(&arm_poly1305_algs[0]);
31815 + return;
31816 +diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c
31817 +index b08029d7bde6..71c11d2e9fcd 100644
31818 +--- a/arch/arm64/crypto/chacha-neon-glue.c
31819 ++++ b/arch/arm64/crypto/chacha-neon-glue.c
31820 +@@ -211,12 +211,13 @@ static int __init chacha_simd_mod_init(void)
31821 +
31822 + static_branch_enable(&have_neon);
31823 +
31824 +- return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
31825 ++ return IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER) ?
31826 ++ crypto_register_skciphers(algs, ARRAY_SIZE(algs)) : 0;
31827 + }
31828 +
31829 + static void __exit chacha_simd_mod_fini(void)
31830 + {
31831 +- if (cpu_have_named_feature(ASIMD))
31832 ++ if (IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER) && cpu_have_named_feature(ASIMD))
31833 + crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
31834 + }
31835 +
31836 +diff --git a/arch/arm64/crypto/poly1305-glue.c b/arch/arm64/crypto/poly1305-glue.c
31837 +index dd843d0ee83a..83a2338a8826 100644
31838 +--- a/arch/arm64/crypto/poly1305-glue.c
31839 ++++ b/arch/arm64/crypto/poly1305-glue.c
31840 +@@ -220,12 +220,13 @@ static int __init neon_poly1305_mod_init(void)
31841 +
31842 + static_branch_enable(&have_neon);
31843 +
31844 +- return crypto_register_shash(&neon_poly1305_alg);
31845 ++ return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
31846 ++ crypto_register_shash(&neon_poly1305_alg) : 0;
31847 + }
31848 +
31849 + static void __exit neon_poly1305_mod_exit(void)
31850 + {
31851 +- if (cpu_have_named_feature(ASIMD))
31852 ++ if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && cpu_have_named_feature(ASIMD))
31853 + crypto_unregister_shash(&neon_poly1305_alg);
31854 + }
31855 +
31856 +diff --git a/arch/mips/crypto/chacha-glue.c b/arch/mips/crypto/chacha-glue.c
31857 +index 779e399c9bef..90896029d0cd 100644
31858 +--- a/arch/mips/crypto/chacha-glue.c
31859 ++++ b/arch/mips/crypto/chacha-glue.c
31860 +@@ -128,12 +128,14 @@ static struct skcipher_alg algs[] = {
31861 +
31862 + static int __init chacha_simd_mod_init(void)
31863 + {
31864 +- return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
31865 ++ return IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER) ?
31866 ++ crypto_register_skciphers(algs, ARRAY_SIZE(algs)) : 0;
31867 + }
31868 +
31869 + static void __exit chacha_simd_mod_fini(void)
31870 + {
31871 +- crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
31872 ++ if (IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER))
31873 ++ crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
31874 + }
31875 +
31876 + module_init(chacha_simd_mod_init);
31877 +diff --git a/arch/mips/crypto/poly1305-glue.c b/arch/mips/crypto/poly1305-glue.c
31878 +index b759b6ccc361..b37d29cf5d0a 100644
31879 +--- a/arch/mips/crypto/poly1305-glue.c
31880 ++++ b/arch/mips/crypto/poly1305-glue.c
31881 +@@ -187,12 +187,14 @@ static struct shash_alg mips_poly1305_alg = {
31882 +
31883 + static int __init mips_poly1305_mod_init(void)
31884 + {
31885 +- return crypto_register_shash(&mips_poly1305_alg);
31886 ++ return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
31887 ++ crypto_register_shash(&mips_poly1305_alg) : 0;
31888 + }
31889 +
31890 + static void __exit mips_poly1305_mod_exit(void)
31891 + {
31892 +- crypto_unregister_shash(&mips_poly1305_alg);
31893 ++ if (IS_REACHABLE(CONFIG_CRYPTO_HASH))
31894 ++ crypto_unregister_shash(&mips_poly1305_alg);
31895 + }
31896 +
31897 + module_init(mips_poly1305_mod_init);
31898 +diff --git a/arch/x86/crypto/blake2s-glue.c b/arch/x86/crypto/blake2s-glue.c
31899 +index 4a37ba7cdbe5..1d9ff8a45e1f 100644
31900 +--- a/arch/x86/crypto/blake2s-glue.c
31901 ++++ b/arch/x86/crypto/blake2s-glue.c
31902 +@@ -210,12 +210,14 @@ static int __init blake2s_mod_init(void)
31903 + XFEATURE_MASK_AVX512, NULL))
31904 + static_branch_enable(&blake2s_use_avx512);
31905 +
31906 +- return crypto_register_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
31907 ++ return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
31908 ++ crypto_register_shashes(blake2s_algs,
31909 ++ ARRAY_SIZE(blake2s_algs)) : 0;
31910 + }
31911 +
31912 + static void __exit blake2s_mod_exit(void)
31913 + {
31914 +- if (boot_cpu_has(X86_FEATURE_SSSE3))
31915 ++ if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && boot_cpu_has(X86_FEATURE_SSSE3))
31916 + crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
31917 + }
31918 +
31919 +diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c
31920 +index a94e30b6f941..1bebe11b9ec9 100644
31921 +--- a/arch/x86/crypto/chacha_glue.c
31922 ++++ b/arch/x86/crypto/chacha_glue.c
31923 +@@ -299,12 +299,13 @@ static int __init chacha_simd_mod_init(void)
31924 + boot_cpu_has(X86_FEATURE_AVX512BW)) /* kmovq */
31925 + static_branch_enable(&chacha_use_avx512vl);
31926 + }
31927 +- return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
31928 ++ return IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER) ?
31929 ++ crypto_register_skciphers(algs, ARRAY_SIZE(algs)) : 0;
31930 + }
31931 +
31932 + static void __exit chacha_simd_mod_fini(void)
31933 + {
31934 +- if (boot_cpu_has(X86_FEATURE_SSSE3))
31935 ++ if (IS_REACHABLE(CONFIG_CRYPTO_BLKCIPHER) && boot_cpu_has(X86_FEATURE_SSSE3))
31936 + crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
31937 + }
31938 +
31939 +diff --git a/arch/x86/crypto/curve25519-x86_64.c b/arch/x86/crypto/curve25519-x86_64.c
31940 +index a52a3fb15727..eec7d2d24239 100644
31941 +--- a/arch/x86/crypto/curve25519-x86_64.c
31942 ++++ b/arch/x86/crypto/curve25519-x86_64.c
31943 +@@ -2457,13 +2457,14 @@ static int __init curve25519_mod_init(void)
31944 + static_branch_enable(&curve25519_use_adx);
31945 + else
31946 + return 0;
31947 +- return crypto_register_kpp(&curve25519_alg);
31948 ++ return IS_REACHABLE(CONFIG_CRYPTO_KPP) ?
31949 ++ crypto_register_kpp(&curve25519_alg) : 0;
31950 + }
31951 +
31952 + static void __exit curve25519_mod_exit(void)
31953 + {
31954 +- if (boot_cpu_has(X86_FEATURE_BMI2) ||
31955 +- boot_cpu_has(X86_FEATURE_ADX))
31956 ++ if (IS_REACHABLE(CONFIG_CRYPTO_KPP) &&
31957 ++ (boot_cpu_has(X86_FEATURE_BMI2) || boot_cpu_has(X86_FEATURE_ADX)))
31958 + crypto_unregister_kpp(&curve25519_alg);
31959 + }
31960 +
31961 +diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
31962 +index 370cd88068ec..0cc4537e6617 100644
31963 +--- a/arch/x86/crypto/poly1305_glue.c
31964 ++++ b/arch/x86/crypto/poly1305_glue.c
31965 +@@ -224,12 +224,13 @@ static int __init poly1305_simd_mod_init(void)
31966 + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
31967 + static_branch_enable(&poly1305_use_avx2);
31968 +
31969 +- return crypto_register_shash(&alg);
31970 ++ return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? crypto_register_shash(&alg) : 0;
31971 + }
31972 +
31973 + static void __exit poly1305_simd_mod_exit(void)
31974 + {
31975 +- crypto_unregister_shash(&alg);
31976 ++ if (IS_REACHABLE(CONFIG_CRYPTO_HASH))
31977 ++ crypto_unregister_shash(&alg);
31978 + }
31979 +
31980 + module_init(poly1305_simd_mod_init);
31981 +--
31982 +cgit v1.2.3-4-ga26e
31983 +
31984 +
31985 +From ed52f2fd491bc6af585f76d0da6db0e748f174d8 Mon Sep 17 00:00:00 2001
31986 +From: =?UTF-8?q?Valdis=20Kl=C4=93tnieks?= <valdis.kletnieks@××.edu>
31987 +Date: Thu, 5 Dec 2019 20:58:36 -0500
31988 +Subject: crypto: chacha - fix warning message in header file
31989 +
31990 +commit 579d705cd64e44f3fcda1a6cfd5f37468a5ddf63 upstream.
31991 +
31992 +Building with W=1 causes a warning:
31993 +
31994 + CC [M] arch/x86/crypto/chacha_glue.o
31995 +In file included from arch/x86/crypto/chacha_glue.c:10:
31996 +./include/crypto/internal/chacha.h:37:1: warning: 'inline' is not at beginning of declaration [-Wold-style-declaration]
31997 + 37 | static int inline chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key,
31998 + | ^~~~~~
31999 +
32000 +Straighten out the order to match the rest of the header file.
32001 +
32002 +Signed-off-by: Valdis Kletnieks <valdis.kletnieks@××.edu>
32003 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
32004 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
32005 +---
32006 + include/crypto/internal/chacha.h | 2 +-
32007 + 1 file changed, 1 insertion(+), 1 deletion(-)
32008 +
32009 +diff --git a/include/crypto/internal/chacha.h b/include/crypto/internal/chacha.h
32010 +index aa5d4a16aac5..b085dc1ac151 100644
32011 +--- a/include/crypto/internal/chacha.h
32012 ++++ b/include/crypto/internal/chacha.h
32013 +@@ -34,7 +34,7 @@ static inline int chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
32014 + return chacha_setkey(tfm, key, keysize, 20);
32015 + }
32016 +
32017 +-static int inline chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key,
32018 ++static inline int chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key,
32019 + unsigned int keysize)
32020 + {
32021 + return chacha_setkey(tfm, key, keysize, 12);
32022 +--
32023 +cgit v1.2.3-4-ga26e
32024 +
32025 +
32026 +From bb6de9fa67d473c0c0114157ca6a8c6a7308848d Mon Sep 17 00:00:00 2001
32027 +From: "Jason A. Donenfeld" <Jason@×××××.com>
32028 +Date: Wed, 11 Dec 2019 10:26:39 +0100
32029 +Subject: crypto: arm/curve25519 - add arch-specific key generation function
32030 +
32031 +commit 84faa307249b341f6ad8de3e1869d77a65e26669 upstream.
32032 +
32033 +Somehow this was forgotten when Zinc was being split into oddly shaped
32034 +pieces, resulting in linker errors. The x86_64 glue has a specific key
32035 +generation implementation, but the Arm one does not. However, it can
32036 +still receive the NEON speedups by calling the ordinary DH function
32037 +using the base point.
32038 +
32039 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
32040 +Acked-by: Ard Biesheuvel <ardb@××××××.org>
32041 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
32042 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
32043 +---
32044 + arch/arm/crypto/curve25519-glue.c | 7 +++++++
32045 + 1 file changed, 7 insertions(+)
32046 +
32047 +diff --git a/arch/arm/crypto/curve25519-glue.c b/arch/arm/crypto/curve25519-glue.c
32048 +index f3f42cf3b893..776ae07e0469 100644
32049 +--- a/arch/arm/crypto/curve25519-glue.c
32050 ++++ b/arch/arm/crypto/curve25519-glue.c
32051 +@@ -38,6 +38,13 @@ void curve25519_arch(u8 out[CURVE25519_KEY_SIZE],
32052 + }
32053 + EXPORT_SYMBOL(curve25519_arch);
32054 +
32055 ++void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE],
32056 ++ const u8 secret[CURVE25519_KEY_SIZE])
32057 ++{
32058 ++ return curve25519_arch(pub, secret, curve25519_base_point);
32059 ++}
32060 ++EXPORT_SYMBOL(curve25519_base_arch);
32061 ++
32062 + static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf,
32063 + unsigned int len)
32064 + {
32065 +--
32066 +cgit v1.2.3-4-ga26e
32067 +
32068 +
32069 +From 9cd4f1229f4a12351cd17fd67ec2b060cef8d2c8 Mon Sep 17 00:00:00 2001
32070 +From: "Jason A. Donenfeld" <Jason@×××××.com>
32071 +Date: Mon, 16 Dec 2019 19:53:26 +0100
32072 +Subject: crypto: lib/curve25519 - re-add selftests
32073 +
32074 +commit aa127963f1cab2b93c74c9b128a84610203fb674 upstream.
32075 +
32076 +Somehow these were dropped when Zinc was being integrated, which is
32077 +problematic, because testing the library interface for Curve25519 is
32078 +important. This commit simply adds them back and wires them in in the
32079 +same way that the blake2s selftests are wired in.
32080 +
32081 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
32082 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
32083 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
32084 +---
32085 + lib/crypto/Makefile | 1 +
32086 + lib/crypto/curve25519-selftest.c | 1321 ++++++++++++++++++++++++++++++++++++++
32087 + lib/crypto/curve25519.c | 17 +
32088 + 3 files changed, 1339 insertions(+)
32089 + create mode 100644 lib/crypto/curve25519-selftest.c
32090 +
32091 +diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
32092 +index 34a701ab8b92..f97f9b941110 100644
32093 +--- a/lib/crypto/Makefile
32094 ++++ b/lib/crypto/Makefile
32095 +@@ -36,4 +36,5 @@ libsha256-y := sha256.o
32096 + ifneq ($(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS),y)
32097 + libblake2s-y += blake2s-selftest.o
32098 + libchacha20poly1305-y += chacha20poly1305-selftest.o
32099 ++libcurve25519-y += curve25519-selftest.o
32100 + endif
32101 +diff --git a/lib/crypto/curve25519-selftest.c b/lib/crypto/curve25519-selftest.c
32102 +new file mode 100644
32103 +index 000000000000..c85e85381e78
32104 +--- /dev/null
32105 ++++ b/lib/crypto/curve25519-selftest.c
32106 +@@ -0,0 +1,1321 @@
32107 ++// SPDX-License-Identifier: GPL-2.0 OR MIT
32108 ++/*
32109 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
32110 ++ */
32111 ++
32112 ++#include <crypto/curve25519.h>
32113 ++
32114 ++struct curve25519_test_vector {
32115 ++ u8 private[CURVE25519_KEY_SIZE];
32116 ++ u8 public[CURVE25519_KEY_SIZE];
32117 ++ u8 result[CURVE25519_KEY_SIZE];
32118 ++ bool valid;
32119 ++};
32120 ++static const struct curve25519_test_vector curve25519_test_vectors[] __initconst = {
32121 ++ {
32122 ++ .private = { 0x77, 0x07, 0x6d, 0x0a, 0x73, 0x18, 0xa5, 0x7d,
32123 ++ 0x3c, 0x16, 0xc1, 0x72, 0x51, 0xb2, 0x66, 0x45,
32124 ++ 0xdf, 0x4c, 0x2f, 0x87, 0xeb, 0xc0, 0x99, 0x2a,
32125 ++ 0xb1, 0x77, 0xfb, 0xa5, 0x1d, 0xb9, 0x2c, 0x2a },
32126 ++ .public = { 0xde, 0x9e, 0xdb, 0x7d, 0x7b, 0x7d, 0xc1, 0xb4,
32127 ++ 0xd3, 0x5b, 0x61, 0xc2, 0xec, 0xe4, 0x35, 0x37,
32128 ++ 0x3f, 0x83, 0x43, 0xc8, 0x5b, 0x78, 0x67, 0x4d,
32129 ++ 0xad, 0xfc, 0x7e, 0x14, 0x6f, 0x88, 0x2b, 0x4f },
32130 ++ .result = { 0x4a, 0x5d, 0x9d, 0x5b, 0xa4, 0xce, 0x2d, 0xe1,
32131 ++ 0x72, 0x8e, 0x3b, 0xf4, 0x80, 0x35, 0x0f, 0x25,
32132 ++ 0xe0, 0x7e, 0x21, 0xc9, 0x47, 0xd1, 0x9e, 0x33,
32133 ++ 0x76, 0xf0, 0x9b, 0x3c, 0x1e, 0x16, 0x17, 0x42 },
32134 ++ .valid = true
32135 ++ },
32136 ++ {
32137 ++ .private = { 0x5d, 0xab, 0x08, 0x7e, 0x62, 0x4a, 0x8a, 0x4b,
32138 ++ 0x79, 0xe1, 0x7f, 0x8b, 0x83, 0x80, 0x0e, 0xe6,
32139 ++ 0x6f, 0x3b, 0xb1, 0x29, 0x26, 0x18, 0xb6, 0xfd,
32140 ++ 0x1c, 0x2f, 0x8b, 0x27, 0xff, 0x88, 0xe0, 0xeb },
32141 ++ .public = { 0x85, 0x20, 0xf0, 0x09, 0x89, 0x30, 0xa7, 0x54,
32142 ++ 0x74, 0x8b, 0x7d, 0xdc, 0xb4, 0x3e, 0xf7, 0x5a,
32143 ++ 0x0d, 0xbf, 0x3a, 0x0d, 0x26, 0x38, 0x1a, 0xf4,
32144 ++ 0xeb, 0xa4, 0xa9, 0x8e, 0xaa, 0x9b, 0x4e, 0x6a },
32145 ++ .result = { 0x4a, 0x5d, 0x9d, 0x5b, 0xa4, 0xce, 0x2d, 0xe1,
32146 ++ 0x72, 0x8e, 0x3b, 0xf4, 0x80, 0x35, 0x0f, 0x25,
32147 ++ 0xe0, 0x7e, 0x21, 0xc9, 0x47, 0xd1, 0x9e, 0x33,
32148 ++ 0x76, 0xf0, 0x9b, 0x3c, 0x1e, 0x16, 0x17, 0x42 },
32149 ++ .valid = true
32150 ++ },
32151 ++ {
32152 ++ .private = { 1 },
32153 ++ .public = { 0x25, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32154 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32155 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32156 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
32157 ++ .result = { 0x3c, 0x77, 0x77, 0xca, 0xf9, 0x97, 0xb2, 0x64,
32158 ++ 0x41, 0x60, 0x77, 0x66, 0x5b, 0x4e, 0x22, 0x9d,
32159 ++ 0x0b, 0x95, 0x48, 0xdc, 0x0c, 0xd8, 0x19, 0x98,
32160 ++ 0xdd, 0xcd, 0xc5, 0xc8, 0x53, 0x3c, 0x79, 0x7f },
32161 ++ .valid = true
32162 ++ },
32163 ++ {
32164 ++ .private = { 1 },
32165 ++ .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32166 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32167 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32168 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
32169 ++ .result = { 0xb3, 0x2d, 0x13, 0x62, 0xc2, 0x48, 0xd6, 0x2f,
32170 ++ 0xe6, 0x26, 0x19, 0xcf, 0xf0, 0x4d, 0xd4, 0x3d,
32171 ++ 0xb7, 0x3f, 0xfc, 0x1b, 0x63, 0x08, 0xed, 0xe3,
32172 ++ 0x0b, 0x78, 0xd8, 0x73, 0x80, 0xf1, 0xe8, 0x34 },
32173 ++ .valid = true
32174 ++ },
32175 ++ {
32176 ++ .private = { 0xa5, 0x46, 0xe3, 0x6b, 0xf0, 0x52, 0x7c, 0x9d,
32177 ++ 0x3b, 0x16, 0x15, 0x4b, 0x82, 0x46, 0x5e, 0xdd,
32178 ++ 0x62, 0x14, 0x4c, 0x0a, 0xc1, 0xfc, 0x5a, 0x18,
32179 ++ 0x50, 0x6a, 0x22, 0x44, 0xba, 0x44, 0x9a, 0xc4 },
32180 ++ .public = { 0xe6, 0xdb, 0x68, 0x67, 0x58, 0x30, 0x30, 0xdb,
32181 ++ 0x35, 0x94, 0xc1, 0xa4, 0x24, 0xb1, 0x5f, 0x7c,
32182 ++ 0x72, 0x66, 0x24, 0xec, 0x26, 0xb3, 0x35, 0x3b,
32183 ++ 0x10, 0xa9, 0x03, 0xa6, 0xd0, 0xab, 0x1c, 0x4c },
32184 ++ .result = { 0xc3, 0xda, 0x55, 0x37, 0x9d, 0xe9, 0xc6, 0x90,
32185 ++ 0x8e, 0x94, 0xea, 0x4d, 0xf2, 0x8d, 0x08, 0x4f,
32186 ++ 0x32, 0xec, 0xcf, 0x03, 0x49, 0x1c, 0x71, 0xf7,
32187 ++ 0x54, 0xb4, 0x07, 0x55, 0x77, 0xa2, 0x85, 0x52 },
32188 ++ .valid = true
32189 ++ },
32190 ++ {
32191 ++ .private = { 1, 2, 3, 4 },
32192 ++ .public = { 0 },
32193 ++ .result = { 0 },
32194 ++ .valid = false
32195 ++ },
32196 ++ {
32197 ++ .private = { 2, 4, 6, 8 },
32198 ++ .public = { 0xe0, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae,
32199 ++ 0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a,
32200 ++ 0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd,
32201 ++ 0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8 },
32202 ++ .result = { 0 },
32203 ++ .valid = false
32204 ++ },
32205 ++ {
32206 ++ .private = { 0xff, 0xff, 0xff, 0xff, 0x0a, 0xff, 0xff, 0xff,
32207 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32208 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32209 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
32210 ++ .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32211 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32212 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32213 ++ 0xff, 0xff, 0xff, 0xff, 0x0a, 0x00, 0xfb, 0x9f },
32214 ++ .result = { 0x77, 0x52, 0xb6, 0x18, 0xc1, 0x2d, 0x48, 0xd2,
32215 ++ 0xc6, 0x93, 0x46, 0x83, 0x81, 0x7c, 0xc6, 0x57,
32216 ++ 0xf3, 0x31, 0x03, 0x19, 0x49, 0x48, 0x20, 0x05,
32217 ++ 0x42, 0x2b, 0x4e, 0xae, 0x8d, 0x1d, 0x43, 0x23 },
32218 ++ .valid = true
32219 ++ },
32220 ++ {
32221 ++ .private = { 0x8e, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32222 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32223 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32224 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
32225 ++ .public = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32226 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32227 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32228 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8e, 0x06 },
32229 ++ .result = { 0x5a, 0xdf, 0xaa, 0x25, 0x86, 0x8e, 0x32, 0x3d,
32230 ++ 0xae, 0x49, 0x62, 0xc1, 0x01, 0x5c, 0xb3, 0x12,
32231 ++ 0xe1, 0xc5, 0xc7, 0x9e, 0x95, 0x3f, 0x03, 0x99,
32232 ++ 0xb0, 0xba, 0x16, 0x22, 0xf3, 0xb6, 0xf7, 0x0c },
32233 ++ .valid = true
32234 ++ },
32235 ++ /* wycheproof - normal case */
32236 ++ {
32237 ++ .private = { 0x48, 0x52, 0x83, 0x4d, 0x9d, 0x6b, 0x77, 0xda,
32238 ++ 0xde, 0xab, 0xaa, 0xf2, 0xe1, 0x1d, 0xca, 0x66,
32239 ++ 0xd1, 0x9f, 0xe7, 0x49, 0x93, 0xa7, 0xbe, 0xc3,
32240 ++ 0x6c, 0x6e, 0x16, 0xa0, 0x98, 0x3f, 0xea, 0xba },
32241 ++ .public = { 0x9c, 0x64, 0x7d, 0x9a, 0xe5, 0x89, 0xb9, 0xf5,
32242 ++ 0x8f, 0xdc, 0x3c, 0xa4, 0x94, 0x7e, 0xfb, 0xc9,
32243 ++ 0x15, 0xc4, 0xb2, 0xe0, 0x8e, 0x74, 0x4a, 0x0e,
32244 ++ 0xdf, 0x46, 0x9d, 0xac, 0x59, 0xc8, 0xf8, 0x5a },
32245 ++ .result = { 0x87, 0xb7, 0xf2, 0x12, 0xb6, 0x27, 0xf7, 0xa5,
32246 ++ 0x4c, 0xa5, 0xe0, 0xbc, 0xda, 0xdd, 0xd5, 0x38,
32247 ++ 0x9d, 0x9d, 0xe6, 0x15, 0x6c, 0xdb, 0xcf, 0x8e,
32248 ++ 0xbe, 0x14, 0xff, 0xbc, 0xfb, 0x43, 0x65, 0x51 },
32249 ++ .valid = true
32250 ++ },
32251 ++ /* wycheproof - public key on twist */
32252 ++ {
32253 ++ .private = { 0x58, 0x8c, 0x06, 0x1a, 0x50, 0x80, 0x4a, 0xc4,
32254 ++ 0x88, 0xad, 0x77, 0x4a, 0xc7, 0x16, 0xc3, 0xf5,
32255 ++ 0xba, 0x71, 0x4b, 0x27, 0x12, 0xe0, 0x48, 0x49,
32256 ++ 0x13, 0x79, 0xa5, 0x00, 0x21, 0x19, 0x98, 0xa8 },
32257 ++ .public = { 0x63, 0xaa, 0x40, 0xc6, 0xe3, 0x83, 0x46, 0xc5,
32258 ++ 0xca, 0xf2, 0x3a, 0x6d, 0xf0, 0xa5, 0xe6, 0xc8,
32259 ++ 0x08, 0x89, 0xa0, 0x86, 0x47, 0xe5, 0x51, 0xb3,
32260 ++ 0x56, 0x34, 0x49, 0xbe, 0xfc, 0xfc, 0x97, 0x33 },
32261 ++ .result = { 0xb1, 0xa7, 0x07, 0x51, 0x94, 0x95, 0xff, 0xff,
32262 ++ 0xb2, 0x98, 0xff, 0x94, 0x17, 0x16, 0xb0, 0x6d,
32263 ++ 0xfa, 0xb8, 0x7c, 0xf8, 0xd9, 0x11, 0x23, 0xfe,
32264 ++ 0x2b, 0xe9, 0xa2, 0x33, 0xdd, 0xa2, 0x22, 0x12 },
32265 ++ .valid = true
32266 ++ },
32267 ++ /* wycheproof - public key on twist */
32268 ++ {
32269 ++ .private = { 0xb0, 0x5b, 0xfd, 0x32, 0xe5, 0x53, 0x25, 0xd9,
32270 ++ 0xfd, 0x64, 0x8c, 0xb3, 0x02, 0x84, 0x80, 0x39,
32271 ++ 0x00, 0x0b, 0x39, 0x0e, 0x44, 0xd5, 0x21, 0xe5,
32272 ++ 0x8a, 0xab, 0x3b, 0x29, 0xa6, 0x96, 0x0b, 0xa8 },
32273 ++ .public = { 0x0f, 0x83, 0xc3, 0x6f, 0xde, 0xd9, 0xd3, 0x2f,
32274 ++ 0xad, 0xf4, 0xef, 0xa3, 0xae, 0x93, 0xa9, 0x0b,
32275 ++ 0xb5, 0xcf, 0xa6, 0x68, 0x93, 0xbc, 0x41, 0x2c,
32276 ++ 0x43, 0xfa, 0x72, 0x87, 0xdb, 0xb9, 0x97, 0x79 },
32277 ++ .result = { 0x67, 0xdd, 0x4a, 0x6e, 0x16, 0x55, 0x33, 0x53,
32278 ++ 0x4c, 0x0e, 0x3f, 0x17, 0x2e, 0x4a, 0xb8, 0x57,
32279 ++ 0x6b, 0xca, 0x92, 0x3a, 0x5f, 0x07, 0xb2, 0xc0,
32280 ++ 0x69, 0xb4, 0xc3, 0x10, 0xff, 0x2e, 0x93, 0x5b },
32281 ++ .valid = true
32282 ++ },
32283 ++ /* wycheproof - public key on twist */
32284 ++ {
32285 ++ .private = { 0x70, 0xe3, 0x4b, 0xcb, 0xe1, 0xf4, 0x7f, 0xbc,
32286 ++ 0x0f, 0xdd, 0xfd, 0x7c, 0x1e, 0x1a, 0xa5, 0x3d,
32287 ++ 0x57, 0xbf, 0xe0, 0xf6, 0x6d, 0x24, 0x30, 0x67,
32288 ++ 0xb4, 0x24, 0xbb, 0x62, 0x10, 0xbe, 0xd1, 0x9c },
32289 ++ .public = { 0x0b, 0x82, 0x11, 0xa2, 0xb6, 0x04, 0x90, 0x97,
32290 ++ 0xf6, 0x87, 0x1c, 0x6c, 0x05, 0x2d, 0x3c, 0x5f,
32291 ++ 0xc1, 0xba, 0x17, 0xda, 0x9e, 0x32, 0xae, 0x45,
32292 ++ 0x84, 0x03, 0xb0, 0x5b, 0xb2, 0x83, 0x09, 0x2a },
32293 ++ .result = { 0x4a, 0x06, 0x38, 0xcf, 0xaa, 0x9e, 0xf1, 0x93,
32294 ++ 0x3b, 0x47, 0xf8, 0x93, 0x92, 0x96, 0xa6, 0xb2,
32295 ++ 0x5b, 0xe5, 0x41, 0xef, 0x7f, 0x70, 0xe8, 0x44,
32296 ++ 0xc0, 0xbc, 0xc0, 0x0b, 0x13, 0x4d, 0xe6, 0x4a },
32297 ++ .valid = true
32298 ++ },
32299 ++ /* wycheproof - public key on twist */
32300 ++ {
32301 ++ .private = { 0x68, 0xc1, 0xf3, 0xa6, 0x53, 0xa4, 0xcd, 0xb1,
32302 ++ 0xd3, 0x7b, 0xba, 0x94, 0x73, 0x8f, 0x8b, 0x95,
32303 ++ 0x7a, 0x57, 0xbe, 0xb2, 0x4d, 0x64, 0x6e, 0x99,
32304 ++ 0x4d, 0xc2, 0x9a, 0x27, 0x6a, 0xad, 0x45, 0x8d },
32305 ++ .public = { 0x34, 0x3a, 0xc2, 0x0a, 0x3b, 0x9c, 0x6a, 0x27,
32306 ++ 0xb1, 0x00, 0x81, 0x76, 0x50, 0x9a, 0xd3, 0x07,
32307 ++ 0x35, 0x85, 0x6e, 0xc1, 0xc8, 0xd8, 0xfc, 0xae,
32308 ++ 0x13, 0x91, 0x2d, 0x08, 0xd1, 0x52, 0xf4, 0x6c },
32309 ++ .result = { 0x39, 0x94, 0x91, 0xfc, 0xe8, 0xdf, 0xab, 0x73,
32310 ++ 0xb4, 0xf9, 0xf6, 0x11, 0xde, 0x8e, 0xa0, 0xb2,
32311 ++ 0x7b, 0x28, 0xf8, 0x59, 0x94, 0x25, 0x0b, 0x0f,
32312 ++ 0x47, 0x5d, 0x58, 0x5d, 0x04, 0x2a, 0xc2, 0x07 },
32313 ++ .valid = true
32314 ++ },
32315 ++ /* wycheproof - public key on twist */
32316 ++ {
32317 ++ .private = { 0xd8, 0x77, 0xb2, 0x6d, 0x06, 0xdf, 0xf9, 0xd9,
32318 ++ 0xf7, 0xfd, 0x4c, 0x5b, 0x37, 0x69, 0xf8, 0xcd,
32319 ++ 0xd5, 0xb3, 0x05, 0x16, 0xa5, 0xab, 0x80, 0x6b,
32320 ++ 0xe3, 0x24, 0xff, 0x3e, 0xb6, 0x9e, 0xa0, 0xb2 },
32321 ++ .public = { 0xfa, 0x69, 0x5f, 0xc7, 0xbe, 0x8d, 0x1b, 0xe5,
32322 ++ 0xbf, 0x70, 0x48, 0x98, 0xf3, 0x88, 0xc4, 0x52,
32323 ++ 0xba, 0xfd, 0xd3, 0xb8, 0xea, 0xe8, 0x05, 0xf8,
32324 ++ 0x68, 0x1a, 0x8d, 0x15, 0xc2, 0xd4, 0xe1, 0x42 },
32325 ++ .result = { 0x2c, 0x4f, 0xe1, 0x1d, 0x49, 0x0a, 0x53, 0x86,
32326 ++ 0x17, 0x76, 0xb1, 0x3b, 0x43, 0x54, 0xab, 0xd4,
32327 ++ 0xcf, 0x5a, 0x97, 0x69, 0x9d, 0xb6, 0xe6, 0xc6,
32328 ++ 0x8c, 0x16, 0x26, 0xd0, 0x76, 0x62, 0xf7, 0x58 },
32329 ++ .valid = true
32330 ++ },
32331 ++ /* wycheproof - public key = 0 */
32332 ++ {
32333 ++ .private = { 0x20, 0x74, 0x94, 0x03, 0x8f, 0x2b, 0xb8, 0x11,
32334 ++ 0xd4, 0x78, 0x05, 0xbc, 0xdf, 0x04, 0xa2, 0xac,
32335 ++ 0x58, 0x5a, 0xda, 0x7f, 0x2f, 0x23, 0x38, 0x9b,
32336 ++ 0xfd, 0x46, 0x58, 0xf9, 0xdd, 0xd4, 0xde, 0xbc },
32337 ++ .public = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32338 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32339 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32340 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
32341 ++ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32342 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32343 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32344 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
32345 ++ .valid = false
32346 ++ },
32347 ++ /* wycheproof - public key = 1 */
32348 ++ {
32349 ++ .private = { 0x20, 0x2e, 0x89, 0x72, 0xb6, 0x1c, 0x7e, 0x61,
32350 ++ 0x93, 0x0e, 0xb9, 0x45, 0x0b, 0x50, 0x70, 0xea,
32351 ++ 0xe1, 0xc6, 0x70, 0x47, 0x56, 0x85, 0x54, 0x1f,
32352 ++ 0x04, 0x76, 0x21, 0x7e, 0x48, 0x18, 0xcf, 0xab },
32353 ++ .public = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32354 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32355 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32356 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
32357 ++ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32358 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32359 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32360 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
32361 ++ .valid = false
32362 ++ },
32363 ++ /* wycheproof - edge case on twist */
32364 ++ {
32365 ++ .private = { 0x38, 0xdd, 0xe9, 0xf3, 0xe7, 0xb7, 0x99, 0x04,
32366 ++ 0x5f, 0x9a, 0xc3, 0x79, 0x3d, 0x4a, 0x92, 0x77,
32367 ++ 0xda, 0xde, 0xad, 0xc4, 0x1b, 0xec, 0x02, 0x90,
32368 ++ 0xf8, 0x1f, 0x74, 0x4f, 0x73, 0x77, 0x5f, 0x84 },
32369 ++ .public = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32370 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32371 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32372 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
32373 ++ .result = { 0x9a, 0x2c, 0xfe, 0x84, 0xff, 0x9c, 0x4a, 0x97,
32374 ++ 0x39, 0x62, 0x5c, 0xae, 0x4a, 0x3b, 0x82, 0xa9,
32375 ++ 0x06, 0x87, 0x7a, 0x44, 0x19, 0x46, 0xf8, 0xd7,
32376 ++ 0xb3, 0xd7, 0x95, 0xfe, 0x8f, 0x5d, 0x16, 0x39 },
32377 ++ .valid = true
32378 ++ },
32379 ++ /* wycheproof - edge case on twist */
32380 ++ {
32381 ++ .private = { 0x98, 0x57, 0xa9, 0x14, 0xe3, 0xc2, 0x90, 0x36,
32382 ++ 0xfd, 0x9a, 0x44, 0x2b, 0xa5, 0x26, 0xb5, 0xcd,
32383 ++ 0xcd, 0xf2, 0x82, 0x16, 0x15, 0x3e, 0x63, 0x6c,
32384 ++ 0x10, 0x67, 0x7a, 0xca, 0xb6, 0xbd, 0x6a, 0xa5 },
32385 ++ .public = { 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32386 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32387 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32388 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
32389 ++ .result = { 0x4d, 0xa4, 0xe0, 0xaa, 0x07, 0x2c, 0x23, 0x2e,
32390 ++ 0xe2, 0xf0, 0xfa, 0x4e, 0x51, 0x9a, 0xe5, 0x0b,
32391 ++ 0x52, 0xc1, 0xed, 0xd0, 0x8a, 0x53, 0x4d, 0x4e,
32392 ++ 0xf3, 0x46, 0xc2, 0xe1, 0x06, 0xd2, 0x1d, 0x60 },
32393 ++ .valid = true
32394 ++ },
32395 ++ /* wycheproof - edge case on twist */
32396 ++ {
32397 ++ .private = { 0x48, 0xe2, 0x13, 0x0d, 0x72, 0x33, 0x05, 0xed,
32398 ++ 0x05, 0xe6, 0xe5, 0x89, 0x4d, 0x39, 0x8a, 0x5e,
32399 ++ 0x33, 0x36, 0x7a, 0x8c, 0x6a, 0xac, 0x8f, 0xcd,
32400 ++ 0xf0, 0xa8, 0x8e, 0x4b, 0x42, 0x82, 0x0d, 0xb7 },
32401 ++ .public = { 0xff, 0xff, 0xff, 0x03, 0x00, 0x00, 0xf8, 0xff,
32402 ++ 0xff, 0x1f, 0x00, 0x00, 0xc0, 0xff, 0xff, 0xff,
32403 ++ 0x00, 0x00, 0x00, 0xfe, 0xff, 0xff, 0x07, 0x00,
32404 ++ 0x00, 0xf0, 0xff, 0xff, 0x3f, 0x00, 0x00, 0x00 },
32405 ++ .result = { 0x9e, 0xd1, 0x0c, 0x53, 0x74, 0x7f, 0x64, 0x7f,
32406 ++ 0x82, 0xf4, 0x51, 0x25, 0xd3, 0xde, 0x15, 0xa1,
32407 ++ 0xe6, 0xb8, 0x24, 0x49, 0x6a, 0xb4, 0x04, 0x10,
32408 ++ 0xff, 0xcc, 0x3c, 0xfe, 0x95, 0x76, 0x0f, 0x3b },
32409 ++ .valid = true
32410 ++ },
32411 ++ /* wycheproof - edge case on twist */
32412 ++ {
32413 ++ .private = { 0x28, 0xf4, 0x10, 0x11, 0x69, 0x18, 0x51, 0xb3,
32414 ++ 0xa6, 0x2b, 0x64, 0x15, 0x53, 0xb3, 0x0d, 0x0d,
32415 ++ 0xfd, 0xdc, 0xb8, 0xff, 0xfc, 0xf5, 0x37, 0x00,
32416 ++ 0xa7, 0xbe, 0x2f, 0x6a, 0x87, 0x2e, 0x9f, 0xb0 },
32417 ++ .public = { 0x00, 0x00, 0x00, 0xfc, 0xff, 0xff, 0x07, 0x00,
32418 ++ 0x00, 0xe0, 0xff, 0xff, 0x3f, 0x00, 0x00, 0x00,
32419 ++ 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0xf8, 0xff,
32420 ++ 0xff, 0x0f, 0x00, 0x00, 0xc0, 0xff, 0xff, 0x7f },
32421 ++ .result = { 0xcf, 0x72, 0xb4, 0xaa, 0x6a, 0xa1, 0xc9, 0xf8,
32422 ++ 0x94, 0xf4, 0x16, 0x5b, 0x86, 0x10, 0x9a, 0xa4,
32423 ++ 0x68, 0x51, 0x76, 0x48, 0xe1, 0xf0, 0xcc, 0x70,
32424 ++ 0xe1, 0xab, 0x08, 0x46, 0x01, 0x76, 0x50, 0x6b },
32425 ++ .valid = true
32426 ++ },
32427 ++ /* wycheproof - edge case on twist */
32428 ++ {
32429 ++ .private = { 0x18, 0xa9, 0x3b, 0x64, 0x99, 0xb9, 0xf6, 0xb3,
32430 ++ 0x22, 0x5c, 0xa0, 0x2f, 0xef, 0x41, 0x0e, 0x0a,
32431 ++ 0xde, 0xc2, 0x35, 0x32, 0x32, 0x1d, 0x2d, 0x8e,
32432 ++ 0xf1, 0xa6, 0xd6, 0x02, 0xa8, 0xc6, 0x5b, 0x83 },
32433 ++ .public = { 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
32434 ++ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
32435 ++ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
32436 ++ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x7f },
32437 ++ .result = { 0x5d, 0x50, 0xb6, 0x28, 0x36, 0xbb, 0x69, 0x57,
32438 ++ 0x94, 0x10, 0x38, 0x6c, 0xf7, 0xbb, 0x81, 0x1c,
32439 ++ 0x14, 0xbf, 0x85, 0xb1, 0xc7, 0xb1, 0x7e, 0x59,
32440 ++ 0x24, 0xc7, 0xff, 0xea, 0x91, 0xef, 0x9e, 0x12 },
32441 ++ .valid = true
32442 ++ },
32443 ++ /* wycheproof - edge case on twist */
32444 ++ {
32445 ++ .private = { 0xc0, 0x1d, 0x13, 0x05, 0xa1, 0x33, 0x8a, 0x1f,
32446 ++ 0xca, 0xc2, 0xba, 0x7e, 0x2e, 0x03, 0x2b, 0x42,
32447 ++ 0x7e, 0x0b, 0x04, 0x90, 0x31, 0x65, 0xac, 0xa9,
32448 ++ 0x57, 0xd8, 0xd0, 0x55, 0x3d, 0x87, 0x17, 0xb0 },
32449 ++ .public = { 0xea, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32450 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32451 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32452 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
32453 ++ .result = { 0x19, 0x23, 0x0e, 0xb1, 0x48, 0xd5, 0xd6, 0x7c,
32454 ++ 0x3c, 0x22, 0xab, 0x1d, 0xae, 0xff, 0x80, 0xa5,
32455 ++ 0x7e, 0xae, 0x42, 0x65, 0xce, 0x28, 0x72, 0x65,
32456 ++ 0x7b, 0x2c, 0x80, 0x99, 0xfc, 0x69, 0x8e, 0x50 },
32457 ++ .valid = true
32458 ++ },
32459 ++ /* wycheproof - edge case for public key */
32460 ++ {
32461 ++ .private = { 0x38, 0x6f, 0x7f, 0x16, 0xc5, 0x07, 0x31, 0xd6,
32462 ++ 0x4f, 0x82, 0xe6, 0xa1, 0x70, 0xb1, 0x42, 0xa4,
32463 ++ 0xe3, 0x4f, 0x31, 0xfd, 0x77, 0x68, 0xfc, 0xb8,
32464 ++ 0x90, 0x29, 0x25, 0xe7, 0xd1, 0xe2, 0x1a, 0xbe },
32465 ++ .public = { 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32466 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32467 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32468 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
32469 ++ .result = { 0x0f, 0xca, 0xb5, 0xd8, 0x42, 0xa0, 0x78, 0xd7,
32470 ++ 0xa7, 0x1f, 0xc5, 0x9b, 0x57, 0xbf, 0xb4, 0xca,
32471 ++ 0x0b, 0xe6, 0x87, 0x3b, 0x49, 0xdc, 0xdb, 0x9f,
32472 ++ 0x44, 0xe1, 0x4a, 0xe8, 0xfb, 0xdf, 0xa5, 0x42 },
32473 ++ .valid = true
32474 ++ },
32475 ++ /* wycheproof - edge case for public key */
32476 ++ {
32477 ++ .private = { 0xe0, 0x23, 0xa2, 0x89, 0xbd, 0x5e, 0x90, 0xfa,
32478 ++ 0x28, 0x04, 0xdd, 0xc0, 0x19, 0xa0, 0x5e, 0xf3,
32479 ++ 0xe7, 0x9d, 0x43, 0x4b, 0xb6, 0xea, 0x2f, 0x52,
32480 ++ 0x2e, 0xcb, 0x64, 0x3a, 0x75, 0x29, 0x6e, 0x95 },
32481 ++ .public = { 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
32482 ++ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
32483 ++ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
32484 ++ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 },
32485 ++ .result = { 0x54, 0xce, 0x8f, 0x22, 0x75, 0xc0, 0x77, 0xe3,
32486 ++ 0xb1, 0x30, 0x6a, 0x39, 0x39, 0xc5, 0xe0, 0x3e,
32487 ++ 0xef, 0x6b, 0xbb, 0x88, 0x06, 0x05, 0x44, 0x75,
32488 ++ 0x8d, 0x9f, 0xef, 0x59, 0xb0, 0xbc, 0x3e, 0x4f },
32489 ++ .valid = true
32490 ++ },
32491 ++ /* wycheproof - edge case for public key */
32492 ++ {
32493 ++ .private = { 0x68, 0xf0, 0x10, 0xd6, 0x2e, 0xe8, 0xd9, 0x26,
32494 ++ 0x05, 0x3a, 0x36, 0x1c, 0x3a, 0x75, 0xc6, 0xea,
32495 ++ 0x4e, 0xbd, 0xc8, 0x60, 0x6a, 0xb2, 0x85, 0x00,
32496 ++ 0x3a, 0x6f, 0x8f, 0x40, 0x76, 0xb0, 0x1e, 0x83 },
32497 ++ .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32498 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32499 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32500 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x03 },
32501 ++ .result = { 0xf1, 0x36, 0x77, 0x5c, 0x5b, 0xeb, 0x0a, 0xf8,
32502 ++ 0x11, 0x0a, 0xf1, 0x0b, 0x20, 0x37, 0x23, 0x32,
32503 ++ 0x04, 0x3c, 0xab, 0x75, 0x24, 0x19, 0x67, 0x87,
32504 ++ 0x75, 0xa2, 0x23, 0xdf, 0x57, 0xc9, 0xd3, 0x0d },
32505 ++ .valid = true
32506 ++ },
32507 ++ /* wycheproof - edge case for public key */
32508 ++ {
32509 ++ .private = { 0x58, 0xeb, 0xcb, 0x35, 0xb0, 0xf8, 0x84, 0x5c,
32510 ++ 0xaf, 0x1e, 0xc6, 0x30, 0xf9, 0x65, 0x76, 0xb6,
32511 ++ 0x2c, 0x4b, 0x7b, 0x6c, 0x36, 0xb2, 0x9d, 0xeb,
32512 ++ 0x2c, 0xb0, 0x08, 0x46, 0x51, 0x75, 0x5c, 0x96 },
32513 ++ .public = { 0xff, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xfb, 0xff,
32514 ++ 0xff, 0xdf, 0xff, 0xff, 0xdf, 0xff, 0xff, 0xff,
32515 ++ 0xfe, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xf7, 0xff,
32516 ++ 0xff, 0xf7, 0xff, 0xff, 0xbf, 0xff, 0xff, 0x3f },
32517 ++ .result = { 0xbf, 0x9a, 0xff, 0xd0, 0x6b, 0x84, 0x40, 0x85,
32518 ++ 0x58, 0x64, 0x60, 0x96, 0x2e, 0xf2, 0x14, 0x6f,
32519 ++ 0xf3, 0xd4, 0x53, 0x3d, 0x94, 0x44, 0xaa, 0xb0,
32520 ++ 0x06, 0xeb, 0x88, 0xcc, 0x30, 0x54, 0x40, 0x7d },
32521 ++ .valid = true
32522 ++ },
32523 ++ /* wycheproof - edge case for public key */
32524 ++ {
32525 ++ .private = { 0x18, 0x8c, 0x4b, 0xc5, 0xb9, 0xc4, 0x4b, 0x38,
32526 ++ 0xbb, 0x65, 0x8b, 0x9b, 0x2a, 0xe8, 0x2d, 0x5b,
32527 ++ 0x01, 0x01, 0x5e, 0x09, 0x31, 0x84, 0xb1, 0x7c,
32528 ++ 0xb7, 0x86, 0x35, 0x03, 0xa7, 0x83, 0xe1, 0xbb },
32529 ++ .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32530 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32531 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32532 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f },
32533 ++ .result = { 0xd4, 0x80, 0xde, 0x04, 0xf6, 0x99, 0xcb, 0x3b,
32534 ++ 0xe0, 0x68, 0x4a, 0x9c, 0xc2, 0xe3, 0x12, 0x81,
32535 ++ 0xea, 0x0b, 0xc5, 0xa9, 0xdc, 0xc1, 0x57, 0xd3,
32536 ++ 0xd2, 0x01, 0x58, 0xd4, 0x6c, 0xa5, 0x24, 0x6d },
32537 ++ .valid = true
32538 ++ },
32539 ++ /* wycheproof - edge case for public key */
32540 ++ {
32541 ++ .private = { 0xe0, 0x6c, 0x11, 0xbb, 0x2e, 0x13, 0xce, 0x3d,
32542 ++ 0xc7, 0x67, 0x3f, 0x67, 0xf5, 0x48, 0x22, 0x42,
32543 ++ 0x90, 0x94, 0x23, 0xa9, 0xae, 0x95, 0xee, 0x98,
32544 ++ 0x6a, 0x98, 0x8d, 0x98, 0xfa, 0xee, 0x23, 0xa2 },
32545 ++ .public = { 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f,
32546 ++ 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f,
32547 ++ 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f,
32548 ++ 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f },
32549 ++ .result = { 0x4c, 0x44, 0x01, 0xcc, 0xe6, 0xb5, 0x1e, 0x4c,
32550 ++ 0xb1, 0x8f, 0x27, 0x90, 0x24, 0x6c, 0x9b, 0xf9,
32551 ++ 0x14, 0xdb, 0x66, 0x77, 0x50, 0xa1, 0xcb, 0x89,
32552 ++ 0x06, 0x90, 0x92, 0xaf, 0x07, 0x29, 0x22, 0x76 },
32553 ++ .valid = true
32554 ++ },
32555 ++ /* wycheproof - edge case for public key */
32556 ++ {
32557 ++ .private = { 0xc0, 0x65, 0x8c, 0x46, 0xdd, 0xe1, 0x81, 0x29,
32558 ++ 0x29, 0x38, 0x77, 0x53, 0x5b, 0x11, 0x62, 0xb6,
32559 ++ 0xf9, 0xf5, 0x41, 0x4a, 0x23, 0xcf, 0x4d, 0x2c,
32560 ++ 0xbc, 0x14, 0x0a, 0x4d, 0x99, 0xda, 0x2b, 0x8f },
32561 ++ .public = { 0xeb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32562 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32563 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32564 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
32565 ++ .result = { 0x57, 0x8b, 0xa8, 0xcc, 0x2d, 0xbd, 0xc5, 0x75,
32566 ++ 0xaf, 0xcf, 0x9d, 0xf2, 0xb3, 0xee, 0x61, 0x89,
32567 ++ 0xf5, 0x33, 0x7d, 0x68, 0x54, 0xc7, 0x9b, 0x4c,
32568 ++ 0xe1, 0x65, 0xea, 0x12, 0x29, 0x3b, 0x3a, 0x0f },
32569 ++ .valid = true
32570 ++ },
32571 ++ /* wycheproof - public key with low order */
32572 ++ {
32573 ++ .private = { 0x10, 0x25, 0x5c, 0x92, 0x30, 0xa9, 0x7a, 0x30,
32574 ++ 0xa4, 0x58, 0xca, 0x28, 0x4a, 0x62, 0x96, 0x69,
32575 ++ 0x29, 0x3a, 0x31, 0x89, 0x0c, 0xda, 0x9d, 0x14,
32576 ++ 0x7f, 0xeb, 0xc7, 0xd1, 0xe2, 0x2d, 0x6b, 0xb1 },
32577 ++ .public = { 0xe0, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae,
32578 ++ 0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a,
32579 ++ 0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd,
32580 ++ 0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8, 0x00 },
32581 ++ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32582 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32583 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32584 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
32585 ++ .valid = false
32586 ++ },
32587 ++ /* wycheproof - public key with low order */
32588 ++ {
32589 ++ .private = { 0x78, 0xf1, 0xe8, 0xed, 0xf1, 0x44, 0x81, 0xb3,
32590 ++ 0x89, 0x44, 0x8d, 0xac, 0x8f, 0x59, 0xc7, 0x0b,
32591 ++ 0x03, 0x8e, 0x7c, 0xf9, 0x2e, 0xf2, 0xc7, 0xef,
32592 ++ 0xf5, 0x7a, 0x72, 0x46, 0x6e, 0x11, 0x52, 0x96 },
32593 ++ .public = { 0x5f, 0x9c, 0x95, 0xbc, 0xa3, 0x50, 0x8c, 0x24,
32594 ++ 0xb1, 0xd0, 0xb1, 0x55, 0x9c, 0x83, 0xef, 0x5b,
32595 ++ 0x04, 0x44, 0x5c, 0xc4, 0x58, 0x1c, 0x8e, 0x86,
32596 ++ 0xd8, 0x22, 0x4e, 0xdd, 0xd0, 0x9f, 0x11, 0x57 },
32597 ++ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32598 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32599 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32600 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
32601 ++ .valid = false
32602 ++ },
32603 ++ /* wycheproof - public key with low order */
32604 ++ {
32605 ++ .private = { 0xa0, 0xa0, 0x5a, 0x3e, 0x8f, 0x9f, 0x44, 0x20,
32606 ++ 0x4d, 0x5f, 0x80, 0x59, 0xa9, 0x4a, 0xc7, 0xdf,
32607 ++ 0xc3, 0x9a, 0x49, 0xac, 0x01, 0x6d, 0xd7, 0x43,
32608 ++ 0xdb, 0xfa, 0x43, 0xc5, 0xd6, 0x71, 0xfd, 0x88 },
32609 ++ .public = { 0xec, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32610 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32611 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32612 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
32613 ++ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32614 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32615 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32616 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
32617 ++ .valid = false
32618 ++ },
32619 ++ /* wycheproof - public key with low order */
32620 ++ {
32621 ++ .private = { 0xd0, 0xdb, 0xb3, 0xed, 0x19, 0x06, 0x66, 0x3f,
32622 ++ 0x15, 0x42, 0x0a, 0xf3, 0x1f, 0x4e, 0xaf, 0x65,
32623 ++ 0x09, 0xd9, 0xa9, 0x94, 0x97, 0x23, 0x50, 0x06,
32624 ++ 0x05, 0xad, 0x7c, 0x1c, 0x6e, 0x74, 0x50, 0xa9 },
32625 ++ .public = { 0xed, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32626 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32627 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32628 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
32629 ++ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32630 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32631 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32632 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
32633 ++ .valid = false
32634 ++ },
32635 ++ /* wycheproof - public key with low order */
32636 ++ {
32637 ++ .private = { 0xc0, 0xb1, 0xd0, 0xeb, 0x22, 0xb2, 0x44, 0xfe,
32638 ++ 0x32, 0x91, 0x14, 0x00, 0x72, 0xcd, 0xd9, 0xd9,
32639 ++ 0x89, 0xb5, 0xf0, 0xec, 0xd9, 0x6c, 0x10, 0x0f,
32640 ++ 0xeb, 0x5b, 0xca, 0x24, 0x1c, 0x1d, 0x9f, 0x8f },
32641 ++ .public = { 0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32642 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32643 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32644 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
32645 ++ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32646 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32647 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32648 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
32649 ++ .valid = false
32650 ++ },
32651 ++ /* wycheproof - public key with low order */
32652 ++ {
32653 ++ .private = { 0x48, 0x0b, 0xf4, 0x5f, 0x59, 0x49, 0x42, 0xa8,
32654 ++ 0xbc, 0x0f, 0x33, 0x53, 0xc6, 0xe8, 0xb8, 0x85,
32655 ++ 0x3d, 0x77, 0xf3, 0x51, 0xf1, 0xc2, 0xca, 0x6c,
32656 ++ 0x2d, 0x1a, 0xbf, 0x8a, 0x00, 0xb4, 0x22, 0x9c },
32657 ++ .public = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32658 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32659 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32660 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 },
32661 ++ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32662 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32663 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32664 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
32665 ++ .valid = false
32666 ++ },
32667 ++ /* wycheproof - public key with low order */
32668 ++ {
32669 ++ .private = { 0x30, 0xf9, 0x93, 0xfc, 0xf8, 0x51, 0x4f, 0xc8,
32670 ++ 0x9b, 0xd8, 0xdb, 0x14, 0xcd, 0x43, 0xba, 0x0d,
32671 ++ 0x4b, 0x25, 0x30, 0xe7, 0x3c, 0x42, 0x76, 0xa0,
32672 ++ 0x5e, 0x1b, 0x14, 0x5d, 0x42, 0x0c, 0xed, 0xb4 },
32673 ++ .public = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32674 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32675 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32676 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 },
32677 ++ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32678 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32679 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32680 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
32681 ++ .valid = false
32682 ++ },
32683 ++ /* wycheproof - public key with low order */
32684 ++ {
32685 ++ .private = { 0xc0, 0x49, 0x74, 0xb7, 0x58, 0x38, 0x0e, 0x2a,
32686 ++ 0x5b, 0x5d, 0xf6, 0xeb, 0x09, 0xbb, 0x2f, 0x6b,
32687 ++ 0x34, 0x34, 0xf9, 0x82, 0x72, 0x2a, 0x8e, 0x67,
32688 ++ 0x6d, 0x3d, 0xa2, 0x51, 0xd1, 0xb3, 0xde, 0x83 },
32689 ++ .public = { 0xe0, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae,
32690 ++ 0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a,
32691 ++ 0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd,
32692 ++ 0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8, 0x80 },
32693 ++ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32694 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32695 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32696 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
32697 ++ .valid = false
32698 ++ },
32699 ++ /* wycheproof - public key with low order */
32700 ++ {
32701 ++ .private = { 0x50, 0x2a, 0x31, 0x37, 0x3d, 0xb3, 0x24, 0x46,
32702 ++ 0x84, 0x2f, 0xe5, 0xad, 0xd3, 0xe0, 0x24, 0x02,
32703 ++ 0x2e, 0xa5, 0x4f, 0x27, 0x41, 0x82, 0xaf, 0xc3,
32704 ++ 0xd9, 0xf1, 0xbb, 0x3d, 0x39, 0x53, 0x4e, 0xb5 },
32705 ++ .public = { 0x5f, 0x9c, 0x95, 0xbc, 0xa3, 0x50, 0x8c, 0x24,
32706 ++ 0xb1, 0xd0, 0xb1, 0x55, 0x9c, 0x83, 0xef, 0x5b,
32707 ++ 0x04, 0x44, 0x5c, 0xc4, 0x58, 0x1c, 0x8e, 0x86,
32708 ++ 0xd8, 0x22, 0x4e, 0xdd, 0xd0, 0x9f, 0x11, 0xd7 },
32709 ++ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32710 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32711 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32712 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
32713 ++ .valid = false
32714 ++ },
32715 ++ /* wycheproof - public key with low order */
32716 ++ {
32717 ++ .private = { 0x90, 0xfa, 0x64, 0x17, 0xb0, 0xe3, 0x70, 0x30,
32718 ++ 0xfd, 0x6e, 0x43, 0xef, 0xf2, 0xab, 0xae, 0xf1,
32719 ++ 0x4c, 0x67, 0x93, 0x11, 0x7a, 0x03, 0x9c, 0xf6,
32720 ++ 0x21, 0x31, 0x8b, 0xa9, 0x0f, 0x4e, 0x98, 0xbe },
32721 ++ .public = { 0xec, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32722 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32723 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32724 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
32725 ++ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32726 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32727 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32728 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
32729 ++ .valid = false
32730 ++ },
32731 ++ /* wycheproof - public key with low order */
32732 ++ {
32733 ++ .private = { 0x78, 0xad, 0x3f, 0x26, 0x02, 0x7f, 0x1c, 0x9f,
32734 ++ 0xdd, 0x97, 0x5a, 0x16, 0x13, 0xb9, 0x47, 0x77,
32735 ++ 0x9b, 0xad, 0x2c, 0xf2, 0xb7, 0x41, 0xad, 0xe0,
32736 ++ 0x18, 0x40, 0x88, 0x5a, 0x30, 0xbb, 0x97, 0x9c },
32737 ++ .public = { 0xed, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32738 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32739 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32740 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
32741 ++ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32742 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32743 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32744 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
32745 ++ .valid = false
32746 ++ },
32747 ++ /* wycheproof - public key with low order */
32748 ++ {
32749 ++ .private = { 0x98, 0xe2, 0x3d, 0xe7, 0xb1, 0xe0, 0x92, 0x6e,
32750 ++ 0xd9, 0xc8, 0x7e, 0x7b, 0x14, 0xba, 0xf5, 0x5f,
32751 ++ 0x49, 0x7a, 0x1d, 0x70, 0x96, 0xf9, 0x39, 0x77,
32752 ++ 0x68, 0x0e, 0x44, 0xdc, 0x1c, 0x7b, 0x7b, 0x8b },
32753 ++ .public = { 0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32754 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32755 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32756 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
32757 ++ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32758 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32759 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32760 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
32761 ++ .valid = false
32762 ++ },
32763 ++ /* wycheproof - public key >= p */
32764 ++ {
32765 ++ .private = { 0xf0, 0x1e, 0x48, 0xda, 0xfa, 0xc9, 0xd7, 0xbc,
32766 ++ 0xf5, 0x89, 0xcb, 0xc3, 0x82, 0xc8, 0x78, 0xd1,
32767 ++ 0x8b, 0xda, 0x35, 0x50, 0x58, 0x9f, 0xfb, 0x5d,
32768 ++ 0x50, 0xb5, 0x23, 0xbe, 0xbe, 0x32, 0x9d, 0xae },
32769 ++ .public = { 0xef, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32770 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32771 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32772 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
32773 ++ .result = { 0xbd, 0x36, 0xa0, 0x79, 0x0e, 0xb8, 0x83, 0x09,
32774 ++ 0x8c, 0x98, 0x8b, 0x21, 0x78, 0x67, 0x73, 0xde,
32775 ++ 0x0b, 0x3a, 0x4d, 0xf1, 0x62, 0x28, 0x2c, 0xf1,
32776 ++ 0x10, 0xde, 0x18, 0xdd, 0x48, 0x4c, 0xe7, 0x4b },
32777 ++ .valid = true
32778 ++ },
32779 ++ /* wycheproof - public key >= p */
32780 ++ {
32781 ++ .private = { 0x28, 0x87, 0x96, 0xbc, 0x5a, 0xff, 0x4b, 0x81,
32782 ++ 0xa3, 0x75, 0x01, 0x75, 0x7b, 0xc0, 0x75, 0x3a,
32783 ++ 0x3c, 0x21, 0x96, 0x47, 0x90, 0xd3, 0x86, 0x99,
32784 ++ 0x30, 0x8d, 0xeb, 0xc1, 0x7a, 0x6e, 0xaf, 0x8d },
32785 ++ .public = { 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32786 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32787 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32788 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
32789 ++ .result = { 0xb4, 0xe0, 0xdd, 0x76, 0xda, 0x7b, 0x07, 0x17,
32790 ++ 0x28, 0xb6, 0x1f, 0x85, 0x67, 0x71, 0xaa, 0x35,
32791 ++ 0x6e, 0x57, 0xed, 0xa7, 0x8a, 0x5b, 0x16, 0x55,
32792 ++ 0xcc, 0x38, 0x20, 0xfb, 0x5f, 0x85, 0x4c, 0x5c },
32793 ++ .valid = true
32794 ++ },
32795 ++ /* wycheproof - public key >= p */
32796 ++ {
32797 ++ .private = { 0x98, 0xdf, 0x84, 0x5f, 0x66, 0x51, 0xbf, 0x11,
32798 ++ 0x38, 0x22, 0x1f, 0x11, 0x90, 0x41, 0xf7, 0x2b,
32799 ++ 0x6d, 0xbc, 0x3c, 0x4a, 0xce, 0x71, 0x43, 0xd9,
32800 ++ 0x9f, 0xd5, 0x5a, 0xd8, 0x67, 0x48, 0x0d, 0xa8 },
32801 ++ .public = { 0xf1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32802 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32803 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32804 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
32805 ++ .result = { 0x6f, 0xdf, 0x6c, 0x37, 0x61, 0x1d, 0xbd, 0x53,
32806 ++ 0x04, 0xdc, 0x0f, 0x2e, 0xb7, 0xc9, 0x51, 0x7e,
32807 ++ 0xb3, 0xc5, 0x0e, 0x12, 0xfd, 0x05, 0x0a, 0xc6,
32808 ++ 0xde, 0xc2, 0x70, 0x71, 0xd4, 0xbf, 0xc0, 0x34 },
32809 ++ .valid = true
32810 ++ },
32811 ++ /* wycheproof - public key >= p */
32812 ++ {
32813 ++ .private = { 0xf0, 0x94, 0x98, 0xe4, 0x6f, 0x02, 0xf8, 0x78,
32814 ++ 0x82, 0x9e, 0x78, 0xb8, 0x03, 0xd3, 0x16, 0xa2,
32815 ++ 0xed, 0x69, 0x5d, 0x04, 0x98, 0xa0, 0x8a, 0xbd,
32816 ++ 0xf8, 0x27, 0x69, 0x30, 0xe2, 0x4e, 0xdc, 0xb0 },
32817 ++ .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32818 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32819 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32820 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
32821 ++ .result = { 0x4c, 0x8f, 0xc4, 0xb1, 0xc6, 0xab, 0x88, 0xfb,
32822 ++ 0x21, 0xf1, 0x8f, 0x6d, 0x4c, 0x81, 0x02, 0x40,
32823 ++ 0xd4, 0xe9, 0x46, 0x51, 0xba, 0x44, 0xf7, 0xa2,
32824 ++ 0xc8, 0x63, 0xce, 0xc7, 0xdc, 0x56, 0x60, 0x2d },
32825 ++ .valid = true
32826 ++ },
32827 ++ /* wycheproof - public key >= p */
32828 ++ {
32829 ++ .private = { 0x18, 0x13, 0xc1, 0x0a, 0x5c, 0x7f, 0x21, 0xf9,
32830 ++ 0x6e, 0x17, 0xf2, 0x88, 0xc0, 0xcc, 0x37, 0x60,
32831 ++ 0x7c, 0x04, 0xc5, 0xf5, 0xae, 0xa2, 0xdb, 0x13,
32832 ++ 0x4f, 0x9e, 0x2f, 0xfc, 0x66, 0xbd, 0x9d, 0xb8 },
32833 ++ .public = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32834 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32835 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32836 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 },
32837 ++ .result = { 0x1c, 0xd0, 0xb2, 0x82, 0x67, 0xdc, 0x54, 0x1c,
32838 ++ 0x64, 0x2d, 0x6d, 0x7d, 0xca, 0x44, 0xa8, 0xb3,
32839 ++ 0x8a, 0x63, 0x73, 0x6e, 0xef, 0x5c, 0x4e, 0x65,
32840 ++ 0x01, 0xff, 0xbb, 0xb1, 0x78, 0x0c, 0x03, 0x3c },
32841 ++ .valid = true
32842 ++ },
32843 ++ /* wycheproof - public key >= p */
32844 ++ {
32845 ++ .private = { 0x78, 0x57, 0xfb, 0x80, 0x86, 0x53, 0x64, 0x5a,
32846 ++ 0x0b, 0xeb, 0x13, 0x8a, 0x64, 0xf5, 0xf4, 0xd7,
32847 ++ 0x33, 0xa4, 0x5e, 0xa8, 0x4c, 0x3c, 0xda, 0x11,
32848 ++ 0xa9, 0xc0, 0x6f, 0x7e, 0x71, 0x39, 0x14, 0x9e },
32849 ++ .public = { 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32850 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32851 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32852 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 },
32853 ++ .result = { 0x87, 0x55, 0xbe, 0x01, 0xc6, 0x0a, 0x7e, 0x82,
32854 ++ 0x5c, 0xff, 0x3e, 0x0e, 0x78, 0xcb, 0x3a, 0xa4,
32855 ++ 0x33, 0x38, 0x61, 0x51, 0x6a, 0xa5, 0x9b, 0x1c,
32856 ++ 0x51, 0xa8, 0xb2, 0xa5, 0x43, 0xdf, 0xa8, 0x22 },
32857 ++ .valid = true
32858 ++ },
32859 ++ /* wycheproof - public key >= p */
32860 ++ {
32861 ++ .private = { 0xe0, 0x3a, 0xa8, 0x42, 0xe2, 0xab, 0xc5, 0x6e,
32862 ++ 0x81, 0xe8, 0x7b, 0x8b, 0x9f, 0x41, 0x7b, 0x2a,
32863 ++ 0x1e, 0x59, 0x13, 0xc7, 0x23, 0xee, 0xd2, 0x8d,
32864 ++ 0x75, 0x2f, 0x8d, 0x47, 0xa5, 0x9f, 0x49, 0x8f },
32865 ++ .public = { 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32866 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32867 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
32868 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 },
32869 ++ .result = { 0x54, 0xc9, 0xa1, 0xed, 0x95, 0xe5, 0x46, 0xd2,
32870 ++ 0x78, 0x22, 0xa3, 0x60, 0x93, 0x1d, 0xda, 0x60,
32871 ++ 0xa1, 0xdf, 0x04, 0x9d, 0xa6, 0xf9, 0x04, 0x25,
32872 ++ 0x3c, 0x06, 0x12, 0xbb, 0xdc, 0x08, 0x74, 0x76 },
32873 ++ .valid = true
32874 ++ },
32875 ++ /* wycheproof - public key >= p */
32876 ++ {
32877 ++ .private = { 0xf8, 0xf7, 0x07, 0xb7, 0x99, 0x9b, 0x18, 0xcb,
32878 ++ 0x0d, 0x6b, 0x96, 0x12, 0x4f, 0x20, 0x45, 0x97,
32879 ++ 0x2c, 0xa2, 0x74, 0xbf, 0xc1, 0x54, 0xad, 0x0c,
32880 ++ 0x87, 0x03, 0x8c, 0x24, 0xc6, 0xd0, 0xd4, 0xb2 },
32881 ++ .public = { 0xda, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32882 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32883 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32884 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
32885 ++ .result = { 0xcc, 0x1f, 0x40, 0xd7, 0x43, 0xcd, 0xc2, 0x23,
32886 ++ 0x0e, 0x10, 0x43, 0xda, 0xba, 0x8b, 0x75, 0xe8,
32887 ++ 0x10, 0xf1, 0xfb, 0xab, 0x7f, 0x25, 0x52, 0x69,
32888 ++ 0xbd, 0x9e, 0xbb, 0x29, 0xe6, 0xbf, 0x49, 0x4f },
32889 ++ .valid = true
32890 ++ },
32891 ++ /* wycheproof - public key >= p */
32892 ++ {
32893 ++ .private = { 0xa0, 0x34, 0xf6, 0x84, 0xfa, 0x63, 0x1e, 0x1a,
32894 ++ 0x34, 0x81, 0x18, 0xc1, 0xce, 0x4c, 0x98, 0x23,
32895 ++ 0x1f, 0x2d, 0x9e, 0xec, 0x9b, 0xa5, 0x36, 0x5b,
32896 ++ 0x4a, 0x05, 0xd6, 0x9a, 0x78, 0x5b, 0x07, 0x96 },
32897 ++ .public = { 0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32898 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32899 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32900 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
32901 ++ .result = { 0x54, 0x99, 0x8e, 0xe4, 0x3a, 0x5b, 0x00, 0x7b,
32902 ++ 0xf4, 0x99, 0xf0, 0x78, 0xe7, 0x36, 0x52, 0x44,
32903 ++ 0x00, 0xa8, 0xb5, 0xc7, 0xe9, 0xb9, 0xb4, 0x37,
32904 ++ 0x71, 0x74, 0x8c, 0x7c, 0xdf, 0x88, 0x04, 0x12 },
32905 ++ .valid = true
32906 ++ },
32907 ++ /* wycheproof - public key >= p */
32908 ++ {
32909 ++ .private = { 0x30, 0xb6, 0xc6, 0xa0, 0xf2, 0xff, 0xa6, 0x80,
32910 ++ 0x76, 0x8f, 0x99, 0x2b, 0xa8, 0x9e, 0x15, 0x2d,
32911 ++ 0x5b, 0xc9, 0x89, 0x3d, 0x38, 0xc9, 0x11, 0x9b,
32912 ++ 0xe4, 0xf7, 0x67, 0xbf, 0xab, 0x6e, 0x0c, 0xa5 },
32913 ++ .public = { 0xdc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32914 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32915 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32916 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
32917 ++ .result = { 0xea, 0xd9, 0xb3, 0x8e, 0xfd, 0xd7, 0x23, 0x63,
32918 ++ 0x79, 0x34, 0xe5, 0x5a, 0xb7, 0x17, 0xa7, 0xae,
32919 ++ 0x09, 0xeb, 0x86, 0xa2, 0x1d, 0xc3, 0x6a, 0x3f,
32920 ++ 0xee, 0xb8, 0x8b, 0x75, 0x9e, 0x39, 0x1e, 0x09 },
32921 ++ .valid = true
32922 ++ },
32923 ++ /* wycheproof - public key >= p */
32924 ++ {
32925 ++ .private = { 0x90, 0x1b, 0x9d, 0xcf, 0x88, 0x1e, 0x01, 0xe0,
32926 ++ 0x27, 0x57, 0x50, 0x35, 0xd4, 0x0b, 0x43, 0xbd,
32927 ++ 0xc1, 0xc5, 0x24, 0x2e, 0x03, 0x08, 0x47, 0x49,
32928 ++ 0x5b, 0x0c, 0x72, 0x86, 0x46, 0x9b, 0x65, 0x91 },
32929 ++ .public = { 0xea, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32930 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32931 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32932 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
32933 ++ .result = { 0x60, 0x2f, 0xf4, 0x07, 0x89, 0xb5, 0x4b, 0x41,
32934 ++ 0x80, 0x59, 0x15, 0xfe, 0x2a, 0x62, 0x21, 0xf0,
32935 ++ 0x7a, 0x50, 0xff, 0xc2, 0xc3, 0xfc, 0x94, 0xcf,
32936 ++ 0x61, 0xf1, 0x3d, 0x79, 0x04, 0xe8, 0x8e, 0x0e },
32937 ++ .valid = true
32938 ++ },
32939 ++ /* wycheproof - public key >= p */
32940 ++ {
32941 ++ .private = { 0x80, 0x46, 0x67, 0x7c, 0x28, 0xfd, 0x82, 0xc9,
32942 ++ 0xa1, 0xbd, 0xb7, 0x1a, 0x1a, 0x1a, 0x34, 0xfa,
32943 ++ 0xba, 0x12, 0x25, 0xe2, 0x50, 0x7f, 0xe3, 0xf5,
32944 ++ 0x4d, 0x10, 0xbd, 0x5b, 0x0d, 0x86, 0x5f, 0x8e },
32945 ++ .public = { 0xeb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32946 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32947 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32948 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
32949 ++ .result = { 0xe0, 0x0a, 0xe8, 0xb1, 0x43, 0x47, 0x12, 0x47,
32950 ++ 0xba, 0x24, 0xf1, 0x2c, 0x88, 0x55, 0x36, 0xc3,
32951 ++ 0xcb, 0x98, 0x1b, 0x58, 0xe1, 0xe5, 0x6b, 0x2b,
32952 ++ 0xaf, 0x35, 0xc1, 0x2a, 0xe1, 0xf7, 0x9c, 0x26 },
32953 ++ .valid = true
32954 ++ },
32955 ++ /* wycheproof - public key >= p */
32956 ++ {
32957 ++ .private = { 0x60, 0x2f, 0x7e, 0x2f, 0x68, 0xa8, 0x46, 0xb8,
32958 ++ 0x2c, 0xc2, 0x69, 0xb1, 0xd4, 0x8e, 0x93, 0x98,
32959 ++ 0x86, 0xae, 0x54, 0xfd, 0x63, 0x6c, 0x1f, 0xe0,
32960 ++ 0x74, 0xd7, 0x10, 0x12, 0x7d, 0x47, 0x24, 0x91 },
32961 ++ .public = { 0xef, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32962 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32963 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32964 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
32965 ++ .result = { 0x98, 0xcb, 0x9b, 0x50, 0xdd, 0x3f, 0xc2, 0xb0,
32966 ++ 0xd4, 0xf2, 0xd2, 0xbf, 0x7c, 0x5c, 0xfd, 0xd1,
32967 ++ 0x0c, 0x8f, 0xcd, 0x31, 0xfc, 0x40, 0xaf, 0x1a,
32968 ++ 0xd4, 0x4f, 0x47, 0xc1, 0x31, 0x37, 0x63, 0x62 },
32969 ++ .valid = true
32970 ++ },
32971 ++ /* wycheproof - public key >= p */
32972 ++ {
32973 ++ .private = { 0x60, 0x88, 0x7b, 0x3d, 0xc7, 0x24, 0x43, 0x02,
32974 ++ 0x6e, 0xbe, 0xdb, 0xbb, 0xb7, 0x06, 0x65, 0xf4,
32975 ++ 0x2b, 0x87, 0xad, 0xd1, 0x44, 0x0e, 0x77, 0x68,
32976 ++ 0xfb, 0xd7, 0xe8, 0xe2, 0xce, 0x5f, 0x63, 0x9d },
32977 ++ .public = { 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32978 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32979 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32980 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
32981 ++ .result = { 0x38, 0xd6, 0x30, 0x4c, 0x4a, 0x7e, 0x6d, 0x9f,
32982 ++ 0x79, 0x59, 0x33, 0x4f, 0xb5, 0x24, 0x5b, 0xd2,
32983 ++ 0xc7, 0x54, 0x52, 0x5d, 0x4c, 0x91, 0xdb, 0x95,
32984 ++ 0x02, 0x06, 0x92, 0x62, 0x34, 0xc1, 0xf6, 0x33 },
32985 ++ .valid = true
32986 ++ },
32987 ++ /* wycheproof - public key >= p */
32988 ++ {
32989 ++ .private = { 0x78, 0xd3, 0x1d, 0xfa, 0x85, 0x44, 0x97, 0xd7,
32990 ++ 0x2d, 0x8d, 0xef, 0x8a, 0x1b, 0x7f, 0xb0, 0x06,
32991 ++ 0xce, 0xc2, 0xd8, 0xc4, 0x92, 0x46, 0x47, 0xc9,
32992 ++ 0x38, 0x14, 0xae, 0x56, 0xfa, 0xed, 0xa4, 0x95 },
32993 ++ .public = { 0xf1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32994 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32995 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
32996 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
32997 ++ .result = { 0x78, 0x6c, 0xd5, 0x49, 0x96, 0xf0, 0x14, 0xa5,
32998 ++ 0xa0, 0x31, 0xec, 0x14, 0xdb, 0x81, 0x2e, 0xd0,
32999 ++ 0x83, 0x55, 0x06, 0x1f, 0xdb, 0x5d, 0xe6, 0x80,
33000 ++ 0xa8, 0x00, 0xac, 0x52, 0x1f, 0x31, 0x8e, 0x23 },
33001 ++ .valid = true
33002 ++ },
33003 ++ /* wycheproof - public key >= p */
33004 ++ {
33005 ++ .private = { 0xc0, 0x4c, 0x5b, 0xae, 0xfa, 0x83, 0x02, 0xdd,
33006 ++ 0xde, 0xd6, 0xa4, 0xbb, 0x95, 0x77, 0x61, 0xb4,
33007 ++ 0xeb, 0x97, 0xae, 0xfa, 0x4f, 0xc3, 0xb8, 0x04,
33008 ++ 0x30, 0x85, 0xf9, 0x6a, 0x56, 0x59, 0xb3, 0xa5 },
33009 ++ .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
33010 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
33011 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
33012 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
33013 ++ .result = { 0x29, 0xae, 0x8b, 0xc7, 0x3e, 0x9b, 0x10, 0xa0,
33014 ++ 0x8b, 0x4f, 0x68, 0x1c, 0x43, 0xc3, 0xe0, 0xac,
33015 ++ 0x1a, 0x17, 0x1d, 0x31, 0xb3, 0x8f, 0x1a, 0x48,
33016 ++ 0xef, 0xba, 0x29, 0xae, 0x63, 0x9e, 0xa1, 0x34 },
33017 ++ .valid = true
33018 ++ },
33019 ++ /* wycheproof - RFC 7748 */
33020 ++ {
33021 ++ .private = { 0xa0, 0x46, 0xe3, 0x6b, 0xf0, 0x52, 0x7c, 0x9d,
33022 ++ 0x3b, 0x16, 0x15, 0x4b, 0x82, 0x46, 0x5e, 0xdd,
33023 ++ 0x62, 0x14, 0x4c, 0x0a, 0xc1, 0xfc, 0x5a, 0x18,
33024 ++ 0x50, 0x6a, 0x22, 0x44, 0xba, 0x44, 0x9a, 0x44 },
33025 ++ .public = { 0xe6, 0xdb, 0x68, 0x67, 0x58, 0x30, 0x30, 0xdb,
33026 ++ 0x35, 0x94, 0xc1, 0xa4, 0x24, 0xb1, 0x5f, 0x7c,
33027 ++ 0x72, 0x66, 0x24, 0xec, 0x26, 0xb3, 0x35, 0x3b,
33028 ++ 0x10, 0xa9, 0x03, 0xa6, 0xd0, 0xab, 0x1c, 0x4c },
33029 ++ .result = { 0xc3, 0xda, 0x55, 0x37, 0x9d, 0xe9, 0xc6, 0x90,
33030 ++ 0x8e, 0x94, 0xea, 0x4d, 0xf2, 0x8d, 0x08, 0x4f,
33031 ++ 0x32, 0xec, 0xcf, 0x03, 0x49, 0x1c, 0x71, 0xf7,
33032 ++ 0x54, 0xb4, 0x07, 0x55, 0x77, 0xa2, 0x85, 0x52 },
33033 ++ .valid = true
33034 ++ },
33035 ++ /* wycheproof - RFC 7748 */
33036 ++ {
33037 ++ .private = { 0x48, 0x66, 0xe9, 0xd4, 0xd1, 0xb4, 0x67, 0x3c,
33038 ++ 0x5a, 0xd2, 0x26, 0x91, 0x95, 0x7d, 0x6a, 0xf5,
33039 ++ 0xc1, 0x1b, 0x64, 0x21, 0xe0, 0xea, 0x01, 0xd4,
33040 ++ 0x2c, 0xa4, 0x16, 0x9e, 0x79, 0x18, 0xba, 0x4d },
33041 ++ .public = { 0xe5, 0x21, 0x0f, 0x12, 0x78, 0x68, 0x11, 0xd3,
33042 ++ 0xf4, 0xb7, 0x95, 0x9d, 0x05, 0x38, 0xae, 0x2c,
33043 ++ 0x31, 0xdb, 0xe7, 0x10, 0x6f, 0xc0, 0x3c, 0x3e,
33044 ++ 0xfc, 0x4c, 0xd5, 0x49, 0xc7, 0x15, 0xa4, 0x13 },
33045 ++ .result = { 0x95, 0xcb, 0xde, 0x94, 0x76, 0xe8, 0x90, 0x7d,
33046 ++ 0x7a, 0xad, 0xe4, 0x5c, 0xb4, 0xb8, 0x73, 0xf8,
33047 ++ 0x8b, 0x59, 0x5a, 0x68, 0x79, 0x9f, 0xa1, 0x52,
33048 ++ 0xe6, 0xf8, 0xf7, 0x64, 0x7a, 0xac, 0x79, 0x57 },
33049 ++ .valid = true
33050 ++ },
33051 ++ /* wycheproof - edge case for shared secret */
33052 ++ {
33053 ++ .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
33054 ++ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
33055 ++ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
33056 ++ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
33057 ++ .public = { 0x0a, 0xb4, 0xe7, 0x63, 0x80, 0xd8, 0x4d, 0xde,
33058 ++ 0x4f, 0x68, 0x33, 0xc5, 0x8f, 0x2a, 0x9f, 0xb8,
33059 ++ 0xf8, 0x3b, 0xb0, 0x16, 0x9b, 0x17, 0x2b, 0xe4,
33060 ++ 0xb6, 0xe0, 0x59, 0x28, 0x87, 0x74, 0x1a, 0x36 },
33061 ++ .result = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
33062 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
33063 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
33064 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
33065 ++ .valid = true
33066 ++ },
33067 ++ /* wycheproof - edge case for shared secret */
33068 ++ {
33069 ++ .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
33070 ++ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
33071 ++ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
33072 ++ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
33073 ++ .public = { 0x89, 0xe1, 0x0d, 0x57, 0x01, 0xb4, 0x33, 0x7d,
33074 ++ 0x2d, 0x03, 0x21, 0x81, 0x53, 0x8b, 0x10, 0x64,
33075 ++ 0xbd, 0x40, 0x84, 0x40, 0x1c, 0xec, 0xa1, 0xfd,
33076 ++ 0x12, 0x66, 0x3a, 0x19, 0x59, 0x38, 0x80, 0x00 },
33077 ++ .result = { 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
33078 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
33079 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
33080 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
33081 ++ .valid = true
33082 ++ },
33083 ++ /* wycheproof - edge case for shared secret */
33084 ++ {
33085 ++ .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
33086 ++ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
33087 ++ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
33088 ++ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
33089 ++ .public = { 0x2b, 0x55, 0xd3, 0xaa, 0x4a, 0x8f, 0x80, 0xc8,
33090 ++ 0xc0, 0xb2, 0xae, 0x5f, 0x93, 0x3e, 0x85, 0xaf,
33091 ++ 0x49, 0xbe, 0xac, 0x36, 0xc2, 0xfa, 0x73, 0x94,
33092 ++ 0xba, 0xb7, 0x6c, 0x89, 0x33, 0xf8, 0xf8, 0x1d },
33093 ++ .result = { 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
33094 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
33095 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
33096 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
33097 ++ .valid = true
33098 ++ },
33099 ++ /* wycheproof - edge case for shared secret */
33100 ++ {
33101 ++ .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
33102 ++ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
33103 ++ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
33104 ++ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
33105 ++ .public = { 0x63, 0xe5, 0xb1, 0xfe, 0x96, 0x01, 0xfe, 0x84,
33106 ++ 0x38, 0x5d, 0x88, 0x66, 0xb0, 0x42, 0x12, 0x62,
33107 ++ 0xf7, 0x8f, 0xbf, 0xa5, 0xaf, 0xf9, 0x58, 0x5e,
33108 ++ 0x62, 0x66, 0x79, 0xb1, 0x85, 0x47, 0xd9, 0x59 },
33109 ++ .result = { 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
33110 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
33111 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
33112 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f },
33113 ++ .valid = true
33114 ++ },
33115 ++ /* wycheproof - edge case for shared secret */
33116 ++ {
33117 ++ .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
33118 ++ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
33119 ++ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
33120 ++ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
33121 ++ .public = { 0xe4, 0x28, 0xf3, 0xda, 0xc1, 0x78, 0x09, 0xf8,
33122 ++ 0x27, 0xa5, 0x22, 0xce, 0x32, 0x35, 0x50, 0x58,
33123 ++ 0xd0, 0x73, 0x69, 0x36, 0x4a, 0xa7, 0x89, 0x02,
33124 ++ 0xee, 0x10, 0x13, 0x9b, 0x9f, 0x9d, 0xd6, 0x53 },
33125 ++ .result = { 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
33126 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
33127 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
33128 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f },
33129 ++ .valid = true
33130 ++ },
33131 ++ /* wycheproof - edge case for shared secret */
33132 ++ {
33133 ++ .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
33134 ++ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
33135 ++ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
33136 ++ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
33137 ++ .public = { 0xb3, 0xb5, 0x0e, 0x3e, 0xd3, 0xa4, 0x07, 0xb9,
33138 ++ 0x5d, 0xe9, 0x42, 0xef, 0x74, 0x57, 0x5b, 0x5a,
33139 ++ 0xb8, 0xa1, 0x0c, 0x09, 0xee, 0x10, 0x35, 0x44,
33140 ++ 0xd6, 0x0b, 0xdf, 0xed, 0x81, 0x38, 0xab, 0x2b },
33141 ++ .result = { 0xf9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
33142 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
33143 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
33144 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f },
33145 ++ .valid = true
33146 ++ },
33147 ++ /* wycheproof - edge case for shared secret */
33148 ++ {
33149 ++ .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
33150 ++ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
33151 ++ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
33152 ++ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
33153 ++ .public = { 0x21, 0x3f, 0xff, 0xe9, 0x3d, 0x5e, 0xa8, 0xcd,
33154 ++ 0x24, 0x2e, 0x46, 0x28, 0x44, 0x02, 0x99, 0x22,
33155 ++ 0xc4, 0x3c, 0x77, 0xc9, 0xe3, 0xe4, 0x2f, 0x56,
33156 ++ 0x2f, 0x48, 0x5d, 0x24, 0xc5, 0x01, 0xa2, 0x0b },
33157 ++ .result = { 0xf3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
33158 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
33159 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
33160 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f },
33161 ++ .valid = true
33162 ++ },
33163 ++ /* wycheproof - edge case for shared secret */
33164 ++ {
33165 ++ .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
33166 ++ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
33167 ++ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
33168 ++ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
33169 ++ .public = { 0x91, 0xb2, 0x32, 0xa1, 0x78, 0xb3, 0xcd, 0x53,
33170 ++ 0x09, 0x32, 0x44, 0x1e, 0x61, 0x39, 0x41, 0x8f,
33171 ++ 0x72, 0x17, 0x22, 0x92, 0xf1, 0xda, 0x4c, 0x18,
33172 ++ 0x34, 0xfc, 0x5e, 0xbf, 0xef, 0xb5, 0x1e, 0x3f },
33173 ++ .result = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
33174 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
33175 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
33176 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x03 },
33177 ++ .valid = true
33178 ++ },
33179 ++ /* wycheproof - edge case for shared secret */
33180 ++ {
33181 ++ .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
33182 ++ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
33183 ++ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
33184 ++ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
33185 ++ .public = { 0x04, 0x5c, 0x6e, 0x11, 0xc5, 0xd3, 0x32, 0x55,
33186 ++ 0x6c, 0x78, 0x22, 0xfe, 0x94, 0xeb, 0xf8, 0x9b,
33187 ++ 0x56, 0xa3, 0x87, 0x8d, 0xc2, 0x7c, 0xa0, 0x79,
33188 ++ 0x10, 0x30, 0x58, 0x84, 0x9f, 0xab, 0xcb, 0x4f },
33189 ++ .result = { 0xe5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
33190 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
33191 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
33192 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
33193 ++ .valid = true
33194 ++ },
33195 ++ /* wycheproof - edge case for shared secret */
33196 ++ {
33197 ++ .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
33198 ++ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
33199 ++ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
33200 ++ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
33201 ++ .public = { 0x1c, 0xa2, 0x19, 0x0b, 0x71, 0x16, 0x35, 0x39,
33202 ++ 0x06, 0x3c, 0x35, 0x77, 0x3b, 0xda, 0x0c, 0x9c,
33203 ++ 0x92, 0x8e, 0x91, 0x36, 0xf0, 0x62, 0x0a, 0xeb,
33204 ++ 0x09, 0x3f, 0x09, 0x91, 0x97, 0xb7, 0xf7, 0x4e },
33205 ++ .result = { 0xe3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
33206 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
33207 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
33208 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
33209 ++ .valid = true
33210 ++ },
33211 ++ /* wycheproof - edge case for shared secret */
33212 ++ {
33213 ++ .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
33214 ++ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
33215 ++ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
33216 ++ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
33217 ++ .public = { 0xf7, 0x6e, 0x90, 0x10, 0xac, 0x33, 0xc5, 0x04,
33218 ++ 0x3b, 0x2d, 0x3b, 0x76, 0xa8, 0x42, 0x17, 0x10,
33219 ++ 0x00, 0xc4, 0x91, 0x62, 0x22, 0xe9, 0xe8, 0x58,
33220 ++ 0x97, 0xa0, 0xae, 0xc7, 0xf6, 0x35, 0x0b, 0x3c },
33221 ++ .result = { 0xdd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
33222 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
33223 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
33224 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
33225 ++ .valid = true
33226 ++ },
33227 ++ /* wycheproof - edge case for shared secret */
33228 ++ {
33229 ++ .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
33230 ++ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
33231 ++ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
33232 ++ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
33233 ++ .public = { 0xbb, 0x72, 0x68, 0x8d, 0x8f, 0x8a, 0xa7, 0xa3,
33234 ++ 0x9c, 0xd6, 0x06, 0x0c, 0xd5, 0xc8, 0x09, 0x3c,
33235 ++ 0xde, 0xc6, 0xfe, 0x34, 0x19, 0x37, 0xc3, 0x88,
33236 ++ 0x6a, 0x99, 0x34, 0x6c, 0xd0, 0x7f, 0xaa, 0x55 },
33237 ++ .result = { 0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
33238 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
33239 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
33240 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
33241 ++ .valid = true
33242 ++ },
33243 ++ /* wycheproof - edge case for shared secret */
33244 ++ {
33245 ++ .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
33246 ++ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
33247 ++ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
33248 ++ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
33249 ++ .public = { 0x88, 0xfd, 0xde, 0xa1, 0x93, 0x39, 0x1c, 0x6a,
33250 ++ 0x59, 0x33, 0xef, 0x9b, 0x71, 0x90, 0x15, 0x49,
33251 ++ 0x44, 0x72, 0x05, 0xaa, 0xe9, 0xda, 0x92, 0x8a,
33252 ++ 0x6b, 0x91, 0xa3, 0x52, 0xba, 0x10, 0xf4, 0x1f },
33253 ++ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
33254 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
33255 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
33256 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 },
33257 ++ .valid = true
33258 ++ },
33259 ++ /* wycheproof - edge case for shared secret */
33260 ++ {
33261 ++ .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4,
33262 ++ 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3,
33263 ++ 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc,
33264 ++ 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 },
33265 ++ .public = { 0x30, 0x3b, 0x39, 0x2f, 0x15, 0x31, 0x16, 0xca,
33266 ++ 0xd9, 0xcc, 0x68, 0x2a, 0x00, 0xcc, 0xc4, 0x4c,
33267 ++ 0x95, 0xff, 0x0d, 0x3b, 0xbe, 0x56, 0x8b, 0xeb,
33268 ++ 0x6c, 0x4e, 0x73, 0x9b, 0xaf, 0xdc, 0x2c, 0x68 },
33269 ++ .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
33270 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
33271 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
33272 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00 },
33273 ++ .valid = true
33274 ++ },
33275 ++ /* wycheproof - checking for overflow */
33276 ++ {
33277 ++ .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d,
33278 ++ 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d,
33279 ++ 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c,
33280 ++ 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 },
33281 ++ .public = { 0xfd, 0x30, 0x0a, 0xeb, 0x40, 0xe1, 0xfa, 0x58,
33282 ++ 0x25, 0x18, 0x41, 0x2b, 0x49, 0xb2, 0x08, 0xa7,
33283 ++ 0x84, 0x2b, 0x1e, 0x1f, 0x05, 0x6a, 0x04, 0x01,
33284 ++ 0x78, 0xea, 0x41, 0x41, 0x53, 0x4f, 0x65, 0x2d },
33285 ++ .result = { 0xb7, 0x34, 0x10, 0x5d, 0xc2, 0x57, 0x58, 0x5d,
33286 ++ 0x73, 0xb5, 0x66, 0xcc, 0xb7, 0x6f, 0x06, 0x27,
33287 ++ 0x95, 0xcc, 0xbe, 0xc8, 0x91, 0x28, 0xe5, 0x2b,
33288 ++ 0x02, 0xf3, 0xe5, 0x96, 0x39, 0xf1, 0x3c, 0x46 },
33289 ++ .valid = true
33290 ++ },
33291 ++ /* wycheproof - checking for overflow */
33292 ++ {
33293 ++ .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d,
33294 ++ 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d,
33295 ++ 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c,
33296 ++ 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 },
33297 ++ .public = { 0xc8, 0xef, 0x79, 0xb5, 0x14, 0xd7, 0x68, 0x26,
33298 ++ 0x77, 0xbc, 0x79, 0x31, 0xe0, 0x6e, 0xe5, 0xc2,
33299 ++ 0x7c, 0x9b, 0x39, 0x2b, 0x4a, 0xe9, 0x48, 0x44,
33300 ++ 0x73, 0xf5, 0x54, 0xe6, 0x67, 0x8e, 0xcc, 0x2e },
33301 ++ .result = { 0x64, 0x7a, 0x46, 0xb6, 0xfc, 0x3f, 0x40, 0xd6,
33302 ++ 0x21, 0x41, 0xee, 0x3c, 0xee, 0x70, 0x6b, 0x4d,
33303 ++ 0x7a, 0x92, 0x71, 0x59, 0x3a, 0x7b, 0x14, 0x3e,
33304 ++ 0x8e, 0x2e, 0x22, 0x79, 0x88, 0x3e, 0x45, 0x50 },
33305 ++ .valid = true
33306 ++ },
33307 ++ /* wycheproof - checking for overflow */
33308 ++ {
33309 ++ .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d,
33310 ++ 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d,
33311 ++ 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c,
33312 ++ 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 },
33313 ++ .public = { 0x64, 0xae, 0xac, 0x25, 0x04, 0x14, 0x48, 0x61,
33314 ++ 0x53, 0x2b, 0x7b, 0xbc, 0xb6, 0xc8, 0x7d, 0x67,
33315 ++ 0xdd, 0x4c, 0x1f, 0x07, 0xeb, 0xc2, 0xe0, 0x6e,
33316 ++ 0xff, 0xb9, 0x5a, 0xec, 0xc6, 0x17, 0x0b, 0x2c },
33317 ++ .result = { 0x4f, 0xf0, 0x3d, 0x5f, 0xb4, 0x3c, 0xd8, 0x65,
33318 ++ 0x7a, 0x3c, 0xf3, 0x7c, 0x13, 0x8c, 0xad, 0xce,
33319 ++ 0xcc, 0xe5, 0x09, 0xe4, 0xeb, 0xa0, 0x89, 0xd0,
33320 ++ 0xef, 0x40, 0xb4, 0xe4, 0xfb, 0x94, 0x61, 0x55 },
33321 ++ .valid = true
33322 ++ },
33323 ++ /* wycheproof - checking for overflow */
33324 ++ {
33325 ++ .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d,
33326 ++ 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d,
33327 ++ 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c,
33328 ++ 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 },
33329 ++ .public = { 0xbf, 0x68, 0xe3, 0x5e, 0x9b, 0xdb, 0x7e, 0xee,
33330 ++ 0x1b, 0x50, 0x57, 0x02, 0x21, 0x86, 0x0f, 0x5d,
33331 ++ 0xcd, 0xad, 0x8a, 0xcb, 0xab, 0x03, 0x1b, 0x14,
33332 ++ 0x97, 0x4c, 0xc4, 0x90, 0x13, 0xc4, 0x98, 0x31 },
33333 ++ .result = { 0x21, 0xce, 0xe5, 0x2e, 0xfd, 0xbc, 0x81, 0x2e,
33334 ++ 0x1d, 0x02, 0x1a, 0x4a, 0xf1, 0xe1, 0xd8, 0xbc,
33335 ++ 0x4d, 0xb3, 0xc4, 0x00, 0xe4, 0xd2, 0xa2, 0xc5,
33336 ++ 0x6a, 0x39, 0x26, 0xdb, 0x4d, 0x99, 0xc6, 0x5b },
33337 ++ .valid = true
33338 ++ },
33339 ++ /* wycheproof - checking for overflow */
33340 ++ {
33341 ++ .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d,
33342 ++ 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d,
33343 ++ 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c,
33344 ++ 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 },
33345 ++ .public = { 0x53, 0x47, 0xc4, 0x91, 0x33, 0x1a, 0x64, 0xb4,
33346 ++ 0x3d, 0xdc, 0x68, 0x30, 0x34, 0xe6, 0x77, 0xf5,
33347 ++ 0x3d, 0xc3, 0x2b, 0x52, 0xa5, 0x2a, 0x57, 0x7c,
33348 ++ 0x15, 0xa8, 0x3b, 0xf2, 0x98, 0xe9, 0x9f, 0x19 },
33349 ++ .result = { 0x18, 0xcb, 0x89, 0xe4, 0xe2, 0x0c, 0x0c, 0x2b,
33350 ++ 0xd3, 0x24, 0x30, 0x52, 0x45, 0x26, 0x6c, 0x93,
33351 ++ 0x27, 0x69, 0x0b, 0xbe, 0x79, 0xac, 0xb8, 0x8f,
33352 ++ 0x5b, 0x8f, 0xb3, 0xf7, 0x4e, 0xca, 0x3e, 0x52 },
33353 ++ .valid = true
33354 ++ },
33355 ++ /* wycheproof - private key == -1 (mod order) */
33356 ++ {
33357 ++ .private = { 0xa0, 0x23, 0xcd, 0xd0, 0x83, 0xef, 0x5b, 0xb8,
33358 ++ 0x2f, 0x10, 0xd6, 0x2e, 0x59, 0xe1, 0x5a, 0x68,
33359 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
33360 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50 },
33361 ++ .public = { 0x25, 0x8e, 0x04, 0x52, 0x3b, 0x8d, 0x25, 0x3e,
33362 ++ 0xe6, 0x57, 0x19, 0xfc, 0x69, 0x06, 0xc6, 0x57,
33363 ++ 0x19, 0x2d, 0x80, 0x71, 0x7e, 0xdc, 0x82, 0x8f,
33364 ++ 0xa0, 0xaf, 0x21, 0x68, 0x6e, 0x2f, 0xaa, 0x75 },
33365 ++ .result = { 0x25, 0x8e, 0x04, 0x52, 0x3b, 0x8d, 0x25, 0x3e,
33366 ++ 0xe6, 0x57, 0x19, 0xfc, 0x69, 0x06, 0xc6, 0x57,
33367 ++ 0x19, 0x2d, 0x80, 0x71, 0x7e, 0xdc, 0x82, 0x8f,
33368 ++ 0xa0, 0xaf, 0x21, 0x68, 0x6e, 0x2f, 0xaa, 0x75 },
33369 ++ .valid = true
33370 ++ },
33371 ++ /* wycheproof - private key == 1 (mod order) on twist */
33372 ++ {
33373 ++ .private = { 0x58, 0x08, 0x3d, 0xd2, 0x61, 0xad, 0x91, 0xef,
33374 ++ 0xf9, 0x52, 0x32, 0x2e, 0xc8, 0x24, 0xc6, 0x82,
33375 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
33376 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5f },
33377 ++ .public = { 0x2e, 0xae, 0x5e, 0xc3, 0xdd, 0x49, 0x4e, 0x9f,
33378 ++ 0x2d, 0x37, 0xd2, 0x58, 0xf8, 0x73, 0xa8, 0xe6,
33379 ++ 0xe9, 0xd0, 0xdb, 0xd1, 0xe3, 0x83, 0xef, 0x64,
33380 ++ 0xd9, 0x8b, 0xb9, 0x1b, 0x3e, 0x0b, 0xe0, 0x35 },
33381 ++ .result = { 0x2e, 0xae, 0x5e, 0xc3, 0xdd, 0x49, 0x4e, 0x9f,
33382 ++ 0x2d, 0x37, 0xd2, 0x58, 0xf8, 0x73, 0xa8, 0xe6,
33383 ++ 0xe9, 0xd0, 0xdb, 0xd1, 0xe3, 0x83, 0xef, 0x64,
33384 ++ 0xd9, 0x8b, 0xb9, 0x1b, 0x3e, 0x0b, 0xe0, 0x35 },
33385 ++ .valid = true
33386 ++ }
33387 ++};
33388 ++/* Curve25519 self-test: known-answer vectors, then a cross-check of the base-point code paths on random scalars. Returns true only if every check passed; each failure is reported with pr_err(). */
33389 ++bool __init curve25519_selftest(void)
33390 ++{
33391 ++ bool success = true, ret, ret2;
33392 ++ size_t i = 0, j;
33393 ++ u8 in[CURVE25519_KEY_SIZE];
33394 ++ u8 out[CURVE25519_KEY_SIZE], out2[CURVE25519_KEY_SIZE],
33395 ++ out3[CURVE25519_KEY_SIZE];
33396 ++ /* Pass 1: every entry of curve25519_test_vectors must reproduce both its .valid flag and its .result bytes. */
33397 ++ for (i = 0; i < ARRAY_SIZE(curve25519_test_vectors); ++i) {
33398 ++ memset(out, 0, CURVE25519_KEY_SIZE); /* start from a zeroed buffer so bytes left by the previous vector cannot influence the comparison */
33399 ++ ret = curve25519(out, curve25519_test_vectors[i].private,
33400 ++ curve25519_test_vectors[i].public);
33401 ++ if (ret != curve25519_test_vectors[i].valid ||
33402 ++ memcmp(out, curve25519_test_vectors[i].result,
33403 ++ CURVE25519_KEY_SIZE)) {
33404 ++ pr_err("curve25519 self-test %zu: FAIL\n", i + 1); /* vectors are reported 1-based */
33405 ++ success = false; /* keep going so every failing vector is logged */
33406 ++ }
33407 ++ }
33408 ++ /* Pass 2: for 5 random scalars, curve25519_generate_public(), curve25519() and curve25519_generic() applied to the point { 9 } must all agree. */
33409 ++ for (i = 0; i < 5; ++i) {
33410 ++ get_random_bytes(in, sizeof(in));
33411 ++ ret = curve25519_generate_public(out, in);
33412 ++ ret2 = curve25519(out2, in, (u8[CURVE25519_KEY_SIZE]){ 9 }); /* compound literal: first byte 9, remaining bytes zero */
33413 ++ curve25519_generic(out3, in, (u8[CURVE25519_KEY_SIZE]){ 9 });
33414 ++ if (ret != ret2 ||
33415 ++ memcmp(out, out2, CURVE25519_KEY_SIZE) ||
33416 ++ memcmp(out, out3, CURVE25519_KEY_SIZE)) {
33417 ++ pr_err("curve25519 basepoint self-test %zu: FAIL: input - 0x",
33418 ++ i + 1);
33419 ++ for (j = CURVE25519_KEY_SIZE; j-- > 0;) /* dump the failing scalar from in[CURVE25519_KEY_SIZE - 1] down to in[0] */
33420 ++ printk(KERN_CONT "%02x", in[j]);
33421 ++ printk(KERN_CONT "\n");
33422 ++ success = false;
33423 ++ }
33424 ++ }
33425 ++
33426 ++ return success;
33427 ++}
33428 +diff --git a/lib/crypto/curve25519.c b/lib/crypto/curve25519.c
33429 +index 0106bebe6900..c03ccdb99434 100644
33430 +--- a/lib/crypto/curve25519.c
33431 ++++ b/lib/crypto/curve25519.c
33432 +@@ -13,6 +13,8 @@
33433 + #include <linux/module.h>
33434 + #include <linux/init.h>
33435 +
33436 ++bool curve25519_selftest(void);
33437 ++
33438 + const u8 curve25519_null_point[CURVE25519_KEY_SIZE] __aligned(32) = { 0 };
33439 + const u8 curve25519_base_point[CURVE25519_KEY_SIZE] __aligned(32) = { 9 };
33440 +
33441 +@@ -20,6 +22,21 @@ EXPORT_SYMBOL(curve25519_null_point);
33442 + EXPORT_SYMBOL(curve25519_base_point);
33443 + EXPORT_SYMBOL(curve25519_generic);
33444 +
33445 ++static int __init mod_init(void)
33446 ++{ /* Module init: runs curve25519_selftest() unless CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is enabled. */
33447 ++ if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) &&
33448 ++ WARN_ON(!curve25519_selftest())) /* a failed self-test raises a warning in addition to the error return */
33449 ++ return -ENODEV;
33450 ++ return 0; /* tests disabled, or all self-tests passed */
33451 ++}
33452 ++
33453 ++static void __exit mod_exit(void)
33454 ++{ /* Intentionally empty: mod_init() only runs the self-test and leaves nothing to release. */
33455 ++}
33456 ++
33457 ++module_init(mod_init);
33458 ++module_exit(mod_exit);
33459 ++
33460 + MODULE_LICENSE("GPL v2");
33461 + MODULE_DESCRIPTION("Curve25519 scalar multiplication");
33462 + MODULE_AUTHOR("Jason A. Donenfeld <Jason@×××××.com>");
33463 +--
33464 +cgit v1.2.3-4-ga26e
33465 +
33466 +
33467 +From a03ec55400d1510189eae589650bf7bf82658148 Mon Sep 17 00:00:00 2001
33468 +From: "Jason A. Donenfeld" <Jason@×××××.com>
33469 +Date: Sun, 5 Jan 2020 22:40:46 -0500
33470 +Subject: crypto: poly1305 - add new 32 and 64-bit generic versions
33471 +
33472 +commit 1c08a104360f3e18f4ee6346c21cc3923efb952e upstream.
33473 +
33474 +These two C implementations from Zinc -- a 32x32 one and a 64x64 one,
33475 +depending on the platform -- come from Andrew Moon's public domain
33476 +poly1305-donna portable code, modified for usage in the kernel. The
33477 +precomputation in the 32-bit version and the use of 64x64 multiplies in
33478 +the 64-bit version make these perform better than the code it replaces.
33479 +Moon's code is also very widespread and has received many eyeballs of
33480 +scrutiny.
33481 +
33482 +There's a bit of interference with the x86 implementation, which
33483 +relies on internal details of the old scalar implementation. In the next
33484 +commit, the x86 implementation will be replaced with a faster one that
33485 +doesn't rely on this, so none of this matters much. But for now, to keep
33486 +this passing the tests, we inline the bits of the old implementation
33487 +that the x86 implementation relied on. Also, since we now support a
33488 +slightly larger key space, via the union, some offsets had to be fixed
33489 +up.
33490 +
33491 +Nonce calculation was folded in with the emit function, to take
33492 +advantage of 64x64 arithmetic. However, Adiantum appeared to rely on no
33493 +nonce handling in emit, so this path was conditionalized. We also
33494 +introduced a new struct, poly1305_core_key, to represent the precise
33495 +amount of space that particular implementation uses.
33496 +
33497 +Testing with kbench9000, depending on the CPU, the update function for
33498 +the 32x32 version has been improved by 4%-7%, and for the 64x64 by
33499 +19%-30%. The 32x32 gains are small, but I think there's great value in
33500 +having a parallel implementation to the 64x64 one so that the two can be
33501 +compared side-by-side as nice stand-alone units.
33502 +
33503 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
33504 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
33505 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
33506 +---
33507 + arch/x86/crypto/poly1305-avx2-x86_64.S | 20 +--
33508 + arch/x86/crypto/poly1305_glue.c | 215 +++++++++++++++++++++++++++++++--
33509 + crypto/adiantum.c | 4 +-
33510 + crypto/nhpoly1305.c | 2 +-
33511 + crypto/poly1305_generic.c | 25 +++-
33512 + include/crypto/internal/poly1305.h | 45 ++-----
33513 + include/crypto/nhpoly1305.h | 4 +-
33514 + include/crypto/poly1305.h | 26 +++-
33515 + lib/crypto/Makefile | 4 +-
33516 + lib/crypto/poly1305-donna32.c | 204 +++++++++++++++++++++++++++++++
33517 + lib/crypto/poly1305-donna64.c | 185 ++++++++++++++++++++++++++++
33518 + lib/crypto/poly1305.c | 169 ++------------------------
33519 + 12 files changed, 675 insertions(+), 228 deletions(-)
33520 + create mode 100644 lib/crypto/poly1305-donna32.c
33521 + create mode 100644 lib/crypto/poly1305-donna64.c
33522 +
33523 +diff --git a/arch/x86/crypto/poly1305-avx2-x86_64.S b/arch/x86/crypto/poly1305-avx2-x86_64.S
33524 +index 8b341bc29d41..1688fb551070 100644
33525 +--- a/arch/x86/crypto/poly1305-avx2-x86_64.S
33526 ++++ b/arch/x86/crypto/poly1305-avx2-x86_64.S
33527 +@@ -34,16 +34,16 @@ ORMASK: .octa 0x00000000010000000000000001000000
33528 + #define u2 0x08(%r8)
33529 + #define u3 0x0c(%r8)
33530 + #define u4 0x10(%r8)
33531 +-#define w0 0x14(%r8)
33532 +-#define w1 0x18(%r8)
33533 +-#define w2 0x1c(%r8)
33534 +-#define w3 0x20(%r8)
33535 +-#define w4 0x24(%r8)
33536 +-#define y0 0x28(%r8)
33537 +-#define y1 0x2c(%r8)
33538 +-#define y2 0x30(%r8)
33539 +-#define y3 0x34(%r8)
33540 +-#define y4 0x38(%r8)
33541 ++#define w0 0x18(%r8)
33542 ++#define w1 0x1c(%r8)
33543 ++#define w2 0x20(%r8)
33544 ++#define w3 0x24(%r8)
33545 ++#define w4 0x28(%r8)
33546 ++#define y0 0x30(%r8)
33547 ++#define y1 0x34(%r8)
33548 ++#define y2 0x38(%r8)
33549 ++#define y3 0x3c(%r8)
33550 ++#define y4 0x40(%r8)
33551 + #define m %rsi
33552 + #define hc0 %ymm0
33553 + #define hc1 %ymm1
33554 +diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
33555 +index 0cc4537e6617..edb7113e36f3 100644
33556 +--- a/arch/x86/crypto/poly1305_glue.c
33557 ++++ b/arch/x86/crypto/poly1305_glue.c
33558 +@@ -25,6 +25,21 @@ asmlinkage void poly1305_4block_avx2(u32 *h, const u8 *src, const u32 *r,
33559 + static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_simd);
33560 + static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx2);
33561 +
33562 ++static inline u64 mlt(u64 a, u64 b)
33563 ++{
33564 ++ return a * b;
33565 ++}
33566 ++
33567 ++static inline u32 sr(u64 v, u_char n)
33568 ++{
33569 ++ return v >> n;
33570 ++}
33571 ++
33572 ++static inline u32 and(u32 v, u32 mask)
33573 ++{
33574 ++ return v & mask;
33575 ++}
33576 ++
33577 + static void poly1305_simd_mult(u32 *a, const u32 *b)
33578 + {
33579 + u8 m[POLY1305_BLOCK_SIZE];
33580 +@@ -36,6 +51,168 @@ static void poly1305_simd_mult(u32 *a, const u32 *b)
33581 + poly1305_block_sse2(a, m, b, 1);
33582 + }
33583 +
33584 ++static void poly1305_integer_setkey(struct poly1305_key *key, const u8 *raw_key)
33585 ++{
33586 ++ /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
33587 ++ key->r[0] = (get_unaligned_le32(raw_key + 0) >> 0) & 0x3ffffff;
33588 ++ key->r[1] = (get_unaligned_le32(raw_key + 3) >> 2) & 0x3ffff03;
33589 ++ key->r[2] = (get_unaligned_le32(raw_key + 6) >> 4) & 0x3ffc0ff;
33590 ++ key->r[3] = (get_unaligned_le32(raw_key + 9) >> 6) & 0x3f03fff;
33591 ++ key->r[4] = (get_unaligned_le32(raw_key + 12) >> 8) & 0x00fffff;
33592 ++}
33593 ++
33594 ++static void poly1305_integer_blocks(struct poly1305_state *state,
33595 ++ const struct poly1305_key *key,
33596 ++ const void *src,
33597 ++ unsigned int nblocks, u32 hibit)
33598 ++{
33599 ++ u32 r0, r1, r2, r3, r4;
33600 ++ u32 s1, s2, s3, s4;
33601 ++ u32 h0, h1, h2, h3, h4;
33602 ++ u64 d0, d1, d2, d3, d4;
33603 ++
33604 ++ if (!nblocks)
33605 ++ return;
33606 ++
33607 ++ r0 = key->r[0];
33608 ++ r1 = key->r[1];
33609 ++ r2 = key->r[2];
33610 ++ r3 = key->r[3];
33611 ++ r4 = key->r[4];
33612 ++
33613 ++ s1 = r1 * 5;
33614 ++ s2 = r2 * 5;
33615 ++ s3 = r3 * 5;
33616 ++ s4 = r4 * 5;
33617 ++
33618 ++ h0 = state->h[0];
33619 ++ h1 = state->h[1];
33620 ++ h2 = state->h[2];
33621 ++ h3 = state->h[3];
33622 ++ h4 = state->h[4];
33623 ++
33624 ++ do {
33625 ++ /* h += m[i] */
33626 ++ h0 += (get_unaligned_le32(src + 0) >> 0) & 0x3ffffff;
33627 ++ h1 += (get_unaligned_le32(src + 3) >> 2) & 0x3ffffff;
33628 ++ h2 += (get_unaligned_le32(src + 6) >> 4) & 0x3ffffff;
33629 ++ h3 += (get_unaligned_le32(src + 9) >> 6) & 0x3ffffff;
33630 ++ h4 += (get_unaligned_le32(src + 12) >> 8) | (hibit << 24);
33631 ++
33632 ++ /* h *= r */
33633 ++ d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) +
33634 ++ mlt(h3, s2) + mlt(h4, s1);
33635 ++ d1 = mlt(h0, r1) + mlt(h1, r0) + mlt(h2, s4) +
33636 ++ mlt(h3, s3) + mlt(h4, s2);
33637 ++ d2 = mlt(h0, r2) + mlt(h1, r1) + mlt(h2, r0) +
33638 ++ mlt(h3, s4) + mlt(h4, s3);
33639 ++ d3 = mlt(h0, r3) + mlt(h1, r2) + mlt(h2, r1) +
33640 ++ mlt(h3, r0) + mlt(h4, s4);
33641 ++ d4 = mlt(h0, r4) + mlt(h1, r3) + mlt(h2, r2) +
33642 ++ mlt(h3, r1) + mlt(h4, r0);
33643 ++
33644 ++ /* (partial) h %= p */
33645 ++ d1 += sr(d0, 26); h0 = and(d0, 0x3ffffff);
33646 ++ d2 += sr(d1, 26); h1 = and(d1, 0x3ffffff);
33647 ++ d3 += sr(d2, 26); h2 = and(d2, 0x3ffffff);
33648 ++ d4 += sr(d3, 26); h3 = and(d3, 0x3ffffff);
33649 ++ h0 += sr(d4, 26) * 5; h4 = and(d4, 0x3ffffff);
33650 ++ h1 += h0 >> 26; h0 = h0 & 0x3ffffff;
33651 ++
33652 ++ src += POLY1305_BLOCK_SIZE;
33653 ++ } while (--nblocks);
33654 ++
33655 ++ state->h[0] = h0;
33656 ++ state->h[1] = h1;
33657 ++ state->h[2] = h2;
33658 ++ state->h[3] = h3;
33659 ++ state->h[4] = h4;
33660 ++}
33661 ++
33662 ++static void poly1305_integer_emit(const struct poly1305_state *state, void *dst)
33663 ++{
33664 ++ u32 h0, h1, h2, h3, h4;
33665 ++ u32 g0, g1, g2, g3, g4;
33666 ++ u32 mask;
33667 ++
33668 ++ /* fully carry h */
33669 ++ h0 = state->h[0];
33670 ++ h1 = state->h[1];
33671 ++ h2 = state->h[2];
33672 ++ h3 = state->h[3];
33673 ++ h4 = state->h[4];
33674 ++
33675 ++ h2 += (h1 >> 26); h1 = h1 & 0x3ffffff;
33676 ++ h3 += (h2 >> 26); h2 = h2 & 0x3ffffff;
33677 ++ h4 += (h3 >> 26); h3 = h3 & 0x3ffffff;
33678 ++ h0 += (h4 >> 26) * 5; h4 = h4 & 0x3ffffff;
33679 ++ h1 += (h0 >> 26); h0 = h0 & 0x3ffffff;
33680 ++
33681 ++ /* compute h + -p */
33682 ++ g0 = h0 + 5;
33683 ++ g1 = h1 + (g0 >> 26); g0 &= 0x3ffffff;
33684 ++ g2 = h2 + (g1 >> 26); g1 &= 0x3ffffff;
33685 ++ g3 = h3 + (g2 >> 26); g2 &= 0x3ffffff;
33686 ++ g4 = h4 + (g3 >> 26) - (1 << 26); g3 &= 0x3ffffff;
33687 ++
33688 ++ /* select h if h < p, or h + -p if h >= p */
33689 ++ mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1;
33690 ++ g0 &= mask;
33691 ++ g1 &= mask;
33692 ++ g2 &= mask;
33693 ++ g3 &= mask;
33694 ++ g4 &= mask;
33695 ++ mask = ~mask;
33696 ++ h0 = (h0 & mask) | g0;
33697 ++ h1 = (h1 & mask) | g1;
33698 ++ h2 = (h2 & mask) | g2;
33699 ++ h3 = (h3 & mask) | g3;
33700 ++ h4 = (h4 & mask) | g4;
33701 ++
33702 ++ /* h = h % (2^128) */
33703 ++ put_unaligned_le32((h0 >> 0) | (h1 << 26), dst + 0);
33704 ++ put_unaligned_le32((h1 >> 6) | (h2 << 20), dst + 4);
33705 ++ put_unaligned_le32((h2 >> 12) | (h3 << 14), dst + 8);
33706 ++ put_unaligned_le32((h3 >> 18) | (h4 << 8), dst + 12);
33707 ++}
33708 ++
33709 ++void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key)
33710 ++{
33711 ++ poly1305_integer_setkey(desc->opaque_r, key);
33712 ++ desc->s[0] = get_unaligned_le32(key + 16);
33713 ++ desc->s[1] = get_unaligned_le32(key + 20);
33714 ++ desc->s[2] = get_unaligned_le32(key + 24);
33715 ++ desc->s[3] = get_unaligned_le32(key + 28);
33716 ++ poly1305_core_init(&desc->h);
33717 ++ desc->buflen = 0;
33718 ++ desc->sset = true;
33719 ++ desc->rset = 1;
33720 ++}
33721 ++EXPORT_SYMBOL_GPL(poly1305_init_arch);
33722 ++
33723 ++static unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
33724 ++ const u8 *src, unsigned int srclen)
33725 ++{
33726 ++ if (!dctx->sset) {
33727 ++ if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) {
33728 ++ poly1305_integer_setkey(dctx->r, src);
33729 ++ src += POLY1305_BLOCK_SIZE;
33730 ++ srclen -= POLY1305_BLOCK_SIZE;
33731 ++ dctx->rset = 1;
33732 ++ }
33733 ++ if (srclen >= POLY1305_BLOCK_SIZE) {
33734 ++ dctx->s[0] = get_unaligned_le32(src + 0);
33735 ++ dctx->s[1] = get_unaligned_le32(src + 4);
33736 ++ dctx->s[2] = get_unaligned_le32(src + 8);
33737 ++ dctx->s[3] = get_unaligned_le32(src + 12);
33738 ++ src += POLY1305_BLOCK_SIZE;
33739 ++ srclen -= POLY1305_BLOCK_SIZE;
33740 ++ dctx->sset = true;
33741 ++ }
33742 ++ }
33743 ++ return srclen;
33744 ++}
33745 ++
33746 + static unsigned int poly1305_scalar_blocks(struct poly1305_desc_ctx *dctx,
33747 + const u8 *src, unsigned int srclen)
33748 + {
33749 +@@ -47,8 +224,8 @@ static unsigned int poly1305_scalar_blocks(struct poly1305_desc_ctx *dctx,
33750 + srclen = datalen;
33751 + }
33752 + if (srclen >= POLY1305_BLOCK_SIZE) {
33753 +- poly1305_core_blocks(&dctx->h, dctx->r, src,
33754 +- srclen / POLY1305_BLOCK_SIZE, 1);
33755 ++ poly1305_integer_blocks(&dctx->h, dctx->opaque_r, src,
33756 ++ srclen / POLY1305_BLOCK_SIZE, 1);
33757 + srclen %= POLY1305_BLOCK_SIZE;
33758 + }
33759 + return srclen;
33760 +@@ -105,12 +282,6 @@ static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx,
33761 + return srclen;
33762 + }
33763 +
33764 +-void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key)
33765 +-{
33766 +- poly1305_init_generic(desc, key);
33767 +-}
33768 +-EXPORT_SYMBOL(poly1305_init_arch);
33769 +-
33770 + void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
33771 + unsigned int srclen)
33772 + {
33773 +@@ -158,9 +329,31 @@ void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
33774 + }
33775 + EXPORT_SYMBOL(poly1305_update_arch);
33776 +
33777 +-void poly1305_final_arch(struct poly1305_desc_ctx *desc, u8 *digest)
33778 ++void poly1305_final_arch(struct poly1305_desc_ctx *desc, u8 *dst)
33779 + {
33780 +- poly1305_final_generic(desc, digest);
33781 ++ __le32 digest[4];
33782 ++ u64 f = 0;
33783 ++
33784 ++ if (unlikely(desc->buflen)) {
33785 ++ desc->buf[desc->buflen++] = 1;
33786 ++ memset(desc->buf + desc->buflen, 0,
33787 ++ POLY1305_BLOCK_SIZE - desc->buflen);
33788 ++ poly1305_integer_blocks(&desc->h, desc->opaque_r, desc->buf, 1, 0);
33789 ++ }
33790 ++
33791 ++ poly1305_integer_emit(&desc->h, digest);
33792 ++
33793 ++ /* mac = (h + s) % (2^128) */
33794 ++ f = (f >> 32) + le32_to_cpu(digest[0]) + desc->s[0];
33795 ++ put_unaligned_le32(f, dst + 0);
33796 ++ f = (f >> 32) + le32_to_cpu(digest[1]) + desc->s[1];
33797 ++ put_unaligned_le32(f, dst + 4);
33798 ++ f = (f >> 32) + le32_to_cpu(digest[2]) + desc->s[2];
33799 ++ put_unaligned_le32(f, dst + 8);
33800 ++ f = (f >> 32) + le32_to_cpu(digest[3]) + desc->s[3];
33801 ++ put_unaligned_le32(f, dst + 12);
33802 ++
33803 ++ *desc = (struct poly1305_desc_ctx){};
33804 + }
33805 + EXPORT_SYMBOL(poly1305_final_arch);
33806 +
33807 +@@ -183,7 +376,7 @@ static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
33808 + if (unlikely(!dctx->sset))
33809 + return -ENOKEY;
33810 +
33811 +- poly1305_final_generic(dctx, dst);
33812 ++ poly1305_final_arch(dctx, dst);
33813 + return 0;
33814 + }
33815 +
33816 +diff --git a/crypto/adiantum.c b/crypto/adiantum.c
33817 +index aded26092268..c846a887abe1 100644
33818 +--- a/crypto/adiantum.c
33819 ++++ b/crypto/adiantum.c
33820 +@@ -72,7 +72,7 @@ struct adiantum_tfm_ctx {
33821 + struct crypto_skcipher *streamcipher;
33822 + struct crypto_cipher *blockcipher;
33823 + struct crypto_shash *hash;
33824 +- struct poly1305_key header_hash_key;
33825 ++ struct poly1305_core_key header_hash_key;
33826 + };
33827 +
33828 + struct adiantum_request_ctx {
33829 +@@ -249,7 +249,7 @@ static void adiantum_hash_header(struct skcipher_request *req)
33830 + poly1305_core_blocks(&state, &tctx->header_hash_key, req->iv,
33831 + TWEAK_SIZE / POLY1305_BLOCK_SIZE, 1);
33832 +
33833 +- poly1305_core_emit(&state, &rctx->header_hash);
33834 ++ poly1305_core_emit(&state, NULL, &rctx->header_hash);
33835 + }
33836 +
33837 + /* Hash the left-hand part (the "bulk") of the message using NHPoly1305 */
33838 +diff --git a/crypto/nhpoly1305.c b/crypto/nhpoly1305.c
33839 +index f6b6a52092b4..8a3006c3b51b 100644
33840 +--- a/crypto/nhpoly1305.c
33841 ++++ b/crypto/nhpoly1305.c
33842 +@@ -210,7 +210,7 @@ int crypto_nhpoly1305_final_helper(struct shash_desc *desc, u8 *dst, nh_t nh_fn)
33843 + if (state->nh_remaining)
33844 + process_nh_hash_value(state, key);
33845 +
33846 +- poly1305_core_emit(&state->poly_state, dst);
33847 ++ poly1305_core_emit(&state->poly_state, NULL, dst);
33848 + return 0;
33849 + }
33850 + EXPORT_SYMBOL(crypto_nhpoly1305_final_helper);
33851 +diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c
33852 +index 21edbd8c99fb..94af47eb6fa6 100644
33853 +--- a/crypto/poly1305_generic.c
33854 ++++ b/crypto/poly1305_generic.c
33855 +@@ -31,6 +31,29 @@ static int crypto_poly1305_init(struct shash_desc *desc)
33856 + return 0;
33857 + }
33858 +
33859 ++static unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
33860 ++ const u8 *src, unsigned int srclen)
33861 ++{
33862 ++ if (!dctx->sset) {
33863 ++ if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) {
33864 ++ poly1305_core_setkey(&dctx->core_r, src);
33865 ++ src += POLY1305_BLOCK_SIZE;
33866 ++ srclen -= POLY1305_BLOCK_SIZE;
33867 ++ dctx->rset = 2;
33868 ++ }
33869 ++ if (srclen >= POLY1305_BLOCK_SIZE) {
33870 ++ dctx->s[0] = get_unaligned_le32(src + 0);
33871 ++ dctx->s[1] = get_unaligned_le32(src + 4);
33872 ++ dctx->s[2] = get_unaligned_le32(src + 8);
33873 ++ dctx->s[3] = get_unaligned_le32(src + 12);
33874 ++ src += POLY1305_BLOCK_SIZE;
33875 ++ srclen -= POLY1305_BLOCK_SIZE;
33876 ++ dctx->sset = true;
33877 ++ }
33878 ++ }
33879 ++ return srclen;
33880 ++}
33881 ++
33882 + static void poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
33883 + unsigned int srclen)
33884 + {
33885 +@@ -42,7 +65,7 @@ static void poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
33886 + srclen = datalen;
33887 + }
33888 +
33889 +- poly1305_core_blocks(&dctx->h, dctx->r, src,
33890 ++ poly1305_core_blocks(&dctx->h, &dctx->core_r, src,
33891 + srclen / POLY1305_BLOCK_SIZE, 1);
33892 + }
33893 +
33894 +diff --git a/include/crypto/internal/poly1305.h b/include/crypto/internal/poly1305.h
33895 +index 479b0cab2a1a..064e52ca5248 100644
33896 +--- a/include/crypto/internal/poly1305.h
33897 ++++ b/include/crypto/internal/poly1305.h
33898 +@@ -11,48 +11,23 @@
33899 + #include <crypto/poly1305.h>
33900 +
33901 + /*
33902 +- * Poly1305 core functions. These implement the ε-almost-∆-universal hash
33903 +- * function underlying the Poly1305 MAC, i.e. they don't add an encrypted nonce
33904 +- * ("s key") at the end. They also only support block-aligned inputs.
33905 ++ * Poly1305 core functions. These only accept whole blocks; the caller must
33906 ++ * handle any needed block buffering and padding. 'hibit' must be 1 for any
33907 ++ * full blocks, or 0 for the final block if it had to be padded. If 'nonce' is
33908 ++ * non-NULL, then it's added at the end to compute the Poly1305 MAC. Otherwise,
33909 ++ * only the ε-almost-∆-universal hash function (not the full MAC) is computed.
33910 + */
33911 +-void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key);
33912 ++
33913 ++void poly1305_core_setkey(struct poly1305_core_key *key, const u8 *raw_key);
33914 + static inline void poly1305_core_init(struct poly1305_state *state)
33915 + {
33916 + *state = (struct poly1305_state){};
33917 + }
33918 +
33919 + void poly1305_core_blocks(struct poly1305_state *state,
33920 +- const struct poly1305_key *key, const void *src,
33921 ++ const struct poly1305_core_key *key, const void *src,
33922 + unsigned int nblocks, u32 hibit);
33923 +-void poly1305_core_emit(const struct poly1305_state *state, void *dst);
33924 +-
33925 +-/*
33926 +- * Poly1305 requires a unique key for each tag, which implies that we can't set
33927 +- * it on the tfm that gets accessed by multiple users simultaneously. Instead we
33928 +- * expect the key as the first 32 bytes in the update() call.
33929 +- */
33930 +-static inline
33931 +-unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
33932 +- const u8 *src, unsigned int srclen)
33933 +-{
33934 +- if (!dctx->sset) {
33935 +- if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) {
33936 +- poly1305_core_setkey(dctx->r, src);
33937 +- src += POLY1305_BLOCK_SIZE;
33938 +- srclen -= POLY1305_BLOCK_SIZE;
33939 +- dctx->rset = 1;
33940 +- }
33941 +- if (srclen >= POLY1305_BLOCK_SIZE) {
33942 +- dctx->s[0] = get_unaligned_le32(src + 0);
33943 +- dctx->s[1] = get_unaligned_le32(src + 4);
33944 +- dctx->s[2] = get_unaligned_le32(src + 8);
33945 +- dctx->s[3] = get_unaligned_le32(src + 12);
33946 +- src += POLY1305_BLOCK_SIZE;
33947 +- srclen -= POLY1305_BLOCK_SIZE;
33948 +- dctx->sset = true;
33949 +- }
33950 +- }
33951 +- return srclen;
33952 +-}
33953 ++void poly1305_core_emit(const struct poly1305_state *state, const u32 nonce[4],
33954 ++ void *dst);
33955 +
33956 + #endif
33957 +diff --git a/include/crypto/nhpoly1305.h b/include/crypto/nhpoly1305.h
33958 +index 53c04423c582..306925fea190 100644
33959 +--- a/include/crypto/nhpoly1305.h
33960 ++++ b/include/crypto/nhpoly1305.h
33961 +@@ -7,7 +7,7 @@
33962 + #define _NHPOLY1305_H
33963 +
33964 + #include <crypto/hash.h>
33965 +-#include <crypto/poly1305.h>
33966 ++#include <crypto/internal/poly1305.h>
33967 +
33968 + /* NH parameterization: */
33969 +
33970 +@@ -33,7 +33,7 @@
33971 + #define NHPOLY1305_KEY_SIZE (POLY1305_BLOCK_SIZE + NH_KEY_BYTES)
33972 +
33973 + struct nhpoly1305_key {
33974 +- struct poly1305_key poly_key;
33975 ++ struct poly1305_core_key poly_key;
33976 + u32 nh_key[NH_KEY_WORDS];
33977 + };
33978 +
33979 +diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h
33980 +index 74c6e1cd73ee..f1f67fc749cf 100644
33981 +--- a/include/crypto/poly1305.h
33982 ++++ b/include/crypto/poly1305.h
33983 +@@ -13,12 +13,29 @@
33984 + #define POLY1305_KEY_SIZE 32
33985 + #define POLY1305_DIGEST_SIZE 16
33986 +
33987 ++/* The poly1305_key and poly1305_state types are mostly opaque and
33988 ++ * implementation-defined. Limbs might be in base 2^64 or base 2^26, or
33989 ++ * different yet. The union type provided keeps these 64-bit aligned for the
33990 ++ * case in which this is implemented using 64x64 multiplies.
33991 ++ */
33992 ++
33993 + struct poly1305_key {
33994 +- u32 r[5]; /* key, base 2^26 */
33995 ++ union {
33996 ++ u32 r[5];
33997 ++ u64 r64[3];
33998 ++ };
33999 ++};
34000 ++
34001 ++struct poly1305_core_key {
34002 ++ struct poly1305_key key;
34003 ++ struct poly1305_key precomputed_s;
34004 + };
34005 +
34006 + struct poly1305_state {
34007 +- u32 h[5]; /* accumulator, base 2^26 */
34008 ++ union {
34009 ++ u32 h[5];
34010 ++ u64 h64[3];
34011 ++ };
34012 + };
34013 +
34014 + struct poly1305_desc_ctx {
34015 +@@ -35,7 +52,10 @@ struct poly1305_desc_ctx {
34016 + /* accumulator */
34017 + struct poly1305_state h;
34018 + /* key */
34019 +- struct poly1305_key r[CONFIG_CRYPTO_LIB_POLY1305_RSIZE];
34020 ++ union {
34021 ++ struct poly1305_key opaque_r[CONFIG_CRYPTO_LIB_POLY1305_RSIZE];
34022 ++ struct poly1305_core_key core_r;
34023 ++ };
34024 + };
34025 +
34026 + void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key);
34027 +diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
34028 +index f97f9b941110..6ecaf83a5a9a 100644
34029 +--- a/lib/crypto/Makefile
34030 ++++ b/lib/crypto/Makefile
34031 +@@ -28,7 +28,9 @@ obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o
34032 + libdes-y := des.o
34033 +
34034 + obj-$(CONFIG_CRYPTO_LIB_POLY1305_GENERIC) += libpoly1305.o
34035 +-libpoly1305-y := poly1305.o
34036 ++libpoly1305-y := poly1305-donna32.o
34037 ++libpoly1305-$(CONFIG_ARCH_SUPPORTS_INT128) := poly1305-donna64.o
34038 ++libpoly1305-y += poly1305.o
34039 +
34040 + obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o
34041 + libsha256-y := sha256.o
34042 +diff --git a/lib/crypto/poly1305-donna32.c b/lib/crypto/poly1305-donna32.c
34043 +new file mode 100644
34044 +index 000000000000..3cc77d94390b
34045 +--- /dev/null
34046 ++++ b/lib/crypto/poly1305-donna32.c
34047 +@@ -0,0 +1,204 @@
34048 ++// SPDX-License-Identifier: GPL-2.0 OR MIT
34049 ++/*
34050 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
34051 ++ *
34052 ++ * This is based in part on Andrew Moon's poly1305-donna, which is in the
34053 ++ * public domain.
34054 ++ */
34055 ++
34056 ++#include <linux/kernel.h>
34057 ++#include <asm/unaligned.h>
34058 ++#include <crypto/internal/poly1305.h>
34059 ++
34060 ++void poly1305_core_setkey(struct poly1305_core_key *key, const u8 raw_key[16])
34061 ++{
34062 ++ /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
34063 ++ key->key.r[0] = (get_unaligned_le32(&raw_key[0])) & 0x3ffffff;
34064 ++ key->key.r[1] = (get_unaligned_le32(&raw_key[3]) >> 2) & 0x3ffff03;
34065 ++ key->key.r[2] = (get_unaligned_le32(&raw_key[6]) >> 4) & 0x3ffc0ff;
34066 ++ key->key.r[3] = (get_unaligned_le32(&raw_key[9]) >> 6) & 0x3f03fff;
34067 ++ key->key.r[4] = (get_unaligned_le32(&raw_key[12]) >> 8) & 0x00fffff;
34068 ++
34069 ++ /* s = 5*r */
34070 ++ key->precomputed_s.r[0] = key->key.r[1] * 5;
34071 ++ key->precomputed_s.r[1] = key->key.r[2] * 5;
34072 ++ key->precomputed_s.r[2] = key->key.r[3] * 5;
34073 ++ key->precomputed_s.r[3] = key->key.r[4] * 5;
34074 ++}
34075 ++EXPORT_SYMBOL(poly1305_core_setkey);
34076 ++
34077 ++void poly1305_core_blocks(struct poly1305_state *state,
34078 ++ const struct poly1305_core_key *key, const void *src,
34079 ++ unsigned int nblocks, u32 hibit)
34080 ++{
34081 ++ const u8 *input = src;
34082 ++ u32 r0, r1, r2, r3, r4;
34083 ++ u32 s1, s2, s3, s4;
34084 ++ u32 h0, h1, h2, h3, h4;
34085 ++ u64 d0, d1, d2, d3, d4;
34086 ++ u32 c;
34087 ++
34088 ++ if (!nblocks)
34089 ++ return;
34090 ++
34091 ++ hibit <<= 24;
34092 ++
34093 ++ r0 = key->key.r[0];
34094 ++ r1 = key->key.r[1];
34095 ++ r2 = key->key.r[2];
34096 ++ r3 = key->key.r[3];
34097 ++ r4 = key->key.r[4];
34098 ++
34099 ++ s1 = key->precomputed_s.r[0];
34100 ++ s2 = key->precomputed_s.r[1];
34101 ++ s3 = key->precomputed_s.r[2];
34102 ++ s4 = key->precomputed_s.r[3];
34103 ++
34104 ++ h0 = state->h[0];
34105 ++ h1 = state->h[1];
34106 ++ h2 = state->h[2];
34107 ++ h3 = state->h[3];
34108 ++ h4 = state->h[4];
34109 ++
34110 ++ do {
34111 ++ /* h += m[i] */
34112 ++ h0 += (get_unaligned_le32(&input[0])) & 0x3ffffff;
34113 ++ h1 += (get_unaligned_le32(&input[3]) >> 2) & 0x3ffffff;
34114 ++ h2 += (get_unaligned_le32(&input[6]) >> 4) & 0x3ffffff;
34115 ++ h3 += (get_unaligned_le32(&input[9]) >> 6) & 0x3ffffff;
34116 ++ h4 += (get_unaligned_le32(&input[12]) >> 8) | hibit;
34117 ++
34118 ++ /* h *= r */
34119 ++ d0 = ((u64)h0 * r0) + ((u64)h1 * s4) +
34120 ++ ((u64)h2 * s3) + ((u64)h3 * s2) +
34121 ++ ((u64)h4 * s1);
34122 ++ d1 = ((u64)h0 * r1) + ((u64)h1 * r0) +
34123 ++ ((u64)h2 * s4) + ((u64)h3 * s3) +
34124 ++ ((u64)h4 * s2);
34125 ++ d2 = ((u64)h0 * r2) + ((u64)h1 * r1) +
34126 ++ ((u64)h2 * r0) + ((u64)h3 * s4) +
34127 ++ ((u64)h4 * s3);
34128 ++ d3 = ((u64)h0 * r3) + ((u64)h1 * r2) +
34129 ++ ((u64)h2 * r1) + ((u64)h3 * r0) +
34130 ++ ((u64)h4 * s4);
34131 ++ d4 = ((u64)h0 * r4) + ((u64)h1 * r3) +
34132 ++ ((u64)h2 * r2) + ((u64)h3 * r1) +
34133 ++ ((u64)h4 * r0);
34134 ++
34135 ++ /* (partial) h %= p */
34136 ++ c = (u32)(d0 >> 26);
34137 ++ h0 = (u32)d0 & 0x3ffffff;
34138 ++ d1 += c;
34139 ++ c = (u32)(d1 >> 26);
34140 ++ h1 = (u32)d1 & 0x3ffffff;
34141 ++ d2 += c;
34142 ++ c = (u32)(d2 >> 26);
34143 ++ h2 = (u32)d2 & 0x3ffffff;
34144 ++ d3 += c;
34145 ++ c = (u32)(d3 >> 26);
34146 ++ h3 = (u32)d3 & 0x3ffffff;
34147 ++ d4 += c;
34148 ++ c = (u32)(d4 >> 26);
34149 ++ h4 = (u32)d4 & 0x3ffffff;
34150 ++ h0 += c * 5;
34151 ++ c = (h0 >> 26);
34152 ++ h0 = h0 & 0x3ffffff;
34153 ++ h1 += c;
34154 ++
34155 ++ input += POLY1305_BLOCK_SIZE;
34156 ++ } while (--nblocks);
34157 ++
34158 ++ state->h[0] = h0;
34159 ++ state->h[1] = h1;
34160 ++ state->h[2] = h2;
34161 ++ state->h[3] = h3;
34162 ++ state->h[4] = h4;
34163 ++}
34164 ++EXPORT_SYMBOL(poly1305_core_blocks);
34165 ++
34166 ++void poly1305_core_emit(const struct poly1305_state *state, const u32 nonce[4],
34167 ++ void *dst)
34168 ++{
34169 ++ u8 *mac = dst;
34170 ++ u32 h0, h1, h2, h3, h4, c;
34171 ++ u32 g0, g1, g2, g3, g4;
34172 ++ u64 f;
34173 ++ u32 mask;
34174 ++
34175 ++ /* fully carry h */
34176 ++ h0 = state->h[0];
34177 ++ h1 = state->h[1];
34178 ++ h2 = state->h[2];
34179 ++ h3 = state->h[3];
34180 ++ h4 = state->h[4];
34181 ++
34182 ++ c = h1 >> 26;
34183 ++ h1 = h1 & 0x3ffffff;
34184 ++ h2 += c;
34185 ++ c = h2 >> 26;
34186 ++ h2 = h2 & 0x3ffffff;
34187 ++ h3 += c;
34188 ++ c = h3 >> 26;
34189 ++ h3 = h3 & 0x3ffffff;
34190 ++ h4 += c;
34191 ++ c = h4 >> 26;
34192 ++ h4 = h4 & 0x3ffffff;
34193 ++ h0 += c * 5;
34194 ++ c = h0 >> 26;
34195 ++ h0 = h0 & 0x3ffffff;
34196 ++ h1 += c;
34197 ++
34198 ++ /* compute h + -p */
34199 ++ g0 = h0 + 5;
34200 ++ c = g0 >> 26;
34201 ++ g0 &= 0x3ffffff;
34202 ++ g1 = h1 + c;
34203 ++ c = g1 >> 26;
34204 ++ g1 &= 0x3ffffff;
34205 ++ g2 = h2 + c;
34206 ++ c = g2 >> 26;
34207 ++ g2 &= 0x3ffffff;
34208 ++ g3 = h3 + c;
34209 ++ c = g3 >> 26;
34210 ++ g3 &= 0x3ffffff;
34211 ++ g4 = h4 + c - (1UL << 26);
34212 ++
34213 ++ /* select h if h < p, or h + -p if h >= p */
34214 ++ mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1;
34215 ++ g0 &= mask;
34216 ++ g1 &= mask;
34217 ++ g2 &= mask;
34218 ++ g3 &= mask;
34219 ++ g4 &= mask;
34220 ++ mask = ~mask;
34221 ++
34222 ++ h0 = (h0 & mask) | g0;
34223 ++ h1 = (h1 & mask) | g1;
34224 ++ h2 = (h2 & mask) | g2;
34225 ++ h3 = (h3 & mask) | g3;
34226 ++ h4 = (h4 & mask) | g4;
34227 ++
34228 ++ /* h = h % (2^128) */
34229 ++ h0 = ((h0) | (h1 << 26)) & 0xffffffff;
34230 ++ h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff;
34231 ++ h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff;
34232 ++ h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff;
34233 ++
34234 ++ if (likely(nonce)) {
34235 ++ /* mac = (h + nonce) % (2^128) */
34236 ++ f = (u64)h0 + nonce[0];
34237 ++ h0 = (u32)f;
34238 ++ f = (u64)h1 + nonce[1] + (f >> 32);
34239 ++ h1 = (u32)f;
34240 ++ f = (u64)h2 + nonce[2] + (f >> 32);
34241 ++ h2 = (u32)f;
34242 ++ f = (u64)h3 + nonce[3] + (f >> 32);
34243 ++ h3 = (u32)f;
34244 ++ }
34245 ++
34246 ++ put_unaligned_le32(h0, &mac[0]);
34247 ++ put_unaligned_le32(h1, &mac[4]);
34248 ++ put_unaligned_le32(h2, &mac[8]);
34249 ++ put_unaligned_le32(h3, &mac[12]);
34250 ++}
34251 ++EXPORT_SYMBOL(poly1305_core_emit);
34252 +diff --git a/lib/crypto/poly1305-donna64.c b/lib/crypto/poly1305-donna64.c
34253 +new file mode 100644
34254 +index 000000000000..6ae181bb4345
34255 +--- /dev/null
34256 ++++ b/lib/crypto/poly1305-donna64.c
34257 +@@ -0,0 +1,185 @@
34258 ++// SPDX-License-Identifier: GPL-2.0 OR MIT
34259 ++/*
34260 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
34261 ++ *
34262 ++ * This is based in part on Andrew Moon's poly1305-donna, which is in the
34263 ++ * public domain.
34264 ++ */
34265 ++
34266 ++#include <linux/kernel.h>
34267 ++#include <asm/unaligned.h>
34268 ++#include <crypto/internal/poly1305.h>
34269 ++
34270 ++typedef __uint128_t u128;
34271 ++
34272 ++void poly1305_core_setkey(struct poly1305_core_key *key, const u8 raw_key[16])
34273 ++{
34274 ++ u64 t0, t1;
34275 ++
34276 ++ /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
34277 ++ t0 = get_unaligned_le64(&raw_key[0]);
34278 ++ t1 = get_unaligned_le64(&raw_key[8]);
34279 ++
34280 ++ key->key.r64[0] = t0 & 0xffc0fffffffULL;
34281 ++ key->key.r64[1] = ((t0 >> 44) | (t1 << 20)) & 0xfffffc0ffffULL;
34282 ++ key->key.r64[2] = ((t1 >> 24)) & 0x00ffffffc0fULL;
34283 ++
34284 ++ /* s = 20*r */
34285 ++ key->precomputed_s.r64[0] = key->key.r64[1] * 20;
34286 ++ key->precomputed_s.r64[1] = key->key.r64[2] * 20;
34287 ++}
34288 ++EXPORT_SYMBOL(poly1305_core_setkey);
34289 ++
34290 ++void poly1305_core_blocks(struct poly1305_state *state,
34291 ++ const struct poly1305_core_key *key, const void *src,
34292 ++ unsigned int nblocks, u32 hibit)
34293 ++{
34294 ++ const u8 *input = src;
34295 ++ u64 hibit64;
34296 ++ u64 r0, r1, r2;
34297 ++ u64 s1, s2;
34298 ++ u64 h0, h1, h2;
34299 ++ u64 c;
34300 ++ u128 d0, d1, d2, d;
34301 ++
34302 ++ if (!nblocks)
34303 ++ return;
34304 ++
34305 ++ hibit64 = ((u64)hibit) << 40;
34306 ++
34307 ++ r0 = key->key.r64[0];
34308 ++ r1 = key->key.r64[1];
34309 ++ r2 = key->key.r64[2];
34310 ++
34311 ++ h0 = state->h64[0];
34312 ++ h1 = state->h64[1];
34313 ++ h2 = state->h64[2];
34314 ++
34315 ++ s1 = key->precomputed_s.r64[0];
34316 ++ s2 = key->precomputed_s.r64[1];
34317 ++
34318 ++ do {
34319 ++ u64 t0, t1;
34320 ++
34321 ++ /* h += m[i] */
34322 ++ t0 = get_unaligned_le64(&input[0]);
34323 ++ t1 = get_unaligned_le64(&input[8]);
34324 ++
34325 ++ h0 += t0 & 0xfffffffffffULL;
34326 ++ h1 += ((t0 >> 44) | (t1 << 20)) & 0xfffffffffffULL;
34327 ++ h2 += (((t1 >> 24)) & 0x3ffffffffffULL) | hibit64;
34328 ++
34329 ++ /* h *= r */
34330 ++ d0 = (u128)h0 * r0;
34331 ++ d = (u128)h1 * s2;
34332 ++ d0 += d;
34333 ++ d = (u128)h2 * s1;
34334 ++ d0 += d;
34335 ++ d1 = (u128)h0 * r1;
34336 ++ d = (u128)h1 * r0;
34337 ++ d1 += d;
34338 ++ d = (u128)h2 * s2;
34339 ++ d1 += d;
34340 ++ d2 = (u128)h0 * r2;
34341 ++ d = (u128)h1 * r1;
34342 ++ d2 += d;
34343 ++ d = (u128)h2 * r0;
34344 ++ d2 += d;
34345 ++
34346 ++ /* (partial) h %= p */
34347 ++ c = (u64)(d0 >> 44);
34348 ++ h0 = (u64)d0 & 0xfffffffffffULL;
34349 ++ d1 += c;
34350 ++ c = (u64)(d1 >> 44);
34351 ++ h1 = (u64)d1 & 0xfffffffffffULL;
34352 ++ d2 += c;
34353 ++ c = (u64)(d2 >> 42);
34354 ++ h2 = (u64)d2 & 0x3ffffffffffULL;
34355 ++ h0 += c * 5;
34356 ++ c = h0 >> 44;
34357 ++ h0 = h0 & 0xfffffffffffULL;
34358 ++ h1 += c;
34359 ++
34360 ++ input += POLY1305_BLOCK_SIZE;
34361 ++ } while (--nblocks);
34362 ++
34363 ++ state->h64[0] = h0;
34364 ++ state->h64[1] = h1;
34365 ++ state->h64[2] = h2;
34366 ++}
34367 ++EXPORT_SYMBOL(poly1305_core_blocks);
34368 ++
34369 ++void poly1305_core_emit(const struct poly1305_state *state, const u32 nonce[4],
34370 ++ void *dst)
34371 ++{
34372 ++ u8 *mac = dst;
34373 ++ u64 h0, h1, h2, c;
34374 ++ u64 g0, g1, g2;
34375 ++ u64 t0, t1;
34376 ++
34377 ++ /* fully carry h */
34378 ++ h0 = state->h64[0];
34379 ++ h1 = state->h64[1];
34380 ++ h2 = state->h64[2];
34381 ++
34382 ++ c = h1 >> 44;
34383 ++ h1 &= 0xfffffffffffULL;
34384 ++ h2 += c;
34385 ++ c = h2 >> 42;
34386 ++ h2 &= 0x3ffffffffffULL;
34387 ++ h0 += c * 5;
34388 ++ c = h0 >> 44;
34389 ++ h0 &= 0xfffffffffffULL;
34390 ++ h1 += c;
34391 ++ c = h1 >> 44;
34392 ++ h1 &= 0xfffffffffffULL;
34393 ++ h2 += c;
34394 ++ c = h2 >> 42;
34395 ++ h2 &= 0x3ffffffffffULL;
34396 ++ h0 += c * 5;
34397 ++ c = h0 >> 44;
34398 ++ h0 &= 0xfffffffffffULL;
34399 ++ h1 += c;
34400 ++
34401 ++ /* compute h + -p */
34402 ++ g0 = h0 + 5;
34403 ++ c = g0 >> 44;
34404 ++ g0 &= 0xfffffffffffULL;
34405 ++ g1 = h1 + c;
34406 ++ c = g1 >> 44;
34407 ++ g1 &= 0xfffffffffffULL;
34408 ++ g2 = h2 + c - (1ULL << 42);
34409 ++
34410 ++ /* select h if h < p, or h + -p if h >= p */
34411 ++ c = (g2 >> ((sizeof(u64) * 8) - 1)) - 1;
34412 ++ g0 &= c;
34413 ++ g1 &= c;
34414 ++ g2 &= c;
34415 ++ c = ~c;
34416 ++ h0 = (h0 & c) | g0;
34417 ++ h1 = (h1 & c) | g1;
34418 ++ h2 = (h2 & c) | g2;
34419 ++
34420 ++ if (likely(nonce)) {
34421 ++ /* h = (h + nonce) */
34422 ++ t0 = ((u64)nonce[1] << 32) | nonce[0];
34423 ++ t1 = ((u64)nonce[3] << 32) | nonce[2];
34424 ++
34425 ++ h0 += t0 & 0xfffffffffffULL;
34426 ++ c = h0 >> 44;
34427 ++ h0 &= 0xfffffffffffULL;
34428 ++ h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffffULL) + c;
34429 ++ c = h1 >> 44;
34430 ++ h1 &= 0xfffffffffffULL;
34431 ++ h2 += (((t1 >> 24)) & 0x3ffffffffffULL) + c;
34432 ++ h2 &= 0x3ffffffffffULL;
34433 ++ }
34434 ++
34435 ++ /* mac = h % (2^128) */
34436 ++ h0 = h0 | (h1 << 44);
34437 ++ h1 = (h1 >> 20) | (h2 << 24);
34438 ++
34439 ++ put_unaligned_le64(h0, &mac[0]);
34440 ++ put_unaligned_le64(h1, &mac[8]);
34441 ++}
34442 ++EXPORT_SYMBOL(poly1305_core_emit);
34443 +diff --git a/lib/crypto/poly1305.c b/lib/crypto/poly1305.c
34444 +index 32ec293c65ae..9d2d14df0fee 100644
34445 +--- a/lib/crypto/poly1305.c
34446 ++++ b/lib/crypto/poly1305.c
34447 +@@ -12,151 +12,9 @@
34448 + #include <linux/module.h>
34449 + #include <asm/unaligned.h>
34450 +
34451 +-static inline u64 mlt(u64 a, u64 b)
34452 +-{
34453 +- return a * b;
34454 +-}
34455 +-
34456 +-static inline u32 sr(u64 v, u_char n)
34457 +-{
34458 +- return v >> n;
34459 +-}
34460 +-
34461 +-static inline u32 and(u32 v, u32 mask)
34462 +-{
34463 +- return v & mask;
34464 +-}
34465 +-
34466 +-void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key)
34467 +-{
34468 +- /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
34469 +- key->r[0] = (get_unaligned_le32(raw_key + 0) >> 0) & 0x3ffffff;
34470 +- key->r[1] = (get_unaligned_le32(raw_key + 3) >> 2) & 0x3ffff03;
34471 +- key->r[2] = (get_unaligned_le32(raw_key + 6) >> 4) & 0x3ffc0ff;
34472 +- key->r[3] = (get_unaligned_le32(raw_key + 9) >> 6) & 0x3f03fff;
34473 +- key->r[4] = (get_unaligned_le32(raw_key + 12) >> 8) & 0x00fffff;
34474 +-}
34475 +-EXPORT_SYMBOL_GPL(poly1305_core_setkey);
34476 +-
34477 +-void poly1305_core_blocks(struct poly1305_state *state,
34478 +- const struct poly1305_key *key, const void *src,
34479 +- unsigned int nblocks, u32 hibit)
34480 +-{
34481 +- u32 r0, r1, r2, r3, r4;
34482 +- u32 s1, s2, s3, s4;
34483 +- u32 h0, h1, h2, h3, h4;
34484 +- u64 d0, d1, d2, d3, d4;
34485 +-
34486 +- if (!nblocks)
34487 +- return;
34488 +-
34489 +- r0 = key->r[0];
34490 +- r1 = key->r[1];
34491 +- r2 = key->r[2];
34492 +- r3 = key->r[3];
34493 +- r4 = key->r[4];
34494 +-
34495 +- s1 = r1 * 5;
34496 +- s2 = r2 * 5;
34497 +- s3 = r3 * 5;
34498 +- s4 = r4 * 5;
34499 +-
34500 +- h0 = state->h[0];
34501 +- h1 = state->h[1];
34502 +- h2 = state->h[2];
34503 +- h3 = state->h[3];
34504 +- h4 = state->h[4];
34505 +-
34506 +- do {
34507 +- /* h += m[i] */
34508 +- h0 += (get_unaligned_le32(src + 0) >> 0) & 0x3ffffff;
34509 +- h1 += (get_unaligned_le32(src + 3) >> 2) & 0x3ffffff;
34510 +- h2 += (get_unaligned_le32(src + 6) >> 4) & 0x3ffffff;
34511 +- h3 += (get_unaligned_le32(src + 9) >> 6) & 0x3ffffff;
34512 +- h4 += (get_unaligned_le32(src + 12) >> 8) | (hibit << 24);
34513 +-
34514 +- /* h *= r */
34515 +- d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) +
34516 +- mlt(h3, s2) + mlt(h4, s1);
34517 +- d1 = mlt(h0, r1) + mlt(h1, r0) + mlt(h2, s4) +
34518 +- mlt(h3, s3) + mlt(h4, s2);
34519 +- d2 = mlt(h0, r2) + mlt(h1, r1) + mlt(h2, r0) +
34520 +- mlt(h3, s4) + mlt(h4, s3);
34521 +- d3 = mlt(h0, r3) + mlt(h1, r2) + mlt(h2, r1) +
34522 +- mlt(h3, r0) + mlt(h4, s4);
34523 +- d4 = mlt(h0, r4) + mlt(h1, r3) + mlt(h2, r2) +
34524 +- mlt(h3, r1) + mlt(h4, r0);
34525 +-
34526 +- /* (partial) h %= p */
34527 +- d1 += sr(d0, 26); h0 = and(d0, 0x3ffffff);
34528 +- d2 += sr(d1, 26); h1 = and(d1, 0x3ffffff);
34529 +- d3 += sr(d2, 26); h2 = and(d2, 0x3ffffff);
34530 +- d4 += sr(d3, 26); h3 = and(d3, 0x3ffffff);
34531 +- h0 += sr(d4, 26) * 5; h4 = and(d4, 0x3ffffff);
34532 +- h1 += h0 >> 26; h0 = h0 & 0x3ffffff;
34533 +-
34534 +- src += POLY1305_BLOCK_SIZE;
34535 +- } while (--nblocks);
34536 +-
34537 +- state->h[0] = h0;
34538 +- state->h[1] = h1;
34539 +- state->h[2] = h2;
34540 +- state->h[3] = h3;
34541 +- state->h[4] = h4;
34542 +-}
34543 +-EXPORT_SYMBOL_GPL(poly1305_core_blocks);
34544 +-
34545 +-void poly1305_core_emit(const struct poly1305_state *state, void *dst)
34546 +-{
34547 +- u32 h0, h1, h2, h3, h4;
34548 +- u32 g0, g1, g2, g3, g4;
34549 +- u32 mask;
34550 +-
34551 +- /* fully carry h */
34552 +- h0 = state->h[0];
34553 +- h1 = state->h[1];
34554 +- h2 = state->h[2];
34555 +- h3 = state->h[3];
34556 +- h4 = state->h[4];
34557 +-
34558 +- h2 += (h1 >> 26); h1 = h1 & 0x3ffffff;
34559 +- h3 += (h2 >> 26); h2 = h2 & 0x3ffffff;
34560 +- h4 += (h3 >> 26); h3 = h3 & 0x3ffffff;
34561 +- h0 += (h4 >> 26) * 5; h4 = h4 & 0x3ffffff;
34562 +- h1 += (h0 >> 26); h0 = h0 & 0x3ffffff;
34563 +-
34564 +- /* compute h + -p */
34565 +- g0 = h0 + 5;
34566 +- g1 = h1 + (g0 >> 26); g0 &= 0x3ffffff;
34567 +- g2 = h2 + (g1 >> 26); g1 &= 0x3ffffff;
34568 +- g3 = h3 + (g2 >> 26); g2 &= 0x3ffffff;
34569 +- g4 = h4 + (g3 >> 26) - (1 << 26); g3 &= 0x3ffffff;
34570 +-
34571 +- /* select h if h < p, or h + -p if h >= p */
34572 +- mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1;
34573 +- g0 &= mask;
34574 +- g1 &= mask;
34575 +- g2 &= mask;
34576 +- g3 &= mask;
34577 +- g4 &= mask;
34578 +- mask = ~mask;
34579 +- h0 = (h0 & mask) | g0;
34580 +- h1 = (h1 & mask) | g1;
34581 +- h2 = (h2 & mask) | g2;
34582 +- h3 = (h3 & mask) | g3;
34583 +- h4 = (h4 & mask) | g4;
34584 +-
34585 +- /* h = h % (2^128) */
34586 +- put_unaligned_le32((h0 >> 0) | (h1 << 26), dst + 0);
34587 +- put_unaligned_le32((h1 >> 6) | (h2 << 20), dst + 4);
34588 +- put_unaligned_le32((h2 >> 12) | (h3 << 14), dst + 8);
34589 +- put_unaligned_le32((h3 >> 18) | (h4 << 8), dst + 12);
34590 +-}
34591 +-EXPORT_SYMBOL_GPL(poly1305_core_emit);
34592 +-
34593 + void poly1305_init_generic(struct poly1305_desc_ctx *desc, const u8 *key)
34594 + {
34595 +- poly1305_core_setkey(desc->r, key);
34596 ++ poly1305_core_setkey(&desc->core_r, key);
34597 + desc->s[0] = get_unaligned_le32(key + 16);
34598 + desc->s[1] = get_unaligned_le32(key + 20);
34599 + desc->s[2] = get_unaligned_le32(key + 24);
34600 +@@ -164,7 +22,7 @@ void poly1305_init_generic(struct poly1305_desc_ctx *desc, const u8 *key)
34601 + poly1305_core_init(&desc->h);
34602 + desc->buflen = 0;
34603 + desc->sset = true;
34604 +- desc->rset = 1;
34605 ++ desc->rset = 2;
34606 + }
34607 + EXPORT_SYMBOL_GPL(poly1305_init_generic);
34608 +
34609 +@@ -181,13 +39,14 @@ void poly1305_update_generic(struct poly1305_desc_ctx *desc, const u8 *src,
34610 + desc->buflen += bytes;
34611 +
34612 + if (desc->buflen == POLY1305_BLOCK_SIZE) {
34613 +- poly1305_core_blocks(&desc->h, desc->r, desc->buf, 1, 1);
34614 ++ poly1305_core_blocks(&desc->h, &desc->core_r, desc->buf,
34615 ++ 1, 1);
34616 + desc->buflen = 0;
34617 + }
34618 + }
34619 +
34620 + if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
34621 +- poly1305_core_blocks(&desc->h, desc->r, src,
34622 ++ poly1305_core_blocks(&desc->h, &desc->core_r, src,
34623 + nbytes / POLY1305_BLOCK_SIZE, 1);
34624 + src += nbytes - (nbytes % POLY1305_BLOCK_SIZE);
34625 + nbytes %= POLY1305_BLOCK_SIZE;
34626 +@@ -202,28 +61,14 @@ EXPORT_SYMBOL_GPL(poly1305_update_generic);
34627 +
34628 + void poly1305_final_generic(struct poly1305_desc_ctx *desc, u8 *dst)
34629 + {
34630 +- __le32 digest[4];
34631 +- u64 f = 0;
34632 +-
34633 + if (unlikely(desc->buflen)) {
34634 + desc->buf[desc->buflen++] = 1;
34635 + memset(desc->buf + desc->buflen, 0,
34636 + POLY1305_BLOCK_SIZE - desc->buflen);
34637 +- poly1305_core_blocks(&desc->h, desc->r, desc->buf, 1, 0);
34638 ++ poly1305_core_blocks(&desc->h, &desc->core_r, desc->buf, 1, 0);
34639 + }
34640 +
34641 +- poly1305_core_emit(&desc->h, digest);
34642 +-
34643 +- /* mac = (h + s) % (2^128) */
34644 +- f = (f >> 32) + le32_to_cpu(digest[0]) + desc->s[0];
34645 +- put_unaligned_le32(f, dst + 0);
34646 +- f = (f >> 32) + le32_to_cpu(digest[1]) + desc->s[1];
34647 +- put_unaligned_le32(f, dst + 4);
34648 +- f = (f >> 32) + le32_to_cpu(digest[2]) + desc->s[2];
34649 +- put_unaligned_le32(f, dst + 8);
34650 +- f = (f >> 32) + le32_to_cpu(digest[3]) + desc->s[3];
34651 +- put_unaligned_le32(f, dst + 12);
34652 +-
34653 ++ poly1305_core_emit(&desc->h, desc->s, dst);
34654 + *desc = (struct poly1305_desc_ctx){};
34655 + }
34656 + EXPORT_SYMBOL_GPL(poly1305_final_generic);
34657 +--
34658 +cgit v1.2.3-4-ga26e
34659 +
34660 +
34661 +From ff76a84d462d096180e817fd6d72a3d748df87d5 Mon Sep 17 00:00:00 2001
34662 +From: "Jason A. Donenfeld" <Jason@×××××.com>
34663 +Date: Sun, 5 Jan 2020 22:40:47 -0500
34664 +Subject: crypto: x86/poly1305 - import unmodified cryptogams implementation
34665 +
34666 +commit 0896ca2a0cb6127e8a129f1f2a680d49b6b0f65c upstream.
34667 +
34668 +These x86_64 vectorized implementations come from Andy Polyakov's
34669 +CRYPTOGAMS implementation, and are included here in raw form without
34670 +modification, so that subsequent commits that fix these up for the
34671 +kernel can see how it has changed.
34672 +
34673 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
34674 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
34675 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
34676 +---
34677 + arch/x86/crypto/poly1305-x86_64-cryptogams.pl | 4159 +++++++++++++++++++++++++
34678 + 1 file changed, 4159 insertions(+)
34679 + create mode 100644 arch/x86/crypto/poly1305-x86_64-cryptogams.pl
34680 +
34681 +diff --git a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
34682 +new file mode 100644
34683 +index 000000000000..342ad7f18aa7
34684 +--- /dev/null
34685 ++++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
34686 +@@ -0,0 +1,4159 @@
34687 ++#! /usr/bin/env perl
34688 ++# Copyright 2016-2018 The OpenSSL Project Authors. All Rights Reserved.
34689 ++#
34690 ++# Licensed under the OpenSSL license (the "License"). You may not use
34691 ++# this file except in compliance with the License. You can obtain a copy
34692 ++# in the file LICENSE in the source distribution or at
34693 ++# https://www.openssl.org/source/license.html
34694 ++
34695 ++#
34696 ++# ====================================================================
34697 ++# Written by Andy Polyakov <appro@×××××××.org> for the OpenSSL
34698 ++# project. The module is, however, dual licensed under OpenSSL and
34699 ++# CRYPTOGAMS licenses depending on where you obtain it. For further
34700 ++# details see http://www.openssl.org/~appro/cryptogams/.
34701 ++# ====================================================================
34702 ++#
34703 ++# This module implements Poly1305 hash for x86_64.
34704 ++#
34705 ++# March 2015
34706 ++#
34707 ++# Initial release.
34708 ++#
34709 ++# December 2016
34710 ++#
34711 ++# Add AVX512F+VL+BW code path.
34712 ++#
34713 ++# November 2017
34714 ++#
34715 ++# Convert AVX512F+VL+BW code path to pure AVX512F, so that it can be
34716 ++# executed even on Knights Landing. Trigger for modification was
34717 ++# observation that AVX512 code paths can negatively affect overall
34718 ++# Skylake-X system performance. Since we are likely to suppress
34719 ++# AVX512F capability flag [at least on Skylake-X], conversion serves
34720 ++# as a kind of "investment protection". Note that the next *lake processor,
34721 ++# Cannonlake, has an AVX512IFMA code path to execute...
34722 ++#
34723 ++# Numbers are cycles per processed byte with poly1305_blocks alone,
34724 ++# measured with rdtsc at fixed clock frequency.
34725 ++#
34726 ++# IALU/gcc-4.8(*) AVX(**) AVX2 AVX-512
34727 ++# P4 4.46/+120% -
34728 ++# Core 2 2.41/+90% -
34729 ++# Westmere 1.88/+120% -
34730 ++# Sandy Bridge 1.39/+140% 1.10
34731 ++# Haswell 1.14/+175% 1.11 0.65
34732 ++# Skylake[-X] 1.13/+120% 0.96 0.51 [0.35]
34733 ++# Silvermont 2.83/+95% -
34734 ++# Knights L 3.60/? 1.65 1.10 0.41(***)
34735 ++# Goldmont 1.70/+180% -
34736 ++# VIA Nano 1.82/+150% -
34737 ++# Sledgehammer 1.38/+160% -
34738 ++# Bulldozer 2.30/+130% 0.97
34739 ++# Ryzen 1.15/+200% 1.08 1.18
34740 ++#
34741 ++# (*) improvement coefficients relative to clang are more modest and
34742 ++# are ~50% on most processors, in both cases we are comparing to
34743 ++# __int128 code;
34744 ++# (**) an SSE2 implementation was attempted, but among non-AVX processors
34745 ++# it was faster than integer-only code only on older Intel P4 and
34746 ++# Core processors (by 50-30%, less the newer the processor is) and slower
34747 ++# on contemporary ones, for example almost 2x slower on Atom; as the
34748 ++# former are naturally disappearing, SSE2 is deemed unnecessary;
34749 ++# (***) strangely enough performance seems to vary from core to core,
34750 ++# listed result is best case;
34751 ++
34752 ++$flavour = shift;
34753 ++$output = shift;
34754 ++if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
34755 ++
34756 ++$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
34757 ++
34758 ++$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
34759 ++( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
34760 ++( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
34761 ++die "can't locate x86_64-xlate.pl";
34762 ++
34763 ++if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
34764 ++ =~ /GNU assembler version ([2-9]\.[0-9]+)/) {
34765 ++ $avx = ($1>=2.19) + ($1>=2.22) + ($1>=2.25) + ($1>=2.26);
34766 ++}
34767 ++
34768 ++if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&
34769 ++ `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)(?:\.([0-9]+))?/) {
34770 ++ $avx = ($1>=2.09) + ($1>=2.10) + 2 * ($1>=2.12);
34771 ++ $avx += 2 if ($1==2.11 && $2>=8);
34772 ++}
34773 ++
34774 ++if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
34775 ++ `ml64 2>&1` =~ /Version ([0-9]+)\./) {
34776 ++ $avx = ($1>=10) + ($1>=12);
34777 ++}
34778 ++
34779 ++if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([3-9]\.[0-9]+)/) {
34780 ++ $avx = ($2>=3.0) + ($2>3.0);
34781 ++}
34782 ++
34783 ++open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
34784 ++*STDOUT=*OUT;
34785 ++
34786 ++my ($ctx,$inp,$len,$padbit)=("%rdi","%rsi","%rdx","%rcx");
34787 ++my ($mac,$nonce)=($inp,$len); # *_emit arguments
34788 ++my ($d1,$d2,$d3, $r0,$r1,$s1)=map("%r$_",(8..13));
34789 ++my ($h0,$h1,$h2)=("%r14","%rbx","%rbp");
34790 ++
34791 ++sub poly1305_iteration {
34792 ++# input: copy of $r1 in %rax, $h0-$h2, $r0-$r1
34793 ++# output: $h0-$h2 *= $r0-$r1
34794 ++$code.=<<___;
34795 ++ mulq $h0 # h0*r1
34796 ++ mov %rax,$d2
34797 ++ mov $r0,%rax
34798 ++ mov %rdx,$d3
34799 ++
34800 ++ mulq $h0 # h0*r0
34801 ++ mov %rax,$h0 # future $h0
34802 ++ mov $r0,%rax
34803 ++ mov %rdx,$d1
34804 ++
34805 ++ mulq $h1 # h1*r0
34806 ++ add %rax,$d2
34807 ++ mov $s1,%rax
34808 ++ adc %rdx,$d3
34809 ++
34810 ++ mulq $h1 # h1*s1
34811 ++ mov $h2,$h1 # borrow $h1
34812 ++ add %rax,$h0
34813 ++ adc %rdx,$d1
34814 ++
34815 ++ imulq $s1,$h1 # h2*s1
34816 ++ add $h1,$d2
34817 ++ mov $d1,$h1
34818 ++ adc \$0,$d3
34819 ++
34820 ++ imulq $r0,$h2 # h2*r0
34821 ++ add $d2,$h1
34822 ++ mov \$-4,%rax # mask value
34823 ++ adc $h2,$d3
34824 ++
34825 ++ and $d3,%rax # last reduction step
34826 ++ mov $d3,$h2
34827 ++ shr \$2,$d3
34828 ++ and \$3,$h2
34829 ++ add $d3,%rax
34830 ++ add %rax,$h0
34831 ++ adc \$0,$h1
34832 ++ adc \$0,$h2
34833 ++___
34834 ++}
34835 ++
34836 ++########################################################################
34837 ++# Layout of the opaque area is as follows.
34838 ++#
34839 ++# unsigned __int64 h[3]; # current hash value base 2^64
34840 ++# unsigned __int64 r[2]; # key value base 2^64
34841 ++
34842 ++$code.=<<___;
34843 ++.text
34844 ++
34845 ++.extern OPENSSL_ia32cap_P
34846 ++
34847 ++.globl poly1305_init
34848 ++.hidden poly1305_init
34849 ++.globl poly1305_blocks
34850 ++.hidden poly1305_blocks
34851 ++.globl poly1305_emit
34852 ++.hidden poly1305_emit
34853 ++
34854 ++.type poly1305_init,\@function,3
34855 ++.align 32
34856 ++poly1305_init:
34857 ++ xor %rax,%rax
34858 ++ mov %rax,0($ctx) # initialize hash value
34859 ++ mov %rax,8($ctx)
34860 ++ mov %rax,16($ctx)
34861 ++
34862 ++ cmp \$0,$inp
34863 ++ je .Lno_key
34864 ++
34865 ++ lea poly1305_blocks(%rip),%r10
34866 ++ lea poly1305_emit(%rip),%r11
34867 ++___
34868 ++$code.=<<___ if ($avx);
34869 ++ mov OPENSSL_ia32cap_P+4(%rip),%r9
34870 ++ lea poly1305_blocks_avx(%rip),%rax
34871 ++ lea poly1305_emit_avx(%rip),%rcx
34872 ++ bt \$`60-32`,%r9 # AVX?
34873 ++ cmovc %rax,%r10
34874 ++ cmovc %rcx,%r11
34875 ++___
34876 ++$code.=<<___ if ($avx>1);
34877 ++ lea poly1305_blocks_avx2(%rip),%rax
34878 ++ bt \$`5+32`,%r9 # AVX2?
34879 ++ cmovc %rax,%r10
34880 ++___
34881 ++$code.=<<___ if ($avx>3);
34882 ++ mov \$`(1<<31|1<<21|1<<16)`,%rax
34883 ++ shr \$32,%r9
34884 ++ and %rax,%r9
34885 ++ cmp %rax,%r9
34886 ++ je .Linit_base2_44
34887 ++___
34888 ++$code.=<<___;
34889 ++ mov \$0x0ffffffc0fffffff,%rax
34890 ++ mov \$0x0ffffffc0ffffffc,%rcx
34891 ++ and 0($inp),%rax
34892 ++ and 8($inp),%rcx
34893 ++ mov %rax,24($ctx)
34894 ++ mov %rcx,32($ctx)
34895 ++___
34896 ++$code.=<<___ if ($flavour !~ /elf32/);
34897 ++ mov %r10,0(%rdx)
34898 ++ mov %r11,8(%rdx)
34899 ++___
34900 ++$code.=<<___ if ($flavour =~ /elf32/);
34901 ++ mov %r10d,0(%rdx)
34902 ++ mov %r11d,4(%rdx)
34903 ++___
34904 ++$code.=<<___;
34905 ++ mov \$1,%eax
34906 ++.Lno_key:
34907 ++ ret
34908 ++.size poly1305_init,.-poly1305_init
34909 ++
34910 ++.type poly1305_blocks,\@function,4
34911 ++.align 32
34912 ++poly1305_blocks:
34913 ++.cfi_startproc
34914 ++.Lblocks:
34915 ++ shr \$4,$len
34916 ++ jz .Lno_data # too short
34917 ++
34918 ++ push %rbx
34919 ++.cfi_push %rbx
34920 ++ push %rbp
34921 ++.cfi_push %rbp
34922 ++ push %r12
34923 ++.cfi_push %r12
34924 ++ push %r13
34925 ++.cfi_push %r13
34926 ++ push %r14
34927 ++.cfi_push %r14
34928 ++ push %r15
34929 ++.cfi_push %r15
34930 ++.Lblocks_body:
34931 ++
34932 ++ mov $len,%r15 # reassign $len
34933 ++
34934 ++ mov 24($ctx),$r0 # load r
34935 ++ mov 32($ctx),$s1
34936 ++
34937 ++ mov 0($ctx),$h0 # load hash value
34938 ++ mov 8($ctx),$h1
34939 ++ mov 16($ctx),$h2
34940 ++
34941 ++ mov $s1,$r1
34942 ++ shr \$2,$s1
34943 ++ mov $r1,%rax
34944 ++ add $r1,$s1 # s1 = r1 + (r1 >> 2)
34945 ++ jmp .Loop
34946 ++
34947 ++.align 32
34948 ++.Loop:
34949 ++ add 0($inp),$h0 # accumulate input
34950 ++ adc 8($inp),$h1
34951 ++ lea 16($inp),$inp
34952 ++ adc $padbit,$h2
34953 ++___
34954 ++ &poly1305_iteration();
34955 ++$code.=<<___;
34956 ++ mov $r1,%rax
34957 ++ dec %r15 # len-=16
34958 ++ jnz .Loop
34959 ++
34960 ++ mov $h0,0($ctx) # store hash value
34961 ++ mov $h1,8($ctx)
34962 ++ mov $h2,16($ctx)
34963 ++
34964 ++ mov 0(%rsp),%r15
34965 ++.cfi_restore %r15
34966 ++ mov 8(%rsp),%r14
34967 ++.cfi_restore %r14
34968 ++ mov 16(%rsp),%r13
34969 ++.cfi_restore %r13
34970 ++ mov 24(%rsp),%r12
34971 ++.cfi_restore %r12
34972 ++ mov 32(%rsp),%rbp
34973 ++.cfi_restore %rbp
34974 ++ mov 40(%rsp),%rbx
34975 ++.cfi_restore %rbx
34976 ++ lea 48(%rsp),%rsp
34977 ++.cfi_adjust_cfa_offset -48
34978 ++.Lno_data:
34979 ++.Lblocks_epilogue:
34980 ++ ret
34981 ++.cfi_endproc
34982 ++.size poly1305_blocks,.-poly1305_blocks
34983 ++
34984 ++.type poly1305_emit,\@function,3
34985 ++.align 32
34986 ++poly1305_emit:
34987 ++.Lemit:
34988 ++ mov 0($ctx),%r8 # load hash value
34989 ++ mov 8($ctx),%r9
34990 ++ mov 16($ctx),%r10
34991 ++
34992 ++ mov %r8,%rax
34993 ++ add \$5,%r8 # compare to modulus
34994 ++ mov %r9,%rcx
34995 ++ adc \$0,%r9
34996 ++ adc \$0,%r10
34997 ++ shr \$2,%r10 # did 130-bit value overflow?
34998 ++ cmovnz %r8,%rax
34999 ++ cmovnz %r9,%rcx
35000 ++
35001 ++ add 0($nonce),%rax # accumulate nonce
35002 ++ adc 8($nonce),%rcx
35003 ++ mov %rax,0($mac) # write result
35004 ++ mov %rcx,8($mac)
35005 ++
35006 ++ ret
35007 ++.size poly1305_emit,.-poly1305_emit
35008 ++___
35009 ++if ($avx) {
35010 ++
35011 ++########################################################################
35012 ++# Layout of the opaque area is as follows.
35013 ++#
35014 ++# unsigned __int32 h[5]; # current hash value base 2^26
35015 ++# unsigned __int32 is_base2_26;
35016 ++# unsigned __int64 r[2]; # key value base 2^64
35017 ++# unsigned __int64 pad;
35018 ++# struct { unsigned __int32 r^2, r^1, r^4, r^3; } r[9];
35019 ++#
35020 ++# where r^n are base 2^26 digits of powers of the multiplier key. There are
35021 ++# 5 digits, but the last four are interleaved with their multiples of 5,
35022 ++# totalling 9 elements: r0, r1, 5*r1, r2, 5*r2, r3, 5*r3, r4, 5*r4.
35023 ++
35024 ++my ($H0,$H1,$H2,$H3,$H4, $T0,$T1,$T2,$T3,$T4, $D0,$D1,$D2,$D3,$D4, $MASK) =
35025 ++ map("%xmm$_",(0..15));
35026 ++
35027 ++$code.=<<___;
35028 ++.type __poly1305_block,\@abi-omnipotent
35029 ++.align 32
35030 ++__poly1305_block:
35031 ++___
35032 ++ &poly1305_iteration();
35033 ++$code.=<<___;
35034 ++ ret
35035 ++.size __poly1305_block,.-__poly1305_block
35036 ++
35037 ++.type __poly1305_init_avx,\@abi-omnipotent
35038 ++.align 32
35039 ++__poly1305_init_avx:
35040 ++ mov $r0,$h0
35041 ++ mov $r1,$h1
35042 ++ xor $h2,$h2
35043 ++
35044 ++ lea 48+64($ctx),$ctx # size optimization
35045 ++
35046 ++ mov $r1,%rax
35047 ++ call __poly1305_block # r^2
35048 ++
35049 ++ mov \$0x3ffffff,%eax # save interleaved r^2 and r base 2^26
35050 ++ mov \$0x3ffffff,%edx
35051 ++ mov $h0,$d1
35052 ++ and $h0#d,%eax
35053 ++ mov $r0,$d2
35054 ++ and $r0#d,%edx
35055 ++ mov %eax,`16*0+0-64`($ctx)
35056 ++ shr \$26,$d1
35057 ++ mov %edx,`16*0+4-64`($ctx)
35058 ++ shr \$26,$d2
35059 ++
35060 ++ mov \$0x3ffffff,%eax
35061 ++ mov \$0x3ffffff,%edx
35062 ++ and $d1#d,%eax
35063 ++ and $d2#d,%edx
35064 ++ mov %eax,`16*1+0-64`($ctx)
35065 ++ lea (%rax,%rax,4),%eax # *5
35066 ++ mov %edx,`16*1+4-64`($ctx)
35067 ++ lea (%rdx,%rdx,4),%edx # *5
35068 ++ mov %eax,`16*2+0-64`($ctx)
35069 ++ shr \$26,$d1
35070 ++ mov %edx,`16*2+4-64`($ctx)
35071 ++ shr \$26,$d2
35072 ++
35073 ++ mov $h1,%rax
35074 ++ mov $r1,%rdx
35075 ++ shl \$12,%rax
35076 ++ shl \$12,%rdx
35077 ++ or $d1,%rax
35078 ++ or $d2,%rdx
35079 ++ and \$0x3ffffff,%eax
35080 ++ and \$0x3ffffff,%edx
35081 ++ mov %eax,`16*3+0-64`($ctx)
35082 ++ lea (%rax,%rax,4),%eax # *5
35083 ++ mov %edx,`16*3+4-64`($ctx)
35084 ++ lea (%rdx,%rdx,4),%edx # *5
35085 ++ mov %eax,`16*4+0-64`($ctx)
35086 ++ mov $h1,$d1
35087 ++ mov %edx,`16*4+4-64`($ctx)
35088 ++ mov $r1,$d2
35089 ++
35090 ++ mov \$0x3ffffff,%eax
35091 ++ mov \$0x3ffffff,%edx
35092 ++ shr \$14,$d1
35093 ++ shr \$14,$d2
35094 ++ and $d1#d,%eax
35095 ++ and $d2#d,%edx
35096 ++ mov %eax,`16*5+0-64`($ctx)
35097 ++ lea (%rax,%rax,4),%eax # *5
35098 ++ mov %edx,`16*5+4-64`($ctx)
35099 ++ lea (%rdx,%rdx,4),%edx # *5
35100 ++ mov %eax,`16*6+0-64`($ctx)
35101 ++ shr \$26,$d1
35102 ++ mov %edx,`16*6+4-64`($ctx)
35103 ++ shr \$26,$d2
35104 ++
35105 ++ mov $h2,%rax
35106 ++ shl \$24,%rax
35107 ++ or %rax,$d1
35108 ++ mov $d1#d,`16*7+0-64`($ctx)
35109 ++ lea ($d1,$d1,4),$d1 # *5
35110 ++ mov $d2#d,`16*7+4-64`($ctx)
35111 ++ lea ($d2,$d2,4),$d2 # *5
35112 ++ mov $d1#d,`16*8+0-64`($ctx)
35113 ++ mov $d2#d,`16*8+4-64`($ctx)
35114 ++
35115 ++ mov $r1,%rax
35116 ++ call __poly1305_block # r^3
35117 ++
35118 ++ mov \$0x3ffffff,%eax # save r^3 base 2^26
35119 ++ mov $h0,$d1
35120 ++ and $h0#d,%eax
35121 ++ shr \$26,$d1
35122 ++ mov %eax,`16*0+12-64`($ctx)
35123 ++
35124 ++ mov \$0x3ffffff,%edx
35125 ++ and $d1#d,%edx
35126 ++ mov %edx,`16*1+12-64`($ctx)
35127 ++ lea (%rdx,%rdx,4),%edx # *5
35128 ++ shr \$26,$d1
35129 ++ mov %edx,`16*2+12-64`($ctx)
35130 ++
35131 ++ mov $h1,%rax
35132 ++ shl \$12,%rax
35133 ++ or $d1,%rax
35134 ++ and \$0x3ffffff,%eax
35135 ++ mov %eax,`16*3+12-64`($ctx)
35136 ++ lea (%rax,%rax,4),%eax # *5
35137 ++ mov $h1,$d1
35138 ++ mov %eax,`16*4+12-64`($ctx)
35139 ++
35140 ++ mov \$0x3ffffff,%edx
35141 ++ shr \$14,$d1
35142 ++ and $d1#d,%edx
35143 ++ mov %edx,`16*5+12-64`($ctx)
35144 ++ lea (%rdx,%rdx,4),%edx # *5
35145 ++ shr \$26,$d1
35146 ++ mov %edx,`16*6+12-64`($ctx)
35147 ++
35148 ++ mov $h2,%rax
35149 ++ shl \$24,%rax
35150 ++ or %rax,$d1
35151 ++ mov $d1#d,`16*7+12-64`($ctx)
35152 ++ lea ($d1,$d1,4),$d1 # *5
35153 ++ mov $d1#d,`16*8+12-64`($ctx)
35154 ++
35155 ++ mov $r1,%rax
35156 ++ call __poly1305_block # r^4
35157 ++
35158 ++ mov \$0x3ffffff,%eax # save r^4 base 2^26
35159 ++ mov $h0,$d1
35160 ++ and $h0#d,%eax
35161 ++ shr \$26,$d1
35162 ++ mov %eax,`16*0+8-64`($ctx)
35163 ++
35164 ++ mov \$0x3ffffff,%edx
35165 ++ and $d1#d,%edx
35166 ++ mov %edx,`16*1+8-64`($ctx)
35167 ++ lea (%rdx,%rdx,4),%edx # *5
35168 ++ shr \$26,$d1
35169 ++ mov %edx,`16*2+8-64`($ctx)
35170 ++
35171 ++ mov $h1,%rax
35172 ++ shl \$12,%rax
35173 ++ or $d1,%rax
35174 ++ and \$0x3ffffff,%eax
35175 ++ mov %eax,`16*3+8-64`($ctx)
35176 ++ lea (%rax,%rax,4),%eax # *5
35177 ++ mov $h1,$d1
35178 ++ mov %eax,`16*4+8-64`($ctx)
35179 ++
35180 ++ mov \$0x3ffffff,%edx
35181 ++ shr \$14,$d1
35182 ++ and $d1#d,%edx
35183 ++ mov %edx,`16*5+8-64`($ctx)
35184 ++ lea (%rdx,%rdx,4),%edx # *5
35185 ++ shr \$26,$d1
35186 ++ mov %edx,`16*6+8-64`($ctx)
35187 ++
35188 ++ mov $h2,%rax
35189 ++ shl \$24,%rax
35190 ++ or %rax,$d1
35191 ++ mov $d1#d,`16*7+8-64`($ctx)
35192 ++ lea ($d1,$d1,4),$d1 # *5
35193 ++ mov $d1#d,`16*8+8-64`($ctx)
35194 ++
35195 ++ lea -48-64($ctx),$ctx # size [de-]optimization
35196 ++ ret
35197 ++.size __poly1305_init_avx,.-__poly1305_init_avx
35198 ++
35199 ++.type poly1305_blocks_avx,\@function,4
35200 ++.align 32
35201 ++poly1305_blocks_avx:
35202 ++.cfi_startproc
35203 ++ mov 20($ctx),%r8d # is_base2_26
35204 ++ cmp \$128,$len
35205 ++ jae .Lblocks_avx
35206 ++ test %r8d,%r8d
35207 ++ jz .Lblocks
35208 ++
35209 ++.Lblocks_avx:
35210 ++ and \$-16,$len
35211 ++ jz .Lno_data_avx
35212 ++
35213 ++ vzeroupper
35214 ++
35215 ++ test %r8d,%r8d
35216 ++ jz .Lbase2_64_avx
35217 ++
35218 ++ test \$31,$len
35219 ++ jz .Leven_avx
35220 ++
35221 ++ push %rbx
35222 ++.cfi_push %rbx
35223 ++ push %rbp
35224 ++.cfi_push %rbp
35225 ++ push %r12
35226 ++.cfi_push %r12
35227 ++ push %r13
35228 ++.cfi_push %r13
35229 ++ push %r14
35230 ++.cfi_push %r14
35231 ++ push %r15
35232 ++.cfi_push %r15
35233 ++.Lblocks_avx_body:
35234 ++
35235 ++ mov $len,%r15 # reassign $len
35236 ++
35237 ++ mov 0($ctx),$d1 # load hash value
35238 ++ mov 8($ctx),$d2
35239 ++ mov 16($ctx),$h2#d
35240 ++
35241 ++ mov 24($ctx),$r0 # load r
35242 ++ mov 32($ctx),$s1
35243 ++
35244 ++ ################################# base 2^26 -> base 2^64
35245 ++ mov $d1#d,$h0#d
35246 ++ and \$`-1*(1<<31)`,$d1
35247 ++ mov $d2,$r1 # borrow $r1
35248 ++ mov $d2#d,$h1#d
35249 ++ and \$`-1*(1<<31)`,$d2
35250 ++
35251 ++ shr \$6,$d1
35252 ++ shl \$52,$r1
35253 ++ add $d1,$h0
35254 ++ shr \$12,$h1
35255 ++ shr \$18,$d2
35256 ++ add $r1,$h0
35257 ++ adc $d2,$h1
35258 ++
35259 ++ mov $h2,$d1
35260 ++ shl \$40,$d1
35261 ++ shr \$24,$h2
35262 ++ add $d1,$h1
35263 ++ adc \$0,$h2 # can be partially reduced...
35264 ++
35265 ++ mov \$-4,$d2 # ... so reduce
35266 ++ mov $h2,$d1
35267 ++ and $h2,$d2
35268 ++ shr \$2,$d1
35269 ++ and \$3,$h2
35270 ++ add $d2,$d1 # =*5
35271 ++ add $d1,$h0
35272 ++ adc \$0,$h1
35273 ++ adc \$0,$h2
35274 ++
35275 ++ mov $s1,$r1
35276 ++ mov $s1,%rax
35277 ++ shr \$2,$s1
35278 ++ add $r1,$s1 # s1 = r1 + (r1 >> 2)
35279 ++
35280 ++ add 0($inp),$h0 # accumulate input
35281 ++ adc 8($inp),$h1
35282 ++ lea 16($inp),$inp
35283 ++ adc $padbit,$h2
35284 ++
35285 ++ call __poly1305_block
35286 ++
35287 ++ test $padbit,$padbit # if $padbit is zero,
35288 ++ jz .Lstore_base2_64_avx # store hash in base 2^64 format
35289 ++
35290 ++ ################################# base 2^64 -> base 2^26
35291 ++ mov $h0,%rax
35292 ++ mov $h0,%rdx
35293 ++ shr \$52,$h0
35294 ++ mov $h1,$r0
35295 ++ mov $h1,$r1
35296 ++ shr \$26,%rdx
35297 ++ and \$0x3ffffff,%rax # h[0]
35298 ++ shl \$12,$r0
35299 ++ and \$0x3ffffff,%rdx # h[1]
35300 ++ shr \$14,$h1
35301 ++ or $r0,$h0
35302 ++ shl \$24,$h2
35303 ++ and \$0x3ffffff,$h0 # h[2]
35304 ++ shr \$40,$r1
35305 ++ and \$0x3ffffff,$h1 # h[3]
35306 ++ or $r1,$h2 # h[4]
35307 ++
35308 ++ sub \$16,%r15
35309 ++ jz .Lstore_base2_26_avx
35310 ++
35311 ++ vmovd %rax#d,$H0
35312 ++ vmovd %rdx#d,$H1
35313 ++ vmovd $h0#d,$H2
35314 ++ vmovd $h1#d,$H3
35315 ++ vmovd $h2#d,$H4
35316 ++ jmp .Lproceed_avx
35317 ++
35318 ++.align 32
35319 ++.Lstore_base2_64_avx:
35320 ++ mov $h0,0($ctx)
35321 ++ mov $h1,8($ctx)
35322 ++ mov $h2,16($ctx) # note that is_base2_26 is zeroed
35323 ++ jmp .Ldone_avx
35324 ++
35325 ++.align 16
35326 ++.Lstore_base2_26_avx:
35327 ++ mov %rax#d,0($ctx) # store hash value base 2^26
35328 ++ mov %rdx#d,4($ctx)
35329 ++ mov $h0#d,8($ctx)
35330 ++ mov $h1#d,12($ctx)
35331 ++ mov $h2#d,16($ctx)
35332 ++.align 16
35333 ++.Ldone_avx:
35334 ++ mov 0(%rsp),%r15
35335 ++.cfi_restore %r15
35336 ++ mov 8(%rsp),%r14
35337 ++.cfi_restore %r14
35338 ++ mov 16(%rsp),%r13
35339 ++.cfi_restore %r13
35340 ++ mov 24(%rsp),%r12
35341 ++.cfi_restore %r12
35342 ++ mov 32(%rsp),%rbp
35343 ++.cfi_restore %rbp
35344 ++ mov 40(%rsp),%rbx
35345 ++.cfi_restore %rbx
35346 ++ lea 48(%rsp),%rsp
35347 ++.cfi_adjust_cfa_offset -48
35348 ++.Lno_data_avx:
35349 ++.Lblocks_avx_epilogue:
35350 ++ ret
35351 ++.cfi_endproc
35352 ++
35353 ++.align 32
35354 ++.Lbase2_64_avx:
35355 ++.cfi_startproc
35356 ++ push %rbx
35357 ++.cfi_push %rbx
35358 ++ push %rbp
35359 ++.cfi_push %rbp
35360 ++ push %r12
35361 ++.cfi_push %r12
35362 ++ push %r13
35363 ++.cfi_push %r13
35364 ++ push %r14
35365 ++.cfi_push %r14
35366 ++ push %r15
35367 ++.cfi_push %r15
35368 ++.Lbase2_64_avx_body:
35369 ++
35370 ++ mov $len,%r15 # reassign $len
35371 ++
35372 ++ mov 24($ctx),$r0 # load r
35373 ++ mov 32($ctx),$s1
35374 ++
35375 ++ mov 0($ctx),$h0 # load hash value
35376 ++ mov 8($ctx),$h1
35377 ++ mov 16($ctx),$h2#d
35378 ++
35379 ++ mov $s1,$r1
35380 ++ mov $s1,%rax
35381 ++ shr \$2,$s1
35382 ++ add $r1,$s1 # s1 = r1 + (r1 >> 2)
35383 ++
35384 ++ test \$31,$len
35385 ++ jz .Linit_avx
35386 ++
35387 ++ add 0($inp),$h0 # accumulate input
35388 ++ adc 8($inp),$h1
35389 ++ lea 16($inp),$inp
35390 ++ adc $padbit,$h2
35391 ++ sub \$16,%r15
35392 ++
35393 ++ call __poly1305_block
35394 ++
35395 ++.Linit_avx:
35396 ++ ################################# base 2^64 -> base 2^26
35397 ++ mov $h0,%rax
35398 ++ mov $h0,%rdx
35399 ++ shr \$52,$h0
35400 ++ mov $h1,$d1
35401 ++ mov $h1,$d2
35402 ++ shr \$26,%rdx
35403 ++ and \$0x3ffffff,%rax # h[0]
35404 ++ shl \$12,$d1
35405 ++ and \$0x3ffffff,%rdx # h[1]
35406 ++ shr \$14,$h1
35407 ++ or $d1,$h0
35408 ++ shl \$24,$h2
35409 ++ and \$0x3ffffff,$h0 # h[2]
35410 ++ shr \$40,$d2
35411 ++ and \$0x3ffffff,$h1 # h[3]
35412 ++ or $d2,$h2 # h[4]
35413 ++
35414 ++ vmovd %rax#d,$H0
35415 ++ vmovd %rdx#d,$H1
35416 ++ vmovd $h0#d,$H2
35417 ++ vmovd $h1#d,$H3
35418 ++ vmovd $h2#d,$H4
35419 ++ movl \$1,20($ctx) # set is_base2_26
35420 ++
35421 ++ call __poly1305_init_avx
35422 ++
35423 ++.Lproceed_avx:
35424 ++ mov %r15,$len
35425 ++
35426 ++ mov 0(%rsp),%r15
35427 ++.cfi_restore %r15
35428 ++ mov 8(%rsp),%r14
35429 ++.cfi_restore %r14
35430 ++ mov 16(%rsp),%r13
35431 ++.cfi_restore %r13
35432 ++ mov 24(%rsp),%r12
35433 ++.cfi_restore %r12
35434 ++ mov 32(%rsp),%rbp
35435 ++.cfi_restore %rbp
35436 ++ mov 40(%rsp),%rbx
35437 ++.cfi_restore %rbx
35438 ++ lea 48(%rsp),%rax
35439 ++ lea 48(%rsp),%rsp
35440 ++.cfi_adjust_cfa_offset -48
35441 ++.Lbase2_64_avx_epilogue:
35442 ++ jmp .Ldo_avx
35443 ++.cfi_endproc
35444 ++
35445 ++.align 32
35446 ++.Leven_avx:
35447 ++.cfi_startproc
35448 ++ vmovd 4*0($ctx),$H0 # load hash value
35449 ++ vmovd 4*1($ctx),$H1
35450 ++ vmovd 4*2($ctx),$H2
35451 ++ vmovd 4*3($ctx),$H3
35452 ++ vmovd 4*4($ctx),$H4
35453 ++
35454 ++.Ldo_avx:
35455 ++___
35456 ++$code.=<<___ if (!$win64);
35457 ++ lea -0x58(%rsp),%r11
35458 ++.cfi_def_cfa %r11,0x60
35459 ++ sub \$0x178,%rsp
35460 ++___
35461 ++$code.=<<___ if ($win64);
35462 ++ lea -0xf8(%rsp),%r11
35463 ++ sub \$0x218,%rsp
35464 ++ vmovdqa %xmm6,0x50(%r11)
35465 ++ vmovdqa %xmm7,0x60(%r11)
35466 ++ vmovdqa %xmm8,0x70(%r11)
35467 ++ vmovdqa %xmm9,0x80(%r11)
35468 ++ vmovdqa %xmm10,0x90(%r11)
35469 ++ vmovdqa %xmm11,0xa0(%r11)
35470 ++ vmovdqa %xmm12,0xb0(%r11)
35471 ++ vmovdqa %xmm13,0xc0(%r11)
35472 ++ vmovdqa %xmm14,0xd0(%r11)
35473 ++ vmovdqa %xmm15,0xe0(%r11)
35474 ++.Ldo_avx_body:
35475 ++___
35476 ++$code.=<<___;
35477 ++ sub \$64,$len
35478 ++ lea -32($inp),%rax
35479 ++ cmovc %rax,$inp
35480 ++
35481 ++ vmovdqu `16*3`($ctx),$D4 # preload r0^2
35482 ++ lea `16*3+64`($ctx),$ctx # size optimization
35483 ++ lea .Lconst(%rip),%rcx
35484 ++
35485 ++ ################################################################
35486 ++ # load input
35487 ++ vmovdqu 16*2($inp),$T0
35488 ++ vmovdqu 16*3($inp),$T1
35489 ++ vmovdqa 64(%rcx),$MASK # .Lmask26
35490 ++
35491 ++ vpsrldq \$6,$T0,$T2 # splat input
35492 ++ vpsrldq \$6,$T1,$T3
35493 ++ vpunpckhqdq $T1,$T0,$T4 # 4
35494 ++ vpunpcklqdq $T1,$T0,$T0 # 0:1
35495 ++ vpunpcklqdq $T3,$T2,$T3 # 2:3
35496 ++
35497 ++ vpsrlq \$40,$T4,$T4 # 4
35498 ++ vpsrlq \$26,$T0,$T1
35499 ++ vpand $MASK,$T0,$T0 # 0
35500 ++ vpsrlq \$4,$T3,$T2
35501 ++ vpand $MASK,$T1,$T1 # 1
35502 ++ vpsrlq \$30,$T3,$T3
35503 ++ vpand $MASK,$T2,$T2 # 2
35504 ++ vpand $MASK,$T3,$T3 # 3
35505 ++ vpor 32(%rcx),$T4,$T4 # padbit, yes, always
35506 ++
35507 ++ jbe .Lskip_loop_avx
35508 ++
35509 ++ # expand and copy pre-calculated table to stack
35510 ++ vmovdqu `16*1-64`($ctx),$D1
35511 ++ vmovdqu `16*2-64`($ctx),$D2
35512 ++ vpshufd \$0xEE,$D4,$D3 # 34xx -> 3434
35513 ++ vpshufd \$0x44,$D4,$D0 # xx12 -> 1212
35514 ++ vmovdqa $D3,-0x90(%r11)
35515 ++ vmovdqa $D0,0x00(%rsp)
35516 ++ vpshufd \$0xEE,$D1,$D4
35517 ++ vmovdqu `16*3-64`($ctx),$D0
35518 ++ vpshufd \$0x44,$D1,$D1
35519 ++ vmovdqa $D4,-0x80(%r11)
35520 ++ vmovdqa $D1,0x10(%rsp)
35521 ++ vpshufd \$0xEE,$D2,$D3
35522 ++ vmovdqu `16*4-64`($ctx),$D1
35523 ++ vpshufd \$0x44,$D2,$D2
35524 ++ vmovdqa $D3,-0x70(%r11)
35525 ++ vmovdqa $D2,0x20(%rsp)
35526 ++ vpshufd \$0xEE,$D0,$D4
35527 ++ vmovdqu `16*5-64`($ctx),$D2
35528 ++ vpshufd \$0x44,$D0,$D0
35529 ++ vmovdqa $D4,-0x60(%r11)
35530 ++ vmovdqa $D0,0x30(%rsp)
35531 ++ vpshufd \$0xEE,$D1,$D3
35532 ++ vmovdqu `16*6-64`($ctx),$D0
35533 ++ vpshufd \$0x44,$D1,$D1
35534 ++ vmovdqa $D3,-0x50(%r11)
35535 ++ vmovdqa $D1,0x40(%rsp)
35536 ++ vpshufd \$0xEE,$D2,$D4
35537 ++ vmovdqu `16*7-64`($ctx),$D1
35538 ++ vpshufd \$0x44,$D2,$D2
35539 ++ vmovdqa $D4,-0x40(%r11)
35540 ++ vmovdqa $D2,0x50(%rsp)
35541 ++ vpshufd \$0xEE,$D0,$D3
35542 ++ vmovdqu `16*8-64`($ctx),$D2
35543 ++ vpshufd \$0x44,$D0,$D0
35544 ++ vmovdqa $D3,-0x30(%r11)
35545 ++ vmovdqa $D0,0x60(%rsp)
35546 ++ vpshufd \$0xEE,$D1,$D4
35547 ++ vpshufd \$0x44,$D1,$D1
35548 ++ vmovdqa $D4,-0x20(%r11)
35549 ++ vmovdqa $D1,0x70(%rsp)
35550 ++ vpshufd \$0xEE,$D2,$D3
35551 ++ vmovdqa 0x00(%rsp),$D4 # preload r0^2
35552 ++ vpshufd \$0x44,$D2,$D2
35553 ++ vmovdqa $D3,-0x10(%r11)
35554 ++ vmovdqa $D2,0x80(%rsp)
35555 ++
35556 ++ jmp .Loop_avx
35557 ++
35558 ++.align 32
35559 ++.Loop_avx:
35560 ++ ################################################################
35561 ++ # ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
35562 ++ # ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
35563 ++ # \___________________/
35564 ++ # ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
35565 ++ # ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
35566 ++ # \___________________/ \____________________/
35567 ++ #
35568 ++ # Note that we start with inp[2:3]*r^2. This is because it
35569 ++ # doesn't depend on reduction in previous iteration.
35570 ++ ################################################################
35571 ++ # d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4
35572 ++ # d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4
35573 ++ # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4
35574 ++ # d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4
35575 ++ # d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
35576 ++ #
35577 ++ # though note that $Tx and $Hx are "reversed" in this section,
35578 ++ # and $D4 is preloaded with r0^2...
35579 ++
35580 ++ vpmuludq $T0,$D4,$D0 # d0 = h0*r0
35581 ++ vpmuludq $T1,$D4,$D1 # d1 = h1*r0
35582 ++ vmovdqa $H2,0x20(%r11) # offload hash
35583 ++ vpmuludq $T2,$D4,$D2 # d2 = h2*r0
35584 ++ vmovdqa 0x10(%rsp),$H2 # r1^2
35585 ++ vpmuludq $T3,$D4,$D3 # d3 = h3*r0
35586 ++ vpmuludq $T4,$D4,$D4 # d4 = h4*r0
35587 ++
35588 ++ vmovdqa $H0,0x00(%r11) #
35589 ++ vpmuludq 0x20(%rsp),$T4,$H0 # h4*s1
35590 ++ vmovdqa $H1,0x10(%r11) #
35591 ++ vpmuludq $T3,$H2,$H1 # h3*r1
35592 ++ vpaddq $H0,$D0,$D0 # d0 += h4*s1
35593 ++ vpaddq $H1,$D4,$D4 # d4 += h3*r1
35594 ++ vmovdqa $H3,0x30(%r11) #
35595 ++ vpmuludq $T2,$H2,$H0 # h2*r1
35596 ++ vpmuludq $T1,$H2,$H1 # h1*r1
35597 ++ vpaddq $H0,$D3,$D3 # d3 += h2*r1
35598 ++ vmovdqa 0x30(%rsp),$H3 # r2^2
35599 ++ vpaddq $H1,$D2,$D2 # d2 += h1*r1
35600 ++ vmovdqa $H4,0x40(%r11) #
35601 ++ vpmuludq $T0,$H2,$H2 # h0*r1
35602 ++ vpmuludq $T2,$H3,$H0 # h2*r2
35603 ++ vpaddq $H2,$D1,$D1 # d1 += h0*r1
35604 ++
35605 ++ vmovdqa 0x40(%rsp),$H4 # s2^2
35606 ++ vpaddq $H0,$D4,$D4 # d4 += h2*r2
35607 ++ vpmuludq $T1,$H3,$H1 # h1*r2
35608 ++ vpmuludq $T0,$H3,$H3 # h0*r2
35609 ++ vpaddq $H1,$D3,$D3 # d3 += h1*r2
35610 ++ vmovdqa 0x50(%rsp),$H2 # r3^2
35611 ++ vpaddq $H3,$D2,$D2 # d2 += h0*r2
35612 ++ vpmuludq $T4,$H4,$H0 # h4*s2
35613 ++ vpmuludq $T3,$H4,$H4 # h3*s2
35614 ++ vpaddq $H0,$D1,$D1 # d1 += h4*s2
35615 ++ vmovdqa 0x60(%rsp),$H3 # s3^2
35616 ++ vpaddq $H4,$D0,$D0 # d0 += h3*s2
35617 ++
35618 ++ vmovdqa 0x80(%rsp),$H4 # s4^2
35619 ++ vpmuludq $T1,$H2,$H1 # h1*r3
35620 ++ vpmuludq $T0,$H2,$H2 # h0*r3
35621 ++ vpaddq $H1,$D4,$D4 # d4 += h1*r3
35622 ++ vpaddq $H2,$D3,$D3 # d3 += h0*r3
35623 ++ vpmuludq $T4,$H3,$H0 # h4*s3
35624 ++ vpmuludq $T3,$H3,$H1 # h3*s3
35625 ++ vpaddq $H0,$D2,$D2 # d2 += h4*s3
35626 ++ vmovdqu 16*0($inp),$H0 # load input
35627 ++ vpaddq $H1,$D1,$D1 # d1 += h3*s3
35628 ++ vpmuludq $T2,$H3,$H3 # h2*s3
35629 ++ vpmuludq $T2,$H4,$T2 # h2*s4
35630 ++ vpaddq $H3,$D0,$D0 # d0 += h2*s3
35631 ++
35632 ++ vmovdqu 16*1($inp),$H1 #
35633 ++ vpaddq $T2,$D1,$D1 # d1 += h2*s4
35634 ++ vpmuludq $T3,$H4,$T3 # h3*s4
35635 ++ vpmuludq $T4,$H4,$T4 # h4*s4
35636 ++ vpsrldq \$6,$H0,$H2 # splat input
35637 ++ vpaddq $T3,$D2,$D2 # d2 += h3*s4
35638 ++ vpaddq $T4,$D3,$D3 # d3 += h4*s4
35639 ++ vpsrldq \$6,$H1,$H3 #
35640 ++ vpmuludq 0x70(%rsp),$T0,$T4 # h0*r4
35641 ++ vpmuludq $T1,$H4,$T0 # h1*s4
35642 ++ vpunpckhqdq $H1,$H0,$H4 # 4
35643 ++ vpaddq $T4,$D4,$D4 # d4 += h0*r4
35644 ++ vmovdqa -0x90(%r11),$T4 # r0^4
35645 ++ vpaddq $T0,$D0,$D0 # d0 += h1*s4
35646 ++
35647 ++ vpunpcklqdq $H1,$H0,$H0 # 0:1
35648 ++ vpunpcklqdq $H3,$H2,$H3 # 2:3
35649 ++
35650 ++ #vpsrlq \$40,$H4,$H4 # 4
35651 ++ vpsrldq \$`40/8`,$H4,$H4 # 4
35652 ++ vpsrlq \$26,$H0,$H1
35653 ++ vpand $MASK,$H0,$H0 # 0
35654 ++ vpsrlq \$4,$H3,$H2
35655 ++ vpand $MASK,$H1,$H1 # 1
35656 ++ vpand 0(%rcx),$H4,$H4 # .Lmask24
35657 ++ vpsrlq \$30,$H3,$H3
35658 ++ vpand $MASK,$H2,$H2 # 2
35659 ++ vpand $MASK,$H3,$H3 # 3
35660 ++ vpor 32(%rcx),$H4,$H4 # padbit, yes, always
35661 ++
35662 ++ vpaddq 0x00(%r11),$H0,$H0 # add hash value
35663 ++ vpaddq 0x10(%r11),$H1,$H1
35664 ++ vpaddq 0x20(%r11),$H2,$H2
35665 ++ vpaddq 0x30(%r11),$H3,$H3
35666 ++ vpaddq 0x40(%r11),$H4,$H4
35667 ++
35668 ++ lea 16*2($inp),%rax
35669 ++ lea 16*4($inp),$inp
35670 ++ sub \$64,$len
35671 ++ cmovc %rax,$inp
35672 ++
35673 ++ ################################################################
35674 ++ # Now we accumulate (inp[0:1]+hash)*r^4
35675 ++ ################################################################
35676 ++ # d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4
35677 ++ # d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4
35678 ++ # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4
35679 ++ # d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4
35680 ++ # d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
35681 ++
35682 ++ vpmuludq $H0,$T4,$T0 # h0*r0
35683 ++ vpmuludq $H1,$T4,$T1 # h1*r0
35684 ++ vpaddq $T0,$D0,$D0
35685 ++ vpaddq $T1,$D1,$D1
35686 ++ vmovdqa -0x80(%r11),$T2 # r1^4
35687 ++ vpmuludq $H2,$T4,$T0 # h2*r0
35688 ++ vpmuludq $H3,$T4,$T1 # h3*r0
35689 ++ vpaddq $T0,$D2,$D2
35690 ++ vpaddq $T1,$D3,$D3
35691 ++ vpmuludq $H4,$T4,$T4 # h4*r0
35692 ++ vpmuludq -0x70(%r11),$H4,$T0 # h4*s1
35693 ++ vpaddq $T4,$D4,$D4
35694 ++
35695 ++ vpaddq $T0,$D0,$D0 # d0 += h4*s1
35696 ++ vpmuludq $H2,$T2,$T1 # h2*r1
35697 ++ vpmuludq $H3,$T2,$T0 # h3*r1
35698 ++ vpaddq $T1,$D3,$D3 # d3 += h2*r1
35699 ++ vmovdqa -0x60(%r11),$T3 # r2^4
35700 ++ vpaddq $T0,$D4,$D4 # d4 += h3*r1
35701 ++ vpmuludq $H1,$T2,$T1 # h1*r1
35702 ++ vpmuludq $H0,$T2,$T2 # h0*r1
35703 ++ vpaddq $T1,$D2,$D2 # d2 += h1*r1
35704 ++ vpaddq $T2,$D1,$D1 # d1 += h0*r1
35705 ++
35706 ++ vmovdqa -0x50(%r11),$T4 # s2^4
35707 ++ vpmuludq $H2,$T3,$T0 # h2*r2
35708 ++ vpmuludq $H1,$T3,$T1 # h1*r2
35709 ++ vpaddq $T0,$D4,$D4 # d4 += h2*r2
35710 ++ vpaddq $T1,$D3,$D3 # d3 += h1*r2
35711 ++ vmovdqa -0x40(%r11),$T2 # r3^4
35712 ++ vpmuludq $H0,$T3,$T3 # h0*r2
35713 ++ vpmuludq $H4,$T4,$T0 # h4*s2
35714 ++ vpaddq $T3,$D2,$D2 # d2 += h0*r2
35715 ++ vpaddq $T0,$D1,$D1 # d1 += h4*s2
35716 ++ vmovdqa -0x30(%r11),$T3 # s3^4
35717 ++ vpmuludq $H3,$T4,$T4 # h3*s2
35718 ++ vpmuludq $H1,$T2,$T1 # h1*r3
35719 ++ vpaddq $T4,$D0,$D0 # d0 += h3*s2
35720 ++
35721 ++ vmovdqa -0x10(%r11),$T4 # s4^4
35722 ++ vpaddq $T1,$D4,$D4 # d4 += h1*r3
35723 ++ vpmuludq $H0,$T2,$T2 # h0*r3
35724 ++ vpmuludq $H4,$T3,$T0 # h4*s3
35725 ++ vpaddq $T2,$D3,$D3 # d3 += h0*r3
35726 ++ vpaddq $T0,$D2,$D2 # d2 += h4*s3
35727 ++ vmovdqu 16*2($inp),$T0 # load input
35728 ++ vpmuludq $H3,$T3,$T2 # h3*s3
35729 ++ vpmuludq $H2,$T3,$T3 # h2*s3
35730 ++ vpaddq $T2,$D1,$D1 # d1 += h3*s3
35731 ++ vmovdqu 16*3($inp),$T1 #
35732 ++ vpaddq $T3,$D0,$D0 # d0 += h2*s3
35733 ++
35734 ++ vpmuludq $H2,$T4,$H2 # h2*s4
35735 ++ vpmuludq $H3,$T4,$H3 # h3*s4
35736 ++ vpsrldq \$6,$T0,$T2 # splat input
35737 ++ vpaddq $H2,$D1,$D1 # d1 += h2*s4
35738 ++ vpmuludq $H4,$T4,$H4 # h4*s4
35739 ++ vpsrldq \$6,$T1,$T3 #
35740 ++ vpaddq $H3,$D2,$H2 # h2 = d2 + h3*s4
35741 ++ vpaddq $H4,$D3,$H3 # h3 = d3 + h4*s4
35742 ++ vpmuludq -0x20(%r11),$H0,$H4 # h0*r4
35743 ++ vpmuludq $H1,$T4,$H0
35744 ++ vpunpckhqdq $T1,$T0,$T4 # 4
35745 ++ vpaddq $H4,$D4,$H4 # h4 = d4 + h0*r4
35746 ++ vpaddq $H0,$D0,$H0 # h0 = d0 + h1*s4
35747 ++
35748 ++ vpunpcklqdq $T1,$T0,$T0 # 0:1
35749 ++ vpunpcklqdq $T3,$T2,$T3 # 2:3
35750 ++
35751 ++ #vpsrlq \$40,$T4,$T4 # 4
35752 ++ vpsrldq \$`40/8`,$T4,$T4 # 4
35753 ++ vpsrlq \$26,$T0,$T1
35754 ++ vmovdqa 0x00(%rsp),$D4 # preload r0^2
35755 ++ vpand $MASK,$T0,$T0 # 0
35756 ++ vpsrlq \$4,$T3,$T2
35757 ++ vpand $MASK,$T1,$T1 # 1
35758 ++ vpand 0(%rcx),$T4,$T4 # .Lmask24
35759 ++ vpsrlq \$30,$T3,$T3
35760 ++ vpand $MASK,$T2,$T2 # 2
35761 ++ vpand $MASK,$T3,$T3 # 3
35762 ++ vpor 32(%rcx),$T4,$T4 # padbit, yes, always
35763 ++
35764 ++ ################################################################
35765 ++ # lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
35766 ++ # and P. Schwabe
35767 ++
35768 ++ vpsrlq \$26,$H3,$D3
35769 ++ vpand $MASK,$H3,$H3
35770 ++ vpaddq $D3,$H4,$H4 # h3 -> h4
35771 ++
35772 ++ vpsrlq \$26,$H0,$D0
35773 ++ vpand $MASK,$H0,$H0
35774 ++ vpaddq $D0,$D1,$H1 # h0 -> h1
35775 ++
35776 ++ vpsrlq \$26,$H4,$D0
35777 ++ vpand $MASK,$H4,$H4
35778 ++
35779 ++ vpsrlq \$26,$H1,$D1
35780 ++ vpand $MASK,$H1,$H1
35781 ++ vpaddq $D1,$H2,$H2 # h1 -> h2
35782 ++
35783 ++ vpaddq $D0,$H0,$H0
35784 ++ vpsllq \$2,$D0,$D0
35785 ++ vpaddq $D0,$H0,$H0 # h4 -> h0
35786 ++
35787 ++ vpsrlq \$26,$H2,$D2
35788 ++ vpand $MASK,$H2,$H2
35789 ++ vpaddq $D2,$H3,$H3 # h2 -> h3
35790 ++
35791 ++ vpsrlq \$26,$H0,$D0
35792 ++ vpand $MASK,$H0,$H0
35793 ++ vpaddq $D0,$H1,$H1 # h0 -> h1
35794 ++
35795 ++ vpsrlq \$26,$H3,$D3
35796 ++ vpand $MASK,$H3,$H3
35797 ++ vpaddq $D3,$H4,$H4 # h3 -> h4
35798 ++
35799 ++ ja .Loop_avx
35800 ++
35801 ++.Lskip_loop_avx:
35802 ++ ################################################################
35803 ++ # multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
35804 ++
35805 ++ vpshufd \$0x10,$D4,$D4 # r0^n, xx12 -> x1x2
35806 ++ add \$32,$len
35807 ++ jnz .Long_tail_avx
35808 ++
35809 ++ vpaddq $H2,$T2,$T2
35810 ++ vpaddq $H0,$T0,$T0
35811 ++ vpaddq $H1,$T1,$T1
35812 ++ vpaddq $H3,$T3,$T3
35813 ++ vpaddq $H4,$T4,$T4
35814 ++
35815 ++.Long_tail_avx:
35816 ++ vmovdqa $H2,0x20(%r11)
35817 ++ vmovdqa $H0,0x00(%r11)
35818 ++ vmovdqa $H1,0x10(%r11)
35819 ++ vmovdqa $H3,0x30(%r11)
35820 ++ vmovdqa $H4,0x40(%r11)
35821 ++
35822 ++ # d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4
35823 ++ # d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4
35824 ++ # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4
35825 ++ # d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4
35826 ++ # d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
35827 ++
35828 ++ vpmuludq $T2,$D4,$D2 # d2 = h2*r0
35829 ++ vpmuludq $T0,$D4,$D0 # d0 = h0*r0
35830 ++ vpshufd \$0x10,`16*1-64`($ctx),$H2 # r1^n
35831 ++ vpmuludq $T1,$D4,$D1 # d1 = h1*r0
35832 ++ vpmuludq $T3,$D4,$D3 # d3 = h3*r0
35833 ++ vpmuludq $T4,$D4,$D4 # d4 = h4*r0
35834 ++
35835 ++ vpmuludq $T3,$H2,$H0 # h3*r1
35836 ++ vpaddq $H0,$D4,$D4 # d4 += h3*r1
35837 ++ vpshufd \$0x10,`16*2-64`($ctx),$H3 # s1^n
35838 ++ vpmuludq $T2,$H2,$H1 # h2*r1
35839 ++ vpaddq $H1,$D3,$D3 # d3 += h2*r1
35840 ++ vpshufd \$0x10,`16*3-64`($ctx),$H4 # r2^n
35841 ++ vpmuludq $T1,$H2,$H0 # h1*r1
35842 ++ vpaddq $H0,$D2,$D2 # d2 += h1*r1
35843 ++ vpmuludq $T0,$H2,$H2 # h0*r1
35844 ++ vpaddq $H2,$D1,$D1 # d1 += h0*r1
35845 ++ vpmuludq $T4,$H3,$H3 # h4*s1
35846 ++ vpaddq $H3,$D0,$D0 # d0 += h4*s1
35847 ++
35848 ++ vpshufd \$0x10,`16*4-64`($ctx),$H2 # s2^n
35849 ++ vpmuludq $T2,$H4,$H1 # h2*r2
35850 ++ vpaddq $H1,$D4,$D4 # d4 += h2*r2
35851 ++ vpmuludq $T1,$H4,$H0 # h1*r2
35852 ++ vpaddq $H0,$D3,$D3 # d3 += h1*r2
35853 ++ vpshufd \$0x10,`16*5-64`($ctx),$H3 # r3^n
35854 ++ vpmuludq $T0,$H4,$H4 # h0*r2
35855 ++ vpaddq $H4,$D2,$D2 # d2 += h0*r2
35856 ++ vpmuludq $T4,$H2,$H1 # h4*s2
35857 ++ vpaddq $H1,$D1,$D1 # d1 += h4*s2
35858 ++ vpshufd \$0x10,`16*6-64`($ctx),$H4 # s3^n
35859 ++ vpmuludq $T3,$H2,$H2 # h3*s2
35860 ++ vpaddq $H2,$D0,$D0 # d0 += h3*s2
35861 ++
35862 ++ vpmuludq $T1,$H3,$H0 # h1*r3
35863 ++ vpaddq $H0,$D4,$D4 # d4 += h1*r3
35864 ++ vpmuludq $T0,$H3,$H3 # h0*r3
35865 ++ vpaddq $H3,$D3,$D3 # d3 += h0*r3
35866 ++ vpshufd \$0x10,`16*7-64`($ctx),$H2 # r4^n
35867 ++ vpmuludq $T4,$H4,$H1 # h4*s3
35868 ++ vpaddq $H1,$D2,$D2 # d2 += h4*s3
35869 ++ vpshufd \$0x10,`16*8-64`($ctx),$H3 # s4^n
35870 ++ vpmuludq $T3,$H4,$H0 # h3*s3
35871 ++ vpaddq $H0,$D1,$D1 # d1 += h3*s3
35872 ++ vpmuludq $T2,$H4,$H4 # h2*s3
35873 ++ vpaddq $H4,$D0,$D0 # d0 += h2*s3
35874 ++
35875 ++ vpmuludq $T0,$H2,$H2 # h0*r4
35876 ++ vpaddq $H2,$D4,$D4 # h4 = d4 + h0*r4
35877 ++ vpmuludq $T4,$H3,$H1 # h4*s4
35878 ++ vpaddq $H1,$D3,$D3 # h3 = d3 + h4*s4
35879 ++ vpmuludq $T3,$H3,$H0 # h3*s4
35880 ++ vpaddq $H0,$D2,$D2 # h2 = d2 + h3*s4
35881 ++ vpmuludq $T2,$H3,$H1 # h2*s4
35882 ++ vpaddq $H1,$D1,$D1 # h1 = d1 + h2*s4
35883 ++ vpmuludq $T1,$H3,$H3 # h1*s4
35884 ++ vpaddq $H3,$D0,$D0 # h0 = d0 + h1*s4
35885 ++
35886 ++ jz .Lshort_tail_avx
35887 ++
35888 ++ vmovdqu 16*0($inp),$H0 # load input
35889 ++ vmovdqu 16*1($inp),$H1
35890 ++
35891 ++ vpsrldq \$6,$H0,$H2 # splat input
35892 ++ vpsrldq \$6,$H1,$H3
35893 ++ vpunpckhqdq $H1,$H0,$H4 # 4
35894 ++ vpunpcklqdq $H1,$H0,$H0 # 0:1
35895 ++ vpunpcklqdq $H3,$H2,$H3 # 2:3
35896 ++
35897 ++ vpsrlq \$40,$H4,$H4 # 4
35898 ++ vpsrlq \$26,$H0,$H1
35899 ++ vpand $MASK,$H0,$H0 # 0
35900 ++ vpsrlq \$4,$H3,$H2
35901 ++ vpand $MASK,$H1,$H1 # 1
35902 ++ vpsrlq \$30,$H3,$H3
35903 ++ vpand $MASK,$H2,$H2 # 2
35904 ++ vpand $MASK,$H3,$H3 # 3
35905 ++ vpor 32(%rcx),$H4,$H4 # padbit, yes, always
35906 ++
35907 ++ vpshufd \$0x32,`16*0-64`($ctx),$T4 # r0^n, 34xx -> x3x4
35908 ++ vpaddq 0x00(%r11),$H0,$H0
35909 ++ vpaddq 0x10(%r11),$H1,$H1
35910 ++ vpaddq 0x20(%r11),$H2,$H2
35911 ++ vpaddq 0x30(%r11),$H3,$H3
35912 ++ vpaddq 0x40(%r11),$H4,$H4
35913 ++
35914 ++ ################################################################
35915 ++ # multiply (inp[0:1]+hash) by r^4:r^3 and accumulate
35916 ++
35917 ++ vpmuludq $H0,$T4,$T0 # h0*r0
35918 ++ vpaddq $T0,$D0,$D0 # d0 += h0*r0
35919 ++ vpmuludq $H1,$T4,$T1 # h1*r0
35920 ++ vpaddq $T1,$D1,$D1 # d1 += h1*r0
35921 ++ vpmuludq $H2,$T4,$T0 # h2*r0
35922 ++ vpaddq $T0,$D2,$D2 # d2 += h2*r0
35923 ++ vpshufd \$0x32,`16*1-64`($ctx),$T2 # r1^n
35924 ++ vpmuludq $H3,$T4,$T1 # h3*r0
35925 ++ vpaddq $T1,$D3,$D3 # d3 += h3*r0
35926 ++ vpmuludq $H4,$T4,$T4 # h4*r0
35927 ++ vpaddq $T4,$D4,$D4 # d4 += h4*r0
35928 ++
35929 ++ vpmuludq $H3,$T2,$T0 # h3*r1
35930 ++ vpaddq $T0,$D4,$D4 # d4 += h3*r1
35931 ++ vpshufd \$0x32,`16*2-64`($ctx),$T3 # s1
35932 ++ vpmuludq $H2,$T2,$T1 # h2*r1
35933 ++ vpaddq $T1,$D3,$D3 # d3 += h2*r1
35934 ++ vpshufd \$0x32,`16*3-64`($ctx),$T4 # r2
35935 ++ vpmuludq $H1,$T2,$T0 # h1*r1
35936 ++ vpaddq $T0,$D2,$D2 # d2 += h1*r1
35937 ++ vpmuludq $H0,$T2,$T2 # h0*r1
35938 ++ vpaddq $T2,$D1,$D1 # d1 += h0*r1
35939 ++ vpmuludq $H4,$T3,$T3 # h4*s1
35940 ++ vpaddq $T3,$D0,$D0 # d0 += h4*s1
35941 ++
35942 ++ vpshufd \$0x32,`16*4-64`($ctx),$T2 # s2
35943 ++ vpmuludq $H2,$T4,$T1 # h2*r2
35944 ++ vpaddq $T1,$D4,$D4 # d4 += h2*r2
35945 ++ vpmuludq $H1,$T4,$T0 # h1*r2
35946 ++ vpaddq $T0,$D3,$D3 # d3 += h1*r2
35947 ++ vpshufd \$0x32,`16*5-64`($ctx),$T3 # r3
35948 ++ vpmuludq $H0,$T4,$T4 # h0*r2
35949 ++ vpaddq $T4,$D2,$D2 # d2 += h0*r2
35950 ++ vpmuludq $H4,$T2,$T1 # h4*s2
35951 ++ vpaddq $T1,$D1,$D1 # d1 += h4*s2
35952 ++ vpshufd \$0x32,`16*6-64`($ctx),$T4 # s3
35953 ++ vpmuludq $H3,$T2,$T2 # h3*s2
35954 ++ vpaddq $T2,$D0,$D0 # d0 += h3*s2
35955 ++
35956 ++ vpmuludq $H1,$T3,$T0 # h1*r3
35957 ++ vpaddq $T0,$D4,$D4 # d4 += h1*r3
35958 ++ vpmuludq $H0,$T3,$T3 # h0*r3
35959 ++ vpaddq $T3,$D3,$D3 # d3 += h0*r3
35960 ++ vpshufd \$0x32,`16*7-64`($ctx),$T2 # r4
35961 ++ vpmuludq $H4,$T4,$T1 # h4*s3
35962 ++ vpaddq $T1,$D2,$D2 # d2 += h4*s3
35963 ++ vpshufd \$0x32,`16*8-64`($ctx),$T3 # s4
35964 ++ vpmuludq $H3,$T4,$T0 # h3*s3
35965 ++ vpaddq $T0,$D1,$D1 # d1 += h3*s3
35966 ++ vpmuludq $H2,$T4,$T4 # h2*s3
35967 ++ vpaddq $T4,$D0,$D0 # d0 += h2*s3
35968 ++
35969 ++ vpmuludq $H0,$T2,$T2 # h0*r4
35970 ++ vpaddq $T2,$D4,$D4 # d4 += h0*r4
35971 ++ vpmuludq $H4,$T3,$T1 # h4*s4
35972 ++ vpaddq $T1,$D3,$D3 # d3 += h4*s4
35973 ++ vpmuludq $H3,$T3,$T0 # h3*s4
35974 ++ vpaddq $T0,$D2,$D2 # d2 += h3*s4
35975 ++ vpmuludq $H2,$T3,$T1 # h2*s4
35976 ++ vpaddq $T1,$D1,$D1 # d1 += h2*s4
35977 ++ vpmuludq $H1,$T3,$T3 # h1*s4
35978 ++ vpaddq $T3,$D0,$D0 # d0 += h1*s4
35979 ++
35980 ++.Lshort_tail_avx:
35981 ++ ################################################################
35982 ++ # horizontal addition
35983 ++
35984 ++ vpsrldq \$8,$D4,$T4
35985 ++ vpsrldq \$8,$D3,$T3
35986 ++ vpsrldq \$8,$D1,$T1
35987 ++ vpsrldq \$8,$D0,$T0
35988 ++ vpsrldq \$8,$D2,$T2
35989 ++ vpaddq $T3,$D3,$D3
35990 ++ vpaddq $T4,$D4,$D4
35991 ++ vpaddq $T0,$D0,$D0
35992 ++ vpaddq $T1,$D1,$D1
35993 ++ vpaddq $T2,$D2,$D2
35994 ++
35995 ++ ################################################################
35996 ++ # lazy reduction
35997 ++
35998 ++ vpsrlq \$26,$D3,$H3
35999 ++ vpand $MASK,$D3,$D3
36000 ++ vpaddq $H3,$D4,$D4 # h3 -> h4
36001 ++
36002 ++ vpsrlq \$26,$D0,$H0
36003 ++ vpand $MASK,$D0,$D0
36004 ++ vpaddq $H0,$D1,$D1 # h0 -> h1
36005 ++
36006 ++ vpsrlq \$26,$D4,$H4
36007 ++ vpand $MASK,$D4,$D4
36008 ++
36009 ++ vpsrlq \$26,$D1,$H1
36010 ++ vpand $MASK,$D1,$D1
36011 ++ vpaddq $H1,$D2,$D2 # h1 -> h2
36012 ++
36013 ++ vpaddq $H4,$D0,$D0
36014 ++ vpsllq \$2,$H4,$H4
36015 ++ vpaddq $H4,$D0,$D0 # h4 -> h0
36016 ++
36017 ++ vpsrlq \$26,$D2,$H2
36018 ++ vpand $MASK,$D2,$D2
36019 ++ vpaddq $H2,$D3,$D3 # h2 -> h3
36020 ++
36021 ++ vpsrlq \$26,$D0,$H0
36022 ++ vpand $MASK,$D0,$D0
36023 ++ vpaddq $H0,$D1,$D1 # h0 -> h1
36024 ++
36025 ++ vpsrlq \$26,$D3,$H3
36026 ++ vpand $MASK,$D3,$D3
36027 ++ vpaddq $H3,$D4,$D4 # h3 -> h4
36028 ++
36029 ++ vmovd $D0,`4*0-48-64`($ctx) # save partially reduced
36030 ++ vmovd $D1,`4*1-48-64`($ctx)
36031 ++ vmovd $D2,`4*2-48-64`($ctx)
36032 ++ vmovd $D3,`4*3-48-64`($ctx)
36033 ++ vmovd $D4,`4*4-48-64`($ctx)
36034 ++___
36035 ++$code.=<<___ if ($win64);
36036 ++ vmovdqa 0x50(%r11),%xmm6
36037 ++ vmovdqa 0x60(%r11),%xmm7
36038 ++ vmovdqa 0x70(%r11),%xmm8
36039 ++ vmovdqa 0x80(%r11),%xmm9
36040 ++ vmovdqa 0x90(%r11),%xmm10
36041 ++ vmovdqa 0xa0(%r11),%xmm11
36042 ++ vmovdqa 0xb0(%r11),%xmm12
36043 ++ vmovdqa 0xc0(%r11),%xmm13
36044 ++ vmovdqa 0xd0(%r11),%xmm14
36045 ++ vmovdqa 0xe0(%r11),%xmm15
36046 ++ lea 0xf8(%r11),%rsp
36047 ++.Ldo_avx_epilogue:
36048 ++___
36049 ++$code.=<<___ if (!$win64);
36050 ++ lea 0x58(%r11),%rsp
36051 ++.cfi_def_cfa %rsp,8
36052 ++___
36053 ++$code.=<<___;
36054 ++ vzeroupper
36055 ++ ret
36056 ++.cfi_endproc
36057 ++.size poly1305_blocks_avx,.-poly1305_blocks_avx
36058 ++
36059 ++.type poly1305_emit_avx,\@function,3
36060 ++.align 32
36061 ++poly1305_emit_avx: # emit tag: convert base 2^26 hash at $ctx to base 2^64, reduce, add 16-byte $nonce, store 16 bytes at $mac
36062 ++ cmpl \$0,20($ctx) # is_base2_26?
36063 ++ je .Lemit # flag is zero: hash is not in base 2^26 form, hand off to .Lemit (label defined outside this excerpt)
36064 ++
36065 ++ mov 0($ctx),%eax # load hash value base 2^26: h[0] (32-bit loads zero-extend)
36066 ++ mov 4($ctx),%ecx # h[1]
36067 ++ mov 8($ctx),%r8d # h[2]
36068 ++ mov 12($ctx),%r11d # h[3]
36069 ++ mov 16($ctx),%r10d # h[4]
36070 ++
36071 ++ shl \$26,%rcx # base 2^26 -> base 2^64
36072 ++ mov %r8,%r9 # copy h[2]: its weight 2^52 straddles h0 and h1
36073 ++ shl \$52,%r8 # low 12 bits of h[2] land in bits 52..63 of h0
36074 ++ add %rcx,%rax # h[0] + (h[1]<<26)
36075 ++ shr \$12,%r9 # remaining bits of h[2] start h1
36076 ++ add %rax,%r8 # h0
36077 ++ adc \$0,%r9 # carry out of h0 goes into h1
36078 ++
36079 ++ shl \$14,%r11 # h[3] has weight 2^78 = 2^64 * 2^14
36080 ++ mov %r10,%rax # copy h[4]: its weight 2^104 straddles h1 and h2
36081 ++ shr \$24,%r10 # bits of h[4] at or above 2^128
36082 ++ add %r11,%r9 # h1 += h[3]<<14
36083 ++ shl \$40,%rax # low 24 bits of h[4] land in bits 40..63 of h1
36084 ++ add %rax,%r9 # h1
36085 ++ adc \$0,%r10 # h2
36086 ++
36087 ++ mov %r10,%rax # could be partially reduced, so reduce
36088 ++ mov %r10,%rcx # second copy of h2
36089 ++ and \$3,%r10 # keep only bits 128..129 in h2
36090 ++ shr \$2,%rax # c = h2>>2, the part at or above 2^130
36091 ++ and \$-4,%rcx # h2 with low two bits cleared = 4*c
36092 ++ add %rcx,%rax # c + 4*c = 5*c (2^130 is congruent to 5 modulo 2^130-5)
36093 ++ add %rax,%r8 # fold 5*c back into h0
36094 ++ adc \$0,%r9 # ripple the carry through h1
36095 ++ adc \$0,%r10 # and into h2
36096 ++
36097 ++ mov %r8,%rax # keep h0 in case h+5 does not overflow
36098 ++ add \$5,%r8 # compare to modulus
36099 ++ mov %r9,%rcx # keep h1 likewise (mov leaves the carry flag intact)
36100 ++ adc \$0,%r9 # propagate carry of h+5 through h1
36101 ++ adc \$0,%r10 # and through h2
36102 ++ shr \$2,%r10 # did 130-bit value overflow?
36103 ++ cmovnz %r8,%rax # yes: select low 64 bits of h+5
36104 ++ cmovnz %r9,%rcx # yes: select next 64 bits of h+5
36105 ++
36106 ++ add 0($nonce),%rax # accumulate nonce
36107 ++ adc 8($nonce),%rcx # carry out of bit 127 is not used
36108 ++ mov %rax,0($mac) # write result
36109 ++ mov %rcx,8($mac) # upper 8 bytes of the 16-byte result
36110 ++
36111 ++ ret
36112 ++.size poly1305_emit_avx,.-poly1305_emit_avx
36113 ++___
36114 ++
36115 ++if ($avx>1) {
36116 ++my ($H0,$H1,$H2,$H3,$H4, $MASK, $T4,$T0,$T1,$T2,$T3, $D0,$D1,$D2,$D3,$D4) =
36117 ++ map("%ymm$_",(0..15));
36118 ++my $S4=$MASK;
36119 ++
36120 ++$code.=<<___;
36121 ++.type poly1305_blocks_avx2,\@function,4
36122 ++.align 32
36123 ++poly1305_blocks_avx2:
36124 ++.cfi_startproc
36125 ++ mov 20($ctx),%r8d # is_base2_26
36126 ++ cmp \$128,$len
36127 ++ jae .Lblocks_avx2
36128 ++ test %r8d,%r8d
36129 ++ jz .Lblocks
36130 ++
36131 ++.Lblocks_avx2:
36132 ++ and \$-16,$len
36133 ++ jz .Lno_data_avx2
36134 ++
36135 ++ vzeroupper
36136 ++
36137 ++ test %r8d,%r8d
36138 ++ jz .Lbase2_64_avx2
36139 ++
36140 ++ test \$63,$len
36141 ++ jz .Leven_avx2
36142 ++
36143 ++ push %rbx
36144 ++.cfi_push %rbx
36145 ++ push %rbp
36146 ++.cfi_push %rbp
36147 ++ push %r12
36148 ++.cfi_push %r12
36149 ++ push %r13
36150 ++.cfi_push %r13
36151 ++ push %r14
36152 ++.cfi_push %r14
36153 ++ push %r15
36154 ++.cfi_push %r15
36155 ++.Lblocks_avx2_body:
36156 ++
36157 ++ mov $len,%r15 # reassign $len
36158 ++
36159 ++ mov 0($ctx),$d1 # load hash value
36160 ++ mov 8($ctx),$d2
36161 ++ mov 16($ctx),$h2#d
36162 ++
36163 ++ mov 24($ctx),$r0 # load r
36164 ++ mov 32($ctx),$s1
36165 ++
36166 ++ ################################# base 2^26 -> base 2^64
36167 ++ mov $d1#d,$h0#d
36168 ++ and \$`-1*(1<<31)`,$d1
36169 ++ mov $d2,$r1 # borrow $r1
36170 ++ mov $d2#d,$h1#d
36171 ++ and \$`-1*(1<<31)`,$d2
36172 ++
36173 ++ shr \$6,$d1
36174 ++ shl \$52,$r1
36175 ++ add $d1,$h0
36176 ++ shr \$12,$h1
36177 ++ shr \$18,$d2
36178 ++ add $r1,$h0
36179 ++ adc $d2,$h1
36180 ++
36181 ++ mov $h2,$d1
36182 ++ shl \$40,$d1
36183 ++ shr \$24,$h2
36184 ++ add $d1,$h1
36185 ++ adc \$0,$h2 # can be partially reduced...
36186 ++
36187 ++ mov \$-4,$d2 # ... so reduce
36188 ++ mov $h2,$d1
36189 ++ and $h2,$d2
36190 ++ shr \$2,$d1
36191 ++ and \$3,$h2
36192 ++ add $d2,$d1 # =*5
36193 ++ add $d1,$h0
36194 ++ adc \$0,$h1
36195 ++ adc \$0,$h2
36196 ++
36197 ++ mov $s1,$r1
36198 ++ mov $s1,%rax
36199 ++ shr \$2,$s1
36200 ++ add $r1,$s1 # s1 = r1 + (r1 >> 2)
36201 ++
36202 ++.Lbase2_26_pre_avx2:
36203 ++ add 0($inp),$h0 # accumulate input
36204 ++ adc 8($inp),$h1
36205 ++ lea 16($inp),$inp
36206 ++ adc $padbit,$h2
36207 ++ sub \$16,%r15
36208 ++
36209 ++ call __poly1305_block
36210 ++ mov $r1,%rax
36211 ++
36212 ++ test \$63,%r15
36213 ++ jnz .Lbase2_26_pre_avx2
36214 ++
36215 ++ test $padbit,$padbit # if $padbit is zero,
36216 ++ jz .Lstore_base2_64_avx2 # store hash in base 2^64 format
36217 ++
36218 ++ ################################# base 2^64 -> base 2^26
36219 ++ mov $h0,%rax
36220 ++ mov $h0,%rdx
36221 ++ shr \$52,$h0
36222 ++ mov $h1,$r0
36223 ++ mov $h1,$r1
36224 ++ shr \$26,%rdx
36225 ++ and \$0x3ffffff,%rax # h[0]
36226 ++ shl \$12,$r0
36227 ++ and \$0x3ffffff,%rdx # h[1]
36228 ++ shr \$14,$h1
36229 ++ or $r0,$h0
36230 ++ shl \$24,$h2
36231 ++ and \$0x3ffffff,$h0 # h[2]
36232 ++ shr \$40,$r1
36233 ++ and \$0x3ffffff,$h1 # h[3]
36234 ++ or $r1,$h2 # h[4]
36235 ++
36236 ++ test %r15,%r15
36237 ++ jz .Lstore_base2_26_avx2
36238 ++
36239 ++ vmovd %rax#d,%x#$H0
36240 ++ vmovd %rdx#d,%x#$H1
36241 ++ vmovd $h0#d,%x#$H2
36242 ++ vmovd $h1#d,%x#$H3
36243 ++ vmovd $h2#d,%x#$H4
36244 ++ jmp .Lproceed_avx2
36245 ++
36246 ++.align 32
36247 ++.Lstore_base2_64_avx2:
36248 ++ mov $h0,0($ctx)
36249 ++ mov $h1,8($ctx)
36250 ++ mov $h2,16($ctx) # note that is_base2_26 is zeroed
36251 ++ jmp .Ldone_avx2
36252 ++
36253 ++.align 16
36254 ++.Lstore_base2_26_avx2:
36255 ++ mov %rax#d,0($ctx) # store hash value base 2^26
36256 ++ mov %rdx#d,4($ctx)
36257 ++ mov $h0#d,8($ctx)
36258 ++ mov $h1#d,12($ctx)
36259 ++ mov $h2#d,16($ctx)
36260 ++.align 16
36261 ++.Ldone_avx2:
36262 ++ mov 0(%rsp),%r15
36263 ++.cfi_restore %r15
36264 ++ mov 8(%rsp),%r14
36265 ++.cfi_restore %r14
36266 ++ mov 16(%rsp),%r13
36267 ++.cfi_restore %r13
36268 ++ mov 24(%rsp),%r12
36269 ++.cfi_restore %r12
36270 ++ mov 32(%rsp),%rbp
36271 ++.cfi_restore %rbp
36272 ++ mov 40(%rsp),%rbx
36273 ++.cfi_restore %rbx
36274 ++ lea 48(%rsp),%rsp
36275 ++.cfi_adjust_cfa_offset -48
36276 ++.Lno_data_avx2:
36277 ++.Lblocks_avx2_epilogue:
36278 ++ ret
36279 ++.cfi_endproc
36280 ++
36281 ++.align 32
36282 ++.Lbase2_64_avx2:
36283 ++.cfi_startproc
36284 ++ push %rbx
36285 ++.cfi_push %rbx
36286 ++ push %rbp
36287 ++.cfi_push %rbp
36288 ++ push %r12
36289 ++.cfi_push %r12
36290 ++ push %r13
36291 ++.cfi_push %r13
36292 ++ push %r14
36293 ++.cfi_push %r14
36294 ++ push %r15
36295 ++.cfi_push %r15
36296 ++.Lbase2_64_avx2_body:
36297 ++
36298 ++ mov $len,%r15 # reassign $len
36299 ++
36300 ++ mov 24($ctx),$r0 # load r
36301 ++ mov 32($ctx),$s1
36302 ++
36303 ++ mov 0($ctx),$h0 # load hash value
36304 ++ mov 8($ctx),$h1
36305 ++ mov 16($ctx),$h2#d
36306 ++
36307 ++ mov $s1,$r1
36308 ++ mov $s1,%rax
36309 ++ shr \$2,$s1
36310 ++ add $r1,$s1 # s1 = r1 + (r1 >> 2)
36311 ++
36312 ++ test \$63,$len
36313 ++ jz .Linit_avx2
36314 ++
36315 ++.Lbase2_64_pre_avx2:
36316 ++ add 0($inp),$h0 # accumulate input
36317 ++ adc 8($inp),$h1
36318 ++ lea 16($inp),$inp
36319 ++ adc $padbit,$h2
36320 ++ sub \$16,%r15
36321 ++
36322 ++ call __poly1305_block
36323 ++ mov $r1,%rax
36324 ++
36325 ++ test \$63,%r15
36326 ++ jnz .Lbase2_64_pre_avx2
36327 ++
36328 ++.Linit_avx2:
36329 ++ ################################# base 2^64 -> base 2^26
36330 ++ mov $h0,%rax
36331 ++ mov $h0,%rdx
36332 ++ shr \$52,$h0
36333 ++ mov $h1,$d1
36334 ++ mov $h1,$d2
36335 ++ shr \$26,%rdx
36336 ++ and \$0x3ffffff,%rax # h[0]
36337 ++ shl \$12,$d1
36338 ++ and \$0x3ffffff,%rdx # h[1]
36339 ++ shr \$14,$h1
36340 ++ or $d1,$h0
36341 ++ shl \$24,$h2
36342 ++ and \$0x3ffffff,$h0 # h[2]
36343 ++ shr \$40,$d2
36344 ++ and \$0x3ffffff,$h1 # h[3]
36345 ++ or $d2,$h2 # h[4]
36346 ++
36347 ++ vmovd %rax#d,%x#$H0
36348 ++ vmovd %rdx#d,%x#$H1
36349 ++ vmovd $h0#d,%x#$H2
36350 ++ vmovd $h1#d,%x#$H3
36351 ++ vmovd $h2#d,%x#$H4
36352 ++ movl \$1,20($ctx) # set is_base2_26
36353 ++
36354 ++ call __poly1305_init_avx
36355 ++
36356 ++.Lproceed_avx2:
36357 ++ mov %r15,$len # restore $len
36358 ++ mov OPENSSL_ia32cap_P+8(%rip),%r10d
36359 ++ mov \$`(1<<31|1<<30|1<<16)`,%r11d
36360 ++
36361 ++ mov 0(%rsp),%r15
36362 ++.cfi_restore %r15
36363 ++ mov 8(%rsp),%r14
36364 ++.cfi_restore %r14
36365 ++ mov 16(%rsp),%r13
36366 ++.cfi_restore %r13
36367 ++ mov 24(%rsp),%r12
36368 ++.cfi_restore %r12
36369 ++ mov 32(%rsp),%rbp
36370 ++.cfi_restore %rbp
36371 ++ mov 40(%rsp),%rbx
36372 ++.cfi_restore %rbx
36373 ++ lea 48(%rsp),%rax
36374 ++ lea 48(%rsp),%rsp
36375 ++.cfi_adjust_cfa_offset -48
36376 ++.Lbase2_64_avx2_epilogue:
36377 ++ jmp .Ldo_avx2
36378 ++.cfi_endproc
36379 ++
36380 ++.align 32
36381 ++.Leven_avx2:
36382 ++.cfi_startproc
36383 ++ mov OPENSSL_ia32cap_P+8(%rip),%r10d
36384 ++ vmovd 4*0($ctx),%x#$H0 # load hash value base 2^26
36385 ++ vmovd 4*1($ctx),%x#$H1
36386 ++ vmovd 4*2($ctx),%x#$H2
36387 ++ vmovd 4*3($ctx),%x#$H3
36388 ++ vmovd 4*4($ctx),%x#$H4
36389 ++
36390 ++.Ldo_avx2:
36391 ++___
36392 ++$code.=<<___ if ($avx>2);
36393 ++ cmp \$512,$len
36394 ++ jb .Lskip_avx512
36395 ++ and %r11d,%r10d
36396 ++ test \$`1<<16`,%r10d # check for AVX512F
36397 ++ jnz .Lblocks_avx512
36398 ++.Lskip_avx512:
36399 ++___
36400 ++$code.=<<___ if (!$win64);
36401 ++ lea -8(%rsp),%r11
36402 ++.cfi_def_cfa %r11,16
36403 ++ sub \$0x128,%rsp
36404 ++___
36405 ++$code.=<<___ if ($win64);
36406 ++ lea -0xf8(%rsp),%r11
36407 ++ sub \$0x1c8,%rsp
36408 ++ vmovdqa %xmm6,0x50(%r11)
36409 ++ vmovdqa %xmm7,0x60(%r11)
36410 ++ vmovdqa %xmm8,0x70(%r11)
36411 ++ vmovdqa %xmm9,0x80(%r11)
36412 ++ vmovdqa %xmm10,0x90(%r11)
36413 ++ vmovdqa %xmm11,0xa0(%r11)
36414 ++ vmovdqa %xmm12,0xb0(%r11)
36415 ++ vmovdqa %xmm13,0xc0(%r11)
36416 ++ vmovdqa %xmm14,0xd0(%r11)
36417 ++ vmovdqa %xmm15,0xe0(%r11)
36418 ++.Ldo_avx2_body:
36419 ++___
36420 ++$code.=<<___;
36421 ++ lea .Lconst(%rip),%rcx
36422 ++ lea 48+64($ctx),$ctx # size optimization
36423 ++ vmovdqa 96(%rcx),$T0 # .Lpermd_avx2
36424 ++
36425 ++ # expand and copy pre-calculated table to stack
36426 ++ vmovdqu `16*0-64`($ctx),%x#$T2
36427 ++ and \$-512,%rsp
36428 ++ vmovdqu `16*1-64`($ctx),%x#$T3
36429 ++ vmovdqu `16*2-64`($ctx),%x#$T4
36430 ++ vmovdqu `16*3-64`($ctx),%x#$D0
36431 ++ vmovdqu `16*4-64`($ctx),%x#$D1
36432 ++ vmovdqu `16*5-64`($ctx),%x#$D2
36433 ++ lea 0x90(%rsp),%rax # size optimization
36434 ++ vmovdqu `16*6-64`($ctx),%x#$D3
36435 ++ vpermd $T2,$T0,$T2 # 00003412 -> 14243444
36436 ++ vmovdqu `16*7-64`($ctx),%x#$D4
36437 ++ vpermd $T3,$T0,$T3
36438 ++ vmovdqu `16*8-64`($ctx),%x#$MASK
36439 ++ vpermd $T4,$T0,$T4
36440 ++ vmovdqa $T2,0x00(%rsp)
36441 ++ vpermd $D0,$T0,$D0
36442 ++ vmovdqa $T3,0x20-0x90(%rax)
36443 ++ vpermd $D1,$T0,$D1
36444 ++ vmovdqa $T4,0x40-0x90(%rax)
36445 ++ vpermd $D2,$T0,$D2
36446 ++ vmovdqa $D0,0x60-0x90(%rax)
36447 ++ vpermd $D3,$T0,$D3
36448 ++ vmovdqa $D1,0x80-0x90(%rax)
36449 ++ vpermd $D4,$T0,$D4
36450 ++ vmovdqa $D2,0xa0-0x90(%rax)
36451 ++ vpermd $MASK,$T0,$MASK
36452 ++ vmovdqa $D3,0xc0-0x90(%rax)
36453 ++ vmovdqa $D4,0xe0-0x90(%rax)
36454 ++ vmovdqa $MASK,0x100-0x90(%rax)
36455 ++ vmovdqa 64(%rcx),$MASK # .Lmask26
36456 ++
36457 ++ ################################################################
36458 ++ # load input
36459 ++ vmovdqu 16*0($inp),%x#$T0
36460 ++ vmovdqu 16*1($inp),%x#$T1
36461 ++ vinserti128 \$1,16*2($inp),$T0,$T0
36462 ++ vinserti128 \$1,16*3($inp),$T1,$T1
36463 ++ lea 16*4($inp),$inp
36464 ++
36465 ++ vpsrldq \$6,$T0,$T2 # splat input
36466 ++ vpsrldq \$6,$T1,$T3
36467 ++ vpunpckhqdq $T1,$T0,$T4 # 4
36468 ++ vpunpcklqdq $T3,$T2,$T2 # 2:3
36469 ++ vpunpcklqdq $T1,$T0,$T0 # 0:1
36470 ++
36471 ++ vpsrlq \$30,$T2,$T3
36472 ++ vpsrlq \$4,$T2,$T2
36473 ++ vpsrlq \$26,$T0,$T1
36474 ++ vpsrlq \$40,$T4,$T4 # 4
36475 ++ vpand $MASK,$T2,$T2 # 2
36476 ++ vpand $MASK,$T0,$T0 # 0
36477 ++ vpand $MASK,$T1,$T1 # 1
36478 ++ vpand $MASK,$T3,$T3 # 3
36479 ++ vpor 32(%rcx),$T4,$T4 # padbit, yes, always
36480 ++
36481 ++ vpaddq $H2,$T2,$H2 # accumulate input
36482 ++ sub \$64,$len
36483 ++ jz .Ltail_avx2
36484 ++ jmp .Loop_avx2
36485 ++
36486 ++.align 32
36487 ++.Loop_avx2:
36488 ++ ################################################################
36489 ++ # ((inp[0]*r^4+inp[4])*r^4+inp[ 8])*r^4
36490 ++ # ((inp[1]*r^4+inp[5])*r^4+inp[ 9])*r^3
36491 ++ # ((inp[2]*r^4+inp[6])*r^4+inp[10])*r^2
36492 ++ # ((inp[3]*r^4+inp[7])*r^4+inp[11])*r^1
36493 ++ # \________/\__________/
36494 ++ ################################################################
36495 ++ #vpaddq $H2,$T2,$H2 # accumulate input
36496 ++ vpaddq $H0,$T0,$H0
36497 ++ vmovdqa `32*0`(%rsp),$T0 # r0^4
36498 ++ vpaddq $H1,$T1,$H1
36499 ++ vmovdqa `32*1`(%rsp),$T1 # r1^4
36500 ++ vpaddq $H3,$T3,$H3
36501 ++ vmovdqa `32*3`(%rsp),$T2 # r2^4
36502 ++ vpaddq $H4,$T4,$H4
36503 ++ vmovdqa `32*6-0x90`(%rax),$T3 # s3^4
36504 ++ vmovdqa `32*8-0x90`(%rax),$S4 # s4^4
36505 ++
36506 ++ # d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4
36507 ++ # d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4
36508 ++ # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4
36509 ++ # d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4
36510 ++ # d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
36511 ++ #
36512 ++ # however, as h2 is "chronologically" first one available pull
36513 ++ # corresponding operations up, so it's
36514 ++ #
36515 ++ # d4 = h2*r2 + h4*r0 + h3*r1 + h1*r3 + h0*r4
36516 ++ # d3 = h2*r1 + h3*r0 + h1*r2 + h0*r3 + h4*5*r4
36517 ++ # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4
36518 ++ # d1 = h2*5*r4 + h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3
36519 ++ # d0 = h2*5*r3 + h0*r0 + h4*5*r1 + h3*5*r2 + h1*5*r4
36520 ++
36521 ++ vpmuludq $H2,$T0,$D2 # d2 = h2*r0
36522 ++ vpmuludq $H2,$T1,$D3 # d3 = h2*r1
36523 ++ vpmuludq $H2,$T2,$D4 # d4 = h2*r2
36524 ++ vpmuludq $H2,$T3,$D0 # d0 = h2*s3
36525 ++ vpmuludq $H2,$S4,$D1 # d1 = h2*s4
36526 ++
36527 ++ vpmuludq $H0,$T1,$T4 # h0*r1
36528 ++ vpmuludq $H1,$T1,$H2 # h1*r1, borrow $H2 as temp
36529 ++ vpaddq $T4,$D1,$D1 # d1 += h0*r1
36530 ++ vpaddq $H2,$D2,$D2 # d2 += h1*r1
36531 ++ vpmuludq $H3,$T1,$T4 # h3*r1
36532 ++ vpmuludq `32*2`(%rsp),$H4,$H2 # h4*s1
36533 ++ vpaddq $T4,$D4,$D4 # d4 += h3*r1
36534 ++ vpaddq $H2,$D0,$D0 # d0 += h4*s1
36535 ++ vmovdqa `32*4-0x90`(%rax),$T1 # s2
36536 ++
36537 ++ vpmuludq $H0,$T0,$T4 # h0*r0
36538 ++ vpmuludq $H1,$T0,$H2 # h1*r0
36539 ++ vpaddq $T4,$D0,$D0 # d0 += h0*r0
36540 ++ vpaddq $H2,$D1,$D1 # d1 += h1*r0
36541 ++ vpmuludq $H3,$T0,$T4 # h3*r0
36542 ++ vpmuludq $H4,$T0,$H2 # h4*r0
36543 ++ vmovdqu 16*0($inp),%x#$T0 # load input
36544 ++ vpaddq $T4,$D3,$D3 # d3 += h3*r0
36545 ++ vpaddq $H2,$D4,$D4 # d4 += h4*r0
36546 ++ vinserti128 \$1,16*2($inp),$T0,$T0
36547 ++
36548 ++ vpmuludq $H3,$T1,$T4 # h3*s2
36549 ++ vpmuludq $H4,$T1,$H2 # h4*s2
36550 ++ vmovdqu 16*1($inp),%x#$T1
36551 ++ vpaddq $T4,$D0,$D0 # d0 += h3*s2
36552 ++ vpaddq $H2,$D1,$D1 # d1 += h4*s2
36553 ++ vmovdqa `32*5-0x90`(%rax),$H2 # r3
36554 ++ vpmuludq $H1,$T2,$T4 # h1*r2
36555 ++ vpmuludq $H0,$T2,$T2 # h0*r2
36556 ++ vpaddq $T4,$D3,$D3 # d3 += h1*r2
36557 ++ vpaddq $T2,$D2,$D2 # d2 += h0*r2
36558 ++ vinserti128 \$1,16*3($inp),$T1,$T1
36559 ++ lea 16*4($inp),$inp
36560 ++
36561 ++ vpmuludq $H1,$H2,$T4 # h1*r3
36562 ++ vpmuludq $H0,$H2,$H2 # h0*r3
36563 ++ vpsrldq \$6,$T0,$T2 # splat input
36564 ++ vpaddq $T4,$D4,$D4 # d4 += h1*r3
36565 ++ vpaddq $H2,$D3,$D3 # d3 += h0*r3
36566 ++ vpmuludq $H3,$T3,$T4 # h3*s3
36567 ++ vpmuludq $H4,$T3,$H2 # h4*s3
36568 ++ vpsrldq \$6,$T1,$T3
36569 ++ vpaddq $T4,$D1,$D1 # d1 += h3*s3
36570 ++ vpaddq $H2,$D2,$D2 # d2 += h4*s3
36571 ++ vpunpckhqdq $T1,$T0,$T4 # 4
36572 ++
36573 ++ vpmuludq $H3,$S4,$H3 # h3*s4
36574 ++ vpmuludq $H4,$S4,$H4 # h4*s4
36575 ++ vpunpcklqdq $T1,$T0,$T0 # 0:1
36576 ++ vpaddq $H3,$D2,$H2 # h2 = d2 + h3*s4
36577 ++ vpaddq $H4,$D3,$H3 # h3 = d3 + h4*s4
36578 ++ vpunpcklqdq $T3,$T2,$T3 # 2:3
36579 ++ vpmuludq `32*7-0x90`(%rax),$H0,$H4 # h0*r4
36580 ++ vpmuludq $H1,$S4,$H0 # h1*s4
36581 ++ vmovdqa 64(%rcx),$MASK # .Lmask26
36582 ++ vpaddq $H4,$D4,$H4 # h4 = d4 + h0*r4
36583 ++ vpaddq $H0,$D0,$H0 # h0 = d0 + h1*s4
36584 ++
36585 ++ ################################################################
36586 ++ # lazy reduction (interleaved with tail of input splat)
36587 ++
36588 ++ vpsrlq \$26,$H3,$D3
36589 ++ vpand $MASK,$H3,$H3
36590 ++ vpaddq $D3,$H4,$H4 # h3 -> h4
36591 ++
36592 ++ vpsrlq \$26,$H0,$D0
36593 ++ vpand $MASK,$H0,$H0
36594 ++ vpaddq $D0,$D1,$H1 # h0 -> h1
36595 ++
36596 ++ vpsrlq \$26,$H4,$D4
36597 ++ vpand $MASK,$H4,$H4
36598 ++
36599 ++ vpsrlq \$4,$T3,$T2
36600 ++
36601 ++ vpsrlq \$26,$H1,$D1
36602 ++ vpand $MASK,$H1,$H1
36603 ++ vpaddq $D1,$H2,$H2 # h1 -> h2
36604 ++
36605 ++ vpaddq $D4,$H0,$H0
36606 ++ vpsllq \$2,$D4,$D4
36607 ++ vpaddq $D4,$H0,$H0 # h4 -> h0
36608 ++
36609 ++ vpand $MASK,$T2,$T2 # 2
36610 ++ vpsrlq \$26,$T0,$T1
36611 ++
36612 ++ vpsrlq \$26,$H2,$D2
36613 ++ vpand $MASK,$H2,$H2
36614 ++ vpaddq $D2,$H3,$H3 # h2 -> h3
36615 ++
36616 ++ vpaddq $T2,$H2,$H2 # modulo-scheduled
36617 ++ vpsrlq \$30,$T3,$T3
36618 ++
36619 ++ vpsrlq \$26,$H0,$D0
36620 ++ vpand $MASK,$H0,$H0
36621 ++ vpaddq $D0,$H1,$H1 # h0 -> h1
36622 ++
36623 ++ vpsrlq \$40,$T4,$T4 # 4
36624 ++
36625 ++ vpsrlq \$26,$H3,$D3
36626 ++ vpand $MASK,$H3,$H3
36627 ++ vpaddq $D3,$H4,$H4 # h3 -> h4
36628 ++
36629 ++ vpand $MASK,$T0,$T0 # 0
36630 ++ vpand $MASK,$T1,$T1 # 1
36631 ++ vpand $MASK,$T3,$T3 # 3
36632 ++ vpor 32(%rcx),$T4,$T4 # padbit, yes, always
36633 ++
36634 ++ sub \$64,$len
36635 ++ jnz .Loop_avx2
36636 ++
36637 ++ .byte 0x66,0x90
36638 ++.Ltail_avx2:
36639 ++ ################################################################
36640 ++ # while above multiplications were by r^4 in all lanes, in last
36641 ++ # iteration we multiply least significant lane by r^4 and most
36642 ++ # significant one by r, so copy of above except that references
36643 ++ # to the precomputed table are displaced by 4...
36644 ++
36645 ++ #vpaddq $H2,$T2,$H2 # accumulate input
36646 ++ vpaddq $H0,$T0,$H0
36647 ++ vmovdqu `32*0+4`(%rsp),$T0 # r0^4
36648 ++ vpaddq $H1,$T1,$H1
36649 ++ vmovdqu `32*1+4`(%rsp),$T1 # r1^4
36650 ++ vpaddq $H3,$T3,$H3
36651 ++ vmovdqu `32*3+4`(%rsp),$T2 # r2^4
36652 ++ vpaddq $H4,$T4,$H4
36653 ++ vmovdqu `32*6+4-0x90`(%rax),$T3 # s3^4
36654 ++ vmovdqu `32*8+4-0x90`(%rax),$S4 # s4^4
36655 ++
36656 ++ vpmuludq $H2,$T0,$D2 # d2 = h2*r0
36657 ++ vpmuludq $H2,$T1,$D3 # d3 = h2*r1
36658 ++ vpmuludq $H2,$T2,$D4 # d4 = h2*r2
36659 ++ vpmuludq $H2,$T3,$D0 # d0 = h2*s3
36660 ++ vpmuludq $H2,$S4,$D1 # d1 = h2*s4
36661 ++
36662 ++ vpmuludq $H0,$T1,$T4 # h0*r1
36663 ++ vpmuludq $H1,$T1,$H2 # h1*r1
36664 ++ vpaddq $T4,$D1,$D1 # d1 += h0*r1
36665 ++ vpaddq $H2,$D2,$D2 # d2 += h1*r1
36666 ++ vpmuludq $H3,$T1,$T4 # h3*r1
36667 ++ vpmuludq `32*2+4`(%rsp),$H4,$H2 # h4*s1
36668 ++ vpaddq $T4,$D4,$D4 # d4 += h3*r1
36669 ++ vpaddq $H2,$D0,$D0 # d0 += h4*s1
36670 ++
36671 ++ vpmuludq $H0,$T0,$T4 # h0*r0
36672 ++ vpmuludq $H1,$T0,$H2 # h1*r0
36673 ++ vpaddq $T4,$D0,$D0 # d0 += h0*r0
36674 ++ vmovdqu `32*4+4-0x90`(%rax),$T1 # s2
36675 ++ vpaddq $H2,$D1,$D1 # d1 += h1*r0
36676 ++ vpmuludq $H3,$T0,$T4 # h3*r0
36677 ++ vpmuludq $H4,$T0,$H2 # h4*r0
36678 ++ vpaddq $T4,$D3,$D3 # d3 += h3*r0
36679 ++ vpaddq $H2,$D4,$D4 # d4 += h4*r0
36680 ++
36681 ++ vpmuludq $H3,$T1,$T4 # h3*s2
36682 ++ vpmuludq $H4,$T1,$H2 # h4*s2
36683 ++ vpaddq $T4,$D0,$D0 # d0 += h3*s2
36684 ++ vpaddq $H2,$D1,$D1 # d1 += h4*s2
36685 ++ vmovdqu `32*5+4-0x90`(%rax),$H2 # r3
36686 ++ vpmuludq $H1,$T2,$T4 # h1*r2
36687 ++ vpmuludq $H0,$T2,$T2 # h0*r2
36688 ++ vpaddq $T4,$D3,$D3 # d3 += h1*r2
36689 ++ vpaddq $T2,$D2,$D2 # d2 += h0*r2
36690 ++
36691 ++ vpmuludq $H1,$H2,$T4 # h1*r3
36692 ++ vpmuludq $H0,$H2,$H2 # h0*r3
36693 ++ vpaddq $T4,$D4,$D4 # d4 += h1*r3
36694 ++ vpaddq $H2,$D3,$D3 # d3 += h0*r3
36695 ++ vpmuludq $H3,$T3,$T4 # h3*s3
36696 ++ vpmuludq $H4,$T3,$H2 # h4*s3
36697 ++ vpaddq $T4,$D1,$D1 # d1 += h3*s3
36698 ++ vpaddq $H2,$D2,$D2 # d2 += h4*s3
36699 ++
36700 ++ vpmuludq $H3,$S4,$H3 # h3*s4
36701 ++ vpmuludq $H4,$S4,$H4 # h4*s4
36702 ++ vpaddq $H3,$D2,$H2 # h2 = d2 + h3*s4
36703 ++ vpaddq $H4,$D3,$H3 # h3 = d3 + h4*s4
36704 ++ vpmuludq `32*7+4-0x90`(%rax),$H0,$H4 # h0*r4
36705 ++ vpmuludq $H1,$S4,$H0 # h1*s4
36706 ++ vmovdqa 64(%rcx),$MASK # .Lmask26
36707 ++ vpaddq $H4,$D4,$H4 # h4 = d4 + h0*r4
36708 ++ vpaddq $H0,$D0,$H0 # h0 = d0 + h1*s4
36709 ++
36710 ++ ################################################################
36711 ++ # horizontal addition
36712 ++
36713 ++ vpsrldq \$8,$D1,$T1
36714 ++ vpsrldq \$8,$H2,$T2
36715 ++ vpsrldq \$8,$H3,$T3
36716 ++ vpsrldq \$8,$H4,$T4
36717 ++ vpsrldq \$8,$H0,$T0
36718 ++ vpaddq $T1,$D1,$D1
36719 ++ vpaddq $T2,$H2,$H2
36720 ++ vpaddq $T3,$H3,$H3
36721 ++ vpaddq $T4,$H4,$H4
36722 ++ vpaddq $T0,$H0,$H0
36723 ++
36724 ++ vpermq \$0x2,$H3,$T3
36725 ++ vpermq \$0x2,$H4,$T4
36726 ++ vpermq \$0x2,$H0,$T0
36727 ++ vpermq \$0x2,$D1,$T1
36728 ++ vpermq \$0x2,$H2,$T2
36729 ++ vpaddq $T3,$H3,$H3
36730 ++ vpaddq $T4,$H4,$H4
36731 ++ vpaddq $T0,$H0,$H0
36732 ++ vpaddq $T1,$D1,$D1
36733 ++ vpaddq $T2,$H2,$H2
36734 ++
36735 ++ ################################################################
36736 ++ # lazy reduction
36737 ++
36738 ++ vpsrlq \$26,$H3,$D3
36739 ++ vpand $MASK,$H3,$H3
36740 ++ vpaddq $D3,$H4,$H4 # h3 -> h4
36741 ++
36742 ++ vpsrlq \$26,$H0,$D0
36743 ++ vpand $MASK,$H0,$H0
36744 ++ vpaddq $D0,$D1,$H1 # h0 -> h1
36745 ++
36746 ++ vpsrlq \$26,$H4,$D4
36747 ++ vpand $MASK,$H4,$H4
36748 ++
36749 ++ vpsrlq \$26,$H1,$D1
36750 ++ vpand $MASK,$H1,$H1
36751 ++ vpaddq $D1,$H2,$H2 # h1 -> h2
36752 ++
36753 ++ vpaddq $D4,$H0,$H0
36754 ++ vpsllq \$2,$D4,$D4
36755 ++ vpaddq $D4,$H0,$H0 # h4 -> h0
36756 ++
36757 ++ vpsrlq \$26,$H2,$D2
36758 ++ vpand $MASK,$H2,$H2
36759 ++ vpaddq $D2,$H3,$H3 # h2 -> h3
36760 ++
36761 ++ vpsrlq \$26,$H0,$D0
36762 ++ vpand $MASK,$H0,$H0
36763 ++ vpaddq $D0,$H1,$H1 # h0 -> h1
36764 ++
36765 ++ vpsrlq \$26,$H3,$D3
36766 ++ vpand $MASK,$H3,$H3
36767 ++ vpaddq $D3,$H4,$H4 # h3 -> h4
36768 ++
36769 ++ vmovd %x#$H0,`4*0-48-64`($ctx)# save partially reduced
36770 ++ vmovd %x#$H1,`4*1-48-64`($ctx)
36771 ++ vmovd %x#$H2,`4*2-48-64`($ctx)
36772 ++ vmovd %x#$H3,`4*3-48-64`($ctx)
36773 ++ vmovd %x#$H4,`4*4-48-64`($ctx)
36774 ++___
36775 ++$code.=<<___ if ($win64);
36776 ++ vmovdqa 0x50(%r11),%xmm6
36777 ++ vmovdqa 0x60(%r11),%xmm7
36778 ++ vmovdqa 0x70(%r11),%xmm8
36779 ++ vmovdqa 0x80(%r11),%xmm9
36780 ++ vmovdqa 0x90(%r11),%xmm10
36781 ++ vmovdqa 0xa0(%r11),%xmm11
36782 ++ vmovdqa 0xb0(%r11),%xmm12
36783 ++ vmovdqa 0xc0(%r11),%xmm13
36784 ++ vmovdqa 0xd0(%r11),%xmm14
36785 ++ vmovdqa 0xe0(%r11),%xmm15
36786 ++ lea 0xf8(%r11),%rsp
36787 ++.Ldo_avx2_epilogue:
36788 ++___
36789 ++$code.=<<___ if (!$win64);
36790 ++ lea 8(%r11),%rsp
36791 ++.cfi_def_cfa %rsp,8
36792 ++___
36793 ++$code.=<<___;
36794 ++ vzeroupper
36795 ++ ret
36796 ++.cfi_endproc
36797 ++.size poly1305_blocks_avx2,.-poly1305_blocks_avx2
36798 ++___
36799 ++#######################################################################
36800 ++if ($avx>2) {
36801 ++# On entry we have input length divisible by 64. But since inner loop
36802 ++# processes 128 bytes per iteration, cases when length is not divisible
36803 ++# by 128 are handled by passing tail 64 bytes to .Ltail_avx2. For this
36804 ++# reason stack layout is kept identical to poly1305_blocks_avx2. If not
36805 ++# for this tail, we wouldn't have to even allocate stack frame...
36806 ++
36807 ++my ($R0,$R1,$R2,$R3,$R4, $S1,$S2,$S3,$S4) = map("%zmm$_",(16..24));
36808 ++my ($M0,$M1,$M2,$M3,$M4) = map("%zmm$_",(25..29));
36809 ++my $PADBIT="%zmm30";
36810 ++
36811 ++map(s/%y/%z/,($T4,$T0,$T1,$T2,$T3)); # switch to %zmm domain
36812 ++map(s/%y/%z/,($D0,$D1,$D2,$D3,$D4));
36813 ++map(s/%y/%z/,($H0,$H1,$H2,$H3,$H4));
36814 ++map(s/%y/%z/,($MASK));
36815 ++
36816 ++$code.=<<___;
36817 ++.type poly1305_blocks_avx512,\@function,4
36818 ++.align 32
36819 ++poly1305_blocks_avx512:
36820 ++.cfi_startproc
36821 ++.Lblocks_avx512:
36822 ++ mov \$15,%eax
36823 ++ kmovw %eax,%k2
36824 ++___
36825 ++$code.=<<___ if (!$win64);
36826 ++ lea -8(%rsp),%r11
36827 ++.cfi_def_cfa %r11,16
36828 ++ sub \$0x128,%rsp
36829 ++___
36830 ++$code.=<<___ if ($win64);
36831 ++ lea -0xf8(%rsp),%r11
36832 ++ sub \$0x1c8,%rsp
36833 ++ vmovdqa %xmm6,0x50(%r11)
36834 ++ vmovdqa %xmm7,0x60(%r11)
36835 ++ vmovdqa %xmm8,0x70(%r11)
36836 ++ vmovdqa %xmm9,0x80(%r11)
36837 ++ vmovdqa %xmm10,0x90(%r11)
36838 ++ vmovdqa %xmm11,0xa0(%r11)
36839 ++ vmovdqa %xmm12,0xb0(%r11)
36840 ++ vmovdqa %xmm13,0xc0(%r11)
36841 ++ vmovdqa %xmm14,0xd0(%r11)
36842 ++ vmovdqa %xmm15,0xe0(%r11)
36843 ++.Ldo_avx512_body:
36844 ++___
36845 ++$code.=<<___;
36846 ++ lea .Lconst(%rip),%rcx
36847 ++ lea 48+64($ctx),$ctx # size optimization
36848 ++ vmovdqa 96(%rcx),%y#$T2 # .Lpermd_avx2
36849 ++
36850 ++ # expand pre-calculated table
36851 ++ vmovdqu `16*0-64`($ctx),%x#$D0 # will become expanded ${R0}
36852 ++ and \$-512,%rsp
36853 ++ vmovdqu `16*1-64`($ctx),%x#$D1 # will become ... ${R1}
36854 ++ mov \$0x20,%rax
36855 ++ vmovdqu `16*2-64`($ctx),%x#$T0 # ... ${S1}
36856 ++ vmovdqu `16*3-64`($ctx),%x#$D2 # ... ${R2}
36857 ++ vmovdqu `16*4-64`($ctx),%x#$T1 # ... ${S2}
36858 ++ vmovdqu `16*5-64`($ctx),%x#$D3 # ... ${R3}
36859 ++ vmovdqu `16*6-64`($ctx),%x#$T3 # ... ${S3}
36860 ++ vmovdqu `16*7-64`($ctx),%x#$D4 # ... ${R4}
36861 ++ vmovdqu `16*8-64`($ctx),%x#$T4 # ... ${S4}
36862 ++ vpermd $D0,$T2,$R0 # 00003412 -> 14243444
36863 ++ vpbroadcastq 64(%rcx),$MASK # .Lmask26
36864 ++ vpermd $D1,$T2,$R1
36865 ++ vpermd $T0,$T2,$S1
36866 ++ vpermd $D2,$T2,$R2
36867 ++ vmovdqa64 $R0,0x00(%rsp){%k2} # save in case $len%128 != 0
36868 ++ vpsrlq \$32,$R0,$T0 # 14243444 -> 01020304
36869 ++ vpermd $T1,$T2,$S2
36870 ++ vmovdqu64 $R1,0x00(%rsp,%rax){%k2}
36871 ++ vpsrlq \$32,$R1,$T1
36872 ++ vpermd $D3,$T2,$R3
36873 ++ vmovdqa64 $S1,0x40(%rsp){%k2}
36874 ++ vpermd $T3,$T2,$S3
36875 ++ vpermd $D4,$T2,$R4
36876 ++ vmovdqu64 $R2,0x40(%rsp,%rax){%k2}
36877 ++ vpermd $T4,$T2,$S4
36878 ++ vmovdqa64 $S2,0x80(%rsp){%k2}
36879 ++ vmovdqu64 $R3,0x80(%rsp,%rax){%k2}
36880 ++ vmovdqa64 $S3,0xc0(%rsp){%k2}
36881 ++ vmovdqu64 $R4,0xc0(%rsp,%rax){%k2}
36882 ++ vmovdqa64 $S4,0x100(%rsp){%k2}
36883 ++
36884 ++ ################################################################
36885 ++ # calculate 5th through 8th powers of the key
36886 ++ #
36887 ++ # d0 = r0'*r0 + r1'*5*r4 + r2'*5*r3 + r3'*5*r2 + r4'*5*r1
36888 ++ # d1 = r0'*r1 + r1'*r0 + r2'*5*r4 + r3'*5*r3 + r4'*5*r2
36889 ++ # d2 = r0'*r2 + r1'*r1 + r2'*r0 + r3'*5*r4 + r4'*5*r3
36890 ++ # d3 = r0'*r3 + r1'*r2 + r2'*r1 + r3'*r0 + r4'*5*r4
36891 ++ # d4 = r0'*r4 + r1'*r3 + r2'*r2 + r3'*r1 + r4'*r0
36892 ++
36893 ++ vpmuludq $T0,$R0,$D0 # d0 = r0'*r0
36894 ++ vpmuludq $T0,$R1,$D1 # d1 = r0'*r1
36895 ++ vpmuludq $T0,$R2,$D2 # d2 = r0'*r2
36896 ++ vpmuludq $T0,$R3,$D3 # d3 = r0'*r3
36897 ++ vpmuludq $T0,$R4,$D4 # d4 = r0'*r4
36898 ++ vpsrlq \$32,$R2,$T2
36899 ++
36900 ++ vpmuludq $T1,$S4,$M0
36901 ++ vpmuludq $T1,$R0,$M1
36902 ++ vpmuludq $T1,$R1,$M2
36903 ++ vpmuludq $T1,$R2,$M3
36904 ++ vpmuludq $T1,$R3,$M4
36905 ++ vpsrlq \$32,$R3,$T3
36906 ++ vpaddq $M0,$D0,$D0 # d0 += r1'*5*r4
36907 ++ vpaddq $M1,$D1,$D1 # d1 += r1'*r0
36908 ++ vpaddq $M2,$D2,$D2 # d2 += r1'*r1
36909 ++ vpaddq $M3,$D3,$D3 # d3 += r1'*r2
36910 ++ vpaddq $M4,$D4,$D4 # d4 += r1'*r3
36911 ++
36912 ++ vpmuludq $T2,$S3,$M0
36913 ++ vpmuludq $T2,$S4,$M1
36914 ++ vpmuludq $T2,$R1,$M3
36915 ++ vpmuludq $T2,$R2,$M4
36916 ++ vpmuludq $T2,$R0,$M2
36917 ++ vpsrlq \$32,$R4,$T4
36918 ++ vpaddq $M0,$D0,$D0 # d0 += r2'*5*r3
36919 ++ vpaddq $M1,$D1,$D1 # d1 += r2'*5*r4
36920 ++ vpaddq $M3,$D3,$D3 # d3 += r2'*r1
36921 ++ vpaddq $M4,$D4,$D4 # d4 += r2'*r2
36922 ++ vpaddq $M2,$D2,$D2 # d2 += r2'*r0
36923 ++
36924 ++ vpmuludq $T3,$S2,$M0
36925 ++ vpmuludq $T3,$R0,$M3
36926 ++ vpmuludq $T3,$R1,$M4
36927 ++ vpmuludq $T3,$S3,$M1
36928 ++ vpmuludq $T3,$S4,$M2
36929 ++ vpaddq $M0,$D0,$D0 # d0 += r3'*5*r2
36930 ++ vpaddq $M3,$D3,$D3 # d3 += r3'*r0
36931 ++ vpaddq $M4,$D4,$D4 # d4 += r3'*r1
36932 ++ vpaddq $M1,$D1,$D1 # d1 += r3'*5*r3
36933 ++ vpaddq $M2,$D2,$D2 # d2 += r3'*5*r4
36934 ++
36935 ++ vpmuludq $T4,$S4,$M3
36936 ++ vpmuludq $T4,$R0,$M4
36937 ++ vpmuludq $T4,$S1,$M0
36938 ++ vpmuludq $T4,$S2,$M1
36939 ++ vpmuludq $T4,$S3,$M2
36940 ++ vpaddq $M3,$D3,$D3 # d3 += r4'*5*r4
36941 ++ vpaddq $M4,$D4,$D4 # d4 += r4'*r0
36942 ++ vpaddq $M0,$D0,$D0 # d0 += r4'*5*r1
36943 ++ vpaddq $M1,$D1,$D1 # d1 += r4'*5*r2
36944 ++ vpaddq $M2,$D2,$D2 # d2 += r4'*5*r3
36945 ++
36946 ++ ################################################################
36947 ++ # load input
36948 ++ vmovdqu64 16*0($inp),%z#$T3
36949 ++ vmovdqu64 16*4($inp),%z#$T4
36950 ++ lea 16*8($inp),$inp
36951 ++
36952 ++ ################################################################
36953 ++ # lazy reduction
36954 ++
36955 ++ vpsrlq \$26,$D3,$M3
36956 ++ vpandq $MASK,$D3,$D3
36957 ++ vpaddq $M3,$D4,$D4 # d3 -> d4
36958 ++
36959 ++ vpsrlq \$26,$D0,$M0
36960 ++ vpandq $MASK,$D0,$D0
36961 ++ vpaddq $M0,$D1,$D1 # d0 -> d1
36962 ++
36963 ++ vpsrlq \$26,$D4,$M4
36964 ++ vpandq $MASK,$D4,$D4
36965 ++
36966 ++ vpsrlq \$26,$D1,$M1
36967 ++ vpandq $MASK,$D1,$D1
36968 ++ vpaddq $M1,$D2,$D2 # d1 -> d2
36969 ++
36970 ++ vpaddq $M4,$D0,$D0
36971 ++ vpsllq \$2,$M4,$M4
36972 ++ vpaddq $M4,$D0,$D0 # d4 -> d0
36973 ++
36974 ++ vpsrlq \$26,$D2,$M2
36975 ++ vpandq $MASK,$D2,$D2
36976 ++ vpaddq $M2,$D3,$D3 # d2 -> d3
36977 ++
36978 ++ vpsrlq \$26,$D0,$M0
36979 ++ vpandq $MASK,$D0,$D0
36980 ++ vpaddq $M0,$D1,$D1 # d0 -> d1
36981 ++
36982 ++ vpsrlq \$26,$D3,$M3
36983 ++ vpandq $MASK,$D3,$D3
36984 ++ vpaddq $M3,$D4,$D4 # d3 -> d4
36985 ++
36986 ++ ################################################################
36987 ++ # at this point we have 14243444 in $R0-$S4 and 05060708 in
36988 ++ # $D0-$D4, ...
36989 ++
36990 ++ vpunpcklqdq $T4,$T3,$T0 # transpose input
36991 ++ vpunpckhqdq $T4,$T3,$T4
36992 ++
36993 ++ # ... since input 64-bit lanes are ordered as 73625140, we could
36994 ++ # "vperm" it to 76543210 (here and in each loop iteration), *or*
36995 ++ # we could just flow along, hence the goal for $R0-$S4 is
36996 ++ # 1858286838784888 ...
36997 ++
36998 ++ vmovdqa32 128(%rcx),$M0 # .Lpermd_avx512:
36999 ++ mov \$0x7777,%eax
37000 ++ kmovw %eax,%k1
37001 ++
37002 ++ vpermd $R0,$M0,$R0 # 14243444 -> 1---2---3---4---
37003 ++ vpermd $R1,$M0,$R1
37004 ++ vpermd $R2,$M0,$R2
37005 ++ vpermd $R3,$M0,$R3
37006 ++ vpermd $R4,$M0,$R4
37007 ++
37008 ++ vpermd $D0,$M0,${R0}{%k1} # 05060708 -> 1858286838784888
37009 ++ vpermd $D1,$M0,${R1}{%k1}
37010 ++ vpermd $D2,$M0,${R2}{%k1}
37011 ++ vpermd $D3,$M0,${R3}{%k1}
37012 ++ vpermd $D4,$M0,${R4}{%k1}
37013 ++
37014 ++ vpslld \$2,$R1,$S1 # *5
37015 ++ vpslld \$2,$R2,$S2
37016 ++ vpslld \$2,$R3,$S3
37017 ++ vpslld \$2,$R4,$S4
37018 ++ vpaddd $R1,$S1,$S1
37019 ++ vpaddd $R2,$S2,$S2
37020 ++ vpaddd $R3,$S3,$S3
37021 ++ vpaddd $R4,$S4,$S4
37022 ++
37023 ++ vpbroadcastq 32(%rcx),$PADBIT # .L129
37024 ++
37025 ++ vpsrlq \$52,$T0,$T2 # splat input
37026 ++ vpsllq \$12,$T4,$T3
37027 ++ vporq $T3,$T2,$T2
37028 ++ vpsrlq \$26,$T0,$T1
37029 ++ vpsrlq \$14,$T4,$T3
37030 ++ vpsrlq \$40,$T4,$T4 # 4
37031 ++ vpandq $MASK,$T2,$T2 # 2
37032 ++ vpandq $MASK,$T0,$T0 # 0
37033 ++ #vpandq $MASK,$T1,$T1 # 1
37034 ++ #vpandq $MASK,$T3,$T3 # 3
37035 ++ #vporq $PADBIT,$T4,$T4 # padbit, yes, always
37036 ++
37037 ++ vpaddq $H2,$T2,$H2 # accumulate input
37038 ++ sub \$192,$len
37039 ++ jbe .Ltail_avx512
37040 ++ jmp .Loop_avx512
37041 ++
37042 ++.align 32
37043 ++.Loop_avx512:
37044 ++ ################################################################
37045 ++ # ((inp[0]*r^8+inp[ 8])*r^8+inp[16])*r^8
37046 ++ # ((inp[1]*r^8+inp[ 9])*r^8+inp[17])*r^7
37047 ++ # ((inp[2]*r^8+inp[10])*r^8+inp[18])*r^6
37048 ++ # ((inp[3]*r^8+inp[11])*r^8+inp[19])*r^5
37049 ++ # ((inp[4]*r^8+inp[12])*r^8+inp[20])*r^4
37050 ++ # ((inp[5]*r^8+inp[13])*r^8+inp[21])*r^3
37051 ++ # ((inp[6]*r^8+inp[14])*r^8+inp[22])*r^2
37052 ++ # ((inp[7]*r^8+inp[15])*r^8+inp[23])*r^1
37053 ++ # \________/\___________/
37054 ++ ################################################################
37055 ++ #vpaddq $H2,$T2,$H2 # accumulate input
37056 ++
37057 ++ # d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4
37058 ++ # d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4
37059 ++ # d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4
37060 ++ # d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4
37061 ++ # d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
37062 ++ #
37063 ++ # however, as h2 is "chronologically" first one available pull
37064 ++ # corresponding operations up, so it's
37065 ++ #
37066 ++ # d3 = h2*r1 + h0*r3 + h1*r2 + h3*r0 + h4*5*r4
37067 ++ # d4 = h2*r2 + h0*r4 + h1*r3 + h3*r1 + h4*r0
37068 ++ # d0 = h2*5*r3 + h0*r0 + h1*5*r4 + h3*5*r2 + h4*5*r1
37069 ++ # d1 = h2*5*r4 + h0*r1 + h1*r0 + h3*5*r3 + h4*5*r2
37070 ++ # d2 = h2*r0 + h0*r2 + h1*r1 + h3*5*r4 + h4*5*r3
37071 ++
37072 ++ vpmuludq $H2,$R1,$D3 # d3 = h2*r1
37073 ++ vpaddq $H0,$T0,$H0
37074 ++ vpmuludq $H2,$R2,$D4 # d4 = h2*r2
37075 ++ vpandq $MASK,$T1,$T1 # 1
37076 ++ vpmuludq $H2,$S3,$D0 # d0 = h2*s3
37077 ++ vpandq $MASK,$T3,$T3 # 3
37078 ++ vpmuludq $H2,$S4,$D1 # d1 = h2*s4
37079 ++ vporq $PADBIT,$T4,$T4 # padbit, yes, always
37080 ++ vpmuludq $H2,$R0,$D2 # d2 = h2*r0
37081 ++ vpaddq $H1,$T1,$H1 # accumulate input
37082 ++ vpaddq $H3,$T3,$H3
37083 ++ vpaddq $H4,$T4,$H4
37084 ++
37085 ++ vmovdqu64 16*0($inp),$T3 # load input
37086 ++ vmovdqu64 16*4($inp),$T4
37087 ++ lea 16*8($inp),$inp
37088 ++ vpmuludq $H0,$R3,$M3
37089 ++ vpmuludq $H0,$R4,$M4
37090 ++ vpmuludq $H0,$R0,$M0
37091 ++ vpmuludq $H0,$R1,$M1
37092 ++ vpaddq $M3,$D3,$D3 # d3 += h0*r3
37093 ++ vpaddq $M4,$D4,$D4 # d4 += h0*r4
37094 ++ vpaddq $M0,$D0,$D0 # d0 += h0*r0
37095 ++ vpaddq $M1,$D1,$D1 # d1 += h0*r1
37096 ++
37097 ++ vpmuludq $H1,$R2,$M3
37098 ++ vpmuludq $H1,$R3,$M4
37099 ++ vpmuludq $H1,$S4,$M0
37100 ++ vpmuludq $H0,$R2,$M2
37101 ++ vpaddq $M3,$D3,$D3 # d3 += h1*r2
37102 ++ vpaddq $M4,$D4,$D4 # d4 += h1*r3
37103 ++ vpaddq $M0,$D0,$D0 # d0 += h1*s4
37104 ++ vpaddq $M2,$D2,$D2 # d2 += h0*r2
37105 ++
37106 ++ vpunpcklqdq $T4,$T3,$T0 # transpose input
37107 ++ vpunpckhqdq $T4,$T3,$T4
37108 ++
37109 ++ vpmuludq $H3,$R0,$M3
37110 ++ vpmuludq $H3,$R1,$M4
37111 ++ vpmuludq $H1,$R0,$M1
37112 ++ vpmuludq $H1,$R1,$M2
37113 ++ vpaddq $M3,$D3,$D3 # d3 += h3*r0
37114 ++ vpaddq $M4,$D4,$D4 # d4 += h3*r1
37115 ++ vpaddq $M1,$D1,$D1 # d1 += h1*r0
37116 ++ vpaddq $M2,$D2,$D2 # d2 += h1*r1
37117 ++
37118 ++ vpmuludq $H4,$S4,$M3
37119 ++ vpmuludq $H4,$R0,$M4
37120 ++ vpmuludq $H3,$S2,$M0
37121 ++ vpmuludq $H3,$S3,$M1
37122 ++ vpaddq $M3,$D3,$D3 # d3 += h4*s4
37123 ++ vpmuludq $H3,$S4,$M2
37124 ++ vpaddq $M4,$D4,$D4 # d4 += h4*r0
37125 ++ vpaddq $M0,$D0,$D0 # d0 += h3*s2
37126 ++ vpaddq $M1,$D1,$D1 # d1 += h3*s3
37127 ++ vpaddq $M2,$D2,$D2 # d2 += h3*s4
37128 ++
37129 ++ vpmuludq $H4,$S1,$M0
37130 ++ vpmuludq $H4,$S2,$M1
37131 ++ vpmuludq $H4,$S3,$M2
37132 ++ vpaddq $M0,$D0,$H0 # h0 = d0 + h4*s1
37133 ++ vpaddq $M1,$D1,$H1 # h1 = d1 + h4*s2
37134 ++ vpaddq $M2,$D2,$H2 # h2 = d2 + h4*s3
37135 ++
37136 ++ ################################################################
37137 ++ # lazy reduction (interleaved with input splat)
37138 ++
37139 ++ vpsrlq \$52,$T0,$T2 # splat input
37140 ++ vpsllq \$12,$T4,$T3
37141 ++
37142 ++ vpsrlq \$26,$D3,$H3
37143 ++ vpandq $MASK,$D3,$D3
37144 ++ vpaddq $H3,$D4,$H4 # h3 -> h4
37145 ++
37146 ++ vporq $T3,$T2,$T2
37147 ++
37148 ++ vpsrlq \$26,$H0,$D0
37149 ++ vpandq $MASK,$H0,$H0
37150 ++ vpaddq $D0,$H1,$H1 # h0 -> h1
37151 ++
37152 ++ vpandq $MASK,$T2,$T2 # 2
37153 ++
37154 ++ vpsrlq \$26,$H4,$D4
37155 ++ vpandq $MASK,$H4,$H4
37156 ++
37157 ++ vpsrlq \$26,$H1,$D1
37158 ++ vpandq $MASK,$H1,$H1
37159 ++ vpaddq $D1,$H2,$H2 # h1 -> h2
37160 ++
37161 ++ vpaddq $D4,$H0,$H0
37162 ++ vpsllq \$2,$D4,$D4
37163 ++ vpaddq $D4,$H0,$H0 # h4 -> h0
37164 ++
37165 ++ vpaddq $T2,$H2,$H2 # modulo-scheduled
37166 ++ vpsrlq \$26,$T0,$T1
37167 ++
37168 ++ vpsrlq \$26,$H2,$D2
37169 ++ vpandq $MASK,$H2,$H2
37170 ++ vpaddq $D2,$D3,$H3 # h2 -> h3
37171 ++
37172 ++ vpsrlq \$14,$T4,$T3
37173 ++
37174 ++ vpsrlq \$26,$H0,$D0
37175 ++ vpandq $MASK,$H0,$H0
37176 ++ vpaddq $D0,$H1,$H1 # h0 -> h1
37177 ++
37178 ++ vpsrlq \$40,$T4,$T4 # 4
37179 ++
37180 ++ vpsrlq \$26,$H3,$D3
37181 ++ vpandq $MASK,$H3,$H3
37182 ++ vpaddq $D3,$H4,$H4 # h3 -> h4
37183 ++
37184 ++ vpandq $MASK,$T0,$T0 # 0
37185 ++ #vpandq $MASK,$T1,$T1 # 1
37186 ++ #vpandq $MASK,$T3,$T3 # 3
37187 ++ #vporq $PADBIT,$T4,$T4 # padbit, yes, always
37188 ++
37189 ++ sub \$128,$len
37190 ++ ja .Loop_avx512
37191 ++
37192 ++.Ltail_avx512:
37193 ++ ################################################################
37194 ++ # while above multiplications were by r^8 in all lanes, in last
37195 ++ # iteration we multiply least significant lane by r^8 and most
37196 ++ # significant one by r, that's why table gets shifted...
37197 ++
37198 ++ vpsrlq \$32,$R0,$R0 # 0105020603070408
37199 ++ vpsrlq \$32,$R1,$R1
37200 ++ vpsrlq \$32,$R2,$R2
37201 ++ vpsrlq \$32,$S3,$S3
37202 ++ vpsrlq \$32,$S4,$S4
37203 ++ vpsrlq \$32,$R3,$R3
37204 ++ vpsrlq \$32,$R4,$R4
37205 ++ vpsrlq \$32,$S1,$S1
37206 ++ vpsrlq \$32,$S2,$S2
37207 ++
37208 ++ ################################################################
37209 ++ # load either next or last 64 bytes of input
37210 ++ lea ($inp,$len),$inp
37211 ++
37212 ++ #vpaddq $H2,$T2,$H2 # accumulate input
37213 ++ vpaddq $H0,$T0,$H0
37214 ++
37215 ++ vpmuludq $H2,$R1,$D3 # d3 = h2*r1
37216 ++ vpmuludq $H2,$R2,$D4 # d4 = h2*r2
37217 ++ vpmuludq $H2,$S3,$D0 # d0 = h2*s3
37218 ++ vpandq $MASK,$T1,$T1 # 1
37219 ++ vpmuludq $H2,$S4,$D1 # d1 = h2*s4
37220 ++ vpandq $MASK,$T3,$T3 # 3
37221 ++ vpmuludq $H2,$R0,$D2 # d2 = h2*r0
37222 ++ vporq $PADBIT,$T4,$T4 # padbit, yes, always
37223 ++ vpaddq $H1,$T1,$H1 # accumulate input
37224 ++ vpaddq $H3,$T3,$H3
37225 ++ vpaddq $H4,$T4,$H4
37226 ++
37227 ++ vmovdqu 16*0($inp),%x#$T0
37228 ++ vpmuludq $H0,$R3,$M3
37229 ++ vpmuludq $H0,$R4,$M4
37230 ++ vpmuludq $H0,$R0,$M0
37231 ++ vpmuludq $H0,$R1,$M1
37232 ++ vpaddq $M3,$D3,$D3 # d3 += h0*r3
37233 ++ vpaddq $M4,$D4,$D4 # d4 += h0*r4
37234 ++ vpaddq $M0,$D0,$D0 # d0 += h0*r0
37235 ++ vpaddq $M1,$D1,$D1 # d1 += h0*r1
37236 ++
37237 ++ vmovdqu 16*1($inp),%x#$T1
37238 ++ vpmuludq $H1,$R2,$M3
37239 ++ vpmuludq $H1,$R3,$M4
37240 ++ vpmuludq $H1,$S4,$M0
37241 ++ vpmuludq $H0,$R2,$M2
37242 ++ vpaddq $M3,$D3,$D3 # d3 += h1*r2
37243 ++ vpaddq $M4,$D4,$D4 # d4 += h1*r3
37244 ++ vpaddq $M0,$D0,$D0 # d0 += h1*s4
37245 ++ vpaddq $M2,$D2,$D2 # d2 += h0*r2
37246 ++
37247 ++ vinserti128 \$1,16*2($inp),%y#$T0,%y#$T0
37248 ++ vpmuludq $H3,$R0,$M3
37249 ++ vpmuludq $H3,$R1,$M4
37250 ++ vpmuludq $H1,$R0,$M1
37251 ++ vpmuludq $H1,$R1,$M2
37252 ++ vpaddq $M3,$D3,$D3 # d3 += h3*r0
37253 ++ vpaddq $M4,$D4,$D4 # d4 += h3*r1
37254 ++ vpaddq $M1,$D1,$D1 # d1 += h1*r0
37255 ++ vpaddq $M2,$D2,$D2 # d2 += h1*r1
37256 ++
37257 ++ vinserti128 \$1,16*3($inp),%y#$T1,%y#$T1
37258 ++ vpmuludq $H4,$S4,$M3
37259 ++ vpmuludq $H4,$R0,$M4
37260 ++ vpmuludq $H3,$S2,$M0
37261 ++ vpmuludq $H3,$S3,$M1
37262 ++ vpmuludq $H3,$S4,$M2
37263 ++ vpaddq $M3,$D3,$H3 # h3 = d3 + h4*s4
37264 ++ vpaddq $M4,$D4,$D4 # d4 += h4*r0
37265 ++ vpaddq $M0,$D0,$D0 # d0 += h3*s2
37266 ++ vpaddq $M1,$D1,$D1 # d1 += h3*s3
37267 ++ vpaddq $M2,$D2,$D2 # d2 += h3*s4
37268 ++
37269 ++ vpmuludq $H4,$S1,$M0
37270 ++ vpmuludq $H4,$S2,$M1
37271 ++ vpmuludq $H4,$S3,$M2
37272 ++ vpaddq $M0,$D0,$H0 # h0 = d0 + h4*s1
37273 ++ vpaddq $M1,$D1,$H1 # h1 = d1 + h4*s2
37274 ++ vpaddq $M2,$D2,$H2 # h2 = d2 + h4*s3
37275 ++
37276 ++ ################################################################
37277 ++ # horizontal addition
37278 ++
37279 ++ mov \$1,%eax
37280 ++ vpermq \$0xb1,$H3,$D3
37281 ++ vpermq \$0xb1,$D4,$H4
37282 ++ vpermq \$0xb1,$H0,$D0
37283 ++ vpermq \$0xb1,$H1,$D1
37284 ++ vpermq \$0xb1,$H2,$D2
37285 ++ vpaddq $D3,$H3,$H3
37286 ++ vpaddq $D4,$H4,$H4
37287 ++ vpaddq $D0,$H0,$H0
37288 ++ vpaddq $D1,$H1,$H1
37289 ++ vpaddq $D2,$H2,$H2
37290 ++
37291 ++ kmovw %eax,%k3
37292 ++ vpermq \$0x2,$H3,$D3
37293 ++ vpermq \$0x2,$H4,$D4
37294 ++ vpermq \$0x2,$H0,$D0
37295 ++ vpermq \$0x2,$H1,$D1
37296 ++ vpermq \$0x2,$H2,$D2
37297 ++ vpaddq $D3,$H3,$H3
37298 ++ vpaddq $D4,$H4,$H4
37299 ++ vpaddq $D0,$H0,$H0
37300 ++ vpaddq $D1,$H1,$H1
37301 ++ vpaddq $D2,$H2,$H2
37302 ++
37303 ++ vextracti64x4 \$0x1,$H3,%y#$D3
37304 ++ vextracti64x4 \$0x1,$H4,%y#$D4
37305 ++ vextracti64x4 \$0x1,$H0,%y#$D0
37306 ++ vextracti64x4 \$0x1,$H1,%y#$D1
37307 ++ vextracti64x4 \$0x1,$H2,%y#$D2
37308 ++ vpaddq $D3,$H3,${H3}{%k3}{z} # keep single qword in case
37309 ++ vpaddq $D4,$H4,${H4}{%k3}{z} # it's passed to .Ltail_avx2
37310 ++ vpaddq $D0,$H0,${H0}{%k3}{z}
37311 ++ vpaddq $D1,$H1,${H1}{%k3}{z}
37312 ++ vpaddq $D2,$H2,${H2}{%k3}{z}
37313 ++___
37314 ++map(s/%z/%y/,($T0,$T1,$T2,$T3,$T4, $PADBIT));
37315 ++map(s/%z/%y/,($H0,$H1,$H2,$H3,$H4, $D0,$D1,$D2,$D3,$D4, $MASK));
37316 ++$code.=<<___;
37317 ++ ################################################################
37318 ++ # lazy reduction (interleaved with input splat)
37319 ++
37320 ++ vpsrlq \$26,$H3,$D3
37321 ++ vpand $MASK,$H3,$H3
37322 ++ vpsrldq \$6,$T0,$T2 # splat input
37323 ++ vpsrldq \$6,$T1,$T3
37324 ++ vpunpckhqdq $T1,$T0,$T4 # 4
37325 ++ vpaddq $D3,$H4,$H4 # h3 -> h4
37326 ++
37327 ++ vpsrlq \$26,$H0,$D0
37328 ++ vpand $MASK,$H0,$H0
37329 ++ vpunpcklqdq $T3,$T2,$T2 # 2:3
37330 ++ vpunpcklqdq $T1,$T0,$T0 # 0:1
37331 ++ vpaddq $D0,$H1,$H1 # h0 -> h1
37332 ++
37333 ++ vpsrlq \$26,$H4,$D4
37334 ++ vpand $MASK,$H4,$H4
37335 ++
37336 ++ vpsrlq \$26,$H1,$D1
37337 ++ vpand $MASK,$H1,$H1
37338 ++ vpsrlq \$30,$T2,$T3
37339 ++ vpsrlq \$4,$T2,$T2
37340 ++ vpaddq $D1,$H2,$H2 # h1 -> h2
37341 ++
37342 ++ vpaddq $D4,$H0,$H0
37343 ++ vpsllq \$2,$D4,$D4
37344 ++ vpsrlq \$26,$T0,$T1
37345 ++ vpsrlq \$40,$T4,$T4 # 4
37346 ++ vpaddq $D4,$H0,$H0 # h4 -> h0
37347 ++
37348 ++ vpsrlq \$26,$H2,$D2
37349 ++ vpand $MASK,$H2,$H2
37350 ++ vpand $MASK,$T2,$T2 # 2
37351 ++ vpand $MASK,$T0,$T0 # 0
37352 ++ vpaddq $D2,$H3,$H3 # h2 -> h3
37353 ++
37354 ++ vpsrlq \$26,$H0,$D0
37355 ++ vpand $MASK,$H0,$H0
37356 ++ vpaddq $H2,$T2,$H2 # accumulate input for .Ltail_avx2
37357 ++ vpand $MASK,$T1,$T1 # 1
37358 ++ vpaddq $D0,$H1,$H1 # h0 -> h1
37359 ++
37360 ++ vpsrlq \$26,$H3,$D3
37361 ++ vpand $MASK,$H3,$H3
37362 ++ vpand $MASK,$T3,$T3 # 3
37363 ++ vpor 32(%rcx),$T4,$T4 # padbit, yes, always
37364 ++ vpaddq $D3,$H4,$H4 # h3 -> h4
37365 ++
37366 ++ lea 0x90(%rsp),%rax # size optimization for .Ltail_avx2
37367 ++ add \$64,$len
37368 ++ jnz .Ltail_avx2
37369 ++
37370 ++ vpsubq $T2,$H2,$H2 # undo input accumulation
37371 ++ vmovd %x#$H0,`4*0-48-64`($ctx)# save partially reduced
37372 ++ vmovd %x#$H1,`4*1-48-64`($ctx)
37373 ++ vmovd %x#$H2,`4*2-48-64`($ctx)
37374 ++ vmovd %x#$H3,`4*3-48-64`($ctx)
37375 ++ vmovd %x#$H4,`4*4-48-64`($ctx)
37376 ++ vzeroall
37377 ++___
37378 ++$code.=<<___ if ($win64);
37379 ++ movdqa 0x50(%r11),%xmm6
37380 ++ movdqa 0x60(%r11),%xmm7
37381 ++ movdqa 0x70(%r11),%xmm8
37382 ++ movdqa 0x80(%r11),%xmm9
37383 ++ movdqa 0x90(%r11),%xmm10
37384 ++ movdqa 0xa0(%r11),%xmm11
37385 ++ movdqa 0xb0(%r11),%xmm12
37386 ++ movdqa 0xc0(%r11),%xmm13
37387 ++ movdqa 0xd0(%r11),%xmm14
37388 ++ movdqa 0xe0(%r11),%xmm15
37389 ++ lea 0xf8(%r11),%rsp
37390 ++.Ldo_avx512_epilogue:
37391 ++___
37392 ++$code.=<<___ if (!$win64);
37393 ++ lea 8(%r11),%rsp
37394 ++.cfi_def_cfa %rsp,8
37395 ++___
37396 ++$code.=<<___;
37397 ++ ret
37398 ++.cfi_endproc
37399 ++.size poly1305_blocks_avx512,.-poly1305_blocks_avx512
37400 ++___
37401 ++if ($avx>3) {
37402 ++########################################################################
37403 ++# VPMADD52 version using 2^44 radix.
37404 ++#
37405 ++# One can argue that base 2^52 would be more natural. Well, even though
37406 ++# some operations would be more natural, one has to recognize a couple
37407 ++# of things. First, base 2^52 provides no advantage over base 2^44 in
37408 ++# the number of multiply-n-accumulate operations. Secondly, it makes it
37409 ++# impossible to pre-compute multiples of 5 [referred to as s[]/sN in
37410 ++# reference implementations], which means that more such operations
37411 ++# would have to be performed in the inner loop, which in turn makes the
37412 ++# critical path longer. In other words, even though base 2^44 reduction
37413 ++# might look less elegant, the overall critical path is actually shorter...
37414 ++
37415 ++########################################################################
37416 ++# Layout of the opaque area is as follows.
37417 ++#
37418 ++# unsigned __int64 h[3]; # current hash value base 2^44
37419 ++# unsigned __int64 s[2]; # key value*20 base 2^44
37420 ++# unsigned __int64 r[3]; # key value base 2^44
37421 ++# struct { unsigned __int64 r^1, r^3, r^2, r^4; } R[4];
37422 ++# # r^n positions reflect
37423 ++# # placement in register, not
37424 ++# # memory, R[3] is R[1]*20
37425 ++
37426 ++$code.=<<___;
37427 ++.type poly1305_init_base2_44,\@function,3
37428 ++.align 32
37429 ++poly1305_init_base2_44:
37430 ++ xor %rax,%rax # rax = 0
37431 ++ mov %rax,0($ctx) # initialize hash value
37432 ++ mov %rax,8($ctx) # second hash qword = 0
37433 ++ mov %rax,16($ctx) # third hash qword = 0
37434 ++
37435 ++.Linit_base2_44:
37436 ++ lea poly1305_blocks_vpmadd52(%rip),%r10 # address of blocks routine
37437 ++ lea poly1305_emit_base2_44(%rip),%r11 # address of emit routine
37438 ++
37439 ++ mov \$0x0ffffffc0fffffff,%rax # mask for qword at inp+0
37440 ++ mov \$0x0ffffffc0ffffffc,%rcx # mask for qword at inp+8
37441 ++ and 0($inp),%rax # masked low key qword
37442 ++ mov \$0x00000fffffffffff,%r8 # 2^44-1
37443 ++ and 8($inp),%rcx # masked high key qword
37444 ++ mov \$0x00000fffffffffff,%r9 # 2^44-1
37445 ++ and %rax,%r8 # key bits 0-43
37446 ++ shrd \$44,%rcx,%rax # shift right by 44, filling from high qword
37447 ++ mov %r8,40($ctx) # r0
37448 ++ and %r9,%rax # key bits 44-87
37449 ++ shr \$24,%rcx # key bits 88 and up
37450 ++ mov %rax,48($ctx) # r1
37451 ++ lea (%rax,%rax,4),%rax # *5
37452 ++ mov %rcx,56($ctx) # r2
37453 ++ shl \$2,%rax # magic <<2
37454 ++ lea (%rcx,%rcx,4),%rcx # *5
37455 ++ shl \$2,%rcx # magic <<2
37456 ++ mov %rax,24($ctx) # s1
37457 ++ mov %rcx,32($ctx) # s2
37458 ++ movq \$-1,64($ctx) # write impossible value
37459 ++___
37460 ++$code.=<<___ if ($flavour !~ /elf32/);
37461 ++ mov %r10,0(%rdx) # store blocks routine pointer
37462 ++ mov %r11,8(%rdx) # store emit routine pointer
37463 ++___
37464 ++$code.=<<___ if ($flavour =~ /elf32/);
37465 ++ mov %r10d,0(%rdx) # store blocks routine pointer, 32-bit slot
37466 ++ mov %r11d,4(%rdx) # store emit routine pointer, 32-bit slot
37467 ++___
37468 ++$code.=<<___;
37469 ++ mov \$1,%eax # return 1
37470 ++ ret
37471 ++.size poly1305_init_base2_44,.-poly1305_init_base2_44
37472 ++___
37473 ++{
37474 ++my ($H0,$H1,$H2,$r2r1r0,$r1r0s2,$r0s2s1,$Dlo,$Dhi) = map("%ymm$_",(0..5,16,17));
37475 ++my ($T0,$inp_permd,$inp_shift,$PAD) = map("%ymm$_",(18..21));
37476 ++my ($reduc_mask,$reduc_rght,$reduc_left) = map("%ymm$_",(22..25)); # (22..25) yields 4 names, only the first 3 are assigned
37477 ++
37478 ++$code.=<<___;
37479 ++.type poly1305_blocks_vpmadd52,\@function,4
37480 ++.align 32
37481 ++poly1305_blocks_vpmadd52: # single-block loop; hands remaining blocks to .Lblocks_vpmadd52_4x
37482 ++ shr \$4,$len # len in 16-byte blocks
37483 ++ jz .Lno_data_vpmadd52 # too short
37484 ++
37485 ++ shl \$40,$padbit
37486 ++ mov 64($ctx),%r8 # peek on power of the key
37487 ++
37488 ++ # if powers of the key are not calculated yet, process up to 3
37489 ++ # blocks with this single-block subroutine, otherwise ensure that
37490 ++ # length is divisible by 2 blocks and pass the rest down to next
37491 ++ # subroutine...
37492 ++
37493 ++ mov \$3,%rax
37494 ++ mov \$1,%r10
37495 ++ cmp \$4,$len # is input long
37496 ++ cmovae %r10,%rax
37497 ++ test %r8,%r8 # is power value impossible?
37498 ++ cmovns %r10,%rax
37499 ++
37500 ++ and $len,%rax # is input of favourable length?
37501 ++ jz .Lblocks_vpmadd52_4x
37502 ++
37503 ++ sub %rax,$len
37504 ++ mov \$7,%r10d # lane mask: three low qwords
37505 ++ mov \$1,%r11d # lane mask: lowest qword only
37506 ++ kmovw %r10d,%k7
37507 ++ lea .L2_44_inp_permd(%rip),%r10
37508 ++ kmovw %r11d,%k1
37509 ++
37510 ++ vmovq $padbit,%x#$PAD
37511 ++ vmovdqa64 0(%r10),$inp_permd # .L2_44_inp_permd
37512 ++ vmovdqa64 32(%r10),$inp_shift # .L2_44_inp_shift
37513 ++ vpermq \$0xcf,$PAD,$PAD
37514 ++ vmovdqa64 64(%r10),$reduc_mask # .L2_44_mask
37515 ++
37516 ++ vmovdqu64 0($ctx),${Dlo}{%k7}{z} # load hash value
37517 ++ vmovdqu64 40($ctx),${r2r1r0}{%k7}{z} # load keys
37518 ++ vmovdqu64 32($ctx),${r1r0s2}{%k7}{z}
37519 ++ vmovdqu64 24($ctx),${r0s2s1}{%k7}{z}
37520 ++
37521 ++ vmovdqa64 96(%r10),$reduc_rght # .L2_44_shift_rgt
37522 ++ vmovdqa64 128(%r10),$reduc_left # .L2_44_shift_lft
37523 ++
37524 ++ jmp .Loop_vpmadd52
37525 ++
37526 ++.align 32
37527 ++.Loop_vpmadd52:
37528 ++ vmovdqu32 0($inp),%x#$T0 # load input as ----3210
37529 ++ lea 16($inp),$inp
37530 ++
37531 ++ vpermd $T0,$inp_permd,$T0 # ----3210 -> --322110
37532 ++ vpsrlvq $inp_shift,$T0,$T0
37533 ++ vpandq $reduc_mask,$T0,$T0
37534 ++ vporq $PAD,$T0,$T0
37535 ++
37536 ++ vpaddq $T0,$Dlo,$Dlo # accumulate input
37537 ++
37538 ++ vpermq \$0,$Dlo,${H0}{%k7}{z} # smash hash value
37539 ++ vpermq \$0b01010101,$Dlo,${H1}{%k7}{z}
37540 ++ vpermq \$0b10101010,$Dlo,${H2}{%k7}{z}
37541 ++
37542 ++ vpxord $Dlo,$Dlo,$Dlo
37543 ++ vpxord $Dhi,$Dhi,$Dhi
37544 ++
37545 ++ vpmadd52luq $r2r1r0,$H0,$Dlo
37546 ++ vpmadd52huq $r2r1r0,$H0,$Dhi
37547 ++
37548 ++ vpmadd52luq $r1r0s2,$H1,$Dlo
37549 ++ vpmadd52huq $r1r0s2,$H1,$Dhi
37550 ++
37551 ++ vpmadd52luq $r0s2s1,$H2,$Dlo
37552 ++ vpmadd52huq $r0s2s1,$H2,$Dhi
37553 ++
37554 ++ vpsrlvq $reduc_rght,$Dlo,$T0 # 0 in topmost qword
37555 ++ vpsllvq $reduc_left,$Dhi,$Dhi # 0 in topmost qword
37556 ++ vpandq $reduc_mask,$Dlo,$Dlo
37557 ++
37558 ++ vpaddq $T0,$Dhi,$Dhi
37559 ++
37560 ++ vpermq \$0b10010011,$Dhi,$Dhi # 0 in lowest qword
37561 ++
37562 ++ vpaddq $Dhi,$Dlo,$Dlo # note topmost qword :-)
37563 ++
37564 ++ vpsrlvq $reduc_rght,$Dlo,$T0 # 0 in topmost qword
37565 ++ vpandq $reduc_mask,$Dlo,$Dlo
37566 ++
37567 ++ vpermq \$0b10010011,$T0,$T0
37568 ++
37569 ++ vpaddq $T0,$Dlo,$Dlo
37570 ++
37571 ++ vpermq \$0b10010011,$Dlo,${T0}{%k1}{z}
37572 ++
37573 ++ vpaddq $T0,$Dlo,$Dlo
37574 ++ vpsllq \$2,$T0,$T0
37575 ++
37576 ++ vpaddq $T0,$Dlo,$Dlo
37577 ++
37578 ++ dec %rax # len-=16
37579 ++ jnz .Loop_vpmadd52
37580 ++
37581 ++ vmovdqu64 $Dlo,0($ctx){%k7} # store hash value
37582 ++
37583 ++ test $len,$len
37584 ++ jnz .Lblocks_vpmadd52_4x
37585 ++
37586 ++.Lno_data_vpmadd52:
37587 ++ ret
37588 ++.size poly1305_blocks_vpmadd52,.-poly1305_blocks_vpmadd52
37589 ++___
37590 ++}
37591 ++{
37592 ++########################################################################
37593 ++# As implied by its name, the 4x subroutine processes 4 blocks in parallel
37594 ++# (but it also handles lengths of 4*n+2 blocks). It takes up to 4th key power
37595 ++# and is handled in 256-bit %ymm registers.
37596 ++
37597 ++my ($H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2) = map("%ymm$_",(0..5,16,17));
37598 ++my ($D0lo,$D0hi,$D1lo,$D1hi,$D2lo,$D2hi) = map("%ymm$_",(18..23));
37599 ++my ($T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD) = map("%ymm$_",(24..31));
37600 ++
37601 ++$code.=<<___;
37602 ++.type poly1305_blocks_vpmadd52_4x,\@function,4
37603 ++.align 32
37604 ++poly1305_blocks_vpmadd52_4x:
37605 ++ shr \$4,$len # len in 16-byte blocks
37606 ++ jz .Lno_data_vpmadd52_4x # too short
37607 ++
37608 ++ shl \$40,$padbit
37609 ++ mov 64($ctx),%r8 # peek on power of the key
37610 ++
37611 ++.Lblocks_vpmadd52_4x:
37612 ++ vpbroadcastq $padbit,$PAD
37613 ++
37614 ++ vmovdqa64 .Lx_mask44(%rip),$mask44
37615 ++ mov \$5,%eax # lane mask 0b0101: qwords 0 and 2
37616 ++ vmovdqa64 .Lx_mask42(%rip),$mask42
37617 ++ kmovw %eax,%k1 # used in 2x path
37618 ++
37619 ++ test %r8,%r8 # is power value impossible?
37620 ++ js .Linit_vpmadd52 # if it is, then init R[4]
37621 ++
37622 ++ vmovq 0($ctx),%x#$H0 # load current hash value
37623 ++ vmovq 8($ctx),%x#$H1
37624 ++ vmovq 16($ctx),%x#$H2
37625 ++
37626 ++ test \$3,$len # is length 4*n+2?
37627 ++ jnz .Lblocks_vpmadd52_2x_do
37628 ++
37629 ++.Lblocks_vpmadd52_4x_do:
37630 ++ vpbroadcastq 64($ctx),$R0 # load 4th power of the key
37631 ++ vpbroadcastq 96($ctx),$R1
37632 ++ vpbroadcastq 128($ctx),$R2
37633 ++ vpbroadcastq 160($ctx),$S1
37634 ++
37635 ++.Lblocks_vpmadd52_4x_key_loaded:
37636 ++ vpsllq \$2,$R2,$S2 # S2 = R2*5*4
37637 ++ vpaddq $R2,$S2,$S2
37638 ++ vpsllq \$2,$S2,$S2
37639 ++
37640 ++ test \$7,$len # is len 8*n?
37641 ++ jz .Lblocks_vpmadd52_8x
37642 ++
37643 ++ vmovdqu64 16*0($inp),$T2 # load data
37644 ++ vmovdqu64 16*2($inp),$T3
37645 ++ lea 16*4($inp),$inp
37646 ++
37647 ++ vpunpcklqdq $T3,$T2,$T1 # transpose data
37648 ++ vpunpckhqdq $T3,$T2,$T3
37649 ++
37650 ++ # at this point 64-bit lanes are ordered as 3-1-2-0
37651 ++
37652 ++ vpsrlq \$24,$T3,$T2 # splat the data
37653 ++ vporq $PAD,$T2,$T2
37654 ++ vpaddq $T2,$H2,$H2 # accumulate input
37655 ++ vpandq $mask44,$T1,$T0
37656 ++ vpsrlq \$44,$T1,$T1
37657 ++ vpsllq \$20,$T3,$T3
37658 ++ vporq $T3,$T1,$T1
37659 ++ vpandq $mask44,$T1,$T1
37660 ++
37661 ++ sub \$4,$len
37662 ++ jz .Ltail_vpmadd52_4x
37663 ++ jmp .Loop_vpmadd52_4x
37664 ++ ud2 # not reached, follows an unconditional jmp
37665 ++
37666 ++.align 32
37667 ++.Linit_vpmadd52:
37668 ++ vmovq 24($ctx),%x#$S1 # load key
37669 ++ vmovq 56($ctx),%x#$H2
37670 ++ vmovq 32($ctx),%x#$S2
37671 ++ vmovq 40($ctx),%x#$R0
37672 ++ vmovq 48($ctx),%x#$R1
37673 ++
37674 ++ vmovdqa $R0,$H0
37675 ++ vmovdqa $R1,$H1
37676 ++ vmovdqa $H2,$R2
37677 ++
37678 ++ mov \$2,%eax # two passes through .Lmul_init_vpmadd52
37679 ++
37680 ++.Lmul_init_vpmadd52:
37681 ++ vpxorq $D0lo,$D0lo,$D0lo
37682 ++ vpmadd52luq $H2,$S1,$D0lo
37683 ++ vpxorq $D0hi,$D0hi,$D0hi
37684 ++ vpmadd52huq $H2,$S1,$D0hi
37685 ++ vpxorq $D1lo,$D1lo,$D1lo
37686 ++ vpmadd52luq $H2,$S2,$D1lo
37687 ++ vpxorq $D1hi,$D1hi,$D1hi
37688 ++ vpmadd52huq $H2,$S2,$D1hi
37689 ++ vpxorq $D2lo,$D2lo,$D2lo
37690 ++ vpmadd52luq $H2,$R0,$D2lo
37691 ++ vpxorq $D2hi,$D2hi,$D2hi
37692 ++ vpmadd52huq $H2,$R0,$D2hi
37693 ++
37694 ++ vpmadd52luq $H0,$R0,$D0lo
37695 ++ vpmadd52huq $H0,$R0,$D0hi
37696 ++ vpmadd52luq $H0,$R1,$D1lo
37697 ++ vpmadd52huq $H0,$R1,$D1hi
37698 ++ vpmadd52luq $H0,$R2,$D2lo
37699 ++ vpmadd52huq $H0,$R2,$D2hi
37700 ++
37701 ++ vpmadd52luq $H1,$S2,$D0lo
37702 ++ vpmadd52huq $H1,$S2,$D0hi
37703 ++ vpmadd52luq $H1,$R0,$D1lo
37704 ++ vpmadd52huq $H1,$R0,$D1hi
37705 ++ vpmadd52luq $H1,$R1,$D2lo
37706 ++ vpmadd52huq $H1,$R1,$D2hi
37707 ++
37708 ++ ################################################################
37709 ++ # partial reduction
37710 ++ vpsrlq \$44,$D0lo,$tmp
37711 ++ vpsllq \$8,$D0hi,$D0hi
37712 ++ vpandq $mask44,$D0lo,$H0
37713 ++ vpaddq $tmp,$D0hi,$D0hi
37714 ++
37715 ++ vpaddq $D0hi,$D1lo,$D1lo
37716 ++
37717 ++ vpsrlq \$44,$D1lo,$tmp
37718 ++ vpsllq \$8,$D1hi,$D1hi
37719 ++ vpandq $mask44,$D1lo,$H1
37720 ++ vpaddq $tmp,$D1hi,$D1hi
37721 ++
37722 ++ vpaddq $D1hi,$D2lo,$D2lo
37723 ++
37724 ++ vpsrlq \$42,$D2lo,$tmp
37725 ++ vpsllq \$10,$D2hi,$D2hi
37726 ++ vpandq $mask42,$D2lo,$H2
37727 ++ vpaddq $tmp,$D2hi,$D2hi
37728 ++
37729 ++ vpaddq $D2hi,$H0,$H0
37730 ++ vpsllq \$2,$D2hi,$D2hi
37731 ++
37732 ++ vpaddq $D2hi,$H0,$H0
37733 ++
37734 ++ vpsrlq \$44,$H0,$tmp # additional step
37735 ++ vpandq $mask44,$H0,$H0
37736 ++
37737 ++ vpaddq $tmp,$H1,$H1
37738 ++
37739 ++ dec %eax
37740 ++ jz .Ldone_init_vpmadd52
37741 ++
37742 ++ vpunpcklqdq $R1,$H1,$R1 # 1,2
37743 ++ vpbroadcastq %x#$H1,%x#$H1 # 2,2
37744 ++ vpunpcklqdq $R2,$H2,$R2
37745 ++ vpbroadcastq %x#$H2,%x#$H2
37746 ++ vpunpcklqdq $R0,$H0,$R0
37747 ++ vpbroadcastq %x#$H0,%x#$H0
37748 ++
37749 ++ vpsllq \$2,$R1,$S1 # S1 = R1*5*4
37750 ++ vpsllq \$2,$R2,$S2 # S2 = R2*5*4
37751 ++ vpaddq $R1,$S1,$S1
37752 ++ vpaddq $R2,$S2,$S2
37753 ++ vpsllq \$2,$S1,$S1
37754 ++ vpsllq \$2,$S2,$S2
37755 ++
37756 ++ jmp .Lmul_init_vpmadd52
37757 ++ ud2
37758 ++
37759 ++.align 32
37760 ++.Ldone_init_vpmadd52:
37761 ++ vinserti128 \$1,%x#$R1,$H1,$R1 # 1,2,3,4
37762 ++ vinserti128 \$1,%x#$R2,$H2,$R2
37763 ++ vinserti128 \$1,%x#$R0,$H0,$R0
37764 ++
37765 ++ vpermq \$0b11011000,$R1,$R1 # 1,3,2,4
37766 ++ vpermq \$0b11011000,$R2,$R2
37767 ++ vpermq \$0b11011000,$R0,$R0
37768 ++
37769 ++ vpsllq \$2,$R1,$S1 # S1 = R1*5*4
37770 ++ vpaddq $R1,$S1,$S1
37771 ++ vpsllq \$2,$S1,$S1
37772 ++
37773 ++ vmovq 0($ctx),%x#$H0 # load current hash value
37774 ++ vmovq 8($ctx),%x#$H1
37775 ++ vmovq 16($ctx),%x#$H2
37776 ++
37777 ++ test \$3,$len # is length 4*n+2?
37778 ++ jnz .Ldone_init_vpmadd52_2x
37779 ++
37780 ++ vmovdqu64 $R0,64($ctx) # save key powers
37781 ++ vpbroadcastq %x#$R0,$R0 # broadcast 4th power
37782 ++ vmovdqu64 $R1,96($ctx)
37783 ++ vpbroadcastq %x#$R1,$R1
37784 ++ vmovdqu64 $R2,128($ctx)
37785 ++ vpbroadcastq %x#$R2,$R2
37786 ++ vmovdqu64 $S1,160($ctx)
37787 ++ vpbroadcastq %x#$S1,$S1
37788 ++
37789 ++ jmp .Lblocks_vpmadd52_4x_key_loaded
37790 ++ ud2
37791 ++
37792 ++.align 32
37793 ++.Ldone_init_vpmadd52_2x:
37794 ++ vmovdqu64 $R0,64($ctx) # save key powers
37795 ++ vpsrldq \$8,$R0,$R0 # 0-1-0-2
37796 ++ vmovdqu64 $R1,96($ctx)
37797 ++ vpsrldq \$8,$R1,$R1
37798 ++ vmovdqu64 $R2,128($ctx)
37799 ++ vpsrldq \$8,$R2,$R2
37800 ++ vmovdqu64 $S1,160($ctx)
37801 ++ vpsrldq \$8,$S1,$S1
37802 ++ jmp .Lblocks_vpmadd52_2x_key_loaded
37803 ++ ud2
37804 ++
37805 ++.align 32
37806 ++.Lblocks_vpmadd52_2x_do:
37807 ++ vmovdqu64 128+8($ctx),${R2}{%k1}{z}# load 2nd and 1st key powers
37808 ++ vmovdqu64 160+8($ctx),${S1}{%k1}{z}
37809 ++ vmovdqu64 64+8($ctx),${R0}{%k1}{z}
37810 ++ vmovdqu64 96+8($ctx),${R1}{%k1}{z}
37811 ++
37812 ++.Lblocks_vpmadd52_2x_key_loaded:
37813 ++ vmovdqu64 16*0($inp),$T2 # load data
37814 ++ vpxorq $T3,$T3,$T3
37815 ++ lea 16*2($inp),$inp
37816 ++
37817 ++ vpunpcklqdq $T3,$T2,$T1 # transpose data
37818 ++ vpunpckhqdq $T3,$T2,$T3
37819 ++
37820 ++ # at this point 64-bit lanes are ordered as x-1-x-0
37821 ++
37822 ++ vpsrlq \$24,$T3,$T2 # splat the data
37823 ++ vporq $PAD,$T2,$T2
37824 ++ vpaddq $T2,$H2,$H2 # accumulate input
37825 ++ vpandq $mask44,$T1,$T0
37826 ++ vpsrlq \$44,$T1,$T1
37827 ++ vpsllq \$20,$T3,$T3
37828 ++ vporq $T3,$T1,$T1
37829 ++ vpandq $mask44,$T1,$T1
37830 ++
37831 ++ jmp .Ltail_vpmadd52_2x
37832 ++ ud2
37833 ++
37834 ++.align 32
37835 ++.Loop_vpmadd52_4x:
37836 ++ #vpaddq $T2,$H2,$H2 # accumulate input
37837 ++ vpaddq $T0,$H0,$H0
37838 ++ vpaddq $T1,$H1,$H1
37839 ++
37840 ++ vpxorq $D0lo,$D0lo,$D0lo
37841 ++ vpmadd52luq $H2,$S1,$D0lo
37842 ++ vpxorq $D0hi,$D0hi,$D0hi
37843 ++ vpmadd52huq $H2,$S1,$D0hi
37844 ++ vpxorq $D1lo,$D1lo,$D1lo
37845 ++ vpmadd52luq $H2,$S2,$D1lo
37846 ++ vpxorq $D1hi,$D1hi,$D1hi
37847 ++ vpmadd52huq $H2,$S2,$D1hi
37848 ++ vpxorq $D2lo,$D2lo,$D2lo
37849 ++ vpmadd52luq $H2,$R0,$D2lo
37850 ++ vpxorq $D2hi,$D2hi,$D2hi
37851 ++ vpmadd52huq $H2,$R0,$D2hi
37852 ++
37853 ++ vmovdqu64 16*0($inp),$T2 # load data
37854 ++ vmovdqu64 16*2($inp),$T3
37855 ++ lea 16*4($inp),$inp
37856 ++ vpmadd52luq $H0,$R0,$D0lo
37857 ++ vpmadd52huq $H0,$R0,$D0hi
37858 ++ vpmadd52luq $H0,$R1,$D1lo
37859 ++ vpmadd52huq $H0,$R1,$D1hi
37860 ++ vpmadd52luq $H0,$R2,$D2lo
37861 ++ vpmadd52huq $H0,$R2,$D2hi
37862 ++
37863 ++ vpunpcklqdq $T3,$T2,$T1 # transpose data
37864 ++ vpunpckhqdq $T3,$T2,$T3
37865 ++ vpmadd52luq $H1,$S2,$D0lo
37866 ++ vpmadd52huq $H1,$S2,$D0hi
37867 ++ vpmadd52luq $H1,$R0,$D1lo
37868 ++ vpmadd52huq $H1,$R0,$D1hi
37869 ++ vpmadd52luq $H1,$R1,$D2lo
37870 ++ vpmadd52huq $H1,$R1,$D2hi
37871 ++
37872 ++ ################################################################
37873 ++ # partial reduction (interleaved with data splat)
37874 ++ vpsrlq \$44,$D0lo,$tmp
37875 ++ vpsllq \$8,$D0hi,$D0hi
37876 ++ vpandq $mask44,$D0lo,$H0
37877 ++ vpaddq $tmp,$D0hi,$D0hi
37878 ++
37879 ++ vpsrlq \$24,$T3,$T2
37880 ++ vporq $PAD,$T2,$T2
37881 ++ vpaddq $D0hi,$D1lo,$D1lo
37882 ++
37883 ++ vpsrlq \$44,$D1lo,$tmp
37884 ++ vpsllq \$8,$D1hi,$D1hi
37885 ++ vpandq $mask44,$D1lo,$H1
37886 ++ vpaddq $tmp,$D1hi,$D1hi
37887 ++
37888 ++ vpandq $mask44,$T1,$T0
37889 ++ vpsrlq \$44,$T1,$T1
37890 ++ vpsllq \$20,$T3,$T3
37891 ++ vpaddq $D1hi,$D2lo,$D2lo
37892 ++
37893 ++ vpsrlq \$42,$D2lo,$tmp
37894 ++ vpsllq \$10,$D2hi,$D2hi
37895 ++ vpandq $mask42,$D2lo,$H2
37896 ++ vpaddq $tmp,$D2hi,$D2hi
37897 ++
37898 ++ vpaddq $T2,$H2,$H2 # accumulate input
37899 ++ vpaddq $D2hi,$H0,$H0
37900 ++ vpsllq \$2,$D2hi,$D2hi
37901 ++
37902 ++ vpaddq $D2hi,$H0,$H0
37903 ++ vporq $T3,$T1,$T1
37904 ++ vpandq $mask44,$T1,$T1
37905 ++
37906 ++ vpsrlq \$44,$H0,$tmp # additional step
37907 ++ vpandq $mask44,$H0,$H0
37908 ++
37909 ++ vpaddq $tmp,$H1,$H1
37910 ++
37911 ++ sub \$4,$len # len-=64
37912 ++ jnz .Loop_vpmadd52_4x
37913 ++
37914 ++.Ltail_vpmadd52_4x:
37915 ++ vmovdqu64 128($ctx),$R2 # load all key powers
37916 ++ vmovdqu64 160($ctx),$S1
37917 ++ vmovdqu64 64($ctx),$R0
37918 ++ vmovdqu64 96($ctx),$R1
37919 ++
37920 ++.Ltail_vpmadd52_2x:
37921 ++ vpsllq \$2,$R2,$S2 # S2 = R2*5*4
37922 ++ vpaddq $R2,$S2,$S2
37923 ++ vpsllq \$2,$S2,$S2
37924 ++
37925 ++ #vpaddq $T2,$H2,$H2 # accumulate input
37926 ++ vpaddq $T0,$H0,$H0
37927 ++ vpaddq $T1,$H1,$H1
37928 ++
37929 ++ vpxorq $D0lo,$D0lo,$D0lo
37930 ++ vpmadd52luq $H2,$S1,$D0lo
37931 ++ vpxorq $D0hi,$D0hi,$D0hi
37932 ++ vpmadd52huq $H2,$S1,$D0hi
37933 ++ vpxorq $D1lo,$D1lo,$D1lo
37934 ++ vpmadd52luq $H2,$S2,$D1lo
37935 ++ vpxorq $D1hi,$D1hi,$D1hi
37936 ++ vpmadd52huq $H2,$S2,$D1hi
37937 ++ vpxorq $D2lo,$D2lo,$D2lo
37938 ++ vpmadd52luq $H2,$R0,$D2lo
37939 ++ vpxorq $D2hi,$D2hi,$D2hi
37940 ++ vpmadd52huq $H2,$R0,$D2hi
37941 ++
37942 ++ vpmadd52luq $H0,$R0,$D0lo
37943 ++ vpmadd52huq $H0,$R0,$D0hi
37944 ++ vpmadd52luq $H0,$R1,$D1lo
37945 ++ vpmadd52huq $H0,$R1,$D1hi
37946 ++ vpmadd52luq $H0,$R2,$D2lo
37947 ++ vpmadd52huq $H0,$R2,$D2hi
37948 ++
37949 ++ vpmadd52luq $H1,$S2,$D0lo
37950 ++ vpmadd52huq $H1,$S2,$D0hi
37951 ++ vpmadd52luq $H1,$R0,$D1lo
37952 ++ vpmadd52huq $H1,$R0,$D1hi
37953 ++ vpmadd52luq $H1,$R1,$D2lo
37954 ++ vpmadd52huq $H1,$R1,$D2hi
37955 ++
37956 ++ ################################################################
37957 ++ # horizontal addition
37958 ++
37959 ++ mov \$1,%eax # lane mask: keep lowest qword only
37960 ++ kmovw %eax,%k1
37961 ++ vpsrldq \$8,$D0lo,$T0
37962 ++ vpsrldq \$8,$D0hi,$H0
37963 ++ vpsrldq \$8,$D1lo,$T1
37964 ++ vpsrldq \$8,$D1hi,$H1
37965 ++ vpaddq $T0,$D0lo,$D0lo
37966 ++ vpaddq $H0,$D0hi,$D0hi
37967 ++ vpsrldq \$8,$D2lo,$T2
37968 ++ vpsrldq \$8,$D2hi,$H2
37969 ++ vpaddq $T1,$D1lo,$D1lo
37970 ++ vpaddq $H1,$D1hi,$D1hi
37971 ++ vpermq \$0x2,$D0lo,$T0
37972 ++ vpermq \$0x2,$D0hi,$H0
37973 ++ vpaddq $T2,$D2lo,$D2lo
37974 ++ vpaddq $H2,$D2hi,$D2hi
37975 ++
37976 ++ vpermq \$0x2,$D1lo,$T1
37977 ++ vpermq \$0x2,$D1hi,$H1
37978 ++ vpaddq $T0,$D0lo,${D0lo}{%k1}{z}
37979 ++ vpaddq $H0,$D0hi,${D0hi}{%k1}{z}
37980 ++ vpermq \$0x2,$D2lo,$T2
37981 ++ vpermq \$0x2,$D2hi,$H2
37982 ++ vpaddq $T1,$D1lo,${D1lo}{%k1}{z}
37983 ++ vpaddq $H1,$D1hi,${D1hi}{%k1}{z}
37984 ++ vpaddq $T2,$D2lo,${D2lo}{%k1}{z}
37985 ++ vpaddq $H2,$D2hi,${D2hi}{%k1}{z}
37986 ++
37987 ++ ################################################################
37988 ++ # partial reduction
37989 ++ vpsrlq \$44,$D0lo,$tmp
37990 ++ vpsllq \$8,$D0hi,$D0hi
37991 ++ vpandq $mask44,$D0lo,$H0
37992 ++ vpaddq $tmp,$D0hi,$D0hi
37993 ++
37994 ++ vpaddq $D0hi,$D1lo,$D1lo
37995 ++
37996 ++ vpsrlq \$44,$D1lo,$tmp
37997 ++ vpsllq \$8,$D1hi,$D1hi
37998 ++ vpandq $mask44,$D1lo,$H1
37999 ++ vpaddq $tmp,$D1hi,$D1hi
38000 ++
38001 ++ vpaddq $D1hi,$D2lo,$D2lo
38002 ++
38003 ++ vpsrlq \$42,$D2lo,$tmp
38004 ++ vpsllq \$10,$D2hi,$D2hi
38005 ++ vpandq $mask42,$D2lo,$H2
38006 ++ vpaddq $tmp,$D2hi,$D2hi
38007 ++
38008 ++ vpaddq $D2hi,$H0,$H0
38009 ++ vpsllq \$2,$D2hi,$D2hi
38010 ++
38011 ++ vpaddq $D2hi,$H0,$H0
38012 ++
38013 ++ vpsrlq \$44,$H0,$tmp # additional step
38014 ++ vpandq $mask44,$H0,$H0
38015 ++
38016 ++ vpaddq $tmp,$H1,$H1
38017 ++ # at this point $len is
38018 ++ # either 4*n+2 or 0...
38019 ++ sub \$2,$len # len-=32
38020 ++ ja .Lblocks_vpmadd52_4x_do
38021 ++
38022 ++ vmovq %x#$H0,0($ctx)
38023 ++ vmovq %x#$H1,8($ctx)
38024 ++ vmovq %x#$H2,16($ctx)
38025 ++ vzeroall
38026 ++
38027 ++.Lno_data_vpmadd52_4x:
38028 ++ ret
38029 ++.size poly1305_blocks_vpmadd52_4x,.-poly1305_blocks_vpmadd52_4x
38030 ++___
38031 ++}
38032 ++{
38033 ++########################################################################
38034 ++# As implied by its name, the 8x subroutine processes 8 blocks in parallel...
38035 ++# This is an intermediate version, as it's used only in cases when input
38036 ++# length is either 8*n, 8*n+1 or 8*n+2...
38037 ++
38038 ++my ($H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2) = map("%ymm$_",(0..5,16,17));
38039 ++my ($D0lo,$D0hi,$D1lo,$D1hi,$D2lo,$D2hi) = map("%ymm$_",(18..23));
38040 ++my ($T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD) = map("%ymm$_",(24..31));
38041 ++my ($RR0,$RR1,$RR2,$SS1,$SS2) = map("%ymm$_",(6..10));
38042 ++
38043 ++$code.=<<___;
38044 ++.type poly1305_blocks_vpmadd52_8x,\@function,4
38045 ++.align 32
38046 ++poly1305_blocks_vpmadd52_8x:
38047 ++ shr \$4,$len # len in 16-byte blocks
38048 ++ jz .Lno_data_vpmadd52_8x # too short
38049 ++
38050 ++ shl \$40,$padbit
38051 ++ mov 64($ctx),%r8 # peek on power of the key
38052 ++
38053 ++ vmovdqa64 .Lx_mask44(%rip),$mask44
38054 ++ vmovdqa64 .Lx_mask42(%rip),$mask42
38055 ++
38056 ++ test %r8,%r8 # is power value impossible?
38057 ++ js .Linit_vpmadd52 # if it is, then init R[4]
38058 ++
38059 ++ vmovq 0($ctx),%x#$H0 # load current hash value
38060 ++ vmovq 8($ctx),%x#$H1
38061 ++ vmovq 16($ctx),%x#$H2
38062 ++
38063 ++.Lblocks_vpmadd52_8x:
38064 ++ ################################################################
38065 ++ # first we calculate more key powers
38066 ++
38067 ++ vmovdqu64 128($ctx),$R2 # load 1-3-2-4 powers
38068 ++ vmovdqu64 160($ctx),$S1
38069 ++ vmovdqu64 64($ctx),$R0
38070 ++ vmovdqu64 96($ctx),$R1
38071 ++
38072 ++ vpsllq \$2,$R2,$S2 # S2 = R2*5*4
38073 ++ vpaddq $R2,$S2,$S2
38074 ++ vpsllq \$2,$S2,$S2
38075 ++
38076 ++ vpbroadcastq %x#$R2,$RR2 # broadcast 4th power
38077 ++ vpbroadcastq %x#$R0,$RR0
38078 ++ vpbroadcastq %x#$R1,$RR1
38079 ++
38080 ++ vpxorq $D0lo,$D0lo,$D0lo
38081 ++ vpmadd52luq $RR2,$S1,$D0lo
38082 ++ vpxorq $D0hi,$D0hi,$D0hi
38083 ++ vpmadd52huq $RR2,$S1,$D0hi
38084 ++ vpxorq $D1lo,$D1lo,$D1lo
38085 ++ vpmadd52luq $RR2,$S2,$D1lo
38086 ++ vpxorq $D1hi,$D1hi,$D1hi
38087 ++ vpmadd52huq $RR2,$S2,$D1hi
38088 ++ vpxorq $D2lo,$D2lo,$D2lo
38089 ++ vpmadd52luq $RR2,$R0,$D2lo
38090 ++ vpxorq $D2hi,$D2hi,$D2hi
38091 ++ vpmadd52huq $RR2,$R0,$D2hi
38092 ++
38093 ++ vpmadd52luq $RR0,$R0,$D0lo
38094 ++ vpmadd52huq $RR0,$R0,$D0hi
38095 ++ vpmadd52luq $RR0,$R1,$D1lo
38096 ++ vpmadd52huq $RR0,$R1,$D1hi
38097 ++ vpmadd52luq $RR0,$R2,$D2lo
38098 ++ vpmadd52huq $RR0,$R2,$D2hi
38099 ++
38100 ++ vpmadd52luq $RR1,$S2,$D0lo
38101 ++ vpmadd52huq $RR1,$S2,$D0hi
38102 ++ vpmadd52luq $RR1,$R0,$D1lo
38103 ++ vpmadd52huq $RR1,$R0,$D1hi
38104 ++ vpmadd52luq $RR1,$R1,$D2lo
38105 ++ vpmadd52huq $RR1,$R1,$D2hi
38106 ++
38107 ++ ################################################################
38108 ++ # partial reduction
38109 ++ vpsrlq \$44,$D0lo,$tmp
38110 ++ vpsllq \$8,$D0hi,$D0hi
38111 ++ vpandq $mask44,$D0lo,$RR0
38112 ++ vpaddq $tmp,$D0hi,$D0hi
38113 ++
38114 ++ vpaddq $D0hi,$D1lo,$D1lo
38115 ++
38116 ++ vpsrlq \$44,$D1lo,$tmp
38117 ++ vpsllq \$8,$D1hi,$D1hi
38118 ++ vpandq $mask44,$D1lo,$RR1
38119 ++ vpaddq $tmp,$D1hi,$D1hi
38120 ++
38121 ++ vpaddq $D1hi,$D2lo,$D2lo
38122 ++
38123 ++ vpsrlq \$42,$D2lo,$tmp
38124 ++ vpsllq \$10,$D2hi,$D2hi
38125 ++ vpandq $mask42,$D2lo,$RR2
38126 ++ vpaddq $tmp,$D2hi,$D2hi
38127 ++
38128 ++ vpaddq $D2hi,$RR0,$RR0
38129 ++ vpsllq \$2,$D2hi,$D2hi
38130 ++
38131 ++ vpaddq $D2hi,$RR0,$RR0
38132 ++
38133 ++ vpsrlq \$44,$RR0,$tmp # additional step
38134 ++ vpandq $mask44,$RR0,$RR0
38135 ++
38136 ++ vpaddq $tmp,$RR1,$RR1
38137 ++
38138 ++ ################################################################
38139 ++ # At this point Rx holds 1324 powers, RRx - 5768, and the goal
38140 ++ # is 15263748, which reflects how data is loaded...
38141 ++
38142 ++ vpunpcklqdq $R2,$RR2,$T2 # 3748
38143 ++ vpunpckhqdq $R2,$RR2,$R2 # 1526
38144 ++ vpunpcklqdq $R0,$RR0,$T0
38145 ++ vpunpckhqdq $R0,$RR0,$R0
38146 ++ vpunpcklqdq $R1,$RR1,$T1
38147 ++ vpunpckhqdq $R1,$RR1,$R1
38148 ++___
38149 ++######## switch to %zmm (rewrites the register names held in the Perl variables)
38150 ++map(s/%y/%z/, $H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2);
38151 ++map(s/%y/%z/, $D0lo,$D0hi,$D1lo,$D1hi,$D2lo,$D2hi);
38152 ++map(s/%y/%z/, $T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD);
38153 ++map(s/%y/%z/, $RR0,$RR1,$RR2,$SS1,$SS2);
38154 ++
38155 ++$code.=<<___;
38156 ++ vshufi64x2 \$0x44,$R2,$T2,$RR2 # 15263748
38157 ++ vshufi64x2 \$0x44,$R0,$T0,$RR0
38158 ++ vshufi64x2 \$0x44,$R1,$T1,$RR1
38159 ++
38160 ++ vmovdqu64 16*0($inp),$T2 # load data
38161 ++ vmovdqu64 16*4($inp),$T3
38162 ++ lea 16*8($inp),$inp
38163 ++
38164 ++ vpsllq \$2,$RR2,$SS2 # S2 = R2*5*4
38165 ++ vpsllq \$2,$RR1,$SS1 # S1 = R1*5*4
38166 ++ vpaddq $RR2,$SS2,$SS2
38167 ++ vpaddq $RR1,$SS1,$SS1
38168 ++ vpsllq \$2,$SS2,$SS2
38169 ++ vpsllq \$2,$SS1,$SS1
38170 ++
38171 ++ vpbroadcastq $padbit,$PAD
38172 ++ vpbroadcastq %x#$mask44,$mask44
38173 ++ vpbroadcastq %x#$mask42,$mask42
38174 ++
38175 ++ vpbroadcastq %x#$SS1,$S1 # broadcast 8th power
38176 ++ vpbroadcastq %x#$SS2,$S2
38177 ++ vpbroadcastq %x#$RR0,$R0
38178 ++ vpbroadcastq %x#$RR1,$R1
38179 ++ vpbroadcastq %x#$RR2,$R2
38180 ++
38181 ++ vpunpcklqdq $T3,$T2,$T1 # transpose data
38182 ++ vpunpckhqdq $T3,$T2,$T3
38183 ++
38184 ++ # at this point 64-bit lanes are ordered as 73625140
38185 ++
38186 ++ vpsrlq \$24,$T3,$T2 # splat the data
38187 ++ vporq $PAD,$T2,$T2
38188 ++ vpaddq $T2,$H2,$H2 # accumulate input
38189 ++ vpandq $mask44,$T1,$T0
38190 ++ vpsrlq \$44,$T1,$T1
38191 ++ vpsllq \$20,$T3,$T3
38192 ++ vporq $T3,$T1,$T1
38193 ++ vpandq $mask44,$T1,$T1
38194 ++
38195 ++ sub \$8,$len
38196 ++ jz .Ltail_vpmadd52_8x
38197 ++ jmp .Loop_vpmadd52_8x
38198 ++
38199 ++.align 32
38200 ++.Loop_vpmadd52_8x:
38201 ++ #vpaddq $T2,$H2,$H2 # accumulate input
38202 ++ vpaddq $T0,$H0,$H0
38203 ++ vpaddq $T1,$H1,$H1
38204 ++
38205 ++ vpxorq $D0lo,$D0lo,$D0lo
38206 ++ vpmadd52luq $H2,$S1,$D0lo
38207 ++ vpxorq $D0hi,$D0hi,$D0hi
38208 ++ vpmadd52huq $H2,$S1,$D0hi
38209 ++ vpxorq $D1lo,$D1lo,$D1lo
38210 ++ vpmadd52luq $H2,$S2,$D1lo
38211 ++ vpxorq $D1hi,$D1hi,$D1hi
38212 ++ vpmadd52huq $H2,$S2,$D1hi
38213 ++ vpxorq $D2lo,$D2lo,$D2lo
38214 ++ vpmadd52luq $H2,$R0,$D2lo
38215 ++ vpxorq $D2hi,$D2hi,$D2hi
38216 ++ vpmadd52huq $H2,$R0,$D2hi
38217 ++
38218 ++ vmovdqu64 16*0($inp),$T2 # load data
38219 ++ vmovdqu64 16*4($inp),$T3
38220 ++ lea 16*8($inp),$inp
38221 ++ vpmadd52luq $H0,$R0,$D0lo
38222 ++ vpmadd52huq $H0,$R0,$D0hi
38223 ++ vpmadd52luq $H0,$R1,$D1lo
38224 ++ vpmadd52huq $H0,$R1,$D1hi
38225 ++ vpmadd52luq $H0,$R2,$D2lo
38226 ++ vpmadd52huq $H0,$R2,$D2hi
38227 ++
38228 ++ vpunpcklqdq $T3,$T2,$T1 # transpose data
38229 ++ vpunpckhqdq $T3,$T2,$T3
38230 ++ vpmadd52luq $H1,$S2,$D0lo
38231 ++ vpmadd52huq $H1,$S2,$D0hi
38232 ++ vpmadd52luq $H1,$R0,$D1lo
38233 ++ vpmadd52huq $H1,$R0,$D1hi
38234 ++ vpmadd52luq $H1,$R1,$D2lo
38235 ++ vpmadd52huq $H1,$R1,$D2hi
38236 ++
38237 ++ ################################################################
38238 ++ # partial reduction (interleaved with data splat)
38239 ++ vpsrlq \$44,$D0lo,$tmp
38240 ++ vpsllq \$8,$D0hi,$D0hi
38241 ++ vpandq $mask44,$D0lo,$H0
38242 ++ vpaddq $tmp,$D0hi,$D0hi
38243 ++
38244 ++ vpsrlq \$24,$T3,$T2
38245 ++ vporq $PAD,$T2,$T2
38246 ++ vpaddq $D0hi,$D1lo,$D1lo
38247 ++
38248 ++ vpsrlq \$44,$D1lo,$tmp
38249 ++ vpsllq \$8,$D1hi,$D1hi
38250 ++ vpandq $mask44,$D1lo,$H1
38251 ++ vpaddq $tmp,$D1hi,$D1hi
38252 ++
38253 ++ vpandq $mask44,$T1,$T0
38254 ++ vpsrlq \$44,$T1,$T1
38255 ++ vpsllq \$20,$T3,$T3
38256 ++ vpaddq $D1hi,$D2lo,$D2lo
38257 ++
38258 ++ vpsrlq \$42,$D2lo,$tmp
38259 ++ vpsllq \$10,$D2hi,$D2hi
38260 ++ vpandq $mask42,$D2lo,$H2
38261 ++ vpaddq $tmp,$D2hi,$D2hi
38262 ++
38263 ++ vpaddq $T2,$H2,$H2 # accumulate input
38264 ++ vpaddq $D2hi,$H0,$H0
38265 ++ vpsllq \$2,$D2hi,$D2hi
38266 ++
38267 ++ vpaddq $D2hi,$H0,$H0
38268 ++ vporq $T3,$T1,$T1
38269 ++ vpandq $mask44,$T1,$T1
38270 ++
38271 ++ vpsrlq \$44,$H0,$tmp # additional step
38272 ++ vpandq $mask44,$H0,$H0
38273 ++
38274 ++ vpaddq $tmp,$H1,$H1
38275 ++
38276 ++ sub \$8,$len # len-=128
38277 ++ jnz .Loop_vpmadd52_8x
38278 ++
38279 ++.Ltail_vpmadd52_8x:
38280 ++ #vpaddq $T2,$H2,$H2 # accumulate input
38281 ++ vpaddq $T0,$H0,$H0
38282 ++ vpaddq $T1,$H1,$H1
38283 ++
38284 ++ vpxorq $D0lo,$D0lo,$D0lo
38285 ++ vpmadd52luq $H2,$SS1,$D0lo
38286 ++ vpxorq $D0hi,$D0hi,$D0hi
38287 ++ vpmadd52huq $H2,$SS1,$D0hi
38288 ++ vpxorq $D1lo,$D1lo,$D1lo
38289 ++ vpmadd52luq $H2,$SS2,$D1lo
38290 ++ vpxorq $D1hi,$D1hi,$D1hi
38291 ++ vpmadd52huq $H2,$SS2,$D1hi
38292 ++ vpxorq $D2lo,$D2lo,$D2lo
38293 ++ vpmadd52luq $H2,$RR0,$D2lo
38294 ++ vpxorq $D2hi,$D2hi,$D2hi
38295 ++ vpmadd52huq $H2,$RR0,$D2hi
38296 ++
38297 ++ vpmadd52luq $H0,$RR0,$D0lo
38298 ++ vpmadd52huq $H0,$RR0,$D0hi
38299 ++ vpmadd52luq $H0,$RR1,$D1lo
38300 ++ vpmadd52huq $H0,$RR1,$D1hi
38301 ++ vpmadd52luq $H0,$RR2,$D2lo
38302 ++ vpmadd52huq $H0,$RR2,$D2hi
38303 ++
38304 ++ vpmadd52luq $H1,$SS2,$D0lo
38305 ++ vpmadd52huq $H1,$SS2,$D0hi
38306 ++ vpmadd52luq $H1,$RR0,$D1lo
38307 ++ vpmadd52huq $H1,$RR0,$D1hi
38308 ++ vpmadd52luq $H1,$RR1,$D2lo
38309 ++ vpmadd52huq $H1,$RR1,$D2hi
38310 ++
38311 ++ ################################################################
38312 ++ # horizontal addition
38313 ++
38314 ++ mov \$1,%eax # lane mask: keep lowest qword only
38315 ++ kmovw %eax,%k1
38316 ++ vpsrldq \$8,$D0lo,$T0
38317 ++ vpsrldq \$8,$D0hi,$H0
38318 ++ vpsrldq \$8,$D1lo,$T1
38319 ++ vpsrldq \$8,$D1hi,$H1
38320 ++ vpaddq $T0,$D0lo,$D0lo
38321 ++ vpaddq $H0,$D0hi,$D0hi
38322 ++ vpsrldq \$8,$D2lo,$T2
38323 ++ vpsrldq \$8,$D2hi,$H2
38324 ++ vpaddq $T1,$D1lo,$D1lo
38325 ++ vpaddq $H1,$D1hi,$D1hi
38326 ++ vpermq \$0x2,$D0lo,$T0
38327 ++ vpermq \$0x2,$D0hi,$H0
38328 ++ vpaddq $T2,$D2lo,$D2lo
38329 ++ vpaddq $H2,$D2hi,$D2hi
38330 ++
38331 ++ vpermq \$0x2,$D1lo,$T1
38332 ++ vpermq \$0x2,$D1hi,$H1
38333 ++ vpaddq $T0,$D0lo,$D0lo
38334 ++ vpaddq $H0,$D0hi,$D0hi
38335 ++ vpermq \$0x2,$D2lo,$T2
38336 ++ vpermq \$0x2,$D2hi,$H2
38337 ++ vpaddq $T1,$D1lo,$D1lo
38338 ++ vpaddq $H1,$D1hi,$D1hi
38339 ++ vextracti64x4 \$1,$D0lo,%y#$T0
38340 ++ vextracti64x4 \$1,$D0hi,%y#$H0
38341 ++ vpaddq $T2,$D2lo,$D2lo
38342 ++ vpaddq $H2,$D2hi,$D2hi
38343 ++
38344 ++ vextracti64x4 \$1,$D1lo,%y#$T1
38345 ++ vextracti64x4 \$1,$D1hi,%y#$H1
38346 ++ vextracti64x4 \$1,$D2lo,%y#$T2
38347 ++ vextracti64x4 \$1,$D2hi,%y#$H2
38348 ++___
38349 ++######## switch back to %ymm (the RRx/SSx names are not switched back here)
38350 ++map(s/%z/%y/, $H0,$H1,$H2,$R0,$R1,$R2,$S1,$S2);
38351 ++map(s/%z/%y/, $D0lo,$D0hi,$D1lo,$D1hi,$D2lo,$D2hi);
38352 ++map(s/%z/%y/, $T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD);
38353 ++
38354 ++$code.=<<___;
38355 ++ vpaddq $T0,$D0lo,${D0lo}{%k1}{z}
38356 ++ vpaddq $H0,$D0hi,${D0hi}{%k1}{z}
38357 ++ vpaddq $T1,$D1lo,${D1lo}{%k1}{z}
38358 ++ vpaddq $H1,$D1hi,${D1hi}{%k1}{z}
38359 ++ vpaddq $T2,$D2lo,${D2lo}{%k1}{z}
38360 ++ vpaddq $H2,$D2hi,${D2hi}{%k1}{z}
38361 ++
38362 ++ ################################################################
38363 ++ # partial reduction
38364 ++ vpsrlq \$44,$D0lo,$tmp
38365 ++ vpsllq \$8,$D0hi,$D0hi
38366 ++ vpandq $mask44,$D0lo,$H0
38367 ++ vpaddq $tmp,$D0hi,$D0hi
38368 ++
38369 ++ vpaddq $D0hi,$D1lo,$D1lo
38370 ++
38371 ++ vpsrlq \$44,$D1lo,$tmp
38372 ++ vpsllq \$8,$D1hi,$D1hi
38373 ++ vpandq $mask44,$D1lo,$H1
38374 ++ vpaddq $tmp,$D1hi,$D1hi
38375 ++
38376 ++ vpaddq $D1hi,$D2lo,$D2lo
38377 ++
38378 ++ vpsrlq \$42,$D2lo,$tmp
38379 ++ vpsllq \$10,$D2hi,$D2hi
38380 ++ vpandq $mask42,$D2lo,$H2
38381 ++ vpaddq $tmp,$D2hi,$D2hi
38382 ++
38383 ++ vpaddq $D2hi,$H0,$H0
38384 ++ vpsllq \$2,$D2hi,$D2hi
38385 ++
38386 ++ vpaddq $D2hi,$H0,$H0
38387 ++
38388 ++ vpsrlq \$44,$H0,$tmp # additional step
38389 ++ vpandq $mask44,$H0,$H0
38390 ++
38391 ++ vpaddq $tmp,$H1,$H1
38392 ++
38393 ++ ################################################################
38394 ++
38395 ++ vmovq %x#$H0,0($ctx)
38396 ++ vmovq %x#$H1,8($ctx)
38397 ++ vmovq %x#$H2,16($ctx)
38398 ++ vzeroall
38399 ++
38400 ++.Lno_data_vpmadd52_8x:
38401 ++ ret
38402 ++.size poly1305_blocks_vpmadd52_8x,.-poly1305_blocks_vpmadd52_8x
38403 ++___
38404 ++}
38405 ++$code.=<<___;
38406 ++.type poly1305_emit_base2_44,\@function,3
38407 ++.align 32
38408 ++poly1305_emit_base2_44: # repack the base 2^44 hash, do the final modular comparison, add nonce, store 16 bytes
38409 ++ mov 0($ctx),%r8 # load hash value
38410 ++ mov 8($ctx),%r9
38411 ++ mov 16($ctx),%r10
38412 ++
38413 ++ mov %r9,%rax # repack the three limbs into 64-bit words
38414 ++ shr \$20,%r9
38415 ++ shl \$44,%rax
38416 ++ mov %r10,%rcx
38417 ++ shr \$40,%r10
38418 ++ shl \$24,%rcx
38419 ++
38420 ++ add %rax,%r8
38421 ++ adc %rcx,%r9
38422 ++ adc \$0,%r10
38423 ++
38424 ++ mov %r8,%rax # keep the unmodified low 128 bits in %rax:%rcx
38425 ++ add \$5,%r8 # compare to modulus
38426 ++ mov %r9,%rcx
38427 ++ adc \$0,%r9
38428 ++ adc \$0,%r10
38429 ++ shr \$2,%r10 # did 130-bit value overflow?
38430 ++ cmovnz %r8,%rax # if so, take the value with 5 added
38431 ++ cmovnz %r9,%rcx
38432 ++
38433 ++ add 0($nonce),%rax # accumulate nonce
38434 ++ adc 8($nonce),%rcx
38435 ++ mov %rax,0($mac) # write result
38436 ++ mov %rcx,8($mac)
38437 ++
38438 ++ ret
38439 ++.size poly1305_emit_base2_44,.-poly1305_emit_base2_44
38440 ++___
38441 ++} } }
38442 ++$code.=<<___;
38443 ++.align 64
38444 ++.Lconst:
38445 ++.Lmask24:
38446 ++.long 0x0ffffff,0,0x0ffffff,0,0x0ffffff,0,0x0ffffff,0
38447 ++.L129:
38448 ++.long `1<<24`,0,`1<<24`,0,`1<<24`,0,`1<<24`,0
38449 ++.Lmask26:
38450 ++.long 0x3ffffff,0,0x3ffffff,0,0x3ffffff,0,0x3ffffff,0
38451 ++.Lpermd_avx2:
38452 ++.long 2,2,2,3,2,0,2,1
38453 ++.Lpermd_avx512:
38454 ++.long 0,0,0,1, 0,2,0,3, 0,4,0,5, 0,6,0,7
38455 ++
38456 ++.L2_44_inp_permd: # vpermd dword selectors: ----3210 -> --322110
38457 ++.long 0,1,1,2,2,3,7,7
38458 ++.L2_44_inp_shift: # per-qword right shifts used with vpsrlvq
38459 ++.quad 0,12,24,64
38460 ++.L2_44_mask: # 44-, 44- and 42-bit limb masks, top qword all ones
38461 ++.quad 0xfffffffffff,0xfffffffffff,0x3ffffffffff,0xffffffffffffffff
38462 ++.L2_44_shift_rgt: # per-qword right shifts for reduction (64 gives 0 in topmost qword)
38463 ++.quad 44,44,42,64
38464 ++.L2_44_shift_lft: # per-qword left shifts for reduction (64 gives 0 in topmost qword)
38465 ++.quad 8,8,10,64
38466 ++
38467 ++.align 64
38468 ++.Lx_mask44: # 2^44-1 in each of eight qwords
38469 ++.quad 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff
38470 ++.quad 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff
38471 ++.Lx_mask42: # 2^42-1 in each of eight qwords
38472 ++.quad 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff
38473 ++.quad 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff
38474 ++___
38475 ++}
38476 ++$code.=<<___;
38477 ++.asciz "Poly1305 for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
38478 ++.align 16 # realign after the identification string above
38479 ++___
38480 ++
38481 ++{ # chacha20-poly1305 helpers
38482 ++my ($out,$inp,$otp,$len)=$win64 ? ("%rcx","%rdx","%r8", "%r9") : # Win64 order
38483 ++ ("%rdi","%rsi","%rdx","%rcx"); # Unix order
38484 ++$code.=<<___;
38485 ++.globl xor128_encrypt_n_pad
38486 ++.type xor128_encrypt_n_pad,\@abi-omnipotent
38487 ++.align 16
38488 ++xor128_encrypt_n_pad: # out = inp ^ otp; otp buffer is overwritten with the result and zero-padded to 16 bytes
38489 ++ sub $otp,$inp # make inp/out relative to otp so one pointer walks all three
38490 ++ sub $otp,$out
38491 ++ mov $len,%r10 # put len aside
38492 ++ shr \$4,$len # len / 16
38493 ++ jz .Ltail_enc # NOTE(review): len==0 also lands here and the byte loop decrements before testing - presumably callers never pass 0
38494 ++ nop
38495 ++.Loop_enc_xmm:
38496 ++ movdqu ($inp,$otp),%xmm0
38497 ++ pxor ($otp),%xmm0
38498 ++ movdqu %xmm0,($out,$otp)
38499 ++ movdqa %xmm0,($otp) # result also written back over otp
38500 ++ lea 16($otp),$otp
38501 ++ dec $len
38502 ++ jnz .Loop_enc_xmm
38503 ++
38504 ++ and \$15,%r10 # len % 16
38505 ++ jz .Ldone_enc
38506 ++
38507 ++.Ltail_enc:
38508 ++ mov \$16,$len
38509 ++ sub %r10,$len # number of zero pad bytes = 16 - len % 16
38510 ++ xor %eax,%eax
38511 ++.Loop_enc_byte:
38512 ++ mov ($inp,$otp),%al
38513 ++ xor ($otp),%al
38514 ++ mov %al,($out,$otp)
38515 ++ mov %al,($otp)
38516 ++ lea 1($otp),$otp
38517 ++ dec %r10
38518 ++ jnz .Loop_enc_byte
38519 ++
38520 ++ xor %eax,%eax
38521 ++.Loop_enc_pad:
38522 ++ mov %al,($otp) # zero pad
38523 ++ lea 1($otp),$otp
38524 ++ dec $len
38525 ++ jnz .Loop_enc_pad
38526 ++
38527 ++.Ldone_enc:
38528 ++ mov $otp,%rax # return the advanced otp pointer
38529 ++ ret
38530 ++.size xor128_encrypt_n_pad,.-xor128_encrypt_n_pad
38531 ++
38532 ++.globl xor128_decrypt_n_pad
38533 ++.type xor128_decrypt_n_pad,\@abi-omnipotent
38534 ++.align 16
38535 ++xor128_decrypt_n_pad: # out = inp ^ otp; otp buffer is overwritten with the input bytes and zero-padded to 16 bytes
38536 ++ sub $otp,$inp # make inp/out relative to otp so one pointer walks all three
38537 ++ sub $otp,$out
38538 ++ mov $len,%r10 # put len aside
38539 ++ shr \$4,$len # len / 16
38540 ++ jz .Ltail_dec # NOTE(review): len==0 also lands here and the byte loop decrements before testing - presumably callers never pass 0
38541 ++ nop
38542 ++.Loop_dec_xmm:
38543 ++ movdqu ($inp,$otp),%xmm0
38544 ++ movdqa ($otp),%xmm1
38545 ++ pxor %xmm0,%xmm1
38546 ++ movdqu %xmm1,($out,$otp)
38547 ++ movdqa %xmm0,($otp) # input block written over otp
38548 ++ lea 16($otp),$otp
38549 ++ dec $len
38550 ++ jnz .Loop_dec_xmm
38551 ++
38552 ++ pxor %xmm1,%xmm1 # clear %xmm1, which held the last output block
38553 ++ and \$15,%r10 # len % 16
38554 ++ jz .Ldone_dec
38555 ++
38556 ++.Ltail_dec:
38557 ++ mov \$16,$len
38558 ++ sub %r10,$len # number of zero pad bytes = 16 - len % 16
38559 ++ xor %eax,%eax
38560 ++ xor %r11,%r11
38561 ++.Loop_dec_byte:
38562 ++ mov ($inp,$otp),%r11b
38563 ++ mov ($otp),%al
38564 ++ xor %r11b,%al
38565 ++ mov %al,($out,$otp)
38566 ++ mov %r11b,($otp)
38567 ++ lea 1($otp),$otp
38568 ++ dec %r10
38569 ++ jnz .Loop_dec_byte
38570 ++
38571 ++ xor %eax,%eax
38572 ++.Loop_dec_pad:
38573 ++ mov %al,($otp) # zero pad
38574 ++ lea 1($otp),$otp
38575 ++ dec $len
38576 ++ jnz .Loop_dec_pad
38577 ++
38578 ++.Ldone_dec:
38579 ++ mov $otp,%rax # return the advanced otp pointer
38580 ++ ret
38581 ++.size xor128_decrypt_n_pad,.-xor128_decrypt_n_pad
38582 ++___
38583 ++}
38584 ++
38585 ++# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
38586 ++# CONTEXT *context,DISPATCHER_CONTEXT *disp)
38587 ++if ($win64) {
38588 ++$rec="%rcx";
38589 ++$frame="%rdx";
38590 ++$context="%r8";
38591 ++$disp="%r9";
38592 ++
38593 ++$code.=<<___;
38594 ++.extern __imp_RtlVirtualUnwind
38595 ++.type se_handler,\@abi-omnipotent
38596 ++.align 16
38597 ++se_handler:
38598 ++ push %rsi
38599 ++ push %rdi
38600 ++ push %rbx
38601 ++ push %rbp
38602 ++ push %r12
38603 ++ push %r13
38604 ++ push %r14
38605 ++ push %r15
38606 ++ pushfq
38607 ++ sub \$64,%rsp
38608 ++
38609 ++ mov 120($context),%rax # pull context->Rax
38610 ++ mov 248($context),%rbx # pull context->Rip
38611 ++
38612 ++ mov 8($disp),%rsi # disp->ImageBase
38613 ++ mov 56($disp),%r11 # disp->HandlerData
38614 ++
38615 ++ mov 0(%r11),%r10d # HandlerData[0]
38616 ++ lea (%rsi,%r10),%r10 # prologue label
38617 ++ cmp %r10,%rbx # context->Rip<.Lprologue
38618 ++ jb .Lcommon_seh_tail
38619 ++
38620 ++ mov 152($context),%rax # pull context->Rsp
38621 ++
38622 ++ mov 4(%r11),%r10d # HandlerData[1]
38623 ++ lea (%rsi,%r10),%r10 # epilogue label
38624 ++ cmp %r10,%rbx # context->Rip>=.Lepilogue
38625 ++ jae .Lcommon_seh_tail
38626 ++
38627 ++ lea 48(%rax),%rax
38628 ++
38629 ++ mov -8(%rax),%rbx
38630 ++ mov -16(%rax),%rbp
38631 ++ mov -24(%rax),%r12
38632 ++ mov -32(%rax),%r13
38633 ++ mov -40(%rax),%r14
38634 ++ mov -48(%rax),%r15
38635 ++ mov %rbx,144($context) # restore context->Rbx
38636 ++ mov %rbp,160($context) # restore context->Rbp
38637 ++ mov %r12,216($context) # restore context->R12
38638 ++ mov %r13,224($context) # restore context->R13
38639 ++ mov %r14,232($context) # restore context->R14
38640 ++ mov %r15,240($context) # restore context->R15
38641 ++
38642 ++ jmp .Lcommon_seh_tail
38643 ++.size se_handler,.-se_handler
38644 ++
38645 ++.type avx_handler,\@abi-omnipotent
38646 ++.align 16
38647 ++avx_handler:
38648 ++ push %rsi
38649 ++ push %rdi
38650 ++ push %rbx
38651 ++ push %rbp
38652 ++ push %r12
38653 ++ push %r13
38654 ++ push %r14
38655 ++ push %r15
38656 ++ pushfq
38657 ++ sub \$64,%rsp
38658 ++
38659 ++ mov 120($context),%rax # pull context->Rax
38660 ++ mov 248($context),%rbx # pull context->Rip
38661 ++
38662 ++ mov 8($disp),%rsi # disp->ImageBase
38663 ++ mov 56($disp),%r11 # disp->HandlerData
38664 ++
38665 ++ mov 0(%r11),%r10d # HandlerData[0]
38666 ++ lea (%rsi,%r10),%r10 # prologue label
38667 ++ cmp %r10,%rbx # context->Rip<prologue label
38668 ++ jb .Lcommon_seh_tail
38669 ++
38670 ++ mov 152($context),%rax # pull context->Rsp
38671 ++
38672 ++ mov 4(%r11),%r10d # HandlerData[1]
38673 ++ lea (%rsi,%r10),%r10 # epilogue label
38674 ++ cmp %r10,%rbx # context->Rip>=epilogue label
38675 ++ jae .Lcommon_seh_tail
38676 ++
38677 ++ mov 208($context),%rax # pull context->R11
38678 ++
38679 ++ lea 0x50(%rax),%rsi
38680 ++ lea 0xf8(%rax),%rax
38681 ++ lea 512($context),%rdi # &context.Xmm6
38682 ++ mov \$20,%ecx
38683 ++ .long 0xa548f3fc # cld; rep movsq
38684 ++
38685 ++.Lcommon_seh_tail:
38686 ++ mov 8(%rax),%rdi
38687 ++ mov 16(%rax),%rsi
38688 ++ mov %rax,152($context) # restore context->Rsp
38689 ++ mov %rsi,168($context) # restore context->Rsi
38690 ++ mov %rdi,176($context) # restore context->Rdi
38691 ++
38692 ++ mov 40($disp),%rdi # disp->ContextRecord
38693 ++ mov $context,%rsi # context
38694 ++ mov \$154,%ecx # sizeof(CONTEXT) in quadwords
38695 ++ .long 0xa548f3fc # cld; rep movsq
38696 ++
38697 ++ mov $disp,%rsi
38698 ++ xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
38699 ++ mov 8(%rsi),%rdx # arg2, disp->ImageBase
38700 ++ mov 0(%rsi),%r8 # arg3, disp->ControlPc
38701 ++ mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
38702 ++ mov 40(%rsi),%r10 # disp->ContextRecord
38703 ++ lea 56(%rsi),%r11 # &disp->HandlerData
38704 ++ lea 24(%rsi),%r12 # &disp->EstablisherFrame
38705 ++ mov %r10,32(%rsp) # arg5
38706 ++ mov %r11,40(%rsp) # arg6
38707 ++ mov %r12,48(%rsp) # arg7
38708 ++ mov %rcx,56(%rsp) # arg8, (NULL)
38709 ++ call *__imp_RtlVirtualUnwind(%rip)
38710 ++
38711 ++ mov \$1,%eax # ExceptionContinueSearch
38712 ++ add \$64,%rsp
38713 ++ popfq
38714 ++ pop %r15
38715 ++ pop %r14
38716 ++ pop %r13
38717 ++ pop %r12
38718 ++ pop %rbp
38719 ++ pop %rbx
38720 ++ pop %rdi
38721 ++ pop %rsi
38722 ++ ret
38723 ++.size avx_handler,.-avx_handler
38724 ++
38725 ++.section .pdata
38726 ++.align 4
38727 ++ .rva .LSEH_begin_poly1305_init
38728 ++ .rva .LSEH_end_poly1305_init
38729 ++ .rva .LSEH_info_poly1305_init
38730 ++
38731 ++ .rva .LSEH_begin_poly1305_blocks
38732 ++ .rva .LSEH_end_poly1305_blocks
38733 ++ .rva .LSEH_info_poly1305_blocks
38734 ++
38735 ++ .rva .LSEH_begin_poly1305_emit
38736 ++ .rva .LSEH_end_poly1305_emit
38737 ++ .rva .LSEH_info_poly1305_emit
38738 ++___
38739 ++$code.=<<___ if ($avx);
38740 ++ .rva .LSEH_begin_poly1305_blocks_avx
38741 ++ .rva .Lbase2_64_avx
38742 ++ .rva .LSEH_info_poly1305_blocks_avx_1
38743 ++
38744 ++ .rva .Lbase2_64_avx
38745 ++ .rva .Leven_avx
38746 ++ .rva .LSEH_info_poly1305_blocks_avx_2
38747 ++
38748 ++ .rva .Leven_avx
38749 ++ .rva .LSEH_end_poly1305_blocks_avx
38750 ++ .rva .LSEH_info_poly1305_blocks_avx_3
38751 ++
38752 ++ .rva .LSEH_begin_poly1305_emit_avx
38753 ++ .rva .LSEH_end_poly1305_emit_avx
38754 ++ .rva .LSEH_info_poly1305_emit_avx
38755 ++___
38756 ++$code.=<<___ if ($avx>1);
38757 ++ .rva .LSEH_begin_poly1305_blocks_avx2
38758 ++ .rva .Lbase2_64_avx2
38759 ++ .rva .LSEH_info_poly1305_blocks_avx2_1
38760 ++
38761 ++ .rva .Lbase2_64_avx2
38762 ++ .rva .Leven_avx2
38763 ++ .rva .LSEH_info_poly1305_blocks_avx2_2
38764 ++
38765 ++ .rva .Leven_avx2
38766 ++ .rva .LSEH_end_poly1305_blocks_avx2
38767 ++ .rva .LSEH_info_poly1305_blocks_avx2_3
38768 ++___
38769 ++$code.=<<___ if ($avx>2);
38770 ++ .rva .LSEH_begin_poly1305_blocks_avx512
38771 ++ .rva .LSEH_end_poly1305_blocks_avx512
38772 ++ .rva .LSEH_info_poly1305_blocks_avx512
38773 ++___
38774 ++$code.=<<___;
38775 ++.section .xdata
38776 ++.align 8
38777 ++.LSEH_info_poly1305_init:
38778 ++ .byte 9,0,0,0
38779 ++ .rva se_handler
38780 ++ .rva .LSEH_begin_poly1305_init,.LSEH_begin_poly1305_init
38781 ++
38782 ++.LSEH_info_poly1305_blocks:
38783 ++ .byte 9,0,0,0
38784 ++ .rva se_handler
38785 ++ .rva .Lblocks_body,.Lblocks_epilogue
38786 ++
38787 ++.LSEH_info_poly1305_emit:
38788 ++ .byte 9,0,0,0
38789 ++ .rva se_handler
38790 ++ .rva .LSEH_begin_poly1305_emit,.LSEH_begin_poly1305_emit
38791 ++___
38792 ++$code.=<<___ if ($avx);
38793 ++.LSEH_info_poly1305_blocks_avx_1:
38794 ++ .byte 9,0,0,0
38795 ++ .rva se_handler
38796 ++ .rva .Lblocks_avx_body,.Lblocks_avx_epilogue # HandlerData[]
38797 ++
38798 ++.LSEH_info_poly1305_blocks_avx_2:
38799 ++ .byte 9,0,0,0
38800 ++ .rva se_handler
38801 ++ .rva .Lbase2_64_avx_body,.Lbase2_64_avx_epilogue # HandlerData[]
38802 ++
38803 ++.LSEH_info_poly1305_blocks_avx_3:
38804 ++ .byte 9,0,0,0
38805 ++ .rva avx_handler
38806 ++ .rva .Ldo_avx_body,.Ldo_avx_epilogue # HandlerData[]
38807 ++
38808 ++.LSEH_info_poly1305_emit_avx:
38809 ++ .byte 9,0,0,0
38810 ++ .rva se_handler
38811 ++ .rva .LSEH_begin_poly1305_emit_avx,.LSEH_begin_poly1305_emit_avx
38812 ++___
38813 ++$code.=<<___ if ($avx>1);
38814 ++.LSEH_info_poly1305_blocks_avx2_1:
38815 ++ .byte 9,0,0,0
38816 ++ .rva se_handler
38817 ++ .rva .Lblocks_avx2_body,.Lblocks_avx2_epilogue # HandlerData[]
38818 ++
38819 ++.LSEH_info_poly1305_blocks_avx2_2:
38820 ++ .byte 9,0,0,0
38821 ++ .rva se_handler
38822 ++ .rva .Lbase2_64_avx2_body,.Lbase2_64_avx2_epilogue # HandlerData[]
38823 ++
38824 ++.LSEH_info_poly1305_blocks_avx2_3:
38825 ++ .byte 9,0,0,0
38826 ++ .rva avx_handler
38827 ++ .rva .Ldo_avx2_body,.Ldo_avx2_epilogue # HandlerData[]
38828 ++___
38829 ++$code.=<<___ if ($avx>2);
38830 ++.LSEH_info_poly1305_blocks_avx512:
38831 ++ .byte 9,0,0,0
38832 ++ .rva avx_handler
38833 ++ .rva .Ldo_avx512_body,.Ldo_avx512_epilogue # HandlerData[]
38834 ++___
38835 ++}
38836 ++
38837 ++foreach (split('\n',$code)) {
38838 ++ s/\`([^\`]*)\`/eval($1)/ge;
38839 ++ s/%r([a-z]+)#d/%e$1/g;
38840 ++ s/%r([0-9]+)#d/%r$1d/g;
38841 ++ s/%x#%[yz]/%x/g or s/%y#%z/%y/g or s/%z#%[yz]/%z/g;
38842 ++
38843 ++ print $_,"\n";
38844 ++}
38845 ++close STDOUT;
38846 +--
38847 +cgit v1.2.3-4-ga26e
38848 +
38849 +
38850 +From db58c15848d5db42ed3c6e07bb74f7195dc70535 Mon Sep 17 00:00:00 2001
38851 +From: "Jason A. Donenfeld" <Jason@×××××.com>
38852 +Date: Sun, 5 Jan 2020 22:40:48 -0500
38853 +Subject: crypto: x86/poly1305 - wire up faster implementations for kernel
38854 +
38855 +commit d7d7b853566254648df59f7ea27ea05952a6cfa8 upstream.
38856 +
38857 +These x86_64 vectorized implementations support AVX, AVX-2, and AVX-512F.
38858 +The AVX-512F implementation is disabled on Skylake, due to throttling,
38859 +but it is quite fast on >= Cannonlake.
38860 +
38861 +On the left is cycle counts on a Core i7 6700HQ using the AVX-2
38862 +codepath, comparing this implementation ("new") to the implementation in
38863 +the current crypto api ("old"). On the right are benchmarks on a Xeon
38864 +Gold 5120 using the AVX-512 codepath. The new implementation is faster
38865 +on all benchmarks.
38866 +
38867 + AVX-2 AVX-512
38868 + --------- -----------
38869 +
38870 + size old new size old new
38871 + ---- ---- ---- ---- ---- ----
38872 + 0 70 68 0 74 70
38873 + 16 92 90 16 96 92
38874 + 32 134 104 32 136 106
38875 + 48 172 120 48 184 124
38876 + 64 218 136 64 218 138
38877 + 80 254 158 80 260 160
38878 + 96 298 174 96 300 176
38879 + 112 342 192 112 342 194
38880 + 128 388 212 128 384 212
38881 + 144 428 228 144 420 226
38882 + 160 466 246 160 464 248
38883 + 176 510 264 176 504 264
38884 + 192 550 282 192 544 282
38885 + 208 594 302 208 582 300
38886 + 224 628 316 224 624 318
38887 + 240 676 334 240 662 338
38888 + 256 716 354 256 708 358
38889 + 272 764 374 272 748 372
38890 + 288 802 352 288 788 358
38891 + 304 420 366 304 422 370
38892 + 320 428 360 320 432 364
38893 + 336 484 378 336 486 380
38894 + 352 426 384 352 434 390
38895 + 368 478 400 368 480 408
38896 + 384 488 394 384 490 398
38897 + 400 542 408 400 542 412
38898 + 416 486 416 416 492 426
38899 + 432 534 430 432 538 436
38900 + 448 544 422 448 546 432
38901 + 464 600 438 464 600 448
38902 + 480 540 448 480 548 456
38903 + 496 594 464 496 594 476
38904 + 512 602 456 512 606 470
38905 + 528 656 476 528 656 480
38906 + 544 600 480 544 606 498
38907 + 560 650 494 560 652 512
38908 + 576 664 490 576 662 508
38909 + 592 714 508 592 716 522
38910 + 608 656 514 608 664 538
38911 + 624 708 532 624 710 552
38912 + 640 716 524 640 720 516
38913 + 656 770 536 656 772 526
38914 + 672 716 548 672 722 544
38915 + 688 770 562 688 768 556
38916 + 704 774 552 704 778 556
38917 + 720 826 568 720 832 568
38918 + 736 768 574 736 780 584
38919 + 752 822 592 752 826 600
38920 + 768 830 584 768 836 560
38921 + 784 884 602 784 888 572
38922 + 800 828 610 800 838 588
38923 + 816 884 628 816 884 604
38924 + 832 888 618 832 894 598
38925 + 848 942 632 848 946 612
38926 + 864 884 644 864 896 628
38927 + 880 936 660 880 942 644
38928 + 896 948 652 896 952 608
38929 + 912 1000 664 912 1004 616
38930 + 928 942 676 928 954 634
38931 + 944 994 690 944 1000 646
38932 + 960 1002 680 960 1008 646
38933 + 976 1054 694 976 1062 658
38934 + 992 1002 706 992 1012 674
38935 + 1008 1052 720 1008 1058 690
38936 +
38937 +This commit wires in the prior implementation from Andy, and makes the
38938 +following changes to be suitable for kernel land.
38939 +
38940 + - Some cosmetic and structural changes, like renaming labels to
38941 + .Lname, constants, and other Linux conventions, as well as making
38942 + the code easy for us to maintain moving forward.
38943 +
38944 + - CPU feature checking is done in C by the glue code.
38945 +
38946 + - We avoid jumping into the middle of functions, to appease objtool,
38947 + and instead parameterize shared code.
38948 +
38949 + - We maintain frame pointers so that stack traces make sense.
38950 +
38951 + - We remove the dependency on the perl xlate code, which translates
38952 + the output into dialects for assemblers that we do not care about.
38953 +
38954 +Importantly, none of our changes affect the arithmetic or core code, but
38955 +just involve the differing environment of kernel space.
38956 +
38957 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
38958 +Signed-off-by: Samuel Neves <sneves@××××××.pt>
38959 +Co-developed-by: Samuel Neves <sneves@××××××.pt>
38960 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
38961 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
38962 +---
38963 + arch/x86/crypto/.gitignore | 1 +
38964 + arch/x86/crypto/Makefile | 11 +-
38965 + arch/x86/crypto/poly1305-avx2-x86_64.S | 390 ---------------
38966 + arch/x86/crypto/poly1305-sse2-x86_64.S | 590 ----------------------
38967 + arch/x86/crypto/poly1305-x86_64-cryptogams.pl | 682 +++++++++++++++-----------
38968 + arch/x86/crypto/poly1305_glue.c | 473 +++++++-----------
38969 + lib/crypto/Kconfig | 2 +-
38970 + 7 files changed, 572 insertions(+), 1577 deletions(-)
38971 + create mode 100644 arch/x86/crypto/.gitignore
38972 + delete mode 100644 arch/x86/crypto/poly1305-avx2-x86_64.S
38973 + delete mode 100644 arch/x86/crypto/poly1305-sse2-x86_64.S
38974 +
38975 +diff --git a/arch/x86/crypto/.gitignore b/arch/x86/crypto/.gitignore
38976 +new file mode 100644
38977 +index 000000000000..c406ea6571fa
38978 +--- /dev/null
38979 ++++ b/arch/x86/crypto/.gitignore
38980 +@@ -0,0 +1 @@
38981 ++poly1305-x86_64.S
38982 +diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
38983 +index 958440eae27e..b69e00bf20b8 100644
38984 +--- a/arch/x86/crypto/Makefile
38985 ++++ b/arch/x86/crypto/Makefile
38986 +@@ -73,6 +73,10 @@ aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o
38987 +
38988 + nhpoly1305-sse2-y := nh-sse2-x86_64.o nhpoly1305-sse2-glue.o
38989 + blake2s-x86_64-y := blake2s-core.o blake2s-glue.o
38990 ++poly1305-x86_64-y := poly1305-x86_64-cryptogams.o poly1305_glue.o
38991 ++ifneq ($(CONFIG_CRYPTO_POLY1305_X86_64),)
38992 ++targets += poly1305-x86_64-cryptogams.S
38993 ++endif
38994 +
38995 + ifeq ($(avx_supported),yes)
38996 + camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o \
38997 +@@ -101,10 +105,8 @@ aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o
38998 + aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o aes_ctrby8_avx-x86_64.o
38999 + ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
39000 + sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
39001 +-poly1305-x86_64-y := poly1305-sse2-x86_64.o poly1305_glue.o
39002 + ifeq ($(avx2_supported),yes)
39003 + sha1-ssse3-y += sha1_avx2_x86_64_asm.o
39004 +-poly1305-x86_64-y += poly1305-avx2-x86_64.o
39005 + endif
39006 + ifeq ($(sha1_ni_supported),yes)
39007 + sha1-ssse3-y += sha1_ni_asm.o
39008 +@@ -118,3 +120,8 @@ sha256-ssse3-y += sha256_ni_asm.o
39009 + endif
39010 + sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o
39011 + crct10dif-pclmul-y := crct10dif-pcl-asm_64.o crct10dif-pclmul_glue.o
39012 ++
39013 ++quiet_cmd_perlasm = PERLASM $@
39014 ++ cmd_perlasm = $(PERL) $< > $@
39015 ++$(obj)/%.S: $(src)/%.pl FORCE
39016 ++ $(call if_changed,perlasm)
39017 +diff --git a/arch/x86/crypto/poly1305-avx2-x86_64.S b/arch/x86/crypto/poly1305-avx2-x86_64.S
39018 +deleted file mode 100644
39019 +index 1688fb551070..000000000000
39020 +--- a/arch/x86/crypto/poly1305-avx2-x86_64.S
39021 ++++ /dev/null
39022 +@@ -1,390 +0,0 @@
39023 +-/* SPDX-License-Identifier: GPL-2.0-or-later */
39024 +-/*
39025 +- * Poly1305 authenticator algorithm, RFC7539, x64 AVX2 functions
39026 +- *
39027 +- * Copyright (C) 2015 Martin Willi
39028 +- */
39029 +-
39030 +-#include <linux/linkage.h>
39031 +-
39032 +-.section .rodata.cst32.ANMASK, "aM", @progbits, 32
39033 +-.align 32
39034 +-ANMASK: .octa 0x0000000003ffffff0000000003ffffff
39035 +- .octa 0x0000000003ffffff0000000003ffffff
39036 +-
39037 +-.section .rodata.cst32.ORMASK, "aM", @progbits, 32
39038 +-.align 32
39039 +-ORMASK: .octa 0x00000000010000000000000001000000
39040 +- .octa 0x00000000010000000000000001000000
39041 +-
39042 +-.text
39043 +-
39044 +-#define h0 0x00(%rdi)
39045 +-#define h1 0x04(%rdi)
39046 +-#define h2 0x08(%rdi)
39047 +-#define h3 0x0c(%rdi)
39048 +-#define h4 0x10(%rdi)
39049 +-#define r0 0x00(%rdx)
39050 +-#define r1 0x04(%rdx)
39051 +-#define r2 0x08(%rdx)
39052 +-#define r3 0x0c(%rdx)
39053 +-#define r4 0x10(%rdx)
39054 +-#define u0 0x00(%r8)
39055 +-#define u1 0x04(%r8)
39056 +-#define u2 0x08(%r8)
39057 +-#define u3 0x0c(%r8)
39058 +-#define u4 0x10(%r8)
39059 +-#define w0 0x18(%r8)
39060 +-#define w1 0x1c(%r8)
39061 +-#define w2 0x20(%r8)
39062 +-#define w3 0x24(%r8)
39063 +-#define w4 0x28(%r8)
39064 +-#define y0 0x30(%r8)
39065 +-#define y1 0x34(%r8)
39066 +-#define y2 0x38(%r8)
39067 +-#define y3 0x3c(%r8)
39068 +-#define y4 0x40(%r8)
39069 +-#define m %rsi
39070 +-#define hc0 %ymm0
39071 +-#define hc1 %ymm1
39072 +-#define hc2 %ymm2
39073 +-#define hc3 %ymm3
39074 +-#define hc4 %ymm4
39075 +-#define hc0x %xmm0
39076 +-#define hc1x %xmm1
39077 +-#define hc2x %xmm2
39078 +-#define hc3x %xmm3
39079 +-#define hc4x %xmm4
39080 +-#define t1 %ymm5
39081 +-#define t2 %ymm6
39082 +-#define t1x %xmm5
39083 +-#define t2x %xmm6
39084 +-#define ruwy0 %ymm7
39085 +-#define ruwy1 %ymm8
39086 +-#define ruwy2 %ymm9
39087 +-#define ruwy3 %ymm10
39088 +-#define ruwy4 %ymm11
39089 +-#define ruwy0x %xmm7
39090 +-#define ruwy1x %xmm8
39091 +-#define ruwy2x %xmm9
39092 +-#define ruwy3x %xmm10
39093 +-#define ruwy4x %xmm11
39094 +-#define svxz1 %ymm12
39095 +-#define svxz2 %ymm13
39096 +-#define svxz3 %ymm14
39097 +-#define svxz4 %ymm15
39098 +-#define d0 %r9
39099 +-#define d1 %r10
39100 +-#define d2 %r11
39101 +-#define d3 %r12
39102 +-#define d4 %r13
39103 +-
39104 +-ENTRY(poly1305_4block_avx2)
39105 +- # %rdi: Accumulator h[5]
39106 +- # %rsi: 64 byte input block m
39107 +- # %rdx: Poly1305 key r[5]
39108 +- # %rcx: Quadblock count
39109 +- # %r8: Poly1305 derived key r^2 u[5], r^3 w[5], r^4 y[5],
39110 +-
39111 +- # This four-block variant uses loop unrolled block processing. It
39112 +- # requires 4 Poly1305 keys: r, r^2, r^3 and r^4:
39113 +- # h = (h + m) * r => h = (h + m1) * r^4 + m2 * r^3 + m3 * r^2 + m4 * r
39114 +-
39115 +- vzeroupper
39116 +- push %rbx
39117 +- push %r12
39118 +- push %r13
39119 +-
39120 +- # combine r0,u0,w0,y0
39121 +- vmovd y0,ruwy0x
39122 +- vmovd w0,t1x
39123 +- vpunpcklqdq t1,ruwy0,ruwy0
39124 +- vmovd u0,t1x
39125 +- vmovd r0,t2x
39126 +- vpunpcklqdq t2,t1,t1
39127 +- vperm2i128 $0x20,t1,ruwy0,ruwy0
39128 +-
39129 +- # combine r1,u1,w1,y1 and s1=r1*5,v1=u1*5,x1=w1*5,z1=y1*5
39130 +- vmovd y1,ruwy1x
39131 +- vmovd w1,t1x
39132 +- vpunpcklqdq t1,ruwy1,ruwy1
39133 +- vmovd u1,t1x
39134 +- vmovd r1,t2x
39135 +- vpunpcklqdq t2,t1,t1
39136 +- vperm2i128 $0x20,t1,ruwy1,ruwy1
39137 +- vpslld $2,ruwy1,svxz1
39138 +- vpaddd ruwy1,svxz1,svxz1
39139 +-
39140 +- # combine r2,u2,w2,y2 and s2=r2*5,v2=u2*5,x2=w2*5,z2=y2*5
39141 +- vmovd y2,ruwy2x
39142 +- vmovd w2,t1x
39143 +- vpunpcklqdq t1,ruwy2,ruwy2
39144 +- vmovd u2,t1x
39145 +- vmovd r2,t2x
39146 +- vpunpcklqdq t2,t1,t1
39147 +- vperm2i128 $0x20,t1,ruwy2,ruwy2
39148 +- vpslld $2,ruwy2,svxz2
39149 +- vpaddd ruwy2,svxz2,svxz2
39150 +-
39151 +- # combine r3,u3,w3,y3 and s3=r3*5,v3=u3*5,x3=w3*5,z3=y3*5
39152 +- vmovd y3,ruwy3x
39153 +- vmovd w3,t1x
39154 +- vpunpcklqdq t1,ruwy3,ruwy3
39155 +- vmovd u3,t1x
39156 +- vmovd r3,t2x
39157 +- vpunpcklqdq t2,t1,t1
39158 +- vperm2i128 $0x20,t1,ruwy3,ruwy3
39159 +- vpslld $2,ruwy3,svxz3
39160 +- vpaddd ruwy3,svxz3,svxz3
39161 +-
39162 +- # combine r4,u4,w4,y4 and s4=r4*5,v4=u4*5,x4=w4*5,z4=y4*5
39163 +- vmovd y4,ruwy4x
39164 +- vmovd w4,t1x
39165 +- vpunpcklqdq t1,ruwy4,ruwy4
39166 +- vmovd u4,t1x
39167 +- vmovd r4,t2x
39168 +- vpunpcklqdq t2,t1,t1
39169 +- vperm2i128 $0x20,t1,ruwy4,ruwy4
39170 +- vpslld $2,ruwy4,svxz4
39171 +- vpaddd ruwy4,svxz4,svxz4
39172 +-
39173 +-.Ldoblock4:
39174 +- # hc0 = [m[48-51] & 0x3ffffff, m[32-35] & 0x3ffffff,
39175 +- # m[16-19] & 0x3ffffff, m[ 0- 3] & 0x3ffffff + h0]
39176 +- vmovd 0x00(m),hc0x
39177 +- vmovd 0x10(m),t1x
39178 +- vpunpcklqdq t1,hc0,hc0
39179 +- vmovd 0x20(m),t1x
39180 +- vmovd 0x30(m),t2x
39181 +- vpunpcklqdq t2,t1,t1
39182 +- vperm2i128 $0x20,t1,hc0,hc0
39183 +- vpand ANMASK(%rip),hc0,hc0
39184 +- vmovd h0,t1x
39185 +- vpaddd t1,hc0,hc0
39186 +- # hc1 = [(m[51-54] >> 2) & 0x3ffffff, (m[35-38] >> 2) & 0x3ffffff,
39187 +- # (m[19-22] >> 2) & 0x3ffffff, (m[ 3- 6] >> 2) & 0x3ffffff + h1]
39188 +- vmovd 0x03(m),hc1x
39189 +- vmovd 0x13(m),t1x
39190 +- vpunpcklqdq t1,hc1,hc1
39191 +- vmovd 0x23(m),t1x
39192 +- vmovd 0x33(m),t2x
39193 +- vpunpcklqdq t2,t1,t1
39194 +- vperm2i128 $0x20,t1,hc1,hc1
39195 +- vpsrld $2,hc1,hc1
39196 +- vpand ANMASK(%rip),hc1,hc1
39197 +- vmovd h1,t1x
39198 +- vpaddd t1,hc1,hc1
39199 +- # hc2 = [(m[54-57] >> 4) & 0x3ffffff, (m[38-41] >> 4) & 0x3ffffff,
39200 +- # (m[22-25] >> 4) & 0x3ffffff, (m[ 6- 9] >> 4) & 0x3ffffff + h2]
39201 +- vmovd 0x06(m),hc2x
39202 +- vmovd 0x16(m),t1x
39203 +- vpunpcklqdq t1,hc2,hc2
39204 +- vmovd 0x26(m),t1x
39205 +- vmovd 0x36(m),t2x
39206 +- vpunpcklqdq t2,t1,t1
39207 +- vperm2i128 $0x20,t1,hc2,hc2
39208 +- vpsrld $4,hc2,hc2
39209 +- vpand ANMASK(%rip),hc2,hc2
39210 +- vmovd h2,t1x
39211 +- vpaddd t1,hc2,hc2
39212 +- # hc3 = [(m[57-60] >> 6) & 0x3ffffff, (m[41-44] >> 6) & 0x3ffffff,
39213 +- # (m[25-28] >> 6) & 0x3ffffff, (m[ 9-12] >> 6) & 0x3ffffff + h3]
39214 +- vmovd 0x09(m),hc3x
39215 +- vmovd 0x19(m),t1x
39216 +- vpunpcklqdq t1,hc3,hc3
39217 +- vmovd 0x29(m),t1x
39218 +- vmovd 0x39(m),t2x
39219 +- vpunpcklqdq t2,t1,t1
39220 +- vperm2i128 $0x20,t1,hc3,hc3
39221 +- vpsrld $6,hc3,hc3
39222 +- vpand ANMASK(%rip),hc3,hc3
39223 +- vmovd h3,t1x
39224 +- vpaddd t1,hc3,hc3
39225 +- # hc4 = [(m[60-63] >> 8) | (1<<24), (m[44-47] >> 8) | (1<<24),
39226 +- # (m[28-31] >> 8) | (1<<24), (m[12-15] >> 8) | (1<<24) + h4]
39227 +- vmovd 0x0c(m),hc4x
39228 +- vmovd 0x1c(m),t1x
39229 +- vpunpcklqdq t1,hc4,hc4
39230 +- vmovd 0x2c(m),t1x
39231 +- vmovd 0x3c(m),t2x
39232 +- vpunpcklqdq t2,t1,t1
39233 +- vperm2i128 $0x20,t1,hc4,hc4
39234 +- vpsrld $8,hc4,hc4
39235 +- vpor ORMASK(%rip),hc4,hc4
39236 +- vmovd h4,t1x
39237 +- vpaddd t1,hc4,hc4
39238 +-
39239 +- # t1 = [ hc0[3] * r0, hc0[2] * u0, hc0[1] * w0, hc0[0] * y0 ]
39240 +- vpmuludq hc0,ruwy0,t1
39241 +- # t1 += [ hc1[3] * s4, hc1[2] * v4, hc1[1] * x4, hc1[0] * z4 ]
39242 +- vpmuludq hc1,svxz4,t2
39243 +- vpaddq t2,t1,t1
39244 +- # t1 += [ hc2[3] * s3, hc2[2] * v3, hc2[1] * x3, hc2[0] * z3 ]
39245 +- vpmuludq hc2,svxz3,t2
39246 +- vpaddq t2,t1,t1
39247 +- # t1 += [ hc3[3] * s2, hc3[2] * v2, hc3[1] * x2, hc3[0] * z2 ]
39248 +- vpmuludq hc3,svxz2,t2
39249 +- vpaddq t2,t1,t1
39250 +- # t1 += [ hc4[3] * s1, hc4[2] * v1, hc4[1] * x1, hc4[0] * z1 ]
39251 +- vpmuludq hc4,svxz1,t2
39252 +- vpaddq t2,t1,t1
39253 +- # d0 = t1[0] + t1[1] + t[2] + t[3]
39254 +- vpermq $0xee,t1,t2
39255 +- vpaddq t2,t1,t1
39256 +- vpsrldq $8,t1,t2
39257 +- vpaddq t2,t1,t1
39258 +- vmovq t1x,d0
39259 +-
39260 +- # t1 = [ hc0[3] * r1, hc0[2] * u1,hc0[1] * w1, hc0[0] * y1 ]
39261 +- vpmuludq hc0,ruwy1,t1
39262 +- # t1 += [ hc1[3] * r0, hc1[2] * u0, hc1[1] * w0, hc1[0] * y0 ]
39263 +- vpmuludq hc1,ruwy0,t2
39264 +- vpaddq t2,t1,t1
39265 +- # t1 += [ hc2[3] * s4, hc2[2] * v4, hc2[1] * x4, hc2[0] * z4 ]
39266 +- vpmuludq hc2,svxz4,t2
39267 +- vpaddq t2,t1,t1
39268 +- # t1 += [ hc3[3] * s3, hc3[2] * v3, hc3[1] * x3, hc3[0] * z3 ]
39269 +- vpmuludq hc3,svxz3,t2
39270 +- vpaddq t2,t1,t1
39271 +- # t1 += [ hc4[3] * s2, hc4[2] * v2, hc4[1] * x2, hc4[0] * z2 ]
39272 +- vpmuludq hc4,svxz2,t2
39273 +- vpaddq t2,t1,t1
39274 +- # d1 = t1[0] + t1[1] + t1[3] + t1[4]
39275 +- vpermq $0xee,t1,t2
39276 +- vpaddq t2,t1,t1
39277 +- vpsrldq $8,t1,t2
39278 +- vpaddq t2,t1,t1
39279 +- vmovq t1x,d1
39280 +-
39281 +- # t1 = [ hc0[3] * r2, hc0[2] * u2, hc0[1] * w2, hc0[0] * y2 ]
39282 +- vpmuludq hc0,ruwy2,t1
39283 +- # t1 += [ hc1[3] * r1, hc1[2] * u1, hc1[1] * w1, hc1[0] * y1 ]
39284 +- vpmuludq hc1,ruwy1,t2
39285 +- vpaddq t2,t1,t1
39286 +- # t1 += [ hc2[3] * r0, hc2[2] * u0, hc2[1] * w0, hc2[0] * y0 ]
39287 +- vpmuludq hc2,ruwy0,t2
39288 +- vpaddq t2,t1,t1
39289 +- # t1 += [ hc3[3] * s4, hc3[2] * v4, hc3[1] * x4, hc3[0] * z4 ]
39290 +- vpmuludq hc3,svxz4,t2
39291 +- vpaddq t2,t1,t1
39292 +- # t1 += [ hc4[3] * s3, hc4[2] * v3, hc4[1] * x3, hc4[0] * z3 ]
39293 +- vpmuludq hc4,svxz3,t2
39294 +- vpaddq t2,t1,t1
39295 +- # d2 = t1[0] + t1[1] + t1[2] + t1[3]
39296 +- vpermq $0xee,t1,t2
39297 +- vpaddq t2,t1,t1
39298 +- vpsrldq $8,t1,t2
39299 +- vpaddq t2,t1,t1
39300 +- vmovq t1x,d2
39301 +-
39302 +- # t1 = [ hc0[3] * r3, hc0[2] * u3, hc0[1] * w3, hc0[0] * y3 ]
39303 +- vpmuludq hc0,ruwy3,t1
39304 +- # t1 += [ hc1[3] * r2, hc1[2] * u2, hc1[1] * w2, hc1[0] * y2 ]
39305 +- vpmuludq hc1,ruwy2,t2
39306 +- vpaddq t2,t1,t1
39307 +- # t1 += [ hc2[3] * r1, hc2[2] * u1, hc2[1] * w1, hc2[0] * y1 ]
39308 +- vpmuludq hc2,ruwy1,t2
39309 +- vpaddq t2,t1,t1
39310 +- # t1 += [ hc3[3] * r0, hc3[2] * u0, hc3[1] * w0, hc3[0] * y0 ]
39311 +- vpmuludq hc3,ruwy0,t2
39312 +- vpaddq t2,t1,t1
39313 +- # t1 += [ hc4[3] * s4, hc4[2] * v4, hc4[1] * x4, hc4[0] * z4 ]
39314 +- vpmuludq hc4,svxz4,t2
39315 +- vpaddq t2,t1,t1
39316 +- # d3 = t1[0] + t1[1] + t1[2] + t1[3]
39317 +- vpermq $0xee,t1,t2
39318 +- vpaddq t2,t1,t1
39319 +- vpsrldq $8,t1,t2
39320 +- vpaddq t2,t1,t1
39321 +- vmovq t1x,d3
39322 +-
39323 +- # t1 = [ hc0[3] * r4, hc0[2] * u4, hc0[1] * w4, hc0[0] * y4 ]
39324 +- vpmuludq hc0,ruwy4,t1
39325 +- # t1 += [ hc1[3] * r3, hc1[2] * u3, hc1[1] * w3, hc1[0] * y3 ]
39326 +- vpmuludq hc1,ruwy3,t2
39327 +- vpaddq t2,t1,t1
39328 +- # t1 += [ hc2[3] * r2, hc2[2] * u2, hc2[1] * w2, hc2[0] * y2 ]
39329 +- vpmuludq hc2,ruwy2,t2
39330 +- vpaddq t2,t1,t1
39331 +- # t1 += [ hc3[3] * r1, hc3[2] * u1, hc3[1] * w1, hc3[0] * y1 ]
39332 +- vpmuludq hc3,ruwy1,t2
39333 +- vpaddq t2,t1,t1
39334 +- # t1 += [ hc4[3] * r0, hc4[2] * u0, hc4[1] * w0, hc4[0] * y0 ]
39335 +- vpmuludq hc4,ruwy0,t2
39336 +- vpaddq t2,t1,t1
39337 +- # d4 = t1[0] + t1[1] + t1[2] + t1[3]
39338 +- vpermq $0xee,t1,t2
39339 +- vpaddq t2,t1,t1
39340 +- vpsrldq $8,t1,t2
39341 +- vpaddq t2,t1,t1
39342 +- vmovq t1x,d4
39343 +-
39344 +- # Now do a partial reduction mod (2^130)-5, carrying h0 -> h1 -> h2 ->
39345 +- # h3 -> h4 -> h0 -> h1 to get h0,h2,h3,h4 < 2^26 and h1 < 2^26 + a small
39346 +- # amount. Careful: we must not assume the carry bits 'd0 >> 26',
39347 +- # 'd1 >> 26', 'd2 >> 26', 'd3 >> 26', and '(d4 >> 26) * 5' fit in 32-bit
39348 +- # integers. It's true in a single-block implementation, but not here.
39349 +-
39350 +- # d1 += d0 >> 26
39351 +- mov d0,%rax
39352 +- shr $26,%rax
39353 +- add %rax,d1
39354 +- # h0 = d0 & 0x3ffffff
39355 +- mov d0,%rbx
39356 +- and $0x3ffffff,%ebx
39357 +-
39358 +- # d2 += d1 >> 26
39359 +- mov d1,%rax
39360 +- shr $26,%rax
39361 +- add %rax,d2
39362 +- # h1 = d1 & 0x3ffffff
39363 +- mov d1,%rax
39364 +- and $0x3ffffff,%eax
39365 +- mov %eax,h1
39366 +-
39367 +- # d3 += d2 >> 26
39368 +- mov d2,%rax
39369 +- shr $26,%rax
39370 +- add %rax,d3
39371 +- # h2 = d2 & 0x3ffffff
39372 +- mov d2,%rax
39373 +- and $0x3ffffff,%eax
39374 +- mov %eax,h2
39375 +-
39376 +- # d4 += d3 >> 26
39377 +- mov d3,%rax
39378 +- shr $26,%rax
39379 +- add %rax,d4
39380 +- # h3 = d3 & 0x3ffffff
39381 +- mov d3,%rax
39382 +- and $0x3ffffff,%eax
39383 +- mov %eax,h3
39384 +-
39385 +- # h0 += (d4 >> 26) * 5
39386 +- mov d4,%rax
39387 +- shr $26,%rax
39388 +- lea (%rax,%rax,4),%rax
39389 +- add %rax,%rbx
39390 +- # h4 = d4 & 0x3ffffff
39391 +- mov d4,%rax
39392 +- and $0x3ffffff,%eax
39393 +- mov %eax,h4
39394 +-
39395 +- # h1 += h0 >> 26
39396 +- mov %rbx,%rax
39397 +- shr $26,%rax
39398 +- add %eax,h1
39399 +- # h0 = h0 & 0x3ffffff
39400 +- andl $0x3ffffff,%ebx
39401 +- mov %ebx,h0
39402 +-
39403 +- add $0x40,m
39404 +- dec %rcx
39405 +- jnz .Ldoblock4
39406 +-
39407 +- vzeroupper
39408 +- pop %r13
39409 +- pop %r12
39410 +- pop %rbx
39411 +- ret
39412 +-ENDPROC(poly1305_4block_avx2)
39413 +diff --git a/arch/x86/crypto/poly1305-sse2-x86_64.S b/arch/x86/crypto/poly1305-sse2-x86_64.S
39414 +deleted file mode 100644
39415 +index 5578f846e622..000000000000
39416 +--- a/arch/x86/crypto/poly1305-sse2-x86_64.S
39417 ++++ /dev/null
39418 +@@ -1,590 +0,0 @@
39419 +-/* SPDX-License-Identifier: GPL-2.0-or-later */
39420 +-/*
39421 +- * Poly1305 authenticator algorithm, RFC7539, x64 SSE2 functions
39422 +- *
39423 +- * Copyright (C) 2015 Martin Willi
39424 +- */
39425 +-
39426 +-#include <linux/linkage.h>
39427 +-
39428 +-.section .rodata.cst16.ANMASK, "aM", @progbits, 16
39429 +-.align 16
39430 +-ANMASK: .octa 0x0000000003ffffff0000000003ffffff
39431 +-
39432 +-.section .rodata.cst16.ORMASK, "aM", @progbits, 16
39433 +-.align 16
39434 +-ORMASK: .octa 0x00000000010000000000000001000000
39435 +-
39436 +-.text
39437 +-
39438 +-#define h0 0x00(%rdi)
39439 +-#define h1 0x04(%rdi)
39440 +-#define h2 0x08(%rdi)
39441 +-#define h3 0x0c(%rdi)
39442 +-#define h4 0x10(%rdi)
39443 +-#define r0 0x00(%rdx)
39444 +-#define r1 0x04(%rdx)
39445 +-#define r2 0x08(%rdx)
39446 +-#define r3 0x0c(%rdx)
39447 +-#define r4 0x10(%rdx)
39448 +-#define s1 0x00(%rsp)
39449 +-#define s2 0x04(%rsp)
39450 +-#define s3 0x08(%rsp)
39451 +-#define s4 0x0c(%rsp)
39452 +-#define m %rsi
39453 +-#define h01 %xmm0
39454 +-#define h23 %xmm1
39455 +-#define h44 %xmm2
39456 +-#define t1 %xmm3
39457 +-#define t2 %xmm4
39458 +-#define t3 %xmm5
39459 +-#define t4 %xmm6
39460 +-#define mask %xmm7
39461 +-#define d0 %r8
39462 +-#define d1 %r9
39463 +-#define d2 %r10
39464 +-#define d3 %r11
39465 +-#define d4 %r12
39466 +-
39467 +-ENTRY(poly1305_block_sse2)
39468 +- # %rdi: Accumulator h[5]
39469 +- # %rsi: 16 byte input block m
39470 +- # %rdx: Poly1305 key r[5]
39471 +- # %rcx: Block count
39472 +-
39473 +- # This single block variant tries to improve performance by doing two
39474 +- # multiplications in parallel using SSE instructions. There is quite
39475 +- # some quardword packing involved, hence the speedup is marginal.
39476 +-
39477 +- push %rbx
39478 +- push %r12
39479 +- sub $0x10,%rsp
39480 +-
39481 +- # s1..s4 = r1..r4 * 5
39482 +- mov r1,%eax
39483 +- lea (%eax,%eax,4),%eax
39484 +- mov %eax,s1
39485 +- mov r2,%eax
39486 +- lea (%eax,%eax,4),%eax
39487 +- mov %eax,s2
39488 +- mov r3,%eax
39489 +- lea (%eax,%eax,4),%eax
39490 +- mov %eax,s3
39491 +- mov r4,%eax
39492 +- lea (%eax,%eax,4),%eax
39493 +- mov %eax,s4
39494 +-
39495 +- movdqa ANMASK(%rip),mask
39496 +-
39497 +-.Ldoblock:
39498 +- # h01 = [0, h1, 0, h0]
39499 +- # h23 = [0, h3, 0, h2]
39500 +- # h44 = [0, h4, 0, h4]
39501 +- movd h0,h01
39502 +- movd h1,t1
39503 +- movd h2,h23
39504 +- movd h3,t2
39505 +- movd h4,h44
39506 +- punpcklqdq t1,h01
39507 +- punpcklqdq t2,h23
39508 +- punpcklqdq h44,h44
39509 +-
39510 +- # h01 += [ (m[3-6] >> 2) & 0x3ffffff, m[0-3] & 0x3ffffff ]
39511 +- movd 0x00(m),t1
39512 +- movd 0x03(m),t2
39513 +- psrld $2,t2
39514 +- punpcklqdq t2,t1
39515 +- pand mask,t1
39516 +- paddd t1,h01
39517 +- # h23 += [ (m[9-12] >> 6) & 0x3ffffff, (m[6-9] >> 4) & 0x3ffffff ]
39518 +- movd 0x06(m),t1
39519 +- movd 0x09(m),t2
39520 +- psrld $4,t1
39521 +- psrld $6,t2
39522 +- punpcklqdq t2,t1
39523 +- pand mask,t1
39524 +- paddd t1,h23
39525 +- # h44 += [ (m[12-15] >> 8) | (1 << 24), (m[12-15] >> 8) | (1 << 24) ]
39526 +- mov 0x0c(m),%eax
39527 +- shr $8,%eax
39528 +- or $0x01000000,%eax
39529 +- movd %eax,t1
39530 +- pshufd $0xc4,t1,t1
39531 +- paddd t1,h44
39532 +-
39533 +- # t1[0] = h0 * r0 + h2 * s3
39534 +- # t1[1] = h1 * s4 + h3 * s2
39535 +- movd r0,t1
39536 +- movd s4,t2
39537 +- punpcklqdq t2,t1
39538 +- pmuludq h01,t1
39539 +- movd s3,t2
39540 +- movd s2,t3
39541 +- punpcklqdq t3,t2
39542 +- pmuludq h23,t2
39543 +- paddq t2,t1
39544 +- # t2[0] = h0 * r1 + h2 * s4
39545 +- # t2[1] = h1 * r0 + h3 * s3
39546 +- movd r1,t2
39547 +- movd r0,t3
39548 +- punpcklqdq t3,t2
39549 +- pmuludq h01,t2
39550 +- movd s4,t3
39551 +- movd s3,t4
39552 +- punpcklqdq t4,t3
39553 +- pmuludq h23,t3
39554 +- paddq t3,t2
39555 +- # t3[0] = h4 * s1
39556 +- # t3[1] = h4 * s2
39557 +- movd s1,t3
39558 +- movd s2,t4
39559 +- punpcklqdq t4,t3
39560 +- pmuludq h44,t3
39561 +- # d0 = t1[0] + t1[1] + t3[0]
39562 +- # d1 = t2[0] + t2[1] + t3[1]
39563 +- movdqa t1,t4
39564 +- punpcklqdq t2,t4
39565 +- punpckhqdq t2,t1
39566 +- paddq t4,t1
39567 +- paddq t3,t1
39568 +- movq t1,d0
39569 +- psrldq $8,t1
39570 +- movq t1,d1
39571 +-
39572 +- # t1[0] = h0 * r2 + h2 * r0
39573 +- # t1[1] = h1 * r1 + h3 * s4
39574 +- movd r2,t1
39575 +- movd r1,t2
39576 +- punpcklqdq t2,t1
39577 +- pmuludq h01,t1
39578 +- movd r0,t2
39579 +- movd s4,t3
39580 +- punpcklqdq t3,t2
39581 +- pmuludq h23,t2
39582 +- paddq t2,t1
39583 +- # t2[0] = h0 * r3 + h2 * r1
39584 +- # t2[1] = h1 * r2 + h3 * r0
39585 +- movd r3,t2
39586 +- movd r2,t3
39587 +- punpcklqdq t3,t2
39588 +- pmuludq h01,t2
39589 +- movd r1,t3
39590 +- movd r0,t4
39591 +- punpcklqdq t4,t3
39592 +- pmuludq h23,t3
39593 +- paddq t3,t2
39594 +- # t3[0] = h4 * s3
39595 +- # t3[1] = h4 * s4
39596 +- movd s3,t3
39597 +- movd s4,t4
39598 +- punpcklqdq t4,t3
39599 +- pmuludq h44,t3
39600 +- # d2 = t1[0] + t1[1] + t3[0]
39601 +- # d3 = t2[0] + t2[1] + t3[1]
39602 +- movdqa t1,t4
39603 +- punpcklqdq t2,t4
39604 +- punpckhqdq t2,t1
39605 +- paddq t4,t1
39606 +- paddq t3,t1
39607 +- movq t1,d2
39608 +- psrldq $8,t1
39609 +- movq t1,d3
39610 +-
39611 +- # t1[0] = h0 * r4 + h2 * r2
39612 +- # t1[1] = h1 * r3 + h3 * r1
39613 +- movd r4,t1
39614 +- movd r3,t2
39615 +- punpcklqdq t2,t1
39616 +- pmuludq h01,t1
39617 +- movd r2,t2
39618 +- movd r1,t3
39619 +- punpcklqdq t3,t2
39620 +- pmuludq h23,t2
39621 +- paddq t2,t1
39622 +- # t3[0] = h4 * r0
39623 +- movd r0,t3
39624 +- pmuludq h44,t3
39625 +- # d4 = t1[0] + t1[1] + t3[0]
39626 +- movdqa t1,t4
39627 +- psrldq $8,t4
39628 +- paddq t4,t1
39629 +- paddq t3,t1
39630 +- movq t1,d4
39631 +-
39632 +- # d1 += d0 >> 26
39633 +- mov d0,%rax
39634 +- shr $26,%rax
39635 +- add %rax,d1
39636 +- # h0 = d0 & 0x3ffffff
39637 +- mov d0,%rbx
39638 +- and $0x3ffffff,%ebx
39639 +-
39640 +- # d2 += d1 >> 26
39641 +- mov d1,%rax
39642 +- shr $26,%rax
39643 +- add %rax,d2
39644 +- # h1 = d1 & 0x3ffffff
39645 +- mov d1,%rax
39646 +- and $0x3ffffff,%eax
39647 +- mov %eax,h1
39648 +-
39649 +- # d3 += d2 >> 26
39650 +- mov d2,%rax
39651 +- shr $26,%rax
39652 +- add %rax,d3
39653 +- # h2 = d2 & 0x3ffffff
39654 +- mov d2,%rax
39655 +- and $0x3ffffff,%eax
39656 +- mov %eax,h2
39657 +-
39658 +- # d4 += d3 >> 26
39659 +- mov d3,%rax
39660 +- shr $26,%rax
39661 +- add %rax,d4
39662 +- # h3 = d3 & 0x3ffffff
39663 +- mov d3,%rax
39664 +- and $0x3ffffff,%eax
39665 +- mov %eax,h3
39666 +-
39667 +- # h0 += (d4 >> 26) * 5
39668 +- mov d4,%rax
39669 +- shr $26,%rax
39670 +- lea (%rax,%rax,4),%rax
39671 +- add %rax,%rbx
39672 +- # h4 = d4 & 0x3ffffff
39673 +- mov d4,%rax
39674 +- and $0x3ffffff,%eax
39675 +- mov %eax,h4
39676 +-
39677 +- # h1 += h0 >> 26
39678 +- mov %rbx,%rax
39679 +- shr $26,%rax
39680 +- add %eax,h1
39681 +- # h0 = h0 & 0x3ffffff
39682 +- andl $0x3ffffff,%ebx
39683 +- mov %ebx,h0
39684 +-
39685 +- add $0x10,m
39686 +- dec %rcx
39687 +- jnz .Ldoblock
39688 +-
39689 +- # Zeroing of key material
39690 +- mov %rcx,0x00(%rsp)
39691 +- mov %rcx,0x08(%rsp)
39692 +-
39693 +- add $0x10,%rsp
39694 +- pop %r12
39695 +- pop %rbx
39696 +- ret
39697 +-ENDPROC(poly1305_block_sse2)
39698 +-
39699 +-
39700 +-#define u0 0x00(%r8)
39701 +-#define u1 0x04(%r8)
39702 +-#define u2 0x08(%r8)
39703 +-#define u3 0x0c(%r8)
39704 +-#define u4 0x10(%r8)
39705 +-#define hc0 %xmm0
39706 +-#define hc1 %xmm1
39707 +-#define hc2 %xmm2
39708 +-#define hc3 %xmm5
39709 +-#define hc4 %xmm6
39710 +-#define ru0 %xmm7
39711 +-#define ru1 %xmm8
39712 +-#define ru2 %xmm9
39713 +-#define ru3 %xmm10
39714 +-#define ru4 %xmm11
39715 +-#define sv1 %xmm12
39716 +-#define sv2 %xmm13
39717 +-#define sv3 %xmm14
39718 +-#define sv4 %xmm15
39719 +-#undef d0
39720 +-#define d0 %r13
39721 +-
39722 +-ENTRY(poly1305_2block_sse2)
39723 +- # %rdi: Accumulator h[5]
39724 +- # %rsi: 16 byte input block m
39725 +- # %rdx: Poly1305 key r[5]
39726 +- # %rcx: Doubleblock count
39727 +- # %r8: Poly1305 derived key r^2 u[5]
39728 +-
39729 +- # This two-block variant further improves performance by using loop
39730 +- # unrolled block processing. This is more straight forward and does
39731 +- # less byte shuffling, but requires a second Poly1305 key r^2:
39732 +- # h = (h + m) * r => h = (h + m1) * r^2 + m2 * r
39733 +-
39734 +- push %rbx
39735 +- push %r12
39736 +- push %r13
39737 +-
39738 +- # combine r0,u0
39739 +- movd u0,ru0
39740 +- movd r0,t1
39741 +- punpcklqdq t1,ru0
39742 +-
39743 +- # combine r1,u1 and s1=r1*5,v1=u1*5
39744 +- movd u1,ru1
39745 +- movd r1,t1
39746 +- punpcklqdq t1,ru1
39747 +- movdqa ru1,sv1
39748 +- pslld $2,sv1
39749 +- paddd ru1,sv1
39750 +-
39751 +- # combine r2,u2 and s2=r2*5,v2=u2*5
39752 +- movd u2,ru2
39753 +- movd r2,t1
39754 +- punpcklqdq t1,ru2
39755 +- movdqa ru2,sv2
39756 +- pslld $2,sv2
39757 +- paddd ru2,sv2
39758 +-
39759 +- # combine r3,u3 and s3=r3*5,v3=u3*5
39760 +- movd u3,ru3
39761 +- movd r3,t1
39762 +- punpcklqdq t1,ru3
39763 +- movdqa ru3,sv3
39764 +- pslld $2,sv3
39765 +- paddd ru3,sv3
39766 +-
39767 +- # combine r4,u4 and s4=r4*5,v4=u4*5
39768 +- movd u4,ru4
39769 +- movd r4,t1
39770 +- punpcklqdq t1,ru4
39771 +- movdqa ru4,sv4
39772 +- pslld $2,sv4
39773 +- paddd ru4,sv4
39774 +-
39775 +-.Ldoblock2:
39776 +- # hc0 = [ m[16-19] & 0x3ffffff, h0 + m[0-3] & 0x3ffffff ]
39777 +- movd 0x00(m),hc0
39778 +- movd 0x10(m),t1
39779 +- punpcklqdq t1,hc0
39780 +- pand ANMASK(%rip),hc0
39781 +- movd h0,t1
39782 +- paddd t1,hc0
39783 +- # hc1 = [ (m[19-22] >> 2) & 0x3ffffff, h1 + (m[3-6] >> 2) & 0x3ffffff ]
39784 +- movd 0x03(m),hc1
39785 +- movd 0x13(m),t1
39786 +- punpcklqdq t1,hc1
39787 +- psrld $2,hc1
39788 +- pand ANMASK(%rip),hc1
39789 +- movd h1,t1
39790 +- paddd t1,hc1
39791 +- # hc2 = [ (m[22-25] >> 4) & 0x3ffffff, h2 + (m[6-9] >> 4) & 0x3ffffff ]
39792 +- movd 0x06(m),hc2
39793 +- movd 0x16(m),t1
39794 +- punpcklqdq t1,hc2
39795 +- psrld $4,hc2
39796 +- pand ANMASK(%rip),hc2
39797 +- movd h2,t1
39798 +- paddd t1,hc2
39799 +- # hc3 = [ (m[25-28] >> 6) & 0x3ffffff, h3 + (m[9-12] >> 6) & 0x3ffffff ]
39800 +- movd 0x09(m),hc3
39801 +- movd 0x19(m),t1
39802 +- punpcklqdq t1,hc3
39803 +- psrld $6,hc3
39804 +- pand ANMASK(%rip),hc3
39805 +- movd h3,t1
39806 +- paddd t1,hc3
39807 +- # hc4 = [ (m[28-31] >> 8) | (1<<24), h4 + (m[12-15] >> 8) | (1<<24) ]
39808 +- movd 0x0c(m),hc4
39809 +- movd 0x1c(m),t1
39810 +- punpcklqdq t1,hc4
39811 +- psrld $8,hc4
39812 +- por ORMASK(%rip),hc4
39813 +- movd h4,t1
39814 +- paddd t1,hc4
39815 +-
39816 +- # t1 = [ hc0[1] * r0, hc0[0] * u0 ]
39817 +- movdqa ru0,t1
39818 +- pmuludq hc0,t1
39819 +- # t1 += [ hc1[1] * s4, hc1[0] * v4 ]
39820 +- movdqa sv4,t2
39821 +- pmuludq hc1,t2
39822 +- paddq t2,t1
39823 +- # t1 += [ hc2[1] * s3, hc2[0] * v3 ]
39824 +- movdqa sv3,t2
39825 +- pmuludq hc2,t2
39826 +- paddq t2,t1
39827 +- # t1 += [ hc3[1] * s2, hc3[0] * v2 ]
39828 +- movdqa sv2,t2
39829 +- pmuludq hc3,t2
39830 +- paddq t2,t1
39831 +- # t1 += [ hc4[1] * s1, hc4[0] * v1 ]
39832 +- movdqa sv1,t2
39833 +- pmuludq hc4,t2
39834 +- paddq t2,t1
39835 +- # d0 = t1[0] + t1[1]
39836 +- movdqa t1,t2
39837 +- psrldq $8,t2
39838 +- paddq t2,t1
39839 +- movq t1,d0
39840 +-
39841 +- # t1 = [ hc0[1] * r1, hc0[0] * u1 ]
39842 +- movdqa ru1,t1
39843 +- pmuludq hc0,t1
39844 +- # t1 += [ hc1[1] * r0, hc1[0] * u0 ]
39845 +- movdqa ru0,t2
39846 +- pmuludq hc1,t2
39847 +- paddq t2,t1
39848 +- # t1 += [ hc2[1] * s4, hc2[0] * v4 ]
39849 +- movdqa sv4,t2
39850 +- pmuludq hc2,t2
39851 +- paddq t2,t1
39852 +- # t1 += [ hc3[1] * s3, hc3[0] * v3 ]
39853 +- movdqa sv3,t2
39854 +- pmuludq hc3,t2
39855 +- paddq t2,t1
39856 +- # t1 += [ hc4[1] * s2, hc4[0] * v2 ]
39857 +- movdqa sv2,t2
39858 +- pmuludq hc4,t2
39859 +- paddq t2,t1
39860 +- # d1 = t1[0] + t1[1]
39861 +- movdqa t1,t2
39862 +- psrldq $8,t2
39863 +- paddq t2,t1
39864 +- movq t1,d1
39865 +-
39866 +- # t1 = [ hc0[1] * r2, hc0[0] * u2 ]
39867 +- movdqa ru2,t1
39868 +- pmuludq hc0,t1
39869 +- # t1 += [ hc1[1] * r1, hc1[0] * u1 ]
39870 +- movdqa ru1,t2
39871 +- pmuludq hc1,t2
39872 +- paddq t2,t1
39873 +- # t1 += [ hc2[1] * r0, hc2[0] * u0 ]
39874 +- movdqa ru0,t2
39875 +- pmuludq hc2,t2
39876 +- paddq t2,t1
39877 +- # t1 += [ hc3[1] * s4, hc3[0] * v4 ]
39878 +- movdqa sv4,t2
39879 +- pmuludq hc3,t2
39880 +- paddq t2,t1
39881 +- # t1 += [ hc4[1] * s3, hc4[0] * v3 ]
39882 +- movdqa sv3,t2
39883 +- pmuludq hc4,t2
39884 +- paddq t2,t1
39885 +- # d2 = t1[0] + t1[1]
39886 +- movdqa t1,t2
39887 +- psrldq $8,t2
39888 +- paddq t2,t1
39889 +- movq t1,d2
39890 +-
39891 +- # t1 = [ hc0[1] * r3, hc0[0] * u3 ]
39892 +- movdqa ru3,t1
39893 +- pmuludq hc0,t1
39894 +- # t1 += [ hc1[1] * r2, hc1[0] * u2 ]
39895 +- movdqa ru2,t2
39896 +- pmuludq hc1,t2
39897 +- paddq t2,t1
39898 +- # t1 += [ hc2[1] * r1, hc2[0] * u1 ]
39899 +- movdqa ru1,t2
39900 +- pmuludq hc2,t2
39901 +- paddq t2,t1
39902 +- # t1 += [ hc3[1] * r0, hc3[0] * u0 ]
39903 +- movdqa ru0,t2
39904 +- pmuludq hc3,t2
39905 +- paddq t2,t1
39906 +- # t1 += [ hc4[1] * s4, hc4[0] * v4 ]
39907 +- movdqa sv4,t2
39908 +- pmuludq hc4,t2
39909 +- paddq t2,t1
39910 +- # d3 = t1[0] + t1[1]
39911 +- movdqa t1,t2
39912 +- psrldq $8,t2
39913 +- paddq t2,t1
39914 +- movq t1,d3
39915 +-
39916 +- # t1 = [ hc0[1] * r4, hc0[0] * u4 ]
39917 +- movdqa ru4,t1
39918 +- pmuludq hc0,t1
39919 +- # t1 += [ hc1[1] * r3, hc1[0] * u3 ]
39920 +- movdqa ru3,t2
39921 +- pmuludq hc1,t2
39922 +- paddq t2,t1
39923 +- # t1 += [ hc2[1] * r2, hc2[0] * u2 ]
39924 +- movdqa ru2,t2
39925 +- pmuludq hc2,t2
39926 +- paddq t2,t1
39927 +- # t1 += [ hc3[1] * r1, hc3[0] * u1 ]
39928 +- movdqa ru1,t2
39929 +- pmuludq hc3,t2
39930 +- paddq t2,t1
39931 +- # t1 += [ hc4[1] * r0, hc4[0] * u0 ]
39932 +- movdqa ru0,t2
39933 +- pmuludq hc4,t2
39934 +- paddq t2,t1
39935 +- # d4 = t1[0] + t1[1]
39936 +- movdqa t1,t2
39937 +- psrldq $8,t2
39938 +- paddq t2,t1
39939 +- movq t1,d4
39940 +-
39941 +- # Now do a partial reduction mod (2^130)-5, carrying h0 -> h1 -> h2 ->
39942 +- # h3 -> h4 -> h0 -> h1 to get h0,h2,h3,h4 < 2^26 and h1 < 2^26 + a small
39943 +- # amount. Careful: we must not assume the carry bits 'd0 >> 26',
39944 +- # 'd1 >> 26', 'd2 >> 26', 'd3 >> 26', and '(d4 >> 26) * 5' fit in 32-bit
39945 +- # integers. It's true in a single-block implementation, but not here.
39946 +-
39947 +- # d1 += d0 >> 26
39948 +- mov d0,%rax
39949 +- shr $26,%rax
39950 +- add %rax,d1
39951 +- # h0 = d0 & 0x3ffffff
39952 +- mov d0,%rbx
39953 +- and $0x3ffffff,%ebx
39954 +-
39955 +- # d2 += d1 >> 26
39956 +- mov d1,%rax
39957 +- shr $26,%rax
39958 +- add %rax,d2
39959 +- # h1 = d1 & 0x3ffffff
39960 +- mov d1,%rax
39961 +- and $0x3ffffff,%eax
39962 +- mov %eax,h1
39963 +-
39964 +- # d3 += d2 >> 26
39965 +- mov d2,%rax
39966 +- shr $26,%rax
39967 +- add %rax,d3
39968 +- # h2 = d2 & 0x3ffffff
39969 +- mov d2,%rax
39970 +- and $0x3ffffff,%eax
39971 +- mov %eax,h2
39972 +-
39973 +- # d4 += d3 >> 26
39974 +- mov d3,%rax
39975 +- shr $26,%rax
39976 +- add %rax,d4
39977 +- # h3 = d3 & 0x3ffffff
39978 +- mov d3,%rax
39979 +- and $0x3ffffff,%eax
39980 +- mov %eax,h3
39981 +-
39982 +- # h0 += (d4 >> 26) * 5
39983 +- mov d4,%rax
39984 +- shr $26,%rax
39985 +- lea (%rax,%rax,4),%rax
39986 +- add %rax,%rbx
39987 +- # h4 = d4 & 0x3ffffff
39988 +- mov d4,%rax
39989 +- and $0x3ffffff,%eax
39990 +- mov %eax,h4
39991 +-
39992 +- # h1 += h0 >> 26
39993 +- mov %rbx,%rax
39994 +- shr $26,%rax
39995 +- add %eax,h1
39996 +- # h0 = h0 & 0x3ffffff
39997 +- andl $0x3ffffff,%ebx
39998 +- mov %ebx,h0
39999 +-
40000 +- add $0x20,m
40001 +- dec %rcx
40002 +- jnz .Ldoblock2
40003 +-
40004 +- pop %r13
40005 +- pop %r12
40006 +- pop %rbx
40007 +- ret
40008 +-ENDPROC(poly1305_2block_sse2)
40009 +diff --git a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
40010 +index 342ad7f18aa7..80061bea6b16 100644
40011 +--- a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
40012 ++++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
40013 +@@ -1,11 +1,14 @@
40014 +-#! /usr/bin/env perl
40015 +-# Copyright 2016-2018 The OpenSSL Project Authors. All Rights Reserved.
40016 ++#!/usr/bin/env perl
40017 ++# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
40018 + #
40019 +-# Licensed under the OpenSSL license (the "License"). You may not use
40020 +-# this file except in compliance with the License. You can obtain a copy
40021 +-# in the file LICENSE in the source distribution or at
40022 +-# https://www.openssl.org/source/license.html
40023 +-
40024 ++# Copyright (C) 2017-2018 Samuel Neves <sneves@××××××.pt>. All Rights Reserved.
40025 ++# Copyright (C) 2017-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
40026 ++# Copyright (C) 2006-2017 CRYPTOGAMS by <appro@×××××××.org>. All Rights Reserved.
40027 ++#
40028 ++# This code is taken from the OpenSSL project but the author, Andy Polyakov,
40029 ++# has relicensed it under the licenses specified in the SPDX header above.
40030 ++# The original headers, including the original license headers, are
40031 ++# included below for completeness.
40032 + #
40033 + # ====================================================================
40034 + # Written by Andy Polyakov <appro@×××××××.org> for the OpenSSL
40035 +@@ -32,7 +35,7 @@
40036 + # Skylake-X system performance. Since we are likely to suppress
40037 + # AVX512F capability flag [at least on Skylake-X], conversion serves
40038 + # as kind of "investment protection". Note that next *lake processor,
40039 +-# Cannolake, has AVX512IFMA code path to execute...
40040 ++# Cannonlake, has AVX512IFMA code path to execute...
40041 + #
40042 + # Numbers are cycles per processed byte with poly1305_blocks alone,
40043 + # measured with rdtsc at fixed clock frequency.
40044 +@@ -68,39 +71,114 @@ $output = shift;
40045 + if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
40046 +
40047 + $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
40048 +-
40049 +-$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
40050 +-( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
40051 +-( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
40052 +-die "can't locate x86_64-xlate.pl";
40053 +-
40054 +-if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
40055 +- =~ /GNU assembler version ([2-9]\.[0-9]+)/) {
40056 +- $avx = ($1>=2.19) + ($1>=2.22) + ($1>=2.25) + ($1>=2.26);
40057 ++$kernel=0; $kernel=1 if (!$flavour && !$output);
40058 ++
40059 ++if (!$kernel) {
40060 ++ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
40061 ++ ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
40062 ++ ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
40063 ++ die "can't locate x86_64-xlate.pl";
40064 ++
40065 ++ open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
40066 ++ *STDOUT=*OUT;
40067 ++
40068 ++ if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
40069 ++ =~ /GNU assembler version ([2-9]\.[0-9]+)/) {
40070 ++ $avx = ($1>=2.19) + ($1>=2.22) + ($1>=2.25);
40071 ++ }
40072 ++
40073 ++ if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&
40074 ++ `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)(?:\.([0-9]+))?/) {
40075 ++ $avx = ($1>=2.09) + ($1>=2.10) + ($1>=2.12);
40076 ++ $avx += 1 if ($1==2.11 && $2>=8);
40077 ++ }
40078 ++
40079 ++ if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
40080 ++ `ml64 2>&1` =~ /Version ([0-9]+)\./) {
40081 ++ $avx = ($1>=10) + ($1>=11);
40082 ++ }
40083 ++
40084 ++ if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([3-9]\.[0-9]+)/) {
40085 ++ $avx = ($2>=3.0) + ($2>3.0);
40086 ++ }
40087 ++} else {
40088 ++ $avx = 4; # The kernel uses ifdefs for this.
40089 + }
40090 +
40091 +-if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&
40092 +- `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)(?:\.([0-9]+))?/) {
40093 +- $avx = ($1>=2.09) + ($1>=2.10) + 2 * ($1>=2.12);
40094 +- $avx += 2 if ($1==2.11 && $2>=8);
40095 ++sub declare_function() {
40096 ++ my ($name, $align, $nargs) = @_;
40097 ++ if($kernel) {
40098 ++ $code .= ".align $align\n";
40099 ++ $code .= "ENTRY($name)\n";
40100 ++ $code .= ".L$name:\n";
40101 ++ } else {
40102 ++ $code .= ".globl $name\n";
40103 ++ $code .= ".type $name,\@function,$nargs\n";
40104 ++ $code .= ".align $align\n";
40105 ++ $code .= "$name:\n";
40106 ++ }
40107 + }
40108 +
40109 +-if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
40110 +- `ml64 2>&1` =~ /Version ([0-9]+)\./) {
40111 +- $avx = ($1>=10) + ($1>=12);
40112 ++sub end_function() {
40113 ++ my ($name) = @_;
40114 ++ if($kernel) {
40115 ++ $code .= "ENDPROC($name)\n";
40116 ++ } else {
40117 ++ $code .= ".size $name,.-$name\n";
40118 ++ }
40119 + }
40120 +
40121 +-if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([3-9]\.[0-9]+)/) {
40122 +- $avx = ($2>=3.0) + ($2>3.0);
40123 +-}
40124 ++$code.=<<___ if $kernel;
40125 ++#include <linux/linkage.h>
40126 ++___
40127 ++
40128 ++if ($avx) {
40129 ++$code.=<<___ if $kernel;
40130 ++.section .rodata
40131 ++___
40132 ++$code.=<<___;
40133 ++.align 64
40134 ++.Lconst:
40135 ++.Lmask24:
40136 ++.long 0x0ffffff,0,0x0ffffff,0,0x0ffffff,0,0x0ffffff,0
40137 ++.L129:
40138 ++.long `1<<24`,0,`1<<24`,0,`1<<24`,0,`1<<24`,0
40139 ++.Lmask26:
40140 ++.long 0x3ffffff,0,0x3ffffff,0,0x3ffffff,0,0x3ffffff,0
40141 ++.Lpermd_avx2:
40142 ++.long 2,2,2,3,2,0,2,1
40143 ++.Lpermd_avx512:
40144 ++.long 0,0,0,1, 0,2,0,3, 0,4,0,5, 0,6,0,7
40145 ++
40146 ++.L2_44_inp_permd:
40147 ++.long 0,1,1,2,2,3,7,7
40148 ++.L2_44_inp_shift:
40149 ++.quad 0,12,24,64
40150 ++.L2_44_mask:
40151 ++.quad 0xfffffffffff,0xfffffffffff,0x3ffffffffff,0xffffffffffffffff
40152 ++.L2_44_shift_rgt:
40153 ++.quad 44,44,42,64
40154 ++.L2_44_shift_lft:
40155 ++.quad 8,8,10,64
40156 +
40157 +-open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
40158 +-*STDOUT=*OUT;
40159 ++.align 64
40160 ++.Lx_mask44:
40161 ++.quad 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff
40162 ++.quad 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff
40163 ++.Lx_mask42:
40164 ++.quad 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff
40165 ++.quad 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff
40166 ++___
40167 ++}
40168 ++$code.=<<___ if (!$kernel);
40169 ++.asciz "Poly1305 for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
40170 ++.align 16
40171 ++___
40172 +
40173 + my ($ctx,$inp,$len,$padbit)=("%rdi","%rsi","%rdx","%rcx");
40174 + my ($mac,$nonce)=($inp,$len); # *_emit arguments
40175 +-my ($d1,$d2,$d3, $r0,$r1,$s1)=map("%r$_",(8..13));
40176 +-my ($h0,$h1,$h2)=("%r14","%rbx","%rbp");
40177 ++my ($d1,$d2,$d3, $r0,$r1,$s1)=("%r8","%r9","%rdi","%r11","%r12","%r13");
40178 ++my ($h0,$h1,$h2)=("%r14","%rbx","%r10");
40179 +
40180 + sub poly1305_iteration {
40181 + # input: copy of $r1 in %rax, $h0-$h2, $r0-$r1
40182 +@@ -155,19 +233,19 @@ ___
40183 +
40184 + $code.=<<___;
40185 + .text
40186 +-
40187 ++___
40188 ++$code.=<<___ if (!$kernel);
40189 + .extern OPENSSL_ia32cap_P
40190 +
40191 +-.globl poly1305_init
40192 +-.hidden poly1305_init
40193 +-.globl poly1305_blocks
40194 +-.hidden poly1305_blocks
40195 +-.globl poly1305_emit
40196 +-.hidden poly1305_emit
40197 +-
40198 +-.type poly1305_init,\@function,3
40199 +-.align 32
40200 +-poly1305_init:
40201 ++.globl poly1305_init_x86_64
40202 ++.hidden poly1305_init_x86_64
40203 ++.globl poly1305_blocks_x86_64
40204 ++.hidden poly1305_blocks_x86_64
40205 ++.globl poly1305_emit_x86_64
40206 ++.hidden poly1305_emit_x86_64
40207 ++___
40208 ++&declare_function("poly1305_init_x86_64", 32, 3);
40209 ++$code.=<<___;
40210 + xor %rax,%rax
40211 + mov %rax,0($ctx) # initialize hash value
40212 + mov %rax,8($ctx)
40213 +@@ -175,11 +253,12 @@ poly1305_init:
40214 +
40215 + cmp \$0,$inp
40216 + je .Lno_key
40217 +-
40218 +- lea poly1305_blocks(%rip),%r10
40219 +- lea poly1305_emit(%rip),%r11
40220 + ___
40221 +-$code.=<<___ if ($avx);
40222 ++$code.=<<___ if (!$kernel);
40223 ++ lea poly1305_blocks_x86_64(%rip),%r10
40224 ++ lea poly1305_emit_x86_64(%rip),%r11
40225 ++___
40226 ++$code.=<<___ if (!$kernel && $avx);
40227 + mov OPENSSL_ia32cap_P+4(%rip),%r9
40228 + lea poly1305_blocks_avx(%rip),%rax
40229 + lea poly1305_emit_avx(%rip),%rcx
40230 +@@ -187,12 +266,12 @@ $code.=<<___ if ($avx);
40231 + cmovc %rax,%r10
40232 + cmovc %rcx,%r11
40233 + ___
40234 +-$code.=<<___ if ($avx>1);
40235 ++$code.=<<___ if (!$kernel && $avx>1);
40236 + lea poly1305_blocks_avx2(%rip),%rax
40237 + bt \$`5+32`,%r9 # AVX2?
40238 + cmovc %rax,%r10
40239 + ___
40240 +-$code.=<<___ if ($avx>3);
40241 ++$code.=<<___ if (!$kernel && $avx>3);
40242 + mov \$`(1<<31|1<<21|1<<16)`,%rax
40243 + shr \$32,%r9
40244 + and %rax,%r9
40245 +@@ -207,11 +286,11 @@ $code.=<<___;
40246 + mov %rax,24($ctx)
40247 + mov %rcx,32($ctx)
40248 + ___
40249 +-$code.=<<___ if ($flavour !~ /elf32/);
40250 ++$code.=<<___ if (!$kernel && $flavour !~ /elf32/);
40251 + mov %r10,0(%rdx)
40252 + mov %r11,8(%rdx)
40253 + ___
40254 +-$code.=<<___ if ($flavour =~ /elf32/);
40255 ++$code.=<<___ if (!$kernel && $flavour =~ /elf32/);
40256 + mov %r10d,0(%rdx)
40257 + mov %r11d,4(%rdx)
40258 + ___
40259 +@@ -219,11 +298,11 @@ $code.=<<___;
40260 + mov \$1,%eax
40261 + .Lno_key:
40262 + ret
40263 +-.size poly1305_init,.-poly1305_init
40264 ++___
40265 ++&end_function("poly1305_init_x86_64");
40266 +
40267 +-.type poly1305_blocks,\@function,4
40268 +-.align 32
40269 +-poly1305_blocks:
40270 ++&declare_function("poly1305_blocks_x86_64", 32, 4);
40271 ++$code.=<<___;
40272 + .cfi_startproc
40273 + .Lblocks:
40274 + shr \$4,$len
40275 +@@ -231,8 +310,6 @@ poly1305_blocks:
40276 +
40277 + push %rbx
40278 + .cfi_push %rbx
40279 +- push %rbp
40280 +-.cfi_push %rbp
40281 + push %r12
40282 + .cfi_push %r12
40283 + push %r13
40284 +@@ -241,6 +318,8 @@ poly1305_blocks:
40285 + .cfi_push %r14
40286 + push %r15
40287 + .cfi_push %r15
40288 ++ push $ctx
40289 ++.cfi_push $ctx
40290 + .Lblocks_body:
40291 +
40292 + mov $len,%r15 # reassign $len
40293 +@@ -265,26 +344,29 @@ poly1305_blocks:
40294 + lea 16($inp),$inp
40295 + adc $padbit,$h2
40296 + ___
40297 ++
40298 + &poly1305_iteration();
40299 ++
40300 + $code.=<<___;
40301 + mov $r1,%rax
40302 + dec %r15 # len-=16
40303 + jnz .Loop
40304 +
40305 ++ mov 0(%rsp),$ctx
40306 ++.cfi_restore $ctx
40307 ++
40308 + mov $h0,0($ctx) # store hash value
40309 + mov $h1,8($ctx)
40310 + mov $h2,16($ctx)
40311 +
40312 +- mov 0(%rsp),%r15
40313 ++ mov 8(%rsp),%r15
40314 + .cfi_restore %r15
40315 +- mov 8(%rsp),%r14
40316 ++ mov 16(%rsp),%r14
40317 + .cfi_restore %r14
40318 +- mov 16(%rsp),%r13
40319 ++ mov 24(%rsp),%r13
40320 + .cfi_restore %r13
40321 +- mov 24(%rsp),%r12
40322 ++ mov 32(%rsp),%r12
40323 + .cfi_restore %r12
40324 +- mov 32(%rsp),%rbp
40325 +-.cfi_restore %rbp
40326 + mov 40(%rsp),%rbx
40327 + .cfi_restore %rbx
40328 + lea 48(%rsp),%rsp
40329 +@@ -293,11 +375,11 @@ $code.=<<___;
40330 + .Lblocks_epilogue:
40331 + ret
40332 + .cfi_endproc
40333 +-.size poly1305_blocks,.-poly1305_blocks
40334 ++___
40335 ++&end_function("poly1305_blocks_x86_64");
40336 +
40337 +-.type poly1305_emit,\@function,3
40338 +-.align 32
40339 +-poly1305_emit:
40340 ++&declare_function("poly1305_emit_x86_64", 32, 3);
40341 ++$code.=<<___;
40342 + .Lemit:
40343 + mov 0($ctx),%r8 # load hash value
40344 + mov 8($ctx),%r9
40345 +@@ -318,10 +400,14 @@ poly1305_emit:
40346 + mov %rcx,8($mac)
40347 +
40348 + ret
40349 +-.size poly1305_emit,.-poly1305_emit
40350 + ___
40351 ++&end_function("poly1305_emit_x86_64");
40352 + if ($avx) {
40353 +
40354 ++if($kernel) {
40355 ++ $code .= "#ifdef CONFIG_AS_AVX\n";
40356 ++}
40357 ++
40358 + ########################################################################
40359 + # Layout of opaque area is following.
40360 + #
40361 +@@ -342,15 +428,19 @@ $code.=<<___;
40362 + .type __poly1305_block,\@abi-omnipotent
40363 + .align 32
40364 + __poly1305_block:
40365 ++ push $ctx
40366 + ___
40367 + &poly1305_iteration();
40368 + $code.=<<___;
40369 ++ pop $ctx
40370 + ret
40371 + .size __poly1305_block,.-__poly1305_block
40372 +
40373 + .type __poly1305_init_avx,\@abi-omnipotent
40374 + .align 32
40375 + __poly1305_init_avx:
40376 ++ push %rbp
40377 ++ mov %rsp,%rbp
40378 + mov $r0,$h0
40379 + mov $r1,$h1
40380 + xor $h2,$h2
40381 +@@ -507,12 +597,13 @@ __poly1305_init_avx:
40382 + mov $d1#d,`16*8+8-64`($ctx)
40383 +
40384 + lea -48-64($ctx),$ctx # size [de-]optimization
40385 ++ pop %rbp
40386 + ret
40387 + .size __poly1305_init_avx,.-__poly1305_init_avx
40388 ++___
40389 +
40390 +-.type poly1305_blocks_avx,\@function,4
40391 +-.align 32
40392 +-poly1305_blocks_avx:
40393 ++&declare_function("poly1305_blocks_avx", 32, 4);
40394 ++$code.=<<___;
40395 + .cfi_startproc
40396 + mov 20($ctx),%r8d # is_base2_26
40397 + cmp \$128,$len
40398 +@@ -532,10 +623,11 @@ poly1305_blocks_avx:
40399 + test \$31,$len
40400 + jz .Leven_avx
40401 +
40402 +- push %rbx
40403 +-.cfi_push %rbx
40404 + push %rbp
40405 + .cfi_push %rbp
40406 ++ mov %rsp,%rbp
40407 ++ push %rbx
40408 ++.cfi_push %rbx
40409 + push %r12
40410 + .cfi_push %r12
40411 + push %r13
40412 +@@ -645,20 +737,18 @@ poly1305_blocks_avx:
40413 + mov $h2#d,16($ctx)
40414 + .align 16
40415 + .Ldone_avx:
40416 +- mov 0(%rsp),%r15
40417 ++ pop %r15
40418 + .cfi_restore %r15
40419 +- mov 8(%rsp),%r14
40420 ++ pop %r14
40421 + .cfi_restore %r14
40422 +- mov 16(%rsp),%r13
40423 ++ pop %r13
40424 + .cfi_restore %r13
40425 +- mov 24(%rsp),%r12
40426 ++ pop %r12
40427 + .cfi_restore %r12
40428 +- mov 32(%rsp),%rbp
40429 +-.cfi_restore %rbp
40430 +- mov 40(%rsp),%rbx
40431 ++ pop %rbx
40432 + .cfi_restore %rbx
40433 +- lea 48(%rsp),%rsp
40434 +-.cfi_adjust_cfa_offset -48
40435 ++ pop %rbp
40436 ++.cfi_restore %rbp
40437 + .Lno_data_avx:
40438 + .Lblocks_avx_epilogue:
40439 + ret
40440 +@@ -667,10 +757,11 @@ poly1305_blocks_avx:
40441 + .align 32
40442 + .Lbase2_64_avx:
40443 + .cfi_startproc
40444 +- push %rbx
40445 +-.cfi_push %rbx
40446 + push %rbp
40447 + .cfi_push %rbp
40448 ++ mov %rsp,%rbp
40449 ++ push %rbx
40450 ++.cfi_push %rbx
40451 + push %r12
40452 + .cfi_push %r12
40453 + push %r13
40454 +@@ -736,22 +827,18 @@ poly1305_blocks_avx:
40455 +
40456 + .Lproceed_avx:
40457 + mov %r15,$len
40458 +-
40459 +- mov 0(%rsp),%r15
40460 ++ pop %r15
40461 + .cfi_restore %r15
40462 +- mov 8(%rsp),%r14
40463 ++ pop %r14
40464 + .cfi_restore %r14
40465 +- mov 16(%rsp),%r13
40466 ++ pop %r13
40467 + .cfi_restore %r13
40468 +- mov 24(%rsp),%r12
40469 ++ pop %r12
40470 + .cfi_restore %r12
40471 +- mov 32(%rsp),%rbp
40472 +-.cfi_restore %rbp
40473 +- mov 40(%rsp),%rbx
40474 ++ pop %rbx
40475 + .cfi_restore %rbx
40476 +- lea 48(%rsp),%rax
40477 +- lea 48(%rsp),%rsp
40478 +-.cfi_adjust_cfa_offset -48
40479 ++ pop %rbp
40480 ++.cfi_restore %rbp
40481 + .Lbase2_64_avx_epilogue:
40482 + jmp .Ldo_avx
40483 + .cfi_endproc
40484 +@@ -768,8 +855,11 @@ poly1305_blocks_avx:
40485 + .Ldo_avx:
40486 + ___
40487 + $code.=<<___ if (!$win64);
40488 ++ lea 8(%rsp),%r10
40489 ++.cfi_def_cfa_register %r10
40490 ++ and \$-32,%rsp
40491 ++ sub \$-8,%rsp
40492 + lea -0x58(%rsp),%r11
40493 +-.cfi_def_cfa %r11,0x60
40494 + sub \$0x178,%rsp
40495 + ___
40496 + $code.=<<___ if ($win64);
40497 +@@ -1361,18 +1451,18 @@ $code.=<<___ if ($win64);
40498 + .Ldo_avx_epilogue:
40499 + ___
40500 + $code.=<<___ if (!$win64);
40501 +- lea 0x58(%r11),%rsp
40502 +-.cfi_def_cfa %rsp,8
40503 ++ lea -8(%r10),%rsp
40504 ++.cfi_def_cfa_register %rsp
40505 + ___
40506 + $code.=<<___;
40507 + vzeroupper
40508 + ret
40509 + .cfi_endproc
40510 +-.size poly1305_blocks_avx,.-poly1305_blocks_avx
40511 ++___
40512 ++&end_function("poly1305_blocks_avx");
40513 +
40514 +-.type poly1305_emit_avx,\@function,3
40515 +-.align 32
40516 +-poly1305_emit_avx:
40517 ++&declare_function("poly1305_emit_avx", 32, 3);
40518 ++$code.=<<___;
40519 + cmpl \$0,20($ctx) # is_base2_26?
40520 + je .Lemit
40521 +
40522 +@@ -1423,41 +1513,51 @@ poly1305_emit_avx:
40523 + mov %rcx,8($mac)
40524 +
40525 + ret
40526 +-.size poly1305_emit_avx,.-poly1305_emit_avx
40527 + ___
40528 ++&end_function("poly1305_emit_avx");
40529 ++
40530 ++if ($kernel) {
40531 ++ $code .= "#endif\n";
40532 ++}
40533 +
40534 + if ($avx>1) {
40535 ++
40536 ++if ($kernel) {
40537 ++ $code .= "#ifdef CONFIG_AS_AVX2\n";
40538 ++}
40539 ++
40540 + my ($H0,$H1,$H2,$H3,$H4, $MASK, $T4,$T0,$T1,$T2,$T3, $D0,$D1,$D2,$D3,$D4) =
40541 + map("%ymm$_",(0..15));
40542 + my $S4=$MASK;
40543 +
40544 ++sub poly1305_blocks_avxN {
40545 ++ my ($avx512) = @_;
40546 ++ my $suffix = $avx512 ? "_avx512" : "";
40547 + $code.=<<___;
40548 +-.type poly1305_blocks_avx2,\@function,4
40549 +-.align 32
40550 +-poly1305_blocks_avx2:
40551 + .cfi_startproc
40552 + mov 20($ctx),%r8d # is_base2_26
40553 + cmp \$128,$len
40554 +- jae .Lblocks_avx2
40555 ++ jae .Lblocks_avx2$suffix
40556 + test %r8d,%r8d
40557 + jz .Lblocks
40558 +
40559 +-.Lblocks_avx2:
40560 ++.Lblocks_avx2$suffix:
40561 + and \$-16,$len
40562 +- jz .Lno_data_avx2
40563 ++ jz .Lno_data_avx2$suffix
40564 +
40565 + vzeroupper
40566 +
40567 + test %r8d,%r8d
40568 +- jz .Lbase2_64_avx2
40569 ++ jz .Lbase2_64_avx2$suffix
40570 +
40571 + test \$63,$len
40572 +- jz .Leven_avx2
40573 ++ jz .Leven_avx2$suffix
40574 +
40575 +- push %rbx
40576 +-.cfi_push %rbx
40577 + push %rbp
40578 + .cfi_push %rbp
40579 ++ mov %rsp,%rbp
40580 ++ push %rbx
40581 ++.cfi_push %rbx
40582 + push %r12
40583 + .cfi_push %r12
40584 + push %r13
40585 +@@ -1466,7 +1566,7 @@ poly1305_blocks_avx2:
40586 + .cfi_push %r14
40587 + push %r15
40588 + .cfi_push %r15
40589 +-.Lblocks_avx2_body:
40590 ++.Lblocks_avx2_body$suffix:
40591 +
40592 + mov $len,%r15 # reassign $len
40593 +
40594 +@@ -1513,7 +1613,7 @@ poly1305_blocks_avx2:
40595 + shr \$2,$s1
40596 + add $r1,$s1 # s1 = r1 + (r1 >> 2)
40597 +
40598 +-.Lbase2_26_pre_avx2:
40599 ++.Lbase2_26_pre_avx2$suffix:
40600 + add 0($inp),$h0 # accumulate input
40601 + adc 8($inp),$h1
40602 + lea 16($inp),$inp
40603 +@@ -1524,10 +1624,10 @@ poly1305_blocks_avx2:
40604 + mov $r1,%rax
40605 +
40606 + test \$63,%r15
40607 +- jnz .Lbase2_26_pre_avx2
40608 ++ jnz .Lbase2_26_pre_avx2$suffix
40609 +
40610 + test $padbit,$padbit # if $padbit is zero,
40611 +- jz .Lstore_base2_64_avx2 # store hash in base 2^64 format
40612 ++ jz .Lstore_base2_64_avx2$suffix # store hash in base 2^64 format
40613 +
40614 + ################################# base 2^64 -> base 2^26
40615 + mov $h0,%rax
40616 +@@ -1548,57 +1648,56 @@ poly1305_blocks_avx2:
40617 + or $r1,$h2 # h[4]
40618 +
40619 + test %r15,%r15
40620 +- jz .Lstore_base2_26_avx2
40621 ++ jz .Lstore_base2_26_avx2$suffix
40622 +
40623 + vmovd %rax#d,%x#$H0
40624 + vmovd %rdx#d,%x#$H1
40625 + vmovd $h0#d,%x#$H2
40626 + vmovd $h1#d,%x#$H3
40627 + vmovd $h2#d,%x#$H4
40628 +- jmp .Lproceed_avx2
40629 ++ jmp .Lproceed_avx2$suffix
40630 +
40631 + .align 32
40632 +-.Lstore_base2_64_avx2:
40633 ++.Lstore_base2_64_avx2$suffix:
40634 + mov $h0,0($ctx)
40635 + mov $h1,8($ctx)
40636 + mov $h2,16($ctx) # note that is_base2_26 is zeroed
40637 +- jmp .Ldone_avx2
40638 ++ jmp .Ldone_avx2$suffix
40639 +
40640 + .align 16
40641 +-.Lstore_base2_26_avx2:
40642 ++.Lstore_base2_26_avx2$suffix:
40643 + mov %rax#d,0($ctx) # store hash value base 2^26
40644 + mov %rdx#d,4($ctx)
40645 + mov $h0#d,8($ctx)
40646 + mov $h1#d,12($ctx)
40647 + mov $h2#d,16($ctx)
40648 + .align 16
40649 +-.Ldone_avx2:
40650 +- mov 0(%rsp),%r15
40651 ++.Ldone_avx2$suffix:
40652 ++ pop %r15
40653 + .cfi_restore %r15
40654 +- mov 8(%rsp),%r14
40655 ++ pop %r14
40656 + .cfi_restore %r14
40657 +- mov 16(%rsp),%r13
40658 ++ pop %r13
40659 + .cfi_restore %r13
40660 +- mov 24(%rsp),%r12
40661 ++ pop %r12
40662 + .cfi_restore %r12
40663 +- mov 32(%rsp),%rbp
40664 +-.cfi_restore %rbp
40665 +- mov 40(%rsp),%rbx
40666 ++ pop %rbx
40667 + .cfi_restore %rbx
40668 +- lea 48(%rsp),%rsp
40669 +-.cfi_adjust_cfa_offset -48
40670 +-.Lno_data_avx2:
40671 +-.Lblocks_avx2_epilogue:
40672 ++ pop %rbp
40673 ++.cfi_restore %rbp
40674 ++.Lno_data_avx2$suffix:
40675 ++.Lblocks_avx2_epilogue$suffix:
40676 + ret
40677 + .cfi_endproc
40678 +
40679 + .align 32
40680 +-.Lbase2_64_avx2:
40681 ++.Lbase2_64_avx2$suffix:
40682 + .cfi_startproc
40683 +- push %rbx
40684 +-.cfi_push %rbx
40685 + push %rbp
40686 + .cfi_push %rbp
40687 ++ mov %rsp,%rbp
40688 ++ push %rbx
40689 ++.cfi_push %rbx
40690 + push %r12
40691 + .cfi_push %r12
40692 + push %r13
40693 +@@ -1607,7 +1706,7 @@ poly1305_blocks_avx2:
40694 + .cfi_push %r14
40695 + push %r15
40696 + .cfi_push %r15
40697 +-.Lbase2_64_avx2_body:
40698 ++.Lbase2_64_avx2_body$suffix:
40699 +
40700 + mov $len,%r15 # reassign $len
40701 +
40702 +@@ -1624,9 +1723,9 @@ poly1305_blocks_avx2:
40703 + add $r1,$s1 # s1 = r1 + (r1 >> 2)
40704 +
40705 + test \$63,$len
40706 +- jz .Linit_avx2
40707 ++ jz .Linit_avx2$suffix
40708 +
40709 +-.Lbase2_64_pre_avx2:
40710 ++.Lbase2_64_pre_avx2$suffix:
40711 + add 0($inp),$h0 # accumulate input
40712 + adc 8($inp),$h1
40713 + lea 16($inp),$inp
40714 +@@ -1637,9 +1736,9 @@ poly1305_blocks_avx2:
40715 + mov $r1,%rax
40716 +
40717 + test \$63,%r15
40718 +- jnz .Lbase2_64_pre_avx2
40719 ++ jnz .Lbase2_64_pre_avx2$suffix
40720 +
40721 +-.Linit_avx2:
40722 ++.Linit_avx2$suffix:
40723 + ################################# base 2^64 -> base 2^26
40724 + mov $h0,%rax
40725 + mov $h0,%rdx
40726 +@@ -1667,69 +1766,77 @@ poly1305_blocks_avx2:
40727 +
40728 + call __poly1305_init_avx
40729 +
40730 +-.Lproceed_avx2:
40731 ++.Lproceed_avx2$suffix:
40732 + mov %r15,$len # restore $len
40733 +- mov OPENSSL_ia32cap_P+8(%rip),%r10d
40734 ++___
40735 ++$code.=<<___ if (!$kernel);
40736 ++ mov OPENSSL_ia32cap_P+8(%rip),%r9d
40737 + mov \$`(1<<31|1<<30|1<<16)`,%r11d
40738 +-
40739 +- mov 0(%rsp),%r15
40740 ++___
40741 ++$code.=<<___;
40742 ++ pop %r15
40743 + .cfi_restore %r15
40744 +- mov 8(%rsp),%r14
40745 ++ pop %r14
40746 + .cfi_restore %r14
40747 +- mov 16(%rsp),%r13
40748 ++ pop %r13
40749 + .cfi_restore %r13
40750 +- mov 24(%rsp),%r12
40751 ++ pop %r12
40752 + .cfi_restore %r12
40753 +- mov 32(%rsp),%rbp
40754 +-.cfi_restore %rbp
40755 +- mov 40(%rsp),%rbx
40756 ++ pop %rbx
40757 + .cfi_restore %rbx
40758 +- lea 48(%rsp),%rax
40759 +- lea 48(%rsp),%rsp
40760 +-.cfi_adjust_cfa_offset -48
40761 +-.Lbase2_64_avx2_epilogue:
40762 +- jmp .Ldo_avx2
40763 ++ pop %rbp
40764 ++.cfi_restore %rbp
40765 ++.Lbase2_64_avx2_epilogue$suffix:
40766 ++ jmp .Ldo_avx2$suffix
40767 + .cfi_endproc
40768 +
40769 + .align 32
40770 +-.Leven_avx2:
40771 ++.Leven_avx2$suffix:
40772 + .cfi_startproc
40773 +- mov OPENSSL_ia32cap_P+8(%rip),%r10d
40774 ++___
40775 ++$code.=<<___ if (!$kernel);
40776 ++ mov OPENSSL_ia32cap_P+8(%rip),%r9d
40777 ++___
40778 ++$code.=<<___;
40779 + vmovd 4*0($ctx),%x#$H0 # load hash value base 2^26
40780 + vmovd 4*1($ctx),%x#$H1
40781 + vmovd 4*2($ctx),%x#$H2
40782 + vmovd 4*3($ctx),%x#$H3
40783 + vmovd 4*4($ctx),%x#$H4
40784 +
40785 +-.Ldo_avx2:
40786 ++.Ldo_avx2$suffix:
40787 + ___
40788 +-$code.=<<___ if ($avx>2);
40789 ++$code.=<<___ if (!$kernel && $avx>2);
40790 + cmp \$512,$len
40791 + jb .Lskip_avx512
40792 +- and %r11d,%r10d
40793 +- test \$`1<<16`,%r10d # check for AVX512F
40794 ++ and %r11d,%r9d
40795 ++ test \$`1<<16`,%r9d # check for AVX512F
40796 + jnz .Lblocks_avx512
40797 +-.Lskip_avx512:
40798 ++.Lskip_avx512$suffix:
40799 ++___
40800 ++$code.=<<___ if ($avx > 2 && $avx512 && $kernel);
40801 ++ cmp \$512,$len
40802 ++ jae .Lblocks_avx512
40803 + ___
40804 + $code.=<<___ if (!$win64);
40805 +- lea -8(%rsp),%r11
40806 +-.cfi_def_cfa %r11,16
40807 ++ lea 8(%rsp),%r10
40808 ++.cfi_def_cfa_register %r10
40809 + sub \$0x128,%rsp
40810 + ___
40811 + $code.=<<___ if ($win64);
40812 +- lea -0xf8(%rsp),%r11
40813 ++ lea 8(%rsp),%r10
40814 + sub \$0x1c8,%rsp
40815 +- vmovdqa %xmm6,0x50(%r11)
40816 +- vmovdqa %xmm7,0x60(%r11)
40817 +- vmovdqa %xmm8,0x70(%r11)
40818 +- vmovdqa %xmm9,0x80(%r11)
40819 +- vmovdqa %xmm10,0x90(%r11)
40820 +- vmovdqa %xmm11,0xa0(%r11)
40821 +- vmovdqa %xmm12,0xb0(%r11)
40822 +- vmovdqa %xmm13,0xc0(%r11)
40823 +- vmovdqa %xmm14,0xd0(%r11)
40824 +- vmovdqa %xmm15,0xe0(%r11)
40825 +-.Ldo_avx2_body:
40826 ++ vmovdqa %xmm6,-0xb0(%r10)
40827 ++ vmovdqa %xmm7,-0xa0(%r10)
40828 ++ vmovdqa %xmm8,-0x90(%r10)
40829 ++ vmovdqa %xmm9,-0x80(%r10)
40830 ++ vmovdqa %xmm10,-0x70(%r10)
40831 ++ vmovdqa %xmm11,-0x60(%r10)
40832 ++ vmovdqa %xmm12,-0x50(%r10)
40833 ++ vmovdqa %xmm13,-0x40(%r10)
40834 ++ vmovdqa %xmm14,-0x30(%r10)
40835 ++ vmovdqa %xmm15,-0x20(%r10)
40836 ++.Ldo_avx2_body$suffix:
40837 + ___
40838 + $code.=<<___;
40839 + lea .Lconst(%rip),%rcx
40840 +@@ -1794,11 +1901,11 @@ $code.=<<___;
40841 +
40842 + vpaddq $H2,$T2,$H2 # accumulate input
40843 + sub \$64,$len
40844 +- jz .Ltail_avx2
40845 +- jmp .Loop_avx2
40846 ++ jz .Ltail_avx2$suffix
40847 ++ jmp .Loop_avx2$suffix
40848 +
40849 + .align 32
40850 +-.Loop_avx2:
40851 ++.Loop_avx2$suffix:
40852 + ################################################################
40853 + # ((inp[0]*r^4+inp[4])*r^4+inp[ 8])*r^4
40854 + # ((inp[1]*r^4+inp[5])*r^4+inp[ 9])*r^3
40855 +@@ -1946,10 +2053,10 @@ $code.=<<___;
40856 + vpor 32(%rcx),$T4,$T4 # padbit, yes, always
40857 +
40858 + sub \$64,$len
40859 +- jnz .Loop_avx2
40860 ++ jnz .Loop_avx2$suffix
40861 +
40862 + .byte 0x66,0x90
40863 +-.Ltail_avx2:
40864 ++.Ltail_avx2$suffix:
40865 + ################################################################
40866 + # while above multiplications were by r^4 in all lanes, in last
40867 + # iteration we multiply least significant lane by r^4 and most
40868 +@@ -2087,37 +2194,29 @@ $code.=<<___;
40869 + vmovd %x#$H4,`4*4-48-64`($ctx)
40870 + ___
40871 + $code.=<<___ if ($win64);
40872 +- vmovdqa 0x50(%r11),%xmm6
40873 +- vmovdqa 0x60(%r11),%xmm7
40874 +- vmovdqa 0x70(%r11),%xmm8
40875 +- vmovdqa 0x80(%r11),%xmm9
40876 +- vmovdqa 0x90(%r11),%xmm10
40877 +- vmovdqa 0xa0(%r11),%xmm11
40878 +- vmovdqa 0xb0(%r11),%xmm12
40879 +- vmovdqa 0xc0(%r11),%xmm13
40880 +- vmovdqa 0xd0(%r11),%xmm14
40881 +- vmovdqa 0xe0(%r11),%xmm15
40882 +- lea 0xf8(%r11),%rsp
40883 +-.Ldo_avx2_epilogue:
40884 ++ vmovdqa -0xb0(%r10),%xmm6
40885 ++ vmovdqa -0xa0(%r10),%xmm7
40886 ++ vmovdqa -0x90(%r10),%xmm8
40887 ++ vmovdqa -0x80(%r10),%xmm9
40888 ++ vmovdqa -0x70(%r10),%xmm10
40889 ++ vmovdqa -0x60(%r10),%xmm11
40890 ++ vmovdqa -0x50(%r10),%xmm12
40891 ++ vmovdqa -0x40(%r10),%xmm13
40892 ++ vmovdqa -0x30(%r10),%xmm14
40893 ++ vmovdqa -0x20(%r10),%xmm15
40894 ++ lea -8(%r10),%rsp
40895 ++.Ldo_avx2_epilogue$suffix:
40896 + ___
40897 + $code.=<<___ if (!$win64);
40898 +- lea 8(%r11),%rsp
40899 +-.cfi_def_cfa %rsp,8
40900 ++ lea -8(%r10),%rsp
40901 ++.cfi_def_cfa_register %rsp
40902 + ___
40903 + $code.=<<___;
40904 + vzeroupper
40905 + ret
40906 + .cfi_endproc
40907 +-.size poly1305_blocks_avx2,.-poly1305_blocks_avx2
40908 + ___
40909 +-#######################################################################
40910 +-if ($avx>2) {
40911 +-# On entry we have input length divisible by 64. But since inner loop
40912 +-# processes 128 bytes per iteration, cases when length is not divisible
40913 +-# by 128 are handled by passing tail 64 bytes to .Ltail_avx2. For this
40914 +-# reason stack layout is kept identical to poly1305_blocks_avx2. If not
40915 +-# for this tail, we wouldn't have to even allocate stack frame...
40916 +-
40917 ++if($avx > 2 && $avx512) {
40918 + my ($R0,$R1,$R2,$R3,$R4, $S1,$S2,$S3,$S4) = map("%zmm$_",(16..24));
40919 + my ($M0,$M1,$M2,$M3,$M4) = map("%zmm$_",(25..29));
40920 + my $PADBIT="%zmm30";
40921 +@@ -2128,32 +2227,29 @@ map(s/%y/%z/,($H0,$H1,$H2,$H3,$H4));
40922 + map(s/%y/%z/,($MASK));
40923 +
40924 + $code.=<<___;
40925 +-.type poly1305_blocks_avx512,\@function,4
40926 +-.align 32
40927 +-poly1305_blocks_avx512:
40928 + .cfi_startproc
40929 + .Lblocks_avx512:
40930 + mov \$15,%eax
40931 + kmovw %eax,%k2
40932 + ___
40933 + $code.=<<___ if (!$win64);
40934 +- lea -8(%rsp),%r11
40935 +-.cfi_def_cfa %r11,16
40936 ++ lea 8(%rsp),%r10
40937 ++.cfi_def_cfa_register %r10
40938 + sub \$0x128,%rsp
40939 + ___
40940 + $code.=<<___ if ($win64);
40941 +- lea -0xf8(%rsp),%r11
40942 ++ lea 8(%rsp),%r10
40943 + sub \$0x1c8,%rsp
40944 +- vmovdqa %xmm6,0x50(%r11)
40945 +- vmovdqa %xmm7,0x60(%r11)
40946 +- vmovdqa %xmm8,0x70(%r11)
40947 +- vmovdqa %xmm9,0x80(%r11)
40948 +- vmovdqa %xmm10,0x90(%r11)
40949 +- vmovdqa %xmm11,0xa0(%r11)
40950 +- vmovdqa %xmm12,0xb0(%r11)
40951 +- vmovdqa %xmm13,0xc0(%r11)
40952 +- vmovdqa %xmm14,0xd0(%r11)
40953 +- vmovdqa %xmm15,0xe0(%r11)
40954 ++ vmovdqa %xmm6,-0xb0(%r10)
40955 ++ vmovdqa %xmm7,-0xa0(%r10)
40956 ++ vmovdqa %xmm8,-0x90(%r10)
40957 ++ vmovdqa %xmm9,-0x80(%r10)
40958 ++ vmovdqa %xmm10,-0x70(%r10)
40959 ++ vmovdqa %xmm11,-0x60(%r10)
40960 ++ vmovdqa %xmm12,-0x50(%r10)
40961 ++ vmovdqa %xmm13,-0x40(%r10)
40962 ++ vmovdqa %xmm14,-0x30(%r10)
40963 ++ vmovdqa %xmm15,-0x20(%r10)
40964 + .Ldo_avx512_body:
40965 + ___
40966 + $code.=<<___;
40967 +@@ -2679,7 +2775,7 @@ $code.=<<___;
40968 +
40969 + lea 0x90(%rsp),%rax # size optimization for .Ltail_avx2
40970 + add \$64,$len
40971 +- jnz .Ltail_avx2
40972 ++ jnz .Ltail_avx2$suffix
40973 +
40974 + vpsubq $T2,$H2,$H2 # undo input accumulation
40975 + vmovd %x#$H0,`4*0-48-64`($ctx)# save partially reduced
40976 +@@ -2690,29 +2786,61 @@ $code.=<<___;
40977 + vzeroall
40978 + ___
40979 + $code.=<<___ if ($win64);
40980 +- movdqa 0x50(%r11),%xmm6
40981 +- movdqa 0x60(%r11),%xmm7
40982 +- movdqa 0x70(%r11),%xmm8
40983 +- movdqa 0x80(%r11),%xmm9
40984 +- movdqa 0x90(%r11),%xmm10
40985 +- movdqa 0xa0(%r11),%xmm11
40986 +- movdqa 0xb0(%r11),%xmm12
40987 +- movdqa 0xc0(%r11),%xmm13
40988 +- movdqa 0xd0(%r11),%xmm14
40989 +- movdqa 0xe0(%r11),%xmm15
40990 +- lea 0xf8(%r11),%rsp
40991 ++ movdqa -0xb0(%r10),%xmm6
40992 ++ movdqa -0xa0(%r10),%xmm7
40993 ++ movdqa -0x90(%r10),%xmm8
40994 ++ movdqa -0x80(%r10),%xmm9
40995 ++ movdqa -0x70(%r10),%xmm10
40996 ++ movdqa -0x60(%r10),%xmm11
40997 ++ movdqa -0x50(%r10),%xmm12
40998 ++ movdqa -0x40(%r10),%xmm13
40999 ++ movdqa -0x30(%r10),%xmm14
41000 ++ movdqa -0x20(%r10),%xmm15
41001 ++ lea -8(%r10),%rsp
41002 + .Ldo_avx512_epilogue:
41003 + ___
41004 + $code.=<<___ if (!$win64);
41005 +- lea 8(%r11),%rsp
41006 +-.cfi_def_cfa %rsp,8
41007 ++ lea -8(%r10),%rsp
41008 ++.cfi_def_cfa_register %rsp
41009 + ___
41010 + $code.=<<___;
41011 + ret
41012 + .cfi_endproc
41013 +-.size poly1305_blocks_avx512,.-poly1305_blocks_avx512
41014 + ___
41015 +-if ($avx>3) {
41016 ++
41017 ++}
41018 ++
41019 ++}
41020 ++
41021 ++&declare_function("poly1305_blocks_avx2", 32, 4);
41022 ++poly1305_blocks_avxN(0);
41023 ++&end_function("poly1305_blocks_avx2");
41024 ++
41025 ++if($kernel) {
41026 ++ $code .= "#endif\n";
41027 ++}
41028 ++
41029 ++#######################################################################
41030 ++if ($avx>2) {
41031 ++# On entry we have input length divisible by 64. But since inner loop
41032 ++# processes 128 bytes per iteration, cases when length is not divisible
41033 ++# by 128 are handled by passing tail 64 bytes to .Ltail_avx2. For this
41034 ++# reason stack layout is kept identical to poly1305_blocks_avx2. If not
41035 ++# for this tail, we wouldn't have to even allocate stack frame...
41036 ++
41037 ++if($kernel) {
41038 ++ $code .= "#ifdef CONFIG_AS_AVX512\n";
41039 ++}
41040 ++
41041 ++&declare_function("poly1305_blocks_avx512", 32, 4);
41042 ++poly1305_blocks_avxN(1);
41043 ++&end_function("poly1305_blocks_avx512");
41044 ++
41045 ++if ($kernel) {
41046 ++ $code .= "#endif\n";
41047 ++}
41048 ++
41049 ++if (!$kernel && $avx>3) {
41050 + ########################################################################
41051 + # VPMADD52 version using 2^44 radix.
41052 + #
41053 +@@ -3753,45 +3881,9 @@ poly1305_emit_base2_44:
41054 + .size poly1305_emit_base2_44,.-poly1305_emit_base2_44
41055 + ___
41056 + } } }
41057 +-$code.=<<___;
41058 +-.align 64
41059 +-.Lconst:
41060 +-.Lmask24:
41061 +-.long 0x0ffffff,0,0x0ffffff,0,0x0ffffff,0,0x0ffffff,0
41062 +-.L129:
41063 +-.long `1<<24`,0,`1<<24`,0,`1<<24`,0,`1<<24`,0
41064 +-.Lmask26:
41065 +-.long 0x3ffffff,0,0x3ffffff,0,0x3ffffff,0,0x3ffffff,0
41066 +-.Lpermd_avx2:
41067 +-.long 2,2,2,3,2,0,2,1
41068 +-.Lpermd_avx512:
41069 +-.long 0,0,0,1, 0,2,0,3, 0,4,0,5, 0,6,0,7
41070 +-
41071 +-.L2_44_inp_permd:
41072 +-.long 0,1,1,2,2,3,7,7
41073 +-.L2_44_inp_shift:
41074 +-.quad 0,12,24,64
41075 +-.L2_44_mask:
41076 +-.quad 0xfffffffffff,0xfffffffffff,0x3ffffffffff,0xffffffffffffffff
41077 +-.L2_44_shift_rgt:
41078 +-.quad 44,44,42,64
41079 +-.L2_44_shift_lft:
41080 +-.quad 8,8,10,64
41081 +-
41082 +-.align 64
41083 +-.Lx_mask44:
41084 +-.quad 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff
41085 +-.quad 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff
41086 +-.Lx_mask42:
41087 +-.quad 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff
41088 +-.quad 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff
41089 +-___
41090 + }
41091 +-$code.=<<___;
41092 +-.asciz "Poly1305 for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
41093 +-.align 16
41094 +-___
41095 +
41096 ++if (!$kernel)
41097 + { # chacha20-poly1305 helpers
41098 + my ($out,$inp,$otp,$len)=$win64 ? ("%rcx","%rdx","%r8", "%r9") : # Win64 order
41099 + ("%rdi","%rsi","%rdx","%rcx"); # Unix order
41100 +@@ -4038,17 +4130,17 @@ avx_handler:
41101 +
41102 + .section .pdata
41103 + .align 4
41104 +- .rva .LSEH_begin_poly1305_init
41105 +- .rva .LSEH_end_poly1305_init
41106 +- .rva .LSEH_info_poly1305_init
41107 ++ .rva .LSEH_begin_poly1305_init_x86_64
41108 ++ .rva .LSEH_end_poly1305_init_x86_64
41109 ++ .rva .LSEH_info_poly1305_init_x86_64
41110 +
41111 +- .rva .LSEH_begin_poly1305_blocks
41112 +- .rva .LSEH_end_poly1305_blocks
41113 +- .rva .LSEH_info_poly1305_blocks
41114 ++ .rva .LSEH_begin_poly1305_blocks_x86_64
41115 ++ .rva .LSEH_end_poly1305_blocks_x86_64
41116 ++ .rva .LSEH_info_poly1305_blocks_x86_64
41117 +
41118 +- .rva .LSEH_begin_poly1305_emit
41119 +- .rva .LSEH_end_poly1305_emit
41120 +- .rva .LSEH_info_poly1305_emit
41121 ++ .rva .LSEH_begin_poly1305_emit_x86_64
41122 ++ .rva .LSEH_end_poly1305_emit_x86_64
41123 ++ .rva .LSEH_info_poly1305_emit_x86_64
41124 + ___
41125 + $code.=<<___ if ($avx);
41126 + .rva .LSEH_begin_poly1305_blocks_avx
41127 +@@ -4088,20 +4180,20 @@ ___
41128 + $code.=<<___;
41129 + .section .xdata
41130 + .align 8
41131 +-.LSEH_info_poly1305_init:
41132 ++.LSEH_info_poly1305_init_x86_64:
41133 + .byte 9,0,0,0
41134 + .rva se_handler
41135 +- .rva .LSEH_begin_poly1305_init,.LSEH_begin_poly1305_init
41136 ++ .rva .LSEH_begin_poly1305_init_x86_64,.LSEH_begin_poly1305_init_x86_64
41137 +
41138 +-.LSEH_info_poly1305_blocks:
41139 ++.LSEH_info_poly1305_blocks_x86_64:
41140 + .byte 9,0,0,0
41141 + .rva se_handler
41142 + .rva .Lblocks_body,.Lblocks_epilogue
41143 +
41144 +-.LSEH_info_poly1305_emit:
41145 ++.LSEH_info_poly1305_emit_x86_64:
41146 + .byte 9,0,0,0
41147 + .rva se_handler
41148 +- .rva .LSEH_begin_poly1305_emit,.LSEH_begin_poly1305_emit
41149 ++ .rva .LSEH_begin_poly1305_emit_x86_64,.LSEH_begin_poly1305_emit_x86_64
41150 + ___
41151 + $code.=<<___ if ($avx);
41152 + .LSEH_info_poly1305_blocks_avx_1:
41153 +@@ -4148,12 +4240,26 @@ $code.=<<___ if ($avx>2);
41154 + ___
41155 + }
41156 +
41157 ++open SELF,$0;
41158 ++while(<SELF>) {
41159 ++ next if (/^#!/);
41160 ++ last if (!s/^#/\/\// and !/^$/);
41161 ++ print;
41162 ++}
41163 ++close SELF;
41164 ++
41165 + foreach (split('\n',$code)) {
41166 + s/\`([^\`]*)\`/eval($1)/ge;
41167 + s/%r([a-z]+)#d/%e$1/g;
41168 + s/%r([0-9]+)#d/%r$1d/g;
41169 + s/%x#%[yz]/%x/g or s/%y#%z/%y/g or s/%z#%[yz]/%z/g;
41170 +
41171 ++ if ($kernel) {
41172 ++ s/(^\.type.*),[0-9]+$/\1/;
41173 ++ s/(^\.type.*),\@abi-omnipotent+$/\1,\@function/;
41174 ++ next if /^\.cfi.*/;
41175 ++ }
41176 ++
41177 + print $_,"\n";
41178 + }
41179 + close STDOUT;
41180 +diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
41181 +index edb7113e36f3..657363588e0c 100644
41182 +--- a/arch/x86/crypto/poly1305_glue.c
41183 ++++ b/arch/x86/crypto/poly1305_glue.c
41184 +@@ -1,8 +1,6 @@
41185 +-// SPDX-License-Identifier: GPL-2.0-or-later
41186 ++// SPDX-License-Identifier: GPL-2.0 OR MIT
41187 + /*
41188 +- * Poly1305 authenticator algorithm, RFC7539, SIMD glue code
41189 +- *
41190 +- * Copyright (C) 2015 Martin Willi
41191 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
41192 + */
41193 +
41194 + #include <crypto/algapi.h>
41195 +@@ -13,279 +11,170 @@
41196 + #include <linux/jump_label.h>
41197 + #include <linux/kernel.h>
41198 + #include <linux/module.h>
41199 ++#include <asm/intel-family.h>
41200 + #include <asm/simd.h>
41201 +
41202 +-asmlinkage void poly1305_block_sse2(u32 *h, const u8 *src,
41203 +- const u32 *r, unsigned int blocks);
41204 +-asmlinkage void poly1305_2block_sse2(u32 *h, const u8 *src, const u32 *r,
41205 +- unsigned int blocks, const u32 *u);
41206 +-asmlinkage void poly1305_4block_avx2(u32 *h, const u8 *src, const u32 *r,
41207 +- unsigned int blocks, const u32 *u);
41208 +-
41209 +-static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_simd);
41210 ++asmlinkage void poly1305_init_x86_64(void *ctx,
41211 ++ const u8 key[POLY1305_KEY_SIZE]);
41212 ++asmlinkage void poly1305_blocks_x86_64(void *ctx, const u8 *inp,
41213 ++ const size_t len, const u32 padbit);
41214 ++asmlinkage void poly1305_emit_x86_64(void *ctx, u8 mac[POLY1305_DIGEST_SIZE],
41215 ++ const u32 nonce[4]);
41216 ++asmlinkage void poly1305_emit_avx(void *ctx, u8 mac[POLY1305_DIGEST_SIZE],
41217 ++ const u32 nonce[4]);
41218 ++asmlinkage void poly1305_blocks_avx(void *ctx, const u8 *inp, const size_t len,
41219 ++ const u32 padbit);
41220 ++asmlinkage void poly1305_blocks_avx2(void *ctx, const u8 *inp, const size_t len,
41221 ++ const u32 padbit);
41222 ++asmlinkage void poly1305_blocks_avx512(void *ctx, const u8 *inp,
41223 ++ const size_t len, const u32 padbit);
41224 ++
41225 ++static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx);
41226 + static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx2);
41227 ++static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx512);
41228 ++
41229 ++struct poly1305_arch_internal {
41230 ++ union {
41231 ++ struct {
41232 ++ u32 h[5];
41233 ++ u32 is_base2_26;
41234 ++ };
41235 ++ u64 hs[3];
41236 ++ };
41237 ++ u64 r[2];
41238 ++ u64 pad;
41239 ++ struct { u32 r2, r1, r4, r3; } rn[9];
41240 ++};
41241 +
41242 +-static inline u64 mlt(u64 a, u64 b)
41243 ++/* The AVX code uses base 2^26, while the scalar code uses base 2^64. If we hit
41244 ++ * the unfortunate situation of using AVX and then having to go back to scalar
41245 ++ * -- because the user is silly and has called the update function from two
41246 ++ * separate contexts -- then we need to convert back to the original base before
41247 ++ * proceeding. It is possible to reason that the initial reduction below is
41248 ++ * sufficient given the implementation invariants. However, for an avoidance of
41249 ++ * doubt and because this is not performance critical, we do the full reduction
41250 ++ * anyway. Z3 proof of below function: https://xn--4db.cc/ltPtHCKN/py
41251 ++ */
41252 ++static void convert_to_base2_64(void *ctx)
41253 + {
41254 +- return a * b;
41255 +-}
41256 ++ struct poly1305_arch_internal *state = ctx;
41257 ++ u32 cy;
41258 +
41259 +-static inline u32 sr(u64 v, u_char n)
41260 +-{
41261 +- return v >> n;
41262 +-}
41263 ++ if (!state->is_base2_26)
41264 ++ return;
41265 +
41266 +-static inline u32 and(u32 v, u32 mask)
41267 +-{
41268 +- return v & mask;
41269 ++ cy = state->h[0] >> 26; state->h[0] &= 0x3ffffff; state->h[1] += cy;
41270 ++ cy = state->h[1] >> 26; state->h[1] &= 0x3ffffff; state->h[2] += cy;
41271 ++ cy = state->h[2] >> 26; state->h[2] &= 0x3ffffff; state->h[3] += cy;
41272 ++ cy = state->h[3] >> 26; state->h[3] &= 0x3ffffff; state->h[4] += cy;
41273 ++ state->hs[0] = ((u64)state->h[2] << 52) | ((u64)state->h[1] << 26) | state->h[0];
41274 ++ state->hs[1] = ((u64)state->h[4] << 40) | ((u64)state->h[3] << 14) | (state->h[2] >> 12);
41275 ++ state->hs[2] = state->h[4] >> 24;
41276 ++#define ULT(a, b) ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1))
41277 ++ cy = (state->hs[2] >> 2) + (state->hs[2] & ~3ULL);
41278 ++ state->hs[2] &= 3;
41279 ++ state->hs[0] += cy;
41280 ++ state->hs[1] += (cy = ULT(state->hs[0], cy));
41281 ++ state->hs[2] += ULT(state->hs[1], cy);
41282 ++#undef ULT
41283 ++ state->is_base2_26 = 0;
41284 + }
41285 +
41286 +-static void poly1305_simd_mult(u32 *a, const u32 *b)
41287 ++static void poly1305_simd_init(void *ctx, const u8 key[POLY1305_KEY_SIZE])
41288 + {
41289 +- u8 m[POLY1305_BLOCK_SIZE];
41290 +-
41291 +- memset(m, 0, sizeof(m));
41292 +- /* The poly1305 block function adds a hi-bit to the accumulator which
41293 +- * we don't need for key multiplication; compensate for it. */
41294 +- a[4] -= 1 << 24;
41295 +- poly1305_block_sse2(a, m, b, 1);
41296 ++ poly1305_init_x86_64(ctx, key);
41297 + }
41298 +
41299 +-static void poly1305_integer_setkey(struct poly1305_key *key, const u8 *raw_key)
41300 ++static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len,
41301 ++ const u32 padbit)
41302 + {
41303 +- /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
41304 +- key->r[0] = (get_unaligned_le32(raw_key + 0) >> 0) & 0x3ffffff;
41305 +- key->r[1] = (get_unaligned_le32(raw_key + 3) >> 2) & 0x3ffff03;
41306 +- key->r[2] = (get_unaligned_le32(raw_key + 6) >> 4) & 0x3ffc0ff;
41307 +- key->r[3] = (get_unaligned_le32(raw_key + 9) >> 6) & 0x3f03fff;
41308 +- key->r[4] = (get_unaligned_le32(raw_key + 12) >> 8) & 0x00fffff;
41309 +-}
41310 ++ struct poly1305_arch_internal *state = ctx;
41311 +
41312 +-static void poly1305_integer_blocks(struct poly1305_state *state,
41313 +- const struct poly1305_key *key,
41314 +- const void *src,
41315 +- unsigned int nblocks, u32 hibit)
41316 +-{
41317 +- u32 r0, r1, r2, r3, r4;
41318 +- u32 s1, s2, s3, s4;
41319 +- u32 h0, h1, h2, h3, h4;
41320 +- u64 d0, d1, d2, d3, d4;
41321 ++ /* SIMD disables preemption, so relax after processing each page. */
41322 ++ BUILD_BUG_ON(PAGE_SIZE < POLY1305_BLOCK_SIZE ||
41323 ++ PAGE_SIZE % POLY1305_BLOCK_SIZE);
41324 +
41325 +- if (!nblocks)
41326 ++ if (!IS_ENABLED(CONFIG_AS_AVX) || !static_branch_likely(&poly1305_use_avx) ||
41327 ++ (len < (POLY1305_BLOCK_SIZE * 18) && !state->is_base2_26) ||
41328 ++ !crypto_simd_usable()) {
41329 ++ convert_to_base2_64(ctx);
41330 ++ poly1305_blocks_x86_64(ctx, inp, len, padbit);
41331 + return;
41332 ++ }
41333 +
41334 +- r0 = key->r[0];
41335 +- r1 = key->r[1];
41336 +- r2 = key->r[2];
41337 +- r3 = key->r[3];
41338 +- r4 = key->r[4];
41339 +-
41340 +- s1 = r1 * 5;
41341 +- s2 = r2 * 5;
41342 +- s3 = r3 * 5;
41343 +- s4 = r4 * 5;
41344 +-
41345 +- h0 = state->h[0];
41346 +- h1 = state->h[1];
41347 +- h2 = state->h[2];
41348 +- h3 = state->h[3];
41349 +- h4 = state->h[4];
41350 +-
41351 +- do {
41352 +- /* h += m[i] */
41353 +- h0 += (get_unaligned_le32(src + 0) >> 0) & 0x3ffffff;
41354 +- h1 += (get_unaligned_le32(src + 3) >> 2) & 0x3ffffff;
41355 +- h2 += (get_unaligned_le32(src + 6) >> 4) & 0x3ffffff;
41356 +- h3 += (get_unaligned_le32(src + 9) >> 6) & 0x3ffffff;
41357 +- h4 += (get_unaligned_le32(src + 12) >> 8) | (hibit << 24);
41358 +-
41359 +- /* h *= r */
41360 +- d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) +
41361 +- mlt(h3, s2) + mlt(h4, s1);
41362 +- d1 = mlt(h0, r1) + mlt(h1, r0) + mlt(h2, s4) +
41363 +- mlt(h3, s3) + mlt(h4, s2);
41364 +- d2 = mlt(h0, r2) + mlt(h1, r1) + mlt(h2, r0) +
41365 +- mlt(h3, s4) + mlt(h4, s3);
41366 +- d3 = mlt(h0, r3) + mlt(h1, r2) + mlt(h2, r1) +
41367 +- mlt(h3, r0) + mlt(h4, s4);
41368 +- d4 = mlt(h0, r4) + mlt(h1, r3) + mlt(h2, r2) +
41369 +- mlt(h3, r1) + mlt(h4, r0);
41370 +-
41371 +- /* (partial) h %= p */
41372 +- d1 += sr(d0, 26); h0 = and(d0, 0x3ffffff);
41373 +- d2 += sr(d1, 26); h1 = and(d1, 0x3ffffff);
41374 +- d3 += sr(d2, 26); h2 = and(d2, 0x3ffffff);
41375 +- d4 += sr(d3, 26); h3 = and(d3, 0x3ffffff);
41376 +- h0 += sr(d4, 26) * 5; h4 = and(d4, 0x3ffffff);
41377 +- h1 += h0 >> 26; h0 = h0 & 0x3ffffff;
41378 +-
41379 +- src += POLY1305_BLOCK_SIZE;
41380 +- } while (--nblocks);
41381 +-
41382 +- state->h[0] = h0;
41383 +- state->h[1] = h1;
41384 +- state->h[2] = h2;
41385 +- state->h[3] = h3;
41386 +- state->h[4] = h4;
41387 ++ for (;;) {
41388 ++ const size_t bytes = min_t(size_t, len, PAGE_SIZE);
41389 ++
41390 ++ kernel_fpu_begin();
41391 ++ if (IS_ENABLED(CONFIG_AS_AVX512) && static_branch_likely(&poly1305_use_avx512))
41392 ++ poly1305_blocks_avx512(ctx, inp, bytes, padbit);
41393 ++ else if (IS_ENABLED(CONFIG_AS_AVX2) && static_branch_likely(&poly1305_use_avx2))
41394 ++ poly1305_blocks_avx2(ctx, inp, bytes, padbit);
41395 ++ else
41396 ++ poly1305_blocks_avx(ctx, inp, bytes, padbit);
41397 ++ kernel_fpu_end();
41398 ++ len -= bytes;
41399 ++ if (!len)
41400 ++ break;
41401 ++ inp += bytes;
41402 ++ }
41403 + }
41404 +
41405 +-static void poly1305_integer_emit(const struct poly1305_state *state, void *dst)
41406 ++static void poly1305_simd_emit(void *ctx, u8 mac[POLY1305_DIGEST_SIZE],
41407 ++ const u32 nonce[4])
41408 + {
41409 +- u32 h0, h1, h2, h3, h4;
41410 +- u32 g0, g1, g2, g3, g4;
41411 +- u32 mask;
41412 +-
41413 +- /* fully carry h */
41414 +- h0 = state->h[0];
41415 +- h1 = state->h[1];
41416 +- h2 = state->h[2];
41417 +- h3 = state->h[3];
41418 +- h4 = state->h[4];
41419 +-
41420 +- h2 += (h1 >> 26); h1 = h1 & 0x3ffffff;
41421 +- h3 += (h2 >> 26); h2 = h2 & 0x3ffffff;
41422 +- h4 += (h3 >> 26); h3 = h3 & 0x3ffffff;
41423 +- h0 += (h4 >> 26) * 5; h4 = h4 & 0x3ffffff;
41424 +- h1 += (h0 >> 26); h0 = h0 & 0x3ffffff;
41425 +-
41426 +- /* compute h + -p */
41427 +- g0 = h0 + 5;
41428 +- g1 = h1 + (g0 >> 26); g0 &= 0x3ffffff;
41429 +- g2 = h2 + (g1 >> 26); g1 &= 0x3ffffff;
41430 +- g3 = h3 + (g2 >> 26); g2 &= 0x3ffffff;
41431 +- g4 = h4 + (g3 >> 26) - (1 << 26); g3 &= 0x3ffffff;
41432 +-
41433 +- /* select h if h < p, or h + -p if h >= p */
41434 +- mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1;
41435 +- g0 &= mask;
41436 +- g1 &= mask;
41437 +- g2 &= mask;
41438 +- g3 &= mask;
41439 +- g4 &= mask;
41440 +- mask = ~mask;
41441 +- h0 = (h0 & mask) | g0;
41442 +- h1 = (h1 & mask) | g1;
41443 +- h2 = (h2 & mask) | g2;
41444 +- h3 = (h3 & mask) | g3;
41445 +- h4 = (h4 & mask) | g4;
41446 +-
41447 +- /* h = h % (2^128) */
41448 +- put_unaligned_le32((h0 >> 0) | (h1 << 26), dst + 0);
41449 +- put_unaligned_le32((h1 >> 6) | (h2 << 20), dst + 4);
41450 +- put_unaligned_le32((h2 >> 12) | (h3 << 14), dst + 8);
41451 +- put_unaligned_le32((h3 >> 18) | (h4 << 8), dst + 12);
41452 ++ struct poly1305_arch_internal *state = ctx;
41453 ++
41454 ++ if (!IS_ENABLED(CONFIG_AS_AVX) || !static_branch_likely(&poly1305_use_avx) ||
41455 ++ !state->is_base2_26 || !crypto_simd_usable()) {
41456 ++ convert_to_base2_64(ctx);
41457 ++ poly1305_emit_x86_64(ctx, mac, nonce);
41458 ++ } else
41459 ++ poly1305_emit_avx(ctx, mac, nonce);
41460 + }
41461 +
41462 +-void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key)
41463 ++void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
41464 + {
41465 +- poly1305_integer_setkey(desc->opaque_r, key);
41466 +- desc->s[0] = get_unaligned_le32(key + 16);
41467 +- desc->s[1] = get_unaligned_le32(key + 20);
41468 +- desc->s[2] = get_unaligned_le32(key + 24);
41469 +- desc->s[3] = get_unaligned_le32(key + 28);
41470 +- poly1305_core_init(&desc->h);
41471 +- desc->buflen = 0;
41472 +- desc->sset = true;
41473 +- desc->rset = 1;
41474 ++ poly1305_simd_init(&dctx->h, key);
41475 ++ dctx->s[0] = get_unaligned_le32(&key[16]);
41476 ++ dctx->s[1] = get_unaligned_le32(&key[20]);
41477 ++ dctx->s[2] = get_unaligned_le32(&key[24]);
41478 ++ dctx->s[3] = get_unaligned_le32(&key[28]);
41479 ++ dctx->buflen = 0;
41480 ++ dctx->sset = true;
41481 + }
41482 +-EXPORT_SYMBOL_GPL(poly1305_init_arch);
41483 ++EXPORT_SYMBOL(poly1305_init_arch);
41484 +
41485 +-static unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
41486 +- const u8 *src, unsigned int srclen)
41487 ++static unsigned int crypto_poly1305_setdctxkey(struct poly1305_desc_ctx *dctx,
41488 ++ const u8 *inp, unsigned int len)
41489 + {
41490 +- if (!dctx->sset) {
41491 +- if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) {
41492 +- poly1305_integer_setkey(dctx->r, src);
41493 +- src += POLY1305_BLOCK_SIZE;
41494 +- srclen -= POLY1305_BLOCK_SIZE;
41495 ++ unsigned int acc = 0;
41496 ++ if (unlikely(!dctx->sset)) {
41497 ++ if (!dctx->rset && len >= POLY1305_BLOCK_SIZE) {
41498 ++ poly1305_simd_init(&dctx->h, inp);
41499 ++ inp += POLY1305_BLOCK_SIZE;
41500 ++ len -= POLY1305_BLOCK_SIZE;
41501 ++ acc += POLY1305_BLOCK_SIZE;
41502 + dctx->rset = 1;
41503 + }
41504 +- if (srclen >= POLY1305_BLOCK_SIZE) {
41505 +- dctx->s[0] = get_unaligned_le32(src + 0);
41506 +- dctx->s[1] = get_unaligned_le32(src + 4);
41507 +- dctx->s[2] = get_unaligned_le32(src + 8);
41508 +- dctx->s[3] = get_unaligned_le32(src + 12);
41509 +- src += POLY1305_BLOCK_SIZE;
41510 +- srclen -= POLY1305_BLOCK_SIZE;
41511 ++ if (len >= POLY1305_BLOCK_SIZE) {
41512 ++ dctx->s[0] = get_unaligned_le32(&inp[0]);
41513 ++ dctx->s[1] = get_unaligned_le32(&inp[4]);
41514 ++ dctx->s[2] = get_unaligned_le32(&inp[8]);
41515 ++ dctx->s[3] = get_unaligned_le32(&inp[12]);
41516 ++ inp += POLY1305_BLOCK_SIZE;
41517 ++ len -= POLY1305_BLOCK_SIZE;
41518 ++ acc += POLY1305_BLOCK_SIZE;
41519 + dctx->sset = true;
41520 + }
41521 + }
41522 +- return srclen;
41523 +-}
41524 +-
41525 +-static unsigned int poly1305_scalar_blocks(struct poly1305_desc_ctx *dctx,
41526 +- const u8 *src, unsigned int srclen)
41527 +-{
41528 +- unsigned int datalen;
41529 +-
41530 +- if (unlikely(!dctx->sset)) {
41531 +- datalen = crypto_poly1305_setdesckey(dctx, src, srclen);
41532 +- src += srclen - datalen;
41533 +- srclen = datalen;
41534 +- }
41535 +- if (srclen >= POLY1305_BLOCK_SIZE) {
41536 +- poly1305_integer_blocks(&dctx->h, dctx->opaque_r, src,
41537 +- srclen / POLY1305_BLOCK_SIZE, 1);
41538 +- srclen %= POLY1305_BLOCK_SIZE;
41539 +- }
41540 +- return srclen;
41541 +-}
41542 +-
41543 +-static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx,
41544 +- const u8 *src, unsigned int srclen)
41545 +-{
41546 +- unsigned int blocks, datalen;
41547 +-
41548 +- if (unlikely(!dctx->sset)) {
41549 +- datalen = crypto_poly1305_setdesckey(dctx, src, srclen);
41550 +- src += srclen - datalen;
41551 +- srclen = datalen;
41552 +- }
41553 +-
41554 +- if (IS_ENABLED(CONFIG_AS_AVX2) &&
41555 +- static_branch_likely(&poly1305_use_avx2) &&
41556 +- srclen >= POLY1305_BLOCK_SIZE * 4) {
41557 +- if (unlikely(dctx->rset < 4)) {
41558 +- if (dctx->rset < 2) {
41559 +- dctx->r[1] = dctx->r[0];
41560 +- poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r);
41561 +- }
41562 +- dctx->r[2] = dctx->r[1];
41563 +- poly1305_simd_mult(dctx->r[2].r, dctx->r[0].r);
41564 +- dctx->r[3] = dctx->r[2];
41565 +- poly1305_simd_mult(dctx->r[3].r, dctx->r[0].r);
41566 +- dctx->rset = 4;
41567 +- }
41568 +- blocks = srclen / (POLY1305_BLOCK_SIZE * 4);
41569 +- poly1305_4block_avx2(dctx->h.h, src, dctx->r[0].r, blocks,
41570 +- dctx->r[1].r);
41571 +- src += POLY1305_BLOCK_SIZE * 4 * blocks;
41572 +- srclen -= POLY1305_BLOCK_SIZE * 4 * blocks;
41573 +- }
41574 +-
41575 +- if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) {
41576 +- if (unlikely(dctx->rset < 2)) {
41577 +- dctx->r[1] = dctx->r[0];
41578 +- poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r);
41579 +- dctx->rset = 2;
41580 +- }
41581 +- blocks = srclen / (POLY1305_BLOCK_SIZE * 2);
41582 +- poly1305_2block_sse2(dctx->h.h, src, dctx->r[0].r,
41583 +- blocks, dctx->r[1].r);
41584 +- src += POLY1305_BLOCK_SIZE * 2 * blocks;
41585 +- srclen -= POLY1305_BLOCK_SIZE * 2 * blocks;
41586 +- }
41587 +- if (srclen >= POLY1305_BLOCK_SIZE) {
41588 +- poly1305_block_sse2(dctx->h.h, src, dctx->r[0].r, 1);
41589 +- srclen -= POLY1305_BLOCK_SIZE;
41590 +- }
41591 +- return srclen;
41592 ++ return acc;
41593 + }
41594 +
41595 + void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
41596 + unsigned int srclen)
41597 + {
41598 +- unsigned int bytes;
41599 ++ unsigned int bytes, used;
41600 +
41601 + if (unlikely(dctx->buflen)) {
41602 + bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen);
41603 +@@ -295,31 +184,19 @@ void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
41604 + dctx->buflen += bytes;
41605 +
41606 + if (dctx->buflen == POLY1305_BLOCK_SIZE) {
41607 +- if (static_branch_likely(&poly1305_use_simd) &&
41608 +- likely(crypto_simd_usable())) {
41609 +- kernel_fpu_begin();
41610 +- poly1305_simd_blocks(dctx, dctx->buf,
41611 +- POLY1305_BLOCK_SIZE);
41612 +- kernel_fpu_end();
41613 +- } else {
41614 +- poly1305_scalar_blocks(dctx, dctx->buf,
41615 +- POLY1305_BLOCK_SIZE);
41616 +- }
41617 ++ if (likely(!crypto_poly1305_setdctxkey(dctx, dctx->buf, POLY1305_BLOCK_SIZE)))
41618 ++ poly1305_simd_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1);
41619 + dctx->buflen = 0;
41620 + }
41621 + }
41622 +
41623 + if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
41624 +- if (static_branch_likely(&poly1305_use_simd) &&
41625 +- likely(crypto_simd_usable())) {
41626 +- kernel_fpu_begin();
41627 +- bytes = poly1305_simd_blocks(dctx, src, srclen);
41628 +- kernel_fpu_end();
41629 +- } else {
41630 +- bytes = poly1305_scalar_blocks(dctx, src, srclen);
41631 +- }
41632 +- src += srclen - bytes;
41633 +- srclen = bytes;
41634 ++ bytes = round_down(srclen, POLY1305_BLOCK_SIZE);
41635 ++ srclen -= bytes;
41636 ++ used = crypto_poly1305_setdctxkey(dctx, src, bytes);
41637 ++ if (likely(bytes - used))
41638 ++ poly1305_simd_blocks(&dctx->h, src + used, bytes - used, 1);
41639 ++ src += bytes;
41640 + }
41641 +
41642 + if (unlikely(srclen)) {
41643 +@@ -329,31 +206,17 @@ void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
41644 + }
41645 + EXPORT_SYMBOL(poly1305_update_arch);
41646 +
41647 +-void poly1305_final_arch(struct poly1305_desc_ctx *desc, u8 *dst)
41648 ++void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
41649 + {
41650 +- __le32 digest[4];
41651 +- u64 f = 0;
41652 +-
41653 +- if (unlikely(desc->buflen)) {
41654 +- desc->buf[desc->buflen++] = 1;
41655 +- memset(desc->buf + desc->buflen, 0,
41656 +- POLY1305_BLOCK_SIZE - desc->buflen);
41657 +- poly1305_integer_blocks(&desc->h, desc->opaque_r, desc->buf, 1, 0);
41658 ++ if (unlikely(dctx->buflen)) {
41659 ++ dctx->buf[dctx->buflen++] = 1;
41660 ++ memset(dctx->buf + dctx->buflen, 0,
41661 ++ POLY1305_BLOCK_SIZE - dctx->buflen);
41662 ++ poly1305_simd_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
41663 + }
41664 +
41665 +- poly1305_integer_emit(&desc->h, digest);
41666 +-
41667 +- /* mac = (h + s) % (2^128) */
41668 +- f = (f >> 32) + le32_to_cpu(digest[0]) + desc->s[0];
41669 +- put_unaligned_le32(f, dst + 0);
41670 +- f = (f >> 32) + le32_to_cpu(digest[1]) + desc->s[1];
41671 +- put_unaligned_le32(f, dst + 4);
41672 +- f = (f >> 32) + le32_to_cpu(digest[2]) + desc->s[2];
41673 +- put_unaligned_le32(f, dst + 8);
41674 +- f = (f >> 32) + le32_to_cpu(digest[3]) + desc->s[3];
41675 +- put_unaligned_le32(f, dst + 12);
41676 +-
41677 +- *desc = (struct poly1305_desc_ctx){};
41678 ++ poly1305_simd_emit(&dctx->h, dst, dctx->s);
41679 ++ *dctx = (struct poly1305_desc_ctx){};
41680 + }
41681 + EXPORT_SYMBOL(poly1305_final_arch);
41682 +
41683 +@@ -361,38 +224,34 @@ static int crypto_poly1305_init(struct shash_desc *desc)
41684 + {
41685 + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
41686 +
41687 +- poly1305_core_init(&dctx->h);
41688 +- dctx->buflen = 0;
41689 +- dctx->rset = 0;
41690 +- dctx->sset = false;
41691 +-
41692 ++ *dctx = (struct poly1305_desc_ctx){};
41693 + return 0;
41694 + }
41695 +
41696 +-static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
41697 ++static int crypto_poly1305_update(struct shash_desc *desc,
41698 ++ const u8 *src, unsigned int srclen)
41699 + {
41700 + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
41701 +
41702 +- if (unlikely(!dctx->sset))
41703 +- return -ENOKEY;
41704 +-
41705 +- poly1305_final_arch(dctx, dst);
41706 ++ poly1305_update_arch(dctx, src, srclen);
41707 + return 0;
41708 + }
41709 +
41710 +-static int poly1305_simd_update(struct shash_desc *desc,
41711 +- const u8 *src, unsigned int srclen)
41712 ++static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
41713 + {
41714 + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
41715 +
41716 +- poly1305_update_arch(dctx, src, srclen);
41717 ++ if (unlikely(!dctx->sset))
41718 ++ return -ENOKEY;
41719 ++
41720 ++ poly1305_final_arch(dctx, dst);
41721 + return 0;
41722 + }
41723 +
41724 + static struct shash_alg alg = {
41725 + .digestsize = POLY1305_DIGEST_SIZE,
41726 + .init = crypto_poly1305_init,
41727 +- .update = poly1305_simd_update,
41728 ++ .update = crypto_poly1305_update,
41729 + .final = crypto_poly1305_final,
41730 + .descsize = sizeof(struct poly1305_desc_ctx),
41731 + .base = {
41732 +@@ -406,17 +265,19 @@ static struct shash_alg alg = {
41733 +
41734 + static int __init poly1305_simd_mod_init(void)
41735 + {
41736 +- if (!boot_cpu_has(X86_FEATURE_XMM2))
41737 +- return 0;
41738 +-
41739 +- static_branch_enable(&poly1305_use_simd);
41740 +-
41741 +- if (IS_ENABLED(CONFIG_AS_AVX2) &&
41742 +- boot_cpu_has(X86_FEATURE_AVX) &&
41743 ++ if (IS_ENABLED(CONFIG_AS_AVX) && boot_cpu_has(X86_FEATURE_AVX) &&
41744 ++ cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
41745 ++ static_branch_enable(&poly1305_use_avx);
41746 ++ if (IS_ENABLED(CONFIG_AS_AVX2) && boot_cpu_has(X86_FEATURE_AVX) &&
41747 + boot_cpu_has(X86_FEATURE_AVX2) &&
41748 + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
41749 + static_branch_enable(&poly1305_use_avx2);
41750 +-
41751 ++ if (IS_ENABLED(CONFIG_AS_AVX512) && boot_cpu_has(X86_FEATURE_AVX) &&
41752 ++ boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX512F) &&
41753 ++ cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | XFEATURE_MASK_AVX512, NULL) &&
41754 ++ /* Skylake downclocks unacceptably much when using zmm, but later generations are fast. */
41755 ++ boot_cpu_data.x86_model != INTEL_FAM6_SKYLAKE_X)
41756 ++ static_branch_enable(&poly1305_use_avx512);
41757 + return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? crypto_register_shash(&alg) : 0;
41758 + }
41759 +
41760 +@@ -430,7 +291,7 @@ module_init(poly1305_simd_mod_init);
41761 + module_exit(poly1305_simd_mod_exit);
41762 +
41763 + MODULE_LICENSE("GPL");
41764 +-MODULE_AUTHOR("Martin Willi <martin@××××××××××.org>");
41765 ++MODULE_AUTHOR("Jason A. Donenfeld <Jason@×××××.com>");
41766 + MODULE_DESCRIPTION("Poly1305 authenticator");
41767 + MODULE_ALIAS_CRYPTO("poly1305");
41768 + MODULE_ALIAS_CRYPTO("poly1305-simd");
41769 +diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
41770 +index 0b2c4fce26d9..14c032de276e 100644
41771 +--- a/lib/crypto/Kconfig
41772 ++++ b/lib/crypto/Kconfig
41773 +@@ -90,7 +90,7 @@ config CRYPTO_LIB_DES
41774 + config CRYPTO_LIB_POLY1305_RSIZE
41775 + int
41776 + default 2 if MIPS
41777 +- default 4 if X86_64
41778 ++ default 11 if X86_64
41779 + default 9 if ARM || ARM64
41780 + default 1
41781 +
41782 +--
41783 +cgit v1.2.3-4-ga26e
41784 +
41785 +
41786 +From f0fba3d0d39eaefa8d3de48892d94e05653db501 Mon Sep 17 00:00:00 2001
41787 +From: "Jason A. Donenfeld" <Jason@×××××.com>
41788 +Date: Sun, 5 Jan 2020 22:40:49 -0500
41789 +Subject: crypto: {arm,arm64,mips}/poly1305 - remove redundant non-reduction
41790 + from emit
41791 +MIME-Version: 1.0
41792 +Content-Type: text/plain; charset=UTF-8
41793 +Content-Transfer-Encoding: 8bit
41794 +
41795 +commit 31899908a0d248b030b4464425b86c717e0007d4 upstream.
41796 +
41797 +This appears to be some kind of copy and paste error, and is actually
41798 +dead code.
41799 +
41800 +Pre: f = 0 ⇒ (f >> 32) = 0
41801 + f = (f >> 32) + le32_to_cpu(digest[0]);
41802 +Post: 0 ≤ f < 2³²
41803 + put_unaligned_le32(f, dst);
41804 +
41805 +Pre: 0 ≤ f < 2³² ⇒ (f >> 32) = 0
41806 + f = (f >> 32) + le32_to_cpu(digest[1]);
41807 +Post: 0 ≤ f < 2³²
41808 + put_unaligned_le32(f, dst + 4);
41809 +
41810 +Pre: 0 ≤ f < 2³² ⇒ (f >> 32) = 0
41811 + f = (f >> 32) + le32_to_cpu(digest[2]);
41812 +Post: 0 ≤ f < 2³²
41813 + put_unaligned_le32(f, dst + 8);
41814 +
41815 +Pre: 0 ≤ f < 2³² ⇒ (f >> 32) = 0
41816 + f = (f >> 32) + le32_to_cpu(digest[3]);
41817 +Post: 0 ≤ f < 2³²
41818 + put_unaligned_le32(f, dst + 12);
41819 +
41820 +Therefore this sequence is redundant. And Andy's code appears to handle
41821 +misalignment acceptably.
41822 +
41823 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
41824 +Tested-by: Ard Biesheuvel <ardb@××××××.org>
41825 +Reviewed-by: Ard Biesheuvel <ardb@××××××.org>
41826 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
41827 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
41828 +---
41829 + arch/arm/crypto/poly1305-glue.c | 18 ++----------------
41830 + arch/arm64/crypto/poly1305-glue.c | 18 ++----------------
41831 + arch/mips/crypto/poly1305-glue.c | 18 ++----------------
41832 + 3 files changed, 6 insertions(+), 48 deletions(-)
41833 +
41834 +diff --git a/arch/arm/crypto/poly1305-glue.c b/arch/arm/crypto/poly1305-glue.c
41835 +index abe3f2d587dc..ceec04ec2f40 100644
41836 +--- a/arch/arm/crypto/poly1305-glue.c
41837 ++++ b/arch/arm/crypto/poly1305-glue.c
41838 +@@ -20,7 +20,7 @@
41839 +
41840 + void poly1305_init_arm(void *state, const u8 *key);
41841 + void poly1305_blocks_arm(void *state, const u8 *src, u32 len, u32 hibit);
41842 +-void poly1305_emit_arm(void *state, __le32 *digest, const u32 *nonce);
41843 ++void poly1305_emit_arm(void *state, u8 *digest, const u32 *nonce);
41844 +
41845 + void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit)
41846 + {
41847 +@@ -179,9 +179,6 @@ EXPORT_SYMBOL(poly1305_update_arch);
41848 +
41849 + void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
41850 + {
41851 +- __le32 digest[4];
41852 +- u64 f = 0;
41853 +-
41854 + if (unlikely(dctx->buflen)) {
41855 + dctx->buf[dctx->buflen++] = 1;
41856 + memset(dctx->buf + dctx->buflen, 0,
41857 +@@ -189,18 +186,7 @@ void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
41858 + poly1305_blocks_arm(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
41859 + }
41860 +
41861 +- poly1305_emit_arm(&dctx->h, digest, dctx->s);
41862 +-
41863 +- /* mac = (h + s) % (2^128) */
41864 +- f = (f >> 32) + le32_to_cpu(digest[0]);
41865 +- put_unaligned_le32(f, dst);
41866 +- f = (f >> 32) + le32_to_cpu(digest[1]);
41867 +- put_unaligned_le32(f, dst + 4);
41868 +- f = (f >> 32) + le32_to_cpu(digest[2]);
41869 +- put_unaligned_le32(f, dst + 8);
41870 +- f = (f >> 32) + le32_to_cpu(digest[3]);
41871 +- put_unaligned_le32(f, dst + 12);
41872 +-
41873 ++ poly1305_emit_arm(&dctx->h, dst, dctx->s);
41874 + *dctx = (struct poly1305_desc_ctx){};
41875 + }
41876 + EXPORT_SYMBOL(poly1305_final_arch);
41877 +diff --git a/arch/arm64/crypto/poly1305-glue.c b/arch/arm64/crypto/poly1305-glue.c
41878 +index 83a2338a8826..e97b092f56b8 100644
41879 +--- a/arch/arm64/crypto/poly1305-glue.c
41880 ++++ b/arch/arm64/crypto/poly1305-glue.c
41881 +@@ -21,7 +21,7 @@
41882 + asmlinkage void poly1305_init_arm64(void *state, const u8 *key);
41883 + asmlinkage void poly1305_blocks(void *state, const u8 *src, u32 len, u32 hibit);
41884 + asmlinkage void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit);
41885 +-asmlinkage void poly1305_emit(void *state, __le32 *digest, const u32 *nonce);
41886 ++asmlinkage void poly1305_emit(void *state, u8 *digest, const u32 *nonce);
41887 +
41888 + static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
41889 +
41890 +@@ -162,9 +162,6 @@ EXPORT_SYMBOL(poly1305_update_arch);
41891 +
41892 + void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
41893 + {
41894 +- __le32 digest[4];
41895 +- u64 f = 0;
41896 +-
41897 + if (unlikely(dctx->buflen)) {
41898 + dctx->buf[dctx->buflen++] = 1;
41899 + memset(dctx->buf + dctx->buflen, 0,
41900 +@@ -172,18 +169,7 @@ void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
41901 + poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
41902 + }
41903 +
41904 +- poly1305_emit(&dctx->h, digest, dctx->s);
41905 +-
41906 +- /* mac = (h + s) % (2^128) */
41907 +- f = (f >> 32) + le32_to_cpu(digest[0]);
41908 +- put_unaligned_le32(f, dst);
41909 +- f = (f >> 32) + le32_to_cpu(digest[1]);
41910 +- put_unaligned_le32(f, dst + 4);
41911 +- f = (f >> 32) + le32_to_cpu(digest[2]);
41912 +- put_unaligned_le32(f, dst + 8);
41913 +- f = (f >> 32) + le32_to_cpu(digest[3]);
41914 +- put_unaligned_le32(f, dst + 12);
41915 +-
41916 ++ poly1305_emit(&dctx->h, dst, dctx->s);
41917 + *dctx = (struct poly1305_desc_ctx){};
41918 + }
41919 + EXPORT_SYMBOL(poly1305_final_arch);
41920 +diff --git a/arch/mips/crypto/poly1305-glue.c b/arch/mips/crypto/poly1305-glue.c
41921 +index b37d29cf5d0a..fc881b46d911 100644
41922 +--- a/arch/mips/crypto/poly1305-glue.c
41923 ++++ b/arch/mips/crypto/poly1305-glue.c
41924 +@@ -15,7 +15,7 @@
41925 +
41926 + asmlinkage void poly1305_init_mips(void *state, const u8 *key);
41927 + asmlinkage void poly1305_blocks_mips(void *state, const u8 *src, u32 len, u32 hibit);
41928 +-asmlinkage void poly1305_emit_mips(void *state, __le32 *digest, const u32 *nonce);
41929 ++asmlinkage void poly1305_emit_mips(void *state, u8 *digest, const u32 *nonce);
41930 +
41931 + void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
41932 + {
41933 +@@ -134,9 +134,6 @@ EXPORT_SYMBOL(poly1305_update_arch);
41934 +
41935 + void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
41936 + {
41937 +- __le32 digest[4];
41938 +- u64 f = 0;
41939 +-
41940 + if (unlikely(dctx->buflen)) {
41941 + dctx->buf[dctx->buflen++] = 1;
41942 + memset(dctx->buf + dctx->buflen, 0,
41943 +@@ -144,18 +141,7 @@ void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
41944 + poly1305_blocks_mips(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
41945 + }
41946 +
41947 +- poly1305_emit_mips(&dctx->h, digest, dctx->s);
41948 +-
41949 +- /* mac = (h + s) % (2^128) */
41950 +- f = (f >> 32) + le32_to_cpu(digest[0]);
41951 +- put_unaligned_le32(f, dst);
41952 +- f = (f >> 32) + le32_to_cpu(digest[1]);
41953 +- put_unaligned_le32(f, dst + 4);
41954 +- f = (f >> 32) + le32_to_cpu(digest[2]);
41955 +- put_unaligned_le32(f, dst + 8);
41956 +- f = (f >> 32) + le32_to_cpu(digest[3]);
41957 +- put_unaligned_le32(f, dst + 12);
41958 +-
41959 ++ poly1305_emit_mips(&dctx->h, dst, dctx->s);
41960 + *dctx = (struct poly1305_desc_ctx){};
41961 + }
41962 + EXPORT_SYMBOL(poly1305_final_arch);
41963 +--
41964 +cgit v1.2.3-4-ga26e
41965 +
41966 +
41967 +From 44e0852cc4056c8a211b3887d356954aa825f82e Mon Sep 17 00:00:00 2001
41968 +From: Herbert Xu <herbert@××××××××××××××××.au>
41969 +Date: Wed, 8 Jan 2020 12:37:35 +0800
41970 +Subject: crypto: curve25519 - Fix selftest build error
41971 +
41972 +commit a8bdf2c42ee4d1ee42af1f3601f85de94e70a421 upstream.
41973 +
41974 +If CRYPTO_CURVE25519 is y, CRYPTO_LIB_CURVE25519_GENERIC will be
41975 +y, but CRYPTO_LIB_CURVE25519 may be set to m, this causes build
41976 +errors:
41977 +
41978 +lib/crypto/curve25519-selftest.o: In function `curve25519':
41979 +curve25519-selftest.c:(.text.unlikely+0xc): undefined reference to `curve25519_arch'
41980 +lib/crypto/curve25519-selftest.o: In function `curve25519_selftest':
41981 +curve25519-selftest.c:(.init.text+0x17e): undefined reference to `curve25519_base_arch'
41982 +
41983 +This is because the curve25519 self-test code is being controlled
41984 +by the GENERIC option rather than the overall CURVE25519 option,
41985 +as is the case with blake2s. To recap, the GENERIC and ARCH options
41986 +for CURVE25519 are internal only and selected by users such as
41987 +the Crypto API, or the externally visible CURVE25519 option which
41988 +in turn is selected by wireguard. The self-test is specific to the
41989 +external CURVE25519 option and should not be enabled by the
41990 +Crypto API.
41991 +
41992 +This patch fixes this by splitting the GENERIC module from the
41993 +CURVE25519 module with the latter now containing just the self-test.
41994 +
41995 +Reported-by: Hulk Robot <hulkci@××××××.com>
41996 +Fixes: aa127963f1ca ("crypto: lib/curve25519 - re-add selftests")
41997 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
41998 +Reviewed-by: Jason A. Donenfeld <Jason@×××××.com>
41999 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
42000 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
42001 +---
42002 + lib/crypto/Makefile | 9 ++++++---
42003 + lib/crypto/curve25519-generic.c | 24 ++++++++++++++++++++++++
42004 + lib/crypto/curve25519.c | 7 -------
42005 + 3 files changed, 30 insertions(+), 10 deletions(-)
42006 + create mode 100644 lib/crypto/curve25519-generic.c
42007 +
42008 +diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
42009 +index 6ecaf83a5a9a..3a435629d9ce 100644
42010 +--- a/lib/crypto/Makefile
42011 ++++ b/lib/crypto/Makefile
42012 +@@ -19,9 +19,12 @@ libblake2s-y += blake2s.o
42013 + obj-$(CONFIG_CRYPTO_LIB_CHACHA20POLY1305) += libchacha20poly1305.o
42014 + libchacha20poly1305-y += chacha20poly1305.o
42015 +
42016 +-obj-$(CONFIG_CRYPTO_LIB_CURVE25519_GENERIC) += libcurve25519.o
42017 +-libcurve25519-y := curve25519-fiat32.o
42018 +-libcurve25519-$(CONFIG_ARCH_SUPPORTS_INT128) := curve25519-hacl64.o
42019 ++obj-$(CONFIG_CRYPTO_LIB_CURVE25519_GENERIC) += libcurve25519-generic.o
42020 ++libcurve25519-generic-y := curve25519-fiat32.o
42021 ++libcurve25519-generic-$(CONFIG_ARCH_SUPPORTS_INT128) := curve25519-hacl64.o
42022 ++libcurve25519-generic-y += curve25519-generic.o
42023 ++
42024 ++obj-$(CONFIG_CRYPTO_LIB_CURVE25519) += libcurve25519.o
42025 + libcurve25519-y += curve25519.o
42026 +
42027 + obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o
42028 +diff --git a/lib/crypto/curve25519-generic.c b/lib/crypto/curve25519-generic.c
42029 +new file mode 100644
42030 +index 000000000000..de7c99172fa2
42031 +--- /dev/null
42032 ++++ b/lib/crypto/curve25519-generic.c
42033 +@@ -0,0 +1,24 @@
42034 ++// SPDX-License-Identifier: GPL-2.0 OR MIT
42035 ++/*
42036 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
42037 ++ *
42038 ++ * This is an implementation of the Curve25519 ECDH algorithm, using either
42039 ++ * a 32-bit implementation or a 64-bit implementation with 128-bit integers,
42040 ++ * depending on what is supported by the target compiler.
42041 ++ *
42042 ++ * Information: https://cr.yp.to/ecdh.html
42043 ++ */
42044 ++
42045 ++#include <crypto/curve25519.h>
42046 ++#include <linux/module.h>
42047 ++
42048 ++const u8 curve25519_null_point[CURVE25519_KEY_SIZE] __aligned(32) = { 0 };
42049 ++const u8 curve25519_base_point[CURVE25519_KEY_SIZE] __aligned(32) = { 9 };
42050 ++
42051 ++EXPORT_SYMBOL(curve25519_null_point);
42052 ++EXPORT_SYMBOL(curve25519_base_point);
42053 ++EXPORT_SYMBOL(curve25519_generic);
42054 ++
42055 ++MODULE_LICENSE("GPL v2");
42056 ++MODULE_DESCRIPTION("Curve25519 scalar multiplication");
42057 ++MODULE_AUTHOR("Jason A. Donenfeld <Jason@×××××.com>");
42058 +diff --git a/lib/crypto/curve25519.c b/lib/crypto/curve25519.c
42059 +index c03ccdb99434..288a62cd29b2 100644
42060 +--- a/lib/crypto/curve25519.c
42061 ++++ b/lib/crypto/curve25519.c
42062 +@@ -15,13 +15,6 @@
42063 +
42064 + bool curve25519_selftest(void);
42065 +
42066 +-const u8 curve25519_null_point[CURVE25519_KEY_SIZE] __aligned(32) = { 0 };
42067 +-const u8 curve25519_base_point[CURVE25519_KEY_SIZE] __aligned(32) = { 9 };
42068 +-
42069 +-EXPORT_SYMBOL(curve25519_null_point);
42070 +-EXPORT_SYMBOL(curve25519_base_point);
42071 +-EXPORT_SYMBOL(curve25519_generic);
42072 +-
42073 + static int __init mod_init(void)
42074 + {
42075 + if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) &&
42076 +--
42077 +cgit v1.2.3-4-ga26e
42078 +
42079 +
42080 +From 4308a206f861b85f292336ed8e20fde09976d392 Mon Sep 17 00:00:00 2001
42081 +From: "Jason A. Donenfeld" <Jason@×××××.com>
42082 +Date: Thu, 16 Jan 2020 18:23:55 +0100
42083 +Subject: crypto: x86/poly1305 - fix .gitignore typo
42084 +
42085 +commit 1f6868995326cc82102049e349d8dbd116bdb656 upstream.
42086 +
42087 +Amidst the kbuild robot induced changes, the .gitignore file for the
42088 +generated file wasn't updated with the non-clashing filename. This
42089 +commit adjusts that.
42090 +
42091 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
42092 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
42093 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
42094 +---
42095 + arch/x86/crypto/.gitignore | 2 +-
42096 + 1 file changed, 1 insertion(+), 1 deletion(-)
42097 +
42098 +diff --git a/arch/x86/crypto/.gitignore b/arch/x86/crypto/.gitignore
42099 +index c406ea6571fa..30be0400a439 100644
42100 +--- a/arch/x86/crypto/.gitignore
42101 ++++ b/arch/x86/crypto/.gitignore
42102 +@@ -1 +1 @@
42103 +-poly1305-x86_64.S
42104 ++poly1305-x86_64-cryptogams.S
42105 +--
42106 +cgit v1.2.3-4-ga26e
42107 +
42108 +
42109 +From 92cf6598dabc549bc3dc603be21103163d355ff7 Mon Sep 17 00:00:00 2001
42110 +From: "Jason A. Donenfeld" <Jason@×××××.com>
42111 +Date: Thu, 16 Jan 2020 21:26:34 +0100
42112 +Subject: crypto: chacha20poly1305 - add back missing test vectors and test
42113 + chunking
42114 +
42115 +commit 72c7943792c9e7788ddd182337bcf8f650cf56f5 upstream.
42116 +
42117 +When this was originally ported, the 12-byte nonce vectors were left out
42118 +to keep things simple. I agree that we don't need nor want a library
42119 +interface for 12-byte nonces. But these test vectors were specially
42120 +crafted to look at issues in the underlying primitives and related
42121 +interactions. Therefore, we actually want to keep around all of the
42122 +test vectors, and simply have a helper function to test them with.
42123 +
42124 +Secondly, the sglist-based chunking code in the library interface is
42125 +rather complicated, so this adds a developer-only test for ensuring that
42126 +all the book keeping is correct, across a wide array of possibilities.
42127 +
42128 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
42129 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
42130 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
42131 +---
42132 + lib/crypto/chacha20poly1305-selftest.c | 1712 +++++++++++++++++++++++++++++++-
42133 + 1 file changed, 1698 insertions(+), 14 deletions(-)
42134 +
42135 +diff --git a/lib/crypto/chacha20poly1305-selftest.c b/lib/crypto/chacha20poly1305-selftest.c
42136 +index 465de46dbdef..c391a91364e9 100644
42137 +--- a/lib/crypto/chacha20poly1305-selftest.c
42138 ++++ b/lib/crypto/chacha20poly1305-selftest.c
42139 +@@ -4,6 +4,7 @@
42140 + */
42141 +
42142 + #include <crypto/chacha20poly1305.h>
42143 ++#include <crypto/chacha.h>
42144 + #include <crypto/poly1305.h>
42145 +
42146 + #include <asm/unaligned.h>
42147 +@@ -1926,6 +1927,1104 @@ static const u8 enc_key012[] __initconst = {
42148 + 0x65, 0x91, 0x6e, 0x2a, 0x79, 0x22, 0xda, 0x64
42149 + };
42150 +
42151 ++/* wycheproof - rfc7539 */
42152 ++static const u8 enc_input013[] __initconst = {
42153 ++ 0x4c, 0x61, 0x64, 0x69, 0x65, 0x73, 0x20, 0x61,
42154 ++ 0x6e, 0x64, 0x20, 0x47, 0x65, 0x6e, 0x74, 0x6c,
42155 ++ 0x65, 0x6d, 0x65, 0x6e, 0x20, 0x6f, 0x66, 0x20,
42156 ++ 0x74, 0x68, 0x65, 0x20, 0x63, 0x6c, 0x61, 0x73,
42157 ++ 0x73, 0x20, 0x6f, 0x66, 0x20, 0x27, 0x39, 0x39,
42158 ++ 0x3a, 0x20, 0x49, 0x66, 0x20, 0x49, 0x20, 0x63,
42159 ++ 0x6f, 0x75, 0x6c, 0x64, 0x20, 0x6f, 0x66, 0x66,
42160 ++ 0x65, 0x72, 0x20, 0x79, 0x6f, 0x75, 0x20, 0x6f,
42161 ++ 0x6e, 0x6c, 0x79, 0x20, 0x6f, 0x6e, 0x65, 0x20,
42162 ++ 0x74, 0x69, 0x70, 0x20, 0x66, 0x6f, 0x72, 0x20,
42163 ++ 0x74, 0x68, 0x65, 0x20, 0x66, 0x75, 0x74, 0x75,
42164 ++ 0x72, 0x65, 0x2c, 0x20, 0x73, 0x75, 0x6e, 0x73,
42165 ++ 0x63, 0x72, 0x65, 0x65, 0x6e, 0x20, 0x77, 0x6f,
42166 ++ 0x75, 0x6c, 0x64, 0x20, 0x62, 0x65, 0x20, 0x69,
42167 ++ 0x74, 0x2e
42168 ++};
42169 ++static const u8 enc_output013[] __initconst = {
42170 ++ 0xd3, 0x1a, 0x8d, 0x34, 0x64, 0x8e, 0x60, 0xdb,
42171 ++ 0x7b, 0x86, 0xaf, 0xbc, 0x53, 0xef, 0x7e, 0xc2,
42172 ++ 0xa4, 0xad, 0xed, 0x51, 0x29, 0x6e, 0x08, 0xfe,
42173 ++ 0xa9, 0xe2, 0xb5, 0xa7, 0x36, 0xee, 0x62, 0xd6,
42174 ++ 0x3d, 0xbe, 0xa4, 0x5e, 0x8c, 0xa9, 0x67, 0x12,
42175 ++ 0x82, 0xfa, 0xfb, 0x69, 0xda, 0x92, 0x72, 0x8b,
42176 ++ 0x1a, 0x71, 0xde, 0x0a, 0x9e, 0x06, 0x0b, 0x29,
42177 ++ 0x05, 0xd6, 0xa5, 0xb6, 0x7e, 0xcd, 0x3b, 0x36,
42178 ++ 0x92, 0xdd, 0xbd, 0x7f, 0x2d, 0x77, 0x8b, 0x8c,
42179 ++ 0x98, 0x03, 0xae, 0xe3, 0x28, 0x09, 0x1b, 0x58,
42180 ++ 0xfa, 0xb3, 0x24, 0xe4, 0xfa, 0xd6, 0x75, 0x94,
42181 ++ 0x55, 0x85, 0x80, 0x8b, 0x48, 0x31, 0xd7, 0xbc,
42182 ++ 0x3f, 0xf4, 0xde, 0xf0, 0x8e, 0x4b, 0x7a, 0x9d,
42183 ++ 0xe5, 0x76, 0xd2, 0x65, 0x86, 0xce, 0xc6, 0x4b,
42184 ++ 0x61, 0x16, 0x1a, 0xe1, 0x0b, 0x59, 0x4f, 0x09,
42185 ++ 0xe2, 0x6a, 0x7e, 0x90, 0x2e, 0xcb, 0xd0, 0x60,
42186 ++ 0x06, 0x91
42187 ++};
42188 ++static const u8 enc_assoc013[] __initconst = {
42189 ++ 0x50, 0x51, 0x52, 0x53, 0xc0, 0xc1, 0xc2, 0xc3,
42190 ++ 0xc4, 0xc5, 0xc6, 0xc7
42191 ++};
42192 ++static const u8 enc_nonce013[] __initconst = {
42193 ++ 0x07, 0x00, 0x00, 0x00, 0x40, 0x41, 0x42, 0x43,
42194 ++ 0x44, 0x45, 0x46, 0x47
42195 ++};
42196 ++static const u8 enc_key013[] __initconst = {
42197 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
42198 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
42199 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
42200 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
42201 ++};
42202 ++
42203 ++/* wycheproof - misc */
42204 ++static const u8 enc_input014[] __initconst = { };
42205 ++static const u8 enc_output014[] __initconst = {
42206 ++ 0x76, 0xac, 0xb3, 0x42, 0xcf, 0x31, 0x66, 0xa5,
42207 ++ 0xb6, 0x3c, 0x0c, 0x0e, 0xa1, 0x38, 0x3c, 0x8d
42208 ++};
42209 ++static const u8 enc_assoc014[] __initconst = { };
42210 ++static const u8 enc_nonce014[] __initconst = {
42211 ++ 0x4d, 0xa5, 0xbf, 0x8d, 0xfd, 0x58, 0x52, 0xc1,
42212 ++ 0xea, 0x12, 0x37, 0x9d
42213 ++};
42214 ++static const u8 enc_key014[] __initconst = {
42215 ++ 0x80, 0xba, 0x31, 0x92, 0xc8, 0x03, 0xce, 0x96,
42216 ++ 0x5e, 0xa3, 0x71, 0xd5, 0xff, 0x07, 0x3c, 0xf0,
42217 ++ 0xf4, 0x3b, 0x6a, 0x2a, 0xb5, 0x76, 0xb2, 0x08,
42218 ++ 0x42, 0x6e, 0x11, 0x40, 0x9c, 0x09, 0xb9, 0xb0
42219 ++};
42220 ++
42221 ++/* wycheproof - misc */
42222 ++static const u8 enc_input015[] __initconst = { };
42223 ++static const u8 enc_output015[] __initconst = {
42224 ++ 0x90, 0x6f, 0xa6, 0x28, 0x4b, 0x52, 0xf8, 0x7b,
42225 ++ 0x73, 0x59, 0xcb, 0xaa, 0x75, 0x63, 0xc7, 0x09
42226 ++};
42227 ++static const u8 enc_assoc015[] __initconst = {
42228 ++ 0xbd, 0x50, 0x67, 0x64, 0xf2, 0xd2, 0xc4, 0x10
42229 ++};
42230 ++static const u8 enc_nonce015[] __initconst = {
42231 ++ 0xa9, 0x2e, 0xf0, 0xac, 0x99, 0x1d, 0xd5, 0x16,
42232 ++ 0xa3, 0xc6, 0xf6, 0x89
42233 ++};
42234 ++static const u8 enc_key015[] __initconst = {
42235 ++ 0x7a, 0x4c, 0xd7, 0x59, 0x17, 0x2e, 0x02, 0xeb,
42236 ++ 0x20, 0x4d, 0xb2, 0xc3, 0xf5, 0xc7, 0x46, 0x22,
42237 ++ 0x7d, 0xf5, 0x84, 0xfc, 0x13, 0x45, 0x19, 0x63,
42238 ++ 0x91, 0xdb, 0xb9, 0x57, 0x7a, 0x25, 0x07, 0x42
42239 ++};
42240 ++
42241 ++/* wycheproof - misc */
42242 ++static const u8 enc_input016[] __initconst = {
42243 ++ 0x2a
42244 ++};
42245 ++static const u8 enc_output016[] __initconst = {
42246 ++ 0x3a, 0xca, 0xc2, 0x7d, 0xec, 0x09, 0x68, 0x80,
42247 ++ 0x1e, 0x9f, 0x6e, 0xde, 0xd6, 0x9d, 0x80, 0x75,
42248 ++ 0x22
42249 ++};
42250 ++static const u8 enc_assoc016[] __initconst = { };
42251 ++static const u8 enc_nonce016[] __initconst = {
42252 ++ 0x99, 0xe2, 0x3e, 0xc4, 0x89, 0x85, 0xbc, 0xcd,
42253 ++ 0xee, 0xab, 0x60, 0xf1
42254 ++};
42255 ++static const u8 enc_key016[] __initconst = {
42256 ++ 0xcc, 0x56, 0xb6, 0x80, 0x55, 0x2e, 0xb7, 0x50,
42257 ++ 0x08, 0xf5, 0x48, 0x4b, 0x4c, 0xb8, 0x03, 0xfa,
42258 ++ 0x50, 0x63, 0xeb, 0xd6, 0xea, 0xb9, 0x1f, 0x6a,
42259 ++ 0xb6, 0xae, 0xf4, 0x91, 0x6a, 0x76, 0x62, 0x73
42260 ++};
42261 ++
42262 ++/* wycheproof - misc */
42263 ++static const u8 enc_input017[] __initconst = {
42264 ++ 0x51
42265 ++};
42266 ++static const u8 enc_output017[] __initconst = {
42267 ++ 0xc4, 0x16, 0x83, 0x10, 0xca, 0x45, 0xb1, 0xf7,
42268 ++ 0xc6, 0x6c, 0xad, 0x4e, 0x99, 0xe4, 0x3f, 0x72,
42269 ++ 0xb9
42270 ++};
42271 ++static const u8 enc_assoc017[] __initconst = {
42272 ++ 0x91, 0xca, 0x6c, 0x59, 0x2c, 0xbc, 0xca, 0x53
42273 ++};
42274 ++static const u8 enc_nonce017[] __initconst = {
42275 ++ 0xab, 0x0d, 0xca, 0x71, 0x6e, 0xe0, 0x51, 0xd2,
42276 ++ 0x78, 0x2f, 0x44, 0x03
42277 ++};
42278 ++static const u8 enc_key017[] __initconst = {
42279 ++ 0x46, 0xf0, 0x25, 0x49, 0x65, 0xf7, 0x69, 0xd5,
42280 ++ 0x2b, 0xdb, 0x4a, 0x70, 0xb4, 0x43, 0x19, 0x9f,
42281 ++ 0x8e, 0xf2, 0x07, 0x52, 0x0d, 0x12, 0x20, 0xc5,
42282 ++ 0x5e, 0x4b, 0x70, 0xf0, 0xfd, 0xa6, 0x20, 0xee
42283 ++};
42284 ++
42285 ++/* wycheproof - misc */
42286 ++static const u8 enc_input018[] __initconst = {
42287 ++ 0x5c, 0x60
42288 ++};
42289 ++static const u8 enc_output018[] __initconst = {
42290 ++ 0x4d, 0x13, 0x91, 0xe8, 0xb6, 0x1e, 0xfb, 0x39,
42291 ++ 0xc1, 0x22, 0x19, 0x54, 0x53, 0x07, 0x7b, 0x22,
42292 ++ 0xe5, 0xe2
42293 ++};
42294 ++static const u8 enc_assoc018[] __initconst = { };
42295 ++static const u8 enc_nonce018[] __initconst = {
42296 ++ 0x46, 0x1a, 0xf1, 0x22, 0xe9, 0xf2, 0xe0, 0x34,
42297 ++ 0x7e, 0x03, 0xf2, 0xdb
42298 ++};
42299 ++static const u8 enc_key018[] __initconst = {
42300 ++ 0x2f, 0x7f, 0x7e, 0x4f, 0x59, 0x2b, 0xb3, 0x89,
42301 ++ 0x19, 0x49, 0x89, 0x74, 0x35, 0x07, 0xbf, 0x3e,
42302 ++ 0xe9, 0xcb, 0xde, 0x17, 0x86, 0xb6, 0x69, 0x5f,
42303 ++ 0xe6, 0xc0, 0x25, 0xfd, 0x9b, 0xa4, 0xc1, 0x00
42304 ++};
42305 ++
42306 ++/* wycheproof - misc */
42307 ++static const u8 enc_input019[] __initconst = {
42308 ++ 0xdd, 0xf2
42309 ++};
42310 ++static const u8 enc_output019[] __initconst = {
42311 ++ 0xb6, 0x0d, 0xea, 0xd0, 0xfd, 0x46, 0x97, 0xec,
42312 ++ 0x2e, 0x55, 0x58, 0x23, 0x77, 0x19, 0xd0, 0x24,
42313 ++ 0x37, 0xa2
42314 ++};
42315 ++static const u8 enc_assoc019[] __initconst = {
42316 ++ 0x88, 0x36, 0x4f, 0xc8, 0x06, 0x05, 0x18, 0xbf
42317 ++};
42318 ++static const u8 enc_nonce019[] __initconst = {
42319 ++ 0x61, 0x54, 0x6b, 0xa5, 0xf1, 0x72, 0x05, 0x90,
42320 ++ 0xb6, 0x04, 0x0a, 0xc6
42321 ++};
42322 ++static const u8 enc_key019[] __initconst = {
42323 ++ 0xc8, 0x83, 0x3d, 0xce, 0x5e, 0xa9, 0xf2, 0x48,
42324 ++ 0xaa, 0x20, 0x30, 0xea, 0xcf, 0xe7, 0x2b, 0xff,
42325 ++ 0xe6, 0x9a, 0x62, 0x0c, 0xaf, 0x79, 0x33, 0x44,
42326 ++ 0xe5, 0x71, 0x8f, 0xe0, 0xd7, 0xab, 0x1a, 0x58
42327 ++};
42328 ++
42329 ++/* wycheproof - misc */
42330 ++static const u8 enc_input020[] __initconst = {
42331 ++ 0xab, 0x85, 0xe9, 0xc1, 0x57, 0x17, 0x31
42332 ++};
42333 ++static const u8 enc_output020[] __initconst = {
42334 ++ 0x5d, 0xfe, 0x34, 0x40, 0xdb, 0xb3, 0xc3, 0xed,
42335 ++ 0x7a, 0x43, 0x4e, 0x26, 0x02, 0xd3, 0x94, 0x28,
42336 ++ 0x1e, 0x0a, 0xfa, 0x9f, 0xb7, 0xaa, 0x42
42337 ++};
42338 ++static const u8 enc_assoc020[] __initconst = { };
42339 ++static const u8 enc_nonce020[] __initconst = {
42340 ++ 0x3c, 0x4e, 0x65, 0x4d, 0x66, 0x3f, 0xa4, 0x59,
42341 ++ 0x6d, 0xc5, 0x5b, 0xb7
42342 ++};
42343 ++static const u8 enc_key020[] __initconst = {
42344 ++ 0x55, 0x56, 0x81, 0x58, 0xd3, 0xa6, 0x48, 0x3f,
42345 ++ 0x1f, 0x70, 0x21, 0xea, 0xb6, 0x9b, 0x70, 0x3f,
42346 ++ 0x61, 0x42, 0x51, 0xca, 0xdc, 0x1a, 0xf5, 0xd3,
42347 ++ 0x4a, 0x37, 0x4f, 0xdb, 0xfc, 0x5a, 0xda, 0xc7
42348 ++};
42349 ++
42350 ++/* wycheproof - misc */
42351 ++static const u8 enc_input021[] __initconst = {
42352 ++ 0x4e, 0xe5, 0xcd, 0xa2, 0x0d, 0x42, 0x90
42353 ++};
42354 ++static const u8 enc_output021[] __initconst = {
42355 ++ 0x4b, 0xd4, 0x72, 0x12, 0x94, 0x1c, 0xe3, 0x18,
42356 ++ 0x5f, 0x14, 0x08, 0xee, 0x7f, 0xbf, 0x18, 0xf5,
42357 ++ 0xab, 0xad, 0x6e, 0x22, 0x53, 0xa1, 0xba
42358 ++};
42359 ++static const u8 enc_assoc021[] __initconst = {
42360 ++ 0x84, 0xe4, 0x6b, 0xe8, 0xc0, 0x91, 0x90, 0x53
42361 ++};
42362 ++static const u8 enc_nonce021[] __initconst = {
42363 ++ 0x58, 0x38, 0x93, 0x75, 0xc6, 0x9e, 0xe3, 0x98,
42364 ++ 0xde, 0x94, 0x83, 0x96
42365 ++};
42366 ++static const u8 enc_key021[] __initconst = {
42367 ++ 0xe3, 0xc0, 0x9e, 0x7f, 0xab, 0x1a, 0xef, 0xb5,
42368 ++ 0x16, 0xda, 0x6a, 0x33, 0x02, 0x2a, 0x1d, 0xd4,
42369 ++ 0xeb, 0x27, 0x2c, 0x80, 0xd5, 0x40, 0xc5, 0xda,
42370 ++ 0x52, 0xa7, 0x30, 0xf3, 0x4d, 0x84, 0x0d, 0x7f
42371 ++};
42372 ++
42373 ++/* wycheproof - misc */
42374 ++static const u8 enc_input022[] __initconst = {
42375 ++ 0xbe, 0x33, 0x08, 0xf7, 0x2a, 0x2c, 0x6a, 0xed
42376 ++};
42377 ++static const u8 enc_output022[] __initconst = {
42378 ++ 0x8e, 0x94, 0x39, 0xa5, 0x6e, 0xee, 0xc8, 0x17,
42379 ++ 0xfb, 0xe8, 0xa6, 0xed, 0x8f, 0xab, 0xb1, 0x93,
42380 ++ 0x75, 0x39, 0xdd, 0x6c, 0x00, 0xe9, 0x00, 0x21
42381 ++};
42382 ++static const u8 enc_assoc022[] __initconst = { };
42383 ++static const u8 enc_nonce022[] __initconst = {
42384 ++ 0x4f, 0x07, 0xaf, 0xed, 0xfd, 0xc3, 0xb6, 0xc2,
42385 ++ 0x36, 0x18, 0x23, 0xd3
42386 ++};
42387 ++static const u8 enc_key022[] __initconst = {
42388 ++ 0x51, 0xe4, 0xbf, 0x2b, 0xad, 0x92, 0xb7, 0xaf,
42389 ++ 0xf1, 0xa4, 0xbc, 0x05, 0x55, 0x0b, 0xa8, 0x1d,
42390 ++ 0xf4, 0xb9, 0x6f, 0xab, 0xf4, 0x1c, 0x12, 0xc7,
42391 ++ 0xb0, 0x0e, 0x60, 0xe4, 0x8d, 0xb7, 0xe1, 0x52
42392 ++};
42393 ++
42394 ++/* wycheproof - misc */
42395 ++static const u8 enc_input023[] __initconst = {
42396 ++ 0xa4, 0xc9, 0xc2, 0x80, 0x1b, 0x71, 0xf7, 0xdf
42397 ++};
42398 ++static const u8 enc_output023[] __initconst = {
42399 ++ 0xb9, 0xb9, 0x10, 0x43, 0x3a, 0xf0, 0x52, 0xb0,
42400 ++ 0x45, 0x30, 0xf5, 0x1a, 0xee, 0xe0, 0x24, 0xe0,
42401 ++ 0xa4, 0x45, 0xa6, 0x32, 0x8f, 0xa6, 0x7a, 0x18
42402 ++};
42403 ++static const u8 enc_assoc023[] __initconst = {
42404 ++ 0x66, 0xc0, 0xae, 0x70, 0x07, 0x6c, 0xb1, 0x4d
42405 ++};
42406 ++static const u8 enc_nonce023[] __initconst = {
42407 ++ 0xb4, 0xea, 0x66, 0x6e, 0xe1, 0x19, 0x56, 0x33,
42408 ++ 0x66, 0x48, 0x4a, 0x78
42409 ++};
42410 ++static const u8 enc_key023[] __initconst = {
42411 ++ 0x11, 0x31, 0xc1, 0x41, 0x85, 0x77, 0xa0, 0x54,
42412 ++ 0xde, 0x7a, 0x4a, 0xc5, 0x51, 0x95, 0x0f, 0x1a,
42413 ++ 0x05, 0x3f, 0x9a, 0xe4, 0x6e, 0x5b, 0x75, 0xfe,
42414 ++ 0x4a, 0xbd, 0x56, 0x08, 0xd7, 0xcd, 0xda, 0xdd
42415 ++};
42416 ++
42417 ++/* wycheproof - misc */
42418 ++static const u8 enc_input024[] __initconst = {
42419 ++ 0x42, 0xba, 0xae, 0x59, 0x78, 0xfe, 0xaf, 0x5c,
42420 ++ 0x36, 0x8d, 0x14, 0xe0
42421 ++};
42422 ++static const u8 enc_output024[] __initconst = {
42423 ++ 0xff, 0x7d, 0xc2, 0x03, 0xb2, 0x6c, 0x46, 0x7a,
42424 ++ 0x6b, 0x50, 0xdb, 0x33, 0x57, 0x8c, 0x0f, 0x27,
42425 ++ 0x58, 0xc2, 0xe1, 0x4e, 0x36, 0xd4, 0xfc, 0x10,
42426 ++ 0x6d, 0xcb, 0x29, 0xb4
42427 ++};
42428 ++static const u8 enc_assoc024[] __initconst = { };
42429 ++static const u8 enc_nonce024[] __initconst = {
42430 ++ 0x9a, 0x59, 0xfc, 0xe2, 0x6d, 0xf0, 0x00, 0x5e,
42431 ++ 0x07, 0x53, 0x86, 0x56
42432 ++};
42433 ++static const u8 enc_key024[] __initconst = {
42434 ++ 0x99, 0xb6, 0x2b, 0xd5, 0xaf, 0xbe, 0x3f, 0xb0,
42435 ++ 0x15, 0xbd, 0xe9, 0x3f, 0x0a, 0xbf, 0x48, 0x39,
42436 ++ 0x57, 0xa1, 0xc3, 0xeb, 0x3c, 0xa5, 0x9c, 0xb5,
42437 ++ 0x0b, 0x39, 0xf7, 0xf8, 0xa9, 0xcc, 0x51, 0xbe
42438 ++};
42439 ++
42440 ++/* wycheproof - misc */
42441 ++static const u8 enc_input025[] __initconst = {
42442 ++ 0xfd, 0xc8, 0x5b, 0x94, 0xa4, 0xb2, 0xa6, 0xb7,
42443 ++ 0x59, 0xb1, 0xa0, 0xda
42444 ++};
42445 ++static const u8 enc_output025[] __initconst = {
42446 ++ 0x9f, 0x88, 0x16, 0xde, 0x09, 0x94, 0xe9, 0x38,
42447 ++ 0xd9, 0xe5, 0x3f, 0x95, 0xd0, 0x86, 0xfc, 0x6c,
42448 ++ 0x9d, 0x8f, 0xa9, 0x15, 0xfd, 0x84, 0x23, 0xa7,
42449 ++ 0xcf, 0x05, 0x07, 0x2f
42450 ++};
42451 ++static const u8 enc_assoc025[] __initconst = {
42452 ++ 0xa5, 0x06, 0xe1, 0xa5, 0xc6, 0x90, 0x93, 0xf9
42453 ++};
42454 ++static const u8 enc_nonce025[] __initconst = {
42455 ++ 0x58, 0xdb, 0xd4, 0xad, 0x2c, 0x4a, 0xd3, 0x5d,
42456 ++ 0xd9, 0x06, 0xe9, 0xce
42457 ++};
42458 ++static const u8 enc_key025[] __initconst = {
42459 ++ 0x85, 0xf3, 0x5b, 0x62, 0x82, 0xcf, 0xf4, 0x40,
42460 ++ 0xbc, 0x10, 0x20, 0xc8, 0x13, 0x6f, 0xf2, 0x70,
42461 ++ 0x31, 0x11, 0x0f, 0xa6, 0x3e, 0xc1, 0x6f, 0x1e,
42462 ++ 0x82, 0x51, 0x18, 0xb0, 0x06, 0xb9, 0x12, 0x57
42463 ++};
42464 ++
42465 ++/* wycheproof - misc */
42466 ++static const u8 enc_input026[] __initconst = {
42467 ++ 0x51, 0xf8, 0xc1, 0xf7, 0x31, 0xea, 0x14, 0xac,
42468 ++ 0xdb, 0x21, 0x0a, 0x6d, 0x97, 0x3e, 0x07
42469 ++};
42470 ++static const u8 enc_output026[] __initconst = {
42471 ++ 0x0b, 0x29, 0x63, 0x8e, 0x1f, 0xbd, 0xd6, 0xdf,
42472 ++ 0x53, 0x97, 0x0b, 0xe2, 0x21, 0x00, 0x42, 0x2a,
42473 ++ 0x91, 0x34, 0x08, 0x7d, 0x67, 0xa4, 0x6e, 0x79,
42474 ++ 0x17, 0x8d, 0x0a, 0x93, 0xf5, 0xe1, 0xd2
42475 ++};
42476 ++static const u8 enc_assoc026[] __initconst = { };
42477 ++static const u8 enc_nonce026[] __initconst = {
42478 ++ 0x68, 0xab, 0x7f, 0xdb, 0xf6, 0x19, 0x01, 0xda,
42479 ++ 0xd4, 0x61, 0xd2, 0x3c
42480 ++};
42481 ++static const u8 enc_key026[] __initconst = {
42482 ++ 0x67, 0x11, 0x96, 0x27, 0xbd, 0x98, 0x8e, 0xda,
42483 ++ 0x90, 0x62, 0x19, 0xe0, 0x8c, 0x0d, 0x0d, 0x77,
42484 ++ 0x9a, 0x07, 0xd2, 0x08, 0xce, 0x8a, 0x4f, 0xe0,
42485 ++ 0x70, 0x9a, 0xf7, 0x55, 0xee, 0xec, 0x6d, 0xcb
42486 ++};
42487 ++
42488 ++/* wycheproof - misc */
42489 ++static const u8 enc_input027[] __initconst = {
42490 ++ 0x97, 0x46, 0x9d, 0xa6, 0x67, 0xd6, 0x11, 0x0f,
42491 ++ 0x9c, 0xbd, 0xa1, 0xd1, 0xa2, 0x06, 0x73
42492 ++};
42493 ++static const u8 enc_output027[] __initconst = {
42494 ++ 0x32, 0xdb, 0x66, 0xc4, 0xa3, 0x81, 0x9d, 0x81,
42495 ++ 0x55, 0x74, 0x55, 0xe5, 0x98, 0x0f, 0xed, 0xfe,
42496 ++ 0xae, 0x30, 0xde, 0xc9, 0x4e, 0x6a, 0xd3, 0xa9,
42497 ++ 0xee, 0xa0, 0x6a, 0x0d, 0x70, 0x39, 0x17
42498 ++};
42499 ++static const u8 enc_assoc027[] __initconst = {
42500 ++ 0x64, 0x53, 0xa5, 0x33, 0x84, 0x63, 0x22, 0x12
42501 ++};
42502 ++static const u8 enc_nonce027[] __initconst = {
42503 ++ 0xd9, 0x5b, 0x32, 0x43, 0xaf, 0xae, 0xf7, 0x14,
42504 ++ 0xc5, 0x03, 0x5b, 0x6a
42505 ++};
42506 ++static const u8 enc_key027[] __initconst = {
42507 ++ 0xe6, 0xf1, 0x11, 0x8d, 0x41, 0xe4, 0xb4, 0x3f,
42508 ++ 0xb5, 0x82, 0x21, 0xb7, 0xed, 0x79, 0x67, 0x38,
42509 ++ 0x34, 0xe0, 0xd8, 0xac, 0x5c, 0x4f, 0xa6, 0x0b,
42510 ++ 0xbc, 0x8b, 0xc4, 0x89, 0x3a, 0x58, 0x89, 0x4d
42511 ++};
42512 ++
42513 ++/* wycheproof - misc */
42514 ++static const u8 enc_input028[] __initconst = {
42515 ++ 0x54, 0x9b, 0x36, 0x5a, 0xf9, 0x13, 0xf3, 0xb0,
42516 ++ 0x81, 0x13, 0x1c, 0xcb, 0x6b, 0x82, 0x55, 0x88
42517 ++};
42518 ++static const u8 enc_output028[] __initconst = {
42519 ++ 0xe9, 0x11, 0x0e, 0x9f, 0x56, 0xab, 0x3c, 0xa4,
42520 ++ 0x83, 0x50, 0x0c, 0xea, 0xba, 0xb6, 0x7a, 0x13,
42521 ++ 0x83, 0x6c, 0xca, 0xbf, 0x15, 0xa6, 0xa2, 0x2a,
42522 ++ 0x51, 0xc1, 0x07, 0x1c, 0xfa, 0x68, 0xfa, 0x0c
42523 ++};
42524 ++static const u8 enc_assoc028[] __initconst = { };
42525 ++static const u8 enc_nonce028[] __initconst = {
42526 ++ 0x2f, 0xcb, 0x1b, 0x38, 0xa9, 0x9e, 0x71, 0xb8,
42527 ++ 0x47, 0x40, 0xad, 0x9b
42528 ++};
42529 ++static const u8 enc_key028[] __initconst = {
42530 ++ 0x59, 0xd4, 0xea, 0xfb, 0x4d, 0xe0, 0xcf, 0xc7,
42531 ++ 0xd3, 0xdb, 0x99, 0xa8, 0xf5, 0x4b, 0x15, 0xd7,
42532 ++ 0xb3, 0x9f, 0x0a, 0xcc, 0x8d, 0xa6, 0x97, 0x63,
42533 ++ 0xb0, 0x19, 0xc1, 0x69, 0x9f, 0x87, 0x67, 0x4a
42534 ++};
42535 ++
42536 ++/* wycheproof - misc */
42537 ++static const u8 enc_input029[] __initconst = {
42538 ++ 0x55, 0xa4, 0x65, 0x64, 0x4f, 0x5b, 0x65, 0x09,
42539 ++ 0x28, 0xcb, 0xee, 0x7c, 0x06, 0x32, 0x14, 0xd6
42540 ++};
42541 ++static const u8 enc_output029[] __initconst = {
42542 ++ 0xe4, 0xb1, 0x13, 0xcb, 0x77, 0x59, 0x45, 0xf3,
42543 ++ 0xd3, 0xa8, 0xae, 0x9e, 0xc1, 0x41, 0xc0, 0x0c,
42544 ++ 0x7c, 0x43, 0xf1, 0x6c, 0xe0, 0x96, 0xd0, 0xdc,
42545 ++ 0x27, 0xc9, 0x58, 0x49, 0xdc, 0x38, 0x3b, 0x7d
42546 ++};
42547 ++static const u8 enc_assoc029[] __initconst = {
42548 ++ 0x03, 0x45, 0x85, 0x62, 0x1a, 0xf8, 0xd7, 0xff
42549 ++};
42550 ++static const u8 enc_nonce029[] __initconst = {
42551 ++ 0x11, 0x8a, 0x69, 0x64, 0xc2, 0xd3, 0xe3, 0x80,
42552 ++ 0x07, 0x1f, 0x52, 0x66
42553 ++};
42554 ++static const u8 enc_key029[] __initconst = {
42555 ++ 0xb9, 0x07, 0xa4, 0x50, 0x75, 0x51, 0x3f, 0xe8,
42556 ++ 0xa8, 0x01, 0x9e, 0xde, 0xe3, 0xf2, 0x59, 0x14,
42557 ++ 0x87, 0xb2, 0xa0, 0x30, 0xb0, 0x3c, 0x6e, 0x1d,
42558 ++ 0x77, 0x1c, 0x86, 0x25, 0x71, 0xd2, 0xea, 0x1e
42559 ++};
42560 ++
42561 ++/* wycheproof - misc */
42562 ++static const u8 enc_input030[] __initconst = {
42563 ++ 0x3f, 0xf1, 0x51, 0x4b, 0x1c, 0x50, 0x39, 0x15,
42564 ++ 0x91, 0x8f, 0x0c, 0x0c, 0x31, 0x09, 0x4a, 0x6e,
42565 ++ 0x1f
42566 ++};
42567 ++static const u8 enc_output030[] __initconst = {
42568 ++ 0x02, 0xcc, 0x3a, 0xcb, 0x5e, 0xe1, 0xfc, 0xdd,
42569 ++ 0x12, 0xa0, 0x3b, 0xb8, 0x57, 0x97, 0x64, 0x74,
42570 ++ 0xd3, 0xd8, 0x3b, 0x74, 0x63, 0xa2, 0xc3, 0x80,
42571 ++ 0x0f, 0xe9, 0x58, 0xc2, 0x8e, 0xaa, 0x29, 0x08,
42572 ++ 0x13
42573 ++};
42574 ++static const u8 enc_assoc030[] __initconst = { };
42575 ++static const u8 enc_nonce030[] __initconst = {
42576 ++ 0x45, 0xaa, 0xa3, 0xe5, 0xd1, 0x6d, 0x2d, 0x42,
42577 ++ 0xdc, 0x03, 0x44, 0x5d
42578 ++};
42579 ++static const u8 enc_key030[] __initconst = {
42580 ++ 0x3b, 0x24, 0x58, 0xd8, 0x17, 0x6e, 0x16, 0x21,
42581 ++ 0xc0, 0xcc, 0x24, 0xc0, 0xc0, 0xe2, 0x4c, 0x1e,
42582 ++ 0x80, 0xd7, 0x2f, 0x7e, 0xe9, 0x14, 0x9a, 0x4b,
42583 ++ 0x16, 0x61, 0x76, 0x62, 0x96, 0x16, 0xd0, 0x11
42584 ++};
42585 ++
42586 ++/* wycheproof - misc */
42587 ++static const u8 enc_input031[] __initconst = {
42588 ++ 0x63, 0x85, 0x8c, 0xa3, 0xe2, 0xce, 0x69, 0x88,
42589 ++ 0x7b, 0x57, 0x8a, 0x3c, 0x16, 0x7b, 0x42, 0x1c,
42590 ++ 0x9c
42591 ++};
42592 ++static const u8 enc_output031[] __initconst = {
42593 ++ 0x35, 0x76, 0x64, 0x88, 0xd2, 0xbc, 0x7c, 0x2b,
42594 ++ 0x8d, 0x17, 0xcb, 0xbb, 0x9a, 0xbf, 0xad, 0x9e,
42595 ++ 0x6d, 0x1f, 0x39, 0x1e, 0x65, 0x7b, 0x27, 0x38,
42596 ++ 0xdd, 0xa0, 0x84, 0x48, 0xcb, 0xa2, 0x81, 0x1c,
42597 ++ 0xeb
42598 ++};
42599 ++static const u8 enc_assoc031[] __initconst = {
42600 ++ 0x9a, 0xaf, 0x29, 0x9e, 0xee, 0xa7, 0x8f, 0x79
42601 ++};
42602 ++static const u8 enc_nonce031[] __initconst = {
42603 ++ 0xf0, 0x38, 0x4f, 0xb8, 0x76, 0x12, 0x14, 0x10,
42604 ++ 0x63, 0x3d, 0x99, 0x3d
42605 ++};
42606 ++static const u8 enc_key031[] __initconst = {
42607 ++ 0xf6, 0x0c, 0x6a, 0x1b, 0x62, 0x57, 0x25, 0xf7,
42608 ++ 0x6c, 0x70, 0x37, 0xb4, 0x8f, 0xe3, 0x57, 0x7f,
42609 ++ 0xa7, 0xf7, 0xb8, 0x7b, 0x1b, 0xd5, 0xa9, 0x82,
42610 ++ 0x17, 0x6d, 0x18, 0x23, 0x06, 0xff, 0xb8, 0x70
42611 ++};
42612 ++
42613 ++/* wycheproof - misc */
42614 ++static const u8 enc_input032[] __initconst = {
42615 ++ 0x10, 0xf1, 0xec, 0xf9, 0xc6, 0x05, 0x84, 0x66,
42616 ++ 0x5d, 0x9a, 0xe5, 0xef, 0xe2, 0x79, 0xe7, 0xf7,
42617 ++ 0x37, 0x7e, 0xea, 0x69, 0x16, 0xd2, 0xb1, 0x11
42618 ++};
42619 ++static const u8 enc_output032[] __initconst = {
42620 ++ 0x42, 0xf2, 0x6c, 0x56, 0xcb, 0x4b, 0xe2, 0x1d,
42621 ++ 0x9d, 0x8d, 0x0c, 0x80, 0xfc, 0x99, 0xdd, 0xe0,
42622 ++ 0x0d, 0x75, 0xf3, 0x80, 0x74, 0xbf, 0xe7, 0x64,
42623 ++ 0x54, 0xaa, 0x7e, 0x13, 0xd4, 0x8f, 0xff, 0x7d,
42624 ++ 0x75, 0x57, 0x03, 0x94, 0x57, 0x04, 0x0a, 0x3a
42625 ++};
42626 ++static const u8 enc_assoc032[] __initconst = { };
42627 ++static const u8 enc_nonce032[] __initconst = {
42628 ++ 0xe6, 0xb1, 0xad, 0xf2, 0xfd, 0x58, 0xa8, 0x76,
42629 ++ 0x2c, 0x65, 0xf3, 0x1b
42630 ++};
42631 ++static const u8 enc_key032[] __initconst = {
42632 ++ 0x02, 0x12, 0xa8, 0xde, 0x50, 0x07, 0xed, 0x87,
42633 ++ 0xb3, 0x3f, 0x1a, 0x70, 0x90, 0xb6, 0x11, 0x4f,
42634 ++ 0x9e, 0x08, 0xce, 0xfd, 0x96, 0x07, 0xf2, 0xc2,
42635 ++ 0x76, 0xbd, 0xcf, 0xdb, 0xc5, 0xce, 0x9c, 0xd7
42636 ++};
42637 ++
42638 ++/* wycheproof - misc */
42639 ++static const u8 enc_input033[] __initconst = {
42640 ++ 0x92, 0x22, 0xf9, 0x01, 0x8e, 0x54, 0xfd, 0x6d,
42641 ++ 0xe1, 0x20, 0x08, 0x06, 0xa9, 0xee, 0x8e, 0x4c,
42642 ++ 0xc9, 0x04, 0xd2, 0x9f, 0x25, 0xcb, 0xa1, 0x93
42643 ++};
42644 ++static const u8 enc_output033[] __initconst = {
42645 ++ 0x12, 0x30, 0x32, 0x43, 0x7b, 0x4b, 0xfd, 0x69,
42646 ++ 0x20, 0xe8, 0xf7, 0xe7, 0xe0, 0x08, 0x7a, 0xe4,
42647 ++ 0x88, 0x9e, 0xbe, 0x7a, 0x0a, 0xd0, 0xe9, 0x00,
42648 ++ 0x3c, 0xf6, 0x8f, 0x17, 0x95, 0x50, 0xda, 0x63,
42649 ++ 0xd3, 0xb9, 0x6c, 0x2d, 0x55, 0x41, 0x18, 0x65
42650 ++};
42651 ++static const u8 enc_assoc033[] __initconst = {
42652 ++ 0x3e, 0x8b, 0xc5, 0xad, 0xe1, 0x82, 0xff, 0x08
42653 ++};
42654 ++static const u8 enc_nonce033[] __initconst = {
42655 ++ 0x6b, 0x28, 0x2e, 0xbe, 0xcc, 0x54, 0x1b, 0xcd,
42656 ++ 0x78, 0x34, 0xed, 0x55
42657 ++};
42658 ++static const u8 enc_key033[] __initconst = {
42659 ++ 0xc5, 0xbc, 0x09, 0x56, 0x56, 0x46, 0xe7, 0xed,
42660 ++ 0xda, 0x95, 0x4f, 0x1f, 0x73, 0x92, 0x23, 0xda,
42661 ++ 0xda, 0x20, 0xb9, 0x5c, 0x44, 0xab, 0x03, 0x3d,
42662 ++ 0x0f, 0xae, 0x4b, 0x02, 0x83, 0xd1, 0x8b, 0xe3
42663 ++};
42664 ++
42665 ++/* wycheproof - misc */
42666 ++static const u8 enc_input034[] __initconst = {
42667 ++ 0xb0, 0x53, 0x99, 0x92, 0x86, 0xa2, 0x82, 0x4f,
42668 ++ 0x42, 0xcc, 0x8c, 0x20, 0x3a, 0xb2, 0x4e, 0x2c,
42669 ++ 0x97, 0xa6, 0x85, 0xad, 0xcc, 0x2a, 0xd3, 0x26,
42670 ++ 0x62, 0x55, 0x8e, 0x55, 0xa5, 0xc7, 0x29
42671 ++};
42672 ++static const u8 enc_output034[] __initconst = {
42673 ++ 0x45, 0xc7, 0xd6, 0xb5, 0x3a, 0xca, 0xd4, 0xab,
42674 ++ 0xb6, 0x88, 0x76, 0xa6, 0xe9, 0x6a, 0x48, 0xfb,
42675 ++ 0x59, 0x52, 0x4d, 0x2c, 0x92, 0xc9, 0xd8, 0xa1,
42676 ++ 0x89, 0xc9, 0xfd, 0x2d, 0xb9, 0x17, 0x46, 0x56,
42677 ++ 0x6d, 0x3c, 0xa1, 0x0e, 0x31, 0x1b, 0x69, 0x5f,
42678 ++ 0x3e, 0xae, 0x15, 0x51, 0x65, 0x24, 0x93
42679 ++};
42680 ++static const u8 enc_assoc034[] __initconst = { };
42681 ++static const u8 enc_nonce034[] __initconst = {
42682 ++ 0x04, 0xa9, 0xbe, 0x03, 0x50, 0x8a, 0x5f, 0x31,
42683 ++ 0x37, 0x1a, 0x6f, 0xd2
42684 ++};
42685 ++static const u8 enc_key034[] __initconst = {
42686 ++ 0x2e, 0xb5, 0x1c, 0x46, 0x9a, 0xa8, 0xeb, 0x9e,
42687 ++ 0x6c, 0x54, 0xa8, 0x34, 0x9b, 0xae, 0x50, 0xa2,
42688 ++ 0x0f, 0x0e, 0x38, 0x27, 0x11, 0xbb, 0xa1, 0x15,
42689 ++ 0x2c, 0x42, 0x4f, 0x03, 0xb6, 0x67, 0x1d, 0x71
42690 ++};
42691 ++
42692 ++/* wycheproof - misc */
42693 ++static const u8 enc_input035[] __initconst = {
42694 ++ 0xf4, 0x52, 0x06, 0xab, 0xc2, 0x55, 0x52, 0xb2,
42695 ++ 0xab, 0xc9, 0xab, 0x7f, 0xa2, 0x43, 0x03, 0x5f,
42696 ++ 0xed, 0xaa, 0xdd, 0xc3, 0xb2, 0x29, 0x39, 0x56,
42697 ++ 0xf1, 0xea, 0x6e, 0x71, 0x56, 0xe7, 0xeb
42698 ++};
42699 ++static const u8 enc_output035[] __initconst = {
42700 ++ 0x46, 0xa8, 0x0c, 0x41, 0x87, 0x02, 0x47, 0x20,
42701 ++ 0x08, 0x46, 0x27, 0x58, 0x00, 0x80, 0xdd, 0xe5,
42702 ++ 0xa3, 0xf4, 0xa1, 0x10, 0x93, 0xa7, 0x07, 0x6e,
42703 ++ 0xd6, 0xf3, 0xd3, 0x26, 0xbc, 0x7b, 0x70, 0x53,
42704 ++ 0x4d, 0x4a, 0xa2, 0x83, 0x5a, 0x52, 0xe7, 0x2d,
42705 ++ 0x14, 0xdf, 0x0e, 0x4f, 0x47, 0xf2, 0x5f
42706 ++};
42707 ++static const u8 enc_assoc035[] __initconst = {
42708 ++ 0x37, 0x46, 0x18, 0xa0, 0x6e, 0xa9, 0x8a, 0x48
42709 ++};
42710 ++static const u8 enc_nonce035[] __initconst = {
42711 ++ 0x47, 0x0a, 0x33, 0x9e, 0xcb, 0x32, 0x19, 0xb8,
42712 ++ 0xb8, 0x1a, 0x1f, 0x8b
42713 ++};
42714 ++static const u8 enc_key035[] __initconst = {
42715 ++ 0x7f, 0x5b, 0x74, 0xc0, 0x7e, 0xd1, 0xb4, 0x0f,
42716 ++ 0xd1, 0x43, 0x58, 0xfe, 0x2f, 0xf2, 0xa7, 0x40,
42717 ++ 0xc1, 0x16, 0xc7, 0x70, 0x65, 0x10, 0xe6, 0xa4,
42718 ++ 0x37, 0xf1, 0x9e, 0xa4, 0x99, 0x11, 0xce, 0xc4
42719 ++};
42720 ++
42721 ++/* wycheproof - misc */
42722 ++static const u8 enc_input036[] __initconst = {
42723 ++ 0xb9, 0xc5, 0x54, 0xcb, 0xc3, 0x6a, 0xc1, 0x8a,
42724 ++ 0xe8, 0x97, 0xdf, 0x7b, 0xee, 0xca, 0xc1, 0xdb,
42725 ++ 0xeb, 0x4e, 0xaf, 0xa1, 0x56, 0xbb, 0x60, 0xce,
42726 ++ 0x2e, 0x5d, 0x48, 0xf0, 0x57, 0x15, 0xe6, 0x78
42727 ++};
42728 ++static const u8 enc_output036[] __initconst = {
42729 ++ 0xea, 0x29, 0xaf, 0xa4, 0x9d, 0x36, 0xe8, 0x76,
42730 ++ 0x0f, 0x5f, 0xe1, 0x97, 0x23, 0xb9, 0x81, 0x1e,
42731 ++ 0xd5, 0xd5, 0x19, 0x93, 0x4a, 0x44, 0x0f, 0x50,
42732 ++ 0x81, 0xac, 0x43, 0x0b, 0x95, 0x3b, 0x0e, 0x21,
42733 ++ 0x22, 0x25, 0x41, 0xaf, 0x46, 0xb8, 0x65, 0x33,
42734 ++ 0xc6, 0xb6, 0x8d, 0x2f, 0xf1, 0x08, 0xa7, 0xea
42735 ++};
42736 ++static const u8 enc_assoc036[] __initconst = { };
42737 ++static const u8 enc_nonce036[] __initconst = {
42738 ++ 0x72, 0xcf, 0xd9, 0x0e, 0xf3, 0x02, 0x6c, 0xa2,
42739 ++ 0x2b, 0x7e, 0x6e, 0x6a
42740 ++};
42741 ++static const u8 enc_key036[] __initconst = {
42742 ++ 0xe1, 0x73, 0x1d, 0x58, 0x54, 0xe1, 0xb7, 0x0c,
42743 ++ 0xb3, 0xff, 0xe8, 0xb7, 0x86, 0xa2, 0xb3, 0xeb,
42744 ++ 0xf0, 0x99, 0x43, 0x70, 0x95, 0x47, 0x57, 0xb9,
42745 ++ 0xdc, 0x8c, 0x7b, 0xc5, 0x35, 0x46, 0x34, 0xa3
42746 ++};
42747 ++
42748 ++/* wycheproof - misc */
42749 ++static const u8 enc_input037[] __initconst = {
42750 ++ 0x6b, 0x26, 0x04, 0x99, 0x6c, 0xd3, 0x0c, 0x14,
42751 ++ 0xa1, 0x3a, 0x52, 0x57, 0xed, 0x6c, 0xff, 0xd3,
42752 ++ 0xbc, 0x5e, 0x29, 0xd6, 0xb9, 0x7e, 0xb1, 0x79,
42753 ++ 0x9e, 0xb3, 0x35, 0xe2, 0x81, 0xea, 0x45, 0x1e
42754 ++};
42755 ++static const u8 enc_output037[] __initconst = {
42756 ++ 0x6d, 0xad, 0x63, 0x78, 0x97, 0x54, 0x4d, 0x8b,
42757 ++ 0xf6, 0xbe, 0x95, 0x07, 0xed, 0x4d, 0x1b, 0xb2,
42758 ++ 0xe9, 0x54, 0xbc, 0x42, 0x7e, 0x5d, 0xe7, 0x29,
42759 ++ 0xda, 0xf5, 0x07, 0x62, 0x84, 0x6f, 0xf2, 0xf4,
42760 ++ 0x7b, 0x99, 0x7d, 0x93, 0xc9, 0x82, 0x18, 0x9d,
42761 ++ 0x70, 0x95, 0xdc, 0x79, 0x4c, 0x74, 0x62, 0x32
42762 ++};
42763 ++static const u8 enc_assoc037[] __initconst = {
42764 ++ 0x23, 0x33, 0xe5, 0xce, 0x0f, 0x93, 0xb0, 0x59
42765 ++};
42766 ++static const u8 enc_nonce037[] __initconst = {
42767 ++ 0x26, 0x28, 0x80, 0xd4, 0x75, 0xf3, 0xda, 0xc5,
42768 ++ 0x34, 0x0d, 0xd1, 0xb8
42769 ++};
42770 ++static const u8 enc_key037[] __initconst = {
42771 ++ 0x27, 0xd8, 0x60, 0x63, 0x1b, 0x04, 0x85, 0xa4,
42772 ++ 0x10, 0x70, 0x2f, 0xea, 0x61, 0xbc, 0x87, 0x3f,
42773 ++ 0x34, 0x42, 0x26, 0x0c, 0xad, 0xed, 0x4a, 0xbd,
42774 ++ 0xe2, 0x5b, 0x78, 0x6a, 0x2d, 0x97, 0xf1, 0x45
42775 ++};
42776 ++
42777 ++/* wycheproof - misc */
42778 ++static const u8 enc_input038[] __initconst = {
42779 ++ 0x97, 0x3d, 0x0c, 0x75, 0x38, 0x26, 0xba, 0xe4,
42780 ++ 0x66, 0xcf, 0x9a, 0xbb, 0x34, 0x93, 0x15, 0x2e,
42781 ++ 0x9d, 0xe7, 0x81, 0x9e, 0x2b, 0xd0, 0xc7, 0x11,
42782 ++ 0x71, 0x34, 0x6b, 0x4d, 0x2c, 0xeb, 0xf8, 0x04,
42783 ++ 0x1a, 0xa3, 0xce, 0xdc, 0x0d, 0xfd, 0x7b, 0x46,
42784 ++ 0x7e, 0x26, 0x22, 0x8b, 0xc8, 0x6c, 0x9a
42785 ++};
42786 ++static const u8 enc_output038[] __initconst = {
42787 ++ 0xfb, 0xa7, 0x8a, 0xe4, 0xf9, 0xd8, 0x08, 0xa6,
42788 ++ 0x2e, 0x3d, 0xa4, 0x0b, 0xe2, 0xcb, 0x77, 0x00,
42789 ++ 0xc3, 0x61, 0x3d, 0x9e, 0xb2, 0xc5, 0x29, 0xc6,
42790 ++ 0x52, 0xe7, 0x6a, 0x43, 0x2c, 0x65, 0x8d, 0x27,
42791 ++ 0x09, 0x5f, 0x0e, 0xb8, 0xf9, 0x40, 0xc3, 0x24,
42792 ++ 0x98, 0x1e, 0xa9, 0x35, 0xe5, 0x07, 0xf9, 0x8f,
42793 ++ 0x04, 0x69, 0x56, 0xdb, 0x3a, 0x51, 0x29, 0x08,
42794 ++ 0xbd, 0x7a, 0xfc, 0x8f, 0x2a, 0xb0, 0xa9
42795 ++};
42796 ++static const u8 enc_assoc038[] __initconst = { };
42797 ++static const u8 enc_nonce038[] __initconst = {
42798 ++ 0xe7, 0x4a, 0x51, 0x5e, 0x7e, 0x21, 0x02, 0xb9,
42799 ++ 0x0b, 0xef, 0x55, 0xd2
42800 ++};
42801 ++static const u8 enc_key038[] __initconst = {
42802 ++ 0xcf, 0x0d, 0x40, 0xa4, 0x64, 0x4e, 0x5f, 0x51,
42803 ++ 0x81, 0x51, 0x65, 0xd5, 0x30, 0x1b, 0x22, 0x63,
42804 ++ 0x1f, 0x45, 0x44, 0xc4, 0x9a, 0x18, 0x78, 0xe3,
42805 ++ 0xa0, 0xa5, 0xe8, 0xe1, 0xaa, 0xe0, 0xf2, 0x64
42806 ++};
42807 ++
42808 ++/* wycheproof - misc */
42809 ++static const u8 enc_input039[] __initconst = {
42810 ++ 0xa9, 0x89, 0x95, 0x50, 0x4d, 0xf1, 0x6f, 0x74,
42811 ++ 0x8b, 0xfb, 0x77, 0x85, 0xff, 0x91, 0xee, 0xb3,
42812 ++ 0xb6, 0x60, 0xea, 0x9e, 0xd3, 0x45, 0x0c, 0x3d,
42813 ++ 0x5e, 0x7b, 0x0e, 0x79, 0xef, 0x65, 0x36, 0x59,
42814 ++ 0xa9, 0x97, 0x8d, 0x75, 0x54, 0x2e, 0xf9, 0x1c,
42815 ++ 0x45, 0x67, 0x62, 0x21, 0x56, 0x40, 0xb9
42816 ++};
42817 ++static const u8 enc_output039[] __initconst = {
42818 ++ 0xa1, 0xff, 0xed, 0x80, 0x76, 0x18, 0x29, 0xec,
42819 ++ 0xce, 0x24, 0x2e, 0x0e, 0x88, 0xb1, 0x38, 0x04,
42820 ++ 0x90, 0x16, 0xbc, 0xa0, 0x18, 0xda, 0x2b, 0x6e,
42821 ++ 0x19, 0x98, 0x6b, 0x3e, 0x31, 0x8c, 0xae, 0x8d,
42822 ++ 0x80, 0x61, 0x98, 0xfb, 0x4c, 0x52, 0x7c, 0xc3,
42823 ++ 0x93, 0x50, 0xeb, 0xdd, 0xea, 0xc5, 0x73, 0xc4,
42824 ++ 0xcb, 0xf0, 0xbe, 0xfd, 0xa0, 0xb7, 0x02, 0x42,
42825 ++ 0xc6, 0x40, 0xd7, 0xcd, 0x02, 0xd7, 0xa3
42826 ++};
42827 ++static const u8 enc_assoc039[] __initconst = {
42828 ++ 0xb3, 0xe4, 0x06, 0x46, 0x83, 0xb0, 0x2d, 0x84
42829 ++};
42830 ++static const u8 enc_nonce039[] __initconst = {
42831 ++ 0xd4, 0xd8, 0x07, 0x34, 0x16, 0x83, 0x82, 0x5b,
42832 ++ 0x31, 0xcd, 0x4d, 0x95
42833 ++};
42834 ++static const u8 enc_key039[] __initconst = {
42835 ++ 0x6c, 0xbf, 0xd7, 0x1c, 0x64, 0x5d, 0x18, 0x4c,
42836 ++ 0xf5, 0xd2, 0x3c, 0x40, 0x2b, 0xdb, 0x0d, 0x25,
42837 ++ 0xec, 0x54, 0x89, 0x8c, 0x8a, 0x02, 0x73, 0xd4,
42838 ++ 0x2e, 0xb5, 0xbe, 0x10, 0x9f, 0xdc, 0xb2, 0xac
42839 ++};
42840 ++
42841 ++/* wycheproof - misc */
42842 ++static const u8 enc_input040[] __initconst = {
42843 ++ 0xd0, 0x96, 0x80, 0x31, 0x81, 0xbe, 0xef, 0x9e,
42844 ++ 0x00, 0x8f, 0xf8, 0x5d, 0x5d, 0xdc, 0x38, 0xdd,
42845 ++ 0xac, 0xf0, 0xf0, 0x9e, 0xe5, 0xf7, 0xe0, 0x7f,
42846 ++ 0x1e, 0x40, 0x79, 0xcb, 0x64, 0xd0, 0xdc, 0x8f,
42847 ++ 0x5e, 0x67, 0x11, 0xcd, 0x49, 0x21, 0xa7, 0x88,
42848 ++ 0x7d, 0xe7, 0x6e, 0x26, 0x78, 0xfd, 0xc6, 0x76,
42849 ++ 0x18, 0xf1, 0x18, 0x55, 0x86, 0xbf, 0xea, 0x9d,
42850 ++ 0x4c, 0x68, 0x5d, 0x50, 0xe4, 0xbb, 0x9a, 0x82
42851 ++};
42852 ++static const u8 enc_output040[] __initconst = {
42853 ++ 0x9a, 0x4e, 0xf2, 0x2b, 0x18, 0x16, 0x77, 0xb5,
42854 ++ 0x75, 0x5c, 0x08, 0xf7, 0x47, 0xc0, 0xf8, 0xd8,
42855 ++ 0xe8, 0xd4, 0xc1, 0x8a, 0x9c, 0xc2, 0x40, 0x5c,
42856 ++ 0x12, 0xbb, 0x51, 0xbb, 0x18, 0x72, 0xc8, 0xe8,
42857 ++ 0xb8, 0x77, 0x67, 0x8b, 0xec, 0x44, 0x2c, 0xfc,
42858 ++ 0xbb, 0x0f, 0xf4, 0x64, 0xa6, 0x4b, 0x74, 0x33,
42859 ++ 0x2c, 0xf0, 0x72, 0x89, 0x8c, 0x7e, 0x0e, 0xdd,
42860 ++ 0xf6, 0x23, 0x2e, 0xa6, 0xe2, 0x7e, 0xfe, 0x50,
42861 ++ 0x9f, 0xf3, 0x42, 0x7a, 0x0f, 0x32, 0xfa, 0x56,
42862 ++ 0x6d, 0x9c, 0xa0, 0xa7, 0x8a, 0xef, 0xc0, 0x13
42863 ++};
42864 ++static const u8 enc_assoc040[] __initconst = { };
42865 ++static const u8 enc_nonce040[] __initconst = {
42866 ++ 0xd6, 0x10, 0x40, 0xa3, 0x13, 0xed, 0x49, 0x28,
42867 ++ 0x23, 0xcc, 0x06, 0x5b
42868 ++};
42869 ++static const u8 enc_key040[] __initconst = {
42870 ++ 0x5b, 0x1d, 0x10, 0x35, 0xc0, 0xb1, 0x7e, 0xe0,
42871 ++ 0xb0, 0x44, 0x47, 0x67, 0xf8, 0x0a, 0x25, 0xb8,
42872 ++ 0xc1, 0xb7, 0x41, 0xf4, 0xb5, 0x0a, 0x4d, 0x30,
42873 ++ 0x52, 0x22, 0x6b, 0xaa, 0x1c, 0x6f, 0xb7, 0x01
42874 ++};
42875 ++
42876 ++/* wycheproof - misc */
42877 ++static const u8 enc_input041[] __initconst = {
42878 ++ 0x94, 0xee, 0x16, 0x6d, 0x6d, 0x6e, 0xcf, 0x88,
42879 ++ 0x32, 0x43, 0x71, 0x36, 0xb4, 0xae, 0x80, 0x5d,
42880 ++ 0x42, 0x88, 0x64, 0x35, 0x95, 0x86, 0xd9, 0x19,
42881 ++ 0x3a, 0x25, 0x01, 0x62, 0x93, 0xed, 0xba, 0x44,
42882 ++ 0x3c, 0x58, 0xe0, 0x7e, 0x7b, 0x71, 0x95, 0xec,
42883 ++ 0x5b, 0xd8, 0x45, 0x82, 0xa9, 0xd5, 0x6c, 0x8d,
42884 ++ 0x4a, 0x10, 0x8c, 0x7d, 0x7c, 0xe3, 0x4e, 0x6c,
42885 ++ 0x6f, 0x8e, 0xa1, 0xbe, 0xc0, 0x56, 0x73, 0x17
42886 ++};
42887 ++static const u8 enc_output041[] __initconst = {
42888 ++ 0x5f, 0xbb, 0xde, 0xcc, 0x34, 0xbe, 0x20, 0x16,
42889 ++ 0x14, 0xf6, 0x36, 0x03, 0x1e, 0xeb, 0x42, 0xf1,
42890 ++ 0xca, 0xce, 0x3c, 0x79, 0xa1, 0x2c, 0xff, 0xd8,
42891 ++ 0x71, 0xee, 0x8e, 0x73, 0x82, 0x0c, 0x82, 0x97,
42892 ++ 0x49, 0xf1, 0xab, 0xb4, 0x29, 0x43, 0x67, 0x84,
42893 ++ 0x9f, 0xb6, 0xc2, 0xaa, 0x56, 0xbd, 0xa8, 0xa3,
42894 ++ 0x07, 0x8f, 0x72, 0x3d, 0x7c, 0x1c, 0x85, 0x20,
42895 ++ 0x24, 0xb0, 0x17, 0xb5, 0x89, 0x73, 0xfb, 0x1e,
42896 ++ 0x09, 0x26, 0x3d, 0xa7, 0xb4, 0xcb, 0x92, 0x14,
42897 ++ 0x52, 0xf9, 0x7d, 0xca, 0x40, 0xf5, 0x80, 0xec
42898 ++};
42899 ++static const u8 enc_assoc041[] __initconst = {
42900 ++ 0x71, 0x93, 0xf6, 0x23, 0x66, 0x33, 0x21, 0xa2
42901 ++};
42902 ++static const u8 enc_nonce041[] __initconst = {
42903 ++ 0xd3, 0x1c, 0x21, 0xab, 0xa1, 0x75, 0xb7, 0x0d,
42904 ++ 0xe4, 0xeb, 0xb1, 0x9c
42905 ++};
42906 ++static const u8 enc_key041[] __initconst = {
42907 ++ 0x97, 0xd6, 0x35, 0xc4, 0xf4, 0x75, 0x74, 0xd9,
42908 ++ 0x99, 0x8a, 0x90, 0x87, 0x5d, 0xa1, 0xd3, 0xa2,
42909 ++ 0x84, 0xb7, 0x55, 0xb2, 0xd3, 0x92, 0x97, 0xa5,
42910 ++ 0x72, 0x52, 0x35, 0x19, 0x0e, 0x10, 0xa9, 0x7e
42911 ++};
42912 ++
42913 ++/* wycheproof - misc */
42914 ++static const u8 enc_input042[] __initconst = {
42915 ++ 0xb4, 0x29, 0xeb, 0x80, 0xfb, 0x8f, 0xe8, 0xba,
42916 ++ 0xed, 0xa0, 0xc8, 0x5b, 0x9c, 0x33, 0x34, 0x58,
42917 ++ 0xe7, 0xc2, 0x99, 0x2e, 0x55, 0x84, 0x75, 0x06,
42918 ++ 0x9d, 0x12, 0xd4, 0x5c, 0x22, 0x21, 0x75, 0x64,
42919 ++ 0x12, 0x15, 0x88, 0x03, 0x22, 0x97, 0xef, 0xf5,
42920 ++ 0x67, 0x83, 0x74, 0x2a, 0x5f, 0xc2, 0x2d, 0x74,
42921 ++ 0x10, 0xff, 0xb2, 0x9d, 0x66, 0x09, 0x86, 0x61,
42922 ++ 0xd7, 0x6f, 0x12, 0x6c, 0x3c, 0x27, 0x68, 0x9e,
42923 ++ 0x43, 0xb3, 0x72, 0x67, 0xca, 0xc5, 0xa3, 0xa6,
42924 ++ 0xd3, 0xab, 0x49, 0xe3, 0x91, 0xda, 0x29, 0xcd,
42925 ++ 0x30, 0x54, 0xa5, 0x69, 0x2e, 0x28, 0x07, 0xe4,
42926 ++ 0xc3, 0xea, 0x46, 0xc8, 0x76, 0x1d, 0x50, 0xf5,
42927 ++ 0x92
42928 ++};
42929 ++static const u8 enc_output042[] __initconst = {
42930 ++ 0xd0, 0x10, 0x2f, 0x6c, 0x25, 0x8b, 0xf4, 0x97,
42931 ++ 0x42, 0xce, 0xc3, 0x4c, 0xf2, 0xd0, 0xfe, 0xdf,
42932 ++ 0x23, 0xd1, 0x05, 0xfb, 0x4c, 0x84, 0xcf, 0x98,
42933 ++ 0x51, 0x5e, 0x1b, 0xc9, 0xa6, 0x4f, 0x8a, 0xd5,
42934 ++ 0xbe, 0x8f, 0x07, 0x21, 0xbd, 0xe5, 0x06, 0x45,
42935 ++ 0xd0, 0x00, 0x83, 0xc3, 0xa2, 0x63, 0xa3, 0x10,
42936 ++ 0x53, 0xb7, 0x60, 0x24, 0x5f, 0x52, 0xae, 0x28,
42937 ++ 0x66, 0xa5, 0xec, 0x83, 0xb1, 0x9f, 0x61, 0xbe,
42938 ++ 0x1d, 0x30, 0xd5, 0xc5, 0xd9, 0xfe, 0xcc, 0x4c,
42939 ++ 0xbb, 0xe0, 0x8f, 0xd3, 0x85, 0x81, 0x3a, 0x2a,
42940 ++ 0xa3, 0x9a, 0x00, 0xff, 0x9c, 0x10, 0xf7, 0xf2,
42941 ++ 0x37, 0x02, 0xad, 0xd1, 0xe4, 0xb2, 0xff, 0xa3,
42942 ++ 0x1c, 0x41, 0x86, 0x5f, 0xc7, 0x1d, 0xe1, 0x2b,
42943 ++ 0x19, 0x61, 0x21, 0x27, 0xce, 0x49, 0x99, 0x3b,
42944 ++ 0xb0
42945 ++};
42946 ++static const u8 enc_assoc042[] __initconst = { };
42947 ++static const u8 enc_nonce042[] __initconst = {
42948 ++ 0x17, 0xc8, 0x6a, 0x8a, 0xbb, 0xb7, 0xe0, 0x03,
42949 ++ 0xac, 0xde, 0x27, 0x99
42950 ++};
42951 ++static const u8 enc_key042[] __initconst = {
42952 ++ 0xfe, 0x6e, 0x55, 0xbd, 0xae, 0xd1, 0xf7, 0x28,
42953 ++ 0x4c, 0xa5, 0xfc, 0x0f, 0x8c, 0x5f, 0x2b, 0x8d,
42954 ++ 0xf5, 0x6d, 0xc0, 0xf4, 0x9e, 0x8c, 0xa6, 0x6a,
42955 ++ 0x41, 0x99, 0x5e, 0x78, 0x33, 0x51, 0xf9, 0x01
42956 ++};
42957 ++
42958 ++/* wycheproof - misc */
42959 ++static const u8 enc_input043[] __initconst = {
42960 ++ 0xce, 0xb5, 0x34, 0xce, 0x50, 0xdc, 0x23, 0xff,
42961 ++ 0x63, 0x8a, 0xce, 0x3e, 0xf6, 0x3a, 0xb2, 0xcc,
42962 ++ 0x29, 0x73, 0xee, 0xad, 0xa8, 0x07, 0x85, 0xfc,
42963 ++ 0x16, 0x5d, 0x06, 0xc2, 0xf5, 0x10, 0x0f, 0xf5,
42964 ++ 0xe8, 0xab, 0x28, 0x82, 0xc4, 0x75, 0xaf, 0xcd,
42965 ++ 0x05, 0xcc, 0xd4, 0x9f, 0x2e, 0x7d, 0x8f, 0x55,
42966 ++ 0xef, 0x3a, 0x72, 0xe3, 0xdc, 0x51, 0xd6, 0x85,
42967 ++ 0x2b, 0x8e, 0x6b, 0x9e, 0x7a, 0xec, 0xe5, 0x7b,
42968 ++ 0xe6, 0x55, 0x6b, 0x0b, 0x6d, 0x94, 0x13, 0xe3,
42969 ++ 0x3f, 0xc5, 0xfc, 0x24, 0xa9, 0xa2, 0x05, 0xad,
42970 ++ 0x59, 0x57, 0x4b, 0xb3, 0x9d, 0x94, 0x4a, 0x92,
42971 ++ 0xdc, 0x47, 0x97, 0x0d, 0x84, 0xa6, 0xad, 0x31,
42972 ++ 0x76
42973 ++};
42974 ++static const u8 enc_output043[] __initconst = {
42975 ++ 0x75, 0x45, 0x39, 0x1b, 0x51, 0xde, 0x01, 0xd5,
42976 ++ 0xc5, 0x3d, 0xfa, 0xca, 0x77, 0x79, 0x09, 0x06,
42977 ++ 0x3e, 0x58, 0xed, 0xee, 0x4b, 0xb1, 0x22, 0x7e,
42978 ++ 0x71, 0x10, 0xac, 0x4d, 0x26, 0x20, 0xc2, 0xae,
42979 ++ 0xc2, 0xf8, 0x48, 0xf5, 0x6d, 0xee, 0xb0, 0x37,
42980 ++ 0xa8, 0xdc, 0xed, 0x75, 0xaf, 0xa8, 0xa6, 0xc8,
42981 ++ 0x90, 0xe2, 0xde, 0xe4, 0x2f, 0x95, 0x0b, 0xb3,
42982 ++ 0x3d, 0x9e, 0x24, 0x24, 0xd0, 0x8a, 0x50, 0x5d,
42983 ++ 0x89, 0x95, 0x63, 0x97, 0x3e, 0xd3, 0x88, 0x70,
42984 ++ 0xf3, 0xde, 0x6e, 0xe2, 0xad, 0xc7, 0xfe, 0x07,
42985 ++ 0x2c, 0x36, 0x6c, 0x14, 0xe2, 0xcf, 0x7c, 0xa6,
42986 ++ 0x2f, 0xb3, 0xd3, 0x6b, 0xee, 0x11, 0x68, 0x54,
42987 ++ 0x61, 0xb7, 0x0d, 0x44, 0xef, 0x8c, 0x66, 0xc5,
42988 ++ 0xc7, 0xbb, 0xf1, 0x0d, 0xca, 0xdd, 0x7f, 0xac,
42989 ++ 0xf6
42990 ++};
42991 ++static const u8 enc_assoc043[] __initconst = {
42992 ++ 0xa1, 0x1c, 0x40, 0xb6, 0x03, 0x76, 0x73, 0x30
42993 ++};
42994 ++static const u8 enc_nonce043[] __initconst = {
42995 ++ 0x46, 0x36, 0x2f, 0x45, 0xd6, 0x37, 0x9e, 0x63,
42996 ++ 0xe5, 0x22, 0x94, 0x60
42997 ++};
42998 ++static const u8 enc_key043[] __initconst = {
42999 ++ 0xaa, 0xbc, 0x06, 0x34, 0x74, 0xe6, 0x5c, 0x4c,
43000 ++ 0x3e, 0x9b, 0xdc, 0x48, 0x0d, 0xea, 0x97, 0xb4,
43001 ++ 0x51, 0x10, 0xc8, 0x61, 0x88, 0x46, 0xff, 0x6b,
43002 ++ 0x15, 0xbd, 0xd2, 0xa4, 0xa5, 0x68, 0x2c, 0x4e
43003 ++};
43004 ++
43005 ++/* wycheproof - misc */
43006 ++static const u8 enc_input044[] __initconst = {
43007 ++ 0xe5, 0xcc, 0xaa, 0x44, 0x1b, 0xc8, 0x14, 0x68,
43008 ++ 0x8f, 0x8f, 0x6e, 0x8f, 0x28, 0xb5, 0x00, 0xb2
43009 ++};
43010 ++static const u8 enc_output044[] __initconst = {
43011 ++ 0x7e, 0x72, 0xf5, 0xa1, 0x85, 0xaf, 0x16, 0xa6,
43012 ++ 0x11, 0x92, 0x1b, 0x43, 0x8f, 0x74, 0x9f, 0x0b,
43013 ++ 0x12, 0x42, 0xc6, 0x70, 0x73, 0x23, 0x34, 0x02,
43014 ++ 0x9a, 0xdf, 0xe1, 0xc5, 0x00, 0x16, 0x51, 0xe4
43015 ++};
43016 ++static const u8 enc_assoc044[] __initconst = {
43017 ++ 0x02
43018 ++};
43019 ++static const u8 enc_nonce044[] __initconst = {
43020 ++ 0x87, 0x34, 0x5f, 0x10, 0x55, 0xfd, 0x9e, 0x21,
43021 ++ 0x02, 0xd5, 0x06, 0x56
43022 ++};
43023 ++static const u8 enc_key044[] __initconst = {
43024 ++ 0x7d, 0x00, 0xb4, 0x80, 0x95, 0xad, 0xfa, 0x32,
43025 ++ 0x72, 0x05, 0x06, 0x07, 0xb2, 0x64, 0x18, 0x50,
43026 ++ 0x02, 0xba, 0x99, 0x95, 0x7c, 0x49, 0x8b, 0xe0,
43027 ++ 0x22, 0x77, 0x0f, 0x2c, 0xe2, 0xf3, 0x14, 0x3c
43028 ++};
43029 ++
43030 ++/* wycheproof - misc */
43031 ++static const u8 enc_input045[] __initconst = {
43032 ++ 0x02, 0xcd, 0xe1, 0x68, 0xfb, 0xa3, 0xf5, 0x44,
43033 ++ 0xbb, 0xd0, 0x33, 0x2f, 0x7a, 0xde, 0xad, 0xa8
43034 ++};
43035 ++static const u8 enc_output045[] __initconst = {
43036 ++ 0x85, 0xf2, 0x9a, 0x71, 0x95, 0x57, 0xcd, 0xd1,
43037 ++ 0x4d, 0x1f, 0x8f, 0xff, 0xab, 0x6d, 0x9e, 0x60,
43038 ++ 0x73, 0x2c, 0xa3, 0x2b, 0xec, 0xd5, 0x15, 0xa1,
43039 ++ 0xed, 0x35, 0x3f, 0x54, 0x2e, 0x99, 0x98, 0x58
43040 ++};
43041 ++static const u8 enc_assoc045[] __initconst = {
43042 ++ 0xb6, 0x48
43043 ++};
43044 ++static const u8 enc_nonce045[] __initconst = {
43045 ++ 0x87, 0xa3, 0x16, 0x3e, 0xc0, 0x59, 0x8a, 0xd9,
43046 ++ 0x5b, 0x3a, 0xa7, 0x13
43047 ++};
43048 ++static const u8 enc_key045[] __initconst = {
43049 ++ 0x64, 0x32, 0x71, 0x7f, 0x1d, 0xb8, 0x5e, 0x41,
43050 ++ 0xac, 0x78, 0x36, 0xbc, 0xe2, 0x51, 0x85, 0xa0,
43051 ++ 0x80, 0xd5, 0x76, 0x2b, 0x9e, 0x2b, 0x18, 0x44,
43052 ++ 0x4b, 0x6e, 0xc7, 0x2c, 0x3b, 0xd8, 0xe4, 0xdc
43053 ++};
43054 ++
43055 ++/* wycheproof - misc */
43056 ++static const u8 enc_input046[] __initconst = {
43057 ++ 0x16, 0xdd, 0xd2, 0x3f, 0xf5, 0x3f, 0x3d, 0x23,
43058 ++ 0xc0, 0x63, 0x34, 0x48, 0x70, 0x40, 0xeb, 0x47
43059 ++};
43060 ++static const u8 enc_output046[] __initconst = {
43061 ++ 0xc1, 0xb2, 0x95, 0x93, 0x6d, 0x56, 0xfa, 0xda,
43062 ++ 0xc0, 0x3e, 0x5f, 0x74, 0x2b, 0xff, 0x73, 0xa1,
43063 ++ 0x39, 0xc4, 0x57, 0xdb, 0xab, 0x66, 0x38, 0x2b,
43064 ++ 0xab, 0xb3, 0xb5, 0x58, 0x00, 0xcd, 0xa5, 0xb8
43065 ++};
43066 ++static const u8 enc_assoc046[] __initconst = {
43067 ++ 0xbd, 0x4c, 0xd0, 0x2f, 0xc7, 0x50, 0x2b, 0xbd,
43068 ++ 0xbd, 0xf6, 0xc9, 0xa3, 0xcb, 0xe8, 0xf0
43069 ++};
43070 ++static const u8 enc_nonce046[] __initconst = {
43071 ++ 0x6f, 0x57, 0x3a, 0xa8, 0x6b, 0xaa, 0x49, 0x2b,
43072 ++ 0xa4, 0x65, 0x96, 0xdf
43073 ++};
43074 ++static const u8 enc_key046[] __initconst = {
43075 ++ 0x8e, 0x34, 0xcf, 0x73, 0xd2, 0x45, 0xa1, 0x08,
43076 ++ 0x2a, 0x92, 0x0b, 0x86, 0x36, 0x4e, 0xb8, 0x96,
43077 ++ 0xc4, 0x94, 0x64, 0x67, 0xbc, 0xb3, 0xd5, 0x89,
43078 ++ 0x29, 0xfc, 0xb3, 0x66, 0x90, 0xe6, 0x39, 0x4f
43079 ++};
43080 ++
43081 ++/* wycheproof - misc */
43082 ++static const u8 enc_input047[] __initconst = {
43083 ++ 0x62, 0x3b, 0x78, 0x50, 0xc3, 0x21, 0xe2, 0xcf,
43084 ++ 0x0c, 0x6f, 0xbc, 0xc8, 0xdf, 0xd1, 0xaf, 0xf2
43085 ++};
43086 ++static const u8 enc_output047[] __initconst = {
43087 ++ 0xc8, 0x4c, 0x9b, 0xb7, 0xc6, 0x1c, 0x1b, 0xcb,
43088 ++ 0x17, 0x77, 0x2a, 0x1c, 0x50, 0x0c, 0x50, 0x95,
43089 ++ 0xdb, 0xad, 0xf7, 0xa5, 0x13, 0x8c, 0xa0, 0x34,
43090 ++ 0x59, 0xa2, 0xcd, 0x65, 0x83, 0x1e, 0x09, 0x2f
43091 ++};
43092 ++static const u8 enc_assoc047[] __initconst = {
43093 ++ 0x89, 0xcc, 0xe9, 0xfb, 0x47, 0x44, 0x1d, 0x07,
43094 ++ 0xe0, 0x24, 0x5a, 0x66, 0xfe, 0x8b, 0x77, 0x8b
43095 ++};
43096 ++static const u8 enc_nonce047[] __initconst = {
43097 ++ 0x1a, 0x65, 0x18, 0xf0, 0x2e, 0xde, 0x1d, 0xa6,
43098 ++ 0x80, 0x92, 0x66, 0xd9
43099 ++};
43100 ++static const u8 enc_key047[] __initconst = {
43101 ++ 0xcb, 0x55, 0x75, 0xf5, 0xc7, 0xc4, 0x5c, 0x91,
43102 ++ 0xcf, 0x32, 0x0b, 0x13, 0x9f, 0xb5, 0x94, 0x23,
43103 ++ 0x75, 0x60, 0xd0, 0xa3, 0xe6, 0xf8, 0x65, 0xa6,
43104 ++ 0x7d, 0x4f, 0x63, 0x3f, 0x2c, 0x08, 0xf0, 0x16
43105 ++};
43106 ++
43107 ++/* wycheproof - misc */
43108 ++static const u8 enc_input048[] __initconst = {
43109 ++ 0x87, 0xb3, 0xa4, 0xd7, 0xb2, 0x6d, 0x8d, 0x32,
43110 ++ 0x03, 0xa0, 0xde, 0x1d, 0x64, 0xef, 0x82, 0xe3
43111 ++};
43112 ++static const u8 enc_output048[] __initconst = {
43113 ++ 0x94, 0xbc, 0x80, 0x62, 0x1e, 0xd1, 0xe7, 0x1b,
43114 ++ 0x1f, 0xd2, 0xb5, 0xc3, 0xa1, 0x5e, 0x35, 0x68,
43115 ++ 0x33, 0x35, 0x11, 0x86, 0x17, 0x96, 0x97, 0x84,
43116 ++ 0x01, 0x59, 0x8b, 0x96, 0x37, 0x22, 0xf5, 0xb3
43117 ++};
43118 ++static const u8 enc_assoc048[] __initconst = {
43119 ++ 0xd1, 0x9f, 0x2d, 0x98, 0x90, 0x95, 0xf7, 0xab,
43120 ++ 0x03, 0xa5, 0xfd, 0xe8, 0x44, 0x16, 0xe0, 0x0c,
43121 ++ 0x0e
43122 ++};
43123 ++static const u8 enc_nonce048[] __initconst = {
43124 ++ 0x56, 0x4d, 0xee, 0x49, 0xab, 0x00, 0xd2, 0x40,
43125 ++ 0xfc, 0x10, 0x68, 0xc3
43126 ++};
43127 ++static const u8 enc_key048[] __initconst = {
43128 ++ 0xa5, 0x56, 0x9e, 0x72, 0x9a, 0x69, 0xb2, 0x4b,
43129 ++ 0xa6, 0xe0, 0xff, 0x15, 0xc4, 0x62, 0x78, 0x97,
43130 ++ 0x43, 0x68, 0x24, 0xc9, 0x41, 0xe9, 0xd0, 0x0b,
43131 ++ 0x2e, 0x93, 0xfd, 0xdc, 0x4b, 0xa7, 0x76, 0x57
43132 ++};
43133 ++
43134 ++/* wycheproof - misc */
43135 ++static const u8 enc_input049[] __initconst = {
43136 ++ 0xe6, 0x01, 0xb3, 0x85, 0x57, 0x79, 0x7d, 0xa2,
43137 ++ 0xf8, 0xa4, 0x10, 0x6a, 0x08, 0x9d, 0x1d, 0xa6
43138 ++};
43139 ++static const u8 enc_output049[] __initconst = {
43140 ++ 0x29, 0x9b, 0x5d, 0x3f, 0x3d, 0x03, 0xc0, 0x87,
43141 ++ 0x20, 0x9a, 0x16, 0xe2, 0x85, 0x14, 0x31, 0x11,
43142 ++ 0x4b, 0x45, 0x4e, 0xd1, 0x98, 0xde, 0x11, 0x7e,
43143 ++ 0x83, 0xec, 0x49, 0xfa, 0x8d, 0x85, 0x08, 0xd6
43144 ++};
43145 ++static const u8 enc_assoc049[] __initconst = {
43146 ++ 0x5e, 0x64, 0x70, 0xfa, 0xcd, 0x99, 0xc1, 0xd8,
43147 ++ 0x1e, 0x37, 0xcd, 0x44, 0x01, 0x5f, 0xe1, 0x94,
43148 ++ 0x80, 0xa2, 0xa4, 0xd3, 0x35, 0x2a, 0x4f, 0xf5,
43149 ++ 0x60, 0xc0, 0x64, 0x0f, 0xdb, 0xda
43150 ++};
43151 ++static const u8 enc_nonce049[] __initconst = {
43152 ++ 0xdf, 0x87, 0x13, 0xe8, 0x7e, 0xc3, 0xdb, 0xcf,
43153 ++ 0xad, 0x14, 0xd5, 0x3e
43154 ++};
43155 ++static const u8 enc_key049[] __initconst = {
43156 ++ 0x56, 0x20, 0x74, 0x65, 0xb4, 0xe4, 0x8e, 0x6d,
43157 ++ 0x04, 0x63, 0x0f, 0x4a, 0x42, 0xf3, 0x5c, 0xfc,
43158 ++ 0x16, 0x3a, 0xb2, 0x89, 0xc2, 0x2a, 0x2b, 0x47,
43159 ++ 0x84, 0xf6, 0xf9, 0x29, 0x03, 0x30, 0xbe, 0xe0
43160 ++};
43161 ++
43162 ++/* wycheproof - misc */
43163 ++static const u8 enc_input050[] __initconst = {
43164 ++ 0xdc, 0x9e, 0x9e, 0xaf, 0x11, 0xe3, 0x14, 0x18,
43165 ++ 0x2d, 0xf6, 0xa4, 0xeb, 0xa1, 0x7a, 0xec, 0x9c
43166 ++};
43167 ++static const u8 enc_output050[] __initconst = {
43168 ++ 0x60, 0x5b, 0xbf, 0x90, 0xae, 0xb9, 0x74, 0xf6,
43169 ++ 0x60, 0x2b, 0xc7, 0x78, 0x05, 0x6f, 0x0d, 0xca,
43170 ++ 0x38, 0xea, 0x23, 0xd9, 0x90, 0x54, 0xb4, 0x6b,
43171 ++ 0x42, 0xff, 0xe0, 0x04, 0x12, 0x9d, 0x22, 0x04
43172 ++};
43173 ++static const u8 enc_assoc050[] __initconst = {
43174 ++ 0xba, 0x44, 0x6f, 0x6f, 0x9a, 0x0c, 0xed, 0x22,
43175 ++ 0x45, 0x0f, 0xeb, 0x10, 0x73, 0x7d, 0x90, 0x07,
43176 ++ 0xfd, 0x69, 0xab, 0xc1, 0x9b, 0x1d, 0x4d, 0x90,
43177 ++ 0x49, 0xa5, 0x55, 0x1e, 0x86, 0xec, 0x2b, 0x37
43178 ++};
43179 ++static const u8 enc_nonce050[] __initconst = {
43180 ++ 0x8d, 0xf4, 0xb1, 0x5a, 0x88, 0x8c, 0x33, 0x28,
43181 ++ 0x6a, 0x7b, 0x76, 0x51
43182 ++};
43183 ++static const u8 enc_key050[] __initconst = {
43184 ++ 0x39, 0x37, 0x98, 0x6a, 0xf8, 0x6d, 0xaf, 0xc1,
43185 ++ 0xba, 0x0c, 0x46, 0x72, 0xd8, 0xab, 0xc4, 0x6c,
43186 ++ 0x20, 0x70, 0x62, 0x68, 0x2d, 0x9c, 0x26, 0x4a,
43187 ++ 0xb0, 0x6d, 0x6c, 0x58, 0x07, 0x20, 0x51, 0x30
43188 ++};
43189 ++
43190 ++/* wycheproof - misc */
43191 ++static const u8 enc_input051[] __initconst = {
43192 ++ 0x81, 0xce, 0x84, 0xed, 0xe9, 0xb3, 0x58, 0x59,
43193 ++ 0xcc, 0x8c, 0x49, 0xa8, 0xf6, 0xbe, 0x7d, 0xc6
43194 ++};
43195 ++static const u8 enc_output051[] __initconst = {
43196 ++ 0x7b, 0x7c, 0xe0, 0xd8, 0x24, 0x80, 0x9a, 0x70,
43197 ++ 0xde, 0x32, 0x56, 0x2c, 0xcf, 0x2c, 0x2b, 0xbd,
43198 ++ 0x15, 0xd4, 0x4a, 0x00, 0xce, 0x0d, 0x19, 0xb4,
43199 ++ 0x23, 0x1f, 0x92, 0x1e, 0x22, 0xbc, 0x0a, 0x43
43200 ++};
43201 ++static const u8 enc_assoc051[] __initconst = {
43202 ++ 0xd4, 0x1a, 0x82, 0x8d, 0x5e, 0x71, 0x82, 0x92,
43203 ++ 0x47, 0x02, 0x19, 0x05, 0x40, 0x2e, 0xa2, 0x57,
43204 ++ 0xdc, 0xcb, 0xc3, 0xb8, 0x0f, 0xcd, 0x56, 0x75,
43205 ++ 0x05, 0x6b, 0x68, 0xbb, 0x59, 0xe6, 0x2e, 0x88,
43206 ++ 0x73
43207 ++};
43208 ++static const u8 enc_nonce051[] __initconst = {
43209 ++ 0xbe, 0x40, 0xe5, 0xf1, 0xa1, 0x18, 0x17, 0xa0,
43210 ++ 0xa8, 0xfa, 0x89, 0x49
43211 ++};
43212 ++static const u8 enc_key051[] __initconst = {
43213 ++ 0x36, 0x37, 0x2a, 0xbc, 0xdb, 0x78, 0xe0, 0x27,
43214 ++ 0x96, 0x46, 0xac, 0x3d, 0x17, 0x6b, 0x96, 0x74,
43215 ++ 0xe9, 0x15, 0x4e, 0xec, 0xf0, 0xd5, 0x46, 0x9c,
43216 ++ 0x65, 0x1e, 0xc7, 0xe1, 0x6b, 0x4c, 0x11, 0x99
43217 ++};
43218 ++
43219 ++/* wycheproof - misc */
43220 ++static const u8 enc_input052[] __initconst = {
43221 ++ 0xa6, 0x67, 0x47, 0xc8, 0x9e, 0x85, 0x7a, 0xf3,
43222 ++ 0xa1, 0x8e, 0x2c, 0x79, 0x50, 0x00, 0x87, 0xed
43223 ++};
43224 ++static const u8 enc_output052[] __initconst = {
43225 ++ 0xca, 0x82, 0xbf, 0xf3, 0xe2, 0xf3, 0x10, 0xcc,
43226 ++ 0xc9, 0x76, 0x67, 0x2c, 0x44, 0x15, 0xe6, 0x9b,
43227 ++ 0x57, 0x63, 0x8c, 0x62, 0xa5, 0xd8, 0x5d, 0xed,
43228 ++ 0x77, 0x4f, 0x91, 0x3c, 0x81, 0x3e, 0xa0, 0x32
43229 ++};
43230 ++static const u8 enc_assoc052[] __initconst = {
43231 ++ 0x3f, 0x2d, 0xd4, 0x9b, 0xbf, 0x09, 0xd6, 0x9a,
43232 ++ 0x78, 0xa3, 0xd8, 0x0e, 0xa2, 0x56, 0x66, 0x14,
43233 ++ 0xfc, 0x37, 0x94, 0x74, 0x19, 0x6c, 0x1a, 0xae,
43234 ++ 0x84, 0x58, 0x3d, 0xa7, 0x3d, 0x7f, 0xf8, 0x5c,
43235 ++ 0x6f, 0x42, 0xca, 0x42, 0x05, 0x6a, 0x97, 0x92,
43236 ++ 0xcc, 0x1b, 0x9f, 0xb3, 0xc7, 0xd2, 0x61
43237 ++};
43238 ++static const u8 enc_nonce052[] __initconst = {
43239 ++ 0x84, 0xc8, 0x7d, 0xae, 0x4e, 0xee, 0x27, 0x73,
43240 ++ 0x0e, 0xc3, 0x5d, 0x12
43241 ++};
43242 ++static const u8 enc_key052[] __initconst = {
43243 ++ 0x9f, 0x14, 0x79, 0xed, 0x09, 0x7d, 0x7f, 0xe5,
43244 ++ 0x29, 0xc1, 0x1f, 0x2f, 0x5a, 0xdd, 0x9a, 0xaf,
43245 ++ 0xf4, 0xa1, 0xca, 0x0b, 0x68, 0x99, 0x7a, 0x2c,
43246 ++ 0xb7, 0xf7, 0x97, 0x49, 0xbd, 0x90, 0xaa, 0xf4
43247 ++};
43248 ++
43249 + /* wycheproof - misc */
43250 + static const u8 enc_input053[] __initconst = {
43251 + 0x25, 0x6d, 0x40, 0x88, 0x80, 0x94, 0x17, 0x83,
43252 +@@ -2759,6 +3858,126 @@ static const u8 enc_key073[] __initconst = {
43253 + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
43254 + };
43255 +
43256 ++/* wycheproof - checking for int overflows */
43257 ++static const u8 enc_input074[] __initconst = {
43258 ++ 0xd4, 0x50, 0x0b, 0xf0, 0x09, 0x49, 0x35, 0x51,
43259 ++ 0xc3, 0x80, 0xad, 0xf5, 0x2c, 0x57, 0x3a, 0x69,
43260 ++ 0xdf, 0x7e, 0x8b, 0x76, 0x24, 0x63, 0x33, 0x0f,
43261 ++ 0xac, 0xc1, 0x6a, 0x57, 0x26, 0xbe, 0x71, 0x90,
43262 ++ 0xc6, 0x3c, 0x5a, 0x1c, 0x92, 0x65, 0x84, 0xa0,
43263 ++ 0x96, 0x75, 0x68, 0x28, 0xdc, 0xdc, 0x64, 0xac,
43264 ++ 0xdf, 0x96, 0x3d, 0x93, 0x1b, 0xf1, 0xda, 0xe2,
43265 ++ 0x38, 0xf3, 0xf1, 0x57, 0x22, 0x4a, 0xc4, 0xb5,
43266 ++ 0x42, 0xd7, 0x85, 0xb0, 0xdd, 0x84, 0xdb, 0x6b,
43267 ++ 0xe3, 0xbc, 0x5a, 0x36, 0x63, 0xe8, 0x41, 0x49,
43268 ++ 0xff, 0xbe, 0xd0, 0x9e, 0x54, 0xf7, 0x8f, 0x16,
43269 ++ 0xa8, 0x22, 0x3b, 0x24, 0xcb, 0x01, 0x9f, 0x58,
43270 ++ 0xb2, 0x1b, 0x0e, 0x55, 0x1e, 0x7a, 0xa0, 0x73,
43271 ++ 0x27, 0x62, 0x95, 0x51, 0x37, 0x6c, 0xcb, 0xc3,
43272 ++ 0x93, 0x76, 0x71, 0xa0, 0x62, 0x9b, 0xd9, 0x5c,
43273 ++ 0x99, 0x15, 0xc7, 0x85, 0x55, 0x77, 0x1e, 0x7a
43274 ++};
43275 ++static const u8 enc_output074[] __initconst = {
43276 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43277 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43278 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43279 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43280 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43281 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43282 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43283 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43284 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43285 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43286 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43287 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43288 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43289 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43290 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43291 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43292 ++ 0x0b, 0x30, 0x0d, 0x8d, 0xa5, 0x6c, 0x21, 0x85,
43293 ++ 0x75, 0x52, 0x79, 0x55, 0x3c, 0x4c, 0x82, 0xca
43294 ++};
43295 ++static const u8 enc_assoc074[] __initconst = {
43296 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43297 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43298 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43299 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43300 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43301 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43302 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43303 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
43304 ++};
43305 ++static const u8 enc_nonce074[] __initconst = {
43306 ++ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
43307 ++ 0x00, 0x02, 0x50, 0x6e
43308 ++};
43309 ++static const u8 enc_key074[] __initconst = {
43310 ++ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
43311 ++ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
43312 ++ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
43313 ++ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30
43314 ++};
43315 ++
43316 ++/* wycheproof - checking for int overflows */
43317 ++static const u8 enc_input075[] __initconst = {
43318 ++ 0x7d, 0xe8, 0x7f, 0x67, 0x29, 0x94, 0x52, 0x75,
43319 ++ 0xd0, 0x65, 0x5d, 0xa4, 0xc7, 0xfd, 0xe4, 0x56,
43320 ++ 0x9e, 0x16, 0xf1, 0x11, 0xb5, 0xeb, 0x26, 0xc2,
43321 ++ 0x2d, 0x85, 0x9e, 0x3f, 0xf8, 0x22, 0xec, 0xed,
43322 ++ 0x3a, 0x6d, 0xd9, 0xa6, 0x0f, 0x22, 0x95, 0x7f,
43323 ++ 0x7b, 0x7c, 0x85, 0x7e, 0x88, 0x22, 0xeb, 0x9f,
43324 ++ 0xe0, 0xb8, 0xd7, 0x02, 0x21, 0x41, 0xf2, 0xd0,
43325 ++ 0xb4, 0x8f, 0x4b, 0x56, 0x12, 0xd3, 0x22, 0xa8,
43326 ++ 0x8d, 0xd0, 0xfe, 0x0b, 0x4d, 0x91, 0x79, 0x32,
43327 ++ 0x4f, 0x7c, 0x6c, 0x9e, 0x99, 0x0e, 0xfb, 0xd8,
43328 ++ 0x0e, 0x5e, 0xd6, 0x77, 0x58, 0x26, 0x49, 0x8b,
43329 ++ 0x1e, 0xfe, 0x0f, 0x71, 0xa0, 0xf3, 0xec, 0x5b,
43330 ++ 0x29, 0xcb, 0x28, 0xc2, 0x54, 0x0a, 0x7d, 0xcd,
43331 ++ 0x51, 0xb7, 0xda, 0xae, 0xe0, 0xff, 0x4a, 0x7f,
43332 ++ 0x3a, 0xc1, 0xee, 0x54, 0xc2, 0x9e, 0xe4, 0xc1,
43333 ++ 0x70, 0xde, 0x40, 0x8f, 0x66, 0x69, 0x21, 0x94
43334 ++};
43335 ++static const u8 enc_output075[] __initconst = {
43336 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43337 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43338 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43339 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43340 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43341 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43342 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43343 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43344 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43345 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43346 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43347 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43348 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43349 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43350 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43351 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43352 ++ 0xc5, 0x78, 0xe2, 0xaa, 0x44, 0xd3, 0x09, 0xb7,
43353 ++ 0xb6, 0xa5, 0x19, 0x3b, 0xdc, 0x61, 0x18, 0xf5
43354 ++};
43355 ++static const u8 enc_assoc075[] __initconst = {
43356 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43357 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43358 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43359 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43360 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43361 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43362 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43363 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
43364 ++};
43365 ++static const u8 enc_nonce075[] __initconst = {
43366 ++ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
43367 ++ 0x00, 0x03, 0x18, 0xa5
43368 ++};
43369 ++static const u8 enc_key075[] __initconst = {
43370 ++ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
43371 ++ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
43372 ++ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
43373 ++ 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30
43374 ++};
43375 ++
43376 + /* wycheproof - checking for int overflows */
43377 + static const u8 enc_input076[] __initconst = {
43378 + 0x1b, 0x99, 0x6f, 0x9a, 0x3c, 0xcc, 0x67, 0x85,
43379 +@@ -3349,6 +4568,286 @@ static const u8 enc_key085[] __initconst = {
43380 + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
43381 + };
43382 +
43383 ++/* wycheproof - special case tag */
43384 ++static const u8 enc_input086[] __initconst = {
43385 ++ 0x9a, 0x49, 0xc4, 0x0f, 0x8b, 0x48, 0xd7, 0xc6,
43386 ++ 0x6d, 0x1d, 0xb4, 0xe5, 0x3f, 0x20, 0xf2, 0xdd,
43387 ++ 0x4a, 0xaa, 0x24, 0x1d, 0xda, 0xb2, 0x6b, 0x5b,
43388 ++ 0xc0, 0xe2, 0x18, 0xb7, 0x2c, 0x33, 0x90, 0xf2,
43389 ++ 0xdf, 0x3e, 0xbd, 0x01, 0x76, 0x70, 0x44, 0x19,
43390 ++ 0x97, 0x2b, 0xcd, 0xbc, 0x6b, 0xbc, 0xb3, 0xe4,
43391 ++ 0xe7, 0x4a, 0x71, 0x52, 0x8e, 0xf5, 0x12, 0x63,
43392 ++ 0xce, 0x24, 0xe0, 0xd5, 0x75, 0xe0, 0xe4, 0x4d
43393 ++};
43394 ++static const u8 enc_output086[] __initconst = {
43395 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43396 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43397 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43398 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43399 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43400 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43401 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43402 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43403 ++ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
43404 ++ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
43405 ++};
43406 ++static const u8 enc_assoc086[] __initconst = {
43407 ++ 0x85, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43408 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43409 ++ 0xa6, 0x90, 0x2f, 0xcb, 0xc8, 0x83, 0xbb, 0xc1,
43410 ++ 0x80, 0xb2, 0x56, 0xae, 0x34, 0xad, 0x7f, 0x00
43411 ++};
43412 ++static const u8 enc_nonce086[] __initconst = {
43413 ++ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
43414 ++ 0x08, 0x09, 0x0a, 0x0b
43415 ++};
43416 ++static const u8 enc_key086[] __initconst = {
43417 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
43418 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
43419 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
43420 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
43421 ++};
43422 ++
43423 ++/* wycheproof - special case tag */
43424 ++static const u8 enc_input087[] __initconst = {
43425 ++ 0x9a, 0x49, 0xc4, 0x0f, 0x8b, 0x48, 0xd7, 0xc6,
43426 ++ 0x6d, 0x1d, 0xb4, 0xe5, 0x3f, 0x20, 0xf2, 0xdd,
43427 ++ 0x4a, 0xaa, 0x24, 0x1d, 0xda, 0xb2, 0x6b, 0x5b,
43428 ++ 0xc0, 0xe2, 0x18, 0xb7, 0x2c, 0x33, 0x90, 0xf2,
43429 ++ 0xdf, 0x3e, 0xbd, 0x01, 0x76, 0x70, 0x44, 0x19,
43430 ++ 0x97, 0x2b, 0xcd, 0xbc, 0x6b, 0xbc, 0xb3, 0xe4,
43431 ++ 0xe7, 0x4a, 0x71, 0x52, 0x8e, 0xf5, 0x12, 0x63,
43432 ++ 0xce, 0x24, 0xe0, 0xd5, 0x75, 0xe0, 0xe4, 0x4d
43433 ++};
43434 ++static const u8 enc_output087[] __initconst = {
43435 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43436 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43437 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43438 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43439 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43440 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43441 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43442 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43443 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
43444 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
43445 ++};
43446 ++static const u8 enc_assoc087[] __initconst = {
43447 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43448 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43449 ++ 0x24, 0x7e, 0x50, 0x64, 0x2a, 0x1c, 0x0a, 0x2f,
43450 ++ 0x8f, 0x77, 0x21, 0x96, 0x09, 0xdb, 0xa9, 0x58
43451 ++};
43452 ++static const u8 enc_nonce087[] __initconst = {
43453 ++ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
43454 ++ 0x08, 0x09, 0x0a, 0x0b
43455 ++};
43456 ++static const u8 enc_key087[] __initconst = {
43457 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
43458 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
43459 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
43460 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
43461 ++};
43462 ++
43463 ++/* wycheproof - special case tag */
43464 ++static const u8 enc_input088[] __initconst = {
43465 ++ 0x9a, 0x49, 0xc4, 0x0f, 0x8b, 0x48, 0xd7, 0xc6,
43466 ++ 0x6d, 0x1d, 0xb4, 0xe5, 0x3f, 0x20, 0xf2, 0xdd,
43467 ++ 0x4a, 0xaa, 0x24, 0x1d, 0xda, 0xb2, 0x6b, 0x5b,
43468 ++ 0xc0, 0xe2, 0x18, 0xb7, 0x2c, 0x33, 0x90, 0xf2,
43469 ++ 0xdf, 0x3e, 0xbd, 0x01, 0x76, 0x70, 0x44, 0x19,
43470 ++ 0x97, 0x2b, 0xcd, 0xbc, 0x6b, 0xbc, 0xb3, 0xe4,
43471 ++ 0xe7, 0x4a, 0x71, 0x52, 0x8e, 0xf5, 0x12, 0x63,
43472 ++ 0xce, 0x24, 0xe0, 0xd5, 0x75, 0xe0, 0xe4, 0x4d
43473 ++};
43474 ++static const u8 enc_output088[] __initconst = {
43475 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43476 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43477 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43478 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43479 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43480 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43481 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43482 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43483 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43484 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
43485 ++};
43486 ++static const u8 enc_assoc088[] __initconst = {
43487 ++ 0x7c, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43488 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43489 ++ 0xd9, 0xe7, 0x2c, 0x06, 0x4a, 0xc8, 0x96, 0x1f,
43490 ++ 0x3f, 0xa5, 0x85, 0xe0, 0xe2, 0xab, 0xd6, 0x00
43491 ++};
43492 ++static const u8 enc_nonce088[] __initconst = {
43493 ++ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
43494 ++ 0x08, 0x09, 0x0a, 0x0b
43495 ++};
43496 ++static const u8 enc_key088[] __initconst = {
43497 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
43498 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
43499 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
43500 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
43501 ++};
43502 ++
43503 ++/* wycheproof - special case tag */
43504 ++static const u8 enc_input089[] __initconst = {
43505 ++ 0x9a, 0x49, 0xc4, 0x0f, 0x8b, 0x48, 0xd7, 0xc6,
43506 ++ 0x6d, 0x1d, 0xb4, 0xe5, 0x3f, 0x20, 0xf2, 0xdd,
43507 ++ 0x4a, 0xaa, 0x24, 0x1d, 0xda, 0xb2, 0x6b, 0x5b,
43508 ++ 0xc0, 0xe2, 0x18, 0xb7, 0x2c, 0x33, 0x90, 0xf2,
43509 ++ 0xdf, 0x3e, 0xbd, 0x01, 0x76, 0x70, 0x44, 0x19,
43510 ++ 0x97, 0x2b, 0xcd, 0xbc, 0x6b, 0xbc, 0xb3, 0xe4,
43511 ++ 0xe7, 0x4a, 0x71, 0x52, 0x8e, 0xf5, 0x12, 0x63,
43512 ++ 0xce, 0x24, 0xe0, 0xd5, 0x75, 0xe0, 0xe4, 0x4d
43513 ++};
43514 ++static const u8 enc_output089[] __initconst = {
43515 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43516 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43517 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43518 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43519 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43520 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43521 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43522 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43523 ++ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80,
43524 ++ 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80
43525 ++};
43526 ++static const u8 enc_assoc089[] __initconst = {
43527 ++ 0x65, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43528 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43529 ++ 0x95, 0xaf, 0x0f, 0x4d, 0x0b, 0x68, 0x6e, 0xae,
43530 ++ 0xcc, 0xca, 0x43, 0x07, 0xd5, 0x96, 0xf5, 0x02
43531 ++};
43532 ++static const u8 enc_nonce089[] __initconst = {
43533 ++ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
43534 ++ 0x08, 0x09, 0x0a, 0x0b
43535 ++};
43536 ++static const u8 enc_key089[] __initconst = {
43537 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
43538 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
43539 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
43540 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
43541 ++};
43542 ++
43543 ++/* wycheproof - special case tag */
43544 ++static const u8 enc_input090[] __initconst = {
43545 ++ 0x9a, 0x49, 0xc4, 0x0f, 0x8b, 0x48, 0xd7, 0xc6,
43546 ++ 0x6d, 0x1d, 0xb4, 0xe5, 0x3f, 0x20, 0xf2, 0xdd,
43547 ++ 0x4a, 0xaa, 0x24, 0x1d, 0xda, 0xb2, 0x6b, 0x5b,
43548 ++ 0xc0, 0xe2, 0x18, 0xb7, 0x2c, 0x33, 0x90, 0xf2,
43549 ++ 0xdf, 0x3e, 0xbd, 0x01, 0x76, 0x70, 0x44, 0x19,
43550 ++ 0x97, 0x2b, 0xcd, 0xbc, 0x6b, 0xbc, 0xb3, 0xe4,
43551 ++ 0xe7, 0x4a, 0x71, 0x52, 0x8e, 0xf5, 0x12, 0x63,
43552 ++ 0xce, 0x24, 0xe0, 0xd5, 0x75, 0xe0, 0xe4, 0x4d
43553 ++};
43554 ++static const u8 enc_output090[] __initconst = {
43555 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43556 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43557 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43558 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43559 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43560 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43561 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43562 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43563 ++ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f,
43564 ++ 0xff, 0xff, 0xff, 0x7f, 0xff, 0xff, 0xff, 0x7f
43565 ++};
43566 ++static const u8 enc_assoc090[] __initconst = {
43567 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43568 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43569 ++ 0x85, 0x40, 0xb4, 0x64, 0x35, 0x77, 0x07, 0xbe,
43570 ++ 0x3a, 0x39, 0xd5, 0x5c, 0x34, 0xf8, 0xbc, 0xb3
43571 ++};
43572 ++static const u8 enc_nonce090[] __initconst = {
43573 ++ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
43574 ++ 0x08, 0x09, 0x0a, 0x0b
43575 ++};
43576 ++static const u8 enc_key090[] __initconst = {
43577 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
43578 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
43579 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
43580 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
43581 ++};
43582 ++
43583 ++/* wycheproof - special case tag */
43584 ++static const u8 enc_input091[] __initconst = {
43585 ++ 0x9a, 0x49, 0xc4, 0x0f, 0x8b, 0x48, 0xd7, 0xc6,
43586 ++ 0x6d, 0x1d, 0xb4, 0xe5, 0x3f, 0x20, 0xf2, 0xdd,
43587 ++ 0x4a, 0xaa, 0x24, 0x1d, 0xda, 0xb2, 0x6b, 0x5b,
43588 ++ 0xc0, 0xe2, 0x18, 0xb7, 0x2c, 0x33, 0x90, 0xf2,
43589 ++ 0xdf, 0x3e, 0xbd, 0x01, 0x76, 0x70, 0x44, 0x19,
43590 ++ 0x97, 0x2b, 0xcd, 0xbc, 0x6b, 0xbc, 0xb3, 0xe4,
43591 ++ 0xe7, 0x4a, 0x71, 0x52, 0x8e, 0xf5, 0x12, 0x63,
43592 ++ 0xce, 0x24, 0xe0, 0xd5, 0x75, 0xe0, 0xe4, 0x4d
43593 ++};
43594 ++static const u8 enc_output091[] __initconst = {
43595 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43596 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43597 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43598 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43599 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43600 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43601 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43602 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43603 ++ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
43604 ++ 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00
43605 ++};
43606 ++static const u8 enc_assoc091[] __initconst = {
43607 ++ 0x4f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43608 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43609 ++ 0x66, 0x23, 0xd9, 0x90, 0xb8, 0x98, 0xd8, 0x30,
43610 ++ 0xd2, 0x12, 0xaf, 0x23, 0x83, 0x33, 0x07, 0x01
43611 ++};
43612 ++static const u8 enc_nonce091[] __initconst = {
43613 ++ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
43614 ++ 0x08, 0x09, 0x0a, 0x0b
43615 ++};
43616 ++static const u8 enc_key091[] __initconst = {
43617 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
43618 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
43619 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
43620 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
43621 ++};
43622 ++
43623 ++/* wycheproof - special case tag */
43624 ++static const u8 enc_input092[] __initconst = {
43625 ++ 0x9a, 0x49, 0xc4, 0x0f, 0x8b, 0x48, 0xd7, 0xc6,
43626 ++ 0x6d, 0x1d, 0xb4, 0xe5, 0x3f, 0x20, 0xf2, 0xdd,
43627 ++ 0x4a, 0xaa, 0x24, 0x1d, 0xda, 0xb2, 0x6b, 0x5b,
43628 ++ 0xc0, 0xe2, 0x18, 0xb7, 0x2c, 0x33, 0x90, 0xf2,
43629 ++ 0xdf, 0x3e, 0xbd, 0x01, 0x76, 0x70, 0x44, 0x19,
43630 ++ 0x97, 0x2b, 0xcd, 0xbc, 0x6b, 0xbc, 0xb3, 0xe4,
43631 ++ 0xe7, 0x4a, 0x71, 0x52, 0x8e, 0xf5, 0x12, 0x63,
43632 ++ 0xce, 0x24, 0xe0, 0xd5, 0x75, 0xe0, 0xe4, 0x4d
43633 ++};
43634 ++static const u8 enc_output092[] __initconst = {
43635 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43636 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43637 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43638 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43639 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43640 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43641 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43642 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43643 ++ 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00,
43644 ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
43645 ++};
43646 ++static const u8 enc_assoc092[] __initconst = {
43647 ++ 0x83, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43648 ++ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
43649 ++ 0x5f, 0x16, 0xd0, 0x9f, 0x17, 0x78, 0x72, 0x11,
43650 ++ 0xb7, 0xd4, 0x84, 0xe0, 0x24, 0xf8, 0x97, 0x01
43651 ++};
43652 ++static const u8 enc_nonce092[] __initconst = {
43653 ++ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
43654 ++ 0x08, 0x09, 0x0a, 0x0b
43655 ++};
43656 ++static const u8 enc_key092[] __initconst = {
43657 ++ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
43658 ++ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
43659 ++ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
43660 ++ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
43661 ++};
43662 ++
43663 + /* wycheproof - edge case intermediate sums in poly1305 */
43664 + static const u8 enc_input093[] __initconst = {
43665 + 0x00, 0x52, 0x35, 0xd2, 0xa9, 0x19, 0xf2, 0x8d,
43666 +@@ -4455,6 +5954,86 @@ chacha20poly1305_enc_vectors[] __initconst = {
43667 + sizeof(enc_input011), sizeof(enc_assoc011), sizeof(enc_nonce011) },
43668 + { enc_input012, enc_output012, enc_assoc012, enc_nonce012, enc_key012,
43669 + sizeof(enc_input012), sizeof(enc_assoc012), sizeof(enc_nonce012) },
43670 ++ { enc_input013, enc_output013, enc_assoc013, enc_nonce013, enc_key013,
43671 ++ sizeof(enc_input013), sizeof(enc_assoc013), sizeof(enc_nonce013) },
43672 ++ { enc_input014, enc_output014, enc_assoc014, enc_nonce014, enc_key014,
43673 ++ sizeof(enc_input014), sizeof(enc_assoc014), sizeof(enc_nonce014) },
43674 ++ { enc_input015, enc_output015, enc_assoc015, enc_nonce015, enc_key015,
43675 ++ sizeof(enc_input015), sizeof(enc_assoc015), sizeof(enc_nonce015) },
43676 ++ { enc_input016, enc_output016, enc_assoc016, enc_nonce016, enc_key016,
43677 ++ sizeof(enc_input016), sizeof(enc_assoc016), sizeof(enc_nonce016) },
43678 ++ { enc_input017, enc_output017, enc_assoc017, enc_nonce017, enc_key017,
43679 ++ sizeof(enc_input017), sizeof(enc_assoc017), sizeof(enc_nonce017) },
43680 ++ { enc_input018, enc_output018, enc_assoc018, enc_nonce018, enc_key018,
43681 ++ sizeof(enc_input018), sizeof(enc_assoc018), sizeof(enc_nonce018) },
43682 ++ { enc_input019, enc_output019, enc_assoc019, enc_nonce019, enc_key019,
43683 ++ sizeof(enc_input019), sizeof(enc_assoc019), sizeof(enc_nonce019) },
43684 ++ { enc_input020, enc_output020, enc_assoc020, enc_nonce020, enc_key020,
43685 ++ sizeof(enc_input020), sizeof(enc_assoc020), sizeof(enc_nonce020) },
43686 ++ { enc_input021, enc_output021, enc_assoc021, enc_nonce021, enc_key021,
43687 ++ sizeof(enc_input021), sizeof(enc_assoc021), sizeof(enc_nonce021) },
43688 ++ { enc_input022, enc_output022, enc_assoc022, enc_nonce022, enc_key022,
43689 ++ sizeof(enc_input022), sizeof(enc_assoc022), sizeof(enc_nonce022) },
43690 ++ { enc_input023, enc_output023, enc_assoc023, enc_nonce023, enc_key023,
43691 ++ sizeof(enc_input023), sizeof(enc_assoc023), sizeof(enc_nonce023) },
43692 ++ { enc_input024, enc_output024, enc_assoc024, enc_nonce024, enc_key024,
43693 ++ sizeof(enc_input024), sizeof(enc_assoc024), sizeof(enc_nonce024) },
43694 ++ { enc_input025, enc_output025, enc_assoc025, enc_nonce025, enc_key025,
43695 ++ sizeof(enc_input025), sizeof(enc_assoc025), sizeof(enc_nonce025) },
43696 ++ { enc_input026, enc_output026, enc_assoc026, enc_nonce026, enc_key026,
43697 ++ sizeof(enc_input026), sizeof(enc_assoc026), sizeof(enc_nonce026) },
43698 ++ { enc_input027, enc_output027, enc_assoc027, enc_nonce027, enc_key027,
43699 ++ sizeof(enc_input027), sizeof(enc_assoc027), sizeof(enc_nonce027) },
43700 ++ { enc_input028, enc_output028, enc_assoc028, enc_nonce028, enc_key028,
43701 ++ sizeof(enc_input028), sizeof(enc_assoc028), sizeof(enc_nonce028) },
43702 ++ { enc_input029, enc_output029, enc_assoc029, enc_nonce029, enc_key029,
43703 ++ sizeof(enc_input029), sizeof(enc_assoc029), sizeof(enc_nonce029) },
43704 ++ { enc_input030, enc_output030, enc_assoc030, enc_nonce030, enc_key030,
43705 ++ sizeof(enc_input030), sizeof(enc_assoc030), sizeof(enc_nonce030) },
43706 ++ { enc_input031, enc_output031, enc_assoc031, enc_nonce031, enc_key031,
43707 ++ sizeof(enc_input031), sizeof(enc_assoc031), sizeof(enc_nonce031) },
43708 ++ { enc_input032, enc_output032, enc_assoc032, enc_nonce032, enc_key032,
43709 ++ sizeof(enc_input032), sizeof(enc_assoc032), sizeof(enc_nonce032) },
43710 ++ { enc_input033, enc_output033, enc_assoc033, enc_nonce033, enc_key033,
43711 ++ sizeof(enc_input033), sizeof(enc_assoc033), sizeof(enc_nonce033) },
43712 ++ { enc_input034, enc_output034, enc_assoc034, enc_nonce034, enc_key034,
43713 ++ sizeof(enc_input034), sizeof(enc_assoc034), sizeof(enc_nonce034) },
43714 ++ { enc_input035, enc_output035, enc_assoc035, enc_nonce035, enc_key035,
43715 ++ sizeof(enc_input035), sizeof(enc_assoc035), sizeof(enc_nonce035) },
43716 ++ { enc_input036, enc_output036, enc_assoc036, enc_nonce036, enc_key036,
43717 ++ sizeof(enc_input036), sizeof(enc_assoc036), sizeof(enc_nonce036) },
43718 ++ { enc_input037, enc_output037, enc_assoc037, enc_nonce037, enc_key037,
43719 ++ sizeof(enc_input037), sizeof(enc_assoc037), sizeof(enc_nonce037) },
43720 ++ { enc_input038, enc_output038, enc_assoc038, enc_nonce038, enc_key038,
43721 ++ sizeof(enc_input038), sizeof(enc_assoc038), sizeof(enc_nonce038) },
43722 ++ { enc_input039, enc_output039, enc_assoc039, enc_nonce039, enc_key039,
43723 ++ sizeof(enc_input039), sizeof(enc_assoc039), sizeof(enc_nonce039) },
43724 ++ { enc_input040, enc_output040, enc_assoc040, enc_nonce040, enc_key040,
43725 ++ sizeof(enc_input040), sizeof(enc_assoc040), sizeof(enc_nonce040) },
43726 ++ { enc_input041, enc_output041, enc_assoc041, enc_nonce041, enc_key041,
43727 ++ sizeof(enc_input041), sizeof(enc_assoc041), sizeof(enc_nonce041) },
43728 ++ { enc_input042, enc_output042, enc_assoc042, enc_nonce042, enc_key042,
43729 ++ sizeof(enc_input042), sizeof(enc_assoc042), sizeof(enc_nonce042) },
43730 ++ { enc_input043, enc_output043, enc_assoc043, enc_nonce043, enc_key043,
43731 ++ sizeof(enc_input043), sizeof(enc_assoc043), sizeof(enc_nonce043) },
43732 ++ { enc_input044, enc_output044, enc_assoc044, enc_nonce044, enc_key044,
43733 ++ sizeof(enc_input044), sizeof(enc_assoc044), sizeof(enc_nonce044) },
43734 ++ { enc_input045, enc_output045, enc_assoc045, enc_nonce045, enc_key045,
43735 ++ sizeof(enc_input045), sizeof(enc_assoc045), sizeof(enc_nonce045) },
43736 ++ { enc_input046, enc_output046, enc_assoc046, enc_nonce046, enc_key046,
43737 ++ sizeof(enc_input046), sizeof(enc_assoc046), sizeof(enc_nonce046) },
43738 ++ { enc_input047, enc_output047, enc_assoc047, enc_nonce047, enc_key047,
43739 ++ sizeof(enc_input047), sizeof(enc_assoc047), sizeof(enc_nonce047) },
43740 ++ { enc_input048, enc_output048, enc_assoc048, enc_nonce048, enc_key048,
43741 ++ sizeof(enc_input048), sizeof(enc_assoc048), sizeof(enc_nonce048) },
43742 ++ { enc_input049, enc_output049, enc_assoc049, enc_nonce049, enc_key049,
43743 ++ sizeof(enc_input049), sizeof(enc_assoc049), sizeof(enc_nonce049) },
43744 ++ { enc_input050, enc_output050, enc_assoc050, enc_nonce050, enc_key050,
43745 ++ sizeof(enc_input050), sizeof(enc_assoc050), sizeof(enc_nonce050) },
43746 ++ { enc_input051, enc_output051, enc_assoc051, enc_nonce051, enc_key051,
43747 ++ sizeof(enc_input051), sizeof(enc_assoc051), sizeof(enc_nonce051) },
43748 ++ { enc_input052, enc_output052, enc_assoc052, enc_nonce052, enc_key052,
43749 ++ sizeof(enc_input052), sizeof(enc_assoc052), sizeof(enc_nonce052) },
43750 + { enc_input053, enc_output053, enc_assoc053, enc_nonce053, enc_key053,
43751 + sizeof(enc_input053), sizeof(enc_assoc053), sizeof(enc_nonce053) },
43752 + { enc_input054, enc_output054, enc_assoc054, enc_nonce054, enc_key054,
43753 +@@ -4497,6 +6076,10 @@ chacha20poly1305_enc_vectors[] __initconst = {
43754 + sizeof(enc_input072), sizeof(enc_assoc072), sizeof(enc_nonce072) },
43755 + { enc_input073, enc_output073, enc_assoc073, enc_nonce073, enc_key073,
43756 + sizeof(enc_input073), sizeof(enc_assoc073), sizeof(enc_nonce073) },
43757 ++ { enc_input074, enc_output074, enc_assoc074, enc_nonce074, enc_key074,
43758 ++ sizeof(enc_input074), sizeof(enc_assoc074), sizeof(enc_nonce074) },
43759 ++ { enc_input075, enc_output075, enc_assoc075, enc_nonce075, enc_key075,
43760 ++ sizeof(enc_input075), sizeof(enc_assoc075), sizeof(enc_nonce075) },
43761 + { enc_input076, enc_output076, enc_assoc076, enc_nonce076, enc_key076,
43762 + sizeof(enc_input076), sizeof(enc_assoc076), sizeof(enc_nonce076) },
43763 + { enc_input077, enc_output077, enc_assoc077, enc_nonce077, enc_key077,
43764 +@@ -4517,6 +6100,20 @@ chacha20poly1305_enc_vectors[] __initconst = {
43765 + sizeof(enc_input084), sizeof(enc_assoc084), sizeof(enc_nonce084) },
43766 + { enc_input085, enc_output085, enc_assoc085, enc_nonce085, enc_key085,
43767 + sizeof(enc_input085), sizeof(enc_assoc085), sizeof(enc_nonce085) },
43768 ++ { enc_input086, enc_output086, enc_assoc086, enc_nonce086, enc_key086,
43769 ++ sizeof(enc_input086), sizeof(enc_assoc086), sizeof(enc_nonce086) },
43770 ++ { enc_input087, enc_output087, enc_assoc087, enc_nonce087, enc_key087,
43771 ++ sizeof(enc_input087), sizeof(enc_assoc087), sizeof(enc_nonce087) },
43772 ++ { enc_input088, enc_output088, enc_assoc088, enc_nonce088, enc_key088,
43773 ++ sizeof(enc_input088), sizeof(enc_assoc088), sizeof(enc_nonce088) },
43774 ++ { enc_input089, enc_output089, enc_assoc089, enc_nonce089, enc_key089,
43775 ++ sizeof(enc_input089), sizeof(enc_assoc089), sizeof(enc_nonce089) },
43776 ++ { enc_input090, enc_output090, enc_assoc090, enc_nonce090, enc_key090,
43777 ++ sizeof(enc_input090), sizeof(enc_assoc090), sizeof(enc_nonce090) },
43778 ++ { enc_input091, enc_output091, enc_assoc091, enc_nonce091, enc_key091,
43779 ++ sizeof(enc_input091), sizeof(enc_assoc091), sizeof(enc_nonce091) },
43780 ++ { enc_input092, enc_output092, enc_assoc092, enc_nonce092, enc_key092,
43781 ++ sizeof(enc_input092), sizeof(enc_assoc092), sizeof(enc_nonce092) },
43782 + { enc_input093, enc_output093, enc_assoc093, enc_nonce093, enc_key093,
43783 + sizeof(enc_input093), sizeof(enc_assoc093), sizeof(enc_nonce093) },
43784 + { enc_input094, enc_output094, enc_assoc094, enc_nonce094, enc_key094,
43785 +@@ -7224,6 +8821,43 @@ xchacha20poly1305_dec_vectors[] __initconst = {
43786 + sizeof(xdec_input001), sizeof(xdec_assoc001), sizeof(xdec_nonce001) }
43787 + };
43788 +
43789 ++/* Selftest-only helper (useful just for testing the underlying primitives and
43790 ++ * their interactions): encrypts src with a 12-byte nonce into dst and appends the Poly1305 tag.
43791 ++ */
43792 ++static void __init
43793 ++chacha20poly1305_encrypt_bignonce(u8 *dst, const u8 *src, const size_t src_len,
43794 ++ const u8 *ad, const size_t ad_len,
43795 ++ const u8 nonce[12],
43796 ++ const u8 key[CHACHA20POLY1305_KEY_SIZE])
43797 ++{
43798 ++ const u8 *pad0 = page_address(ZERO_PAGE(0));
43799 ++ struct poly1305_desc_ctx poly1305_state;
43800 ++ u32 chacha20_state[CHACHA_STATE_WORDS];
43801 ++ union {
43802 ++ u8 block0[POLY1305_KEY_SIZE];
43803 ++ __le64 lens[2];
43804 ++ } b = {{ 0 }};
43805 ++ u8 bottom_row[16] = { 0 }; /* first 4 bytes stay zero; the nonce fills the other 12 */
43806 ++ u32 le_key[8];
43807 ++ int i;
43808 ++
43809 ++ memcpy(&bottom_row[4], nonce, 12);
43810 ++ for (i = 0; i < 8; ++i) /* load the key as eight little-endian 32-bit words */
43811 ++ le_key[i] = get_unaligned_le32(key + sizeof(le_key[i]) * i);
43812 ++ chacha_init(chacha20_state, le_key, bottom_row);
43813 ++ chacha20_crypt(chacha20_state, b.block0, b.block0, sizeof(b.block0)); /* in place on the zeroed block0 */
43814 ++ poly1305_init(&poly1305_state, b.block0); /* the processed block0 is used as the Poly1305 key */
43815 ++ poly1305_update(&poly1305_state, ad, ad_len);
43816 ++ poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf); /* pad the ad from pad0 up to a 16-byte boundary */
43817 ++ chacha20_crypt(chacha20_state, dst, src, src_len);
43818 ++ poly1305_update(&poly1305_state, dst, src_len); /* authenticate the output written to dst, not src */
43819 ++ poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf); /* same padding rule for the ciphertext */
43820 ++ b.lens[0] = cpu_to_le64(ad_len); /* lens shares storage with block0, which is no longer needed */
43821 ++ b.lens[1] = cpu_to_le64(src_len);
43822 ++ poly1305_update(&poly1305_state, (u8 *)b.lens, sizeof(b.lens));
43823 ++ poly1305_final(&poly1305_state, dst + src_len); /* tag is written right after the src_len output bytes */
43824 ++}
43825 ++
43826 + static void __init
43827 + chacha20poly1305_selftest_encrypt(u8 *dst, const u8 *src, const size_t src_len,
43828 + const u8 *ad, const size_t ad_len,
43829 +@@ -7233,6 +8867,9 @@ chacha20poly1305_selftest_encrypt(u8 *dst, const u8 *src, const size_t src_len,
43830 + if (nonce_len == 8)
43831 + chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len,
43832 + get_unaligned_le64(nonce), key);
43833 ++ else if (nonce_len == 12)
43834 ++ chacha20poly1305_encrypt_bignonce(dst, src, src_len, ad,
43835 ++ ad_len, nonce, key);
43836 + else
43837 + BUG();
43838 + }
43839 +@@ -7248,14 +8885,14 @@ decryption_success(bool func_ret, bool expect_failure, int memcmp_result)
43840 + bool __init chacha20poly1305_selftest(void)
43841 + {
43842 + enum { MAXIMUM_TEST_BUFFER_LEN = 1UL << 12 };
43843 +- size_t i;
43844 +- u8 *computed_output = NULL, *heap_src = NULL;
43845 +- struct scatterlist sg_src;
43846 ++ size_t i, j, k, total_len;
43847 ++ u8 *computed_output = NULL, *input = NULL;
43848 + bool success = true, ret;
43849 ++ struct scatterlist sg_src[3];
43850 +
43851 +- heap_src = kmalloc(MAXIMUM_TEST_BUFFER_LEN, GFP_KERNEL);
43852 + computed_output = kmalloc(MAXIMUM_TEST_BUFFER_LEN, GFP_KERNEL);
43853 +- if (!heap_src || !computed_output) {
43854 ++ input = kmalloc(MAXIMUM_TEST_BUFFER_LEN, GFP_KERNEL);
43855 ++ if (!computed_output || !input) {
43856 + pr_err("chacha20poly1305 self-test malloc: FAIL\n");
43857 + success = false;
43858 + goto out;
43859 +@@ -7284,17 +8921,17 @@ bool __init chacha20poly1305_selftest(void)
43860 + for (i = 0; i < ARRAY_SIZE(chacha20poly1305_enc_vectors); ++i) {
43861 + if (chacha20poly1305_enc_vectors[i].nlen != 8)
43862 + continue;
43863 +- memcpy(heap_src, chacha20poly1305_enc_vectors[i].input,
43864 ++ memcpy(computed_output, chacha20poly1305_enc_vectors[i].input,
43865 + chacha20poly1305_enc_vectors[i].ilen);
43866 +- sg_init_one(&sg_src, heap_src,
43867 ++ sg_init_one(sg_src, computed_output,
43868 + chacha20poly1305_enc_vectors[i].ilen + POLY1305_DIGEST_SIZE);
43869 +- chacha20poly1305_encrypt_sg_inplace(&sg_src,
43870 ++ ret = chacha20poly1305_encrypt_sg_inplace(sg_src,
43871 + chacha20poly1305_enc_vectors[i].ilen,
43872 + chacha20poly1305_enc_vectors[i].assoc,
43873 + chacha20poly1305_enc_vectors[i].alen,
43874 + get_unaligned_le64(chacha20poly1305_enc_vectors[i].nonce),
43875 + chacha20poly1305_enc_vectors[i].key);
43876 +- if (memcmp(heap_src,
43877 ++ if (!ret || memcmp(computed_output,
43878 + chacha20poly1305_enc_vectors[i].output,
43879 + chacha20poly1305_enc_vectors[i].ilen +
43880 + POLY1305_DIGEST_SIZE)) {
43881 +@@ -7326,11 +8963,11 @@ bool __init chacha20poly1305_selftest(void)
43882 + }
43883 +
43884 + for (i = 0; i < ARRAY_SIZE(chacha20poly1305_dec_vectors); ++i) {
43885 +- memcpy(heap_src, chacha20poly1305_dec_vectors[i].input,
43886 ++ memcpy(computed_output, chacha20poly1305_dec_vectors[i].input,
43887 + chacha20poly1305_dec_vectors[i].ilen);
43888 +- sg_init_one(&sg_src, heap_src,
43889 ++ sg_init_one(sg_src, computed_output,
43890 + chacha20poly1305_dec_vectors[i].ilen);
43891 +- ret = chacha20poly1305_decrypt_sg_inplace(&sg_src,
43892 ++ ret = chacha20poly1305_decrypt_sg_inplace(sg_src,
43893 + chacha20poly1305_dec_vectors[i].ilen,
43894 + chacha20poly1305_dec_vectors[i].assoc,
43895 + chacha20poly1305_dec_vectors[i].alen,
43896 +@@ -7338,7 +8975,7 @@ bool __init chacha20poly1305_selftest(void)
43897 + chacha20poly1305_dec_vectors[i].key);
43898 + if (!decryption_success(ret,
43899 + chacha20poly1305_dec_vectors[i].failure,
43900 +- memcmp(heap_src, chacha20poly1305_dec_vectors[i].output,
43901 ++ memcmp(computed_output, chacha20poly1305_dec_vectors[i].output,
43902 + chacha20poly1305_dec_vectors[i].ilen -
43903 + POLY1305_DIGEST_SIZE))) {
43904 + pr_err("chacha20poly1305 sg decryption self-test %zu: FAIL\n",
43905 +@@ -7365,6 +9002,7 @@ bool __init chacha20poly1305_selftest(void)
43906 + success = false;
43907 + }
43908 + }
43909 ++
43910 + for (i = 0; i < ARRAY_SIZE(xchacha20poly1305_dec_vectors); ++i) {
43911 + memset(computed_output, 0, MAXIMUM_TEST_BUFFER_LEN);
43912 + ret = xchacha20poly1305_decrypt(computed_output,
43913 +@@ -7386,8 +9024,54 @@ bool __init chacha20poly1305_selftest(void)
43914 + }
43915 + }
43916 +
43917 ++ for (total_len = POLY1305_DIGEST_SIZE; IS_ENABLED(DEBUG_CHACHA20POLY1305_SLOW_CHUNK_TEST)
43918 ++ && total_len <= 1 << 10; ++total_len) {
43919 ++ for (i = 0; i <= total_len; ++i) {
43920 ++ for (j = i; j <= total_len; ++j) {
43921 ++ sg_init_table(sg_src, 3);
43922 ++ sg_set_buf(&sg_src[0], input, i);
43923 ++ sg_set_buf(&sg_src[1], input + i, j - i);
43924 ++ sg_set_buf(&sg_src[2], input + j, total_len - j);
43925 ++ memset(computed_output, 0, total_len);
43926 ++ memset(input, 0, total_len);
43927 ++
43928 ++ if (!chacha20poly1305_encrypt_sg_inplace(sg_src,
43929 ++ total_len - POLY1305_DIGEST_SIZE, NULL, 0,
43930 ++ 0, enc_key001))
43931 ++ goto chunkfail;
43932 ++ chacha20poly1305_encrypt(computed_output,
43933 ++ computed_output,
43934 ++ total_len - POLY1305_DIGEST_SIZE, NULL, 0, 0,
43935 ++ enc_key001);
43936 ++ if (memcmp(computed_output, input, total_len))
43937 ++ goto chunkfail;
43938 ++ if (!chacha20poly1305_decrypt(computed_output,
43939 ++ input, total_len, NULL, 0, 0, enc_key001))
43940 ++ goto chunkfail;
43941 ++ for (k = 0; k < total_len - POLY1305_DIGEST_SIZE; ++k) {
43942 ++ if (computed_output[k])
43943 ++ goto chunkfail;
43944 ++ }
43945 ++ if (!chacha20poly1305_decrypt_sg_inplace(sg_src,
43946 ++ total_len, NULL, 0, 0, enc_key001))
43947 ++ goto chunkfail;
43948 ++ for (k = 0; k < total_len - POLY1305_DIGEST_SIZE; ++k) {
43949 ++ if (input[k])
43950 ++ goto chunkfail;
43951 ++ }
43952 ++ continue;
43953 ++
43954 ++ chunkfail:
43955 ++ pr_err("chacha20poly1305 chunked self-test %zu/%zu/%zu: FAIL\n",
43956 ++ total_len, i, j);
43957 ++ success = false;
43958 ++ }
43959 ++
43960 ++ }
43961 ++ }
43962 ++
43963 + out:
43964 +- kfree(heap_src);
43965 + kfree(computed_output);
43966 ++ kfree(input);
43967 + return success;
43968 + }
43969 +--
43970 +cgit v1.2.3-4-ga26e
43971 +
43972 +
43973 +From 2dba8d64e0583da48e21bbeb9991e3318526aecf Mon Sep 17 00:00:00 2001
43974 +From: "Jason A. Donenfeld" <Jason@×××××.com>
43975 +Date: Fri, 17 Jan 2020 11:42:22 +0100
43976 +Subject: crypto: x86/poly1305 - emit does base conversion itself
43977 +
43978 +commit f9e7fe32a792726186301423ff63a465d63386e1 upstream.
43979 +
43980 +The emit code does optional base conversion itself in assembly, so we
43981 +don't need to do that here. Also, neither one of these functions uses
43982 +simd instructions, so checking for that doesn't make sense either.
43983 +
43984 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
43985 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
43986 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
43987 +---
43988 + arch/x86/crypto/poly1305_glue.c | 8 ++------
43989 + 1 file changed, 2 insertions(+), 6 deletions(-)
43990 +
43991 +diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
43992 +index 657363588e0c..79bb58737d52 100644
43993 +--- a/arch/x86/crypto/poly1305_glue.c
43994 ++++ b/arch/x86/crypto/poly1305_glue.c
43995 +@@ -123,13 +123,9 @@ static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len,
43996 + static void poly1305_simd_emit(void *ctx, u8 mac[POLY1305_DIGEST_SIZE],
43997 + const u32 nonce[4])
43998 + {
43999 +- struct poly1305_arch_internal *state = ctx;
44000 +-
44001 +- if (!IS_ENABLED(CONFIG_AS_AVX) || !static_branch_likely(&poly1305_use_avx) ||
44002 +- !state->is_base2_26 || !crypto_simd_usable()) {
44003 +- convert_to_base2_64(ctx);
44004 ++ if (!IS_ENABLED(CONFIG_AS_AVX) || !static_branch_likely(&poly1305_use_avx))
44005 + poly1305_emit_x86_64(ctx, mac, nonce);
44006 +- } else
44007 ++ else
44008 + poly1305_emit_avx(ctx, mac, nonce);
44009 + }
44010 +
44011 +--
44012 +cgit v1.2.3-4-ga26e
44013 +
44014 +
44015 +From 29107919adb09029eeade09543d2113c77af64d9 Mon Sep 17 00:00:00 2001
44016 +From: Ard Biesheuvel <ardb@××××××.org>
44017 +Date: Fri, 17 Jan 2020 17:43:18 +0100
44018 +Subject: crypto: arm/chacha - fix build failure when kernel mode NEON is
44019 + disabled
44020 +
44021 +commit 0bc81767c5bd9d005fae1099fb39eb3688370cb1 upstream.
44022 +
44023 +When the ARM accelerated ChaCha driver is built as part of a configuration
44024 +that has kernel mode NEON disabled, we expect the compiler to propagate
44025 +the build time constant expression IS_ENABLED(CONFIG_KERNEL_MODE_NEON) in
44026 +a way that eliminates all the cross-object references to the actual NEON
44027 +routines, which allows the chacha-neon-core.o object to be omitted from
44028 +the build entirely.
44029 +
44030 +Unfortunately, this fails to work as expected in some cases, and we may
44031 +end up with a build error such as
44032 +
44033 + chacha-glue.c:(.text+0xc0): undefined reference to `chacha_4block_xor_neon'
44034 +
44035 +caused by the fact that chacha_doneon() has not been eliminated from the
44036 +object code, even though it will never be called in practice.
44037 +
44038 +Let's fix this by adding some IS_ENABLED(CONFIG_KERNEL_MODE_NEON) tests
44039 +that are not strictly needed from a logical point of view, but should
44040 +help the compiler infer that the NEON code paths are unreachable in
44041 +those cases.
44042 +
44043 +Fixes: b36d8c09e710c71f ("crypto: arm/chacha - remove dependency on generic ...")
44044 +Reported-by: Russell King <linux@××××××××××××.uk>
44045 +Cc: Arnd Bergmann <arnd@×××××.de>
44046 +Signed-off-by: Ard Biesheuvel <ardb@××××××.org>
44047 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
44048 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
44049 +---
44050 + arch/arm/crypto/chacha-glue.c | 4 ++--
44051 + 1 file changed, 2 insertions(+), 2 deletions(-)
44052 +
44053 +diff --git a/arch/arm/crypto/chacha-glue.c b/arch/arm/crypto/chacha-glue.c
44054 +index 7bdf8823066d..893692ed12b7 100644
44055 +--- a/arch/arm/crypto/chacha-glue.c
44056 ++++ b/arch/arm/crypto/chacha-glue.c
44057 +@@ -115,7 +115,7 @@ static int chacha_stream_xor(struct skcipher_request *req,
44058 + if (nbytes < walk.total)
44059 + nbytes = round_down(nbytes, walk.stride);
44060 +
44061 +- if (!neon) {
44062 ++ if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) {
44063 + chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr,
44064 + nbytes, state, ctx->nrounds);
44065 + state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE);
44066 +@@ -159,7 +159,7 @@ static int do_xchacha(struct skcipher_request *req, bool neon)
44067 +
44068 + chacha_init_generic(state, ctx->key, req->iv);
44069 +
44070 +- if (!neon) {
44071 ++ if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) {
44072 + hchacha_block_arm(state, subctx.key, ctx->nrounds);
44073 + } else {
44074 + kernel_neon_begin();
44075 +--
44076 +cgit v1.2.3-4-ga26e
44077 +
44078 +
44079 +From a2b73b527aaab157a27b7f7e13d2e1db01a1f73e Mon Sep 17 00:00:00 2001
44080 +From: "Jason A. Donenfeld" <Jason@×××××.com>
44081 +Date: Fri, 17 Jan 2020 12:01:36 +0100
44082 +Subject: crypto: Kconfig - allow tests to be disabled when manager is disabled
44083 +
44084 +commit 2343d1529aff8b552589f622c23932035ed7a05d upstream.
44085 +
44086 +The library code uses CRYPTO_MANAGER_DISABLE_TESTS to conditionalize its
44087 +tests, but the library code can also exist without CRYPTO_MANAGER. That
44088 +means on minimal configs, the test code winds up being built with no way
44089 +to disable it.
44090 +
44091 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
44092 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
44093 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
44094 +---
44095 + crypto/Kconfig | 4 ----
44096 + 1 file changed, 4 deletions(-)
44097 +
44098 +diff --git a/crypto/Kconfig b/crypto/Kconfig
44099 +index b8b738bcc312..8fcf630471dc 100644
44100 +--- a/crypto/Kconfig
44101 ++++ b/crypto/Kconfig
44102 +@@ -136,8 +136,6 @@ config CRYPTO_USER
44103 + Userspace configuration for cryptographic instantiations such as
44104 + cbc(aes).
44105 +
44106 +-if CRYPTO_MANAGER2
44107 +-
44108 + config CRYPTO_MANAGER_DISABLE_TESTS
44109 + bool "Disable run-time self tests"
44110 + default y
44111 +@@ -155,8 +153,6 @@ config CRYPTO_MANAGER_EXTRA_TESTS
44112 + This is intended for developer use only, as these tests take much
44113 + longer to run than the normal self tests.
44114 +
44115 +-endif # if CRYPTO_MANAGER2
44116 +-
44117 + config CRYPTO_GF128MUL
44118 + tristate
44119 +
44120 +--
44121 +cgit v1.2.3-4-ga26e
44122 +
44123 +
44124 +From 32e48b6be46bffe564c445bb9fd471fba4cf3bf0 Mon Sep 17 00:00:00 2001
44125 +From: "Jason A. Donenfeld" <Jason@×××××.com>
44126 +Date: Thu, 6 Feb 2020 12:42:01 +0100
44127 +Subject: crypto: chacha20poly1305 - prevent integer overflow on large input
44128 +
44129 +commit c9cc0517bba9f0213f1e55172feceb99e5512daf upstream.
44130 +
44131 +This code assigns src_len (size_t) to sl (int), which causes problems
44132 +when src_len is very large. Probably nobody in the kernel should be
44133 +passing this much data to chacha20poly1305 all in one go anyway, so I
44134 +don't think we need to change the algorithm or introduce larger types
44135 +or anything. But we should at least error out early in this case and
44136 +print a warning so that we get reports if this does happen and can look
44137 +into why anybody is possibly passing it that much data or if they're
44138 +accidentally passing -1 or similar.
44139 +
44140 +Fixes: d95312a3ccc0 ("crypto: lib/chacha20poly1305 - reimplement crypt_from_sg() routine")
44141 +Cc: Ard Biesheuvel <ardb@××××××.org>
44142 +Cc: stable@×××××××××××.org # 5.5+
44143 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
44144 +Acked-by: Ard Biesheuvel <ardb@××××××.org>
44145 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
44146 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
44147 +---
44148 + lib/crypto/chacha20poly1305.c | 3 +++
44149 + 1 file changed, 3 insertions(+)
44150 +
44151 +diff --git a/lib/crypto/chacha20poly1305.c b/lib/crypto/chacha20poly1305.c
44152 +index 6d83cafebc69..ad0699ce702f 100644
44153 +--- a/lib/crypto/chacha20poly1305.c
44154 ++++ b/lib/crypto/chacha20poly1305.c
44155 +@@ -235,6 +235,9 @@ bool chacha20poly1305_crypt_sg_inplace(struct scatterlist *src,
44156 + __le64 lens[2];
44157 + } b __aligned(16);
44158 +
44159 ++ if (WARN_ON(src_len > INT_MAX))
44160 ++ return false;
44161 ++
44162 + chacha_load_key(b.k, key);
44163 +
44164 + b.iv[0] = 0;
44165 +--
44166 +cgit v1.2.3-4-ga26e
44167 +
44168 +
44169 +From ba30e02313fcaf2d3652d919743844f38ef2b3b0 Mon Sep 17 00:00:00 2001
44170 +From: "Jason A. Donenfeld" <Jason@×××××.com>
44171 +Date: Sun, 1 Mar 2020 22:52:35 +0800
44172 +Subject: crypto: x86/curve25519 - support assemblers with no adx support
44173 +
44174 +commit 1579f1bc3b753d17a44de3457d5c6f4a5b14c752 upstream.
44175 +
44176 +Some older versions of GAS do not support the ADX instructions, similarly
44177 +to how they also don't support AVX and such. This commit adds the same
44178 +build-time detection mechanisms we use for AVX and others for ADX, and
44179 +then makes sure that the curve25519 library dispatcher calls the right
44180 +functions.
44181 +
44182 +Reported-by: Willy Tarreau <w@×××.eu>
44183 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
44184 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
44185 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
44186 +---
44187 + arch/x86/Makefile | 5 +++--
44188 + arch/x86/crypto/Makefile | 7 ++++++-
44189 + include/crypto/curve25519.h | 6 ++++--
44190 + 3 files changed, 13 insertions(+), 5 deletions(-)
44191 +
44192 +diff --git a/arch/x86/Makefile b/arch/x86/Makefile
44193 +index 94df0868804b..513a55562d75 100644
44194 +--- a/arch/x86/Makefile
44195 ++++ b/arch/x86/Makefile
44196 +@@ -194,9 +194,10 @@ avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1)
44197 + avx512_instr :=$(call as-instr,vpmovm2b %k1$(comma)%zmm5,-DCONFIG_AS_AVX512=1)
44198 + sha1_ni_instr :=$(call as-instr,sha1msg1 %xmm0$(comma)%xmm1,-DCONFIG_AS_SHA1_NI=1)
44199 + sha256_ni_instr :=$(call as-instr,sha256msg1 %xmm0$(comma)%xmm1,-DCONFIG_AS_SHA256_NI=1)
44200 ++adx_instr := $(call as-instr,adox %r10$(comma)%r10,-DCONFIG_AS_ADX=1)
44201 +
44202 +-KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(avx512_instr) $(sha1_ni_instr) $(sha256_ni_instr)
44203 +-KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(avx512_instr) $(sha1_ni_instr) $(sha256_ni_instr)
44204 ++KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(avx512_instr) $(sha1_ni_instr) $(sha256_ni_instr) $(adx_instr)
44205 ++KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr) $(avx512_instr) $(sha1_ni_instr) $(sha256_ni_instr) $(adx_instr)
44206 +
44207 + KBUILD_LDFLAGS := -m elf_$(UTS_MACHINE)
44208 +
44209 +diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
44210 +index b69e00bf20b8..8c2e9eadee8a 100644
44211 +--- a/arch/x86/crypto/Makefile
44212 ++++ b/arch/x86/crypto/Makefile
44213 +@@ -11,6 +11,7 @@ avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
44214 + avx512_supported :=$(call as-instr,vpmovm2b %k1$(comma)%zmm5,yes,no)
44215 + sha1_ni_supported :=$(call as-instr,sha1msg1 %xmm0$(comma)%xmm1,yes,no)
44216 + sha256_ni_supported :=$(call as-instr,sha256msg1 %xmm0$(comma)%xmm1,yes,no)
44217 ++adx_supported := $(call as-instr,adox %r10$(comma)%r10,yes,no)
44218 +
44219 + obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
44220 +
44221 +@@ -39,7 +40,11 @@ obj-$(CONFIG_CRYPTO_AEGIS128_AESNI_SSE2) += aegis128-aesni.o
44222 +
44223 + obj-$(CONFIG_CRYPTO_NHPOLY1305_SSE2) += nhpoly1305-sse2.o
44224 + obj-$(CONFIG_CRYPTO_NHPOLY1305_AVX2) += nhpoly1305-avx2.o
44225 +-obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o
44226 ++
44227 ++# These modules require the assembler to support ADX.
44228 ++ifeq ($(adx_supported),yes)
44229 ++ obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o
44230 ++endif
44231 +
44232 + # These modules require assembler to support AVX.
44233 + ifeq ($(avx_supported),yes)
44234 +diff --git a/include/crypto/curve25519.h b/include/crypto/curve25519.h
44235 +index 4e6dc840b159..9ecb3c1f0f15 100644
44236 +--- a/include/crypto/curve25519.h
44237 ++++ b/include/crypto/curve25519.h
44238 +@@ -33,7 +33,8 @@ bool __must_check curve25519(u8 mypublic[CURVE25519_KEY_SIZE],
44239 + const u8 secret[CURVE25519_KEY_SIZE],
44240 + const u8 basepoint[CURVE25519_KEY_SIZE])
44241 + {
44242 +- if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519))
44243 ++ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519) &&
44244 ++ (!IS_ENABLED(CONFIG_CRYPTO_CURVE25519_X86) || IS_ENABLED(CONFIG_AS_ADX)))
44245 + curve25519_arch(mypublic, secret, basepoint);
44246 + else
44247 + curve25519_generic(mypublic, secret, basepoint);
44248 +@@ -49,7 +50,8 @@ __must_check curve25519_generate_public(u8 pub[CURVE25519_KEY_SIZE],
44249 + CURVE25519_KEY_SIZE)))
44250 + return false;
44251 +
44252 +- if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519))
44253 ++ if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519) &&
44254 ++ (!IS_ENABLED(CONFIG_CRYPTO_CURVE25519_X86) || IS_ENABLED(CONFIG_AS_ADX)))
44255 + curve25519_base_arch(pub, secret);
44256 + else
44257 + curve25519_generic(pub, secret, curve25519_base_point);
44258 +--
44259 +cgit v1.2.3-4-ga26e
44260 +
44261 +
44262 +From 91165aa654ca65a40c205b1372efaabdc1344a48 Mon Sep 17 00:00:00 2001
44263 +From: "Jason A. Donenfeld" <Jason@×××××.com>
44264 +Date: Wed, 18 Mar 2020 20:27:32 -0600
44265 +Subject: crypto: arm64/chacha - correctly walk through blocks
44266 +
44267 +commit c8cfcb78c65877313cda7bcbace624d3dbd1f3b3 upstream.
44268 +
44269 +Prior, passing in chunks of 2, 3, or 4, followed by any additional
44270 +chunks would result in the chacha state counter getting out of sync,
44271 +resulting in incorrect encryption/decryption, which is a pretty nasty
44272 +crypto vuln: "why do images look weird on webpages?" WireGuard users
44273 +never experienced this prior, because we have always, out of tree, used
44274 +a different crypto library, until the recent Frankenzinc addition. This
44275 +commit fixes the issue by advancing the pointers and state counter by
44276 +the actual size processed. It also fixes up a bug in the (optional,
44277 +costly) stride test that prevented it from running on arm64.
44278 +
44279 +Fixes: b3aad5bad26a ("crypto: arm64/chacha - expose arm64 ChaCha routine as library function")
44280 +Reported-and-tested-by: Emil Renner Berthing <kernel@×××××.dk>
44281 +Cc: Ard Biesheuvel <ardb@××××××.org>
44282 +Cc: stable@×××××××××××.org # v5.5+
44283 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
44284 +Reviewed-by: Eric Biggers <ebiggers@××××××.com>
44285 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
44286 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
44287 +---
44288 + arch/arm64/crypto/chacha-neon-glue.c | 8 ++++----
44289 + lib/crypto/chacha20poly1305-selftest.c | 11 ++++++++---
44290 + 2 files changed, 12 insertions(+), 7 deletions(-)
44291 +
44292 +diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c
44293 +index 71c11d2e9fcd..218943612261 100644
44294 +--- a/arch/arm64/crypto/chacha-neon-glue.c
44295 ++++ b/arch/arm64/crypto/chacha-neon-glue.c
44296 +@@ -55,10 +55,10 @@ static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
44297 + break;
44298 + }
44299 + chacha_4block_xor_neon(state, dst, src, nrounds, l);
44300 +- bytes -= CHACHA_BLOCK_SIZE * 5;
44301 +- src += CHACHA_BLOCK_SIZE * 5;
44302 +- dst += CHACHA_BLOCK_SIZE * 5;
44303 +- state[12] += 5;
44304 ++ bytes -= l;
44305 ++ src += l;
44306 ++ dst += l;
44307 ++ state[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE);
44308 + }
44309 + }
44310 +
44311 +diff --git a/lib/crypto/chacha20poly1305-selftest.c b/lib/crypto/chacha20poly1305-selftest.c
44312 +index c391a91364e9..fa43deda2660 100644
44313 +--- a/lib/crypto/chacha20poly1305-selftest.c
44314 ++++ b/lib/crypto/chacha20poly1305-selftest.c
44315 +@@ -9028,10 +9028,15 @@ bool __init chacha20poly1305_selftest(void)
44316 + && total_len <= 1 << 10; ++total_len) {
44317 + for (i = 0; i <= total_len; ++i) {
44318 + for (j = i; j <= total_len; ++j) {
44319 ++ k = 0;
44320 + sg_init_table(sg_src, 3);
44321 +- sg_set_buf(&sg_src[0], input, i);
44322 +- sg_set_buf(&sg_src[1], input + i, j - i);
44323 +- sg_set_buf(&sg_src[2], input + j, total_len - j);
44324 ++ if (i)
44325 ++ sg_set_buf(&sg_src[k++], input, i);
44326 ++ if (j - i)
44327 ++ sg_set_buf(&sg_src[k++], input + i, j - i);
44328 ++ if (total_len - j)
44329 ++ sg_set_buf(&sg_src[k++], input + j, total_len - j);
44330 ++ sg_init_marker(sg_src, k);
44331 + memset(computed_output, 0, total_len);
44332 + memset(input, 0, total_len);
44333 +
44334 +--
44335 +cgit v1.2.3-4-ga26e
44336 +
44337 +
44338 +From 51f991728a3dee9beacf8bfa0817f0a9700c1e28 Mon Sep 17 00:00:00 2001
44339 +From: "Jason A. Donenfeld" <Jason@×××××.com>
44340 +Date: Mon, 20 Jan 2020 18:18:15 +0100
44341 +Subject: crypto: x86/curve25519 - replace with formally verified
44342 + implementation
44343 +
44344 +commit 07b586fe06625b0b610dc3d3a969c51913d143d4 upstream.
44345 +
44346 +This comes from INRIA's HACL*/Vale. It implements the same algorithm and
44347 +implementation strategy as the code it replaces, only this code has been
44348 +formally verified, sans the base point multiplication, which uses code
44349 +similar to prior, only it uses the formally verified field arithmetic
44350 +alongside reproducible ladder generation steps. This doesn't have a
44351 +pure-bmi2 version, which means haswell no longer benefits, but the
44352 +increased (doubled) code complexity is not worth it for a single
44353 +generation of chips that's already old.
44354 +
44355 +Performance-wise, this is around 1% slower on older microarchitectures,
44356 +and slightly faster on newer microarchitectures, mainly 10nm ones or
44357 +backports of 10nm to 14nm. This implementation is "everest" below:
44358 +
44359 +Xeon E5-2680 v4 (Broadwell)
44360 +
44361 + armfazh: 133340 cycles per call
44362 + everest: 133436 cycles per call
44363 +
44364 +Xeon Gold 5120 (Sky Lake Server)
44365 +
44366 + armfazh: 112636 cycles per call
44367 + everest: 113906 cycles per call
44368 +
44369 +Core i5-6300U (Sky Lake Client)
44370 +
44371 + armfazh: 116810 cycles per call
44372 + everest: 117916 cycles per call
44373 +
44374 +Core i7-7600U (Kaby Lake)
44375 +
44376 + armfazh: 119523 cycles per call
44377 + everest: 119040 cycles per call
44378 +
44379 +Core i7-8750H (Coffee Lake)
44380 +
44381 + armfazh: 113914 cycles per call
44382 + everest: 113650 cycles per call
44383 +
44384 +Core i9-9880H (Coffee Lake Refresh)
44385 +
44386 + armfazh: 112616 cycles per call
44387 + everest: 114082 cycles per call
44388 +
44389 +Core i3-8121U (Cannon Lake)
44390 +
44391 + armfazh: 113202 cycles per call
44392 + everest: 111382 cycles per call
44393 +
44394 +Core i7-8265U (Whiskey Lake)
44395 +
44396 + armfazh: 127307 cycles per call
44397 + everest: 127697 cycles per call
44398 +
44399 +Core i7-8550U (Kaby Lake Refresh)
44400 +
44401 + armfazh: 127522 cycles per call
44402 + everest: 127083 cycles per call
44403 +
44404 +Xeon Platinum 8275CL (Cascade Lake)
44405 +
44406 + armfazh: 114380 cycles per call
44407 + everest: 114656 cycles per call
44408 +
44409 +Achieving these kinds of results with formally verified code is quite
44410 +remarkable, especially considering that performance is favorable for
44411 +newer chips.
44412 +
44413 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
44414 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
44415 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
44416 +---
44417 + arch/x86/crypto/curve25519-x86_64.c | 3546 +++++++++++++----------------------
44418 + 1 file changed, 1292 insertions(+), 2254 deletions(-)
44419 +
44420 +diff --git a/arch/x86/crypto/curve25519-x86_64.c b/arch/x86/crypto/curve25519-x86_64.c
44421 +index eec7d2d24239..e4e58b8e9afe 100644
44422 +--- a/arch/x86/crypto/curve25519-x86_64.c
44423 ++++ b/arch/x86/crypto/curve25519-x86_64.c
44424 +@@ -1,8 +1,7 @@
44425 +-// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
44426 ++// SPDX-License-Identifier: GPL-2.0 OR MIT
44427 + /*
44428 +- * Copyright (c) 2017 Armando Faz <armfazh@××××××××××.br>. All Rights Reserved.
44429 +- * Copyright (C) 2018-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
44430 +- * Copyright (C) 2018 Samuel Neves <sneves@××××××.pt>. All Rights Reserved.
44431 ++ * Copyright (C) 2020 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
44432 ++ * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation
44433 + */
44434 +
44435 + #include <crypto/curve25519.h>
44436 +@@ -16,2337 +15,1378 @@
44437 + #include <asm/cpufeature.h>
44438 + #include <asm/processor.h>
44439 +
44440 +-static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_bmi2);
44441 +-static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_adx);
44442 +-
44443 +-enum { NUM_WORDS_ELTFP25519 = 4 };
44444 +-typedef __aligned(32) u64 eltfp25519_1w[NUM_WORDS_ELTFP25519];
44445 +-typedef __aligned(32) u64 eltfp25519_1w_buffer[2 * NUM_WORDS_ELTFP25519];
44446 +-
44447 +-#define mul_eltfp25519_1w_adx(c, a, b) do { \
44448 +- mul_256x256_integer_adx(m.buffer, a, b); \
44449 +- red_eltfp25519_1w_adx(c, m.buffer); \
44450 +-} while (0)
44451 +-
44452 +-#define mul_eltfp25519_1w_bmi2(c, a, b) do { \
44453 +- mul_256x256_integer_bmi2(m.buffer, a, b); \
44454 +- red_eltfp25519_1w_bmi2(c, m.buffer); \
44455 +-} while (0)
44456 +-
44457 +-#define sqr_eltfp25519_1w_adx(a) do { \
44458 +- sqr_256x256_integer_adx(m.buffer, a); \
44459 +- red_eltfp25519_1w_adx(a, m.buffer); \
44460 +-} while (0)
44461 +-
44462 +-#define sqr_eltfp25519_1w_bmi2(a) do { \
44463 +- sqr_256x256_integer_bmi2(m.buffer, a); \
44464 +- red_eltfp25519_1w_bmi2(a, m.buffer); \
44465 +-} while (0)
44466 +-
44467 +-#define mul_eltfp25519_2w_adx(c, a, b) do { \
44468 +- mul2_256x256_integer_adx(m.buffer, a, b); \
44469 +- red_eltfp25519_2w_adx(c, m.buffer); \
44470 +-} while (0)
44471 +-
44472 +-#define mul_eltfp25519_2w_bmi2(c, a, b) do { \
44473 +- mul2_256x256_integer_bmi2(m.buffer, a, b); \
44474 +- red_eltfp25519_2w_bmi2(c, m.buffer); \
44475 +-} while (0)
44476 +-
44477 +-#define sqr_eltfp25519_2w_adx(a) do { \
44478 +- sqr2_256x256_integer_adx(m.buffer, a); \
44479 +- red_eltfp25519_2w_adx(a, m.buffer); \
44480 +-} while (0)
44481 +-
44482 +-#define sqr_eltfp25519_2w_bmi2(a) do { \
44483 +- sqr2_256x256_integer_bmi2(m.buffer, a); \
44484 +- red_eltfp25519_2w_bmi2(a, m.buffer); \
44485 +-} while (0)
44486 +-
44487 +-#define sqrn_eltfp25519_1w_adx(a, times) do { \
44488 +- int ____counter = (times); \
44489 +- while (____counter-- > 0) \
44490 +- sqr_eltfp25519_1w_adx(a); \
44491 +-} while (0)
44492 +-
44493 +-#define sqrn_eltfp25519_1w_bmi2(a, times) do { \
44494 +- int ____counter = (times); \
44495 +- while (____counter-- > 0) \
44496 +- sqr_eltfp25519_1w_bmi2(a); \
44497 +-} while (0)
44498 +-
44499 +-#define copy_eltfp25519_1w(C, A) do { \
44500 +- (C)[0] = (A)[0]; \
44501 +- (C)[1] = (A)[1]; \
44502 +- (C)[2] = (A)[2]; \
44503 +- (C)[3] = (A)[3]; \
44504 +-} while (0)
44505 +-
44506 +-#define setzero_eltfp25519_1w(C) do { \
44507 +- (C)[0] = 0; \
44508 +- (C)[1] = 0; \
44509 +- (C)[2] = 0; \
44510 +- (C)[3] = 0; \
44511 +-} while (0)
44512 +-
44513 +-__aligned(32) static const u64 table_ladder_8k[252 * NUM_WORDS_ELTFP25519] = {
44514 +- /* 1 */ 0xfffffffffffffff3UL, 0xffffffffffffffffUL,
44515 +- 0xffffffffffffffffUL, 0x5fffffffffffffffUL,
44516 +- /* 2 */ 0x6b8220f416aafe96UL, 0x82ebeb2b4f566a34UL,
44517 +- 0xd5a9a5b075a5950fUL, 0x5142b2cf4b2488f4UL,
44518 +- /* 3 */ 0x6aaebc750069680cUL, 0x89cf7820a0f99c41UL,
44519 +- 0x2a58d9183b56d0f4UL, 0x4b5aca80e36011a4UL,
44520 +- /* 4 */ 0x329132348c29745dUL, 0xf4a2e616e1642fd7UL,
44521 +- 0x1e45bb03ff67bc34UL, 0x306912d0f42a9b4aUL,
44522 +- /* 5 */ 0xff886507e6af7154UL, 0x04f50e13dfeec82fUL,
44523 +- 0xaa512fe82abab5ceUL, 0x174e251a68d5f222UL,
44524 +- /* 6 */ 0xcf96700d82028898UL, 0x1743e3370a2c02c5UL,
44525 +- 0x379eec98b4e86eaaUL, 0x0c59888a51e0482eUL,
44526 +- /* 7 */ 0xfbcbf1d699b5d189UL, 0xacaef0d58e9fdc84UL,
44527 +- 0xc1c20d06231f7614UL, 0x2938218da274f972UL,
44528 +- /* 8 */ 0xf6af49beff1d7f18UL, 0xcc541c22387ac9c2UL,
44529 +- 0x96fcc9ef4015c56bUL, 0x69c1627c690913a9UL,
44530 +- /* 9 */ 0x7a86fd2f4733db0eUL, 0xfdb8c4f29e087de9UL,
44531 +- 0x095e4b1a8ea2a229UL, 0x1ad7a7c829b37a79UL,
44532 +- /* 10 */ 0x342d89cad17ea0c0UL, 0x67bedda6cced2051UL,
44533 +- 0x19ca31bf2bb42f74UL, 0x3df7b4c84980acbbUL,
44534 +- /* 11 */ 0xa8c6444dc80ad883UL, 0xb91e440366e3ab85UL,
44535 +- 0xc215cda00164f6d8UL, 0x3d867c6ef247e668UL,
44536 +- /* 12 */ 0xc7dd582bcc3e658cUL, 0xfd2c4748ee0e5528UL,
44537 +- 0xa0fd9b95cc9f4f71UL, 0x7529d871b0675ddfUL,
44538 +- /* 13 */ 0xb8f568b42d3cbd78UL, 0x1233011b91f3da82UL,
44539 +- 0x2dce6ccd4a7c3b62UL, 0x75e7fc8e9e498603UL,
44540 +- /* 14 */ 0x2f4f13f1fcd0b6ecUL, 0xf1a8ca1f29ff7a45UL,
44541 +- 0xc249c1a72981e29bUL, 0x6ebe0dbb8c83b56aUL,
44542 +- /* 15 */ 0x7114fa8d170bb222UL, 0x65a2dcd5bf93935fUL,
44543 +- 0xbdc41f68b59c979aUL, 0x2f0eef79a2ce9289UL,
44544 +- /* 16 */ 0x42ecbf0c083c37ceUL, 0x2930bc09ec496322UL,
44545 +- 0xf294b0c19cfeac0dUL, 0x3780aa4bedfabb80UL,
44546 +- /* 17 */ 0x56c17d3e7cead929UL, 0xe7cb4beb2e5722c5UL,
44547 +- 0x0ce931732dbfe15aUL, 0x41b883c7621052f8UL,
44548 +- /* 18 */ 0xdbf75ca0c3d25350UL, 0x2936be086eb1e351UL,
44549 +- 0xc936e03cb4a9b212UL, 0x1d45bf82322225aaUL,
44550 +- /* 19 */ 0xe81ab1036a024cc5UL, 0xe212201c304c9a72UL,
44551 +- 0xc5d73fba6832b1fcUL, 0x20ffdb5a4d839581UL,
44552 +- /* 20 */ 0xa283d367be5d0fadUL, 0x6c2b25ca8b164475UL,
44553 +- 0x9d4935467caaf22eUL, 0x5166408eee85ff49UL,
44554 +- /* 21 */ 0x3c67baa2fab4e361UL, 0xb3e433c67ef35cefUL,
44555 +- 0x5259729241159b1cUL, 0x6a621892d5b0ab33UL,
44556 +- /* 22 */ 0x20b74a387555cdcbUL, 0x532aa10e1208923fUL,
44557 +- 0xeaa17b7762281dd1UL, 0x61ab3443f05c44bfUL,
44558 +- /* 23 */ 0x257a6c422324def8UL, 0x131c6c1017e3cf7fUL,
44559 +- 0x23758739f630a257UL, 0x295a407a01a78580UL,
44560 +- /* 24 */ 0xf8c443246d5da8d9UL, 0x19d775450c52fa5dUL,
44561 +- 0x2afcfc92731bf83dUL, 0x7d10c8e81b2b4700UL,
44562 +- /* 25 */ 0xc8e0271f70baa20bUL, 0x993748867ca63957UL,
44563 +- 0x5412efb3cb7ed4bbUL, 0x3196d36173e62975UL,
44564 +- /* 26 */ 0xde5bcad141c7dffcUL, 0x47cc8cd2b395c848UL,
44565 +- 0xa34cd942e11af3cbUL, 0x0256dbf2d04ecec2UL,
44566 +- /* 27 */ 0x875ab7e94b0e667fUL, 0xcad4dd83c0850d10UL,
44567 +- 0x47f12e8f4e72c79fUL, 0x5f1a87bb8c85b19bUL,
44568 +- /* 28 */ 0x7ae9d0b6437f51b8UL, 0x12c7ce5518879065UL,
44569 +- 0x2ade09fe5cf77aeeUL, 0x23a05a2f7d2c5627UL,
44570 +- /* 29 */ 0x5908e128f17c169aUL, 0xf77498dd8ad0852dUL,
44571 +- 0x74b4c4ceab102f64UL, 0x183abadd10139845UL,
44572 +- /* 30 */ 0xb165ba8daa92aaacUL, 0xd5c5ef9599386705UL,
44573 +- 0xbe2f8f0cf8fc40d1UL, 0x2701e635ee204514UL,
44574 +- /* 31 */ 0x629fa80020156514UL, 0xf223868764a8c1ceUL,
44575 +- 0x5b894fff0b3f060eUL, 0x60d9944cf708a3faUL,
44576 +- /* 32 */ 0xaeea001a1c7a201fUL, 0xebf16a633ee2ce63UL,
44577 +- 0x6f7709594c7a07e1UL, 0x79b958150d0208cbUL,
44578 +- /* 33 */ 0x24b55e5301d410e7UL, 0xe3a34edff3fdc84dUL,
44579 +- 0xd88768e4904032d8UL, 0x131384427b3aaeecUL,
44580 +- /* 34 */ 0x8405e51286234f14UL, 0x14dc4739adb4c529UL,
44581 +- 0xb8a2b5b250634ffdUL, 0x2fe2a94ad8a7ff93UL,
44582 +- /* 35 */ 0xec5c57efe843faddUL, 0x2843ce40f0bb9918UL,
44583 +- 0xa4b561d6cf3d6305UL, 0x743629bde8fb777eUL,
44584 +- /* 36 */ 0x343edd46bbaf738fUL, 0xed981828b101a651UL,
44585 +- 0xa401760b882c797aUL, 0x1fc223e28dc88730UL,
44586 +- /* 37 */ 0x48604e91fc0fba0eUL, 0xb637f78f052c6fa4UL,
44587 +- 0x91ccac3d09e9239cUL, 0x23f7eed4437a687cUL,
44588 +- /* 38 */ 0x5173b1118d9bd800UL, 0x29d641b63189d4a7UL,
44589 +- 0xfdbf177988bbc586UL, 0x2959894fcad81df5UL,
44590 +- /* 39 */ 0xaebc8ef3b4bbc899UL, 0x4148995ab26992b9UL,
44591 +- 0x24e20b0134f92cfbUL, 0x40d158894a05dee8UL,
44592 +- /* 40 */ 0x46b00b1185af76f6UL, 0x26bac77873187a79UL,
44593 +- 0x3dc0bf95ab8fff5fUL, 0x2a608bd8945524d7UL,
44594 +- /* 41 */ 0x26449588bd446302UL, 0x7c4bc21c0388439cUL,
44595 +- 0x8e98a4f383bd11b2UL, 0x26218d7bc9d876b9UL,
44596 +- /* 42 */ 0xe3081542997c178aUL, 0x3c2d29a86fb6606fUL,
44597 +- 0x5c217736fa279374UL, 0x7dde05734afeb1faUL,
44598 +- /* 43 */ 0x3bf10e3906d42babUL, 0xe4f7803e1980649cUL,
44599 +- 0xe6053bf89595bf7aUL, 0x394faf38da245530UL,
44600 +- /* 44 */ 0x7a8efb58896928f4UL, 0xfbc778e9cc6a113cUL,
44601 +- 0x72670ce330af596fUL, 0x48f222a81d3d6cf7UL,
44602 +- /* 45 */ 0xf01fce410d72caa7UL, 0x5a20ecc7213b5595UL,
44603 +- 0x7bc21165c1fa1483UL, 0x07f89ae31da8a741UL,
44604 +- /* 46 */ 0x05d2c2b4c6830ff9UL, 0xd43e330fc6316293UL,
44605 +- 0xa5a5590a96d3a904UL, 0x705edb91a65333b6UL,
44606 +- /* 47 */ 0x048ee15e0bb9a5f7UL, 0x3240cfca9e0aaf5dUL,
44607 +- 0x8f4b71ceedc4a40bUL, 0x621c0da3de544a6dUL,
44608 +- /* 48 */ 0x92872836a08c4091UL, 0xce8375b010c91445UL,
44609 +- 0x8a72eb524f276394UL, 0x2667fcfa7ec83635UL,
44610 +- /* 49 */ 0x7f4c173345e8752aUL, 0x061b47feee7079a5UL,
44611 +- 0x25dd9afa9f86ff34UL, 0x3780cef5425dc89cUL,
44612 +- /* 50 */ 0x1a46035a513bb4e9UL, 0x3e1ef379ac575adaUL,
44613 +- 0xc78c5f1c5fa24b50UL, 0x321a967634fd9f22UL,
44614 +- /* 51 */ 0x946707b8826e27faUL, 0x3dca84d64c506fd0UL,
44615 +- 0xc189218075e91436UL, 0x6d9284169b3b8484UL,
44616 +- /* 52 */ 0x3a67e840383f2ddfUL, 0x33eec9a30c4f9b75UL,
44617 +- 0x3ec7c86fa783ef47UL, 0x26ec449fbac9fbc4UL,
44618 +- /* 53 */ 0x5c0f38cba09b9e7dUL, 0x81168cc762a3478cUL,
44619 +- 0x3e23b0d306fc121cUL, 0x5a238aa0a5efdcddUL,
44620 +- /* 54 */ 0x1ba26121c4ea43ffUL, 0x36f8c77f7c8832b5UL,
44621 +- 0x88fbea0b0adcf99aUL, 0x5ca9938ec25bebf9UL,
44622 +- /* 55 */ 0xd5436a5e51fccda0UL, 0x1dbc4797c2cd893bUL,
44623 +- 0x19346a65d3224a08UL, 0x0f5034e49b9af466UL,
44624 +- /* 56 */ 0xf23c3967a1e0b96eUL, 0xe58b08fa867a4d88UL,
44625 +- 0xfb2fabc6a7341679UL, 0x2a75381eb6026946UL,
44626 +- /* 57 */ 0xc80a3be4c19420acUL, 0x66b1f6c681f2b6dcUL,
44627 +- 0x7cf7036761e93388UL, 0x25abbbd8a660a4c4UL,
44628 +- /* 58 */ 0x91ea12ba14fd5198UL, 0x684950fc4a3cffa9UL,
44629 +- 0xf826842130f5ad28UL, 0x3ea988f75301a441UL,
44630 +- /* 59 */ 0xc978109a695f8c6fUL, 0x1746eb4a0530c3f3UL,
44631 +- 0x444d6d77b4459995UL, 0x75952b8c054e5cc7UL,
44632 +- /* 60 */ 0xa3703f7915f4d6aaUL, 0x66c346202f2647d8UL,
44633 +- 0xd01469df811d644bUL, 0x77fea47d81a5d71fUL,
44634 +- /* 61 */ 0xc5e9529ef57ca381UL, 0x6eeeb4b9ce2f881aUL,
44635 +- 0xb6e91a28e8009bd6UL, 0x4b80be3e9afc3fecUL,
44636 +- /* 62 */ 0x7e3773c526aed2c5UL, 0x1b4afcb453c9a49dUL,
44637 +- 0xa920bdd7baffb24dUL, 0x7c54699f122d400eUL,
44638 +- /* 63 */ 0xef46c8e14fa94bc8UL, 0xe0b074ce2952ed5eUL,
44639 +- 0xbea450e1dbd885d5UL, 0x61b68649320f712cUL,
44640 +- /* 64 */ 0x8a485f7309ccbdd1UL, 0xbd06320d7d4d1a2dUL,
44641 +- 0x25232973322dbef4UL, 0x445dc4758c17f770UL,
44642 +- /* 65 */ 0xdb0434177cc8933cUL, 0xed6fe82175ea059fUL,
44643 +- 0x1efebefdc053db34UL, 0x4adbe867c65daf99UL,
44644 +- /* 66 */ 0x3acd71a2a90609dfUL, 0xe5e991856dd04050UL,
44645 +- 0x1ec69b688157c23cUL, 0x697427f6885cfe4dUL,
44646 +- /* 67 */ 0xd7be7b9b65e1a851UL, 0xa03d28d522c536ddUL,
44647 +- 0x28399d658fd2b645UL, 0x49e5b7e17c2641e1UL,
44648 +- /* 68 */ 0x6f8c3a98700457a4UL, 0x5078f0a25ebb6778UL,
44649 +- 0xd13c3ccbc382960fUL, 0x2e003258a7df84b1UL,
44650 +- /* 69 */ 0x8ad1f39be6296a1cUL, 0xc1eeaa652a5fbfb2UL,
44651 +- 0x33ee0673fd26f3cbUL, 0x59256173a69d2cccUL,
44652 +- /* 70 */ 0x41ea07aa4e18fc41UL, 0xd9fc19527c87a51eUL,
44653 +- 0xbdaacb805831ca6fUL, 0x445b652dc916694fUL,
44654 +- /* 71 */ 0xce92a3a7f2172315UL, 0x1edc282de11b9964UL,
44655 +- 0xa1823aafe04c314aUL, 0x790a2d94437cf586UL,
44656 +- /* 72 */ 0x71c447fb93f6e009UL, 0x8922a56722845276UL,
44657 +- 0xbf70903b204f5169UL, 0x2f7a89891ba319feUL,
44658 +- /* 73 */ 0x02a08eb577e2140cUL, 0xed9a4ed4427bdcf4UL,
44659 +- 0x5253ec44e4323cd1UL, 0x3e88363c14e9355bUL,
44660 +- /* 74 */ 0xaa66c14277110b8cUL, 0x1ae0391610a23390UL,
44661 +- 0x2030bd12c93fc2a2UL, 0x3ee141579555c7abUL,
44662 +- /* 75 */ 0x9214de3a6d6e7d41UL, 0x3ccdd88607f17efeUL,
44663 +- 0x674f1288f8e11217UL, 0x5682250f329f93d0UL,
44664 +- /* 76 */ 0x6cf00b136d2e396eUL, 0x6e4cf86f1014debfUL,
44665 +- 0x5930b1b5bfcc4e83UL, 0x047069b48aba16b6UL,
44666 +- /* 77 */ 0x0d4ce4ab69b20793UL, 0xb24db91a97d0fb9eUL,
44667 +- 0xcdfa50f54e00d01dUL, 0x221b1085368bddb5UL,
44668 +- /* 78 */ 0xe7e59468b1e3d8d2UL, 0x53c56563bd122f93UL,
44669 +- 0xeee8a903e0663f09UL, 0x61efa662cbbe3d42UL,
44670 +- /* 79 */ 0x2cf8ddddde6eab2aUL, 0x9bf80ad51435f231UL,
44671 +- 0x5deadacec9f04973UL, 0x29275b5d41d29b27UL,
44672 +- /* 80 */ 0xcfde0f0895ebf14fUL, 0xb9aab96b054905a7UL,
44673 +- 0xcae80dd9a1c420fdUL, 0x0a63bf2f1673bbc7UL,
44674 +- /* 81 */ 0x092f6e11958fbc8cUL, 0x672a81e804822fadUL,
44675 +- 0xcac8351560d52517UL, 0x6f3f7722c8f192f8UL,
44676 +- /* 82 */ 0xf8ba90ccc2e894b7UL, 0x2c7557a438ff9f0dUL,
44677 +- 0x894d1d855ae52359UL, 0x68e122157b743d69UL,
44678 +- /* 83 */ 0xd87e5570cfb919f3UL, 0x3f2cdecd95798db9UL,
44679 +- 0x2121154710c0a2ceUL, 0x3c66a115246dc5b2UL,
44680 +- /* 84 */ 0xcbedc562294ecb72UL, 0xba7143c36a280b16UL,
44681 +- 0x9610c2efd4078b67UL, 0x6144735d946a4b1eUL,
44682 +- /* 85 */ 0x536f111ed75b3350UL, 0x0211db8c2041d81bUL,
44683 +- 0xf93cb1000e10413cUL, 0x149dfd3c039e8876UL,
44684 +- /* 86 */ 0xd479dde46b63155bUL, 0xb66e15e93c837976UL,
44685 +- 0xdafde43b1f13e038UL, 0x5fafda1a2e4b0b35UL,
44686 +- /* 87 */ 0x3600bbdf17197581UL, 0x3972050bbe3cd2c2UL,
44687 +- 0x5938906dbdd5be86UL, 0x34fce5e43f9b860fUL,
44688 +- /* 88 */ 0x75a8a4cd42d14d02UL, 0x828dabc53441df65UL,
44689 +- 0x33dcabedd2e131d3UL, 0x3ebad76fb814d25fUL,
44690 +- /* 89 */ 0xd4906f566f70e10fUL, 0x5d12f7aa51690f5aUL,
44691 +- 0x45adb16e76cefcf2UL, 0x01f768aead232999UL,
44692 +- /* 90 */ 0x2b6cc77b6248febdUL, 0x3cd30628ec3aaffdUL,
44693 +- 0xce1c0b80d4ef486aUL, 0x4c3bff2ea6f66c23UL,
44694 +- /* 91 */ 0x3f2ec4094aeaeb5fUL, 0x61b19b286e372ca7UL,
44695 +- 0x5eefa966de2a701dUL, 0x23b20565de55e3efUL,
44696 +- /* 92 */ 0xe301ca5279d58557UL, 0x07b2d4ce27c2874fUL,
44697 +- 0xa532cd8a9dcf1d67UL, 0x2a52fee23f2bff56UL,
44698 +- /* 93 */ 0x8624efb37cd8663dUL, 0xbbc7ac20ffbd7594UL,
44699 +- 0x57b85e9c82d37445UL, 0x7b3052cb86a6ec66UL,
44700 +- /* 94 */ 0x3482f0ad2525e91eUL, 0x2cb68043d28edca0UL,
44701 +- 0xaf4f6d052e1b003aUL, 0x185f8c2529781b0aUL,
44702 +- /* 95 */ 0xaa41de5bd80ce0d6UL, 0x9407b2416853e9d6UL,
44703 +- 0x563ec36e357f4c3aUL, 0x4cc4b8dd0e297bceUL,
44704 +- /* 96 */ 0xa2fc1a52ffb8730eUL, 0x1811f16e67058e37UL,
44705 +- 0x10f9a366cddf4ee1UL, 0x72f4a0c4a0b9f099UL,
44706 +- /* 97 */ 0x8c16c06f663f4ea7UL, 0x693b3af74e970fbaUL,
44707 +- 0x2102e7f1d69ec345UL, 0x0ba53cbc968a8089UL,
44708 +- /* 98 */ 0xca3d9dc7fea15537UL, 0x4c6824bb51536493UL,
44709 +- 0xb9886314844006b1UL, 0x40d2a72ab454cc60UL,
44710 +- /* 99 */ 0x5936a1b712570975UL, 0x91b9d648debda657UL,
44711 +- 0x3344094bb64330eaUL, 0x006ba10d12ee51d0UL,
44712 +- /* 100 */ 0x19228468f5de5d58UL, 0x0eb12f4c38cc05b0UL,
44713 +- 0xa1039f9dd5601990UL, 0x4502d4ce4fff0e0bUL,
44714 +- /* 101 */ 0xeb2054106837c189UL, 0xd0f6544c6dd3b93cUL,
44715 +- 0x40727064c416d74fUL, 0x6e15c6114b502ef0UL,
44716 +- /* 102 */ 0x4df2a398cfb1a76bUL, 0x11256c7419f2f6b1UL,
44717 +- 0x4a497962066e6043UL, 0x705b3aab41355b44UL,
44718 +- /* 103 */ 0x365ef536d797b1d8UL, 0x00076bd622ddf0dbUL,
44719 +- 0x3bbf33b0e0575a88UL, 0x3777aa05c8e4ca4dUL,
44720 +- /* 104 */ 0x392745c85578db5fUL, 0x6fda4149dbae5ae2UL,
44721 +- 0xb1f0b00b8adc9867UL, 0x09963437d36f1da3UL,
44722 +- /* 105 */ 0x7e824e90a5dc3853UL, 0xccb5f6641f135cbdUL,
44723 +- 0x6736d86c87ce8fccUL, 0x625f3ce26604249fUL,
44724 +- /* 106 */ 0xaf8ac8059502f63fUL, 0x0c05e70a2e351469UL,
44725 +- 0x35292e9c764b6305UL, 0x1a394360c7e23ac3UL,
44726 +- /* 107 */ 0xd5c6d53251183264UL, 0x62065abd43c2b74fUL,
44727 +- 0xb5fbf5d03b973f9bUL, 0x13a3da3661206e5eUL,
44728 +- /* 108 */ 0xc6bd5837725d94e5UL, 0x18e30912205016c5UL,
44729 +- 0x2088ce1570033c68UL, 0x7fba1f495c837987UL,
44730 +- /* 109 */ 0x5a8c7423f2f9079dUL, 0x1735157b34023fc5UL,
44731 +- 0xe4f9b49ad2fab351UL, 0x6691ff72c878e33cUL,
44732 +- /* 110 */ 0x122c2adedc5eff3eUL, 0xf8dd4bf1d8956cf4UL,
44733 +- 0xeb86205d9e9e5bdaUL, 0x049b92b9d975c743UL,
44734 +- /* 111 */ 0xa5379730b0f6c05aUL, 0x72a0ffacc6f3a553UL,
44735 +- 0xb0032c34b20dcd6dUL, 0x470e9dbc88d5164aUL,
44736 +- /* 112 */ 0xb19cf10ca237c047UL, 0xb65466711f6c81a2UL,
44737 +- 0xb3321bd16dd80b43UL, 0x48c14f600c5fbe8eUL,
44738 +- /* 113 */ 0x66451c264aa6c803UL, 0xb66e3904a4fa7da6UL,
44739 +- 0xd45f19b0b3128395UL, 0x31602627c3c9bc10UL,
44740 +- /* 114 */ 0x3120dc4832e4e10dUL, 0xeb20c46756c717f7UL,
44741 +- 0x00f52e3f67280294UL, 0x566d4fc14730c509UL,
44742 +- /* 115 */ 0x7e3a5d40fd837206UL, 0xc1e926dc7159547aUL,
44743 +- 0x216730fba68d6095UL, 0x22e8c3843f69cea7UL,
44744 +- /* 116 */ 0x33d074e8930e4b2bUL, 0xb6e4350e84d15816UL,
44745 +- 0x5534c26ad6ba2365UL, 0x7773c12f89f1f3f3UL,
44746 +- /* 117 */ 0x8cba404da57962aaUL, 0x5b9897a81999ce56UL,
44747 +- 0x508e862f121692fcUL, 0x3a81907fa093c291UL,
44748 +- /* 118 */ 0x0dded0ff4725a510UL, 0x10d8cc10673fc503UL,
44749 +- 0x5b9d151c9f1f4e89UL, 0x32a5c1d5cb09a44cUL,
44750 +- /* 119 */ 0x1e0aa442b90541fbUL, 0x5f85eb7cc1b485dbUL,
44751 +- 0xbee595ce8a9df2e5UL, 0x25e496c722422236UL,
44752 +- /* 120 */ 0x5edf3c46cd0fe5b9UL, 0x34e75a7ed2a43388UL,
44753 +- 0xe488de11d761e352UL, 0x0e878a01a085545cUL,
44754 +- /* 121 */ 0xba493c77e021bb04UL, 0x2b4d1843c7df899aUL,
44755 +- 0x9ea37a487ae80d67UL, 0x67a9958011e41794UL,
44756 +- /* 122 */ 0x4b58051a6697b065UL, 0x47e33f7d8d6ba6d4UL,
44757 +- 0xbb4da8d483ca46c1UL, 0x68becaa181c2db0dUL,
44758 +- /* 123 */ 0x8d8980e90b989aa5UL, 0xf95eb14a2c93c99bUL,
44759 +- 0x51c6c7c4796e73a2UL, 0x6e228363b5efb569UL,
44760 +- /* 124 */ 0xc6bbc0b02dd624c8UL, 0x777eb47dec8170eeUL,
44761 +- 0x3cde15a004cfafa9UL, 0x1dc6bc087160bf9bUL,
44762 +- /* 125 */ 0x2e07e043eec34002UL, 0x18e9fc677a68dc7fUL,
44763 +- 0xd8da03188bd15b9aUL, 0x48fbc3bb00568253UL,
44764 +- /* 126 */ 0x57547d4cfb654ce1UL, 0xd3565b82a058e2adUL,
44765 +- 0xf63eaf0bbf154478UL, 0x47531ef114dfbb18UL,
44766 +- /* 127 */ 0xe1ec630a4278c587UL, 0x5507d546ca8e83f3UL,
44767 +- 0x85e135c63adc0c2bUL, 0x0aa7efa85682844eUL,
44768 +- /* 128 */ 0x72691ba8b3e1f615UL, 0x32b4e9701fbe3ffaUL,
44769 +- 0x97b6d92e39bb7868UL, 0x2cfe53dea02e39e8UL,
44770 +- /* 129 */ 0x687392cd85cd52b0UL, 0x27ff66c910e29831UL,
44771 +- 0x97134556a9832d06UL, 0x269bb0360a84f8a0UL,
44772 +- /* 130 */ 0x706e55457643f85cUL, 0x3734a48c9b597d1bUL,
44773 +- 0x7aee91e8c6efa472UL, 0x5cd6abc198a9d9e0UL,
44774 +- /* 131 */ 0x0e04de06cb3ce41aUL, 0xd8c6eb893402e138UL,
44775 +- 0x904659bb686e3772UL, 0x7215c371746ba8c8UL,
44776 +- /* 132 */ 0xfd12a97eeae4a2d9UL, 0x9514b7516394f2c5UL,
44777 +- 0x266fd5809208f294UL, 0x5c847085619a26b9UL,
44778 +- /* 133 */ 0x52985410fed694eaUL, 0x3c905b934a2ed254UL,
44779 +- 0x10bb47692d3be467UL, 0x063b3d2d69e5e9e1UL,
44780 +- /* 134 */ 0x472726eedda57debUL, 0xefb6c4ae10f41891UL,
44781 +- 0x2b1641917b307614UL, 0x117c554fc4f45b7cUL,
44782 +- /* 135 */ 0xc07cf3118f9d8812UL, 0x01dbd82050017939UL,
44783 +- 0xd7e803f4171b2827UL, 0x1015e87487d225eaUL,
44784 +- /* 136 */ 0xc58de3fed23acc4dUL, 0x50db91c294a7be2dUL,
44785 +- 0x0b94d43d1c9cf457UL, 0x6b1640fa6e37524aUL,
44786 +- /* 137 */ 0x692f346c5fda0d09UL, 0x200b1c59fa4d3151UL,
44787 +- 0xb8c46f760777a296UL, 0x4b38395f3ffdfbcfUL,
44788 +- /* 138 */ 0x18d25e00be54d671UL, 0x60d50582bec8aba6UL,
44789 +- 0x87ad8f263b78b982UL, 0x50fdf64e9cda0432UL,
44790 +- /* 139 */ 0x90f567aac578dcf0UL, 0xef1e9b0ef2a3133bUL,
44791 +- 0x0eebba9242d9de71UL, 0x15473c9bf03101c7UL,
44792 +- /* 140 */ 0x7c77e8ae56b78095UL, 0xb678e7666e6f078eUL,
44793 +- 0x2da0b9615348ba1fUL, 0x7cf931c1ff733f0bUL,
44794 +- /* 141 */ 0x26b357f50a0a366cUL, 0xe9708cf42b87d732UL,
44795 +- 0xc13aeea5f91cb2c0UL, 0x35d90c991143bb4cUL,
44796 +- /* 142 */ 0x47c1c404a9a0d9dcUL, 0x659e58451972d251UL,
44797 +- 0x3875a8c473b38c31UL, 0x1fbd9ed379561f24UL,
44798 +- /* 143 */ 0x11fabc6fd41ec28dUL, 0x7ef8dfe3cd2a2dcaUL,
44799 +- 0x72e73b5d8c404595UL, 0x6135fa4954b72f27UL,
44800 +- /* 144 */ 0xccfc32a2de24b69cUL, 0x3f55698c1f095d88UL,
44801 +- 0xbe3350ed5ac3f929UL, 0x5e9bf806ca477eebUL,
44802 +- /* 145 */ 0xe9ce8fb63c309f68UL, 0x5376f63565e1f9f4UL,
44803 +- 0xd1afcfb35a6393f1UL, 0x6632a1ede5623506UL,
44804 +- /* 146 */ 0x0b7d6c390c2ded4cUL, 0x56cb3281df04cb1fUL,
44805 +- 0x66305a1249ecc3c7UL, 0x5d588b60a38ca72aUL,
44806 +- /* 147 */ 0xa6ecbf78e8e5f42dUL, 0x86eeb44b3c8a3eecUL,
44807 +- 0xec219c48fbd21604UL, 0x1aaf1af517c36731UL,
44808 +- /* 148 */ 0xc306a2836769bde7UL, 0x208280622b1e2adbUL,
44809 +- 0x8027f51ffbff94a6UL, 0x76cfa1ce1124f26bUL,
44810 +- /* 149 */ 0x18eb00562422abb6UL, 0xf377c4d58f8c29c3UL,
44811 +- 0x4dbbc207f531561aUL, 0x0253b7f082128a27UL,
44812 +- /* 150 */ 0x3d1f091cb62c17e0UL, 0x4860e1abd64628a9UL,
44813 +- 0x52d17436309d4253UL, 0x356f97e13efae576UL,
44814 +- /* 151 */ 0xd351e11aa150535bUL, 0x3e6b45bb1dd878ccUL,
44815 +- 0x0c776128bed92c98UL, 0x1d34ae93032885b8UL,
44816 +- /* 152 */ 0x4ba0488ca85ba4c3UL, 0x985348c33c9ce6ceUL,
44817 +- 0x66124c6f97bda770UL, 0x0f81a0290654124aUL,
44818 +- /* 153 */ 0x9ed09ca6569b86fdUL, 0x811009fd18af9a2dUL,
44819 +- 0xff08d03f93d8c20aUL, 0x52a148199faef26bUL,
44820 +- /* 154 */ 0x3e03f9dc2d8d1b73UL, 0x4205801873961a70UL,
44821 +- 0xc0d987f041a35970UL, 0x07aa1f15a1c0d549UL,
44822 +- /* 155 */ 0xdfd46ce08cd27224UL, 0x6d0a024f934e4239UL,
44823 +- 0x808a7a6399897b59UL, 0x0a4556e9e13d95a2UL,
44824 +- /* 156 */ 0xd21a991fe9c13045UL, 0x9b0e8548fe7751b8UL,
44825 +- 0x5da643cb4bf30035UL, 0x77db28d63940f721UL,
44826 +- /* 157 */ 0xfc5eeb614adc9011UL, 0x5229419ae8c411ebUL,
44827 +- 0x9ec3e7787d1dcf74UL, 0x340d053e216e4cb5UL,
44828 +- /* 158 */ 0xcac7af39b48df2b4UL, 0xc0faec2871a10a94UL,
44829 +- 0x140a69245ca575edUL, 0x0cf1c37134273a4cUL,
44830 +- /* 159 */ 0xc8ee306ac224b8a5UL, 0x57eaee7ccb4930b0UL,
44831 +- 0xa1e806bdaacbe74fUL, 0x7d9a62742eeb657dUL,
44832 +- /* 160 */ 0x9eb6b6ef546c4830UL, 0x885cca1fddb36e2eUL,
44833 +- 0xe6b9f383ef0d7105UL, 0x58654fef9d2e0412UL,
44834 +- /* 161 */ 0xa905c4ffbe0e8e26UL, 0x942de5df9b31816eUL,
44835 +- 0x497d723f802e88e1UL, 0x30684dea602f408dUL,
44836 +- /* 162 */ 0x21e5a278a3e6cb34UL, 0xaefb6e6f5b151dc4UL,
44837 +- 0xb30b8e049d77ca15UL, 0x28c3c9cf53b98981UL,
44838 +- /* 163 */ 0x287fb721556cdd2aUL, 0x0d317ca897022274UL,
44839 +- 0x7468c7423a543258UL, 0x4a7f11464eb5642fUL,
44840 +- /* 164 */ 0xa237a4774d193aa6UL, 0xd865986ea92129a1UL,
44841 +- 0x24c515ecf87c1a88UL, 0x604003575f39f5ebUL,
44842 +- /* 165 */ 0x47b9f189570a9b27UL, 0x2b98cede465e4b78UL,
44843 +- 0x026df551dbb85c20UL, 0x74fcd91047e21901UL,
44844 +- /* 166 */ 0x13e2a90a23c1bfa3UL, 0x0cb0074e478519f6UL,
44845 +- 0x5ff1cbbe3af6cf44UL, 0x67fe5438be812dbeUL,
44846 +- /* 167 */ 0xd13cf64fa40f05b0UL, 0x054dfb2f32283787UL,
44847 +- 0x4173915b7f0d2aeaUL, 0x482f144f1f610d4eUL,
44848 +- /* 168 */ 0xf6210201b47f8234UL, 0x5d0ae1929e70b990UL,
44849 +- 0xdcd7f455b049567cUL, 0x7e93d0f1f0916f01UL,
44850 +- /* 169 */ 0xdd79cbf18a7db4faUL, 0xbe8391bf6f74c62fUL,
44851 +- 0x027145d14b8291bdUL, 0x585a73ea2cbf1705UL,
44852 +- /* 170 */ 0x485ca03e928a0db2UL, 0x10fc01a5742857e7UL,
44853 +- 0x2f482edbd6d551a7UL, 0x0f0433b5048fdb8aUL,
44854 +- /* 171 */ 0x60da2e8dd7dc6247UL, 0x88b4c9d38cd4819aUL,
44855 +- 0x13033ac001f66697UL, 0x273b24fe3b367d75UL,
44856 +- /* 172 */ 0xc6e8f66a31b3b9d4UL, 0x281514a494df49d5UL,
44857 +- 0xd1726fdfc8b23da7UL, 0x4b3ae7d103dee548UL,
44858 +- /* 173 */ 0xc6256e19ce4b9d7eUL, 0xff5c5cf186e3c61cUL,
44859 +- 0xacc63ca34b8ec145UL, 0x74621888fee66574UL,
44860 +- /* 174 */ 0x956f409645290a1eUL, 0xef0bf8e3263a962eUL,
44861 +- 0xed6a50eb5ec2647bUL, 0x0694283a9dca7502UL,
44862 +- /* 175 */ 0x769b963643a2dcd1UL, 0x42b7c8ea09fc5353UL,
44863 +- 0x4f002aee13397eabUL, 0x63005e2c19b7d63aUL,
44864 +- /* 176 */ 0xca6736da63023beaUL, 0x966c7f6db12a99b7UL,
44865 +- 0xace09390c537c5e1UL, 0x0b696063a1aa89eeUL,
44866 +- /* 177 */ 0xebb03e97288c56e5UL, 0x432a9f9f938c8be8UL,
44867 +- 0xa6a5a93d5b717f71UL, 0x1a5fb4c3e18f9d97UL,
44868 +- /* 178 */ 0x1c94e7ad1c60cdceUL, 0xee202a43fc02c4a0UL,
44869 +- 0x8dafe4d867c46a20UL, 0x0a10263c8ac27b58UL,
44870 +- /* 179 */ 0xd0dea9dfe4432a4aUL, 0x856af87bbe9277c5UL,
44871 +- 0xce8472acc212c71aUL, 0x6f151b6d9bbb1e91UL,
44872 +- /* 180 */ 0x26776c527ceed56aUL, 0x7d211cb7fbf8faecUL,
44873 +- 0x37ae66a6fd4609ccUL, 0x1f81b702d2770c42UL,
44874 +- /* 181 */ 0x2fb0b057eac58392UL, 0xe1dd89fe29744e9dUL,
44875 +- 0xc964f8eb17beb4f8UL, 0x29571073c9a2d41eUL,
44876 +- /* 182 */ 0xa948a18981c0e254UL, 0x2df6369b65b22830UL,
44877 +- 0xa33eb2d75fcfd3c6UL, 0x078cd6ec4199a01fUL,
44878 +- /* 183 */ 0x4a584a41ad900d2fUL, 0x32142b78e2c74c52UL,
44879 +- 0x68c4e8338431c978UL, 0x7f69ea9008689fc2UL,
44880 +- /* 184 */ 0x52f2c81e46a38265UL, 0xfd78072d04a832fdUL,
44881 +- 0x8cd7d5fa25359e94UL, 0x4de71b7454cc29d2UL,
44882 +- /* 185 */ 0x42eb60ad1eda6ac9UL, 0x0aad37dfdbc09c3aUL,
44883 +- 0x81004b71e33cc191UL, 0x44e6be345122803cUL,
44884 +- /* 186 */ 0x03fe8388ba1920dbUL, 0xf5d57c32150db008UL,
44885 +- 0x49c8c4281af60c29UL, 0x21edb518de701aeeUL,
44886 +- /* 187 */ 0x7fb63e418f06dc99UL, 0xa4460d99c166d7b8UL,
44887 +- 0x24dd5248ce520a83UL, 0x5ec3ad712b928358UL,
44888 +- /* 188 */ 0x15022a5fbd17930fUL, 0xa4f64a77d82570e3UL,
44889 +- 0x12bc8d6915783712UL, 0x498194c0fc620abbUL,
44890 +- /* 189 */ 0x38a2d9d255686c82UL, 0x785c6bd9193e21f0UL,
44891 +- 0xe4d5c81ab24a5484UL, 0x56307860b2e20989UL,
44892 +- /* 190 */ 0x429d55f78b4d74c4UL, 0x22f1834643350131UL,
44893 +- 0x1e60c24598c71fffUL, 0x59f2f014979983efUL,
44894 +- /* 191 */ 0x46a47d56eb494a44UL, 0x3e22a854d636a18eUL,
44895 +- 0xb346e15274491c3bUL, 0x2ceafd4e5390cde7UL,
44896 +- /* 192 */ 0xba8a8538be0d6675UL, 0x4b9074bb50818e23UL,
44897 +- 0xcbdab89085d304c3UL, 0x61a24fe0e56192c4UL,
44898 +- /* 193 */ 0xcb7615e6db525bcbUL, 0xdd7d8c35a567e4caUL,
44899 +- 0xe6b4153acafcdd69UL, 0x2d668e097f3c9766UL,
44900 +- /* 194 */ 0xa57e7e265ce55ef0UL, 0x5d9f4e527cd4b967UL,
44901 +- 0xfbc83606492fd1e5UL, 0x090d52beb7c3f7aeUL,
44902 +- /* 195 */ 0x09b9515a1e7b4d7cUL, 0x1f266a2599da44c0UL,
44903 +- 0xa1c49548e2c55504UL, 0x7ef04287126f15ccUL,
44904 +- /* 196 */ 0xfed1659dbd30ef15UL, 0x8b4ab9eec4e0277bUL,
44905 +- 0x884d6236a5df3291UL, 0x1fd96ea6bf5cf788UL,
44906 +- /* 197 */ 0x42a161981f190d9aUL, 0x61d849507e6052c1UL,
44907 +- 0x9fe113bf285a2cd5UL, 0x7c22d676dbad85d8UL,
44908 +- /* 198 */ 0x82e770ed2bfbd27dUL, 0x4c05b2ece996f5a5UL,
44909 +- 0xcd40a9c2b0900150UL, 0x5895319213d9bf64UL,
44910 +- /* 199 */ 0xe7cc5d703fea2e08UL, 0xb50c491258e2188cUL,
44911 +- 0xcce30baa48205bf0UL, 0x537c659ccfa32d62UL,
44912 +- /* 200 */ 0x37b6623a98cfc088UL, 0xfe9bed1fa4d6aca4UL,
44913 +- 0x04d29b8e56a8d1b0UL, 0x725f71c40b519575UL,
44914 +- /* 201 */ 0x28c7f89cd0339ce6UL, 0x8367b14469ddc18bUL,
44915 +- 0x883ada83a6a1652cUL, 0x585f1974034d6c17UL,
44916 +- /* 202 */ 0x89cfb266f1b19188UL, 0xe63b4863e7c35217UL,
44917 +- 0xd88c9da6b4c0526aUL, 0x3e035c9df0954635UL,
44918 +- /* 203 */ 0xdd9d5412fb45de9dUL, 0xdd684532e4cff40dUL,
44919 +- 0x4b5c999b151d671cUL, 0x2d8c2cc811e7f690UL,
44920 +- /* 204 */ 0x7f54be1d90055d40UL, 0xa464c5df464aaf40UL,
44921 +- 0x33979624f0e917beUL, 0x2c018dc527356b30UL,
44922 +- /* 205 */ 0xa5415024e330b3d4UL, 0x73ff3d96691652d3UL,
44923 +- 0x94ec42c4ef9b59f1UL, 0x0747201618d08e5aUL,
44924 +- /* 206 */ 0x4d6ca48aca411c53UL, 0x66415f2fcfa66119UL,
44925 +- 0x9c4dd40051e227ffUL, 0x59810bc09a02f7ebUL,
44926 +- /* 207 */ 0x2a7eb171b3dc101dUL, 0x441c5ab99ffef68eUL,
44927 +- 0x32025c9b93b359eaUL, 0x5e8ce0a71e9d112fUL,
44928 +- /* 208 */ 0xbfcccb92429503fdUL, 0xd271ba752f095d55UL,
44929 +- 0x345ead5e972d091eUL, 0x18c8df11a83103baUL,
44930 +- /* 209 */ 0x90cd949a9aed0f4cUL, 0xc5d1f4cb6660e37eUL,
44931 +- 0xb8cac52d56c52e0bUL, 0x6e42e400c5808e0dUL,
44932 +- /* 210 */ 0xa3b46966eeaefd23UL, 0x0c4f1f0be39ecdcaUL,
44933 +- 0x189dc8c9d683a51dUL, 0x51f27f054c09351bUL,
44934 +- /* 211 */ 0x4c487ccd2a320682UL, 0x587ea95bb3df1c96UL,
44935 +- 0xc8ccf79e555cb8e8UL, 0x547dc829a206d73dUL,
44936 +- /* 212 */ 0xb822a6cd80c39b06UL, 0xe96d54732000d4c6UL,
44937 +- 0x28535b6f91463b4dUL, 0x228f4660e2486e1dUL,
44938 +- /* 213 */ 0x98799538de8d3abfUL, 0x8cd8330045ebca6eUL,
44939 +- 0x79952a008221e738UL, 0x4322e1a7535cd2bbUL,
44940 +- /* 214 */ 0xb114c11819d1801cUL, 0x2016e4d84f3f5ec7UL,
44941 +- 0xdd0e2df409260f4cUL, 0x5ec362c0ae5f7266UL,
44942 +- /* 215 */ 0xc0462b18b8b2b4eeUL, 0x7cc8d950274d1afbUL,
44943 +- 0xf25f7105436b02d2UL, 0x43bbf8dcbff9ccd3UL,
44944 +- /* 216 */ 0xb6ad1767a039e9dfUL, 0xb0714da8f69d3583UL,
44945 +- 0x5e55fa18b42931f5UL, 0x4ed5558f33c60961UL,
44946 +- /* 217 */ 0x1fe37901c647a5ddUL, 0x593ddf1f8081d357UL,
44947 +- 0x0249a4fd813fd7a6UL, 0x69acca274e9caf61UL,
44948 +- /* 218 */ 0x047ba3ea330721c9UL, 0x83423fc20e7e1ea0UL,
44949 +- 0x1df4c0af01314a60UL, 0x09a62dab89289527UL,
44950 +- /* 219 */ 0xa5b325a49cc6cb00UL, 0xe94b5dc654b56cb6UL,
44951 +- 0x3be28779adc994a0UL, 0x4296e8f8ba3a4aadUL,
44952 +- /* 220 */ 0x328689761e451eabUL, 0x2e4d598bff59594aUL,
44953 +- 0x49b96853d7a7084aUL, 0x4980a319601420a8UL,
44954 +- /* 221 */ 0x9565b9e12f552c42UL, 0x8a5318db7100fe96UL,
44955 +- 0x05c90b4d43add0d7UL, 0x538b4cd66a5d4edaUL,
44956 +- /* 222 */ 0xf4e94fc3e89f039fUL, 0x592c9af26f618045UL,
44957 +- 0x08a36eb5fd4b9550UL, 0x25fffaf6c2ed1419UL,
44958 +- /* 223 */ 0x34434459cc79d354UL, 0xeeecbfb4b1d5476bUL,
44959 +- 0xddeb34a061615d99UL, 0x5129cecceb64b773UL,
44960 +- /* 224 */ 0xee43215894993520UL, 0x772f9c7cf14c0b3bUL,
44961 +- 0xd2e2fce306bedad5UL, 0x715f42b546f06a97UL,
44962 +- /* 225 */ 0x434ecdceda5b5f1aUL, 0x0da17115a49741a9UL,
44963 +- 0x680bd77c73edad2eUL, 0x487c02354edd9041UL,
44964 +- /* 226 */ 0xb8efeff3a70ed9c4UL, 0x56a32aa3e857e302UL,
44965 +- 0xdf3a68bd48a2a5a0UL, 0x07f650b73176c444UL,
44966 +- /* 227 */ 0xe38b9b1626e0ccb1UL, 0x79e053c18b09fb36UL,
44967 +- 0x56d90319c9f94964UL, 0x1ca941e7ac9ff5c4UL,
44968 +- /* 228 */ 0x49c4df29162fa0bbUL, 0x8488cf3282b33305UL,
44969 +- 0x95dfda14cabb437dUL, 0x3391f78264d5ad86UL,
44970 +- /* 229 */ 0x729ae06ae2b5095dUL, 0xd58a58d73259a946UL,
44971 +- 0xe9834262d13921edUL, 0x27fedafaa54bb592UL,
44972 +- /* 230 */ 0xa99dc5b829ad48bbUL, 0x5f025742499ee260UL,
44973 +- 0x802c8ecd5d7513fdUL, 0x78ceb3ef3f6dd938UL,
44974 +- /* 231 */ 0xc342f44f8a135d94UL, 0x7b9edb44828cdda3UL,
44975 +- 0x9436d11a0537cfe7UL, 0x5064b164ec1ab4c8UL,
44976 +- /* 232 */ 0x7020eccfd37eb2fcUL, 0x1f31ea3ed90d25fcUL,
44977 +- 0x1b930d7bdfa1bb34UL, 0x5344467a48113044UL,
44978 +- /* 233 */ 0x70073170f25e6dfbUL, 0xe385dc1a50114cc8UL,
44979 +- 0x2348698ac8fc4f00UL, 0x2a77a55284dd40d8UL,
44980 +- /* 234 */ 0xfe06afe0c98c6ce4UL, 0xc235df96dddfd6e4UL,
44981 +- 0x1428d01e33bf1ed3UL, 0x785768ec9300bdafUL,
44982 +- /* 235 */ 0x9702e57a91deb63bUL, 0x61bdb8bfe5ce8b80UL,
44983 +- 0x645b426f3d1d58acUL, 0x4804a82227a557bcUL,
44984 +- /* 236 */ 0x8e57048ab44d2601UL, 0x68d6501a4b3a6935UL,
44985 +- 0xc39c9ec3f9e1c293UL, 0x4172f257d4de63e2UL,
44986 +- /* 237 */ 0xd368b450330c6401UL, 0x040d3017418f2391UL,
44987 +- 0x2c34bb6090b7d90dUL, 0x16f649228fdfd51fUL,
44988 +- /* 238 */ 0xbea6818e2b928ef5UL, 0xe28ccf91cdc11e72UL,
44989 +- 0x594aaa68e77a36cdUL, 0x313034806c7ffd0fUL,
44990 +- /* 239 */ 0x8a9d27ac2249bd65UL, 0x19a3b464018e9512UL,
44991 +- 0xc26ccff352b37ec7UL, 0x056f68341d797b21UL,
44992 +- /* 240 */ 0x5e79d6757efd2327UL, 0xfabdbcb6553afe15UL,
44993 +- 0xd3e7222c6eaf5a60UL, 0x7046c76d4dae743bUL,
44994 +- /* 241 */ 0x660be872b18d4a55UL, 0x19992518574e1496UL,
44995 +- 0xc103053a302bdcbbUL, 0x3ed8e9800b218e8eUL,
44996 +- /* 242 */ 0x7b0b9239fa75e03eUL, 0xefe9fb684633c083UL,
44997 +- 0x98a35fbe391a7793UL, 0x6065510fe2d0fe34UL,
44998 +- /* 243 */ 0x55cb668548abad0cUL, 0xb4584548da87e527UL,
44999 +- 0x2c43ecea0107c1ddUL, 0x526028809372de35UL,
45000 +- /* 244 */ 0x3415c56af9213b1fUL, 0x5bee1a4d017e98dbUL,
45001 +- 0x13f6b105b5cf709bUL, 0x5ff20e3482b29ab6UL,
45002 +- /* 245 */ 0x0aa29c75cc2e6c90UL, 0xfc7d73ca3a70e206UL,
45003 +- 0x899fc38fc4b5c515UL, 0x250386b124ffc207UL,
45004 +- /* 246 */ 0x54ea28d5ae3d2b56UL, 0x9913149dd6de60ceUL,
45005 +- 0x16694fc58f06d6c1UL, 0x46b23975eb018fc7UL,
45006 +- /* 247 */ 0x470a6a0fb4b7b4e2UL, 0x5d92475a8f7253deUL,
45007 +- 0xabeee5b52fbd3adbUL, 0x7fa20801a0806968UL,
45008 +- /* 248 */ 0x76f3faf19f7714d2UL, 0xb3e840c12f4660c3UL,
45009 +- 0x0fb4cd8df212744eUL, 0x4b065a251d3a2dd2UL,
45010 +- /* 249 */ 0x5cebde383d77cd4aUL, 0x6adf39df882c9cb1UL,
45011 +- 0xa2dd242eb09af759UL, 0x3147c0e50e5f6422UL,
45012 +- /* 250 */ 0x164ca5101d1350dbUL, 0xf8d13479c33fc962UL,
45013 +- 0xe640ce4d13e5da08UL, 0x4bdee0c45061f8baUL,
45014 +- /* 251 */ 0xd7c46dc1a4edb1c9UL, 0x5514d7b6437fd98aUL,
45015 +- 0x58942f6bb2a1c00bUL, 0x2dffb2ab1d70710eUL,
45016 +- /* 252 */ 0xccdfcf2fc18b6d68UL, 0xa8ebcba8b7806167UL,
45017 +- 0x980697f95e2937e3UL, 0x02fbba1cd0126e8cUL
45018 +-};
45019 +-
45020 +-/* c is two 512-bit products: c0[0:7]=a0[0:3]*b0[0:3] and c1[8:15]=a1[4:7]*b1[4:7]
45021 +- * a is two 256-bit integers: a0[0:3] and a1[4:7]
45022 +- * b is two 256-bit integers: b0[0:3] and b1[4:7]
45023 +- */
45024 +-static void mul2_256x256_integer_adx(u64 *const c, const u64 *const a,
45025 +- const u64 *const b)
45026 +-{
45027 +- asm volatile(
45028 +- "xorl %%r14d, %%r14d ;"
45029 +- "movq (%1), %%rdx; " /* A[0] */
45030 +- "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */
45031 +- "xorl %%r10d, %%r10d ;"
45032 +- "movq %%r8, (%0) ;"
45033 +- "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */
45034 +- "adox %%r10, %%r15 ;"
45035 +- "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */
45036 +- "adox %%r8, %%rax ;"
45037 +- "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */
45038 +- "adox %%r10, %%rbx ;"
45039 +- /******************************************/
45040 +- "adox %%r14, %%rcx ;"
45041 +-
45042 +- "movq 8(%1), %%rdx; " /* A[1] */
45043 +- "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */
45044 +- "adox %%r15, %%r8 ;"
45045 +- "movq %%r8, 8(%0) ;"
45046 +- "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */
45047 +- "adox %%r10, %%r9 ;"
45048 +- "adcx %%r9, %%rax ;"
45049 +- "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */
45050 +- "adox %%r8, %%r11 ;"
45051 +- "adcx %%r11, %%rbx ;"
45052 +- "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */
45053 +- "adox %%r10, %%r13 ;"
45054 +- "adcx %%r13, %%rcx ;"
45055 +- /******************************************/
45056 +- "adox %%r14, %%r15 ;"
45057 +- "adcx %%r14, %%r15 ;"
45058 +-
45059 +- "movq 16(%1), %%rdx; " /* A[2] */
45060 +- "xorl %%r10d, %%r10d ;"
45061 +- "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */
45062 +- "adox %%rax, %%r8 ;"
45063 +- "movq %%r8, 16(%0) ;"
45064 +- "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */
45065 +- "adox %%r10, %%r9 ;"
45066 +- "adcx %%r9, %%rbx ;"
45067 +- "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */
45068 +- "adox %%r8, %%r11 ;"
45069 +- "adcx %%r11, %%rcx ;"
45070 +- "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */
45071 +- "adox %%r10, %%r13 ;"
45072 +- "adcx %%r13, %%r15 ;"
45073 +- /******************************************/
45074 +- "adox %%r14, %%rax ;"
45075 +- "adcx %%r14, %%rax ;"
45076 +-
45077 +- "movq 24(%1), %%rdx; " /* A[3] */
45078 +- "xorl %%r10d, %%r10d ;"
45079 +- "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */
45080 +- "adox %%rbx, %%r8 ;"
45081 +- "movq %%r8, 24(%0) ;"
45082 +- "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */
45083 +- "adox %%r10, %%r9 ;"
45084 +- "adcx %%r9, %%rcx ;"
45085 +- "movq %%rcx, 32(%0) ;"
45086 +- "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */
45087 +- "adox %%r8, %%r11 ;"
45088 +- "adcx %%r11, %%r15 ;"
45089 +- "movq %%r15, 40(%0) ;"
45090 +- "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */
45091 +- "adox %%r10, %%r13 ;"
45092 +- "adcx %%r13, %%rax ;"
45093 +- "movq %%rax, 48(%0) ;"
45094 +- /******************************************/
45095 +- "adox %%r14, %%rbx ;"
45096 +- "adcx %%r14, %%rbx ;"
45097 +- "movq %%rbx, 56(%0) ;"
45098 +-
45099 +- "movq 32(%1), %%rdx; " /* C[0] */
45100 +- "mulx 32(%2), %%r8, %%r15; " /* C[0]*D[0] */
45101 +- "xorl %%r10d, %%r10d ;"
45102 +- "movq %%r8, 64(%0);"
45103 +- "mulx 40(%2), %%r10, %%rax; " /* C[0]*D[1] */
45104 +- "adox %%r10, %%r15 ;"
45105 +- "mulx 48(%2), %%r8, %%rbx; " /* C[0]*D[2] */
45106 +- "adox %%r8, %%rax ;"
45107 +- "mulx 56(%2), %%r10, %%rcx; " /* C[0]*D[3] */
45108 +- "adox %%r10, %%rbx ;"
45109 +- /******************************************/
45110 +- "adox %%r14, %%rcx ;"
45111 +-
45112 +- "movq 40(%1), %%rdx; " /* C[1] */
45113 +- "xorl %%r10d, %%r10d ;"
45114 +- "mulx 32(%2), %%r8, %%r9; " /* C[1]*D[0] */
45115 +- "adox %%r15, %%r8 ;"
45116 +- "movq %%r8, 72(%0);"
45117 +- "mulx 40(%2), %%r10, %%r11; " /* C[1]*D[1] */
45118 +- "adox %%r10, %%r9 ;"
45119 +- "adcx %%r9, %%rax ;"
45120 +- "mulx 48(%2), %%r8, %%r13; " /* C[1]*D[2] */
45121 +- "adox %%r8, %%r11 ;"
45122 +- "adcx %%r11, %%rbx ;"
45123 +- "mulx 56(%2), %%r10, %%r15; " /* C[1]*D[3] */
45124 +- "adox %%r10, %%r13 ;"
45125 +- "adcx %%r13, %%rcx ;"
45126 +- /******************************************/
45127 +- "adox %%r14, %%r15 ;"
45128 +- "adcx %%r14, %%r15 ;"
45129 +-
45130 +- "movq 48(%1), %%rdx; " /* C[2] */
45131 +- "xorl %%r10d, %%r10d ;"
45132 +- "mulx 32(%2), %%r8, %%r9; " /* C[2]*D[0] */
45133 +- "adox %%rax, %%r8 ;"
45134 +- "movq %%r8, 80(%0);"
45135 +- "mulx 40(%2), %%r10, %%r11; " /* C[2]*D[1] */
45136 +- "adox %%r10, %%r9 ;"
45137 +- "adcx %%r9, %%rbx ;"
45138 +- "mulx 48(%2), %%r8, %%r13; " /* C[2]*D[2] */
45139 +- "adox %%r8, %%r11 ;"
45140 +- "adcx %%r11, %%rcx ;"
45141 +- "mulx 56(%2), %%r10, %%rax; " /* C[2]*D[3] */
45142 +- "adox %%r10, %%r13 ;"
45143 +- "adcx %%r13, %%r15 ;"
45144 +- /******************************************/
45145 +- "adox %%r14, %%rax ;"
45146 +- "adcx %%r14, %%rax ;"
45147 +-
45148 +- "movq 56(%1), %%rdx; " /* C[3] */
45149 +- "xorl %%r10d, %%r10d ;"
45150 +- "mulx 32(%2), %%r8, %%r9; " /* C[3]*D[0] */
45151 +- "adox %%rbx, %%r8 ;"
45152 +- "movq %%r8, 88(%0);"
45153 +- "mulx 40(%2), %%r10, %%r11; " /* C[3]*D[1] */
45154 +- "adox %%r10, %%r9 ;"
45155 +- "adcx %%r9, %%rcx ;"
45156 +- "movq %%rcx, 96(%0) ;"
45157 +- "mulx 48(%2), %%r8, %%r13; " /* C[3]*D[2] */
45158 +- "adox %%r8, %%r11 ;"
45159 +- "adcx %%r11, %%r15 ;"
45160 +- "movq %%r15, 104(%0) ;"
45161 +- "mulx 56(%2), %%r10, %%rbx; " /* C[3]*D[3] */
45162 +- "adox %%r10, %%r13 ;"
45163 +- "adcx %%r13, %%rax ;"
45164 +- "movq %%rax, 112(%0) ;"
45165 +- /******************************************/
45166 +- "adox %%r14, %%rbx ;"
45167 +- "adcx %%r14, %%rbx ;"
45168 +- "movq %%rbx, 120(%0) ;"
45169 +- :
45170 +- : "r"(c), "r"(a), "r"(b)
45171 +- : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
45172 +- "%r10", "%r11", "%r13", "%r14", "%r15");
45173 +-}
45174 +-
45175 +-static void mul2_256x256_integer_bmi2(u64 *const c, const u64 *const a,
45176 +- const u64 *const b)
45177 ++static __always_inline u64 eq_mask(u64 a, u64 b)
45178 + {
45179 +- asm volatile(
45180 +- "movq (%1), %%rdx; " /* A[0] */
45181 +- "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */
45182 +- "movq %%r8, (%0) ;"
45183 +- "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */
45184 +- "addq %%r10, %%r15 ;"
45185 +- "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */
45186 +- "adcq %%r8, %%rax ;"
45187 +- "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */
45188 +- "adcq %%r10, %%rbx ;"
45189 +- /******************************************/
45190 +- "adcq $0, %%rcx ;"
45191 +-
45192 +- "movq 8(%1), %%rdx; " /* A[1] */
45193 +- "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */
45194 +- "addq %%r15, %%r8 ;"
45195 +- "movq %%r8, 8(%0) ;"
45196 +- "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */
45197 +- "adcq %%r10, %%r9 ;"
45198 +- "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */
45199 +- "adcq %%r8, %%r11 ;"
45200 +- "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */
45201 +- "adcq %%r10, %%r13 ;"
45202 +- /******************************************/
45203 +- "adcq $0, %%r15 ;"
45204 +-
45205 +- "addq %%r9, %%rax ;"
45206 +- "adcq %%r11, %%rbx ;"
45207 +- "adcq %%r13, %%rcx ;"
45208 +- "adcq $0, %%r15 ;"
45209 +-
45210 +- "movq 16(%1), %%rdx; " /* A[2] */
45211 +- "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */
45212 +- "addq %%rax, %%r8 ;"
45213 +- "movq %%r8, 16(%0) ;"
45214 +- "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */
45215 +- "adcq %%r10, %%r9 ;"
45216 +- "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */
45217 +- "adcq %%r8, %%r11 ;"
45218 +- "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */
45219 +- "adcq %%r10, %%r13 ;"
45220 +- /******************************************/
45221 +- "adcq $0, %%rax ;"
45222 +-
45223 +- "addq %%r9, %%rbx ;"
45224 +- "adcq %%r11, %%rcx ;"
45225 +- "adcq %%r13, %%r15 ;"
45226 +- "adcq $0, %%rax ;"
45227 +-
45228 +- "movq 24(%1), %%rdx; " /* A[3] */
45229 +- "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */
45230 +- "addq %%rbx, %%r8 ;"
45231 +- "movq %%r8, 24(%0) ;"
45232 +- "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */
45233 +- "adcq %%r10, %%r9 ;"
45234 +- "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */
45235 +- "adcq %%r8, %%r11 ;"
45236 +- "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */
45237 +- "adcq %%r10, %%r13 ;"
45238 +- /******************************************/
45239 +- "adcq $0, %%rbx ;"
45240 +-
45241 +- "addq %%r9, %%rcx ;"
45242 +- "movq %%rcx, 32(%0) ;"
45243 +- "adcq %%r11, %%r15 ;"
45244 +- "movq %%r15, 40(%0) ;"
45245 +- "adcq %%r13, %%rax ;"
45246 +- "movq %%rax, 48(%0) ;"
45247 +- "adcq $0, %%rbx ;"
45248 +- "movq %%rbx, 56(%0) ;"
45249 +-
45250 +- "movq 32(%1), %%rdx; " /* C[0] */
45251 +- "mulx 32(%2), %%r8, %%r15; " /* C[0]*D[0] */
45252 +- "movq %%r8, 64(%0) ;"
45253 +- "mulx 40(%2), %%r10, %%rax; " /* C[0]*D[1] */
45254 +- "addq %%r10, %%r15 ;"
45255 +- "mulx 48(%2), %%r8, %%rbx; " /* C[0]*D[2] */
45256 +- "adcq %%r8, %%rax ;"
45257 +- "mulx 56(%2), %%r10, %%rcx; " /* C[0]*D[3] */
45258 +- "adcq %%r10, %%rbx ;"
45259 +- /******************************************/
45260 +- "adcq $0, %%rcx ;"
45261 +-
45262 +- "movq 40(%1), %%rdx; " /* C[1] */
45263 +- "mulx 32(%2), %%r8, %%r9; " /* C[1]*D[0] */
45264 +- "addq %%r15, %%r8 ;"
45265 +- "movq %%r8, 72(%0) ;"
45266 +- "mulx 40(%2), %%r10, %%r11; " /* C[1]*D[1] */
45267 +- "adcq %%r10, %%r9 ;"
45268 +- "mulx 48(%2), %%r8, %%r13; " /* C[1]*D[2] */
45269 +- "adcq %%r8, %%r11 ;"
45270 +- "mulx 56(%2), %%r10, %%r15; " /* C[1]*D[3] */
45271 +- "adcq %%r10, %%r13 ;"
45272 +- /******************************************/
45273 +- "adcq $0, %%r15 ;"
45274 +-
45275 +- "addq %%r9, %%rax ;"
45276 +- "adcq %%r11, %%rbx ;"
45277 +- "adcq %%r13, %%rcx ;"
45278 +- "adcq $0, %%r15 ;"
45279 +-
45280 +- "movq 48(%1), %%rdx; " /* C[2] */
45281 +- "mulx 32(%2), %%r8, %%r9; " /* C[2]*D[0] */
45282 +- "addq %%rax, %%r8 ;"
45283 +- "movq %%r8, 80(%0) ;"
45284 +- "mulx 40(%2), %%r10, %%r11; " /* C[2]*D[1] */
45285 +- "adcq %%r10, %%r9 ;"
45286 +- "mulx 48(%2), %%r8, %%r13; " /* C[2]*D[2] */
45287 +- "adcq %%r8, %%r11 ;"
45288 +- "mulx 56(%2), %%r10, %%rax; " /* C[2]*D[3] */
45289 +- "adcq %%r10, %%r13 ;"
45290 +- /******************************************/
45291 +- "adcq $0, %%rax ;"
45292 +-
45293 +- "addq %%r9, %%rbx ;"
45294 +- "adcq %%r11, %%rcx ;"
45295 +- "adcq %%r13, %%r15 ;"
45296 +- "adcq $0, %%rax ;"
45297 +-
45298 +- "movq 56(%1), %%rdx; " /* C[3] */
45299 +- "mulx 32(%2), %%r8, %%r9; " /* C[3]*D[0] */
45300 +- "addq %%rbx, %%r8 ;"
45301 +- "movq %%r8, 88(%0) ;"
45302 +- "mulx 40(%2), %%r10, %%r11; " /* C[3]*D[1] */
45303 +- "adcq %%r10, %%r9 ;"
45304 +- "mulx 48(%2), %%r8, %%r13; " /* C[3]*D[2] */
45305 +- "adcq %%r8, %%r11 ;"
45306 +- "mulx 56(%2), %%r10, %%rbx; " /* C[3]*D[3] */
45307 +- "adcq %%r10, %%r13 ;"
45308 +- /******************************************/
45309 +- "adcq $0, %%rbx ;"
45310 +-
45311 +- "addq %%r9, %%rcx ;"
45312 +- "movq %%rcx, 96(%0) ;"
45313 +- "adcq %%r11, %%r15 ;"
45314 +- "movq %%r15, 104(%0) ;"
45315 +- "adcq %%r13, %%rax ;"
45316 +- "movq %%rax, 112(%0) ;"
45317 +- "adcq $0, %%rbx ;"
45318 +- "movq %%rbx, 120(%0) ;"
45319 +- :
45320 +- : "r"(c), "r"(a), "r"(b)
45321 +- : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
45322 +- "%r10", "%r11", "%r13", "%r15");
45323 ++ u64 x = a ^ b;
45324 ++ u64 minus_x = ~x + (u64)1U;
45325 ++ u64 x_or_minus_x = x | minus_x;
45326 ++ u64 xnx = x_or_minus_x >> (u32)63U;
45327 ++ return xnx - (u64)1U;
45328 + }
45329 +
45330 +-static void sqr2_256x256_integer_adx(u64 *const c, const u64 *const a)
45331 ++static __always_inline u64 gte_mask(u64 a, u64 b)
45332 + {
45333 +- asm volatile(
45334 +- "movq (%1), %%rdx ;" /* A[0] */
45335 +- "mulx 8(%1), %%r8, %%r14 ;" /* A[1]*A[0] */
45336 +- "xorl %%r15d, %%r15d;"
45337 +- "mulx 16(%1), %%r9, %%r10 ;" /* A[2]*A[0] */
45338 +- "adcx %%r14, %%r9 ;"
45339 +- "mulx 24(%1), %%rax, %%rcx ;" /* A[3]*A[0] */
45340 +- "adcx %%rax, %%r10 ;"
45341 +- "movq 24(%1), %%rdx ;" /* A[3] */
45342 +- "mulx 8(%1), %%r11, %%rbx ;" /* A[1]*A[3] */
45343 +- "adcx %%rcx, %%r11 ;"
45344 +- "mulx 16(%1), %%rax, %%r13 ;" /* A[2]*A[3] */
45345 +- "adcx %%rax, %%rbx ;"
45346 +- "movq 8(%1), %%rdx ;" /* A[1] */
45347 +- "adcx %%r15, %%r13 ;"
45348 +- "mulx 16(%1), %%rax, %%rcx ;" /* A[2]*A[1] */
45349 +- "movq $0, %%r14 ;"
45350 +- /******************************************/
45351 +- "adcx %%r15, %%r14 ;"
45352 +-
45353 +- "xorl %%r15d, %%r15d;"
45354 +- "adox %%rax, %%r10 ;"
45355 +- "adcx %%r8, %%r8 ;"
45356 +- "adox %%rcx, %%r11 ;"
45357 +- "adcx %%r9, %%r9 ;"
45358 +- "adox %%r15, %%rbx ;"
45359 +- "adcx %%r10, %%r10 ;"
45360 +- "adox %%r15, %%r13 ;"
45361 +- "adcx %%r11, %%r11 ;"
45362 +- "adox %%r15, %%r14 ;"
45363 +- "adcx %%rbx, %%rbx ;"
45364 +- "adcx %%r13, %%r13 ;"
45365 +- "adcx %%r14, %%r14 ;"
45366 +-
45367 +- "movq (%1), %%rdx ;"
45368 +- "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */
45369 +- /*******************/
45370 +- "movq %%rax, 0(%0) ;"
45371 +- "addq %%rcx, %%r8 ;"
45372 +- "movq %%r8, 8(%0) ;"
45373 +- "movq 8(%1), %%rdx ;"
45374 +- "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */
45375 +- "adcq %%rax, %%r9 ;"
45376 +- "movq %%r9, 16(%0) ;"
45377 +- "adcq %%rcx, %%r10 ;"
45378 +- "movq %%r10, 24(%0) ;"
45379 +- "movq 16(%1), %%rdx ;"
45380 +- "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */
45381 +- "adcq %%rax, %%r11 ;"
45382 +- "movq %%r11, 32(%0) ;"
45383 +- "adcq %%rcx, %%rbx ;"
45384 +- "movq %%rbx, 40(%0) ;"
45385 +- "movq 24(%1), %%rdx ;"
45386 +- "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */
45387 +- "adcq %%rax, %%r13 ;"
45388 +- "movq %%r13, 48(%0) ;"
45389 +- "adcq %%rcx, %%r14 ;"
45390 +- "movq %%r14, 56(%0) ;"
45391 +-
45392 +-
45393 +- "movq 32(%1), %%rdx ;" /* B[0] */
45394 +- "mulx 40(%1), %%r8, %%r14 ;" /* B[1]*B[0] */
45395 +- "xorl %%r15d, %%r15d;"
45396 +- "mulx 48(%1), %%r9, %%r10 ;" /* B[2]*B[0] */
45397 +- "adcx %%r14, %%r9 ;"
45398 +- "mulx 56(%1), %%rax, %%rcx ;" /* B[3]*B[0] */
45399 +- "adcx %%rax, %%r10 ;"
45400 +- "movq 56(%1), %%rdx ;" /* B[3] */
45401 +- "mulx 40(%1), %%r11, %%rbx ;" /* B[1]*B[3] */
45402 +- "adcx %%rcx, %%r11 ;"
45403 +- "mulx 48(%1), %%rax, %%r13 ;" /* B[2]*B[3] */
45404 +- "adcx %%rax, %%rbx ;"
45405 +- "movq 40(%1), %%rdx ;" /* B[1] */
45406 +- "adcx %%r15, %%r13 ;"
45407 +- "mulx 48(%1), %%rax, %%rcx ;" /* B[2]*B[1] */
45408 +- "movq $0, %%r14 ;"
45409 +- /******************************************/
45410 +- "adcx %%r15, %%r14 ;"
45411 +-
45412 +- "xorl %%r15d, %%r15d;"
45413 +- "adox %%rax, %%r10 ;"
45414 +- "adcx %%r8, %%r8 ;"
45415 +- "adox %%rcx, %%r11 ;"
45416 +- "adcx %%r9, %%r9 ;"
45417 +- "adox %%r15, %%rbx ;"
45418 +- "adcx %%r10, %%r10 ;"
45419 +- "adox %%r15, %%r13 ;"
45420 +- "adcx %%r11, %%r11 ;"
45421 +- "adox %%r15, %%r14 ;"
45422 +- "adcx %%rbx, %%rbx ;"
45423 +- "adcx %%r13, %%r13 ;"
45424 +- "adcx %%r14, %%r14 ;"
45425 +-
45426 +- "movq 32(%1), %%rdx ;"
45427 +- "mulx %%rdx, %%rax, %%rcx ;" /* B[0]^2 */
45428 +- /*******************/
45429 +- "movq %%rax, 64(%0) ;"
45430 +- "addq %%rcx, %%r8 ;"
45431 +- "movq %%r8, 72(%0) ;"
45432 +- "movq 40(%1), %%rdx ;"
45433 +- "mulx %%rdx, %%rax, %%rcx ;" /* B[1]^2 */
45434 +- "adcq %%rax, %%r9 ;"
45435 +- "movq %%r9, 80(%0) ;"
45436 +- "adcq %%rcx, %%r10 ;"
45437 +- "movq %%r10, 88(%0) ;"
45438 +- "movq 48(%1), %%rdx ;"
45439 +- "mulx %%rdx, %%rax, %%rcx ;" /* B[2]^2 */
45440 +- "adcq %%rax, %%r11 ;"
45441 +- "movq %%r11, 96(%0) ;"
45442 +- "adcq %%rcx, %%rbx ;"
45443 +- "movq %%rbx, 104(%0) ;"
45444 +- "movq 56(%1), %%rdx ;"
45445 +- "mulx %%rdx, %%rax, %%rcx ;" /* B[3]^2 */
45446 +- "adcq %%rax, %%r13 ;"
45447 +- "movq %%r13, 112(%0) ;"
45448 +- "adcq %%rcx, %%r14 ;"
45449 +- "movq %%r14, 120(%0) ;"
45450 +- :
45451 +- : "r"(c), "r"(a)
45452 +- : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
45453 +- "%r10", "%r11", "%r13", "%r14", "%r15");
45454 ++ u64 x = a;
45455 ++ u64 y = b;
45456 ++ u64 x_xor_y = x ^ y;
45457 ++ u64 x_sub_y = x - y;
45458 ++ u64 x_sub_y_xor_y = x_sub_y ^ y;
45459 ++ u64 q = x_xor_y | x_sub_y_xor_y;
45460 ++ u64 x_xor_q = x ^ q;
45461 ++ u64 x_xor_q_ = x_xor_q >> (u32)63U;
45462 ++ return x_xor_q_ - (u64)1U;
45463 + }
45464 +
45465 +-static void sqr2_256x256_integer_bmi2(u64 *const c, const u64 *const a)
45466 ++/* Computes the addition of four-element f1 with value in f2
45467 ++ * and returns the carry (if any) */
45468 ++static inline u64 add_scalar(u64 *out, const u64 *f1, u64 f2)
45469 + {
45470 +- asm volatile(
45471 +- "movq 8(%1), %%rdx ;" /* A[1] */
45472 +- "mulx (%1), %%r8, %%r9 ;" /* A[0]*A[1] */
45473 +- "mulx 16(%1), %%r10, %%r11 ;" /* A[2]*A[1] */
45474 +- "mulx 24(%1), %%rcx, %%r14 ;" /* A[3]*A[1] */
45475 +-
45476 +- "movq 16(%1), %%rdx ;" /* A[2] */
45477 +- "mulx 24(%1), %%r15, %%r13 ;" /* A[3]*A[2] */
45478 +- "mulx (%1), %%rax, %%rdx ;" /* A[0]*A[2] */
45479 +-
45480 +- "addq %%rax, %%r9 ;"
45481 +- "adcq %%rdx, %%r10 ;"
45482 +- "adcq %%rcx, %%r11 ;"
45483 +- "adcq %%r14, %%r15 ;"
45484 +- "adcq $0, %%r13 ;"
45485 +- "movq $0, %%r14 ;"
45486 +- "adcq $0, %%r14 ;"
45487 +-
45488 +- "movq (%1), %%rdx ;" /* A[0] */
45489 +- "mulx 24(%1), %%rax, %%rcx ;" /* A[0]*A[3] */
45490 +-
45491 +- "addq %%rax, %%r10 ;"
45492 +- "adcq %%rcx, %%r11 ;"
45493 +- "adcq $0, %%r15 ;"
45494 +- "adcq $0, %%r13 ;"
45495 +- "adcq $0, %%r14 ;"
45496 +-
45497 +- "shldq $1, %%r13, %%r14 ;"
45498 +- "shldq $1, %%r15, %%r13 ;"
45499 +- "shldq $1, %%r11, %%r15 ;"
45500 +- "shldq $1, %%r10, %%r11 ;"
45501 +- "shldq $1, %%r9, %%r10 ;"
45502 +- "shldq $1, %%r8, %%r9 ;"
45503 +- "shlq $1, %%r8 ;"
45504 +-
45505 +- /*******************/
45506 +- "mulx %%rdx, %%rax, %%rcx ; " /* A[0]^2 */
45507 +- /*******************/
45508 +- "movq %%rax, 0(%0) ;"
45509 +- "addq %%rcx, %%r8 ;"
45510 +- "movq %%r8, 8(%0) ;"
45511 +- "movq 8(%1), %%rdx ;"
45512 +- "mulx %%rdx, %%rax, %%rcx ; " /* A[1]^2 */
45513 +- "adcq %%rax, %%r9 ;"
45514 +- "movq %%r9, 16(%0) ;"
45515 +- "adcq %%rcx, %%r10 ;"
45516 +- "movq %%r10, 24(%0) ;"
45517 +- "movq 16(%1), %%rdx ;"
45518 +- "mulx %%rdx, %%rax, %%rcx ; " /* A[2]^2 */
45519 +- "adcq %%rax, %%r11 ;"
45520 +- "movq %%r11, 32(%0) ;"
45521 +- "adcq %%rcx, %%r15 ;"
45522 +- "movq %%r15, 40(%0) ;"
45523 +- "movq 24(%1), %%rdx ;"
45524 +- "mulx %%rdx, %%rax, %%rcx ; " /* A[3]^2 */
45525 +- "adcq %%rax, %%r13 ;"
45526 +- "movq %%r13, 48(%0) ;"
45527 +- "adcq %%rcx, %%r14 ;"
45528 +- "movq %%r14, 56(%0) ;"
45529 +-
45530 +- "movq 40(%1), %%rdx ;" /* B[1] */
45531 +- "mulx 32(%1), %%r8, %%r9 ;" /* B[0]*B[1] */
45532 +- "mulx 48(%1), %%r10, %%r11 ;" /* B[2]*B[1] */
45533 +- "mulx 56(%1), %%rcx, %%r14 ;" /* B[3]*B[1] */
45534 +-
45535 +- "movq 48(%1), %%rdx ;" /* B[2] */
45536 +- "mulx 56(%1), %%r15, %%r13 ;" /* B[3]*B[2] */
45537 +- "mulx 32(%1), %%rax, %%rdx ;" /* B[0]*B[2] */
45538 +-
45539 +- "addq %%rax, %%r9 ;"
45540 +- "adcq %%rdx, %%r10 ;"
45541 +- "adcq %%rcx, %%r11 ;"
45542 +- "adcq %%r14, %%r15 ;"
45543 +- "adcq $0, %%r13 ;"
45544 +- "movq $0, %%r14 ;"
45545 +- "adcq $0, %%r14 ;"
45546 +-
45547 +- "movq 32(%1), %%rdx ;" /* B[0] */
45548 +- "mulx 56(%1), %%rax, %%rcx ;" /* B[0]*B[3] */
45549 +-
45550 +- "addq %%rax, %%r10 ;"
45551 +- "adcq %%rcx, %%r11 ;"
45552 +- "adcq $0, %%r15 ;"
45553 +- "adcq $0, %%r13 ;"
45554 +- "adcq $0, %%r14 ;"
45555 +-
45556 +- "shldq $1, %%r13, %%r14 ;"
45557 +- "shldq $1, %%r15, %%r13 ;"
45558 +- "shldq $1, %%r11, %%r15 ;"
45559 +- "shldq $1, %%r10, %%r11 ;"
45560 +- "shldq $1, %%r9, %%r10 ;"
45561 +- "shldq $1, %%r8, %%r9 ;"
45562 +- "shlq $1, %%r8 ;"
45563 +-
45564 +- /*******************/
45565 +- "mulx %%rdx, %%rax, %%rcx ; " /* B[0]^2 */
45566 +- /*******************/
45567 +- "movq %%rax, 64(%0) ;"
45568 +- "addq %%rcx, %%r8 ;"
45569 +- "movq %%r8, 72(%0) ;"
45570 +- "movq 40(%1), %%rdx ;"
45571 +- "mulx %%rdx, %%rax, %%rcx ; " /* B[1]^2 */
45572 +- "adcq %%rax, %%r9 ;"
45573 +- "movq %%r9, 80(%0) ;"
45574 +- "adcq %%rcx, %%r10 ;"
45575 +- "movq %%r10, 88(%0) ;"
45576 +- "movq 48(%1), %%rdx ;"
45577 +- "mulx %%rdx, %%rax, %%rcx ; " /* B[2]^2 */
45578 +- "adcq %%rax, %%r11 ;"
45579 +- "movq %%r11, 96(%0) ;"
45580 +- "adcq %%rcx, %%r15 ;"
45581 +- "movq %%r15, 104(%0) ;"
45582 +- "movq 56(%1), %%rdx ;"
45583 +- "mulx %%rdx, %%rax, %%rcx ; " /* B[3]^2 */
45584 +- "adcq %%rax, %%r13 ;"
45585 +- "movq %%r13, 112(%0) ;"
45586 +- "adcq %%rcx, %%r14 ;"
45587 +- "movq %%r14, 120(%0) ;"
45588 +- :
45589 +- : "r"(c), "r"(a)
45590 +- : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
45591 +- "%r11", "%r13", "%r14", "%r15");
45592 +-}
45593 ++ u64 carry_r;
45594 +
45595 +-static void red_eltfp25519_2w_adx(u64 *const c, const u64 *const a)
45596 +-{
45597 + asm volatile(
45598 +- "movl $38, %%edx; " /* 2*c = 38 = 2^256 */
45599 +- "mulx 32(%1), %%r8, %%r10; " /* c*C[4] */
45600 +- "xorl %%ebx, %%ebx ;"
45601 +- "adox (%1), %%r8 ;"
45602 +- "mulx 40(%1), %%r9, %%r11; " /* c*C[5] */
45603 +- "adcx %%r10, %%r9 ;"
45604 +- "adox 8(%1), %%r9 ;"
45605 +- "mulx 48(%1), %%r10, %%rax; " /* c*C[6] */
45606 +- "adcx %%r11, %%r10 ;"
45607 +- "adox 16(%1), %%r10 ;"
45608 +- "mulx 56(%1), %%r11, %%rcx; " /* c*C[7] */
45609 +- "adcx %%rax, %%r11 ;"
45610 +- "adox 24(%1), %%r11 ;"
45611 +- /***************************************/
45612 +- "adcx %%rbx, %%rcx ;"
45613 +- "adox %%rbx, %%rcx ;"
45614 +- "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */
45615 +- "adcx %%rcx, %%r8 ;"
45616 +- "adcx %%rbx, %%r9 ;"
45617 +- "movq %%r9, 8(%0) ;"
45618 +- "adcx %%rbx, %%r10 ;"
45619 +- "movq %%r10, 16(%0) ;"
45620 +- "adcx %%rbx, %%r11 ;"
45621 +- "movq %%r11, 24(%0) ;"
45622 +- "mov $0, %%ecx ;"
45623 +- "cmovc %%edx, %%ecx ;"
45624 +- "addq %%rcx, %%r8 ;"
45625 +- "movq %%r8, (%0) ;"
45626 +-
45627 +- "mulx 96(%1), %%r8, %%r10; " /* c*C[4] */
45628 +- "xorl %%ebx, %%ebx ;"
45629 +- "adox 64(%1), %%r8 ;"
45630 +- "mulx 104(%1), %%r9, %%r11; " /* c*C[5] */
45631 +- "adcx %%r10, %%r9 ;"
45632 +- "adox 72(%1), %%r9 ;"
45633 +- "mulx 112(%1), %%r10, %%rax; " /* c*C[6] */
45634 +- "adcx %%r11, %%r10 ;"
45635 +- "adox 80(%1), %%r10 ;"
45636 +- "mulx 120(%1), %%r11, %%rcx; " /* c*C[7] */
45637 +- "adcx %%rax, %%r11 ;"
45638 +- "adox 88(%1), %%r11 ;"
45639 +- /****************************************/
45640 +- "adcx %%rbx, %%rcx ;"
45641 +- "adox %%rbx, %%rcx ;"
45642 +- "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */
45643 +- "adcx %%rcx, %%r8 ;"
45644 +- "adcx %%rbx, %%r9 ;"
45645 +- "movq %%r9, 40(%0) ;"
45646 +- "adcx %%rbx, %%r10 ;"
45647 +- "movq %%r10, 48(%0) ;"
45648 +- "adcx %%rbx, %%r11 ;"
45649 +- "movq %%r11, 56(%0) ;"
45650 +- "mov $0, %%ecx ;"
45651 +- "cmovc %%edx, %%ecx ;"
45652 +- "addq %%rcx, %%r8 ;"
45653 +- "movq %%r8, 32(%0) ;"
45654 +- :
45655 +- : "r"(c), "r"(a)
45656 +- : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
45657 +- "%r10", "%r11");
45658 +-}
45659 ++ /* Clear registers to propagate the carry bit */
45660 ++ " xor %%r8, %%r8;"
45661 ++ " xor %%r9, %%r9;"
45662 ++ " xor %%r10, %%r10;"
45663 ++ " xor %%r11, %%r11;"
45664 ++ " xor %1, %1;"
45665 ++
45666 ++ /* Begin addition chain */
45667 ++ " addq 0(%3), %0;"
45668 ++ " movq %0, 0(%2);"
45669 ++ " adcxq 8(%3), %%r8;"
45670 ++ " movq %%r8, 8(%2);"
45671 ++ " adcxq 16(%3), %%r9;"
45672 ++ " movq %%r9, 16(%2);"
45673 ++ " adcxq 24(%3), %%r10;"
45674 ++ " movq %%r10, 24(%2);"
45675 ++
45676 ++ /* Return the carry bit in a register */
45677 ++ " adcx %%r11, %1;"
45678 ++ : "+&r" (f2), "=&r" (carry_r)
45679 ++ : "r" (out), "r" (f1)
45680 ++ : "%r8", "%r9", "%r10", "%r11", "memory", "cc"
45681 ++ );
45682 +
45683 +-static void red_eltfp25519_2w_bmi2(u64 *const c, const u64 *const a)
45684 +-{
45685 +- asm volatile(
45686 +- "movl $38, %%edx ; " /* 2*c = 38 = 2^256 */
45687 +- "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */
45688 +- "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */
45689 +- "addq %%r10, %%r9 ;"
45690 +- "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */
45691 +- "adcq %%r11, %%r10 ;"
45692 +- "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */
45693 +- "adcq %%rax, %%r11 ;"
45694 +- /***************************************/
45695 +- "adcq $0, %%rcx ;"
45696 +- "addq (%1), %%r8 ;"
45697 +- "adcq 8(%1), %%r9 ;"
45698 +- "adcq 16(%1), %%r10 ;"
45699 +- "adcq 24(%1), %%r11 ;"
45700 +- "adcq $0, %%rcx ;"
45701 +- "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */
45702 +- "addq %%rcx, %%r8 ;"
45703 +- "adcq $0, %%r9 ;"
45704 +- "movq %%r9, 8(%0) ;"
45705 +- "adcq $0, %%r10 ;"
45706 +- "movq %%r10, 16(%0) ;"
45707 +- "adcq $0, %%r11 ;"
45708 +- "movq %%r11, 24(%0) ;"
45709 +- "mov $0, %%ecx ;"
45710 +- "cmovc %%edx, %%ecx ;"
45711 +- "addq %%rcx, %%r8 ;"
45712 +- "movq %%r8, (%0) ;"
45713 +-
45714 +- "mulx 96(%1), %%r8, %%r10 ;" /* c*C[4] */
45715 +- "mulx 104(%1), %%r9, %%r11 ;" /* c*C[5] */
45716 +- "addq %%r10, %%r9 ;"
45717 +- "mulx 112(%1), %%r10, %%rax ;" /* c*C[6] */
45718 +- "adcq %%r11, %%r10 ;"
45719 +- "mulx 120(%1), %%r11, %%rcx ;" /* c*C[7] */
45720 +- "adcq %%rax, %%r11 ;"
45721 +- /****************************************/
45722 +- "adcq $0, %%rcx ;"
45723 +- "addq 64(%1), %%r8 ;"
45724 +- "adcq 72(%1), %%r9 ;"
45725 +- "adcq 80(%1), %%r10 ;"
45726 +- "adcq 88(%1), %%r11 ;"
45727 +- "adcq $0, %%rcx ;"
45728 +- "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */
45729 +- "addq %%rcx, %%r8 ;"
45730 +- "adcq $0, %%r9 ;"
45731 +- "movq %%r9, 40(%0) ;"
45732 +- "adcq $0, %%r10 ;"
45733 +- "movq %%r10, 48(%0) ;"
45734 +- "adcq $0, %%r11 ;"
45735 +- "movq %%r11, 56(%0) ;"
45736 +- "mov $0, %%ecx ;"
45737 +- "cmovc %%edx, %%ecx ;"
45738 +- "addq %%rcx, %%r8 ;"
45739 +- "movq %%r8, 32(%0) ;"
45740 +- :
45741 +- : "r"(c), "r"(a)
45742 +- : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
45743 +- "%r11");
45744 ++ return carry_r;
45745 + }
45746 +
45747 +-static void mul_256x256_integer_adx(u64 *const c, const u64 *const a,
45748 +- const u64 *const b)
45749 ++/* Computes the field addition of two field elements */
45750 ++static inline void fadd(u64 *out, const u64 *f1, const u64 *f2)
45751 + {
45752 + asm volatile(
45753 +- "movq (%1), %%rdx; " /* A[0] */
45754 +- "mulx (%2), %%r8, %%r9; " /* A[0]*B[0] */
45755 +- "xorl %%r10d, %%r10d ;"
45756 +- "movq %%r8, (%0) ;"
45757 +- "mulx 8(%2), %%r10, %%r11; " /* A[0]*B[1] */
45758 +- "adox %%r9, %%r10 ;"
45759 +- "movq %%r10, 8(%0) ;"
45760 +- "mulx 16(%2), %%r15, %%r13; " /* A[0]*B[2] */
45761 +- "adox %%r11, %%r15 ;"
45762 +- "mulx 24(%2), %%r14, %%rdx; " /* A[0]*B[3] */
45763 +- "adox %%r13, %%r14 ;"
45764 +- "movq $0, %%rax ;"
45765 +- /******************************************/
45766 +- "adox %%rdx, %%rax ;"
45767 +-
45768 +- "movq 8(%1), %%rdx; " /* A[1] */
45769 +- "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */
45770 +- "xorl %%r10d, %%r10d ;"
45771 +- "adcx 8(%0), %%r8 ;"
45772 +- "movq %%r8, 8(%0) ;"
45773 +- "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */
45774 +- "adox %%r9, %%r10 ;"
45775 +- "adcx %%r15, %%r10 ;"
45776 +- "movq %%r10, 16(%0) ;"
45777 +- "mulx 16(%2), %%r15, %%r13; " /* A[1]*B[2] */
45778 +- "adox %%r11, %%r15 ;"
45779 +- "adcx %%r14, %%r15 ;"
45780 +- "movq $0, %%r8 ;"
45781 +- "mulx 24(%2), %%r14, %%rdx; " /* A[1]*B[3] */
45782 +- "adox %%r13, %%r14 ;"
45783 +- "adcx %%rax, %%r14 ;"
45784 +- "movq $0, %%rax ;"
45785 +- /******************************************/
45786 +- "adox %%rdx, %%rax ;"
45787 +- "adcx %%r8, %%rax ;"
45788 +-
45789 +- "movq 16(%1), %%rdx; " /* A[2] */
45790 +- "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */
45791 +- "xorl %%r10d, %%r10d ;"
45792 +- "adcx 16(%0), %%r8 ;"
45793 +- "movq %%r8, 16(%0) ;"
45794 +- "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */
45795 +- "adox %%r9, %%r10 ;"
45796 +- "adcx %%r15, %%r10 ;"
45797 +- "movq %%r10, 24(%0) ;"
45798 +- "mulx 16(%2), %%r15, %%r13; " /* A[2]*B[2] */
45799 +- "adox %%r11, %%r15 ;"
45800 +- "adcx %%r14, %%r15 ;"
45801 +- "movq $0, %%r8 ;"
45802 +- "mulx 24(%2), %%r14, %%rdx; " /* A[2]*B[3] */
45803 +- "adox %%r13, %%r14 ;"
45804 +- "adcx %%rax, %%r14 ;"
45805 +- "movq $0, %%rax ;"
45806 +- /******************************************/
45807 +- "adox %%rdx, %%rax ;"
45808 +- "adcx %%r8, %%rax ;"
45809 +-
45810 +- "movq 24(%1), %%rdx; " /* A[3] */
45811 +- "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */
45812 +- "xorl %%r10d, %%r10d ;"
45813 +- "adcx 24(%0), %%r8 ;"
45814 +- "movq %%r8, 24(%0) ;"
45815 +- "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */
45816 +- "adox %%r9, %%r10 ;"
45817 +- "adcx %%r15, %%r10 ;"
45818 +- "movq %%r10, 32(%0) ;"
45819 +- "mulx 16(%2), %%r15, %%r13; " /* A[3]*B[2] */
45820 +- "adox %%r11, %%r15 ;"
45821 +- "adcx %%r14, %%r15 ;"
45822 +- "movq %%r15, 40(%0) ;"
45823 +- "movq $0, %%r8 ;"
45824 +- "mulx 24(%2), %%r14, %%rdx; " /* A[3]*B[3] */
45825 +- "adox %%r13, %%r14 ;"
45826 +- "adcx %%rax, %%r14 ;"
45827 +- "movq %%r14, 48(%0) ;"
45828 +- "movq $0, %%rax ;"
45829 +- /******************************************/
45830 +- "adox %%rdx, %%rax ;"
45831 +- "adcx %%r8, %%rax ;"
45832 +- "movq %%rax, 56(%0) ;"
45833 +- :
45834 +- : "r"(c), "r"(a), "r"(b)
45835 +- : "memory", "cc", "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11",
45836 +- "%r13", "%r14", "%r15");
45837 ++ /* Compute the raw addition of f1 + f2 */
45838 ++ " movq 0(%0), %%r8;"
45839 ++ " addq 0(%2), %%r8;"
45840 ++ " movq 8(%0), %%r9;"
45841 ++ " adcxq 8(%2), %%r9;"
45842 ++ " movq 16(%0), %%r10;"
45843 ++ " adcxq 16(%2), %%r10;"
45844 ++ " movq 24(%0), %%r11;"
45845 ++ " adcxq 24(%2), %%r11;"
45846 ++
45847 ++ /* Wrap the result back into the field */
45848 ++
45849 ++ /* Step 1: Compute carry*38 */
45850 ++ " mov $0, %%rax;"
45851 ++ " mov $38, %0;"
45852 ++ " cmovc %0, %%rax;"
45853 ++
45854 ++ /* Step 2: Add carry*38 to the original sum */
45855 ++ " xor %%rcx, %%rcx;"
45856 ++ " add %%rax, %%r8;"
45857 ++ " adcx %%rcx, %%r9;"
45858 ++ " movq %%r9, 8(%1);"
45859 ++ " adcx %%rcx, %%r10;"
45860 ++ " movq %%r10, 16(%1);"
45861 ++ " adcx %%rcx, %%r11;"
45862 ++ " movq %%r11, 24(%1);"
45863 ++
45864 ++ /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
45865 ++ " mov $0, %%rax;"
45866 ++ " cmovc %0, %%rax;"
45867 ++ " add %%rax, %%r8;"
45868 ++ " movq %%r8, 0(%1);"
45869 ++ : "+&r" (f2)
45870 ++ : "r" (out), "r" (f1)
45871 ++ : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc"
45872 ++ );
45873 + }
45874 +
45875 +-static void mul_256x256_integer_bmi2(u64 *const c, const u64 *const a,
45876 +- const u64 *const b)
45877 ++/* Computes the field subtraction of two field elements */
45878 ++static inline void fsub(u64 *out, const u64 *f1, const u64 *f2)
45879 + {
45880 + asm volatile(
45881 +- "movq (%1), %%rdx; " /* A[0] */
45882 +- "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */
45883 +- "movq %%r8, (%0) ;"
45884 +- "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */
45885 +- "addq %%r10, %%r15 ;"
45886 +- "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */
45887 +- "adcq %%r8, %%rax ;"
45888 +- "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */
45889 +- "adcq %%r10, %%rbx ;"
45890 +- /******************************************/
45891 +- "adcq $0, %%rcx ;"
45892 +-
45893 +- "movq 8(%1), %%rdx; " /* A[1] */
45894 +- "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */
45895 +- "addq %%r15, %%r8 ;"
45896 +- "movq %%r8, 8(%0) ;"
45897 +- "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */
45898 +- "adcq %%r10, %%r9 ;"
45899 +- "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */
45900 +- "adcq %%r8, %%r11 ;"
45901 +- "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */
45902 +- "adcq %%r10, %%r13 ;"
45903 +- /******************************************/
45904 +- "adcq $0, %%r15 ;"
45905 +-
45906 +- "addq %%r9, %%rax ;"
45907 +- "adcq %%r11, %%rbx ;"
45908 +- "adcq %%r13, %%rcx ;"
45909 +- "adcq $0, %%r15 ;"
45910 +-
45911 +- "movq 16(%1), %%rdx; " /* A[2] */
45912 +- "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */
45913 +- "addq %%rax, %%r8 ;"
45914 +- "movq %%r8, 16(%0) ;"
45915 +- "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */
45916 +- "adcq %%r10, %%r9 ;"
45917 +- "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */
45918 +- "adcq %%r8, %%r11 ;"
45919 +- "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */
45920 +- "adcq %%r10, %%r13 ;"
45921 +- /******************************************/
45922 +- "adcq $0, %%rax ;"
45923 +-
45924 +- "addq %%r9, %%rbx ;"
45925 +- "adcq %%r11, %%rcx ;"
45926 +- "adcq %%r13, %%r15 ;"
45927 +- "adcq $0, %%rax ;"
45928 +-
45929 +- "movq 24(%1), %%rdx; " /* A[3] */
45930 +- "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */
45931 +- "addq %%rbx, %%r8 ;"
45932 +- "movq %%r8, 24(%0) ;"
45933 +- "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */
45934 +- "adcq %%r10, %%r9 ;"
45935 +- "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */
45936 +- "adcq %%r8, %%r11 ;"
45937 +- "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */
45938 +- "adcq %%r10, %%r13 ;"
45939 +- /******************************************/
45940 +- "adcq $0, %%rbx ;"
45941 +-
45942 +- "addq %%r9, %%rcx ;"
45943 +- "movq %%rcx, 32(%0) ;"
45944 +- "adcq %%r11, %%r15 ;"
45945 +- "movq %%r15, 40(%0) ;"
45946 +- "adcq %%r13, %%rax ;"
45947 +- "movq %%rax, 48(%0) ;"
45948 +- "adcq $0, %%rbx ;"
45949 +- "movq %%rbx, 56(%0) ;"
45950 +- :
45951 +- : "r"(c), "r"(a), "r"(b)
45952 +- : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
45953 +- "%r10", "%r11", "%r13", "%r15");
45954 ++ /* Compute the raw subtraction of f1-f2 */
45955 ++ " movq 0(%1), %%r8;"
45956 ++ " subq 0(%2), %%r8;"
45957 ++ " movq 8(%1), %%r9;"
45958 ++ " sbbq 8(%2), %%r9;"
45959 ++ " movq 16(%1), %%r10;"
45960 ++ " sbbq 16(%2), %%r10;"
45961 ++ " movq 24(%1), %%r11;"
45962 ++ " sbbq 24(%2), %%r11;"
45963 ++
45964 ++ /* Wrap the result back into the field */
45965 ++
45966 ++ /* Step 1: Compute carry*38 */
45967 ++ " mov $0, %%rax;"
45968 ++ " mov $38, %%rcx;"
45969 ++ " cmovc %%rcx, %%rax;"
45970 ++
45971 ++ /* Step 2: Subtract carry*38 from the original difference */
45972 ++ " sub %%rax, %%r8;"
45973 ++ " sbb $0, %%r9;"
45974 ++ " sbb $0, %%r10;"
45975 ++ " sbb $0, %%r11;"
45976 ++
45977 ++ /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
45978 ++ " mov $0, %%rax;"
45979 ++ " cmovc %%rcx, %%rax;"
45980 ++ " sub %%rax, %%r8;"
45981 ++
45982 ++ /* Store the result */
45983 ++ " movq %%r8, 0(%0);"
45984 ++ " movq %%r9, 8(%0);"
45985 ++ " movq %%r10, 16(%0);"
45986 ++ " movq %%r11, 24(%0);"
45987 ++ :
45988 ++ : "r" (out), "r" (f1), "r" (f2)
45989 ++ : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc"
45990 ++ );
45991 + }
45992 +
45993 +-static void sqr_256x256_integer_adx(u64 *const c, const u64 *const a)
45994 ++/* Computes a field multiplication: out <- f1 * f2
45995 ++ * Uses the 8-element buffer tmp for intermediate results */
45996 ++static inline void fmul(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
45997 + {
45998 + asm volatile(
45999 +- "movq (%1), %%rdx ;" /* A[0] */
46000 +- "mulx 8(%1), %%r8, %%r14 ;" /* A[1]*A[0] */
46001 +- "xorl %%r15d, %%r15d;"
46002 +- "mulx 16(%1), %%r9, %%r10 ;" /* A[2]*A[0] */
46003 +- "adcx %%r14, %%r9 ;"
46004 +- "mulx 24(%1), %%rax, %%rcx ;" /* A[3]*A[0] */
46005 +- "adcx %%rax, %%r10 ;"
46006 +- "movq 24(%1), %%rdx ;" /* A[3] */
46007 +- "mulx 8(%1), %%r11, %%rbx ;" /* A[1]*A[3] */
46008 +- "adcx %%rcx, %%r11 ;"
46009 +- "mulx 16(%1), %%rax, %%r13 ;" /* A[2]*A[3] */
46010 +- "adcx %%rax, %%rbx ;"
46011 +- "movq 8(%1), %%rdx ;" /* A[1] */
46012 +- "adcx %%r15, %%r13 ;"
46013 +- "mulx 16(%1), %%rax, %%rcx ;" /* A[2]*A[1] */
46014 +- "movq $0, %%r14 ;"
46015 +- /******************************************/
46016 +- "adcx %%r15, %%r14 ;"
46017 +-
46018 +- "xorl %%r15d, %%r15d;"
46019 +- "adox %%rax, %%r10 ;"
46020 +- "adcx %%r8, %%r8 ;"
46021 +- "adox %%rcx, %%r11 ;"
46022 +- "adcx %%r9, %%r9 ;"
46023 +- "adox %%r15, %%rbx ;"
46024 +- "adcx %%r10, %%r10 ;"
46025 +- "adox %%r15, %%r13 ;"
46026 +- "adcx %%r11, %%r11 ;"
46027 +- "adox %%r15, %%r14 ;"
46028 +- "adcx %%rbx, %%rbx ;"
46029 +- "adcx %%r13, %%r13 ;"
46030 +- "adcx %%r14, %%r14 ;"
46031 +-
46032 +- "movq (%1), %%rdx ;"
46033 +- "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */
46034 +- /*******************/
46035 +- "movq %%rax, 0(%0) ;"
46036 +- "addq %%rcx, %%r8 ;"
46037 +- "movq %%r8, 8(%0) ;"
46038 +- "movq 8(%1), %%rdx ;"
46039 +- "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */
46040 +- "adcq %%rax, %%r9 ;"
46041 +- "movq %%r9, 16(%0) ;"
46042 +- "adcq %%rcx, %%r10 ;"
46043 +- "movq %%r10, 24(%0) ;"
46044 +- "movq 16(%1), %%rdx ;"
46045 +- "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */
46046 +- "adcq %%rax, %%r11 ;"
46047 +- "movq %%r11, 32(%0) ;"
46048 +- "adcq %%rcx, %%rbx ;"
46049 +- "movq %%rbx, 40(%0) ;"
46050 +- "movq 24(%1), %%rdx ;"
46051 +- "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */
46052 +- "adcq %%rax, %%r13 ;"
46053 +- "movq %%r13, 48(%0) ;"
46054 +- "adcq %%rcx, %%r14 ;"
46055 +- "movq %%r14, 56(%0) ;"
46056 +- :
46057 +- : "r"(c), "r"(a)
46058 +- : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
46059 +- "%r10", "%r11", "%r13", "%r14", "%r15");
46060 ++ /* Compute the raw multiplication: tmp <- src1 * src2 */
46061 ++
46062 ++ /* Compute src1[0] * src2 */
46063 ++ " movq 0(%1), %%rdx;"
46064 ++ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 0(%0);"
46065 ++ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);"
46066 ++ " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;"
46067 ++ " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;"
46068 ++ " adox %%rdx, %%rax;"
46069 ++ /* Compute src1[1] * src2 */
46070 ++ " movq 8(%1), %%rdx;"
46071 ++ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);"
46072 ++ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 16(%0);"
46073 ++ " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;"
46074 ++ " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
46075 ++ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
46076 ++ /* Compute src1[2] * src2 */
46077 ++ " movq 16(%1), %%rdx;"
46078 ++ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);"
46079 ++ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 24(%0);"
46080 ++ " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;"
46081 ++ " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
46082 ++ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
46083 ++ /* Compute src1[3] * src2 */
46084 ++ " movq 24(%1), %%rdx;"
46085 ++ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);"
46086 ++ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 32(%0);"
46087 ++ " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " movq %%r12, 40(%0);" " mov $0, %%r8;"
46088 ++ " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;"
46089 ++ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 56(%0);"
46090 ++ /* Line up pointers */
46091 ++ " mov %0, %1;"
46092 ++ " mov %2, %0;"
46093 ++
46094 ++ /* Wrap the result back into the field */
46095 ++
46096 ++ /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
46097 ++ " mov $38, %%rdx;"
46098 ++ " mulxq 32(%1), %%r8, %%r13;"
46099 ++ " xor %3, %3;"
46100 ++ " adoxq 0(%1), %%r8;"
46101 ++ " mulxq 40(%1), %%r9, %%r12;"
46102 ++ " adcx %%r13, %%r9;"
46103 ++ " adoxq 8(%1), %%r9;"
46104 ++ " mulxq 48(%1), %%r10, %%r13;"
46105 ++ " adcx %%r12, %%r10;"
46106 ++ " adoxq 16(%1), %%r10;"
46107 ++ " mulxq 56(%1), %%r11, %%rax;"
46108 ++ " adcx %%r13, %%r11;"
46109 ++ " adoxq 24(%1), %%r11;"
46110 ++ " adcx %3, %%rax;"
46111 ++ " adox %3, %%rax;"
46112 ++ " imul %%rdx, %%rax;"
46113 ++
46114 ++ /* Step 2: Fold the carry back into dst */
46115 ++ " add %%rax, %%r8;"
46116 ++ " adcx %3, %%r9;"
46117 ++ " movq %%r9, 8(%0);"
46118 ++ " adcx %3, %%r10;"
46119 ++ " movq %%r10, 16(%0);"
46120 ++ " adcx %3, %%r11;"
46121 ++ " movq %%r11, 24(%0);"
46122 ++
46123 ++ /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
46124 ++ " mov $0, %%rax;"
46125 ++ " cmovc %%rdx, %%rax;"
46126 ++ " add %%rax, %%r8;"
46127 ++ " movq %%r8, 0(%0);"
46128 ++ : "+&r" (tmp), "+&r" (f1), "+&r" (out), "+&r" (f2)
46129 ++ :
46130 ++ : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "memory", "cc"
46131 ++ );
46132 + }
46133 +
46134 +-static void sqr_256x256_integer_bmi2(u64 *const c, const u64 *const a)
46135 ++/* Computes two field multiplications:
46136 ++ * out[0] <- f1[0] * f2[0]
46137 ++ * out[1] <- f1[1] * f2[1]
46138 ++ * Uses the 16-element buffer tmp for intermediate results. */
46139 ++static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
46140 + {
46141 + asm volatile(
46142 +- "movq 8(%1), %%rdx ;" /* A[1] */
46143 +- "mulx (%1), %%r8, %%r9 ;" /* A[0]*A[1] */
46144 +- "mulx 16(%1), %%r10, %%r11 ;" /* A[2]*A[1] */
46145 +- "mulx 24(%1), %%rcx, %%r14 ;" /* A[3]*A[1] */
46146 +-
46147 +- "movq 16(%1), %%rdx ;" /* A[2] */
46148 +- "mulx 24(%1), %%r15, %%r13 ;" /* A[3]*A[2] */
46149 +- "mulx (%1), %%rax, %%rdx ;" /* A[0]*A[2] */
46150 +-
46151 +- "addq %%rax, %%r9 ;"
46152 +- "adcq %%rdx, %%r10 ;"
46153 +- "adcq %%rcx, %%r11 ;"
46154 +- "adcq %%r14, %%r15 ;"
46155 +- "adcq $0, %%r13 ;"
46156 +- "movq $0, %%r14 ;"
46157 +- "adcq $0, %%r14 ;"
46158 +-
46159 +- "movq (%1), %%rdx ;" /* A[0] */
46160 +- "mulx 24(%1), %%rax, %%rcx ;" /* A[0]*A[3] */
46161 +-
46162 +- "addq %%rax, %%r10 ;"
46163 +- "adcq %%rcx, %%r11 ;"
46164 +- "adcq $0, %%r15 ;"
46165 +- "adcq $0, %%r13 ;"
46166 +- "adcq $0, %%r14 ;"
46167 +-
46168 +- "shldq $1, %%r13, %%r14 ;"
46169 +- "shldq $1, %%r15, %%r13 ;"
46170 +- "shldq $1, %%r11, %%r15 ;"
46171 +- "shldq $1, %%r10, %%r11 ;"
46172 +- "shldq $1, %%r9, %%r10 ;"
46173 +- "shldq $1, %%r8, %%r9 ;"
46174 +- "shlq $1, %%r8 ;"
46175 +-
46176 +- /*******************/
46177 +- "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */
46178 +- /*******************/
46179 +- "movq %%rax, 0(%0) ;"
46180 +- "addq %%rcx, %%r8 ;"
46181 +- "movq %%r8, 8(%0) ;"
46182 +- "movq 8(%1), %%rdx ;"
46183 +- "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */
46184 +- "adcq %%rax, %%r9 ;"
46185 +- "movq %%r9, 16(%0) ;"
46186 +- "adcq %%rcx, %%r10 ;"
46187 +- "movq %%r10, 24(%0) ;"
46188 +- "movq 16(%1), %%rdx ;"
46189 +- "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */
46190 +- "adcq %%rax, %%r11 ;"
46191 +- "movq %%r11, 32(%0) ;"
46192 +- "adcq %%rcx, %%r15 ;"
46193 +- "movq %%r15, 40(%0) ;"
46194 +- "movq 24(%1), %%rdx ;"
46195 +- "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */
46196 +- "adcq %%rax, %%r13 ;"
46197 +- "movq %%r13, 48(%0) ;"
46198 +- "adcq %%rcx, %%r14 ;"
46199 +- "movq %%r14, 56(%0) ;"
46200 +- :
46201 +- : "r"(c), "r"(a)
46202 +- : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
46203 +- "%r11", "%r13", "%r14", "%r15");
46204 ++ /* Compute the raw multiplication tmp[0] <- f1[0] * f2[0] */
46205 ++
46206 ++ /* Compute src1[0] * src2 */
46207 ++ " movq 0(%1), %%rdx;"
46208 ++ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 0(%0);"
46209 ++ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);"
46210 ++ " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;"
46211 ++ " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;"
46212 ++ " adox %%rdx, %%rax;"
46213 ++ /* Compute src1[1] * src2 */
46214 ++ " movq 8(%1), %%rdx;"
46215 ++ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);"
46216 ++ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 16(%0);"
46217 ++ " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;"
46218 ++ " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
46219 ++ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
46220 ++ /* Compute src1[2] * src2 */
46221 ++ " movq 16(%1), %%rdx;"
46222 ++ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);"
46223 ++ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 24(%0);"
46224 ++ " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;"
46225 ++ " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
46226 ++ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
46227 ++ /* Compute src1[3] * src2 */
46228 ++ " movq 24(%1), %%rdx;"
46229 ++ " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);"
46230 ++ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 32(%0);"
46231 ++ " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " movq %%r12, 40(%0);" " mov $0, %%r8;"
46232 ++ " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;"
46233 ++ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 56(%0);"
46234 ++
46235 ++ /* Compute the raw multiplication tmp[1] <- f1[1] * f2[1] */
46236 ++
46237 ++ /* Compute src1[0] * src2 */
46238 ++ " movq 32(%1), %%rdx;"
46239 ++ " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 64(%0);"
46240 ++ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 72(%0);"
46241 ++ " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;"
46242 ++ " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;"
46243 ++ " adox %%rdx, %%rax;"
46244 ++ /* Compute src1[1] * src2 */
46245 ++ " movq 40(%1), %%rdx;"
46246 ++ " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 72(%0), %%r8;" " movq %%r8, 72(%0);"
46247 ++ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 80(%0);"
46248 ++ " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;"
46249 ++ " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
46250 ++ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
46251 ++ /* Compute src1[2] * src2 */
46252 ++ " movq 48(%1), %%rdx;"
46253 ++ " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 80(%0), %%r8;" " movq %%r8, 80(%0);"
46254 ++ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 88(%0);"
46255 ++ " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;"
46256 ++ " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
46257 ++ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
46258 ++ /* Compute src1[3] * src2 */
46259 ++ " movq 56(%1), %%rdx;"
46260 ++ " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 88(%0), %%r8;" " movq %%r8, 88(%0);"
46261 ++ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 96(%0);"
46262 ++ " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " movq %%r12, 104(%0);" " mov $0, %%r8;"
46263 ++ " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 112(%0);" " mov $0, %%rax;"
46264 ++ " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 120(%0);"
46265 ++ /* Line up pointers */
46266 ++ " mov %0, %1;"
46267 ++ " mov %2, %0;"
46268 ++
46269 ++ /* Wrap the results back into the field */
46270 ++
46271 ++ /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
46272 ++ " mov $38, %%rdx;"
46273 ++ " mulxq 32(%1), %%r8, %%r13;"
46274 ++ " xor %3, %3;"
46275 ++ " adoxq 0(%1), %%r8;"
46276 ++ " mulxq 40(%1), %%r9, %%r12;"
46277 ++ " adcx %%r13, %%r9;"
46278 ++ " adoxq 8(%1), %%r9;"
46279 ++ " mulxq 48(%1), %%r10, %%r13;"
46280 ++ " adcx %%r12, %%r10;"
46281 ++ " adoxq 16(%1), %%r10;"
46282 ++ " mulxq 56(%1), %%r11, %%rax;"
46283 ++ " adcx %%r13, %%r11;"
46284 ++ " adoxq 24(%1), %%r11;"
46285 ++ " adcx %3, %%rax;"
46286 ++ " adox %3, %%rax;"
46287 ++ " imul %%rdx, %%rax;"
46288 ++
46289 ++ /* Step 2: Fold the carry back into dst */
46290 ++ " add %%rax, %%r8;"
46291 ++ " adcx %3, %%r9;"
46292 ++ " movq %%r9, 8(%0);"
46293 ++ " adcx %3, %%r10;"
46294 ++ " movq %%r10, 16(%0);"
46295 ++ " adcx %3, %%r11;"
46296 ++ " movq %%r11, 24(%0);"
46297 ++
46298 ++ /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
46299 ++ " mov $0, %%rax;"
46300 ++ " cmovc %%rdx, %%rax;"
46301 ++ " add %%rax, %%r8;"
46302 ++ " movq %%r8, 0(%0);"
46303 ++
46304 ++ /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
46305 ++ " mov $38, %%rdx;"
46306 ++ " mulxq 96(%1), %%r8, %%r13;"
46307 ++ " xor %3, %3;"
46308 ++ " adoxq 64(%1), %%r8;"
46309 ++ " mulxq 104(%1), %%r9, %%r12;"
46310 ++ " adcx %%r13, %%r9;"
46311 ++ " adoxq 72(%1), %%r9;"
46312 ++ " mulxq 112(%1), %%r10, %%r13;"
46313 ++ " adcx %%r12, %%r10;"
46314 ++ " adoxq 80(%1), %%r10;"
46315 ++ " mulxq 120(%1), %%r11, %%rax;"
46316 ++ " adcx %%r13, %%r11;"
46317 ++ " adoxq 88(%1), %%r11;"
46318 ++ " adcx %3, %%rax;"
46319 ++ " adox %3, %%rax;"
46320 ++ " imul %%rdx, %%rax;"
46321 ++
46322 ++ /* Step 2: Fold the carry back into dst */
46323 ++ " add %%rax, %%r8;"
46324 ++ " adcx %3, %%r9;"
46325 ++ " movq %%r9, 40(%0);"
46326 ++ " adcx %3, %%r10;"
46327 ++ " movq %%r10, 48(%0);"
46328 ++ " adcx %3, %%r11;"
46329 ++ " movq %%r11, 56(%0);"
46330 ++
46331 ++ /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
46332 ++ " mov $0, %%rax;"
46333 ++ " cmovc %%rdx, %%rax;"
46334 ++ " add %%rax, %%r8;"
46335 ++ " movq %%r8, 32(%0);"
46336 ++ : "+&r" (tmp), "+&r" (f1), "+&r" (out), "+&r" (f2)
46337 ++ :
46338 ++ : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "memory", "cc"
46339 ++ );
46340 + }
46341 +
46342 +-static void red_eltfp25519_1w_adx(u64 *const c, const u64 *const a)
46343 ++/* Computes the field multiplication of four-element f1 with value in f2 */
46344 ++static inline void fmul_scalar(u64 *out, const u64 *f1, u64 f2)
46345 + {
46346 +- asm volatile(
46347 +- "movl $38, %%edx ;" /* 2*c = 38 = 2^256 */
46348 +- "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */
46349 +- "xorl %%ebx, %%ebx ;"
46350 +- "adox (%1), %%r8 ;"
46351 +- "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */
46352 +- "adcx %%r10, %%r9 ;"
46353 +- "adox 8(%1), %%r9 ;"
46354 +- "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */
46355 +- "adcx %%r11, %%r10 ;"
46356 +- "adox 16(%1), %%r10 ;"
46357 +- "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */
46358 +- "adcx %%rax, %%r11 ;"
46359 +- "adox 24(%1), %%r11 ;"
46360 +- /***************************************/
46361 +- "adcx %%rbx, %%rcx ;"
46362 +- "adox %%rbx, %%rcx ;"
46363 +- "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */
46364 +- "adcx %%rcx, %%r8 ;"
46365 +- "adcx %%rbx, %%r9 ;"
46366 +- "movq %%r9, 8(%0) ;"
46367 +- "adcx %%rbx, %%r10 ;"
46368 +- "movq %%r10, 16(%0) ;"
46369 +- "adcx %%rbx, %%r11 ;"
46370 +- "movq %%r11, 24(%0) ;"
46371 +- "mov $0, %%ecx ;"
46372 +- "cmovc %%edx, %%ecx ;"
46373 +- "addq %%rcx, %%r8 ;"
46374 +- "movq %%r8, (%0) ;"
46375 +- :
46376 +- : "r"(c), "r"(a)
46377 +- : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
46378 +- "%r10", "%r11");
46379 +-}
46380 ++ register u64 f2_r asm("rdx") = f2;
46381 +
46382 +-static void red_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a)
46383 +-{
46384 + asm volatile(
46385 +- "movl $38, %%edx ;" /* 2*c = 38 = 2^256 */
46386 +- "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */
46387 +- "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */
46388 +- "addq %%r10, %%r9 ;"
46389 +- "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */
46390 +- "adcq %%r11, %%r10 ;"
46391 +- "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */
46392 +- "adcq %%rax, %%r11 ;"
46393 +- /***************************************/
46394 +- "adcq $0, %%rcx ;"
46395 +- "addq (%1), %%r8 ;"
46396 +- "adcq 8(%1), %%r9 ;"
46397 +- "adcq 16(%1), %%r10 ;"
46398 +- "adcq 24(%1), %%r11 ;"
46399 +- "adcq $0, %%rcx ;"
46400 +- "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */
46401 +- "addq %%rcx, %%r8 ;"
46402 +- "adcq $0, %%r9 ;"
46403 +- "movq %%r9, 8(%0) ;"
46404 +- "adcq $0, %%r10 ;"
46405 +- "movq %%r10, 16(%0) ;"
46406 +- "adcq $0, %%r11 ;"
46407 +- "movq %%r11, 24(%0) ;"
46408 +- "mov $0, %%ecx ;"
46409 +- "cmovc %%edx, %%ecx ;"
46410 +- "addq %%rcx, %%r8 ;"
46411 +- "movq %%r8, (%0) ;"
46412 +- :
46413 +- : "r"(c), "r"(a)
46414 +- : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
46415 +- "%r11");
46416 ++ /* Compute the raw multiplication of f1*f2 */
46417 ++ " mulxq 0(%2), %%r8, %%rcx;" /* f1[0]*f2 */
46418 ++ " mulxq 8(%2), %%r9, %%r12;" /* f1[1]*f2 */
46419 ++ " add %%rcx, %%r9;"
46420 ++ " mov $0, %%rcx;"
46421 ++ " mulxq 16(%2), %%r10, %%r13;" /* f1[2]*f2 */
46422 ++ " adcx %%r12, %%r10;"
46423 ++ " mulxq 24(%2), %%r11, %%rax;" /* f1[3]*f2 */
46424 ++ " adcx %%r13, %%r11;"
46425 ++ " adcx %%rcx, %%rax;"
46426 ++
46427 ++ /* Wrap the result back into the field */
46428 ++
46429 ++ /* Step 1: Compute carry*38 */
46430 ++ " mov $38, %%rdx;"
46431 ++ " imul %%rdx, %%rax;"
46432 ++
46433 ++ /* Step 2: Fold the carry back into dst */
46434 ++ " add %%rax, %%r8;"
46435 ++ " adcx %%rcx, %%r9;"
46436 ++ " movq %%r9, 8(%1);"
46437 ++ " adcx %%rcx, %%r10;"
46438 ++ " movq %%r10, 16(%1);"
46439 ++ " adcx %%rcx, %%r11;"
46440 ++ " movq %%r11, 24(%1);"
46441 ++
46442 ++ /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
46443 ++ " mov $0, %%rax;"
46444 ++ " cmovc %%rdx, %%rax;"
46445 ++ " add %%rax, %%r8;"
46446 ++ " movq %%r8, 0(%1);"
46447 ++ : "+&r" (f2_r)
46448 ++ : "r" (out), "r" (f1)
46449 ++ : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "memory", "cc"
46450 ++ );
46451 + }
46452 +
46453 +-static __always_inline void
46454 +-add_eltfp25519_1w_adx(u64 *const c, const u64 *const a, const u64 *const b)
46455 ++/* Computes p1 <- bit ? p2 : p1 in constant time */
46456 ++static inline void cswap2(u64 bit, const u64 *p1, const u64 *p2)
46457 + {
46458 + asm volatile(
46459 +- "mov $38, %%eax ;"
46460 +- "xorl %%ecx, %%ecx ;"
46461 +- "movq (%2), %%r8 ;"
46462 +- "adcx (%1), %%r8 ;"
46463 +- "movq 8(%2), %%r9 ;"
46464 +- "adcx 8(%1), %%r9 ;"
46465 +- "movq 16(%2), %%r10 ;"
46466 +- "adcx 16(%1), %%r10 ;"
46467 +- "movq 24(%2), %%r11 ;"
46468 +- "adcx 24(%1), %%r11 ;"
46469 +- "cmovc %%eax, %%ecx ;"
46470 +- "xorl %%eax, %%eax ;"
46471 +- "adcx %%rcx, %%r8 ;"
46472 +- "adcx %%rax, %%r9 ;"
46473 +- "movq %%r9, 8(%0) ;"
46474 +- "adcx %%rax, %%r10 ;"
46475 +- "movq %%r10, 16(%0) ;"
46476 +- "adcx %%rax, %%r11 ;"
46477 +- "movq %%r11, 24(%0) ;"
46478 +- "mov $38, %%ecx ;"
46479 +- "cmovc %%ecx, %%eax ;"
46480 +- "addq %%rax, %%r8 ;"
46481 +- "movq %%r8, (%0) ;"
46482 +- :
46483 +- : "r"(c), "r"(a), "r"(b)
46484 +- : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11");
46485 ++ /* Invert the polarity of bit to match cmov expectations */
46486 ++ " add $18446744073709551615, %0;"
46487 ++
46488 ++ /* cswap p1[0], p2[0] */
46489 ++ " movq 0(%1), %%r8;"
46490 ++ " movq 0(%2), %%r9;"
46491 ++ " mov %%r8, %%r10;"
46492 ++ " cmovc %%r9, %%r8;"
46493 ++ " cmovc %%r10, %%r9;"
46494 ++ " movq %%r8, 0(%1);"
46495 ++ " movq %%r9, 0(%2);"
46496 ++
46497 ++ /* cswap p1[1], p2[1] */
46498 ++ " movq 8(%1), %%r8;"
46499 ++ " movq 8(%2), %%r9;"
46500 ++ " mov %%r8, %%r10;"
46501 ++ " cmovc %%r9, %%r8;"
46502 ++ " cmovc %%r10, %%r9;"
46503 ++ " movq %%r8, 8(%1);"
46504 ++ " movq %%r9, 8(%2);"
46505 ++
46506 ++ /* cswap p1[2], p2[2] */
46507 ++ " movq 16(%1), %%r8;"
46508 ++ " movq 16(%2), %%r9;"
46509 ++ " mov %%r8, %%r10;"
46510 ++ " cmovc %%r9, %%r8;"
46511 ++ " cmovc %%r10, %%r9;"
46512 ++ " movq %%r8, 16(%1);"
46513 ++ " movq %%r9, 16(%2);"
46514 ++
46515 ++ /* cswap p1[3], p2[3] */
46516 ++ " movq 24(%1), %%r8;"
46517 ++ " movq 24(%2), %%r9;"
46518 ++ " mov %%r8, %%r10;"
46519 ++ " cmovc %%r9, %%r8;"
46520 ++ " cmovc %%r10, %%r9;"
46521 ++ " movq %%r8, 24(%1);"
46522 ++ " movq %%r9, 24(%2);"
46523 ++
46524 ++ /* cswap p1[4], p2[4] */
46525 ++ " movq 32(%1), %%r8;"
46526 ++ " movq 32(%2), %%r9;"
46527 ++ " mov %%r8, %%r10;"
46528 ++ " cmovc %%r9, %%r8;"
46529 ++ " cmovc %%r10, %%r9;"
46530 ++ " movq %%r8, 32(%1);"
46531 ++ " movq %%r9, 32(%2);"
46532 ++
46533 ++ /* cswap p1[5], p2[5] */
46534 ++ " movq 40(%1), %%r8;"
46535 ++ " movq 40(%2), %%r9;"
46536 ++ " mov %%r8, %%r10;"
46537 ++ " cmovc %%r9, %%r8;"
46538 ++ " cmovc %%r10, %%r9;"
46539 ++ " movq %%r8, 40(%1);"
46540 ++ " movq %%r9, 40(%2);"
46541 ++
46542 ++ /* cswap p1[6], p2[6] */
46543 ++ " movq 48(%1), %%r8;"
46544 ++ " movq 48(%2), %%r9;"
46545 ++ " mov %%r8, %%r10;"
46546 ++ " cmovc %%r9, %%r8;"
46547 ++ " cmovc %%r10, %%r9;"
46548 ++ " movq %%r8, 48(%1);"
46549 ++ " movq %%r9, 48(%2);"
46550 ++
46551 ++ /* cswap p1[7], p2[7] */
46552 ++ " movq 56(%1), %%r8;"
46553 ++ " movq 56(%2), %%r9;"
46554 ++ " mov %%r8, %%r10;"
46555 ++ " cmovc %%r9, %%r8;"
46556 ++ " cmovc %%r10, %%r9;"
46557 ++ " movq %%r8, 56(%1);"
46558 ++ " movq %%r9, 56(%2);"
46559 ++ : "+&r" (bit)
46560 ++ : "r" (p1), "r" (p2)
46561 ++ : "%r8", "%r9", "%r10", "memory", "cc"
46562 ++ );
46563 + }
46564 +
46565 +-static __always_inline void
46566 +-add_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a, const u64 *const b)
46567 ++/* Computes the square of a field element: out <- f * f
46568 ++ * Uses the 8-element buffer tmp for intermediate results */
46569 ++static inline void fsqr(u64 *out, const u64 *f, u64 *tmp)
46570 + {
46571 + asm volatile(
46572 +- "mov $38, %%eax ;"
46573 +- "movq (%2), %%r8 ;"
46574 +- "addq (%1), %%r8 ;"
46575 +- "movq 8(%2), %%r9 ;"
46576 +- "adcq 8(%1), %%r9 ;"
46577 +- "movq 16(%2), %%r10 ;"
46578 +- "adcq 16(%1), %%r10 ;"
46579 +- "movq 24(%2), %%r11 ;"
46580 +- "adcq 24(%1), %%r11 ;"
46581 +- "mov $0, %%ecx ;"
46582 +- "cmovc %%eax, %%ecx ;"
46583 +- "addq %%rcx, %%r8 ;"
46584 +- "adcq $0, %%r9 ;"
46585 +- "movq %%r9, 8(%0) ;"
46586 +- "adcq $0, %%r10 ;"
46587 +- "movq %%r10, 16(%0) ;"
46588 +- "adcq $0, %%r11 ;"
46589 +- "movq %%r11, 24(%0) ;"
46590 +- "mov $0, %%ecx ;"
46591 +- "cmovc %%eax, %%ecx ;"
46592 +- "addq %%rcx, %%r8 ;"
46593 +- "movq %%r8, (%0) ;"
46594 +- :
46595 +- : "r"(c), "r"(a), "r"(b)
46596 +- : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11");
46597 ++ /* Compute the raw multiplication: tmp <- f * f */
46598 ++
46599 ++ /* Step 1: Compute all partial products */
46600 ++ " movq 0(%1), %%rdx;" /* f[0] */
46601 ++ " mulxq 8(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */
46602 ++ " mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */
46603 ++ " mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */
46604 ++ " movq 24(%1), %%rdx;" /* f[3] */
46605 ++ " mulxq 8(%1), %%r11, %%r12;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */
46606 ++ " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%r12;" /* f[2]*f[3] */
46607 ++ " movq 8(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */
46608 ++ " mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */
46609 ++
46610 ++ /* Step 2: Compute two parallel carry chains */
46611 ++ " xor %%r15, %%r15;"
46612 ++ " adox %%rax, %%r10;"
46613 ++ " adcx %%r8, %%r8;"
46614 ++ " adox %%rcx, %%r11;"
46615 ++ " adcx %%r9, %%r9;"
46616 ++ " adox %%r15, %%r12;"
46617 ++ " adcx %%r10, %%r10;"
46618 ++ " adox %%r15, %%r13;"
46619 ++ " adcx %%r11, %%r11;"
46620 ++ " adox %%r15, %%r14;"
46621 ++ " adcx %%r12, %%r12;"
46622 ++ " adcx %%r13, %%r13;"
46623 ++ " adcx %%r14, %%r14;"
46624 ++
46625 ++ /* Step 3: Compute intermediate squares */
46626 ++ " movq 0(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */
46627 ++ " movq %%rax, 0(%0);"
46628 ++ " add %%rcx, %%r8;" " movq %%r8, 8(%0);"
46629 ++ " movq 8(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */
46630 ++ " adcx %%rax, %%r9;" " movq %%r9, 16(%0);"
46631 ++ " adcx %%rcx, %%r10;" " movq %%r10, 24(%0);"
46632 ++ " movq 16(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */
46633 ++ " adcx %%rax, %%r11;" " movq %%r11, 32(%0);"
46634 ++ " adcx %%rcx, %%r12;" " movq %%r12, 40(%0);"
46635 ++ " movq 24(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */
46636 ++ " adcx %%rax, %%r13;" " movq %%r13, 48(%0);"
46637 ++ " adcx %%rcx, %%r14;" " movq %%r14, 56(%0);"
46638 ++
46639 ++ /* Line up pointers */
46640 ++ " mov %0, %1;"
46641 ++ " mov %2, %0;"
46642 ++
46643 ++ /* Wrap the result back into the field */
46644 ++
46645 ++ /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
46646 ++ " mov $38, %%rdx;"
46647 ++ " mulxq 32(%1), %%r8, %%r13;"
46648 ++ " xor %%rcx, %%rcx;"
46649 ++ " adoxq 0(%1), %%r8;"
46650 ++ " mulxq 40(%1), %%r9, %%r12;"
46651 ++ " adcx %%r13, %%r9;"
46652 ++ " adoxq 8(%1), %%r9;"
46653 ++ " mulxq 48(%1), %%r10, %%r13;"
46654 ++ " adcx %%r12, %%r10;"
46655 ++ " adoxq 16(%1), %%r10;"
46656 ++ " mulxq 56(%1), %%r11, %%rax;"
46657 ++ " adcx %%r13, %%r11;"
46658 ++ " adoxq 24(%1), %%r11;"
46659 ++ " adcx %%rcx, %%rax;"
46660 ++ " adox %%rcx, %%rax;"
46661 ++ " imul %%rdx, %%rax;"
46662 ++
46663 ++ /* Step 2: Fold the carry back into dst */
46664 ++ " add %%rax, %%r8;"
46665 ++ " adcx %%rcx, %%r9;"
46666 ++ " movq %%r9, 8(%0);"
46667 ++ " adcx %%rcx, %%r10;"
46668 ++ " movq %%r10, 16(%0);"
46669 ++ " adcx %%rcx, %%r11;"
46670 ++ " movq %%r11, 24(%0);"
46671 ++
46672 ++ /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
46673 ++ " mov $0, %%rax;"
46674 ++ " cmovc %%rdx, %%rax;"
46675 ++ " add %%rax, %%r8;"
46676 ++ " movq %%r8, 0(%0);"
46677 ++ : "+&r" (tmp), "+&r" (f), "+&r" (out)
46678 ++ :
46679 ++ : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "memory", "cc"
46680 ++ );
46681 + }
46682 +
46683 +-static __always_inline void
46684 +-sub_eltfp25519_1w(u64 *const c, const u64 *const a, const u64 *const b)
46685 ++/* Computes two field squarings, treating f and out as two 4-word elements each:
46686 ++ * out[0] <- f[0] * f[0] (reads words 0-3 of f, writes words 0-3 of out)
46687 ++ * out[1] <- f[1] * f[1] (reads words 4-7 of f, writes words 4-7 of out)
46688 ++ * Uses the 16-element buffer tmp for intermediate results: both unreduced 8-word squares are stored there and are then folded into out using the constant 38 */
46689 ++static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
46690 + {
46691 + asm volatile(
46692 +- "mov $38, %%eax ;"
46693 +- "movq (%1), %%r8 ;"
46694 +- "subq (%2), %%r8 ;"
46695 +- "movq 8(%1), %%r9 ;"
46696 +- "sbbq 8(%2), %%r9 ;"
46697 +- "movq 16(%1), %%r10 ;"
46698 +- "sbbq 16(%2), %%r10 ;"
46699 +- "movq 24(%1), %%r11 ;"
46700 +- "sbbq 24(%2), %%r11 ;"
46701 +- "mov $0, %%ecx ;"
46702 +- "cmovc %%eax, %%ecx ;"
46703 +- "subq %%rcx, %%r8 ;"
46704 +- "sbbq $0, %%r9 ;"
46705 +- "movq %%r9, 8(%0) ;"
46706 +- "sbbq $0, %%r10 ;"
46707 +- "movq %%r10, 16(%0) ;"
46708 +- "sbbq $0, %%r11 ;"
46709 +- "movq %%r11, 24(%0) ;"
46710 +- "mov $0, %%ecx ;"
46711 +- "cmovc %%eax, %%ecx ;"
46712 +- "subq %%rcx, %%r8 ;"
46713 +- "movq %%r8, (%0) ;"
46714 +- :
46715 +- : "r"(c), "r"(a), "r"(b)
46716 +- : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11");
46717 ++ /* First element (words 0-3 of f, addressed through %1). Step 1: Compute all partial products */
46718 ++ " movq 0(%1), %%rdx;" /* f[0] */
46719 ++ " mulxq 8(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */
46720 ++ " mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */
46721 ++ " mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */
46722 ++ " movq 24(%1), %%rdx;" /* f[3] */
46723 ++ " mulxq 8(%1), %%r11, %%r12;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */
46724 ++ " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%r12;" /* f[2]*f[3] */
46725 ++ " movq 8(%1), %%rdx;" " adcx %%r15, %%r13;" /* f[1] */
46726 ++ " mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */
46727 ++
46728 ++ /* Step 2: Compute two parallel carry chains (the adox chain adds in f[2]*f[1], the adcx chain doubles every cross-term word) */
46729 ++ " xor %%r15, %%r15;"
46730 ++ " adox %%rax, %%r10;"
46731 ++ " adcx %%r8, %%r8;"
46732 ++ " adox %%rcx, %%r11;"
46733 ++ " adcx %%r9, %%r9;"
46734 ++ " adox %%r15, %%r12;"
46735 ++ " adcx %%r10, %%r10;"
46736 ++ " adox %%r15, %%r13;"
46737 ++ " adcx %%r11, %%r11;"
46738 ++ " adox %%r15, %%r14;"
46739 ++ " adcx %%r12, %%r12;"
46740 ++ " adcx %%r13, %%r13;"
46741 ++ " adcx %%r14, %%r14;"
46742 ++
46743 ++ /* Step 3: Compute intermediate squares and store the unreduced 8-word result in words 0-7 of tmp (%0) */
46744 ++ " movq 0(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */
46745 ++ " movq %%rax, 0(%0);"
46746 ++ " add %%rcx, %%r8;" " movq %%r8, 8(%0);"
46747 ++ " movq 8(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */
46748 ++ " adcx %%rax, %%r9;" " movq %%r9, 16(%0);"
46749 ++ " adcx %%rcx, %%r10;" " movq %%r10, 24(%0);"
46750 ++ " movq 16(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */
46751 ++ " adcx %%rax, %%r11;" " movq %%r11, 32(%0);"
46752 ++ " adcx %%rcx, %%r12;" " movq %%r12, 40(%0);"
46753 ++ " movq 24(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */
46754 ++ " adcx %%rax, %%r13;" " movq %%r13, 48(%0);"
46755 ++ " adcx %%rcx, %%r14;" " movq %%r14, 56(%0);"
46756 ++
46757 ++ /* Second element (words 4-7 of f, byte offsets 32-56 of %1; f[k] in the comments below means word k of this element). Step 1: Compute all partial products */
46758 ++ " movq 32(%1), %%rdx;" /* f[0] */
46759 ++ " mulxq 40(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */
46760 ++ " mulxq 48(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */
46761 ++ " mulxq 56(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */
46762 ++ " movq 56(%1), %%rdx;" /* f[3] */
46763 ++ " mulxq 40(%1), %%r11, %%r12;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */
46764 ++ " mulxq 48(%1), %%rax, %%r13;" " adcx %%rax, %%r12;" /* f[2]*f[3] */
46765 ++ " movq 40(%1), %%rdx;" " adcx %%r15, %%r13;" /* f[1] */
46766 ++ " mulxq 48(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */
46767 ++
46768 ++ /* Step 2: Compute two parallel carry chains (same pattern as for the first element) */
46769 ++ " xor %%r15, %%r15;"
46770 ++ " adox %%rax, %%r10;"
46771 ++ " adcx %%r8, %%r8;"
46772 ++ " adox %%rcx, %%r11;"
46773 ++ " adcx %%r9, %%r9;"
46774 ++ " adox %%r15, %%r12;"
46775 ++ " adcx %%r10, %%r10;"
46776 ++ " adox %%r15, %%r13;"
46777 ++ " adcx %%r11, %%r11;"
46778 ++ " adox %%r15, %%r14;"
46779 ++ " adcx %%r12, %%r12;"
46780 ++ " adcx %%r13, %%r13;"
46781 ++ " adcx %%r14, %%r14;"
46782 ++
46783 ++ /* Step 3: Compute intermediate squares and store the unreduced 8-word result in words 8-15 of tmp (%0) */
46784 ++ " movq 32(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */
46785 ++ " movq %%rax, 64(%0);"
46786 ++ " add %%rcx, %%r8;" " movq %%r8, 72(%0);"
46787 ++ " movq 40(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */
46788 ++ " adcx %%rax, %%r9;" " movq %%r9, 80(%0);"
46789 ++ " adcx %%rcx, %%r10;" " movq %%r10, 88(%0);"
46790 ++ " movq 48(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */
46791 ++ " adcx %%rax, %%r11;" " movq %%r11, 96(%0);"
46792 ++ " adcx %%rcx, %%r12;" " movq %%r12, 104(%0);"
46793 ++ " movq 56(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */
46794 ++ " adcx %%rax, %%r13;" " movq %%r13, 112(%0);"
46795 ++ " adcx %%rcx, %%r14;" " movq %%r14, 120(%0);"
46796 ++
46797 ++ /* Line up pointers: from here on %1 points at tmp (the products just stored) and %0 points at out */
46798 ++ " mov %0, %1;"
46799 ++ " mov %2, %0;"
46800 ++
46801 ++ /* Wrap the first square (tmp words 0-7) into out words 0-3. Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
46802 ++ " mov $38, %%rdx;"
46803 ++ " mulxq 32(%1), %%r8, %%r13;"
46804 ++ " xor %%rcx, %%rcx;"
46805 ++ " adoxq 0(%1), %%r8;"
46806 ++ " mulxq 40(%1), %%r9, %%r12;"
46807 ++ " adcx %%r13, %%r9;"
46808 ++ " adoxq 8(%1), %%r9;"
46809 ++ " mulxq 48(%1), %%r10, %%r13;"
46810 ++ " adcx %%r12, %%r10;"
46811 ++ " adoxq 16(%1), %%r10;"
46812 ++ " mulxq 56(%1), %%r11, %%rax;"
46813 ++ " adcx %%r13, %%r11;"
46814 ++ " adoxq 24(%1), %%r11;"
46815 ++ " adcx %%rcx, %%rax;"
46816 ++ " adox %%rcx, %%rax;"
46817 ++ " imul %%rdx, %%rax;"
46818 ++
46819 ++ /* Step 2: Fold the carry back into dst */
46820 ++ " add %%rax, %%r8;"
46821 ++ " adcx %%rcx, %%r9;"
46822 ++ " movq %%r9, 8(%0);"
46823 ++ " adcx %%rcx, %%r10;"
46824 ++ " movq %%r10, 16(%0);"
46825 ++ " adcx %%rcx, %%r11;"
46826 ++ " movq %%r11, 24(%0);"
46827 ++
46828 ++ /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
46829 ++ " mov $0, %%rax;"
46830 ++ " cmovc %%rdx, %%rax;"
46831 ++ " add %%rax, %%r8;"
46832 ++ " movq %%r8, 0(%0);"
46833 ++
46834 ++ /* Wrap the second square (tmp words 8-15) into out words 4-7. Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */
46835 ++ " mov $38, %%rdx;"
46836 ++ " mulxq 96(%1), %%r8, %%r13;"
46837 ++ " xor %%rcx, %%rcx;"
46838 ++ " adoxq 64(%1), %%r8;"
46839 ++ " mulxq 104(%1), %%r9, %%r12;"
46840 ++ " adcx %%r13, %%r9;"
46841 ++ " adoxq 72(%1), %%r9;"
46842 ++ " mulxq 112(%1), %%r10, %%r13;"
46843 ++ " adcx %%r12, %%r10;"
46844 ++ " adoxq 80(%1), %%r10;"
46845 ++ " mulxq 120(%1), %%r11, %%rax;"
46846 ++ " adcx %%r13, %%r11;"
46847 ++ " adoxq 88(%1), %%r11;"
46848 ++ " adcx %%rcx, %%rax;"
46849 ++ " adox %%rcx, %%rax;"
46850 ++ " imul %%rdx, %%rax;"
46851 ++
46852 ++ /* Step 2: Fold the carry back into dst */
46853 ++ " add %%rax, %%r8;"
46854 ++ " adcx %%rcx, %%r9;"
46855 ++ " movq %%r9, 40(%0);"
46856 ++ " adcx %%rcx, %%r10;"
46857 ++ " movq %%r10, 48(%0);"
46858 ++ " adcx %%rcx, %%r11;"
46859 ++ " movq %%r11, 56(%0);"
46860 ++
46861 ++ /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */
46862 ++ " mov $0, %%rax;"
46863 ++ " cmovc %%rdx, %%rax;"
46864 ++ " add %%rax, %%r8;"
46865 ++ " movq %%r8, 32(%0);"
46866 ++ : "+&r" (tmp), "+&r" (f), "+&r" (out) /* %0 = tmp, %1 = f, %2 = out; all declared read-write because the asm overwrites %0 and %1 when it lines up the pointers */
46867 ++ :
46868 ++ : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "memory", "cc"
46869 ++ );
46870 + }
46871 +
46872 +-/* Multiplication by a24 = (A+2)/4 = (486662+2)/4 = 121666 */
46873 +-static __always_inline void
46874 +-mul_a24_eltfp25519_1w(u64 *const c, const u64 *const a)
46875 ++static void point_add_and_double(u64 *q, u64 *p01_tmp1, u64 *tmp2) /* One combined ladder step on the buffer p01_tmp1 = [nq: 8 words | nq_p1: 8 words | tmp1: 16 words]; nq and nq_p1 are updated in place, q supplies x1 (its first 4 words) and tmp2 is the scratch buffer handed to fmul/fmul2/fsqr2. The formulas in the comments below assume fadd/fsub/fmul/fmul2/fmul_scalar compute what their names say with the destination first - they are defined outside this excerpt, TODO confirm */
46876 + {
46877 +- const u64 a24 = 121666;
46878 +- asm volatile(
46879 +- "movq %2, %%rdx ;"
46880 +- "mulx (%1), %%r8, %%r10 ;"
46881 +- "mulx 8(%1), %%r9, %%r11 ;"
46882 +- "addq %%r10, %%r9 ;"
46883 +- "mulx 16(%1), %%r10, %%rax ;"
46884 +- "adcq %%r11, %%r10 ;"
46885 +- "mulx 24(%1), %%r11, %%rcx ;"
46886 +- "adcq %%rax, %%r11 ;"
46887 +- /**************************/
46888 +- "adcq $0, %%rcx ;"
46889 +- "movl $38, %%edx ;" /* 2*c = 38 = 2^256 mod 2^255-19*/
46890 +- "imul %%rdx, %%rcx ;"
46891 +- "addq %%rcx, %%r8 ;"
46892 +- "adcq $0, %%r9 ;"
46893 +- "movq %%r9, 8(%0) ;"
46894 +- "adcq $0, %%r10 ;"
46895 +- "movq %%r10, 16(%0) ;"
46896 +- "adcq $0, %%r11 ;"
46897 +- "movq %%r11, 24(%0) ;"
46898 +- "mov $0, %%ecx ;"
46899 +- "cmovc %%edx, %%ecx ;"
46900 +- "addq %%rcx, %%r8 ;"
46901 +- "movq %%r8, (%0) ;"
46902 +- :
46903 +- : "r"(c), "r"(a), "r"(a24)
46904 +- : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
46905 +- "%r11");
46906 ++ u64 *nq = p01_tmp1; /* words 0-7: first point, stored as x2 (4 words) then z2 (4 words) */
46907 ++ u64 *nq_p1 = p01_tmp1 + (u32)8U; /* words 8-15: second point, stored as x3 then z3 */
46908 ++ u64 *tmp1 = p01_tmp1 + (u32)16U; /* words 16-31: four 4-word temporaries */
46909 ++ u64 *x1 = q;
46910 ++ u64 *x2 = nq;
46911 ++ u64 *z2 = nq + (u32)4U;
46912 ++ u64 *z3 = nq_p1 + (u32)4U;
46913 ++ u64 *a = tmp1;
46914 ++ u64 *b = tmp1 + (u32)4U;
46915 ++ u64 *ab = tmp1; /* a and b viewed as one 8-word pair for the two-element helpers */
46916 ++ u64 *dc = tmp1 + (u32)8U; /* d and c viewed as one 8-word pair */
46917 ++ u64 *x3;
46918 ++ u64 *z31;
46919 ++ u64 *d0;
46920 ++ u64 *c0;
46921 ++ u64 *a1;
46922 ++ u64 *b1;
46923 ++ u64 *d;
46924 ++ u64 *c;
46925 ++ u64 *ab1;
46926 ++ u64 *dc1;
46927 ++ fadd(a, x2, z2); /* a = x2 + z2 */
46928 ++ fsub(b, x2, z2); /* b = x2 - z2 */
46929 ++ x3 = nq_p1;
46930 ++ z31 = nq_p1 + (u32)4U; /* same address as z3 */
46931 ++ d0 = dc;
46932 ++ c0 = dc + (u32)4U;
46933 ++ fadd(c0, x3, z31); /* c0 = x3 + z3 */
46934 ++ fsub(d0, x3, z31); /* d0 = x3 - z3 */
46935 ++ fmul2(dc, dc, ab, tmp2); /* pairwise: d0 = d0 * a, c0 = c0 * b */
46936 ++ fadd(x3, d0, c0); /* x3 = d0 + c0 */
46937 ++ fsub(z31, d0, c0); /* z3 = d0 - c0 */
46938 ++ a1 = tmp1; /* the pointers below re-name the same tmp1 slots for the doubling half */
46939 ++ b1 = tmp1 + (u32)4U;
46940 ++ d = tmp1 + (u32)8U;
46941 ++ c = tmp1 + (u32)12U;
46942 ++ ab1 = tmp1;
46943 ++ dc1 = tmp1 + (u32)8U;
46944 ++ fsqr2(dc1, ab1, tmp2); /* d = a^2, c = b^2 */
46945 ++ fsqr2(nq_p1, nq_p1, tmp2); /* x3 = x3^2, z3 = z3^2 */
46946 ++ a1[0U] = c[0U]; /* copy c (= b^2) into the a slot, word by word */
46947 ++ a1[1U] = c[1U];
46948 ++ a1[2U] = c[2U];
46949 ++ a1[3U] = c[3U];
46950 ++ fsub(c, d, c); /* c = d - c */
46951 ++ fmul_scalar(b1, c, (u64)121665U); /* b1 = 121665 * c */
46952 ++ fadd(b1, b1, d); /* b1 = b1 + d */
46953 ++ fmul2(nq, dc1, ab1, tmp2); /* pairwise: x2 = d * a1, z2 = c * b1 */
46954 ++ fmul(z3, z3, x1, tmp2); /* z3 = z3 * x1 */
46955 + }
46956 +
46957 +-static void inv_eltfp25519_1w_adx(u64 *const c, const u64 *const a)
46958 ++static void point_double(u64 *nq, u64 *tmp1, u64 *tmp2) /* Doubling-only step: updates the 8-word point nq = (x2, z2) in place, using the 16-word tmp1 for the temporaries a, b, d, c and tmp2 as scratch for fsqr2/fmul2. The formulas in the comments below assume fadd/fsub/fmul2/fmul_scalar compute what their names say with the destination first - they are defined outside this excerpt, TODO confirm */
46959 + {
46960 +- struct {
46961 +- eltfp25519_1w_buffer buffer;
46962 +- eltfp25519_1w x0, x1, x2;
46963 +- } __aligned(32) m;
46964 +- u64 *T[4];
46965 +-
46966 +- T[0] = m.x0;
46967 +- T[1] = c; /* x^(-1) */
46968 +- T[2] = m.x1;
46969 +- T[3] = m.x2;
46970 +-
46971 +- copy_eltfp25519_1w(T[1], a);
46972 +- sqrn_eltfp25519_1w_adx(T[1], 1);
46973 +- copy_eltfp25519_1w(T[2], T[1]);
46974 +- sqrn_eltfp25519_1w_adx(T[2], 2);
46975 +- mul_eltfp25519_1w_adx(T[0], a, T[2]);
46976 +- mul_eltfp25519_1w_adx(T[1], T[1], T[0]);
46977 +- copy_eltfp25519_1w(T[2], T[1]);
46978 +- sqrn_eltfp25519_1w_adx(T[2], 1);
46979 +- mul_eltfp25519_1w_adx(T[0], T[0], T[2]);
46980 +- copy_eltfp25519_1w(T[2], T[0]);
46981 +- sqrn_eltfp25519_1w_adx(T[2], 5);
46982 +- mul_eltfp25519_1w_adx(T[0], T[0], T[2]);
46983 +- copy_eltfp25519_1w(T[2], T[0]);
46984 +- sqrn_eltfp25519_1w_adx(T[2], 10);
46985 +- mul_eltfp25519_1w_adx(T[2], T[2], T[0]);
46986 +- copy_eltfp25519_1w(T[3], T[2]);
46987 +- sqrn_eltfp25519_1w_adx(T[3], 20);
46988 +- mul_eltfp25519_1w_adx(T[3], T[3], T[2]);
46989 +- sqrn_eltfp25519_1w_adx(T[3], 10);
46990 +- mul_eltfp25519_1w_adx(T[3], T[3], T[0]);
46991 +- copy_eltfp25519_1w(T[0], T[3]);
46992 +- sqrn_eltfp25519_1w_adx(T[0], 50);
46993 +- mul_eltfp25519_1w_adx(T[0], T[0], T[3]);
46994 +- copy_eltfp25519_1w(T[2], T[0]);
46995 +- sqrn_eltfp25519_1w_adx(T[2], 100);
46996 +- mul_eltfp25519_1w_adx(T[2], T[2], T[0]);
46997 +- sqrn_eltfp25519_1w_adx(T[2], 50);
46998 +- mul_eltfp25519_1w_adx(T[2], T[2], T[3]);
46999 +- sqrn_eltfp25519_1w_adx(T[2], 5);
47000 +- mul_eltfp25519_1w_adx(T[1], T[1], T[2]);
47001 +-
47002 +- memzero_explicit(&m, sizeof(m));
47003 ++ u64 *x2 = nq; /* words 0-3 of the point */
47004 ++ u64 *z2 = nq + (u32)4U; /* words 4-7 of the point */
47005 ++ u64 *a = tmp1;
47006 ++ u64 *b = tmp1 + (u32)4U;
47007 ++ u64 *d = tmp1 + (u32)8U;
47008 ++ u64 *c = tmp1 + (u32)12U;
47009 ++ u64 *ab = tmp1; /* a and b viewed as one 8-word pair */
47010 ++ u64 *dc = tmp1 + (u32)8U; /* d and c viewed as one 8-word pair */
47011 ++ fadd(a, x2, z2); /* a = x2 + z2 */
47012 ++ fsub(b, x2, z2); /* b = x2 - z2 */
47013 ++ fsqr2(dc, ab, tmp2); /* d = a^2, c = b^2 */
47014 ++ a[0U] = c[0U]; /* copy c (= b^2) into the a slot, word by word */
47015 ++ a[1U] = c[1U];
47016 ++ a[2U] = c[2U];
47017 ++ a[3U] = c[3U];
47018 ++ fsub(c, d, c); /* c = d - c */
47019 ++ fmul_scalar(b, c, (u64)121665U); /* b = 121665 * c */
47020 ++ fadd(b, b, d); /* b = b + d */
47021 ++ fmul2(nq, dc, ab, tmp2); /* pairwise: x2 = d * a, z2 = c * b */
47022 + }
47023 +
47024 +-static void inv_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a)
47025 ++static void montgomery_ladder(u64 *out, const u8 *key, u64 *init1) /* Runs the ladder over the bits of key starting from the 8-word point init1 and copies the resulting 8-word point to out. One add-and-double step is done with a fixed swap value of 1, then bits 253 down to 3 of key are processed, then three plain doublings follow (presumably matching a clamped scalar whose bit 254 is set and whose low three bits are clear - TODO confirm against the caller). Both local buffers are wiped before returning */
47026 + {
47027 +- struct {
47028 +- eltfp25519_1w_buffer buffer;
47029 +- eltfp25519_1w x0, x1, x2;
47030 +- } __aligned(32) m;
47031 +- u64 *T[5];
47032 +-
47033 +- T[0] = m.x0;
47034 +- T[1] = c; /* x^(-1) */
47035 +- T[2] = m.x1;
47036 +- T[3] = m.x2;
47037 +-
47038 +- copy_eltfp25519_1w(T[1], a);
47039 +- sqrn_eltfp25519_1w_bmi2(T[1], 1);
47040 +- copy_eltfp25519_1w(T[2], T[1]);
47041 +- sqrn_eltfp25519_1w_bmi2(T[2], 2);
47042 +- mul_eltfp25519_1w_bmi2(T[0], a, T[2]);
47043 +- mul_eltfp25519_1w_bmi2(T[1], T[1], T[0]);
47044 +- copy_eltfp25519_1w(T[2], T[1]);
47045 +- sqrn_eltfp25519_1w_bmi2(T[2], 1);
47046 +- mul_eltfp25519_1w_bmi2(T[0], T[0], T[2]);
47047 +- copy_eltfp25519_1w(T[2], T[0]);
47048 +- sqrn_eltfp25519_1w_bmi2(T[2], 5);
47049 +- mul_eltfp25519_1w_bmi2(T[0], T[0], T[2]);
47050 +- copy_eltfp25519_1w(T[2], T[0]);
47051 +- sqrn_eltfp25519_1w_bmi2(T[2], 10);
47052 +- mul_eltfp25519_1w_bmi2(T[2], T[2], T[0]);
47053 +- copy_eltfp25519_1w(T[3], T[2]);
47054 +- sqrn_eltfp25519_1w_bmi2(T[3], 20);
47055 +- mul_eltfp25519_1w_bmi2(T[3], T[3], T[2]);
47056 +- sqrn_eltfp25519_1w_bmi2(T[3], 10);
47057 +- mul_eltfp25519_1w_bmi2(T[3], T[3], T[0]);
47058 +- copy_eltfp25519_1w(T[0], T[3]);
47059 +- sqrn_eltfp25519_1w_bmi2(T[0], 50);
47060 +- mul_eltfp25519_1w_bmi2(T[0], T[0], T[3]);
47061 +- copy_eltfp25519_1w(T[2], T[0]);
47062 +- sqrn_eltfp25519_1w_bmi2(T[2], 100);
47063 +- mul_eltfp25519_1w_bmi2(T[2], T[2], T[0]);
47064 +- sqrn_eltfp25519_1w_bmi2(T[2], 50);
47065 +- mul_eltfp25519_1w_bmi2(T[2], T[2], T[3]);
47066 +- sqrn_eltfp25519_1w_bmi2(T[2], 5);
47067 +- mul_eltfp25519_1w_bmi2(T[1], T[1], T[2]);
47068 +-
47069 +- memzero_explicit(&m, sizeof(m));
47070 ++ u64 tmp2[16U] = { 0U }; /* scratch passed down to point_add_and_double and point_double */
47071 ++ u64 p01_tmp1_swap[33U] = { 0U }; /* layout: words 0-7 first point, 8-15 second point, 16-31 temporaries, 32 the pending swap bit */
47072 ++ u64 *p0 = p01_tmp1_swap;
47073 ++ u64 *p01 = p01_tmp1_swap;
47074 ++ u64 *p03 = p01;
47075 ++ u64 *p11 = p01 + (u32)8U;
47076 ++ u64 *x0;
47077 ++ u64 *z0;
47078 ++ u64 *p01_tmp1;
47079 ++ u64 *p01_tmp11;
47080 ++ u64 *nq10;
47081 ++ u64 *nq_p11;
47082 ++ u64 *swap1;
47083 ++ u64 sw0;
47084 ++ u64 *nq1;
47085 ++ u64 *tmp1;
47086 ++ memcpy(p11, init1, (u32)8U * sizeof(init1[0U])); /* second point = the caller's starting point */
47087 ++ x0 = p03;
47088 ++ z0 = p03 + (u32)4U;
47089 ++ x0[0U] = (u64)1U; /* first point starts as x = 1, z = 0 */
47090 ++ x0[1U] = (u64)0U;
47091 ++ x0[2U] = (u64)0U;
47092 ++ x0[3U] = (u64)0U;
47093 ++ z0[0U] = (u64)0U;
47094 ++ z0[1U] = (u64)0U;
47095 ++ z0[2U] = (u64)0U;
47096 ++ z0[3U] = (u64)0U;
47097 ++ p01_tmp1 = p01_tmp1_swap;
47098 ++ p01_tmp11 = p01_tmp1_swap;
47099 ++ nq10 = p01_tmp1_swap;
47100 ++ nq_p11 = p01_tmp1_swap + (u32)8U;
47101 ++ swap1 = p01_tmp1_swap + (u32)32U;
47102 ++ cswap2((u64)1U, nq10, nq_p11); /* first step uses a constant swap value of 1 instead of a key bit; cswap2 is defined outside this excerpt */
47103 ++ point_add_and_double(init1, p01_tmp11, tmp2);
47104 ++ swap1[0U] = (u64)1U; /* remember that the points are currently in swapped order */
47105 ++ {
47106 ++ u32 i;
47107 ++ for (i = (u32)0U; i < (u32)251U; i = i + (u32)1U) { /* 251 iterations covering key bits 253 down to 3 */
47108 ++ u64 *p01_tmp12 = p01_tmp1_swap;
47109 ++ u64 *swap2 = p01_tmp1_swap + (u32)32U;
47110 ++ u64 *nq2 = p01_tmp12;
47111 ++ u64 *nq_p12 = p01_tmp12 + (u32)8U;
47112 ++ u64 bit = (u64)(key[((u32)253U - i) / (u32)8U] >> ((u32)253U - i) % (u32)8U & (u8)1U); /* bit number (253 - i) of key, counting from the least significant bit of key[0] */
47113 ++ u64 sw = swap2[0U] ^ bit; /* swap only when this bit differs from the previously processed one */
47114 ++ cswap2(sw, nq2, nq_p12);
47115 ++ point_add_and_double(init1, p01_tmp12, tmp2);
47116 ++ swap2[0U] = bit;
47117 ++ }
47118 ++ }
47119 ++ sw0 = swap1[0U];
47120 ++ cswap2(sw0, nq10, nq_p11); /* apply the swap still pending from the last processed bit */
47121 ++ nq1 = p01_tmp1;
47122 ++ tmp1 = p01_tmp1 + (u32)16U;
47123 ++ point_double(nq1, tmp1, tmp2); /* three doublings with no key bit involved */
47124 ++ point_double(nq1, tmp1, tmp2);
47125 ++ point_double(nq1, tmp1, tmp2);
47126 ++ memcpy(out, p0, (u32)8U * sizeof(p0[0U])); /* result is the first point, words 0-7 */
47127 ++
47128 ++ memzero_explicit(tmp2, sizeof(tmp2)); /* wipe the key-dependent intermediates held in the local buffers */
47129 ++ memzero_explicit(p01_tmp1_swap, sizeof(p01_tmp1_swap));
47130 + }
47131 +
47132 +-/* Given c, a 256-bit number, fred_eltfp25519_1w updates c
47133 +- * with a number such that 0 <= C < 2**255-19.
47134 +- */
47135 +-static __always_inline void fred_eltfp25519_1w(u64 *const c)
47136 ++static void fsquare_times(u64 *o, const u64 *inp, u64 *tmp, u32 n1) /* Applies fsqr n1 times in total: once from inp into o, then n1 - 1 more times on o in place; tmp is passed through as fsqr's scratch buffer. n1 must be at least 1, because with n1 == 0 the unsigned loop bound n1 - 1 wraps around */
47137 + {
47138 +- u64 tmp0 = 38, tmp1 = 19;
47139 +- asm volatile(
47140 +- "btrq $63, %3 ;" /* Put bit 255 in carry flag and clear */
47141 +- "cmovncl %k5, %k4 ;" /* c[255] ? 38 : 19 */
47142 +-
47143 +- /* Add either 19 or 38 to c */
47144 +- "addq %4, %0 ;"
47145 +- "adcq $0, %1 ;"
47146 +- "adcq $0, %2 ;"
47147 +- "adcq $0, %3 ;"
47148 +-
47149 +- /* Test for bit 255 again; only triggered on overflow modulo 2^255-19 */
47150 +- "movl $0, %k4 ;"
47151 +- "cmovnsl %k5, %k4 ;" /* c[255] ? 0 : 19 */
47152 +- "btrq $63, %3 ;" /* Clear bit 255 */
47153 +-
47154 +- /* Subtract 19 if necessary */
47155 +- "subq %4, %0 ;"
47156 +- "sbbq $0, %1 ;"
47157 +- "sbbq $0, %2 ;"
47158 +- "sbbq $0, %3 ;"
47159 +-
47160 +- : "+r"(c[0]), "+r"(c[1]), "+r"(c[2]), "+r"(c[3]), "+r"(tmp0),
47161 +- "+r"(tmp1)
47162 +- :
47163 +- : "memory", "cc");
47164 ++ u32 i;
47165 ++ fsqr(o, inp, tmp); /* first squaring reads inp and writes o */
47166 ++ for (i = (u32)0U; i < n1 - (u32)1U; i = i + (u32)1U) /* remaining n1 - 1 squarings */
47167 ++ fsqr(o, o, tmp);
47168 + }
47169 +
47170 +-static __always_inline void cswap(u8 bit, u64 *const px, u64 *const py)
47171 ++static void finv(u64 *o, const u64 *i, u64 *tmp) /* Computes o from i through a fixed chain of squarings and multiplications; tmp is the scratch buffer handed to fsquare_times/fmul. Assuming fmul(r, x, y, tmp) yields x * y and fsquare_times(r, x, tmp, n) yields x^(2^n), the exponent comments below follow and the result is i^(2^255 - 21), presumably the field inverse by Fermat's little theorem for the prime 2^255 - 19 - TODO confirm */
47172 + {
47173 +- u64 temp;
47174 +- asm volatile(
47175 +- "test %9, %9 ;"
47176 +- "movq %0, %8 ;"
47177 +- "cmovnzq %4, %0 ;"
47178 +- "cmovnzq %8, %4 ;"
47179 +- "movq %1, %8 ;"
47180 +- "cmovnzq %5, %1 ;"
47181 +- "cmovnzq %8, %5 ;"
47182 +- "movq %2, %8 ;"
47183 +- "cmovnzq %6, %2 ;"
47184 +- "cmovnzq %8, %6 ;"
47185 +- "movq %3, %8 ;"
47186 +- "cmovnzq %7, %3 ;"
47187 +- "cmovnzq %8, %7 ;"
47188 +- : "+r"(px[0]), "+r"(px[1]), "+r"(px[2]), "+r"(px[3]),
47189 +- "+r"(py[0]), "+r"(py[1]), "+r"(py[2]), "+r"(py[3]),
47190 +- "=r"(temp)
47191 +- : "r"(bit)
47192 +- : "cc"
47193 +- );
47194 ++ u64 t1[16U] = { 0U }; /* four 4-word temporaries: a0, b, c, t00. NOTE(review): t1 is not cleared before returning, unlike the local buffers in montgomery_ladder - confirm this is intended */
47195 ++ u64 *a0 = t1;
47196 ++ u64 *b = t1 + (u32)4U;
47197 ++ u64 *c = t1 + (u32)8U;
47198 ++ u64 *t00 = t1 + (u32)12U;
47199 ++ u64 *tmp1 = tmp;
47200 ++ u64 *a;
47201 ++ u64 *t0;
47202 ++ fsquare_times(a0, i, tmp1, (u32)1U); /* a0 = i^2 */
47203 ++ fsquare_times(t00, a0, tmp1, (u32)2U); /* t00 = i^8 */
47204 ++ fmul(b, t00, i, tmp); /* b = i^9 */
47205 ++ fmul(a0, b, a0, tmp); /* a0 = i^11 */
47206 ++ fsquare_times(t00, a0, tmp1, (u32)1U); /* t00 = i^22 */
47207 ++ fmul(b, t00, b, tmp); /* b = i^31 = i^(2^5 - 1) */
47208 ++ fsquare_times(t00, b, tmp1, (u32)5U); /* t00 = i^(2^10 - 2^5) */
47209 ++ fmul(b, t00, b, tmp); /* b = i^(2^10 - 1) */
47210 ++ fsquare_times(t00, b, tmp1, (u32)10U); /* t00 = i^(2^20 - 2^10) */
47211 ++ fmul(c, t00, b, tmp); /* c = i^(2^20 - 1) */
47212 ++ fsquare_times(t00, c, tmp1, (u32)20U); /* t00 = i^(2^40 - 2^20) */
47213 ++ fmul(t00, t00, c, tmp); /* t00 = i^(2^40 - 1) */
47214 ++ fsquare_times(t00, t00, tmp1, (u32)10U); /* t00 = i^(2^50 - 2^10) */
47215 ++ fmul(b, t00, b, tmp); /* b = i^(2^50 - 1) */
47216 ++ fsquare_times(t00, b, tmp1, (u32)50U); /* t00 = i^(2^100 - 2^50) */
47217 ++ fmul(c, t00, b, tmp); /* c = i^(2^100 - 1) */
47218 ++ fsquare_times(t00, c, tmp1, (u32)100U); /* t00 = i^(2^200 - 2^100) */
47219 ++ fmul(t00, t00, c, tmp); /* t00 = i^(2^200 - 1) */
47220 ++ fsquare_times(t00, t00, tmp1, (u32)50U); /* t00 = i^(2^250 - 2^50) */
47221 ++ fmul(t00, t00, b, tmp); /* t00 = i^(2^250 - 1) */
47222 ++ fsquare_times(t00, t00, tmp1, (u32)5U); /* t00 = i^(2^255 - 2^5) */
47223 ++ a = t1; /* same slot as a0, holding i^11 */
47224 ++ t0 = t1 + (u32)12U; /* same slot as t00 */
47225 ++ fmul(o, t0, a, tmp); /* o = i^(2^255 - 32 + 11) = i^(2^255 - 21) */
47226 + }
47227 +
47228 +-static __always_inline void cselect(u8 bit, u64 *const px, const u64 *const py)
47229 ++static void store_felem(u64 *b, u64 *f) /* Writes the 4-word value f to b after reducing it: bit 63 of f[3] is cleared and added back as 19 (done twice), then the constant 2^255 - 19 is subtracted once when the mask test says the value is not below it. f itself is modified in place. add_scalar, gte_mask and eq_mask are defined outside this excerpt; the comments below assume the masks are all-ones or zero - TODO confirm */
47230 + {
47231 +- asm volatile(
47232 +- "test %4, %4 ;"
47233 +- "cmovnzq %5, %0 ;"
47234 +- "cmovnzq %6, %1 ;"
47235 +- "cmovnzq %7, %2 ;"
47236 +- "cmovnzq %8, %3 ;"
47237 +- : "+r"(px[0]), "+r"(px[1]), "+r"(px[2]), "+r"(px[3])
47238 +- : "r"(bit), "rm"(py[0]), "rm"(py[1]), "rm"(py[2]), "rm"(py[3])
47239 +- : "cc"
47240 +- );
47241 ++ u64 f30 = f[3U];
47242 ++ u64 top_bit0 = f30 >> (u32)63U; /* bit 255 of the 256-bit input */
47243 ++ u64 carry0; /* assigned from add_scalar but not read again in this function */
47244 ++ u64 f31;
47245 ++ u64 top_bit;
47246 ++ u64 carry; /* assigned from add_scalar but not read again in this function */
47247 ++ u64 f0;
47248 ++ u64 f1;
47249 ++ u64 f2;
47250 ++ u64 f3;
47251 ++ u64 m0;
47252 ++ u64 m1;
47253 ++ u64 m2;
47254 ++ u64 m3;
47255 ++ u64 mask;
47256 ++ u64 f0_;
47257 ++ u64 f1_;
47258 ++ u64 f2_;
47259 ++ u64 f3_;
47260 ++ u64 o0;
47261 ++ u64 o1;
47262 ++ u64 o2;
47263 ++ u64 o3;
47264 ++ f[3U] = f30 & (u64)0x7fffffffffffffffU; /* clear bit 255 ... */
47265 ++ carry0 = add_scalar(f, f, (u64)19U * top_bit0); /* ... and add 19 in its place when it was set */
47266 ++ f31 = f[3U];
47267 ++ top_bit = f31 >> (u32)63U; /* the addition may have set bit 255 again, so repeat once */
47268 ++ f[3U] = f31 & (u64)0x7fffffffffffffffU;
47269 ++ carry = add_scalar(f, f, (u64)19U * top_bit);
47270 ++ f0 = f[0U];
47271 ++ f1 = f[1U];
47272 ++ f2 = f[2U];
47273 ++ f3 = f[3U];
47274 ++ m0 = gte_mask(f0, (u64)0xffffffffffffffedU); /* the four constants are the words of 2^255 - 19, least significant first */
47275 ++ m1 = eq_mask(f1, (u64)0xffffffffffffffffU);
47276 ++ m2 = eq_mask(f2, (u64)0xffffffffffffffffU);
47277 ++ m3 = eq_mask(f3, (u64)0x7fffffffffffffffU);
47278 ++ mask = ((m0 & m1) & m2) & m3; /* set only when all four word tests hold */
47279 ++ f0_ = f0 - (mask & (u64)0xffffffffffffffedU); /* branch-free conditional subtraction, word by word; no borrow handling between words */
47280 ++ f1_ = f1 - (mask & (u64)0xffffffffffffffffU);
47281 ++ f2_ = f2 - (mask & (u64)0xffffffffffffffffU);
47282 ++ f3_ = f3 - (mask & (u64)0x7fffffffffffffffU);
47283 ++ o0 = f0_;
47284 ++ o1 = f1_;
47285 ++ o2 = f2_;
47286 ++ o3 = f3_;
47287 ++ b[0U] = o0;
47288 ++ b[1U] = o1;
47289 ++ b[2U] = o2;
47290 ++ b[3U] = o3;
47291 + }
47292 +
47293 +-static void curve25519_adx(u8 shared[CURVE25519_KEY_SIZE],
47294 +- const u8 private_key[CURVE25519_KEY_SIZE],
47295 +- const u8 session_key[CURVE25519_KEY_SIZE])
47296 ++static void encode_point(u8 *o, const u64 *i) /* Converts the 8-word point i = (x: words 0-3, z: words 4-7) to a single value by computing x * finv(z) and writes its four reduced words to o through store_felem */
47297 + {
47298 +- struct {
47299 +- u64 buffer[4 * NUM_WORDS_ELTFP25519];
47300 +- u64 coordinates[4 * NUM_WORDS_ELTFP25519];
47301 +- u64 workspace[6 * NUM_WORDS_ELTFP25519];
47302 +- u8 session[CURVE25519_KEY_SIZE];
47303 +- u8 private[CURVE25519_KEY_SIZE];
47304 +- } __aligned(32) m;
47305 +-
47306 +- int i = 0, j = 0;
47307 +- u64 prev = 0;
47308 +- u64 *const X1 = (u64 *)m.session;
47309 +- u64 *const key = (u64 *)m.private;
47310 +- u64 *const Px = m.coordinates + 0;
47311 +- u64 *const Pz = m.coordinates + 4;
47312 +- u64 *const Qx = m.coordinates + 8;
47313 +- u64 *const Qz = m.coordinates + 12;
47314 +- u64 *const X2 = Qx;
47315 +- u64 *const Z2 = Qz;
47316 +- u64 *const X3 = Px;
47317 +- u64 *const Z3 = Pz;
47318 +- u64 *const X2Z2 = Qx;
47319 +- u64 *const X3Z3 = Px;
47320 +-
47321 +- u64 *const A = m.workspace + 0;
47322 +- u64 *const B = m.workspace + 4;
47323 +- u64 *const D = m.workspace + 8;
47324 +- u64 *const C = m.workspace + 12;
47325 +- u64 *const DA = m.workspace + 16;
47326 +- u64 *const CB = m.workspace + 20;
47327 +- u64 *const AB = A;
47328 +- u64 *const DC = D;
47329 +- u64 *const DACB = DA;
47330 +-
47331 +- memcpy(m.private, private_key, sizeof(m.private));
47332 +- memcpy(m.session, session_key, sizeof(m.session));
47333 +-
47334 +- curve25519_clamp_secret(m.private);
47335 +-
47336 +- /* As in the draft:
47337 +- * When receiving such an array, implementations of curve25519
47338 +- * MUST mask the most-significant bit in the final byte. This
47339 +- * is done to preserve compatibility with point formats which
47340 +- * reserve the sign bit for use in other protocols and to
47341 +- * increase resistance to implementation fingerprinting
47342 +- */
47343 +- m.session[CURVE25519_KEY_SIZE - 1] &= (1 << (255 % 8)) - 1;
47344 +-
47345 +- copy_eltfp25519_1w(Px, X1);
47346 +- setzero_eltfp25519_1w(Pz);
47347 +- setzero_eltfp25519_1w(Qx);
47348 +- setzero_eltfp25519_1w(Qz);
47349 +-
47350 +- Pz[0] = 1;
47351 +- Qx[0] = 1;
47352 +-
47353 +- /* main-loop */
47354 +- prev = 0;
47355 +- j = 62;
47356 +- for (i = 3; i >= 0; --i) {
47357 +- while (j >= 0) {
47358 +- u64 bit = (key[i] >> j) & 0x1;
47359 +- u64 swap = bit ^ prev;
47360 +- prev = bit;
47361 +-
47362 +- add_eltfp25519_1w_adx(A, X2, Z2); /* A = (X2+Z2) */
47363 +- sub_eltfp25519_1w(B, X2, Z2); /* B = (X2-Z2) */
47364 +- add_eltfp25519_1w_adx(C, X3, Z3); /* C = (X3+Z3) */
47365 +- sub_eltfp25519_1w(D, X3, Z3); /* D = (X3-Z3) */
47366 +- mul_eltfp25519_2w_adx(DACB, AB, DC); /* [DA|CB] = [A|B]*[D|C] */
47367 +-
47368 +- cselect(swap, A, C);
47369 +- cselect(swap, B, D);
47370 +-
47371 +- sqr_eltfp25519_2w_adx(AB); /* [AA|BB] = [A^2|B^2] */
47372 +- add_eltfp25519_1w_adx(X3, DA, CB); /* X3 = (DA+CB) */
47373 +- sub_eltfp25519_1w(Z3, DA, CB); /* Z3 = (DA-CB) */
47374 +- sqr_eltfp25519_2w_adx(X3Z3); /* [X3|Z3] = [(DA+CB)|(DA+CB)]^2 */
47375 +-
47376 +- copy_eltfp25519_1w(X2, B); /* X2 = B^2 */
47377 +- sub_eltfp25519_1w(Z2, A, B); /* Z2 = E = AA-BB */
47378 +-
47379 +- mul_a24_eltfp25519_1w(B, Z2); /* B = a24*E */
47380 +- add_eltfp25519_1w_adx(B, B, X2); /* B = a24*E+B */
47381 +- mul_eltfp25519_2w_adx(X2Z2, X2Z2, AB); /* [X2|Z2] = [B|E]*[A|a24*E+B] */
47382 +- mul_eltfp25519_1w_adx(Z3, Z3, X1); /* Z3 = Z3*X1 */
47383 +- --j;
47384 +- }
47385 +- j = 63;
47386 +- }
47387 +-
47388 +- inv_eltfp25519_1w_adx(A, Qz);
47389 +- mul_eltfp25519_1w_adx((u64 *)shared, Qx, A);
47390 +- fred_eltfp25519_1w((u64 *)shared);
47391 +-
47392 +- memzero_explicit(&m, sizeof(m));
47393 ++ const u64 *x = i; /* words 0-3 of the point */
47394 ++ const u64 *z = i + (u32)4U; /* words 4-7 of the point */
47395 ++ u64 tmp[4U] = { 0U };
47396 ++ u64 tmp_w[16U] = { 0U }; /* scratch buffer handed to finv and fmul */
47397 ++ finv(tmp, z, tmp_w); /* tmp = result of finv on z */
47398 ++ fmul(tmp, tmp, x, tmp_w); /* tmp = tmp * x, assuming fmul multiplies with the destination first (defined outside this excerpt) */
47399 ++ store_felem((u64 *)o, tmp); /* NOTE(review): o is a u8 pointer written through a u64 pointer cast - relies on the target tolerating unaligned stores and on native byte order; confirm */
47400 + }
47401 +
47402 +-static void curve25519_adx_base(u8 session_key[CURVE25519_KEY_SIZE],
47403 +- const u8 private_key[CURVE25519_KEY_SIZE])
47404 ++static void curve25519_ever64(u8 *out, const u8 *priv, const u8 *pub) /* Loads the 32 bytes at pub as four u64 words, clears bit 63 of the last word, builds the 8-word point init1 = (x = those words, z = 1), runs montgomery_ladder with priv as the key and writes the encoded result to out via encode_point. priv is passed to the ladder unchanged here; any clamping would have to happen in the caller - TODO confirm */
47405 + {
47406 +- struct {
47407 +- u64 buffer[4 * NUM_WORDS_ELTFP25519];
47408 +- u64 coordinates[4 * NUM_WORDS_ELTFP25519];
47409 +- u64 workspace[4 * NUM_WORDS_ELTFP25519];
47410 +- u8 private[CURVE25519_KEY_SIZE];
47411 +- } __aligned(32) m;
47412 +-
47413 +- const int ite[4] = { 64, 64, 64, 63 };
47414 +- const int q = 3;
47415 +- u64 swap = 1;
47416 +-
47417 +- int i = 0, j = 0, k = 0;
47418 +- u64 *const key = (u64 *)m.private;
47419 +- u64 *const Ur1 = m.coordinates + 0;
47420 +- u64 *const Zr1 = m.coordinates + 4;
47421 +- u64 *const Ur2 = m.coordinates + 8;
47422 +- u64 *const Zr2 = m.coordinates + 12;
47423 +-
47424 +- u64 *const UZr1 = m.coordinates + 0;
47425 +- u64 *const ZUr2 = m.coordinates + 8;
47426 +-
47427 +- u64 *const A = m.workspace + 0;
47428 +- u64 *const B = m.workspace + 4;
47429 +- u64 *const C = m.workspace + 8;
47430 +- u64 *const D = m.workspace + 12;
47431 +-
47432 +- u64 *const AB = m.workspace + 0;
47433 +- u64 *const CD = m.workspace + 8;
47434 +-
47435 +- const u64 *const P = table_ladder_8k;
47436 +-
47437 +- memcpy(m.private, private_key, sizeof(m.private));
47438 +-
47439 +- curve25519_clamp_secret(m.private);
47440 +-
47441 +- setzero_eltfp25519_1w(Ur1);
47442 +- setzero_eltfp25519_1w(Zr1);
47443 +- setzero_eltfp25519_1w(Zr2);
47444 +- Ur1[0] = 1;
47445 +- Zr1[0] = 1;
47446 +- Zr2[0] = 1;
47447 +-
47448 +- /* G-S */
47449 +- Ur2[3] = 0x1eaecdeee27cab34UL;
47450 +- Ur2[2] = 0xadc7a0b9235d48e2UL;
47451 +- Ur2[1] = 0xbbf095ae14b2edf8UL;
47452 +- Ur2[0] = 0x7e94e1fec82faabdUL;
47453 +-
47454 +- /* main-loop */
47455 +- j = q;
47456 +- for (i = 0; i < NUM_WORDS_ELTFP25519; ++i) {
47457 +- while (j < ite[i]) {
47458 +- u64 bit = (key[i] >> j) & 0x1;
47459 +- k = (64 * i + j - q);
47460 +- swap = swap ^ bit;
47461 +- cswap(swap, Ur1, Ur2);
47462 +- cswap(swap, Zr1, Zr2);
47463 +- swap = bit;
47464 +- /* Addition */
47465 +- sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */
47466 +- add_eltfp25519_1w_adx(A, Ur1, Zr1); /* A = Ur1+Zr1 */
47467 +- mul_eltfp25519_1w_adx(C, &P[4 * k], B); /* C = M0-B */
47468 +- sub_eltfp25519_1w(B, A, C); /* B = (Ur1+Zr1) - M*(Ur1-Zr1) */
47469 +- add_eltfp25519_1w_adx(A, A, C); /* A = (Ur1+Zr1) + M*(Ur1-Zr1) */
47470 +- sqr_eltfp25519_2w_adx(AB); /* A = A^2 | B = B^2 */
47471 +- mul_eltfp25519_2w_adx(UZr1, ZUr2, AB); /* Ur1 = Zr2*A | Zr1 = Ur2*B */
47472 +- ++j;
47473 ++ u64 init1[8U] = { 0U }; /* starting point for the ladder: x in words 0-3, z in words 4-7; also receives the ladder's result */
47474 ++ u64 tmp[4U] = { 0U }; /* pub as four u64 words */
47475 ++ u64 tmp3;
47476 ++ u64 *x;
47477 ++ u64 *z;
47478 ++ {
47479 ++ u32 i;
47480 ++ for (i = (u32)0U; i < (u32)4U; i = i + (u32)1U) { /* copy the 32 bytes of pub into tmp, 8 bytes per iteration */
47481 ++ u64 *os = tmp;
47482 ++ const u8 *bj = pub + i * (u32)8U;
47483 ++ u64 u = *(u64 *)bj; /* NOTE(review): direct u64 load through a cast of a u8 pointer - relies on the target tolerating unaligned loads and on native byte order; confirm */
47484 ++ u64 r = u;
47485 ++ u64 x0 = r;
47486 ++ os[i] = x0;
47487 + }
47488 +- j = 0;
47489 + }
47490 +-
47491 +- /* Doubling */
47492 +- for (i = 0; i < q; ++i) {
47493 +- add_eltfp25519_1w_adx(A, Ur1, Zr1); /* A = Ur1+Zr1 */
47494 +- sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */
47495 +- sqr_eltfp25519_2w_adx(AB); /* A = A**2 B = B**2 */
47496 +- copy_eltfp25519_1w(C, B); /* C = B */
47497 +- sub_eltfp25519_1w(B, A, B); /* B = A-B */
47498 +- mul_a24_eltfp25519_1w(D, B); /* D = my_a24*B */
47499 +- add_eltfp25519_1w_adx(D, D, C); /* D = D+C */
47500 +- mul_eltfp25519_2w_adx(UZr1, AB, CD); /* Ur1 = A*B Zr1 = Zr1*A */
47501 +- }
47502 +-
47503 +- /* Convert to affine coordinates */
47504 +- inv_eltfp25519_1w_adx(A, Zr1);
47505 +- mul_eltfp25519_1w_adx((u64 *)session_key, Ur1, A);
47506 +- fred_eltfp25519_1w((u64 *)session_key);
47507 +-
47508 +- memzero_explicit(&m, sizeof(m));
47509 ++ tmp3 = tmp[3U];
47510 ++ tmp[3U] = tmp3 & (u64)0x7fffffffffffffffU; /* clear bit 63 of the last word, i.e. the top bit of the 256-bit input */
47511 ++ x = init1;
47512 ++ z = init1 + (u32)4U;
47513 ++ z[0U] = (u64)1U; /* z = 1 */
47514 ++ z[1U] = (u64)0U;
47515 ++ z[2U] = (u64)0U;
47516 ++ z[3U] = (u64)0U;
47517 ++ x[0U] = tmp[0U]; /* x = masked pub words */
47518 ++ x[1U] = tmp[1U];
47519 ++ x[2U] = tmp[2U];
47520 ++ x[3U] = tmp[3U];
47521 ++ montgomery_ladder(init1, priv, init1); /* init1 is both the starting point and the destination of the result */
47522 ++ encode_point(out, init1);
47523 + }
47524 +
47525 +-static void curve25519_bmi2(u8 shared[CURVE25519_KEY_SIZE],
47526 +- const u8 private_key[CURVE25519_KEY_SIZE],
47527 +- const u8 session_key[CURVE25519_KEY_SIZE])
47528 +-{
47529 +- struct {
47530 +- u64 buffer[4 * NUM_WORDS_ELTFP25519];
47531 +- u64 coordinates[4 * NUM_WORDS_ELTFP25519];
47532 +- u64 workspace[6 * NUM_WORDS_ELTFP25519];
47533 +- u8 session[CURVE25519_KEY_SIZE];
47534 +- u8 private[CURVE25519_KEY_SIZE];
47535 +- } __aligned(32) m;
47536 +-
47537 +- int i = 0, j = 0;
47538 +- u64 prev = 0;
47539 +- u64 *const X1 = (u64 *)m.session;
47540 +- u64 *const key = (u64 *)m.private;
47541 +- u64 *const Px = m.coordinates + 0;
47542 +- u64 *const Pz = m.coordinates + 4;
47543 +- u64 *const Qx = m.coordinates + 8;
47544 +- u64 *const Qz = m.coordinates + 12;
47545 +- u64 *const X2 = Qx;
47546 +- u64 *const Z2 = Qz;
47547 +- u64 *const X3 = Px;
47548 +- u64 *const Z3 = Pz;
47549 +- u64 *const X2Z2 = Qx;
47550 +- u64 *const X3Z3 = Px;
47551 +-
47552 +- u64 *const A = m.workspace + 0;
47553 +- u64 *const B = m.workspace + 4;
47554 +- u64 *const D = m.workspace + 8;
47555 +- u64 *const C = m.workspace + 12;
47556 +- u64 *const DA = m.workspace + 16;
47557 +- u64 *const CB = m.workspace + 20;
47558 +- u64 *const AB = A;
47559 +- u64 *const DC = D;
47560 +- u64 *const DACB = DA;
47561 +-
47562 +- memcpy(m.private, private_key, sizeof(m.private));
47563 +- memcpy(m.session, session_key, sizeof(m.session));
47564 +-
47565 +- curve25519_clamp_secret(m.private);
47566 +-
47567 +- /* As in the draft:
47568 +- * When receiving such an array, implementations of curve25519
47569 +- * MUST mask the most-significant bit in the final byte. This
47570 +- * is done to preserve compatibility with point formats which
47571 +- * reserve the sign bit for use in other protocols and to
47572 +- * increase resistance to implementation fingerprinting
47573 +- */
47574 +- m.session[CURVE25519_KEY_SIZE - 1] &= (1 << (255 % 8)) - 1;
47575 +-
47576 +- copy_eltfp25519_1w(Px, X1);
47577 +- setzero_eltfp25519_1w(Pz);
47578 +- setzero_eltfp25519_1w(Qx);
47579 +- setzero_eltfp25519_1w(Qz);
47580 +-
47581 +- Pz[0] = 1;
47582 +- Qx[0] = 1;
47583 +-
47584 +- /* main-loop */
47585 +- prev = 0;
47586 +- j = 62;
47587 +- for (i = 3; i >= 0; --i) {
47588 +- while (j >= 0) {
47589 +- u64 bit = (key[i] >> j) & 0x1;
47590 +- u64 swap = bit ^ prev;
47591 +- prev = bit;
47592 +-
47593 +- add_eltfp25519_1w_bmi2(A, X2, Z2); /* A = (X2+Z2) */
47594 +- sub_eltfp25519_1w(B, X2, Z2); /* B = (X2-Z2) */
47595 +- add_eltfp25519_1w_bmi2(C, X3, Z3); /* C = (X3+Z3) */
47596 +- sub_eltfp25519_1w(D, X3, Z3); /* D = (X3-Z3) */
47597 +- mul_eltfp25519_2w_bmi2(DACB, AB, DC); /* [DA|CB] = [A|B]*[D|C] */
47598 +-
47599 +- cselect(swap, A, C);
47600 +- cselect(swap, B, D);
47601 +-
47602 +- sqr_eltfp25519_2w_bmi2(AB); /* [AA|BB] = [A^2|B^2] */
47603 +- add_eltfp25519_1w_bmi2(X3, DA, CB); /* X3 = (DA+CB) */
47604 +- sub_eltfp25519_1w(Z3, DA, CB); /* Z3 = (DA-CB) */
47605 +- sqr_eltfp25519_2w_bmi2(X3Z3); /* [X3|Z3] = [(DA+CB)|(DA+CB)]^2 */
47606 +-
47607 +- copy_eltfp25519_1w(X2, B); /* X2 = B^2 */
47608 +- sub_eltfp25519_1w(Z2, A, B); /* Z2 = E = AA-BB */
47609 +-
47610 +- mul_a24_eltfp25519_1w(B, Z2); /* B = a24*E */
47611 +- add_eltfp25519_1w_bmi2(B, B, X2); /* B = a24*E+B */
47612 +- mul_eltfp25519_2w_bmi2(X2Z2, X2Z2, AB); /* [X2|Z2] = [B|E]*[A|a24*E+B] */
47613 +- mul_eltfp25519_1w_bmi2(Z3, Z3, X1); /* Z3 = Z3*X1 */
47614 +- --j;
47615 +- }
47616 +- j = 63;
47617 +- }
47618 +-
47619 +- inv_eltfp25519_1w_bmi2(A, Qz);
47620 +- mul_eltfp25519_1w_bmi2((u64 *)shared, Qx, A);
47621 +- fred_eltfp25519_1w((u64 *)shared);
47622 ++/* The below constants were generated using this sage script:
47623 ++ *
47624 ++ * #!/usr/bin/env sage
47625 ++ * import sys
47626 ++ * from sage.all import *
47627 ++ * def limbs(n):
47628 ++ * n = int(n)
47629 ++ * l = ((n >> 0) % 2^64, (n >> 64) % 2^64, (n >> 128) % 2^64, (n >> 192) % 2^64)
47630 ++ * return "0x%016xULL, 0x%016xULL, 0x%016xULL, 0x%016xULL" % l
47631 ++ * ec = EllipticCurve(GF(2^255 - 19), [0, 486662, 0, 1, 0])
47632 ++ * p_minus_s = (ec.lift_x(9) - ec.lift_x(1))[0]
47633 ++ * print("static const u64 p_minus_s[] = { %s };\n" % limbs(p_minus_s))
47634 ++ * print("static const u64 table_ladder[] = {")
47635 ++ * p = ec.lift_x(9)
47636 ++ * for i in range(252):
47637 ++ * l = (p[0] + p[2]) / (p[0] - p[2])
47638 ++ * print(("\t%s" + ("," if i != 251 else "")) % limbs(l))
47639 ++ * p = p * 2
47640 ++ * print("};")
47641 ++ *
47642 ++ */
47643 +
47644 +- memzero_explicit(&m, sizeof(m));
47645 +-}
47646 ++static const u64 p_minus_s[] = { 0x816b1e0137d48290ULL, 0x440f6a51eb4d1207ULL, 0x52385f46dca2b71dULL, 0x215132111d8354cbULL }; /* four 64-bit limbs, least-significant limb first, as printed by limbs() in the script above */
47647 ++
47648 ++static const u64 table_ladder[] = {
47649 ++ 0xfffffffffffffff3ULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0x5fffffffffffffffULL,
47650 ++ 0x6b8220f416aafe96ULL, 0x82ebeb2b4f566a34ULL, 0xd5a9a5b075a5950fULL, 0x5142b2cf4b2488f4ULL,
47651 ++ 0x6aaebc750069680cULL, 0x89cf7820a0f99c41ULL, 0x2a58d9183b56d0f4ULL, 0x4b5aca80e36011a4ULL,
47652 ++ 0x329132348c29745dULL, 0xf4a2e616e1642fd7ULL, 0x1e45bb03ff67bc34ULL, 0x306912d0f42a9b4aULL,
47653 ++ 0xff886507e6af7154ULL, 0x04f50e13dfeec82fULL, 0xaa512fe82abab5ceULL, 0x174e251a68d5f222ULL,
47654 ++ 0xcf96700d82028898ULL, 0x1743e3370a2c02c5ULL, 0x379eec98b4e86eaaULL, 0x0c59888a51e0482eULL,
47655 ++ 0xfbcbf1d699b5d189ULL, 0xacaef0d58e9fdc84ULL, 0xc1c20d06231f7614ULL, 0x2938218da274f972ULL,
47656 ++ 0xf6af49beff1d7f18ULL, 0xcc541c22387ac9c2ULL, 0x96fcc9ef4015c56bULL, 0x69c1627c690913a9ULL,
47657 ++ 0x7a86fd2f4733db0eULL, 0xfdb8c4f29e087de9ULL, 0x095e4b1a8ea2a229ULL, 0x1ad7a7c829b37a79ULL,
47658 ++ 0x342d89cad17ea0c0ULL, 0x67bedda6cced2051ULL, 0x19ca31bf2bb42f74ULL, 0x3df7b4c84980acbbULL,
47659 ++ 0xa8c6444dc80ad883ULL, 0xb91e440366e3ab85ULL, 0xc215cda00164f6d8ULL, 0x3d867c6ef247e668ULL,
47660 ++ 0xc7dd582bcc3e658cULL, 0xfd2c4748ee0e5528ULL, 0xa0fd9b95cc9f4f71ULL, 0x7529d871b0675ddfULL,
47661 ++ 0xb8f568b42d3cbd78ULL, 0x1233011b91f3da82ULL, 0x2dce6ccd4a7c3b62ULL, 0x75e7fc8e9e498603ULL,
47662 ++ 0x2f4f13f1fcd0b6ecULL, 0xf1a8ca1f29ff7a45ULL, 0xc249c1a72981e29bULL, 0x6ebe0dbb8c83b56aULL,
47663 ++ 0x7114fa8d170bb222ULL, 0x65a2dcd5bf93935fULL, 0xbdc41f68b59c979aULL, 0x2f0eef79a2ce9289ULL,
47664 ++ 0x42ecbf0c083c37ceULL, 0x2930bc09ec496322ULL, 0xf294b0c19cfeac0dULL, 0x3780aa4bedfabb80ULL,
47665 ++ 0x56c17d3e7cead929ULL, 0xe7cb4beb2e5722c5ULL, 0x0ce931732dbfe15aULL, 0x41b883c7621052f8ULL,
47666 ++ 0xdbf75ca0c3d25350ULL, 0x2936be086eb1e351ULL, 0xc936e03cb4a9b212ULL, 0x1d45bf82322225aaULL,
47667 ++ 0xe81ab1036a024cc5ULL, 0xe212201c304c9a72ULL, 0xc5d73fba6832b1fcULL, 0x20ffdb5a4d839581ULL,
47668 ++ 0xa283d367be5d0fadULL, 0x6c2b25ca8b164475ULL, 0x9d4935467caaf22eULL, 0x5166408eee85ff49ULL,
47669 ++ 0x3c67baa2fab4e361ULL, 0xb3e433c67ef35cefULL, 0x5259729241159b1cULL, 0x6a621892d5b0ab33ULL,
47670 ++ 0x20b74a387555cdcbULL, 0x532aa10e1208923fULL, 0xeaa17b7762281dd1ULL, 0x61ab3443f05c44bfULL,
47671 ++ 0x257a6c422324def8ULL, 0x131c6c1017e3cf7fULL, 0x23758739f630a257ULL, 0x295a407a01a78580ULL,
47672 ++ 0xf8c443246d5da8d9ULL, 0x19d775450c52fa5dULL, 0x2afcfc92731bf83dULL, 0x7d10c8e81b2b4700ULL,
47673 ++ 0xc8e0271f70baa20bULL, 0x993748867ca63957ULL, 0x5412efb3cb7ed4bbULL, 0x3196d36173e62975ULL,
47674 ++ 0xde5bcad141c7dffcULL, 0x47cc8cd2b395c848ULL, 0xa34cd942e11af3cbULL, 0x0256dbf2d04ecec2ULL,
47675 ++ 0x875ab7e94b0e667fULL, 0xcad4dd83c0850d10ULL, 0x47f12e8f4e72c79fULL, 0x5f1a87bb8c85b19bULL,
47676 ++ 0x7ae9d0b6437f51b8ULL, 0x12c7ce5518879065ULL, 0x2ade09fe5cf77aeeULL, 0x23a05a2f7d2c5627ULL,
47677 ++ 0x5908e128f17c169aULL, 0xf77498dd8ad0852dULL, 0x74b4c4ceab102f64ULL, 0x183abadd10139845ULL,
47678 ++ 0xb165ba8daa92aaacULL, 0xd5c5ef9599386705ULL, 0xbe2f8f0cf8fc40d1ULL, 0x2701e635ee204514ULL,
47679 ++ 0x629fa80020156514ULL, 0xf223868764a8c1ceULL, 0x5b894fff0b3f060eULL, 0x60d9944cf708a3faULL,
47680 ++ 0xaeea001a1c7a201fULL, 0xebf16a633ee2ce63ULL, 0x6f7709594c7a07e1ULL, 0x79b958150d0208cbULL,
47681 ++ 0x24b55e5301d410e7ULL, 0xe3a34edff3fdc84dULL, 0xd88768e4904032d8ULL, 0x131384427b3aaeecULL,
47682 ++ 0x8405e51286234f14ULL, 0x14dc4739adb4c529ULL, 0xb8a2b5b250634ffdULL, 0x2fe2a94ad8a7ff93ULL,
47683 ++ 0xec5c57efe843faddULL, 0x2843ce40f0bb9918ULL, 0xa4b561d6cf3d6305ULL, 0x743629bde8fb777eULL,
47684 ++ 0x343edd46bbaf738fULL, 0xed981828b101a651ULL, 0xa401760b882c797aULL, 0x1fc223e28dc88730ULL,
47685 ++ 0x48604e91fc0fba0eULL, 0xb637f78f052c6fa4ULL, 0x91ccac3d09e9239cULL, 0x23f7eed4437a687cULL,
47686 ++ 0x5173b1118d9bd800ULL, 0x29d641b63189d4a7ULL, 0xfdbf177988bbc586ULL, 0x2959894fcad81df5ULL,
47687 ++ 0xaebc8ef3b4bbc899ULL, 0x4148995ab26992b9ULL, 0x24e20b0134f92cfbULL, 0x40d158894a05dee8ULL,
47688 ++ 0x46b00b1185af76f6ULL, 0x26bac77873187a79ULL, 0x3dc0bf95ab8fff5fULL, 0x2a608bd8945524d7ULL,
47689 ++ 0x26449588bd446302ULL, 0x7c4bc21c0388439cULL, 0x8e98a4f383bd11b2ULL, 0x26218d7bc9d876b9ULL,
47690 ++ 0xe3081542997c178aULL, 0x3c2d29a86fb6606fULL, 0x5c217736fa279374ULL, 0x7dde05734afeb1faULL,
47691 ++ 0x3bf10e3906d42babULL, 0xe4f7803e1980649cULL, 0xe6053bf89595bf7aULL, 0x394faf38da245530ULL,
47692 ++ 0x7a8efb58896928f4ULL, 0xfbc778e9cc6a113cULL, 0x72670ce330af596fULL, 0x48f222a81d3d6cf7ULL,
47693 ++ 0xf01fce410d72caa7ULL, 0x5a20ecc7213b5595ULL, 0x7bc21165c1fa1483ULL, 0x07f89ae31da8a741ULL,
47694 ++ 0x05d2c2b4c6830ff9ULL, 0xd43e330fc6316293ULL, 0xa5a5590a96d3a904ULL, 0x705edb91a65333b6ULL,
47695 ++ 0x048ee15e0bb9a5f7ULL, 0x3240cfca9e0aaf5dULL, 0x8f4b71ceedc4a40bULL, 0x621c0da3de544a6dULL,
47696 ++ 0x92872836a08c4091ULL, 0xce8375b010c91445ULL, 0x8a72eb524f276394ULL, 0x2667fcfa7ec83635ULL,
47697 ++ 0x7f4c173345e8752aULL, 0x061b47feee7079a5ULL, 0x25dd9afa9f86ff34ULL, 0x3780cef5425dc89cULL,
47698 ++ 0x1a46035a513bb4e9ULL, 0x3e1ef379ac575adaULL, 0xc78c5f1c5fa24b50ULL, 0x321a967634fd9f22ULL,
47699 ++ 0x946707b8826e27faULL, 0x3dca84d64c506fd0ULL, 0xc189218075e91436ULL, 0x6d9284169b3b8484ULL,
47700 ++ 0x3a67e840383f2ddfULL, 0x33eec9a30c4f9b75ULL, 0x3ec7c86fa783ef47ULL, 0x26ec449fbac9fbc4ULL,
47701 ++ 0x5c0f38cba09b9e7dULL, 0x81168cc762a3478cULL, 0x3e23b0d306fc121cULL, 0x5a238aa0a5efdcddULL,
47702 ++ 0x1ba26121c4ea43ffULL, 0x36f8c77f7c8832b5ULL, 0x88fbea0b0adcf99aULL, 0x5ca9938ec25bebf9ULL,
47703 ++ 0xd5436a5e51fccda0ULL, 0x1dbc4797c2cd893bULL, 0x19346a65d3224a08ULL, 0x0f5034e49b9af466ULL,
47704 ++ 0xf23c3967a1e0b96eULL, 0xe58b08fa867a4d88ULL, 0xfb2fabc6a7341679ULL, 0x2a75381eb6026946ULL,
47705 ++ 0xc80a3be4c19420acULL, 0x66b1f6c681f2b6dcULL, 0x7cf7036761e93388ULL, 0x25abbbd8a660a4c4ULL,
47706 ++ 0x91ea12ba14fd5198ULL, 0x684950fc4a3cffa9ULL, 0xf826842130f5ad28ULL, 0x3ea988f75301a441ULL,
47707 ++ 0xc978109a695f8c6fULL, 0x1746eb4a0530c3f3ULL, 0x444d6d77b4459995ULL, 0x75952b8c054e5cc7ULL,
47708 ++ 0xa3703f7915f4d6aaULL, 0x66c346202f2647d8ULL, 0xd01469df811d644bULL, 0x77fea47d81a5d71fULL,
47709 ++ 0xc5e9529ef57ca381ULL, 0x6eeeb4b9ce2f881aULL, 0xb6e91a28e8009bd6ULL, 0x4b80be3e9afc3fecULL,
47710 ++ 0x7e3773c526aed2c5ULL, 0x1b4afcb453c9a49dULL, 0xa920bdd7baffb24dULL, 0x7c54699f122d400eULL,
47711 ++ 0xef46c8e14fa94bc8ULL, 0xe0b074ce2952ed5eULL, 0xbea450e1dbd885d5ULL, 0x61b68649320f712cULL,
47712 ++ 0x8a485f7309ccbdd1ULL, 0xbd06320d7d4d1a2dULL, 0x25232973322dbef4ULL, 0x445dc4758c17f770ULL,
47713 ++ 0xdb0434177cc8933cULL, 0xed6fe82175ea059fULL, 0x1efebefdc053db34ULL, 0x4adbe867c65daf99ULL,
47714 ++ 0x3acd71a2a90609dfULL, 0xe5e991856dd04050ULL, 0x1ec69b688157c23cULL, 0x697427f6885cfe4dULL,
47715 ++ 0xd7be7b9b65e1a851ULL, 0xa03d28d522c536ddULL, 0x28399d658fd2b645ULL, 0x49e5b7e17c2641e1ULL,
47716 ++ 0x6f8c3a98700457a4ULL, 0x5078f0a25ebb6778ULL, 0xd13c3ccbc382960fULL, 0x2e003258a7df84b1ULL,
47717 ++ 0x8ad1f39be6296a1cULL, 0xc1eeaa652a5fbfb2ULL, 0x33ee0673fd26f3cbULL, 0x59256173a69d2cccULL,
47718 ++ 0x41ea07aa4e18fc41ULL, 0xd9fc19527c87a51eULL, 0xbdaacb805831ca6fULL, 0x445b652dc916694fULL,
47719 ++ 0xce92a3a7f2172315ULL, 0x1edc282de11b9964ULL, 0xa1823aafe04c314aULL, 0x790a2d94437cf586ULL,
47720 ++ 0x71c447fb93f6e009ULL, 0x8922a56722845276ULL, 0xbf70903b204f5169ULL, 0x2f7a89891ba319feULL,
47721 ++ 0x02a08eb577e2140cULL, 0xed9a4ed4427bdcf4ULL, 0x5253ec44e4323cd1ULL, 0x3e88363c14e9355bULL,
47722 ++ 0xaa66c14277110b8cULL, 0x1ae0391610a23390ULL, 0x2030bd12c93fc2a2ULL, 0x3ee141579555c7abULL,
47723 ++ 0x9214de3a6d6e7d41ULL, 0x3ccdd88607f17efeULL, 0x674f1288f8e11217ULL, 0x5682250f329f93d0ULL,
47724 ++ 0x6cf00b136d2e396eULL, 0x6e4cf86f1014debfULL, 0x5930b1b5bfcc4e83ULL, 0x047069b48aba16b6ULL,
47725 ++ 0x0d4ce4ab69b20793ULL, 0xb24db91a97d0fb9eULL, 0xcdfa50f54e00d01dULL, 0x221b1085368bddb5ULL,
47726 ++ 0xe7e59468b1e3d8d2ULL, 0x53c56563bd122f93ULL, 0xeee8a903e0663f09ULL, 0x61efa662cbbe3d42ULL,
47727 ++ 0x2cf8ddddde6eab2aULL, 0x9bf80ad51435f231ULL, 0x5deadacec9f04973ULL, 0x29275b5d41d29b27ULL,
47728 ++ 0xcfde0f0895ebf14fULL, 0xb9aab96b054905a7ULL, 0xcae80dd9a1c420fdULL, 0x0a63bf2f1673bbc7ULL,
47729 ++ 0x092f6e11958fbc8cULL, 0x672a81e804822fadULL, 0xcac8351560d52517ULL, 0x6f3f7722c8f192f8ULL,
47730 ++ 0xf8ba90ccc2e894b7ULL, 0x2c7557a438ff9f0dULL, 0x894d1d855ae52359ULL, 0x68e122157b743d69ULL,
47731 ++ 0xd87e5570cfb919f3ULL, 0x3f2cdecd95798db9ULL, 0x2121154710c0a2ceULL, 0x3c66a115246dc5b2ULL,
47732 ++ 0xcbedc562294ecb72ULL, 0xba7143c36a280b16ULL, 0x9610c2efd4078b67ULL, 0x6144735d946a4b1eULL,
47733 ++ 0x536f111ed75b3350ULL, 0x0211db8c2041d81bULL, 0xf93cb1000e10413cULL, 0x149dfd3c039e8876ULL,
47734 ++ 0xd479dde46b63155bULL, 0xb66e15e93c837976ULL, 0xdafde43b1f13e038ULL, 0x5fafda1a2e4b0b35ULL,
47735 ++ 0x3600bbdf17197581ULL, 0x3972050bbe3cd2c2ULL, 0x5938906dbdd5be86ULL, 0x34fce5e43f9b860fULL,
47736 ++ 0x75a8a4cd42d14d02ULL, 0x828dabc53441df65ULL, 0x33dcabedd2e131d3ULL, 0x3ebad76fb814d25fULL,
47737 ++ 0xd4906f566f70e10fULL, 0x5d12f7aa51690f5aULL, 0x45adb16e76cefcf2ULL, 0x01f768aead232999ULL,
47738 ++ 0x2b6cc77b6248febdULL, 0x3cd30628ec3aaffdULL, 0xce1c0b80d4ef486aULL, 0x4c3bff2ea6f66c23ULL,
47739 ++ 0x3f2ec4094aeaeb5fULL, 0x61b19b286e372ca7ULL, 0x5eefa966de2a701dULL, 0x23b20565de55e3efULL,
47740 ++ 0xe301ca5279d58557ULL, 0x07b2d4ce27c2874fULL, 0xa532cd8a9dcf1d67ULL, 0x2a52fee23f2bff56ULL,
47741 ++ 0x8624efb37cd8663dULL, 0xbbc7ac20ffbd7594ULL, 0x57b85e9c82d37445ULL, 0x7b3052cb86a6ec66ULL,
47742 ++ 0x3482f0ad2525e91eULL, 0x2cb68043d28edca0ULL, 0xaf4f6d052e1b003aULL, 0x185f8c2529781b0aULL,
47743 ++ 0xaa41de5bd80ce0d6ULL, 0x9407b2416853e9d6ULL, 0x563ec36e357f4c3aULL, 0x4cc4b8dd0e297bceULL,
47744 ++ 0xa2fc1a52ffb8730eULL, 0x1811f16e67058e37ULL, 0x10f9a366cddf4ee1ULL, 0x72f4a0c4a0b9f099ULL,
47745 ++ 0x8c16c06f663f4ea7ULL, 0x693b3af74e970fbaULL, 0x2102e7f1d69ec345ULL, 0x0ba53cbc968a8089ULL,
47746 ++ 0xca3d9dc7fea15537ULL, 0x4c6824bb51536493ULL, 0xb9886314844006b1ULL, 0x40d2a72ab454cc60ULL,
47747 ++ 0x5936a1b712570975ULL, 0x91b9d648debda657ULL, 0x3344094bb64330eaULL, 0x006ba10d12ee51d0ULL,
47748 ++ 0x19228468f5de5d58ULL, 0x0eb12f4c38cc05b0ULL, 0xa1039f9dd5601990ULL, 0x4502d4ce4fff0e0bULL,
47749 ++ 0xeb2054106837c189ULL, 0xd0f6544c6dd3b93cULL, 0x40727064c416d74fULL, 0x6e15c6114b502ef0ULL,
47750 ++ 0x4df2a398cfb1a76bULL, 0x11256c7419f2f6b1ULL, 0x4a497962066e6043ULL, 0x705b3aab41355b44ULL,
47751 ++ 0x365ef536d797b1d8ULL, 0x00076bd622ddf0dbULL, 0x3bbf33b0e0575a88ULL, 0x3777aa05c8e4ca4dULL,
47752 ++ 0x392745c85578db5fULL, 0x6fda4149dbae5ae2ULL, 0xb1f0b00b8adc9867ULL, 0x09963437d36f1da3ULL,
47753 ++ 0x7e824e90a5dc3853ULL, 0xccb5f6641f135cbdULL, 0x6736d86c87ce8fccULL, 0x625f3ce26604249fULL,
47754 ++ 0xaf8ac8059502f63fULL, 0x0c05e70a2e351469ULL, 0x35292e9c764b6305ULL, 0x1a394360c7e23ac3ULL,
47755 ++ 0xd5c6d53251183264ULL, 0x62065abd43c2b74fULL, 0xb5fbf5d03b973f9bULL, 0x13a3da3661206e5eULL,
47756 ++ 0xc6bd5837725d94e5ULL, 0x18e30912205016c5ULL, 0x2088ce1570033c68ULL, 0x7fba1f495c837987ULL,
47757 ++ 0x5a8c7423f2f9079dULL, 0x1735157b34023fc5ULL, 0xe4f9b49ad2fab351ULL, 0x6691ff72c878e33cULL,
47758 ++ 0x122c2adedc5eff3eULL, 0xf8dd4bf1d8956cf4ULL, 0xeb86205d9e9e5bdaULL, 0x049b92b9d975c743ULL,
47759 ++ 0xa5379730b0f6c05aULL, 0x72a0ffacc6f3a553ULL, 0xb0032c34b20dcd6dULL, 0x470e9dbc88d5164aULL,
47760 ++ 0xb19cf10ca237c047ULL, 0xb65466711f6c81a2ULL, 0xb3321bd16dd80b43ULL, 0x48c14f600c5fbe8eULL,
47761 ++ 0x66451c264aa6c803ULL, 0xb66e3904a4fa7da6ULL, 0xd45f19b0b3128395ULL, 0x31602627c3c9bc10ULL,
47762 ++ 0x3120dc4832e4e10dULL, 0xeb20c46756c717f7ULL, 0x00f52e3f67280294ULL, 0x566d4fc14730c509ULL,
47763 ++ 0x7e3a5d40fd837206ULL, 0xc1e926dc7159547aULL, 0x216730fba68d6095ULL, 0x22e8c3843f69cea7ULL,
47764 ++ 0x33d074e8930e4b2bULL, 0xb6e4350e84d15816ULL, 0x5534c26ad6ba2365ULL, 0x7773c12f89f1f3f3ULL,
47765 ++ 0x8cba404da57962aaULL, 0x5b9897a81999ce56ULL, 0x508e862f121692fcULL, 0x3a81907fa093c291ULL,
47766 ++ 0x0dded0ff4725a510ULL, 0x10d8cc10673fc503ULL, 0x5b9d151c9f1f4e89ULL, 0x32a5c1d5cb09a44cULL,
47767 ++ 0x1e0aa442b90541fbULL, 0x5f85eb7cc1b485dbULL, 0xbee595ce8a9df2e5ULL, 0x25e496c722422236ULL,
47768 ++ 0x5edf3c46cd0fe5b9ULL, 0x34e75a7ed2a43388ULL, 0xe488de11d761e352ULL, 0x0e878a01a085545cULL,
47769 ++ 0xba493c77e021bb04ULL, 0x2b4d1843c7df899aULL, 0x9ea37a487ae80d67ULL, 0x67a9958011e41794ULL,
47770 ++ 0x4b58051a6697b065ULL, 0x47e33f7d8d6ba6d4ULL, 0xbb4da8d483ca46c1ULL, 0x68becaa181c2db0dULL,
47771 ++ 0x8d8980e90b989aa5ULL, 0xf95eb14a2c93c99bULL, 0x51c6c7c4796e73a2ULL, 0x6e228363b5efb569ULL,
47772 ++ 0xc6bbc0b02dd624c8ULL, 0x777eb47dec8170eeULL, 0x3cde15a004cfafa9ULL, 0x1dc6bc087160bf9bULL,
47773 ++ 0x2e07e043eec34002ULL, 0x18e9fc677a68dc7fULL, 0xd8da03188bd15b9aULL, 0x48fbc3bb00568253ULL,
47774 ++ 0x57547d4cfb654ce1ULL, 0xd3565b82a058e2adULL, 0xf63eaf0bbf154478ULL, 0x47531ef114dfbb18ULL,
47775 ++ 0xe1ec630a4278c587ULL, 0x5507d546ca8e83f3ULL, 0x85e135c63adc0c2bULL, 0x0aa7efa85682844eULL,
47776 ++ 0x72691ba8b3e1f615ULL, 0x32b4e9701fbe3ffaULL, 0x97b6d92e39bb7868ULL, 0x2cfe53dea02e39e8ULL,
47777 ++ 0x687392cd85cd52b0ULL, 0x27ff66c910e29831ULL, 0x97134556a9832d06ULL, 0x269bb0360a84f8a0ULL,
47778 ++ 0x706e55457643f85cULL, 0x3734a48c9b597d1bULL, 0x7aee91e8c6efa472ULL, 0x5cd6abc198a9d9e0ULL,
47779 ++ 0x0e04de06cb3ce41aULL, 0xd8c6eb893402e138ULL, 0x904659bb686e3772ULL, 0x7215c371746ba8c8ULL,
47780 ++ 0xfd12a97eeae4a2d9ULL, 0x9514b7516394f2c5ULL, 0x266fd5809208f294ULL, 0x5c847085619a26b9ULL,
47781 ++ 0x52985410fed694eaULL, 0x3c905b934a2ed254ULL, 0x10bb47692d3be467ULL, 0x063b3d2d69e5e9e1ULL,
47782 ++ 0x472726eedda57debULL, 0xefb6c4ae10f41891ULL, 0x2b1641917b307614ULL, 0x117c554fc4f45b7cULL,
47783 ++ 0xc07cf3118f9d8812ULL, 0x01dbd82050017939ULL, 0xd7e803f4171b2827ULL, 0x1015e87487d225eaULL,
47784 ++ 0xc58de3fed23acc4dULL, 0x50db91c294a7be2dULL, 0x0b94d43d1c9cf457ULL, 0x6b1640fa6e37524aULL,
47785 ++ 0x692f346c5fda0d09ULL, 0x200b1c59fa4d3151ULL, 0xb8c46f760777a296ULL, 0x4b38395f3ffdfbcfULL,
47786 ++ 0x18d25e00be54d671ULL, 0x60d50582bec8aba6ULL, 0x87ad8f263b78b982ULL, 0x50fdf64e9cda0432ULL,
47787 ++ 0x90f567aac578dcf0ULL, 0xef1e9b0ef2a3133bULL, 0x0eebba9242d9de71ULL, 0x15473c9bf03101c7ULL,
47788 ++ 0x7c77e8ae56b78095ULL, 0xb678e7666e6f078eULL, 0x2da0b9615348ba1fULL, 0x7cf931c1ff733f0bULL,
47789 ++ 0x26b357f50a0a366cULL, 0xe9708cf42b87d732ULL, 0xc13aeea5f91cb2c0ULL, 0x35d90c991143bb4cULL,
47790 ++ 0x47c1c404a9a0d9dcULL, 0x659e58451972d251ULL, 0x3875a8c473b38c31ULL, 0x1fbd9ed379561f24ULL,
47791 ++ 0x11fabc6fd41ec28dULL, 0x7ef8dfe3cd2a2dcaULL, 0x72e73b5d8c404595ULL, 0x6135fa4954b72f27ULL,
47792 ++ 0xccfc32a2de24b69cULL, 0x3f55698c1f095d88ULL, 0xbe3350ed5ac3f929ULL, 0x5e9bf806ca477eebULL,
47793 ++ 0xe9ce8fb63c309f68ULL, 0x5376f63565e1f9f4ULL, 0xd1afcfb35a6393f1ULL, 0x6632a1ede5623506ULL,
47794 ++ 0x0b7d6c390c2ded4cULL, 0x56cb3281df04cb1fULL, 0x66305a1249ecc3c7ULL, 0x5d588b60a38ca72aULL,
47795 ++ 0xa6ecbf78e8e5f42dULL, 0x86eeb44b3c8a3eecULL, 0xec219c48fbd21604ULL, 0x1aaf1af517c36731ULL,
47796 ++ 0xc306a2836769bde7ULL, 0x208280622b1e2adbULL, 0x8027f51ffbff94a6ULL, 0x76cfa1ce1124f26bULL,
47797 ++ 0x18eb00562422abb6ULL, 0xf377c4d58f8c29c3ULL, 0x4dbbc207f531561aULL, 0x0253b7f082128a27ULL,
47798 ++ 0x3d1f091cb62c17e0ULL, 0x4860e1abd64628a9ULL, 0x52d17436309d4253ULL, 0x356f97e13efae576ULL,
47799 ++ 0xd351e11aa150535bULL, 0x3e6b45bb1dd878ccULL, 0x0c776128bed92c98ULL, 0x1d34ae93032885b8ULL,
47800 ++ 0x4ba0488ca85ba4c3ULL, 0x985348c33c9ce6ceULL, 0x66124c6f97bda770ULL, 0x0f81a0290654124aULL,
47801 ++ 0x9ed09ca6569b86fdULL, 0x811009fd18af9a2dULL, 0xff08d03f93d8c20aULL, 0x52a148199faef26bULL,
47802 ++ 0x3e03f9dc2d8d1b73ULL, 0x4205801873961a70ULL, 0xc0d987f041a35970ULL, 0x07aa1f15a1c0d549ULL,
47803 ++ 0xdfd46ce08cd27224ULL, 0x6d0a024f934e4239ULL, 0x808a7a6399897b59ULL, 0x0a4556e9e13d95a2ULL,
47804 ++ 0xd21a991fe9c13045ULL, 0x9b0e8548fe7751b8ULL, 0x5da643cb4bf30035ULL, 0x77db28d63940f721ULL,
47805 ++ 0xfc5eeb614adc9011ULL, 0x5229419ae8c411ebULL, 0x9ec3e7787d1dcf74ULL, 0x340d053e216e4cb5ULL,
47806 ++ 0xcac7af39b48df2b4ULL, 0xc0faec2871a10a94ULL, 0x140a69245ca575edULL, 0x0cf1c37134273a4cULL,
47807 ++ 0xc8ee306ac224b8a5ULL, 0x57eaee7ccb4930b0ULL, 0xa1e806bdaacbe74fULL, 0x7d9a62742eeb657dULL,
47808 ++ 0x9eb6b6ef546c4830ULL, 0x885cca1fddb36e2eULL, 0xe6b9f383ef0d7105ULL, 0x58654fef9d2e0412ULL,
47809 ++ 0xa905c4ffbe0e8e26ULL, 0x942de5df9b31816eULL, 0x497d723f802e88e1ULL, 0x30684dea602f408dULL,
47810 ++ 0x21e5a278a3e6cb34ULL, 0xaefb6e6f5b151dc4ULL, 0xb30b8e049d77ca15ULL, 0x28c3c9cf53b98981ULL,
47811 ++ 0x287fb721556cdd2aULL, 0x0d317ca897022274ULL, 0x7468c7423a543258ULL, 0x4a7f11464eb5642fULL,
47812 ++ 0xa237a4774d193aa6ULL, 0xd865986ea92129a1ULL, 0x24c515ecf87c1a88ULL, 0x604003575f39f5ebULL,
47813 ++ 0x47b9f189570a9b27ULL, 0x2b98cede465e4b78ULL, 0x026df551dbb85c20ULL, 0x74fcd91047e21901ULL,
47814 ++ 0x13e2a90a23c1bfa3ULL, 0x0cb0074e478519f6ULL, 0x5ff1cbbe3af6cf44ULL, 0x67fe5438be812dbeULL,
47815 ++ 0xd13cf64fa40f05b0ULL, 0x054dfb2f32283787ULL, 0x4173915b7f0d2aeaULL, 0x482f144f1f610d4eULL,
47816 ++ 0xf6210201b47f8234ULL, 0x5d0ae1929e70b990ULL, 0xdcd7f455b049567cULL, 0x7e93d0f1f0916f01ULL,
47817 ++ 0xdd79cbf18a7db4faULL, 0xbe8391bf6f74c62fULL, 0x027145d14b8291bdULL, 0x585a73ea2cbf1705ULL,
47818 ++ 0x485ca03e928a0db2ULL, 0x10fc01a5742857e7ULL, 0x2f482edbd6d551a7ULL, 0x0f0433b5048fdb8aULL,
47819 ++ 0x60da2e8dd7dc6247ULL, 0x88b4c9d38cd4819aULL, 0x13033ac001f66697ULL, 0x273b24fe3b367d75ULL,
47820 ++ 0xc6e8f66a31b3b9d4ULL, 0x281514a494df49d5ULL, 0xd1726fdfc8b23da7ULL, 0x4b3ae7d103dee548ULL,
47821 ++ 0xc6256e19ce4b9d7eULL, 0xff5c5cf186e3c61cULL, 0xacc63ca34b8ec145ULL, 0x74621888fee66574ULL,
47822 ++ 0x956f409645290a1eULL, 0xef0bf8e3263a962eULL, 0xed6a50eb5ec2647bULL, 0x0694283a9dca7502ULL,
47823 ++ 0x769b963643a2dcd1ULL, 0x42b7c8ea09fc5353ULL, 0x4f002aee13397eabULL, 0x63005e2c19b7d63aULL,
47824 ++ 0xca6736da63023beaULL, 0x966c7f6db12a99b7ULL, 0xace09390c537c5e1ULL, 0x0b696063a1aa89eeULL,
47825 ++ 0xebb03e97288c56e5ULL, 0x432a9f9f938c8be8ULL, 0xa6a5a93d5b717f71ULL, 0x1a5fb4c3e18f9d97ULL,
47826 ++ 0x1c94e7ad1c60cdceULL, 0xee202a43fc02c4a0ULL, 0x8dafe4d867c46a20ULL, 0x0a10263c8ac27b58ULL,
47827 ++ 0xd0dea9dfe4432a4aULL, 0x856af87bbe9277c5ULL, 0xce8472acc212c71aULL, 0x6f151b6d9bbb1e91ULL,
47828 ++ 0x26776c527ceed56aULL, 0x7d211cb7fbf8faecULL, 0x37ae66a6fd4609ccULL, 0x1f81b702d2770c42ULL,
47829 ++ 0x2fb0b057eac58392ULL, 0xe1dd89fe29744e9dULL, 0xc964f8eb17beb4f8ULL, 0x29571073c9a2d41eULL,
47830 ++ 0xa948a18981c0e254ULL, 0x2df6369b65b22830ULL, 0xa33eb2d75fcfd3c6ULL, 0x078cd6ec4199a01fULL,
47831 ++ 0x4a584a41ad900d2fULL, 0x32142b78e2c74c52ULL, 0x68c4e8338431c978ULL, 0x7f69ea9008689fc2ULL,
47832 ++ 0x52f2c81e46a38265ULL, 0xfd78072d04a832fdULL, 0x8cd7d5fa25359e94ULL, 0x4de71b7454cc29d2ULL,
47833 ++ 0x42eb60ad1eda6ac9ULL, 0x0aad37dfdbc09c3aULL, 0x81004b71e33cc191ULL, 0x44e6be345122803cULL,
47834 ++ 0x03fe8388ba1920dbULL, 0xf5d57c32150db008ULL, 0x49c8c4281af60c29ULL, 0x21edb518de701aeeULL,
47835 ++ 0x7fb63e418f06dc99ULL, 0xa4460d99c166d7b8ULL, 0x24dd5248ce520a83ULL, 0x5ec3ad712b928358ULL,
47836 ++ 0x15022a5fbd17930fULL, 0xa4f64a77d82570e3ULL, 0x12bc8d6915783712ULL, 0x498194c0fc620abbULL,
47837 ++ 0x38a2d9d255686c82ULL, 0x785c6bd9193e21f0ULL, 0xe4d5c81ab24a5484ULL, 0x56307860b2e20989ULL,
47838 ++ 0x429d55f78b4d74c4ULL, 0x22f1834643350131ULL, 0x1e60c24598c71fffULL, 0x59f2f014979983efULL,
47839 ++ 0x46a47d56eb494a44ULL, 0x3e22a854d636a18eULL, 0xb346e15274491c3bULL, 0x2ceafd4e5390cde7ULL,
47840 ++ 0xba8a8538be0d6675ULL, 0x4b9074bb50818e23ULL, 0xcbdab89085d304c3ULL, 0x61a24fe0e56192c4ULL,
47841 ++ 0xcb7615e6db525bcbULL, 0xdd7d8c35a567e4caULL, 0xe6b4153acafcdd69ULL, 0x2d668e097f3c9766ULL,
47842 ++ 0xa57e7e265ce55ef0ULL, 0x5d9f4e527cd4b967ULL, 0xfbc83606492fd1e5ULL, 0x090d52beb7c3f7aeULL,
47843 ++ 0x09b9515a1e7b4d7cULL, 0x1f266a2599da44c0ULL, 0xa1c49548e2c55504ULL, 0x7ef04287126f15ccULL,
47844 ++ 0xfed1659dbd30ef15ULL, 0x8b4ab9eec4e0277bULL, 0x884d6236a5df3291ULL, 0x1fd96ea6bf5cf788ULL,
47845 ++ 0x42a161981f190d9aULL, 0x61d849507e6052c1ULL, 0x9fe113bf285a2cd5ULL, 0x7c22d676dbad85d8ULL,
47846 ++ 0x82e770ed2bfbd27dULL, 0x4c05b2ece996f5a5ULL, 0xcd40a9c2b0900150ULL, 0x5895319213d9bf64ULL,
47847 ++ 0xe7cc5d703fea2e08ULL, 0xb50c491258e2188cULL, 0xcce30baa48205bf0ULL, 0x537c659ccfa32d62ULL,
47848 ++ 0x37b6623a98cfc088ULL, 0xfe9bed1fa4d6aca4ULL, 0x04d29b8e56a8d1b0ULL, 0x725f71c40b519575ULL,
47849 ++ 0x28c7f89cd0339ce6ULL, 0x8367b14469ddc18bULL, 0x883ada83a6a1652cULL, 0x585f1974034d6c17ULL,
47850 ++ 0x89cfb266f1b19188ULL, 0xe63b4863e7c35217ULL, 0xd88c9da6b4c0526aULL, 0x3e035c9df0954635ULL,
47851 ++ 0xdd9d5412fb45de9dULL, 0xdd684532e4cff40dULL, 0x4b5c999b151d671cULL, 0x2d8c2cc811e7f690ULL,
47852 ++ 0x7f54be1d90055d40ULL, 0xa464c5df464aaf40ULL, 0x33979624f0e917beULL, 0x2c018dc527356b30ULL,
47853 ++ 0xa5415024e330b3d4ULL, 0x73ff3d96691652d3ULL, 0x94ec42c4ef9b59f1ULL, 0x0747201618d08e5aULL,
47854 ++ 0x4d6ca48aca411c53ULL, 0x66415f2fcfa66119ULL, 0x9c4dd40051e227ffULL, 0x59810bc09a02f7ebULL,
47855 ++ 0x2a7eb171b3dc101dULL, 0x441c5ab99ffef68eULL, 0x32025c9b93b359eaULL, 0x5e8ce0a71e9d112fULL,
47856 ++ 0xbfcccb92429503fdULL, 0xd271ba752f095d55ULL, 0x345ead5e972d091eULL, 0x18c8df11a83103baULL,
47857 ++ 0x90cd949a9aed0f4cULL, 0xc5d1f4cb6660e37eULL, 0xb8cac52d56c52e0bULL, 0x6e42e400c5808e0dULL,
47858 ++ 0xa3b46966eeaefd23ULL, 0x0c4f1f0be39ecdcaULL, 0x189dc8c9d683a51dULL, 0x51f27f054c09351bULL,
47859 ++ 0x4c487ccd2a320682ULL, 0x587ea95bb3df1c96ULL, 0xc8ccf79e555cb8e8ULL, 0x547dc829a206d73dULL,
47860 ++ 0xb822a6cd80c39b06ULL, 0xe96d54732000d4c6ULL, 0x28535b6f91463b4dULL, 0x228f4660e2486e1dULL,
47861 ++ 0x98799538de8d3abfULL, 0x8cd8330045ebca6eULL, 0x79952a008221e738ULL, 0x4322e1a7535cd2bbULL,
47862 ++ 0xb114c11819d1801cULL, 0x2016e4d84f3f5ec7ULL, 0xdd0e2df409260f4cULL, 0x5ec362c0ae5f7266ULL,
47863 ++ 0xc0462b18b8b2b4eeULL, 0x7cc8d950274d1afbULL, 0xf25f7105436b02d2ULL, 0x43bbf8dcbff9ccd3ULL,
47864 ++ 0xb6ad1767a039e9dfULL, 0xb0714da8f69d3583ULL, 0x5e55fa18b42931f5ULL, 0x4ed5558f33c60961ULL,
47865 ++ 0x1fe37901c647a5ddULL, 0x593ddf1f8081d357ULL, 0x0249a4fd813fd7a6ULL, 0x69acca274e9caf61ULL,
47866 ++ 0x047ba3ea330721c9ULL, 0x83423fc20e7e1ea0ULL, 0x1df4c0af01314a60ULL, 0x09a62dab89289527ULL,
47867 ++ 0xa5b325a49cc6cb00ULL, 0xe94b5dc654b56cb6ULL, 0x3be28779adc994a0ULL, 0x4296e8f8ba3a4aadULL,
47868 ++ 0x328689761e451eabULL, 0x2e4d598bff59594aULL, 0x49b96853d7a7084aULL, 0x4980a319601420a8ULL,
47869 ++ 0x9565b9e12f552c42ULL, 0x8a5318db7100fe96ULL, 0x05c90b4d43add0d7ULL, 0x538b4cd66a5d4edaULL,
47870 ++ 0xf4e94fc3e89f039fULL, 0x592c9af26f618045ULL, 0x08a36eb5fd4b9550ULL, 0x25fffaf6c2ed1419ULL,
47871 ++ 0x34434459cc79d354ULL, 0xeeecbfb4b1d5476bULL, 0xddeb34a061615d99ULL, 0x5129cecceb64b773ULL,
47872 ++ 0xee43215894993520ULL, 0x772f9c7cf14c0b3bULL, 0xd2e2fce306bedad5ULL, 0x715f42b546f06a97ULL,
47873 ++ 0x434ecdceda5b5f1aULL, 0x0da17115a49741a9ULL, 0x680bd77c73edad2eULL, 0x487c02354edd9041ULL,
47874 ++ 0xb8efeff3a70ed9c4ULL, 0x56a32aa3e857e302ULL, 0xdf3a68bd48a2a5a0ULL, 0x07f650b73176c444ULL,
47875 ++ 0xe38b9b1626e0ccb1ULL, 0x79e053c18b09fb36ULL, 0x56d90319c9f94964ULL, 0x1ca941e7ac9ff5c4ULL,
47876 ++ 0x49c4df29162fa0bbULL, 0x8488cf3282b33305ULL, 0x95dfda14cabb437dULL, 0x3391f78264d5ad86ULL,
47877 ++ 0x729ae06ae2b5095dULL, 0xd58a58d73259a946ULL, 0xe9834262d13921edULL, 0x27fedafaa54bb592ULL,
47878 ++ 0xa99dc5b829ad48bbULL, 0x5f025742499ee260ULL, 0x802c8ecd5d7513fdULL, 0x78ceb3ef3f6dd938ULL,
47879 ++ 0xc342f44f8a135d94ULL, 0x7b9edb44828cdda3ULL, 0x9436d11a0537cfe7ULL, 0x5064b164ec1ab4c8ULL,
47880 ++ 0x7020eccfd37eb2fcULL, 0x1f31ea3ed90d25fcULL, 0x1b930d7bdfa1bb34ULL, 0x5344467a48113044ULL,
47881 ++ 0x70073170f25e6dfbULL, 0xe385dc1a50114cc8ULL, 0x2348698ac8fc4f00ULL, 0x2a77a55284dd40d8ULL,
47882 ++ 0xfe06afe0c98c6ce4ULL, 0xc235df96dddfd6e4ULL, 0x1428d01e33bf1ed3ULL, 0x785768ec9300bdafULL,
47883 ++ 0x9702e57a91deb63bULL, 0x61bdb8bfe5ce8b80ULL, 0x645b426f3d1d58acULL, 0x4804a82227a557bcULL,
47884 ++ 0x8e57048ab44d2601ULL, 0x68d6501a4b3a6935ULL, 0xc39c9ec3f9e1c293ULL, 0x4172f257d4de63e2ULL,
47885 ++ 0xd368b450330c6401ULL, 0x040d3017418f2391ULL, 0x2c34bb6090b7d90dULL, 0x16f649228fdfd51fULL,
47886 ++ 0xbea6818e2b928ef5ULL, 0xe28ccf91cdc11e72ULL, 0x594aaa68e77a36cdULL, 0x313034806c7ffd0fULL,
47887 ++ 0x8a9d27ac2249bd65ULL, 0x19a3b464018e9512ULL, 0xc26ccff352b37ec7ULL, 0x056f68341d797b21ULL,
47888 ++ 0x5e79d6757efd2327ULL, 0xfabdbcb6553afe15ULL, 0xd3e7222c6eaf5a60ULL, 0x7046c76d4dae743bULL,
47889 ++ 0x660be872b18d4a55ULL, 0x19992518574e1496ULL, 0xc103053a302bdcbbULL, 0x3ed8e9800b218e8eULL,
47890 ++ 0x7b0b9239fa75e03eULL, 0xefe9fb684633c083ULL, 0x98a35fbe391a7793ULL, 0x6065510fe2d0fe34ULL,
47891 ++ 0x55cb668548abad0cULL, 0xb4584548da87e527ULL, 0x2c43ecea0107c1ddULL, 0x526028809372de35ULL,
47892 ++ 0x3415c56af9213b1fULL, 0x5bee1a4d017e98dbULL, 0x13f6b105b5cf709bULL, 0x5ff20e3482b29ab6ULL,
47893 ++ 0x0aa29c75cc2e6c90ULL, 0xfc7d73ca3a70e206ULL, 0x899fc38fc4b5c515ULL, 0x250386b124ffc207ULL,
47894 ++ 0x54ea28d5ae3d2b56ULL, 0x9913149dd6de60ceULL, 0x16694fc58f06d6c1ULL, 0x46b23975eb018fc7ULL,
47895 ++ 0x470a6a0fb4b7b4e2ULL, 0x5d92475a8f7253deULL, 0xabeee5b52fbd3adbULL, 0x7fa20801a0806968ULL,
47896 ++ 0x76f3faf19f7714d2ULL, 0xb3e840c12f4660c3ULL, 0x0fb4cd8df212744eULL, 0x4b065a251d3a2dd2ULL,
47897 ++ 0x5cebde383d77cd4aULL, 0x6adf39df882c9cb1ULL, 0xa2dd242eb09af759ULL, 0x3147c0e50e5f6422ULL,
47898 ++ 0x164ca5101d1350dbULL, 0xf8d13479c33fc962ULL, 0xe640ce4d13e5da08ULL, 0x4bdee0c45061f8baULL,
47899 ++ 0xd7c46dc1a4edb1c9ULL, 0x5514d7b6437fd98aULL, 0x58942f6bb2a1c00bULL, 0x2dffb2ab1d70710eULL,
47900 ++ 0xccdfcf2fc18b6d68ULL, 0xa8ebcba8b7806167ULL, 0x980697f95e2937e3ULL, 0x02fbba1cd0126e8cULL
47901 ++};
47902 +
47903 +-static void curve25519_bmi2_base(u8 session_key[CURVE25519_KEY_SIZE],
47904 +- const u8 private_key[CURVE25519_KEY_SIZE])
47905 ++static void curve25519_ever64_base(u8 *out, const u8 *priv) /* table-driven ladder: clamps a copy of priv, walks table_ladder, writes encode_point() of the result to out */
47906 + {
47907 +- struct {
47908 +- u64 buffer[4 * NUM_WORDS_ELTFP25519];
47909 +- u64 coordinates[4 * NUM_WORDS_ELTFP25519];
47910 +- u64 workspace[4 * NUM_WORDS_ELTFP25519];
47911 +- u8 private[CURVE25519_KEY_SIZE];
47912 +- } __aligned(32) m;
47913 +-
47914 +- const int ite[4] = { 64, 64, 64, 63 };
47915 +- const int q = 3;
47916 + u64 swap = 1;
47917 +-
47918 +- int i = 0, j = 0, k = 0;
47919 +- u64 *const key = (u64 *)m.private;
47920 +- u64 *const Ur1 = m.coordinates + 0;
47921 +- u64 *const Zr1 = m.coordinates + 4;
47922 +- u64 *const Ur2 = m.coordinates + 8;
47923 +- u64 *const Zr2 = m.coordinates + 12;
47924 +-
47925 +- u64 *const UZr1 = m.coordinates + 0;
47926 +- u64 *const ZUr2 = m.coordinates + 8;
47927 +-
47928 +- u64 *const A = m.workspace + 0;
47929 +- u64 *const B = m.workspace + 4;
47930 +- u64 *const C = m.workspace + 8;
47931 +- u64 *const D = m.workspace + 12;
47932 +-
47933 +- u64 *const AB = m.workspace + 0;
47934 +- u64 *const CD = m.workspace + 8;
47935 +-
47936 +- const u64 *const P = table_ladder_8k;
47937 +-
47938 +- memcpy(m.private, private_key, sizeof(m.private));
47939 +-
47940 +- curve25519_clamp_secret(m.private);
47941 +-
47942 +- setzero_eltfp25519_1w(Ur1);
47943 +- setzero_eltfp25519_1w(Zr1);
47944 +- setzero_eltfp25519_1w(Zr2);
47945 +- Ur1[0] = 1;
47946 +- Zr1[0] = 1;
47947 +- Zr2[0] = 1;
47948 +-
47949 +- /* G-S */
47950 +- Ur2[3] = 0x1eaecdeee27cab34UL;
47951 +- Ur2[2] = 0xadc7a0b9235d48e2UL;
47952 +- Ur2[1] = 0xbbf095ae14b2edf8UL;
47953 +- Ur2[0] = 0x7e94e1fec82faabdUL;
47954 +-
47955 +- /* main-loop */
47956 +- j = q;
47957 +- for (i = 0; i < NUM_WORDS_ELTFP25519; ++i) {
47958 +- while (j < ite[i]) {
47959 +- u64 bit = (key[i] >> j) & 0x1;
47960 +- k = (64 * i + j - q);
47961 ++ int i, j, k;
47962 ++ u64 tmp[16 + 32 + 4]; /* [0,16): x1|z1|x2|z2, [16,48): a/b/c and ef work area, [48,52): copy of the key */
47963 ++ u64 *x1 = &tmp[0];
47964 ++ u64 *z1 = &tmp[4];
47965 ++ u64 *x2 = &tmp[8];
47966 ++ u64 *z2 = &tmp[12];
47967 ++ u64 *xz1 = &tmp[0]; /* same address as x1: x1 followed by z1 */
47968 ++ u64 *xz2 = &tmp[8]; /* same address as x2: x2 followed by z2 */
47969 ++ u64 *a = &tmp[0 + 16];
47970 ++ u64 *b = &tmp[4 + 16];
47971 ++ u64 *c = &tmp[8 + 16];
47972 ++ u64 *ab = &tmp[0 + 16]; /* same address as a: a followed by b */
47973 ++ u64 *abcd = &tmp[0 + 16]; /* same address as a */
47974 ++ u64 *ef = &tmp[16 + 16];
47975 ++ u64 *efgh = &tmp[16 + 16]; /* same address as ef */
47976 ++ u64 *key = &tmp[0 + 16 + 32];
47977 ++
47978 ++ memcpy(key, priv, 32); /* work on a local copy of the scalar; it is wiped together with tmp below */
47979 ++ ((u8 *)key)[0] &= 248; /* clear the three low bits of the first byte */
47980 ++ ((u8 *)key)[31] = (((u8 *)key)[31] & 127) | 64; /* clear bit 7 and set bit 6 of the last byte */
47981 ++
47982 ++ x1[0] = 1, x1[1] = x1[2] = x1[3] = 0;
47983 ++ z1[0] = 1, z1[1] = z1[2] = z1[3] = 0;
47984 ++ z2[0] = 1, z2[1] = z2[2] = z2[3] = 0;
47985 ++ memcpy(x2, p_minus_s, sizeof(p_minus_s)); /* x2 starts from the precomputed p_minus_s constant */
47986 ++
47987 ++ j = 3; /* the scan starts at bit 3 of key[0]; bits 0..2 were cleared above */
47988 ++ for (i = 0; i < 4; ++i) {
47989 ++ while (j < (const int[]){ 64, 64, 64, 63 }[i]) { /* key[3] is scanned only up to bit 62 */
47990 ++ u64 bit = (key[i] >> j) & 1;
47991 ++ k = (64 * i + j - 3); /* table entry index: 0 for bit 3 of key[0], 251 for bit 62 of key[3] */
47992 + swap = swap ^ bit;
47993 +- cswap(swap, Ur1, Ur2);
47994 +- cswap(swap, Zr1, Zr2);
47995 ++ cswap2(swap, xz1, xz2); /* one call on the x|z pairs in place of the two per-coordinate cswap() calls */
47996 + swap = bit;
47997 +- /* Addition */
47998 +- sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */
47999 +- add_eltfp25519_1w_bmi2(A, Ur1, Zr1); /* A = Ur1+Zr1 */
48000 +- mul_eltfp25519_1w_bmi2(C, &P[4 * k], B);/* C = M0-B */
48001 +- sub_eltfp25519_1w(B, A, C); /* B = (Ur1+Zr1) - M*(Ur1-Zr1) */
48002 +- add_eltfp25519_1w_bmi2(A, A, C); /* A = (Ur1+Zr1) + M*(Ur1-Zr1) */
48003 +- sqr_eltfp25519_2w_bmi2(AB); /* A = A^2 | B = B^2 */
48004 +- mul_eltfp25519_2w_bmi2(UZr1, ZUr2, AB); /* Ur1 = Zr2*A | Zr1 = Ur2*B */
48005 ++ fsub(b, x1, z1);
48006 ++ fadd(a, x1, z1);
48007 ++ fmul(c, &table_ladder[4 * k], b, ef); /* entry k occupies limbs 4k..4k+3 of table_ladder */
48008 ++ fsub(b, a, c);
48009 ++ fadd(a, a, c);
48010 ++ fsqr2(ab, ab, efgh);
48011 ++ fmul2(xz1, xz2, ab, efgh);
48012 + ++j;
48013 + }
48014 + j = 0;
48015 + }
48016 +
48017 +- /* Doubling */
48018 +- for (i = 0; i < q; ++i) {
48019 +- add_eltfp25519_1w_bmi2(A, Ur1, Zr1); /* A = Ur1+Zr1 */
48020 +- sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */
48021 +- sqr_eltfp25519_2w_bmi2(AB); /* A = A**2 B = B**2 */
48022 +- copy_eltfp25519_1w(C, B); /* C = B */
48023 +- sub_eltfp25519_1w(B, A, B); /* B = A-B */
48024 +- mul_a24_eltfp25519_1w(D, B); /* D = my_a24*B */
48025 +- add_eltfp25519_1w_bmi2(D, D, C); /* D = D+C */
48026 +- mul_eltfp25519_2w_bmi2(UZr1, AB, CD); /* Ur1 = A*B Zr1 = Zr1*A */
48027 +- }
48028 ++ point_double(xz1, abcd, efgh); /* three doublings, unrolled from the removed q = 3 loop */
48029 ++ point_double(xz1, abcd, efgh);
48030 ++ point_double(xz1, abcd, efgh);
48031 ++ encode_point(out, xz1);
48032 +
48033 +- /* Convert to affine coordinates */
48034 +- inv_eltfp25519_1w_bmi2(A, Zr1);
48035 +- mul_eltfp25519_1w_bmi2((u64 *)session_key, Ur1, A);
48036 +- fred_eltfp25519_1w((u64 *)session_key);
48037 +-
48038 +- memzero_explicit(&m, sizeof(m));
48039 ++ memzero_explicit(tmp, sizeof(tmp)); /* wipe the key copy and every intermediate value held in tmp */
48040 + }
48041 +
48042 ++static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_bmi2_adx);
48043 ++
48044 + void curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE],
48045 + const u8 secret[CURVE25519_KEY_SIZE],
48046 + const u8 basepoint[CURVE25519_KEY_SIZE])
48047 + {
48048 +- if (static_branch_likely(&curve25519_use_adx))
48049 +- curve25519_adx(mypublic, secret, basepoint);
48050 +- else if (static_branch_likely(&curve25519_use_bmi2))
48051 +- curve25519_bmi2(mypublic, secret, basepoint);
48052 ++ if (static_branch_likely(&curve25519_use_bmi2_adx))
48053 ++ curve25519_ever64(mypublic, secret, basepoint);
48054 + else
48055 + curve25519_generic(mypublic, secret, basepoint);
48056 + }
48057 +@@ -2355,10 +1395,8 @@ EXPORT_SYMBOL(curve25519_arch);
48058 + void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE],
48059 + const u8 secret[CURVE25519_KEY_SIZE])
48060 + {
48061 +- if (static_branch_likely(&curve25519_use_adx))
48062 +- curve25519_adx_base(pub, secret);
48063 +- else if (static_branch_likely(&curve25519_use_bmi2))
48064 +- curve25519_bmi2_base(pub, secret);
48065 ++ if (static_branch_likely(&curve25519_use_bmi2_adx))
48066 ++ curve25519_ever64_base(pub, secret);
48067 + else
48068 + curve25519_generic(pub, secret, curve25519_base_point);
48069 + }
48070 +@@ -2449,12 +1487,11 @@ static struct kpp_alg curve25519_alg = {
48071 + .max_size = curve25519_max_size,
48072 + };
48073 +
48074 ++
48075 + static int __init curve25519_mod_init(void)
48076 + {
48077 +- if (boot_cpu_has(X86_FEATURE_BMI2))
48078 +- static_branch_enable(&curve25519_use_bmi2);
48079 +- else if (boot_cpu_has(X86_FEATURE_ADX))
48080 +- static_branch_enable(&curve25519_use_adx);
48081 ++ if (boot_cpu_has(X86_FEATURE_BMI2) && boot_cpu_has(X86_FEATURE_ADX))
48082 ++ static_branch_enable(&curve25519_use_bmi2_adx);
48083 + else
48084 + return 0;
48085 + return IS_REACHABLE(CONFIG_CRYPTO_KPP) ?
48086 +@@ -2474,3 +1511,4 @@ module_exit(curve25519_mod_exit);
48087 + MODULE_ALIAS_CRYPTO("curve25519");
48088 + MODULE_ALIAS_CRYPTO("curve25519-x86");
48089 + MODULE_LICENSE("GPL v2");
48090 ++MODULE_AUTHOR("Jason A. Donenfeld <Jason@×××××.com>");
48091 +--
48092 +cgit v1.2.3-4-ga26e
48093 +
48094 +
48095 +From eb740e786695b07e514c607be84a2a3c9379e59c Mon Sep 17 00:00:00 2001
48096 +From: "Jason A. Donenfeld" <Jason@×××××.com>
48097 +Date: Sun, 1 Mar 2020 16:06:56 +0800
48098 +Subject: crypto: x86/curve25519 - leave r12 as spare register
48099 +
48100 +commit dc7fc3a53ae158263196b1892b672aedf67796c5 upstream.
48101 +
48102 +This updates to the newer register selection proved by HACL*, which
48103 +leads to a more compact instruction encoding, and saves around 100
48104 +cycles.
48105 +
48106 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
48107 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
48108 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
48109 +---
48110 + arch/x86/crypto/curve25519-x86_64.c | 110 ++++++++++++++++++------------------
48111 + 1 file changed, 55 insertions(+), 55 deletions(-)
48112 +
48113 +diff --git a/arch/x86/crypto/curve25519-x86_64.c b/arch/x86/crypto/curve25519-x86_64.c
48114 +index e4e58b8e9afe..8a17621f7d3a 100644
48115 +--- a/arch/x86/crypto/curve25519-x86_64.c
48116 ++++ b/arch/x86/crypto/curve25519-x86_64.c
48117 +@@ -167,28 +167,28 @@ static inline void fmul(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
48118 + " movq 0(%1), %%rdx;"
48119 + " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 0(%0);"
48120 + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);"
48121 +- " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;"
48122 ++ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;"
48123 + " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;"
48124 + " adox %%rdx, %%rax;"
48125 + /* Compute src1[1] * src2 */
48126 + " movq 8(%1), %%rdx;"
48127 + " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);"
48128 +- " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 16(%0);"
48129 +- " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;"
48130 ++ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 16(%0);"
48131 ++ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
48132 + " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
48133 + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
48134 + /* Compute src1[2] * src2 */
48135 + " movq 16(%1), %%rdx;"
48136 + " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);"
48137 +- " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 24(%0);"
48138 +- " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;"
48139 ++ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 24(%0);"
48140 ++ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
48141 + " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
48142 + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
48143 + /* Compute src1[3] * src2 */
48144 + " movq 24(%1), %%rdx;"
48145 + " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);"
48146 +- " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 32(%0);"
48147 +- " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " movq %%r12, 40(%0);" " mov $0, %%r8;"
48148 ++ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 32(%0);"
48149 ++ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 40(%0);" " mov $0, %%r8;"
48150 + " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;"
48151 + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 56(%0);"
48152 + /* Line up pointers */
48153 +@@ -202,11 +202,11 @@ static inline void fmul(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
48154 + " mulxq 32(%1), %%r8, %%r13;"
48155 + " xor %3, %3;"
48156 + " adoxq 0(%1), %%r8;"
48157 +- " mulxq 40(%1), %%r9, %%r12;"
48158 ++ " mulxq 40(%1), %%r9, %%rbx;"
48159 + " adcx %%r13, %%r9;"
48160 + " adoxq 8(%1), %%r9;"
48161 + " mulxq 48(%1), %%r10, %%r13;"
48162 +- " adcx %%r12, %%r10;"
48163 ++ " adcx %%rbx, %%r10;"
48164 + " adoxq 16(%1), %%r10;"
48165 + " mulxq 56(%1), %%r11, %%rax;"
48166 + " adcx %%r13, %%r11;"
48167 +@@ -231,7 +231,7 @@ static inline void fmul(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
48168 + " movq %%r8, 0(%0);"
48169 + : "+&r" (tmp), "+&r" (f1), "+&r" (out), "+&r" (f2)
48170 + :
48171 +- : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "memory", "cc"
48172 ++ : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "memory", "cc"
48173 + );
48174 + }
48175 +
48176 +@@ -248,28 +248,28 @@ static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
48177 + " movq 0(%1), %%rdx;"
48178 + " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 0(%0);"
48179 + " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);"
48180 +- " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;"
48181 ++ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;"
48182 + " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;"
48183 + " adox %%rdx, %%rax;"
48184 + /* Compute src1[1] * src2 */
48185 + " movq 8(%1), %%rdx;"
48186 + " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);"
48187 +- " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 16(%0);"
48188 +- " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;"
48189 ++ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 16(%0);"
48190 ++ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
48191 + " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
48192 + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
48193 + /* Compute src1[2] * src2 */
48194 + " movq 16(%1), %%rdx;"
48195 + " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);"
48196 +- " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 24(%0);"
48197 +- " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;"
48198 ++ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 24(%0);"
48199 ++ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
48200 + " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
48201 + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
48202 + /* Compute src1[3] * src2 */
48203 + " movq 24(%1), %%rdx;"
48204 + " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);"
48205 +- " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 32(%0);"
48206 +- " mulxq 16(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " movq %%r12, 40(%0);" " mov $0, %%r8;"
48207 ++ " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 32(%0);"
48208 ++ " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 40(%0);" " mov $0, %%r8;"
48209 + " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;"
48210 + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 56(%0);"
48211 +
48212 +@@ -279,28 +279,28 @@ static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
48213 + " movq 32(%1), %%rdx;"
48214 + " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 64(%0);"
48215 + " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 72(%0);"
48216 +- " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;"
48217 ++ " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;"
48218 + " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;"
48219 + " adox %%rdx, %%rax;"
48220 + /* Compute src1[1] * src2 */
48221 + " movq 40(%1), %%rdx;"
48222 + " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 72(%0), %%r8;" " movq %%r8, 72(%0);"
48223 +- " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 80(%0);"
48224 +- " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;"
48225 ++ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 80(%0);"
48226 ++ " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
48227 + " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
48228 + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
48229 + /* Compute src1[2] * src2 */
48230 + " movq 48(%1), %%rdx;"
48231 + " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 80(%0), %%r8;" " movq %%r8, 80(%0);"
48232 +- " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 88(%0);"
48233 +- " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " mov $0, %%r8;"
48234 ++ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 88(%0);"
48235 ++ " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;"
48236 + " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;"
48237 + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;"
48238 + /* Compute src1[3] * src2 */
48239 + " movq 56(%1), %%rdx;"
48240 + " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 88(%0), %%r8;" " movq %%r8, 88(%0);"
48241 +- " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%r12, %%r10;" " movq %%r10, 96(%0);"
48242 +- " mulxq 48(%3), %%r12, %%r13;" " adox %%r11, %%r12;" " adcx %%r14, %%r12;" " movq %%r12, 104(%0);" " mov $0, %%r8;"
48243 ++ " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 96(%0);"
48244 ++ " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 104(%0);" " mov $0, %%r8;"
48245 + " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 112(%0);" " mov $0, %%rax;"
48246 + " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 120(%0);"
48247 + /* Line up pointers */
48248 +@@ -314,11 +314,11 @@ static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
48249 + " mulxq 32(%1), %%r8, %%r13;"
48250 + " xor %3, %3;"
48251 + " adoxq 0(%1), %%r8;"
48252 +- " mulxq 40(%1), %%r9, %%r12;"
48253 ++ " mulxq 40(%1), %%r9, %%rbx;"
48254 + " adcx %%r13, %%r9;"
48255 + " adoxq 8(%1), %%r9;"
48256 + " mulxq 48(%1), %%r10, %%r13;"
48257 +- " adcx %%r12, %%r10;"
48258 ++ " adcx %%rbx, %%r10;"
48259 + " adoxq 16(%1), %%r10;"
48260 + " mulxq 56(%1), %%r11, %%rax;"
48261 + " adcx %%r13, %%r11;"
48262 +@@ -347,11 +347,11 @@ static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
48263 + " mulxq 96(%1), %%r8, %%r13;"
48264 + " xor %3, %3;"
48265 + " adoxq 64(%1), %%r8;"
48266 +- " mulxq 104(%1), %%r9, %%r12;"
48267 ++ " mulxq 104(%1), %%r9, %%rbx;"
48268 + " adcx %%r13, %%r9;"
48269 + " adoxq 72(%1), %%r9;"
48270 + " mulxq 112(%1), %%r10, %%r13;"
48271 +- " adcx %%r12, %%r10;"
48272 ++ " adcx %%rbx, %%r10;"
48273 + " adoxq 80(%1), %%r10;"
48274 + " mulxq 120(%1), %%r11, %%rax;"
48275 + " adcx %%r13, %%r11;"
48276 +@@ -376,7 +376,7 @@ static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp)
48277 + " movq %%r8, 32(%0);"
48278 + : "+&r" (tmp), "+&r" (f1), "+&r" (out), "+&r" (f2)
48279 + :
48280 +- : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "memory", "cc"
48281 ++ : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "memory", "cc"
48282 + );
48283 + }
48284 +
48285 +@@ -388,11 +388,11 @@ static inline void fmul_scalar(u64 *out, const u64 *f1, u64 f2)
48286 + asm volatile(
48287 + /* Compute the raw multiplication of f1*f2 */
48288 + " mulxq 0(%2), %%r8, %%rcx;" /* f1[0]*f2 */
48289 +- " mulxq 8(%2), %%r9, %%r12;" /* f1[1]*f2 */
48290 ++ " mulxq 8(%2), %%r9, %%rbx;" /* f1[1]*f2 */
48291 + " add %%rcx, %%r9;"
48292 + " mov $0, %%rcx;"
48293 + " mulxq 16(%2), %%r10, %%r13;" /* f1[2]*f2 */
48294 +- " adcx %%r12, %%r10;"
48295 ++ " adcx %%rbx, %%r10;"
48296 + " mulxq 24(%2), %%r11, %%rax;" /* f1[3]*f2 */
48297 + " adcx %%r13, %%r11;"
48298 + " adcx %%rcx, %%rax;"
48299 +@@ -419,7 +419,7 @@ static inline void fmul_scalar(u64 *out, const u64 *f1, u64 f2)
48300 + " movq %%r8, 0(%1);"
48301 + : "+&r" (f2_r)
48302 + : "r" (out), "r" (f1)
48303 +- : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "memory", "cc"
48304 ++ : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "memory", "cc"
48305 + );
48306 + }
48307 +
48308 +@@ -520,8 +520,8 @@ static inline void fsqr(u64 *out, const u64 *f, u64 *tmp)
48309 + " mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */
48310 + " mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */
48311 + " movq 24(%1), %%rdx;" /* f[3] */
48312 +- " mulxq 8(%1), %%r11, %%r12;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */
48313 +- " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%r12;" /* f[2]*f[3] */
48314 ++ " mulxq 8(%1), %%r11, %%rbx;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */
48315 ++ " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%rbx;" /* f[2]*f[3] */
48316 + " movq 8(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */
48317 + " mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */
48318 +
48319 +@@ -531,12 +531,12 @@ static inline void fsqr(u64 *out, const u64 *f, u64 *tmp)
48320 + " adcx %%r8, %%r8;"
48321 + " adox %%rcx, %%r11;"
48322 + " adcx %%r9, %%r9;"
48323 +- " adox %%r15, %%r12;"
48324 ++ " adox %%r15, %%rbx;"
48325 + " adcx %%r10, %%r10;"
48326 + " adox %%r15, %%r13;"
48327 + " adcx %%r11, %%r11;"
48328 + " adox %%r15, %%r14;"
48329 +- " adcx %%r12, %%r12;"
48330 ++ " adcx %%rbx, %%rbx;"
48331 + " adcx %%r13, %%r13;"
48332 + " adcx %%r14, %%r14;"
48333 +
48334 +@@ -549,7 +549,7 @@ static inline void fsqr(u64 *out, const u64 *f, u64 *tmp)
48335 + " adcx %%rcx, %%r10;" " movq %%r10, 24(%0);"
48336 + " movq 16(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */
48337 + " adcx %%rax, %%r11;" " movq %%r11, 32(%0);"
48338 +- " adcx %%rcx, %%r12;" " movq %%r12, 40(%0);"
48339 ++ " adcx %%rcx, %%rbx;" " movq %%rbx, 40(%0);"
48340 + " movq 24(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */
48341 + " adcx %%rax, %%r13;" " movq %%r13, 48(%0);"
48342 + " adcx %%rcx, %%r14;" " movq %%r14, 56(%0);"
48343 +@@ -565,11 +565,11 @@ static inline void fsqr(u64 *out, const u64 *f, u64 *tmp)
48344 + " mulxq 32(%1), %%r8, %%r13;"
48345 + " xor %%rcx, %%rcx;"
48346 + " adoxq 0(%1), %%r8;"
48347 +- " mulxq 40(%1), %%r9, %%r12;"
48348 ++ " mulxq 40(%1), %%r9, %%rbx;"
48349 + " adcx %%r13, %%r9;"
48350 + " adoxq 8(%1), %%r9;"
48351 + " mulxq 48(%1), %%r10, %%r13;"
48352 +- " adcx %%r12, %%r10;"
48353 ++ " adcx %%rbx, %%r10;"
48354 + " adoxq 16(%1), %%r10;"
48355 + " mulxq 56(%1), %%r11, %%rax;"
48356 + " adcx %%r13, %%r11;"
48357 +@@ -594,7 +594,7 @@ static inline void fsqr(u64 *out, const u64 *f, u64 *tmp)
48358 + " movq %%r8, 0(%0);"
48359 + : "+&r" (tmp), "+&r" (f), "+&r" (out)
48360 + :
48361 +- : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "memory", "cc"
48362 ++ : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "%r15", "memory", "cc"
48363 + );
48364 + }
48365 +
48366 +@@ -611,8 +611,8 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
48367 + " mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */
48368 + " mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */
48369 + " movq 24(%1), %%rdx;" /* f[3] */
48370 +- " mulxq 8(%1), %%r11, %%r12;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */
48371 +- " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%r12;" /* f[2]*f[3] */
48372 ++ " mulxq 8(%1), %%r11, %%rbx;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */
48373 ++ " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%rbx;" /* f[2]*f[3] */
48374 + " movq 8(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */
48375 + " mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */
48376 +
48377 +@@ -622,12 +622,12 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
48378 + " adcx %%r8, %%r8;"
48379 + " adox %%rcx, %%r11;"
48380 + " adcx %%r9, %%r9;"
48381 +- " adox %%r15, %%r12;"
48382 ++ " adox %%r15, %%rbx;"
48383 + " adcx %%r10, %%r10;"
48384 + " adox %%r15, %%r13;"
48385 + " adcx %%r11, %%r11;"
48386 + " adox %%r15, %%r14;"
48387 +- " adcx %%r12, %%r12;"
48388 ++ " adcx %%rbx, %%rbx;"
48389 + " adcx %%r13, %%r13;"
48390 + " adcx %%r14, %%r14;"
48391 +
48392 +@@ -640,7 +640,7 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
48393 + " adcx %%rcx, %%r10;" " movq %%r10, 24(%0);"
48394 + " movq 16(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */
48395 + " adcx %%rax, %%r11;" " movq %%r11, 32(%0);"
48396 +- " adcx %%rcx, %%r12;" " movq %%r12, 40(%0);"
48397 ++ " adcx %%rcx, %%rbx;" " movq %%rbx, 40(%0);"
48398 + " movq 24(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */
48399 + " adcx %%rax, %%r13;" " movq %%r13, 48(%0);"
48400 + " adcx %%rcx, %%r14;" " movq %%r14, 56(%0);"
48401 +@@ -651,8 +651,8 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
48402 + " mulxq 48(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */
48403 + " mulxq 56(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */
48404 + " movq 56(%1), %%rdx;" /* f[3] */
48405 +- " mulxq 40(%1), %%r11, %%r12;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */
48406 +- " mulxq 48(%1), %%rax, %%r13;" " adcx %%rax, %%r12;" /* f[2]*f[3] */
48407 ++ " mulxq 40(%1), %%r11, %%rbx;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */
48408 ++ " mulxq 48(%1), %%rax, %%r13;" " adcx %%rax, %%rbx;" /* f[2]*f[3] */
48409 + " movq 40(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */
48410 + " mulxq 48(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */
48411 +
48412 +@@ -662,12 +662,12 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
48413 + " adcx %%r8, %%r8;"
48414 + " adox %%rcx, %%r11;"
48415 + " adcx %%r9, %%r9;"
48416 +- " adox %%r15, %%r12;"
48417 ++ " adox %%r15, %%rbx;"
48418 + " adcx %%r10, %%r10;"
48419 + " adox %%r15, %%r13;"
48420 + " adcx %%r11, %%r11;"
48421 + " adox %%r15, %%r14;"
48422 +- " adcx %%r12, %%r12;"
48423 ++ " adcx %%rbx, %%rbx;"
48424 + " adcx %%r13, %%r13;"
48425 + " adcx %%r14, %%r14;"
48426 +
48427 +@@ -680,7 +680,7 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
48428 + " adcx %%rcx, %%r10;" " movq %%r10, 88(%0);"
48429 + " movq 48(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */
48430 + " adcx %%rax, %%r11;" " movq %%r11, 96(%0);"
48431 +- " adcx %%rcx, %%r12;" " movq %%r12, 104(%0);"
48432 ++ " adcx %%rcx, %%rbx;" " movq %%rbx, 104(%0);"
48433 + " movq 56(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */
48434 + " adcx %%rax, %%r13;" " movq %%r13, 112(%0);"
48435 + " adcx %%rcx, %%r14;" " movq %%r14, 120(%0);"
48436 +@@ -694,11 +694,11 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
48437 + " mulxq 32(%1), %%r8, %%r13;"
48438 + " xor %%rcx, %%rcx;"
48439 + " adoxq 0(%1), %%r8;"
48440 +- " mulxq 40(%1), %%r9, %%r12;"
48441 ++ " mulxq 40(%1), %%r9, %%rbx;"
48442 + " adcx %%r13, %%r9;"
48443 + " adoxq 8(%1), %%r9;"
48444 + " mulxq 48(%1), %%r10, %%r13;"
48445 +- " adcx %%r12, %%r10;"
48446 ++ " adcx %%rbx, %%r10;"
48447 + " adoxq 16(%1), %%r10;"
48448 + " mulxq 56(%1), %%r11, %%rax;"
48449 + " adcx %%r13, %%r11;"
48450 +@@ -727,11 +727,11 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
48451 + " mulxq 96(%1), %%r8, %%r13;"
48452 + " xor %%rcx, %%rcx;"
48453 + " adoxq 64(%1), %%r8;"
48454 +- " mulxq 104(%1), %%r9, %%r12;"
48455 ++ " mulxq 104(%1), %%r9, %%rbx;"
48456 + " adcx %%r13, %%r9;"
48457 + " adoxq 72(%1), %%r9;"
48458 + " mulxq 112(%1), %%r10, %%r13;"
48459 +- " adcx %%r12, %%r10;"
48460 ++ " adcx %%rbx, %%r10;"
48461 + " adoxq 80(%1), %%r10;"
48462 + " mulxq 120(%1), %%r11, %%rax;"
48463 + " adcx %%r13, %%r11;"
48464 +@@ -756,7 +756,7 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp)
48465 + " movq %%r8, 32(%0);"
48466 + : "+&r" (tmp), "+&r" (f), "+&r" (out)
48467 + :
48468 +- : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "memory", "cc"
48469 ++ : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "%r15", "memory", "cc"
48470 + );
48471 + }
48472 +
48473 +--
48474 +cgit v1.2.3-4-ga26e
48475 +
48476 +
48477 +From 05824a47a9943bdc53033a528905573cc13b688f Mon Sep 17 00:00:00 2001
48478 +From: "Jason A. Donenfeld" <Jason@×××××.com>
48479 +Date: Thu, 19 Mar 2020 11:56:17 -0600
48480 +Subject: crypto: arm[64]/poly1305 - add artifact to .gitignore files
48481 +
48482 +commit 6e4e00d8b68ca7eb30d08afb740033e0d36abe55 upstream.
48483 +
48484 +The .S_shipped yields a .S, and the pattern in these directories is to
48485 +add that to .gitignore so that git-status doesn't raise a fuss.
48486 +
48487 +Fixes: a6b803b3ddc7 ("crypto: arm/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON implementation")
48488 +Fixes: f569ca164751 ("crypto: arm64/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON implementation")
48489 +Reported-by: Emil Renner Berthing <kernel@×××××.dk>
48490 +Cc: Ard Biesheuvel <ardb@××××××.org>
48491 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
48492 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
48493 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
48494 +---
48495 + arch/arm/crypto/.gitignore | 1 +
48496 + arch/arm64/crypto/.gitignore | 1 +
48497 + 2 files changed, 2 insertions(+)
48498 +
48499 +diff --git a/arch/arm/crypto/.gitignore b/arch/arm/crypto/.gitignore
48500 +index 31e1f538df7d..a3c7ad52a469 100644
48501 +--- a/arch/arm/crypto/.gitignore
48502 ++++ b/arch/arm/crypto/.gitignore
48503 +@@ -1,3 +1,4 @@
48504 + aesbs-core.S
48505 + sha256-core.S
48506 + sha512-core.S
48507 ++poly1305-core.S
48508 +diff --git a/arch/arm64/crypto/.gitignore b/arch/arm64/crypto/.gitignore
48509 +index 879df8781ed5..e403b1343328 100644
48510 +--- a/arch/arm64/crypto/.gitignore
48511 ++++ b/arch/arm64/crypto/.gitignore
48512 +@@ -1,2 +1,3 @@
48513 + sha256-core.S
48514 + sha512-core.S
48515 ++poly1305-core.S
48516 +--
48517 +cgit v1.2.3-4-ga26e
48518 +
48519 +
48520 +From 604312ea60632d5731946d6c3cfc21c4bd0f2474 Mon Sep 17 00:00:00 2001
48521 +From: "Jason A. Donenfeld" <Jason@×××××.com>
48522 +Date: Thu, 23 Apr 2020 15:54:04 -0600
48523 +Subject: crypto: arch/lib - limit simd usage to 4k chunks
48524 +
48525 +commit 706024a52c614b478b63f7728d202532ce6591a9 upstream.
48526 +
48527 +The initial Zinc patchset, after some mailing list discussion, contained
48528 +code to ensure that kernel_fpu_enable would not be kept on for more than
48529 +a 4k chunk, since it disables preemption. The choice of 4k isn't totally
48530 +scientific, but it's not a bad guess either, and it's what's used in
48531 +the x86 poly1305, blake2s, and nhpoly1305 code already (in the form
48532 +of PAGE_SIZE, which this commit corrects to be explicitly 4k for the
48533 +former two).
48534 +
48535 +Ard did some back of the envelope calculations and found that
48536 +at 5 cycles/byte (overestimate) on a 1 GHz processor (pretty slow), 4k
48537 +means we have a maximum preemption disabling of 20us, which Sebastian
48538 +confirmed was probably a good limit.
48539 +
48540 +Unfortunately the chunking appears to have been left out of the final
48541 +patchset that added the glue code. So, this commit adds it back in.
48542 +
48543 +Fixes: 84e03fa39fbe ("crypto: x86/chacha - expose SIMD ChaCha routine as library function")
48544 +Fixes: b3aad5bad26a ("crypto: arm64/chacha - expose arm64 ChaCha routine as library function")
48545 +Fixes: a44a3430d71b ("crypto: arm/chacha - expose ARM ChaCha routine as library function")
48546 +Fixes: d7d7b8535662 ("crypto: x86/poly1305 - wire up faster implementations for kernel")
48547 +Fixes: f569ca164751 ("crypto: arm64/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON implementation")
48548 +Fixes: a6b803b3ddc7 ("crypto: arm/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON implementation")
48549 +Fixes: ed0356eda153 ("crypto: blake2s - x86_64 SIMD implementation")
48550 +Cc: Eric Biggers <ebiggers@××××××.com>
48551 +Cc: Ard Biesheuvel <ardb@××××××.org>
48552 +Cc: Sebastian Andrzej Siewior <bigeasy@××××××××××.de>
48553 +Cc: stable@×××××××××××.org
48554 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
48555 +Reviewed-by: Ard Biesheuvel <ardb@××××××.org>
48556 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
48557 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
48558 +---
48559 + arch/arm/crypto/chacha-glue.c | 14 +++++++++++---
48560 + arch/arm/crypto/poly1305-glue.c | 15 +++++++++++----
48561 + arch/arm64/crypto/chacha-neon-glue.c | 14 +++++++++++---
48562 + arch/arm64/crypto/poly1305-glue.c | 15 +++++++++++----
48563 + arch/x86/crypto/blake2s-glue.c | 10 ++++------
48564 + arch/x86/crypto/chacha_glue.c | 14 +++++++++++---
48565 + arch/x86/crypto/poly1305_glue.c | 13 ++++++-------
48566 + 7 files changed, 65 insertions(+), 30 deletions(-)
48567 +
48568 +diff --git a/arch/arm/crypto/chacha-glue.c b/arch/arm/crypto/chacha-glue.c
48569 +index 893692ed12b7..cd131b454c2e 100644
48570 +--- a/arch/arm/crypto/chacha-glue.c
48571 ++++ b/arch/arm/crypto/chacha-glue.c
48572 +@@ -91,9 +91,17 @@ void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
48573 + return;
48574 + }
48575 +
48576 +- kernel_neon_begin();
48577 +- chacha_doneon(state, dst, src, bytes, nrounds);
48578 +- kernel_neon_end();
48579 ++ do {
48580 ++ unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
48581 ++
48582 ++ kernel_neon_begin();
48583 ++ chacha_doneon(state, dst, src, todo, nrounds);
48584 ++ kernel_neon_end();
48585 ++
48586 ++ bytes -= todo;
48587 ++ src += todo;
48588 ++ dst += todo;
48589 ++ } while (bytes);
48590 + }
48591 + EXPORT_SYMBOL(chacha_crypt_arch);
48592 +
48593 +diff --git a/arch/arm/crypto/poly1305-glue.c b/arch/arm/crypto/poly1305-glue.c
48594 +index ceec04ec2f40..13cfef4ae22e 100644
48595 +--- a/arch/arm/crypto/poly1305-glue.c
48596 ++++ b/arch/arm/crypto/poly1305-glue.c
48597 +@@ -160,13 +160,20 @@ void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
48598 + unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
48599 +
48600 + if (static_branch_likely(&have_neon) && do_neon) {
48601 +- kernel_neon_begin();
48602 +- poly1305_blocks_neon(&dctx->h, src, len, 1);
48603 +- kernel_neon_end();
48604 ++ do {
48605 ++ unsigned int todo = min_t(unsigned int, len, SZ_4K);
48606 ++
48607 ++ kernel_neon_begin();
48608 ++ poly1305_blocks_neon(&dctx->h, src, todo, 1);
48609 ++ kernel_neon_end();
48610 ++
48611 ++ len -= todo;
48612 ++ src += todo;
48613 ++ } while (len);
48614 + } else {
48615 + poly1305_blocks_arm(&dctx->h, src, len, 1);
48616 ++ src += len;
48617 + }
48618 +- src += len;
48619 + nbytes %= POLY1305_BLOCK_SIZE;
48620 + }
48621 +
48622 +diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c
48623 +index 218943612261..1d9824c4ae43 100644
48624 +--- a/arch/arm64/crypto/chacha-neon-glue.c
48625 ++++ b/arch/arm64/crypto/chacha-neon-glue.c
48626 +@@ -87,9 +87,17 @@ void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
48627 + !crypto_simd_usable())
48628 + return chacha_crypt_generic(state, dst, src, bytes, nrounds);
48629 +
48630 +- kernel_neon_begin();
48631 +- chacha_doneon(state, dst, src, bytes, nrounds);
48632 +- kernel_neon_end();
48633 ++ do {
48634 ++ unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
48635 ++
48636 ++ kernel_neon_begin();
48637 ++ chacha_doneon(state, dst, src, todo, nrounds);
48638 ++ kernel_neon_end();
48639 ++
48640 ++ bytes -= todo;
48641 ++ src += todo;
48642 ++ dst += todo;
48643 ++ } while (bytes);
48644 + }
48645 + EXPORT_SYMBOL(chacha_crypt_arch);
48646 +
48647 +diff --git a/arch/arm64/crypto/poly1305-glue.c b/arch/arm64/crypto/poly1305-glue.c
48648 +index e97b092f56b8..f33ada70c4ed 100644
48649 +--- a/arch/arm64/crypto/poly1305-glue.c
48650 ++++ b/arch/arm64/crypto/poly1305-glue.c
48651 +@@ -143,13 +143,20 @@ void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
48652 + unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
48653 +
48654 + if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
48655 +- kernel_neon_begin();
48656 +- poly1305_blocks_neon(&dctx->h, src, len, 1);
48657 +- kernel_neon_end();
48658 ++ do {
48659 ++ unsigned int todo = min_t(unsigned int, len, SZ_4K);
48660 ++
48661 ++ kernel_neon_begin();
48662 ++ poly1305_blocks_neon(&dctx->h, src, todo, 1);
48663 ++ kernel_neon_end();
48664 ++
48665 ++ len -= todo;
48666 ++ src += todo;
48667 ++ } while (len);
48668 + } else {
48669 + poly1305_blocks(&dctx->h, src, len, 1);
48670 ++ src += len;
48671 + }
48672 +- src += len;
48673 + nbytes %= POLY1305_BLOCK_SIZE;
48674 + }
48675 +
48676 +diff --git a/arch/x86/crypto/blake2s-glue.c b/arch/x86/crypto/blake2s-glue.c
48677 +index 1d9ff8a45e1f..94ac5bdd9f6f 100644
48678 +--- a/arch/x86/crypto/blake2s-glue.c
48679 ++++ b/arch/x86/crypto/blake2s-glue.c
48680 +@@ -32,16 +32,16 @@ void blake2s_compress_arch(struct blake2s_state *state,
48681 + const u32 inc)
48682 + {
48683 + /* SIMD disables preemption, so relax after processing each page. */
48684 +- BUILD_BUG_ON(PAGE_SIZE / BLAKE2S_BLOCK_SIZE < 8);
48685 ++ BUILD_BUG_ON(SZ_4K / BLAKE2S_BLOCK_SIZE < 8);
48686 +
48687 + if (!static_branch_likely(&blake2s_use_ssse3) || !crypto_simd_usable()) {
48688 + blake2s_compress_generic(state, block, nblocks, inc);
48689 + return;
48690 + }
48691 +
48692 +- for (;;) {
48693 ++ do {
48694 + const size_t blocks = min_t(size_t, nblocks,
48695 +- PAGE_SIZE / BLAKE2S_BLOCK_SIZE);
48696 ++ SZ_4K / BLAKE2S_BLOCK_SIZE);
48697 +
48698 + kernel_fpu_begin();
48699 + if (IS_ENABLED(CONFIG_AS_AVX512) &&
48700 +@@ -52,10 +52,8 @@ void blake2s_compress_arch(struct blake2s_state *state,
48701 + kernel_fpu_end();
48702 +
48703 + nblocks -= blocks;
48704 +- if (!nblocks)
48705 +- break;
48706 + block += blocks * BLAKE2S_BLOCK_SIZE;
48707 +- }
48708 ++ } while (nblocks);
48709 + }
48710 + EXPORT_SYMBOL(blake2s_compress_arch);
48711 +
48712 +diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c
48713 +index 1bebe11b9ec9..f3bfce21bc0d 100644
48714 +--- a/arch/x86/crypto/chacha_glue.c
48715 ++++ b/arch/x86/crypto/chacha_glue.c
48716 +@@ -154,9 +154,17 @@ void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
48717 + bytes <= CHACHA_BLOCK_SIZE)
48718 + return chacha_crypt_generic(state, dst, src, bytes, nrounds);
48719 +
48720 +- kernel_fpu_begin();
48721 +- chacha_dosimd(state, dst, src, bytes, nrounds);
48722 +- kernel_fpu_end();
48723 ++ do {
48724 ++ unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
48725 ++
48726 ++ kernel_fpu_begin();
48727 ++ chacha_dosimd(state, dst, src, todo, nrounds);
48728 ++ kernel_fpu_end();
48729 ++
48730 ++ bytes -= todo;
48731 ++ src += todo;
48732 ++ dst += todo;
48733 ++ } while (bytes);
48734 + }
48735 + EXPORT_SYMBOL(chacha_crypt_arch);
48736 +
48737 +diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
48738 +index 79bb58737d52..61b2bc8b6986 100644
48739 +--- a/arch/x86/crypto/poly1305_glue.c
48740 ++++ b/arch/x86/crypto/poly1305_glue.c
48741 +@@ -91,8 +91,8 @@ static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len,
48742 + struct poly1305_arch_internal *state = ctx;
48743 +
48744 + /* SIMD disables preemption, so relax after processing each page. */
48745 +- BUILD_BUG_ON(PAGE_SIZE < POLY1305_BLOCK_SIZE ||
48746 +- PAGE_SIZE % POLY1305_BLOCK_SIZE);
48747 ++ BUILD_BUG_ON(SZ_4K < POLY1305_BLOCK_SIZE ||
48748 ++ SZ_4K % POLY1305_BLOCK_SIZE);
48749 +
48750 + if (!IS_ENABLED(CONFIG_AS_AVX) || !static_branch_likely(&poly1305_use_avx) ||
48751 + (len < (POLY1305_BLOCK_SIZE * 18) && !state->is_base2_26) ||
48752 +@@ -102,8 +102,8 @@ static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len,
48753 + return;
48754 + }
48755 +
48756 +- for (;;) {
48757 +- const size_t bytes = min_t(size_t, len, PAGE_SIZE);
48758 ++ do {
48759 ++ const size_t bytes = min_t(size_t, len, SZ_4K);
48760 +
48761 + kernel_fpu_begin();
48762 + if (IS_ENABLED(CONFIG_AS_AVX512) && static_branch_likely(&poly1305_use_avx512))
48763 +@@ -113,11 +113,10 @@ static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len,
48764 + else
48765 + poly1305_blocks_avx(ctx, inp, bytes, padbit);
48766 + kernel_fpu_end();
48767 ++
48768 + len -= bytes;
48769 +- if (!len)
48770 +- break;
48771 + inp += bytes;
48772 +- }
48773 ++ } while (len);
48774 + }
48775 +
48776 + static void poly1305_simd_emit(void *ctx, u8 mac[POLY1305_DIGEST_SIZE],
48777 +--
48778 +cgit v1.2.3-4-ga26e
48779 +
48780 +
48781 +From 6495db85fb0a9dd981b31afab160dd1c826aa8a5 Mon Sep 17 00:00:00 2001
48782 +From: Herbert Xu <herbert@××××××××××××××××.au>
48783 +Date: Wed, 8 Jul 2020 12:41:13 +1000
48784 +Subject: crypto: lib/chacha20poly1305 - Add missing function declaration
48785 +
48786 +commit 06cc2afbbdf9a9e8df3e2f8db724997dd6e1b4ac upstream.
48787 +
48788 +This patch adds a declaration for chacha20poly1305_selftest to
48789 +silence a sparse warning.
48790 +
48791 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
48792 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
48793 +---
48794 + include/crypto/chacha20poly1305.h | 2 ++
48795 + lib/crypto/chacha20poly1305.c | 2 --
48796 + 2 files changed, 2 insertions(+), 2 deletions(-)
48797 +
48798 +diff --git a/include/crypto/chacha20poly1305.h b/include/crypto/chacha20poly1305.h
48799 +index 234ee28078ef..d2ac3ff7dc1e 100644
48800 +--- a/include/crypto/chacha20poly1305.h
48801 ++++ b/include/crypto/chacha20poly1305.h
48802 +@@ -45,4 +45,6 @@ bool chacha20poly1305_decrypt_sg_inplace(struct scatterlist *src, size_t src_len
48803 + const u64 nonce,
48804 + const u8 key[CHACHA20POLY1305_KEY_SIZE]);
48805 +
48806 ++bool chacha20poly1305_selftest(void);
48807 ++
48808 + #endif /* __CHACHA20POLY1305_H */
48809 +diff --git a/lib/crypto/chacha20poly1305.c b/lib/crypto/chacha20poly1305.c
48810 +index ad0699ce702f..431e04280332 100644
48811 +--- a/lib/crypto/chacha20poly1305.c
48812 ++++ b/lib/crypto/chacha20poly1305.c
48813 +@@ -21,8 +21,6 @@
48814 +
48815 + #define CHACHA_KEY_WORDS (CHACHA_KEY_SIZE / sizeof(u32))
48816 +
48817 +-bool __init chacha20poly1305_selftest(void);
48818 +-
48819 + static void chacha_load_key(u32 *k, const u8 *in)
48820 + {
48821 + k[0] = get_unaligned_le32(in);
48822 +--
48823 +cgit v1.2.3-4-ga26e
48824 +
48825 +
48826 +From 6554c5a044e825f301e8dc97feed0db5ca4bae07 Mon Sep 17 00:00:00 2001
48827 +From: Ard Biesheuvel <ardb@××××××.org>
48828 +Date: Wed, 8 Jul 2020 12:11:18 +0300
48829 +Subject: crypto: x86/chacha-sse3 - use unaligned loads for state array
48830 +
48831 +commit e79a31715193686e92dadb4caedfbb1f5de3659c upstream.
48832 +
48833 +Due to the fact that the x86 port does not support allocating objects
48834 +on the stack with an alignment that exceeds 8 bytes, we have a rather
48835 +ugly hack in the x86 code for ChaCha to ensure that the state array is
48836 +aligned to 16 bytes, allowing the SSE3 implementation of the algorithm
48837 +to use aligned loads.
48838 +
48839 +Given that the performance benefit of using aligned loads appears to
48840 +be limited (~0.25% for 1k blocks using tcrypt on a Corei7-8650U), and
48841 +the fact that this hack has leaked into generic ChaCha code, let's just
48842 +remove it.
48843 +
48844 +Cc: Martin Willi <martin@××××××××××.org>
48845 +Cc: Herbert Xu <herbert@××××××××××××××××.au>
48846 +Cc: Eric Biggers <ebiggers@××××××.org>
48847 +Signed-off-by: Ard Biesheuvel <ardb@××××××.org>
48848 +Reviewed-by: Martin Willi <martin@××××××××××.org>
48849 +Reviewed-by: Eric Biggers <ebiggers@××××××.com>
48850 +Signed-off-by: Herbert Xu <herbert@××××××××××××××××.au>
48851 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
48852 +---
48853 + arch/x86/crypto/chacha-ssse3-x86_64.S | 16 ++++++++--------
48854 + arch/x86/crypto/chacha_glue.c | 17 ++---------------
48855 + include/crypto/chacha.h | 4 ----
48856 + 3 files changed, 10 insertions(+), 27 deletions(-)
48857 +
48858 +diff --git a/arch/x86/crypto/chacha-ssse3-x86_64.S b/arch/x86/crypto/chacha-ssse3-x86_64.S
48859 +index 2d86c7d6dc88..a556a57f1a4a 100644
48860 +--- a/arch/x86/crypto/chacha-ssse3-x86_64.S
48861 ++++ b/arch/x86/crypto/chacha-ssse3-x86_64.S
48862 +@@ -120,10 +120,10 @@ ENTRY(chacha_block_xor_ssse3)
48863 + FRAME_BEGIN
48864 +
48865 + # x0..3 = s0..3
48866 +- movdqa 0x00(%rdi),%xmm0
48867 +- movdqa 0x10(%rdi),%xmm1
48868 +- movdqa 0x20(%rdi),%xmm2
48869 +- movdqa 0x30(%rdi),%xmm3
48870 ++ movdqu 0x00(%rdi),%xmm0
48871 ++ movdqu 0x10(%rdi),%xmm1
48872 ++ movdqu 0x20(%rdi),%xmm2
48873 ++ movdqu 0x30(%rdi),%xmm3
48874 + movdqa %xmm0,%xmm8
48875 + movdqa %xmm1,%xmm9
48876 + movdqa %xmm2,%xmm10
48877 +@@ -205,10 +205,10 @@ ENTRY(hchacha_block_ssse3)
48878 + # %edx: nrounds
48879 + FRAME_BEGIN
48880 +
48881 +- movdqa 0x00(%rdi),%xmm0
48882 +- movdqa 0x10(%rdi),%xmm1
48883 +- movdqa 0x20(%rdi),%xmm2
48884 +- movdqa 0x30(%rdi),%xmm3
48885 ++ movdqu 0x00(%rdi),%xmm0
48886 ++ movdqu 0x10(%rdi),%xmm1
48887 ++ movdqu 0x20(%rdi),%xmm2
48888 ++ movdqu 0x30(%rdi),%xmm3
48889 +
48890 + mov %edx,%r8d
48891 + call chacha_permute
48892 +diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c
48893 +index f3bfce21bc0d..4c4dc64398cb 100644
48894 +--- a/arch/x86/crypto/chacha_glue.c
48895 ++++ b/arch/x86/crypto/chacha_glue.c
48896 +@@ -14,8 +14,6 @@
48897 + #include <linux/module.h>
48898 + #include <asm/simd.h>
48899 +
48900 +-#define CHACHA_STATE_ALIGN 16
48901 +-
48902 + asmlinkage void chacha_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
48903 + unsigned int len, int nrounds);
48904 + asmlinkage void chacha_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
48905 +@@ -125,8 +123,6 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
48906 +
48907 + void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
48908 + {
48909 +- state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);
48910 +-
48911 + if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable()) {
48912 + hchacha_block_generic(state, stream, nrounds);
48913 + } else {
48914 +@@ -139,8 +135,6 @@ EXPORT_SYMBOL(hchacha_block_arch);
48915 +
48916 + void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
48917 + {
48918 +- state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);
48919 +-
48920 + chacha_init_generic(state, key, iv);
48921 + }
48922 + EXPORT_SYMBOL(chacha_init_arch);
48923 +@@ -148,8 +142,6 @@ EXPORT_SYMBOL(chacha_init_arch);
48924 + void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
48925 + int nrounds)
48926 + {
48927 +- state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);
48928 +-
48929 + if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable() ||
48930 + bytes <= CHACHA_BLOCK_SIZE)
48931 + return chacha_crypt_generic(state, dst, src, bytes, nrounds);
48932 +@@ -171,15 +163,12 @@ EXPORT_SYMBOL(chacha_crypt_arch);
48933 + static int chacha_simd_stream_xor(struct skcipher_request *req,
48934 + const struct chacha_ctx *ctx, const u8 *iv)
48935 + {
48936 +- u32 *state, state_buf[16 + 2] __aligned(8);
48937 ++ u32 state[CHACHA_STATE_WORDS] __aligned(8);
48938 + struct skcipher_walk walk;
48939 + int err;
48940 +
48941 + err = skcipher_walk_virt(&walk, req, false);
48942 +
48943 +- BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16);
48944 +- state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN);
48945 +-
48946 + chacha_init_generic(state, ctx->key, iv);
48947 +
48948 + while (walk.nbytes > 0) {
48949 +@@ -218,12 +207,10 @@ static int xchacha_simd(struct skcipher_request *req)
48950 + {
48951 + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
48952 + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
48953 +- u32 *state, state_buf[16 + 2] __aligned(8);
48954 ++ u32 state[CHACHA_STATE_WORDS] __aligned(8);
48955 + struct chacha_ctx subctx;
48956 + u8 real_iv[16];
48957 +
48958 +- BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16);
48959 +- state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN);
48960 + chacha_init_generic(state, ctx->key, req->iv);
48961 +
48962 + if (req->cryptlen > CHACHA_BLOCK_SIZE && crypto_simd_usable()) {
48963 +diff --git a/include/crypto/chacha.h b/include/crypto/chacha.h
48964 +index 2676f4fbd4c1..3a1c72fdb7cf 100644
48965 +--- a/include/crypto/chacha.h
48966 ++++ b/include/crypto/chacha.h
48967 +@@ -25,11 +25,7 @@
48968 + #define CHACHA_BLOCK_SIZE 64
48969 + #define CHACHAPOLY_IV_SIZE 12
48970 +
48971 +-#ifdef CONFIG_X86_64
48972 +-#define CHACHA_STATE_WORDS ((CHACHA_BLOCK_SIZE + 12) / sizeof(u32))
48973 +-#else
48974 + #define CHACHA_STATE_WORDS (CHACHA_BLOCK_SIZE / sizeof(u32))
48975 +-#endif
48976 +
48977 + /* 192-bit nonce, then 64-bit stream position */
48978 + #define XCHACHA_IV_SIZE 32
48979 +--
48980 +cgit v1.2.3-4-ga26e
48981 +
48982 +
48983 +From 678a9b8a9a48519e7bf7459d89f2cceabf1d89f3 Mon Sep 17 00:00:00 2001
48984 +From: "Jason A. Donenfeld" <Jason@×××××.com>
48985 +Date: Tue, 11 Feb 2020 20:47:05 +0100
48986 +Subject: icmp: introduce helper for nat'd source address in network device
48987 + context
48988 +
48989 +commit 0b41713b606694257b90d61ba7e2712d8457648b upstream.
48990 +
48991 +This introduces a helper function to be called only by network drivers
48992 +that wraps calls to icmp[v6]_send in a conntrack transformation, in case
48993 +NAT has been used. We don't want to pollute the non-driver path, though,
48994 +so we introduce this as a helper to be called by places that actually
48995 +make use of this, as suggested by Florian.
48996 +
48997 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
48998 +Cc: Florian Westphal <fw@××××××.de>
48999 +Signed-off-by: David S. Miller <davem@×××××××××.net>
49000 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
49001 +---
49002 + include/linux/icmpv6.h | 10 ++++++++++
49003 + include/net/icmp.h | 6 ++++++
49004 + net/ipv4/icmp.c | 33 +++++++++++++++++++++++++++++++++
49005 + net/ipv6/ip6_icmp.c | 34 ++++++++++++++++++++++++++++++++++
49006 + 4 files changed, 83 insertions(+)
49007 +
49008 +diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h
49009 +index a8f888976137..024b7a4cd98e 100644
49010 +--- a/include/linux/icmpv6.h
49011 ++++ b/include/linux/icmpv6.h
49012 +@@ -22,12 +22,22 @@ extern int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn);
49013 + int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
49014 + unsigned int data_len);
49015 +
49016 ++#if IS_ENABLED(CONFIG_NF_NAT)
49017 ++void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info);
49018 ++#else
49019 ++#define icmpv6_ndo_send icmpv6_send
49020 ++#endif
49021 ++
49022 + #else
49023 +
49024 + static inline void icmpv6_send(struct sk_buff *skb,
49025 + u8 type, u8 code, __u32 info)
49026 + {
49027 ++}
49028 +
49029 ++static inline void icmpv6_ndo_send(struct sk_buff *skb,
49030 ++ u8 type, u8 code, __u32 info)
49031 ++{
49032 + }
49033 + #endif
49034 +
49035 +diff --git a/include/net/icmp.h b/include/net/icmp.h
49036 +index 5d4bfdba9adf..9ac2d2672a93 100644
49037 +--- a/include/net/icmp.h
49038 ++++ b/include/net/icmp.h
49039 +@@ -43,6 +43,12 @@ static inline void icmp_send(struct sk_buff *skb_in, int type, int code, __be32
49040 + __icmp_send(skb_in, type, code, info, &IPCB(skb_in)->opt);
49041 + }
49042 +
49043 ++#if IS_ENABLED(CONFIG_NF_NAT)
49044 ++void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info);
49045 ++#else
49046 ++#define icmp_ndo_send icmp_send
49047 ++#endif
49048 ++
49049 + int icmp_rcv(struct sk_buff *skb);
49050 + int icmp_err(struct sk_buff *skb, u32 info);
49051 + int icmp_init(void);
49052 +diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
49053 +index 73f46cb5e51d..2721e1774e2f 100644
49054 +--- a/net/ipv4/icmp.c
49055 ++++ b/net/ipv4/icmp.c
49056 +@@ -747,6 +747,39 @@ out:;
49057 + }
49058 + EXPORT_SYMBOL(__icmp_send);
49059 +
49060 ++#if IS_ENABLED(CONFIG_NF_NAT)
49061 ++#include <net/netfilter/nf_conntrack.h>
49062 ++void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info)
49063 ++{
49064 ++ struct sk_buff *cloned_skb = NULL;
49065 ++ enum ip_conntrack_info ctinfo;
49066 ++ struct nf_conn *ct;
49067 ++ __be32 orig_ip;
49068 ++
49069 ++ ct = nf_ct_get(skb_in, &ctinfo);
49070 ++ if (!ct || !(ct->status & IPS_SRC_NAT)) {
49071 ++ icmp_send(skb_in, type, code, info);
49072 ++ return;
49073 ++ }
49074 ++
49075 ++ if (skb_shared(skb_in))
49076 ++ skb_in = cloned_skb = skb_clone(skb_in, GFP_ATOMIC);
49077 ++
49078 ++ if (unlikely(!skb_in || skb_network_header(skb_in) < skb_in->head ||
49079 ++ (skb_network_header(skb_in) + sizeof(struct iphdr)) >
49080 ++ skb_tail_pointer(skb_in) || skb_ensure_writable(skb_in,
49081 ++ skb_network_offset(skb_in) + sizeof(struct iphdr))))
49082 ++ goto out;
49083 ++
49084 ++ orig_ip = ip_hdr(skb_in)->saddr;
49085 ++ ip_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.ip;
49086 ++ icmp_send(skb_in, type, code, info);
49087 ++ ip_hdr(skb_in)->saddr = orig_ip;
49088 ++out:
49089 ++ consume_skb(cloned_skb);
49090 ++}
49091 ++EXPORT_SYMBOL(icmp_ndo_send);
49092 ++#endif
49093 +
49094 + static void icmp_socket_deliver(struct sk_buff *skb, u32 info)
49095 + {
49096 +diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c
49097 +index 02045494c24c..e0086758b6ee 100644
49098 +--- a/net/ipv6/ip6_icmp.c
49099 ++++ b/net/ipv6/ip6_icmp.c
49100 +@@ -45,4 +45,38 @@ out:
49101 + rcu_read_unlock();
49102 + }
49103 + EXPORT_SYMBOL(icmpv6_send);
49104 ++
49105 ++#if IS_ENABLED(CONFIG_NF_NAT)
49106 ++#include <net/netfilter/nf_conntrack.h>
49107 ++void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info)
49108 ++{
49109 ++ struct sk_buff *cloned_skb = NULL;
49110 ++ enum ip_conntrack_info ctinfo;
49111 ++ struct in6_addr orig_ip;
49112 ++ struct nf_conn *ct;
49113 ++
49114 ++ ct = nf_ct_get(skb_in, &ctinfo);
49115 ++ if (!ct || !(ct->status & IPS_SRC_NAT)) {
49116 ++ icmpv6_send(skb_in, type, code, info);
49117 ++ return;
49118 ++ }
49119 ++
49120 ++ if (skb_shared(skb_in))
49121 ++ skb_in = cloned_skb = skb_clone(skb_in, GFP_ATOMIC);
49122 ++
49123 ++ if (unlikely(!skb_in || skb_network_header(skb_in) < skb_in->head ||
49124 ++ (skb_network_header(skb_in) + sizeof(struct ipv6hdr)) >
49125 ++ skb_tail_pointer(skb_in) || skb_ensure_writable(skb_in,
49126 ++ skb_network_offset(skb_in) + sizeof(struct ipv6hdr))))
49127 ++ goto out;
49128 ++
49129 ++ orig_ip = ipv6_hdr(skb_in)->saddr;
49130 ++ ipv6_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.in6;
49131 ++ icmpv6_send(skb_in, type, code, info);
49132 ++ ipv6_hdr(skb_in)->saddr = orig_ip;
49133 ++out:
49134 ++ consume_skb(cloned_skb);
49135 ++}
49136 ++EXPORT_SYMBOL(icmpv6_ndo_send);
49137 ++#endif
49138 + #endif
49139 +--
49140 +cgit v1.2.3-4-ga26e
49141 +
49142 +
49143 +From 419c9010e6075120339069cd5eef8de14a9d0f34 Mon Sep 17 00:00:00 2001
49144 +From: "Jason A. Donenfeld" <Jason@×××××.com>
49145 +Date: Mon, 9 Dec 2019 00:27:34 +0100
49146 +Subject: net: WireGuard secure network tunnel
49147 +
49148 +commit e7096c131e5161fa3b8e52a650d7719d2857adfd upstream.
49149 +
49150 +WireGuard is a layer 3 secure networking tunnel made specifically for
49151 +the kernel, that aims to be much simpler and easier to audit than IPsec.
49152 +Extensive documentation and description of the protocol and
49153 +considerations, along with formal proofs of the cryptography, are
49154 +available at:
49155 +
49156 + * https://www.wireguard.com/
49157 + * https://www.wireguard.com/papers/wireguard.pdf
49158 +
49159 +This commit implements WireGuard as a simple network device driver,
49160 +accessible in the usual RTNL way used by virtual network drivers. It
49161 +makes use of the udp_tunnel APIs, GRO, GSO, NAPI, and the usual set of
49162 +networking subsystem APIs. It has a somewhat novel multicore queueing
49163 +system designed for maximum throughput and minimal latency of encryption
49164 +operations, but it is implemented modestly using workqueues and NAPI.
49165 +Configuration is done via generic Netlink, and following a review from
49166 +the Netlink maintainer a year ago, several high profile userspace tools
49167 +have already implemented the API.
49168 +
49169 +This commit also comes with several different tests, both in-kernel
49170 +tests and out-of-kernel tests based on network namespaces, taking advantage
49171 +of the fact that sockets used by WireGuard intentionally stay in the
49172 +namespace the WireGuard interface was originally created, exactly like
49173 +the semantics of userspace tun devices. See wireguard.com/netns/ for
49174 +pictures and examples.
49175 +
49176 +The source code is fairly short, but rather than combining everything
49177 +into a single file, WireGuard is developed as cleanly separable files,
49178 +making auditing and comprehension easier. Things are laid out as
49179 +follows:
49180 +
49181 + * noise.[ch], cookie.[ch], messages.h: These implement the bulk of the
49182 + cryptographic aspects of the protocol, and are mostly data-only in
49183 + nature, taking in buffers of bytes and spitting out buffers of
49184 + bytes. They also handle reference counting for their various shared
49185 + pieces of data, like keys and key lists.
49186 +
49187 + * ratelimiter.[ch]: Used as an integral part of cookie.[ch] for
49188 + ratelimiting certain types of cryptographic operations in accordance
49189 + with particular WireGuard semantics.
49190 +
49191 + * allowedips.[ch], peerlookup.[ch]: The main lookup structures of
49192 + WireGuard, the former being trie-like with particular semantics, an
49193 + integral part of the design of the protocol, and the latter just
49194 + being nice helper functions around the various hashtables we use.
49195 +
49196 + * device.[ch]: Implementation of functions for the netdevice and for
49197 + rtnl, responsible for maintaining the life of a given interface and
49198 + wiring it up to the rest of WireGuard.
49199 +
49200 + * peer.[ch]: Each interface has a list of peers, with helper functions
49201 + available here for creation, destruction, and reference counting.
49202 +
49203 + * socket.[ch]: Implementation of functions related to udp_socket and
49204 + the general set of kernel socket APIs, for sending and receiving
49205 + ciphertext UDP packets, and taking care of WireGuard-specific sticky
49206 + socket routing semantics for the automatic roaming.
49207 +
49208 + * netlink.[ch]: Userspace API entry point for configuring WireGuard
49209 + peers and devices. The API has been implemented by several userspace
49210 + tools and network management utilities, and the WireGuard project
49211 + distributes the basic wg(8) tool.
49212 +
49213 + * queueing.[ch]: Shared function on the rx and tx path for handling
49214 + the various queues used in the multicore algorithms.
49215 +
49216 + * send.c: Handles encrypting outgoing packets in parallel on
49217 + multiple cores, before sending them in order on a single core, via
49218 + workqueues and ring buffers. Also handles sending handshake and cookie
49219 + messages as part of the protocol, in parallel.
49220 +
49221 + * receive.c: Handles decrypting incoming packets in parallel on
49222 + multiple cores, before passing them off in order to be ingested via
49223 + the rest of the networking subsystem with GRO via the typical NAPI
49224 + poll function. Also handles receiving handshake and cookie messages
49225 + as part of the protocol, in parallel.
49226 +
49227 + * timers.[ch]: Uses the timer wheel to implement protocol particular
49228 + event timeouts, and gives a set of very simple event-driven entry
49229 + point functions for callers.
49230 +
49231 + * main.c, version.h: Initialization and deinitialization of the module.
49232 +
49233 + * selftest/*.h: Runtime unit tests for some of the most security
49234 + sensitive functions.
49235 +
49236 + * tools/testing/selftests/wireguard/netns.sh: Aforementioned testing
49237 + script using network namespaces.
49238 +
49239 +This commit aims to be as self-contained as possible, implementing
49240 +WireGuard as a standalone module not needing much special handling or
49241 +coordination from the network subsystem. I expect for future
49242 +optimizations to the network stack to positively improve WireGuard, and
49243 +vice-versa, but for the time being, this exists as intentionally
49244 +standalone.
49245 +
49246 +We introduce a menu option for CONFIG_WIREGUARD, as well as providing a
49247 +verbose debug log and self-tests via CONFIG_WIREGUARD_DEBUG.
49248 +
49249 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
49250 +Cc: David Miller <davem@×××××××××.net>
49251 +Cc: Greg KH <gregkh@×××××××××××××××.org>
49252 +Cc: Linus Torvalds <torvalds@××××××××××××××××.org>
49253 +Cc: Herbert Xu <herbert@××××××××××××××××.au>
49254 +Cc: linux-crypto@×××××××××××.org
49255 +Cc: linux-kernel@×××××××××××.org
49256 +Cc: netdev@×××××××××××.org
49257 +Signed-off-by: David S. Miller <davem@×××××××××.net>
49258 +[Jason: ported to 5.4 by doing the following:
49259 + - wg_get_device_start uses genl_family_attrbuf
49260 + - trivial skb_redirect_reset change from 2c64605b590e is folded in]
49261 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
49262 +---
49263 + MAINTAINERS | 8 +
49264 + drivers/net/Kconfig | 41 ++
49265 + drivers/net/Makefile | 1 +
49266 + drivers/net/wireguard/Makefile | 18 +
49267 + drivers/net/wireguard/allowedips.c | 381 ++++++++++++
49268 + drivers/net/wireguard/allowedips.h | 59 ++
49269 + drivers/net/wireguard/cookie.c | 236 ++++++++
49270 + drivers/net/wireguard/cookie.h | 59 ++
49271 + drivers/net/wireguard/device.c | 458 +++++++++++++++
49272 + drivers/net/wireguard/device.h | 73 +++
49273 + drivers/net/wireguard/main.c | 64 +++
49274 + drivers/net/wireguard/messages.h | 128 +++++
49275 + drivers/net/wireguard/netlink.c | 648 +++++++++++++++++++++
49276 + drivers/net/wireguard/netlink.h | 12 +
49277 + drivers/net/wireguard/noise.c | 828 +++++++++++++++++++++++++++
49278 + drivers/net/wireguard/noise.h | 137 +++++
49279 + drivers/net/wireguard/peer.c | 240 ++++++++
49280 + drivers/net/wireguard/peer.h | 83 +++
49281 + drivers/net/wireguard/peerlookup.c | 221 +++++++
49282 + drivers/net/wireguard/peerlookup.h | 64 +++
49283 + drivers/net/wireguard/queueing.c | 53 ++
49284 + drivers/net/wireguard/queueing.h | 197 +++++++
49285 + drivers/net/wireguard/ratelimiter.c | 223 ++++++++
49286 + drivers/net/wireguard/ratelimiter.h | 19 +
49287 + drivers/net/wireguard/receive.c | 595 +++++++++++++++++++
49288 + drivers/net/wireguard/selftest/allowedips.c | 683 ++++++++++++++++++++++
49289 + drivers/net/wireguard/selftest/counter.c | 104 ++++
49290 + drivers/net/wireguard/selftest/ratelimiter.c | 226 ++++++++
49291 + drivers/net/wireguard/send.c | 413 +++++++++++++
49292 + drivers/net/wireguard/socket.c | 437 ++++++++++++++
49293 + drivers/net/wireguard/socket.h | 44 ++
49294 + drivers/net/wireguard/timers.c | 243 ++++++++
49295 + drivers/net/wireguard/timers.h | 31 +
49296 + drivers/net/wireguard/version.h | 1 +
49297 + include/uapi/linux/wireguard.h | 196 +++++++
49298 + tools/testing/selftests/wireguard/netns.sh | 537 +++++++++++++++++
49299 + 36 files changed, 7761 insertions(+)
49300 + create mode 100644 drivers/net/wireguard/Makefile
49301 + create mode 100644 drivers/net/wireguard/allowedips.c
49302 + create mode 100644 drivers/net/wireguard/allowedips.h
49303 + create mode 100644 drivers/net/wireguard/cookie.c
49304 + create mode 100644 drivers/net/wireguard/cookie.h
49305 + create mode 100644 drivers/net/wireguard/device.c
49306 + create mode 100644 drivers/net/wireguard/device.h
49307 + create mode 100644 drivers/net/wireguard/main.c
49308 + create mode 100644 drivers/net/wireguard/messages.h
49309 + create mode 100644 drivers/net/wireguard/netlink.c
49310 + create mode 100644 drivers/net/wireguard/netlink.h
49311 + create mode 100644 drivers/net/wireguard/noise.c
49312 + create mode 100644 drivers/net/wireguard/noise.h
49313 + create mode 100644 drivers/net/wireguard/peer.c
49314 + create mode 100644 drivers/net/wireguard/peer.h
49315 + create mode 100644 drivers/net/wireguard/peerlookup.c
49316 + create mode 100644 drivers/net/wireguard/peerlookup.h
49317 + create mode 100644 drivers/net/wireguard/queueing.c
49318 + create mode 100644 drivers/net/wireguard/queueing.h
49319 + create mode 100644 drivers/net/wireguard/ratelimiter.c
49320 + create mode 100644 drivers/net/wireguard/ratelimiter.h
49321 + create mode 100644 drivers/net/wireguard/receive.c
49322 + create mode 100644 drivers/net/wireguard/selftest/allowedips.c
49323 + create mode 100644 drivers/net/wireguard/selftest/counter.c
49324 + create mode 100644 drivers/net/wireguard/selftest/ratelimiter.c
49325 + create mode 100644 drivers/net/wireguard/send.c
49326 + create mode 100644 drivers/net/wireguard/socket.c
49327 + create mode 100644 drivers/net/wireguard/socket.h
49328 + create mode 100644 drivers/net/wireguard/timers.c
49329 + create mode 100644 drivers/net/wireguard/timers.h
49330 + create mode 100644 drivers/net/wireguard/version.h
49331 + create mode 100644 include/uapi/linux/wireguard.h
49332 + create mode 100755 tools/testing/selftests/wireguard/netns.sh
49333 +
49334 +diff --git a/MAINTAINERS b/MAINTAINERS
49335 +index fe6fa5d3a63e..d05f78261f33 100644
49336 +--- a/MAINTAINERS
49337 ++++ b/MAINTAINERS
49338 +@@ -17583,6 +17583,14 @@ L: linux-gpio@×××××××××××.org
49339 + S: Maintained
49340 + F: drivers/gpio/gpio-ws16c48.c
49341 +
49342 ++WIREGUARD SECURE NETWORK TUNNEL
49343 ++M: Jason A. Donenfeld <Jason@×××××.com>
49344 ++S: Maintained
49345 ++F: drivers/net/wireguard/
49346 ++F: tools/testing/selftests/wireguard/
49347 ++L: wireguard@×××××××××××.com
49348 ++L: netdev@×××××××××××.org
49349 ++
49350 + WISTRON LAPTOP BUTTON DRIVER
49351 + M: Miloslav Trmac <mitr@×××××.cz>
49352 + S: Maintained
49353 +diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
49354 +index df3cd2589bcf..16ad145e22c9 100644
49355 +--- a/drivers/net/Kconfig
49356 ++++ b/drivers/net/Kconfig
49357 +@@ -71,6 +71,47 @@ config DUMMY
49358 + To compile this driver as a module, choose M here: the module
49359 + will be called dummy.
49360 +
# WireGuard tunnel driver. Besides the generic networking/crypto library
# pieces, it selects the architecture-specific implementations listed below
# whenever the matching architecture condition holds.
config WIREGUARD
	tristate "WireGuard secure network tunnel"
	depends on NET && INET
	# Usual Kconfig idiom: keeps WIREGUARD from being built-in while IPV6
	# is modular.
	depends on IPV6 || !IPV6
	select NET_UDP_TUNNEL
	select DST_CACHE
	select CRYPTO
	select CRYPTO_LIB_CURVE25519
	select CRYPTO_LIB_CHACHA20POLY1305
	select CRYPTO_LIB_BLAKE2S
	# Architecture-specific implementations, selected conditionally.
	select CRYPTO_CHACHA20_X86_64 if X86 && 64BIT
	select CRYPTO_POLY1305_X86_64 if X86 && 64BIT
	select CRYPTO_BLAKE2S_X86 if X86 && 64BIT
	select CRYPTO_CURVE25519_X86 if X86 && 64BIT
	select CRYPTO_CHACHA20_NEON if (ARM || ARM64) && KERNEL_MODE_NEON
	select CRYPTO_POLY1305_NEON if ARM64 && KERNEL_MODE_NEON
	select CRYPTO_POLY1305_ARM if ARM
	select CRYPTO_CURVE25519_NEON if ARM && KERNEL_MODE_NEON
	select CRYPTO_CHACHA_MIPS if CPU_MIPS32_R2
	select CRYPTO_POLY1305_MIPS if CPU_MIPS32 || (CPU_MIPS64 && 64BIT)
	help
	  WireGuard is a secure, fast, and easy to use replacement for IPSec
	  that uses modern cryptography and clever networking tricks. It's
	  designed to be fairly general purpose and abstract enough to fit most
	  use cases, while at the same time remaining extremely simple to
	  configure. See www.wireguard.com for more info.

	  It's safe to say Y or M here, as the driver is very lightweight and
	  is only in use when an administrator chooses to add an interface.
49390 ++
# Debug switch for the driver; the wireguard Makefile turns it into -DDEBUG.
config WIREGUARD_DEBUG
	bool "Debugging checks and verbose messages"
	depends on WIREGUARD
	help
	  This will write log messages for handshake and other events
	  that occur for a WireGuard interface. It will also perform some
	  extra validation checks and unit tests at various points. This is
	  only useful for debugging.

	  Say N here unless you know what you're doing.
49401 ++
49402 + config EQUALIZER
49403 + tristate "EQL (serial line load balancing) support"
49404 + ---help---
49405 +diff --git a/drivers/net/Makefile b/drivers/net/Makefile
49406 +index 0d3ba056cda3..953b7c12f0b0 100644
49407 +--- a/drivers/net/Makefile
49408 ++++ b/drivers/net/Makefile
49409 +@@ -10,6 +10,7 @@ obj-$(CONFIG_BONDING) += bonding/
49410 + obj-$(CONFIG_IPVLAN) += ipvlan/
49411 + obj-$(CONFIG_IPVTAP) += ipvlan/
49412 + obj-$(CONFIG_DUMMY) += dummy.o
49413 ++obj-$(CONFIG_WIREGUARD) += wireguard/
49414 + obj-$(CONFIG_EQUALIZER) += eql.o
49415 + obj-$(CONFIG_IFB) += ifb.o
49416 + obj-$(CONFIG_MACSEC) += macsec.o
49417 +diff --git a/drivers/net/wireguard/Makefile b/drivers/net/wireguard/Makefile
49418 +new file mode 100644
49419 +index 000000000000..fc52b2cb500b
49420 +--- /dev/null
49421 ++++ b/drivers/net/wireguard/Makefile
49422 +@@ -0,0 +1,18 @@
# Compiler flags applied to every object in this directory.
ccflags-y := -O3
# Define pr_fmt so pr_*() messages are prefixed with the module name.
ccflags-y += -D'pr_fmt(fmt)=KBUILD_MODNAME ": " fmt'
# CONFIG_WIREGUARD_DEBUG=y defines DEBUG for the whole module.
ccflags-$(CONFIG_WIREGUARD_DEBUG) += -DDEBUG
# Objects that are linked together into wireguard.o.
wireguard-y := main.o
wireguard-y += noise.o
wireguard-y += device.o
wireguard-y += peer.o
wireguard-y += timers.o
wireguard-y += queueing.o
wireguard-y += send.o
wireguard-y += receive.o
wireguard-y += socket.o
wireguard-y += peerlookup.o
wireguard-y += allowedips.o
wireguard-y += ratelimiter.o
wireguard-y += cookie.o
wireguard-y += netlink.o
obj-$(CONFIG_WIREGUARD) := wireguard.o
49441 +diff --git a/drivers/net/wireguard/allowedips.c b/drivers/net/wireguard/allowedips.c
49442 +new file mode 100644
49443 +index 000000000000..72667d5399c3
49444 +--- /dev/null
49445 ++++ b/drivers/net/wireguard/allowedips.c
49446 +@@ -0,0 +1,381 @@
49447 ++// SPDX-License-Identifier: GPL-2.0
49448 ++/*
49449 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
49450 ++ */
49451 ++
49452 ++#include "allowedips.h"
49453 ++#include "peer.h"
49454 ++
49455 ++static void swap_endian(u8 *dst, const u8 *src, u8 bits)
49456 ++{
49457 ++ if (bits == 32) {
49458 ++ *(u32 *)dst = be32_to_cpu(*(const __be32 *)src);
49459 ++ } else if (bits == 128) {
49460 ++ ((u64 *)dst)[0] = be64_to_cpu(((const __be64 *)src)[0]);
49461 ++ ((u64 *)dst)[1] = be64_to_cpu(((const __be64 *)src)[1]);
49462 ++ }
49463 ++}
49464 ++
49465 ++static void copy_and_assign_cidr(struct allowedips_node *node, const u8 *src,
49466 ++ u8 cidr, u8 bits)
49467 ++{
49468 ++ node->cidr = cidr;
49469 ++ node->bit_at_a = cidr / 8U;
49470 ++#ifdef __LITTLE_ENDIAN
49471 ++ node->bit_at_a ^= (bits / 8U - 1U) % 8U;
49472 ++#endif
49473 ++ node->bit_at_b = 7U - (cidr % 8U);
49474 ++ node->bitlen = bits;
49475 ++ memcpy(node->bits, src, bits / 8U);
49476 ++}
/* Child slot of parent selected by the bit of key that parent branches on
 * (byte index bit_at_a, shift bit_at_b). Expands to an lvalue, so it can be
 * passed to rcu_assign_pointer(). Arguments are parenthesized so that
 * arbitrary expressions may be passed safely.
 */
#define CHOOSE_NODE(parent, key) \
	(parent)->bit[((key)[(parent)->bit_at_a] >> (parent)->bit_at_b) & 1]
49479 ++
49480 ++static void node_free_rcu(struct rcu_head *rcu)
49481 ++{
49482 ++ kfree(container_of(rcu, struct allowedips_node, rcu));
49483 ++}
49484 ++
49485 ++static void push_rcu(struct allowedips_node **stack,
49486 ++ struct allowedips_node __rcu *p, unsigned int *len)
49487 ++{
49488 ++ if (rcu_access_pointer(p)) {
49489 ++ WARN_ON(IS_ENABLED(DEBUG) && *len >= 128);
49490 ++ stack[(*len)++] = rcu_dereference_raw(p);
49491 ++ }
49492 ++}
49493 ++
49494 ++static void root_free_rcu(struct rcu_head *rcu)
49495 ++{
49496 ++ struct allowedips_node *node, *stack[128] = {
49497 ++ container_of(rcu, struct allowedips_node, rcu) };
49498 ++ unsigned int len = 1;
49499 ++
49500 ++ while (len > 0 && (node = stack[--len])) {
49501 ++ push_rcu(stack, node->bit[0], &len);
49502 ++ push_rcu(stack, node->bit[1], &len);
49503 ++ kfree(node);
49504 ++ }
49505 ++}
49506 ++
49507 ++static void root_remove_peer_lists(struct allowedips_node *root)
49508 ++{
49509 ++ struct allowedips_node *node, *stack[128] = { root };
49510 ++ unsigned int len = 1;
49511 ++
49512 ++ while (len > 0 && (node = stack[--len])) {
49513 ++ push_rcu(stack, node->bit[0], &len);
49514 ++ push_rcu(stack, node->bit[1], &len);
49515 ++ if (rcu_access_pointer(node->peer))
49516 ++ list_del(&node->peer_list);
49517 ++ }
49518 ++}
49519 ++
49520 ++static void walk_remove_by_peer(struct allowedips_node __rcu **top,
49521 ++ struct wg_peer *peer, struct mutex *lock)
49522 ++{
49523 ++#define REF(p) rcu_access_pointer(p)
49524 ++#define DEREF(p) rcu_dereference_protected(*(p), lockdep_is_held(lock))
49525 ++#define PUSH(p) ({ \
49526 ++ WARN_ON(IS_ENABLED(DEBUG) && len >= 128); \
49527 ++ stack[len++] = p; \
49528 ++ })
49529 ++
49530 ++ struct allowedips_node __rcu **stack[128], **nptr;
49531 ++ struct allowedips_node *node, *prev;
49532 ++ unsigned int len;
49533 ++
49534 ++ if (unlikely(!peer || !REF(*top)))
49535 ++ return;
49536 ++
49537 ++ for (prev = NULL, len = 0, PUSH(top); len > 0; prev = node) {
49538 ++ nptr = stack[len - 1];
49539 ++ node = DEREF(nptr);
49540 ++ if (!node) {
49541 ++ --len;
49542 ++ continue;
49543 ++ }
49544 ++ if (!prev || REF(prev->bit[0]) == node ||
49545 ++ REF(prev->bit[1]) == node) {
49546 ++ if (REF(node->bit[0]))
49547 ++ PUSH(&node->bit[0]);
49548 ++ else if (REF(node->bit[1]))
49549 ++ PUSH(&node->bit[1]);
49550 ++ } else if (REF(node->bit[0]) == prev) {
49551 ++ if (REF(node->bit[1]))
49552 ++ PUSH(&node->bit[1]);
49553 ++ } else {
49554 ++ if (rcu_dereference_protected(node->peer,
49555 ++ lockdep_is_held(lock)) == peer) {
49556 ++ RCU_INIT_POINTER(node->peer, NULL);
49557 ++ list_del_init(&node->peer_list);
49558 ++ if (!node->bit[0] || !node->bit[1]) {
49559 ++ rcu_assign_pointer(*nptr, DEREF(
49560 ++ &node->bit[!REF(node->bit[0])]));
49561 ++ call_rcu(&node->rcu, node_free_rcu);
49562 ++ node = DEREF(nptr);
49563 ++ }
49564 ++ }
49565 ++ --len;
49566 ++ }
49567 ++ }
49568 ++
49569 ++#undef REF
49570 ++#undef DEREF
49571 ++#undef PUSH
49572 ++}
49573 ++
49574 ++static unsigned int fls128(u64 a, u64 b)
49575 ++{
49576 ++ return a ? fls64(a) + 64U : fls64(b);
49577 ++}
49578 ++
49579 ++static u8 common_bits(const struct allowedips_node *node, const u8 *key,
49580 ++ u8 bits)
49581 ++{
49582 ++ if (bits == 32)
49583 ++ return 32U - fls(*(const u32 *)node->bits ^ *(const u32 *)key);
49584 ++ else if (bits == 128)
49585 ++ return 128U - fls128(
49586 ++ *(const u64 *)&node->bits[0] ^ *(const u64 *)&key[0],
49587 ++ *(const u64 *)&node->bits[8] ^ *(const u64 *)&key[8]);
49588 ++ return 0;
49589 ++}
49590 ++
49591 ++static bool prefix_matches(const struct allowedips_node *node, const u8 *key,
49592 ++ u8 bits)
49593 ++{
49594 ++ /* This could be much faster if it actually just compared the common
49595 ++ * bits properly, by precomputing a mask bswap(~0 << (32 - cidr)), and
49596 ++ * the rest, but it turns out that common_bits is already super fast on
49597 ++ * modern processors, even taking into account the unfortunate bswap.
49598 ++ * So, we just inline it like this instead.
49599 ++ */
49600 ++ return common_bits(node, key, bits) >= node->cidr;
49601 ++}
49602 ++
49603 ++static struct allowedips_node *find_node(struct allowedips_node *trie, u8 bits,
49604 ++ const u8 *key)
49605 ++{
49606 ++ struct allowedips_node *node = trie, *found = NULL;
49607 ++
49608 ++ while (node && prefix_matches(node, key, bits)) {
49609 ++ if (rcu_access_pointer(node->peer))
49610 ++ found = node;
49611 ++ if (node->cidr == bits)
49612 ++ break;
49613 ++ node = rcu_dereference_bh(CHOOSE_NODE(node, key));
49614 ++ }
49615 ++ return found;
49616 ++}
49617 ++
49618 ++/* Returns a strong reference to a peer */
49619 ++static struct wg_peer *lookup(struct allowedips_node __rcu *root, u8 bits,
49620 ++ const void *be_ip)
49621 ++{
49622 ++ /* Aligned so it can be passed to fls/fls64 */
49623 ++ u8 ip[16] __aligned(__alignof(u64));
49624 ++ struct allowedips_node *node;
49625 ++ struct wg_peer *peer = NULL;
49626 ++
49627 ++ swap_endian(ip, be_ip, bits);
49628 ++
49629 ++ rcu_read_lock_bh();
49630 ++retry:
49631 ++ node = find_node(rcu_dereference_bh(root), bits, ip);
49632 ++ if (node) {
49633 ++ peer = wg_peer_get_maybe_zero(rcu_dereference_bh(node->peer));
49634 ++ if (!peer)
49635 ++ goto retry;
49636 ++ }
49637 ++ rcu_read_unlock_bh();
49638 ++ return peer;
49639 ++}
49640 ++
49641 ++static bool node_placement(struct allowedips_node __rcu *trie, const u8 *key,
49642 ++ u8 cidr, u8 bits, struct allowedips_node **rnode,
49643 ++ struct mutex *lock)
49644 ++{
49645 ++ struct allowedips_node *node = rcu_dereference_protected(trie,
49646 ++ lockdep_is_held(lock));
49647 ++ struct allowedips_node *parent = NULL;
49648 ++ bool exact = false;
49649 ++
49650 ++ while (node && node->cidr <= cidr && prefix_matches(node, key, bits)) {
49651 ++ parent = node;
49652 ++ if (parent->cidr == cidr) {
49653 ++ exact = true;
49654 ++ break;
49655 ++ }
49656 ++ node = rcu_dereference_protected(CHOOSE_NODE(parent, key),
49657 ++ lockdep_is_held(lock));
49658 ++ }
49659 ++ *rnode = parent;
49660 ++ return exact;
49661 ++}
49662 ++
49663 ++static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
49664 ++ u8 cidr, struct wg_peer *peer, struct mutex *lock)
49665 ++{
49666 ++ struct allowedips_node *node, *parent, *down, *newnode;
49667 ++
49668 ++ if (unlikely(cidr > bits || !peer))
49669 ++ return -EINVAL;
49670 ++
49671 ++ if (!rcu_access_pointer(*trie)) {
49672 ++ node = kzalloc(sizeof(*node), GFP_KERNEL);
49673 ++ if (unlikely(!node))
49674 ++ return -ENOMEM;
49675 ++ RCU_INIT_POINTER(node->peer, peer);
49676 ++ list_add_tail(&node->peer_list, &peer->allowedips_list);
49677 ++ copy_and_assign_cidr(node, key, cidr, bits);
49678 ++ rcu_assign_pointer(*trie, node);
49679 ++ return 0;
49680 ++ }
49681 ++ if (node_placement(*trie, key, cidr, bits, &node, lock)) {
49682 ++ rcu_assign_pointer(node->peer, peer);
49683 ++ list_move_tail(&node->peer_list, &peer->allowedips_list);
49684 ++ return 0;
49685 ++ }
49686 ++
49687 ++ newnode = kzalloc(sizeof(*newnode), GFP_KERNEL);
49688 ++ if (unlikely(!newnode))
49689 ++ return -ENOMEM;
49690 ++ RCU_INIT_POINTER(newnode->peer, peer);
49691 ++ list_add_tail(&newnode->peer_list, &peer->allowedips_list);
49692 ++ copy_and_assign_cidr(newnode, key, cidr, bits);
49693 ++
49694 ++ if (!node) {
49695 ++ down = rcu_dereference_protected(*trie, lockdep_is_held(lock));
49696 ++ } else {
49697 ++ down = rcu_dereference_protected(CHOOSE_NODE(node, key),
49698 ++ lockdep_is_held(lock));
49699 ++ if (!down) {
49700 ++ rcu_assign_pointer(CHOOSE_NODE(node, key), newnode);
49701 ++ return 0;
49702 ++ }
49703 ++ }
49704 ++ cidr = min(cidr, common_bits(down, key, bits));
49705 ++ parent = node;
49706 ++
49707 ++ if (newnode->cidr == cidr) {
49708 ++ rcu_assign_pointer(CHOOSE_NODE(newnode, down->bits), down);
49709 ++ if (!parent)
49710 ++ rcu_assign_pointer(*trie, newnode);
49711 ++ else
49712 ++ rcu_assign_pointer(CHOOSE_NODE(parent, newnode->bits),
49713 ++ newnode);
49714 ++ } else {
49715 ++ node = kzalloc(sizeof(*node), GFP_KERNEL);
49716 ++ if (unlikely(!node)) {
49717 ++ kfree(newnode);
49718 ++ return -ENOMEM;
49719 ++ }
49720 ++ INIT_LIST_HEAD(&node->peer_list);
49721 ++ copy_and_assign_cidr(node, newnode->bits, cidr, bits);
49722 ++
49723 ++ rcu_assign_pointer(CHOOSE_NODE(node, down->bits), down);
49724 ++ rcu_assign_pointer(CHOOSE_NODE(node, newnode->bits), newnode);
49725 ++ if (!parent)
49726 ++ rcu_assign_pointer(*trie, node);
49727 ++ else
49728 ++ rcu_assign_pointer(CHOOSE_NODE(parent, node->bits),
49729 ++ node);
49730 ++ }
49731 ++ return 0;
49732 ++}
49733 ++
49734 ++void wg_allowedips_init(struct allowedips *table)
49735 ++{
49736 ++ table->root4 = table->root6 = NULL;
49737 ++ table->seq = 1;
49738 ++}
49739 ++
49740 ++void wg_allowedips_free(struct allowedips *table, struct mutex *lock)
49741 ++{
49742 ++ struct allowedips_node __rcu *old4 = table->root4, *old6 = table->root6;
49743 ++
49744 ++ ++table->seq;
49745 ++ RCU_INIT_POINTER(table->root4, NULL);
49746 ++ RCU_INIT_POINTER(table->root6, NULL);
49747 ++ if (rcu_access_pointer(old4)) {
49748 ++ struct allowedips_node *node = rcu_dereference_protected(old4,
49749 ++ lockdep_is_held(lock));
49750 ++
49751 ++ root_remove_peer_lists(node);
49752 ++ call_rcu(&node->rcu, root_free_rcu);
49753 ++ }
49754 ++ if (rcu_access_pointer(old6)) {
49755 ++ struct allowedips_node *node = rcu_dereference_protected(old6,
49756 ++ lockdep_is_held(lock));
49757 ++
49758 ++ root_remove_peer_lists(node);
49759 ++ call_rcu(&node->rcu, root_free_rcu);
49760 ++ }
49761 ++}
49762 ++
49763 ++int wg_allowedips_insert_v4(struct allowedips *table, const struct in_addr *ip,
49764 ++ u8 cidr, struct wg_peer *peer, struct mutex *lock)
49765 ++{
49766 ++ /* Aligned so it can be passed to fls */
49767 ++ u8 key[4] __aligned(__alignof(u32));
49768 ++
49769 ++ ++table->seq;
49770 ++ swap_endian(key, (const u8 *)ip, 32);
49771 ++ return add(&table->root4, 32, key, cidr, peer, lock);
49772 ++}
49773 ++
49774 ++int wg_allowedips_insert_v6(struct allowedips *table, const struct in6_addr *ip,
49775 ++ u8 cidr, struct wg_peer *peer, struct mutex *lock)
49776 ++{
49777 ++ /* Aligned so it can be passed to fls64 */
49778 ++ u8 key[16] __aligned(__alignof(u64));
49779 ++
49780 ++ ++table->seq;
49781 ++ swap_endian(key, (const u8 *)ip, 128);
49782 ++ return add(&table->root6, 128, key, cidr, peer, lock);
49783 ++}
49784 ++
49785 ++void wg_allowedips_remove_by_peer(struct allowedips *table,
49786 ++ struct wg_peer *peer, struct mutex *lock)
49787 ++{
49788 ++ ++table->seq;
49789 ++ walk_remove_by_peer(&table->root4, peer, lock);
49790 ++ walk_remove_by_peer(&table->root6, peer, lock);
49791 ++}
49792 ++
49793 ++int wg_allowedips_read_node(struct allowedips_node *node, u8 ip[16], u8 *cidr)
49794 ++{
49795 ++ const unsigned int cidr_bytes = DIV_ROUND_UP(node->cidr, 8U);
49796 ++ swap_endian(ip, node->bits, node->bitlen);
49797 ++ memset(ip + cidr_bytes, 0, node->bitlen / 8U - cidr_bytes);
49798 ++ if (node->cidr)
49799 ++ ip[cidr_bytes - 1U] &= ~0U << (-node->cidr % 8U);
49800 ++
49801 ++ *cidr = node->cidr;
49802 ++ return node->bitlen == 32 ? AF_INET : AF_INET6;
49803 ++}
49804 ++
49805 ++/* Returns a strong reference to a peer */
49806 ++struct wg_peer *wg_allowedips_lookup_dst(struct allowedips *table,
49807 ++ struct sk_buff *skb)
49808 ++{
49809 ++ if (skb->protocol == htons(ETH_P_IP))
49810 ++ return lookup(table->root4, 32, &ip_hdr(skb)->daddr);
49811 ++ else if (skb->protocol == htons(ETH_P_IPV6))
49812 ++ return lookup(table->root6, 128, &ipv6_hdr(skb)->daddr);
49813 ++ return NULL;
49814 ++}
49815 ++
49816 ++/* Returns a strong reference to a peer */
49817 ++struct wg_peer *wg_allowedips_lookup_src(struct allowedips *table,
49818 ++ struct sk_buff *skb)
49819 ++{
49820 ++ if (skb->protocol == htons(ETH_P_IP))
49821 ++ return lookup(table->root4, 32, &ip_hdr(skb)->saddr);
49822 ++ else if (skb->protocol == htons(ETH_P_IPV6))
49823 ++ return lookup(table->root6, 128, &ipv6_hdr(skb)->saddr);
49824 ++ return NULL;
49825 ++}
49826 ++
49827 ++#include "selftest/allowedips.c"
49828 +diff --git a/drivers/net/wireguard/allowedips.h b/drivers/net/wireguard/allowedips.h
49829 +new file mode 100644
49830 +index 000000000000..e5c83cafcef4
49831 +--- /dev/null
49832 ++++ b/drivers/net/wireguard/allowedips.h
49833 +@@ -0,0 +1,59 @@
49834 ++/* SPDX-License-Identifier: GPL-2.0 */
49835 ++/*
49836 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
49837 ++ */
49838 ++
49839 ++#ifndef _WG_ALLOWEDIPS_H
49840 ++#define _WG_ALLOWEDIPS_H
49841 ++
49842 ++#include <linux/mutex.h>
49843 ++#include <linux/ip.h>
49844 ++#include <linux/ipv6.h>
49845 ++
49846 ++struct wg_peer;
49847 ++
49848 ++struct allowedips_node {
49849 ++ struct wg_peer __rcu *peer;
49850 ++ struct allowedips_node __rcu *bit[2];
49851 ++ /* While it may seem scandalous that we waste space for v4,
49852 ++ * we're alloc'ing to the nearest power of 2 anyway, so this
49853 ++ * doesn't actually make a difference.
49854 ++ */
49855 ++ u8 bits[16] __aligned(__alignof(u64));
49856 ++ u8 cidr, bit_at_a, bit_at_b, bitlen;
49857 ++
49858 ++ /* Keep rarely used list at bottom to be beyond cache line. */
49859 ++ union {
49860 ++ struct list_head peer_list;
49861 ++ struct rcu_head rcu;
49862 ++ };
49863 ++};
49864 ++
/* Allowed-IPs table: one trie per address family plus a change counter. */
struct allowedips {
	struct allowedips_node __rcu *root4;
	struct allowedips_node __rcu *root6;
	/* Starts at 1 and is incremented by every insert, remove-by-peer
	 * and free operation on the table.
	 */
	u64 seq;
};
49870 ++
49871 ++void wg_allowedips_init(struct allowedips *table);
49872 ++void wg_allowedips_free(struct allowedips *table, struct mutex *mutex);
49873 ++int wg_allowedips_insert_v4(struct allowedips *table, const struct in_addr *ip,
49874 ++ u8 cidr, struct wg_peer *peer, struct mutex *lock);
49875 ++int wg_allowedips_insert_v6(struct allowedips *table, const struct in6_addr *ip,
49876 ++ u8 cidr, struct wg_peer *peer, struct mutex *lock);
49877 ++void wg_allowedips_remove_by_peer(struct allowedips *table,
49878 ++ struct wg_peer *peer, struct mutex *lock);
49879 ++/* The ip input pointer should be __aligned(__alignof(u64))) */
49880 ++int wg_allowedips_read_node(struct allowedips_node *node, u8 ip[16], u8 *cidr);
49881 ++
49882 ++/* These return a strong reference to a peer: */
49883 ++struct wg_peer *wg_allowedips_lookup_dst(struct allowedips *table,
49884 ++ struct sk_buff *skb);
49885 ++struct wg_peer *wg_allowedips_lookup_src(struct allowedips *table,
49886 ++ struct sk_buff *skb);
49887 ++
49888 ++#ifdef DEBUG
49889 ++bool wg_allowedips_selftest(void);
49890 ++#endif
49891 ++
49892 ++#endif /* _WG_ALLOWEDIPS_H */
49893 +diff --git a/drivers/net/wireguard/cookie.c b/drivers/net/wireguard/cookie.c
49894 +new file mode 100644
49895 +index 000000000000..4956f0499c19
49896 +--- /dev/null
49897 ++++ b/drivers/net/wireguard/cookie.c
49898 +@@ -0,0 +1,236 @@
49899 ++// SPDX-License-Identifier: GPL-2.0
49900 ++/*
49901 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
49902 ++ */
49903 ++
49904 ++#include "cookie.h"
49905 ++#include "peer.h"
49906 ++#include "device.h"
49907 ++#include "messages.h"
49908 ++#include "ratelimiter.h"
49909 ++#include "timers.h"
49910 ++
49911 ++#include <crypto/blake2s.h>
49912 ++#include <crypto/chacha20poly1305.h>
49913 ++
49914 ++#include <net/ipv6.h>
49915 ++#include <crypto/algapi.h>
49916 ++
49917 ++void wg_cookie_checker_init(struct cookie_checker *checker,
49918 ++ struct wg_device *wg)
49919 ++{
49920 ++ init_rwsem(&checker->secret_lock);
49921 ++ checker->secret_birthdate = ktime_get_coarse_boottime_ns();
49922 ++ get_random_bytes(checker->secret, NOISE_HASH_LEN);
49923 ++ checker->device = wg;
49924 ++}
49925 ++
/* Fixed labels hashed in front of a public key by precompute_key(). Each
 * string is exactly COOKIE_KEY_LABEL_LEN bytes, so the arrays hold no
 * terminating NUL.
 */
enum { COOKIE_KEY_LABEL_LEN = 8 };
static const u8 mac1_key_label[COOKIE_KEY_LABEL_LEN] = "mac1----";
static const u8 cookie_key_label[COOKIE_KEY_LABEL_LEN] = "cookie--";
49929 ++
49930 ++static void precompute_key(u8 key[NOISE_SYMMETRIC_KEY_LEN],
49931 ++ const u8 pubkey[NOISE_PUBLIC_KEY_LEN],
49932 ++ const u8 label[COOKIE_KEY_LABEL_LEN])
49933 ++{
49934 ++ struct blake2s_state blake;
49935 ++
49936 ++ blake2s_init(&blake, NOISE_SYMMETRIC_KEY_LEN);
49937 ++ blake2s_update(&blake, label, COOKIE_KEY_LABEL_LEN);
49938 ++ blake2s_update(&blake, pubkey, NOISE_PUBLIC_KEY_LEN);
49939 ++ blake2s_final(&blake, key);
49940 ++}
49941 ++
49942 ++/* Must hold peer->handshake.static_identity->lock */
49943 ++void wg_cookie_checker_precompute_device_keys(struct cookie_checker *checker)
49944 ++{
49945 ++ if (likely(checker->device->static_identity.has_identity)) {
49946 ++ precompute_key(checker->cookie_encryption_key,
49947 ++ checker->device->static_identity.static_public,
49948 ++ cookie_key_label);
49949 ++ precompute_key(checker->message_mac1_key,
49950 ++ checker->device->static_identity.static_public,
49951 ++ mac1_key_label);
49952 ++ } else {
49953 ++ memset(checker->cookie_encryption_key, 0,
49954 ++ NOISE_SYMMETRIC_KEY_LEN);
49955 ++ memset(checker->message_mac1_key, 0, NOISE_SYMMETRIC_KEY_LEN);
49956 ++ }
49957 ++}
49958 ++
49959 ++void wg_cookie_checker_precompute_peer_keys(struct wg_peer *peer)
49960 ++{
49961 ++ precompute_key(peer->latest_cookie.cookie_decryption_key,
49962 ++ peer->handshake.remote_static, cookie_key_label);
49963 ++ precompute_key(peer->latest_cookie.message_mac1_key,
49964 ++ peer->handshake.remote_static, mac1_key_label);
49965 ++}
49966 ++
49967 ++void wg_cookie_init(struct cookie *cookie)
49968 ++{
49969 ++ memset(cookie, 0, sizeof(*cookie));
49970 ++ init_rwsem(&cookie->lock);
49971 ++}
49972 ++
49973 ++static void compute_mac1(u8 mac1[COOKIE_LEN], const void *message, size_t len,
49974 ++ const u8 key[NOISE_SYMMETRIC_KEY_LEN])
49975 ++{
49976 ++ len = len - sizeof(struct message_macs) +
49977 ++ offsetof(struct message_macs, mac1);
49978 ++ blake2s(mac1, message, key, COOKIE_LEN, len, NOISE_SYMMETRIC_KEY_LEN);
49979 ++}
49980 ++
49981 ++static void compute_mac2(u8 mac2[COOKIE_LEN], const void *message, size_t len,
49982 ++ const u8 cookie[COOKIE_LEN])
49983 ++{
49984 ++ len = len - sizeof(struct message_macs) +
49985 ++ offsetof(struct message_macs, mac2);
49986 ++ blake2s(mac2, message, cookie, COOKIE_LEN, len, COOKIE_LEN);
49987 ++}
49988 ++
49989 ++static void make_cookie(u8 cookie[COOKIE_LEN], struct sk_buff *skb,
49990 ++ struct cookie_checker *checker)
49991 ++{
49992 ++ struct blake2s_state state;
49993 ++
49994 ++ if (wg_birthdate_has_expired(checker->secret_birthdate,
49995 ++ COOKIE_SECRET_MAX_AGE)) {
49996 ++ down_write(&checker->secret_lock);
49997 ++ checker->secret_birthdate = ktime_get_coarse_boottime_ns();
49998 ++ get_random_bytes(checker->secret, NOISE_HASH_LEN);
49999 ++ up_write(&checker->secret_lock);
50000 ++ }
50001 ++
50002 ++ down_read(&checker->secret_lock);
50003 ++
50004 ++ blake2s_init_key(&state, COOKIE_LEN, checker->secret, NOISE_HASH_LEN);
50005 ++ if (skb->protocol == htons(ETH_P_IP))
50006 ++ blake2s_update(&state, (u8 *)&ip_hdr(skb)->saddr,
50007 ++ sizeof(struct in_addr));
50008 ++ else if (skb->protocol == htons(ETH_P_IPV6))
50009 ++ blake2s_update(&state, (u8 *)&ipv6_hdr(skb)->saddr,
50010 ++ sizeof(struct in6_addr));
50011 ++ blake2s_update(&state, (u8 *)&udp_hdr(skb)->source, sizeof(__be16));
50012 ++ blake2s_final(&state, cookie);
50013 ++
50014 ++ up_read(&checker->secret_lock);
50015 ++}
50016 ++
50017 ++enum cookie_mac_state wg_cookie_validate_packet(struct cookie_checker *checker,
50018 ++ struct sk_buff *skb,
50019 ++ bool check_cookie)
50020 ++{
50021 ++ struct message_macs *macs = (struct message_macs *)
50022 ++ (skb->data + skb->len - sizeof(*macs));
50023 ++ enum cookie_mac_state ret;
50024 ++ u8 computed_mac[COOKIE_LEN];
50025 ++ u8 cookie[COOKIE_LEN];
50026 ++
50027 ++ ret = INVALID_MAC;
50028 ++ compute_mac1(computed_mac, skb->data, skb->len,
50029 ++ checker->message_mac1_key);
50030 ++ if (crypto_memneq(computed_mac, macs->mac1, COOKIE_LEN))
50031 ++ goto out;
50032 ++
50033 ++ ret = VALID_MAC_BUT_NO_COOKIE;
50034 ++
50035 ++ if (!check_cookie)
50036 ++ goto out;
50037 ++
50038 ++ make_cookie(cookie, skb, checker);
50039 ++
50040 ++ compute_mac2(computed_mac, skb->data, skb->len, cookie);
50041 ++ if (crypto_memneq(computed_mac, macs->mac2, COOKIE_LEN))
50042 ++ goto out;
50043 ++
50044 ++ ret = VALID_MAC_WITH_COOKIE_BUT_RATELIMITED;
50045 ++ if (!wg_ratelimiter_allow(skb, dev_net(checker->device->dev)))
50046 ++ goto out;
50047 ++
50048 ++ ret = VALID_MAC_WITH_COOKIE;
50049 ++
50050 ++out:
50051 ++ return ret;
50052 ++}
50053 ++
50054 ++void wg_cookie_add_mac_to_packet(void *message, size_t len,
50055 ++ struct wg_peer *peer)
50056 ++{
50057 ++ struct message_macs *macs = (struct message_macs *)
50058 ++ ((u8 *)message + len - sizeof(*macs));
50059 ++
50060 ++ down_write(&peer->latest_cookie.lock);
50061 ++ compute_mac1(macs->mac1, message, len,
50062 ++ peer->latest_cookie.message_mac1_key);
50063 ++ memcpy(peer->latest_cookie.last_mac1_sent, macs->mac1, COOKIE_LEN);
50064 ++ peer->latest_cookie.have_sent_mac1 = true;
50065 ++ up_write(&peer->latest_cookie.lock);
50066 ++
50067 ++ down_read(&peer->latest_cookie.lock);
50068 ++ if (peer->latest_cookie.is_valid &&
50069 ++ !wg_birthdate_has_expired(peer->latest_cookie.birthdate,
50070 ++ COOKIE_SECRET_MAX_AGE - COOKIE_SECRET_LATENCY))
50071 ++ compute_mac2(macs->mac2, message, len,
50072 ++ peer->latest_cookie.cookie);
50073 ++ else
50074 ++ memset(macs->mac2, 0, COOKIE_LEN);
50075 ++ up_read(&peer->latest_cookie.lock);
50076 ++}
50077 ++
50078 ++void wg_cookie_message_create(struct message_handshake_cookie *dst,
50079 ++ struct sk_buff *skb, __le32 index,
50080 ++ struct cookie_checker *checker)
50081 ++{
50082 ++ struct message_macs *macs = (struct message_macs *)
50083 ++ ((u8 *)skb->data + skb->len - sizeof(*macs));
50084 ++ u8 cookie[COOKIE_LEN];
50085 ++
50086 ++ dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE);
50087 ++ dst->receiver_index = index;
50088 ++ get_random_bytes_wait(dst->nonce, COOKIE_NONCE_LEN);
50089 ++
50090 ++ make_cookie(cookie, skb, checker);
50091 ++ xchacha20poly1305_encrypt(dst->encrypted_cookie, cookie, COOKIE_LEN,
50092 ++ macs->mac1, COOKIE_LEN, dst->nonce,
50093 ++ checker->cookie_encryption_key);
50094 ++}
50095 ++
50096 ++void wg_cookie_message_consume(struct message_handshake_cookie *src,
50097 ++ struct wg_device *wg)
50098 ++{
50099 ++ struct wg_peer *peer = NULL;
50100 ++ u8 cookie[COOKIE_LEN];
50101 ++ bool ret;
50102 ++
50103 ++ if (unlikely(!wg_index_hashtable_lookup(wg->index_hashtable,
50104 ++ INDEX_HASHTABLE_HANDSHAKE |
50105 ++ INDEX_HASHTABLE_KEYPAIR,
50106 ++ src->receiver_index, &peer)))
50107 ++ return;
50108 ++
50109 ++ down_read(&peer->latest_cookie.lock);
50110 ++ if (unlikely(!peer->latest_cookie.have_sent_mac1)) {
50111 ++ up_read(&peer->latest_cookie.lock);
50112 ++ goto out;
50113 ++ }
50114 ++ ret = xchacha20poly1305_decrypt(
50115 ++ cookie, src->encrypted_cookie, sizeof(src->encrypted_cookie),
50116 ++ peer->latest_cookie.last_mac1_sent, COOKIE_LEN, src->nonce,
50117 ++ peer->latest_cookie.cookie_decryption_key);
50118 ++ up_read(&peer->latest_cookie.lock);
50119 ++
50120 ++ if (ret) {
50121 ++ down_write(&peer->latest_cookie.lock);
50122 ++ memcpy(peer->latest_cookie.cookie, cookie, COOKIE_LEN);
50123 ++ peer->latest_cookie.birthdate = ktime_get_coarse_boottime_ns();
50124 ++ peer->latest_cookie.is_valid = true;
50125 ++ peer->latest_cookie.have_sent_mac1 = false;
50126 ++ up_write(&peer->latest_cookie.lock);
50127 ++ } else {
50128 ++ net_dbg_ratelimited("%s: Could not decrypt invalid cookie response\n",
50129 ++ wg->dev->name);
50130 ++ }
50131 ++
50132 ++out:
50133 ++ wg_peer_put(peer);
50134 ++}
50135 +diff --git a/drivers/net/wireguard/cookie.h b/drivers/net/wireguard/cookie.h
50136 +new file mode 100644
50137 +index 000000000000..c4bd61ca03f2
50138 +--- /dev/null
50139 ++++ b/drivers/net/wireguard/cookie.h
50140 +@@ -0,0 +1,59 @@
50141 ++/* SPDX-License-Identifier: GPL-2.0 */
50142 ++/*
50143 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
50144 ++ */
50145 ++
50146 ++#ifndef _WG_COOKIE_H
50147 ++#define _WG_COOKIE_H
50148 ++
50149 ++#include "messages.h"
50150 ++#include <linux/rwsem.h>
50151 ++
50152 ++struct wg_peer;
50153 ++
/* Per-device state for creating and validating cookies. */
struct cookie_checker {
	/* Random secret that keys the cookie MAC; replaced by make_cookie()
	 * once older than COOKIE_SECRET_MAX_AGE.
	 */
	u8 secret[NOISE_HASH_LEN];
	/* Both keys are derived from the device's static public key. */
	u8 cookie_encryption_key[NOISE_SYMMETRIC_KEY_LEN];
	u8 message_mac1_key[NOISE_SYMMETRIC_KEY_LEN];
	/* Time at which secret was last generated. */
	u64 secret_birthdate;
	/* Taken for write while rotating secret, for read while using it. */
	struct rw_semaphore secret_lock;
	struct wg_device *device;
};
50162 ++
/* Per-peer cookie state (peer->latest_cookie). */
struct cookie {
	/* Time at which the current cookie was received. */
	u64 birthdate;
	/* Set once a cookie reply has been decrypted successfully. */
	bool is_valid;
	u8 cookie[COOKIE_LEN];
	/* True between sending a mac1 and consuming a cookie reply for it. */
	bool have_sent_mac1;
	u8 last_mac1_sent[COOKIE_LEN];
	/* Both keys are derived from the peer's static public key. */
	u8 cookie_decryption_key[NOISE_SYMMETRIC_KEY_LEN];
	u8 message_mac1_key[NOISE_SYMMETRIC_KEY_LEN];
	struct rw_semaphore lock;
};
50173 ++
/* Result of wg_cookie_validate_packet(). */
enum cookie_mac_state {
	/* mac1 did not verify. */
	INVALID_MAC,
	/* mac1 verified; mac2 was not checked or did not verify. */
	VALID_MAC_BUT_NO_COOKIE,
	/* mac1 and mac2 verified, but the rate limiter rejected the packet. */
	VALID_MAC_WITH_COOKIE_BUT_RATELIMITED,
	/* mac1 and mac2 verified and the rate limiter allowed the packet. */
	VALID_MAC_WITH_COOKIE
};
50180 ++
50181 ++void wg_cookie_checker_init(struct cookie_checker *checker,
50182 ++ struct wg_device *wg);
50183 ++void wg_cookie_checker_precompute_device_keys(struct cookie_checker *checker);
50184 ++void wg_cookie_checker_precompute_peer_keys(struct wg_peer *peer);
50185 ++void wg_cookie_init(struct cookie *cookie);
50186 ++
50187 ++enum cookie_mac_state wg_cookie_validate_packet(struct cookie_checker *checker,
50188 ++ struct sk_buff *skb,
50189 ++ bool check_cookie);
50190 ++void wg_cookie_add_mac_to_packet(void *message, size_t len,
50191 ++ struct wg_peer *peer);
50192 ++
50193 ++void wg_cookie_message_create(struct message_handshake_cookie *src,
50194 ++ struct sk_buff *skb, __le32 index,
50195 ++ struct cookie_checker *checker);
50196 ++void wg_cookie_message_consume(struct message_handshake_cookie *src,
50197 ++ struct wg_device *wg);
50198 ++
50199 ++#endif /* _WG_COOKIE_H */
50200 +diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c
50201 +new file mode 100644
50202 +index 000000000000..16b19824b9ad
50203 +--- /dev/null
50204 ++++ b/drivers/net/wireguard/device.c
50205 +@@ -0,0 +1,458 @@
50206 ++// SPDX-License-Identifier: GPL-2.0
50207 ++/*
50208 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
50209 ++ */
50210 ++
50211 ++#include "queueing.h"
50212 ++#include "socket.h"
50213 ++#include "timers.h"
50214 ++#include "device.h"
50215 ++#include "ratelimiter.h"
50216 ++#include "peer.h"
50217 ++#include "messages.h"
50218 ++
50219 ++#include <linux/module.h>
50220 ++#include <linux/rtnetlink.h>
50221 ++#include <linux/inet.h>
50222 ++#include <linux/netdevice.h>
50223 ++#include <linux/inetdevice.h>
50224 ++#include <linux/if_arp.h>
50225 ++#include <linux/icmp.h>
50226 ++#include <linux/suspend.h>
50227 ++#include <net/icmp.h>
50228 ++#include <net/rtnetlink.h>
50229 ++#include <net/ip_tunnels.h>
50230 ++#include <net/addrconf.h>
50231 ++
/* List of wg_device instances (linked through their device_list member);
 * wg_pm_notification() walks it while holding the rtnl lock.
 */
static LIST_HEAD(device_list);
50233 ++
50234 ++static int wg_open(struct net_device *dev)
50235 ++{
50236 ++ struct in_device *dev_v4 = __in_dev_get_rtnl(dev);
50237 ++ struct inet6_dev *dev_v6 = __in6_dev_get(dev);
50238 ++ struct wg_device *wg = netdev_priv(dev);
50239 ++ struct wg_peer *peer;
50240 ++ int ret;
50241 ++
50242 ++ if (dev_v4) {
50243 ++ /* At some point we might put this check near the ip_rt_send_
50244 ++ * redirect call of ip_forward in net/ipv4/ip_forward.c, similar
50245 ++ * to the current secpath check.
50246 ++ */
50247 ++ IN_DEV_CONF_SET(dev_v4, SEND_REDIRECTS, false);
50248 ++ IPV4_DEVCONF_ALL(dev_net(dev), SEND_REDIRECTS) = false;
50249 ++ }
50250 ++ if (dev_v6)
50251 ++ dev_v6->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_NONE;
50252 ++
50253 ++ ret = wg_socket_init(wg, wg->incoming_port);
50254 ++ if (ret < 0)
50255 ++ return ret;
50256 ++ mutex_lock(&wg->device_update_lock);
50257 ++ list_for_each_entry(peer, &wg->peer_list, peer_list) {
50258 ++ wg_packet_send_staged_packets(peer);
50259 ++ if (peer->persistent_keepalive_interval)
50260 ++ wg_packet_send_keepalive(peer);
50261 ++ }
50262 ++ mutex_unlock(&wg->device_update_lock);
50263 ++ return 0;
50264 ++}
50265 ++
#ifdef CONFIG_PM_SLEEP
/* PM notifier: before the machine suspends or hibernates, wipe handshake
 * state and session keypairs of every peer on every device. Always
 * returns 0.
 */
static int wg_pm_notification(struct notifier_block *nb, unsigned long action,
			      void *data)
{
	struct wg_device *wg;
	struct wg_peer *peer;

	/* If the machine is constantly suspending and resuming, as part of
	 * its normal operation rather than as a somewhat rare event, then we
	 * don't actually want to clear keys.
	 */
	if (IS_ENABLED(CONFIG_PM_AUTOSLEEP) || IS_ENABLED(CONFIG_ANDROID))
		return 0;

	switch (action) {
	case PM_HIBERNATION_PREPARE:
	case PM_SUSPEND_PREPARE:
		break;
	default:
		return 0;
	}

	rtnl_lock();
	list_for_each_entry(wg, &device_list, device_list) {
		mutex_lock(&wg->device_update_lock);
		list_for_each_entry(peer, &wg->peer_list, peer_list) {
			del_timer(&peer->timer_zero_key_material);
			wg_noise_handshake_clear(&peer->handshake);
			wg_noise_keypairs_clear(&peer->keypairs);
		}
		mutex_unlock(&wg->device_update_lock);
	}
	rtnl_unlock();
	/* Wait for outstanding RCU callbacks before returning. */
	rcu_barrier();
	return 0;
}

static struct notifier_block pm_notifier = {
	.notifier_call = wg_pm_notification
};
#endif
50300 ++/* ndo_stop: drop staged packets, stop timers and clear key state for every peer. */
50301 ++static int wg_stop(struct net_device *dev)
50302 ++{
50303 ++ struct wg_device *wg = netdev_priv(dev);
50304 ++ struct wg_peer *peer;
50305 ++
50306 ++ mutex_lock(&wg->device_update_lock);
50307 ++ list_for_each_entry(peer, &wg->peer_list, peer_list) {
50308 ++ wg_packet_purge_staged_packets(peer);
50309 ++ wg_timers_stop(peer);
50310 ++ wg_noise_handshake_clear(&peer->handshake);
50311 ++ wg_noise_keypairs_clear(&peer->keypairs);
50312 ++ wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake);
50313 ++ }
50314 ++ mutex_unlock(&wg->device_update_lock);
50315 ++ skb_queue_purge(&wg->incoming_handshakes);
50316 ++ wg_socket_reinit(wg, NULL, NULL);
50317 ++ return 0;
50318 ++}
50319 ++/* ndo_start_xmit: pick the peer by destination IP, stage the skb(s), kick the send path. */
50320 ++static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev)
50321 ++{
50322 ++ struct wg_device *wg = netdev_priv(dev);
50323 ++ struct sk_buff_head packets;
50324 ++ struct wg_peer *peer;
50325 ++ struct sk_buff *next;
50326 ++ sa_family_t family;
50327 ++ u32 mtu;
50328 ++ int ret;
50329 ++
50330 ++ if (unlikely(wg_skb_examine_untrusted_ip_hdr(skb) != skb->protocol)) {
50331 ++ ret = -EPROTONOSUPPORT;
50332 ++ net_dbg_ratelimited("%s: Invalid IP packet\n", dev->name);
50333 ++ goto err;
50334 ++ }
50335 ++
50336 ++ peer = wg_allowedips_lookup_dst(&wg->peer_allowedips, skb);
50337 ++ if (unlikely(!peer)) {
50338 ++ ret = -ENOKEY;
50339 ++ if (skb->protocol == htons(ETH_P_IP))
50340 ++ net_dbg_ratelimited("%s: No peer has allowed IPs matching %pI4\n",
50341 ++ dev->name, &ip_hdr(skb)->daddr);
50342 ++ else if (skb->protocol == htons(ETH_P_IPV6))
50343 ++ net_dbg_ratelimited("%s: No peer has allowed IPs matching %pI6\n",
50344 ++ dev->name, &ipv6_hdr(skb)->daddr);
50345 ++ goto err;
50346 ++ }
50347 ++
50348 ++ family = READ_ONCE(peer->endpoint.addr.sa_family);
50349 ++ if (unlikely(family != AF_INET && family != AF_INET6)) {
50350 ++ ret = -EDESTADDRREQ;
50351 ++ net_dbg_ratelimited("%s: No valid endpoint has been configured or discovered for peer %llu\n",
50352 ++ dev->name, peer->internal_id);
50353 ++ goto err_peer;
50354 ++ }
50355 ++ /* Use the route's MTU when the skb carries a dst, else the device MTU. */
50356 ++ mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
50357 ++
50358 ++ __skb_queue_head_init(&packets);
50359 ++ if (!skb_is_gso(skb)) {
50360 ++ skb_mark_not_on_list(skb);
50361 ++ } else {
50362 ++ struct sk_buff *segs = skb_gso_segment(skb, 0);
50363 ++
50364 ++ if (unlikely(IS_ERR(segs))) {
50365 ++ ret = PTR_ERR(segs);
50366 ++ goto err_peer;
50367 ++ }
50368 ++ dev_kfree_skb(skb);
50369 ++ skb = segs;
50370 ++ }
50371 ++ /* Unlink each skb/segment, unshare it and collect it on the local list. */
50372 ++ skb_list_walk_safe(skb, skb, next) {
50373 ++ skb_mark_not_on_list(skb);
50374 ++
50375 ++ skb = skb_share_check(skb, GFP_ATOMIC);
50376 ++ if (unlikely(!skb))
50377 ++ continue;
50378 ++
50379 ++ /* We only need to keep the original dst around for icmp,
50380 ++ * so at this point we're in a position to drop it.
50381 ++ */
50382 ++ skb_dst_drop(skb);
50383 ++
50384 ++ PACKET_CB(skb)->mtu = mtu;
50385 ++
50386 ++ __skb_queue_tail(&packets, skb);
50387 ++ }
50388 ++
50389 ++ spin_lock_bh(&peer->staged_packet_queue.lock);
50390 ++ /* If the queue is getting too big, we start removing the oldest packets
50391 ++ * until it's small again. We do this before adding the new packet, so
50392 ++ * we don't remove GSO segments that are in excess.
50393 ++ */
50394 ++ while (skb_queue_len(&peer->staged_packet_queue) > MAX_STAGED_PACKETS) {
50395 ++ dev_kfree_skb(__skb_dequeue(&peer->staged_packet_queue));
50396 ++ ++dev->stats.tx_dropped;
50397 ++ }
50398 ++ skb_queue_splice_tail(&packets, &peer->staged_packet_queue);
50399 ++ spin_unlock_bh(&peer->staged_packet_queue.lock);
50400 ++
50401 ++ wg_packet_send_staged_packets(peer);
50402 ++
50403 ++ wg_peer_put(peer);
50404 ++ return NETDEV_TX_OK;
50405 ++ /* Failure: put the peer if one was found, count tx_errors, send ICMP unreachable, free skb. */
50406 ++err_peer:
50407 ++ wg_peer_put(peer);
50408 ++err:
50409 ++ ++dev->stats.tx_errors;
50410 ++ if (skb->protocol == htons(ETH_P_IP))
50411 ++ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
50412 ++ else if (skb->protocol == htons(ETH_P_IPV6))
50413 ++ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
50414 ++ kfree_skb(skb);
50415 ++ return ret;
50416 ++}
50417 ++
50418 ++static const struct net_device_ops netdev_ops = {
50419 ++ .ndo_open = wg_open,
50420 ++ .ndo_stop = wg_stop,
50421 ++ .ndo_start_xmit = wg_xmit,
50422 ++ .ndo_get_stats64 = ip_tunnel_get_stats64
50423 ++};
50424 ++/* priv_destructor: unlink from device_list, tear down peers/sockets/queues, free the netdev. */
50425 ++static void wg_destruct(struct net_device *dev)
50426 ++{
50427 ++ struct wg_device *wg = netdev_priv(dev);
50428 ++
50429 ++ rtnl_lock();
50430 ++ list_del(&wg->device_list);
50431 ++ rtnl_unlock();
50432 ++ mutex_lock(&wg->device_update_lock);
50433 ++ wg->incoming_port = 0;
50434 ++ wg_socket_reinit(wg, NULL, NULL);
50435 ++ /* The final references are cleared in the below calls to destroy_workqueue. */
50436 ++ wg_peer_remove_all(wg);
50437 ++ destroy_workqueue(wg->handshake_receive_wq);
50438 ++ destroy_workqueue(wg->handshake_send_wq);
50439 ++ destroy_workqueue(wg->packet_crypt_wq);
50440 ++ wg_packet_queue_free(&wg->decrypt_queue, true);
50441 ++ wg_packet_queue_free(&wg->encrypt_queue, true);
50442 ++ rcu_barrier(); /* Wait for all the peers to be actually freed. */
50443 ++ wg_ratelimiter_uninit();
50444 ++ memzero_explicit(&wg->static_identity, sizeof(wg->static_identity));
50445 ++ skb_queue_purge(&wg->incoming_handshakes);
50446 ++ free_percpu(dev->tstats);
50447 ++ free_percpu(wg->incoming_handshakes_worker);
50448 ++ if (wg->have_creating_net_ref)
50449 ++ put_net(wg->creating_net);
50450 ++ kvfree(wg->index_hashtable);
50451 ++ kvfree(wg->peer_hashtable);
50452 ++ mutex_unlock(&wg->device_update_lock);
50453 ++
50454 ++ pr_debug("%s: Interface deleted\n", dev->name);
50455 ++ free_netdev(dev);
50456 ++}
50457 ++
50458 ++static const struct device_type device_type = { .name = KBUILD_MODNAME };
50459 ++/* rtnl_link_ops.setup: fill in net_device fields and zero the private wg_device. */
50460 ++static void wg_setup(struct net_device *dev)
50461 ++{
50462 ++ struct wg_device *wg = netdev_priv(dev);
50463 ++ enum { WG_NETDEV_FEATURES = NETIF_F_HW_CSUM | NETIF_F_RXCSUM |
50464 ++ NETIF_F_SG | NETIF_F_GSO |
50465 ++ NETIF_F_GSO_SOFTWARE | NETIF_F_HIGHDMA };
50466 ++
50467 ++ dev->netdev_ops = &netdev_ops;
50468 ++ dev->hard_header_len = 0;
50469 ++ dev->addr_len = 0;
50470 ++ dev->needed_headroom = DATA_PACKET_HEAD_ROOM;
50471 ++ dev->needed_tailroom = noise_encrypted_len(MESSAGE_PADDING_MULTIPLE);
50472 ++ dev->type = ARPHRD_NONE;
50473 ++ dev->flags = IFF_POINTOPOINT | IFF_NOARP;
50474 ++ dev->priv_flags |= IFF_NO_QUEUE;
50475 ++ dev->features |= NETIF_F_LLTX;
50476 ++ dev->features |= WG_NETDEV_FEATURES;
50477 ++ dev->hw_features |= WG_NETDEV_FEATURES;
50478 ++ dev->hw_enc_features |= WG_NETDEV_FEATURES;
50479 ++ dev->mtu = ETH_DATA_LEN - MESSAGE_MINIMUM_LENGTH -
50480 ++ sizeof(struct udphdr) -
50481 ++ max(sizeof(struct ipv6hdr), sizeof(struct iphdr));
50482 ++
50483 ++ SET_NETDEV_DEVTYPE(dev, &device_type);
50484 ++
50485 ++ /* We need to keep the dst around in case of icmp replies. */
50486 ++ netif_keep_dst(dev);
50487 ++
50488 ++ memset(wg, 0, sizeof(*wg));
50489 ++ wg->dev = dev;
50490 ++}
50491 ++/* rtnl_link_ops.newlink: allocate per-device state, then register the net_device. */
50492 ++static int wg_newlink(struct net *src_net, struct net_device *dev,
50493 ++ struct nlattr *tb[], struct nlattr *data[],
50494 ++ struct netlink_ext_ack *extack)
50495 ++{
50496 ++ struct wg_device *wg = netdev_priv(dev);
50497 ++ int ret = -ENOMEM;
50498 ++
50499 ++ wg->creating_net = src_net;
50500 ++ init_rwsem(&wg->static_identity.lock);
50501 ++ mutex_init(&wg->socket_update_lock);
50502 ++ mutex_init(&wg->device_update_lock);
50503 ++ skb_queue_head_init(&wg->incoming_handshakes);
50504 ++ wg_allowedips_init(&wg->peer_allowedips);
50505 ++ wg_cookie_checker_init(&wg->cookie_checker, wg);
50506 ++ INIT_LIST_HEAD(&wg->peer_list);
50507 ++ wg->device_update_gen = 1;
50508 ++
50509 ++ wg->peer_hashtable = wg_pubkey_hashtable_alloc();
50510 ++ if (!wg->peer_hashtable)
50511 ++ return ret;
50512 ++
50513 ++ wg->index_hashtable = wg_index_hashtable_alloc();
50514 ++ if (!wg->index_hashtable)
50515 ++ goto err_free_peer_hashtable;
50516 ++
50517 ++ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
50518 ++ if (!dev->tstats)
50519 ++ goto err_free_index_hashtable;
50520 ++
50521 ++ wg->incoming_handshakes_worker =
50522 ++ wg_packet_percpu_multicore_worker_alloc(
50523 ++ wg_packet_handshake_receive_worker, wg);
50524 ++ if (!wg->incoming_handshakes_worker)
50525 ++ goto err_free_tstats;
50526 ++
50527 ++ wg->handshake_receive_wq = alloc_workqueue("wg-kex-%s",
50528 ++ WQ_CPU_INTENSIVE | WQ_FREEZABLE, 0, dev->name);
50529 ++ if (!wg->handshake_receive_wq)
50530 ++ goto err_free_incoming_handshakes;
50531 ++
50532 ++ wg->handshake_send_wq = alloc_workqueue("wg-kex-%s",
50533 ++ WQ_UNBOUND | WQ_FREEZABLE, 0, dev->name);
50534 ++ if (!wg->handshake_send_wq)
50535 ++ goto err_destroy_handshake_receive;
50536 ++
50537 ++ wg->packet_crypt_wq = alloc_workqueue("wg-crypt-%s",
50538 ++ WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 0, dev->name);
50539 ++ if (!wg->packet_crypt_wq)
50540 ++ goto err_destroy_handshake_send;
50541 ++
50542 ++ ret = wg_packet_queue_init(&wg->encrypt_queue, wg_packet_encrypt_worker,
50543 ++ true, MAX_QUEUED_PACKETS);
50544 ++ if (ret < 0)
50545 ++ goto err_destroy_packet_crypt;
50546 ++
50547 ++ ret = wg_packet_queue_init(&wg->decrypt_queue, wg_packet_decrypt_worker,
50548 ++ true, MAX_QUEUED_PACKETS);
50549 ++ if (ret < 0)
50550 ++ goto err_free_encrypt_queue;
50551 ++
50552 ++ ret = wg_ratelimiter_init();
50553 ++ if (ret < 0)
50554 ++ goto err_free_decrypt_queue;
50555 ++
50556 ++ ret = register_netdevice(dev);
50557 ++ if (ret < 0)
50558 ++ goto err_uninit_ratelimiter;
50559 ++
50560 ++ list_add(&wg->device_list, &device_list);
50561 ++
50562 ++ /* We wait until the end to assign priv_destructor, so that
50563 ++ * register_netdevice doesn't call it for us if it fails.
50564 ++ */
50565 ++ dev->priv_destructor = wg_destruct;
50566 ++
50567 ++ pr_debug("%s: Interface created\n", dev->name);
50568 ++ return ret;
50569 ++ /* Error unwinding: labels release resources in reverse order of acquisition. */
50570 ++err_uninit_ratelimiter:
50571 ++ wg_ratelimiter_uninit();
50572 ++err_free_decrypt_queue:
50573 ++ wg_packet_queue_free(&wg->decrypt_queue, true);
50574 ++err_free_encrypt_queue:
50575 ++ wg_packet_queue_free(&wg->encrypt_queue, true);
50576 ++err_destroy_packet_crypt:
50577 ++ destroy_workqueue(wg->packet_crypt_wq);
50578 ++err_destroy_handshake_send:
50579 ++ destroy_workqueue(wg->handshake_send_wq);
50580 ++err_destroy_handshake_receive:
50581 ++ destroy_workqueue(wg->handshake_receive_wq);
50582 ++err_free_incoming_handshakes:
50583 ++ free_percpu(wg->incoming_handshakes_worker);
50584 ++err_free_tstats:
50585 ++ free_percpu(dev->tstats);
50586 ++err_free_index_hashtable:
50587 ++ kvfree(wg->index_hashtable);
50588 ++err_free_peer_hashtable:
50589 ++ kvfree(wg->peer_hashtable);
50590 ++ return ret;
50591 ++}
50592 ++
50593 ++static struct rtnl_link_ops link_ops __read_mostly = {
50594 ++ .kind = KBUILD_MODNAME,
50595 ++ .priv_size = sizeof(struct wg_device),
50596 ++ .setup = wg_setup,
50597 ++ .newlink = wg_newlink,
50598 ++};
50599 ++/* On NETDEV_REGISTER of a wg device: hold a creating_net ref only while in another netns. */
50600 ++static int wg_netdevice_notification(struct notifier_block *nb,
50601 ++ unsigned long action, void *data)
50602 ++{
50603 ++ struct net_device *dev = ((struct netdev_notifier_info *)data)->dev;
50604 ++ struct wg_device *wg = netdev_priv(dev);
50605 ++
50606 ++ ASSERT_RTNL();
50607 ++
50608 ++ if (action != NETDEV_REGISTER || dev->netdev_ops != &netdev_ops)
50609 ++ return 0;
50610 ++
50611 ++ if (dev_net(dev) == wg->creating_net && wg->have_creating_net_ref) {
50612 ++ put_net(wg->creating_net);
50613 ++ wg->have_creating_net_ref = false;
50614 ++ } else if (dev_net(dev) != wg->creating_net &&
50615 ++ !wg->have_creating_net_ref) {
50616 ++ wg->have_creating_net_ref = true;
50617 ++ get_net(wg->creating_net);
50618 ++ }
50619 ++ return 0;
50620 ++}
50621 ++
50622 ++static struct notifier_block netdevice_notifier = {
50623 ++ .notifier_call = wg_netdevice_notification
50624 ++};
50625 ++/* Register the PM notifier (CONFIG_PM_SLEEP only), netdevice notifier and rtnl link ops. */
50626 ++int __init wg_device_init(void)
50627 ++{
50628 ++ int ret;
50629 ++
50630 ++#ifdef CONFIG_PM_SLEEP
50631 ++ ret = register_pm_notifier(&pm_notifier);
50632 ++ if (ret)
50633 ++ return ret;
50634 ++#endif
50635 ++
50636 ++ ret = register_netdevice_notifier(&netdevice_notifier);
50637 ++ if (ret)
50638 ++ goto error_pm;
50639 ++
50640 ++ ret = rtnl_link_register(&link_ops);
50641 ++ if (ret)
50642 ++ goto error_netdevice;
50643 ++
50644 ++ return 0;
50645 ++
50646 ++error_netdevice:
50647 ++ unregister_netdevice_notifier(&netdevice_notifier);
50648 ++error_pm:
50649 ++#ifdef CONFIG_PM_SLEEP
50650 ++ unregister_pm_notifier(&pm_notifier);
50651 ++#endif
50652 ++ return ret;
50653 ++}
50654 ++/* Undo wg_device_init() in reverse registration order, then wait in rcu_barrier(). */
50655 ++void wg_device_uninit(void)
50656 ++{
50657 ++ rtnl_link_unregister(&link_ops);
50658 ++ unregister_netdevice_notifier(&netdevice_notifier);
50659 ++#ifdef CONFIG_PM_SLEEP
50660 ++ unregister_pm_notifier(&pm_notifier);
50661 ++#endif
50662 ++ rcu_barrier();
50663 ++}
50664 +diff --git a/drivers/net/wireguard/device.h b/drivers/net/wireguard/device.h
50665 +new file mode 100644
50666 +index 000000000000..c91f3051c5c7
50667 +--- /dev/null
50668 ++++ b/drivers/net/wireguard/device.h
50669 +@@ -0,0 +1,73 @@
50670 ++/* SPDX-License-Identifier: GPL-2.0 */
50671 ++/*
50672 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
50673 ++ */
50674 ++
50675 ++#ifndef _WG_DEVICE_H
50676 ++#define _WG_DEVICE_H
50677 ++
50678 ++#include "noise.h"
50679 ++#include "allowedips.h"
50680 ++#include "peerlookup.h"
50681 ++#include "cookie.h"
50682 ++
50683 ++#include <linux/types.h>
50684 ++#include <linux/netdevice.h>
50685 ++#include <linux/workqueue.h>
50686 ++#include <linux/mutex.h>
50687 ++#include <linux/net.h>
50688 ++#include <linux/ptr_ring.h>
50689 ++
50690 ++struct wg_device;
50691 ++
50692 ++struct multicore_worker {
50693 ++ void *ptr;
50694 ++ struct work_struct work;
50695 ++};
50696 ++
50697 ++struct crypt_queue {
50698 ++ struct ptr_ring ring;
50699 ++ union {
50700 ++ struct {
50701 ++ struct multicore_worker __percpu *worker;
50702 ++ int last_cpu;
50703 ++ };
50704 ++ struct work_struct work;
50705 ++ };
50706 ++};
50707 ++
50708 ++struct wg_device {
50709 ++ struct net_device *dev;
50710 ++ struct crypt_queue encrypt_queue, decrypt_queue;
50711 ++ struct sock __rcu *sock4, *sock6;
50712 ++ struct net *creating_net;
50713 ++ struct noise_static_identity static_identity;
50714 ++ struct workqueue_struct *handshake_receive_wq, *handshake_send_wq;
50715 ++ struct workqueue_struct *packet_crypt_wq;
50716 ++ struct sk_buff_head incoming_handshakes;
50717 ++ int incoming_handshake_cpu;
50718 ++ struct multicore_worker __percpu *incoming_handshakes_worker;
50719 ++ struct cookie_checker cookie_checker;
50720 ++ struct pubkey_hashtable *peer_hashtable;
50721 ++ struct index_hashtable *index_hashtable;
50722 ++ struct allowedips peer_allowedips;
50723 ++ struct mutex device_update_lock, socket_update_lock;
50724 ++ struct list_head device_list, peer_list;
50725 ++ unsigned int num_peers, device_update_gen;
50726 ++ u32 fwmark;
50727 ++ u16 incoming_port;
50728 ++ bool have_creating_net_ref;
50729 ++};
50730 ++
50731 ++int wg_device_init(void);
50732 ++void wg_device_uninit(void);
50733 ++
50734 ++/* Walk a ->next linked skb list such as GSO segs. next is fetched before the
50735 ++ * body runs, so the body may unlink or free skb; a NULL first gives zero
50736 ++ * iterations. Candidate for include/linux/skbuff.h once the dust settles.
50737 ++ */
50738 ++#define skb_list_walk_safe(first, skb, next) \
50739 ++ for ((skb) = (first), (next) = (skb) ? (skb)->next : NULL; (skb); \
50740 ++ (skb) = (next), (next) = (skb) ? (skb)->next : NULL)
50741 ++
50742 ++#endif /* _WG_DEVICE_H */
50743 +diff --git a/drivers/net/wireguard/main.c b/drivers/net/wireguard/main.c
50744 +new file mode 100644
50745 +index 000000000000..10c0a40f6a9e
50746 +--- /dev/null
50747 ++++ b/drivers/net/wireguard/main.c
50748 +@@ -0,0 +1,64 @@
50749 ++// SPDX-License-Identifier: GPL-2.0
50750 ++/*
50751 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
50752 ++ */
50753 ++
50754 ++#include "version.h"
50755 ++#include "device.h"
50756 ++#include "noise.h"
50757 ++#include "queueing.h"
50758 ++#include "ratelimiter.h"
50759 ++#include "netlink.h"
50760 ++
50761 ++#include <uapi/linux/wireguard.h>
50762 ++
50763 ++#include <linux/version.h>
50764 ++#include <linux/init.h>
50765 ++#include <linux/module.h>
50766 ++#include <linux/genetlink.h>
50767 ++#include <net/rtnetlink.h>
50768 ++/* Module entry: run selftests when DEBUG is set, then init the device and genetlink layers. */
50769 ++static int __init mod_init(void)
50770 ++{
50771 ++ int ret;
50772 ++
50773 ++#ifdef DEBUG
50774 ++ if (!wg_allowedips_selftest() || !wg_packet_counter_selftest() ||
50775 ++ !wg_ratelimiter_selftest())
50776 ++ return -ENOTRECOVERABLE;
50777 ++#endif
50778 ++ wg_noise_init();
50779 ++
50780 ++ ret = wg_device_init();
50781 ++ if (ret < 0)
50782 ++ goto err_device;
50783 ++
50784 ++ ret = wg_genetlink_init();
50785 ++ if (ret < 0)
50786 ++ goto err_netlink;
50787 ++
50788 ++ pr_info("WireGuard " WIREGUARD_VERSION " loaded. See www.wireguard.com for information.\n");
50789 ++ pr_info("Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.\n");
50790 ++
50791 ++ return 0;
50792 ++
50793 ++err_netlink:
50794 ++ wg_device_uninit();
50795 ++err_device:
50796 ++ return ret;
50797 ++}
50798 ++/* Module exit: tear down genetlink first, then the device layer (reverse of mod_init). */
50799 ++static void __exit mod_exit(void)
50800 ++{
50801 ++ wg_genetlink_uninit();
50802 ++ wg_device_uninit();
50803 ++}
50804 ++
50805 ++module_init(mod_init);
50806 ++module_exit(mod_exit);
50807 ++MODULE_LICENSE("GPL v2");
50808 ++MODULE_DESCRIPTION("WireGuard secure network tunnel");
50809 ++MODULE_AUTHOR("Jason A. Donenfeld <Jason@×××××.com>");
50810 ++MODULE_VERSION(WIREGUARD_VERSION);
50811 ++MODULE_ALIAS_RTNL_LINK(KBUILD_MODNAME);
50812 ++MODULE_ALIAS_GENL_FAMILY(WG_GENL_NAME);
50813 +diff --git a/drivers/net/wireguard/messages.h b/drivers/net/wireguard/messages.h
50814 +new file mode 100644
50815 +index 000000000000..b8a7b9ce32ba
50816 +--- /dev/null
50817 ++++ b/drivers/net/wireguard/messages.h
50818 +@@ -0,0 +1,128 @@
50819 ++/* SPDX-License-Identifier: GPL-2.0 */
50820 ++/*
50821 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
50822 ++ */
50823 ++
50824 ++#ifndef _WG_MESSAGES_H
50825 ++#define _WG_MESSAGES_H
50826 ++
50827 ++#include <crypto/curve25519.h>
50828 ++#include <crypto/chacha20poly1305.h>
50829 ++#include <crypto/blake2s.h>
50830 ++
50831 ++#include <linux/kernel.h>
50832 ++#include <linux/param.h>
50833 ++#include <linux/skbuff.h>
50834 ++
50835 ++enum noise_lengths {
50836 ++ NOISE_PUBLIC_KEY_LEN = CURVE25519_KEY_SIZE,
50837 ++ NOISE_SYMMETRIC_KEY_LEN = CHACHA20POLY1305_KEY_SIZE,
50838 ++ NOISE_TIMESTAMP_LEN = sizeof(u64) + sizeof(u32),
50839 ++ NOISE_AUTHTAG_LEN = CHACHA20POLY1305_AUTHTAG_SIZE,
50840 ++ NOISE_HASH_LEN = BLAKE2S_HASH_SIZE
50841 ++};
50842 ++
50843 ++#define noise_encrypted_len(plain_len) ((plain_len) + NOISE_AUTHTAG_LEN)
50844 ++
50845 ++enum cookie_values {
50846 ++ COOKIE_SECRET_MAX_AGE = 2 * 60,
50847 ++ COOKIE_SECRET_LATENCY = 5,
50848 ++ COOKIE_NONCE_LEN = XCHACHA20POLY1305_NONCE_SIZE,
50849 ++ COOKIE_LEN = 16
50850 ++};
50851 ++
50852 ++enum counter_values {
50853 ++ COUNTER_BITS_TOTAL = 2048,
50854 ++ COUNTER_REDUNDANT_BITS = BITS_PER_LONG,
50855 ++ COUNTER_WINDOW_SIZE = COUNTER_BITS_TOTAL - COUNTER_REDUNDANT_BITS
50856 ++};
50857 ++
50858 ++enum limits {
50859 ++ REKEY_AFTER_MESSAGES = 1ULL << 60,
50860 ++ REJECT_AFTER_MESSAGES = U64_MAX - COUNTER_WINDOW_SIZE - 1,
50861 ++ REKEY_TIMEOUT = 5,
50862 ++ REKEY_TIMEOUT_JITTER_MAX_JIFFIES = HZ / 3,
50863 ++ REKEY_AFTER_TIME = 120,
50864 ++ REJECT_AFTER_TIME = 180,
50865 ++ INITIATIONS_PER_SECOND = 50,
50866 ++ MAX_PEERS_PER_DEVICE = 1U << 20,
50867 ++ KEEPALIVE_TIMEOUT = 10,
50868 ++ MAX_TIMER_HANDSHAKES = 90 / REKEY_TIMEOUT,
50869 ++ MAX_QUEUED_INCOMING_HANDSHAKES = 4096, /* TODO: replace this with DQL */
50870 ++ MAX_STAGED_PACKETS = 128,
50871 ++ MAX_QUEUED_PACKETS = 1024 /* TODO: replace this with DQL */
50872 ++};
50873 ++
50874 ++enum message_type {
50875 ++ MESSAGE_INVALID = 0,
50876 ++ MESSAGE_HANDSHAKE_INITIATION = 1,
50877 ++ MESSAGE_HANDSHAKE_RESPONSE = 2,
50878 ++ MESSAGE_HANDSHAKE_COOKIE = 3,
50879 ++ MESSAGE_DATA = 4
50880 ++};
50881 ++
50882 ++struct message_header {
50883 ++ /* The actual layout of this that we want is:
50884 ++ * u8 type
50885 ++ * u8 reserved_zero[3]
50886 ++ *
50887 ++ * But it turns out that by encoding this as little endian,
50888 ++ * we achieve the same thing, and it makes checking faster.
50889 ++ */
50890 ++ __le32 type;
50891 ++};
50892 ++
50893 ++struct message_macs {
50894 ++ u8 mac1[COOKIE_LEN];
50895 ++ u8 mac2[COOKIE_LEN];
50896 ++};
50897 ++
50898 ++struct message_handshake_initiation {
50899 ++ struct message_header header;
50900 ++ __le32 sender_index;
50901 ++ u8 unencrypted_ephemeral[NOISE_PUBLIC_KEY_LEN];
50902 ++ u8 encrypted_static[noise_encrypted_len(NOISE_PUBLIC_KEY_LEN)];
50903 ++ u8 encrypted_timestamp[noise_encrypted_len(NOISE_TIMESTAMP_LEN)];
50904 ++ struct message_macs macs;
50905 ++};
50906 ++
50907 ++struct message_handshake_response {
50908 ++ struct message_header header;
50909 ++ __le32 sender_index;
50910 ++ __le32 receiver_index;
50911 ++ u8 unencrypted_ephemeral[NOISE_PUBLIC_KEY_LEN];
50912 ++ u8 encrypted_nothing[noise_encrypted_len(0)];
50913 ++ struct message_macs macs;
50914 ++};
50915 ++
50916 ++struct message_handshake_cookie {
50917 ++ struct message_header header;
50918 ++ __le32 receiver_index;
50919 ++ u8 nonce[COOKIE_NONCE_LEN];
50920 ++ u8 encrypted_cookie[noise_encrypted_len(COOKIE_LEN)];
50921 ++};
50922 ++
50923 ++struct message_data {
50924 ++ struct message_header header;
50925 ++ __le32 key_idx;
50926 ++ __le64 counter;
50927 ++ u8 encrypted_data[];
50928 ++};
50929 ++
50930 ++#define message_data_len(plain_len) \
50931 ++ (noise_encrypted_len(plain_len) + sizeof(struct message_data))
50932 ++
50933 ++enum message_alignments {
50934 ++ MESSAGE_PADDING_MULTIPLE = 16,
50935 ++ MESSAGE_MINIMUM_LENGTH = message_data_len(0)
50936 ++};
50937 ++
50938 ++#define SKB_HEADER_LEN \
50939 ++ (max(sizeof(struct iphdr), sizeof(struct ipv6hdr)) + \
50940 ++ sizeof(struct udphdr) + NET_SKB_PAD)
50941 ++#define DATA_PACKET_HEAD_ROOM \
50942 ++ ALIGN(sizeof(struct message_data) + SKB_HEADER_LEN, 4)
50943 ++
50944 ++enum { HANDSHAKE_DSCP = 0x88 /* AF41, plus 00 ECN */ };
50945 ++
50946 ++#endif /* _WG_MESSAGES_H */
50947 +diff --git a/drivers/net/wireguard/netlink.c b/drivers/net/wireguard/netlink.c
50948 +new file mode 100644
50949 +index 000000000000..0739a2cd1920
50950 +--- /dev/null
50951 ++++ b/drivers/net/wireguard/netlink.c
50952 +@@ -0,0 +1,648 @@
50953 ++// SPDX-License-Identifier: GPL-2.0
50954 ++/*
50955 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
50956 ++ */
50957 ++
50958 ++#include "netlink.h"
50959 ++#include "device.h"
50960 ++#include "peer.h"
50961 ++#include "socket.h"
50962 ++#include "queueing.h"
50963 ++#include "messages.h"
50964 ++
50965 ++#include <uapi/linux/wireguard.h>
50966 ++
50967 ++#include <linux/if.h>
50968 ++#include <net/genetlink.h>
50969 ++#include <net/sock.h>
50970 ++#include <crypto/algapi.h>
50971 ++
50972 ++static struct genl_family genl_family;
50973 ++
50974 ++static const struct nla_policy device_policy[WGDEVICE_A_MAX + 1] = {
50975 ++ [WGDEVICE_A_IFINDEX] = { .type = NLA_U32 },
50976 ++ [WGDEVICE_A_IFNAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
50977 ++ [WGDEVICE_A_PRIVATE_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_PUBLIC_KEY_LEN },
50978 ++ [WGDEVICE_A_PUBLIC_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_PUBLIC_KEY_LEN },
50979 ++ [WGDEVICE_A_FLAGS] = { .type = NLA_U32 },
50980 ++ [WGDEVICE_A_LISTEN_PORT] = { .type = NLA_U16 },
50981 ++ [WGDEVICE_A_FWMARK] = { .type = NLA_U32 },
50982 ++ [WGDEVICE_A_PEERS] = { .type = NLA_NESTED }
50983 ++};
50984 ++
50985 ++static const struct nla_policy peer_policy[WGPEER_A_MAX + 1] = {
50986 ++ [WGPEER_A_PUBLIC_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_PUBLIC_KEY_LEN },
50987 ++ [WGPEER_A_PRESHARED_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_SYMMETRIC_KEY_LEN },
50988 ++ [WGPEER_A_FLAGS] = { .type = NLA_U32 },
50989 ++ [WGPEER_A_ENDPOINT] = { .type = NLA_MIN_LEN, .len = sizeof(struct sockaddr) },
50990 ++ [WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL] = { .type = NLA_U16 },
50991 ++ [WGPEER_A_LAST_HANDSHAKE_TIME] = { .type = NLA_EXACT_LEN, .len = sizeof(struct __kernel_timespec) },
50992 ++ [WGPEER_A_RX_BYTES] = { .type = NLA_U64 },
50993 ++ [WGPEER_A_TX_BYTES] = { .type = NLA_U64 },
50994 ++ [WGPEER_A_ALLOWEDIPS] = { .type = NLA_NESTED },
50995 ++ [WGPEER_A_PROTOCOL_VERSION] = { .type = NLA_U32 }
50996 ++};
50997 ++
50998 ++static const struct nla_policy allowedip_policy[WGALLOWEDIP_A_MAX + 1] = {
50999 ++ [WGALLOWEDIP_A_FAMILY] = { .type = NLA_U16 },
51000 ++ [WGALLOWEDIP_A_IPADDR] = { .type = NLA_MIN_LEN, .len = sizeof(struct in_addr) },
51001 ++ [WGALLOWEDIP_A_CIDR_MASK] = { .type = NLA_U8 }
51002 ++};
51003 ++/* Map WGDEVICE_A_IFINDEX or WGDEVICE_A_IFNAME to a wg_device, or return an ERR_PTR. */
51004 ++static struct wg_device *lookup_interface(struct nlattr **attrs,
51005 ++ struct sk_buff *skb)
51006 ++{
51007 ++ struct net_device *dev = NULL;
51008 ++ /* Exactly one selector must be given: the !a == !b test rejects both and neither. */
51009 ++ if (!attrs[WGDEVICE_A_IFINDEX] == !attrs[WGDEVICE_A_IFNAME])
51010 ++ return ERR_PTR(-EBADR);
51011 ++ if (attrs[WGDEVICE_A_IFINDEX])
51012 ++ dev = dev_get_by_index(sock_net(skb->sk),
51013 ++ nla_get_u32(attrs[WGDEVICE_A_IFINDEX]));
51014 ++ else if (attrs[WGDEVICE_A_IFNAME])
51015 ++ dev = dev_get_by_name(sock_net(skb->sk),
51016 ++ nla_data(attrs[WGDEVICE_A_IFNAME]));
51017 ++ if (!dev)
51018 ++ return ERR_PTR(-ENODEV);
51019 ++ if (!dev->rtnl_link_ops || !dev->rtnl_link_ops->kind ||
51020 ++ strcmp(dev->rtnl_link_ops->kind, KBUILD_MODNAME)) {
51021 ++ dev_put(dev);
51022 ++ return ERR_PTR(-EOPNOTSUPP);
51023 ++ }
51024 ++ return netdev_priv(dev); /* NOTE(review): no dev_put() here; caller presumably drops the ref */
51025 ++}
51026 ++/* Emit one allowed-IP nest (cidr, family, address); cancel it and return -EMSGSIZE on failure. */
51027 ++static int get_allowedips(struct sk_buff *skb, const u8 *ip, u8 cidr,
51028 ++ int family)
51029 ++{
51030 ++ struct nlattr *allowedip_nest;
51031 ++
51032 ++ allowedip_nest = nla_nest_start(skb, 0);
51033 ++ if (!allowedip_nest)
51034 ++ return -EMSGSIZE;
51035 ++
51036 ++ if (nla_put_u8(skb, WGALLOWEDIP_A_CIDR_MASK, cidr) ||
51037 ++ nla_put_u16(skb, WGALLOWEDIP_A_FAMILY, family) ||
51038 ++ nla_put(skb, WGALLOWEDIP_A_IPADDR, family == AF_INET6 ?
51039 ++ sizeof(struct in6_addr) : sizeof(struct in_addr), ip)) {
51040 ++ nla_nest_cancel(skb, allowedip_nest);
51041 ++ return -EMSGSIZE;
51042 ++ }
51043 ++
51044 ++ nla_nest_end(skb, allowedip_nest);
51045 ++ return 0;
51046 ++}
51047 ++
51048 ++struct dump_ctx {
51049 ++ struct wg_device *wg;
51050 ++ struct wg_peer *next_peer;
51051 ++ u64 allowedips_seq;
51052 ++ struct allowedips_node *next_allowedip;
51053 ++};
51054 ++
51055 ++#define DUMP_CTX(cb) ((struct dump_ctx *)(cb)->args)
51056 ++/* Dump one peer into skb; allowed IPs resume from ctx->next_allowedip across dump calls. */
51057 ++static int
51058 ++get_peer(struct wg_peer *peer, struct sk_buff *skb, struct dump_ctx *ctx)
51059 ++{
51060 ++ /* A non-NULL next_allowedip means a resumed dump: only the public key precedes the IPs. */
51061 ++ struct nlattr *allowedips_nest, *peer_nest = nla_nest_start(skb, 0);
51062 ++ struct allowedips_node *allowedips_node = ctx->next_allowedip;
51063 ++ bool fail;
51064 ++
51065 ++ if (!peer_nest)
51066 ++ return -EMSGSIZE;
51067 ++
51068 ++ down_read(&peer->handshake.lock);
51069 ++ fail = nla_put(skb, WGPEER_A_PUBLIC_KEY, NOISE_PUBLIC_KEY_LEN,
51070 ++ peer->handshake.remote_static);
51071 ++ up_read(&peer->handshake.lock);
51072 ++ if (fail)
51073 ++ goto err;
51074 ++
51075 ++ if (!allowedips_node) {
51076 ++ const struct __kernel_timespec last_handshake = {
51077 ++ .tv_sec = peer->walltime_last_handshake.tv_sec,
51078 ++ .tv_nsec = peer->walltime_last_handshake.tv_nsec
51079 ++ };
51080 ++
51081 ++ down_read(&peer->handshake.lock);
51082 ++ fail = nla_put(skb, WGPEER_A_PRESHARED_KEY,
51083 ++ NOISE_SYMMETRIC_KEY_LEN,
51084 ++ peer->handshake.preshared_key);
51085 ++ up_read(&peer->handshake.lock);
51086 ++ if (fail)
51087 ++ goto err;
51088 ++
51089 ++ if (nla_put(skb, WGPEER_A_LAST_HANDSHAKE_TIME,
51090 ++ sizeof(last_handshake), &last_handshake) ||
51091 ++ nla_put_u16(skb, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL,
51092 ++ peer->persistent_keepalive_interval) ||
51093 ++ nla_put_u64_64bit(skb, WGPEER_A_TX_BYTES, peer->tx_bytes,
51094 ++ WGPEER_A_UNSPEC) ||
51095 ++ nla_put_u64_64bit(skb, WGPEER_A_RX_BYTES, peer->rx_bytes,
51096 ++ WGPEER_A_UNSPEC) ||
51097 ++ nla_put_u32(skb, WGPEER_A_PROTOCOL_VERSION, 1))
51098 ++ goto err;
51099 ++
51100 ++ read_lock_bh(&peer->endpoint_lock);
51101 ++ if (peer->endpoint.addr.sa_family == AF_INET)
51102 ++ fail = nla_put(skb, WGPEER_A_ENDPOINT,
51103 ++ sizeof(peer->endpoint.addr4),
51104 ++ &peer->endpoint.addr4);
51105 ++ else if (peer->endpoint.addr.sa_family == AF_INET6)
51106 ++ fail = nla_put(skb, WGPEER_A_ENDPOINT,
51107 ++ sizeof(peer->endpoint.addr6),
51108 ++ &peer->endpoint.addr6);
51109 ++ read_unlock_bh(&peer->endpoint_lock);
51110 ++ if (fail)
51111 ++ goto err;
51112 ++ allowedips_node =
51113 ++ list_first_entry_or_null(&peer->allowedips_list,
51114 ++ struct allowedips_node, peer_list);
51115 ++ }
51116 ++ if (!allowedips_node)
51117 ++ goto no_allowedips;
51118 ++ if (!ctx->allowedips_seq)
51119 ++ ctx->allowedips_seq = peer->device->peer_allowedips.seq;
51120 ++ else if (ctx->allowedips_seq != peer->device->peer_allowedips.seq)
51121 ++ goto no_allowedips;
51122 ++
51123 ++ allowedips_nest = nla_nest_start(skb, WGPEER_A_ALLOWEDIPS);
51124 ++ if (!allowedips_nest)
51125 ++ goto err;
51126 ++
51127 ++ list_for_each_entry_from(allowedips_node, &peer->allowedips_list,
51128 ++ peer_list) {
51129 ++ u8 cidr, ip[16] __aligned(__alignof(u64));
51130 ++ int family;
51131 ++
51132 ++ family = wg_allowedips_read_node(allowedips_node, ip, &cidr);
51133 ++ if (get_allowedips(skb, ip, cidr, family)) {
51134 ++ nla_nest_end(skb, allowedips_nest);
51135 ++ nla_nest_end(skb, peer_nest);
51136 ++ ctx->next_allowedip = allowedips_node;
51137 ++ return -EMSGSIZE;
51138 ++ }
51139 ++ }
51140 ++ nla_nest_end(skb, allowedips_nest);
51141 ++no_allowedips:
51142 ++ nla_nest_end(skb, peer_nest);
51143 ++ ctx->next_allowedip = NULL;
51144 ++ ctx->allowedips_seq = 0;
51145 ++ return 0;
51146 ++err:
51147 ++ nla_nest_cancel(skb, peer_nest);
51148 ++ return -EMSGSIZE;
51149 ++}
51150 ++/* Parse the request attributes, look up the interface and stash it in DUMP_CTX(cb)->wg. */
51151 ++static int wg_get_device_start(struct netlink_callback *cb)
51152 ++{
51153 ++ struct nlattr **attrs = genl_family_attrbuf(&genl_family);
51154 ++ struct wg_device *wg;
51155 ++ int ret;
51156 ++
51157 ++ ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + genl_family.hdrsize, attrs,
51158 ++ genl_family.maxattr, device_policy, NULL);
51159 ++ if (ret < 0)
51160 ++ return ret;
51161 ++ wg = lookup_interface(attrs, cb->skb);
51162 ++ if (IS_ERR(wg))
51163 ++ return PTR_ERR(wg);
51164 ++ DUMP_CTX(cb)->wg = wg;
51165 ++ return 0;
51166 ++}
51167 ++
/* Dump ->dumpit hook for WG_CMD_GET_DEVICE.
 *
 * Emits one NLM_F_MULTI message per call. On the first call (no peer cursor
 * saved in the dump context) the device-level attributes are written; every
 * call then appends as many peers as get_peer() accepts, resuming after
 * ctx->next_peer.
 *
 * Returns skb->len when more peers remain to be dumped, 0 when the dump is
 * complete, and -EMSGSIZE when the header, a device attribute or the peers
 * nest could not be added to skb.
 */
static int wg_get_device_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct wg_peer *peer, *next_peer_cursor;
	struct dump_ctx *ctx = DUMP_CTX(cb);
	struct wg_device *wg = ctx->wg;
	struct nlattr *peers_nest;
	int ret = -EMSGSIZE;
	bool done = true;
	void *hdr;

	rtnl_lock();
	mutex_lock(&wg->device_update_lock);
	/* Publish the device generation as the dump sequence number; it is
	 * what genl_dump_check_consistent() below compares against.
	 */
	cb->seq = wg->device_update_gen;
	next_peer_cursor = ctx->next_peer;

	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			  &genl_family, NLM_F_MULTI, WG_CMD_GET_DEVICE);
	if (!hdr)
		goto out;
	genl_dump_check_consistent(cb, hdr);

	/* Device-wide attributes are only sent in the first message. */
	if (!ctx->next_peer) {
		if (nla_put_u16(skb, WGDEVICE_A_LISTEN_PORT,
				wg->incoming_port) ||
		    nla_put_u32(skb, WGDEVICE_A_FWMARK, wg->fwmark) ||
		    nla_put_u32(skb, WGDEVICE_A_IFINDEX, wg->dev->ifindex) ||
		    nla_put_string(skb, WGDEVICE_A_IFNAME, wg->dev->name))
			goto out;

		down_read(&wg->static_identity.lock);
		if (wg->static_identity.has_identity) {
			if (nla_put(skb, WGDEVICE_A_PRIVATE_KEY,
				    NOISE_PUBLIC_KEY_LEN,
				    wg->static_identity.static_private) ||
			    nla_put(skb, WGDEVICE_A_PUBLIC_KEY,
				    NOISE_PUBLIC_KEY_LEN,
				    wg->static_identity.static_public)) {
				up_read(&wg->static_identity.lock);
				goto out;
			}
		}
		up_read(&wg->static_identity.lock);
	}

	peers_nest = nla_nest_start(skb, WGDEVICE_A_PEERS);
	if (!peers_nest)
		goto out;
	/* From here on the message is considered valid. */
	ret = 0;
	/* If the last cursor was removed via list_del_init in peer_remove, then
	 * we just treat this the same as there being no more peers left. The
	 * reason is that seq_nr should indicate to userspace that this isn't a
	 * coherent dump anyway, so they'll try again.
	 */
	if (list_empty(&wg->peer_list) ||
	    (ctx->next_peer && list_empty(&ctx->next_peer->peer_list))) {
		nla_nest_cancel(skb, peers_nest);
		goto out;
	}
	lockdep_assert_held(&wg->device_update_lock);
	peer = list_prepare_entry(ctx->next_peer, &wg->peer_list, peer_list);
	list_for_each_entry_continue(peer, &wg->peer_list, peer_list) {
		/* A non-zero return means this peer was not fully written;
		 * stop here and leave the cursor on the last complete peer.
		 */
		if (get_peer(peer, skb, ctx)) {
			done = false;
			break;
		}
		next_peer_cursor = peer;
	}
	nla_nest_end(skb, peers_nest);

out:
	/* Take a reference on the new cursor before dropping the reference
	 * held on the previous one (the two may be the same peer).
	 */
	if (!ret && !done && next_peer_cursor)
		wg_peer_get(next_peer_cursor);
	wg_peer_put(ctx->next_peer);
	mutex_unlock(&wg->device_update_lock);
	rtnl_unlock();

	if (ret) {
		genlmsg_cancel(skb, hdr);
		return ret;
	}
	genlmsg_end(skb, hdr);
	if (done) {
		ctx->next_peer = NULL;
		return 0;
	}
	ctx->next_peer = next_peer_cursor;
	return skb->len;

	/* At this point, we can't really deal ourselves with safely zeroing out
	 * the private key material after usage. This will need an additional API
	 * in the kernel for marking skbs as zero_on_free.
	 */
}
51261 ++
51262 ++static int wg_get_device_done(struct netlink_callback *cb)
51263 ++{
51264 ++ struct dump_ctx *ctx = DUMP_CTX(cb);
51265 ++
51266 ++ if (ctx->wg)
51267 ++ dev_put(ctx->wg->dev);
51268 ++ wg_peer_put(ctx->next_peer);
51269 ++ return 0;
51270 ++}
51271 ++
51272 ++static int set_port(struct wg_device *wg, u16 port)
51273 ++{
51274 ++ struct wg_peer *peer;
51275 ++
51276 ++ if (wg->incoming_port == port)
51277 ++ return 0;
51278 ++ list_for_each_entry(peer, &wg->peer_list, peer_list)
51279 ++ wg_socket_clear_peer_endpoint_src(peer);
51280 ++ if (!netif_running(wg->dev)) {
51281 ++ wg->incoming_port = port;
51282 ++ return 0;
51283 ++ }
51284 ++ return wg_socket_init(wg, port);
51285 ++}
51286 ++
51287 ++static int set_allowedip(struct wg_peer *peer, struct nlattr **attrs)
51288 ++{
51289 ++ int ret = -EINVAL;
51290 ++ u16 family;
51291 ++ u8 cidr;
51292 ++
51293 ++ if (!attrs[WGALLOWEDIP_A_FAMILY] || !attrs[WGALLOWEDIP_A_IPADDR] ||
51294 ++ !attrs[WGALLOWEDIP_A_CIDR_MASK])
51295 ++ return ret;
51296 ++ family = nla_get_u16(attrs[WGALLOWEDIP_A_FAMILY]);
51297 ++ cidr = nla_get_u8(attrs[WGALLOWEDIP_A_CIDR_MASK]);
51298 ++
51299 ++ if (family == AF_INET && cidr <= 32 &&
51300 ++ nla_len(attrs[WGALLOWEDIP_A_IPADDR]) == sizeof(struct in_addr))
51301 ++ ret = wg_allowedips_insert_v4(
51302 ++ &peer->device->peer_allowedips,
51303 ++ nla_data(attrs[WGALLOWEDIP_A_IPADDR]), cidr, peer,
51304 ++ &peer->device->device_update_lock);
51305 ++ else if (family == AF_INET6 && cidr <= 128 &&
51306 ++ nla_len(attrs[WGALLOWEDIP_A_IPADDR]) == sizeof(struct in6_addr))
51307 ++ ret = wg_allowedips_insert_v6(
51308 ++ &peer->device->peer_allowedips,
51309 ++ nla_data(attrs[WGALLOWEDIP_A_IPADDR]), cidr, peer,
51310 ++ &peer->device->device_update_lock);
51311 ++
51312 ++ return ret;
51313 ++}
51314 ++
/* Create, update or remove a single peer from its parsed nested attributes.
 *
 * Returns 0 on success (including the cases that are deliberately ignored:
 * a peer whose public key equals the device's own, a key rejected by
 * wg_peer_create() with -EKEYREJECTED, and REMOVE_ME/UPDATE_ONLY on a peer
 * that does not exist). Returns -EINVAL for a missing or mis-sized public
 * key, -EOPNOTSUPP for unknown flags, -EPFNOSUPPORT for a protocol version
 * other than 1, or the error from peer creation / allowed-IP handling.
 *
 * The preshared-key attribute payload, if present, is zeroed before
 * returning on every path.
 */
static int set_peer(struct wg_device *wg, struct nlattr **attrs)
{
	u8 *public_key = NULL, *preshared_key = NULL;
	struct wg_peer *peer = NULL;
	u32 flags = 0;
	int ret;

	/* ret is staged before each check so that "goto out" reports the
	 * error belonging to the check that failed.
	 */
	ret = -EINVAL;
	if (attrs[WGPEER_A_PUBLIC_KEY] &&
	    nla_len(attrs[WGPEER_A_PUBLIC_KEY]) == NOISE_PUBLIC_KEY_LEN)
		public_key = nla_data(attrs[WGPEER_A_PUBLIC_KEY]);
	else
		goto out;
	/* A preshared key of the wrong length is ignored rather than
	 * rejected: preshared_key simply stays NULL.
	 */
	if (attrs[WGPEER_A_PRESHARED_KEY] &&
	    nla_len(attrs[WGPEER_A_PRESHARED_KEY]) == NOISE_SYMMETRIC_KEY_LEN)
		preshared_key = nla_data(attrs[WGPEER_A_PRESHARED_KEY]);

	if (attrs[WGPEER_A_FLAGS])
		flags = nla_get_u32(attrs[WGPEER_A_FLAGS]);
	ret = -EOPNOTSUPP;
	if (flags & ~__WGPEER_F_ALL)
		goto out;

	ret = -EPFNOSUPPORT;
	if (attrs[WGPEER_A_PROTOCOL_VERSION]) {
		if (nla_get_u32(attrs[WGPEER_A_PROTOCOL_VERSION]) != 1)
			goto out;
	}

	peer = wg_pubkey_hashtable_lookup(wg->peer_hashtable,
					  nla_data(attrs[WGPEER_A_PUBLIC_KEY]));
	ret = 0;
	if (!peer) { /* Peer doesn't exist yet. Add a new one. */
		if (flags & (WGPEER_F_REMOVE_ME | WGPEER_F_UPDATE_ONLY))
			goto out;

		/* The peer is new, so there aren't allowed IPs to remove. */
		flags &= ~WGPEER_F_REPLACE_ALLOWEDIPS;

		down_read(&wg->static_identity.lock);
		if (wg->static_identity.has_identity &&
		    !memcmp(nla_data(attrs[WGPEER_A_PUBLIC_KEY]),
			    wg->static_identity.static_public,
			    NOISE_PUBLIC_KEY_LEN)) {
			/* We silently ignore peers that have the same public
			 * key as the device. The reason we do it silently is
			 * that we'd like for people to be able to reuse the
			 * same set of API calls across peers.
			 */
			up_read(&wg->static_identity.lock);
			ret = 0;
			goto out;
		}
		up_read(&wg->static_identity.lock);

		peer = wg_peer_create(wg, public_key, preshared_key);
		if (IS_ERR(peer)) {
			/* Similar to the above, if the key is invalid, we skip
			 * it without fanfare, so that services don't need to
			 * worry about doing key validation themselves.
			 */
			ret = PTR_ERR(peer) == -EKEYREJECTED ? 0 : PTR_ERR(peer);
			/* Reset so the wg_peer_put() at "out" is not handed
			 * an error pointer.
			 */
			peer = NULL;
			goto out;
		}
		/* Take additional reference, as though we've just been
		 * looked up.
		 */
		wg_peer_get(peer);
	}

	if (flags & WGPEER_F_REMOVE_ME) {
		wg_peer_remove(peer);
		goto out;
	}

	if (preshared_key) {
		down_write(&peer->handshake.lock);
		memcpy(&peer->handshake.preshared_key, preshared_key,
		       NOISE_SYMMETRIC_KEY_LEN);
		up_write(&peer->handshake.lock);
	}

	if (attrs[WGPEER_A_ENDPOINT]) {
		struct sockaddr *addr = nla_data(attrs[WGPEER_A_ENDPOINT]);
		size_t len = nla_len(attrs[WGPEER_A_ENDPOINT]);

		/* Only an exactly-sized IPv4 or IPv6 sockaddr is accepted;
		 * anything else is skipped without an error.
		 */
		if ((len == sizeof(struct sockaddr_in) &&
		     addr->sa_family == AF_INET) ||
		    (len == sizeof(struct sockaddr_in6) &&
		     addr->sa_family == AF_INET6)) {
			struct endpoint endpoint = { { { 0 } } };

			memcpy(&endpoint.addr, addr, len);
			wg_socket_set_peer_endpoint(peer, &endpoint);
		}
	}

	if (flags & WGPEER_F_REPLACE_ALLOWEDIPS)
		wg_allowedips_remove_by_peer(&wg->peer_allowedips, peer,
					     &wg->device_update_lock);

	if (attrs[WGPEER_A_ALLOWEDIPS]) {
		struct nlattr *attr, *allowedip[WGALLOWEDIP_A_MAX + 1];
		int rem;

		/* The first entry that fails to parse or insert aborts the
		 * whole peer update with that error.
		 */
		nla_for_each_nested(attr, attrs[WGPEER_A_ALLOWEDIPS], rem) {
			ret = nla_parse_nested(allowedip, WGALLOWEDIP_A_MAX,
					       attr, allowedip_policy, NULL);
			if (ret < 0)
				goto out;
			ret = set_allowedip(peer, allowedip);
			if (ret < 0)
				goto out;
		}
	}

	if (attrs[WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL]) {
		const u16 persistent_keepalive_interval = nla_get_u16(
				attrs[WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL]);
		/* Send a keepalive right away only when the interval goes
		 * from zero to non-zero on a running interface.
		 */
		const bool send_keepalive =
			!peer->persistent_keepalive_interval &&
			persistent_keepalive_interval &&
			netif_running(wg->dev);

		peer->persistent_keepalive_interval = persistent_keepalive_interval;
		if (send_keepalive)
			wg_packet_send_keepalive(peer);
	}

	if (netif_running(wg->dev))
		wg_packet_send_staged_packets(peer);

out:
	wg_peer_put(peer);
	/* Scrub the preshared key from the request buffer. */
	if (attrs[WGPEER_A_PRESHARED_KEY])
		memzero_explicit(nla_data(attrs[WGPEER_A_PRESHARED_KEY]),
				 nla_len(attrs[WGPEER_A_PRESHARED_KEY]));
	return ret;
}
51455 ++
/* Generic netlink ->doit handler for WG_CMD_SET_DEVICE.
 *
 * Applies, in this order: fwmark, listen port, REPLACE_PEERS, private key
 * and the nested peer list, all under rtnl_lock and the device's
 * device_update_lock.
 *
 * Returns 0 on success, the error from lookup_interface(), -EOPNOTSUPP for
 * unknown device flags, -EPERM when the listen port or fwmark is being set
 * without CAP_NET_ADMIN in the creating namespace's user namespace, or the
 * first error from set_port(), nla_parse_nested() or set_peer().
 *
 * The private-key attribute payload, if present, is zeroed before returning
 * on every path.
 */
static int wg_set_device(struct sk_buff *skb, struct genl_info *info)
{
	struct wg_device *wg = lookup_interface(info->attrs, skb);
	u32 flags = 0;
	int ret;

	if (IS_ERR(wg)) {
		ret = PTR_ERR(wg);
		goto out_nodev;
	}

	rtnl_lock();
	mutex_lock(&wg->device_update_lock);

	if (info->attrs[WGDEVICE_A_FLAGS])
		flags = nla_get_u32(info->attrs[WGDEVICE_A_FLAGS]);
	ret = -EOPNOTSUPP;
	if (flags & ~__WGDEVICE_F_ALL)
		goto out;

	ret = -EPERM;
	if ((info->attrs[WGDEVICE_A_LISTEN_PORT] ||
	     info->attrs[WGDEVICE_A_FWMARK]) &&
	    !ns_capable(wg->creating_net->user_ns, CAP_NET_ADMIN))
		goto out;

	/* Bump the generation that the dump path publishes as cb->seq. */
	++wg->device_update_gen;

	if (info->attrs[WGDEVICE_A_FWMARK]) {
		struct wg_peer *peer;

		wg->fwmark = nla_get_u32(info->attrs[WGDEVICE_A_FWMARK]);
		list_for_each_entry(peer, &wg->peer_list, peer_list)
			wg_socket_clear_peer_endpoint_src(peer);
	}

	if (info->attrs[WGDEVICE_A_LISTEN_PORT]) {
		ret = set_port(wg,
			       nla_get_u16(info->attrs[WGDEVICE_A_LISTEN_PORT]));
		if (ret)
			goto out;
	}

	if (flags & WGDEVICE_F_REPLACE_PEERS)
		wg_peer_remove_all(wg);

	/* A private key attribute of the wrong length is ignored. */
	if (info->attrs[WGDEVICE_A_PRIVATE_KEY] &&
	    nla_len(info->attrs[WGDEVICE_A_PRIVATE_KEY]) ==
		    NOISE_PUBLIC_KEY_LEN) {
		u8 *private_key = nla_data(info->attrs[WGDEVICE_A_PRIVATE_KEY]);
		u8 public_key[NOISE_PUBLIC_KEY_LEN];
		struct wg_peer *peer, *temp;

		/* Same key as the one already stored: nothing to do. */
		if (!crypto_memneq(wg->static_identity.static_private,
				   private_key, NOISE_PUBLIC_KEY_LEN))
			goto skip_set_private_key;

		/* We remove before setting, to prevent race, which means doing
		 * two 25519-genpub ops.
		 */
		if (curve25519_generate_public(public_key, private_key)) {
			peer = wg_pubkey_hashtable_lookup(wg->peer_hashtable,
							  public_key);
			if (peer) {
				/* Drop the reference obtained by the lookup
				 * before removing the peer.
				 */
				wg_peer_put(peer);
				wg_peer_remove(peer);
			}
		}

		down_write(&wg->static_identity.lock);
		wg_noise_set_static_identity_private_key(&wg->static_identity,
							 private_key);
		/* Recompute each peer's static-static value with the new
		 * key; peers for which that fails are removed, the others
		 * have their current keypairs expired.
		 */
		list_for_each_entry_safe(peer, temp, &wg->peer_list,
					 peer_list) {
			if (wg_noise_precompute_static_static(peer))
				wg_noise_expire_current_peer_keypairs(peer);
			else
				wg_peer_remove(peer);
		}
		wg_cookie_checker_precompute_device_keys(&wg->cookie_checker);
		up_write(&wg->static_identity.lock);
	}
skip_set_private_key:

	if (info->attrs[WGDEVICE_A_PEERS]) {
		struct nlattr *attr, *peer[WGPEER_A_MAX + 1];
		int rem;

		/* Stop at the first peer that fails to parse or apply. */
		nla_for_each_nested(attr, info->attrs[WGDEVICE_A_PEERS], rem) {
			ret = nla_parse_nested(peer, WGPEER_A_MAX, attr,
					       peer_policy, NULL);
			if (ret < 0)
				goto out;
			ret = set_peer(wg, peer);
			if (ret < 0)
				goto out;
		}
	}
	ret = 0;

out:
	mutex_unlock(&wg->device_update_lock);
	rtnl_unlock();
	/* NOTE(review): presumably balances a reference taken inside
	 * lookup_interface() - confirm against that helper.
	 */
	dev_put(wg->dev);
out_nodev:
	/* Scrub the private key from the request buffer. */
	if (info->attrs[WGDEVICE_A_PRIVATE_KEY])
		memzero_explicit(nla_data(info->attrs[WGDEVICE_A_PRIVATE_KEY]),
				 nla_len(info->attrs[WGDEVICE_A_PRIVATE_KEY]));
	return ret;
}
51566 ++
/* Operations exposed by the WireGuard generic netlink family: a dump-only
 * GET_DEVICE (start/dumpit/done) and a SET_DEVICE request. Both are flagged
 * GENL_UNS_ADMIN_PERM.
 */
static const struct genl_ops genl_ops[] = {
	{
		.cmd = WG_CMD_GET_DEVICE,
		.start = wg_get_device_start,
		.dumpit = wg_get_device_dump,
		.done = wg_get_device_done,
		.flags = GENL_UNS_ADMIN_PERM
	}, {
		.cmd = WG_CMD_SET_DEVICE,
		.doit = wg_set_device,
		.flags = GENL_UNS_ADMIN_PERM
	}
};
51580 ++
/* The WireGuard generic netlink family descriptor. Top-level attributes are
 * validated against device_policy up to WGDEVICE_A_MAX, and the family is
 * marked usable from network namespaces (netnsok).
 */
static struct genl_family genl_family __ro_after_init = {
	.ops = genl_ops,
	.n_ops = ARRAY_SIZE(genl_ops),
	.name = WG_GENL_NAME,
	.version = WG_GENL_VERSION,
	.maxattr = WGDEVICE_A_MAX,
	.module = THIS_MODULE,
	.policy = device_policy,
	.netnsok = true
};
51591 ++
/* Register the WireGuard generic netlink family; returns the result of
 * genl_register_family().
 */
int __init wg_genetlink_init(void)
{
	return genl_register_family(&genl_family);
}
51596 ++
/* Unregister the WireGuard generic netlink family. */
void __exit wg_genetlink_uninit(void)
{
	genl_unregister_family(&genl_family);
}
51601 +diff --git a/drivers/net/wireguard/netlink.h b/drivers/net/wireguard/netlink.h
51602 +new file mode 100644
51603 +index 000000000000..15100d92e2e3
51604 +--- /dev/null
51605 ++++ b/drivers/net/wireguard/netlink.h
51606 +@@ -0,0 +1,12 @@
51607 ++/* SPDX-License-Identifier: GPL-2.0 */
51608 ++/*
51609 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
51610 ++ */
51611 ++
51612 ++#ifndef _WG_NETLINK_H
51613 ++#define _WG_NETLINK_H
51614 ++
51615 ++int wg_genetlink_init(void);
51616 ++void wg_genetlink_uninit(void);
51617 ++
51618 ++#endif /* _WG_NETLINK_H */
51619 +diff --git a/drivers/net/wireguard/noise.c b/drivers/net/wireguard/noise.c
51620 +new file mode 100644
51621 +index 000000000000..d71c8db68a8c
51622 +--- /dev/null
51623 ++++ b/drivers/net/wireguard/noise.c
51624 +@@ -0,0 +1,828 @@
51625 ++// SPDX-License-Identifier: GPL-2.0
51626 ++/*
51627 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
51628 ++ */
51629 ++
51630 ++#include "noise.h"
51631 ++#include "device.h"
51632 ++#include "peer.h"
51633 ++#include "messages.h"
51634 ++#include "queueing.h"
51635 ++#include "peerlookup.h"
51636 ++
51637 ++#include <linux/rcupdate.h>
51638 ++#include <linux/slab.h>
51639 ++#include <linux/bitmap.h>
51640 ++#include <linux/scatterlist.h>
51641 ++#include <linux/highmem.h>
51642 ++#include <crypto/algapi.h>
51643 ++
51644 ++/* This implements Noise_IKpsk2:
51645 ++ *
51646 ++ * <- s
51647 ++ * ******
51648 ++ * -> e, es, s, ss, {t}
51649 ++ * <- e, ee, se, psk, {}
51650 ++ */
51651 ++
51652 ++static const u8 handshake_name[37] = "Noise_IKpsk2_25519_ChaChaPoly_BLAKE2s";
51653 ++static const u8 identifier_name[34] = "WireGuard v1 zx2c4 Jason@×××××.com";
51654 ++static u8 handshake_init_hash[NOISE_HASH_LEN] __ro_after_init;
51655 ++static u8 handshake_init_chaining_key[NOISE_HASH_LEN] __ro_after_init;
51656 ++static atomic64_t keypair_counter = ATOMIC64_INIT(0);
51657 ++
/* Precompute the two constants every handshake starts from:
 *   handshake_init_chaining_key = BLAKE2s(handshake_name)
 *   handshake_init_hash = BLAKE2s(handshake_init_chaining_key ||
 *                                 identifier_name)
 */
void __init wg_noise_init(void)
{
	struct blake2s_state blake;

	blake2s(handshake_init_chaining_key, handshake_name, NULL,
		NOISE_HASH_LEN, sizeof(handshake_name), 0);
	blake2s_init(&blake, NOISE_HASH_LEN);
	blake2s_update(&blake, handshake_init_chaining_key, NOISE_HASH_LEN);
	blake2s_update(&blake, identifier_name, sizeof(identifier_name));
	blake2s_final(&blake, handshake_init_hash);
}
51669 ++
51670 ++/* Must hold peer->handshake.static_identity->lock */
51671 ++bool wg_noise_precompute_static_static(struct wg_peer *peer)
51672 ++{
51673 ++ bool ret = true;
51674 ++
51675 ++ down_write(&peer->handshake.lock);
51676 ++ if (peer->handshake.static_identity->has_identity)
51677 ++ ret = curve25519(
51678 ++ peer->handshake.precomputed_static_static,
51679 ++ peer->handshake.static_identity->static_private,
51680 ++ peer->handshake.remote_static);
51681 ++ else
51682 ++ memset(peer->handshake.precomputed_static_static, 0,
51683 ++ NOISE_PUBLIC_KEY_LEN);
51684 ++ up_write(&peer->handshake.lock);
51685 ++ return ret;
51686 ++}
51687 ++
51688 ++bool wg_noise_handshake_init(struct noise_handshake *handshake,
51689 ++ struct noise_static_identity *static_identity,
51690 ++ const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN],
51691 ++ const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN],
51692 ++ struct wg_peer *peer)
51693 ++{
51694 ++ memset(handshake, 0, sizeof(*handshake));
51695 ++ init_rwsem(&handshake->lock);
51696 ++ handshake->entry.type = INDEX_HASHTABLE_HANDSHAKE;
51697 ++ handshake->entry.peer = peer;
51698 ++ memcpy(handshake->remote_static, peer_public_key, NOISE_PUBLIC_KEY_LEN);
51699 ++ if (peer_preshared_key)
51700 ++ memcpy(handshake->preshared_key, peer_preshared_key,
51701 ++ NOISE_SYMMETRIC_KEY_LEN);
51702 ++ handshake->static_identity = static_identity;
51703 ++ handshake->state = HANDSHAKE_ZEROED;
51704 ++ return wg_noise_precompute_static_static(peer);
51705 ++}
51706 ++
51707 ++static void handshake_zero(struct noise_handshake *handshake)
51708 ++{
51709 ++ memset(&handshake->ephemeral_private, 0, NOISE_PUBLIC_KEY_LEN);
51710 ++ memset(&handshake->remote_ephemeral, 0, NOISE_PUBLIC_KEY_LEN);
51711 ++ memset(&handshake->hash, 0, NOISE_HASH_LEN);
51712 ++ memset(&handshake->chaining_key, 0, NOISE_HASH_LEN);
51713 ++ handshake->remote_index = 0;
51714 ++ handshake->state = HANDSHAKE_ZEROED;
51715 ++}
51716 ++
51717 ++void wg_noise_handshake_clear(struct noise_handshake *handshake)
51718 ++{
51719 ++ wg_index_hashtable_remove(
51720 ++ handshake->entry.peer->device->index_hashtable,
51721 ++ &handshake->entry);
51722 ++ down_write(&handshake->lock);
51723 ++ handshake_zero(handshake);
51724 ++ up_write(&handshake->lock);
51725 ++ wg_index_hashtable_remove(
51726 ++ handshake->entry.peer->device->index_hashtable,
51727 ++ &handshake->entry);
51728 ++}
51729 ++
51730 ++static struct noise_keypair *keypair_create(struct wg_peer *peer)
51731 ++{
51732 ++ struct noise_keypair *keypair = kzalloc(sizeof(*keypair), GFP_KERNEL);
51733 ++
51734 ++ if (unlikely(!keypair))
51735 ++ return NULL;
51736 ++ keypair->internal_id = atomic64_inc_return(&keypair_counter);
51737 ++ keypair->entry.type = INDEX_HASHTABLE_KEYPAIR;
51738 ++ keypair->entry.peer = peer;
51739 ++ kref_init(&keypair->refcount);
51740 ++ return keypair;
51741 ++}
51742 ++
51743 ++static void keypair_free_rcu(struct rcu_head *rcu)
51744 ++{
51745 ++ kzfree(container_of(rcu, struct noise_keypair, rcu));
51746 ++}
51747 ++
51748 ++static void keypair_free_kref(struct kref *kref)
51749 ++{
51750 ++ struct noise_keypair *keypair =
51751 ++ container_of(kref, struct noise_keypair, refcount);
51752 ++
51753 ++ net_dbg_ratelimited("%s: Keypair %llu destroyed for peer %llu\n",
51754 ++ keypair->entry.peer->device->dev->name,
51755 ++ keypair->internal_id,
51756 ++ keypair->entry.peer->internal_id);
51757 ++ wg_index_hashtable_remove(keypair->entry.peer->device->index_hashtable,
51758 ++ &keypair->entry);
51759 ++ call_rcu(&keypair->rcu, keypair_free_rcu);
51760 ++}
51761 ++
51762 ++void wg_noise_keypair_put(struct noise_keypair *keypair, bool unreference_now)
51763 ++{
51764 ++ if (unlikely(!keypair))
51765 ++ return;
51766 ++ if (unlikely(unreference_now))
51767 ++ wg_index_hashtable_remove(
51768 ++ keypair->entry.peer->device->index_hashtable,
51769 ++ &keypair->entry);
51770 ++ kref_put(&keypair->refcount, keypair_free_kref);
51771 ++}
51772 ++
51773 ++struct noise_keypair *wg_noise_keypair_get(struct noise_keypair *keypair)
51774 ++{
51775 ++ RCU_LOCKDEP_WARN(!rcu_read_lock_bh_held(),
51776 ++ "Taking noise keypair reference without holding the RCU BH read lock");
51777 ++ if (unlikely(!keypair || !kref_get_unless_zero(&keypair->refcount)))
51778 ++ return NULL;
51779 ++ return keypair;
51780 ++}
51781 ++
/* Detach and release all three keypair slots (next, previous, current) under
 * keypair_update_lock. Each slot is set to NULL and its old keypair is put
 * with immediate removal from the index hashtable.
 */
void wg_noise_keypairs_clear(struct noise_keypairs *keypairs)
{
	struct noise_keypair *old;

	spin_lock_bh(&keypairs->keypair_update_lock);

	/* We zero the next_keypair before zeroing the others, so that
	 * wg_noise_received_with_keypair returns early before subsequent ones
	 * are zeroed.
	 */
	old = rcu_dereference_protected(keypairs->next_keypair,
		lockdep_is_held(&keypairs->keypair_update_lock));
	RCU_INIT_POINTER(keypairs->next_keypair, NULL);
	wg_noise_keypair_put(old, true);

	old = rcu_dereference_protected(keypairs->previous_keypair,
		lockdep_is_held(&keypairs->keypair_update_lock));
	RCU_INIT_POINTER(keypairs->previous_keypair, NULL);
	wg_noise_keypair_put(old, true);

	old = rcu_dereference_protected(keypairs->current_keypair,
		lockdep_is_held(&keypairs->keypair_update_lock));
	RCU_INIT_POINTER(keypairs->current_keypair, NULL);
	wg_noise_keypair_put(old, true);

	spin_unlock_bh(&keypairs->keypair_update_lock);
}
51809 ++
51810 ++void wg_noise_expire_current_peer_keypairs(struct wg_peer *peer)
51811 ++{
51812 ++ struct noise_keypair *keypair;
51813 ++
51814 ++ wg_noise_handshake_clear(&peer->handshake);
51815 ++ wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake);
51816 ++
51817 ++ spin_lock_bh(&peer->keypairs.keypair_update_lock);
51818 ++ keypair = rcu_dereference_protected(peer->keypairs.next_keypair,
51819 ++ lockdep_is_held(&peer->keypairs.keypair_update_lock));
51820 ++ if (keypair)
51821 ++ keypair->sending.is_valid = false;
51822 ++ keypair = rcu_dereference_protected(peer->keypairs.current_keypair,
51823 ++ lockdep_is_held(&peer->keypairs.keypair_update_lock));
51824 ++ if (keypair)
51825 ++ keypair->sending.is_valid = false;
51826 ++ spin_unlock_bh(&peer->keypairs.keypair_update_lock);
51827 ++}
51828 ++
/* Install a freshly derived keypair into the peer's keypair slots, under
 * keypair_update_lock.
 *
 * An initiator's keypair becomes current immediately; a responder's keypair
 * is parked in the next slot until wg_noise_received_with_keypair() promotes
 * it. Keypairs displaced from their slot are released with immediate removal
 * from the index hashtable.
 */
static void add_new_keypair(struct noise_keypairs *keypairs,
			    struct noise_keypair *new_keypair)
{
	struct noise_keypair *previous_keypair, *next_keypair, *current_keypair;

	spin_lock_bh(&keypairs->keypair_update_lock);
	previous_keypair = rcu_dereference_protected(keypairs->previous_keypair,
		lockdep_is_held(&keypairs->keypair_update_lock));
	next_keypair = rcu_dereference_protected(keypairs->next_keypair,
		lockdep_is_held(&keypairs->keypair_update_lock));
	current_keypair = rcu_dereference_protected(keypairs->current_keypair,
		lockdep_is_held(&keypairs->keypair_update_lock));
	if (new_keypair->i_am_the_initiator) {
		/* If we're the initiator, it means we've sent a handshake, and
		 * received a confirmation response, which means this new
		 * keypair can now be used.
		 */
		if (next_keypair) {
			/* If there already was a next keypair pending, we
			 * demote it to be the previous keypair, and free the
			 * existing current. Note that this means KCI can result
			 * in this transition. It would perhaps be more sound to
			 * always just get rid of the unused next keypair
			 * instead of putting it in the previous slot, but this
			 * might be a bit less robust. Something to think about
			 * for the future.
			 */
			RCU_INIT_POINTER(keypairs->next_keypair, NULL);
			rcu_assign_pointer(keypairs->previous_keypair,
					   next_keypair);
			wg_noise_keypair_put(current_keypair, true);
		} else /* If there wasn't an existing next keypair, we replace
			* the previous with the current one.
			*/
			rcu_assign_pointer(keypairs->previous_keypair,
					   current_keypair);
		/* At this point we can get rid of the old previous keypair, and
		 * set up the new keypair.
		 */
		wg_noise_keypair_put(previous_keypair, true);
		rcu_assign_pointer(keypairs->current_keypair, new_keypair);
	} else {
		/* If we're the responder, it means we can't use the new keypair
		 * until we receive confirmation via the first data packet, so
		 * we get rid of the existing previous one, the possibly
		 * existing next one, and slide in the new next one.
		 */
		rcu_assign_pointer(keypairs->next_keypair, new_keypair);
		wg_noise_keypair_put(next_keypair, true);
		RCU_INIT_POINTER(keypairs->previous_keypair, NULL);
		wg_noise_keypair_put(previous_keypair, true);
	}
	spin_unlock_bh(&keypairs->keypair_update_lock);
}
51883 ++
/* Called when data has been received with received_keypair. If that keypair
 * is the pending next keypair, promote it: next becomes current, current
 * becomes previous, and the old previous is released.
 *
 * Returns true when the promotion happened, false otherwise (including when
 * the slots changed between the lockless check and taking the lock).
 */
bool wg_noise_received_with_keypair(struct noise_keypairs *keypairs,
				    struct noise_keypair *received_keypair)
{
	struct noise_keypair *old_keypair;
	bool key_is_new;

	/* We first check without taking the spinlock. */
	key_is_new = received_keypair ==
		     rcu_access_pointer(keypairs->next_keypair);
	if (likely(!key_is_new))
		return false;

	spin_lock_bh(&keypairs->keypair_update_lock);
	/* After locking, we double check that things didn't change from
	 * beneath us.
	 */
	if (unlikely(received_keypair !=
		    rcu_dereference_protected(keypairs->next_keypair,
			    lockdep_is_held(&keypairs->keypair_update_lock)))) {
		spin_unlock_bh(&keypairs->keypair_update_lock);
		return false;
	}

	/* When we've finally received the confirmation, we slide the next
	 * into the current, the current into the previous, and get rid of
	 * the old previous.
	 */
	old_keypair = rcu_dereference_protected(keypairs->previous_keypair,
		lockdep_is_held(&keypairs->keypair_update_lock));
	rcu_assign_pointer(keypairs->previous_keypair,
		rcu_dereference_protected(keypairs->current_keypair,
			lockdep_is_held(&keypairs->keypair_update_lock)));
	wg_noise_keypair_put(old_keypair, true);
	rcu_assign_pointer(keypairs->current_keypair, received_keypair);
	RCU_INIT_POINTER(keypairs->next_keypair, NULL);

	spin_unlock_bh(&keypairs->keypair_update_lock);
	return true;
}
51923 ++
/* Store a new static private key in the identity, clamp the stored copy, and
 * derive the matching public key. has_identity is set to the result of
 * curve25519_generate_public(), so it ends up false when no public key could
 * be generated.
 *
 * Must hold static_identity->lock
 */
void wg_noise_set_static_identity_private_key(
	struct noise_static_identity *static_identity,
	const u8 private_key[NOISE_PUBLIC_KEY_LEN])
{
	memcpy(static_identity->static_private, private_key,
	       NOISE_PUBLIC_KEY_LEN);
	curve25519_clamp_secret(static_identity->static_private);
	/* Note: the public key is derived from the caller's buffer, not from
	 * the clamped copy stored above.
	 */
	static_identity->has_identity = curve25519_generate_public(
		static_identity->static_public, private_key);
}
51935 ++
/* This is Hugo Krawczyk's HKDF:
 *  - https://eprint.iacr.org/2010/264.pdf
 *  - https://tools.ietf.org/html/rfc5869
 *
 * Extracts a secret from data keyed with chaining_key, then expands up to
 * three outputs from it. Outputs are produced in order and expansion stops
 * at the first destination that is NULL or has zero length; each output is
 * truncated to its requested length (at most BLAKE2S_HASH_SIZE).
 *
 * first_dst/second_dst/third_dst: output buffers, may be NULL from the first
 *                                 unused one onwards.
 * data, data_len:                 input keying material.
 * first_len/second_len/third_len: number of bytes copied to each output.
 * chaining_key:                   HMAC key for the extract step.
 */
static void kdf(u8 *first_dst, u8 *second_dst, u8 *third_dst, const u8 *data,
		size_t first_len, size_t second_len, size_t third_len,
		size_t data_len, const u8 chaining_key[NOISE_HASH_LEN])
{
	/* One extra byte so the counter can be appended to the previous
	 * block when expanding the second and third outputs.
	 */
	u8 output[BLAKE2S_HASH_SIZE + 1];
	u8 secret[BLAKE2S_HASH_SIZE];

	/* Argument sanity check, only active when DEBUG is enabled. */
	WARN_ON(IS_ENABLED(DEBUG) &&
		(first_len > BLAKE2S_HASH_SIZE ||
		 second_len > BLAKE2S_HASH_SIZE ||
		 third_len > BLAKE2S_HASH_SIZE ||
		 ((second_len || second_dst || third_len || third_dst) &&
		  (!first_len || !first_dst)) ||
		 ((third_len || third_dst) && (!second_len || !second_dst))));

	/* Extract entropy from data into secret */
	blake2s256_hmac(secret, data, chaining_key, data_len, NOISE_HASH_LEN);

	if (!first_dst || !first_len)
		goto out;

	/* Expand first key: key = secret, data = 0x1 */
	output[0] = 1;
	blake2s256_hmac(output, output, secret, 1, BLAKE2S_HASH_SIZE);
	memcpy(first_dst, output, first_len);

	if (!second_dst || !second_len)
		goto out;

	/* Expand second key: key = secret, data = first-key || 0x2 */
	output[BLAKE2S_HASH_SIZE] = 2;
	blake2s256_hmac(output, output, secret, BLAKE2S_HASH_SIZE + 1,
			BLAKE2S_HASH_SIZE);
	memcpy(second_dst, output, second_len);

	if (!third_dst || !third_len)
		goto out;

	/* Expand third key: key = secret, data = second-key || 0x3 */
	output[BLAKE2S_HASH_SIZE] = 3;
	blake2s256_hmac(output, output, secret, BLAKE2S_HASH_SIZE + 1,
			BLAKE2S_HASH_SIZE);
	memcpy(third_dst, output, third_len);

out:
	/* Clear sensitive data from stack */
	memzero_explicit(secret, BLAKE2S_HASH_SIZE);
	memzero_explicit(output, BLAKE2S_HASH_SIZE + 1);
}
51989 ++
51990 ++static void symmetric_key_init(struct noise_symmetric_key *key)
51991 ++{
51992 ++ spin_lock_init(&key->counter.receive.lock);
51993 ++ atomic64_set(&key->counter.counter, 0);
51994 ++ memset(key->counter.receive.backtrack, 0,
51995 ++ sizeof(key->counter.receive.backtrack));
51996 ++ key->birthdate = ktime_get_coarse_boottime_ns();
51997 ++ key->is_valid = true;
51998 ++}
51999 ++
/* Derive two symmetric keys from chaining_key (KDF with empty input data)
 * into first_dst and second_dst, then initialise the bookkeeping of both.
 */
static void derive_keys(struct noise_symmetric_key *first_dst,
			struct noise_symmetric_key *second_dst,
			const u8 chaining_key[NOISE_HASH_LEN])
{
	kdf(first_dst->key, second_dst->key, NULL, NULL,
	    NOISE_SYMMETRIC_KEY_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, 0,
	    chaining_key);
	symmetric_key_init(first_dst);
	symmetric_key_init(second_dst);
}
52010 ++
52011 ++static bool __must_check mix_dh(u8 chaining_key[NOISE_HASH_LEN],
52012 ++ u8 key[NOISE_SYMMETRIC_KEY_LEN],
52013 ++ const u8 private[NOISE_PUBLIC_KEY_LEN],
52014 ++ const u8 public[NOISE_PUBLIC_KEY_LEN])
52015 ++{
52016 ++ u8 dh_calculation[NOISE_PUBLIC_KEY_LEN];
52017 ++
52018 ++ if (unlikely(!curve25519(dh_calculation, private, public)))
52019 ++ return false;
52020 ++ kdf(chaining_key, key, NULL, dh_calculation, NOISE_HASH_LEN,
52021 ++ NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, chaining_key);
52022 ++ memzero_explicit(dh_calculation, NOISE_PUBLIC_KEY_LEN);
52023 ++ return true;
52024 ++}
52025 ++
52026 ++static void mix_hash(u8 hash[NOISE_HASH_LEN], const u8 *src, size_t src_len)
52027 ++{ /* Replace @hash with BLAKE2s(@hash || @src), using a NOISE_HASH_LEN-byte digest. */
52028 ++ struct blake2s_state blake;
52029 ++
52030 ++ blake2s_init(&blake, NOISE_HASH_LEN);
52031 ++ blake2s_update(&blake, hash, NOISE_HASH_LEN); /* the old running hash goes in first ... */
52032 ++ blake2s_update(&blake, src, src_len); /* ... followed by the new data */
52033 ++ blake2s_final(&blake, hash); /* the digest is written back into @hash */
52034 ++}
52035 ++
52036 ++static void mix_psk(u8 chaining_key[NOISE_HASH_LEN], u8 hash[NOISE_HASH_LEN],
52037 ++ u8 key[NOISE_SYMMETRIC_KEY_LEN],
52038 ++ const u8 psk[NOISE_SYMMETRIC_KEY_LEN])
52039 ++{ /* Mix @psk in: kdf() gets three outputs (@chaining_key, a temporary hash, @key); the temporary is then folded into @hash. */
52040 ++ u8 temp_hash[NOISE_HASH_LEN];
52041 ++
52042 ++ kdf(chaining_key, temp_hash, key, psk, NOISE_HASH_LEN, NOISE_HASH_LEN,
52043 ++ NOISE_SYMMETRIC_KEY_LEN, NOISE_SYMMETRIC_KEY_LEN, chaining_key);
52044 ++ mix_hash(hash, temp_hash, NOISE_HASH_LEN);
52045 ++ memzero_explicit(temp_hash, NOISE_HASH_LEN); /* the temporary is PSK-derived, so wipe it */
52046 ++}
52047 ++
52048 ++static void handshake_init(u8 chaining_key[NOISE_HASH_LEN],
52049 ++ u8 hash[NOISE_HASH_LEN],
52050 ++ const u8 remote_static[NOISE_PUBLIC_KEY_LEN])
52051 ++{ /* Start a handshake transcript: load the two initial values, then mix @remote_static into @hash. */
52052 ++ memcpy(hash, handshake_init_hash, NOISE_HASH_LEN); /* handshake_init_hash is defined outside this chunk */
52053 ++ memcpy(chaining_key, handshake_init_chaining_key, NOISE_HASH_LEN); /* likewise defined outside this chunk */
52054 ++ mix_hash(hash, remote_static, NOISE_PUBLIC_KEY_LEN);
52055 ++}
52056 ++
52057 ++static void message_encrypt(u8 *dst_ciphertext, const u8 *src_plaintext,
52058 ++ size_t src_len, u8 key[NOISE_SYMMETRIC_KEY_LEN],
52059 ++ u8 hash[NOISE_HASH_LEN])
52060 ++{ /* Encrypt @src_len plaintext bytes with @hash as associated data, then mix the resulting ciphertext into @hash. */
52061 ++ chacha20poly1305_encrypt(dst_ciphertext, src_plaintext, src_len, hash,
52062 ++ NOISE_HASH_LEN,
52063 ++ 0 /* Always zero for Noise_IK */, key);
52064 ++ mix_hash(hash, dst_ciphertext, noise_encrypted_len(src_len)); /* @dst_ciphertext must hold noise_encrypted_len(src_len) bytes */
52065 ++}
52066 ++
52067 ++static bool message_decrypt(u8 *dst_plaintext, const u8 *src_ciphertext,
52068 ++ size_t src_len, u8 key[NOISE_SYMMETRIC_KEY_LEN],
52069 ++ u8 hash[NOISE_HASH_LEN])
52070 ++{ /* Decrypt @src_len ciphertext bytes with @hash as associated data; returns false on failure, leaving @hash unchanged. */
52071 ++ if (!chacha20poly1305_decrypt(dst_plaintext, src_ciphertext, src_len,
52072 ++ hash, NOISE_HASH_LEN,
52073 ++ 0 /* Always zero for Noise_IK */, key))
52074 ++ return false;
52075 ++ mix_hash(hash, src_ciphertext, src_len); /* only a successfully decrypted ciphertext enters the hash */
52076 ++ return true;
52077 ++}
52078 ++
52079 ++static void message_ephemeral(u8 ephemeral_dst[NOISE_PUBLIC_KEY_LEN],
52080 ++ const u8 ephemeral_src[NOISE_PUBLIC_KEY_LEN],
52081 ++ u8 chaining_key[NOISE_HASH_LEN],
52082 ++ u8 hash[NOISE_HASH_LEN])
52083 ++{ /* Record an ephemeral public key: copy it to @ephemeral_dst, mix it into @hash and feed it through kdf() into @chaining_key. */
52084 ++ if (ephemeral_dst != ephemeral_src) /* callers in this file may pass the same buffer for both */
52085 ++ memcpy(ephemeral_dst, ephemeral_src, NOISE_PUBLIC_KEY_LEN);
52086 ++ mix_hash(hash, ephemeral_src, NOISE_PUBLIC_KEY_LEN);
52087 ++ kdf(chaining_key, NULL, NULL, ephemeral_src, NOISE_HASH_LEN, 0, 0, /* only the first kdf() output is requested */
52088 ++ NOISE_PUBLIC_KEY_LEN, chaining_key);
52089 ++}
52090 ++
52091 ++static void tai64n_now(u8 output[NOISE_TIMESTAMP_LEN])
52092 ++{ /* Write the current wall-clock time to @output: a big-endian 64-bit seconds label followed by big-endian 32-bit nanoseconds. */
52093 ++ struct timespec64 now;
52094 ++
52095 ++ ktime_get_real_ts64(&now);
52096 ++
52097 ++ /* In order to prevent some sort of infoleak from precise timers, we
52098 ++ * round the nanoseconds part down to a multiple of the largest power
52099 ++ * of two not exceeding NSEC_PER_SEC / INITIATIONS_PER_SECOND, the
52100 ++ * shortest spacing between initiations that is accepted anyway.
52101 ++ */
52102 ++ now.tv_nsec = ALIGN_DOWN(now.tv_nsec,
52103 ++ rounddown_pow_of_two(NSEC_PER_SEC / INITIATIONS_PER_SECOND));
52104 ++
52105 ++ /* https://cr.yp.to/libtai/tai64.html */
52106 ++ *(__be64 *)output = cpu_to_be64(0x400000000000000aULL + now.tv_sec); /* bytes 0-7: seconds plus the constant offset */
52107 ++ *(__be32 *)(output + sizeof(__be64)) = cpu_to_be32(now.tv_nsec); /* bytes 8-11: the rounded nanoseconds */
52108 ++}
52109 ++
52110 ++bool
52111 ++wg_noise_handshake_create_initiation(struct message_handshake_initiation *dst,
52112 ++ struct noise_handshake *handshake)
52113 ++{ /* Build a handshake initiation in @dst and advance @handshake to HANDSHAKE_CREATED_INITIATION; true on success. */
52114 ++ u8 timestamp[NOISE_TIMESTAMP_LEN];
52115 ++ u8 key[NOISE_SYMMETRIC_KEY_LEN];
52116 ++ bool ret = false;
52117 ++
52118 ++ /* We need to wait for crng _before_ taking any locks, since
52119 ++ * curve25519_generate_secret uses get_random_bytes_wait.
52120 ++ */
52121 ++ wait_for_random_bytes();
52122 ++
52123 ++ down_read(&handshake->static_identity->lock); /* lock order: identity lock (read) first ... */
52124 ++ down_write(&handshake->lock); /* ... then the handshake lock (write), since the handshake state is rewritten */
52125 ++
52126 ++ if (unlikely(!handshake->static_identity->has_identity))
52127 ++ goto out;
52128 ++
52129 ++ dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION);
52130 ++
52131 ++ handshake_init(handshake->chaining_key, handshake->hash,
52132 ++ handshake->remote_static);
52133 ++
52134 ++ /* e */
52135 ++ curve25519_generate_secret(handshake->ephemeral_private);
52136 ++ if (!curve25519_generate_public(dst->unencrypted_ephemeral,
52137 ++ handshake->ephemeral_private))
52138 ++ goto out; /* @dst is left partially filled on this and the following failure path */
52139 ++ message_ephemeral(dst->unencrypted_ephemeral,
52140 ++ dst->unencrypted_ephemeral, handshake->chaining_key,
52141 ++ handshake->hash);
52142 ++
52143 ++ /* es */
52144 ++ if (!mix_dh(handshake->chaining_key, key, handshake->ephemeral_private,
52145 ++ handshake->remote_static))
52146 ++ goto out;
52147 ++
52148 ++ /* s */
52149 ++ message_encrypt(dst->encrypted_static,
52150 ++ handshake->static_identity->static_public,
52151 ++ NOISE_PUBLIC_KEY_LEN, key, handshake->hash);
52152 ++
52153 ++ /* ss */
52154 ++ kdf(handshake->chaining_key, key, NULL, /* uses the stored precomputed_static_static value; no curve25519() call here */
52155 ++ handshake->precomputed_static_static, NOISE_HASH_LEN,
52156 ++ NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN,
52157 ++ handshake->chaining_key);
52158 ++
52159 ++ /* {t} */
52160 ++ tai64n_now(timestamp);
52161 ++ message_encrypt(dst->encrypted_timestamp, timestamp,
52162 ++ NOISE_TIMESTAMP_LEN, key, handshake->hash);
52163 ++
52164 ++ dst->sender_index = wg_index_hashtable_insert( /* the value returned by the insert becomes our sender index */
52165 ++ handshake->entry.peer->device->index_hashtable,
52166 ++ &handshake->entry);
52167 ++
52168 ++ handshake->state = HANDSHAKE_CREATED_INITIATION;
52169 ++ ret = true;
52170 ++
52171 ++out:
52172 ++ up_write(&handshake->lock);
52173 ++ up_read(&handshake->static_identity->lock);
52174 ++ memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN); /* wipe the temporary key on every exit path */
52175 ++ return ret;
52176 ++}
52177 ++
52178 ++struct wg_peer *
52179 ++wg_noise_handshake_consume_initiation(struct message_handshake_initiation *src,
52180 ++ struct wg_device *wg)
52181 ++{ /* Process a received initiation; returns the matching peer and records the handshake state on it, or NULL on any failure. */
52182 ++ struct wg_peer *peer = NULL, *ret_peer = NULL;
52183 ++ struct noise_handshake *handshake;
52184 ++ bool replay_attack, flood_attack;
52185 ++ u8 key[NOISE_SYMMETRIC_KEY_LEN];
52186 ++ u8 chaining_key[NOISE_HASH_LEN];
52187 ++ u8 hash[NOISE_HASH_LEN];
52188 ++ u8 s[NOISE_PUBLIC_KEY_LEN];
52189 ++ u8 e[NOISE_PUBLIC_KEY_LEN];
52190 ++ u8 t[NOISE_TIMESTAMP_LEN];
52191 ++ u64 initiation_consumption;
52192 ++
52193 ++ down_read(&wg->static_identity.lock);
52194 ++ if (unlikely(!wg->static_identity.has_identity))
52195 ++ goto out;
52196 ++
52197 ++ handshake_init(chaining_key, hash, wg->static_identity.static_public); /* work on stack copies until the message is fully validated */
52198 ++
52199 ++ /* e */
52200 ++ message_ephemeral(e, src->unencrypted_ephemeral, chaining_key, hash);
52201 ++
52202 ++ /* es */
52203 ++ if (!mix_dh(chaining_key, key, wg->static_identity.static_private, e))
52204 ++ goto out;
52205 ++
52206 ++ /* s */
52207 ++ if (!message_decrypt(s, src->encrypted_static,
52208 ++ sizeof(src->encrypted_static), key, hash))
52209 ++ goto out;
52210 ++
52211 ++ /* Lookup which peer we're actually talking to */
52212 ++ peer = wg_pubkey_hashtable_lookup(wg->peer_hashtable, s); /* presumably returns a referenced peer: failure paths drop it with wg_peer_put() */
52213 ++ if (!peer)
52214 ++ goto out;
52215 ++ handshake = &peer->handshake;
52216 ++
52217 ++ /* ss */
52218 ++ kdf(chaining_key, key, NULL, handshake->precomputed_static_static,
52219 ++ NOISE_HASH_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN,
52220 ++ chaining_key);
52221 ++
52222 ++ /* {t} */
52223 ++ if (!message_decrypt(t, src->encrypted_timestamp,
52224 ++ sizeof(src->encrypted_timestamp), key, hash))
52225 ++ goto out;
52226 ++
52227 ++ down_read(&handshake->lock);
52228 ++ replay_attack = memcmp(t, handshake->latest_timestamp, /* a timestamp not strictly newer than the last accepted one is rejected */
52229 ++ NOISE_TIMESTAMP_LEN) <= 0;
52230 ++ flood_attack = (s64)handshake->last_initiation_consumption + /* so is an initiation arriving too soon after the previous one */
52231 ++ NSEC_PER_SEC / INITIATIONS_PER_SECOND >
52232 ++ (s64)ktime_get_coarse_boottime_ns();
52233 ++ up_read(&handshake->lock);
52234 ++ if (replay_attack || flood_attack)
52235 ++ goto out;
52236 ++
52237 ++ /* Success! Copy everything to peer */
52238 ++ down_write(&handshake->lock);
52239 ++ memcpy(handshake->remote_ephemeral, e, NOISE_PUBLIC_KEY_LEN);
52240 ++ if (memcmp(t, handshake->latest_timestamp, NOISE_TIMESTAMP_LEN) > 0) /* re-checked: the lock was dropped after the test above */
52241 ++ memcpy(handshake->latest_timestamp, t, NOISE_TIMESTAMP_LEN);
52242 ++ memcpy(handshake->hash, hash, NOISE_HASH_LEN);
52243 ++ memcpy(handshake->chaining_key, chaining_key, NOISE_HASH_LEN);
52244 ++ handshake->remote_index = src->sender_index;
52245 ++ if ((s64)(handshake->last_initiation_consumption - /* only ever move last_initiation_consumption forward */
52246 ++ (initiation_consumption = ktime_get_coarse_boottime_ns())) < 0)
52247 ++ handshake->last_initiation_consumption = initiation_consumption;
52248 ++ handshake->state = HANDSHAKE_CONSUMED_INITIATION;
52249 ++ up_write(&handshake->lock);
52250 ++ ret_peer = peer;
52251 ++
52252 ++out:
52253 ++ memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN);
52254 ++ memzero_explicit(hash, NOISE_HASH_LEN);
52255 ++ memzero_explicit(chaining_key, NOISE_HASH_LEN);
52256 ++ up_read(&wg->static_identity.lock);
52257 ++ if (!ret_peer)
52258 ++ wg_peer_put(peer); /* wg_peer_put() ignores NULL, so this is safe when no peer was found */
52259 ++ return ret_peer;
52260 ++}
52261 ++
52262 ++bool wg_noise_handshake_create_response(struct message_handshake_response *dst,
52263 ++ struct noise_handshake *handshake)
52264 ++{ /* Build a handshake response in @dst; only valid in HANDSHAKE_CONSUMED_INITIATION, and moves to HANDSHAKE_CREATED_RESPONSE on success. */
52265 ++ u8 key[NOISE_SYMMETRIC_KEY_LEN];
52266 ++ bool ret = false;
52267 ++
52268 ++ /* We need to wait for crng _before_ taking any locks, since
52269 ++ * curve25519_generate_secret uses get_random_bytes_wait.
52270 ++ */
52271 ++ wait_for_random_bytes();
52272 ++
52273 ++ down_read(&handshake->static_identity->lock);
52274 ++ down_write(&handshake->lock);
52275 ++
52276 ++ if (handshake->state != HANDSHAKE_CONSUMED_INITIATION)
52277 ++ goto out;
52278 ++
52279 ++ dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE);
52280 ++ dst->receiver_index = handshake->remote_index; /* the index stored when the initiation was consumed */
52281 ++
52282 ++ /* e */
52283 ++ curve25519_generate_secret(handshake->ephemeral_private);
52284 ++ if (!curve25519_generate_public(dst->unencrypted_ephemeral,
52285 ++ handshake->ephemeral_private))
52286 ++ goto out;
52287 ++ message_ephemeral(dst->unencrypted_ephemeral,
52288 ++ dst->unencrypted_ephemeral, handshake->chaining_key,
52289 ++ handshake->hash);
52290 ++
52291 ++ /* ee */
52292 ++ if (!mix_dh(handshake->chaining_key, NULL, handshake->ephemeral_private, /* NULL: no key output is requested from this step */
52293 ++ handshake->remote_ephemeral))
52294 ++ goto out;
52295 ++
52296 ++ /* se */
52297 ++ if (!mix_dh(handshake->chaining_key, NULL, handshake->ephemeral_private,
52298 ++ handshake->remote_static))
52299 ++ goto out;
52300 ++
52301 ++ /* psk */
52302 ++ mix_psk(handshake->chaining_key, handshake->hash, key, /* preshared_key is read while handshake->lock is held */
52303 ++ handshake->preshared_key);
52304 ++
52305 ++ /* {} */
52306 ++ message_encrypt(dst->encrypted_nothing, NULL, 0, key, handshake->hash); /* zero-length plaintext */
52307 ++
52308 ++ dst->sender_index = wg_index_hashtable_insert(
52309 ++ handshake->entry.peer->device->index_hashtable,
52310 ++ &handshake->entry);
52311 ++
52312 ++ handshake->state = HANDSHAKE_CREATED_RESPONSE;
52313 ++ ret = true;
52314 ++
52315 ++out:
52316 ++ up_write(&handshake->lock);
52317 ++ up_read(&handshake->static_identity->lock);
52318 ++ memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN); /* wipe the temporary key on every exit path */
52319 ++ return ret;
52320 ++}
52321 ++
52322 ++struct wg_peer *
52323 ++wg_noise_handshake_consume_response(struct message_handshake_response *src,
52324 ++ struct wg_device *wg)
52325 ++{ /* Process a received response; returns the peer and moves its handshake to HANDSHAKE_CONSUMED_RESPONSE, or NULL on any failure. */
52326 ++ enum noise_handshake_state state = HANDSHAKE_ZEROED;
52327 ++ struct wg_peer *peer = NULL, *ret_peer = NULL;
52328 ++ struct noise_handshake *handshake;
52329 ++ u8 key[NOISE_SYMMETRIC_KEY_LEN];
52330 ++ u8 hash[NOISE_HASH_LEN];
52331 ++ u8 chaining_key[NOISE_HASH_LEN];
52332 ++ u8 e[NOISE_PUBLIC_KEY_LEN];
52333 ++ u8 ephemeral_private[NOISE_PUBLIC_KEY_LEN];
52334 ++ u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN];
52335 ++
52336 ++ down_read(&wg->static_identity.lock);
52337 ++
52338 ++ if (unlikely(!wg->static_identity.has_identity))
52339 ++ goto out;
52340 ++
52341 ++ handshake = (struct noise_handshake *)wg_index_hashtable_lookup(
52342 ++ wg->index_hashtable, INDEX_HASHTABLE_HANDSHAKE,
52343 ++ src->receiver_index, &peer);
52344 ++ if (unlikely(!handshake))
52345 ++ goto out;
52346 ++
52347 ++ down_read(&handshake->lock); /* snapshot everything needed from the handshake while the lock is held */
52348 ++ state = handshake->state;
52349 ++ memcpy(hash, handshake->hash, NOISE_HASH_LEN);
52350 ++ memcpy(chaining_key, handshake->chaining_key, NOISE_HASH_LEN);
52351 ++ memcpy(ephemeral_private, handshake->ephemeral_private, NOISE_PUBLIC_KEY_LEN);
52352 ++ memcpy(preshared_key, handshake->preshared_key, NOISE_SYMMETRIC_KEY_LEN); /* preshared_key is lock-protected, so copy it here too */
52353 ++ up_read(&handshake->lock);
52354 ++
52355 ++ if (state != HANDSHAKE_CREATED_INITIATION)
52356 ++ goto fail;
52357 ++
52358 ++ /* e */
52359 ++ message_ephemeral(e, src->unencrypted_ephemeral, chaining_key, hash);
52360 ++
52361 ++ /* ee */
52362 ++ if (!mix_dh(chaining_key, NULL, ephemeral_private, e))
52363 ++ goto fail;
52364 ++
52365 ++ /* se */
52366 ++ if (!mix_dh(chaining_key, NULL, wg->static_identity.static_private, e))
52367 ++ goto fail;
52368 ++
52369 ++ /* psk */
52370 ++ mix_psk(chaining_key, hash, key, preshared_key); /* uses the locked snapshot, not the live handshake field */
52371 ++
52372 ++ /* {} */
52373 ++ if (!message_decrypt(NULL, src->encrypted_nothing,
52374 ++ sizeof(src->encrypted_nothing), key, hash))
52375 ++ goto fail;
52376 ++
52377 ++ /* Success! Copy everything to peer */
52378 ++ down_write(&handshake->lock);
52379 ++ /* It's important to check that the state is still the same, while we
52380 ++ * have an exclusive lock.
52381 ++ */
52382 ++ if (handshake->state != state) {
52383 ++ up_write(&handshake->lock);
52384 ++ goto fail;
52385 ++ }
52386 ++ memcpy(handshake->remote_ephemeral, e, NOISE_PUBLIC_KEY_LEN);
52387 ++ memcpy(handshake->hash, hash, NOISE_HASH_LEN);
52388 ++ memcpy(handshake->chaining_key, chaining_key, NOISE_HASH_LEN);
52389 ++ handshake->remote_index = src->sender_index;
52390 ++ handshake->state = HANDSHAKE_CONSUMED_RESPONSE;
52391 ++ up_write(&handshake->lock);
52392 ++ ret_peer = peer;
52393 ++ goto out;
52394 ++
52395 ++fail:
52396 ++ wg_peer_put(peer); /* drop the peer obtained through the index lookup */
52397 ++out:
52398 ++ memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN);
52399 ++ memzero_explicit(hash, NOISE_HASH_LEN);
52400 ++ memzero_explicit(chaining_key, NOISE_HASH_LEN);
52401 ++ memzero_explicit(ephemeral_private, NOISE_PUBLIC_KEY_LEN);
52402 ++ memzero_explicit(preshared_key, NOISE_SYMMETRIC_KEY_LEN); /* the PSK copy is secret material too */
52403 ++ up_read(&wg->static_identity.lock);
52404 ++ return ret_peer;
52405 ++}
52406 ++
52407 ++bool wg_noise_handshake_begin_session(struct noise_handshake *handshake,
52408 ++ struct noise_keypairs *keypairs)
52409 ++{ /* Turn a completed handshake into a new keypair in @keypairs; returns false if the state is wrong, allocation fails or the peer is dead. */
52410 ++ struct noise_keypair *new_keypair;
52411 ++ bool ret = false;
52412 ++
52413 ++ down_write(&handshake->lock);
52414 ++ if (handshake->state != HANDSHAKE_CREATED_RESPONSE && /* only these two states mean the handshake is complete */
52415 ++ handshake->state != HANDSHAKE_CONSUMED_RESPONSE)
52416 ++ goto out;
52417 ++
52418 ++ new_keypair = keypair_create(handshake->entry.peer);
52419 ++ if (!new_keypair)
52420 ++ goto out;
52421 ++ new_keypair->i_am_the_initiator = handshake->state == /* the side that consumed the response is the initiator */
52422 ++ HANDSHAKE_CONSUMED_RESPONSE;
52423 ++ new_keypair->remote_index = handshake->remote_index;
52424 ++
52425 ++ if (new_keypair->i_am_the_initiator)
52426 ++ derive_keys(&new_keypair->sending, &new_keypair->receiving,
52427 ++ handshake->chaining_key);
52428 ++ else
52429 ++ derive_keys(&new_keypair->receiving, &new_keypair->sending, /* responder: the same two outputs, assigned the other way round */
52430 ++ handshake->chaining_key);
52431 ++
52432 ++ handshake_zero(handshake); /* defined outside this chunk; presumably wipes the per-handshake secrets -- confirm */
52433 ++ rcu_read_lock_bh();
52434 ++ if (likely(!READ_ONCE(container_of(handshake, struct wg_peer, /* a dead peer must not receive new keypairs */
52435 ++ handshake)->is_dead))) {
52436 ++ add_new_keypair(keypairs, new_keypair);
52437 ++ net_dbg_ratelimited("%s: Keypair %llu created for peer %llu\n",
52438 ++ handshake->entry.peer->device->dev->name,
52439 ++ new_keypair->internal_id,
52440 ++ handshake->entry.peer->internal_id);
52441 ++ ret = wg_index_hashtable_replace( /* swap the handshake's index entry for the keypair's */
52442 ++ handshake->entry.peer->device->index_hashtable,
52443 ++ &handshake->entry, &new_keypair->entry);
52444 ++ } else {
52445 ++ kzfree(new_keypair); /* zero and free, since it already holds derived keys */
52446 ++ }
52447 ++ rcu_read_unlock_bh();
52448 ++
52449 ++out:
52450 ++ up_write(&handshake->lock);
52451 ++ return ret;
52452 ++}
52453 +diff --git a/drivers/net/wireguard/noise.h b/drivers/net/wireguard/noise.h
52454 +new file mode 100644
52455 +index 000000000000..138a07bb817c
52456 +--- /dev/null
52457 ++++ b/drivers/net/wireguard/noise.h
52458 +@@ -0,0 +1,137 @@
52459 ++/* SPDX-License-Identifier: GPL-2.0 */
52460 ++/*
52461 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
52462 ++ */
52463 ++#ifndef _WG_NOISE_H
52464 ++#define _WG_NOISE_H
52465 ++
52466 ++#include "messages.h"
52467 ++#include "peerlookup.h"
52468 ++
52469 ++#include <linux/types.h>
52470 ++#include <linux/spinlock.h>
52471 ++#include <linux/atomic.h>
52472 ++#include <linux/rwsem.h>
52473 ++#include <linux/mutex.h>
52474 ++#include <linux/kref.h>
52475 ++
52476 ++union noise_counter { /* Two overlapping views of one key's counter state. */
52477 ++ struct {
52478 ++ u64 counter;
52479 ++ unsigned long backtrack[COUNTER_BITS_TOTAL / BITS_PER_LONG]; /* bitmap of COUNTER_BITS_TOTAL bits */
52480 ++ spinlock_t lock;
52481 ++ } receive; /* NOTE(review): presumably the view used for the receiving direction -- confirm in the receive path */
52482 ++ atomic64_t counter; /* atomic view; overlays receive.counter */
52483 ++};
52484 ++
52485 ++struct noise_symmetric_key { /* One direction's session key plus its bookkeeping. */
52486 ++ u8 key[NOISE_SYMMETRIC_KEY_LEN]; /* filled by kdf() in derive_keys() */
52487 ++ union noise_counter counter;
52488 ++ u64 birthdate; /* ktime_get_coarse_boottime_ns() at initialisation */
52489 ++ bool is_valid; /* set to true by symmetric_key_init() */
52490 ++};
52491 ++
52492 ++struct noise_keypair { /* Sending and receiving keys produced by one completed handshake. */
52493 ++ struct index_hashtable_entry entry; /* replaces the handshake's entry in the index hashtable */
52494 ++ struct noise_symmetric_key sending;
52495 ++ struct noise_symmetric_key receiving;
52496 ++ __le32 remote_index; /* copied from handshake->remote_index */
52497 ++ bool i_am_the_initiator; /* true when our side consumed the response */
52498 ++ struct kref refcount;
52499 ++ struct rcu_head rcu;
52500 ++ u64 internal_id; /* printed in the "Keypair %llu created" debug message */
52501 ++};
52502 ++
52503 ++struct noise_keypairs { /* A peer's set of RCU-managed keypair slots. */
52504 ++ struct noise_keypair __rcu *current_keypair;
52505 ++ struct noise_keypair __rcu *previous_keypair;
52506 ++ struct noise_keypair __rcu *next_keypair;
52507 ++ spinlock_t keypair_update_lock; /* NOTE(review): presumably serialises updates to the three slots -- confirm in noise.c */
52508 ++};
52509 ++
52510 ++struct noise_static_identity { /* The device's long-term key pair. */
52511 ++ u8 static_public[NOISE_PUBLIC_KEY_LEN];
52512 ++ u8 static_private[NOISE_PUBLIC_KEY_LEN]; /* sized with the public-key length constant */
52513 ++ struct rw_semaphore lock; /* held for reading throughout the handshake create/consume functions */
52514 ++ bool has_identity; /* handshake functions bail out early when this is false */
52515 ++};
52516 ++
52517 ++enum noise_handshake_state { /* Progress of one handshake; each value is set by the function named beside it. */
52518 ++ HANDSHAKE_ZEROED, /* NOTE(review): presumably the state after handshake_zero() -- confirm */
52519 ++ HANDSHAKE_CREATED_INITIATION, /* wg_noise_handshake_create_initiation() */
52520 ++ HANDSHAKE_CONSUMED_INITIATION, /* wg_noise_handshake_consume_initiation() */
52521 ++ HANDSHAKE_CREATED_RESPONSE, /* wg_noise_handshake_create_response() */
52522 ++ HANDSHAKE_CONSUMED_RESPONSE /* wg_noise_handshake_consume_response() */
52523 ++};
52524 ++
52525 ++struct noise_handshake { /* Per-peer handshake state; embedded in struct wg_peer as its `handshake` member. */
52526 ++ struct index_hashtable_entry entry; /* inserted into the device's index hashtable when a message is created */
52527 ++
52528 ++ enum noise_handshake_state state;
52529 ++ u64 last_initiation_consumption; /* boottime ns of the last accepted initiation; used for flood limiting */
52530 ++
52531 ++ struct noise_static_identity *static_identity;
52532 ++
52533 ++ u8 ephemeral_private[NOISE_PUBLIC_KEY_LEN];
52534 ++ u8 remote_static[NOISE_PUBLIC_KEY_LEN];
52535 ++ u8 remote_ephemeral[NOISE_PUBLIC_KEY_LEN];
52536 ++ u8 precomputed_static_static[NOISE_PUBLIC_KEY_LEN]; /* input to the "ss" kdf() step */
52537 ++
52538 ++ u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN]; /* covered by `lock`: it is not among the exceptions listed below */
52539 ++
52540 ++ u8 hash[NOISE_HASH_LEN];
52541 ++ u8 chaining_key[NOISE_HASH_LEN];
52542 ++
52543 ++ u8 latest_timestamp[NOISE_TIMESTAMP_LEN]; /* newest accepted initiation timestamp; used for replay rejection */
52544 ++ __le32 remote_index; /* sender_index taken from the last consumed message */
52545 ++
52546 ++ /* Protects all members except the immutable (after noise_handshake_
52547 ++ * init): remote_static, precomputed_static_static, static_identity.
52548 ++ */
52549 ++ struct rw_semaphore lock;
52550 ++};
52551 ++
52552 ++struct wg_device;
52553 ++
52554 ++void wg_noise_init(void);
52555 ++bool wg_noise_handshake_init(struct noise_handshake *handshake,
52556 ++ struct noise_static_identity *static_identity,
52557 ++ const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN],
52558 ++ const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN],
52559 ++ struct wg_peer *peer);
52560 ++void wg_noise_handshake_clear(struct noise_handshake *handshake);
52561 ++static inline void wg_noise_reset_last_sent_handshake(atomic64_t *handshake_ns)
52562 ++{ /* Set *@handshake_ns to (REKEY_TIMEOUT + 1) * NSEC_PER_SEC nanoseconds before the current boottime. */
52563 ++ atomic64_set(handshake_ns, ktime_get_coarse_boottime_ns() - /* NOTE(review): presumably REKEY_TIMEOUT is in seconds -- confirm */
52564 ++ (u64)(REKEY_TIMEOUT + 1) * NSEC_PER_SEC);
52565 ++}
52566 ++
52567 ++void wg_noise_keypair_put(struct noise_keypair *keypair, bool unreference_now);
52568 ++struct noise_keypair *wg_noise_keypair_get(struct noise_keypair *keypair);
52569 ++void wg_noise_keypairs_clear(struct noise_keypairs *keypairs);
52570 ++bool wg_noise_received_with_keypair(struct noise_keypairs *keypairs,
52571 ++ struct noise_keypair *received_keypair);
52572 ++void wg_noise_expire_current_peer_keypairs(struct wg_peer *peer);
52573 ++
52574 ++void wg_noise_set_static_identity_private_key(
52575 ++ struct noise_static_identity *static_identity,
52576 ++ const u8 private_key[NOISE_PUBLIC_KEY_LEN]);
52577 ++bool wg_noise_precompute_static_static(struct wg_peer *peer);
52578 ++
52579 ++bool
52580 ++wg_noise_handshake_create_initiation(struct message_handshake_initiation *dst,
52581 ++ struct noise_handshake *handshake);
52582 ++struct wg_peer *
52583 ++wg_noise_handshake_consume_initiation(struct message_handshake_initiation *src,
52584 ++ struct wg_device *wg);
52585 ++
52586 ++bool wg_noise_handshake_create_response(struct message_handshake_response *dst,
52587 ++ struct noise_handshake *handshake);
52588 ++struct wg_peer *
52589 ++wg_noise_handshake_consume_response(struct message_handshake_response *src,
52590 ++ struct wg_device *wg);
52591 ++
52592 ++bool wg_noise_handshake_begin_session(struct noise_handshake *handshake,
52593 ++ struct noise_keypairs *keypairs);
52594 ++
52595 ++#endif /* _WG_NOISE_H */
52596 +diff --git a/drivers/net/wireguard/peer.c b/drivers/net/wireguard/peer.c
52597 +new file mode 100644
52598 +index 000000000000..071eedf33f5a
52599 +--- /dev/null
52600 ++++ b/drivers/net/wireguard/peer.c
52601 +@@ -0,0 +1,240 @@
52602 ++// SPDX-License-Identifier: GPL-2.0
52603 ++/*
52604 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
52605 ++ */
52606 ++
52607 ++#include "peer.h"
52608 ++#include "device.h"
52609 ++#include "queueing.h"
52610 ++#include "timers.h"
52611 ++#include "peerlookup.h"
52612 ++#include "noise.h"
52613 ++
52614 ++#include <linux/kref.h>
52615 ++#include <linux/lockdep.h>
52616 ++#include <linux/rcupdate.h>
52617 ++#include <linux/list.h>
52618 ++
52619 ++static atomic64_t peer_counter = ATOMIC64_INIT(0);
52620 ++
52621 ++struct wg_peer *wg_peer_create(struct wg_device *wg,
52622 ++ const u8 public_key[NOISE_PUBLIC_KEY_LEN],
52623 ++ const u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN])
52624 ++{ /* Allocate, initialise and register a new peer on @wg; returns the peer, or an ERR_PTR() on failure. */
52625 ++ struct wg_peer *peer;
52626 ++ int ret = -ENOMEM; /* default error for every failure except a rejected key */
52627 ++
52628 ++ lockdep_assert_held(&wg->device_update_lock); /* caller must hold the device update lock */
52629 ++
52630 ++ if (wg->num_peers >= MAX_PEERS_PER_DEVICE)
52631 ++ return ERR_PTR(ret);
52632 ++
52633 ++ peer = kzalloc(sizeof(*peer), GFP_KERNEL);
52634 ++ if (unlikely(!peer))
52635 ++ return ERR_PTR(ret);
52636 ++ peer->device = wg;
52637 ++
52638 ++ if (!wg_noise_handshake_init(&peer->handshake, &wg->static_identity,
52639 ++ public_key, preshared_key, peer)) {
52640 ++ ret = -EKEYREJECTED;
52641 ++ goto err_1;
52642 ++ }
52643 ++ if (dst_cache_init(&peer->endpoint_cache, GFP_KERNEL))
52644 ++ goto err_1;
52645 ++ if (wg_packet_queue_init(&peer->tx_queue, wg_packet_tx_worker, false,
52646 ++ MAX_QUEUED_PACKETS))
52647 ++ goto err_2;
52648 ++ if (wg_packet_queue_init(&peer->rx_queue, NULL, false,
52649 ++ MAX_QUEUED_PACKETS))
52650 ++ goto err_3;
52651 ++
52652 ++ peer->internal_id = atomic64_inc_return(&peer_counter); /* unique id drawn from the file-wide counter */
52653 ++ peer->serial_work_cpu = nr_cpumask_bits;
52654 ++ wg_cookie_init(&peer->latest_cookie);
52655 ++ wg_timers_init(peer);
52656 ++ wg_cookie_checker_precompute_peer_keys(peer);
52657 ++ spin_lock_init(&peer->keypairs.keypair_update_lock);
52658 ++ INIT_WORK(&peer->transmit_handshake_work,
52659 ++ wg_packet_handshake_send_worker);
52660 ++ rwlock_init(&peer->endpoint_lock);
52661 ++ kref_init(&peer->refcount); /* the initial reference; peer_remove_after_dead() ends with a wg_peer_put() */
52662 ++ skb_queue_head_init(&peer->staged_packet_queue);
52663 ++ wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake);
52664 ++ set_bit(NAPI_STATE_NO_BUSY_POLL, &peer->napi.state);
52665 ++ netif_napi_add(wg->dev, &peer->napi, wg_packet_rx_poll,
52666 ++ NAPI_POLL_WEIGHT);
52667 ++ napi_enable(&peer->napi);
52668 ++ list_add_tail(&peer->peer_list, &wg->peer_list); /* from here the peer is visible on the device's list ... */
52669 ++ INIT_LIST_HEAD(&peer->allowedips_list);
52670 ++ wg_pubkey_hashtable_add(wg->peer_hashtable, peer); /* ... and in the public-key hashtable */
52671 ++ ++wg->num_peers;
52672 ++ pr_debug("%s: Peer %llu created\n", wg->dev->name, peer->internal_id);
52673 ++ return peer;
52674 ++
52675 ++err_3: /* labels unwind in reverse order of setup */
52676 ++ wg_packet_queue_free(&peer->tx_queue, false);
52677 ++err_2:
52678 ++ dst_cache_destroy(&peer->endpoint_cache);
52679 ++err_1:
52680 ++ kfree(peer);
52681 ++ return ERR_PTR(ret);
52682 ++}
52683 ++
52684 ++struct wg_peer *wg_peer_get_maybe_zero(struct wg_peer *peer)
52685 ++{ /* Take a reference on @peer if it is non-NULL and its refcount is not already zero; returns @peer on success, else NULL. */
52686 ++ RCU_LOCKDEP_WARN(!rcu_read_lock_bh_held(),
52687 ++ "Taking peer reference without holding the RCU read lock");
52688 ++ if (unlikely(!peer || !kref_get_unless_zero(&peer->refcount)))
52689 ++ return NULL;
52690 ++ return peer;
52691 ++}
52692 ++
52693 ++static void peer_make_dead(struct wg_peer *peer)
52694 ++{ /* First removal phase: unlink @peer from the lookup structures and set is_dead. */
52695 ++ /* Remove from configuration-time lookup structures. */
52696 ++ list_del_init(&peer->peer_list); /* _init variant leaves the node reusable (wg_peer_remove_all() re-adds it) */
52697 ++ wg_allowedips_remove_by_peer(&peer->device->peer_allowedips, peer,
52698 ++ &peer->device->device_update_lock);
52699 ++ wg_pubkey_hashtable_remove(peer->device->peer_hashtable, peer);
52700 ++
52701 ++ /* Mark as dead, so that we don't allow jumping contexts after. */
52702 ++ WRITE_ONCE(peer->is_dead, true);
52703 ++
52704 ++ /* The caller must now synchronize_rcu() for this to take effect. */
52705 ++}
52706 ++
52707 ++static void peer_remove_after_dead(struct wg_peer *peer)
52708 ++{ /* Second removal phase, run after peer_make_dead() and an RCU grace period: drain all users, then drop a reference. */
52709 ++ WARN_ON(!peer->is_dead);
52710 ++
52711 ++ /* No more keypairs can be created for this peer, since is_dead protects
52712 ++ * add_new_keypair, so we can now destroy existing ones.
52713 ++ */
52714 ++ wg_noise_keypairs_clear(&peer->keypairs);
52715 ++
52716 ++ /* Destroy all ongoing timers that were in-flight at the beginning of
52717 ++ * this function.
52718 ++ */
52719 ++ wg_timers_stop(peer);
52720 ++
52721 ++ /* The transition between packet encryption/decryption queues isn't
52722 ++ * guarded by is_dead, but each reference's life is strictly bounded by
52723 ++ * two generations: once for parallel crypto and once for serial
52724 ++ * ingestion, so we can simply flush twice, and be sure that we no
52725 ++ * longer have references inside these queues.
52726 ++ */
52727 ++
52728 ++ /* a) For encrypt/decrypt. */
52729 ++ flush_workqueue(peer->device->packet_crypt_wq);
52730 ++ /* b.1) For send (but not receive, since that's napi). */
52731 ++ flush_workqueue(peer->device->packet_crypt_wq);
52732 ++ /* b.2.1) For receive (but not send, since that's wq). */
52733 ++ napi_disable(&peer->napi);
52734 ++ /* b.2.2) It's now safe to remove the napi struct, which must be done
52735 ++ * here from process context.
52736 ++ */
52737 ++ netif_napi_del(&peer->napi);
52738 ++
52739 ++ /* Ensure any workstructs we own (like transmit_handshake_work or
52740 ++ * clear_peer_work) no longer are in use.
52741 ++ */
52742 ++ flush_workqueue(peer->device->handshake_send_wq);
52743 ++
52744 ++ /* After the above flushes, a peer might still be active in a few
52745 ++ * different contexts: 1) from xmit(), before hitting is_dead and
52746 ++ * returning, 2) from wg_packet_consume_data(), before hitting is_dead
52747 ++ * and returning, 3) from wg_receive_handshake_packet() after a point
52748 ++ * where it has processed an incoming handshake packet, but where
52749 ++ * all calls to pass it off to timers fails because of is_dead. We won't
52750 ++ * have new references in (1) eventually, because we're removed from
52751 ++ * allowedips; we won't have new references in (2) eventually, because
52752 ++ * wg_index_hashtable_lookup will always return NULL, since we removed
52753 ++ * all existing keypairs and no more can be created; we won't have new
52754 ++ * references in (3) eventually, because we're removed from the pubkey
52755 ++ * hash table, which allows for a maximum of one handshake response,
52756 ++ * via the still-uncleared index hashtable entry, but not more than one,
52757 ++ * and in wg_cookie_message_consume, the lookup eventually gets a peer
52758 ++ * with a refcount of zero, so no new reference is taken.
52759 ++ */
52760 ++
52761 ++ --peer->device->num_peers;
52762 ++ wg_peer_put(peer); /* freeing happens in kref_release() once the last reference is gone */
52763 ++}
52764 ++
52765 ++/* We have a separate "remove" function to make sure that all active places
52766 ++ * where a peer is currently operating will eventually come to an end and not
52767 ++ * pass their reference onto another context.
52768 ++ */
52769 ++void wg_peer_remove(struct wg_peer *peer)
52770 ++{ /* Remove one peer: mark it dead, wait for an RCU grace period, then tear it down. A NULL @peer is ignored. */
52771 ++ if (unlikely(!peer))
52772 ++ return;
52773 ++ lockdep_assert_held(&peer->device->device_update_lock); /* caller must hold the device update lock */
52774 ++
52775 ++ peer_make_dead(peer);
52776 ++ synchronize_rcu(); /* peer_make_dead() requires this before its effect can be relied on */
52777 ++ peer_remove_after_dead(peer);
52778 ++}
52779 ++
52780 ++void wg_peer_remove_all(struct wg_device *wg)
52781 ++{ /* Remove every peer of @wg, sharing a single synchronize_rcu() between all of them. */
52782 ++ struct wg_peer *peer, *temp;
52783 ++ LIST_HEAD(dead_peers);
52784 ++
52785 ++ lockdep_assert_held(&wg->device_update_lock);
52786 ++
52787 ++ /* Avoid having to traverse individually for each one. */
52788 ++ wg_allowedips_free(&wg->peer_allowedips, &wg->device_update_lock);
52789 ++
52790 ++ list_for_each_entry_safe(peer, temp, &wg->peer_list, peer_list) { /* _safe: peer_make_dead() unlinks the current entry */
52791 ++ peer_make_dead(peer);
52792 ++ list_add_tail(&peer->peer_list, &dead_peers); /* park it on a private list for the second phase */
52793 ++ }
52794 ++ synchronize_rcu();
52795 ++ list_for_each_entry_safe(peer, temp, &dead_peers, peer_list)
52796 ++ peer_remove_after_dead(peer);
52797 ++}
52798 ++
52799 ++static void rcu_release(struct rcu_head *rcu)
52800 ++{ /* RCU callback queued by kref_release(): free the peer's remaining resources and the peer itself. */
52801 ++ struct wg_peer *peer = container_of(rcu, struct wg_peer, rcu);
52802 ++
52803 ++ dst_cache_destroy(&peer->endpoint_cache);
52804 ++ wg_packet_queue_free(&peer->rx_queue, false);
52805 ++ wg_packet_queue_free(&peer->tx_queue, false);
52806 ++
52807 ++ /* The final zeroing takes care of clearing any remaining handshake key
52808 ++ * material and other potentially sensitive information.
52809 ++ */
52810 ++ kzfree(peer);
52811 ++}
52812 ++
52813 ++static void kref_release(struct kref *refcount)
52814 ++{ /* kref release callback passed to kref_put() by wg_peer_put(); final cleanup, with the free deferred through call_rcu(). */
52815 ++ struct wg_peer *peer = container_of(refcount, struct wg_peer, refcount);
52816 ++
52817 ++ pr_debug("%s: Peer %llu (%pISpfsc) destroyed\n",
52818 ++ peer->device->dev->name, peer->internal_id,
52819 ++ &peer->endpoint.addr);
52820 ++
52821 ++ /* Remove ourself from dynamic runtime lookup structures, now that the
52822 ++ * last reference is gone.
52823 ++ */
52824 ++ wg_index_hashtable_remove(peer->device->index_hashtable,
52825 ++ &peer->handshake.entry);
52826 ++
52827 ++ /* Remove any lingering packets that didn't have a chance to be
52828 ++ * transmitted.
52829 ++ */
52830 ++ wg_packet_purge_staged_packets(peer);
52831 ++
52832 ++ /* Free the memory used. */
52833 ++ call_rcu(&peer->rcu, rcu_release); /* rcu_release() does the actual freeing */
52834 ++}
52835 ++
52836 ++void wg_peer_put(struct wg_peer *peer)
52837 ++{ /* Drop one reference on @peer; kref_release() is the release callback. A NULL @peer is ignored. */
52838 ++ if (unlikely(!peer))
52839 ++ return;
52840 ++ kref_put(&peer->refcount, kref_release);
52841 ++}
52842 +diff --git a/drivers/net/wireguard/peer.h b/drivers/net/wireguard/peer.h
52843 +new file mode 100644
52844 +index 000000000000..23af40922997
52845 +--- /dev/null
52846 ++++ b/drivers/net/wireguard/peer.h
52847 +@@ -0,0 +1,83 @@
52848 ++/* SPDX-License-Identifier: GPL-2.0 */
52849 ++/*
52850 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
52851 ++ */
52852 ++
52853 ++#ifndef _WG_PEER_H
52854 ++#define _WG_PEER_H
52855 ++
52856 ++#include "device.h"
52857 ++#include "noise.h"
52858 ++#include "cookie.h"
52859 ++
52860 ++#include <linux/types.h>
52861 ++#include <linux/netfilter.h>
52862 ++#include <linux/spinlock.h>
52863 ++#include <linux/kref.h>
52864 ++#include <net/dst_cache.h>
52865 ++
52866 ++struct wg_device;
52867 ++
52868 ++struct endpoint { /* A socket address plus source information, each held in a union with IPv4 and IPv6 members. */
52869 ++ union { /* the address, viewable as generic, IPv4 or IPv6 */
52870 ++ struct sockaddr addr;
52871 ++ struct sockaddr_in addr4;
52872 ++ struct sockaddr_in6 addr6;
52873 ++ };
52874 ++ union { /* NOTE(review): presumably the local source to use when replying -- confirm in the socket code */
52875 ++ struct {
52876 ++ struct in_addr src4;
52877 ++ /* Essentially the same as addr6->scope_id */
52878 ++ int src_if4;
52879 ++ };
52880 ++ struct in6_addr src6;
52881 ++ };
52882 ++};
52883 ++
52884 ++struct wg_peer { /* All per-peer state; created by wg_peer_create() and freed once the refcount reaches zero. */
52885 ++ struct wg_device *device; /* owning device, set at creation */
52886 ++ struct crypt_queue tx_queue, rx_queue; /* set up with wg_packet_queue_init() in wg_peer_create() */
52887 ++ struct sk_buff_head staged_packet_queue; /* purged in kref_release() */
52888 ++ int serial_work_cpu; /* initialised to nr_cpumask_bits */
52889 ++ struct noise_keypairs keypairs;
52890 ++ struct endpoint endpoint;
52891 ++ struct dst_cache endpoint_cache;
52892 ++ rwlock_t endpoint_lock;
52893 ++ struct noise_handshake handshake;
52894 ++ atomic64_t last_sent_handshake; /* initialised via wg_noise_reset_last_sent_handshake() */
52895 ++ struct work_struct transmit_handshake_work, clear_peer_work;
52896 ++ struct cookie latest_cookie;
52897 ++ struct hlist_node pubkey_hash;
52898 ++ u64 rx_bytes, tx_bytes;
52899 ++ struct timer_list timer_retransmit_handshake, timer_send_keepalive;
52900 ++ struct timer_list timer_new_handshake, timer_zero_key_material;
52901 ++ struct timer_list timer_persistent_keepalive;
52902 ++ unsigned int timer_handshake_attempts;
52903 ++ u16 persistent_keepalive_interval;
52904 ++ bool timer_need_another_keepalive;
52905 ++ bool sent_lastminute_handshake;
52906 ++ struct timespec64 walltime_last_handshake;
52907 ++ struct kref refcount; /* initialised in wg_peer_create(); dropped with wg_peer_put() */
52908 ++ struct rcu_head rcu; /* used by kref_release() to defer the free through call_rcu() */
52909 ++ struct list_head peer_list; /* link on the device's peer_list */
52910 ++ struct list_head allowedips_list;
52911 ++ u64 internal_id; /* unique id, shown in debug messages */
52912 ++ struct napi_struct napi;
52913 ++ bool is_dead; /* set by peer_make_dead(); checked before adding new keypairs */
52914 ++};
52915 ++
52916 ++struct wg_peer *wg_peer_create(struct wg_device *wg,
52917 ++ const u8 public_key[NOISE_PUBLIC_KEY_LEN],
52918 ++ const u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN]);
52919 ++
52920 ++struct wg_peer *__must_check wg_peer_get_maybe_zero(struct wg_peer *peer);
52921 ++static inline struct wg_peer *wg_peer_get(struct wg_peer *peer)
52922 ++{
52923 ++ kref_get(&peer->refcount);
52924 ++ return peer;
52925 ++}
52926 ++void wg_peer_put(struct wg_peer *peer);
52927 ++void wg_peer_remove(struct wg_peer *peer);
52928 ++void wg_peer_remove_all(struct wg_device *wg);
52929 ++
52930 ++#endif /* _WG_PEER_H */
52931 +diff --git a/drivers/net/wireguard/peerlookup.c b/drivers/net/wireguard/peerlookup.c
52932 +new file mode 100644
52933 +index 000000000000..e4deb331476b
52934 +--- /dev/null
52935 ++++ b/drivers/net/wireguard/peerlookup.c
52936 +@@ -0,0 +1,221 @@
52937 ++// SPDX-License-Identifier: GPL-2.0
52938 ++/*
52939 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
52940 ++ */
52941 ++
52942 ++#include "peerlookup.h"
52943 ++#include "peer.h"
52944 ++#include "noise.h"
52945 ++
52946 ++static struct hlist_head *pubkey_bucket(struct pubkey_hashtable *table,
52947 ++ const u8 pubkey[NOISE_PUBLIC_KEY_LEN])
52948 ++{
52949 ++ /* siphash gives us a secure 64bit number based on a random key. Since
52950 ++ * the bits are uniformly distributed, we can then mask off to get the
52951 ++ * bits we need.
52952 ++ */
52953 ++ const u64 hash = siphash(pubkey, NOISE_PUBLIC_KEY_LEN, &table->key);
52954 ++
52955 ++ return &table->hashtable[hash & (HASH_SIZE(table->hashtable) - 1)];
52956 ++}
52957 ++
52958 ++struct pubkey_hashtable *wg_pubkey_hashtable_alloc(void)
52959 ++{
52960 ++ struct pubkey_hashtable *table = kvmalloc(sizeof(*table), GFP_KERNEL);
52961 ++
52962 ++ if (!table)
52963 ++ return NULL;
52964 ++
52965 ++ get_random_bytes(&table->key, sizeof(table->key));
52966 ++ hash_init(table->hashtable);
52967 ++ mutex_init(&table->lock);
52968 ++ return table;
52969 ++}
52970 ++
52971 ++void wg_pubkey_hashtable_add(struct pubkey_hashtable *table,
52972 ++ struct wg_peer *peer)
52973 ++{
52974 ++ mutex_lock(&table->lock);
52975 ++ hlist_add_head_rcu(&peer->pubkey_hash,
52976 ++ pubkey_bucket(table, peer->handshake.remote_static));
52977 ++ mutex_unlock(&table->lock);
52978 ++}
52979 ++
52980 ++void wg_pubkey_hashtable_remove(struct pubkey_hashtable *table,
52981 ++ struct wg_peer *peer)
52982 ++{
52983 ++ mutex_lock(&table->lock);
52984 ++ hlist_del_init_rcu(&peer->pubkey_hash);
52985 ++ mutex_unlock(&table->lock);
52986 ++}
52987 ++
52988 ++/* Returns a strong reference to a peer */
52989 ++struct wg_peer *
52990 ++wg_pubkey_hashtable_lookup(struct pubkey_hashtable *table,
52991 ++ const u8 pubkey[NOISE_PUBLIC_KEY_LEN])
52992 ++{
52993 ++ struct wg_peer *iter_peer, *peer = NULL;
52994 ++
52995 ++ rcu_read_lock_bh();
52996 ++ hlist_for_each_entry_rcu_bh(iter_peer, pubkey_bucket(table, pubkey),
52997 ++ pubkey_hash) {
52998 ++ if (!memcmp(pubkey, iter_peer->handshake.remote_static,
52999 ++ NOISE_PUBLIC_KEY_LEN)) {
53000 ++ peer = iter_peer;
53001 ++ break;
53002 ++ }
53003 ++ }
53004 ++ peer = wg_peer_get_maybe_zero(peer);
53005 ++ rcu_read_unlock_bh();
53006 ++ return peer;
53007 ++}
53008 ++
53009 ++static struct hlist_head *index_bucket(struct index_hashtable *table,
53010 ++ const __le32 index)
53011 ++{
53012 ++ /* Since the indices are random and thus all bits are uniformly
53013 ++ * distributed, we can find its bucket simply by masking.
53014 ++ */
53015 ++ return &table->hashtable[(__force u32)index &
53016 ++ (HASH_SIZE(table->hashtable) - 1)];
53017 ++}
53018 ++
53019 ++struct index_hashtable *wg_index_hashtable_alloc(void)
53020 ++{
53021 ++ struct index_hashtable *table = kvmalloc(sizeof(*table), GFP_KERNEL);
53022 ++
53023 ++ if (!table)
53024 ++ return NULL;
53025 ++
53026 ++ hash_init(table->hashtable);
53027 ++ spin_lock_init(&table->lock);
53028 ++ return table;
53029 ++}
53030 ++
53031 ++/* At the moment, we limit ourselves to 2^20 total peers, which generally might
53032 ++ * amount to 2^20*3 items in this hashtable. The algorithm below works by
53033 ++ * picking a random number and testing it. We can see that these limits mean we
53034 ++ * usually succeed pretty quickly:
53035 ++ *
53036 ++ * >>> def calculation(tries, size):
53037 ++ * ... return (size / 2**32)**(tries - 1) * (1 - (size / 2**32))
53038 ++ * ...
53039 ++ * >>> calculation(1, 2**20 * 3)
53040 ++ * 0.999267578125
53041 ++ * >>> calculation(2, 2**20 * 3)
53042 ++ * 0.0007318854331970215
53043 ++ * >>> calculation(3, 2**20 * 3)
53044 ++ * 5.360489012673497e-07
53045 ++ * >>> calculation(4, 2**20 * 3)
53046 ++ * 3.9261394135792216e-10
53047 ++ *
53048 ++ * At the moment, we don't do any masking, so this algorithm isn't exactly
53049 ++ * constant time in either the random guessing or in the hash list lookup. We
53050 ++ * could require a minimum of 3 tries, which would successfully mask the
53051 ++ * guessing. this would not, however, help with the growing hash lengths, which
53052 ++ * is another thing to consider moving forward.
53053 ++ */
53054 ++
53055 ++__le32 wg_index_hashtable_insert(struct index_hashtable *table,
53056 ++ struct index_hashtable_entry *entry)
53057 ++{
53058 ++ struct index_hashtable_entry *existing_entry;
53059 ++
53060 ++ spin_lock_bh(&table->lock);
53061 ++ hlist_del_init_rcu(&entry->index_hash);
53062 ++ spin_unlock_bh(&table->lock);
53063 ++
53064 ++ rcu_read_lock_bh();
53065 ++
53066 ++search_unused_slot:
53067 ++ /* First we try to find an unused slot, randomly, while unlocked. */
53068 ++ entry->index = (__force __le32)get_random_u32();
53069 ++ hlist_for_each_entry_rcu_bh(existing_entry,
53070 ++ index_bucket(table, entry->index),
53071 ++ index_hash) {
53072 ++ if (existing_entry->index == entry->index)
53073 ++ /* If it's already in use, we continue searching. */
53074 ++ goto search_unused_slot;
53075 ++ }
53076 ++
53077 ++ /* Once we've found an unused slot, we lock it, and then double-check
53078 ++ * that nobody else stole it from us.
53079 ++ */
53080 ++ spin_lock_bh(&table->lock);
53081 ++ hlist_for_each_entry_rcu_bh(existing_entry,
53082 ++ index_bucket(table, entry->index),
53083 ++ index_hash) {
53084 ++ if (existing_entry->index == entry->index) {
53085 ++ spin_unlock_bh(&table->lock);
53086 ++ /* If it was stolen, we start over. */
53087 ++ goto search_unused_slot;
53088 ++ }
53089 ++ }
53090 ++ /* Otherwise, we know we have it exclusively (since we're locked),
53091 ++ * so we insert.
53092 ++ */
53093 ++ hlist_add_head_rcu(&entry->index_hash,
53094 ++ index_bucket(table, entry->index));
53095 ++ spin_unlock_bh(&table->lock);
53096 ++
53097 ++ rcu_read_unlock_bh();
53098 ++
53099 ++ return entry->index;
53100 ++}
53101 ++
53102 ++bool wg_index_hashtable_replace(struct index_hashtable *table,
53103 ++ struct index_hashtable_entry *old,
53104 ++ struct index_hashtable_entry *new)
53105 ++{
53106 ++ if (unlikely(hlist_unhashed(&old->index_hash)))
53107 ++ return false;
53108 ++ spin_lock_bh(&table->lock);
53109 ++ new->index = old->index;
53110 ++ hlist_replace_rcu(&old->index_hash, &new->index_hash);
53111 ++
53112 ++ /* Calling init here NULLs out index_hash, and in fact after this
53113 ++ * function returns, it's theoretically possible for this to get
53114 ++ * reinserted elsewhere. That means the RCU lookup below might either
53115 ++ * terminate early or jump between buckets, in which case the packet
53116 ++ * simply gets dropped, which isn't terrible.
53117 ++ */
53118 ++ INIT_HLIST_NODE(&old->index_hash);
53119 ++ spin_unlock_bh(&table->lock);
53120 ++ return true;
53121 ++}
53122 ++
53123 ++void wg_index_hashtable_remove(struct index_hashtable *table,
53124 ++ struct index_hashtable_entry *entry)
53125 ++{
53126 ++ spin_lock_bh(&table->lock);
53127 ++ hlist_del_init_rcu(&entry->index_hash);
53128 ++ spin_unlock_bh(&table->lock);
53129 ++}
53130 ++
53131 ++/* Returns a strong reference to a entry->peer */
53132 ++struct index_hashtable_entry *
53133 ++wg_index_hashtable_lookup(struct index_hashtable *table,
53134 ++ const enum index_hashtable_type type_mask,
53135 ++ const __le32 index, struct wg_peer **peer)
53136 ++{
53137 ++ struct index_hashtable_entry *iter_entry, *entry = NULL;
53138 ++
53139 ++ rcu_read_lock_bh();
53140 ++ hlist_for_each_entry_rcu_bh(iter_entry, index_bucket(table, index),
53141 ++ index_hash) {
53142 ++ if (iter_entry->index == index) {
53143 ++ if (likely(iter_entry->type & type_mask))
53144 ++ entry = iter_entry;
53145 ++ break;
53146 ++ }
53147 ++ }
53148 ++ if (likely(entry)) {
53149 ++ entry->peer = wg_peer_get_maybe_zero(entry->peer);
53150 ++ if (likely(entry->peer))
53151 ++ *peer = entry->peer;
53152 ++ else
53153 ++ entry = NULL;
53154 ++ }
53155 ++ rcu_read_unlock_bh();
53156 ++ return entry;
53157 ++}
53158 +diff --git a/drivers/net/wireguard/peerlookup.h b/drivers/net/wireguard/peerlookup.h
53159 +new file mode 100644
53160 +index 000000000000..ced811797680
53161 +--- /dev/null
53162 ++++ b/drivers/net/wireguard/peerlookup.h
53163 +@@ -0,0 +1,64 @@
53164 ++/* SPDX-License-Identifier: GPL-2.0 */
53165 ++/*
53166 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
53167 ++ */
53168 ++
53169 ++#ifndef _WG_PEERLOOKUP_H
53170 ++#define _WG_PEERLOOKUP_H
53171 ++
53172 ++#include "messages.h"
53173 ++
53174 ++#include <linux/hashtable.h>
53175 ++#include <linux/mutex.h>
53176 ++#include <linux/siphash.h>
53177 ++
53178 ++struct wg_peer;
53179 ++
53180 ++struct pubkey_hashtable {
53181 ++ /* TODO: move to rhashtable */
53182 ++ DECLARE_HASHTABLE(hashtable, 11);
53183 ++ siphash_key_t key;
53184 ++ struct mutex lock;
53185 ++};
53186 ++
53187 ++struct pubkey_hashtable *wg_pubkey_hashtable_alloc(void);
53188 ++void wg_pubkey_hashtable_add(struct pubkey_hashtable *table,
53189 ++ struct wg_peer *peer);
53190 ++void wg_pubkey_hashtable_remove(struct pubkey_hashtable *table,
53191 ++ struct wg_peer *peer);
53192 ++struct wg_peer *
53193 ++wg_pubkey_hashtable_lookup(struct pubkey_hashtable *table,
53194 ++ const u8 pubkey[NOISE_PUBLIC_KEY_LEN]);
53195 ++
53196 ++struct index_hashtable {
53197 ++ /* TODO: move to rhashtable */
53198 ++ DECLARE_HASHTABLE(hashtable, 13);
53199 ++ spinlock_t lock;
53200 ++};
53201 ++
/* Kind of object an index_hashtable_entry belongs to. The values are distinct
 * bits so that lookups can accept several kinds through one mask.
 */
enum index_hashtable_type {
	INDEX_HASHTABLE_HANDSHAKE = 1U << 0,
	INDEX_HASHTABLE_KEYPAIR = 1U << 1
};
53206 ++
53207 ++struct index_hashtable_entry {
53208 ++ struct wg_peer *peer;
53209 ++ struct hlist_node index_hash;
53210 ++ enum index_hashtable_type type;
53211 ++ __le32 index;
53212 ++};
53213 ++
53214 ++struct index_hashtable *wg_index_hashtable_alloc(void);
53215 ++__le32 wg_index_hashtable_insert(struct index_hashtable *table,
53216 ++ struct index_hashtable_entry *entry);
53217 ++bool wg_index_hashtable_replace(struct index_hashtable *table,
53218 ++ struct index_hashtable_entry *old,
53219 ++ struct index_hashtable_entry *new);
53220 ++void wg_index_hashtable_remove(struct index_hashtable *table,
53221 ++ struct index_hashtable_entry *entry);
53222 ++struct index_hashtable_entry *
53223 ++wg_index_hashtable_lookup(struct index_hashtable *table,
53224 ++ const enum index_hashtable_type type_mask,
53225 ++ const __le32 index, struct wg_peer **peer);
53226 ++
53227 ++#endif /* _WG_PEERLOOKUP_H */
53228 +diff --git a/drivers/net/wireguard/queueing.c b/drivers/net/wireguard/queueing.c
53229 +new file mode 100644
53230 +index 000000000000..5c964fcb994e
53231 +--- /dev/null
53232 ++++ b/drivers/net/wireguard/queueing.c
53233 +@@ -0,0 +1,53 @@
53234 ++// SPDX-License-Identifier: GPL-2.0
53235 ++/*
53236 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
53237 ++ */
53238 ++
53239 ++#include "queueing.h"
53240 ++
53241 ++struct multicore_worker __percpu *
53242 ++wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr)
53243 ++{
53244 ++ int cpu;
53245 ++ struct multicore_worker __percpu *worker =
53246 ++ alloc_percpu(struct multicore_worker);
53247 ++
53248 ++ if (!worker)
53249 ++ return NULL;
53250 ++
53251 ++ for_each_possible_cpu(cpu) {
53252 ++ per_cpu_ptr(worker, cpu)->ptr = ptr;
53253 ++ INIT_WORK(&per_cpu_ptr(worker, cpu)->work, function);
53254 ++ }
53255 ++ return worker;
53256 ++}
53257 ++
53258 ++int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function,
53259 ++ bool multicore, unsigned int len)
53260 ++{
53261 ++ int ret;
53262 ++
53263 ++ memset(queue, 0, sizeof(*queue));
53264 ++ ret = ptr_ring_init(&queue->ring, len, GFP_KERNEL);
53265 ++ if (ret)
53266 ++ return ret;
53267 ++ if (function) {
53268 ++ if (multicore) {
53269 ++ queue->worker = wg_packet_percpu_multicore_worker_alloc(
53270 ++ function, queue);
53271 ++ if (!queue->worker)
53272 ++ return -ENOMEM;
53273 ++ } else {
53274 ++ INIT_WORK(&queue->work, function);
53275 ++ }
53276 ++ }
53277 ++ return 0;
53278 ++}
53279 ++
53280 ++void wg_packet_queue_free(struct crypt_queue *queue, bool multicore)
53281 ++{
53282 ++ if (multicore)
53283 ++ free_percpu(queue->worker);
53284 ++ WARN_ON(!__ptr_ring_empty(&queue->ring));
53285 ++ ptr_ring_cleanup(&queue->ring, NULL);
53286 ++}
53287 +diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h
53288 +new file mode 100644
53289 +index 000000000000..58fdd630b246
53290 +--- /dev/null
53291 ++++ b/drivers/net/wireguard/queueing.h
53292 +@@ -0,0 +1,197 @@
53293 ++/* SPDX-License-Identifier: GPL-2.0 */
53294 ++/*
53295 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
53296 ++ */
53297 ++
53298 ++#ifndef _WG_QUEUEING_H
53299 ++#define _WG_QUEUEING_H
53300 ++
53301 ++#include "peer.h"
53302 ++#include <linux/types.h>
53303 ++#include <linux/skbuff.h>
53304 ++#include <linux/ip.h>
53305 ++#include <linux/ipv6.h>
53306 ++
53307 ++struct wg_device;
53308 ++struct wg_peer;
53309 ++struct multicore_worker;
53310 ++struct crypt_queue;
53311 ++struct sk_buff;
53312 ++
53313 ++/* queueing.c APIs: */
53314 ++int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function,
53315 ++ bool multicore, unsigned int len);
53316 ++void wg_packet_queue_free(struct crypt_queue *queue, bool multicore);
53317 ++struct multicore_worker __percpu *
53318 ++wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr);
53319 ++
53320 ++/* receive.c APIs: */
53321 ++void wg_packet_receive(struct wg_device *wg, struct sk_buff *skb);
53322 ++void wg_packet_handshake_receive_worker(struct work_struct *work);
53323 ++/* NAPI poll function: */
53324 ++int wg_packet_rx_poll(struct napi_struct *napi, int budget);
53325 ++/* Workqueue worker: */
53326 ++void wg_packet_decrypt_worker(struct work_struct *work);
53327 ++
53328 ++/* send.c APIs: */
53329 ++void wg_packet_send_queued_handshake_initiation(struct wg_peer *peer,
53330 ++ bool is_retry);
53331 ++void wg_packet_send_handshake_response(struct wg_peer *peer);
53332 ++void wg_packet_send_handshake_cookie(struct wg_device *wg,
53333 ++ struct sk_buff *initiating_skb,
53334 ++ __le32 sender_index);
53335 ++void wg_packet_send_keepalive(struct wg_peer *peer);
53336 ++void wg_packet_purge_staged_packets(struct wg_peer *peer);
53337 ++void wg_packet_send_staged_packets(struct wg_peer *peer);
53338 ++/* Workqueue workers: */
53339 ++void wg_packet_handshake_send_worker(struct work_struct *work);
53340 ++void wg_packet_tx_worker(struct work_struct *work);
53341 ++void wg_packet_encrypt_worker(struct work_struct *work);
53342 ++
/* Processing state stored in packet_cb->state. Per the enqueue helpers, a
 * packet starts UNCRYPTED and its consumer waits for CRYPTED or DEAD.
 */
enum packet_state {
	PACKET_STATE_UNCRYPTED,
	PACKET_STATE_CRYPTED,
	PACKET_STATE_DEAD
};
53348 ++
53349 ++struct packet_cb {
53350 ++ u64 nonce;
53351 ++ struct noise_keypair *keypair;
53352 ++ atomic_t state;
53353 ++ u32 mtu;
53354 ++ u8 ds;
53355 ++};
53356 ++
53357 ++#define PACKET_CB(skb) ((struct packet_cb *)((skb)->cb))
53358 ++#define PACKET_PEER(skb) (PACKET_CB(skb)->keypair->entry.peer)
53359 ++
53360 ++/* Returns either the correct skb->protocol value, or 0 if invalid. */
53361 ++static inline __be16 wg_skb_examine_untrusted_ip_hdr(struct sk_buff *skb)
53362 ++{
53363 ++ if (skb_network_header(skb) >= skb->head &&
53364 ++ (skb_network_header(skb) + sizeof(struct iphdr)) <=
53365 ++ skb_tail_pointer(skb) &&
53366 ++ ip_hdr(skb)->version == 4)
53367 ++ return htons(ETH_P_IP);
53368 ++ if (skb_network_header(skb) >= skb->head &&
53369 ++ (skb_network_header(skb) + sizeof(struct ipv6hdr)) <=
53370 ++ skb_tail_pointer(skb) &&
53371 ++ ipv6_hdr(skb)->version == 6)
53372 ++ return htons(ETH_P_IPV6);
53373 ++ return 0;
53374 ++}
53375 ++
53376 ++static inline void wg_reset_packet(struct sk_buff *skb)
53377 ++{
53378 ++ const int pfmemalloc = skb->pfmemalloc;
53379 ++
53380 ++ skb_scrub_packet(skb, true);
53381 ++ memset(&skb->headers_start, 0,
53382 ++ offsetof(struct sk_buff, headers_end) -
53383 ++ offsetof(struct sk_buff, headers_start));
53384 ++ skb->pfmemalloc = pfmemalloc;
53385 ++ skb->queue_mapping = 0;
53386 ++ skb->nohdr = 0;
53387 ++ skb->peeked = 0;
53388 ++ skb->mac_len = 0;
53389 ++ skb->dev = NULL;
53390 ++#ifdef CONFIG_NET_SCHED
53391 ++ skb->tc_index = 0;
53392 ++#endif
53393 ++ skb_reset_redirect(skb);
53394 ++ skb->hdr_len = skb_headroom(skb);
53395 ++ skb_reset_mac_header(skb);
53396 ++ skb_reset_network_header(skb);
53397 ++ skb_reset_transport_header(skb);
53398 ++ skb_probe_transport_header(skb);
53399 ++ skb_reset_inner_headers(skb);
53400 ++}
53401 ++
53402 ++static inline int wg_cpumask_choose_online(int *stored_cpu, unsigned int id)
53403 ++{
53404 ++ unsigned int cpu = *stored_cpu, cpu_index, i;
53405 ++
53406 ++ if (unlikely(cpu == nr_cpumask_bits ||
53407 ++ !cpumask_test_cpu(cpu, cpu_online_mask))) {
53408 ++ cpu_index = id % cpumask_weight(cpu_online_mask);
53409 ++ cpu = cpumask_first(cpu_online_mask);
53410 ++ for (i = 0; i < cpu_index; ++i)
53411 ++ cpu = cpumask_next(cpu, cpu_online_mask);
53412 ++ *stored_cpu = cpu;
53413 ++ }
53414 ++ return cpu;
53415 ++}
53416 ++
53417 ++/* This function is racy, in the sense that next is unlocked, so it could return
53418 ++ * the same CPU twice. A race-free version of this would be to instead store an
53419 ++ * atomic sequence number, do an increment-and-return, and then iterate through
53420 ++ * every possible CPU until we get to that index -- choose_cpu. However that's
53421 ++ * a bit slower, and it doesn't seem like this potential race actually
53422 ++ * introduces any performance loss, so we live with it.
53423 ++ */
53424 ++static inline int wg_cpumask_next_online(int *next)
53425 ++{
53426 ++ int cpu = *next;
53427 ++
53428 ++ while (unlikely(!cpumask_test_cpu(cpu, cpu_online_mask)))
53429 ++ cpu = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits;
53430 ++ *next = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits;
53431 ++ return cpu;
53432 ++}
53433 ++
53434 ++static inline int wg_queue_enqueue_per_device_and_peer(
53435 ++ struct crypt_queue *device_queue, struct crypt_queue *peer_queue,
53436 ++ struct sk_buff *skb, struct workqueue_struct *wq, int *next_cpu)
53437 ++{
53438 ++ int cpu;
53439 ++
53440 ++ atomic_set_release(&PACKET_CB(skb)->state, PACKET_STATE_UNCRYPTED);
53441 ++ /* We first queue this up for the peer ingestion, but the consumer
53442 ++ * will wait for the state to change to CRYPTED or DEAD before.
53443 ++ */
53444 ++ if (unlikely(ptr_ring_produce_bh(&peer_queue->ring, skb)))
53445 ++ return -ENOSPC;
53446 ++ /* Then we queue it up in the device queue, which consumes the
53447 ++ * packet as soon as it can.
53448 ++ */
53449 ++ cpu = wg_cpumask_next_online(next_cpu);
53450 ++ if (unlikely(ptr_ring_produce_bh(&device_queue->ring, skb)))
53451 ++ return -EPIPE;
53452 ++ queue_work_on(cpu, wq, &per_cpu_ptr(device_queue->worker, cpu)->work);
53453 ++ return 0;
53454 ++}
53455 ++
53456 ++static inline void wg_queue_enqueue_per_peer(struct crypt_queue *queue,
53457 ++ struct sk_buff *skb,
53458 ++ enum packet_state state)
53459 ++{
53460 ++ /* We take a reference, because as soon as we call atomic_set, the
53461 ++ * peer can be freed from below us.
53462 ++ */
53463 ++ struct wg_peer *peer = wg_peer_get(PACKET_PEER(skb));
53464 ++
53465 ++ atomic_set_release(&PACKET_CB(skb)->state, state);
53466 ++ queue_work_on(wg_cpumask_choose_online(&peer->serial_work_cpu,
53467 ++ peer->internal_id),
53468 ++ peer->device->packet_crypt_wq, &queue->work);
53469 ++ wg_peer_put(peer);
53470 ++}
53471 ++
53472 ++static inline void wg_queue_enqueue_per_peer_napi(struct sk_buff *skb,
53473 ++ enum packet_state state)
53474 ++{
53475 ++ /* We take a reference, because as soon as we call atomic_set, the
53476 ++ * peer can be freed from below us.
53477 ++ */
53478 ++ struct wg_peer *peer = wg_peer_get(PACKET_PEER(skb));
53479 ++
53480 ++ atomic_set_release(&PACKET_CB(skb)->state, state);
53481 ++ napi_schedule(&peer->napi);
53482 ++ wg_peer_put(peer);
53483 ++}
53484 ++
53485 ++#ifdef DEBUG
53486 ++bool wg_packet_counter_selftest(void);
53487 ++#endif
53488 ++
53489 ++#endif /* _WG_QUEUEING_H */
53490 +diff --git a/drivers/net/wireguard/ratelimiter.c b/drivers/net/wireguard/ratelimiter.c
53491 +new file mode 100644
53492 +index 000000000000..3fedd1d21f5e
53493 +--- /dev/null
53494 ++++ b/drivers/net/wireguard/ratelimiter.c
53495 +@@ -0,0 +1,223 @@
53496 ++// SPDX-License-Identifier: GPL-2.0
53497 ++/*
53498 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
53499 ++ */
53500 ++
53501 ++#include "ratelimiter.h"
53502 ++#include <linux/siphash.h>
53503 ++#include <linux/mm.h>
53504 ++#include <linux/slab.h>
53505 ++#include <net/ip.h>
53506 ++
53507 ++static struct kmem_cache *entry_cache;
53508 ++static hsiphash_key_t key;
53509 ++static spinlock_t table_lock = __SPIN_LOCK_UNLOCKED("ratelimiter_table_lock");
53510 ++static DEFINE_MUTEX(init_lock);
53511 ++static u64 init_refcnt; /* Protected by init_lock, hence not atomic. */
53512 ++static atomic_t total_entries = ATOMIC_INIT(0);
53513 ++static unsigned int max_entries, table_size;
53514 ++static void wg_ratelimiter_gc_entries(struct work_struct *);
53515 ++static DECLARE_DEFERRABLE_WORK(gc_work, wg_ratelimiter_gc_entries);
53516 ++static struct hlist_head *table_v4;
53517 ++#if IS_ENABLED(CONFIG_IPV6)
53518 ++static struct hlist_head *table_v6;
53519 ++#endif
53520 ++
53521 ++struct ratelimiter_entry {
53522 ++ u64 last_time_ns, tokens, ip;
53523 ++ void *net;
53524 ++ spinlock_t lock;
53525 ++ struct hlist_node hash;
53526 ++ struct rcu_head rcu;
53527 ++};
53528 ++
53529 ++enum {
53530 ++ PACKETS_PER_SECOND = 20,
53531 ++ PACKETS_BURSTABLE = 5,
53532 ++ PACKET_COST = NSEC_PER_SEC / PACKETS_PER_SECOND,
53533 ++ TOKEN_MAX = PACKET_COST * PACKETS_BURSTABLE
53534 ++};
53535 ++
53536 ++static void entry_free(struct rcu_head *rcu)
53537 ++{
53538 ++ kmem_cache_free(entry_cache,
53539 ++ container_of(rcu, struct ratelimiter_entry, rcu));
53540 ++ atomic_dec(&total_entries);
53541 ++}
53542 ++
53543 ++static void entry_uninit(struct ratelimiter_entry *entry)
53544 ++{
53545 ++ hlist_del_rcu(&entry->hash);
53546 ++ call_rcu(&entry->rcu, entry_free);
53547 ++}
53548 ++
53549 ++/* Calling this function with a NULL work uninits all entries. */
53550 ++static void wg_ratelimiter_gc_entries(struct work_struct *work)
53551 ++{
53552 ++ const u64 now = ktime_get_coarse_boottime_ns();
53553 ++ struct ratelimiter_entry *entry;
53554 ++ struct hlist_node *temp;
53555 ++ unsigned int i;
53556 ++
53557 ++ for (i = 0; i < table_size; ++i) {
53558 ++ spin_lock(&table_lock);
53559 ++ hlist_for_each_entry_safe(entry, temp, &table_v4[i], hash) {
53560 ++ if (unlikely(!work) ||
53561 ++ now - entry->last_time_ns > NSEC_PER_SEC)
53562 ++ entry_uninit(entry);
53563 ++ }
53564 ++#if IS_ENABLED(CONFIG_IPV6)
53565 ++ hlist_for_each_entry_safe(entry, temp, &table_v6[i], hash) {
53566 ++ if (unlikely(!work) ||
53567 ++ now - entry->last_time_ns > NSEC_PER_SEC)
53568 ++ entry_uninit(entry);
53569 ++ }
53570 ++#endif
53571 ++ spin_unlock(&table_lock);
53572 ++ if (likely(work))
53573 ++ cond_resched();
53574 ++ }
53575 ++ if (likely(work))
53576 ++ queue_delayed_work(system_power_efficient_wq, &gc_work, HZ);
53577 ++}
53578 ++
53579 ++bool wg_ratelimiter_allow(struct sk_buff *skb, struct net *net)
53580 ++{
53581 ++ /* We only take the bottom half of the net pointer, so that we can hash
53582 ++ * 3 words in the end. This way, siphash's len param fits into the final
53583 ++ * u32, and we don't incur an extra round.
53584 ++ */
53585 ++ const u32 net_word = (unsigned long)net;
53586 ++ struct ratelimiter_entry *entry;
53587 ++ struct hlist_head *bucket;
53588 ++ u64 ip;
53589 ++
53590 ++ if (skb->protocol == htons(ETH_P_IP)) {
53591 ++ ip = (u64 __force)ip_hdr(skb)->saddr;
53592 ++ bucket = &table_v4[hsiphash_2u32(net_word, ip, &key) &
53593 ++ (table_size - 1)];
53594 ++ }
53595 ++#if IS_ENABLED(CONFIG_IPV6)
53596 ++ else if (skb->protocol == htons(ETH_P_IPV6)) {
53597 ++ /* Only use 64 bits, so as to ratelimit the whole /64. */
53598 ++ memcpy(&ip, &ipv6_hdr(skb)->saddr, sizeof(ip));
53599 ++ bucket = &table_v6[hsiphash_3u32(net_word, ip >> 32, ip, &key) &
53600 ++ (table_size - 1)];
53601 ++ }
53602 ++#endif
53603 ++ else
53604 ++ return false;
53605 ++ rcu_read_lock();
53606 ++ hlist_for_each_entry_rcu(entry, bucket, hash) {
53607 ++ if (entry->net == net && entry->ip == ip) {
53608 ++ u64 now, tokens;
53609 ++ bool ret;
53610 ++ /* Quasi-inspired by nft_limit.c, but this is actually a
53611 ++ * slightly different algorithm. Namely, we incorporate
53612 ++ * the burst as part of the maximum tokens, rather than
53613 ++ * as part of the rate.
53614 ++ */
53615 ++ spin_lock(&entry->lock);
53616 ++ now = ktime_get_coarse_boottime_ns();
53617 ++ tokens = min_t(u64, TOKEN_MAX,
53618 ++ entry->tokens + now -
53619 ++ entry->last_time_ns);
53620 ++ entry->last_time_ns = now;
53621 ++ ret = tokens >= PACKET_COST;
53622 ++ entry->tokens = ret ? tokens - PACKET_COST : tokens;
53623 ++ spin_unlock(&entry->lock);
53624 ++ rcu_read_unlock();
53625 ++ return ret;
53626 ++ }
53627 ++ }
53628 ++ rcu_read_unlock();
53629 ++
53630 ++ if (atomic_inc_return(&total_entries) > max_entries)
53631 ++ goto err_oom;
53632 ++
53633 ++ entry = kmem_cache_alloc(entry_cache, GFP_KERNEL);
53634 ++ if (unlikely(!entry))
53635 ++ goto err_oom;
53636 ++
53637 ++ entry->net = net;
53638 ++ entry->ip = ip;
53639 ++ INIT_HLIST_NODE(&entry->hash);
53640 ++ spin_lock_init(&entry->lock);
53641 ++ entry->last_time_ns = ktime_get_coarse_boottime_ns();
53642 ++ entry->tokens = TOKEN_MAX - PACKET_COST;
53643 ++ spin_lock(&table_lock);
53644 ++ hlist_add_head_rcu(&entry->hash, bucket);
53645 ++ spin_unlock(&table_lock);
53646 ++ return true;
53647 ++
53648 ++err_oom:
53649 ++ atomic_dec(&total_entries);
53650 ++ return false;
53651 ++}
53652 ++
53653 ++int wg_ratelimiter_init(void)
53654 ++{
53655 ++ mutex_lock(&init_lock);
53656 ++ if (++init_refcnt != 1)
53657 ++ goto out;
53658 ++
53659 ++ entry_cache = KMEM_CACHE(ratelimiter_entry, 0);
53660 ++ if (!entry_cache)
53661 ++ goto err;
53662 ++
53663 ++ /* xt_hashlimit.c uses a slightly different algorithm for ratelimiting,
53664 ++ * but what it shares in common is that it uses a massive hashtable. So,
53665 ++ * we borrow their wisdom about good table sizes on different systems
53666 ++ * dependent on RAM. This calculation here comes from there.
53667 ++ */
53668 ++ table_size = (totalram_pages() > (1U << 30) / PAGE_SIZE) ? 8192 :
53669 ++ max_t(unsigned long, 16, roundup_pow_of_two(
53670 ++ (totalram_pages() << PAGE_SHIFT) /
53671 ++ (1U << 14) / sizeof(struct hlist_head)));
53672 ++ max_entries = table_size * 8;
53673 ++
53674 ++ table_v4 = kvzalloc(table_size * sizeof(*table_v4), GFP_KERNEL);
53675 ++ if (unlikely(!table_v4))
53676 ++ goto err_kmemcache;
53677 ++
53678 ++#if IS_ENABLED(CONFIG_IPV6)
53679 ++ table_v6 = kvzalloc(table_size * sizeof(*table_v6), GFP_KERNEL);
53680 ++ if (unlikely(!table_v6)) {
53681 ++ kvfree(table_v4);
53682 ++ goto err_kmemcache;
53683 ++ }
53684 ++#endif
53685 ++
53686 ++ queue_delayed_work(system_power_efficient_wq, &gc_work, HZ);
53687 ++ get_random_bytes(&key, sizeof(key));
53688 ++out:
53689 ++ mutex_unlock(&init_lock);
53690 ++ return 0;
53691 ++
53692 ++err_kmemcache:
53693 ++ kmem_cache_destroy(entry_cache);
53694 ++err:
53695 ++ --init_refcnt;
53696 ++ mutex_unlock(&init_lock);
53697 ++ return -ENOMEM;
53698 ++}
53699 ++
53700 ++void wg_ratelimiter_uninit(void)
53701 ++{
53702 ++ mutex_lock(&init_lock);
53703 ++ if (!init_refcnt || --init_refcnt)
53704 ++ goto out;
53705 ++
53706 ++ cancel_delayed_work_sync(&gc_work);
53707 ++ wg_ratelimiter_gc_entries(NULL);
53708 ++ rcu_barrier();
53709 ++ kvfree(table_v4);
53710 ++#if IS_ENABLED(CONFIG_IPV6)
53711 ++ kvfree(table_v6);
53712 ++#endif
53713 ++ kmem_cache_destroy(entry_cache);
53714 ++out:
53715 ++ mutex_unlock(&init_lock);
53716 ++}
53717 ++
53718 ++#include "selftest/ratelimiter.c"
53719 +diff --git a/drivers/net/wireguard/ratelimiter.h b/drivers/net/wireguard/ratelimiter.h
53720 +new file mode 100644
53721 +index 000000000000..83067f71ea99
53722 +--- /dev/null
53723 ++++ b/drivers/net/wireguard/ratelimiter.h
53724 +@@ -0,0 +1,19 @@
53725 ++/* SPDX-License-Identifier: GPL-2.0 */
53726 ++/*
53727 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
53728 ++ */
53729 ++
53730 ++#ifndef _WG_RATELIMITER_H
53731 ++#define _WG_RATELIMITER_H
53732 ++
53733 ++#include <linux/skbuff.h>
53734 ++
53735 ++int wg_ratelimiter_init(void);
53736 ++void wg_ratelimiter_uninit(void);
53737 ++bool wg_ratelimiter_allow(struct sk_buff *skb, struct net *net);
53738 ++
53739 ++#ifdef DEBUG
53740 ++bool wg_ratelimiter_selftest(void);
53741 ++#endif
53742 ++
53743 ++#endif /* _WG_RATELIMITER_H */
53744 +diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c
53745 +new file mode 100644
53746 +index 000000000000..7e675f541491
53747 +--- /dev/null
53748 ++++ b/drivers/net/wireguard/receive.c
53749 +@@ -0,0 +1,595 @@
53750 ++// SPDX-License-Identifier: GPL-2.0
53751 ++/*
53752 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
53753 ++ */
53754 ++
53755 ++#include "queueing.h"
53756 ++#include "device.h"
53757 ++#include "peer.h"
53758 ++#include "timers.h"
53759 ++#include "messages.h"
53760 ++#include "cookie.h"
53761 ++#include "socket.h"
53762 ++
53763 ++#include <linux/ip.h>
53764 ++#include <linux/ipv6.h>
53765 ++#include <linux/udp.h>
53766 ++#include <net/ip_tunnels.h>
53767 ++
53768 ++/* Must be called with bh disabled. */
53769 ++static void update_rx_stats(struct wg_peer *peer, size_t len)
53770 ++{
53771 ++ struct pcpu_sw_netstats *tstats =
53772 ++ get_cpu_ptr(peer->device->dev->tstats);
53773 ++
53774 ++ u64_stats_update_begin(&tstats->syncp);
53775 ++ ++tstats->rx_packets;
53776 ++ tstats->rx_bytes += len;
53777 ++ peer->rx_bytes += len;
53778 ++ u64_stats_update_end(&tstats->syncp);
53779 ++ put_cpu_ptr(tstats);
53780 ++}
53781 ++
53782 ++#define SKB_TYPE_LE32(skb) (((struct message_header *)(skb)->data)->type)
53783 ++
53784 ++static size_t validate_header_len(struct sk_buff *skb)
53785 ++{
53786 ++ if (unlikely(skb->len < sizeof(struct message_header)))
53787 ++ return 0;
53788 ++ if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_DATA) &&
53789 ++ skb->len >= MESSAGE_MINIMUM_LENGTH)
53790 ++ return sizeof(struct message_data);
53791 ++ if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION) &&
53792 ++ skb->len == sizeof(struct message_handshake_initiation))
53793 ++ return sizeof(struct message_handshake_initiation);
53794 ++ if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE) &&
53795 ++ skb->len == sizeof(struct message_handshake_response))
53796 ++ return sizeof(struct message_handshake_response);
53797 ++ if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE) &&
53798 ++ skb->len == sizeof(struct message_handshake_cookie))
53799 ++ return sizeof(struct message_handshake_cookie);
53800 ++ return 0;
53801 ++}
53802 ++
53803 ++static int prepare_skb_header(struct sk_buff *skb, struct wg_device *wg)
53804 ++{
53805 ++ size_t data_offset, data_len, header_len;
53806 ++ struct udphdr *udp;
53807 ++
53808 ++ if (unlikely(wg_skb_examine_untrusted_ip_hdr(skb) != skb->protocol ||
53809 ++ skb_transport_header(skb) < skb->head ||
53810 ++ (skb_transport_header(skb) + sizeof(struct udphdr)) >
53811 ++ skb_tail_pointer(skb)))
53812 ++ return -EINVAL; /* Bogus IP header */
53813 ++ udp = udp_hdr(skb);
53814 ++ data_offset = (u8 *)udp - skb->data;
53815 ++ if (unlikely(data_offset > U16_MAX ||
53816 ++ data_offset + sizeof(struct udphdr) > skb->len))
53817 ++ /* Packet has offset at impossible location or isn't big enough
53818 ++ * to have UDP fields.
53819 ++ */
53820 ++ return -EINVAL;
53821 ++ data_len = ntohs(udp->len);
53822 ++ if (unlikely(data_len < sizeof(struct udphdr) ||
53823 ++ data_len > skb->len - data_offset))
53824 ++ /* UDP packet is reporting too small of a size or lying about
53825 ++ * its size.
53826 ++ */
53827 ++ return -EINVAL;
53828 ++ data_len -= sizeof(struct udphdr);
53829 ++ data_offset = (u8 *)udp + sizeof(struct udphdr) - skb->data;
53830 ++ if (unlikely(!pskb_may_pull(skb,
53831 ++ data_offset + sizeof(struct message_header)) ||
53832 ++ pskb_trim(skb, data_len + data_offset) < 0))
53833 ++ return -EINVAL;
53834 ++ skb_pull(skb, data_offset);
53835 ++ if (unlikely(skb->len != data_len))
53836 ++ /* Final len does not agree with calculated len */
53837 ++ return -EINVAL;
53838 ++ header_len = validate_header_len(skb);
53839 ++ if (unlikely(!header_len))
53840 ++ return -EINVAL;
53841 ++ __skb_push(skb, data_offset);
53842 ++ if (unlikely(!pskb_may_pull(skb, data_offset + header_len)))
53843 ++ return -EINVAL;
53844 ++ __skb_pull(skb, data_offset);
53845 ++ return 0;
53846 ++}
53847 ++
53848 ++static void wg_receive_handshake_packet(struct wg_device *wg,
53849 ++ struct sk_buff *skb)
53850 ++{
53851 ++ enum cookie_mac_state mac_state;
53852 ++ struct wg_peer *peer = NULL;
53853 ++ /* This is global, so that our load calculation applies to the whole
53854 ++ * system. We don't care about races with it at all.
53855 ++ */
53856 ++ static u64 last_under_load;
53857 ++ bool packet_needs_cookie;
53858 ++ bool under_load;
53859 ++
53860 ++ if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE)) {
53861 ++ net_dbg_skb_ratelimited("%s: Receiving cookie response from %pISpfsc\n",
53862 ++ wg->dev->name, skb);
53863 ++ wg_cookie_message_consume(
53864 ++ (struct message_handshake_cookie *)skb->data, wg);
53865 ++ return;
53866 ++ }
53867 ++
53868 ++ under_load = skb_queue_len(&wg->incoming_handshakes) >=
53869 ++ MAX_QUEUED_INCOMING_HANDSHAKES / 8;
53870 ++ if (under_load)
53871 ++ last_under_load = ktime_get_coarse_boottime_ns();
53872 ++ else if (last_under_load)
53873 ++ under_load = !wg_birthdate_has_expired(last_under_load, 1);
53874 ++ mac_state = wg_cookie_validate_packet(&wg->cookie_checker, skb,
53875 ++ under_load);
53876 ++ if ((under_load && mac_state == VALID_MAC_WITH_COOKIE) ||
53877 ++ (!under_load && mac_state == VALID_MAC_BUT_NO_COOKIE)) {
53878 ++ packet_needs_cookie = false;
53879 ++ } else if (under_load && mac_state == VALID_MAC_BUT_NO_COOKIE) {
53880 ++ packet_needs_cookie = true;
53881 ++ } else {
53882 ++ net_dbg_skb_ratelimited("%s: Invalid MAC of handshake, dropping packet from %pISpfsc\n",
53883 ++ wg->dev->name, skb);
53884 ++ return;
53885 ++ }
53886 ++
53887 ++ switch (SKB_TYPE_LE32(skb)) {
53888 ++ case cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION): {
53889 ++ struct message_handshake_initiation *message =
53890 ++ (struct message_handshake_initiation *)skb->data;
53891 ++
53892 ++ if (packet_needs_cookie) {
53893 ++ wg_packet_send_handshake_cookie(wg, skb,
53894 ++ message->sender_index);
53895 ++ return;
53896 ++ }
53897 ++ peer = wg_noise_handshake_consume_initiation(message, wg);
53898 ++ if (unlikely(!peer)) {
53899 ++ net_dbg_skb_ratelimited("%s: Invalid handshake initiation from %pISpfsc\n",
53900 ++ wg->dev->name, skb);
53901 ++ return;
53902 ++ }
53903 ++ wg_socket_set_peer_endpoint_from_skb(peer, skb);
53904 ++ net_dbg_ratelimited("%s: Receiving handshake initiation from peer %llu (%pISpfsc)\n",
53905 ++ wg->dev->name, peer->internal_id,
53906 ++ &peer->endpoint.addr);
53907 ++ wg_packet_send_handshake_response(peer);
53908 ++ break;
53909 ++ }
53910 ++ case cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE): {
53911 ++ struct message_handshake_response *message =
53912 ++ (struct message_handshake_response *)skb->data;
53913 ++
53914 ++ if (packet_needs_cookie) {
53915 ++ wg_packet_send_handshake_cookie(wg, skb,
53916 ++ message->sender_index);
53917 ++ return;
53918 ++ }
53919 ++ peer = wg_noise_handshake_consume_response(message, wg);
53920 ++ if (unlikely(!peer)) {
53921 ++ net_dbg_skb_ratelimited("%s: Invalid handshake response from %pISpfsc\n",
53922 ++ wg->dev->name, skb);
53923 ++ return;
53924 ++ }
53925 ++ wg_socket_set_peer_endpoint_from_skb(peer, skb);
53926 ++ net_dbg_ratelimited("%s: Receiving handshake response from peer %llu (%pISpfsc)\n",
53927 ++ wg->dev->name, peer->internal_id,
53928 ++ &peer->endpoint.addr);
53929 ++ if (wg_noise_handshake_begin_session(&peer->handshake,
53930 ++ &peer->keypairs)) {
53931 ++ wg_timers_session_derived(peer);
53932 ++ wg_timers_handshake_complete(peer);
53933 ++ /* Calling this function will either send any existing
53934 ++ * packets in the queue and not send a keepalive, which
53935 ++ * is the best case. Or, if there's nothing in the
53936 ++ * queue, it will send a keepalive, in order to give
53937 ++ * immediate confirmation of the session.
53938 ++ */
53939 ++ wg_packet_send_keepalive(peer);
53940 ++ }
53941 ++ break;
53942 ++ }
53943 ++ }
53944 ++
53945 ++ if (unlikely(!peer)) {
53946 ++ WARN(1, "Somehow a wrong type of packet wound up in the handshake queue!\n");
53947 ++ return;
53948 ++ }
53949 ++
53950 ++ local_bh_disable();
53951 ++ update_rx_stats(peer, skb->len);
53952 ++ local_bh_enable();
53953 ++
53954 ++ wg_timers_any_authenticated_packet_received(peer);
53955 ++ wg_timers_any_authenticated_packet_traversal(peer);
53956 ++ wg_peer_put(peer);
53957 ++}
53958 ++
53959 ++void wg_packet_handshake_receive_worker(struct work_struct *work)
53960 ++{
53961 ++ struct wg_device *wg = container_of(work, struct multicore_worker,
53962 ++ work)->ptr;
53963 ++ struct sk_buff *skb;
53964 ++
53965 ++ while ((skb = skb_dequeue(&wg->incoming_handshakes)) != NULL) {
53966 ++ wg_receive_handshake_packet(wg, skb);
53967 ++ dev_kfree_skb(skb);
53968 ++ cond_resched();
53969 ++ }
53970 ++}
53971 ++
53972 ++static void keep_key_fresh(struct wg_peer *peer)
53973 ++{
53974 ++ struct noise_keypair *keypair;
53975 ++ bool send = false;
53976 ++
53977 ++ if (peer->sent_lastminute_handshake)
53978 ++ return;
53979 ++
53980 ++ rcu_read_lock_bh();
53981 ++ keypair = rcu_dereference_bh(peer->keypairs.current_keypair);
53982 ++ if (likely(keypair && READ_ONCE(keypair->sending.is_valid)) &&
53983 ++ keypair->i_am_the_initiator &&
53984 ++ unlikely(wg_birthdate_has_expired(keypair->sending.birthdate,
53985 ++ REJECT_AFTER_TIME - KEEPALIVE_TIMEOUT - REKEY_TIMEOUT)))
53986 ++ send = true;
53987 ++ rcu_read_unlock_bh();
53988 ++
53989 ++ if (send) {
53990 ++ peer->sent_lastminute_handshake = true;
53991 ++ wg_packet_send_queued_handshake_initiation(peer, false);
53992 ++ }
53993 ++}
53994 ++
53995 ++static bool decrypt_packet(struct sk_buff *skb, struct noise_symmetric_key *key)
53996 ++{
53997 ++ struct scatterlist sg[MAX_SKB_FRAGS + 8];
53998 ++ struct sk_buff *trailer;
53999 ++ unsigned int offset;
54000 ++ int num_frags;
54001 ++
54002 ++ if (unlikely(!key))
54003 ++ return false;
54004 ++
54005 ++ if (unlikely(!READ_ONCE(key->is_valid) ||
54006 ++ wg_birthdate_has_expired(key->birthdate, REJECT_AFTER_TIME) ||
54007 ++ key->counter.receive.counter >= REJECT_AFTER_MESSAGES)) {
54008 ++ WRITE_ONCE(key->is_valid, false);
54009 ++ return false;
54010 ++ }
54011 ++
54012 ++ PACKET_CB(skb)->nonce =
54013 ++ le64_to_cpu(((struct message_data *)skb->data)->counter);
54014 ++
54015 ++ /* We ensure that the network header is part of the packet before we
54016 ++ * call skb_cow_data, so that there's no chance that data is removed
54017 ++ * from the skb, so that later we can extract the original endpoint.
54018 ++ */
54019 ++ offset = skb->data - skb_network_header(skb);
54020 ++ skb_push(skb, offset);
54021 ++ num_frags = skb_cow_data(skb, 0, &trailer);
54022 ++ offset += sizeof(struct message_data);
54023 ++ skb_pull(skb, offset);
54024 ++ if (unlikely(num_frags < 0 || num_frags > ARRAY_SIZE(sg)))
54025 ++ return false;
54026 ++
54027 ++ sg_init_table(sg, num_frags);
54028 ++ if (skb_to_sgvec(skb, sg, 0, skb->len) <= 0)
54029 ++ return false;
54030 ++
54031 ++ if (!chacha20poly1305_decrypt_sg_inplace(sg, skb->len, NULL, 0,
54032 ++ PACKET_CB(skb)->nonce,
54033 ++ key->key))
54034 ++ return false;
54035 ++
54036 ++ /* Another ugly situation of pushing and pulling the header so as to
54037 ++ * keep endpoint information intact.
54038 ++ */
54039 ++ skb_push(skb, offset);
54040 ++ if (pskb_trim(skb, skb->len - noise_encrypted_len(0)))
54041 ++ return false;
54042 ++ skb_pull(skb, offset);
54043 ++
54044 ++ return true;
54045 ++}
54046 ++
54047 ++/* This is RFC6479, a replay detection bitmap algorithm that avoids bitshifts */
54048 ++static bool counter_validate(union noise_counter *counter, u64 their_counter)
54049 ++{
54050 ++ unsigned long index, index_current, top, i;
54051 ++ bool ret = false;
54052 ++
54053 ++ spin_lock_bh(&counter->receive.lock);
54054 ++
54055 ++ if (unlikely(counter->receive.counter >= REJECT_AFTER_MESSAGES + 1 ||
54056 ++ their_counter >= REJECT_AFTER_MESSAGES))
54057 ++ goto out;
54058 ++
54059 ++ ++their_counter;
54060 ++
54061 ++ if (unlikely((COUNTER_WINDOW_SIZE + their_counter) <
54062 ++ counter->receive.counter))
54063 ++ goto out;
54064 ++
54065 ++ index = their_counter >> ilog2(BITS_PER_LONG);
54066 ++
54067 ++ if (likely(their_counter > counter->receive.counter)) {
54068 ++ index_current = counter->receive.counter >> ilog2(BITS_PER_LONG);
54069 ++ top = min_t(unsigned long, index - index_current,
54070 ++ COUNTER_BITS_TOTAL / BITS_PER_LONG);
54071 ++ for (i = 1; i <= top; ++i)
54072 ++ counter->receive.backtrack[(i + index_current) &
54073 ++ ((COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1)] = 0;
54074 ++ counter->receive.counter = their_counter;
54075 ++ }
54076 ++
54077 ++ index &= (COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1;
54078 ++ ret = !test_and_set_bit(their_counter & (BITS_PER_LONG - 1),
54079 ++ &counter->receive.backtrack[index]);
54080 ++
54081 ++out:
54082 ++ spin_unlock_bh(&counter->receive.lock);
54083 ++ return ret;
54084 ++}
54085 ++
54086 ++#include "selftest/counter.c"
54087 ++
54088 ++static void wg_packet_consume_data_done(struct wg_peer *peer,
54089 ++ struct sk_buff *skb,
54090 ++ struct endpoint *endpoint)
54091 ++{
54092 ++ struct net_device *dev = peer->device->dev;
54093 ++ unsigned int len, len_before_trim;
54094 ++ struct wg_peer *routed_peer;
54095 ++
54096 ++ wg_socket_set_peer_endpoint(peer, endpoint);
54097 ++
54098 ++ if (unlikely(wg_noise_received_with_keypair(&peer->keypairs,
54099 ++ PACKET_CB(skb)->keypair))) {
54100 ++ wg_timers_handshake_complete(peer);
54101 ++ wg_packet_send_staged_packets(peer);
54102 ++ }
54103 ++
54104 ++ keep_key_fresh(peer);
54105 ++
54106 ++ wg_timers_any_authenticated_packet_received(peer);
54107 ++ wg_timers_any_authenticated_packet_traversal(peer);
54108 ++
54109 ++ /* A packet with length 0 is a keepalive packet */
54110 ++ if (unlikely(!skb->len)) {
54111 ++ update_rx_stats(peer, message_data_len(0));
54112 ++ net_dbg_ratelimited("%s: Receiving keepalive packet from peer %llu (%pISpfsc)\n",
54113 ++ dev->name, peer->internal_id,
54114 ++ &peer->endpoint.addr);
54115 ++ goto packet_processed;
54116 ++ }
54117 ++
54118 ++ wg_timers_data_received(peer);
54119 ++
54120 ++ if (unlikely(skb_network_header(skb) < skb->head))
54121 ++ goto dishonest_packet_size;
54122 ++ if (unlikely(!(pskb_network_may_pull(skb, sizeof(struct iphdr)) &&
54123 ++ (ip_hdr(skb)->version == 4 ||
54124 ++ (ip_hdr(skb)->version == 6 &&
54125 ++ pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))))))
54126 ++ goto dishonest_packet_type;
54127 ++
54128 ++ skb->dev = dev;
54129 ++ /* We've already verified the Poly1305 auth tag, which means this packet
54130 ++ * was not modified in transit. We can therefore tell the networking
54131 ++ * stack that all checksums of every layer of encapsulation have already
54132 ++ * been checked "by the hardware" and therefore is unnecessary to check
54133 ++ * again in software.
54134 ++ */
54135 ++ skb->ip_summed = CHECKSUM_UNNECESSARY;
54136 ++ skb->csum_level = ~0; /* All levels */
54137 ++ skb->protocol = wg_skb_examine_untrusted_ip_hdr(skb);
54138 ++ if (skb->protocol == htons(ETH_P_IP)) {
54139 ++ len = ntohs(ip_hdr(skb)->tot_len);
54140 ++ if (unlikely(len < sizeof(struct iphdr)))
54141 ++ goto dishonest_packet_size;
54142 ++ if (INET_ECN_is_ce(PACKET_CB(skb)->ds))
54143 ++ IP_ECN_set_ce(ip_hdr(skb));
54144 ++ } else if (skb->protocol == htons(ETH_P_IPV6)) {
54145 ++ len = ntohs(ipv6_hdr(skb)->payload_len) +
54146 ++ sizeof(struct ipv6hdr);
54147 ++ if (INET_ECN_is_ce(PACKET_CB(skb)->ds))
54148 ++ IP6_ECN_set_ce(skb, ipv6_hdr(skb));
54149 ++ } else {
54150 ++ goto dishonest_packet_type;
54151 ++ }
54152 ++
54153 ++ if (unlikely(len > skb->len))
54154 ++ goto dishonest_packet_size;
54155 ++ len_before_trim = skb->len;
54156 ++ if (unlikely(pskb_trim(skb, len)))
54157 ++ goto packet_processed;
54158 ++
54159 ++ routed_peer = wg_allowedips_lookup_src(&peer->device->peer_allowedips,
54160 ++ skb);
54161 ++ wg_peer_put(routed_peer); /* We don't need the extra reference. */
54162 ++
54163 ++ if (unlikely(routed_peer != peer))
54164 ++ goto dishonest_packet_peer;
54165 ++
54166 ++ if (unlikely(napi_gro_receive(&peer->napi, skb) == GRO_DROP)) {
54167 ++ ++dev->stats.rx_dropped;
54168 ++ net_dbg_ratelimited("%s: Failed to give packet to userspace from peer %llu (%pISpfsc)\n",
54169 ++ dev->name, peer->internal_id,
54170 ++ &peer->endpoint.addr);
54171 ++ } else {
54172 ++ update_rx_stats(peer, message_data_len(len_before_trim));
54173 ++ }
54174 ++ return;
54175 ++
54176 ++dishonest_packet_peer:
54177 ++ net_dbg_skb_ratelimited("%s: Packet has unallowed src IP (%pISc) from peer %llu (%pISpfsc)\n",
54178 ++ dev->name, skb, peer->internal_id,
54179 ++ &peer->endpoint.addr);
54180 ++ ++dev->stats.rx_errors;
54181 ++ ++dev->stats.rx_frame_errors;
54182 ++ goto packet_processed;
54183 ++dishonest_packet_type:
54184 ++ net_dbg_ratelimited("%s: Packet is neither ipv4 nor ipv6 from peer %llu (%pISpfsc)\n",
54185 ++ dev->name, peer->internal_id, &peer->endpoint.addr);
54186 ++ ++dev->stats.rx_errors;
54187 ++ ++dev->stats.rx_frame_errors;
54188 ++ goto packet_processed;
54189 ++dishonest_packet_size:
54190 ++ net_dbg_ratelimited("%s: Packet has incorrect size from peer %llu (%pISpfsc)\n",
54191 ++ dev->name, peer->internal_id, &peer->endpoint.addr);
54192 ++ ++dev->stats.rx_errors;
54193 ++ ++dev->stats.rx_length_errors;
54194 ++ goto packet_processed;
54195 ++packet_processed:
54196 ++ dev_kfree_skb(skb);
54197 ++}
54198 ++
54199 ++int wg_packet_rx_poll(struct napi_struct *napi, int budget)
54200 ++{
54201 ++ struct wg_peer *peer = container_of(napi, struct wg_peer, napi);
54202 ++ struct crypt_queue *queue = &peer->rx_queue;
54203 ++ struct noise_keypair *keypair;
54204 ++ struct endpoint endpoint;
54205 ++ enum packet_state state;
54206 ++ struct sk_buff *skb;
54207 ++ int work_done = 0;
54208 ++ bool free;
54209 ++
54210 ++ if (unlikely(budget <= 0))
54211 ++ return 0;
54212 ++
54213 ++ while ((skb = __ptr_ring_peek(&queue->ring)) != NULL &&
54214 ++ (state = atomic_read_acquire(&PACKET_CB(skb)->state)) !=
54215 ++ PACKET_STATE_UNCRYPTED) {
54216 ++ __ptr_ring_discard_one(&queue->ring);
54217 ++ peer = PACKET_PEER(skb);
54218 ++ keypair = PACKET_CB(skb)->keypair;
54219 ++ free = true;
54220 ++
54221 ++ if (unlikely(state != PACKET_STATE_CRYPTED))
54222 ++ goto next;
54223 ++
54224 ++ if (unlikely(!counter_validate(&keypair->receiving.counter,
54225 ++ PACKET_CB(skb)->nonce))) {
54226 ++ net_dbg_ratelimited("%s: Packet has invalid nonce %llu (max %llu)\n",
54227 ++ peer->device->dev->name,
54228 ++ PACKET_CB(skb)->nonce,
54229 ++ keypair->receiving.counter.receive.counter);
54230 ++ goto next;
54231 ++ }
54232 ++
54233 ++ if (unlikely(wg_socket_endpoint_from_skb(&endpoint, skb)))
54234 ++ goto next;
54235 ++
54236 ++ wg_reset_packet(skb);
54237 ++ wg_packet_consume_data_done(peer, skb, &endpoint);
54238 ++ free = false;
54239 ++
54240 ++next:
54241 ++ wg_noise_keypair_put(keypair, false);
54242 ++ wg_peer_put(peer);
54243 ++ if (unlikely(free))
54244 ++ dev_kfree_skb(skb);
54245 ++
54246 ++ if (++work_done >= budget)
54247 ++ break;
54248 ++ }
54249 ++
54250 ++ if (work_done < budget)
54251 ++ napi_complete_done(napi, work_done);
54252 ++
54253 ++ return work_done;
54254 ++}
54255 ++
54256 ++void wg_packet_decrypt_worker(struct work_struct *work)
54257 ++{
54258 ++ struct crypt_queue *queue = container_of(work, struct multicore_worker,
54259 ++ work)->ptr;
54260 ++ struct sk_buff *skb;
54261 ++
54262 ++ while ((skb = ptr_ring_consume_bh(&queue->ring)) != NULL) {
54263 ++ enum packet_state state = likely(decrypt_packet(skb,
54264 ++ &PACKET_CB(skb)->keypair->receiving)) ?
54265 ++ PACKET_STATE_CRYPTED : PACKET_STATE_DEAD;
54266 ++ wg_queue_enqueue_per_peer_napi(skb, state);
54267 ++ }
54268 ++}
54269 ++
54270 ++static void wg_packet_consume_data(struct wg_device *wg, struct sk_buff *skb)
54271 ++{
54272 ++ __le32 idx = ((struct message_data *)skb->data)->key_idx;
54273 ++ struct wg_peer *peer = NULL;
54274 ++ int ret;
54275 ++
54276 ++ rcu_read_lock_bh();
54277 ++ PACKET_CB(skb)->keypair =
54278 ++ (struct noise_keypair *)wg_index_hashtable_lookup(
54279 ++ wg->index_hashtable, INDEX_HASHTABLE_KEYPAIR, idx,
54280 ++ &peer);
54281 ++ if (unlikely(!wg_noise_keypair_get(PACKET_CB(skb)->keypair)))
54282 ++ goto err_keypair;
54283 ++
54284 ++ if (unlikely(READ_ONCE(peer->is_dead)))
54285 ++ goto err;
54286 ++
54287 ++ ret = wg_queue_enqueue_per_device_and_peer(&wg->decrypt_queue,
54288 ++ &peer->rx_queue, skb,
54289 ++ wg->packet_crypt_wq,
54290 ++ &wg->decrypt_queue.last_cpu);
54291 ++ if (unlikely(ret == -EPIPE))
54292 ++ wg_queue_enqueue_per_peer_napi(skb, PACKET_STATE_DEAD);
54293 ++ if (likely(!ret || ret == -EPIPE)) {
54294 ++ rcu_read_unlock_bh();
54295 ++ return;
54296 ++ }
54297 ++err:
54298 ++ wg_noise_keypair_put(PACKET_CB(skb)->keypair, false);
54299 ++err_keypair:
54300 ++ rcu_read_unlock_bh();
54301 ++ wg_peer_put(peer);
54302 ++ dev_kfree_skb(skb);
54303 ++}
54304 ++
54305 ++void wg_packet_receive(struct wg_device *wg, struct sk_buff *skb)
54306 ++{
54307 ++ if (unlikely(prepare_skb_header(skb, wg) < 0))
54308 ++ goto err;
54309 ++ switch (SKB_TYPE_LE32(skb)) {
54310 ++ case cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION):
54311 ++ case cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE):
54312 ++ case cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE): {
54313 ++ int cpu;
54314 ++
54315 ++ if (skb_queue_len(&wg->incoming_handshakes) >
54316 ++ MAX_QUEUED_INCOMING_HANDSHAKES ||
54317 ++ unlikely(!rng_is_initialized())) {
54318 ++ net_dbg_skb_ratelimited("%s: Dropping handshake packet from %pISpfsc\n",
54319 ++ wg->dev->name, skb);
54320 ++ goto err;
54321 ++ }
54322 ++ skb_queue_tail(&wg->incoming_handshakes, skb);
54323 ++ /* Queues up a call to
54324 ++ * wg_packet_handshake_receive_worker():
54325 ++ */
54326 ++ cpu = wg_cpumask_next_online(&wg->incoming_handshake_cpu);
54327 ++ queue_work_on(cpu, wg->handshake_receive_wq,
54328 ++ &per_cpu_ptr(wg->incoming_handshakes_worker, cpu)->work);
54329 ++ break;
54330 ++ }
54331 ++ case cpu_to_le32(MESSAGE_DATA):
54332 ++ PACKET_CB(skb)->ds = ip_tunnel_get_dsfield(ip_hdr(skb), skb);
54333 ++ wg_packet_consume_data(wg, skb);
54334 ++ break;
54335 ++ default:
54336 ++ net_dbg_skb_ratelimited("%s: Invalid packet from %pISpfsc\n",
54337 ++ wg->dev->name, skb);
54338 ++ goto err;
54339 ++ }
54340 ++ return;
54341 ++
54342 ++err:
54343 ++ dev_kfree_skb(skb);
54344 ++}
54345 +diff --git a/drivers/net/wireguard/selftest/allowedips.c b/drivers/net/wireguard/selftest/allowedips.c
54346 +new file mode 100644
54347 +index 000000000000..846db14cb046
54348 +--- /dev/null
54349 ++++ b/drivers/net/wireguard/selftest/allowedips.c
54350 +@@ -0,0 +1,683 @@
54351 ++// SPDX-License-Identifier: GPL-2.0
54352 ++/*
54353 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
54354 ++ *
54355 ++ * This contains some basic static unit tests for the allowedips data structure.
54356 ++ * It also has two additional modes that are disabled and meant to be used by
54357 ++ * folks directly playing with this file. If you define the macro
54358 ++ * DEBUG_PRINT_TRIE_GRAPHVIZ to be 1, then every time there's a full tree in
54359 ++ * memory, it will be printed out as KERN_DEBUG in a format that can be passed
54360 ++ * to graphviz (the dot command) to visualize it. If you define the macro
54361 ++ * DEBUG_RANDOM_TRIE to be 1, then there will be an extremely costly set of
54362 ++ * randomized tests done against a trivial implementation, which may take
54363 ++ * upwards of a half-hour to complete. There's no set of users who should be
54364 ++ * enabling these, and the only developers that should go anywhere near these
54365 ++ * knobs are the ones who are reading this comment.
54366 ++ */
54367 ++
54368 ++#ifdef DEBUG
54369 ++
54370 ++#include <linux/siphash.h>
54371 ++
54372 ++static __init void swap_endian_and_apply_cidr(u8 *dst, const u8 *src, u8 bits,
54373 ++ u8 cidr)
54374 ++{
54375 ++ swap_endian(dst, src, bits);
54376 ++ memset(dst + (cidr + 7) / 8, 0, bits / 8 - (cidr + 7) / 8);
54377 ++ if (cidr)
54378 ++ dst[(cidr + 7) / 8 - 1] &= ~0U << ((8 - (cidr % 8)) % 8);
54379 ++}
54380 ++
54381 ++static __init void print_node(struct allowedips_node *node, u8 bits)
54382 ++{
54383 ++ char *fmt_connection = KERN_DEBUG "\t\"%p/%d\" -> \"%p/%d\";\n";
54384 ++ char *fmt_declaration = KERN_DEBUG
54385 ++ "\t\"%p/%d\"[style=%s, color=\"#%06x\"];\n";
54386 ++ char *style = "dotted";
54387 ++ u8 ip1[16], ip2[16];
54388 ++ u32 color = 0;
54389 ++
54390 ++ if (bits == 32) {
54391 ++ fmt_connection = KERN_DEBUG "\t\"%pI4/%d\" -> \"%pI4/%d\";\n";
54392 ++ fmt_declaration = KERN_DEBUG
54393 ++ "\t\"%pI4/%d\"[style=%s, color=\"#%06x\"];\n";
54394 ++ } else if (bits == 128) {
54395 ++ fmt_connection = KERN_DEBUG "\t\"%pI6/%d\" -> \"%pI6/%d\";\n";
54396 ++ fmt_declaration = KERN_DEBUG
54397 ++ "\t\"%pI6/%d\"[style=%s, color=\"#%06x\"];\n";
54398 ++ }
54399 ++ if (node->peer) {
54400 ++ hsiphash_key_t key = { { 0 } };
54401 ++
54402 ++ memcpy(&key, &node->peer, sizeof(node->peer));
54403 ++ color = hsiphash_1u32(0xdeadbeef, &key) % 200 << 16 |
54404 ++ hsiphash_1u32(0xbabecafe, &key) % 200 << 8 |
54405 ++ hsiphash_1u32(0xabad1dea, &key) % 200;
54406 ++ style = "bold";
54407 ++ }
54408 ++ swap_endian_and_apply_cidr(ip1, node->bits, bits, node->cidr);
54409 ++ printk(fmt_declaration, ip1, node->cidr, style, color);
54410 ++ if (node->bit[0]) {
54411 ++ swap_endian_and_apply_cidr(ip2,
54412 ++ rcu_dereference_raw(node->bit[0])->bits, bits,
54413 ++ node->cidr);
54414 ++ printk(fmt_connection, ip1, node->cidr, ip2,
54415 ++ rcu_dereference_raw(node->bit[0])->cidr);
54416 ++ print_node(rcu_dereference_raw(node->bit[0]), bits);
54417 ++ }
54418 ++ if (node->bit[1]) {
54419 ++ swap_endian_and_apply_cidr(ip2,
54420 ++ rcu_dereference_raw(node->bit[1])->bits,
54421 ++ bits, node->cidr);
54422 ++ printk(fmt_connection, ip1, node->cidr, ip2,
54423 ++ rcu_dereference_raw(node->bit[1])->cidr);
54424 ++ print_node(rcu_dereference_raw(node->bit[1]), bits);
54425 ++ }
54426 ++}
54427 ++
54428 ++static __init void print_tree(struct allowedips_node __rcu *top, u8 bits)
54429 ++{
54430 ++ printk(KERN_DEBUG "digraph trie {\n");
54431 ++ print_node(rcu_dereference_raw(top), bits);
54432 ++ printk(KERN_DEBUG "}\n");
54433 ++}
54434 ++
54435 ++enum {
54436 ++ NUM_PEERS = 2000,
54437 ++ NUM_RAND_ROUTES = 400,
54438 ++ NUM_MUTATED_ROUTES = 100,
54439 ++ NUM_QUERIES = NUM_RAND_ROUTES * NUM_MUTATED_ROUTES * 30
54440 ++};
54441 ++
54442 ++struct horrible_allowedips {
54443 ++ struct hlist_head head;
54444 ++};
54445 ++
54446 ++struct horrible_allowedips_node {
54447 ++ struct hlist_node table;
54448 ++ union nf_inet_addr ip;
54449 ++ union nf_inet_addr mask;
54450 ++ u8 ip_version;
54451 ++ void *value;
54452 ++};
54453 ++
54454 ++static __init void horrible_allowedips_init(struct horrible_allowedips *table)
54455 ++{
54456 ++ INIT_HLIST_HEAD(&table->head);
54457 ++}
54458 ++
54459 ++static __init void horrible_allowedips_free(struct horrible_allowedips *table)
54460 ++{
54461 ++ struct horrible_allowedips_node *node;
54462 ++ struct hlist_node *h;
54463 ++
54464 ++ hlist_for_each_entry_safe(node, h, &table->head, table) {
54465 ++ hlist_del(&node->table);
54466 ++ kfree(node);
54467 ++ }
54468 ++}
54469 ++
54470 ++static __init inline union nf_inet_addr horrible_cidr_to_mask(u8 cidr)
54471 ++{
54472 ++ union nf_inet_addr mask;
54473 ++
54474 ++ memset(&mask, 0x00, 128 / 8);
54475 ++ memset(&mask, 0xff, cidr / 8);
54476 ++ if (cidr % 32)
54477 ++ mask.all[cidr / 32] = (__force u32)htonl(
54478 ++ (0xFFFFFFFFUL << (32 - (cidr % 32))) & 0xFFFFFFFFUL);
54479 ++ return mask;
54480 ++}
54481 ++
54482 ++static __init inline u8 horrible_mask_to_cidr(union nf_inet_addr subnet)
54483 ++{
54484 ++ return hweight32(subnet.all[0]) + hweight32(subnet.all[1]) +
54485 ++ hweight32(subnet.all[2]) + hweight32(subnet.all[3]);
54486 ++}
54487 ++
54488 ++static __init inline void
54489 ++horrible_mask_self(struct horrible_allowedips_node *node)
54490 ++{
54491 ++ if (node->ip_version == 4) {
54492 ++ node->ip.ip &= node->mask.ip;
54493 ++ } else if (node->ip_version == 6) {
54494 ++ node->ip.ip6[0] &= node->mask.ip6[0];
54495 ++ node->ip.ip6[1] &= node->mask.ip6[1];
54496 ++ node->ip.ip6[2] &= node->mask.ip6[2];
54497 ++ node->ip.ip6[3] &= node->mask.ip6[3];
54498 ++ }
54499 ++}
54500 ++
54501 ++static __init inline bool
54502 ++horrible_match_v4(const struct horrible_allowedips_node *node,
54503 ++ struct in_addr *ip)
54504 ++{
54505 ++ return (ip->s_addr & node->mask.ip) == node->ip.ip;
54506 ++}
54507 ++
54508 ++static __init inline bool
54509 ++horrible_match_v6(const struct horrible_allowedips_node *node,
54510 ++ struct in6_addr *ip)
54511 ++{
54512 ++ return (ip->in6_u.u6_addr32[0] & node->mask.ip6[0]) ==
54513 ++ node->ip.ip6[0] &&
54514 ++ (ip->in6_u.u6_addr32[1] & node->mask.ip6[1]) ==
54515 ++ node->ip.ip6[1] &&
54516 ++ (ip->in6_u.u6_addr32[2] & node->mask.ip6[2]) ==
54517 ++ node->ip.ip6[2] &&
54518 ++ (ip->in6_u.u6_addr32[3] & node->mask.ip6[3]) == node->ip.ip6[3];
54519 ++}
54520 ++
54521 ++static __init void
54522 ++horrible_insert_ordered(struct horrible_allowedips *table,
54523 ++ struct horrible_allowedips_node *node)
54524 ++{
54525 ++ struct horrible_allowedips_node *other = NULL, *where = NULL;
54526 ++ u8 my_cidr = horrible_mask_to_cidr(node->mask);
54527 ++
54528 ++ hlist_for_each_entry(other, &table->head, table) {
54529 ++ if (!memcmp(&other->mask, &node->mask,
54530 ++ sizeof(union nf_inet_addr)) &&
54531 ++ !memcmp(&other->ip, &node->ip,
54532 ++ sizeof(union nf_inet_addr)) &&
54533 ++ other->ip_version == node->ip_version) {
54534 ++ other->value = node->value;
54535 ++ kfree(node);
54536 ++ return;
54537 ++ }
54538 ++ where = other;
54539 ++ if (horrible_mask_to_cidr(other->mask) <= my_cidr)
54540 ++ break;
54541 ++ }
54542 ++ if (!other && !where)
54543 ++ hlist_add_head(&node->table, &table->head);
54544 ++ else if (!other)
54545 ++ hlist_add_behind(&node->table, &where->table);
54546 ++ else
54547 ++ hlist_add_before(&node->table, &where->table);
54548 ++}
54549 ++
54550 ++static __init int
54551 ++horrible_allowedips_insert_v4(struct horrible_allowedips *table,
54552 ++ struct in_addr *ip, u8 cidr, void *value)
54553 ++{
54554 ++ struct horrible_allowedips_node *node = kzalloc(sizeof(*node),
54555 ++ GFP_KERNEL);
54556 ++
54557 ++ if (unlikely(!node))
54558 ++ return -ENOMEM;
54559 ++ node->ip.in = *ip;
54560 ++ node->mask = horrible_cidr_to_mask(cidr);
54561 ++ node->ip_version = 4;
54562 ++ node->value = value;
54563 ++ horrible_mask_self(node);
54564 ++ horrible_insert_ordered(table, node);
54565 ++ return 0;
54566 ++}
54567 ++
54568 ++static __init int
54569 ++horrible_allowedips_insert_v6(struct horrible_allowedips *table,
54570 ++ struct in6_addr *ip, u8 cidr, void *value)
54571 ++{
54572 ++ struct horrible_allowedips_node *node = kzalloc(sizeof(*node),
54573 ++ GFP_KERNEL);
54574 ++
54575 ++ if (unlikely(!node))
54576 ++ return -ENOMEM;
54577 ++ node->ip.in6 = *ip;
54578 ++ node->mask = horrible_cidr_to_mask(cidr);
54579 ++ node->ip_version = 6;
54580 ++ node->value = value;
54581 ++ horrible_mask_self(node);
54582 ++ horrible_insert_ordered(table, node);
54583 ++ return 0;
54584 ++}
54585 ++
54586 ++static __init void *
54587 ++horrible_allowedips_lookup_v4(struct horrible_allowedips *table,
54588 ++ struct in_addr *ip)
54589 ++{
54590 ++ struct horrible_allowedips_node *node;
54591 ++ void *ret = NULL;
54592 ++
54593 ++ hlist_for_each_entry(node, &table->head, table) {
54594 ++ if (node->ip_version != 4)
54595 ++ continue;
54596 ++ if (horrible_match_v4(node, ip)) {
54597 ++ ret = node->value;
54598 ++ break;
54599 ++ }
54600 ++ }
54601 ++ return ret;
54602 ++}
54603 ++
54604 ++static __init void *
54605 ++horrible_allowedips_lookup_v6(struct horrible_allowedips *table,
54606 ++ struct in6_addr *ip)
54607 ++{
54608 ++ struct horrible_allowedips_node *node;
54609 ++ void *ret = NULL;
54610 ++
54611 ++ hlist_for_each_entry(node, &table->head, table) {
54612 ++ if (node->ip_version != 6)
54613 ++ continue;
54614 ++ if (horrible_match_v6(node, ip)) {
54615 ++ ret = node->value;
54616 ++ break;
54617 ++ }
54618 ++ }
54619 ++ return ret;
54620 ++}
54621 ++
54622 ++static __init bool randomized_test(void)
54623 ++{
54624 ++ unsigned int i, j, k, mutate_amount, cidr;
54625 ++ u8 ip[16], mutate_mask[16], mutated[16];
54626 ++ struct wg_peer **peers, *peer;
54627 ++ struct horrible_allowedips h;
54628 ++ DEFINE_MUTEX(mutex);
54629 ++ struct allowedips t;
54630 ++ bool ret = false;
54631 ++
54632 ++ mutex_init(&mutex);
54633 ++
54634 ++ wg_allowedips_init(&t);
54635 ++ horrible_allowedips_init(&h);
54636 ++
54637 ++ peers = kcalloc(NUM_PEERS, sizeof(*peers), GFP_KERNEL);
54638 ++ if (unlikely(!peers)) {
54639 ++ pr_err("allowedips random self-test malloc: FAIL\n");
54640 ++ goto free;
54641 ++ }
54642 ++ for (i = 0; i < NUM_PEERS; ++i) {
54643 ++ peers[i] = kzalloc(sizeof(*peers[i]), GFP_KERNEL);
54644 ++ if (unlikely(!peers[i])) {
54645 ++ pr_err("allowedips random self-test malloc: FAIL\n");
54646 ++ goto free;
54647 ++ }
54648 ++ kref_init(&peers[i]->refcount);
54649 ++ }
54650 ++
54651 ++ mutex_lock(&mutex);
54652 ++
54653 ++ for (i = 0; i < NUM_RAND_ROUTES; ++i) {
54654 ++ prandom_bytes(ip, 4);
54655 ++ cidr = prandom_u32_max(32) + 1;
54656 ++ peer = peers[prandom_u32_max(NUM_PEERS)];
54657 ++ if (wg_allowedips_insert_v4(&t, (struct in_addr *)ip, cidr,
54658 ++ peer, &mutex) < 0) {
54659 ++ pr_err("allowedips random self-test malloc: FAIL\n");
54660 ++ goto free_locked;
54661 ++ }
54662 ++ if (horrible_allowedips_insert_v4(&h, (struct in_addr *)ip,
54663 ++ cidr, peer) < 0) {
54664 ++ pr_err("allowedips random self-test malloc: FAIL\n");
54665 ++ goto free_locked;
54666 ++ }
54667 ++ for (j = 0; j < NUM_MUTATED_ROUTES; ++j) {
54668 ++ memcpy(mutated, ip, 4);
54669 ++ prandom_bytes(mutate_mask, 4);
54670 ++ mutate_amount = prandom_u32_max(32);
54671 ++ for (k = 0; k < mutate_amount / 8; ++k)
54672 ++ mutate_mask[k] = 0xff;
54673 ++ mutate_mask[k] = 0xff
54674 ++ << ((8 - (mutate_amount % 8)) % 8);
54675 ++ for (; k < 4; ++k)
54676 ++ mutate_mask[k] = 0;
54677 ++ for (k = 0; k < 4; ++k)
54678 ++ mutated[k] = (mutated[k] & mutate_mask[k]) |
54679 ++ (~mutate_mask[k] &
54680 ++ prandom_u32_max(256));
54681 ++ cidr = prandom_u32_max(32) + 1;
54682 ++ peer = peers[prandom_u32_max(NUM_PEERS)];
54683 ++ if (wg_allowedips_insert_v4(&t,
54684 ++ (struct in_addr *)mutated,
54685 ++ cidr, peer, &mutex) < 0) {
54686 ++ pr_err("allowedips random malloc: FAIL\n");
54687 ++ goto free_locked;
54688 ++ }
54689 ++ if (horrible_allowedips_insert_v4(&h,
54690 ++ (struct in_addr *)mutated, cidr, peer)) {
54691 ++ pr_err("allowedips random self-test malloc: FAIL\n");
54692 ++ goto free_locked;
54693 ++ }
54694 ++ }
54695 ++ }
54696 ++
54697 ++ for (i = 0; i < NUM_RAND_ROUTES; ++i) {
54698 ++ prandom_bytes(ip, 16);
54699 ++ cidr = prandom_u32_max(128) + 1;
54700 ++ peer = peers[prandom_u32_max(NUM_PEERS)];
54701 ++ if (wg_allowedips_insert_v6(&t, (struct in6_addr *)ip, cidr,
54702 ++ peer, &mutex) < 0) {
54703 ++ pr_err("allowedips random self-test malloc: FAIL\n");
54704 ++ goto free_locked;
54705 ++ }
54706 ++ if (horrible_allowedips_insert_v6(&h, (struct in6_addr *)ip,
54707 ++ cidr, peer) < 0) {
54708 ++ pr_err("allowedips random self-test malloc: FAIL\n");
54709 ++ goto free_locked;
54710 ++ }
54711 ++ for (j = 0; j < NUM_MUTATED_ROUTES; ++j) {
54712 ++ memcpy(mutated, ip, 16);
54713 ++ prandom_bytes(mutate_mask, 16);
54714 ++ mutate_amount = prandom_u32_max(128);
54715 ++ for (k = 0; k < mutate_amount / 8; ++k)
54716 ++ mutate_mask[k] = 0xff;
54717 ++ mutate_mask[k] = 0xff
54718 ++ << ((8 - (mutate_amount % 8)) % 8);
54719 ++ for (; k < 4; ++k)
54720 ++ mutate_mask[k] = 0;
54721 ++ for (k = 0; k < 4; ++k)
54722 ++ mutated[k] = (mutated[k] & mutate_mask[k]) |
54723 ++ (~mutate_mask[k] &
54724 ++ prandom_u32_max(256));
54725 ++ cidr = prandom_u32_max(128) + 1;
54726 ++ peer = peers[prandom_u32_max(NUM_PEERS)];
54727 ++ if (wg_allowedips_insert_v6(&t,
54728 ++ (struct in6_addr *)mutated,
54729 ++ cidr, peer, &mutex) < 0) {
54730 ++ pr_err("allowedips random self-test malloc: FAIL\n");
54731 ++ goto free_locked;
54732 ++ }
54733 ++ if (horrible_allowedips_insert_v6(
54734 ++ &h, (struct in6_addr *)mutated, cidr,
54735 ++ peer)) {
54736 ++ pr_err("allowedips random self-test malloc: FAIL\n");
54737 ++ goto free_locked;
54738 ++ }
54739 ++ }
54740 ++ }
54741 ++
54742 ++ mutex_unlock(&mutex);
54743 ++
54744 ++ if (IS_ENABLED(DEBUG_PRINT_TRIE_GRAPHVIZ)) {
54745 ++ print_tree(t.root4, 32);
54746 ++ print_tree(t.root6, 128);
54747 ++ }
54748 ++
54749 ++ for (i = 0; i < NUM_QUERIES; ++i) {
54750 ++ prandom_bytes(ip, 4);
54751 ++ if (lookup(t.root4, 32, ip) !=
54752 ++ horrible_allowedips_lookup_v4(&h, (struct in_addr *)ip)) {
54753 ++ pr_err("allowedips random self-test: FAIL\n");
54754 ++ goto free;
54755 ++ }
54756 ++ }
54757 ++
54758 ++ for (i = 0; i < NUM_QUERIES; ++i) {
54759 ++ prandom_bytes(ip, 16);
54760 ++ if (lookup(t.root6, 128, ip) !=
54761 ++ horrible_allowedips_lookup_v6(&h, (struct in6_addr *)ip)) {
54762 ++ pr_err("allowedips random self-test: FAIL\n");
54763 ++ goto free;
54764 ++ }
54765 ++ }
54766 ++ ret = true;
54767 ++
54768 ++free:
54769 ++ mutex_lock(&mutex);
54770 ++free_locked:
54771 ++ wg_allowedips_free(&t, &mutex);
54772 ++ mutex_unlock(&mutex);
54773 ++ horrible_allowedips_free(&h);
54774 ++ if (peers) {
54775 ++ for (i = 0; i < NUM_PEERS; ++i)
54776 ++ kfree(peers[i]);
54777 ++ }
54778 ++ kfree(peers);
54779 ++ return ret;
54780 ++}
54781 ++
54782 ++static __init inline struct in_addr *ip4(u8 a, u8 b, u8 c, u8 d)
54783 ++{
54784 ++ static struct in_addr ip;
54785 ++ u8 *split = (u8 *)&ip;
54786 ++
54787 ++ split[0] = a;
54788 ++ split[1] = b;
54789 ++ split[2] = c;
54790 ++ split[3] = d;
54791 ++ return &ip;
54792 ++}
54793 ++
54794 ++static __init inline struct in6_addr *ip6(u32 a, u32 b, u32 c, u32 d)
54795 ++{
54796 ++ static struct in6_addr ip;
54797 ++ __be32 *split = (__be32 *)&ip;
54798 ++
54799 ++ split[0] = cpu_to_be32(a);
54800 ++ split[1] = cpu_to_be32(b);
54801 ++ split[2] = cpu_to_be32(c);
54802 ++ split[3] = cpu_to_be32(d);
54803 ++ return &ip;
54804 ++}
54805 ++
54806 ++static __init struct wg_peer *init_peer(void)
54807 ++{
54808 ++ struct wg_peer *peer = kzalloc(sizeof(*peer), GFP_KERNEL);
54809 ++
54810 ++ if (!peer)
54811 ++ return NULL;
54812 ++ kref_init(&peer->refcount);
54813 ++ INIT_LIST_HEAD(&peer->allowedips_list);
54814 ++ return peer;
54815 ++}
54816 ++
54817 ++#define insert(version, mem, ipa, ipb, ipc, ipd, cidr) \
54818 ++ wg_allowedips_insert_v##version(&t, ip##version(ipa, ipb, ipc, ipd), \
54819 ++ cidr, mem, &mutex)
54820 ++
54821 ++#define maybe_fail() do { \
54822 ++ ++i; \
54823 ++ if (!_s) { \
54824 ++ pr_info("allowedips self-test %zu: FAIL\n", i); \
54825 ++ success = false; \
54826 ++ } \
54827 ++ } while (0)
54828 ++
54829 ++#define test(version, mem, ipa, ipb, ipc, ipd) do { \
54830 ++ bool _s = lookup(t.root##version, (version) == 4 ? 32 : 128, \
54831 ++ ip##version(ipa, ipb, ipc, ipd)) == (mem); \
54832 ++ maybe_fail(); \
54833 ++ } while (0)
54834 ++
54835 ++#define test_negative(version, mem, ipa, ipb, ipc, ipd) do { \
54836 ++ bool _s = lookup(t.root##version, (version) == 4 ? 32 : 128, \
54837 ++ ip##version(ipa, ipb, ipc, ipd)) != (mem); \
54838 ++ maybe_fail(); \
54839 ++ } while (0)
54840 ++
54841 ++#define test_boolean(cond) do { \
54842 ++ bool _s = (cond); \
54843 ++ maybe_fail(); \
54844 ++ } while (0)
54845 ++
54846 ++bool __init wg_allowedips_selftest(void)
54847 ++{
54848 ++ bool found_a = false, found_b = false, found_c = false, found_d = false,
54849 ++ found_e = false, found_other = false;
54850 ++ struct wg_peer *a = init_peer(), *b = init_peer(), *c = init_peer(),
54851 ++ *d = init_peer(), *e = init_peer(), *f = init_peer(),
54852 ++ *g = init_peer(), *h = init_peer();
54853 ++ struct allowedips_node *iter_node;
54854 ++ bool success = false;
54855 ++ struct allowedips t;
54856 ++ DEFINE_MUTEX(mutex);
54857 ++ struct in6_addr ip;
54858 ++ size_t i = 0, count = 0;
54859 ++ __be64 part;
54860 ++
54861 ++ mutex_init(&mutex);
54862 ++ mutex_lock(&mutex);
54863 ++ wg_allowedips_init(&t);
54864 ++
54865 ++ if (!a || !b || !c || !d || !e || !f || !g || !h) {
54866 ++ pr_err("allowedips self-test malloc: FAIL\n");
54867 ++ goto free;
54868 ++ }
54869 ++
54870 ++ insert(4, a, 192, 168, 4, 0, 24);
54871 ++ insert(4, b, 192, 168, 4, 4, 32);
54872 ++ insert(4, c, 192, 168, 0, 0, 16);
54873 ++ insert(4, d, 192, 95, 5, 64, 27);
54874 ++ /* replaces previous entry, and maskself is required */
54875 ++ insert(4, c, 192, 95, 5, 65, 27);
54876 ++ insert(6, d, 0x26075300, 0x60006b00, 0, 0xc05f0543, 128);
54877 ++ insert(6, c, 0x26075300, 0x60006b00, 0, 0, 64);
54878 ++ insert(4, e, 0, 0, 0, 0, 0);
54879 ++ insert(6, e, 0, 0, 0, 0, 0);
54880 ++ /* replaces previous entry */
54881 ++ insert(6, f, 0, 0, 0, 0, 0);
54882 ++ insert(6, g, 0x24046800, 0, 0, 0, 32);
54883 ++ /* maskself is required */
54884 ++ insert(6, h, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef, 64);
54885 ++ insert(6, a, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef, 128);
54886 ++ insert(6, c, 0x24446800, 0x40e40800, 0xdeaebeef, 0xdefbeef, 128);
54887 ++ insert(6, b, 0x24446800, 0xf0e40800, 0xeeaebeef, 0, 98);
54888 ++ insert(4, g, 64, 15, 112, 0, 20);
54889 ++ /* maskself is required */
54890 ++ insert(4, h, 64, 15, 123, 211, 25);
54891 ++ insert(4, a, 10, 0, 0, 0, 25);
54892 ++ insert(4, b, 10, 0, 0, 128, 25);
54893 ++ insert(4, a, 10, 1, 0, 0, 30);
54894 ++ insert(4, b, 10, 1, 0, 4, 30);
54895 ++ insert(4, c, 10, 1, 0, 8, 29);
54896 ++ insert(4, d, 10, 1, 0, 16, 29);
54897 ++
54898 ++ if (IS_ENABLED(DEBUG_PRINT_TRIE_GRAPHVIZ)) {
54899 ++ print_tree(t.root4, 32);
54900 ++ print_tree(t.root6, 128);
54901 ++ }
54902 ++
54903 ++ success = true;
54904 ++
54905 ++ test(4, a, 192, 168, 4, 20);
54906 ++ test(4, a, 192, 168, 4, 0);
54907 ++ test(4, b, 192, 168, 4, 4);
54908 ++ test(4, c, 192, 168, 200, 182);
54909 ++ test(4, c, 192, 95, 5, 68);
54910 ++ test(4, e, 192, 95, 5, 96);
54911 ++ test(6, d, 0x26075300, 0x60006b00, 0, 0xc05f0543);
54912 ++ test(6, c, 0x26075300, 0x60006b00, 0, 0xc02e01ee);
54913 ++ test(6, f, 0x26075300, 0x60006b01, 0, 0);
54914 ++ test(6, g, 0x24046800, 0x40040806, 0, 0x1006);
54915 ++ test(6, g, 0x24046800, 0x40040806, 0x1234, 0x5678);
54916 ++ test(6, f, 0x240467ff, 0x40040806, 0x1234, 0x5678);
54917 ++ test(6, f, 0x24046801, 0x40040806, 0x1234, 0x5678);
54918 ++ test(6, h, 0x24046800, 0x40040800, 0x1234, 0x5678);
54919 ++ test(6, h, 0x24046800, 0x40040800, 0, 0);
54920 ++ test(6, h, 0x24046800, 0x40040800, 0x10101010, 0x10101010);
54921 ++ test(6, a, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef);
54922 ++ test(4, g, 64, 15, 116, 26);
54923 ++ test(4, g, 64, 15, 127, 3);
54924 ++ test(4, g, 64, 15, 123, 1);
54925 ++ test(4, h, 64, 15, 123, 128);
54926 ++ test(4, h, 64, 15, 123, 129);
54927 ++ test(4, a, 10, 0, 0, 52);
54928 ++ test(4, b, 10, 0, 0, 220);
54929 ++ test(4, a, 10, 1, 0, 2);
54930 ++ test(4, b, 10, 1, 0, 6);
54931 ++ test(4, c, 10, 1, 0, 10);
54932 ++ test(4, d, 10, 1, 0, 20);
54933 ++
54934 ++ insert(4, a, 1, 0, 0, 0, 32);
54935 ++ insert(4, a, 64, 0, 0, 0, 32);
54936 ++ insert(4, a, 128, 0, 0, 0, 32);
54937 ++ insert(4, a, 192, 0, 0, 0, 32);
54938 ++ insert(4, a, 255, 0, 0, 0, 32);
54939 ++ wg_allowedips_remove_by_peer(&t, a, &mutex);
54940 ++ test_negative(4, a, 1, 0, 0, 0);
54941 ++ test_negative(4, a, 64, 0, 0, 0);
54942 ++ test_negative(4, a, 128, 0, 0, 0);
54943 ++ test_negative(4, a, 192, 0, 0, 0);
54944 ++ test_negative(4, a, 255, 0, 0, 0);
54945 ++
54946 ++ wg_allowedips_free(&t, &mutex);
54947 ++ wg_allowedips_init(&t);
54948 ++ insert(4, a, 192, 168, 0, 0, 16);
54949 ++ insert(4, a, 192, 168, 0, 0, 24);
54950 ++ wg_allowedips_remove_by_peer(&t, a, &mutex);
54951 ++ test_negative(4, a, 192, 168, 0, 1);
54952 ++
54953 ++ /* These will hit the WARN_ON(len >= 128) in free_node if something
54954 ++ * goes wrong.
54955 ++ */
54956 ++ for (i = 0; i < 128; ++i) {
54957 ++ part = cpu_to_be64(~(1LLU << (i % 64)));
54958 ++ memset(&ip, 0xff, 16);
54959 ++ memcpy((u8 *)&ip + (i < 64) * 8, &part, 8);
54960 ++ wg_allowedips_insert_v6(&t, &ip, 128, a, &mutex);
54961 ++ }
54962 ++
54963 ++ wg_allowedips_free(&t, &mutex);
54964 ++
54965 ++ wg_allowedips_init(&t);
54966 ++ insert(4, a, 192, 95, 5, 93, 27);
54967 ++ insert(6, a, 0x26075300, 0x60006b00, 0, 0xc05f0543, 128);
54968 ++ insert(4, a, 10, 1, 0, 20, 29);
54969 ++ insert(6, a, 0x26075300, 0x6d8a6bf8, 0xdab1f1df, 0xc05f1523, 83);
54970 ++ insert(6, a, 0x26075300, 0x6d8a6bf8, 0xdab1f1df, 0xc05f1523, 21);
54971 ++ list_for_each_entry(iter_node, &a->allowedips_list, peer_list) {
54972 ++ u8 cidr, ip[16] __aligned(__alignof(u64));
54973 ++ int family = wg_allowedips_read_node(iter_node, ip, &cidr);
54974 ++
54975 ++ count++;
54976 ++
54977 ++ if (cidr == 27 && family == AF_INET &&
54978 ++ !memcmp(ip, ip4(192, 95, 5, 64), sizeof(struct in_addr)))
54979 ++ found_a = true;
54980 ++ else if (cidr == 128 && family == AF_INET6 &&
54981 ++ !memcmp(ip, ip6(0x26075300, 0x60006b00, 0, 0xc05f0543),
54982 ++ sizeof(struct in6_addr)))
54983 ++ found_b = true;
54984 ++ else if (cidr == 29 && family == AF_INET &&
54985 ++ !memcmp(ip, ip4(10, 1, 0, 16), sizeof(struct in_addr)))
54986 ++ found_c = true;
54987 ++ else if (cidr == 83 && family == AF_INET6 &&
54988 ++ !memcmp(ip, ip6(0x26075300, 0x6d8a6bf8, 0xdab1e000, 0),
54989 ++ sizeof(struct in6_addr)))
54990 ++ found_d = true;
54991 ++ else if (cidr == 21 && family == AF_INET6 &&
54992 ++ !memcmp(ip, ip6(0x26075000, 0, 0, 0),
54993 ++ sizeof(struct in6_addr)))
54994 ++ found_e = true;
54995 ++ else
54996 ++ found_other = true;
54997 ++ }
54998 ++ test_boolean(count == 5);
54999 ++ test_boolean(found_a);
55000 ++ test_boolean(found_b);
55001 ++ test_boolean(found_c);
55002 ++ test_boolean(found_d);
55003 ++ test_boolean(found_e);
55004 ++ test_boolean(!found_other);
55005 ++
55006 ++ if (IS_ENABLED(DEBUG_RANDOM_TRIE) && success)
55007 ++ success = randomized_test();
55008 ++
55009 ++ if (success)
55010 ++ pr_info("allowedips self-tests: pass\n");
55011 ++
55012 ++free:
55013 ++ wg_allowedips_free(&t, &mutex);
55014 ++ kfree(a);
55015 ++ kfree(b);
55016 ++ kfree(c);
55017 ++ kfree(d);
55018 ++ kfree(e);
55019 ++ kfree(f);
55020 ++ kfree(g);
55021 ++ kfree(h);
55022 ++ mutex_unlock(&mutex);
55023 ++
55024 ++ return success;
55025 ++}
55026 ++
55027 ++#undef test_negative
55028 ++#undef test
55029 ++#undef remove
55030 ++#undef insert
55031 ++#undef init_peer
55032 ++
55033 ++#endif
55034 +diff --git a/drivers/net/wireguard/selftest/counter.c b/drivers/net/wireguard/selftest/counter.c
55035 +new file mode 100644
55036 +index 000000000000..f4fbb9072ed7
55037 +--- /dev/null
55038 ++++ b/drivers/net/wireguard/selftest/counter.c
55039 +@@ -0,0 +1,104 @@
55040 ++// SPDX-License-Identifier: GPL-2.0
55041 ++/*
55042 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
55043 ++ */
55044 ++
55045 ++#ifdef DEBUG
55046 ++bool __init wg_packet_counter_selftest(void)
55047 ++{
55048 ++ unsigned int test_num = 0, i;
55049 ++ union noise_counter counter;
55050 ++ bool success = true;
55051 ++
55052 ++#define T_INIT do { \
55053 ++ memset(&counter, 0, sizeof(union noise_counter)); \
55054 ++ spin_lock_init(&counter.receive.lock); \
55055 ++ } while (0)
55056 ++#define T_LIM (COUNTER_WINDOW_SIZE + 1)
55057 ++#define T(n, v) do { \
55058 ++ ++test_num; \
55059 ++ if (counter_validate(&counter, n) != (v)) { \
55060 ++ pr_err("nonce counter self-test %u: FAIL\n", \
55061 ++ test_num); \
55062 ++ success = false; \
55063 ++ } \
55064 ++ } while (0)
55065 ++
55066 ++ T_INIT;
55067 ++ /* 1 */ T(0, true);
55068 ++ /* 2 */ T(1, true);
55069 ++ /* 3 */ T(1, false);
55070 ++ /* 4 */ T(9, true);
55071 ++ /* 5 */ T(8, true);
55072 ++ /* 6 */ T(7, true);
55073 ++ /* 7 */ T(7, false);
55074 ++ /* 8 */ T(T_LIM, true);
55075 ++ /* 9 */ T(T_LIM - 1, true);
55076 ++ /* 10 */ T(T_LIM - 1, false);
55077 ++ /* 11 */ T(T_LIM - 2, true);
55078 ++ /* 12 */ T(2, true);
55079 ++ /* 13 */ T(2, false);
55080 ++ /* 14 */ T(T_LIM + 16, true);
55081 ++ /* 15 */ T(3, false);
55082 ++ /* 16 */ T(T_LIM + 16, false);
55083 ++ /* 17 */ T(T_LIM * 4, true);
55084 ++ /* 18 */ T(T_LIM * 4 - (T_LIM - 1), true);
55085 ++ /* 19 */ T(10, false);
55086 ++ /* 20 */ T(T_LIM * 4 - T_LIM, false);
55087 ++ /* 21 */ T(T_LIM * 4 - (T_LIM + 1), false);
55088 ++ /* 22 */ T(T_LIM * 4 - (T_LIM - 2), true);
55089 ++ /* 23 */ T(T_LIM * 4 + 1 - T_LIM, false);
55090 ++ /* 24 */ T(0, false);
55091 ++ /* 25 */ T(REJECT_AFTER_MESSAGES, false);
55092 ++ /* 26 */ T(REJECT_AFTER_MESSAGES - 1, true);
55093 ++ /* 27 */ T(REJECT_AFTER_MESSAGES, false);
55094 ++ /* 28 */ T(REJECT_AFTER_MESSAGES - 1, false);
55095 ++ /* 29 */ T(REJECT_AFTER_MESSAGES - 2, true);
55096 ++ /* 30 */ T(REJECT_AFTER_MESSAGES + 1, false);
55097 ++ /* 31 */ T(REJECT_AFTER_MESSAGES + 2, false);
55098 ++ /* 32 */ T(REJECT_AFTER_MESSAGES - 2, false);
55099 ++ /* 33 */ T(REJECT_AFTER_MESSAGES - 3, true);
55100 ++ /* 34 */ T(0, false);
55101 ++
55102 ++ T_INIT;
55103 ++ for (i = 1; i <= COUNTER_WINDOW_SIZE; ++i)
55104 ++ T(i, true);
55105 ++ T(0, true);
55106 ++ T(0, false);
55107 ++
55108 ++ T_INIT;
55109 ++ for (i = 2; i <= COUNTER_WINDOW_SIZE + 1; ++i)
55110 ++ T(i, true);
55111 ++ T(1, true);
55112 ++ T(0, false);
55113 ++
55114 ++ T_INIT;
55115 ++ for (i = COUNTER_WINDOW_SIZE + 1; i-- > 0;)
55116 ++ T(i, true);
55117 ++
55118 ++ T_INIT;
55119 ++ for (i = COUNTER_WINDOW_SIZE + 2; i-- > 1;)
55120 ++ T(i, true);
55121 ++ T(0, false);
55122 ++
55123 ++ T_INIT;
55124 ++ for (i = COUNTER_WINDOW_SIZE + 1; i-- > 1;)
55125 ++ T(i, true);
55126 ++ T(COUNTER_WINDOW_SIZE + 1, true);
55127 ++ T(0, false);
55128 ++
55129 ++ T_INIT;
55130 ++ for (i = COUNTER_WINDOW_SIZE + 1; i-- > 1;)
55131 ++ T(i, true);
55132 ++ T(0, true);
55133 ++ T(COUNTER_WINDOW_SIZE + 1, true);
55134 ++
55135 ++#undef T
55136 ++#undef T_LIM
55137 ++#undef T_INIT
55138 ++
55139 ++ if (success)
55140 ++ pr_info("nonce counter self-tests: pass\n");
55141 ++ return success;
55142 ++}
55143 ++#endif
55144 +diff --git a/drivers/net/wireguard/selftest/ratelimiter.c b/drivers/net/wireguard/selftest/ratelimiter.c
55145 +new file mode 100644
55146 +index 000000000000..bcd6462e4540
55147 +--- /dev/null
55148 ++++ b/drivers/net/wireguard/selftest/ratelimiter.c
55149 +@@ -0,0 +1,226 @@
55150 ++// SPDX-License-Identifier: GPL-2.0
55151 ++/*
55152 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
55153 ++ */
55154 ++
55155 ++#ifdef DEBUG
55156 ++
55157 ++#include <linux/jiffies.h>
55158 ++
55159 ++static const struct {
55160 ++ bool result;
55161 ++ unsigned int msec_to_sleep_before;
55162 ++} expected_results[] __initconst = {
55163 ++ [0 ... PACKETS_BURSTABLE - 1] = { true, 0 },
55164 ++ [PACKETS_BURSTABLE] = { false, 0 },
55165 ++ [PACKETS_BURSTABLE + 1] = { true, MSEC_PER_SEC / PACKETS_PER_SECOND },
55166 ++ [PACKETS_BURSTABLE + 2] = { false, 0 },
55167 ++ [PACKETS_BURSTABLE + 3] = { true, (MSEC_PER_SEC / PACKETS_PER_SECOND) * 2 },
55168 ++ [PACKETS_BURSTABLE + 4] = { true, 0 },
55169 ++ [PACKETS_BURSTABLE + 5] = { false, 0 }
55170 ++};
55171 ++
55172 ++static __init unsigned int maximum_jiffies_at_index(int index)
55173 ++{
55174 ++ unsigned int total_msecs = 2 * MSEC_PER_SEC / PACKETS_PER_SECOND / 3;
55175 ++ int i;
55176 ++
55177 ++ for (i = 0; i <= index; ++i)
55178 ++ total_msecs += expected_results[i].msec_to_sleep_before;
55179 ++ return msecs_to_jiffies(total_msecs);
55180 ++}
55181 ++
55182 ++static __init int timings_test(struct sk_buff *skb4, struct iphdr *hdr4,
55183 ++ struct sk_buff *skb6, struct ipv6hdr *hdr6,
55184 ++ int *test)
55185 ++{
55186 ++ unsigned long loop_start_time;
55187 ++ int i;
55188 ++
55189 ++ wg_ratelimiter_gc_entries(NULL);
55190 ++ rcu_barrier();
55191 ++ loop_start_time = jiffies;
55192 ++
55193 ++ for (i = 0; i < ARRAY_SIZE(expected_results); ++i) {
55194 ++ if (expected_results[i].msec_to_sleep_before)
55195 ++ msleep(expected_results[i].msec_to_sleep_before);
55196 ++
55197 ++ if (time_is_before_jiffies(loop_start_time +
55198 ++ maximum_jiffies_at_index(i)))
55199 ++ return -ETIMEDOUT;
55200 ++ if (wg_ratelimiter_allow(skb4, &init_net) !=
55201 ++ expected_results[i].result)
55202 ++ return -EXFULL;
55203 ++ ++(*test);
55204 ++
55205 ++ hdr4->saddr = htonl(ntohl(hdr4->saddr) + i + 1);
55206 ++ if (time_is_before_jiffies(loop_start_time +
55207 ++ maximum_jiffies_at_index(i)))
55208 ++ return -ETIMEDOUT;
55209 ++ if (!wg_ratelimiter_allow(skb4, &init_net))
55210 ++ return -EXFULL;
55211 ++ ++(*test);
55212 ++
55213 ++ hdr4->saddr = htonl(ntohl(hdr4->saddr) - i - 1);
55214 ++
55215 ++#if IS_ENABLED(CONFIG_IPV6)
55216 ++ hdr6->saddr.in6_u.u6_addr32[2] = htonl(i);
55217 ++ hdr6->saddr.in6_u.u6_addr32[3] = htonl(i);
55218 ++ if (time_is_before_jiffies(loop_start_time +
55219 ++ maximum_jiffies_at_index(i)))
55220 ++ return -ETIMEDOUT;
55221 ++ if (wg_ratelimiter_allow(skb6, &init_net) !=
55222 ++ expected_results[i].result)
55223 ++ return -EXFULL;
55224 ++ ++(*test);
55225 ++
55226 ++ hdr6->saddr.in6_u.u6_addr32[0] =
55227 ++ htonl(ntohl(hdr6->saddr.in6_u.u6_addr32[0]) + i + 1);
55228 ++ if (time_is_before_jiffies(loop_start_time +
55229 ++ maximum_jiffies_at_index(i)))
55230 ++ return -ETIMEDOUT;
55231 ++ if (!wg_ratelimiter_allow(skb6, &init_net))
55232 ++ return -EXFULL;
55233 ++ ++(*test);
55234 ++
55235 ++ hdr6->saddr.in6_u.u6_addr32[0] =
55236 ++ htonl(ntohl(hdr6->saddr.in6_u.u6_addr32[0]) - i - 1);
55237 ++
55238 ++ if (time_is_before_jiffies(loop_start_time +
55239 ++ maximum_jiffies_at_index(i)))
55240 ++ return -ETIMEDOUT;
55241 ++#endif
55242 ++ }
55243 ++ return 0;
55244 ++}
55245 ++
55246 ++static __init int capacity_test(struct sk_buff *skb4, struct iphdr *hdr4,
55247 ++ int *test)
55248 ++{
55249 ++ int i;
55250 ++
55251 ++ wg_ratelimiter_gc_entries(NULL);
55252 ++ rcu_barrier();
55253 ++
55254 ++ if (atomic_read(&total_entries))
55255 ++ return -EXFULL;
55256 ++ ++(*test);
55257 ++
55258 ++ for (i = 0; i <= max_entries; ++i) {
55259 ++ hdr4->saddr = htonl(i);
55260 ++ if (wg_ratelimiter_allow(skb4, &init_net) != (i != max_entries))
55261 ++ return -EXFULL;
55262 ++ ++(*test);
55263 ++ }
55264 ++ return 0;
55265 ++}
55266 ++
55267 ++bool __init wg_ratelimiter_selftest(void)
55268 ++{
55269 ++ enum { TRIALS_BEFORE_GIVING_UP = 5000 };
55270 ++ bool success = false;
55271 ++ int test = 0, trials;
55272 ++ struct sk_buff *skb4, *skb6;
55273 ++ struct iphdr *hdr4;
55274 ++ struct ipv6hdr *hdr6;
55275 ++
55276 ++ if (IS_ENABLED(CONFIG_KASAN) || IS_ENABLED(CONFIG_UBSAN))
55277 ++ return true;
55278 ++
55279 ++ BUILD_BUG_ON(MSEC_PER_SEC % PACKETS_PER_SECOND != 0);
55280 ++
55281 ++ if (wg_ratelimiter_init())
55282 ++ goto out;
55283 ++ ++test;
55284 ++ if (wg_ratelimiter_init()) {
55285 ++ wg_ratelimiter_uninit();
55286 ++ goto out;
55287 ++ }
55288 ++ ++test;
55289 ++ if (wg_ratelimiter_init()) {
55290 ++ wg_ratelimiter_uninit();
55291 ++ wg_ratelimiter_uninit();
55292 ++ goto out;
55293 ++ }
55294 ++ ++test;
55295 ++
55296 ++ skb4 = alloc_skb(sizeof(struct iphdr), GFP_KERNEL);
55297 ++ if (unlikely(!skb4))
55298 ++ goto err_nofree;
55299 ++ skb4->protocol = htons(ETH_P_IP);
55300 ++ hdr4 = (struct iphdr *)skb_put(skb4, sizeof(*hdr4));
55301 ++ hdr4->saddr = htonl(8182);
55302 ++ skb_reset_network_header(skb4);
55303 ++ ++test;
55304 ++
55305 ++#if IS_ENABLED(CONFIG_IPV6)
55306 ++ skb6 = alloc_skb(sizeof(struct ipv6hdr), GFP_KERNEL);
55307 ++ if (unlikely(!skb6)) {
55308 ++ kfree_skb(skb4);
55309 ++ goto err_nofree;
55310 ++ }
55311 ++ skb6->protocol = htons(ETH_P_IPV6);
55312 ++ hdr6 = (struct ipv6hdr *)skb_put(skb6, sizeof(*hdr6));
55313 ++ hdr6->saddr.in6_u.u6_addr32[0] = htonl(1212);
55314 ++ hdr6->saddr.in6_u.u6_addr32[1] = htonl(289188);
55315 ++ skb_reset_network_header(skb6);
55316 ++ ++test;
55317 ++#endif
55318 ++
55319 ++ for (trials = TRIALS_BEFORE_GIVING_UP;;) {
55320 ++ int test_count = 0, ret;
55321 ++
55322 ++ ret = timings_test(skb4, hdr4, skb6, hdr6, &test_count);
55323 ++ if (ret == -ETIMEDOUT) {
55324 ++ if (!trials--) {
55325 ++ test += test_count;
55326 ++ goto err;
55327 ++ }
55328 ++ msleep(500);
55329 ++ continue;
55330 ++ } else if (ret < 0) {
55331 ++ test += test_count;
55332 ++ goto err;
55333 ++ } else {
55334 ++ test += test_count;
55335 ++ break;
55336 ++ }
55337 ++ }
55338 ++
55339 ++ for (trials = TRIALS_BEFORE_GIVING_UP;;) {
55340 ++ int test_count = 0;
55341 ++
55342 ++ if (capacity_test(skb4, hdr4, &test_count) < 0) {
55343 ++ if (!trials--) {
55344 ++ test += test_count;
55345 ++ goto err;
55346 ++ }
55347 ++ msleep(50);
55348 ++ continue;
55349 ++ }
55350 ++ test += test_count;
55351 ++ break;
55352 ++ }
55353 ++
55354 ++ success = true;
55355 ++
55356 ++err:
55357 ++ kfree_skb(skb4);
55358 ++#if IS_ENABLED(CONFIG_IPV6)
55359 ++ kfree_skb(skb6);
55360 ++#endif
55361 ++err_nofree:
55362 ++ wg_ratelimiter_uninit();
55363 ++ wg_ratelimiter_uninit();
55364 ++ wg_ratelimiter_uninit();
55365 ++ /* Uninit one extra time to check underflow detection. */
55366 ++ wg_ratelimiter_uninit();
55367 ++out:
55368 ++ if (success)
55369 ++ pr_info("ratelimiter self-tests: pass\n");
55370 ++ else
55371 ++ pr_err("ratelimiter self-test %d: FAIL\n", test);
55372 ++
55373 ++ return success;
55374 ++}
55375 ++#endif
55376 +diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c
55377 +new file mode 100644
55378 +index 000000000000..c13260563446
55379 +--- /dev/null
55380 ++++ b/drivers/net/wireguard/send.c
55381 +@@ -0,0 +1,413 @@
55382 ++// SPDX-License-Identifier: GPL-2.0
55383 ++/*
55384 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
55385 ++ */
55386 ++
55387 ++#include "queueing.h"
55388 ++#include "timers.h"
55389 ++#include "device.h"
55390 ++#include "peer.h"
55391 ++#include "socket.h"
55392 ++#include "messages.h"
55393 ++#include "cookie.h"
55394 ++
55395 ++#include <linux/uio.h>
55396 ++#include <linux/inetdevice.h>
55397 ++#include <linux/socket.h>
55398 ++#include <net/ip_tunnels.h>
55399 ++#include <net/udp.h>
55400 ++#include <net/sock.h>
55401 ++
55402 ++static void wg_packet_send_handshake_initiation(struct wg_peer *peer)
55403 ++{
55404 ++ struct message_handshake_initiation packet;
55405 ++
55406 ++ if (!wg_birthdate_has_expired(atomic64_read(&peer->last_sent_handshake),
55407 ++ REKEY_TIMEOUT))
55408 ++ return; /* This function is rate limited. */
55409 ++
55410 ++ atomic64_set(&peer->last_sent_handshake, ktime_get_coarse_boottime_ns());
55411 ++ net_dbg_ratelimited("%s: Sending handshake initiation to peer %llu (%pISpfsc)\n",
55412 ++ peer->device->dev->name, peer->internal_id,
55413 ++ &peer->endpoint.addr);
55414 ++
55415 ++ if (wg_noise_handshake_create_initiation(&packet, &peer->handshake)) {
55416 ++ wg_cookie_add_mac_to_packet(&packet, sizeof(packet), peer);
55417 ++ wg_timers_any_authenticated_packet_traversal(peer);
55418 ++ wg_timers_any_authenticated_packet_sent(peer);
55419 ++ atomic64_set(&peer->last_sent_handshake,
55420 ++ ktime_get_coarse_boottime_ns());
55421 ++ wg_socket_send_buffer_to_peer(peer, &packet, sizeof(packet),
55422 ++ HANDSHAKE_DSCP);
55423 ++ wg_timers_handshake_initiated(peer);
55424 ++ }
55425 ++}
55426 ++
55427 ++void wg_packet_handshake_send_worker(struct work_struct *work)
55428 ++{
55429 ++ struct wg_peer *peer = container_of(work, struct wg_peer,
55430 ++ transmit_handshake_work);
55431 ++
55432 ++ wg_packet_send_handshake_initiation(peer);
55433 ++ wg_peer_put(peer);
55434 ++}
55435 ++
55436 ++void wg_packet_send_queued_handshake_initiation(struct wg_peer *peer,
55437 ++ bool is_retry)
55438 ++{
55439 ++ if (!is_retry)
55440 ++ peer->timer_handshake_attempts = 0;
55441 ++
55442 ++ rcu_read_lock_bh();
55443 ++ /* We check last_sent_handshake here in addition to the actual function
55444 ++ * we're queueing up, so that we don't queue things if not strictly
55445 ++ * necessary:
55446 ++ */
55447 ++ if (!wg_birthdate_has_expired(atomic64_read(&peer->last_sent_handshake),
55448 ++ REKEY_TIMEOUT) ||
55449 ++ unlikely(READ_ONCE(peer->is_dead)))
55450 ++ goto out;
55451 ++
55452 ++ wg_peer_get(peer);
55453 ++ /* Queues up calling packet_send_queued_handshakes(peer), where we do a
55454 ++ * peer_put(peer) after:
55455 ++ */
55456 ++ if (!queue_work(peer->device->handshake_send_wq,
55457 ++ &peer->transmit_handshake_work))
55458 ++ /* If the work was already queued, we want to drop the
55459 ++ * extra reference:
55460 ++ */
55461 ++ wg_peer_put(peer);
55462 ++out:
55463 ++ rcu_read_unlock_bh();
55464 ++}
55465 ++
55466 ++void wg_packet_send_handshake_response(struct wg_peer *peer)
55467 ++{
55468 ++ struct message_handshake_response packet;
55469 ++
55470 ++ atomic64_set(&peer->last_sent_handshake, ktime_get_coarse_boottime_ns());
55471 ++ net_dbg_ratelimited("%s: Sending handshake response to peer %llu (%pISpfsc)\n",
55472 ++ peer->device->dev->name, peer->internal_id,
55473 ++ &peer->endpoint.addr);
55474 ++
55475 ++ if (wg_noise_handshake_create_response(&packet, &peer->handshake)) {
55476 ++ wg_cookie_add_mac_to_packet(&packet, sizeof(packet), peer);
55477 ++ if (wg_noise_handshake_begin_session(&peer->handshake,
55478 ++ &peer->keypairs)) {
55479 ++ wg_timers_session_derived(peer);
55480 ++ wg_timers_any_authenticated_packet_traversal(peer);
55481 ++ wg_timers_any_authenticated_packet_sent(peer);
55482 ++ atomic64_set(&peer->last_sent_handshake,
55483 ++ ktime_get_coarse_boottime_ns());
55484 ++ wg_socket_send_buffer_to_peer(peer, &packet,
55485 ++ sizeof(packet),
55486 ++ HANDSHAKE_DSCP);
55487 ++ }
55488 ++ }
55489 ++}
55490 ++
55491 ++void wg_packet_send_handshake_cookie(struct wg_device *wg,
55492 ++ struct sk_buff *initiating_skb,
55493 ++ __le32 sender_index)
55494 ++{
55495 ++ struct message_handshake_cookie packet;
55496 ++
55497 ++ net_dbg_skb_ratelimited("%s: Sending cookie response for denied handshake message for %pISpfsc\n",
55498 ++ wg->dev->name, initiating_skb);
55499 ++ wg_cookie_message_create(&packet, initiating_skb, sender_index,
55500 ++ &wg->cookie_checker);
55501 ++ wg_socket_send_buffer_as_reply_to_skb(wg, initiating_skb, &packet,
55502 ++ sizeof(packet));
55503 ++}
55504 ++
55505 ++static void keep_key_fresh(struct wg_peer *peer)
55506 ++{
55507 ++ struct noise_keypair *keypair;
55508 ++ bool send = false;
55509 ++
55510 ++ rcu_read_lock_bh();
55511 ++ keypair = rcu_dereference_bh(peer->keypairs.current_keypair);
55512 ++ if (likely(keypair && READ_ONCE(keypair->sending.is_valid)) &&
55513 ++ (unlikely(atomic64_read(&keypair->sending.counter.counter) >
55514 ++ REKEY_AFTER_MESSAGES) ||
55515 ++ (keypair->i_am_the_initiator &&
55516 ++ unlikely(wg_birthdate_has_expired(keypair->sending.birthdate,
55517 ++ REKEY_AFTER_TIME)))))
55518 ++ send = true;
55519 ++ rcu_read_unlock_bh();
55520 ++
55521 ++ if (send)
55522 ++ wg_packet_send_queued_handshake_initiation(peer, false);
55523 ++}
55524 ++
55525 ++static unsigned int calculate_skb_padding(struct sk_buff *skb)
55526 ++{
55527 ++ /* We do this modulo business with the MTU, just in case the networking
55528 ++ * layer gives us a packet that's bigger than the MTU. In that case, we
55529 ++ * wouldn't want the final subtraction to overflow in the case of the
55530 ++ * padded_size being clamped.
55531 ++ */
55532 ++ unsigned int last_unit = skb->len % PACKET_CB(skb)->mtu;
55533 ++ unsigned int padded_size = ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE);
55534 ++
55535 ++ if (padded_size > PACKET_CB(skb)->mtu)
55536 ++ padded_size = PACKET_CB(skb)->mtu;
55537 ++ return padded_size - last_unit;
55538 ++}
55539 ++
55540 ++static bool encrypt_packet(struct sk_buff *skb, struct noise_keypair *keypair)
55541 ++{
55542 ++ unsigned int padding_len, plaintext_len, trailer_len;
55543 ++ struct scatterlist sg[MAX_SKB_FRAGS + 8];
55544 ++ struct message_data *header;
55545 ++ struct sk_buff *trailer;
55546 ++ int num_frags;
55547 ++
55548 ++ /* Calculate lengths. */
55549 ++ padding_len = calculate_skb_padding(skb);
55550 ++ trailer_len = padding_len + noise_encrypted_len(0);
55551 ++ plaintext_len = skb->len + padding_len;
55552 ++
55553 ++ /* Expand data section to have room for padding and auth tag. */
55554 ++ num_frags = skb_cow_data(skb, trailer_len, &trailer);
55555 ++ if (unlikely(num_frags < 0 || num_frags > ARRAY_SIZE(sg)))
55556 ++ return false;
55557 ++
55558 ++ /* Set the padding to zeros, and make sure it and the auth tag are part
55559 ++ * of the skb.
55560 ++ */
55561 ++ memset(skb_tail_pointer(trailer), 0, padding_len);
55562 ++
55563 ++ /* Expand head section to have room for our header and the network
55564 ++ * stack's headers.
55565 ++ */
55566 ++ if (unlikely(skb_cow_head(skb, DATA_PACKET_HEAD_ROOM) < 0))
55567 ++ return false;
55568 ++
55569 ++ /* Finalize checksum calculation for the inner packet, if required. */
55570 ++ if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL &&
55571 ++ skb_checksum_help(skb)))
55572 ++ return false;
55573 ++
55574 ++ /* Only after checksumming can we safely add on the padding at the end
55575 ++ * and the header.
55576 ++ */
55577 ++ skb_set_inner_network_header(skb, 0);
55578 ++ header = (struct message_data *)skb_push(skb, sizeof(*header));
55579 ++ header->header.type = cpu_to_le32(MESSAGE_DATA);
55580 ++ header->key_idx = keypair->remote_index;
55581 ++ header->counter = cpu_to_le64(PACKET_CB(skb)->nonce);
55582 ++ pskb_put(skb, trailer, trailer_len);
55583 ++
55584 ++ /* Now we can encrypt the scattergather segments */
55585 ++ sg_init_table(sg, num_frags);
55586 ++ if (skb_to_sgvec(skb, sg, sizeof(struct message_data),
55587 ++ noise_encrypted_len(plaintext_len)) <= 0)
55588 ++ return false;
55589 ++ return chacha20poly1305_encrypt_sg_inplace(sg, plaintext_len, NULL, 0,
55590 ++ PACKET_CB(skb)->nonce,
55591 ++ keypair->sending.key);
55592 ++}
55593 ++
55594 ++void wg_packet_send_keepalive(struct wg_peer *peer)
55595 ++{
55596 ++ struct sk_buff *skb;
55597 ++
55598 ++ if (skb_queue_empty(&peer->staged_packet_queue)) {
55599 ++ skb = alloc_skb(DATA_PACKET_HEAD_ROOM + MESSAGE_MINIMUM_LENGTH,
55600 ++ GFP_ATOMIC);
55601 ++ if (unlikely(!skb))
55602 ++ return;
55603 ++ skb_reserve(skb, DATA_PACKET_HEAD_ROOM);
55604 ++ skb->dev = peer->device->dev;
55605 ++ PACKET_CB(skb)->mtu = skb->dev->mtu;
55606 ++ skb_queue_tail(&peer->staged_packet_queue, skb);
55607 ++ net_dbg_ratelimited("%s: Sending keepalive packet to peer %llu (%pISpfsc)\n",
55608 ++ peer->device->dev->name, peer->internal_id,
55609 ++ &peer->endpoint.addr);
55610 ++ }
55611 ++
55612 ++ wg_packet_send_staged_packets(peer);
55613 ++}
55614 ++
55615 ++static void wg_packet_create_data_done(struct sk_buff *first,
55616 ++ struct wg_peer *peer)
55617 ++{
55618 ++ struct sk_buff *skb, *next;
55619 ++ bool is_keepalive, data_sent = false;
55620 ++
55621 ++ wg_timers_any_authenticated_packet_traversal(peer);
55622 ++ wg_timers_any_authenticated_packet_sent(peer);
55623 ++ skb_list_walk_safe(first, skb, next) {
55624 ++ is_keepalive = skb->len == message_data_len(0);
55625 ++ if (likely(!wg_socket_send_skb_to_peer(peer, skb,
55626 ++ PACKET_CB(skb)->ds) && !is_keepalive))
55627 ++ data_sent = true;
55628 ++ }
55629 ++
55630 ++ if (likely(data_sent))
55631 ++ wg_timers_data_sent(peer);
55632 ++
55633 ++ keep_key_fresh(peer);
55634 ++}
55635 ++
55636 ++void wg_packet_tx_worker(struct work_struct *work)
55637 ++{
55638 ++ struct crypt_queue *queue = container_of(work, struct crypt_queue,
55639 ++ work);
55640 ++ struct noise_keypair *keypair;
55641 ++ enum packet_state state;
55642 ++ struct sk_buff *first;
55643 ++ struct wg_peer *peer;
55644 ++
55645 ++ while ((first = __ptr_ring_peek(&queue->ring)) != NULL &&
55646 ++ (state = atomic_read_acquire(&PACKET_CB(first)->state)) !=
55647 ++ PACKET_STATE_UNCRYPTED) {
55648 ++ __ptr_ring_discard_one(&queue->ring);
55649 ++ peer = PACKET_PEER(first);
55650 ++ keypair = PACKET_CB(first)->keypair;
55651 ++
55652 ++ if (likely(state == PACKET_STATE_CRYPTED))
55653 ++ wg_packet_create_data_done(first, peer);
55654 ++ else
55655 ++ kfree_skb_list(first);
55656 ++
55657 ++ wg_noise_keypair_put(keypair, false);
55658 ++ wg_peer_put(peer);
55659 ++ }
55660 ++}
55661 ++
55662 ++void wg_packet_encrypt_worker(struct work_struct *work)
55663 ++{
55664 ++ struct crypt_queue *queue = container_of(work, struct multicore_worker,
55665 ++ work)->ptr;
55666 ++ struct sk_buff *first, *skb, *next;
55667 ++
55668 ++ while ((first = ptr_ring_consume_bh(&queue->ring)) != NULL) {
55669 ++ enum packet_state state = PACKET_STATE_CRYPTED;
55670 ++
55671 ++ skb_list_walk_safe(first, skb, next) {
55672 ++ if (likely(encrypt_packet(skb,
55673 ++ PACKET_CB(first)->keypair))) {
55674 ++ wg_reset_packet(skb);
55675 ++ } else {
55676 ++ state = PACKET_STATE_DEAD;
55677 ++ break;
55678 ++ }
55679 ++ }
55680 ++ wg_queue_enqueue_per_peer(&PACKET_PEER(first)->tx_queue, first,
55681 ++ state);
55682 ++
55683 ++ }
55684 ++}
55685 ++
55686 ++static void wg_packet_create_data(struct sk_buff *first)
55687 ++{
55688 ++ struct wg_peer *peer = PACKET_PEER(first);
55689 ++ struct wg_device *wg = peer->device;
55690 ++ int ret = -EINVAL;
55691 ++
55692 ++ rcu_read_lock_bh();
55693 ++ if (unlikely(READ_ONCE(peer->is_dead)))
55694 ++ goto err;
55695 ++
55696 ++ ret = wg_queue_enqueue_per_device_and_peer(&wg->encrypt_queue,
55697 ++ &peer->tx_queue, first,
55698 ++ wg->packet_crypt_wq,
55699 ++ &wg->encrypt_queue.last_cpu);
55700 ++ if (unlikely(ret == -EPIPE))
55701 ++ wg_queue_enqueue_per_peer(&peer->tx_queue, first,
55702 ++ PACKET_STATE_DEAD);
55703 ++err:
55704 ++ rcu_read_unlock_bh();
55705 ++ if (likely(!ret || ret == -EPIPE))
55706 ++ return;
55707 ++ wg_noise_keypair_put(PACKET_CB(first)->keypair, false);
55708 ++ wg_peer_put(peer);
55709 ++ kfree_skb_list(first);
55710 ++}
55711 ++
55712 ++void wg_packet_purge_staged_packets(struct wg_peer *peer)
55713 ++{
55714 ++ spin_lock_bh(&peer->staged_packet_queue.lock);
55715 ++ peer->device->dev->stats.tx_dropped += peer->staged_packet_queue.qlen;
55716 ++ __skb_queue_purge(&peer->staged_packet_queue);
55717 ++ spin_unlock_bh(&peer->staged_packet_queue.lock);
55718 ++}
55719 ++
55720 ++void wg_packet_send_staged_packets(struct wg_peer *peer)
55721 ++{
55722 ++ struct noise_symmetric_key *key;
55723 ++ struct noise_keypair *keypair;
55724 ++ struct sk_buff_head packets;
55725 ++ struct sk_buff *skb;
55726 ++
55727 ++ /* Steal the current queue into our local one. */
55728 ++ __skb_queue_head_init(&packets);
55729 ++ spin_lock_bh(&peer->staged_packet_queue.lock);
55730 ++ skb_queue_splice_init(&peer->staged_packet_queue, &packets);
55731 ++ spin_unlock_bh(&peer->staged_packet_queue.lock);
55732 ++ if (unlikely(skb_queue_empty(&packets)))
55733 ++ return;
55734 ++
55735 ++ /* First we make sure we have a valid reference to a valid key. */
55736 ++ rcu_read_lock_bh();
55737 ++ keypair = wg_noise_keypair_get(
55738 ++ rcu_dereference_bh(peer->keypairs.current_keypair));
55739 ++ rcu_read_unlock_bh();
55740 ++ if (unlikely(!keypair))
55741 ++ goto out_nokey;
55742 ++ key = &keypair->sending;
55743 ++ if (unlikely(!READ_ONCE(key->is_valid)))
55744 ++ goto out_nokey;
55745 ++ if (unlikely(wg_birthdate_has_expired(key->birthdate,
55746 ++ REJECT_AFTER_TIME)))
55747 ++ goto out_invalid;
55748 ++
55749 ++ /* After we know we have a somewhat valid key, we now try to assign
55750 ++ * nonces to all of the packets in the queue. If we can't assign nonces
55751 ++ * for all of them, we just consider it a failure and wait for the next
55752 ++ * handshake.
55753 ++ */
55754 ++ skb_queue_walk(&packets, skb) {
55755 ++ /* 0 for no outer TOS: no leak. TODO: at some later point, we
55756 ++ * might consider using flowi->tos as outer instead.
55757 ++ */
55758 ++ PACKET_CB(skb)->ds = ip_tunnel_ecn_encap(0, ip_hdr(skb), skb);
55759 ++ PACKET_CB(skb)->nonce =
55760 ++ atomic64_inc_return(&key->counter.counter) - 1;
55761 ++ if (unlikely(PACKET_CB(skb)->nonce >= REJECT_AFTER_MESSAGES))
55762 ++ goto out_invalid;
55763 ++ }
55764 ++
55765 ++ packets.prev->next = NULL;
55766 ++ wg_peer_get(keypair->entry.peer);
55767 ++ PACKET_CB(packets.next)->keypair = keypair;
55768 ++ wg_packet_create_data(packets.next);
55769 ++ return;
55770 ++
55771 ++out_invalid:
55772 ++ WRITE_ONCE(key->is_valid, false);
55773 ++out_nokey:
55774 ++ wg_noise_keypair_put(keypair, false);
55775 ++
55776 ++ /* We orphan the packets if we're waiting on a handshake, so that they
55777 ++ * don't block a socket's pool.
55778 ++ */
55779 ++ skb_queue_walk(&packets, skb)
55780 ++ skb_orphan(skb);
55781 ++ /* Then we put them back on the top of the queue. We're not too
55782 ++ * concerned about accidentally getting things a little out of order if
55783 ++ * packets are being added really fast, because this queue is for before
55784 ++ * packets can even be sent and it's small anyway.
55785 ++ */
55786 ++ spin_lock_bh(&peer->staged_packet_queue.lock);
55787 ++ skb_queue_splice(&packets, &peer->staged_packet_queue);
55788 ++ spin_unlock_bh(&peer->staged_packet_queue.lock);
55789 ++
55790 ++ /* If we're exiting because there's something wrong with the key, it
55791 ++ * means we should initiate a new handshake.
55792 ++ */
55793 ++ wg_packet_send_queued_handshake_initiation(peer, false);
55794 ++}
55795 +diff --git a/drivers/net/wireguard/socket.c b/drivers/net/wireguard/socket.c
55796 +new file mode 100644
55797 +index 000000000000..c46256d0d81c
55798 +--- /dev/null
55799 ++++ b/drivers/net/wireguard/socket.c
55800 +@@ -0,0 +1,437 @@
55801 ++// SPDX-License-Identifier: GPL-2.0
55802 ++/*
55803 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
55804 ++ */
55805 ++
55806 ++#include "device.h"
55807 ++#include "peer.h"
55808 ++#include "socket.h"
55809 ++#include "queueing.h"
55810 ++#include "messages.h"
55811 ++
55812 ++#include <linux/ctype.h>
55813 ++#include <linux/net.h>
55814 ++#include <linux/if_vlan.h>
55815 ++#include <linux/if_ether.h>
55816 ++#include <linux/inetdevice.h>
55817 ++#include <net/udp_tunnel.h>
55818 ++#include <net/ipv6.h>
55819 ++
55820 ++static int send4(struct wg_device *wg, struct sk_buff *skb,
55821 ++ struct endpoint *endpoint, u8 ds, struct dst_cache *cache)
55822 ++{
55823 ++ struct flowi4 fl = {
55824 ++ .saddr = endpoint->src4.s_addr,
55825 ++ .daddr = endpoint->addr4.sin_addr.s_addr,
55826 ++ .fl4_dport = endpoint->addr4.sin_port,
55827 ++ .flowi4_mark = wg->fwmark,
55828 ++ .flowi4_proto = IPPROTO_UDP
55829 ++ };
55830 ++ struct rtable *rt = NULL;
55831 ++ struct sock *sock;
55832 ++ int ret = 0;
55833 ++
55834 ++ skb_mark_not_on_list(skb);
55835 ++ skb->dev = wg->dev;
55836 ++ skb->mark = wg->fwmark;
55837 ++
55838 ++ rcu_read_lock_bh();
55839 ++ sock = rcu_dereference_bh(wg->sock4);
55840 ++
55841 ++ if (unlikely(!sock)) {
55842 ++ ret = -ENONET;
55843 ++ goto err;
55844 ++ }
55845 ++
55846 ++ fl.fl4_sport = inet_sk(sock)->inet_sport;
55847 ++
55848 ++ if (cache)
55849 ++ rt = dst_cache_get_ip4(cache, &fl.saddr);
55850 ++
55851 ++ if (!rt) {
55852 ++ security_sk_classify_flow(sock, flowi4_to_flowi(&fl));
55853 ++ if (unlikely(!inet_confirm_addr(sock_net(sock), NULL, 0,
55854 ++ fl.saddr, RT_SCOPE_HOST))) {
55855 ++ endpoint->src4.s_addr = 0;
55856 ++ *(__force __be32 *)&endpoint->src_if4 = 0;
55857 ++ fl.saddr = 0;
55858 ++ if (cache)
55859 ++ dst_cache_reset(cache);
55860 ++ }
55861 ++ rt = ip_route_output_flow(sock_net(sock), &fl, sock);
55862 ++ if (unlikely(endpoint->src_if4 && ((IS_ERR(rt) &&
55863 ++ PTR_ERR(rt) == -EINVAL) || (!IS_ERR(rt) &&
55864 ++ rt->dst.dev->ifindex != endpoint->src_if4)))) {
55865 ++ endpoint->src4.s_addr = 0;
55866 ++ *(__force __be32 *)&endpoint->src_if4 = 0;
55867 ++ fl.saddr = 0;
55868 ++ if (cache)
55869 ++ dst_cache_reset(cache);
55870 ++ if (!IS_ERR(rt))
55871 ++ ip_rt_put(rt);
55872 ++ rt = ip_route_output_flow(sock_net(sock), &fl, sock);
55873 ++ }
55874 ++ if (unlikely(IS_ERR(rt))) {
55875 ++ ret = PTR_ERR(rt);
55876 ++ net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n",
55877 ++ wg->dev->name, &endpoint->addr, ret);
55878 ++ goto err;
55879 ++ } else if (unlikely(rt->dst.dev == skb->dev)) {
55880 ++ ip_rt_put(rt);
55881 ++ ret = -ELOOP;
55882 ++ net_dbg_ratelimited("%s: Avoiding routing loop to %pISpfsc\n",
55883 ++ wg->dev->name, &endpoint->addr);
55884 ++ goto err;
55885 ++ }
55886 ++ if (cache)
55887 ++ dst_cache_set_ip4(cache, &rt->dst, fl.saddr);
55888 ++ }
55889 ++
55890 ++ skb->ignore_df = 1;
55891 ++ udp_tunnel_xmit_skb(rt, sock, skb, fl.saddr, fl.daddr, ds,
55892 ++ ip4_dst_hoplimit(&rt->dst), 0, fl.fl4_sport,
55893 ++ fl.fl4_dport, false, false);
55894 ++ goto out;
55895 ++
55896 ++err:
55897 ++ kfree_skb(skb);
55898 ++out:
55899 ++ rcu_read_unlock_bh();
55900 ++ return ret;
55901 ++}
55902 ++
55903 ++static int send6(struct wg_device *wg, struct sk_buff *skb,
55904 ++ struct endpoint *endpoint, u8 ds, struct dst_cache *cache)
55905 ++{
55906 ++#if IS_ENABLED(CONFIG_IPV6)
55907 ++ struct flowi6 fl = {
55908 ++ .saddr = endpoint->src6,
55909 ++ .daddr = endpoint->addr6.sin6_addr,
55910 ++ .fl6_dport = endpoint->addr6.sin6_port,
55911 ++ .flowi6_mark = wg->fwmark,
55912 ++ .flowi6_oif = endpoint->addr6.sin6_scope_id,
55913 ++ .flowi6_proto = IPPROTO_UDP
55914 ++ /* TODO: addr->sin6_flowinfo */
55915 ++ };
55916 ++ struct dst_entry *dst = NULL;
55917 ++ struct sock *sock;
55918 ++ int ret = 0;
55919 ++
55920 ++ skb_mark_not_on_list(skb);
55921 ++ skb->dev = wg->dev;
55922 ++ skb->mark = wg->fwmark;
55923 ++
55924 ++ rcu_read_lock_bh();
55925 ++ sock = rcu_dereference_bh(wg->sock6);
55926 ++
55927 ++ if (unlikely(!sock)) {
55928 ++ ret = -ENONET;
55929 ++ goto err;
55930 ++ }
55931 ++
55932 ++ fl.fl6_sport = inet_sk(sock)->inet_sport;
55933 ++
55934 ++ if (cache)
55935 ++ dst = dst_cache_get_ip6(cache, &fl.saddr);
55936 ++
55937 ++ if (!dst) {
55938 ++ security_sk_classify_flow(sock, flowi6_to_flowi(&fl));
55939 ++ if (unlikely(!ipv6_addr_any(&fl.saddr) &&
55940 ++ !ipv6_chk_addr(sock_net(sock), &fl.saddr, NULL, 0))) {
55941 ++ endpoint->src6 = fl.saddr = in6addr_any;
55942 ++ if (cache)
55943 ++ dst_cache_reset(cache);
55944 ++ }
55945 ++ dst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(sock), sock, &fl,
55946 ++ NULL);
55947 ++ if (unlikely(IS_ERR(dst))) {
55948 ++ ret = PTR_ERR(dst);
55949 ++ net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n",
55950 ++ wg->dev->name, &endpoint->addr, ret);
55951 ++ goto err;
55952 ++ } else if (unlikely(dst->dev == skb->dev)) {
55953 ++ dst_release(dst);
55954 ++ ret = -ELOOP;
55955 ++ net_dbg_ratelimited("%s: Avoiding routing loop to %pISpfsc\n",
55956 ++ wg->dev->name, &endpoint->addr);
55957 ++ goto err;
55958 ++ }
55959 ++ if (cache)
55960 ++ dst_cache_set_ip6(cache, dst, &fl.saddr);
55961 ++ }
55962 ++
55963 ++ skb->ignore_df = 1;
55964 ++ udp_tunnel6_xmit_skb(dst, sock, skb, skb->dev, &fl.saddr, &fl.daddr, ds,
55965 ++ ip6_dst_hoplimit(dst), 0, fl.fl6_sport,
55966 ++ fl.fl6_dport, false);
55967 ++ goto out;
55968 ++
55969 ++err:
55970 ++ kfree_skb(skb);
55971 ++out:
55972 ++ rcu_read_unlock_bh();
55973 ++ return ret;
55974 ++#else
55975 ++ return -EAFNOSUPPORT;
55976 ++#endif
55977 ++}
55978 ++
55979 ++int wg_socket_send_skb_to_peer(struct wg_peer *peer, struct sk_buff *skb, u8 ds)
55980 ++{
55981 ++ size_t skb_len = skb->len;
55982 ++ int ret = -EAFNOSUPPORT;
55983 ++
55984 ++ read_lock_bh(&peer->endpoint_lock);
55985 ++ if (peer->endpoint.addr.sa_family == AF_INET)
55986 ++ ret = send4(peer->device, skb, &peer->endpoint, ds,
55987 ++ &peer->endpoint_cache);
55988 ++ else if (peer->endpoint.addr.sa_family == AF_INET6)
55989 ++ ret = send6(peer->device, skb, &peer->endpoint, ds,
55990 ++ &peer->endpoint_cache);
55991 ++ else
55992 ++ dev_kfree_skb(skb);
55993 ++ if (likely(!ret))
55994 ++ peer->tx_bytes += skb_len;
55995 ++ read_unlock_bh(&peer->endpoint_lock);
55996 ++
55997 ++ return ret;
55998 ++}
55999 ++
56000 ++int wg_socket_send_buffer_to_peer(struct wg_peer *peer, void *buffer,
56001 ++ size_t len, u8 ds)
56002 ++{
56003 ++ struct sk_buff *skb = alloc_skb(len + SKB_HEADER_LEN, GFP_ATOMIC);
56004 ++
56005 ++ if (unlikely(!skb))
56006 ++ return -ENOMEM;
56007 ++
56008 ++ skb_reserve(skb, SKB_HEADER_LEN);
56009 ++ skb_set_inner_network_header(skb, 0);
56010 ++ skb_put_data(skb, buffer, len);
56011 ++ return wg_socket_send_skb_to_peer(peer, skb, ds);
56012 ++}
56013 ++
56014 ++int wg_socket_send_buffer_as_reply_to_skb(struct wg_device *wg,
56015 ++ struct sk_buff *in_skb, void *buffer,
56016 ++ size_t len)
56017 ++{
56018 ++ int ret = 0;
56019 ++ struct sk_buff *skb;
56020 ++ struct endpoint endpoint;
56021 ++
56022 ++ if (unlikely(!in_skb))
56023 ++ return -EINVAL;
56024 ++ ret = wg_socket_endpoint_from_skb(&endpoint, in_skb);
56025 ++ if (unlikely(ret < 0))
56026 ++ return ret;
56027 ++
56028 ++ skb = alloc_skb(len + SKB_HEADER_LEN, GFP_ATOMIC);
56029 ++ if (unlikely(!skb))
56030 ++ return -ENOMEM;
56031 ++ skb_reserve(skb, SKB_HEADER_LEN);
56032 ++ skb_set_inner_network_header(skb, 0);
56033 ++ skb_put_data(skb, buffer, len);
56034 ++
56035 ++ if (endpoint.addr.sa_family == AF_INET)
56036 ++ ret = send4(wg, skb, &endpoint, 0, NULL);
56037 ++ else if (endpoint.addr.sa_family == AF_INET6)
56038 ++ ret = send6(wg, skb, &endpoint, 0, NULL);
56039 ++ /* No other possibilities if the endpoint is valid, which it is,
56040 ++ * as we checked above.
56041 ++ */
56042 ++
56043 ++ return ret;
56044 ++}
56045 ++
56046 ++int wg_socket_endpoint_from_skb(struct endpoint *endpoint,
56047 ++ const struct sk_buff *skb)
56048 ++{
56049 ++ memset(endpoint, 0, sizeof(*endpoint));
56050 ++ if (skb->protocol == htons(ETH_P_IP)) {
56051 ++ endpoint->addr4.sin_family = AF_INET;
56052 ++ endpoint->addr4.sin_port = udp_hdr(skb)->source;
56053 ++ endpoint->addr4.sin_addr.s_addr = ip_hdr(skb)->saddr;
56054 ++ endpoint->src4.s_addr = ip_hdr(skb)->daddr;
56055 ++ endpoint->src_if4 = skb->skb_iif;
56056 ++ } else if (skb->protocol == htons(ETH_P_IPV6)) {
56057 ++ endpoint->addr6.sin6_family = AF_INET6;
56058 ++ endpoint->addr6.sin6_port = udp_hdr(skb)->source;
56059 ++ endpoint->addr6.sin6_addr = ipv6_hdr(skb)->saddr;
56060 ++ endpoint->addr6.sin6_scope_id = ipv6_iface_scope_id(
56061 ++ &ipv6_hdr(skb)->saddr, skb->skb_iif);
56062 ++ endpoint->src6 = ipv6_hdr(skb)->daddr;
56063 ++ } else {
56064 ++ return -EINVAL;
56065 ++ }
56066 ++ return 0;
56067 ++}
56068 ++
56069 ++static bool endpoint_eq(const struct endpoint *a, const struct endpoint *b)
56070 ++{
56071 ++ return (a->addr.sa_family == AF_INET && b->addr.sa_family == AF_INET &&
56072 ++ a->addr4.sin_port == b->addr4.sin_port &&
56073 ++ a->addr4.sin_addr.s_addr == b->addr4.sin_addr.s_addr &&
56074 ++ a->src4.s_addr == b->src4.s_addr && a->src_if4 == b->src_if4) ||
56075 ++ (a->addr.sa_family == AF_INET6 &&
56076 ++ b->addr.sa_family == AF_INET6 &&
56077 ++ a->addr6.sin6_port == b->addr6.sin6_port &&
56078 ++ ipv6_addr_equal(&a->addr6.sin6_addr, &b->addr6.sin6_addr) &&
56079 ++ a->addr6.sin6_scope_id == b->addr6.sin6_scope_id &&
56080 ++ ipv6_addr_equal(&a->src6, &b->src6)) ||
56081 ++ unlikely(!a->addr.sa_family && !b->addr.sa_family);
56082 ++}
56083 ++
56084 ++void wg_socket_set_peer_endpoint(struct wg_peer *peer,
56085 ++ const struct endpoint *endpoint)
56086 ++{
56087 ++ /* First we check unlocked, in order to optimize, since it's pretty rare
56088 ++ * that an endpoint will change. If we happen to be mid-write, and two
56089 ++ * CPUs wind up writing the same thing or something slightly different,
56090 ++ * it doesn't really matter much either.
56091 ++ */
56092 ++ if (endpoint_eq(endpoint, &peer->endpoint))
56093 ++ return;
56094 ++ write_lock_bh(&peer->endpoint_lock);
56095 ++ if (endpoint->addr.sa_family == AF_INET) {
56096 ++ peer->endpoint.addr4 = endpoint->addr4;
56097 ++ peer->endpoint.src4 = endpoint->src4;
56098 ++ peer->endpoint.src_if4 = endpoint->src_if4;
56099 ++ } else if (endpoint->addr.sa_family == AF_INET6) {
56100 ++ peer->endpoint.addr6 = endpoint->addr6;
56101 ++ peer->endpoint.src6 = endpoint->src6;
56102 ++ } else {
56103 ++ goto out;
56104 ++ }
56105 ++ dst_cache_reset(&peer->endpoint_cache);
56106 ++out:
56107 ++ write_unlock_bh(&peer->endpoint_lock);
56108 ++}
56109 ++
56110 ++void wg_socket_set_peer_endpoint_from_skb(struct wg_peer *peer,
56111 ++ const struct sk_buff *skb)
56112 ++{
56113 ++ struct endpoint endpoint;
56114 ++
56115 ++ if (!wg_socket_endpoint_from_skb(&endpoint, skb))
56116 ++ wg_socket_set_peer_endpoint(peer, &endpoint);
56117 ++}
56118 ++
56119 ++void wg_socket_clear_peer_endpoint_src(struct wg_peer *peer)
56120 ++{
56121 ++ write_lock_bh(&peer->endpoint_lock);
56122 ++ memset(&peer->endpoint.src6, 0, sizeof(peer->endpoint.src6));
56123 ++ dst_cache_reset(&peer->endpoint_cache);
56124 ++ write_unlock_bh(&peer->endpoint_lock);
56125 ++}
56126 ++
56127 ++static int wg_receive(struct sock *sk, struct sk_buff *skb)
56128 ++{
56129 ++ struct wg_device *wg;
56130 ++
56131 ++ if (unlikely(!sk))
56132 ++ goto err;
56133 ++ wg = sk->sk_user_data;
56134 ++ if (unlikely(!wg))
56135 ++ goto err;
56136 ++ wg_packet_receive(wg, skb);
56137 ++ return 0;
56138 ++
56139 ++err:
56140 ++ kfree_skb(skb);
56141 ++ return 0;
56142 ++}
56143 ++
56144 ++static void sock_free(struct sock *sock)
56145 ++{
56146 ++ if (unlikely(!sock))
56147 ++ return;
56148 ++ sk_clear_memalloc(sock);
56149 ++ udp_tunnel_sock_release(sock->sk_socket);
56150 ++}
56151 ++
56152 ++static void set_sock_opts(struct socket *sock)
56153 ++{
56154 ++ sock->sk->sk_allocation = GFP_ATOMIC;
56155 ++ sock->sk->sk_sndbuf = INT_MAX;
56156 ++ sk_set_memalloc(sock->sk);
56157 ++}
56158 ++
56159 ++int wg_socket_init(struct wg_device *wg, u16 port)
56160 ++{
56161 ++ int ret;
56162 ++ struct udp_tunnel_sock_cfg cfg = {
56163 ++ .sk_user_data = wg,
56164 ++ .encap_type = 1,
56165 ++ .encap_rcv = wg_receive
56166 ++ };
56167 ++ struct socket *new4 = NULL, *new6 = NULL;
56168 ++ struct udp_port_cfg port4 = {
56169 ++ .family = AF_INET,
56170 ++ .local_ip.s_addr = htonl(INADDR_ANY),
56171 ++ .local_udp_port = htons(port),
56172 ++ .use_udp_checksums = true
56173 ++ };
56174 ++#if IS_ENABLED(CONFIG_IPV6)
56175 ++ int retries = 0;
56176 ++ struct udp_port_cfg port6 = {
56177 ++ .family = AF_INET6,
56178 ++ .local_ip6 = IN6ADDR_ANY_INIT,
56179 ++ .use_udp6_tx_checksums = true,
56180 ++ .use_udp6_rx_checksums = true,
56181 ++ .ipv6_v6only = true
56182 ++ };
56183 ++#endif
56184 ++
56185 ++#if IS_ENABLED(CONFIG_IPV6)
56186 ++retry:
56187 ++#endif
56188 ++
56189 ++ ret = udp_sock_create(wg->creating_net, &port4, &new4);
56190 ++ if (ret < 0) {
56191 ++ pr_err("%s: Could not create IPv4 socket\n", wg->dev->name);
56192 ++ return ret;
56193 ++ }
56194 ++ set_sock_opts(new4);
56195 ++ setup_udp_tunnel_sock(wg->creating_net, new4, &cfg);
56196 ++
56197 ++#if IS_ENABLED(CONFIG_IPV6)
56198 ++ if (ipv6_mod_enabled()) {
56199 ++ port6.local_udp_port = inet_sk(new4->sk)->inet_sport;
56200 ++ ret = udp_sock_create(wg->creating_net, &port6, &new6);
56201 ++ if (ret < 0) {
56202 ++ udp_tunnel_sock_release(new4);
56203 ++ if (ret == -EADDRINUSE && !port && retries++ < 100)
56204 ++ goto retry;
56205 ++ pr_err("%s: Could not create IPv6 socket\n",
56206 ++ wg->dev->name);
56207 ++ return ret;
56208 ++ }
56209 ++ set_sock_opts(new6);
56210 ++ setup_udp_tunnel_sock(wg->creating_net, new6, &cfg);
56211 ++ }
56212 ++#endif
56213 ++
56214 ++ wg_socket_reinit(wg, new4->sk, new6 ? new6->sk : NULL);
56215 ++ return 0;
56216 ++}
56217 ++
56218 ++void wg_socket_reinit(struct wg_device *wg, struct sock *new4,
56219 ++ struct sock *new6)
56220 ++{
56221 ++ struct sock *old4, *old6;
56222 ++
56223 ++ mutex_lock(&wg->socket_update_lock);
56224 ++ old4 = rcu_dereference_protected(wg->sock4,
56225 ++ lockdep_is_held(&wg->socket_update_lock));
56226 ++ old6 = rcu_dereference_protected(wg->sock6,
56227 ++ lockdep_is_held(&wg->socket_update_lock));
56228 ++ rcu_assign_pointer(wg->sock4, new4);
56229 ++ rcu_assign_pointer(wg->sock6, new6);
56230 ++ if (new4)
56231 ++ wg->incoming_port = ntohs(inet_sk(new4)->inet_sport);
56232 ++ mutex_unlock(&wg->socket_update_lock);
56233 ++ synchronize_rcu();
56234 ++ synchronize_net();
56235 ++ sock_free(old4);
56236 ++ sock_free(old6);
56237 ++}
56238 +diff --git a/drivers/net/wireguard/socket.h b/drivers/net/wireguard/socket.h
56239 +new file mode 100644
56240 +index 000000000000..bab5848efbcd
56241 +--- /dev/null
56242 ++++ b/drivers/net/wireguard/socket.h
56243 +@@ -0,0 +1,44 @@
56244 ++/* SPDX-License-Identifier: GPL-2.0 */
56245 ++/*
56246 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
56247 ++ */
56248 ++
56249 ++#ifndef _WG_SOCKET_H
56250 ++#define _WG_SOCKET_H
56251 ++
56252 ++#include <linux/netdevice.h>
56253 ++#include <linux/udp.h>
56254 ++#include <linux/if_vlan.h>
56255 ++#include <linux/if_ether.h>
56256 ++
56257 ++int wg_socket_init(struct wg_device *wg, u16 port);
56258 ++void wg_socket_reinit(struct wg_device *wg, struct sock *new4,
56259 ++ struct sock *new6);
56260 ++int wg_socket_send_buffer_to_peer(struct wg_peer *peer, void *data,
56261 ++ size_t len, u8 ds);
56262 ++int wg_socket_send_skb_to_peer(struct wg_peer *peer, struct sk_buff *skb,
56263 ++ u8 ds);
56264 ++int wg_socket_send_buffer_as_reply_to_skb(struct wg_device *wg,
56265 ++ struct sk_buff *in_skb,
56266 ++ void *out_buffer, size_t len);
56267 ++
56268 ++int wg_socket_endpoint_from_skb(struct endpoint *endpoint,
56269 ++ const struct sk_buff *skb);
56270 ++void wg_socket_set_peer_endpoint(struct wg_peer *peer,
56271 ++ const struct endpoint *endpoint);
56272 ++void wg_socket_set_peer_endpoint_from_skb(struct wg_peer *peer,
56273 ++ const struct sk_buff *skb);
56274 ++void wg_socket_clear_peer_endpoint_src(struct wg_peer *peer);
56275 ++
/* net_dbg_skb_ratelimited(fmt, dev, skb, ...) - rate-limited debug message
 * that includes the sender of @skb.
 *
 * The endpoint parsed from @skb is passed to net_dbg_ratelimited() as the
 * argument following @dev, so @fmt must consume @dev first, then a socket
 * address, then any further arguments. When neither CONFIG_DYNAMIC_DEBUG nor
 * DEBUG is defined the macro expands to nothing and its arguments are not
 * evaluated. Both variants take the same parameter list.
 */
#if defined(CONFIG_DYNAMIC_DEBUG) || defined(DEBUG)
#define net_dbg_skb_ratelimited(fmt, dev, skb, ...) do {                       \
		struct endpoint __endpoint;                                    \
		wg_socket_endpoint_from_skb(&__endpoint, skb);                 \
		net_dbg_ratelimited(fmt, dev, &__endpoint.addr,                \
				    ##__VA_ARGS__);                            \
	} while (0)
#else
#define net_dbg_skb_ratelimited(fmt, dev, skb, ...)
#endif
56286 ++
56287 ++#endif /* _WG_SOCKET_H */
56288 +diff --git a/drivers/net/wireguard/timers.c b/drivers/net/wireguard/timers.c
56289 +new file mode 100644
56290 +index 000000000000..d54d32ac9bc4
56291 +--- /dev/null
56292 ++++ b/drivers/net/wireguard/timers.c
56293 +@@ -0,0 +1,243 @@
56294 ++// SPDX-License-Identifier: GPL-2.0
56295 ++/*
56296 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
56297 ++ */
56298 ++
56299 ++#include "timers.h"
56300 ++#include "device.h"
56301 ++#include "peer.h"
56302 ++#include "queueing.h"
56303 ++#include "socket.h"
56304 ++
56305 ++/*
56306 ++ * - Timer for retransmitting the handshake if we don't hear back after
56307 ++ * `REKEY_TIMEOUT` seconds plus jitter.
56308 ++ *
56309 ++ * - Timer for sending an empty packet if we have received a packet but have
56310 ++ * not sent one afterwards for `KEEPALIVE_TIMEOUT` seconds.
56311 ++ *
56312 ++ * - Timer for initiating a new handshake if we have sent a packet but have
56313 ++ * not received one (even empty) afterwards for `(KEEPALIVE_TIMEOUT +
56314 ++ * REKEY_TIMEOUT)` seconds plus jitter.
56315 ++ *
56316 ++ * - Timer for zeroing out all ephemeral keys after `(REJECT_AFTER_TIME * 3)`
56317 ++ * seconds if no new keys have been received.
56318 ++ *
56319 ++ * - Timer for, if enabled, sending an empty authenticated packet every user-
56320 ++ * specified number of seconds.
56321 ++ */
56322 ++
56323 ++static inline void mod_peer_timer(struct wg_peer *peer,
56324 ++ struct timer_list *timer,
56325 ++ unsigned long expires)
56326 ++{
56327 ++ rcu_read_lock_bh();
56328 ++ if (likely(netif_running(peer->device->dev) &&
56329 ++ !READ_ONCE(peer->is_dead)))
56330 ++ mod_timer(timer, expires);
56331 ++ rcu_read_unlock_bh();
56332 ++}
56333 ++
56334 ++static void wg_expired_retransmit_handshake(struct timer_list *timer)
56335 ++{
56336 ++ struct wg_peer *peer = from_timer(peer, timer,
56337 ++ timer_retransmit_handshake);
56338 ++
56339 ++ if (peer->timer_handshake_attempts > MAX_TIMER_HANDSHAKES) {
56340 ++ pr_debug("%s: Handshake for peer %llu (%pISpfsc) did not complete after %d attempts, giving up\n",
56341 ++ peer->device->dev->name, peer->internal_id,
56342 ++ &peer->endpoint.addr, MAX_TIMER_HANDSHAKES + 2);
56343 ++
56344 ++ del_timer(&peer->timer_send_keepalive);
56345 ++ /* We drop all packets without a keypair and don't try again,
56346 ++ * if we try unsuccessfully for too long to make a handshake.
56347 ++ */
56348 ++ wg_packet_purge_staged_packets(peer);
56349 ++
56350 ++ /* We set a timer for destroying any residue that might be left
56351 ++ * of a partial exchange.
56352 ++ */
56353 ++ if (!timer_pending(&peer->timer_zero_key_material))
56354 ++ mod_peer_timer(peer, &peer->timer_zero_key_material,
56355 ++ jiffies + REJECT_AFTER_TIME * 3 * HZ);
56356 ++ } else {
56357 ++ ++peer->timer_handshake_attempts;
56358 ++ pr_debug("%s: Handshake for peer %llu (%pISpfsc) did not complete after %d seconds, retrying (try %d)\n",
56359 ++ peer->device->dev->name, peer->internal_id,
56360 ++ &peer->endpoint.addr, REKEY_TIMEOUT,
56361 ++ peer->timer_handshake_attempts + 1);
56362 ++
56363 ++ /* We clear the endpoint address src address, in case this is
56364 ++ * the cause of trouble.
56365 ++ */
56366 ++ wg_socket_clear_peer_endpoint_src(peer);
56367 ++
56368 ++ wg_packet_send_queued_handshake_initiation(peer, true);
56369 ++ }
56370 ++}
56371 ++
56372 ++static void wg_expired_send_keepalive(struct timer_list *timer)
56373 ++{
56374 ++ struct wg_peer *peer = from_timer(peer, timer, timer_send_keepalive);
56375 ++
56376 ++ wg_packet_send_keepalive(peer);
56377 ++ if (peer->timer_need_another_keepalive) {
56378 ++ peer->timer_need_another_keepalive = false;
56379 ++ mod_peer_timer(peer, &peer->timer_send_keepalive,
56380 ++ jiffies + KEEPALIVE_TIMEOUT * HZ);
56381 ++ }
56382 ++}
56383 ++
56384 ++static void wg_expired_new_handshake(struct timer_list *timer)
56385 ++{
56386 ++ struct wg_peer *peer = from_timer(peer, timer, timer_new_handshake);
56387 ++
56388 ++ pr_debug("%s: Retrying handshake with peer %llu (%pISpfsc) because we stopped hearing back after %d seconds\n",
56389 ++ peer->device->dev->name, peer->internal_id,
56390 ++ &peer->endpoint.addr, KEEPALIVE_TIMEOUT + REKEY_TIMEOUT);
56391 ++ /* We clear the endpoint address src address, in case this is the cause
56392 ++ * of trouble.
56393 ++ */
56394 ++ wg_socket_clear_peer_endpoint_src(peer);
56395 ++ wg_packet_send_queued_handshake_initiation(peer, false);
56396 ++}
56397 ++
56398 ++static void wg_expired_zero_key_material(struct timer_list *timer)
56399 ++{
56400 ++ struct wg_peer *peer = from_timer(peer, timer, timer_zero_key_material);
56401 ++
56402 ++ rcu_read_lock_bh();
56403 ++ if (!READ_ONCE(peer->is_dead)) {
56404 ++ wg_peer_get(peer);
56405 ++ if (!queue_work(peer->device->handshake_send_wq,
56406 ++ &peer->clear_peer_work))
56407 ++ /* If the work was already on the queue, we want to drop
56408 ++ * the extra reference.
56409 ++ */
56410 ++ wg_peer_put(peer);
56411 ++ }
56412 ++ rcu_read_unlock_bh();
56413 ++}
56414 ++
56415 ++static void wg_queued_expired_zero_key_material(struct work_struct *work)
56416 ++{
56417 ++ struct wg_peer *peer = container_of(work, struct wg_peer,
56418 ++ clear_peer_work);
56419 ++
56420 ++ pr_debug("%s: Zeroing out all keys for peer %llu (%pISpfsc), since we haven't received a new one in %d seconds\n",
56421 ++ peer->device->dev->name, peer->internal_id,
56422 ++ &peer->endpoint.addr, REJECT_AFTER_TIME * 3);
56423 ++ wg_noise_handshake_clear(&peer->handshake);
56424 ++ wg_noise_keypairs_clear(&peer->keypairs);
56425 ++ wg_peer_put(peer);
56426 ++}
56427 ++
56428 ++static void wg_expired_send_persistent_keepalive(struct timer_list *timer)
56429 ++{
56430 ++ struct wg_peer *peer = from_timer(peer, timer,
56431 ++ timer_persistent_keepalive);
56432 ++
56433 ++ if (likely(peer->persistent_keepalive_interval))
56434 ++ wg_packet_send_keepalive(peer);
56435 ++}
56436 ++
56437 ++/* Should be called after an authenticated data packet is sent. */
56438 ++void wg_timers_data_sent(struct wg_peer *peer)
56439 ++{
56440 ++ if (!timer_pending(&peer->timer_new_handshake))
56441 ++ mod_peer_timer(peer, &peer->timer_new_handshake,
56442 ++ jiffies + (KEEPALIVE_TIMEOUT + REKEY_TIMEOUT) * HZ +
56443 ++ prandom_u32_max(REKEY_TIMEOUT_JITTER_MAX_JIFFIES));
56444 ++}
56445 ++
56446 ++/* Should be called after an authenticated data packet is received. */
56447 ++void wg_timers_data_received(struct wg_peer *peer)
56448 ++{
56449 ++ if (likely(netif_running(peer->device->dev))) {
56450 ++ if (!timer_pending(&peer->timer_send_keepalive))
56451 ++ mod_peer_timer(peer, &peer->timer_send_keepalive,
56452 ++ jiffies + KEEPALIVE_TIMEOUT * HZ);
56453 ++ else
56454 ++ peer->timer_need_another_keepalive = true;
56455 ++ }
56456 ++}
56457 ++
56458 ++/* Should be called after any type of authenticated packet is sent, whether
56459 ++ * keepalive, data, or handshake.
56460 ++ */
56461 ++void wg_timers_any_authenticated_packet_sent(struct wg_peer *peer)
56462 ++{
56463 ++ del_timer(&peer->timer_send_keepalive);
56464 ++}
56465 ++
56466 ++/* Should be called after any type of authenticated packet is received, whether
56467 ++ * keepalive, data, or handshake.
56468 ++ */
56469 ++void wg_timers_any_authenticated_packet_received(struct wg_peer *peer)
56470 ++{
56471 ++ del_timer(&peer->timer_new_handshake);
56472 ++}
56473 ++
56474 ++/* Should be called after a handshake initiation message is sent. */
56475 ++void wg_timers_handshake_initiated(struct wg_peer *peer)
56476 ++{
56477 ++ mod_peer_timer(peer, &peer->timer_retransmit_handshake,
56478 ++ jiffies + REKEY_TIMEOUT * HZ +
56479 ++ prandom_u32_max(REKEY_TIMEOUT_JITTER_MAX_JIFFIES));
56480 ++}
56481 ++
56482 ++/* Should be called after a handshake response message is received and processed
56483 ++ * or when getting key confirmation via the first data message.
56484 ++ */
56485 ++void wg_timers_handshake_complete(struct wg_peer *peer)
56486 ++{
56487 ++ del_timer(&peer->timer_retransmit_handshake);
56488 ++ peer->timer_handshake_attempts = 0;
56489 ++ peer->sent_lastminute_handshake = false;
56490 ++ ktime_get_real_ts64(&peer->walltime_last_handshake);
56491 ++}
56492 ++
56493 ++/* Should be called after an ephemeral key is created, which is before sending a
56494 ++ * handshake response or after receiving a handshake response.
56495 ++ */
56496 ++void wg_timers_session_derived(struct wg_peer *peer)
56497 ++{
56498 ++ mod_peer_timer(peer, &peer->timer_zero_key_material,
56499 ++ jiffies + REJECT_AFTER_TIME * 3 * HZ);
56500 ++}
56501 ++
56502 ++/* Should be called before a packet with authentication, whether
56503 ++ * keepalive, data, or handshake, is sent, or after one is received.
56504 ++ */
56505 ++void wg_timers_any_authenticated_packet_traversal(struct wg_peer *peer)
56506 ++{
56507 ++ if (peer->persistent_keepalive_interval)
56508 ++ mod_peer_timer(peer, &peer->timer_persistent_keepalive,
56509 ++ jiffies + peer->persistent_keepalive_interval * HZ);
56510 ++}
56511 ++
56512 ++void wg_timers_init(struct wg_peer *peer)
56513 ++{
56514 ++ timer_setup(&peer->timer_retransmit_handshake,
56515 ++ wg_expired_retransmit_handshake, 0);
56516 ++ timer_setup(&peer->timer_send_keepalive, wg_expired_send_keepalive, 0);
56517 ++ timer_setup(&peer->timer_new_handshake, wg_expired_new_handshake, 0);
56518 ++ timer_setup(&peer->timer_zero_key_material,
56519 ++ wg_expired_zero_key_material, 0);
56520 ++ timer_setup(&peer->timer_persistent_keepalive,
56521 ++ wg_expired_send_persistent_keepalive, 0);
56522 ++ INIT_WORK(&peer->clear_peer_work, wg_queued_expired_zero_key_material);
56523 ++ peer->timer_handshake_attempts = 0;
56524 ++ peer->sent_lastminute_handshake = false;
56525 ++ peer->timer_need_another_keepalive = false;
56526 ++}
56527 ++
56528 ++void wg_timers_stop(struct wg_peer *peer)
56529 ++{
56530 ++ del_timer_sync(&peer->timer_retransmit_handshake);
56531 ++ del_timer_sync(&peer->timer_send_keepalive);
56532 ++ del_timer_sync(&peer->timer_new_handshake);
56533 ++ del_timer_sync(&peer->timer_zero_key_material);
56534 ++ del_timer_sync(&peer->timer_persistent_keepalive);
56535 ++ flush_work(&peer->clear_peer_work);
56536 ++}
56537 +diff --git a/drivers/net/wireguard/timers.h b/drivers/net/wireguard/timers.h
56538 +new file mode 100644
56539 +index 000000000000..f0653dcb1326
56540 +--- /dev/null
56541 ++++ b/drivers/net/wireguard/timers.h
56542 +@@ -0,0 +1,31 @@
56543 ++/* SPDX-License-Identifier: GPL-2.0 */
56544 ++/*
56545 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
56546 ++ */
56547 ++
56548 ++#ifndef _WG_TIMERS_H
56549 ++#define _WG_TIMERS_H
56550 ++
56551 ++#include <linux/ktime.h>
56552 ++
56553 ++struct wg_peer;
56554 ++
56555 ++void wg_timers_init(struct wg_peer *peer);
56556 ++void wg_timers_stop(struct wg_peer *peer);
56557 ++void wg_timers_data_sent(struct wg_peer *peer);
56558 ++void wg_timers_data_received(struct wg_peer *peer);
56559 ++void wg_timers_any_authenticated_packet_sent(struct wg_peer *peer);
56560 ++void wg_timers_any_authenticated_packet_received(struct wg_peer *peer);
56561 ++void wg_timers_handshake_initiated(struct wg_peer *peer);
56562 ++void wg_timers_handshake_complete(struct wg_peer *peer);
56563 ++void wg_timers_session_derived(struct wg_peer *peer);
56564 ++void wg_timers_any_authenticated_packet_traversal(struct wg_peer *peer);
56565 ++
56566 ++static inline bool wg_birthdate_has_expired(u64 birthday_nanoseconds,
56567 ++ u64 expiration_seconds)
56568 ++{
56569 ++ return (s64)(birthday_nanoseconds + expiration_seconds * NSEC_PER_SEC)
56570 ++ <= (s64)ktime_get_coarse_boottime_ns();
56571 ++}
56572 ++
56573 ++#endif /* _WG_TIMERS_H */
56574 +diff --git a/drivers/net/wireguard/version.h b/drivers/net/wireguard/version.h
56575 +new file mode 100644
56576 +index 000000000000..a1a269a11634
56577 +--- /dev/null
56578 ++++ b/drivers/net/wireguard/version.h
56579 +@@ -0,0 +1 @@
56580 ++#define WIREGUARD_VERSION "1.0.0"
56581 +diff --git a/include/uapi/linux/wireguard.h b/include/uapi/linux/wireguard.h
56582 +new file mode 100644
56583 +index 000000000000..dd8a47c4ad11
56584 +--- /dev/null
56585 ++++ b/include/uapi/linux/wireguard.h
56586 +@@ -0,0 +1,196 @@
56587 ++/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */
56588 ++/*
56589 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
56590 ++ *
56591 ++ * Documentation
56592 ++ * =============
56593 ++ *
56594 ++ * The below enums and macros are for interfacing with WireGuard, using generic
56595 ++ * netlink, with family WG_GENL_NAME and version WG_GENL_VERSION. It defines two
56596 ++ * methods: get and set. Note that while they share many common attributes,
56597 ++ * these two functions actually accept a slightly different set of inputs and
56598 ++ * outputs.
56599 ++ *
56600 ++ * WG_CMD_GET_DEVICE
56601 ++ * -----------------
56602 ++ *
56603 ++ * May only be called via NLM_F_REQUEST | NLM_F_DUMP. The command should contain
56604 ++ * one but not both of:
56605 ++ *
56606 ++ * WGDEVICE_A_IFINDEX: NLA_U32
56607 ++ * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1
56608 ++ *
56609 ++ * The kernel will then return several messages (NLM_F_MULTI) containing the
56610 ++ * following tree of nested items:
56611 ++ *
56612 ++ * WGDEVICE_A_IFINDEX: NLA_U32
56613 ++ * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1
56614 ++ * WGDEVICE_A_PRIVATE_KEY: NLA_EXACT_LEN, len WG_KEY_LEN
56615 ++ * WGDEVICE_A_PUBLIC_KEY: NLA_EXACT_LEN, len WG_KEY_LEN
56616 ++ * WGDEVICE_A_LISTEN_PORT: NLA_U16
56617 ++ * WGDEVICE_A_FWMARK: NLA_U32
56618 ++ * WGDEVICE_A_PEERS: NLA_NESTED
56619 ++ * 0: NLA_NESTED
56620 ++ * WGPEER_A_PUBLIC_KEY: NLA_EXACT_LEN, len WG_KEY_LEN
56621 ++ * WGPEER_A_PRESHARED_KEY: NLA_EXACT_LEN, len WG_KEY_LEN
56622 ++ * WGPEER_A_ENDPOINT: NLA_MIN_LEN(struct sockaddr), struct sockaddr_in or struct sockaddr_in6
56623 ++ * WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL: NLA_U16
56624 ++ * WGPEER_A_LAST_HANDSHAKE_TIME: NLA_EXACT_LEN, struct __kernel_timespec
56625 ++ * WGPEER_A_RX_BYTES: NLA_U64
56626 ++ * WGPEER_A_TX_BYTES: NLA_U64
56627 ++ * WGPEER_A_ALLOWEDIPS: NLA_NESTED
56628 ++ * 0: NLA_NESTED
56629 ++ * WGALLOWEDIP_A_FAMILY: NLA_U16
56630 ++ * WGALLOWEDIP_A_IPADDR: NLA_MIN_LEN(struct in_addr), struct in_addr or struct in6_addr
56631 ++ * WGALLOWEDIP_A_CIDR_MASK: NLA_U8
56632 ++ * 0: NLA_NESTED
56633 ++ * ...
56634 ++ * 0: NLA_NESTED
56635 ++ * ...
56636 ++ * ...
56637 ++ * WGPEER_A_PROTOCOL_VERSION: NLA_U32
56638 ++ * 0: NLA_NESTED
56639 ++ * ...
56640 ++ * ...
56641 ++ *
56642 ++ * It is possible that all of the allowed IPs of a single peer will not
56643 ++ * fit within a single netlink message. In that case, the same peer will
56644 ++ * be written in the following message, except it will only contain
56645 ++ * WGPEER_A_PUBLIC_KEY and WGPEER_A_ALLOWEDIPS. This may occur several
56646 ++ * times in a row for the same peer. It is then up to the receiver to
56647 ++ * coalesce adjacent peers. Likewise, it is possible that all peers will
56648 ++ * not fit within a single message. So, subsequent peers will be sent
56649 ++ * in following messages, except those will only contain WGDEVICE_A_IFNAME
56650 ++ * and WGDEVICE_A_PEERS. It is then up to the receiver to coalesce these
56651 ++ * messages to form the complete list of peers.
56652 ++ *
56653 ++ * Since this is an NLM_F_DUMP command, the final message will always be
56654 ++ * NLMSG_DONE, even if an error occurs. However, this NLMSG_DONE message
56655 ++ * contains an integer error code. It is either zero or a negative error
56656 ++ * code corresponding to the errno.
56657 ++ *
56658 ++ * WG_CMD_SET_DEVICE
56659 ++ * -----------------
56660 ++ *
56661 ++ * May only be called via NLM_F_REQUEST. The command should contain the
56662 ++ * following tree of nested items, containing one but not both of
56663 ++ * WGDEVICE_A_IFINDEX and WGDEVICE_A_IFNAME:
56664 ++ *
56665 ++ * WGDEVICE_A_IFINDEX: NLA_U32
56666 ++ * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1
56667 ++ * WGDEVICE_A_FLAGS: NLA_U32, 0 or WGDEVICE_F_REPLACE_PEERS if all current
56668 ++ * peers should be removed prior to adding the list below.
56669 ++ * WGDEVICE_A_PRIVATE_KEY: len WG_KEY_LEN, all zeros to remove
56670 ++ * WGDEVICE_A_LISTEN_PORT: NLA_U16, 0 to choose randomly
56671 ++ * WGDEVICE_A_FWMARK: NLA_U32, 0 to disable
56672 ++ * WGDEVICE_A_PEERS: NLA_NESTED
56673 ++ * 0: NLA_NESTED
56674 ++ * WGPEER_A_PUBLIC_KEY: len WG_KEY_LEN
56675 ++ * WGPEER_A_FLAGS: NLA_U32, 0 and/or WGPEER_F_REMOVE_ME if the
56676 ++ * specified peer should not exist at the end of the
56677 ++ * operation, rather than added/updated and/or
56678 ++ * WGPEER_F_REPLACE_ALLOWEDIPS if all current allowed
56679 ++ * IPs of this peer should be removed prior to adding
56680 ++ * the list below and/or WGPEER_F_UPDATE_ONLY if the
56681 ++ * peer should only be set if it already exists.
56682 ++ * WGPEER_A_PRESHARED_KEY: len WG_KEY_LEN, all zeros to remove
56683 ++ * WGPEER_A_ENDPOINT: struct sockaddr_in or struct sockaddr_in6
56684 ++ * WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL: NLA_U16, 0 to disable
56685 ++ * WGPEER_A_ALLOWEDIPS: NLA_NESTED
56686 ++ * 0: NLA_NESTED
56687 ++ * WGALLOWEDIP_A_FAMILY: NLA_U16
56688 ++ * WGALLOWEDIP_A_IPADDR: struct in_addr or struct in6_addr
56689 ++ * WGALLOWEDIP_A_CIDR_MASK: NLA_U8
56690 ++ * 0: NLA_NESTED
56691 ++ * ...
56692 ++ * 0: NLA_NESTED
56693 ++ * ...
56694 ++ * ...
56695 ++ * WGPEER_A_PROTOCOL_VERSION: NLA_U32, should not be set or used at
56696 ++ * all by most users of this API, as the
56697 ++ * most recent protocol will be used when
56698 ++ * this is unset. Otherwise, must be set
56699 ++ * to 1.
56700 ++ * 0: NLA_NESTED
56701 ++ * ...
56702 ++ * ...
56703 ++ *
56704 ++ * It is possible that the amount of configuration data exceeds that of
56705 ++ * the maximum message length accepted by the kernel. In that case, several
56706 ++ * messages should be sent one after another, with each successive one
56707 ++ * filling in information not contained in the prior. Note that if
56708 ++ * WGDEVICE_F_REPLACE_PEERS is specified in the first message, it probably
56709 ++ * should not be specified in fragments that come after, so that the list
56710 ++ * of peers is only cleared the first time but appended after. Likewise for
56711 ++ * peers, if WGPEER_F_REPLACE_ALLOWEDIPS is specified in the first message
56712 ++ * of a peer, it likely should not be specified in subsequent fragments.
56713 ++ *
56714 ++ * If an error occurs, NLMSG_ERROR will reply containing an errno.
56715 ++ */
56716 ++
56717 ++#ifndef _WG_UAPI_WIREGUARD_H
56718 ++#define _WG_UAPI_WIREGUARD_H
56719 ++
56720 ++#define WG_GENL_NAME "wireguard"
56721 ++#define WG_GENL_VERSION 1
56722 ++
56723 ++#define WG_KEY_LEN 32
56724 ++
56725 ++enum wg_cmd {
56726 ++ WG_CMD_GET_DEVICE,
56727 ++ WG_CMD_SET_DEVICE,
56728 ++ __WG_CMD_MAX
56729 ++};
56730 ++#define WG_CMD_MAX (__WG_CMD_MAX - 1)
56731 ++
56732 ++enum wgdevice_flag {
56733 ++ WGDEVICE_F_REPLACE_PEERS = 1U << 0,
56734 ++ __WGDEVICE_F_ALL = WGDEVICE_F_REPLACE_PEERS
56735 ++};
56736 ++enum wgdevice_attribute {
56737 ++ WGDEVICE_A_UNSPEC,
56738 ++ WGDEVICE_A_IFINDEX,
56739 ++ WGDEVICE_A_IFNAME,
56740 ++ WGDEVICE_A_PRIVATE_KEY,
56741 ++ WGDEVICE_A_PUBLIC_KEY,
56742 ++ WGDEVICE_A_FLAGS,
56743 ++ WGDEVICE_A_LISTEN_PORT,
56744 ++ WGDEVICE_A_FWMARK,
56745 ++ WGDEVICE_A_PEERS,
56746 ++ __WGDEVICE_A_LAST
56747 ++};
56748 ++#define WGDEVICE_A_MAX (__WGDEVICE_A_LAST - 1)
56749 ++
56750 ++enum wgpeer_flag {
56751 ++ WGPEER_F_REMOVE_ME = 1U << 0,
56752 ++ WGPEER_F_REPLACE_ALLOWEDIPS = 1U << 1,
56753 ++ WGPEER_F_UPDATE_ONLY = 1U << 2,
56754 ++ __WGPEER_F_ALL = WGPEER_F_REMOVE_ME | WGPEER_F_REPLACE_ALLOWEDIPS |
56755 ++ WGPEER_F_UPDATE_ONLY
56756 ++};
56757 ++enum wgpeer_attribute {
56758 ++ WGPEER_A_UNSPEC,
56759 ++ WGPEER_A_PUBLIC_KEY,
56760 ++ WGPEER_A_PRESHARED_KEY,
56761 ++ WGPEER_A_FLAGS,
56762 ++ WGPEER_A_ENDPOINT,
56763 ++ WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL,
56764 ++ WGPEER_A_LAST_HANDSHAKE_TIME,
56765 ++ WGPEER_A_RX_BYTES,
56766 ++ WGPEER_A_TX_BYTES,
56767 ++ WGPEER_A_ALLOWEDIPS,
56768 ++ WGPEER_A_PROTOCOL_VERSION,
56769 ++ __WGPEER_A_LAST
56770 ++};
56771 ++#define WGPEER_A_MAX (__WGPEER_A_LAST - 1)
56772 ++
56773 ++enum wgallowedip_attribute {
56774 ++ WGALLOWEDIP_A_UNSPEC,
56775 ++ WGALLOWEDIP_A_FAMILY,
56776 ++ WGALLOWEDIP_A_IPADDR,
56777 ++ WGALLOWEDIP_A_CIDR_MASK,
56778 ++ __WGALLOWEDIP_A_LAST
56779 ++};
56780 ++#define WGALLOWEDIP_A_MAX (__WGALLOWEDIP_A_LAST - 1)
56781 ++
56782 ++#endif /* _WG_UAPI_WIREGUARD_H */
56783 +diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh
56784 +new file mode 100755
56785 +index 000000000000..e7310d9390f7
56786 +--- /dev/null
56787 ++++ b/tools/testing/selftests/wireguard/netns.sh
56788 +@@ -0,0 +1,537 @@
56789 ++#!/bin/bash
56790 ++# SPDX-License-Identifier: GPL-2.0
56791 ++#
56792 ++# Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
56793 ++#
56794 ++# This script tests the below topology:
56795 ++#
56796 ++# ┌─────────────────────┐ ┌──────────────────────────────────┐ ┌─────────────────────┐
56797 ++# │ $ns1 namespace │ │ $ns0 namespace │ │ $ns2 namespace │
56798 ++# │ │ │ │ │ │
56799 ++# │┌────────┐ │ │ ┌────────┐ │ │ ┌────────┐│
56800 ++# ││ wg0 │───────────┼───┼────────────│ lo │────────────┼───┼───────────│ wg0 ││
56801 ++# │├────────┴──────────┐│ │ ┌───────┴────────┴────────┐ │ │┌──────────┴────────┤│
56802 ++# ││192.168.241.1/24 ││ │ │(ns1) (ns2) │ │ ││192.168.241.2/24 ││
56803 ++# ││fd00::1/24 ││ │ │127.0.0.1:1 127.0.0.1:2│ │ ││fd00::2/24 ││
56804 ++# │└───────────────────┘│ │ │[::]:1 [::]:2 │ │ │└───────────────────┘│
56805 ++# └─────────────────────┘ │ └─────────────────────────┘ │ └─────────────────────┘
56806 ++# └──────────────────────────────────┘
56807 ++#
56808 ++# After the topology is prepared we run a series of TCP/UDP iperf3 tests between the
56809 ++# wireguard peers in $ns1 and $ns2. Note that $ns0 is the endpoint for the wg0
56810 ++# interfaces in $ns1 and $ns2. See https://www.wireguard.com/netns/ for further
56811 ++# details on how this is accomplished.
56812 ++set -e
56813 ++
56814 ++exec 3>&1
56815 ++export WG_HIDE_KEYS=never
56816 ++netns0="wg-test-$$-0"
56817 ++netns1="wg-test-$$-1"
56818 ++netns2="wg-test-$$-2"
56819 ++pretty() { echo -e "\x1b[32m\x1b[1m[+] ${1:+NS$1: }${2}\x1b[0m" >&3; }
56820 ++pp() { pretty "" "$*"; "$@"; }
56821 ++maybe_exec() { if [[ $BASHPID -eq $$ ]]; then "$@"; else exec "$@"; fi; }
56822 ++n0() { pretty 0 "$*"; maybe_exec ip netns exec $netns0 "$@"; }
56823 ++n1() { pretty 1 "$*"; maybe_exec ip netns exec $netns1 "$@"; }
56824 ++n2() { pretty 2 "$*"; maybe_exec ip netns exec $netns2 "$@"; }
56825 ++ip0() { pretty 0 "ip $*"; ip -n $netns0 "$@"; }
56826 ++ip1() { pretty 1 "ip $*"; ip -n $netns1 "$@"; }
56827 ++ip2() { pretty 2 "ip $*"; ip -n $netns2 "$@"; }
56828 ++sleep() { read -t "$1" -N 0 || true; }
56829 ++waitiperf() { pretty "${1//*-}" "wait for iperf:5201"; while [[ $(ss -N "$1" -tlp 'sport = 5201') != *iperf3* ]]; do sleep 0.1; done; }
56830 ++waitncatudp() { pretty "${1//*-}" "wait for udp:1111"; while [[ $(ss -N "$1" -ulp 'sport = 1111') != *ncat* ]]; do sleep 0.1; done; }
56831 ++waitncattcp() { pretty "${1//*-}" "wait for tcp:1111"; while [[ $(ss -N "$1" -tlp 'sport = 1111') != *ncat* ]]; do sleep 0.1; done; }
56832 ++waitiface() { pretty "${1//*-}" "wait for $2 to come up"; ip netns exec "$1" bash -c "while [[ \$(< \"/sys/class/net/$2/operstate\") != up ]]; do read -t .1 -N 0 || true; done;"; }
56833 ++
56834 ++cleanup() {
56835 ++ set +e
56836 ++ exec 2>/dev/null
56837 ++ printf "$orig_message_cost" > /proc/sys/net/core/message_cost
56838 ++ ip0 link del dev wg0
56839 ++ ip1 link del dev wg0
56840 ++ ip2 link del dev wg0
56841 ++ local to_kill="$(ip netns pids $netns0) $(ip netns pids $netns1) $(ip netns pids $netns2)"
56842 ++ [[ -n $to_kill ]] && kill $to_kill
56843 ++ pp ip netns del $netns1
56844 ++ pp ip netns del $netns2
56845 ++ pp ip netns del $netns0
56846 ++ exit
56847 ++}
56848 ++
56849 ++orig_message_cost="$(< /proc/sys/net/core/message_cost)"
56850 ++trap cleanup EXIT
56851 ++printf 0 > /proc/sys/net/core/message_cost
56852 ++
56853 ++ip netns del $netns0 2>/dev/null || true
56854 ++ip netns del $netns1 2>/dev/null || true
56855 ++ip netns del $netns2 2>/dev/null || true
56856 ++pp ip netns add $netns0
56857 ++pp ip netns add $netns1
56858 ++pp ip netns add $netns2
56859 ++ip0 link set up dev lo
56860 ++
56861 ++ip0 link add dev wg0 type wireguard
56862 ++ip0 link set wg0 netns $netns1
56863 ++ip0 link add dev wg0 type wireguard
56864 ++ip0 link set wg0 netns $netns2
56865 ++key1="$(pp wg genkey)"
56866 ++key2="$(pp wg genkey)"
56867 ++key3="$(pp wg genkey)"
56868 ++pub1="$(pp wg pubkey <<<"$key1")"
56869 ++pub2="$(pp wg pubkey <<<"$key2")"
56870 ++pub3="$(pp wg pubkey <<<"$key3")"
56871 ++psk="$(pp wg genpsk)"
56872 ++[[ -n $key1 && -n $key2 && -n $psk ]]
56873 ++
56874 ++configure_peers() {
56875 ++ ip1 addr add 192.168.241.1/24 dev wg0
56876 ++ ip1 addr add fd00::1/24 dev wg0
56877 ++
56878 ++ ip2 addr add 192.168.241.2/24 dev wg0
56879 ++ ip2 addr add fd00::2/24 dev wg0
56880 ++
56881 ++ n1 wg set wg0 \
56882 ++ private-key <(echo "$key1") \
56883 ++ listen-port 1 \
56884 ++ peer "$pub2" \
56885 ++ preshared-key <(echo "$psk") \
56886 ++ allowed-ips 192.168.241.2/32,fd00::2/128
56887 ++ n2 wg set wg0 \
56888 ++ private-key <(echo "$key2") \
56889 ++ listen-port 2 \
56890 ++ peer "$pub1" \
56891 ++ preshared-key <(echo "$psk") \
56892 ++ allowed-ips 192.168.241.1/32,fd00::1/128
56893 ++
56894 ++ ip1 link set up dev wg0
56895 ++ ip2 link set up dev wg0
56896 ++}
56897 ++configure_peers
56898 ++
56899 ++tests() {
56900 ++ # Ping over IPv4
56901 ++ n2 ping -c 10 -f -W 1 192.168.241.1
56902 ++ n1 ping -c 10 -f -W 1 192.168.241.2
56903 ++
56904 ++ # Ping over IPv6
56905 ++ n2 ping6 -c 10 -f -W 1 fd00::1
56906 ++ n1 ping6 -c 10 -f -W 1 fd00::2
56907 ++
56908 ++ # TCP over IPv4
56909 ++ n2 iperf3 -s -1 -B 192.168.241.2 &
56910 ++ waitiperf $netns2
56911 ++ n1 iperf3 -Z -t 3 -c 192.168.241.2
56912 ++
56913 ++ # TCP over IPv6
56914 ++ n1 iperf3 -s -1 -B fd00::1 &
56915 ++ waitiperf $netns1
56916 ++ n2 iperf3 -Z -t 3 -c fd00::1
56917 ++
56918 ++ # UDP over IPv4
56919 ++ n1 iperf3 -s -1 -B 192.168.241.1 &
56920 ++ waitiperf $netns1
56921 ++ n2 iperf3 -Z -t 3 -b 0 -u -c 192.168.241.1
56922 ++
56923 ++ # UDP over IPv6
56924 ++ n2 iperf3 -s -1 -B fd00::2 &
56925 ++ waitiperf $netns2
56926 ++ n1 iperf3 -Z -t 3 -b 0 -u -c fd00::2
56927 ++}
56928 ++
56929 ++[[ $(ip1 link show dev wg0) =~ mtu\ ([0-9]+) ]] && orig_mtu="${BASH_REMATCH[1]}"
56930 ++big_mtu=$(( 34816 - 1500 + $orig_mtu ))
56931 ++
56932 ++# Test using IPv4 as outer transport
56933 ++n1 wg set wg0 peer "$pub2" endpoint 127.0.0.1:2
56934 ++n2 wg set wg0 peer "$pub1" endpoint 127.0.0.1:1
56935 ++# Before calling tests, we first make sure that the stats counters and timestamper are working
56936 ++n2 ping -c 10 -f -W 1 192.168.241.1
56937 ++{ read _; read _; read _; read rx_bytes _; read _; read tx_bytes _; } < <(ip2 -stats link show dev wg0)
56938 ++(( rx_bytes == 1372 && (tx_bytes == 1428 || tx_bytes == 1460) ))
56939 ++{ read _; read _; read _; read rx_bytes _; read _; read tx_bytes _; } < <(ip1 -stats link show dev wg0)
56940 ++(( tx_bytes == 1372 && (rx_bytes == 1428 || rx_bytes == 1460) ))
56941 ++read _ rx_bytes tx_bytes < <(n2 wg show wg0 transfer)
56942 ++(( rx_bytes == 1372 && (tx_bytes == 1428 || tx_bytes == 1460) ))
56943 ++read _ rx_bytes tx_bytes < <(n1 wg show wg0 transfer)
56944 ++(( tx_bytes == 1372 && (rx_bytes == 1428 || rx_bytes == 1460) ))
56945 ++read _ timestamp < <(n1 wg show wg0 latest-handshakes)
56946 ++(( timestamp != 0 ))
56947 ++
56948 ++tests
56949 ++ip1 link set wg0 mtu $big_mtu
56950 ++ip2 link set wg0 mtu $big_mtu
56951 ++tests
56952 ++
56953 ++ip1 link set wg0 mtu $orig_mtu
56954 ++ip2 link set wg0 mtu $orig_mtu
56955 ++
56956 ++# Test using IPv6 as outer transport
56957 ++n1 wg set wg0 peer "$pub2" endpoint [::1]:2
56958 ++n2 wg set wg0 peer "$pub1" endpoint [::1]:1
56959 ++tests
56960 ++ip1 link set wg0 mtu $big_mtu
56961 ++ip2 link set wg0 mtu $big_mtu
56962 ++tests
56963 ++
56964 ++# Test that route MTUs work with the padding
56965 ++ip1 link set wg0 mtu 1300
56966 ++ip2 link set wg0 mtu 1300
56967 ++n1 wg set wg0 peer "$pub2" endpoint 127.0.0.1:2
56968 ++n2 wg set wg0 peer "$pub1" endpoint 127.0.0.1:1
56969 ++n0 iptables -A INPUT -m length --length 1360 -j DROP
56970 ++n1 ip route add 192.168.241.2/32 dev wg0 mtu 1299
56971 ++n2 ip route add 192.168.241.1/32 dev wg0 mtu 1299
56972 ++n2 ping -c 1 -W 1 -s 1269 192.168.241.1
56973 ++n2 ip route delete 192.168.241.1/32 dev wg0 mtu 1299
56974 ++n1 ip route delete 192.168.241.2/32 dev wg0 mtu 1299
56975 ++n0 iptables -F INPUT
56976 ++
56977 ++ip1 link set wg0 mtu $orig_mtu
56978 ++ip2 link set wg0 mtu $orig_mtu
56979 ++
56980 ++# Test using IPv4 that roaming works
56981 ++ip0 -4 addr del 127.0.0.1/8 dev lo
56982 ++ip0 -4 addr add 127.212.121.99/8 dev lo
56983 ++n1 wg set wg0 listen-port 9999
56984 ++n1 wg set wg0 peer "$pub2" endpoint 127.0.0.1:2
56985 ++n1 ping6 -W 1 -c 1 fd00::2
56986 ++[[ $(n2 wg show wg0 endpoints) == "$pub1 127.212.121.99:9999" ]]
56987 ++
56988 ++# Test using IPv6 that roaming works
56989 ++n1 wg set wg0 listen-port 9998
56990 ++n1 wg set wg0 peer "$pub2" endpoint [::1]:2
56991 ++n1 ping -W 1 -c 1 192.168.241.2
56992 ++[[ $(n2 wg show wg0 endpoints) == "$pub1 [::1]:9998" ]]
56993 ++
56994 ++# Test that crypto-RP filter works
56995 ++n1 wg set wg0 peer "$pub2" allowed-ips 192.168.241.0/24
56996 ++exec 4< <(n1 ncat -l -u -p 1111)
56997 ++ncat_pid=$!
56998 ++waitncatudp $netns1
56999 ++n2 ncat -u 192.168.241.1 1111 <<<"X"
57000 ++read -r -N 1 -t 1 out <&4 && [[ $out == "X" ]]
57001 ++kill $ncat_pid
57002 ++more_specific_key="$(pp wg genkey | pp wg pubkey)"
57003 ++n1 wg set wg0 peer "$more_specific_key" allowed-ips 192.168.241.2/32
57004 ++n2 wg set wg0 listen-port 9997
57005 ++exec 4< <(n1 ncat -l -u -p 1111)
57006 ++ncat_pid=$!
57007 ++waitncatudp $netns1
57008 ++n2 ncat -u 192.168.241.1 1111 <<<"X"
57009 ++! read -r -N 1 -t 1 out <&4 || false
57010 ++kill $ncat_pid
57011 ++n1 wg set wg0 peer "$more_specific_key" remove
57012 ++[[ $(n1 wg show wg0 endpoints) == "$pub2 [::1]:9997" ]]
57013 ++
57014 ++# Test that we can change private keys and immediately handshake
57015 ++n1 wg set wg0 private-key <(echo "$key1") peer "$pub2" preshared-key <(echo "$psk") allowed-ips 192.168.241.2/32 endpoint 127.0.0.1:2
57016 ++n2 wg set wg0 private-key <(echo "$key2") listen-port 2 peer "$pub1" preshared-key <(echo "$psk") allowed-ips 192.168.241.1/32
57017 ++n1 ping -W 1 -c 1 192.168.241.2
57018 ++n1 wg set wg0 private-key <(echo "$key3")
57019 ++n2 wg set wg0 peer "$pub3" preshared-key <(echo "$psk") allowed-ips 192.168.241.1/32 peer "$pub1" remove
57020 ++n1 ping -W 1 -c 1 192.168.241.2
57021 ++
57022 ++ip1 link del wg0
57023 ++ip2 link del wg0
57024 ++
57025 ++# Test using NAT. We now change the topology to this:
57026 ++# ┌────────────────────────────────────────┐ ┌────────────────────────────────────────────────┐ ┌────────────────────────────────────────┐
57027 ++# │ $ns1 namespace │ │ $ns0 namespace │ │ $ns2 namespace │
57028 ++# │ │ │ │ │ │
57029 ++# │ ┌─────┐ ┌─────┐ │ │ ┌──────┐ ┌──────┐ │ │ ┌─────┐ ┌─────┐ │
57030 ++# │ │ wg0 │─────────────│vethc│───────────┼────┼────│vethrc│ │vethrs│──────────────┼─────┼──│veths│────────────│ wg0 │ │
57031 ++# │ ├─────┴──────────┐ ├─────┴──────────┐│ │ ├──────┴─────────┐ ├──────┴────────────┐ │ │ ├─────┴──────────┐ ├─────┴──────────┐ │
57032 ++# │ │192.168.241.1/24│ │192.168.1.100/24││ │ │192.168.1.1/24 │ │10.0.0.1/24 │ │ │ │10.0.0.100/24 │ │192.168.241.2/24│ │
57033 ++# │ │fd00::1/24 │ │ ││ │ │ │ │SNAT:192.168.1.0/24│ │ │ │ │ │fd00::2/24 │ │
57034 ++# │ └────────────────┘ └────────────────┘│ │ └────────────────┘ └───────────────────┘ │ │ └────────────────┘ └────────────────┘ │
57035 ++# └────────────────────────────────────────┘ └────────────────────────────────────────────────┘ └────────────────────────────────────────┘
57036 ++
57037 ++ip1 link add dev wg0 type wireguard
57038 ++ip2 link add dev wg0 type wireguard
57039 ++configure_peers
57040 ++
57041 ++ip0 link add vethrc type veth peer name vethc
57042 ++ip0 link add vethrs type veth peer name veths
57043 ++ip0 link set vethc netns $netns1
57044 ++ip0 link set veths netns $netns2
57045 ++ip0 link set vethrc up
57046 ++ip0 link set vethrs up
57047 ++ip0 addr add 192.168.1.1/24 dev vethrc
57048 ++ip0 addr add 10.0.0.1/24 dev vethrs
57049 ++ip1 addr add 192.168.1.100/24 dev vethc
57050 ++ip1 link set vethc up
57051 ++ip1 route add default via 192.168.1.1
57052 ++ip2 addr add 10.0.0.100/24 dev veths
57053 ++ip2 link set veths up
57054 ++waitiface $netns0 vethrc
57055 ++waitiface $netns0 vethrs
57056 ++waitiface $netns1 vethc
57057 ++waitiface $netns2 veths
57058 ++
57059 ++n0 bash -c 'printf 1 > /proc/sys/net/ipv4/ip_forward'
57060 ++n0 bash -c 'printf 2 > /proc/sys/net/netfilter/nf_conntrack_udp_timeout'
57061 ++n0 bash -c 'printf 2 > /proc/sys/net/netfilter/nf_conntrack_udp_timeout_stream'
57062 ++n0 iptables -t nat -A POSTROUTING -s 192.168.1.0/24 -d 10.0.0.0/24 -j SNAT --to 10.0.0.1
57063 ++
57064 ++n1 wg set wg0 peer "$pub2" endpoint 10.0.0.100:2 persistent-keepalive 1
57065 ++n1 ping -W 1 -c 1 192.168.241.2
57066 ++n2 ping -W 1 -c 1 192.168.241.1
57067 ++[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.1:1" ]]
57068 ++# Demonstrate n2 can still send packets to n1, since persistent-keepalive will prevent connection tracking entry from expiring (to see entries: `n0 conntrack -L`).
57069 ++pp sleep 3
57070 ++n2 ping -W 1 -c 1 192.168.241.1
57071 ++n1 wg set wg0 peer "$pub2" persistent-keepalive 0
57072 ++
57073 ++# Do a wg-quick(8)-style policy routing for the default route, making sure vethc has a v6 address to tease out bugs.
57074 ++ip1 -6 addr add fc00::9/96 dev vethc
57075 ++ip1 -6 route add default via fc00::1
57076 ++ip2 -4 addr add 192.168.99.7/32 dev wg0
57077 ++ip2 -6 addr add abab::1111/128 dev wg0
57078 ++n1 wg set wg0 fwmark 51820 peer "$pub2" allowed-ips 192.168.99.7,abab::1111
57079 ++ip1 -6 route add default dev wg0 table 51820
57080 ++ip1 -6 rule add not fwmark 51820 table 51820
57081 ++ip1 -6 rule add table main suppress_prefixlength 0
57082 ++ip1 -4 route add default dev wg0 table 51820
57083 ++ip1 -4 rule add not fwmark 51820 table 51820
57084 ++ip1 -4 rule add table main suppress_prefixlength 0
57085 ++# suppress_prefixlength only got added in 3.12, and we want to support 3.10+.
57086 ++if [[ $(ip1 -4 rule show all) == *suppress_prefixlength* ]]; then
57087 ++ # Flood the pings instead of sending just one, to trigger routing table reference counting bugs.
57088 ++ n1 ping -W 1 -c 100 -f 192.168.99.7
57089 ++ n1 ping -W 1 -c 100 -f abab::1111
57090 ++fi
57091 ++
57092 ++n0 iptables -t nat -F
57093 ++ip0 link del vethrc
57094 ++ip0 link del vethrs
57095 ++ip1 link del wg0
57096 ++ip2 link del wg0
57097 ++
57098 ++# Test that saddr routing is sticky but not too sticky, changing to this topology:
57099 ++# ┌────────────────────────────────────────┐ ┌────────────────────────────────────────┐
57100 ++# │ $ns1 namespace │ │ $ns2 namespace │
57101 ++# │ │ │ │
57102 ++# │ ┌─────┐ ┌─────┐ │ │ ┌─────┐ ┌─────┐ │
57103 ++# │ │ wg0 │─────────────│veth1│───────────┼────┼──│veth2│────────────│ wg0 │ │
57104 ++# │ ├─────┴──────────┐ ├─────┴──────────┐│ │ ├─────┴──────────┐ ├─────┴──────────┐ │
57105 ++# │ │192.168.241.1/24│ │10.0.0.1/24 ││ │ │10.0.0.2/24 │ │192.168.241.2/24│ │
57106 ++# │ │fd00::1/24 │ │fd00:aa::1/96 ││ │ │fd00:aa::2/96 │ │fd00::2/24 │ │
57107 ++# │ └────────────────┘ └────────────────┘│ │ └────────────────┘ └────────────────┘ │
57108 ++# └────────────────────────────────────────┘ └────────────────────────────────────────┘
57109 ++
57110 ++ip1 link add dev wg0 type wireguard
57111 ++ip2 link add dev wg0 type wireguard
57112 ++configure_peers
57113 ++ip1 link add veth1 type veth peer name veth2
57114 ++ip1 link set veth2 netns $netns2
57115 ++n1 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/all/accept_dad'
57116 ++n2 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/all/accept_dad'
57117 ++n1 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/veth1/accept_dad'
57118 ++n2 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/veth2/accept_dad'
57119 ++n1 bash -c 'printf 1 > /proc/sys/net/ipv4/conf/veth1/promote_secondaries'
57120 ++
57121 ++# First we check that we aren't overly sticky and can fall over to new IPs when old ones are removed
57122 ++ip1 addr add 10.0.0.1/24 dev veth1
57123 ++ip1 addr add fd00:aa::1/96 dev veth1
57124 ++ip2 addr add 10.0.0.2/24 dev veth2
57125 ++ip2 addr add fd00:aa::2/96 dev veth2
57126 ++ip1 link set veth1 up
57127 ++ip2 link set veth2 up
57128 ++waitiface $netns1 veth1
57129 ++waitiface $netns2 veth2
57130 ++n1 wg set wg0 peer "$pub2" endpoint 10.0.0.2:2
57131 ++n1 ping -W 1 -c 1 192.168.241.2
57132 ++ip1 addr add 10.0.0.10/24 dev veth1
57133 ++ip1 addr del 10.0.0.1/24 dev veth1
57134 ++n1 ping -W 1 -c 1 192.168.241.2
57135 ++n1 wg set wg0 peer "$pub2" endpoint [fd00:aa::2]:2
57136 ++n1 ping -W 1 -c 1 192.168.241.2
57137 ++ip1 addr add fd00:aa::10/96 dev veth1
57138 ++ip1 addr del fd00:aa::1/96 dev veth1
57139 ++n1 ping -W 1 -c 1 192.168.241.2
57140 ++
57141 ++# Now we show that we can successfully do reply to sender routing
57142 ++ip1 link set veth1 down
57143 ++ip2 link set veth2 down
57144 ++ip1 addr flush dev veth1
57145 ++ip2 addr flush dev veth2
57146 ++ip1 addr add 10.0.0.1/24 dev veth1
57147 ++ip1 addr add 10.0.0.2/24 dev veth1
57148 ++ip1 addr add fd00:aa::1/96 dev veth1
57149 ++ip1 addr add fd00:aa::2/96 dev veth1
57150 ++ip2 addr add 10.0.0.3/24 dev veth2
57151 ++ip2 addr add fd00:aa::3/96 dev veth2
57152 ++ip1 link set veth1 up
57153 ++ip2 link set veth2 up
57154 ++waitiface $netns1 veth1
57155 ++waitiface $netns2 veth2
57156 ++n2 wg set wg0 peer "$pub1" endpoint 10.0.0.1:1
57157 ++n2 ping -W 1 -c 1 192.168.241.1
57158 ++[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.1:1" ]]
57159 ++n2 wg set wg0 peer "$pub1" endpoint [fd00:aa::1]:1
57160 ++n2 ping -W 1 -c 1 192.168.241.1
57161 ++[[ $(n2 wg show wg0 endpoints) == "$pub1 [fd00:aa::1]:1" ]]
57162 ++n2 wg set wg0 peer "$pub1" endpoint 10.0.0.2:1
57163 ++n2 ping -W 1 -c 1 192.168.241.1
57164 ++[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.2:1" ]]
57165 ++n2 wg set wg0 peer "$pub1" endpoint [fd00:aa::2]:1
57166 ++n2 ping -W 1 -c 1 192.168.241.1
57167 ++[[ $(n2 wg show wg0 endpoints) == "$pub1 [fd00:aa::2]:1" ]]
57168 ++
57169 ++# What happens if the inbound destination address belongs to a different interface than the default route?
57170 ++ip1 link add dummy0 type dummy
57171 ++ip1 addr add 10.50.0.1/24 dev dummy0
57172 ++ip1 link set dummy0 up
57173 ++ip2 route add 10.50.0.0/24 dev veth2
57174 ++n2 wg set wg0 peer "$pub1" endpoint 10.50.0.1:1
57175 ++n2 ping -W 1 -c 1 192.168.241.1
57176 ++[[ $(n2 wg show wg0 endpoints) == "$pub1 10.50.0.1:1" ]]
57177 ++
57178 ++ip1 link del dummy0
57179 ++ip1 addr flush dev veth1
57180 ++ip2 addr flush dev veth2
57181 ++ip1 route flush dev veth1
57182 ++ip2 route flush dev veth2
57183 ++
57184 ++# Now we see what happens if another interface route takes precedence over an ongoing one
57185 ++ip1 link add veth3 type veth peer name veth4
57186 ++ip1 link set veth4 netns $netns2
57187 ++ip1 addr add 10.0.0.1/24 dev veth1
57188 ++ip2 addr add 10.0.0.2/24 dev veth2
57189 ++ip1 addr add 10.0.0.3/24 dev veth3
57190 ++ip1 link set veth1 up
57191 ++ip2 link set veth2 up
57192 ++ip1 link set veth3 up
57193 ++ip2 link set veth4 up
57194 ++waitiface $netns1 veth1
57195 ++waitiface $netns2 veth2
57196 ++waitiface $netns1 veth3
57197 ++waitiface $netns2 veth4
57198 ++ip1 route flush dev veth1
57199 ++ip1 route flush dev veth3
57200 ++ip1 route add 10.0.0.0/24 dev veth1 src 10.0.0.1 metric 2
57201 ++n1 wg set wg0 peer "$pub2" endpoint 10.0.0.2:2
57202 ++n1 ping -W 1 -c 1 192.168.241.2
57203 ++[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.1:1" ]]
57204 ++ip1 route add 10.0.0.0/24 dev veth3 src 10.0.0.3 metric 1
57205 ++n1 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/veth1/rp_filter'
57206 ++n2 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/veth4/rp_filter'
57207 ++n1 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/all/rp_filter'
57208 ++n2 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/all/rp_filter'
57209 ++n1 ping -W 1 -c 1 192.168.241.2
57210 ++[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.3:1" ]]
57211 ++
57212 ++ip1 link del veth1
57213 ++ip1 link del veth3
57214 ++ip1 link del wg0
57215 ++ip2 link del wg0
57216 ++
57217 ++# We test that Netlink/IPC is working properly by doing things that usually cause split responses
57218 ++ip0 link add dev wg0 type wireguard
57219 ++config=( "[Interface]" "PrivateKey=$(wg genkey)" "[Peer]" "PublicKey=$(wg genkey)" )
57220 ++for a in {1..255}; do
57221 ++ for b in {0..255}; do
57222 ++ config+=( "AllowedIPs=$a.$b.0.0/16,$a::$b/128" )
57223 ++ done
57224 ++done
57225 ++n0 wg setconf wg0 <(printf '%s\n' "${config[@]}")
57226 ++i=0
57227 ++for ip in $(n0 wg show wg0 allowed-ips); do
57228 ++ ((++i))
57229 ++done
57230 ++((i == 255*256*2+1))
57231 ++ip0 link del wg0
57232 ++ip0 link add dev wg0 type wireguard
57233 ++config=( "[Interface]" "PrivateKey=$(wg genkey)" )
57234 ++for a in {1..40}; do
57235 ++ config+=( "[Peer]" "PublicKey=$(wg genkey)" )
57236 ++ for b in {1..52}; do
57237 ++ config+=( "AllowedIPs=$a.$b.0.0/16" )
57238 ++ done
57239 ++done
57240 ++n0 wg setconf wg0 <(printf '%s\n' "${config[@]}")
57241 ++i=0
57242 ++while read -r line; do
57243 ++ j=0
57244 ++ for ip in $line; do
57245 ++ ((++j))
57246 ++ done
57247 ++ ((j == 53))
57248 ++ ((++i))
57249 ++done < <(n0 wg show wg0 allowed-ips)
57250 ++((i == 40))
57251 ++ip0 link del wg0
57252 ++ip0 link add wg0 type wireguard
57253 ++config=( )
57254 ++for i in {1..29}; do
57255 ++ config+=( "[Peer]" "PublicKey=$(wg genkey)" )
57256 ++done
57257 ++config+=( "[Peer]" "PublicKey=$(wg genkey)" "AllowedIPs=255.2.3.4/32,abcd::255/128" )
57258 ++n0 wg setconf wg0 <(printf '%s\n' "${config[@]}")
57259 ++n0 wg showconf wg0 > /dev/null
57260 ++ip0 link del wg0
57261 ++
57262 ++allowedips=( )
57263 ++for i in {1..197}; do
57264 ++ allowedips+=( abcd::$i )
57265 ++done
57266 ++saved_ifs="$IFS"
57267 ++IFS=,
57268 ++allowedips="${allowedips[*]}"
57269 ++IFS="$saved_ifs"
57270 ++ip0 link add wg0 type wireguard
57271 ++n0 wg set wg0 peer "$pub1"
57272 ++n0 wg set wg0 peer "$pub2" allowed-ips "$allowedips"
57273 ++{
57274 ++ read -r pub allowedips
57275 ++ [[ $pub == "$pub1" && $allowedips == "(none)" ]]
57276 ++ read -r pub allowedips
57277 ++ [[ $pub == "$pub2" ]]
57278 ++ i=0
57279 ++ for _ in $allowedips; do
57280 ++ ((++i))
57281 ++ done
57282 ++ ((i == 197))
57283 ++} < <(n0 wg show wg0 allowed-ips)
57284 ++ip0 link del wg0
57285 ++
57286 ++! n0 wg show doesnotexist || false
57287 ++
57288 ++ip0 link add wg0 type wireguard
57289 ++n0 wg set wg0 private-key <(echo "$key1") peer "$pub2" preshared-key <(echo "$psk")
57290 ++[[ $(n0 wg show wg0 private-key) == "$key1" ]]
57291 ++[[ $(n0 wg show wg0 preshared-keys) == "$pub2 $psk" ]]
57292 ++n0 wg set wg0 private-key /dev/null peer "$pub2" preshared-key /dev/null
57293 ++[[ $(n0 wg show wg0 private-key) == "(none)" ]]
57294 ++[[ $(n0 wg show wg0 preshared-keys) == "$pub2 (none)" ]]
57295 ++n0 wg set wg0 peer "$pub2"
57296 ++n0 wg set wg0 private-key <(echo "$key2")
57297 ++[[ $(n0 wg show wg0 public-key) == "$pub2" ]]
57298 ++[[ -z $(n0 wg show wg0 peers) ]]
57299 ++n0 wg set wg0 peer "$pub2"
57300 ++[[ -z $(n0 wg show wg0 peers) ]]
57301 ++n0 wg set wg0 private-key <(echo "$key1")
57302 ++n0 wg set wg0 peer "$pub2"
57303 ++[[ $(n0 wg show wg0 peers) == "$pub2" ]]
57304 ++n0 wg set wg0 private-key <(echo "/${key1:1}")
57305 ++[[ $(n0 wg show wg0 private-key) == "+${key1:1}" ]]
57306 ++n0 wg set wg0 peer "$pub2" allowed-ips 0.0.0.0/0,10.0.0.0/8,100.0.0.0/10,172.16.0.0/12,192.168.0.0/16
57307 ++n0 wg set wg0 peer "$pub2" allowed-ips 0.0.0.0/0
57308 ++n0 wg set wg0 peer "$pub2" allowed-ips ::/0,1700::/111,5000::/4,e000::/37,9000::/75
57309 ++n0 wg set wg0 peer "$pub2" allowed-ips ::/0
57310 ++ip0 link del wg0
57311 ++
57312 ++declare -A objects
57313 ++while read -t 0.1 -r line 2>/dev/null || [[ $? -ne 142 ]]; do
57314 ++ [[ $line =~ .*(wg[0-9]+:\ [A-Z][a-z]+\ [0-9]+)\ .*(created|destroyed).* ]] || continue
57315 ++ objects["${BASH_REMATCH[1]}"]+="${BASH_REMATCH[2]}"
57316 ++done < /dev/kmsg
57317 ++alldeleted=1
57318 ++for object in "${!objects[@]}"; do
57319 ++ if [[ ${objects["$object"]} != *createddestroyed ]]; then
57320 ++ echo "Error: $object: merely ${objects["$object"]}" >&3
57321 ++ alldeleted=0
57322 ++ fi
57323 ++done
57324 ++[[ $alldeleted -eq 1 ]]
57325 ++pretty "" "Objects that were created were also destroyed."
57326 +--
57327 +cgit v1.2.3-4-ga26e
57328 +
57329 +
57330 +From c5f784d785b0e9f68a446abf2e9ec31406adb721 Mon Sep 17 00:00:00 2001
57331 +From: "Jason A. Donenfeld" <Jason@×××××.com>
57332 +Date: Sun, 15 Dec 2019 22:08:00 +0100
57333 +Subject: wireguard: selftests: import harness makefile for test suite
57334 +
57335 +commit 65d88d04114bca7d85faebd5fed61069cb2b632c upstream.
57336 +
57337 +WireGuard has been using this on build.wireguard.com for the last
57338 +several years with considerable success. It allows for very quick and
57339 +iterative development cycles, and supports several platforms.
57340 +
57341 +To run the test suite on your current platform in QEMU:
57342 +
57343 + $ make -C tools/testing/selftests/wireguard/qemu -j$(nproc)
57344 +
57345 +To run it with KASAN and such turned on:
57346 +
57347 + $ DEBUG_KERNEL=yes make -C tools/testing/selftests/wireguard/qemu -j$(nproc)
57348 +
57349 +To run it emulated for another platform in QEMU:
57350 +
57351 + $ ARCH=arm make -C tools/testing/selftests/wireguard/qemu -j$(nproc)
57352 +
57353 +At the moment, we support aarch64_be, aarch64, arm, armeb, i686, m68k,
57354 +mips64, mips64el, mips, mipsel, powerpc64le, powerpc, and x86_64.
57355 +
57356 +The system supports incremental rebuilding, so it should be very fast to
57357 +change a single file and then test it out and have immediate feedback.
57358 +
57359 +This requires the right toolchain and qemu to be installed beforehand.
57360 +I've had success with those from musl.cc.
57361 +
57362 +This is tailored for WireGuard at the moment, though later projects
57363 +might generalize it for other network testing.
57364 +
57365 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
57366 +Signed-off-by: David S. Miller <davem@×××××××××.net>
57367 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
57368 +---
57369 + tools/testing/selftests/wireguard/qemu/.gitignore | 2 +
57370 + tools/testing/selftests/wireguard/qemu/Makefile | 385 +++++++++++++++++++++
57371 + .../selftests/wireguard/qemu/arch/aarch64.config | 5 +
57372 + .../wireguard/qemu/arch/aarch64_be.config | 6 +
57373 + .../selftests/wireguard/qemu/arch/arm.config | 9 +
57374 + .../selftests/wireguard/qemu/arch/armeb.config | 10 +
57375 + .../selftests/wireguard/qemu/arch/i686.config | 5 +
57376 + .../selftests/wireguard/qemu/arch/m68k.config | 9 +
57377 + .../selftests/wireguard/qemu/arch/mips.config | 11 +
57378 + .../selftests/wireguard/qemu/arch/mips64.config | 14 +
57379 + .../selftests/wireguard/qemu/arch/mips64el.config | 15 +
57380 + .../selftests/wireguard/qemu/arch/mipsel.config | 12 +
57381 + .../selftests/wireguard/qemu/arch/powerpc.config | 10 +
57382 + .../wireguard/qemu/arch/powerpc64le.config | 12 +
57383 + .../selftests/wireguard/qemu/arch/x86_64.config | 5 +
57384 + .../testing/selftests/wireguard/qemu/debug.config | 67 ++++
57385 + tools/testing/selftests/wireguard/qemu/init.c | 284 +++++++++++++++
57386 + .../testing/selftests/wireguard/qemu/kernel.config | 86 +++++
57387 + 18 files changed, 947 insertions(+)
57388 + create mode 100644 tools/testing/selftests/wireguard/qemu/.gitignore
57389 + create mode 100644 tools/testing/selftests/wireguard/qemu/Makefile
57390 + create mode 100644 tools/testing/selftests/wireguard/qemu/arch/aarch64.config
57391 + create mode 100644 tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config
57392 + create mode 100644 tools/testing/selftests/wireguard/qemu/arch/arm.config
57393 + create mode 100644 tools/testing/selftests/wireguard/qemu/arch/armeb.config
57394 + create mode 100644 tools/testing/selftests/wireguard/qemu/arch/i686.config
57395 + create mode 100644 tools/testing/selftests/wireguard/qemu/arch/m68k.config
57396 + create mode 100644 tools/testing/selftests/wireguard/qemu/arch/mips.config
57397 + create mode 100644 tools/testing/selftests/wireguard/qemu/arch/mips64.config
57398 + create mode 100644 tools/testing/selftests/wireguard/qemu/arch/mips64el.config
57399 + create mode 100644 tools/testing/selftests/wireguard/qemu/arch/mipsel.config
57400 + create mode 100644 tools/testing/selftests/wireguard/qemu/arch/powerpc.config
57401 + create mode 100644 tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config
57402 + create mode 100644 tools/testing/selftests/wireguard/qemu/arch/x86_64.config
57403 + create mode 100644 tools/testing/selftests/wireguard/qemu/debug.config
57404 + create mode 100644 tools/testing/selftests/wireguard/qemu/init.c
57405 + create mode 100644 tools/testing/selftests/wireguard/qemu/kernel.config
57406 +
57407 +diff --git a/tools/testing/selftests/wireguard/qemu/.gitignore b/tools/testing/selftests/wireguard/qemu/.gitignore
57408 +new file mode 100644
57409 +index 000000000000..415b542a9d59
57410 +--- /dev/null
57411 ++++ b/tools/testing/selftests/wireguard/qemu/.gitignore
57412 +@@ -0,0 +1,2 @@
57413 ++build/
57414 ++distfiles/
57415 +diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile
57416 +new file mode 100644
57417 +index 000000000000..6d51bf78eeff
57418 +--- /dev/null
57419 ++++ b/tools/testing/selftests/wireguard/qemu/Makefile
57420 +@@ -0,0 +1,385 @@
57421 ++# SPDX-License-Identifier: GPL-2.0
57422 ++#
57423 ++# Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
57424 ++
57425 ++PWD := $(shell pwd)
57426 ++
57427 ++CHOST := $(shell gcc -dumpmachine)
57428 ++ifneq (,$(ARCH))
57429 ++CBUILD := $(subst -gcc,,$(lastword $(subst /, ,$(firstword $(wildcard $(foreach bindir,$(subst :, ,$(PATH)),$(bindir)/$(ARCH)-*-gcc))))))
57430 ++ifeq (,$(CBUILD))
57431 ++$(error The toolchain for $(ARCH) is not installed)
57432 ++endif
57433 ++else
57434 ++CBUILD := $(CHOST)
57435 ++ARCH := $(firstword $(subst -, ,$(CBUILD)))
57436 ++endif
57437 ++
57438 ++# Set these from the environment to override
57439 ++KERNEL_PATH ?= $(PWD)/../../../../..
57440 ++BUILD_PATH ?= $(PWD)/build/$(ARCH)
57441 ++DISTFILES_PATH ?= $(PWD)/distfiles
57442 ++NR_CPUS ?= 4
57443 ++
57444 ++MIRROR := https://download.wireguard.com/qemu-test/distfiles/
57445 ++
57446 ++default: qemu
57447 ++
57448 ++# variable name, tarball project name, version, tarball extension, default URI base
57449 ++define tar_download =
57450 ++$(1)_VERSION := $(3)
57451 ++$(1)_NAME := $(2)-$$($(1)_VERSION)
57452 ++$(1)_TAR := $(DISTFILES_PATH)/$$($(1)_NAME)$(4)
57453 ++$(1)_PATH := $(BUILD_PATH)/$$($(1)_NAME)
57454 ++$(call file_download,$$($(1)_NAME)$(4),$(5),$(6))
57455 ++endef
57456 ++
57457 ++define file_download =
57458 ++$(DISTFILES_PATH)/$(1):
57459 ++ mkdir -p $(DISTFILES_PATH)
57460 ++ flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -t inf --retry-on-http-error=404 -O $$@.tmp $(2)$(1) || rm -f $$@.tmp'
57461 ++ if echo "$(3) $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi
57462 ++endef
57463 ++
57464 ++$(eval $(call tar_download,MUSL,musl,1.1.20,.tar.gz,https://www.musl-libc.org/releases/,44be8771d0e6c6b5f82dd15662eb2957c9a3173a19a8b49966ac0542bbd40d61))
57465 ++$(eval $(call tar_download,LIBMNL,libmnl,1.0.4,.tar.bz2,https://www.netfilter.org/projects/libmnl/files/,171f89699f286a5854b72b91d06e8f8e3683064c5901fb09d954a9ab6f551f81))
57466 ++$(eval $(call tar_download,IPERF,iperf,3.1.7,.tar.gz,http://downloads.es.net/pub/iperf/,a4ef73406fe92250602b8da2ae89ec53211f805df97a1d1d629db5a14043734f))
57467 ++$(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d))
57468 ++$(eval $(call tar_download,IPROUTE2,iproute2,5.1.0,.tar.gz,https://www.kernel.org/pub/linux/utils/net/iproute2/,9b43707d6075ecdca14803ca8ce0c8553848c49fa1586d12fd508d66577243f2))
57469 ++$(eval $(call tar_download,IPTABLES,iptables,1.6.1,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,0fc2d7bd5d7be11311726466789d4c65fb4c8e096c9182b56ce97440864f0cf5))
57470 ++$(eval $(call tar_download,NMAP,nmap,7.60,.tar.bz2,https://nmap.org/dist/,a8796ecc4fa6c38aad6139d9515dc8113023a82e9d787e5a5fb5fa1b05516f21))
57471 ++$(eval $(call tar_download,IPUTILS,iputils,s20161105,.tar.gz,https://github.com/iputils/iputils/archive/s20161105.tar.gz/#,f813092f03d17294fd23544b129b95cdb87fe19f7970a51908a6b88509acad8a))
57472 ++$(eval $(call tar_download,WIREGUARD_TOOLS,WireGuard,0.0.20191212,.tar.xz,https://git.zx2c4.com/WireGuard/snapshot/,b0d718380f7a8822b2f12d75e462fa4eafa3a77871002981f367cd4fe2a1b071))
57473 ++
57474 ++KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug)
57475 ++rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d))
57476 ++WIREGUARD_SOURCES := $(call rwildcard,$(KERNEL_PATH)/drivers/net/wireguard/,*)
57477 ++
57478 ++export CFLAGS ?= -O3 -pipe
57479 ++export LDFLAGS ?=
57480 ++export CPPFLAGS := -I$(BUILD_PATH)/include
57481 ++
57482 ++ifeq ($(CHOST),$(CBUILD))
57483 ++CROSS_COMPILE_FLAG := --host=$(CHOST)
57484 ++NOPIE_GCC := gcc -fno-PIE
57485 ++CFLAGS += -march=native
57486 ++STRIP := strip
57487 ++else
57488 ++$(info Cross compilation: building for $(CBUILD) using $(CHOST))
57489 ++CROSS_COMPILE_FLAG := --build=$(CBUILD) --host=$(CHOST)
57490 ++export CROSS_COMPILE=$(CBUILD)-
57491 ++NOPIE_GCC := $(CBUILD)-gcc -fno-PIE
57492 ++STRIP := $(CBUILD)-strip
57493 ++endif
57494 ++ifeq ($(ARCH),aarch64)
57495 ++QEMU_ARCH := aarch64
57496 ++KERNEL_ARCH := arm64
57497 ++KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image
57498 ++ifeq ($(CHOST),$(CBUILD))
57499 ++QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
57500 ++else
57501 ++QEMU_MACHINE := -cpu cortex-a53 -machine virt
57502 ++CFLAGS += -march=armv8-a -mtune=cortex-a53
57503 ++endif
57504 ++else ifeq ($(ARCH),aarch64_be)
57505 ++QEMU_ARCH := aarch64
57506 ++KERNEL_ARCH := arm64
57507 ++KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image
57508 ++ifeq ($(CHOST),$(CBUILD))
57509 ++QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
57510 ++else
57511 ++QEMU_MACHINE := -cpu cortex-a53 -machine virt
57512 ++CFLAGS += -march=armv8-a -mtune=cortex-a53
57513 ++endif
57514 ++else ifeq ($(ARCH),arm)
57515 ++QEMU_ARCH := arm
57516 ++KERNEL_ARCH := arm
57517 ++KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage
57518 ++ifeq ($(CHOST),$(CBUILD))
57519 ++QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
57520 ++else
57521 ++QEMU_MACHINE := -cpu cortex-a15 -machine virt
57522 ++CFLAGS += -march=armv7-a -mtune=cortex-a15 -mabi=aapcs-linux
57523 ++endif
57524 ++else ifeq ($(ARCH),armeb)
57525 ++QEMU_ARCH := arm
57526 ++KERNEL_ARCH := arm
57527 ++KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage
57528 ++ifeq ($(CHOST),$(CBUILD))
57529 ++QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
57530 ++else
57531 ++QEMU_MACHINE := -cpu cortex-a15 -machine virt
57532 ++CFLAGS += -march=armv7-a -mabi=aapcs-linux # We don't pass -mtune=cortex-a15 due to a compiler bug on big endian.
57533 ++LDFLAGS += -Wl,--be8
57534 ++endif
57535 ++else ifeq ($(ARCH),x86_64)
57536 ++QEMU_ARCH := x86_64
57537 ++KERNEL_ARCH := x86_64
57538 ++KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage
57539 ++ifeq ($(CHOST),$(CBUILD))
57540 ++QEMU_MACHINE := -cpu host -machine q35,accel=kvm
57541 ++else
57542 ++QEMU_MACHINE := -cpu Skylake-Server -machine q35
57543 ++CFLAGS += -march=skylake-avx512
57544 ++endif
57545 ++else ifeq ($(ARCH),i686)
57546 ++QEMU_ARCH := i386
57547 ++KERNEL_ARCH := x86
57548 ++KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage
57549 ++ifeq ($(subst i686,x86_64,$(CBUILD)),$(CHOST))
57550 ++QEMU_MACHINE := -cpu host -machine q35,accel=kvm
57551 ++else
57552 ++QEMU_MACHINE := -cpu coreduo -machine q35
57553 ++CFLAGS += -march=prescott
57554 ++endif
57555 ++else ifeq ($(ARCH),mips64)
57556 ++QEMU_ARCH := mips64
57557 ++KERNEL_ARCH := mips
57558 ++KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
57559 ++ifeq ($(CHOST),$(CBUILD))
57560 ++QEMU_MACHINE := -cpu host -machine malta,accel=kvm
57561 ++CFLAGS += -EB
57562 ++else
57563 ++QEMU_MACHINE := -cpu MIPS64R2-generic -machine malta -smp 1
57564 ++CFLAGS += -march=mips64r2 -EB
57565 ++endif
57566 ++else ifeq ($(ARCH),mips64el)
57567 ++QEMU_ARCH := mips64el
57568 ++KERNEL_ARCH := mips
57569 ++KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
57570 ++ifeq ($(CHOST),$(CBUILD))
57571 ++QEMU_MACHINE := -cpu host -machine malta,accel=kvm
57572 ++CFLAGS += -EL
57573 ++else
57574 ++QEMU_MACHINE := -cpu MIPS64R2-generic -machine malta -smp 1
57575 ++CFLAGS += -march=mips64r2 -EL
57576 ++endif
57577 ++else ifeq ($(ARCH),mips)
57578 ++QEMU_ARCH := mips
57579 ++KERNEL_ARCH := mips
57580 ++KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
57581 ++ifeq ($(CHOST),$(CBUILD))
57582 ++QEMU_MACHINE := -cpu host -machine malta,accel=kvm
57583 ++CFLAGS += -EB
57584 ++else
57585 ++QEMU_MACHINE := -cpu 24Kf -machine malta -smp 1
57586 ++CFLAGS += -march=mips32r2 -EB
57587 ++endif
57588 ++else ifeq ($(ARCH),mipsel)
57589 ++QEMU_ARCH := mipsel
57590 ++KERNEL_ARCH := mips
57591 ++KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
57592 ++ifeq ($(CHOST),$(CBUILD))
57593 ++QEMU_MACHINE := -cpu host -machine malta,accel=kvm
57594 ++CFLAGS += -EL
57595 ++else
57596 ++QEMU_MACHINE := -cpu 24Kf -machine malta -smp 1
57597 ++CFLAGS += -march=mips32r2 -EL
57598 ++endif
57599 ++else ifeq ($(ARCH),powerpc64le)
57600 ++QEMU_ARCH := ppc64
57601 ++KERNEL_ARCH := powerpc
57602 ++KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
57603 ++ifeq ($(CHOST),$(CBUILD))
57604 ++QEMU_MACHINE := -cpu host,accel=kvm -machine pseries
57605 ++else
57606 ++QEMU_MACHINE := -machine pseries
57607 ++endif
57608 ++CFLAGS += -mcpu=powerpc64le -mlong-double-64
57609 ++else ifeq ($(ARCH),powerpc)
57610 ++QEMU_ARCH := ppc
57611 ++KERNEL_ARCH := powerpc
57612 ++KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/powerpc/boot/uImage
57613 ++ifeq ($(CHOST),$(CBUILD))
57614 ++QEMU_MACHINE := -cpu host,accel=kvm -machine ppce500
57615 ++else
57616 ++QEMU_MACHINE := -machine ppce500
57617 ++endif
57618 ++CFLAGS += -mcpu=powerpc -mlong-double-64 -msecure-plt
57619 ++else ifeq ($(ARCH),m68k)
57620 ++QEMU_ARCH := m68k
57621 ++KERNEL_ARCH := m68k
57622 ++KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
57623 ++ifeq ($(CHOST),$(CBUILD))
57624 ++QEMU_MACHINE := -cpu host,accel=kvm -machine q800
57625 ++else
57626 ++QEMU_MACHINE := -machine q800
57627 ++endif
57628 ++else
57629 ++$(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, mips64el, powerpc64le, powerpc, m68k)
57630 ++endif
57631 ++
57632 ++REAL_CC := $(CBUILD)-gcc
57633 ++MUSL_CC := $(BUILD_PATH)/musl-gcc
57634 ++export CC := $(MUSL_CC)
57635 ++USERSPACE_DEPS := $(MUSL_CC) $(BUILD_PATH)/include/.installed $(BUILD_PATH)/include/linux/.installed
57636 ++
57637 ++build: $(KERNEL_BZIMAGE)
57638 ++qemu: $(KERNEL_BZIMAGE)
57639 ++ rm -f $(BUILD_PATH)/result
57640 ++ timeout --foreground 20m qemu-system-$(QEMU_ARCH) \
57641 ++ -nodefaults \
57642 ++ -nographic \
57643 ++ -smp $(NR_CPUS) \
57644 ++ $(QEMU_MACHINE) \
57645 ++ -m $$(grep -q CONFIG_DEBUG_KMEMLEAK=y $(KERNEL_BUILD_PATH)/.config && echo 1G || echo 256M) \
57646 ++ -serial stdio \
57647 ++ -serial file:$(BUILD_PATH)/result \
57648 ++ -no-reboot \
57649 ++ -monitor none \
57650 ++ -kernel $<
57651 ++ grep -Fq success $(BUILD_PATH)/result
57652 ++
57653 ++$(BUILD_PATH)/init-cpio-spec.txt:
57654 ++ mkdir -p $(BUILD_PATH)
57655 ++ echo "file /init $(BUILD_PATH)/init 755 0 0" > $@
57656 ++ echo "file /init.sh $(PWD)/../netns.sh 755 0 0" >> $@
57657 ++ echo "dir /dev 755 0 0" >> $@
57658 ++ echo "nod /dev/console 644 0 0 c 5 1" >> $@
57659 ++ echo "dir /bin 755 0 0" >> $@
57660 ++ echo "file /bin/iperf3 $(IPERF_PATH)/src/iperf3 755 0 0" >> $@
57661 ++ echo "file /bin/wg $(WIREGUARD_TOOLS_PATH)/src/tools/wg 755 0 0" >> $@
57662 ++ echo "file /bin/bash $(BASH_PATH)/bash 755 0 0" >> $@
57663 ++ echo "file /bin/ip $(IPROUTE2_PATH)/ip/ip 755 0 0" >> $@
57664 ++ echo "file /bin/ss $(IPROUTE2_PATH)/misc/ss 755 0 0" >> $@
57665 ++ echo "file /bin/ping $(IPUTILS_PATH)/ping 755 0 0" >> $@
57666 ++ echo "file /bin/ncat $(NMAP_PATH)/ncat/ncat 755 0 0" >> $@
57667 ++ echo "file /bin/xtables-multi $(IPTABLES_PATH)/iptables/xtables-multi 755 0 0" >> $@
57668 ++ echo "slink /bin/iptables xtables-multi 777 0 0" >> $@
57669 ++ echo "slink /bin/ping6 ping 777 0 0" >> $@
57670 ++ echo "dir /lib 755 0 0" >> $@
57671 ++ echo "file /lib/libc.so $(MUSL_PATH)/lib/libc.so 755 0 0" >> $@
57672 ++ echo "slink /lib/ld-linux.so.1 libc.so 777 0 0" >> $@
57673 ++
57674 ++$(KERNEL_BUILD_PATH)/.config: kernel.config arch/$(ARCH).config
57675 ++ mkdir -p $(KERNEL_BUILD_PATH)
57676 ++ cp kernel.config $(KERNEL_BUILD_PATH)/minimal.config
57677 ++ printf 'CONFIG_NR_CPUS=$(NR_CPUS)\nCONFIG_INITRAMFS_SOURCE="$(BUILD_PATH)/init-cpio-spec.txt"\n' >> $(KERNEL_BUILD_PATH)/minimal.config
57678 ++ cat arch/$(ARCH).config >> $(KERNEL_BUILD_PATH)/minimal.config
57679 ++ $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) allnoconfig
57680 ++ cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config $(KERNEL_BUILD_PATH)/minimal.config
57681 ++ $(if $(findstring yes,$(DEBUG_KERNEL)),cp debug.config $(KERNEL_BUILD_PATH) && cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config debug.config,)
57682 ++
57683 ++$(KERNEL_BZIMAGE): $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(MUSL_PATH)/lib/libc.so $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/tools/wg $(BUILD_PATH)/init ../netns.sh $(WIREGUARD_SOURCES)
57684 ++ $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) CC="$(NOPIE_GCC)"
57685 ++
57686 ++$(BUILD_PATH)/include/linux/.installed: | $(KERNEL_BUILD_PATH)/.config
57687 ++ $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) INSTALL_HDR_PATH=$(BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) headers_install
57688 ++ touch $@
57689 ++
57690 ++$(MUSL_PATH)/lib/libc.so: $(MUSL_TAR)
57691 ++ mkdir -p $(BUILD_PATH)
57692 ++ flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
57693 ++ cd $(MUSL_PATH) && CC=$(REAL_CC) ./configure --prefix=/ --disable-static --build=$(CBUILD)
57694 ++ $(MAKE) -C $(MUSL_PATH)
57695 ++ $(STRIP) -s $@
57696 ++
57697 ++$(BUILD_PATH)/include/.installed: $(MUSL_PATH)/lib/libc.so
57698 ++ $(MAKE) -C $(MUSL_PATH) DESTDIR=$(BUILD_PATH) install-headers
57699 ++ touch $@
57700 ++
57701 ++$(MUSL_CC): $(MUSL_PATH)/lib/libc.so
57702 ++ sh $(MUSL_PATH)/tools/musl-gcc.specs.sh $(BUILD_PATH)/include $(MUSL_PATH)/lib /lib/ld-linux.so.1 > $(BUILD_PATH)/musl-gcc.specs
57703 ++ printf '#!/bin/sh\nexec "$(REAL_CC)" --specs="$(BUILD_PATH)/musl-gcc.specs" -fno-stack-protector -no-pie "$$@"\n' > $(BUILD_PATH)/musl-gcc
57704 ++ chmod +x $(BUILD_PATH)/musl-gcc
57705 ++
57706 ++$(IPERF_PATH)/.installed: $(IPERF_TAR)
57707 ++ mkdir -p $(BUILD_PATH)
57708 ++ flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
57709 ++ sed -i '1s/^/#include <stdint.h>/' $(IPERF_PATH)/src/cjson.h $(IPERF_PATH)/src/timer.h
57710 ++ sed -i -r 's/-p?g//g' $(IPERF_PATH)/src/Makefile*
57711 ++ touch $@
57712 ++
57713 ++$(IPERF_PATH)/src/iperf3: | $(IPERF_PATH)/.installed $(USERSPACE_DEPS)
57714 ++ cd $(IPERF_PATH) && CFLAGS="$(CFLAGS) -D_GNU_SOURCE" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared
57715 ++ $(MAKE) -C $(IPERF_PATH)
57716 ++ $(STRIP) -s $@
57717 ++
57718 ++$(LIBMNL_PATH)/.installed: $(LIBMNL_TAR)
57719 ++ flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
57720 ++ touch $@
57721 ++
57722 ++$(LIBMNL_PATH)/src/.libs/libmnl.a: | $(LIBMNL_PATH)/.installed $(USERSPACE_DEPS)
57723 ++ cd $(LIBMNL_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared
57724 ++ $(MAKE) -C $(LIBMNL_PATH)
57725 ++ sed -i 's:prefix=.*:prefix=$(LIBMNL_PATH):' $(LIBMNL_PATH)/libmnl.pc
57726 ++
57727 ++$(WIREGUARD_TOOLS_PATH)/.installed: $(WIREGUARD_TOOLS_TAR)
57728 ++ flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
57729 ++ touch $@
57730 ++
57731 ++$(WIREGUARD_TOOLS_PATH)/src/tools/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
57732 ++ LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" $(MAKE) -C $(WIREGUARD_TOOLS_PATH)/src/tools LIBMNL_CFLAGS="-I$(LIBMNL_PATH)/include" LIBMNL_LDLIBS="-lmnl" wg
57733 ++ $(STRIP) -s $@
57734 ++
57735 ++$(BUILD_PATH)/init: init.c | $(USERSPACE_DEPS)
57736 ++ mkdir -p $(BUILD_PATH)
57737 ++ $(MUSL_CC) -o $@ $(CFLAGS) $(LDFLAGS) -std=gnu11 $<
57738 ++ $(STRIP) -s $@
57739 ++
57740 ++$(IPUTILS_PATH)/.installed: $(IPUTILS_TAR)
57741 ++ mkdir -p $(BUILD_PATH)
57742 ++ flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
57743 ++ touch $@
57744 ++
57745 ++$(IPUTILS_PATH)/ping: | $(IPUTILS_PATH)/.installed $(USERSPACE_DEPS)
57746 ++ $(MAKE) -C $(IPUTILS_PATH) USE_CAP=no USE_IDN=no USE_NETTLE=no USE_CRYPTO=no ping
57747 ++ $(STRIP) -s $@
57748 ++
57749 ++$(BASH_PATH)/.installed: $(BASH_TAR)
57750 ++ mkdir -p $(BUILD_PATH)
57751 ++ flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
57752 ++ touch $@
57753 ++
57754 ++$(BASH_PATH)/bash: | $(BASH_PATH)/.installed $(USERSPACE_DEPS)
57755 ++ cd $(BASH_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --without-bash-malloc --disable-debugger --disable-help-builtin --disable-history --disable-multibyte --disable-progcomp --disable-readline --disable-mem-scramble
57756 ++ $(MAKE) -C $(BASH_PATH)
57757 ++ $(STRIP) -s $@
57758 ++
57759 ++$(IPROUTE2_PATH)/.installed: $(IPROUTE2_TAR)
57760 ++ mkdir -p $(BUILD_PATH)
57761 ++ flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
57762 ++ printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=y\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS -DHAVE_LIBMNL -I$(LIBMNL_PATH)/include\nLDLIBS+=-lmnl' > $(IPROUTE2_PATH)/config.mk
57763 ++ printf 'lib: snapshot\n\t$$(MAKE) -C lib\nip/ip: lib\n\t$$(MAKE) -C ip ip\nmisc/ss: lib\n\t$$(MAKE) -C misc ss\n' >> $(IPROUTE2_PATH)/Makefile
57764 ++ touch $@
57765 ++
57766 ++$(IPROUTE2_PATH)/ip/ip: | $(IPROUTE2_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
57767 ++ LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ ip/ip
57768 ++ $(STRIP) -s $(IPROUTE2_PATH)/ip/ip
57769 ++
57770 ++$(IPROUTE2_PATH)/misc/ss: | $(IPROUTE2_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
57771 ++ LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ misc/ss
57772 ++ $(STRIP) -s $(IPROUTE2_PATH)/misc/ss
57773 ++
57774 ++$(IPTABLES_PATH)/.installed: $(IPTABLES_TAR)
57775 ++ mkdir -p $(BUILD_PATH)
57776 ++ flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
57777 ++ sed -i -e "/nfnetlink=[01]/s:=[01]:=0:" -e "/nfconntrack=[01]/s:=[01]:=0:" $(IPTABLES_PATH)/configure
57778 ++ touch $@
57779 ++
57780 ++$(IPTABLES_PATH)/iptables/xtables-multi: | $(IPTABLES_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
57781 ++ cd $(IPTABLES_PATH) && PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --disable-nftables --disable-bpf-compiler --disable-nfsynproxy --disable-libipq --with-kernel=$(BUILD_PATH)/include
57782 ++ $(MAKE) -C $(IPTABLES_PATH)
57783 ++ $(STRIP) -s $@
57784 ++
57785 ++$(NMAP_PATH)/.installed: $(NMAP_TAR)
57786 ++ mkdir -p $(BUILD_PATH)
57787 ++ flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
57788 ++ touch $@
57789 ++
57790 ++$(NMAP_PATH)/ncat/ncat: | $(NMAP_PATH)/.installed $(USERSPACE_DEPS)
57791 ++ cd $(NMAP_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --without-ndiff --without-zenmap --without-nping --with-libpcap=included --with-libpcre=included --with-libdnet=included --without-liblua --with-liblinear=included --without-nmap-update --without-openssl --with-pcap=linux
57792 ++ $(MAKE) -C $(NMAP_PATH) build-ncat
57793 ++ $(STRIP) -s $@
57794 ++
57795 ++clean:
57796 ++ rm -rf $(BUILD_PATH)
57797 ++
57798 ++distclean: clean
57799 ++ rm -rf $(DISTFILES_PATH)
57800 ++
57801 ++menuconfig: $(KERNEL_BUILD_PATH)/.config
57802 ++ $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) CC="$(NOPIE_GCC)" menuconfig
57803 ++
57804 ++.PHONY: qemu build clean distclean menuconfig
57805 ++.DELETE_ON_ERROR:
57806 +diff --git a/tools/testing/selftests/wireguard/qemu/arch/aarch64.config b/tools/testing/selftests/wireguard/qemu/arch/aarch64.config
57807 +new file mode 100644
57808 +index 000000000000..3d063bb247bb
57809 +--- /dev/null
57810 ++++ b/tools/testing/selftests/wireguard/qemu/arch/aarch64.config
57811 +@@ -0,0 +1,5 @@
57812 ++CONFIG_SERIAL_AMBA_PL011=y
57813 ++CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
57814 ++CONFIG_CMDLINE_BOOL=y
57815 ++CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1"
57816 ++CONFIG_FRAME_WARN=1280
57817 +diff --git a/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config b/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config
57818 +new file mode 100644
57819 +index 000000000000..dbdc7e406a7b
57820 +--- /dev/null
57821 ++++ b/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config
57822 +@@ -0,0 +1,6 @@
57823 ++CONFIG_CPU_BIG_ENDIAN=y
57824 ++CONFIG_SERIAL_AMBA_PL011=y
57825 ++CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
57826 ++CONFIG_CMDLINE_BOOL=y
57827 ++CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1"
57828 ++CONFIG_FRAME_WARN=1280
57829 +diff --git a/tools/testing/selftests/wireguard/qemu/arch/arm.config b/tools/testing/selftests/wireguard/qemu/arch/arm.config
57830 +new file mode 100644
57831 +index 000000000000..148f49905418
57832 +--- /dev/null
57833 ++++ b/tools/testing/selftests/wireguard/qemu/arch/arm.config
57834 +@@ -0,0 +1,9 @@
57835 ++CONFIG_MMU=y
57836 ++CONFIG_ARCH_MULTI_V7=y
57837 ++CONFIG_ARCH_VIRT=y
57838 ++CONFIG_THUMB2_KERNEL=n
57839 ++CONFIG_SERIAL_AMBA_PL011=y
57840 ++CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
57841 ++CONFIG_CMDLINE_BOOL=y
57842 ++CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1"
57843 ++CONFIG_FRAME_WARN=1024
57844 +diff --git a/tools/testing/selftests/wireguard/qemu/arch/armeb.config b/tools/testing/selftests/wireguard/qemu/arch/armeb.config
57845 +new file mode 100644
57846 +index 000000000000..bd76b07d00a2
57847 +--- /dev/null
57848 ++++ b/tools/testing/selftests/wireguard/qemu/arch/armeb.config
57849 +@@ -0,0 +1,10 @@
57850 ++CONFIG_MMU=y
57851 ++CONFIG_ARCH_MULTI_V7=y
57852 ++CONFIG_ARCH_VIRT=y
57853 ++CONFIG_THUMB2_KERNEL=n
57854 ++CONFIG_SERIAL_AMBA_PL011=y
57855 ++CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
57856 ++CONFIG_CMDLINE_BOOL=y
57857 ++CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1"
57858 ++CONFIG_CPU_BIG_ENDIAN=y
57859 ++CONFIG_FRAME_WARN=1024
57860 +diff --git a/tools/testing/selftests/wireguard/qemu/arch/i686.config b/tools/testing/selftests/wireguard/qemu/arch/i686.config
57861 +new file mode 100644
57862 +index 000000000000..a85025d7206e
57863 +--- /dev/null
57864 ++++ b/tools/testing/selftests/wireguard/qemu/arch/i686.config
57865 +@@ -0,0 +1,5 @@
57866 ++CONFIG_SERIAL_8250=y
57867 ++CONFIG_SERIAL_8250_CONSOLE=y
57868 ++CONFIG_CMDLINE_BOOL=y
57869 ++CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
57870 ++CONFIG_FRAME_WARN=1024
57871 +diff --git a/tools/testing/selftests/wireguard/qemu/arch/m68k.config b/tools/testing/selftests/wireguard/qemu/arch/m68k.config
57872 +new file mode 100644
57873 +index 000000000000..5381ea10896c
57874 +--- /dev/null
57875 ++++ b/tools/testing/selftests/wireguard/qemu/arch/m68k.config
57876 +@@ -0,0 +1,9 @@
57877 ++CONFIG_MMU=y
57878 ++CONFIG_M68040=y
57879 ++CONFIG_MAC=y
57880 ++CONFIG_SERIAL_PMACZILOG=y
57881 ++CONFIG_SERIAL_PMACZILOG_TTYS=y
57882 ++CONFIG_SERIAL_PMACZILOG_CONSOLE=y
57883 ++CONFIG_CMDLINE_BOOL=y
57884 ++CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
57885 ++CONFIG_FRAME_WARN=1024
57886 +diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips.config b/tools/testing/selftests/wireguard/qemu/arch/mips.config
57887 +new file mode 100644
57888 +index 000000000000..df71d6b95546
57889 +--- /dev/null
57890 ++++ b/tools/testing/selftests/wireguard/qemu/arch/mips.config
57891 +@@ -0,0 +1,11 @@
57892 ++CONFIG_CPU_MIPS32_R2=y
57893 ++CONFIG_MIPS_MALTA=y
57894 ++CONFIG_MIPS_CPS=y
57895 ++CONFIG_MIPS_FP_SUPPORT=y
57896 ++CONFIG_POWER_RESET=y
57897 ++CONFIG_POWER_RESET_SYSCON=y
57898 ++CONFIG_SERIAL_8250=y
57899 ++CONFIG_SERIAL_8250_CONSOLE=y
57900 ++CONFIG_CMDLINE_BOOL=y
57901 ++CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
57902 ++CONFIG_FRAME_WARN=1024
57903 +diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips64.config b/tools/testing/selftests/wireguard/qemu/arch/mips64.config
57904 +new file mode 100644
57905 +index 000000000000..90c783f725c4
57906 +--- /dev/null
57907 ++++ b/tools/testing/selftests/wireguard/qemu/arch/mips64.config
57908 +@@ -0,0 +1,14 @@
57909 ++CONFIG_64BIT=y
57910 ++CONFIG_CPU_MIPS64_R2=y
57911 ++CONFIG_MIPS32_N32=y
57912 ++CONFIG_CPU_HAS_MSA=y
57913 ++CONFIG_MIPS_MALTA=y
57914 ++CONFIG_MIPS_CPS=y
57915 ++CONFIG_MIPS_FP_SUPPORT=y
57916 ++CONFIG_POWER_RESET=y
57917 ++CONFIG_POWER_RESET_SYSCON=y
57918 ++CONFIG_SERIAL_8250=y
57919 ++CONFIG_SERIAL_8250_CONSOLE=y
57920 ++CONFIG_CMDLINE_BOOL=y
57921 ++CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
57922 ++CONFIG_FRAME_WARN=1280
57923 +diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips64el.config b/tools/testing/selftests/wireguard/qemu/arch/mips64el.config
57924 +new file mode 100644
57925 +index 000000000000..435b0b43e00c
57926 +--- /dev/null
57927 ++++ b/tools/testing/selftests/wireguard/qemu/arch/mips64el.config
57928 +@@ -0,0 +1,15 @@
57929 ++CONFIG_64BIT=y
57930 ++CONFIG_CPU_MIPS64_R2=y
57931 ++CONFIG_MIPS32_N32=y
57932 ++CONFIG_CPU_HAS_MSA=y
57933 ++CONFIG_MIPS_MALTA=y
57934 ++CONFIG_CPU_LITTLE_ENDIAN=y
57935 ++CONFIG_MIPS_CPS=y
57936 ++CONFIG_MIPS_FP_SUPPORT=y
57937 ++CONFIG_POWER_RESET=y
57938 ++CONFIG_POWER_RESET_SYSCON=y
57939 ++CONFIG_SERIAL_8250=y
57940 ++CONFIG_SERIAL_8250_CONSOLE=y
57941 ++CONFIG_CMDLINE_BOOL=y
57942 ++CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
57943 ++CONFIG_FRAME_WARN=1280
57944 +diff --git a/tools/testing/selftests/wireguard/qemu/arch/mipsel.config b/tools/testing/selftests/wireguard/qemu/arch/mipsel.config
57945 +new file mode 100644
57946 +index 000000000000..62bb50c4a85f
57947 +--- /dev/null
57948 ++++ b/tools/testing/selftests/wireguard/qemu/arch/mipsel.config
57949 +@@ -0,0 +1,12 @@
57950 ++CONFIG_CPU_MIPS32_R2=y
57951 ++CONFIG_MIPS_MALTA=y
57952 ++CONFIG_CPU_LITTLE_ENDIAN=y
57953 ++CONFIG_MIPS_CPS=y
57954 ++CONFIG_MIPS_FP_SUPPORT=y
57955 ++CONFIG_POWER_RESET=y
57956 ++CONFIG_POWER_RESET_SYSCON=y
57957 ++CONFIG_SERIAL_8250=y
57958 ++CONFIG_SERIAL_8250_CONSOLE=y
57959 ++CONFIG_CMDLINE_BOOL=y
57960 ++CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
57961 ++CONFIG_FRAME_WARN=1024
57962 +diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc.config
57963 +new file mode 100644
57964 +index 000000000000..57957093b71b
57965 +--- /dev/null
57966 ++++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc.config
57967 +@@ -0,0 +1,10 @@
57968 ++CONFIG_PPC_QEMU_E500=y
57969 ++CONFIG_FSL_SOC_BOOKE=y
57970 ++CONFIG_PPC_85xx=y
57971 ++CONFIG_PHYS_64BIT=y
57972 ++CONFIG_SERIAL_8250=y
57973 ++CONFIG_SERIAL_8250_CONSOLE=y
57974 ++CONFIG_MATH_EMULATION=y
57975 ++CONFIG_CMDLINE_BOOL=y
57976 ++CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
57977 ++CONFIG_FRAME_WARN=1024
57978 +diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config
57979 +new file mode 100644
57980 +index 000000000000..990c510a9cfa
57981 +--- /dev/null
57982 ++++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config
57983 +@@ -0,0 +1,12 @@
57984 ++CONFIG_PPC64=y
57985 ++CONFIG_PPC_PSERIES=y
57986 ++CONFIG_ALTIVEC=y
57987 ++CONFIG_VSX=y
57988 ++CONFIG_PPC_OF_BOOT_TRAMPOLINE=y
57989 ++CONFIG_PPC_RADIX_MMU=y
57990 ++CONFIG_HVC_CONSOLE=y
57991 ++CONFIG_CPU_LITTLE_ENDIAN=y
57992 ++CONFIG_CMDLINE_BOOL=y
57993 ++CONFIG_CMDLINE="console=hvc0 wg.success=hvc1"
57994 ++CONFIG_SECTION_MISMATCH_WARN_ONLY=y
57995 ++CONFIG_FRAME_WARN=1280
57996 +diff --git a/tools/testing/selftests/wireguard/qemu/arch/x86_64.config b/tools/testing/selftests/wireguard/qemu/arch/x86_64.config
57997 +new file mode 100644
57998 +index 000000000000..00a1ef4869d5
57999 +--- /dev/null
58000 ++++ b/tools/testing/selftests/wireguard/qemu/arch/x86_64.config
58001 +@@ -0,0 +1,5 @@
58002 ++CONFIG_SERIAL_8250=y
58003 ++CONFIG_SERIAL_8250_CONSOLE=y
58004 ++CONFIG_CMDLINE_BOOL=y
58005 ++CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
58006 ++CONFIG_FRAME_WARN=1280
58007 +diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config
58008 +new file mode 100644
58009 +index 000000000000..b9c72706fe4d
58010 +--- /dev/null
58011 ++++ b/tools/testing/selftests/wireguard/qemu/debug.config
58012 +@@ -0,0 +1,67 @@
58013 ++CONFIG_LOCALVERSION="-debug"
58014 ++CONFIG_ENABLE_WARN_DEPRECATED=y
58015 ++CONFIG_ENABLE_MUST_CHECK=y
58016 ++CONFIG_FRAME_POINTER=y
58017 ++CONFIG_STACK_VALIDATION=y
58018 ++CONFIG_DEBUG_KERNEL=y
58019 ++CONFIG_DEBUG_INFO=y
58020 ++CONFIG_DEBUG_INFO_DWARF4=y
58021 ++CONFIG_PAGE_EXTENSION=y
58022 ++CONFIG_PAGE_POISONING=y
58023 ++CONFIG_DEBUG_OBJECTS=y
58024 ++CONFIG_DEBUG_OBJECTS_FREE=y
58025 ++CONFIG_DEBUG_OBJECTS_TIMERS=y
58026 ++CONFIG_DEBUG_OBJECTS_WORK=y
58027 ++CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
58028 ++CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER=y
58029 ++CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT=1
58030 ++CONFIG_SLUB_DEBUG_ON=y
58031 ++CONFIG_DEBUG_VM=y
58032 ++CONFIG_DEBUG_MEMORY_INIT=y
58033 ++CONFIG_HAVE_DEBUG_STACKOVERFLOW=y
58034 ++CONFIG_DEBUG_STACKOVERFLOW=y
58035 ++CONFIG_HAVE_ARCH_KMEMCHECK=y
58036 ++CONFIG_HAVE_ARCH_KASAN=y
58037 ++CONFIG_KASAN=y
58038 ++CONFIG_KASAN_INLINE=y
58039 ++CONFIG_UBSAN=y
58040 ++CONFIG_UBSAN_SANITIZE_ALL=y
58041 ++CONFIG_UBSAN_NO_ALIGNMENT=y
58042 ++CONFIG_UBSAN_NULL=y
58043 ++CONFIG_DEBUG_KMEMLEAK=y
58044 ++CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE=8192
58045 ++CONFIG_DEBUG_STACK_USAGE=y
58046 ++CONFIG_DEBUG_SHIRQ=y
58047 ++CONFIG_WQ_WATCHDOG=y
58048 ++CONFIG_SCHED_DEBUG=y
58049 ++CONFIG_SCHED_INFO=y
58050 ++CONFIG_SCHEDSTATS=y
58051 ++CONFIG_SCHED_STACK_END_CHECK=y
58052 ++CONFIG_DEBUG_TIMEKEEPING=y
58053 ++CONFIG_TIMER_STATS=y
58054 ++CONFIG_DEBUG_PREEMPT=y
58055 ++CONFIG_DEBUG_RT_MUTEXES=y
58056 ++CONFIG_DEBUG_SPINLOCK=y
58057 ++CONFIG_DEBUG_MUTEXES=y
58058 ++CONFIG_DEBUG_LOCK_ALLOC=y
58059 ++CONFIG_PROVE_LOCKING=y
58060 ++CONFIG_LOCKDEP=y
58061 ++CONFIG_DEBUG_ATOMIC_SLEEP=y
58062 ++CONFIG_TRACE_IRQFLAGS=y
58063 ++CONFIG_DEBUG_BUGVERBOSE=y
58064 ++CONFIG_DEBUG_LIST=y
58065 ++CONFIG_DEBUG_PI_LIST=y
58066 ++CONFIG_PROVE_RCU=y
58067 ++CONFIG_SPARSE_RCU_POINTER=y
58068 ++CONFIG_RCU_CPU_STALL_TIMEOUT=21
58069 ++CONFIG_RCU_TRACE=y
58070 ++CONFIG_RCU_EQS_DEBUG=y
58071 ++CONFIG_USER_STACKTRACE_SUPPORT=y
58072 ++CONFIG_DEBUG_SG=y
58073 ++CONFIG_DEBUG_NOTIFIERS=y
58074 ++CONFIG_DOUBLEFAULT=y
58075 ++CONFIG_X86_DEBUG_FPU=y
58076 ++CONFIG_DEBUG_SECTION_MISMATCH=y
58077 ++CONFIG_DEBUG_PAGEALLOC=y
58078 ++CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT=y
58079 ++CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y
58080 +diff --git a/tools/testing/selftests/wireguard/qemu/init.c b/tools/testing/selftests/wireguard/qemu/init.c
58081 +new file mode 100644
58082 +index 000000000000..51e5ddedee88
58083 +--- /dev/null
58084 ++++ b/tools/testing/selftests/wireguard/qemu/init.c
58085 +@@ -0,0 +1,284 @@
58086 ++// SPDX-License-Identifier: GPL-2.0
58087 ++/*
58088 ++ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@×××××.com>. All Rights Reserved.
58089 ++ */
58090 ++
58091 ++#define _GNU_SOURCE
58092 ++#include <unistd.h>
58093 ++#include <errno.h>
58094 ++#include <string.h>
58095 ++#include <stdio.h>
58096 ++#include <stdlib.h>
58097 ++#include <stdbool.h>
58098 ++#include <fcntl.h>
58099 ++#include <sys/wait.h>
58100 ++#include <sys/mount.h>
58101 ++#include <sys/types.h>
58102 ++#include <sys/stat.h>
58103 ++#include <sys/types.h>
58104 ++#include <sys/io.h>
58105 ++#include <sys/ioctl.h>
58106 ++#include <sys/reboot.h>
58107 ++#include <sys/utsname.h>
58108 ++#include <sys/sendfile.h>
58109 ++#include <linux/random.h>
58110 ++#include <linux/version.h>
58111 ++
58112 ++__attribute__((noreturn)) static void poweroff(void)
58113 ++{
58114 ++ fflush(stdout);
58115 ++ fflush(stderr);
58116 ++ reboot(RB_AUTOBOOT);
58117 ++ sleep(30);
58118 ++ fprintf(stderr, "\x1b[37m\x1b[41m\x1b[1mFailed to power off!!!\x1b[0m\n");
58119 ++ exit(1);
58120 ++}
58121 ++
58122 ++static void panic(const char *what)
58123 ++{
58124 ++ fprintf(stderr, "\n\n\x1b[37m\x1b[41m\x1b[1mSOMETHING WENT HORRIBLY WRONG\x1b[0m\n\n \x1b[31m\x1b[1m%s: %s\x1b[0m\n\n\x1b[37m\x1b[44m\x1b[1mPower off...\x1b[0m\n\n", what, strerror(errno));
58125 ++ poweroff();
58126 ++}
58127 ++
58128 ++#define pretty_message(msg) puts("\x1b[32m\x1b[1m" msg "\x1b[0m")
58129 ++
58130 ++static void print_banner(void)
58131 ++{
58132 ++ struct utsname utsname;
58133 ++ int len;
58134 ++
58135 ++ if (uname(&utsname) < 0)
58136 ++ panic("uname");
58137 ++
58138 ++ len = strlen(" WireGuard Test Suite on ") + strlen(utsname.sysname) + strlen(utsname.release) + strlen(utsname.machine);
58139 ++ printf("\x1b[45m\x1b[33m\x1b[1m%*.s\x1b[0m\n\x1b[45m\x1b[33m\x1b[1m WireGuard Test Suite on %s %s %s \x1b[0m\n\x1b[45m\x1b[33m\x1b[1m%*.s\x1b[0m\n\n", len, "", utsname.sysname, utsname.release, utsname.machine, len, "");
58140 ++}
58141 ++
58142 ++static void seed_rng(void)
58143 ++{
58144 ++ int fd;
58145 ++ struct {
58146 ++ int entropy_count;
58147 ++ int buffer_size;
58148 ++ unsigned char buffer[256];
58149 ++ } entropy = {
58150 ++ .entropy_count = sizeof(entropy.buffer) * 8,
58151 ++ .buffer_size = sizeof(entropy.buffer),
58152 ++ .buffer = "Adding real entropy is not actually important for these tests. Don't try this at home, kids!"
58153 ++ };
58154 ++
58155 ++ if (mknod("/dev/urandom", S_IFCHR | 0644, makedev(1, 9)))
58156 ++ panic("mknod(/dev/urandom)");
58157 ++ fd = open("/dev/urandom", O_WRONLY);
58158 ++ if (fd < 0)
58159 ++ panic("open(urandom)");
58160 ++ for (int i = 0; i < 256; ++i) {
58161 ++ if (ioctl(fd, RNDADDENTROPY, &entropy) < 0)
58162 ++ panic("ioctl(urandom)");
58163 ++ }
58164 ++ close(fd);
58165 ++}
58166 ++
58167 ++static void mount_filesystems(void)
58168 ++{
58169 ++ pretty_message("[+] Mounting filesystems...");
58170 ++ mkdir("/dev", 0755);
58171 ++ mkdir("/proc", 0755);
58172 ++ mkdir("/sys", 0755);
58173 ++ mkdir("/tmp", 0755);
58174 ++ mkdir("/run", 0755);
58175 ++ mkdir("/var", 0755);
58176 ++ if (mount("none", "/dev", "devtmpfs", 0, NULL))
58177 ++ panic("devtmpfs mount");
58178 ++ if (mount("none", "/proc", "proc", 0, NULL))
58179 ++ panic("procfs mount");
58180 ++ if (mount("none", "/sys", "sysfs", 0, NULL))
58181 ++ panic("sysfs mount");
58182 ++ if (mount("none", "/tmp", "tmpfs", 0, NULL))
58183 ++ panic("tmpfs mount");
58184 ++ if (mount("none", "/run", "tmpfs", 0, NULL))
58185 ++ panic("tmpfs mount");
58186 ++ if (mount("none", "/sys/kernel/debug", "debugfs", 0, NULL))
58187 ++ ; /* Not a problem if it fails.*/
58188 ++ if (symlink("/run", "/var/run"))
58189 ++ panic("run symlink");
58190 ++ if (symlink("/proc/self/fd", "/dev/fd"))
58191 ++ panic("fd symlink");
58192 ++}
58193 ++
58194 ++static void enable_logging(void)
58195 ++{
58196 ++ int fd;
58197 ++ pretty_message("[+] Enabling logging...");
58198 ++ fd = open("/proc/sys/kernel/printk", O_WRONLY);
58199 ++ if (fd >= 0) {
58200 ++ if (write(fd, "9\n", 2) != 2)
58201 ++ panic("write(printk)");
58202 ++ close(fd);
58203 ++ }
58204 ++ fd = open("/proc/sys/debug/exception-trace", O_WRONLY);
58205 ++ if (fd >= 0) {
58206 ++ if (write(fd, "1\n", 2) != 2)
58207 ++ panic("write(exception-trace)");
58208 ++ close(fd);
58209 ++ }
58210 ++ fd = open("/proc/sys/kernel/panic_on_warn", O_WRONLY);
58211 ++ if (fd >= 0) {
58212 ++ if (write(fd, "1\n", 2) != 2)
58213 ++ panic("write(panic_on_warn)");
58214 ++ close(fd);
58215 ++ }
58216 ++}
58217 ++
58218 ++static void kmod_selftests(void)
58219 ++{
58220 ++ FILE *file;
58221 ++ char line[2048], *start, *pass;
58222 ++ bool success = true;
58223 ++ pretty_message("[+] Module self-tests:");
58224 ++ file = fopen("/proc/kmsg", "r");
58225 ++ if (!file)
58226 ++ panic("fopen(kmsg)");
58227 ++ if (fcntl(fileno(file), F_SETFL, O_NONBLOCK) < 0)
58228 ++ panic("fcntl(kmsg, nonblock)");
58229 ++ while (fgets(line, sizeof(line), file)) {
58230 ++ start = strstr(line, "wireguard: ");
58231 ++ if (!start)
58232 ++ continue;
58233 ++ start += 11;
58234 ++ *strchrnul(start, '\n') = '\0';
58235 ++ if (strstr(start, "www.wireguard.com"))
58236 ++ break;
58237 ++ pass = strstr(start, ": pass");
58238 ++ if (!pass || pass[6] != '\0') {
58239 ++ success = false;
58240 ++ printf(" \x1b[31m* %s\x1b[0m\n", start);
58241 ++ } else
58242 ++ printf(" \x1b[32m* %s\x1b[0m\n", start);
58243 ++ }
58244 ++ fclose(file);
58245 ++ if (!success) {
58246 ++ puts("\x1b[31m\x1b[1m[-] Tests failed! \u2639\x1b[0m");
58247 ++ poweroff();
58248 ++ }
58249 ++}
58250 ++
58251 ++static void launch_tests(void)
58252 ++{
58253 ++ char cmdline[4096], *success_dev;
58254 ++ int status, fd;
58255 ++ pid_t pid;
58256 ++
58257 ++ pretty_message("[+] Launching tests...");
58258 ++ pid = fork();
58259 ++ if (pid == -1)
58260 ++ panic("fork");
58261 ++ else if (pid == 0) {
58262 ++ execl("/init.sh", "init", NULL);
58263 ++ panic("exec");
58264 ++ }
58265 ++ if (waitpid(pid, &status, 0) < 0)
58266 ++ panic("waitpid");
58267 ++ if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
58268 ++ pretty_message("[+] Tests successful! :-)");
58269 ++ fd = open("/proc/cmdline", O_RDONLY);
58270 ++ if (fd < 0)
58271 ++ panic("open(/proc/cmdline)");
58272 ++ if (read(fd, cmdline, sizeof(cmdline) - 1) <= 0)
58273 ++ panic("read(/proc/cmdline)");
58274 ++ cmdline[sizeof(cmdline) - 1] = '\0';
58275 ++ for (success_dev = strtok(cmdline, " \n"); success_dev; success_dev = strtok(NULL, " \n")) {
58276 ++ if (strncmp(success_dev, "wg.success=", 11))
58277 ++ continue;
58278 ++ memcpy(success_dev + 11 - 5, "/dev/", 5);
58279 ++ success_dev += 11 - 5;
58280 ++ break;
58281 ++ }
58282 ++ if (!success_dev || !strlen(success_dev))
58283 ++ panic("Unable to find success device");
58284 ++
58285 ++ fd = open(success_dev, O_WRONLY);
58286 ++ if (fd < 0)
58287 ++ panic("open(success_dev)");
58288 ++ if (write(fd, "success\n", 8) != 8)
58289 ++ panic("write(success_dev)");
58290 ++ close(fd);
58291 ++ } else {
58292 ++ const char *why = "unknown cause";
58293 ++ int what = -1;
58294 ++
58295 ++ if (WIFEXITED(status)) {
58296 ++ why = "exit code";
58297 ++ what = WEXITSTATUS(status);
58298 ++ } else if (WIFSIGNALED(status)) {
58299 ++ why = "signal";
58300 ++ what = WTERMSIG(status);
58301 ++ }
58302 ++ printf("\x1b[31m\x1b[1m[-] Tests failed with %s %d! \u2639\x1b[0m\n", why, what);
58303 ++ }
58304 ++}
58305 ++
58306 ++static void ensure_console(void)
58307 ++{
58308 ++ for (unsigned int i = 0; i < 1000; ++i) {
58309 ++ int fd = open("/dev/console", O_RDWR);
58310 ++ if (fd < 0) {
58311 ++ usleep(50000);
58312 ++ continue;
58313 ++ }
58314 ++ dup2(fd, 0);
58315 ++ dup2(fd, 1);
58316 ++ dup2(fd, 2);
58317 ++ close(fd);
58318 ++ if (write(1, "\0\0\0\0\n", 5) == 5)
58319 ++ return;
58320 ++ }
58321 ++ panic("Unable to open console device");
58322 ++}
58323 ++
58324 ++static void clear_leaks(void)
58325 ++{
58326 ++ int fd;
58327 ++
58328 ++ fd = open("/sys/kernel/debug/kmemleak", O_WRONLY);
58329 ++ if (fd < 0)
58330 ++ return;
58331 ++ pretty_message("[+] Starting memory leak detection...");
58332 ++ write(fd, "clear\n", 5);
58333 ++ close(fd);
58334 ++}
58335 ++
58336 ++static void check_leaks(void)
58337 ++{
58338 ++ int fd;
58339 ++
58340 ++ fd = open("/sys/kernel/debug/kmemleak", O_WRONLY);
58341 ++ if (fd < 0)
58342 ++ return;
58343 ++ pretty_message("[+] Scanning for memory leaks...");
58344 ++ sleep(2); /* Wait for any grace periods. */
58345 ++ write(fd, "scan\n", 5);
58346 ++ close(fd);
58347 ++
58348 ++ fd = open("/sys/kernel/debug/kmemleak", O_RDONLY);
58349 ++ if (fd < 0)
58350 ++ return;
58351 ++ if (sendfile(1, fd, NULL, 0x7ffff000) > 0)
58352 ++ panic("Memory leaks encountered");
58353 ++ close(fd);
58354 ++}
58355 ++
58356 ++int main(int argc, char *argv[])
58357 ++{
58358 ++ seed_rng();
58359 ++ ensure_console();
58360 ++ print_banner();
58361 ++ mount_filesystems();
58362 ++ kmod_selftests();
58363 ++ enable_logging();
58364 ++ clear_leaks();
58365 ++ launch_tests();
58366 ++ check_leaks();
58367 ++ poweroff();
58368 ++ return 1;
58369 ++}
58370 +diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config
58371 +new file mode 100644
58372 +index 000000000000..9cca30206014
58373 +--- /dev/null
58374 ++++ b/tools/testing/selftests/wireguard/qemu/kernel.config
58375 +@@ -0,0 +1,86 @@
58376 ++CONFIG_LOCALVERSION=""
58377 ++CONFIG_NET=y
58378 ++CONFIG_NETDEVICES=y
58379 ++CONFIG_NET_CORE=y
58380 ++CONFIG_NET_IPIP=y
58381 ++CONFIG_DUMMY=y
58382 ++CONFIG_VETH=y
58383 ++CONFIG_MULTIUSER=y
58384 ++CONFIG_NAMESPACES=y
58385 ++CONFIG_NET_NS=y
58386 ++CONFIG_UNIX=y
58387 ++CONFIG_INET=y
58388 ++CONFIG_IPV6=y
58389 ++CONFIG_NETFILTER=y
58390 ++CONFIG_NETFILTER_ADVANCED=y
58391 ++CONFIG_NF_CONNTRACK=y
58392 ++CONFIG_NF_NAT=y
58393 ++CONFIG_NETFILTER_XTABLES=y
58394 ++CONFIG_NETFILTER_XT_NAT=y
58395 ++CONFIG_NETFILTER_XT_MATCH_LENGTH=y
58396 ++CONFIG_NF_CONNTRACK_IPV4=y
58397 ++CONFIG_NF_NAT_IPV4=y
58398 ++CONFIG_IP_NF_IPTABLES=y
58399 ++CONFIG_IP_NF_FILTER=y
58400 ++CONFIG_IP_NF_NAT=y
58401 ++CONFIG_IP_ADVANCED_ROUTER=y
58402 ++CONFIG_IP_MULTIPLE_TABLES=y
58403 ++CONFIG_IPV6_MULTIPLE_TABLES=y
58404 ++CONFIG_TTY=y
58405 ++CONFIG_BINFMT_ELF=y
58406 ++CONFIG_BINFMT_SCRIPT=y
58407 ++CONFIG_VDSO=y
58408 ++CONFIG_VIRTUALIZATION=y
58409 ++CONFIG_HYPERVISOR_GUEST=y
58410 ++CONFIG_PARAVIRT=y
58411 ++CONFIG_KVM_GUEST=y
58412 ++CONFIG_PARAVIRT_SPINLOCKS=y
58413 ++CONFIG_PRINTK=y
58414 ++CONFIG_KALLSYMS=y
58415 ++CONFIG_BUG=y
58416 ++CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y
58417 ++CONFIG_EMBEDDED=n
58418 ++CONFIG_BASE_FULL=y
58419 ++CONFIG_FUTEX=y
58420 ++CONFIG_SHMEM=y
58421 ++CONFIG_SLUB=y
58422 ++CONFIG_SPARSEMEM_VMEMMAP=y
58423 ++CONFIG_SMP=y
58424 ++CONFIG_SCHED_SMT=y
58425 ++CONFIG_SCHED_MC=y
58426 ++CONFIG_NUMA=y
58427 ++CONFIG_PREEMPT=y
58428 ++CONFIG_NO_HZ=y
58429 ++CONFIG_NO_HZ_IDLE=y
58430 ++CONFIG_NO_HZ_FULL=n
58431 ++CONFIG_HZ_PERIODIC=n
58432 ++CONFIG_HIGH_RES_TIMERS=y
58433 ++CONFIG_ARCH_RANDOM=y
58434 ++CONFIG_FILE_LOCKING=y
58435 ++CONFIG_POSIX_TIMERS=y
58436 ++CONFIG_DEVTMPFS=y
58437 ++CONFIG_PROC_FS=y
58438 ++CONFIG_PROC_SYSCTL=y
58439 ++CONFIG_SYSFS=y
58440 ++CONFIG_TMPFS=y
58441 ++CONFIG_CONSOLE_LOGLEVEL_DEFAULT=15
58442 ++CONFIG_PRINTK_TIME=y
58443 ++CONFIG_BLK_DEV_INITRD=y
58444 ++CONFIG_LEGACY_VSYSCALL_NONE=y
58445 ++CONFIG_KERNEL_GZIP=y
58446 ++CONFIG_PANIC_ON_OOPS=y
58447 ++CONFIG_BUG_ON_DATA_CORRUPTION=y
58448 ++CONFIG_LOCKUP_DETECTOR=y
58449 ++CONFIG_SOFTLOCKUP_DETECTOR=y
58450 ++CONFIG_HARDLOCKUP_DETECTOR=y
58451 ++CONFIG_WQ_WATCHDOG=y
58452 ++CONFIG_DETECT_HUNG_TASK=y
58453 ++CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y
58454 ++CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
58455 ++CONFIG_BOOTPARAM_HUNG_TASK_PANIC=y
58456 ++CONFIG_PANIC_TIMEOUT=-1
58457 ++CONFIG_STACKTRACE=y
58458 ++CONFIG_EARLY_PRINTK=y
58459 ++CONFIG_GDB_SCRIPTS=y
58460 ++CONFIG_WIREGUARD=y
58461 ++CONFIG_WIREGUARD_DEBUG=y
58462 +--
58463 +cgit v1.2.3-4-ga26e
58464 +
58465 +
58466 +From 6a9281aa85c605bd672f8c5a5a16dec1cda122cc Mon Sep 17 00:00:00 2001
58467 +From: "Jason A. Donenfeld" <Jason@×××××.com>
58468 +Date: Sun, 15 Dec 2019 22:08:01 +0100
58469 +Subject: wireguard: Kconfig: select parent dependency for crypto
58470 +
58471 +commit d7c68a38bb4f9b7c1a2e4a772872c752ee5c44a6 upstream.
58472 +
58473 +This fixes the crypto selection submenu dependencies. Otherwise, we'd
58474 +wind up issuing warnings in which certain dependencies we also select
58475 +couldn't be satisfied. This condition was triggered by the addition of
58476 +the test suite autobuilder in the previous commit.
58477 +
58478 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
58479 +Signed-off-by: David S. Miller <davem@×××××××××.net>
58480 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
58481 +---
58482 + drivers/net/Kconfig | 2 ++
58483 + 1 file changed, 2 insertions(+)
58484 +
58485 +diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
58486 +index 16ad145e22c9..57f1ba924f4e 100644
58487 +--- a/drivers/net/Kconfig
58488 ++++ b/drivers/net/Kconfig
58489 +@@ -85,6 +85,8 @@ config WIREGUARD
58490 + select CRYPTO_POLY1305_X86_64 if X86 && 64BIT
58491 + select CRYPTO_BLAKE2S_X86 if X86 && 64BIT
58492 + select CRYPTO_CURVE25519_X86 if X86 && 64BIT
58493 ++ select ARM_CRYPTO if ARM
58494 ++ select ARM64_CRYPTO if ARM64
58495 + select CRYPTO_CHACHA20_NEON if (ARM || ARM64) && KERNEL_MODE_NEON
58496 + select CRYPTO_POLY1305_NEON if ARM64 && KERNEL_MODE_NEON
58497 + select CRYPTO_POLY1305_ARM if ARM
58498 +--
58499 +cgit v1.2.3-4-ga26e
58500 +
58501 +
58502 +From 85c2b51d6a1e5139024e0ccf45d08839af6d77d0 Mon Sep 17 00:00:00 2001
58503 +From: Josh Soref <jsoref@×××××.com>
58504 +Date: Sun, 15 Dec 2019 22:08:02 +0100
58505 +Subject: wireguard: global: fix spelling mistakes in comments
58506 +
58507 +commit a2ec8b5706944d228181c8b91d815f41d6dd8e7b upstream.
58508 +
58509 +This fixes two spelling errors in source code comments.
58510 +
58511 +Signed-off-by: Josh Soref <jsoref@×××××.com>
58512 +[Jason: rewrote commit message]
58513 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
58514 +Signed-off-by: David S. Miller <davem@×××××××××.net>
58515 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
58516 +---
58517 + drivers/net/wireguard/receive.c | 2 +-
58518 + include/uapi/linux/wireguard.h | 8 ++++----
58519 + 2 files changed, 5 insertions(+), 5 deletions(-)
58520 +
58521 +diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c
58522 +index 7e675f541491..9c6bab9c981f 100644
58523 +--- a/drivers/net/wireguard/receive.c
58524 ++++ b/drivers/net/wireguard/receive.c
58525 +@@ -380,7 +380,7 @@ static void wg_packet_consume_data_done(struct wg_peer *peer,
58526 + /* We've already verified the Poly1305 auth tag, which means this packet
58527 + * was not modified in transit. We can therefore tell the networking
58528 + * stack that all checksums of every layer of encapsulation have already
58529 +- * been checked "by the hardware" and therefore is unneccessary to check
58530 ++ * been checked "by the hardware" and therefore is unnecessary to check
58531 + * again in software.
58532 + */
58533 + skb->ip_summed = CHECKSUM_UNNECESSARY;
58534 +diff --git a/include/uapi/linux/wireguard.h b/include/uapi/linux/wireguard.h
58535 +index dd8a47c4ad11..ae88be14c947 100644
58536 +--- a/include/uapi/linux/wireguard.h
58537 ++++ b/include/uapi/linux/wireguard.h
58538 +@@ -18,13 +18,13 @@
58539 + * one but not both of:
58540 + *
58541 + * WGDEVICE_A_IFINDEX: NLA_U32
58542 +- * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMESIZ - 1
58543 ++ * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1
58544 + *
58545 + * The kernel will then return several messages (NLM_F_MULTI) containing the
58546 + * following tree of nested items:
58547 + *
58548 + * WGDEVICE_A_IFINDEX: NLA_U32
58549 +- * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMESIZ - 1
58550 ++ * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1
58551 + * WGDEVICE_A_PRIVATE_KEY: NLA_EXACT_LEN, len WG_KEY_LEN
58552 + * WGDEVICE_A_PUBLIC_KEY: NLA_EXACT_LEN, len WG_KEY_LEN
58553 + * WGDEVICE_A_LISTEN_PORT: NLA_U16
58554 +@@ -77,7 +77,7 @@
58555 + * WGDEVICE_A_IFINDEX and WGDEVICE_A_IFNAME:
58556 + *
58557 + * WGDEVICE_A_IFINDEX: NLA_U32
58558 +- * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMESIZ - 1
58559 ++ * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1
58560 + * WGDEVICE_A_FLAGS: NLA_U32, 0 or WGDEVICE_F_REPLACE_PEERS if all current
58561 + * peers should be removed prior to adding the list below.
58562 + * WGDEVICE_A_PRIVATE_KEY: len WG_KEY_LEN, all zeros to remove
58563 +@@ -121,7 +121,7 @@
58564 + * filling in information not contained in the prior. Note that if
58565 + * WGDEVICE_F_REPLACE_PEERS is specified in the first message, it probably
58566 + * should not be specified in fragments that come after, so that the list
58567 +- * of peers is only cleared the first time but appened after. Likewise for
58568 ++ * of peers is only cleared the first time but appended after. Likewise for
58569 + * peers, if WGPEER_F_REPLACE_ALLOWEDIPS is specified in the first message
58570 + * of a peer, it likely should not be specified in subsequent fragments.
58571 + *
58572 +--
58573 +cgit v1.2.3-4-ga26e
58574 +
58575 +
58576 +From 06da2a280c63ec864747978362a9aebfb5863220 Mon Sep 17 00:00:00 2001
58577 +From: YueHaibing <yuehaibing@××××××.com>
58578 +Date: Sun, 15 Dec 2019 22:08:03 +0100
58579 +Subject: wireguard: main: remove unused include <linux/version.h>
58580 +
58581 +commit 43967b6ff91e53bcce5ae08c16a0588a475b53a1 upstream.
58582 +
58583 +Remove <linux/version.h> from the includes for main.c, which is unused.
58584 +
58585 +Signed-off-by: YueHaibing <yuehaibing@××××××.com>
58586 +[Jason: reworded commit message]
58587 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
58588 +Signed-off-by: David S. Miller <davem@×××××××××.net>
58589 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
58590 +---
58591 + drivers/net/wireguard/main.c | 1 -
58592 + 1 file changed, 1 deletion(-)
58593 +
58594 +diff --git a/drivers/net/wireguard/main.c b/drivers/net/wireguard/main.c
58595 +index 10c0a40f6a9e..7a7d5f1a80fc 100644
58596 +--- a/drivers/net/wireguard/main.c
58597 ++++ b/drivers/net/wireguard/main.c
58598 +@@ -12,7 +12,6 @@
58599 +
58600 + #include <uapi/linux/wireguard.h>
58601 +
58602 +-#include <linux/version.h>
58603 + #include <linux/init.h>
58604 + #include <linux/module.h>
58605 + #include <linux/genetlink.h>
58606 +--
58607 +cgit v1.2.3-4-ga26e
58608 +
58609 +
58610 +From 9182739301f9baaf8cd9fef959996bc7ee09c1d6 Mon Sep 17 00:00:00 2001
58611 +From: Wei Yongjun <weiyongjun1@××××××.com>
58612 +Date: Sun, 15 Dec 2019 22:08:04 +0100
58613 +Subject: wireguard: allowedips: use kfree_rcu() instead of call_rcu()
58614 +
58615 +commit d89ee7d5c73af15c1c6f12b016cdf469742b5726 upstream.
58616 +
58617 +The callback function of call_rcu() just calls a kfree(), so we
58618 +can use kfree_rcu() instead of call_rcu() + callback function.
58619 +
58620 +Signed-off-by: Wei Yongjun <weiyongjun1@××××××.com>
58621 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
58622 +Signed-off-by: David S. Miller <davem@×××××××××.net>
58623 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
58624 +---
58625 + drivers/net/wireguard/allowedips.c | 7 +------
58626 + 1 file changed, 1 insertion(+), 6 deletions(-)
58627 +
58628 +diff --git a/drivers/net/wireguard/allowedips.c b/drivers/net/wireguard/allowedips.c
58629 +index 72667d5399c3..121d9ea0f135 100644
58630 +--- a/drivers/net/wireguard/allowedips.c
58631 ++++ b/drivers/net/wireguard/allowedips.c
58632 +@@ -31,11 +31,6 @@ static void copy_and_assign_cidr(struct allowedips_node *node, const u8 *src,
58633 + #define CHOOSE_NODE(parent, key) \
58634 + parent->bit[(key[parent->bit_at_a] >> parent->bit_at_b) & 1]
58635 +
58636 +-static void node_free_rcu(struct rcu_head *rcu)
58637 +-{
58638 +- kfree(container_of(rcu, struct allowedips_node, rcu));
58639 +-}
58640 +-
58641 + static void push_rcu(struct allowedips_node **stack,
58642 + struct allowedips_node __rcu *p, unsigned int *len)
58643 + {
58644 +@@ -112,7 +107,7 @@ static void walk_remove_by_peer(struct allowedips_node __rcu **top,
58645 + if (!node->bit[0] || !node->bit[1]) {
58646 + rcu_assign_pointer(*nptr, DEREF(
58647 + &node->bit[!REF(node->bit[0])]));
58648 +- call_rcu(&node->rcu, node_free_rcu);
58649 ++ kfree_rcu(node, rcu);
58650 + node = DEREF(nptr);
58651 + }
58652 + }
58653 +--
58654 +cgit v1.2.3-4-ga26e
58655 +
58656 +
58657 +From 75128a642978fd6ce5b008b259b3b12ff49b93e2 Mon Sep 17 00:00:00 2001
58658 +From: "Jason A. Donenfeld" <Jason@×××××.com>
58659 +Date: Thu, 2 Jan 2020 17:47:49 +0100
58660 +Subject: wireguard: selftests: remove ancient kernel compatibility code
58661 +
58662 +commit 9a69a4c8802adf642bc4a13d471b5a86b44ed434 upstream.
58663 +
58664 +Quite a bit of the test suite was designed to work with ancient kernels.
58665 +Thankfully we no longer have to deal with this. This commit updates
58666 +things that we can finally update and removes things that we can finally
58667 +remove, to avoid the build-up of the last several years as a result of
58668 +having to support ancient kernels. We can finally rely on suppress_
58669 +prefixlength being available. On the build side of things, the no-PIE
58670 +hack is no longer required, and we can bump some of the tools, repair
58671 +our m68k and i686-kvm support, and get better coverage of the static
58672 +branches used in the crypto lib and in udp_tunnel.
58673 +
58674 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
58675 +Signed-off-by: David S. Miller <davem@×××××××××.net>
58676 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
58677 +---
58678 + tools/testing/selftests/wireguard/netns.sh | 11 ++-
58679 + tools/testing/selftests/wireguard/qemu/Makefile | 82 +++++++++++-----------
58680 + .../selftests/wireguard/qemu/arch/m68k.config | 2 +-
58681 + tools/testing/selftests/wireguard/qemu/init.c | 1 +
58682 + .../testing/selftests/wireguard/qemu/kernel.config | 2 +
58683 + 5 files changed, 50 insertions(+), 48 deletions(-)
58684 +
58685 +diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh
58686 +index e7310d9390f7..d5c85c7494f2 100755
58687 +--- a/tools/testing/selftests/wireguard/netns.sh
58688 ++++ b/tools/testing/selftests/wireguard/netns.sh
58689 +@@ -37,7 +37,7 @@ n2() { pretty 2 "$*"; maybe_exec ip netns exec $netns2 "$@"; }
58690 + ip0() { pretty 0 "ip $*"; ip -n $netns0 "$@"; }
58691 + ip1() { pretty 1 "ip $*"; ip -n $netns1 "$@"; }
58692 + ip2() { pretty 2 "ip $*"; ip -n $netns2 "$@"; }
58693 +-sleep() { read -t "$1" -N 0 || true; }
58694 ++sleep() { read -t "$1" -N 1 || true; }
58695 + waitiperf() { pretty "${1//*-}" "wait for iperf:5201"; while [[ $(ss -N "$1" -tlp 'sport = 5201') != *iperf3* ]]; do sleep 0.1; done; }
58696 + waitncatudp() { pretty "${1//*-}" "wait for udp:1111"; while [[ $(ss -N "$1" -ulp 'sport = 1111') != *ncat* ]]; do sleep 0.1; done; }
58697 + waitncattcp() { pretty "${1//*-}" "wait for tcp:1111"; while [[ $(ss -N "$1" -tlp 'sport = 1111') != *ncat* ]]; do sleep 0.1; done; }
58698 +@@ -294,12 +294,9 @@ ip1 -6 rule add table main suppress_prefixlength 0
58699 + ip1 -4 route add default dev wg0 table 51820
58700 + ip1 -4 rule add not fwmark 51820 table 51820
58701 + ip1 -4 rule add table main suppress_prefixlength 0
58702 +-# suppress_prefixlength only got added in 3.12, and we want to support 3.10+.
58703 +-if [[ $(ip1 -4 rule show all) == *suppress_prefixlength* ]]; then
58704 +- # Flood the pings instead of sending just one, to trigger routing table reference counting bugs.
58705 +- n1 ping -W 1 -c 100 -f 192.168.99.7
58706 +- n1 ping -W 1 -c 100 -f abab::1111
58707 +-fi
58708 ++# Flood the pings instead of sending just one, to trigger routing table reference counting bugs.
58709 ++n1 ping -W 1 -c 100 -f 192.168.99.7
58710 ++n1 ping -W 1 -c 100 -f abab::1111
58711 +
58712 + n0 iptables -t nat -F
58713 + ip0 link del vethrc
58714 +diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile
58715 +index 6d51bf78eeff..f10aa3590adc 100644
58716 +--- a/tools/testing/selftests/wireguard/qemu/Makefile
58717 ++++ b/tools/testing/selftests/wireguard/qemu/Makefile
58718 +@@ -5,6 +5,7 @@
58719 + PWD := $(shell pwd)
58720 +
58721 + CHOST := $(shell gcc -dumpmachine)
58722 ++HOST_ARCH := $(firstword $(subst -, ,$(CHOST)))
58723 + ifneq (,$(ARCH))
58724 + CBUILD := $(subst -gcc,,$(lastword $(subst /, ,$(firstword $(wildcard $(foreach bindir,$(subst :, ,$(PATH)),$(bindir)/$(ARCH)-*-gcc))))))
58725 + ifeq (,$(CBUILD))
58726 +@@ -37,19 +38,19 @@ endef
58727 + define file_download =
58728 + $(DISTFILES_PATH)/$(1):
58729 + mkdir -p $(DISTFILES_PATH)
58730 +- flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -t inf --retry-on-http-error=404 -O $$@.tmp $(2)$(1) || rm -f $$@.tmp'
58731 ++ flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp'
58732 + if echo "$(3) $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi
58733 + endef
58734 +
58735 +-$(eval $(call tar_download,MUSL,musl,1.1.20,.tar.gz,https://www.musl-libc.org/releases/,44be8771d0e6c6b5f82dd15662eb2957c9a3173a19a8b49966ac0542bbd40d61))
58736 ++$(eval $(call tar_download,MUSL,musl,1.1.24,.tar.gz,https://www.musl-libc.org/releases/,1370c9a812b2cf2a7d92802510cca0058cc37e66a7bedd70051f0a34015022a3))
58737 + $(eval $(call tar_download,LIBMNL,libmnl,1.0.4,.tar.bz2,https://www.netfilter.org/projects/libmnl/files/,171f89699f286a5854b72b91d06e8f8e3683064c5901fb09d954a9ab6f551f81))
58738 +-$(eval $(call tar_download,IPERF,iperf,3.1.7,.tar.gz,http://downloads.es.net/pub/iperf/,a4ef73406fe92250602b8da2ae89ec53211f805df97a1d1d629db5a14043734f))
58739 ++$(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/,d846040224317caf2f75c843d309a950a7db23f9b44b94688ccbe557d6d1710c))
58740 + $(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d))
58741 +-$(eval $(call tar_download,IPROUTE2,iproute2,5.1.0,.tar.gz,https://www.kernel.org/pub/linux/utils/net/iproute2/,9b43707d6075ecdca14803ca8ce0c8553848c49fa1586d12fd508d66577243f2))
58742 +-$(eval $(call tar_download,IPTABLES,iptables,1.6.1,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,0fc2d7bd5d7be11311726466789d4c65fb4c8e096c9182b56ce97440864f0cf5))
58743 +-$(eval $(call tar_download,NMAP,nmap,7.60,.tar.bz2,https://nmap.org/dist/,a8796ecc4fa6c38aad6139d9515dc8113023a82e9d787e5a5fb5fa1b05516f21))
58744 +-$(eval $(call tar_download,IPUTILS,iputils,s20161105,.tar.gz,https://github.com/iputils/iputils/archive/s20161105.tar.gz/#,f813092f03d17294fd23544b129b95cdb87fe19f7970a51908a6b88509acad8a))
58745 +-$(eval $(call tar_download,WIREGUARD_TOOLS,WireGuard,0.0.20191212,.tar.xz,https://git.zx2c4.com/WireGuard/snapshot/,b0d718380f7a8822b2f12d75e462fa4eafa3a77871002981f367cd4fe2a1b071))
58746 ++$(eval $(call tar_download,IPROUTE2,iproute2,5.4.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,fe97aa60a0d4c5ac830be18937e18dc3400ca713a33a89ad896ff1e3d46086ae))
58747 ++$(eval $(call tar_download,IPTABLES,iptables,1.8.4,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,993a3a5490a544c2cbf2ef15cf7e7ed21af1845baf228318d5c36ef8827e157c))
58748 ++$(eval $(call tar_download,NMAP,nmap,7.80,.tar.bz2,https://nmap.org/dist/,fcfa5a0e42099e12e4bf7a68ebe6fde05553383a682e816a7ec9256ab4773faa))
58749 ++$(eval $(call tar_download,IPUTILS,iputils,s20190709,.tar.gz,https://github.com/iputils/iputils/archive/s20190709.tar.gz/#,a15720dd741d7538dd2645f9f516d193636ae4300ff7dbc8bfca757bf166490a))
58750 ++$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20191226,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,aa8af0fdc9872d369d8c890a84dbc2a2466b55795dccd5b47721b2d97644b04f))
58751 +
58752 + KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug)
58753 + rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d))
58754 +@@ -59,23 +60,21 @@ export CFLAGS ?= -O3 -pipe
58755 + export LDFLAGS ?=
58756 + export CPPFLAGS := -I$(BUILD_PATH)/include
58757 +
58758 +-ifeq ($(CHOST),$(CBUILD))
58759 ++ifeq ($(HOST_ARCH),$(ARCH))
58760 + CROSS_COMPILE_FLAG := --host=$(CHOST)
58761 +-NOPIE_GCC := gcc -fno-PIE
58762 + CFLAGS += -march=native
58763 + STRIP := strip
58764 + else
58765 + $(info Cross compilation: building for $(CBUILD) using $(CHOST))
58766 + CROSS_COMPILE_FLAG := --build=$(CBUILD) --host=$(CHOST)
58767 + export CROSS_COMPILE=$(CBUILD)-
58768 +-NOPIE_GCC := $(CBUILD)-gcc -fno-PIE
58769 + STRIP := $(CBUILD)-strip
58770 + endif
58771 + ifeq ($(ARCH),aarch64)
58772 + QEMU_ARCH := aarch64
58773 + KERNEL_ARCH := arm64
58774 + KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image
58775 +-ifeq ($(CHOST),$(CBUILD))
58776 ++ifeq ($(HOST_ARCH),$(ARCH))
58777 + QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
58778 + else
58779 + QEMU_MACHINE := -cpu cortex-a53 -machine virt
58780 +@@ -85,7 +84,7 @@ else ifeq ($(ARCH),aarch64_be)
58781 + QEMU_ARCH := aarch64
58782 + KERNEL_ARCH := arm64
58783 + KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image
58784 +-ifeq ($(CHOST),$(CBUILD))
58785 ++ifeq ($(HOST_ARCH),$(ARCH))
58786 + QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
58787 + else
58788 + QEMU_MACHINE := -cpu cortex-a53 -machine virt
58789 +@@ -95,7 +94,7 @@ else ifeq ($(ARCH),arm)
58790 + QEMU_ARCH := arm
58791 + KERNEL_ARCH := arm
58792 + KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage
58793 +-ifeq ($(CHOST),$(CBUILD))
58794 ++ifeq ($(HOST_ARCH),$(ARCH))
58795 + QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
58796 + else
58797 + QEMU_MACHINE := -cpu cortex-a15 -machine virt
58798 +@@ -105,7 +104,7 @@ else ifeq ($(ARCH),armeb)
58799 + QEMU_ARCH := arm
58800 + KERNEL_ARCH := arm
58801 + KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage
58802 +-ifeq ($(CHOST),$(CBUILD))
58803 ++ifeq ($(HOST_ARCH),$(ARCH))
58804 + QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
58805 + else
58806 + QEMU_MACHINE := -cpu cortex-a15 -machine virt
58807 +@@ -116,7 +115,7 @@ else ifeq ($(ARCH),x86_64)
58808 + QEMU_ARCH := x86_64
58809 + KERNEL_ARCH := x86_64
58810 + KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage
58811 +-ifeq ($(CHOST),$(CBUILD))
58812 ++ifeq ($(HOST_ARCH),$(ARCH))
58813 + QEMU_MACHINE := -cpu host -machine q35,accel=kvm
58814 + else
58815 + QEMU_MACHINE := -cpu Skylake-Server -machine q35
58816 +@@ -126,7 +125,7 @@ else ifeq ($(ARCH),i686)
58817 + QEMU_ARCH := i386
58818 + KERNEL_ARCH := x86
58819 + KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage
58820 +-ifeq ($(subst i686,x86_64,$(CBUILD)),$(CHOST))
58821 ++ifeq ($(subst x86_64,i686,$(HOST_ARCH)),$(ARCH))
58822 + QEMU_MACHINE := -cpu host -machine q35,accel=kvm
58823 + else
58824 + QEMU_MACHINE := -cpu coreduo -machine q35
58825 +@@ -136,7 +135,7 @@ else ifeq ($(ARCH),mips64)
58826 + QEMU_ARCH := mips64
58827 + KERNEL_ARCH := mips
58828 + KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
58829 +-ifeq ($(CHOST),$(CBUILD))
58830 ++ifeq ($(HOST_ARCH),$(ARCH))
58831 + QEMU_MACHINE := -cpu host -machine malta,accel=kvm
58832 + CFLAGS += -EB
58833 + else
58834 +@@ -147,7 +146,7 @@ else ifeq ($(ARCH),mips64el)
58835 + QEMU_ARCH := mips64el
58836 + KERNEL_ARCH := mips
58837 + KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
58838 +-ifeq ($(CHOST),$(CBUILD))
58839 ++ifeq ($(HOST_ARCH),$(ARCH))
58840 + QEMU_MACHINE := -cpu host -machine malta,accel=kvm
58841 + CFLAGS += -EL
58842 + else
58843 +@@ -158,7 +157,7 @@ else ifeq ($(ARCH),mips)
58844 + QEMU_ARCH := mips
58845 + KERNEL_ARCH := mips
58846 + KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
58847 +-ifeq ($(CHOST),$(CBUILD))
58848 ++ifeq ($(HOST_ARCH),$(ARCH))
58849 + QEMU_MACHINE := -cpu host -machine malta,accel=kvm
58850 + CFLAGS += -EB
58851 + else
58852 +@@ -169,7 +168,7 @@ else ifeq ($(ARCH),mipsel)
58853 + QEMU_ARCH := mipsel
58854 + KERNEL_ARCH := mips
58855 + KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
58856 +-ifeq ($(CHOST),$(CBUILD))
58857 ++ifeq ($(HOST_ARCH),$(ARCH))
58858 + QEMU_MACHINE := -cpu host -machine malta,accel=kvm
58859 + CFLAGS += -EL
58860 + else
58861 +@@ -180,7 +179,7 @@ else ifeq ($(ARCH),powerpc64le)
58862 + QEMU_ARCH := ppc64
58863 + KERNEL_ARCH := powerpc
58864 + KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
58865 +-ifeq ($(CHOST),$(CBUILD))
58866 ++ifeq ($(HOST_ARCH),$(ARCH))
58867 + QEMU_MACHINE := -cpu host,accel=kvm -machine pseries
58868 + else
58869 + QEMU_MACHINE := -machine pseries
58870 +@@ -190,7 +189,7 @@ else ifeq ($(ARCH),powerpc)
58871 + QEMU_ARCH := ppc
58872 + KERNEL_ARCH := powerpc
58873 + KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/powerpc/boot/uImage
58874 +-ifeq ($(CHOST),$(CBUILD))
58875 ++ifeq ($(HOST_ARCH),$(ARCH))
58876 + QEMU_MACHINE := -cpu host,accel=kvm -machine ppce500
58877 + else
58878 + QEMU_MACHINE := -machine ppce500
58879 +@@ -200,10 +199,11 @@ else ifeq ($(ARCH),m68k)
58880 + QEMU_ARCH := m68k
58881 + KERNEL_ARCH := m68k
58882 + KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
58883 +-ifeq ($(CHOST),$(CBUILD))
58884 +-QEMU_MACHINE := -cpu host,accel=kvm -machine q800
58885 ++KERNEL_CMDLINE := $(shell sed -n 's/CONFIG_CMDLINE=\(.*\)/\1/p' arch/m68k.config)
58886 ++ifeq ($(HOST_ARCH),$(ARCH))
58887 ++QEMU_MACHINE := -cpu host,accel=kvm -machine q800 -smp 1 -append $(KERNEL_CMDLINE)
58888 + else
58889 +-QEMU_MACHINE := -machine q800
58890 ++QEMU_MACHINE := -machine q800 -smp 1 -append $(KERNEL_CMDLINE)
58891 + endif
58892 + else
58893 + $(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, mips64el, powerpc64le, powerpc, m68k)
58894 +@@ -238,14 +238,14 @@ $(BUILD_PATH)/init-cpio-spec.txt:
58895 + echo "nod /dev/console 644 0 0 c 5 1" >> $@
58896 + echo "dir /bin 755 0 0" >> $@
58897 + echo "file /bin/iperf3 $(IPERF_PATH)/src/iperf3 755 0 0" >> $@
58898 +- echo "file /bin/wg $(WIREGUARD_TOOLS_PATH)/src/tools/wg 755 0 0" >> $@
58899 ++ echo "file /bin/wg $(WIREGUARD_TOOLS_PATH)/src/wg 755 0 0" >> $@
58900 + echo "file /bin/bash $(BASH_PATH)/bash 755 0 0" >> $@
58901 + echo "file /bin/ip $(IPROUTE2_PATH)/ip/ip 755 0 0" >> $@
58902 + echo "file /bin/ss $(IPROUTE2_PATH)/misc/ss 755 0 0" >> $@
58903 + echo "file /bin/ping $(IPUTILS_PATH)/ping 755 0 0" >> $@
58904 + echo "file /bin/ncat $(NMAP_PATH)/ncat/ncat 755 0 0" >> $@
58905 +- echo "file /bin/xtables-multi $(IPTABLES_PATH)/iptables/xtables-multi 755 0 0" >> $@
58906 +- echo "slink /bin/iptables xtables-multi 777 0 0" >> $@
58907 ++ echo "file /bin/xtables-legacy-multi $(IPTABLES_PATH)/iptables/xtables-legacy-multi 755 0 0" >> $@
58908 ++ echo "slink /bin/iptables xtables-legacy-multi 777 0 0" >> $@
58909 + echo "slink /bin/ping6 ping 777 0 0" >> $@
58910 + echo "dir /lib 755 0 0" >> $@
58911 + echo "file /lib/libc.so $(MUSL_PATH)/lib/libc.so 755 0 0" >> $@
58912 +@@ -260,8 +260,8 @@ $(KERNEL_BUILD_PATH)/.config: kernel.config arch/$(ARCH).config
58913 + cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config $(KERNEL_BUILD_PATH)/minimal.config
58914 + $(if $(findstring yes,$(DEBUG_KERNEL)),cp debug.config $(KERNEL_BUILD_PATH) && cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config debug.config,)
58915 +
58916 +-$(KERNEL_BZIMAGE): $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(MUSL_PATH)/lib/libc.so $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/tools/wg $(BUILD_PATH)/init ../netns.sh $(WIREGUARD_SOURCES)
58917 +- $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) CC="$(NOPIE_GCC)"
58918 ++$(KERNEL_BZIMAGE): $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(MUSL_PATH)/lib/libc.so $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-legacy-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/wg $(BUILD_PATH)/init ../netns.sh $(WIREGUARD_SOURCES)
58919 ++ $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE)
58920 +
58921 + $(BUILD_PATH)/include/linux/.installed: | $(KERNEL_BUILD_PATH)/.config
58922 + $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) INSTALL_HDR_PATH=$(BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) headers_install
58923 +@@ -280,7 +280,7 @@ $(BUILD_PATH)/include/.installed: $(MUSL_PATH)/lib/libc.so
58924 +
58925 + $(MUSL_CC): $(MUSL_PATH)/lib/libc.so
58926 + sh $(MUSL_PATH)/tools/musl-gcc.specs.sh $(BUILD_PATH)/include $(MUSL_PATH)/lib /lib/ld-linux.so.1 > $(BUILD_PATH)/musl-gcc.specs
58927 +- printf '#!/bin/sh\nexec "$(REAL_CC)" --specs="$(BUILD_PATH)/musl-gcc.specs" -fno-stack-protector -no-pie "$$@"\n' > $(BUILD_PATH)/musl-gcc
58928 ++ printf '#!/bin/sh\nexec "$(REAL_CC)" --specs="$(BUILD_PATH)/musl-gcc.specs" "$$@"\n' > $(BUILD_PATH)/musl-gcc
58929 + chmod +x $(BUILD_PATH)/musl-gcc
58930 +
58931 + $(IPERF_PATH)/.installed: $(IPERF_TAR)
58932 +@@ -291,7 +291,7 @@ $(IPERF_PATH)/.installed: $(IPERF_TAR)
58933 + touch $@
58934 +
58935 + $(IPERF_PATH)/src/iperf3: | $(IPERF_PATH)/.installed $(USERSPACE_DEPS)
58936 +- cd $(IPERF_PATH) && CFLAGS="$(CFLAGS) -D_GNU_SOURCE" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared
58937 ++ cd $(IPERF_PATH) && CFLAGS="$(CFLAGS) -D_GNU_SOURCE" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --with-openssl=no
58938 + $(MAKE) -C $(IPERF_PATH)
58939 + $(STRIP) -s $@
58940 +
58941 +@@ -308,8 +308,8 @@ $(WIREGUARD_TOOLS_PATH)/.installed: $(WIREGUARD_TOOLS_TAR)
58942 + flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
58943 + touch $@
58944 +
58945 +-$(WIREGUARD_TOOLS_PATH)/src/tools/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
58946 +- LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" $(MAKE) -C $(WIREGUARD_TOOLS_PATH)/src/tools LIBMNL_CFLAGS="-I$(LIBMNL_PATH)/include" LIBMNL_LDLIBS="-lmnl" wg
58947 ++$(WIREGUARD_TOOLS_PATH)/src/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
58948 ++ LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" $(MAKE) -C $(WIREGUARD_TOOLS_PATH)/src LIBMNL_CFLAGS="-I$(LIBMNL_PATH)/include" LIBMNL_LDLIBS="-lmnl" wg
58949 + $(STRIP) -s $@
58950 +
58951 + $(BUILD_PATH)/init: init.c | $(USERSPACE_DEPS)
58952 +@@ -323,7 +323,8 @@ $(IPUTILS_PATH)/.installed: $(IPUTILS_TAR)
58953 + touch $@
58954 +
58955 + $(IPUTILS_PATH)/ping: | $(IPUTILS_PATH)/.installed $(USERSPACE_DEPS)
58956 +- $(MAKE) -C $(IPUTILS_PATH) USE_CAP=no USE_IDN=no USE_NETTLE=no USE_CRYPTO=no ping
58957 ++ sed -i /atexit/d $(IPUTILS_PATH)/ping.c
58958 ++ cd $(IPUTILS_PATH) && $(CC) $(CFLAGS) -std=c99 -o $@ ping.c ping_common.c ping6_common.c iputils_common.c -D_GNU_SOURCE -D'IPUTILS_VERSION(f)=f' -lresolv $(LDFLAGS)
58959 + $(STRIP) -s $@
58960 +
58961 + $(BASH_PATH)/.installed: $(BASH_TAR)
58962 +@@ -357,7 +358,7 @@ $(IPTABLES_PATH)/.installed: $(IPTABLES_TAR)
58963 + sed -i -e "/nfnetlink=[01]/s:=[01]:=0:" -e "/nfconntrack=[01]/s:=[01]:=0:" $(IPTABLES_PATH)/configure
58964 + touch $@
58965 +
58966 +-$(IPTABLES_PATH)/iptables/xtables-multi: | $(IPTABLES_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
58967 ++$(IPTABLES_PATH)/iptables/xtables-legacy-multi: | $(IPTABLES_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
58968 + cd $(IPTABLES_PATH) && PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --disable-nftables --disable-bpf-compiler --disable-nfsynproxy --disable-libipq --with-kernel=$(BUILD_PATH)/include
58969 + $(MAKE) -C $(IPTABLES_PATH)
58970 + $(STRIP) -s $@
58971 +@@ -368,8 +369,9 @@ $(NMAP_PATH)/.installed: $(NMAP_TAR)
58972 + touch $@
58973 +
58974 + $(NMAP_PATH)/ncat/ncat: | $(NMAP_PATH)/.installed $(USERSPACE_DEPS)
58975 +- cd $(NMAP_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --without-ndiff --without-zenmap --without-nping --with-libpcap=included --with-libpcre=included --with-libdnet=included --without-liblua --with-liblinear=included --without-nmap-update --without-openssl --with-pcap=linux
58976 +- $(MAKE) -C $(NMAP_PATH) build-ncat
58977 ++ cd $(NMAP_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --without-ndiff --without-zenmap --without-nping --with-libpcap=included --with-libpcre=included --with-libdnet=included --without-liblua --with-liblinear=included --without-nmap-update --without-openssl --with-pcap=linux --without-libssh
58978 ++ $(MAKE) -C $(NMAP_PATH)/libpcap
58979 ++ $(MAKE) -C $(NMAP_PATH)/ncat
58980 + $(STRIP) -s $@
58981 +
58982 + clean:
58983 +@@ -379,7 +381,7 @@ distclean: clean
58984 + rm -rf $(DISTFILES_PATH)
58985 +
58986 + menuconfig: $(KERNEL_BUILD_PATH)/.config
58987 +- $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) CC="$(NOPIE_GCC)" menuconfig
58988 ++ $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) menuconfig
58989 +
58990 + .PHONY: qemu build clean distclean menuconfig
58991 + .DELETE_ON_ERROR:
58992 +diff --git a/tools/testing/selftests/wireguard/qemu/arch/m68k.config b/tools/testing/selftests/wireguard/qemu/arch/m68k.config
58993 +index 5381ea10896c..62a15bdb877e 100644
58994 +--- a/tools/testing/selftests/wireguard/qemu/arch/m68k.config
58995 ++++ b/tools/testing/selftests/wireguard/qemu/arch/m68k.config
58996 +@@ -1,9 +1,9 @@
58997 + CONFIG_MMU=y
58998 ++CONFIG_M68KCLASSIC=y
58999 + CONFIG_M68040=y
59000 + CONFIG_MAC=y
59001 + CONFIG_SERIAL_PMACZILOG=y
59002 + CONFIG_SERIAL_PMACZILOG_TTYS=y
59003 + CONFIG_SERIAL_PMACZILOG_CONSOLE=y
59004 +-CONFIG_CMDLINE_BOOL=y
59005 + CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
59006 + CONFIG_FRAME_WARN=1024
59007 +diff --git a/tools/testing/selftests/wireguard/qemu/init.c b/tools/testing/selftests/wireguard/qemu/init.c
59008 +index 51e5ddedee88..90bc9813cadc 100644
59009 +--- a/tools/testing/selftests/wireguard/qemu/init.c
59010 ++++ b/tools/testing/selftests/wireguard/qemu/init.c
59011 +@@ -21,6 +21,7 @@
59012 + #include <sys/reboot.h>
59013 + #include <sys/utsname.h>
59014 + #include <sys/sendfile.h>
59015 ++#include <sys/sysmacros.h>
59016 + #include <linux/random.h>
59017 + #include <linux/version.h>
59018 +
59019 +diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config
59020 +index 9cca30206014..af9323a0b6e0 100644
59021 +--- a/tools/testing/selftests/wireguard/qemu/kernel.config
59022 ++++ b/tools/testing/selftests/wireguard/qemu/kernel.config
59023 +@@ -39,6 +39,7 @@ CONFIG_PRINTK=y
59024 + CONFIG_KALLSYMS=y
59025 + CONFIG_BUG=y
59026 + CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y
59027 ++CONFIG_JUMP_LABEL=y
59028 + CONFIG_EMBEDDED=n
59029 + CONFIG_BASE_FULL=y
59030 + CONFIG_FUTEX=y
59031 +@@ -55,6 +56,7 @@ CONFIG_NO_HZ_IDLE=y
59032 + CONFIG_NO_HZ_FULL=n
59033 + CONFIG_HZ_PERIODIC=n
59034 + CONFIG_HIGH_RES_TIMERS=y
59035 ++CONFIG_COMPAT_32BIT_TIME=y
59036 + CONFIG_ARCH_RANDOM=y
59037 + CONFIG_FILE_LOCKING=y
59038 + CONFIG_POSIX_TIMERS=y
59039 +--
59040 +cgit v1.2.3-4-ga26e
59041 +
59042 +
59043 +From dc3fde4a24f0014b3b8b88f0ccc7bbc5fd5737dd Mon Sep 17 00:00:00 2001
59044 +From: "Jason A. Donenfeld" <Jason@×××××.com>
59045 +Date: Thu, 2 Jan 2020 17:47:50 +0100
59046 +Subject: wireguard: queueing: do not account for pfmemalloc when clearing skb
59047 + header
59048 +
59049 +commit 04d2ea92a18417619182cbb79063f154892b0150 upstream.
59050 +
59051 +Before 8b7008620b84 ("net: Don't copy pfmemalloc flag in
59052 +__copy_skb_header()"), the pfmemalloc flag used to be between headers_start and
59053 +headers_end, which is a region we clear when preparing the packet for
59054 +encryption/decryption. This is a parameter we certainly want to
59055 +preserve, which is why 8b7008620b84 moved it out of there. The code here
59056 +was written in a world before 8b7008620b84, though, where we had to
59057 +manually account for it. This commit brings things up to speed.
59058 +
59059 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
59060 +Signed-off-by: David S. Miller <davem@×××××××××.net>
59061 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
59062 +---
59063 + drivers/net/wireguard/queueing.h | 3 ---
59064 + 1 file changed, 3 deletions(-)
59065 +
59066 +diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h
59067 +index 58fdd630b246..e62c714a548e 100644
59068 +--- a/drivers/net/wireguard/queueing.h
59069 ++++ b/drivers/net/wireguard/queueing.h
59070 +@@ -83,13 +83,10 @@ static inline __be16 wg_skb_examine_untrusted_ip_hdr(struct sk_buff *skb)
59071 +
59072 + static inline void wg_reset_packet(struct sk_buff *skb)
59073 + {
59074 +- const int pfmemalloc = skb->pfmemalloc;
59075 +-
59076 + skb_scrub_packet(skb, true);
59077 + memset(&skb->headers_start, 0,
59078 + offsetof(struct sk_buff, headers_end) -
59079 + offsetof(struct sk_buff, headers_start));
59080 +- skb->pfmemalloc = pfmemalloc;
59081 + skb->queue_mapping = 0;
59082 + skb->nohdr = 0;
59083 + skb->peeked = 0;
59084 +--
59085 +cgit v1.2.3-4-ga26e
59086 +
59087 +
59088 +From 17cba5a635e390727c6e8d545862d10aa66f0d17 Mon Sep 17 00:00:00 2001
59089 +From: "Jason A. Donenfeld" <Jason@×××××.com>
59090 +Date: Thu, 2 Jan 2020 17:47:51 +0100
59091 +Subject: wireguard: socket: mark skbs as not on list when receiving via gro
59092 +
59093 +commit 736775d06bac60d7a353e405398b48b2bd8b1e54 upstream.
59094 +
59095 +Certain drivers will pass gro skbs to udp, at which point the udp driver
59096 +simply iterates through them and passes them off to encap_rcv, which is
59097 +where we pick up. At the moment, we're not attempting to coalesce these
59098 +into bundles, but we also don't want to wind up having cascaded lists of
59099 +skbs treated separately. The right behavior here, then, is to just mark
59100 +each incoming one as not on a list. This can be seen in practice, for
59101 +example, with Qualcomm's rmnet_perf driver.
59102 +
59103 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
59104 +Tested-by: Yaroslav Furman <yaro330@×××××.com>
59105 +Signed-off-by: David S. Miller <davem@×××××××××.net>
59106 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
59107 +---
59108 + drivers/net/wireguard/socket.c | 1 +
59109 + 1 file changed, 1 insertion(+)
59110 +
59111 +diff --git a/drivers/net/wireguard/socket.c b/drivers/net/wireguard/socket.c
59112 +index c46256d0d81c..262f3b5c819d 100644
59113 +--- a/drivers/net/wireguard/socket.c
59114 ++++ b/drivers/net/wireguard/socket.c
59115 +@@ -333,6 +333,7 @@ static int wg_receive(struct sock *sk, struct sk_buff *skb)
59116 + wg = sk->sk_user_data;
59117 + if (unlikely(!wg))
59118 + goto err;
59119 ++ skb_mark_not_on_list(skb);
59120 + wg_packet_receive(wg, skb);
59121 + return 0;
59122 +
59123 +--
59124 +cgit v1.2.3-4-ga26e
59125 +
59126 +
59127 +From bdc8a25aac2deb79f1d0b8fb97691de2a4fe858b Mon Sep 17 00:00:00 2001
59128 +From: "Jason A. Donenfeld" <Jason@×××××.com>
59129 +Date: Wed, 8 Jan 2020 16:59:02 -0500
59130 +Subject: net: introduce skb_list_walk_safe for skb segment walking
59131 +
59132 +commit dcfea72e79b0aa7a057c8f6024169d86a1bbc84b upstream.
59133 +
59134 +As part of the continual effort to remove direct usage of skb->next and
59135 +skb->prev, this patch adds a helper for iterating through the
59136 +singly-linked variant of skb lists, which are used for lists of GSO
59137 +packets. The name "skb_list_..." has been chosen to match the existing
59138 +function, "kfree_skb_list", which also operates on these singly-linked
59139 +lists, and the "..._walk_safe" part is the same idiom as elsewhere in
59140 +the kernel.
59141 +
59142 +This patch removes the helper from wireguard and puts it into
59143 +linux/skbuff.h, while making it a bit more robust for general usage. In
59144 +particular, parentheses are added around the macro argument usage, and it
59145 +now accounts for trying to iterate through an already-null skb pointer,
59146 +which will simply run the iteration zero times. This latter enhancement
59147 +means it can be used to replace both do { ... } while and while (...)
59148 +open-coded idioms.
59149 +
59150 +This should take care of these three possible usages, which match all
59151 +current methods of iteration.
59152 +
59153 +skb_list_walk_safe(segs, skb, next) { ... }
59154 +skb_list_walk_safe(skb, skb, next) { ... }
59155 +skb_list_walk_safe(segs, skb, segs) { ... }
59156 +
59157 +Gcc appears to generate efficient code for each of these.
59158 +
59159 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
59160 +Signed-off-by: David S. Miller <davem@×××××××××.net>
59161 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
59162 +---
59163 + drivers/net/wireguard/device.h | 8 --------
59164 + include/linux/skbuff.h | 5 +++++
59165 + 2 files changed, 5 insertions(+), 8 deletions(-)
59166 +
59167 +diff --git a/drivers/net/wireguard/device.h b/drivers/net/wireguard/device.h
59168 +index c91f3051c5c7..b15a8be9d816 100644
59169 +--- a/drivers/net/wireguard/device.h
59170 ++++ b/drivers/net/wireguard/device.h
59171 +@@ -62,12 +62,4 @@ struct wg_device {
59172 + int wg_device_init(void);
59173 + void wg_device_uninit(void);
59174 +
59175 +-/* Later after the dust settles, this can be moved into include/linux/skbuff.h,
59176 +- * where virtually all code that deals with GSO segs can benefit, around ~30
59177 +- * drivers as of writing.
59178 +- */
59179 +-#define skb_list_walk_safe(first, skb, next) \
59180 +- for (skb = first, next = skb->next; skb; \
59181 +- skb = next, next = skb ? skb->next : NULL)
59182 +-
59183 + #endif /* _WG_DEVICE_H */
59184 +diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
59185 +index 955e1370f033..5e9fe508977f 100644
59186 +--- a/include/linux/skbuff.h
59187 ++++ b/include/linux/skbuff.h
59188 +@@ -1480,6 +1480,11 @@ static inline void skb_mark_not_on_list(struct sk_buff *skb)
59189 + skb->next = NULL;
59190 + }
59191 +
59192 ++/* Iterate through singly-linked GSO fragments of an skb. */
59193 ++#define skb_list_walk_safe(first, skb, next) \
59194 ++ for ((skb) = (first), (next) = (skb) ? (skb)->next : NULL; (skb); \
59195 ++ (skb) = (next), (next) = (skb) ? (skb)->next : NULL)
59196 ++
59197 + static inline void skb_list_del_init(struct sk_buff *skb)
59198 + {
59199 + __list_del_entry(&skb->list);
59200 +--
59201 +cgit v1.2.3-4-ga26e
59202 +
59203 +
59204 +From 34b15ff9019f709b9f6a8e72377e0641e75618ec Mon Sep 17 00:00:00 2001
59205 +From: "Jason A. Donenfeld" <Jason@×××××.com>
59206 +Date: Mon, 13 Jan 2020 18:42:26 -0500
59207 +Subject: net: skbuff: disambiguate argument and member for skb_list_walk_safe
59208 + helper
59209 +
59210 +commit 5eee7bd7e245914e4e050c413dfe864e31805207 upstream.
59211 +
59212 +This worked before, because we made all callers name their next pointer
59213 +"next". But in trying to be more "drop-in" ready, the silliness here is
59214 +revealed. This commit fixes the problem by making the macro argument and
59215 +the member use different names.
59216 +
59217 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
59218 +Signed-off-by: David S. Miller <davem@×××××××××.net>
59219 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
59220 +---
59221 + include/linux/skbuff.h | 6 +++---
59222 + 1 file changed, 3 insertions(+), 3 deletions(-)
59223 +
59224 +diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
59225 +index 5e9fe508977f..3c7755d29636 100644
59226 +--- a/include/linux/skbuff.h
59227 ++++ b/include/linux/skbuff.h
59228 +@@ -1481,9 +1481,9 @@ static inline void skb_mark_not_on_list(struct sk_buff *skb)
59229 + }
59230 +
59231 + /* Iterate through singly-linked GSO fragments of an skb. */
59232 +-#define skb_list_walk_safe(first, skb, next) \
59233 +- for ((skb) = (first), (next) = (skb) ? (skb)->next : NULL; (skb); \
59234 +- (skb) = (next), (next) = (skb) ? (skb)->next : NULL)
59235 ++#define skb_list_walk_safe(first, skb, next_skb) \
59236 ++ for ((skb) = (first), (next_skb) = (skb) ? (skb)->next : NULL; (skb); \
59237 ++ (skb) = (next_skb), (next_skb) = (skb) ? (skb)->next : NULL)
59238 +
59239 + static inline void skb_list_del_init(struct sk_buff *skb)
59240 + {
59241 +--
59242 +cgit v1.2.3-4-ga26e
59243 +
59244 +
59245 +From df4e51d1e69d70602f31c77457f78809838bc65a Mon Sep 17 00:00:00 2001
59246 +From: Eric Dumazet <edumazet@××××××.com>
59247 +Date: Tue, 4 Feb 2020 22:17:25 +0100
59248 +Subject: wireguard: allowedips: fix use-after-free in root_remove_peer_lists
59249 +
59250 +commit 9981159fc3b677b357f84e069a11de5a5ec8a2a8 upstream.
59251 +
59252 +In the unlikely case a new node could not be allocated, we need to
59253 +remove @newnode from @peer->allowedips_list before freeing it.
59254 +
59255 +syzbot reported:
59256 +
59257 +BUG: KASAN: use-after-free in __list_del_entry_valid+0xdc/0xf5 lib/list_debug.c:54
59258 +Read of size 8 at addr ffff88809881a538 by task syz-executor.4/30133
59259 +
59260 +CPU: 0 PID: 30133 Comm: syz-executor.4 Not tainted 5.5.0-syzkaller #0
59261 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
59262 +Call Trace:
59263 + __dump_stack lib/dump_stack.c:77 [inline]
59264 + dump_stack+0x197/0x210 lib/dump_stack.c:118
59265 + print_address_description.constprop.0.cold+0xd4/0x30b mm/kasan/report.c:374
59266 + __kasan_report.cold+0x1b/0x32 mm/kasan/report.c:506
59267 + kasan_report+0x12/0x20 mm/kasan/common.c:639
59268 + __asan_report_load8_noabort+0x14/0x20 mm/kasan/generic_report.c:135
59269 + __list_del_entry_valid+0xdc/0xf5 lib/list_debug.c:54
59270 + __list_del_entry include/linux/list.h:132 [inline]
59271 + list_del include/linux/list.h:146 [inline]
59272 + root_remove_peer_lists+0x24f/0x4b0 drivers/net/wireguard/allowedips.c:65
59273 + wg_allowedips_free+0x232/0x390 drivers/net/wireguard/allowedips.c:300
59274 + wg_peer_remove_all+0xd5/0x620 drivers/net/wireguard/peer.c:187
59275 + wg_set_device+0xd01/0x1350 drivers/net/wireguard/netlink.c:542
59276 + genl_family_rcv_msg_doit net/netlink/genetlink.c:672 [inline]
59277 + genl_family_rcv_msg net/netlink/genetlink.c:717 [inline]
59278 + genl_rcv_msg+0x67d/0xea0 net/netlink/genetlink.c:734
59279 + netlink_rcv_skb+0x177/0x450 net/netlink/af_netlink.c:2477
59280 + genl_rcv+0x29/0x40 net/netlink/genetlink.c:745
59281 + netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline]
59282 + netlink_unicast+0x59e/0x7e0 net/netlink/af_netlink.c:1328
59283 + netlink_sendmsg+0x91c/0xea0 net/netlink/af_netlink.c:1917
59284 + sock_sendmsg_nosec net/socket.c:652 [inline]
59285 + sock_sendmsg+0xd7/0x130 net/socket.c:672
59286 + ____sys_sendmsg+0x753/0x880 net/socket.c:2343
59287 + ___sys_sendmsg+0x100/0x170 net/socket.c:2397
59288 + __sys_sendmsg+0x105/0x1d0 net/socket.c:2430
59289 + __do_sys_sendmsg net/socket.c:2439 [inline]
59290 + __se_sys_sendmsg net/socket.c:2437 [inline]
59291 + __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2437
59292 + do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294
59293 + entry_SYSCALL_64_after_hwframe+0x49/0xbe
59294 +RIP: 0033:0x45b399
59295 +Code: ad b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00
59296 +RSP: 002b:00007f99a9bcdc78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
59297 +RAX: ffffffffffffffda RBX: 00007f99a9bce6d4 RCX: 000000000045b399
59298 +RDX: 0000000000000000 RSI: 0000000020001340 RDI: 0000000000000003
59299 +RBP: 000000000075bf20 R08: 0000000000000000 R09: 0000000000000000
59300 +R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000004
59301 +R13: 00000000000009ba R14: 00000000004cb2b8 R15: 0000000000000009
59302 +
59303 +Allocated by task 30103:
59304 + save_stack+0x23/0x90 mm/kasan/common.c:72
59305 + set_track mm/kasan/common.c:80 [inline]
59306 + __kasan_kmalloc mm/kasan/common.c:513 [inline]
59307 + __kasan_kmalloc.constprop.0+0xcf/0xe0 mm/kasan/common.c:486
59308 + kasan_kmalloc+0x9/0x10 mm/kasan/common.c:527
59309 + kmem_cache_alloc_trace+0x158/0x790 mm/slab.c:3551
59310 + kmalloc include/linux/slab.h:556 [inline]
59311 + kzalloc include/linux/slab.h:670 [inline]
59312 + add+0x70a/0x1970 drivers/net/wireguard/allowedips.c:236
59313 + wg_allowedips_insert_v4+0xf6/0x160 drivers/net/wireguard/allowedips.c:320
59314 + set_allowedip drivers/net/wireguard/netlink.c:343 [inline]
59315 + set_peer+0xfb9/0x1150 drivers/net/wireguard/netlink.c:468
59316 + wg_set_device+0xbd4/0x1350 drivers/net/wireguard/netlink.c:591
59317 + genl_family_rcv_msg_doit net/netlink/genetlink.c:672 [inline]
59318 + genl_family_rcv_msg net/netlink/genetlink.c:717 [inline]
59319 + genl_rcv_msg+0x67d/0xea0 net/netlink/genetlink.c:734
59320 + netlink_rcv_skb+0x177/0x450 net/netlink/af_netlink.c:2477
59321 + genl_rcv+0x29/0x40 net/netlink/genetlink.c:745
59322 + netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline]
59323 + netlink_unicast+0x59e/0x7e0 net/netlink/af_netlink.c:1328
59324 + netlink_sendmsg+0x91c/0xea0 net/netlink/af_netlink.c:1917
59325 + sock_sendmsg_nosec net/socket.c:652 [inline]
59326 + sock_sendmsg+0xd7/0x130 net/socket.c:672
59327 + ____sys_sendmsg+0x753/0x880 net/socket.c:2343
59328 + ___sys_sendmsg+0x100/0x170 net/socket.c:2397
59329 + __sys_sendmsg+0x105/0x1d0 net/socket.c:2430
59330 + __do_sys_sendmsg net/socket.c:2439 [inline]
59331 + __se_sys_sendmsg net/socket.c:2437 [inline]
59332 + __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2437
59333 + do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294
59334 + entry_SYSCALL_64_after_hwframe+0x49/0xbe
59335 +
59336 +Freed by task 30103:
59337 + save_stack+0x23/0x90 mm/kasan/common.c:72
59338 + set_track mm/kasan/common.c:80 [inline]
59339 + kasan_set_free_info mm/kasan/common.c:335 [inline]
59340 + __kasan_slab_free+0x102/0x150 mm/kasan/common.c:474
59341 + kasan_slab_free+0xe/0x10 mm/kasan/common.c:483
59342 + __cache_free mm/slab.c:3426 [inline]
59343 + kfree+0x10a/0x2c0 mm/slab.c:3757
59344 + add+0x12d2/0x1970 drivers/net/wireguard/allowedips.c:266
59345 + wg_allowedips_insert_v4+0xf6/0x160 drivers/net/wireguard/allowedips.c:320
59346 + set_allowedip drivers/net/wireguard/netlink.c:343 [inline]
59347 + set_peer+0xfb9/0x1150 drivers/net/wireguard/netlink.c:468
59348 + wg_set_device+0xbd4/0x1350 drivers/net/wireguard/netlink.c:591
59349 + genl_family_rcv_msg_doit net/netlink/genetlink.c:672 [inline]
59350 + genl_family_rcv_msg net/netlink/genetlink.c:717 [inline]
59351 + genl_rcv_msg+0x67d/0xea0 net/netlink/genetlink.c:734
59352 + netlink_rcv_skb+0x177/0x450 net/netlink/af_netlink.c:2477
59353 + genl_rcv+0x29/0x40 net/netlink/genetlink.c:745
59354 + netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline]
59355 + netlink_unicast+0x59e/0x7e0 net/netlink/af_netlink.c:1328
59356 + netlink_sendmsg+0x91c/0xea0 net/netlink/af_netlink.c:1917
59357 + sock_sendmsg_nosec net/socket.c:652 [inline]
59358 + sock_sendmsg+0xd7/0x130 net/socket.c:672
59359 + ____sys_sendmsg+0x753/0x880 net/socket.c:2343
59360 + ___sys_sendmsg+0x100/0x170 net/socket.c:2397
59361 + __sys_sendmsg+0x105/0x1d0 net/socket.c:2430
59362 + __do_sys_sendmsg net/socket.c:2439 [inline]
59363 + __se_sys_sendmsg net/socket.c:2437 [inline]
59364 + __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2437
59365 + do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294
59366 + entry_SYSCALL_64_after_hwframe+0x49/0xbe
59367 +
59368 +The buggy address belongs to the object at ffff88809881a500
59369 + which belongs to the cache kmalloc-64 of size 64
59370 +The buggy address is located 56 bytes inside of
59371 + 64-byte region [ffff88809881a500, ffff88809881a540)
59372 +The buggy address belongs to the page:
59373 +page:ffffea0002620680 refcount:1 mapcount:0 mapping:ffff8880aa400380 index:0x0
59374 +raw: 00fffe0000000200 ffffea000250b748 ffffea000254bac8 ffff8880aa400380
59375 +raw: 0000000000000000 ffff88809881a000 0000000100000020 0000000000000000
59376 +page dumped because: kasan: bad access detected
59377 +
59378 +Memory state around the buggy address:
59379 + ffff88809881a400: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
59380 + ffff88809881a480: 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc fc
59381 +>ffff88809881a500: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
59382 + ^
59383 + ffff88809881a580: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
59384 + ffff88809881a600: 00 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc
59385 +
59386 +Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
59387 +Signed-off-by: Eric Dumazet <edumazet@××××××.com>
59388 +Reported-by: syzbot <syzkaller@××××××××××××.com>
59389 +Cc: Jason A. Donenfeld <Jason@×××××.com>
59390 +Cc: wireguard@×××××××××××.com
59391 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
59392 +Signed-off-by: David S. Miller <davem@×××××××××.net>
59393 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
59394 +---
59395 + drivers/net/wireguard/allowedips.c | 1 +
59396 + 1 file changed, 1 insertion(+)
59397 +
59398 +diff --git a/drivers/net/wireguard/allowedips.c b/drivers/net/wireguard/allowedips.c
59399 +index 121d9ea0f135..3725e9cd85f4 100644
59400 +--- a/drivers/net/wireguard/allowedips.c
59401 ++++ b/drivers/net/wireguard/allowedips.c
59402 +@@ -263,6 +263,7 @@ static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
59403 + } else {
59404 + node = kzalloc(sizeof(*node), GFP_KERNEL);
59405 + if (unlikely(!node)) {
59406 ++ list_del(&newnode->peer_list);
59407 + kfree(newnode);
59408 + return -ENOMEM;
59409 + }
59410 +--
59411 +cgit v1.2.3-4-ga26e
59412 +
59413 +
59414 +From a5bb2a5ab0aeb5d019521b8f35854c65cb85ac7b Mon Sep 17 00:00:00 2001
59415 +From: "Jason A. Donenfeld" <Jason@×××××.com>
59416 +Date: Tue, 4 Feb 2020 22:17:26 +0100
59417 +Subject: wireguard: noise: reject peers with low order public keys
59418 +
59419 +commit ec31c2676a10e064878927b243fada8c2fb0c03c upstream.
59420 +
59421 +Our static-static calculation returns a failure if the public key is of
59422 +low order. We check for this when peers are added, and don't allow them
59423 +to be added if they're low order, except in the case where we haven't
59424 +yet been given a private key. In that case, we would defer the removal
59425 +of the peer until we're given a private key, since at that point we're
59426 +doing new static-static calculations which incur failures we can act on.
59427 +This meant, however, that we wound up removing peers rather late in the
59428 +configuration flow.
59429 +
59430 +Syzkaller points out that peer_remove calls flush_workqueue, which in
59431 +turn might then wait for sending a handshake initiation to complete.
59432 +Since handshake initiation needs the static identity lock, holding the
59433 +static identity lock while calling peer_remove can result in a rare
59434 +deadlock. We have precisely this case in this situation of late-stage
59435 +peer removal based on an invalid public key. We can't drop the lock when
59436 +removing, because then incoming handshakes might interact with a bogus
59437 +static-static calculation.
59438 +
59439 +While the band-aid patch for this would involve breaking up the peer
59440 +removal into two steps like wg_peer_remove_all does, in order to solve
59441 +the locking issue, there's actually a much more elegant way of fixing
59442 +this:
59443 +
59444 +If the static-static calculation succeeds with one private key, it
59445 +*must* succeed with all others, because all 32-byte strings map to valid
59446 +private keys, thanks to clamping. That means we can get rid of this
59447 +silly dance and locking headaches of removing peers late in the
59448 +configuration flow, and instead just reject them early on, regardless of
59449 +whether the device has yet been assigned a private key. For the case
59450 +where the device doesn't yet have a private key, we safely use zeros
59451 +just for the purposes of checking for low order points by way of
59452 +checking the output of the calculation.
59453 +
59454 +The following PoC will trigger the deadlock:
59455 +
59456 +ip link add wg0 type wireguard
59457 +ip addr add 10.0.0.1/24 dev wg0
59458 +ip link set wg0 up
59459 +ping -f 10.0.0.2 &
59460 +while true; do
59461 + wg set wg0 private-key /dev/null peer AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= allowed-ips 10.0.0.0/24 endpoint 10.0.0.3:1234
59462 + wg set wg0 private-key <(echo AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=)
59463 +done
59464 +
59465 +[ 0.949105] ======================================================
59466 +[ 0.949550] WARNING: possible circular locking dependency detected
59467 +[ 0.950143] 5.5.0-debug+ #18 Not tainted
59468 +[ 0.950431] ------------------------------------------------------
59469 +[ 0.950959] wg/89 is trying to acquire lock:
59470 +[ 0.951252] ffff8880333e2128 ((wq_completion)wg-kex-wg0){+.+.}, at: flush_workqueue+0xe3/0x12f0
59471 +[ 0.951865]
59472 +[ 0.951865] but task is already holding lock:
59473 +[ 0.952280] ffff888032819bc0 (&wg->static_identity.lock){++++}, at: wg_set_device+0x95d/0xcc0
59474 +[ 0.953011]
59475 +[ 0.953011] which lock already depends on the new lock.
59476 +[ 0.953011]
59477 +[ 0.953651]
59478 +[ 0.953651] the existing dependency chain (in reverse order) is:
59479 +[ 0.954292]
59480 +[ 0.954292] -> #2 (&wg->static_identity.lock){++++}:
59481 +[ 0.954804] lock_acquire+0x127/0x350
59482 +[ 0.955133] down_read+0x83/0x410
59483 +[ 0.955428] wg_noise_handshake_create_initiation+0x97/0x700
59484 +[ 0.955885] wg_packet_send_handshake_initiation+0x13a/0x280
59485 +[ 0.956401] wg_packet_handshake_send_worker+0x10/0x20
59486 +[ 0.956841] process_one_work+0x806/0x1500
59487 +[ 0.957167] worker_thread+0x8c/0xcb0
59488 +[ 0.957549] kthread+0x2ee/0x3b0
59489 +[ 0.957792] ret_from_fork+0x24/0x30
59490 +[ 0.958234]
59491 +[ 0.958234] -> #1 ((work_completion)(&peer->transmit_handshake_work)){+.+.}:
59492 +[ 0.958808] lock_acquire+0x127/0x350
59493 +[ 0.959075] process_one_work+0x7ab/0x1500
59494 +[ 0.959369] worker_thread+0x8c/0xcb0
59495 +[ 0.959639] kthread+0x2ee/0x3b0
59496 +[ 0.959896] ret_from_fork+0x24/0x30
59497 +[ 0.960346]
59498 +[ 0.960346] -> #0 ((wq_completion)wg-kex-wg0){+.+.}:
59499 +[ 0.960945] check_prev_add+0x167/0x1e20
59500 +[ 0.961351] __lock_acquire+0x2012/0x3170
59501 +[ 0.961725] lock_acquire+0x127/0x350
59502 +[ 0.961990] flush_workqueue+0x106/0x12f0
59503 +[ 0.962280] peer_remove_after_dead+0x160/0x220
59504 +[ 0.962600] wg_set_device+0xa24/0xcc0
59505 +[ 0.962994] genl_rcv_msg+0x52f/0xe90
59506 +[ 0.963298] netlink_rcv_skb+0x111/0x320
59507 +[ 0.963618] genl_rcv+0x1f/0x30
59508 +[ 0.963853] netlink_unicast+0x3f6/0x610
59509 +[ 0.964245] netlink_sendmsg+0x700/0xb80
59510 +[ 0.964586] __sys_sendto+0x1dd/0x2c0
59511 +[ 0.964854] __x64_sys_sendto+0xd8/0x1b0
59512 +[ 0.965141] do_syscall_64+0x90/0xd9a
59513 +[ 0.965408] entry_SYSCALL_64_after_hwframe+0x49/0xbe
59514 +[ 0.965769]
59515 +[ 0.965769] other info that might help us debug this:
59516 +[ 0.965769]
59517 +[ 0.966337] Chain exists of:
59518 +[ 0.966337] (wq_completion)wg-kex-wg0 --> (work_completion)(&peer->transmit_handshake_work) --> &wg->static_identity.lock
59519 +[ 0.966337]
59520 +[ 0.967417] Possible unsafe locking scenario:
59521 +[ 0.967417]
59522 +[ 0.967836] CPU0 CPU1
59523 +[ 0.968155] ---- ----
59524 +[ 0.968497] lock(&wg->static_identity.lock);
59525 +[ 0.968779] lock((work_completion)(&peer->transmit_handshake_work));
59526 +[ 0.969345] lock(&wg->static_identity.lock);
59527 +[ 0.969809] lock((wq_completion)wg-kex-wg0);
59528 +[ 0.970146]
59529 +[ 0.970146] *** DEADLOCK ***
59530 +[ 0.970146]
59531 +[ 0.970531] 5 locks held by wg/89:
59532 +[ 0.970908] #0: ffffffff827433c8 (cb_lock){++++}, at: genl_rcv+0x10/0x30
59533 +[ 0.971400] #1: ffffffff82743480 (genl_mutex){+.+.}, at: genl_rcv_msg+0x642/0xe90
59534 +[ 0.971924] #2: ffffffff827160c0 (rtnl_mutex){+.+.}, at: wg_set_device+0x9f/0xcc0
59535 +[ 0.972488] #3: ffff888032819de0 (&wg->device_update_lock){+.+.}, at: wg_set_device+0xb0/0xcc0
59536 +[ 0.973095] #4: ffff888032819bc0 (&wg->static_identity.lock){++++}, at: wg_set_device+0x95d/0xcc0
59537 +[ 0.973653]
59538 +[ 0.973653] stack backtrace:
59539 +[ 0.973932] CPU: 1 PID: 89 Comm: wg Not tainted 5.5.0-debug+ #18
59540 +[ 0.974476] Call Trace:
59541 +[ 0.974638] dump_stack+0x97/0xe0
59542 +[ 0.974869] check_noncircular+0x312/0x3e0
59543 +[ 0.975132] ? print_circular_bug+0x1f0/0x1f0
59544 +[ 0.975410] ? __kernel_text_address+0x9/0x30
59545 +[ 0.975727] ? unwind_get_return_address+0x51/0x90
59546 +[ 0.976024] check_prev_add+0x167/0x1e20
59547 +[ 0.976367] ? graph_lock+0x70/0x160
59548 +[ 0.976682] __lock_acquire+0x2012/0x3170
59549 +[ 0.976998] ? register_lock_class+0x1140/0x1140
59550 +[ 0.977323] lock_acquire+0x127/0x350
59551 +[ 0.977627] ? flush_workqueue+0xe3/0x12f0
59552 +[ 0.977890] flush_workqueue+0x106/0x12f0
59553 +[ 0.978147] ? flush_workqueue+0xe3/0x12f0
59554 +[ 0.978410] ? find_held_lock+0x2c/0x110
59555 +[ 0.978662] ? lock_downgrade+0x6e0/0x6e0
59556 +[ 0.978919] ? queue_rcu_work+0x60/0x60
59557 +[ 0.979166] ? netif_napi_del+0x151/0x3b0
59558 +[ 0.979501] ? peer_remove_after_dead+0x160/0x220
59559 +[ 0.979871] peer_remove_after_dead+0x160/0x220
59560 +[ 0.980232] wg_set_device+0xa24/0xcc0
59561 +[ 0.980516] ? deref_stack_reg+0x8e/0xc0
59562 +[ 0.980801] ? set_peer+0xe10/0xe10
59563 +[ 0.981040] ? __ww_mutex_check_waiters+0x150/0x150
59564 +[ 0.981430] ? __nla_validate_parse+0x163/0x270
59565 +[ 0.981719] ? genl_family_rcv_msg_attrs_parse+0x13f/0x310
59566 +[ 0.982078] genl_rcv_msg+0x52f/0xe90
59567 +[ 0.982348] ? genl_family_rcv_msg_attrs_parse+0x310/0x310
59568 +[ 0.982690] ? register_lock_class+0x1140/0x1140
59569 +[ 0.983049] netlink_rcv_skb+0x111/0x320
59570 +[ 0.983298] ? genl_family_rcv_msg_attrs_parse+0x310/0x310
59571 +[ 0.983645] ? netlink_ack+0x880/0x880
59572 +[ 0.983888] genl_rcv+0x1f/0x30
59573 +[ 0.984168] netlink_unicast+0x3f6/0x610
59574 +[ 0.984443] ? netlink_detachskb+0x60/0x60
59575 +[ 0.984729] ? find_held_lock+0x2c/0x110
59576 +[ 0.984976] netlink_sendmsg+0x700/0xb80
59577 +[ 0.985220] ? netlink_broadcast_filtered+0xa60/0xa60
59578 +[ 0.985533] __sys_sendto+0x1dd/0x2c0
59579 +[ 0.985763] ? __x64_sys_getpeername+0xb0/0xb0
59580 +[ 0.986039] ? sockfd_lookup_light+0x17/0x160
59581 +[ 0.986397] ? __sys_recvmsg+0x8c/0xf0
59582 +[ 0.986711] ? __sys_recvmsg_sock+0xd0/0xd0
59583 +[ 0.987018] __x64_sys_sendto+0xd8/0x1b0
59584 +[ 0.987283] ? lockdep_hardirqs_on+0x39b/0x5a0
59585 +[ 0.987666] do_syscall_64+0x90/0xd9a
59586 +[ 0.987903] entry_SYSCALL_64_after_hwframe+0x49/0xbe
59587 +[ 0.988223] RIP: 0033:0x7fe77c12003e
59588 +[ 0.988508] Code: c3 8b 07 85 c0 75 24 49 89 fb 48 89 f0 48 89 d7 48 89 ce 4c 89 c2 4d 89 ca 4c 8b 44 24 08 4c 8b 4c 24 10 4c 4
59589 +[ 0.989666] RSP: 002b:00007fffada2ed58 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
59590 +[ 0.990137] RAX: ffffffffffffffda RBX: 00007fe77c159d48 RCX: 00007fe77c12003e
59591 +[ 0.990583] RDX: 0000000000000040 RSI: 000055fd1d38e020 RDI: 0000000000000004
59592 +[ 0.991091] RBP: 000055fd1d38e020 R08: 000055fd1cb63358 R09: 000000000000000c
59593 +[ 0.991568] R10: 0000000000000000 R11: 0000000000000246 R12: 000000000000002c
59594 +[ 0.992014] R13: 0000000000000004 R14: 000055fd1d38e020 R15: 0000000000000001
59595 +
59596 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
59597 +Reported-by: syzbot <syzkaller@××××××××××××.com>
59598 +Signed-off-by: David S. Miller <davem@×××××××××.net>
59599 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
59600 +---
59601 + drivers/net/wireguard/netlink.c | 6 ++----
59602 + drivers/net/wireguard/noise.c | 10 +++++++---
59603 + 2 files changed, 9 insertions(+), 7 deletions(-)
59604 +
59605 +diff --git a/drivers/net/wireguard/netlink.c b/drivers/net/wireguard/netlink.c
59606 +index 0739a2cd1920..45a631e79d7a 100644
59607 +--- a/drivers/net/wireguard/netlink.c
59608 ++++ b/drivers/net/wireguard/netlink.c
59609 +@@ -575,10 +575,8 @@ static int wg_set_device(struct sk_buff *skb, struct genl_info *info)
59610 + private_key);
59611 + list_for_each_entry_safe(peer, temp, &wg->peer_list,
59612 + peer_list) {
59613 +- if (wg_noise_precompute_static_static(peer))
59614 +- wg_noise_expire_current_peer_keypairs(peer);
59615 +- else
59616 +- wg_peer_remove(peer);
59617 ++ BUG_ON(!wg_noise_precompute_static_static(peer));
59618 ++ wg_noise_expire_current_peer_keypairs(peer);
59619 + }
59620 + wg_cookie_checker_precompute_device_keys(&wg->cookie_checker);
59621 + up_write(&wg->static_identity.lock);
59622 +diff --git a/drivers/net/wireguard/noise.c b/drivers/net/wireguard/noise.c
59623 +index d71c8db68a8c..919d9d866446 100644
59624 +--- a/drivers/net/wireguard/noise.c
59625 ++++ b/drivers/net/wireguard/noise.c
59626 +@@ -46,17 +46,21 @@ void __init wg_noise_init(void)
59627 + /* Must hold peer->handshake.static_identity->lock */
59628 + bool wg_noise_precompute_static_static(struct wg_peer *peer)
59629 + {
59630 +- bool ret = true;
59631 ++ bool ret;
59632 +
59633 + down_write(&peer->handshake.lock);
59634 +- if (peer->handshake.static_identity->has_identity)
59635 ++ if (peer->handshake.static_identity->has_identity) {
59636 + ret = curve25519(
59637 + peer->handshake.precomputed_static_static,
59638 + peer->handshake.static_identity->static_private,
59639 + peer->handshake.remote_static);
59640 +- else
59641 ++ } else {
59642 ++ u8 empty[NOISE_PUBLIC_KEY_LEN] = { 0 };
59643 ++
59644 ++ ret = curve25519(empty, empty, peer->handshake.remote_static);
59645 + memset(peer->handshake.precomputed_static_static, 0,
59646 + NOISE_PUBLIC_KEY_LEN);
59647 ++ }
59648 + up_write(&peer->handshake.lock);
59649 + return ret;
59650 + }
59651 +--
59652 +cgit v1.2.3-4-ga26e
59653 +
59654 +
59655 +From c25186dfc16e83c01c55f4177a93a547831c20d4 Mon Sep 17 00:00:00 2001
59656 +From: "Jason A. Donenfeld" <Jason@×××××.com>
59657 +Date: Tue, 4 Feb 2020 22:17:27 +0100
59658 +Subject: wireguard: selftests: ensure non-addition of peers with failed
59659 + precomputation
59660 +
59661 +commit f9398acba6a4ae9cb98bfe4d56414d376eff8d57 upstream.
59662 +
59663 +Ensure that peers with low order points are ignored, both in the case
59664 +where we already have a device private key and in the case where we do
59665 +not. This adds points that naturally give a zero output.
59666 +
59667 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
59668 +Signed-off-by: David S. Miller <davem@×××××××××.net>
59669 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
59670 +---
59671 + tools/testing/selftests/wireguard/netns.sh | 6 ++++++
59672 + 1 file changed, 6 insertions(+)
59673 +
59674 +diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh
59675 +index d5c85c7494f2..b03647d1bbf6 100755
59676 +--- a/tools/testing/selftests/wireguard/netns.sh
59677 ++++ b/tools/testing/selftests/wireguard/netns.sh
59678 +@@ -516,6 +516,12 @@ n0 wg set wg0 peer "$pub2" allowed-ips 0.0.0.0/0,10.0.0.0/8,100.0.0.0/10,172.16.
59679 + n0 wg set wg0 peer "$pub2" allowed-ips 0.0.0.0/0
59680 + n0 wg set wg0 peer "$pub2" allowed-ips ::/0,1700::/111,5000::/4,e000::/37,9000::/75
59681 + n0 wg set wg0 peer "$pub2" allowed-ips ::/0
59682 ++n0 wg set wg0 peer "$pub2" remove
59683 ++low_order_points=( AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= AQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= 4Ot6fDtBuK4WVuP68Z/EatoJjeucMrH9hmIFFl9JuAA= X5yVvKNQjCSx0LFVnIPvWwREXMRYHI6G2CJO3dCfEVc= 7P///////////////////////////////////////38= 7f///////////////////////////////////////38= 7v///////////////////////////////////////38= )
59684 ++n0 wg set wg0 private-key /dev/null ${low_order_points[@]/#/peer }
59685 ++[[ -z $(n0 wg show wg0 peers) ]]
59686 ++n0 wg set wg0 private-key <(echo "$key1") ${low_order_points[@]/#/peer }
59687 ++[[ -z $(n0 wg show wg0 peers) ]]
59688 + ip0 link del wg0
59689 +
59690 + declare -A objects
59691 +--
59692 +cgit v1.2.3-4-ga26e
59693 +
59694 +
59695 +From 3d05fe9dd8a792d432b6c575f3522e450b1f2906 Mon Sep 17 00:00:00 2001
59696 +From: "Jason A. Donenfeld" <Jason@×××××.com>
59697 +Date: Tue, 4 Feb 2020 22:17:29 +0100
59698 +Subject: wireguard: selftests: tie socket waiting to target pid
59699 +
59700 +commit 88f404a9b1d75388225b1c67b6dd327cb2182777 upstream.
59701 +
59702 +Without this, we wind up proceeding too early sometimes when the
59703 +previous process has just used the same listening port. So, we tie the
59704 +listening socket query to the specific pid we're interested in.
59705 +
59706 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
59707 +Signed-off-by: David S. Miller <davem@×××××××××.net>
59708 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
59709 +---
59710 + tools/testing/selftests/wireguard/netns.sh | 17 ++++++++---------
59711 + 1 file changed, 8 insertions(+), 9 deletions(-)
59712 +
59713 +diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh
59714 +index b03647d1bbf6..f5ab1cda8bb5 100755
59715 +--- a/tools/testing/selftests/wireguard/netns.sh
59716 ++++ b/tools/testing/selftests/wireguard/netns.sh
59717 +@@ -38,9 +38,8 @@ ip0() { pretty 0 "ip $*"; ip -n $netns0 "$@"; }
59718 + ip1() { pretty 1 "ip $*"; ip -n $netns1 "$@"; }
59719 + ip2() { pretty 2 "ip $*"; ip -n $netns2 "$@"; }
59720 + sleep() { read -t "$1" -N 1 || true; }
59721 +-waitiperf() { pretty "${1//*-}" "wait for iperf:5201"; while [[ $(ss -N "$1" -tlp 'sport = 5201') != *iperf3* ]]; do sleep 0.1; done; }
59722 +-waitncatudp() { pretty "${1//*-}" "wait for udp:1111"; while [[ $(ss -N "$1" -ulp 'sport = 1111') != *ncat* ]]; do sleep 0.1; done; }
59723 +-waitncattcp() { pretty "${1//*-}" "wait for tcp:1111"; while [[ $(ss -N "$1" -tlp 'sport = 1111') != *ncat* ]]; do sleep 0.1; done; }
59724 ++waitiperf() { pretty "${1//*-}" "wait for iperf:5201 pid $2"; while [[ $(ss -N "$1" -tlpH 'sport = 5201') != *\"iperf3\",pid=$2,fd=* ]]; do sleep 0.1; done; }
59725 ++waitncatudp() { pretty "${1//*-}" "wait for udp:1111 pid $2"; while [[ $(ss -N "$1" -ulpH 'sport = 1111') != *\"ncat\",pid=$2,fd=* ]]; do sleep 0.1; done; }
59726 + waitiface() { pretty "${1//*-}" "wait for $2 to come up"; ip netns exec "$1" bash -c "while [[ \$(< \"/sys/class/net/$2/operstate\") != up ]]; do read -t .1 -N 0 || true; done;"; }
59727 +
59728 + cleanup() {
59729 +@@ -119,22 +118,22 @@ tests() {
59730 +
59731 + # TCP over IPv4
59732 + n2 iperf3 -s -1 -B 192.168.241.2 &
59733 +- waitiperf $netns2
59734 ++ waitiperf $netns2 $!
59735 + n1 iperf3 -Z -t 3 -c 192.168.241.2
59736 +
59737 + # TCP over IPv6
59738 + n1 iperf3 -s -1 -B fd00::1 &
59739 +- waitiperf $netns1
59740 ++ waitiperf $netns1 $!
59741 + n2 iperf3 -Z -t 3 -c fd00::1
59742 +
59743 + # UDP over IPv4
59744 + n1 iperf3 -s -1 -B 192.168.241.1 &
59745 +- waitiperf $netns1
59746 ++ waitiperf $netns1 $!
59747 + n2 iperf3 -Z -t 3 -b 0 -u -c 192.168.241.1
59748 +
59749 + # UDP over IPv6
59750 + n2 iperf3 -s -1 -B fd00::2 &
59751 +- waitiperf $netns2
59752 ++ waitiperf $netns2 $!
59753 + n1 iperf3 -Z -t 3 -b 0 -u -c fd00::2
59754 + }
59755 +
59756 +@@ -207,7 +206,7 @@ n1 ping -W 1 -c 1 192.168.241.2
59757 + n1 wg set wg0 peer "$pub2" allowed-ips 192.168.241.0/24
59758 + exec 4< <(n1 ncat -l -u -p 1111)
59759 + ncat_pid=$!
59760 +-waitncatudp $netns1
59761 ++waitncatudp $netns1 $ncat_pid
59762 + n2 ncat -u 192.168.241.1 1111 <<<"X"
59763 + read -r -N 1 -t 1 out <&4 && [[ $out == "X" ]]
59764 + kill $ncat_pid
59765 +@@ -216,7 +215,7 @@ n1 wg set wg0 peer "$more_specific_key" allowed-ips 192.168.241.2/32
59766 + n2 wg set wg0 listen-port 9997
59767 + exec 4< <(n1 ncat -l -u -p 1111)
59768 + ncat_pid=$!
59769 +-waitncatudp $netns1
59770 ++waitncatudp $netns1 $ncat_pid
59771 + n2 ncat -u 192.168.241.1 1111 <<<"X"
59772 + ! read -r -N 1 -t 1 out <&4 || false
59773 + kill $ncat_pid
59774 +--
59775 +cgit v1.2.3-4-ga26e
59776 +
59777 +
59778 +From 57af60bdb9ca9449ef602749b5d31b12077c42f5 Mon Sep 17 00:00:00 2001
59779 +From: "Jason A. Donenfeld" <Jason@×××××.com>
59780 +Date: Tue, 11 Feb 2020 20:47:08 +0100
59781 +Subject: wireguard: device: use icmp_ndo_send helper
59782 +
59783 +commit a12d7f3cbdc72c7625881c8dc2660fc2c979fdf2 upstream.
59784 +
59785 +Because wireguard is calling icmp from network device context, it should
59786 +use the ndo helper so that the rate limiting applies correctly. This
59787 +commit adds a small test to the wireguard test suite to ensure that the
59788 +new functions continue doing the right thing in the context of
59789 +wireguard. It does this by setting up a condition that will definately
59790 +evoke an icmp error message from the driver, but along a nat'd path.
59791 +
59792 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
59793 +Signed-off-by: David S. Miller <davem@×××××××××.net>
59794 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
59795 +---
59796 + drivers/net/wireguard/device.c | 4 ++--
59797 + tools/testing/selftests/wireguard/netns.sh | 11 +++++++++++
59798 + 2 files changed, 13 insertions(+), 2 deletions(-)
59799 +
59800 +diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c
59801 +index 16b19824b9ad..43db442b1373 100644
59802 +--- a/drivers/net/wireguard/device.c
59803 ++++ b/drivers/net/wireguard/device.c
59804 +@@ -203,9 +203,9 @@ err_peer:
59805 + err:
59806 + ++dev->stats.tx_errors;
59807 + if (skb->protocol == htons(ETH_P_IP))
59808 +- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
59809 ++ icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
59810 + else if (skb->protocol == htons(ETH_P_IPV6))
59811 +- icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
59812 ++ icmpv6_ndo_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
59813 + kfree_skb(skb);
59814 + return ret;
59815 + }
59816 +diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh
59817 +index f5ab1cda8bb5..138d46b3f330 100755
59818 +--- a/tools/testing/selftests/wireguard/netns.sh
59819 ++++ b/tools/testing/selftests/wireguard/netns.sh
59820 +@@ -24,6 +24,7 @@
59821 + set -e
59822 +
59823 + exec 3>&1
59824 ++export LANG=C
59825 + export WG_HIDE_KEYS=never
59826 + netns0="wg-test-$$-0"
59827 + netns1="wg-test-$$-1"
59828 +@@ -297,7 +298,17 @@ ip1 -4 rule add table main suppress_prefixlength 0
59829 + n1 ping -W 1 -c 100 -f 192.168.99.7
59830 + n1 ping -W 1 -c 100 -f abab::1111
59831 +
59832 ++# Have ns2 NAT into wg0 packets from ns0, but return an icmp error along the right route.
59833 ++n2 iptables -t nat -A POSTROUTING -s 10.0.0.0/24 -d 192.168.241.0/24 -j SNAT --to 192.168.241.2
59834 ++n0 iptables -t filter -A INPUT \! -s 10.0.0.0/24 -i vethrs -j DROP # Manual rpfilter just to be explicit.
59835 ++n2 bash -c 'printf 1 > /proc/sys/net/ipv4/ip_forward'
59836 ++ip0 -4 route add 192.168.241.1 via 10.0.0.100
59837 ++n2 wg set wg0 peer "$pub1" remove
59838 ++[[ $(! n0 ping -W 1 -c 1 192.168.241.1 || false) == *"From 10.0.0.100 icmp_seq=1 Destination Host Unreachable"* ]]
59839 ++
59840 + n0 iptables -t nat -F
59841 ++n0 iptables -t filter -F
59842 ++n2 iptables -t nat -F
59843 + ip0 link del vethrc
59844 + ip0 link del vethrs
59845 + ip1 link del wg0
59846 +--
59847 +cgit v1.2.3-4-ga26e
59848 +
59849 +
59850 +From d26e91479c3b141559e55dd230cce1a90a513144 Mon Sep 17 00:00:00 2001
59851 +From: "Jason A. Donenfeld" <Jason@×××××.com>
59852 +Date: Fri, 14 Feb 2020 23:57:20 +0100
59853 +Subject: wireguard: selftests: reduce complexity and fix make races
59854 +
59855 +commit 04ddf1208f03e1dbc39a4619c40eba640051b950 upstream.
59856 +
59857 +This gives us fewer dependencies and shortens build time, fixes up some
59858 +hash checking race conditions, and also fixes missing directory creation
59859 +that caused issues on massively parallel builds.
59860 +
59861 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
59862 +Signed-off-by: David S. Miller <davem@×××××××××.net>
59863 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
59864 +---
59865 + tools/testing/selftests/wireguard/qemu/Makefile | 38 +++++++++----------------
59866 + 1 file changed, 14 insertions(+), 24 deletions(-)
59867 +
59868 +diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile
59869 +index f10aa3590adc..28d477683e8a 100644
59870 +--- a/tools/testing/selftests/wireguard/qemu/Makefile
59871 ++++ b/tools/testing/selftests/wireguard/qemu/Makefile
59872 +@@ -38,19 +38,17 @@ endef
59873 + define file_download =
59874 + $(DISTFILES_PATH)/$(1):
59875 + mkdir -p $(DISTFILES_PATH)
59876 +- flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp'
59877 +- if echo "$(3) $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi
59878 ++ flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp; [ -f $$@.tmp ] || exit 1; if echo "$(3) $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi'
59879 + endef
59880 +
59881 + $(eval $(call tar_download,MUSL,musl,1.1.24,.tar.gz,https://www.musl-libc.org/releases/,1370c9a812b2cf2a7d92802510cca0058cc37e66a7bedd70051f0a34015022a3))
59882 +-$(eval $(call tar_download,LIBMNL,libmnl,1.0.4,.tar.bz2,https://www.netfilter.org/projects/libmnl/files/,171f89699f286a5854b72b91d06e8f8e3683064c5901fb09d954a9ab6f551f81))
59883 + $(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/,d846040224317caf2f75c843d309a950a7db23f9b44b94688ccbe557d6d1710c))
59884 + $(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d))
59885 + $(eval $(call tar_download,IPROUTE2,iproute2,5.4.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,fe97aa60a0d4c5ac830be18937e18dc3400ca713a33a89ad896ff1e3d46086ae))
59886 + $(eval $(call tar_download,IPTABLES,iptables,1.8.4,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,993a3a5490a544c2cbf2ef15cf7e7ed21af1845baf228318d5c36ef8827e157c))
59887 + $(eval $(call tar_download,NMAP,nmap,7.80,.tar.bz2,https://nmap.org/dist/,fcfa5a0e42099e12e4bf7a68ebe6fde05553383a682e816a7ec9256ab4773faa))
59888 + $(eval $(call tar_download,IPUTILS,iputils,s20190709,.tar.gz,https://github.com/iputils/iputils/archive/s20190709.tar.gz/#,a15720dd741d7538dd2645f9f516d193636ae4300ff7dbc8bfca757bf166490a))
59889 +-$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20191226,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,aa8af0fdc9872d369d8c890a84dbc2a2466b55795dccd5b47721b2d97644b04f))
59890 ++$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20200206,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,f5207248c6a3c3e3bfc9ab30b91c1897b00802ed861e1f9faaed873366078c64))
59891 +
59892 + KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug)
59893 + rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d))
59894 +@@ -295,21 +293,13 @@ $(IPERF_PATH)/src/iperf3: | $(IPERF_PATH)/.installed $(USERSPACE_DEPS)
59895 + $(MAKE) -C $(IPERF_PATH)
59896 + $(STRIP) -s $@
59897 +
59898 +-$(LIBMNL_PATH)/.installed: $(LIBMNL_TAR)
59899 +- flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
59900 +- touch $@
59901 +-
59902 +-$(LIBMNL_PATH)/src/.libs/libmnl.a: | $(LIBMNL_PATH)/.installed $(USERSPACE_DEPS)
59903 +- cd $(LIBMNL_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared
59904 +- $(MAKE) -C $(LIBMNL_PATH)
59905 +- sed -i 's:prefix=.*:prefix=$(LIBMNL_PATH):' $(LIBMNL_PATH)/libmnl.pc
59906 +-
59907 + $(WIREGUARD_TOOLS_PATH)/.installed: $(WIREGUARD_TOOLS_TAR)
59908 ++ mkdir -p $(BUILD_PATH)
59909 + flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
59910 + touch $@
59911 +
59912 +-$(WIREGUARD_TOOLS_PATH)/src/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
59913 +- LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" $(MAKE) -C $(WIREGUARD_TOOLS_PATH)/src LIBMNL_CFLAGS="-I$(LIBMNL_PATH)/include" LIBMNL_LDLIBS="-lmnl" wg
59914 ++$(WIREGUARD_TOOLS_PATH)/src/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(USERSPACE_DEPS)
59915 ++ $(MAKE) -C $(WIREGUARD_TOOLS_PATH)/src wg
59916 + $(STRIP) -s $@
59917 +
59918 + $(BUILD_PATH)/init: init.c | $(USERSPACE_DEPS)
59919 +@@ -340,17 +330,17 @@ $(BASH_PATH)/bash: | $(BASH_PATH)/.installed $(USERSPACE_DEPS)
59920 + $(IPROUTE2_PATH)/.installed: $(IPROUTE2_TAR)
59921 + mkdir -p $(BUILD_PATH)
59922 + flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
59923 +- printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=y\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS -DHAVE_LIBMNL -I$(LIBMNL_PATH)/include\nLDLIBS+=-lmnl' > $(IPROUTE2_PATH)/config.mk
59924 ++ printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=n\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS\n' > $(IPROUTE2_PATH)/config.mk
59925 + printf 'lib: snapshot\n\t$$(MAKE) -C lib\nip/ip: lib\n\t$$(MAKE) -C ip ip\nmisc/ss: lib\n\t$$(MAKE) -C misc ss\n' >> $(IPROUTE2_PATH)/Makefile
59926 + touch $@
59927 +
59928 +-$(IPROUTE2_PATH)/ip/ip: | $(IPROUTE2_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
59929 +- LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ ip/ip
59930 +- $(STRIP) -s $(IPROUTE2_PATH)/ip/ip
59931 ++$(IPROUTE2_PATH)/ip/ip: | $(IPROUTE2_PATH)/.installed $(USERSPACE_DEPS)
59932 ++ $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ ip/ip
59933 ++ $(STRIP) -s $@
59934 +
59935 +-$(IPROUTE2_PATH)/misc/ss: | $(IPROUTE2_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
59936 +- LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ misc/ss
59937 +- $(STRIP) -s $(IPROUTE2_PATH)/misc/ss
59938 ++$(IPROUTE2_PATH)/misc/ss: | $(IPROUTE2_PATH)/.installed $(USERSPACE_DEPS)
59939 ++ $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ misc/ss
59940 ++ $(STRIP) -s $@
59941 +
59942 + $(IPTABLES_PATH)/.installed: $(IPTABLES_TAR)
59943 + mkdir -p $(BUILD_PATH)
59944 +@@ -358,8 +348,8 @@ $(IPTABLES_PATH)/.installed: $(IPTABLES_TAR)
59945 + sed -i -e "/nfnetlink=[01]/s:=[01]:=0:" -e "/nfconntrack=[01]/s:=[01]:=0:" $(IPTABLES_PATH)/configure
59946 + touch $@
59947 +
59948 +-$(IPTABLES_PATH)/iptables/xtables-legacy-multi: | $(IPTABLES_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
59949 +- cd $(IPTABLES_PATH) && PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --disable-nftables --disable-bpf-compiler --disable-nfsynproxy --disable-libipq --with-kernel=$(BUILD_PATH)/include
59950 ++$(IPTABLES_PATH)/iptables/xtables-legacy-multi: | $(IPTABLES_PATH)/.installed $(USERSPACE_DEPS)
59951 ++ cd $(IPTABLES_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --disable-nftables --disable-bpf-compiler --disable-nfsynproxy --disable-libipq --disable-connlabel --with-kernel=$(BUILD_PATH)/include
59952 + $(MAKE) -C $(IPTABLES_PATH)
59953 + $(STRIP) -s $@
59954 +
59955 +--
59956 +cgit v1.2.3-4-ga26e
59957 +
59958 +
59959 +From 9e1c979b0e58c17c005aae42c113e831fb3a436d Mon Sep 17 00:00:00 2001
59960 +From: "Jason A. Donenfeld" <Jason@×××××.com>
59961 +Date: Fri, 14 Feb 2020 23:57:21 +0100
59962 +Subject: wireguard: receive: reset last_under_load to zero
59963 +
59964 +commit 2a8a4df36462aa85b0db87b7c5ea145ba67e34a8 upstream.
59965 +
59966 +This is a small optimization that prevents more expensive comparisons
59967 +from happening when they are no longer necessary, by clearing the
59968 +last_under_load variable whenever we wind up in a state where we were
59969 +under load but we no longer are.
59970 +
59971 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
59972 +Suggested-by: Matt Dunwoodie <ncon@××××××××.net>
59973 +Signed-off-by: David S. Miller <davem@×××××××××.net>
59974 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
59975 +---
59976 + drivers/net/wireguard/receive.c | 7 +++++--
59977 + 1 file changed, 5 insertions(+), 2 deletions(-)
59978 +
59979 +diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c
59980 +index 9c6bab9c981f..4a153894cee2 100644
59981 +--- a/drivers/net/wireguard/receive.c
59982 ++++ b/drivers/net/wireguard/receive.c
59983 +@@ -118,10 +118,13 @@ static void wg_receive_handshake_packet(struct wg_device *wg,
59984 +
59985 + under_load = skb_queue_len(&wg->incoming_handshakes) >=
59986 + MAX_QUEUED_INCOMING_HANDSHAKES / 8;
59987 +- if (under_load)
59988 ++ if (under_load) {
59989 + last_under_load = ktime_get_coarse_boottime_ns();
59990 +- else if (last_under_load)
59991 ++ } else if (last_under_load) {
59992 + under_load = !wg_birthdate_has_expired(last_under_load, 1);
59993 ++ if (!under_load)
59994 ++ last_under_load = 0;
59995 ++ }
59996 + mac_state = wg_cookie_validate_packet(&wg->cookie_checker, skb,
59997 + under_load);
59998 + if ((under_load && mac_state == VALID_MAC_WITH_COOKIE) ||
59999 +--
60000 +cgit v1.2.3-4-ga26e
60001 +
60002 +
60003 +From 650ffb6ab845664dd85076cae39f65259a61d8ec Mon Sep 17 00:00:00 2001
60004 +From: "Jason A. Donenfeld" <Jason@×××××.com>
60005 +Date: Fri, 14 Feb 2020 23:57:22 +0100
60006 +Subject: wireguard: send: account for mtu=0 devices
60007 +
60008 +commit 175f1ca9a9ed8689d2028da1a7c624bb4fb4ff7e upstream.
60009 +
60010 +It turns out there's an easy way to get packets queued up while still
60011 +having an MTU of zero, and that's via persistent keep alive. This commit
60012 +makes sure that in whatever condition, we don't wind up dividing by
60013 +zero. Note that an MTU of zero for a wireguard interface is something
60014 +quasi-valid, so I don't think the correct fix is to limit it via
60015 +min_mtu. This can be reproduced easily with:
60016 +
60017 +ip link add wg0 type wireguard
60018 +ip link add wg1 type wireguard
60019 +ip link set wg0 up mtu 0
60020 +ip link set wg1 up
60021 +wg set wg0 private-key <(wg genkey)
60022 +wg set wg1 listen-port 1 private-key <(wg genkey) peer $(wg show wg0 public-key)
60023 +wg set wg0 peer $(wg show wg1 public-key) persistent-keepalive 1 endpoint 127.0.0.1:1
60024 +
60025 +However, while min_mtu=0 seems fine, it makes sense to restrict the
60026 +max_mtu. This commit also restricts the maximum MTU to the greatest
60027 +number for which rounding up to the padding multiple won't overflow a
60028 +signed integer. Packets this large were always rejected anyway
60029 +eventually, due to checks deeper in, but it seems more sound not to even
60030 +let the administrator configure something that won't work anyway.
60031 +
60032 +We use this opportunity to clean up this function a bit so that it's
60033 +clear which paths we're expecting.
60034 +
60035 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
60036 +Cc: Eric Dumazet <eric.dumazet@×××××.com>
60037 +Reviewed-by: Eric Dumazet <edumazet@××××××.com>
60038 +Signed-off-by: David S. Miller <davem@×××××××××.net>
60039 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
60040 +---
60041 + drivers/net/wireguard/device.c | 7 ++++---
60042 + drivers/net/wireguard/send.c | 16 +++++++++++-----
60043 + 2 files changed, 15 insertions(+), 8 deletions(-)
60044 +
60045 +diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c
60046 +index 43db442b1373..cdc96968b0f4 100644
60047 +--- a/drivers/net/wireguard/device.c
60048 ++++ b/drivers/net/wireguard/device.c
60049 +@@ -258,6 +258,8 @@ static void wg_setup(struct net_device *dev)
60050 + enum { WG_NETDEV_FEATURES = NETIF_F_HW_CSUM | NETIF_F_RXCSUM |
60051 + NETIF_F_SG | NETIF_F_GSO |
60052 + NETIF_F_GSO_SOFTWARE | NETIF_F_HIGHDMA };
60053 ++ const int overhead = MESSAGE_MINIMUM_LENGTH + sizeof(struct udphdr) +
60054 ++ max(sizeof(struct ipv6hdr), sizeof(struct iphdr));
60055 +
60056 + dev->netdev_ops = &netdev_ops;
60057 + dev->hard_header_len = 0;
60058 +@@ -271,9 +273,8 @@ static void wg_setup(struct net_device *dev)
60059 + dev->features |= WG_NETDEV_FEATURES;
60060 + dev->hw_features |= WG_NETDEV_FEATURES;
60061 + dev->hw_enc_features |= WG_NETDEV_FEATURES;
60062 +- dev->mtu = ETH_DATA_LEN - MESSAGE_MINIMUM_LENGTH -
60063 +- sizeof(struct udphdr) -
60064 +- max(sizeof(struct ipv6hdr), sizeof(struct iphdr));
60065 ++ dev->mtu = ETH_DATA_LEN - overhead;
60066 ++ dev->max_mtu = round_down(INT_MAX, MESSAGE_PADDING_MULTIPLE) - overhead;
60067 +
60068 + SET_NETDEV_DEVTYPE(dev, &device_type);
60069 +
60070 +diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c
60071 +index c13260563446..7348c10cbae3 100644
60072 +--- a/drivers/net/wireguard/send.c
60073 ++++ b/drivers/net/wireguard/send.c
60074 +@@ -143,16 +143,22 @@ static void keep_key_fresh(struct wg_peer *peer)
60075 +
60076 + static unsigned int calculate_skb_padding(struct sk_buff *skb)
60077 + {
60078 ++ unsigned int padded_size, last_unit = skb->len;
60079 ++
60080 ++ if (unlikely(!PACKET_CB(skb)->mtu))
60081 ++ return ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE) - last_unit;
60082 ++
60083 + /* We do this modulo business with the MTU, just in case the networking
60084 + * layer gives us a packet that's bigger than the MTU. In that case, we
60085 + * wouldn't want the final subtraction to overflow in the case of the
60086 +- * padded_size being clamped.
60087 ++ * padded_size being clamped. Fortunately, that's very rarely the case,
60088 ++ * so we optimize for that not happening.
60089 + */
60090 +- unsigned int last_unit = skb->len % PACKET_CB(skb)->mtu;
60091 +- unsigned int padded_size = ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE);
60092 ++ if (unlikely(last_unit > PACKET_CB(skb)->mtu))
60093 ++ last_unit %= PACKET_CB(skb)->mtu;
60094 +
60095 +- if (padded_size > PACKET_CB(skb)->mtu)
60096 +- padded_size = PACKET_CB(skb)->mtu;
60097 ++ padded_size = min(PACKET_CB(skb)->mtu,
60098 ++ ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE));
60099 + return padded_size - last_unit;
60100 + }
60101 +
60102 +--
60103 +cgit v1.2.3-4-ga26e
60104 +
60105 +
60106 +From 5e8e58db12266ae46507326d7ad95d9a701aba0c Mon Sep 17 00:00:00 2001
60107 +From: "Jason A. Donenfeld" <Jason@×××××.com>
60108 +Date: Fri, 14 Feb 2020 23:57:23 +0100
60109 +Subject: wireguard: socket: remove extra call to synchronize_net
60110 +
60111 +commit 1fbc33b0a7feb6ca72bf7dc8a05d81485ee8ee2e upstream.
60112 +
60113 +synchronize_net() is a wrapper around synchronize_rcu(), so there's no
60114 +point in having synchronize_net and synchronize_rcu back to back,
60115 +despite the documentation comment suggesting maybe it's somewhat useful,
60116 +"Wait for packets currently being received to be done." This commit
60117 +removes the extra call.
60118 +
60119 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
60120 +Suggested-by: Eric Dumazet <eric.dumazet@×××××.com>
60121 +Reviewed-by: Eric Dumazet <edumazet@××××××.com>
60122 +Signed-off-by: David S. Miller <davem@×××××××××.net>
60123 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
60124 +---
60125 + drivers/net/wireguard/socket.c | 1 -
60126 + 1 file changed, 1 deletion(-)
60127 +
60128 +diff --git a/drivers/net/wireguard/socket.c b/drivers/net/wireguard/socket.c
60129 +index 262f3b5c819d..b0d6541582d3 100644
60130 +--- a/drivers/net/wireguard/socket.c
60131 ++++ b/drivers/net/wireguard/socket.c
60132 +@@ -432,7 +432,6 @@ void wg_socket_reinit(struct wg_device *wg, struct sock *new4,
60133 + wg->incoming_port = ntohs(inet_sk(new4)->inet_sport);
60134 + mutex_unlock(&wg->socket_update_lock);
60135 + synchronize_rcu();
60136 +- synchronize_net();
60137 + sock_free(old4);
60138 + sock_free(old6);
60139 + }
60140 +--
60141 +cgit v1.2.3-4-ga26e
60142 +
60143 +
60144 +From 72a60897d15cc78f05ff3b2121d980966d08d0f9 Mon Sep 17 00:00:00 2001
60145 +From: YueHaibing <yuehaibing@××××××.com>
60146 +Date: Wed, 18 Mar 2020 18:30:43 -0600
60147 +Subject: wireguard: selftests: remove duplicated include <sys/types.h>
60148 +
60149 +commit 166391159c5deb84795d2ff46e95f276177fa5fb upstream.
60150 +
60151 +This commit removes a duplicated include.
60152 +
60153 +Signed-off-by: YueHaibing <yuehaibing@××××××.com>
60154 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
60155 +Signed-off-by: David S. Miller <davem@×××××××××.net>
60156 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
60157 +---
60158 + tools/testing/selftests/wireguard/qemu/init.c | 1 -
60159 + 1 file changed, 1 deletion(-)
60160 +
60161 +diff --git a/tools/testing/selftests/wireguard/qemu/init.c b/tools/testing/selftests/wireguard/qemu/init.c
60162 +index 90bc9813cadc..c9698120ac9d 100644
60163 +--- a/tools/testing/selftests/wireguard/qemu/init.c
60164 ++++ b/tools/testing/selftests/wireguard/qemu/init.c
60165 +@@ -13,7 +13,6 @@
60166 + #include <fcntl.h>
60167 + #include <sys/wait.h>
60168 + #include <sys/mount.h>
60169 +-#include <sys/types.h>
60170 + #include <sys/stat.h>
60171 + #include <sys/types.h>
60172 + #include <sys/io.h>
60173 +--
60174 +cgit v1.2.3-4-ga26e
60175 +
60176 +
60177 +From 8969eb6fd2e5560a230772c5877ca4d7708c884c Mon Sep 17 00:00:00 2001
60178 +From: "Jason A. Donenfeld" <Jason@×××××.com>
60179 +Date: Wed, 18 Mar 2020 18:30:45 -0600
60180 +Subject: wireguard: queueing: account for skb->protocol==0
60181 +
60182 +commit a5588604af448664e796daf3c1d5a4523c60667b upstream.
60183 +
60184 +We carry out checks to the effect of:
60185 +
60186 + if (skb->protocol != wg_examine_packet_protocol(skb))
60187 + goto err;
60188 +
60189 +By having wg_skb_examine_untrusted_ip_hdr return 0 on failure, this
60190 +means that the check above still passes in the case where skb->protocol
60191 +is zero, which is possible to hit with AF_PACKET:
60192 +
60193 + struct sockaddr_pkt saddr = { .spkt_device = "wg0" };
60194 + unsigned char buffer[5] = { 0 };
60195 + sendto(socket(AF_PACKET, SOCK_PACKET, /* skb->protocol = */ 0),
60196 + buffer, sizeof(buffer), 0, (const struct sockaddr *)&saddr, sizeof(saddr));
60197 +
60198 +Additional checks mean that this isn't actually a problem in the code
60199 +base, but I could imagine it becoming a problem later if the function is
60200 +used more liberally.
60201 +
60202 +I would prefer to fix this by having wg_examine_packet_protocol return a
60203 +32-bit ~0 value on failure, which will never match any value of
60204 +skb->protocol, which would simply change the generated code from a mov
60205 +to a movzx. However, sparse complains, and adding __force casts doesn't
60206 +seem like a good idea, so instead we just add a simple helper function
60207 +to check for the zero return value. Since wg_examine_packet_protocol
60208 +itself gets inlined, this winds up not adding an additional branch to
60209 +the generated code, since the 0 return value already happens in a
60210 +mergeable branch.
60211 +
60212 +Reported-by: Fabian Freyer <fabianfreyer@×××××××××××××××××××××.com>
60213 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
60214 +Signed-off-by: David S. Miller <davem@×××××××××.net>
60215 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
60216 +---
60217 + drivers/net/wireguard/device.c | 2 +-
60218 + drivers/net/wireguard/queueing.h | 8 +++++++-
60219 + drivers/net/wireguard/receive.c | 4 ++--
60220 + 3 files changed, 10 insertions(+), 4 deletions(-)
60221 +
60222 +diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c
60223 +index cdc96968b0f4..3ac3f8570ca1 100644
60224 +--- a/drivers/net/wireguard/device.c
60225 ++++ b/drivers/net/wireguard/device.c
60226 +@@ -122,7 +122,7 @@ static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev)
60227 + u32 mtu;
60228 + int ret;
60229 +
60230 +- if (unlikely(wg_skb_examine_untrusted_ip_hdr(skb) != skb->protocol)) {
60231 ++ if (unlikely(!wg_check_packet_protocol(skb))) {
60232 + ret = -EPROTONOSUPPORT;
60233 + net_dbg_ratelimited("%s: Invalid IP packet\n", dev->name);
60234 + goto err;
60235 +diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h
60236 +index e62c714a548e..3432232afe06 100644
60237 +--- a/drivers/net/wireguard/queueing.h
60238 ++++ b/drivers/net/wireguard/queueing.h
60239 +@@ -66,7 +66,7 @@ struct packet_cb {
60240 + #define PACKET_PEER(skb) (PACKET_CB(skb)->keypair->entry.peer)
60241 +
60242 + /* Returns either the correct skb->protocol value, or 0 if invalid. */
60243 +-static inline __be16 wg_skb_examine_untrusted_ip_hdr(struct sk_buff *skb)
60244 ++static inline __be16 wg_examine_packet_protocol(struct sk_buff *skb)
60245 + {
60246 + if (skb_network_header(skb) >= skb->head &&
60247 + (skb_network_header(skb) + sizeof(struct iphdr)) <=
60248 +@@ -81,6 +81,12 @@ static inline __be16 wg_skb_examine_untrusted_ip_hdr(struct sk_buff *skb)
60249 + return 0;
60250 + }
60251 +
60252 ++static inline bool wg_check_packet_protocol(struct sk_buff *skb)
60253 ++{
60254 ++ __be16 real_protocol = wg_examine_packet_protocol(skb);
60255 ++ return real_protocol && skb->protocol == real_protocol;
60256 ++}
60257 ++
60258 + static inline void wg_reset_packet(struct sk_buff *skb)
60259 + {
60260 + skb_scrub_packet(skb, true);
60261 +diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c
60262 +index 4a153894cee2..243ed7172dd2 100644
60263 +--- a/drivers/net/wireguard/receive.c
60264 ++++ b/drivers/net/wireguard/receive.c
60265 +@@ -56,7 +56,7 @@ static int prepare_skb_header(struct sk_buff *skb, struct wg_device *wg)
60266 + size_t data_offset, data_len, header_len;
60267 + struct udphdr *udp;
60268 +
60269 +- if (unlikely(wg_skb_examine_untrusted_ip_hdr(skb) != skb->protocol ||
60270 ++ if (unlikely(!wg_check_packet_protocol(skb) ||
60271 + skb_transport_header(skb) < skb->head ||
60272 + (skb_transport_header(skb) + sizeof(struct udphdr)) >
60273 + skb_tail_pointer(skb)))
60274 +@@ -388,7 +388,7 @@ static void wg_packet_consume_data_done(struct wg_peer *peer,
60275 + */
60276 + skb->ip_summed = CHECKSUM_UNNECESSARY;
60277 + skb->csum_level = ~0; /* All levels */
60278 +- skb->protocol = wg_skb_examine_untrusted_ip_hdr(skb);
60279 ++ skb->protocol = wg_examine_packet_protocol(skb);
60280 + if (skb->protocol == htons(ETH_P_IP)) {
60281 + len = ntohs(ip_hdr(skb)->tot_len);
60282 + if (unlikely(len < sizeof(struct iphdr)))
60283 +--
60284 +cgit v1.2.3-4-ga26e
60285 +
60286 +
60287 +From 9a90e708ec4e6db85bd43d76439d9d44b69e65a3 Mon Sep 17 00:00:00 2001
60288 +From: "Jason A. Donenfeld" <Jason@×××××.com>
60289 +Date: Wed, 18 Mar 2020 18:30:46 -0600
60290 +Subject: wireguard: receive: remove dead code from default packet type case
60291 +
60292 +commit 2b8765c52db24c0fbcc81bac9b5e8390f2c7d3c8 upstream.
60293 +
60294 +The situation in which we wind up hitting the default case here
60295 +indicates a major bug in earlier parsing code. It is not a usual thing
60296 +that should ever happen, which means a "friendly" message for it doesn't
60297 +make sense. Rather, replace this with a WARN_ON, just like we do earlier
60298 +in the file for a similar situation, so that somebody sends us a bug
60299 +report and we can fix it.
60300 +
60301 +Reported-by: Fabian Freyer <fabianfreyer@×××××××××××××××××××××.com>
60302 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
60303 +Signed-off-by: David S. Miller <davem@×××××××××.net>
60304 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
60305 +---
60306 + drivers/net/wireguard/receive.c | 3 +--
60307 + 1 file changed, 1 insertion(+), 2 deletions(-)
60308 +
60309 +diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c
60310 +index 243ed7172dd2..da3b782ab7d3 100644
60311 +--- a/drivers/net/wireguard/receive.c
60312 ++++ b/drivers/net/wireguard/receive.c
60313 +@@ -587,8 +587,7 @@ void wg_packet_receive(struct wg_device *wg, struct sk_buff *skb)
60314 + wg_packet_consume_data(wg, skb);
60315 + break;
60316 + default:
60317 +- net_dbg_skb_ratelimited("%s: Invalid packet from %pISpfsc\n",
60318 +- wg->dev->name, skb);
60319 ++ WARN(1, "Non-exhaustive parsing of packet header lead to unknown packet type!\n");
60320 + goto err;
60321 + }
60322 + return;
60323 +--
60324 +cgit v1.2.3-4-ga26e
60325 +
60326 +
60327 +From 2ac8a5abcd43cd1a04107a7494e3aaaf1bf269fd Mon Sep 17 00:00:00 2001
60328 +From: "Jason A. Donenfeld" <Jason@×××××.com>
60329 +Date: Wed, 18 Mar 2020 18:30:47 -0600
60330 +Subject: wireguard: noise: error out precomputed DH during handshake rather
60331 + than config
60332 +
60333 +commit 11a7686aa99c7fe4b3f80f6dcccd54129817984d upstream.
60334 +
60335 +We precompute the static-static ECDH during configuration time, in order
60336 +to save an expensive computation later when receiving network packets.
60337 +However, not all ECDH computations yield a contributory result. Previously,
60338 +we were just not letting those peers be added to the interface. However,
60339 +this creates a strange inconsistency, since it was still possible to add
60340 +other weird points, like a valid public key plus a low-order point, and,
60341 +like points that result in zeros, a handshake would not complete. In
60342 +order to make the behavior more uniform and less surprising, simply
60343 +allow all peers to be added. Then, we'll error out later when doing the
60344 +crypto if there's an issue. This also adds more separation between the
60345 +crypto layer and the configuration layer.
60346 +
60347 +Discussed-with: Mathias Hall-Andersen <mathias@×××××××××××××.dk>
60348 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
60349 +Signed-off-by: David S. Miller <davem@×××××××××.net>
60350 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
60351 +---
60352 + drivers/net/wireguard/netlink.c | 8 ++---
60353 + drivers/net/wireguard/noise.c | 55 ++++++++++++++++--------------
60354 + drivers/net/wireguard/noise.h | 12 +++----
60355 + drivers/net/wireguard/peer.c | 7 ++--
60356 + tools/testing/selftests/wireguard/netns.sh | 15 +++++---
60357 + 5 files changed, 49 insertions(+), 48 deletions(-)
60358 +
60359 +diff --git a/drivers/net/wireguard/netlink.c b/drivers/net/wireguard/netlink.c
60360 +index 45a631e79d7a..ab6cbe95a652 100644
60361 +--- a/drivers/net/wireguard/netlink.c
60362 ++++ b/drivers/net/wireguard/netlink.c
60363 +@@ -417,11 +417,7 @@ static int set_peer(struct wg_device *wg, struct nlattr **attrs)
60364 +
60365 + peer = wg_peer_create(wg, public_key, preshared_key);
60366 + if (IS_ERR(peer)) {
60367 +- /* Similar to the above, if the key is invalid, we skip
60368 +- * it without fanfare, so that services don't need to
60369 +- * worry about doing key validation themselves.
60370 +- */
60371 +- ret = PTR_ERR(peer) == -EKEYREJECTED ? 0 : PTR_ERR(peer);
60372 ++ ret = PTR_ERR(peer);
60373 + peer = NULL;
60374 + goto out;
60375 + }
60376 +@@ -575,7 +571,7 @@ static int wg_set_device(struct sk_buff *skb, struct genl_info *info)
60377 + private_key);
60378 + list_for_each_entry_safe(peer, temp, &wg->peer_list,
60379 + peer_list) {
60380 +- BUG_ON(!wg_noise_precompute_static_static(peer));
60381 ++ wg_noise_precompute_static_static(peer);
60382 + wg_noise_expire_current_peer_keypairs(peer);
60383 + }
60384 + wg_cookie_checker_precompute_device_keys(&wg->cookie_checker);
60385 +diff --git a/drivers/net/wireguard/noise.c b/drivers/net/wireguard/noise.c
60386 +index 919d9d866446..708dc61c974f 100644
60387 +--- a/drivers/net/wireguard/noise.c
60388 ++++ b/drivers/net/wireguard/noise.c
60389 +@@ -44,32 +44,23 @@ void __init wg_noise_init(void)
60390 + }
60391 +
60392 + /* Must hold peer->handshake.static_identity->lock */
60393 +-bool wg_noise_precompute_static_static(struct wg_peer *peer)
60394 ++void wg_noise_precompute_static_static(struct wg_peer *peer)
60395 + {
60396 +- bool ret;
60397 +-
60398 + down_write(&peer->handshake.lock);
60399 +- if (peer->handshake.static_identity->has_identity) {
60400 +- ret = curve25519(
60401 +- peer->handshake.precomputed_static_static,
60402 ++ if (!peer->handshake.static_identity->has_identity ||
60403 ++ !curve25519(peer->handshake.precomputed_static_static,
60404 + peer->handshake.static_identity->static_private,
60405 +- peer->handshake.remote_static);
60406 +- } else {
60407 +- u8 empty[NOISE_PUBLIC_KEY_LEN] = { 0 };
60408 +-
60409 +- ret = curve25519(empty, empty, peer->handshake.remote_static);
60410 ++ peer->handshake.remote_static))
60411 + memset(peer->handshake.precomputed_static_static, 0,
60412 + NOISE_PUBLIC_KEY_LEN);
60413 +- }
60414 + up_write(&peer->handshake.lock);
60415 +- return ret;
60416 + }
60417 +
60418 +-bool wg_noise_handshake_init(struct noise_handshake *handshake,
60419 +- struct noise_static_identity *static_identity,
60420 +- const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN],
60421 +- const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN],
60422 +- struct wg_peer *peer)
60423 ++void wg_noise_handshake_init(struct noise_handshake *handshake,
60424 ++ struct noise_static_identity *static_identity,
60425 ++ const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN],
60426 ++ const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN],
60427 ++ struct wg_peer *peer)
60428 + {
60429 + memset(handshake, 0, sizeof(*handshake));
60430 + init_rwsem(&handshake->lock);
60431 +@@ -81,7 +72,7 @@ bool wg_noise_handshake_init(struct noise_handshake *handshake,
60432 + NOISE_SYMMETRIC_KEY_LEN);
60433 + handshake->static_identity = static_identity;
60434 + handshake->state = HANDSHAKE_ZEROED;
60435 +- return wg_noise_precompute_static_static(peer);
60436 ++ wg_noise_precompute_static_static(peer);
60437 + }
60438 +
60439 + static void handshake_zero(struct noise_handshake *handshake)
60440 +@@ -403,6 +394,19 @@ static bool __must_check mix_dh(u8 chaining_key[NOISE_HASH_LEN],
60441 + return true;
60442 + }
60443 +
60444 ++static bool __must_check mix_precomputed_dh(u8 chaining_key[NOISE_HASH_LEN],
60445 ++ u8 key[NOISE_SYMMETRIC_KEY_LEN],
60446 ++ const u8 precomputed[NOISE_PUBLIC_KEY_LEN])
60447 ++{
60448 ++ static u8 zero_point[NOISE_PUBLIC_KEY_LEN];
60449 ++ if (unlikely(!crypto_memneq(precomputed, zero_point, NOISE_PUBLIC_KEY_LEN)))
60450 ++ return false;
60451 ++ kdf(chaining_key, key, NULL, precomputed, NOISE_HASH_LEN,
60452 ++ NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN,
60453 ++ chaining_key);
60454 ++ return true;
60455 ++}
60456 ++
60457 + static void mix_hash(u8 hash[NOISE_HASH_LEN], const u8 *src, size_t src_len)
60458 + {
60459 + struct blake2s_state blake;
60460 +@@ -531,10 +535,9 @@ wg_noise_handshake_create_initiation(struct message_handshake_initiation *dst,
60461 + NOISE_PUBLIC_KEY_LEN, key, handshake->hash);
60462 +
60463 + /* ss */
60464 +- kdf(handshake->chaining_key, key, NULL,
60465 +- handshake->precomputed_static_static, NOISE_HASH_LEN,
60466 +- NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN,
60467 +- handshake->chaining_key);
60468 ++ if (!mix_precomputed_dh(handshake->chaining_key, key,
60469 ++ handshake->precomputed_static_static))
60470 ++ goto out;
60471 +
60472 + /* {t} */
60473 + tai64n_now(timestamp);
60474 +@@ -595,9 +598,9 @@ wg_noise_handshake_consume_initiation(struct message_handshake_initiation *src,
60475 + handshake = &peer->handshake;
60476 +
60477 + /* ss */
60478 +- kdf(chaining_key, key, NULL, handshake->precomputed_static_static,
60479 +- NOISE_HASH_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN,
60480 +- chaining_key);
60481 ++ if (!mix_precomputed_dh(chaining_key, key,
60482 ++ handshake->precomputed_static_static))
60483 ++ goto out;
60484 +
60485 + /* {t} */
60486 + if (!message_decrypt(t, src->encrypted_timestamp,
60487 +diff --git a/drivers/net/wireguard/noise.h b/drivers/net/wireguard/noise.h
60488 +index 138a07bb817c..f532d59d3f19 100644
60489 +--- a/drivers/net/wireguard/noise.h
60490 ++++ b/drivers/net/wireguard/noise.h
60491 +@@ -94,11 +94,11 @@ struct noise_handshake {
60492 + struct wg_device;
60493 +
60494 + void wg_noise_init(void);
60495 +-bool wg_noise_handshake_init(struct noise_handshake *handshake,
60496 +- struct noise_static_identity *static_identity,
60497 +- const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN],
60498 +- const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN],
60499 +- struct wg_peer *peer);
60500 ++void wg_noise_handshake_init(struct noise_handshake *handshake,
60501 ++ struct noise_static_identity *static_identity,
60502 ++ const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN],
60503 ++ const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN],
60504 ++ struct wg_peer *peer);
60505 + void wg_noise_handshake_clear(struct noise_handshake *handshake);
60506 + static inline void wg_noise_reset_last_sent_handshake(atomic64_t *handshake_ns)
60507 + {
60508 +@@ -116,7 +116,7 @@ void wg_noise_expire_current_peer_keypairs(struct wg_peer *peer);
60509 + void wg_noise_set_static_identity_private_key(
60510 + struct noise_static_identity *static_identity,
60511 + const u8 private_key[NOISE_PUBLIC_KEY_LEN]);
60512 +-bool wg_noise_precompute_static_static(struct wg_peer *peer);
60513 ++void wg_noise_precompute_static_static(struct wg_peer *peer);
60514 +
60515 + bool
60516 + wg_noise_handshake_create_initiation(struct message_handshake_initiation *dst,
60517 +diff --git a/drivers/net/wireguard/peer.c b/drivers/net/wireguard/peer.c
60518 +index 071eedf33f5a..1d634bd3038f 100644
60519 +--- a/drivers/net/wireguard/peer.c
60520 ++++ b/drivers/net/wireguard/peer.c
60521 +@@ -34,11 +34,8 @@ struct wg_peer *wg_peer_create(struct wg_device *wg,
60522 + return ERR_PTR(ret);
60523 + peer->device = wg;
60524 +
60525 +- if (!wg_noise_handshake_init(&peer->handshake, &wg->static_identity,
60526 +- public_key, preshared_key, peer)) {
60527 +- ret = -EKEYREJECTED;
60528 +- goto err_1;
60529 +- }
60530 ++ wg_noise_handshake_init(&peer->handshake, &wg->static_identity,
60531 ++ public_key, preshared_key, peer);
60532 + if (dst_cache_init(&peer->endpoint_cache, GFP_KERNEL))
60533 + goto err_1;
60534 + if (wg_packet_queue_init(&peer->tx_queue, wg_packet_tx_worker, false,
60535 +diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh
60536 +index 138d46b3f330..936e1ca9410e 100755
60537 +--- a/tools/testing/selftests/wireguard/netns.sh
60538 ++++ b/tools/testing/selftests/wireguard/netns.sh
60539 +@@ -527,11 +527,16 @@ n0 wg set wg0 peer "$pub2" allowed-ips 0.0.0.0/0
60540 + n0 wg set wg0 peer "$pub2" allowed-ips ::/0,1700::/111,5000::/4,e000::/37,9000::/75
60541 + n0 wg set wg0 peer "$pub2" allowed-ips ::/0
60542 + n0 wg set wg0 peer "$pub2" remove
60543 +-low_order_points=( AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= AQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= 4Ot6fDtBuK4WVuP68Z/EatoJjeucMrH9hmIFFl9JuAA= X5yVvKNQjCSx0LFVnIPvWwREXMRYHI6G2CJO3dCfEVc= 7P///////////////////////////////////////38= 7f///////////////////////////////////////38= 7v///////////////////////////////////////38= )
60544 +-n0 wg set wg0 private-key /dev/null ${low_order_points[@]/#/peer }
60545 +-[[ -z $(n0 wg show wg0 peers) ]]
60546 +-n0 wg set wg0 private-key <(echo "$key1") ${low_order_points[@]/#/peer }
60547 +-[[ -z $(n0 wg show wg0 peers) ]]
60548 ++for low_order_point in AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= AQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= 4Ot6fDtBuK4WVuP68Z/EatoJjeucMrH9hmIFFl9JuAA= X5yVvKNQjCSx0LFVnIPvWwREXMRYHI6G2CJO3dCfEVc= 7P///////////////////////////////////////38= 7f///////////////////////////////////////38= 7v///////////////////////////////////////38=; do
60549 ++ n0 wg set wg0 peer "$low_order_point" persistent-keepalive 1 endpoint 127.0.0.1:1111
60550 ++done
60551 ++[[ -n $(n0 wg show wg0 peers) ]]
60552 ++exec 4< <(n0 ncat -l -u -p 1111)
60553 ++ncat_pid=$!
60554 ++waitncatudp $netns0 $ncat_pid
60555 ++ip0 link set wg0 up
60556 ++! read -r -n 1 -t 2 <&4 || false
60557 ++kill $ncat_pid
60558 + ip0 link del wg0
60559 +
60560 + declare -A objects
60561 +--
60562 +cgit v1.2.3-4-ga26e
60563 +
60564 +
60565 +From 18f436332de3d1e27c2363e2fd71d24839680f0b Mon Sep 17 00:00:00 2001
60566 +From: Sultan Alsawaf <sultan@×××××××××××.com>
60567 +Date: Wed, 29 Apr 2020 14:59:20 -0600
60568 +Subject: wireguard: send: remove errant newline from packet_encrypt_worker
60569 +
60570 +commit d6833e42786e050e7522d6a91a9361e54085897d upstream.
60571 +
60572 +This commit removes a useless newline at the end of a scope, which
60573 +doesn't add anything in the way of organization or readability.
60574 +
60575 +Signed-off-by: Sultan Alsawaf <sultan@×××××××××××.com>
60576 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
60577 +Signed-off-by: David S. Miller <davem@×××××××××.net>
60578 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
60579 +---
60580 + drivers/net/wireguard/send.c | 1 -
60581 + 1 file changed, 1 deletion(-)
60582 +
60583 +diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c
60584 +index 7348c10cbae3..3e030d614df5 100644
60585 +--- a/drivers/net/wireguard/send.c
60586 ++++ b/drivers/net/wireguard/send.c
60587 +@@ -304,7 +304,6 @@ void wg_packet_encrypt_worker(struct work_struct *work)
60588 + }
60589 + wg_queue_enqueue_per_peer(&PACKET_PEER(first)->tx_queue, first,
60590 + state);
60591 +-
60592 + }
60593 + }
60594 +
60595 +--
60596 +cgit v1.2.3-4-ga26e
60597 +
60598 +
60599 +From da6c304ced4e7861177eb695d2c450f72e2dc335 Mon Sep 17 00:00:00 2001
60600 +From: "Jason A. Donenfeld" <Jason@×××××.com>
60601 +Date: Wed, 29 Apr 2020 14:59:21 -0600
60602 +Subject: wireguard: queueing: cleanup ptr_ring in error path of
60603 + packet_queue_init
60604 +
60605 +commit 130c58606171326c81841a49cc913cd354113dd9 upstream.
60606 +
60607 +Prior, if the alloc_percpu of packet_percpu_multicore_worker_alloc
60608 +failed, the previously allocated ptr_ring wouldn't be freed. This commit
60609 +adds the missing call to ptr_ring_cleanup in the error case.
60610 +
60611 +Reported-by: Sultan Alsawaf <sultan@×××××××××××.com>
60612 +Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
60613 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
60614 +Signed-off-by: David S. Miller <davem@×××××××××.net>
60615 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
60616 +---
60617 + drivers/net/wireguard/queueing.c | 4 +++-
60618 + 1 file changed, 3 insertions(+), 1 deletion(-)
60619 +
60620 +diff --git a/drivers/net/wireguard/queueing.c b/drivers/net/wireguard/queueing.c
60621 +index 5c964fcb994e..71b8e80b58e1 100644
60622 +--- a/drivers/net/wireguard/queueing.c
60623 ++++ b/drivers/net/wireguard/queueing.c
60624 +@@ -35,8 +35,10 @@ int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function,
60625 + if (multicore) {
60626 + queue->worker = wg_packet_percpu_multicore_worker_alloc(
60627 + function, queue);
60628 +- if (!queue->worker)
60629 ++ if (!queue->worker) {
60630 ++ ptr_ring_cleanup(&queue->ring, NULL);
60631 + return -ENOMEM;
60632 ++ }
60633 + } else {
60634 + INIT_WORK(&queue->work, function);
60635 + }
60636 +--
60637 +cgit v1.2.3-4-ga26e
60638 +
60639 +
60640 +From 212b44288e798c8901a01bf22157170ef01a08f8 Mon Sep 17 00:00:00 2001
60641 +From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@××××××.com>
60642 +Date: Wed, 29 Apr 2020 14:59:22 -0600
60643 +Subject: wireguard: receive: use tunnel helpers for decapsulating ECN markings
60644 +MIME-Version: 1.0
60645 +Content-Type: text/plain; charset=UTF-8
60646 +Content-Transfer-Encoding: 8bit
60647 +
60648 +commit eebabcb26ea1e3295704477c6cd4e772c96a9559 upstream.
60649 +
60650 +WireGuard currently only propagates ECN markings on tunnel decap according
60651 +to the old RFC3168 specification. However, the spec has since been updated
60652 +in RFC6040 to recommend slightly different decapsulation semantics. This
60653 +was implemented in the kernel as a set of common helpers for ECN
60654 +decapsulation, so let's just switch over WireGuard to using those, so it
60655 +can benefit from this enhancement and any future tweaks. We do not drop
60656 +packets with invalid ECN marking combinations, because WireGuard is
60657 +frequently used to work around broken ISPs, which could be doing that.
60658 +
60659 +Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
60660 +Reported-by: Olivier Tilmans <olivier.tilmans@×××××××××××××××.com>
60661 +Cc: Dave Taht <dave.taht@×××××.com>
60662 +Cc: Rodney W. Grimes <ietf@×××××××××××××.net>
60663 +Signed-off-by: Toke Høiland-Jørgensen <toke@××××××.com>
60664 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
60665 +Signed-off-by: David S. Miller <davem@×××××××××.net>
60666 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
60667 +---
60668 + drivers/net/wireguard/receive.c | 6 ++----
60669 + 1 file changed, 2 insertions(+), 4 deletions(-)
60670 +
60671 +diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c
60672 +index da3b782ab7d3..267f202f1931 100644
60673 +--- a/drivers/net/wireguard/receive.c
60674 ++++ b/drivers/net/wireguard/receive.c
60675 +@@ -393,13 +393,11 @@ static void wg_packet_consume_data_done(struct wg_peer *peer,
60676 + len = ntohs(ip_hdr(skb)->tot_len);
60677 + if (unlikely(len < sizeof(struct iphdr)))
60678 + goto dishonest_packet_size;
60679 +- if (INET_ECN_is_ce(PACKET_CB(skb)->ds))
60680 +- IP_ECN_set_ce(ip_hdr(skb));
60681 ++ INET_ECN_decapsulate(skb, PACKET_CB(skb)->ds, ip_hdr(skb)->tos);
60682 + } else if (skb->protocol == htons(ETH_P_IPV6)) {
60683 + len = ntohs(ipv6_hdr(skb)->payload_len) +
60684 + sizeof(struct ipv6hdr);
60685 +- if (INET_ECN_is_ce(PACKET_CB(skb)->ds))
60686 +- IP6_ECN_set_ce(skb, ipv6_hdr(skb));
60687 ++ INET_ECN_decapsulate(skb, PACKET_CB(skb)->ds, ipv6_get_dsfield(ipv6_hdr(skb)));
60688 + } else {
60689 + goto dishonest_packet_type;
60690 + }
60691 +--
60692 +cgit v1.2.3-4-ga26e
60693 +
60694 +
60695 +From dc93841a86aaaa0c47fd38cee1b9ba2d0831f816 Mon Sep 17 00:00:00 2001
60696 +From: "Jason A. Donenfeld" <Jason@×××××.com>
60697 +Date: Wed, 6 May 2020 15:33:02 -0600
60698 +Subject: wireguard: selftests: use normal kernel stack size on ppc64
60699 +
60700 +commit a0fd7cc87a018df1a17f9d3f0bd994c1f22c6b34 upstream.
60701 +
60702 +While at some point it might have made sense to be running these tests
60703 +on ppc64 with 4k stacks, the kernel hasn't actually used 4k stacks on
60704 +64-bit powerpc in a long time, and more interesting things that we test
60705 +don't really work when we deviate from the default (16k). So, we stop
60706 +pushing our luck in this commit, and return to the default instead of
60707 +the minimum.
60708 +
60709 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
60710 +Signed-off-by: David S. Miller <davem@×××××××××.net>
60711 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
60712 +---
60713 + tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config | 1 +
60714 + 1 file changed, 1 insertion(+)
60715 +
60716 +diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config
60717 +index 990c510a9cfa..f52f1e2bc7f6 100644
60718 +--- a/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config
60719 ++++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config
60720 +@@ -10,3 +10,4 @@ CONFIG_CMDLINE_BOOL=y
60721 + CONFIG_CMDLINE="console=hvc0 wg.success=hvc1"
60722 + CONFIG_SECTION_MISMATCH_WARN_ONLY=y
60723 + CONFIG_FRAME_WARN=1280
60724 ++CONFIG_THREAD_SHIFT=14
60725 +--
60726 +cgit v1.2.3-4-ga26e
60727 +
60728 +
60729 +From 28397d330c847d3922153b65d7cea175d1abdfeb Mon Sep 17 00:00:00 2001
60730 +From: "Jason A. Donenfeld" <Jason@×××××.com>
60731 +Date: Wed, 6 May 2020 15:33:03 -0600
60732 +Subject: wireguard: socket: remove errant restriction on looping to self
60733 +
60734 +commit b673e24aad36981f327a6570412ffa7754de8911 upstream.
60735 +
60736 +It's already possible to create two different interfaces and loop
60737 +packets between them. This has always been possible with tunnels in the
60738 +kernel, and isn't specific to wireguard. Therefore, the networking stack
60739 +already needs to deal with that. At the very least, the packet winds up
60740 +exceeding the MTU and is discarded at that point. So, since this is
60741 +already something that happens, there's no need to forbid the not very
60742 +exceptional case of routing a packet back to the same interface; this
60743 +loop is no different than others, and we shouldn't special case it, but
60744 +rather rely on generic handling of loops in general. This also makes it
60745 +easier to do interesting things with wireguard such as onion routing.
60746 +
60747 +At the same time, we add a selftest for this, ensuring that both onion
60748 +routing works and infinite routing loops do not crash the kernel. We
60749 +also add a test case for wireguard interfaces nesting packets and
60750 +sending traffic between each other, as well as the loop in this case
60751 +too. We make sure to send some throughput-heavy traffic for this use
60752 +case, to stress out any possible recursion issues with the locks around
60753 +workqueues.
60754 +
60755 +Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
60756 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
60757 +Signed-off-by: David S. Miller <davem@×××××××××.net>
60758 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
60759 +---
60760 + drivers/net/wireguard/socket.c | 12 -------
60761 + tools/testing/selftests/wireguard/netns.sh | 54 ++++++++++++++++++++++++++++--
60762 + 2 files changed, 51 insertions(+), 15 deletions(-)
60763 +
60764 +diff --git a/drivers/net/wireguard/socket.c b/drivers/net/wireguard/socket.c
60765 +index b0d6541582d3..f9018027fc13 100644
60766 +--- a/drivers/net/wireguard/socket.c
60767 ++++ b/drivers/net/wireguard/socket.c
60768 +@@ -76,12 +76,6 @@ static int send4(struct wg_device *wg, struct sk_buff *skb,
60769 + net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n",
60770 + wg->dev->name, &endpoint->addr, ret);
60771 + goto err;
60772 +- } else if (unlikely(rt->dst.dev == skb->dev)) {
60773 +- ip_rt_put(rt);
60774 +- ret = -ELOOP;
60775 +- net_dbg_ratelimited("%s: Avoiding routing loop to %pISpfsc\n",
60776 +- wg->dev->name, &endpoint->addr);
60777 +- goto err;
60778 + }
60779 + if (cache)
60780 + dst_cache_set_ip4(cache, &rt->dst, fl.saddr);
60781 +@@ -149,12 +143,6 @@ static int send6(struct wg_device *wg, struct sk_buff *skb,
60782 + net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n",
60783 + wg->dev->name, &endpoint->addr, ret);
60784 + goto err;
60785 +- } else if (unlikely(dst->dev == skb->dev)) {
60786 +- dst_release(dst);
60787 +- ret = -ELOOP;
60788 +- net_dbg_ratelimited("%s: Avoiding routing loop to %pISpfsc\n",
60789 +- wg->dev->name, &endpoint->addr);
60790 +- goto err;
60791 + }
60792 + if (cache)
60793 + dst_cache_set_ip6(cache, dst, &fl.saddr);
60794 +diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh
60795 +index 936e1ca9410e..17a1f53ceba0 100755
60796 +--- a/tools/testing/selftests/wireguard/netns.sh
60797 ++++ b/tools/testing/selftests/wireguard/netns.sh
60798 +@@ -48,8 +48,11 @@ cleanup() {
60799 + exec 2>/dev/null
60800 + printf "$orig_message_cost" > /proc/sys/net/core/message_cost
60801 + ip0 link del dev wg0
60802 ++ ip0 link del dev wg1
60803 + ip1 link del dev wg0
60804 ++ ip1 link del dev wg1
60805 + ip2 link del dev wg0
60806 ++ ip2 link del dev wg1
60807 + local to_kill="$(ip netns pids $netns0) $(ip netns pids $netns1) $(ip netns pids $netns2)"
60808 + [[ -n $to_kill ]] && kill $to_kill
60809 + pp ip netns del $netns1
60810 +@@ -77,18 +80,20 @@ ip0 link set wg0 netns $netns2
60811 + key1="$(pp wg genkey)"
60812 + key2="$(pp wg genkey)"
60813 + key3="$(pp wg genkey)"
60814 ++key4="$(pp wg genkey)"
60815 + pub1="$(pp wg pubkey <<<"$key1")"
60816 + pub2="$(pp wg pubkey <<<"$key2")"
60817 + pub3="$(pp wg pubkey <<<"$key3")"
60818 ++pub4="$(pp wg pubkey <<<"$key4")"
60819 + psk="$(pp wg genpsk)"
60820 + [[ -n $key1 && -n $key2 && -n $psk ]]
60821 +
60822 + configure_peers() {
60823 + ip1 addr add 192.168.241.1/24 dev wg0
60824 +- ip1 addr add fd00::1/24 dev wg0
60825 ++ ip1 addr add fd00::1/112 dev wg0
60826 +
60827 + ip2 addr add 192.168.241.2/24 dev wg0
60828 +- ip2 addr add fd00::2/24 dev wg0
60829 ++ ip2 addr add fd00::2/112 dev wg0
60830 +
60831 + n1 wg set wg0 \
60832 + private-key <(echo "$key1") \
60833 +@@ -230,9 +235,38 @@ n1 ping -W 1 -c 1 192.168.241.2
60834 + n1 wg set wg0 private-key <(echo "$key3")
60835 + n2 wg set wg0 peer "$pub3" preshared-key <(echo "$psk") allowed-ips 192.168.241.1/32 peer "$pub1" remove
60836 + n1 ping -W 1 -c 1 192.168.241.2
60837 ++n2 wg set wg0 peer "$pub3" remove
60838 ++
60839 ++# Test that we can route wg through wg
60840 ++ip1 addr flush dev wg0
60841 ++ip2 addr flush dev wg0
60842 ++ip1 addr add fd00::5:1/112 dev wg0
60843 ++ip2 addr add fd00::5:2/112 dev wg0
60844 ++n1 wg set wg0 private-key <(echo "$key1") peer "$pub2" preshared-key <(echo "$psk") allowed-ips fd00::5:2/128 endpoint 127.0.0.1:2
60845 ++n2 wg set wg0 private-key <(echo "$key2") listen-port 2 peer "$pub1" preshared-key <(echo "$psk") allowed-ips fd00::5:1/128 endpoint 127.212.121.99:9998
60846 ++ip1 link add wg1 type wireguard
60847 ++ip2 link add wg1 type wireguard
60848 ++ip1 addr add 192.168.241.1/24 dev wg1
60849 ++ip1 addr add fd00::1/112 dev wg1
60850 ++ip2 addr add 192.168.241.2/24 dev wg1
60851 ++ip2 addr add fd00::2/112 dev wg1
60852 ++ip1 link set mtu 1340 up dev wg1
60853 ++ip2 link set mtu 1340 up dev wg1
60854 ++n1 wg set wg1 listen-port 5 private-key <(echo "$key3") peer "$pub4" allowed-ips 192.168.241.2/32,fd00::2/128 endpoint [fd00::5:2]:5
60855 ++n2 wg set wg1 listen-port 5 private-key <(echo "$key4") peer "$pub3" allowed-ips 192.168.241.1/32,fd00::1/128 endpoint [fd00::5:1]:5
60856 ++tests
60857 ++# Try to set up a routing loop between the two namespaces
60858 ++ip1 link set netns $netns0 dev wg1
60859 ++ip0 addr add 192.168.241.1/24 dev wg1
60860 ++ip0 link set up dev wg1
60861 ++n0 ping -W 1 -c 1 192.168.241.2
60862 ++n1 wg set wg0 peer "$pub2" endpoint 192.168.241.2:7
60863 ++ip2 link del wg0
60864 ++ip2 link del wg1
60865 ++! n0 ping -W 1 -c 10 -f 192.168.241.2 || false # Should not crash kernel
60866 +
60867 ++ip0 link del wg1
60868 + ip1 link del wg0
60869 +-ip2 link del wg0
60870 +
60871 + # Test using NAT. We now change the topology to this:
60872 + # ┌────────────────────────────────────────┐ ┌────────────────────────────────────────────────┐ ┌────────────────────────────────────────┐
60873 +@@ -282,6 +316,20 @@ pp sleep 3
60874 + n2 ping -W 1 -c 1 192.168.241.1
60875 + n1 wg set wg0 peer "$pub2" persistent-keepalive 0
60876 +
60877 ++# Test that onion routing works, even when it loops
60878 ++n1 wg set wg0 peer "$pub3" allowed-ips 192.168.242.2/32 endpoint 192.168.241.2:5
60879 ++ip1 addr add 192.168.242.1/24 dev wg0
60880 ++ip2 link add wg1 type wireguard
60881 ++ip2 addr add 192.168.242.2/24 dev wg1
60882 ++n2 wg set wg1 private-key <(echo "$key3") listen-port 5 peer "$pub1" allowed-ips 192.168.242.1/32
60883 ++ip2 link set wg1 up
60884 ++n1 ping -W 1 -c 1 192.168.242.2
60885 ++ip2 link del wg1
60886 ++n1 wg set wg0 peer "$pub3" endpoint 192.168.242.2:5
60887 ++! n1 ping -W 1 -c 1 192.168.242.2 || false # Should not crash kernel
60888 ++n1 wg set wg0 peer "$pub3" remove
60889 ++ip1 addr del 192.168.242.1/24 dev wg0
60890 ++
60891 + # Do a wg-quick(8)-style policy routing for the default route, making sure vethc has a v6 address to tease out bugs.
60892 + ip1 -6 addr add fc00::9/96 dev vethc
60893 + ip1 -6 route add default via fc00::1
60894 +--
60895 +cgit v1.2.3-4-ga26e
60896 +
60897 +
60898 +From 5d70da9eaea884dbe1ad789830a7e274b371a9d2 Mon Sep 17 00:00:00 2001
60899 +From: "Jason A. Donenfeld" <Jason@×××××.com>
60900 +Date: Wed, 6 May 2020 15:33:04 -0600
60901 +Subject: wireguard: send/receive: cond_resched() when processing worker
60902 + ringbuffers
60903 +
60904 +commit 4005f5c3c9d006157ba716594e0d70c88a235c5e upstream.
60905 +
60906 +Users with pathological hardware reported CPU stalls on
60907 +CONFIG_PREEMPT_VOLUNTARY=y, because the ringbuffers would stay full, meaning
60908 +these workers would never terminate. That turned out not to be okay on
60909 +systems without forced preemption, which Sultan observed. This commit
60910 +adds a cond_resched() to the bottom of each loop iteration, so that
60911 +these workers don't hog the core. Note that we don't need this on the
60912 +napi poll worker, since that terminates after its budget is expended.
60913 +
60914 +Suggested-by: Sultan Alsawaf <sultan@×××××××××××.com>
60915 +Reported-by: Wang Jian <larkwang@×××××.com>
60916 +Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
60917 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
60918 +Signed-off-by: David S. Miller <davem@×××××××××.net>
60919 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
60920 +---
60921 + drivers/net/wireguard/receive.c | 2 ++
60922 + drivers/net/wireguard/send.c | 4 ++++
60923 + 2 files changed, 6 insertions(+)
60924 +
60925 +diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c
60926 +index 267f202f1931..2566e13a292d 100644
60927 +--- a/drivers/net/wireguard/receive.c
60928 ++++ b/drivers/net/wireguard/receive.c
60929 +@@ -516,6 +516,8 @@ void wg_packet_decrypt_worker(struct work_struct *work)
60930 + &PACKET_CB(skb)->keypair->receiving)) ?
60931 + PACKET_STATE_CRYPTED : PACKET_STATE_DEAD;
60932 + wg_queue_enqueue_per_peer_napi(skb, state);
60933 ++ if (need_resched())
60934 ++ cond_resched();
60935 + }
60936 + }
60937 +
60938 +diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c
60939 +index 3e030d614df5..dc3079e17c7f 100644
60940 +--- a/drivers/net/wireguard/send.c
60941 ++++ b/drivers/net/wireguard/send.c
60942 +@@ -281,6 +281,8 @@ void wg_packet_tx_worker(struct work_struct *work)
60943 +
60944 + wg_noise_keypair_put(keypair, false);
60945 + wg_peer_put(peer);
60946 ++ if (need_resched())
60947 ++ cond_resched();
60948 + }
60949 + }
60950 +
60951 +@@ -304,6 +306,8 @@ void wg_packet_encrypt_worker(struct work_struct *work)
60952 + }
60953 + wg_queue_enqueue_per_peer(&PACKET_PEER(first)->tx_queue, first,
60954 + state);
60955 ++ if (need_resched())
60956 ++ cond_resched();
60957 + }
60958 + }
60959 +
60960 +--
60961 +cgit v1.2.3-4-ga26e
60962 +
60963 +
60964 +From e976a3368b3f1a9a596519b08d167b51832e2e59 Mon Sep 17 00:00:00 2001
60965 +From: "Jason A. Donenfeld" <Jason@×××××.com>
60966 +Date: Wed, 6 May 2020 15:33:05 -0600
60967 +Subject: wireguard: selftests: initialize ipv6 members to NULL to squelch clang
60968 + warning
60969 +
60970 +commit 4fed818ef54b08d4b29200e416cce65546ad5312 upstream.
60971 +
60972 +Without setting these to NULL, clang complains in certain
60973 +configurations that have CONFIG_IPV6=n:
60974 +
60975 +In file included from drivers/net/wireguard/ratelimiter.c:223:
60976 +drivers/net/wireguard/selftest/ratelimiter.c:173:34: error: variable 'skb6' is uninitialized when used here [-Werror,-Wuninitialized]
60977 + ret = timings_test(skb4, hdr4, skb6, hdr6, &test_count);
60978 + ^~~~
60979 +drivers/net/wireguard/selftest/ratelimiter.c:123:29: note: initialize the variable 'skb6' to silence this warning
60980 + struct sk_buff *skb4, *skb6;
60981 + ^
60982 + = NULL
60983 +drivers/net/wireguard/selftest/ratelimiter.c:173:40: error: variable 'hdr6' is uninitialized when used here [-Werror,-Wuninitialized]
60984 + ret = timings_test(skb4, hdr4, skb6, hdr6, &test_count);
60985 + ^~~~
60986 +drivers/net/wireguard/selftest/ratelimiter.c:125:22: note: initialize the variable 'hdr6' to silence this warning
60987 + struct ipv6hdr *hdr6;
60988 + ^
60989 +
60990 +We silence this warning by setting the variables to NULL as the warning
60991 +suggests.
60992 +
60993 +Reported-by: Arnd Bergmann <arnd@×××××.de>
60994 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
60995 +Signed-off-by: David S. Miller <davem@×××××××××.net>
60996 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
60997 +---
60998 + drivers/net/wireguard/selftest/ratelimiter.c | 4 ++--
60999 + 1 file changed, 2 insertions(+), 2 deletions(-)
61000 +
61001 +diff --git a/drivers/net/wireguard/selftest/ratelimiter.c b/drivers/net/wireguard/selftest/ratelimiter.c
61002 +index bcd6462e4540..007cd4457c5f 100644
61003 +--- a/drivers/net/wireguard/selftest/ratelimiter.c
61004 ++++ b/drivers/net/wireguard/selftest/ratelimiter.c
61005 +@@ -120,9 +120,9 @@ bool __init wg_ratelimiter_selftest(void)
61006 + enum { TRIALS_BEFORE_GIVING_UP = 5000 };
61007 + bool success = false;
61008 + int test = 0, trials;
61009 +- struct sk_buff *skb4, *skb6;
61010 ++ struct sk_buff *skb4, *skb6 = NULL;
61011 + struct iphdr *hdr4;
61012 +- struct ipv6hdr *hdr6;
61013 ++ struct ipv6hdr *hdr6 = NULL;
61014 +
61015 + if (IS_ENABLED(CONFIG_KASAN) || IS_ENABLED(CONFIG_UBSAN))
61016 + return true;
61017 +--
61018 +cgit v1.2.3-4-ga26e
61019 +
61020 +
61021 +From 07788016cb3129826d48d149181e04e0daeb26d7 Mon Sep 17 00:00:00 2001
61022 +From: "Jason A. Donenfeld" <Jason@×××××.com>
61023 +Date: Wed, 6 May 2020 15:33:06 -0600
61024 +Subject: wireguard: send/receive: use explicit unlikely branch instead of
61025 + implicit coalescing
61026 +
61027 +commit 243f2148937adc72bcaaa590d482d599c936efde upstream.
61028 +
61029 +It's very unlikely that send will become true. It's nearly always false
61030 +between 0 and 120 seconds of a session, and in most cases becomes true
61031 +only between 120 and 121 seconds before becoming false again. So,
61032 +unlikely(send) is clearly the right option here.
61033 +
61034 +What happened before was that we had this complex boolean expression
61035 +with multiple likely and unlikely clauses nested. Since this is
61036 +evaluated left-to-right anyway, the whole thing got converted to
61037 +unlikely. So, we can clean this up to better represent what's going on.
61038 +
61039 +The generated code is the same.
61040 +
61041 +Suggested-by: Sultan Alsawaf <sultan@×××××××××××.com>
61042 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
61043 +Signed-off-by: David S. Miller <davem@×××××××××.net>
61044 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
61045 +---
61046 + drivers/net/wireguard/receive.c | 13 ++++++-------
61047 + drivers/net/wireguard/send.c | 15 ++++++---------
61048 + 2 files changed, 12 insertions(+), 16 deletions(-)
61049 +
61050 +diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c
61051 +index 2566e13a292d..3bb5b9ae7cd1 100644
61052 +--- a/drivers/net/wireguard/receive.c
61053 ++++ b/drivers/net/wireguard/receive.c
61054 +@@ -226,21 +226,20 @@ void wg_packet_handshake_receive_worker(struct work_struct *work)
61055 + static void keep_key_fresh(struct wg_peer *peer)
61056 + {
61057 + struct noise_keypair *keypair;
61058 +- bool send = false;
61059 ++ bool send;
61060 +
61061 + if (peer->sent_lastminute_handshake)
61062 + return;
61063 +
61064 + rcu_read_lock_bh();
61065 + keypair = rcu_dereference_bh(peer->keypairs.current_keypair);
61066 +- if (likely(keypair && READ_ONCE(keypair->sending.is_valid)) &&
61067 +- keypair->i_am_the_initiator &&
61068 +- unlikely(wg_birthdate_has_expired(keypair->sending.birthdate,
61069 +- REJECT_AFTER_TIME - KEEPALIVE_TIMEOUT - REKEY_TIMEOUT)))
61070 +- send = true;
61071 ++ send = keypair && READ_ONCE(keypair->sending.is_valid) &&
61072 ++ keypair->i_am_the_initiator &&
61073 ++ wg_birthdate_has_expired(keypair->sending.birthdate,
61074 ++ REJECT_AFTER_TIME - KEEPALIVE_TIMEOUT - REKEY_TIMEOUT);
61075 + rcu_read_unlock_bh();
61076 +
61077 +- if (send) {
61078 ++ if (unlikely(send)) {
61079 + peer->sent_lastminute_handshake = true;
61080 + wg_packet_send_queued_handshake_initiation(peer, false);
61081 + }
61082 +diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c
61083 +index dc3079e17c7f..6687db699803 100644
61084 +--- a/drivers/net/wireguard/send.c
61085 ++++ b/drivers/net/wireguard/send.c
61086 +@@ -124,20 +124,17 @@ void wg_packet_send_handshake_cookie(struct wg_device *wg,
61087 + static void keep_key_fresh(struct wg_peer *peer)
61088 + {
61089 + struct noise_keypair *keypair;
61090 +- bool send = false;
61091 ++ bool send;
61092 +
61093 + rcu_read_lock_bh();
61094 + keypair = rcu_dereference_bh(peer->keypairs.current_keypair);
61095 +- if (likely(keypair && READ_ONCE(keypair->sending.is_valid)) &&
61096 +- (unlikely(atomic64_read(&keypair->sending.counter.counter) >
61097 +- REKEY_AFTER_MESSAGES) ||
61098 +- (keypair->i_am_the_initiator &&
61099 +- unlikely(wg_birthdate_has_expired(keypair->sending.birthdate,
61100 +- REKEY_AFTER_TIME)))))
61101 +- send = true;
61102 ++ send = keypair && READ_ONCE(keypair->sending.is_valid) &&
61103 ++ (atomic64_read(&keypair->sending.counter.counter) > REKEY_AFTER_MESSAGES ||
61104 ++ (keypair->i_am_the_initiator &&
61105 ++ wg_birthdate_has_expired(keypair->sending.birthdate, REKEY_AFTER_TIME)));
61106 + rcu_read_unlock_bh();
61107 +
61108 +- if (send)
61109 ++ if (unlikely(send))
61110 + wg_packet_send_queued_handshake_initiation(peer, false);
61111 + }
61112 +
61113 +--
61114 +cgit v1.2.3-4-ga26e
61115 +
61116 +
61117 +From e123fd102df725354c0700df9115e0378691482d Mon Sep 17 00:00:00 2001
61118 +From: "Jason A. Donenfeld" <Jason@×××××.com>
61119 +Date: Tue, 19 May 2020 22:49:27 -0600
61120 +Subject: wireguard: selftests: use newer iproute2 for gcc-10
61121 +
61122 +commit ee3c1aa3f34b7842c1557cfe5d8c3f7b8c692de8 upstream.
61123 +
61124 +gcc-10 switched to defaulting to -fno-common, which broke iproute2-5.4.
61125 +This was fixed in iproute-5.6, so switch to that. Because we're after a
61126 +stable testing surface, we generally don't like to bump these
61127 +unnecessarily, but in this case, being able to actually build is a basic
61128 +necessity.
61129 +
61130 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
61131 +Signed-off-by: David S. Miller <davem@×××××××××.net>
61132 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
61133 +---
61134 + tools/testing/selftests/wireguard/qemu/Makefile | 2 +-
61135 + 1 file changed, 1 insertion(+), 1 deletion(-)
61136 +
61137 +diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile
61138 +index 28d477683e8a..2dab4f57516d 100644
61139 +--- a/tools/testing/selftests/wireguard/qemu/Makefile
61140 ++++ b/tools/testing/selftests/wireguard/qemu/Makefile
61141 +@@ -44,7 +44,7 @@ endef
61142 + $(eval $(call tar_download,MUSL,musl,1.1.24,.tar.gz,https://www.musl-libc.org/releases/,1370c9a812b2cf2a7d92802510cca0058cc37e66a7bedd70051f0a34015022a3))
61143 + $(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/,d846040224317caf2f75c843d309a950a7db23f9b44b94688ccbe557d6d1710c))
61144 + $(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d))
61145 +-$(eval $(call tar_download,IPROUTE2,iproute2,5.4.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,fe97aa60a0d4c5ac830be18937e18dc3400ca713a33a89ad896ff1e3d46086ae))
61146 ++$(eval $(call tar_download,IPROUTE2,iproute2,5.6.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,1b5b0e25ce6e23da7526ea1da044e814ad85ba761b10dd29c2b027c056b04692))
61147 + $(eval $(call tar_download,IPTABLES,iptables,1.8.4,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,993a3a5490a544c2cbf2ef15cf7e7ed21af1845baf228318d5c36ef8827e157c))
61148 + $(eval $(call tar_download,NMAP,nmap,7.80,.tar.bz2,https://nmap.org/dist/,fcfa5a0e42099e12e4bf7a68ebe6fde05553383a682e816a7ec9256ab4773faa))
61149 + $(eval $(call tar_download,IPUTILS,iputils,s20190709,.tar.gz,https://github.com/iputils/iputils/archive/s20190709.tar.gz/#,a15720dd741d7538dd2645f9f516d193636ae4300ff7dbc8bfca757bf166490a))
61150 +--
61151 +cgit v1.2.3-4-ga26e
61152 +
61153 +
61154 +From 5ae45bba3dea3b321b0733ed2f0f179959beb0eb Mon Sep 17 00:00:00 2001
61155 +From: "Jason A. Donenfeld" <Jason@×××××.com>
61156 +Date: Tue, 19 May 2020 22:49:28 -0600
61157 +Subject: wireguard: noise: read preshared key while taking lock
61158 +
61159 +commit bc67d371256f5c47d824e2eec51e46c8d62d022e upstream.
61160 +
61161 +Previously, we read the preshared key after dropping the handshake lock, which
61162 +isn't an actual crypto issue if it races, but it's still not quite
61163 +correct. So copy that part of the state into a temporary like we do with
61164 +the rest of the handshake state variables. Then we can release the lock,
61165 +operate on the temporary, and zero it out at the end of the function. In
61166 +performance tests, the impact of this was entirely unnoticeable, probably
61167 +because those bytes are coming from the same cacheline as other things
61168 +that are being copied out in the same manner.
61169 +
61170 +Reported-by: Matt Dunwoodie <ncon@××××××××.net>
61171 +Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
61172 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
61173 +Signed-off-by: David S. Miller <davem@×××××××××.net>
61174 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
61175 +---
61176 + drivers/net/wireguard/noise.c | 6 +++++-
61177 + 1 file changed, 5 insertions(+), 1 deletion(-)
61178 +
61179 +diff --git a/drivers/net/wireguard/noise.c b/drivers/net/wireguard/noise.c
61180 +index 708dc61c974f..07eb438a6dee 100644
61181 +--- a/drivers/net/wireguard/noise.c
61182 ++++ b/drivers/net/wireguard/noise.c
61183 +@@ -715,6 +715,7 @@ wg_noise_handshake_consume_response(struct message_handshake_response *src,
61184 + u8 e[NOISE_PUBLIC_KEY_LEN];
61185 + u8 ephemeral_private[NOISE_PUBLIC_KEY_LEN];
61186 + u8 static_private[NOISE_PUBLIC_KEY_LEN];
61187 ++ u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN];
61188 +
61189 + down_read(&wg->static_identity.lock);
61190 +
61191 +@@ -733,6 +734,8 @@ wg_noise_handshake_consume_response(struct message_handshake_response *src,
61192 + memcpy(chaining_key, handshake->chaining_key, NOISE_HASH_LEN);
61193 + memcpy(ephemeral_private, handshake->ephemeral_private,
61194 + NOISE_PUBLIC_KEY_LEN);
61195 ++ memcpy(preshared_key, handshake->preshared_key,
61196 ++ NOISE_SYMMETRIC_KEY_LEN);
61197 + up_read(&handshake->lock);
61198 +
61199 + if (state != HANDSHAKE_CREATED_INITIATION)
61200 +@@ -750,7 +753,7 @@ wg_noise_handshake_consume_response(struct message_handshake_response *src,
61201 + goto fail;
61202 +
61203 + /* psk */
61204 +- mix_psk(chaining_key, hash, key, handshake->preshared_key);
61205 ++ mix_psk(chaining_key, hash, key, preshared_key);
61206 +
61207 + /* {} */
61208 + if (!message_decrypt(NULL, src->encrypted_nothing,
61209 +@@ -783,6 +786,7 @@ out:
61210 + memzero_explicit(chaining_key, NOISE_HASH_LEN);
61211 + memzero_explicit(ephemeral_private, NOISE_PUBLIC_KEY_LEN);
61212 + memzero_explicit(static_private, NOISE_PUBLIC_KEY_LEN);
61213 ++ memzero_explicit(preshared_key, NOISE_SYMMETRIC_KEY_LEN);
61214 + up_read(&wg->static_identity.lock);
61215 + return ret_peer;
61216 + }
61217 +--
61218 +cgit v1.2.3-4-ga26e
61219 +
61220 +
61221 +From 864924cf6a99b232cb157ffc3dc2be5833b4b66e Mon Sep 17 00:00:00 2001
61222 +From: "Jason A. Donenfeld" <Jason@×××××.com>
61223 +Date: Tue, 19 May 2020 22:49:29 -0600
61224 +Subject: wireguard: queueing: preserve flow hash across packet scrubbing
61225 +MIME-Version: 1.0
61226 +Content-Type: text/plain; charset=UTF-8
61227 +Content-Transfer-Encoding: 8bit
61228 +
61229 +commit c78a0b4a78839d572d8a80f6a62221c0d7843135 upstream.
61230 +
61231 +It's important that we clear most header fields during encapsulation and
61232 +decapsulation, because the packet is substantially changed, and we don't
61233 +want any info leak or logic bug due to an accidental correlation. But,
61234 +for encapsulation, it's wrong to clear skb->hash, since it's used by
61235 +fq_codel and flow dissection in general. Without it, classification does
61236 +not proceed as usual. This change might make it easier to estimate the
61237 +number of inner flows by examining clustering of out-of-order packets,
61238 +but this shouldn't open up anything that can't already be inferred
61239 +otherwise (e.g. syn packet size inference), and fq_codel can be disabled
61240 +anyway.
61241 +
61242 +Furthermore, it might be the case that the hash isn't used or queried at
61243 +all until after wireguard transmits the encrypted UDP packet, which
61244 +means skb->hash might still be zero at this point, and thus no hash
61245 +taken over the inner packet data. In order to address this situation, we
61246 +force a calculation of skb->hash before encrypting packet data.
61247 +
61248 +Of course this means that fq_codel might transmit packets slightly more
61249 +out of order than usual. Toke did some testing on beefy machines with
61250 +high quantities of parallel flows and found that increasing the
61251 +replay-attack counter to 8192 takes care of the most pathological cases
61252 +pretty well.
61253 +
61254 +Reported-by: Dave Taht <dave.taht@×××××.com>
61255 +Reviewed-and-tested-by: Toke Høiland-Jørgensen <toke@××××.dk>
61256 +Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
61257 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
61258 +Signed-off-by: David S. Miller <davem@×××××××××.net>
61259 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
61260 +---
61261 + drivers/net/wireguard/messages.h | 2 +-
61262 + drivers/net/wireguard/queueing.h | 10 +++++++++-
61263 + drivers/net/wireguard/receive.c | 2 +-
61264 + drivers/net/wireguard/send.c | 7 ++++++-
61265 + 4 files changed, 17 insertions(+), 4 deletions(-)
61266 +
61267 +diff --git a/drivers/net/wireguard/messages.h b/drivers/net/wireguard/messages.h
61268 +index b8a7b9ce32ba..208da72673fc 100644
61269 +--- a/drivers/net/wireguard/messages.h
61270 ++++ b/drivers/net/wireguard/messages.h
61271 +@@ -32,7 +32,7 @@ enum cookie_values {
61272 + };
61273 +
61274 + enum counter_values {
61275 +- COUNTER_BITS_TOTAL = 2048,
61276 ++ COUNTER_BITS_TOTAL = 8192,
61277 + COUNTER_REDUNDANT_BITS = BITS_PER_LONG,
61278 + COUNTER_WINDOW_SIZE = COUNTER_BITS_TOTAL - COUNTER_REDUNDANT_BITS
61279 + };
61280 +diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h
61281 +index 3432232afe06..c58df439dbbe 100644
61282 +--- a/drivers/net/wireguard/queueing.h
61283 ++++ b/drivers/net/wireguard/queueing.h
61284 +@@ -87,12 +87,20 @@ static inline bool wg_check_packet_protocol(struct sk_buff *skb)
61285 + return real_protocol && skb->protocol == real_protocol;
61286 + }
61287 +
61288 +-static inline void wg_reset_packet(struct sk_buff *skb)
61289 ++static inline void wg_reset_packet(struct sk_buff *skb, bool encapsulating)
61290 + {
61291 ++ u8 l4_hash = skb->l4_hash;
61292 ++ u8 sw_hash = skb->sw_hash;
61293 ++ u32 hash = skb->hash;
61294 + skb_scrub_packet(skb, true);
61295 + memset(&skb->headers_start, 0,
61296 + offsetof(struct sk_buff, headers_end) -
61297 + offsetof(struct sk_buff, headers_start));
61298 ++ if (encapsulating) {
61299 ++ skb->l4_hash = l4_hash;
61300 ++ skb->sw_hash = sw_hash;
61301 ++ skb->hash = hash;
61302 ++ }
61303 + skb->queue_mapping = 0;
61304 + skb->nohdr = 0;
61305 + skb->peeked = 0;
61306 +diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c
61307 +index 3bb5b9ae7cd1..d0eebd90c9d5 100644
61308 +--- a/drivers/net/wireguard/receive.c
61309 ++++ b/drivers/net/wireguard/receive.c
61310 +@@ -484,7 +484,7 @@ int wg_packet_rx_poll(struct napi_struct *napi, int budget)
61311 + if (unlikely(wg_socket_endpoint_from_skb(&endpoint, skb)))
61312 + goto next;
61313 +
61314 +- wg_reset_packet(skb);
61315 ++ wg_reset_packet(skb, false);
61316 + wg_packet_consume_data_done(peer, skb, &endpoint);
61317 + free = false;
61318 +
61319 +diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c
61320 +index 6687db699803..2f5119ff93d8 100644
61321 +--- a/drivers/net/wireguard/send.c
61322 ++++ b/drivers/net/wireguard/send.c
61323 +@@ -167,6 +167,11 @@ static bool encrypt_packet(struct sk_buff *skb, struct noise_keypair *keypair)
61324 + struct sk_buff *trailer;
61325 + int num_frags;
61326 +
61327 ++ /* Force hash calculation before encryption so that flow analysis is
61328 ++ * consistent over the inner packet.
61329 ++ */
61330 ++ skb_get_hash(skb);
61331 ++
61332 + /* Calculate lengths. */
61333 + padding_len = calculate_skb_padding(skb);
61334 + trailer_len = padding_len + noise_encrypted_len(0);
61335 +@@ -295,7 +300,7 @@ void wg_packet_encrypt_worker(struct work_struct *work)
61336 + skb_list_walk_safe(first, skb, next) {
61337 + if (likely(encrypt_packet(skb,
61338 + PACKET_CB(first)->keypair))) {
61339 +- wg_reset_packet(skb);
61340 ++ wg_reset_packet(skb, true);
61341 + } else {
61342 + state = PACKET_STATE_DEAD;
61343 + break;
61344 +--
61345 +cgit v1.2.3-4-ga26e
61346 +
61347 +
61348 +From e9f6b556c7466323dd3700779e2f28cf0df2f186 Mon Sep 17 00:00:00 2001
61349 +From: "Jason A. Donenfeld" <Jason@×××××.com>
61350 +Date: Tue, 19 May 2020 22:49:30 -0600
61351 +Subject: wireguard: noise: separate receive counter from send counter
61352 +
61353 +commit a9e90d9931f3a474f04bab782ccd9d77904941e9 upstream.
61354 +
61355 +In "wireguard: queueing: preserve flow hash across packet scrubbing", we
61356 +were required to slightly increase the size of the receive replay
61357 +counter to something still fairly small, but an increase nonetheless.
61358 +It turns out that we can recoup some of the additional memory overhead
61359 +by splitting up the prior union type into two distinct types. Before, we
61360 +used the same "noise_counter" union for both sending and receiving, with
61361 +sending just using a simple atomic64_t, while receiving used the full
61362 +replay counter checker. This meant that most of the memory being
61363 +allocated for the sending counter was being wasted. Since the old
61364 +"noise_counter" type increased in size in the prior commit, now is a
61365 +good time to split up that union type into a distinct
61366 +"noise_replay_counter" for receiving and a boring atomic64_t for sending,
61367 +each using neither more nor less memory than required.
61368 +
61369 +Also, since sometimes the replay counter is accessed without
61370 +necessitating additional accesses to the bitmap, we can reduce cache
61371 +misses by hoisting the always-necessary lock above the bitmap in the
61372 +struct layout. We also change a "noise_replay_counter" stack allocation
61373 +to kmalloc in a -DDEBUG selftest so that KASAN doesn't trigger a stack
61374 +frame warning.
61375 +
61376 +All in all, removing a bit of abstraction in this commit makes the code
61377 +simpler and smaller, in addition to the motivating memory usage
61378 +recuperation. For example, passing around raw "noise_symmetric_key"
61379 +structs is something that really only makes sense within noise.c, in the
61380 +one place where the sending and receiving keys can safely be thought of
61381 +as the same type of object; subsequent to that, it's important that we
61382 +uniformly access these through keypair->{sending,receiving}, where their
61383 +distinct roles are always made explicit. So this patch allows us to draw
61384 +that distinction clearly as well.
61385 +
61386 +Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
61387 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
61388 +Signed-off-by: David S. Miller <davem@×××××××××.net>
61389 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
61390 +---
61391 + drivers/net/wireguard/noise.c | 16 +++---------
61392 + drivers/net/wireguard/noise.h | 14 +++++------
61393 + drivers/net/wireguard/receive.c | 42 ++++++++++++++++----------------
61394 + drivers/net/wireguard/selftest/counter.c | 17 +++++++++----
61395 + drivers/net/wireguard/send.c | 12 ++++-----
61396 + 5 files changed, 48 insertions(+), 53 deletions(-)
61397 +
61398 +diff --git a/drivers/net/wireguard/noise.c b/drivers/net/wireguard/noise.c
61399 +index 07eb438a6dee..626433690abb 100644
61400 +--- a/drivers/net/wireguard/noise.c
61401 ++++ b/drivers/net/wireguard/noise.c
61402 +@@ -104,6 +104,7 @@ static struct noise_keypair *keypair_create(struct wg_peer *peer)
61403 +
61404 + if (unlikely(!keypair))
61405 + return NULL;
61406 ++ spin_lock_init(&keypair->receiving_counter.lock);
61407 + keypair->internal_id = atomic64_inc_return(&keypair_counter);
61408 + keypair->entry.type = INDEX_HASHTABLE_KEYPAIR;
61409 + keypair->entry.peer = peer;
61410 +@@ -358,25 +359,16 @@ out:
61411 + memzero_explicit(output, BLAKE2S_HASH_SIZE + 1);
61412 + }
61413 +
61414 +-static void symmetric_key_init(struct noise_symmetric_key *key)
61415 +-{
61416 +- spin_lock_init(&key->counter.receive.lock);
61417 +- atomic64_set(&key->counter.counter, 0);
61418 +- memset(key->counter.receive.backtrack, 0,
61419 +- sizeof(key->counter.receive.backtrack));
61420 +- key->birthdate = ktime_get_coarse_boottime_ns();
61421 +- key->is_valid = true;
61422 +-}
61423 +-
61424 + static void derive_keys(struct noise_symmetric_key *first_dst,
61425 + struct noise_symmetric_key *second_dst,
61426 + const u8 chaining_key[NOISE_HASH_LEN])
61427 + {
61428 ++ u64 birthdate = ktime_get_coarse_boottime_ns();
61429 + kdf(first_dst->key, second_dst->key, NULL, NULL,
61430 + NOISE_SYMMETRIC_KEY_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, 0,
61431 + chaining_key);
61432 +- symmetric_key_init(first_dst);
61433 +- symmetric_key_init(second_dst);
61434 ++ first_dst->birthdate = second_dst->birthdate = birthdate;
61435 ++ first_dst->is_valid = second_dst->is_valid = true;
61436 + }
61437 +
61438 + static bool __must_check mix_dh(u8 chaining_key[NOISE_HASH_LEN],
61439 +diff --git a/drivers/net/wireguard/noise.h b/drivers/net/wireguard/noise.h
61440 +index f532d59d3f19..c527253dba80 100644
61441 +--- a/drivers/net/wireguard/noise.h
61442 ++++ b/drivers/net/wireguard/noise.h
61443 +@@ -15,18 +15,14 @@
61444 + #include <linux/mutex.h>
61445 + #include <linux/kref.h>
61446 +
61447 +-union noise_counter {
61448 +- struct {
61449 +- u64 counter;
61450 +- unsigned long backtrack[COUNTER_BITS_TOTAL / BITS_PER_LONG];
61451 +- spinlock_t lock;
61452 +- } receive;
61453 +- atomic64_t counter;
61454 ++struct noise_replay_counter {
61455 ++ u64 counter;
61456 ++ spinlock_t lock;
61457 ++ unsigned long backtrack[COUNTER_BITS_TOTAL / BITS_PER_LONG];
61458 + };
61459 +
61460 + struct noise_symmetric_key {
61461 + u8 key[NOISE_SYMMETRIC_KEY_LEN];
61462 +- union noise_counter counter;
61463 + u64 birthdate;
61464 + bool is_valid;
61465 + };
61466 +@@ -34,7 +30,9 @@ struct noise_symmetric_key {
61467 + struct noise_keypair {
61468 + struct index_hashtable_entry entry;
61469 + struct noise_symmetric_key sending;
61470 ++ atomic64_t sending_counter;
61471 + struct noise_symmetric_key receiving;
61472 ++ struct noise_replay_counter receiving_counter;
61473 + __le32 remote_index;
61474 + bool i_am_the_initiator;
61475 + struct kref refcount;
61476 +diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c
61477 +index d0eebd90c9d5..91438144e4f7 100644
61478 +--- a/drivers/net/wireguard/receive.c
61479 ++++ b/drivers/net/wireguard/receive.c
61480 +@@ -245,20 +245,20 @@ static void keep_key_fresh(struct wg_peer *peer)
61481 + }
61482 + }
61483 +
61484 +-static bool decrypt_packet(struct sk_buff *skb, struct noise_symmetric_key *key)
61485 ++static bool decrypt_packet(struct sk_buff *skb, struct noise_keypair *keypair)
61486 + {
61487 + struct scatterlist sg[MAX_SKB_FRAGS + 8];
61488 + struct sk_buff *trailer;
61489 + unsigned int offset;
61490 + int num_frags;
61491 +
61492 +- if (unlikely(!key))
61493 ++ if (unlikely(!keypair))
61494 + return false;
61495 +
61496 +- if (unlikely(!READ_ONCE(key->is_valid) ||
61497 +- wg_birthdate_has_expired(key->birthdate, REJECT_AFTER_TIME) ||
61498 +- key->counter.receive.counter >= REJECT_AFTER_MESSAGES)) {
61499 +- WRITE_ONCE(key->is_valid, false);
61500 ++ if (unlikely(!READ_ONCE(keypair->receiving.is_valid) ||
61501 ++ wg_birthdate_has_expired(keypair->receiving.birthdate, REJECT_AFTER_TIME) ||
61502 ++ keypair->receiving_counter.counter >= REJECT_AFTER_MESSAGES)) {
61503 ++ WRITE_ONCE(keypair->receiving.is_valid, false);
61504 + return false;
61505 + }
61506 +
61507 +@@ -283,7 +283,7 @@ static bool decrypt_packet(struct sk_buff *skb, struct noise_symmetric_key *key)
61508 +
61509 + if (!chacha20poly1305_decrypt_sg_inplace(sg, skb->len, NULL, 0,
61510 + PACKET_CB(skb)->nonce,
61511 +- key->key))
61512 ++ keypair->receiving.key))
61513 + return false;
61514 +
61515 + /* Another ugly situation of pushing and pulling the header so as to
61516 +@@ -298,41 +298,41 @@ static bool decrypt_packet(struct sk_buff *skb, struct noise_symmetric_key *key)
61517 + }
61518 +
61519 + /* This is RFC6479, a replay detection bitmap algorithm that avoids bitshifts */
61520 +-static bool counter_validate(union noise_counter *counter, u64 their_counter)
61521 ++static bool counter_validate(struct noise_replay_counter *counter, u64 their_counter)
61522 + {
61523 + unsigned long index, index_current, top, i;
61524 + bool ret = false;
61525 +
61526 +- spin_lock_bh(&counter->receive.lock);
61527 ++ spin_lock_bh(&counter->lock);
61528 +
61529 +- if (unlikely(counter->receive.counter >= REJECT_AFTER_MESSAGES + 1 ||
61530 ++ if (unlikely(counter->counter >= REJECT_AFTER_MESSAGES + 1 ||
61531 + their_counter >= REJECT_AFTER_MESSAGES))
61532 + goto out;
61533 +
61534 + ++their_counter;
61535 +
61536 + if (unlikely((COUNTER_WINDOW_SIZE + their_counter) <
61537 +- counter->receive.counter))
61538 ++ counter->counter))
61539 + goto out;
61540 +
61541 + index = their_counter >> ilog2(BITS_PER_LONG);
61542 +
61543 +- if (likely(their_counter > counter->receive.counter)) {
61544 +- index_current = counter->receive.counter >> ilog2(BITS_PER_LONG);
61545 ++ if (likely(their_counter > counter->counter)) {
61546 ++ index_current = counter->counter >> ilog2(BITS_PER_LONG);
61547 + top = min_t(unsigned long, index - index_current,
61548 + COUNTER_BITS_TOTAL / BITS_PER_LONG);
61549 + for (i = 1; i <= top; ++i)
61550 +- counter->receive.backtrack[(i + index_current) &
61551 ++ counter->backtrack[(i + index_current) &
61552 + ((COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1)] = 0;
61553 +- counter->receive.counter = their_counter;
61554 ++ counter->counter = their_counter;
61555 + }
61556 +
61557 + index &= (COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1;
61558 + ret = !test_and_set_bit(their_counter & (BITS_PER_LONG - 1),
61559 +- &counter->receive.backtrack[index]);
61560 ++ &counter->backtrack[index]);
61561 +
61562 + out:
61563 +- spin_unlock_bh(&counter->receive.lock);
61564 ++ spin_unlock_bh(&counter->lock);
61565 + return ret;
61566 + }
61567 +
61568 +@@ -472,12 +472,12 @@ int wg_packet_rx_poll(struct napi_struct *napi, int budget)
61569 + if (unlikely(state != PACKET_STATE_CRYPTED))
61570 + goto next;
61571 +
61572 +- if (unlikely(!counter_validate(&keypair->receiving.counter,
61573 ++ if (unlikely(!counter_validate(&keypair->receiving_counter,
61574 + PACKET_CB(skb)->nonce))) {
61575 + net_dbg_ratelimited("%s: Packet has invalid nonce %llu (max %llu)\n",
61576 + peer->device->dev->name,
61577 + PACKET_CB(skb)->nonce,
61578 +- keypair->receiving.counter.receive.counter);
61579 ++ keypair->receiving_counter.counter);
61580 + goto next;
61581 + }
61582 +
61583 +@@ -511,8 +511,8 @@ void wg_packet_decrypt_worker(struct work_struct *work)
61584 + struct sk_buff *skb;
61585 +
61586 + while ((skb = ptr_ring_consume_bh(&queue->ring)) != NULL) {
61587 +- enum packet_state state = likely(decrypt_packet(skb,
61588 +- &PACKET_CB(skb)->keypair->receiving)) ?
61589 ++ enum packet_state state =
61590 ++ likely(decrypt_packet(skb, PACKET_CB(skb)->keypair)) ?
61591 + PACKET_STATE_CRYPTED : PACKET_STATE_DEAD;
61592 + wg_queue_enqueue_per_peer_napi(skb, state);
61593 + if (need_resched())
61594 +diff --git a/drivers/net/wireguard/selftest/counter.c b/drivers/net/wireguard/selftest/counter.c
61595 +index f4fbb9072ed7..ec3c156bf91b 100644
61596 +--- a/drivers/net/wireguard/selftest/counter.c
61597 ++++ b/drivers/net/wireguard/selftest/counter.c
61598 +@@ -6,18 +6,24 @@
61599 + #ifdef DEBUG
61600 + bool __init wg_packet_counter_selftest(void)
61601 + {
61602 ++ struct noise_replay_counter *counter;
61603 + unsigned int test_num = 0, i;
61604 +- union noise_counter counter;
61605 + bool success = true;
61606 +
61607 +-#define T_INIT do { \
61608 +- memset(&counter, 0, sizeof(union noise_counter)); \
61609 +- spin_lock_init(&counter.receive.lock); \
61610 ++ counter = kmalloc(sizeof(*counter), GFP_KERNEL);
61611 ++ if (unlikely(!counter)) {
61612 ++ pr_err("nonce counter self-test malloc: FAIL\n");
61613 ++ return false;
61614 ++ }
61615 ++
61616 ++#define T_INIT do { \
61617 ++ memset(counter, 0, sizeof(*counter)); \
61618 ++ spin_lock_init(&counter->lock); \
61619 + } while (0)
61620 + #define T_LIM (COUNTER_WINDOW_SIZE + 1)
61621 + #define T(n, v) do { \
61622 + ++test_num; \
61623 +- if (counter_validate(&counter, n) != (v)) { \
61624 ++ if (counter_validate(counter, n) != (v)) { \
61625 + pr_err("nonce counter self-test %u: FAIL\n", \
61626 + test_num); \
61627 + success = false; \
61628 +@@ -99,6 +105,7 @@ bool __init wg_packet_counter_selftest(void)
61629 +
61630 + if (success)
61631 + pr_info("nonce counter self-tests: pass\n");
61632 ++ kfree(counter);
61633 + return success;
61634 + }
61635 + #endif
61636 +diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c
61637 +index 2f5119ff93d8..f74b9341ab0f 100644
61638 +--- a/drivers/net/wireguard/send.c
61639 ++++ b/drivers/net/wireguard/send.c
61640 +@@ -129,7 +129,7 @@ static void keep_key_fresh(struct wg_peer *peer)
61641 + rcu_read_lock_bh();
61642 + keypair = rcu_dereference_bh(peer->keypairs.current_keypair);
61643 + send = keypair && READ_ONCE(keypair->sending.is_valid) &&
61644 +- (atomic64_read(&keypair->sending.counter.counter) > REKEY_AFTER_MESSAGES ||
61645 ++ (atomic64_read(&keypair->sending_counter) > REKEY_AFTER_MESSAGES ||
61646 + (keypair->i_am_the_initiator &&
61647 + wg_birthdate_has_expired(keypair->sending.birthdate, REKEY_AFTER_TIME)));
61648 + rcu_read_unlock_bh();
61649 +@@ -349,7 +349,6 @@ void wg_packet_purge_staged_packets(struct wg_peer *peer)
61650 +
61651 + void wg_packet_send_staged_packets(struct wg_peer *peer)
61652 + {
61653 +- struct noise_symmetric_key *key;
61654 + struct noise_keypair *keypair;
61655 + struct sk_buff_head packets;
61656 + struct sk_buff *skb;
61657 +@@ -369,10 +368,9 @@ void wg_packet_send_staged_packets(struct wg_peer *peer)
61658 + rcu_read_unlock_bh();
61659 + if (unlikely(!keypair))
61660 + goto out_nokey;
61661 +- key = &keypair->sending;
61662 +- if (unlikely(!READ_ONCE(key->is_valid)))
61663 ++ if (unlikely(!READ_ONCE(keypair->sending.is_valid)))
61664 + goto out_nokey;
61665 +- if (unlikely(wg_birthdate_has_expired(key->birthdate,
61666 ++ if (unlikely(wg_birthdate_has_expired(keypair->sending.birthdate,
61667 + REJECT_AFTER_TIME)))
61668 + goto out_invalid;
61669 +
61670 +@@ -387,7 +385,7 @@ void wg_packet_send_staged_packets(struct wg_peer *peer)
61671 + */
61672 + PACKET_CB(skb)->ds = ip_tunnel_ecn_encap(0, ip_hdr(skb), skb);
61673 + PACKET_CB(skb)->nonce =
61674 +- atomic64_inc_return(&key->counter.counter) - 1;
61675 ++ atomic64_inc_return(&keypair->sending_counter) - 1;
61676 + if (unlikely(PACKET_CB(skb)->nonce >= REJECT_AFTER_MESSAGES))
61677 + goto out_invalid;
61678 + }
61679 +@@ -399,7 +397,7 @@ void wg_packet_send_staged_packets(struct wg_peer *peer)
61680 + return;
61681 +
61682 + out_invalid:
61683 +- WRITE_ONCE(key->is_valid, false);
61684 ++ WRITE_ONCE(keypair->sending.is_valid, false);
61685 + out_nokey:
61686 + wg_noise_keypair_put(keypair, false);
61687 +
61688 +--
61689 +cgit v1.2.3-4-ga26e
61690 +
61691 +
61692 +From 4518c919a92a4d0c55f9d74696ba95327570330b Mon Sep 17 00:00:00 2001
61693 +From: Frank Werner-Krippendorf <mail@××××××.ch>
61694 +Date: Tue, 23 Jun 2020 03:59:44 -0600
61695 +Subject: wireguard: noise: do not assign initiation time in if condition
61696 +
61697 +commit 558b353c9c2a717509f291c066c6bd8f5f5e21be upstream.
61698 +
61699 +Fixes an error condition reported by checkpatch.pl, which was caused by
61700 +assigning a variable in an if condition in
61701 +wg_noise_handshake_consume_initiation().
61702 +
61703 +Signed-off-by: Frank Werner-Krippendorf <mail@××××××.ch>
61704 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
61705 +Signed-off-by: David S. Miller <davem@×××××××××.net>
61706 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
61707 +---
61708 + drivers/net/wireguard/noise.c | 4 ++--
61709 + 1 file changed, 2 insertions(+), 2 deletions(-)
61710 +
61711 +diff --git a/drivers/net/wireguard/noise.c b/drivers/net/wireguard/noise.c
61712 +index 626433690abb..201a22681945 100644
61713 +--- a/drivers/net/wireguard/noise.c
61714 ++++ b/drivers/net/wireguard/noise.c
61715 +@@ -617,8 +617,8 @@ wg_noise_handshake_consume_initiation(struct message_handshake_initiation *src,
61716 + memcpy(handshake->hash, hash, NOISE_HASH_LEN);
61717 + memcpy(handshake->chaining_key, chaining_key, NOISE_HASH_LEN);
61718 + handshake->remote_index = src->sender_index;
61719 +- if ((s64)(handshake->last_initiation_consumption -
61720 +- (initiation_consumption = ktime_get_coarse_boottime_ns())) < 0)
61721 ++ initiation_consumption = ktime_get_coarse_boottime_ns();
61722 ++ if ((s64)(handshake->last_initiation_consumption - initiation_consumption) < 0)
61723 + handshake->last_initiation_consumption = initiation_consumption;
61724 + handshake->state = HANDSHAKE_CONSUMED_INITIATION;
61725 + up_write(&handshake->lock);
61726 +--
61727 +cgit v1.2.3-4-ga26e
61728 +
61729 +
61730 +From a7c9cf5dc057c07e11e2283287310950c45f0610 Mon Sep 17 00:00:00 2001
61731 +From: "Jason A. Donenfeld" <Jason@×××××.com>
61732 +Date: Tue, 23 Jun 2020 03:59:45 -0600
61733 +Subject: wireguard: device: avoid circular netns references
61734 +
61735 +commit 900575aa33a3eaaef802b31de187a85c4a4b4bd0 upstream.
61736 +
61737 +Before, we took a reference to the creating netns if the new netns was
61738 +different. This caused issues with circular references, with two
61739 +wireguard interfaces swapping namespaces. The solution is to rather not
61740 +take any extra references at all, but instead simply invalidate the
61741 +creating netns pointer when that netns is deleted.
61742 +
61743 +In order to prevent this from happening again, this commit improves the
61744 +rough object leak tracking by allowing it to account for created and
61745 +destroyed interfaces, aside from just peers and keys. That then makes it
61746 +possible to check for the object leak when having two interfaces take a
61747 +reference to each other's namespaces.
61748 +
61749 +Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
61750 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
61751 +Signed-off-by: David S. Miller <davem@×××××××××.net>
61752 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
61753 +---
61754 + drivers/net/wireguard/device.c | 58 ++++++++++++++----------------
61755 + drivers/net/wireguard/device.h | 3 +-
61756 + drivers/net/wireguard/netlink.c | 14 +++++---
61757 + drivers/net/wireguard/socket.c | 25 +++++++++----
61758 + tools/testing/selftests/wireguard/netns.sh | 13 ++++++-
61759 + 5 files changed, 67 insertions(+), 46 deletions(-)
61760 +
61761 +diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c
61762 +index 3ac3f8570ca1..a8f151b1b5fa 100644
61763 +--- a/drivers/net/wireguard/device.c
61764 ++++ b/drivers/net/wireguard/device.c
61765 +@@ -45,17 +45,18 @@ static int wg_open(struct net_device *dev)
61766 + if (dev_v6)
61767 + dev_v6->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_NONE;
61768 +
61769 ++ mutex_lock(&wg->device_update_lock);
61770 + ret = wg_socket_init(wg, wg->incoming_port);
61771 + if (ret < 0)
61772 +- return ret;
61773 +- mutex_lock(&wg->device_update_lock);
61774 ++ goto out;
61775 + list_for_each_entry(peer, &wg->peer_list, peer_list) {
61776 + wg_packet_send_staged_packets(peer);
61777 + if (peer->persistent_keepalive_interval)
61778 + wg_packet_send_keepalive(peer);
61779 + }
61780 ++out:
61781 + mutex_unlock(&wg->device_update_lock);
61782 +- return 0;
61783 ++ return ret;
61784 + }
61785 +
61786 + #ifdef CONFIG_PM_SLEEP
61787 +@@ -225,6 +226,7 @@ static void wg_destruct(struct net_device *dev)
61788 + list_del(&wg->device_list);
61789 + rtnl_unlock();
61790 + mutex_lock(&wg->device_update_lock);
61791 ++ rcu_assign_pointer(wg->creating_net, NULL);
61792 + wg->incoming_port = 0;
61793 + wg_socket_reinit(wg, NULL, NULL);
61794 + /* The final references are cleared in the below calls to destroy_workqueue. */
61795 +@@ -240,13 +242,11 @@ static void wg_destruct(struct net_device *dev)
61796 + skb_queue_purge(&wg->incoming_handshakes);
61797 + free_percpu(dev->tstats);
61798 + free_percpu(wg->incoming_handshakes_worker);
61799 +- if (wg->have_creating_net_ref)
61800 +- put_net(wg->creating_net);
61801 + kvfree(wg->index_hashtable);
61802 + kvfree(wg->peer_hashtable);
61803 + mutex_unlock(&wg->device_update_lock);
61804 +
61805 +- pr_debug("%s: Interface deleted\n", dev->name);
61806 ++ pr_debug("%s: Interface destroyed\n", dev->name);
61807 + free_netdev(dev);
61808 + }
61809 +
61810 +@@ -292,7 +292,7 @@ static int wg_newlink(struct net *src_net, struct net_device *dev,
61811 + struct wg_device *wg = netdev_priv(dev);
61812 + int ret = -ENOMEM;
61813 +
61814 +- wg->creating_net = src_net;
61815 ++ rcu_assign_pointer(wg->creating_net, src_net);
61816 + init_rwsem(&wg->static_identity.lock);
61817 + mutex_init(&wg->socket_update_lock);
61818 + mutex_init(&wg->device_update_lock);
61819 +@@ -393,30 +393,26 @@ static struct rtnl_link_ops link_ops __read_mostly = {
61820 + .newlink = wg_newlink,
61821 + };
61822 +
61823 +-static int wg_netdevice_notification(struct notifier_block *nb,
61824 +- unsigned long action, void *data)
61825 ++static void wg_netns_pre_exit(struct net *net)
61826 + {
61827 +- struct net_device *dev = ((struct netdev_notifier_info *)data)->dev;
61828 +- struct wg_device *wg = netdev_priv(dev);
61829 +-
61830 +- ASSERT_RTNL();
61831 +-
61832 +- if (action != NETDEV_REGISTER || dev->netdev_ops != &netdev_ops)
61833 +- return 0;
61834 ++ struct wg_device *wg;
61835 +
61836 +- if (dev_net(dev) == wg->creating_net && wg->have_creating_net_ref) {
61837 +- put_net(wg->creating_net);
61838 +- wg->have_creating_net_ref = false;
61839 +- } else if (dev_net(dev) != wg->creating_net &&
61840 +- !wg->have_creating_net_ref) {
61841 +- wg->have_creating_net_ref = true;
61842 +- get_net(wg->creating_net);
61843 ++ rtnl_lock();
61844 ++ list_for_each_entry(wg, &device_list, device_list) {
61845 ++ if (rcu_access_pointer(wg->creating_net) == net) {
61846 ++ pr_debug("%s: Creating namespace exiting\n", wg->dev->name);
61847 ++ netif_carrier_off(wg->dev);
61848 ++ mutex_lock(&wg->device_update_lock);
61849 ++ rcu_assign_pointer(wg->creating_net, NULL);
61850 ++ wg_socket_reinit(wg, NULL, NULL);
61851 ++ mutex_unlock(&wg->device_update_lock);
61852 ++ }
61853 + }
61854 +- return 0;
61855 ++ rtnl_unlock();
61856 + }
61857 +
61858 +-static struct notifier_block netdevice_notifier = {
61859 +- .notifier_call = wg_netdevice_notification
61860 ++static struct pernet_operations pernet_ops = {
61861 ++ .pre_exit = wg_netns_pre_exit
61862 + };
61863 +
61864 + int __init wg_device_init(void)
61865 +@@ -429,18 +425,18 @@ int __init wg_device_init(void)
61866 + return ret;
61867 + #endif
61868 +
61869 +- ret = register_netdevice_notifier(&netdevice_notifier);
61870 ++ ret = register_pernet_device(&pernet_ops);
61871 + if (ret)
61872 + goto error_pm;
61873 +
61874 + ret = rtnl_link_register(&link_ops);
61875 + if (ret)
61876 +- goto error_netdevice;
61877 ++ goto error_pernet;
61878 +
61879 + return 0;
61880 +
61881 +-error_netdevice:
61882 +- unregister_netdevice_notifier(&netdevice_notifier);
61883 ++error_pernet:
61884 ++ unregister_pernet_device(&pernet_ops);
61885 + error_pm:
61886 + #ifdef CONFIG_PM_SLEEP
61887 + unregister_pm_notifier(&pm_notifier);
61888 +@@ -451,7 +447,7 @@ error_pm:
61889 + void wg_device_uninit(void)
61890 + {
61891 + rtnl_link_unregister(&link_ops);
61892 +- unregister_netdevice_notifier(&netdevice_notifier);
61893 ++ unregister_pernet_device(&pernet_ops);
61894 + #ifdef CONFIG_PM_SLEEP
61895 + unregister_pm_notifier(&pm_notifier);
61896 + #endif
61897 +diff --git a/drivers/net/wireguard/device.h b/drivers/net/wireguard/device.h
61898 +index b15a8be9d816..4d0144e16947 100644
61899 +--- a/drivers/net/wireguard/device.h
61900 ++++ b/drivers/net/wireguard/device.h
61901 +@@ -40,7 +40,7 @@ struct wg_device {
61902 + struct net_device *dev;
61903 + struct crypt_queue encrypt_queue, decrypt_queue;
61904 + struct sock __rcu *sock4, *sock6;
61905 +- struct net *creating_net;
61906 ++ struct net __rcu *creating_net;
61907 + struct noise_static_identity static_identity;
61908 + struct workqueue_struct *handshake_receive_wq, *handshake_send_wq;
61909 + struct workqueue_struct *packet_crypt_wq;
61910 +@@ -56,7 +56,6 @@ struct wg_device {
61911 + unsigned int num_peers, device_update_gen;
61912 + u32 fwmark;
61913 + u16 incoming_port;
61914 +- bool have_creating_net_ref;
61915 + };
61916 +
61917 + int wg_device_init(void);
61918 +diff --git a/drivers/net/wireguard/netlink.c b/drivers/net/wireguard/netlink.c
61919 +index ab6cbe95a652..9756239416fd 100644
61920 +--- a/drivers/net/wireguard/netlink.c
61921 ++++ b/drivers/net/wireguard/netlink.c
61922 +@@ -517,11 +517,15 @@ static int wg_set_device(struct sk_buff *skb, struct genl_info *info)
61923 + if (flags & ~__WGDEVICE_F_ALL)
61924 + goto out;
61925 +
61926 +- ret = -EPERM;
61927 +- if ((info->attrs[WGDEVICE_A_LISTEN_PORT] ||
61928 +- info->attrs[WGDEVICE_A_FWMARK]) &&
61929 +- !ns_capable(wg->creating_net->user_ns, CAP_NET_ADMIN))
61930 +- goto out;
61931 ++ if (info->attrs[WGDEVICE_A_LISTEN_PORT] || info->attrs[WGDEVICE_A_FWMARK]) {
61932 ++ struct net *net;
61933 ++ rcu_read_lock();
61934 ++ net = rcu_dereference(wg->creating_net);
61935 ++ ret = !net || !ns_capable(net->user_ns, CAP_NET_ADMIN) ? -EPERM : 0;
61936 ++ rcu_read_unlock();
61937 ++ if (ret)
61938 ++ goto out;
61939 ++ }
61940 +
61941 + ++wg->device_update_gen;
61942 +
61943 +diff --git a/drivers/net/wireguard/socket.c b/drivers/net/wireguard/socket.c
61944 +index f9018027fc13..c33e2c81635f 100644
61945 +--- a/drivers/net/wireguard/socket.c
61946 ++++ b/drivers/net/wireguard/socket.c
61947 +@@ -347,6 +347,7 @@ static void set_sock_opts(struct socket *sock)
61948 +
61949 + int wg_socket_init(struct wg_device *wg, u16 port)
61950 + {
61951 ++ struct net *net;
61952 + int ret;
61953 + struct udp_tunnel_sock_cfg cfg = {
61954 + .sk_user_data = wg,
61955 +@@ -371,37 +372,47 @@ int wg_socket_init(struct wg_device *wg, u16 port)
61956 + };
61957 + #endif
61958 +
61959 ++ rcu_read_lock();
61960 ++ net = rcu_dereference(wg->creating_net);
61961 ++ net = net ? maybe_get_net(net) : NULL;
61962 ++ rcu_read_unlock();
61963 ++ if (unlikely(!net))
61964 ++ return -ENONET;
61965 ++
61966 + #if IS_ENABLED(CONFIG_IPV6)
61967 + retry:
61968 + #endif
61969 +
61970 +- ret = udp_sock_create(wg->creating_net, &port4, &new4);
61971 ++ ret = udp_sock_create(net, &port4, &new4);
61972 + if (ret < 0) {
61973 + pr_err("%s: Could not create IPv4 socket\n", wg->dev->name);
61974 +- return ret;
61975 ++ goto out;
61976 + }
61977 + set_sock_opts(new4);
61978 +- setup_udp_tunnel_sock(wg->creating_net, new4, &cfg);
61979 ++ setup_udp_tunnel_sock(net, new4, &cfg);
61980 +
61981 + #if IS_ENABLED(CONFIG_IPV6)
61982 + if (ipv6_mod_enabled()) {
61983 + port6.local_udp_port = inet_sk(new4->sk)->inet_sport;
61984 +- ret = udp_sock_create(wg->creating_net, &port6, &new6);
61985 ++ ret = udp_sock_create(net, &port6, &new6);
61986 + if (ret < 0) {
61987 + udp_tunnel_sock_release(new4);
61988 + if (ret == -EADDRINUSE && !port && retries++ < 100)
61989 + goto retry;
61990 + pr_err("%s: Could not create IPv6 socket\n",
61991 + wg->dev->name);
61992 +- return ret;
61993 ++ goto out;
61994 + }
61995 + set_sock_opts(new6);
61996 +- setup_udp_tunnel_sock(wg->creating_net, new6, &cfg);
61997 ++ setup_udp_tunnel_sock(net, new6, &cfg);
61998 + }
61999 + #endif
62000 +
62001 + wg_socket_reinit(wg, new4->sk, new6 ? new6->sk : NULL);
62002 +- return 0;
62003 ++ ret = 0;
62004 ++out:
62005 ++ put_net(net);
62006 ++ return ret;
62007 + }
62008 +
62009 + void wg_socket_reinit(struct wg_device *wg, struct sock *new4,
62010 +diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh
62011 +index 17a1f53ceba0..d77f4829f1e0 100755
62012 +--- a/tools/testing/selftests/wireguard/netns.sh
62013 ++++ b/tools/testing/selftests/wireguard/netns.sh
62014 +@@ -587,9 +587,20 @@ ip0 link set wg0 up
62015 + kill $ncat_pid
62016 + ip0 link del wg0
62017 +
62018 ++# Ensure there aren't circular reference loops
62019 ++ip1 link add wg1 type wireguard
62020 ++ip2 link add wg2 type wireguard
62021 ++ip1 link set wg1 netns $netns2
62022 ++ip2 link set wg2 netns $netns1
62023 ++pp ip netns delete $netns1
62024 ++pp ip netns delete $netns2
62025 ++pp ip netns add $netns1
62026 ++pp ip netns add $netns2
62027 ++
62028 ++sleep 2 # Wait for cleanup and grace periods
62029 + declare -A objects
62030 + while read -t 0.1 -r line 2>/dev/null || [[ $? -ne 142 ]]; do
62031 +- [[ $line =~ .*(wg[0-9]+:\ [A-Z][a-z]+\ [0-9]+)\ .*(created|destroyed).* ]] || continue
62032 ++ [[ $line =~ .*(wg[0-9]+:\ [A-Z][a-z]+\ ?[0-9]*)\ .*(created|destroyed).* ]] || continue
62033 + objects["${BASH_REMATCH[1]}"]+="${BASH_REMATCH[2]}"
62034 + done < /dev/kmsg
62035 + alldeleted=1
62036 +--
62037 +cgit v1.2.3-4-ga26e
62038 +
62039 +
62040 +From 532974db7e174f507d443838798be9c1dab1b6f6 Mon Sep 17 00:00:00 2001
62041 +From: "Jason A. Donenfeld" <Jason@×××××.com>
62042 +Date: Wed, 24 Jun 2020 16:06:03 -0600
62043 +Subject: wireguard: receive: account for napi_gro_receive never returning
62044 + GRO_DROP
62045 +
62046 +commit df08126e3833e9dca19e2407db5f5860a7c194fb upstream.
62047 +
62048 +The napi_gro_receive function no longer returns GRO_DROP ever, making
62049 +handling GRO_DROP dead code. This commit removes that dead code.
62050 +Further, it's not even clear that device drivers have any business in
62051 +taking action after passing off received packets; that's arguably out of
62052 +their hands.
62053 +
62054 +Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
62055 +Fixes: 6570bc79c0df ("net: core: use listified Rx for GRO_NORMAL in napi_gro_receive()")
62056 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
62057 +Signed-off-by: David S. Miller <davem@×××××××××.net>
62058 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
62059 +---
62060 + drivers/net/wireguard/receive.c | 10 ++--------
62061 + 1 file changed, 2 insertions(+), 8 deletions(-)
62062 +
62063 +diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c
62064 +index 91438144e4f7..9b2ab6fc91cd 100644
62065 +--- a/drivers/net/wireguard/receive.c
62066 ++++ b/drivers/net/wireguard/receive.c
62067 +@@ -414,14 +414,8 @@ static void wg_packet_consume_data_done(struct wg_peer *peer,
62068 + if (unlikely(routed_peer != peer))
62069 + goto dishonest_packet_peer;
62070 +
62071 +- if (unlikely(napi_gro_receive(&peer->napi, skb) == GRO_DROP)) {
62072 +- ++dev->stats.rx_dropped;
62073 +- net_dbg_ratelimited("%s: Failed to give packet to userspace from peer %llu (%pISpfsc)\n",
62074 +- dev->name, peer->internal_id,
62075 +- &peer->endpoint.addr);
62076 +- } else {
62077 +- update_rx_stats(peer, message_data_len(len_before_trim));
62078 +- }
62079 ++ napi_gro_receive(&peer->napi, skb);
62080 ++ update_rx_stats(peer, message_data_len(len_before_trim));
62081 + return;
62082 +
62083 + dishonest_packet_peer:
62084 +--
62085 +cgit v1.2.3-4-ga26e
62086 +
62087 +
62088 +From 8e02acc7cbd610e08d4c9f0a9a2e825aadf40ca4 Mon Sep 17 00:00:00 2001
62089 +From: "Jason A. Donenfeld" <Jason@×××××.com>
62090 +Date: Mon, 29 Jun 2020 19:06:18 -0600
62091 +Subject: net: ip_tunnel: add header_ops for layer 3 devices
62092 +
62093 +commit 2606aff916854b61234bf85001be9777bab2d5f8 upstream.
62094 +
62095 +Some devices that take straight up layer 3 packets benefit from having a
62096 +shared header_ops so that AF_PACKET sockets can inject packets that are
62097 +recognized. This shared infrastructure will be used by other drivers
62098 +that currently can't inject packets using AF_PACKET. It also exposes the
62099 +parser function, as it is useful in standalone form too.
62100 +
62101 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
62102 +Acked-by: Willem de Bruijn <willemb@××××××.com>
62103 +Signed-off-by: David S. Miller <davem@×××××××××.net>
62104 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
62105 +---
62106 + include/net/ip_tunnels.h | 3 +++
62107 + net/ipv4/ip_tunnel_core.c | 18 ++++++++++++++++++
62108 + 2 files changed, 21 insertions(+)
62109 +
62110 +diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
62111 +index af645604f328..b0b03a9f7af9 100644
62112 +--- a/include/net/ip_tunnels.h
62113 ++++ b/include/net/ip_tunnels.h
62114 +@@ -289,6 +289,9 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
62115 + struct ip_tunnel_parm *p, __u32 fwmark);
62116 + void ip_tunnel_setup(struct net_device *dev, unsigned int net_id);
62117 +
62118 ++extern const struct header_ops ip_tunnel_header_ops;
62119 ++__be16 ip_tunnel_parse_protocol(const struct sk_buff *skb);
62120 ++
62121 + struct ip_tunnel_encap_ops {
62122 + size_t (*encap_hlen)(struct ip_tunnel_encap *e);
62123 + int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e,
62124 +diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
62125 +index 1452a97914a0..cfe21c3ddfc2 100644
62126 +--- a/net/ipv4/ip_tunnel_core.c
62127 ++++ b/net/ipv4/ip_tunnel_core.c
62128 +@@ -446,3 +446,21 @@ void ip_tunnel_unneed_metadata(void)
62129 + static_branch_dec(&ip_tunnel_metadata_cnt);
62130 + }
62131 + EXPORT_SYMBOL_GPL(ip_tunnel_unneed_metadata);
62132 ++
62133 ++/* Returns either the correct skb->protocol value, or 0 if invalid. */
62134 ++__be16 ip_tunnel_parse_protocol(const struct sk_buff *skb)
62135 ++{
62136 ++ if (skb_network_header(skb) >= skb->head &&
62137 ++ (skb_network_header(skb) + sizeof(struct iphdr)) <= skb_tail_pointer(skb) &&
62138 ++ ip_hdr(skb)->version == 4)
62139 ++ return htons(ETH_P_IP);
62140 ++ if (skb_network_header(skb) >= skb->head &&
62141 ++ (skb_network_header(skb) + sizeof(struct ipv6hdr)) <= skb_tail_pointer(skb) &&
62142 ++ ipv6_hdr(skb)->version == 6)
62143 ++ return htons(ETH_P_IPV6);
62144 ++ return 0;
62145 ++}
62146 ++EXPORT_SYMBOL(ip_tunnel_parse_protocol);
62147 ++
62148 ++const struct header_ops ip_tunnel_header_ops = { .parse_protocol = ip_tunnel_parse_protocol };
62149 ++EXPORT_SYMBOL(ip_tunnel_header_ops);
62150 +--
62151 +cgit v1.2.3-4-ga26e
62152 +
62153 +
62154 +From b5c967bc30a62f1a38ff796fa0db55ad239df9fd Mon Sep 17 00:00:00 2001
62155 +From: "Jason A. Donenfeld" <Jason@×××××.com>
62156 +Date: Mon, 29 Jun 2020 19:06:20 -0600
62157 +Subject: wireguard: implement header_ops->parse_protocol for AF_PACKET
62158 +
62159 +commit 01a4967c71c004f8ecad4ab57021348636502fa9 upstream.
62160 +
62161 +WireGuard uses skb->protocol to determine packet type, and bails out if
62162 +it's not set or set to something it's not expecting. For AF_PACKET
62163 +injection, we need to support its call chain of:
62164 +
62165 + packet_sendmsg -> packet_snd -> packet_parse_headers ->
62166 + dev_parse_header_protocol -> parse_protocol
62167 +
62168 +Without a valid parse_protocol, this returns zero, and wireguard then
62169 +rejects the skb. So, this wires up the ip_tunnel handler for layer 3
62170 +packets for that case.
62171 +
62172 +Reported-by: Hans Wippel <ndev@×××××.net>
62173 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
62174 +Signed-off-by: David S. Miller <davem@×××××××××.net>
62175 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
62176 +---
62177 + drivers/net/wireguard/device.c | 1 +
62178 + 1 file changed, 1 insertion(+)
62179 +
62180 +diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c
62181 +index a8f151b1b5fa..c9f65e96ccb0 100644
62182 +--- a/drivers/net/wireguard/device.c
62183 ++++ b/drivers/net/wireguard/device.c
62184 +@@ -262,6 +262,7 @@ static void wg_setup(struct net_device *dev)
62185 + max(sizeof(struct ipv6hdr), sizeof(struct iphdr));
62186 +
62187 + dev->netdev_ops = &netdev_ops;
62188 ++ dev->header_ops = &ip_tunnel_header_ops;
62189 + dev->hard_header_len = 0;
62190 + dev->addr_len = 0;
62191 + dev->needed_headroom = DATA_PACKET_HEAD_ROOM;
62192 +--
62193 +cgit v1.2.3-4-ga26e
62194 +
62195 +
62196 +From 2c492351adfcb97c1af2f4b521904f2bf14af1b8 Mon Sep 17 00:00:00 2001
62197 +From: "Jason A. Donenfeld" <Jason@×××××.com>
62198 +Date: Mon, 29 Jun 2020 19:06:21 -0600
62199 +Subject: wireguard: queueing: make use of ip_tunnel_parse_protocol
62200 +
62201 +commit 1a574074ae7d1d745c16f7710655f38a53174c27 upstream.
62202 +
62203 +Now that wg_examine_packet_protocol has been added for general
62204 +consumption as ip_tunnel_parse_protocol, it's possible to remove
62205 +wg_examine_packet_protocol and simply use the new
62206 +ip_tunnel_parse_protocol function directly.
62207 +
62208 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
62209 +Signed-off-by: David S. Miller <davem@×××××××××.net>
62210 +Signed-off-by: Jason A. Donenfeld <Jason@×××××.com>
62211 +---
62212 + drivers/net/wireguard/queueing.h | 19 ++-----------------
62213 + drivers/net/wireguard/receive.c | 2 +-
62214 + 2 files changed, 3 insertions(+), 18 deletions(-)
62215 +
62216 +diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h
62217 +index c58df439dbbe..dfb674e03076 100644
62218 +--- a/drivers/net/wireguard/queueing.h
62219 ++++ b/drivers/net/wireguard/queueing.h
62220 +@@ -11,6 +11,7 @@
62221 + #include <linux/skbuff.h>
62222 + #include <linux/ip.h>
62223 + #include <linux/ipv6.h>
62224 ++#include <net/ip_tunnels.h>
62225 +
62226 + struct wg_device;
62227 + struct wg_peer;
62228 +@@ -65,25 +66,9 @@ struct packet_cb {
62229 + #define PACKET_CB(skb) ((struct packet_cb *)((skb)->cb))
62230 + #define PACKET_PEER(skb) (PACKET_CB(skb)->keypair->entry.peer)
62231 +
62232 +-/* Returns either the correct skb->protocol value, or 0 if invalid. */
62233 +-static inline __be16 wg_examine_packet_protocol(struct sk_buff *skb)
62234 +-{
62235 +- if (skb_network_header(skb) >= skb->head &&
62236 +- (skb_network_header(skb) + sizeof(struct iphdr)) <=
62237 +- skb_tail_pointer(skb) &&
62238 +- ip_hdr(skb)->version == 4)
62239 +- return htons(ETH_P_IP);
62240 +- if (skb_network_header(skb) >= skb->head &&
62241 +- (skb_network_header(skb) + sizeof(struct ipv6hdr)) <=
62242 +- skb_tail_pointer(skb) &&
62243 +- ipv6_hdr(skb)->version == 6)
62244 +- return htons(ETH_P_IPV6);
62245 +- return 0;
62246 +-}
62247 +-
62248 + static inline bool wg_check_packet_protocol(struct sk_buff *skb)
62249 + {
62250 +- __be16 real_protocol = wg_examine_packet_protocol(skb);
62251 ++ __be16 real_protocol = ip_tunnel_parse_protocol(skb);
62252 + return real_protocol && skb->protocol == real_protocol;
62253 + }
62254 +
62255 +diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c
62256 +index 9b2ab6fc91cd..2c9551ea6dc7 100644
62257 +--- a/drivers/net/wireguard/receive.c
62258 ++++ b/drivers/net/wireguard/receive.c
62259 +@@ -387,7 +387,7 @@ static void wg_packet_consume_data_done(struct wg_peer *peer,
62260 + */
62261 + skb->ip_summed = CHECKSUM_UNNECESSARY;
62262 + skb->csum_level = ~0; /* All levels */
62263 +- skb->protocol = wg_examine_packet_protocol(skb);
62264 ++ skb->protocol = ip_tunnel_parse_protocol(skb);
62265 + if (skb->protocol == htons(ETH_P_IP)) {
62266 + len = ntohs(ip_hdr(skb)->tot_len);
62267 + if (unlikely(len < sizeof(struct iphdr)))
62268 +--
62269 +cgit v1.2.3-4-ga26e
62270 +