1 |
arfrever 11/05/01 16:04:19 |
2 |
|
3 |
Added: libgcrypt-1.5.0_beta1-ctr.patch |
4 |
Log: |
5 |
Backport changes in CTR mode (bug #356325). |
6 |
|
7 |
(Portage version: 2.2.0_alpha30_p25/cvs/Linux x86_64) |
8 |
|
9 |
Revision Changes Path |
10 |
1.1 dev-libs/libgcrypt/files/libgcrypt-1.5.0_beta1-ctr.patch |
11 |
|
12 |
file : http://sources.gentoo.org/viewvc.cgi/gentoo-x86/dev-libs/libgcrypt/files/libgcrypt-1.5.0_beta1-ctr.patch?rev=1.1&view=markup |
13 |
plain: http://sources.gentoo.org/viewvc.cgi/gentoo-x86/dev-libs/libgcrypt/files/libgcrypt-1.5.0_beta1-ctr.patch?rev=1.1&content-type=text/plain |
14 |
|
15 |
Index: libgcrypt-1.5.0_beta1-ctr.patch |
16 |
=================================================================== |
17 |
http://git.gnupg.org/cgi-bin/gitweb.cgi?p=libgcrypt.git;a=commitdiff;h=2674140cdfdc59ce5ad0238177da1542f5df6e00 |
18 |
http://git.gnupg.org/cgi-bin/gitweb.cgi?p=libgcrypt.git;a=commitdiff;h=3c18377a55085faf4df745034056bac53565effa |
19 |
|
20 |
--- cipher/cipher.c |
21 |
+++ cipher/cipher.c |
22 |
@@ -190,6 +190,9 @@ |
23 |
void (*cbc_dec)(void *context, unsigned char *iv, |
24 |
void *outbuf_arg, const void *inbuf_arg, |
25 |
unsigned int nblocks); |
26 |
+ void (*ctr_enc)(void *context, unsigned char *iv, |
27 |
+ void *outbuf_arg, const void *inbuf_arg, |
28 |
+ unsigned int nblocks); |
29 |
} bulk; |
30 |
|
31 |
|
32 |
@@ -209,12 +212,16 @@ |
33 |
unsigned char iv[MAX_BLOCKSIZE]; |
34 |
} u_iv; |
35 |
|
36 |
+ /* The counter for CTR mode. This field is also used by AESWRAP and |
37 |
+ thus we can't use the U_IV union. */ |
38 |
+ union { |
39 |
+ cipher_context_alignment_t iv_align; |
40 |
+ unsigned char ctr[MAX_BLOCKSIZE]; |
41 |
+ } u_ctr; |
42 |
+ |
43 |
unsigned char lastiv[MAX_BLOCKSIZE]; |
44 |
int unused; /* Number of unused bytes in the IV. */ |
45 |
|
46 |
- unsigned char ctr[MAX_BLOCKSIZE]; /* For Counter (CTR) mode. */ |
47 |
- |
48 |
- |
49 |
/* What follows are two contexts of the cipher in use. The first |
50 |
one needs to be aligned well enough for the cipher operation |
51 |
whereas the second one is a copy created by cipher_setkey and |
52 |
@@ -814,6 +821,7 @@ |
53 |
h->bulk.cfb_dec = _gcry_aes_cfb_dec; |
54 |
h->bulk.cbc_enc = _gcry_aes_cbc_enc; |
55 |
h->bulk.cbc_dec = _gcry_aes_cbc_dec; |
56 |
+ h->bulk.ctr_enc = _gcry_aes_ctr_enc; |
57 |
break; |
58 |
#endif /*USE_AES*/ |
59 |
|
60 |
@@ -936,7 +944,7 @@ |
61 |
memset (&c->marks, 0, sizeof c->marks); |
62 |
memset (c->u_iv.iv, 0, c->cipher->blocksize); |
63 |
memset (c->lastiv, 0, c->cipher->blocksize); |
64 |
- memset (c->ctr, 0, c->cipher->blocksize); |
65 |
+ memset (c->u_ctr.ctr, 0, c->cipher->blocksize); |
66 |
} |
67 |
|
68 |
|
69 |
@@ -1441,35 +1449,50 @@ |
70 |
const unsigned char *inbuf, unsigned int inbuflen) |
71 |
{ |
72 |
unsigned int n; |
73 |
- unsigned char tmp[MAX_BLOCKSIZE]; |
74 |
int i; |
75 |
unsigned int blocksize = c->cipher->blocksize; |
76 |
+ unsigned int nblocks; |
77 |
|
78 |
if (outbuflen < inbuflen) |
79 |
return GPG_ERR_BUFFER_TOO_SHORT; |
80 |
|
81 |
- if ((inbuflen % blocksize)) |
82 |
- return GPG_ERR_INV_LENGTH; |
83 |
+ /* Use a bulk method if available. */ |
84 |
+ nblocks = inbuflen / blocksize; |
85 |
+ if (nblocks && c->bulk.ctr_enc) |
86 |
+ { |
87 |
+ c->bulk.ctr_enc (&c->context.c, c->u_ctr.ctr, outbuf, inbuf, nblocks); |
88 |
+ inbuf += nblocks * blocksize; |
89 |
+ outbuf += nblocks * blocksize; |
90 |
+ inbuflen -= nblocks * blocksize; |
91 |
+ } |
92 |
|
93 |
- for (n=0; n < inbuflen; n++) |
94 |
+ /* If we don't have a bulk method use the standard method. We also |
95 |
+ use this method for a remaining partial block. |
96 |
+ if (inbuflen) |
97 |
{ |
98 |
- if ((n % blocksize) == 0) |
99 |
- { |
100 |
- c->cipher->encrypt (&c->context.c, tmp, c->ctr); |
101 |
+ unsigned char tmp[MAX_BLOCKSIZE]; |
102 |
|
103 |
- for (i = blocksize; i > 0; i--) |
104 |
- { |
105 |
- c->ctr[i-1]++; |
106 |
- if (c->ctr[i-1] != 0) |
107 |
- break; |
108 |
- } |
109 |
- } |
110 |
+ for (n=0; n < inbuflen; n++) |
111 |
+ { |
112 |
+ if ((n % blocksize) == 0) |
113 |
+ { |
114 |
+ c->cipher->encrypt (&c->context.c, tmp, c->u_ctr.ctr); |
115 |
+ |
116 |
+ for (i = blocksize; i > 0; i--) |
117 |
+ { |
118 |
+ c->u_ctr.ctr[i-1]++; |
119 |
+ if (c->u_ctr.ctr[i-1] != 0) |
120 |
+ break; |
121 |
+ } |
122 |
+ } |
123 |
+ |
124 |
+ /* XOR input with encrypted counter and store in output. */ |
125 |
+ outbuf[n] = inbuf[n] ^ tmp[n % blocksize]; |
126 |
+ } |
127 |
|
128 |
- /* XOR input with encrypted counter and store in output. */ |
129 |
- outbuf[n] = inbuf[n] ^ tmp[n % blocksize]; |
130 |
+ wipememory (tmp, sizeof tmp); |
131 |
} |
132 |
|
133 |
- wipememory (tmp, sizeof tmp); |
134 |
return 0; |
135 |
} |
136 |
|
137 |
@@ -1517,7 +1540,7 @@ |
138 |
|
139 |
r = outbuf; |
140 |
a = outbuf; /* We store A directly in OUTBUF. */ |
141 |
- b = c->ctr; /* B is also used to concatenate stuff. */ |
142 |
+ b = c->u_ctr.ctr; /* B is also used to concatenate stuff. */ |
143 |
|
144 |
/* If an IV has been set we use that IV as the Alternative Initial |
145 |
Value; if it has not been set we use the standard value. */ |
146 |
@@ -1593,7 +1616,7 @@ |
147 |
|
148 |
r = outbuf; |
149 |
a = c->lastiv; /* We use c->LASTIV as buffer for A. */ |
150 |
- b = c->ctr; /* B is also used to concatenate stuff. */ |
151 |
+ b = c->u_ctr.ctr; /* B is also used to concatenate stuff. */ |
152 |
|
153 |
/* Copy the inbuf to the outbuf and save A. */ |
154 |
memcpy (a, inbuf, 8); |
155 |
@@ -1861,9 +1884,9 @@ |
156 |
_gcry_cipher_setctr (gcry_cipher_hd_t hd, const void *ctr, size_t ctrlen) |
157 |
{ |
158 |
if (ctr && ctrlen == hd->cipher->blocksize) |
159 |
- memcpy (hd->ctr, ctr, hd->cipher->blocksize); |
160 |
+ memcpy (hd->u_ctr.ctr, ctr, hd->cipher->blocksize); |
161 |
else if (!ctr || !ctrlen) |
162 |
- memset (hd->ctr, 0, hd->cipher->blocksize); |
163 |
+ memset (hd->u_ctr.ctr, 0, hd->cipher->blocksize); |
164 |
else |
165 |
return gpg_error (GPG_ERR_INV_ARG); |
166 |
return 0; |
167 |
@@ -1923,9 +1946,9 @@ |
168 |
|
169 |
case GCRYCTL_SET_CTR: /* Deprecated; use gcry_cipher_setctr. */ |
170 |
if (buffer && buflen == h->cipher->blocksize) |
171 |
- memcpy (h->ctr, buffer, h->cipher->blocksize); |
172 |
+ memcpy (h->u_ctr.ctr, buffer, h->cipher->blocksize); |
173 |
else if (buffer == NULL || buflen == 0) |
174 |
- memset (h->ctr, 0, h->cipher->blocksize); |
175 |
+ memset (h->u_ctr.ctr, 0, h->cipher->blocksize); |
176 |
else |
177 |
rc = GPG_ERR_INV_ARG; |
178 |
break; |
179 |
--- cipher/rijndael.c |
180 |
+++ cipher/rijndael.c |
181 |
@@ -90,9 +90,7 @@ |
182 |
#endif |
183 |
|
184 |
|
185 |
-static const char *selftest(void); |
186 |
- |
187 |
- |
188 |
+ |
189 |
/* Our context object. */ |
190 |
typedef struct |
191 |
{ |
192 |
@@ -144,6 +142,11 @@ |
193 |
do { asm volatile ("pxor %%xmm0, %%xmm0\n\t" \ |
194 |
"pxor %%xmm1, %%xmm1\n" :: ); \ |
195 |
} while (0) |
196 |
+# define aesni_cleanup_2_4() \ |
197 |
+ do { asm volatile ("pxor %%xmm2, %%xmm2\n\t" \ |
198 |
+ "pxor %%xmm3, %%xmm3\n" \ |
199 |
+ "pxor %%xmm4, %%xmm4\n":: ); \ |
200 |
+ } while (0) |
201 |
#else |
202 |
# define aesni_prepare() do { } while (0) |
203 |
# define aesni_cleanup() do { } while (0) |
204 |
@@ -154,6 +157,23 @@ |
205 |
#include "rijndael-tables.h" |
206 |
|
207 |
|
208 |
+ |
209 |
+/* Function prototypes. */ |
210 |
+#ifdef USE_AESNI |
211 |
+/* We don't want to inline these functions to help gcc allocate enough |
212 |
+ registers. */ |
213 |
+static void do_aesni_ctr (const RIJNDAEL_context *ctx, unsigned char *ctr, |
214 |
+ unsigned char *b, const unsigned char *a) |
215 |
+ __attribute__ ((__noinline__)); |
216 |
+static void do_aesni_ctr_4 (const RIJNDAEL_context *ctx, unsigned char *ctr, |
217 |
+ unsigned char *b, const unsigned char *a) |
218 |
+ __attribute__ ((__noinline__)); |
219 |
+#endif /*USE_AESNI*/ |
220 |
+ |
221 |
+static const char *selftest(void); |
222 |
+ |
223 |
+ |
224 |
+ |
225 |
/* Perform the key setup. */ |
226 |
static gcry_err_code_t |
227 |
do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen) |
228 |
@@ -272,7 +292,7 @@ |
229 |
else if (ctx->use_aesni && ctx->rounds == 10) |
230 |
{ |
231 |
/* Note: This code works for AES-128 but it is not much better |
232 |
- than than using the standard key schedule. We disable it for |
233 |
+ than using the standard key schedule. We disable it for |
234 |
now and don't put any effort into implementing this for |
235 |
AES-192 and AES-256. */ |
236 |
asm volatile ("movl %[key], %%esi\n\t" |
237 |
@@ -860,6 +880,239 @@ |
238 |
#undef aesenclast_xmm1_xmm0 |
239 |
} |
240 |
|
241 |
+/* Perform a CTR encryption round using the counter CTR and the input |
242 |
+ block A. Write the result to the output block B and update CTR. |
243 |
+ CTR needs to be a 16 byte aligned little-endian value. */ |
244 |
+static void |
245 |
+do_aesni_ctr (const RIJNDAEL_context *ctx, |
246 |
+ unsigned char *ctr, unsigned char *b, const unsigned char *a) |
247 |
+{ |
248 |
+#define aesenc_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t" |
249 |
+#define aesenclast_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t" |
250 |
+ static unsigned char be_mask[16] __attribute__ ((aligned (16))) = |
251 |
+ { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; |
252 |
+ |
253 |
+ asm volatile ("movdqa %[ctr], %%xmm0\n\t" /* xmm0, xmm2 := CTR */ |
254 |
+ "movaps %%xmm0, %%xmm2\n\t" |
255 |
+ "mov $1, %%esi\n\t" /* xmm2++ (big-endian) */ |
256 |
+ "movd %%esi, %%xmm1\n\t" |
257 |
+ "pshufb %[mask], %%xmm2\n\t" |
258 |
+ "paddq %%xmm1, %%xmm2\n\t" |
259 |
+ "pshufb %[mask], %%xmm2\n\t" |
260 |
+ "movdqa %%xmm2, %[ctr]\n" /* Update CTR. */ |
261 |
+ |
262 |
+ "movl %[key], %%esi\n\t" /* esi := keyschenc */ |
263 |
+ "movdqa (%%esi), %%xmm1\n\t" /* xmm1 := key[0] */ |
264 |
+ "pxor %%xmm1, %%xmm0\n\t" /* xmm0 ^= key[0] */ |
265 |
+ "movdqa 0x10(%%esi), %%xmm1\n\t" |
266 |
+ aesenc_xmm1_xmm0 |
267 |
+ "movdqa 0x20(%%esi), %%xmm1\n\t" |
268 |
+ aesenc_xmm1_xmm0 |
269 |
+ "movdqa 0x30(%%esi), %%xmm1\n\t" |
270 |
+ aesenc_xmm1_xmm0 |
271 |
+ "movdqa 0x40(%%esi), %%xmm1\n\t" |
272 |
+ aesenc_xmm1_xmm0 |
273 |
+ "movdqa 0x50(%%esi), %%xmm1\n\t" |
274 |
+ aesenc_xmm1_xmm0 |
275 |
+ "movdqa 0x60(%%esi), %%xmm1\n\t" |
276 |
+ aesenc_xmm1_xmm0 |
277 |
+ "movdqa 0x70(%%esi), %%xmm1\n\t" |
278 |
+ aesenc_xmm1_xmm0 |
279 |
+ "movdqa 0x80(%%esi), %%xmm1\n\t" |
280 |
+ aesenc_xmm1_xmm0 |
281 |
+ "movdqa 0x90(%%esi), %%xmm1\n\t" |
282 |
+ aesenc_xmm1_xmm0 |
283 |
+ "movdqa 0xa0(%%esi), %%xmm1\n\t" |
284 |
+ "cmp $10, %[rounds]\n\t" |
285 |
+ "jz .Lenclast%=\n\t" |
286 |
+ aesenc_xmm1_xmm0 |
287 |
+ "movdqa 0xb0(%%esi), %%xmm1\n\t" |
288 |
+ aesenc_xmm1_xmm0 |
289 |
+ "movdqa 0xc0(%%esi), %%xmm1\n\t" |
290 |
+ "cmp $12, %[rounds]\n\t" |
291 |
+ "jz .Lenclast%=\n\t" |
292 |
+ aesenc_xmm1_xmm0 |
293 |
+ "movdqa 0xd0(%%esi), %%xmm1\n\t" |
294 |
+ aesenc_xmm1_xmm0 |
295 |
+ "movdqa 0xe0(%%esi), %%xmm1\n" |
296 |
+ |
297 |
+ ".Lenclast%=:\n\t" |
298 |
+ aesenclast_xmm1_xmm0 |
299 |
+ "movdqu %[src], %%xmm1\n\t" /* xmm1 := input */ |
300 |
+ "pxor %%xmm1, %%xmm0\n\t" /* EncCTR ^= input */ |
301 |
+ "movdqu %%xmm0, %[dst]" /* Store EncCTR. */ |
302 |
+ |
303 |
+ : [ctr] "+m" (*ctr), [dst] "=m" (*b) |
304 |
+ : [src] "m" (*a), |
305 |
+ [key] "g" (ctx->keyschenc), |
306 |
+ [rounds] "g" (ctx->rounds), |
307 |
+ [mask] "m" (*be_mask) |
308 |
+ : "%esi", "cc", "memory"); |
309 |
+#undef aesenc_xmm1_xmm0 |
310 |
+#undef aesenclast_xmm1_xmm0 |
311 |
+} |
312 |
+ |
313 |
+ |
314 |
+/* Four blocks at a time variant of do_aesni_ctr. */ |
315 |
+static void |
316 |
+do_aesni_ctr_4 (const RIJNDAEL_context *ctx, |
317 |
+ unsigned char *ctr, unsigned char *b, const unsigned char *a) |
318 |
+{ |
319 |
+#define aesenc_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t" |
320 |
+#define aesenc_xmm1_xmm2 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xd1\n\t" |
321 |
+#define aesenc_xmm1_xmm3 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xd9\n\t" |
322 |
+#define aesenc_xmm1_xmm4 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xe1\n\t" |
323 |
+#define aesenclast_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t" |
324 |
+#define aesenclast_xmm1_xmm2 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xd1\n\t" |
325 |
+#define aesenclast_xmm1_xmm3 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xd9\n\t" |
326 |
+#define aesenclast_xmm1_xmm4 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xe1\n\t" |
327 |
+ |
328 |
+ static unsigned char be_mask[16] __attribute__ ((aligned (16))) = |
329 |
+ { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; |
330 |
+ |
331 |
+ /* Register usage: |
332 |
+ esi keyschedule |
333 |
+ xmm0 CTR-0 |
334 |
+ xmm1 temp / round key |
335 |
+ xmm2 CTR-1 |
336 |
+ xmm3 CTR-2 |
337 |
+ xmm4 CTR-3 |
338 |
+ xmm5 temp |
339 |
+ */ |
340 |
+ |
341 |
+ asm volatile ("movdqa %[ctr], %%xmm0\n\t" /* xmm0, xmm2 := CTR */ |
342 |
+ "movaps %%xmm0, %%xmm2\n\t" |
343 |
+ "mov $1, %%esi\n\t" /* xmm1 := 1 */ |
344 |
+ "movd %%esi, %%xmm1\n\t" |
345 |
+ "pshufb %[mask], %%xmm2\n\t" /* xmm2 := le(xmm2) */ |
346 |
+ "paddq %%xmm1, %%xmm2\n\t" /* xmm2++ */ |
347 |
+ "movaps %%xmm2, %%xmm3\n\t" /* xmm3 := xmm2 */ |
348 |
+ "paddq %%xmm1, %%xmm3\n\t" /* xmm3++ */ |
349 |
+ "movaps %%xmm3, %%xmm4\n\t" /* xmm4 := xmm3 */ |
350 |
+ "paddq %%xmm1, %%xmm4\n\t" /* xmm4++ */ |
351 |
+ "movaps %%xmm4, %%xmm5\n\t" /* xmm5 := xmm4 */ |
352 |
+ "paddq %%xmm1, %%xmm5\n\t" /* xmm5++ */ |
353 |
+ "pshufb %[mask], %%xmm2\n\t" /* xmm2 := be(xmm2) */ |
354 |
+ "pshufb %[mask], %%xmm3\n\t" /* xmm3 := be(xmm3) */ |
355 |
+ "pshufb %[mask], %%xmm4\n\t" /* xmm4 := be(xmm4) */ |
356 |
+ "pshufb %[mask], %%xmm5\n\t" /* xmm5 := be(xmm5) */ |
357 |
+ "movdqa %%xmm5, %[ctr]\n" /* Update CTR. */ |
358 |
+ |
359 |
+ "movl %[key], %%esi\n\t" /* esi := keyschenc */ |
360 |
+ "movdqa (%%esi), %%xmm1\n\t" /* xmm1 := key[0] */ |
361 |
+ "pxor %%xmm1, %%xmm0\n\t" /* xmm0 ^= key[0] */ |
362 |
+ "pxor %%xmm1, %%xmm2\n\t" /* xmm2 ^= key[0] */ |
363 |
+ "pxor %%xmm1, %%xmm3\n\t" /* xmm3 ^= key[0] */ |
364 |
+ "pxor %%xmm1, %%xmm4\n\t" /* xmm4 ^= key[0] */ |
365 |
+ "movdqa 0x10(%%esi), %%xmm1\n\t" |
366 |
+ aesenc_xmm1_xmm0 |
367 |
+ aesenc_xmm1_xmm2 |
368 |
+ aesenc_xmm1_xmm3 |
369 |
+ aesenc_xmm1_xmm4 |
370 |
+ "movdqa 0x20(%%esi), %%xmm1\n\t" |
371 |
+ aesenc_xmm1_xmm0 |
372 |
+ aesenc_xmm1_xmm2 |
373 |
+ aesenc_xmm1_xmm3 |
374 |
+ aesenc_xmm1_xmm4 |
375 |
+ "movdqa 0x30(%%esi), %%xmm1\n\t" |
376 |
+ aesenc_xmm1_xmm0 |
377 |
+ aesenc_xmm1_xmm2 |
378 |
+ aesenc_xmm1_xmm3 |
379 |
+ aesenc_xmm1_xmm4 |
380 |
+ "movdqa 0x40(%%esi), %%xmm1\n\t" |
381 |
+ aesenc_xmm1_xmm0 |
382 |
+ aesenc_xmm1_xmm2 |
383 |
+ aesenc_xmm1_xmm3 |
384 |
+ aesenc_xmm1_xmm4 |
385 |
+ "movdqa 0x50(%%esi), %%xmm1\n\t" |
386 |
+ aesenc_xmm1_xmm0 |
387 |
+ aesenc_xmm1_xmm2 |
388 |
+ aesenc_xmm1_xmm3 |
389 |
+ aesenc_xmm1_xmm4 |
390 |
+ "movdqa 0x60(%%esi), %%xmm1\n\t" |
391 |
+ aesenc_xmm1_xmm0 |
392 |
+ aesenc_xmm1_xmm2 |
393 |
+ aesenc_xmm1_xmm3 |
394 |
+ aesenc_xmm1_xmm4 |
395 |
+ "movdqa 0x70(%%esi), %%xmm1\n\t" |
396 |
+ aesenc_xmm1_xmm0 |
397 |
+ aesenc_xmm1_xmm2 |
398 |
+ aesenc_xmm1_xmm3 |
399 |
+ aesenc_xmm1_xmm4 |
400 |
+ "movdqa 0x80(%%esi), %%xmm1\n\t" |
401 |
+ aesenc_xmm1_xmm0 |
402 |
+ aesenc_xmm1_xmm2 |
403 |
+ aesenc_xmm1_xmm3 |
404 |
+ aesenc_xmm1_xmm4 |
405 |
+ "movdqa 0x90(%%esi), %%xmm1\n\t" |
406 |
+ aesenc_xmm1_xmm0 |
407 |
+ aesenc_xmm1_xmm2 |
408 |
+ aesenc_xmm1_xmm3 |
409 |
+ aesenc_xmm1_xmm4 |
410 |
+ "movdqa 0xa0(%%esi), %%xmm1\n\t" |
411 |
+ "cmp $10, %[rounds]\n\t" |
412 |
+ "jz .Lenclast%=\n\t" |
413 |
+ aesenc_xmm1_xmm0 |
414 |
+ aesenc_xmm1_xmm2 |
415 |
+ aesenc_xmm1_xmm3 |
416 |
+ aesenc_xmm1_xmm4 |
417 |
+ "movdqa 0xb0(%%esi), %%xmm1\n\t" |
418 |
+ aesenc_xmm1_xmm0 |
419 |
+ aesenc_xmm1_xmm2 |
420 |
+ aesenc_xmm1_xmm3 |
421 |
+ aesenc_xmm1_xmm4 |
422 |
+ "movdqa 0xc0(%%esi), %%xmm1\n\t" |
423 |
+ "cmp $12, %[rounds]\n\t" |
424 |
+ "jz .Lenclast%=\n\t" |
425 |
+ aesenc_xmm1_xmm0 |
426 |
+ aesenc_xmm1_xmm2 |
427 |
+ aesenc_xmm1_xmm3 |
428 |
+ aesenc_xmm1_xmm4 |
429 |
+ "movdqa 0xd0(%%esi), %%xmm1\n\t" |
430 |
+ aesenc_xmm1_xmm0 |
431 |
+ aesenc_xmm1_xmm2 |
432 |
+ aesenc_xmm1_xmm3 |
433 |
+ aesenc_xmm1_xmm4 |
434 |
+ "movdqa 0xe0(%%esi), %%xmm1\n" |
435 |
+ |
436 |
+ ".Lenclast%=:\n\t" |
437 |
+ aesenclast_xmm1_xmm0 |
438 |
+ aesenclast_xmm1_xmm2 |
439 |
+ aesenclast_xmm1_xmm3 |
440 |
+ aesenclast_xmm1_xmm4 |
441 |
+ |
442 |
+ "movdqu %[src], %%xmm1\n\t" /* Get block 1. */ |
443 |
+ "pxor %%xmm1, %%xmm0\n\t" /* EncCTR-1 ^= input */ |
444 |
+ "movdqu %%xmm0, %[dst]\n\t" /* Store block 1 */ |
445 |
+ |
446 |
+ "movdqu (16)%[src], %%xmm1\n\t" /* Get block 2. */ |
447 |
+ "pxor %%xmm1, %%xmm2\n\t" /* EncCTR-2 ^= input */ |
448 |
+ "movdqu %%xmm2, (16)%[dst]\n\t" /* Store block 2. */ |
449 |
+ |
450 |
+ "movdqu (32)%[src], %%xmm1\n\t" /* Get block 3. */ |
451 |
+ "pxor %%xmm1, %%xmm3\n\t" /* EncCTR-3 ^= input */ |
452 |
+ "movdqu %%xmm3, (32)%[dst]\n\t" /* Store block 3. */ |
453 |
+ |
454 |
+ "movdqu (48)%[src], %%xmm1\n\t" /* Get block 4. */ |
455 |
+ "pxor %%xmm1, %%xmm4\n\t" /* EncCTR-4 ^= input */ |
456 |
+ "movdqu %%xmm4, (48)%[dst]" /* Store block 4. */ |
457 |
+ |
458 |
+ : [ctr] "+m" (*ctr), [dst] "=m" (*b) |
459 |
+ : [src] "m" (*a), |
460 |
+ [key] "g" (ctx->keyschenc), |
461 |
+ [rounds] "g" (ctx->rounds), |
462 |
+ [mask] "m" (*be_mask) |
463 |
+ : "%esi", "cc", "memory"); |
464 |
+#undef aesenc_xmm1_xmm0 |
465 |
+#undef aesenc_xmm1_xmm2 |
466 |
+#undef aesenc_xmm1_xmm3 |
467 |
+#undef aesenc_xmm1_xmm4 |
468 |
+#undef aesenclast_xmm1_xmm0 |
469 |
+#undef aesenclast_xmm1_xmm2 |
470 |
+#undef aesenclast_xmm1_xmm3 |
471 |
+#undef aesenclast_xmm1_xmm4 |
472 |
+} |
473 |
+ |
474 |
|
475 |
static void |
476 |
do_aesni (RIJNDAEL_context *ctx, int decrypt_flag, |
477 |
@@ -1014,6 +1267,69 @@ |
478 |
|
479 |
_gcry_burn_stack (48 + 2*sizeof(int)); |
480 |
} |
481 |
+ |
482 |
+ |
483 |
+/* Bulk encryption of complete blocks in CTR mode. Caller needs to |
484 |
+ make sure that CTR is aligned on a 16 byte boundary if AESNI; the |
485 |
+ minimum alignment is for an u32. This function is only intended |
486 |
+ for the bulk encryption feature of cipher.c. CTR is expected to be |
487 |
+ of size BLOCKSIZE. */ |
488 |
+void |
489 |
+_gcry_aes_ctr_enc (void *context, unsigned char *ctr, |
490 |
+ void *outbuf_arg, const void *inbuf_arg, |
491 |
+ unsigned int nblocks) |
492 |
+{ |
493 |
+ RIJNDAEL_context *ctx = context; |
494 |
+ unsigned char *outbuf = outbuf_arg; |
495 |
+ const unsigned char *inbuf = inbuf_arg; |
496 |
+ unsigned char *p; |
497 |
+ int i; |
498 |
+ |
499 |
+ if (0) |
500 |
+ ; |
501 |
+#ifdef USE_AESNI |
502 |
+ else if (ctx->use_aesni) |
503 |
+ { |
504 |
+ aesni_prepare (); |
505 |
+ for ( ;nblocks > 3 ; nblocks -= 4 ) |
506 |
+ { |
507 |
+ do_aesni_ctr_4 (ctx, ctr, outbuf, inbuf); |
508 |
+ outbuf += 4*BLOCKSIZE; |
509 |
+ inbuf += 4*BLOCKSIZE; |
510 |
+ } |
511 |
+ for ( ;nblocks; nblocks-- ) |
512 |
+ { |
513 |
+ do_aesni_ctr (ctx, ctr, outbuf, inbuf); |
514 |
+ outbuf += BLOCKSIZE; |
515 |
+ inbuf += BLOCKSIZE; |
516 |
+ } |
517 |
+ aesni_cleanup (); |
518 |
+ aesni_cleanup_2_4 (); |
519 |
+ } |
520 |
+#endif /*USE_AESNI*/ |
521 |
+ else |
522 |
+ { |
523 |
+ union { unsigned char x1[16]; u32 x32[4]; } tmp; |
524 |
+ |
525 |
+ for ( ;nblocks; nblocks-- ) |
526 |
+ { |
527 |
+ /* Encrypt the counter. */ |
528 |
+ do_encrypt_aligned (ctx, tmp.x1, ctr); |
529 |
+ /* XOR the input with the encrypted counter and store in output. */ |
530 |
+ for (p=tmp.x1, i=0; i < BLOCKSIZE; i++) |
531 |
+ *outbuf++ = (*p++ ^= *inbuf++); |
532 |
+ /* Increment the counter. */ |
533 |
+ for (i = BLOCKSIZE; i > 0; i--) |
534 |
+ { |
535 |
+ ctr[i-1]++; |
536 |
+ if (ctr[i-1]) |
537 |
+ break; |
538 |
+ } |
539 |
+ } |
540 |
+ } |
541 |
+ |
542 |
+ _gcry_burn_stack (48 + 2*sizeof(int)); |
543 |
+} |
544 |
|
545 |
|
546 |
|
547 |
--- src/cipher.h |
548 |
+++ src/cipher.h |
549 |
@@ -53,6 +53,9 @@ |
550 |
void _gcry_aes_cbc_dec (void *context, unsigned char *iv, |
551 |
void *outbuf_arg, const void *inbuf_arg, |
552 |
unsigned int nblocks); |
553 |
+void _gcry_aes_ctr_enc (void *context, unsigned char *ctr, |
554 |
+ void *outbuf_arg, const void *inbuf_arg, |
555 |
+ unsigned int nblocks); |
556 |
|
557 |
|
558 |
/*-- dsa.c --*/ |
559 |
--- tests/basic.c |
560 |
+++ tests/basic.c |
561 |
@@ -69,6 +69,22 @@ |
562 |
} |
563 |
|
564 |
static void |
565 |
+mismatch (const void *expected, size_t expectedlen, |
566 |
+ const void *computed, size_t computedlen) |
567 |
+{ |
568 |
+ const unsigned char *p; |
569 |
+ |
570 |
+ fprintf (stderr, "expected:"); |
571 |
+ for (p = expected; expectedlen; p++, expectedlen--) |
572 |
+ fprintf (stderr, " %02x", *p); |
573 |
+ fprintf (stderr, "\ncomputed:"); |
574 |
+ for (p = computed; computedlen; p++, computedlen--) |
575 |
+ fprintf (stderr, " %02x", *p); |
576 |
+ fprintf (stderr, "\n"); |
577 |
+} |
578 |
+ |
579 |
+ |
580 |
+static void |
581 |
die (const char *format, ...) |
582 |
{ |
583 |
va_list arg_ptr; |
584 |
@@ -349,8 +365,7 @@ |
585 |
unsigned char plaintext[MAX_DATA_LEN]; |
586 |
int inlen; |
587 |
char out[MAX_DATA_LEN]; |
588 |
- } |
589 |
- data[MAX_DATA_LEN]; |
590 |
+ } data[5]; |
591 |
} tv[] = |
592 |
{ |
593 |
/* http://csrc.nist.gov/publications/nistpubs/800-38a/sp800-38a.pdf */ |
594 |
@@ -369,6 +384,8 @@ |
595 |
{ "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17\xad\x2b\x41\x7b\xe6\x6c\x37\x10", |
596 |
16, |
597 |
"\x1e\x03\x1d\xda\x2f\xbe\x03\xd1\x79\x21\x70\xa0\xf3\x00\x9c\xee" }, |
598 |
+ |
599 |
+ { "", 0, "" } |
600 |
} |
601 |
}, |
602 |
{ GCRY_CIPHER_AES192, |
603 |
@@ -387,6 +404,7 @@ |
604 |
{ "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17\xad\x2b\x41\x7b\xe6\x6c\x37\x10", |
605 |
16, |
606 |
"\x4f\x78\xa7\xf6\xd2\x98\x09\x58\x5a\x97\xda\xec\x58\xc6\xb0\x50" }, |
607 |
+ { "", 0, "" } |
608 |
} |
609 |
}, |
610 |
{ GCRY_CIPHER_AES256, |
611 |
@@ -404,7 +422,80 @@ |
612 |
"\x2b\x09\x30\xda\xa2\x3d\xe9\x4c\xe8\x70\x17\xba\x2d\x84\x98\x8d" }, |
613 |
{ "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17\xad\x2b\x41\x7b\xe6\x6c\x37\x10", |
614 |
16, |
615 |
- "\xdf\xc9\xc5\x8d\xb6\x7a\xad\xa6\x13\xc2\xdd\x08\x45\x79\x41\xa6" } |
616 |
+ "\xdf\xc9\xc5\x8d\xb6\x7a\xad\xa6\x13\xc2\xdd\x08\x45\x79\x41\xa6" }, |
617 |
+ { "", 0, "" } |
618 |
+ } |
619 |
+ }, |
620 |
+ /* Some truncation tests. With a truncated second block and |
621 |
+ also with a single truncated block. */ |
622 |
+ { GCRY_CIPHER_AES, |
623 |
+ "\x2b\x7e\x15\x16\x28\xae\xd2\xa6\xab\xf7\x15\x88\x09\xcf\x4f\x3c", |
624 |
+ "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff", |
625 |
+ {{"\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a", |
626 |
+ 16, |
627 |
+ "\x87\x4d\x61\x91\xb6\x20\xe3\x26\x1b\xef\x68\x64\x99\x0d\xb6\xce" }, |
628 |
+ {"\xae\x2d\x8a\x57\x1e\x03\xac\x9c\x9e\xb7\x6f\xac\x45\xaf\x8e", |
629 |
+ 15, |
630 |
+ "\x98\x06\xf6\x6b\x79\x70\xfd\xff\x86\x17\x18\x7b\xb9\xff\xfd" }, |
631 |
+ {"", 0, "" } |
632 |
+ } |
633 |
+ }, |
634 |
+ { GCRY_CIPHER_AES, |
635 |
+ "\x2b\x7e\x15\x16\x28\xae\xd2\xa6\xab\xf7\x15\x88\x09\xcf\x4f\x3c", |
636 |
+ "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff", |
637 |
+ {{"\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a", |
638 |
+ 16, |
639 |
+ "\x87\x4d\x61\x91\xb6\x20\xe3\x26\x1b\xef\x68\x64\x99\x0d\xb6\xce" }, |
640 |
+ {"\xae", |
641 |
+ 1, |
642 |
+ "\x98" }, |
643 |
+ {"", 0, "" } |
644 |
+ } |
645 |
+ }, |
646 |
+ { GCRY_CIPHER_AES, |
647 |
+ "\x2b\x7e\x15\x16\x28\xae\xd2\xa6\xab\xf7\x15\x88\x09\xcf\x4f\x3c", |
648 |
+ "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff", |
649 |
+ {{"\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17", |
650 |
+ 15, |
651 |
+ "\x87\x4d\x61\x91\xb6\x20\xe3\x26\x1b\xef\x68\x64\x99\x0d\xb6" }, |
652 |
+ {"", 0, "" } |
653 |
+ } |
654 |
+ }, |
655 |
+ { GCRY_CIPHER_AES, |
656 |
+ "\x2b\x7e\x15\x16\x28\xae\xd2\xa6\xab\xf7\x15\x88\x09\xcf\x4f\x3c", |
657 |
+ "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff", |
658 |
+ {{"\x6b", |
659 |
+ 1, |
660 |
+ "\x87" }, |
661 |
+ {"", 0, "" } |
662 |
+ } |
663 |
+ }, |
664 |
+#if USE_CAST5 |
665 |
+ /* A self-made test vector using a 64 bit block cipher. */ |
666 |
+ { GCRY_CIPHER_CAST5, |
667 |
+ "\x2b\x7e\x15\x16\x28\xae\xd2\xa6\xab\xf7\x15\x88\x09\xcf\x4f\x3c", |
668 |
+ "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8", |
669 |
+ {{"\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a", |
670 |
+ 16, |
671 |
+ "\xe8\xa7\xac\x68\xca\xca\xa0\x20\x10\xcb\x1b\xcc\x79\x2c\xc4\x48" }, |
672 |
+ {"\xae\x2d\x8a\x57\x1e\x03\xac\x9c", |
673 |
+ 8, |
674 |
+ "\x16\xe8\x72\x77\xb0\x98\x29\x68" }, |
675 |
+ {"\x9e\xb7\x6f\xac\x45\xaf\x8e\x51", |
676 |
+ 8, |
677 |
+ "\x9a\xb3\xa8\x03\x3b\xb4\x14\xba" }, |
678 |
+ {"\xae\x2d\x8a\x57\x1e\x03\xac\x9c\xa1\x00", |
679 |
+ 10, |
680 |
+ "\x31\x5e\xd3\xfb\x1b\x8d\xd1\xf9\xb0\x83" }, |
681 |
+ { "", 0, "" } |
682 |
+ } |
683 |
+ }, |
684 |
+#endif /*USE_CAST5*/ |
685 |
+ { 0, |
686 |
+ "", |
687 |
+ "", |
688 |
+ { |
689 |
+ {"", 0, "" } |
690 |
} |
691 |
} |
692 |
}; |
693 |
@@ -417,6 +508,9 @@ |
694 |
fprintf (stderr, " Starting CTR cipher checks.\n"); |
695 |
for (i = 0; i < sizeof (tv) / sizeof (tv[0]); i++) |
696 |
{ |
697 |
+ if (!tv[i].algo) |
698 |
+ continue; |
699 |
+ |
700 |
err = gcry_cipher_open (&hde, tv[i].algo, GCRY_CIPHER_MODE_CTR, 0); |
701 |
if (!err) |
702 |
err = gcry_cipher_open (&hdd, tv[i].algo, GCRY_CIPHER_MODE_CTR, 0); |
703 |
@@ -485,7 +579,11 @@ |
704 |
} |
705 |
|
706 |
if (memcmp (tv[i].data[j].out, out, tv[i].data[j].inlen)) |
707 |
- fail ("aes-ctr, encrypt mismatch entry %d:%d\n", i, j); |
708 |
+ { |
709 |
+ fail ("aes-ctr, encrypt mismatch entry %d:%d\n", i, j); |
710 |
+ mismatch (tv[i].data[j].out, tv[i].data[j].inlen, |
711 |
+ out, tv[i].data[j].inlen); |
712 |
+ } |
713 |
|
714 |
err = gcry_cipher_decrypt (hdd, out, tv[i].data[j].inlen, NULL, 0); |
715 |
if (err) |
716 |
@@ -498,7 +596,11 @@ |
717 |
} |
718 |
|
719 |
if (memcmp (tv[i].data[j].plaintext, out, tv[i].data[j].inlen)) |
720 |
- fail ("aes-ctr, decrypt mismatch entry %d:%d\n", i, j); |
721 |
+ { |
722 |
+ fail ("aes-ctr, decrypt mismatch entry %d:%d\n", i, j); |
723 |
+ mismatch (tv[i].data[j].plaintext, tv[i].data[j].inlen, |
724 |
+ out, tv[i].data[j].inlen); |
725 |
+ } |
726 |
|
727 |
} |
728 |
|
729 |
@@ -509,18 +611,6 @@ |
730 |
if (err) |
731 |
fail ("aes-ctr, encryption failed for valid input"); |
732 |
|
733 |
- err = gcry_cipher_encrypt (hde, out, MAX_DATA_LEN, |
734 |
- "1234567890123456", 15); |
735 |
- if (gpg_err_code (err) != GPG_ERR_INV_LENGTH) |
736 |
- fail ("aes-ctr, too short input returned wrong error: %s\n", |
737 |
- gpg_strerror (err)); |
738 |
- |
739 |
- err = gcry_cipher_encrypt (hde, out, MAX_DATA_LEN, |
740 |
- "12345678901234567", 17); |
741 |
- if (gpg_err_code (err) != GPG_ERR_INV_LENGTH) |
742 |
- fail ("aes-ctr, too long input returned wrong error: %s\n", |
743 |
- gpg_strerror (err)); |
744 |
- |
745 |
err = gcry_cipher_encrypt (hde, out, 15, |
746 |
"1234567890123456", 16); |
747 |
if (gpg_err_code (err) != GPG_ERR_BUFFER_TOO_SHORT) |
748 |
@@ -545,18 +635,6 @@ |
749 |
if (err) |
750 |
fail ("aes-ctr, decryption failed for valid input"); |
751 |
|
752 |
- err = gcry_cipher_decrypt (hde, out, MAX_DATA_LEN, |
753 |
- "1234567890123456", 15); |
754 |
- if (gpg_err_code (err) != GPG_ERR_INV_LENGTH) |
755 |
- fail ("aes-ctr, too short input returned wrong error: %s\n", |
756 |
- gpg_strerror (err)); |
757 |
- |
758 |
- err = gcry_cipher_decrypt (hde, out, MAX_DATA_LEN, |
759 |
- "12345678901234567", 17); |
760 |
- if (gpg_err_code (err) != GPG_ERR_INV_LENGTH) |
761 |
- fail ("aes-ctr, too long input returned wrong error: %s\n", |
762 |
- gpg_strerror (err)); |
763 |
- |
764 |
err = gcry_cipher_decrypt (hde, out, 15, |
765 |
"1234567890123456", 16); |
766 |
if (gpg_err_code (err) != GPG_ERR_BUFFER_TOO_SHORT) |