Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi...
[firefly-linux-kernel-4.4.55.git] / arch / x86 / crypto / blowfish_avx2_glue.c
1 /*
2  * Glue Code for x86_64/AVX2 assembler optimized version of Blowfish
3  *
4  * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
5  *
6  * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
7  *   Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
8  * CTR part based on code (crypto/ctr.c) by:
9  *   (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
10  *
11  * This program is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU General Public License as published by
13  * the Free Software Foundation; either version 2 of the License, or
14  * (at your option) any later version.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  * GNU General Public License for more details.
20  *
21  */
22
23 #include <linux/module.h>
24 #include <linux/types.h>
25 #include <linux/crypto.h>
26 #include <linux/err.h>
27 #include <crypto/algapi.h>
28 #include <crypto/blowfish.h>
29 #include <crypto/cryptd.h>
30 #include <crypto/ctr.h>
31 #include <asm/i387.h>
32 #include <asm/xcr.h>
33 #include <asm/xsave.h>
34 #include <asm/crypto/blowfish.h>
35 #include <asm/crypto/ablk_helper.h>
36 #include <crypto/scatterwalk.h>
37
38 #define BF_AVX2_PARALLEL_BLOCKS 32
39
40 /* 32-way AVX2 parallel cipher functions */
41 asmlinkage void blowfish_ecb_enc_32way(struct bf_ctx *ctx, u8 *dst,
42                                        const u8 *src);
43 asmlinkage void blowfish_ecb_dec_32way(struct bf_ctx *ctx, u8 *dst,
44                                        const u8 *src);
45 asmlinkage void blowfish_cbc_dec_32way(struct bf_ctx *ctx, u8 *dst,
46                                        const u8 *src);
47 asmlinkage void blowfish_ctr_32way(struct bf_ctx *ctx, u8 *dst, const u8 *src,
48                                    __be64 *iv);
49
50 static inline bool bf_fpu_begin(bool fpu_enabled, unsigned int nbytes)
51 {
52         if (fpu_enabled)
53                 return true;
54
55         /* FPU is only used when chunk to be processed is large enough, so
56          * do not enable FPU until it is necessary.
57          */
58         if (nbytes < BF_BLOCK_SIZE * BF_AVX2_PARALLEL_BLOCKS)
59                 return false;
60
61         kernel_fpu_begin();
62         return true;
63 }
64
65 static inline void bf_fpu_end(bool fpu_enabled)
66 {
67         if (fpu_enabled)
68                 kernel_fpu_end();
69 }
70
71 static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
72                      bool enc)
73 {
74         bool fpu_enabled = false;
75         struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
76         const unsigned int bsize = BF_BLOCK_SIZE;
77         unsigned int nbytes;
78         int err;
79
80         err = blkcipher_walk_virt(desc, walk);
81         desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
82
83         while ((nbytes = walk->nbytes)) {
84                 u8 *wsrc = walk->src.virt.addr;
85                 u8 *wdst = walk->dst.virt.addr;
86
87                 fpu_enabled = bf_fpu_begin(fpu_enabled, nbytes);
88
89                 /* Process multi-block AVX2 batch */
90                 if (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS) {
91                         do {
92                                 if (enc)
93                                         blowfish_ecb_enc_32way(ctx, wdst, wsrc);
94                                 else
95                                         blowfish_ecb_dec_32way(ctx, wdst, wsrc);
96
97                                 wsrc += bsize * BF_AVX2_PARALLEL_BLOCKS;
98                                 wdst += bsize * BF_AVX2_PARALLEL_BLOCKS;
99                                 nbytes -= bsize * BF_AVX2_PARALLEL_BLOCKS;
100                         } while (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS);
101
102                         if (nbytes < bsize)
103                                 goto done;
104                 }
105
106                 /* Process multi-block batch */
107                 if (nbytes >= bsize * BF_PARALLEL_BLOCKS) {
108                         do {
109                                 if (enc)
110                                         blowfish_enc_blk_4way(ctx, wdst, wsrc);
111                                 else
112                                         blowfish_dec_blk_4way(ctx, wdst, wsrc);
113
114                                 wsrc += bsize * BF_PARALLEL_BLOCKS;
115                                 wdst += bsize * BF_PARALLEL_BLOCKS;
116                                 nbytes -= bsize * BF_PARALLEL_BLOCKS;
117                         } while (nbytes >= bsize * BF_PARALLEL_BLOCKS);
118
119                         if (nbytes < bsize)
120                                 goto done;
121                 }
122
123                 /* Handle leftovers */
124                 do {
125                         if (enc)
126                                 blowfish_enc_blk(ctx, wdst, wsrc);
127                         else
128                                 blowfish_dec_blk(ctx, wdst, wsrc);
129
130                         wsrc += bsize;
131                         wdst += bsize;
132                         nbytes -= bsize;
133                 } while (nbytes >= bsize);
134
135 done:
136                 err = blkcipher_walk_done(desc, walk, nbytes);
137         }
138
139         bf_fpu_end(fpu_enabled);
140         return err;
141 }
142
143 static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
144                        struct scatterlist *src, unsigned int nbytes)
145 {
146         struct blkcipher_walk walk;
147
148         blkcipher_walk_init(&walk, dst, src, nbytes);
149         return ecb_crypt(desc, &walk, true);
150 }
151
152 static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
153                        struct scatterlist *src, unsigned int nbytes)
154 {
155         struct blkcipher_walk walk;
156
157         blkcipher_walk_init(&walk, dst, src, nbytes);
158         return ecb_crypt(desc, &walk, false);
159 }
160
161 static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
162                                   struct blkcipher_walk *walk)
163 {
164         struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
165         unsigned int bsize = BF_BLOCK_SIZE;
166         unsigned int nbytes = walk->nbytes;
167         u64 *src = (u64 *)walk->src.virt.addr;
168         u64 *dst = (u64 *)walk->dst.virt.addr;
169         u64 *iv = (u64 *)walk->iv;
170
171         do {
172                 *dst = *src ^ *iv;
173                 blowfish_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
174                 iv = dst;
175
176                 src += 1;
177                 dst += 1;
178                 nbytes -= bsize;
179         } while (nbytes >= bsize);
180
181         *(u64 *)walk->iv = *iv;
182         return nbytes;
183 }
184
185 static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
186                        struct scatterlist *src, unsigned int nbytes)
187 {
188         struct blkcipher_walk walk;
189         int err;
190
191         blkcipher_walk_init(&walk, dst, src, nbytes);
192         err = blkcipher_walk_virt(desc, &walk);
193
194         while ((nbytes = walk.nbytes)) {
195                 nbytes = __cbc_encrypt(desc, &walk);
196                 err = blkcipher_walk_done(desc, &walk, nbytes);
197         }
198
199         return err;
200 }
201
202 static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
203                                   struct blkcipher_walk *walk)
204 {
205         struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
206         const unsigned int bsize = BF_BLOCK_SIZE;
207         unsigned int nbytes = walk->nbytes;
208         u64 *src = (u64 *)walk->src.virt.addr;
209         u64 *dst = (u64 *)walk->dst.virt.addr;
210         u64 last_iv;
211         int i;
212
213         /* Start of the last block. */
214         src += nbytes / bsize - 1;
215         dst += nbytes / bsize - 1;
216
217         last_iv = *src;
218
219         /* Process multi-block AVX2 batch */
220         if (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS) {
221                 do {
222                         nbytes -= bsize * (BF_AVX2_PARALLEL_BLOCKS - 1);
223                         src -= BF_AVX2_PARALLEL_BLOCKS - 1;
224                         dst -= BF_AVX2_PARALLEL_BLOCKS - 1;
225
226                         blowfish_cbc_dec_32way(ctx, (u8 *)dst, (u8 *)src);
227
228                         nbytes -= bsize;
229                         if (nbytes < bsize)
230                                 goto done;
231
232                         *dst ^= *(src - 1);
233                         src -= 1;
234                         dst -= 1;
235                 } while (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS);
236
237                 if (nbytes < bsize)
238                         goto done;
239         }
240
241         /* Process multi-block batch */
242         if (nbytes >= bsize * BF_PARALLEL_BLOCKS) {
243                 u64 ivs[BF_PARALLEL_BLOCKS - 1];
244
245                 do {
246                         nbytes -= bsize * (BF_PARALLEL_BLOCKS - 1);
247                         src -= BF_PARALLEL_BLOCKS - 1;
248                         dst -= BF_PARALLEL_BLOCKS - 1;
249
250                         for (i = 0; i < BF_PARALLEL_BLOCKS - 1; i++)
251                                 ivs[i] = src[i];
252
253                         blowfish_dec_blk_4way(ctx, (u8 *)dst, (u8 *)src);
254
255                         for (i = 0; i < BF_PARALLEL_BLOCKS - 1; i++)
256                                 dst[i + 1] ^= ivs[i];
257
258                         nbytes -= bsize;
259                         if (nbytes < bsize)
260                                 goto done;
261
262                         *dst ^= *(src - 1);
263                         src -= 1;
264                         dst -= 1;
265                 } while (nbytes >= bsize * BF_PARALLEL_BLOCKS);
266
267                 if (nbytes < bsize)
268                         goto done;
269         }
270
271         /* Handle leftovers */
272         for (;;) {
273                 blowfish_dec_blk(ctx, (u8 *)dst, (u8 *)src);
274
275                 nbytes -= bsize;
276                 if (nbytes < bsize)
277                         break;
278
279                 *dst ^= *(src - 1);
280                 src -= 1;
281                 dst -= 1;
282         }
283
284 done:
285         *dst ^= *(u64 *)walk->iv;
286         *(u64 *)walk->iv = last_iv;
287
288         return nbytes;
289 }
290
291 static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
292                        struct scatterlist *src, unsigned int nbytes)
293 {
294         bool fpu_enabled = false;
295         struct blkcipher_walk walk;
296         int err;
297
298         blkcipher_walk_init(&walk, dst, src, nbytes);
299         err = blkcipher_walk_virt(desc, &walk);
300         desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
301
302         while ((nbytes = walk.nbytes)) {
303                 fpu_enabled = bf_fpu_begin(fpu_enabled, nbytes);
304                 nbytes = __cbc_decrypt(desc, &walk);
305                 err = blkcipher_walk_done(desc, &walk, nbytes);
306         }
307
308         bf_fpu_end(fpu_enabled);
309         return err;
310 }
311
312 static void ctr_crypt_final(struct blkcipher_desc *desc,
313                             struct blkcipher_walk *walk)
314 {
315         struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
316         u8 *ctrblk = walk->iv;
317         u8 keystream[BF_BLOCK_SIZE];
318         u8 *src = walk->src.virt.addr;
319         u8 *dst = walk->dst.virt.addr;
320         unsigned int nbytes = walk->nbytes;
321
322         blowfish_enc_blk(ctx, keystream, ctrblk);
323         crypto_xor(keystream, src, nbytes);
324         memcpy(dst, keystream, nbytes);
325
326         crypto_inc(ctrblk, BF_BLOCK_SIZE);
327 }
328
329 static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
330                                 struct blkcipher_walk *walk)
331 {
332         struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
333         unsigned int bsize = BF_BLOCK_SIZE;
334         unsigned int nbytes = walk->nbytes;
335         u64 *src = (u64 *)walk->src.virt.addr;
336         u64 *dst = (u64 *)walk->dst.virt.addr;
337         int i;
338
339         /* Process multi-block AVX2 batch */
340         if (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS) {
341                 do {
342                         blowfish_ctr_32way(ctx, (u8 *)dst, (u8 *)src,
343                                            (__be64 *)walk->iv);
344
345                         src += BF_AVX2_PARALLEL_BLOCKS;
346                         dst += BF_AVX2_PARALLEL_BLOCKS;
347                         nbytes -= bsize * BF_AVX2_PARALLEL_BLOCKS;
348                 } while (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS);
349
350                 if (nbytes < bsize)
351                         goto done;
352         }
353
354         /* Process four block batch */
355         if (nbytes >= bsize * BF_PARALLEL_BLOCKS) {
356                 __be64 ctrblocks[BF_PARALLEL_BLOCKS];
357                 u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv);
358
359                 do {
360                         /* create ctrblks for parallel encrypt */
361                         for (i = 0; i < BF_PARALLEL_BLOCKS; i++) {
362                                 if (dst != src)
363                                         dst[i] = src[i];
364
365                                 ctrblocks[i] = cpu_to_be64(ctrblk++);
366                         }
367
368                         blowfish_enc_blk_xor_4way(ctx, (u8 *)dst,
369                                                   (u8 *)ctrblocks);
370
371                         src += BF_PARALLEL_BLOCKS;
372                         dst += BF_PARALLEL_BLOCKS;
373                         nbytes -= bsize * BF_PARALLEL_BLOCKS;
374                 } while (nbytes >= bsize * BF_PARALLEL_BLOCKS);
375
376                 *(__be64 *)walk->iv = cpu_to_be64(ctrblk);
377
378                 if (nbytes < bsize)
379                         goto done;
380         }
381
382         /* Handle leftovers */
383         do {
384                 u64 ctrblk;
385
386                 if (dst != src)
387                         *dst = *src;
388
389                 ctrblk = *(u64 *)walk->iv;
390                 be64_add_cpu((__be64 *)walk->iv, 1);
391
392                 blowfish_enc_blk_xor(ctx, (u8 *)dst, (u8 *)&ctrblk);
393
394                 src += 1;
395                 dst += 1;
396         } while ((nbytes -= bsize) >= bsize);
397
398 done:
399         return nbytes;
400 }
401
402 static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
403                      struct scatterlist *src, unsigned int nbytes)
404 {
405         bool fpu_enabled = false;
406         struct blkcipher_walk walk;
407         int err;
408
409         blkcipher_walk_init(&walk, dst, src, nbytes);
410         err = blkcipher_walk_virt_block(desc, &walk, BF_BLOCK_SIZE);
411         desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
412
413         while ((nbytes = walk.nbytes) >= BF_BLOCK_SIZE) {
414                 fpu_enabled = bf_fpu_begin(fpu_enabled, nbytes);
415                 nbytes = __ctr_crypt(desc, &walk);
416                 err = blkcipher_walk_done(desc, &walk, nbytes);
417         }
418
419         bf_fpu_end(fpu_enabled);
420
421         if (walk.nbytes) {
422                 ctr_crypt_final(desc, &walk);
423                 err = blkcipher_walk_done(desc, &walk, 0);
424         }
425
426         return err;
427 }
428
429 static struct crypto_alg bf_algs[6] = { {
430         .cra_name               = "__ecb-blowfish-avx2",
431         .cra_driver_name        = "__driver-ecb-blowfish-avx2",
432         .cra_priority           = 0,
433         .cra_flags              = CRYPTO_ALG_TYPE_BLKCIPHER,
434         .cra_blocksize          = BF_BLOCK_SIZE,
435         .cra_ctxsize            = sizeof(struct bf_ctx),
436         .cra_alignmask          = 0,
437         .cra_type               = &crypto_blkcipher_type,
438         .cra_module             = THIS_MODULE,
439         .cra_u = {
440                 .blkcipher = {
441                         .min_keysize    = BF_MIN_KEY_SIZE,
442                         .max_keysize    = BF_MAX_KEY_SIZE,
443                         .setkey         = blowfish_setkey,
444                         .encrypt        = ecb_encrypt,
445                         .decrypt        = ecb_decrypt,
446                 },
447         },
448 }, {
449         .cra_name               = "__cbc-blowfish-avx2",
450         .cra_driver_name        = "__driver-cbc-blowfish-avx2",
451         .cra_priority           = 0,
452         .cra_flags              = CRYPTO_ALG_TYPE_BLKCIPHER,
453         .cra_blocksize          = BF_BLOCK_SIZE,
454         .cra_ctxsize            = sizeof(struct bf_ctx),
455         .cra_alignmask          = 0,
456         .cra_type               = &crypto_blkcipher_type,
457         .cra_module             = THIS_MODULE,
458         .cra_u = {
459                 .blkcipher = {
460                         .min_keysize    = BF_MIN_KEY_SIZE,
461                         .max_keysize    = BF_MAX_KEY_SIZE,
462                         .setkey         = blowfish_setkey,
463                         .encrypt        = cbc_encrypt,
464                         .decrypt        = cbc_decrypt,
465                 },
466         },
467 }, {
468         .cra_name               = "__ctr-blowfish-avx2",
469         .cra_driver_name        = "__driver-ctr-blowfish-avx2",
470         .cra_priority           = 0,
471         .cra_flags              = CRYPTO_ALG_TYPE_BLKCIPHER,
472         .cra_blocksize          = 1,
473         .cra_ctxsize            = sizeof(struct bf_ctx),
474         .cra_alignmask          = 0,
475         .cra_type               = &crypto_blkcipher_type,
476         .cra_module             = THIS_MODULE,
477         .cra_u = {
478                 .blkcipher = {
479                         .min_keysize    = BF_MIN_KEY_SIZE,
480                         .max_keysize    = BF_MAX_KEY_SIZE,
481                         .ivsize         = BF_BLOCK_SIZE,
482                         .setkey         = blowfish_setkey,
483                         .encrypt        = ctr_crypt,
484                         .decrypt        = ctr_crypt,
485                 },
486         },
487 }, {
488         .cra_name               = "ecb(blowfish)",
489         .cra_driver_name        = "ecb-blowfish-avx2",
490         .cra_priority           = 400,
491         .cra_flags              = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
492         .cra_blocksize          = BF_BLOCK_SIZE,
493         .cra_ctxsize            = sizeof(struct async_helper_ctx),
494         .cra_alignmask          = 0,
495         .cra_type               = &crypto_ablkcipher_type,
496         .cra_module             = THIS_MODULE,
497         .cra_init               = ablk_init,
498         .cra_exit               = ablk_exit,
499         .cra_u = {
500                 .ablkcipher = {
501                         .min_keysize    = BF_MIN_KEY_SIZE,
502                         .max_keysize    = BF_MAX_KEY_SIZE,
503                         .setkey         = ablk_set_key,
504                         .encrypt        = ablk_encrypt,
505                         .decrypt        = ablk_decrypt,
506                 },
507         },
508 }, {
509         .cra_name               = "cbc(blowfish)",
510         .cra_driver_name        = "cbc-blowfish-avx2",
511         .cra_priority           = 400,
512         .cra_flags              = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
513         .cra_blocksize          = BF_BLOCK_SIZE,
514         .cra_ctxsize            = sizeof(struct async_helper_ctx),
515         .cra_alignmask          = 0,
516         .cra_type               = &crypto_ablkcipher_type,
517         .cra_module             = THIS_MODULE,
518         .cra_init               = ablk_init,
519         .cra_exit               = ablk_exit,
520         .cra_u = {
521                 .ablkcipher = {
522                         .min_keysize    = BF_MIN_KEY_SIZE,
523                         .max_keysize    = BF_MAX_KEY_SIZE,
524                         .ivsize         = BF_BLOCK_SIZE,
525                         .setkey         = ablk_set_key,
526                         .encrypt        = __ablk_encrypt,
527                         .decrypt        = ablk_decrypt,
528                 },
529         },
530 }, {
531         .cra_name               = "ctr(blowfish)",
532         .cra_driver_name        = "ctr-blowfish-avx2",
533         .cra_priority           = 400,
534         .cra_flags              = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
535         .cra_blocksize          = 1,
536         .cra_ctxsize            = sizeof(struct async_helper_ctx),
537         .cra_alignmask          = 0,
538         .cra_type               = &crypto_ablkcipher_type,
539         .cra_module             = THIS_MODULE,
540         .cra_init               = ablk_init,
541         .cra_exit               = ablk_exit,
542         .cra_u = {
543                 .ablkcipher = {
544                         .min_keysize    = BF_MIN_KEY_SIZE,
545                         .max_keysize    = BF_MAX_KEY_SIZE,
546                         .ivsize         = BF_BLOCK_SIZE,
547                         .setkey         = ablk_set_key,
548                         .encrypt        = ablk_encrypt,
549                         .decrypt        = ablk_encrypt,
550                         .geniv          = "chainiv",
551                 },
552         },
553 } };
554
555
556 static int __init init(void)
557 {
558         u64 xcr0;
559
560         if (!cpu_has_avx2 || !cpu_has_osxsave) {
561                 pr_info("AVX2 instructions are not detected.\n");
562                 return -ENODEV;
563         }
564
565         xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
566         if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
567                 pr_info("AVX detected but unusable.\n");
568                 return -ENODEV;
569         }
570
571         return crypto_register_algs(bf_algs, ARRAY_SIZE(bf_algs));
572 }
573
574 static void __exit fini(void)
575 {
576         crypto_unregister_algs(bf_algs, ARRAY_SIZE(bf_algs));
577 }
578
579 module_init(init);
580 module_exit(fini);
581
582 MODULE_LICENSE("GPL");
583 MODULE_DESCRIPTION("Blowfish Cipher Algorithm, AVX2 optimized");
584 MODULE_ALIAS("blowfish");
585 MODULE_ALIAS("blowfish-asm");