2 * Glue Code for x86_64/AVX2 assembler optimized version of Twofish
4 * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/crypto.h>
16 #include <linux/err.h>
17 #include <crypto/algapi.h>
18 #include <crypto/ctr.h>
19 #include <crypto/twofish.h>
20 #include <crypto/lrw.h>
21 #include <crypto/xts.h>
23 #include <asm/xsave.h>
24 #include <asm/crypto/twofish.h>
25 #include <asm/crypto/ablk_helper.h>
26 #include <asm/crypto/glue_helper.h>
27 #include <crypto/scatterwalk.h>
29 #define TF_AVX2_PARALLEL_BLOCKS 16
31 /* 16-way AVX2 parallel cipher functions */
32 asmlinkage void twofish_ecb_enc_16way(struct twofish_ctx *ctx, u8 *dst,
34 asmlinkage void twofish_ecb_dec_16way(struct twofish_ctx *ctx, u8 *dst,
36 asmlinkage void twofish_cbc_dec_16way(void *ctx, u128 *dst, const u128 *src);
38 asmlinkage void twofish_ctr_16way(void *ctx, u128 *dst, const u128 *src,
41 asmlinkage void twofish_xts_enc_16way(struct twofish_ctx *ctx, u8 *dst,
42 const u8 *src, le128 *iv);
43 asmlinkage void twofish_xts_dec_16way(struct twofish_ctx *ctx, u8 *dst,
44 const u8 *src, le128 *iv);
46 static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
49 __twofish_enc_blk_3way(ctx, dst, src, false);
52 static const struct common_glue_ctx twofish_enc = {
54 .fpu_blocks_limit = 8,
58 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_enc_16way) }
61 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_enc_8way) }
64 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) }
67 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) }
71 static const struct common_glue_ctx twofish_ctr = {
73 .fpu_blocks_limit = 8,
77 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_ctr_16way) }
80 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_ctr_8way) }
83 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_3way) }
86 .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr) }
90 static const struct common_glue_ctx twofish_enc_xts = {
92 .fpu_blocks_limit = 8,
96 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc_16way) }
99 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc_8way) }
102 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc) }
106 static const struct common_glue_ctx twofish_dec = {
108 .fpu_blocks_limit = 8,
112 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_dec_16way) }
115 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_dec_8way) }
118 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) }
121 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) }
125 static const struct common_glue_ctx twofish_dec_cbc = {
127 .fpu_blocks_limit = 8,
131 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_cbc_dec_16way) }
134 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_cbc_dec_8way) }
137 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) }
140 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) }
144 static const struct common_glue_ctx twofish_dec_xts = {
146 .fpu_blocks_limit = 8,
150 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec_16way) }
153 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec_8way) }
156 .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec) }
160 static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
161 struct scatterlist *src, unsigned int nbytes)
163 return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes);
166 static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
167 struct scatterlist *src, unsigned int nbytes)
169 return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes);
172 static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
173 struct scatterlist *src, unsigned int nbytes)
175 return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc,
179 static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
180 struct scatterlist *src, unsigned int nbytes)
182 return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src,
186 static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
187 struct scatterlist *src, unsigned int nbytes)
189 return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes);
192 static inline bool twofish_fpu_begin(bool fpu_enabled, unsigned int nbytes)
194 /* since reusing AVX functions, starts using FPU at 8 parallel blocks */
195 return glue_fpu_begin(TF_BLOCK_SIZE, 8, NULL, fpu_enabled, nbytes);
198 static inline void twofish_fpu_end(bool fpu_enabled)
200 glue_fpu_end(fpu_enabled);
204 struct twofish_ctx *ctx;
208 static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
210 const unsigned int bsize = TF_BLOCK_SIZE;
211 struct crypt_priv *ctx = priv;
214 ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes);
216 while (nbytes >= TF_AVX2_PARALLEL_BLOCKS * bsize) {
217 twofish_ecb_enc_16way(ctx->ctx, srcdst, srcdst);
218 srcdst += bsize * TF_AVX2_PARALLEL_BLOCKS;
219 nbytes -= bsize * TF_AVX2_PARALLEL_BLOCKS;
222 while (nbytes >= 8 * bsize) {
223 twofish_ecb_enc_8way(ctx->ctx, srcdst, srcdst);
228 while (nbytes >= 3 * bsize) {
229 twofish_enc_blk_3way(ctx->ctx, srcdst, srcdst);
234 for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
235 twofish_enc_blk(ctx->ctx, srcdst, srcdst);
238 static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
240 const unsigned int bsize = TF_BLOCK_SIZE;
241 struct crypt_priv *ctx = priv;
244 ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes);
246 while (nbytes >= TF_AVX2_PARALLEL_BLOCKS * bsize) {
247 twofish_ecb_dec_16way(ctx->ctx, srcdst, srcdst);
248 srcdst += bsize * TF_AVX2_PARALLEL_BLOCKS;
249 nbytes -= bsize * TF_AVX2_PARALLEL_BLOCKS;
252 while (nbytes >= 8 * bsize) {
253 twofish_ecb_dec_8way(ctx->ctx, srcdst, srcdst);
258 while (nbytes >= 3 * bsize) {
259 twofish_dec_blk_3way(ctx->ctx, srcdst, srcdst);
264 for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
265 twofish_dec_blk(ctx->ctx, srcdst, srcdst);
268 static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
269 struct scatterlist *src, unsigned int nbytes)
271 struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
272 be128 buf[TF_AVX2_PARALLEL_BLOCKS];
273 struct crypt_priv crypt_ctx = {
274 .ctx = &ctx->twofish_ctx,
275 .fpu_enabled = false,
277 struct lrw_crypt_req req = {
279 .tbuflen = sizeof(buf),
281 .table_ctx = &ctx->lrw_table,
282 .crypt_ctx = &crypt_ctx,
283 .crypt_fn = encrypt_callback,
287 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
288 ret = lrw_crypt(desc, dst, src, nbytes, &req);
289 twofish_fpu_end(crypt_ctx.fpu_enabled);
294 static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
295 struct scatterlist *src, unsigned int nbytes)
297 struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
298 be128 buf[TF_AVX2_PARALLEL_BLOCKS];
299 struct crypt_priv crypt_ctx = {
300 .ctx = &ctx->twofish_ctx,
301 .fpu_enabled = false,
303 struct lrw_crypt_req req = {
305 .tbuflen = sizeof(buf),
307 .table_ctx = &ctx->lrw_table,
308 .crypt_ctx = &crypt_ctx,
309 .crypt_fn = decrypt_callback,
313 desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
314 ret = lrw_crypt(desc, dst, src, nbytes, &req);
315 twofish_fpu_end(crypt_ctx.fpu_enabled);
320 static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
321 struct scatterlist *src, unsigned int nbytes)
323 struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
325 return glue_xts_crypt_128bit(&twofish_enc_xts, desc, dst, src, nbytes,
326 XTS_TWEAK_CAST(twofish_enc_blk),
327 &ctx->tweak_ctx, &ctx->crypt_ctx);
330 static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
331 struct scatterlist *src, unsigned int nbytes)
333 struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
335 return glue_xts_crypt_128bit(&twofish_dec_xts, desc, dst, src, nbytes,
336 XTS_TWEAK_CAST(twofish_enc_blk),
337 &ctx->tweak_ctx, &ctx->crypt_ctx);
340 static struct crypto_alg tf_algs[10] = { {
341 .cra_name = "__ecb-twofish-avx2",
342 .cra_driver_name = "__driver-ecb-twofish-avx2",
344 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
345 .cra_blocksize = TF_BLOCK_SIZE,
346 .cra_ctxsize = sizeof(struct twofish_ctx),
348 .cra_type = &crypto_blkcipher_type,
349 .cra_module = THIS_MODULE,
352 .min_keysize = TF_MIN_KEY_SIZE,
353 .max_keysize = TF_MAX_KEY_SIZE,
354 .setkey = twofish_setkey,
355 .encrypt = ecb_encrypt,
356 .decrypt = ecb_decrypt,
360 .cra_name = "__cbc-twofish-avx2",
361 .cra_driver_name = "__driver-cbc-twofish-avx2",
363 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
364 .cra_blocksize = TF_BLOCK_SIZE,
365 .cra_ctxsize = sizeof(struct twofish_ctx),
367 .cra_type = &crypto_blkcipher_type,
368 .cra_module = THIS_MODULE,
371 .min_keysize = TF_MIN_KEY_SIZE,
372 .max_keysize = TF_MAX_KEY_SIZE,
373 .setkey = twofish_setkey,
374 .encrypt = cbc_encrypt,
375 .decrypt = cbc_decrypt,
379 .cra_name = "__ctr-twofish-avx2",
380 .cra_driver_name = "__driver-ctr-twofish-avx2",
382 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
384 .cra_ctxsize = sizeof(struct twofish_ctx),
386 .cra_type = &crypto_blkcipher_type,
387 .cra_module = THIS_MODULE,
390 .min_keysize = TF_MIN_KEY_SIZE,
391 .max_keysize = TF_MAX_KEY_SIZE,
392 .ivsize = TF_BLOCK_SIZE,
393 .setkey = twofish_setkey,
394 .encrypt = ctr_crypt,
395 .decrypt = ctr_crypt,
399 .cra_name = "__lrw-twofish-avx2",
400 .cra_driver_name = "__driver-lrw-twofish-avx2",
402 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
403 .cra_blocksize = TF_BLOCK_SIZE,
404 .cra_ctxsize = sizeof(struct twofish_lrw_ctx),
406 .cra_type = &crypto_blkcipher_type,
407 .cra_module = THIS_MODULE,
408 .cra_exit = lrw_twofish_exit_tfm,
411 .min_keysize = TF_MIN_KEY_SIZE +
413 .max_keysize = TF_MAX_KEY_SIZE +
415 .ivsize = TF_BLOCK_SIZE,
416 .setkey = lrw_twofish_setkey,
417 .encrypt = lrw_encrypt,
418 .decrypt = lrw_decrypt,
422 .cra_name = "__xts-twofish-avx2",
423 .cra_driver_name = "__driver-xts-twofish-avx2",
425 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
426 .cra_blocksize = TF_BLOCK_SIZE,
427 .cra_ctxsize = sizeof(struct twofish_xts_ctx),
429 .cra_type = &crypto_blkcipher_type,
430 .cra_module = THIS_MODULE,
433 .min_keysize = TF_MIN_KEY_SIZE * 2,
434 .max_keysize = TF_MAX_KEY_SIZE * 2,
435 .ivsize = TF_BLOCK_SIZE,
436 .setkey = xts_twofish_setkey,
437 .encrypt = xts_encrypt,
438 .decrypt = xts_decrypt,
442 .cra_name = "ecb(twofish)",
443 .cra_driver_name = "ecb-twofish-avx2",
445 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
446 .cra_blocksize = TF_BLOCK_SIZE,
447 .cra_ctxsize = sizeof(struct async_helper_ctx),
449 .cra_type = &crypto_ablkcipher_type,
450 .cra_module = THIS_MODULE,
451 .cra_init = ablk_init,
452 .cra_exit = ablk_exit,
455 .min_keysize = TF_MIN_KEY_SIZE,
456 .max_keysize = TF_MAX_KEY_SIZE,
457 .setkey = ablk_set_key,
458 .encrypt = ablk_encrypt,
459 .decrypt = ablk_decrypt,
463 .cra_name = "cbc(twofish)",
464 .cra_driver_name = "cbc-twofish-avx2",
466 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
467 .cra_blocksize = TF_BLOCK_SIZE,
468 .cra_ctxsize = sizeof(struct async_helper_ctx),
470 .cra_type = &crypto_ablkcipher_type,
471 .cra_module = THIS_MODULE,
472 .cra_init = ablk_init,
473 .cra_exit = ablk_exit,
476 .min_keysize = TF_MIN_KEY_SIZE,
477 .max_keysize = TF_MAX_KEY_SIZE,
478 .ivsize = TF_BLOCK_SIZE,
479 .setkey = ablk_set_key,
480 .encrypt = __ablk_encrypt,
481 .decrypt = ablk_decrypt,
485 .cra_name = "ctr(twofish)",
486 .cra_driver_name = "ctr-twofish-avx2",
488 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
490 .cra_ctxsize = sizeof(struct async_helper_ctx),
492 .cra_type = &crypto_ablkcipher_type,
493 .cra_module = THIS_MODULE,
494 .cra_init = ablk_init,
495 .cra_exit = ablk_exit,
498 .min_keysize = TF_MIN_KEY_SIZE,
499 .max_keysize = TF_MAX_KEY_SIZE,
500 .ivsize = TF_BLOCK_SIZE,
501 .setkey = ablk_set_key,
502 .encrypt = ablk_encrypt,
503 .decrypt = ablk_encrypt,
508 .cra_name = "lrw(twofish)",
509 .cra_driver_name = "lrw-twofish-avx2",
511 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
512 .cra_blocksize = TF_BLOCK_SIZE,
513 .cra_ctxsize = sizeof(struct async_helper_ctx),
515 .cra_type = &crypto_ablkcipher_type,
516 .cra_module = THIS_MODULE,
517 .cra_init = ablk_init,
518 .cra_exit = ablk_exit,
521 .min_keysize = TF_MIN_KEY_SIZE +
523 .max_keysize = TF_MAX_KEY_SIZE +
525 .ivsize = TF_BLOCK_SIZE,
526 .setkey = ablk_set_key,
527 .encrypt = ablk_encrypt,
528 .decrypt = ablk_decrypt,
532 .cra_name = "xts(twofish)",
533 .cra_driver_name = "xts-twofish-avx2",
535 .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
536 .cra_blocksize = TF_BLOCK_SIZE,
537 .cra_ctxsize = sizeof(struct async_helper_ctx),
539 .cra_type = &crypto_ablkcipher_type,
540 .cra_module = THIS_MODULE,
541 .cra_init = ablk_init,
542 .cra_exit = ablk_exit,
545 .min_keysize = TF_MIN_KEY_SIZE * 2,
546 .max_keysize = TF_MAX_KEY_SIZE * 2,
547 .ivsize = TF_BLOCK_SIZE,
548 .setkey = ablk_set_key,
549 .encrypt = ablk_encrypt,
550 .decrypt = ablk_decrypt,
555 static int __init init(void)
559 if (!cpu_has_avx2 || !cpu_has_osxsave) {
560 pr_info("AVX2 instructions are not detected.\n");
564 xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
565 if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
566 pr_info("AVX2 detected but unusable.\n");
570 return crypto_register_algs(tf_algs, ARRAY_SIZE(tf_algs));
573 static void __exit fini(void)
575 crypto_unregister_algs(tf_algs, ARRAY_SIZE(tf_algs));
581 MODULE_LICENSE("GPL");
582 MODULE_DESCRIPTION("Twofish Cipher Algorithm, AVX2 optimized");
583 MODULE_ALIAS("twofish");
584 MODULE_ALIAS("twofish-asm");