/*
 * AMD Cryptographic Coprocessor (CCP) driver
 *
 * Copyright (C) 2013 Advanced Micro Devices, Inc.
 *
 * Author: Tom Lendacky <thomas.lendacky@amd.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/mutex.h>
#include <linux/delay.h>
#include <linux/ccp.h>
#include <linux/scatterlist.h>
#include <crypto/scatterwalk.h>
#include <crypto/sha.h>

#include "ccp-dev.h"
enum ccp_memtype {
	CCP_MEMTYPE_SYSTEM = 0,
	CCP_MEMTYPE_KSB,
};

struct ccp_dma_info {
	dma_addr_t address;
	unsigned int offset;
	unsigned int length;
	enum dma_data_direction dir;
};

struct ccp_dm_workarea {
	struct device *dev;
	struct dma_pool *dma_pool;
	unsigned int length;

	u8 *address;
	struct ccp_dma_info dma;
};

struct ccp_sg_workarea {
	struct scatterlist *sg;
	unsigned int nents;

	struct scatterlist *dma_sg;
	struct device *dma_dev;
	unsigned int dma_count;
	enum dma_data_direction dma_dir;

	unsigned int sg_used;

	u64 bytes_left;
};

struct ccp_data {
	struct ccp_sg_workarea sg_wa;
	struct ccp_dm_workarea dm_wa;
};

struct ccp_mem {
	enum ccp_memtype type;
	union {
		struct ccp_dma_info dma;
		u32 ksb;
	} u;
};

struct ccp_aes_op {
	enum ccp_aes_type type;
	enum ccp_aes_mode mode;
	enum ccp_aes_action action;
};

struct ccp_xts_aes_op {
	enum ccp_aes_action action;
	enum ccp_xts_aes_unit_size unit_size;
};

struct ccp_sha_op {
	enum ccp_sha_type type;
	u64 msg_bits;
};

struct ccp_rsa_op {
	u32 mod_size;
	u32 input_len;
};

struct ccp_passthru_op {
	enum ccp_passthru_bitwise bit_mod;
	enum ccp_passthru_byteswap byte_swap;
};

struct ccp_ecc_op {
	enum ccp_ecc_function function;
};

struct ccp_op {
	struct ccp_cmd_queue *cmd_q;

	u32 jobid;
	u32 ioc;
	u32 soc;
	u32 ksb_key;
	u32 ksb_ctx;
	u32 init;
	u32 eom;

	struct ccp_mem src;
	struct ccp_mem dst;

	union {
		struct ccp_aes_op aes;
		struct ccp_xts_aes_op xts;
		struct ccp_sha_op sha;
		struct ccp_rsa_op rsa;
		struct ccp_passthru_op passthru;
		struct ccp_ecc_op ecc;
	} u;
};
/* SHA initial context values */
static const __be32 ccp_sha1_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
	cpu_to_be32(SHA1_H0), cpu_to_be32(SHA1_H1),
	cpu_to_be32(SHA1_H2), cpu_to_be32(SHA1_H3),
	cpu_to_be32(SHA1_H4), 0, 0, 0,
};

static const __be32 ccp_sha224_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
	cpu_to_be32(SHA224_H0), cpu_to_be32(SHA224_H1),
	cpu_to_be32(SHA224_H2), cpu_to_be32(SHA224_H3),
	cpu_to_be32(SHA224_H4), cpu_to_be32(SHA224_H5),
	cpu_to_be32(SHA224_H6), cpu_to_be32(SHA224_H7),
};

static const __be32 ccp_sha256_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
	cpu_to_be32(SHA256_H0), cpu_to_be32(SHA256_H1),
	cpu_to_be32(SHA256_H2), cpu_to_be32(SHA256_H3),
	cpu_to_be32(SHA256_H4), cpu_to_be32(SHA256_H5),
	cpu_to_be32(SHA256_H6), cpu_to_be32(SHA256_H7),
};
/* The CCP cannot perform zero-length sha operations so the caller
 * is required to buffer data for the final operation. However, a
 * sha operation for a message with a total length of zero is valid
 * so known values are required to supply the result.
 */
static const u8 ccp_sha1_zero[CCP_SHA_CTXSIZE] = {
	0xda, 0x39, 0xa3, 0xee, 0x5e, 0x6b, 0x4b, 0x0d,
	0x32, 0x55, 0xbf, 0xef, 0x95, 0x60, 0x18, 0x90,
	0xaf, 0xd8, 0x07, 0x09, 0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
};

static const u8 ccp_sha224_zero[CCP_SHA_CTXSIZE] = {
	0xd1, 0x4a, 0x02, 0x8c, 0x2a, 0x3a, 0x2b, 0xc9,
	0x47, 0x61, 0x02, 0xbb, 0x28, 0x82, 0x34, 0xc4,
	0x15, 0xa2, 0xb0, 0x1f, 0x82, 0x8e, 0xa6, 0x2a,
	0xc5, 0xb3, 0xe4, 0x2f, 0x00, 0x00, 0x00, 0x00,
};

static const u8 ccp_sha256_zero[CCP_SHA_CTXSIZE] = {
	0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14,
	0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24,
	0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c,
	0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55,
};
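
/* Note (illustrative, not part of the driver logic): the tables above
 * are simply the well-known digests of the empty message, zero-padded
 * out to CCP_SHA_CTXSIZE.  For example, SHA-1("") is
 * da39a3ee 5e6b4b0d 3255bfef 95601890 afd80709, which matches
 * ccp_sha1_zero byte for byte.
 */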
static u32 ccp_addr_lo(struct ccp_dma_info *info)
{
	return lower_32_bits(info->address + info->offset);
}
static u32 ccp_addr_hi(struct ccp_dma_info *info)
{
	return upper_32_bits(info->address + info->offset) & 0x0000ffff;
}
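
/* Example (illustrative only): for a mapped DMA address of
 * 0x0000123456789abc with a zero offset, ccp_addr_lo() returns
 * 0x56789abc and ccp_addr_hi() returns 0x1234.  The mask in
 * ccp_addr_hi() reflects 48-bit addressing; the high bits share a
 * request word with the memory-type and KSB fields assembled by the
 * ccp_perform_*() routines below.
 */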
static int ccp_do_cmd(struct ccp_op *op, u32 *cr, unsigned int cr_count)
{
	struct ccp_cmd_queue *cmd_q = op->cmd_q;
	struct ccp_device *ccp = cmd_q->ccp;
	void __iomem *cr_addr;
	u32 cr0, cmd;
	unsigned int i;
	int ret = 0;

	/* We could read a status register to see how many free slots
	 * are actually available, but reading that register resets it
	 * and you could lose some error information.
	 */
	cmd_q->free_slots--;

	cr0 = (cmd_q->id << REQ0_CMD_Q_SHIFT)
	      | (op->jobid << REQ0_JOBID_SHIFT)
	      | REQ0_WAIT_FOR_WRITE;

	if (op->soc)
		cr0 |= REQ0_STOP_ON_COMPLETE
		       | REQ0_INT_ON_COMPLETE;

	if (op->ioc || !cmd_q->free_slots)
		cr0 |= REQ0_INT_ON_COMPLETE;

	/* Start at CMD_REQ1 */
	cr_addr = ccp->io_regs + CMD_REQ0 + CMD_REQ_INCR;

	mutex_lock(&ccp->req_mutex);

	/* Write CMD_REQ1 through CMD_REQx first */
	for (i = 0; i < cr_count; i++, cr_addr += CMD_REQ_INCR)
		iowrite32(*(cr + i), cr_addr);

	/* Tell the CCP to start */
	wmb();
	iowrite32(cr0, ccp->io_regs + CMD_REQ0);

	mutex_unlock(&ccp->req_mutex);

	if (cr0 & REQ0_INT_ON_COMPLETE) {
		/* Wait for the job to complete */
		ret = wait_event_interruptible(cmd_q->int_queue,
					       cmd_q->int_rcvd);
		if (ret || cmd_q->cmd_error) {
			/* On error delete all related jobs from the queue */
			cmd = (cmd_q->id << DEL_Q_ID_SHIFT)
			      | op->jobid;

			iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);

			if (!ret)
				ret = -EIO;
		} else if (op->soc) {
			/* Delete just head job from the queue on SoC */
			cmd = DEL_Q_ACTIVE
			      | (cmd_q->id << DEL_Q_ID_SHIFT)
			      | op->jobid;

			iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
		}

		cmd_q->free_slots = CMD_Q_DEPTH(cmd_q->q_status);
	}

	return ret;
}
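
/* Usage sketch (illustrative only): each engine-specific
 * ccp_perform_*() routine below builds the six CMD_REQ1-CMD_REQ6
 * words and hands them to ccp_do_cmd(), which writes them and then
 * kicks off the job by writing CMD_REQ0:
 *
 *	u32 cr[6];
 *
 *	cr[0] = ...;	// engine selection and options
 *	...
 *	cr[5] = ...;	// destination memory type / high address
 *	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
 */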
static int ccp_perform_aes(struct ccp_op *op)
{
	u32 cr[6];

	/* Fill out the register contents for REQ1 through REQ6 */
	cr[0] = (CCP_ENGINE_AES << REQ1_ENGINE_SHIFT)
		| (op->u.aes.type << REQ1_AES_TYPE_SHIFT)
		| (op->u.aes.mode << REQ1_AES_MODE_SHIFT)
		| (op->u.aes.action << REQ1_AES_ACTION_SHIFT)
		| (op->ksb_key << REQ1_KEY_KSB_SHIFT);
	cr[1] = op->src.u.dma.length - 1;
	cr[2] = ccp_addr_lo(&op->src.u.dma);
	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->src.u.dma);
	cr[4] = ccp_addr_lo(&op->dst.u.dma);
	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->dst.u.dma);

	if (op->u.aes.mode == CCP_AES_MODE_CFB)
		cr[0] |= ((0x7f) << REQ1_AES_CFB_SIZE_SHIFT);

	if (op->eom)
		cr[0] |= REQ1_EOM;

	if (op->init)
		cr[0] |= REQ1_INIT;

	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}
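
/* Example (illustrative): the hardware expects the byte length minus
 * one, so a 64-byte source yields cr[1] = 63; cr[0] simply or's
 * together the engine, AES type/mode/action and the KSB entry that
 * already holds the key.
 */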
static int ccp_perform_xts_aes(struct ccp_op *op)
{
	u32 cr[6];

	/* Fill out the register contents for REQ1 through REQ6 */
	cr[0] = (CCP_ENGINE_XTS_AES_128 << REQ1_ENGINE_SHIFT)
		| (op->u.xts.action << REQ1_AES_ACTION_SHIFT)
		| (op->u.xts.unit_size << REQ1_XTS_AES_SIZE_SHIFT)
		| (op->ksb_key << REQ1_KEY_KSB_SHIFT);
	cr[1] = op->src.u.dma.length - 1;
	cr[2] = ccp_addr_lo(&op->src.u.dma);
	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->src.u.dma);
	cr[4] = ccp_addr_lo(&op->dst.u.dma);
	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->dst.u.dma);

	if (op->eom)
		cr[0] |= REQ1_EOM;

	if (op->init)
		cr[0] |= REQ1_INIT;

	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}
static int ccp_perform_sha(struct ccp_op *op)
{
	u32 cr[6];

	/* Fill out the register contents for REQ1 through REQ6 */
	cr[0] = (CCP_ENGINE_SHA << REQ1_ENGINE_SHIFT)
		| (op->u.sha.type << REQ1_SHA_TYPE_SHIFT)
		| REQ1_INIT;
	cr[1] = op->src.u.dma.length - 1;
	cr[2] = ccp_addr_lo(&op->src.u.dma);
	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->src.u.dma);

	if (op->eom) {
		cr[0] |= REQ1_EOM;
		cr[4] = lower_32_bits(op->u.sha.msg_bits);
		cr[5] = upper_32_bits(op->u.sha.msg_bits);
	} else {
		cr[4] = 0;
		cr[5] = 0;
	}

	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}
static int ccp_perform_rsa(struct ccp_op *op)
{
	u32 cr[6];

	/* Fill out the register contents for REQ1 through REQ6 */
	cr[0] = (CCP_ENGINE_RSA << REQ1_ENGINE_SHIFT)
		| (op->u.rsa.mod_size << REQ1_RSA_MOD_SIZE_SHIFT)
		| (op->ksb_key << REQ1_KEY_KSB_SHIFT)
		| REQ1_EOM;
	cr[1] = op->u.rsa.input_len - 1;
	cr[2] = ccp_addr_lo(&op->src.u.dma);
	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->src.u.dma);
	cr[4] = ccp_addr_lo(&op->dst.u.dma);
	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->dst.u.dma);

	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}
static int ccp_perform_passthru(struct ccp_op *op)
{
	u32 cr[6];

	/* Fill out the register contents for REQ1 through REQ6 */
	cr[0] = (CCP_ENGINE_PASSTHRU << REQ1_ENGINE_SHIFT)
		| (op->u.passthru.bit_mod << REQ1_PT_BW_SHIFT)
		| (op->u.passthru.byte_swap << REQ1_PT_BS_SHIFT);

	if (op->src.type == CCP_MEMTYPE_SYSTEM)
		cr[1] = op->src.u.dma.length - 1;
	else
		cr[1] = op->dst.u.dma.length - 1;

	if (op->src.type == CCP_MEMTYPE_SYSTEM) {
		cr[2] = ccp_addr_lo(&op->src.u.dma);
		cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
			| ccp_addr_hi(&op->src.u.dma);

		if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
			cr[3] |= (op->ksb_key << REQ4_KSB_SHIFT);
	} else {
		cr[2] = op->src.u.ksb * CCP_KSB_BYTES;
		cr[3] = (CCP_MEMTYPE_KSB << REQ4_MEMTYPE_SHIFT);
	}

	if (op->dst.type == CCP_MEMTYPE_SYSTEM) {
		cr[4] = ccp_addr_lo(&op->dst.u.dma);
		cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
			| ccp_addr_hi(&op->dst.u.dma);
	} else {
		cr[4] = op->dst.u.ksb * CCP_KSB_BYTES;
		cr[5] = (CCP_MEMTYPE_KSB << REQ6_MEMTYPE_SHIFT);
	}

	if (op->eom)
		cr[0] |= REQ1_EOM;

	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}
static int ccp_perform_ecc(struct ccp_op *op)
{
	u32 cr[6];

	/* Fill out the register contents for REQ1 through REQ6 */
	cr[0] = REQ1_ECC_AFFINE_CONVERT
		| (CCP_ENGINE_ECC << REQ1_ENGINE_SHIFT)
		| (op->u.ecc.function << REQ1_ECC_FUNCTION_SHIFT)
		| REQ1_EOM;
	cr[1] = op->src.u.dma.length - 1;
	cr[2] = ccp_addr_lo(&op->src.u.dma);
	cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->src.u.dma);
	cr[4] = ccp_addr_lo(&op->dst.u.dma);
	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->dst.u.dma);

	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}
static u32 ccp_alloc_ksb(struct ccp_device *ccp, unsigned int count)
{
	u32 start;

	for (;;) {
		mutex_lock(&ccp->ksb_mutex);

		start = (u32)bitmap_find_next_zero_area(ccp->ksb,
							ccp->ksb_count,
							ccp->ksb_start,
							count, 0);
		if (start <= ccp->ksb_count) {
			bitmap_set(ccp->ksb, start, count);

			mutex_unlock(&ccp->ksb_mutex);
			break;
		}

		ccp->ksb_avail = 0;

		mutex_unlock(&ccp->ksb_mutex);

		/* Wait for KSB entries to become available */
		if (wait_event_interruptible(ccp->ksb_queue, ccp->ksb_avail))
			return 0;
	}

	return KSB_START + start;
}
static void ccp_free_ksb(struct ccp_device *ccp, unsigned int start,
			 unsigned int count)
{
	if (!start)
		return;

	mutex_lock(&ccp->ksb_mutex);

	bitmap_clear(ccp->ksb, start - KSB_START, count);

	ccp->ksb_avail = 1;

	mutex_unlock(&ccp->ksb_mutex);

	wake_up_interruptible_all(&ccp->ksb_queue);
}
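
/* Usage sketch (illustrative): callers that need contiguous KSB
 * entries, such as the RSA path below, pair these helpers:
 *
 *	ksb = ccp_alloc_ksb(ccp, count);	// may sleep for space
 *	if (!ksb)
 *		return -EIO;
 *	...
 *	ccp_free_ksb(ccp, ksb, count);		// wakes any waiters
 */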
static u32 ccp_gen_jobid(struct ccp_device *ccp)
{
	return atomic_inc_return(&ccp->current_id) & CCP_JOBID_MASK;
}
static void ccp_sg_free(struct ccp_sg_workarea *wa)
{
	if (wa->dma_count)
		dma_unmap_sg(wa->dma_dev, wa->dma_sg, wa->nents, wa->dma_dir);

	wa->dma_count = 0;
}
static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev,
				struct scatterlist *sg, u64 len,
				enum dma_data_direction dma_dir)
{
	memset(wa, 0, sizeof(*wa));

	wa->sg = sg;
	if (!sg)
		return 0;

	wa->nents = sg_nents_for_len(sg, len);
	wa->bytes_left = len;
	wa->sg_used = 0;

	if (len == 0)
		return 0;

	if (dma_dir == DMA_NONE)
		return 0;

	wa->dma_sg = sg;
	wa->dma_dev = dev;
	wa->dma_dir = dma_dir;
	wa->dma_count = dma_map_sg(dev, sg, wa->nents, dma_dir);
	if (!wa->dma_count)
		return -ENOMEM;

	return 0;
}
static void ccp_update_sg_workarea(struct ccp_sg_workarea *wa, unsigned int len)
{
	unsigned int nbytes = min_t(u64, len, wa->bytes_left);

	if (!wa->sg)
		return;

	wa->sg_used += nbytes;
	wa->bytes_left -= nbytes;
	if (wa->sg_used == wa->sg->length) {
		wa->sg = sg_next(wa->sg);
		wa->sg_used = 0;
	}
}
static void ccp_dm_free(struct ccp_dm_workarea *wa)
{
	if (wa->length <= CCP_DMAPOOL_MAX_SIZE) {
		if (wa->dma.address)
			dma_pool_free(wa->dma_pool, wa->address,
				      wa->dma.address);
	} else {
		if (wa->dma.address)
			dma_unmap_single(wa->dev, wa->dma.address, wa->length,
					 wa->dma.dir);
		kfree(wa->address);
	}

	wa->length = 0;
	wa->address = NULL;
	wa->dma.address = 0;
}
static int ccp_init_dm_workarea(struct ccp_dm_workarea *wa,
				struct ccp_cmd_queue *cmd_q,
				unsigned int len,
				enum dma_data_direction dir)
{
	memset(wa, 0, sizeof(*wa));

	if (!len)
		return 0;

	wa->dev = cmd_q->ccp->dev;
	wa->length = len;

	if (len <= CCP_DMAPOOL_MAX_SIZE) {
		wa->dma_pool = cmd_q->dma_pool;

		wa->address = dma_pool_alloc(wa->dma_pool, GFP_KERNEL,
					     &wa->dma.address);
		if (!wa->address)
			return -ENOMEM;

		wa->dma.length = CCP_DMAPOOL_MAX_SIZE;

		memset(wa->address, 0, CCP_DMAPOOL_MAX_SIZE);
	} else {
		wa->address = kzalloc(len, GFP_KERNEL);
		if (!wa->address)
			return -ENOMEM;

		wa->dma.address = dma_map_single(wa->dev, wa->address, len,
						 dir);
		if (!wa->dma.address)
			return -ENOMEM;

		wa->dma.length = len;
	}
	wa->dma.dir = dir;

	return 0;
}
static void ccp_set_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
			    struct scatterlist *sg, unsigned int sg_offset,
			    unsigned int len)
{
	WARN_ON(!wa->address);

	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
				 0);
}
static void ccp_get_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
			    struct scatterlist *sg, unsigned int sg_offset,
			    unsigned int len)
{
	WARN_ON(!wa->address);

	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
				 1);
}
static void ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa,
				    struct scatterlist *sg,
				    unsigned int len, unsigned int se_len,
				    bool sign_extend)
{
	unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
	u8 buffer[CCP_REVERSE_BUF_SIZE];

	BUG_ON(se_len > sizeof(buffer));

	sg_offset = len;
	dm_offset = 0;
	nbytes = len;
	while (nbytes) {
		ksb_len = min_t(unsigned int, nbytes, se_len);
		sg_offset -= ksb_len;

		scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 0);
		for (i = 0; i < ksb_len; i++)
			wa->address[dm_offset + i] = buffer[ksb_len - i - 1];

		dm_offset += ksb_len;
		nbytes -= ksb_len;

		if ((ksb_len != se_len) && sign_extend) {
			/* Must sign-extend to nearest sign-extend length */
			if (wa->address[dm_offset - 1] & 0x80)
				memset(wa->address + dm_offset, 0xff,
				       se_len - ksb_len);
		}
	}
}
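
/* Worked example (illustrative): reversing the 3-byte big endian
 * value { 0x12, 0x34, 0x56 } with se_len = 4 and sign_extend = true
 * stores { 0x56, 0x34, 0x12, 0x00 } - the bytes land in little
 * endian order and, because the new high byte (0x12) has its top bit
 * clear, the pad byte stays 0x00 (from the zeroed workarea) rather
 * than being sign-extended to 0xff.
 */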
static void ccp_reverse_get_dm_area(struct ccp_dm_workarea *wa,
				    struct scatterlist *sg,
				    unsigned int len)
{
	unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
	u8 buffer[CCP_REVERSE_BUF_SIZE];

	sg_offset = 0;
	dm_offset = len;
	nbytes = len;
	while (nbytes) {
		ksb_len = min_t(unsigned int, nbytes, sizeof(buffer));
		dm_offset -= ksb_len;

		for (i = 0; i < ksb_len; i++)
			buffer[ksb_len - i - 1] = wa->address[dm_offset + i];
		scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 1);

		sg_offset += ksb_len;
		nbytes -= ksb_len;
	}
}
static void ccp_free_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q)
{
	ccp_dm_free(&data->dm_wa);
	ccp_sg_free(&data->sg_wa);
}
static int ccp_init_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q,
			 struct scatterlist *sg, u64 sg_len,
			 unsigned int dm_len,
			 enum dma_data_direction dir)
{
	int ret;

	memset(data, 0, sizeof(*data));

	ret = ccp_init_sg_workarea(&data->sg_wa, cmd_q->ccp->dev, sg, sg_len,
				   dir);
	if (ret)
		goto e_err;

	ret = ccp_init_dm_workarea(&data->dm_wa, cmd_q, dm_len, dir);
	if (ret)
		goto e_err;

	return 0;

e_err:
	ccp_free_data(data, cmd_q);

	return ret;
}
static unsigned int ccp_queue_buf(struct ccp_data *data, unsigned int from)
{
	struct ccp_sg_workarea *sg_wa = &data->sg_wa;
	struct ccp_dm_workarea *dm_wa = &data->dm_wa;
	unsigned int buf_count, nbytes;

	/* Clear the buffer if setting it */
	if (!from)
		memset(dm_wa->address, 0, dm_wa->length);

	if (!sg_wa->sg)
		return 0;

	/* Perform the copy operation
	 * nbytes will always be <= UINT_MAX because dm_wa->length is
	 * an unsigned int
	 */
	nbytes = min_t(u64, sg_wa->bytes_left, dm_wa->length);
	scatterwalk_map_and_copy(dm_wa->address, sg_wa->sg, sg_wa->sg_used,
				 nbytes, from);

	/* Update the structures and generate the count */
	buf_count = 0;
	while (sg_wa->bytes_left && (buf_count < dm_wa->length)) {
		nbytes = min(sg_wa->sg->length - sg_wa->sg_used,
			     dm_wa->length - buf_count);
		nbytes = min_t(u64, sg_wa->bytes_left, nbytes);

		buf_count += nbytes;
		ccp_update_sg_workarea(sg_wa, nbytes);
	}

	return buf_count;
}
static unsigned int ccp_fill_queue_buf(struct ccp_data *data)
{
	return ccp_queue_buf(data, 0);
}

static unsigned int ccp_empty_queue_buf(struct ccp_data *data)
{
	return ccp_queue_buf(data, 1);
}
static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst,
			     struct ccp_op *op, unsigned int block_size,
			     bool blocksize_op)
{
	unsigned int sg_src_len, sg_dst_len, op_len;

	/* The CCP can only DMA from/to one address each per operation. This
	 * requires that we find the smallest DMA area between the source
	 * and destination. The resulting len values will always be <= UINT_MAX
	 * because the dma length is an unsigned int.
	 */
	sg_src_len = sg_dma_len(src->sg_wa.sg) - src->sg_wa.sg_used;
	sg_src_len = min_t(u64, src->sg_wa.bytes_left, sg_src_len);

	if (dst) {
		sg_dst_len = sg_dma_len(dst->sg_wa.sg) - dst->sg_wa.sg_used;
		sg_dst_len = min_t(u64, src->sg_wa.bytes_left, sg_dst_len);
		op_len = min(sg_src_len, sg_dst_len);
	} else
		op_len = sg_src_len;

	/* The data operation length will be at least block_size in length
	 * or the smaller of available sg room remaining for the source or
	 * destination
	 */
	op_len = max(op_len, block_size);

	/* Unless we have to buffer data, there's no reason to wait */
	op->soc = 0;

	if (sg_src_len < block_size) {
		/* Not enough data in the sg element, so it
		 * needs to be buffered into a blocksize chunk
		 */
		int cp_len = ccp_fill_queue_buf(src);

		op->soc = 1;
		op->src.u.dma.address = src->dm_wa.dma.address;
		op->src.u.dma.offset = 0;
		op->src.u.dma.length = (blocksize_op) ? block_size : cp_len;
	} else {
		/* Enough data in the sg element, but we need to
		 * adjust for any previously copied data
		 */
		op->src.u.dma.address = sg_dma_address(src->sg_wa.sg);
		op->src.u.dma.offset = src->sg_wa.sg_used;
		op->src.u.dma.length = op_len & ~(block_size - 1);

		ccp_update_sg_workarea(&src->sg_wa, op->src.u.dma.length);
	}

	if (dst) {
		if (sg_dst_len < block_size) {
			/* Not enough room in the sg element or we're on the
			 * last piece of data (when using padding), so the
			 * output needs to be buffered into a blocksize chunk
			 */
			op->soc = 1;
			op->dst.u.dma.address = dst->dm_wa.dma.address;
			op->dst.u.dma.offset = 0;
			op->dst.u.dma.length = op->src.u.dma.length;
		} else {
			/* Enough room in the sg element, but we need to
			 * adjust for any previously used area
			 */
			op->dst.u.dma.address = sg_dma_address(dst->sg_wa.sg);
			op->dst.u.dma.offset = dst->sg_wa.sg_used;
			op->dst.u.dma.length = op->src.u.dma.length;
		}
	}
}
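
/* Example (illustrative): with block_size = 16, a source element
 * holding 10 unprocessed bytes fails the sg_src_len < block_size
 * test, so those bytes are staged through the dm_wa bounce buffer
 * and, for a blocksize_op, padded out to a full 16-byte operation.
 * A source element holding 40 bytes instead DMAs straight from the
 * scatterlist, rounded down to 32 bytes by op_len & ~(block_size - 1).
 */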
static void ccp_process_data(struct ccp_data *src, struct ccp_data *dst,
			     struct ccp_op *op)
{
	op->init = 0;

	if (dst) {
		if (op->dst.u.dma.address == dst->dm_wa.dma.address)
			ccp_empty_queue_buf(dst);
		else
			ccp_update_sg_workarea(&dst->sg_wa,
					       op->dst.u.dma.length);
	}
}
static int ccp_copy_to_from_ksb(struct ccp_cmd_queue *cmd_q,
				struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
				u32 byte_swap, bool from)
{
	struct ccp_op op;

	memset(&op, 0, sizeof(op));

	op.cmd_q = cmd_q;
	op.jobid = jobid;
	op.eom = 1;

	if (from) {
		op.soc = 1;
		op.src.type = CCP_MEMTYPE_KSB;
		op.src.u.ksb = ksb;
		op.dst.type = CCP_MEMTYPE_SYSTEM;
		op.dst.u.dma.address = wa->dma.address;
		op.dst.u.dma.length = wa->length;
	} else {
		op.src.type = CCP_MEMTYPE_SYSTEM;
		op.src.u.dma.address = wa->dma.address;
		op.src.u.dma.length = wa->length;
		op.dst.type = CCP_MEMTYPE_KSB;
		op.dst.u.ksb = ksb;
	}

	op.u.passthru.byte_swap = byte_swap;

	return ccp_perform_passthru(&op);
}
static int ccp_copy_to_ksb(struct ccp_cmd_queue *cmd_q,
			   struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
			   u32 byte_swap)
{
	return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, false);
}

static int ccp_copy_from_ksb(struct ccp_cmd_queue *cmd_q,
			     struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
			     u32 byte_swap)
{
	return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, true);
}
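
/* Usage sketch (illustrative): keys and contexts are staged through
 * these wrappers before and after an operation, e.g.:
 *
 *	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
 *			      CCP_PASSTHRU_BYTESWAP_256BIT);
 *
 * performs a passthru copy of the workarea into the key storage
 * block, byte swapping the 256-bit entry from big endian into the
 * little endian layout the engines expect.
 */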
static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
				struct ccp_cmd *cmd)
{
	struct ccp_aes_engine *aes = &cmd->u.aes;
	struct ccp_dm_workarea key, ctx;
	struct ccp_data src;
	struct ccp_op op;
	unsigned int dm_offset;
	int ret;

	if (!((aes->key_len == AES_KEYSIZE_128) ||
	      (aes->key_len == AES_KEYSIZE_192) ||
	      (aes->key_len == AES_KEYSIZE_256)))
		return -EINVAL;

	if (aes->src_len & (AES_BLOCK_SIZE - 1))
		return -EINVAL;

	if (aes->iv_len != AES_BLOCK_SIZE)
		return -EINVAL;

	if (!aes->key || !aes->iv || !aes->src)
		return -EINVAL;

	if (aes->cmac_final) {
		if (aes->cmac_key_len != AES_BLOCK_SIZE)
			return -EINVAL;

		if (!aes->cmac_key)
			return -EINVAL;
	}

	BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
	BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);

	ret = -EIO;
	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = ccp_gen_jobid(cmd_q->ccp);
	op.ksb_key = cmd_q->ksb_key;
	op.ksb_ctx = cmd_q->ksb_ctx;
	op.init = 1;
	op.u.aes.type = aes->type;
	op.u.aes.mode = aes->mode;
	op.u.aes.action = aes->action;

	/* All supported key sizes fit in a single (32-byte) KSB entry
	 * and must be in little endian format. Use the 256-bit byte
	 * swap passthru option to convert from big endian to little
	 * endian.
	 */
	ret = ccp_init_dm_workarea(&key, cmd_q,
				   CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
				   DMA_TO_DEVICE);
	if (ret)
		return ret;

	dm_offset = CCP_KSB_BYTES - aes->key_len;
	ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
			      CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_key;
	}

	/* The AES context fits in a single (32-byte) KSB entry and
	 * must be in little endian format. Use the 256-bit byte swap
	 * passthru option to convert from big endian to little endian.
	 */
	ret = ccp_init_dm_workarea(&ctx, cmd_q,
				   CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
				   DMA_BIDIRECTIONAL);
	if (ret)
		goto e_key;

	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
	ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
			      CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_ctx;
	}

	/* Send data to the CCP AES engine */
	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
			    AES_BLOCK_SIZE, DMA_TO_DEVICE);
	if (ret)
		goto e_ctx;

	while (src.sg_wa.bytes_left) {
		ccp_prepare_data(&src, NULL, &op, AES_BLOCK_SIZE, true);
		if (aes->cmac_final && !src.sg_wa.bytes_left) {
			op.eom = 1;

			/* Push the K1/K2 key to the CCP now */
			ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid,
						op.ksb_ctx,
						CCP_PASSTHRU_BYTESWAP_256BIT);
			if (ret) {
				cmd->engine_error = cmd_q->cmd_error;
				goto e_src;
			}

			ccp_set_dm_area(&ctx, 0, aes->cmac_key, 0,
					aes->cmac_key_len);
			ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
					      CCP_PASSTHRU_BYTESWAP_256BIT);
			if (ret) {
				cmd->engine_error = cmd_q->cmd_error;
				goto e_src;
			}
		}

		ret = ccp_perform_aes(&op);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_src;
		}

		ccp_process_data(&src, NULL, &op);
	}

	/* Retrieve the AES context - convert from LE to BE using
	 * 32-byte (256-bit) byteswapping
	 */
	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
				CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_src;
	}

	/* ...but we only need AES_BLOCK_SIZE bytes */
	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
	ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);

e_src:
	ccp_free_data(&src, cmd_q);

e_ctx:
	ccp_dm_free(&ctx);

e_key:
	ccp_dm_free(&key);

	return ret;
}
static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
	struct ccp_aes_engine *aes = &cmd->u.aes;
	struct ccp_dm_workarea key, ctx;
	struct ccp_data src, dst;
	struct ccp_op op;
	unsigned int dm_offset;
	bool in_place = false;
	int ret;

	if (aes->mode == CCP_AES_MODE_CMAC)
		return ccp_run_aes_cmac_cmd(cmd_q, cmd);

	if (!((aes->key_len == AES_KEYSIZE_128) ||
	      (aes->key_len == AES_KEYSIZE_192) ||
	      (aes->key_len == AES_KEYSIZE_256)))
		return -EINVAL;

	if (((aes->mode == CCP_AES_MODE_ECB) ||
	     (aes->mode == CCP_AES_MODE_CBC) ||
	     (aes->mode == CCP_AES_MODE_CFB)) &&
	    (aes->src_len & (AES_BLOCK_SIZE - 1)))
		return -EINVAL;

	if (!aes->key || !aes->src || !aes->dst)
		return -EINVAL;

	if (aes->mode != CCP_AES_MODE_ECB) {
		if (aes->iv_len != AES_BLOCK_SIZE)
			return -EINVAL;

		if (!aes->iv)
			return -EINVAL;
	}

	BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
	BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);

	ret = -EIO;
	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = ccp_gen_jobid(cmd_q->ccp);
	op.ksb_key = cmd_q->ksb_key;
	op.ksb_ctx = cmd_q->ksb_ctx;
	op.init = (aes->mode == CCP_AES_MODE_ECB) ? 0 : 1;
	op.u.aes.type = aes->type;
	op.u.aes.mode = aes->mode;
	op.u.aes.action = aes->action;

	/* All supported key sizes fit in a single (32-byte) KSB entry
	 * and must be in little endian format. Use the 256-bit byte
	 * swap passthru option to convert from big endian to little
	 * endian.
	 */
	ret = ccp_init_dm_workarea(&key, cmd_q,
				   CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
				   DMA_TO_DEVICE);
	if (ret)
		return ret;

	dm_offset = CCP_KSB_BYTES - aes->key_len;
	ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
			      CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_key;
	}

	/* The AES context fits in a single (32-byte) KSB entry and
	 * must be in little endian format. Use the 256-bit byte swap
	 * passthru option to convert from big endian to little endian.
	 */
	ret = ccp_init_dm_workarea(&ctx, cmd_q,
				   CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
				   DMA_BIDIRECTIONAL);
	if (ret)
		goto e_key;

	if (aes->mode != CCP_AES_MODE_ECB) {
		/* Load the AES context - convert to LE */
		dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
		ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
		ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
				      CCP_PASSTHRU_BYTESWAP_256BIT);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_ctx;
		}
	}

	/* Prepare the input and output data workareas. For in-place
	 * operations we need to set the dma direction to BIDIRECTIONAL
	 * and copy the src workarea to the dst workarea.
	 */
	if (sg_virt(aes->src) == sg_virt(aes->dst))
		in_place = true;

	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
			    AES_BLOCK_SIZE,
			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
	if (ret)
		goto e_ctx;

	if (in_place)
		dst = src;
	else {
		ret = ccp_init_data(&dst, cmd_q, aes->dst, aes->src_len,
				    AES_BLOCK_SIZE, DMA_FROM_DEVICE);
		if (ret)
			goto e_src;
	}

	/* Send data to the CCP AES engine */
	while (src.sg_wa.bytes_left) {
		ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true);
		if (!src.sg_wa.bytes_left) {
			op.eom = 1;

			/* Since we don't retrieve the AES context in ECB
			 * mode we have to wait for the operation to complete
			 * on the last piece of data
			 */
			if (aes->mode == CCP_AES_MODE_ECB)
				op.soc = 1;
		}

		ret = ccp_perform_aes(&op);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_dst;
		}

		ccp_process_data(&src, &dst, &op);
	}

	if (aes->mode != CCP_AES_MODE_ECB) {
		/* Retrieve the AES context - convert from LE to BE using
		 * 32-byte (256-bit) byteswapping
		 */
		ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
					CCP_PASSTHRU_BYTESWAP_256BIT);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_dst;
		}

		/* ...but we only need AES_BLOCK_SIZE bytes */
		dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
		ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
	}

e_dst:
	if (!in_place)
		ccp_free_data(&dst, cmd_q);

e_src:
	ccp_free_data(&src, cmd_q);

e_ctx:
	ccp_dm_free(&ctx);

e_key:
	ccp_dm_free(&key);

	return ret;
}
static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
			       struct ccp_cmd *cmd)
{
	struct ccp_xts_aes_engine *xts = &cmd->u.xts;
	struct ccp_dm_workarea key, ctx;
	struct ccp_data src, dst;
	struct ccp_op op;
	unsigned int unit_size, dm_offset;
	bool in_place = false;
	int ret;

	switch (xts->unit_size) {
	case CCP_XTS_AES_UNIT_SIZE_16:
		unit_size = 16;
		break;
	case CCP_XTS_AES_UNIT_SIZE_512:
		unit_size = 512;
		break;
	case CCP_XTS_AES_UNIT_SIZE_1024:
		unit_size = 1024;
		break;
	case CCP_XTS_AES_UNIT_SIZE_2048:
		unit_size = 2048;
		break;
	case CCP_XTS_AES_UNIT_SIZE_4096:
		unit_size = 4096;
		break;

	default:
		return -EINVAL;
	}

	if (xts->key_len != AES_KEYSIZE_128)
		return -EINVAL;

	if (!xts->final && (xts->src_len & (AES_BLOCK_SIZE - 1)))
		return -EINVAL;

	if (xts->iv_len != AES_BLOCK_SIZE)
		return -EINVAL;

	if (!xts->key || !xts->iv || !xts->src || !xts->dst)
		return -EINVAL;

	BUILD_BUG_ON(CCP_XTS_AES_KEY_KSB_COUNT != 1);
	BUILD_BUG_ON(CCP_XTS_AES_CTX_KSB_COUNT != 1);

	ret = -EIO;
	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = ccp_gen_jobid(cmd_q->ccp);
	op.ksb_key = cmd_q->ksb_key;
	op.ksb_ctx = cmd_q->ksb_ctx;
	op.init = 1;
	op.u.xts.action = xts->action;
	op.u.xts.unit_size = xts->unit_size;

	/* All supported key sizes fit in a single (32-byte) KSB entry
	 * and must be in little endian format. Use the 256-bit byte
	 * swap passthru option to convert from big endian to little
	 * endian.
	 */
	ret = ccp_init_dm_workarea(&key, cmd_q,
				   CCP_XTS_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
				   DMA_TO_DEVICE);
	if (ret)
		return ret;

	dm_offset = CCP_KSB_BYTES - AES_KEYSIZE_128;
	ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len);
	ccp_set_dm_area(&key, 0, xts->key, dm_offset, xts->key_len);
	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
			      CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_key;
	}

	/* The AES context fits in a single (32-byte) KSB entry and
	 * for XTS is already in little endian format so no byte swapping
	 * is needed.
	 */
	ret = ccp_init_dm_workarea(&ctx, cmd_q,
				   CCP_XTS_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
				   DMA_BIDIRECTIONAL);
	if (ret)
		goto e_key;

	ccp_set_dm_area(&ctx, 0, xts->iv, 0, xts->iv_len);
	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
			      CCP_PASSTHRU_BYTESWAP_NOOP);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_ctx;
	}

	/* Prepare the input and output data workareas. For in-place
	 * operations we need to set the dma direction to BIDIRECTIONAL
	 * and copy the src workarea to the dst workarea.
	 */
	if (sg_virt(xts->src) == sg_virt(xts->dst))
		in_place = true;

	ret = ccp_init_data(&src, cmd_q, xts->src, xts->src_len,
			    unit_size,
			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
	if (ret)
		goto e_ctx;

	if (in_place)
		dst = src;
	else {
		ret = ccp_init_data(&dst, cmd_q, xts->dst, xts->src_len,
				    unit_size, DMA_FROM_DEVICE);
		if (ret)
			goto e_src;
	}

	/* Send data to the CCP AES engine */
	while (src.sg_wa.bytes_left) {
		ccp_prepare_data(&src, &dst, &op, unit_size, true);
		if (!src.sg_wa.bytes_left)
			op.eom = 1;

		ret = ccp_perform_xts_aes(&op);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_dst;
		}

		ccp_process_data(&src, &dst, &op);
	}

	/* Retrieve the AES context - convert from LE to BE using
	 * 32-byte (256-bit) byteswapping
	 */
	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
				CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_dst;
	}

	/* ...but we only need AES_BLOCK_SIZE bytes */
	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
	ccp_get_dm_area(&ctx, dm_offset, xts->iv, 0, xts->iv_len);

e_dst:
	if (!in_place)
		ccp_free_data(&dst, cmd_q);

e_src:
	ccp_free_data(&src, cmd_q);

e_ctx:
	ccp_dm_free(&ctx);

e_key:
	ccp_dm_free(&key);

	return ret;
}
static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
	struct ccp_sha_engine *sha = &cmd->u.sha;
	struct ccp_dm_workarea ctx;
	struct ccp_data src;
	struct ccp_op op;
	int ret;

	if (sha->ctx_len != CCP_SHA_CTXSIZE)
		return -EINVAL;

	if (!sha->ctx)
		return -EINVAL;

	if (!sha->final && (sha->src_len & (CCP_SHA_BLOCKSIZE - 1)))
		return -EINVAL;

	if (!sha->src_len) {
		const u8 *sha_zero;

		/* Not final, just return */
		if (!sha->final)
			return 0;

		/* CCP can't do a zero length sha operation so the caller
		 * must buffer the data.
		 */
		if (sha->msg_bits)
			return -EINVAL;

		/* A sha operation for a message with a total length of zero,
		 * return known result.
		 */
		switch (sha->type) {
		case CCP_SHA_TYPE_1:
			sha_zero = ccp_sha1_zero;
			break;
		case CCP_SHA_TYPE_224:
			sha_zero = ccp_sha224_zero;
			break;
		case CCP_SHA_TYPE_256:
			sha_zero = ccp_sha256_zero;
			break;
		default:
			return -EINVAL;
		}

		scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0,
					 sha->ctx_len, 1);

		return 0;
	}

	if (!sha->src)
		return -EINVAL;

	BUILD_BUG_ON(CCP_SHA_KSB_COUNT != 1);

	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = ccp_gen_jobid(cmd_q->ccp);
	op.ksb_ctx = cmd_q->ksb_ctx;
	op.u.sha.type = sha->type;
	op.u.sha.msg_bits = sha->msg_bits;

	/* The SHA context fits in a single (32-byte) KSB entry and
	 * must be in little endian format. Use the 256-bit byte swap
	 * passthru option to convert from big endian to little endian.
	 */
	ret = ccp_init_dm_workarea(&ctx, cmd_q,
				   CCP_SHA_KSB_COUNT * CCP_KSB_BYTES,
				   DMA_BIDIRECTIONAL);
	if (ret)
		return ret;

	if (sha->first) {
		const __be32 *init;

		switch (sha->type) {
		case CCP_SHA_TYPE_1:
			init = ccp_sha1_init;
			break;
		case CCP_SHA_TYPE_224:
			init = ccp_sha224_init;
			break;
		case CCP_SHA_TYPE_256:
			init = ccp_sha256_init;
			break;
		default:
			ret = -EINVAL;
			goto e_ctx;
		}
		memcpy(ctx.address, init, CCP_SHA_CTXSIZE);
	} else
		ccp_set_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);

	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
			      CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_ctx;
	}

	/* Send data to the CCP SHA engine */
	ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len,
			    CCP_SHA_BLOCKSIZE, DMA_TO_DEVICE);
	if (ret)
		goto e_ctx;

	while (src.sg_wa.bytes_left) {
		ccp_prepare_data(&src, NULL, &op, CCP_SHA_BLOCKSIZE, false);
		if (sha->final && !src.sg_wa.bytes_left)
			op.eom = 1;

		ret = ccp_perform_sha(&op);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_data;
		}

		ccp_process_data(&src, NULL, &op);
	}

	/* Retrieve the SHA context - convert from LE to BE using
	 * 32-byte (256-bit) byteswapping to BE
	 */
	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
				CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_data;
	}

	ccp_get_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);

	if (sha->final && sha->opad) {
		/* HMAC operation, recursively perform final SHA */
		struct ccp_cmd hmac_cmd;
		struct scatterlist sg;
		u64 block_size, digest_size;
		u8 *hmac_buf;

		switch (sha->type) {
		case CCP_SHA_TYPE_1:
			block_size = SHA1_BLOCK_SIZE;
			digest_size = SHA1_DIGEST_SIZE;
			break;
		case CCP_SHA_TYPE_224:
			block_size = SHA224_BLOCK_SIZE;
			digest_size = SHA224_DIGEST_SIZE;
			break;
		case CCP_SHA_TYPE_256:
			block_size = SHA256_BLOCK_SIZE;
			digest_size = SHA256_DIGEST_SIZE;
			break;
		default:
			ret = -EINVAL;
			goto e_data;
		}

		if (sha->opad_len != block_size) {
			ret = -EINVAL;
			goto e_data;
		}

		hmac_buf = kmalloc(block_size + digest_size, GFP_KERNEL);
		if (!hmac_buf) {
			ret = -ENOMEM;
			goto e_data;
		}
		sg_init_one(&sg, hmac_buf, block_size + digest_size);

		scatterwalk_map_and_copy(hmac_buf, sha->opad, 0, block_size, 0);
		memcpy(hmac_buf + block_size, ctx.address, digest_size);

		memset(&hmac_cmd, 0, sizeof(hmac_cmd));
		hmac_cmd.engine = CCP_ENGINE_SHA;
		hmac_cmd.u.sha.type = sha->type;
		hmac_cmd.u.sha.ctx = sha->ctx;
		hmac_cmd.u.sha.ctx_len = sha->ctx_len;
		hmac_cmd.u.sha.src = &sg;
		hmac_cmd.u.sha.src_len = block_size + digest_size;
		hmac_cmd.u.sha.opad = NULL;
		hmac_cmd.u.sha.opad_len = 0;
		hmac_cmd.u.sha.first = 1;
		hmac_cmd.u.sha.final = 1;
		hmac_cmd.u.sha.msg_bits = (block_size + digest_size) << 3;

		ret = ccp_run_sha_cmd(cmd_q, &hmac_cmd);
		if (ret)
			cmd->engine_error = hmac_cmd.engine_error;

		kfree(hmac_buf);
	}

e_data:
	ccp_free_data(&src, cmd_q);

e_ctx:
	ccp_dm_free(&ctx);

	return ret;
}
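
/* HMAC note (illustrative): the recursion above computes the outer
 * hash of HMAC, H(opad || inner_digest).  For SHA-256 that message is
 * 64 (block) + 32 (digest) = 96 bytes, so hmac_cmd.u.sha.msg_bits
 * works out to 96 << 3 = 768 bits.
 */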
static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
	struct ccp_rsa_engine *rsa = &cmd->u.rsa;
	struct ccp_dm_workarea exp, src;
	struct ccp_data dst;
	struct ccp_op op;
	unsigned int ksb_count, i_len, o_len;
	int ret;

	if (rsa->key_size > CCP_RSA_MAX_WIDTH)
		return -EINVAL;

	if (!rsa->exp || !rsa->mod || !rsa->src || !rsa->dst)
		return -EINVAL;

	/* The RSA modulus must precede the message being acted upon, so
	 * it must be copied to a DMA area where the message and the
	 * modulus can be concatenated.  Therefore the input buffer
	 * length required is twice the output buffer length (which
	 * must be a multiple of 256-bits).
	 */
	o_len = ((rsa->key_size + 255) / 256) * 32;
	i_len = o_len * 2;

	ksb_count = o_len / CCP_KSB_BYTES;
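	/* Sizing example (illustrative): for a 2048-bit key,
	 * o_len = ((2048 + 255) / 256) * 32 = 256 bytes,
	 * i_len = 512 bytes and ksb_count = 256 / 32 = 8 entries.
	 */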
	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = ccp_gen_jobid(cmd_q->ccp);
	op.ksb_key = ccp_alloc_ksb(cmd_q->ccp, ksb_count);
	if (!op.ksb_key)
		return -EIO;

	/* The RSA exponent may span multiple (32-byte) KSB entries and must
	 * be in little endian format. Reverse copy each 32-byte chunk
	 * of the exponent (En chunk to E0 chunk, E(n-1) chunk to E1 chunk)
	 * and each byte within that chunk and do not perform any byte swap
	 * operations on the passthru operation.
	 */
	ret = ccp_init_dm_workarea(&exp, cmd_q, o_len, DMA_TO_DEVICE);
	if (ret)
		goto e_ksb;

	ccp_reverse_set_dm_area(&exp, rsa->exp, rsa->exp_len, CCP_KSB_BYTES,
				true);
	ret = ccp_copy_to_ksb(cmd_q, &exp, op.jobid, op.ksb_key,
			      CCP_PASSTHRU_BYTESWAP_NOOP);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_exp;
	}

	/* Concatenate the modulus and the message. Both the modulus and
	 * the operands must be in little endian format.  Since the input
	 * is in big endian format it must be converted.
	 */
	ret = ccp_init_dm_workarea(&src, cmd_q, i_len, DMA_TO_DEVICE);
	if (ret)
		goto e_exp;

	ccp_reverse_set_dm_area(&src, rsa->mod, rsa->mod_len, CCP_KSB_BYTES,
				false);
	src.address += o_len;	/* Adjust the address for the copy operation */
	ccp_reverse_set_dm_area(&src, rsa->src, rsa->src_len, CCP_KSB_BYTES,
				false);
	src.address -= o_len;	/* Reset the address to original value */

	/* Prepare the output area for the operation */
	ret = ccp_init_data(&dst, cmd_q, rsa->dst, rsa->mod_len,
			    o_len, DMA_FROM_DEVICE);
	if (ret)
		goto e_src;

	op.soc = 1;
	op.src.u.dma.address = src.dma.address;
	op.src.u.dma.offset = 0;
	op.src.u.dma.length = i_len;
	op.dst.u.dma.address = dst.dm_wa.dma.address;
	op.dst.u.dma.offset = 0;
	op.dst.u.dma.length = o_len;

	op.u.rsa.mod_size = rsa->key_size;
	op.u.rsa.input_len = i_len;

	ret = ccp_perform_rsa(&op);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_dst;
	}

	ccp_reverse_get_dm_area(&dst.dm_wa, rsa->dst, rsa->mod_len);

e_dst:
	ccp_free_data(&dst, cmd_q);

e_src:
	ccp_dm_free(&src);

e_exp:
	ccp_dm_free(&exp);

e_ksb:
	ccp_free_ksb(cmd_q->ccp, op.ksb_key, ksb_count);

	return ret;
}
static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q,
				struct ccp_cmd *cmd)
{
	struct ccp_passthru_engine *pt = &cmd->u.passthru;
	struct ccp_dm_workarea mask;
	struct ccp_data src, dst;
	struct ccp_op op;
	bool in_place = false;
	unsigned int i;
	int ret;

	if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))
		return -EINVAL;

	if (!pt->src || !pt->dst)
		return -EINVAL;

	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
		if (pt->mask_len != CCP_PASSTHRU_MASKSIZE)
			return -EINVAL;
		if (!pt->mask)
			return -EINVAL;
	}

	BUILD_BUG_ON(CCP_PASSTHRU_KSB_COUNT != 1);

	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = ccp_gen_jobid(cmd_q->ccp);
	op.u.passthru.bit_mod = pt->bit_mod;
	op.u.passthru.byte_swap = pt->byte_swap;

	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
		/* Load the mask */
		op.ksb_key = cmd_q->ksb_key;

		ret = ccp_init_dm_workarea(&mask, cmd_q,
					   CCP_PASSTHRU_KSB_COUNT *
					   CCP_KSB_BYTES,
					   DMA_TO_DEVICE);
		if (ret)
			return ret;

		ccp_set_dm_area(&mask, 0, pt->mask, 0, pt->mask_len);
		ret = ccp_copy_to_ksb(cmd_q, &mask, op.jobid, op.ksb_key,
				      CCP_PASSTHRU_BYTESWAP_NOOP);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_mask;
		}
	}

	/* Prepare the input and output data workareas. For in-place
	 * operations we need to set the dma direction to BIDIRECTIONAL
	 * and copy the src workarea to the dst workarea.
	 */
	if (sg_virt(pt->src) == sg_virt(pt->dst))
		in_place = true;

	ret = ccp_init_data(&src, cmd_q, pt->src, pt->src_len,
			    CCP_PASSTHRU_MASKSIZE,
			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
	if (ret)
		goto e_mask;

	if (in_place)
		dst = src;
	else {
		ret = ccp_init_data(&dst, cmd_q, pt->dst, pt->src_len,
				    CCP_PASSTHRU_MASKSIZE, DMA_FROM_DEVICE);
		if (ret)
			goto e_src;
	}

	/* Send data to the CCP Passthru engine
	 *   Because the CCP engine works on a single source and destination
	 *   dma address at a time, each entry in the source scatterlist
	 *   (after the dma_map_sg call) must be less than or equal to the
	 *   (remaining) length in the destination scatterlist entry and the
	 *   length must be a multiple of CCP_PASSTHRU_BLOCKSIZE
	 */
	dst.sg_wa.sg_used = 0;
	for (i = 1; i <= src.sg_wa.dma_count; i++) {
		if (!dst.sg_wa.sg ||
		    (dst.sg_wa.sg->length < src.sg_wa.sg->length)) {
			ret = -EINVAL;
			goto e_dst;
		}

		if (i == src.sg_wa.dma_count) {
			op.eom = 1;
			op.soc = 1;
		}

		op.src.type = CCP_MEMTYPE_SYSTEM;
		op.src.u.dma.address = sg_dma_address(src.sg_wa.sg);
		op.src.u.dma.offset = 0;
		op.src.u.dma.length = sg_dma_len(src.sg_wa.sg);

		op.dst.type = CCP_MEMTYPE_SYSTEM;
		op.dst.u.dma.address = sg_dma_address(dst.sg_wa.sg);
		op.dst.u.dma.offset = dst.sg_wa.sg_used;
		op.dst.u.dma.length = op.src.u.dma.length;

		ret = ccp_perform_passthru(&op);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_dst;
		}

		dst.sg_wa.sg_used += src.sg_wa.sg->length;
		if (dst.sg_wa.sg_used == dst.sg_wa.sg->length) {
			dst.sg_wa.sg = sg_next(dst.sg_wa.sg);
			dst.sg_wa.sg_used = 0;
		}
		src.sg_wa.sg = sg_next(src.sg_wa.sg);
	}

e_dst:
	if (!in_place)
		ccp_free_data(&dst, cmd_q);

e_src:
	ccp_free_data(&src, cmd_q);

e_mask:
	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
		ccp_dm_free(&mask);

	return ret;
}
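
/* Example (illustrative): copying a 64-byte buffer split across two
 * 32-byte source entries into a single 64-byte destination entry
 * satisfies the constraint above (32 <= 64, then 32 <= 32 remaining);
 * the reverse layout - a 64-byte source entry against 32-byte
 * destination entries - fails the length check and returns -EINVAL.
 */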
static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
	struct ccp_dm_workarea src, dst;
	struct ccp_op op;
	u8 *save;
	int ret;

	if (!ecc->u.mm.operand_1 ||
	    (ecc->u.mm.operand_1_len > CCP_ECC_MODULUS_BYTES))
		return -EINVAL;

	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT)
		if (!ecc->u.mm.operand_2 ||
		    (ecc->u.mm.operand_2_len > CCP_ECC_MODULUS_BYTES))
			return -EINVAL;

	if (!ecc->u.mm.result ||
	    (ecc->u.mm.result_len < CCP_ECC_MODULUS_BYTES))
		return -EINVAL;

	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = ccp_gen_jobid(cmd_q->ccp);

	/* Concatenate the modulus and the operands. Both the modulus and
	 * the operands must be in little endian format.  Since the input
	 * is in big endian format it must be converted and placed in a
	 * fixed length buffer.
	 */
	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
				   DMA_TO_DEVICE);
	if (ret)
		return ret;

	/* Save the workarea address since it is updated in order to perform
	 * the concatenation
	 */
	save = src.address;

	/* Copy the ECC modulus */
	ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
				CCP_ECC_OPERAND_SIZE, false);
	src.address += CCP_ECC_OPERAND_SIZE;

	/* Copy the first operand */
	ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_1,
				ecc->u.mm.operand_1_len,
				CCP_ECC_OPERAND_SIZE, false);
	src.address += CCP_ECC_OPERAND_SIZE;

	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT) {
		/* Copy the second operand */
		ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_2,
					ecc->u.mm.operand_2_len,
					CCP_ECC_OPERAND_SIZE, false);
		src.address += CCP_ECC_OPERAND_SIZE;
	}

	/* Restore the workarea address */
	src.address = save;

	/* Prepare the output area for the operation */
	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
				   DMA_FROM_DEVICE);
	if (ret)
		goto e_src;

	op.soc = 1;
	op.src.u.dma.address = src.dma.address;
	op.src.u.dma.offset = 0;
	op.src.u.dma.length = src.length;
	op.dst.u.dma.address = dst.dma.address;
	op.dst.u.dma.offset = 0;
	op.dst.u.dma.length = dst.length;

	op.u.ecc.function = cmd->u.ecc.function;

	ret = ccp_perform_ecc(&op);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_dst;
	}

	ecc->ecc_result = le16_to_cpup(
		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
		ret = -EIO;
		goto e_dst;
	}

	/* Save the ECC result */
	ccp_reverse_get_dm_area(&dst, ecc->u.mm.result, CCP_ECC_MODULUS_BYTES);

e_dst:
	ccp_dm_free(&dst);

e_src:
	ccp_dm_free(&src);

	return ret;
}
static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
	struct ccp_dm_workarea src, dst;
	struct ccp_op op;
	u8 *save;
	int ret;

	if (!ecc->u.pm.point_1.x ||
	    (ecc->u.pm.point_1.x_len > CCP_ECC_MODULUS_BYTES) ||
	    !ecc->u.pm.point_1.y ||
	    (ecc->u.pm.point_1.y_len > CCP_ECC_MODULUS_BYTES))
		return -EINVAL;

	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
		if (!ecc->u.pm.point_2.x ||
		    (ecc->u.pm.point_2.x_len > CCP_ECC_MODULUS_BYTES) ||
		    !ecc->u.pm.point_2.y ||
		    (ecc->u.pm.point_2.y_len > CCP_ECC_MODULUS_BYTES))
			return -EINVAL;
	}

	if (!ecc->u.pm.domain_a ||
	    (ecc->u.pm.domain_a_len > CCP_ECC_MODULUS_BYTES))
		return -EINVAL;

	if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT)
		if (!ecc->u.pm.scalar ||
		    (ecc->u.pm.scalar_len > CCP_ECC_MODULUS_BYTES))
			return -EINVAL;

	if (!ecc->u.pm.result.x ||
	    (ecc->u.pm.result.x_len < CCP_ECC_MODULUS_BYTES) ||
	    !ecc->u.pm.result.y ||
	    (ecc->u.pm.result.y_len < CCP_ECC_MODULUS_BYTES))
		return -EINVAL;

	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = ccp_gen_jobid(cmd_q->ccp);

	/* Concatenate the modulus and the operands. Both the modulus and
	 * the operands must be in little endian format.  Since the input
	 * is in big endian format it must be converted and placed in a
	 * fixed length buffer.
	 */
	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
				   DMA_TO_DEVICE);
	if (ret)
		return ret;

	/* Save the workarea address since it is updated in order to perform
	 * the concatenation
	 */
	save = src.address;

	/* Copy the ECC modulus */
	ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
				CCP_ECC_OPERAND_SIZE, false);
	src.address += CCP_ECC_OPERAND_SIZE;

	/* Copy the first point X and Y coordinate */
	ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.x,
				ecc->u.pm.point_1.x_len,
				CCP_ECC_OPERAND_SIZE, false);
	src.address += CCP_ECC_OPERAND_SIZE;
	ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.y,
				ecc->u.pm.point_1.y_len,
				CCP_ECC_OPERAND_SIZE, false);
	src.address += CCP_ECC_OPERAND_SIZE;

	/* Set the first point Z coordinate to 1 */
	*src.address = 0x01;
	src.address += CCP_ECC_OPERAND_SIZE;

	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
		/* Copy the second point X and Y coordinate */
		ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.x,
					ecc->u.pm.point_2.x_len,
					CCP_ECC_OPERAND_SIZE, false);
		src.address += CCP_ECC_OPERAND_SIZE;
		ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.y,
					ecc->u.pm.point_2.y_len,
					CCP_ECC_OPERAND_SIZE, false);
		src.address += CCP_ECC_OPERAND_SIZE;

		/* Set the second point Z coordinate to 1 */
		*src.address = 0x01;
		src.address += CCP_ECC_OPERAND_SIZE;
	} else {
		/* Copy the Domain "a" parameter */
		ccp_reverse_set_dm_area(&src, ecc->u.pm.domain_a,
					ecc->u.pm.domain_a_len,
					CCP_ECC_OPERAND_SIZE, false);
		src.address += CCP_ECC_OPERAND_SIZE;

		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT) {
			/* Copy the scalar value */
			ccp_reverse_set_dm_area(&src, ecc->u.pm.scalar,
						ecc->u.pm.scalar_len,
						CCP_ECC_OPERAND_SIZE, false);
			src.address += CCP_ECC_OPERAND_SIZE;
		}
	}

	/* Restore the workarea address */
	src.address = save;

	/* Prepare the output area for the operation */
	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
				   DMA_FROM_DEVICE);
	if (ret)
		goto e_src;

	op.soc = 1;
	op.src.u.dma.address = src.dma.address;
	op.src.u.dma.offset = 0;
	op.src.u.dma.length = src.length;
	op.dst.u.dma.address = dst.dma.address;
	op.dst.u.dma.offset = 0;
	op.dst.u.dma.length = dst.length;

	op.u.ecc.function = cmd->u.ecc.function;

	ret = ccp_perform_ecc(&op);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_dst;
	}

	ecc->ecc_result = le16_to_cpup(
		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
		ret = -EIO;
		goto e_dst;
	}

	/* Save the workarea address since it is updated as we walk through
	 * to copy the point math result
	 */
	save = dst.address;

	/* Save the ECC result X and Y coordinates */
	ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.x,
				CCP_ECC_MODULUS_BYTES);
	dst.address += CCP_ECC_OUTPUT_SIZE;
	ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.y,
				CCP_ECC_MODULUS_BYTES);
	dst.address += CCP_ECC_OUTPUT_SIZE;

	/* Restore the workarea address */
	dst.address = save;

e_dst:
	ccp_dm_free(&dst);

e_src:
	ccp_dm_free(&src);

	return ret;
}
static int ccp_run_ecc_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
	struct ccp_ecc_engine *ecc = &cmd->u.ecc;

	ecc->ecc_result = 0;

	if (!ecc->mod ||
	    (ecc->mod_len > CCP_ECC_MODULUS_BYTES))
		return -EINVAL;

	switch (ecc->function) {
	case CCP_ECC_FUNCTION_MMUL_384BIT:
	case CCP_ECC_FUNCTION_MADD_384BIT:
	case CCP_ECC_FUNCTION_MINV_384BIT:
		return ccp_run_ecc_mm_cmd(cmd_q, cmd);

	case CCP_ECC_FUNCTION_PADD_384BIT:
	case CCP_ECC_FUNCTION_PMUL_384BIT:
	case CCP_ECC_FUNCTION_PDBL_384BIT:
		return ccp_run_ecc_pm_cmd(cmd_q, cmd);

	default:
		return -EINVAL;
	}
}
int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
	int ret;

	cmd->engine_error = 0;
	cmd_q->cmd_error = 0;
	cmd_q->int_rcvd = 0;
	cmd_q->free_slots = CMD_Q_DEPTH(ioread32(cmd_q->reg_status));

	switch (cmd->engine) {
	case CCP_ENGINE_AES:
		ret = ccp_run_aes_cmd(cmd_q, cmd);
		break;
	case CCP_ENGINE_XTS_AES_128:
		ret = ccp_run_xts_aes_cmd(cmd_q, cmd);
		break;
	case CCP_ENGINE_SHA:
		ret = ccp_run_sha_cmd(cmd_q, cmd);
		break;
	case CCP_ENGINE_RSA:
		ret = ccp_run_rsa_cmd(cmd_q, cmd);
		break;
	case CCP_ENGINE_PASSTHRU:
		ret = ccp_run_passthru_cmd(cmd_q, cmd);
		break;
	case CCP_ENGINE_ECC:
		ret = ccp_run_ecc_cmd(cmd_q, cmd);
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}
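
/* Usage sketch (illustrative only, field names as used above): a
 * caller submits work by filling a struct ccp_cmd and dispatching it,
 * e.g. a byte-swap passthru of DMA-able scatterlists src_sg/dst_sg
 * (hypothetical variables):
 *
 *	struct ccp_cmd cmd;
 *
 *	memset(&cmd, 0, sizeof(cmd));
 *	cmd.engine = CCP_ENGINE_PASSTHRU;
 *	cmd.u.passthru.bit_mod = CCP_PASSTHRU_BITWISE_NOOP;
 *	cmd.u.passthru.byte_swap = CCP_PASSTHRU_BYTESWAP_256BIT;
 *	cmd.u.passthru.src = src_sg;
 *	cmd.u.passthru.src_len = len;
 *	cmd.u.passthru.dst = dst_sg;
 *	cmd.u.passthru.final = 1;
 *	ret = ccp_run_cmd(cmd_q, &cmd);
 */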