From e09a39044924f7497820ca704e0dffc42d49c01b Mon Sep 17 00:00:00 2001 From: Akhil R Date: Wed, 11 Dec 2024 12:45:16 +0530 Subject: [PATCH] crypto: tegra: Fix HASH intermediate result handling The intermediate hash values generated during an update task were handled incorrectly in the driver. The values have a defined format for each algorithm. Copying and pasting from the HASH_RESULT register balantly would not work for all the supported algorithms. This incorrect handling causes failures when there is a context switch between multiple operations. To handle the expected format correctly, add a separate buffer for storing the intermediate results for each request. Remove the previous copy/paste functions which read/wrote to the registers directly. Instead configure the hardware to get the intermediate result copied to the buffer and use host1x path to restore the intermediate hash results. Bug 4883011 Signed-off-by: Akhil R Change-Id: I54952620906ccfd57d560c4619d17211f67b9ac3 Reviewed-on: https://git-master.nvidia.com/r/c/linux-nv-oot/+/3328440 GVS: buildbot_gerritrpt Reviewed-by: Laxman Dewangan --- drivers/crypto/tegra/tegra-se-aes.c | 19 ++-- drivers/crypto/tegra/tegra-se-hash.c | 145 ++++++++++++++++++--------- drivers/crypto/tegra/tegra-se-sm4.c | 52 +++++++--- drivers/crypto/tegra/tegra-se.h | 1 + 4 files changed, 142 insertions(+), 75 deletions(-) diff --git a/drivers/crypto/tegra/tegra-se-aes.c b/drivers/crypto/tegra/tegra-se-aes.c index 9b27fad7..e756f7fe 100644 --- a/drivers/crypto/tegra/tegra-se-aes.c +++ b/drivers/crypto/tegra/tegra-se-aes.c @@ -1894,23 +1894,15 @@ static int tegra_cmac_do_update(struct ahash_request *req) rctx->residue.size = nresidue; /* - * If this is not the first 'update' call, paste the previous copied + * If this is not the first task, paste the previous copied * intermediate results to the registers so that it gets picked up. - * This is to support the import/export functionality. */ if (!(rctx->task & SHA_FIRST)) tegra_cmac_paste_result(ctx->se, rctx); cmdlen = tegra_cmac_prep_cmd(se, rctx); - ret = tegra_se_host1x_submit(se, se->cmdbuf, cmdlen); - /* - * If this is not the final update, copy the intermediate results - * from the registers so that it can be used in the next 'update' - * call. This is to support the import/export functionality. - */ - if (!(rctx->task & SHA_FINAL)) - tegra_cmac_copy_result(ctx->se, rctx); + tegra_cmac_copy_result(ctx->se, rctx); return ret; } @@ -1943,6 +1935,13 @@ static int tegra_cmac_do_final(struct ahash_request *req) rctx->total_len += rctx->residue.size; rctx->config = se->regcfg->cfg(ctx->final_alg, 0); + /* + * If this is not the first task, paste the previous copied + * intermediate results to the registers so that it gets picked up. + */ + if (!(rctx->task & SHA_FIRST)) + tegra_cmac_paste_result(ctx->se, rctx); + /* Prepare command and submit */ cmdlen = tegra_cmac_prep_cmd(se, rctx); ret = tegra_se_host1x_submit(se, se->cmdbuf, cmdlen); diff --git a/drivers/crypto/tegra/tegra-se-hash.c b/drivers/crypto/tegra/tegra-se-hash.c index a0e32a96..06e9a5e0 100644 --- a/drivers/crypto/tegra/tegra-se-hash.c +++ b/drivers/crypto/tegra/tegra-se-hash.c @@ -39,6 +39,7 @@ struct tegra_sha_reqctx { struct tegra_se_datbuf datbuf; struct tegra_se_datbuf residue; struct tegra_se_datbuf digest; + struct tegra_se_datbuf intr_res; unsigned int alg; unsigned int config; unsigned int total_len; @@ -221,9 +222,62 @@ static int tegra_sha_fallback_export(struct ahash_request *req, void *out) return crypto_ahash_export(&rctx->fallback_req, out); } -static int tegra_sha_prep_cmd(struct tegra_se *se, u32 *cpuvaddr, +static int tegra_se_insert_hash_result(struct tegra_sha_ctx *ctx, u32 *cpuvaddr, + struct tegra_sha_reqctx *rctx) +{ + __be32 *res_be = (__be32 *)rctx->intr_res.buf; + u32 *res = (u32 *)rctx->intr_res.buf; + int i = 0, j; + + cpuvaddr[i++] = 0; + cpuvaddr[i++] = host1x_opcode_setpayload(HASH_RESULT_REG_COUNT); + cpuvaddr[i++] = se_host1x_opcode_incr_w(SE_SHA_HASH_RESULT); + + for (j = 0; j < HASH_RESULT_REG_COUNT; j++) { + int idx = j; + + /* + * The initial, intermediate and final hash value of SHA-384, SHA-512 + * in SHA_HASH_RESULT registers follow the below layout of bytes. + * + * +---------------+------------+ + * | HASH_RESULT_0 | B4...B7 | + * +---------------+------------+ + * | HASH_RESULT_1 | B0...B3 | + * +---------------+------------+ + * | HASH_RESULT_2 | B12...B15 | + * +---------------+------------+ + * | HASH_RESULT_3 | B8...B11 | + * +---------------+------------+ + * | ...... | + * +---------------+------------+ + * | HASH_RESULT_14| B60...B63 | + * +---------------+------------+ + * | HASH_RESULT_15| B56...B59 | + * +---------------+------------+ + * + */ + if (ctx->alg == SE_ALG_SHA384 || ctx->alg == SE_ALG_SHA512) + idx = (j % 2) ? j - 1 : j + 1; + + /* For SHA-1, SHA-224, SHA-256, SHA-384, SHA-512 the initial + * intermediate and final hash value when stored in + * SHA_HASH_RESULT registers, the byte order is NOT in + * little-endian. + */ + if (ctx->alg <= SE_ALG_SHA512) + cpuvaddr[i++] = be32_to_cpu(res_be[idx]); + else + cpuvaddr[i++] = res[idx]; + } + + return i; +} + +static int tegra_sha_prep_cmd(struct tegra_sha_ctx *ctx, u32 *cpuvaddr, struct tegra_sha_reqctx *rctx) { + struct tegra_se *se = ctx->se; u64 msg_len, msg_left; int i = 0; @@ -251,7 +305,7 @@ static int tegra_sha_prep_cmd(struct tegra_se *se, u32 *cpuvaddr, cpuvaddr[i++] = upper_32_bits(msg_left); cpuvaddr[i++] = 0; cpuvaddr[i++] = 0; - cpuvaddr[i++] = host1x_opcode_setpayload(6); + cpuvaddr[i++] = host1x_opcode_setpayload(2); cpuvaddr[i++] = se_host1x_opcode_incr_w(SE_SHA_CFG); cpuvaddr[i++] = rctx->config; @@ -259,15 +313,29 @@ static int tegra_sha_prep_cmd(struct tegra_se *se, u32 *cpuvaddr, cpuvaddr[i++] = SE_SHA_TASK_HASH_INIT; rctx->task &= ~SHA_FIRST; } else { - cpuvaddr[i++] = 0; + /* + * If it isn't the first task, program the HASH_RESULT register + * with the intermediate result from the previous task + */ + i += tegra_se_insert_hash_result(ctx, cpuvaddr + i, rctx); } + cpuvaddr[i++] = host1x_opcode_setpayload(4); + cpuvaddr[i++] = se_host1x_opcode_incr_w(SE_SHA_IN_ADDR); cpuvaddr[i++] = rctx->datbuf.addr; cpuvaddr[i++] = (u32)(SE_ADDR_HI_MSB(upper_32_bits(rctx->datbuf.addr)) | SE_ADDR_HI_SZ(rctx->datbuf.size)); - cpuvaddr[i++] = rctx->digest.addr; - cpuvaddr[i++] = (u32)(SE_ADDR_HI_MSB(upper_32_bits(rctx->digest.addr)) | - SE_ADDR_HI_SZ(rctx->digest.size)); + + if (rctx->task & SHA_UPDATE) { + cpuvaddr[i++] = rctx->intr_res.addr; + cpuvaddr[i++] = (u32)(SE_ADDR_HI_MSB(upper_32_bits(rctx->intr_res.addr)) | + SE_ADDR_HI_SZ(rctx->intr_res.size)); + } else { + cpuvaddr[i++] = rctx->digest.addr; + cpuvaddr[i++] = (u32)(SE_ADDR_HI_MSB(upper_32_bits(rctx->digest.addr)) | + SE_ADDR_HI_SZ(rctx->digest.size)); + } + if (rctx->key_id) { cpuvaddr[i++] = host1x_opcode_setpayload(1); cpuvaddr[i++] = se_host1x_opcode_nonincr_w(SE_SHA_CRYPTO_CFG); @@ -276,36 +344,18 @@ static int tegra_sha_prep_cmd(struct tegra_se *se, u32 *cpuvaddr, cpuvaddr[i++] = host1x_opcode_setpayload(1); cpuvaddr[i++] = se_host1x_opcode_nonincr_w(SE_SHA_OPERATION); - cpuvaddr[i++] = SE_SHA_OP_WRSTALL | - SE_SHA_OP_START | + cpuvaddr[i++] = SE_SHA_OP_WRSTALL | SE_SHA_OP_START | SE_SHA_OP_LASTBUF; cpuvaddr[i++] = se_host1x_opcode_nonincr(host1x_uclass_incr_syncpt_r(), 1); cpuvaddr[i++] = host1x_uclass_incr_syncpt_cond_f(1) | host1x_uclass_incr_syncpt_indx_f(se->syncpt_id); - dev_dbg(se->dev, "msg len %llu msg left %llu cfg %#x", - msg_len, msg_left, rctx->config); + dev_dbg(se->dev, "msg len %llu msg left %llu sz %lu cfg %#x", + msg_len, msg_left, rctx->datbuf.size, rctx->config); return i; } -static void tegra_sha_copy_hash_result(struct tegra_se *se, struct tegra_sha_reqctx *rctx) -{ - int i; - - for (i = 0; i < HASH_RESULT_REG_COUNT; i++) - rctx->result[i] = readl(se->base + se->hw->regs->result + (i * 4)); -} - -static void tegra_sha_paste_hash_result(struct tegra_se *se, struct tegra_sha_reqctx *rctx) -{ - int i; - - for (i = 0; i < HASH_RESULT_REG_COUNT; i++) - writel(rctx->result[i], - se->base + se->hw->regs->result + (i * 4)); -} - static int tegra_sha_do_init(struct ahash_request *req) { struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); @@ -335,8 +385,17 @@ static int tegra_sha_do_init(struct ahash_request *req) if (!rctx->residue.buf) goto resbuf_fail; + rctx->intr_res.size = HASH_RESULT_REG_COUNT * 4; + rctx->intr_res.buf = dma_alloc_coherent(se->dev, rctx->intr_res.size, + &rctx->intr_res.addr, GFP_KERNEL); + if (!rctx->intr_res.buf) + goto intr_res_fail; + return 0; +intr_res_fail: + dma_free_coherent(se->dev, rctx->residue.size, rctx->residue.buf, + rctx->residue.addr); resbuf_fail: dma_free_coherent(se->dev, rctx->digest.size, rctx->digest.buf, rctx->digest.addr); @@ -375,12 +434,12 @@ static int tegra_sha_do_update(struct ahash_request *req) if (nblks < 1) { scatterwalk_map_and_copy(rctx->residue.buf + rctx->residue.size, rctx->src_sg, 0, req->nbytes, 0); - rctx->residue.size += req->nbytes; + return 0; } - rctx->datbuf.buf = dma_alloc_coherent(ctx->se->dev, rctx->datbuf.size, + rctx->datbuf.buf = dma_alloc_coherent(se->dev, rctx->datbuf.size, &rctx->datbuf.addr, GFP_KERNEL); if (!rctx->datbuf.buf) return -ENOMEM; @@ -399,28 +458,12 @@ static int tegra_sha_do_update(struct ahash_request *req) rctx->residue.size = nresidue; rctx->config = tegra_sha_get_config(rctx->alg) | - SE_SHA_DST_HASH_REG; + SE_SHA_DST_MEMORY; - /* - * If this is not the first 'update' call, paste the previous copied - * intermediate results to the registers so that it gets picked up. - * This is to support the import/export functionality. - */ - if (!(rctx->task & SHA_FIRST)) - tegra_sha_paste_hash_result(se, rctx); - - cmdlen = tegra_sha_prep_cmd(se, cpuvaddr, rctx); + cmdlen = tegra_sha_prep_cmd(ctx, cpuvaddr, rctx); ret = tegra_se_host1x_submit(se, se->cmdbuf, cmdlen); - /* - * If this is not the final update, copy the intermediate results - * from the registers so that it can be used in the next 'update' - * call. This is to support the import/export functionality. - */ - if (!(rctx->task & SHA_FINAL)) - tegra_sha_copy_hash_result(se, rctx); - - dma_free_coherent(ctx->se->dev, rctx->datbuf.size, + dma_free_coherent(se->dev, rctx->datbuf.size, rctx->datbuf.buf, rctx->datbuf.addr); return ret; @@ -452,7 +495,7 @@ static int tegra_sha_do_final(struct ahash_request *req) memcpy(rctx->datbuf.buf, rctx->residue.buf, rctx->residue.size); } - cmdlen = tegra_sha_prep_cmd(se, cpuvaddr, rctx); + cmdlen = tegra_sha_prep_cmd(ctx, cpuvaddr, rctx); ret = tegra_se_host1x_submit(se, se->cmdbuf, cmdlen); if (ret) goto out; @@ -469,6 +512,10 @@ out_free: rctx->residue.buf, rctx->residue.addr); dma_free_coherent(se->dev, rctx->digest.size, rctx->digest.buf, rctx->digest.addr); + + dma_free_coherent(se->dev, rctx->intr_res.size, rctx->intr_res.buf, + rctx->intr_res.addr); + return ret; } diff --git a/drivers/crypto/tegra/tegra-se-sm4.c b/drivers/crypto/tegra/tegra-se-sm4.c index fc050cac..b72b154c 100644 --- a/drivers/crypto/tegra/tegra-se-sm4.c +++ b/drivers/crypto/tegra/tegra-se-sm4.c @@ -1169,6 +1169,23 @@ static int tegra_sm4_cmac_prep_cmd(struct tegra_se *se, u32 *cpuvaddr, struct te return i; } +static void tegra_sm4_cmac_copy_result(struct tegra_se *se, struct tegra_sm4_cmac_reqctx *rctx) +{ + int i; + + for (i = 0; i < CMAC_RESULT_REG_COUNT; i++) + rctx->result[i] = readl(se->base + se->hw->regs->result + (i * 4)); +} + +static void tegra_sm4_cmac_paste_result(struct tegra_se *se, struct tegra_sm4_cmac_reqctx *rctx) +{ + int i; + + for (i = 0; i < CMAC_RESULT_REG_COUNT; i++) + writel(rctx->result[i], + se->base + se->hw->regs->result + (i * 4)); +} + static int tegra_sm4_cmac_do_init(struct ahash_request *req) { struct tegra_sm4_cmac_reqctx *rctx = ahash_request_ctx(req); @@ -1228,6 +1245,7 @@ static int tegra_sm4_cmac_do_update(struct ahash_request *req) struct tegra_sm4_cmac_ctx *ctx = crypto_ahash_ctx(tfm); struct tegra_se *se = ctx->se; unsigned int nblks, nresidue, size; + int ret; nresidue = (req->nbytes + rctx->residue.size) % rctx->blk_size; nblks = (req->nbytes + rctx->residue.size) / rctx->blk_size; @@ -1277,9 +1295,18 @@ static int tegra_sm4_cmac_do_update(struct ahash_request *req) /* Update residue value with the residue after current block */ rctx->residue.size = nresidue; - size = tegra_sm4_cmac_prep_cmd(se, se->cmdbuf->addr, rctx); + /* + * If this is not the first task, paste the previous copied + * intermediate results to the registers so that it gets picked up. + */ + if (!(rctx->task & SHA_FIRST)) + tegra_sm4_cmac_paste_result(ctx->se, rctx); - return tegra_se_host1x_submit(se, se->cmdbuf, size); + size = tegra_sm4_cmac_prep_cmd(se, se->cmdbuf->addr, rctx); + ret = tegra_se_host1x_submit(se, se->cmdbuf, size); + tegra_sm4_cmac_copy_result(ctx->se, rctx); + + return ret; } static int tegra_sm4_cmac_do_final(struct ahash_request *req) @@ -1305,6 +1332,13 @@ static int tegra_sm4_cmac_do_final(struct ahash_request *req) memcpy(rctx->datbuf.buf, rctx->residue.buf, rctx->residue.size); } + /* + * If this is not the first task, paste the previous copied + * intermediate results to the registers so that it gets picked up. + */ + if (!(rctx->task & SHA_FIRST)) + tegra_sm4_cmac_paste_result(ctx->se, rctx); + /* Prepare command and submit */ size = tegra_sm4_cmac_prep_cmd(se, se->cmdbuf->addr, rctx); ret = tegra_se_host1x_submit(se, se->cmdbuf, size); @@ -1480,13 +1514,6 @@ static int tegra_sm4_cmac_digest(struct ahash_request *req) static int tegra_sm4_cmac_export(struct ahash_request *req, void *out) { struct tegra_sm4_cmac_reqctx *rctx = ahash_request_ctx(req); - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct tegra_sm4_cmac_ctx *ctx = crypto_ahash_ctx(tfm); - u32 result_reg = ctx->se->hw->regs->result; - int i; - - for (i = 0; i < CMAC_RESULT_REG_COUNT; i++) - rctx->result[i] = readl(ctx->se->base + result_reg + (i * 4)); memcpy(out, rctx, sizeof(*rctx)); @@ -1496,16 +1523,9 @@ static int tegra_sm4_cmac_export(struct ahash_request *req, void *out) static int tegra_sm4_cmac_import(struct ahash_request *req, const void *in) { struct tegra_sm4_cmac_reqctx *rctx = ahash_request_ctx(req); - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct tegra_sm4_cmac_ctx *ctx = crypto_ahash_ctx(tfm); - u32 result_reg = ctx->se->hw->regs->result; - int i; memcpy(rctx, in, sizeof(*rctx)); - for (i = 0; i < CMAC_RESULT_REG_COUNT; i++) - writel(rctx->result[i], ctx->se->base + result_reg + (i * 4)); - return 0; } diff --git a/drivers/crypto/tegra/tegra-se.h b/drivers/crypto/tegra/tegra-se.h index e3525bc5..12d5901c 100644 --- a/drivers/crypto/tegra/tegra-se.h +++ b/drivers/crypto/tegra/tegra-se.h @@ -26,6 +26,7 @@ #define SE_STREAM_ID 0x90 #define SE_SHA_CFG 0x4004 +#define SE_SHA_IN_ADDR 0x400c #define SE_SHA_KEY_ADDR 0x4094 #define SE_SHA_KEY_DATA 0x4098 #define SE_SHA_KEYMANIFEST 0x409c