From 02f28eacbc043139fb927c8cc6f446a1d5f86ea2 Mon Sep 17 00:00:00 2001
From: Mahantesh Kumbar
Date: Fri, 7 Dec 2018 22:12:58 +0530
Subject: [PATCH] gpu: nvgpu: PMU payload as part of FBQ element

-Earlier, with the DMEM queue, a command that needed an in/out payload
required space to be allocated in DMEM or in an FB surface, and the
payload had to be copied into that space before the command was sent,
with the payload info passed along in the command.
-With the FB queue, the command's in/out payload is part of the FB
command queue element itself, so no separate space in DMEM or an FB
surface is required. Add changes to handle the FBQ payload while
sending a command, and in the response handler to extract data from
the out payload.

JIRA NVGPU-1579

Change-Id: Ic256523db38badb1f9c14cbdb98dc9f70934606d
Signed-off-by: Mahantesh Kumbar
Reviewed-on: https://git-master.nvidia.com/r/1966741
Reviewed-by: svc-mobile-coverity
Reviewed-by: svc-mobile-misra
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vijayakumar Subbu
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/common/pmu/pmu_ipc.c       | 472 +++++++++++++++----
 drivers/gpu/nvgpu/include/nvgpu/flcnif_cmn.h |   7 +
 drivers/gpu/nvgpu/include/nvgpu/pmu.h        |  20 +
 3 files changed, 402 insertions(+), 97 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_ipc.c b/drivers/gpu/nvgpu/common/pmu/pmu_ipc.c
index 258b7d76c..b7f3c0c41 100644
--- a/drivers/gpu/nvgpu/common/pmu/pmu_ipc.c
+++ b/drivers/gpu/nvgpu/common/pmu/pmu_ipc.c
@@ -264,7 +264,13 @@ static bool pmu_validate_cmd(struct nvgpu_pmu *pmu, struct pmu_cmd *cmd,
 	}
 	queue = pmu->queue[queue_id];
-	queue_size = nvgpu_falcon_queue_get_size(queue);
+
+	if (pmu->queue_type == QUEUE_TYPE_FB) {
+		queue_size = nvgpu_falcon_fbq_get_element_size(queue);
+	} else {
+		queue_size = nvgpu_falcon_queue_get_size(queue);
+	}
+
 	if (cmd->hdr.size < PMU_CMD_HDR_SIZE) {
 		goto invalid_cmd;
 	}
@@ -365,33 +371,84 @@ static int pmu_write_cmd(struct nvgpu_pmu *pmu, struct pmu_cmd *cmd,
 	return err;
 }
 
-static int pmu_cmd_payload_extract_rpc(struct gk20a *g, struct pmu_cmd *cmd,
+static int pmu_payload_allocate(struct gk20a *g, struct pmu_sequence *seq,
+	struct falcon_payload_alloc *alloc)
+{
+	struct nvgpu_pmu *pmu = &g->pmu;
+	u64 tmp;
+	int err = 0;
+
+	if (alloc->fb_surface == NULL &&
+		alloc->fb_size != 0x0U) {
+
+		alloc->fb_surface = nvgpu_kzalloc(g, sizeof(struct nvgpu_mem));
+		if (alloc->fb_surface == NULL) {
+			err = -ENOMEM;
+			goto clean_up;
+		}
+		nvgpu_pmu_vidmem_surface_alloc(g, alloc->fb_surface, alloc->fb_size);
+	}
+
+	if (pmu->queue_type == QUEUE_TYPE_FB) {
+		seq->fbq_out_offset_in_queue_element = seq->buffer_size_used;
+		/* Save target address in FBQ work buffer.
*/ + alloc->dmem_offset = seq->buffer_size_used; + seq->buffer_size_used += alloc->dmem_size; + } else { + tmp = nvgpu_alloc(&pmu->dmem, alloc->dmem_size); + nvgpu_assert(tmp <= U32_MAX); + alloc->dmem_offset = (u32)tmp; + if (alloc->dmem_offset == 0U) { + err = -ENOMEM; + goto clean_up; + } + } + +clean_up: + return err; +} + +static int pmu_cmd_payload_setup_rpc(struct gk20a *g, struct pmu_cmd *cmd, struct pmu_payload *payload, struct pmu_sequence *seq) { struct nvgpu_pmu *pmu = &g->pmu; struct pmu_v *pv = &g->ops.pmu_ver; - u16 dmem_alloc_size = 0; - u64 tmp; - u32 dmem_alloc_offset = 0; + struct nvgpu_falcon_queue *queue = seq->cmd_queue; + struct falcon_payload_alloc alloc; int err = 0; nvgpu_log_fn(g, " "); - dmem_alloc_size = payload->rpc.size_rpc + + memset(&alloc, 0, sizeof(struct falcon_payload_alloc)); + + alloc.dmem_size = payload->rpc.size_rpc + payload->rpc.size_scratch; - tmp = nvgpu_alloc(&pmu->dmem, dmem_alloc_size); - nvgpu_assert(tmp <= U32_MAX); - dmem_alloc_offset = (u32)tmp; - if (dmem_alloc_offset == 0U) { - err = -ENOMEM; + + err = pmu_payload_allocate(g, seq, &alloc); + if (err != 0) { goto clean_up; } - nvgpu_falcon_copy_to_dmem(pmu->flcn, dmem_alloc_offset, - payload->rpc.prpc, payload->rpc.size_rpc, 0); + alloc.dmem_size = payload->rpc.size_rpc; + + if (pmu->queue_type == QUEUE_TYPE_FB) { + /* copy payload to FBQ work buffer */ + nvgpu_memcpy((u8 *) + nvgpu_falcon_queue_get_fbq_work_buffer(queue) + + alloc.dmem_offset, + (u8 *)payload->rpc.prpc, payload->rpc.size_rpc); + + alloc.dmem_offset += seq->fbq_heap_offset; + + seq->in_payload_fb_queue = true; + seq->out_payload_fb_queue = true; + } else { + nvgpu_falcon_copy_to_dmem(pmu->flcn, alloc.dmem_offset, + payload->rpc.prpc, payload->rpc.size_rpc, 0); + } cmd->cmd.rpc.rpc_dmem_size = payload->rpc.size_rpc; - cmd->cmd.rpc.rpc_dmem_ptr = dmem_alloc_offset; + cmd->cmd.rpc.rpc_dmem_ptr = alloc.dmem_offset; seq->out_payload = payload->rpc.prpc; pv->pmu_allocation_set_dmem_size(pmu, @@ -399,7 +456,7 @@ static int pmu_cmd_payload_extract_rpc(struct gk20a *g, struct pmu_cmd *cmd, payload->rpc.size_rpc); pv->pmu_allocation_set_dmem_offset(pmu, pv->get_pmu_seq_out_a_ptr(seq), - dmem_alloc_offset); + alloc.dmem_offset); clean_up: if (err != 0) { @@ -411,14 +468,14 @@ clean_up: return err; } -static int pmu_cmd_payload_extract(struct gk20a *g, struct pmu_cmd *cmd, +static int pmu_cmd_payload_setup(struct gk20a *g, struct pmu_cmd *cmd, struct pmu_payload *payload, struct pmu_sequence *seq) { struct nvgpu_pmu *pmu = &g->pmu; struct pmu_v *pv = &g->ops.pmu_ver; void *in = NULL, *out = NULL; + struct falcon_payload_alloc alloc; int err = 0; - u64 tmp; nvgpu_log_fn(g, " "); @@ -426,6 +483,8 @@ static int pmu_cmd_payload_extract(struct gk20a *g, struct pmu_cmd *cmd, seq->out_payload = payload->out.buf; } + memset(&alloc, 0, sizeof(struct falcon_payload_alloc)); + if (payload != NULL && payload->in.offset != 0U) { pv->set_pmu_allocation_ptr(pmu, &in, ((u8 *)&cmd->cmd + payload->in.offset)); @@ -438,24 +497,22 @@ static int pmu_cmd_payload_extract(struct gk20a *g, struct pmu_cmd *cmd, (u16)max(payload->in.size, payload->out.size)); } - tmp = nvgpu_alloc(&pmu->dmem, - pv->pmu_allocation_get_dmem_size(pmu, in)); - nvgpu_assert(tmp <= U32_MAX); - *(pv->pmu_allocation_get_dmem_offset_addr(pmu, in)) = (u32)tmp; - if (*(pv->pmu_allocation_get_dmem_offset_addr(pmu, in)) == 0U) { + alloc.dmem_size = pv->pmu_allocation_get_dmem_size(pmu, in); + + if (payload->in.fb_size != 0x0U) { + alloc.fb_size = payload->in.fb_size; + } + + err = 
pmu_payload_allocate(g, seq, &alloc); + if (err != 0) { goto clean_up; } - if (payload->in.fb_size != 0x0U) { - seq->in_mem = nvgpu_kzalloc(g, - sizeof(struct nvgpu_mem)); - if (seq->in_mem == NULL) { - err = -ENOMEM; - goto clean_up; - } + *(pv->pmu_allocation_get_dmem_offset_addr(pmu, in)) = + alloc.dmem_offset; - nvgpu_pmu_vidmem_surface_alloc(g, seq->in_mem, - payload->in.fb_size); + if (payload->in.fb_size != 0x0U) { + seq->in_mem = alloc.fb_surface; nvgpu_pmu_surface_describe(g, seq->in_mem, (struct flcn_mem_desc_v0 *) pv->pmu_allocation_get_fb_addr(pmu, in)); @@ -463,10 +520,30 @@ static int pmu_cmd_payload_extract(struct gk20a *g, struct pmu_cmd *cmd, nvgpu_mem_wr_n(g, seq->in_mem, 0, payload->in.buf, payload->in.fb_size); + if (pmu->queue_type == QUEUE_TYPE_FB) { + alloc.dmem_offset += seq->fbq_heap_offset; + *(pv->pmu_allocation_get_dmem_offset_addr(pmu, in)) = + alloc.dmem_offset; + } } else { - nvgpu_falcon_copy_to_dmem(pmu->flcn, - (pv->pmu_allocation_get_dmem_offset(pmu, in)), - payload->in.buf, payload->in.size, 0); + if (pmu->queue_type == QUEUE_TYPE_FB) { + /* copy payload to FBQ work buffer */ + nvgpu_memcpy((u8 *) + nvgpu_falcon_queue_get_fbq_work_buffer(seq->cmd_queue) + + alloc.dmem_offset, + (u8 *)payload->in.buf, + payload->in.size); + + alloc.dmem_offset += seq->fbq_heap_offset; + *(pv->pmu_allocation_get_dmem_offset_addr(pmu, in)) = + alloc.dmem_offset; + + seq->in_payload_fb_queue = true; + } else { + nvgpu_falcon_copy_to_dmem(pmu->flcn, + (pv->pmu_allocation_get_dmem_offset(pmu, in)), + payload->in.buf, payload->in.size, 0); + } } pv->pmu_allocation_set_dmem_size(pmu, pv->get_pmu_seq_in_a_ptr(seq), @@ -483,44 +560,49 @@ static int pmu_cmd_payload_extract(struct gk20a *g, struct pmu_cmd *cmd, (u16)payload->out.size); if (payload->in.buf != payload->out.buf) { - tmp = nvgpu_alloc(&pmu->dmem, - pv->pmu_allocation_get_dmem_size(pmu, - out)); - nvgpu_assert(tmp <= U32_MAX); - *(pv->pmu_allocation_get_dmem_offset_addr(pmu, out)) = - (u32)tmp; - if (*(pv->pmu_allocation_get_dmem_offset_addr(pmu, - out)) == 0U) { + alloc.dmem_size = pv->pmu_allocation_get_dmem_size(pmu, out); + + if (payload->out.fb_size != 0x0U) { + alloc.fb_size = payload->out.fb_size; + } + + err = pmu_payload_allocate(g, seq, &alloc); + if (err != 0) { goto clean_up; } - if (payload->out.fb_size != 0x0U) { - seq->out_mem = nvgpu_kzalloc(g, - sizeof(struct nvgpu_mem)); - if (seq->out_mem == NULL) { - err = -ENOMEM; - goto clean_up; - } - nvgpu_pmu_vidmem_surface_alloc(g, seq->out_mem, - payload->out.fb_size); - nvgpu_pmu_surface_describe(g, seq->out_mem, - (struct flcn_mem_desc_v0 *) - pv->pmu_allocation_get_fb_addr(pmu, - out)); - } + *(pv->pmu_allocation_get_dmem_offset_addr(pmu, out)) = + alloc.dmem_offset; + seq->out_mem = alloc.fb_surface; } else { BUG_ON(in == NULL); seq->out_mem = seq->in_mem; pv->pmu_allocation_set_dmem_offset(pmu, out, pv->pmu_allocation_get_dmem_offset(pmu, in)); } + + if (payload->out.fb_size != 0x0U) { + nvgpu_pmu_surface_describe(g, seq->out_mem, + (struct flcn_mem_desc_v0 *) + pv->pmu_allocation_get_fb_addr(pmu, + out)); + } + + if (pmu->queue_type == QUEUE_TYPE_FB) { + if (payload->in.buf != payload->out.buf) { + *(pv->pmu_allocation_get_dmem_offset_addr(pmu, + out)) += seq->fbq_heap_offset; + } + + seq->out_payload_fb_queue = true; + } + pv->pmu_allocation_set_dmem_size(pmu, pv->get_pmu_seq_out_a_ptr(seq), pv->pmu_allocation_get_dmem_size(pmu, out)); pv->pmu_allocation_set_dmem_offset(pmu, pv->get_pmu_seq_out_a_ptr(seq), pv->pmu_allocation_get_dmem_offset(pmu, out)); - 
} clean_up: @@ -541,6 +623,95 @@ clean_up: return err; } +static int pmu_fbq_cmd_setup(struct gk20a *g, struct pmu_cmd *cmd, + struct nvgpu_falcon_queue *queue, struct pmu_payload *payload, + struct pmu_sequence *seq) +{ + struct nvgpu_pmu *pmu = &g->pmu; + struct nv_falcon_fbq_hdr *fbq_hdr = NULL; + struct pmu_cmd *flcn_cmd = NULL; + u16 fbq_size_needed = 0; + u16 heap_offset = 0; + u64 tmp; + int err = 0; + + /* allocate work buffer for current FBQ element */ + nvgpu_falcon_queue_allocate_fbq_work_buffer(queue); + + fbq_hdr = (struct nv_falcon_fbq_hdr *) + nvgpu_falcon_queue_get_fbq_work_buffer(queue); + + flcn_cmd = (struct pmu_cmd *) + (nvgpu_falcon_queue_get_fbq_work_buffer(queue) + + sizeof(struct nv_falcon_fbq_hdr)); + + if (cmd->cmd.rpc.cmd_type == NV_PMU_RPC_CMD_ID) { + if (payload != NULL) { + fbq_size_needed = payload->rpc.size_rpc + + payload->rpc.size_scratch; + } + } else { + if (payload != NULL) { + if (payload->in.offset != 0U) { + if (payload->in.buf != payload->out.buf) { + fbq_size_needed = (u16)payload->in.size; + } else { + fbq_size_needed = (u16)max(payload->in.size, + payload->out.size); + } + } + + if (payload->out.offset != 0U) { + if (payload->out.buf != payload->in.buf) { + fbq_size_needed += (u16)payload->out.size; + } + } + } + } + + fbq_size_needed = fbq_size_needed + + sizeof(struct nv_falcon_fbq_hdr) + + cmd->hdr.size; + + fbq_size_needed = ALIGN_UP(fbq_size_needed, 4); + + tmp = nvgpu_alloc(&pmu->dmem, fbq_size_needed); + nvgpu_assert(tmp <= U32_MAX); + heap_offset = (u16) tmp; + if (heap_offset == 0U) { + err = -ENOMEM; + goto exit; + } + + seq->in_payload_fb_queue = false; + seq->out_payload_fb_queue = false; + + /* clear work queue buffer */ + memset(nvgpu_falcon_queue_get_fbq_work_buffer(queue), 0, + nvgpu_falcon_fbq_get_element_size(queue)); + + /* Need to save room for both FBQ hdr, and the CMD */ + seq->buffer_size_used = sizeof(struct nv_falcon_fbq_hdr) + + cmd->hdr.size; + + /* copy cmd into the work buffer */ + nvgpu_memcpy((u8 *)flcn_cmd, (u8 *)cmd, cmd->hdr.size); + + /* Fill in FBQ hdr, and offset in seq structure */ + fbq_hdr->heap_size = fbq_size_needed; + fbq_hdr->heap_offset = heap_offset; + seq->fbq_heap_offset = heap_offset; + + /* + * save queue index in seq structure + * so can free queue element when response is received + */ + seq->fbq_element_index = nvgpu_falcon_queue_get_position(queue); + +exit: + return err; +} + int nvgpu_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd, struct pmu_msg *msg, struct pmu_payload *payload, u32 queue_id, pmu_callback callback, void *cb_param, @@ -548,6 +719,7 @@ int nvgpu_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd, { struct nvgpu_pmu *pmu = &g->pmu; struct pmu_sequence *seq = NULL; + struct nvgpu_falcon_queue *queue = pmu->queue[queue_id]; int err; nvgpu_log_fn(g, " "); @@ -580,6 +752,9 @@ int nvgpu_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd, cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_STATUS; cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_INTR; + /* Save the queue in the seq structure. 
*/ + seq->cmd_queue = queue; + seq->callback = callback; seq->cb_params = cb_param; seq->msg = msg; @@ -588,10 +763,24 @@ int nvgpu_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd, *seq_desc = seq->desc; + if (pmu->queue_type == QUEUE_TYPE_FB) { + /* Create FBQ work buffer & copy cmd to FBQ work buffer */ + err = pmu_fbq_cmd_setup(g, cmd, queue, payload, seq); + + /* + * change cmd pointer to point to FBQ work + * buffer as cmd copied to FBQ work buffer + * in call pmu_fgq_cmd_setup() + */ + cmd = (struct pmu_cmd *) + (nvgpu_falcon_queue_get_fbq_work_buffer(queue) + + sizeof(struct nv_falcon_fbq_hdr)); + } + if (cmd->cmd.rpc.cmd_type == NV_PMU_RPC_CMD_ID) { - err = pmu_cmd_payload_extract_rpc(g, cmd, payload, seq); + err = pmu_cmd_payload_setup_rpc(g, cmd, payload, seq); } else { - err = pmu_cmd_payload_extract(g, cmd, payload, seq); + err = pmu_cmd_payload_setup(g, cmd, payload, seq); } if (err != 0) { @@ -616,39 +805,29 @@ clean_up: return err; } -static int pmu_response_handle(struct nvgpu_pmu *pmu, - struct pmu_msg *msg) +static void pmu_payload_extract(struct nvgpu_pmu *pmu, + struct pmu_msg *msg, struct pmu_sequence *seq) { struct gk20a *g = gk20a_from_pmu(pmu); - struct pmu_sequence *seq; struct pmu_v *pv = &g->ops.pmu_ver; - int ret = 0; + u32 fbq_payload_offset = 0U; nvgpu_log_fn(g, " "); - seq = &pmu->seq[msg->hdr.seq_id]; + if (seq->out_payload_fb_queue) { + fbq_payload_offset = + nvgpu_falcon_queue_get_fbq_offset(seq->cmd_queue) + + seq->fbq_out_offset_in_queue_element + (seq->fbq_element_index * + nvgpu_falcon_fbq_get_element_size(seq->cmd_queue)); - if (seq->state != PMU_SEQ_STATE_USED && - seq->state != PMU_SEQ_STATE_CANCELLED) { - nvgpu_err(g, "msg for an unknown sequence %d", seq->id); - return -EINVAL; - } + nvgpu_mem_rd_n(g, &pmu->super_surface_buf, fbq_payload_offset, + seq->out_payload, + pv->pmu_allocation_get_dmem_size(pmu, + pv->get_pmu_seq_out_a_ptr(seq))); - if (msg->hdr.unit_id == PMU_UNIT_RC && - msg->msg.rc.msg_type == PMU_RC_MSG_TYPE_UNHANDLED_CMD) { - nvgpu_err(g, "unhandled cmd: seq %d", seq->id); - } else if (seq->state != PMU_SEQ_STATE_CANCELLED) { - if (seq->msg != NULL) { - if (seq->msg->hdr.size >= msg->hdr.size) { - nvgpu_memcpy((u8 *)seq->msg, (u8 *)msg, - msg->hdr.size); - } else { - nvgpu_err(g, "sequence %d msg buffer too small", - seq->id); - } - } + } else { if (pv->pmu_allocation_get_dmem_size(pmu, - pv->get_pmu_seq_out_a_ptr(seq)) != 0U) { + pv->get_pmu_seq_out_a_ptr(seq)) != 0U) { nvgpu_falcon_copy_from_dmem(pmu->flcn, pv->pmu_allocation_get_dmem_offset(pmu, pv->get_pmu_seq_out_a_ptr(seq)), @@ -656,22 +835,66 @@ static int pmu_response_handle(struct nvgpu_pmu *pmu, pv->pmu_allocation_get_dmem_size(pmu, pv->get_pmu_seq_out_a_ptr(seq)), 0); } + } +} + +static void pmu_payload_extract_rpc(struct nvgpu_pmu *pmu, + struct pmu_msg *msg, struct pmu_sequence *seq) +{ + nvgpu_log_fn(pmu->g, " "); + + pmu_payload_extract(pmu, msg, seq); +} + +static void pmu_payload_fbq_free(struct nvgpu_pmu *pmu, + struct pmu_sequence *seq) +{ + nvgpu_log_fn(pmu->g, " "); + + seq->out_payload_fb_queue = false; + seq->in_payload_fb_queue = false; + + nvgpu_free(&pmu->dmem, seq->fbq_heap_offset); + seq->fbq_heap_offset = 0; + + /* + * free FBQ allocated work buffer + * set FBQ element work buffer to NULL + * Clear the in use bit for the queue entry this CMD used. 
+ */ + nvgpu_falcon_queue_free_fbq_element(pmu->flcn, seq->cmd_queue, + seq->fbq_element_index); +} + +static void pmu_payload_free(struct nvgpu_pmu *pmu, + struct pmu_msg *msg, struct pmu_sequence *seq) +{ + struct gk20a *g = gk20a_from_pmu(pmu); + struct pmu_v *pv = &g->ops.pmu_ver; + + nvgpu_log_fn(g, " "); + + /* free FBQ payload*/ + if (pmu->queue_type == QUEUE_TYPE_FB) { + pmu_payload_fbq_free(pmu, seq); } else { - seq->callback = NULL; - } - if (pv->pmu_allocation_get_dmem_size(pmu, + /* free DMEM space payload*/ + if (pv->pmu_allocation_get_dmem_size(pmu, pv->get_pmu_seq_in_a_ptr(seq)) != 0U) { - nvgpu_free(&pmu->dmem, - pv->pmu_allocation_get_dmem_offset(pmu, - pv->get_pmu_seq_in_a_ptr(seq))); - } - if (pv->pmu_allocation_get_dmem_size(pmu, + nvgpu_free(&pmu->dmem, + pv->pmu_allocation_get_dmem_offset(pmu, + pv->get_pmu_seq_in_a_ptr(seq))); + } + + if (pv->pmu_allocation_get_dmem_size(pmu, pv->get_pmu_seq_out_a_ptr(seq)) != 0U) { - nvgpu_free(&pmu->dmem, - pv->pmu_allocation_get_dmem_offset(pmu, - pv->get_pmu_seq_out_a_ptr(seq))); + nvgpu_free(&pmu->dmem, + pv->pmu_allocation_get_dmem_offset(pmu, + pv->get_pmu_seq_out_a_ptr(seq))); + } } + /* free FB surface payload */ if (seq->out_mem != NULL) { (void) memset(pv->pmu_allocation_get_fb_addr(pmu, pv->get_pmu_seq_out_a_ptr(seq)), 0x0, @@ -696,18 +919,73 @@ static int pmu_response_handle(struct nvgpu_pmu *pmu, nvgpu_kfree(g, seq->in_mem); seq->in_mem = NULL; } +} + +static int pmu_response_handle(struct nvgpu_pmu *pmu, + struct pmu_msg *msg) +{ + struct gk20a *g = gk20a_from_pmu(pmu); + struct pmu_sequence *seq; + int err = 0; + + nvgpu_log_fn(g, " "); + + seq = &pmu->seq[msg->hdr.seq_id]; + + if (seq->state != PMU_SEQ_STATE_USED && + seq->state != PMU_SEQ_STATE_CANCELLED) { + nvgpu_err(g, "msg for an unknown sequence %d", seq->id); + err = -EINVAL; + goto exit; + } + + if (msg->hdr.unit_id == PMU_UNIT_RC && + msg->msg.rc.msg_type == PMU_RC_MSG_TYPE_UNHANDLED_CMD) { + nvgpu_err(g, "unhandled cmd: seq %d", seq->id); + err = -EINVAL; + } else if (seq->state != PMU_SEQ_STATE_CANCELLED) { + + if (msg->hdr.size > PMU_MSG_HDR_SIZE && + msg->msg.rc.msg_type == NV_PMU_RPC_MSG_ID) { + pmu_payload_extract_rpc(pmu, msg, seq); + } else { + if (seq->msg != NULL) { + if (seq->msg->hdr.size >= msg->hdr.size) { + nvgpu_memcpy((u8 *)seq->msg, (u8 *)msg, + msg->hdr.size); + } else { + nvgpu_err(g, "sequence %d msg buffer too small", + seq->id); + err = -EINVAL; + } + } + + pmu_payload_extract(pmu, msg, seq); + } + } else { + seq->callback = NULL; + } + +exit: + /* + * free allocated space for payload in + * DMEM/FB-surface/FB_QUEUE as data is + * copied to buffer pointed by + * seq->out_payload + */ + pmu_payload_free(pmu, msg, seq); if (seq->callback != NULL) { - seq->callback(g, msg, seq->cb_params, seq->desc, ret); + seq->callback(g, msg, seq->cb_params, seq->desc, err); } pmu_seq_release(pmu, seq); /* TBD: notify client waiting for available dmem */ - nvgpu_log_fn(g, "done"); + nvgpu_log_fn(g, "done err %d", err); - return 0; + return err; } static int pmu_handle_event(struct nvgpu_pmu *pmu, struct pmu_msg *msg) diff --git a/drivers/gpu/nvgpu/include/nvgpu/flcnif_cmn.h b/drivers/gpu/nvgpu/include/nvgpu/flcnif_cmn.h index c2453fa87..99218ca71 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/flcnif_cmn.h +++ b/drivers/gpu/nvgpu/include/nvgpu/flcnif_cmn.h @@ -87,6 +87,13 @@ struct pmu_allocation_v3 { } alloc; }; +struct falcon_payload_alloc { + u16 dmem_size; + u32 dmem_offset; + u32 fb_size; + struct nvgpu_mem *fb_surface; +}; + #define 
nv_pmu_allocation pmu_allocation_v3
 
 struct pmu_hdr {
diff --git a/drivers/gpu/nvgpu/include/nvgpu/pmu.h b/drivers/gpu/nvgpu/include/nvgpu/pmu.h
index bdb65eefd..7828bcc98 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/pmu.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/pmu.h
@@ -295,6 +295,26 @@ struct pmu_sequence {
 	u8 *out_payload;
 	pmu_callback callback;
 	void *cb_params;
+
+	/* fb queue that is associated with this seq */
+	struct nvgpu_falcon_queue *cmd_queue;
+	/* fbq element that is associated with this seq */
+	u8 *fbq_work_buffer;
+	u32 fbq_element_index;
+	/* flags if queue element has an in payload */
+	bool in_payload_fb_queue;
+	/* flags if queue element has an out payload */
+	bool out_payload_fb_queue;
+	/* Heap location this cmd will use in the nvgpu managed heap */
+	u16 fbq_heap_offset;
+	/*
+	 * Track the amount of the "work buffer" (queue_buffer) that
+	 * has been used so far, as the outbound frame is assembled
+	 * (first FB Queue hdr, then CMD, then payloads).
+	 */
+	u32 buffer_size_used;
+	/* offset to out data in the queue element */
+	u16 fbq_out_offset_in_queue_element;
 };
 
 struct nvgpu_pg_init {
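
Reviewer note (illustration only, not part of the patch): the sketch below
walks through the offset arithmetic this change introduces. On the send
side a single FB queue element is assembled as FBQ header, then the
command, then the in/out payloads, with buffer_size_used tracking the
current end of the element (mirroring pmu_fbq_cmd_setup() and
pmu_payload_allocate()); on the response side the out-payload offset
inside the FB queue surface is recomputed from the queue offset, the
element index and the element-relative out offset (mirroring
pmu_payload_extract()). The toy_* types, the element size, the queue
offset and the example payload sizes are made-up stand-ins rather than
nvgpu definitions; only the layout and the offset formula follow the
patch.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define FBQ_ELEMENT_SIZE   2048u                 /* example element size reported by the queue */
#define ALIGN_UP(v, a)     (((v) + ((a) - 1u)) & ~((a) - 1u))

struct toy_fbq_hdr {                             /* stand-in for nv_falcon_fbq_hdr */
	uint16_t heap_size;
	uint16_t heap_offset;
};

struct toy_cmd_hdr {                             /* stand-in for the pmu_cmd header */
	uint8_t unit_id;
	uint8_t size;                            /* total cmd size incl. header */
};

int main(void)
{
	uint8_t work_buffer[FBQ_ELEMENT_SIZE];   /* per-element work buffer */
	uint32_t buffer_size_used;
	uint32_t in_size = 24u, out_size = 40u;  /* example payload sizes */
	struct toy_cmd_hdr cmd = { .unit_id = 0x07, .size = 16u };

	/* clear the work buffer, as pmu_fbq_cmd_setup() does for the element */
	memset(work_buffer, 0, sizeof(work_buffer));

	/* send side: reserve room for the FBQ header and the command first */
	buffer_size_used = (uint32_t)sizeof(struct toy_fbq_hdr) + cmd.size;

	/* the in payload is placed at the current end of the element */
	uint32_t in_offset = buffer_size_used;
	buffer_size_used += in_size;

	/* the out payload follows; its element-relative offset is what the
	 * sequence remembers as fbq_out_offset_in_queue_element */
	uint32_t out_offset_in_element = buffer_size_used;
	buffer_size_used += out_size;

	/* total heap footprint of this element, 4-byte aligned as in the patch */
	uint32_t heap_size = ALIGN_UP((uint32_t)sizeof(struct toy_fbq_hdr) +
				      cmd.size + in_size + out_size, 4u);

	/* response side: locate the out payload inside the FB queue surface */
	uint32_t queue_fbq_offset = 0x4000u;     /* example offset of this queue in the surface */
	uint32_t element_index = 3u;             /* example element used by this sequence */
	uint32_t fbq_payload_offset = queue_fbq_offset +
		out_offset_in_element +
		element_index * FBQ_ELEMENT_SIZE;

	printf("in@%u out@%u heap_size=%u out payload surface offset=0x%x\n",
	       (unsigned)in_offset, (unsigned)out_offset_in_element,
	       (unsigned)heap_size, (unsigned)fbq_payload_offset);
	return 0;
}

Running the sketch prints the element-relative payload offsets and the
surface offset that the response handler would read the out payload
from; the real code does the equivalent read with nvgpu_mem_rd_n() on
pmu->super_surface_buf.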