diff --git a/drivers/gpu/nvgpu/common/falcon/falcon_priv.h b/drivers/gpu/nvgpu/common/falcon/falcon_priv.h
index 9ce64d2d2..1c731be0b 100644
--- a/drivers/gpu/nvgpu/common/falcon/falcon_priv.h
+++ b/drivers/gpu/nvgpu/common/falcon/falcon_priv.h
@@ -71,7 +71,7 @@ enum falcon_mem_type {
 };
 
 struct nvgpu_falcon_queue {
-
+	struct gk20a *g;
 	/* Queue Type (queue_type) */
 	u8 queue_type;
 
@@ -91,6 +91,62 @@ struct nvgpu_falcon_queue {
 	/* open-flag */
 	u32 oflag;
 
+	/* members unique to the FB version of the falcon queues */
+	struct {
+		/* Holds the super surface base address */
+		struct nvgpu_mem *super_surface_mem;
+
+		/*
+		 * Holds the offset of the queue data (0th element).
+		 * For FB queues this is the offset of this queue
+		 * within the super surface.
+		 */
+		u32 fb_offset;
+
+		/*
+		 * Size of a single queue element. The 'size' member
+		 * above holds the number of queue elements.
+		 */
+		u32 element_size;
+
+		/*
+		 * Pointers to local (SYSMEM) buffers, each holding a
+		 * queue element while it is being assembled.
+		 */
+		u8 *write_buffer[64];
+
+		/* Bitmap to keep track of elements in use */
+		u64 element_in_use;
+
+		/*
+		 * Pointer to a local (SYSMEM) buffer holding the
+		 * single queue element currently being read.
+		 */
+		u8 *read_buffer;
+
+		/*
+		 * Tracks how much of the current FB queue MSG queue
+		 * entry has been read. This is needed as functions
+		 * read the MSG queue as a byte stream, rather than
+		 * reading a whole MSG at a time.
+		 */
+		u32 read_position;
+
+		/*
+		 * Tail as tracked on the nvgpu "side". Because a queue
+		 * element and its associated payload (which is also
+		 * moved PMU->nvgpu through the FB CMD queue) can't be
+		 * freed until the command is complete, the response is
+		 * received and any "out" payload is delivered to the
+		 * client, nvgpu has to track its own version of "tail".
+		 * It is advanced as commands complete and free entries
+		 * are found following the current tail.
+		 */
+		u32 tail;
+	} fbq;
+
 	/* queue type(DMEM-Q/FB-Q) specific ops */
 	int (*rewind)(struct nvgpu_falcon *flcn,
 		struct nvgpu_falcon_queue *queue);
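The fbq bookkeeping above reduces to simple ring arithmetic: each element occupies a fixed element_size slot starting at fb_offset inside the super surface, element_in_use carries one bit per slot, and the nvgpu-side tail only advances past slots whose bit has been cleared. A minimal standalone sketch of that arithmetic, with illustrative names only (not part of the patch):

    #include <stdint.h>

    /* Illustrative model of the fbq fields added above. */
    struct fbq_model {
            uint32_t fb_offset;      /* queue base inside the super surface */
            uint32_t element_size;   /* bytes per queue element */
            uint32_t size;           /* number of queue elements */
            uint64_t element_in_use; /* one bit per element */
            uint32_t tail;           /* nvgpu-side tail */
    };

    /* Byte offset of element 'pos' within the super surface. */
    static uint32_t fbq_element_offset(const struct fbq_model *q, uint32_t pos)
    {
            return q->fb_offset + (pos * q->element_size);
    }

    /* Advance the nvgpu-side tail past completed (not-in-use) elements,
     * mirroring what falcon_queue_sweep_fb() below does with the real
     * bitmap helpers. */
    static void fbq_sweep(struct fbq_model *q, uint32_t head)
    {
            while ((q->tail != head) &&
                   ((q->element_in_use & (1ULL << q->tail)) == 0ULL)) {
                    q->tail = (q->tail + 1U) % q->size;
            }
    }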
diff --git a/drivers/gpu/nvgpu/common/falcon/falcon_queue.c b/drivers/gpu/nvgpu/common/falcon/falcon_queue.c
index 544dd2acc..75e79c835 100644
--- a/drivers/gpu/nvgpu/common/falcon/falcon_queue.c
+++ b/drivers/gpu/nvgpu/common/falcon/falcon_queue.c
@@ -21,6 +21,7 @@
  */
 
 #include
+#include
 #include
 
 #include "falcon_priv.h"
@@ -135,6 +136,329 @@ exit:
 	return err;
 }
 
+/* FB-Q ops */
+static int falcon_queue_tail_fb(struct nvgpu_falcon *flcn,
+	struct nvgpu_falcon_queue *queue, u32 *tail, bool set)
+{
+	struct gk20a *g = flcn->g;
+	int err = -ENOSYS;
+
+	if (set == false && PMU_IS_COMMAND_QUEUE(queue->id)) {
+		*tail = queue->fbq.tail;
+		err = 0;
+	} else {
+		if (flcn->flcn_engine_dep_ops.queue_tail != NULL) {
+			err = flcn->flcn_engine_dep_ops.queue_tail(g,
+				queue, tail, set);
+		}
+	}
+
+	return err;
+}
+
+static inline u32 falcon_queue_get_next_fb(struct nvgpu_falcon *flcn,
+	struct nvgpu_falcon_queue *queue, u32 head)
+{
+	return (head + 1U) % queue->size;
+}
+
+static bool falcon_queue_has_room_fb(struct nvgpu_falcon *flcn,
+	struct nvgpu_falcon_queue *queue,
+	u32 size, bool *need_rewind)
+{
+	u32 head = 0;
+	u32 tail = 0;
+	u32 next_head = 0;
+	int err = 0;
+
+	err = queue->head(flcn, queue, &head, QUEUE_GET);
+	if (err != 0) {
+		nvgpu_err(flcn->g, "queue head GET failed");
+		goto exit;
+	}
+
+	err = queue->tail(flcn, queue, &tail, QUEUE_GET);
+	if (err != 0) {
+		nvgpu_err(flcn->g, "queue tail GET failed");
+		goto exit;
+	}
+
+	next_head = falcon_queue_get_next_fb(flcn, queue, head);
+
+exit:
+	/* on error next_head == tail == 0, i.e. no room is reported */
+	return next_head != tail;
+}
+
+static int falcon_queue_write_fb(struct nvgpu_falcon *flcn,
+	struct nvgpu_falcon_queue *queue, u32 offset,
+	u8 *src, u32 size)
+{
+	struct gk20a *g = flcn->g;
+	struct nv_falcon_fbq_hdr *fb_q_hdr = (struct nv_falcon_fbq_hdr *)
+		(void *)queue->fbq.write_buffer[queue->position];
+	u32 entry_offset = 0U;
+	int err = 0;
+
+	if (queue->fbq.write_buffer[queue->position] == NULL) {
+		nvgpu_err(g, "invalid/unallocated work buffer");
+		err = -EINVAL;
+		goto exit;
+	}
+
+	/* Fill out the FBQ hdr that is in the work buffer */
+	fb_q_hdr->element_index = (u8)offset;
+
+	/* check the queue entry size */
+	if (fb_q_hdr->heap_size >= (u16)queue->fbq.element_size) {
+		err = -EINVAL;
+		goto exit;
+	}
+
+	/* get the offset of this element entry */
+	entry_offset = offset * queue->fbq.element_size;
+
+	/* copy the cmd to the super surface */
+	nvgpu_mem_wr_n(g, queue->fbq.super_surface_mem,
+		queue->fbq.fb_offset + entry_offset,
+		queue->fbq.write_buffer[queue->position],
+		queue->fbq.element_size);
+
+exit:
+	return err;
+}
+
+static int falcon_queue_element_set_use_state_fb(struct nvgpu_falcon *flcn,
+	struct nvgpu_falcon_queue *queue, u32 queue_pos, bool set)
+{
+	int err = 0;
+
+	if (queue_pos >= queue->size) {
+		err = -EINVAL;
+		goto exit;
+	}
+
+	if (test_bit((int)queue_pos,
+		(void *)&queue->fbq.element_in_use) && set) {
+		nvgpu_err(flcn->g,
+			"FBQ last received queue element not processed yet"
+			" queue_pos %d", queue_pos);
+		err = -EINVAL;
+		goto exit;
+	}
+
+	if (set) {
+		set_bit((int)queue_pos, (void *)&queue->fbq.element_in_use);
+	} else {
+		clear_bit((int)queue_pos, (void *)&queue->fbq.element_in_use);
+	}
+
+exit:
+	return err;
+}
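Taken together, the write path expects the caller to assemble a command in the SYSMEM work buffer and only then push it, which copies the whole element into its super-surface slot. A hedged caller-side sketch using the public helpers this patch adds; the fill_fbq_header_stub() step is a placeholder for the higher-level PMU code that populates nv_falcon_fbq_hdr (heap_size etc.), and nvgpu_falcon_queue_push() is assumed to be the existing public wrapper around queue->push():

    /* placeholder: real driver code fills the nv_falcon_fbq_hdr fields */
    static void fill_fbq_header_stub(u8 *buf) { (void)buf; }

    /* Sketch: submit one command through an FB queue opened for write. */
    static int fbq_submit_example(struct nvgpu_falcon *flcn,
            struct nvgpu_falcon_queue *queue, u8 *cmd, u32 cmd_size)
    {
            u8 *buf;
            int err;

            /* Stage the element in SYSMEM at the current queue position. */
            err = nvgpu_falcon_queue_allocate_fbq_work_buffer(queue);
            if (err != 0) {
                    return err;
            }

            buf = nvgpu_falcon_queue_get_fbq_work_buffer(queue);

            /* Assemble the element: FBQ header first, command after it. */
            fill_fbq_header_stub(buf);
            nvgpu_memcpy(buf + sizeof(struct nv_falcon_fbq_hdr), cmd, cmd_size);

            /*
             * Marks the element in use and copies the whole staged element
             * into its super-surface slot (falcon_queue_push_fb() below).
             */
            return nvgpu_falcon_queue_push(flcn, queue, buf,
                    (u32)sizeof(struct nv_falcon_fbq_hdr) + cmd_size);
    }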
+static int falcon_queue_push_fb(struct nvgpu_falcon *flcn,
+	struct nvgpu_falcon_queue *queue, void *data, u32 size)
+{
+	struct gk20a *g = flcn->g;
+	int err = 0;
+
+	nvgpu_log_fn(g, " ");
+
+	/* Bounds check size */
+	if (size > queue->fbq.element_size) {
+		nvgpu_err(g, "size too large size=0x%x", size);
+		err = -EINVAL;
+		goto exit;
+	}
+
+	/* Set queue element in use */
+	if (falcon_queue_element_set_use_state_fb(flcn, queue,
+		queue->position, true) != 0) {
+		nvgpu_err(g,
+			"fb-queue element in use map is in invalid state");
+		err = -EINVAL;
+		goto exit;
+	}
+
+	/* write data to FB */
+	err = falcon_queue_write_fb(flcn, queue, queue->position, data, size);
+	if (err != 0) {
+		nvgpu_err(g, "write to fb-queue failed");
+		goto exit;
+	}
+
+	queue->position = falcon_queue_get_next_fb(flcn, queue,
+		queue->position);
+
+exit:
+	if (err != 0) {
+		nvgpu_err(flcn->g, "falcon id-%d, queue id-%d, failed",
+			flcn->flcn_id, queue->id);
+	}
+
+	return err;
+}
+
+static int falcon_queue_pop_fb(struct nvgpu_falcon *flcn,
+	struct nvgpu_falcon_queue *queue, void *data, u32 size,
+	u32 *bytes_read)
+{
+	struct gk20a *g = flcn->g;
+	struct pmu_hdr *hdr = (struct pmu_hdr *)
+		(void *)queue->fbq.read_buffer;
+	u32 entry_offset = 0U;
+	int err = 0;
+
+	nvgpu_log_fn(g, " ");
+
+	*bytes_read = 0U;
+
+	/* Check size */
+	if ((size + queue->fbq.read_position) >=
+		queue->fbq.element_size) {
+		nvgpu_err(g,
+			"attempt to read past queue element size for queue id-%d",
+			queue->id);
+		err = -EINVAL;
+		goto exit;
+	}
+
+	entry_offset = queue->position * queue->fbq.element_size;
+
+	/*
+	 * If this is the first read of this queue element, read the
+	 * whole queue element into the read buffer.
+	 */
+	if (queue->fbq.read_position == 0U) {
+		nvgpu_mem_rd_n(g, queue->fbq.super_surface_mem,
+			/* source (FBQ data) offset */
+			queue->fbq.fb_offset + entry_offset,
+			/* destination buffer */
+			(void *)queue->fbq.read_buffer,
+			/* copy size */
+			queue->fbq.element_size);
+
+		/* Check the size in the hdr of the MSG just read */
+		if (hdr->size >= queue->fbq.element_size) {
+			nvgpu_err(g, "super surface read failed");
+			err = -ERANGE;
+			goto exit;
+		}
+	}
+
+	nvgpu_memcpy((u8 *)data, (u8 *)queue->fbq.read_buffer +
+		queue->fbq.read_position, size);
+
+	/* update the current read position */
+	queue->fbq.read_position += size;
+
+	/* If the end of this queue element is reached, move on to the next. */
+	if (queue->fbq.read_position >= hdr->size) {
+		queue->fbq.read_position = 0U;
+		/* Increment the queue index. */
+		queue->position = falcon_queue_get_next_fb(flcn, queue,
+			queue->position);
+	}
+
+	*bytes_read = size;
+
+exit:
+	if (err != 0) {
+		nvgpu_err(flcn->g, "falcon id-%d, queue id-%d, failed",
+			flcn->flcn_id, queue->id);
+	}
+
+	return err;
+}
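falcon_queue_pop_fb() deliberately treats the message as a byte stream: a reader first pops the fixed-size header, learns the full message size from it, then pops the remainder; read_position carries the offset between calls, and the queue position only advances once hdr->size bytes have been consumed. A standalone model of that contract (illustrative names; bounds checking omitted for brevity):

    #include <stdint.h>
    #include <string.h>

    /* Illustrative model: one fully-read queue element and a read cursor. */
    struct msg_stream {
            const uint8_t *element;  /* one queue element, already in SYSMEM */
            uint32_t msg_size;       /* hdr->size of the message inside it */
            uint32_t read_position;  /* mirrors queue->fbq.read_position */
    };

    /* Pop 'size' bytes, as falcon_queue_pop_fb() does; returns bytes read. */
    static uint32_t stream_pop(struct msg_stream *s, uint8_t *dst, uint32_t size)
    {
            memcpy(dst, s->element + s->read_position, size);
            s->read_position += size;

            /* Whole message consumed: the next pop starts a new element. */
            if (s->read_position >= s->msg_size) {
                    s->read_position = 0U;
            }

            return size;
    }

A reader typically calls stream_pop() twice per message: once with the header size to fetch the header, then with hdr.size minus the header size for the body.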
+static int falcon_queue_element_is_in_use_fb(struct nvgpu_falcon *flcn,
+	struct nvgpu_falcon_queue *queue, u32 queue_pos, bool *in_use)
+{
+	int err = 0;
+
+	if (queue_pos >= queue->size) {
+		err = -EINVAL;
+		goto exit;
+	}
+
+	*in_use = test_bit((int)queue_pos, (void *)&queue->fbq.element_in_use);
+
+exit:
+	return err;
+}
+
+static int falcon_queue_sweep_fb(struct nvgpu_falcon *flcn,
+	struct nvgpu_falcon_queue *queue)
+{
+	u32 head;
+	u32 tail;
+	bool in_use = false;
+	int err = 0;
+
+	tail = queue->fbq.tail;
+	err = queue->head(flcn, queue, &head, QUEUE_GET);
+	if (err != 0) {
+		nvgpu_err(flcn->g, "flcn-%d queue-%d, position GET failed",
+			flcn->flcn_id, queue->id);
+		goto exit;
+	}
+
+	/*
+	 * Step from the tail forward in the queue to see how many
+	 * consecutive entries can be made available.
+	 */
+	while (tail != head) {
+		if (falcon_queue_element_is_in_use_fb(flcn, queue,
+			tail, &in_use) != 0) {
+			break;
+		}
+
+		if (in_use) {
+			break;
+		}
+
+		tail = falcon_queue_get_next_fb(flcn, queue, tail);
+	}
+
+	/* Update the tail */
+	queue->fbq.tail = tail;
+
+exit:
+	return err;
+}
+
+/* assign FB-Q type specific ops */
+static int falcon_queue_init_fb_queue(struct nvgpu_falcon *flcn,
+	struct nvgpu_falcon_queue *queue)
+{
+	struct gk20a *g = flcn->g;
+	int err = 0;
+
+	nvgpu_log_fn(g, " ");
+
+	if (queue->oflag == OFLAG_READ) {
+		queue->fbq.read_buffer = nvgpu_kzalloc(g,
+			queue->fbq.element_size);
+		if (queue->fbq.read_buffer == NULL) {
+			err = -ENOMEM;
+			goto exit;
+		}
+	}
+
+	queue->offset = 0U;
+	queue->position = 0U;
+	queue->fbq.tail = 0U;
+	queue->fbq.element_in_use = 0U;
+	queue->fbq.read_position = 0U;
+
+	queue->head = falcon_queue_head;
+	queue->tail = falcon_queue_tail_fb;
+	queue->has_room = falcon_queue_has_room_fb;
+	queue->push = falcon_queue_push_fb;
+	queue->pop = falcon_queue_pop_fb;
+	queue->rewind = NULL; /* Not required for FB-Q */
+
+exit:
+	return err;
+}
+
 /* EMEM-Q specific ops */
 static int falcon_queue_push_emem(struct nvgpu_falcon *flcn,
 	struct nvgpu_falcon_queue *queue, void *data, u32 size)
@@ -315,7 +639,9 @@ static int falcon_queue_prepare_write(struct nvgpu_falcon *flcn,
 	}
 
 	if (q_rewind) {
-		err = queue->rewind(flcn, queue);
+		if (queue->rewind != NULL) {
+			err = queue->rewind(flcn, queue);
+		}
 	}
 
 exit:
@@ -488,6 +814,11 @@ void nvgpu_falcon_queue_free(struct nvgpu_falcon *flcn,
 	nvgpu_pmu_dbg(g, "flcn id-%d q-id %d: index %d ",
 		flcn->flcn_id, queue->id, queue->index);
 
+	if (queue->queue_type == QUEUE_TYPE_FB &&
+		queue->oflag == OFLAG_READ) {
+		nvgpu_kfree(g, queue->fbq.read_buffer);
+	}
+
 	/* destroy mutex */
 	nvgpu_mutex_destroy(&queue->mutex);
 
@@ -500,6 +831,11 @@ u32 nvgpu_falcon_queue_get_id(struct nvgpu_falcon_queue *queue)
 	return queue->id;
 }
 
+u32 nvgpu_falcon_queue_get_position(struct nvgpu_falcon_queue *queue)
+{
+	return queue->position;
+}
+
 u32 nvgpu_falcon_queue_get_index(struct nvgpu_falcon_queue *queue)
 {
 	return queue->index;
@@ -510,6 +846,75 @@ u32 nvgpu_falcon_queue_get_size(struct nvgpu_falcon_queue *queue)
 	return queue->size;
 }
 
+/* return the queue element size */
+u32 nvgpu_falcon_fbq_get_element_size(struct nvgpu_falcon_queue *queue)
+{
+	return queue->fbq.element_size;
+}
+
+/* return the queue offset within the super surface */
+u32 nvgpu_falcon_queue_get_fbq_offset(struct nvgpu_falcon_queue *queue)
+{
+	return queue->fbq.fb_offset;
+}
+
+/* allocate a work buffer for the current queue element */
+int nvgpu_falcon_queue_allocate_fbq_work_buffer(
+	struct nvgpu_falcon_queue *queue)
+{
+	int err = 0;
+
+	/*
+	 * Create a work buffer for the current FBQ element; the
+	 * allocation is freed once an ACK is received for the command.
+	 */
+	queue->fbq.write_buffer[queue->position] = nvgpu_kzalloc(queue->g,
+		queue->fbq.element_size);
+	if (queue->fbq.write_buffer[queue->position] == NULL) {
+		err = -ENOMEM;
+	}
+
+	nvgpu_pmu_dbg(queue->g, "FBQ alloc work buffer-%p q_pos %d",
+		queue->fbq.write_buffer[queue->position],
+		queue->position);
+
+	return err;
+}
+
+/* return a pointer to the current queue element's work buffer */
+u8 *nvgpu_falcon_queue_get_fbq_work_buffer(struct nvgpu_falcon_queue *queue)
+{
+	return queue->fbq.write_buffer[queue->position];
+}
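An element stays "in use" from push until the PMU's response has been fully consumed; only then may the caller release it, which is what nvgpu_falcon_queue_free_fbq_element() (added just below) is for: it frees the SYSMEM work buffer, clears the in-use bit, and sweeps the nvgpu-side tail forward. A hedged caller-side sketch; how queue_pos is recovered from the response (e.g. via the element_index recorded in the header at submission time) is an assumption here:

    /* Sketch: completion handling for one FB-queue command. */
    static int fbq_complete_example(struct nvgpu_falcon *flcn,
            struct nvgpu_falcon_queue *cmd_queue, u32 queue_pos)
    {
            /*
             * Frees write_buffer[queue_pos], clears its element_in_use
             * bit, then sweeps fbq.tail forward past all consecutive
             * free elements so the slots can be reused.
             */
            return nvgpu_falcon_queue_free_fbq_element(flcn, cmd_queue,
                    queue_pos);
    }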
%d wrk_buff %p", + queue_pos, queue->fbq.write_buffer[queue_pos]); + + /* free work buffer of current queue element */ + nvgpu_kfree(queue->g, queue->fbq.write_buffer[queue_pos]); + + queue->fbq.write_buffer[queue_pos] = NULL; + + err = falcon_queue_element_set_use_state_fb(flcn, queue, + queue_pos, false); + if (err != 0) { + nvgpu_err(flcn->g, "fb queue elelment %d free failed", + queue_pos); + goto exit; + } + + err = falcon_queue_sweep_fb(flcn, queue); + +exit: + return err; +} + int nvgpu_falcon_queue_init(struct nvgpu_falcon *flcn, struct nvgpu_falcon_queue **queue_p, struct nvgpu_falcon_queue_params params) @@ -529,6 +934,7 @@ int nvgpu_falcon_queue_init(struct nvgpu_falcon *flcn, return -ENOMEM; } + queue->g = g; queue->id = params.id; queue->index = params.index; queue->offset = params.offset; @@ -549,6 +955,16 @@ int nvgpu_falcon_queue_init(struct nvgpu_falcon *flcn, case QUEUE_TYPE_EMEM: falcon_queue_init_emem_queue(flcn, queue); break; + case QUEUE_TYPE_FB: + queue->fbq.super_surface_mem = params.super_surface_mem; + queue->fbq.element_size = params.fbq_element_size; + queue->fbq.fb_offset = params.fbq_offset; + + err = falcon_queue_init_fb_queue(flcn, queue); + if (err != 0x0) { + goto exit; + } + break; default: err = -EINVAL; break; diff --git a/drivers/gpu/nvgpu/include/nvgpu/falcon.h b/drivers/gpu/nvgpu/include/nvgpu/falcon.h index 6aced577d..acb32157f 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/falcon.h +++ b/drivers/gpu/nvgpu/include/nvgpu/falcon.h @@ -79,6 +79,7 @@ /* Queue Type */ #define QUEUE_TYPE_DMEM 0x0U #define QUEUE_TYPE_EMEM 0x1U +#define QUEUE_TYPE_FB 0x2U struct gk20a; struct nvgpu_falcon; @@ -99,6 +100,16 @@ struct nvgpu_falcon_queue_params { u32 size; /* open-flag */ u32 oflag; + + /* fb queue params*/ + /* Holds the offset of queue data (0th element) */ + u32 fbq_offset; + + /* fb queue element size*/ + u32 fbq_element_size; + + /* Holds super surface base address */ + struct nvgpu_mem *super_surface_mem; }; struct nvgpu_falcon_bl_info { @@ -158,8 +169,16 @@ int nvgpu_falcon_queue_push(struct nvgpu_falcon *flcn, void nvgpu_falcon_queue_free(struct nvgpu_falcon *flcn, struct nvgpu_falcon_queue **queue_p); u32 nvgpu_falcon_queue_get_id(struct nvgpu_falcon_queue *queue); +u32 nvgpu_falcon_queue_get_position(struct nvgpu_falcon_queue *queue); u32 nvgpu_falcon_queue_get_index(struct nvgpu_falcon_queue *queue); u32 nvgpu_falcon_queue_get_size(struct nvgpu_falcon_queue *queue); +u32 nvgpu_falcon_fbq_get_element_size(struct nvgpu_falcon_queue *queue); +u32 nvgpu_falcon_queue_get_fbq_offset(struct nvgpu_falcon_queue *queue); +u8 *nvgpu_falcon_queue_get_fbq_work_buffer(struct nvgpu_falcon_queue *queue); +int nvgpu_falcon_queue_allocate_fbq_work_buffer( + struct nvgpu_falcon_queue *queue); +int nvgpu_falcon_queue_free_fbq_element(struct nvgpu_falcon *flcn, + struct nvgpu_falcon_queue *queue, u32 queue_pos); int nvgpu_falcon_sw_init(struct gk20a *g, u32 flcn_id); void nvgpu_falcon_sw_free(struct gk20a *g, u32 flcn_id);
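For completeness, a sketch of how the new params are expected to be wired up when creating an FB queue. The numeric values and the OFLAG_WRITE flag are illustrative, and the params struct is assumed to also carry the queue-type selector consumed by the switch in nvgpu_falcon_queue_init() (not visible in the hunks above):

    /* Sketch: create a PMU FB command queue backed by the super surface. */
    static int fbq_create_example(struct nvgpu_falcon *flcn,
            struct nvgpu_mem *super_surface, u32 queue_id, u32 queue_index,
            struct nvgpu_falcon_queue **queue_p)
    {
            struct nvgpu_falcon_queue_params params = {0};

            params.id = queue_id;
            params.index = queue_index;
            params.size = 16U;              /* illustrative element count */
            params.oflag = OFLAG_WRITE;     /* assumed counterpart of OFLAG_READ */

            /* FB-queue specific wiring added by this patch */
            params.super_surface_mem = super_surface;
            params.fbq_element_size = 128U; /* illustrative */
            params.fbq_offset = 0U;         /* queue 0 at the surface start */

            /*
             * Assumed: params also selects QUEUE_TYPE_FB so that
             * nvgpu_falcon_queue_init() takes the new FB-queue branch.
             */
            return nvgpu_falcon_queue_init(flcn, queue_p, params);
    }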