mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 17:36:20 +03:00
gpu: nvgpu: vgpu: cyclestat snapshot support
Add support for cyclestats snapshots in the virtual case

Bug 1700143
JIRA EVLR-278

Change-Id: I376a8804d57324f43eb16452d857a3b7bb0ecc90
Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com>
Reviewed-on: http://git-master/r/1211547
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
70cad5fbb5
commit
9aa7de15c2
@@ -105,6 +105,7 @@ nvgpu-$(CONFIG_TEGRA_GR_VIRTUALIZATION) += \
|
|||||||
vgpu/dbg_vgpu.o \
|
vgpu/dbg_vgpu.o \
|
||||||
vgpu/fecs_trace_vgpu.o \
|
vgpu/fecs_trace_vgpu.o \
|
||||||
vgpu/tsg_vgpu.o \
|
vgpu/tsg_vgpu.o \
|
||||||
|
vgpu/css_vgpu.o \
|
||||||
vgpu/gk20a/vgpu_hal_gk20a.o \
|
vgpu/gk20a/vgpu_hal_gk20a.o \
|
||||||
vgpu/gk20a/vgpu_gr_gk20a.o \
|
vgpu/gk20a/vgpu_gr_gk20a.o \
|
||||||
vgpu/gm20b/vgpu_hal_gm20b.o \
|
vgpu/gm20b/vgpu_hal_gm20b.o \
|
||||||
|
|||||||
@@ -603,7 +603,7 @@ static int gk20a_flush_cycle_stats_snapshot(struct channel_gk20a *ch)
|
|||||||
|
|
||||||
mutex_lock(&ch->cs_client_mutex);
|
mutex_lock(&ch->cs_client_mutex);
|
||||||
if (ch->cs_client)
|
if (ch->cs_client)
|
||||||
ret = gr_gk20a_css_flush(ch->g, ch->cs_client);
|
ret = gr_gk20a_css_flush(ch, ch->cs_client);
|
||||||
else
|
else
|
||||||
ret = -EBADF;
|
ret = -EBADF;
|
||||||
mutex_unlock(&ch->cs_client_mutex);
|
mutex_unlock(&ch->cs_client_mutex);
|
||||||
@@ -622,7 +622,7 @@ static int gk20a_attach_cycle_stats_snapshot(struct channel_gk20a *ch,
|
|||||||
if (ch->cs_client) {
|
if (ch->cs_client) {
|
||||||
ret = -EEXIST;
|
ret = -EEXIST;
|
||||||
} else {
|
} else {
|
||||||
ret = gr_gk20a_css_attach(ch->g,
|
ret = gr_gk20a_css_attach(ch,
|
||||||
dmabuf_fd,
|
dmabuf_fd,
|
||||||
perfmon_id_count,
|
perfmon_id_count,
|
||||||
perfmon_id_start,
|
perfmon_id_start,
|
||||||
@@ -639,7 +639,7 @@ static int gk20a_free_cycle_stats_snapshot(struct channel_gk20a *ch)
|
|||||||
|
|
||||||
mutex_lock(&ch->cs_client_mutex);
|
mutex_lock(&ch->cs_client_mutex);
|
||||||
if (ch->cs_client) {
|
if (ch->cs_client) {
|
||||||
ret = gr_gk20a_css_detach(ch->g, ch->cs_client);
|
ret = gr_gk20a_css_detach(ch, ch->cs_client);
|
||||||
ch->cs_client = NULL;
|
ch->cs_client = NULL;
|
||||||
} else {
|
} else {
|
||||||
ret = 0;
|
ret = 0;
|
||||||
|
|||||||
@@ -25,93 +25,13 @@
|
|||||||
#include "gk20a.h"
|
#include "gk20a.h"
|
||||||
#include "hw_perf_gk20a.h"
|
#include "hw_perf_gk20a.h"
|
||||||
#include "hw_mc_gk20a.h"
|
#include "hw_mc_gk20a.h"
|
||||||
|
#include "css_gr_gk20a.h"
|
||||||
|
|
||||||
|
|
||||||
/* cycle stats fifo header (must match NvSnapshotBufferFifo) */
|
|
||||||
struct gk20a_cs_snapshot_fifo {
|
|
||||||
/* layout description of the buffer */
|
|
||||||
u32 start;
|
|
||||||
u32 end;
|
|
||||||
|
|
||||||
/* snafu bits */
|
|
||||||
u32 hw_overflow_events_occured;
|
|
||||||
u32 sw_overflow_events_occured;
|
|
||||||
|
|
||||||
/* the kernel copies new entries to put and
|
|
||||||
* increment the put++. if put == get then
|
|
||||||
* overflowEventsOccured++
|
|
||||||
*/
|
|
||||||
u32 put;
|
|
||||||
u32 _reserved10;
|
|
||||||
u32 _reserved11;
|
|
||||||
u32 _reserved12;
|
|
||||||
|
|
||||||
/* the driver/client reads from get until
|
|
||||||
* put==get, get++ */
|
|
||||||
u32 get;
|
|
||||||
u32 _reserved20;
|
|
||||||
u32 _reserved21;
|
|
||||||
u32 _reserved22;
|
|
||||||
|
|
||||||
/* unused */
|
|
||||||
u32 _reserved30;
|
|
||||||
u32 _reserved31;
|
|
||||||
u32 _reserved32;
|
|
||||||
u32 _reserved33;
|
|
||||||
};
|
|
||||||
|
|
||||||
/* cycle stats fifo entry (must match NvSnapshotBufferFifoEntry) */
|
|
||||||
struct gk20a_cs_snapshot_fifo_entry {
|
|
||||||
/* global 48 timestamp */
|
|
||||||
u32 timestamp31_00:32;
|
|
||||||
u32 timestamp39_32:8;
|
|
||||||
|
|
||||||
/* id of perfmon, should correlate with CSS_MAX_PERFMON_IDS */
|
|
||||||
u32 perfmon_id:8;
|
|
||||||
|
|
||||||
/* typically samples_counter is wired to #pmtrigger count */
|
|
||||||
u32 samples_counter:12;
|
|
||||||
|
|
||||||
/* DS=Delay Sample, SZ=Size (0=32B, 1=16B) */
|
|
||||||
u32 ds:1;
|
|
||||||
u32 sz:1;
|
|
||||||
u32 zero0:1;
|
|
||||||
u32 zero1:1;
|
|
||||||
|
|
||||||
/* counter results */
|
|
||||||
u32 event_cnt:32;
|
|
||||||
u32 trigger0_cnt:32;
|
|
||||||
u32 trigger1_cnt:32;
|
|
||||||
u32 sample_cnt:32;
|
|
||||||
|
|
||||||
/* Local PmTrigger results for Maxwell+ or padding otherwise */
|
|
||||||
u16 local_trigger_b_count:16;
|
|
||||||
u16 book_mark_b:16;
|
|
||||||
u16 local_trigger_a_count:16;
|
|
||||||
u16 book_mark_a:16;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
/* cycle stats snapshot client data (e.g. associated with channel) */
|
|
||||||
struct gk20a_cs_snapshot_client {
|
|
||||||
struct list_head list;
|
|
||||||
u32 dmabuf_fd;
|
|
||||||
struct dma_buf *dma_handler;
|
|
||||||
struct gk20a_cs_snapshot_fifo *snapshot;
|
|
||||||
u32 snapshot_size;
|
|
||||||
u32 perfmon_start;
|
|
||||||
u32 perfmon_count;
|
|
||||||
};
|
|
||||||
|
|
||||||
/* check client for pointed perfmon ownership */
|
/* check client for pointed perfmon ownership */
|
||||||
#define CONTAINS_PERFMON(cl, pm) \
|
#define CONTAINS_PERFMON(cl, pm) \
|
||||||
((cl)->perfmon_start <= (pm) && \
|
((cl)->perfmon_start <= (pm) && \
|
||||||
((pm) - (cl)->perfmon_start) < (cl)->perfmon_count)
|
((pm) - (cl)->perfmon_start) < (cl)->perfmon_count)
|
||||||
|
|
||||||
/* the minimal size of HW buffer - should be enough to avoid HW overflows */
|
|
||||||
#define CSS_MIN_HW_SNAPSHOT_SIZE (8 * 1024 * 1024)
|
|
||||||
|
|
||||||
/* the minimal size of client buffer */
|
/* the minimal size of client buffer */
|
||||||
#define CSS_MIN_CLIENT_SNAPSHOT_SIZE \
|
#define CSS_MIN_CLIENT_SNAPSHOT_SIZE \
|
||||||
(sizeof(struct gk20a_cs_snapshot_fifo) + \
|
(sizeof(struct gk20a_cs_snapshot_fifo) + \
|
||||||
@@ -131,20 +51,6 @@ struct gk20a_cs_snapshot_client {
|
|||||||
/* should correlate with size of gk20a_cs_snapshot_fifo_entry::perfmon_id */
|
/* should correlate with size of gk20a_cs_snapshot_fifo_entry::perfmon_id */
|
||||||
#define CSS_MAX_PERFMON_IDS 256
|
#define CSS_MAX_PERFMON_IDS 256
|
||||||
|
|
||||||
/* local definitions to avoid hardcodes sizes and shifts */
|
|
||||||
#define PM_BITMAP_SIZE DIV_ROUND_UP(CSS_MAX_PERFMON_IDS, BITS_PER_LONG)
|
|
||||||
|
|
||||||
/* cycle stats snapshot control structure for one HW entry and many clients */
|
|
||||||
struct gk20a_cs_snapshot {
|
|
||||||
unsigned long perfmon_ids[PM_BITMAP_SIZE];
|
|
||||||
struct list_head clients;
|
|
||||||
struct mem_desc hw_memdesc;
|
|
||||||
/* pointer to allocated cpu_va memory where GPU place data */
|
|
||||||
struct gk20a_cs_snapshot_fifo_entry *hw_snapshot;
|
|
||||||
struct gk20a_cs_snapshot_fifo_entry *hw_end;
|
|
||||||
struct gk20a_cs_snapshot_fifo_entry *hw_get;
|
|
||||||
};
|
|
||||||
|
|
||||||
/* reports whether the hw queue overflowed */
|
/* reports whether the hw queue overflowed */
|
||||||
static inline bool css_hw_get_overflow_status(struct gk20a *g)
|
static inline bool css_hw_get_overflow_status(struct gk20a *g)
|
||||||
{
|
{
|
||||||
@@ -215,10 +121,13 @@ static int css_gr_create_shared_data(struct gr_gk20a *gr)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int css_hw_enable_snapshot(struct gr_gk20a *gr, u32 snapshot_size)
|
static int css_hw_enable_snapshot(struct channel_gk20a *ch,
|
||||||
|
struct gk20a_cs_snapshot_client *cs_client)
|
||||||
{
|
{
|
||||||
struct gk20a *g = gr->g;
|
struct gk20a *g = ch->g;
|
||||||
|
struct gr_gk20a *gr = &g->gr;
|
||||||
struct gk20a_cs_snapshot *data = gr->cs_data;
|
struct gk20a_cs_snapshot *data = gr->cs_data;
|
||||||
|
u32 snapshot_size = cs_client->snapshot_size;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
u32 virt_addr_lo;
|
u32 virt_addr_lo;
|
||||||
@@ -317,9 +226,11 @@ static void css_hw_disable_snapshot(struct gr_gk20a *gr)
|
|||||||
|
|
||||||
static void css_gr_free_shared_data(struct gr_gk20a *gr)
|
static void css_gr_free_shared_data(struct gr_gk20a *gr)
|
||||||
{
|
{
|
||||||
|
struct gk20a *g = gr->g;
|
||||||
|
|
||||||
if (gr->cs_data) {
|
if (gr->cs_data) {
|
||||||
/* the clients list is expected to be empty */
|
/* the clients list is expected to be empty */
|
||||||
css_hw_disable_snapshot(gr);
|
g->ops.css.disable_snapshot(gr);
|
||||||
|
|
||||||
/* release the objects */
|
/* release the objects */
|
||||||
kfree(gr->cs_data);
|
kfree(gr->cs_data);
|
||||||
@@ -344,12 +255,15 @@ css_gr_search_client(struct list_head *clients, u32 perfmon)
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int css_gr_flush_snapshots(struct gr_gk20a *gr)
|
static int css_gr_flush_snapshots(struct channel_gk20a *ch)
|
||||||
{
|
{
|
||||||
struct gk20a *g = gr->g;
|
struct gk20a *g = ch->g;
|
||||||
|
struct gr_gk20a *gr = &g->gr;
|
||||||
struct gk20a_cs_snapshot *css = gr->cs_data;
|
struct gk20a_cs_snapshot *css = gr->cs_data;
|
||||||
struct gk20a_cs_snapshot_client *cur;
|
struct gk20a_cs_snapshot_client *cur;
|
||||||
u32 pending;
|
u32 pending, completed;
|
||||||
|
bool hw_overflow;
|
||||||
|
int err;
|
||||||
|
|
||||||
/* variables for iterating over HW entries */
|
/* variables for iterating over HW entries */
|
||||||
u32 sid;
|
u32 sid;
|
||||||
@@ -360,24 +274,25 @@ static int css_gr_flush_snapshots(struct gr_gk20a *gr)
|
|||||||
struct gk20a_cs_snapshot_fifo *dst;
|
struct gk20a_cs_snapshot_fifo *dst;
|
||||||
struct gk20a_cs_snapshot_fifo_entry *dst_get;
|
struct gk20a_cs_snapshot_fifo_entry *dst_get;
|
||||||
struct gk20a_cs_snapshot_fifo_entry *dst_put;
|
struct gk20a_cs_snapshot_fifo_entry *dst_put;
|
||||||
|
struct gk20a_cs_snapshot_fifo_entry *dst_nxt;
|
||||||
struct gk20a_cs_snapshot_fifo_entry *dst_head;
|
struct gk20a_cs_snapshot_fifo_entry *dst_head;
|
||||||
struct gk20a_cs_snapshot_fifo_entry *dst_tail;
|
struct gk20a_cs_snapshot_fifo_entry *dst_tail;
|
||||||
|
|
||||||
if (!css)
|
if (!css)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
if (!css->hw_snapshot)
|
|
||||||
return -EINVAL;
|
|
||||||
|
|
||||||
if (list_empty(&css->clients))
|
if (list_empty(&css->clients))
|
||||||
return -EBADF;
|
return -EBADF;
|
||||||
|
|
||||||
/* check data available */
|
/* check data available */
|
||||||
pending = css_hw_get_pending_snapshots(g);
|
err = g->ops.css.check_data_available(ch, &pending, &hw_overflow);
|
||||||
|
if (err)
|
||||||
|
return err;
|
||||||
|
|
||||||
if (!pending)
|
if (!pending)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
if (css_hw_get_overflow_status(g)) {
|
if (hw_overflow) {
|
||||||
struct list_head *pos;
|
struct list_head *pos;
|
||||||
|
|
||||||
list_for_each(pos, &css->clients) {
|
list_for_each(pos, &css->clients) {
|
||||||
@@ -387,11 +302,12 @@ static int css_gr_flush_snapshots(struct gr_gk20a *gr)
|
|||||||
}
|
}
|
||||||
|
|
||||||
gk20a_warn(dev_from_gk20a(g),
|
gk20a_warn(dev_from_gk20a(g),
|
||||||
"cyclestats: hardware overflow detected\n");
|
"cyclestats: hardware overflow detected\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
/* proceed all items in HW buffer */
|
/* process all items in HW buffer */
|
||||||
sid = 0;
|
sid = 0;
|
||||||
|
completed = 0;
|
||||||
cur = NULL;
|
cur = NULL;
|
||||||
dst = NULL;
|
dst = NULL;
|
||||||
dst_put = NULL;
|
dst_put = NULL;
|
||||||
@@ -419,7 +335,11 @@ static int css_gr_flush_snapshots(struct gr_gk20a *gr)
|
|||||||
dst_get = CSS_FIFO_ENTRY(dst, dst->get);
|
dst_get = CSS_FIFO_ENTRY(dst, dst->get);
|
||||||
dst_put = CSS_FIFO_ENTRY(dst, dst->put);
|
dst_put = CSS_FIFO_ENTRY(dst, dst->put);
|
||||||
dst_head = CSS_FIFO_ENTRY(dst, dst->start);
|
dst_head = CSS_FIFO_ENTRY(dst, dst->start);
|
||||||
dst_tail = CSS_FIFO_ENTRY(dst, dst->end) - 1;
|
dst_tail = CSS_FIFO_ENTRY(dst, dst->end);
|
||||||
|
|
||||||
|
dst_nxt = dst_put + 1;
|
||||||
|
if (dst_nxt == dst_tail)
|
||||||
|
dst_nxt = dst_head;
|
||||||
} else {
|
} else {
|
||||||
/* client not found - skipping this entry */
|
/* client not found - skipping this entry */
|
||||||
gk20a_warn(dev_from_gk20a(g),
|
gk20a_warn(dev_from_gk20a(g),
|
||||||
@@ -430,8 +350,7 @@ static int css_gr_flush_snapshots(struct gr_gk20a *gr)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* check for software overflows */
|
/* check for software overflows */
|
||||||
if (dst_put + 1 == dst_get ||
|
if (dst_nxt == dst_get) {
|
||||||
(dst_put == dst_tail && dst_get == dst_head)) {
|
|
||||||
/* no data copy, no pointer updates */
|
/* no data copy, no pointer updates */
|
||||||
dst->sw_overflow_events_occured++;
|
dst->sw_overflow_events_occured++;
|
||||||
gk20a_warn(dev_from_gk20a(g),
|
gk20a_warn(dev_from_gk20a(g),
|
||||||
@@ -439,10 +358,12 @@ static int css_gr_flush_snapshots(struct gr_gk20a *gr)
|
|||||||
src->perfmon_id);
|
src->perfmon_id);
|
||||||
} else {
|
} else {
|
||||||
*dst_put = *src;
|
*dst_put = *src;
|
||||||
if (dst_put == dst_tail)
|
completed++;
|
||||||
dst_put = dst_head;
|
|
||||||
else
|
dst_put = dst_nxt++;
|
||||||
dst_put++;
|
|
||||||
|
if (dst_nxt == dst_tail)
|
||||||
|
dst_nxt = dst_head;
|
||||||
}
|
}
|
||||||
|
|
||||||
next_hw_fifo_entry:
|
next_hw_fifo_entry:
|
||||||
@@ -465,14 +386,17 @@ next_hw_fifo_entry:
|
|||||||
(css->hw_end - css->hw_get) * sizeof(*src));
|
(css->hw_end - css->hw_get) * sizeof(*src));
|
||||||
}
|
}
|
||||||
gr->cs_data->hw_get = src;
|
gr->cs_data->hw_get = src;
|
||||||
css_hw_set_handled_snapshots(g, sid);
|
|
||||||
if (pending != sid) {
|
if (g->ops.css.set_handled_snapshots)
|
||||||
|
g->ops.css.set_handled_snapshots(g, sid);
|
||||||
|
|
||||||
|
if (completed != sid) {
|
||||||
/* not all entries proceed correctly. some of problems */
|
/* not all entries proceed correctly. some of problems */
|
||||||
/* reported as overflows, some as orphaned perfmons, */
|
/* reported as overflows, some as orphaned perfmons, */
|
||||||
/* but it will be better notify with summary about it */
|
/* but it will be better notify with summary about it */
|
||||||
gk20a_warn(dev_from_gk20a(g),
|
gk20a_warn(dev_from_gk20a(g),
|
||||||
"cyclestats: done %u from %u entries\n",
|
"cyclestats: completed %u from %u entries\n",
|
||||||
sid, pending);
|
completed, pending);
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
@@ -511,7 +435,8 @@ static u32 css_gr_release_perfmon_ids(struct gk20a_cs_snapshot *data,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static int css_gr_free_client_data(struct gk20a_cs_snapshot *data,
|
static int css_gr_free_client_data(struct gk20a *g,
|
||||||
|
struct gk20a_cs_snapshot *data,
|
||||||
struct gk20a_cs_snapshot_client *client)
|
struct gk20a_cs_snapshot_client *client)
|
||||||
{
|
{
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
@@ -519,8 +444,9 @@ static int css_gr_free_client_data(struct gk20a_cs_snapshot *data,
|
|||||||
if (client->list.next && client->list.prev)
|
if (client->list.next && client->list.prev)
|
||||||
list_del(&client->list);
|
list_del(&client->list);
|
||||||
|
|
||||||
if (client->perfmon_start && client->perfmon_count) {
|
if (client->perfmon_start && client->perfmon_count
|
||||||
if (client->perfmon_count != css_gr_release_perfmon_ids(data,
|
&& g->ops.css.release_perfmon_ids) {
|
||||||
|
if (client->perfmon_count != g->ops.css.release_perfmon_ids(data,
|
||||||
client->perfmon_start, client->perfmon_count))
|
client->perfmon_start, client->perfmon_count))
|
||||||
ret = -EINVAL;
|
ret = -EINVAL;
|
||||||
}
|
}
|
||||||
@@ -536,7 +462,8 @@ static int css_gr_free_client_data(struct gk20a_cs_snapshot *data,
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int css_gr_create_client_data(struct gk20a_cs_snapshot *data,
|
static int css_gr_create_client_data(struct gk20a *g,
|
||||||
|
struct gk20a_cs_snapshot *data,
|
||||||
u32 dmabuf_fd, u32 perfmon_count,
|
u32 dmabuf_fd, u32 perfmon_count,
|
||||||
struct gk20a_cs_snapshot_client **client)
|
struct gk20a_cs_snapshot_client **client)
|
||||||
{
|
{
|
||||||
@@ -581,8 +508,12 @@ static int css_gr_create_client_data(struct gk20a_cs_snapshot *data,
|
|||||||
cur->snapshot->put = cur->snapshot->start;
|
cur->snapshot->put = cur->snapshot->start;
|
||||||
|
|
||||||
cur->perfmon_count = perfmon_count;
|
cur->perfmon_count = perfmon_count;
|
||||||
if (cur->perfmon_count) {
|
|
||||||
cur->perfmon_start = css_gr_allocate_perfmon_ids(data,
|
/* In virtual case, perfmon ID allocation is handled by the server
|
||||||
|
* at the time of the attach (allocate_perfmon_ids is NULL in this case)
|
||||||
|
*/
|
||||||
|
if (cur->perfmon_count && g->ops.css.allocate_perfmon_ids) {
|
||||||
|
cur->perfmon_start = g->ops.css.allocate_perfmon_ids(data,
|
||||||
cur->perfmon_count);
|
cur->perfmon_count);
|
||||||
if (!cur->perfmon_start) {
|
if (!cur->perfmon_start) {
|
||||||
ret = -ENOENT;
|
ret = -ENOENT;
|
||||||
@@ -598,19 +529,20 @@ static int css_gr_create_client_data(struct gk20a_cs_snapshot *data,
|
|||||||
failed:
|
failed:
|
||||||
*client = NULL;
|
*client = NULL;
|
||||||
if (cur)
|
if (cur)
|
||||||
css_gr_free_client_data(data, cur);
|
css_gr_free_client_data(g, data, cur);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int gr_gk20a_css_attach(struct gk20a *g,
|
int gr_gk20a_css_attach(struct channel_gk20a *ch,
|
||||||
u32 dmabuf_fd,
|
u32 dmabuf_fd,
|
||||||
u32 perfmon_count,
|
u32 perfmon_count,
|
||||||
u32 *perfmon_start,
|
u32 *perfmon_start,
|
||||||
struct gk20a_cs_snapshot_client **cs_client)
|
struct gk20a_cs_snapshot_client **cs_client)
|
||||||
{
|
{
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
struct gk20a *g = ch->g;
|
||||||
struct gr_gk20a *gr;
|
struct gr_gk20a *gr;
|
||||||
|
|
||||||
/* we must have a placeholder to store pointer to client structure */
|
/* we must have a placeholder to store pointer to client structure */
|
||||||
@@ -630,14 +562,14 @@ int gr_gk20a_css_attach(struct gk20a *g,
|
|||||||
if (ret)
|
if (ret)
|
||||||
goto failed;
|
goto failed;
|
||||||
|
|
||||||
ret = css_gr_create_client_data(gr->cs_data,
|
ret = css_gr_create_client_data(g, gr->cs_data,
|
||||||
dmabuf_fd,
|
dmabuf_fd,
|
||||||
perfmon_count,
|
perfmon_count,
|
||||||
cs_client);
|
cs_client);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto failed;
|
goto failed;
|
||||||
|
|
||||||
ret = css_hw_enable_snapshot(gr, (*cs_client)->snapshot_size);
|
ret = g->ops.css.enable_snapshot(ch, *cs_client);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto failed;
|
goto failed;
|
||||||
|
|
||||||
@@ -651,7 +583,7 @@ int gr_gk20a_css_attach(struct gk20a *g,
|
|||||||
failed:
|
failed:
|
||||||
if (gr->cs_data) {
|
if (gr->cs_data) {
|
||||||
if (*cs_client) {
|
if (*cs_client) {
|
||||||
css_gr_free_client_data(gr->cs_data, *cs_client);
|
css_gr_free_client_data(g, gr->cs_data, *cs_client);
|
||||||
*cs_client = NULL;
|
*cs_client = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -666,10 +598,11 @@ failed:
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
int gr_gk20a_css_detach(struct gk20a *g,
|
int gr_gk20a_css_detach(struct channel_gk20a *ch,
|
||||||
struct gk20a_cs_snapshot_client *cs_client)
|
struct gk20a_cs_snapshot_client *cs_client)
|
||||||
{
|
{
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
struct gk20a *g = ch->g;
|
||||||
struct gr_gk20a *gr;
|
struct gr_gk20a *gr;
|
||||||
|
|
||||||
if (!cs_client)
|
if (!cs_client)
|
||||||
@@ -680,7 +613,10 @@ int gr_gk20a_css_detach(struct gk20a *g,
|
|||||||
if (gr->cs_data) {
|
if (gr->cs_data) {
|
||||||
struct gk20a_cs_snapshot *data = gr->cs_data;
|
struct gk20a_cs_snapshot *data = gr->cs_data;
|
||||||
|
|
||||||
ret = css_gr_free_client_data(data, cs_client);
|
if (g->ops.css.detach_snapshot)
|
||||||
|
g->ops.css.detach_snapshot(ch, cs_client);
|
||||||
|
|
||||||
|
ret = css_gr_free_client_data(g, data, cs_client);
|
||||||
if (list_empty(&data->clients))
|
if (list_empty(&data->clients))
|
||||||
css_gr_free_shared_data(gr);
|
css_gr_free_shared_data(gr);
|
||||||
} else {
|
} else {
|
||||||
@@ -691,10 +627,11 @@ int gr_gk20a_css_detach(struct gk20a *g,
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
int gr_gk20a_css_flush(struct gk20a *g,
|
int gr_gk20a_css_flush(struct channel_gk20a *ch,
|
||||||
struct gk20a_cs_snapshot_client *cs_client)
|
struct gk20a_cs_snapshot_client *cs_client)
|
||||||
{
|
{
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
struct gk20a *g = ch->g;
|
||||||
struct gr_gk20a *gr;
|
struct gr_gk20a *gr;
|
||||||
|
|
||||||
if (!cs_client)
|
if (!cs_client)
|
||||||
@@ -702,7 +639,7 @@ int gr_gk20a_css_flush(struct gk20a *g,
|
|||||||
|
|
||||||
gr = &g->gr;
|
gr = &g->gr;
|
||||||
mutex_lock(&gr->cs_lock);
|
mutex_lock(&gr->cs_lock);
|
||||||
ret = css_gr_flush_snapshots(gr);
|
ret = css_gr_flush_snapshots(ch);
|
||||||
mutex_unlock(&gr->cs_lock);
|
mutex_unlock(&gr->cs_lock);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
@@ -718,3 +655,31 @@ void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g)
|
|||||||
mutex_unlock(&gr->cs_lock);
|
mutex_unlock(&gr->cs_lock);
|
||||||
mutex_destroy(&gr->cs_lock);
|
mutex_destroy(&gr->cs_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int css_hw_check_data_available(struct channel_gk20a *ch, u32 *pending,
|
||||||
|
bool *hw_overflow)
|
||||||
|
{
|
||||||
|
struct gk20a *g = ch->g;
|
||||||
|
struct gr_gk20a *gr = &g->gr;
|
||||||
|
struct gk20a_cs_snapshot *css = gr->cs_data;
|
||||||
|
|
||||||
|
if (!css->hw_snapshot)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
*pending = css_hw_get_pending_snapshots(g);
|
||||||
|
if (!*pending)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
*hw_overflow = css_hw_get_overflow_status(g);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gk20a_init_css_ops(struct gpu_ops *gops)
|
||||||
|
{
|
||||||
|
gops->css.enable_snapshot = css_hw_enable_snapshot;
|
||||||
|
gops->css.disable_snapshot = css_hw_disable_snapshot;
|
||||||
|
gops->css.check_data_available = css_hw_check_data_available;
|
||||||
|
gops->css.set_handled_snapshots = css_hw_set_handled_snapshots;
|
||||||
|
gops->css.allocate_perfmon_ids = css_gr_allocate_perfmon_ids;
|
||||||
|
gops->css.release_perfmon_ids = css_gr_release_perfmon_ids;
|
||||||
|
}
|
||||||
|
|||||||
119
drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h
Normal file
119
drivers/gpu/nvgpu/gk20a/css_gr_gk20a.h
Normal file
@@ -0,0 +1,119 @@
|
|||||||
|
/*
|
||||||
|
* GK20A Cycle stats snapshots support (subsystem for gr_gk20a).
|
||||||
|
*
|
||||||
|
* Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms and conditions of the GNU General Public License,
|
||||||
|
* version 2, as published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||||
|
* more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef CSS_GR_GK20A_H
|
||||||
|
#define CSS_GR_GK20A_H
|
||||||
|
|
||||||
|
/* the minimal size of HW buffer - should be enough to avoid HW overflows */
|
||||||
|
#define CSS_MIN_HW_SNAPSHOT_SIZE (8 * 1024 * 1024)
|
||||||
|
|
||||||
|
/* cycle stats fifo header (must match NvSnapshotBufferFifo) */
|
||||||
|
struct gk20a_cs_snapshot_fifo {
|
||||||
|
/* layout description of the buffer */
|
||||||
|
u32 start;
|
||||||
|
u32 end;
|
||||||
|
|
||||||
|
/* snafu bits */
|
||||||
|
u32 hw_overflow_events_occured;
|
||||||
|
u32 sw_overflow_events_occured;
|
||||||
|
|
||||||
|
/* the kernel copies new entries to put and
|
||||||
|
* increment the put++. if put == get then
|
||||||
|
* overflowEventsOccured++
|
||||||
|
*/
|
||||||
|
u32 put;
|
||||||
|
u32 _reserved10;
|
||||||
|
u32 _reserved11;
|
||||||
|
u32 _reserved12;
|
||||||
|
|
||||||
|
/* the driver/client reads from get until
|
||||||
|
* put==get, get++ */
|
||||||
|
u32 get;
|
||||||
|
u32 _reserved20;
|
||||||
|
u32 _reserved21;
|
||||||
|
u32 _reserved22;
|
||||||
|
|
||||||
|
/* unused */
|
||||||
|
u32 _reserved30;
|
||||||
|
u32 _reserved31;
|
||||||
|
u32 _reserved32;
|
||||||
|
u32 _reserved33;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* cycle stats fifo entry (must match NvSnapshotBufferFifoEntry) */
|
||||||
|
struct gk20a_cs_snapshot_fifo_entry {
|
||||||
|
/* global 48 timestamp */
|
||||||
|
u32 timestamp31_00:32;
|
||||||
|
u32 timestamp39_32:8;
|
||||||
|
|
||||||
|
/* id of perfmon, should correlate with CSS_MAX_PERFMON_IDS */
|
||||||
|
u32 perfmon_id:8;
|
||||||
|
|
||||||
|
/* typically samples_counter is wired to #pmtrigger count */
|
||||||
|
u32 samples_counter:12;
|
||||||
|
|
||||||
|
/* DS=Delay Sample, SZ=Size (0=32B, 1=16B) */
|
||||||
|
u32 ds:1;
|
||||||
|
u32 sz:1;
|
||||||
|
u32 zero0:1;
|
||||||
|
u32 zero1:1;
|
||||||
|
|
||||||
|
/* counter results */
|
||||||
|
u32 event_cnt:32;
|
||||||
|
u32 trigger0_cnt:32;
|
||||||
|
u32 trigger1_cnt:32;
|
||||||
|
u32 sample_cnt:32;
|
||||||
|
|
||||||
|
/* Local PmTrigger results for Maxwell+ or padding otherwise */
|
||||||
|
u16 local_trigger_b_count:16;
|
||||||
|
u16 book_mark_b:16;
|
||||||
|
u16 local_trigger_a_count:16;
|
||||||
|
u16 book_mark_a:16;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* cycle stats snapshot client data (e.g. associated with channel) */
|
||||||
|
struct gk20a_cs_snapshot_client {
|
||||||
|
struct list_head list;
|
||||||
|
u32 dmabuf_fd;
|
||||||
|
struct dma_buf *dma_handler;
|
||||||
|
struct gk20a_cs_snapshot_fifo *snapshot;
|
||||||
|
u32 snapshot_size;
|
||||||
|
u32 perfmon_start;
|
||||||
|
u32 perfmon_count;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* should correlate with size of gk20a_cs_snapshot_fifo_entry::perfmon_id */
|
||||||
|
#define CSS_MAX_PERFMON_IDS 256
|
||||||
|
|
||||||
|
/* local definitions to avoid hardcodes sizes and shifts */
|
||||||
|
#define PM_BITMAP_SIZE DIV_ROUND_UP(CSS_MAX_PERFMON_IDS, BITS_PER_LONG)
|
||||||
|
|
||||||
|
/* cycle stats snapshot control structure for one HW entry and many clients */
|
||||||
|
struct gk20a_cs_snapshot {
|
||||||
|
unsigned long perfmon_ids[PM_BITMAP_SIZE];
|
||||||
|
struct list_head clients;
|
||||||
|
struct mem_desc hw_memdesc;
|
||||||
|
/* pointer to allocated cpu_va memory where GPU place data */
|
||||||
|
struct gk20a_cs_snapshot_fifo_entry *hw_snapshot;
|
||||||
|
struct gk20a_cs_snapshot_fifo_entry *hw_end;
|
||||||
|
struct gk20a_cs_snapshot_fifo_entry *hw_get;
|
||||||
|
};
|
||||||
|
|
||||||
|
void gk20a_init_css_ops(struct gpu_ops *gops);
|
||||||
|
|
||||||
|
#endif /* CSS_GR_GK20A_H */
|
||||||
@@ -657,6 +657,24 @@ struct gpu_ops {
|
|||||||
struct {
|
struct {
|
||||||
int (*init)(struct gk20a *g);
|
int (*init)(struct gk20a *g);
|
||||||
} bios;
|
} bios;
|
||||||
|
#if defined(CONFIG_GK20A_CYCLE_STATS)
|
||||||
|
struct {
|
||||||
|
int (*enable_snapshot)(struct channel_gk20a *ch,
|
||||||
|
struct gk20a_cs_snapshot_client *client);
|
||||||
|
void (*disable_snapshot)(struct gr_gk20a *gr);
|
||||||
|
int (*check_data_available)(struct channel_gk20a *ch,
|
||||||
|
u32 *pending,
|
||||||
|
bool *hw_overflow);
|
||||||
|
void (*set_handled_snapshots)(struct gk20a *g, u32 num);
|
||||||
|
u32 (*allocate_perfmon_ids)(struct gk20a_cs_snapshot *data,
|
||||||
|
u32 count);
|
||||||
|
u32 (*release_perfmon_ids)(struct gk20a_cs_snapshot *data,
|
||||||
|
u32 start,
|
||||||
|
u32 count);
|
||||||
|
int (*detach_snapshot)(struct channel_gk20a *ch,
|
||||||
|
struct gk20a_cs_snapshot_client *client);
|
||||||
|
} css;
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
struct nvgpu_bios_ucode {
|
struct nvgpu_bios_ucode {
|
||||||
|
|||||||
@@ -603,16 +603,16 @@ int gr_gk20a_halt_pipe(struct gk20a *g);
|
|||||||
int gr_gk20a_debugfs_init(struct gk20a *g);
|
int gr_gk20a_debugfs_init(struct gk20a *g);
|
||||||
|
|
||||||
#if defined(CONFIG_GK20A_CYCLE_STATS)
|
#if defined(CONFIG_GK20A_CYCLE_STATS)
|
||||||
int gr_gk20a_css_attach(struct gk20a *g, /* in - main hw structure */
|
int gr_gk20a_css_attach(struct channel_gk20a *ch, /* in - main hw structure */
|
||||||
u32 dmabuf_fd, /* in - dma mapped memory */
|
u32 dmabuf_fd, /* in - dma mapped memory */
|
||||||
u32 perfmon_id_count, /* in - number of perfmons*/
|
u32 perfmon_id_count, /* in - number of perfmons*/
|
||||||
u32 *perfmon_id_start, /* out- index of first pm */
|
u32 *perfmon_id_start, /* out- index of first pm */
|
||||||
/* out - pointer to client data used in later */
|
/* out - pointer to client data used in later */
|
||||||
struct gk20a_cs_snapshot_client **css_client);
|
struct gk20a_cs_snapshot_client **css_client);
|
||||||
|
|
||||||
int gr_gk20a_css_detach(struct gk20a *g,
|
int gr_gk20a_css_detach(struct channel_gk20a *ch,
|
||||||
struct gk20a_cs_snapshot_client *css_client);
|
struct gk20a_cs_snapshot_client *css_client);
|
||||||
int gr_gk20a_css_flush(struct gk20a *g,
|
int gr_gk20a_css_flush(struct channel_gk20a *ch,
|
||||||
struct gk20a_cs_snapshot_client *css_client);
|
struct gk20a_cs_snapshot_client *css_client);
|
||||||
|
|
||||||
void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g);
|
void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g);
|
||||||
|
|||||||
@@ -32,6 +32,7 @@
|
|||||||
#include "hw_proj_gk20a.h"
|
#include "hw_proj_gk20a.h"
|
||||||
#include "tsg_gk20a.h"
|
#include "tsg_gk20a.h"
|
||||||
#include "dbg_gpu_gk20a.h"
|
#include "dbg_gpu_gk20a.h"
|
||||||
|
#include "css_gr_gk20a.h"
|
||||||
|
|
||||||
static struct gpu_ops gk20a_ops = {
|
static struct gpu_ops gk20a_ops = {
|
||||||
.clock_gating = {
|
.clock_gating = {
|
||||||
@@ -157,6 +158,9 @@ int gk20a_init_hal(struct gk20a *g)
|
|||||||
gk20a_init_dbg_session_ops(gops);
|
gk20a_init_dbg_session_ops(gops);
|
||||||
gk20a_init_therm_ops(gops);
|
gk20a_init_therm_ops(gops);
|
||||||
gk20a_init_tsg_ops(gops);
|
gk20a_init_tsg_ops(gops);
|
||||||
|
#if defined(CONFIG_GK20A_CYCLE_STATS)
|
||||||
|
gk20a_init_css_ops(gops);
|
||||||
|
#endif
|
||||||
gops->name = "gk20a";
|
gops->name = "gk20a";
|
||||||
gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics;
|
gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics;
|
||||||
gops->get_litter_value = gk20a_get_litter_value;
|
gops->get_litter_value = gk20a_get_litter_value;
|
||||||
|
|||||||
@@ -41,6 +41,7 @@
|
|||||||
#include "gr_gm206.h"
|
#include "gr_gm206.h"
|
||||||
#include "hw_proj_gm206.h"
|
#include "hw_proj_gm206.h"
|
||||||
#include "gk20a/dbg_gpu_gk20a.h"
|
#include "gk20a/dbg_gpu_gk20a.h"
|
||||||
|
#include "gk20a/css_gr_gk20a.h"
|
||||||
|
|
||||||
static struct gpu_ops gm206_ops = {
|
static struct gpu_ops gm206_ops = {
|
||||||
.clock_gating = {
|
.clock_gating = {
|
||||||
@@ -199,6 +200,9 @@ int gm206_init_hal(struct gk20a *g)
|
|||||||
gm20b_init_cde_ops(gops);
|
gm20b_init_cde_ops(gops);
|
||||||
gm20b_init_therm_ops(gops);
|
gm20b_init_therm_ops(gops);
|
||||||
gk20a_init_tsg_ops(gops);
|
gk20a_init_tsg_ops(gops);
|
||||||
|
#if defined(CONFIG_GK20A_CYCLE_STATS)
|
||||||
|
gk20a_init_css_ops(gops);
|
||||||
|
#endif
|
||||||
gm206_init_bios(gops);
|
gm206_init_bios(gops);
|
||||||
switch(ver){
|
switch(ver){
|
||||||
case GK20A_GPUID_GM206:
|
case GK20A_GPUID_GM206:
|
||||||
|
|||||||
@@ -36,6 +36,7 @@
|
|||||||
#include "therm_gm20b.h"
|
#include "therm_gm20b.h"
|
||||||
#include "hw_proj_gm20b.h"
|
#include "hw_proj_gm20b.h"
|
||||||
#include "gk20a/dbg_gpu_gk20a.h"
|
#include "gk20a/dbg_gpu_gk20a.h"
|
||||||
|
#include "gk20a/css_gr_gk20a.h"
|
||||||
|
|
||||||
#define FUSE_OPT_PRIV_SEC_DIS_0 0x264
|
#define FUSE_OPT_PRIV_SEC_DIS_0 0x264
|
||||||
#define PRIV_SECURITY_DISABLE 0x01
|
#define PRIV_SECURITY_DISABLE 0x01
|
||||||
@@ -226,6 +227,9 @@ int gm20b_init_hal(struct gk20a *g)
|
|||||||
gm20b_init_cde_ops(gops);
|
gm20b_init_cde_ops(gops);
|
||||||
gm20b_init_therm_ops(gops);
|
gm20b_init_therm_ops(gops);
|
||||||
gk20a_init_tsg_ops(gops);
|
gk20a_init_tsg_ops(gops);
|
||||||
|
#if defined(CONFIG_GK20A_CYCLE_STATS)
|
||||||
|
gk20a_init_css_ops(gops);
|
||||||
|
#endif
|
||||||
gops->name = "gm20b";
|
gops->name = "gm20b";
|
||||||
gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics;
|
gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics;
|
||||||
gops->get_litter_value = gm20b_get_litter_value;
|
gops->get_litter_value = gm20b_get_litter_value;
|
||||||
|
|||||||
221
drivers/gpu/nvgpu/vgpu/css_vgpu.c
Normal file
221
drivers/gpu/nvgpu/vgpu/css_vgpu.c
Normal file
@@ -0,0 +1,221 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms and conditions of the GNU General Public License,
|
||||||
|
* version 2, as published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||||
|
* more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
#if defined(CONFIG_GK20A_CYCLE_STATS)
|
||||||
|
|
||||||
|
#include <linux/kernel.h>
|
||||||
|
#include <linux/nvhost.h>
|
||||||
|
#include <linux/tegra-ivc.h>
|
||||||
|
#include <linux/tegra_vgpu.h>
|
||||||
|
|
||||||
|
#include "gk20a/gk20a.h"
|
||||||
|
#include "gk20a/channel_gk20a.h"
|
||||||
|
#include "gk20a/platform_gk20a.h"
|
||||||
|
#include "gk20a/css_gr_gk20a.h"
|
||||||
|
#include "vgpu.h"
|
||||||
|
|
||||||
|
/*
 * Bookkeeping for a hypervisor-backed snapshot buffer.
 *
 * NOTE(review): no function visible in this file references this structure;
 * the field descriptions below are inferred from the field names and types
 * only - confirm before relying on them.
 */
struct vgpu_hw_snapshot_buffer {
	struct tegra_hv_ivm_cookie *cookie;		/* presumably the reserved mempool */
	void *buf;					/* presumably the mapped buffer */
	struct gk20a_cs_snapshot_fifo_entry *end;	/* presumably one past the last entry */
	struct gk20a_cs_snapshot_fifo_entry *src_get;	/* presumably the read cursor */
	struct gk20a_cs_snapshot_fifo_entry *src_put;	/* presumably the write cursor */
};
|
||||||
|
|
||||||
|
/*
 * Cookie for the hypervisor mempool backing the cyclestats snapshot buffer.
 * Assigned by vgpu_css_init_snapshot_buffer() from tegra_hv_mempool_reserve()
 * and handed back to tegra_hv_mempool_unreserve() by
 * vgpu_css_release_snapshot_buffer() (and by the init failure path).
 *
 * NOTE(review): this has external linkage but no declaration for it is
 * visible here - confirm whether it can be made static.
 */
struct tegra_hv_ivm_cookie *css_cookie;
|
||||||
|
|
||||||
|
int vgpu_css_init_snapshot_buffer(struct gr_gk20a *gr)
|
||||||
|
{
|
||||||
|
struct gk20a *g = gr->g;
|
||||||
|
struct device *dev = g->dev;
|
||||||
|
struct gk20a_cs_snapshot *data = gr->cs_data;
|
||||||
|
struct device_node *np = dev->of_node;
|
||||||
|
struct of_phandle_args args;
|
||||||
|
struct device_node *hv_np;
|
||||||
|
void *buf = NULL;
|
||||||
|
u32 mempool;
|
||||||
|
int err;
|
||||||
|
|
||||||
|
gk20a_dbg_fn("");
|
||||||
|
|
||||||
|
if (data->hw_snapshot)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
err = of_parse_phandle_with_fixed_args(np,
|
||||||
|
"mempool-css", 1, 0, &args);
|
||||||
|
if (err) {
|
||||||
|
dev_info(dev_from_gk20a(g), "dt missing mempool-css\n");
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
|
hv_np = args.np;
|
||||||
|
mempool = args.args[0];
|
||||||
|
css_cookie = tegra_hv_mempool_reserve(hv_np, mempool);
|
||||||
|
if (IS_ERR(css_cookie)) {
|
||||||
|
dev_info(dev_from_gk20a(g),
|
||||||
|
"mempool %u reserve failed\n", mempool);
|
||||||
|
err = -EINVAL;
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Make sure buffer size is large enough */
|
||||||
|
if (css_cookie->size < CSS_MIN_HW_SNAPSHOT_SIZE) {
|
||||||
|
dev_info(dev_from_gk20a(g), "mempool size %lld too small\n",
|
||||||
|
css_cookie->size);
|
||||||
|
err = -ENOMEM;
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0)
|
||||||
|
buf = ioremap_cached(css_cookie->ipa, css_cookie->size);
|
||||||
|
#else
|
||||||
|
buf = ioremap_cache(css_cookie->ipa, css_cookie->size);
|
||||||
|
#endif
|
||||||
|
if (!buf) {
|
||||||
|
dev_info(dev_from_gk20a(g), "ioremap_cache failed\n");
|
||||||
|
err = -EINVAL;
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
|
data->hw_snapshot = buf;
|
||||||
|
data->hw_end = data->hw_snapshot +
|
||||||
|
css_cookie->size / sizeof(struct gk20a_cs_snapshot_fifo_entry);
|
||||||
|
data->hw_get = data->hw_snapshot;
|
||||||
|
memset(data->hw_snapshot, 0xff, css_cookie->size);
|
||||||
|
return 0;
|
||||||
|
fail:
|
||||||
|
if (!IS_ERR_OR_NULL(css_cookie))
|
||||||
|
tegra_hv_mempool_unreserve(css_cookie);
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void vgpu_css_release_snapshot_buffer(struct gr_gk20a *gr)
|
||||||
|
{
|
||||||
|
struct gk20a_cs_snapshot *data = gr->cs_data;
|
||||||
|
|
||||||
|
if (!data->hw_snapshot)
|
||||||
|
return;
|
||||||
|
|
||||||
|
iounmap(data->hw_snapshot);
|
||||||
|
data->hw_snapshot = NULL;
|
||||||
|
|
||||||
|
tegra_hv_mempool_unreserve(css_cookie);
|
||||||
|
|
||||||
|
gk20a_dbg_info("cyclestats(vgpu): buffer for snapshots released\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
static int vgpu_css_flush_snapshots(struct channel_gk20a *ch,
|
||||||
|
u32 *pending, bool *hw_overflow)
|
||||||
|
{
|
||||||
|
struct gk20a *g = ch->g;
|
||||||
|
struct tegra_vgpu_cmd_msg msg = {};
|
||||||
|
struct tegra_vgpu_channel_cyclestats_snapshot_params *p;
|
||||||
|
struct gr_gk20a *gr = &g->gr;
|
||||||
|
struct gk20a_cs_snapshot *data = gr->cs_data;
|
||||||
|
int err;
|
||||||
|
|
||||||
|
gk20a_dbg_fn("");
|
||||||
|
|
||||||
|
msg.cmd = TEGRA_VGPU_CMD_CHANNEL_CYCLESTATS_SNAPSHOT;
|
||||||
|
msg.handle = vgpu_get_handle(g);
|
||||||
|
p = &msg.params.cyclestats_snapshot;
|
||||||
|
p->handle = ch->virt_ctx;
|
||||||
|
p->subcmd = NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_FLUSH;
|
||||||
|
p->buf_info = (uintptr_t)data->hw_get - (uintptr_t)data->hw_snapshot;
|
||||||
|
|
||||||
|
err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
|
||||||
|
|
||||||
|
err = (err || msg.ret) ? -1 : 0;
|
||||||
|
|
||||||
|
*pending = p->buf_info;
|
||||||
|
*hw_overflow = p->hw_overflow;
|
||||||
|
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int vgpu_css_attach(struct channel_gk20a *ch,
|
||||||
|
struct gk20a_cs_snapshot_client *cs_client)
|
||||||
|
{
|
||||||
|
struct gk20a *g = ch->g;
|
||||||
|
struct tegra_vgpu_cmd_msg msg = {};
|
||||||
|
struct tegra_vgpu_channel_cyclestats_snapshot_params *p =
|
||||||
|
&msg.params.cyclestats_snapshot;
|
||||||
|
int err;
|
||||||
|
|
||||||
|
gk20a_dbg_fn("");
|
||||||
|
|
||||||
|
msg.cmd = TEGRA_VGPU_CMD_CHANNEL_CYCLESTATS_SNAPSHOT;
|
||||||
|
msg.handle = vgpu_get_handle(g);
|
||||||
|
p->handle = ch->virt_ctx;
|
||||||
|
p->subcmd = NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_ATTACH;
|
||||||
|
p->perfmon_count = cs_client->perfmon_count;
|
||||||
|
|
||||||
|
err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
|
||||||
|
err = err ? err : msg.ret;
|
||||||
|
if (err)
|
||||||
|
gk20a_err(dev_from_gk20a(g), "%s failed", __func__);
|
||||||
|
else
|
||||||
|
cs_client->perfmon_start = p->perfmon_start;
|
||||||
|
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int vgpu_css_detach(struct channel_gk20a *ch,
|
||||||
|
struct gk20a_cs_snapshot_client *cs_client)
|
||||||
|
{
|
||||||
|
struct gk20a *g = ch->g;
|
||||||
|
struct tegra_vgpu_cmd_msg msg = {};
|
||||||
|
struct tegra_vgpu_channel_cyclestats_snapshot_params *p =
|
||||||
|
&msg.params.cyclestats_snapshot;
|
||||||
|
int err;
|
||||||
|
|
||||||
|
gk20a_dbg_fn("");
|
||||||
|
|
||||||
|
msg.cmd = TEGRA_VGPU_CMD_CHANNEL_CYCLESTATS_SNAPSHOT;
|
||||||
|
msg.handle = vgpu_get_handle(g);
|
||||||
|
p->handle = ch->virt_ctx;
|
||||||
|
p->subcmd = NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_DETACH;
|
||||||
|
p->perfmon_start = cs_client->perfmon_start;
|
||||||
|
p->perfmon_count = cs_client->perfmon_count;
|
||||||
|
|
||||||
|
err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
|
||||||
|
err = err ? err : msg.ret;
|
||||||
|
if (err)
|
||||||
|
gk20a_err(dev_from_gk20a(g), "%s failed", __func__);
|
||||||
|
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int vgpu_css_enable_snapshot_buffer(struct channel_gk20a *ch,
|
||||||
|
struct gk20a_cs_snapshot_client *cs_client)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
ret = vgpu_css_attach(ch, cs_client);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
ret = vgpu_css_init_snapshot_buffer(&ch->g->gr);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
void vgpu_init_css_ops(struct gpu_ops *gops)
|
||||||
|
{
|
||||||
|
gops->css.enable_snapshot = vgpu_css_enable_snapshot_buffer;
|
||||||
|
gops->css.disable_snapshot = vgpu_css_release_snapshot_buffer;
|
||||||
|
gops->css.check_data_available = vgpu_css_flush_snapshots;
|
||||||
|
gops->css.detach_snapshot = vgpu_css_detach;
|
||||||
|
}
|
||||||
|
#endif /* CONFIG_GK20A_CYCLE_STATS */
|
||||||
@@ -875,6 +875,10 @@ static int vgpu_gr_init_gr_setup_sw(struct gk20a *g)
|
|||||||
|
|
||||||
gr->g = g;
|
gr->g = g;
|
||||||
|
|
||||||
|
#if defined(CONFIG_GK20A_CYCLE_STATS)
|
||||||
|
mutex_init(&g->gr.cs_lock);
|
||||||
|
#endif
|
||||||
|
|
||||||
err = vgpu_gr_init_gr_config(g, gr);
|
err = vgpu_gr_init_gr_config(g, gr);
|
||||||
if (err)
|
if (err)
|
||||||
goto clean_up;
|
goto clean_up;
|
||||||
|
|||||||
@@ -334,6 +334,9 @@ void vgpu_init_hal_common(struct gk20a *g)
|
|||||||
vgpu_init_dbg_session_ops(gops);
|
vgpu_init_dbg_session_ops(gops);
|
||||||
vgpu_init_fecs_trace_ops(gops);
|
vgpu_init_fecs_trace_ops(gops);
|
||||||
vgpu_init_tsg_ops(gops);
|
vgpu_init_tsg_ops(gops);
|
||||||
|
#if defined(CONFIG_GK20A_CYCLE_STATS)
|
||||||
|
vgpu_init_css_ops(gops);
|
||||||
|
#endif
|
||||||
gops->chip_init_gpu_characteristics = vgpu_init_gpu_characteristics;
|
gops->chip_init_gpu_characteristics = vgpu_init_gpu_characteristics;
|
||||||
gops->read_ptimer = vgpu_read_ptimer;
|
gops->read_ptimer = vgpu_read_ptimer;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -85,6 +85,9 @@ void vgpu_init_ltc_ops(struct gpu_ops *gops);
|
|||||||
void vgpu_init_mm_ops(struct gpu_ops *gops);
|
void vgpu_init_mm_ops(struct gpu_ops *gops);
|
||||||
void vgpu_init_debug_ops(struct gpu_ops *gops);
|
void vgpu_init_debug_ops(struct gpu_ops *gops);
|
||||||
void vgpu_init_tsg_ops(struct gpu_ops *gops);
|
void vgpu_init_tsg_ops(struct gpu_ops *gops);
|
||||||
|
#if defined(CONFIG_GK20A_CYCLE_STATS)
|
||||||
|
void vgpu_init_css_ops(struct gpu_ops *gops);
|
||||||
|
#endif
|
||||||
int vgpu_init_mm_support(struct gk20a *g);
|
int vgpu_init_mm_support(struct gk20a *g);
|
||||||
int vgpu_init_gr_support(struct gk20a *g);
|
int vgpu_init_gr_support(struct gk20a *g);
|
||||||
int vgpu_init_fifo_support(struct gk20a *g);
|
int vgpu_init_fifo_support(struct gk20a *g);
|
||||||
@@ -161,6 +164,11 @@ static inline void vgpu_init_mm_ops(struct gpu_ops *gops)
|
|||||||
static inline void vgpu_init_debug_ops(struct gpu_ops *gops)
|
static inline void vgpu_init_debug_ops(struct gpu_ops *gops)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
#if defined(CONFIG_GK20A_CYCLE_STATS)
|
||||||
|
/*
 * Empty stand-in for vgpu_init_css_ops(): leaves @gops untouched.
 * NOTE(review): presumably selected when GPU virtualization support is not
 * built in - the enclosing conditional is outside this hunk; confirm.
 */
static inline void vgpu_init_css_ops(struct gpu_ops *gops)
{
}
|
||||||
|
#endif
|
||||||
static inline int vgpu_init_mm_support(struct gk20a *g)
|
static inline int vgpu_init_mm_support(struct gk20a *g)
|
||||||
{
|
{
|
||||||
return -ENOSYS;
|
return -ENOSYS;
|
||||||
|
|||||||
@@ -97,6 +97,7 @@ enum {
|
|||||||
TEGRA_VGPU_CMD_SET_POWERGATE = 60,
|
TEGRA_VGPU_CMD_SET_POWERGATE = 60,
|
||||||
TEGRA_VGPU_CMD_SET_GPU_CLK_RATE = 61,
|
TEGRA_VGPU_CMD_SET_GPU_CLK_RATE = 61,
|
||||||
TEGRA_VGPU_CMD_GET_CONSTANTS = 62,
|
TEGRA_VGPU_CMD_GET_CONSTANTS = 62,
|
||||||
|
TEGRA_VGPU_CMD_CHANNEL_CYCLESTATS_SNAPSHOT = 63,
|
||||||
};
|
};
|
||||||
|
|
||||||
struct tegra_vgpu_connect_params {
|
struct tegra_vgpu_connect_params {
|
||||||
@@ -437,6 +438,15 @@ struct tegra_vgpu_constants_params {
|
|||||||
u16 gpc_tpc_mask[TEGRA_VGPU_MAX_GPC_COUNT];
|
u16 gpc_tpc_mask[TEGRA_VGPU_MAX_GPC_COUNT];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct tegra_vgpu_channel_cyclestats_snapshot_params {
|
||||||
|
u64 handle;
|
||||||
|
u32 perfmon_start;
|
||||||
|
u32 perfmon_count;
|
||||||
|
u32 buf_info; /* client->srvr: get ptr; srvr->client: num pending */
|
||||||
|
u8 subcmd;
|
||||||
|
u8 hw_overflow;
|
||||||
|
};
|
||||||
|
|
||||||
struct tegra_vgpu_cmd_msg {
|
struct tegra_vgpu_cmd_msg {
|
||||||
u32 cmd;
|
u32 cmd;
|
||||||
int ret;
|
int ret;
|
||||||
@@ -481,6 +491,7 @@ struct tegra_vgpu_cmd_msg {
|
|||||||
struct tegra_vgpu_set_powergate_params set_powergate;
|
struct tegra_vgpu_set_powergate_params set_powergate;
|
||||||
struct tegra_vgpu_gpu_clk_rate_params gpu_clk_rate;
|
struct tegra_vgpu_gpu_clk_rate_params gpu_clk_rate;
|
||||||
struct tegra_vgpu_constants_params constants;
|
struct tegra_vgpu_constants_params constants;
|
||||||
|
struct tegra_vgpu_channel_cyclestats_snapshot_params cyclestats_snapshot;
|
||||||
char padding[192];
|
char padding[192];
|
||||||
} params;
|
} params;
|
||||||
};
|
};
|
||||||
|
|||||||
Reference in New Issue
Block a user