gpu: nvgpu: move snapshot_client memory handling to linux

We currently store the dmabuf fd and the dma_buf pointer in struct
gk20a_cs_snapshot_client. But since dma_buf and all related APIs are Linux
specific, we need to remove them from common code and move them to Linux
specific code.

Add a new Linux specific structure, gk20a_cs_snapshot_client_linux, which
includes struct gk20a_cs_snapshot_client and the Linux specific dma_buf pointer.

In gk20a_attach_cycle_stats_snapshot(), we first handle all dma_buf related
operations and then call gr_gk20a_css_attach()

Move gk20a_channel_free_cycle_stats_snapshot() to ioctl_channel.c.
In gk20a_channel_free_cycle_stats_snapshot(), we call gr_gk20a_css_detach()
and then free up the dma_buf in Linux specific code.

We also need to call gk20a_channel_free_cycle_stats_snapshot() while closing
the channel, so call it from linux specific nvgpu_channel_close_linux()

Jira NVGPU-397
Jira NVGPU-415

Change-Id: Ida27240541f6adf31f28d7d7ee4f51651c6d3de2
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1603908
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Deepak Nibade
2017-11-23 01:03:24 -08:00
committed by mobile promotions
parent 3fbb44d757
commit 861b11a968
8 changed files with 117 additions and 85 deletions

View File

@@ -29,6 +29,7 @@
#include "gk20a/gk20a.h"
#include "channel.h"
#include "ioctl_channel.h"
#include "os_linux.h"
#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
@@ -242,6 +243,10 @@ static void nvgpu_channel_open_linux(struct channel_gk20a *ch)
/*
 * Linux specific part of closing a channel.
 *
 * Calls nvgpu_channel_work_completion_clear() on the channel and, when
 * CONFIG_GK20A_CYCLE_STATS is defined, frees the channel's cycle stats
 * snapshot client.
 */
static void nvgpu_channel_close_linux(struct channel_gk20a *ch)
{
nvgpu_channel_work_completion_clear(ch);
#if defined(CONFIG_GK20A_CYCLE_STATS)
/* snapshot teardown involves dma_buf handling, so it is done from Linux code */
gk20a_channel_free_cycle_stats_snapshot(ch);
#endif
}
static int nvgpu_channel_alloc_linux(struct gk20a *g, struct channel_gk20a *ch)

View File

@@ -42,6 +42,11 @@
#include "os_linux.h"
#include "ctxsw_trace.h"
/* the minimal size of client buffer */
#define CSS_MIN_CLIENT_SNAPSHOT_SIZE \
(sizeof(struct gk20a_cs_snapshot_fifo) + \
sizeof(struct gk20a_cs_snapshot_fifo_entry) * 256)
static const char *gr_gk20a_graphics_preempt_mode_name(u32 graphics_preempt_mode)
{
switch (graphics_preempt_mode) {
@@ -157,18 +162,92 @@ static int gk20a_attach_cycle_stats_snapshot(struct channel_gk20a *ch,
u32 perfmon_id_count,
u32 *perfmon_id_start)
{
int ret;
int ret = 0;
struct gk20a *g = ch->g;
struct gk20a_cs_snapshot_client_linux *client_linux;
struct gk20a_cs_snapshot_client *client;
nvgpu_mutex_acquire(&ch->cs_client_mutex);
if (ch->cs_client) {
ret = -EEXIST;
} else {
ret = gr_gk20a_css_attach(ch,
dmabuf_fd,
perfmon_id_count,
perfmon_id_start,
&ch->cs_client);
nvgpu_mutex_release(&ch->cs_client_mutex);
return -EEXIST;
}
client_linux = nvgpu_kzalloc(g, sizeof(*client_linux));
if (!client_linux) {
ret = -ENOMEM;
goto err;
}
client_linux->dmabuf_fd = dmabuf_fd;
client_linux->dma_handler = dma_buf_get(client_linux->dmabuf_fd);
if (IS_ERR(client_linux->dma_handler)) {
ret = PTR_ERR(client_linux->dma_handler);
client_linux->dma_handler = NULL;
goto err_free;
}
client = &client_linux->cs_client;
client->snapshot_size = client_linux->dma_handler->size;
if (client->snapshot_size < CSS_MIN_CLIENT_SNAPSHOT_SIZE) {
ret = -ENOMEM;
goto err_put;
}
client->snapshot = (struct gk20a_cs_snapshot_fifo *)
dma_buf_vmap(client_linux->dma_handler);
if (!client->snapshot) {
ret = -ENOMEM;
goto err_put;
}
ch->cs_client = client;
ret = gr_gk20a_css_attach(ch,
perfmon_id_count,
perfmon_id_start,
ch->cs_client);
nvgpu_mutex_release(&ch->cs_client_mutex);
return ret;
err_put:
dma_buf_put(client_linux->dma_handler);
err_free:
nvgpu_kfree(g, client_linux);
err:
nvgpu_mutex_release(&ch->cs_client_mutex);
return ret;
}
int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch)
{
int ret;
struct gk20a_cs_snapshot_client_linux *client_linux;
nvgpu_mutex_acquire(&ch->cs_client_mutex);
if (!ch->cs_client) {
nvgpu_mutex_release(&ch->cs_client_mutex);
return 0;
}
client_linux = container_of(ch->cs_client,
struct gk20a_cs_snapshot_client_linux,
cs_client);
ret = gr_gk20a_css_detach(ch, ch->cs_client);
if (client_linux->dma_handler) {
if (ch->cs_client->snapshot)
dma_buf_vunmap(client_linux->dma_handler,
ch->cs_client->snapshot);
dma_buf_put(client_linux->dma_handler);
}
ch->cs_client = NULL;
nvgpu_kfree(ch->g, client_linux);
nvgpu_mutex_release(&ch->cs_client_mutex);
return ret;

View File

@@ -15,11 +15,20 @@
#include <linux/fs.h>
#include "gk20a/css_gr_gk20a.h"
struct inode;
struct file;
struct gk20a;
struct nvgpu_channel_open_args;
/*
 * Linux specific wrapper around the common cycle stats snapshot client.
 *
 * Keeps the dma_buf state next to the common client data. The wrapper is
 * recovered from a struct gk20a_cs_snapshot_client pointer with
 * container_of() on the cs_client member.
 */
struct gk20a_cs_snapshot_client_linux {
/* common client data; ch->cs_client points at this member */
struct gk20a_cs_snapshot_client cs_client;
/* dmabuf fd supplied when the snapshot was attached */
u32 dmabuf_fd;
/* dma_buf obtained with dma_buf_get(dmabuf_fd); released with dma_buf_put() */
struct dma_buf *dma_handler;
};
int gk20a_channel_open(struct inode *inode, struct file *filp);
int gk20a_channel_release(struct inode *inode, struct file *filp);
long gk20a_channel_ioctl(struct file *filp,
@@ -27,6 +36,8 @@ long gk20a_channel_ioctl(struct file *filp,
int gk20a_channel_open_ioctl(struct gk20a *g,
struct nvgpu_channel_open_args *args);
int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch);
extern const struct file_operations gk20a_event_id_ops;
extern const struct file_operations gk20a_channel_ops;

View File

@@ -376,22 +376,6 @@ void gk20a_channel_free_cycle_stats_buffer(struct channel_gk20a *ch)
nvgpu_mutex_release(&ch->cyclestate.cyclestate_buffer_mutex);
}
int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch)
{
int ret;
nvgpu_mutex_acquire(&ch->cs_client_mutex);
if (ch->cs_client) {
ret = gr_gk20a_css_detach(ch, ch->cs_client);
ch->cs_client = NULL;
} else {
ret = 0;
}
nvgpu_mutex_release(&ch->cs_client_mutex);
return ret;
}
#endif
/* call ONLY when no references to the channel exist: after the last put */
@@ -508,7 +492,6 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
#if defined(CONFIG_GK20A_CYCLE_STATS)
gk20a_channel_free_cycle_stats_buffer(ch);
gk20a_channel_free_cycle_stats_snapshot(ch);
#endif
channel_gk20a_free_priv_cmdbuf(ch);

View File

@@ -371,7 +371,6 @@ int gk20a_channel_alloc_gpfifo(struct channel_gk20a *c,
unsigned int num_inflight_jobs,
u32 flags);
void gk20a_channel_free_cycle_stats_buffer(struct channel_gk20a *ch);
int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch);
void gk20a_channel_timeout_restart_all_channels(struct gk20a *g);

View File

@@ -45,11 +45,6 @@
((cl)->perfmon_start <= (pm) && \
((pm) - (cl)->perfmon_start) < (cl)->perfmon_count)
/* the minimal size of client buffer */
#define CSS_MIN_CLIENT_SNAPSHOT_SIZE \
(sizeof(struct gk20a_cs_snapshot_fifo) + \
sizeof(struct gk20a_cs_snapshot_fifo_entry) * 256)
/* address of fifo entry by offset */
#define CSS_FIFO_ENTRY(fifo, offs) \
((struct gk20a_cs_snapshot_fifo_entry *)(((char *)(fifo)) + (offs)))
@@ -452,52 +447,16 @@ static int css_gr_free_client_data(struct gk20a *g,
ret = -EINVAL;
}
if (client->dma_handler) {
if (client->snapshot)
dma_buf_vunmap(client->dma_handler, client->snapshot);
dma_buf_put(client->dma_handler);
}
nvgpu_kfree(g, client);
return ret;
}
static int css_gr_create_client_data(struct gk20a *g,
struct gk20a_cs_snapshot *data,
u32 dmabuf_fd, u32 perfmon_count,
struct gk20a_cs_snapshot_client **client)
u32 perfmon_count,
struct gk20a_cs_snapshot_client *cur)
{
struct gk20a_cs_snapshot_client *cur;
int ret = 0;
cur = nvgpu_kzalloc(g, sizeof(*cur));
if (!cur) {
ret = -ENOMEM;
goto failed;
}
cur->dmabuf_fd = dmabuf_fd;
cur->dma_handler = dma_buf_get(cur->dmabuf_fd);
if (IS_ERR(cur->dma_handler)) {
ret = PTR_ERR(cur->dma_handler);
cur->dma_handler = NULL;
goto failed;
}
cur->snapshot = (struct gk20a_cs_snapshot_fifo *)
dma_buf_vmap(cur->dma_handler);
if (!cur->snapshot) {
ret = -ENOMEM;
goto failed;
}
cur->snapshot_size = cur->dma_handler->size;
if (cur->snapshot_size < CSS_MIN_CLIENT_SNAPSHOT_SIZE) {
ret = -ENOMEM;
goto failed;
}
memset(cur->snapshot, 0, sizeof(*cur->snapshot));
cur->snapshot->start = sizeof(*cur->snapshot);
/* we should ensure that all fifo entries can fit here */
@@ -523,12 +482,10 @@ static int css_gr_create_client_data(struct gk20a *g,
}
nvgpu_list_add_tail(&cur->list, &data->clients);
*client = cur;
return 0;
failed:
*client = NULL;
if (cur)
css_gr_free_client_data(g, data, cur);
@@ -537,10 +494,9 @@ failed:
int gr_gk20a_css_attach(struct channel_gk20a *ch,
u32 dmabuf_fd,
u32 perfmon_count,
u32 *perfmon_start,
struct gk20a_cs_snapshot_client **cs_client)
struct gk20a_cs_snapshot_client *cs_client)
{
int ret = 0;
struct gk20a *g = ch->g;
@@ -555,7 +511,6 @@ int gr_gk20a_css_attach(struct channel_gk20a *ch,
return -EINVAL;
gr = &g->gr;
*cs_client = NULL;
nvgpu_mutex_acquire(&gr->cs_lock);
@@ -564,18 +519,17 @@ int gr_gk20a_css_attach(struct channel_gk20a *ch,
goto failed;
ret = css_gr_create_client_data(g, gr->cs_data,
dmabuf_fd,
perfmon_count,
cs_client);
if (ret)
goto failed;
ret = g->ops.css.enable_snapshot(ch, *cs_client);
ret = g->ops.css.enable_snapshot(ch, cs_client);
if (ret)
goto failed;
if (perfmon_start)
*perfmon_start = (*cs_client)->perfmon_start;
*perfmon_start = cs_client->perfmon_start;
nvgpu_mutex_release(&gr->cs_lock);
@@ -583,9 +537,9 @@ int gr_gk20a_css_attach(struct channel_gk20a *ch,
failed:
if (gr->cs_data) {
if (*cs_client) {
css_gr_free_client_data(g, gr->cs_data, *cs_client);
*cs_client = NULL;
if (cs_client) {
css_gr_free_client_data(g, gr->cs_data, cs_client);
cs_client = NULL;
}
if (nvgpu_list_empty(&gr->cs_data->clients))

View File

@@ -28,6 +28,10 @@
/* the minimal size of HW buffer - should be enough to avoid HW overflows */
#define CSS_MIN_HW_SNAPSHOT_SIZE (8 * 1024 * 1024)
struct gk20a;
struct gr_gk20a;
struct channel_gk20a;
/* cycle stats fifo header (must match NvSnapshotBufferFifo) */
struct gk20a_cs_snapshot_fifo {
/* layout description of the buffer */
@@ -95,8 +99,6 @@ struct gk20a_cs_snapshot_fifo_entry {
/* cycle stats snapshot client data (e.g. associated with channel) */
struct gk20a_cs_snapshot_client {
struct nvgpu_list_node list;
u32 dmabuf_fd;
struct dma_buf *dma_handler;
struct gk20a_cs_snapshot_fifo *snapshot;
u32 snapshot_size;
u32 perfmon_start;

View File

@@ -699,11 +699,10 @@ int gr_gk20a_halt_pipe(struct gk20a *g);
#if defined(CONFIG_GK20A_CYCLE_STATS)
int gr_gk20a_css_attach(struct channel_gk20a *ch, /* in - main hw structure */
u32 dmabuf_fd, /* in - dma mapped memory */
u32 perfmon_id_count, /* in - number of perfmons*/
u32 *perfmon_id_start, /* out- index of first pm */
/* out - pointer to client data used in later */
struct gk20a_cs_snapshot_client **css_client);
/* in/out - pointer to client data used in later */
struct gk20a_cs_snapshot_client *css_client);
int gr_gk20a_css_detach(struct channel_gk20a *ch,
struct gk20a_cs_snapshot_client *css_client);