gpu: nvgpu: profiler create/free, hwpm reserve

Add support for creating/freeing profiler objects and for hwpm reservations.

Bug 1775465
JIRA EVLR-680
JIRA EVLR-682

Change-Id: I4db83d00e4b0b552b05b9aae96dc553dd1257d88
Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com>
Reviewed-on: http://git-master/r/1322487
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Author:    Peter Daifuku <pdaifuku@nvidia.com>
Date:      2017-01-25 18:50:44 -08:00
Committer: mobile promotions
Parent:    22ac82a075
Commit:    009d9fd7f7

6 changed files with 382 additions and 7 deletions


@@ -52,6 +52,8 @@ static void nvgpu_init_vars(struct gk20a *g)
 	INIT_LIST_HEAD(&g->pending_sema_waits);
 	nvgpu_raw_spinlock_init(&g->pending_sema_waits_lock);
+
+	INIT_LIST_HEAD(&g->profiler_objects);
 }
 
 static void nvgpu_init_timeout(struct gk20a *g)


@@ -62,11 +62,12 @@ nvgpu_dbg_gpu_get_session_channel(struct dbg_session_gk20a *dbg_s)
 	return ch;
 }
 
-/* silly allocator - just increment session id */
+/* silly allocators - just increment id */
 static atomic_t session_id = ATOMIC_INIT(0);
-static int generate_session_id(void)
+static atomic_t profiler_id = ATOMIC_INIT(0);
+static int generate_id(atomic_t *id)
 {
-	return atomic_add_return(1, &session_id);
+	return atomic_add_return(1, id);
 }
 
 static int alloc_session(struct dbg_session_gk20a **_dbg_s)
@@ -80,11 +81,27 @@ static int alloc_session(struct dbg_session_gk20a **_dbg_s)
 	if (!dbg_s)
 		return -ENOMEM;
 
-	dbg_s->id = generate_session_id();
+	dbg_s->id = generate_id(&session_id);
 	*_dbg_s = dbg_s;
 	return 0;
 }
 
+static int alloc_profiler(struct dbg_profiler_object_data **_prof)
+{
+	struct dbg_profiler_object_data *prof;
+	*_prof = NULL;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
+
+	prof = kzalloc(sizeof(*prof), GFP_KERNEL);
+	if (!prof)
+		return -ENOMEM;
+
+	prof->prof_handle = generate_id(&profiler_id);
+	*_prof = prof;
+	return 0;
+}
+
 static int gk20a_dbg_gpu_do_dev_open(struct inode *inode,
 		struct file *filp, bool is_profiler)
 {
@@ -384,13 +401,28 @@ int dbg_unbind_single_channel_gk20a(struct dbg_session_gk20a *dbg_s,
 {
 	struct gk20a *g = dbg_s->g;
 	int chid;
+	struct channel_gk20a *ch;
 	struct dbg_session_data *session_data;
+	struct dbg_profiler_object_data *prof_obj, *tmp_obj;
 
 	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
 
 	chid = ch_data->chid;
+	ch = g->fifo.channel + chid;
+
+	/* If there's a profiler ctx reservation record associated with this
+	 * session/channel pair, release it.
+	 */
+	list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects,
+				prof_obj_entry) {
+		if ((prof_obj->session_id == dbg_s->id) &&
+			(prof_obj->ch->hw_chid == chid)) {
+			if (prof_obj->has_reservation) {
+				g->profiler_reservation_count--;
+				dbg_s->has_profiler_reservation = false;
+			}
+			list_del(&prof_obj->prof_obj_entry);
+			kfree(prof_obj);
+		}
+	}
 
 	list_del_init(&ch_data->ch_entry);
@@ -464,6 +496,7 @@ int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp)
 {
 	struct dbg_session_gk20a *dbg_s = filp->private_data;
 	struct gk20a *g = dbg_s->g;
+	struct dbg_profiler_object_data *prof_obj, *tmp_obj;
 
 	gk20a_dbg(gpu_dbg_gpu_dbg | gpu_dbg_fn, "%s", dev_name(dbg_s->dev));
@@ -478,6 +511,21 @@ int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp)
 		g->ops.dbg_session_ops.dbg_set_powergate(dbg_s,
 				NVGPU_DBG_GPU_POWERGATE_MODE_ENABLE);
 	nvgpu_dbg_timeout_enable(dbg_s, NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE);
+
+	/* Per-context profiler objects were released when we called
+	 * dbg_unbind_all_channels. We could still have global ones.
+	 */
+	list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects,
+				prof_obj_entry) {
+		if (prof_obj->session_id == dbg_s->id) {
+			if (prof_obj->has_reservation) {
+				g->global_profiler_reservation_held = false;
+				g->profiler_reservation_count--;
+			}
+			list_del(&prof_obj->prof_obj_entry);
+			kfree(prof_obj);
+		}
+	}
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
 
 	kfree(dbg_s);
@@ -563,6 +611,15 @@ static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm(
 		struct dbg_session_gk20a *dbg_s,
 		struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args);
 
+static int nvgpu_ioctl_allocate_profiler_object(struct dbg_session_gk20a *dbg_s,
+		struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args);
+
+static int nvgpu_ioctl_free_profiler_object(struct dbg_session_gk20a *dbg_s,
+		struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args);
+
+static int nvgpu_ioctl_profiler_reserve(struct dbg_session_gk20a *dbg_s,
+		struct nvgpu_dbg_gpu_profiler_reserve_args *args);
+
 static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 		struct nvgpu_dbg_gpu_perfbuf_map_args *args);
@@ -1001,6 +1058,21 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
 			(struct nvgpu_dbg_gpu_access_fb_memory_args *)buf);
 		break;
 
+	case NVGPU_DBG_GPU_IOCTL_PROFILER_ALLOCATE:
+		err = nvgpu_ioctl_allocate_profiler_object(dbg_s,
+			(struct nvgpu_dbg_gpu_profiler_obj_mgt_args *)buf);
+		break;
+
+	case NVGPU_DBG_GPU_IOCTL_PROFILER_FREE:
+		err = nvgpu_ioctl_free_profiler_object(dbg_s,
+			(struct nvgpu_dbg_gpu_profiler_obj_mgt_args *)buf);
+		break;
+
+	case NVGPU_DBG_GPU_IOCTL_PROFILER_RESERVE:
+		err = nvgpu_ioctl_profiler_reserve(dbg_s,
+			(struct nvgpu_dbg_gpu_profiler_reserve_args *)buf);
+		break;
+
 	default:
 		gk20a_err(dev_from_gk20a(g),
 			"unrecognized dbg gpu ioctl cmd: 0x%x",
@@ -1336,6 +1408,16 @@ static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
 	gk20a_dbg_fn("%s pm ctxsw mode = %d",
 		dev_name(dbg_s->dev), args->mode);
 
+	/* Must have a valid reservation to enable/disable hwpm ctxsw.
+	 * Just print an error message for now, but eventually this should
+	 * return an error, at the point where all client sw has been
+	 * cleaned up.
+	 */
+	if (!dbg_s->has_profiler_reservation) {
+		gk20a_err(dev_from_gk20a(g),
+			"session doesn't have a valid reservation");
+	}
+
 	err = gk20a_busy(g->dev);
 	if (err) {
 		gk20a_err(dev_from_gk20a(g), "failed to poweron");
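
Note: until the hard failure lands, clients are expected to take the reservation themselves before flipping HWPM ctxsw mode. A minimal userspace sketch of that ordering follows; the uapi header path, the HWPM_CTXSW_MODE ioctl, and its mode constant are assumed from the pre-existing nvgpu uapi (only the PROFILER_* ioctls further down are introduced by this change):

/* Sketch: take the profiler reservation before enabling HWPM ctxsw.
 * Assumptions: <linux/nvgpu.h> is the installed uapi header, fd is an
 * open dbg/prof session, profiler_handle came from PROFILER_ALLOCATE,
 * and NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE / _MODE_CTXSW exist in the
 * pre-existing uapi (they are not part of this diff). */
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

static int enable_hwpm_with_reservation(int fd, __u32 profiler_handle)
{
	struct nvgpu_dbg_gpu_profiler_reserve_args rsv = {
		.profiler_handle = profiler_handle,
		.acquire = 1,
	};
	struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args mode = {
		.mode = NVGPU_DBG_GPU_HWPM_CTXSW_MODE_CTXSW,
	};

	/* Fails with EBUSY if another session holds the reservation. */
	if (ioctl(fd, NVGPU_DBG_GPU_IOCTL_PROFILER_RESERVE, &rsv) < 0)
		return -1;

	/* With the reservation held, this no longer logs the warning. */
	return ioctl(fd, NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE, &mode);
}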
@@ -1419,6 +1501,261 @@ clean_up:
 	return err;
 }
 
+static int nvgpu_ioctl_allocate_profiler_object(
+				struct dbg_session_gk20a *dbg_s,
+				struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args)
+{
+	int err = 0;
+	struct gk20a *g = get_gk20a(dbg_s->dev);
+	struct dbg_profiler_object_data *prof_obj;
+
+	gk20a_dbg_fn("%s", dev_name(dbg_s->dev));
+
+	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
+
+	err = alloc_profiler(&prof_obj);
+	if (err)
+		goto clean_up;
+
+	prof_obj->session_id = dbg_s->id;
+
+	if (dbg_s->is_profiler)
+		prof_obj->ch = NULL;
+	else {
+		prof_obj->ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
+		if (prof_obj->ch == NULL) {
+			gk20a_err(dev_from_gk20a(g),
+				"bind a channel for dbg session");
+			kfree(prof_obj);
+			err = -EINVAL;
+			goto clean_up;
+		}
+	}
+
+	/* Return handle to client */
+	args->profiler_handle = prof_obj->prof_handle;
+
+	INIT_LIST_HEAD(&prof_obj->prof_obj_entry);
+
+	list_add(&prof_obj->prof_obj_entry, &g->profiler_objects);
+clean_up:
+	nvgpu_mutex_release(&g->dbg_sessions_lock);
+	return err;
+}
+
+static int nvgpu_ioctl_free_profiler_object(
+				struct dbg_session_gk20a *dbg_s,
+				struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args)
+{
+	int err = 0;
+	struct gk20a *g = get_gk20a(dbg_s->dev);
+	struct dbg_profiler_object_data *prof_obj, *tmp_obj;
+	bool obj_found = false;
+
+	gk20a_dbg_fn("%s session_id = %d profiler_handle = %x",
+		dev_name(dbg_s->dev), dbg_s->id, args->profiler_handle);
+
+	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
+
+	/* Remove profiler object from the list, if a match is found */
+	list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects,
+				prof_obj_entry) {
+		if (prof_obj->prof_handle == args->profiler_handle) {
+			if (prof_obj->session_id != dbg_s->id) {
+				gk20a_err(dev_from_gk20a(g),
+					"invalid handle %x",
+					args->profiler_handle);
+				err = -EINVAL;
+				break;
+			}
+			if (prof_obj->has_reservation) {
+				if (prof_obj->ch == NULL)
+					g->global_profiler_reservation_held = false;
+				g->profiler_reservation_count--;
+				dbg_s->has_profiler_reservation = false;
+			}
+			list_del(&prof_obj->prof_obj_entry);
+			kfree(prof_obj);
+			obj_found = true;
+			break;
+		}
+	}
+	if (!obj_found) {
+		gk20a_err(dev_from_gk20a(g), "profiler %x not found",
+			args->profiler_handle);
+		err = -EINVAL;
+	}
+
+	nvgpu_mutex_release(&g->dbg_sessions_lock);
+	return err;
+}
+
+static struct dbg_profiler_object_data *find_matching_prof_obj(
+				struct dbg_session_gk20a *dbg_s,
+				u32 profiler_handle)
+{
+	struct gk20a *g = dbg_s->g;
+	struct dbg_profiler_object_data *prof_obj;
+
+	list_for_each_entry(prof_obj, &g->profiler_objects, prof_obj_entry) {
+		if (prof_obj->prof_handle == profiler_handle) {
+			if (prof_obj->session_id != dbg_s->id) {
+				gk20a_err(dev_from_gk20a(g),
+					"invalid handle %x",
+					profiler_handle);
+				return NULL;
+			}
+			return prof_obj;
+		}
+	}
+	return NULL;
+}
+
+static int nvgpu_profiler_reserve_acquire(struct dbg_session_gk20a *dbg_s,
+				u32 profiler_handle)
+{
+	struct gk20a *g = dbg_s->g;
+	struct dbg_profiler_object_data *prof_obj, *my_prof_obj;
+	int err = 0;
+
+	gk20a_dbg_fn("%s profiler_handle = %x",
+		dev_name(dbg_s->dev), profiler_handle);
+
+	if (g->profiler_reservation_count < 0) {
+		gk20a_err(dev_from_gk20a(g), "Negative reservation count!");
+		return -EINVAL;
+	}
+
+	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
+
+	/* Find matching object. */
+	my_prof_obj = find_matching_prof_obj(dbg_s, profiler_handle);
+
+	if (!my_prof_obj) {
+		gk20a_err(dev_from_gk20a(g), "object not found");
+		err = -EINVAL;
+		goto exit;
+	}
+
+	/* If we already have the reservation, we're done */
+	if (my_prof_obj->has_reservation) {
+		err = 0;
+		goto exit;
+	}
+
+	if (my_prof_obj->ch == NULL) {
+		/* Global reservations are only allowed if there are no other
+		 * global or per-context reservations currently held
+		 */
+		if (g->profiler_reservation_count > 0) {
+			gk20a_err(dev_from_gk20a(g),
+				"global reserve: have existing reservation");
+			err = -EBUSY;
+			goto exit;
+		}
+
+		my_prof_obj->has_reservation = true;
+		g->global_profiler_reservation_held = true;
+		g->profiler_reservation_count = 1;
+		dbg_s->has_profiler_reservation = true;
+	} else if (g->global_profiler_reservation_held) {
+		/* If there's a global reservation,
+		 * we can't take a per-context one.
+		 */
+		gk20a_err(dev_from_gk20a(g),
+			"per-ctxt reserve: global reservation in effect");
+		err = -EBUSY;
+		goto exit;
+	} else if (gk20a_is_channel_marked_as_tsg(my_prof_obj->ch)) {
+		/* TSG: check that another channel in the TSG
+		 * doesn't already have the reservation
+		 */
+		int my_tsgid = my_prof_obj->ch->tsgid;
+
+		list_for_each_entry(prof_obj, &g->profiler_objects,
+				prof_obj_entry) {
+			if (prof_obj->has_reservation &&
+					(prof_obj->ch->tsgid == my_tsgid)) {
+				gk20a_err(dev_from_gk20a(g),
+					"per-ctxt reserve (tsg): already reserved");
+				err = -EBUSY;
+				goto exit;
+			}
+		}
+
+		my_prof_obj->has_reservation = true;
+		g->profiler_reservation_count++;
+		dbg_s->has_profiler_reservation = true;
+	} else {
+		/* channel: check that some other profiler object doesn't
+		 * already have the reservation.
+		 */
+		struct channel_gk20a *my_ch = my_prof_obj->ch;
+
+		list_for_each_entry(prof_obj, &g->profiler_objects,
+				prof_obj_entry) {
+			if (prof_obj->has_reservation &&
+					(prof_obj->ch == my_ch)) {
+				gk20a_err(dev_from_gk20a(g),
+					"per-ctxt reserve (ch): already reserved");
+				err = -EBUSY;
+				goto exit;
+			}
+		}
+
+		my_prof_obj->has_reservation = true;
+		g->profiler_reservation_count++;
+		dbg_s->has_profiler_reservation = true;
+	}
+exit:
+	nvgpu_mutex_release(&g->dbg_sessions_lock);
+	return err;
+}
+
+static int nvgpu_profiler_reserve_release(struct dbg_session_gk20a *dbg_s,
+				u32 profiler_handle)
+{
+	struct gk20a *g = dbg_s->g;
+	struct dbg_profiler_object_data *prof_obj;
+	int err = 0;
+
+	gk20a_dbg_fn("%s profiler_handle = %x",
+		dev_name(dbg_s->dev), profiler_handle);
+
+	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
+
+	/* Find matching object. */
+	prof_obj = find_matching_prof_obj(dbg_s, profiler_handle);
+
+	if (!prof_obj) {
+		gk20a_err(dev_from_gk20a(g), "object not found");
+		err = -EINVAL;
+		goto exit;
+	}
+
+	if (prof_obj->has_reservation) {
+		prof_obj->has_reservation = false;
+		if (prof_obj->ch == NULL)
+			g->global_profiler_reservation_held = false;
+		g->profiler_reservation_count--;
+		dbg_s->has_profiler_reservation = false;
+	} else {
+		gk20a_err(dev_from_gk20a(g), "No reservation found");
+		err = -EINVAL;
+		goto exit;
+	}
+exit:
+	nvgpu_mutex_release(&g->dbg_sessions_lock);
+	return err;
+}
+
+static int nvgpu_ioctl_profiler_reserve(struct dbg_session_gk20a *dbg_s,
+			struct nvgpu_dbg_gpu_profiler_reserve_args *args)
+{
+	if (args->acquire)
+		return nvgpu_profiler_reserve_acquire(dbg_s,
+						args->profiler_handle);
+
+	return nvgpu_profiler_reserve_release(dbg_s, args->profiler_handle);
+}
+
 static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 		struct nvgpu_dbg_gpu_perfbuf_map_args *args)
 {
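
Taken together, nvgpu_profiler_reserve_acquire() enforces three rules: a global reservation (a profiler object with prof_obj->ch == NULL) is granted only when no reservation of any kind is held; a held global reservation blocks every per-context request; and a per-context request is refused when another profiler object already holds a reservation on the same TSG or on the same channel. A standalone model of that policy, using hypothetical simplified types in place of the kernel structures, is sketched below; it compiles and runs on its own:

/* Standalone model of the reservation policy above. Hypothetical,
 * simplified types; not the kernel code. Build with: cc -o model model.c */
#include <stdbool.h>
#include <stdio.h>

struct prof_obj {
	bool global;		/* models prof_obj->ch == NULL */
	int tsgid;		/* models prof_obj->ch->tsgid, -1 if no TSG */
	int chid;		/* models prof_obj->ch->hw_chid */
	bool has_reservation;
};

static bool global_held;	/* models g->global_profiler_reservation_held */
static int reservation_count;	/* models g->profiler_reservation_count */

/* Returns 0 on success, -1 where the kernel would return -EBUSY. */
static int reserve_acquire(struct prof_obj *objs, int n, struct prof_obj *me)
{
	int i;

	if (me->has_reservation)
		return 0;			/* already ours: idempotent */

	if (me->global) {
		if (reservation_count > 0)
			return -1;		/* global needs exclusivity */
	} else if (global_held) {
		return -1;			/* global blocks per-context */
	} else {
		for (i = 0; i < n; i++) {
			if (!objs[i].has_reservation)
				continue;
			/* TSG: conflict on tsgid; bare channel: on chid */
			if ((me->tsgid >= 0 && objs[i].tsgid == me->tsgid) ||
			    (me->tsgid < 0 && objs[i].chid == me->chid))
				return -1;
		}
	}

	me->has_reservation = true;
	if (me->global)
		global_held = true;
	reservation_count++;
	return 0;
}

int main(void)
{
	struct prof_obj objs[2] = {
		{ .global = false, .tsgid = 1,  .chid = 10 },
		{ .global = true,  .tsgid = -1, .chid = -1 },
	};

	/* Per-context acquire succeeds on an idle system... */
	printf("per-ctx acquire: %d\n", reserve_acquire(objs, 2, &objs[0]));
	/* ...after which a global acquire is refused (EBUSY in-kernel). */
	printf("global acquire:  %d\n", reserve_acquire(objs, 2, &objs[1]));
	return 0;
}

In the kernel the bookkeeping for this policy lives in the profiler_objects list and the profiler_reservation_count / global_profiler_reservation_held fields added to struct gk20a below.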


@@ -47,6 +47,9 @@ struct dbg_session_gk20a {
 	/* profiler session, if any */
 	bool is_profiler;
 
+	/* has a valid profiler reservation */
+	bool has_profiler_reservation;
+
 	/* power enabled or disabled */
 	bool is_pg_disabled;
@@ -90,6 +93,14 @@ struct dbg_session_channel_data {
 	struct dbg_session_data *session_data;
 };
 
+struct dbg_profiler_object_data {
+	int session_id;
+	u32 prof_handle;
+	struct channel_gk20a *ch;
+	bool has_reservation;
+	struct list_head prof_obj_entry;
+};
+
 int dbg_unbind_single_channel_gk20a(struct dbg_session_gk20a *dbg_s,
 		struct dbg_session_channel_data *ch_data);


@@ -956,6 +956,11 @@ struct gk20a {
 	struct nvgpu_dbg_gpu_reg_op *dbg_regops_tmp_buf;
 	u32 dbg_regops_tmp_buf_ops;
 
+	/* For profiler reservations */
+	struct list_head profiler_objects;
+	bool global_profiler_reservation_held;
+	int profiler_reservation_count;
+
 	void (*remove_support)(struct device *);
 
 	u64 pg_ingating_time_us;


@@ -252,6 +252,8 @@ static int vgpu_init_support(struct platform_device *pdev)
 	nvgpu_mutex_init(&g->dbg_sessions_lock);
 	nvgpu_mutex_init(&g->client_lock);
+
+	INIT_LIST_HEAD(&g->profiler_objects);
 
 	g->dbg_regops_tmp_buf = kzalloc(SZ_4K, GFP_KERNEL);
 	if (!g->dbg_regops_tmp_buf) {
 		dev_err(g->dev, "couldn't allocate regops tmp buf");


@@ -1264,9 +1264,27 @@ struct nvgpu_dbg_gpu_access_fb_memory_args {
 #define NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY \
 	_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 19, struct nvgpu_dbg_gpu_access_fb_memory_args)
 
+struct nvgpu_dbg_gpu_profiler_obj_mgt_args {
+	__u32 profiler_handle;
+	__u32 reserved;
+};
+
+#define NVGPU_DBG_GPU_IOCTL_PROFILER_ALLOCATE \
+	_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 20, struct nvgpu_dbg_gpu_profiler_obj_mgt_args)
+
+#define NVGPU_DBG_GPU_IOCTL_PROFILER_FREE \
+	_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 21, struct nvgpu_dbg_gpu_profiler_obj_mgt_args)
+
+struct nvgpu_dbg_gpu_profiler_reserve_args {
+	__u32 profiler_handle;
+	__u32 acquire;
+};
+
+#define NVGPU_DBG_GPU_IOCTL_PROFILER_RESERVE \
+	_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 22, struct nvgpu_dbg_gpu_profiler_reserve_args)
+
 #define NVGPU_DBG_GPU_IOCTL_LAST \
-	_IOC_NR(NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY)
+	_IOC_NR(NVGPU_DBG_GPU_IOCTL_PROFILER_RESERVE)
 
 #define NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE \
 	sizeof(struct nvgpu_dbg_gpu_access_fb_memory_args)
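
For reference, a sketch of the full client sequence against the new uapi: allocate a profiler object, acquire and release the reservation, then free the object. The device node name (/dev/nvhost-prof-gpu) and the <linux/nvgpu.h> header path are assumptions, not part of this diff; only the PROFILER_* ioctls and their argument structs come from the hunks above.

/* Sketch: allocate / reserve / release / free, under the stated
 * assumptions about the device node and header path. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/nvgpu.h>

int main(void)
{
	struct nvgpu_dbg_gpu_profiler_obj_mgt_args obj = { 0 };
	struct nvgpu_dbg_gpu_profiler_reserve_args rsv = { 0 };
	int fd = open("/dev/nvhost-prof-gpu", O_RDWR);	/* assumed node */

	if (fd < 0)
		return 1;

	/* 1. Create a profiler object; the driver returns a handle. */
	if (ioctl(fd, NVGPU_DBG_GPU_IOCTL_PROFILER_ALLOCATE, &obj) < 0)
		goto out;
	printf("profiler handle: 0x%x\n", obj.profiler_handle);

	/* 2. Acquire the hwpm reservation (global here, since a profiler
	 *    session has no bound channel). EBUSY if already held. */
	rsv.profiler_handle = obj.profiler_handle;
	rsv.acquire = 1;
	if (ioctl(fd, NVGPU_DBG_GPU_IOCTL_PROFILER_RESERVE, &rsv) < 0)
		goto free_obj;

	/* ... program perfmons, sample counters ... */

	/* 3. Release the reservation. */
	rsv.acquire = 0;
	ioctl(fd, NVGPU_DBG_GPU_IOCTL_PROFILER_RESERVE, &rsv);

free_obj:
	/* 4. Free the object (this also drops a still-held reservation). */
	ioctl(fd, NVGPU_DBG_GPU_IOCTL_PROFILER_FREE, &obj);
out:
	close(fd);
	return 0;
}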