gpu: nvgpu: profiler create/free, hwpm reserve

Add support for creating/freeing profiler objects and for hwpm reservations.

Bug 1775465
JIRA EVLR-680
JIRA EVLR-682

Change-Id: I4db83d00e4b0b552b05b9aae96dc553dd1257d88
Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com>
Reviewed-on: http://git-master/r/1322487
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Author:    Peter Daifuku <pdaifuku@nvidia.com>
Date:      2017-01-25 18:50:44 -08:00
Committer: mobile promotions
Parent:    22ac82a075
Commit:    009d9fd7f7

6 changed files with 382 additions and 7 deletions


@@ -52,6 +52,8 @@ static void nvgpu_init_vars(struct gk20a *g)
 	INIT_LIST_HEAD(&g->pending_sema_waits);
 	nvgpu_raw_spinlock_init(&g->pending_sema_waits_lock);
+
+	INIT_LIST_HEAD(&g->profiler_objects);
 }
 
 static void nvgpu_init_timeout(struct gk20a *g)


@@ -62,11 +62,12 @@ nvgpu_dbg_gpu_get_session_channel(struct dbg_session_gk20a *dbg_s)
 	return ch;
 }
 
-/* silly allocator - just increment session id */
+/* silly allocators - just increment id */
 static atomic_t session_id = ATOMIC_INIT(0);
-static int generate_session_id(void)
+static atomic_t profiler_id = ATOMIC_INIT(0);
+static int generate_id(atomic_t *id)
 {
-	return atomic_add_return(1, &session_id);
+	return atomic_add_return(1, id);
 }
 
 static int alloc_session(struct dbg_session_gk20a **_dbg_s)
@@ -80,11 +81,27 @@ static int alloc_session(struct dbg_session_gk20a **_dbg_s)
 	if (!dbg_s)
 		return -ENOMEM;
 
-	dbg_s->id = generate_session_id();
+	dbg_s->id = generate_id(&session_id);
 	*_dbg_s = dbg_s;
 	return 0;
 }
 
+static int alloc_profiler(struct dbg_profiler_object_data **_prof)
+{
+	struct dbg_profiler_object_data *prof;
+	*_prof = NULL;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
+
+	prof = kzalloc(sizeof(*prof), GFP_KERNEL);
+	if (!prof)
+		return -ENOMEM;
+
+	prof->prof_handle = generate_id(&profiler_id);
+	*_prof = prof;
+	return 0;
+}
+
 static int gk20a_dbg_gpu_do_dev_open(struct inode *inode,
 		struct file *filp, bool is_profiler)
 {
@@ -384,13 +401,28 @@ int dbg_unbind_single_channel_gk20a(struct dbg_session_gk20a *dbg_s,
 {
 	struct gk20a *g = dbg_s->g;
 	int chid;
+	struct channel_gk20a *ch;
 	struct dbg_session_data *session_data;
+	struct dbg_profiler_object_data *prof_obj, *tmp_obj;
 
 	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
 
 	chid = ch_data->chid;
+	ch = g->fifo.channel + chid;
+
+	/* If there's a profiler ctx reservation record associated with this
+	 * session/channel pair, release it.
+	 */
+	list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects,
+				prof_obj_entry) {
+		if ((prof_obj->session_id == dbg_s->id) &&
+			(prof_obj->ch->hw_chid == chid)) {
+			if (prof_obj->has_reservation) {
+				g->profiler_reservation_count--;
+				dbg_s->has_profiler_reservation = false;
+			}
+			list_del(&prof_obj->prof_obj_entry);
+			kfree(prof_obj);
+		}
+	}
 
 	list_del_init(&ch_data->ch_entry);
@@ -464,6 +496,7 @@ int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp)
 {
 	struct dbg_session_gk20a *dbg_s = filp->private_data;
 	struct gk20a *g = dbg_s->g;
+	struct dbg_profiler_object_data *prof_obj, *tmp_obj;
 
 	gk20a_dbg(gpu_dbg_gpu_dbg | gpu_dbg_fn, "%s", dev_name(dbg_s->dev));
@@ -478,6 +511,21 @@ int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp)
 		g->ops.dbg_session_ops.dbg_set_powergate(dbg_s,
 				NVGPU_DBG_GPU_POWERGATE_MODE_ENABLE);
 	nvgpu_dbg_timeout_enable(dbg_s, NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE);
+
+	/* Per-context profiler objects were released when we called
+	 * dbg_unbind_all_channels. We could still have global ones.
+	 */
+	list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects,
+				prof_obj_entry) {
+		if (prof_obj->session_id == dbg_s->id) {
+			if (prof_obj->has_reservation) {
+				g->global_profiler_reservation_held = false;
+				g->profiler_reservation_count--;
+			}
+			list_del(&prof_obj->prof_obj_entry);
+			kfree(prof_obj);
+		}
+	}
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
 
 	kfree(dbg_s);
@@ -563,6 +611,15 @@ static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm(
 		struct dbg_session_gk20a *dbg_s,
 		struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args);
 
+static int nvgpu_ioctl_allocate_profiler_object(struct dbg_session_gk20a *dbg_s,
+		struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args);
+
+static int nvgpu_ioctl_free_profiler_object(struct dbg_session_gk20a *dbg_s,
+		struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args);
+
+static int nvgpu_ioctl_profiler_reserve(struct dbg_session_gk20a *dbg_s,
+		struct nvgpu_dbg_gpu_profiler_reserve_args *args);
+
 static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 		struct nvgpu_dbg_gpu_perfbuf_map_args *args);
@@ -1001,6 +1058,21 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
 			(struct nvgpu_dbg_gpu_access_fb_memory_args *)buf);
 		break;
 
+	case NVGPU_DBG_GPU_IOCTL_PROFILER_ALLOCATE:
+		err = nvgpu_ioctl_allocate_profiler_object(dbg_s,
+			(struct nvgpu_dbg_gpu_profiler_obj_mgt_args *)buf);
+		break;
+
+	case NVGPU_DBG_GPU_IOCTL_PROFILER_FREE:
+		err = nvgpu_ioctl_free_profiler_object(dbg_s,
+			(struct nvgpu_dbg_gpu_profiler_obj_mgt_args *)buf);
+		break;
+
+	case NVGPU_DBG_GPU_IOCTL_PROFILER_RESERVE:
+		err = nvgpu_ioctl_profiler_reserve(dbg_s,
+			(struct nvgpu_dbg_gpu_profiler_reserve_args *)buf);
+		break;
+
 	default:
 		gk20a_err(dev_from_gk20a(g),
 			"unrecognized dbg gpu ioctl cmd: 0x%x",
@@ -1336,6 +1408,16 @@ static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
 	gk20a_dbg_fn("%s pm ctxsw mode = %d",
 		dev_name(dbg_s->dev), args->mode);
 
+	/* Must have a valid reservation to enable/disable hwpm ctxsw.
+	 * Just print an error message for now, but eventually this should
+	 * return an error, at the point where all client sw has been
+	 * cleaned up.
+	 */
+	if (!dbg_s->has_profiler_reservation) {
+		gk20a_err(dev_from_gk20a(g),
+			"session doesn't have a valid reservation");
+	}
+
 	err = gk20a_busy(g->dev);
 	if (err) {
 		gk20a_err(dev_from_gk20a(g), "failed to poweron");
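
Note: until the hard failure lands, clients are expected to take the reservation themselves before flipping HWPM ctxsw mode. A minimal userspace sketch of that ordering follows; the uapi header path, the HWPM_CTXSW_MODE ioctl, and its mode constant are assumed from the pre-existing nvgpu uapi (only the PROFILER_* ioctls further down are introduced by this change):

/* Sketch: take the profiler reservation before enabling HWPM ctxsw.
 * Assumptions: <linux/nvgpu.h> is the installed uapi header, fd is an
 * open dbg/prof session, profiler_handle came from PROFILER_ALLOCATE,
 * and NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE / _MODE_CTXSW exist in the
 * pre-existing uapi (they are not part of this diff). */
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

static int enable_hwpm_with_reservation(int fd, __u32 profiler_handle)
{
	struct nvgpu_dbg_gpu_profiler_reserve_args rsv = {
		.profiler_handle = profiler_handle,
		.acquire = 1,
	};
	struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args mode = {
		.mode = NVGPU_DBG_GPU_HWPM_CTXSW_MODE_CTXSW,
	};

	/* Fails with EBUSY if another session holds the reservation. */
	if (ioctl(fd, NVGPU_DBG_GPU_IOCTL_PROFILER_RESERVE, &rsv) < 0)
		return -1;

	/* With the reservation held, this no longer logs the warning. */
	return ioctl(fd, NVGPU_DBG_GPU_IOCTL_HWPM_CTXSW_MODE, &mode);
}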
@@ -1419,6 +1501,261 @@ clean_up:
 	return err;
 }
 
+static int nvgpu_ioctl_allocate_profiler_object(
+				struct dbg_session_gk20a *dbg_s,
+				struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args)
+{
+	int err = 0;
+	struct gk20a *g = get_gk20a(dbg_s->dev);
+	struct dbg_profiler_object_data *prof_obj;
+
+	gk20a_dbg_fn("%s", dev_name(dbg_s->dev));
+
+	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
+
+	err = alloc_profiler(&prof_obj);
+	if (err)
+		goto clean_up;
+
+	prof_obj->session_id = dbg_s->id;
+
+	if (dbg_s->is_profiler)
+		prof_obj->ch = NULL;
+	else {
+		prof_obj->ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
+		if (prof_obj->ch == NULL) {
+			gk20a_err(dev_from_gk20a(g),
+				"bind a channel for dbg session");
+			kfree(prof_obj);
+			err = -EINVAL;
+			goto clean_up;
+		}
+	}
+
+	/* Return handle to client */
+	args->profiler_handle = prof_obj->prof_handle;
+
+	INIT_LIST_HEAD(&prof_obj->prof_obj_entry);
+
+	list_add(&prof_obj->prof_obj_entry, &g->profiler_objects);
+clean_up:
+	nvgpu_mutex_release(&g->dbg_sessions_lock);
+	return err;
+}
+
+static int nvgpu_ioctl_free_profiler_object(
+				struct dbg_session_gk20a *dbg_s,
+				struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args)
+{
+	int err = 0;
+	struct gk20a *g = get_gk20a(dbg_s->dev);
+	struct dbg_profiler_object_data *prof_obj, *tmp_obj;
+	bool obj_found = false;
+
+	gk20a_dbg_fn("%s session_id = %d profiler_handle = %x",
+		dev_name(dbg_s->dev), dbg_s->id, args->profiler_handle);
+
+	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
+
+	/* Remove profiler object from the list, if a match is found */
+	list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects,
+				prof_obj_entry) {
+		if (prof_obj->prof_handle == args->profiler_handle) {
+			if (prof_obj->session_id != dbg_s->id) {
+				gk20a_err(dev_from_gk20a(g),
+					"invalid handle %x",
+					args->profiler_handle);
+				err = -EINVAL;
+				break;
+			}
+			if (prof_obj->has_reservation) {
+				if (prof_obj->ch == NULL)
+					g->global_profiler_reservation_held = false;
+				g->profiler_reservation_count--;
+				dbg_s->has_profiler_reservation = false;
+			}
+			list_del(&prof_obj->prof_obj_entry);
+			kfree(prof_obj);
+			obj_found = true;
+			break;
+		}
+	}
+	if (!obj_found) {
+		gk20a_err(dev_from_gk20a(g), "profiler %x not found",
+			args->profiler_handle);
+		err = -EINVAL;
+	}
+
+	nvgpu_mutex_release(&g->dbg_sessions_lock);
+	return err;
+}
+
+static struct dbg_profiler_object_data *find_matching_prof_obj(
+				struct dbg_session_gk20a *dbg_s,
+				u32 profiler_handle)
+{
+	struct gk20a *g = dbg_s->g;
+	struct dbg_profiler_object_data *prof_obj;
+
+	list_for_each_entry(prof_obj, &g->profiler_objects, prof_obj_entry) {
+		if (prof_obj->prof_handle == profiler_handle) {
+			if (prof_obj->session_id != dbg_s->id) {
+				gk20a_err(dev_from_gk20a(g),
+					"invalid handle %x",
+					profiler_handle);
+				return NULL;
+			}
+			return prof_obj;
+		}
+	}
+	return NULL;
+}
+
+static int nvgpu_profiler_reserve_acquire(struct dbg_session_gk20a *dbg_s,
+				u32 profiler_handle)
+{
+	struct gk20a *g = dbg_s->g;
+	struct dbg_profiler_object_data *prof_obj, *my_prof_obj;
+	int err = 0;
+
+	gk20a_dbg_fn("%s profiler_handle = %x",
+		dev_name(dbg_s->dev), profiler_handle);
+
+	if (g->profiler_reservation_count < 0) {
+		gk20a_err(dev_from_gk20a(g), "Negative reservation count!");
+		return -EINVAL;
+	}
+
+	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
+
+	/* Find matching object. */
+	my_prof_obj = find_matching_prof_obj(dbg_s, profiler_handle);
+
+	if (!my_prof_obj) {
+		gk20a_err(dev_from_gk20a(g), "object not found");
+		err = -EINVAL;
+		goto exit;
+	}
+
+	/* If we already have the reservation, we're done */
+	if (my_prof_obj->has_reservation) {
+		err = 0;
+		goto exit;
+	}
+
+	if (my_prof_obj->ch == NULL) {
+		/* Global reservations are only allowed if there are no other
+		 * global or per-context reservations currently held
+		 */
+		if (g->profiler_reservation_count > 0) {
+			gk20a_err(dev_from_gk20a(g),
+				"global reserve: have existing reservation");
+			err = -EBUSY;
+			goto exit;
+		}
+
+		my_prof_obj->has_reservation = true;
+		g->global_profiler_reservation_held = true;
+		g->profiler_reservation_count = 1;
+		dbg_s->has_profiler_reservation = true;
+	} else if (g->global_profiler_reservation_held) {
+		/* If there's a global reservation,
+		 * we can't take a per-context one.
+		 */
+		gk20a_err(dev_from_gk20a(g),
+			"per-ctxt reserve: global reservation in effect");
+		err = -EBUSY;
+		goto exit;
+	} else if (gk20a_is_channel_marked_as_tsg(my_prof_obj->ch)) {
+		/* TSG: check that another channel in the TSG
+		 * doesn't already have the reservation
+		 */
+		int my_tsgid = my_prof_obj->ch->tsgid;
+
+		list_for_each_entry(prof_obj, &g->profiler_objects,
+				prof_obj_entry) {
+			if (prof_obj->has_reservation &&
+					(prof_obj->ch->tsgid == my_tsgid)) {
+				gk20a_err(dev_from_gk20a(g),
+					"per-ctxt reserve (tsg): already reserved");
+				err = -EBUSY;
+				goto exit;
+			}
+		}
+
+		my_prof_obj->has_reservation = true;
+		g->profiler_reservation_count++;
+		dbg_s->has_profiler_reservation = true;
+	} else {
+		/* channel: check that some other profiler object doesn't
+		 * already have the reservation.
+		 */
+		struct channel_gk20a *my_ch = my_prof_obj->ch;
+
+		list_for_each_entry(prof_obj, &g->profiler_objects,
+				prof_obj_entry) {
+			if (prof_obj->has_reservation &&
+					(prof_obj->ch == my_ch)) {
+				gk20a_err(dev_from_gk20a(g),
+					"per-ctxt reserve (ch): already reserved");
+				err = -EBUSY;
+				goto exit;
+			}
+		}
+
+		my_prof_obj->has_reservation = true;
+		g->profiler_reservation_count++;
+		dbg_s->has_profiler_reservation = true;
+	}
+exit:
+	nvgpu_mutex_release(&g->dbg_sessions_lock);
+	return err;
+}
+
+static int nvgpu_profiler_reserve_release(struct dbg_session_gk20a *dbg_s,
+				u32 profiler_handle)
+{
+	struct gk20a *g = dbg_s->g;
+	struct dbg_profiler_object_data *prof_obj;
+	int err = 0;
+
+	gk20a_dbg_fn("%s profiler_handle = %x",
+		dev_name(dbg_s->dev), profiler_handle);
+
+	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
+
+	/* Find matching object. */
+	prof_obj = find_matching_prof_obj(dbg_s, profiler_handle);
+
+	if (!prof_obj) {
+		gk20a_err(dev_from_gk20a(g), "object not found");
+		err = -EINVAL;
+		goto exit;
+	}
+
+	if (prof_obj->has_reservation) {
+		prof_obj->has_reservation = false;
+		if (prof_obj->ch == NULL)
+			g->global_profiler_reservation_held = false;
+		g->profiler_reservation_count--;
+		dbg_s->has_profiler_reservation = false;
+	} else {
+		gk20a_err(dev_from_gk20a(g), "No reservation found");
+		err = -EINVAL;
+		goto exit;
+	}
+exit:
+	nvgpu_mutex_release(&g->dbg_sessions_lock);
+	return err;
+}
+
+static int nvgpu_ioctl_profiler_reserve(struct dbg_session_gk20a *dbg_s,
+			struct nvgpu_dbg_gpu_profiler_reserve_args *args)
+{
+	if (args->acquire)
+		return nvgpu_profiler_reserve_acquire(dbg_s,
+						args->profiler_handle);
+
+	return nvgpu_profiler_reserve_release(dbg_s, args->profiler_handle);
+}
+
 static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 		struct nvgpu_dbg_gpu_perfbuf_map_args *args)
 {
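
Taken together, nvgpu_profiler_reserve_acquire() enforces three rules: a global reservation (a profiler object with prof_obj->ch == NULL) is granted only when no reservation of any kind is held; a held global reservation blocks every per-context request; and a per-context request is refused when another profiler object already holds a reservation on the same TSG or on the same channel. A standalone model of that policy, using hypothetical simplified types in place of the kernel structures, is sketched below; it compiles and runs on its own:

/* Standalone model of the reservation policy above. Hypothetical,
 * simplified types; not the kernel code. Build with: cc -o model model.c */
#include <stdbool.h>
#include <stdio.h>

struct prof_obj {
	bool global;		/* models prof_obj->ch == NULL */
	int tsgid;		/* models prof_obj->ch->tsgid, -1 if no TSG */
	int chid;		/* models prof_obj->ch->hw_chid */
	bool has_reservation;
};

static bool global_held;	/* models g->global_profiler_reservation_held */
static int reservation_count;	/* models g->profiler_reservation_count */

/* Returns 0 on success, -1 where the kernel would return -EBUSY. */
static int reserve_acquire(struct prof_obj *objs, int n, struct prof_obj *me)
{
	int i;

	if (me->has_reservation)
		return 0;			/* already ours: idempotent */

	if (me->global) {
		if (reservation_count > 0)
			return -1;		/* global needs exclusivity */
	} else if (global_held) {
		return -1;			/* global blocks per-context */
	} else {
		for (i = 0; i < n; i++) {
			if (!objs[i].has_reservation)
				continue;
			/* TSG: conflict on tsgid; bare channel: on chid */
			if ((me->tsgid >= 0 && objs[i].tsgid == me->tsgid) ||
			    (me->tsgid < 0 && objs[i].chid == me->chid))
				return -1;
		}
	}

	me->has_reservation = true;
	if (me->global)
		global_held = true;
	reservation_count++;
	return 0;
}

int main(void)
{
	struct prof_obj objs[2] = {
		{ .global = false, .tsgid = 1,  .chid = 10 },
		{ .global = true,  .tsgid = -1, .chid = -1 },
	};

	/* Per-context acquire succeeds on an idle system... */
	printf("per-ctx acquire: %d\n", reserve_acquire(objs, 2, &objs[0]));
	/* ...after which a global acquire is refused (EBUSY in-kernel). */
	printf("global acquire:  %d\n", reserve_acquire(objs, 2, &objs[1]));
	return 0;
}

In the kernel the bookkeeping for this policy lives in the profiler_objects list and the profiler_reservation_count / global_profiler_reservation_held fields added to struct gk20a below.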


@@ -47,6 +47,9 @@ struct dbg_session_gk20a {
 	/* profiler session, if any */
 	bool is_profiler;
 
+	/* has a valid profiler reservation */
+	bool has_profiler_reservation;
+
 	/* power enabled or disabled */
 	bool is_pg_disabled;
@@ -90,6 +93,14 @@ struct dbg_session_channel_data {
 	struct dbg_session_data *session_data;
 };
 
+struct dbg_profiler_object_data {
+	int session_id;
+	u32 prof_handle;
+	struct channel_gk20a *ch;
+	bool has_reservation;
+	struct list_head prof_obj_entry;
+};
+
 int dbg_unbind_single_channel_gk20a(struct dbg_session_gk20a *dbg_s,
 		struct dbg_session_channel_data *ch_data);


@@ -956,6 +956,11 @@ struct gk20a {
 	struct nvgpu_dbg_gpu_reg_op *dbg_regops_tmp_buf;
 	u32 dbg_regops_tmp_buf_ops;
 
+	/* For profiler reservations */
+	struct list_head profiler_objects;
+	bool global_profiler_reservation_held;
+	int profiler_reservation_count;
+
 	void (*remove_support)(struct device *);
 
 	u64 pg_ingating_time_us;


@@ -252,6 +252,8 @@ static int vgpu_init_support(struct platform_device *pdev)
 	nvgpu_mutex_init(&g->dbg_sessions_lock);
 	nvgpu_mutex_init(&g->client_lock);
+
+	INIT_LIST_HEAD(&g->profiler_objects);
 
 	g->dbg_regops_tmp_buf = kzalloc(SZ_4K, GFP_KERNEL);
 	if (!g->dbg_regops_tmp_buf) {
 		dev_err(g->dev, "couldn't allocate regops tmp buf");


@@ -1264,9 +1264,27 @@ struct nvgpu_dbg_gpu_access_fb_memory_args {
 #define NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY \
 	_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 19, struct nvgpu_dbg_gpu_access_fb_memory_args)
 
+struct nvgpu_dbg_gpu_profiler_obj_mgt_args {
+	__u32 profiler_handle;
+	__u32 reserved;
+};
+
+#define NVGPU_DBG_GPU_IOCTL_PROFILER_ALLOCATE \
+	_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 20, struct nvgpu_dbg_gpu_profiler_obj_mgt_args)
+
+#define NVGPU_DBG_GPU_IOCTL_PROFILER_FREE \
+	_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 21, struct nvgpu_dbg_gpu_profiler_obj_mgt_args)
+
+struct nvgpu_dbg_gpu_profiler_reserve_args {
+	__u32 profiler_handle;
+	__u32 acquire;
+};
+
+#define NVGPU_DBG_GPU_IOCTL_PROFILER_RESERVE \
+	_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 22, struct nvgpu_dbg_gpu_profiler_reserve_args)
+
 #define NVGPU_DBG_GPU_IOCTL_LAST \
-	_IOC_NR(NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY)
+	_IOC_NR(NVGPU_DBG_GPU_IOCTL_PROFILER_RESERVE)
 
 #define NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE \
 	sizeof(struct nvgpu_dbg_gpu_access_fb_memory_args)
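
For reference, a sketch of the full client sequence against the new uapi: allocate a profiler object, acquire and release the reservation, then free the object. The device node name (/dev/nvhost-prof-gpu) and the <linux/nvgpu.h> header path are assumptions, not part of this diff; only the PROFILER_* ioctls and their argument structs come from the hunks above.

/* Sketch: allocate / reserve / release / free, under the stated
 * assumptions about the device node and header path. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/nvgpu.h>

int main(void)
{
	struct nvgpu_dbg_gpu_profiler_obj_mgt_args obj = { 0 };
	struct nvgpu_dbg_gpu_profiler_reserve_args rsv = { 0 };
	int fd = open("/dev/nvhost-prof-gpu", O_RDWR);	/* assumed node */

	if (fd < 0)
		return 1;

	/* 1. Create a profiler object; the driver returns a handle. */
	if (ioctl(fd, NVGPU_DBG_GPU_IOCTL_PROFILER_ALLOCATE, &obj) < 0)
		goto out;
	printf("profiler handle: 0x%x\n", obj.profiler_handle);

	/* 2. Acquire the hwpm reservation (global here, since a profiler
	 *    session has no bound channel). EBUSY if already held. */
	rsv.profiler_handle = obj.profiler_handle;
	rsv.acquire = 1;
	if (ioctl(fd, NVGPU_DBG_GPU_IOCTL_PROFILER_RESERVE, &rsv) < 0)
		goto free_obj;

	/* ... program perfmons, sample counters ... */

	/* 3. Release the reservation. */
	rsv.acquire = 0;
	ioctl(fd, NVGPU_DBG_GPU_IOCTL_PROFILER_RESERVE, &rsv);

free_obj:
	/* 4. Free the object (this also drops a still-held reservation). */
	ioctl(fd, NVGPU_DBG_GPU_IOCTL_PROFILER_FREE, &obj);
out:
	close(fd);
	return 0;
}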