gpu: nvgpu: rework pm resource reservation system

The current PM resource reservation system is limited to HWPM
resources only, and reservation tracking is done with boolean
variables.

Upcoming profiler support requires reservations for all PM resources,
such as SMPC and the PMA stream. Boolean variables do not scale and
are confusing, and they also have to be replicated on the GPU server
in the virtualization case.

Remove the flag-based tracking mechanism and use a list-based approach
to track all PM reservations. Also, the current HALs are defined on
the debugger object; implement the new HALs on a new pm_reservation
object instead, since reservation management is really independent
functionality.
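
For illustration, a list-based reservation entry could look roughly
like the sketch below; the struct and field names are illustrative,
not the exact nvgpu definitions:

  /*
   * Sketch only: one entry is added to a per-resource-type list on
   * acquire and removed on release, replacing the old boolean flags.
   */
  struct nvgpu_pm_resource_reservation_entry {
  	struct nvgpu_list_node entry;	/* links into per-type list */
  	u32 reservation_id;		/* owning profiler object */
  	u32 vmid;			/* owning guest OS, for vGPU */
  	enum nvgpu_profiler_pm_reservation_scope scope;
  };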

Add a new source file, common/profiler/pm_reservation.c, which
implements functions to reserve/release resources and to check whether
a given resource is currently reserved.
Add common/vgpu/pm_reservation_vgpu.c for vGPU, which simply forwards
requests to the GPU server. A sketch of the acquire-time scope check
is shown below.
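
The scope rules mirror the old flag-based logic: a device-scope
reservation must be exclusive, and a context-scope reservation is
rejected while a device-scope one is held. A minimal sketch, assuming
the illustrative entry type above:

  static bool reservation_allowed(struct nvgpu_list_node *head,
  		enum nvgpu_profiler_pm_reservation_scope scope)
  {
  	struct nvgpu_pm_resource_reservation_entry *e;

  	nvgpu_list_for_each_entry(e, head,
  			nvgpu_pm_resource_reservation_entry, entry) {
  		/*
  		 * DEVICE scope conflicts with any existing entry, and
  		 * any new scope conflicts with an existing DEVICE entry.
  		 */
  		if (scope == NVGPU_PROFILER_PM_RESERVATION_SCOPE_DEVICE ||
  		    e->scope == NVGPU_PROFILER_PM_RESERVATION_SCOPE_DEVICE) {
  			return false;
  		}
  	}
  	return true;
  }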

Define a new HAL object, gops.pm_reservation, and hook the above
functions to the following respective HALs:
g->ops.pm_reservation.acquire()
g->ops.pm_reservation.release()
g->ops.pm_reservation.release_all_per_vmid()

The last HAL above is used only by the GPU server, to clean up after a
guest OS.
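
The shape of the new HAL object might look roughly like this; only the
three hook names above are taken from this change, the signatures are
illustrative:

  struct gops_pm_reservation {
  	int (*acquire)(struct gk20a *g, u32 reservation_id,
  			enum nvgpu_profiler_pm_resource_type pm_resource,
  			enum nvgpu_profiler_pm_reservation_scope scope,
  			u32 vmid);
  	int (*release)(struct gk20a *g, u32 reservation_id,
  			enum nvgpu_profiler_pm_resource_type pm_resource,
  			u32 vmid);
  	void (*release_all_per_vmid)(struct gk20a *g, u32 vmid);
  };

On native GPUs these point at the common/profiler/pm_reservation.c
implementations; on vGPU they point at the forwarding stubs in
common/vgpu/pm_reservation_vgpu.c.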

Add the following new common profiler functions, which act as the
reserve/release APIs for the rest of the units in nvgpu:
nvgpu_profiler_pm_resource_reserve()
nvgpu_profiler_pm_resource_release()
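
Callers pass the PM resource type explicitly; for example, the legacy
HWPM path in the ioctl layer (visible in the diff below) becomes:

  err = nvgpu_profiler_pm_resource_reserve(prof_obj,
  		NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY);
  ...
  nvgpu_profiler_pm_resource_release(prof_obj,
  		NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY);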

Initialize the metadata required by the reservation system in
nvgpu_pm_reservation_init() and call it during nvgpu_finalize_poweron.
Clean up the metadata before releasing struct gk20a.
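
A minimal sketch of what that init has to set up, assuming
illustrative member names on struct gk20a (pm_reservations,
pm_reservations_lock and the _COUNT enumerator are hypothetical):

  int nvgpu_pm_reservation_init(struct gk20a *g)
  {
  	u32 i;

  	/* One reservation list per PM resource type, plus a lock. */
  	for (i = 0U; i < NVGPU_PROFILER_PM_RESOURCE_TYPE_COUNT; i++) {
  		nvgpu_init_list_node(&g->pm_reservations[i].head);
  		g->pm_reservations[i].count = 0U;
  	}
  	return nvgpu_mutex_init(&g->pm_reservations_lock);
  }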

Delete the following HALs:
g->ops.debugger.check_and_set_global_reservation()
g->ops.debugger.check_and_set_context_reservation()
g->ops.debugger.release_profiler_reservation()

Bug 2510974
Jira NVGPU-5360

Change-Id: I4d9f89c58c791b3b2e63099a8a603462e5319222
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2367224
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Author: Deepak Nibade, 2020-04-30 16:23:44 +05:30
Committed-by: Alex Waterman
Parent: b21116485f
Commit: 08308bc936
24 changed files with 666 additions and 278 deletions

@@ -210,11 +210,9 @@ int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp)
 	nvgpu_list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects,
 				nvgpu_profiler_object, prof_obj_entry) {
 		if (prof_obj->session_id == dbg_s->id) {
-			if (prof_obj->has_reservation)
-				g->ops.debugger.
-					release_profiler_reservation(dbg_s, prof_obj);
-			nvgpu_list_del(&prof_obj->prof_obj_entry);
-			nvgpu_kfree(g, prof_obj);
+			nvgpu_profiler_pm_resource_release(prof_obj,
+				NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY);
+			nvgpu_profiler_free(prof_obj);
 		}
 	}
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
@@ -460,12 +458,9 @@ static int dbg_unbind_single_channel_gk20a(struct dbg_session_gk20a *dbg_s,
 				nvgpu_profiler_object, prof_obj_entry) {
 		if ((prof_obj->session_id == dbg_s->id) &&
 				(prof_obj->tsg->tsgid == ch->tsgid)) {
-			if (prof_obj->has_reservation) {
-				g->ops.debugger.
-					release_profiler_reservation(dbg_s, prof_obj);
-			}
-			nvgpu_list_del(&prof_obj->prof_obj_entry);
-			nvgpu_kfree(g, prof_obj);
+			nvgpu_profiler_pm_resource_release(prof_obj,
+				NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY);
+			nvgpu_profiler_free(prof_obj);
 		}
 	}
@@ -1025,6 +1020,8 @@ static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
 	struct nvgpu_channel *ch_gk20a;
 	struct nvgpu_tsg *tsg;
 	u32 mode = nvgpu_hwpm_ctxsw_mode_to_common_mode(args->mode);
+	struct nvgpu_profiler_object *prof_obj, *tmp_obj;
+	bool reserved = false;
 
 	nvgpu_log_fn(g, "%s pm ctxsw mode = %d", g->name, args->mode);
@@ -1033,9 +1030,17 @@ static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
 	 * return an error, at the point where all client sw has been
 	 * cleaned up.
 	 */
-	if (!dbg_s->has_profiler_reservation) {
-		nvgpu_err(g,
-			"session doesn't have a valid reservation");
+	nvgpu_list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects,
+			nvgpu_profiler_object, prof_obj_entry) {
+		if (prof_obj->session_id == dbg_s->id) {
+			if (prof_obj->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY]) {
+				reserved = true;
+			}
+		}
+	}
+
+	if (!reserved) {
+		nvgpu_err(g, "session doesn't have a valid reservation");
 	}
 
 	err = gk20a_busy(g);
@@ -1191,6 +1196,7 @@ static int nvgpu_ioctl_allocate_profiler_object(
 	struct nvgpu_profiler_object *prof_obj;
 	struct nvgpu_channel *ch = NULL;
 	struct nvgpu_tsg *tsg;
+	enum nvgpu_profiler_pm_reservation_scope scope;
 
 	nvgpu_log_fn(g, "%s", g->name);
@@ -1205,7 +1211,13 @@ static int nvgpu_ioctl_allocate_profiler_object(
 		}
 	}
 
-	err = nvgpu_profiler_alloc(g, &prof_obj);
+	if (ch != NULL) {
+		scope = NVGPU_PROFILER_PM_RESERVATION_SCOPE_CONTEXT;
+	} else {
+		scope = NVGPU_PROFILER_PM_RESERVATION_SCOPE_DEVICE;
+	}
+
+	err = nvgpu_profiler_alloc(g, &prof_obj, scope);
 	if (err != 0) {
 		goto clean_up;
 	}
@@ -1256,9 +1268,8 @@ static int nvgpu_ioctl_free_profiler_object(
 			err = -EINVAL;
 			break;
 		}
-		if (prof_obj->has_reservation)
-			g->ops.debugger.
-				release_profiler_reservation(dbg_s, prof_obj);
+		nvgpu_profiler_pm_resource_release(prof_obj,
+			NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY);
 		nvgpu_profiler_free(prof_obj);
 		obj_found = true;
 		break;
@@ -1716,13 +1727,9 @@ static int nvgpu_profiler_reserve_release(struct dbg_session_gk20a *dbg_s,
 		goto exit;
 	}
 
-	if (prof_obj->has_reservation)
-		g->ops.debugger.release_profiler_reservation(dbg_s, prof_obj);
-	else {
-		nvgpu_err(g, "No reservation found");
-		err = -EINVAL;
-		goto exit;
-	}
+	err = nvgpu_profiler_pm_resource_release(prof_obj,
+			NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY);
 
 exit:
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
 	return err;
@@ -1732,73 +1739,42 @@ static int nvgpu_profiler_reserve_acquire(struct dbg_session_gk20a *dbg_s,
 		u32 profiler_handle)
 {
 	struct gk20a *g = dbg_s->g;
-	struct nvgpu_profiler_object *prof_obj, *my_prof_obj;
+	struct nvgpu_profiler_object *prof_obj;
 	int err = 0;
 
 	nvgpu_log_fn(g, "%s profiler_handle = %x", g->name, profiler_handle);
 
-	if (g->profiler_reservation_count < 0) {
-		nvgpu_err(g, "Negative reservation count!");
-		return -EINVAL;
-	}
-
 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
 
 	/* Find matching object. */
-	my_prof_obj = find_matching_prof_obj(dbg_s, profiler_handle);
+	prof_obj = find_matching_prof_obj(dbg_s, profiler_handle);
 
-	if (!my_prof_obj) {
+	if (!prof_obj) {
 		nvgpu_err(g, "object not found");
 		err = -EINVAL;
 		goto exit;
 	}
 
-	/* If we already have the reservation, we're done */
-	if (my_prof_obj->has_reservation) {
-		err = 0;
-		goto exit;
-	}
+	if (prof_obj->tsg != NULL) {
+		struct nvgpu_tsg *tsg = prof_obj->tsg;
+		struct nvgpu_profiler_object *tmp_obj;
 
-	if (my_prof_obj->tsg == NULL) {
-		/* Global reservations are only allowed if there are no other
-		 * global or per-context reservations currently held
-		 */
-		if (!g->ops.debugger.check_and_set_global_reservation(
-						dbg_s, my_prof_obj)) {
-			nvgpu_err(g,
-				"global reserve: have existing reservation");
-			err = -EBUSY;
-		}
-	} else if (g->global_profiler_reservation_held) {
-		/* If there's a global reservation,
-		 * we can't take a per-context one.
-		 */
-		nvgpu_err(g,
-			"per-ctxt reserve: global reservation in effect");
-		err = -EBUSY;
-	} else {
-		/* check that this TSG doesn't already have the reservation */
-		u32 my_tsgid = my_prof_obj->tsg->tsgid;
-
-		nvgpu_list_for_each_entry(prof_obj, &g->profiler_objects,
+		nvgpu_list_for_each_entry(tmp_obj, &g->profiler_objects,
 				nvgpu_profiler_object, prof_obj_entry) {
-			if (prof_obj->has_reservation &&
-					(prof_obj->tsg->tsgid == my_tsgid)) {
-				nvgpu_err(g,
-					"per-ctxt reserve (tsg): already reserved");
-				err = -EBUSY;
+			if (tmp_obj->tsg == NULL) {
+				continue;
+			}
+			if ((tmp_obj->tsg->tsgid == tsg->tsgid) &&
+					tmp_obj->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY]) {
+				err = -EINVAL;
 				goto exit;
 			}
 		}
-
-		if (!g->ops.debugger.check_and_set_context_reservation(
-						dbg_s, my_prof_obj)) {
-			/* Another guest OS has the global reservation */
-			nvgpu_err(g,
-				"per-ctxt reserve: global reservation in effect");
-			err = -EBUSY;
-		}
 	}
 
+	err = nvgpu_profiler_pm_resource_reserve(prof_obj,
+			NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY);
+
 exit:
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
 	return err;