gpu: nvgpu: Add czf_bypass sysfs node for gp10b
This change adds a new sysfs node that allows configuring CZF_BYPASS, to
support platforms that require low context-switching latency.
/sys/devices/17000000.gp10b/czf_bypass
Values:
0 - always
1 - lateZ (default)
2 - single pass
3 - never
The specified value will apply only to newly allocated contexts.
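
For illustration only (not part of this change), a minimal user-space
sketch that selects "single pass". The node is created root-writable
(see ROOTRW in the gp10b_sysfs.c hunk below), so this needs root, as
would an equivalent "echo 2 > /sys/devices/17000000.gp10b/czf_bypass".

	/* Illustration only: write 2 ("single pass") to the node. */
	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/sys/devices/17000000.gp10b/czf_bypass", "w");

		if (!f) {
			perror("czf_bypass");
			return 1;
		}
		fprintf(f, "%d\n", 2);		/* 2 == single pass */
		return fclose(f) ? 1 : 0;
	}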
Bug 1914014
Change-Id: Ibb9a8e86089acaadaa7260b00eedec5c80762d6f
Signed-off-by: Peter Boonstoppel <pboonstoppel@nvidia.com>
Reviewed-on: http://git-master/r/1478567
(cherry picked from commit 3bc022cb38)
Reviewed-on: http://git-master/r/1473820
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
commit 39a9e251da, parent 65de2a2d65, committed by mobile promotions
drivers/gpu/nvgpu/gk20a/gk20a.h

@@ -366,6 +366,8 @@ struct gpu_ops {
 		int (*resume_from_pause)(struct gk20a *g);
 		int (*clear_sm_errors)(struct gk20a *g);
 		u32 (*tpc_enabled_exceptions)(struct gk20a *g);
+		int (*set_czf_bypass)(struct gk20a *g,
+				struct channel_gk20a *ch);
 	} gr;
 	struct {
 		void (*init_hw)(struct gk20a *g);
drivers/gpu/nvgpu/gk20a/gr_gk20a.c

@@ -3312,6 +3312,9 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
 		c->first_init = true;
 	}
 
+	if (g->ops.gr.set_czf_bypass)
+		g->ops.gr.set_czf_bypass(g, c);
+
 	gk20a_dbg_fn("done");
 	return 0;
 out:
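
The two hunks above follow the nvgpu HAL convention: the op lives in the
per-chip gpu_ops table, and common code calls it only when the chip has
installed it, so chips that never set the pointer are unaffected. A
minimal sketch of that idiom, with hypothetical names:

	#include <stdio.h>

	/* hypothetical op table, mirroring the gpu_ops.gr style */
	struct example_ops {
		int (*set_feature)(int val);	/* NULL on chips without it */
	};

	static int chip_set_feature(int val)
	{
		printf("feature set to %d\n", val);
		return 0;
	}

	static void common_code(struct example_ops *ops)
	{
		if (ops->set_feature)	/* guard, as in the hunk above */
			ops->set_feature(1);
	}

	int main(void)
	{
		struct example_ops gp10b_like = { .set_feature = chip_set_feature };
		struct example_ops gk20a_like = { 0 };	/* op not installed */

		common_code(&gp10b_like);	/* calls the hook */
		common_code(&gk20a_like);	/* silently skips it */
		return 0;
	}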
@@ -8236,44 +8239,27 @@ bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch)
 	return ret;
 }
 
-int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
-		struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops,
-		u32 num_ctx_wr_ops, u32 num_ctx_rd_ops)
+int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
+		struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops,
+		u32 num_ctx_wr_ops, u32 num_ctx_rd_ops,
+		bool ch_is_curr_ctx)
 {
 	struct gk20a *g = ch->g;
 	struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
 	bool gr_ctx_ready = false;
 	bool pm_ctx_ready = false;
 	struct nvgpu_mem *current_mem = NULL;
-	bool ch_is_curr_ctx, restart_gr_ctxsw = false;
 	u32 i, j, offset, v;
 	struct gr_gk20a *gr = &g->gr;
 	u32 max_offsets = gr->max_gpc_count * gr->max_tpc_per_gpc_count;
 	u32 *offsets = NULL;
 	u32 *offset_addrs = NULL;
 	u32 ctx_op_nr, num_ctx_ops[2] = {num_ctx_wr_ops, num_ctx_rd_ops};
-	int err, pass;
+	int err = 0, pass;
 
 	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "wr_ops=%d rd_ops=%d",
 		  num_ctx_wr_ops, num_ctx_rd_ops);
 
-	/* disable channel switching.
-	 * at that point the hardware state can be inspected to
-	 * determine if the context we're interested in is current.
-	 */
-	err = gr_gk20a_disable_ctxsw(g);
-	if (err) {
-		nvgpu_err(g, "unable to stop gr ctxsw");
-		/* this should probably be ctx-fatal... */
-		goto cleanup;
-	}
-
-	restart_gr_ctxsw = true;
-
-	ch_is_curr_ctx = gk20a_is_channel_ctx_resident(ch);
-
-	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "is curr ctx=%d", ch_is_curr_ctx);
-
 	if (ch_is_curr_ctx) {
 		for (pass = 0; pass < 2; pass++) {
 			ctx_op_nr = 0;
@@ -8497,12 +8483,40 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 	if (pm_ctx_ready)
 		nvgpu_mem_end(g, &ch_ctx->pm_ctx.mem);
 
-	if (restart_gr_ctxsw) {
-		int tmp_err = gr_gk20a_enable_ctxsw(g);
-		if (tmp_err) {
-			nvgpu_err(g, "unable to restart ctxsw!\n");
-			err = tmp_err;
-		}
-	}
-
 	return err;
 }
 
+int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
+		struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops,
+		u32 num_ctx_wr_ops, u32 num_ctx_rd_ops)
+{
+	struct gk20a *g = ch->g;
+	int err, tmp_err;
+	bool ch_is_curr_ctx;
+
+	/* disable channel switching.
+	 * at that point the hardware state can be inspected to
+	 * determine if the context we're interested in is current.
+	 */
+	err = gr_gk20a_disable_ctxsw(g);
+	if (err) {
+		nvgpu_err(g, "unable to stop gr ctxsw");
+		/* this should probably be ctx-fatal... */
+		return err;
+	}
+
+	ch_is_curr_ctx = gk20a_is_channel_ctx_resident(ch);
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "is curr ctx=%d",
+		  ch_is_curr_ctx);
+
+	err = __gr_gk20a_exec_ctx_ops(ch, ctx_ops, num_ops, num_ctx_wr_ops,
+			num_ctx_rd_ops, ch_is_curr_ctx);
+
+	tmp_err = gr_gk20a_enable_ctxsw(g);
+	if (tmp_err) {
+		nvgpu_err(g, "unable to restart ctxsw!\n");
+		err = tmp_err;
+	}
+
+	return err;
+}
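
The refactor above splits gr_gk20a_exec_ctx_ops in two: the outer
wrapper keeps the disable-ctxsw / check-residency / enable-ctxsw
bracket, while __gr_gk20a_exec_ctx_ops takes ch_is_curr_ctx as a
parameter and does only the register work. A caller that already knows
the channel cannot be resident, as gr_gp10b_set_czf_bypass does below
for a freshly allocated context, can invoke the worker directly with
ch_is_curr_ctx = false and skip the bracket. A compact sketch of the
split; names and stubs are hypothetical:

	#include <stdio.h>

	static int stop_ctxsw(void)      { return 0; }	/* stub */
	static int start_ctxsw(void)     { return 0; }	/* stub */
	static int ctx_is_resident(void) { return 0; }	/* stub */

	/* worker: assumes the caller settled ctxsw and residency */
	static int __exec_ops(int is_resident)
	{
		if (is_resident)
			printf("patch live registers\n");
		else
			printf("patch saved context image\n");
		return 0;
	}

	/* wrapper: owns the freeze/query/unfreeze bracket */
	static int exec_ops(void)
	{
		int err = stop_ctxsw();
		int tmp;

		if (err)
			return err;
		err = __exec_ops(ctx_is_resident());
		tmp = start_ctxsw();	/* restart even if the worker failed */
		return err ? err : tmp;
	}

	int main(void) { return exec_ops(); }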
drivers/gpu/nvgpu/gk20a/gr_gk20a.h

@@ -320,6 +320,7 @@ struct gr_gk20a {
 	u32 alpha_cb_default_size;
 	u32 alpha_cb_size;
 	u32 timeslice_mode;
+	u32 czf_bypass;
 
 	struct gr_ctx_buffer_desc global_ctx_buffer[NR_GLOBAL_CTX_BUF];
 
@@ -563,6 +564,10 @@ struct nvgpu_dbg_gpu_reg_op;
 int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 		struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops,
 		u32 num_ctx_wr_ops, u32 num_ctx_rd_ops);
+int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
+		struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops,
+		u32 num_ctx_wr_ops, u32 num_ctx_rd_ops,
+		bool ch_is_curr_ctx);
 int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g,
 		u32 addr,
 		u32 max_offsets,
drivers/gpu/nvgpu/gp10b/gp10b_sysfs.c

@@ -1,7 +1,7 @@
 /*
  * GP10B specific sysfs files
  *
- * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -18,6 +18,8 @@
 #include "gk20a/gk20a.h"
 #include "gp10b_sysfs.h"
 
+#include <nvgpu/hw/gp10b/hw_gr_gp10b.h>
+
 #define ROOTRW (S_IRWXU|S_IRGRP|S_IROTH)
 
 static ssize_t ecc_enable_store(struct device *dev,
@@ -49,12 +51,43 @@ static ssize_t ecc_enable_read(struct device *dev,
 
 static DEVICE_ATTR(ecc_enable, ROOTRW, ecc_enable_read, ecc_enable_store);
 
+
+static ssize_t czf_bypass_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct gk20a *g = get_gk20a(dev);
+	unsigned long val;
+
+	if (kstrtoul(buf, 10, &val) < 0)
+		return -EINVAL;
+
+	if (val >= 4)
+		return -EINVAL;
+
+	g->gr.czf_bypass = val;
+
+	return count;
+}
+
+static ssize_t czf_bypass_read(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct gk20a *g = get_gk20a(dev);
+
+	return sprintf(buf, "%d\n", g->gr.czf_bypass);
+}
+
+static DEVICE_ATTR(czf_bypass, ROOTRW, czf_bypass_read, czf_bypass_store);
+
 void gp10b_create_sysfs(struct device *dev)
 {
 	struct gk20a *g = get_gk20a(dev);
 	int error = 0;
 
+	g->gr.czf_bypass = gr_gpc0_prop_debug1_czf_bypass_init_v();
+
 	error |= device_create_file(dev, &dev_attr_ecc_enable);
+	error |= device_create_file(dev, &dev_attr_czf_bypass);
 	if (error)
 		nvgpu_err(g, "Failed to create sysfs attributes!\n");
 }
@@ -62,4 +95,5 @@ void gp10b_create_sysfs(struct device *dev)
 void gp10b_remove_sysfs(struct device *dev)
 {
 	device_remove_file(dev, &dev_attr_ecc_enable);
+	device_remove_file(dev, &dev_attr_czf_bypass);
 }
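
Two details in the sysfs hunks are worth noting: kstrtoul() plus the
val >= 4 check rejects anything that does not fit the two-bit field,
and ROOTRW works out to octal 0744 (S_IRWXU|S_IRGRP|S_IROTH), so only
root may write while anyone may read. A user-space sketch of the same
parse-and-bounds logic, with strtoul standing in for kstrtoul (which is
stricter about trailing characters):

	#include <errno.h>
	#include <stdio.h>
	#include <stdlib.h>

	/* mirrors czf_bypass_store: base-10 parse, then bound to 0..3 */
	static int parse_czf_bypass(const char *buf, unsigned long *out)
	{
		char *end;
		unsigned long val;

		errno = 0;
		val = strtoul(buf, &end, 10);
		if (errno || end == buf)
			return -EINVAL;
		if (val >= 4)		/* field is only two bits wide */
			return -EINVAL;
		*out = val;
		return 0;
	}

	int main(void)
	{
		unsigned long v;

		printf("\"2\" -> %d\n", parse_czf_bypass("2", &v));	/* 0 */
		printf("\"9\" -> %d\n", parse_czf_bypass("9", &v));	/* -EINVAL */
		return 0;
	}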
drivers/gpu/nvgpu/gp10b/gr_gp10b.c

@@ -27,6 +27,7 @@
 #include "gk20a/gk20a.h"
 #include "gk20a/gr_gk20a.h"
 #include "gk20a/dbg_gpu_gk20a.h"
+#include "gk20a/regops_gk20a.h"
 
 #include "gm20b/gr_gm20b.h"
 #include "gp10b/gr_gp10b.h"
@@ -2304,6 +2305,22 @@ static void gr_gp10b_write_preemption_ptr(struct gk20a *g,
 
 }
 
+int gr_gp10b_set_czf_bypass(struct gk20a *g, struct channel_gk20a *ch)
+{
+	struct nvgpu_dbg_gpu_reg_op ops;
+
+	ops.op = REGOP(WRITE_32);
+	ops.type = REGOP(TYPE_GR_CTX);
+	ops.status = REGOP(STATUS_SUCCESS);
+	ops.value_hi = 0;
+	ops.and_n_mask_lo = gr_gpc0_prop_debug1_czf_bypass_m();
+	ops.and_n_mask_hi = 0;
+	ops.offset = gr_gpc0_prop_debug1_r();
+	ops.value_lo = gr_gpc0_prop_debug1_czf_bypass_f(
+			g->gr.czf_bypass);
+
+	return __gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0, false);
+}
+
 void gp10b_init_gr(struct gpu_ops *gops)
 {
@@ -2355,4 +2372,5 @@ void gp10b_init_gr(struct gpu_ops *gops)
 	gops->gr.load_smid_config = gr_gp10b_load_smid_config;
 	gops->gr.set_boosted_ctx = gr_gp10b_set_boosted_ctx;
 	gops->gr.update_boosted_ctx = gr_gp10b_update_boosted_ctx;
+	gops->gr.set_czf_bypass = gr_gp10b_set_czf_bypass;
 }
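
gr_gp10b_set_czf_bypass builds a single masked context write:
REGOP(TYPE_GR_CTX) targets the channel's saved context image rather
than live registers, and passing ch_is_curr_ctx = false is safe because
the hook runs from gk20a_alloc_obj_ctx before the new context can be
resident. My reading of the and_n_mask fields is a read-modify-write,
new = (old & ~mask) | (value & mask); a self-checking sketch:

	#include <assert.h>
	#include <stdint.h>

	/* masked update as (I believe) the regop path applies it */
	static uint32_t masked_write(uint32_t old, uint32_t val, uint32_t mask)
	{
		return (old & ~mask) | (val & mask);
	}

	int main(void)
	{
		uint32_t mask = 0x3u << 14;	/* czf_bypass field, bits 15:14 */
		uint32_t old  = 0xdeadbeefu;	/* arbitrary prior word */
		uint32_t val  = 2u << 14;	/* 2 == single pass */
		uint32_t updated = masked_write(old, val, mask);

		assert((updated & ~mask) == (old & ~mask));	/* other bits kept */
		assert((updated & mask) == val);		/* field changed */
		return 0;
	}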
drivers/gpu/nvgpu/include/nvgpu/hw/gp10b/hw_gr_gp10b.h

@@ -4270,4 +4270,20 @@ static inline u32 gr_gpcs_tc_debug0_limit_coalesce_buffer_size_m(void)
 {
 	return 0xff << 0;
 }
+static inline u32 gr_gpc0_prop_debug1_r(void)
+{
+	return 0x00500400;
+}
+static inline u32 gr_gpc0_prop_debug1_czf_bypass_f(u32 v)
+{
+	return (v & 0x3) << 14;
+}
+static inline u32 gr_gpc0_prop_debug1_czf_bypass_m(void)
+{
+	return 0x3 << 14;
+}
+static inline u32 gr_gpc0_prop_debug1_czf_bypass_init_v(void)
+{
+	return 0x00000001;
+}
 #endif
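
The new accessors follow the usual nvgpu hw-header convention: _r() is
the register offset (GPC0 PROP_DEBUG1 at 0x00500400), _f(v) shifts a
value into the field, _m() is the field mask (bits 15:14), and
_init_v() is the hardware default, 1, matching "lateZ (default)" in the
commit message. A tiny self-check of how those pieces fit together:

	#include <assert.h>
	#include <stdint.h>

	static uint32_t czf_bypass_f(uint32_t v) { return (v & 0x3) << 14; }
	static uint32_t czf_bypass_m(void)       { return 0x3 << 14; }

	int main(void)
	{
		uint32_t v;

		/* every legal value (0..3) encodes inside the mask */
		for (v = 0; v < 4; v++)
			assert((czf_bypass_f(v) & ~czf_bypass_m()) == 0);

		/* default 1 (lateZ) lands at bit 14 */
		assert(czf_bypass_f(1) == 0x4000);
		return 0;
	}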