gpu: nvgpu: Add czf_bypass sysfs node for gp10b

This change adds a new sysfs node that allows configuring CZF_BYPASS,
for platforms with low context-switching latency requirements.

/sys/devices/17000000.gp10b/czf_bypass

Values:
0 - always
1 - lateZ (default)
2 - single pass
3 - never

The specified value will apply only to newly allocated contexts.
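
For illustration only (this helper is not part of the change), a
minimal userspace sketch that selects the "never" policy before any
new contexts are created, assuming the node path above and root
privileges:

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        /* sysfs node added by this change; writing requires root */
        const char *node = "/sys/devices/17000000.gp10b/czf_bypass";
        int fd = open(node, O_WRONLY);

        if (fd < 0) {
            perror("open");
            return 1;
        }
        /* 3 == never (see table above); only contexts allocated
         * after this write pick up the new setting */
        if (write(fd, "3", 1) != 1)
            perror("write");
        close(fd);
        return 0;
    }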

Bug 1914014

Change-Id: Ibb9a8e86089acaadaa7260b00eedec5c80762d6f
Signed-off-by: Peter Boonstoppel <pboonstoppel@nvidia.com>
Reviewed-on: http://git-master/r/1478567
(cherry picked from commit 3bc022cb38)
Reviewed-on: http://git-master/r/1473820
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>

6 changed files with 118 additions and 29 deletions

@@ -366,6 +366,8 @@ struct gpu_ops {
 		int (*resume_from_pause)(struct gk20a *g);
 		int (*clear_sm_errors)(struct gk20a *g);
 		u32 (*tpc_enabled_exceptions)(struct gk20a *g);
+		int (*set_czf_bypass)(struct gk20a *g,
+				struct channel_gk20a *ch);
 	} gr;
 	struct {
 		void (*init_hw)(struct gk20a *g);

@@ -3312,6 +3312,9 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
 		c->first_init = true;
 	}
 
+	if (g->ops.gr.set_czf_bypass)
+		g->ops.gr.set_czf_bypass(g, c);
+
 	gk20a_dbg_fn("done");
 	return 0;
 out:
@@ -8236,44 +8239,27 @@ bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch)
 	return ret;
 }
 
-int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
-			  struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops,
-			  u32 num_ctx_wr_ops, u32 num_ctx_rd_ops)
+int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
+			    struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops,
+			    u32 num_ctx_wr_ops, u32 num_ctx_rd_ops,
+			    bool ch_is_curr_ctx)
 {
 	struct gk20a *g = ch->g;
 	struct channel_ctx_gk20a *ch_ctx = &ch->ch_ctx;
 	bool gr_ctx_ready = false;
 	bool pm_ctx_ready = false;
 	struct nvgpu_mem *current_mem = NULL;
-	bool ch_is_curr_ctx, restart_gr_ctxsw = false;
 	u32 i, j, offset, v;
 	struct gr_gk20a *gr = &g->gr;
 	u32 max_offsets = gr->max_gpc_count * gr->max_tpc_per_gpc_count;
 	u32 *offsets = NULL;
 	u32 *offset_addrs = NULL;
 	u32 ctx_op_nr, num_ctx_ops[2] = {num_ctx_wr_ops, num_ctx_rd_ops};
-	int err, pass;
+	int err = 0, pass;
 
 	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "wr_ops=%d rd_ops=%d",
 		   num_ctx_wr_ops, num_ctx_rd_ops);
 
-	/* disable channel switching.
-	 * at that point the hardware state can be inspected to
-	 * determine if the context we're interested in is current.
-	 */
-	err = gr_gk20a_disable_ctxsw(g);
-	if (err) {
-		nvgpu_err(g, "unable to stop gr ctxsw");
-		/* this should probably be ctx-fatal... */
-		goto cleanup;
-	}
-
-	restart_gr_ctxsw = true;
-
-	ch_is_curr_ctx = gk20a_is_channel_ctx_resident(ch);
-
-	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "is curr ctx=%d", ch_is_curr_ctx);
-
 	if (ch_is_curr_ctx) {
 		for (pass = 0; pass < 2; pass++) {
 			ctx_op_nr = 0;
@@ -8497,12 +8483,40 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 	if (pm_ctx_ready)
 		nvgpu_mem_end(g, &ch_ctx->pm_ctx.mem);
 
-	if (restart_gr_ctxsw) {
-		int tmp_err = gr_gk20a_enable_ctxsw(g);
-		if (tmp_err) {
-			nvgpu_err(g, "unable to restart ctxsw!\n");
-			err = tmp_err;
-		}
-	}
 	return err;
 }
+
+int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
+			  struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops,
+			  u32 num_ctx_wr_ops, u32 num_ctx_rd_ops)
+{
+	struct gk20a *g = ch->g;
+	int err, tmp_err;
+	bool ch_is_curr_ctx;
+
+	/* disable channel switching.
+	 * at that point the hardware state can be inspected to
+	 * determine if the context we're interested in is current.
+	 */
+	err = gr_gk20a_disable_ctxsw(g);
+	if (err) {
+		nvgpu_err(g, "unable to stop gr ctxsw");
+		/* this should probably be ctx-fatal... */
+		return err;
+	}
+
+	ch_is_curr_ctx = gk20a_is_channel_ctx_resident(ch);
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "is curr ctx=%d",
+		  ch_is_curr_ctx);
+
+	err = __gr_gk20a_exec_ctx_ops(ch, ctx_ops, num_ops, num_ctx_wr_ops,
+				      num_ctx_rd_ops, ch_is_curr_ctx);
+
+	tmp_err = gr_gk20a_enable_ctxsw(g);
+	if (tmp_err) {
+		nvgpu_err(g, "unable to restart ctxsw!\n");
+		err = tmp_err;
+	}
+
+	return err;
+}
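
A note on the split above: the new czf_bypass hook hardcodes
ch_is_curr_ctx = false, since a context being configured at allocation
time is not yet resident on the GR engine; it can therefore call the
inner function directly and skip the ctxsw disable/enable round trip
that the gr_gk20a_exec_ctx_ops() wrapper performs, e.g. (mirroring the
call in gr_gp10b_set_czf_bypass() further down):

    /* context known not to be resident: patch the saved ctx image */
    __gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0, false);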

@@ -320,6 +320,7 @@ struct gr_gk20a {
 	u32 alpha_cb_default_size;
 	u32 alpha_cb_size;
 	u32 timeslice_mode;
+	u32 czf_bypass;
 
 	struct gr_ctx_buffer_desc global_ctx_buffer[NR_GLOBAL_CTX_BUF];
@@ -563,6 +564,10 @@ struct nvgpu_dbg_gpu_reg_op;
 int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 		struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops,
 		u32 num_ctx_wr_ops, u32 num_ctx_rd_ops);
+int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
+		struct nvgpu_dbg_gpu_reg_op *ctx_ops, u32 num_ops,
+		u32 num_ctx_wr_ops, u32 num_ctx_rd_ops,
+		bool ch_is_curr_ctx);
 int gr_gk20a_get_ctx_buffer_offsets(struct gk20a *g,
 		u32 addr,
 		u32 max_offsets,

@@ -1,7 +1,7 @@
 /*
  * GP10B specific sysfs files
  *
- * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -18,6 +18,8 @@
 #include "gk20a/gk20a.h"
 #include "gp10b_sysfs.h"
 
+#include <nvgpu/hw/gp10b/hw_gr_gp10b.h>
+
 #define ROOTRW (S_IRWXU|S_IRGRP|S_IROTH)
 
 static ssize_t ecc_enable_store(struct device *dev,
@@ -49,12 +51,43 @@ static ssize_t ecc_enable_read(struct device *dev,
 
 static DEVICE_ATTR(ecc_enable, ROOTRW, ecc_enable_read, ecc_enable_store);
 
+static ssize_t czf_bypass_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct gk20a *g = get_gk20a(dev);
+	unsigned long val;
+
+	if (kstrtoul(buf, 10, &val) < 0)
+		return -EINVAL;
+
+	if (val >= 4)
+		return -EINVAL;
+
+	g->gr.czf_bypass = val;
+
+	return count;
+}
+
+static ssize_t czf_bypass_read(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct gk20a *g = get_gk20a(dev);
+
+	return sprintf(buf, "%d\n", g->gr.czf_bypass);
+}
+
+static DEVICE_ATTR(czf_bypass, ROOTRW, czf_bypass_read, czf_bypass_store);
+
 void gp10b_create_sysfs(struct device *dev)
 {
 	struct gk20a *g = get_gk20a(dev);
 	int error = 0;
 
+	g->gr.czf_bypass = gr_gpc0_prop_debug1_czf_bypass_init_v();
+
 	error |= device_create_file(dev, &dev_attr_ecc_enable);
+	error |= device_create_file(dev, &dev_attr_czf_bypass);
+
 	if (error)
 		nvgpu_err(g, "Failed to create sysfs attributes!\n");
 }
@@ -62,4 +95,5 @@ void gp10b_create_sysfs(struct device *dev)
 void gp10b_remove_sysfs(struct device *dev)
 {
 	device_remove_file(dev, &dev_attr_ecc_enable);
+	device_remove_file(dev, &dev_attr_czf_bypass);
 }

@@ -27,6 +27,7 @@
 #include "gk20a/gk20a.h"
 #include "gk20a/gr_gk20a.h"
 #include "gk20a/dbg_gpu_gk20a.h"
+#include "gk20a/regops_gk20a.h"
 
 #include "gm20b/gr_gm20b.h"
 #include "gp10b/gr_gp10b.h"
@@ -2304,6 +2305,22 @@ static void gr_gp10b_write_preemption_ptr(struct gk20a *g,
 }
 
+int gr_gp10b_set_czf_bypass(struct gk20a *g, struct channel_gk20a *ch)
+{
+	struct nvgpu_dbg_gpu_reg_op ops;
+
+	ops.op = REGOP(WRITE_32);
+	ops.type = REGOP(TYPE_GR_CTX);
+	ops.status = REGOP(STATUS_SUCCESS);
+	ops.value_hi = 0;
+	ops.and_n_mask_lo = gr_gpc0_prop_debug1_czf_bypass_m();
+	ops.and_n_mask_hi = 0;
+	ops.offset = gr_gpc0_prop_debug1_r();
+	ops.value_lo = gr_gpc0_prop_debug1_czf_bypass_f(
+			g->gr.czf_bypass);
+
+	return __gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0, false);
+}
+
 void gp10b_init_gr(struct gpu_ops *gops)
 {
@@ -2355,4 +2372,5 @@ void gp10b_init_gr(struct gpu_ops *gops)
 	gops->gr.load_smid_config = gr_gp10b_load_smid_config;
 	gops->gr.set_boosted_ctx = gr_gp10b_set_boosted_ctx;
 	gops->gr.update_boosted_ctx = gr_gp10b_update_boosted_ctx;
+	gops->gr.set_czf_bypass = gr_gp10b_set_czf_bypass;
 }
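
For reference, the reg-op built in gr_gp10b_set_czf_bypass() amounts
to a masked read-modify-write of the 2-bit czf_bypass field (bits
15:14) in the context image of GPC0 PROP_DEBUG1; roughly (illustrative
sketch, not driver code):

    /* and_n_mask_lo selects the bits to replace; value_lo supplies
     * the new field value */
    v = (v & ~gr_gpc0_prop_debug1_czf_bypass_m()) |
        gr_gpc0_prop_debug1_czf_bypass_f(g->gr.czf_bypass);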

@@ -4270,4 +4270,20 @@ static inline u32 gr_gpcs_tc_debug0_limit_coalesce_buffer_size_m(void)
 {
 	return 0xff << 0;
 }
+static inline u32 gr_gpc0_prop_debug1_r(void)
+{
+	return 0x00500400;
+}
+static inline u32 gr_gpc0_prop_debug1_czf_bypass_f(u32 v)
+{
+	return (v & 0x3) << 14;
+}
+static inline u32 gr_gpc0_prop_debug1_czf_bypass_m(void)
+{
+	return 0x3 << 14;
+}
+static inline u32 gr_gpc0_prop_debug1_czf_bypass_init_v(void)
+{
+	return 0x00000001;
+}
 #endif