From 95bfa039f584fbdced7f3e7f53bb37a47e86f65a Mon Sep 17 00:00:00 2001 From: Antony Clince Alex Date: Wed, 16 Dec 2020 12:09:03 +0530 Subject: [PATCH] gpu: nvgpu: tu104: implement l2 sector promotion Introduce new HAL gops_ltc.set_l2_sector_promotion to configure L2 sector promotion policy. The following three promotion settings are supported: - NVGPU_GPU_IOCTL_TSG_L2_SECTOR_PROMOTE_FLAG_NONE - NVGPU_GPU_IOCTL_TSG_L2_SECTOR_PROMOTE_FLAG_64B - NVGPU_GPU_IOCTL_TSG_L2_SECTOR_PROMOTE_FLAG_128B Add ioctl "NVGPU_TSG_IOCTL_SET_L2_SECTOR_PROMOTION" to the gpu tsg node to support l2 sector promotion. On chips which do not support sector promotion, the ioctl returns 0. Bug 200656177 Change-Id: Iad835a5c954d3b10da436cfafb388aaaa04f44c7 Signed-off-by: Antony Clince Alex Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2460553 Tested-by: mobile promotions Reviewed-by: svc-mobile-coverity Reviewed-by: svc-mobile-cert Reviewed-by: Deepak Nibade Reviewed-by: mobile promotions GVS: Gerrit_Virtual_Submit --- drivers/gpu/nvgpu/hal/init/hal_tu104.c | 1 + drivers/gpu/nvgpu/hal/ltc/ltc_tu104.c | 105 +++++++++++++++++- drivers/gpu/nvgpu/hal/ltc/ltc_tu104.h | 2 + drivers/gpu/nvgpu/include/nvgpu/gops/ltc.h | 2 + .../include/nvgpu/hw/tu104/hw_ltc_tu104.h | 28 ++++- drivers/gpu/nvgpu/include/nvgpu/ltc.h | 7 +- drivers/gpu/nvgpu/os/linux/ioctl_tsg.c | 83 ++++++++++++++ include/uapi/linux/nvgpu.h | 22 +++- 8 files changed, 246 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/nvgpu/hal/init/hal_tu104.c b/drivers/gpu/nvgpu/hal/init/hal_tu104.c index 47db57d1e..40ba22fb1 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_tu104.c +++ b/drivers/gpu/nvgpu/hal/init/hal_tu104.c @@ -330,6 +330,7 @@ static const struct gops_ltc tu104_ops_ltc = { .split_lts_broadcast_addr = gm20b_ltc_split_lts_broadcast_addr, .split_ltc_broadcast_addr = gm20b_ltc_split_ltc_broadcast_addr, .pri_is_lts_tstg_addr = tu104_ltc_pri_is_lts_tstg_addr, + .set_l2_sector_promotion = tu104_set_l2_sector_promotion, #endif /*
CONFIG_NVGPU_DEBUGGER */ }; diff --git a/drivers/gpu/nvgpu/hal/ltc/ltc_tu104.c b/drivers/gpu/nvgpu/hal/ltc/ltc_tu104.c index 0d34c0126..f338dccfe 100644 --- a/drivers/gpu/nvgpu/hal/ltc/ltc_tu104.c +++ b/drivers/gpu/nvgpu/hal/ltc/ltc_tu104.c @@ -26,8 +26,10 @@ #include #include #include -#include "ltc_tu104.h" +#include +#include "hal/gr/gr/gr_gk20a.h" +#include "ltc_tu104.h" #include "ltc_gv11b.h" #include @@ -69,4 +71,105 @@ u32 tu104_ltc_pri_is_lts_tstg_addr(struct gk20a *g, u32 addr) return (lts_addr >= LTS_TSTG_BASE && lts_addr <= LTS_TSTG_EXTENT) ? true : false; } + +int tu104_set_l2_sector_promotion(struct gk20a *g, struct nvgpu_tsg *tsg, + u32 policy) +{ + int err = 0; + struct nvgpu_dbg_reg_op cfg_ops[2] = { + { + .op = REGOP(READ_32), + .type = REGOP(TYPE_GR_CTX), + .offset = ltc_ltcs_ltss_tstg_cfg2_r() + }, + { + .op = REGOP(READ_32), + .type = REGOP(TYPE_GR_CTX), + .offset = ltc_ltcs_ltss_tstg_cfg3_r() + }, + }; + u32 flags = NVGPU_REG_OP_FLAG_MODE_ALL_OR_NONE; + u32 num_ops = 2U; + u32 cfg2_vidmem = 0U, cfg3_sysmem = 0U; + + /* + * Read current value for ltc_ltcs_ltss_tstg_cfg(2,3)_r + */ + err = gr_gk20a_exec_ctx_ops(tsg, cfg_ops, num_ops, 0, num_ops, &flags); + if (err != 0) { + nvgpu_err(g, "failed to read ltcs_ltss_tstg_cfg(2,3)_r"); + goto fail; + } + cfg2_vidmem = cfg_ops[0].value_lo; + cfg3_sysmem = cfg_ops[1].value_lo; + +#define APPLY_SECTOR_PROMOTION_POLICY(cfg, unit, policy) \ + do { \ + switch (policy) { \ + case NVGPU_L2_SECTOR_PROMOTE_FLAG_NONE: \ + cfg = set_field(cfg, \ + ltc_ltcs_ltss_tstg_##cfg##_##unit##_promote_m(), \ + ltc_ltcs_ltss_tstg_##cfg##_##unit##_promote_f( \ + ltc_ltcs_ltss_tstg_##cfg##_##unit##_promote_none_v() \ + )); \ + break; \ + case NVGPU_L2_SECTOR_PROMOTE_FLAG_64B: \ + cfg = set_field(cfg, \ + ltc_ltcs_ltss_tstg_##cfg##_##unit##_promote_m(), \ + ltc_ltcs_ltss_tstg_##cfg##_##unit##_promote_f( \ + ltc_ltcs_ltss_tstg_##cfg##_##unit##_promote_64b_v() \ + )); \ + break; \ + case NVGPU_L2_SECTOR_PROMOTE_FLAG_128B: \ + cfg
= set_field(cfg, \ + ltc_ltcs_ltss_tstg_##cfg##_##unit##_promote_m(), \ + ltc_ltcs_ltss_tstg_##cfg##_##unit##_promote_f( \ + ltc_ltcs_ltss_tstg_##cfg##_##unit##_promote_128b_v() \ + )); \ + break; \ + } \ + } while (0) + + /* + * Update T1_PROMOTE and L1_PROMOTE fields of cfg2_vidmem and + * cfg3_sysmem. + */ + APPLY_SECTOR_PROMOTION_POLICY(cfg2_vidmem, t1, policy); + APPLY_SECTOR_PROMOTION_POLICY(cfg2_vidmem, l1, policy); + APPLY_SECTOR_PROMOTION_POLICY(cfg3_sysmem, t1, policy); + APPLY_SECTOR_PROMOTION_POLICY(cfg3_sysmem, l1, policy); + +#undef APPLY_SECTOR_PROMOTION_POLICY + + cfg_ops[0].op = REGOP(WRITE_32); + cfg_ops[0].value_lo = cfg2_vidmem; + cfg_ops[1].op = REGOP(WRITE_32); + cfg_ops[1].value_lo = cfg3_sysmem; + err = gr_gk20a_exec_ctx_ops(tsg, cfg_ops, num_ops, num_ops, 0, &flags); + if (err != 0) { + nvgpu_err(g, "failed to update ltcs_ltss_tstg_cfg(2,3)_r"); + goto fail; + } + + /* Read back and verify the write: cfg2_vidmem/cfg3_sysmem hold the values written, cfg_ops[].value_lo the values read back */ + cfg_ops[0].op = REGOP(READ_32); + cfg_ops[0].value_lo = 0U; + cfg_ops[1].op = REGOP(READ_32); + cfg_ops[1].value_lo = 0U; + err = gr_gk20a_exec_ctx_ops(tsg, cfg_ops, num_ops, 0, num_ops, &flags); + if (err != 0) { + nvgpu_err(g, "failed to read ltcs_ltss_tstg_cfg(2,3)_r"); + goto fail; + } + if (cfg2_vidmem != cfg_ops[0].value_lo || cfg3_sysmem != cfg_ops[1].value_lo) { + nvgpu_err(g, "mismatch: cfg2: wrote(0x%x) read(0x%x)", + cfg2_vidmem, cfg_ops[0].value_lo); + nvgpu_err(g, " cfg3: wrote(0x%x) read(0x%x)", + cfg3_sysmem, cfg_ops[1].value_lo); + err = -EINVAL; + } + +fail: + return err; +} #endif diff --git a/drivers/gpu/nvgpu/hal/ltc/ltc_tu104.h b/drivers/gpu/nvgpu/hal/ltc/ltc_tu104.h index dcaff738f..61ddd917d 100644 --- a/drivers/gpu/nvgpu/hal/ltc/ltc_tu104.h +++ b/drivers/gpu/nvgpu/hal/ltc/ltc_tu104.h @@ -41,6 +41,8 @@ struct gk20a; void ltc_tu104_init_fs_state(struct gk20a *g); #ifdef CONFIG_NVGPU_DEBUGGER u32 tu104_ltc_pri_is_lts_tstg_addr(struct gk20a *g, u32 addr); +int tu104_set_l2_sector_promotion(struct gk20a *g, struct
nvgpu_tsg *tsg, + u32 policy); #endif #endif /* LTC_TU104_H */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/gops/ltc.h b/drivers/gpu/nvgpu/include/nvgpu/gops/ltc.h index 6341e6d26..1bf1d7592 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gops/ltc.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gops/ltc.h @@ -158,6 +158,8 @@ struct gops_ltc { int (*get_l2_max_ways_evict_last)(struct gk20a *g, struct nvgpu_tsg *tsg, u32 *num_ways); u32 (*pri_is_lts_tstg_addr)(struct gk20a *g, u32 addr); + int (*set_l2_sector_promotion)(struct gk20a *g, struct nvgpu_tsg *tsg, + u32 policy); #if defined(CONFIG_NVGPU_HAL_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) #include "include/nvgpu/nvgpu_next_gops_ltc.h" #endif diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/tu104/hw_ltc_tu104.h b/drivers/gpu/nvgpu/include/nvgpu/hw/tu104/hw_ltc_tu104.h index 22de9d164..d7dca93fc 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/hw/tu104/hw_ltc_tu104.h +++ b/drivers/gpu/nvgpu/include/nvgpu/hw/tu104/hw_ltc_tu104.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -231,4 +231,30 @@ #define ltc_ltcs_ltss_tstg_set_mgmt_1_plc_recompress_plc_disabled_f() (0x0U) #define ltc_ltcs_ltss_tstg_set_mgmt_1_plc_recompress_rmw_m() (U32(0x1U) << 29U) #define ltc_ltcs_ltss_tstg_set_mgmt_1_plc_recompress_rmw_disabled_f() (0x0U) +#define ltc_ltcs_ltss_tstg_cfg2_r() (0x0017e298U) +#define ltc_ltcs_ltss_tstg_cfg2_vidmem_l1_promote_f(v) ((U32(v) & 0x3U) << 16U) +#define ltc_ltcs_ltss_tstg_cfg2_vidmem_l1_promote_m() (U32(0x3U) << 16U) +#define ltc_ltcs_ltss_tstg_cfg2_vidmem_l1_promote_v(r) (((r) >> 16U) & 0x3U) +#define ltc_ltcs_ltss_tstg_cfg2_vidmem_l1_promote_none_v() (0x00000000U) +#define ltc_ltcs_ltss_tstg_cfg2_vidmem_l1_promote_64b_v() (0x00000001U) +#define ltc_ltcs_ltss_tstg_cfg2_vidmem_l1_promote_128b_v() (0x00000002U) +#define ltc_ltcs_ltss_tstg_cfg2_vidmem_t1_promote_f(v) ((U32(v) & 0x3U) << 18U) +#define ltc_ltcs_ltss_tstg_cfg2_vidmem_t1_promote_m() (U32(0x3U) << 18U) +#define ltc_ltcs_ltss_tstg_cfg2_vidmem_t1_promote_v(r) (((r) >> 18U) & 0x3U) +#define ltc_ltcs_ltss_tstg_cfg2_vidmem_t1_promote_none_v() (0x00000000U) +#define ltc_ltcs_ltss_tstg_cfg2_vidmem_t1_promote_64b_v() (0x00000001U) +#define ltc_ltcs_ltss_tstg_cfg2_vidmem_t1_promote_128b_v() (0x00000002U) +#define ltc_ltcs_ltss_tstg_cfg3_r() (0x0017e29cU) +#define ltc_ltcs_ltss_tstg_cfg3_sysmem_l1_promote_f(v) ((U32(v) & 0x3U) << 16U) +#define ltc_ltcs_ltss_tstg_cfg3_sysmem_l1_promote_m() (U32(0x3U) << 16U) +#define ltc_ltcs_ltss_tstg_cfg3_sysmem_l1_promote_v(r) (((r) >> 16U) & 0x3U) +#define ltc_ltcs_ltss_tstg_cfg3_sysmem_l1_promote_none_v() (0x00000000U) +#define ltc_ltcs_ltss_tstg_cfg3_sysmem_l1_promote_64b_v() (0x00000001U) +#define ltc_ltcs_ltss_tstg_cfg3_sysmem_l1_promote_128b_v() (0x00000002U) +#define ltc_ltcs_ltss_tstg_cfg3_sysmem_t1_promote_f(v) ((U32(v) & 0x3U) << 18U) +#define 
ltc_ltcs_ltss_tstg_cfg3_sysmem_t1_promote_m() (U32(0x3U) << 18U) +#define ltc_ltcs_ltss_tstg_cfg3_sysmem_t1_promote_v(r) (((r) >> 18U) & 0x3U) +#define ltc_ltcs_ltss_tstg_cfg3_sysmem_t1_promote_none_v() (0x00000000U) +#define ltc_ltcs_ltss_tstg_cfg3_sysmem_t1_promote_64b_v() (0x00000001U) +#define ltc_ltcs_ltss_tstg_cfg3_sysmem_t1_promote_128b_v() (0x00000002U) #endif diff --git a/drivers/gpu/nvgpu/include/nvgpu/ltc.h b/drivers/gpu/nvgpu/include/nvgpu/ltc.h index d392fa22f..a21b36e6e 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/ltc.h +++ b/drivers/gpu/nvgpu/include/nvgpu/ltc.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -79,6 +79,11 @@ int nvgpu_ecc_counter_init_per_lts(struct gk20a *g, #define NVGPU_ECC_COUNTER_INIT_PER_LTS(stat) \ nvgpu_ecc_counter_init_per_lts(g, &g->ecc.ltc.stat, #stat) +#define NVGPU_L2_SECTOR_PROMOTE_FLAG_NONE (1U << 0U) +#define NVGPU_L2_SECTOR_PROMOTE_FLAG_64B (1U << 1U) +#define NVGPU_L2_SECTOR_PROMOTE_FLAG_128B (1U << 2U) +#define NVGPU_L2_SECTOR_PROMOTE_FLAG_INVALID (1U << 3U) + /** * @brief Release all LTC ECC stats counters. 
* diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c b/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c index e232464ff..94dcd01d3 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "platform_gk20a.h" #include "ioctl_tsg.h" @@ -681,6 +682,67 @@ static int nvgpu_gpu_ioctl_get_l2_max_ways_evict_last( return err; } +static u32 nvgpu_translate_l2_sector_promotion_flag(struct gk20a *g, u32 flag) +{ + u32 promotion_flag = NVGPU_L2_SECTOR_PROMOTE_FLAG_INVALID; + + switch (flag) { + case NVGPU_GPU_IOCTL_TSG_L2_SECTOR_PROMOTE_FLAG_NONE: + promotion_flag = NVGPU_L2_SECTOR_PROMOTE_FLAG_NONE; + break; + + case NVGPU_GPU_IOCTL_TSG_L2_SECTOR_PROMOTE_FLAG_64B: + promotion_flag = NVGPU_L2_SECTOR_PROMOTE_FLAG_64B; + break; + + case NVGPU_GPU_IOCTL_TSG_L2_SECTOR_PROMOTE_FLAG_128B: + promotion_flag = NVGPU_L2_SECTOR_PROMOTE_FLAG_128B; + break; + + default: + nvgpu_err(g, "invalid sector promotion flag(%d)", + flag); + break; + } + + return promotion_flag; +} + +static int nvgpu_gpu_ioctl_set_l2_sector_promotion(struct gk20a *g, + struct nvgpu_tsg *tsg, + struct nvgpu_tsg_set_l2_sector_promotion_args *args) +{ + u32 promotion_flag = 0U; + int err = 0; + + /* + * L2 sector promotion is a perf feature so return silently without + * error if not supported. 
+ */ + if (g->ops.ltc.set_l2_sector_promotion == NULL) { + return 0; + } + + promotion_flag = + nvgpu_translate_l2_sector_promotion_flag(g, + args->promotion_flag); + if (promotion_flag == + NVGPU_L2_SECTOR_PROMOTE_FLAG_INVALID) { + return -EINVAL; + } + + err = gk20a_busy(g); + if (err) { + nvgpu_err(g, "failed to power on gpu"); + return err; + } + err = g->ops.ltc.set_l2_sector_promotion(g, tsg, + promotion_flag); + gk20a_idle(g); + + return err; +} + long nvgpu_ioctl_tsg_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { @@ -826,15 +888,36 @@ long nvgpu_ioctl_tsg_dev_ioctl(struct file *filp, unsigned int cmd, case NVGPU_TSG_IOCTL_SET_L2_MAX_WAYS_EVICT_LAST: { + err = gk20a_busy(g); + if (err) { + nvgpu_err(g, + "failed to power on gpu for ioctl cmd: 0x%x", cmd); + break; + } err = nvgpu_gpu_ioctl_set_l2_max_ways_evict_last(g, tsg, (struct nvgpu_tsg_l2_max_ways_evict_last_args *)buf); + gk20a_idle(g); break; } case NVGPU_TSG_IOCTL_GET_L2_MAX_WAYS_EVICT_LAST: { + err = gk20a_busy(g); + if (err) { + nvgpu_err(g, + "failed to power on gpu for ioctl cmd: 0x%x", cmd); + break; + } err = nvgpu_gpu_ioctl_get_l2_max_ways_evict_last(g, tsg, (struct nvgpu_tsg_l2_max_ways_evict_last_args *)buf); + gk20a_idle(g); + break; + } + + case NVGPU_TSG_IOCTL_SET_L2_SECTOR_PROMOTION: + { + err = nvgpu_gpu_ioctl_set_l2_sector_promotion(g, tsg, + (struct nvgpu_tsg_set_l2_sector_promotion_args *)buf); break; } diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h index 7ed75bc3d..c15aa8f98 100644 --- a/include/uapi/linux/nvgpu.h +++ b/include/uapi/linux/nvgpu.h @@ -82,6 +82,23 @@ struct nvgpu_tsg_l2_max_ways_evict_last_args { * with eviction_policy=EVICT_LAST */ __u32 max_ways; + __u32 reserved; +}; + +/* + * This struct contains the parameter for configuring L2 sector promotion. + * It supports 3 valid options:- + * - PROMOTE_NONE(1): cache-miss doesn't get promoted. + * - PROMOTE_64B(2): cache-miss gets promoted to 64 bytes if less than 64 bytes.
+ * - PROMOTE_128B(4): cache-miss gets promoted to 128 bytes if less than 128 bytes. + */ +#define NVGPU_GPU_IOCTL_TSG_L2_SECTOR_PROMOTE_FLAG_NONE (1U << 0U) +#define NVGPU_GPU_IOCTL_TSG_L2_SECTOR_PROMOTE_FLAG_64B (1U << 1U) +#define NVGPU_GPU_IOCTL_TSG_L2_SECTOR_PROMOTE_FLAG_128B (1U << 2U) +struct nvgpu_tsg_set_l2_sector_promotion_args { + /* Promotion flag: exactly one of NVGPU_GPU_IOCTL_TSG_L2_SECTOR_PROMOTE_FLAG_* */ + __u32 promotion_flag; + __u32 reserved; }; #define NVGPU_TSG_IOCTL_BIND_CHANNEL \ @@ -113,11 +130,14 @@ struct nvgpu_tsg_l2_max_ways_evict_last_args { #define NVGPU_TSG_IOCTL_GET_L2_MAX_WAYS_EVICT_LAST \ _IOR(NVGPU_TSG_IOCTL_MAGIC, 14, \ struct nvgpu_tsg_l2_max_ways_evict_last_args) +#define NVGPU_TSG_IOCTL_SET_L2_SECTOR_PROMOTION \ + _IOW(NVGPU_TSG_IOCTL_MAGIC, 15, \ + struct nvgpu_tsg_set_l2_sector_promotion_args) #define NVGPU_TSG_IOCTL_MAX_ARG_SIZE \ sizeof(struct nvgpu_tsg_bind_channel_ex_args) #define NVGPU_TSG_IOCTL_LAST \ - _IOC_NR(NVGPU_TSG_IOCTL_GET_L2_MAX_WAYS_EVICT_LAST) + _IOC_NR(NVGPU_TSG_IOCTL_SET_L2_SECTOR_PROMOTION) /* * /dev/nvhost-dbg-gpu device