gpu: nvgpu: add ioctl to configure l2 max_ways_evict_last

Add ioctl support to configure and read the max number of lines/ways
in an L2 cache set that can be marked as EVICT_LAST. This is accomplished
through two new ltc HALs: set_l2_max_ways_evict_last and
get_l2_max_ways_evict_last. These HALs will only be set for nvgpu-next
chips. In case of legacy chips, the IOCTLs will return error -ENOSYS.

Generate the following litter constants to get the number of sets in an
L2 slice and the number of ways in each set:
- GPU_LIT_NUM_LTC_LTS_SETS
- GPU_LIT_NUM_LTC_LTS_WAYS

Add gpu characteristics flag NVGPU_L2_MAX_WAYS_EVICT_LAST_ENABLED to
allow the userspace driver to determine if the L2_MAX_WAYS_EVICT_LAST
ioctl is supported.

Bug 200605474

Change-Id: Id3180f891399f5e128500f3835d762aee59953e0
Signed-off-by: Antony Clince Alex <aalex@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2445884
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Antony Clince Alex
2020-11-13 14:50:51 +05:30
committed by mobile promotions
parent fe03443161
commit f41e5975d8
11 changed files with 104 additions and 7 deletions

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -132,6 +132,12 @@ u32 gv11b_get_litter_value(struct gk20a *g, int value)
case GPU_LIT_NUM_FBPAS: case GPU_LIT_NUM_FBPAS:
ret = proj_scal_litter_num_fbpas_v(); ret = proj_scal_litter_num_fbpas_v();
break; break;
case GPU_LIT_NUM_LTC_LTS_SETS:
ret = proj_scal_litter_num_ltc_lts_sets_v();
break;
case GPU_LIT_NUM_LTC_LTS_WAYS:
ret = proj_scal_litter_num_ltc_lts_ways_v();
break;
/* Hardcode FBPA values other than NUM_FBPAS to 0. */ /* Hardcode FBPA values other than NUM_FBPAS to 0. */
case GPU_LIT_FBPA_STRIDE: case GPU_LIT_FBPA_STRIDE:
case GPU_LIT_FBPA_BASE: case GPU_LIT_FBPA_BASE:

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -137,6 +137,12 @@ u32 tu104_get_litter_value(struct gk20a *g, int value)
case GPU_LIT_SM_SHARED_BASE: case GPU_LIT_SM_SHARED_BASE:
ret = proj_sm_shared_base_v(); ret = proj_sm_shared_base_v();
break; break;
case GPU_LIT_NUM_LTC_LTS_SETS:
ret = proj_scal_litter_num_ltc_lts_sets_v();
break;
case GPU_LIT_NUM_LTC_LTS_WAYS:
ret = proj_scal_litter_num_ltc_lts_ways_v();
break;
#ifdef CONFIG_NVGPU_GRAPHICS #ifdef CONFIG_NVGPU_GRAPHICS
case GPU_LIT_TWOD_CLASS: case GPU_LIT_TWOD_CLASS:
ret = FERMI_TWOD_A; ret = FERMI_TWOD_A;

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -204,6 +204,8 @@ struct gk20a;
DEFINE_FLAG(NVGPU_PMU_NEXT_CORE_ENABLED, "PMU NEXT CORE enabled"), \ DEFINE_FLAG(NVGPU_PMU_NEXT_CORE_ENABLED, "PMU NEXT CORE enabled"), \
DEFINE_FLAG(NVGPU_ACR_NEXT_CORE_ENABLED, \ DEFINE_FLAG(NVGPU_ACR_NEXT_CORE_ENABLED, \
"NEXT CORE availability for acr"), \ "NEXT CORE availability for acr"), \
DEFINE_FLAG(NVGPU_L2_MAX_WAYS_EVICT_LAST_ENABLED, \
"Set L2 Max Ways Evict Last support"), \
DEFINE_FLAG(NVGPU_MAX_ENABLED_BITS, "Marks max number of flags"), DEFINE_FLAG(NVGPU_MAX_ENABLED_BITS, "Marks max number of flags"),
/** /**

View File

@@ -265,6 +265,8 @@ struct railgate_stats {
#define GPU_LIT_GPC_ADDR_WIDTH 47 #define GPU_LIT_GPC_ADDR_WIDTH 47
#define GPU_LIT_TPC_ADDR_WIDTH 48 #define GPU_LIT_TPC_ADDR_WIDTH 48
#define GPU_LIT_MAX_RUNLISTS_SUPPORTED 49 #define GPU_LIT_MAX_RUNLISTS_SUPPORTED 49
#define GPU_LIT_NUM_LTC_LTS_SETS 50
#define GPU_LIT_NUM_LTC_LTS_WAYS 51
/** @endcond */ /** @endcond */
/** Macro to get litter values corresponding to the litter defines. */ /** Macro to get litter values corresponding to the litter defines. */

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -155,6 +155,10 @@ struct gops_ltc {
void (*split_ltc_broadcast_addr)(struct gk20a *g, u32 addr, void (*split_ltc_broadcast_addr)(struct gk20a *g, u32 addr,
u32 *priv_addr_table, u32 *priv_addr_table,
u32 *priv_addr_table_index); u32 *priv_addr_table_index);
int (*set_l2_max_ways_evict_last)(struct gk20a *g, struct nvgpu_tsg *tsg,
u32 num_ways);
int (*get_l2_max_ways_evict_last)(struct gk20a *g, struct nvgpu_tsg *tsg,
u32 *num_ways);
#if defined(CONFIG_NVGPU_HAL_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) #if defined(CONFIG_NVGPU_HAL_NON_FUSA) && defined(CONFIG_NVGPU_NEXT)
#include "include/nvgpu/nvgpu_next_gops_ltc.h" #include "include/nvgpu/nvgpu_next_gops_ltc.h"
#endif #endif

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -89,6 +89,8 @@
#define proj_scal_litter_num_tpcs_per_pes_v() (0x00000002U) #define proj_scal_litter_num_tpcs_per_pes_v() (0x00000002U)
#define proj_scal_litter_num_zcull_banks_v() (0x00000004U) #define proj_scal_litter_num_zcull_banks_v() (0x00000004U)
#define proj_scal_litter_num_sm_per_tpc_v() (0x00000002U) #define proj_scal_litter_num_sm_per_tpc_v() (0x00000002U)
#define proj_scal_litter_num_ltc_lts_sets_v() (0x00000040U)
#define proj_scal_litter_num_ltc_lts_ways_v() (0x00000010U)
#define proj_scal_max_gpcs_v() (0x00000020U) #define proj_scal_max_gpcs_v() (0x00000020U)
#define proj_scal_max_tpc_per_gpc_v() (0x00000008U) #define proj_scal_max_tpc_per_gpc_v() (0x00000008U)
#define proj_sm_unique_base_v() (0x00000700U) #define proj_sm_unique_base_v() (0x00000700U)

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -91,6 +91,8 @@
#define proj_scal_litter_num_tpcs_per_pes_v() (0x00000002U) #define proj_scal_litter_num_tpcs_per_pes_v() (0x00000002U)
#define proj_scal_litter_num_zcull_banks_v() (0x00000004U) #define proj_scal_litter_num_zcull_banks_v() (0x00000004U)
#define proj_scal_litter_num_sm_per_tpc_v() (0x00000002U) #define proj_scal_litter_num_sm_per_tpc_v() (0x00000002U)
#define proj_scal_litter_num_ltc_lts_sets_v() (0x00000040U)
#define proj_scal_litter_num_ltc_lts_ways_v() (0x00000010U)
#define proj_scal_max_gpcs_v() (0x00000020U) #define proj_scal_max_gpcs_v() (0x00000020U)
#define proj_scal_max_tpc_per_gpc_v() (0x00000008U) #define proj_scal_max_tpc_per_gpc_v() (0x00000008U)
#define proj_sm_unique_base_v() (0x00000700U) #define proj_sm_unique_base_v() (0x00000700U)

View File

@@ -291,6 +291,8 @@ static struct nvgpu_flags_mapping flags_mapping[] = {
NVGPU_SUPPORT_SMPC_GLOBAL_MODE}, NVGPU_SUPPORT_SMPC_GLOBAL_MODE},
{NVGPU_GPU_FLAGS_SUPPORT_GET_GR_CONTEXT, {NVGPU_GPU_FLAGS_SUPPORT_GET_GR_CONTEXT,
NVGPU_SUPPORT_GET_GR_CONTEXT}, NVGPU_SUPPORT_GET_GR_CONTEXT},
{NVGPU_GPU_FLAGS_L2_MAX_WAYS_EVICT_LAST_ENABLED,
NVGPU_L2_MAX_WAYS_EVICT_LAST_ENABLED},
}; };
static u64 nvgpu_ctrl_ioctl_gpu_characteristics_flags(struct gk20a *g) static u64 nvgpu_ctrl_ioctl_gpu_characteristics_flags(struct gk20a *g)

View File

@@ -645,6 +645,42 @@ static int gk20a_tsg_ioctl_read_single_sm_error_state(struct gk20a *g,
return 0; return 0;
} }
/*
 * Handler for NVGPU_TSG_IOCTL_SET_L2_MAX_WAYS_EVICT_LAST.
 *
 * Passes args->max_ways for the given TSG to the ltc HAL
 * set_l2_max_ways_evict_last. The HAL is invoked with
 * g->dbg_sessions_lock held.
 *
 * Returns the HAL's result, or -ENOSYS when the HAL pointer is not set.
 */
static int nvgpu_gpu_ioctl_set_l2_max_ways_evict_last(
		struct gk20a *g, struct nvgpu_tsg *tsg,
		struct nvgpu_tsg_l2_max_ways_evict_last_args *args)
{
	/* Reported as-is when no HAL is installed. */
	int ret = -ENOSYS;

	nvgpu_mutex_acquire(&g->dbg_sessions_lock);

	if (g->ops.ltc.set_l2_max_ways_evict_last != NULL) {
		ret = g->ops.ltc.set_l2_max_ways_evict_last(g, tsg,
				args->max_ways);
	}

	nvgpu_mutex_release(&g->dbg_sessions_lock);

	return ret;
}
/*
 * Handler for NVGPU_TSG_IOCTL_GET_L2_MAX_WAYS_EVICT_LAST.
 *
 * Asks the ltc HAL get_l2_max_ways_evict_last to store the value for the
 * given TSG into args->max_ways. The HAL is invoked with
 * g->dbg_sessions_lock held.
 *
 * Returns the HAL's result, or -ENOSYS when the HAL pointer is not set
 * (args->max_ways is left untouched in that case).
 */
static int nvgpu_gpu_ioctl_get_l2_max_ways_evict_last(
		struct gk20a *g, struct nvgpu_tsg *tsg,
		struct nvgpu_tsg_l2_max_ways_evict_last_args *args)
{
	/* Reported as-is when no HAL is installed. */
	int ret = -ENOSYS;

	nvgpu_mutex_acquire(&g->dbg_sessions_lock);

	if (g->ops.ltc.get_l2_max_ways_evict_last != NULL) {
		ret = g->ops.ltc.get_l2_max_ways_evict_last(g, tsg,
				&args->max_ways);
	}

	nvgpu_mutex_release(&g->dbg_sessions_lock);

	return ret;
}
long nvgpu_ioctl_tsg_dev_ioctl(struct file *filp, unsigned int cmd, long nvgpu_ioctl_tsg_dev_ioctl(struct file *filp, unsigned int cmd,
unsigned long arg) unsigned long arg)
{ {
@@ -788,6 +824,20 @@ long nvgpu_ioctl_tsg_dev_ioctl(struct file *filp, unsigned int cmd,
break; break;
} }
case NVGPU_TSG_IOCTL_SET_L2_MAX_WAYS_EVICT_LAST:
{
err = nvgpu_gpu_ioctl_set_l2_max_ways_evict_last(g, tsg,
(struct nvgpu_tsg_l2_max_ways_evict_last_args *)buf);
break;
}
case NVGPU_TSG_IOCTL_GET_L2_MAX_WAYS_EVICT_LAST:
{
err = nvgpu_gpu_ioctl_get_l2_max_ways_evict_last(g, tsg,
(struct nvgpu_tsg_l2_max_ways_evict_last_args *)buf);
break;
}
default: default:
nvgpu_err(g, "unrecognized tsg gpu ioctl cmd: 0x%x", nvgpu_err(g, "unrecognized tsg gpu ioctl cmd: 0x%x",
cmd); cmd);

View File

@@ -184,6 +184,8 @@ struct nvgpu_gpu_zbc_query_table_args {
#define NVGPU_GPU_FLAGS_SUPPORT_GET_GR_CONTEXT (1ULL << 49) #define NVGPU_GPU_FLAGS_SUPPORT_GET_GR_CONTEXT (1ULL << 49)
/* Additional buffer metadata association supported */ /* Additional buffer metadata association supported */
#define NVGPU_GPU_FLAGS_SUPPORT_BUFFER_METADATA (1ULL << 50) #define NVGPU_GPU_FLAGS_SUPPORT_BUFFER_METADATA (1ULL << 50)
/* Flag to indicate whether configuring L2_MAXEVICTLAST_WAYS is supported */
#define NVGPU_GPU_FLAGS_L2_MAX_WAYS_EVICT_LAST_ENABLED (1ULL << 51)
/* SM LRF ECC is enabled */ /* SM LRF ECC is enabled */
#define NVGPU_GPU_FLAGS_ECC_ENABLED_SM_LRF (1ULL << 60) #define NVGPU_GPU_FLAGS_ECC_ENABLED_SM_LRF (1ULL << 60)
/* SM SHM ECC is enabled */ /* SM SHM ECC is enabled */

View File

@@ -72,6 +72,18 @@ struct nvgpu_tsg_read_single_sm_error_state_args {
__u64 record_size; __u64 record_size;
}; };
/*
 * Argument block shared by NVGPU_TSG_IOCTL_SET_L2_MAX_WAYS_EVICT_LAST and
 * NVGPU_TSG_IOCTL_GET_L2_MAX_WAYS_EVICT_LAST, used to configure and read
 * back the L2 max evict_last setting of a TSG.
 */
struct nvgpu_tsg_l2_max_ways_evict_last_args {
/*
 * Maximum number of ways in an L2 cache set that can be allocated
 * with eviction_policy=EVICT_LAST.
 *
 * SET: input, the value handed to the driver.
 * GET: output, filled in by the driver.
 */
__u32 max_ways;
};
#define NVGPU_TSG_IOCTL_BIND_CHANNEL \ #define NVGPU_TSG_IOCTL_BIND_CHANNEL \
_IOW(NVGPU_TSG_IOCTL_MAGIC, 1, int) _IOW(NVGPU_TSG_IOCTL_MAGIC, 1, int)
#define NVGPU_TSG_IOCTL_UNBIND_CHANNEL \ #define NVGPU_TSG_IOCTL_UNBIND_CHANNEL \
@@ -95,10 +107,17 @@ struct nvgpu_tsg_read_single_sm_error_state_args {
#define NVGPU_TSG_IOCTL_READ_SINGLE_SM_ERROR_STATE \ #define NVGPU_TSG_IOCTL_READ_SINGLE_SM_ERROR_STATE \
_IOWR(NVGPU_TSG_IOCTL_MAGIC, 12, \ _IOWR(NVGPU_TSG_IOCTL_MAGIC, 12, \
struct nvgpu_tsg_read_single_sm_error_state_args) struct nvgpu_tsg_read_single_sm_error_state_args)
#define NVGPU_TSG_IOCTL_SET_L2_MAX_WAYS_EVICT_LAST \
_IOW(NVGPU_TSG_IOCTL_MAGIC, 13, \
struct nvgpu_tsg_l2_max_ways_evict_last_args)
#define NVGPU_TSG_IOCTL_GET_L2_MAX_WAYS_EVICT_LAST \
_IOR(NVGPU_TSG_IOCTL_MAGIC, 14, \
struct nvgpu_tsg_l2_max_ways_evict_last_args)
#define NVGPU_TSG_IOCTL_MAX_ARG_SIZE \ #define NVGPU_TSG_IOCTL_MAX_ARG_SIZE \
sizeof(struct nvgpu_tsg_bind_channel_ex_args) sizeof(struct nvgpu_tsg_bind_channel_ex_args)
#define NVGPU_TSG_IOCTL_LAST \ #define NVGPU_TSG_IOCTL_LAST \
_IOC_NR(NVGPU_TSG_IOCTL_READ_SINGLE_SM_ERROR_STATE) _IOC_NR(NVGPU_TSG_IOCTL_GET_L2_MAX_WAYS_EVICT_LAST)
/* /*
* /dev/nvhost-dbg-gpu device * /dev/nvhost-dbg-gpu device