mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 17:36:20 +03:00
gpu: nvgpu: vgpu: ecc sysfs support for vgpu
- fetch ecc info from RM server and create sysfs nodes - new file ecc_vgpu.c for platform-independent code - add 2 new commands: GET_ECC_INFO and GET_ECC_COUNTER_VALUE JIRA EVLR-2590 Change-Id: I040a9fcd23326e432ca93e9a028319f9c1c570f0 Signed-off-by: Kyle Guo <kyleg@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1777428 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
91390d857f
commit
2a25d03f2b
@@ -245,6 +245,7 @@ nvgpu-$(CONFIG_TEGRA_GR_VIRTUALIZATION) += \
|
||||
vgpu/dbg_vgpu.o \
|
||||
vgpu/tsg_vgpu.o \
|
||||
vgpu/css_vgpu.o \
|
||||
vgpu/ecc_vgpu.o \
|
||||
vgpu/gm20b/vgpu_gr_gm20b.o \
|
||||
vgpu/gp10b/vgpu_hal_gp10b.o \
|
||||
vgpu/gp10b/vgpu_gr_gp10b.o \
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#define __TEGRA_VGPU_H
|
||||
|
||||
#include <nvgpu/types.h>
|
||||
#include <nvgpu/ecc.h> /* For NVGPU_ECC_STAT_NAME_MAX_SIZE */
|
||||
|
||||
enum {
|
||||
TEGRA_VGPU_MODULE_GPU = 0,
|
||||
@@ -120,6 +121,8 @@ enum {
|
||||
TEGRA_VGPU_CMD_UPDATE_PC_SAMPLING = 81,
|
||||
TEGRA_VGPU_CMD_SUSPEND = 82,
|
||||
TEGRA_VGPU_CMD_RESUME = 83,
|
||||
TEGRA_VGPU_CMD_GET_ECC_INFO = 84,
|
||||
TEGRA_VGPU_CMD_GET_ECC_COUNTER_VALUE = 85,
|
||||
};
|
||||
|
||||
struct tegra_vgpu_connect_params {
|
||||
@@ -378,6 +381,20 @@ struct tegra_vgpu_channel_free_hwpm_ctx {
|
||||
u64 handle;
|
||||
};
|
||||
|
||||
/*
 * Payload for TEGRA_VGPU_CMD_GET_ECC_INFO (member "ecc_info" of the
 * tegra_vgpu_cmd_msg params union).
 */
struct tegra_vgpu_ecc_info_params {
|
||||
/*
 * Number of tegra_vgpu_ecc_info_entry records the guest reads from the
 * out-of-band buffer after the command completes.
 */
u32 ecc_stats_count;
|
||||
};
|
||||
|
||||
/*
 * One ECC statistic descriptor as laid out in the out-of-band buffer that
 * accompanies a TEGRA_VGPU_CMD_GET_ECC_INFO reply.
 */
struct tegra_vgpu_ecc_info_entry {
|
||||
/* Identifier later passed back in tegra_vgpu_ecc_counter_params.ecc_id. */
u32 ecc_id;
|
||||
/*
 * Statistic name; used by the guest as the sysfs node name.
 * NOTE(review): the guest copies at most NVGPU_ECC_STAT_NAME_MAX_SIZE bytes
 * and terminates the string itself, so this field is presumably not
 * guaranteed to be NUL-terminated -- confirm against the server side.
 */
char name[NVGPU_ECC_STAT_NAME_MAX_SIZE];
|
||||
};
|
||||
|
||||
/*
 * Payload for TEGRA_VGPU_CMD_GET_ECC_COUNTER_VALUE (member "ecc_counter" of
 * the tegra_vgpu_cmd_msg params union).
 */
struct tegra_vgpu_ecc_counter_params {
|
||||
/* In: identifier of the counter to read, as reported by GET_ECC_INFO. */
u32 ecc_id;
|
||||
/* Out: counter value; the guest reads it after a successful reply. */
u32 value;
|
||||
};
|
||||
|
||||
struct tegra_vgpu_gr_ctx_params {
|
||||
u64 gr_ctx_handle;
|
||||
u64 as_handle;
|
||||
@@ -659,6 +676,8 @@ struct tegra_vgpu_cmd_msg {
|
||||
struct tegra_vgpu_map_syncpt_params map_syncpt;
|
||||
struct tegra_vgpu_tsg_bind_channel_ex_params tsg_bind_channel_ex;
|
||||
struct tegra_vgpu_channel_update_pc_sampling update_pc_sampling;
|
||||
struct tegra_vgpu_ecc_info_params ecc_info;
|
||||
struct tegra_vgpu_ecc_counter_params ecc_counter;
|
||||
char padding[192];
|
||||
} params;
|
||||
};
|
||||
|
||||
@@ -37,11 +37,14 @@ struct gk20a;
|
||||
struct vm_gk20a;
|
||||
struct nvgpu_gr_ctx;
|
||||
struct nvgpu_cpu_time_correlation_sample;
|
||||
struct vgpu_ecc_stat;
|
||||
|
||||
/*
 * Per-GPU private state of the virtualized (vgpu) driver, obtained through
 * vgpu_get_priv_data().
 */
struct vgpu_priv_data {
|
||||
u64 virt_handle;
|
||||
struct nvgpu_thread intr_handler;
|
||||
struct tegra_vgpu_constants_params constants;
|
||||
/*
 * Array of ECC statistic descriptors; allocated by vgpu_ecc_get_info() and
 * freed (and reset to NULL) by vgpu_ecc_remove_info().
 */
struct vgpu_ecc_stat *ecc_stats;
|
||||
/* Number of valid elements in ecc_stats; 0 after vgpu_ecc_remove_info(). */
int ecc_stats_count;
|
||||
};
|
||||
|
||||
struct vgpu_priv_data *vgpu_get_priv_data(struct gk20a *g);
|
||||
|
||||
@@ -18,6 +18,8 @@
|
||||
#include <nvgpu/vgpu/vgpu.h>
|
||||
|
||||
#include "os/linux/platform_gk20a.h"
|
||||
#include "os/linux/os_linux.h"
|
||||
#include "vgpu/ecc_vgpu.h"
|
||||
|
||||
static ssize_t vgpu_load_show(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
@@ -38,13 +40,104 @@ static ssize_t vgpu_load_show(struct device *dev,
|
||||
}
|
||||
static DEVICE_ATTR(load, S_IRUGO, vgpu_load_show, NULL);
|
||||
|
||||
static ssize_t vgpu_ecc_stat_show(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct gk20a *g = get_gk20a(dev);
|
||||
struct tegra_vgpu_cmd_msg msg = {0};
|
||||
struct tegra_vgpu_ecc_counter_params *p = &msg.params.ecc_counter;
|
||||
struct dev_ext_attribute *ext_attr = container_of(attr,
|
||||
struct dev_ext_attribute, attr);
|
||||
struct vgpu_ecc_stat *ecc_stat = ext_attr->var;
|
||||
int err;
|
||||
|
||||
p->ecc_id = ecc_stat->ecc_id;
|
||||
|
||||
msg.cmd = TEGRA_VGPU_CMD_GET_ECC_COUNTER_VALUE;
|
||||
msg.handle = vgpu_get_handle(g);
|
||||
err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
|
||||
err = err ? err : msg.ret;
|
||||
if (unlikely(err)) {
|
||||
nvgpu_err(g, "ecc: cannot get ECC counter value: %d", err);
|
||||
return err;
|
||||
}
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%u\n", p->value);
|
||||
}
|
||||
|
||||
static int vgpu_create_ecc_sysfs(struct device *dev)
|
||||
{
|
||||
struct gk20a *g = get_gk20a(dev);
|
||||
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
|
||||
struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
|
||||
struct vgpu_ecc_stat *stats;
|
||||
struct dev_ext_attribute *attrs;
|
||||
int err, i, count;
|
||||
|
||||
err = vgpu_ecc_get_info(g);
|
||||
if (unlikely(err)) {
|
||||
nvgpu_err(g, "ecc: cannot get ECC info: %d", err);
|
||||
return err;
|
||||
}
|
||||
|
||||
stats = priv->ecc_stats;
|
||||
count = priv->ecc_stats_count;
|
||||
|
||||
attrs = nvgpu_kzalloc(g, count * sizeof(*attrs));
|
||||
if (unlikely(!attrs)) {
|
||||
nvgpu_err(g, "ecc: no memory");
|
||||
vgpu_ecc_remove_info(g);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
for (i = 0; i < count; i++) {
|
||||
sysfs_attr_init(&attrs[i].attr);
|
||||
attrs[i].attr.attr.name = stats[i].name;
|
||||
attrs[i].attr.attr.mode = VERIFY_OCTAL_PERMISSIONS(S_IRUGO);
|
||||
attrs[i].attr.show = vgpu_ecc_stat_show;
|
||||
attrs[i].attr.store = NULL;
|
||||
attrs[i].var = &stats[i];
|
||||
|
||||
err = device_create_file(dev, &attrs[i].attr);
|
||||
if (unlikely(err)) {
|
||||
nvgpu_warn(g, "ecc: cannot create file \"%s\": %d",
|
||||
stats[i].name, err);
|
||||
}
|
||||
}
|
||||
|
||||
l->ecc_attrs = attrs;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void vgpu_remove_ecc_sysfs(struct device *dev)
|
||||
{
|
||||
struct gk20a *g = get_gk20a(dev);
|
||||
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
|
||||
struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
|
||||
int i;
|
||||
|
||||
if (l->ecc_attrs) {
|
||||
for (i = 0; i < priv->ecc_stats_count; i++)
|
||||
device_remove_file(dev, &l->ecc_attrs[i].attr);
|
||||
|
||||
nvgpu_kfree(g, l->ecc_attrs);
|
||||
l->ecc_attrs = NULL;
|
||||
}
|
||||
|
||||
vgpu_ecc_remove_info(g);
|
||||
}
|
||||
|
||||
void vgpu_create_sysfs(struct device *dev)
|
||||
{
|
||||
if (device_create_file(dev, &dev_attr_load))
|
||||
dev_err(dev, "Failed to create vgpu sysfs attributes!\n");
|
||||
|
||||
vgpu_create_ecc_sysfs(dev);
|
||||
}
|
||||
|
||||
void vgpu_remove_sysfs(struct device *dev)
|
||||
{
|
||||
device_remove_file(dev, &dev_attr_load);
|
||||
vgpu_remove_ecc_sysfs(dev);
|
||||
}
|
||||
|
||||
92
drivers/gpu/nvgpu/vgpu/ecc_vgpu.c
Normal file
92
drivers/gpu/nvgpu/vgpu/ecc_vgpu.c
Normal file
@@ -0,0 +1,92 @@
|
||||
/*
|
||||
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/kmem.h>
|
||||
#include <nvgpu/vgpu/vgpu_ivc.h>
|
||||
#include <nvgpu/vgpu/vgpu.h>
|
||||
#include <nvgpu/errno.h>
|
||||
|
||||
#include "vgpu/ecc_vgpu.h"
|
||||
|
||||
int vgpu_ecc_get_info(struct gk20a *g)
|
||||
{
|
||||
struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
|
||||
struct tegra_vgpu_cmd_msg msg = {0};
|
||||
struct tegra_vgpu_ecc_info_params *p = &msg.params.ecc_info;
|
||||
struct tegra_vgpu_ecc_info_entry *entry;
|
||||
struct vgpu_ecc_stat *stats;
|
||||
void *handle;
|
||||
int err, i, count;
|
||||
size_t oob_size;
|
||||
|
||||
msg.cmd = TEGRA_VGPU_CMD_GET_ECC_INFO;
|
||||
msg.handle = vgpu_get_handle(g);
|
||||
err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
|
||||
err = err ? err : msg.ret;
|
||||
if (unlikely(err)) {
|
||||
nvgpu_err(g, "vgpu get_ecc_info failed, err=%d", err);
|
||||
return err;
|
||||
}
|
||||
|
||||
count = p->ecc_stats_count;
|
||||
|
||||
handle = vgpu_ivc_oob_get_ptr(vgpu_ivc_get_server_vmid(),
|
||||
TEGRA_VGPU_QUEUE_CMD,
|
||||
(void **)&entry, &oob_size);
|
||||
if (unlikely(!handle))
|
||||
return -EINVAL;
|
||||
|
||||
if (unlikely(oob_size < count * sizeof(*entry))) {
|
||||
err = -E2BIG;
|
||||
goto out;
|
||||
}
|
||||
|
||||
stats = nvgpu_kzalloc(g, count * sizeof(*stats));
|
||||
if (unlikely(!stats)) {
|
||||
err = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (i = 0; i < count; i++) {
|
||||
stats[i].ecc_id = entry[i].ecc_id;
|
||||
strncpy(stats[i].name, entry[i].name,
|
||||
NVGPU_ECC_STAT_NAME_MAX_SIZE);
|
||||
}
|
||||
|
||||
priv->ecc_stats = stats;
|
||||
priv->ecc_stats_count = count;
|
||||
out:
|
||||
vgpu_ivc_oob_put_ptr(handle);
|
||||
return err;
|
||||
}
|
||||
|
||||
void vgpu_ecc_remove_info(struct gk20a *g)
|
||||
{
|
||||
struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
|
||||
|
||||
priv->ecc_stats_count = 0;
|
||||
|
||||
if (priv->ecc_stats) {
|
||||
nvgpu_kfree(g, priv->ecc_stats);
|
||||
priv->ecc_stats = NULL;
|
||||
}
|
||||
}
|
||||
39
drivers/gpu/nvgpu/vgpu/ecc_vgpu.h
Normal file
39
drivers/gpu/nvgpu/vgpu/ecc_vgpu.h
Normal file
@@ -0,0 +1,39 @@
|
||||
/*
|
||||
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef _ECC_VGPU_H_
|
||||
#define _ECC_VGPU_H_
|
||||
|
||||
#include <nvgpu/types.h>
|
||||
#include <nvgpu/ecc.h> /* For NVGPU_ECC_STAT_NAME_MAX_SIZE */
|
||||
|
||||
struct gk20a;
|
||||
|
||||
/*
 * Guest-side copy of one ECC statistic descriptor, filled in by
 * vgpu_ecc_get_info().
 */
struct vgpu_ecc_stat {
|
||||
/* Identifier used when querying the counter value. */
u32 ecc_id;
|
||||
/*
 * Statistic name. Deliberately one byte larger than the copy bound
 * (NVGPU_ECC_STAT_NAME_MAX_SIZE) used by vgpu_ecc_get_info(), leaving room
 * for a terminating NUL after a maximum-length name.
 */
char name[NVGPU_ECC_STAT_NAME_MAX_SIZE + 1];
|
||||
};
|
||||
|
||||
int vgpu_ecc_get_info(struct gk20a *g);
|
||||
void vgpu_ecc_remove_info(struct gk20a *g);
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user