mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 09:12:24 +03:00
gpu: nvgpu: vgpu: ecc sysfs support for vgpu
- fetch ecc info from RM server and create sysfs nodes - new file ecc_vgpu.c for platform-independent code - add 2 new commands: GET_ECC_INFO and GET_ECC_COUNTER_VALUE JIRA EVLR-2590 Change-Id: I040a9fcd23326e432ca93e9a028319f9c1c570f0 Signed-off-by: Kyle Guo <kyleg@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1777428 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
91390d857f
commit
2a25d03f2b
@@ -245,6 +245,7 @@ nvgpu-$(CONFIG_TEGRA_GR_VIRTUALIZATION) += \
|
|||||||
vgpu/dbg_vgpu.o \
|
vgpu/dbg_vgpu.o \
|
||||||
vgpu/tsg_vgpu.o \
|
vgpu/tsg_vgpu.o \
|
||||||
vgpu/css_vgpu.o \
|
vgpu/css_vgpu.o \
|
||||||
|
vgpu/ecc_vgpu.o \
|
||||||
vgpu/gm20b/vgpu_gr_gm20b.o \
|
vgpu/gm20b/vgpu_gr_gm20b.o \
|
||||||
vgpu/gp10b/vgpu_hal_gp10b.o \
|
vgpu/gp10b/vgpu_hal_gp10b.o \
|
||||||
vgpu/gp10b/vgpu_gr_gp10b.o \
|
vgpu/gp10b/vgpu_gr_gp10b.o \
|
||||||
|
|||||||
@@ -26,6 +26,7 @@
|
|||||||
#define __TEGRA_VGPU_H
|
#define __TEGRA_VGPU_H
|
||||||
|
|
||||||
#include <nvgpu/types.h>
|
#include <nvgpu/types.h>
|
||||||
|
#include <nvgpu/ecc.h> /* For NVGPU_ECC_STAT_NAME_MAX_SIZE */
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
TEGRA_VGPU_MODULE_GPU = 0,
|
TEGRA_VGPU_MODULE_GPU = 0,
|
||||||
@@ -120,6 +121,8 @@ enum {
|
|||||||
TEGRA_VGPU_CMD_UPDATE_PC_SAMPLING = 81,
|
TEGRA_VGPU_CMD_UPDATE_PC_SAMPLING = 81,
|
||||||
TEGRA_VGPU_CMD_SUSPEND = 82,
|
TEGRA_VGPU_CMD_SUSPEND = 82,
|
||||||
TEGRA_VGPU_CMD_RESUME = 83,
|
TEGRA_VGPU_CMD_RESUME = 83,
|
||||||
|
TEGRA_VGPU_CMD_GET_ECC_INFO = 84,
|
||||||
|
TEGRA_VGPU_CMD_GET_ECC_COUNTER_VALUE = 85,
|
||||||
};
|
};
|
||||||
|
|
||||||
struct tegra_vgpu_connect_params {
|
struct tegra_vgpu_connect_params {
|
||||||
@@ -378,6 +381,20 @@ struct tegra_vgpu_channel_free_hwpm_ctx {
|
|||||||
u64 handle;
|
u64 handle;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
 * Parameters carried in tegra_vgpu_cmd_msg for TEGRA_VGPU_CMD_GET_ECC_INFO.
 * vgpu_ecc_get_info() reads ecc_stats_count from the reply and uses it as
 * the number of tegra_vgpu_ecc_info_entry records to copy out of the buffer
 * obtained from vgpu_ivc_oob_get_ptr().
 */
struct tegra_vgpu_ecc_info_params {
|
||||||
|
u32 ecc_stats_count;
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
 * One ECC counter description as read by vgpu_ecc_get_info() from the
 * buffer returned by vgpu_ivc_oob_get_ptr(): a numeric id plus a name of at
 * most NVGPU_ECC_STAT_NAME_MAX_SIZE bytes. The reader copies the name with
 * strncpy() into a larger zeroed buffer, so it does not depend on the name
 * being NUL-terminated here.
 */
struct tegra_vgpu_ecc_info_entry {
|
||||||
|
u32 ecc_id;
|
||||||
|
char name[NVGPU_ECC_STAT_NAME_MAX_SIZE];
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
 * Parameters carried in tegra_vgpu_cmd_msg for
 * TEGRA_VGPU_CMD_GET_ECC_COUNTER_VALUE. vgpu_ecc_stat_show() fills ecc_id
 * before sending the message and prints value from the reply.
 */
struct tegra_vgpu_ecc_counter_params {
|
||||||
|
u32 ecc_id;
|
||||||
|
u32 value;
|
||||||
|
};
|
||||||
|
|
||||||
struct tegra_vgpu_gr_ctx_params {
|
struct tegra_vgpu_gr_ctx_params {
|
||||||
u64 gr_ctx_handle;
|
u64 gr_ctx_handle;
|
||||||
u64 as_handle;
|
u64 as_handle;
|
||||||
@@ -659,6 +676,8 @@ struct tegra_vgpu_cmd_msg {
|
|||||||
struct tegra_vgpu_map_syncpt_params map_syncpt;
|
struct tegra_vgpu_map_syncpt_params map_syncpt;
|
||||||
struct tegra_vgpu_tsg_bind_channel_ex_params tsg_bind_channel_ex;
|
struct tegra_vgpu_tsg_bind_channel_ex_params tsg_bind_channel_ex;
|
||||||
struct tegra_vgpu_channel_update_pc_sampling update_pc_sampling;
|
struct tegra_vgpu_channel_update_pc_sampling update_pc_sampling;
|
||||||
|
struct tegra_vgpu_ecc_info_params ecc_info;
|
||||||
|
struct tegra_vgpu_ecc_counter_params ecc_counter;
|
||||||
char padding[192];
|
char padding[192];
|
||||||
} params;
|
} params;
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -37,11 +37,14 @@ struct gk20a;
|
|||||||
struct vm_gk20a;
|
struct vm_gk20a;
|
||||||
struct nvgpu_gr_ctx;
|
struct nvgpu_gr_ctx;
|
||||||
struct nvgpu_cpu_time_correlation_sample;
|
struct nvgpu_cpu_time_correlation_sample;
|
||||||
|
struct vgpu_ecc_stat;
|
||||||
|
|
||||||
/*
 * Private vgpu state returned by vgpu_get_priv_data().
 *
 * ecc_stats / ecc_stats_count are set by vgpu_ecc_get_info() to a
 * nvgpu_kzalloc()'ed array of ECC counter descriptions and its length;
 * vgpu_ecc_remove_info() frees the array and resets both fields.
 */
struct vgpu_priv_data {
|
struct vgpu_priv_data {
|
||||||
u64 virt_handle;
|
u64 virt_handle;
|
||||||
struct nvgpu_thread intr_handler;
|
struct nvgpu_thread intr_handler;
|
||||||
struct tegra_vgpu_constants_params constants;
|
struct tegra_vgpu_constants_params constants;
|
||||||
|
struct vgpu_ecc_stat *ecc_stats;
|
||||||
|
int ecc_stats_count;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct vgpu_priv_data *vgpu_get_priv_data(struct gk20a *g);
|
struct vgpu_priv_data *vgpu_get_priv_data(struct gk20a *g);
|
||||||
|
|||||||
@@ -18,6 +18,8 @@
|
|||||||
#include <nvgpu/vgpu/vgpu.h>
|
#include <nvgpu/vgpu/vgpu.h>
|
||||||
|
|
||||||
#include "os/linux/platform_gk20a.h"
|
#include "os/linux/platform_gk20a.h"
|
||||||
|
#include "os/linux/os_linux.h"
|
||||||
|
#include "vgpu/ecc_vgpu.h"
|
||||||
|
|
||||||
static ssize_t vgpu_load_show(struct device *dev,
|
static ssize_t vgpu_load_show(struct device *dev,
|
||||||
struct device_attribute *attr,
|
struct device_attribute *attr,
|
||||||
@@ -38,13 +40,104 @@ static ssize_t vgpu_load_show(struct device *dev,
|
|||||||
}
|
}
|
||||||
static DEVICE_ATTR(load, S_IRUGO, vgpu_load_show, NULL);
|
static DEVICE_ATTR(load, S_IRUGO, vgpu_load_show, NULL);
|
||||||
|
|
||||||
|
static ssize_t vgpu_ecc_stat_show(struct device *dev,
|
||||||
|
struct device_attribute *attr,
|
||||||
|
char *buf)
|
||||||
|
{
|
||||||
|
struct gk20a *g = get_gk20a(dev);
|
||||||
|
struct tegra_vgpu_cmd_msg msg = {0};
|
||||||
|
struct tegra_vgpu_ecc_counter_params *p = &msg.params.ecc_counter;
|
||||||
|
struct dev_ext_attribute *ext_attr = container_of(attr,
|
||||||
|
struct dev_ext_attribute, attr);
|
||||||
|
struct vgpu_ecc_stat *ecc_stat = ext_attr->var;
|
||||||
|
int err;
|
||||||
|
|
||||||
|
p->ecc_id = ecc_stat->ecc_id;
|
||||||
|
|
||||||
|
msg.cmd = TEGRA_VGPU_CMD_GET_ECC_COUNTER_VALUE;
|
||||||
|
msg.handle = vgpu_get_handle(g);
|
||||||
|
err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
|
||||||
|
err = err ? err : msg.ret;
|
||||||
|
if (unlikely(err)) {
|
||||||
|
nvgpu_err(g, "ecc: cannot get ECC counter value: %d", err);
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
return snprintf(buf, PAGE_SIZE, "%u\n", p->value);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int vgpu_create_ecc_sysfs(struct device *dev)
|
||||||
|
{
|
||||||
|
struct gk20a *g = get_gk20a(dev);
|
||||||
|
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
|
||||||
|
struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
|
||||||
|
struct vgpu_ecc_stat *stats;
|
||||||
|
struct dev_ext_attribute *attrs;
|
||||||
|
int err, i, count;
|
||||||
|
|
||||||
|
err = vgpu_ecc_get_info(g);
|
||||||
|
if (unlikely(err)) {
|
||||||
|
nvgpu_err(g, "ecc: cannot get ECC info: %d", err);
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
stats = priv->ecc_stats;
|
||||||
|
count = priv->ecc_stats_count;
|
||||||
|
|
||||||
|
attrs = nvgpu_kzalloc(g, count * sizeof(*attrs));
|
||||||
|
if (unlikely(!attrs)) {
|
||||||
|
nvgpu_err(g, "ecc: no memory");
|
||||||
|
vgpu_ecc_remove_info(g);
|
||||||
|
return -ENOMEM;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < count; i++) {
|
||||||
|
sysfs_attr_init(&attrs[i].attr);
|
||||||
|
attrs[i].attr.attr.name = stats[i].name;
|
||||||
|
attrs[i].attr.attr.mode = VERIFY_OCTAL_PERMISSIONS(S_IRUGO);
|
||||||
|
attrs[i].attr.show = vgpu_ecc_stat_show;
|
||||||
|
attrs[i].attr.store = NULL;
|
||||||
|
attrs[i].var = &stats[i];
|
||||||
|
|
||||||
|
err = device_create_file(dev, &attrs[i].attr);
|
||||||
|
if (unlikely(err)) {
|
||||||
|
nvgpu_warn(g, "ecc: cannot create file \"%s\": %d",
|
||||||
|
stats[i].name, err);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
l->ecc_attrs = attrs;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void vgpu_remove_ecc_sysfs(struct device *dev)
|
||||||
|
{
|
||||||
|
struct gk20a *g = get_gk20a(dev);
|
||||||
|
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
|
||||||
|
struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
|
||||||
|
int i;
|
||||||
|
|
||||||
|
if (l->ecc_attrs) {
|
||||||
|
for (i = 0; i < priv->ecc_stats_count; i++)
|
||||||
|
device_remove_file(dev, &l->ecc_attrs[i].attr);
|
||||||
|
|
||||||
|
nvgpu_kfree(g, l->ecc_attrs);
|
||||||
|
l->ecc_attrs = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
vgpu_ecc_remove_info(g);
|
||||||
|
}
|
||||||
|
|
||||||
/*
 * Create the vgpu sysfs nodes on dev: the "load" attribute and, via
 * vgpu_create_ecc_sysfs(), the per-counter ECC nodes. A failure to create
 * "load" is reported with dev_err(); the return value of
 * vgpu_create_ecc_sysfs() is not checked here.
 */
void vgpu_create_sysfs(struct device *dev)
|
void vgpu_create_sysfs(struct device *dev)
|
||||||
{
|
{
|
||||||
if (device_create_file(dev, &dev_attr_load))
|
if (device_create_file(dev, &dev_attr_load))
|
||||||
dev_err(dev, "Failed to create vgpu sysfs attributes!\n");
|
dev_err(dev, "Failed to create vgpu sysfs attributes!\n");
|
||||||
|
|
||||||
|
vgpu_create_ecc_sysfs(dev);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
 * Remove the vgpu sysfs nodes from dev: the "load" attribute first, then
 * the ECC nodes via vgpu_remove_ecc_sysfs().
 */
void vgpu_remove_sysfs(struct device *dev)
|
void vgpu_remove_sysfs(struct device *dev)
|
||||||
{
|
{
|
||||||
device_remove_file(dev, &dev_attr_load);
|
device_remove_file(dev, &dev_attr_load);
|
||||||
|
vgpu_remove_ecc_sysfs(dev);
|
||||||
}
|
}
|
||||||
|
|||||||
92
drivers/gpu/nvgpu/vgpu/ecc_vgpu.c
Normal file
92
drivers/gpu/nvgpu/vgpu/ecc_vgpu.c
Normal file
@@ -0,0 +1,92 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||||
|
* DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <nvgpu/kmem.h>
|
||||||
|
#include <nvgpu/vgpu/vgpu_ivc.h>
|
||||||
|
#include <nvgpu/vgpu/vgpu.h>
|
||||||
|
#include <nvgpu/errno.h>
|
||||||
|
|
||||||
|
#include "vgpu/ecc_vgpu.h"
|
||||||
|
|
||||||
|
int vgpu_ecc_get_info(struct gk20a *g)
|
||||||
|
{
|
||||||
|
struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
|
||||||
|
struct tegra_vgpu_cmd_msg msg = {0};
|
||||||
|
struct tegra_vgpu_ecc_info_params *p = &msg.params.ecc_info;
|
||||||
|
struct tegra_vgpu_ecc_info_entry *entry;
|
||||||
|
struct vgpu_ecc_stat *stats;
|
||||||
|
void *handle;
|
||||||
|
int err, i, count;
|
||||||
|
size_t oob_size;
|
||||||
|
|
||||||
|
msg.cmd = TEGRA_VGPU_CMD_GET_ECC_INFO;
|
||||||
|
msg.handle = vgpu_get_handle(g);
|
||||||
|
err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
|
||||||
|
err = err ? err : msg.ret;
|
||||||
|
if (unlikely(err)) {
|
||||||
|
nvgpu_err(g, "vgpu get_ecc_info failed, err=%d", err);
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
count = p->ecc_stats_count;
|
||||||
|
|
||||||
|
handle = vgpu_ivc_oob_get_ptr(vgpu_ivc_get_server_vmid(),
|
||||||
|
TEGRA_VGPU_QUEUE_CMD,
|
||||||
|
(void **)&entry, &oob_size);
|
||||||
|
if (unlikely(!handle))
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
if (unlikely(oob_size < count * sizeof(*entry))) {
|
||||||
|
err = -E2BIG;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
stats = nvgpu_kzalloc(g, count * sizeof(*stats));
|
||||||
|
if (unlikely(!stats)) {
|
||||||
|
err = -ENOMEM;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < count; i++) {
|
||||||
|
stats[i].ecc_id = entry[i].ecc_id;
|
||||||
|
strncpy(stats[i].name, entry[i].name,
|
||||||
|
NVGPU_ECC_STAT_NAME_MAX_SIZE);
|
||||||
|
}
|
||||||
|
|
||||||
|
priv->ecc_stats = stats;
|
||||||
|
priv->ecc_stats_count = count;
|
||||||
|
out:
|
||||||
|
vgpu_ivc_oob_put_ptr(handle);
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
void vgpu_ecc_remove_info(struct gk20a *g)
|
||||||
|
{
|
||||||
|
struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
|
||||||
|
|
||||||
|
priv->ecc_stats_count = 0;
|
||||||
|
|
||||||
|
if (priv->ecc_stats) {
|
||||||
|
nvgpu_kfree(g, priv->ecc_stats);
|
||||||
|
priv->ecc_stats = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
39
drivers/gpu/nvgpu/vgpu/ecc_vgpu.h
Normal file
39
drivers/gpu/nvgpu/vgpu/ecc_vgpu.h
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||||
|
* DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _ECC_VGPU_H_
|
||||||
|
#define _ECC_VGPU_H_
|
||||||
|
|
||||||
|
#include <nvgpu/types.h>
|
||||||
|
#include <nvgpu/ecc.h> /* For NVGPU_ECC_STAT_NAME_MAX_SIZE */
|
||||||
|
|
||||||
|
struct gk20a;
|
||||||
|
|
||||||
|
/*
 * Local copy of one ECC counter description, filled in by
 * vgpu_ecc_get_info(). name holds one byte more than
 * NVGPU_ECC_STAT_NAME_MAX_SIZE; vgpu_ecc_get_info() copies at most
 * NVGPU_ECC_STAT_NAME_MAX_SIZE bytes into zeroed storage, which leaves the
 * final byte as the terminating NUL.
 */
struct vgpu_ecc_stat {
|
||||||
|
u32 ecc_id;
|
||||||
|
char name[NVGPU_ECC_STAT_NAME_MAX_SIZE + 1];
|
||||||
|
};
|
||||||
|
|
||||||
|
int vgpu_ecc_get_info(struct gk20a *g);
|
||||||
|
void vgpu_ecc_remove_info(struct gk20a *g);
|
||||||
|
|
||||||
|
#endif
|
||||||
Reference in New Issue
Block a user