mirror of git://nv-tegra.nvidia.com/linux-nv-oot.git
nvidia-oot: port virtual oops storage driver
Using this patch we are porting the virtual oops storage driver to the OOT kernel.

JIRA ESLC-7217
Bug 3961155

Change-Id: I60ba159b3a4662cf02d686a1916a110c5158901e
Signed-off-by: Manish Bhardwaj <mbhardwaj@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nv-oot/+/2849807
Reviewed-by: Suresh Venkatachalam <skathirampat@nvidia.com>
Reviewed-by: Sachin Nikam <snikam@nvidia.com>
committed by mobile promotions
parent 4d73511481
commit 01a13440b4
@@ -1,9 +1,10 @@
 # SPDX-License-Identifier: GPL-2.0
-# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2022-2023, NVIDIA CORPORATION. All rights reserved.
 
 LINUXINCLUDE += -I$(srctree.nvidia-oot)/include
 
 obj-m += block/tegra_virt_storage/
+obj-m += block/tegra_oops_virt_storage/
 obj-m += c2c/
 obj-m += clink/
 obj-m += crypto/
drivers/block/tegra_oops_virt_storage/Makefile (new file, 8 lines)
@@ -0,0 +1,8 @@
# SPDX-License-Identifier: GPL-2.0
# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.

#
# Makefile for Virtual Storage OOPS Driver
#

obj-m += tegra_hv_vblk_oops.o
drivers/block/tegra_oops_virt_storage/tegra_hv_vblk_oops.c (new file, 798 lines)
@@ -0,0 +1,798 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

#include <linux/version.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/kernel.h>	/* printk() */
#include <linux/pm.h>
#include <linux/slab.h>		/* kmalloc() */
#include <linux/fs.h>		/* everything... */
#include <linux/errno.h>	/* error codes */
#include <linux/kdev_t.h>
#include <linux/vmalloc.h>
#include <linux/interrupt.h>
#include <soc/tegra/fuse.h>
#include <linux/platform_device.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
#include <linux/of_platform.h>
#include <linux/delay.h>
#include <asm/uaccess.h>
#include <asm-generic/bug.h>
#include <linux/kmsg_dump.h>
#include <linux/pstore_zone.h>
#include "tegra_vblk_oops.h"

static struct vblk_dev *vblkdev_oops;
static struct pstore_zone_info pstore_zone;

/* Average wait times for storage operations */
#define WAIT_FOR_EMMC_READ_MS 2
#define WAIT_FOR_EMMC_WRITE_MS 3
#define WAIT_FOR_UFS_READ_MS 3
#define WAIT_FOR_UFS_WRITE_MS 5
#define VSC_RESPONSE_RETRIES_AVG_MS 1
#define VSC_RESPONSE_RETRIES_WORST_EMMC_MS 30000
#define VSC_RESPONSE_RETRIES_WORST_UFS_MS 30000

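/*
 * Fill a struct vs_request in place: req_type selects the request class
 * (VS_DATA_REQ for data transfers), req_opr the block operation
 * (VS_BLK_READ or VS_BLK_WRITE), opr_offset and num_of_blk are in units
 * of blocks, and opr_data_offset is the byte offset of the request's
 * window inside the shared mempool.
 */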
#define POPULATE_BLK_REQ(x, req_type, req_opr, opr_offset, num_of_blk, opr_data_offset) \
	do { \
		x.type = req_type; \
		x.blkdev_req.req_op = req_opr; \
		x.blkdev_req.blk_req.blk_offset = opr_offset; \
		x.blkdev_req.blk_req.num_blks = num_of_blk; \
		x.blkdev_req.blk_req.data_offset = opr_data_offset; \
	} while (0)

static int32_t wait_for_fops_completion(struct vblk_dev *vblkdev_oops, bool is_read)
{
	int32_t retry;
	int32_t sleep;

	/*
	 * 1. Wait for a response from the Storage Server using the
	 *    weighted average wait time for the operation.
	 * 2. If we did not get the response, keep polling for it
	 *    every 1 ms up to the worst-case timeout.
	 * 3. The data below is for read and write operations.
	 *    Time taken in the worst case:
	 *        UFS:  1.5 secs
	 *        EMMC: 2 secs
	 *
	 *    Average time taken:
	 *              EMMC    UFS
	 *    read      2ms     3ms
	 *    write     3ms     5ms
	 */
	if (vblkdev_oops->config.phys_dev == VSC_DEV_EMMC) {
		if (is_read)
			sleep = WAIT_FOR_EMMC_READ_MS;
		else
			sleep = WAIT_FOR_EMMC_WRITE_MS;
	} else if (vblkdev_oops->config.phys_dev == VSC_DEV_UFS) {
		if (is_read)
			sleep = WAIT_FOR_UFS_READ_MS;
		else
			sleep = WAIT_FOR_UFS_WRITE_MS;
	} else {
		dev_err(vblkdev_oops->device, "not supported for QSPI device\n");
		retry = -1;
		return retry;
	}

	retry = VSC_RESPONSE_RETRIES_AVG_MS;
	while (!tegra_hv_ivc_can_read(vblkdev_oops->ivck) && (retry--)) {
		dev_dbg(vblkdev_oops->device, "Waiting for IVC response\n");
		msleep(sleep);
	}

	if (retry == -1) {
		if (vblkdev_oops->config.phys_dev == VSC_DEV_EMMC) {
			retry = VSC_RESPONSE_RETRIES_WORST_EMMC_MS;
		} else if (vblkdev_oops->config.phys_dev == VSC_DEV_UFS) {
			retry = VSC_RESPONSE_RETRIES_WORST_UFS_MS;
		} else {
			dev_err(vblkdev_oops->device, "not supported for QSPI device\n");
			retry = -1;
			return retry;
		}

		while (!tegra_hv_ivc_can_read(vblkdev_oops->ivck) && (retry--)) {
			dev_dbg(vblkdev_oops->device, "Waiting for IVC response\n");
			msleep(VSC_RESPONSE_WAIT_MS);
		}
	}

	return retry;
}

static ssize_t vblk_oops_read(char *buf, size_t bytes, loff_t pos)
{
	struct vsc_request *vsc_req;
	struct vs_request req_in;
	struct vs_request req_out;
	uint32_t blocks, block_pos;
	uint32_t block_size = vblkdev_oops->config.blk_config.hardblk_size;
	int32_t ret;

	dev_dbg(vblkdev_oops->device, "%s> pos:%lld, bytes:%lu\n", __func__,
		pos, bytes);

	/*
	 * We expect to be invoked in non-atomic context for read, but let's
	 * make sure that's always the case.
	 */
	if (in_atomic()) {
		dev_warn(vblkdev_oops->device,
			"%s invoked in atomic context...aborting\n", __func__);
		return -EBUSY;
	}

	/*
	 * Read is always from the start of a record, which is block aligned,
	 * but let's check just to be sure.
	 */
	if (pos & (block_size - 1)) {
		dev_warn(vblkdev_oops->device, "Unaligned start address\n");
		return -ENOMSG;
	}

	mutex_lock(&vblkdev_oops->ivc_lock);
	vsc_req = &vblkdev_oops->reqs[VSC_REQ_RW];

	block_pos = pos / block_size;
	blocks = bytes / block_size;

	/*
	 * For non-block-aligned read requests, we can read full block(s) and
	 * return the requested bytes.
	 */
	if (bytes & (block_size - 1))
		blocks += 1;

	POPULATE_BLK_REQ(req_in, VS_DATA_REQ, VS_BLK_READ, block_pos, blocks,
			 vsc_req->mempool_offset);

	if (!tegra_hv_ivc_write(vblkdev_oops->ivck, &req_in,
				sizeof(struct vs_request))) {
		dev_err(vblkdev_oops->device,
			"%s: IVC write failed!\n", __func__);
		goto fail;
	}

	ret = wait_for_fops_completion(vblkdev_oops, true);
	if (ret == (-1)) {
		dev_err(vblkdev_oops->device,
			"%s: No response from virtual storage!\n", __func__);
		goto fail;
	}

	/* Copy the data and advance to the next frame */
	if ((tegra_hv_ivc_read(vblkdev_oops->ivck, &req_out,
			       sizeof(struct vs_request)) <= 0)) {
		dev_err(vblkdev_oops->device,
			"%s: IVC read failed!\n", __func__);
		goto fail;
	}

	if (req_out.status != 0) {
		dev_err(vblkdev_oops->device, "%s: IO request error = %d\n",
			__func__, req_out.status);
	}

	memcpy(buf, vsc_req->mempool_virt, bytes);

	mutex_unlock(&vblkdev_oops->ivc_lock);
	return bytes;

fail:
	mutex_unlock(&vblkdev_oops->ivc_lock);
	return -ENOMSG;
}

static ssize_t vblk_oops_write(const char *buf, size_t bytes,
		loff_t pos)
{
	struct vsc_request *vsc_req;
	struct vs_request req_in;
	struct vs_request req_out;
	uint32_t blocks, block_pos;
	uint32_t block_size = vblkdev_oops->config.blk_config.hardblk_size;
	int32_t ret;

	dev_dbg(vblkdev_oops->device, "%s> pos:%lld, bytes:%lu\n", __func__,
		pos, bytes);

	/*
	 * It is possible for write to be invoked from atomic context. We
	 * will return EBUSY so pstore_zone will attempt a retry from a
	 * workqueue later.
	 */
	if (in_atomic()) {
		dev_warn(vblkdev_oops->device,
			"%s invoked in atomic context...aborting\n", __func__);
		return -EBUSY;
	}

	/*
	 * If the write position is misaligned with the block size, return
	 * EBUSY so pstore_zone will retry to flush all dirty records
	 * (record start addresses are always block aligned).
	 *
	 * However, this is not expected to happen since pstore always writes
	 * from the start address of the record buffer (for KMSG at least),
	 * and we support only KMSG.
	 */
	if (pos & (block_size - 1)) {
		dev_warn(vblkdev_oops->device, "Unaligned start address\n");
		return -EBUSY;
	}

	if (!bytes)
		return -ENOMSG;

	mutex_lock(&vblkdev_oops->ivc_lock);
	vsc_req = &vblkdev_oops->reqs[VSC_REQ_RW];

	block_pos = pos / block_size;
	blocks = bytes / block_size;

	/*
	 * The only need for an unaligned size is when metadata is updated
	 * during a pstore erase operation. It is OK in this case to round up
	 * the size to the block boundary (corrupting the remainder of the
	 * block).
	 */
	if (bytes & (block_size - 1))
		blocks += 1;

	POPULATE_BLK_REQ(req_in, VS_DATA_REQ, VS_BLK_WRITE, block_pos, blocks,
			 vsc_req->mempool_offset);

	memcpy(vsc_req->mempool_virt, buf, bytes);

	if (!tegra_hv_ivc_write(vblkdev_oops->ivck, &req_in,
				sizeof(struct vs_request))) {
		dev_err(vblkdev_oops->device,
			"%s IVC write failed!\n", __func__);
		goto fail;
	}

	ret = wait_for_fops_completion(vblkdev_oops, false);
	if (ret == (-1)) {
		dev_err(vblkdev_oops->device,
			"%s: No response from virtual storage!\n", __func__);
		goto fail;
	}

	/* Copy the data and advance to the next frame */
	if ((tegra_hv_ivc_read(vblkdev_oops->ivck, &req_out,
			       sizeof(struct vs_request)) <= 0)) {
		dev_err(vblkdev_oops->device,
			"%s: IVC read failed!\n", __func__);
		goto fail;
	}

	if (req_out.status != 0) {
		dev_err(vblkdev_oops->device, "%s: IO request error = %d\n",
			__func__, req_out.status);
	}

	mutex_unlock(&vblkdev_oops->ivc_lock);
	return bytes;

fail:
	mutex_unlock(&vblkdev_oops->ivc_lock);
	return -ENOMSG;
}

/*
 * panic_write mirrors what the regular write does, with some differences:
 * - this is a best-effort service that can make no assumptions about
 *   system state
 * - avoid locks, since nobody is executing concurrently and the system is
 *   going to stop running soon
 * - use the VSC_REQ that is reserved for panic
 * - no need to check for the VSC response; send the request and assume it
 *   is all good, since the caller is not going to do anything meaningful
 *   if we report an error
 */
static ssize_t vblk_oops_panic_write(const char *buf, size_t bytes,
		loff_t pos)
{
	struct vsc_request *vsc_req;
	struct vs_request req_in;
	uint32_t blocks, block_pos;
	uint32_t block_size = vblkdev_oops->config.blk_config.hardblk_size;

	dev_dbg(vblkdev_oops->device, "%s> pos:%lld, bytes:%lu\n", __func__,
		pos, bytes);

	/* Not expected to happen for KMSG */
	if (pos & (block_size - 1)) {
		dev_warn(vblkdev_oops->device, "Unaligned start address\n");
		return -ENOMSG;
	}

	if (!bytes)
		return -ENOMSG;

	vsc_req = &vblkdev_oops->reqs[VSC_REQ_PANIC];

	block_pos = pos / block_size;
	blocks = bytes / block_size;

	/*
	 * The only need for an unaligned size is when metadata is updated
	 * during a pstore erase operation. It is OK in this case to round up
	 * the size to the block boundary.
	 *
	 * For panic_write, however, we expect full records to be written,
	 * which means the start offset and size are both block aligned.
	 */
	if (bytes & (block_size - 1))
		blocks += 1;

	POPULATE_BLK_REQ(req_in, VS_DATA_REQ, VS_BLK_WRITE, block_pos, blocks,
			 vsc_req->mempool_offset);

	memcpy(vsc_req->mempool_virt, buf, bytes);

	/*
	 * We are avoiding ivc_lock usage in this path since the assumption is
	 * that in the panic flow there is only a single thread/CPU executing,
	 * which is currently in vblk_oops_panic_write(), and after this the
	 * VM is going to either reboot or die. Once we get here,
	 * vblk_oops_read()/vblk_oops_write() is not going to be invoked.
	 *
	 * There is potential for IVC corruption in the event that
	 * vblk_oops_read()/vblk_oops_write() was accessing IVC when panic was
	 * triggered (either as a part of the vblk_* flow or outside of it).
	 * The right way to avoid corruption would be to use ivc_lock here,
	 * but we could potentially deadlock since
	 * vblk_oops_read()/vblk_oops_write() won't be able to run to release
	 * the acquired ivc_lock.
	 */
	if (!tegra_hv_ivc_write(vblkdev_oops->ivck, &req_in,
				sizeof(struct vs_request))) {
		dev_err(vblkdev_oops->device,
			"Request IVC write failed!\n");
		return 0;
	}

	/*
	 * VSC will respond at some point, but we don't care about the
	 * response since we cannot do anything to recover/retry (if there is
	 * some error). So we are not going to wait for and check the
	 * response.
	 *
	 * Also, after panic_write is invoked, the VM is going to stop
	 * executing, and the only recovery out of this is a VM (or Tegra)
	 * reboot. In both cases we reset IVC to get it to a clean state.
	 */
	return bytes;
}

/* Set up the virtual device. */
static void setup_device(struct vblk_dev *vblkdev)
{
	uint32_t max_io_bytes;
	uint32_t req_id;
	uint32_t max_requests;
	struct vsc_request *req;

	vblkdev->size =
		vblkdev->config.blk_config.num_blks *
		vblkdev->config.blk_config.hardblk_size;

	mutex_init(&vblkdev->ivc_lock);

	if (vblkdev->config.blk_config.max_read_blks_per_io !=
		vblkdev->config.blk_config.max_write_blks_per_io) {
		dev_err(vblkdev->device,
			"Different read/write blks not supported!\n");
		return;
	}

	/*
	 * Set the maximum number of requests possible using
	 * the information returned by the server.
	 */
	max_io_bytes = (vblkdev->config.blk_config.hardblk_size *
		vblkdev->config.blk_config.max_read_blks_per_io);
	if (max_io_bytes == 0) {
		dev_err(vblkdev->device, "Maximum io bytes value is 0!\n");
		return;
	}

	max_requests = ((vblkdev->ivmk->size) / max_io_bytes);

	if (max_requests < MAX_OOPS_VSC_REQS) {
		dev_err(vblkdev->device,
			"Device needs to support %d concurrent requests\n",
			MAX_OOPS_VSC_REQS);
		return;
	} else if (max_requests > MAX_OOPS_VSC_REQS) {
		dev_warn(vblkdev->device,
			"Only %d concurrent requests can be filed, consider reducing mempool size\n",
			MAX_OOPS_VSC_REQS);
		max_requests = MAX_OOPS_VSC_REQS;
	}

	/*
	 * If the number of IVC frames is less than the maximum requests that
	 * can be supported (calculated from the mempool size above), treat
	 * this as a critical error and panic:
	 *
	 * if (num_of_ivc_frames < max_supported_requests)
	 *         PANIC
	 *
	 * Ideally, these two should be equal, for the reasons below:
	 * 1. Each IVC frame is a request and should have backing data memory
	 *    for transfers. So, the number of requests supported by message
	 *    request memory should be <= the number of frames in the
	 *    IVC queue. The read/write logic depends on this.
	 * 2. If the number of requests supported by message request memory
	 *    is more than the IVC frame count, then that's a waste of memory
	 *    space and it introduces a race condition in submit_bio_req().
	 *    The race condition happens when there is only one empty slot in
	 *    the IVC write queue and two threads enter submit_bio_req(). Both
	 *    will compete for the IVC write (after calling ivc_can_write) and
	 *    one of the writes will fail. But with vblk_get_req() this race
	 *    can be avoided if num_of_ivc_frames >= max_supported_requests
	 *    holds true.
	 *
	 * In short, the optimal setting is when both of these are equal.
	 */
	if (vblkdev->ivck->nframes < max_requests) {
		/* Error if the virtual storage device supports
		 * read, write and ioctl operations
		 */
		panic("hv_vblk: IVC Channel:%u IVC frames %d less than possible max requests %d!\n",
			vblkdev->ivc_id, vblkdev->ivck->nframes,
			max_requests);
		return;
	}

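	/*
	 * Carve the shared mempool into per-request windows: request N gets
	 * bytes [N * max_io_bytes, (N + 1) * max_io_bytes). With the two
	 * requests used here (one for regular IO, one reserved for panic
	 * write) and, say, a 4 KiB block size with 8 blocks per IO, that
	 * would be two 32 KiB windows (illustrative numbers only).
	 */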
	for (req_id = 0; req_id < max_requests; req_id++) {
		req = &vblkdev->reqs[req_id];
		req->mempool_virt = (void *)((uintptr_t)vblkdev->shared_buffer +
			(uintptr_t)(req_id * max_io_bytes));
		req->mempool_offset = (req_id * max_io_bytes);
		req->mempool_len = max_io_bytes;
		req->id = req_id;
		req->vblkdev = vblkdev;
	}

	if (max_requests == 0) {
		dev_err(vblkdev->device,
			"maximum requests set to 0!\n");
		return;
	}

	vblkdev->max_requests = max_requests;

	if (!(vblkdev->config.blk_config.req_ops_supported &
		VS_BLK_READ_ONLY_MASK)) {
		dev_warn(vblkdev->device, "device partition is read-only?!\n");
	}

	dev_dbg(vblkdev->device,
		"Size: %lld B, blk_size: %d B, numblocks/IO: %d, maxio: %d B, max_req: %d, phys_dev: %s\n",
		vblkdev->size, vblkdev->config.blk_config.hardblk_size,
		vblkdev->config.blk_config.max_read_blks_per_io, max_io_bytes,
		max_requests, ((vblkdev->config.phys_dev == VSC_DEV_EMMC) ? "EMMC" : "Other"));

	/*
	 * Ensure that the selected kmsg record size is a multiple of blk_size
	 * and at least one block in size.
	 */
	if ((vblkdev->pstore_kmsg_size < vblkdev->config.blk_config.hardblk_size) ||
		(vblkdev->pstore_kmsg_size & (vblkdev->config.blk_config.hardblk_size - 1))) {
		dev_warn(vblkdev->device,
			"Unsupported pstore_kmsg_size property, assuming %d bytes\n",
			PSTORE_KMSG_RECORD_SIZE);
		vblkdev->pstore_kmsg_size = PSTORE_KMSG_RECORD_SIZE;
	}

	/* Check that the storage is enough for at least one kmsg record */
	if (vblkdev->pstore_kmsg_size > vblkdev->size) {
		dev_warn(vblkdev->device,
			"pstore_kmsg_size cannot be greater than storage size, reducing to %llu bytes\n",
			vblkdev->size);
		vblkdev->pstore_kmsg_size = vblkdev->size;
	}

	/*
	 * Allow only KMSG (PANIC/OOPS) since pstore_zone doesn't take care of
	 * block restrictions for CONSOLE/FTRACE/PMSG during write (since we
	 * have a block device that we are accessing directly without block
	 * layer support, we cannot handle a non-block-aligned start
	 * offset/size).
	 */
	pstore_zone.name = OOPS_DRV_NAME;
	pstore_zone.total_size = vblkdev->size;
	pstore_zone.kmsg_size = vblkdev->pstore_kmsg_size;
	pstore_zone.max_reason = vblkdev->pstore_max_reason;
	pstore_zone.pmsg_size = 0;
	pstore_zone.console_size = 0;
	pstore_zone.ftrace_size = 0;
	pstore_zone.read = vblk_oops_read;
	pstore_zone.write = vblk_oops_write;
	pstore_zone.panic_write = vblk_oops_panic_write;

#if KERNEL_VERSION(5, 18, 0) > LINUX_VERSION_CODE
	if (register_pstore_zone(&pstore_zone))
		dev_err(vblkdev->device, "Could not register with pstore_zone\n");
#endif
}

static int vblk_oops_send_config_cmd(struct vblk_dev *vblkdev)
{
	struct vs_request *vs_req;
	int i = 0;

	/* This while loop exits as long as the remote endpoint cooperates. */
	if (tegra_hv_ivc_channel_notified(vblkdev->ivck) != 0) {
		pr_notice("vblk: send_config wait for ivc channel reset\n");
		while (tegra_hv_ivc_channel_notified(vblkdev->ivck) != 0) {
			if (i++ > IVC_RESET_RETRIES) {
				dev_err(vblkdev->device, "ivc reset timeout\n");
				return -EIO;
			}
			set_current_state(TASK_INTERRUPTIBLE);
			schedule_timeout(usecs_to_jiffies(1));
		}
	}

	vs_req = (struct vs_request *)
		tegra_hv_ivc_write_get_next_frame(vblkdev->ivck);
	if (IS_ERR_OR_NULL(vs_req)) {
		dev_err(vblkdev->device, "no empty frame for write\n");
		return -EIO;
	}

	vs_req->type = VS_CONFIGINFO_REQ;

	dev_info(vblkdev->device, "send config cmd to ivc #%d\n",
		vblkdev->ivc_id);

	if (tegra_hv_ivc_write_advance(vblkdev->ivck)) {
		dev_err(vblkdev->device, "ivc write failed\n");
		return -EIO;
	}

	return 0;
}

static int vblk_oops_get_configinfo(struct vblk_dev *vblkdev)
{
	struct vs_request *req;
	int32_t status;

	dev_info(vblkdev->device, "get config data from ivc #%d\n",
		vblkdev->ivc_id);

	req = (struct vs_request *)
		tegra_hv_ivc_read_get_next_frame(vblkdev->ivck);
	if (IS_ERR_OR_NULL(req)) {
		dev_err(vblkdev->device, "no full frame for read\n");
		return -EIO;
	}

	status = req->status;
	vblkdev->config = req->config_info;

	if (tegra_hv_ivc_read_advance(vblkdev->ivck)) {
		dev_err(vblkdev->device, "ivc read failed\n");
		return -EIO;
	}

	if (status != 0)
		return -EINVAL;

	if (vblkdev->config.type != VS_BLK_DEV) {
		dev_err(vblkdev->device, "Non Blk dev config not supported!\n");
		return -EINVAL;
	}

	if (vblkdev->config.blk_config.num_blks == 0) {
		dev_err(vblkdev->device, "controller init failed\n");
		return -EINVAL;
	}

	return 0;
}

static void vblk_oops_init_device(struct work_struct *ws)
{
	struct vblk_dev *vblkdev = container_of(ws, struct vblk_dev, init.work);

	dev_info(vblkdev->device, "%s: Check for IVC channel reset\n", __func__);

	/* Wait for the IVC channel reset to finish */
	if (tegra_hv_ivc_channel_notified(vblkdev->ivck) != 0) {
		dev_warn(vblkdev->device,
			"%s: IVC channel reset not complete...retry\n", __func__);
		schedule_delayed_work(&vblkdev->init,
			msecs_to_jiffies(VSC_RESPONSE_WAIT_MS));
		return;
	}

	if (tegra_hv_ivc_can_read(vblkdev->ivck) && !vblkdev->initialized) {
		if (vblk_oops_get_configinfo(vblkdev)) {
			dev_err(vblkdev->device,
				"unable to get configinfo, giving up\n");
			return;
		}
		vblkdev->initialized = true;
		setup_device(vblkdev);
	}
}

static int tegra_hv_vblk_oops_probe(struct platform_device *pdev)
{
	static struct device_node *vblk_node;
	struct device *dev = &pdev->dev;
	int ret;
	struct tegra_hv_ivm_cookie *ivmk;

	if (!is_tegra_hypervisor_mode()) {
		dev_err(dev, "Hypervisor is not present\n");
		return -ENODEV;
	}

	vblk_node = dev->of_node;
	if (vblk_node == NULL) {
		dev_err(dev, "No of_node data\n");
		return -ENODEV;
	}

	vblkdev_oops = devm_kzalloc(dev, sizeof(struct vblk_dev), GFP_KERNEL);
	if (vblkdev_oops == NULL)
		return -ENOMEM;

	platform_set_drvdata(pdev, vblkdev_oops);
	vblkdev_oops->device = dev;

	/* Get the properties of the instance and the IVC channel id */
	if (of_property_read_u32(vblk_node, "instance", &(vblkdev_oops->devnum))) {
		dev_err(dev, "Failed to read instance property\n");
		ret = -ENODEV;
		goto fail;
	} else {
		if (of_property_read_u32_index(vblk_node, "ivc", 1,
			&(vblkdev_oops->ivc_id))) {
			dev_err(dev, "Failed to read ivc property\n");
			ret = -ENODEV;
			goto fail;
		}
		if (of_property_read_u32_index(vblk_node, "mempool", 0,
			&(vblkdev_oops->ivm_id))) {
			dev_err(dev, "Failed to read mempool property\n");
			ret = -ENODEV;
			goto fail;
		}
	}

	if (of_property_read_u32(vblk_node, "pstore_max_reason",
		&(vblkdev_oops->pstore_max_reason))) {
		dev_warn(dev,
			"Failed to read pstore_max_reason property, assuming %d\n",
			KMSG_DUMP_OOPS);
		vblkdev_oops->pstore_max_reason = KMSG_DUMP_OOPS;
	} else if (vblkdev_oops->pstore_max_reason != KMSG_DUMP_OOPS) {
		/* Currently we support only KMSG_DUMP_OOPS */
		dev_warn(dev, "Unsupported pstore_max_reason property, assuming %d\n",
			KMSG_DUMP_OOPS);
		vblkdev_oops->pstore_max_reason = KMSG_DUMP_OOPS;
	}

	if (of_property_read_u32(vblk_node, "pstore_kmsg_size",
		&(vblkdev_oops->pstore_kmsg_size))) {
		dev_warn(dev, "Failed to read pstore_kmsg_size property, assuming %d bytes\n",
			PSTORE_KMSG_RECORD_SIZE);
		vblkdev_oops->pstore_kmsg_size = PSTORE_KMSG_RECORD_SIZE;
		/* Defer the alignment and minimum size checks until later */
	}

	vblkdev_oops->ivck = tegra_hv_ivc_reserve(NULL, vblkdev_oops->ivc_id, NULL);
	if (IS_ERR_OR_NULL(vblkdev_oops->ivck)) {
		dev_err(dev, "Failed to reserve IVC channel %d\n",
			vblkdev_oops->ivc_id);
		vblkdev_oops->ivck = NULL;
		ret = -ENODEV;
		goto fail;
	}

	ivmk = tegra_hv_mempool_reserve(vblkdev_oops->ivm_id);
	if (IS_ERR_OR_NULL(ivmk)) {
		dev_err(dev, "Failed to reserve IVM channel %d\n",
			vblkdev_oops->ivm_id);
		ivmk = NULL;
		ret = -ENODEV;
		goto free_ivc;
	}
	vblkdev_oops->ivmk = ivmk;

	vblkdev_oops->shared_buffer = devm_memremap(vblkdev_oops->device,
		ivmk->ipa, ivmk->size, MEMREMAP_WB);
	if (IS_ERR_OR_NULL(vblkdev_oops->shared_buffer)) {
		dev_err(dev, "Failed to map mempool area %d\n",
			vblkdev_oops->ivm_id);
		ret = -ENOMEM;
		goto free_mempool;
	}

	vblkdev_oops->initialized = false;

	INIT_DELAYED_WORK(&vblkdev_oops->init, vblk_oops_init_device);

	tegra_hv_ivc_channel_reset(vblkdev_oops->ivck);
	if (vblk_oops_send_config_cmd(vblkdev_oops)) {
		dev_err(dev, "Failed to send config cmd\n");
		ret = -EACCES;
		goto free_mempool;
	}

	/* Postpone the init work that needs the response */
	schedule_delayed_work(&vblkdev_oops->init,
		msecs_to_jiffies(VSC_RESPONSE_WAIT_MS));

	return 0;

free_mempool:
	tegra_hv_mempool_unreserve(vblkdev_oops->ivmk);

free_ivc:
	tegra_hv_ivc_unreserve(vblkdev_oops->ivck);

fail:
	return ret;
}

static int tegra_hv_vblk_oops_remove(struct platform_device *pdev)
{
	struct vblk_dev *vblkdev = platform_get_drvdata(pdev);

	tegra_hv_ivc_unreserve(vblkdev->ivck);
	tegra_hv_mempool_unreserve(vblkdev->ivmk);

	return 0;
}

#ifdef CONFIG_OF
static const struct of_device_id tegra_hv_vblk_oops_match[] = {
	{ .compatible = "nvidia,tegra-hv-oops-storage", },
	{},
};
MODULE_DEVICE_TABLE(of, tegra_hv_vblk_oops_match);
#endif /* CONFIG_OF */

static struct platform_driver tegra_hv_vblk_oops_driver = {
	.probe = tegra_hv_vblk_oops_probe,
	.remove = tegra_hv_vblk_oops_remove,
	.driver = {
		.name = OOPS_DRV_NAME,
		.owner = THIS_MODULE,
		.of_match_table = of_match_ptr(tegra_hv_vblk_oops_match),
	},
};

static int __init tegra_hv_vblk_driver_init(void)
{
	return platform_driver_register(&tegra_hv_vblk_oops_driver);
}
module_init(tegra_hv_vblk_driver_init);

static void __exit tegra_hv_vblk_driver_exit(void)
{
	platform_driver_unregister(&tegra_hv_vblk_oops_driver);
}
module_exit(tegra_hv_vblk_driver_exit);

MODULE_AUTHOR("Haribabu Narayanan <hnarayanan@nvidia.com>");
MODULE_DESCRIPTION("Virtual OOPS storage device over Tegra Hypervisor IVC channel");
MODULE_LICENSE("GPL");

drivers/block/tegra_oops_virt_storage/tegra_vblk_oops.h (new file, 77 lines)
@@ -0,0 +1,77 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

#ifndef _TEGRA_VBLK_H_
#define _TEGRA_VBLK_H_

#include <linux/version.h>
#if KERNEL_VERSION(5, 18, 0) > LINUX_VERSION_CODE
#include <linux/genhd.h>
#endif
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <soc/tegra/ivc-priv.h>
#include <soc/tegra/ivc_ext.h>
#include <soc/tegra/virt/hv-ivc.h>
#include <linux/workqueue.h>
#include <linux/mutex.h>
#include <tegra_virt_storage_spec.h>

#define OOPS_DRV_NAME "tegra_hv_vblk_oops"

#define IVC_RESET_RETRIES 30
#define VSC_RESPONSE_RETRIES 10

/* One request slot for regular IO and one for panic write */
#define VSC_REQ_RW 0
#define VSC_REQ_PANIC (VSC_REQ_RW + 1)
#define MAX_OOPS_VSC_REQS (VSC_REQ_PANIC + 1)

/* Wait time for a response from the VSC */
#define VSC_RESPONSE_WAIT_MS 1

/* PSTORE defaults */
#define PSTORE_KMSG_RECORD_SIZE (64*1024)

struct vsc_request {
	struct vs_request vs_req;
	struct request *req;
	struct vblk_ioctl_req *ioctl_req;
	void *mempool_virt;
	uint32_t mempool_offset;
	uint32_t mempool_len;
	uint32_t id;
	struct vblk_dev *vblkdev;
	/* Scatter list for mapping the IOVA address */
	struct scatterlist *sg_lst;
	int sg_num_ents;
};

/*
 * The drvdata of the virtual device.
 */
struct vblk_dev {
	struct vs_config_info config;
	uint64_t size;			/* Device size in bytes */
	uint32_t ivc_id;
	uint32_t ivm_id;
	struct tegra_hv_ivc_cookie *ivck;
	struct tegra_hv_ivm_cookie *ivmk;
	uint32_t devnum;
	bool initialized;
	struct delayed_work init;
	struct device *device;
	void *shared_buffer;
	struct vsc_request reqs[MAX_OOPS_VSC_REQS];
	DECLARE_BITMAP(pending_reqs, MAX_OOPS_VSC_REQS);
	uint32_t inflight_reqs;
	uint32_t max_requests;
	struct mutex ivc_lock;
	int pstore_max_reason;		/* pstore max_reason */
	uint32_t pstore_kmsg_size;	/* pstore kmsg record size */
};

#endif
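For reference, below is a sketch (not part of this change) of a device tree node this driver could bind against, inferred from the compatible string in tegra_hv_vblk_oops_match and the properties read in tegra_hv_vblk_oops_probe(). The node name, phandle, and all cell values are placeholder assumptions; note that the probe reads cell index 1 of "ivc" and cell index 0 of "mempool".

	tegra_hv_oops_storage {
		compatible = "nvidia,tegra-hv-oops-storage";
		instance = <0>;
		/* probe reads cell index 1, i.e. the id after the phandle */
		ivc = <&tegra_hv 80>;
		/* probe reads cell index 0, i.e. the mempool id */
		mempool = <14>;
		/* optional: driver falls back to KMSG_DUMP_OOPS and 64 KiB */
		pstore_max_reason = <2>;	/* KMSG_DUMP_OOPS; value is kernel-dependent */
		pstore_kmsg_size = <0x10000>;	/* 64 KiB */
	};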
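As context for the pstore_zone hookup in setup_device(), here is a minimal, self-contained sketch of a pstore/zone backend using only the pstore_zone_info fields this driver exercises. The RAM-backed buffer, names, and sizes are illustrative assumptions; a real backend would persist to actual storage, as the driver above does over IVC.

	// Minimal pstore/zone backend sketch (illustrative; not part of this change).
	#include <linux/module.h>
	#include <linux/kmsg_dump.h>
	#include <linux/pstore_zone.h>
	#include <linux/string.h>

	#define DEMO_KMSG_SIZE	(64 * 1024)	/* one record, like PSTORE_KMSG_RECORD_SIZE */
	#define DEMO_TOTAL_SIZE	(8 * DEMO_KMSG_SIZE)

	static char demo_storage[DEMO_TOTAL_SIZE];	/* RAM stands in for real storage */

	static ssize_t demo_read(char *buf, size_t bytes, loff_t pos)
	{
		memcpy(buf, demo_storage + pos, bytes);	/* pstore/zone bounds the request */
		return bytes;
	}

	static ssize_t demo_write(const char *buf, size_t bytes, loff_t pos)
	{
		memcpy(demo_storage + pos, buf, bytes);
		return bytes;
	}

	static struct pstore_zone_info demo_zone = {
		.owner		= THIS_MODULE,
		.name		= "demo_oops_zone",
		.total_size	= DEMO_TOTAL_SIZE,
		.kmsg_size	= DEMO_KMSG_SIZE,
		.max_reason	= KMSG_DUMP_OOPS,
		.read		= demo_read,
		.write		= demo_write,
		/* .panic_write omitted here; panic-time dumps would fail without it */
	};

	static int __init demo_init(void)
	{
		return register_pstore_zone(&demo_zone);
	}
	module_init(demo_init);

	static void __exit demo_exit(void)
	{
		unregister_pstore_zone(&demo_zone);
	}
	module_exit(demo_exit);

	MODULE_LICENSE("GPL");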