aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/host1x
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/host1x')
-rw-r--r--drivers/gpu/host1x/Makefile6
-rw-r--r--drivers/gpu/host1x/cdma.c43
-rw-r--r--drivers/gpu/host1x/channel.c8
-rw-r--r--drivers/gpu/host1x/context.c160
-rw-r--r--drivers/gpu/host1x/context.h38
-rw-r--r--drivers/gpu/host1x/context_bus.c5
-rw-r--r--drivers/gpu/host1x/dev.c124
-rw-r--r--drivers/gpu/host1x/dev.h13
-rw-r--r--drivers/gpu/host1x/hw/cdma_hw.c34
-rw-r--r--drivers/gpu/host1x/hw/channel_hw.c137
-rw-r--r--drivers/gpu/host1x/hw/host1x01_hardware.h114
-rw-r--r--drivers/gpu/host1x/hw/host1x02_hardware.h113
-rw-r--r--drivers/gpu/host1x/hw/host1x04_hardware.h113
-rw-r--r--drivers/gpu/host1x/hw/host1x05_hardware.h113
-rw-r--r--drivers/gpu/host1x/hw/host1x06_hardware.h118
-rw-r--r--drivers/gpu/host1x/hw/host1x07_hardware.h118
-rw-r--r--drivers/gpu/host1x/hw/host1x08.c33
-rw-r--r--drivers/gpu/host1x/hw/host1x08.h15
-rw-r--r--drivers/gpu/host1x/hw/host1x08_hardware.h21
-rw-r--r--drivers/gpu/host1x/hw/hw_host1x08_channel.h11
-rw-r--r--drivers/gpu/host1x/hw/hw_host1x08_common.h11
-rw-r--r--drivers/gpu/host1x/hw/hw_host1x08_hypervisor.h9
-rw-r--r--drivers/gpu/host1x/hw/hw_host1x08_uclass.h181
-rw-r--r--drivers/gpu/host1x/hw/hw_host1x08_vm.h36
-rw-r--r--drivers/gpu/host1x/hw/intr_hw.c11
-rw-r--r--drivers/gpu/host1x/hw/opcodes.h150
26 files changed, 956 insertions, 779 deletions
diff --git a/drivers/gpu/host1x/Makefile b/drivers/gpu/host1x/Makefile
index c891a3e33844..ee5286ffe08d 100644
--- a/drivers/gpu/host1x/Makefile
+++ b/drivers/gpu/host1x/Makefile
@@ -15,7 +15,11 @@ host1x-y = \
hw/host1x04.o \
hw/host1x05.o \
hw/host1x06.o \
- hw/host1x07.o
+ hw/host1x07.o \
+ hw/host1x08.o
+
+host1x-$(CONFIG_IOMMU_API) += \
+ context.o
obj-$(CONFIG_TEGRA_HOST1X) += host1x.o
obj-$(CONFIG_TEGRA_HOST1X_CONTEXT_BUS) += context_bus.o
diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
index 765e5aa64eb6..103fda055394 100644
--- a/drivers/gpu/host1x/cdma.c
+++ b/drivers/gpu/host1x/cdma.c
@@ -457,9 +457,24 @@ syncpt_incr:
* to offset 0xbad. This does nothing but
* has a easily detected signature in debug
* traces.
+ *
+ * On systems with MLOCK enforcement enabled,
+ * the above 0 word writes would fall foul of
+ * the enforcement. As such, in the first slot
+ * put a RESTART_W opcode to the beginning
+ * of the next job. We don't use this for older
+ * chips since those only support the RESTART
+ * opcode with inconvenient alignment requirements.
*/
- mapped[2*slot+0] = 0x1bad0000;
- mapped[2*slot+1] = 0x1bad0000;
+ if (i == 0 && host1x->info->has_wide_gather) {
+ unsigned int next_job = (job->first_get/8 + job->num_slots)
+ % HOST1X_PUSHBUFFER_SLOTS;
+ mapped[2*slot+0] = (0xd << 28) | (next_job * 2);
+ mapped[2*slot+1] = 0x0;
+ } else {
+ mapped[2*slot+0] = 0x1bad0000;
+ mapped[2*slot+1] = 0x1bad0000;
+ }
}
job->cancelled = true;
@@ -600,8 +615,8 @@ void host1x_cdma_push_wide(struct host1x_cdma *cdma, u32 op1, u32 op2,
struct host1x_channel *channel = cdma_to_channel(cdma);
struct host1x *host1x = cdma_to_host1x(cdma);
struct push_buffer *pb = &cdma->push_buffer;
- unsigned int needed = 2, extra = 0, i;
unsigned int space = cdma->slots_free;
+ unsigned int needed = 2, extra = 0;
if (host1x_debug_trace_cmdbuf)
trace_host1x_cdma_push_wide(dev_name(channel->dev), op1, op2,
@@ -619,20 +634,14 @@ void host1x_cdma_push_wide(struct host1x_cdma *cdma, u32 op1, u32 op2,
cdma->slots_free = space - needed;
cdma->slots_used += needed;
- /*
- * Note that we rely on the fact that this is only used to submit wide
- * gather opcodes, which consist of 3 words, and they are padded with
- * a NOP to avoid having to deal with fractional slots (a slot always
- * represents 2 words). The fourth opcode passed to this function will
- * therefore always be a NOP.
- *
- * This works around a slight ambiguity when it comes to opcodes. For
- * all current host1x incarnations the NOP opcode uses the exact same
- * encoding (0x20000000), so we could hard-code the value here, but a
- * new incarnation may change it and break that assumption.
- */
- for (i = 0; i < extra; i++)
- host1x_pushbuffer_push(pb, op4, op4);
+ if (extra > 0) {
+ /*
+ * If there isn't enough space at the tail of the pushbuffer,
+ * insert a RESTART(0) here to go back to the beginning.
+ * The code above adjusted the indexes appropriately.
+ */
+ host1x_pushbuffer_push(pb, (0x5 << 28), 0xdead0000);
+ }
host1x_pushbuffer_push(pb, op1, op2);
host1x_pushbuffer_push(pb, op3, op4);
diff --git a/drivers/gpu/host1x/channel.c b/drivers/gpu/host1x/channel.c
index 2a9a3a8d5931..2d0051d6314c 100644
--- a/drivers/gpu/host1x/channel.c
+++ b/drivers/gpu/host1x/channel.c
@@ -21,22 +21,18 @@ int host1x_channel_list_init(struct host1x_channel_list *chlist,
if (!chlist->channels)
return -ENOMEM;
- chlist->allocated_channels =
- kcalloc(BITS_TO_LONGS(num_channels), sizeof(unsigned long),
- GFP_KERNEL);
+ chlist->allocated_channels = bitmap_zalloc(num_channels, GFP_KERNEL);
if (!chlist->allocated_channels) {
kfree(chlist->channels);
return -ENOMEM;
}
- bitmap_zero(chlist->allocated_channels, num_channels);
-
return 0;
}
void host1x_channel_list_free(struct host1x_channel_list *chlist)
{
- kfree(chlist->allocated_channels);
+ bitmap_free(chlist->allocated_channels);
kfree(chlist->channels);
}
diff --git a/drivers/gpu/host1x/context.c b/drivers/gpu/host1x/context.c
new file mode 100644
index 000000000000..b08cf11f9a66
--- /dev/null
+++ b/drivers/gpu/host1x/context.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, NVIDIA Corporation.
+ */
+
+#include <linux/device.h>
+#include <linux/kref.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/pid.h>
+#include <linux/slab.h>
+
+#include "context.h"
+#include "dev.h"
+
+int host1x_memory_context_list_init(struct host1x *host1x)
+{
+ struct host1x_memory_context_list *cdl = &host1x->context_list;
+ struct device_node *node = host1x->dev->of_node;
+ struct host1x_memory_context *ctx;
+ unsigned int i;
+ int err;
+
+ cdl->devs = NULL;
+ cdl->len = 0;
+ mutex_init(&cdl->lock);
+
+ err = of_property_count_u32_elems(node, "iommu-map");
+ if (err < 0)
+ return 0;
+
+ cdl->devs = kcalloc(err, sizeof(*cdl->devs), GFP_KERNEL);
+ if (!cdl->devs)
+ return -ENOMEM;
+ cdl->len = err / 4;
+
+ for (i = 0; i < cdl->len; i++) {
+ struct iommu_fwspec *fwspec;
+
+ ctx = &cdl->devs[i];
+
+ ctx->host = host1x;
+
+ device_initialize(&ctx->dev);
+
+ /*
+ * Due to an issue with T194 NVENC, only 38 bits can be used.
+ * Anyway, 256GiB of IOVA ought to be enough for anyone.
+ */
+ ctx->dma_mask = DMA_BIT_MASK(38);
+ ctx->dev.dma_mask = &ctx->dma_mask;
+ ctx->dev.coherent_dma_mask = ctx->dma_mask;
+ dev_set_name(&ctx->dev, "host1x-ctx.%d", i);
+ ctx->dev.bus = &host1x_context_device_bus_type;
+ ctx->dev.parent = host1x->dev;
+
+ dma_set_max_seg_size(&ctx->dev, UINT_MAX);
+
+ err = device_add(&ctx->dev);
+ if (err) {
+ dev_err(host1x->dev, "could not add context device %d: %d\n", i, err);
+ goto del_devices;
+ }
+
+ err = of_dma_configure_id(&ctx->dev, node, true, &i);
+ if (err) {
+ dev_err(host1x->dev, "IOMMU configuration failed for context device %d: %d\n",
+ i, err);
+ device_del(&ctx->dev);
+ goto del_devices;
+ }
+
+ fwspec = dev_iommu_fwspec_get(&ctx->dev);
+ if (!fwspec || !device_iommu_mapped(&ctx->dev)) {
+ dev_err(host1x->dev, "Context device %d has no IOMMU!\n", i);
+ device_del(&ctx->dev);
+ goto del_devices;
+ }
+
+ ctx->stream_id = fwspec->ids[0] & 0xffff;
+ }
+
+ return 0;
+
+del_devices:
+ while (i--)
+ device_del(&cdl->devs[i].dev);
+
+ kfree(cdl->devs);
+ cdl->len = 0;
+
+ return err;
+}
+
+void host1x_memory_context_list_free(struct host1x_memory_context_list *cdl)
+{
+ unsigned int i;
+
+ for (i = 0; i < cdl->len; i++)
+ device_del(&cdl->devs[i].dev);
+
+ kfree(cdl->devs);
+ cdl->len = 0;
+}
+
+struct host1x_memory_context *host1x_memory_context_alloc(struct host1x *host1x,
+ struct pid *pid)
+{
+ struct host1x_memory_context_list *cdl = &host1x->context_list;
+ struct host1x_memory_context *free = NULL;
+ int i;
+
+ if (!cdl->len)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mutex_lock(&cdl->lock);
+
+ for (i = 0; i < cdl->len; i++) {
+ struct host1x_memory_context *cd = &cdl->devs[i];
+
+ if (cd->owner == pid) {
+ refcount_inc(&cd->ref);
+ mutex_unlock(&cdl->lock);
+ return cd;
+ } else if (!cd->owner && !free) {
+ free = cd;
+ }
+ }
+
+ if (!free) {
+ mutex_unlock(&cdl->lock);
+ return ERR_PTR(-EBUSY);
+ }
+
+ refcount_set(&free->ref, 1);
+ free->owner = get_pid(pid);
+
+ mutex_unlock(&cdl->lock);
+
+ return free;
+}
+EXPORT_SYMBOL_GPL(host1x_memory_context_alloc);
+
+void host1x_memory_context_get(struct host1x_memory_context *cd)
+{
+ refcount_inc(&cd->ref);
+}
+EXPORT_SYMBOL_GPL(host1x_memory_context_get);
+
+void host1x_memory_context_put(struct host1x_memory_context *cd)
+{
+ struct host1x_memory_context_list *cdl = &cd->host->context_list;
+
+ if (refcount_dec_and_mutex_lock(&cd->ref, &cdl->lock)) {
+ put_pid(cd->owner);
+ cd->owner = NULL;
+ mutex_unlock(&cdl->lock);
+ }
+}
+EXPORT_SYMBOL_GPL(host1x_memory_context_put);
diff --git a/drivers/gpu/host1x/context.h b/drivers/gpu/host1x/context.h
new file mode 100644
index 000000000000..3e03bc1d3bac
--- /dev/null
+++ b/drivers/gpu/host1x/context.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Host1x context devices
+ *
+ * Copyright (c) 2020, NVIDIA Corporation.
+ */
+
+#ifndef __HOST1X_CONTEXT_H
+#define __HOST1X_CONTEXT_H
+
+#include <linux/mutex.h>
+#include <linux/refcount.h>
+
+struct host1x;
+
+extern struct bus_type host1x_context_device_bus_type;
+
+struct host1x_memory_context_list {
+ struct mutex lock;
+ struct host1x_memory_context *devs;
+ unsigned int len;
+};
+
+#ifdef CONFIG_IOMMU_API
+int host1x_memory_context_list_init(struct host1x *host1x);
+void host1x_memory_context_list_free(struct host1x_memory_context_list *cdl);
+#else
+static inline int host1x_memory_context_list_init(struct host1x *host1x)
+{
+ return 0;
+}
+
+static inline void host1x_memory_context_list_free(struct host1x_memory_context_list *cdl)
+{
+}
+#endif
+
+#endif
diff --git a/drivers/gpu/host1x/context_bus.c b/drivers/gpu/host1x/context_bus.c
index b0d35b2bbe89..d9421179d7b4 100644
--- a/drivers/gpu/host1x/context_bus.c
+++ b/drivers/gpu/host1x/context_bus.c
@@ -15,11 +15,6 @@ static int __init host1x_context_device_bus_init(void)
{
int err;
- if (!of_machine_is_compatible("nvidia,tegra186") &&
- !of_machine_is_compatible("nvidia,tegra194") &&
- !of_machine_is_compatible("nvidia,tegra234"))
- return 0;
-
err = bus_register(&host1x_context_device_bus_type);
if (err < 0) {
pr_err("bus type registration failed: %d\n", err);
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 80c685ab3e30..0cd3f97e7e49 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -28,6 +28,7 @@
#include "bus.h"
#include "channel.h"
+#include "context.h"
#include "debug.h"
#include "dev.h"
#include "intr.h"
@@ -38,6 +39,12 @@
#include "hw/host1x05.h"
#include "hw/host1x06.h"
#include "hw/host1x07.h"
+#include "hw/host1x08.h"
+
+void host1x_common_writel(struct host1x *host1x, u32 v, u32 r)
+{
+ writel(v, host1x->common_regs + r);
+}
void host1x_hypervisor_writel(struct host1x *host1x, u32 v, u32 r)
{
@@ -199,7 +206,48 @@ static const struct host1x_info host1x07_info = {
.reserve_vblank_syncpts = false,
};
+/*
+ * Tegra234 has two stream ID protection tables, one for setting stream IDs
+ * through the channel path via SETSTREAMID, and one for setting them via
+ * MMIO. We program each engine's data stream ID in the channel path table
+ * and firmware stream ID in the MMIO path table.
+ */
+static const struct host1x_sid_entry tegra234_sid_table[] = {
+ {
+ /* VIC channel */
+ .base = 0x17b8,
+ .offset = 0x30,
+ .limit = 0x30
+ },
+ {
+ /* VIC MMIO */
+ .base = 0x1688,
+ .offset = 0x34,
+ .limit = 0x34
+ },
+};
+
+static const struct host1x_info host1x08_info = {
+ .nb_channels = 63,
+ .nb_pts = 1024,
+ .nb_mlocks = 24,
+ .nb_bases = 0,
+ .init = host1x08_init,
+ .sync_offset = 0x0,
+ .dma_mask = DMA_BIT_MASK(40),
+ .has_wide_gather = true,
+ .has_hypervisor = true,
+ .has_common = true,
+ .num_sid_entries = ARRAY_SIZE(tegra234_sid_table),
+ .sid_table = tegra234_sid_table,
+ .streamid_vm_table = { 0x1004, 128 },
+ .classid_vm_table = { 0x1404, 25 },
+ .mmio_vm_table = { 0x1504, 25 },
+ .reserve_vblank_syncpts = false,
+};
+
static const struct of_device_id host1x_of_match[] = {
+ { .compatible = "nvidia,tegra234-host1x", .data = &host1x08_info, },
{ .compatible = "nvidia,tegra194-host1x", .data = &host1x07_info, },
{ .compatible = "nvidia,tegra186-host1x", .data = &host1x06_info, },
{ .compatible = "nvidia,tegra210-host1x", .data = &host1x05_info, },
@@ -211,7 +259,7 @@ static const struct of_device_id host1x_of_match[] = {
};
MODULE_DEVICE_TABLE(of, host1x_of_match);
-static void host1x_setup_sid_table(struct host1x *host)
+static void host1x_setup_virtualization_tables(struct host1x *host)
{
const struct host1x_info *info = host->info;
unsigned int i;
@@ -225,6 +273,21 @@ static void host1x_setup_sid_table(struct host1x *host)
host1x_hypervisor_writel(host, entry->offset, entry->base);
host1x_hypervisor_writel(host, entry->limit, entry->base + 4);
}
+
+ for (i = 0; i < info->streamid_vm_table.count; i++) {
+ /* Allow access to all stream IDs to all VMs. */
+ host1x_hypervisor_writel(host, 0xff, info->streamid_vm_table.base + 4 * i);
+ }
+
+ for (i = 0; i < info->classid_vm_table.count; i++) {
+ /* Allow access to all classes to all VMs. */
+ host1x_hypervisor_writel(host, 0xff, info->classid_vm_table.base + 4 * i);
+ }
+
+ for (i = 0; i < info->mmio_vm_table.count; i++) {
+ /* Use VM1 (that's us) as originator VMID for engine MMIO accesses. */
+ host1x_hypervisor_writel(host, 0x1, info->mmio_vm_table.base + 4 * i);
+ }
}
static bool host1x_wants_iommu(struct host1x *host1x)
@@ -402,16 +465,12 @@ static int host1x_get_resets(struct host1x *host)
return err;
}
- if (WARN_ON(!host->resets[1].rstc))
- return -ENOENT;
-
return 0;
}
static int host1x_probe(struct platform_device *pdev)
{
struct host1x *host;
- struct resource *regs, *hv_regs = NULL;
int syncpt_irq;
int err;
@@ -422,25 +481,23 @@ static int host1x_probe(struct platform_device *pdev)
host->info = of_device_get_match_data(&pdev->dev);
if (host->info->has_hypervisor) {
- regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "vm");
- if (!regs) {
- dev_err(&pdev->dev, "failed to get vm registers\n");
- return -ENXIO;
- }
+ host->regs = devm_platform_ioremap_resource_byname(pdev, "vm");
+ if (IS_ERR(host->regs))
+ return PTR_ERR(host->regs);
+
+ host->hv_regs = devm_platform_ioremap_resource_byname(pdev, "hypervisor");
+ if (IS_ERR(host->hv_regs))
+ return PTR_ERR(host->hv_regs);
- hv_regs = platform_get_resource_byname(pdev, IORESOURCE_MEM,
- "hypervisor");
- if (!hv_regs) {
- dev_err(&pdev->dev,
- "failed to get hypervisor registers\n");
- return -ENXIO;
+ if (host->info->has_common) {
+ host->common_regs = devm_platform_ioremap_resource_byname(pdev, "common");
+ if (IS_ERR(host->common_regs))
+ return PTR_ERR(host->common_regs);
}
} else {
- regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!regs) {
- dev_err(&pdev->dev, "failed to get registers\n");
- return -ENXIO;
- }
+ host->regs = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(host->regs))
+ return PTR_ERR(host->regs);
}
syncpt_irq = platform_get_irq(pdev, 0);
@@ -455,16 +512,6 @@ static int host1x_probe(struct platform_device *pdev)
/* set common host1x device data */
platform_set_drvdata(pdev, host);
- host->regs = devm_ioremap_resource(&pdev->dev, regs);
- if (IS_ERR(host->regs))
- return PTR_ERR(host->regs);
-
- if (host->info->has_hypervisor) {
- host->hv_regs = devm_ioremap_resource(&pdev->dev, hv_regs);
- if (IS_ERR(host->hv_regs))
- return PTR_ERR(host->hv_regs);
- }
-
host->dev->dma_parms = &host->dma_parms;
dma_set_max_seg_size(host->dev, UINT_MAX);
@@ -503,10 +550,16 @@ static int host1x_probe(struct platform_device *pdev)
goto iommu_exit;
}
+ err = host1x_memory_context_list_init(host);
+ if (err) {
+ dev_err(&pdev->dev, "failed to initialize context list\n");
+ goto free_channels;
+ }
+
err = host1x_syncpt_init(host);
if (err) {
dev_err(&pdev->dev, "failed to initialize syncpts\n");
- goto free_channels;
+ goto free_contexts;
}
err = host1x_intr_init(host, syncpt_irq);
@@ -550,6 +603,8 @@ pm_disable:
host1x_intr_deinit(host);
deinit_syncpt:
host1x_syncpt_deinit(host);
+free_contexts:
+ host1x_memory_context_list_free(&host->context_list);
free_channels:
host1x_channel_list_free(&host->channel_list);
iommu_exit:
@@ -571,6 +626,7 @@ static int host1x_remove(struct platform_device *pdev)
host1x_intr_deinit(host);
host1x_syncpt_deinit(host);
+ host1x_memory_context_list_free(&host->context_list);
host1x_channel_list_free(&host->channel_list);
host1x_iommu_exit(host);
host1x_bo_cache_destroy(&host->cache);
@@ -600,7 +656,7 @@ static int __maybe_unused host1x_runtime_suspend(struct device *dev)
return 0;
resume_host1x:
- host1x_setup_sid_table(host);
+ host1x_setup_virtualization_tables(host);
host1x_syncpt_restore(host);
host1x_intr_start(host);
@@ -630,7 +686,7 @@ static int __maybe_unused host1x_runtime_resume(struct device *dev)
goto disable_clk;
}
- host1x_setup_sid_table(host);
+ host1x_setup_virtualization_tables(host);
host1x_syncpt_restore(host);
host1x_intr_start(host);
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index ca4b082f0cd4..920e5548cfbc 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -14,6 +14,7 @@
#include "cdma.h"
#include "channel.h"
+#include "context.h"
#include "intr.h"
#include "job.h"
#include "syncpt.h"
@@ -89,6 +90,11 @@ struct host1x_sid_entry {
unsigned int limit;
};
+struct host1x_table_desc {
+ unsigned int base;
+ unsigned int count;
+};
+
struct host1x_info {
unsigned int nb_channels; /* host1x: number of channels supported */
unsigned int nb_pts; /* host1x: number of syncpoints supported */
@@ -99,8 +105,12 @@ struct host1x_info {
u64 dma_mask; /* mask of addressable memory */
bool has_wide_gather; /* supports GATHER_W opcode */
bool has_hypervisor; /* has hypervisor registers */
+ bool has_common; /* has common registers separate from hypervisor */
unsigned int num_sid_entries;
const struct host1x_sid_entry *sid_table;
+ struct host1x_table_desc streamid_vm_table;
+ struct host1x_table_desc classid_vm_table;
+ struct host1x_table_desc mmio_vm_table;
/*
* On T20-T148, the boot chain may setup DC to increment syncpoints
* 26/27 on VBLANK. As such we cannot use these syncpoints until
@@ -114,6 +124,7 @@ struct host1x {
void __iomem *regs;
void __iomem *hv_regs; /* hypervisor region */
+ void __iomem *common_regs;
struct host1x_syncpt *syncpt;
struct host1x_syncpt_base *bases;
struct device *dev;
@@ -141,6 +152,7 @@ struct host1x {
struct mutex syncpt_mutex;
struct host1x_channel_list channel_list;
+ struct host1x_memory_context_list context_list;
struct dentry *debugfs;
@@ -154,6 +166,7 @@ struct host1x {
struct host1x_bo_cache cache;
};
+void host1x_common_writel(struct host1x *host1x, u32 v, u32 r);
void host1x_hypervisor_writel(struct host1x *host1x, u32 r, u32 v);
u32 host1x_hypervisor_readl(struct host1x *host1x, u32 r);
void host1x_sync_writel(struct host1x *host1x, u32 r, u32 v);
diff --git a/drivers/gpu/host1x/hw/cdma_hw.c b/drivers/gpu/host1x/hw/cdma_hw.c
index e49cd5b8f735..1b65a10b9dfc 100644
--- a/drivers/gpu/host1x/hw/cdma_hw.c
+++ b/drivers/gpu/host1x/hw/cdma_hw.c
@@ -238,6 +238,37 @@ static void cdma_resume(struct host1x_cdma *cdma, u32 getptr)
cdma_timeout_restart(cdma, getptr);
}
+static void timeout_release_mlock(struct host1x_cdma *cdma)
+{
+#if HOST1X_HW >= 8
+ /* Tegra186 and Tegra194 require a more complicated MLOCK release
+ * sequence. Furthermore, those chips by default don't enforce MLOCKs,
+ * so it turns out that if we don't /actually/ need MLOCKs, we can just
+ * ignore them.
+ *
+ * As such, for now just implement this on Tegra234 where things are
+ * stricter but also easy to implement.
+ */
+ struct host1x_channel *ch = cdma_to_channel(cdma);
+ struct host1x *host1x = cdma_to_host1x(cdma);
+ u32 offset;
+
+ switch (ch->client->class) {
+ case HOST1X_CLASS_VIC:
+ offset = HOST1X_COMMON_VIC_MLOCK;
+ break;
+ case HOST1X_CLASS_NVDEC:
+ offset = HOST1X_COMMON_NVDEC_MLOCK;
+ break;
+ default:
+ WARN(1, "%s was not updated for class %u", __func__, ch->client->class);
+ return;
+ }
+
+ host1x_common_writel(host1x, 0x0, offset);
+#endif
+}
+
/*
* If this timeout fires, it indicates the current sync_queue entry has
* exceeded its TTL and the userctx should be timed out and remaining
@@ -288,6 +319,9 @@ static void cdma_timeout_handler(struct work_struct *work)
/* stop HW, resetting channel/module */
host1x_hw_cdma_freeze(host1x, cdma);
+ /* release any held MLOCK */
+ timeout_release_mlock(cdma);
+
host1x_cdma_update_sync_queue(cdma, ch->dev);
mutex_unlock(&cdma->lock);
}
diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
index 6b40e9af1e88..732abe0750ff 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -47,10 +47,41 @@ static void trace_write_gather(struct host1x_cdma *cdma, struct host1x_bo *bo,
}
}
-static void submit_wait(struct host1x_cdma *cdma, u32 id, u32 threshold,
+static void submit_wait(struct host1x_job *job, u32 id, u32 threshold,
u32 next_class)
{
-#if HOST1X_HW >= 2
+ struct host1x_cdma *cdma = &job->channel->cdma;
+
+#if HOST1X_HW >= 6
+ u32 stream_id;
+
+ /*
+ * If a memory context has been set, use it. Otherwise
+ * (if context isolation is disabled) use the engine's
+ * firmware stream ID.
+ */
+ if (job->memory_context)
+ stream_id = job->memory_context->stream_id;
+ else
+ stream_id = job->engine_fallback_streamid;
+
+ host1x_cdma_push_wide(cdma,
+ host1x_opcode_setclass(
+ HOST1X_CLASS_HOST1X,
+ HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32,
+ /* WAIT_SYNCPT_32 is at SYNCPT_PAYLOAD_32+2 */
+ BIT(0) | BIT(2)
+ ),
+ threshold,
+ id,
+ HOST1X_OPCODE_NOP
+ );
+ host1x_cdma_push_wide(&job->channel->cdma,
+ host1x_opcode_setclass(job->class, 0, 0),
+ host1x_opcode_setpayload(stream_id),
+ host1x_opcode_setstreamid(job->engine_streamid_offset / 4),
+ HOST1X_OPCODE_NOP);
+#elif HOST1X_HW >= 2
host1x_cdma_push_wide(cdma,
host1x_opcode_setclass(
HOST1X_CLASS_HOST1X,
@@ -97,7 +128,7 @@ static void submit_gathers(struct host1x_job *job, u32 job_syncpt_base)
else
threshold = cmd->wait.threshold;
- submit_wait(cdma, cmd->wait.id, threshold, cmd->wait.next_class);
+ submit_wait(job, cmd->wait.id, threshold, cmd->wait.next_class);
} else {
struct host1x_job_gather *g = &cmd->gather;
@@ -180,11 +211,77 @@ static void host1x_enable_gather_filter(struct host1x_channel *ch)
#endif
}
+static void channel_program_cdma(struct host1x_job *job)
+{
+ struct host1x_cdma *cdma = &job->channel->cdma;
+ struct host1x_syncpt *sp = job->syncpt;
+
+#if HOST1X_HW >= 6
+ u32 fence;
+
+ /* Enter engine class with invalid stream ID. */
+ host1x_cdma_push_wide(cdma,
+ host1x_opcode_acquire_mlock(job->class),
+ host1x_opcode_setclass(job->class, 0, 0),
+ host1x_opcode_setpayload(0),
+ host1x_opcode_setstreamid(job->engine_streamid_offset / 4));
+
+ /* Before switching stream ID to real stream ID, ensure engine is idle. */
+ fence = host1x_syncpt_incr_max(sp, 1);
+ host1x_cdma_push(&job->channel->cdma,
+ host1x_opcode_nonincr(HOST1X_UCLASS_INCR_SYNCPT, 1),
+ HOST1X_UCLASS_INCR_SYNCPT_INDX_F(job->syncpt->id) |
+ HOST1X_UCLASS_INCR_SYNCPT_COND_F(4));
+ submit_wait(job, job->syncpt->id, fence, job->class);
+
+ /* Submit work. */
+ job->syncpt_end = host1x_syncpt_incr_max(sp, job->syncpt_incrs);
+ submit_gathers(job, job->syncpt_end - job->syncpt_incrs);
+
+ /* Before releasing MLOCK, ensure engine is idle again. */
+ fence = host1x_syncpt_incr_max(sp, 1);
+ host1x_cdma_push(&job->channel->cdma,
+ host1x_opcode_nonincr(HOST1X_UCLASS_INCR_SYNCPT, 1),
+ HOST1X_UCLASS_INCR_SYNCPT_INDX_F(job->syncpt->id) |
+ HOST1X_UCLASS_INCR_SYNCPT_COND_F(4));
+ submit_wait(job, job->syncpt->id, fence, job->class);
+
+ /* Release MLOCK. */
+ host1x_cdma_push(cdma,
+ HOST1X_OPCODE_NOP, host1x_opcode_release_mlock(job->class));
+#else
+ if (job->serialize) {
+ /*
+ * Force serialization by inserting a host wait for the
+ * previous job to finish before this one can commence.
+ */
+ host1x_cdma_push(cdma,
+ host1x_opcode_setclass(HOST1X_CLASS_HOST1X,
+ host1x_uclass_wait_syncpt_r(), 1),
+ host1x_class_host_wait_syncpt(job->syncpt->id,
+ host1x_syncpt_read_max(sp)));
+ }
+
+ /* Synchronize base register to allow using it for relative waiting */
+ if (sp->base)
+ synchronize_syncpt_base(job);
+
+ /* add a setclass for modules that require it */
+ if (job->class)
+ host1x_cdma_push(cdma,
+ host1x_opcode_setclass(job->class, 0, 0),
+ HOST1X_OPCODE_NOP);
+
+ job->syncpt_end = host1x_syncpt_incr_max(sp, job->syncpt_incrs);
+
+ submit_gathers(job, job->syncpt_end - job->syncpt_incrs);
+#endif
+}
+
static int channel_submit(struct host1x_job *job)
{
struct host1x_channel *ch = job->channel;
struct host1x_syncpt *sp = job->syncpt;
- u32 user_syncpt_incrs = job->syncpt_incrs;
u32 prev_max = 0;
u32 syncval;
int err;
@@ -212,6 +309,7 @@ static int channel_submit(struct host1x_job *job)
host1x_channel_set_streamid(ch);
host1x_enable_gather_filter(ch);
+ host1x_hw_syncpt_assign_to_channel(host, sp, ch);
/* begin a CDMA submit */
err = host1x_cdma_begin(&ch->cdma, job);
@@ -220,35 +318,8 @@ static int channel_submit(struct host1x_job *job)
goto error;
}
- if (job->serialize) {
- /*
- * Force serialization by inserting a host wait for the
- * previous job to finish before this one can commence.
- */
- host1x_cdma_push(&ch->cdma,
- host1x_opcode_setclass(HOST1X_CLASS_HOST1X,
- host1x_uclass_wait_syncpt_r(), 1),
- host1x_class_host_wait_syncpt(job->syncpt->id,
- host1x_syncpt_read_max(sp)));
- }
-
- /* Synchronize base register to allow using it for relative waiting */
- if (sp->base)
- synchronize_syncpt_base(job);
-
- syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs);
-
- host1x_hw_syncpt_assign_to_channel(host, sp, ch);
-
- job->syncpt_end = syncval;
-
- /* add a setclass for modules that require it */
- if (job->class)
- host1x_cdma_push(&ch->cdma,
- host1x_opcode_setclass(job->class, 0, 0),
- HOST1X_OPCODE_NOP);
-
- submit_gathers(job, syncval - user_syncpt_incrs);
+ channel_program_cdma(job);
+ syncval = host1x_syncpt_read_max(sp);
/* end CDMA submit & stash pinned hMems into sync queue */
host1x_cdma_end(&ch->cdma, job);
diff --git a/drivers/gpu/host1x/hw/host1x01_hardware.h b/drivers/gpu/host1x/hw/host1x01_hardware.h
index fe59df1d3dc3..cb93d7c1808c 100644
--- a/drivers/gpu/host1x/hw/host1x01_hardware.h
+++ b/drivers/gpu/host1x/hw/host1x01_hardware.h
@@ -15,118 +15,6 @@
#include "hw_host1x01_sync.h"
#include "hw_host1x01_uclass.h"
-static inline u32 host1x_class_host_wait_syncpt(
- unsigned indx, unsigned threshold)
-{
- return host1x_uclass_wait_syncpt_indx_f(indx)
- | host1x_uclass_wait_syncpt_thresh_f(threshold);
-}
-
-static inline u32 host1x_class_host_load_syncpt_base(
- unsigned indx, unsigned threshold)
-{
- return host1x_uclass_load_syncpt_base_base_indx_f(indx)
- | host1x_uclass_load_syncpt_base_value_f(threshold);
-}
-
-static inline u32 host1x_class_host_wait_syncpt_base(
- unsigned indx, unsigned base_indx, unsigned offset)
-{
- return host1x_uclass_wait_syncpt_base_indx_f(indx)
- | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx)
- | host1x_uclass_wait_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt_base(
- unsigned base_indx, unsigned offset)
-{
- return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx)
- | host1x_uclass_incr_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt(
- unsigned cond, unsigned indx)
-{
- return host1x_uclass_incr_syncpt_cond_f(cond)
- | host1x_uclass_incr_syncpt_indx_f(indx);
-}
-
-static inline u32 host1x_class_host_indoff_reg_write(
- unsigned mod_id, unsigned offset, bool auto_inc)
-{
- u32 v = host1x_uclass_indoff_indbe_f(0xf)
- | host1x_uclass_indoff_indmodid_f(mod_id)
- | host1x_uclass_indoff_indroffset_f(offset);
- if (auto_inc)
- v |= host1x_uclass_indoff_autoinc_f(1);
- return v;
-}
-
-static inline u32 host1x_class_host_indoff_reg_read(
- unsigned mod_id, unsigned offset, bool auto_inc)
-{
- u32 v = host1x_uclass_indoff_indmodid_f(mod_id)
- | host1x_uclass_indoff_indroffset_f(offset)
- | host1x_uclass_indoff_rwn_read_v();
- if (auto_inc)
- v |= host1x_uclass_indoff_autoinc_f(1);
- return v;
-}
-
-
-/* cdma opcodes */
-static inline u32 host1x_opcode_setclass(
- unsigned class_id, unsigned offset, unsigned mask)
-{
- return (0 << 28) | (offset << 16) | (class_id << 6) | mask;
-}
-
-static inline u32 host1x_opcode_incr(unsigned offset, unsigned count)
-{
- return (1 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count)
-{
- return (2 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask)
-{
- return (3 << 28) | (offset << 16) | mask;
-}
-
-static inline u32 host1x_opcode_imm(unsigned offset, unsigned value)
-{
- return (4 << 28) | (offset << 16) | value;
-}
-
-static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx)
-{
- return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(),
- host1x_class_host_incr_syncpt(cond, indx));
-}
-
-static inline u32 host1x_opcode_restart(unsigned address)
-{
- return (5 << 28) | (address >> 4);
-}
-
-static inline u32 host1x_opcode_gather(unsigned count)
-{
- return (6 << 28) | count;
-}
-
-static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count)
-{
- return (6 << 28) | (offset << 16) | BIT(15) | count;
-}
-
-static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
-{
- return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
-}
-
-#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0)
+#include "opcodes.h"
#endif
diff --git a/drivers/gpu/host1x/hw/host1x02_hardware.h b/drivers/gpu/host1x/hw/host1x02_hardware.h
index af60d7fb016d..2d1282b9bc33 100644
--- a/drivers/gpu/host1x/hw/host1x02_hardware.h
+++ b/drivers/gpu/host1x/hw/host1x02_hardware.h
@@ -15,117 +15,6 @@
#include "hw_host1x02_sync.h"
#include "hw_host1x02_uclass.h"
-static inline u32 host1x_class_host_wait_syncpt(
- unsigned indx, unsigned threshold)
-{
- return host1x_uclass_wait_syncpt_indx_f(indx)
- | host1x_uclass_wait_syncpt_thresh_f(threshold);
-}
-
-static inline u32 host1x_class_host_load_syncpt_base(
- unsigned indx, unsigned threshold)
-{
- return host1x_uclass_load_syncpt_base_base_indx_f(indx)
- | host1x_uclass_load_syncpt_base_value_f(threshold);
-}
-
-static inline u32 host1x_class_host_wait_syncpt_base(
- unsigned indx, unsigned base_indx, unsigned offset)
-{
- return host1x_uclass_wait_syncpt_base_indx_f(indx)
- | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx)
- | host1x_uclass_wait_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt_base(
- unsigned base_indx, unsigned offset)
-{
- return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx)
- | host1x_uclass_incr_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt(
- unsigned cond, unsigned indx)
-{
- return host1x_uclass_incr_syncpt_cond_f(cond)
- | host1x_uclass_incr_syncpt_indx_f(indx);
-}
-
-static inline u32 host1x_class_host_indoff_reg_write(
- unsigned mod_id, unsigned offset, bool auto_inc)
-{
- u32 v = host1x_uclass_indoff_indbe_f(0xf)
- | host1x_uclass_indoff_indmodid_f(mod_id)
- | host1x_uclass_indoff_indroffset_f(offset);
- if (auto_inc)
- v |= host1x_uclass_indoff_autoinc_f(1);
- return v;
-}
-
-static inline u32 host1x_class_host_indoff_reg_read(
- unsigned mod_id, unsigned offset, bool auto_inc)
-{
- u32 v = host1x_uclass_indoff_indmodid_f(mod_id)
- | host1x_uclass_indoff_indroffset_f(offset)
- | host1x_uclass_indoff_rwn_read_v();
- if (auto_inc)
- v |= host1x_uclass_indoff_autoinc_f(1);
- return v;
-}
-
-/* cdma opcodes */
-static inline u32 host1x_opcode_setclass(
- unsigned class_id, unsigned offset, unsigned mask)
-{
- return (0 << 28) | (offset << 16) | (class_id << 6) | mask;
-}
-
-static inline u32 host1x_opcode_incr(unsigned offset, unsigned count)
-{
- return (1 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count)
-{
- return (2 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask)
-{
- return (3 << 28) | (offset << 16) | mask;
-}
-
-static inline u32 host1x_opcode_imm(unsigned offset, unsigned value)
-{
- return (4 << 28) | (offset << 16) | value;
-}
-
-static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx)
-{
- return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(),
- host1x_class_host_incr_syncpt(cond, indx));
-}
-
-static inline u32 host1x_opcode_restart(unsigned address)
-{
- return (5 << 28) | (address >> 4);
-}
-
-static inline u32 host1x_opcode_gather(unsigned count)
-{
- return (6 << 28) | count;
-}
-
-static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count)
-{
- return (6 << 28) | (offset << 16) | BIT(15) | count;
-}
-
-static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
-{
- return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
-}
-
-#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0)
+#include "opcodes.h"
#endif
diff --git a/drivers/gpu/host1x/hw/host1x04_hardware.h b/drivers/gpu/host1x/hw/host1x04_hardware.h
index 4f9bcddf27e3..84d244e8af30 100644
--- a/drivers/gpu/host1x/hw/host1x04_hardware.h
+++ b/drivers/gpu/host1x/hw/host1x04_hardware.h
@@ -15,117 +15,6 @@
#include "hw_host1x04_sync.h"
#include "hw_host1x04_uclass.h"
-static inline u32 host1x_class_host_wait_syncpt(
- unsigned indx, unsigned threshold)
-{
- return host1x_uclass_wait_syncpt_indx_f(indx)
- | host1x_uclass_wait_syncpt_thresh_f(threshold);
-}
-
-static inline u32 host1x_class_host_load_syncpt_base(
- unsigned indx, unsigned threshold)
-{
- return host1x_uclass_load_syncpt_base_base_indx_f(indx)
- | host1x_uclass_load_syncpt_base_value_f(threshold);
-}
-
-static inline u32 host1x_class_host_wait_syncpt_base(
- unsigned indx, unsigned base_indx, unsigned offset)
-{
- return host1x_uclass_wait_syncpt_base_indx_f(indx)
- | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx)
- | host1x_uclass_wait_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt_base(
- unsigned base_indx, unsigned offset)
-{
- return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx)
- | host1x_uclass_incr_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt(
- unsigned cond, unsigned indx)
-{
- return host1x_uclass_incr_syncpt_cond_f(cond)
- | host1x_uclass_incr_syncpt_indx_f(indx);
-}
-
-static inline u32 host1x_class_host_indoff_reg_write(
- unsigned mod_id, unsigned offset, bool auto_inc)
-{
- u32 v = host1x_uclass_indoff_indbe_f(0xf)
- | host1x_uclass_indoff_indmodid_f(mod_id)
- | host1x_uclass_indoff_indroffset_f(offset);
- if (auto_inc)
- v |= host1x_uclass_indoff_autoinc_f(1);
- return v;
-}
-
-static inline u32 host1x_class_host_indoff_reg_read(
- unsigned mod_id, unsigned offset, bool auto_inc)
-{
- u32 v = host1x_uclass_indoff_indmodid_f(mod_id)
- | host1x_uclass_indoff_indroffset_f(offset)
- | host1x_uclass_indoff_rwn_read_v();
- if (auto_inc)
- v |= host1x_uclass_indoff_autoinc_f(1);
- return v;
-}
-
-/* cdma opcodes */
-static inline u32 host1x_opcode_setclass(
- unsigned class_id, unsigned offset, unsigned mask)
-{
- return (0 << 28) | (offset << 16) | (class_id << 6) | mask;
-}
-
-static inline u32 host1x_opcode_incr(unsigned offset, unsigned count)
-{
- return (1 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count)
-{
- return (2 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask)
-{
- return (3 << 28) | (offset << 16) | mask;
-}
-
-static inline u32 host1x_opcode_imm(unsigned offset, unsigned value)
-{
- return (4 << 28) | (offset << 16) | value;
-}
-
-static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx)
-{
- return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(),
- host1x_class_host_incr_syncpt(cond, indx));
-}
-
-static inline u32 host1x_opcode_restart(unsigned address)
-{
- return (5 << 28) | (address >> 4);
-}
-
-static inline u32 host1x_opcode_gather(unsigned count)
-{
- return (6 << 28) | count;
-}
-
-static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count)
-{
- return (6 << 28) | (offset << 16) | BIT(15) | count;
-}
-
-static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
-{
- return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
-}
-
-#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0)
+#include "opcodes.h"
#endif
diff --git a/drivers/gpu/host1x/hw/host1x05_hardware.h b/drivers/gpu/host1x/hw/host1x05_hardware.h
index af3ab4b7f010..1dcde6ec7909 100644
--- a/drivers/gpu/host1x/hw/host1x05_hardware.h
+++ b/drivers/gpu/host1x/hw/host1x05_hardware.h
@@ -15,117 +15,6 @@
#include "hw_host1x05_sync.h"
#include "hw_host1x05_uclass.h"
-static inline u32 host1x_class_host_wait_syncpt(
- unsigned indx, unsigned threshold)
-{
- return host1x_uclass_wait_syncpt_indx_f(indx)
- | host1x_uclass_wait_syncpt_thresh_f(threshold);
-}
-
-static inline u32 host1x_class_host_load_syncpt_base(
- unsigned indx, unsigned threshold)
-{
- return host1x_uclass_load_syncpt_base_base_indx_f(indx)
- | host1x_uclass_load_syncpt_base_value_f(threshold);
-}
-
-static inline u32 host1x_class_host_wait_syncpt_base(
- unsigned indx, unsigned base_indx, unsigned offset)
-{
- return host1x_uclass_wait_syncpt_base_indx_f(indx)
- | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx)
- | host1x_uclass_wait_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt_base(
- unsigned base_indx, unsigned offset)
-{
- return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx)
- | host1x_uclass_incr_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt(
- unsigned cond, unsigned indx)
-{
- return host1x_uclass_incr_syncpt_cond_f(cond)
- | host1x_uclass_incr_syncpt_indx_f(indx);
-}
-
-static inline u32 host1x_class_host_indoff_reg_write(
- unsigned mod_id, unsigned offset, bool auto_inc)
-{
- u32 v = host1x_uclass_indoff_indbe_f(0xf)
- | host1x_uclass_indoff_indmodid_f(mod_id)
- | host1x_uclass_indoff_indroffset_f(offset);
- if (auto_inc)
- v |= host1x_uclass_indoff_autoinc_f(1);
- return v;
-}
-
-static inline u32 host1x_class_host_indoff_reg_read(
- unsigned mod_id, unsigned offset, bool auto_inc)
-{
- u32 v = host1x_uclass_indoff_indmodid_f(mod_id)
- | host1x_uclass_indoff_indroffset_f(offset)
- | host1x_uclass_indoff_rwn_read_v();
- if (auto_inc)
- v |= host1x_uclass_indoff_autoinc_f(1);
- return v;
-}
-
-/* cdma opcodes */
-static inline u32 host1x_opcode_setclass(
- unsigned class_id, unsigned offset, unsigned mask)
-{
- return (0 << 28) | (offset << 16) | (class_id << 6) | mask;
-}
-
-static inline u32 host1x_opcode_incr(unsigned offset, unsigned count)
-{
- return (1 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count)
-{
- return (2 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask)
-{
- return (3 << 28) | (offset << 16) | mask;
-}
-
-static inline u32 host1x_opcode_imm(unsigned offset, unsigned value)
-{
- return (4 << 28) | (offset << 16) | value;
-}
-
-static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx)
-{
- return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(),
- host1x_class_host_incr_syncpt(cond, indx));
-}
-
-static inline u32 host1x_opcode_restart(unsigned address)
-{
- return (5 << 28) | (address >> 4);
-}
-
-static inline u32 host1x_opcode_gather(unsigned count)
-{
- return (6 << 28) | count;
-}
-
-static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count)
-{
- return (6 << 28) | (offset << 16) | BIT(15) | count;
-}
-
-static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
-{
- return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
-}
-
-#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0)
+#include "opcodes.h"
#endif
diff --git a/drivers/gpu/host1x/hw/host1x06_hardware.h b/drivers/gpu/host1x/hw/host1x06_hardware.h
index 01a142a09800..c05cfa7e3090 100644
--- a/drivers/gpu/host1x/hw/host1x06_hardware.h
+++ b/drivers/gpu/host1x/hw/host1x06_hardware.h
@@ -16,122 +16,6 @@
#include "hw_host1x06_vm.h"
#include "hw_host1x06_hypervisor.h"
-static inline u32 host1x_class_host_wait_syncpt(
- unsigned indx, unsigned threshold)
-{
- return host1x_uclass_wait_syncpt_indx_f(indx)
- | host1x_uclass_wait_syncpt_thresh_f(threshold);
-}
-
-static inline u32 host1x_class_host_load_syncpt_base(
- unsigned indx, unsigned threshold)
-{
- return host1x_uclass_load_syncpt_base_base_indx_f(indx)
- | host1x_uclass_load_syncpt_base_value_f(threshold);
-}
-
-static inline u32 host1x_class_host_wait_syncpt_base(
- unsigned indx, unsigned base_indx, unsigned offset)
-{
- return host1x_uclass_wait_syncpt_base_indx_f(indx)
- | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx)
- | host1x_uclass_wait_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt_base(
- unsigned base_indx, unsigned offset)
-{
- return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx)
- | host1x_uclass_incr_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt(
- unsigned cond, unsigned indx)
-{
- return host1x_uclass_incr_syncpt_cond_f(cond)
- | host1x_uclass_incr_syncpt_indx_f(indx);
-}
-
-static inline u32 host1x_class_host_indoff_reg_write(
- unsigned mod_id, unsigned offset, bool auto_inc)
-{
- u32 v = host1x_uclass_indoff_indbe_f(0xf)
- | host1x_uclass_indoff_indmodid_f(mod_id)
- | host1x_uclass_indoff_indroffset_f(offset);
- if (auto_inc)
- v |= host1x_uclass_indoff_autoinc_f(1);
- return v;
-}
-
-static inline u32 host1x_class_host_indoff_reg_read(
- unsigned mod_id, unsigned offset, bool auto_inc)
-{
- u32 v = host1x_uclass_indoff_indmodid_f(mod_id)
- | host1x_uclass_indoff_indroffset_f(offset)
- | host1x_uclass_indoff_rwn_read_v();
- if (auto_inc)
- v |= host1x_uclass_indoff_autoinc_f(1);
- return v;
-}
-
-/* cdma opcodes */
-static inline u32 host1x_opcode_setclass(
- unsigned class_id, unsigned offset, unsigned mask)
-{
- return (0 << 28) | (offset << 16) | (class_id << 6) | mask;
-}
-
-static inline u32 host1x_opcode_incr(unsigned offset, unsigned count)
-{
- return (1 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count)
-{
- return (2 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask)
-{
- return (3 << 28) | (offset << 16) | mask;
-}
-
-static inline u32 host1x_opcode_imm(unsigned offset, unsigned value)
-{
- return (4 << 28) | (offset << 16) | value;
-}
-
-static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx)
-{
- return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(),
- host1x_class_host_incr_syncpt(cond, indx));
-}
-
-static inline u32 host1x_opcode_restart(unsigned address)
-{
- return (5 << 28) | (address >> 4);
-}
-
-static inline u32 host1x_opcode_gather(unsigned count)
-{
- return (6 << 28) | count;
-}
-
-static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count)
-{
- return (6 << 28) | (offset << 16) | BIT(15) | count;
-}
-
-static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
-{
- return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
-}
-
-static inline u32 host1x_opcode_gather_wide(unsigned count)
-{
- return (12 << 28) | count;
-}
-
-#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0)
+#include "opcodes.h"
#endif
diff --git a/drivers/gpu/host1x/hw/host1x07_hardware.h b/drivers/gpu/host1x/hw/host1x07_hardware.h
index e6582172ebfd..d67364e03956 100644
--- a/drivers/gpu/host1x/hw/host1x07_hardware.h
+++ b/drivers/gpu/host1x/hw/host1x07_hardware.h
@@ -16,122 +16,6 @@
#include "hw_host1x07_vm.h"
#include "hw_host1x07_hypervisor.h"
-static inline u32 host1x_class_host_wait_syncpt(
- unsigned indx, unsigned threshold)
-{
- return host1x_uclass_wait_syncpt_indx_f(indx)
- | host1x_uclass_wait_syncpt_thresh_f(threshold);
-}
-
-static inline u32 host1x_class_host_load_syncpt_base(
- unsigned indx, unsigned threshold)
-{
- return host1x_uclass_load_syncpt_base_base_indx_f(indx)
- | host1x_uclass_load_syncpt_base_value_f(threshold);
-}
-
-static inline u32 host1x_class_host_wait_syncpt_base(
- unsigned indx, unsigned base_indx, unsigned offset)
-{
- return host1x_uclass_wait_syncpt_base_indx_f(indx)
- | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx)
- | host1x_uclass_wait_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt_base(
- unsigned base_indx, unsigned offset)
-{
- return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx)
- | host1x_uclass_incr_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt(
- unsigned cond, unsigned indx)
-{
- return host1x_uclass_incr_syncpt_cond_f(cond)
- | host1x_uclass_incr_syncpt_indx_f(indx);
-}
-
-static inline u32 host1x_class_host_indoff_reg_write(
- unsigned mod_id, unsigned offset, bool auto_inc)
-{
- u32 v = host1x_uclass_indoff_indbe_f(0xf)
- | host1x_uclass_indoff_indmodid_f(mod_id)
- | host1x_uclass_indoff_indroffset_f(offset);
- if (auto_inc)
- v |= host1x_uclass_indoff_autoinc_f(1);
- return v;
-}
-
-static inline u32 host1x_class_host_indoff_reg_read(
- unsigned mod_id, unsigned offset, bool auto_inc)
-{
- u32 v = host1x_uclass_indoff_indmodid_f(mod_id)
- | host1x_uclass_indoff_indroffset_f(offset)
- | host1x_uclass_indoff_rwn_read_v();
- if (auto_inc)
- v |= host1x_uclass_indoff_autoinc_f(1);
- return v;
-}
-
-/* cdma opcodes */
-static inline u32 host1x_opcode_setclass(
- unsigned class_id, unsigned offset, unsigned mask)
-{
- return (0 << 28) | (offset << 16) | (class_id << 6) | mask;
-}
-
-static inline u32 host1x_opcode_incr(unsigned offset, unsigned count)
-{
- return (1 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count)
-{
- return (2 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask)
-{
- return (3 << 28) | (offset << 16) | mask;
-}
-
-static inline u32 host1x_opcode_imm(unsigned offset, unsigned value)
-{
- return (4 << 28) | (offset << 16) | value;
-}
-
-static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx)
-{
- return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(),
- host1x_class_host_incr_syncpt(cond, indx));
-}
-
-static inline u32 host1x_opcode_restart(unsigned address)
-{
- return (5 << 28) | (address >> 4);
-}
-
-static inline u32 host1x_opcode_gather(unsigned count)
-{
- return (6 << 28) | count;
-}
-
-static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count)
-{
- return (6 << 28) | (offset << 16) | BIT(15) | count;
-}
-
-static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
-{
- return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
-}
-
-static inline u32 host1x_opcode_gather_wide(unsigned count)
-{
- return (12 << 28) | count;
-}
-
-#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0)
+#include "opcodes.h"
#endif
diff --git a/drivers/gpu/host1x/hw/host1x08.c b/drivers/gpu/host1x/hw/host1x08.c
new file mode 100644
index 000000000000..754890c34c74
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x08.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Host1x init for Tegra234 SoCs
+ *
+ * Copyright (c) 2022 NVIDIA Corporation.
+ */
+
+/* include hw specification */
+#include "host1x08.h"
+#include "host1x08_hardware.h"
+
+/* include code */
+#define HOST1X_HW 8
+
+#include "cdma_hw.c"
+#include "channel_hw.c"
+#include "debug_hw.c"
+#include "intr_hw.c"
+#include "syncpt_hw.c"
+
+#include "../dev.h"
+
+int host1x08_init(struct host1x *host)
+{
+ host->channel_op = &host1x_channel_ops;
+ host->cdma_op = &host1x_cdma_ops;
+ host->cdma_pb_op = &host1x_pushbuffer_ops;
+ host->syncpt_op = &host1x_syncpt_ops;
+ host->intr_op = &host1x_intr_ops;
+ host->debug_op = &host1x_debug_ops;
+
+ return 0;
+}
diff --git a/drivers/gpu/host1x/hw/host1x08.h b/drivers/gpu/host1x/hw/host1x08.h
new file mode 100644
index 000000000000..a6bad56e44cf
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x08.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Host1x init for Tegra234 SoCs
+ *
+ * Copyright (c) 2018 NVIDIA Corporation.
+ */
+
+#ifndef HOST1X_HOST1X08_H
+#define HOST1X_HOST1X08_H
+
+struct host1x;
+
+int host1x08_init(struct host1x *host);
+
+#endif
diff --git a/drivers/gpu/host1x/hw/host1x08_hardware.h b/drivers/gpu/host1x/hw/host1x08_hardware.h
new file mode 100644
index 000000000000..936243060bff
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x08_hardware.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Tegra host1x Register Offsets for Tegra234
+ *
+ * Copyright (c) 2022 NVIDIA Corporation.
+ */
+
+#ifndef __HOST1X_HOST1X08_HARDWARE_H
+#define __HOST1X_HOST1X08_HARDWARE_H
+
+#include <linux/types.h>
+#include <linux/bitops.h>
+
+#include "hw_host1x08_uclass.h"
+#include "hw_host1x08_vm.h"
+#include "hw_host1x08_hypervisor.h"
+#include "hw_host1x08_common.h"
+
+#include "opcodes.h"
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x08_channel.h b/drivers/gpu/host1x/hw/hw_host1x08_channel.h
new file mode 100644
index 000000000000..c9272d2ab14a
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x08_channel.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 NVIDIA Corporation.
+ */
+
+#ifndef HOST1X_HW_HOST1X08_CHANNEL_H
+#define HOST1X_HW_HOST1X08_CHANNEL_H
+
+#define HOST1X_CHANNEL_SMMU_STREAMID 0x084
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x08_common.h b/drivers/gpu/host1x/hw/hw_host1x08_common.h
new file mode 100644
index 000000000000..8e0c99150ec2
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x08_common.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 NVIDIA Corporation.
+ */
+
+#define HOST1X_COMMON_OFA_MLOCK 0x4050
+#define HOST1X_COMMON_NVJPG1_MLOCK 0x4070
+#define HOST1X_COMMON_VIC_MLOCK 0x4078
+#define HOST1X_COMMON_NVENC_MLOCK 0x407c
+#define HOST1X_COMMON_NVDEC_MLOCK 0x4080
+#define HOST1X_COMMON_NVJPG_MLOCK 0x4084
diff --git a/drivers/gpu/host1x/hw/hw_host1x08_hypervisor.h b/drivers/gpu/host1x/hw/hw_host1x08_hypervisor.h
new file mode 100644
index 000000000000..22964324c914
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x08_hypervisor.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 NVIDIA Corporation.
+ */
+
+#define HOST1X_HV_SYNCPT_PROT_EN 0x1724
+#define HOST1X_HV_SYNCPT_PROT_EN_CH_EN BIT(1)
+#define HOST1X_HV_CH_MLOCK_EN(x) (0x1700 + (x * 4))
+#define HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(x) (0x1710 + (x * 4))
diff --git a/drivers/gpu/host1x/hw/hw_host1x08_uclass.h b/drivers/gpu/host1x/hw/hw_host1x08_uclass.h
new file mode 100644
index 000000000000..724cccd71aa1
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x08_uclass.h
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2018 NVIDIA Corporation.
+ */
+
+ /*
+ * Function naming determines intended use:
+ *
+ * <x>_r(void) : Returns the offset for register <x>.
+ *
+ * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ * and masked to place it at field <y> of register <x>. This value
+ * can be |'d with others to produce a full register value for
+ * register <x>.
+ *
+ * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
+ * value can be ~'d and then &'d to clear the value of field <y> for
+ * register <x>.
+ *
+ * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ * to place it at field <y> of register <x>. This value can be |'d
+ * with others to produce a full register value for <x>.
+ *
+ * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ * <x> value 'r' after being shifted to place its LSB at bit 0.
+ * This value is suitable for direct comparison with other unshifted
+ * values appropriate for use in field <y> of register <x>.
+ *
+ * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ * field <y> of register <x>. This value is suitable for direct
+ * comparison with unshifted values appropriate for use in field <y>
+ * of register <x>.
+ */
+
+#ifndef HOST1X_HW_HOST1X08_UCLASS_H
+#define HOST1X_HW_HOST1X08_UCLASS_H
+
+static inline u32 host1x_uclass_incr_syncpt_r(void)
+{
+ return 0x0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT \
+ host1x_uclass_incr_syncpt_r()
+static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v)
+{
+ return (v & 0xff) << 10;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_COND_F(v) \
+ host1x_uclass_incr_syncpt_cond_f(v)
+static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v)
+{
+ return (v & 0xff) << 0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \
+ host1x_uclass_incr_syncpt_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_r(void)
+{
+ return 0x8;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT \
+ host1x_uclass_wait_syncpt_r()
+static inline u32 host1x_uclass_wait_syncpt_indx_f(u32 v)
+{
+ return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_INDX_F(v) \
+ host1x_uclass_wait_syncpt_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_thresh_f(u32 v)
+{
+ return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_THRESH_F(v) \
+ host1x_uclass_wait_syncpt_thresh_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_r(void)
+{
+ return 0x9;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE \
+ host1x_uclass_wait_syncpt_base_r()
+static inline u32 host1x_uclass_wait_syncpt_base_indx_f(u32 v)
+{
+ return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_INDX_F(v) \
+ host1x_uclass_wait_syncpt_base_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_base_indx_f(u32 v)
+{
+ return (v & 0xff) << 16;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_BASE_INDX_F(v) \
+ host1x_uclass_wait_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_offset_f(u32 v)
+{
+ return (v & 0xffff) << 0;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_OFFSET_F(v) \
+ host1x_uclass_wait_syncpt_base_offset_f(v)
+static inline u32 host1x_uclass_load_syncpt_base_r(void)
+{
+ return 0xb;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE \
+ host1x_uclass_load_syncpt_base_r()
+static inline u32 host1x_uclass_load_syncpt_base_base_indx_f(u32 v)
+{
+ return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(v) \
+ host1x_uclass_load_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_load_syncpt_base_value_f(u32 v)
+{
+ return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(v) \
+ host1x_uclass_load_syncpt_base_value_f(v)
+static inline u32 host1x_uclass_incr_syncpt_base_base_indx_f(u32 v)
+{
+ return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_BASE_BASE_INDX_F(v) \
+ host1x_uclass_incr_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_incr_syncpt_base_offset_f(u32 v)
+{
+ return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_BASE_OFFSET_F(v) \
+ host1x_uclass_incr_syncpt_base_offset_f(v)
+static inline u32 host1x_uclass_indoff_r(void)
+{
+ return 0x2d;
+}
+#define HOST1X_UCLASS_INDOFF \
+ host1x_uclass_indoff_r()
+static inline u32 host1x_uclass_indoff_indbe_f(u32 v)
+{
+ return (v & 0xf) << 28;
+}
+#define HOST1X_UCLASS_INDOFF_INDBE_F(v) \
+ host1x_uclass_indoff_indbe_f(v)
+static inline u32 host1x_uclass_indoff_autoinc_f(u32 v)
+{
+ return (v & 0x1) << 27;
+}
+#define HOST1X_UCLASS_INDOFF_AUTOINC_F(v) \
+ host1x_uclass_indoff_autoinc_f(v)
+static inline u32 host1x_uclass_indoff_indmodid_f(u32 v)
+{
+ return (v & 0xff) << 18;
+}
+#define HOST1X_UCLASS_INDOFF_INDMODID_F(v) \
+ host1x_uclass_indoff_indmodid_f(v)
+static inline u32 host1x_uclass_indoff_indroffset_f(u32 v)
+{
+ return (v & 0xffff) << 2;
+}
+#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
+ host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_indoff_rwn_read_v(void)
+{
+ return 1;
+}
+#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
+ host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_load_syncpt_payload_32_r(void)
+{
+ return 0x4e;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \
+ host1x_uclass_load_syncpt_payload_32_r()
+static inline u32 host1x_uclass_wait_syncpt_32_r(void)
+{
+ return 0x50;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_32 \
+ host1x_uclass_wait_syncpt_32_r()
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x08_vm.h b/drivers/gpu/host1x/hw/hw_host1x08_vm.h
new file mode 100644
index 000000000000..1455a4670bf8
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x08_vm.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 NVIDIA Corporation.
+ */
+
+#define HOST1X_CHANNEL_DMASTART 0x0000
+#define HOST1X_CHANNEL_DMASTART_HI 0x0004
+#define HOST1X_CHANNEL_DMAPUT 0x0008
+#define HOST1X_CHANNEL_DMAPUT_HI 0x000c
+#define HOST1X_CHANNEL_DMAGET 0x0010
+#define HOST1X_CHANNEL_DMAGET_HI 0x0014
+#define HOST1X_CHANNEL_DMAEND 0x0018
+#define HOST1X_CHANNEL_DMAEND_HI 0x001c
+#define HOST1X_CHANNEL_DMACTRL 0x0020
+#define HOST1X_CHANNEL_DMACTRL_DMASTOP BIT(0)
+#define HOST1X_CHANNEL_DMACTRL_DMAGETRST BIT(1)
+#define HOST1X_CHANNEL_DMACTRL_DMAINITGET BIT(2)
+#define HOST1X_CHANNEL_CMDFIFO_STAT 0x0024
+#define HOST1X_CHANNEL_CMDFIFO_STAT_EMPTY BIT(13)
+#define HOST1X_CHANNEL_CMDFIFO_RDATA 0x0028
+#define HOST1X_CHANNEL_CMDP_OFFSET 0x0030
+#define HOST1X_CHANNEL_CMDP_CLASS 0x0034
+#define HOST1X_CHANNEL_CHANNELSTAT 0x0038
+#define HOST1X_CHANNEL_CMDPROC_STOP 0x0048
+#define HOST1X_CHANNEL_TEARDOWN 0x004c
+#define HOST1X_CHANNEL_SMMU_STREAMID 0x0084
+
+#define HOST1X_SYNC_SYNCPT_CPU_INCR(x) (0x6400 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(x) (0x6600 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_INTR_DEST(x) (0x6684 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(x) (0x770c + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(x) (0x7790 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT(x) (0x8080 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_INT_THRESH(x) (0xa088 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_CH_APP(x) (0xb090 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_CH_APP_CH(v) (((v) & 0x3f) << 8)
diff --git a/drivers/gpu/host1x/hw/intr_hw.c b/drivers/gpu/host1x/hw/intr_hw.c
index f56375ee6e71..9acccdb139e6 100644
--- a/drivers/gpu/host1x/hw/intr_hw.c
+++ b/drivers/gpu/host1x/hw/intr_hw.c
@@ -76,6 +76,17 @@ static void intr_hw_init(struct host1x *host, u32 cpm)
/* update host clocks per usec */
host1x_sync_writel(host, cpm, HOST1X_SYNC_USEC_CLK);
#endif
+#if HOST1X_HW >= 8
+ u32 id;
+
+ /*
+ * Program threshold interrupt destination among 8 lines per VM,
+ * per syncpoint. For now, just direct all to the first interrupt
+ * line.
+ */
+ for (id = 0; id < host->info->nb_pts; id++)
+ host1x_sync_writel(host, 0, HOST1X_SYNC_SYNCPT_INTR_DEST(id));
+#endif
}
static int
diff --git a/drivers/gpu/host1x/hw/opcodes.h b/drivers/gpu/host1x/hw/opcodes.h
new file mode 100644
index 000000000000..649614499b04
--- /dev/null
+++ b/drivers/gpu/host1x/hw/opcodes.h
@@ -0,0 +1,150 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Tegra host1x opcodes
+ *
+ * Copyright (c) 2022 NVIDIA Corporation.
+ */
+
+#ifndef __HOST1X_OPCODES_H
+#define __HOST1X_OPCODES_H
+
+#include <linux/types.h>
+
+static inline u32 host1x_class_host_wait_syncpt(
+ unsigned indx, unsigned threshold)
+{
+ return host1x_uclass_wait_syncpt_indx_f(indx)
+ | host1x_uclass_wait_syncpt_thresh_f(threshold);
+}
+
+static inline u32 host1x_class_host_load_syncpt_base(
+ unsigned indx, unsigned threshold)
+{
+ return host1x_uclass_load_syncpt_base_base_indx_f(indx)
+ | host1x_uclass_load_syncpt_base_value_f(threshold);
+}
+
+static inline u32 host1x_class_host_wait_syncpt_base(
+ unsigned indx, unsigned base_indx, unsigned offset)
+{
+ return host1x_uclass_wait_syncpt_base_indx_f(indx)
+ | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx)
+ | host1x_uclass_wait_syncpt_base_offset_f(offset);
+}
+
+static inline u32 host1x_class_host_incr_syncpt_base(
+ unsigned base_indx, unsigned offset)
+{
+ return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx)
+ | host1x_uclass_incr_syncpt_base_offset_f(offset);
+}
+
+static inline u32 host1x_class_host_incr_syncpt(
+ unsigned cond, unsigned indx)
+{
+ return host1x_uclass_incr_syncpt_cond_f(cond)
+ | host1x_uclass_incr_syncpt_indx_f(indx);
+}
+
+static inline u32 host1x_class_host_indoff_reg_write(
+ unsigned mod_id, unsigned offset, bool auto_inc)
+{
+ u32 v = host1x_uclass_indoff_indbe_f(0xf)
+ | host1x_uclass_indoff_indmodid_f(mod_id)
+ | host1x_uclass_indoff_indroffset_f(offset);
+ if (auto_inc)
+ v |= host1x_uclass_indoff_autoinc_f(1);
+ return v;
+}
+
+static inline u32 host1x_class_host_indoff_reg_read(
+ unsigned mod_id, unsigned offset, bool auto_inc)
+{
+ u32 v = host1x_uclass_indoff_indmodid_f(mod_id)
+ | host1x_uclass_indoff_indroffset_f(offset)
+ | host1x_uclass_indoff_rwn_read_v();
+ if (auto_inc)
+ v |= host1x_uclass_indoff_autoinc_f(1);
+ return v;
+}
+
+static inline u32 host1x_opcode_setclass(
+ unsigned class_id, unsigned offset, unsigned mask)
+{
+ return (0 << 28) | (offset << 16) | (class_id << 6) | mask;
+}
+
+static inline u32 host1x_opcode_incr(unsigned offset, unsigned count)
+{
+ return (1 << 28) | (offset << 16) | count;
+}
+
+static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count)
+{
+ return (2 << 28) | (offset << 16) | count;
+}
+
+static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask)
+{
+ return (3 << 28) | (offset << 16) | mask;
+}
+
+static inline u32 host1x_opcode_imm(unsigned offset, unsigned value)
+{
+ return (4 << 28) | (offset << 16) | value;
+}
+
+static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx)
+{
+ return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(),
+ host1x_class_host_incr_syncpt(cond, indx));
+}
+
+static inline u32 host1x_opcode_restart(unsigned address)
+{
+ return (5 << 28) | (address >> 4);
+}
+
+static inline u32 host1x_opcode_gather(unsigned count)
+{
+ return (6 << 28) | count;
+}
+
+static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count)
+{
+ return (6 << 28) | (offset << 16) | BIT(15) | count;
+}
+
+static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
+{
+ return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
+}
+
+static inline u32 host1x_opcode_setstreamid(unsigned streamid)
+{
+ return (7 << 28) | streamid;
+}
+
+static inline u32 host1x_opcode_setpayload(unsigned payload)
+{
+ return (9 << 28) | payload;
+}
+
+static inline u32 host1x_opcode_gather_wide(unsigned count)
+{
+ return (12 << 28) | count;
+}
+
+static inline u32 host1x_opcode_acquire_mlock(unsigned mlock)
+{
+ return (14 << 28) | (0 << 24) | mlock;
+}
+
+static inline u32 host1x_opcode_release_mlock(unsigned mlock)
+{
+ return (14 << 28) | (1 << 24) | mlock;
+}
+
+#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0)
+
+#endif