Diffstat (limited to 'drivers/gpu/host1x')
26 files changed, 956 insertions, 779 deletions
diff --git a/drivers/gpu/host1x/Makefile b/drivers/gpu/host1x/Makefile index c891a3e33844..ee5286ffe08d 100644 --- a/drivers/gpu/host1x/Makefile +++ b/drivers/gpu/host1x/Makefile @@ -15,7 +15,11 @@ host1x-y = \ hw/host1x04.o \ hw/host1x05.o \ hw/host1x06.o \ - hw/host1x07.o + hw/host1x07.o \ + hw/host1x08.o + +host1x-$(CONFIG_IOMMU_API) += \ + context.o obj-$(CONFIG_TEGRA_HOST1X) += host1x.o obj-$(CONFIG_TEGRA_HOST1X_CONTEXT_BUS) += context_bus.o diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c index 765e5aa64eb6..103fda055394 100644 --- a/drivers/gpu/host1x/cdma.c +++ b/drivers/gpu/host1x/cdma.c @@ -457,9 +457,24 @@ syncpt_incr: * to offset 0xbad. This does nothing but * has a easily detected signature in debug * traces. + * + * On systems with MLOCK enforcement enabled, + * the above 0 word writes would fall foul of + * the enforcement. As such, in the first slot + * put a RESTART_W opcode to the beginning + * of the next job. We don't use this for older + * chips since those only support the RESTART + * opcode with inconvenient alignment requirements. */ - mapped[2*slot+0] = 0x1bad0000; - mapped[2*slot+1] = 0x1bad0000; + if (i == 0 && host1x->info->has_wide_gather) { + unsigned int next_job = (job->first_get/8 + job->num_slots) + % HOST1X_PUSHBUFFER_SLOTS; + mapped[2*slot+0] = (0xd << 28) | (next_job * 2); + mapped[2*slot+1] = 0x0; + } else { + mapped[2*slot+0] = 0x1bad0000; + mapped[2*slot+1] = 0x1bad0000; + } } job->cancelled = true; @@ -600,8 +615,8 @@ void host1x_cdma_push_wide(struct host1x_cdma *cdma, u32 op1, u32 op2, struct host1x_channel *channel = cdma_to_channel(cdma); struct host1x *host1x = cdma_to_host1x(cdma); struct push_buffer *pb = &cdma->push_buffer; - unsigned int needed = 2, extra = 0, i; unsigned int space = cdma->slots_free; + unsigned int needed = 2, extra = 0; if (host1x_debug_trace_cmdbuf) trace_host1x_cdma_push_wide(dev_name(channel->dev), op1, op2, @@ -619,20 +634,14 @@ void host1x_cdma_push_wide(struct host1x_cdma *cdma, u32 op1, u32 op2, cdma->slots_free = space - needed; cdma->slots_used += needed; - /* - * Note that we rely on the fact that this is only used to submit wide - * gather opcodes, which consist of 3 words, and they are padded with - * a NOP to avoid having to deal with fractional slots (a slot always - * represents 2 words). The fourth opcode passed to this function will - * therefore always be a NOP. - * - * This works around a slight ambiguity when it comes to opcodes. For - * all current host1x incarnations the NOP opcode uses the exact same - * encoding (0x20000000), so we could hard-code the value here, but a - * new incarnation may change it and break that assumption. - */ - for (i = 0; i < extra; i++) - host1x_pushbuffer_push(pb, op4, op4); + if (extra > 0) { + /* + * If there isn't enough space at the tail of the pushbuffer, + * insert a RESTART(0) here to go back to the beginning. + * The code above adjusted the indexes appropriately. 
+ */ + host1x_pushbuffer_push(pb, (0x5 << 28), 0xdead0000); + } host1x_pushbuffer_push(pb, op1, op2); host1x_pushbuffer_push(pb, op3, op4); diff --git a/drivers/gpu/host1x/channel.c b/drivers/gpu/host1x/channel.c index 2a9a3a8d5931..2d0051d6314c 100644 --- a/drivers/gpu/host1x/channel.c +++ b/drivers/gpu/host1x/channel.c @@ -21,22 +21,18 @@ int host1x_channel_list_init(struct host1x_channel_list *chlist, if (!chlist->channels) return -ENOMEM; - chlist->allocated_channels = - kcalloc(BITS_TO_LONGS(num_channels), sizeof(unsigned long), - GFP_KERNEL); + chlist->allocated_channels = bitmap_zalloc(num_channels, GFP_KERNEL); if (!chlist->allocated_channels) { kfree(chlist->channels); return -ENOMEM; } - bitmap_zero(chlist->allocated_channels, num_channels); - return 0; } void host1x_channel_list_free(struct host1x_channel_list *chlist) { - kfree(chlist->allocated_channels); + bitmap_free(chlist->allocated_channels); kfree(chlist->channels); } diff --git a/drivers/gpu/host1x/context.c b/drivers/gpu/host1x/context.c new file mode 100644 index 000000000000..b08cf11f9a66 --- /dev/null +++ b/drivers/gpu/host1x/context.c @@ -0,0 +1,160 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, NVIDIA Corporation. + */ + +#include <linux/device.h> +#include <linux/kref.h> +#include <linux/of.h> +#include <linux/of_platform.h> +#include <linux/pid.h> +#include <linux/slab.h> + +#include "context.h" +#include "dev.h" + +int host1x_memory_context_list_init(struct host1x *host1x) +{ + struct host1x_memory_context_list *cdl = &host1x->context_list; + struct device_node *node = host1x->dev->of_node; + struct host1x_memory_context *ctx; + unsigned int i; + int err; + + cdl->devs = NULL; + cdl->len = 0; + mutex_init(&cdl->lock); + + err = of_property_count_u32_elems(node, "iommu-map"); + if (err < 0) + return 0; + + cdl->devs = kcalloc(err, sizeof(*cdl->devs), GFP_KERNEL); + if (!cdl->devs) + return -ENOMEM; + cdl->len = err / 4; + + for (i = 0; i < cdl->len; i++) { + struct iommu_fwspec *fwspec; + + ctx = &cdl->devs[i]; + + ctx->host = host1x; + + device_initialize(&ctx->dev); + + /* + * Due to an issue with T194 NVENC, only 38 bits can be used. + * Anyway, 256GiB of IOVA ought to be enough for anyone. 
+ */ + ctx->dma_mask = DMA_BIT_MASK(38); + ctx->dev.dma_mask = &ctx->dma_mask; + ctx->dev.coherent_dma_mask = ctx->dma_mask; + dev_set_name(&ctx->dev, "host1x-ctx.%d", i); + ctx->dev.bus = &host1x_context_device_bus_type; + ctx->dev.parent = host1x->dev; + + dma_set_max_seg_size(&ctx->dev, UINT_MAX); + + err = device_add(&ctx->dev); + if (err) { + dev_err(host1x->dev, "could not add context device %d: %d\n", i, err); + goto del_devices; + } + + err = of_dma_configure_id(&ctx->dev, node, true, &i); + if (err) { + dev_err(host1x->dev, "IOMMU configuration failed for context device %d: %d\n", + i, err); + device_del(&ctx->dev); + goto del_devices; + } + + fwspec = dev_iommu_fwspec_get(&ctx->dev); + if (!fwspec || !device_iommu_mapped(&ctx->dev)) { + dev_err(host1x->dev, "Context device %d has no IOMMU!\n", i); + device_del(&ctx->dev); + goto del_devices; + } + + ctx->stream_id = fwspec->ids[0] & 0xffff; + } + + return 0; + +del_devices: + while (i--) + device_del(&cdl->devs[i].dev); + + kfree(cdl->devs); + cdl->len = 0; + + return err; +} + +void host1x_memory_context_list_free(struct host1x_memory_context_list *cdl) +{ + unsigned int i; + + for (i = 0; i < cdl->len; i++) + device_del(&cdl->devs[i].dev); + + kfree(cdl->devs); + cdl->len = 0; +} + +struct host1x_memory_context *host1x_memory_context_alloc(struct host1x *host1x, + struct pid *pid) +{ + struct host1x_memory_context_list *cdl = &host1x->context_list; + struct host1x_memory_context *free = NULL; + int i; + + if (!cdl->len) + return ERR_PTR(-EOPNOTSUPP); + + mutex_lock(&cdl->lock); + + for (i = 0; i < cdl->len; i++) { + struct host1x_memory_context *cd = &cdl->devs[i]; + + if (cd->owner == pid) { + refcount_inc(&cd->ref); + mutex_unlock(&cdl->lock); + return cd; + } else if (!cd->owner && !free) { + free = cd; + } + } + + if (!free) { + mutex_unlock(&cdl->lock); + return ERR_PTR(-EBUSY); + } + + refcount_set(&free->ref, 1); + free->owner = get_pid(pid); + + mutex_unlock(&cdl->lock); + + return free; +} +EXPORT_SYMBOL_GPL(host1x_memory_context_alloc); + +void host1x_memory_context_get(struct host1x_memory_context *cd) +{ + refcount_inc(&cd->ref); +} +EXPORT_SYMBOL_GPL(host1x_memory_context_get); + +void host1x_memory_context_put(struct host1x_memory_context *cd) +{ + struct host1x_memory_context_list *cdl = &cd->host->context_list; + + if (refcount_dec_and_mutex_lock(&cd->ref, &cdl->lock)) { + put_pid(cd->owner); + cd->owner = NULL; + mutex_unlock(&cdl->lock); + } +} +EXPORT_SYMBOL_GPL(host1x_memory_context_put); diff --git a/drivers/gpu/host1x/context.h b/drivers/gpu/host1x/context.h new file mode 100644 index 000000000000..3e03bc1d3bac --- /dev/null +++ b/drivers/gpu/host1x/context.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Host1x context devices + * + * Copyright (c) 2020, NVIDIA Corporation. 
+ */ + +#ifndef __HOST1X_CONTEXT_H +#define __HOST1X_CONTEXT_H + +#include <linux/mutex.h> +#include <linux/refcount.h> + +struct host1x; + +extern struct bus_type host1x_context_device_bus_type; + +struct host1x_memory_context_list { + struct mutex lock; + struct host1x_memory_context *devs; + unsigned int len; +}; + +#ifdef CONFIG_IOMMU_API +int host1x_memory_context_list_init(struct host1x *host1x); +void host1x_memory_context_list_free(struct host1x_memory_context_list *cdl); +#else +static inline int host1x_memory_context_list_init(struct host1x *host1x) +{ + return 0; +} + +static inline void host1x_memory_context_list_free(struct host1x_memory_context_list *cdl) +{ +} +#endif + +#endif diff --git a/drivers/gpu/host1x/context_bus.c b/drivers/gpu/host1x/context_bus.c index b0d35b2bbe89..d9421179d7b4 100644 --- a/drivers/gpu/host1x/context_bus.c +++ b/drivers/gpu/host1x/context_bus.c @@ -15,11 +15,6 @@ static int __init host1x_context_device_bus_init(void) { int err; - if (!of_machine_is_compatible("nvidia,tegra186") && - !of_machine_is_compatible("nvidia,tegra194") && - !of_machine_is_compatible("nvidia,tegra234")) - return 0; - err = bus_register(&host1x_context_device_bus_type); if (err < 0) { pr_err("bus type registration failed: %d\n", err); diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 80c685ab3e30..0cd3f97e7e49 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -28,6 +28,7 @@ #include "bus.h" #include "channel.h" +#include "context.h" #include "debug.h" #include "dev.h" #include "intr.h" @@ -38,6 +39,12 @@ #include "hw/host1x05.h" #include "hw/host1x06.h" #include "hw/host1x07.h" +#include "hw/host1x08.h" + +void host1x_common_writel(struct host1x *host1x, u32 v, u32 r) +{ + writel(v, host1x->common_regs + r); +} void host1x_hypervisor_writel(struct host1x *host1x, u32 v, u32 r) { @@ -199,7 +206,48 @@ static const struct host1x_info host1x07_info = { .reserve_vblank_syncpts = false, }; +/* + * Tegra234 has two stream ID protection tables, one for setting stream IDs + * through the channel path via SETSTREAMID, and one for setting them via + * MMIO. We program each engine's data stream ID in the channel path table + * and firmware stream ID in the MMIO path table. 
+ */ +static const struct host1x_sid_entry tegra234_sid_table[] = { + { + /* VIC channel */ + .base = 0x17b8, + .offset = 0x30, + .limit = 0x30 + }, + { + /* VIC MMIO */ + .base = 0x1688, + .offset = 0x34, + .limit = 0x34 + }, +}; + +static const struct host1x_info host1x08_info = { + .nb_channels = 63, + .nb_pts = 1024, + .nb_mlocks = 24, + .nb_bases = 0, + .init = host1x08_init, + .sync_offset = 0x0, + .dma_mask = DMA_BIT_MASK(40), + .has_wide_gather = true, + .has_hypervisor = true, + .has_common = true, + .num_sid_entries = ARRAY_SIZE(tegra234_sid_table), + .sid_table = tegra234_sid_table, + .streamid_vm_table = { 0x1004, 128 }, + .classid_vm_table = { 0x1404, 25 }, + .mmio_vm_table = { 0x1504, 25 }, + .reserve_vblank_syncpts = false, +}; + static const struct of_device_id host1x_of_match[] = { + { .compatible = "nvidia,tegra234-host1x", .data = &host1x08_info, }, { .compatible = "nvidia,tegra194-host1x", .data = &host1x07_info, }, { .compatible = "nvidia,tegra186-host1x", .data = &host1x06_info, }, { .compatible = "nvidia,tegra210-host1x", .data = &host1x05_info, }, @@ -211,7 +259,7 @@ static const struct of_device_id host1x_of_match[] = { }; MODULE_DEVICE_TABLE(of, host1x_of_match); -static void host1x_setup_sid_table(struct host1x *host) +static void host1x_setup_virtualization_tables(struct host1x *host) { const struct host1x_info *info = host->info; unsigned int i; @@ -225,6 +273,21 @@ static void host1x_setup_sid_table(struct host1x *host) host1x_hypervisor_writel(host, entry->offset, entry->base); host1x_hypervisor_writel(host, entry->limit, entry->base + 4); } + + for (i = 0; i < info->streamid_vm_table.count; i++) { + /* Allow access to all stream IDs to all VMs. */ + host1x_hypervisor_writel(host, 0xff, info->streamid_vm_table.base + 4 * i); + } + + for (i = 0; i < info->classid_vm_table.count; i++) { + /* Allow access to all classes to all VMs. */ + host1x_hypervisor_writel(host, 0xff, info->classid_vm_table.base + 4 * i); + } + + for (i = 0; i < info->mmio_vm_table.count; i++) { + /* Use VM1 (that's us) as originator VMID for engine MMIO accesses. 
*/ + host1x_hypervisor_writel(host, 0x1, info->mmio_vm_table.base + 4 * i); + } } static bool host1x_wants_iommu(struct host1x *host1x) @@ -402,16 +465,12 @@ static int host1x_get_resets(struct host1x *host) return err; } - if (WARN_ON(!host->resets[1].rstc)) - return -ENOENT; - return 0; } static int host1x_probe(struct platform_device *pdev) { struct host1x *host; - struct resource *regs, *hv_regs = NULL; int syncpt_irq; int err; @@ -422,25 +481,23 @@ static int host1x_probe(struct platform_device *pdev) host->info = of_device_get_match_data(&pdev->dev); if (host->info->has_hypervisor) { - regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "vm"); - if (!regs) { - dev_err(&pdev->dev, "failed to get vm registers\n"); - return -ENXIO; - } + host->regs = devm_platform_ioremap_resource_byname(pdev, "vm"); + if (IS_ERR(host->regs)) + return PTR_ERR(host->regs); + + host->hv_regs = devm_platform_ioremap_resource_byname(pdev, "hypervisor"); + if (IS_ERR(host->hv_regs)) + return PTR_ERR(host->hv_regs); - hv_regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, - "hypervisor"); - if (!hv_regs) { - dev_err(&pdev->dev, - "failed to get hypervisor registers\n"); - return -ENXIO; + if (host->info->has_common) { + host->common_regs = devm_platform_ioremap_resource_byname(pdev, "common"); + if (IS_ERR(host->common_regs)) + return PTR_ERR(host->common_regs); } } else { - regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!regs) { - dev_err(&pdev->dev, "failed to get registers\n"); - return -ENXIO; - } + host->regs = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(host->regs)) + return PTR_ERR(host->regs); } syncpt_irq = platform_get_irq(pdev, 0); @@ -455,16 +512,6 @@ static int host1x_probe(struct platform_device *pdev) /* set common host1x device data */ platform_set_drvdata(pdev, host); - host->regs = devm_ioremap_resource(&pdev->dev, regs); - if (IS_ERR(host->regs)) - return PTR_ERR(host->regs); - - if (host->info->has_hypervisor) { - host->hv_regs = devm_ioremap_resource(&pdev->dev, hv_regs); - if (IS_ERR(host->hv_regs)) - return PTR_ERR(host->hv_regs); - } - host->dev->dma_parms = &host->dma_parms; dma_set_max_seg_size(host->dev, UINT_MAX); @@ -503,10 +550,16 @@ static int host1x_probe(struct platform_device *pdev) goto iommu_exit; } + err = host1x_memory_context_list_init(host); + if (err) { + dev_err(&pdev->dev, "failed to initialize context list\n"); + goto free_channels; + } + err = host1x_syncpt_init(host); if (err) { dev_err(&pdev->dev, "failed to initialize syncpts\n"); - goto free_channels; + goto free_contexts; } err = host1x_intr_init(host, syncpt_irq); @@ -550,6 +603,8 @@ pm_disable: host1x_intr_deinit(host); deinit_syncpt: host1x_syncpt_deinit(host); +free_contexts: + host1x_memory_context_list_free(&host->context_list); free_channels: host1x_channel_list_free(&host->channel_list); iommu_exit: @@ -571,6 +626,7 @@ static int host1x_remove(struct platform_device *pdev) host1x_intr_deinit(host); host1x_syncpt_deinit(host); + host1x_memory_context_list_free(&host->context_list); host1x_channel_list_free(&host->channel_list); host1x_iommu_exit(host); host1x_bo_cache_destroy(&host->cache); @@ -600,7 +656,7 @@ static int __maybe_unused host1x_runtime_suspend(struct device *dev) return 0; resume_host1x: - host1x_setup_sid_table(host); + host1x_setup_virtualization_tables(host); host1x_syncpt_restore(host); host1x_intr_start(host); @@ -630,7 +686,7 @@ static int __maybe_unused host1x_runtime_resume(struct device *dev) goto disable_clk; } - host1x_setup_sid_table(host); 
+ host1x_setup_virtualization_tables(host); host1x_syncpt_restore(host); host1x_intr_start(host); diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h index ca4b082f0cd4..920e5548cfbc 100644 --- a/drivers/gpu/host1x/dev.h +++ b/drivers/gpu/host1x/dev.h @@ -14,6 +14,7 @@ #include "cdma.h" #include "channel.h" +#include "context.h" #include "intr.h" #include "job.h" #include "syncpt.h" @@ -89,6 +90,11 @@ struct host1x_sid_entry { unsigned int limit; }; +struct host1x_table_desc { + unsigned int base; + unsigned int count; +}; + struct host1x_info { unsigned int nb_channels; /* host1x: number of channels supported */ unsigned int nb_pts; /* host1x: number of syncpoints supported */ @@ -99,8 +105,12 @@ struct host1x_info { u64 dma_mask; /* mask of addressable memory */ bool has_wide_gather; /* supports GATHER_W opcode */ bool has_hypervisor; /* has hypervisor registers */ + bool has_common; /* has common registers separate from hypervisor */ unsigned int num_sid_entries; const struct host1x_sid_entry *sid_table; + struct host1x_table_desc streamid_vm_table; + struct host1x_table_desc classid_vm_table; + struct host1x_table_desc mmio_vm_table; /* * On T20-T148, the boot chain may setup DC to increment syncpoints * 26/27 on VBLANK. As such we cannot use these syncpoints until @@ -114,6 +124,7 @@ struct host1x { void __iomem *regs; void __iomem *hv_regs; /* hypervisor region */ + void __iomem *common_regs; struct host1x_syncpt *syncpt; struct host1x_syncpt_base *bases; struct device *dev; @@ -141,6 +152,7 @@ struct host1x { struct mutex syncpt_mutex; struct host1x_channel_list channel_list; + struct host1x_memory_context_list context_list; struct dentry *debugfs; @@ -154,6 +166,7 @@ struct host1x { struct host1x_bo_cache cache; }; +void host1x_common_writel(struct host1x *host1x, u32 v, u32 r); void host1x_hypervisor_writel(struct host1x *host1x, u32 r, u32 v); u32 host1x_hypervisor_readl(struct host1x *host1x, u32 r); void host1x_sync_writel(struct host1x *host1x, u32 r, u32 v); diff --git a/drivers/gpu/host1x/hw/cdma_hw.c b/drivers/gpu/host1x/hw/cdma_hw.c index e49cd5b8f735..1b65a10b9dfc 100644 --- a/drivers/gpu/host1x/hw/cdma_hw.c +++ b/drivers/gpu/host1x/hw/cdma_hw.c @@ -238,6 +238,37 @@ static void cdma_resume(struct host1x_cdma *cdma, u32 getptr) cdma_timeout_restart(cdma, getptr); } +static void timeout_release_mlock(struct host1x_cdma *cdma) +{ +#if HOST1X_HW >= 8 + /* Tegra186 and Tegra194 require a more complicated MLOCK release + * sequence. Furthermore, those chips by default don't enforce MLOCKs, + * so it turns out that if we don't /actually/ need MLOCKs, we can just + * ignore them. + * + * As such, for now just implement this on Tegra234 where things are + * stricter but also easy to implement. 
+ */ + struct host1x_channel *ch = cdma_to_channel(cdma); + struct host1x *host1x = cdma_to_host1x(cdma); + u32 offset; + + switch (ch->client->class) { + case HOST1X_CLASS_VIC: + offset = HOST1X_COMMON_VIC_MLOCK; + break; + case HOST1X_CLASS_NVDEC: + offset = HOST1X_COMMON_NVDEC_MLOCK; + break; + default: + WARN(1, "%s was not updated for class %u", __func__, ch->client->class); + return; + } + + host1x_common_writel(host1x, 0x0, offset); +#endif +} + /* * If this timeout fires, it indicates the current sync_queue entry has * exceeded its TTL and the userctx should be timed out and remaining @@ -288,6 +319,9 @@ static void cdma_timeout_handler(struct work_struct *work) /* stop HW, resetting channel/module */ host1x_hw_cdma_freeze(host1x, cdma); + /* release any held MLOCK */ + timeout_release_mlock(cdma); + host1x_cdma_update_sync_queue(cdma, ch->dev); mutex_unlock(&cdma->lock); } diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c index 6b40e9af1e88..732abe0750ff 100644 --- a/drivers/gpu/host1x/hw/channel_hw.c +++ b/drivers/gpu/host1x/hw/channel_hw.c @@ -47,10 +47,41 @@ static void trace_write_gather(struct host1x_cdma *cdma, struct host1x_bo *bo, } } -static void submit_wait(struct host1x_cdma *cdma, u32 id, u32 threshold, +static void submit_wait(struct host1x_job *job, u32 id, u32 threshold, u32 next_class) { -#if HOST1X_HW >= 2 + struct host1x_cdma *cdma = &job->channel->cdma; + +#if HOST1X_HW >= 6 + u32 stream_id; + + /* + * If a memory context has been set, use it. Otherwise + * (if context isolation is disabled) use the engine's + * firmware stream ID. + */ + if (job->memory_context) + stream_id = job->memory_context->stream_id; + else + stream_id = job->engine_fallback_streamid; + + host1x_cdma_push_wide(cdma, + host1x_opcode_setclass( + HOST1X_CLASS_HOST1X, + HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32, + /* WAIT_SYNCPT_32 is at SYNCPT_PAYLOAD_32+2 */ + BIT(0) | BIT(2) + ), + threshold, + id, + HOST1X_OPCODE_NOP + ); + host1x_cdma_push_wide(&job->channel->cdma, + host1x_opcode_setclass(job->class, 0, 0), + host1x_opcode_setpayload(stream_id), + host1x_opcode_setstreamid(job->engine_streamid_offset / 4), + HOST1X_OPCODE_NOP); +#elif HOST1X_HW >= 2 host1x_cdma_push_wide(cdma, host1x_opcode_setclass( HOST1X_CLASS_HOST1X, @@ -97,7 +128,7 @@ static void submit_gathers(struct host1x_job *job, u32 job_syncpt_base) else threshold = cmd->wait.threshold; - submit_wait(cdma, cmd->wait.id, threshold, cmd->wait.next_class); + submit_wait(job, cmd->wait.id, threshold, cmd->wait.next_class); } else { struct host1x_job_gather *g = &cmd->gather; @@ -180,11 +211,77 @@ static void host1x_enable_gather_filter(struct host1x_channel *ch) #endif } +static void channel_program_cdma(struct host1x_job *job) +{ + struct host1x_cdma *cdma = &job->channel->cdma; + struct host1x_syncpt *sp = job->syncpt; + +#if HOST1X_HW >= 6 + u32 fence; + + /* Enter engine class with invalid stream ID. */ + host1x_cdma_push_wide(cdma, + host1x_opcode_acquire_mlock(job->class), + host1x_opcode_setclass(job->class, 0, 0), + host1x_opcode_setpayload(0), + host1x_opcode_setstreamid(job->engine_streamid_offset / 4)); + + /* Before switching stream ID to real stream ID, ensure engine is idle. */ + fence = host1x_syncpt_incr_max(sp, 1); + host1x_cdma_push(&job->channel->cdma, + host1x_opcode_nonincr(HOST1X_UCLASS_INCR_SYNCPT, 1), + HOST1X_UCLASS_INCR_SYNCPT_INDX_F(job->syncpt->id) | + HOST1X_UCLASS_INCR_SYNCPT_COND_F(4)); + submit_wait(job, job->syncpt->id, fence, job->class); + + /* Submit work. 
*/ + job->syncpt_end = host1x_syncpt_incr_max(sp, job->syncpt_incrs); + submit_gathers(job, job->syncpt_end - job->syncpt_incrs); + + /* Before releasing MLOCK, ensure engine is idle again. */ + fence = host1x_syncpt_incr_max(sp, 1); + host1x_cdma_push(&job->channel->cdma, + host1x_opcode_nonincr(HOST1X_UCLASS_INCR_SYNCPT, 1), + HOST1X_UCLASS_INCR_SYNCPT_INDX_F(job->syncpt->id) | + HOST1X_UCLASS_INCR_SYNCPT_COND_F(4)); + submit_wait(job, job->syncpt->id, fence, job->class); + + /* Release MLOCK. */ + host1x_cdma_push(cdma, + HOST1X_OPCODE_NOP, host1x_opcode_release_mlock(job->class)); +#else + if (job->serialize) { + /* + * Force serialization by inserting a host wait for the + * previous job to finish before this one can commence. + */ + host1x_cdma_push(cdma, + host1x_opcode_setclass(HOST1X_CLASS_HOST1X, + host1x_uclass_wait_syncpt_r(), 1), + host1x_class_host_wait_syncpt(job->syncpt->id, + host1x_syncpt_read_max(sp))); + } + + /* Synchronize base register to allow using it for relative waiting */ + if (sp->base) + synchronize_syncpt_base(job); + + /* add a setclass for modules that require it */ + if (job->class) + host1x_cdma_push(cdma, + host1x_opcode_setclass(job->class, 0, 0), + HOST1X_OPCODE_NOP); + + job->syncpt_end = host1x_syncpt_incr_max(sp, job->syncpt_incrs); + + submit_gathers(job, job->syncpt_end - job->syncpt_incrs); +#endif +} + static int channel_submit(struct host1x_job *job) { struct host1x_channel *ch = job->channel; struct host1x_syncpt *sp = job->syncpt; - u32 user_syncpt_incrs = job->syncpt_incrs; u32 prev_max = 0; u32 syncval; int err; @@ -212,6 +309,7 @@ static int channel_submit(struct host1x_job *job) host1x_channel_set_streamid(ch); host1x_enable_gather_filter(ch); + host1x_hw_syncpt_assign_to_channel(host, sp, ch); /* begin a CDMA submit */ err = host1x_cdma_begin(&ch->cdma, job); @@ -220,35 +318,8 @@ static int channel_submit(struct host1x_job *job) goto error; } - if (job->serialize) { - /* - * Force serialization by inserting a host wait for the - * previous job to finish before this one can commence. 
- */ - host1x_cdma_push(&ch->cdma, - host1x_opcode_setclass(HOST1X_CLASS_HOST1X, - host1x_uclass_wait_syncpt_r(), 1), - host1x_class_host_wait_syncpt(job->syncpt->id, - host1x_syncpt_read_max(sp))); - } - - /* Synchronize base register to allow using it for relative waiting */ - if (sp->base) - synchronize_syncpt_base(job); - - syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs); - - host1x_hw_syncpt_assign_to_channel(host, sp, ch); - - job->syncpt_end = syncval; - - /* add a setclass for modules that require it */ - if (job->class) - host1x_cdma_push(&ch->cdma, - host1x_opcode_setclass(job->class, 0, 0), - HOST1X_OPCODE_NOP); - - submit_gathers(job, syncval - user_syncpt_incrs); + channel_program_cdma(job); + syncval = host1x_syncpt_read_max(sp); /* end CDMA submit & stash pinned hMems into sync queue */ host1x_cdma_end(&ch->cdma, job); diff --git a/drivers/gpu/host1x/hw/host1x01_hardware.h b/drivers/gpu/host1x/hw/host1x01_hardware.h index fe59df1d3dc3..cb93d7c1808c 100644 --- a/drivers/gpu/host1x/hw/host1x01_hardware.h +++ b/drivers/gpu/host1x/hw/host1x01_hardware.h @@ -15,118 +15,6 @@ #include "hw_host1x01_sync.h" #include "hw_host1x01_uclass.h" -static inline u32 host1x_class_host_wait_syncpt( - unsigned indx, unsigned threshold) -{ - return host1x_uclass_wait_syncpt_indx_f(indx) - | host1x_uclass_wait_syncpt_thresh_f(threshold); -} - -static inline u32 host1x_class_host_load_syncpt_base( - unsigned indx, unsigned threshold) -{ - return host1x_uclass_load_syncpt_base_base_indx_f(indx) - | host1x_uclass_load_syncpt_base_value_f(threshold); -} - -static inline u32 host1x_class_host_wait_syncpt_base( - unsigned indx, unsigned base_indx, unsigned offset) -{ - return host1x_uclass_wait_syncpt_base_indx_f(indx) - | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx) - | host1x_uclass_wait_syncpt_base_offset_f(offset); -} - -static inline u32 host1x_class_host_incr_syncpt_base( - unsigned base_indx, unsigned offset) -{ - return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx) - | host1x_uclass_incr_syncpt_base_offset_f(offset); -} - -static inline u32 host1x_class_host_incr_syncpt( - unsigned cond, unsigned indx) -{ - return host1x_uclass_incr_syncpt_cond_f(cond) - | host1x_uclass_incr_syncpt_indx_f(indx); -} - -static inline u32 host1x_class_host_indoff_reg_write( - unsigned mod_id, unsigned offset, bool auto_inc) -{ - u32 v = host1x_uclass_indoff_indbe_f(0xf) - | host1x_uclass_indoff_indmodid_f(mod_id) - | host1x_uclass_indoff_indroffset_f(offset); - if (auto_inc) - v |= host1x_uclass_indoff_autoinc_f(1); - return v; -} - -static inline u32 host1x_class_host_indoff_reg_read( - unsigned mod_id, unsigned offset, bool auto_inc) -{ - u32 v = host1x_uclass_indoff_indmodid_f(mod_id) - | host1x_uclass_indoff_indroffset_f(offset) - | host1x_uclass_indoff_rwn_read_v(); - if (auto_inc) - v |= host1x_uclass_indoff_autoinc_f(1); - return v; -} - - -/* cdma opcodes */ -static inline u32 host1x_opcode_setclass( - unsigned class_id, unsigned offset, unsigned mask) -{ - return (0 << 28) | (offset << 16) | (class_id << 6) | mask; -} - -static inline u32 host1x_opcode_incr(unsigned offset, unsigned count) -{ - return (1 << 28) | (offset << 16) | count; -} - -static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count) -{ - return (2 << 28) | (offset << 16) | count; -} - -static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask) -{ - return (3 << 28) | (offset << 16) | mask; -} - -static inline u32 host1x_opcode_imm(unsigned offset, unsigned value) -{ - return (4 << 28) | 
(offset << 16) | value; -} - -static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx) -{ - return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(), - host1x_class_host_incr_syncpt(cond, indx)); -} - -static inline u32 host1x_opcode_restart(unsigned address) -{ - return (5 << 28) | (address >> 4); -} - -static inline u32 host1x_opcode_gather(unsigned count) -{ - return (6 << 28) | count; -} - -static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count) -{ - return (6 << 28) | (offset << 16) | BIT(15) | count; -} - -static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count) -{ - return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count; -} - -#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0) +#include "opcodes.h" #endif diff --git a/drivers/gpu/host1x/hw/host1x02_hardware.h b/drivers/gpu/host1x/hw/host1x02_hardware.h index af60d7fb016d..2d1282b9bc33 100644 --- a/drivers/gpu/host1x/hw/host1x02_hardware.h +++ b/drivers/gpu/host1x/hw/host1x02_hardware.h @@ -15,117 +15,6 @@ #include "hw_host1x02_sync.h" #include "hw_host1x02_uclass.h" -static inline u32 host1x_class_host_wait_syncpt( - unsigned indx, unsigned threshold) -{ - return host1x_uclass_wait_syncpt_indx_f(indx) - | host1x_uclass_wait_syncpt_thresh_f(threshold); -} - -static inline u32 host1x_class_host_load_syncpt_base( - unsigned indx, unsigned threshold) -{ - return host1x_uclass_load_syncpt_base_base_indx_f(indx) - | host1x_uclass_load_syncpt_base_value_f(threshold); -} - -static inline u32 host1x_class_host_wait_syncpt_base( - unsigned indx, unsigned base_indx, unsigned offset) -{ - return host1x_uclass_wait_syncpt_base_indx_f(indx) - | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx) - | host1x_uclass_wait_syncpt_base_offset_f(offset); -} - -static inline u32 host1x_class_host_incr_syncpt_base( - unsigned base_indx, unsigned offset) -{ - return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx) - | host1x_uclass_incr_syncpt_base_offset_f(offset); -} - -static inline u32 host1x_class_host_incr_syncpt( - unsigned cond, unsigned indx) -{ - return host1x_uclass_incr_syncpt_cond_f(cond) - | host1x_uclass_incr_syncpt_indx_f(indx); -} - -static inline u32 host1x_class_host_indoff_reg_write( - unsigned mod_id, unsigned offset, bool auto_inc) -{ - u32 v = host1x_uclass_indoff_indbe_f(0xf) - | host1x_uclass_indoff_indmodid_f(mod_id) - | host1x_uclass_indoff_indroffset_f(offset); - if (auto_inc) - v |= host1x_uclass_indoff_autoinc_f(1); - return v; -} - -static inline u32 host1x_class_host_indoff_reg_read( - unsigned mod_id, unsigned offset, bool auto_inc) -{ - u32 v = host1x_uclass_indoff_indmodid_f(mod_id) - | host1x_uclass_indoff_indroffset_f(offset) - | host1x_uclass_indoff_rwn_read_v(); - if (auto_inc) - v |= host1x_uclass_indoff_autoinc_f(1); - return v; -} - -/* cdma opcodes */ -static inline u32 host1x_opcode_setclass( - unsigned class_id, unsigned offset, unsigned mask) -{ - return (0 << 28) | (offset << 16) | (class_id << 6) | mask; -} - -static inline u32 host1x_opcode_incr(unsigned offset, unsigned count) -{ - return (1 << 28) | (offset << 16) | count; -} - -static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count) -{ - return (2 << 28) | (offset << 16) | count; -} - -static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask) -{ - return (3 << 28) | (offset << 16) | mask; -} - -static inline u32 host1x_opcode_imm(unsigned offset, unsigned value) -{ - return (4 << 28) | (offset << 16) | value; -} - -static inline u32 
host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx) -{ - return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(), - host1x_class_host_incr_syncpt(cond, indx)); -} - -static inline u32 host1x_opcode_restart(unsigned address) -{ - return (5 << 28) | (address >> 4); -} - -static inline u32 host1x_opcode_gather(unsigned count) -{ - return (6 << 28) | count; -} - -static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count) -{ - return (6 << 28) | (offset << 16) | BIT(15) | count; -} - -static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count) -{ - return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count; -} - -#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0) +#include "opcodes.h" #endif diff --git a/drivers/gpu/host1x/hw/host1x04_hardware.h b/drivers/gpu/host1x/hw/host1x04_hardware.h index 4f9bcddf27e3..84d244e8af30 100644 --- a/drivers/gpu/host1x/hw/host1x04_hardware.h +++ b/drivers/gpu/host1x/hw/host1x04_hardware.h @@ -15,117 +15,6 @@ #include "hw_host1x04_sync.h" #include "hw_host1x04_uclass.h" -static inline u32 host1x_class_host_wait_syncpt( - unsigned indx, unsigned threshold) -{ - return host1x_uclass_wait_syncpt_indx_f(indx) - | host1x_uclass_wait_syncpt_thresh_f(threshold); -} - -static inline u32 host1x_class_host_load_syncpt_base( - unsigned indx, unsigned threshold) -{ - return host1x_uclass_load_syncpt_base_base_indx_f(indx) - | host1x_uclass_load_syncpt_base_value_f(threshold); -} - -static inline u32 host1x_class_host_wait_syncpt_base( - unsigned indx, unsigned base_indx, unsigned offset) -{ - return host1x_uclass_wait_syncpt_base_indx_f(indx) - | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx) - | host1x_uclass_wait_syncpt_base_offset_f(offset); -} - -static inline u32 host1x_class_host_incr_syncpt_base( - unsigned base_indx, unsigned offset) -{ - return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx) - | host1x_uclass_incr_syncpt_base_offset_f(offset); -} - -static inline u32 host1x_class_host_incr_syncpt( - unsigned cond, unsigned indx) -{ - return host1x_uclass_incr_syncpt_cond_f(cond) - | host1x_uclass_incr_syncpt_indx_f(indx); -} - -static inline u32 host1x_class_host_indoff_reg_write( - unsigned mod_id, unsigned offset, bool auto_inc) -{ - u32 v = host1x_uclass_indoff_indbe_f(0xf) - | host1x_uclass_indoff_indmodid_f(mod_id) - | host1x_uclass_indoff_indroffset_f(offset); - if (auto_inc) - v |= host1x_uclass_indoff_autoinc_f(1); - return v; -} - -static inline u32 host1x_class_host_indoff_reg_read( - unsigned mod_id, unsigned offset, bool auto_inc) -{ - u32 v = host1x_uclass_indoff_indmodid_f(mod_id) - | host1x_uclass_indoff_indroffset_f(offset) - | host1x_uclass_indoff_rwn_read_v(); - if (auto_inc) - v |= host1x_uclass_indoff_autoinc_f(1); - return v; -} - -/* cdma opcodes */ -static inline u32 host1x_opcode_setclass( - unsigned class_id, unsigned offset, unsigned mask) -{ - return (0 << 28) | (offset << 16) | (class_id << 6) | mask; -} - -static inline u32 host1x_opcode_incr(unsigned offset, unsigned count) -{ - return (1 << 28) | (offset << 16) | count; -} - -static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count) -{ - return (2 << 28) | (offset << 16) | count; -} - -static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask) -{ - return (3 << 28) | (offset << 16) | mask; -} - -static inline u32 host1x_opcode_imm(unsigned offset, unsigned value) -{ - return (4 << 28) | (offset << 16) | value; -} - -static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx) -{ - 
return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(), - host1x_class_host_incr_syncpt(cond, indx)); -} - -static inline u32 host1x_opcode_restart(unsigned address) -{ - return (5 << 28) | (address >> 4); -} - -static inline u32 host1x_opcode_gather(unsigned count) -{ - return (6 << 28) | count; -} - -static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count) -{ - return (6 << 28) | (offset << 16) | BIT(15) | count; -} - -static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count) -{ - return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count; -} - -#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0) +#include "opcodes.h" #endif diff --git a/drivers/gpu/host1x/hw/host1x05_hardware.h b/drivers/gpu/host1x/hw/host1x05_hardware.h index af3ab4b7f010..1dcde6ec7909 100644 --- a/drivers/gpu/host1x/hw/host1x05_hardware.h +++ b/drivers/gpu/host1x/hw/host1x05_hardware.h @@ -15,117 +15,6 @@ #include "hw_host1x05_sync.h" #include "hw_host1x05_uclass.h" -static inline u32 host1x_class_host_wait_syncpt( - unsigned indx, unsigned threshold) -{ - return host1x_uclass_wait_syncpt_indx_f(indx) - | host1x_uclass_wait_syncpt_thresh_f(threshold); -} - -static inline u32 host1x_class_host_load_syncpt_base( - unsigned indx, unsigned threshold) -{ - return host1x_uclass_load_syncpt_base_base_indx_f(indx) - | host1x_uclass_load_syncpt_base_value_f(threshold); -} - -static inline u32 host1x_class_host_wait_syncpt_base( - unsigned indx, unsigned base_indx, unsigned offset) -{ - return host1x_uclass_wait_syncpt_base_indx_f(indx) - | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx) - | host1x_uclass_wait_syncpt_base_offset_f(offset); -} - -static inline u32 host1x_class_host_incr_syncpt_base( - unsigned base_indx, unsigned offset) -{ - return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx) - | host1x_uclass_incr_syncpt_base_offset_f(offset); -} - -static inline u32 host1x_class_host_incr_syncpt( - unsigned cond, unsigned indx) -{ - return host1x_uclass_incr_syncpt_cond_f(cond) - | host1x_uclass_incr_syncpt_indx_f(indx); -} - -static inline u32 host1x_class_host_indoff_reg_write( - unsigned mod_id, unsigned offset, bool auto_inc) -{ - u32 v = host1x_uclass_indoff_indbe_f(0xf) - | host1x_uclass_indoff_indmodid_f(mod_id) - | host1x_uclass_indoff_indroffset_f(offset); - if (auto_inc) - v |= host1x_uclass_indoff_autoinc_f(1); - return v; -} - -static inline u32 host1x_class_host_indoff_reg_read( - unsigned mod_id, unsigned offset, bool auto_inc) -{ - u32 v = host1x_uclass_indoff_indmodid_f(mod_id) - | host1x_uclass_indoff_indroffset_f(offset) - | host1x_uclass_indoff_rwn_read_v(); - if (auto_inc) - v |= host1x_uclass_indoff_autoinc_f(1); - return v; -} - -/* cdma opcodes */ -static inline u32 host1x_opcode_setclass( - unsigned class_id, unsigned offset, unsigned mask) -{ - return (0 << 28) | (offset << 16) | (class_id << 6) | mask; -} - -static inline u32 host1x_opcode_incr(unsigned offset, unsigned count) -{ - return (1 << 28) | (offset << 16) | count; -} - -static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count) -{ - return (2 << 28) | (offset << 16) | count; -} - -static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask) -{ - return (3 << 28) | (offset << 16) | mask; -} - -static inline u32 host1x_opcode_imm(unsigned offset, unsigned value) -{ - return (4 << 28) | (offset << 16) | value; -} - -static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx) -{ - return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(), - 
host1x_class_host_incr_syncpt(cond, indx)); -} - -static inline u32 host1x_opcode_restart(unsigned address) -{ - return (5 << 28) | (address >> 4); -} - -static inline u32 host1x_opcode_gather(unsigned count) -{ - return (6 << 28) | count; -} - -static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count) -{ - return (6 << 28) | (offset << 16) | BIT(15) | count; -} - -static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count) -{ - return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count; -} - -#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0) +#include "opcodes.h" #endif diff --git a/drivers/gpu/host1x/hw/host1x06_hardware.h b/drivers/gpu/host1x/hw/host1x06_hardware.h index 01a142a09800..c05cfa7e3090 100644 --- a/drivers/gpu/host1x/hw/host1x06_hardware.h +++ b/drivers/gpu/host1x/hw/host1x06_hardware.h @@ -16,122 +16,6 @@ #include "hw_host1x06_vm.h" #include "hw_host1x06_hypervisor.h" -static inline u32 host1x_class_host_wait_syncpt( - unsigned indx, unsigned threshold) -{ - return host1x_uclass_wait_syncpt_indx_f(indx) - | host1x_uclass_wait_syncpt_thresh_f(threshold); -} - -static inline u32 host1x_class_host_load_syncpt_base( - unsigned indx, unsigned threshold) -{ - return host1x_uclass_load_syncpt_base_base_indx_f(indx) - | host1x_uclass_load_syncpt_base_value_f(threshold); -} - -static inline u32 host1x_class_host_wait_syncpt_base( - unsigned indx, unsigned base_indx, unsigned offset) -{ - return host1x_uclass_wait_syncpt_base_indx_f(indx) - | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx) - | host1x_uclass_wait_syncpt_base_offset_f(offset); -} - -static inline u32 host1x_class_host_incr_syncpt_base( - unsigned base_indx, unsigned offset) -{ - return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx) - | host1x_uclass_incr_syncpt_base_offset_f(offset); -} - -static inline u32 host1x_class_host_incr_syncpt( - unsigned cond, unsigned indx) -{ - return host1x_uclass_incr_syncpt_cond_f(cond) - | host1x_uclass_incr_syncpt_indx_f(indx); -} - -static inline u32 host1x_class_host_indoff_reg_write( - unsigned mod_id, unsigned offset, bool auto_inc) -{ - u32 v = host1x_uclass_indoff_indbe_f(0xf) - | host1x_uclass_indoff_indmodid_f(mod_id) - | host1x_uclass_indoff_indroffset_f(offset); - if (auto_inc) - v |= host1x_uclass_indoff_autoinc_f(1); - return v; -} - -static inline u32 host1x_class_host_indoff_reg_read( - unsigned mod_id, unsigned offset, bool auto_inc) -{ - u32 v = host1x_uclass_indoff_indmodid_f(mod_id) - | host1x_uclass_indoff_indroffset_f(offset) - | host1x_uclass_indoff_rwn_read_v(); - if (auto_inc) - v |= host1x_uclass_indoff_autoinc_f(1); - return v; -} - -/* cdma opcodes */ -static inline u32 host1x_opcode_setclass( - unsigned class_id, unsigned offset, unsigned mask) -{ - return (0 << 28) | (offset << 16) | (class_id << 6) | mask; -} - -static inline u32 host1x_opcode_incr(unsigned offset, unsigned count) -{ - return (1 << 28) | (offset << 16) | count; -} - -static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count) -{ - return (2 << 28) | (offset << 16) | count; -} - -static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask) -{ - return (3 << 28) | (offset << 16) | mask; -} - -static inline u32 host1x_opcode_imm(unsigned offset, unsigned value) -{ - return (4 << 28) | (offset << 16) | value; -} - -static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx) -{ - return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(), - host1x_class_host_incr_syncpt(cond, indx)); -} - -static 
inline u32 host1x_opcode_restart(unsigned address) -{ - return (5 << 28) | (address >> 4); -} - -static inline u32 host1x_opcode_gather(unsigned count) -{ - return (6 << 28) | count; -} - -static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count) -{ - return (6 << 28) | (offset << 16) | BIT(15) | count; -} - -static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count) -{ - return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count; -} - -static inline u32 host1x_opcode_gather_wide(unsigned count) -{ - return (12 << 28) | count; -} - -#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0) +#include "opcodes.h" #endif diff --git a/drivers/gpu/host1x/hw/host1x07_hardware.h b/drivers/gpu/host1x/hw/host1x07_hardware.h index e6582172ebfd..d67364e03956 100644 --- a/drivers/gpu/host1x/hw/host1x07_hardware.h +++ b/drivers/gpu/host1x/hw/host1x07_hardware.h @@ -16,122 +16,6 @@ #include "hw_host1x07_vm.h" #include "hw_host1x07_hypervisor.h" -static inline u32 host1x_class_host_wait_syncpt( - unsigned indx, unsigned threshold) -{ - return host1x_uclass_wait_syncpt_indx_f(indx) - | host1x_uclass_wait_syncpt_thresh_f(threshold); -} - -static inline u32 host1x_class_host_load_syncpt_base( - unsigned indx, unsigned threshold) -{ - return host1x_uclass_load_syncpt_base_base_indx_f(indx) - | host1x_uclass_load_syncpt_base_value_f(threshold); -} - -static inline u32 host1x_class_host_wait_syncpt_base( - unsigned indx, unsigned base_indx, unsigned offset) -{ - return host1x_uclass_wait_syncpt_base_indx_f(indx) - | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx) - | host1x_uclass_wait_syncpt_base_offset_f(offset); -} - -static inline u32 host1x_class_host_incr_syncpt_base( - unsigned base_indx, unsigned offset) -{ - return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx) - | host1x_uclass_incr_syncpt_base_offset_f(offset); -} - -static inline u32 host1x_class_host_incr_syncpt( - unsigned cond, unsigned indx) -{ - return host1x_uclass_incr_syncpt_cond_f(cond) - | host1x_uclass_incr_syncpt_indx_f(indx); -} - -static inline u32 host1x_class_host_indoff_reg_write( - unsigned mod_id, unsigned offset, bool auto_inc) -{ - u32 v = host1x_uclass_indoff_indbe_f(0xf) - | host1x_uclass_indoff_indmodid_f(mod_id) - | host1x_uclass_indoff_indroffset_f(offset); - if (auto_inc) - v |= host1x_uclass_indoff_autoinc_f(1); - return v; -} - -static inline u32 host1x_class_host_indoff_reg_read( - unsigned mod_id, unsigned offset, bool auto_inc) -{ - u32 v = host1x_uclass_indoff_indmodid_f(mod_id) - | host1x_uclass_indoff_indroffset_f(offset) - | host1x_uclass_indoff_rwn_read_v(); - if (auto_inc) - v |= host1x_uclass_indoff_autoinc_f(1); - return v; -} - -/* cdma opcodes */ -static inline u32 host1x_opcode_setclass( - unsigned class_id, unsigned offset, unsigned mask) -{ - return (0 << 28) | (offset << 16) | (class_id << 6) | mask; -} - -static inline u32 host1x_opcode_incr(unsigned offset, unsigned count) -{ - return (1 << 28) | (offset << 16) | count; -} - -static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count) -{ - return (2 << 28) | (offset << 16) | count; -} - -static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask) -{ - return (3 << 28) | (offset << 16) | mask; -} - -static inline u32 host1x_opcode_imm(unsigned offset, unsigned value) -{ - return (4 << 28) | (offset << 16) | value; -} - -static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx) -{ - return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(), - 
host1x_class_host_incr_syncpt(cond, indx)); -} - -static inline u32 host1x_opcode_restart(unsigned address) -{ - return (5 << 28) | (address >> 4); -} - -static inline u32 host1x_opcode_gather(unsigned count) -{ - return (6 << 28) | count; -} - -static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count) -{ - return (6 << 28) | (offset << 16) | BIT(15) | count; -} - -static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count) -{ - return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count; -} - -static inline u32 host1x_opcode_gather_wide(unsigned count) -{ - return (12 << 28) | count; -} - -#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0) +#include "opcodes.h" #endif diff --git a/drivers/gpu/host1x/hw/host1x08.c b/drivers/gpu/host1x/hw/host1x08.c new file mode 100644 index 000000000000..754890c34c74 --- /dev/null +++ b/drivers/gpu/host1x/hw/host1x08.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Host1x init for Tegra234 SoCs + * + * Copyright (c) 2022 NVIDIA Corporation. + */ + +/* include hw specification */ +#include "host1x08.h" +#include "host1x08_hardware.h" + +/* include code */ +#define HOST1X_HW 8 + +#include "cdma_hw.c" +#include "channel_hw.c" +#include "debug_hw.c" +#include "intr_hw.c" +#include "syncpt_hw.c" + +#include "../dev.h" + +int host1x08_init(struct host1x *host) +{ + host->channel_op = &host1x_channel_ops; + host->cdma_op = &host1x_cdma_ops; + host->cdma_pb_op = &host1x_pushbuffer_ops; + host->syncpt_op = &host1x_syncpt_ops; + host->intr_op = &host1x_intr_ops; + host->debug_op = &host1x_debug_ops; + + return 0; +} diff --git a/drivers/gpu/host1x/hw/host1x08.h b/drivers/gpu/host1x/hw/host1x08.h new file mode 100644 index 000000000000..a6bad56e44cf --- /dev/null +++ b/drivers/gpu/host1x/hw/host1x08.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Host1x init for Tegra234 SoCs + * + * Copyright (c) 2018 NVIDIA Corporation. + */ + +#ifndef HOST1X_HOST1X08_H +#define HOST1X_HOST1X08_H + +struct host1x; + +int host1x08_init(struct host1x *host); + +#endif diff --git a/drivers/gpu/host1x/hw/host1x08_hardware.h b/drivers/gpu/host1x/hw/host1x08_hardware.h new file mode 100644 index 000000000000..936243060bff --- /dev/null +++ b/drivers/gpu/host1x/hw/host1x08_hardware.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Tegra host1x Register Offsets for Tegra234 + * + * Copyright (c) 2022 NVIDIA Corporation. + */ + +#ifndef __HOST1X_HOST1X08_HARDWARE_H +#define __HOST1X_HOST1X08_HARDWARE_H + +#include <linux/types.h> +#include <linux/bitops.h> + +#include "hw_host1x08_uclass.h" +#include "hw_host1x08_vm.h" +#include "hw_host1x08_hypervisor.h" +#include "hw_host1x08_common.h" + +#include "opcodes.h" + +#endif diff --git a/drivers/gpu/host1x/hw/hw_host1x08_channel.h b/drivers/gpu/host1x/hw/hw_host1x08_channel.h new file mode 100644 index 000000000000..c9272d2ab14a --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x08_channel.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022 NVIDIA Corporation. 
+ */ + +#ifndef HOST1X_HW_HOST1X08_CHANNEL_H +#define HOST1X_HW_HOST1X08_CHANNEL_H + +#define HOST1X_CHANNEL_SMMU_STREAMID 0x084 + +#endif diff --git a/drivers/gpu/host1x/hw/hw_host1x08_common.h b/drivers/gpu/host1x/hw/hw_host1x08_common.h new file mode 100644 index 000000000000..8e0c99150ec2 --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x08_common.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022 NVIDIA Corporation. + */ + +#define HOST1X_COMMON_OFA_MLOCK 0x4050 +#define HOST1X_COMMON_NVJPG1_MLOCK 0x4070 +#define HOST1X_COMMON_VIC_MLOCK 0x4078 +#define HOST1X_COMMON_NVENC_MLOCK 0x407c +#define HOST1X_COMMON_NVDEC_MLOCK 0x4080 +#define HOST1X_COMMON_NVJPG_MLOCK 0x4084 diff --git a/drivers/gpu/host1x/hw/hw_host1x08_hypervisor.h b/drivers/gpu/host1x/hw/hw_host1x08_hypervisor.h new file mode 100644 index 000000000000..22964324c914 --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x08_hypervisor.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022 NVIDIA Corporation. + */ + +#define HOST1X_HV_SYNCPT_PROT_EN 0x1724 +#define HOST1X_HV_SYNCPT_PROT_EN_CH_EN BIT(1) +#define HOST1X_HV_CH_MLOCK_EN(x) (0x1700 + (x * 4)) +#define HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(x) (0x1710 + (x * 4)) diff --git a/drivers/gpu/host1x/hw/hw_host1x08_uclass.h b/drivers/gpu/host1x/hw/hw_host1x08_uclass.h new file mode 100644 index 000000000000..724cccd71aa1 --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x08_uclass.h @@ -0,0 +1,181 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2018 NVIDIA Corporation. + */ + + /* + * Function naming determines intended use: + * + * <x>_r(void) : Returns the offset for register <x>. + * + * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. + * + * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. + * + * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field <y> of register <x>. This value + * can be |'d with others to produce a full register value for + * register <x>. + * + * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This + * value can be ~'d and then &'d to clear the value of field <y> for + * register <x>. + * + * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted + * to place it at field <y> of register <x>. This value can be |'d + * with others to produce a full register value for <x>. + * + * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register + * <x> value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field <y> of register <x>. + * + * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for + * field <y> of register <x>. This value is suitable for direct + * comparison with unshifted values appropriate for use in field <y> + * of register <x>. 
+ */ + +#ifndef HOST1X_HW_HOST1X08_UCLASS_H +#define HOST1X_HW_HOST1X08_UCLASS_H + +static inline u32 host1x_uclass_incr_syncpt_r(void) +{ + return 0x0; +} +#define HOST1X_UCLASS_INCR_SYNCPT \ + host1x_uclass_incr_syncpt_r() +static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v) +{ + return (v & 0xff) << 10; +} +#define HOST1X_UCLASS_INCR_SYNCPT_COND_F(v) \ + host1x_uclass_incr_syncpt_cond_f(v) +static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v) +{ + return (v & 0xff) << 0; +} +#define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \ + host1x_uclass_incr_syncpt_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_r(void) +{ + return 0x8; +} +#define HOST1X_UCLASS_WAIT_SYNCPT \ + host1x_uclass_wait_syncpt_r() +static inline u32 host1x_uclass_wait_syncpt_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_INDX_F(v) \ + host1x_uclass_wait_syncpt_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_thresh_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_THRESH_F(v) \ + host1x_uclass_wait_syncpt_thresh_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_r(void) +{ + return 0x9; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE \ + host1x_uclass_wait_syncpt_base_r() +static inline u32 host1x_uclass_wait_syncpt_base_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_INDX_F(v) \ + host1x_uclass_wait_syncpt_base_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_base_indx_f(u32 v) +{ + return (v & 0xff) << 16; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_BASE_INDX_F(v) \ + host1x_uclass_wait_syncpt_base_base_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_offset_f(u32 v) +{ + return (v & 0xffff) << 0; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_OFFSET_F(v) \ + host1x_uclass_wait_syncpt_base_offset_f(v) +static inline u32 host1x_uclass_load_syncpt_base_r(void) +{ + return 0xb; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_BASE \ + host1x_uclass_load_syncpt_base_r() +static inline u32 host1x_uclass_load_syncpt_base_base_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(v) \ + host1x_uclass_load_syncpt_base_base_indx_f(v) +static inline u32 host1x_uclass_load_syncpt_base_value_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(v) \ + host1x_uclass_load_syncpt_base_value_f(v) +static inline u32 host1x_uclass_incr_syncpt_base_base_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_INCR_SYNCPT_BASE_BASE_INDX_F(v) \ + host1x_uclass_incr_syncpt_base_base_indx_f(v) +static inline u32 host1x_uclass_incr_syncpt_base_offset_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +#define HOST1X_UCLASS_INCR_SYNCPT_BASE_OFFSET_F(v) \ + host1x_uclass_incr_syncpt_base_offset_f(v) +static inline u32 host1x_uclass_indoff_r(void) +{ + return 0x2d; +} +#define HOST1X_UCLASS_INDOFF \ + host1x_uclass_indoff_r() +static inline u32 host1x_uclass_indoff_indbe_f(u32 v) +{ + return (v & 0xf) << 28; +} +#define HOST1X_UCLASS_INDOFF_INDBE_F(v) \ + host1x_uclass_indoff_indbe_f(v) +static inline u32 host1x_uclass_indoff_autoinc_f(u32 v) +{ + return (v & 0x1) << 27; +} +#define HOST1X_UCLASS_INDOFF_AUTOINC_F(v) \ + host1x_uclass_indoff_autoinc_f(v) +static inline u32 host1x_uclass_indoff_indmodid_f(u32 v) +{ + return (v & 0xff) << 18; +} +#define HOST1X_UCLASS_INDOFF_INDMODID_F(v) \ + host1x_uclass_indoff_indmodid_f(v) +static inline u32 host1x_uclass_indoff_indroffset_f(u32 v) +{ + return (v & 0xffff) << 2; +} +#define 
HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ + host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_indoff_rwn_read_v(void) +{ + return 1; +} +#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ + host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_load_syncpt_payload_32_r(void) +{ + return 0x4e; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \ + host1x_uclass_load_syncpt_payload_32_r() +static inline u32 host1x_uclass_wait_syncpt_32_r(void) +{ + return 0x50; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_32 \ + host1x_uclass_wait_syncpt_32_r() + +#endif diff --git a/drivers/gpu/host1x/hw/hw_host1x08_vm.h b/drivers/gpu/host1x/hw/hw_host1x08_vm.h new file mode 100644 index 000000000000..1455a4670bf8 --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x08_vm.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022 NVIDIA Corporation. + */ + +#define HOST1X_CHANNEL_DMASTART 0x0000 +#define HOST1X_CHANNEL_DMASTART_HI 0x0004 +#define HOST1X_CHANNEL_DMAPUT 0x0008 +#define HOST1X_CHANNEL_DMAPUT_HI 0x000c +#define HOST1X_CHANNEL_DMAGET 0x0010 +#define HOST1X_CHANNEL_DMAGET_HI 0x0014 +#define HOST1X_CHANNEL_DMAEND 0x0018 +#define HOST1X_CHANNEL_DMAEND_HI 0x001c +#define HOST1X_CHANNEL_DMACTRL 0x0020 +#define HOST1X_CHANNEL_DMACTRL_DMASTOP BIT(0) +#define HOST1X_CHANNEL_DMACTRL_DMAGETRST BIT(1) +#define HOST1X_CHANNEL_DMACTRL_DMAINITGET BIT(2) +#define HOST1X_CHANNEL_CMDFIFO_STAT 0x0024 +#define HOST1X_CHANNEL_CMDFIFO_STAT_EMPTY BIT(13) +#define HOST1X_CHANNEL_CMDFIFO_RDATA 0x0028 +#define HOST1X_CHANNEL_CMDP_OFFSET 0x0030 +#define HOST1X_CHANNEL_CMDP_CLASS 0x0034 +#define HOST1X_CHANNEL_CHANNELSTAT 0x0038 +#define HOST1X_CHANNEL_CMDPROC_STOP 0x0048 +#define HOST1X_CHANNEL_TEARDOWN 0x004c +#define HOST1X_CHANNEL_SMMU_STREAMID 0x0084 + +#define HOST1X_SYNC_SYNCPT_CPU_INCR(x) (0x6400 + 4 * (x)) +#define HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(x) (0x6600 + 4 * (x)) +#define HOST1X_SYNC_SYNCPT_INTR_DEST(x) (0x6684 + 4 * (x)) +#define HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(x) (0x770c + 4 * (x)) +#define HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(x) (0x7790 + 4 * (x)) +#define HOST1X_SYNC_SYNCPT(x) (0x8080 + 4 * (x)) +#define HOST1X_SYNC_SYNCPT_INT_THRESH(x) (0xa088 + 4 * (x)) +#define HOST1X_SYNC_SYNCPT_CH_APP(x) (0xb090 + 4 * (x)) +#define HOST1X_SYNC_SYNCPT_CH_APP_CH(v) (((v) & 0x3f) << 8) diff --git a/drivers/gpu/host1x/hw/intr_hw.c b/drivers/gpu/host1x/hw/intr_hw.c index f56375ee6e71..9acccdb139e6 100644 --- a/drivers/gpu/host1x/hw/intr_hw.c +++ b/drivers/gpu/host1x/hw/intr_hw.c @@ -76,6 +76,17 @@ static void intr_hw_init(struct host1x *host, u32 cpm) /* update host clocks per usec */ host1x_sync_writel(host, cpm, HOST1X_SYNC_USEC_CLK); #endif +#if HOST1X_HW >= 8 + u32 id; + + /* + * Program threshold interrupt destination among 8 lines per VM, + * per syncpoint. For now, just direct all to the first interrupt + * line. + */ + for (id = 0; id < host->info->nb_pts; id++) + host1x_sync_writel(host, 0, HOST1X_SYNC_SYNCPT_INTR_DEST(id)); +#endif } static int diff --git a/drivers/gpu/host1x/hw/opcodes.h b/drivers/gpu/host1x/hw/opcodes.h new file mode 100644 index 000000000000..649614499b04 --- /dev/null +++ b/drivers/gpu/host1x/hw/opcodes.h @@ -0,0 +1,150 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Tegra host1x opcodes + * + * Copyright (c) 2022 NVIDIA Corporation. 
+ */ + +#ifndef __HOST1X_OPCODES_H +#define __HOST1X_OPCODES_H + +#include <linux/types.h> + +static inline u32 host1x_class_host_wait_syncpt( + unsigned indx, unsigned threshold) +{ + return host1x_uclass_wait_syncpt_indx_f(indx) + | host1x_uclass_wait_syncpt_thresh_f(threshold); +} + +static inline u32 host1x_class_host_load_syncpt_base( + unsigned indx, unsigned threshold) +{ + return host1x_uclass_load_syncpt_base_base_indx_f(indx) + | host1x_uclass_load_syncpt_base_value_f(threshold); +} + +static inline u32 host1x_class_host_wait_syncpt_base( + unsigned indx, unsigned base_indx, unsigned offset) +{ + return host1x_uclass_wait_syncpt_base_indx_f(indx) + | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx) + | host1x_uclass_wait_syncpt_base_offset_f(offset); +} + +static inline u32 host1x_class_host_incr_syncpt_base( + unsigned base_indx, unsigned offset) +{ + return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx) + | host1x_uclass_incr_syncpt_base_offset_f(offset); +} + +static inline u32 host1x_class_host_incr_syncpt( + unsigned cond, unsigned indx) +{ + return host1x_uclass_incr_syncpt_cond_f(cond) + | host1x_uclass_incr_syncpt_indx_f(indx); +} + +static inline u32 host1x_class_host_indoff_reg_write( + unsigned mod_id, unsigned offset, bool auto_inc) +{ + u32 v = host1x_uclass_indoff_indbe_f(0xf) + | host1x_uclass_indoff_indmodid_f(mod_id) + | host1x_uclass_indoff_indroffset_f(offset); + if (auto_inc) + v |= host1x_uclass_indoff_autoinc_f(1); + return v; +} + +static inline u32 host1x_class_host_indoff_reg_read( + unsigned mod_id, unsigned offset, bool auto_inc) +{ + u32 v = host1x_uclass_indoff_indmodid_f(mod_id) + | host1x_uclass_indoff_indroffset_f(offset) + | host1x_uclass_indoff_rwn_read_v(); + if (auto_inc) + v |= host1x_uclass_indoff_autoinc_f(1); + return v; +} + +static inline u32 host1x_opcode_setclass( + unsigned class_id, unsigned offset, unsigned mask) +{ + return (0 << 28) | (offset << 16) | (class_id << 6) | mask; +} + +static inline u32 host1x_opcode_incr(unsigned offset, unsigned count) +{ + return (1 << 28) | (offset << 16) | count; +} + +static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count) +{ + return (2 << 28) | (offset << 16) | count; +} + +static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask) +{ + return (3 << 28) | (offset << 16) | mask; +} + +static inline u32 host1x_opcode_imm(unsigned offset, unsigned value) +{ + return (4 << 28) | (offset << 16) | value; +} + +static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx) +{ + return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(), + host1x_class_host_incr_syncpt(cond, indx)); +} + +static inline u32 host1x_opcode_restart(unsigned address) +{ + return (5 << 28) | (address >> 4); +} + +static inline u32 host1x_opcode_gather(unsigned count) +{ + return (6 << 28) | count; +} + +static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count) +{ + return (6 << 28) | (offset << 16) | BIT(15) | count; +} + +static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count) +{ + return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count; +} + +static inline u32 host1x_opcode_setstreamid(unsigned streamid) +{ + return (7 << 28) | streamid; +} + +static inline u32 host1x_opcode_setpayload(unsigned payload) +{ + return (9 << 28) | payload; +} + +static inline u32 host1x_opcode_gather_wide(unsigned count) +{ + return (12 << 28) | count; +} + +static inline u32 host1x_opcode_acquire_mlock(unsigned mlock) +{ + return (14 << 28) | (0 
<< 24) | mlock; +} + +static inline u32 host1x_opcode_release_mlock(unsigned mlock) +{ + return (14 << 28) | (1 << 24) | mlock; +} + +#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0) + +#endif
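
As a rough, stand-alone illustration of the new command encodings added in opcodes.h above (SETSTREAMID, SETPAYLOAD and ACQUIRE/RELEASE_MLOCK, which channel_program_cdma() strings together around each Tegra234 job), the following C sketch re-derives a few of the words such a sequence contains. It is not part of the patch: the VIC class ID (0x5d) matches HOST1X_CLASS_VIC, and the 0x30 register offset comes from the VIC channel entry of tegra234_sid_table above, but the stream ID value is a made-up placeholder.

/*
 * Illustrative user-space sketch, not kernel code: encode a few Tegra234
 * command words using the same bit layouts as the opcodes.h helpers above.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t opcode_setclass(unsigned class_id, unsigned offset, unsigned mask)
{
	return (0u << 28) | (offset << 16) | (class_id << 6) | mask;
}

static uint32_t opcode_setstreamid(unsigned streamid)
{
	return (7u << 28) | streamid;
}

static uint32_t opcode_setpayload(unsigned payload)
{
	return (9u << 28) | payload;
}

static uint32_t opcode_acquire_mlock(unsigned mlock)
{
	return (14u << 28) | (0u << 24) | mlock;
}

static uint32_t opcode_release_mlock(unsigned mlock)
{
	return (14u << 28) | (1u << 24) | mlock;
}

int main(void)
{
	unsigned vic_class = 0x5d;    /* HOST1X_CLASS_VIC */
	unsigned stream_id = 0x7f;    /* placeholder SMMU stream ID */
	unsigned streamid_reg = 0x30; /* VIC channel-path stream ID register, byte offset */

	printf("acquire_mlock: %08" PRIx32 "\n", opcode_acquire_mlock(vic_class));      /* e000005d */
	printf("setclass:      %08" PRIx32 "\n", opcode_setclass(vic_class, 0, 0));     /* 00001740 */
	printf("setpayload:    %08" PRIx32 "\n", opcode_setpayload(stream_id));         /* 9000007f */
	printf("setstreamid:   %08" PRIx32 "\n", opcode_setstreamid(streamid_reg / 4)); /* 7000000c */
	printf("release_mlock: %08" PRIx32 "\n", opcode_release_mlock(vic_class));      /* e100005d */
	return 0;
}

In the driver itself these words are pushed through host1x_cdma_push()/host1x_cdma_push_wide() in channel_program_cdma(), bracketed by the syncpoint-based idle waits shown in the channel_hw.c hunks above.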