From 754716874389ccbea5ee03174df8ad9e72e41880 Mon Sep 17 00:00:00 2001 From: Terje Bergstrom Date: Fri, 22 Mar 2013 16:34:01 +0200 Subject: gpu: host1x: Add host1x driver Add host1x, the driver for host1x and its client unit 2D. The Tegra host1x module is the DMA engine for register access to Tegra's graphics- and multimedia-related modules. The modules served by host1x are referred to as clients. host1x includes some other functionality, such as synchronization. Signed-off-by: Arto Merilainen Signed-off-by: Terje Bergstrom Reviewed-by: Thierry Reding Tested-by: Thierry Reding Tested-by: Erik Faye-Lund Signed-off-by: Thierry Reding --- drivers/gpu/host1x/hw/host1x01.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 drivers/gpu/host1x/hw/host1x01.c (limited to 'drivers/gpu/host1x/hw/host1x01.c') diff --git a/drivers/gpu/host1x/hw/host1x01.c b/drivers/gpu/host1x/hw/host1x01.c new file mode 100644 index 000000000000..612b4574c4b6 --- /dev/null +++ b/drivers/gpu/host1x/hw/host1x01.c @@ -0,0 +1,33 @@ +/* + * Host1x init for T20 and T30 Architecture Chips + * + * Copyright (c) 2011-2013, NVIDIA Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */ + +/* include hw specification */ +#include "hw/host1x01.h" +#include "hw/host1x01_hardware.h" + +/* include code */ +#include "hw/syncpt_hw.c" + +#include "dev.h" + +int host1x01_init(struct host1x *host) +{ + host->syncpt_op = &host1x_syncpt_ops; + + return 0; +} -- cgit From 7ede0b0bf3e2595d40d6195b6fe4c4dcef438830 Mon Sep 17 00:00:00 2001 From: Terje Bergstrom Date: Fri, 22 Mar 2013 16:34:02 +0200 Subject: gpu: host1x: Add syncpoint wait and interrupts Add support for sync point interrupts, and sync point wait. Sync point wait used interrupts for unblocking wait. Signed-off-by: Arto Merilainen Signed-off-by: Terje Bergstrom Reviewed-by: Thierry Reding Tested-by: Thierry Reding Tested-by: Erik Faye-Lund Signed-off-by: Thierry Reding --- drivers/gpu/host1x/Makefile | 1 + drivers/gpu/host1x/dev.c | 12 ++ drivers/gpu/host1x/dev.h | 51 +++++ drivers/gpu/host1x/hw/host1x01.c | 2 + drivers/gpu/host1x/hw/hw_host1x01_sync.h | 42 ++++ drivers/gpu/host1x/hw/intr_hw.c | 143 ++++++++++++++ drivers/gpu/host1x/intr.c | 328 +++++++++++++++++++++++++++++++ drivers/gpu/host1x/intr.h | 96 +++++++++ drivers/gpu/host1x/syncpt.c | 159 +++++++++++++++ drivers/gpu/host1x/syncpt.h | 12 ++ 10 files changed, 846 insertions(+) create mode 100644 drivers/gpu/host1x/hw/intr_hw.c create mode 100644 drivers/gpu/host1x/intr.c create mode 100644 drivers/gpu/host1x/intr.h (limited to 'drivers/gpu/host1x/hw/host1x01.c') diff --git a/drivers/gpu/host1x/Makefile b/drivers/gpu/host1x/Makefile index 363e6ab31517..5ef47ffce674 100644 --- a/drivers/gpu/host1x/Makefile +++ b/drivers/gpu/host1x/Makefile @@ -3,6 +3,7 @@ ccflags-y = -Idrivers/gpu/host1x host1x-y = \ syncpt.o \ dev.o \ + intr.o \ hw/host1x01.o obj-$(CONFIG_TEGRA_HOST1X) += host1x.o diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 0d6002cb67c1..b967f6e8df55 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -28,6 +28,7 @@ #include #include "dev.h" +#include "intr.h" #include "hw/host1x01.h" 
void host1x_sync_writel(struct host1x *host1x, u32 v, u32 r) @@ -123,13 +124,24 @@ static int host1x_probe(struct platform_device *pdev) return err; } + err = host1x_intr_init(host, syncpt_irq); + if (err) { + dev_err(&pdev->dev, "failed to initialize interrupts\n"); + goto fail_deinit_syncpt; + } + return 0; + +fail_deinit_syncpt: + host1x_syncpt_deinit(host); + return err; } static int __exit host1x_remove(struct platform_device *pdev) { struct host1x *host = platform_get_drvdata(pdev); + host1x_intr_deinit(host); host1x_syncpt_deinit(host); clk_disable_unprepare(host->clk); diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h index eaf602657f76..caf9cc62eb17 100644 --- a/drivers/gpu/host1x/dev.h +++ b/drivers/gpu/host1x/dev.h @@ -21,6 +21,7 @@ #include #include "syncpt.h" +#include "intr.h" struct host1x_syncpt; @@ -33,6 +34,17 @@ struct host1x_syncpt_ops { int (*patch_wait)(struct host1x_syncpt *syncpt, void *patch_addr); }; +struct host1x_intr_ops { + int (*init_host_sync)(struct host1x *host, u32 cpm, + void (*syncpt_thresh_work)(struct work_struct *work)); + void (*set_syncpt_threshold)( + struct host1x *host, u32 id, u32 thresh); + void (*enable_syncpt_intr)(struct host1x *host, u32 id); + void (*disable_syncpt_intr)(struct host1x *host, u32 id); + void (*disable_all_syncpt_intrs)(struct host1x *host); + int (*free_syncpt_irq)(struct host1x *host); +}; + struct host1x_info { int nb_channels; /* host1x: num channels supported */ int nb_pts; /* host1x: num syncpoints supported */ @@ -50,7 +62,13 @@ struct host1x { struct device *dev; struct clk *clk; + struct mutex intr_mutex; + struct workqueue_struct *intr_wq; + int intr_syncpt_irq; + const struct host1x_syncpt_ops *syncpt_op; + const struct host1x_intr_ops *intr_op; + }; void host1x_sync_writel(struct host1x *host1x, u32 r, u32 v); @@ -93,4 +111,37 @@ static inline int host1x_hw_syncpt_patch_wait(struct host1x *host, return host->syncpt_op->patch_wait(sp, patch_addr); } +static inline int 
host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm, + void (*syncpt_thresh_work)(struct work_struct *)) +{ + return host->intr_op->init_host_sync(host, cpm, syncpt_thresh_work); +} + +static inline void host1x_hw_intr_set_syncpt_threshold(struct host1x *host, + u32 id, u32 thresh) +{ + host->intr_op->set_syncpt_threshold(host, id, thresh); +} + +static inline void host1x_hw_intr_enable_syncpt_intr(struct host1x *host, + u32 id) +{ + host->intr_op->enable_syncpt_intr(host, id); +} + +static inline void host1x_hw_intr_disable_syncpt_intr(struct host1x *host, + u32 id) +{ + host->intr_op->disable_syncpt_intr(host, id); +} + +static inline void host1x_hw_intr_disable_all_syncpt_intrs(struct host1x *host) +{ + host->intr_op->disable_all_syncpt_intrs(host); +} + +static inline int host1x_hw_intr_free_syncpt_irq(struct host1x *host) +{ + return host->intr_op->free_syncpt_irq(host); +} #endif diff --git a/drivers/gpu/host1x/hw/host1x01.c b/drivers/gpu/host1x/hw/host1x01.c index 612b4574c4b6..f5c35fa66d05 100644 --- a/drivers/gpu/host1x/hw/host1x01.c +++ b/drivers/gpu/host1x/hw/host1x01.c @@ -21,6 +21,7 @@ #include "hw/host1x01_hardware.h" /* include code */ +#include "hw/intr_hw.c" #include "hw/syncpt_hw.c" #include "dev.h" @@ -28,6 +29,7 @@ int host1x01_init(struct host1x *host) { host->syncpt_op = &host1x_syncpt_ops; + host->intr_op = &host1x_intr_ops; return 0; } diff --git a/drivers/gpu/host1x/hw/hw_host1x01_sync.h b/drivers/gpu/host1x/hw/hw_host1x01_sync.h index 3af258b46e62..eea0bb06052a 100644 --- a/drivers/gpu/host1x/hw/hw_host1x01_sync.h +++ b/drivers/gpu/host1x/hw/hw_host1x01_sync.h @@ -59,6 +59,48 @@ static inline u32 host1x_sync_syncpt_r(unsigned int id) } #define HOST1X_SYNC_SYNCPT(id) \ host1x_sync_syncpt_r(id) +static inline u32 host1x_sync_syncpt_thresh_cpu0_int_status_r(unsigned int id) +{ + return 0x40 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(id) \ + host1x_sync_syncpt_thresh_cpu0_int_status_r(id) +static 
inline u32 host1x_sync_syncpt_thresh_int_disable_r(unsigned int id) +{ + return 0x60 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(id) \ + host1x_sync_syncpt_thresh_int_disable_r(id) +static inline u32 host1x_sync_syncpt_thresh_int_enable_cpu0_r(unsigned int id) +{ + return 0x68 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(id) \ + host1x_sync_syncpt_thresh_int_enable_cpu0_r(id) +static inline u32 host1x_sync_usec_clk_r(void) +{ + return 0x1a4; +} +#define HOST1X_SYNC_USEC_CLK \ + host1x_sync_usec_clk_r() +static inline u32 host1x_sync_ctxsw_timeout_cfg_r(void) +{ + return 0x1a8; +} +#define HOST1X_SYNC_CTXSW_TIMEOUT_CFG \ + host1x_sync_ctxsw_timeout_cfg_r() +static inline u32 host1x_sync_ip_busy_timeout_r(void) +{ + return 0x1bc; +} +#define HOST1X_SYNC_IP_BUSY_TIMEOUT \ + host1x_sync_ip_busy_timeout_r() +static inline u32 host1x_sync_syncpt_int_thresh_r(unsigned int id) +{ + return 0x500 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_SYNCPT_INT_THRESH(id) \ + host1x_sync_syncpt_int_thresh_r(id) static inline u32 host1x_sync_syncpt_base_r(unsigned int id) { return 0x600 + id * REGISTER_STRIDE; diff --git a/drivers/gpu/host1x/hw/intr_hw.c b/drivers/gpu/host1x/hw/intr_hw.c new file mode 100644 index 000000000000..b592eef1efcb --- /dev/null +++ b/drivers/gpu/host1x/hw/intr_hw.c @@ -0,0 +1,143 @@ +/* + * Tegra host1x Interrupt Management + * + * Copyright (C) 2010 Google, Inc. + * Copyright (c) 2010-2013, NVIDIA Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include +#include +#include +#include + +#include "intr.h" +#include "dev.h" + +/* + * Sync point threshold interrupt service function + * Handles sync point threshold triggers, in interrupt context + */ +static void host1x_intr_syncpt_handle(struct host1x_syncpt *syncpt) +{ + unsigned int id = syncpt->id; + struct host1x *host = syncpt->host; + + host1x_sync_writel(host, BIT_MASK(id), + HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(BIT_WORD(id))); + host1x_sync_writel(host, BIT_MASK(id), + HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(BIT_WORD(id))); + + queue_work(host->intr_wq, &syncpt->intr.work); +} + +static irqreturn_t syncpt_thresh_isr(int irq, void *dev_id) +{ + struct host1x *host = dev_id; + unsigned long reg; + int i, id; + + for (i = 0; i <= BIT_WORD(host->info->nb_pts); i++) { + reg = host1x_sync_readl(host, + HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i)); + for_each_set_bit(id, &reg, BITS_PER_LONG) { + struct host1x_syncpt *syncpt = + host->syncpt + (i * BITS_PER_LONG + id); + host1x_intr_syncpt_handle(syncpt); + } + } + + return IRQ_HANDLED; +} + +static void _host1x_intr_disable_all_syncpt_intrs(struct host1x *host) +{ + u32 i; + + for (i = 0; i <= BIT_WORD(host->info->nb_pts); ++i) { + host1x_sync_writel(host, 0xffffffffu, + HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(i)); + host1x_sync_writel(host, 0xffffffffu, + HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i)); + } +} + +static int _host1x_intr_init_host_sync(struct host1x *host, u32 cpm, + void (*syncpt_thresh_work)(struct work_struct *)) +{ + int i, err; + + host1x_hw_intr_disable_all_syncpt_intrs(host); + + for (i = 0; i < host->info->nb_pts; i++) + INIT_WORK(&host->syncpt[i].intr.work, syncpt_thresh_work); + + err = devm_request_irq(host->dev, host->intr_syncpt_irq, + syncpt_thresh_isr, IRQF_SHARED, + "host1x_syncpt", host); + if (IS_ERR_VALUE(err)) { + WARN_ON(1); + return err; + } + + /*
disable the ip_busy_timeout. this prevents write drops */ + host1x_sync_writel(host, 0, HOST1X_SYNC_IP_BUSY_TIMEOUT); + + /* + * increase the auto-ack timeout to the maximum value. 2d will hang + * otherwise on Tegra2. + */ + host1x_sync_writel(host, 0xff, HOST1X_SYNC_CTXSW_TIMEOUT_CFG); + + /* update host clocks per usec */ + host1x_sync_writel(host, cpm, HOST1X_SYNC_USEC_CLK); + + return 0; +} + +static void _host1x_intr_set_syncpt_threshold(struct host1x *host, + u32 id, u32 thresh) +{ + host1x_sync_writel(host, thresh, HOST1X_SYNC_SYNCPT_INT_THRESH(id)); +} + +static void _host1x_intr_enable_syncpt_intr(struct host1x *host, u32 id) +{ + host1x_sync_writel(host, BIT_MASK(id), + HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(BIT_WORD(id))); +} + +static void _host1x_intr_disable_syncpt_intr(struct host1x *host, u32 id) +{ + host1x_sync_writel(host, BIT_MASK(id), + HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(BIT_WORD(id))); + host1x_sync_writel(host, BIT_MASK(id), + HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(BIT_WORD(id))); +} + +static int _host1x_free_syncpt_irq(struct host1x *host) +{ + devm_free_irq(host->dev, host->intr_syncpt_irq, host); + flush_workqueue(host->intr_wq); + return 0; +} + +static const struct host1x_intr_ops host1x_intr_ops = { + .init_host_sync = _host1x_intr_init_host_sync, + .set_syncpt_threshold = _host1x_intr_set_syncpt_threshold, + .enable_syncpt_intr = _host1x_intr_enable_syncpt_intr, + .disable_syncpt_intr = _host1x_intr_disable_syncpt_intr, + .disable_all_syncpt_intrs = _host1x_intr_disable_all_syncpt_intrs, + .free_syncpt_irq = _host1x_free_syncpt_irq, +}; diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c new file mode 100644 index 000000000000..b1b5a80e3125 --- /dev/null +++ b/drivers/gpu/host1x/intr.c @@ -0,0 +1,328 @@ +/* + * Tegra host1x Interrupt Management + * + * Copyright (c) 2010-2013, NVIDIA Corporation.
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include + +#include "dev.h" +#include "intr.h" + +/* Wait list management */ + +enum waitlist_state { + WLS_PENDING, + WLS_REMOVED, + WLS_CANCELLED, + WLS_HANDLED +}; + +static void waiter_release(struct kref *kref) +{ + kfree(container_of(kref, struct host1x_waitlist, refcount)); +} + +/* + * add a waiter to a waiter queue, sorted by threshold + * returns true if it was added at the head of the queue + */ +static bool add_waiter_to_queue(struct host1x_waitlist *waiter, + struct list_head *queue) +{ + struct host1x_waitlist *pos; + u32 thresh = waiter->thresh; + + list_for_each_entry_reverse(pos, queue, list) + if ((s32)(pos->thresh - thresh) <= 0) { + list_add(&waiter->list, &pos->list); + return false; + } + + list_add(&waiter->list, queue); + return true; +} + +/* + * run through a waiter queue for a single sync point ID + * and gather all completed waiters into lists by actions + */ +static void remove_completed_waiters(struct list_head *head, u32 sync, + struct list_head completed[HOST1X_INTR_ACTION_COUNT]) +{ + struct list_head *dest; + struct host1x_waitlist *waiter, *next; + + list_for_each_entry_safe(waiter, next, head, list) { + if ((s32)(waiter->thresh - sync) > 0) + break; + + dest = completed + waiter->action; + + /* PENDING->REMOVED or CANCELLED->HANDLED */ + if (atomic_inc_return(&waiter->state) == WLS_HANDLED || !dest) { + 
list_del(&waiter->list); + kref_put(&waiter->refcount, waiter_release); + } else + list_move_tail(&waiter->list, dest); + } +} + +static void reset_threshold_interrupt(struct host1x *host, + struct list_head *head, + unsigned int id) +{ + u32 thresh = + list_first_entry(head, struct host1x_waitlist, list)->thresh; + + host1x_hw_intr_set_syncpt_threshold(host, id, thresh); + host1x_hw_intr_enable_syncpt_intr(host, id); +} + +static void action_wakeup(struct host1x_waitlist *waiter) +{ + wait_queue_head_t *wq = waiter->data; + wake_up(wq); +} + +static void action_wakeup_interruptible(struct host1x_waitlist *waiter) +{ + wait_queue_head_t *wq = waiter->data; + wake_up_interruptible(wq); +} + +typedef void (*action_handler)(struct host1x_waitlist *waiter); + +static action_handler action_handlers[HOST1X_INTR_ACTION_COUNT] = { + action_wakeup, + action_wakeup_interruptible, +}; + +static void run_handlers(struct list_head completed[HOST1X_INTR_ACTION_COUNT]) +{ + struct list_head *head = completed; + int i; + + for (i = 0; i < HOST1X_INTR_ACTION_COUNT; ++i, ++head) { + action_handler handler = action_handlers[i]; + struct host1x_waitlist *waiter, *next; + + list_for_each_entry_safe(waiter, next, head, list) { + list_del(&waiter->list); + handler(waiter); + WARN_ON(atomic_xchg(&waiter->state, WLS_HANDLED) != + WLS_REMOVED); + kref_put(&waiter->refcount, waiter_release); + } + } +} + +/* + * Remove & handle all waiters that have completed for the given syncpt + */ +static int process_wait_list(struct host1x *host, + struct host1x_syncpt *syncpt, + u32 threshold) +{ + struct list_head completed[HOST1X_INTR_ACTION_COUNT]; + unsigned int i; + int empty; + + for (i = 0; i < HOST1X_INTR_ACTION_COUNT; ++i) + INIT_LIST_HEAD(completed + i); + + spin_lock(&syncpt->intr.lock); + + remove_completed_waiters(&syncpt->intr.wait_head, threshold, + completed); + + empty = list_empty(&syncpt->intr.wait_head); + if (empty) + host1x_hw_intr_disable_syncpt_intr(host, syncpt->id); + else + 
reset_threshold_interrupt(host, &syncpt->intr.wait_head, + syncpt->id); + + spin_unlock(&syncpt->intr.lock); + + run_handlers(completed); + + return empty; +} + +/* + * Sync point threshold interrupt service thread function + * Handles sync point threshold triggers, in thread context + */ + +static void syncpt_thresh_work(struct work_struct *work) +{ + struct host1x_syncpt_intr *syncpt_intr = + container_of(work, struct host1x_syncpt_intr, work); + struct host1x_syncpt *syncpt = + container_of(syncpt_intr, struct host1x_syncpt, intr); + unsigned int id = syncpt->id; + struct host1x *host = syncpt->host; + + (void)process_wait_list(host, syncpt, + host1x_syncpt_load(host->syncpt + id)); +} + +int host1x_intr_add_action(struct host1x *host, u32 id, u32 thresh, + enum host1x_intr_action action, void *data, + struct host1x_waitlist *waiter, void **ref) +{ + struct host1x_syncpt *syncpt; + int queue_was_empty; + + if (waiter == NULL) { + pr_warn("%s: NULL waiter\n", __func__); + return -EINVAL; + } + + /* initialize a new waiter */ + INIT_LIST_HEAD(&waiter->list); + kref_init(&waiter->refcount); + if (ref) + kref_get(&waiter->refcount); + waiter->thresh = thresh; + waiter->action = action; + atomic_set(&waiter->state, WLS_PENDING); + waiter->data = data; + waiter->count = 1; + + syncpt = host->syncpt + id; + + spin_lock(&syncpt->intr.lock); + + queue_was_empty = list_empty(&syncpt->intr.wait_head); + + if (add_waiter_to_queue(waiter, &syncpt->intr.wait_head)) { + /* added at head of list - new threshold value */ + host1x_hw_intr_set_syncpt_threshold(host, id, thresh); + + /* added as first waiter - enable interrupt */ + if (queue_was_empty) + host1x_hw_intr_enable_syncpt_intr(host, id); + } + + spin_unlock(&syncpt->intr.lock); + + if (ref) + *ref = waiter; + return 0; +} + +void host1x_intr_put_ref(struct host1x *host, u32 id, void *ref) +{ + struct host1x_waitlist *waiter = ref; + struct host1x_syncpt *syncpt; + + while (atomic_cmpxchg(&waiter->state, WLS_PENDING, 
WLS_CANCELLED) == + WLS_REMOVED) + schedule(); + + syncpt = host->syncpt + id; + (void)process_wait_list(host, syncpt, + host1x_syncpt_load(host->syncpt + id)); + + kref_put(&waiter->refcount, waiter_release); +} + +int host1x_intr_init(struct host1x *host, unsigned int irq_sync) +{ + unsigned int id; + u32 nb_pts = host1x_syncpt_nb_pts(host); + + mutex_init(&host->intr_mutex); + host->intr_syncpt_irq = irq_sync; + host->intr_wq = create_workqueue("host_syncpt"); + if (!host->intr_wq) + return -ENOMEM; + + for (id = 0; id < nb_pts; ++id) { + struct host1x_syncpt *syncpt = host->syncpt + id; + + spin_lock_init(&syncpt->intr.lock); + INIT_LIST_HEAD(&syncpt->intr.wait_head); + snprintf(syncpt->intr.thresh_irq_name, + sizeof(syncpt->intr.thresh_irq_name), + "host1x_sp_%02d", id); + } + + host1x_intr_start(host); + + return 0; +} + +void host1x_intr_deinit(struct host1x *host) +{ + host1x_intr_stop(host); + destroy_workqueue(host->intr_wq); +} + +void host1x_intr_start(struct host1x *host) +{ + u32 hz = clk_get_rate(host->clk); + int err; + + mutex_lock(&host->intr_mutex); + err = host1x_hw_intr_init_host_sync(host, DIV_ROUND_UP(hz, 1000000), + syncpt_thresh_work); + if (err) { + mutex_unlock(&host->intr_mutex); + return; + } + mutex_unlock(&host->intr_mutex); +} + +void host1x_intr_stop(struct host1x *host) +{ + unsigned int id; + struct host1x_syncpt *syncpt = host->syncpt; + u32 nb_pts = host1x_syncpt_nb_pts(host); + + mutex_lock(&host->intr_mutex); + + host1x_hw_intr_disable_all_syncpt_intrs(host); + + for (id = 0; id < nb_pts; ++id) { + struct host1x_waitlist *waiter, *next; + + list_for_each_entry_safe(waiter, next, + &syncpt[id].intr.wait_head, list) { + if (atomic_cmpxchg(&waiter->state, + WLS_CANCELLED, WLS_HANDLED) == WLS_CANCELLED) { + list_del(&waiter->list); + kref_put(&waiter->refcount, waiter_release); + } + } + + if (!list_empty(&syncpt[id].intr.wait_head)) { + /* output diagnostics */ + mutex_unlock(&host->intr_mutex); + pr_warn("%s cannot stop syncpt 
intr id=%d\n", + __func__, id); + return; + } + } + + host1x_hw_intr_free_syncpt_irq(host); + + mutex_unlock(&host->intr_mutex); +} diff --git a/drivers/gpu/host1x/intr.h b/drivers/gpu/host1x/intr.h new file mode 100644 index 000000000000..a3f06abe94bb --- /dev/null +++ b/drivers/gpu/host1x/intr.h @@ -0,0 +1,96 @@ +/* + * Tegra host1x Interrupt Management + * + * Copyright (c) 2010-2013, NVIDIA Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __HOST1X_INTR_H +#define __HOST1X_INTR_H + +#include +#include + +struct host1x; + +enum host1x_intr_action { + /* + * Wake up a task. + * 'data' points to a wait_queue_head_t + */ + HOST1X_INTR_ACTION_WAKEUP, + + /* + * Wake up an interruptible task. + * 'data' points to a wait_queue_head_t + */ + HOST1X_INTR_ACTION_WAKEUP_INTERRUPTIBLE, + + HOST1X_INTR_ACTION_COUNT +}; + +struct host1x_syncpt_intr { + spinlock_t lock; + struct list_head wait_head; + char thresh_irq_name[12]; + struct work_struct work; +}; + +struct host1x_waitlist { + struct list_head list; + struct kref refcount; + u32 thresh; + enum host1x_intr_action action; + atomic_t state; + void *data; + int count; +}; + +/* + * Schedule an action to be taken when a sync point reaches the given threshold.
+ * + * @id the sync point + * @thresh the threshold + * @action the action to take + * @data a pointer to extra data depending on action, see above + * @waiter waiter structure - assumes ownership + * @ref must be passed if cancellation is possible, else NULL + * + * This is a non-blocking api. + */ +int host1x_intr_add_action(struct host1x *host, u32 id, u32 thresh, + enum host1x_intr_action action, void *data, + struct host1x_waitlist *waiter, void **ref); + +/* + * Unreference an action submitted to host1x_intr_add_action(). + * You must call this if you passed non-NULL as ref. + * @ref the ref returned from host1x_intr_add_action() + */ +void host1x_intr_put_ref(struct host1x *host, u32 id, void *ref); + +/* Initialize host1x sync point interrupt */ +int host1x_intr_init(struct host1x *host, unsigned int irq_sync); + +/* Deinitialize host1x sync point interrupt */ +void host1x_intr_deinit(struct host1x *host); + +/* Enable host1x sync point interrupt */ +void host1x_intr_start(struct host1x *host); + +/* Disable host1x sync point interrupt */ +void host1x_intr_stop(struct host1x *host); + +irqreturn_t host1x_syncpt_thresh_fn(void *dev_id); +#endif diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c index 30385f62daee..07fad1412bab 100644 --- a/drivers/gpu/host1x/syncpt.c +++ b/drivers/gpu/host1x/syncpt.c @@ -24,6 +24,10 @@ #include "syncpt.h" #include "dev.h" +#include "intr.h" + +#define SYNCPT_CHECK_PERIOD (2 * HZ) +#define MAX_STUCK_CHECK_COUNT 15 static struct host1x_syncpt *_host1x_syncpt_alloc(struct host1x *host, struct device *dev, @@ -141,6 +145,161 @@ void host1x_syncpt_incr(struct host1x_syncpt *sp) host1x_syncpt_cpu_incr(sp); } +/* + * Update sync point from hardware, and return true if syncpoint is expired, + * false if we may need to wait + */ +static bool syncpt_load_min_is_expired(struct host1x_syncpt *sp, u32 thresh) +{ + host1x_hw_syncpt_load(sp->host, sp); + return host1x_syncpt_is_expired(sp, thresh); +} + +/* + * Main
entrypoint for syncpoint value waits. + */ +int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout, + u32 *value) +{ + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); + void *ref; + struct host1x_waitlist *waiter; + int err = 0, check_count = 0; + u32 val; + + if (value) + *value = 0; + + /* first check cache */ + if (host1x_syncpt_is_expired(sp, thresh)) { + if (value) + *value = host1x_syncpt_load(sp); + return 0; + } + + /* try to read from register */ + val = host1x_hw_syncpt_load(sp->host, sp); + if (host1x_syncpt_is_expired(sp, thresh)) { + if (value) + *value = val; + goto done; + } + + if (!timeout) { + err = -EAGAIN; + goto done; + } + + /* allocate a waiter */ + waiter = kzalloc(sizeof(*waiter), GFP_KERNEL); + if (!waiter) { + err = -ENOMEM; + goto done; + } + + /* schedule a wakeup when the syncpoint value is reached */ + err = host1x_intr_add_action(sp->host, sp->id, thresh, + HOST1X_INTR_ACTION_WAKEUP_INTERRUPTIBLE, + &wq, waiter, &ref); + if (err) + goto done; + + err = -EAGAIN; + /* Caller-specified timeout may be impractically low */ + if (timeout < 0) + timeout = LONG_MAX; + + /* wait for the syncpoint, or timeout, or signal */ + while (timeout) { + long check = min_t(long, SYNCPT_CHECK_PERIOD, timeout); + int remain = wait_event_interruptible_timeout(wq, + syncpt_load_min_is_expired(sp, thresh), + check); + if (remain > 0 || host1x_syncpt_is_expired(sp, thresh)) { + if (value) + *value = host1x_syncpt_load(sp); + err = 0; + break; + } + if (remain < 0) { + err = remain; + break; + } + timeout -= check; + if (timeout && check_count <= MAX_STUCK_CHECK_COUNT) { + dev_warn(sp->host->dev, + "%s: syncpoint id %d (%s) stuck waiting %d, timeout=%ld\n", + current->comm, sp->id, sp->name, + thresh, timeout); + check_count++; + } + } + host1x_intr_put_ref(sp->host, sp->id, ref); + +done: + return err; +} +EXPORT_SYMBOL(host1x_syncpt_wait); + +/* + * Returns true if syncpoint is expired, false if we may need to wait + */ +bool 
host1x_syncpt_is_expired(struct host1x_syncpt *sp, u32 thresh) +{ + u32 current_val; + u32 future_val; + smp_rmb(); + current_val = (u32)atomic_read(&sp->min_val); + future_val = (u32)atomic_read(&sp->max_val); + + /* Note the use of unsigned arithmetic here (mod 1<<32). + * + * c = current_val = min_val = the current value of the syncpoint. + * t = thresh = the value we are checking + * f = future_val = max_val = the value c will reach when all + * outstanding increments have completed. + * + * Note that c always chases f until it reaches f. + * + * Dtf = (f - t) + * Dtc = (c - t) + * + * Consider all cases: + * + * A) .....c..t..f..... Dtf < Dtc need to wait + * B) .....c.....f..t.. Dtf > Dtc expired + * C) ..t..c.....f..... Dtf > Dtc expired (Dct very large) + * + * Any case where f==c: always expired (for any t). Dtf == Dtc + * Any case where t==c: always expired (for any f). Dtf >= Dtc (because Dtc==0) + * Any case where t==f!=c: always wait. Dtf < Dtc (because Dtf==0, + * Dtc!=0) + * + * Other cases: + * + * A) .....t..f..c..... Dtf < Dtc need to wait + * A) .....f..c..t..... Dtf < Dtc need to wait + * A) .....f..t..c..... Dtf > Dtc expired + * + * So: + * Dtf >= Dtc implies EXPIRED (return true) + * Dtf < Dtc implies WAIT (return false) + * + * Note: If t is expired then we *cannot* wait on it. We would wait + * forever (hang the system). + * + * Note: do NOT get clever and remove the -thresh from both sides. It + * is NOT the same. + * + * If future value is zero, we have a client managed sync point. In that + * case we do a direct comparison.
+ */ + if (!host1x_syncpt_client_managed(sp)) + return future_val - thresh >= current_val - thresh; + else + return (s32)(current_val - thresh) >= 0; +} + int host1x_syncpt_init(struct host1x *host) { struct host1x_syncpt *syncpt; diff --git a/drivers/gpu/host1x/syncpt.h b/drivers/gpu/host1x/syncpt.h index efa2b6e659d6..17c1616de100 100644 --- a/drivers/gpu/host1x/syncpt.h +++ b/drivers/gpu/host1x/syncpt.h @@ -23,6 +23,8 @@ #include #include +#include "intr.h" + struct host1x; struct host1x_syncpt { @@ -34,6 +36,9 @@ struct host1x_syncpt { int client_managed; struct host1x *host; struct device *dev; + + /* interrupt data */ + struct host1x_syncpt_intr intr; }; /* Initialize sync point array */ @@ -113,6 +118,9 @@ void host1x_syncpt_cpu_incr(struct host1x_syncpt *sp); /* Load current value from hardware to the shadow register. */ u32 host1x_syncpt_load(struct host1x_syncpt *sp); +/* Check if the given syncpoint value has already passed */ +bool host1x_syncpt_is_expired(struct host1x_syncpt *sp, u32 thresh); + /* Save host1x sync point state into shadow registers. */ void host1x_syncpt_save(struct host1x *host); @@ -128,6 +136,10 @@ void host1x_syncpt_incr(struct host1x_syncpt *sp); /* Indicate future operations by incrementing the sync point max. */ u32 host1x_syncpt_incr_max(struct host1x_syncpt *sp, u32 incrs); +/* Wait until sync point reaches a threshold value, or a timeout. */ +int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, + long timeout, u32 *value); + /* Check if sync point id is valid. */ static inline int host1x_syncpt_is_valid(struct host1x_syncpt *sp) { -- cgit From 6579324a41cc414009a601738b70a53d6376325c Mon Sep 17 00:00:00 2001 From: Terje Bergstrom Date: Fri, 22 Mar 2013 16:34:03 +0200 Subject: gpu: host1x: Add channel support Add support for host1x client modules, and host1x channels to submit work to the clients. 
Signed-off-by: Arto Merilainen Signed-off-by: Terje Bergstrom Reviewed-by: Thierry Reding Tested-by: Thierry Reding Tested-by: Erik Faye-Lund Signed-off-by: Thierry Reding --- drivers/gpu/host1x/Kconfig | 12 + drivers/gpu/host1x/Makefile | 3 + drivers/gpu/host1x/cdma.c | 487 ++++++++++++++++++++++ drivers/gpu/host1x/cdma.h | 100 +++++ drivers/gpu/host1x/channel.c | 126 ++++++ drivers/gpu/host1x/channel.h | 52 +++ drivers/gpu/host1x/dev.c | 17 + drivers/gpu/host1x/dev.h | 113 ++++++ drivers/gpu/host1x/host1x.h | 28 ++ drivers/gpu/host1x/host1x_bo.h | 87 ++++ drivers/gpu/host1x/hw/cdma_hw.c | 324 +++++++++++++++ drivers/gpu/host1x/hw/channel_hw.c | 143 +++++++ drivers/gpu/host1x/hw/host1x01.c | 5 + drivers/gpu/host1x/hw/host1x01_hardware.h | 116 ++++++ drivers/gpu/host1x/hw/hw_host1x01_channel.h | 102 +++++ drivers/gpu/host1x/hw/hw_host1x01_sync.h | 12 + drivers/gpu/host1x/hw/hw_host1x01_uclass.h | 168 ++++++++ drivers/gpu/host1x/hw/syncpt_hw.c | 11 + drivers/gpu/host1x/intr.c | 28 +- drivers/gpu/host1x/intr.h | 6 + drivers/gpu/host1x/job.c | 603 ++++++++++++++++++++++++++++ drivers/gpu/host1x/job.h | 162 ++++++++ drivers/gpu/host1x/syncpt.c | 11 + drivers/gpu/host1x/syncpt.h | 6 + include/trace/events/host1x.h | 192 +++++++++ 25 files changed, 2913 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/host1x/cdma.c create mode 100644 drivers/gpu/host1x/cdma.h create mode 100644 drivers/gpu/host1x/channel.c create mode 100644 drivers/gpu/host1x/channel.h create mode 100644 drivers/gpu/host1x/host1x.h create mode 100644 drivers/gpu/host1x/host1x_bo.h create mode 100644 drivers/gpu/host1x/hw/cdma_hw.c create mode 100644 drivers/gpu/host1x/hw/channel_hw.c create mode 100644 drivers/gpu/host1x/hw/hw_host1x01_channel.h create mode 100644 drivers/gpu/host1x/hw/hw_host1x01_uclass.h create mode 100644 drivers/gpu/host1x/job.c create mode 100644 drivers/gpu/host1x/job.h (limited to 'drivers/gpu/host1x/hw/host1x01.c') diff --git a/drivers/gpu/host1x/Kconfig 
b/drivers/gpu/host1x/Kconfig index 567ae8b7c7c9..7d6bed222542 100644 --- a/drivers/gpu/host1x/Kconfig +++ b/drivers/gpu/host1x/Kconfig @@ -8,3 +8,15 @@ config TEGRA_HOST1X Tegra's graphics- and multimedia-related modules. The modules served by host1x are referred to as clients. host1x includes some other functionality, such as synchronization. + +if TEGRA_HOST1X + +config TEGRA_HOST1X_FIREWALL + bool "Enable HOST1X security firewall" + default y + help + Say yes if kernel should protect command streams from tampering. + + If unsure, choose Y. + +endif diff --git a/drivers/gpu/host1x/Makefile b/drivers/gpu/host1x/Makefile index 5ef47ffce674..06a995b60902 100644 --- a/drivers/gpu/host1x/Makefile +++ b/drivers/gpu/host1x/Makefile @@ -4,6 +4,9 @@ host1x-y = \ syncpt.o \ dev.o \ intr.o \ + cdma.o \ + channel.o \ + job.o \ hw/host1x01.o obj-$(CONFIG_TEGRA_HOST1X) += host1x.o diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c new file mode 100644 index 000000000000..33935de91bb1 --- /dev/null +++ b/drivers/gpu/host1x/cdma.c @@ -0,0 +1,487 @@ +/* + * Tegra host1x Command DMA + * + * Copyright (c) 2010-2013, NVIDIA Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cdma.h" +#include "channel.h" +#include "dev.h" +#include "debug.h" +#include "host1x_bo.h" +#include "job.h" + +/* + * push_buffer + * + * The push buffer is a circular array of words to be fetched by command DMA. + * Note that it works slightly differently to the sync queue; fence == pos + * means that the push buffer is full, not empty. + */ + +#define HOST1X_PUSHBUFFER_SLOTS 512 + +/* + * Clean up push buffer resources + */ +static void host1x_pushbuffer_destroy(struct push_buffer *pb) +{ + struct host1x_cdma *cdma = pb_to_cdma(pb); + struct host1x *host1x = cdma_to_host1x(cdma); + + if (pb->phys != 0) + dma_free_writecombine(host1x->dev, pb->size_bytes + 4, + pb->mapped, pb->phys); + + pb->mapped = NULL; + pb->phys = 0; +} + +/* + * Init push buffer resources + */ +static int host1x_pushbuffer_init(struct push_buffer *pb) +{ + struct host1x_cdma *cdma = pb_to_cdma(pb); + struct host1x *host1x = cdma_to_host1x(cdma); + + pb->mapped = NULL; + pb->phys = 0; + pb->size_bytes = HOST1X_PUSHBUFFER_SLOTS * 8; + + /* initialize buffer pointers */ + pb->fence = pb->size_bytes - 8; + pb->pos = 0; + + /* allocate and map pushbuffer memory */ + pb->mapped = dma_alloc_writecombine(host1x->dev, pb->size_bytes + 4, + &pb->phys, GFP_KERNEL); + if (!pb->mapped) + goto fail; + + host1x_hw_pushbuffer_init(host1x, pb); + + return 0; + +fail: + host1x_pushbuffer_destroy(pb); + return -ENOMEM; +} + +/* + * Push two words to the push buffer + * Caller must ensure push buffer is not full + */ +static void host1x_pushbuffer_push(struct push_buffer *pb, u32 op1, u32 op2) +{ + u32 pos = pb->pos; + u32 *p = (u32 *)((void *)pb->mapped + pos); + WARN_ON(pos == pb->fence); + *(p++) = op1; + *(p++) = op2; + pb->pos = (pos + 8) & (pb->size_bytes - 1); +} + +/* + * Pop a number of two word slots from the push buffer + * Caller must ensure push buffer is not empty + */ +static void
host1x_pushbuffer_pop(struct push_buffer *pb, unsigned int slots) +{ + /* Advance the next write position */ + pb->fence = (pb->fence + slots * 8) & (pb->size_bytes - 1); +} + +/* + * Return the number of two word slots free in the push buffer + */ +static u32 host1x_pushbuffer_space(struct push_buffer *pb) +{ + return ((pb->fence - pb->pos) & (pb->size_bytes - 1)) / 8; +} + +/* + * Sleep (if necessary) until the requested event happens + * - CDMA_EVENT_SYNC_QUEUE_EMPTY : sync queue is completely empty. + * - Returns 1 + * - CDMA_EVENT_PUSH_BUFFER_SPACE : there is space in the push buffer + * - Return the amount of space (> 0) + * Must be called with the cdma lock held. + */ +unsigned int host1x_cdma_wait_locked(struct host1x_cdma *cdma, + enum cdma_event event) +{ + for (;;) { + unsigned int space; + + if (event == CDMA_EVENT_SYNC_QUEUE_EMPTY) + space = list_empty(&cdma->sync_queue) ? 1 : 0; + else if (event == CDMA_EVENT_PUSH_BUFFER_SPACE) { + struct push_buffer *pb = &cdma->push_buffer; + space = host1x_pushbuffer_space(pb); + } else { + WARN_ON(1); + return -EINVAL; + } + + if (space) + return space; + + trace_host1x_wait_cdma(dev_name(cdma_to_channel(cdma)->dev), + event); + + /* If somebody has managed to already start waiting, yield */ + if (cdma->event != CDMA_EVENT_NONE) { + mutex_unlock(&cdma->lock); + schedule(); + mutex_lock(&cdma->lock); + continue; + } + cdma->event = event; + + mutex_unlock(&cdma->lock); + down(&cdma->sem); + mutex_lock(&cdma->lock); + } + return 0; +} + +/* + * Start timer that tracks the time spent by the job. + * Must be called with the cdma lock held. 
+ */ +static void cdma_start_timer_locked(struct host1x_cdma *cdma, + struct host1x_job *job) +{ + struct host1x *host = cdma_to_host1x(cdma); + + if (cdma->timeout.client) { + /* timer already started */ + return; + } + + cdma->timeout.client = job->client; + cdma->timeout.syncpt = host1x_syncpt_get(host, job->syncpt_id); + cdma->timeout.syncpt_val = job->syncpt_end; + cdma->timeout.start_ktime = ktime_get(); + + schedule_delayed_work(&cdma->timeout.wq, + msecs_to_jiffies(job->timeout)); +} + +/* + * Stop timer when a buffer submission completes. + * Must be called with the cdma lock held. + */ +static void stop_cdma_timer_locked(struct host1x_cdma *cdma) +{ + cancel_delayed_work(&cdma->timeout.wq); + cdma->timeout.client = 0; +} + +/* + * For all sync queue entries that have already finished according to the + * current sync point registers: + * - unpin & unref their mems + * - pop their push buffer slots + * - remove them from the sync queue + * This is normally called from the host code's worker thread, but can be + * called manually if necessary. + * Must be called with the cdma lock held. 
+ */ +static void update_cdma_locked(struct host1x_cdma *cdma) +{ + bool signal = false; + struct host1x *host1x = cdma_to_host1x(cdma); + struct host1x_job *job, *n; + + /* If CDMA is stopped, queue is cleared and we can return */ + if (!cdma->running) + return; + + /* + * Walk the sync queue, reading the sync point registers as necessary, + * to consume as many sync queue entries as possible without blocking + */ + list_for_each_entry_safe(job, n, &cdma->sync_queue, list) { + struct host1x_syncpt *sp = + host1x_syncpt_get(host1x, job->syncpt_id); + + /* Check whether this syncpt has completed, and bail if not */ + if (!host1x_syncpt_is_expired(sp, job->syncpt_end)) { + /* Start timer on next pending syncpt */ + if (job->timeout) + cdma_start_timer_locked(cdma, job); + break; + } + + /* Cancel timeout, when a buffer completes */ + if (cdma->timeout.client) + stop_cdma_timer_locked(cdma); + + /* Unpin the memory */ + host1x_job_unpin(job); + + /* Pop push buffer slots */ + if (job->num_slots) { + struct push_buffer *pb = &cdma->push_buffer; + host1x_pushbuffer_pop(pb, job->num_slots); + if (cdma->event == CDMA_EVENT_PUSH_BUFFER_SPACE) + signal = true; + } + + list_del(&job->list); + host1x_job_put(job); + } + + if (cdma->event == CDMA_EVENT_SYNC_QUEUE_EMPTY && + list_empty(&cdma->sync_queue)) + signal = true; + + if (signal) { + cdma->event = CDMA_EVENT_NONE; + up(&cdma->sem); + } +} + +void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma, + struct device *dev) +{ + u32 restart_addr; + u32 syncpt_incrs; + struct host1x_job *job = NULL; + u32 syncpt_val; + struct host1x *host1x = cdma_to_host1x(cdma); + + syncpt_val = host1x_syncpt_load(cdma->timeout.syncpt); + + dev_dbg(dev, "%s: starting cleanup (thresh %u)\n", + __func__, syncpt_val); + + /* + * Move the sync_queue read pointer to the first entry that hasn't + * completed based on the current HW syncpt value. It's likely there + * won't be any (i.e.
we're still at the head), but covers the case + * where a syncpt incr happens just prior/during the teardown. + */ + + dev_dbg(dev, "%s: skip completed buffers still in sync_queue\n", + __func__); + + list_for_each_entry(job, &cdma->sync_queue, list) { + if (syncpt_val < job->syncpt_end) + break; + + host1x_job_dump(dev, job); + } + + /* + * Walk the sync_queue, first incrementing with the CPU syncpts that + * are partially executed (the first buffer) or fully skipped while + * still in the current context (slots are also NOP-ed). + * + * At the point contexts are interleaved, syncpt increments must be + * done inline with the pushbuffer from a GATHER buffer to maintain + * the order (slots are modified to be a GATHER of syncpt incrs). + * + * Note: save in restart_addr the location where the timed out buffer + * started in the PB, so we can start the refetch from there (with the + * modified NOP-ed PB slots). This lets things appear to have completed + * properly for this buffer and resources are freed. + */ + + dev_dbg(dev, "%s: perform CPU incr on pending same ctx buffers\n", + __func__); + + if (!list_empty(&cdma->sync_queue)) + restart_addr = job->first_get; + else + restart_addr = cdma->last_pos; + + /* do CPU increments as long as this context continues */ + list_for_each_entry_from(job, &cdma->sync_queue, list) { + /* different context, gets us out of this loop */ + if (job->client != cdma->timeout.client) + break; + + /* won't need a timeout when replayed */ + job->timeout = 0; + + syncpt_incrs = job->syncpt_end - syncpt_val; + dev_dbg(dev, "%s: CPU incr (%u)\n", __func__, syncpt_incrs); + + host1x_job_dump(dev, job); + + /* safe to use CPU to incr syncpts */ + host1x_hw_cdma_timeout_cpu_incr(host1x, cdma, job->first_get, + syncpt_incrs, job->syncpt_end, + job->num_slots); + + syncpt_val += syncpt_incrs; + } + + /* The following submits from the same client may be dependent on the + * failed submit and therefore they may fail.
Force a small timeout + * to make the queue cleanup faster */ + + list_for_each_entry_from(job, &cdma->sync_queue, list) + if (job->client == cdma->timeout.client) + job->timeout = min_t(unsigned int, job->timeout, 500); + + dev_dbg(dev, "%s: finished sync_queue modification\n", __func__); + + /* roll back DMAGET and start up channel again */ + host1x_hw_cdma_resume(host1x, cdma, restart_addr); +} + +/* + * Create a cdma + */ +int host1x_cdma_init(struct host1x_cdma *cdma) +{ + int err; + + mutex_init(&cdma->lock); + sema_init(&cdma->sem, 0); + + INIT_LIST_HEAD(&cdma->sync_queue); + + cdma->event = CDMA_EVENT_NONE; + cdma->running = false; + cdma->torndown = false; + + err = host1x_pushbuffer_init(&cdma->push_buffer); + if (err) + return err; + return 0; +} + +/* + * Destroy a cdma + */ +int host1x_cdma_deinit(struct host1x_cdma *cdma) +{ + struct push_buffer *pb = &cdma->push_buffer; + struct host1x *host1x = cdma_to_host1x(cdma); + + if (cdma->running) { + pr_warn("%s: CDMA still running\n", __func__); + return -EBUSY; + } + + host1x_pushbuffer_destroy(pb); + host1x_hw_cdma_timeout_destroy(host1x, cdma); + + return 0; +} + +/* + * Begin a cdma submit + */ +int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job) +{ + struct host1x *host1x = cdma_to_host1x(cdma); + + mutex_lock(&cdma->lock); + + if (job->timeout) { + /* init state on first submit with timeout value */ + if (!cdma->timeout.initialized) { + int err; + err = host1x_hw_cdma_timeout_init(host1x, cdma, + job->syncpt_id); + if (err) { + mutex_unlock(&cdma->lock); + return err; + } + } + } + if (!cdma->running) + host1x_hw_cdma_start(host1x, cdma); + + cdma->slots_free = 0; + cdma->slots_used = 0; + cdma->first_get = cdma->push_buffer.pos; + + trace_host1x_cdma_begin(dev_name(job->channel->dev)); + return 0; +} + +/* + * Push two words into a push buffer slot + * Blocks as necessary if the push buffer is full. 
+ */ +void host1x_cdma_push(struct host1x_cdma *cdma, u32 op1, u32 op2) +{ + struct host1x *host1x = cdma_to_host1x(cdma); + struct push_buffer *pb = &cdma->push_buffer; + u32 slots_free = cdma->slots_free; + + if (slots_free == 0) { + host1x_hw_cdma_flush(host1x, cdma); + slots_free = host1x_cdma_wait_locked(cdma, + CDMA_EVENT_PUSH_BUFFER_SPACE); + } + cdma->slots_free = slots_free - 1; + cdma->slots_used++; + host1x_pushbuffer_push(pb, op1, op2); +} + +/* + * End a cdma submit + * Kick off DMA, add job to the sync queue, and a number of slots to be freed + * from the pushbuffer. The handles for a submit must all be pinned at the same + * time, but they can be unpinned in smaller chunks. + */ +void host1x_cdma_end(struct host1x_cdma *cdma, + struct host1x_job *job) +{ + struct host1x *host1x = cdma_to_host1x(cdma); + bool idle = list_empty(&cdma->sync_queue); + + host1x_hw_cdma_flush(host1x, cdma); + + job->first_get = cdma->first_get; + job->num_slots = cdma->slots_used; + host1x_job_get(job); + list_add_tail(&job->list, &cdma->sync_queue); + + /* start timer on idle -> active transitions */ + if (job->timeout && idle) + cdma_start_timer_locked(cdma, job); + + trace_host1x_cdma_end(dev_name(job->channel->dev)); + mutex_unlock(&cdma->lock); +} + +/* + * Update cdma state according to current sync point values + */ +void host1x_cdma_update(struct host1x_cdma *cdma) +{ + mutex_lock(&cdma->lock); + update_cdma_locked(cdma); + mutex_unlock(&cdma->lock); +} diff --git a/drivers/gpu/host1x/cdma.h b/drivers/gpu/host1x/cdma.h new file mode 100644 index 000000000000..313c4b784348 --- /dev/null +++ b/drivers/gpu/host1x/cdma.h @@ -0,0 +1,100 @@ +/* + * Tegra host1x Command DMA + * + * Copyright (c) 2010-2013, NVIDIA Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __HOST1X_CDMA_H +#define __HOST1X_CDMA_H + +#include +#include +#include + +struct host1x_syncpt; +struct host1x_userctx_timeout; +struct host1x_job; + +/* + * cdma + * + * This is in charge of a host command DMA channel. + * Sends ops to a push buffer, and takes responsibility for unpinning + * (& possibly freeing) of memory after those ops have completed. + * Producer: + * begin + * push - send ops to the push buffer + * end - start command DMA and enqueue handles to be unpinned + * Consumer: + * update - call to update sync queue and push buffer, unpin memory + */ + +struct push_buffer { + u32 *mapped; /* mapped pushbuffer memory */ + dma_addr_t phys; /* physical address of pushbuffer */ + u32 fence; /* index we've written */ + u32 pos; /* index to write to */ + u32 size_bytes; +}; + +struct buffer_timeout { + struct delayed_work wq; /* work queue */ + bool initialized; /* timer one-time setup flag */ + struct host1x_syncpt *syncpt; /* buffer completion syncpt */ + u32 syncpt_val; /* syncpt value when completed */ + ktime_t start_ktime; /* starting time */ + /* context timeout information */ + int client; +}; + +enum cdma_event { + CDMA_EVENT_NONE, /* not waiting for any event */ + CDMA_EVENT_SYNC_QUEUE_EMPTY, /* wait for empty sync queue */ + CDMA_EVENT_PUSH_BUFFER_SPACE /* wait for space in push buffer */ +}; + +struct host1x_cdma { + struct mutex lock; /* controls access to shared state */ + struct semaphore sem; /* signalled when event occurs */ + enum cdma_event event; /* event that sem is waiting for */ + unsigned int slots_used; /* pb slots used in current submit */ + unsigned int 
slots_free; /* pb slots free in current submit */ + unsigned int first_get; /* DMAGET value, where submit begins */ + unsigned int last_pos; /* last value written to DMAPUT */ + struct push_buffer push_buffer; /* channel's push buffer */ + struct list_head sync_queue; /* job queue */ + struct buffer_timeout timeout; /* channel's timeout state/wq */ + bool running; + bool torndown; +}; + +#define cdma_to_channel(cdma) container_of(cdma, struct host1x_channel, cdma) +#define cdma_to_host1x(cdma) dev_get_drvdata(cdma_to_channel(cdma)->dev->parent) +#define pb_to_cdma(pb) container_of(pb, struct host1x_cdma, push_buffer) + +int host1x_cdma_init(struct host1x_cdma *cdma); +int host1x_cdma_deinit(struct host1x_cdma *cdma); +void host1x_cdma_stop(struct host1x_cdma *cdma); +int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job); +void host1x_cdma_push(struct host1x_cdma *cdma, u32 op1, u32 op2); +void host1x_cdma_end(struct host1x_cdma *cdma, struct host1x_job *job); +void host1x_cdma_update(struct host1x_cdma *cdma); +void host1x_cdma_peek(struct host1x_cdma *cdma, u32 dmaget, int slot, + u32 *out); +unsigned int host1x_cdma_wait_locked(struct host1x_cdma *cdma, + enum cdma_event event); +void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma, + struct device *dev); +#endif diff --git a/drivers/gpu/host1x/channel.c b/drivers/gpu/host1x/channel.c new file mode 100644 index 000000000000..83ea51b9f0fc --- /dev/null +++ b/drivers/gpu/host1x/channel.c @@ -0,0 +1,126 @@ +/* + * Tegra host1x Channel + * + * Copyright (c) 2010-2013, NVIDIA Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +#include "channel.h" +#include "dev.h" +#include "job.h" + +/* Constructor for the host1x device list */ +int host1x_channel_list_init(struct host1x *host) +{ + INIT_LIST_HEAD(&host->chlist.list); + mutex_init(&host->chlist_mutex); + + if (host->info->nb_channels > BITS_PER_LONG) { + WARN(1, "host1x hardware has more channels than supported by the driver\n"); + return -ENOSYS; + } + + return 0; +} + +int host1x_job_submit(struct host1x_job *job) +{ + struct host1x *host = dev_get_drvdata(job->channel->dev->parent); + + return host1x_hw_channel_submit(host, job); +} + +struct host1x_channel *host1x_channel_get(struct host1x_channel *channel) +{ + int err = 0; + + mutex_lock(&channel->reflock); + + if (channel->refcount == 0) + err = host1x_cdma_init(&channel->cdma); + + if (!err) + channel->refcount++; + + mutex_unlock(&channel->reflock); + + return err ? 
NULL : channel; +} + +void host1x_channel_put(struct host1x_channel *channel) +{ + mutex_lock(&channel->reflock); + + if (channel->refcount == 1) { + struct host1x *host = dev_get_drvdata(channel->dev->parent); + + host1x_hw_cdma_stop(host, &channel->cdma); + host1x_cdma_deinit(&channel->cdma); + } + + channel->refcount--; + + mutex_unlock(&channel->reflock); +} + +struct host1x_channel *host1x_channel_request(struct device *dev) +{ + struct host1x *host = dev_get_drvdata(dev->parent); + int max_channels = host->info->nb_channels; + struct host1x_channel *channel = NULL; + int index, err; + + mutex_lock(&host->chlist_mutex); + + index = find_first_zero_bit(&host->allocated_channels, max_channels); + if (index >= max_channels) + goto fail; + + channel = kzalloc(sizeof(*channel), GFP_KERNEL); + if (!channel) + goto fail; + + err = host1x_hw_channel_init(host, channel, index); + if (err < 0) + goto fail; + + /* Link device to host1x_channel */ + channel->dev = dev; + + /* Add to channel list */ + list_add_tail(&channel->list, &host->chlist.list); + + host->allocated_channels |= BIT(index); + + mutex_unlock(&host->chlist_mutex); + return channel; + +fail: + dev_err(dev, "failed to init channel\n"); + kfree(channel); + mutex_unlock(&host->chlist_mutex); + return NULL; +} + +void host1x_channel_free(struct host1x_channel *channel) +{ + struct host1x *host = dev_get_drvdata(channel->dev->parent); + + host->allocated_channels &= ~BIT(channel->id); + list_del(&channel->list); + kfree(channel); +} diff --git a/drivers/gpu/host1x/channel.h b/drivers/gpu/host1x/channel.h new file mode 100644 index 000000000000..48723b8eea42 --- /dev/null +++ b/drivers/gpu/host1x/channel.h @@ -0,0 +1,52 @@ +/* + * Tegra host1x Channel + * + * Copyright (c) 2010-2013, NVIDIA Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __HOST1X_CHANNEL_H +#define __HOST1X_CHANNEL_H + +#include + +#include "cdma.h" + +struct host1x; + +struct host1x_channel { + struct list_head list; + + unsigned int refcount; + unsigned int id; + struct mutex reflock; + struct mutex submitlock; + void __iomem *regs; + struct device *dev; + struct host1x_cdma cdma; +}; + +/* channel list operations */ +int host1x_channel_list_init(struct host1x *host); + +struct host1x_channel *host1x_channel_request(struct device *dev); +void host1x_channel_free(struct host1x_channel *channel); +struct host1x_channel *host1x_channel_get(struct host1x_channel *channel); +void host1x_channel_put(struct host1x_channel *channel); +int host1x_job_submit(struct host1x_job *job); + +#define host1x_for_each_channel(host, channel) \ + list_for_each_entry(channel, &host->chlist.list, list) + +#endif diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index b967f6e8df55..4e522c532bc8 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -29,6 +29,7 @@ #include "dev.h" #include "intr.h" +#include "channel.h" #include "hw/host1x01.h" void host1x_sync_writel(struct host1x *host1x, u32 v, u32 r) @@ -45,6 +46,16 @@ u32 host1x_sync_readl(struct host1x *host1x, u32 r) return readl(sync_regs + r); } +void host1x_ch_writel(struct host1x_channel *ch, u32 v, u32 r) +{ + writel(v, ch->regs + r); +} + +u32 host1x_ch_readl(struct host1x_channel *ch, u32 r) +{ + return readl(ch->regs + r); +} + static const struct host1x_info host1x01_info = { .nb_channels = 8, .nb_pts = 32, @@ -112,6 +123,12 @@ static int host1x_probe(struct platform_device *pdev) 
return err; } + err = host1x_channel_list_init(host); + if (err) { + dev_err(&pdev->dev, "failed to initialize channel list\n"); + return err; + } + err = clk_prepare_enable(host->clk); if (err < 0) { dev_err(&pdev->dev, "failed to enable clock\n"); diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h index caf9cc62eb17..1a9b4383dc3b 100644 --- a/drivers/gpu/host1x/dev.h +++ b/drivers/gpu/host1x/dev.h @@ -20,10 +20,39 @@ #include #include +#include "channel.h" #include "syncpt.h" #include "intr.h" +#include "cdma.h" +#include "job.h" struct host1x_syncpt; +struct host1x_channel; +struct host1x_cdma; +struct host1x_job; +struct push_buffer; + +struct host1x_channel_ops { + int (*init)(struct host1x_channel *channel, struct host1x *host, + unsigned int id); + int (*submit)(struct host1x_job *job); +}; + +struct host1x_cdma_ops { + void (*start)(struct host1x_cdma *cdma); + void (*stop)(struct host1x_cdma *cdma); + void (*flush)(struct host1x_cdma *cdma); + int (*timeout_init)(struct host1x_cdma *cdma, u32 syncpt_id); + void (*timeout_destroy)(struct host1x_cdma *cdma); + void (*freeze)(struct host1x_cdma *cdma); + void (*resume)(struct host1x_cdma *cdma, u32 getptr); + void (*timeout_cpu_incr)(struct host1x_cdma *cdma, u32 getptr, + u32 syncpt_incrs, u32 syncval, u32 nr_slots); +}; + +struct host1x_pushbuffer_ops { + void (*init)(struct push_buffer *pb); +}; struct host1x_syncpt_ops { void (*restore)(struct host1x_syncpt *syncpt); @@ -68,11 +97,22 @@ struct host1x { const struct host1x_syncpt_ops *syncpt_op; const struct host1x_intr_ops *intr_op; + const struct host1x_channel_ops *channel_op; + const struct host1x_cdma_ops *cdma_op; + const struct host1x_pushbuffer_ops *cdma_pb_op; + struct host1x_syncpt *nop_sp; + + struct mutex chlist_mutex; + struct host1x_channel chlist; + unsigned long allocated_channels; + unsigned int num_allocated_channels; }; void host1x_sync_writel(struct host1x *host1x, u32 r, u32 v); u32 host1x_sync_readl(struct host1x 
*host1x, u32 r); +void host1x_ch_writel(struct host1x_channel *ch, u32 v, u32 r); +u32 host1x_ch_readl(struct host1x_channel *ch, u32 r); static inline void host1x_hw_syncpt_restore(struct host1x *host, struct host1x_syncpt *sp) @@ -144,4 +184,77 @@ static inline int host1x_hw_intr_free_syncpt_irq(struct host1x *host) { return host->intr_op->free_syncpt_irq(host); } + +static inline int host1x_hw_channel_init(struct host1x *host, + struct host1x_channel *channel, + int chid) +{ + return host->channel_op->init(channel, host, chid); +} + +static inline int host1x_hw_channel_submit(struct host1x *host, + struct host1x_job *job) +{ + return host->channel_op->submit(job); +} + +static inline void host1x_hw_cdma_start(struct host1x *host, + struct host1x_cdma *cdma) +{ + host->cdma_op->start(cdma); +} + +static inline void host1x_hw_cdma_stop(struct host1x *host, + struct host1x_cdma *cdma) +{ + host->cdma_op->stop(cdma); +} + +static inline void host1x_hw_cdma_flush(struct host1x *host, + struct host1x_cdma *cdma) +{ + host->cdma_op->flush(cdma); +} + +static inline int host1x_hw_cdma_timeout_init(struct host1x *host, + struct host1x_cdma *cdma, + u32 syncpt_id) +{ + return host->cdma_op->timeout_init(cdma, syncpt_id); +} + +static inline void host1x_hw_cdma_timeout_destroy(struct host1x *host, + struct host1x_cdma *cdma) +{ + host->cdma_op->timeout_destroy(cdma); +} + +static inline void host1x_hw_cdma_freeze(struct host1x *host, + struct host1x_cdma *cdma) +{ + host->cdma_op->freeze(cdma); +} + +static inline void host1x_hw_cdma_resume(struct host1x *host, + struct host1x_cdma *cdma, u32 getptr) +{ + host->cdma_op->resume(cdma, getptr); +} + +static inline void host1x_hw_cdma_timeout_cpu_incr(struct host1x *host, + struct host1x_cdma *cdma, + u32 getptr, + u32 syncpt_incrs, + u32 syncval, u32 nr_slots) +{ + host->cdma_op->timeout_cpu_incr(cdma, getptr, syncpt_incrs, syncval, + nr_slots); +} + +static inline void host1x_hw_pushbuffer_init(struct host1x *host, + struct
push_buffer *pb) +{ + host->cdma_pb_op->init(pb); +} + #endif diff --git a/drivers/gpu/host1x/host1x.h b/drivers/gpu/host1x/host1x.h new file mode 100644 index 000000000000..bca6563f6766 --- /dev/null +++ b/drivers/gpu/host1x/host1x.h @@ -0,0 +1,28 @@ +/* + * Tegra host1x driver + * + * Copyright (c) 2009-2013, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef __LINUX_HOST1X_H +#define __LINUX_HOST1X_H + +enum host1x_class { + HOST1X_CLASS_HOST1X = 0x1 +}; + +#endif diff --git a/drivers/gpu/host1x/host1x_bo.h b/drivers/gpu/host1x/host1x_bo.h new file mode 100644 index 000000000000..4c1f10bd773d --- /dev/null +++ b/drivers/gpu/host1x/host1x_bo.h @@ -0,0 +1,87 @@ +/* + * Tegra host1x Memory Management Abstraction header + * + * Copyright (c) 2012-2013, NVIDIA Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef _HOST1X_BO_H +#define _HOST1X_BO_H + +struct host1x_bo; + +struct host1x_bo_ops { + struct host1x_bo *(*get)(struct host1x_bo *bo); + void (*put)(struct host1x_bo *bo); + dma_addr_t (*pin)(struct host1x_bo *bo, struct sg_table **sgt); + void (*unpin)(struct host1x_bo *bo, struct sg_table *sgt); + void *(*mmap)(struct host1x_bo *bo); + void (*munmap)(struct host1x_bo *bo, void *addr); + void *(*kmap)(struct host1x_bo *bo, unsigned int pagenum); + void (*kunmap)(struct host1x_bo *bo, unsigned int pagenum, void *addr); +}; + +struct host1x_bo { + const struct host1x_bo_ops *ops; +}; + +static inline void host1x_bo_init(struct host1x_bo *bo, + const struct host1x_bo_ops *ops) +{ + bo->ops = ops; +} + +static inline struct host1x_bo *host1x_bo_get(struct host1x_bo *bo) +{ + return bo->ops->get(bo); +} + +static inline void host1x_bo_put(struct host1x_bo *bo) +{ + bo->ops->put(bo); +} + +static inline dma_addr_t host1x_bo_pin(struct host1x_bo *bo, + struct sg_table **sgt) +{ + return bo->ops->pin(bo, sgt); +} + +static inline void host1x_bo_unpin(struct host1x_bo *bo, struct sg_table *sgt) +{ + bo->ops->unpin(bo, sgt); +} + +static inline void *host1x_bo_mmap(struct host1x_bo *bo) +{ + return bo->ops->mmap(bo); +} + +static inline void host1x_bo_munmap(struct host1x_bo *bo, void *addr) +{ + bo->ops->munmap(bo, addr); +} + +static inline void *host1x_bo_kmap(struct host1x_bo *bo, unsigned int pagenum) +{ + return bo->ops->kmap(bo, pagenum); +} + +static inline void host1x_bo_kunmap(struct host1x_bo *bo, + unsigned int pagenum, void *addr) +{ + bo->ops->kunmap(bo, pagenum, addr); +} + +#endif diff --git a/drivers/gpu/host1x/hw/cdma_hw.c b/drivers/gpu/host1x/hw/cdma_hw.c new file mode 100644 index 000000000000..4eb22ef29776 --- /dev/null +++ b/drivers/gpu/host1x/hw/cdma_hw.c @@ -0,0 +1,324 @@ +/* + * Tegra host1x Command DMA + * + * Copyright 
(c) 2010-2013, NVIDIA Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include + +#include "cdma.h" +#include "channel.h" +#include "dev.h" +#include "debug.h" + +/* + * Put the restart at the end of pushbuffer memory + */ +static void push_buffer_init(struct push_buffer *pb) +{ + *(pb->mapped + (pb->size_bytes >> 2)) = host1x_opcode_restart(0); +} + +/* + * Increment timedout buffer's syncpt via CPU. + */ +static void cdma_timeout_cpu_incr(struct host1x_cdma *cdma, u32 getptr, + u32 syncpt_incrs, u32 syncval, u32 nr_slots) +{ + struct host1x *host1x = cdma_to_host1x(cdma); + struct push_buffer *pb = &cdma->push_buffer; + u32 i; + + for (i = 0; i < syncpt_incrs; i++) + host1x_syncpt_cpu_incr(cdma->timeout.syncpt); + + /* after CPU incr, ensure shadow is up to date */ + host1x_syncpt_load(cdma->timeout.syncpt); + + /* NOP all the PB slots */ + while (nr_slots--) { + u32 *p = (u32 *)((void *)pb->mapped + getptr); + *(p++) = HOST1X_OPCODE_NOP; + *(p++) = HOST1X_OPCODE_NOP; + dev_dbg(host1x->dev, "%s: NOP at 0x%llx\n", __func__, + (unsigned long long)pb->phys + getptr); + getptr = (getptr + 8) & (pb->size_bytes - 1); + } + wmb(); +} + +/* + * Start channel DMA + */ +static void cdma_start(struct host1x_cdma *cdma) +{ + struct host1x_channel *ch = cdma_to_channel(cdma); + + if (cdma->running) + return; + + cdma->last_pos = cdma->push_buffer.pos; + + host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP, + HOST1X_CHANNEL_DMACTRL); + + /*
set base, put and end pointer */ + host1x_ch_writel(ch, cdma->push_buffer.phys, HOST1X_CHANNEL_DMASTART); + host1x_ch_writel(ch, cdma->push_buffer.pos, HOST1X_CHANNEL_DMAPUT); + host1x_ch_writel(ch, cdma->push_buffer.phys + + cdma->push_buffer.size_bytes + 4, + HOST1X_CHANNEL_DMAEND); + + /* reset GET */ + host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP | + HOST1X_CHANNEL_DMACTRL_DMAGETRST | + HOST1X_CHANNEL_DMACTRL_DMAINITGET, + HOST1X_CHANNEL_DMACTRL); + + /* start the command DMA */ + host1x_ch_writel(ch, 0, HOST1X_CHANNEL_DMACTRL); + + cdma->running = true; +} + +/* + * Similar to cdma_start(), but rather than starting from an idle + * state (where DMA GET is set to DMA PUT), on a timeout we restore + * DMA GET from an explicit value (so DMA may again be pending). + */ +static void cdma_timeout_restart(struct host1x_cdma *cdma, u32 getptr) +{ + struct host1x *host1x = cdma_to_host1x(cdma); + struct host1x_channel *ch = cdma_to_channel(cdma); + + if (cdma->running) + return; + + cdma->last_pos = cdma->push_buffer.pos; + + host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP, + HOST1X_CHANNEL_DMACTRL); + + /* set base, end pointer (all of memory) */ + host1x_ch_writel(ch, cdma->push_buffer.phys, HOST1X_CHANNEL_DMASTART); + host1x_ch_writel(ch, cdma->push_buffer.phys + + cdma->push_buffer.size_bytes, + HOST1X_CHANNEL_DMAEND); + + /* set GET, by loading the value in PUT (then reset GET) */ + host1x_ch_writel(ch, getptr, HOST1X_CHANNEL_DMAPUT); + host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP | + HOST1X_CHANNEL_DMACTRL_DMAGETRST | + HOST1X_CHANNEL_DMACTRL_DMAINITGET, + HOST1X_CHANNEL_DMACTRL); + + dev_dbg(host1x->dev, + "%s: DMA GET 0x%x, PUT HW 0x%x / shadow 0x%x\n", __func__, + host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET), + host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT), + cdma->last_pos); + + /* deassert GET reset and set PUT */ + host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP, + HOST1X_CHANNEL_DMACTRL); + host1x_ch_writel(ch, cdma->push_buffer.pos, 
HOST1X_CHANNEL_DMAPUT); + + /* start the command DMA */ + host1x_ch_writel(ch, 0, HOST1X_CHANNEL_DMACTRL); + + cdma->running = true; +} + +/* + * Kick channel DMA into action by writing its PUT offset (if it has changed) + */ +static void cdma_flush(struct host1x_cdma *cdma) +{ + struct host1x_channel *ch = cdma_to_channel(cdma); + + if (cdma->push_buffer.pos != cdma->last_pos) { + host1x_ch_writel(ch, cdma->push_buffer.pos, + HOST1X_CHANNEL_DMAPUT); + cdma->last_pos = cdma->push_buffer.pos; + } +} + +static void cdma_stop(struct host1x_cdma *cdma) +{ + struct host1x_channel *ch = cdma_to_channel(cdma); + + mutex_lock(&cdma->lock); + if (cdma->running) { + host1x_cdma_wait_locked(cdma, CDMA_EVENT_SYNC_QUEUE_EMPTY); + host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP, + HOST1X_CHANNEL_DMACTRL); + cdma->running = false; + } + mutex_unlock(&cdma->lock); +} + +/* + * Stops both channel's command processor and CDMA immediately. + * Also, tears down the channel and resets corresponding module. 
+ */ +static void cdma_freeze(struct host1x_cdma *cdma) +{ + struct host1x *host = cdma_to_host1x(cdma); + struct host1x_channel *ch = cdma_to_channel(cdma); + u32 cmdproc_stop; + + if (cdma->torndown && !cdma->running) { + dev_warn(host->dev, "Already torn down\n"); + return; + } + + dev_dbg(host->dev, "freezing channel (id %d)\n", ch->id); + + cmdproc_stop = host1x_sync_readl(host, HOST1X_SYNC_CMDPROC_STOP); + cmdproc_stop |= BIT(ch->id); + host1x_sync_writel(host, cmdproc_stop, HOST1X_SYNC_CMDPROC_STOP); + + dev_dbg(host->dev, "%s: DMA GET 0x%x, PUT HW 0x%x / shadow 0x%x\n", + __func__, host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET), + host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT), + cdma->last_pos); + + host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP, + HOST1X_CHANNEL_DMACTRL); + + host1x_sync_writel(host, BIT(ch->id), HOST1X_SYNC_CH_TEARDOWN); + + cdma->running = false; + cdma->torndown = true; +} + +static void cdma_resume(struct host1x_cdma *cdma, u32 getptr) +{ + struct host1x *host1x = cdma_to_host1x(cdma); + struct host1x_channel *ch = cdma_to_channel(cdma); + u32 cmdproc_stop; + + dev_dbg(host1x->dev, + "resuming channel (id %d, DMAGET restart = 0x%x)\n", + ch->id, getptr); + + cmdproc_stop = host1x_sync_readl(host1x, HOST1X_SYNC_CMDPROC_STOP); + cmdproc_stop &= ~(BIT(ch->id)); + host1x_sync_writel(host1x, cmdproc_stop, HOST1X_SYNC_CMDPROC_STOP); + + cdma->torndown = false; + cdma_timeout_restart(cdma, getptr); +} + +/* + * If this timeout fires, it indicates the current sync_queue entry has + * exceeded its TTL and the userctx should be timed out and remaining + * submits already issued cleaned up (future submits return an error). 
+ */ +static void cdma_timeout_handler(struct work_struct *work) +{ + struct host1x_cdma *cdma; + struct host1x *host1x; + struct host1x_channel *ch; + + u32 syncpt_val; + + u32 prev_cmdproc, cmdproc_stop; + + cdma = container_of(to_delayed_work(work), struct host1x_cdma, + timeout.wq); + host1x = cdma_to_host1x(cdma); + ch = cdma_to_channel(cdma); + + mutex_lock(&cdma->lock); + + if (!cdma->timeout.client) { + dev_dbg(host1x->dev, + "cdma_timeout: expired, but has no clientid\n"); + mutex_unlock(&cdma->lock); + return; + } + + /* stop processing to get a clean snapshot */ + prev_cmdproc = host1x_sync_readl(host1x, HOST1X_SYNC_CMDPROC_STOP); + cmdproc_stop = prev_cmdproc | BIT(ch->id); + host1x_sync_writel(host1x, cmdproc_stop, HOST1X_SYNC_CMDPROC_STOP); + + dev_dbg(host1x->dev, "cdma_timeout: cmdproc was 0x%x is 0x%x\n", + prev_cmdproc, cmdproc_stop); + + syncpt_val = host1x_syncpt_load(cdma->timeout.syncpt); + + /* has buffer actually completed? */ + if ((s32)(syncpt_val - cdma->timeout.syncpt_val) >= 0) { + dev_dbg(host1x->dev, + "cdma_timeout: expired, but buffer had completed\n"); + /* restore */ + cmdproc_stop = prev_cmdproc & ~(BIT(ch->id)); + host1x_sync_writel(host1x, cmdproc_stop, + HOST1X_SYNC_CMDPROC_STOP); + mutex_unlock(&cdma->lock); + return; + } + + dev_warn(host1x->dev, "%s: timeout: %d (%s), HW thresh %d, done %d\n", + __func__, cdma->timeout.syncpt->id, cdma->timeout.syncpt->name, + syncpt_val, cdma->timeout.syncpt_val); + + /* stop HW, resetting channel/module */ + host1x_hw_cdma_freeze(host1x, cdma); + + host1x_cdma_update_sync_queue(cdma, ch->dev); + mutex_unlock(&cdma->lock); +} + +/* + * Init timeout resources + */ +static int cdma_timeout_init(struct host1x_cdma *cdma, u32 syncpt_id) +{ + INIT_DELAYED_WORK(&cdma->timeout.wq, cdma_timeout_handler); + cdma->timeout.initialized = true; + + return 0; +} + +/* + * Clean up timeout resources + */ +static void cdma_timeout_destroy(struct host1x_cdma *cdma) +{ + if (cdma->timeout.initialized) + 
cancel_delayed_work(&cdma->timeout.wq); + cdma->timeout.initialized = false; +} + +static const struct host1x_cdma_ops host1x_cdma_ops = { + .start = cdma_start, + .stop = cdma_stop, + .flush = cdma_flush, + + .timeout_init = cdma_timeout_init, + .timeout_destroy = cdma_timeout_destroy, + .freeze = cdma_freeze, + .resume = cdma_resume, + .timeout_cpu_incr = cdma_timeout_cpu_incr, +}; + +static const struct host1x_pushbuffer_ops host1x_pushbuffer_ops = { + .init = push_buffer_init, +}; diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c new file mode 100644 index 000000000000..5137a5604215 --- /dev/null +++ b/drivers/gpu/host1x/hw/channel_hw.c @@ -0,0 +1,143 @@ +/* + * Tegra host1x Channel + * + * Copyright (c) 2010-2013, NVIDIA Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include + +#include "host1x.h" +#include "host1x_bo.h" +#include "channel.h" +#include "dev.h" +#include "intr.h" +#include "job.h" + +#define HOST1X_CHANNEL_SIZE 16384 +#define TRACE_MAX_LENGTH 128U + +static void submit_gathers(struct host1x_job *job) +{ + struct host1x_cdma *cdma = &job->channel->cdma; + unsigned int i; + + for (i = 0; i < job->num_gathers; i++) { + struct host1x_job_gather *g = &job->gathers[i]; + u32 op1 = host1x_opcode_gather(g->words); + u32 op2 = g->base + g->offset; + host1x_cdma_push(cdma, op1, op2); + } +} + +static int channel_submit(struct host1x_job *job) +{ + struct host1x_channel *ch = job->channel; + struct host1x_syncpt *sp; + u32 user_syncpt_incrs = job->syncpt_incrs; + u32 prev_max = 0; + u32 syncval; + int err; + struct host1x_waitlist *completed_waiter = NULL; + struct host1x *host = dev_get_drvdata(ch->dev->parent); + + sp = host->syncpt + job->syncpt_id; + trace_host1x_channel_submit(dev_name(ch->dev), + job->num_gathers, job->num_relocs, + job->num_waitchk, job->syncpt_id, + job->syncpt_incrs); + + /* before error checks, return current max */ + prev_max = job->syncpt_end = host1x_syncpt_read_max(sp); + + /* get submit lock */ + err = mutex_lock_interruptible(&ch->submitlock); + if (err) + goto error; + + completed_waiter = kzalloc(sizeof(*completed_waiter), GFP_KERNEL); + if (!completed_waiter) { + mutex_unlock(&ch->submitlock); + err = -ENOMEM; + goto error; + } + + /* begin a CDMA submit */ + err = host1x_cdma_begin(&ch->cdma, job); + if (err) { + mutex_unlock(&ch->submitlock); + goto error; + } + + if (job->serialize) { + /* + * Force serialization by inserting a host wait for the + * previous job to finish before this one can commence. 
+ */ + host1x_cdma_push(&ch->cdma, + host1x_opcode_setclass(HOST1X_CLASS_HOST1X, + host1x_uclass_wait_syncpt_r(), 1), + host1x_class_host_wait_syncpt(job->syncpt_id, + host1x_syncpt_read_max(sp))); + } + + syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs); + + job->syncpt_end = syncval; + + /* add a setclass for modules that require it */ + if (job->class) + host1x_cdma_push(&ch->cdma, + host1x_opcode_setclass(job->class, 0, 0), + HOST1X_OPCODE_NOP); + + submit_gathers(job); + + /* end CDMA submit & stash pinned hMems into sync queue */ + host1x_cdma_end(&ch->cdma, job); + + trace_host1x_channel_submitted(dev_name(ch->dev), prev_max, syncval); + + /* schedule a submit complete interrupt */ + err = host1x_intr_add_action(host, job->syncpt_id, syncval, + HOST1X_INTR_ACTION_SUBMIT_COMPLETE, ch, + completed_waiter, NULL); + completed_waiter = NULL; + WARN(err, "Failed to set submit complete interrupt"); + + mutex_unlock(&ch->submitlock); + + return 0; + +error: + kfree(completed_waiter); + return err; +} + +static int host1x_channel_init(struct host1x_channel *ch, struct host1x *dev, + unsigned int index) +{ + ch->id = index; + mutex_init(&ch->reflock); + mutex_init(&ch->submitlock); + + ch->regs = dev->regs + index * HOST1X_CHANNEL_SIZE; + return 0; +} + +static const struct host1x_channel_ops host1x_channel_ops = { + .init = host1x_channel_init, + .submit = channel_submit, +}; diff --git a/drivers/gpu/host1x/hw/host1x01.c b/drivers/gpu/host1x/hw/host1x01.c index f5c35fa66d05..013ff381d825 100644 --- a/drivers/gpu/host1x/hw/host1x01.c +++ b/drivers/gpu/host1x/hw/host1x01.c @@ -21,6 +21,8 @@ #include "hw/host1x01_hardware.h" /* include code */ +#include "hw/cdma_hw.c" +#include "hw/channel_hw.c" #include "hw/intr_hw.c" #include "hw/syncpt_hw.c" @@ -28,6 +30,9 @@ int host1x01_init(struct host1x *host) { + host->channel_op = &host1x_channel_ops; + host->cdma_op = &host1x_cdma_ops; + host->cdma_pb_op = &host1x_pushbuffer_ops; host->syncpt_op = &host1x_syncpt_ops; 
host->intr_op = &host1x_intr_ops; diff --git a/drivers/gpu/host1x/hw/host1x01_hardware.h b/drivers/gpu/host1x/hw/host1x01_hardware.h index 8cecbee7a270..5f0fb866efa8 100644 --- a/drivers/gpu/host1x/hw/host1x01_hardware.h +++ b/drivers/gpu/host1x/hw/host1x01_hardware.h @@ -22,6 +22,122 @@ #include #include +#include "hw_host1x01_channel.h" #include "hw_host1x01_sync.h" +#include "hw_host1x01_uclass.h" + +static inline u32 host1x_class_host_wait_syncpt( + unsigned indx, unsigned threshold) +{ + return host1x_uclass_wait_syncpt_indx_f(indx) + | host1x_uclass_wait_syncpt_thresh_f(threshold); +} + +static inline u32 host1x_class_host_load_syncpt_base( + unsigned indx, unsigned threshold) +{ + return host1x_uclass_load_syncpt_base_base_indx_f(indx) + | host1x_uclass_load_syncpt_base_value_f(threshold); +} + +static inline u32 host1x_class_host_wait_syncpt_base( + unsigned indx, unsigned base_indx, unsigned offset) +{ + return host1x_uclass_wait_syncpt_base_indx_f(indx) + | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx) + | host1x_uclass_wait_syncpt_base_offset_f(offset); +} + +static inline u32 host1x_class_host_incr_syncpt_base( + unsigned base_indx, unsigned offset) +{ + return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx) + | host1x_uclass_incr_syncpt_base_offset_f(offset); +} + +static inline u32 host1x_class_host_incr_syncpt( + unsigned cond, unsigned indx) +{ + return host1x_uclass_incr_syncpt_cond_f(cond) + | host1x_uclass_incr_syncpt_indx_f(indx); +} + +static inline u32 host1x_class_host_indoff_reg_write( + unsigned mod_id, unsigned offset, bool auto_inc) +{ + u32 v = host1x_uclass_indoff_indbe_f(0xf) + | host1x_uclass_indoff_indmodid_f(mod_id) + | host1x_uclass_indoff_indroffset_f(offset); + if (auto_inc) + v |= host1x_uclass_indoff_autoinc_f(1); + return v; +} + +static inline u32 host1x_class_host_indoff_reg_read( + unsigned mod_id, unsigned offset, bool auto_inc) +{ + u32 v = host1x_uclass_indoff_indmodid_f(mod_id) + | 
host1x_uclass_indoff_indroffset_f(offset) + | host1x_uclass_indoff_rwn_read_v(); + if (auto_inc) + v |= host1x_uclass_indoff_autoinc_f(1); + return v; +} + + +/* cdma opcodes */ +static inline u32 host1x_opcode_setclass( + unsigned class_id, unsigned offset, unsigned mask) +{ + return (0 << 28) | (offset << 16) | (class_id << 6) | mask; +} + +static inline u32 host1x_opcode_incr(unsigned offset, unsigned count) +{ + return (1 << 28) | (offset << 16) | count; +} + +static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count) +{ + return (2 << 28) | (offset << 16) | count; +} + +static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask) +{ + return (3 << 28) | (offset << 16) | mask; +} + +static inline u32 host1x_opcode_imm(unsigned offset, unsigned value) +{ + return (4 << 28) | (offset << 16) | value; +} + +static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx) +{ + return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(), + host1x_class_host_incr_syncpt(cond, indx)); +} + +static inline u32 host1x_opcode_restart(unsigned address) +{ + return (5 << 28) | (address >> 4); +} + +static inline u32 host1x_opcode_gather(unsigned count) +{ + return (6 << 28) | count; +} + +static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count) +{ + return (6 << 28) | (offset << 16) | BIT(15) | count; +} + +static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count) +{ + return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count; +} + +#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0) #endif diff --git a/drivers/gpu/host1x/hw/hw_host1x01_channel.h b/drivers/gpu/host1x/hw/hw_host1x01_channel.h new file mode 100644 index 000000000000..9ba133205668 --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x01_channel.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2012-2013, NVIDIA Corporation. 
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */

 /*
  * Function naming determines intended use:
  *
  *     <x>_r(void) : Returns the offset for register <x>.
  *
  *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
  *
  *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
  *
  *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
  *         and masked to place it at field <y> of register <x>.  This value
  *         can be |'d with others to produce a full register value for
  *         register <x>.
  *
  *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
  *         value can be ~'d and then &'d to clear the value of field <y> for
  *         register <x>.
  *
  *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
  *         to place it at field <y> of register <x>.  This value can be |'d
  *         with others to produce a full register value for <x>.
  *
  *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
  *         <x> value 'r' after being shifted to place its LSB at bit 0.
  *         This value is suitable for direct comparison with other unshifted
  *         values appropriate for use in field <y> of register <x>.
  *
  *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
  *         field <y> of register <x>.  This value is suitable for direct
  *         comparison with unshifted values appropriate for use in field <y>
  *         of register <x>.
+ */ + +#ifndef __hw_host1x_channel_host1x_h__ +#define __hw_host1x_channel_host1x_h__ + +static inline u32 host1x_channel_dmastart_r(void) +{ + return 0x14; +} +#define HOST1X_CHANNEL_DMASTART \ + host1x_channel_dmastart_r() +static inline u32 host1x_channel_dmaput_r(void) +{ + return 0x18; +} +#define HOST1X_CHANNEL_DMAPUT \ + host1x_channel_dmaput_r() +static inline u32 host1x_channel_dmaget_r(void) +{ + return 0x1c; +} +#define HOST1X_CHANNEL_DMAGET \ + host1x_channel_dmaget_r() +static inline u32 host1x_channel_dmaend_r(void) +{ + return 0x20; +} +#define HOST1X_CHANNEL_DMAEND \ + host1x_channel_dmaend_r() +static inline u32 host1x_channel_dmactrl_r(void) +{ + return 0x24; +} +#define HOST1X_CHANNEL_DMACTRL \ + host1x_channel_dmactrl_r() +static inline u32 host1x_channel_dmactrl_dmastop(void) +{ + return 1 << 0; +} +#define HOST1X_CHANNEL_DMACTRL_DMASTOP \ + host1x_channel_dmactrl_dmastop() +static inline u32 host1x_channel_dmactrl_dmagetrst(void) +{ + return 1 << 1; +} +#define HOST1X_CHANNEL_DMACTRL_DMAGETRST \ + host1x_channel_dmactrl_dmagetrst() +static inline u32 host1x_channel_dmactrl_dmainitget(void) +{ + return 1 << 2; +} +#define HOST1X_CHANNEL_DMACTRL_DMAINITGET \ + host1x_channel_dmactrl_dmainitget() +#endif diff --git a/drivers/gpu/host1x/hw/hw_host1x01_sync.h b/drivers/gpu/host1x/hw/hw_host1x01_sync.h index eea0bb06052a..8f2a246c5426 100644 --- a/drivers/gpu/host1x/hw/hw_host1x01_sync.h +++ b/drivers/gpu/host1x/hw/hw_host1x01_sync.h @@ -77,6 +77,18 @@ static inline u32 host1x_sync_syncpt_thresh_int_enable_cpu0_r(unsigned int id) } #define HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(id) \ host1x_sync_syncpt_thresh_int_enable_cpu0_r(id) +static inline u32 host1x_sync_cmdproc_stop_r(void) +{ + return 0xac; +} +#define HOST1X_SYNC_CMDPROC_STOP \ + host1x_sync_cmdproc_stop_r() +static inline u32 host1x_sync_ch_teardown_r(void) +{ + return 0xb0; +} +#define HOST1X_SYNC_CH_TEARDOWN \ + host1x_sync_ch_teardown_r() static inline u32 
host1x_sync_usec_clk_r(void) { return 0x1a4; diff --git a/drivers/gpu/host1x/hw/hw_host1x01_uclass.h b/drivers/gpu/host1x/hw/hw_host1x01_uclass.h new file mode 100644 index 000000000000..7af660966ad6 --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x01_uclass.h @@ -0,0 +1,168 @@ +/* + * Copyright (c) 2012-2013, NVIDIA Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + */ + + /* + * Function naming determines intended use: + * + * _r(void) : Returns the offset for register . + * + * _w(void) : Returns the word offset for word (4 byte) element . + * + * __s(void) : Returns size of field of register in bits. + * + * __f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field of register . This value + * can be |'d with others to produce a full register value for + * register . + * + * __m(void) : Returns a mask for field of register . This + * value can be ~'d and then &'d to clear the value of field for + * register . + * + * ___f(void) : Returns the constant value after being shifted + * to place it at field of register . This value can be |'d + * with others to produce a full register value for . + * + * __v(u32 r) : Returns the value of field from a full register + * value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field of register . 
+ * + * ___v(void) : Returns the constant value for defined for + * field of register . This value is suitable for direct + * comparison with unshifted values appropriate for use in field + * of register . + */ + +#ifndef __hw_host1x_uclass_host1x_h__ +#define __hw_host1x_uclass_host1x_h__ + +static inline u32 host1x_uclass_incr_syncpt_r(void) +{ + return 0x0; +} +#define HOST1X_UCLASS_INCR_SYNCPT \ + host1x_uclass_incr_syncpt_r() +static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v) +{ + return (v & 0xff) << 8; +} +#define HOST1X_UCLASS_INCR_SYNCPT_COND_F(v) \ + host1x_uclass_incr_syncpt_cond_f(v) +static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v) +{ + return (v & 0xff) << 0; +} +#define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \ + host1x_uclass_incr_syncpt_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_r(void) +{ + return 0x8; +} +#define HOST1X_UCLASS_WAIT_SYNCPT \ + host1x_uclass_wait_syncpt_r() +static inline u32 host1x_uclass_wait_syncpt_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_INDX_F(v) \ + host1x_uclass_wait_syncpt_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_thresh_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_THRESH_F(v) \ + host1x_uclass_wait_syncpt_thresh_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_INDX_F(v) \ + host1x_uclass_wait_syncpt_base_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_base_indx_f(u32 v) +{ + return (v & 0xff) << 16; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_BASE_INDX_F(v) \ + host1x_uclass_wait_syncpt_base_base_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_offset_f(u32 v) +{ + return (v & 0xffff) << 0; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_OFFSET_F(v) \ + host1x_uclass_wait_syncpt_base_offset_f(v) +static inline u32 host1x_uclass_load_syncpt_base_base_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} 
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(v) \ + host1x_uclass_load_syncpt_base_base_indx_f(v) +static inline u32 host1x_uclass_load_syncpt_base_value_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(v) \ + host1x_uclass_load_syncpt_base_value_f(v) +static inline u32 host1x_uclass_incr_syncpt_base_base_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_INCR_SYNCPT_BASE_BASE_INDX_F(v) \ + host1x_uclass_incr_syncpt_base_base_indx_f(v) +static inline u32 host1x_uclass_incr_syncpt_base_offset_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +#define HOST1X_UCLASS_INCR_SYNCPT_BASE_OFFSET_F(v) \ + host1x_uclass_incr_syncpt_base_offset_f(v) +static inline u32 host1x_uclass_indoff_r(void) +{ + return 0x2d; +} +#define HOST1X_UCLASS_INDOFF \ + host1x_uclass_indoff_r() +static inline u32 host1x_uclass_indoff_indbe_f(u32 v) +{ + return (v & 0xf) << 28; +} +#define HOST1X_UCLASS_INDOFF_INDBE_F(v) \ + host1x_uclass_indoff_indbe_f(v) +static inline u32 host1x_uclass_indoff_autoinc_f(u32 v) +{ + return (v & 0x1) << 27; +} +#define HOST1X_UCLASS_INDOFF_AUTOINC_F(v) \ + host1x_uclass_indoff_autoinc_f(v) +static inline u32 host1x_uclass_indoff_indmodid_f(u32 v) +{ + return (v & 0xff) << 18; +} +#define HOST1X_UCLASS_INDOFF_INDMODID_F(v) \ + host1x_uclass_indoff_indmodid_f(v) +static inline u32 host1x_uclass_indoff_indroffset_f(u32 v) +{ + return (v & 0xffff) << 2; +} +#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ + host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_indoff_rwn_read_v(void) +{ + return 1; +} +#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ + host1x_uclass_indoff_indroffset_f(v) +#endif diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c b/drivers/gpu/host1x/hw/syncpt_hw.c index 885b2578dc86..2c1f4af1094c 100644 --- a/drivers/gpu/host1x/hw/syncpt_hw.c +++ b/drivers/gpu/host1x/hw/syncpt_hw.c @@ -93,10 +93,21 @@ static void syncpt_cpu_incr(struct host1x_syncpt *sp) wmb(); } +/* remove a 
wait pointed to by patch_addr */ +static int syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr) +{ + u32 override = host1x_class_host_wait_syncpt( + HOST1X_SYNCPT_RESERVED, 0); + + *((u32 *)patch_addr) = override; + return 0; +} + static const struct host1x_syncpt_ops host1x_syncpt_ops = { .restore = syncpt_restore, .restore_wait_base = syncpt_restore_wait_base, .load_wait_base = syncpt_read_wait_base, .load = syncpt_load, .cpu_incr = syncpt_cpu_incr, + .patch_wait = syncpt_patch_wait, }; diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c index b1b5a80e3125..2491bf82e30c 100644 --- a/drivers/gpu/host1x/intr.c +++ b/drivers/gpu/host1x/intr.c @@ -21,6 +21,8 @@ #include #include +#include +#include "channel.h" #include "dev.h" #include "intr.h" @@ -66,7 +68,7 @@ static void remove_completed_waiters(struct list_head *head, u32 sync, struct list_head completed[HOST1X_INTR_ACTION_COUNT]) { struct list_head *dest; - struct host1x_waitlist *waiter, *next; + struct host1x_waitlist *waiter, *next, *prev; list_for_each_entry_safe(waiter, next, head, list) { if ((s32)(waiter->thresh - sync) > 0) @@ -74,6 +76,17 @@ static void remove_completed_waiters(struct list_head *head, u32 sync, dest = completed + waiter->action; + /* consolidate submit cleanups */ + if (waiter->action == HOST1X_INTR_ACTION_SUBMIT_COMPLETE && + !list_empty(dest)) { + prev = list_entry(dest->prev, + struct host1x_waitlist, list); + if (prev->data == waiter->data) { + prev->count++; + dest = NULL; + } + } + /* PENDING->REMOVED or CANCELLED->HANDLED */ if (atomic_inc_return(&waiter->state) == WLS_HANDLED || !dest) { list_del(&waiter->list); @@ -94,6 +107,18 @@ static void reset_threshold_interrupt(struct host1x *host, host1x_hw_intr_enable_syncpt_intr(host, id); } +static void action_submit_complete(struct host1x_waitlist *waiter) +{ + struct host1x_channel *channel = waiter->data; + + host1x_cdma_update(&channel->cdma); + + /* Add nr_completed to trace */ + 
trace_host1x_channel_submit_complete(dev_name(channel->dev), + waiter->count, waiter->thresh); + +} + static void action_wakeup(struct host1x_waitlist *waiter) { wait_queue_head_t *wq = waiter->data; @@ -109,6 +134,7 @@ static void action_wakeup_interruptible(struct host1x_waitlist *waiter) typedef void (*action_handler)(struct host1x_waitlist *waiter); static action_handler action_handlers[HOST1X_INTR_ACTION_COUNT] = { + action_submit_complete, action_wakeup, action_wakeup_interruptible, }; diff --git a/drivers/gpu/host1x/intr.h b/drivers/gpu/host1x/intr.h index a3f06abe94bb..2b8adf016a05 100644 --- a/drivers/gpu/host1x/intr.h +++ b/drivers/gpu/host1x/intr.h @@ -25,6 +25,12 @@ struct host1x; enum host1x_intr_action { + /* + * Perform cleanup after a submit has completed. + * 'data' points to a channel + */ + HOST1X_INTR_ACTION_SUBMIT_COMPLETE = 0, + /* * Wake up a task. * 'data' points to a wait_queue_head_t diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c new file mode 100644 index 000000000000..f665d679031c --- /dev/null +++ b/drivers/gpu/host1x/job.c @@ -0,0 +1,603 @@ +/* + * Tegra host1x Job + * + * Copyright (c) 2010-2013, NVIDIA Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "channel.h" +#include "dev.h" +#include "host1x_bo.h" +#include "job.h" +#include "syncpt.h" + +struct host1x_job *host1x_job_alloc(struct host1x_channel *ch, + u32 num_cmdbufs, u32 num_relocs, + u32 num_waitchks) +{ + struct host1x_job *job = NULL; + unsigned int num_unpins = num_cmdbufs + num_relocs; + u64 total; + void *mem; + + /* Check that we're not going to overflow */ + total = sizeof(struct host1x_job) + + num_relocs * sizeof(struct host1x_reloc) + + num_unpins * sizeof(struct host1x_job_unpin_data) + + num_waitchks * sizeof(struct host1x_waitchk) + + num_cmdbufs * sizeof(struct host1x_job_gather) + + num_unpins * sizeof(dma_addr_t) + + num_unpins * sizeof(u32 *); + if (total > ULONG_MAX) + return NULL; + + mem = job = kzalloc(total, GFP_KERNEL); + if (!job) + return NULL; + + kref_init(&job->ref); + job->channel = ch; + + /* Redistribute memory to the structs */ + mem += sizeof(struct host1x_job); + job->relocarray = num_relocs ? mem : NULL; + mem += num_relocs * sizeof(struct host1x_reloc); + job->unpins = num_unpins ? mem : NULL; + mem += num_unpins * sizeof(struct host1x_job_unpin_data); + job->waitchk = num_waitchks ? mem : NULL; + mem += num_waitchks * sizeof(struct host1x_waitchk); + job->gathers = num_cmdbufs ? mem : NULL; + mem += num_cmdbufs * sizeof(struct host1x_job_gather); + job->addr_phys = num_unpins ? 
mem : NULL; + + job->reloc_addr_phys = job->addr_phys; + job->gather_addr_phys = &job->addr_phys[num_relocs]; + + return job; +} + +struct host1x_job *host1x_job_get(struct host1x_job *job) +{ + kref_get(&job->ref); + return job; +} + +static void job_free(struct kref *ref) +{ + struct host1x_job *job = container_of(ref, struct host1x_job, ref); + + kfree(job); +} + +void host1x_job_put(struct host1x_job *job) +{ + kref_put(&job->ref, job_free); +} + +void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo, + u32 words, u32 offset) +{ + struct host1x_job_gather *cur_gather = &job->gathers[job->num_gathers]; + + cur_gather->words = words; + cur_gather->bo = bo; + cur_gather->offset = offset; + job->num_gathers++; +} + +/* + * NULL an already satisfied WAIT_SYNCPT host method, by patching its + * args in the command stream. The method data is changed to reference + * a reserved (never given out or incr) HOST1X_SYNCPT_RESERVED syncpt + * with a matching threshold value of 0, so is guaranteed to be popped + * by the host HW. + */ +static void host1x_syncpt_patch_offset(struct host1x_syncpt *sp, + struct host1x_bo *h, u32 offset) +{ + void *patch_addr = NULL; + + /* patch the wait */ + patch_addr = host1x_bo_kmap(h, offset >> PAGE_SHIFT); + if (patch_addr) { + host1x_syncpt_patch_wait(sp, + patch_addr + (offset & ~PAGE_MASK)); + host1x_bo_kunmap(h, offset >> PAGE_SHIFT, patch_addr); + } else + pr_err("Could not map cmdbuf for wait check\n"); +} + +/* + * Check driver supplied waitchk structs for syncpt thresholds + * that have already been satisfied and NULL the comparison (to + * avoid a wrap condition in the HW). 
+ */ +static int do_waitchks(struct host1x_job *job, struct host1x *host, + struct host1x_bo *patch) +{ + int i; + + /* compare syncpt vs wait threshold */ + for (i = 0; i < job->num_waitchk; i++) { + struct host1x_waitchk *wait = &job->waitchk[i]; + struct host1x_syncpt *sp = + host1x_syncpt_get(host, wait->syncpt_id); + + /* validate syncpt id */ + if (wait->syncpt_id > host1x_syncpt_nb_pts(host)) + continue; + + /* skip all other gathers */ + if (patch != wait->bo) + continue; + + trace_host1x_syncpt_wait_check(wait->bo, wait->offset, + wait->syncpt_id, wait->thresh, + host1x_syncpt_read_min(sp)); + + if (host1x_syncpt_is_expired(sp, wait->thresh)) { + dev_dbg(host->dev, + "drop WAIT id %d (%s) thresh 0x%x, min 0x%x\n", + wait->syncpt_id, sp->name, wait->thresh, + host1x_syncpt_read_min(sp)); + + host1x_syncpt_patch_offset(sp, patch, wait->offset); + } + + wait->bo = NULL; + } + + return 0; +} + +static unsigned int pin_job(struct host1x_job *job) +{ + unsigned int i; + + job->num_unpins = 0; + + for (i = 0; i < job->num_relocs; i++) { + struct host1x_reloc *reloc = &job->relocarray[i]; + struct sg_table *sgt; + dma_addr_t phys_addr; + + reloc->target = host1x_bo_get(reloc->target); + if (!reloc->target) + goto unpin; + + phys_addr = host1x_bo_pin(reloc->target, &sgt); + if (!phys_addr) + goto unpin; + + job->addr_phys[job->num_unpins] = phys_addr; + job->unpins[job->num_unpins].bo = reloc->target; + job->unpins[job->num_unpins].sgt = sgt; + job->num_unpins++; + } + + for (i = 0; i < job->num_gathers; i++) { + struct host1x_job_gather *g = &job->gathers[i]; + struct sg_table *sgt; + dma_addr_t phys_addr; + + g->bo = host1x_bo_get(g->bo); + if (!g->bo) + goto unpin; + + phys_addr = host1x_bo_pin(g->bo, &sgt); + if (!phys_addr) + goto unpin; + + job->addr_phys[job->num_unpins] = phys_addr; + job->unpins[job->num_unpins].bo = g->bo; + job->unpins[job->num_unpins].sgt = sgt; + job->num_unpins++; + } + + return job->num_unpins; + +unpin: + host1x_job_unpin(job); + 
return 0; +} + +static unsigned int do_relocs(struct host1x_job *job, struct host1x_bo *cmdbuf) +{ + int i = 0; + u32 last_page = ~0; + void *cmdbuf_page_addr = NULL; + + /* pin & patch the relocs for one gather */ + while (i < job->num_relocs) { + struct host1x_reloc *reloc = &job->relocarray[i]; + u32 reloc_addr = (job->reloc_addr_phys[i] + + reloc->target_offset) >> reloc->shift; + u32 *target; + + /* skip all other gathers */ + if (!(reloc->cmdbuf && cmdbuf == reloc->cmdbuf)) { + i++; + continue; + } + + if (last_page != reloc->cmdbuf_offset >> PAGE_SHIFT) { + if (cmdbuf_page_addr) + host1x_bo_kunmap(cmdbuf, last_page, + cmdbuf_page_addr); + + cmdbuf_page_addr = host1x_bo_kmap(cmdbuf, + reloc->cmdbuf_offset >> PAGE_SHIFT); + last_page = reloc->cmdbuf_offset >> PAGE_SHIFT; + + if (unlikely(!cmdbuf_page_addr)) { + pr_err("Could not map cmdbuf for relocation\n"); + return -ENOMEM; + } + } + + target = cmdbuf_page_addr + (reloc->cmdbuf_offset & ~PAGE_MASK); + *target = reloc_addr; + + /* mark this gather as handled */ + reloc->cmdbuf = 0; + } + + if (cmdbuf_page_addr) + host1x_bo_kunmap(cmdbuf, last_page, cmdbuf_page_addr); + + return 0; +} + +static int check_reloc(struct host1x_reloc *reloc, struct host1x_bo *cmdbuf, + unsigned int offset) +{ + offset *= sizeof(u32); + + if (reloc->cmdbuf != cmdbuf || reloc->cmdbuf_offset != offset) + return -EINVAL; + + return 0; +} + +struct host1x_firewall { + struct host1x_job *job; + struct device *dev; + + unsigned int num_relocs; + struct host1x_reloc *reloc; + + struct host1x_bo *cmdbuf_id; + unsigned int offset; + + u32 words; + u32 class; + u32 reg; + u32 mask; + u32 count; +}; + +static int check_mask(struct host1x_firewall *fw) +{ + u32 mask = fw->mask; + u32 reg = fw->reg; + + while (mask) { + if (fw->words == 0) + return -EINVAL; + + if (mask & 1) { + if (fw->job->is_addr_reg(fw->dev, fw->class, reg)) { + bool bad_reloc = check_reloc(fw->reloc, + fw->cmdbuf_id, + fw->offset); + if (!fw->num_relocs || bad_reloc) + 
return -EINVAL; + fw->reloc++; + fw->num_relocs--; + } + fw->words--; + fw->offset++; + } + mask >>= 1; + reg++; + } + + return 0; +} + +static int check_incr(struct host1x_firewall *fw) +{ + u32 count = fw->count; + u32 reg = fw->reg; + + while (fw) { + if (fw->words == 0) + return -EINVAL; + + if (fw->job->is_addr_reg(fw->dev, fw->class, reg)) { + bool bad_reloc = check_reloc(fw->reloc, fw->cmdbuf_id, + fw->offset); + if (!fw->num_relocs || bad_reloc) + return -EINVAL; + fw->reloc++; + fw->num_relocs--; + } + reg++; + fw->words--; + fw->offset++; + count--; + } + + return 0; +} + +static int check_nonincr(struct host1x_firewall *fw) +{ + int is_addr_reg = fw->job->is_addr_reg(fw->dev, fw->class, fw->reg); + u32 count = fw->count; + + while (count) { + if (fw->words == 0) + return -EINVAL; + + if (is_addr_reg) { + bool bad_reloc = check_reloc(fw->reloc, fw->cmdbuf_id, + fw->offset); + if (!fw->num_relocs || bad_reloc) + return -EINVAL; + fw->reloc++; + fw->num_relocs--; + } + fw->words--; + fw->offset++; + count--; + } + + return 0; +} + +static int validate(struct host1x_job *job, struct device *dev, + struct host1x_job_gather *g) +{ + u32 *cmdbuf_base; + int err = 0; + struct host1x_firewall fw; + + fw.job = job; + fw.dev = dev; + fw.reloc = job->relocarray; + fw.num_relocs = job->num_relocs; + fw.cmdbuf_id = g->bo; + + fw.offset = 0; + fw.class = 0; + + if (!job->is_addr_reg) + return 0; + + cmdbuf_base = host1x_bo_mmap(g->bo); + if (!cmdbuf_base) + return -ENOMEM; + + fw.words = g->words; + while (fw.words && !err) { + u32 word = cmdbuf_base[fw.offset]; + u32 opcode = (word & 0xf0000000) >> 28; + + fw.mask = 0; + fw.reg = 0; + fw.count = 0; + fw.words--; + fw.offset++; + + switch (opcode) { + case 0: + fw.class = word >> 6 & 0x3ff; + fw.mask = word & 0x3f; + fw.reg = word >> 16 & 0xfff; + err = check_mask(&fw); + if (err) + goto out; + break; + case 1: + fw.reg = word >> 16 & 0xfff; + fw.count = word & 0xffff; + err = check_incr(&fw); + if (err) + goto out; + 
break; + + case 2: + fw.reg = word >> 16 & 0xfff; + fw.count = word & 0xffff; + err = check_nonincr(&fw); + if (err) + goto out; + break; + + case 3: + fw.mask = word & 0xffff; + fw.reg = word >> 16 & 0xfff; + err = check_mask(&fw); + if (err) + goto out; + break; + case 4: + case 5: + case 14: + break; + default: + err = -EINVAL; + break; + } + } + + /* No relocs should remain at this point */ + if (fw.num_relocs) + err = -EINVAL; + +out: + host1x_bo_munmap(g->bo, cmdbuf_base); + + return err; +} + +static inline int copy_gathers(struct host1x_job *job, struct device *dev) +{ + size_t size = 0; + size_t offset = 0; + int i; + + for (i = 0; i < job->num_gathers; i++) { + struct host1x_job_gather *g = &job->gathers[i]; + size += g->words * sizeof(u32); + } + + job->gather_copy_mapped = dma_alloc_writecombine(dev, size, + &job->gather_copy, + GFP_KERNEL); + if (!job->gather_copy_mapped) { + int err = PTR_ERR(job->gather_copy_mapped); + job->gather_copy_mapped = NULL; + return err; + } + + job->gather_copy_size = size; + + for (i = 0; i < job->num_gathers; i++) { + struct host1x_job_gather *g = &job->gathers[i]; + void *gather; + + gather = host1x_bo_mmap(g->bo); + memcpy(job->gather_copy_mapped + offset, gather + g->offset, + g->words * sizeof(u32)); + host1x_bo_munmap(g->bo, gather); + + g->base = job->gather_copy; + g->offset = offset; + g->bo = NULL; + + offset += g->words * sizeof(u32); + } + + return 0; +} + +int host1x_job_pin(struct host1x_job *job, struct device *dev) +{ + int err; + unsigned int i, j; + struct host1x *host = dev_get_drvdata(dev->parent); + DECLARE_BITMAP(waitchk_mask, host1x_syncpt_nb_pts(host)); + + bitmap_zero(waitchk_mask, host1x_syncpt_nb_pts(host)); + for (i = 0; i < job->num_waitchk; i++) { + u32 syncpt_id = job->waitchk[i].syncpt_id; + if (syncpt_id < host1x_syncpt_nb_pts(host)) + set_bit(syncpt_id, waitchk_mask); + } + + /* get current syncpt values for waitchk */ + for_each_set_bit(i, waitchk_mask, host1x_syncpt_nb_pts(host)) + 
host1x_syncpt_load(host->syncpt + i); + + /* pin memory */ + err = pin_job(job); + if (!err) + goto out; + + /* patch gathers */ + for (i = 0; i < job->num_gathers; i++) { + struct host1x_job_gather *g = &job->gathers[i]; + + /* process each gather mem only once */ + if (g->handled) + continue; + + g->base = job->gather_addr_phys[i]; + + for (j = 0; j < job->num_gathers; j++) + if (job->gathers[j].bo == g->bo) + job->gathers[j].handled = true; + + err = 0; + + if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) + err = validate(job, dev, g); + + if (err) + dev_err(dev, "Job invalid (err=%d)\n", err); + + if (!err) + err = do_relocs(job, g->bo); + + if (!err) + err = do_waitchks(job, host, g->bo); + + if (err) + break; + } + + if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && !err) { + err = copy_gathers(job, dev); + if (err) { + host1x_job_unpin(job); + return err; + } + } + +out: + wmb(); + + return err; +} + +void host1x_job_unpin(struct host1x_job *job) +{ + unsigned int i; + + for (i = 0; i < job->num_unpins; i++) { + struct host1x_job_unpin_data *unpin = &job->unpins[i]; + host1x_bo_unpin(unpin->bo, unpin->sgt); + host1x_bo_put(unpin->bo); + } + job->num_unpins = 0; + + if (job->gather_copy_size) + dma_free_writecombine(job->channel->dev, job->gather_copy_size, + job->gather_copy_mapped, + job->gather_copy); +} + +/* + * Debug routine used to dump job entries + */ +void host1x_job_dump(struct device *dev, struct host1x_job *job) +{ + dev_dbg(dev, " SYNCPT_ID %d\n", job->syncpt_id); + dev_dbg(dev, " SYNCPT_VAL %d\n", job->syncpt_end); + dev_dbg(dev, " FIRST_GET 0x%x\n", job->first_get); + dev_dbg(dev, " TIMEOUT %d\n", job->timeout); + dev_dbg(dev, " NUM_SLOTS %d\n", job->num_slots); + dev_dbg(dev, " NUM_HANDLES %d\n", job->num_unpins); +} diff --git a/drivers/gpu/host1x/job.h b/drivers/gpu/host1x/job.h new file mode 100644 index 000000000000..fba45f20458e --- /dev/null +++ b/drivers/gpu/host1x/job.h @@ -0,0 +1,162 @@ +/* + * Tegra host1x Job + * + * Copyright (c) 
2011-2013, NVIDIA Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __HOST1X_JOB_H +#define __HOST1X_JOB_H + +struct host1x_job_gather { + u32 words; + dma_addr_t base; + struct host1x_bo *bo; + int offset; + bool handled; +}; + +struct host1x_cmdbuf { + u32 handle; + u32 offset; + u32 words; + u32 pad; +}; + +struct host1x_reloc { + struct host1x_bo *cmdbuf; + u32 cmdbuf_offset; + struct host1x_bo *target; + u32 target_offset; + u32 shift; + u32 pad; +}; + +struct host1x_waitchk { + struct host1x_bo *bo; + u32 offset; + u32 syncpt_id; + u32 thresh; +}; + +struct host1x_job_unpin_data { + struct host1x_bo *bo; + struct sg_table *sgt; +}; + +/* + * Each submit is tracked as a host1x_job. 
+ */ +struct host1x_job { + /* When refcount goes to zero, job can be freed */ + struct kref ref; + + /* List entry */ + struct list_head list; + + /* Channel where job is submitted to */ + struct host1x_channel *channel; + + u32 client; + + /* Gathers and their memory */ + struct host1x_job_gather *gathers; + unsigned int num_gathers; + + /* Wait checks to be processed at submit time */ + struct host1x_waitchk *waitchk; + unsigned int num_waitchk; + u32 waitchk_mask; + + /* Array of handles to be pinned & unpinned */ + struct host1x_reloc *relocarray; + unsigned int num_relocs; + struct host1x_job_unpin_data *unpins; + unsigned int num_unpins; + + dma_addr_t *addr_phys; + dma_addr_t *gather_addr_phys; + dma_addr_t *reloc_addr_phys; + + /* Sync point id, number of increments and end related to the submit */ + u32 syncpt_id; + u32 syncpt_incrs; + u32 syncpt_end; + + /* Maximum time to wait for this job */ + unsigned int timeout; + + /* Index and number of slots used in the push buffer */ + unsigned int first_get; + unsigned int num_slots; + + /* Copy of gathers */ + size_t gather_copy_size; + dma_addr_t gather_copy; + u8 *gather_copy_mapped; + + /* Check if register is marked as an address reg */ + int (*is_addr_reg)(struct device *dev, u32 reg, u32 class); + + /* Request a SETCLASS to this class */ + u32 class; + + /* Add a channel wait for previous ops to complete */ + bool serialize; +}; +/* + * Allocate memory for a job. Just enough memory will be allocated to + * accomodate the submit. + */ +struct host1x_job *host1x_job_alloc(struct host1x_channel *ch, + u32 num_cmdbufs, u32 num_relocs, + u32 num_waitchks); + +/* + * Add a gather to a job. + */ +void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *mem_id, + u32 words, u32 offset); + +/* + * Increment reference going to host1x_job. + */ +struct host1x_job *host1x_job_get(struct host1x_job *job); + +/* + * Decrement reference job, free if goes to zero. 
+ */ +void host1x_job_put(struct host1x_job *job); + +/* + * Pin memory related to job. This handles relocation of addresses to the + * host1x address space. Handles both the gather memory and any other memory + * referred to from the gather buffers. + * + * Handles also patching out host waits that would wait for an expired sync + * point value. + */ +int host1x_job_pin(struct host1x_job *job, struct device *dev); + +/* + * Unpin memory related to job. + */ +void host1x_job_unpin(struct host1x_job *job); + +/* + * Dump contents of job to debug output. + */ +void host1x_job_dump(struct device *dev, struct host1x_job *job); + +#endif diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c index 07fad1412bab..7e77e63da57b 100644 --- a/drivers/gpu/host1x/syncpt.c +++ b/drivers/gpu/host1x/syncpt.c @@ -300,6 +300,12 @@ bool host1x_syncpt_is_expired(struct host1x_syncpt *sp, u32 thresh) return (s32)(current_val - thresh) >= 0; } +/* remove a wait pointed to by patch_addr */ +int host1x_syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr) +{ + return host1x_hw_syncpt_patch_wait(sp->host, sp, patch_addr); +} + int host1x_syncpt_init(struct host1x *host) { struct host1x_syncpt *syncpt; @@ -319,6 +325,11 @@ int host1x_syncpt_init(struct host1x *host) host1x_syncpt_restore(host); + /* Allocate sync point to use for clearing waits for expired fences */ + host->nop_sp = _host1x_syncpt_alloc(host, NULL, 0); + if (!host->nop_sp) + return -ENOMEM; + return 0; } diff --git a/drivers/gpu/host1x/syncpt.h b/drivers/gpu/host1x/syncpt.h index 17c1616de100..c99806130f2e 100644 --- a/drivers/gpu/host1x/syncpt.h +++ b/drivers/gpu/host1x/syncpt.h @@ -27,6 +27,9 @@ struct host1x; +/* Reserved for replacing an expired wait with a NOP */ +#define HOST1X_SYNCPT_RESERVED 0 + struct host1x_syncpt { int id; atomic_t min_val; @@ -146,6 +149,9 @@ static inline int host1x_syncpt_is_valid(struct host1x_syncpt *sp) return sp->id < host1x_syncpt_nb_pts(sp->host); } +/* Patch a 
wait by replacing it with a wait for syncpt 0 value 0 */ +int host1x_syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr); + /* Return id of the sync point */ u32 host1x_syncpt_id(struct host1x_syncpt *sp); diff --git a/include/trace/events/host1x.h b/include/trace/events/host1x.h index 4bd2b0253d27..94db6a2c3540 100644 --- a/include/trace/events/host1x.h +++ b/include/trace/events/host1x.h @@ -37,6 +37,171 @@ DECLARE_EVENT_CLASS(host1x, TP_printk("name=%s", __entry->name) ); +DEFINE_EVENT(host1x, host1x_channel_open, + TP_PROTO(const char *name), + TP_ARGS(name) +); + +DEFINE_EVENT(host1x, host1x_channel_release, + TP_PROTO(const char *name), + TP_ARGS(name) +); + +DEFINE_EVENT(host1x, host1x_cdma_begin, + TP_PROTO(const char *name), + TP_ARGS(name) +); + +DEFINE_EVENT(host1x, host1x_cdma_end, + TP_PROTO(const char *name), + TP_ARGS(name) +); + +TRACE_EVENT(host1x_cdma_push, + TP_PROTO(const char *name, u32 op1, u32 op2), + + TP_ARGS(name, op1, op2), + + TP_STRUCT__entry( + __field(const char *, name) + __field(u32, op1) + __field(u32, op2) + ), + + TP_fast_assign( + __entry->name = name; + __entry->op1 = op1; + __entry->op2 = op2; + ), + + TP_printk("name=%s, op1=%08x, op2=%08x", + __entry->name, __entry->op1, __entry->op2) +); + +TRACE_EVENT(host1x_cdma_push_gather, + TP_PROTO(const char *name, u32 mem_id, + u32 words, u32 offset, void *cmdbuf), + + TP_ARGS(name, mem_id, words, offset, cmdbuf), + + TP_STRUCT__entry( + __field(const char *, name) + __field(u32, mem_id) + __field(u32, words) + __field(u32, offset) + __field(bool, cmdbuf) + __dynamic_array(u32, cmdbuf, words) + ), + + TP_fast_assign( + if (cmdbuf) { + memcpy(__get_dynamic_array(cmdbuf), cmdbuf+offset, + words * sizeof(u32)); + } + __entry->cmdbuf = cmdbuf; + __entry->name = name; + __entry->mem_id = mem_id; + __entry->words = words; + __entry->offset = offset; + ), + + TP_printk("name=%s, mem_id=%08x, words=%u, offset=%d, contents=[%s]", + __entry->name, __entry->mem_id, + __entry->words, 
__entry->offset, + __print_hex(__get_dynamic_array(cmdbuf), + __entry->cmdbuf ? __entry->words * 4 : 0)) +); + +TRACE_EVENT(host1x_channel_submit, + TP_PROTO(const char *name, u32 cmdbufs, u32 relocs, u32 waitchks, + u32 syncpt_id, u32 syncpt_incrs), + + TP_ARGS(name, cmdbufs, relocs, waitchks, syncpt_id, syncpt_incrs), + + TP_STRUCT__entry( + __field(const char *, name) + __field(u32, cmdbufs) + __field(u32, relocs) + __field(u32, waitchks) + __field(u32, syncpt_id) + __field(u32, syncpt_incrs) + ), + + TP_fast_assign( + __entry->name = name; + __entry->cmdbufs = cmdbufs; + __entry->relocs = relocs; + __entry->waitchks = waitchks; + __entry->syncpt_id = syncpt_id; + __entry->syncpt_incrs = syncpt_incrs; + ), + + TP_printk("name=%s, cmdbufs=%u, relocs=%u, waitchks=%d," + "syncpt_id=%u, syncpt_incrs=%u", + __entry->name, __entry->cmdbufs, __entry->relocs, __entry->waitchks, + __entry->syncpt_id, __entry->syncpt_incrs) +); + +TRACE_EVENT(host1x_channel_submitted, + TP_PROTO(const char *name, u32 syncpt_base, u32 syncpt_max), + + TP_ARGS(name, syncpt_base, syncpt_max), + + TP_STRUCT__entry( + __field(const char *, name) + __field(u32, syncpt_base) + __field(u32, syncpt_max) + ), + + TP_fast_assign( + __entry->name = name; + __entry->syncpt_base = syncpt_base; + __entry->syncpt_max = syncpt_max; + ), + + TP_printk("name=%s, syncpt_base=%d, syncpt_max=%d", + __entry->name, __entry->syncpt_base, __entry->syncpt_max) +); + +TRACE_EVENT(host1x_channel_submit_complete, + TP_PROTO(const char *name, int count, u32 thresh), + + TP_ARGS(name, count, thresh), + + TP_STRUCT__entry( + __field(const char *, name) + __field(int, count) + __field(u32, thresh) + ), + + TP_fast_assign( + __entry->name = name; + __entry->count = count; + __entry->thresh = thresh; + ), + + TP_printk("name=%s, count=%d, thresh=%d", + __entry->name, __entry->count, __entry->thresh) +); + +TRACE_EVENT(host1x_wait_cdma, + TP_PROTO(const char *name, u32 eventid), + + TP_ARGS(name, eventid), + + 
TP_STRUCT__entry( + __field(const char *, name) + __field(u32, eventid) + ), + + TP_fast_assign( + __entry->name = name; + __entry->eventid = eventid; + ), + + TP_printk("name=%s, event=%d", __entry->name, __entry->eventid) +); + TRACE_EVENT(host1x_syncpt_load_min, TP_PROTO(u32 id, u32 val), @@ -55,6 +220,33 @@ TRACE_EVENT(host1x_syncpt_load_min, TP_printk("id=%d, val=%d", __entry->id, __entry->val) ); +TRACE_EVENT(host1x_syncpt_wait_check, + TP_PROTO(void *mem_id, u32 offset, u32 syncpt_id, u32 thresh, u32 min), + + TP_ARGS(mem_id, offset, syncpt_id, thresh, min), + + TP_STRUCT__entry( + __field(void *, mem_id) + __field(u32, offset) + __field(u32, syncpt_id) + __field(u32, thresh) + __field(u32, min) + ), + + TP_fast_assign( + __entry->mem_id = mem_id; + __entry->offset = offset; + __entry->syncpt_id = syncpt_id; + __entry->thresh = thresh; + __entry->min = min; + ), + + TP_printk("mem_id=%p, offset=%05x, id=%d, thresh=%d, current=%d", + __entry->mem_id, __entry->offset, + __entry->syncpt_id, __entry->thresh, + __entry->min) +); + #endif /* _TRACE_HOST1X_H */ /* This part must be outside protection */ -- cgit From 6236451d83a720072053855fa63d51934024a707 Mon Sep 17 00:00:00 2001 From: Terje Bergstrom Date: Fri, 22 Mar 2013 16:34:04 +0200 Subject: gpu: host1x: Add debug support Add support for host1x debugging. Adds debugfs entries, and dumps channel state to UART in case of stuck job. 
Signed-off-by: Arto Merilainen Signed-off-by: Terje Bergstrom Reviewed-by: Thierry Reding Tested-by: Thierry Reding Tested-by: Erik Faye-Lund Signed-off-by: Thierry Reding --- drivers/gpu/host1x/Makefile | 1 + drivers/gpu/host1x/cdma.c | 4 + drivers/gpu/host1x/debug.c | 210 ++++++++++++++++++ drivers/gpu/host1x/debug.h | 51 +++++ drivers/gpu/host1x/dev.c | 3 + drivers/gpu/host1x/dev.h | 42 ++++ drivers/gpu/host1x/hw/cdma_hw.c | 2 + drivers/gpu/host1x/hw/channel_hw.c | 25 +++ drivers/gpu/host1x/hw/debug_hw.c | 322 ++++++++++++++++++++++++++++ drivers/gpu/host1x/hw/host1x01.c | 2 + drivers/gpu/host1x/hw/hw_host1x01_channel.h | 18 ++ drivers/gpu/host1x/hw/hw_host1x01_sync.h | 115 ++++++++++ drivers/gpu/host1x/hw/hw_host1x01_uclass.h | 6 + drivers/gpu/host1x/hw/syncpt_hw.c | 1 + drivers/gpu/host1x/syncpt.c | 5 + 15 files changed, 807 insertions(+) create mode 100644 drivers/gpu/host1x/debug.c create mode 100644 drivers/gpu/host1x/debug.h create mode 100644 drivers/gpu/host1x/hw/debug_hw.c (limited to 'drivers/gpu/host1x/hw/host1x01.c') diff --git a/drivers/gpu/host1x/Makefile b/drivers/gpu/host1x/Makefile index 06a995b60902..49fd5807b0e7 100644 --- a/drivers/gpu/host1x/Makefile +++ b/drivers/gpu/host1x/Makefile @@ -7,6 +7,7 @@ host1x-y = \ cdma.o \ channel.o \ job.o \ + debug.o \ hw/host1x01.o obj-$(CONFIG_TEGRA_HOST1X) += host1x.o diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c index 33935de91bb1..de72172d3b5f 100644 --- a/drivers/gpu/host1x/cdma.c +++ b/drivers/gpu/host1x/cdma.c @@ -439,6 +439,10 @@ void host1x_cdma_push(struct host1x_cdma *cdma, u32 op1, u32 op2) struct push_buffer *pb = &cdma->push_buffer; u32 slots_free = cdma->slots_free; + if (host1x_debug_trace_cmdbuf) + trace_host1x_cdma_push(dev_name(cdma_to_channel(cdma)->dev), + op1, op2); + if (slots_free == 0) { host1x_hw_cdma_flush(host1x, cdma); slots_free = host1x_cdma_wait_locked(cdma, diff --git a/drivers/gpu/host1x/debug.c b/drivers/gpu/host1x/debug.c new file mode 100644 index 
000000000000..3ec7d77de24d --- /dev/null +++ b/drivers/gpu/host1x/debug.c @@ -0,0 +1,210 @@ +/* + * Copyright (C) 2010 Google, Inc. + * Author: Erik Gilling + * + * Copyright (C) 2011-2013 NVIDIA Corporation + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include +#include + +#include + +#include "dev.h" +#include "debug.h" +#include "channel.h" + +unsigned int host1x_debug_trace_cmdbuf; + +static pid_t host1x_debug_force_timeout_pid; +static u32 host1x_debug_force_timeout_val; +static u32 host1x_debug_force_timeout_channel; + +void host1x_debug_output(struct output *o, const char *fmt, ...) 
+{ + va_list args; + int len; + + va_start(args, fmt); + len = vsnprintf(o->buf, sizeof(o->buf), fmt, args); + va_end(args); + o->fn(o->ctx, o->buf, len); +} + +static int show_channels(struct host1x_channel *ch, void *data, bool show_fifo) +{ + struct host1x *m = dev_get_drvdata(ch->dev->parent); + struct output *o = data; + + mutex_lock(&ch->reflock); + if (ch->refcount) { + mutex_lock(&ch->cdma.lock); + if (show_fifo) + host1x_hw_show_channel_fifo(m, ch, o); + host1x_hw_show_channel_cdma(m, ch, o); + mutex_unlock(&ch->cdma.lock); + } + mutex_unlock(&ch->reflock); + + return 0; +} + +static void show_syncpts(struct host1x *m, struct output *o) +{ + int i; + host1x_debug_output(o, "---- syncpts ----\n"); + for (i = 0; i < host1x_syncpt_nb_pts(m); i++) { + u32 max = host1x_syncpt_read_max(m->syncpt + i); + u32 min = host1x_syncpt_load(m->syncpt + i); + if (!min && !max) + continue; + host1x_debug_output(o, "id %d (%s) min %d max %d\n", + i, m->syncpt[i].name, min, max); + } + + for (i = 0; i < host1x_syncpt_nb_bases(m); i++) { + u32 base_val; + base_val = host1x_syncpt_load_wait_base(m->syncpt + i); + if (base_val) + host1x_debug_output(o, "waitbase id %d val %d\n", i, + base_val); + } + + host1x_debug_output(o, "\n"); +} + +static void show_all(struct host1x *m, struct output *o) +{ + struct host1x_channel *ch; + + host1x_hw_show_mlocks(m, o); + show_syncpts(m, o); + host1x_debug_output(o, "---- channels ----\n"); + + host1x_for_each_channel(m, ch) + show_channels(ch, o, true); +} + +#ifdef CONFIG_DEBUG_FS +static void show_all_no_fifo(struct host1x *host1x, struct output *o) +{ + struct host1x_channel *ch; + + host1x_hw_show_mlocks(host1x, o); + show_syncpts(host1x, o); + host1x_debug_output(o, "---- channels ----\n"); + + host1x_for_each_channel(host1x, ch) + show_channels(ch, o, false); +} + +static int host1x_debug_show_all(struct seq_file *s, void *unused) +{ + struct output o = { + .fn = write_to_seqfile, + .ctx = s + }; + show_all(s->private, &o); + return 
0; +} + +static int host1x_debug_show(struct seq_file *s, void *unused) +{ + struct output o = { + .fn = write_to_seqfile, + .ctx = s + }; + show_all_no_fifo(s->private, &o); + return 0; +} + +static int host1x_debug_open_all(struct inode *inode, struct file *file) +{ + return single_open(file, host1x_debug_show_all, inode->i_private); +} + +static const struct file_operations host1x_debug_all_fops = { + .open = host1x_debug_open_all, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int host1x_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, host1x_debug_show, inode->i_private); +} + +static const struct file_operations host1x_debug_fops = { + .open = host1x_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void host1x_debug_init(struct host1x *host1x) +{ + struct dentry *de = debugfs_create_dir("tegra-host1x", NULL); + + if (!de) + return; + + /* Store the created entry */ + host1x->debugfs = de; + + debugfs_create_file("status", S_IRUGO, de, host1x, &host1x_debug_fops); + debugfs_create_file("status_all", S_IRUGO, de, host1x, + &host1x_debug_all_fops); + + debugfs_create_u32("trace_cmdbuf", S_IRUGO|S_IWUSR, de, + &host1x_debug_trace_cmdbuf); + + host1x_hw_debug_init(host1x, de); + + debugfs_create_u32("force_timeout_pid", S_IRUGO|S_IWUSR, de, + &host1x_debug_force_timeout_pid); + debugfs_create_u32("force_timeout_val", S_IRUGO|S_IWUSR, de, + &host1x_debug_force_timeout_val); + debugfs_create_u32("force_timeout_channel", S_IRUGO|S_IWUSR, de, + &host1x_debug_force_timeout_channel); +} + +void host1x_debug_deinit(struct host1x *host1x) +{ + debugfs_remove_recursive(host1x->debugfs); +} +#else +void host1x_debug_init(struct host1x *host1x) +{ +} +void host1x_debug_deinit(struct host1x *host1x) +{ +} +#endif + +void host1x_debug_dump(struct host1x *host1x) +{ + struct output o = { + .fn = write_to_printk + }; + show_all(host1x, &o); +} + +void 
host1x_debug_dump_syncpts(struct host1x *host1x) +{ + struct output o = { + .fn = write_to_printk + }; + show_syncpts(host1x, &o); +} diff --git a/drivers/gpu/host1x/debug.h b/drivers/gpu/host1x/debug.h new file mode 100644 index 000000000000..4595b2e0799f --- /dev/null +++ b/drivers/gpu/host1x/debug.h @@ -0,0 +1,51 @@ +/* + * Tegra host1x Debug + * + * Copyright (c) 2011-2013 NVIDIA Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef __HOST1X_DEBUG_H +#define __HOST1X_DEBUG_H + +#include +#include + +struct host1x; + +struct output { + void (*fn)(void *ctx, const char *str, size_t len); + void *ctx; + char buf[256]; +}; + +static inline void write_to_seqfile(void *ctx, const char *str, size_t len) +{ + seq_write((struct seq_file *)ctx, str, len); +} + +static inline void write_to_printk(void *ctx, const char *str, size_t len) +{ + pr_info("%s", str); +} + +void __printf(2, 3) host1x_debug_output(struct output *o, const char *fmt, ...); + +extern unsigned int host1x_debug_trace_cmdbuf; + +void host1x_debug_init(struct host1x *host1x); +void host1x_debug_deinit(struct host1x *host1x); +void host1x_debug_dump(struct host1x *host1x); +void host1x_debug_dump_syncpts(struct host1x *host1x); + +#endif diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 4e522c532bc8..96897242fcc2 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -30,6 +30,7 @@ #include "dev.h" #include "intr.h" #include 
"channel.h" +#include "debug.h" #include "hw/host1x01.h" void host1x_sync_writel(struct host1x *host1x, u32 v, u32 r) @@ -147,6 +148,8 @@ static int host1x_probe(struct platform_device *pdev) goto fail_deinit_syncpt; } + host1x_debug_init(host); + return 0; fail_deinit_syncpt: diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h index 1a9b4383dc3b..4d16fe92400a 100644 --- a/drivers/gpu/host1x/dev.h +++ b/drivers/gpu/host1x/dev.h @@ -31,6 +31,8 @@ struct host1x_channel; struct host1x_cdma; struct host1x_job; struct push_buffer; +struct output; +struct dentry; struct host1x_channel_ops { int (*init)(struct host1x_channel *channel, struct host1x *host, @@ -54,6 +56,18 @@ struct host1x_pushbuffer_ops { void (*init)(struct push_buffer *pb); }; +struct host1x_debug_ops { + void (*debug_init)(struct dentry *de); + void (*show_channel_cdma)(struct host1x *host, + struct host1x_channel *ch, + struct output *o); + void (*show_channel_fifo)(struct host1x *host, + struct host1x_channel *ch, + struct output *o); + void (*show_mlocks)(struct host1x *host, struct output *output); + +}; + struct host1x_syncpt_ops { void (*restore)(struct host1x_syncpt *syncpt); void (*restore_wait_base)(struct host1x_syncpt *syncpt); @@ -100,6 +114,7 @@ struct host1x { const struct host1x_channel_ops *channel_op; const struct host1x_cdma_ops *cdma_op; const struct host1x_pushbuffer_ops *cdma_pb_op; + const struct host1x_debug_ops *debug_op; struct host1x_syncpt *nop_sp; @@ -107,6 +122,8 @@ struct host1x { struct host1x_channel chlist; unsigned long allocated_channels; unsigned int num_allocated_channels; + + struct dentry *debugfs; }; void host1x_sync_writel(struct host1x *host1x, u32 r, u32 v); @@ -257,4 +274,29 @@ static inline void host1x_hw_pushbuffer_init(struct host1x *host, host->cdma_pb_op->init(pb); } +static inline void host1x_hw_debug_init(struct host1x *host, struct dentry *de) +{ + if (host->debug_op && host->debug_op->debug_init) + host->debug_op->debug_init(de); +} + 
+static inline void host1x_hw_show_channel_cdma(struct host1x *host, + struct host1x_channel *channel, + struct output *o) +{ + host->debug_op->show_channel_cdma(host, channel, o); +} + +static inline void host1x_hw_show_channel_fifo(struct host1x *host, + struct host1x_channel *channel, + struct output *o) +{ + host->debug_op->show_channel_fifo(host, channel, o); +} + +static inline void host1x_hw_show_mlocks(struct host1x *host, struct output *o) +{ + host->debug_op->show_mlocks(host, o); +} + #endif diff --git a/drivers/gpu/host1x/hw/cdma_hw.c b/drivers/gpu/host1x/hw/cdma_hw.c index 4eb22ef29776..590b69d91dab 100644 --- a/drivers/gpu/host1x/hw/cdma_hw.c +++ b/drivers/gpu/host1x/hw/cdma_hw.c @@ -244,6 +244,8 @@ static void cdma_timeout_handler(struct work_struct *work) host1x = cdma_to_host1x(cdma); ch = cdma_to_channel(cdma); + host1x_debug_dump(cdma_to_host1x(cdma)); + mutex_lock(&cdma->lock); if (!cdma->timeout.client) { diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c index 5137a5604215..ee199623e365 100644 --- a/drivers/gpu/host1x/hw/channel_hw.c +++ b/drivers/gpu/host1x/hw/channel_hw.c @@ -29,6 +29,30 @@ #define HOST1X_CHANNEL_SIZE 16384 #define TRACE_MAX_LENGTH 128U +static void trace_write_gather(struct host1x_cdma *cdma, struct host1x_bo *bo, + u32 offset, u32 words) +{ + void *mem = NULL; + + if (host1x_debug_trace_cmdbuf) + mem = host1x_bo_mmap(bo); + + if (mem) { + u32 i; + /* + * Write in batches of 128 as there seems to be a limit + * of how much you can output to ftrace at once. 
+ */ + for (i = 0; i < words; i += TRACE_MAX_LENGTH) { + trace_host1x_cdma_push_gather( + dev_name(cdma_to_channel(cdma)->dev), + (u32)bo, min(words - i, TRACE_MAX_LENGTH), + offset + i * sizeof(u32), mem); + } + host1x_bo_munmap(bo, mem); + } +} + static void submit_gathers(struct host1x_job *job) { struct host1x_cdma *cdma = &job->channel->cdma; @@ -38,6 +62,7 @@ static void submit_gathers(struct host1x_job *job) struct host1x_job_gather *g = &job->gathers[i]; u32 op1 = host1x_opcode_gather(g->words); u32 op2 = g->base + g->offset; + trace_write_gather(cdma, g->bo, g->offset, op1 & 0xffff); host1x_cdma_push(cdma, op1, op2); } } diff --git a/drivers/gpu/host1x/hw/debug_hw.c b/drivers/gpu/host1x/hw/debug_hw.c new file mode 100644 index 000000000000..334c038052f5 --- /dev/null +++ b/drivers/gpu/host1x/hw/debug_hw.c @@ -0,0 +1,322 @@ +/* + * Copyright (C) 2010 Google, Inc. + * Author: Erik Gilling + * + * Copyright (C) 2011-2013 NVIDIA Corporation + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include +#include +#include +#include + +#include + +#include "dev.h" +#include "debug.h" +#include "cdma.h" +#include "channel.h" +#include "host1x_bo.h" + +#define HOST1X_DEBUG_MAX_PAGE_OFFSET 102400 + +enum { + HOST1X_OPCODE_SETCLASS = 0x00, + HOST1X_OPCODE_INCR = 0x01, + HOST1X_OPCODE_NONINCR = 0x02, + HOST1X_OPCODE_MASK = 0x03, + HOST1X_OPCODE_IMM = 0x04, + HOST1X_OPCODE_RESTART = 0x05, + HOST1X_OPCODE_GATHER = 0x06, + HOST1X_OPCODE_EXTEND = 0x0e, +}; + +enum { + HOST1X_OPCODE_EXTEND_ACQUIRE_MLOCK = 0x00, + HOST1X_OPCODE_EXTEND_RELEASE_MLOCK = 0x01, +}; + +static unsigned int show_channel_command(struct output *o, u32 val) +{ + unsigned mask; + unsigned subop; + + switch (val >> 28) { + case HOST1X_OPCODE_SETCLASS: + mask = val & 0x3f; + if (mask) { + host1x_debug_output(o, "SETCL(class=%03x, offset=%03x, mask=%02x, [", + val >> 6 & 0x3ff, + val >> 16 & 0xfff, mask); + return hweight8(mask); + } else { + host1x_debug_output(o, "SETCL(class=%03x)\n", + val >> 6 & 0x3ff); + return 0; + } + + case HOST1X_OPCODE_INCR: + host1x_debug_output(o, "INCR(offset=%03x, [", + val >> 16 & 0xfff); + return val & 0xffff; + + case HOST1X_OPCODE_NONINCR: + host1x_debug_output(o, "NONINCR(offset=%03x, [", + val >> 16 & 0xfff); + return val & 0xffff; + + case HOST1X_OPCODE_MASK: + mask = val & 0xffff; + host1x_debug_output(o, "MASK(offset=%03x, mask=%03x, [", + val >> 16 & 0xfff, mask); + return hweight16(mask); + + case HOST1X_OPCODE_IMM: + host1x_debug_output(o, "IMM(offset=%03x, data=%03x)\n", + val >> 16 & 0xfff, val & 0xffff); + return 0; + + case HOST1X_OPCODE_RESTART: + host1x_debug_output(o, "RESTART(offset=%08x)\n", val << 4); + return 0; + + case HOST1X_OPCODE_GATHER: + host1x_debug_output(o, "GATHER(offset=%03x, insert=%d, type=%d, count=%04x, addr=[", + val >> 16 & 0xfff, val >> 15 & 0x1, + val >> 14 & 0x1, val & 0x3fff); + return 1; + + case HOST1X_OPCODE_EXTEND: + subop = val >> 24 & 0xf; + if (subop == HOST1X_OPCODE_EXTEND_ACQUIRE_MLOCK) + 
host1x_debug_output(o, "ACQUIRE_MLOCK(index=%d)\n", + val & 0xff); + else if (subop == HOST1X_OPCODE_EXTEND_RELEASE_MLOCK) + host1x_debug_output(o, "RELEASE_MLOCK(index=%d)\n", + val & 0xff); + else + host1x_debug_output(o, "EXTEND_UNKNOWN(%08x)\n", val); + return 0; + + default: + return 0; + } +} + +static void show_gather(struct output *o, phys_addr_t phys_addr, + unsigned int words, struct host1x_cdma *cdma, + phys_addr_t pin_addr, u32 *map_addr) +{ + /* Map dmaget cursor to corresponding mem handle */ + u32 offset = phys_addr - pin_addr; + unsigned int data_count = 0, i; + + /* + * Sometimes we're given different hardware address to the same + * page - in these cases the offset will get an invalid number and + * we just have to bail out. + */ + if (offset > HOST1X_DEBUG_MAX_PAGE_OFFSET) { + host1x_debug_output(o, "[address mismatch]\n"); + return; + } + + for (i = 0; i < words; i++) { + u32 addr = phys_addr + i * 4; + u32 val = *(map_addr + offset / 4 + i); + + if (!data_count) { + host1x_debug_output(o, "%08x: %08x:", addr, val); + data_count = show_channel_command(o, val); + } else { + host1x_debug_output(o, "%08x%s", val, + data_count > 0 ? 
", " : "])\n"); + data_count--; + } + } +} + +static void show_channel_gathers(struct output *o, struct host1x_cdma *cdma) +{ + struct host1x_job *job; + + list_for_each_entry(job, &cdma->sync_queue, list) { + int i; + host1x_debug_output(o, "\n%p: JOB, syncpt_id=%d, syncpt_val=%d, first_get=%08x, timeout=%d num_slots=%d, num_handles=%d\n", + job, job->syncpt_id, job->syncpt_end, + job->first_get, job->timeout, + job->num_slots, job->num_unpins); + + for (i = 0; i < job->num_gathers; i++) { + struct host1x_job_gather *g = &job->gathers[i]; + u32 *mapped; + + if (job->gather_copy_mapped) + mapped = (u32 *)job->gather_copy_mapped; + else + mapped = host1x_bo_mmap(g->bo); + + if (!mapped) { + host1x_debug_output(o, "[could not mmap]\n"); + continue; + } + + host1x_debug_output(o, " GATHER at %08x+%04x, %d words\n", + g->base, g->offset, g->words); + + show_gather(o, g->base + g->offset, g->words, cdma, + g->base, mapped); + + if (!job->gather_copy_mapped) + host1x_bo_munmap(g->bo, mapped); + } + } +} + +static void host1x_debug_show_channel_cdma(struct host1x *host, + struct host1x_channel *ch, + struct output *o) +{ + struct host1x_cdma *cdma = &ch->cdma; + u32 dmaput, dmaget, dmactrl; + u32 cbstat, cbread; + u32 val, base, baseval; + + dmaput = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT); + dmaget = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET); + dmactrl = host1x_ch_readl(ch, HOST1X_CHANNEL_DMACTRL); + cbread = host1x_sync_readl(host, HOST1X_SYNC_CBREAD(ch->id)); + cbstat = host1x_sync_readl(host, HOST1X_SYNC_CBSTAT(ch->id)); + + host1x_debug_output(o, "%d-%s: ", ch->id, dev_name(ch->dev)); + + if (HOST1X_CHANNEL_DMACTRL_DMASTOP_V(dmactrl) || + !ch->cdma.push_buffer.mapped) { + host1x_debug_output(o, "inactive\n\n"); + return; + } + + if (HOST1X_SYNC_CBSTAT_CBCLASS_V(cbstat) == HOST1X_CLASS_HOST1X && + HOST1X_SYNC_CBSTAT_CBOFFSET_V(cbstat) == + HOST1X_UCLASS_WAIT_SYNCPT) + host1x_debug_output(o, "waiting on syncpt %d val %d\n", + cbread >> 24, cbread & 0xffffff); + 
else if (HOST1X_SYNC_CBSTAT_CBCLASS_V(cbstat) == + HOST1X_CLASS_HOST1X && + HOST1X_SYNC_CBSTAT_CBOFFSET_V(cbstat) == + HOST1X_UCLASS_WAIT_SYNCPT_BASE) { + + base = (cbread >> 16) & 0xff; + baseval = + host1x_sync_readl(host, HOST1X_SYNC_SYNCPT_BASE(base)); + val = cbread & 0xffff; + host1x_debug_output(o, "waiting on syncpt %d val %d (base %d = %d; offset = %d)\n", + cbread >> 24, baseval + val, base, + baseval, val); + } else + host1x_debug_output(o, "active class %02x, offset %04x, val %08x\n", + HOST1X_SYNC_CBSTAT_CBCLASS_V(cbstat), + HOST1X_SYNC_CBSTAT_CBOFFSET_V(cbstat), + cbread); + + host1x_debug_output(o, "DMAPUT %08x, DMAGET %08x, DMACTL %08x\n", + dmaput, dmaget, dmactrl); + host1x_debug_output(o, "CBREAD %08x, CBSTAT %08x\n", cbread, cbstat); + + show_channel_gathers(o, cdma); + host1x_debug_output(o, "\n"); +} + +static void host1x_debug_show_channel_fifo(struct host1x *host, + struct host1x_channel *ch, + struct output *o) +{ + u32 val, rd_ptr, wr_ptr, start, end; + unsigned int data_count = 0; + + host1x_debug_output(o, "%d: fifo:\n", ch->id); + + val = host1x_ch_readl(ch, HOST1X_CHANNEL_FIFOSTAT); + host1x_debug_output(o, "FIFOSTAT %08x\n", val); + if (HOST1X_CHANNEL_FIFOSTAT_CFEMPTY_V(val)) { + host1x_debug_output(o, "[empty]\n"); + return; + } + + host1x_sync_writel(host, 0x0, HOST1X_SYNC_CFPEEK_CTRL); + host1x_sync_writel(host, HOST1X_SYNC_CFPEEK_CTRL_ENA_F(1) | + HOST1X_SYNC_CFPEEK_CTRL_CHANNR_F(ch->id), + HOST1X_SYNC_CFPEEK_CTRL); + + val = host1x_sync_readl(host, HOST1X_SYNC_CFPEEK_PTRS); + rd_ptr = HOST1X_SYNC_CFPEEK_PTRS_CF_RD_PTR_V(val); + wr_ptr = HOST1X_SYNC_CFPEEK_PTRS_CF_WR_PTR_V(val); + + val = host1x_sync_readl(host, HOST1X_SYNC_CF_SETUP(ch->id)); + start = HOST1X_SYNC_CF_SETUP_BASE_V(val); + end = HOST1X_SYNC_CF_SETUP_LIMIT_V(val); + + do { + host1x_sync_writel(host, 0x0, HOST1X_SYNC_CFPEEK_CTRL); + host1x_sync_writel(host, HOST1X_SYNC_CFPEEK_CTRL_ENA_F(1) | + HOST1X_SYNC_CFPEEK_CTRL_CHANNR_F(ch->id) | + 
HOST1X_SYNC_CFPEEK_CTRL_ADDR_F(rd_ptr), + HOST1X_SYNC_CFPEEK_CTRL); + val = host1x_sync_readl(host, HOST1X_SYNC_CFPEEK_READ); + + if (!data_count) { + host1x_debug_output(o, "%08x:", val); + data_count = show_channel_command(o, val); + } else { + host1x_debug_output(o, "%08x%s", val, + data_count > 0 ? ", " : "])\n"); + data_count--; + } + + if (rd_ptr == end) + rd_ptr = start; + else + rd_ptr++; + } while (rd_ptr != wr_ptr); + + if (data_count) + host1x_debug_output(o, ", ...])\n"); + host1x_debug_output(o, "\n"); + + host1x_sync_writel(host, 0x0, HOST1X_SYNC_CFPEEK_CTRL); +} + +static void host1x_debug_show_mlocks(struct host1x *host, struct output *o) +{ + int i; + + host1x_debug_output(o, "---- mlocks ----\n"); + for (i = 0; i < host1x_syncpt_nb_mlocks(host); i++) { + u32 owner = + host1x_sync_readl(host, HOST1X_SYNC_MLOCK_OWNER(i)); + if (HOST1X_SYNC_MLOCK_OWNER_CH_OWNS_V(owner)) + host1x_debug_output(o, "%d: locked by channel %d\n", + i, HOST1X_SYNC_MLOCK_OWNER_CHID_F(owner)); + else if (HOST1X_SYNC_MLOCK_OWNER_CPU_OWNS_V(owner)) + host1x_debug_output(o, "%d: locked by cpu\n", i); + else + host1x_debug_output(o, "%d: unlocked\n", i); + } + host1x_debug_output(o, "\n"); +} + +static const struct host1x_debug_ops host1x_debug_ops = { + .show_channel_cdma = host1x_debug_show_channel_cdma, + .show_channel_fifo = host1x_debug_show_channel_fifo, + .show_mlocks = host1x_debug_show_mlocks, +}; diff --git a/drivers/gpu/host1x/hw/host1x01.c b/drivers/gpu/host1x/hw/host1x01.c index 013ff381d825..a14e91cd1e58 100644 --- a/drivers/gpu/host1x/hw/host1x01.c +++ b/drivers/gpu/host1x/hw/host1x01.c @@ -23,6 +23,7 @@ /* include code */ #include "hw/cdma_hw.c" #include "hw/channel_hw.c" +#include "hw/debug_hw.c" #include "hw/intr_hw.c" #include "hw/syncpt_hw.c" @@ -35,6 +36,7 @@ int host1x01_init(struct host1x *host) host->cdma_pb_op = &host1x_pushbuffer_ops; host->syncpt_op = &host1x_syncpt_ops; host->intr_op = &host1x_intr_ops; + host->debug_op = &host1x_debug_ops; return 0; } 
diff --git a/drivers/gpu/host1x/hw/hw_host1x01_channel.h b/drivers/gpu/host1x/hw/hw_host1x01_channel.h index 9ba133205668..b4bc7ca4e051 100644 --- a/drivers/gpu/host1x/hw/hw_host1x01_channel.h +++ b/drivers/gpu/host1x/hw/hw_host1x01_channel.h @@ -51,6 +51,18 @@ #ifndef __hw_host1x_channel_host1x_h__ #define __hw_host1x_channel_host1x_h__ +static inline u32 host1x_channel_fifostat_r(void) +{ + return 0x0; +} +#define HOST1X_CHANNEL_FIFOSTAT \ + host1x_channel_fifostat_r() +static inline u32 host1x_channel_fifostat_cfempty_v(u32 r) +{ + return (r >> 10) & 0x1; +} +#define HOST1X_CHANNEL_FIFOSTAT_CFEMPTY_V(r) \ + host1x_channel_fifostat_cfempty_v(r) static inline u32 host1x_channel_dmastart_r(void) { return 0x14; @@ -87,6 +99,12 @@ static inline u32 host1x_channel_dmactrl_dmastop(void) } #define HOST1X_CHANNEL_DMACTRL_DMASTOP \ host1x_channel_dmactrl_dmastop() +static inline u32 host1x_channel_dmactrl_dmastop_v(u32 r) +{ + return (r >> 0) & 0x1; +} +#define HOST1X_CHANNEL_DMACTRL_DMASTOP_V(r) \ + host1x_channel_dmactrl_dmastop_v(r) static inline u32 host1x_channel_dmactrl_dmagetrst(void) { return 1 << 1; diff --git a/drivers/gpu/host1x/hw/hw_host1x01_sync.h b/drivers/gpu/host1x/hw/hw_host1x01_sync.h index 8f2a246c5426..ac704e579977 100644 --- a/drivers/gpu/host1x/hw/hw_host1x01_sync.h +++ b/drivers/gpu/host1x/hw/hw_host1x01_sync.h @@ -77,6 +77,24 @@ static inline u32 host1x_sync_syncpt_thresh_int_enable_cpu0_r(unsigned int id) } #define HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(id) \ host1x_sync_syncpt_thresh_int_enable_cpu0_r(id) +static inline u32 host1x_sync_cf_setup_r(unsigned int channel) +{ + return 0x80 + channel * REGISTER_STRIDE; +} +#define HOST1X_SYNC_CF_SETUP(channel) \ + host1x_sync_cf_setup_r(channel) +static inline u32 host1x_sync_cf_setup_base_v(u32 r) +{ + return (r >> 0) & 0x1ff; +} +#define HOST1X_SYNC_CF_SETUP_BASE_V(r) \ + host1x_sync_cf_setup_base_v(r) +static inline u32 host1x_sync_cf_setup_limit_v(u32 r) +{ + return (r >> 16) & 0x1ff; +} 
+#define HOST1X_SYNC_CF_SETUP_LIMIT_V(r) \ + host1x_sync_cf_setup_limit_v(r) static inline u32 host1x_sync_cmdproc_stop_r(void) { return 0xac; @@ -107,6 +125,30 @@ static inline u32 host1x_sync_ip_busy_timeout_r(void) } #define HOST1X_SYNC_IP_BUSY_TIMEOUT \ host1x_sync_ip_busy_timeout_r() +static inline u32 host1x_sync_mlock_owner_r(unsigned int id) +{ + return 0x340 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_MLOCK_OWNER(id) \ + host1x_sync_mlock_owner_r(id) +static inline u32 host1x_sync_mlock_owner_chid_f(u32 v) +{ + return (v & 0xf) << 8; +} +#define HOST1X_SYNC_MLOCK_OWNER_CHID_F(v) \ + host1x_sync_mlock_owner_chid_f(v) +static inline u32 host1x_sync_mlock_owner_cpu_owns_v(u32 r) +{ + return (r >> 1) & 0x1; +} +#define HOST1X_SYNC_MLOCK_OWNER_CPU_OWNS_V(r) \ + host1x_sync_mlock_owner_cpu_owns_v(r) +static inline u32 host1x_sync_mlock_owner_ch_owns_v(u32 r) +{ + return (r >> 0) & 0x1; +} +#define HOST1X_SYNC_MLOCK_OWNER_CH_OWNS_V(r) \ + host1x_sync_mlock_owner_ch_owns_v(r) static inline u32 host1x_sync_syncpt_int_thresh_r(unsigned int id) { return 0x500 + id * REGISTER_STRIDE; @@ -125,4 +167,77 @@ static inline u32 host1x_sync_syncpt_cpu_incr_r(unsigned int id) } #define HOST1X_SYNC_SYNCPT_CPU_INCR(id) \ host1x_sync_syncpt_cpu_incr_r(id) +static inline u32 host1x_sync_cbread_r(unsigned int channel) +{ + return 0x720 + channel * REGISTER_STRIDE; +} +#define HOST1X_SYNC_CBREAD(channel) \ + host1x_sync_cbread_r(channel) +static inline u32 host1x_sync_cfpeek_ctrl_r(void) +{ + return 0x74c; +} +#define HOST1X_SYNC_CFPEEK_CTRL \ + host1x_sync_cfpeek_ctrl_r() +static inline u32 host1x_sync_cfpeek_ctrl_addr_f(u32 v) +{ + return (v & 0x1ff) << 0; +} +#define HOST1X_SYNC_CFPEEK_CTRL_ADDR_F(v) \ + host1x_sync_cfpeek_ctrl_addr_f(v) +static inline u32 host1x_sync_cfpeek_ctrl_channr_f(u32 v) +{ + return (v & 0x7) << 16; +} +#define HOST1X_SYNC_CFPEEK_CTRL_CHANNR_F(v) \ + host1x_sync_cfpeek_ctrl_channr_f(v) +static inline u32 host1x_sync_cfpeek_ctrl_ena_f(u32 v) +{ + 
return (v & 0x1) << 31; +} +#define HOST1X_SYNC_CFPEEK_CTRL_ENA_F(v) \ + host1x_sync_cfpeek_ctrl_ena_f(v) +static inline u32 host1x_sync_cfpeek_read_r(void) +{ + return 0x750; +} +#define HOST1X_SYNC_CFPEEK_READ \ + host1x_sync_cfpeek_read_r() +static inline u32 host1x_sync_cfpeek_ptrs_r(void) +{ + return 0x754; +} +#define HOST1X_SYNC_CFPEEK_PTRS \ + host1x_sync_cfpeek_ptrs_r() +static inline u32 host1x_sync_cfpeek_ptrs_cf_rd_ptr_v(u32 r) +{ + return (r >> 0) & 0x1ff; +} +#define HOST1X_SYNC_CFPEEK_PTRS_CF_RD_PTR_V(r) \ + host1x_sync_cfpeek_ptrs_cf_rd_ptr_v(r) +static inline u32 host1x_sync_cfpeek_ptrs_cf_wr_ptr_v(u32 r) +{ + return (r >> 16) & 0x1ff; +} +#define HOST1X_SYNC_CFPEEK_PTRS_CF_WR_PTR_V(r) \ + host1x_sync_cfpeek_ptrs_cf_wr_ptr_v(r) +static inline u32 host1x_sync_cbstat_r(unsigned int channel) +{ + return 0x758 + channel * REGISTER_STRIDE; +} +#define HOST1X_SYNC_CBSTAT(channel) \ + host1x_sync_cbstat_r(channel) +static inline u32 host1x_sync_cbstat_cboffset_v(u32 r) +{ + return (r >> 0) & 0xffff; +} +#define HOST1X_SYNC_CBSTAT_CBOFFSET_V(r) \ + host1x_sync_cbstat_cboffset_v(r) +static inline u32 host1x_sync_cbstat_cbclass_v(u32 r) +{ + return (r >> 16) & 0x3ff; +} +#define HOST1X_SYNC_CBSTAT_CBCLASS_V(r) \ + host1x_sync_cbstat_cbclass_v(r) + #endif /* __hw_host1x01_sync_h__ */ diff --git a/drivers/gpu/host1x/hw/hw_host1x01_uclass.h b/drivers/gpu/host1x/hw/hw_host1x01_uclass.h index 7af660966ad6..42f3ce19ca32 100644 --- a/drivers/gpu/host1x/hw/hw_host1x01_uclass.h +++ b/drivers/gpu/host1x/hw/hw_host1x01_uclass.h @@ -87,6 +87,12 @@ static inline u32 host1x_uclass_wait_syncpt_thresh_f(u32 v) } #define HOST1X_UCLASS_WAIT_SYNCPT_THRESH_F(v) \ host1x_uclass_wait_syncpt_thresh_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_r(void) +{ + return 0x9; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE \ + host1x_uclass_wait_syncpt_base_r() static inline u32 host1x_uclass_wait_syncpt_base_indx_f(u32 v) { return (v & 0xff) << 24; diff --git 
a/drivers/gpu/host1x/hw/syncpt_hw.c b/drivers/gpu/host1x/hw/syncpt_hw.c index 2c1f4af1094c..61174990102a 100644 --- a/drivers/gpu/host1x/hw/syncpt_hw.c +++ b/drivers/gpu/host1x/hw/syncpt_hw.c @@ -86,6 +86,7 @@ static void syncpt_cpu_incr(struct host1x_syncpt *sp) host1x_syncpt_idle(sp)) { dev_err(host->dev, "Trying to increment syncpoint id %d beyond max\n", sp->id); + host1x_debug_dump(sp->host); return; } host1x_sync_writel(host, BIT_MASK(sp->id), diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c index 7e77e63da57b..4b493453e805 100644 --- a/drivers/gpu/host1x/syncpt.c +++ b/drivers/gpu/host1x/syncpt.c @@ -25,6 +25,7 @@ #include "syncpt.h" #include "dev.h" #include "intr.h" +#include "debug.h" #define SYNCPT_CHECK_PERIOD (2 * HZ) #define MAX_STUCK_CHECK_COUNT 15 @@ -231,6 +232,10 @@ int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout, "%s: syncpoint id %d (%s) stuck waiting %d, timeout=%ld\n", current->comm, sp->id, sp->name, thresh, timeout); + + host1x_debug_dump_syncpts(sp->host); + if (check_count == MAX_STUCK_CHECK_COUNT) + host1x_debug_dump(sp->host); check_count++; } } -- cgit