diff options
Diffstat (limited to 'drivers/gpu/drm/vc4')
| -rw-r--r-- | drivers/gpu/drm/vc4/Kconfig | 10 | ||||
| -rw-r--r-- | drivers/gpu/drm/vc4/Makefile | 4 | ||||
| -rw-r--r-- | drivers/gpu/drm/vc4/vc4_bo.c | 142 | ||||
| -rw-r--r-- | drivers/gpu/drm/vc4/vc4_crtc.c | 202 | ||||
| -rw-r--r-- | drivers/gpu/drm/vc4/vc4_debugfs.c | 7 | ||||
| -rw-r--r-- | drivers/gpu/drm/vc4/vc4_dpi.c | 201 | ||||
| -rw-r--r-- | drivers/gpu/drm/vc4/vc4_drv.c | 53 | ||||
| -rw-r--r-- | drivers/gpu/drm/vc4/vc4_drv.h | 91 | ||||
| -rw-r--r-- | drivers/gpu/drm/vc4/vc4_dsi.c | 1664 | ||||
| -rw-r--r-- | drivers/gpu/drm/vc4/vc4_fence.c | 56 | ||||
| -rw-r--r-- | drivers/gpu/drm/vc4/vc4_gem.c | 216 | ||||
| -rw-r--r-- | drivers/gpu/drm/vc4/vc4_hdmi.c | 725 | ||||
| -rw-r--r-- | drivers/gpu/drm/vc4/vc4_hvs.c | 31 | ||||
| -rw-r--r-- | drivers/gpu/drm/vc4/vc4_irq.c | 68 | ||||
| -rw-r--r-- | drivers/gpu/drm/vc4/vc4_kms.c | 102 | ||||
| -rw-r--r-- | drivers/gpu/drm/vc4/vc4_plane.c | 83 | ||||
| -rw-r--r-- | drivers/gpu/drm/vc4/vc4_regs.h | 131 | ||||
| -rw-r--r-- | drivers/gpu/drm/vc4/vc4_render_cl.c | 7 | ||||
| -rw-r--r-- | drivers/gpu/drm/vc4/vc4_v3d.c | 185 | ||||
| -rw-r--r-- | drivers/gpu/drm/vc4/vc4_validate.c | 91 | ||||
| -rw-r--r-- | drivers/gpu/drm/vc4/vc4_validate_shaders.c | 21 | ||||
| -rw-r--r-- | drivers/gpu/drm/vc4/vc4_vec.c | 6 |
22 files changed, 3484 insertions, 612 deletions
diff --git a/drivers/gpu/drm/vc4/Kconfig b/drivers/gpu/drm/vc4/Kconfig index e53df59cb139..4361bdcfd28a 100644 --- a/drivers/gpu/drm/vc4/Kconfig +++ b/drivers/gpu/drm/vc4/Kconfig @@ -1,11 +1,17 @@ config DRM_VC4 tristate "Broadcom VC4 Graphics" - depends on ARCH_BCM2835 || COMPILE_TEST + depends on ARCH_BCM || ARCH_BCM2835 || COMPILE_TEST depends on DRM + depends on SND && SND_SOC + depends on COMMON_CLK select DRM_KMS_HELPER select DRM_KMS_CMA_HELPER select DRM_GEM_CMA_HELPER - select DRM_PANEL + select DRM_PANEL_BRIDGE + select SND_PCM + select SND_PCM_ELD + select SND_SOC_GENERIC_DMAENGINE_PCM + select DRM_MIPI_DSI help Choose this option if you have a system that has a Broadcom VC4 GPU, such as the Raspberry Pi or other BCM2708/BCM2835. diff --git a/drivers/gpu/drm/vc4/Makefile b/drivers/gpu/drm/vc4/Makefile index 7757f69a8a77..25bd5d30415d 100644 --- a/drivers/gpu/drm/vc4/Makefile +++ b/drivers/gpu/drm/vc4/Makefile @@ -1,5 +1,3 @@ -ccflags-y := -Iinclude/drm - # Please keep these build lists sorted! # core driver code @@ -8,6 +6,8 @@ vc4-y := \ vc4_crtc.o \ vc4_drv.o \ vc4_dpi.o \ + vc4_dsi.o \ + vc4_fence.o \ vc4_kms.o \ vc4_gem.o \ vc4_hdmi.o \ diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c index 3f6704cf6608..487f96412d35 100644 --- a/drivers/gpu/drm/vc4/vc4_bo.c +++ b/drivers/gpu/drm/vc4/vc4_bo.c @@ -6,7 +6,8 @@ * published by the Free Software Foundation. */ -/* DOC: VC4 GEM BO management support. +/** + * DOC: VC4 GEM BO management support * * The VC4 GPU architecture (both scanout and rendering) has direct * access to system memory with no MMU in between. To support it, we @@ -18,6 +19,8 @@ * rendering can return quickly. 
*/ +#include <linux/dma-buf.h> + #include "vc4_drv.h" #include "uapi/drm/vc4_drm.h" @@ -87,6 +90,9 @@ static void vc4_bo_destroy(struct vc4_bo *bo) vc4->bo_stats.num_allocated--; vc4->bo_stats.size_allocated -= obj->size; + + reservation_object_fini(&bo->_resv); + drm_gem_cma_free_object(obj); } @@ -186,6 +192,8 @@ out: /** * vc4_gem_create_object - Implementation of driver->gem_create_object. + * @dev: DRM device + * @size: Size in bytes of the memory the object will reference * * This lets the CMA helpers allocate object structs for us, and keep * our BO stats correct. @@ -203,26 +211,29 @@ struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size) vc4->bo_stats.num_allocated++; vc4->bo_stats.size_allocated += size; mutex_unlock(&vc4->bo_lock); + bo->resv = &bo->_resv; + reservation_object_init(bo->resv); return &bo->base.base; } struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size, - bool from_cache) + bool allow_unzeroed) { size_t size = roundup(unaligned_size, PAGE_SIZE); struct vc4_dev *vc4 = to_vc4_dev(dev); struct drm_gem_cma_object *cma_obj; + struct vc4_bo *bo; if (size == 0) return ERR_PTR(-EINVAL); /* First, try to get a vc4_bo from the kernel BO cache. */ - if (from_cache) { - struct vc4_bo *bo = vc4_bo_get_from_cache(dev, size); - - if (bo) - return bo; + bo = vc4_bo_get_from_cache(dev, size); + if (bo) { + if (!allow_unzeroed) + memset(bo->base.vaddr, 0, bo->base.base.size); + return bo; } cma_obj = drm_gem_cma_create(dev, size); @@ -240,7 +251,6 @@ struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size, return ERR_PTR(-ENOMEM); } } - return to_vc4_bo(&cma_obj->base); } @@ -313,6 +323,14 @@ void vc4_free_object(struct drm_gem_object *gem_bo) goto out; } + /* If this object was partially constructed but CMA allocation + * had failed, just free it. 
+ */ + if (!bo->base.vaddr) { + vc4_bo_destroy(bo); + goto out; + } + cache_list = vc4_get_cache_list_for_size(dev, gem_bo->size); if (!cache_list) { vc4_bo_destroy(bo); @@ -325,6 +343,7 @@ void vc4_free_object(struct drm_gem_object *gem_bo) bo->validated_shader = NULL; } + bo->t_format = false; bo->free_time = jiffies; list_add(&bo->size_head, cache_list); list_add(&bo->unref_head, &vc4->bo_cache.time_list); @@ -357,6 +376,13 @@ static void vc4_bo_cache_time_timer(unsigned long data) schedule_work(&vc4->bo_cache.time_work); } +struct reservation_object *vc4_prime_res_obj(struct drm_gem_object *obj) +{ + struct vc4_bo *bo = to_vc4_bo(obj); + + return bo->resv; +} + struct dma_buf * vc4_prime_export(struct drm_device *dev, struct drm_gem_object *obj, int flags) { @@ -428,6 +454,24 @@ void *vc4_prime_vmap(struct drm_gem_object *obj) return drm_gem_cma_prime_vmap(obj); } +struct drm_gem_object * +vc4_prime_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, + struct sg_table *sgt) +{ + struct drm_gem_object *obj; + struct vc4_bo *bo; + + obj = drm_gem_cma_prime_import_sg_table(dev, attach, sgt); + if (IS_ERR(obj)) + return obj; + + bo = to_vc4_bo(obj); + bo->resv = attach->dmabuf->resv; + + return obj; +} + int vc4_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { @@ -525,6 +569,88 @@ vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data, return ret; } +/** + * vc4_set_tiling_ioctl() - Sets the tiling modifier for a BO. + * @dev: DRM device + * @data: ioctl argument + * @file_priv: DRM file for this fd + * + * The tiling state of the BO decides the default modifier of an fb if + * no specific modifier was set by userspace, and the return value of + * vc4_get_tiling_ioctl() (so that userspace can treat a BO it + * received from dmabuf as the same tiling format as the producer + * used). 
+ */ +int vc4_set_tiling_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_vc4_set_tiling *args = data; + struct drm_gem_object *gem_obj; + struct vc4_bo *bo; + bool t_format; + + if (args->flags != 0) + return -EINVAL; + + switch (args->modifier) { + case DRM_FORMAT_MOD_NONE: + t_format = false; + break; + case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: + t_format = true; + break; + default: + return -EINVAL; + } + + gem_obj = drm_gem_object_lookup(file_priv, args->handle); + if (!gem_obj) { + DRM_ERROR("Failed to look up GEM BO %d\n", args->handle); + return -ENOENT; + } + bo = to_vc4_bo(gem_obj); + bo->t_format = t_format; + + drm_gem_object_unreference_unlocked(gem_obj); + + return 0; +} + +/** + * vc4_get_tiling_ioctl() - Gets the tiling modifier for a BO. + * @dev: DRM device + * @data: ioctl argument + * @file_priv: DRM file for this fd + * + * Returns the tiling modifier for a BO as set by vc4_set_tiling_ioctl(). + */ +int vc4_get_tiling_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_vc4_get_tiling *args = data; + struct drm_gem_object *gem_obj; + struct vc4_bo *bo; + + if (args->flags != 0 || args->modifier != 0) + return -EINVAL; + + gem_obj = drm_gem_object_lookup(file_priv, args->handle); + if (!gem_obj) { + DRM_ERROR("Failed to look up GEM BO %d\n", args->handle); + return -ENOENT; + } + bo = to_vc4_bo(gem_obj); + + if (bo->t_format) + args->modifier = DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED; + else + args->modifier = DRM_FORMAT_MOD_NONE; + + drm_gem_object_unreference_unlocked(gem_obj); + + return 0; +} + void vc4_bo_cache_init(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index 7aadce1f7e7a..a12cc7ea99b6 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -11,12 +11,13 @@ * * In VC4, the Pixel Valve is what most closely corresponds to the * DRM's concept of 
a CRTC. The PV generates video timings from the - * output's clock plus its configuration. It pulls scaled pixels from + * encoder's clock plus its configuration. It pulls scaled pixels from * the HVS at that timing, and feeds it to the encoder. * * However, the DRM CRTC also collects the configuration of all the - * DRM planes attached to it. As a result, this file also manages - * setup of the VC4 HVS's display elements on the CRTC. + * DRM planes attached to it. As a result, the CRTC is also + * responsible for writing the display list for the HVS channel that + * the CRTC will use. * * The 2835 has 3 different pixel valves. pv0 in the audio power * domain feeds DSI0 or DPI, while pv1 feeds DS1 or SMI. pv2 in the @@ -31,13 +32,13 @@ * ones that set the clock. */ -#include "drm_atomic.h" -#include "drm_atomic_helper.h" -#include "drm_crtc_helper.h" -#include "linux/clk.h" -#include "drm_fb_cma_helper.h" -#include "linux/component.h" -#include "linux/of_device.h" +#include <drm/drm_atomic.h> +#include <drm/drm_atomic_helper.h> +#include <drm/drm_crtc_helper.h> +#include <linux/clk.h> +#include <drm/drm_fb_cma_helper.h> +#include <linux/component.h> +#include <linux/of_device.h> #include "vc4_drv.h" #include "vc4_regs.h" @@ -150,17 +151,18 @@ int vc4_crtc_debugfs_regs(struct seq_file *m, void *unused) } #endif -int vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id, - unsigned int flags, int *vpos, int *hpos, - ktime_t *stime, ktime_t *etime, - const struct drm_display_mode *mode) +bool vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id, + bool in_vblank_irq, int *vpos, int *hpos, + ktime_t *stime, ktime_t *etime, + const struct drm_display_mode *mode) { struct vc4_dev *vc4 = to_vc4_dev(dev); - struct vc4_crtc *vc4_crtc = vc4->crtc[crtc_id]; + struct drm_crtc *crtc = drm_crtc_from_index(dev, crtc_id); + struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc); u32 val; int fifo_lines; int vblank_lines; - int ret = 0; + bool ret = false; /* 
preempt_disable_rt() should go right here in PREEMPT_RT patchset. */ @@ -196,7 +198,7 @@ int vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id, fifo_lines = vc4_crtc->cob_size / mode->crtc_hdisplay; if (fifo_lines > 0) - ret |= DRM_SCANOUTPOS_VALID; + ret = true; /* HVS more than fifo_lines into frame for compositing? */ if (*vpos > fifo_lines) { @@ -214,7 +216,6 @@ int vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id, */ *vpos -= fifo_lines + 1; - ret |= DRM_SCANOUTPOS_ACCURATE; return ret; } @@ -227,10 +228,9 @@ int vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id, * We can't get meaningful readings wrt. scanline position of the PV * and need to make things up in a approximative but consistent way. */ - ret |= DRM_SCANOUTPOS_IN_VBLANK; vblank_lines = mode->vtotal - mode->vdisplay; - if (flags & DRM_CALLED_FROM_VBLIRQ) { + if (in_vblank_irq) { /* * Assume the irq handler got called close to first * line of vblank, so PV has about a full vblank @@ -252,9 +252,10 @@ int vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id, * we are at the very beginning of vblank, as the hvs just * started refilling, and the stime and etime timestamps * truly correspond to start of vblank. + * + * Unfortunately there's no way to report this to upper levels + * and make it more useful. */ - if ((val & SCALER_DISPSTATX_FULL) != SCALER_DISPSTATX_FULL) - ret |= DRM_SCANOUTPOS_ACCURATE; } else { /* * No clue where we are inside vblank. 
Return a vpos of zero, @@ -268,21 +269,6 @@ int vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id, return ret; } -int vc4_crtc_get_vblank_timestamp(struct drm_device *dev, unsigned int crtc_id, - int *max_error, struct timeval *vblank_time, - unsigned flags) -{ - struct vc4_dev *vc4 = to_vc4_dev(dev); - struct vc4_crtc *vc4_crtc = vc4->crtc[crtc_id]; - struct drm_crtc *crtc = &vc4_crtc->base; - struct drm_crtc_state *state = crtc->state; - - /* Helper routine in DRM core does all the work: */ - return drm_calc_vbltimestamp_from_scanoutpos(dev, crtc_id, max_error, - vblank_time, flags, - &state->adjusted_mode); -} - static void vc4_crtc_destroy(struct drm_crtc *crtc) { drm_crtc_cleanup(crtc); @@ -314,7 +300,8 @@ vc4_crtc_lut_load(struct drm_crtc *crtc) static int vc4_crtc_gamma_set(struct drm_crtc *crtc, u16 *r, u16 *g, u16 *b, - uint32_t size) + uint32_t size, + struct drm_modeset_acquire_ctx *ctx) { struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc); u32 i; @@ -349,38 +336,44 @@ static u32 vc4_get_fifo_full_level(u32 format) } /* - * Returns the clock select bit for the connector attached to the - * CRTC. + * Returns the encoder attached to the CRTC. + * + * VC4 can only scan out to one encoder at a time, while the DRM core + * allows drivers to push pixels to more than one encoder from the + * same CRTC. 
*/ -static int vc4_get_clock_select(struct drm_crtc *crtc) +static struct drm_encoder *vc4_get_crtc_encoder(struct drm_crtc *crtc) { struct drm_connector *connector; + struct drm_connector_list_iter conn_iter; - drm_for_each_connector(connector, crtc->dev) { + drm_connector_list_iter_begin(crtc->dev, &conn_iter); + drm_for_each_connector_iter(connector, &conn_iter) { if (connector->state->crtc == crtc) { - struct drm_encoder *encoder = connector->encoder; - struct vc4_encoder *vc4_encoder = - to_vc4_encoder(encoder); - - return vc4_encoder->clock_select; + drm_connector_list_iter_end(&conn_iter); + return connector->encoder; } } + drm_connector_list_iter_end(&conn_iter); - return -1; + return NULL; } static void vc4_crtc_mode_set_nofb(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; struct vc4_dev *vc4 = to_vc4_dev(dev); + struct drm_encoder *encoder = vc4_get_crtc_encoder(crtc); + struct vc4_encoder *vc4_encoder = to_vc4_encoder(encoder); struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc); struct drm_crtc_state *state = crtc->state; struct drm_display_mode *mode = &state->adjusted_mode; bool interlace = mode->flags & DRM_MODE_FLAG_INTERLACE; u32 pixel_rep = (mode->flags & DRM_MODE_FLAG_DBLCLK) ? 2 : 1; - u32 format = PV_CONTROL_FORMAT_24; + bool is_dsi = (vc4_encoder->type == VC4_ENCODER_TYPE_DSI0 || + vc4_encoder->type == VC4_ENCODER_TYPE_DSI1); + u32 format = is_dsi ? PV_CONTROL_FORMAT_DSIV_24 : PV_CONTROL_FORMAT_24; bool debug_dump_regs = false; - int clock_select = vc4_get_clock_select(crtc); if (debug_dump_regs) { DRM_INFO("CRTC %d regs before:\n", drm_crtc_index(crtc)); @@ -436,17 +429,19 @@ static void vc4_crtc_mode_set_nofb(struct drm_crtc *crtc) */ CRTC_WRITE(PV_V_CONTROL, PV_VCONTROL_CONTINUOUS | + (is_dsi ? 
PV_VCONTROL_DSI : 0) | PV_VCONTROL_INTERLACE | VC4_SET_FIELD(mode->htotal * pixel_rep / 2, PV_VCONTROL_ODD_DELAY)); CRTC_WRITE(PV_VSYNCD_EVEN, 0); } else { - CRTC_WRITE(PV_V_CONTROL, PV_VCONTROL_CONTINUOUS); + CRTC_WRITE(PV_V_CONTROL, + PV_VCONTROL_CONTINUOUS | + (is_dsi ? PV_VCONTROL_DSI : 0)); } CRTC_WRITE(PV_HACT_ACT, mode->hdisplay * pixel_rep); - CRTC_WRITE(PV_CONTROL, VC4_SET_FIELD(format, PV_CONTROL_FORMAT) | VC4_SET_FIELD(vc4_get_fifo_full_level(format), @@ -455,7 +450,8 @@ static void vc4_crtc_mode_set_nofb(struct drm_crtc *crtc) PV_CONTROL_CLR_AT_START | PV_CONTROL_TRIGGER_UNDERFLOW | PV_CONTROL_WAIT_HSTART | - VC4_SET_FIELD(clock_select, PV_CONTROL_CLK_SELECT) | + VC4_SET_FIELD(vc4_encoder->clock_select, + PV_CONTROL_CLK_SELECT) | PV_CONTROL_FIFO_CLR | PV_CONTROL_EN); @@ -524,6 +520,34 @@ static void vc4_crtc_disable(struct drm_crtc *crtc) SCALER_DISPSTATX_EMPTY); } +static void vc4_crtc_update_dlist(struct drm_crtc *crtc) +{ + struct drm_device *dev = crtc->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc); + struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state); + + if (crtc->state->event) { + unsigned long flags; + + crtc->state->event->pipe = drm_crtc_index(crtc); + + WARN_ON(drm_crtc_vblank_get(crtc) != 0); + + spin_lock_irqsave(&dev->event_lock, flags); + vc4_crtc->event = crtc->state->event; + crtc->state->event = NULL; + + HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel), + vc4_state->mm.start); + + spin_unlock_irqrestore(&dev->event_lock, flags); + } else { + HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel), + vc4_state->mm.start); + } +} + static void vc4_crtc_enable(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; @@ -534,6 +558,12 @@ static void vc4_crtc_enable(struct drm_crtc *crtc) require_hvs_enabled(dev); + /* Enable vblank irq handling before crtc is started otherwise + * drm_crtc_get_vblank() fails in vc4_crtc_update_dlist(). 
+ */ + drm_crtc_vblank_on(crtc); + vc4_crtc_update_dlist(crtc); + /* Turn on the scaler, which will wait for vstart to start * compositing. */ @@ -545,9 +575,6 @@ static void vc4_crtc_enable(struct drm_crtc *crtc) /* Turn on the pixel valve, which will emit the vstart signal. */ CRTC_WRITE(PV_V_CONTROL, CRTC_READ(PV_V_CONTROL) | PV_VCONTROL_VIDEN); - - /* Enable vblank irq handling after crtc is started. */ - drm_crtc_vblank_on(crtc); } static bool vc4_crtc_mode_fixup(struct drm_crtc *crtc, @@ -589,7 +616,7 @@ static int vc4_crtc_atomic_check(struct drm_crtc *crtc, spin_lock_irqsave(&vc4->hvs->mm_lock, flags); ret = drm_mm_insert_node(&vc4->hvs->dlist_mm, &vc4_state->mm, - dlist_count, 1, 0); + dlist_count); spin_unlock_irqrestore(&vc4->hvs->mm_lock, flags); if (ret) return ret; @@ -602,7 +629,6 @@ static void vc4_crtc_atomic_flush(struct drm_crtc *crtc, { struct drm_device *dev = crtc->dev; struct vc4_dev *vc4 = to_vc4_dev(dev); - struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc); struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state); struct drm_plane *plane; bool debug_dump_regs = false; @@ -624,25 +650,15 @@ static void vc4_crtc_atomic_flush(struct drm_crtc *crtc, WARN_ON_ONCE(dlist_next - dlist_start != vc4_state->mm.size); - if (crtc->state->event) { - unsigned long flags; - - crtc->state->event->pipe = drm_crtc_index(crtc); - - WARN_ON(drm_crtc_vblank_get(crtc) != 0); - - spin_lock_irqsave(&dev->event_lock, flags); - vc4_crtc->event = crtc->state->event; - crtc->state->event = NULL; - - HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel), - vc4_state->mm.start); - - spin_unlock_irqrestore(&dev->event_lock, flags); - } else { - HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel), - vc4_state->mm.start); - } + /* Only update DISPLIST if the CRTC was already running and is not + * being disabled. + * vc4_crtc_enable() takes care of updating the dlist just after + * re-enabling VBLANK interrupts and before enabling the engine. 
+ * If the CRTC is being disabled, there's no point in updating this + * information. + */ + if (crtc->state->active && old_state->active) + vc4_crtc_update_dlist(crtc); if (debug_dump_regs) { DRM_INFO("CRTC %d HVS after:\n", drm_crtc_index(crtc)); @@ -650,20 +666,18 @@ static void vc4_crtc_atomic_flush(struct drm_crtc *crtc, } } -int vc4_enable_vblank(struct drm_device *dev, unsigned int crtc_id) +static int vc4_enable_vblank(struct drm_crtc *crtc) { - struct vc4_dev *vc4 = to_vc4_dev(dev); - struct vc4_crtc *vc4_crtc = vc4->crtc[crtc_id]; + struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc); CRTC_WRITE(PV_INTEN, PV_INT_VFP_START); return 0; } -void vc4_disable_vblank(struct drm_device *dev, unsigned int crtc_id) +static void vc4_disable_vblank(struct drm_crtc *crtc) { - struct vc4_dev *vc4 = to_vc4_dev(dev); - struct vc4_crtc *vc4_crtc = vc4->crtc[crtc_id]; + struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc); CRTC_WRITE(PV_INTEN, 0); } @@ -804,12 +818,13 @@ static int vc4_async_page_flip(struct drm_crtc *crtc, static int vc4_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, struct drm_pending_vblank_event *event, - uint32_t flags) + uint32_t flags, + struct drm_modeset_acquire_ctx *ctx) { if (flags & DRM_MODE_PAGE_FLIP_ASYNC) return vc4_async_page_flip(crtc, fb, event, flags); else - return drm_atomic_helper_page_flip(crtc, fb, event, flags); + return drm_atomic_helper_page_flip(crtc, fb, event, flags, ctx); } static struct drm_crtc_state *vc4_crtc_duplicate_state(struct drm_crtc *crtc) @@ -842,6 +857,17 @@ static void vc4_crtc_destroy_state(struct drm_crtc *crtc, drm_atomic_helper_crtc_destroy_state(crtc, state); } +static void +vc4_crtc_reset(struct drm_crtc *crtc) +{ + if (crtc->state) + __drm_atomic_helper_crtc_destroy_state(crtc->state); + + crtc->state = kzalloc(sizeof(struct vc4_crtc_state), GFP_KERNEL); + if (crtc->state) + crtc->state->crtc = crtc; +} + static const struct drm_crtc_funcs vc4_crtc_funcs = { .set_config = drm_atomic_helper_set_config, 
.destroy = vc4_crtc_destroy, @@ -849,10 +875,12 @@ static const struct drm_crtc_funcs vc4_crtc_funcs = { .set_property = NULL, .cursor_set = NULL, /* handled by drm_mode_cursor_universal */ .cursor_move = NULL, /* handled by drm_mode_cursor_universal */ - .reset = drm_atomic_helper_crtc_reset, + .reset = vc4_crtc_reset, .atomic_duplicate_state = vc4_crtc_duplicate_state, .atomic_destroy_state = vc4_crtc_destroy_state, .gamma_set = vc4_crtc_gamma_set, + .enable_vblank = vc4_enable_vblank, + .disable_vblank = vc4_disable_vblank, }; static const struct drm_crtc_helper_funcs vc4_crtc_helper_funcs = { @@ -937,7 +965,6 @@ static int vc4_crtc_bind(struct device *dev, struct device *master, void *data) { struct platform_device *pdev = to_platform_device(dev); struct drm_device *drm = dev_get_drvdata(master); - struct vc4_dev *vc4 = to_vc4_dev(drm); struct vc4_crtc *vc4_crtc; struct drm_crtc *crtc; struct drm_plane *primary_plane, *cursor_plane, *destroy_plane, *temp; @@ -975,7 +1002,6 @@ static int vc4_crtc_bind(struct device *dev, struct device *master, void *data) &vc4_crtc_funcs, NULL); drm_crtc_helper_add(crtc, &vc4_crtc_helper_funcs); primary_plane->crtc = crtc; - vc4->crtc[drm_crtc_index(crtc)] = vc4_crtc; vc4_crtc->channel = vc4_crtc->data->hvs_channel; drm_mode_crtc_set_gamma_size(crtc, ARRAY_SIZE(vc4_crtc->lut_r)); diff --git a/drivers/gpu/drm/vc4/vc4_debugfs.c b/drivers/gpu/drm/vc4/vc4_debugfs.c index caf817bac885..5db06bdb5f27 100644 --- a/drivers/gpu/drm/vc4/vc4_debugfs.c +++ b/drivers/gpu/drm/vc4/vc4_debugfs.c @@ -18,6 +18,7 @@ static const struct drm_info_list vc4_debugfs_list[] = { {"bo_stats", vc4_bo_stats_debugfs, 0}, {"dpi_regs", vc4_dpi_debugfs_regs, 0}, + {"dsi1_regs", vc4_dsi_debugfs_regs, 0, (void *)(uintptr_t)1}, {"hdmi_regs", vc4_hdmi_debugfs_regs, 0}, {"vec_regs", vc4_vec_debugfs_regs, 0}, {"hvs_regs", vc4_hvs_debugfs_regs, 0}, @@ -36,9 +37,3 @@ vc4_debugfs_init(struct drm_minor *minor) return drm_debugfs_create_files(vc4_debugfs_list, 
VC4_DEBUGFS_ENTRIES, minor->debugfs_root, minor); } - -void -vc4_debugfs_cleanup(struct drm_minor *minor) -{ - drm_debugfs_remove_files(vc4_debugfs_list, VC4_DEBUGFS_ENTRIES, minor); -} diff --git a/drivers/gpu/drm/vc4/vc4_dpi.c b/drivers/gpu/drm/vc4/vc4_dpi.c index 1e1f6b8184d0..2e0fe46aeb2e 100644 --- a/drivers/gpu/drm/vc4/vc4_dpi.c +++ b/drivers/gpu/drm/vc4/vc4_dpi.c @@ -18,17 +18,20 @@ * DOC: VC4 DPI module * * The VC4 DPI hardware supports MIPI DPI type 4 and Nokia ViSSI - * signals, which are routed out to GPIO0-27 with the ALT2 function. + * signals. On BCM2835, these can be routed out to GPIO0-27 with the + * ALT2 function. */ -#include "drm_atomic_helper.h" -#include "drm_crtc_helper.h" -#include "drm_edid.h" -#include "drm_panel.h" -#include "linux/clk.h" -#include "linux/component.h" -#include "linux/of_graph.h" -#include "linux/of_platform.h" +#include <drm/drm_atomic_helper.h> +#include <drm/drm_bridge.h> +#include <drm/drm_crtc_helper.h> +#include <drm/drm_edid.h> +#include <drm/drm_of.h> +#include <drm/drm_panel.h> +#include <linux/clk.h> +#include <linux/component.h> +#include <linux/of_graph.h> +#include <linux/of_platform.h> #include "vc4_drv.h" #include "vc4_regs.h" @@ -94,7 +97,8 @@ struct vc4_dpi { struct drm_encoder *encoder; struct drm_connector *connector; - struct drm_panel *panel; + struct drm_bridge *bridge; + bool is_panel_bridge; void __iomem *regs; @@ -117,24 +121,6 @@ to_vc4_dpi_encoder(struct drm_encoder *encoder) return container_of(encoder, struct vc4_dpi_encoder, base.base); } -/* VC4 DPI connector KMS struct */ -struct vc4_dpi_connector { - struct drm_connector base; - struct vc4_dpi *dpi; - - /* Since the connector is attached to just the one encoder, - * this is the reference to it so we can do the best_encoder() - * hook. 
- */ - struct drm_encoder *encoder; -}; - -static inline struct vc4_dpi_connector * -to_vc4_dpi_connector(struct drm_connector *connector) -{ - return container_of(connector, struct vc4_dpi_connector, base); -} - #define DPI_REG(reg) { reg, #reg } static const struct { u32 reg; @@ -144,17 +130,6 @@ static const struct { DPI_REG(DPI_ID), }; -static void vc4_dpi_dump_regs(struct vc4_dpi *dpi) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(dpi_regs); i++) { - DRM_INFO("0x%04x (%s): 0x%08x\n", - dpi_regs[i].reg, dpi_regs[i].name, - DPI_READ(dpi_regs[i].reg)); - } -} - #ifdef CONFIG_DEBUG_FS int vc4_dpi_debugfs_regs(struct seq_file *m, void *unused) { @@ -177,80 +152,6 @@ int vc4_dpi_debugfs_regs(struct seq_file *m, void *unused) } #endif -static enum drm_connector_status -vc4_dpi_connector_detect(struct drm_connector *connector, bool force) -{ - struct vc4_dpi_connector *vc4_connector = - to_vc4_dpi_connector(connector); - struct vc4_dpi *dpi = vc4_connector->dpi; - - if (dpi->panel) - return connector_status_connected; - else - return connector_status_disconnected; -} - -static void vc4_dpi_connector_destroy(struct drm_connector *connector) -{ - drm_connector_unregister(connector); - drm_connector_cleanup(connector); -} - -static int vc4_dpi_connector_get_modes(struct drm_connector *connector) -{ - struct vc4_dpi_connector *vc4_connector = - to_vc4_dpi_connector(connector); - struct vc4_dpi *dpi = vc4_connector->dpi; - - if (dpi->panel) - return drm_panel_get_modes(dpi->panel); - - return 0; -} - -static const struct drm_connector_funcs vc4_dpi_connector_funcs = { - .dpms = drm_atomic_helper_connector_dpms, - .detect = vc4_dpi_connector_detect, - .fill_modes = drm_helper_probe_single_connector_modes, - .destroy = vc4_dpi_connector_destroy, - .reset = drm_atomic_helper_connector_reset, - .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, - .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, -}; - -static const struct 
drm_connector_helper_funcs vc4_dpi_connector_helper_funcs = { - .get_modes = vc4_dpi_connector_get_modes, -}; - -static struct drm_connector *vc4_dpi_connector_init(struct drm_device *dev, - struct vc4_dpi *dpi) -{ - struct drm_connector *connector = NULL; - struct vc4_dpi_connector *dpi_connector; - - dpi_connector = devm_kzalloc(dev->dev, sizeof(*dpi_connector), - GFP_KERNEL); - if (!dpi_connector) - return ERR_PTR(-ENOMEM); - - connector = &dpi_connector->base; - - dpi_connector->encoder = dpi->encoder; - dpi_connector->dpi = dpi; - - drm_connector_init(dev, connector, &vc4_dpi_connector_funcs, - DRM_MODE_CONNECTOR_DPI); - drm_connector_helper_add(connector, &vc4_dpi_connector_helper_funcs); - - connector->polled = 0; - connector->interlace_allowed = 0; - connector->doublescan_allowed = 0; - - drm_mode_connector_attach_encoder(connector, dpi->encoder); - - return connector; -} - static const struct drm_encoder_funcs vc4_dpi_encoder_funcs = { .destroy = drm_encoder_cleanup, }; @@ -260,11 +161,7 @@ static void vc4_dpi_encoder_disable(struct drm_encoder *encoder) struct vc4_dpi_encoder *vc4_encoder = to_vc4_dpi_encoder(encoder); struct vc4_dpi *dpi = vc4_encoder->dpi; - drm_panel_disable(dpi->panel); - clk_disable_unprepare(dpi->pixel_clock); - - drm_panel_unprepare(dpi->panel); } static void vc4_dpi_encoder_enable(struct drm_encoder *encoder) @@ -275,12 +172,6 @@ static void vc4_dpi_encoder_enable(struct drm_encoder *encoder) u32 dpi_c = DPI_ENABLE | DPI_OUTPUT_ENABLE_MODE; int ret; - ret = drm_panel_prepare(dpi->panel); - if (ret) { - DRM_ERROR("Panel failed to prepare\n"); - return; - } - if (dpi->connector->display_info.num_bus_formats) { u32 bus_format = dpi->connector->display_info.bus_formats[0]; @@ -331,13 +222,6 @@ static void vc4_dpi_encoder_enable(struct drm_encoder *encoder) ret = clk_prepare_enable(dpi->pixel_clock); if (ret) DRM_ERROR("Failed to set clock rate: %d\n", ret); - - ret = drm_panel_enable(dpi->panel); - if (ret) { - DRM_ERROR("Panel failed 
to enable\n"); - drm_panel_unprepare(dpi->panel); - return; - } } static bool vc4_dpi_encoder_mode_fixup(struct drm_encoder *encoder, @@ -361,33 +245,34 @@ static const struct of_device_id vc4_dpi_dt_match[] = { {} }; -/* Walks the OF graph to find the panel node and then asks DRM to look - * up the panel. +/* Sets up the next link in the display chain, whether it's a panel or + * a bridge. */ -static struct drm_panel *vc4_dpi_get_panel(struct device *dev) +static int vc4_dpi_init_bridge(struct vc4_dpi *dpi) { - struct device_node *endpoint, *panel_node; - struct device_node *np = dev->of_node; + struct device *dev = &dpi->pdev->dev; struct drm_panel *panel; + int ret; - endpoint = of_graph_get_next_endpoint(np, NULL); - if (!endpoint) { - dev_err(dev, "no endpoint to fetch DPI panel\n"); - return NULL; + ret = drm_of_find_panel_or_bridge(dev->of_node, 0, 0, + &panel, &dpi->bridge); + if (ret) { + /* If nothing was connected in the DT, that's not an + * error. + */ + if (ret == -ENODEV) + return 0; + else + return ret; } - /* don't proceed if we have an endpoint but no panel_node tied to it */ - panel_node = of_graph_get_remote_port_parent(endpoint); - of_node_put(endpoint); - if (!panel_node) { - dev_err(dev, "no valid panel node\n"); - return NULL; + if (panel) { + dpi->bridge = drm_panel_bridge_add(panel, + DRM_MODE_CONNECTOR_DPI); + dpi->is_panel_bridge = true; } - panel = of_drm_find_panel(panel_node); - of_node_put(panel_node); - - return panel; + return drm_bridge_attach(dpi->encoder, dpi->bridge, NULL); } static int vc4_dpi_bind(struct device *dev, struct device *master, void *data) @@ -416,8 +301,6 @@ static int vc4_dpi_bind(struct device *dev, struct device *master, void *data) if (IS_ERR(dpi->regs)) return PTR_ERR(dpi->regs); - vc4_dpi_dump_regs(dpi); - if (DPI_READ(DPI_ID) != DPI_ID_VALUE) { dev_err(dev, "Port returned 0x%08x for ID instead of 0x%08x\n", DPI_READ(DPI_ID), DPI_ID_VALUE); @@ -443,20 +326,13 @@ static int vc4_dpi_bind(struct device *dev, 
struct device *master, void *data) if (ret) DRM_ERROR("Failed to turn on core clock: %d\n", ret); - dpi->panel = vc4_dpi_get_panel(dev); - drm_encoder_init(drm, dpi->encoder, &vc4_dpi_encoder_funcs, DRM_MODE_ENCODER_DPI, NULL); drm_encoder_helper_add(dpi->encoder, &vc4_dpi_encoder_helper_funcs); - dpi->connector = vc4_dpi_connector_init(drm, dpi); - if (IS_ERR(dpi->connector)) { - ret = PTR_ERR(dpi->connector); + ret = vc4_dpi_init_bridge(dpi); + if (ret) goto err_destroy_encoder; - } - - if (dpi->panel) - drm_panel_attach(dpi->panel, dpi->connector); dev_set_drvdata(dev, dpi); @@ -477,10 +353,9 @@ static void vc4_dpi_unbind(struct device *dev, struct device *master, struct vc4_dev *vc4 = to_vc4_dev(drm); struct vc4_dpi *dpi = dev_get_drvdata(dev); - if (dpi->panel) - drm_panel_detach(dpi->panel); + if (dpi->is_panel_bridge) + drm_panel_bridge_remove(dpi->bridge); - vc4_dpi_connector_destroy(dpi->connector); drm_encoder_cleanup(dpi->encoder); clk_disable_unprepare(dpi->core_clock); diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index ac09ca7ff430..c6b487c3d2b7 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -7,6 +7,22 @@ * published by the Free Software Foundation. */ +/** + * DOC: Broadcom VC4 Graphics Driver + * + * The Broadcom VideoCore 4 (present in the Raspberry Pi) contains a + * OpenGL ES 2.0-compatible 3D engine called V3D, and a highly + * configurable display output pipeline that supports HDMI, DSI, DPI, + * and Composite TV output. + * + * The 3D engine also has an interface for submitting arbitrary + * compute shader-style jobs using the same shader processor as is + * used for vertex and fragment shaders in GLES 2.0. However, given + * that the hardware isn't able to expose any standard interfaces like + * OpenGL compute shaders or OpenCL, it isn't supported by this + * driver. 
+ */ + #include <linux/clk.h> #include <linux/component.h> #include <linux/device.h> @@ -15,7 +31,7 @@ #include <linux/of_platform.h> #include <linux/platform_device.h> #include <linux/pm_runtime.h> -#include "drm_fb_cma_helper.h" +#include <drm/drm_fb_cma_helper.h> #include <drm/drm_fb_helper.h> #include "uapi/drm/vc4_drm.h" @@ -122,6 +138,8 @@ static const struct drm_ioctl_desc vc4_drm_ioctls[] = { DRM_IOCTL_DEF_DRV(VC4_GET_HANG_STATE, vc4_get_hang_state_ioctl, DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(VC4_GET_PARAM, vc4_get_param_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VC4_SET_TILING, vc4_set_tiling_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(VC4_GET_TILING, vc4_get_tiling_ioctl, DRM_RENDER_ALLOW), }; static struct drm_driver vc4_drm_driver = { @@ -137,15 +155,11 @@ static struct drm_driver vc4_drm_driver = { .irq_postinstall = vc4_irq_postinstall, .irq_uninstall = vc4_irq_uninstall, - .enable_vblank = vc4_enable_vblank, - .disable_vblank = vc4_disable_vblank, - .get_vblank_counter = drm_vblank_no_hw_counter, .get_scanout_position = vc4_crtc_get_scanoutpos, - .get_vblank_timestamp = vc4_crtc_get_vblank_timestamp, + .get_vblank_timestamp = drm_calc_vbltimestamp_from_scanoutpos, #if defined(CONFIG_DEBUG_FS) .debugfs_init = vc4_debugfs_init, - .debugfs_cleanup = vc4_debugfs_cleanup, #endif .gem_create_object = vc4_create_object, @@ -156,8 +170,9 @@ static struct drm_driver vc4_drm_driver = { .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_import = drm_gem_prime_import, .gem_prime_export = vc4_prime_export, + .gem_prime_res_obj = vc4_prime_res_obj, .gem_prime_get_sg_table = drm_gem_cma_prime_get_sg_table, - .gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table, + .gem_prime_import_sg_table = vc4_prime_import_sg_table, .gem_prime_vmap = vc4_prime_vmap, .gem_prime_vunmap = drm_gem_cma_prime_vunmap, .gem_prime_mmap = vc4_prime_mmap, @@ -296,6 +311,7 @@ static struct platform_driver *const component_drivers[] = { &vc4_hdmi_driver, &vc4_vec_driver, 
&vc4_dpi_driver, + &vc4_dsi_driver, &vc4_hvs_driver, &vc4_crtc_driver, &vc4_v3d_driver, @@ -321,6 +337,7 @@ static int vc4_platform_drm_remove(struct platform_device *pdev) static const struct of_device_id vc4_of_match[] = { { .compatible = "brcm,bcm2835-vc4", }, + { .compatible = "brcm,cygnus-vc4", }, {}, }; MODULE_DEVICE_TABLE(of, vc4_of_match); @@ -336,26 +353,20 @@ static struct platform_driver vc4_platform_driver = { static int __init vc4_drm_register(void) { - int i, ret; + int ret; + + ret = platform_register_drivers(component_drivers, + ARRAY_SIZE(component_drivers)); + if (ret) + return ret; - for (i = 0; i < ARRAY_SIZE(component_drivers); i++) { - ret = platform_driver_register(component_drivers[i]); - if (ret) { - while (--i >= 0) - platform_driver_unregister(component_drivers[i]); - return ret; - } - } return platform_driver_register(&vc4_platform_driver); } static void __exit vc4_drm_unregister(void) { - int i; - - for (i = ARRAY_SIZE(component_drivers) - 1; i >= 0; i--) - platform_driver_unregister(component_drivers[i]); - + platform_unregister_drivers(component_drivers, + ARRAY_SIZE(component_drivers)); platform_driver_unregister(&vc4_platform_driver); } diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index b5c4bb14d0d1..df22698d62ee 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -6,17 +6,19 @@ * published by the Free Software Foundation. */ -#include "drmP.h" -#include "drm_gem_cma_helper.h" +#include <linux/reservation.h> +#include <drm/drmP.h> +#include <drm/drm_encoder.h> +#include <drm/drm_gem_cma_helper.h> struct vc4_dev { struct drm_device *dev; struct vc4_hdmi *hdmi; struct vc4_hvs *hvs; - struct vc4_crtc *crtc[3]; struct vc4_v3d *v3d; struct vc4_dpi *dpi; + struct vc4_dsi *dsi1; struct vc4_vec *vec; struct drm_fbdev_cma *fbdev; @@ -54,6 +56,8 @@ struct vc4_dev { /* Protects bo_cache and the BO stats. 
*/ struct mutex bo_lock; + uint64_t dma_fence_context; + /* Sequence number for the last job queued in bin_job_list. * Starts at 0 (no jobs emitted). */ @@ -93,12 +97,23 @@ struct vc4_dev { */ struct list_head seqno_cb_list; - /* The binner overflow memory that's currently set up in - * BPOA/BPOS registers. When overflow occurs and a new one is - * allocated, the previous one will be moved to - * vc4->current_exec's free list. + /* The memory used for storing binner tile alloc, tile state, + * and overflow memory allocations. This is freed when V3D + * powers down. + */ + struct vc4_bo *bin_bo; + + /* Size of blocks allocated within bin_bo. */ + uint32_t bin_alloc_size; + + /* Bitmask of the bin_alloc_size chunks in bin_bo that are + * used. */ - struct vc4_bo *overflow_mem; + uint32_t bin_alloc_used; + + /* Bitmask of the current bin_alloc used for overflow memory. */ + uint32_t bin_alloc_overflow; + struct work_struct overflow_mem_work; int power_refcount; @@ -133,6 +148,8 @@ struct vc4_bo { */ uint64_t write_seqno; + bool t_format; + /* List entry for the BO's position in either * vc4_exec_info->unref_list or vc4_dev->bo_cache.time_list */ @@ -148,6 +165,10 @@ struct vc4_bo { * DRM_IOCTL_VC4_CREATE_SHADER_BO. 
*/ struct vc4_validated_shader_info *validated_shader; + + /* normally (resv == &_resv) except for imported bo's */ + struct reservation_object *resv; + struct reservation_object _resv; }; static inline struct vc4_bo * @@ -156,6 +177,19 @@ to_vc4_bo(struct drm_gem_object *bo) return (struct vc4_bo *)bo; } +struct vc4_fence { + struct dma_fence base; + struct drm_device *dev; + /* vc4 seqno for signaled() test */ + uint64_t seqno; +}; + +static inline struct vc4_fence * +to_vc4_fence(struct dma_fence *fence) +{ + return (struct vc4_fence *)fence; +} + struct vc4_seqno_cb { struct work_struct work; uint64_t seqno; @@ -166,6 +200,7 @@ struct vc4_v3d { struct vc4_dev *vc4; struct platform_device *pdev; void __iomem *regs; + struct clk *clk; }; struct vc4_hvs { @@ -228,6 +263,8 @@ struct vc4_exec_info { /* Latest write_seqno of any BO that binning depends on. */ uint64_t bin_dep_seqno; + struct dma_fence *fence; + /* Last current addresses the hardware was processing when the * hangcheck timer checked on us. */ @@ -291,8 +328,12 @@ struct vc4_exec_info { bool found_increment_semaphore_packet; bool found_flush; uint8_t bin_tiles_x, bin_tiles_y; - struct drm_gem_cma_object *tile_bo; + /* Physical address of the start of the tile alloc array + * (where each tile's binned CL will start) + */ uint32_t tile_alloc_offset; + /* Bitmask of which binner slots are freed when this job completes. 
*/ + uint32_t bin_slots; /** * Computed addresses pointing into exec_bo where we start the @@ -431,10 +472,18 @@ int vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +int vc4_set_tiling_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int vc4_get_tiling_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); int vc4_get_hang_state_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int vc4_mmap(struct file *filp, struct vm_area_struct *vma); +struct reservation_object *vc4_prime_res_obj(struct drm_gem_object *obj); int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); +struct drm_gem_object *vc4_prime_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, + struct sg_table *sgt); void *vc4_prime_vmap(struct drm_gem_object *obj); void vc4_bo_cache_init(struct drm_device *dev); void vc4_bo_cache_destroy(struct drm_device *dev); @@ -442,21 +491,15 @@ int vc4_bo_stats_debugfs(struct seq_file *m, void *arg); /* vc4_crtc.c */ extern struct platform_driver vc4_crtc_driver; -int vc4_enable_vblank(struct drm_device *dev, unsigned int crtc_id); -void vc4_disable_vblank(struct drm_device *dev, unsigned int crtc_id); bool vc4_event_pending(struct drm_crtc *crtc); int vc4_crtc_debugfs_regs(struct seq_file *m, void *arg); -int vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id, - unsigned int flags, int *vpos, int *hpos, - ktime_t *stime, ktime_t *etime, - const struct drm_display_mode *mode); -int vc4_crtc_get_vblank_timestamp(struct drm_device *dev, unsigned int crtc_id, - int *max_error, struct timeval *vblank_time, - unsigned flags); +bool vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id, + bool in_vblank_irq, int *vpos, int *hpos, + ktime_t *stime, ktime_t *etime, + const struct drm_display_mode *mode); /* 
vc4_debugfs.c */ int vc4_debugfs_init(struct drm_minor *minor); -void vc4_debugfs_cleanup(struct drm_minor *minor); /* vc4_drv.c */ void __iomem *vc4_ioremap_regs(struct platform_device *dev, int index); @@ -465,6 +508,13 @@ void __iomem *vc4_ioremap_regs(struct platform_device *dev, int index); extern struct platform_driver vc4_dpi_driver; int vc4_dpi_debugfs_regs(struct seq_file *m, void *unused); +/* vc4_dsi.c */ +extern struct platform_driver vc4_dsi_driver; +int vc4_dsi_debugfs_regs(struct seq_file *m, void *unused); + +/* vc4_fence.c */ +extern const struct dma_fence_ops vc4_fence_ops; + /* vc4_gem.c */ void vc4_gem_init(struct drm_device *dev); void vc4_gem_destroy(struct drm_device *dev); @@ -488,7 +538,7 @@ int vc4_queue_seqno_cb(struct drm_device *dev, extern struct platform_driver vc4_hdmi_driver; int vc4_hdmi_debugfs_regs(struct seq_file *m, void *unused); -/* vc4_hdmi.c */ +/* vc4_vec.c */ extern struct platform_driver vc4_vec_driver; int vc4_vec_debugfs_regs(struct seq_file *m, void *unused); @@ -519,6 +569,7 @@ void vc4_plane_async_set_fb(struct drm_plane *plane, extern struct platform_driver vc4_v3d_driver; int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused); int vc4_v3d_debugfs_regs(struct seq_file *m, void *unused); +int vc4_v3d_get_bin_slot(struct vc4_dev *vc4); /* vc4_validate.c */ int diff --git a/drivers/gpu/drm/vc4/vc4_dsi.c b/drivers/gpu/drm/vc4/vc4_dsi.c new file mode 100644 index 000000000000..5e8b81eaa168 --- /dev/null +++ b/drivers/gpu/drm/vc4/vc4_dsi.c @@ -0,0 +1,1664 @@ +/* + * Copyright (C) 2016 Broadcom + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * DOC: VC4 DSI0/DSI1 module + * + * BCM2835 contains two DSI modules, DSI0 and DSI1. DSI0 is a + * single-lane DSI controller, while DSI1 is a more modern 4-lane DSI + * controller. + * + * Most Raspberry Pi boards expose DSI1 as their "DISPLAY" connector, + * while the compute module brings both DSI0 and DSI1 out. + * + * This driver has been tested for DSI1 video-mode display only + * currently, with most of the information necessary for DSI0 + * hopefully present. + */ + +#include <drm/drm_atomic_helper.h> +#include <drm/drm_crtc_helper.h> +#include <drm/drm_edid.h> +#include <drm/drm_mipi_dsi.h> +#include <drm/drm_panel.h> +#include <linux/clk.h> +#include <linux/clk-provider.h> +#include <linux/completion.h> +#include <linux/component.h> +#include <linux/dmaengine.h> +#include <linux/i2c.h> +#include <linux/of_address.h> +#include <linux/of_platform.h> +#include <linux/pm_runtime.h> +#include "vc4_drv.h" +#include "vc4_regs.h" + +#define DSI_CMD_FIFO_DEPTH 16 +#define DSI_PIX_FIFO_DEPTH 256 +#define DSI_PIX_FIFO_WIDTH 4 + +#define DSI0_CTRL 0x00 + +/* Command packet control. */ +#define DSI0_TXPKT1C 0x04 /* AKA PKTC */ +#define DSI1_TXPKT1C 0x04 +# define DSI_TXPKT1C_TRIG_CMD_MASK VC4_MASK(31, 24) +# define DSI_TXPKT1C_TRIG_CMD_SHIFT 24 +# define DSI_TXPKT1C_CMD_REPEAT_MASK VC4_MASK(23, 10) +# define DSI_TXPKT1C_CMD_REPEAT_SHIFT 10 + +# define DSI_TXPKT1C_DISPLAY_NO_MASK VC4_MASK(9, 8) +# define DSI_TXPKT1C_DISPLAY_NO_SHIFT 8 +/* Short, trigger, BTA, or a long packet that fits all in CMDFIFO. */ +# define DSI_TXPKT1C_DISPLAY_NO_SHORT 0 +/* Primary display where cmdfifo provides part of the payload and + * pixelvalve the rest. 
+ */ +# define DSI_TXPKT1C_DISPLAY_NO_PRIMARY 1 +/* Secondary display where cmdfifo provides part of the payload and + * pixfifo the rest. + */ +# define DSI_TXPKT1C_DISPLAY_NO_SECONDARY 2 + +# define DSI_TXPKT1C_CMD_TX_TIME_MASK VC4_MASK(7, 6) +# define DSI_TXPKT1C_CMD_TX_TIME_SHIFT 6 + +# define DSI_TXPKT1C_CMD_CTRL_MASK VC4_MASK(5, 4) +# define DSI_TXPKT1C_CMD_CTRL_SHIFT 4 +/* Command only. Uses TXPKT1H and DISPLAY_NO */ +# define DSI_TXPKT1C_CMD_CTRL_TX 0 +/* Command with BTA for either ack or read data. */ +# define DSI_TXPKT1C_CMD_CTRL_RX 1 +/* Trigger according to TRIG_CMD */ +# define DSI_TXPKT1C_CMD_CTRL_TRIG 2 +/* BTA alone for getting error status after a command, or a TE trigger + * without a previous command. + */ +# define DSI_TXPKT1C_CMD_CTRL_BTA 3 + +# define DSI_TXPKT1C_CMD_MODE_LP BIT(3) +# define DSI_TXPKT1C_CMD_TYPE_LONG BIT(2) +# define DSI_TXPKT1C_CMD_TE_EN BIT(1) +# define DSI_TXPKT1C_CMD_EN BIT(0) + +/* Command packet header. */ +#define DSI0_TXPKT1H 0x08 /* AKA PKTH */ +#define DSI1_TXPKT1H 0x08 +# define DSI_TXPKT1H_BC_CMDFIFO_MASK VC4_MASK(31, 24) +# define DSI_TXPKT1H_BC_CMDFIFO_SHIFT 24 +# define DSI_TXPKT1H_BC_PARAM_MASK VC4_MASK(23, 8) +# define DSI_TXPKT1H_BC_PARAM_SHIFT 8 +# define DSI_TXPKT1H_BC_DT_MASK VC4_MASK(7, 0) +# define DSI_TXPKT1H_BC_DT_SHIFT 0 + +#define DSI0_RXPKT1H 0x0c /* AKA RX1_PKTH */ +#define DSI1_RXPKT1H 0x14 +# define DSI_RXPKT1H_CRC_ERR BIT(31) +# define DSI_RXPKT1H_DET_ERR BIT(30) +# define DSI_RXPKT1H_ECC_ERR BIT(29) +# define DSI_RXPKT1H_COR_ERR BIT(28) +# define DSI_RXPKT1H_INCOMP_PKT BIT(25) +# define DSI_RXPKT1H_PKT_TYPE_LONG BIT(24) +/* Byte count if DSI_RXPKT1H_PKT_TYPE_LONG */ +# define DSI_RXPKT1H_BC_PARAM_MASK VC4_MASK(23, 8) +# define DSI_RXPKT1H_BC_PARAM_SHIFT 8 +/* Short return bytes if !DSI_RXPKT1H_PKT_TYPE_LONG */ +# define DSI_RXPKT1H_SHORT_1_MASK VC4_MASK(23, 16) +# define DSI_RXPKT1H_SHORT_1_SHIFT 16 +# define DSI_RXPKT1H_SHORT_0_MASK VC4_MASK(15, 8) +# define DSI_RXPKT1H_SHORT_0_SHIFT 8 +# 
define DSI_RXPKT1H_DT_LP_CMD_MASK VC4_MASK(7, 0) +# define DSI_RXPKT1H_DT_LP_CMD_SHIFT 0 + +#define DSI0_RXPKT2H 0x10 /* AKA RX2_PKTH */ +#define DSI1_RXPKT2H 0x18 +# define DSI_RXPKT1H_DET_ERR BIT(30) +# define DSI_RXPKT1H_ECC_ERR BIT(29) +# define DSI_RXPKT1H_COR_ERR BIT(28) +# define DSI_RXPKT1H_INCOMP_PKT BIT(25) +# define DSI_RXPKT1H_BC_PARAM_MASK VC4_MASK(23, 8) +# define DSI_RXPKT1H_BC_PARAM_SHIFT 8 +# define DSI_RXPKT1H_DT_MASK VC4_MASK(7, 0) +# define DSI_RXPKT1H_DT_SHIFT 0 + +#define DSI0_TXPKT_CMD_FIFO 0x14 /* AKA CMD_DATAF */ +#define DSI1_TXPKT_CMD_FIFO 0x1c + +#define DSI0_DISP0_CTRL 0x18 +# define DSI_DISP0_PIX_CLK_DIV_MASK VC4_MASK(21, 13) +# define DSI_DISP0_PIX_CLK_DIV_SHIFT 13 +# define DSI_DISP0_LP_STOP_CTRL_MASK VC4_MASK(12, 11) +# define DSI_DISP0_LP_STOP_CTRL_SHIFT 11 +# define DSI_DISP0_LP_STOP_DISABLE 0 +# define DSI_DISP0_LP_STOP_PERLINE 1 +# define DSI_DISP0_LP_STOP_PERFRAME 2 + +/* Transmit RGB pixels and null packets only during HACTIVE, instead + * of going to LP-STOP. + */ +# define DSI_DISP_HACTIVE_NULL BIT(10) +/* Transmit blanking packet only during vblank, instead of allowing LP-STOP. */ +# define DSI_DISP_VBLP_CTRL BIT(9) +/* Transmit blanking packet only during HFP, instead of allowing LP-STOP. */ +# define DSI_DISP_HFP_CTRL BIT(8) +/* Transmit blanking packet only during HBP, instead of allowing LP-STOP. */ +# define DSI_DISP_HBP_CTRL BIT(7) +# define DSI_DISP0_CHANNEL_MASK VC4_MASK(6, 5) +# define DSI_DISP0_CHANNEL_SHIFT 5 +/* Enables end events for HSYNC/VSYNC, not just start events. */ +# define DSI_DISP0_ST_END BIT(4) +# define DSI_DISP0_PFORMAT_MASK VC4_MASK(3, 2) +# define DSI_DISP0_PFORMAT_SHIFT 2 +# define DSI_PFORMAT_RGB565 0 +# define DSI_PFORMAT_RGB666_PACKED 1 +# define DSI_PFORMAT_RGB666 2 +# define DSI_PFORMAT_RGB888 3 +/* Default is VIDEO mode. 
*/ +# define DSI_DISP0_COMMAND_MODE BIT(1) +# define DSI_DISP0_ENABLE BIT(0) + +#define DSI0_DISP1_CTRL 0x1c +#define DSI1_DISP1_CTRL 0x2c +/* Format of the data written to TXPKT_PIX_FIFO. */ +# define DSI_DISP1_PFORMAT_MASK VC4_MASK(2, 1) +# define DSI_DISP1_PFORMAT_SHIFT 1 +# define DSI_DISP1_PFORMAT_16BIT 0 +# define DSI_DISP1_PFORMAT_24BIT 1 +# define DSI_DISP1_PFORMAT_32BIT_LE 2 +# define DSI_DISP1_PFORMAT_32BIT_BE 3 + +/* DISP1 is always command mode. */ +# define DSI_DISP1_ENABLE BIT(0) + +#define DSI0_TXPKT_PIX_FIFO 0x20 /* AKA PIX_FIFO */ + +#define DSI0_INT_STAT 0x24 +#define DSI0_INT_EN 0x28 +# define DSI1_INT_PHY_D3_ULPS BIT(30) +# define DSI1_INT_PHY_D3_STOP BIT(29) +# define DSI1_INT_PHY_D2_ULPS BIT(28) +# define DSI1_INT_PHY_D2_STOP BIT(27) +# define DSI1_INT_PHY_D1_ULPS BIT(26) +# define DSI1_INT_PHY_D1_STOP BIT(25) +# define DSI1_INT_PHY_D0_ULPS BIT(24) +# define DSI1_INT_PHY_D0_STOP BIT(23) +# define DSI1_INT_FIFO_ERR BIT(22) +# define DSI1_INT_PHY_DIR_RTF BIT(21) +# define DSI1_INT_PHY_RXLPDT BIT(20) +# define DSI1_INT_PHY_RXTRIG BIT(19) +# define DSI1_INT_PHY_D0_LPDT BIT(18) +# define DSI1_INT_PHY_DIR_FTR BIT(17) + +/* Signaled when the clock lane enters the given state. */ +# define DSI1_INT_PHY_CLOCK_ULPS BIT(16) +# define DSI1_INT_PHY_CLOCK_HS BIT(15) +# define DSI1_INT_PHY_CLOCK_STOP BIT(14) + +/* Signaled on timeouts */ +# define DSI1_INT_PR_TO BIT(13) +# define DSI1_INT_TA_TO BIT(12) +# define DSI1_INT_LPRX_TO BIT(11) +# define DSI1_INT_HSTX_TO BIT(10) + +/* Contention on a line when trying to drive the line low */ +# define DSI1_INT_ERR_CONT_LP1 BIT(9) +# define DSI1_INT_ERR_CONT_LP0 BIT(8) + +/* Control error: incorrect line state sequence on data lane 0. */ +# define DSI1_INT_ERR_CONTROL BIT(7) +/* LPDT synchronization error (bits received not a multiple of 8. */ + +# define DSI1_INT_ERR_SYNC_ESC BIT(6) +/* Signaled after receiving an error packet from the display in + * response to a read. 
+ */ +# define DSI1_INT_RXPKT2 BIT(5) +/* Signaled after receiving a packet. The header and optional short + * response will be in RXPKT1H, and a long response will be in the + * RXPKT_FIFO. + */ +# define DSI1_INT_RXPKT1 BIT(4) +# define DSI1_INT_TXPKT2_DONE BIT(3) +# define DSI1_INT_TXPKT2_END BIT(2) +/* Signaled after all repeats of TXPKT1 are transferred. */ +# define DSI1_INT_TXPKT1_DONE BIT(1) +/* Signaled after each TXPKT1 repeat is scheduled. */ +# define DSI1_INT_TXPKT1_END BIT(0) + +#define DSI1_INTERRUPTS_ALWAYS_ENABLED (DSI1_INT_ERR_SYNC_ESC | \ + DSI1_INT_ERR_CONTROL | \ + DSI1_INT_ERR_CONT_LP0 | \ + DSI1_INT_ERR_CONT_LP1 | \ + DSI1_INT_HSTX_TO | \ + DSI1_INT_LPRX_TO | \ + DSI1_INT_TA_TO | \ + DSI1_INT_PR_TO) + +#define DSI0_STAT 0x2c +#define DSI0_HSTX_TO_CNT 0x30 +#define DSI0_LPRX_TO_CNT 0x34 +#define DSI0_TA_TO_CNT 0x38 +#define DSI0_PR_TO_CNT 0x3c +#define DSI0_PHYC 0x40 +# define DSI1_PHYC_ESC_CLK_LPDT_MASK VC4_MASK(25, 20) +# define DSI1_PHYC_ESC_CLK_LPDT_SHIFT 20 +# define DSI1_PHYC_HS_CLK_CONTINUOUS BIT(18) +# define DSI0_PHYC_ESC_CLK_LPDT_MASK VC4_MASK(17, 12) +# define DSI0_PHYC_ESC_CLK_LPDT_SHIFT 12 +# define DSI1_PHYC_CLANE_ULPS BIT(17) +# define DSI1_PHYC_CLANE_ENABLE BIT(16) +# define DSI_PHYC_DLANE3_ULPS BIT(13) +# define DSI_PHYC_DLANE3_ENABLE BIT(12) +# define DSI0_PHYC_HS_CLK_CONTINUOUS BIT(10) +# define DSI0_PHYC_CLANE_ULPS BIT(9) +# define DSI_PHYC_DLANE2_ULPS BIT(9) +# define DSI0_PHYC_CLANE_ENABLE BIT(8) +# define DSI_PHYC_DLANE2_ENABLE BIT(8) +# define DSI_PHYC_DLANE1_ULPS BIT(5) +# define DSI_PHYC_DLANE1_ENABLE BIT(4) +# define DSI_PHYC_DLANE0_FORCE_STOP BIT(2) +# define DSI_PHYC_DLANE0_ULPS BIT(1) +# define DSI_PHYC_DLANE0_ENABLE BIT(0) + +#define DSI0_HS_CLT0 0x44 +#define DSI0_HS_CLT1 0x48 +#define DSI0_HS_CLT2 0x4c +#define DSI0_HS_DLT3 0x50 +#define DSI0_HS_DLT4 0x54 +#define DSI0_HS_DLT5 0x58 +#define DSI0_HS_DLT6 0x5c +#define DSI0_HS_DLT7 0x60 + +#define DSI0_PHY_AFEC0 0x64 +# define DSI0_PHY_AFEC0_DDR2CLK_EN BIT(26) +# 
define DSI0_PHY_AFEC0_DDRCLK_EN BIT(25) +# define DSI0_PHY_AFEC0_LATCH_ULPS BIT(24) +# define DSI1_PHY_AFEC0_IDR_DLANE3_MASK VC4_MASK(31, 29) +# define DSI1_PHY_AFEC0_IDR_DLANE3_SHIFT 29 +# define DSI1_PHY_AFEC0_IDR_DLANE2_MASK VC4_MASK(28, 26) +# define DSI1_PHY_AFEC0_IDR_DLANE2_SHIFT 26 +# define DSI1_PHY_AFEC0_IDR_DLANE1_MASK VC4_MASK(27, 23) +# define DSI1_PHY_AFEC0_IDR_DLANE1_SHIFT 23 +# define DSI1_PHY_AFEC0_IDR_DLANE0_MASK VC4_MASK(22, 20) +# define DSI1_PHY_AFEC0_IDR_DLANE0_SHIFT 20 +# define DSI1_PHY_AFEC0_IDR_CLANE_MASK VC4_MASK(19, 17) +# define DSI1_PHY_AFEC0_IDR_CLANE_SHIFT 17 +# define DSI0_PHY_AFEC0_ACTRL_DLANE1_MASK VC4_MASK(23, 20) +# define DSI0_PHY_AFEC0_ACTRL_DLANE1_SHIFT 20 +# define DSI0_PHY_AFEC0_ACTRL_DLANE0_MASK VC4_MASK(19, 16) +# define DSI0_PHY_AFEC0_ACTRL_DLANE0_SHIFT 16 +# define DSI0_PHY_AFEC0_ACTRL_CLANE_MASK VC4_MASK(15, 12) +# define DSI0_PHY_AFEC0_ACTRL_CLANE_SHIFT 12 +# define DSI1_PHY_AFEC0_DDR2CLK_EN BIT(16) +# define DSI1_PHY_AFEC0_DDRCLK_EN BIT(15) +# define DSI1_PHY_AFEC0_LATCH_ULPS BIT(14) +# define DSI1_PHY_AFEC0_RESET BIT(13) +# define DSI1_PHY_AFEC0_PD BIT(12) +# define DSI0_PHY_AFEC0_RESET BIT(11) +# define DSI1_PHY_AFEC0_PD_BG BIT(11) +# define DSI0_PHY_AFEC0_PD BIT(10) +# define DSI1_PHY_AFEC0_PD_DLANE3 BIT(10) +# define DSI0_PHY_AFEC0_PD_BG BIT(9) +# define DSI1_PHY_AFEC0_PD_DLANE2 BIT(9) +# define DSI0_PHY_AFEC0_PD_DLANE1 BIT(8) +# define DSI1_PHY_AFEC0_PD_DLANE1 BIT(8) +# define DSI_PHY_AFEC0_PTATADJ_MASK VC4_MASK(7, 4) +# define DSI_PHY_AFEC0_PTATADJ_SHIFT 4 +# define DSI_PHY_AFEC0_CTATADJ_MASK VC4_MASK(3, 0) +# define DSI_PHY_AFEC0_CTATADJ_SHIFT 0 + +#define DSI0_PHY_AFEC1 0x68 +# define DSI0_PHY_AFEC1_IDR_DLANE1_MASK VC4_MASK(10, 8) +# define DSI0_PHY_AFEC1_IDR_DLANE1_SHIFT 8 +# define DSI0_PHY_AFEC1_IDR_DLANE0_MASK VC4_MASK(6, 4) +# define DSI0_PHY_AFEC1_IDR_DLANE0_SHIFT 4 +# define DSI0_PHY_AFEC1_IDR_CLANE_MASK VC4_MASK(2, 0) +# define DSI0_PHY_AFEC1_IDR_CLANE_SHIFT 0 + +#define DSI0_TST_SEL 0x6c +#define 
DSI0_TST_MON 0x70 +#define DSI0_ID 0x74 +# define DSI_ID_VALUE 0x00647369 + +#define DSI1_CTRL 0x00 +# define DSI_CTRL_HS_CLKC_MASK VC4_MASK(15, 14) +# define DSI_CTRL_HS_CLKC_SHIFT 14 +# define DSI_CTRL_HS_CLKC_BYTE 0 +# define DSI_CTRL_HS_CLKC_DDR2 1 +# define DSI_CTRL_HS_CLKC_DDR 2 + +# define DSI_CTRL_RX_LPDT_EOT_DISABLE BIT(13) +# define DSI_CTRL_LPDT_EOT_DISABLE BIT(12) +# define DSI_CTRL_HSDT_EOT_DISABLE BIT(11) +# define DSI_CTRL_SOFT_RESET_CFG BIT(10) +# define DSI_CTRL_CAL_BYTE BIT(9) +# define DSI_CTRL_INV_BYTE BIT(8) +# define DSI_CTRL_CLR_LDF BIT(7) +# define DSI0_CTRL_CLR_PBCF BIT(6) +# define DSI1_CTRL_CLR_RXF BIT(6) +# define DSI0_CTRL_CLR_CPBCF BIT(5) +# define DSI1_CTRL_CLR_PDF BIT(5) +# define DSI0_CTRL_CLR_PDF BIT(4) +# define DSI1_CTRL_CLR_CDF BIT(4) +# define DSI0_CTRL_CLR_CDF BIT(3) +# define DSI0_CTRL_CTRL2 BIT(2) +# define DSI1_CTRL_DISABLE_DISP_CRCC BIT(2) +# define DSI0_CTRL_CTRL1 BIT(1) +# define DSI1_CTRL_DISABLE_DISP_ECCC BIT(1) +# define DSI0_CTRL_CTRL0 BIT(0) +# define DSI1_CTRL_EN BIT(0) +# define DSI0_CTRL_RESET_FIFOS (DSI_CTRL_CLR_LDF | \ + DSI0_CTRL_CLR_PBCF | \ + DSI0_CTRL_CLR_CPBCF | \ + DSI0_CTRL_CLR_PDF | \ + DSI0_CTRL_CLR_CDF) +# define DSI1_CTRL_RESET_FIFOS (DSI_CTRL_CLR_LDF | \ + DSI1_CTRL_CLR_RXF | \ + DSI1_CTRL_CLR_PDF | \ + DSI1_CTRL_CLR_CDF) + +#define DSI1_TXPKT2C 0x0c +#define DSI1_TXPKT2H 0x10 +#define DSI1_TXPKT_PIX_FIFO 0x20 +#define DSI1_RXPKT_FIFO 0x24 +#define DSI1_DISP0_CTRL 0x28 +#define DSI1_INT_STAT 0x30 +#define DSI1_INT_EN 0x34 +/* State reporting bits. These mostly behave like INT_STAT, where + * writing a 1 clears the bit. 
+ */ +#define DSI1_STAT 0x38 +# define DSI1_STAT_PHY_D3_ULPS BIT(31) +# define DSI1_STAT_PHY_D3_STOP BIT(30) +# define DSI1_STAT_PHY_D2_ULPS BIT(29) +# define DSI1_STAT_PHY_D2_STOP BIT(28) +# define DSI1_STAT_PHY_D1_ULPS BIT(27) +# define DSI1_STAT_PHY_D1_STOP BIT(26) +# define DSI1_STAT_PHY_D0_ULPS BIT(25) +# define DSI1_STAT_PHY_D0_STOP BIT(24) +# define DSI1_STAT_FIFO_ERR BIT(23) +# define DSI1_STAT_PHY_RXLPDT BIT(22) +# define DSI1_STAT_PHY_RXTRIG BIT(21) +# define DSI1_STAT_PHY_D0_LPDT BIT(20) +/* Set when in forward direction */ +# define DSI1_STAT_PHY_DIR BIT(19) +# define DSI1_STAT_PHY_CLOCK_ULPS BIT(18) +# define DSI1_STAT_PHY_CLOCK_HS BIT(17) +# define DSI1_STAT_PHY_CLOCK_STOP BIT(16) +# define DSI1_STAT_PR_TO BIT(15) +# define DSI1_STAT_TA_TO BIT(14) +# define DSI1_STAT_LPRX_TO BIT(13) +# define DSI1_STAT_HSTX_TO BIT(12) +# define DSI1_STAT_ERR_CONT_LP1 BIT(11) +# define DSI1_STAT_ERR_CONT_LP0 BIT(10) +# define DSI1_STAT_ERR_CONTROL BIT(9) +# define DSI1_STAT_ERR_SYNC_ESC BIT(8) +# define DSI1_STAT_RXPKT2 BIT(7) +# define DSI1_STAT_RXPKT1 BIT(6) +# define DSI1_STAT_TXPKT2_BUSY BIT(5) +# define DSI1_STAT_TXPKT2_DONE BIT(4) +# define DSI1_STAT_TXPKT2_END BIT(3) +# define DSI1_STAT_TXPKT1_BUSY BIT(2) +# define DSI1_STAT_TXPKT1_DONE BIT(1) +# define DSI1_STAT_TXPKT1_END BIT(0) + +#define DSI1_HSTX_TO_CNT 0x3c +#define DSI1_LPRX_TO_CNT 0x40 +#define DSI1_TA_TO_CNT 0x44 +#define DSI1_PR_TO_CNT 0x48 +#define DSI1_PHYC 0x4c + +#define DSI1_HS_CLT0 0x50 +# define DSI_HS_CLT0_CZERO_MASK VC4_MASK(26, 18) +# define DSI_HS_CLT0_CZERO_SHIFT 18 +# define DSI_HS_CLT0_CPRE_MASK VC4_MASK(17, 9) +# define DSI_HS_CLT0_CPRE_SHIFT 9 +# define DSI_HS_CLT0_CPREP_MASK VC4_MASK(8, 0) +# define DSI_HS_CLT0_CPREP_SHIFT 0 + +#define DSI1_HS_CLT1 0x54 +# define DSI_HS_CLT1_CTRAIL_MASK VC4_MASK(17, 9) +# define DSI_HS_CLT1_CTRAIL_SHIFT 9 +# define DSI_HS_CLT1_CPOST_MASK VC4_MASK(8, 0) +# define DSI_HS_CLT1_CPOST_SHIFT 0 + +#define DSI1_HS_CLT2 0x58 +# define DSI_HS_CLT2_WUP_MASK 
VC4_MASK(23, 0) +# define DSI_HS_CLT2_WUP_SHIFT 0 + +#define DSI1_HS_DLT3 0x5c +# define DSI_HS_DLT3_EXIT_MASK VC4_MASK(26, 18) +# define DSI_HS_DLT3_EXIT_SHIFT 18 +# define DSI_HS_DLT3_ZERO_MASK VC4_MASK(17, 9) +# define DSI_HS_DLT3_ZERO_SHIFT 9 +# define DSI_HS_DLT3_PRE_MASK VC4_MASK(8, 0) +# define DSI_HS_DLT3_PRE_SHIFT 0 + +#define DSI1_HS_DLT4 0x60 +# define DSI_HS_DLT4_ANLAT_MASK VC4_MASK(22, 18) +# define DSI_HS_DLT4_ANLAT_SHIFT 18 +# define DSI_HS_DLT4_TRAIL_MASK VC4_MASK(17, 9) +# define DSI_HS_DLT4_TRAIL_SHIFT 9 +# define DSI_HS_DLT4_LPX_MASK VC4_MASK(8, 0) +# define DSI_HS_DLT4_LPX_SHIFT 0 + +#define DSI1_HS_DLT5 0x64 +# define DSI_HS_DLT5_INIT_MASK VC4_MASK(23, 0) +# define DSI_HS_DLT5_INIT_SHIFT 0 + +#define DSI1_HS_DLT6 0x68 +# define DSI_HS_DLT6_TA_GET_MASK VC4_MASK(31, 24) +# define DSI_HS_DLT6_TA_GET_SHIFT 24 +# define DSI_HS_DLT6_TA_SURE_MASK VC4_MASK(23, 16) +# define DSI_HS_DLT6_TA_SURE_SHIFT 16 +# define DSI_HS_DLT6_TA_GO_MASK VC4_MASK(15, 8) +# define DSI_HS_DLT6_TA_GO_SHIFT 8 +# define DSI_HS_DLT6_LP_LPX_MASK VC4_MASK(7, 0) +# define DSI_HS_DLT6_LP_LPX_SHIFT 0 + +#define DSI1_HS_DLT7 0x6c +# define DSI_HS_DLT7_LP_WUP_MASK VC4_MASK(23, 0) +# define DSI_HS_DLT7_LP_WUP_SHIFT 0 + +#define DSI1_PHY_AFEC0 0x70 + +#define DSI1_PHY_AFEC1 0x74 +# define DSI1_PHY_AFEC1_ACTRL_DLANE3_MASK VC4_MASK(19, 16) +# define DSI1_PHY_AFEC1_ACTRL_DLANE3_SHIFT 16 +# define DSI1_PHY_AFEC1_ACTRL_DLANE2_MASK VC4_MASK(15, 12) +# define DSI1_PHY_AFEC1_ACTRL_DLANE2_SHIFT 12 +# define DSI1_PHY_AFEC1_ACTRL_DLANE1_MASK VC4_MASK(11, 8) +# define DSI1_PHY_AFEC1_ACTRL_DLANE1_SHIFT 8 +# define DSI1_PHY_AFEC1_ACTRL_DLANE0_MASK VC4_MASK(7, 4) +# define DSI1_PHY_AFEC1_ACTRL_DLANE0_SHIFT 4 +# define DSI1_PHY_AFEC1_ACTRL_CLANE_MASK VC4_MASK(3, 0) +# define DSI1_PHY_AFEC1_ACTRL_CLANE_SHIFT 0 + +#define DSI1_TST_SEL 0x78 +#define DSI1_TST_MON 0x7c +#define DSI1_PHY_TST1 0x80 +#define DSI1_PHY_TST2 0x84 +#define DSI1_PHY_FIFO_STAT 0x88 +/* Actually, all registers in the range that 
aren't otherwise claimed + * will return the ID. + */ +#define DSI1_ID 0x8c + +/* General DSI hardware state. */ +struct vc4_dsi { + struct platform_device *pdev; + + struct mipi_dsi_host dsi_host; + struct drm_encoder *encoder; + struct drm_bridge *bridge; + bool is_panel_bridge; + + void __iomem *regs; + + struct dma_chan *reg_dma_chan; + dma_addr_t reg_dma_paddr; + u32 *reg_dma_mem; + dma_addr_t reg_paddr; + + /* Whether we're on bcm2835's DSI0 or DSI1. */ + int port; + + /* DSI channel for the panel we're connected to. */ + u32 channel; + u32 lanes; + u32 format; + u32 divider; + u32 mode_flags; + + /* Input clock from CPRMAN to the digital PHY, for the DSI + * escape clock. + */ + struct clk *escape_clock; + + /* Input clock to the analog PHY, used to generate the DSI bit + * clock. + */ + struct clk *pll_phy_clock; + + /* HS Clocks generated within the DSI analog PHY. */ + struct clk_fixed_factor phy_clocks[3]; + + struct clk_hw_onecell_data *clk_onecell; + + /* Pixel clock output to the pixelvalve, generated from the HS + * clock. + */ + struct clk *pixel_clock; + + struct completion xfer_completion; + int xfer_result; +}; + +#define host_to_dsi(host) container_of(host, struct vc4_dsi, dsi_host) + +static inline void +dsi_dma_workaround_write(struct vc4_dsi *dsi, u32 offset, u32 val) +{ + struct dma_chan *chan = dsi->reg_dma_chan; + struct dma_async_tx_descriptor *tx; + dma_cookie_t cookie; + int ret; + + /* DSI0 should be able to write normally. 
*/ + if (!chan) { + writel(val, dsi->regs + offset); + return; + } + + *dsi->reg_dma_mem = val; + + tx = chan->device->device_prep_dma_memcpy(chan, + dsi->reg_paddr + offset, + dsi->reg_dma_paddr, + 4, 0); + if (!tx) { + DRM_ERROR("Failed to set up DMA register write\n"); + return; + } + + cookie = tx->tx_submit(tx); + ret = dma_submit_error(cookie); + if (ret) { + DRM_ERROR("Failed to submit DMA: %d\n", ret); + return; + } + ret = dma_sync_wait(chan, cookie); + if (ret) + DRM_ERROR("Failed to wait for DMA: %d\n", ret); +} + +#define DSI_READ(offset) readl(dsi->regs + (offset)) +#define DSI_WRITE(offset, val) dsi_dma_workaround_write(dsi, offset, val) +#define DSI_PORT_READ(offset) \ + DSI_READ(dsi->port ? DSI1_##offset : DSI0_##offset) +#define DSI_PORT_WRITE(offset, val) \ + DSI_WRITE(dsi->port ? DSI1_##offset : DSI0_##offset, val) +#define DSI_PORT_BIT(bit) (dsi->port ? DSI1_##bit : DSI0_##bit) + +/* VC4 DSI encoder KMS struct */ +struct vc4_dsi_encoder { + struct vc4_encoder base; + struct vc4_dsi *dsi; +}; + +static inline struct vc4_dsi_encoder * +to_vc4_dsi_encoder(struct drm_encoder *encoder) +{ + return container_of(encoder, struct vc4_dsi_encoder, base.base); +} + +#define DSI_REG(reg) { reg, #reg } +static const struct { + u32 reg; + const char *name; +} dsi0_regs[] = { + DSI_REG(DSI0_CTRL), + DSI_REG(DSI0_STAT), + DSI_REG(DSI0_HSTX_TO_CNT), + DSI_REG(DSI0_LPRX_TO_CNT), + DSI_REG(DSI0_TA_TO_CNT), + DSI_REG(DSI0_PR_TO_CNT), + DSI_REG(DSI0_DISP0_CTRL), + DSI_REG(DSI0_DISP1_CTRL), + DSI_REG(DSI0_INT_STAT), + DSI_REG(DSI0_INT_EN), + DSI_REG(DSI0_PHYC), + DSI_REG(DSI0_HS_CLT0), + DSI_REG(DSI0_HS_CLT1), + DSI_REG(DSI0_HS_CLT2), + DSI_REG(DSI0_HS_DLT3), + DSI_REG(DSI0_HS_DLT4), + DSI_REG(DSI0_HS_DLT5), + DSI_REG(DSI0_HS_DLT6), + DSI_REG(DSI0_HS_DLT7), + DSI_REG(DSI0_PHY_AFEC0), + DSI_REG(DSI0_PHY_AFEC1), + DSI_REG(DSI0_ID), +}; + +static const struct { + u32 reg; + const char *name; +} dsi1_regs[] = { + DSI_REG(DSI1_CTRL), + DSI_REG(DSI1_STAT), + 
DSI_REG(DSI1_HSTX_TO_CNT), + DSI_REG(DSI1_LPRX_TO_CNT), + DSI_REG(DSI1_TA_TO_CNT), + DSI_REG(DSI1_PR_TO_CNT), + DSI_REG(DSI1_DISP0_CTRL), + DSI_REG(DSI1_DISP1_CTRL), + DSI_REG(DSI1_INT_STAT), + DSI_REG(DSI1_INT_EN), + DSI_REG(DSI1_PHYC), + DSI_REG(DSI1_HS_CLT0), + DSI_REG(DSI1_HS_CLT1), + DSI_REG(DSI1_HS_CLT2), + DSI_REG(DSI1_HS_DLT3), + DSI_REG(DSI1_HS_DLT4), + DSI_REG(DSI1_HS_DLT5), + DSI_REG(DSI1_HS_DLT6), + DSI_REG(DSI1_HS_DLT7), + DSI_REG(DSI1_PHY_AFEC0), + DSI_REG(DSI1_PHY_AFEC1), + DSI_REG(DSI1_ID), +}; + +static void vc4_dsi_dump_regs(struct vc4_dsi *dsi) +{ + int i; + + if (dsi->port == 0) { + for (i = 0; i < ARRAY_SIZE(dsi0_regs); i++) { + DRM_INFO("0x%04x (%s): 0x%08x\n", + dsi0_regs[i].reg, dsi0_regs[i].name, + DSI_READ(dsi0_regs[i].reg)); + } + } else { + for (i = 0; i < ARRAY_SIZE(dsi1_regs); i++) { + DRM_INFO("0x%04x (%s): 0x%08x\n", + dsi1_regs[i].reg, dsi1_regs[i].name, + DSI_READ(dsi1_regs[i].reg)); + } + } +} + +#ifdef CONFIG_DEBUG_FS +int vc4_dsi_debugfs_regs(struct seq_file *m, void *unused) +{ + struct drm_info_node *node = (struct drm_info_node *)m->private; + struct drm_device *drm = node->minor->dev; + struct vc4_dev *vc4 = to_vc4_dev(drm); + int dsi_index = (uintptr_t)node->info_ent->data; + struct vc4_dsi *dsi = (dsi_index == 1 ? 
vc4->dsi1 : NULL); + int i; + + if (!dsi) + return 0; + + if (dsi->port == 0) { + for (i = 0; i < ARRAY_SIZE(dsi0_regs); i++) { + seq_printf(m, "0x%04x (%s): 0x%08x\n", + dsi0_regs[i].reg, dsi0_regs[i].name, + DSI_READ(dsi0_regs[i].reg)); + } + } else { + for (i = 0; i < ARRAY_SIZE(dsi1_regs); i++) { + seq_printf(m, "0x%04x (%s): 0x%08x\n", + dsi1_regs[i].reg, dsi1_regs[i].name, + DSI_READ(dsi1_regs[i].reg)); + } + } + + return 0; +} +#endif + +static void vc4_dsi_encoder_destroy(struct drm_encoder *encoder) +{ + drm_encoder_cleanup(encoder); +} + +static const struct drm_encoder_funcs vc4_dsi_encoder_funcs = { + .destroy = vc4_dsi_encoder_destroy, +}; + +static void vc4_dsi_latch_ulps(struct vc4_dsi *dsi, bool latch) +{ + u32 afec0 = DSI_PORT_READ(PHY_AFEC0); + + if (latch) + afec0 |= DSI_PORT_BIT(PHY_AFEC0_LATCH_ULPS); + else + afec0 &= ~DSI_PORT_BIT(PHY_AFEC0_LATCH_ULPS); + + DSI_PORT_WRITE(PHY_AFEC0, afec0); +} + +/* Enters or exits Ultra Low Power State. */ +static void vc4_dsi_ulps(struct vc4_dsi *dsi, bool ulps) +{ + bool continuous = dsi->mode_flags & MIPI_DSI_CLOCK_NON_CONTINUOUS; + u32 phyc_ulps = ((continuous ? DSI_PORT_BIT(PHYC_CLANE_ULPS) : 0) | + DSI_PHYC_DLANE0_ULPS | + (dsi->lanes > 1 ? DSI_PHYC_DLANE1_ULPS : 0) | + (dsi->lanes > 2 ? DSI_PHYC_DLANE2_ULPS : 0) | + (dsi->lanes > 3 ? DSI_PHYC_DLANE3_ULPS : 0)); + u32 stat_ulps = ((continuous ? DSI1_STAT_PHY_CLOCK_ULPS : 0) | + DSI1_STAT_PHY_D0_ULPS | + (dsi->lanes > 1 ? DSI1_STAT_PHY_D1_ULPS : 0) | + (dsi->lanes > 2 ? DSI1_STAT_PHY_D2_ULPS : 0) | + (dsi->lanes > 3 ? DSI1_STAT_PHY_D3_ULPS : 0)); + u32 stat_stop = ((continuous ? DSI1_STAT_PHY_CLOCK_STOP : 0) | + DSI1_STAT_PHY_D0_STOP | + (dsi->lanes > 1 ? DSI1_STAT_PHY_D1_STOP : 0) | + (dsi->lanes > 2 ? DSI1_STAT_PHY_D2_STOP : 0) | + (dsi->lanes > 3 ? 
DSI1_STAT_PHY_D3_STOP : 0)); + int ret; + + DSI_PORT_WRITE(STAT, stat_ulps); + DSI_PORT_WRITE(PHYC, DSI_PORT_READ(PHYC) | phyc_ulps); + ret = wait_for((DSI_PORT_READ(STAT) & stat_ulps) == stat_ulps, 200); + if (ret) { + dev_warn(&dsi->pdev->dev, + "Timeout waiting for DSI ULPS entry: STAT 0x%08x", + DSI_PORT_READ(STAT)); + DSI_PORT_WRITE(PHYC, DSI_PORT_READ(PHYC) & ~phyc_ulps); + vc4_dsi_latch_ulps(dsi, false); + return; + } + + /* The DSI module can't be disabled while the module is + * generating ULPS state. So, to be able to disable the + * module, we have the AFE latch the ULPS state and continue + * on to having the module enter STOP. + */ + vc4_dsi_latch_ulps(dsi, ulps); + + DSI_PORT_WRITE(STAT, stat_stop); + DSI_PORT_WRITE(PHYC, DSI_PORT_READ(PHYC) & ~phyc_ulps); + ret = wait_for((DSI_PORT_READ(STAT) & stat_stop) == stat_stop, 200); + if (ret) { + dev_warn(&dsi->pdev->dev, + "Timeout waiting for DSI STOP entry: STAT 0x%08x", + DSI_PORT_READ(STAT)); + DSI_PORT_WRITE(PHYC, DSI_PORT_READ(PHYC) & ~phyc_ulps); + return; + } +} + +static u32 +dsi_hs_timing(u32 ui_ns, u32 ns, u32 ui) +{ + /* The HS timings have to be rounded up to a multiple of 8 + * because we're using the byte clock. + */ + return roundup(ui + DIV_ROUND_UP(ns, ui_ns), 8); +} + +/* ESC always runs at 100Mhz. */ +#define ESC_TIME_NS 10 + +static u32 +dsi_esc_timing(u32 ns) +{ + return DIV_ROUND_UP(ns, ESC_TIME_NS); +} + +static void vc4_dsi_encoder_disable(struct drm_encoder *encoder) +{ + struct vc4_dsi_encoder *vc4_encoder = to_vc4_dsi_encoder(encoder); + struct vc4_dsi *dsi = vc4_encoder->dsi; + struct device *dev = &dsi->pdev->dev; + + vc4_dsi_ulps(dsi, true); + + clk_disable_unprepare(dsi->pll_phy_clock); + clk_disable_unprepare(dsi->escape_clock); + clk_disable_unprepare(dsi->pixel_clock); + + pm_runtime_put(dev); +} + +/* Extends the mode's blank intervals to handle BCM2835's integer-only + * DSI PLL divider. 
+ * + * On 2835, PLLD is set to 2Ghz, and may not be changed by the display + * driver since most peripherals are hanging off of the PLLD_PER + * divider. PLLD_DSI1, which drives our DSI bit clock (and therefore + * the pixel clock), only has an integer divider off of DSI. + * + * To get our panel mode to refresh at the expected 60Hz, we need to + * extend the horizontal blank time. This means we drive a + * higher-than-expected clock rate to the panel, but that's what the + * firmware does too. + */ +static bool vc4_dsi_encoder_mode_fixup(struct drm_encoder *encoder, + const struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode) +{ + struct vc4_dsi_encoder *vc4_encoder = to_vc4_dsi_encoder(encoder); + struct vc4_dsi *dsi = vc4_encoder->dsi; + struct clk *phy_parent = clk_get_parent(dsi->pll_phy_clock); + unsigned long parent_rate = clk_get_rate(phy_parent); + unsigned long pixel_clock_hz = mode->clock * 1000; + unsigned long pll_clock = pixel_clock_hz * dsi->divider; + int divider; + + /* Find what divider gets us a faster clock than the requested + * pixel clock. + */ + for (divider = 1; divider < 8; divider++) { + if (parent_rate / divider < pll_clock) { + divider--; + break; + } + } + + /* Now that we've picked a PLL divider, calculate back to its + * pixel clock. + */ + pll_clock = parent_rate / divider; + pixel_clock_hz = pll_clock / dsi->divider; + + /* Round up the clk_set_rate() request slightly, since + * PLLD_DSI1 is an integer divider and its rate selection will + * never round up. + */ + adjusted_mode->clock = pixel_clock_hz / 1000 + 1; + + /* Given the new pixel clock, adjust HFP to keep vrefresh the same. 
*/ + adjusted_mode->htotal = pixel_clock_hz / (mode->vrefresh * mode->vtotal); + adjusted_mode->hsync_end += adjusted_mode->htotal - mode->htotal; + adjusted_mode->hsync_start += adjusted_mode->htotal - mode->htotal; + + return true; +} + +static void vc4_dsi_encoder_enable(struct drm_encoder *encoder) +{ + struct drm_display_mode *mode = &encoder->crtc->state->adjusted_mode; + struct vc4_dsi_encoder *vc4_encoder = to_vc4_dsi_encoder(encoder); + struct vc4_dsi *dsi = vc4_encoder->dsi; + struct device *dev = &dsi->pdev->dev; + bool debug_dump_regs = false; + unsigned long hs_clock; + u32 ui_ns; + /* Minimum LP state duration in escape clock cycles. */ + u32 lpx = dsi_esc_timing(60); + unsigned long pixel_clock_hz = mode->clock * 1000; + unsigned long dsip_clock; + unsigned long phy_clock; + int ret; + + ret = pm_runtime_get_sync(dev); + if (ret) { + DRM_ERROR("Failed to runtime PM enable on DSI%d\n", dsi->port); + return; + } + + if (debug_dump_regs) { + DRM_INFO("DSI regs before:\n"); + vc4_dsi_dump_regs(dsi); + } + + phy_clock = pixel_clock_hz * dsi->divider; + ret = clk_set_rate(dsi->pll_phy_clock, phy_clock); + if (ret) { + dev_err(&dsi->pdev->dev, + "Failed to set phy clock to %ld: %d\n", phy_clock, ret); + } + + /* Reset the DSI and all its fifos. */ + DSI_PORT_WRITE(CTRL, + DSI_CTRL_SOFT_RESET_CFG | + DSI_PORT_BIT(CTRL_RESET_FIFOS)); + + DSI_PORT_WRITE(CTRL, + DSI_CTRL_HSDT_EOT_DISABLE | + DSI_CTRL_RX_LPDT_EOT_DISABLE); + + /* Clear all stat bits so we see what has happened during enable. */ + DSI_PORT_WRITE(STAT, DSI_PORT_READ(STAT)); + + /* Set AFE CTR00/CTR1 to release powerdown of analog. 
*/ + if (dsi->port == 0) { + u32 afec0 = (VC4_SET_FIELD(7, DSI_PHY_AFEC0_PTATADJ) | + VC4_SET_FIELD(7, DSI_PHY_AFEC0_CTATADJ)); + + if (dsi->lanes < 2) + afec0 |= DSI0_PHY_AFEC0_PD_DLANE1; + + if (!(dsi->mode_flags & MIPI_DSI_MODE_VIDEO)) + afec0 |= DSI0_PHY_AFEC0_RESET; + + DSI_PORT_WRITE(PHY_AFEC0, afec0); + + DSI_PORT_WRITE(PHY_AFEC1, + VC4_SET_FIELD(6, DSI0_PHY_AFEC1_IDR_DLANE1) | + VC4_SET_FIELD(6, DSI0_PHY_AFEC1_IDR_DLANE0) | + VC4_SET_FIELD(6, DSI0_PHY_AFEC1_IDR_CLANE)); + } else { + u32 afec0 = (VC4_SET_FIELD(7, DSI_PHY_AFEC0_PTATADJ) | + VC4_SET_FIELD(7, DSI_PHY_AFEC0_CTATADJ) | + VC4_SET_FIELD(6, DSI1_PHY_AFEC0_IDR_CLANE) | + VC4_SET_FIELD(6, DSI1_PHY_AFEC0_IDR_DLANE0) | + VC4_SET_FIELD(6, DSI1_PHY_AFEC0_IDR_DLANE1) | + VC4_SET_FIELD(6, DSI1_PHY_AFEC0_IDR_DLANE2) | + VC4_SET_FIELD(6, DSI1_PHY_AFEC0_IDR_DLANE3)); + + if (dsi->lanes < 4) + afec0 |= DSI1_PHY_AFEC0_PD_DLANE3; + if (dsi->lanes < 3) + afec0 |= DSI1_PHY_AFEC0_PD_DLANE2; + if (dsi->lanes < 2) + afec0 |= DSI1_PHY_AFEC0_PD_DLANE1; + + afec0 |= DSI1_PHY_AFEC0_RESET; + + DSI_PORT_WRITE(PHY_AFEC0, afec0); + + DSI_PORT_WRITE(PHY_AFEC1, 0); + + /* AFEC reset hold time */ + mdelay(1); + } + + ret = clk_prepare_enable(dsi->escape_clock); + if (ret) { + DRM_ERROR("Failed to turn on DSI escape clock: %d\n", ret); + return; + } + + ret = clk_prepare_enable(dsi->pll_phy_clock); + if (ret) { + DRM_ERROR("Failed to turn on DSI PLL: %d\n", ret); + return; + } + + hs_clock = clk_get_rate(dsi->pll_phy_clock); + + /* Yes, we set the DSI0P/DSI1P pixel clock to the byte rate, + * not the pixel clock rate. DSIxP take from the APHY's byte, + * DDR2, or DDR4 clock (we use byte) and feed into the PV at + * that rate. Separately, a value derived from PIX_CLK_DIV + * and HS_CLKC is fed into the PV to divide down to the actual + * pixel clock for pushing pixels into DSI. 
+ */ + dsip_clock = phy_clock / 8; + ret = clk_set_rate(dsi->pixel_clock, dsip_clock); + if (ret) { + dev_err(dev, "Failed to set pixel clock to %ldHz: %d\n", + dsip_clock, ret); + } + + ret = clk_prepare_enable(dsi->pixel_clock); + if (ret) { + DRM_ERROR("Failed to turn on DSI pixel clock: %d\n", ret); + return; + } + + /* How many ns one DSI unit interval is. Note that the clock + * is DDR, so there's an extra divide by 2. + */ + ui_ns = DIV_ROUND_UP(500000000, hs_clock); + + DSI_PORT_WRITE(HS_CLT0, + VC4_SET_FIELD(dsi_hs_timing(ui_ns, 262, 0), + DSI_HS_CLT0_CZERO) | + VC4_SET_FIELD(dsi_hs_timing(ui_ns, 0, 8), + DSI_HS_CLT0_CPRE) | + VC4_SET_FIELD(dsi_hs_timing(ui_ns, 38, 0), + DSI_HS_CLT0_CPREP)); + + DSI_PORT_WRITE(HS_CLT1, + VC4_SET_FIELD(dsi_hs_timing(ui_ns, 60, 0), + DSI_HS_CLT1_CTRAIL) | + VC4_SET_FIELD(dsi_hs_timing(ui_ns, 60, 52), + DSI_HS_CLT1_CPOST)); + + DSI_PORT_WRITE(HS_CLT2, + VC4_SET_FIELD(dsi_hs_timing(ui_ns, 1000000, 0), + DSI_HS_CLT2_WUP)); + + DSI_PORT_WRITE(HS_DLT3, + VC4_SET_FIELD(dsi_hs_timing(ui_ns, 100, 0), + DSI_HS_DLT3_EXIT) | + VC4_SET_FIELD(dsi_hs_timing(ui_ns, 105, 6), + DSI_HS_DLT3_ZERO) | + VC4_SET_FIELD(dsi_hs_timing(ui_ns, 40, 4), + DSI_HS_DLT3_PRE)); + + DSI_PORT_WRITE(HS_DLT4, + VC4_SET_FIELD(dsi_hs_timing(ui_ns, lpx * ESC_TIME_NS, 0), + DSI_HS_DLT4_LPX) | + VC4_SET_FIELD(max(dsi_hs_timing(ui_ns, 0, 8), + dsi_hs_timing(ui_ns, 60, 4)), + DSI_HS_DLT4_TRAIL) | + VC4_SET_FIELD(0, DSI_HS_DLT4_ANLAT)); + + DSI_PORT_WRITE(HS_DLT5, VC4_SET_FIELD(dsi_hs_timing(ui_ns, 1000, 5000), + DSI_HS_DLT5_INIT)); + + DSI_PORT_WRITE(HS_DLT6, + VC4_SET_FIELD(lpx * 5, DSI_HS_DLT6_TA_GET) | + VC4_SET_FIELD(lpx, DSI_HS_DLT6_TA_SURE) | + VC4_SET_FIELD(lpx * 4, DSI_HS_DLT6_TA_GO) | + VC4_SET_FIELD(lpx, DSI_HS_DLT6_LP_LPX)); + + DSI_PORT_WRITE(HS_DLT7, + VC4_SET_FIELD(dsi_esc_timing(1000000), + DSI_HS_DLT7_LP_WUP)); + + DSI_PORT_WRITE(PHYC, + DSI_PHYC_DLANE0_ENABLE | + (dsi->lanes >= 2 ? DSI_PHYC_DLANE1_ENABLE : 0) | + (dsi->lanes >= 3 ? 
DSI_PHYC_DLANE2_ENABLE : 0) | + (dsi->lanes >= 4 ? DSI_PHYC_DLANE3_ENABLE : 0) | + DSI_PORT_BIT(PHYC_CLANE_ENABLE) | + ((dsi->mode_flags & MIPI_DSI_CLOCK_NON_CONTINUOUS) ? + 0 : DSI_PORT_BIT(PHYC_HS_CLK_CONTINUOUS)) | + (dsi->port == 0 ? + VC4_SET_FIELD(lpx - 1, DSI0_PHYC_ESC_CLK_LPDT) : + VC4_SET_FIELD(lpx - 1, DSI1_PHYC_ESC_CLK_LPDT))); + + DSI_PORT_WRITE(CTRL, + DSI_PORT_READ(CTRL) | + DSI_CTRL_CAL_BYTE); + + /* HS timeout in HS clock cycles: disabled. */ + DSI_PORT_WRITE(HSTX_TO_CNT, 0); + /* LP receive timeout in HS clocks. */ + DSI_PORT_WRITE(LPRX_TO_CNT, 0xffffff); + /* Bus turnaround timeout */ + DSI_PORT_WRITE(TA_TO_CNT, 100000); + /* Display reset sequence timeout */ + DSI_PORT_WRITE(PR_TO_CNT, 100000); + + if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO) { + DSI_PORT_WRITE(DISP0_CTRL, + VC4_SET_FIELD(dsi->divider, + DSI_DISP0_PIX_CLK_DIV) | + VC4_SET_FIELD(dsi->format, DSI_DISP0_PFORMAT) | + VC4_SET_FIELD(DSI_DISP0_LP_STOP_PERFRAME, + DSI_DISP0_LP_STOP_CTRL) | + DSI_DISP0_ST_END | + DSI_DISP0_ENABLE); + } else { + DSI_PORT_WRITE(DISP0_CTRL, + DSI_DISP0_COMMAND_MODE | + DSI_DISP0_ENABLE); + } + + /* Set up DISP1 for transferring long command payloads through + * the pixfifo. + */ + DSI_PORT_WRITE(DISP1_CTRL, + VC4_SET_FIELD(DSI_DISP1_PFORMAT_32BIT_LE, + DSI_DISP1_PFORMAT) | + DSI_DISP1_ENABLE); + + /* Ungate the block. */ + if (dsi->port == 0) + DSI_PORT_WRITE(CTRL, DSI_PORT_READ(CTRL) | DSI0_CTRL_CTRL0); + else + DSI_PORT_WRITE(CTRL, DSI_PORT_READ(CTRL) | DSI1_CTRL_EN); + + /* Bring AFE out of reset. 
*/ + if (dsi->port == 0) { + } else { + DSI_PORT_WRITE(PHY_AFEC0, + DSI_PORT_READ(PHY_AFEC0) & + ~DSI1_PHY_AFEC0_RESET); + } + + vc4_dsi_ulps(dsi, false); + + if (debug_dump_regs) { + DRM_INFO("DSI regs after:\n"); + vc4_dsi_dump_regs(dsi); + } +} + +static ssize_t vc4_dsi_host_transfer(struct mipi_dsi_host *host, + const struct mipi_dsi_msg *msg) +{ + struct vc4_dsi *dsi = host_to_dsi(host); + struct mipi_dsi_packet packet; + u32 pkth = 0, pktc = 0; + int i, ret; + bool is_long = mipi_dsi_packet_format_is_long(msg->type); + u32 cmd_fifo_len = 0, pix_fifo_len = 0; + + mipi_dsi_create_packet(&packet, msg); + + pkth |= VC4_SET_FIELD(packet.header[0], DSI_TXPKT1H_BC_DT); + pkth |= VC4_SET_FIELD(packet.header[1] | + (packet.header[2] << 8), + DSI_TXPKT1H_BC_PARAM); + if (is_long) { + /* Divide data across the various FIFOs we have available. + * The command FIFO takes byte-oriented data, but is of + * limited size. The pixel FIFO (never actually used for + * pixel data in reality) is word oriented, and substantially + * larger. So, we use the pixel FIFO for most of the data, + * sending the residual bytes in the command FIFO at the start. + * + * With this arrangement, the command FIFO will never get full. 
+ */ + if (packet.payload_length <= 16) { + cmd_fifo_len = packet.payload_length; + pix_fifo_len = 0; + } else { + cmd_fifo_len = (packet.payload_length % + DSI_PIX_FIFO_WIDTH); + pix_fifo_len = ((packet.payload_length - cmd_fifo_len) / + DSI_PIX_FIFO_WIDTH); + } + + WARN_ON_ONCE(pix_fifo_len >= DSI_PIX_FIFO_DEPTH); + + pkth |= VC4_SET_FIELD(cmd_fifo_len, DSI_TXPKT1H_BC_CMDFIFO); + } + + if (msg->rx_len) { + pktc |= VC4_SET_FIELD(DSI_TXPKT1C_CMD_CTRL_RX, + DSI_TXPKT1C_CMD_CTRL); + } else { + pktc |= VC4_SET_FIELD(DSI_TXPKT1C_CMD_CTRL_TX, + DSI_TXPKT1C_CMD_CTRL); + } + + for (i = 0; i < cmd_fifo_len; i++) + DSI_PORT_WRITE(TXPKT_CMD_FIFO, packet.payload[i]); + for (i = 0; i < pix_fifo_len; i++) { + const u8 *pix = packet.payload + cmd_fifo_len + i * 4; + + DSI_PORT_WRITE(TXPKT_PIX_FIFO, + pix[0] | + pix[1] << 8 | + pix[2] << 16 | + pix[3] << 24); + } + + if (msg->flags & MIPI_DSI_MSG_USE_LPM) + pktc |= DSI_TXPKT1C_CMD_MODE_LP; + if (is_long) + pktc |= DSI_TXPKT1C_CMD_TYPE_LONG; + + /* Send one copy of the packet. Larger repeats are used for pixel + * data in command mode. + */ + pktc |= VC4_SET_FIELD(1, DSI_TXPKT1C_CMD_REPEAT); + + pktc |= DSI_TXPKT1C_CMD_EN; + if (pix_fifo_len) { + pktc |= VC4_SET_FIELD(DSI_TXPKT1C_DISPLAY_NO_SECONDARY, + DSI_TXPKT1C_DISPLAY_NO); + } else { + pktc |= VC4_SET_FIELD(DSI_TXPKT1C_DISPLAY_NO_SHORT, + DSI_TXPKT1C_DISPLAY_NO); + } + + /* Enable the appropriate interrupt for the transfer completion. */ + dsi->xfer_result = 0; + reinit_completion(&dsi->xfer_completion); + DSI_PORT_WRITE(INT_STAT, DSI1_INT_TXPKT1_DONE | DSI1_INT_PHY_DIR_RTF); + if (msg->rx_len) { + DSI_PORT_WRITE(INT_EN, (DSI1_INTERRUPTS_ALWAYS_ENABLED | + DSI1_INT_PHY_DIR_RTF)); + } else { + DSI_PORT_WRITE(INT_EN, (DSI1_INTERRUPTS_ALWAYS_ENABLED | + DSI1_INT_TXPKT1_DONE)); + } + + /* Send the packet. 
*/ + DSI_PORT_WRITE(TXPKT1H, pkth); + DSI_PORT_WRITE(TXPKT1C, pktc); + + if (!wait_for_completion_timeout(&dsi->xfer_completion, + msecs_to_jiffies(1000))) { + dev_err(&dsi->pdev->dev, "transfer interrupt wait timeout"); + dev_err(&dsi->pdev->dev, "instat: 0x%08x\n", + DSI_PORT_READ(INT_STAT)); + ret = -ETIMEDOUT; + } else { + ret = dsi->xfer_result; + } + + DSI_PORT_WRITE(INT_EN, DSI1_INTERRUPTS_ALWAYS_ENABLED); + + if (ret) + goto reset_fifo_and_return; + + if (ret == 0 && msg->rx_len) { + u32 rxpkt1h = DSI_PORT_READ(RXPKT1H); + u8 *msg_rx = msg->rx_buf; + + if (rxpkt1h & DSI_RXPKT1H_PKT_TYPE_LONG) { + u32 rxlen = VC4_GET_FIELD(rxpkt1h, + DSI_RXPKT1H_BC_PARAM); + + if (rxlen != msg->rx_len) { + DRM_ERROR("DSI returned %db, expecting %db\n", + rxlen, (int)msg->rx_len); + ret = -ENXIO; + goto reset_fifo_and_return; + } + + for (i = 0; i < msg->rx_len; i++) + msg_rx[i] = DSI_READ(DSI1_RXPKT_FIFO); + } else { + /* FINISHME: Handle AWER */ + + msg_rx[0] = VC4_GET_FIELD(rxpkt1h, + DSI_RXPKT1H_SHORT_0); + if (msg->rx_len > 1) { + msg_rx[1] = VC4_GET_FIELD(rxpkt1h, + DSI_RXPKT1H_SHORT_1); + } + } + } + + return ret; + +reset_fifo_and_return: + DRM_ERROR("DSI transfer failed, resetting: %d\n", ret); + + DSI_PORT_WRITE(TXPKT1C, DSI_PORT_READ(TXPKT1C) & ~DSI_TXPKT1C_CMD_EN); + udelay(1); + DSI_PORT_WRITE(CTRL, + DSI_PORT_READ(CTRL) | + DSI_PORT_BIT(CTRL_RESET_FIFOS)); + + DSI_PORT_WRITE(TXPKT1C, 0); + DSI_PORT_WRITE(INT_EN, DSI1_INTERRUPTS_ALWAYS_ENABLED); + return ret; +} + +static int vc4_dsi_host_attach(struct mipi_dsi_host *host, + struct mipi_dsi_device *device) +{ + struct vc4_dsi *dsi = host_to_dsi(host); + int ret = 0; + + dsi->lanes = device->lanes; + dsi->channel = device->channel; + dsi->mode_flags = device->mode_flags; + + switch (device->format) { + case MIPI_DSI_FMT_RGB888: + dsi->format = DSI_PFORMAT_RGB888; + dsi->divider = 24 / dsi->lanes; + break; + case MIPI_DSI_FMT_RGB666: + dsi->format = DSI_PFORMAT_RGB666; + dsi->divider = 24 / dsi->lanes; + break; + 
case MIPI_DSI_FMT_RGB666_PACKED: + dsi->format = DSI_PFORMAT_RGB666_PACKED; + dsi->divider = 18 / dsi->lanes; + break; + case MIPI_DSI_FMT_RGB565: + dsi->format = DSI_PFORMAT_RGB565; + dsi->divider = 16 / dsi->lanes; + break; + default: + dev_err(&dsi->pdev->dev, "Unknown DSI format: %d.\n", + dsi->format); + return 0; + } + + if (!(dsi->mode_flags & MIPI_DSI_MODE_VIDEO)) { + dev_err(&dsi->pdev->dev, + "Only VIDEO mode panels supported currently.\n"); + return 0; + } + + dsi->bridge = of_drm_find_bridge(device->dev.of_node); + if (!dsi->bridge) { + struct drm_panel *panel = + of_drm_find_panel(device->dev.of_node); + + dsi->bridge = drm_panel_bridge_add(panel, + DRM_MODE_CONNECTOR_DSI); + if (IS_ERR(dsi->bridge)) { + ret = PTR_ERR(dsi->bridge); + dsi->bridge = NULL; + return ret; + } + dsi->is_panel_bridge = true; + } + + return drm_bridge_attach(dsi->encoder, dsi->bridge, NULL); +} + +static int vc4_dsi_host_detach(struct mipi_dsi_host *host, + struct mipi_dsi_device *device) +{ + struct vc4_dsi *dsi = host_to_dsi(host); + + if (dsi->is_panel_bridge) { + drm_panel_bridge_remove(dsi->bridge); + dsi->bridge = NULL; + } + + return 0; +} + +static const struct mipi_dsi_host_ops vc4_dsi_host_ops = { + .attach = vc4_dsi_host_attach, + .detach = vc4_dsi_host_detach, + .transfer = vc4_dsi_host_transfer, +}; + +static const struct drm_encoder_helper_funcs vc4_dsi_encoder_helper_funcs = { + .disable = vc4_dsi_encoder_disable, + .enable = vc4_dsi_encoder_enable, + .mode_fixup = vc4_dsi_encoder_mode_fixup, +}; + +static const struct of_device_id vc4_dsi_dt_match[] = { + { .compatible = "brcm,bcm2835-dsi1", (void *)(uintptr_t)1 }, + {} +}; + +static void dsi_handle_error(struct vc4_dsi *dsi, + irqreturn_t *ret, u32 stat, u32 bit, + const char *type) +{ + if (!(stat & bit)) + return; + + DRM_ERROR("DSI%d: %s error\n", dsi->port, type); + *ret = IRQ_HANDLED; +} + +static irqreturn_t vc4_dsi_irq_handler(int irq, void *data) +{ + struct vc4_dsi *dsi = data; + u32 stat = 
DSI_PORT_READ(INT_STAT); + irqreturn_t ret = IRQ_NONE; + + DSI_PORT_WRITE(INT_STAT, stat); + + dsi_handle_error(dsi, &ret, stat, + DSI1_INT_ERR_SYNC_ESC, "LPDT sync"); + dsi_handle_error(dsi, &ret, stat, + DSI1_INT_ERR_CONTROL, "data lane 0 sequence"); + dsi_handle_error(dsi, &ret, stat, + DSI1_INT_ERR_CONT_LP0, "LP0 contention"); + dsi_handle_error(dsi, &ret, stat, + DSI1_INT_ERR_CONT_LP1, "LP1 contention"); + dsi_handle_error(dsi, &ret, stat, + DSI1_INT_HSTX_TO, "HSTX timeout"); + dsi_handle_error(dsi, &ret, stat, + DSI1_INT_LPRX_TO, "LPRX timeout"); + dsi_handle_error(dsi, &ret, stat, + DSI1_INT_TA_TO, "turnaround timeout"); + dsi_handle_error(dsi, &ret, stat, + DSI1_INT_PR_TO, "peripheral reset timeout"); + + if (stat & (DSI1_INT_TXPKT1_DONE | DSI1_INT_PHY_DIR_RTF)) { + complete(&dsi->xfer_completion); + ret = IRQ_HANDLED; + } else if (stat & DSI1_INT_HSTX_TO) { + complete(&dsi->xfer_completion); + dsi->xfer_result = -ETIMEDOUT; + ret = IRQ_HANDLED; + } + + return ret; +} + +/** + * vc4_dsi_init_phy_clocks - Exposes clocks generated by the analog + * PHY that are consumed by CPRMAN (clk-bcm2835.c). + * @dsi: DSI encoder + */ +static int +vc4_dsi_init_phy_clocks(struct vc4_dsi *dsi) +{ + struct device *dev = &dsi->pdev->dev; + const char *parent_name = __clk_get_name(dsi->pll_phy_clock); + static const struct { + const char *dsi0_name, *dsi1_name; + int div; + } phy_clocks[] = { + { "dsi0_byte", "dsi1_byte", 8 }, + { "dsi0_ddr2", "dsi1_ddr2", 4 }, + { "dsi0_ddr", "dsi1_ddr", 2 }, + }; + int i; + + dsi->clk_onecell = devm_kzalloc(dev, + sizeof(*dsi->clk_onecell) + + ARRAY_SIZE(phy_clocks) * + sizeof(struct clk_hw *), + GFP_KERNEL); + if (!dsi->clk_onecell) + return -ENOMEM; + dsi->clk_onecell->num = ARRAY_SIZE(phy_clocks); + + for (i = 0; i < ARRAY_SIZE(phy_clocks); i++) { + struct clk_fixed_factor *fix = &dsi->phy_clocks[i]; + struct clk_init_data init; + int ret; + + /* We just use core fixed factor clock ops for the PHY + * clocks. 
The clocks are actually gated by the + * PHY_AFEC0_DDRCLK_EN bits, which we should be + * setting if we use the DDR/DDR2 clocks. However, + * vc4_dsi_encoder_enable() is setting up both AFEC0, + * setting both our parent DSI PLL's rate and this + * clock's rate, so it knows if DDR/DDR2 are going to + * be used and could enable the gates itself. + */ + fix->mult = 1; + fix->div = phy_clocks[i].div; + fix->hw.init = &init; + + memset(&init, 0, sizeof(init)); + init.parent_names = &parent_name; + init.num_parents = 1; + if (dsi->port == 1) + init.name = phy_clocks[i].dsi1_name; + else + init.name = phy_clocks[i].dsi0_name; + init.ops = &clk_fixed_factor_ops; + + ret = devm_clk_hw_register(dev, &fix->hw); + if (ret) + return ret; + + dsi->clk_onecell->hws[i] = &fix->hw; + } + + return of_clk_add_hw_provider(dev->of_node, + of_clk_hw_onecell_get, + dsi->clk_onecell); +} + +static int vc4_dsi_bind(struct device *dev, struct device *master, void *data) +{ + struct platform_device *pdev = to_platform_device(dev); + struct drm_device *drm = dev_get_drvdata(master); + struct vc4_dev *vc4 = to_vc4_dev(drm); + struct vc4_dsi *dsi; + struct vc4_dsi_encoder *vc4_dsi_encoder; + const struct of_device_id *match; + dma_cap_mask_t dma_mask; + int ret; + + dsi = devm_kzalloc(dev, sizeof(*dsi), GFP_KERNEL); + if (!dsi) + return -ENOMEM; + + match = of_match_device(vc4_dsi_dt_match, dev); + if (!match) + return -ENODEV; + + dsi->port = (uintptr_t)match->data; + + vc4_dsi_encoder = devm_kzalloc(dev, sizeof(*vc4_dsi_encoder), + GFP_KERNEL); + if (!vc4_dsi_encoder) + return -ENOMEM; + vc4_dsi_encoder->base.type = VC4_ENCODER_TYPE_DSI1; + vc4_dsi_encoder->dsi = dsi; + dsi->encoder = &vc4_dsi_encoder->base.base; + + dsi->pdev = pdev; + dsi->regs = vc4_ioremap_regs(pdev, 0); + if (IS_ERR(dsi->regs)) + return PTR_ERR(dsi->regs); + + if (DSI_PORT_READ(ID) != DSI_ID_VALUE) { + dev_err(dev, "Port returned 0x%08x for ID instead of 0x%08x\n", + DSI_PORT_READ(ID), DSI_ID_VALUE); + return -ENODEV; + 
} + + /* DSI1 has a broken AXI slave that doesn't respond to writes + * from the ARM. It does handle writes from the DMA engine, + * so set up a channel for talking to it. + */ + if (dsi->port == 1) { + dsi->reg_dma_mem = dma_alloc_coherent(dev, 4, + &dsi->reg_dma_paddr, + GFP_KERNEL); + if (!dsi->reg_dma_mem) { + DRM_ERROR("Failed to get DMA memory\n"); + return -ENOMEM; + } + + dma_cap_zero(dma_mask); + dma_cap_set(DMA_MEMCPY, dma_mask); + dsi->reg_dma_chan = dma_request_chan_by_mask(&dma_mask); + if (IS_ERR(dsi->reg_dma_chan)) { + ret = PTR_ERR(dsi->reg_dma_chan); + if (ret != -EPROBE_DEFER) + DRM_ERROR("Failed to get DMA channel: %d\n", + ret); + return ret; + } + + /* Get the physical address of the device's registers. The + * struct resource for the regs gives us the bus address + * instead. + */ + dsi->reg_paddr = be32_to_cpup(of_get_address(dev->of_node, + 0, NULL, NULL)); + } + + init_completion(&dsi->xfer_completion); + /* At startup enable error-reporting interrupts and nothing else. */ + DSI_PORT_WRITE(INT_EN, DSI1_INTERRUPTS_ALWAYS_ENABLED); + /* Clear any existing interrupt state. 
*/ + DSI_PORT_WRITE(INT_STAT, DSI_PORT_READ(INT_STAT)); + + ret = devm_request_irq(dev, platform_get_irq(pdev, 0), + vc4_dsi_irq_handler, 0, "vc4 dsi", dsi); + if (ret) { + if (ret != -EPROBE_DEFER) + dev_err(dev, "Failed to get interrupt: %d\n", ret); + return ret; + } + + dsi->escape_clock = devm_clk_get(dev, "escape"); + if (IS_ERR(dsi->escape_clock)) { + ret = PTR_ERR(dsi->escape_clock); + if (ret != -EPROBE_DEFER) + dev_err(dev, "Failed to get escape clock: %d\n", ret); + return ret; + } + + dsi->pll_phy_clock = devm_clk_get(dev, "phy"); + if (IS_ERR(dsi->pll_phy_clock)) { + ret = PTR_ERR(dsi->pll_phy_clock); + if (ret != -EPROBE_DEFER) + dev_err(dev, "Failed to get phy clock: %d\n", ret); + return ret; + } + + dsi->pixel_clock = devm_clk_get(dev, "pixel"); + if (IS_ERR(dsi->pixel_clock)) { + ret = PTR_ERR(dsi->pixel_clock); + if (ret != -EPROBE_DEFER) + dev_err(dev, "Failed to get pixel clock: %d\n", ret); + return ret; + } + + /* The esc clock rate is supposed to always be 100Mhz. 
*/ + ret = clk_set_rate(dsi->escape_clock, 100 * 1000000); + if (ret) { + dev_err(dev, "Failed to set esc clock: %d\n", ret); + return ret; + } + + ret = vc4_dsi_init_phy_clocks(dsi); + if (ret) + return ret; + + if (dsi->port == 1) + vc4->dsi1 = dsi; + + drm_encoder_init(drm, dsi->encoder, &vc4_dsi_encoder_funcs, + DRM_MODE_ENCODER_DSI, NULL); + drm_encoder_helper_add(dsi->encoder, &vc4_dsi_encoder_helper_funcs); + + dsi->dsi_host.ops = &vc4_dsi_host_ops; + dsi->dsi_host.dev = dev; + + mipi_dsi_host_register(&dsi->dsi_host); + + dev_set_drvdata(dev, dsi); + + pm_runtime_enable(dev); + + return 0; +} + +static void vc4_dsi_unbind(struct device *dev, struct device *master, + void *data) +{ + struct drm_device *drm = dev_get_drvdata(master); + struct vc4_dev *vc4 = to_vc4_dev(drm); + struct vc4_dsi *dsi = dev_get_drvdata(dev); + + pm_runtime_disable(dev); + + drm_bridge_remove(dsi->bridge); + vc4_dsi_encoder_destroy(dsi->encoder); + + mipi_dsi_host_unregister(&dsi->dsi_host); + + clk_disable_unprepare(dsi->pll_phy_clock); + clk_disable_unprepare(dsi->escape_clock); + + if (dsi->port == 1) + vc4->dsi1 = NULL; +} + +static const struct component_ops vc4_dsi_ops = { + .bind = vc4_dsi_bind, + .unbind = vc4_dsi_unbind, +}; + +static int vc4_dsi_dev_probe(struct platform_device *pdev) +{ + return component_add(&pdev->dev, &vc4_dsi_ops); +} + +static int vc4_dsi_dev_remove(struct platform_device *pdev) +{ + component_del(&pdev->dev, &vc4_dsi_ops); + return 0; +} + +struct platform_driver vc4_dsi_driver = { + .probe = vc4_dsi_dev_probe, + .remove = vc4_dsi_dev_remove, + .driver = { + .name = "vc4_dsi", + .of_match_table = vc4_dsi_dt_match, + }, +}; diff --git a/drivers/gpu/drm/vc4/vc4_fence.c b/drivers/gpu/drm/vc4/vc4_fence.c new file mode 100644 index 000000000000..dbf5a5a5d5f5 --- /dev/null +++ b/drivers/gpu/drm/vc4/vc4_fence.c @@ -0,0 +1,56 @@ +/* + * Copyright © 2017 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this 
software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "vc4_drv.h" + +static const char *vc4_fence_get_driver_name(struct dma_fence *fence) +{ + return "vc4"; +} + +static const char *vc4_fence_get_timeline_name(struct dma_fence *fence) +{ + return "vc4-v3d"; +} + +static bool vc4_fence_enable_signaling(struct dma_fence *fence) +{ + return true; +} + +static bool vc4_fence_signaled(struct dma_fence *fence) +{ + struct vc4_fence *f = to_vc4_fence(fence); + struct vc4_dev *vc4 = to_vc4_dev(f->dev); + + return vc4->finished_seqno >= f->seqno; +} + +const struct dma_fence_ops vc4_fence_ops = { + .get_driver_name = vc4_fence_get_driver_name, + .get_timeline_name = vc4_fence_get_timeline_name, + .enable_signaling = vc4_fence_enable_signaling, + .signaled = vc4_fence_signaled, + .wait = dma_fence_default_wait, + .release = dma_fence_free, +}; diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index ab3016982466..d5b821ad06af 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -26,6 +26,7 @@ #include <linux/pm_runtime.h> #include <linux/device.h> #include <linux/io.h> +#include <linux/sched/signal.h> #include "uapi/drm/vc4_drm.h" #include "vc4_drv.h" @@ -110,8 +111,8 @@ vc4_get_hang_state_ioctl(struct drm_device *dev, void *data, &handle); if (ret) { - state->bo_count = i - 1; - goto err; + state->bo_count = i; + goto err_delete_handle; } bo_state[i].handle = handle; bo_state[i].paddr = vc4_bo->base.paddr; @@ -123,13 +124,16 @@ vc4_get_hang_state_ioctl(struct drm_device *dev, void *data, state->bo_count * sizeof(*bo_state))) ret = -EFAULT; - kfree(bo_state); +err_delete_handle: + if (ret) { + for (i = 0; i < state->bo_count; i++) + drm_gem_handle_delete(file_priv, bo_state[i].handle); + } err_free: - vc4_free_hang_state(dev, kernel_state); + kfree(bo_state); -err: return ret; } @@ -462,6 +466,8 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno) for (i = 0; i < exec->bo_count; i++) { bo = to_vc4_bo(&exec->bo[i]->base); bo->seqno = seqno; + + 
reservation_object_add_shared_fence(bo->resv, exec->fence); } list_for_each_entry(bo, &exec->unref_list, unref_head) { @@ -471,7 +477,103 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno) for (i = 0; i < exec->rcl_write_bo_count; i++) { bo = to_vc4_bo(&exec->rcl_write_bo[i]->base); bo->write_seqno = seqno; + + reservation_object_add_excl_fence(bo->resv, exec->fence); + } +} + +static void +vc4_unlock_bo_reservations(struct drm_device *dev, + struct vc4_exec_info *exec, + struct ww_acquire_ctx *acquire_ctx) +{ + int i; + + for (i = 0; i < exec->bo_count; i++) { + struct vc4_bo *bo = to_vc4_bo(&exec->bo[i]->base); + + ww_mutex_unlock(&bo->resv->lock); + } + + ww_acquire_fini(acquire_ctx); +} + +/* Takes the reservation lock on all the BOs being referenced, so that + * at queue submit time we can update the reservations. + * + * We don't lock the RCL, the tile alloc/state BOs, or overflow memory + * (all of which are on exec->unref_list). They're entirely private + * to vc4, so we don't attach dma-buf fences to them.
+ */ +static int +vc4_lock_bo_reservations(struct drm_device *dev, + struct vc4_exec_info *exec, + struct ww_acquire_ctx *acquire_ctx) +{ + int contended_lock = -1; + int i, ret; + struct vc4_bo *bo; + + ww_acquire_init(acquire_ctx, &reservation_ww_class); + +retry: + if (contended_lock != -1) { + bo = to_vc4_bo(&exec->bo[contended_lock]->base); + ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock, + acquire_ctx); + if (ret) { + ww_acquire_done(acquire_ctx); + return ret; + } + } + + for (i = 0; i < exec->bo_count; i++) { + if (i == contended_lock) + continue; + + bo = to_vc4_bo(&exec->bo[i]->base); + + ret = ww_mutex_lock_interruptible(&bo->resv->lock, acquire_ctx); + if (ret) { + int j; + + for (j = 0; j < i; j++) { + bo = to_vc4_bo(&exec->bo[j]->base); + ww_mutex_unlock(&bo->resv->lock); + } + + if (contended_lock != -1 && contended_lock >= i) { + bo = to_vc4_bo(&exec->bo[contended_lock]->base); + + ww_mutex_unlock(&bo->resv->lock); + } + + if (ret == -EDEADLK) { + contended_lock = i; + goto retry; + } + + ww_acquire_done(acquire_ctx); + return ret; + } } + + ww_acquire_done(acquire_ctx); + + /* Reserve space for our shared (read-only) fence references, + * before we commit the CL to the hardware. + */ + for (i = 0; i < exec->bo_count; i++) { + bo = to_vc4_bo(&exec->bo[i]->base); + + ret = reservation_object_reserve_shared(bo->resv); + if (ret) { + vc4_unlock_bo_reservations(dev, exec, acquire_ctx); + return ret; + } + } + + return 0; } /* Queues a struct vc4_exec_info for execution. If no job is @@ -483,19 +585,34 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno) * then bump the end address. That's a change for a later date, * though. 
*/ -static void -vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec) +static int +vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, + struct ww_acquire_ctx *acquire_ctx) { struct vc4_dev *vc4 = to_vc4_dev(dev); uint64_t seqno; unsigned long irqflags; + struct vc4_fence *fence; + + fence = kzalloc(sizeof(*fence), GFP_KERNEL); + if (!fence) + return -ENOMEM; + fence->dev = dev; spin_lock_irqsave(&vc4->job_lock, irqflags); seqno = ++vc4->emit_seqno; exec->seqno = seqno; + + dma_fence_init(&fence->base, &vc4_fence_ops, &vc4->job_lock, + vc4->dma_fence_context, exec->seqno); + fence->seqno = exec->seqno; + exec->fence = &fence->base; + vc4_update_bo_seqnos(exec, seqno); + vc4_unlock_bo_reservations(dev, exec, acquire_ctx); + list_add_tail(&exec->head, &vc4->bin_job_list); /* If no job was executing, kick ours off. Otherwise, it'll @@ -508,12 +625,23 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec) } spin_unlock_irqrestore(&vc4->job_lock, irqflags); + + return 0; } /** - * Looks up a bunch of GEM handles for BOs and stores the array for - * use in the command validator that actually writes relocated - * addresses pointing to them. + * vc4_cl_lookup_bos() - Sets up exec->bo[] with the GEM objects + * referenced by the job. + * @dev: DRM device + * @file_priv: DRM file for this fd + * @exec: V3D job being set up + * + * The command validator needs to reference BOs by their index within + * the submitted job's BO list. This does the validation of the job's + * BO list and reference counting for the lifetime of the job. + * + * Note that this function doesn't need to unreference the BOs on + * failure, because that will happen at vc4_complete_exec() time. 
*/ static int vc4_cl_lookup_bos(struct drm_device *dev, @@ -535,14 +663,15 @@ vc4_cl_lookup_bos(struct drm_device *dev, return -EINVAL; } - exec->bo = drm_calloc_large(exec->bo_count, - sizeof(struct drm_gem_cma_object *)); + exec->bo = kvmalloc_array(exec->bo_count, + sizeof(struct drm_gem_cma_object *), + GFP_KERNEL | __GFP_ZERO); if (!exec->bo) { DRM_ERROR("Failed to allocate validated BO pointers\n"); return -ENOMEM; } - handles = drm_malloc_ab(exec->bo_count, sizeof(uint32_t)); + handles = kvmalloc_array(exec->bo_count, sizeof(uint32_t), GFP_KERNEL); if (!handles) { ret = -ENOMEM; DRM_ERROR("Failed to allocate incoming GEM handles\n"); @@ -574,7 +703,7 @@ vc4_cl_lookup_bos(struct drm_device *dev, spin_unlock(&file_priv->table_lock); fail: - drm_free_large(handles); + kvfree(handles); return ret; } @@ -612,7 +741,7 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec) * read the contents back for validation, and I think the * bo->vaddr is uncached access. */ - temp = drm_malloc_ab(temp_size, 1); + temp = kvmalloc_array(temp_size, 1, GFP_KERNEL); if (!temp) { DRM_ERROR("Failed to allocate storage for copying " "in bin/render CLs.\n"); @@ -687,7 +816,7 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec) ret = vc4_wait_for_seqno(dev, exec->bin_dep_seqno, ~0ull, true); fail: - drm_free_large(temp); + kvfree(temp); return ret; } @@ -695,12 +824,19 @@ static void vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec) { struct vc4_dev *vc4 = to_vc4_dev(dev); + unsigned long irqflags; unsigned i; + /* If we got force-completed because of GPU reset rather than + * through our IRQ handler, signal the fence now. 
+ */ + if (exec->fence) + dma_fence_signal(exec->fence); + if (exec->bo) { for (i = 0; i < exec->bo_count; i++) drm_gem_object_unreference_unlocked(&exec->bo[i]->base); - drm_free_large(exec->bo); + kvfree(exec->bo); } while (!list_empty(&exec->unref_list)) { @@ -710,6 +846,11 @@ vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec) drm_gem_object_unreference_unlocked(&bo->base.base); } + /* Free up the allocation of any bin slots we used. */ + spin_lock_irqsave(&vc4->job_lock, irqflags); + vc4->bin_alloc_used &= ~exec->bin_slots; + spin_unlock_irqrestore(&vc4->job_lock, irqflags); + mutex_lock(&vc4->power_lock); if (--vc4->power_refcount == 0) { pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev); @@ -846,9 +987,16 @@ vc4_wait_bo_ioctl(struct drm_device *dev, void *data, } /** - * Submits a command list to the VC4. + * vc4_submit_cl_ioctl() - Submits a job (frame) to the VC4. + * @dev: DRM device + * @data: ioctl argument + * @file_priv: DRM file for this fd * - * This is what is called batchbuffer emitting on other hardware. + * This is the main entrypoint for userspace to submit a 3D frame to + * the GPU. Userspace provides the binner command list (if + * applicable), and the kernel sets up the render command list to draw + * to the framebuffer described in the ioctl, using the command lists + * that the 3D engine's binner will produce. 
*/ int vc4_submit_cl_ioctl(struct drm_device *dev, void *data, @@ -857,6 +1005,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, struct vc4_dev *vc4 = to_vc4_dev(dev); struct drm_vc4_submit_cl *args = data; struct vc4_exec_info *exec; + struct ww_acquire_ctx acquire_ctx; int ret = 0; if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) { @@ -871,13 +1020,16 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, } mutex_lock(&vc4->power_lock); - if (vc4->power_refcount++ == 0) + if (vc4->power_refcount++ == 0) { ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev); - mutex_unlock(&vc4->power_lock); - if (ret < 0) { - kfree(exec); - return ret; + if (ret < 0) { + mutex_unlock(&vc4->power_lock); + vc4->power_refcount--; + kfree(exec); + return ret; + } } + mutex_unlock(&vc4->power_lock); exec->args = args; INIT_LIST_HEAD(&exec->unref_list); @@ -899,12 +1051,18 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, if (ret) goto fail; + ret = vc4_lock_bo_reservations(dev, exec, &acquire_ctx); + if (ret) + goto fail; + /* Clear this out of the struct we'll be putting in the queue, * since it's part of our stack. */ exec->args = NULL; - vc4_queue_submit(dev, exec); + ret = vc4_queue_submit(dev, exec, &acquire_ctx); + if (ret) + goto fail; /* Return the seqno for our job. */ args->seqno = vc4->emit_seqno; @@ -922,6 +1080,8 @@ vc4_gem_init(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); + vc4->dma_fence_context = dma_fence_context_alloc(1); + INIT_LIST_HEAD(&vc4->bin_job_list); INIT_LIST_HEAD(&vc4->render_job_list); INIT_LIST_HEAD(&vc4->job_done_list); @@ -951,9 +1111,9 @@ vc4_gem_destroy(struct drm_device *dev) /* V3D should already have disabled its interrupt and cleared * the overflow allocation registers. Now free the object. 
*/ - if (vc4->overflow_mem) { - drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base); - vc4->overflow_mem = NULL; + if (vc4->bin_bo) { + drm_gem_object_put_unlocked(&vc4->bin_bo->base.base); + vc4->bin_bo = NULL; } if (vc4->hang_state) diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index c4cb2e26de32..ed63d4e85762 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -20,22 +20,56 @@ /** * DOC: VC4 Falcon HDMI module * - * The HDMI core has a state machine and a PHY. Most of the unit - * operates off of the HSM clock from CPRMAN. It also internally uses - * the PLLH_PIX clock for the PHY. + * The HDMI core has a state machine and a PHY. On BCM2835, most of + * the unit operates off of the HSM clock from CPRMAN. It also + * internally uses the PLLH_PIX clock for the PHY. + * + * HDMI infoframes are kept within a small packet ram, where each + * packet can be individually enabled for including in a frame. + * + * HDMI audio is implemented entirely within the HDMI IP block. A + * register in the HDMI encoder takes SPDIF frames from the DMA engine + * and transfers them over an internal MAI (multi-channel audio + * interconnect) bus to the encoder side for insertion into the video + * blank regions. + * + * The driver's HDMI encoder does not yet support power management. + * The HDMI encoder's power domain and the HSM/pixel clocks are kept + * continuously running, and only the HDMI logic and packet ram are + * powered off/on at disable/enable time. + * + * The driver does not yet support CEC control, though the HDMI + * encoder block has CEC support. 
*/ -#include "drm_atomic_helper.h" -#include "drm_crtc_helper.h" -#include "drm_edid.h" -#include "linux/clk.h" -#include "linux/component.h" -#include "linux/i2c.h" -#include "linux/of_gpio.h" -#include "linux/of_platform.h" +#include <drm/drm_atomic_helper.h> +#include <drm/drm_crtc_helper.h> +#include <drm/drm_edid.h> +#include <linux/clk.h> +#include <linux/component.h> +#include <linux/i2c.h> +#include <linux/of_address.h> +#include <linux/of_gpio.h> +#include <linux/of_platform.h> +#include <linux/pm_runtime.h> +#include <linux/rational.h> +#include <sound/dmaengine_pcm.h> +#include <sound/pcm_drm_eld.h> +#include <sound/pcm_params.h> +#include <sound/soc.h> #include "vc4_drv.h" #include "vc4_regs.h" +/* HDMI audio information */ +struct vc4_hdmi_audio { + struct snd_soc_card card; + struct snd_soc_dai_link link; + int samplerate; + int channels; + struct snd_dmaengine_dai_dma_data dma_data; + struct snd_pcm_substream *substream; +}; + /* General HDMI hardware state. */ struct vc4_hdmi { struct platform_device *pdev; @@ -43,6 +77,8 @@ struct vc4_hdmi { struct drm_encoder *encoder; struct drm_connector *connector; + struct vc4_hdmi_audio audio; + struct i2c_adapter *ddc; void __iomem *hdmicore_regs; void __iomem *hd_regs; @@ -98,6 +134,10 @@ static const struct { HDMI_REG(VC4_HDMI_SW_RESET_CONTROL), HDMI_REG(VC4_HDMI_HOTPLUG_INT), HDMI_REG(VC4_HDMI_HOTPLUG), + HDMI_REG(VC4_HDMI_MAI_CHANNEL_MAP), + HDMI_REG(VC4_HDMI_MAI_CONFIG), + HDMI_REG(VC4_HDMI_MAI_FORMAT), + HDMI_REG(VC4_HDMI_AUDIO_PACKET_CONFIG), HDMI_REG(VC4_HDMI_RAM_PACKET_CONFIG), HDMI_REG(VC4_HDMI_HORZA), HDMI_REG(VC4_HDMI_HORZB), @@ -108,6 +148,7 @@ static const struct { HDMI_REG(VC4_HDMI_VERTB0), HDMI_REG(VC4_HDMI_VERTB1), HDMI_REG(VC4_HDMI_TX_PHY_RESET_CTL), + HDMI_REG(VC4_HDMI_TX_PHY_CTL0), }; static const struct { @@ -116,6 +157,9 @@ static const struct { } hd_regs[] = { HDMI_REG(VC4_HD_M_CTL), HDMI_REG(VC4_HD_MAI_CTL), + HDMI_REG(VC4_HD_MAI_THR), + HDMI_REG(VC4_HD_MAI_FMT), + 
HDMI_REG(VC4_HD_MAI_SMP), HDMI_REG(VC4_HD_VID_CTL), HDMI_REG(VC4_HD_CSC_CTL), HDMI_REG(VC4_HD_FRAME_COUNT), @@ -215,6 +259,7 @@ static int vc4_hdmi_connector_get_modes(struct drm_connector *connector) drm_mode_connector_update_edid_property(connector, edid); ret = drm_add_edid_modes(connector, edid); + drm_edid_to_eld(connector, edid); return ret; } @@ -300,7 +345,7 @@ static void vc4_hdmi_write_infoframe(struct drm_encoder *encoder, struct drm_device *dev = encoder->dev; struct vc4_dev *vc4 = to_vc4_dev(dev); u32 packet_id = frame->any.type - 0x80; - u32 packet_reg = VC4_HDMI_GCP_0 + VC4_HDMI_PACKET_STRIDE * packet_id; + u32 packet_reg = VC4_HDMI_RAM_PACKET(packet_id); uint8_t buffer[VC4_HDMI_PACKET_STRIDE]; ssize_t len, i; int ret; @@ -356,15 +401,11 @@ static void vc4_hdmi_set_avi_infoframe(struct drm_encoder *encoder) return; } - if (vc4_encoder->rgb_range_selectable) { - if (vc4_encoder->limited_rgb_range) { - frame.avi.quantization_range = - HDMI_QUANTIZATION_RANGE_LIMITED; - } else { - frame.avi.quantization_range = - HDMI_QUANTIZATION_RANGE_FULL; - } - } + drm_hdmi_avi_infoframe_quant_range(&frame.avi, mode, + vc4_encoder->limited_rgb_range ? 
+ HDMI_QUANTIZATION_RANGE_LIMITED : + HDMI_QUANTIZATION_RANGE_FULL, + vc4_encoder->rgb_range_selectable); vc4_hdmi_write_infoframe(encoder, &frame); } @@ -385,19 +426,62 @@ static void vc4_hdmi_set_spd_infoframe(struct drm_encoder *encoder) vc4_hdmi_write_infoframe(encoder, &frame); } +static void vc4_hdmi_set_audio_infoframe(struct drm_encoder *encoder) +{ + struct drm_device *drm = encoder->dev; + struct vc4_dev *vc4 = drm->dev_private; + struct vc4_hdmi *hdmi = vc4->hdmi; + union hdmi_infoframe frame; + int ret; + + ret = hdmi_audio_infoframe_init(&frame.audio); + + frame.audio.coding_type = HDMI_AUDIO_CODING_TYPE_STREAM; + frame.audio.sample_frequency = HDMI_AUDIO_SAMPLE_FREQUENCY_STREAM; + frame.audio.sample_size = HDMI_AUDIO_SAMPLE_SIZE_STREAM; + frame.audio.channels = hdmi->audio.channels; + + vc4_hdmi_write_infoframe(encoder, &frame); +} + static void vc4_hdmi_set_infoframes(struct drm_encoder *encoder) { vc4_hdmi_set_avi_infoframe(encoder); vc4_hdmi_set_spd_infoframe(encoder); } -static void vc4_hdmi_encoder_mode_set(struct drm_encoder *encoder, - struct drm_display_mode *unadjusted_mode, - struct drm_display_mode *mode) +static void vc4_hdmi_encoder_disable(struct drm_encoder *encoder) +{ + struct drm_device *dev = encoder->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_hdmi *hdmi = vc4->hdmi; + int ret; + + HDMI_WRITE(VC4_HDMI_RAM_PACKET_CONFIG, 0); + + HDMI_WRITE(VC4_HDMI_TX_PHY_RESET_CTL, 0xf << 16); + HD_WRITE(VC4_HD_VID_CTL, + HD_READ(VC4_HD_VID_CTL) & ~VC4_HD_VID_CTL_ENABLE); + + HD_WRITE(VC4_HD_M_CTL, VC4_HD_M_SW_RST); + udelay(1); + HD_WRITE(VC4_HD_M_CTL, 0); + + clk_disable_unprepare(hdmi->hsm_clock); + clk_disable_unprepare(hdmi->pixel_clock); + + ret = pm_runtime_put(&hdmi->pdev->dev); + if (ret < 0) + DRM_ERROR("Failed to release power domain: %d\n", ret); +} + +static void vc4_hdmi_encoder_enable(struct drm_encoder *encoder) { + struct drm_display_mode *mode = &encoder->crtc->state->adjusted_mode; struct vc4_hdmi_encoder 
*vc4_encoder = to_vc4_hdmi_encoder(encoder); struct drm_device *dev = encoder->dev; struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_hdmi *hdmi = vc4->hdmi; bool debug_dump_regs = false; bool hsync_pos = mode->flags & DRM_MODE_FLAG_PHSYNC; bool vsync_pos = mode->flags & DRM_MODE_FLAG_PVSYNC; @@ -417,6 +501,64 @@ static void vc4_hdmi_encoder_mode_set(struct drm_encoder *encoder, interlaced, VC4_HDMI_VERTB_VBP)); u32 csc_ctl; + int ret; + + ret = pm_runtime_get_sync(&hdmi->pdev->dev); + if (ret < 0) { + DRM_ERROR("Failed to retain power domain: %d\n", ret); + return; + } + + /* This is the rate that is set by the firmware. The number + * needs to be a bit higher than the pixel clock rate + * (generally 148.5Mhz). + */ + ret = clk_set_rate(hdmi->hsm_clock, 163682864); + if (ret) { + DRM_ERROR("Failed to set HSM clock rate: %d\n", ret); + return; + } + + ret = clk_set_rate(hdmi->pixel_clock, + mode->clock * 1000 * + ((mode->flags & DRM_MODE_FLAG_DBLCLK) ? 2 : 1)); + if (ret) { + DRM_ERROR("Failed to set pixel clock rate: %d\n", ret); + return; + } + + ret = clk_prepare_enable(hdmi->pixel_clock); + if (ret) { + DRM_ERROR("Failed to turn on pixel clock: %d\n", ret); + return; + } + + ret = clk_prepare_enable(hdmi->hsm_clock); + if (ret) { + DRM_ERROR("Failed to turn on HDMI state machine clock: %d\n", + ret); + clk_disable_unprepare(hdmi->pixel_clock); + return; + } + + HD_WRITE(VC4_HD_M_CTL, VC4_HD_M_SW_RST); + udelay(1); + HD_WRITE(VC4_HD_M_CTL, 0); + + HD_WRITE(VC4_HD_M_CTL, VC4_HD_M_ENABLE); + + HDMI_WRITE(VC4_HDMI_SW_RESET_CONTROL, + VC4_HDMI_SW_RESET_HDMI | + VC4_HDMI_SW_RESET_FORMAT_DETECT); + + HDMI_WRITE(VC4_HDMI_SW_RESET_CONTROL, 0); + + /* PHY should be in reset, like + * vc4_hdmi_encoder_disable() does. 
+ */ + HDMI_WRITE(VC4_HDMI_TX_PHY_RESET_CTL, 0xf << 16); + + HDMI_WRITE(VC4_HDMI_TX_PHY_RESET_CTL, 0); if (debug_dump_regs) { DRM_INFO("HDMI regs before:\n"); @@ -425,9 +567,6 @@ static void vc4_hdmi_encoder_mode_set(struct drm_encoder *encoder, HD_WRITE(VC4_HD_VID_CTL, 0); - clk_set_rate(vc4->hdmi->pixel_clock, mode->clock * 1000 * - ((mode->flags & DRM_MODE_FLAG_DBLCLK) ? 2 : 1)); - HDMI_WRITE(VC4_HDMI_SCHEDULER_CONTROL, HDMI_READ(VC4_HDMI_SCHEDULER_CONTROL) | VC4_HDMI_SCHEDULER_CONTROL_MANUAL_FORMAT | @@ -463,7 +602,9 @@ static void vc4_hdmi_encoder_mode_set(struct drm_encoder *encoder, csc_ctl = VC4_SET_FIELD(VC4_HD_CSC_CTL_ORDER_BGR, VC4_HD_CSC_CTL_ORDER); - if (vc4_encoder->hdmi_monitor && drm_match_cea_mode(mode) > 1) { + if (vc4_encoder->hdmi_monitor && + drm_default_rgb_quant_range(mode) == + HDMI_QUANTIZATION_RANGE_LIMITED) { /* CEA VICs other than #1 requre limited range RGB * output unless overridden by an AVI infoframe. * Apply a colorspace conversion to squash 0-255 down @@ -499,28 +640,6 @@ static void vc4_hdmi_encoder_mode_set(struct drm_encoder *encoder, DRM_INFO("HDMI regs after:\n"); vc4_hdmi_dump_regs(dev); } -} - -static void vc4_hdmi_encoder_disable(struct drm_encoder *encoder) -{ - struct drm_device *dev = encoder->dev; - struct vc4_dev *vc4 = to_vc4_dev(dev); - - HDMI_WRITE(VC4_HDMI_RAM_PACKET_CONFIG, 0); - - HDMI_WRITE(VC4_HDMI_TX_PHY_RESET_CTL, 0xf << 16); - HD_WRITE(VC4_HD_VID_CTL, - HD_READ(VC4_HD_VID_CTL) & ~VC4_HD_VID_CTL_ENABLE); -} - -static void vc4_hdmi_encoder_enable(struct drm_encoder *encoder) -{ - struct vc4_hdmi_encoder *vc4_encoder = to_vc4_hdmi_encoder(encoder); - struct drm_device *dev = encoder->dev; - struct vc4_dev *vc4 = to_vc4_dev(dev); - int ret; - - HDMI_WRITE(VC4_HDMI_TX_PHY_RESET_CTL, 0); HD_WRITE(VC4_HD_VID_CTL, HD_READ(VC4_HD_VID_CTL) | @@ -586,11 +705,451 @@ static void vc4_hdmi_encoder_enable(struct drm_encoder *encoder) } static const struct drm_encoder_helper_funcs vc4_hdmi_encoder_helper_funcs = { - 
.mode_set = vc4_hdmi_encoder_mode_set, .disable = vc4_hdmi_encoder_disable, .enable = vc4_hdmi_encoder_enable, }; +/* HDMI audio codec callbacks */ +static void vc4_hdmi_audio_set_mai_clock(struct vc4_hdmi *hdmi) +{ + struct drm_device *drm = hdmi->encoder->dev; + struct vc4_dev *vc4 = to_vc4_dev(drm); + u32 hsm_clock = clk_get_rate(hdmi->hsm_clock); + unsigned long n, m; + + rational_best_approximation(hsm_clock, hdmi->audio.samplerate, + VC4_HD_MAI_SMP_N_MASK >> + VC4_HD_MAI_SMP_N_SHIFT, + (VC4_HD_MAI_SMP_M_MASK >> + VC4_HD_MAI_SMP_M_SHIFT) + 1, + &n, &m); + + HD_WRITE(VC4_HD_MAI_SMP, + VC4_SET_FIELD(n, VC4_HD_MAI_SMP_N) | + VC4_SET_FIELD(m - 1, VC4_HD_MAI_SMP_M)); +} + +static void vc4_hdmi_set_n_cts(struct vc4_hdmi *hdmi) +{ + struct drm_encoder *encoder = hdmi->encoder; + struct drm_crtc *crtc = encoder->crtc; + struct drm_device *drm = encoder->dev; + struct vc4_dev *vc4 = to_vc4_dev(drm); + const struct drm_display_mode *mode = &crtc->state->adjusted_mode; + u32 samplerate = hdmi->audio.samplerate; + u32 n, cts; + u64 tmp; + + n = 128 * samplerate / 1000; + tmp = (u64)(mode->clock * 1000) * n; + do_div(tmp, 128 * samplerate); + cts = tmp; + + HDMI_WRITE(VC4_HDMI_CRP_CFG, + VC4_HDMI_CRP_CFG_EXTERNAL_CTS_EN | + VC4_SET_FIELD(n, VC4_HDMI_CRP_CFG_N)); + + /* + * We could get slightly more accurate clocks in some cases by + * providing a CTS_1 value. 
The two CTS values are alternated + * between based on the period fields + */ + HDMI_WRITE(VC4_HDMI_CTS_0, cts); + HDMI_WRITE(VC4_HDMI_CTS_1, cts); +} + +static inline struct vc4_hdmi *dai_to_hdmi(struct snd_soc_dai *dai) +{ + struct snd_soc_card *card = snd_soc_dai_get_drvdata(dai); + + return snd_soc_card_get_drvdata(card); +} + +static int vc4_hdmi_audio_startup(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) +{ + struct vc4_hdmi *hdmi = dai_to_hdmi(dai); + struct drm_encoder *encoder = hdmi->encoder; + struct vc4_dev *vc4 = to_vc4_dev(encoder->dev); + int ret; + + if (hdmi->audio.substream && hdmi->audio.substream != substream) + return -EINVAL; + + hdmi->audio.substream = substream; + + /* + * If the HDMI encoder hasn't probed, or the encoder is + * currently in DVI mode, treat the codec dai as missing. + */ + if (!encoder->crtc || !(HDMI_READ(VC4_HDMI_RAM_PACKET_CONFIG) & + VC4_HDMI_RAM_PACKET_ENABLE)) + return -ENODEV; + + ret = snd_pcm_hw_constraint_eld(substream->runtime, + hdmi->connector->eld); + if (ret) + return ret; + + return 0; +} + +static int vc4_hdmi_audio_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) +{ + return 0; +} + +static void vc4_hdmi_audio_reset(struct vc4_hdmi *hdmi) +{ + struct drm_encoder *encoder = hdmi->encoder; + struct drm_device *drm = encoder->dev; + struct device *dev = &hdmi->pdev->dev; + struct vc4_dev *vc4 = to_vc4_dev(drm); + int ret; + + ret = vc4_hdmi_stop_packet(encoder, HDMI_INFOFRAME_TYPE_AUDIO); + if (ret) + dev_err(dev, "Failed to stop audio infoframe: %d\n", ret); + + HD_WRITE(VC4_HD_MAI_CTL, VC4_HD_MAI_CTL_RESET); + HD_WRITE(VC4_HD_MAI_CTL, VC4_HD_MAI_CTL_ERRORF); + HD_WRITE(VC4_HD_MAI_CTL, VC4_HD_MAI_CTL_FLUSH); +} + +static void vc4_hdmi_audio_shutdown(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) +{ + struct vc4_hdmi *hdmi = dai_to_hdmi(dai); + + if (substream != hdmi->audio.substream) + return; + + vc4_hdmi_audio_reset(hdmi); + + hdmi->audio.substream = NULL; +} + +/* HDMI 
audio codec callbacks */ +static int vc4_hdmi_audio_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) +{ + struct vc4_hdmi *hdmi = dai_to_hdmi(dai); + struct drm_encoder *encoder = hdmi->encoder; + struct drm_device *drm = encoder->dev; + struct device *dev = &hdmi->pdev->dev; + struct vc4_dev *vc4 = to_vc4_dev(drm); + u32 audio_packet_config, channel_mask; + u32 channel_map, i; + + if (substream != hdmi->audio.substream) + return -EINVAL; + + dev_dbg(dev, "%s: %u Hz, %d bit, %d channels\n", __func__, + params_rate(params), params_width(params), + params_channels(params)); + + hdmi->audio.channels = params_channels(params); + hdmi->audio.samplerate = params_rate(params); + + HD_WRITE(VC4_HD_MAI_CTL, + VC4_HD_MAI_CTL_RESET | + VC4_HD_MAI_CTL_FLUSH | + VC4_HD_MAI_CTL_DLATE | + VC4_HD_MAI_CTL_ERRORE | + VC4_HD_MAI_CTL_ERRORF); + + vc4_hdmi_audio_set_mai_clock(hdmi); + + audio_packet_config = + VC4_HDMI_AUDIO_PACKET_ZERO_DATA_ON_SAMPLE_FLAT | + VC4_HDMI_AUDIO_PACKET_ZERO_DATA_ON_INACTIVE_CHANNELS | + VC4_SET_FIELD(0xf, VC4_HDMI_AUDIO_PACKET_B_FRAME_IDENTIFIER); + + channel_mask = GENMASK(hdmi->audio.channels - 1, 0); + audio_packet_config |= VC4_SET_FIELD(channel_mask, + VC4_HDMI_AUDIO_PACKET_CEA_MASK); + + /* Set the MAI threshold. This logic mimics the firmware's. 
*/ + if (hdmi->audio.samplerate > 96000) { + HD_WRITE(VC4_HD_MAI_THR, + VC4_SET_FIELD(0x12, VC4_HD_MAI_THR_DREQHIGH) | + VC4_SET_FIELD(0x12, VC4_HD_MAI_THR_DREQLOW)); + } else if (hdmi->audio.samplerate > 48000) { + HD_WRITE(VC4_HD_MAI_THR, + VC4_SET_FIELD(0x14, VC4_HD_MAI_THR_DREQHIGH) | + VC4_SET_FIELD(0x12, VC4_HD_MAI_THR_DREQLOW)); + } else { + HD_WRITE(VC4_HD_MAI_THR, + VC4_SET_FIELD(0x10, VC4_HD_MAI_THR_PANICHIGH) | + VC4_SET_FIELD(0x10, VC4_HD_MAI_THR_PANICLOW) | + VC4_SET_FIELD(0x10, VC4_HD_MAI_THR_DREQHIGH) | + VC4_SET_FIELD(0x10, VC4_HD_MAI_THR_DREQLOW)); + } + + HDMI_WRITE(VC4_HDMI_MAI_CONFIG, + VC4_HDMI_MAI_CONFIG_BIT_REVERSE | + VC4_SET_FIELD(channel_mask, VC4_HDMI_MAI_CHANNEL_MASK)); + + channel_map = 0; + for (i = 0; i < 8; i++) { + if (channel_mask & BIT(i)) + channel_map |= i << (3 * i); + } + + HDMI_WRITE(VC4_HDMI_MAI_CHANNEL_MAP, channel_map); + HDMI_WRITE(VC4_HDMI_AUDIO_PACKET_CONFIG, audio_packet_config); + vc4_hdmi_set_n_cts(hdmi); + + return 0; +} + +static int vc4_hdmi_audio_trigger(struct snd_pcm_substream *substream, int cmd, + struct snd_soc_dai *dai) +{ + struct vc4_hdmi *hdmi = dai_to_hdmi(dai); + struct drm_encoder *encoder = hdmi->encoder; + struct drm_device *drm = encoder->dev; + struct vc4_dev *vc4 = to_vc4_dev(drm); + + switch (cmd) { + case SNDRV_PCM_TRIGGER_START: + vc4_hdmi_set_audio_infoframe(encoder); + HDMI_WRITE(VC4_HDMI_TX_PHY_CTL0, + HDMI_READ(VC4_HDMI_TX_PHY_CTL0) & + ~VC4_HDMI_TX_PHY_RNG_PWRDN); + HD_WRITE(VC4_HD_MAI_CTL, + VC4_SET_FIELD(hdmi->audio.channels, + VC4_HD_MAI_CTL_CHNUM) | + VC4_HD_MAI_CTL_ENABLE); + break; + case SNDRV_PCM_TRIGGER_STOP: + HD_WRITE(VC4_HD_MAI_CTL, + VC4_HD_MAI_CTL_DLATE | + VC4_HD_MAI_CTL_ERRORE | + VC4_HD_MAI_CTL_ERRORF); + HDMI_WRITE(VC4_HDMI_TX_PHY_CTL0, + HDMI_READ(VC4_HDMI_TX_PHY_CTL0) | + VC4_HDMI_TX_PHY_RNG_PWRDN); + break; + default: + break; + } + + return 0; +} + +static inline struct vc4_hdmi * +snd_component_to_hdmi(struct snd_soc_component *component) +{ + struct snd_soc_card 
*card = snd_soc_component_get_drvdata(component); + + return snd_soc_card_get_drvdata(card); +} + +static int vc4_hdmi_audio_eld_ctl_info(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_info *uinfo) +{ + struct snd_soc_component *component = snd_kcontrol_chip(kcontrol); + struct vc4_hdmi *hdmi = snd_component_to_hdmi(component); + + uinfo->type = SNDRV_CTL_ELEM_TYPE_BYTES; + uinfo->count = sizeof(hdmi->connector->eld); + + return 0; +} + +static int vc4_hdmi_audio_eld_ctl_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *component = snd_kcontrol_chip(kcontrol); + struct vc4_hdmi *hdmi = snd_component_to_hdmi(component); + + memcpy(ucontrol->value.bytes.data, hdmi->connector->eld, + sizeof(hdmi->connector->eld)); + + return 0; +} + +static const struct snd_kcontrol_new vc4_hdmi_audio_controls[] = { + { + .access = SNDRV_CTL_ELEM_ACCESS_READ | + SNDRV_CTL_ELEM_ACCESS_VOLATILE, + .iface = SNDRV_CTL_ELEM_IFACE_PCM, + .name = "ELD", + .info = vc4_hdmi_audio_eld_ctl_info, + .get = vc4_hdmi_audio_eld_ctl_get, + }, +}; + +static const struct snd_soc_dapm_widget vc4_hdmi_audio_widgets[] = { + SND_SOC_DAPM_OUTPUT("TX"), +}; + +static const struct snd_soc_dapm_route vc4_hdmi_audio_routes[] = { + { "TX", NULL, "Playback" }, +}; + +static const struct snd_soc_codec_driver vc4_hdmi_audio_codec_drv = { + .component_driver = { + .controls = vc4_hdmi_audio_controls, + .num_controls = ARRAY_SIZE(vc4_hdmi_audio_controls), + .dapm_widgets = vc4_hdmi_audio_widgets, + .num_dapm_widgets = ARRAY_SIZE(vc4_hdmi_audio_widgets), + .dapm_routes = vc4_hdmi_audio_routes, + .num_dapm_routes = ARRAY_SIZE(vc4_hdmi_audio_routes), + }, +}; + +static const struct snd_soc_dai_ops vc4_hdmi_audio_dai_ops = { + .startup = vc4_hdmi_audio_startup, + .shutdown = vc4_hdmi_audio_shutdown, + .hw_params = vc4_hdmi_audio_hw_params, + .set_fmt = vc4_hdmi_audio_set_fmt, + .trigger = vc4_hdmi_audio_trigger, +}; + +static struct snd_soc_dai_driver 
vc4_hdmi_audio_codec_dai_drv = { + .name = "vc4-hdmi-hifi", + .playback = { + .stream_name = "Playback", + .channels_min = 2, + .channels_max = 8, + .rates = SNDRV_PCM_RATE_32000 | SNDRV_PCM_RATE_44100 | + SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_88200 | + SNDRV_PCM_RATE_96000 | SNDRV_PCM_RATE_176400 | + SNDRV_PCM_RATE_192000, + .formats = SNDRV_PCM_FMTBIT_IEC958_SUBFRAME_LE, + }, +}; + +static const struct snd_soc_component_driver vc4_hdmi_audio_cpu_dai_comp = { + .name = "vc4-hdmi-cpu-dai-component", +}; + +static int vc4_hdmi_audio_cpu_dai_probe(struct snd_soc_dai *dai) +{ + struct vc4_hdmi *hdmi = dai_to_hdmi(dai); + + snd_soc_dai_init_dma_data(dai, &hdmi->audio.dma_data, NULL); + + return 0; +} + +static struct snd_soc_dai_driver vc4_hdmi_audio_cpu_dai_drv = { + .name = "vc4-hdmi-cpu-dai", + .probe = vc4_hdmi_audio_cpu_dai_probe, + .playback = { + .stream_name = "Playback", + .channels_min = 1, + .channels_max = 8, + .rates = SNDRV_PCM_RATE_32000 | SNDRV_PCM_RATE_44100 | + SNDRV_PCM_RATE_48000 | SNDRV_PCM_RATE_88200 | + SNDRV_PCM_RATE_96000 | SNDRV_PCM_RATE_176400 | + SNDRV_PCM_RATE_192000, + .formats = SNDRV_PCM_FMTBIT_IEC958_SUBFRAME_LE, + }, + .ops = &vc4_hdmi_audio_dai_ops, +}; + +static const struct snd_dmaengine_pcm_config pcm_conf = { + .chan_names[SNDRV_PCM_STREAM_PLAYBACK] = "audio-rx", + .prepare_slave_config = snd_dmaengine_pcm_prepare_slave_config, +}; + +static int vc4_hdmi_audio_init(struct vc4_hdmi *hdmi) +{ + struct snd_soc_dai_link *dai_link = &hdmi->audio.link; + struct snd_soc_card *card = &hdmi->audio.card; + struct device *dev = &hdmi->pdev->dev; + const __be32 *addr; + int ret; + + if (!of_find_property(dev->of_node, "dmas", NULL)) { + dev_warn(dev, + "'dmas' DT property is missing, no HDMI audio\n"); + return 0; + } + + /* + * Get the physical address of VC4_HD_MAI_DATA. 
We need to retrieve + * the bus address specified in the DT, because the physical address + * (the one returned by platform_get_resource()) is not appropriate + * for DMA transfers. + * This VC/MMU should probably be exposed to avoid this kind of hack. + */ + addr = of_get_address(dev->of_node, 1, NULL, NULL); + hdmi->audio.dma_data.addr = be32_to_cpup(addr) + VC4_HD_MAI_DATA; + hdmi->audio.dma_data.addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + hdmi->audio.dma_data.maxburst = 2; + + ret = devm_snd_dmaengine_pcm_register(dev, &pcm_conf, 0); + if (ret) { + dev_err(dev, "Could not register PCM component: %d\n", ret); + return ret; + } + + ret = devm_snd_soc_register_component(dev, &vc4_hdmi_audio_cpu_dai_comp, + &vc4_hdmi_audio_cpu_dai_drv, 1); + if (ret) { + dev_err(dev, "Could not register CPU DAI: %d\n", ret); + return ret; + } + + /* register codec and codec dai */ + ret = snd_soc_register_codec(dev, &vc4_hdmi_audio_codec_drv, + &vc4_hdmi_audio_codec_dai_drv, 1); + if (ret) { + dev_err(dev, "Could not register codec: %d\n", ret); + return ret; + } + + dai_link->name = "MAI"; + dai_link->stream_name = "MAI PCM"; + dai_link->codec_dai_name = vc4_hdmi_audio_codec_dai_drv.name; + dai_link->cpu_dai_name = dev_name(dev); + dai_link->codec_name = dev_name(dev); + dai_link->platform_name = dev_name(dev); + + card->dai_link = dai_link; + card->num_links = 1; + card->name = "vc4-hdmi"; + card->dev = dev; + + /* + * Be careful, snd_soc_register_card() calls dev_set_drvdata() and + * stores a pointer to the snd card object in dev->driver_data. This + * means we cannot use it for something else. The hdmi back-pointer is + * now stored in card->drvdata and should be retrieved with + * snd_soc_card_get_drvdata() if needed. 
+ */ + snd_soc_card_set_drvdata(card, hdmi); + ret = devm_snd_soc_register_card(dev, card); + if (ret) { + dev_err(dev, "Could not register sound card: %d\n", ret); + goto unregister_codec; + } + + return 0; + +unregister_codec: + snd_soc_unregister_codec(dev); + + return ret; +} + +static void vc4_hdmi_audio_cleanup(struct vc4_hdmi *hdmi) +{ + struct device *dev = &hdmi->pdev->dev; + + /* + * If drvdata is not set this means the audio card was not + * registered, just skip codec unregistration in this case. + */ + if (dev_get_drvdata(dev)) + snd_soc_unregister_codec(dev); +} + static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) { struct platform_device *pdev = to_platform_device(dev); @@ -646,33 +1205,6 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) return -EPROBE_DEFER; } - /* Enable the clocks at startup. We can't quite recover from - * turning off the pixel clock during disable/enables yet, so - * it's always running. - */ - ret = clk_prepare_enable(hdmi->pixel_clock); - if (ret) { - DRM_ERROR("Failed to turn on pixel clock: %d\n", ret); - goto err_put_i2c; - } - - /* This is the rate that is set by the firmware. The number - * needs to be a bit higher than the pixel clock rate - * (generally 148.5Mhz). - */ - ret = clk_set_rate(hdmi->hsm_clock, 163682864); - if (ret) { - DRM_ERROR("Failed to set HSM clock rate: %d\n", ret); - goto err_unprepare_pix; - } - - ret = clk_prepare_enable(hdmi->hsm_clock); - if (ret) { - DRM_ERROR("Failed to turn on HDMI state machine clock: %d\n", - ret); - goto err_unprepare_pix; - } - /* Only use the GPIO HPD pin if present in the DT, otherwise * we'll use the HDMI core's register. 
*/ @@ -684,7 +1216,7 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) &hpd_gpio_flags); if (hdmi->hpd_gpio < 0) { ret = hdmi->hpd_gpio; - goto err_unprepare_hsm; + goto err_put_i2c; } hdmi->hpd_active_low = hpd_gpio_flags & OF_GPIO_ACTIVE_LOW; @@ -692,25 +1224,7 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) vc4->hdmi = hdmi; - /* HDMI core must be enabled. */ - if (!(HD_READ(VC4_HD_M_CTL) & VC4_HD_M_ENABLE)) { - HD_WRITE(VC4_HD_M_CTL, VC4_HD_M_SW_RST); - udelay(1); - HD_WRITE(VC4_HD_M_CTL, 0); - - HD_WRITE(VC4_HD_M_CTL, VC4_HD_M_ENABLE); - - HDMI_WRITE(VC4_HDMI_SW_RESET_CONTROL, - VC4_HDMI_SW_RESET_HDMI | - VC4_HDMI_SW_RESET_FORMAT_DETECT); - - HDMI_WRITE(VC4_HDMI_SW_RESET_CONTROL, 0); - - /* PHY should be in reset, like - * vc4_hdmi_encoder_disable() does. - */ - HDMI_WRITE(VC4_HDMI_TX_PHY_RESET_CTL, 0xf << 16); - } + pm_runtime_enable(dev); drm_encoder_init(drm, hdmi->encoder, &vc4_hdmi_encoder_funcs, DRM_MODE_ENCODER_TMDS, NULL); @@ -722,14 +1236,15 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) goto err_destroy_encoder; } + ret = vc4_hdmi_audio_init(hdmi); + if (ret) + goto err_destroy_encoder; + return 0; err_destroy_encoder: vc4_hdmi_encoder_destroy(hdmi->encoder); -err_unprepare_hsm: - clk_disable_unprepare(hdmi->hsm_clock); -err_unprepare_pix: - clk_disable_unprepare(hdmi->pixel_clock); + pm_runtime_disable(dev); err_put_i2c: put_device(&hdmi->ddc->dev); @@ -743,11 +1258,13 @@ static void vc4_hdmi_unbind(struct device *dev, struct device *master, struct vc4_dev *vc4 = drm->dev_private; struct vc4_hdmi *hdmi = vc4->hdmi; + vc4_hdmi_audio_cleanup(hdmi); + vc4_hdmi_connector_destroy(hdmi->connector); vc4_hdmi_encoder_destroy(hdmi->encoder); - clk_disable_unprepare(hdmi->pixel_clock); - clk_disable_unprepare(hdmi->hsm_clock); + pm_runtime_disable(dev); + put_device(&hdmi->ddc->dev); vc4->hdmi = NULL; diff --git a/drivers/gpu/drm/vc4/vc4_hvs.c 
b/drivers/gpu/drm/vc4/vc4_hvs.c index 6fbab1c82cb1..2b62fc5b8d85 100644 --- a/drivers/gpu/drm/vc4/vc4_hvs.c +++ b/drivers/gpu/drm/vc4/vc4_hvs.c @@ -9,12 +9,12 @@ /** * DOC: VC4 HVS module. * - * The HVS is the piece of hardware that does translation, scaling, - * colorspace conversion, and compositing of pixels stored in - * framebuffers into a FIFO of pixels going out to the Pixel Valve - * (CRTC). It operates at the system clock rate (the system audio - * clock gate, specifically), which is much higher than the pixel - * clock rate. + * The Hardware Video Scaler (HVS) is the piece of hardware that does + * translation, scaling, colorspace conversion, and compositing of + * pixels stored in framebuffers into a FIFO of pixels going out to + * the Pixel Valve (CRTC). It operates at the system clock rate (the + * system audio clock gate, specifically), which is much higher than + * the pixel clock rate. * * There is a single global HVS, with multiple output FIFOs that can * be consumed by the PVs. This file just manages the resources for @@ -22,7 +22,7 @@ * each CRTC. 
*/ -#include "linux/component.h" +#include <linux/component.h> #include "vc4_drv.h" #include "vc4_regs.h" @@ -141,8 +141,7 @@ static int vc4_hvs_upload_linear_kernel(struct vc4_hvs *hvs, int ret, i; u32 __iomem *dst_kernel; - ret = drm_mm_insert_node(&hvs->dlist_mm, space, VC4_KERNEL_DWORDS, 1, - 0); + ret = drm_mm_insert_node(&hvs->dlist_mm, space, VC4_KERNEL_DWORDS); if (ret) { DRM_ERROR("Failed to allocate space for filter kernel: %d\n", ret); @@ -170,6 +169,7 @@ static int vc4_hvs_bind(struct device *dev, struct device *master, void *data) struct vc4_dev *vc4 = drm->dev_private; struct vc4_hvs *hvs = NULL; int ret; + u32 dispctrl; hvs = devm_kzalloc(&pdev->dev, sizeof(*hvs), GFP_KERNEL); if (!hvs) @@ -211,6 +211,19 @@ static int vc4_hvs_bind(struct device *dev, struct device *master, void *data) return ret; vc4->hvs = hvs; + + dispctrl = HVS_READ(SCALER_DISPCTRL); + + dispctrl |= SCALER_DISPCTRL_ENABLE; + + /* Set DSP3 (PV1) to use HVS channel 2, which would otherwise + * be unused. + */ + dispctrl &= ~SCALER_DISPCTRL_DSP3_MUX_MASK; + dispctrl |= VC4_SET_FIELD(2, SCALER_DISPCTRL_DSP3_MUX); + + HVS_WRITE(SCALER_DISPCTRL, dispctrl); + return 0; } diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c index 094bc6a475c1..7d7af3a93d94 100644 --- a/drivers/gpu/drm/vc4/vc4_irq.c +++ b/drivers/gpu/drm/vc4/vc4_irq.c @@ -21,7 +21,8 @@ * IN THE SOFTWARE. */ -/** DOC: Interrupt management for the V3D engine. +/** + * DOC: Interrupt management for the V3D engine * * We have an interrupt status register (V3D_INTCTL) which reports * interrupts, and where writing 1 bits clears those interrupts. 
@@ -58,50 +59,45 @@ vc4_overflow_mem_work(struct work_struct *work) { struct vc4_dev *vc4 = container_of(work, struct vc4_dev, overflow_mem_work); - struct drm_device *dev = vc4->dev; - struct vc4_bo *bo; + struct vc4_bo *bo = vc4->bin_bo; + int bin_bo_slot; + struct vc4_exec_info *exec; + unsigned long irqflags; - bo = vc4_bo_create(dev, 256 * 1024, true); - if (IS_ERR(bo)) { + bin_bo_slot = vc4_v3d_get_bin_slot(vc4); + if (bin_bo_slot < 0) { DRM_ERROR("Couldn't allocate binner overflow mem\n"); return; } - /* If there's a job executing currently, then our previous - * overflow allocation is getting used in that job and we need - * to queue it to be released when the job is done. But if no - * job is executing at all, then we can free the old overflow - * object direcctly. - * - * No lock necessary for this pointer since we're the only - * ones that update the pointer, and our workqueue won't - * reenter. - */ - if (vc4->overflow_mem) { - struct vc4_exec_info *current_exec; - unsigned long irqflags; - - spin_lock_irqsave(&vc4->job_lock, irqflags); - current_exec = vc4_first_bin_job(vc4); - if (!current_exec) - current_exec = vc4_last_render_job(vc4); - if (current_exec) { - vc4->overflow_mem->seqno = current_exec->seqno; - list_add_tail(&vc4->overflow_mem->unref_head, - &current_exec->unref_list); - vc4->overflow_mem = NULL; + spin_lock_irqsave(&vc4->job_lock, irqflags); + + if (vc4->bin_alloc_overflow) { + /* If we had overflow memory allocated previously, + * then that chunk will free when the current bin job + * is done. If we don't have a bin job running, then + * the chunk will be done whenever the list of render + * jobs has drained. + */ + exec = vc4_first_bin_job(vc4); + if (!exec) + exec = vc4_last_render_job(vc4); + if (exec) { + exec->bin_slots |= vc4->bin_alloc_overflow; + } else { + /* There's nothing queued in the hardware, so + * the old slot is free immediately. 
+ */ + vc4->bin_alloc_used &= ~vc4->bin_alloc_overflow; } - spin_unlock_irqrestore(&vc4->job_lock, irqflags); } + vc4->bin_alloc_overflow = BIT(bin_bo_slot); - if (vc4->overflow_mem) - drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base); - vc4->overflow_mem = bo; - - V3D_WRITE(V3D_BPOA, bo->base.paddr); + V3D_WRITE(V3D_BPOA, bo->base.paddr + bin_bo_slot * vc4->bin_alloc_size); V3D_WRITE(V3D_BPOS, bo->base.base.size); V3D_WRITE(V3D_INTCTL, V3D_INT_OUTOMEM); V3D_WRITE(V3D_INTENA, V3D_INT_OUTOMEM); + spin_unlock_irqrestore(&vc4->job_lock, irqflags); } static void @@ -141,6 +137,10 @@ vc4_irq_finish_render_job(struct drm_device *dev) vc4->finished_seqno++; list_move_tail(&exec->head, &vc4->job_done_list); + if (exec->fence) { + dma_fence_signal_locked(exec->fence); + exec->fence = NULL; + } vc4_submit_next_render_job(dev); wake_up_all(&vc4->job_wait_queue); diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index be8dd8262f27..bc6ecdc6f104 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -14,12 +14,12 @@ * crtc, HDMI encoder). 
*/ -#include "drm_crtc.h" -#include "drm_atomic.h" -#include "drm_atomic_helper.h" -#include "drm_crtc_helper.h" -#include "drm_plane_helper.h" -#include "drm_fb_cma_helper.h" +#include <drm/drm_crtc.h> +#include <drm/drm_atomic.h> +#include <drm/drm_atomic_helper.h> +#include <drm/drm_crtc_helper.h> +#include <drm/drm_plane_helper.h> +#include <drm/drm_fb_cma_helper.h> #include "vc4_drv.h" static void vc4_output_poll_changed(struct drm_device *dev) @@ -42,6 +42,10 @@ vc4_atomic_complete_commit(struct vc4_commit *c) struct drm_device *dev = state->dev; struct vc4_dev *vc4 = to_vc4_dev(dev); + drm_atomic_helper_wait_for_fences(dev, state, false); + + drm_atomic_helper_wait_for_dependencies(state); + drm_atomic_helper_commit_modeset_disables(dev, state); drm_atomic_helper_commit_planes(dev, state, 0); @@ -57,10 +61,14 @@ vc4_atomic_complete_commit(struct vc4_commit *c) */ state->legacy_cursor_update = false; + drm_atomic_helper_commit_hw_done(state); + drm_atomic_helper_wait_for_vblanks(dev, state); drm_atomic_helper_cleanup_planes(dev, state); + drm_atomic_helper_commit_cleanup_done(state); + drm_atomic_state_put(state); up(&vc4->async_modeset); @@ -117,32 +125,10 @@ static int vc4_atomic_commit(struct drm_device *dev, if (!c) return -ENOMEM; - /* Make sure that any outstanding modesets have finished. */ - if (nonblock) { - struct drm_crtc *crtc; - struct drm_crtc_state *crtc_state; - unsigned long flags; - bool busy = false; - - /* - * If there's an undispatched event to send then we're - * obviously still busy. If there isn't, then we can - * unconditionally wait for the semaphore because it - * shouldn't be contended (for long). - * - * This is to prevent a race where queuing a new flip - * from userspace immediately on receipt of an event - * beats our clean-up and returns EBUSY. 
- */ - spin_lock_irqsave(&dev->event_lock, flags); - for_each_crtc_in_state(state, crtc, crtc_state, i) - busy |= vc4_event_pending(crtc); - spin_unlock_irqrestore(&dev->event_lock, flags); - if (busy) { - kfree(c); - return -EBUSY; - } - } + ret = drm_atomic_helper_setup_commit(state, nonblock); + if (ret) + return ret; + ret = down_interruptible(&vc4->async_modeset); if (ret) { kfree(c); @@ -202,11 +188,50 @@ static int vc4_atomic_commit(struct drm_device *dev, return 0; } +static struct drm_framebuffer *vc4_fb_create(struct drm_device *dev, + struct drm_file *file_priv, + const struct drm_mode_fb_cmd2 *mode_cmd) +{ + struct drm_mode_fb_cmd2 mode_cmd_local; + + /* If the user didn't specify a modifier, use the + * vc4_set_tiling_ioctl() state for the BO. + */ + if (!(mode_cmd->flags & DRM_MODE_FB_MODIFIERS)) { + struct drm_gem_object *gem_obj; + struct vc4_bo *bo; + + gem_obj = drm_gem_object_lookup(file_priv, + mode_cmd->handles[0]); + if (!gem_obj) { + DRM_ERROR("Failed to look up GEM BO %d\n", + mode_cmd->handles[0]); + return ERR_PTR(-ENOENT); + } + bo = to_vc4_bo(gem_obj); + + mode_cmd_local = *mode_cmd; + + if (bo->t_format) { + mode_cmd_local.modifier[0] = + DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED; + } else { + mode_cmd_local.modifier[0] = DRM_FORMAT_MOD_NONE; + } + + drm_gem_object_unreference_unlocked(gem_obj); + + mode_cmd = &mode_cmd_local; + } + + return drm_fb_cma_create(dev, file_priv, mode_cmd); +} + static const struct drm_mode_config_funcs vc4_mode_funcs = { .output_poll_changed = vc4_output_poll_changed, .atomic_check = drm_atomic_helper_check, .atomic_commit = vc4_atomic_commit, - .fb_create = drm_fb_cma_create, + .fb_create = vc4_fb_create, }; int vc4_kms_load(struct drm_device *dev) @@ -230,11 +255,12 @@ int vc4_kms_load(struct drm_device *dev) drm_mode_config_reset(dev); - vc4->fbdev = drm_fbdev_cma_init(dev, 32, - dev->mode_config.num_crtc, - dev->mode_config.num_connector); - if (IS_ERR(vc4->fbdev)) - vc4->fbdev = NULL; + if 
(dev->mode_config.num_connector) { + vc4->fbdev = drm_fbdev_cma_init(dev, 32, + dev->mode_config.num_connector); + if (IS_ERR(vc4->fbdev)) + vc4->fbdev = NULL; + } drm_kms_helper_poll_init(dev); diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 881bf489478b..fa6809d8b0fe 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -18,11 +18,13 @@ * into the region of the HVS that it has allocated for us. */ +#include <drm/drm_atomic.h> +#include <drm/drm_atomic_helper.h> +#include <drm/drm_fb_cma_helper.h> +#include <drm/drm_plane_helper.h> + #include "vc4_drv.h" #include "vc4_regs.h" -#include "drm_atomic_helper.h" -#include "drm_fb_cma_helper.h" -#include "drm_plane_helper.h" enum vc4_scaling_mode { VC4_SCALING_NONE, @@ -295,8 +297,8 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) struct drm_framebuffer *fb = state->fb; struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0); u32 subpixel_src_mask = (1 << 16) - 1; - u32 format = fb->pixel_format; - int num_planes = drm_format_num_planes(format); + u32 format = fb->format->format; + int num_planes = fb->format->num_planes; u32 h_subsample = 1; u32 v_subsample = 1; int i; @@ -369,7 +371,7 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) */ if (vc4_state->crtc_x < 0) { for (i = 0; i < num_planes; i++) { - u32 cpp = drm_format_plane_cpp(fb->pixel_format, i); + u32 cpp = fb->format->cpp[i]; u32 subs = ((i == 0) ? 
1 : h_subsample); vc4_state->offsets[i] += (cpp * @@ -496,10 +498,10 @@ static int vc4_plane_mode_set(struct drm_plane *plane, struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); struct drm_framebuffer *fb = state->fb; u32 ctl0_offset = vc4_state->dlist_count; - const struct hvs_format *format = vc4_get_hvs_format(fb->pixel_format); + const struct hvs_format *format = vc4_get_hvs_format(fb->format->format); int num_planes = drm_format_num_planes(format->drm); - u32 scl0, scl1; - u32 lbm_size; + u32 scl0, scl1, pitch0; + u32 lbm_size, tiling; unsigned long irqflags; int ret, i; @@ -514,9 +516,9 @@ static int vc4_plane_mode_set(struct drm_plane *plane, if (lbm_size) { if (!vc4_state->lbm.allocated) { spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags); - ret = drm_mm_insert_node(&vc4->hvs->lbm_mm, - &vc4_state->lbm, - lbm_size, 32, 0); + ret = drm_mm_insert_node_generic(&vc4->hvs->lbm_mm, + &vc4_state->lbm, + lbm_size, 32, 0, 0); spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags); } else { WARN_ON_ONCE(lbm_size != vc4_state->lbm.size); @@ -540,11 +542,31 @@ static int vc4_plane_mode_set(struct drm_plane *plane, scl1 = vc4_get_scl_field(state, 0); } + switch (fb->modifier) { + case DRM_FORMAT_MOD_LINEAR: + tiling = SCALER_CTL0_TILING_LINEAR; + pitch0 = VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH); + break; + case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: + tiling = SCALER_CTL0_TILING_256B_OR_T; + + pitch0 = (VC4_SET_FIELD(0, SCALER_PITCH0_TILE_Y_OFFSET), + VC4_SET_FIELD(0, SCALER_PITCH0_TILE_WIDTH_L), + VC4_SET_FIELD((vc4_state->src_w[0] + 31) >> 5, + SCALER_PITCH0_TILE_WIDTH_R)); + break; + default: + DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx", + (long long)fb->modifier); + return -EINVAL; + } + /* Control word */ vc4_dlist_write(vc4_state, SCALER_CTL0_VALID | (format->pixel_order << SCALER_CTL0_ORDER_SHIFT) | (format->hvs << SCALER_CTL0_PIXEL_FORMAT_SHIFT) | + VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) | (vc4_state->is_unity ? 
SCALER_CTL0_UNITY : 0) | VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) | VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1)); @@ -598,8 +620,11 @@ static int vc4_plane_mode_set(struct drm_plane *plane, for (i = 0; i < num_planes; i++) vc4_dlist_write(vc4_state, 0xc0c0c0c0); - /* Pitch word 0/1/2 */ - for (i = 0; i < num_planes; i++) { + /* Pitch word 0 */ + vc4_dlist_write(vc4_state, pitch0); + + /* Pitch word 1/2 */ + for (i = 1; i < num_planes; i++) { vc4_dlist_write(vc4_state, VC4_SET_FIELD(fb->pitches[i], SCALER_SRC_PITCH)); } @@ -755,7 +780,8 @@ vc4_update_plane(struct drm_plane *plane, int crtc_x, int crtc_y, unsigned int crtc_w, unsigned int crtc_h, uint32_t src_x, uint32_t src_y, - uint32_t src_w, uint32_t src_h) + uint32_t src_w, uint32_t src_h, + struct drm_modeset_acquire_ctx *ctx) { struct drm_plane_state *plane_state; struct vc4_plane_state *vc4_state; @@ -769,12 +795,6 @@ vc4_update_plane(struct drm_plane *plane, if (!plane_state) goto out; - /* If we're changing the cursor contents, do that in the - * normal vblank-synced atomic path. - */ - if (fb != plane_state->fb) - goto out; - /* No configuring new scaling in the fast path. */ if (crtc_w != plane_state->crtc_w || crtc_h != plane_state->crtc_h || @@ -783,6 +803,11 @@ vc4_update_plane(struct drm_plane *plane, goto out; } + if (fb != plane_state->fb) { + drm_atomic_set_fb_for_plane(plane->state, fb); + vc4_plane_async_set_fb(plane, fb); + } + /* Set the cursor's position on the screen. This is the * expected change from the drm_mode_cursor_universal() * helper. 
@@ -817,7 +842,8 @@ out: crtc_x, crtc_y, crtc_w, crtc_h, src_x, src_y, - src_w, src_h); + src_w, src_h, + ctx); } static const struct drm_plane_funcs vc4_plane_funcs = { @@ -842,10 +868,8 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev, vc4_plane = devm_kzalloc(dev->dev, sizeof(*vc4_plane), GFP_KERNEL); - if (!vc4_plane) { - ret = -ENOMEM; - goto fail; - } + if (!vc4_plane) + return ERR_PTR(-ENOMEM); for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) { /* Don't allow YUV in cursor planes, since that means @@ -858,7 +882,7 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev, } } plane = &vc4_plane->base; - ret = drm_universal_plane_init(dev, plane, 0xff, + ret = drm_universal_plane_init(dev, plane, 0, &vc4_plane_funcs, formats, num_formats, type, NULL); @@ -866,9 +890,4 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev, drm_plane_helper_add(plane, &vc4_plane_helper_funcs); return plane; -fail: - if (plane) - vc4_plane_destroy(plane); - - return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h index 39f6886b2410..d382c34c1b9e 100644 --- a/drivers/gpu/drm/vc4/vc4_regs.h +++ b/drivers/gpu/drm/vc4/vc4_regs.h @@ -190,6 +190,8 @@ # define PV_VCONTROL_ODD_DELAY_SHIFT 6 # define PV_VCONTROL_ODD_FIRST BIT(5) # define PV_VCONTROL_INTERLACE BIT(4) +# define PV_VCONTROL_DSI BIT(3) +# define PV_VCONTROL_COMMAND BIT(2) # define PV_VCONTROL_CONTINUOUS BIT(1) # define PV_VCONTROL_VIDEN BIT(0) @@ -244,6 +246,9 @@ # define SCALER_DISPCTRL_ENABLE BIT(31) # define SCALER_DISPCTRL_DSP2EISLUR BIT(15) # define SCALER_DISPCTRL_DSP1EISLUR BIT(14) +# define SCALER_DISPCTRL_DSP3_MUX_MASK VC4_MASK(19, 18) +# define SCALER_DISPCTRL_DSP3_MUX_SHIFT 18 + /* Enables Display 0 short line and underrun contribution to * SCALER_DISPSTAT_IRQDISP0. Note that short frame contributions are * always enabled. 
@@ -441,11 +446,62 @@ #define VC4_HDMI_HOTPLUG 0x00c # define VC4_HDMI_HOTPLUG_CONNECTED BIT(0) +/* 3 bits per field, where each field maps from that corresponding MAI + * bus channel to the given HDMI channel. + */ +#define VC4_HDMI_MAI_CHANNEL_MAP 0x090 + +#define VC4_HDMI_MAI_CONFIG 0x094 +# define VC4_HDMI_MAI_CONFIG_FORMAT_REVERSE BIT(27) +# define VC4_HDMI_MAI_CONFIG_BIT_REVERSE BIT(26) +# define VC4_HDMI_MAI_CHANNEL_MASK_MASK VC4_MASK(15, 0) +# define VC4_HDMI_MAI_CHANNEL_MASK_SHIFT 0 + +/* Last received format word on the MAI bus. */ +#define VC4_HDMI_MAI_FORMAT 0x098 + +#define VC4_HDMI_AUDIO_PACKET_CONFIG 0x09c +# define VC4_HDMI_AUDIO_PACKET_ZERO_DATA_ON_SAMPLE_FLAT BIT(29) +# define VC4_HDMI_AUDIO_PACKET_ZERO_DATA_ON_INACTIVE_CHANNELS BIT(24) +# define VC4_HDMI_AUDIO_PACKET_FORCE_SAMPLE_PRESENT BIT(19) +# define VC4_HDMI_AUDIO_PACKET_FORCE_B_FRAME BIT(18) +# define VC4_HDMI_AUDIO_PACKET_B_FRAME_IDENTIFIER_MASK VC4_MASK(13, 10) +# define VC4_HDMI_AUDIO_PACKET_B_FRAME_IDENTIFIER_SHIFT 10 +/* If set, then multichannel, otherwise 2 channel. */ +# define VC4_HDMI_AUDIO_PACKET_AUDIO_LAYOUT BIT(9) +/* If set, then AUDIO_LAYOUT overrides audio_cea_mask */ +# define VC4_HDMI_AUDIO_PACKET_FORCE_AUDIO_LAYOUT BIT(8) +# define VC4_HDMI_AUDIO_PACKET_CEA_MASK_MASK VC4_MASK(7, 0) +# define VC4_HDMI_AUDIO_PACKET_CEA_MASK_SHIFT 0 + #define VC4_HDMI_RAM_PACKET_CONFIG 0x0a0 # define VC4_HDMI_RAM_PACKET_ENABLE BIT(16) #define VC4_HDMI_RAM_PACKET_STATUS 0x0a4 +#define VC4_HDMI_CRP_CFG 0x0a8 +/* When set, the CTS_PERIOD counts based on MAI bus sync pulse instead + * of pixel clock. + */ +# define VC4_HDMI_CRP_USE_MAI_BUS_SYNC_FOR_CTS BIT(26) +/* When set, no CRP packets will be sent. */ +# define VC4_HDMI_CRP_CFG_DISABLE BIT(25) +/* If set, generates CTS values based on N, audio clock, and video + * clock. N must be divisible by 128. 
+ */ +# define VC4_HDMI_CRP_CFG_EXTERNAL_CTS_EN BIT(24) +# define VC4_HDMI_CRP_CFG_N_MASK VC4_MASK(19, 0) +# define VC4_HDMI_CRP_CFG_N_SHIFT 0 + +/* 20-bit fields containing CTS values to be transmitted if !EXTERNAL_CTS_EN */ +#define VC4_HDMI_CTS_0 0x0ac +#define VC4_HDMI_CTS_1 0x0b0 +/* 20-bit fields containing number of clocks to send CTS0/1 before + * switching to the other one. + */ +#define VC4_HDMI_CTS_PERIOD_0 0x0b4 +#define VC4_HDMI_CTS_PERIOD_1 0x0b8 + #define VC4_HDMI_HORZA 0x0c4 # define VC4_HDMI_HORZA_VPOS BIT(14) # define VC4_HDMI_HORZA_HPOS BIT(13) @@ -507,7 +563,11 @@ #define VC4_HDMI_TX_PHY_RESET_CTL 0x2c0 -#define VC4_HDMI_GCP_0 0x400 +#define VC4_HDMI_TX_PHY_CTL0 0x2c4 +# define VC4_HDMI_TX_PHY_RNG_PWRDN BIT(25) + +#define VC4_HDMI_GCP(x) (0x400 + ((x) * 0x4)) +#define VC4_HDMI_RAM_PACKET(x) (0x400 + ((x) * 0x24)) #define VC4_HDMI_PACKET_STRIDE 0x24 #define VC4_HD_M_CTL 0x00c @@ -517,6 +577,56 @@ # define VC4_HD_M_ENABLE BIT(0) #define VC4_HD_MAI_CTL 0x014 +/* Set when audio stream is received at a slower rate than the + * sampling period, so MAI fifo goes empty. Write 1 to clear. + */ +# define VC4_HD_MAI_CTL_DLATE BIT(15) +# define VC4_HD_MAI_CTL_BUSY BIT(14) +# define VC4_HD_MAI_CTL_CHALIGN BIT(13) +# define VC4_HD_MAI_CTL_WHOLSMP BIT(12) +# define VC4_HD_MAI_CTL_FULL BIT(11) +# define VC4_HD_MAI_CTL_EMPTY BIT(10) +# define VC4_HD_MAI_CTL_FLUSH BIT(9) +/* If set, MAI bus generates SPDIF (bit 31) parity instead of passing + * through. + */ +# define VC4_HD_MAI_CTL_PAREN BIT(8) +# define VC4_HD_MAI_CTL_CHNUM_MASK VC4_MASK(7, 4) +# define VC4_HD_MAI_CTL_CHNUM_SHIFT 4 +# define VC4_HD_MAI_CTL_ENABLE BIT(3) +/* Underflow error status bit, write 1 to clear. */ +# define VC4_HD_MAI_CTL_ERRORE BIT(2) +/* Overflow error status bit, write 1 to clear. */ +# define VC4_HD_MAI_CTL_ERRORF BIT(1) +/* Single-shot reset bit. Read value is undefined. 
*/ +# define VC4_HD_MAI_CTL_RESET BIT(0) + +#define VC4_HD_MAI_THR 0x018 +# define VC4_HD_MAI_THR_PANICHIGH_MASK VC4_MASK(29, 24) +# define VC4_HD_MAI_THR_PANICHIGH_SHIFT 24 +# define VC4_HD_MAI_THR_PANICLOW_MASK VC4_MASK(21, 16) +# define VC4_HD_MAI_THR_PANICLOW_SHIFT 16 +# define VC4_HD_MAI_THR_DREQHIGH_MASK VC4_MASK(13, 8) +# define VC4_HD_MAI_THR_DREQHIGH_SHIFT 8 +# define VC4_HD_MAI_THR_DREQLOW_MASK VC4_MASK(5, 0) +# define VC4_HD_MAI_THR_DREQLOW_SHIFT 0 + +/* Format header to be placed on the MAI data. Unused. */ +#define VC4_HD_MAI_FMT 0x01c + +/* Register for DMAing in audio data to be transported over the MAI + * bus to the Falcon core. + */ +#define VC4_HD_MAI_DATA 0x020 + +/* Divider from HDMI HSM clock to MAI serial clock. Sampling period + * converges to N / (M + 1) cycles. + */ +#define VC4_HD_MAI_SMP 0x02c +# define VC4_HD_MAI_SMP_N_MASK VC4_MASK(31, 8) +# define VC4_HD_MAI_SMP_N_SHIFT 8 +# define VC4_HD_MAI_SMP_M_MASK VC4_MASK(7, 0) +# define VC4_HD_MAI_SMP_M_SHIFT 0 #define VC4_HD_VID_CTL 0x038 # define VC4_HD_VID_CTL_ENABLE BIT(31) @@ -599,6 +709,13 @@ enum hvs_pixel_format { #define SCALER_CTL0_SIZE_MASK VC4_MASK(29, 24) #define SCALER_CTL0_SIZE_SHIFT 24 +#define SCALER_CTL0_TILING_MASK VC4_MASK(21, 20) +#define SCALER_CTL0_TILING_SHIFT 20 +#define SCALER_CTL0_TILING_LINEAR 0 +#define SCALER_CTL0_TILING_64B 1 +#define SCALER_CTL0_TILING_128B 2 +#define SCALER_CTL0_TILING_256B_OR_T 3 + #define SCALER_CTL0_HFLIP BIT(16) #define SCALER_CTL0_VFLIP BIT(15) @@ -728,7 +845,19 @@ enum hvs_pixel_format { #define SCALER_PPF_KERNEL_OFFSET_SHIFT 0 #define SCALER_PPF_KERNEL_UNCACHED BIT(31) +/* PITCH0/1/2 fields for raster. */ #define SCALER_SRC_PITCH_MASK VC4_MASK(15, 0) #define SCALER_SRC_PITCH_SHIFT 0 +/* PITCH0 fields for T-tiled. 
*/ +#define SCALER_PITCH0_TILE_WIDTH_L_MASK VC4_MASK(22, 16) +#define SCALER_PITCH0_TILE_WIDTH_L_SHIFT 16 +#define SCALER_PITCH0_TILE_LINE_DIR BIT(15) +#define SCALER_PITCH0_TILE_INITIAL_LINE_DIR BIT(14) +/* Y offset within a tile. */ +#define SCALER_PITCH0_TILE_Y_OFFSET_MASK VC4_MASK(13, 7) +#define SCALER_PITCH0_TILE_Y_OFFSET_SHIFT 7 +#define SCALER_PITCH0_TILE_WIDTH_R_MASK VC4_MASK(6, 0) +#define SCALER_PITCH0_TILE_WIDTH_R_SHIFT 0 + #endif /* VC4_REGS_H */ diff --git a/drivers/gpu/drm/vc4/vc4_render_cl.c b/drivers/gpu/drm/vc4/vc4_render_cl.c index 5cdd003605f5..5dc19429d4ae 100644 --- a/drivers/gpu/drm/vc4/vc4_render_cl.c +++ b/drivers/gpu/drm/vc4/vc4_render_cl.c @@ -24,6 +24,10 @@ /** * DOC: Render command list generation * + * In the V3D hardware, render command lists are what load and store + * tiles of a framebuffer and optionally call out to binner-generated + * command lists to do the 3D drawing for that tile. + * * In the VC4 driver, render command list generation is performed by the * kernel instead of userspace. We do this because validating a * user-submitted command list is hard to get right and has high CPU overhead, @@ -178,8 +182,7 @@ static void emit_tile(struct vc4_exec_info *exec, if (has_bin) { rcl_u8(setup, VC4_PACKET_BRANCH_TO_SUB_LIST); - rcl_u32(setup, (exec->tile_bo->paddr + - exec->tile_alloc_offset + + rcl_u32(setup, (exec->tile_alloc_offset + (y * exec->bin_tiles_x + x) * 32)); } diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c index 7cc346ad9b0b..8c723da71f66 100644 --- a/drivers/gpu/drm/vc4/vc4_v3d.c +++ b/drivers/gpu/drm/vc4/vc4_v3d.c @@ -16,8 +16,9 @@ * this program. If not, see <http://www.gnu.org/licenses/>. 
*/ -#include "linux/component.h" -#include "linux/pm_runtime.h" +#include <linux/clk.h> +#include <linux/component.h> +#include <linux/pm_runtime.h> #include "vc4_drv.h" #include "vc4_regs.h" @@ -156,6 +157,144 @@ static void vc4_v3d_init_hw(struct drm_device *dev) V3D_WRITE(V3D_VPMBASE, 0); } +int vc4_v3d_get_bin_slot(struct vc4_dev *vc4) +{ + struct drm_device *dev = vc4->dev; + unsigned long irqflags; + int slot; + uint64_t seqno = 0; + struct vc4_exec_info *exec; + +try_again: + spin_lock_irqsave(&vc4->job_lock, irqflags); + slot = ffs(~vc4->bin_alloc_used); + if (slot != 0) { + /* Switch from ffs() bit index to a 0-based index. */ + slot--; + vc4->bin_alloc_used |= BIT(slot); + spin_unlock_irqrestore(&vc4->job_lock, irqflags); + return slot; + } + + /* Couldn't find an open slot. Wait for render to complete + * and try again. + */ + exec = vc4_last_render_job(vc4); + if (exec) + seqno = exec->seqno; + spin_unlock_irqrestore(&vc4->job_lock, irqflags); + + if (seqno) { + int ret = vc4_wait_for_seqno(dev, seqno, ~0ull, true); + + if (ret == 0) + goto try_again; + + return ret; + } + + return -ENOMEM; +} + +/** + * vc4_allocate_bin_bo() - allocates the memory that will be used for + * tile binning. + * + * The binner has a limitation that the addresses in the tile state + * buffer that point into the tile alloc buffer or binner overflow + * memory only have 28 bits (256MB), and the top 4 on the bus for + * tile alloc references end up coming from the tile state buffer's + * address. + * + * To work around this, we allocate a single large buffer while V3D is + * in use, make sure that it has the top 4 bits constant across its + * entire extent, and then put the tile state, tile alloc, and binner + * overflow memory inside that buffer. + * + * This creates a limitation where we may not be able to execute a job + * if it doesn't fit within the buffer that we allocated up front. 
+ * However, it turns out that 16MB is "enough for anybody", and + * real-world applications run into allocation failures from the + * overall CMA pool before they make scenes complicated enough to run + * out of bin space. + */ +int +vc4_allocate_bin_bo(struct drm_device *drm) +{ + struct vc4_dev *vc4 = to_vc4_dev(drm); + struct vc4_v3d *v3d = vc4->v3d; + uint32_t size = 16 * 1024 * 1024; + int ret = 0; + struct list_head list; + + /* We may need to try allocating more than once to get a BO + * that doesn't cross 256MB. Track the ones we've allocated + * that failed so far, so that we can free them when we've got + * one that succeeded (if we freed them right away, our next + * allocation would probably be the same chunk of memory). + */ + INIT_LIST_HEAD(&list); + + while (true) { + struct vc4_bo *bo = vc4_bo_create(drm, size, true); + + if (IS_ERR(bo)) { + ret = PTR_ERR(bo); + + dev_err(&v3d->pdev->dev, + "Failed to allocate memory for tile binning: " + "%d. You may need to enable CMA or give it " + "more memory.", + ret); + break; + } + + /* Check if this BO won't trigger the addressing bug. */ + if ((bo->base.paddr & 0xf0000000) == + ((bo->base.paddr + bo->base.base.size - 1) & 0xf0000000)) { + vc4->bin_bo = bo; + + /* Set up for allocating 512KB chunks of + * binner memory. The biggest allocation we + * need to do is for the initial tile alloc + + * tile state buffer. We can render to a + * maximum of ((2048*2048) / (32*32) = 4096 + * tiles in a frame (until we do floating + * point rendering, at which point it would be + * 8192). Tile state is 48b/tile (rounded to + * a page), and tile alloc is 32b/tile + * (rounded to a page), plus a page of extra, + * for a total of 320kb for our worst-case. + * We choose 512kb so that it divides evenly + * into our 16MB, and the rest of the 512kb + * will be used as storage for the overflow + * from the initial 32b CL per bin. 
+ */ + vc4->bin_alloc_size = 512 * 1024; + vc4->bin_alloc_used = 0; + vc4->bin_alloc_overflow = 0; + WARN_ON_ONCE(sizeof(vc4->bin_alloc_used) * 8 != + bo->base.base.size / vc4->bin_alloc_size); + + break; + } + + /* Put it on the list to free later, and try again. */ + list_add(&bo->unref_head, &list); + } + + /* Free all the BOs we allocated but didn't choose. */ + while (!list_empty(&list)) { + struct vc4_bo *bo = list_last_entry(&list, + struct vc4_bo, unref_head); + + list_del(&bo->unref_head); + drm_gem_object_put_unlocked(&bo->base.base); + } + + return ret; +} + #ifdef CONFIG_PM static int vc4_v3d_runtime_suspend(struct device *dev) { @@ -164,6 +303,11 @@ static int vc4_v3d_runtime_suspend(struct device *dev) vc4_irq_uninstall(vc4->dev); + drm_gem_object_put_unlocked(&vc4->bin_bo->base.base); + vc4->bin_bo = NULL; + + clk_disable_unprepare(v3d->clk); + return 0; } @@ -171,6 +315,15 @@ static int vc4_v3d_runtime_resume(struct device *dev) { struct vc4_v3d *v3d = dev_get_drvdata(dev); struct vc4_dev *vc4 = v3d->vc4; + int ret; + + ret = vc4_allocate_bin_bo(vc4->dev); + if (ret) + return ret; + + ret = clk_prepare_enable(v3d->clk); + if (ret != 0) + return ret; vc4_v3d_init_hw(vc4->dev); vc4_irq_postinstall(vc4->dev); @@ -202,12 +355,38 @@ static int vc4_v3d_bind(struct device *dev, struct device *master, void *data) vc4->v3d = v3d; v3d->vc4 = vc4; + v3d->clk = devm_clk_get(dev, NULL); + if (IS_ERR(v3d->clk)) { + int ret = PTR_ERR(v3d->clk); + + if (ret == -ENOENT) { + /* bcm2835 didn't have a clock reference in the DT. 
*/ + ret = 0; + v3d->clk = NULL; + } else { + if (ret != -EPROBE_DEFER) + dev_err(dev, "Failed to get V3D clock: %d\n", + ret); + return ret; + } + } + if (V3D_READ(V3D_IDENT0) != V3D_EXPECTED_IDENT0) { DRM_ERROR("V3D_IDENT0 read 0x%08x instead of 0x%08x\n", V3D_READ(V3D_IDENT0), V3D_EXPECTED_IDENT0); return -EINVAL; } + ret = clk_prepare_enable(v3d->clk); + if (ret != 0) + return ret; + + ret = vc4_allocate_bin_bo(drm); + if (ret) { + clk_disable_unprepare(v3d->clk); + return ret; + } + /* Reset the binner overflow address/size at setup, to be sure * we don't reuse an old one. */ @@ -222,6 +401,7 @@ static int vc4_v3d_bind(struct device *dev, struct device *master, void *data) return ret; } + pm_runtime_set_active(dev); pm_runtime_use_autosuspend(dev); pm_runtime_set_autosuspend_delay(dev, 40); /* a little over 2 frames. */ pm_runtime_enable(dev); @@ -271,6 +451,7 @@ static int vc4_v3d_dev_remove(struct platform_device *pdev) static const struct of_device_id vc4_v3d_dt_match[] = { { .compatible = "brcm,bcm2835-v3d" }, + { .compatible = "brcm,cygnus-v3d" }, { .compatible = "brcm,vc4-v3d" }, {} }; diff --git a/drivers/gpu/drm/vc4/vc4_validate.c b/drivers/gpu/drm/vc4/vc4_validate.c index 9fd171c361c2..814b512c6b9a 100644 --- a/drivers/gpu/drm/vc4/vc4_validate.c +++ b/drivers/gpu/drm/vc4/vc4_validate.c @@ -22,21 +22,25 @@ */ /** - * Command list validator for VC4. + * DOC: Command list validator for VC4. * - * The VC4 has no IOMMU between it and system memory. So, a user with - * access to execute command lists could escalate privilege by + * Since the VC4 has no IOMMU between it and system memory, a user + * with access to execute command lists could escalate privilege by * overwriting system memory (drawing to it as a framebuffer) or - * reading system memory it shouldn't (reading it as a texture, or - * uniform data, or vertex data). 
+ * reading system memory it shouldn't (reading it as a vertex buffer + * or index buffer) * - * This validates command lists to ensure that all accesses are within - * the bounds of the GEM objects referenced. It explicitly whitelists - * packets, and looks at the offsets in any address fields to make - * sure they're constrained within the BOs they reference. + * We validate binner command lists to ensure that all accesses are + * within the bounds of the GEM objects referenced by the submitted + * job. It explicitly whitelists packets, and looks at the offsets in + * any address fields to make sure they're contained within the BOs + * they reference. * - * Note that because of the validation that's happening anyway, this - * is where GEM relocation processing happens. + * Note that because CL validation is already reading the + * user-submitted CL and writing the validated copy out to the memory + * that the GPU will actually read, this is also where GEM relocation + * processing (turning BO references into actual addresses for the GPU + * to use) happens. */ #include "uapi/drm/vc4_drm.h" @@ -84,8 +88,12 @@ utile_height(int cpp) } /** - * The texture unit decides what tiling format a particular miplevel is using - * this function, so we lay out our miptrees accordingly. + * size_is_lt() - Returns whether a miplevel of the given size will + * use the lineartile (LT) tiling layout rather than the normal T + * tiling layout. + * @width: Width in pixels of the miplevel + * @height: Height in pixels of the miplevel + * @cpp: Bytes per pixel of the pixel format */ static bool size_is_lt(uint32_t width, uint32_t height, int cpp) @@ -164,7 +172,8 @@ vc4_check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo, * our math. 
*/ if (width > 4096 || height > 4096) { - DRM_ERROR("Surface dimesions (%d,%d) too large", width, height); + DRM_ERROR("Surface dimensions (%d,%d) too large", + width, height); return false; } @@ -340,10 +349,11 @@ static int validate_tile_binning_config(VALIDATE_ARGS) { struct drm_device *dev = exec->exec_bo->base.dev; - struct vc4_bo *tile_bo; + struct vc4_dev *vc4 = to_vc4_dev(dev); uint8_t flags; - uint32_t tile_state_size, tile_alloc_size; - uint32_t tile_count; + uint32_t tile_state_size; + uint32_t tile_count, bin_addr; + int bin_slot; if (exec->found_tile_binning_mode_config_packet) { DRM_ERROR("Duplicate VC4_PACKET_TILE_BINNING_MODE_CONFIG\n"); @@ -369,13 +379,28 @@ validate_tile_binning_config(VALIDATE_ARGS) return -EINVAL; } + bin_slot = vc4_v3d_get_bin_slot(vc4); + if (bin_slot < 0) { + if (bin_slot != -EINTR && bin_slot != -ERESTARTSYS) { + DRM_ERROR("Failed to allocate binner memory: %d\n", + bin_slot); + } + return bin_slot; + } + + /* The slot we allocated will only be used by this job, and is + * free when the job completes rendering. + */ + exec->bin_slots |= BIT(bin_slot); + bin_addr = vc4->bin_bo->base.paddr + bin_slot * vc4->bin_alloc_size; + /* The tile state data array is 48 bytes per tile, and we put it at * the start of a BO containing both it and the tile alloc. */ tile_state_size = 48 * tile_count; /* Since the tile alloc array will follow us, align. */ - exec->tile_alloc_offset = roundup(tile_state_size, 4096); + exec->tile_alloc_offset = bin_addr + roundup(tile_state_size, 4096); *(uint8_t *)(validated + 14) = ((flags & ~(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK | @@ -386,35 +411,13 @@ validate_tile_binning_config(VALIDATE_ARGS) VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128, VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE)); - /* Initial block size. */ - tile_alloc_size = 32 * tile_count; - - /* - * The initial allocation gets rounded to the next 256 bytes before - * the hardware starts fulfilling further allocations. 
- */ - tile_alloc_size = roundup(tile_alloc_size, 256); - - /* Add space for the extra allocations. This is what gets used first, - * before overflow memory. It must have at least 4096 bytes, but we - * want to avoid overflow memory usage if possible. - */ - tile_alloc_size += 1024 * 1024; - - tile_bo = vc4_bo_create(dev, exec->tile_alloc_offset + tile_alloc_size, - true); - exec->tile_bo = &tile_bo->base; - if (IS_ERR(exec->tile_bo)) - return PTR_ERR(exec->tile_bo); - list_add_tail(&tile_bo->unref_head, &exec->unref_list); - /* tile alloc address. */ - *(uint32_t *)(validated + 0) = (exec->tile_bo->paddr + - exec->tile_alloc_offset); + *(uint32_t *)(validated + 0) = exec->tile_alloc_offset; /* tile alloc size. */ - *(uint32_t *)(validated + 4) = tile_alloc_size; + *(uint32_t *)(validated + 4) = (bin_addr + vc4->bin_alloc_size - + exec->tile_alloc_offset); /* tile state address. */ - *(uint32_t *)(validated + 8) = exec->tile_bo->paddr; + *(uint32_t *)(validated + 8) = bin_addr; return 0; } diff --git a/drivers/gpu/drm/vc4/vc4_validate_shaders.c b/drivers/gpu/drm/vc4/vc4_validate_shaders.c index 5dba13dd1e9b..0b2df5c6efb4 100644 --- a/drivers/gpu/drm/vc4/vc4_validate_shaders.c +++ b/drivers/gpu/drm/vc4/vc4_validate_shaders.c @@ -24,16 +24,21 @@ /** * DOC: Shader validator for VC4. * - * The VC4 has no IOMMU between it and system memory, so a user with - * access to execute shaders could escalate privilege by overwriting - * system memory (using the VPM write address register in the - * general-purpose DMA mode) or reading system memory it shouldn't - * (reading it as a texture, or uniform data, or vertex data). + * Since the VC4 has no IOMMU between it and system memory, a user + * with access to execute shaders could escalate privilege by + * overwriting system memory (using the VPM write address register in + * the general-purpose DMA mode) or reading system memory it shouldn't + * (reading it as a texture, uniform data, or direct-addressed TMU + * lookup). 
* - * This walks over a shader BO, ensuring that its accesses are - * appropriately bounded, and recording how many texture accesses are - * made and where so that we can do relocations for them in the + * The shader validator walks over a shader's BO, ensuring that its + * accesses are appropriately bounded, and recording where texture + * accesses are made so that we can do relocations for them in the * uniform stream. + * + * Shader BO are immutable for their lifetimes (enforced by not + * allowing mmaps, GEM prime export, or rendering to from a CL), so + * this validation is only performed at BO creation time. */ #include "vc4_drv.h" diff --git a/drivers/gpu/drm/vc4/vc4_vec.c b/drivers/gpu/drm/vc4/vc4_vec.c index 32bb8ef985fb..09c1e05765fa 100644 --- a/drivers/gpu/drm/vc4/vc4_vec.c +++ b/drivers/gpu/drm/vc4/vc4_vec.c @@ -16,6 +16,12 @@ /** * DOC: VC4 SDTV module + * + * The VEC encoder generates PAL or NTSC composite video output. + * + * TV mode selection is done by an atomic property on the encoder, + * because a drm_mode_modeinfo is insufficient to distinguish between + * PAL and PAL-M or NTSC and NTSC-J. */ #include <drm/drm_atomic_helper.h> |