From 6b45e0f24c7b5316dea7ed5995e1b809a3266b43 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 1 Feb 2017 12:48:42 -0700 Subject: fpga zynq: Check for errors after completing DMA The completion did not check the interrupt status to see if any error bits were asserted, check error bits and dump some registers if things went wrong. A few fixes are needed to make this work, the IXR_ERROR_FLAGS_MASK was wrong, it included the done bits, which shows a bug in mask/unmask_irqs which were using the wrong bits, simplify all of this stuff. Signed-off-by: Jason Gunthorpe Reviewed-by: Moritz Fischer Acked-by: Alan Tull Signed-off-by: Greg Kroah-Hartman --- drivers/fpga/zynq-fpga.c | 54 ++++++++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 22 deletions(-) (limited to 'drivers/fpga/zynq-fpga.c') diff --git a/drivers/fpga/zynq-fpga.c b/drivers/fpga/zynq-fpga.c index 1812bf7614e1..f674e32832ec 100644 --- a/drivers/fpga/zynq-fpga.c +++ b/drivers/fpga/zynq-fpga.c @@ -89,7 +89,7 @@ #define IXR_D_P_DONE_MASK BIT(12) /* FPGA programmed */ #define IXR_PCFG_DONE_MASK BIT(2) -#define IXR_ERROR_FLAGS_MASK 0x00F0F860 +#define IXR_ERROR_FLAGS_MASK 0x00F0C860 #define IXR_ALL_MASK 0xF8F7F87F /* Miscellaneous constant values */ @@ -143,23 +143,10 @@ static inline u32 zynq_fpga_read(const struct zynq_fpga_priv *priv, readl_poll_timeout(priv->io_base + addr, val, cond, sleep_us, \ timeout_us) -static void zynq_fpga_mask_irqs(struct zynq_fpga_priv *priv) +/* Cause the specified irq mask bits to generate IRQs */ +static inline void zynq_fpga_set_irq(struct zynq_fpga_priv *priv, u32 enable) { - u32 intr_mask; - - intr_mask = zynq_fpga_read(priv, INT_MASK_OFFSET); - zynq_fpga_write(priv, INT_MASK_OFFSET, - intr_mask | IXR_DMA_DONE_MASK | IXR_ERROR_FLAGS_MASK); -} - -static void zynq_fpga_unmask_irqs(struct zynq_fpga_priv *priv) -{ - u32 intr_mask; - - intr_mask = zynq_fpga_read(priv, INT_MASK_OFFSET); - zynq_fpga_write(priv, INT_MASK_OFFSET, - intr_mask - & ~(IXR_D_P_DONE_MASK | IXR_ERROR_FLAGS_MASK)); + zynq_fpga_write(priv, INT_MASK_OFFSET, ~enable); } static irqreturn_t zynq_fpga_isr(int irq, void *data) @@ -167,7 +154,7 @@ static irqreturn_t zynq_fpga_isr(int irq, void *data) struct zynq_fpga_priv *priv = data; /* disable DMA and error IRQs */ - zynq_fpga_mask_irqs(priv); + zynq_fpga_set_irq(priv, 0); complete(&priv->dma_done); @@ -285,6 +272,7 @@ static int zynq_fpga_ops_write(struct fpga_manager *mgr, const char *buf, size_t count) { struct zynq_fpga_priv *priv; + const char *why; int err; char *kbuf; size_t in_count; @@ -312,7 +300,7 @@ static int zynq_fpga_ops_write(struct fpga_manager *mgr, reinit_completion(&priv->dma_done); /* enable DMA and error IRQs */ - zynq_fpga_unmask_irqs(priv); + zynq_fpga_set_irq(priv, IXR_D_P_DONE_MASK | IXR_ERROR_FLAGS_MASK); /* the +1 in the src addr is used to hold off on DMA_DONE IRQ * until both AXI and PCAP are done ... @@ -331,11 +319,33 @@ static int zynq_fpga_ops_write(struct fpga_manager *mgr, intr_status = zynq_fpga_read(priv, INT_STS_OFFSET); zynq_fpga_write(priv, INT_STS_OFFSET, intr_status); + if (intr_status & IXR_ERROR_FLAGS_MASK) { + why = "DMA reported error"; + err = -EIO; + goto out_report; + } + if (!((intr_status & IXR_D_P_DONE_MASK) == IXR_D_P_DONE_MASK)) { - dev_err(&mgr->dev, "Error configuring FPGA\n"); - err = -EFAULT; + why = "DMA did not complete"; + err = -EIO; + goto out_report; } + err = 0; + goto out_clk; + +out_report: + dev_err(&mgr->dev, + "%s: INT_STS:0x%x CTRL:0x%x LOCK:0x%x INT_MASK:0x%x STATUS:0x%x MCTRL:0x%x\n", + why, + intr_status, + zynq_fpga_read(priv, CTRL_OFFSET), + zynq_fpga_read(priv, LOCK_OFFSET), + zynq_fpga_read(priv, INT_MASK_OFFSET), + zynq_fpga_read(priv, STATUS_OFFSET), + zynq_fpga_read(priv, MCTRL_OFFSET)); + +out_clk: clk_disable(priv->clk); out_free: @@ -452,7 +462,7 @@ static int zynq_fpga_probe(struct platform_device *pdev) /* unlock the device */ zynq_fpga_write(priv, UNLOCK_OFFSET, UNLOCK_MASK); - zynq_fpga_write(priv, INT_MASK_OFFSET, 0xFFFFFFFF); + zynq_fpga_set_irq(priv, 0); zynq_fpga_write(priv, INT_STS_OFFSET, IXR_ALL_MASK); err = devm_request_irq(dev, priv->irq, zynq_fpga_isr, 0, dev_name(dev), priv); -- cgit From b496df86ac1bbe393a55ddbfed35d46e74ef9767 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 1 Feb 2017 12:48:43 -0700 Subject: fpga zynq: Check the bitstream for validity There is no sense in sending a bitstream we know will not work, and with the variety of options for bitstream generation in Xilinx tools it is not terribly clear what the correct input should be. This is particularly important for Zynq since auto-correction was removed from the driver and the Zynq hardware only accepts a bitstream format that is different from what the Xilinx tools typically produce. Worse, the hardware provides no indication why the bitstream fails, it simply times out if the input is wrong. The best option here is to have the kernel print a message informing the user they are using a malformed bistream and programming failure isn't for any of the myriad of other reasons. Signed-off-by: Jason Gunthorpe Acked-by: Moritz Fischer Acked-by: Alan Tull Signed-off-by: Greg Kroah-Hartman --- drivers/fpga/zynq-fpga.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'drivers/fpga/zynq-fpga.c') diff --git a/drivers/fpga/zynq-fpga.c b/drivers/fpga/zynq-fpga.c index f674e32832ec..c3fc2a231e28 100644 --- a/drivers/fpga/zynq-fpga.c +++ b/drivers/fpga/zynq-fpga.c @@ -161,6 +161,19 @@ static irqreturn_t zynq_fpga_isr(int irq, void *data) return IRQ_HANDLED; } +/* Sanity check the proposed bitstream. It must start with the sync word in + * the correct byte order, and be dword aligned. The input is a Xilinx .bin + * file with every 32 bit quantity swapped. + */ +static bool zynq_fpga_has_sync(const u8 *buf, size_t count) +{ + for (; count >= 4; buf += 4, count -= 4) + if (buf[0] == 0x66 && buf[1] == 0x55 && buf[2] == 0x99 && + buf[3] == 0xaa) + return true; + return false; +} + static int zynq_fpga_ops_write_init(struct fpga_manager *mgr, struct fpga_image_info *info, const char *buf, size_t count) @@ -177,6 +190,13 @@ static int zynq_fpga_ops_write_init(struct fpga_manager *mgr, /* don't globally reset PL if we're doing partial reconfig */ if (!(info->flags & FPGA_MGR_PARTIAL_RECONFIG)) { + if (!zynq_fpga_has_sync(buf, count)) { + dev_err(&mgr->dev, + "Invalid bitstream, could not find a sync word. Bitstream must be a byte swapped .bin file\n"); + err = -EINVAL; + goto out_err; + } + /* assert AXI interface resets */ regmap_write(priv->slcr, SLCR_FPGA_RST_CTRL_OFFSET, FPGA_RST_ALL_MASK); @@ -410,6 +430,7 @@ static enum fpga_mgr_states zynq_fpga_ops_state(struct fpga_manager *mgr) } static const struct fpga_manager_ops zynq_fpga_ops = { + .initial_header_size = 128, .state = zynq_fpga_ops_state, .write_init = zynq_fpga_ops_write_init, .write = zynq_fpga_ops_write, -- cgit From 425902f5c8e3034b2d9d7a714289d8d579c733b2 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 1 Feb 2017 12:48:45 -0700 Subject: fpga zynq: Use the scatterlist interface This allows the driver to avoid a high order coherent DMA allocation and memory copy. With this patch it can DMA directly from the kernel pages that the bitfile is stored in. Since this is now a gather DMA operation the driver uses the ISR to feed the chips DMA queue with each entry from the SGL. Signed-off-by: Jason Gunthorpe Acked-by: Moritz Fischer Signed-off-by: Greg Kroah-Hartman --- drivers/fpga/zynq-fpga.c | 174 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 135 insertions(+), 39 deletions(-) (limited to 'drivers/fpga/zynq-fpga.c') diff --git a/drivers/fpga/zynq-fpga.c b/drivers/fpga/zynq-fpga.c index c3fc2a231e28..34cb98139442 100644 --- a/drivers/fpga/zynq-fpga.c +++ b/drivers/fpga/zynq-fpga.c @@ -30,6 +30,7 @@ #include #include #include +#include /* Offsets into SLCR regmap */ @@ -80,6 +81,7 @@ /* FPGA init status */ #define STATUS_DMA_Q_F BIT(31) +#define STATUS_DMA_Q_E BIT(30) #define STATUS_PCFG_INIT_MASK BIT(4) /* Interrupt Status/Mask Register Bit definitions */ @@ -98,12 +100,16 @@ #define DMA_INVALID_ADDRESS GENMASK(31, 0) /* Used to unlock the dev */ #define UNLOCK_MASK 0x757bdf0d -/* Timeout for DMA to complete */ -#define DMA_DONE_TIMEOUT msecs_to_jiffies(1000) /* Timeout for polling reset bits */ #define INIT_POLL_TIMEOUT 2500000 /* Delay for polling reset bits */ #define INIT_POLL_DELAY 20 +/* Signal this is the last DMA transfer, wait for the AXI and PCAP before + * interrupting + */ +#define DMA_SRC_LAST_TRANSFER 1 +/* Timeout for DMA completion */ +#define DMA_TIMEOUT_MS 5000 /* Masks for controlling stuff in SLCR */ /* Disable all Level shifters */ @@ -124,6 +130,11 @@ struct zynq_fpga_priv { void __iomem *io_base; struct regmap *slcr; + spinlock_t dma_lock; + unsigned int dma_elm; + unsigned int dma_nelms; + struct scatterlist *cur_sg; + struct completion dma_done; }; @@ -149,13 +160,80 @@ static inline void zynq_fpga_set_irq(struct zynq_fpga_priv *priv, u32 enable) zynq_fpga_write(priv, INT_MASK_OFFSET, ~enable); } +/* Must be called with dma_lock held */ +static void zynq_step_dma(struct zynq_fpga_priv *priv) +{ + u32 addr; + u32 len; + bool first; + + first = priv->dma_elm == 0; + while (priv->cur_sg) { + /* Feed the DMA queue until it is full. */ + if (zynq_fpga_read(priv, STATUS_OFFSET) & STATUS_DMA_Q_F) + break; + + addr = sg_dma_address(priv->cur_sg); + len = sg_dma_len(priv->cur_sg); + if (priv->dma_elm + 1 == priv->dma_nelms) { + /* The last transfer waits for the PCAP to finish too, + * notice this also changes the irq_mask to ignore + * IXR_DMA_DONE_MASK which ensures we do not trigger + * the completion too early. + */ + addr |= DMA_SRC_LAST_TRANSFER; + priv->cur_sg = NULL; + } else { + priv->cur_sg = sg_next(priv->cur_sg); + priv->dma_elm++; + } + + zynq_fpga_write(priv, DMA_SRC_ADDR_OFFSET, addr); + zynq_fpga_write(priv, DMA_DST_ADDR_OFFSET, DMA_INVALID_ADDRESS); + zynq_fpga_write(priv, DMA_SRC_LEN_OFFSET, len / 4); + zynq_fpga_write(priv, DMA_DEST_LEN_OFFSET, 0); + } + + /* Once the first transfer is queued we can turn on the ISR, future + * calls to zynq_step_dma will happen from the ISR context. The + * dma_lock spinlock guarentees this handover is done coherently, the + * ISR enable is put at the end to avoid another CPU spinning in the + * ISR on this lock. + */ + if (first && priv->cur_sg) { + zynq_fpga_set_irq(priv, + IXR_DMA_DONE_MASK | IXR_ERROR_FLAGS_MASK); + } else if (!priv->cur_sg) { + /* The last transfer changes to DMA & PCAP mode since we do + * not want to continue until everything has been flushed into + * the PCAP. + */ + zynq_fpga_set_irq(priv, + IXR_D_P_DONE_MASK | IXR_ERROR_FLAGS_MASK); + } +} + static irqreturn_t zynq_fpga_isr(int irq, void *data) { struct zynq_fpga_priv *priv = data; + u32 intr_status; - /* disable DMA and error IRQs */ - zynq_fpga_set_irq(priv, 0); + /* If anything other than DMA completion is reported stop and hand + * control back to zynq_fpga_ops_write, something went wrong, + * otherwise progress the DMA. + */ + spin_lock(&priv->dma_lock); + intr_status = zynq_fpga_read(priv, INT_STS_OFFSET); + if (!(intr_status & IXR_ERROR_FLAGS_MASK) && + (intr_status & IXR_DMA_DONE_MASK) && priv->cur_sg) { + zynq_fpga_write(priv, INT_STS_OFFSET, IXR_DMA_DONE_MASK); + zynq_step_dma(priv); + spin_unlock(&priv->dma_lock); + return IRQ_HANDLED; + } + spin_unlock(&priv->dma_lock); + zynq_fpga_set_irq(priv, 0); complete(&priv->dma_done); return IRQ_HANDLED; @@ -266,10 +344,11 @@ static int zynq_fpga_ops_write_init(struct fpga_manager *mgr, zynq_fpga_write(priv, CTRL_OFFSET, (CTRL_PCAP_PR_MASK | CTRL_PCAP_MODE_MASK | ctrl)); - /* check that we have room in the command queue */ + /* We expect that the command queue is empty right now. */ status = zynq_fpga_read(priv, STATUS_OFFSET); - if (status & STATUS_DMA_Q_F) { - dev_err(&mgr->dev, "DMA command queue full\n"); + if ((status & STATUS_DMA_Q_F) || + (status & STATUS_DMA_Q_E) != STATUS_DMA_Q_E) { + dev_err(&mgr->dev, "DMA command queue not right\n"); err = -EBUSY; goto out_err; } @@ -288,27 +367,36 @@ out_err: return err; } -static int zynq_fpga_ops_write(struct fpga_manager *mgr, - const char *buf, size_t count) +static int zynq_fpga_ops_write(struct fpga_manager *mgr, struct sg_table *sgt) { struct zynq_fpga_priv *priv; const char *why; int err; - char *kbuf; - size_t in_count; - dma_addr_t dma_addr; - u32 transfer_length; u32 intr_status; + unsigned long timeout; + unsigned long flags; + struct scatterlist *sg; + int i; - in_count = count; priv = mgr->priv; - kbuf = - dma_alloc_coherent(mgr->dev.parent, count, &dma_addr, GFP_KERNEL); - if (!kbuf) - return -ENOMEM; + /* The hardware can only DMA multiples of 4 bytes, and it requires the + * starting addresses to be aligned to 64 bits (UG585 pg 212). + */ + for_each_sg(sgt->sgl, sg, sgt->nents, i) { + if ((sg->offset % 8) || (sg->length % 4)) { + dev_err(&mgr->dev, + "Invalid bitstream, chunks must be aligned\n"); + return -EINVAL; + } + } - memcpy(kbuf, buf, count); + priv->dma_nelms = + dma_map_sg(mgr->dev.parent, sgt->sgl, sgt->nents, DMA_TO_DEVICE); + if (priv->dma_nelms == 0) { + dev_err(&mgr->dev, "Unable to DMA map (TO_DEVICE)\n"); + return -ENOMEM; + } /* enable clock */ err = clk_enable(priv->clk); @@ -316,28 +404,31 @@ static int zynq_fpga_ops_write(struct fpga_manager *mgr, goto out_free; zynq_fpga_write(priv, INT_STS_OFFSET, IXR_ALL_MASK); - reinit_completion(&priv->dma_done); - /* enable DMA and error IRQs */ - zynq_fpga_set_irq(priv, IXR_D_P_DONE_MASK | IXR_ERROR_FLAGS_MASK); - - /* the +1 in the src addr is used to hold off on DMA_DONE IRQ - * until both AXI and PCAP are done ... - */ - zynq_fpga_write(priv, DMA_SRC_ADDR_OFFSET, (u32)(dma_addr) + 1); - zynq_fpga_write(priv, DMA_DST_ADDR_OFFSET, (u32)DMA_INVALID_ADDRESS); - - /* convert #bytes to #words */ - transfer_length = (count + 3) / 4; + /* zynq_step_dma will turn on interrupts */ + spin_lock_irqsave(&priv->dma_lock, flags); + priv->dma_elm = 0; + priv->cur_sg = sgt->sgl; + zynq_step_dma(priv); + spin_unlock_irqrestore(&priv->dma_lock, flags); - zynq_fpga_write(priv, DMA_SRC_LEN_OFFSET, transfer_length); - zynq_fpga_write(priv, DMA_DEST_LEN_OFFSET, 0); + timeout = wait_for_completion_timeout(&priv->dma_done, + msecs_to_jiffies(DMA_TIMEOUT_MS)); - wait_for_completion(&priv->dma_done); + spin_lock_irqsave(&priv->dma_lock, flags); + zynq_fpga_set_irq(priv, 0); + priv->cur_sg = NULL; + spin_unlock_irqrestore(&priv->dma_lock, flags); intr_status = zynq_fpga_read(priv, INT_STS_OFFSET); - zynq_fpga_write(priv, INT_STS_OFFSET, intr_status); + zynq_fpga_write(priv, INT_STS_OFFSET, IXR_ALL_MASK); + + /* There doesn't seem to be a way to force cancel any DMA, so if + * something went wrong we are relying on the hardware to have halted + * the DMA before we get here, if there was we could use + * wait_for_completion_interruptible too. + */ if (intr_status & IXR_ERROR_FLAGS_MASK) { why = "DMA reported error"; @@ -345,8 +436,12 @@ static int zynq_fpga_ops_write(struct fpga_manager *mgr, goto out_report; } - if (!((intr_status & IXR_D_P_DONE_MASK) == IXR_D_P_DONE_MASK)) { - why = "DMA did not complete"; + if (priv->cur_sg || + !((intr_status & IXR_D_P_DONE_MASK) == IXR_D_P_DONE_MASK)) { + if (timeout == 0) + why = "DMA timed out"; + else + why = "DMA did not complete"; err = -EIO; goto out_report; } @@ -369,7 +464,7 @@ out_clk: clk_disable(priv->clk); out_free: - dma_free_coherent(mgr->dev.parent, count, kbuf, dma_addr); + dma_unmap_sg(mgr->dev.parent, sgt->sgl, sgt->nents, DMA_TO_DEVICE); return err; } @@ -433,7 +528,7 @@ static const struct fpga_manager_ops zynq_fpga_ops = { .initial_header_size = 128, .state = zynq_fpga_ops_state, .write_init = zynq_fpga_ops_write_init, - .write = zynq_fpga_ops_write, + .write_sg = zynq_fpga_ops_write, .write_complete = zynq_fpga_ops_write_complete, }; @@ -447,6 +542,7 @@ static int zynq_fpga_probe(struct platform_device *pdev) priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; + spin_lock_init(&priv->dma_lock); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); priv->io_base = devm_ioremap_resource(dev, res); -- cgit