author    Daniel Vetter <[email protected]>    2023-04-06 14:37:14 +0200
committer Daniel Vetter <[email protected]>    2023-04-06 14:37:15 +0200
commit    52b113e968be66b57f792b2e2a9b8b77f382bd5f (patch)
tree      b0d29f82fe76a7078422fcd3326d2591718af3b9
parent    f86286569e92a260fbf8a1975f9421b4a66581d8 (diff)
parent    e44f18c6ff8beef7b2b10592287f0a9766376d9b (diff)
Merge tag 'drm-misc-next-2023-04-06' of git://anongit.freedesktop.org/drm/drm-misc into drm-next
drm-misc-next for v6.4-rc1:

UAPI Changes:

Cross-subsystem Changes:
- Document port and rotation dt bindings better.
- For panel timing DT bindings, document that vsync and hsync are first,
  rather than last in image.
- Fix video/aperture typos.

Core Changes:
- Reject prime DMA-Buf attachment if get_sg_table is missing. (For
  self-importing dma-buf only.)
- Add prime import/export to vram-helper.
- Fix oops in drm/vblank when init is not called.
- Fixup xres/yres_virtual and other fixes in fb helper.
- Improve SCDC debugs.
- Skip setting deadline on modesets.
- Assorted TTM fixes.

Driver Changes:
- Add lima usage stats.
- Assorted fixes to bridge/lt8912b, tc358767, ivpu, bridge/ti-sn65dsi83,
  ps8640.
- Use pci aperture helpers in drm/ast, lynxfb, radeonfb.
- Revert some lima patches, as they required a commit that has been
  reverted upstream.
- Add AUO NE135FBM-N41 v8.1 eDP panel.
- Add QAIC accel driver.

Signed-off-by: Daniel Vetter <[email protected]>
From: Maarten Lankhorst <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
-rw-r--r--Documentation/accel/index.rst1
-rw-r--r--Documentation/accel/qaic/aic100.rst510
-rw-r--r--Documentation/accel/qaic/index.rst13
-rw-r--r--Documentation/accel/qaic/qaic.rst170
-rw-r--r--Documentation/devicetree/bindings/display/panel/elida,kd35t133.yaml9
-rw-r--r--Documentation/devicetree/bindings/display/panel/feiyang,fy07024di26a30d.yaml8
-rw-r--r--Documentation/devicetree/bindings/display/panel/panel-timing.yaml46
-rw-r--r--Documentation/devicetree/bindings/display/panel/sitronix,st7701.yaml9
-rw-r--r--Documentation/devicetree/bindings/display/panel/sitronix,st7789v.yaml4
-rw-r--r--Documentation/devicetree/bindings/display/panel/xinpeng,xpp055c272.yaml8
-rw-r--r--MAINTAINERS10
-rw-r--r--drivers/accel/Kconfig1
-rw-r--r--drivers/accel/Makefile1
-rw-r--r--drivers/accel/ivpu/ivpu_drv.c4
-rw-r--r--drivers/accel/qaic/Kconfig23
-rw-r--r--drivers/accel/qaic/Makefile13
-rw-r--r--drivers/accel/qaic/mhi_controller.c563
-rw-r--r--drivers/accel/qaic/mhi_controller.h16
-rw-r--r--drivers/accel/qaic/mhi_qaic_ctrl.c569
-rw-r--r--drivers/accel/qaic/mhi_qaic_ctrl.h12
-rw-r--r--drivers/accel/qaic/qaic.h282
-rw-r--r--drivers/accel/qaic/qaic_control.c1526
-rw-r--r--drivers/accel/qaic/qaic_data.c1902
-rw-r--r--drivers/accel/qaic/qaic_drv.c647
-rw-r--r--drivers/gpu/drm/ast/ast_drv.c16
-rw-r--r--drivers/gpu/drm/bridge/fsl-ldb.c103
-rw-r--r--drivers/gpu/drm/bridge/lontium-lt8912b.c1
-rw-r--r--drivers/gpu/drm/bridge/parade-ps8640.c2
-rw-r--r--drivers/gpu/drm/bridge/synopsys/dw-hdmi.c8
-rw-r--r--drivers/gpu/drm/bridge/tc358767.c4
-rw-r--r--drivers/gpu/drm/bridge/ti-sn65dsi83.c8
-rw-r--r--drivers/gpu/drm/bridge/ti-sn65dsi86.c4
-rw-r--r--drivers/gpu/drm/display/drm_scdc_helper.c46
-rw-r--r--drivers/gpu/drm/drm_atomic_helper.c6
-rw-r--r--drivers/gpu/drm/drm_fb_helper.c53
-rw-r--r--drivers/gpu/drm/drm_prime.c6
-rw-r--r--drivers/gpu/drm/drm_vblank.c10
-rw-r--r--drivers/gpu/drm/i915/display/intel_ddi.c4
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdmi.c8
-rw-r--r--drivers/gpu/drm/lima/lima_drv.c6
-rw-r--r--drivers/gpu/drm/panel/panel-edp.c1
-rw-r--r--drivers/gpu/drm/tegra/sor.c15
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo_vm.c13
-rw-r--r--drivers/gpu/drm/ttm/ttm_pool.c111
-rw-r--r--drivers/gpu/drm/vc4/vc4_hdmi.c21
-rw-r--r--drivers/staging/sm750fb/sm750.c16
-rw-r--r--drivers/video/aperture.c8
-rw-r--r--drivers/video/fbdev/aty/radeon_base.c10
-rw-r--r--include/drm/display/drm_scdc_helper.h7
-rw-r--r--include/drm/drm_gem_vram_helper.h4
-rw-r--r--include/uapi/drm/qaic_accel.h397
51 files changed, 7004 insertions, 231 deletions
diff --git a/Documentation/accel/index.rst b/Documentation/accel/index.rst
index 2b43c9a7f67b..e94a0160b6a0 100644
--- a/Documentation/accel/index.rst
+++ b/Documentation/accel/index.rst
@@ -8,6 +8,7 @@ Compute Accelerators
:maxdepth: 1
introduction
+ qaic/index
.. only:: subproject and html
diff --git a/Documentation/accel/qaic/aic100.rst b/Documentation/accel/qaic/aic100.rst
new file mode 100644
index 000000000000..c80d0f1307db
--- /dev/null
+++ b/Documentation/accel/qaic/aic100.rst
@@ -0,0 +1,510 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+
+===============================
+ Qualcomm Cloud AI 100 (AIC100)
+===============================
+
+Overview
+========
+
+The Qualcomm Cloud AI 100/AIC100 family of products (including SA9000P - part of
+Snapdragon Ride) are PCIe adapter cards which contain a dedicated SoC ASIC for
+the purpose of efficiently running Artificial Intelligence (AI) Deep Learning
+inference workloads. They are AI accelerators.
+
+The PCIe interface of AIC100 is capable of PCIe Gen4 speeds over eight lanes
+(x8). An individual SoC on a card can have up to 16 NSPs for running workloads.
+Each SoC has an A53 management CPU. On card, there can be up to 32 GB of DDR.
+
+Multiple AIC100 cards can be hosted in a single system to scale overall
+performance. AIC100 cards are multi-user capable and able to execute workloads
+from multiple users in a concurrent manner.
+
+Hardware Description
+====================
+
+An AIC100 card consists of an AIC100 SoC, on-card DDR, and a set of misc
+peripherals (PMICs, etc).
+
+An AIC100 card can either be a PCIe HHHL form factor (a traditional PCIe card),
+or a Dual M.2 card. Both use PCIe to connect to the host system.
+
+As a PCIe endpoint/adapter, AIC100 uses the standard VendorID(VID)/
+DeviceID(DID) combination to uniquely identify itself to the host. AIC100
+uses the standard Qualcomm VID (0x17cb). All AIC100 SKUs use the same
+AIC100 DID (0xa100).
+
+AIC100 does not implement FLR (function level reset).
+
+AIC100 implements MSI but does not implement MSI-X. AIC100 requires 17 MSIs to
+operate (1 for MHI, 16 for the DMA Bridge).
+
+As a PCIe device, AIC100 utilizes BARs to provide host interfaces to the device
+hardware. AIC100 provides three 64-bit BARs.
+
+* The first BAR is 4K in size, and exposes the MHI interface to the host.
+
+* The second BAR is 2M in size, and exposes the DMA Bridge interface to the
+ host.
+
+* The third BAR is variable in size based on an individual AIC100's
+ configuration, but defaults to 64K. This BAR currently has no purpose.
+
+From the host perspective, AIC100 has several key hardware components -
+
+* MHI (Modem Host Interface)
+* QSM (QAIC Service Manager)
+* NSPs (Neural Signal Processors)
+* DMA Bridge
+* DDR
+
+MHI
+---
+
+AIC100 has one MHI interface over PCIe. MHI itself is documented at
+Documentation/mhi/index.rst. MHI is the mechanism the host uses to communicate
+with the QSM. Except for workload data via the DMA Bridge, all interaction with
+the device occurs via MHI.
+
+QSM
+---
+
+QAIC Service Manager. This is an ARM A53 CPU that runs the primary
+firmware of the card and performs on-card management tasks. It also
+communicates with the host via MHI. Each AIC100 has one of
+these.
+
+NSP
+---
+
+Neural Signal Processor. Each AIC100 has up to 16 of these. These are
+the processors that run the workloads on AIC100. Each NSP is a Qualcomm Hexagon
+(Q6) DSP with HVX and HMX. Each NSP can only run one workload at a time, but
+multiple NSPs may be assigned to a single workload. Since each NSP can only run
+one workload, AIC100 is limited to 16 concurrent workloads. Workload
+"scheduling" is under the purview of the host. AIC100 does not automatically
+timeslice.
+
+DMA Bridge
+----------
+
+The DMA Bridge is a custom DMA engine that manages the flow of data
+in and out of workloads. AIC100 has one of these. The DMA Bridge has 16
+channels, each consisting of a set of request/response FIFOs. Each active
+workload is assigned a single DMA Bridge channel. The DMA Bridge exposes
+hardware registers to manage the FIFOs (head/tail pointers), but requires host
+memory to store the FIFOs.
+
+DDR
+---
+
+AIC100 has on-card DDR. In total, an AIC100 can have up to 32 GB of DDR.
+This DDR is used to store workloads, data for the workloads, and is used by the
+QSM for managing the device. NSPs are granted access to sections of the DDR by
+the QSM. The host does not have direct access to the DDR, and must make
+requests to the QSM to transfer data to the DDR.
+
+High-level Use Flow
+===================
+
+AIC100 is a multi-user, programmable accelerator typically used for running
+neural networks in inference mode to efficiently perform AI operations.
+AIC100 is not intended for training neural networks. AIC100 can, however, be
+utilized for generic compute workloads.
+
+Assuming a user wants to utilize AIC100, they would follow these steps:
+
+1. Compile the workload into an ELF targeting the NSP(s)
+2. Make requests to the QSM to load the workload and related artifacts into the
+ device DDR
+3. Make a request to the QSM to activate the workload onto a set of idle NSPs
+4. Make requests to the DMA Bridge to send input data to the workload to be
+ processed, and other requests to receive processed output data from the
+ workload.
+5. Once the workload is no longer required, make a request to the QSM to
+ deactivate the workload, thus putting the NSPs back into an idle state.
+6. Once the workload and related artifacts are no longer needed for future
+ sessions, make requests to the QSM to unload the data from DDR. This frees
+ the DDR to be used by other users.
+
+
+Boot Flow
+=========
+
+AIC100 uses a flashless boot flow, derived from Qualcomm MSMs.
+
+When AIC100 is first powered on, it begins executing PBL (Primary Bootloader)
+from ROM. PBL enumerates the PCIe link, and initializes the BHI (Boot Host
+Interface) component of MHI.
+
+Using BHI, the host points PBL to the location of the SBL (Secondary Bootloader)
+image. The PBL pulls the image from the host, validates it, and begins
+execution of SBL.
+
+SBL initializes MHI, and uses MHI to notify the host that the device has entered
+the SBL stage. SBL performs a number of operations:
+
+* SBL initializes the majority of hardware (anything PBL left uninitialized),
+ including DDR.
+* SBL offloads the bootlog to the host.
+* SBL synchronizes timestamps with the host for future logging.
+* SBL uses the Sahara protocol to obtain the runtime firmware images from the
+ host.
+
+Once SBL has obtained and validated the runtime firmware, it brings the NSPs out
+of reset, and jumps into the QSM.
+
+The QSM uses MHI to notify the host that the device has entered the QSM stage
+(AMSS in MHI terms). At this point, the AIC100 device is fully functional, and
+ready to process workloads.
+
+Userspace components
+====================
+
+Compiler
+--------
+
+An open compiler for AIC100 based on upstream LLVM can be found at:
+https://github.com/quic/software-kit-for-qualcomm-cloud-ai-100-cc
+
+Usermode Driver (UMD)
+---------------------
+
+An open UMD that interfaces with the qaic kernel driver can be found at:
+https://github.com/quic/software-kit-for-qualcomm-cloud-ai-100
+
+Sahara loader
+-------------
+
+An open implementation of the Sahara protocol called kickstart can be found at:
+https://github.com/andersson/qdl
+
+MHI Channels
+============
+
+AIC100 defines a number of MHI channels for different purposes. This is a list
+of the defined channels, and their uses.
+
++----------------+---------+----------+----------------------------------------+
+| Channel name | IDs | EEs | Purpose |
++================+=========+==========+========================================+
+| QAIC_LOOPBACK | 0 & 1 | AMSS | Any data sent to the device on this |
+| | | | channel is sent back to the host. |
++----------------+---------+----------+----------------------------------------+
+| QAIC_SAHARA | 2 & 3 | SBL | Used by SBL to obtain the runtime |
+| | | | firmware from the host. |
++----------------+---------+----------+----------------------------------------+
+| QAIC_DIAG | 4 & 5 | AMSS | Used to communicate with QSM via the |
+| | | | DIAG protocol. |
++----------------+---------+----------+----------------------------------------+
+| QAIC_SSR | 6 & 7 | AMSS | Used to notify the host of subsystem |
+| | | | restart events, and to offload SSR |
+| | | | crashdumps. |
++----------------+---------+----------+----------------------------------------+
+| QAIC_QDSS | 8 & 9 | AMSS | Used for the Qualcomm Debug Subsystem. |
++----------------+---------+----------+----------------------------------------+
+| QAIC_CONTROL | 10 & 11 | AMSS | Used for the Neural Network Control |
+| | | | (NNC) protocol. This is the primary |
+| | | | channel between host and QSM for |
+| | | | managing workloads. |
++----------------+---------+----------+----------------------------------------+
+| QAIC_LOGGING | 12 & 13 | SBL | Used by the SBL to send the bootlog to |
+| | | | the host. |
++----------------+---------+----------+----------------------------------------+
+| QAIC_STATUS | 14 & 15 | AMSS | Used to notify the host of Reliability,|
+| | | | Accessibility, Serviceability (RAS) |
+| | | | events. |
++----------------+---------+----------+----------------------------------------+
+| QAIC_TELEMETRY | 16 & 17 | AMSS | Used to get/set power/thermal/etc |
+| | | | attributes. |
++----------------+---------+----------+----------------------------------------+
+| QAIC_DEBUG | 18 & 19 | AMSS | Not used. |
++----------------+---------+----------+----------------------------------------+
+| QAIC_TIMESYNC | 20 & 21 | SBL/AMSS | Used to synchronize timestamps in the |
+| | | | device side logs with the host time |
+| | | | source. |
++----------------+---------+----------+----------------------------------------+
+
+DMA Bridge
+==========
+
+Overview
+--------
+
+The DMA Bridge is one of the main interfaces to the host from the device
+(the other being MHI). As part of activating a workload to run on NSPs, the QSM
+assigns that network a DMA Bridge channel. A workload's DMA Bridge channel
+(DBC for short) is solely for the use of that workload and is not shared with
+other workloads.
+
+Each DBC is a pair of FIFOs that manage data in and out of the workload. One
+FIFO is the request FIFO. The other FIFO is the response FIFO.
+
+Each DBC contains 4 registers in hardware:
+
+* Request FIFO head pointer (offset 0x0). Read only by the host. Indicates the
+ latest item in the FIFO the device has consumed.
+* Request FIFO tail pointer (offset 0x4). Read/write by the host. Host
+ increments this register to add new items to the FIFO.
+* Response FIFO head pointer (offset 0x8). Read/write by the host. Indicates
+ the latest item in the FIFO the host has consumed.
+* Response FIFO tail pointer (offset 0xc). Read only by the host. Device
+ increments this register to add new items to the FIFO.
+
+The values in each register are indexes in the FIFO. To get the location of the
+FIFO element pointed to by the register: FIFO base address + register * element
+size.
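+
+As a minimal sketch (not driver code), resolving a register value to the
+address of the corresponding FIFO element could look like the following. The
+helper name and the explicit element_size parameter are illustrative
+assumptions:
+
+.. code-block:: c
+
+   /* Hypothetical helper: map a FIFO index (register value) to its element. */
+   static void *fifo_element_addr(void *fifo_base, u32 index, size_t element_size)
+   {
+           return (char *)fifo_base + (size_t)index * element_size;
+   }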
+
+DBC registers are exposed to the host via the second BAR. Each DBC consumes
+4KB of space in the BAR.
+
+The actual FIFOs are backed by host memory. When sending a request to the QSM
+to activate a network, the host must donate memory to be used for the FIFOs.
+Due to internal mapping limitations of the device, a single contiguous chunk of
+memory must be provided per DBC, which hosts both FIFOs. The request FIFO will
+consume the beginning of the memory chunk, and the response FIFO will consume
+the end of the memory chunk.
+
+Request FIFO
+------------
+
+A request FIFO element has the following structure:
+
+.. code-block:: c
+
+ struct request_elem {
+ u16 req_id;
+ u8 seq_id;
+ u8 pcie_dma_cmd;
+ u32 reserved;
+ u64 pcie_dma_source_addr;
+ u64 pcie_dma_dest_addr;
+ u32 pcie_dma_len;
+ u32 reserved;
+ u64 doorbell_addr;
+ u8 doorbell_attr;
+ u8 reserved;
+ u16 reserved;
+ u32 doorbell_data;
+ u32 sem_cmd0;
+ u32 sem_cmd1;
+ u32 sem_cmd2;
+ u32 sem_cmd3;
+ };
+
+Request field descriptions:
+
+req_id
+ request ID. A request FIFO element and a response FIFO element with
+ the same request ID refer to the same command.
+
+seq_id
+ sequence ID within a request. Ignored by the DMA Bridge.
+
+pcie_dma_cmd
+ describes the DMA element of this request.
+
+ * Bit(7) is the force MSI flag, which overrides the DMA Bridge MSI logic
+ and generates an MSI when this request is complete, provided QSM has
+ configured the DMA Bridge to look at this bit.
+ * Bits(6:5) are reserved.
+ * Bit(4) is the completion code flag, and indicates that the DMA Bridge
+ shall generate a response FIFO element when this request is
+ complete.
+ * Bit(3) indicates if this request is a linked list transfer(0) or a bulk
+ transfer(1).
+ * Bit(2) is reserved.
+ * Bits(1:0) indicate the type of transfer. No transfer(0), to device(1),
+ from device(2). Value 3 is illegal.
+
+pcie_dma_source_addr
+ source address for a bulk transfer, or the address of the linked list.
+
+pcie_dma_dest_addr
+ destination address for a bulk transfer.
+
+pcie_dma_len
+ length of the bulk transfer. Note that the size of this field
+ limits transfers to 4G in size.
+
+doorbell_addr
+ address of the doorbell to ring when this request is complete.
+
+doorbell_attr
+ doorbell attributes.
+
+ * Bit(7) indicates if a write to a doorbell is to occur.
+ * Bits(6:2) are reserved.
+ * Bits(1:0) contain the encoding of the doorbell length. 0 is 32-bit,
+ 1 is 16-bit, 2 is 8-bit, 3 is reserved. The doorbell address
+ must be naturally aligned to the specified length.
+
+doorbell_data
+ data to write to the doorbell. Only the bits corresponding to
+ the doorbell length are valid.
+
+sem_cmdN
+ semaphore command.
+
+ * Bit(31) indicates this semaphore command is enabled.
+ * Bit(30) is the to-device DMA fence. Block this request until all
+ to-device DMA transfers are complete.
+ * Bit(29) is the from-device DMA fence. Block this request until all
+ from-device DMA transfers are complete.
+ * Bits(28:27) are reserved.
+ * Bits(26:24) are the semaphore command. 0 is NOP. 1 is init with the
+ specified value. 2 is increment. 3 is decrement. 4 is wait
+ until the semaphore is equal to the specified value. 5 is wait
+ until the semaphore is greater or equal to the specified value.
+ 6 is "P", wait until semaphore is greater than 0, then
+ decrement by 1. 7 is reserved.
+ * Bit(23) is reserved.
+ * Bit(22) is the semaphore sync. 0 is post sync, which means that the
+ semaphore operation is done after the DMA transfer. 1 is
+ presync, which gates the DMA transfer. Only one presync is
+ allowed per request.
+ * Bit(21) is reserved.
+ * Bits(20:16) are the index of the semaphore to operate on.
+ * Bits(15:12) are reserved.
+ * Bits(11:0) are the semaphore value to use in operations.
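+
+As a hedged illustration of the bit layouts above (the values are chosen
+arbitrarily and the surrounding helper context is omitted), a host might
+encode pcie_dma_cmd and one sem_cmd word like this:
+
+.. code-block:: c
+
+   /* pcie_dma_cmd: bulk transfer to the device, request a completion. */
+   u8 cmd = (1 << 4) |  /* completion code flag - generate a response element */
+            (1 << 3) |  /* bulk transfer (not linked list) */
+            (1 << 0);   /* transfer type: to device */
+
+   /* sem_cmd: enabled presync that waits until semaphore 2 equals 0. */
+   u32 sem = (1U << 31) | /* semaphore command enabled */
+             (1U << 22) | /* presync - gates the DMA transfer */
+             (4U << 24) | /* command 4: wait until equal to the value */
+             (2U << 16) | /* semaphore index 2 */
+             (0U << 0);   /* semaphore value 0 */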
+
+Overall, a request is processed in 4 steps:
+
+1. If specified, the presync semaphore condition must be true
+2. If enabled, the DMA transfer occurs
+3. If specified, the postsync semaphore conditions must be true
+4. If enabled, the doorbell is written
+
+By using the semaphores in conjunction with the workload running on the NSPs,
+the data pipeline can be synchronized such that the host can queue multiple
+requests of data for the workload to process, but the DMA Bridge will only copy
+the data into the memory of the workload when the workload is ready to process
+the next input.
+
+Response FIFO
+-------------
+
+Once a request is fully processed, a response FIFO element is generated if
+specified in pcie_dma_cmd. The structure of a response FIFO element:
+
+.. code-block:: c
+
+ struct response_elem {
+ u16 req_id;
+ u16 completion_code;
+ };
+
+req_id
+ matches the req_id of the request that generated this element.
+
+completion_code
+ status of this request. 0 is success. Non-zero is an error.
+
+The DMA Bridge will generate an MSI to the host in reaction to activity in the
+response FIFO of a DBC. The DMA Bridge hardware has an IRQ storm mitigation
+algorithm, where it will only generate an MSI when the response FIFO transitions
+from empty to non-empty (unless force MSI is enabled and triggered). In
+response to this MSI, the host is expected to drain the response FIFO, and must
+take care to handle any race conditions between draining the FIFO, and the
+device inserting elements into the FIFO.
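+
+A hedged sketch of such a drain loop follows. The register offsets are the DBC
+offsets listed earlier and struct response_elem is defined above; the function
+name, the nelem parameter, and process_response() are illustrative assumptions,
+not the actual driver implementation:
+
+.. code-block:: c
+
+   static void drain_response_fifo(void __iomem *dbc_regs,
+                                   struct response_elem *resp, u32 nelem)
+   {
+           u32 head = readl(dbc_regs + 0x8); /* response head, owned by host */
+           u32 tail = readl(dbc_regs + 0xc); /* response tail, owned by device */
+
+           do {
+                   while (head != tail) {
+                           process_response(&resp[head]);
+                           head = (head + 1) % nelem;
+                   }
+                   /* publish progress, then re-check for new elements */
+                   writel(head, dbc_regs + 0x8);
+                   tail = readl(dbc_regs + 0xc);
+           } while (head != tail);
+   }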
+
+Neural Network Control (NNC) Protocol
+=====================================
+
+The NNC protocol is how the host makes requests to the QSM to manage workloads.
+It uses the QAIC_CONTROL MHI channel.
+
+Each NNC request is packaged into a message. Each message is a series of
+transactions. A passthrough type transaction can contain elements known as
+commands.
+
+QSM requires NNC messages be little endian encoded and the fields be naturally
+aligned. Since there are 64-bit elements in some NNC messages, 64-bit alignment
+must be maintained.
+
+A message contains a header and then a series of transactions. A message may be
+at most 4K in size from QSM to the host. From the host to the QSM, a message
+can be at most 64K (maximum size of a single MHI packet), but there is a
+continuation feature where message N+1 can be marked as a continuation of
+message N. This is used for exceedingly large DMA xfer transactions.
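+
+Purely to illustrate the encoding rules (little endian fields, natural
+alignment, and 64-bit alignment when 64-bit fields are present), a transaction
+might be declared as below. The structure and field names are hypothetical and
+do not describe the actual wire format:
+
+.. code-block:: c
+
+   struct example_dma_xfer_trans {
+           __le32 type;  /* transaction type */
+           __le32 len;   /* length of this transaction in bytes */
+           __le64 addr;  /* 64-bit field, kept 64-bit aligned */
+           __le64 size;
+   };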
+
+Transaction descriptions
+------------------------
+
+passthrough
+ Allows userspace to send an opaque payload directly to the QSM.
+ This is used for NNC commands. Userspace is responsible for managing
+ the QSM message requirements in the payload.
+
+dma_xfer
+ DMA transfer. Describes an object that the QSM should DMA into the
+ device via address and size tuples.
+
+activate
+ Activate a workload onto NSPs. The host must provide memory to be
+ used by the DBC.
+
+deactivate
+ Deactivate an active workload and return the NSPs to idle.
+
+status
+ Query the QSM about its NNC implementation. Returns the NNC version,
+ and whether CRCs are used.
+
+terminate
+ Release a user's resources.
+
+dma_xfer_cont
+ Continuation of a previous DMA transfer. If a DMA transfer
+ cannot be specified in a single message (highly fragmented), this
+ transaction can be used to specify more ranges.
+
+validate_partition
+ Query the QSM to determine if a partition identifier is valid.
+
+Each message is tagged with a user id and a partition id. The user id allows
+QSM to track resources, and release them when the user goes away (e.g. the
+process crashes). A partition id identifies the resource partition, managed by
+QSM, that this message applies to.
+
+Messages may have CRCs. Messages should have CRCs applied until the QSM
+reports via the status transaction that CRCs are not needed. The QSM on the
+SA9000P requires CRCs for black channel safing.
+
+Subsystem Restart (SSR)
+=======================
+
+SSR is the concept of limiting the impact of an error. An AIC100 device may
+have multiple users, each with their own workload running. If the workload of
+one user crashes, the fallout of that should be limited to that workload and not
+impact other workloads. SSR accomplishes this.
+
+If a particular workload crashes, QSM notifies the host via the QAIC_SSR MHI
+channel. This notification identifies the workload by its assigned DBC. A
+multi-stage recovery process is then used to clean up both sides, and get the
+DBC/NSPs into a working state.
+
+When SSR occurs, any state in the workload is lost. Any inputs that were in
+process, or queued but not yet serviced, are lost. The loaded artifacts will
+remain in on-card DDR, but the host will need to re-activate the workload if
+it desires to recover the workload.
+
+Reliability, Accessibility, Serviceability (RAS)
+================================================
+
+AIC100 is expected to be deployed in server systems where RAS ideology is
+applied. Simply put, RAS is the concept of detecting, classifying, and
+reporting errors. While PCIe has AER (Advanced Error Reporting) which factors
+into RAS, AER does not allow for a device to report details about internal
+errors. Therefore, AIC100 implements a custom RAS mechanism. When a RAS event
+occurs, QSM will report the event with appropriate details via the QAIC_STATUS
+MHI channel. A sysadmin may determine that a particular device needs
+additional service based on RAS reports.
+
+Telemetry
+=========
+
+QSM has the ability to report various physical attributes of the device, and in
+some cases, to allow the host to control them. Examples include thermal limits,
+thermal readings, and power readings. These items are communicated via the
+QAIC_TELEMETRY MHI channel.
diff --git a/Documentation/accel/qaic/index.rst b/Documentation/accel/qaic/index.rst
new file mode 100644
index 000000000000..ad19b88d1a66
--- /dev/null
+++ b/Documentation/accel/qaic/index.rst
@@ -0,0 +1,13 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+
+=====================================
+ accel/qaic Qualcomm Cloud AI driver
+=====================================
+
+The accel/qaic driver supports the Qualcomm Cloud AI machine learning
+accelerator cards.
+
+.. toctree::
+
+ qaic
+ aic100
diff --git a/Documentation/accel/qaic/qaic.rst b/Documentation/accel/qaic/qaic.rst
new file mode 100644
index 000000000000..72a70ab6e3a8
--- /dev/null
+++ b/Documentation/accel/qaic/qaic.rst
@@ -0,0 +1,170 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+
+=============
+ QAIC driver
+=============
+
+The QAIC driver is the Kernel Mode Driver (KMD) for the AIC100 family of AI
+accelerator products.
+
+Interrupts
+==========
+
+While the AIC100 DMA Bridge hardware implements an IRQ storm mitigation
+mechanism, it is still possible for an IRQ storm to occur. A storm can happen
+if the workload is particularly quick, and the host is responsive. If the host
+can drain the response FIFO as quickly as the device can insert elements into
+it, then the device will frequently transition the response FIFO from empty to
+non-empty and generate MSIs at a rate equivalent to the speed of the
+workload's ability to process inputs. The lprnet (license plate reader network)
+workload is known to trigger this condition, and can generate in excess of 100k
+MSIs per second. It has been observed that most systems cannot tolerate this
+for long, and will crash due to some form of watchdog triggered by the overhead
+of the interrupt controller interrupting the host CPU.
+
+To mitigate this issue, the QAIC driver implements specific IRQ handling. When
+QAIC receives an IRQ, it disables that line. This prevents the interrupt
+controller from interrupting the CPU. Then QAIC drains the FIFO. Once the FIFO
+is drained, QAIC implements a "last chance" polling algorithm where QAIC will
+sleep for a time to see if the workload will generate more activity. The IRQ
+line remains disabled during this time. If no activity is detected, QAIC exits
+polling mode and reenables the IRQ line.
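+
+A hedged sketch of that sequence, using generic kernel IRQ helpers together
+with hypothetical per-DBC helpers and a hypothetical poll_interval_us, rather
+than the actual QAIC code:
+
+.. code-block:: c
+
+   static irqreturn_t dbc_irq_threaded(int irq, void *data)
+   {
+           struct dbc *dbc = data; /* hypothetical per-DBC context */
+
+           disable_irq_nosync(irq);
+           do {
+                   drain_response_fifo(dbc);
+                   /* "last chance" polling window before re-arming the IRQ */
+                   usleep_range(poll_interval_us, 2 * poll_interval_us);
+           } while (response_fifo_has_work(dbc));
+           enable_irq(irq);
+
+           return IRQ_HANDLED;
+   }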
+
+This mitigation in QAIC is very effective. The same lprnet use case that
+generates 100k IRQs per second (per /proc/interrupts) is reduced to roughly 64
+IRQs over 5 minutes while keeping the host system stable, and having the same
+workload throughput performance (within run to run noise variation).
+
+
+Neural Network Control (NNC) Protocol
+=====================================
+
+The implementation of NNC is split between the KMD (QAIC) and the UMD. In
+general, QAIC understands how to encode/decode the NNC wire protocol, and
+handles the elements of the protocol which require kernel space knowledge to
+process (for example, mapping host memory to device IOVAs). QAIC understands
+the structure of a message, and
+all of the transactions. QAIC does not understand commands (the payload of a
+passthrough transaction).
+
+QAIC handles and enforces the required little endianness and 64-bit alignment,
+to the degree that it can. Since QAIC does not know the contents of a
+passthrough transaction, it relies on the UMD to satisfy the requirements.
+
+The terminate transaction is of particular use to QAIC. QAIC is not aware of
+the resources that are loaded onto a device since the majority of that activity
+occurs within NNC commands. As a result, QAIC does not have the means to
+roll back userspace activity. To ensure that a userspace client's resources
+are fully released in the case of a process crash, or a bug, QAIC uses the
+terminate transaction to let QSM know when a user has gone away, and the resources
+can be released.
+
+QSM can report a version number of the NNC protocol it supports. This is in the
+form of a Major number and a Minor number.
+
+Major number updates indicate changes to the NNC protocol which impact the
+message format, or transactions (impacts QAIC).
+
+Minor number updates indicate changes to the NNC protocol which impact the
+commands (does not impact QAIC).
+
+uAPI
+====
+
+QAIC defines a number of driver specific IOCTLs as part of the userspace API.
+This section describes those APIs.
+
+DRM_IOCTL_QAIC_MANAGE
+ This IOCTL allows userspace to send a NNC request to the QSM. The call will
+ block until a response is received, or the request has timed out.
+
+DRM_IOCTL_QAIC_CREATE_BO
+ This IOCTL allows userspace to allocate a buffer object (BO) which can send
+ or receive data from a workload. The call will return a GEM handle that
+ represents the allocated buffer. The BO is not usable until it has been
+ sliced (see DRM_IOCTL_QAIC_ATTACH_SLICE_BO).
+
+DRM_IOCTL_QAIC_MMAP_BO
+ This IOCTL allows userspace to prepare an allocated BO to be mmap'd into the
+ userspace process.
+
+DRM_IOCTL_QAIC_ATTACH_SLICE_BO
+ This IOCTL allows userspace to slice a BO in preparation for sending the BO
+ to the device. Slicing is the operation of describing what portions of a BO
+ get sent where to a workload. This requires a set of DMA transfers for the
+ DMA Bridge, and as such, locks the BO to a specific DBC.
+
+DRM_IOCTL_QAIC_EXECUTE_BO
+ This IOCTL allows userspace to submit a set of sliced BOs to the device. The
+ call is non-blocking. Success only indicates that the BOs have been queued
+ to the device, but does not guarantee they have been executed.
+
+DRM_IOCTL_QAIC_PARTIAL_EXECUTE_BO
+ This IOCTL operates like DRM_IOCTL_QAIC_EXECUTE_BO, but it allows userspace
+ to shrink the BOs sent to the device for this specific call. If a BO
+ typically has N inputs, but only a subset of those is available, this IOCTL
+ allows userspace to indicate that only the first M bytes of the BO should be
+ sent to the device to minimize data transfer overhead. This IOCTL dynamically
+ recomputes the slicing, and therefore has some processing overhead before the
+ BOs can be queued to the device.
+
+DRM_IOCTL_QAIC_WAIT_BO
+ This IOCTL allows userspace to determine when a particular BO has been
+ processed by the device. The call will block until either the BO has been
+ processed and can be re-queued to the device, or a timeout occurs.
+
+DRM_IOCTL_QAIC_PERF_STATS_BO
+ This IOCTL allows userspace to collect performance statistics on the most
+ recent execution of a BO. This allows userspace to construct an end to end
+ timeline of the BO processing for a performance analysis.
+
+DRM_IOCTL_QAIC_PART_DEV
+ This IOCTL allows userspace to request a duplicate "shadow device". This extra
+ accelN device is associated with a specific partition of resources on the
+ AIC100 device and can be used for limiting a process to some subset of
+ resources.
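+
+As a rough, hedged sketch of how these IOCTLs fit together for a single BO
+(the argument structures from include/uapi/drm/qaic_accel.h are elided, and
+the device node path is an assumption):
+
+.. code-block:: c
+
+   #include <fcntl.h>
+
+   int fd = open("/dev/accel/accel0", O_RDWR); /* accel node path assumed */
+
+   /* 1. DRM_IOCTL_QAIC_MANAGE           - load/activate the workload via NNC */
+   /* 2. DRM_IOCTL_QAIC_CREATE_BO        - allocate a buffer object */
+   /* 3. DRM_IOCTL_QAIC_MMAP_BO + mmap() - make the BO CPU accessible */
+   /* 4. DRM_IOCTL_QAIC_ATTACH_SLICE_BO  - slice the BO and bind it to a DBC */
+   /* 5. DRM_IOCTL_QAIC_EXECUTE_BO       - queue the BO to the device (non-blocking) */
+   /* 6. DRM_IOCTL_QAIC_WAIT_BO          - wait until the device has processed the BO */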
+
+Userspace Client Isolation
+==========================
+
+AIC100 supports multiple clients. Multiple DBCs can be consumed by a single
+client, and multiple clients can each consume one or more DBCs. Workloads
+may contain sensitive information; therefore, only the client that owns the
+workload should be allowed to interface with the DBC.
+
+Clients are identified by the instance associated with their open(). A client
+may only use memory they allocate, and DBCs that are assigned to their
+workloads. Attempts to access resources assigned to other clients will be
+rejected.
+
+Module parameters
+=================
+
+QAIC supports the following module parameters:
+
+**datapath_polling (bool)**
+
+Configures QAIC to use a polling thread for datapath events instead of relying
+on the device interrupts. Useful for platforms with broken multiMSI. Must be
+set at QAIC driver initialization. Default is 0 (off).
+
+**mhi_timeout_ms (unsigned int)**
+
+Sets the timeout value for MHI operations in milliseconds (ms). Must be set
+at the time the driver detects a device. Default is 2000 (2 seconds).
+
+**control_resp_timeout_s (unsigned int)**
+
+Sets the timeout value for QSM responses to NNC messages in seconds (s). Must
+be set at the time the driver is sending a request to QSM. Default is 60 (one
+minute).
+
+**wait_exec_default_timeout_ms (unsigned int)**
+
+Sets the default timeout for the wait_exec ioctl in milliseconds (ms). Must be
+set prior to the wait_exec ioctl call. A value specified in the ioctl call
+overrides this for that call. Default is 5000 (5 seconds).
+
+**datapath_poll_interval_us (unsigned int)**
+
+Sets the polling interval in microseconds (us) when datapath polling is active.
+Takes effect at the next polling interval. Default is 100 (100 us).
diff --git a/Documentation/devicetree/bindings/display/panel/elida,kd35t133.yaml b/Documentation/devicetree/bindings/display/panel/elida,kd35t133.yaml
index 7adb83e2e8d9..265ab6d30572 100644
--- a/Documentation/devicetree/bindings/display/panel/elida,kd35t133.yaml
+++ b/Documentation/devicetree/bindings/display/panel/elida,kd35t133.yaml
@@ -17,7 +17,9 @@ properties:
const: elida,kd35t133
reg: true
backlight: true
+ port: true
reset-gpios: true
+ rotation: true
iovcc-supply:
description: regulator that supplies the iovcc voltage
vdd-supply:
@@ -27,6 +29,7 @@ required:
- compatible
- reg
- backlight
+ - port
- iovcc-supply
- vdd-supply
@@ -43,6 +46,12 @@ examples:
backlight = <&backlight>;
iovcc-supply = <&vcc_1v8>;
vdd-supply = <&vcc3v3_lcd>;
+
+ port {
+ mipi_in_panel: endpoint {
+ remote-endpoint = <&mipi_out_panel>;
+ };
+ };
};
};
diff --git a/Documentation/devicetree/bindings/display/panel/feiyang,fy07024di26a30d.yaml b/Documentation/devicetree/bindings/display/panel/feiyang,fy07024di26a30d.yaml
index 1cf84c8dd85e..92df69e80a82 100644
--- a/Documentation/devicetree/bindings/display/panel/feiyang,fy07024di26a30d.yaml
+++ b/Documentation/devicetree/bindings/display/panel/feiyang,fy07024di26a30d.yaml
@@ -26,6 +26,7 @@ properties:
dvdd-supply:
description: 3v3 digital regulator
+ port: true
reset-gpios: true
backlight: true
@@ -35,6 +36,7 @@ required:
- reg
- avdd-supply
- dvdd-supply
+ - port
additionalProperties: false
@@ -53,5 +55,11 @@ examples:
dvdd-supply = <&reg_dldo2>;
reset-gpios = <&pio 3 24 GPIO_ACTIVE_HIGH>; /* LCD-RST: PD24 */
backlight = <&backlight>;
+
+ port {
+ mipi_in_panel: endpoint {
+ remote-endpoint = <&mipi_out_panel>;
+ };
+ };
};
};
diff --git a/Documentation/devicetree/bindings/display/panel/panel-timing.yaml b/Documentation/devicetree/bindings/display/panel/panel-timing.yaml
index 0d317e61edd8..aea69b84ca5d 100644
--- a/Documentation/devicetree/bindings/display/panel/panel-timing.yaml
+++ b/Documentation/devicetree/bindings/display/panel/panel-timing.yaml
@@ -17,29 +17,29 @@ description: |
The parameters are defined as seen in the following illustration.
- +----------+-------------------------------------+----------+-------+
- | | ^ | | |
- | | |vback_porch | | |
- | | v | | |
- +----------#######################################----------+-------+
- | # ^ # | |
- | # | # | |
- | hback # | # hfront | hsync |
- | porch # | hactive # porch | len |
- |<-------->#<-------+--------------------------->#<-------->|<----->|
- | # | # | |
- | # |vactive # | |
- | # | # | |
- | # v # | |
- +----------#######################################----------+-------+
- | | ^ | | |
- | | |vfront_porch | | |
- | | v | | |
- +----------+-------------------------------------+----------+-------+
- | | ^ | | |
- | | |vsync_len | | |
- | | v | | |
- +----------+-------------------------------------+----------+-------+
+ +-------+----------+-------------------------------------+----------+
+ | | | ^ | |
+ | | | |vsync_len | |
+ | | | v | |
+ +-------+----------+-------------------------------------+----------+
+ | | | ^ | |
+ | | | |vback_porch | |
+ | | | v | |
+ +-------+----------#######################################----------+
+ | | # ^ # |
+ | | # | # |
+ | hsync | hback # | # hfront |
+ | len | porch # | hactive # porch |
+ |<----->|<-------->#<-------+--------------------------->#<-------->|
+ | | # | # |
+ | | # |vactive # |
+ | | # | # |
+ | | # v # |
+ +-------+----------#######################################----------+
+ | | | ^ | |
+ | | | |vfront_porch | |
+ | | | v | |
+ +-------+----------+-------------------------------------+----------+
The following is the panel timings shown with time on the x-axis.
diff --git a/Documentation/devicetree/bindings/display/panel/sitronix,st7701.yaml b/Documentation/devicetree/bindings/display/panel/sitronix,st7701.yaml
index 83d30eadf7d9..4dc0cd4a6a77 100644
--- a/Documentation/devicetree/bindings/display/panel/sitronix,st7701.yaml
+++ b/Documentation/devicetree/bindings/display/panel/sitronix,st7701.yaml
@@ -42,7 +42,9 @@ properties:
IOVCC-supply:
description: I/O system regulator
+ port: true
reset-gpios: true
+ rotation: true
backlight: true
@@ -51,6 +53,7 @@ required:
- reg
- VCC-supply
- IOVCC-supply
+ - port
- reset-gpios
additionalProperties: false
@@ -70,5 +73,11 @@ examples:
IOVCC-supply = <&reg_dldo2>;
reset-gpios = <&pio 3 24 GPIO_ACTIVE_HIGH>; /* LCD-RST: PD24 */
backlight = <&backlight>;
+
+ port {
+ mipi_in_panel: endpoint {
+ remote-endpoint = <&mipi_out_panel>;
+ };
+ };
};
};
diff --git a/Documentation/devicetree/bindings/display/panel/sitronix,st7789v.yaml b/Documentation/devicetree/bindings/display/panel/sitronix,st7789v.yaml
index d984b59daa4a..fa6556363cca 100644
--- a/Documentation/devicetree/bindings/display/panel/sitronix,st7789v.yaml
+++ b/Documentation/devicetree/bindings/display/panel/sitronix,st7789v.yaml
@@ -26,6 +26,10 @@ properties:
spi-cpha: true
spi-cpol: true
+ dc-gpios:
+ maxItems: 1
+ description: DCX pin, Display data/command selection pin in parallel interface
+
required:
- compatible
- reg
diff --git a/Documentation/devicetree/bindings/display/panel/xinpeng,xpp055c272.yaml b/Documentation/devicetree/bindings/display/panel/xinpeng,xpp055c272.yaml
index d5c46a3cc2b0..c407deb6afb1 100644
--- a/Documentation/devicetree/bindings/display/panel/xinpeng,xpp055c272.yaml
+++ b/Documentation/devicetree/bindings/display/panel/xinpeng,xpp055c272.yaml
@@ -17,6 +17,7 @@ properties:
const: xinpeng,xpp055c272
reg: true
backlight: true
+ port: true
reset-gpios: true
iovcc-supply:
description: regulator that supplies the iovcc voltage
@@ -27,6 +28,7 @@ required:
- compatible
- reg
- backlight
+ - port
- iovcc-supply
- vci-supply
@@ -44,6 +46,12 @@ examples:
backlight = <&backlight>;
iovcc-supply = <&vcc_1v8>;
vci-supply = <&vcc3v3_lcd>;
+
+ port {
+ mipi_in_panel: endpoint {
+ remote-endpoint = <&mipi_out_panel>;
+ };
+ };
};
};
diff --git a/MAINTAINERS b/MAINTAINERS
index 9736e04d3bd3..d037504a5748 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -17265,6 +17265,16 @@ F: Documentation/devicetree/bindings/clock/qcom,*
F: drivers/clk/qcom/
F: include/dt-bindings/clock/qcom,*
+QUALCOMM CLOUD AI (QAIC) DRIVER
+M: Jeffrey Hugo <[email protected]>
+S: Supported
+T: git git://anongit.freedesktop.org/drm/drm-misc
+F: Documentation/accel/qaic/
+F: drivers/accel/qaic/
+F: include/uapi/drm/qaic_accel.h
+
QUALCOMM CORE POWER REDUCTION (CPR) AVS DRIVER
M: Bjorn Andersson <[email protected]>
M: Konrad Dybcio <[email protected]>
diff --git a/drivers/accel/Kconfig b/drivers/accel/Kconfig
index c437206aa3f1..64065fb8922b 100644
--- a/drivers/accel/Kconfig
+++ b/drivers/accel/Kconfig
@@ -26,5 +26,6 @@ menuconfig DRM_ACCEL
source "drivers/accel/habanalabs/Kconfig"
source "drivers/accel/ivpu/Kconfig"
+source "drivers/accel/qaic/Kconfig"
endif
diff --git a/drivers/accel/Makefile b/drivers/accel/Makefile
index f22fd44d586b..ab3df932937f 100644
--- a/drivers/accel/Makefile
+++ b/drivers/accel/Makefile
@@ -2,3 +2,4 @@
obj-$(CONFIG_DRM_ACCEL_HABANALABS) += habanalabs/
obj-$(CONFIG_DRM_ACCEL_IVPU) += ivpu/
+obj-$(CONFIG_DRM_ACCEL_QAIC) += qaic/
diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c
index 231f29bb5025..eb6405f9bf6b 100644
--- a/drivers/accel/ivpu/ivpu_drv.c
+++ b/drivers/accel/ivpu/ivpu_drv.c
@@ -433,6 +433,10 @@ static int ivpu_pci_init(struct ivpu_device *vdev)
/* Clear any pending errors */
pcie_capability_clear_word(pdev, PCI_EXP_DEVSTA, 0x3f);
+ /* VPU MTL does not require PCI spec 10m D3hot delay */
+ if (ivpu_is_mtl(vdev))
+ pdev->d3hot_delay = 0;
+
ret = pcim_enable_device(pdev);
if (ret) {
ivpu_err(vdev, "Failed to enable PCI device: %d\n", ret);
diff --git a/drivers/accel/qaic/Kconfig b/drivers/accel/qaic/Kconfig
new file mode 100644
index 000000000000..a9f866230058
--- /dev/null
+++ b/drivers/accel/qaic/Kconfig
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Qualcomm Cloud AI accelerators driver
+#
+
+config DRM_ACCEL_QAIC
+ tristate "Qualcomm Cloud AI accelerators"
+ depends on DRM_ACCEL
+ depends on PCI && HAS_IOMEM
+ depends on MHI_BUS
+ depends on MMU
+ select CRC32
+ help
+ Enables driver for Qualcomm's Cloud AI accelerator PCIe cards that are
+ designed to accelerate Deep Learning inference workloads.
+
+ The driver manages the PCIe devices and provides an IOCTL interface
+ for users to submit workloads to the devices.
+
+ If unsure, say N.
+
+ To compile this driver as a module, choose M here: the
+ module will be called qaic.
diff --git a/drivers/accel/qaic/Makefile b/drivers/accel/qaic/Makefile
new file mode 100644
index 000000000000..d5f4952ae79a
--- /dev/null
+++ b/drivers/accel/qaic/Makefile
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Makefile for Qualcomm Cloud AI accelerators driver
+#
+
+obj-$(CONFIG_DRM_ACCEL_QAIC) := qaic.o
+
+qaic-y := \
+ mhi_controller.o \
+ mhi_qaic_ctrl.o \
+ qaic_control.o \
+ qaic_data.o \
+ qaic_drv.o
diff --git a/drivers/accel/qaic/mhi_controller.c b/drivers/accel/qaic/mhi_controller.c
new file mode 100644
index 000000000000..5036e58e7235
--- /dev/null
+++ b/drivers/accel/qaic/mhi_controller.c
@@ -0,0 +1,563 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/* Copyright (c) 2019-2021, The Linux Foundation. All rights reserved. */
+/* Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved. */
+
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/memblock.h>
+#include <linux/mhi.h>
+#include <linux/moduleparam.h>
+#include <linux/pci.h>
+#include <linux/sizes.h>
+
+#include "mhi_controller.h"
+#include "qaic.h"
+
+#define MAX_RESET_TIME_SEC 25
+
+static unsigned int mhi_timeout_ms = 2000; /* 2 sec default */
+module_param(mhi_timeout_ms, uint, 0600);
+MODULE_PARM_DESC(mhi_timeout_ms, "MHI controller timeout value");
+
+static struct mhi_channel_config aic100_channels[] = {
+ {
+ .name = "QAIC_LOOPBACK",
+ .num = 0,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_TO_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_LOOPBACK",
+ .num = 1,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_FROM_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_SAHARA",
+ .num = 2,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_TO_DEVICE,
+ .ee_mask = MHI_CH_EE_SBL,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_SAHARA",
+ .num = 3,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_FROM_DEVICE,
+ .ee_mask = MHI_CH_EE_SBL,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_DIAG",
+ .num = 4,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_TO_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_DIAG",
+ .num = 5,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_FROM_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_SSR",
+ .num = 6,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_TO_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_SSR",
+ .num = 7,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_FROM_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_QDSS",
+ .num = 8,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_TO_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_QDSS",
+ .num = 9,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_FROM_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_CONTROL",
+ .num = 10,
+ .num_elements = 128,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_TO_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_CONTROL",
+ .num = 11,
+ .num_elements = 128,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_FROM_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_LOGGING",
+ .num = 12,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_TO_DEVICE,
+ .ee_mask = MHI_CH_EE_SBL,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_LOGGING",
+ .num = 13,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_FROM_DEVICE,
+ .ee_mask = MHI_CH_EE_SBL,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_STATUS",
+ .num = 14,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_TO_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_STATUS",
+ .num = 15,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_FROM_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_TELEMETRY",
+ .num = 16,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_TO_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_TELEMETRY",
+ .num = 17,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_FROM_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_DEBUG",
+ .num = 18,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_TO_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_DEBUG",
+ .num = 19,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_FROM_DEVICE,
+ .ee_mask = MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .name = "QAIC_TIMESYNC",
+ .num = 20,
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_TO_DEVICE,
+ .ee_mask = MHI_CH_EE_SBL | MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+ {
+ .num = 21,
+ .name = "QAIC_TIMESYNC",
+ .num_elements = 32,
+ .local_elements = 0,
+ .event_ring = 0,
+ .dir = DMA_FROM_DEVICE,
+ .ee_mask = MHI_CH_EE_SBL | MHI_CH_EE_AMSS,
+ .pollcfg = 0,
+ .doorbell = MHI_DB_BRST_DISABLE,
+ .lpm_notify = false,
+ .offload_channel = false,
+ .doorbell_mode_switch = false,
+ .auto_queue = false,
+ .wake_capable = false,
+ },
+};
+
+static struct mhi_event_config aic100_events[] = {
+ {
+ .num_elements = 32,
+ .irq_moderation_ms = 0,
+ .irq = 0,
+ .channel = U32_MAX,
+ .priority = 1,
+ .mode = MHI_DB_BRST_DISABLE,
+ .data_type = MHI_ER_CTRL,
+ .hardware_event = false,
+ .client_managed = false,
+ .offload_channel = false,
+ },
+};
+
+static struct mhi_controller_config aic100_config = {
+ .max_channels = 128,
+ .timeout_ms = 0, /* controlled by mhi_timeout_ms */
+ .buf_len = 0,
+ .num_channels = ARRAY_SIZE(aic100_channels),
+ .ch_cfg = aic100_channels,
+ .num_events = ARRAY_SIZE(aic100_events),
+ .event_cfg = aic100_events,
+ .use_bounce_buf = false,
+ .m2_no_db = false,
+};
+
+static int mhi_read_reg(struct mhi_controller *mhi_cntrl, void __iomem *addr, u32 *out)
+{
+ u32 tmp = readl_relaxed(addr);
+
+ if (tmp == U32_MAX)
+ return -EIO;
+
+ *out = tmp;
+
+ return 0;
+}
+
+static void mhi_write_reg(struct mhi_controller *mhi_cntrl, void __iomem *addr, u32 val)
+{
+ writel_relaxed(val, addr);
+}
+
+static int mhi_runtime_get(struct mhi_controller *mhi_cntrl)
+{
+ return 0;
+}
+
+static void mhi_runtime_put(struct mhi_controller *mhi_cntrl)
+{
+}
+
+static void mhi_status_cb(struct mhi_controller *mhi_cntrl, enum mhi_callback reason)
+{
+ struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(mhi_cntrl->cntrl_dev));
+
+ /* this event occurs in atomic context */
+ if (reason == MHI_CB_FATAL_ERROR)
+ pci_err(qdev->pdev, "Fatal error received from device. Attempting to recover\n");
+ /* this event occurs in non-atomic context */
+ if (reason == MHI_CB_SYS_ERROR)
+ qaic_dev_reset_clean_local_state(qdev, true);
+}
+
+static int mhi_reset_and_async_power_up(struct mhi_controller *mhi_cntrl)
+{
+ u8 time_sec = 1;
+ int current_ee;
+ int ret;
+
+ /* Reset the device to bring the device in PBL EE */
+ mhi_soc_reset(mhi_cntrl);
+
+ /*
+ * Keep checking the execution environment (EE) at one second
+ * intervals.
+ */
+ do {
+ msleep(1000);
+ current_ee = mhi_get_exec_env(mhi_cntrl);
+ } while (current_ee != MHI_EE_PBL && time_sec++ <= MAX_RESET_TIME_SEC);
+
+ /* If the device is in PBL EE retry power up */
+ if (current_ee == MHI_EE_PBL)
+ ret = mhi_async_power_up(mhi_cntrl);
+ else
+ ret = -EIO;
+
+ return ret;
+}
+
+struct mhi_controller *qaic_mhi_register_controller(struct pci_dev *pci_dev, void __iomem *mhi_bar,
+ int mhi_irq)
+{
+ struct mhi_controller *mhi_cntrl;
+ int ret;
+
+ mhi_cntrl = devm_kzalloc(&pci_dev->dev, sizeof(*mhi_cntrl), GFP_KERNEL);
+ if (!mhi_cntrl)
+ return ERR_PTR(-ENOMEM);
+
+ mhi_cntrl->cntrl_dev = &pci_dev->dev;
+
+ /*
+ * Covers the entire possible physical RAM region. The remote side is
+ * going to calculate the size of this range, so subtract 1 to prevent
+ * rollover.
+ */
+ mhi_cntrl->iova_start = 0;
+ mhi_cntrl->iova_stop = PHYS_ADDR_MAX - 1;
+ mhi_cntrl->status_cb = mhi_status_cb;
+ mhi_cntrl->runtime_get = mhi_runtime_get;
+ mhi_cntrl->runtime_put = mhi_runtime_put;
+ mhi_cntrl->read_reg = mhi_read_reg;
+ mhi_cntrl->write_reg = mhi_write_reg;
+ mhi_cntrl->regs = mhi_bar;
+ mhi_cntrl->reg_len = SZ_4K;
+ mhi_cntrl->nr_irqs = 1;
+ mhi_cntrl->irq = devm_kmalloc(&pci_dev->dev, sizeof(*mhi_cntrl->irq), GFP_KERNEL);
+
+ if (!mhi_cntrl->irq)
+ return ERR_PTR(-ENOMEM);
+
+ mhi_cntrl->irq[0] = mhi_irq;
+ mhi_cntrl->fw_image = "qcom/aic100/sbl.bin";
+
+ /* use latest configured timeout */
+ aic100_config.timeout_ms = mhi_timeout_ms;
+ ret = mhi_register_controller(mhi_cntrl, &aic100_config);
+ if (ret) {
+ pci_err(pci_dev, "mhi_register_controller failed %d\n", ret);
+ return ERR_PTR(ret);
+ }
+
+ ret = mhi_prepare_for_power_up(mhi_cntrl);
+ if (ret) {
+ pci_err(pci_dev, "mhi_prepare_for_power_up failed %d\n", ret);
+ goto prepare_power_up_fail;
+ }
+
+ ret = mhi_async_power_up(mhi_cntrl);
+ /*
+ * If EIO is returned, it is possible that the device is in SBL EE, which is
+ * undesired. Issue a SoC reset to the device and try to power up again.
+ */
+ if (ret == -EIO && MHI_EE_SBL == mhi_get_exec_env(mhi_cntrl)) {
+ pci_err(pci_dev, "Found device in SBL at MHI init. Attempting a reset.\n");
+ ret = mhi_reset_and_async_power_up(mhi_cntrl);
+ }
+
+ if (ret) {
+ pci_err(pci_dev, "mhi_async_power_up failed %d\n", ret);
+ goto power_up_fail;
+ }
+
+ return mhi_cntrl;
+
+power_up_fail:
+ mhi_unprepare_after_power_down(mhi_cntrl);
+prepare_power_up_fail:
+ mhi_unregister_controller(mhi_cntrl);
+ return ERR_PTR(ret);
+}
+
+void qaic_mhi_free_controller(struct mhi_controller *mhi_cntrl, bool link_up)
+{
+ mhi_power_down(mhi_cntrl, link_up);
+ mhi_unprepare_after_power_down(mhi_cntrl);
+ mhi_unregister_controller(mhi_cntrl);
+}
+
+void qaic_mhi_start_reset(struct mhi_controller *mhi_cntrl)
+{
+ mhi_power_down(mhi_cntrl, true);
+}
+
+void qaic_mhi_reset_done(struct mhi_controller *mhi_cntrl)
+{
+ struct pci_dev *pci_dev = container_of(mhi_cntrl->cntrl_dev, struct pci_dev, dev);
+ int ret;
+
+ ret = mhi_async_power_up(mhi_cntrl);
+ if (ret)
+ pci_err(pci_dev, "mhi_async_power_up failed after reset %d\n", ret);
+}
diff --git a/drivers/accel/qaic/mhi_controller.h b/drivers/accel/qaic/mhi_controller.h
new file mode 100644
index 000000000000..2ae45d768e24
--- /dev/null
+++ b/drivers/accel/qaic/mhi_controller.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (c) 2019-2020, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef MHICONTROLLERQAIC_H_
+#define MHICONTROLLERQAIC_H_
+
+struct mhi_controller *qaic_mhi_register_controller(struct pci_dev *pci_dev, void __iomem *mhi_bar,
+ int mhi_irq);
+void qaic_mhi_free_controller(struct mhi_controller *mhi_cntrl, bool link_up);
+void qaic_mhi_start_reset(struct mhi_controller *mhi_cntrl);
+void qaic_mhi_reset_done(struct mhi_controller *mhi_cntrl);
+
+#endif /* MHICONTROLLERQAIC_H_ */
diff --git a/drivers/accel/qaic/mhi_qaic_ctrl.c b/drivers/accel/qaic/mhi_qaic_ctrl.c
new file mode 100644
index 000000000000..0c7e571f1f12
--- /dev/null
+++ b/drivers/accel/qaic/mhi_qaic_ctrl.c
@@ -0,0 +1,569 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved. */
+
+#include <linux/kernel.h>
+#include <linux/mhi.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/poll.h>
+#include <linux/xarray.h>
+#include <uapi/linux/eventpoll.h>
+
+#include "mhi_qaic_ctrl.h"
+#include "qaic.h"
+
+#define MHI_QAIC_CTRL_DRIVER_NAME "mhi_qaic_ctrl"
+#define MHI_QAIC_CTRL_MAX_MINORS 128
+#define MHI_MAX_MTU 0xffff
+static DEFINE_XARRAY_ALLOC(mqc_xa);
+static struct class *mqc_dev_class;
+static int mqc_dev_major;
+
+/**
+ * struct mqc_buf - Buffer structure used to receive data from device
+ * @data: Address of data to read from
+ * @odata: Original address returned from *alloc() API. Used to free this buf.
+ * @len: Length of data in bytes
+ * @node: This buffer will be part of the list managed in struct mqc_dev
+ */
+struct mqc_buf {
+ void *data;
+ void *odata;
+ size_t len;
+ struct list_head node;
+};
+
+/**
+ * struct mqc_dev - MHI QAIC Control Device
+ * @minor: MQC device node minor number
+ * @mhi_dev: Associated mhi device object
+ * @mtu: Max TRE buffer length
+ * @enabled: Flag to track the state of the MQC device
+ * @lock: Mutex lock to serialize access to open_count
+ * @read_lock: Mutex lock to serialize readers
+ * @write_lock: Mutex lock to serialize writers
+ * @ul_wq: Wait queue for writers
+ * @dl_wq: Wait queue for readers
+ * @dl_queue_lock: Spin lock to serialize access to download queue
+ * @dl_queue: Queue of downloaded buffers
+ * @open_count: Track open counts
+ * @ref_count: Reference count for this structure
+ */
+struct mqc_dev {
+ u32 minor;
+ struct mhi_device *mhi_dev;
+ size_t mtu;
+ bool enabled;
+ struct mutex lock;
+ struct mutex read_lock;
+ struct mutex write_lock;
+ wait_queue_head_t ul_wq;
+ wait_queue_head_t dl_wq;
+ spinlock_t dl_queue_lock;
+ struct list_head dl_queue;
+ unsigned int open_count;
+ struct kref ref_count;
+};
+
+static void mqc_dev_release(struct kref *ref)
+{
+ struct mqc_dev *mqcdev = container_of(ref, struct mqc_dev, ref_count);
+
+ mutex_destroy(&mqcdev->read_lock);
+ mutex_destroy(&mqcdev->write_lock);
+ mutex_destroy(&mqcdev->lock);
+ kfree(mqcdev);
+}
+
+static int mhi_qaic_ctrl_fill_dl_queue(struct mqc_dev *mqcdev)
+{
+ struct mhi_device *mhi_dev = mqcdev->mhi_dev;
+ struct mqc_buf *ctrlbuf;
+ int rx_budget;
+ int ret = 0;
+ void *data;
+
+ rx_budget = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE);
+ if (rx_budget < 0)
+ return -EIO;
+
+ while (rx_budget--) {
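+ /*
+ * Single allocation: the first mtu bytes are the receive buffer
+ * handed to MHI, and the struct mqc_buf bookkeeping lives right
+ * after them.
+ */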
+ data = kzalloc(mqcdev->mtu + sizeof(*ctrlbuf), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ ctrlbuf = data + mqcdev->mtu;
+ ctrlbuf->odata = data;
+
+ ret = mhi_queue_buf(mhi_dev, DMA_FROM_DEVICE, data, mqcdev->mtu, MHI_EOT);
+ if (ret) {
+ kfree(data);
+ dev_err(&mhi_dev->dev, "Failed to queue buffer\n");
+ return ret;
+ }
+ }
+
+ return ret;
+}
+
+static int mhi_qaic_ctrl_dev_start_chan(struct mqc_dev *mqcdev)
+{
+ struct device *dev = &mqcdev->mhi_dev->dev;
+ int ret = 0;
+
+ ret = mutex_lock_interruptible(&mqcdev->lock);
+ if (ret)
+ return ret;
+ if (!mqcdev->enabled) {
+ ret = -ENODEV;
+ goto release_dev_lock;
+ }
+ if (!mqcdev->open_count) {
+ ret = mhi_prepare_for_transfer(mqcdev->mhi_dev);
+ if (ret) {
+ dev_err(dev, "Error starting transfer channels\n");
+ goto release_dev_lock;
+ }
+
+ ret = mhi_qaic_ctrl_fill_dl_queue(mqcdev);
+ if (ret) {
+ dev_err(dev, "Error filling download queue.\n");
+ goto mhi_unprepare;
+ }
+ }
+ mqcdev->open_count++;
+ mutex_unlock(&mqcdev->lock);
+
+ return 0;
+
+mhi_unprepare:
+ mhi_unprepare_from_transfer(mqcdev->mhi_dev);
+release_dev_lock:
+ mutex_unlock(&mqcdev->lock);
+ return ret;
+}
+
+static struct mqc_dev *mqc_dev_get_by_minor(unsigned int minor)
+{
+ struct mqc_dev *mqcdev;
+
+ xa_lock(&mqc_xa);
+ mqcdev = xa_load(&mqc_xa, minor);
+ if (mqcdev)
+ kref_get(&mqcdev->ref_count);
+ xa_unlock(&mqc_xa);
+
+ return mqcdev;
+}
+
+static int mhi_qaic_ctrl_open(struct inode *inode, struct file *filp)
+{
+ struct mqc_dev *mqcdev;
+ int ret;
+
+ mqcdev = mqc_dev_get_by_minor(iminor(inode));
+ if (!mqcdev) {
+ pr_debug("mqc: minor %d not found\n", iminor(inode));
+ return -EINVAL;
+ }
+
+ ret = mhi_qaic_ctrl_dev_start_chan(mqcdev);
+ if (ret) {
+ kref_put(&mqcdev->ref_count, mqc_dev_release);
+ return ret;
+ }
+
+ filp->private_data = mqcdev;
+
+ return 0;
+}
+
+static void mhi_qaic_ctrl_buf_free(struct mqc_buf *ctrlbuf)
+{
+ list_del(&ctrlbuf->node);
+ kfree(ctrlbuf->odata);
+}
+
+static void __mhi_qaic_ctrl_release(struct mqc_dev *mqcdev)
+{
+ struct mqc_buf *ctrlbuf, *tmp;
+
+ mhi_unprepare_from_transfer(mqcdev->mhi_dev);
+ wake_up_interruptible(&mqcdev->ul_wq);
+ wake_up_interruptible(&mqcdev->dl_wq);
+ /*
+ * Free the dl_queue. Since MHI transfers have already been unprepared, we
+ * do not expect any callbacks that update dl_queue, hence there is no need
+ * to grab the dl_queue lock.
+ */
+ mutex_lock(&mqcdev->read_lock);
+ list_for_each_entry_safe(ctrlbuf, tmp, &mqcdev->dl_queue, node)
+ mhi_qaic_ctrl_buf_free(ctrlbuf);
+ mutex_unlock(&mqcdev->read_lock);
+}
+
+static int mhi_qaic_ctrl_release(struct inode *inode, struct file *file)
+{
+ struct mqc_dev *mqcdev = file->private_data;
+
+ mutex_lock(&mqcdev->lock);
+ mqcdev->open_count--;
+ if (!mqcdev->open_count && mqcdev->enabled)
+ __mhi_qaic_ctrl_release(mqcdev);
+ mutex_unlock(&mqcdev->lock);
+
+ kref_put(&mqcdev->ref_count, mqc_dev_release);
+
+ return 0;
+}
+
+static __poll_t mhi_qaic_ctrl_poll(struct file *file, poll_table *wait)
+{
+ struct mqc_dev *mqcdev = file->private_data;
+ struct mhi_device *mhi_dev;
+ __poll_t mask = 0;
+
+ mhi_dev = mqcdev->mhi_dev;
+
+ poll_wait(file, &mqcdev->ul_wq, wait);
+ poll_wait(file, &mqcdev->dl_wq, wait);
+
+ mutex_lock(&mqcdev->lock);
+ if (!mqcdev->enabled) {
+ mutex_unlock(&mqcdev->lock);
+ return EPOLLERR;
+ }
+
+ spin_lock_bh(&mqcdev->dl_queue_lock);
+ if (!list_empty(&mqcdev->dl_queue))
+ mask |= EPOLLIN | EPOLLRDNORM;
+ spin_unlock_bh(&mqcdev->dl_queue_lock);
+
+ if (mutex_lock_interruptible(&mqcdev->write_lock)) {
+ mutex_unlock(&mqcdev->lock);
+ return EPOLLERR;
+ }
+ if (mhi_get_free_desc_count(mhi_dev, DMA_TO_DEVICE) > 0)
+ mask |= EPOLLOUT | EPOLLWRNORM;
+ mutex_unlock(&mqcdev->write_lock);
+ mutex_unlock(&mqcdev->lock);
+
+ dev_dbg(&mhi_dev->dev, "Client attempted to poll, returning mask 0x%x\n", mask);
+
+ return mask;
+}
+
+static int mhi_qaic_ctrl_tx(struct mqc_dev *mqcdev)
+{
+ int ret;
+
+ ret = wait_event_interruptible(mqcdev->ul_wq, !mqcdev->enabled ||
+ mhi_get_free_desc_count(mqcdev->mhi_dev, DMA_TO_DEVICE) > 0);
+
+ if (!mqcdev->enabled)
+ return -ENODEV;
+
+ return ret;
+}
+
+static ssize_t mhi_qaic_ctrl_write(struct file *file, const char __user *buf, size_t count,
+ loff_t *offp)
+{
+ struct mqc_dev *mqcdev = file->private_data;
+ struct mhi_device *mhi_dev;
+ size_t bytes_xfered = 0;
+ struct device *dev;
+ int ret, nr_desc;
+
+ mhi_dev = mqcdev->mhi_dev;
+ dev = &mhi_dev->dev;
+
+ if (!mhi_dev->ul_chan)
+ return -EOPNOTSUPP;
+
+ if (!buf || !count)
+ return -EINVAL;
+
+ dev_dbg(dev, "Request to transfer %zu bytes\n", count);
+
+ ret = mhi_qaic_ctrl_tx(mqcdev);
+ if (ret)
+ return ret;
+
+ if (mutex_lock_interruptible(&mqcdev->write_lock))
+ return -EINTR;
+
+ nr_desc = mhi_get_free_desc_count(mhi_dev, DMA_TO_DEVICE);
+ if (nr_desc * mqcdev->mtu < count) {
+ ret = -EMSGSIZE;
+ dev_dbg(dev, "Buffer too big to transfer\n");
+ goto unlock_mutex;
+ }
+
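+ /*
+ * Copy the user buffer in mtu-sized chunks. All but the last chunk are
+ * queued with MHI_CHAIN so the device sees one logical transfer that
+ * ends with MHI_EOT.
+ */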
+ while (count != bytes_xfered) {
+ enum mhi_flags flags;
+ size_t to_copy;
+ void *kbuf;
+
+ to_copy = min_t(size_t, count - bytes_xfered, mqcdev->mtu);
+ kbuf = kmalloc(to_copy, GFP_KERNEL);
+ if (!kbuf) {
+ ret = -ENOMEM;
+ goto unlock_mutex;
+ }
+
+ ret = copy_from_user(kbuf, buf + bytes_xfered, to_copy);
+ if (ret) {
+ kfree(kbuf);
+ ret = -EFAULT;
+ goto unlock_mutex;
+ }
+
+ if (bytes_xfered + to_copy == count)
+ flags = MHI_EOT;
+ else
+ flags = MHI_CHAIN;
+
+ ret = mhi_queue_buf(mhi_dev, DMA_TO_DEVICE, kbuf, to_copy, flags);
+ if (ret) {
+ kfree(kbuf);
+ dev_err(dev, "Failed to queue buf of size %zu\n", to_copy);
+ goto unlock_mutex;
+ }
+
+ bytes_xfered += to_copy;
+ }
+
+ mutex_unlock(&mqcdev->write_lock);
+ dev_dbg(dev, "bytes xferred: %zu\n", bytes_xfered);
+
+ return bytes_xfered;
+
+unlock_mutex:
+ mutex_unlock(&mqcdev->write_lock);
+ return ret;
+}
+
+static int mhi_qaic_ctrl_rx(struct mqc_dev *mqcdev)
+{
+ int ret;
+
+ ret = wait_event_interruptible(mqcdev->dl_wq,
+ !mqcdev->enabled || !list_empty(&mqcdev->dl_queue));
+
+ if (!mqcdev->enabled)
+ return -ENODEV;
+
+ return ret;
+}
+
+static ssize_t mhi_qaic_ctrl_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
+{
+ struct mqc_dev *mqcdev = file->private_data;
+ struct mqc_buf *ctrlbuf;
+ size_t to_copy;
+ int ret;
+
+ if (!mqcdev->mhi_dev->dl_chan)
+ return -EOPNOTSUPP;
+
+ ret = mhi_qaic_ctrl_rx(mqcdev);
+ if (ret)
+ return ret;
+
+ if (mutex_lock_interruptible(&mqcdev->read_lock))
+ return -EINTR;
+
+ ctrlbuf = list_first_entry_or_null(&mqcdev->dl_queue, struct mqc_buf, node);
+ if (!ctrlbuf) {
+ mutex_unlock(&mqcdev->read_lock);
+ ret = -ENODEV;
+ goto error_out;
+ }
+
+ to_copy = min_t(size_t, count, ctrlbuf->len);
+ if (copy_to_user(buf, ctrlbuf->data, to_copy)) {
+ mutex_unlock(&mqcdev->read_lock);
+ dev_dbg(&mqcdev->mhi_dev->dev, "Failed to copy data to user buffer\n");
+ ret = -EFAULT;
+ goto error_out;
+ }
+
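+ /*
+ * Track any unread remainder so a later read() continues where this
+ * one left off; once the buffer is fully consumed, free it and refill
+ * the download queue.
+ */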
+ ctrlbuf->len -= to_copy;
+ ctrlbuf->data += to_copy;
+
+ if (!ctrlbuf->len) {
+ spin_lock_bh(&mqcdev->dl_queue_lock);
+ mhi_qaic_ctrl_buf_free(ctrlbuf);
+ spin_unlock_bh(&mqcdev->dl_queue_lock);
+ mhi_qaic_ctrl_fill_dl_queue(mqcdev);
+ dev_dbg(&mqcdev->mhi_dev->dev, "Read buf freed\n");
+ }
+
+ mutex_unlock(&mqcdev->read_lock);
+ return to_copy;
+
+error_out:
+ mutex_unlock(&mqcdev->read_lock);
+ return ret;
+}
+
+static const struct file_operations mhidev_fops = {
+ .owner = THIS_MODULE,
+ .open = mhi_qaic_ctrl_open,
+ .release = mhi_qaic_ctrl_release,
+ .read = mhi_qaic_ctrl_read,
+ .write = mhi_qaic_ctrl_write,
+ .poll = mhi_qaic_ctrl_poll,
+};
+
+static void mhi_qaic_ctrl_ul_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result)
+{
+ struct mqc_dev *mqcdev = dev_get_drvdata(&mhi_dev->dev);
+
+ dev_dbg(&mhi_dev->dev, "%s: status: %d xfer_len: %zu\n", __func__,
+ mhi_result->transaction_status, mhi_result->bytes_xferd);
+
+ kfree(mhi_result->buf_addr);
+
+ if (!mhi_result->transaction_status)
+ wake_up_interruptible(&mqcdev->ul_wq);
+}
+
+static void mhi_qaic_ctrl_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result)
+{
+ struct mqc_dev *mqcdev = dev_get_drvdata(&mhi_dev->dev);
+ struct mqc_buf *ctrlbuf;
+
+ dev_dbg(&mhi_dev->dev, "%s: status: %d receive_len: %zu\n", __func__,
+ mhi_result->transaction_status, mhi_result->bytes_xferd);
+
+ if (mhi_result->transaction_status &&
+ mhi_result->transaction_status != -EOVERFLOW) {
+ kfree(mhi_result->buf_addr);
+ return;
+ }
+
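+ /*
+ * The struct mqc_buf bookkeeping was placed right after the mtu-sized
+ * data area when this buffer was queued; recover it from the buffer
+ * address.
+ */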
+ ctrlbuf = mhi_result->buf_addr + mqcdev->mtu;
+ ctrlbuf->data = mhi_result->buf_addr;
+ ctrlbuf->len = mhi_result->bytes_xferd;
+ spin_lock_bh(&mqcdev->dl_queue_lock);
+ list_add_tail(&ctrlbuf->node, &mqcdev->dl_queue);
+ spin_unlock_bh(&mqcdev->dl_queue_lock);
+
+ wake_up_interruptible(&mqcdev->dl_wq);
+}
+
+static int mhi_qaic_ctrl_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id)
+{
+ struct mqc_dev *mqcdev;
+ struct device *dev;
+ int ret;
+
+ mqcdev = kzalloc(sizeof(*mqcdev), GFP_KERNEL);
+ if (!mqcdev)
+ return -ENOMEM;
+
+ kref_init(&mqcdev->ref_count);
+ mutex_init(&mqcdev->lock);
+ mqcdev->mhi_dev = mhi_dev;
+
+ ret = xa_alloc(&mqc_xa, &mqcdev->minor, mqcdev, XA_LIMIT(0, MHI_QAIC_CTRL_MAX_MINORS),
+ GFP_KERNEL);
+ if (ret) {
+ kfree(mqcdev);
+ return ret;
+ }
+
+ init_waitqueue_head(&mqcdev->ul_wq);
+ init_waitqueue_head(&mqcdev->dl_wq);
+ mutex_init(&mqcdev->read_lock);
+ mutex_init(&mqcdev->write_lock);
+ spin_lock_init(&mqcdev->dl_queue_lock);
+ INIT_LIST_HEAD(&mqcdev->dl_queue);
+ mqcdev->mtu = min_t(size_t, id->driver_data, MHI_MAX_MTU);
+ mqcdev->enabled = true;
+ mqcdev->open_count = 0;
+ dev_set_drvdata(&mhi_dev->dev, mqcdev);
+
+ dev = device_create(mqc_dev_class, &mhi_dev->dev, MKDEV(mqc_dev_major, mqcdev->minor),
+ mqcdev, "%s", dev_name(&mhi_dev->dev));
+ if (IS_ERR(dev)) {
+ xa_erase(&mqc_xa, mqcdev->minor);
+ dev_set_drvdata(&mhi_dev->dev, NULL);
+ kfree(mqcdev);
+ return PTR_ERR(dev);
+ }
+
+ return 0;
+}
+
+static void mhi_qaic_ctrl_remove(struct mhi_device *mhi_dev)
+{
+ struct mqc_dev *mqcdev = dev_get_drvdata(&mhi_dev->dev);
+
+ device_destroy(mqc_dev_class, MKDEV(mqc_dev_major, mqcdev->minor));
+
+ mutex_lock(&mqcdev->lock);
+ mqcdev->enabled = false;
+ if (mqcdev->open_count)
+ __mhi_qaic_ctrl_release(mqcdev);
+ mutex_unlock(&mqcdev->lock);
+
+ xa_erase(&mqc_xa, mqcdev->minor);
+ kref_put(&mqcdev->ref_count, mqc_dev_release);
+}
+
+/* .driver_data stores max mtu */
+static const struct mhi_device_id mhi_qaic_ctrl_match_table[] = {
+ { .chan = "QAIC_SAHARA", .driver_data = SZ_32K},
+ {},
+};
+MODULE_DEVICE_TABLE(mhi, mhi_qaic_ctrl_match_table);
+
+static struct mhi_driver mhi_qaic_ctrl_driver = {
+ .id_table = mhi_qaic_ctrl_match_table,
+ .remove = mhi_qaic_ctrl_remove,
+ .probe = mhi_qaic_ctrl_probe,
+ .ul_xfer_cb = mhi_qaic_ctrl_ul_xfer_cb,
+ .dl_xfer_cb = mhi_qaic_ctrl_dl_xfer_cb,
+ .driver = {
+ .name = MHI_QAIC_CTRL_DRIVER_NAME,
+ },
+};
+
+int mhi_qaic_ctrl_init(void)
+{
+ int ret;
+
+ ret = register_chrdev(0, MHI_QAIC_CTRL_DRIVER_NAME, &mhidev_fops);
+ if (ret < 0)
+ return ret;
+
+ mqc_dev_major = ret;
+ mqc_dev_class = class_create(THIS_MODULE, MHI_QAIC_CTRL_DRIVER_NAME);
+ if (IS_ERR(mqc_dev_class)) {
+ ret = PTR_ERR(mqc_dev_class);
+ goto unregister_chrdev;
+ }
+
+ ret = mhi_driver_register(&mhi_qaic_ctrl_driver);
+ if (ret)
+ goto destroy_class;
+
+ return 0;
+
+destroy_class:
+ class_destroy(mqc_dev_class);
+unregister_chrdev:
+ unregister_chrdev(mqc_dev_major, MHI_QAIC_CTRL_DRIVER_NAME);
+ return ret;
+}
+
+void mhi_qaic_ctrl_deinit(void)
+{
+ mhi_driver_unregister(&mhi_qaic_ctrl_driver);
+ class_destroy(mqc_dev_class);
+ unregister_chrdev(mqc_dev_major, MHI_QAIC_CTRL_DRIVER_NAME);
+ xa_destroy(&mqc_xa);
+}
diff --git a/drivers/accel/qaic/mhi_qaic_ctrl.h b/drivers/accel/qaic/mhi_qaic_ctrl.h
new file mode 100644
index 000000000000..930b3ace1a59
--- /dev/null
+++ b/drivers/accel/qaic/mhi_qaic_ctrl.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef __MHI_QAIC_CTRL_H__
+#define __MHI_QAIC_CTRL_H__
+
+int mhi_qaic_ctrl_init(void);
+void mhi_qaic_ctrl_deinit(void);
+
+#endif /* __MHI_QAIC_CTRL_H__ */
diff --git a/drivers/accel/qaic/qaic.h b/drivers/accel/qaic/qaic.h
new file mode 100644
index 000000000000..f2bd637a0d4e
--- /dev/null
+++ b/drivers/accel/qaic/qaic.h
@@ -0,0 +1,282 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ *
+ * Copyright (c) 2019-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef _QAIC_H_
+#define _QAIC_H_
+
+#include <linux/interrupt.h>
+#include <linux/kref.h>
+#include <linux/mhi.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/srcu.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+#include <drm/drm_device.h>
+#include <drm/drm_gem.h>
+
+#define QAIC_DBC_BASE SZ_128K
+#define QAIC_DBC_SIZE SZ_4K
+
+#define QAIC_NO_PARTITION -1
+
+#define QAIC_DBC_OFF(i) ((i) * QAIC_DBC_SIZE + QAIC_DBC_BASE)
+
+#define to_qaic_bo(obj) container_of(obj, struct qaic_bo, base)
+
+extern bool datapath_polling;
+
+struct qaic_user {
+ /* Uniquely identifies this user for the device */
+ int handle;
+ struct kref ref_count;
+ /* Char device opened by this user */
+ struct qaic_drm_device *qddev;
+ /* Node in list of users that opened this drm device */
+ struct list_head node;
+ /* SRCU used to synchronize this user during cleanup */
+ struct srcu_struct qddev_lock;
+ atomic_t chunk_id;
+};
+
+struct dma_bridge_chan {
+ /* Pointer to the device struct maintained by the driver */
+ struct qaic_device *qdev;
+ /* ID of this DMA bridge channel (DBC) */
+ unsigned int id;
+ /* Synchronizes access to xfer_list */
+ spinlock_t xfer_lock;
+ /* Base address of request queue */
+ void *req_q_base;
+ /* Base address of response queue */
+ void *rsp_q_base;
+ /*
+ * Base bus address of request queue. Response queue bus address can be
+ * calculated by adding request queue size to this variable
+ */
+ dma_addr_t dma_addr;
+ /* Total size of the request and response queues in bytes */
+ u32 total_size;
+ /* Capacity of request/response queue */
+ u32 nelem;
+ /* The user that opened this DBC */
+ struct qaic_user *usr;
+ /*
+ * Request ID of the next memory handle that goes in the request queue.
+ * One memory handle can enqueue more than one request element; all
+ * requests that belong to the same memory handle have the same request ID
+ */
+ u16 next_req_id;
+ /* true: DBC is in use; false: DBC not in use */
+ bool in_use;
+ /*
+ * Base address of device registers. Used to read/write request and
+ * response queue's head and tail pointer of this DBC.
+ */
+ void __iomem *dbc_base;
+ /* Head of list where each node is a memory handle queued in request queue */
+ struct list_head xfer_list;
+ /* Synchronizes DBC readers during cleanup */
+ struct srcu_struct ch_lock;
+ /*
+ * When this DBC is released, any thread waiting on this wait queue is
+ * woken up
+ */
+ wait_queue_head_t dbc_release;
+ /* Head of list where each node is a bo associated with this DBC */
+ struct list_head bo_lists;
+ /* The irq line for this DBC. Used for polling */
+ unsigned int irq;
+ /* Polling work item to simulate interrupts */
+ struct work_struct poll_work;
+};
+
+struct qaic_device {
+ /* Pointer to base PCI device struct of our physical device */
+ struct pci_dev *pdev;
+ /* Req. ID of request that will be queued next in MHI control device */
+ u32 next_seq_num;
+ /* Base address of bar 0 */
+ void __iomem *bar_0;
+ /* Base address of bar 2 */
+ void __iomem *bar_2;
+ /* Controller structure for MHI devices */
+ struct mhi_controller *mhi_cntrl;
+ /* MHI control channel device */
+ struct mhi_device *cntl_ch;
+ /* List of requests queued in MHI control device */
+ struct list_head cntl_xfer_list;
+ /* Synchronizes MHI control device transactions and its xfer list */
+ struct mutex cntl_mutex;
+ /* Array of DBC struct of this device */
+ struct dma_bridge_chan *dbc;
+ /* Work queue for tasks related to MHI control device */
+ struct workqueue_struct *cntl_wq;
+ /* Synchronizes all the users of device during cleanup */
+ struct srcu_struct dev_lock;
+ /* true: Device under reset; false: Device not under reset */
+ bool in_reset;
+ /*
+ * true: A tx MHI transaction has failed and a rx buffer is still queued
+ * in the control device. Such a buffer is considered a lost rx buffer.
+ * false: No rx buffer is lost in the control device.
+ */
+ bool cntl_lost_buf;
+ /* Maximum number of DBC supported by this device */
+ u32 num_dbc;
+ /* Reference to the drm_device for this device when it is created */
+ struct qaic_drm_device *qddev;
+ /* Generate the CRC of a control message */
+ u32 (*gen_crc)(void *msg);
+ /* Validate the CRC of a control message */
+ bool (*valid_crc)(void *msg);
+};
+
+struct qaic_drm_device {
+ /* Pointer to the root device struct driven by this driver */
+ struct qaic_device *qdev;
+ /*
+ * The physical device can be partitioned into a number of logical
+ * devices, and each logical device is given a partition id. This member
+ * stores that id. QAIC_NO_PARTITION is a sentinel used to mark that this
+ * drm device is the actual physical device.
+ */
+ s32 partition_id;
+ /* Pointer to the drm device struct of this drm device */
+ struct drm_device *ddev;
+ /* Head in list of users who have opened this drm device */
+ struct list_head users;
+ /* Synchronizes access to users list */
+ struct mutex users_mutex;
+};
+
+struct qaic_bo {
+ struct drm_gem_object base;
+ /* Scatter/gather table for allocated/imported BO */
+ struct sg_table *sgt;
+ /* BO size requested by user. GEM object might be bigger in size. */
+ u64 size;
+ /* Head in list of slices of this BO */
+ struct list_head slices;
+ /* Total nents, for all slices of this BO */
+ int total_slice_nents;
+ /*
+ * Direction of transfer. It can assume only two values, DMA_TO_DEVICE and
+ * DMA_FROM_DEVICE.
+ */
+ int dir;
+ /* The pointer of the DBC which operates on this BO */
+ struct dma_bridge_chan *dbc;
+ /* Number of slices that belong to this buffer */
+ u32 nr_slice;
+ /* Number of slices that have been transferred by the DMA engine */
+ u32 nr_slice_xfer_done;
+ /* true = BO is queued for execution, false = BO is not queued */
+ bool queued;
+ /*
+ * If true then user has attached slicing information to this BO by
+ * calling DRM_IOCTL_QAIC_ATTACH_SLICE_BO ioctl.
+ */
+ bool sliced;
+ /* Request ID of this BO if it is queued for execution */
+ u16 req_id;
+ /* Handle assigned to this BO */
+ u32 handle;
+ /* Wait on this for completion of DMA transfer of this BO */
+ struct completion xfer_done;
+ /*
+ * Node in linked list where head is dbc->xfer_list.
+ * This linked list contains BOs that are queued for DMA transfer.
+ */
+ struct list_head xfer_list;
+ /*
+ * Node in linked list where head is dbc->bo_lists.
+ * This linked list contains BOs that are associated with the DBC it is
+ * linked to.
+ */
+ struct list_head bo_list;
+ struct {
+ /*
+ * Latest timestamp (ns) at which the kernel received a request to
+ * execute this BO
+ */
+ u64 req_received_ts;
+ /*
+ * Latest timestamp (ns) at which the kernel enqueued requests of
+ * this BO for execution in the DMA queue
+ */
+ u64 req_submit_ts;
+ /*
+ * Latest timestamp (ns) at which the kernel received a completion
+ * interrupt for requests of this BO
+ */
+ u64 req_processed_ts;
+ /*
+ * Number of elements already enqueued in the DMA queue before
+ * enqueuing requests of this BO
+ */
+ u32 queue_level_before;
+ } perf_stats;
+
+};
+
+struct bo_slice {
+ /* Mapped pages */
+ struct sg_table *sgt;
+ /* Number of requests required to queue in DMA queue */
+ int nents;
+ /* See enum dma_data_direction */
+ int dir;
+ /* Actual requests that will be copied in DMA queue */
+ struct dbc_req *reqs;
+ struct kref ref_count;
+ /* true: No DMA transfer required */
+ bool no_xfer;
+ /* Pointer to the parent BO handle */
+ struct qaic_bo *bo;
+ /* Node in list of slices maintained by parent BO */
+ struct list_head slice;
+ /* Size of this slice in bytes */
+ u64 size;
+ /* Offset of this slice in buffer */
+ u64 offset;
+};
+
+int get_dbc_req_elem_size(void);
+int get_dbc_rsp_elem_size(void);
+int get_cntl_version(struct qaic_device *qdev, struct qaic_user *usr, u16 *major, u16 *minor);
+int qaic_manage_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv);
+void qaic_mhi_ul_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result);
+
+void qaic_mhi_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result);
+
+int qaic_control_open(struct qaic_device *qdev);
+void qaic_control_close(struct qaic_device *qdev);
+void qaic_release_usr(struct qaic_device *qdev, struct qaic_user *usr);
+
+irqreturn_t dbc_irq_threaded_fn(int irq, void *data);
+irqreturn_t dbc_irq_handler(int irq, void *data);
+int disable_dbc(struct qaic_device *qdev, u32 dbc_id, struct qaic_user *usr);
+void enable_dbc(struct qaic_device *qdev, u32 dbc_id, struct qaic_user *usr);
+void wakeup_dbc(struct qaic_device *qdev, u32 dbc_id);
+void release_dbc(struct qaic_device *qdev, u32 dbc_id);
+
+void wake_all_cntl(struct qaic_device *qdev);
+void qaic_dev_reset_clean_local_state(struct qaic_device *qdev, bool exit_reset);
+
+struct drm_gem_object *qaic_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf);
+
+int qaic_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv);
+int qaic_mmap_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv);
+int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv);
+int qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv);
+int qaic_partial_execute_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv);
+int qaic_wait_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv);
+int qaic_perf_stats_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv);
+void irq_polling_work(struct work_struct *work);
+
+#endif /* _QAIC_H_ */
diff --git a/drivers/accel/qaic/qaic_control.c b/drivers/accel/qaic/qaic_control.c
new file mode 100644
index 000000000000..9f216eb6f76e
--- /dev/null
+++ b/drivers/accel/qaic/qaic_control.c
@@ -0,0 +1,1526 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/* Copyright (c) 2019-2021, The Linux Foundation. All rights reserved. */
+/* Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved. */
+
+#include <asm/byteorder.h>
+#include <linux/completion.h>
+#include <linux/crc32.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/mhi.h>
+#include <linux/mm.h>
+#include <linux/moduleparam.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <linux/scatterlist.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+#include <drm/drm_device.h>
+#include <drm/drm_file.h>
+#include <uapi/drm/qaic_accel.h>
+
+#include "qaic.h"
+
+#define MANAGE_MAGIC_NUMBER ((__force __le32)0x43494151) /* "QAIC" in little endian */
+#define QAIC_DBC_Q_GAP SZ_256
+#define QAIC_DBC_Q_BUF_ALIGN SZ_4K
+#define QAIC_MANAGE_EXT_MSG_LENGTH SZ_64K /* Max DMA message length */
+#define QAIC_WRAPPER_MAX_SIZE SZ_4K
+#define QAIC_MHI_RETRY_WAIT_MS 100
+#define QAIC_MHI_RETRY_MAX 20
+
+static unsigned int control_resp_timeout_s = 60; /* 60 sec default */
+module_param(control_resp_timeout_s, uint, 0600);
+MODULE_PARM_DESC(control_resp_timeout_s, "Timeout for NNC responses from QSM");
+
+struct manage_msg {
+ u32 len;
+ u32 count;
+ u8 data[];
+};
+
+/*
+ * Wire encoding structures for the manage protocol.
+ * All fields are little endian on the wire.
+ */
+struct wire_msg_hdr {
+ __le32 crc32; /* crc of everything following this field in the message */
+ __le32 magic_number;
+ __le32 sequence_number;
+ __le32 len; /* length of this message */
+ __le32 count; /* number of transactions in this message */
+ __le32 handle; /* unique id to track the resources consumed */
+ __le32 partition_id; /* partition id for the request (signed) */
+ __le32 padding; /* must be 0 */
+} __packed;
+
+struct wire_msg {
+ struct wire_msg_hdr hdr;
+ u8 data[];
+} __packed;
+
+struct wire_trans_hdr {
+ __le32 type;
+ __le32 len;
+} __packed;
+
+/* Each message sent from the driver to the device is organized as a list of wrapper_msg */
+struct wrapper_msg {
+ struct list_head list;
+ struct kref ref_count;
+ u32 len; /* length of data to transfer */
+ struct wrapper_list *head;
+ union {
+ struct wire_msg msg;
+ struct wire_trans_hdr trans;
+ };
+};
+
+struct wrapper_list {
+ struct list_head list;
+ spinlock_t lock; /* Protects the list state during additions and removals */
+};
+
+struct wire_trans_passthrough {
+ struct wire_trans_hdr hdr;
+ u8 data[];
+} __packed;
+
+struct wire_addr_size_pair {
+ __le64 addr;
+ __le64 size;
+} __packed;
+
+struct wire_trans_dma_xfer {
+ struct wire_trans_hdr hdr;
+ __le32 tag;
+ __le32 count;
+ __le32 dma_chunk_id;
+ __le32 padding;
+ struct wire_addr_size_pair data[];
+} __packed;
+
+/* Initiated by device to continue the DMA xfer of a large piece of data */
+struct wire_trans_dma_xfer_cont {
+ struct wire_trans_hdr hdr;
+ __le32 dma_chunk_id;
+ __le32 padding;
+ __le64 xferred_size;
+} __packed;
+
+struct wire_trans_activate_to_dev {
+ struct wire_trans_hdr hdr;
+ __le64 req_q_addr;
+ __le64 rsp_q_addr;
+ __le32 req_q_size;
+ __le32 rsp_q_size;
+ __le32 buf_len;
+ __le32 options; /* unused, but BIT(16) has meaning to the device */
+} __packed;
+
+struct wire_trans_activate_from_dev {
+ struct wire_trans_hdr hdr;
+ __le32 status;
+ __le32 dbc_id;
+ __le64 options; /* unused */
+} __packed;
+
+struct wire_trans_deactivate_from_dev {
+ struct wire_trans_hdr hdr;
+ __le32 status;
+ __le32 dbc_id;
+} __packed;
+
+struct wire_trans_terminate_to_dev {
+ struct wire_trans_hdr hdr;
+ __le32 handle;
+ __le32 padding;
+} __packed;
+
+struct wire_trans_terminate_from_dev {
+ struct wire_trans_hdr hdr;
+ __le32 status;
+ __le32 padding;
+} __packed;
+
+struct wire_trans_status_to_dev {
+ struct wire_trans_hdr hdr;
+} __packed;
+
+struct wire_trans_status_from_dev {
+ struct wire_trans_hdr hdr;
+ __le16 major;
+ __le16 minor;
+ __le32 status;
+ __le64 status_flags;
+} __packed;
+
+struct wire_trans_validate_part_to_dev {
+ struct wire_trans_hdr hdr;
+ __le32 part_id;
+ __le32 padding;
+} __packed;
+
+struct wire_trans_validate_part_from_dev {
+ struct wire_trans_hdr hdr;
+ __le32 status;
+ __le32 padding;
+} __packed;
+
+struct xfer_queue_elem {
+ /*
+ * Node in the list of ongoing transfer requests on the control channel.
+ * Maintained by root device struct.
+ */
+ struct list_head list;
+ /* Sequence number of this transfer request */
+ u32 seq_num;
+ /* This is used to wait on until completion of transfer request */
+ struct completion xfer_done;
+ /* Received data from device */
+ void *buf;
+};
+
+struct dma_xfer {
+ /* Node in list of DMA transfers which is used for cleanup */
+ struct list_head list;
+ /* SG table of memory used for DMA */
+ struct sg_table *sgt;
+ /* Array of pages used for DMA */
+ struct page **page_list;
+ /* Number of pages used for DMA */
+ unsigned long nr_pages;
+};
+
+struct ioctl_resources {
+ /* List of all DMA transfers which is used later for cleanup */
+ struct list_head dma_xfers;
+ /* Base address of request queue which belongs to a DBC */
+ void *buf;
+ /*
+ * Base bus address of request queue which belongs to a DBC. Response
+ * queue base bus address can be calculated by adding size of request
+ * queue to base bus address of request queue.
+ */
+ dma_addr_t dma_addr;
+ /* Total size of the request queue and response queue in bytes */
+ u32 total_size;
+ /* Total number of elements that can be queued in each of the request and response queues */
+ u32 nelem;
+ /* Base address of response queue which belongs to a DBC */
+ void *rsp_q_base;
+ /* Status of the NNC message received */
+ u32 status;
+ /* DBC id of the DBC received from device */
+ u32 dbc_id;
+ /*
+ * DMA transfer request messages can be big in size and it may not be
+ * possible to send them in one shot. In such cases the messages are
+ * broken into chunks; this field stores the ID of such chunks.
+ */
+ u32 dma_chunk_id;
+ /* Total number of bytes transferred for a DMA xfer request */
+ u64 xferred_dma_size;
+ /* Header of transaction message received from user. Used during DMA xfer request. */
+ void *trans_hdr;
+};
+
+struct resp_work {
+ struct work_struct work;
+ struct qaic_device *qdev;
+ void *buf;
+};
+
+/*
+ * Since we're working with little endian messages, it's useful to be able to
+ * increment without filling a whole line with conversions back and forth just
+ * to add one to a message count.
+ */
+static __le32 incr_le32(__le32 val)
+{
+ return cpu_to_le32(le32_to_cpu(val) + 1);
+}
+
+static u32 gen_crc(void *msg)
+{
+ struct wrapper_list *wrappers = msg;
+ struct wrapper_msg *w;
+ u32 crc = ~0;
+
+ list_for_each_entry(w, &wrappers->list, list)
+ crc = crc32(crc, &w->msg, w->len);
+
+ return crc ^ ~0;
+}
+
+static u32 gen_crc_stub(void *msg)
+{
+ return 0;
+}
+
+static bool valid_crc(void *msg)
+{
+ struct wire_msg_hdr *hdr = msg;
+ bool ret;
+ u32 crc;
+
+ /*
+ * The output of this algorithm is always converted to the native
+ * endianness.
+ */
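+ /*
+ * The CRC is computed with the crc32 field zeroed, so zero it before
+ * recomputing and restore it afterwards.
+ */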
+ crc = le32_to_cpu(hdr->crc32);
+ hdr->crc32 = 0;
+ ret = (crc32(~0, msg, le32_to_cpu(hdr->len)) ^ ~0) == crc;
+ hdr->crc32 = cpu_to_le32(crc);
+ return ret;
+}
+
+static bool valid_crc_stub(void *msg)
+{
+ return true;
+}
+
+static void free_wrapper(struct kref *ref)
+{
+ struct wrapper_msg *wrapper = container_of(ref, struct wrapper_msg, ref_count);
+
+ list_del(&wrapper->list);
+ kfree(wrapper);
+}
+
+static void save_dbc_buf(struct qaic_device *qdev, struct ioctl_resources *resources,
+ struct qaic_user *usr)
+{
+ u32 dbc_id = resources->dbc_id;
+
+ if (resources->buf) {
+ wait_event_interruptible(qdev->dbc[dbc_id].dbc_release, !qdev->dbc[dbc_id].in_use);
+ qdev->dbc[dbc_id].req_q_base = resources->buf;
+ qdev->dbc[dbc_id].rsp_q_base = resources->rsp_q_base;
+ qdev->dbc[dbc_id].dma_addr = resources->dma_addr;
+ qdev->dbc[dbc_id].total_size = resources->total_size;
+ qdev->dbc[dbc_id].nelem = resources->nelem;
+ enable_dbc(qdev, dbc_id, usr);
+ qdev->dbc[dbc_id].in_use = true;
+ resources->buf = NULL;
+ }
+}
+
+static void free_dbc_buf(struct qaic_device *qdev, struct ioctl_resources *resources)
+{
+ if (resources->buf)
+ dma_free_coherent(&qdev->pdev->dev, resources->total_size, resources->buf,
+ resources->dma_addr);
+ resources->buf = NULL;
+}
+
+static void free_dma_xfers(struct qaic_device *qdev, struct ioctl_resources *resources)
+{
+ struct dma_xfer *xfer;
+ struct dma_xfer *x;
+ int i;
+
+ list_for_each_entry_safe(xfer, x, &resources->dma_xfers, list) {
+ dma_unmap_sgtable(&qdev->pdev->dev, xfer->sgt, DMA_TO_DEVICE, 0);
+ sg_free_table(xfer->sgt);
+ kfree(xfer->sgt);
+ for (i = 0; i < xfer->nr_pages; ++i)
+ put_page(xfer->page_list[i]);
+ kfree(xfer->page_list);
+ list_del(&xfer->list);
+ kfree(xfer);
+ }
+}
+
+static struct wrapper_msg *add_wrapper(struct wrapper_list *wrappers, u32 size)
+{
+ struct wrapper_msg *w = kzalloc(size, GFP_KERNEL);
+
+ if (!w)
+ return NULL;
+ list_add_tail(&w->list, &wrappers->list);
+ kref_init(&w->ref_count);
+ w->head = wrappers;
+ return w;
+}
+
+static int encode_passthrough(struct qaic_device *qdev, void *trans, struct wrapper_list *wrappers,
+ u32 *user_len)
+{
+ struct qaic_manage_trans_passthrough *in_trans = trans;
+ struct wire_trans_passthrough *out_trans;
+ struct wrapper_msg *trans_wrapper;
+ struct wrapper_msg *wrapper;
+ struct wire_msg *msg;
+ u32 msg_hdr_len;
+
+ wrapper = list_first_entry(&wrappers->list, struct wrapper_msg, list);
+ msg = &wrapper->msg;
+ msg_hdr_len = le32_to_cpu(msg->hdr.len);
+
+ if (in_trans->hdr.len % 8 != 0)
+ return -EINVAL;
+
+ if (msg_hdr_len + in_trans->hdr.len > QAIC_MANAGE_EXT_MSG_LENGTH)
+ return -ENOSPC;
+
+ trans_wrapper = add_wrapper(wrappers,
+ offsetof(struct wrapper_msg, trans) + in_trans->hdr.len);
+ if (!trans_wrapper)
+ return -ENOMEM;
+ trans_wrapper->len = in_trans->hdr.len;
+ out_trans = (struct wire_trans_passthrough *)&trans_wrapper->trans;
+
+ memcpy(out_trans->data, in_trans->data, in_trans->hdr.len - sizeof(in_trans->hdr));
+ msg->hdr.len = cpu_to_le32(msg_hdr_len + in_trans->hdr.len);
+ msg->hdr.count = incr_le32(msg->hdr.count);
+ *user_len += in_trans->hdr.len;
+ out_trans->hdr.type = cpu_to_le32(QAIC_TRANS_PASSTHROUGH_TO_DEV);
+ out_trans->hdr.len = cpu_to_le32(in_trans->hdr.len);
+
+ return 0;
+}
+
+/* returns error code for failure, 0 if enough pages alloc'd, 1 if dma_cont is needed */
+static int find_and_map_user_pages(struct qaic_device *qdev,
+ struct qaic_manage_trans_dma_xfer *in_trans,
+ struct ioctl_resources *resources, struct dma_xfer *xfer)
+{
+ unsigned long need_pages;
+ struct page **page_list;
+ unsigned long nr_pages;
+ struct sg_table *sgt;
+ u64 xfer_start_addr;
+ int ret;
+ int i;
+
+ xfer_start_addr = in_trans->addr + resources->xferred_dma_size;
+
+ need_pages = DIV_ROUND_UP(in_trans->size + offset_in_page(xfer_start_addr) -
+ resources->xferred_dma_size, PAGE_SIZE);
+
+ nr_pages = need_pages;
+
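+ /*
+ * If a page pointer array of the full size cannot be allocated, keep
+ * halving the count; any remainder is sent later as a DMA continuation
+ * (return value 1).
+ */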
+ while (1) {
+ page_list = kmalloc_array(nr_pages, sizeof(*page_list), GFP_KERNEL | __GFP_NOWARN);
+ if (!page_list) {
+ nr_pages = nr_pages / 2;
+ if (!nr_pages)
+ return -ENOMEM;
+ } else {
+ break;
+ }
+ }
+
+ ret = get_user_pages_fast(xfer_start_addr, nr_pages, 0, page_list);
+ if (ret < 0 || ret != nr_pages) {
+ ret = -EFAULT;
+ goto free_page_list;
+ }
+
+ sgt = kmalloc(sizeof(*sgt), GFP_KERNEL);
+ if (!sgt) {
+ ret = -ENOMEM;
+ goto put_pages;
+ }
+
+ ret = sg_alloc_table_from_pages(sgt, page_list, nr_pages,
+ offset_in_page(xfer_start_addr),
+ in_trans->size - resources->xferred_dma_size, GFP_KERNEL);
+ if (ret) {
+ ret = -ENOMEM;
+ goto free_sgt;
+ }
+
+ ret = dma_map_sgtable(&qdev->pdev->dev, sgt, DMA_TO_DEVICE, 0);
+ if (ret)
+ goto free_table;
+
+ xfer->sgt = sgt;
+ xfer->page_list = page_list;
+ xfer->nr_pages = nr_pages;
+
+ return need_pages > nr_pages ? 1 : 0;
+
+free_table:
+ sg_free_table(sgt);
+free_sgt:
+ kfree(sgt);
+put_pages:
+ for (i = 0; i < nr_pages; ++i)
+ put_page(page_list[i]);
+free_page_list:
+ kfree(page_list);
+ return ret;
+}
+
+/* returns error code for failure, 0 if everything was encoded, 1 if dma_cont is needed */
+static int encode_addr_size_pairs(struct dma_xfer *xfer, struct wrapper_list *wrappers,
+ struct ioctl_resources *resources, u32 msg_hdr_len, u32 *size,
+ struct wire_trans_dma_xfer **out_trans)
+{
+ struct wrapper_msg *trans_wrapper;
+ struct sg_table *sgt = xfer->sgt;
+ struct wire_addr_size_pair *asp;
+ struct scatterlist *sg;
+ struct wrapper_msg *w;
+ unsigned int dma_len;
+ u64 dma_chunk_len;
+ void *boundary;
+ int nents_dma;
+ int nents;
+ int i;
+
+ nents = sgt->nents;
+ nents_dma = nents;
+ *size = QAIC_MANAGE_EXT_MSG_LENGTH - msg_hdr_len - sizeof(**out_trans);
+ for_each_sgtable_sg(sgt, sg, i) {
+ *size -= sizeof(*asp);
+ /* Save 1K for possible follow-up transactions. */
+ if (*size < SZ_1K) {
+ nents_dma = i;
+ break;
+ }
+ }
+
+ trans_wrapper = add_wrapper(wrappers, QAIC_WRAPPER_MAX_SIZE);
+ if (!trans_wrapper)
+ return -ENOMEM;
+ *out_trans = (struct wire_trans_dma_xfer *)&trans_wrapper->trans;
+
+ asp = (*out_trans)->data;
+ boundary = (void *)trans_wrapper + QAIC_WRAPPER_MAX_SIZE;
+ *size = 0;
+
+ dma_len = 0;
+ w = trans_wrapper;
+ dma_chunk_len = 0;
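+ /*
+ * Each segment's size is written one iteration late so the final
+ * segment can be finalized after the loop. When the current wrapper
+ * fills up, chain a new wrapper and continue the address/size pairs
+ * there.
+ */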
+ for_each_sg(sgt->sgl, sg, nents_dma, i) {
+ asp->size = cpu_to_le64(dma_len);
+ dma_chunk_len += dma_len;
+ if (dma_len) {
+ asp++;
+ if ((void *)asp + sizeof(*asp) > boundary) {
+ w->len = (void *)asp - (void *)&w->msg;
+ *size += w->len;
+ w = add_wrapper(wrappers, QAIC_WRAPPER_MAX_SIZE);
+ if (!w)
+ return -ENOMEM;
+ boundary = (void *)w + QAIC_WRAPPER_MAX_SIZE;
+ asp = (struct wire_addr_size_pair *)&w->msg;
+ }
+ }
+ asp->addr = cpu_to_le64(sg_dma_address(sg));
+ dma_len = sg_dma_len(sg);
+ }
+ /* finalize the last segment */
+ asp->size = cpu_to_le64(dma_len);
+ w->len = (void *)asp + sizeof(*asp) - (void *)&w->msg;
+ *size += w->len;
+ dma_chunk_len += dma_len;
+ resources->xferred_dma_size += dma_chunk_len;
+
+ return nents_dma < nents ? 1 : 0;
+}
+
+static void cleanup_xfer(struct qaic_device *qdev, struct dma_xfer *xfer)
+{
+ int i;
+
+ dma_unmap_sgtable(&qdev->pdev->dev, xfer->sgt, DMA_TO_DEVICE, 0);
+ sg_free_table(xfer->sgt);
+ kfree(xfer->sgt);
+ for (i = 0; i < xfer->nr_pages; ++i)
+ put_page(xfer->page_list[i]);
+ kfree(xfer->page_list);
+}
+
+static int encode_dma(struct qaic_device *qdev, void *trans, struct wrapper_list *wrappers,
+ u32 *user_len, struct ioctl_resources *resources, struct qaic_user *usr)
+{
+ struct qaic_manage_trans_dma_xfer *in_trans = trans;
+ struct wire_trans_dma_xfer *out_trans;
+ struct wrapper_msg *wrapper;
+ struct dma_xfer *xfer;
+ struct wire_msg *msg;
+ bool need_cont_dma;
+ u32 msg_hdr_len;
+ u32 size;
+ int ret;
+
+ wrapper = list_first_entry(&wrappers->list, struct wrapper_msg, list);
+ msg = &wrapper->msg;
+ msg_hdr_len = le32_to_cpu(msg->hdr.len);
+
+ if (msg_hdr_len > (UINT_MAX - QAIC_MANAGE_EXT_MSG_LENGTH))
+ return -EINVAL;
+
+ /* There should be enough space to hold at least one ASP entry. */
+ if (msg_hdr_len + sizeof(*out_trans) + sizeof(struct wire_addr_size_pair) >
+ QAIC_MANAGE_EXT_MSG_LENGTH)
+ return -ENOMEM;
+
+ if (in_trans->addr + in_trans->size < in_trans->addr || !in_trans->size)
+ return -EINVAL;
+
+ xfer = kmalloc(sizeof(*xfer), GFP_KERNEL);
+ if (!xfer)
+ return -ENOMEM;
+
+ ret = find_and_map_user_pages(qdev, in_trans, resources, xfer);
+ if (ret < 0)
+ goto free_xfer;
+
+ need_cont_dma = (bool)ret;
+
+ ret = encode_addr_size_pairs(xfer, wrappers, resources, msg_hdr_len, &size, &out_trans);
+ if (ret < 0)
+ goto cleanup_xfer;
+
+ need_cont_dma = need_cont_dma || (bool)ret;
+
+ msg->hdr.len = cpu_to_le32(msg_hdr_len + size);
+ msg->hdr.count = incr_le32(msg->hdr.count);
+
+ out_trans->hdr.type = cpu_to_le32(QAIC_TRANS_DMA_XFER_TO_DEV);
+ out_trans->hdr.len = cpu_to_le32(size);
+ out_trans->tag = cpu_to_le32(in_trans->tag);
+ out_trans->count = cpu_to_le32((size - sizeof(*out_trans)) /
+ sizeof(struct wire_addr_size_pair));
+
+ *user_len += in_trans->hdr.len;
+
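+ /*
+ * If this encode is resuming an earlier chunked transfer, reuse its
+ * chunk id; otherwise allocate a new non-zero id only when the transfer
+ * does not fit in a single message.
+ */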
+ if (resources->dma_chunk_id) {
+ out_trans->dma_chunk_id = cpu_to_le32(resources->dma_chunk_id);
+ } else if (need_cont_dma) {
+ while (resources->dma_chunk_id == 0)
+ resources->dma_chunk_id = atomic_inc_return(&usr->chunk_id);
+
+ out_trans->dma_chunk_id = cpu_to_le32(resources->dma_chunk_id);
+ }
+ resources->trans_hdr = trans;
+
+ list_add(&xfer->list, &resources->dma_xfers);
+ return 0;
+
+cleanup_xfer:
+ cleanup_xfer(qdev, xfer);
+free_xfer:
+ kfree(xfer);
+ return ret;
+}
+
+static int encode_activate(struct qaic_device *qdev, void *trans, struct wrapper_list *wrappers,
+ u32 *user_len, struct ioctl_resources *resources)
+{
+ struct qaic_manage_trans_activate_to_dev *in_trans = trans;
+ struct wire_trans_activate_to_dev *out_trans;
+ struct wrapper_msg *trans_wrapper;
+ struct wrapper_msg *wrapper;
+ struct wire_msg *msg;
+ dma_addr_t dma_addr;
+ u32 msg_hdr_len;
+ void *buf;
+ u32 nelem;
+ u32 size;
+ int ret;
+
+ wrapper = list_first_entry(&wrappers->list, struct wrapper_msg, list);
+ msg = &wrapper->msg;
+ msg_hdr_len = le32_to_cpu(msg->hdr.len);
+
+ if (msg_hdr_len + sizeof(*out_trans) > QAIC_MANAGE_MAX_MSG_LENGTH)
+ return -ENOSPC;
+
+ if (!in_trans->queue_size)
+ return -EINVAL;
+
+ if (in_trans->pad)
+ return -EINVAL;
+
+ nelem = in_trans->queue_size;
+ size = (get_dbc_req_elem_size() + get_dbc_rsp_elem_size()) * nelem;
+ if (size / nelem != get_dbc_req_elem_size() + get_dbc_rsp_elem_size())
+ return -EINVAL;
+
+ if (size + QAIC_DBC_Q_GAP + QAIC_DBC_Q_BUF_ALIGN < size)
+ return -EINVAL;
+
+ size = ALIGN((size + QAIC_DBC_Q_GAP), QAIC_DBC_Q_BUF_ALIGN);
+
+ buf = dma_alloc_coherent(&qdev->pdev->dev, size, &dma_addr, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ trans_wrapper = add_wrapper(wrappers,
+ offsetof(struct wrapper_msg, trans) + sizeof(*out_trans));
+ if (!trans_wrapper) {
+ ret = -ENOMEM;
+ goto free_dma;
+ }
+ trans_wrapper->len = sizeof(*out_trans);
+ out_trans = (struct wire_trans_activate_to_dev *)&trans_wrapper->trans;
+
+ out_trans->hdr.type = cpu_to_le32(QAIC_TRANS_ACTIVATE_TO_DEV);
+ out_trans->hdr.len = cpu_to_le32(sizeof(*out_trans));
+ out_trans->buf_len = cpu_to_le32(size);
+ out_trans->req_q_addr = cpu_to_le64(dma_addr);
+ out_trans->req_q_size = cpu_to_le32(nelem);
+ out_trans->rsp_q_addr = cpu_to_le64(dma_addr + size - nelem * get_dbc_rsp_elem_size());
+ out_trans->rsp_q_size = cpu_to_le32(nelem);
+ out_trans->options = cpu_to_le32(in_trans->options);
+
+ *user_len += in_trans->hdr.len;
+ msg->hdr.len = cpu_to_le32(msg_hdr_len + sizeof(*out_trans));
+ msg->hdr.count = incr_le32(msg->hdr.count);
+
+ resources->buf = buf;
+ resources->dma_addr = dma_addr;
+ resources->total_size = size;
+ resources->nelem = nelem;
+ resources->rsp_q_base = buf + size - nelem * get_dbc_rsp_elem_size();
+ return 0;
+
+free_dma:
+ dma_free_coherent(&qdev->pdev->dev, size, buf, dma_addr);
+ return ret;
+}
+
+static int encode_deactivate(struct qaic_device *qdev, void *trans,
+ u32 *user_len, struct qaic_user *usr)
+{
+ struct qaic_manage_trans_deactivate *in_trans = trans;
+
+ if (in_trans->dbc_id >= qdev->num_dbc || in_trans->pad)
+ return -EINVAL;
+
+ *user_len += in_trans->hdr.len;
+
+ return disable_dbc(qdev, in_trans->dbc_id, usr);
+}
+
+static int encode_status(struct qaic_device *qdev, void *trans, struct wrapper_list *wrappers,
+ u32 *user_len)
+{
+ struct qaic_manage_trans_status_to_dev *in_trans = trans;
+ struct wire_trans_status_to_dev *out_trans;
+ struct wrapper_msg *trans_wrapper;
+ struct wrapper_msg *wrapper;
+ struct wire_msg *msg;
+ u32 msg_hdr_len;
+
+ wrapper = list_first_entry(&wrappers->list, struct wrapper_msg, list);
+ msg = &wrapper->msg;
+ msg_hdr_len = le32_to_cpu(msg->hdr.len);
+
+ if (msg_hdr_len + in_trans->hdr.len > QAIC_MANAGE_MAX_MSG_LENGTH)
+ return -ENOSPC;
+
+ trans_wrapper = add_wrapper(wrappers, sizeof(*trans_wrapper));
+ if (!trans_wrapper)
+ return -ENOMEM;
+
+ trans_wrapper->len = sizeof(*out_trans);
+ out_trans = (struct wire_trans_status_to_dev *)&trans_wrapper->trans;
+
+ out_trans->hdr.type = cpu_to_le32(QAIC_TRANS_STATUS_TO_DEV);
+ out_trans->hdr.len = cpu_to_le32(in_trans->hdr.len);
+ msg->hdr.len = cpu_to_le32(msg_hdr_len + in_trans->hdr.len);
+ msg->hdr.count = incr_le32(msg->hdr.count);
+ *user_len += in_trans->hdr.len;
+
+ return 0;
+}
+
+static int encode_message(struct qaic_device *qdev, struct manage_msg *user_msg,
+ struct wrapper_list *wrappers, struct ioctl_resources *resources,
+ struct qaic_user *usr)
+{
+ struct qaic_manage_trans_hdr *trans_hdr;
+ struct wrapper_msg *wrapper;
+ struct wire_msg *msg;
+ u32 user_len = 0;
+ int ret;
+ int i;
+
+ if (!user_msg->count) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ wrapper = list_first_entry(&wrappers->list, struct wrapper_msg, list);
+ msg = &wrapper->msg;
+
+ msg->hdr.len = cpu_to_le32(sizeof(msg->hdr));
+
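+ /*
+ * A non-zero dma_chunk_id means this message resumes a chunked DMA
+ * transfer, so only the outstanding DMA transaction is re-encoded.
+ */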
+ if (resources->dma_chunk_id) {
+ ret = encode_dma(qdev, resources->trans_hdr, wrappers, &user_len, resources, usr);
+ msg->hdr.count = cpu_to_le32(1);
+ goto out;
+ }
+
+ for (i = 0; i < user_msg->count; ++i) {
+ if (user_len >= user_msg->len) {
+ ret = -EINVAL;
+ break;
+ }
+ trans_hdr = (struct qaic_manage_trans_hdr *)(user_msg->data + user_len);
+ if (user_len + trans_hdr->len > user_msg->len) {
+ ret = -EINVAL;
+ break;
+ }
+
+ switch (trans_hdr->type) {
+ case QAIC_TRANS_PASSTHROUGH_FROM_USR:
+ ret = encode_passthrough(qdev, trans_hdr, wrappers, &user_len);
+ break;
+ case QAIC_TRANS_DMA_XFER_FROM_USR:
+ ret = encode_dma(qdev, trans_hdr, wrappers, &user_len, resources, usr);
+ break;
+ case QAIC_TRANS_ACTIVATE_FROM_USR:
+ ret = encode_activate(qdev, trans_hdr, wrappers, &user_len, resources);
+ break;
+ case QAIC_TRANS_DEACTIVATE_FROM_USR:
+ ret = encode_deactivate(qdev, trans_hdr, &user_len, usr);
+ break;
+ case QAIC_TRANS_STATUS_FROM_USR:
+ ret = encode_status(qdev, trans_hdr, wrappers, &user_len);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ if (ret)
+ break;
+ }
+
+ if (user_len != user_msg->len)
+ ret = -EINVAL;
+out:
+ if (ret) {
+ free_dma_xfers(qdev, resources);
+ free_dbc_buf(qdev, resources);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int decode_passthrough(struct qaic_device *qdev, void *trans, struct manage_msg *user_msg,
+ u32 *msg_len)
+{
+ struct qaic_manage_trans_passthrough *out_trans;
+ struct wire_trans_passthrough *in_trans = trans;
+ u32 len;
+
+ out_trans = (void *)user_msg->data + user_msg->len;
+
+ len = le32_to_cpu(in_trans->hdr.len);
+ if (len % 8 != 0)
+ return -EINVAL;
+
+ if (user_msg->len + len > QAIC_MANAGE_MAX_MSG_LENGTH)
+ return -ENOSPC;
+
+ memcpy(out_trans->data, in_trans->data, len - sizeof(in_trans->hdr));
+ user_msg->len += len;
+ *msg_len += len;
+ out_trans->hdr.type = le32_to_cpu(in_trans->hdr.type);
+ out_trans->hdr.len = len;
+
+ return 0;
+}
+
+static int decode_activate(struct qaic_device *qdev, void *trans, struct manage_msg *user_msg,
+ u32 *msg_len, struct ioctl_resources *resources, struct qaic_user *usr)
+{
+ struct qaic_manage_trans_activate_from_dev *out_trans;
+ struct wire_trans_activate_from_dev *in_trans = trans;
+ u32 len;
+
+ out_trans = (void *)user_msg->data + user_msg->len;
+
+ len = le32_to_cpu(in_trans->hdr.len);
+ if (user_msg->len + len > QAIC_MANAGE_MAX_MSG_LENGTH)
+ return -ENOSPC;
+
+ user_msg->len += len;
+ *msg_len += len;
+ out_trans->hdr.type = le32_to_cpu(in_trans->hdr.type);
+ out_trans->hdr.len = len;
+ out_trans->status = le32_to_cpu(in_trans->status);
+ out_trans->dbc_id = le32_to_cpu(in_trans->dbc_id);
+ out_trans->options = le64_to_cpu(in_trans->options);
+
+ if (!resources->buf)
+ /* how did we get an activate response without a request? */
+ return -EINVAL;
+
+ if (out_trans->dbc_id >= qdev->num_dbc)
+ /*
+ * The device assigned an invalid resource, which should never
+ * happen. Return an error so the user can try to recover.
+ */
+ return -ENODEV;
+
+ if (out_trans->status)
+ /*
+ * Allocating resources failed on the device side. This is not
+ * expected behaviour; the user is expected to handle this situation.
+ */
+ return -ECANCELED;
+
+ resources->status = out_trans->status;
+ resources->dbc_id = out_trans->dbc_id;
+ save_dbc_buf(qdev, resources, usr);
+
+ return 0;
+}
+
+static int decode_deactivate(struct qaic_device *qdev, void *trans, u32 *msg_len,
+ struct qaic_user *usr)
+{
+ struct wire_trans_deactivate_from_dev *in_trans = trans;
+ u32 dbc_id = le32_to_cpu(in_trans->dbc_id);
+ u32 status = le32_to_cpu(in_trans->status);
+
+ if (dbc_id >= qdev->num_dbc)
+ /*
+ * The device assigned an invalid resource, which should never
+ * happen. Inject an error so the user can try to recover.
+ */
+ return -ENODEV;
+
+ if (status) {
+ /*
+ * Releasing resources failed on the device side, which puts
+ * us in a bind since they may still be in use, so enable the
+ * dbc. User is expected to retry deactivation.
+ */
+ enable_dbc(qdev, dbc_id, usr);
+ return -ECANCELED;
+ }
+
+ release_dbc(qdev, dbc_id);
+ *msg_len += sizeof(*in_trans);
+
+ return 0;
+}
+
+static int decode_status(struct qaic_device *qdev, void *trans, struct manage_msg *user_msg,
+ u32 *user_len, struct wire_msg *msg)
+{
+ struct qaic_manage_trans_status_from_dev *out_trans;
+ struct wire_trans_status_from_dev *in_trans = trans;
+ u32 len;
+
+ out_trans = (void *)user_msg->data + user_msg->len;
+
+ len = le32_to_cpu(in_trans->hdr.len);
+ if (user_msg->len + len > QAIC_MANAGE_MAX_MSG_LENGTH)
+ return -ENOSPC;
+
+ out_trans->hdr.type = QAIC_TRANS_STATUS_FROM_DEV;
+ out_trans->hdr.len = len;
+ out_trans->major = le16_to_cpu(in_trans->major);
+ out_trans->minor = le16_to_cpu(in_trans->minor);
+ out_trans->status_flags = le64_to_cpu(in_trans->status_flags);
+ out_trans->status = le32_to_cpu(in_trans->status);
+ *user_len += le32_to_cpu(in_trans->hdr.len);
+ user_msg->len += len;
+
+ if (out_trans->status)
+ return -ECANCELED;
+ if (out_trans->status_flags & BIT(0) && !valid_crc(msg))
+ return -EPIPE;
+
+ return 0;
+}
+
+static int decode_message(struct qaic_device *qdev, struct manage_msg *user_msg,
+ struct wire_msg *msg, struct ioctl_resources *resources,
+ struct qaic_user *usr)
+{
+ u32 msg_hdr_len = le32_to_cpu(msg->hdr.len);
+ struct wire_trans_hdr *trans_hdr;
+ u32 msg_len = 0;
+ int ret;
+ int i;
+
+ if (msg_hdr_len > QAIC_MANAGE_MAX_MSG_LENGTH)
+ return -EINVAL;
+
+ user_msg->len = 0;
+ user_msg->count = le32_to_cpu(msg->hdr.count);
+
+ for (i = 0; i < user_msg->count; ++i) {
+ trans_hdr = (struct wire_trans_hdr *)(msg->data + msg_len);
+ if (msg_len + le32_to_cpu(trans_hdr->len) > msg_hdr_len)
+ return -EINVAL;
+
+ switch (le32_to_cpu(trans_hdr->type)) {
+ case QAIC_TRANS_PASSTHROUGH_FROM_DEV:
+ ret = decode_passthrough(qdev, trans_hdr, user_msg, &msg_len);
+ break;
+ case QAIC_TRANS_ACTIVATE_FROM_DEV:
+ ret = decode_activate(qdev, trans_hdr, user_msg, &msg_len, resources, usr);
+ break;
+ case QAIC_TRANS_DEACTIVATE_FROM_DEV:
+ ret = decode_deactivate(qdev, trans_hdr, &msg_len, usr);
+ break;
+ case QAIC_TRANS_STATUS_FROM_DEV:
+ ret = decode_status(qdev, trans_hdr, user_msg, &msg_len, msg);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (ret)
+ return ret;
+ }
+
+ if (msg_len != (msg_hdr_len - sizeof(msg->hdr)))
+ return -EINVAL;
+
+ return 0;
+}
+
+static void *msg_xfer(struct qaic_device *qdev, struct wrapper_list *wrappers, u32 seq_num,
+ bool ignore_signal)
+{
+ struct xfer_queue_elem elem;
+ struct wire_msg *out_buf;
+ struct wrapper_msg *w;
+ int retry_count;
+ long ret;
+
+ if (qdev->in_reset) {
+ mutex_unlock(&qdev->cntl_mutex);
+ return ERR_PTR(-ENODEV);
+ }
+
+ elem.seq_num = seq_num;
+ elem.buf = NULL;
+ init_completion(&elem.xfer_done);
+ if (likely(!qdev->cntl_lost_buf)) {
+ /*
+ * The max size of request to device is QAIC_MANAGE_EXT_MSG_LENGTH.
+ * The max size of response from device is QAIC_MANAGE_MAX_MSG_LENGTH.
+ */
+ out_buf = kmalloc(QAIC_MANAGE_MAX_MSG_LENGTH, GFP_KERNEL);
+ if (!out_buf) {
+ mutex_unlock(&qdev->cntl_mutex);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ ret = mhi_queue_buf(qdev->cntl_ch, DMA_FROM_DEVICE, out_buf,
+ QAIC_MANAGE_MAX_MSG_LENGTH, MHI_EOT);
+ if (ret) {
+ mutex_unlock(&qdev->cntl_mutex);
+ return ERR_PTR(ret);
+ }
+ } else {
+ /*
+ * We lost a buffer because we queued a recv buf, but then
+ * queuing the corresponding tx buf failed. To try to avoid
+ * a memory leak, let's reclaim it and use it for this
+ * transaction.
+ */
+ qdev->cntl_lost_buf = false;
+ }
+
+ list_for_each_entry(w, &wrappers->list, list) {
+ kref_get(&w->ref_count);
+ retry_count = 0;
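+ /*
+ * The MHI channel ring may be momentarily full (-EAGAIN); retry a
+ * bounded number of times unless a signal is pending.
+ */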
+retry:
+ ret = mhi_queue_buf(qdev->cntl_ch, DMA_TO_DEVICE, &w->msg, w->len,
+ list_is_last(&w->list, &wrappers->list) ? MHI_EOT : MHI_CHAIN);
+ if (ret) {
+ if (ret == -EAGAIN && retry_count++ < QAIC_MHI_RETRY_MAX) {
+ msleep_interruptible(QAIC_MHI_RETRY_WAIT_MS);
+ if (!signal_pending(current))
+ goto retry;
+ }
+
+ qdev->cntl_lost_buf = true;
+ kref_put(&w->ref_count, free_wrapper);
+ mutex_unlock(&qdev->cntl_mutex);
+ return ERR_PTR(ret);
+ }
+ }
+
+ list_add_tail(&elem.list, &qdev->cntl_xfer_list);
+ mutex_unlock(&qdev->cntl_mutex);
+
+ if (ignore_signal)
+ ret = wait_for_completion_timeout(&elem.xfer_done, control_resp_timeout_s * HZ);
+ else
+ ret = wait_for_completion_interruptible_timeout(&elem.xfer_done,
+ control_resp_timeout_s * HZ);
+ /*
+ * Not using _interruptible because we have to clean up or we'll
+ * likely cause memory corruption.
+ */
+ mutex_lock(&qdev->cntl_mutex);
+ if (!list_empty(&elem.list))
+ list_del(&elem.list);
+ if (!ret && !elem.buf)
+ ret = -ETIMEDOUT;
+ else if (ret > 0 && !elem.buf)
+ ret = -EIO;
+ mutex_unlock(&qdev->cntl_mutex);
+
+ if (ret < 0) {
+ kfree(elem.buf);
+ return ERR_PTR(ret);
+ } else if (!qdev->valid_crc(elem.buf)) {
+ kfree(elem.buf);
+ return ERR_PTR(-EPIPE);
+ }
+
+ return elem.buf;
+}
+
+/* Add a transaction to abort the outstanding DMA continuation */
+static int abort_dma_cont(struct qaic_device *qdev, struct wrapper_list *wrappers, u32 dma_chunk_id)
+{
+ struct wire_trans_dma_xfer *out_trans;
+ u32 size = sizeof(*out_trans);
+ struct wrapper_msg *wrapper;
+ struct wrapper_msg *w;
+ struct wire_msg *msg;
+
+ wrapper = list_first_entry(&wrappers->list, struct wrapper_msg, list);
+ msg = &wrapper->msg;
+
+ /* Remove all but the first wrapper, which has the msg header */
+ list_for_each_entry_safe(wrapper, w, &wrappers->list, list)
+ if (!list_is_first(&wrapper->list, &wrappers->list))
+ kref_put(&wrapper->ref_count, free_wrapper);
+
+ wrapper = add_wrapper(wrappers, offsetof(struct wrapper_msg, trans) + sizeof(*out_trans));
+
+ if (!wrapper)
+ return -ENOMEM;
+
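+ /*
+ * A DMA xfer transaction with zero address/size pairs for the
+ * outstanding chunk id tells the device to abandon the transfer.
+ */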
+ out_trans = (struct wire_trans_dma_xfer *)&wrapper->trans;
+ out_trans->hdr.type = cpu_to_le32(QAIC_TRANS_DMA_XFER_TO_DEV);
+ out_trans->hdr.len = cpu_to_le32(size);
+ out_trans->tag = cpu_to_le32(0);
+ out_trans->count = cpu_to_le32(0);
+ out_trans->dma_chunk_id = cpu_to_le32(dma_chunk_id);
+
+ msg->hdr.len = cpu_to_le32(size + sizeof(*msg));
+ msg->hdr.count = cpu_to_le32(1);
+ wrapper->len = size;
+
+ return 0;
+}
+
+static struct wrapper_list *alloc_wrapper_list(void)
+{
+ struct wrapper_list *wrappers;
+
+ wrappers = kmalloc(sizeof(*wrappers), GFP_KERNEL);
+ if (!wrappers)
+ return NULL;
+ INIT_LIST_HEAD(&wrappers->list);
+ spin_lock_init(&wrappers->lock);
+
+ return wrappers;
+}
+
+static int qaic_manage_msg_xfer(struct qaic_device *qdev, struct qaic_user *usr,
+ struct manage_msg *user_msg, struct ioctl_resources *resources,
+ struct wire_msg **rsp)
+{
+ struct wrapper_list *wrappers;
+ struct wrapper_msg *wrapper;
+ struct wrapper_msg *w;
+ bool all_done = false;
+ struct wire_msg *msg;
+ int ret;
+
+ wrappers = alloc_wrapper_list();
+ if (!wrappers)
+ return -ENOMEM;
+
+ wrapper = add_wrapper(wrappers, sizeof(*wrapper));
+ if (!wrapper) {
+ kfree(wrappers);
+ return -ENOMEM;
+ }
+
+ msg = &wrapper->msg;
+ wrapper->len = sizeof(*msg);
+
+ ret = encode_message(qdev, user_msg, wrappers, resources, usr);
+ if (ret && resources->dma_chunk_id)
+ ret = abort_dma_cont(qdev, wrappers, resources->dma_chunk_id);
+ if (ret)
+ goto encode_failed;
+
+ ret = mutex_lock_interruptible(&qdev->cntl_mutex);
+ if (ret)
+ goto lock_failed;
+
+ msg->hdr.magic_number = MANAGE_MAGIC_NUMBER;
+ msg->hdr.sequence_number = cpu_to_le32(qdev->next_seq_num++);
+
+ if (usr) {
+ msg->hdr.handle = cpu_to_le32(usr->handle);
+ msg->hdr.partition_id = cpu_to_le32(usr->qddev->partition_id);
+ } else {
+ msg->hdr.handle = 0;
+ msg->hdr.partition_id = cpu_to_le32(QAIC_NO_PARTITION);
+ }
+
+ msg->hdr.padding = cpu_to_le32(0);
+ msg->hdr.crc32 = cpu_to_le32(qdev->gen_crc(wrappers));
+
+ /* msg_xfer releases the mutex */
+ *rsp = msg_xfer(qdev, wrappers, qdev->next_seq_num - 1, false);
+ if (IS_ERR(*rsp))
+ ret = PTR_ERR(*rsp);
+
+lock_failed:
+ free_dma_xfers(qdev, resources);
+encode_failed:
+ spin_lock(&wrappers->lock);
+ list_for_each_entry_safe(wrapper, w, &wrappers->list, list)
+ kref_put(&wrapper->ref_count, free_wrapper);
+ all_done = list_empty(&wrappers->list);
+ spin_unlock(&wrappers->lock);
+ if (all_done)
+ kfree(wrappers);
+
+ return ret;
+}
+
+static int qaic_manage(struct qaic_device *qdev, struct qaic_user *usr, struct manage_msg *user_msg)
+{
+ struct wire_trans_dma_xfer_cont *dma_cont = NULL;
+ struct ioctl_resources resources;
+ struct wire_msg *rsp = NULL;
+ int ret;
+
+ memset(&resources, 0, sizeof(struct ioctl_resources));
+
+ INIT_LIST_HEAD(&resources.dma_xfers);
+
+ if (user_msg->len > QAIC_MANAGE_MAX_MSG_LENGTH ||
+ user_msg->count > QAIC_MANAGE_MAX_MSG_LENGTH / sizeof(struct qaic_manage_trans_hdr))
+ return -EINVAL;
+
+dma_xfer_continue:
+ ret = qaic_manage_msg_xfer(qdev, usr, user_msg, &resources, &rsp);
+ if (ret)
+ return ret;
+ /* dma_cont should be the only transaction if present */
+ if (le32_to_cpu(rsp->hdr.count) == 1) {
+ dma_cont = (struct wire_trans_dma_xfer_cont *)rsp->data;
+ if (le32_to_cpu(dma_cont->hdr.type) != QAIC_TRANS_DMA_XFER_CONT)
+ dma_cont = NULL;
+ }
+ if (dma_cont) {
+ if (le32_to_cpu(dma_cont->dma_chunk_id) == resources.dma_chunk_id &&
+ le64_to_cpu(dma_cont->xferred_size) == resources.xferred_dma_size) {
+ kfree(rsp);
+ goto dma_xfer_continue;
+ }
+
+ ret = -EINVAL;
+ goto dma_cont_failed;
+ }
+
+ ret = decode_message(qdev, user_msg, rsp, &resources, usr);
+
+dma_cont_failed:
+ free_dbc_buf(qdev, &resources);
+ kfree(rsp);
+ return ret;
+}
+
+int qaic_manage_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv)
+{
+ struct qaic_manage_msg *user_msg;
+ struct qaic_device *qdev;
+ struct manage_msg *msg;
+ struct qaic_user *usr;
+ u8 __user *user_data;
+ int qdev_rcu_id;
+ int usr_rcu_id;
+ int ret;
+
+ usr = file_priv->driver_priv;
+
+ usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
+ if (!usr->qddev) {
+ srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
+ return -ENODEV;
+ }
+
+ qdev = usr->qddev->qdev;
+
+ qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
+ if (qdev->in_reset) {
+ srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
+ srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
+ return -ENODEV;
+ }
+
+ user_msg = data;
+
+ if (user_msg->len > QAIC_MANAGE_MAX_MSG_LENGTH) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ msg = kzalloc(QAIC_MANAGE_MAX_MSG_LENGTH + sizeof(*msg), GFP_KERNEL);
+ if (!msg) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ msg->len = user_msg->len;
+ msg->count = user_msg->count;
+
+ user_data = u64_to_user_ptr(user_msg->data);
+
+ if (copy_from_user(msg->data, user_data, user_msg->len)) {
+ ret = -EFAULT;
+ goto free_msg;
+ }
+
+ ret = qaic_manage(qdev, usr, msg);
+
+ /*
+	 * If qaic_manage() is successful, we copy the message back to
+	 * userspace memory, with one exception for -ECANCELED.
+	 * -ECANCELED means that the device has NACKed the message with a
+	 * status error code which userspace would like to know about.
+ */
+ if (ret == -ECANCELED || !ret) {
+ if (copy_to_user(user_data, msg->data, msg->len)) {
+ ret = -EFAULT;
+ } else {
+ user_msg->len = msg->len;
+ user_msg->count = msg->count;
+ }
+ }
+
+free_msg:
+ kfree(msg);
+out:
+ srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
+ srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
+ return ret;
+}
+
+int get_cntl_version(struct qaic_device *qdev, struct qaic_user *usr, u16 *major, u16 *minor)
+{
+ struct qaic_manage_trans_status_from_dev *status_result;
+ struct qaic_manage_trans_status_to_dev *status_query;
+ struct manage_msg *user_msg;
+ int ret;
+
+ user_msg = kmalloc(sizeof(*user_msg) + sizeof(*status_result), GFP_KERNEL);
+ if (!user_msg) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ user_msg->len = sizeof(*status_query);
+ user_msg->count = 1;
+
+ status_query = (struct qaic_manage_trans_status_to_dev *)user_msg->data;
+ status_query->hdr.type = QAIC_TRANS_STATUS_FROM_USR;
+ status_query->hdr.len = sizeof(status_query->hdr);
+
+ ret = qaic_manage(qdev, usr, user_msg);
+ if (ret)
+ goto kfree_user_msg;
+ status_result = (struct qaic_manage_trans_status_from_dev *)user_msg->data;
+ *major = status_result->major;
+ *minor = status_result->minor;
+
+ if (status_result->status_flags & BIT(0)) { /* device is using CRC */
+ /* By default qdev->gen_crc is programmed to generate CRC */
+ qdev->valid_crc = valid_crc;
+ } else {
+ /* By default qdev->valid_crc is programmed to bypass CRC */
+ qdev->gen_crc = gen_crc_stub;
+ }
+
+kfree_user_msg:
+ kfree(user_msg);
+out:
+ return ret;
+}
+
+static void resp_worker(struct work_struct *work)
+{
+ struct resp_work *resp = container_of(work, struct resp_work, work);
+ struct qaic_device *qdev = resp->qdev;
+ struct wire_msg *msg = resp->buf;
+ struct xfer_queue_elem *elem;
+ struct xfer_queue_elem *i;
+ bool found = false;
+
+ mutex_lock(&qdev->cntl_mutex);
+ list_for_each_entry_safe(elem, i, &qdev->cntl_xfer_list, list) {
+ if (elem->seq_num == le32_to_cpu(msg->hdr.sequence_number)) {
+ found = true;
+ list_del_init(&elem->list);
+ elem->buf = msg;
+ complete_all(&elem->xfer_done);
+ break;
+ }
+ }
+ mutex_unlock(&qdev->cntl_mutex);
+
+ if (!found)
+ /* request must have timed out, drop packet */
+ kfree(msg);
+
+ kfree(resp);
+}
+
+static void free_wrapper_from_list(struct wrapper_list *wrappers, struct wrapper_msg *wrapper)
+{
+ bool all_done = false;
+
+ spin_lock(&wrappers->lock);
+ kref_put(&wrapper->ref_count, free_wrapper);
+ all_done = list_empty(&wrappers->list);
+ spin_unlock(&wrappers->lock);
+
+ if (all_done)
+ kfree(wrappers);
+}
+
+void qaic_mhi_ul_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result)
+{
+ struct wire_msg *msg = mhi_result->buf_addr;
+ struct wrapper_msg *wrapper = container_of(msg, struct wrapper_msg, msg);
+
+ free_wrapper_from_list(wrapper->head, wrapper);
+}
+
+void qaic_mhi_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result)
+{
+ struct qaic_device *qdev = dev_get_drvdata(&mhi_dev->dev);
+ struct wire_msg *msg = mhi_result->buf_addr;
+ struct resp_work *resp;
+
+ if (mhi_result->transaction_status || msg->hdr.magic_number != MANAGE_MAGIC_NUMBER) {
+ kfree(msg);
+ return;
+ }
+
+ resp = kmalloc(sizeof(*resp), GFP_ATOMIC);
+ if (!resp) {
+ kfree(msg);
+ return;
+ }
+
+ INIT_WORK(&resp->work, resp_worker);
+ resp->qdev = qdev;
+ resp->buf = msg;
+ queue_work(qdev->cntl_wq, &resp->work);
+}
+
+int qaic_control_open(struct qaic_device *qdev)
+{
+ if (!qdev->cntl_ch)
+ return -ENODEV;
+
+ qdev->cntl_lost_buf = false;
+ /*
+	 * By default qaic assumes that the device has CRC enabled.
+	 * Qaic learns whether the device actually has CRC enabled or disabled
+	 * during the device status transaction, which is the first transaction
+	 * performed on the control channel.
+	 *
+	 * Therefore CRC validation of the first device status transaction
+	 * response is skipped (by calling valid_crc_stub) and is done later,
+	 * during decoding, if the device has CRC enabled.
+	 * Once qaic knows whether the device has CRC enabled or not, it acts
+	 * accordingly.
+ */
+ qdev->gen_crc = gen_crc;
+ qdev->valid_crc = valid_crc_stub;
+
+ return mhi_prepare_for_transfer(qdev->cntl_ch);
+}
+
+void qaic_control_close(struct qaic_device *qdev)
+{
+ mhi_unprepare_from_transfer(qdev->cntl_ch);
+}
+
+void qaic_release_usr(struct qaic_device *qdev, struct qaic_user *usr)
+{
+ struct wire_trans_terminate_to_dev *trans;
+ struct wrapper_list *wrappers;
+ struct wrapper_msg *wrapper;
+ struct wire_msg *msg;
+ struct wire_msg *rsp;
+
+ wrappers = alloc_wrapper_list();
+ if (!wrappers)
+ return;
+
+ wrapper = add_wrapper(wrappers, sizeof(*wrapper) + sizeof(*msg) + sizeof(*trans));
+	if (!wrapper) {
+		kfree(wrappers);
+		return;
+	}
+
+ msg = &wrapper->msg;
+
+ trans = (struct wire_trans_terminate_to_dev *)msg->data;
+
+ trans->hdr.type = cpu_to_le32(QAIC_TRANS_TERMINATE_TO_DEV);
+ trans->hdr.len = cpu_to_le32(sizeof(*trans));
+ trans->handle = cpu_to_le32(usr->handle);
+
+ mutex_lock(&qdev->cntl_mutex);
+ wrapper->len = sizeof(msg->hdr) + sizeof(*trans);
+ msg->hdr.magic_number = MANAGE_MAGIC_NUMBER;
+ msg->hdr.sequence_number = cpu_to_le32(qdev->next_seq_num++);
+ msg->hdr.len = cpu_to_le32(wrapper->len);
+ msg->hdr.count = cpu_to_le32(1);
+ msg->hdr.handle = cpu_to_le32(usr->handle);
+ msg->hdr.padding = cpu_to_le32(0);
+ msg->hdr.crc32 = cpu_to_le32(qdev->gen_crc(wrappers));
+
+ /*
+ * msg_xfer releases the mutex
+ * We don't care about the return of msg_xfer since we will not do
+ * anything different based on what happens.
+ * We ignore pending signals since one will be set if the user is
+	 * killed, and we need to give the device a chance to clean up, otherwise
+ * DMA may still be in progress when we return.
+ */
+ rsp = msg_xfer(qdev, wrappers, qdev->next_seq_num - 1, true);
+ if (!IS_ERR(rsp))
+ kfree(rsp);
+ free_wrapper_from_list(wrappers, wrapper);
+}
+
+void wake_all_cntl(struct qaic_device *qdev)
+{
+ struct xfer_queue_elem *elem;
+ struct xfer_queue_elem *i;
+
+ mutex_lock(&qdev->cntl_mutex);
+ list_for_each_entry_safe(elem, i, &qdev->cntl_xfer_list, list) {
+ list_del_init(&elem->list);
+ complete_all(&elem->xfer_done);
+ }
+ mutex_unlock(&qdev->cntl_mutex);
+}
diff --git a/drivers/accel/qaic/qaic_data.c b/drivers/accel/qaic/qaic_data.c
new file mode 100644
index 000000000000..c0a574cd1b35
--- /dev/null
+++ b/drivers/accel/qaic/qaic_data.c
@@ -0,0 +1,1902 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/* Copyright (c) 2019-2021, The Linux Foundation. All rights reserved. */
+/* Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved. */
+
+#include <linux/bitfield.h>
+#include <linux/bits.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/dma-buf.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/math64.h>
+#include <linux/mm.h>
+#include <linux/moduleparam.h>
+#include <linux/scatterlist.h>
+#include <linux/spinlock.h>
+#include <linux/srcu.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <linux/wait.h>
+#include <drm/drm_file.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_print.h>
+#include <uapi/drm/qaic_accel.h>
+
+#include "qaic.h"
+
+#define SEM_VAL_MASK GENMASK_ULL(11, 0)
+#define SEM_INDEX_MASK GENMASK_ULL(4, 0)
+#define BULK_XFER BIT(3)
+#define GEN_COMPLETION BIT(4)
+#define INBOUND_XFER 1
+#define OUTBOUND_XFER 2
+#define REQHP_OFF 0x0 /* we read this */
+#define REQTP_OFF 0x4 /* we write this */
+#define RSPHP_OFF 0x8 /* we write this */
+#define RSPTP_OFF 0xc /* we read this */
+
+#define ENCODE_SEM(val, index, sync, cmd, flags) \
+ ({ \
+ FIELD_PREP(GENMASK(11, 0), (val)) | \
+ FIELD_PREP(GENMASK(20, 16), (index)) | \
+ FIELD_PREP(BIT(22), (sync)) | \
+ FIELD_PREP(GENMASK(26, 24), (cmd)) | \
+ FIELD_PREP(GENMASK(30, 29), (flags)) | \
+ FIELD_PREP(BIT(31), (cmd) ? 1 : 0); \
+ })
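+
+/*
+ * Worked example of the ENCODE_SEM() packing above (the values are
+ * illustrative, not taken from real traffic): a semaphore value of 5 on
+ * semaphore index 3, with presync enabled, command 2 and no flags encodes to
+ *
+ *   ENCODE_SEM(5, 3, 1, 2, 0)
+ *     = 5 | (3 << 16) | (1 << 22) | (2 << 24) | (0 << 29) | BIT(31)
+ *     = 0x82430005
+ *
+ * BIT(31) is set automatically because the command is non-zero.
+ */
+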
+#define NUM_EVENTS 128
+#define NUM_DELAYS 10
+
+static unsigned int wait_exec_default_timeout_ms = 5000; /* 5 sec default */
+module_param(wait_exec_default_timeout_ms, uint, 0600);
+MODULE_PARM_DESC(wait_exec_default_timeout_ms, "Default timeout for DRM_IOCTL_QAIC_WAIT_BO");
+
+static unsigned int datapath_poll_interval_us = 100; /* 100 usec default */
+module_param(datapath_poll_interval_us, uint, 0600);
+MODULE_PARM_DESC(datapath_poll_interval_us,
+ "Amount of time to sleep between activity when datapath polling is enabled");
+
+struct dbc_req {
+ /*
+	 * A request ID is assigned to each memory handle going into the DMA
+	 * queue. As a single memory handle can enqueue multiple elements in the
+	 * DMA queue, all of them will have the same request ID.
+ */
+ __le16 req_id;
+ /* Future use */
+ __u8 seq_id;
+ /*
+ * Special encoded variable
+	 * 7	0 - Do not force MSI generation after the DMA is completed
+	 *	1 - Force MSI generation after the DMA is completed
+	 * 6:5	Reserved
+	 * 4	1 - Generate a completion element in the response queue
+	 *	0 - No completion code
+	 * 3	0 - DMA request is a linked list transfer
+	 *	1 - DMA request is a bulk transfer
+	 * 2	Reserved
+	 * 1:0	00 - No DMA transfer involved
+	 *	01 - DMA transfer is part of an inbound transfer
+	 *	10 - DMA transfer is part of an outbound transfer
+	 *	11 - NA
+ */
+ __u8 cmd;
+ __le32 resv;
+ /* Source address for the transfer */
+ __le64 src_addr;
+ /* Destination address for the transfer */
+ __le64 dest_addr;
+ /* Length of transfer request */
+ __le32 len;
+ __le32 resv2;
+ /* Doorbell address */
+ __le64 db_addr;
+ /*
+ * Special encoded variable
+	 * 7	1 - Doorbell (db) write
+	 *	0 - No doorbell write
+	 * 6:2	Reserved
+	 * 1:0	00 - 32-bit access, db address must be aligned to a 32-bit boundary
+	 *	01 - 16-bit access, db address must be aligned to a 16-bit boundary
+	 *	10 - 8-bit access, db address must be aligned to an 8-bit boundary
+	 *	11 - Reserved
+ */
+ __u8 db_len;
+ __u8 resv3;
+ __le16 resv4;
+ /* 32 bit data written to doorbell address */
+ __le32 db_data;
+ /*
+ * Special encoded variable
+	 * All the fields of sem_cmdX are passed from the user and are ORed
+	 * together to form sem_cmd.
+	 * 11:0		Semaphore value
+	 * 15:12	Reserved
+	 * 20:16	Semaphore index
+	 * 21		Reserved
+	 * 22		Semaphore sync
+	 * 23		Reserved
+	 * 26:24	Semaphore command
+	 * 28:27	Reserved
+	 * 29		Semaphore DMA outbound sync fence
+	 * 30		Semaphore DMA inbound sync fence
+	 * 31		Enable semaphore command
+ */
+ __le32 sem_cmd0;
+ __le32 sem_cmd1;
+ __le32 sem_cmd2;
+ __le32 sem_cmd3;
+} __packed;
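+
+/*
+ * Worked example of the cmd encoding above (for illustration only): a bulk
+ * transfer to the device whose last request also generates a completion
+ * element uses
+ *
+ *   cmd = BULK_XFER | GEN_COMPLETION | INBOUND_XFER = BIT(3) | BIT(4) | 0x1
+ *       = 0x19
+ *
+ * which matches what encode_reqs() below builds for the final request of a
+ * DMA_TO_DEVICE slice.
+ */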
+
+struct dbc_rsp {
+ /* Request ID of the memory handle whose DMA transaction is completed */
+ __le16 req_id;
+	/* Status of the DMA transaction. 0: success, otherwise failure */
+ __le16 status;
+} __packed;
+
+inline int get_dbc_req_elem_size(void)
+{
+ return sizeof(struct dbc_req);
+}
+
+inline int get_dbc_rsp_elem_size(void)
+{
+ return sizeof(struct dbc_rsp);
+}
+
+static void free_slice(struct kref *kref)
+{
+ struct bo_slice *slice = container_of(kref, struct bo_slice, ref_count);
+
+ list_del(&slice->slice);
+ drm_gem_object_put(&slice->bo->base);
+ sg_free_table(slice->sgt);
+ kfree(slice->sgt);
+ kfree(slice->reqs);
+ kfree(slice);
+}
+
+static int clone_range_of_sgt_for_slice(struct qaic_device *qdev, struct sg_table **sgt_out,
+ struct sg_table *sgt_in, u64 size, u64 offset)
+{
+ int total_len, len, nents, offf = 0, offl = 0;
+ struct scatterlist *sg, *sgn, *sgf, *sgl;
+ struct sg_table *sgt;
+ int ret, j;
+
+ /* find out number of relevant nents needed for this mem */
+ total_len = 0;
+ sgf = NULL;
+ sgl = NULL;
+ nents = 0;
+
+ size = size ? size : PAGE_SIZE;
+ for (sg = sgt_in->sgl; sg; sg = sg_next(sg)) {
+ len = sg_dma_len(sg);
+
+ if (!len)
+ continue;
+ if (offset >= total_len && offset < total_len + len) {
+ sgf = sg;
+ offf = offset - total_len;
+ }
+ if (sgf)
+ nents++;
+ if (offset + size >= total_len &&
+ offset + size <= total_len + len) {
+ sgl = sg;
+ offl = offset + size - total_len;
+ break;
+ }
+ total_len += len;
+ }
+
+ if (!sgf || !sgl) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
+ if (!sgt) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = sg_alloc_table(sgt, nents, GFP_KERNEL);
+ if (ret)
+ goto free_sgt;
+
+ /* copy relevant sg node and fix page and length */
+ sgn = sgf;
+ for_each_sgtable_sg(sgt, sg, j) {
+ memcpy(sg, sgn, sizeof(*sg));
+ if (sgn == sgf) {
+ sg_dma_address(sg) += offf;
+ sg_dma_len(sg) -= offf;
+ sg_set_page(sg, sg_page(sgn), sg_dma_len(sg), offf);
+ } else {
+ offf = 0;
+ }
+ if (sgn == sgl) {
+ sg_dma_len(sg) = offl - offf;
+ sg_set_page(sg, sg_page(sgn), offl - offf, offf);
+ sg_mark_end(sg);
+ break;
+ }
+ sgn = sg_next(sgn);
+ }
+
+ *sgt_out = sgt;
+ return ret;
+
+free_sgt:
+ kfree(sgt);
+out:
+ *sgt_out = NULL;
+ return ret;
+}
+
+static int encode_reqs(struct qaic_device *qdev, struct bo_slice *slice,
+ struct qaic_attach_slice_entry *req)
+{
+ __le64 db_addr = cpu_to_le64(req->db_addr);
+ __le32 db_data = cpu_to_le32(req->db_data);
+ struct scatterlist *sg;
+ __u8 cmd = BULK_XFER;
+ int presync_sem;
+ u64 dev_addr;
+ __u8 db_len;
+ int i;
+
+ if (!slice->no_xfer)
+ cmd |= (slice->dir == DMA_TO_DEVICE ? INBOUND_XFER : OUTBOUND_XFER);
+
+ if (req->db_len && !IS_ALIGNED(req->db_addr, req->db_len / 8))
+ return -EINVAL;
+
+ presync_sem = req->sem0.presync + req->sem1.presync + req->sem2.presync + req->sem3.presync;
+ if (presync_sem > 1)
+ return -EINVAL;
+
+ presync_sem = req->sem0.presync << 0 | req->sem1.presync << 1 |
+ req->sem2.presync << 2 | req->sem3.presync << 3;
+
+ switch (req->db_len) {
+ case 32:
+ db_len = BIT(7);
+ break;
+ case 16:
+ db_len = BIT(7) | 1;
+ break;
+ case 8:
+ db_len = BIT(7) | 2;
+ break;
+ case 0:
+ db_len = 0; /* doorbell is not active for this command */
+ break;
+ default:
+ return -EINVAL; /* should never hit this */
+ }
+
+ /*
+	 * When we end up splitting up a single request (i.e. a buf slice) into
+ * multiple DMA requests, we have to manage the sync data carefully.
+ * There can only be one presync sem. That needs to be on every xfer
+ * so that the DMA engine doesn't transfer data before the receiver is
+ * ready. We only do the doorbell and postsync sems after the xfer.
+ * To guarantee previous xfers for the request are complete, we use a
+ * fence.
+ */
+ dev_addr = req->dev_addr;
+ for_each_sgtable_sg(slice->sgt, sg, i) {
+ slice->reqs[i].cmd = cmd;
+ slice->reqs[i].src_addr = cpu_to_le64(slice->dir == DMA_TO_DEVICE ?
+ sg_dma_address(sg) : dev_addr);
+ slice->reqs[i].dest_addr = cpu_to_le64(slice->dir == DMA_TO_DEVICE ?
+ dev_addr : sg_dma_address(sg));
+ /*
+		 * sg_dma_len(sg) returns the size of a DMA segment. The maximum
+		 * DMA segment size is set to UINT_MAX by qaic, hence the return
+		 * value of sg_dma_len(sg) can never exceed the u32 range. So,
+		 * by downsizing we are not corrupting the value.
+ */
+ slice->reqs[i].len = cpu_to_le32((u32)sg_dma_len(sg));
+ switch (presync_sem) {
+ case BIT(0):
+ slice->reqs[i].sem_cmd0 = cpu_to_le32(ENCODE_SEM(req->sem0.val,
+ req->sem0.index,
+ req->sem0.presync,
+ req->sem0.cmd,
+ req->sem0.flags));
+ break;
+ case BIT(1):
+ slice->reqs[i].sem_cmd1 = cpu_to_le32(ENCODE_SEM(req->sem1.val,
+ req->sem1.index,
+ req->sem1.presync,
+ req->sem1.cmd,
+ req->sem1.flags));
+ break;
+ case BIT(2):
+ slice->reqs[i].sem_cmd2 = cpu_to_le32(ENCODE_SEM(req->sem2.val,
+ req->sem2.index,
+ req->sem2.presync,
+ req->sem2.cmd,
+ req->sem2.flags));
+ break;
+ case BIT(3):
+ slice->reqs[i].sem_cmd3 = cpu_to_le32(ENCODE_SEM(req->sem3.val,
+ req->sem3.index,
+ req->sem3.presync,
+ req->sem3.cmd,
+ req->sem3.flags));
+ break;
+ }
+ dev_addr += sg_dma_len(sg);
+ }
+	/* add the post-transfer steps to the last segment */
+ i--;
+ slice->reqs[i].cmd |= GEN_COMPLETION;
+ slice->reqs[i].db_addr = db_addr;
+ slice->reqs[i].db_len = db_len;
+ slice->reqs[i].db_data = db_data;
+ /*
+ * Add a fence if we have more than one request going to the hardware
+ * representing the entirety of the user request, and the user request
+ * has no presync condition.
+ * Fences are expensive, so we try to avoid them. We rely on the
+ * hardware behavior to avoid needing one when there is a presync
+ * condition. When a presync exists, all requests for that same
+	 * presync will be queued into a FIFO. Since we attach the post-xfer
+	 * activity only to the last request we queue, the hardware will ensure
+	 * that the last queued request is processed last, making sure the
+	 * post-xfer activity happens at the right time without a fence.
+ */
+ if (i && !presync_sem)
+ req->sem0.flags |= (slice->dir == DMA_TO_DEVICE ?
+ QAIC_SEM_INSYNCFENCE : QAIC_SEM_OUTSYNCFENCE);
+ slice->reqs[i].sem_cmd0 = cpu_to_le32(ENCODE_SEM(req->sem0.val, req->sem0.index,
+ req->sem0.presync, req->sem0.cmd,
+ req->sem0.flags));
+ slice->reqs[i].sem_cmd1 = cpu_to_le32(ENCODE_SEM(req->sem1.val, req->sem1.index,
+ req->sem1.presync, req->sem1.cmd,
+ req->sem1.flags));
+ slice->reqs[i].sem_cmd2 = cpu_to_le32(ENCODE_SEM(req->sem2.val, req->sem2.index,
+ req->sem2.presync, req->sem2.cmd,
+ req->sem2.flags));
+ slice->reqs[i].sem_cmd3 = cpu_to_le32(ENCODE_SEM(req->sem3.val, req->sem3.index,
+ req->sem3.presync, req->sem3.cmd,
+ req->sem3.flags));
+
+ return 0;
+}
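+
+/*
+ * Sketch of the sync handling in encode_reqs() above: a DMA_TO_DEVICE slice
+ * that maps to two DMA requests and has a presync configured on sem0 gets
+ * that sem0 encoding on both requests, while the doorbell, completion and
+ * remaining sems are attached only to the second request. If the same slice
+ * had no presync at all, sem0 on the final request would instead carry
+ * QAIC_SEM_INSYNCFENCE so the post-transfer activity waits for the earlier
+ * request to finish.
+ */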
+
+static int qaic_map_one_slice(struct qaic_device *qdev, struct qaic_bo *bo,
+ struct qaic_attach_slice_entry *slice_ent)
+{
+ struct sg_table *sgt = NULL;
+ struct bo_slice *slice;
+ int ret;
+
+ ret = clone_range_of_sgt_for_slice(qdev, &sgt, bo->sgt, slice_ent->size, slice_ent->offset);
+ if (ret)
+ goto out;
+
+ slice = kmalloc(sizeof(*slice), GFP_KERNEL);
+ if (!slice) {
+ ret = -ENOMEM;
+ goto free_sgt;
+ }
+
+ slice->reqs = kcalloc(sgt->nents, sizeof(*slice->reqs), GFP_KERNEL);
+ if (!slice->reqs) {
+ ret = -ENOMEM;
+ goto free_slice;
+ }
+
+ slice->no_xfer = !slice_ent->size;
+ slice->sgt = sgt;
+ slice->nents = sgt->nents;
+ slice->dir = bo->dir;
+ slice->bo = bo;
+ slice->size = slice_ent->size;
+ slice->offset = slice_ent->offset;
+
+ ret = encode_reqs(qdev, slice, slice_ent);
+ if (ret)
+ goto free_req;
+
+ bo->total_slice_nents += sgt->nents;
+ kref_init(&slice->ref_count);
+ drm_gem_object_get(&bo->base);
+ list_add_tail(&slice->slice, &bo->slices);
+
+ return 0;
+
+free_req:
+ kfree(slice->reqs);
+free_slice:
+ kfree(slice);
+free_sgt:
+ sg_free_table(sgt);
+ kfree(sgt);
+out:
+ return ret;
+}
+
+static int create_sgt(struct qaic_device *qdev, struct sg_table **sgt_out, u64 size)
+{
+ struct scatterlist *sg;
+ struct sg_table *sgt;
+ struct page **pages;
+ int *pages_order;
+ int buf_extra;
+ int max_order;
+ int nr_pages;
+ int ret = 0;
+ int i, j, k;
+ int order;
+
+ if (size) {
+ nr_pages = DIV_ROUND_UP(size, PAGE_SIZE);
+ /*
+ * calculate how much extra we are going to allocate, to remove
+ * later
+ */
+ buf_extra = (PAGE_SIZE - size % PAGE_SIZE) % PAGE_SIZE;
+ max_order = min(MAX_ORDER - 1, get_order(size));
+ } else {
+		/* allocate a single page for bookkeeping */
+ nr_pages = 1;
+ buf_extra = 0;
+ max_order = 0;
+ }
+
+ pages = kvmalloc_array(nr_pages, sizeof(*pages) + sizeof(*pages_order), GFP_KERNEL);
+ if (!pages) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ pages_order = (void *)pages + sizeof(*pages) * nr_pages;
+
+ /*
+ * Allocate requested memory using alloc_pages. It is possible to allocate
+ * the requested memory in multiple chunks by calling alloc_pages
+	 * multiple times. Use an SG table to handle the allocated pages.
+ */
+ i = 0;
+ while (nr_pages > 0) {
+ order = min(get_order(nr_pages * PAGE_SIZE), max_order);
+ while (1) {
+ pages[i] = alloc_pages(GFP_KERNEL | GFP_HIGHUSER |
+ __GFP_NOWARN | __GFP_ZERO |
+ (order ? __GFP_NORETRY : __GFP_RETRY_MAYFAIL),
+ order);
+ if (pages[i])
+ break;
+ if (!order--) {
+ ret = -ENOMEM;
+ goto free_partial_alloc;
+ }
+ }
+
+ max_order = order;
+ pages_order[i] = order;
+
+ nr_pages -= 1 << order;
+ if (nr_pages <= 0)
+			/* account for over-allocation */
+ buf_extra += abs(nr_pages) * PAGE_SIZE;
+ i++;
+ }
+
+ sgt = kmalloc(sizeof(*sgt), GFP_KERNEL);
+ if (!sgt) {
+ ret = -ENOMEM;
+ goto free_partial_alloc;
+ }
+
+ if (sg_alloc_table(sgt, i, GFP_KERNEL)) {
+ ret = -ENOMEM;
+ goto free_sgt;
+ }
+
+ /* Populate the SG table with the allocated memory pages */
+ sg = sgt->sgl;
+ for (k = 0; k < i; k++, sg = sg_next(sg)) {
+ /* Last entry requires special handling */
+ if (k < i - 1) {
+ sg_set_page(sg, pages[k], PAGE_SIZE << pages_order[k], 0);
+ } else {
+ sg_set_page(sg, pages[k], (PAGE_SIZE << pages_order[k]) - buf_extra, 0);
+ sg_mark_end(sg);
+ }
+ }
+
+ kvfree(pages);
+ *sgt_out = sgt;
+ return ret;
+
+free_sgt:
+ kfree(sgt);
+free_partial_alloc:
+ for (j = 0; j < i; j++)
+ __free_pages(pages[j], pages_order[j]);
+ kvfree(pages);
+out:
+ *sgt_out = NULL;
+ return ret;
+}
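+
+/*
+ * Worked example for create_sgt() above (sizes illustrative): a request for
+ * three pages first attempts a single order-2 (four page) allocation. If that
+ * succeeds, nr_pages drops to -1, buf_extra grows by one page, and the single
+ * SG entry is trimmed to (PAGE_SIZE << 2) - buf_extra, i.e. exactly the three
+ * pages requested. If the order-2 allocation fails, progressively smaller
+ * orders are tried, down to order 0.
+ */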
+
+static bool invalid_sem(struct qaic_sem *sem)
+{
+ if (sem->val & ~SEM_VAL_MASK || sem->index & ~SEM_INDEX_MASK ||
+ !(sem->presync == 0 || sem->presync == 1) || sem->pad ||
+ sem->flags & ~(QAIC_SEM_INSYNCFENCE | QAIC_SEM_OUTSYNCFENCE) ||
+ sem->cmd > QAIC_SEM_WAIT_GT_0)
+ return true;
+ return false;
+}
+
+static int qaic_validate_req(struct qaic_device *qdev, struct qaic_attach_slice_entry *slice_ent,
+ u32 count, u64 total_size)
+{
+ int i;
+
+ for (i = 0; i < count; i++) {
+ if (!(slice_ent[i].db_len == 32 || slice_ent[i].db_len == 16 ||
+ slice_ent[i].db_len == 8 || slice_ent[i].db_len == 0) ||
+ invalid_sem(&slice_ent[i].sem0) || invalid_sem(&slice_ent[i].sem1) ||
+ invalid_sem(&slice_ent[i].sem2) || invalid_sem(&slice_ent[i].sem3))
+ return -EINVAL;
+
+ if (slice_ent[i].offset + slice_ent[i].size > total_size)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void qaic_free_sgt(struct sg_table *sgt)
+{
+ struct scatterlist *sg;
+
+ for (sg = sgt->sgl; sg; sg = sg_next(sg))
+ if (sg_page(sg))
+ __free_pages(sg_page(sg), get_order(sg->length));
+ sg_free_table(sgt);
+ kfree(sgt);
+}
+
+static void qaic_gem_print_info(struct drm_printer *p, unsigned int indent,
+ const struct drm_gem_object *obj)
+{
+ struct qaic_bo *bo = to_qaic_bo(obj);
+
+ drm_printf_indent(p, indent, "user requested size=%llu\n", bo->size);
+}
+
+static const struct vm_operations_struct drm_vm_ops = {
+ .open = drm_gem_vm_open,
+ .close = drm_gem_vm_close,
+};
+
+static int qaic_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
+{
+ struct qaic_bo *bo = to_qaic_bo(obj);
+ unsigned long offset = 0;
+ struct scatterlist *sg;
+	int ret = 0;
+
+ if (obj->import_attach)
+ return -EINVAL;
+
+ for (sg = bo->sgt->sgl; sg; sg = sg_next(sg)) {
+ if (sg_page(sg)) {
+ ret = remap_pfn_range(vma, vma->vm_start + offset, page_to_pfn(sg_page(sg)),
+ sg->length, vma->vm_page_prot);
+ if (ret)
+ goto out;
+ offset += sg->length;
+ }
+ }
+
+out:
+ return ret;
+}
+
+static void qaic_free_object(struct drm_gem_object *obj)
+{
+ struct qaic_bo *bo = to_qaic_bo(obj);
+
+ if (obj->import_attach) {
+ /* DMABUF/PRIME Path */
+ dma_buf_detach(obj->import_attach->dmabuf, obj->import_attach);
+ dma_buf_put(obj->import_attach->dmabuf);
+ } else {
+ /* Private buffer allocation path */
+ qaic_free_sgt(bo->sgt);
+ }
+
+ drm_gem_object_release(obj);
+ kfree(bo);
+}
+
+static const struct drm_gem_object_funcs qaic_gem_funcs = {
+ .free = qaic_free_object,
+ .print_info = qaic_gem_print_info,
+ .mmap = qaic_gem_object_mmap,
+ .vm_ops = &drm_vm_ops,
+};
+
+static struct qaic_bo *qaic_alloc_init_bo(void)
+{
+ struct qaic_bo *bo;
+
+ bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+ if (!bo)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_LIST_HEAD(&bo->slices);
+ init_completion(&bo->xfer_done);
+ complete_all(&bo->xfer_done);
+
+ return bo;
+}
+
+int qaic_create_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv)
+{
+ struct qaic_create_bo *args = data;
+ int usr_rcu_id, qdev_rcu_id;
+ struct drm_gem_object *obj;
+ struct qaic_device *qdev;
+ struct qaic_user *usr;
+ struct qaic_bo *bo;
+ size_t size;
+ int ret;
+
+ if (args->pad)
+ return -EINVAL;
+
+ usr = file_priv->driver_priv;
+ usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
+ if (!usr->qddev) {
+ ret = -ENODEV;
+ goto unlock_usr_srcu;
+ }
+
+ qdev = usr->qddev->qdev;
+ qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
+ if (qdev->in_reset) {
+ ret = -ENODEV;
+ goto unlock_dev_srcu;
+ }
+
+ size = PAGE_ALIGN(args->size);
+ if (size == 0) {
+ ret = -EINVAL;
+ goto unlock_dev_srcu;
+ }
+
+ bo = qaic_alloc_init_bo();
+ if (IS_ERR(bo)) {
+ ret = PTR_ERR(bo);
+ goto unlock_dev_srcu;
+ }
+ obj = &bo->base;
+
+ drm_gem_private_object_init(dev, obj, size);
+
+ obj->funcs = &qaic_gem_funcs;
+ ret = create_sgt(qdev, &bo->sgt, size);
+ if (ret)
+ goto free_bo;
+
+ bo->size = args->size;
+
+ ret = drm_gem_handle_create(file_priv, obj, &args->handle);
+ if (ret)
+ goto free_sgt;
+
+ bo->handle = args->handle;
+ drm_gem_object_put(obj);
+ srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
+ srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
+
+ return 0;
+
+free_sgt:
+ qaic_free_sgt(bo->sgt);
+free_bo:
+ kfree(bo);
+unlock_dev_srcu:
+ srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
+unlock_usr_srcu:
+ srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
+ return ret;
+}
+
+int qaic_mmap_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv)
+{
+ struct qaic_mmap_bo *args = data;
+ int usr_rcu_id, qdev_rcu_id;
+ struct drm_gem_object *obj;
+ struct qaic_device *qdev;
+ struct qaic_user *usr;
+ int ret;
+
+ usr = file_priv->driver_priv;
+ usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
+ if (!usr->qddev) {
+ ret = -ENODEV;
+ goto unlock_usr_srcu;
+ }
+
+ qdev = usr->qddev->qdev;
+ qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
+ if (qdev->in_reset) {
+ ret = -ENODEV;
+ goto unlock_dev_srcu;
+ }
+
+ obj = drm_gem_object_lookup(file_priv, args->handle);
+ if (!obj) {
+ ret = -ENOENT;
+ goto unlock_dev_srcu;
+ }
+
+ ret = drm_gem_create_mmap_offset(obj);
+ if (ret == 0)
+ args->offset = drm_vma_node_offset_addr(&obj->vma_node);
+
+ drm_gem_object_put(obj);
+
+unlock_dev_srcu:
+ srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
+unlock_usr_srcu:
+ srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
+ return ret;
+}
+
+struct drm_gem_object *qaic_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf)
+{
+ struct dma_buf_attachment *attach;
+ struct drm_gem_object *obj;
+ struct qaic_bo *bo;
+ size_t size;
+ int ret;
+
+ bo = qaic_alloc_init_bo();
+ if (IS_ERR(bo)) {
+ ret = PTR_ERR(bo);
+ goto out;
+ }
+
+ obj = &bo->base;
+ get_dma_buf(dma_buf);
+
+ attach = dma_buf_attach(dma_buf, dev->dev);
+ if (IS_ERR(attach)) {
+ ret = PTR_ERR(attach);
+ goto attach_fail;
+ }
+
+ size = PAGE_ALIGN(attach->dmabuf->size);
+ if (size == 0) {
+ ret = -EINVAL;
+ goto size_align_fail;
+ }
+
+ drm_gem_private_object_init(dev, obj, size);
+ /*
+	 * Skip dma_buf_map_attachment() since we do not know the direction
+	 * just yet. Once the direction is known, in the subsequent IOCTL to
+	 * attach slicing, the mapping is done there.
+ */
+
+ obj->funcs = &qaic_gem_funcs;
+ obj->import_attach = attach;
+ obj->resv = dma_buf->resv;
+
+ return obj;
+
+size_align_fail:
+ dma_buf_detach(dma_buf, attach);
+attach_fail:
+ dma_buf_put(dma_buf);
+ kfree(bo);
+out:
+ return ERR_PTR(ret);
+}
+
+static int qaic_prepare_import_bo(struct qaic_bo *bo, struct qaic_attach_slice_hdr *hdr)
+{
+ struct drm_gem_object *obj = &bo->base;
+ struct sg_table *sgt;
+ int ret;
+
+ if (obj->import_attach->dmabuf->size < hdr->size)
+ return -EINVAL;
+
+ sgt = dma_buf_map_attachment(obj->import_attach, hdr->dir);
+ if (IS_ERR(sgt)) {
+ ret = PTR_ERR(sgt);
+ return ret;
+ }
+
+ bo->sgt = sgt;
+ bo->size = hdr->size;
+
+ return 0;
+}
+
+static int qaic_prepare_export_bo(struct qaic_device *qdev, struct qaic_bo *bo,
+ struct qaic_attach_slice_hdr *hdr)
+{
+ int ret;
+
+ if (bo->size != hdr->size)
+ return -EINVAL;
+
+ ret = dma_map_sgtable(&qdev->pdev->dev, bo->sgt, hdr->dir, 0);
+ if (ret)
+ return -EFAULT;
+
+ return 0;
+}
+
+static int qaic_prepare_bo(struct qaic_device *qdev, struct qaic_bo *bo,
+ struct qaic_attach_slice_hdr *hdr)
+{
+ int ret;
+
+ if (bo->base.import_attach)
+ ret = qaic_prepare_import_bo(bo, hdr);
+ else
+ ret = qaic_prepare_export_bo(qdev, bo, hdr);
+
+ if (ret == 0)
+ bo->dir = hdr->dir;
+
+ return ret;
+}
+
+static void qaic_unprepare_import_bo(struct qaic_bo *bo)
+{
+ dma_buf_unmap_attachment(bo->base.import_attach, bo->sgt, bo->dir);
+ bo->sgt = NULL;
+ bo->size = 0;
+}
+
+static void qaic_unprepare_export_bo(struct qaic_device *qdev, struct qaic_bo *bo)
+{
+ dma_unmap_sgtable(&qdev->pdev->dev, bo->sgt, bo->dir, 0);
+}
+
+static void qaic_unprepare_bo(struct qaic_device *qdev, struct qaic_bo *bo)
+{
+ if (bo->base.import_attach)
+ qaic_unprepare_import_bo(bo);
+ else
+ qaic_unprepare_export_bo(qdev, bo);
+
+ bo->dir = 0;
+}
+
+static void qaic_free_slices_bo(struct qaic_bo *bo)
+{
+ struct bo_slice *slice, *temp;
+
+ list_for_each_entry_safe(slice, temp, &bo->slices, slice)
+ kref_put(&slice->ref_count, free_slice);
+}
+
+static int qaic_attach_slicing_bo(struct qaic_device *qdev, struct qaic_bo *bo,
+ struct qaic_attach_slice_hdr *hdr,
+ struct qaic_attach_slice_entry *slice_ent)
+{
+ int ret, i;
+
+ for (i = 0; i < hdr->count; i++) {
+ ret = qaic_map_one_slice(qdev, bo, &slice_ent[i]);
+ if (ret) {
+ qaic_free_slices_bo(bo);
+ return ret;
+ }
+ }
+
+ if (bo->total_slice_nents > qdev->dbc[hdr->dbc_id].nelem) {
+ qaic_free_slices_bo(bo);
+ return -ENOSPC;
+ }
+
+ bo->sliced = true;
+ bo->nr_slice = hdr->count;
+ list_add_tail(&bo->bo_list, &qdev->dbc[hdr->dbc_id].bo_lists);
+
+ return 0;
+}
+
+int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv)
+{
+ struct qaic_attach_slice_entry *slice_ent;
+ struct qaic_attach_slice *args = data;
+ struct dma_bridge_chan *dbc;
+ int usr_rcu_id, qdev_rcu_id;
+ struct drm_gem_object *obj;
+ struct qaic_device *qdev;
+ unsigned long arg_size;
+ struct qaic_user *usr;
+ u8 __user *user_data;
+ struct qaic_bo *bo;
+ int ret;
+
+ usr = file_priv->driver_priv;
+ usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
+ if (!usr->qddev) {
+ ret = -ENODEV;
+ goto unlock_usr_srcu;
+ }
+
+ qdev = usr->qddev->qdev;
+ qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
+ if (qdev->in_reset) {
+ ret = -ENODEV;
+ goto unlock_dev_srcu;
+ }
+
+ if (args->hdr.count == 0) {
+ ret = -EINVAL;
+ goto unlock_dev_srcu;
+ }
+
+ arg_size = args->hdr.count * sizeof(*slice_ent);
+ if (arg_size / args->hdr.count != sizeof(*slice_ent)) {
+ ret = -EINVAL;
+ goto unlock_dev_srcu;
+ }
+
+ if (args->hdr.dbc_id >= qdev->num_dbc) {
+ ret = -EINVAL;
+ goto unlock_dev_srcu;
+ }
+
+ if (args->hdr.size == 0) {
+ ret = -EINVAL;
+ goto unlock_dev_srcu;
+ }
+
+ if (!(args->hdr.dir == DMA_TO_DEVICE || args->hdr.dir == DMA_FROM_DEVICE)) {
+ ret = -EINVAL;
+ goto unlock_dev_srcu;
+ }
+
+ dbc = &qdev->dbc[args->hdr.dbc_id];
+ if (dbc->usr != usr) {
+ ret = -EINVAL;
+ goto unlock_dev_srcu;
+ }
+
+ if (args->data == 0) {
+ ret = -EINVAL;
+ goto unlock_dev_srcu;
+ }
+
+ user_data = u64_to_user_ptr(args->data);
+
+ slice_ent = kzalloc(arg_size, GFP_KERNEL);
+ if (!slice_ent) {
+		ret = -ENOMEM;
+ goto unlock_dev_srcu;
+ }
+
+ ret = copy_from_user(slice_ent, user_data, arg_size);
+ if (ret) {
+ ret = -EFAULT;
+ goto free_slice_ent;
+ }
+
+ ret = qaic_validate_req(qdev, slice_ent, args->hdr.count, args->hdr.size);
+ if (ret)
+ goto free_slice_ent;
+
+ obj = drm_gem_object_lookup(file_priv, args->hdr.handle);
+ if (!obj) {
+ ret = -ENOENT;
+ goto free_slice_ent;
+ }
+
+ bo = to_qaic_bo(obj);
+
+ ret = qaic_prepare_bo(qdev, bo, &args->hdr);
+ if (ret)
+ goto put_bo;
+
+ ret = qaic_attach_slicing_bo(qdev, bo, &args->hdr, slice_ent);
+ if (ret)
+ goto unprepare_bo;
+
+ if (args->hdr.dir == DMA_TO_DEVICE)
+ dma_sync_sgtable_for_cpu(&qdev->pdev->dev, bo->sgt, args->hdr.dir);
+
+ bo->dbc = dbc;
+ drm_gem_object_put(obj);
+ srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
+ srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
+
+ return 0;
+
+unprepare_bo:
+ qaic_unprepare_bo(qdev, bo);
+put_bo:
+ drm_gem_object_put(obj);
+free_slice_ent:
+ kfree(slice_ent);
+unlock_dev_srcu:
+ srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
+unlock_usr_srcu:
+ srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
+ return ret;
+}
+
+static inline int copy_exec_reqs(struct qaic_device *qdev, struct bo_slice *slice, u32 dbc_id,
+ u32 head, u32 *ptail)
+{
+ struct dma_bridge_chan *dbc = &qdev->dbc[dbc_id];
+ struct dbc_req *reqs = slice->reqs;
+ u32 tail = *ptail;
+ u32 avail;
+
+ avail = head - tail;
+ if (head <= tail)
+ avail += dbc->nelem;
+
+ --avail;
+
+ if (avail < slice->nents)
+ return -EAGAIN;
+
+ if (tail + slice->nents > dbc->nelem) {
+ avail = dbc->nelem - tail;
+ avail = min_t(u32, avail, slice->nents);
+ memcpy(dbc->req_q_base + tail * get_dbc_req_elem_size(), reqs,
+ sizeof(*reqs) * avail);
+ reqs += avail;
+ avail = slice->nents - avail;
+ if (avail)
+ memcpy(dbc->req_q_base, reqs, sizeof(*reqs) * avail);
+ } else {
+ memcpy(dbc->req_q_base + tail * get_dbc_req_elem_size(), reqs,
+ sizeof(*reqs) * slice->nents);
+ }
+
+ *ptail = (tail + slice->nents) % dbc->nelem;
+
+ return 0;
+}
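+
+/*
+ * Worked example of the free-space computation in copy_exec_reqs() above
+ * (numbers illustrative): with nelem = 8, head = 2 and tail = 6, the
+ * unsigned subtraction head - tail wraps, and adding nelem yields avail = 4.
+ * One element is then held back (the --avail) so a full ring never looks
+ * identical to an empty one (head == tail), leaving 3 request elements that
+ * can be queued.
+ */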
+
+/*
+ * Based on the value of resize we may only need to transmit first_n
+ * entries and the last entry, with last_bytes to send from the last entry.
+ * Note that first_n could be 0.
+ */
+static inline int copy_partial_exec_reqs(struct qaic_device *qdev, struct bo_slice *slice,
+ u64 resize, u32 dbc_id, u32 head, u32 *ptail)
+{
+ struct dma_bridge_chan *dbc = &qdev->dbc[dbc_id];
+ struct dbc_req *reqs = slice->reqs;
+ struct dbc_req *last_req;
+ u32 tail = *ptail;
+ u64 total_bytes;
+ u64 last_bytes;
+ u32 first_n;
+ u32 avail;
+ int i;
+
+ avail = head - tail;
+ if (head <= tail)
+ avail += dbc->nelem;
+
+ --avail;
+
+ total_bytes = 0;
+ for (i = 0; i < slice->nents; i++) {
+ total_bytes += le32_to_cpu(reqs[i].len);
+ if (total_bytes >= resize)
+ break;
+ }
+
+	if (total_bytes < resize) {
+		/* User space should have used the full buffer path. */
+		return -EINVAL;
+	}
+
+ first_n = i;
+ last_bytes = i ? resize + le32_to_cpu(reqs[i].len) - total_bytes : resize;
+
+ if (avail < (first_n + 1))
+ return -EAGAIN;
+
+ if (first_n) {
+ if (tail + first_n > dbc->nelem) {
+ avail = dbc->nelem - tail;
+ avail = min_t(u32, avail, first_n);
+ memcpy(dbc->req_q_base + tail * get_dbc_req_elem_size(), reqs,
+ sizeof(*reqs) * avail);
+ last_req = reqs + avail;
+ avail = first_n - avail;
+ if (avail)
+ memcpy(dbc->req_q_base, last_req, sizeof(*reqs) * avail);
+ } else {
+ memcpy(dbc->req_q_base + tail * get_dbc_req_elem_size(), reqs,
+ sizeof(*reqs) * first_n);
+ }
+ }
+
+	/*
+	 * Copy over the last entry. Here we need to adjust len to the leftover
+	 * size, and take the src and dst addresses from the entry that resize
+	 * cuts short.
+	 */
+ last_req = dbc->req_q_base + (tail + first_n) % dbc->nelem * get_dbc_req_elem_size();
+ memcpy(last_req, reqs + slice->nents - 1, sizeof(*reqs));
+
+ /*
+	 * last_bytes holds the size of a DMA segment. The maximum DMA segment
+	 * size is set to UINT_MAX by qaic, hence last_bytes can never exceed
+	 * the u32 range. So, by downsizing we are not corrupting the value.
+ */
+ last_req->len = cpu_to_le32((u32)last_bytes);
+ last_req->src_addr = reqs[first_n].src_addr;
+ last_req->dest_addr = reqs[first_n].dest_addr;
+
+ *ptail = (tail + first_n + 1) % dbc->nelem;
+
+ return 0;
+}
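+
+/*
+ * Worked example for copy_partial_exec_reqs() above (lengths illustrative):
+ * for a slice of three 4096-byte requests and resize = 6000, the loop stops
+ * at i = 1 (total_bytes = 8192 >= 6000), so first_n = 1 and
+ * last_bytes = 6000 + 4096 - 8192 = 1904. One full request is copied as-is
+ * and a final 1904-byte request is built from reqs[1], consuming
+ * first_n + 1 = 2 queue elements.
+ */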
+
+static int send_bo_list_to_device(struct qaic_device *qdev, struct drm_file *file_priv,
+ struct qaic_execute_entry *exec, unsigned int count,
+ bool is_partial, struct dma_bridge_chan *dbc, u32 head,
+ u32 *tail)
+{
+ struct qaic_partial_execute_entry *pexec = (struct qaic_partial_execute_entry *)exec;
+ struct drm_gem_object *obj;
+ struct bo_slice *slice;
+ unsigned long flags;
+ struct qaic_bo *bo;
+ bool queued;
+ int i, j;
+ int ret;
+
+ for (i = 0; i < count; i++) {
+ /*
+		 * The ref count will be decremented when the transfer of this
+		 * buffer is complete. That happens inside dbc_irq_threaded_fn().
+ */
+ obj = drm_gem_object_lookup(file_priv,
+ is_partial ? pexec[i].handle : exec[i].handle);
+ if (!obj) {
+ ret = -ENOENT;
+ goto failed_to_send_bo;
+ }
+
+ bo = to_qaic_bo(obj);
+
+ if (!bo->sliced) {
+ ret = -EINVAL;
+ goto failed_to_send_bo;
+ }
+
+ if (is_partial && pexec[i].resize > bo->size) {
+ ret = -EINVAL;
+ goto failed_to_send_bo;
+ }
+
+ spin_lock_irqsave(&dbc->xfer_lock, flags);
+ queued = bo->queued;
+ bo->queued = true;
+ if (queued) {
+ spin_unlock_irqrestore(&dbc->xfer_lock, flags);
+ ret = -EINVAL;
+ goto failed_to_send_bo;
+ }
+
+ bo->req_id = dbc->next_req_id++;
+
+ list_for_each_entry(slice, &bo->slices, slice) {
+ /*
+			 * If this slice does not fall within the given
+			 * resize, then skip this slice and continue the loop.
+ */
+ if (is_partial && pexec[i].resize && pexec[i].resize <= slice->offset)
+ continue;
+
+ for (j = 0; j < slice->nents; j++)
+ slice->reqs[j].req_id = cpu_to_le16(bo->req_id);
+
+ /*
+			 * If it is a partial execute ioctl call, check whether
+			 * resize has cut this slice short; if so, do a partial
+			 * copy, else do a complete copy.
+ */
+ if (is_partial && pexec[i].resize &&
+ pexec[i].resize < slice->offset + slice->size)
+ ret = copy_partial_exec_reqs(qdev, slice,
+ pexec[i].resize - slice->offset,
+ dbc->id, head, tail);
+ else
+ ret = copy_exec_reqs(qdev, slice, dbc->id, head, tail);
+ if (ret) {
+ bo->queued = false;
+ spin_unlock_irqrestore(&dbc->xfer_lock, flags);
+ goto failed_to_send_bo;
+ }
+ }
+ reinit_completion(&bo->xfer_done);
+ list_add_tail(&bo->xfer_list, &dbc->xfer_list);
+ spin_unlock_irqrestore(&dbc->xfer_lock, flags);
+ dma_sync_sgtable_for_device(&qdev->pdev->dev, bo->sgt, bo->dir);
+ }
+
+ return 0;
+
+failed_to_send_bo:
+ if (likely(obj))
+ drm_gem_object_put(obj);
+ for (j = 0; j < i; j++) {
+ spin_lock_irqsave(&dbc->xfer_lock, flags);
+ bo = list_last_entry(&dbc->xfer_list, struct qaic_bo, xfer_list);
+ obj = &bo->base;
+ bo->queued = false;
+ list_del(&bo->xfer_list);
+ spin_unlock_irqrestore(&dbc->xfer_lock, flags);
+ dma_sync_sgtable_for_cpu(&qdev->pdev->dev, bo->sgt, bo->dir);
+ drm_gem_object_put(obj);
+ }
+ return ret;
+}
+
+static void update_profiling_data(struct drm_file *file_priv,
+ struct qaic_execute_entry *exec, unsigned int count,
+ bool is_partial, u64 received_ts, u64 submit_ts, u32 queue_level)
+{
+ struct qaic_partial_execute_entry *pexec = (struct qaic_partial_execute_entry *)exec;
+ struct drm_gem_object *obj;
+ struct qaic_bo *bo;
+ int i;
+
+ for (i = 0; i < count; i++) {
+ /*
+ * Since we already committed the BO to hardware, the only way
+ * this should fail is a pending signal. We can't cancel the
+ * submit to hardware, so we have to just skip the profiling
+ * data. In case the signal is not fatal to the process, we
+ * return success so that the user doesn't try to resubmit.
+ */
+ obj = drm_gem_object_lookup(file_priv,
+ is_partial ? pexec[i].handle : exec[i].handle);
+ if (!obj)
+ break;
+ bo = to_qaic_bo(obj);
+ bo->perf_stats.req_received_ts = received_ts;
+ bo->perf_stats.req_submit_ts = submit_ts;
+ bo->perf_stats.queue_level_before = queue_level;
+ queue_level += bo->total_slice_nents;
+ drm_gem_object_put(obj);
+ }
+}
+
+static int __qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv,
+ bool is_partial)
+{
+ struct qaic_partial_execute_entry *pexec;
+ struct qaic_execute *args = data;
+ struct qaic_execute_entry *exec;
+ struct dma_bridge_chan *dbc;
+ int usr_rcu_id, qdev_rcu_id;
+ struct qaic_device *qdev;
+ struct qaic_user *usr;
+ u8 __user *user_data;
+ unsigned long n;
+ u64 received_ts;
+ u32 queue_level;
+ u64 submit_ts;
+ int rcu_id;
+ u32 head;
+ u32 tail;
+ u64 size;
+ int ret;
+
+ received_ts = ktime_get_ns();
+
+ size = is_partial ? sizeof(*pexec) : sizeof(*exec);
+
+ n = (unsigned long)size * args->hdr.count;
+ if (args->hdr.count == 0 || n / args->hdr.count != size)
+ return -EINVAL;
+
+ user_data = u64_to_user_ptr(args->data);
+
+ exec = kcalloc(args->hdr.count, size, GFP_KERNEL);
+ pexec = (struct qaic_partial_execute_entry *)exec;
+ if (!exec)
+ return -ENOMEM;
+
+ if (copy_from_user(exec, user_data, n)) {
+ ret = -EFAULT;
+ goto free_exec;
+ }
+
+ usr = file_priv->driver_priv;
+ usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
+ if (!usr->qddev) {
+ ret = -ENODEV;
+ goto unlock_usr_srcu;
+ }
+
+ qdev = usr->qddev->qdev;
+ qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
+ if (qdev->in_reset) {
+ ret = -ENODEV;
+ goto unlock_dev_srcu;
+ }
+
+ if (args->hdr.dbc_id >= qdev->num_dbc) {
+ ret = -EINVAL;
+ goto unlock_dev_srcu;
+ }
+
+ dbc = &qdev->dbc[args->hdr.dbc_id];
+
+ rcu_id = srcu_read_lock(&dbc->ch_lock);
+ if (!dbc->usr || dbc->usr->handle != usr->handle) {
+ ret = -EPERM;
+ goto release_ch_rcu;
+ }
+
+ head = readl(dbc->dbc_base + REQHP_OFF);
+ tail = readl(dbc->dbc_base + REQTP_OFF);
+
+ if (head == U32_MAX || tail == U32_MAX) {
+ /* PCI link error */
+ ret = -ENODEV;
+ goto release_ch_rcu;
+ }
+
+ queue_level = head <= tail ? tail - head : dbc->nelem - (head - tail);
+
+ ret = send_bo_list_to_device(qdev, file_priv, exec, args->hdr.count, is_partial, dbc,
+ head, &tail);
+ if (ret)
+ goto release_ch_rcu;
+
+ /* Finalize commit to hardware */
+ submit_ts = ktime_get_ns();
+ writel(tail, dbc->dbc_base + REQTP_OFF);
+
+ update_profiling_data(file_priv, exec, args->hdr.count, is_partial, received_ts,
+ submit_ts, queue_level);
+
+ if (datapath_polling)
+ schedule_work(&dbc->poll_work);
+
+release_ch_rcu:
+ srcu_read_unlock(&dbc->ch_lock, rcu_id);
+unlock_dev_srcu:
+ srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
+unlock_usr_srcu:
+ srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
+free_exec:
+ kfree(exec);
+ return ret;
+}
+
+int qaic_execute_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv)
+{
+ return __qaic_execute_bo_ioctl(dev, data, file_priv, false);
+}
+
+int qaic_partial_execute_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv)
+{
+ return __qaic_execute_bo_ioctl(dev, data, file_priv, true);
+}
+
+/*
+ * Our interrupt handling is a bit more complicated than a simple ideal, but
+ * sadly necessary.
+ *
+ * Each dbc has a completion queue. Entries in the queue correspond to DMA
+ * requests which the device has processed. The hardware already has built-in
+ * IRQ mitigation. When the device puts an entry into the queue, it will
+ * only trigger an interrupt if the queue was empty. Therefore, when adding
+ * the Nth event to a non-empty queue, the hardware doesn't trigger an
+ * interrupt. This means the host doesn't get additional interrupts signaling
+ * the same thing - the queue has something to process.
+ * This behavior can be overridden in the DMA request.
+ * This means that when the host receives an interrupt, it is required to
+ * drain the queue.
+ *
+ * This behavior is what NAPI attempts to accomplish, although we can't use
+ * NAPI as we don't have a netdev. We use threaded irqs instead.
+ *
+ * However, there is a situation where the host drains the queue fast enough
+ * that every event causes an interrupt. Typically this is not a problem as
+ * the rate of events would be low. However, that is not the case with
+ * lprnet for example. On an Intel Xeon D-2191 where we run 8 instances of
+ * lprnet, the host receives roughly 80k interrupts per second from the device
+ * (per /proc/interrupts). While NAPI documentation indicates the host should
+ * just chug along, sadly that behavior causes instability in some hosts.
+ *
+ * Therefore, we implement an interrupt disable scheme similar to NAPI. The
+ * key difference is that we will delay after draining the queue for a small
+ * time to allow additional events to come in via polling. Using the above
+ * lprnet workload, this reduces the number of interrupts processed from
+ * ~80k/sec to about 64 in 5 minutes and appears to solve the system
+ * instability.
+ */
+irqreturn_t dbc_irq_handler(int irq, void *data)
+{
+ struct dma_bridge_chan *dbc = data;
+ int rcu_id;
+ u32 head;
+ u32 tail;
+
+ rcu_id = srcu_read_lock(&dbc->ch_lock);
+
+ if (!dbc->usr) {
+ srcu_read_unlock(&dbc->ch_lock, rcu_id);
+ return IRQ_HANDLED;
+ }
+
+ head = readl(dbc->dbc_base + RSPHP_OFF);
+ if (head == U32_MAX) { /* PCI link error */
+ srcu_read_unlock(&dbc->ch_lock, rcu_id);
+ return IRQ_NONE;
+ }
+
+ tail = readl(dbc->dbc_base + RSPTP_OFF);
+ if (tail == U32_MAX) { /* PCI link error */
+ srcu_read_unlock(&dbc->ch_lock, rcu_id);
+ return IRQ_NONE;
+ }
+
+ if (head == tail) { /* queue empty */
+ srcu_read_unlock(&dbc->ch_lock, rcu_id);
+ return IRQ_NONE;
+ }
+
+ disable_irq_nosync(irq);
+ srcu_read_unlock(&dbc->ch_lock, rcu_id);
+ return IRQ_WAKE_THREAD;
+}
+
+void irq_polling_work(struct work_struct *work)
+{
+ struct dma_bridge_chan *dbc = container_of(work, struct dma_bridge_chan, poll_work);
+ unsigned long flags;
+ int rcu_id;
+ u32 head;
+ u32 tail;
+
+ rcu_id = srcu_read_lock(&dbc->ch_lock);
+
+ while (1) {
+ if (dbc->qdev->in_reset) {
+ srcu_read_unlock(&dbc->ch_lock, rcu_id);
+ return;
+ }
+ if (!dbc->usr) {
+ srcu_read_unlock(&dbc->ch_lock, rcu_id);
+ return;
+ }
+ spin_lock_irqsave(&dbc->xfer_lock, flags);
+ if (list_empty(&dbc->xfer_list)) {
+ spin_unlock_irqrestore(&dbc->xfer_lock, flags);
+ srcu_read_unlock(&dbc->ch_lock, rcu_id);
+ return;
+ }
+ spin_unlock_irqrestore(&dbc->xfer_lock, flags);
+
+ head = readl(dbc->dbc_base + RSPHP_OFF);
+ if (head == U32_MAX) { /* PCI link error */
+ srcu_read_unlock(&dbc->ch_lock, rcu_id);
+ return;
+ }
+
+ tail = readl(dbc->dbc_base + RSPTP_OFF);
+ if (tail == U32_MAX) { /* PCI link error */
+ srcu_read_unlock(&dbc->ch_lock, rcu_id);
+ return;
+ }
+
+ if (head != tail) {
+ irq_wake_thread(dbc->irq, dbc);
+ srcu_read_unlock(&dbc->ch_lock, rcu_id);
+ return;
+ }
+
+ cond_resched();
+ usleep_range(datapath_poll_interval_us, 2 * datapath_poll_interval_us);
+ }
+}
+
+irqreturn_t dbc_irq_threaded_fn(int irq, void *data)
+{
+ struct dma_bridge_chan *dbc = data;
+ int event_count = NUM_EVENTS;
+ int delay_count = NUM_DELAYS;
+ struct qaic_device *qdev;
+ struct qaic_bo *bo, *i;
+ struct dbc_rsp *rsp;
+ unsigned long flags;
+ int rcu_id;
+ u16 status;
+ u16 req_id;
+ u32 head;
+ u32 tail;
+
+ rcu_id = srcu_read_lock(&dbc->ch_lock);
+
+ head = readl(dbc->dbc_base + RSPHP_OFF);
+ if (head == U32_MAX) /* PCI link error */
+ goto error_out;
+
+ qdev = dbc->qdev;
+read_fifo:
+
+ if (!event_count) {
+ event_count = NUM_EVENTS;
+ cond_resched();
+ }
+
+ /*
+	 * If this channel isn't assigned, or gets unassigned during processing,
+	 * we have nothing further to do.
+ */
+ if (!dbc->usr)
+ goto error_out;
+
+ tail = readl(dbc->dbc_base + RSPTP_OFF);
+ if (tail == U32_MAX) /* PCI link error */
+ goto error_out;
+
+ if (head == tail) { /* queue empty */
+ if (delay_count) {
+ --delay_count;
+ usleep_range(100, 200);
+ goto read_fifo; /* check for a new event */
+ }
+ goto normal_out;
+ }
+
+ delay_count = NUM_DELAYS;
+ while (head != tail) {
+ if (!event_count)
+ break;
+ --event_count;
+ rsp = dbc->rsp_q_base + head * sizeof(*rsp);
+ req_id = le16_to_cpu(rsp->req_id);
+ status = le16_to_cpu(rsp->status);
+ if (status)
+ pci_dbg(qdev->pdev, "req_id %d failed with status %d\n", req_id, status);
+ spin_lock_irqsave(&dbc->xfer_lock, flags);
+ /*
+		 * A BO can receive multiple interrupts, since a BO can be
+		 * divided into multiple slices and a buffer receives as many
+		 * interrupts as it has slices. Until it has received interrupts
+		 * for all of its slices, we cannot mark that buffer complete.
+ */
+ list_for_each_entry_safe(bo, i, &dbc->xfer_list, xfer_list) {
+ if (bo->req_id == req_id)
+ bo->nr_slice_xfer_done++;
+ else
+ continue;
+
+ if (bo->nr_slice_xfer_done < bo->nr_slice)
+ break;
+
+ /*
+			 * At this point we have received all the interrupts for
+			 * this BO, which means its execution is complete.
+ */
+ dma_sync_sgtable_for_cpu(&qdev->pdev->dev, bo->sgt, bo->dir);
+ bo->nr_slice_xfer_done = 0;
+ bo->queued = false;
+ list_del(&bo->xfer_list);
+ bo->perf_stats.req_processed_ts = ktime_get_ns();
+ complete_all(&bo->xfer_done);
+ drm_gem_object_put(&bo->base);
+ break;
+ }
+ spin_unlock_irqrestore(&dbc->xfer_lock, flags);
+ head = (head + 1) % dbc->nelem;
+ }
+
+ /*
+	 * Update the head pointer of the response queue and let the device know
+ * that we have consumed elements from the queue.
+ */
+ writel(head, dbc->dbc_base + RSPHP_OFF);
+
+ /* elements might have been put in the queue while we were processing */
+ goto read_fifo;
+
+normal_out:
+ if (likely(!datapath_polling))
+ enable_irq(irq);
+ else
+ schedule_work(&dbc->poll_work);
+	/* Checking the FIFO and enabling IRQs is a race; check for a missed event */
+ tail = readl(dbc->dbc_base + RSPTP_OFF);
+ if (tail != U32_MAX && head != tail) {
+ if (likely(!datapath_polling))
+ disable_irq_nosync(irq);
+ goto read_fifo;
+ }
+ srcu_read_unlock(&dbc->ch_lock, rcu_id);
+ return IRQ_HANDLED;
+
+error_out:
+ srcu_read_unlock(&dbc->ch_lock, rcu_id);
+ if (likely(!datapath_polling))
+ enable_irq(irq);
+ else
+ schedule_work(&dbc->poll_work);
+
+ return IRQ_HANDLED;
+}
+
+int qaic_wait_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv)
+{
+ struct qaic_wait *args = data;
+ int usr_rcu_id, qdev_rcu_id;
+ struct dma_bridge_chan *dbc;
+ struct drm_gem_object *obj;
+ struct qaic_device *qdev;
+ unsigned long timeout;
+ struct qaic_user *usr;
+ struct qaic_bo *bo;
+ int rcu_id;
+ int ret;
+
+ usr = file_priv->driver_priv;
+ usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
+ if (!usr->qddev) {
+ ret = -ENODEV;
+ goto unlock_usr_srcu;
+ }
+
+ qdev = usr->qddev->qdev;
+ qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
+ if (qdev->in_reset) {
+ ret = -ENODEV;
+ goto unlock_dev_srcu;
+ }
+
+ if (args->pad != 0) {
+ ret = -EINVAL;
+ goto unlock_dev_srcu;
+ }
+
+ if (args->dbc_id >= qdev->num_dbc) {
+ ret = -EINVAL;
+ goto unlock_dev_srcu;
+ }
+
+ dbc = &qdev->dbc[args->dbc_id];
+
+ rcu_id = srcu_read_lock(&dbc->ch_lock);
+ if (dbc->usr != usr) {
+ ret = -EPERM;
+ goto unlock_ch_srcu;
+ }
+
+ obj = drm_gem_object_lookup(file_priv, args->handle);
+ if (!obj) {
+ ret = -ENOENT;
+ goto unlock_ch_srcu;
+ }
+
+ bo = to_qaic_bo(obj);
+ timeout = args->timeout ? args->timeout : wait_exec_default_timeout_ms;
+ timeout = msecs_to_jiffies(timeout);
+ ret = wait_for_completion_interruptible_timeout(&bo->xfer_done, timeout);
+ if (!ret) {
+ ret = -ETIMEDOUT;
+ goto put_obj;
+ }
+ if (ret > 0)
+ ret = 0;
+
+ if (!dbc->usr)
+ ret = -EPERM;
+
+put_obj:
+ drm_gem_object_put(obj);
+unlock_ch_srcu:
+ srcu_read_unlock(&dbc->ch_lock, rcu_id);
+unlock_dev_srcu:
+ srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
+unlock_usr_srcu:
+ srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
+ return ret;
+}
+
+int qaic_perf_stats_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv)
+{
+ struct qaic_perf_stats_entry *ent = NULL;
+ struct qaic_perf_stats *args = data;
+ int usr_rcu_id, qdev_rcu_id;
+ struct drm_gem_object *obj;
+ struct qaic_device *qdev;
+ struct qaic_user *usr;
+ struct qaic_bo *bo;
+ int ret, i;
+
+ usr = file_priv->driver_priv;
+ usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
+ if (!usr->qddev) {
+ ret = -ENODEV;
+ goto unlock_usr_srcu;
+ }
+
+ qdev = usr->qddev->qdev;
+ qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
+ if (qdev->in_reset) {
+ ret = -ENODEV;
+ goto unlock_dev_srcu;
+ }
+
+ if (args->hdr.dbc_id >= qdev->num_dbc) {
+ ret = -EINVAL;
+ goto unlock_dev_srcu;
+ }
+
+ ent = kcalloc(args->hdr.count, sizeof(*ent), GFP_KERNEL);
+ if (!ent) {
+		ret = -ENOMEM;
+ goto unlock_dev_srcu;
+ }
+
+ ret = copy_from_user(ent, u64_to_user_ptr(args->data), args->hdr.count * sizeof(*ent));
+ if (ret) {
+ ret = -EFAULT;
+ goto free_ent;
+ }
+
+ for (i = 0; i < args->hdr.count; i++) {
+ obj = drm_gem_object_lookup(file_priv, ent[i].handle);
+ if (!obj) {
+ ret = -ENOENT;
+ goto free_ent;
+ }
+ bo = to_qaic_bo(obj);
+ /*
+		 * If the perf stats ioctl is called before the wait ioctl has
+		 * completed, then the latency information is invalid.
+ */
+ if (bo->perf_stats.req_processed_ts < bo->perf_stats.req_submit_ts) {
+ ent[i].device_latency_us = 0;
+ } else {
+ ent[i].device_latency_us = div_u64((bo->perf_stats.req_processed_ts -
+ bo->perf_stats.req_submit_ts), 1000);
+ }
+ ent[i].submit_latency_us = div_u64((bo->perf_stats.req_submit_ts -
+ bo->perf_stats.req_received_ts), 1000);
+ ent[i].queue_level_before = bo->perf_stats.queue_level_before;
+ ent[i].num_queue_element = bo->total_slice_nents;
+ drm_gem_object_put(obj);
+ }
+
+ if (copy_to_user(u64_to_user_ptr(args->data), ent, args->hdr.count * sizeof(*ent)))
+ ret = -EFAULT;
+
+free_ent:
+ kfree(ent);
+unlock_dev_srcu:
+ srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
+unlock_usr_srcu:
+ srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
+ return ret;
+}
+
+static void empty_xfer_list(struct qaic_device *qdev, struct dma_bridge_chan *dbc)
+{
+ unsigned long flags;
+ struct qaic_bo *bo;
+
+ spin_lock_irqsave(&dbc->xfer_lock, flags);
+ while (!list_empty(&dbc->xfer_list)) {
+ bo = list_first_entry(&dbc->xfer_list, typeof(*bo), xfer_list);
+ bo->queued = false;
+ list_del(&bo->xfer_list);
+ spin_unlock_irqrestore(&dbc->xfer_lock, flags);
+ dma_sync_sgtable_for_cpu(&qdev->pdev->dev, bo->sgt, bo->dir);
+ complete_all(&bo->xfer_done);
+ drm_gem_object_put(&bo->base);
+ spin_lock_irqsave(&dbc->xfer_lock, flags);
+ }
+ spin_unlock_irqrestore(&dbc->xfer_lock, flags);
+}
+
+int disable_dbc(struct qaic_device *qdev, u32 dbc_id, struct qaic_user *usr)
+{
+ if (!qdev->dbc[dbc_id].usr || qdev->dbc[dbc_id].usr->handle != usr->handle)
+ return -EPERM;
+
+ qdev->dbc[dbc_id].usr = NULL;
+ synchronize_srcu(&qdev->dbc[dbc_id].ch_lock);
+ return 0;
+}
+
+/**
+ * enable_dbc - Enable the DBC. DBCs are disabled by removing the user
+ * context. Adding the user context back to the DBC enables it. This function
+ * trusts the DBC ID passed and expects the DBC to be disabled.
+ * @qdev: Qranium device handle
+ * @dbc_id: ID of the DBC
+ * @usr: User context
+ */
+void enable_dbc(struct qaic_device *qdev, u32 dbc_id, struct qaic_user *usr)
+{
+ qdev->dbc[dbc_id].usr = usr;
+}
+
+void wakeup_dbc(struct qaic_device *qdev, u32 dbc_id)
+{
+ struct dma_bridge_chan *dbc = &qdev->dbc[dbc_id];
+
+ dbc->usr = NULL;
+ empty_xfer_list(qdev, dbc);
+ synchronize_srcu(&dbc->ch_lock);
+}
+
+void release_dbc(struct qaic_device *qdev, u32 dbc_id)
+{
+ struct bo_slice *slice, *slice_temp;
+ struct qaic_bo *bo, *bo_temp;
+ struct dma_bridge_chan *dbc;
+
+ dbc = &qdev->dbc[dbc_id];
+ if (!dbc->in_use)
+ return;
+
+ wakeup_dbc(qdev, dbc_id);
+
+ dma_free_coherent(&qdev->pdev->dev, dbc->total_size, dbc->req_q_base, dbc->dma_addr);
+ dbc->total_size = 0;
+ dbc->req_q_base = NULL;
+ dbc->dma_addr = 0;
+ dbc->nelem = 0;
+ dbc->usr = NULL;
+
+ list_for_each_entry_safe(bo, bo_temp, &dbc->bo_lists, bo_list) {
+ list_for_each_entry_safe(slice, slice_temp, &bo->slices, slice)
+ kref_put(&slice->ref_count, free_slice);
+ bo->sliced = false;
+ INIT_LIST_HEAD(&bo->slices);
+ bo->total_slice_nents = 0;
+ bo->dir = 0;
+ bo->dbc = NULL;
+ bo->nr_slice = 0;
+ bo->nr_slice_xfer_done = 0;
+ bo->queued = false;
+ bo->req_id = 0;
+ init_completion(&bo->xfer_done);
+ complete_all(&bo->xfer_done);
+ list_del(&bo->bo_list);
+ bo->perf_stats.req_received_ts = 0;
+ bo->perf_stats.req_submit_ts = 0;
+ bo->perf_stats.req_processed_ts = 0;
+ bo->perf_stats.queue_level_before = 0;
+ }
+
+ dbc->in_use = false;
+ wake_up(&dbc->dbc_release);
+}
diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c
new file mode 100644
index 000000000000..1106ad88a5b6
--- /dev/null
+++ b/drivers/accel/qaic/qaic_drv.c
@@ -0,0 +1,647 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/* Copyright (c) 2019-2021, The Linux Foundation. All rights reserved. */
+/* Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved. */
+
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/idr.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/kref.h>
+#include <linux/mhi.h>
+#include <linux/module.h>
+#include <linux/msi.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+#include <linux/wait.h>
+#include <drm/drm_accel.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_ioctl.h>
+#include <uapi/drm/qaic_accel.h>
+
+#include "mhi_controller.h"
+#include "mhi_qaic_ctrl.h"
+#include "qaic.h"
+
+MODULE_IMPORT_NS(DMA_BUF);
+
+#define PCI_DEV_AIC100 0xa100
+#define QAIC_NAME "qaic"
+#define QAIC_DESC "Qualcomm Cloud AI Accelerators"
+#define CNTL_MAJOR 5
+#define CNTL_MINOR 0
+
+bool datapath_polling;
+module_param(datapath_polling, bool, 0400);
+MODULE_PARM_DESC(datapath_polling, "Operate the datapath in polling mode");
+static bool link_up;
+static DEFINE_IDA(qaic_usrs);
+
+static int qaic_create_drm_device(struct qaic_device *qdev, s32 partition_id);
+static void qaic_destroy_drm_device(struct qaic_device *qdev, s32 partition_id);
+
+static void free_usr(struct kref *kref)
+{
+ struct qaic_user *usr = container_of(kref, struct qaic_user, ref_count);
+
+ cleanup_srcu_struct(&usr->qddev_lock);
+ ida_free(&qaic_usrs, usr->handle);
+ kfree(usr);
+}
+
+static int qaic_open(struct drm_device *dev, struct drm_file *file)
+{
+ struct qaic_drm_device *qddev = dev->dev_private;
+ struct qaic_device *qdev = qddev->qdev;
+ struct qaic_user *usr;
+ int rcu_id;
+ int ret;
+
+ rcu_id = srcu_read_lock(&qdev->dev_lock);
+ if (qdev->in_reset) {
+ ret = -ENODEV;
+ goto dev_unlock;
+ }
+
+ usr = kmalloc(sizeof(*usr), GFP_KERNEL);
+ if (!usr) {
+ ret = -ENOMEM;
+ goto dev_unlock;
+ }
+
+ usr->handle = ida_alloc(&qaic_usrs, GFP_KERNEL);
+ if (usr->handle < 0) {
+ ret = usr->handle;
+ goto free_usr;
+ }
+ usr->qddev = qddev;
+ atomic_set(&usr->chunk_id, 0);
+ init_srcu_struct(&usr->qddev_lock);
+ kref_init(&usr->ref_count);
+
+ ret = mutex_lock_interruptible(&qddev->users_mutex);
+ if (ret)
+ goto cleanup_usr;
+
+ list_add(&usr->node, &qddev->users);
+ mutex_unlock(&qddev->users_mutex);
+
+ file->driver_priv = usr;
+
+ srcu_read_unlock(&qdev->dev_lock, rcu_id);
+ return 0;
+
+cleanup_usr:
+ cleanup_srcu_struct(&usr->qddev_lock);
+free_usr:
+ kfree(usr);
+dev_unlock:
+ srcu_read_unlock(&qdev->dev_lock, rcu_id);
+ return ret;
+}
+
+static void qaic_postclose(struct drm_device *dev, struct drm_file *file)
+{
+ struct qaic_user *usr = file->driver_priv;
+ struct qaic_drm_device *qddev;
+ struct qaic_device *qdev;
+ int qdev_rcu_id;
+ int usr_rcu_id;
+ int i;
+
+ qddev = usr->qddev;
+ usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
+ if (qddev) {
+ qdev = qddev->qdev;
+ qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
+ if (!qdev->in_reset) {
+ qaic_release_usr(qdev, usr);
+ for (i = 0; i < qdev->num_dbc; ++i)
+ if (qdev->dbc[i].usr && qdev->dbc[i].usr->handle == usr->handle)
+ release_dbc(qdev, i);
+ }
+ srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
+
+ mutex_lock(&qddev->users_mutex);
+ if (!list_empty(&usr->node))
+ list_del_init(&usr->node);
+ mutex_unlock(&qddev->users_mutex);
+ }
+
+ srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
+ kref_put(&usr->ref_count, free_usr);
+
+ file->driver_priv = NULL;
+}
+
+DEFINE_DRM_ACCEL_FOPS(qaic_accel_fops);
+
+static const struct drm_ioctl_desc qaic_drm_ioctls[] = {
+ DRM_IOCTL_DEF_DRV(QAIC_MANAGE, qaic_manage_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(QAIC_CREATE_BO, qaic_create_bo_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(QAIC_MMAP_BO, qaic_mmap_bo_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(QAIC_ATTACH_SLICE_BO, qaic_attach_slice_bo_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(QAIC_EXECUTE_BO, qaic_execute_bo_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(QAIC_PARTIAL_EXECUTE_BO, qaic_partial_execute_bo_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(QAIC_WAIT_BO, qaic_wait_bo_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(QAIC_PERF_STATS_BO, qaic_perf_stats_bo_ioctl, 0),
+};
+
+static const struct drm_driver qaic_accel_driver = {
+ .driver_features = DRIVER_GEM | DRIVER_COMPUTE_ACCEL,
+
+ .name = QAIC_NAME,
+ .desc = QAIC_DESC,
+ .date = "20190618",
+
+ .fops = &qaic_accel_fops,
+ .open = qaic_open,
+ .postclose = qaic_postclose,
+
+ .ioctls = qaic_drm_ioctls,
+ .num_ioctls = ARRAY_SIZE(qaic_drm_ioctls),
+ .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
+ .gem_prime_import = qaic_gem_prime_import,
+};
+
+static int qaic_create_drm_device(struct qaic_device *qdev, s32 partition_id)
+{
+ struct qaic_drm_device *qddev;
+ struct drm_device *ddev;
+ struct device *pdev;
+ int ret;
+
+ /* Hold off implementing partitions until the uapi is determined */
+ if (partition_id != QAIC_NO_PARTITION)
+ return -EINVAL;
+
+ pdev = &qdev->pdev->dev;
+
+ qddev = kzalloc(sizeof(*qddev), GFP_KERNEL);
+ if (!qddev)
+ return -ENOMEM;
+
+ ddev = drm_dev_alloc(&qaic_accel_driver, pdev);
+ if (IS_ERR(ddev)) {
+ ret = PTR_ERR(ddev);
+ goto ddev_fail;
+ }
+
+ ddev->dev_private = qddev;
+ qddev->ddev = ddev;
+
+ qddev->qdev = qdev;
+ qddev->partition_id = partition_id;
+ INIT_LIST_HEAD(&qddev->users);
+ mutex_init(&qddev->users_mutex);
+
+ qdev->qddev = qddev;
+
+ ret = drm_dev_register(ddev, 0);
+ if (ret) {
+ pci_dbg(qdev->pdev, "%s: drm_dev_register failed %d\n", __func__, ret);
+ goto drm_reg_fail;
+ }
+
+ return 0;
+
+drm_reg_fail:
+ mutex_destroy(&qddev->users_mutex);
+ qdev->qddev = NULL;
+ drm_dev_put(ddev);
+ddev_fail:
+ kfree(qddev);
+ return ret;
+}
+
+static void qaic_destroy_drm_device(struct qaic_device *qdev, s32 partition_id)
+{
+ struct qaic_drm_device *qddev;
+ struct qaic_user *usr;
+
+ qddev = qdev->qddev;
+
+	/*
+	 * Existing users get unresolvable errors until they close FDs.
+	 * Need to sync carefully with users calling close(). The
+	 * list of users can be modified elsewhere when the lock isn't
+	 * held here, but synchronizing the srcu with the mutex held
+	 * could deadlock. Grab the mutex so that the list will be
+	 * unmodified. The user we get will exist as long as the
+	 * lock is held. Signal that the qddev is going away, and
+	 * grab a reference to the user so they don't go away for
+	 * synchronize_srcu(). Then release the mutex to avoid
+	 * deadlock and make sure the user has observed the signal.
+	 * With the lock released, we cannot maintain any state of the
+	 * user list.
+	 */
+ mutex_lock(&qddev->users_mutex);
+ while (!list_empty(&qddev->users)) {
+ usr = list_first_entry(&qddev->users, struct qaic_user, node);
+ list_del_init(&usr->node);
+ kref_get(&usr->ref_count);
+ usr->qddev = NULL;
+ mutex_unlock(&qddev->users_mutex);
+ synchronize_srcu(&usr->qddev_lock);
+ kref_put(&usr->ref_count, free_usr);
+ mutex_lock(&qddev->users_mutex);
+ }
+ mutex_unlock(&qddev->users_mutex);
+
+ if (qddev->ddev) {
+ drm_dev_unregister(qddev->ddev);
+ drm_dev_put(qddev->ddev);
+ }
+
+ kfree(qddev);
+}
+
+static int qaic_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id)
+{
+ struct qaic_device *qdev;
+ u16 major, minor;
+ int ret;
+
+ /*
+ * Invoking this function indicates that the control channel to the
+	 * device is available. We use that as a signal that
+ * the device side firmware has booted. The device side firmware
+ * manages the device resources, so we need to communicate with it
+ * via the control channel in order to utilize the device. Therefore
+ * we wait until this signal to create the drm dev that userspace will
+ * use to control the device, because without the device side firmware,
+ * userspace can't do anything useful.
+ */
+
+ qdev = pci_get_drvdata(to_pci_dev(mhi_dev->mhi_cntrl->cntrl_dev));
+
+ qdev->in_reset = false;
+
+ dev_set_drvdata(&mhi_dev->dev, qdev);
+ qdev->cntl_ch = mhi_dev;
+
+ ret = qaic_control_open(qdev);
+ if (ret) {
+ pci_dbg(qdev->pdev, "%s: control_open failed %d\n", __func__, ret);
+ return ret;
+ }
+
+ ret = get_cntl_version(qdev, NULL, &major, &minor);
+ if (ret || major != CNTL_MAJOR || minor > CNTL_MINOR) {
+ pci_err(qdev->pdev, "%s: Control protocol version (%d.%d) not supported. Supported version is (%d.%d). Ret: %d\n",
+ __func__, major, minor, CNTL_MAJOR, CNTL_MINOR, ret);
+ ret = -EINVAL;
+ goto close_control;
+ }
+
+ ret = qaic_create_drm_device(qdev, QAIC_NO_PARTITION);
+
+ return ret;
+
+close_control:
+ qaic_control_close(qdev);
+ return ret;
+}
+
+static void qaic_mhi_remove(struct mhi_device *mhi_dev)
+{
+/* This is redundant since we have already observed the device crash */
+}
+
+static void qaic_notify_reset(struct qaic_device *qdev)
+{
+ int i;
+
+ qdev->in_reset = true;
+ /* wake up any waiters to avoid waiting for timeouts at sync */
+ wake_all_cntl(qdev);
+ for (i = 0; i < qdev->num_dbc; ++i)
+ wakeup_dbc(qdev, i);
+ synchronize_srcu(&qdev->dev_lock);
+}
+
+void qaic_dev_reset_clean_local_state(struct qaic_device *qdev, bool exit_reset)
+{
+ int i;
+
+ qaic_notify_reset(qdev);
+
+ /* remove drmdevs to prevent new users from coming in */
+ qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION);
+
+ /* start tearing things down */
+ for (i = 0; i < qdev->num_dbc; ++i)
+ release_dbc(qdev, i);
+
+ if (exit_reset)
+ qdev->in_reset = false;
+}
+
+static struct qaic_device *create_qdev(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ struct qaic_device *qdev;
+ int i;
+
+ qdev = devm_kzalloc(&pdev->dev, sizeof(*qdev), GFP_KERNEL);
+ if (!qdev)
+ return NULL;
+
+ if (id->device == PCI_DEV_AIC100) {
+ qdev->num_dbc = 16;
+ qdev->dbc = devm_kcalloc(&pdev->dev, qdev->num_dbc, sizeof(*qdev->dbc), GFP_KERNEL);
+ if (!qdev->dbc)
+ return NULL;
+ }
+
+ qdev->cntl_wq = alloc_workqueue("qaic_cntl", WQ_UNBOUND, 0);
+ if (!qdev->cntl_wq)
+ return NULL;
+
+ pci_set_drvdata(pdev, qdev);
+ qdev->pdev = pdev;
+
+ mutex_init(&qdev->cntl_mutex);
+ INIT_LIST_HEAD(&qdev->cntl_xfer_list);
+ init_srcu_struct(&qdev->dev_lock);
+
+ for (i = 0; i < qdev->num_dbc; ++i) {
+ spin_lock_init(&qdev->dbc[i].xfer_lock);
+ qdev->dbc[i].qdev = qdev;
+ qdev->dbc[i].id = i;
+ INIT_LIST_HEAD(&qdev->dbc[i].xfer_list);
+ init_srcu_struct(&qdev->dbc[i].ch_lock);
+ init_waitqueue_head(&qdev->dbc[i].dbc_release);
+ INIT_LIST_HEAD(&qdev->dbc[i].bo_lists);
+ }
+
+ return qdev;
+}
+
+static void cleanup_qdev(struct qaic_device *qdev)
+{
+ int i;
+
+ for (i = 0; i < qdev->num_dbc; ++i)
+ cleanup_srcu_struct(&qdev->dbc[i].ch_lock);
+ cleanup_srcu_struct(&qdev->dev_lock);
+ pci_set_drvdata(qdev->pdev, NULL);
+ destroy_workqueue(qdev->cntl_wq);
+}
+
+static int init_pci(struct qaic_device *qdev, struct pci_dev *pdev)
+{
+ int bars;
+ int ret;
+
+ bars = pci_select_bars(pdev, IORESOURCE_MEM);
+
+ /* make sure the device has the expected BARs */
+ if (bars != (BIT(0) | BIT(2) | BIT(4))) {
+ pci_dbg(pdev, "%s: expected BARs 0, 2, and 4 not found in device. Found 0x%x\n",
+ __func__, bars);
+ return -EINVAL;
+ }
+
+ ret = pcim_enable_device(pdev);
+ if (ret)
+ return ret;
+
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+ if (ret)
+ return ret;
+ ret = dma_set_max_seg_size(&pdev->dev, UINT_MAX);
+ if (ret)
+ return ret;
+
+ qdev->bar_0 = devm_ioremap_resource(&pdev->dev, &pdev->resource[0]);
+ if (IS_ERR(qdev->bar_0))
+ return PTR_ERR(qdev->bar_0);
+
+ qdev->bar_2 = devm_ioremap_resource(&pdev->dev, &pdev->resource[2]);
+ if (IS_ERR(qdev->bar_2))
+ return PTR_ERR(qdev->bar_2);
+
+ /* Managed release since we use pcim_enable_device above */
+ pci_set_master(pdev);
+
+ return 0;
+}
+
+static int init_msi(struct qaic_device *qdev, struct pci_dev *pdev)
+{
+ int mhi_irq;
+ int ret;
+ int i;
+
+ /* Managed release since we use pcim_enable_device */
+ ret = pci_alloc_irq_vectors(pdev, 1, 32, PCI_IRQ_MSI);
+ if (ret < 0)
+ return ret;
+
+ if (ret < 32) {
+ pci_err(pdev, "%s: Requested 32 MSIs. Obtained %d MSIs which is less than the 32 required.\n",
+ __func__, ret);
+ return -ENODEV;
+ }
+
+ mhi_irq = pci_irq_vector(pdev, 0);
+ if (mhi_irq < 0)
+ return mhi_irq;
+
+ for (i = 0; i < qdev->num_dbc; ++i) {
+ ret = devm_request_threaded_irq(&pdev->dev, pci_irq_vector(pdev, i + 1),
+ dbc_irq_handler, dbc_irq_threaded_fn, IRQF_SHARED,
+ "qaic_dbc", &qdev->dbc[i]);
+ if (ret)
+ return ret;
+
+ if (datapath_polling) {
+ qdev->dbc[i].irq = pci_irq_vector(pdev, i + 1);
+ disable_irq_nosync(qdev->dbc[i].irq);
+ INIT_WORK(&qdev->dbc[i].poll_work, irq_polling_work);
+ }
+ }
+
+ return mhi_irq;
+}
+
+static int qaic_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ struct qaic_device *qdev;
+ int mhi_irq;
+ int ret;
+ int i;
+
+ qdev = create_qdev(pdev, id);
+ if (!qdev)
+ return -ENOMEM;
+
+ ret = init_pci(qdev, pdev);
+ if (ret)
+ goto cleanup_qdev;
+
+ for (i = 0; i < qdev->num_dbc; ++i)
+ qdev->dbc[i].dbc_base = qdev->bar_2 + QAIC_DBC_OFF(i);
+
+ mhi_irq = init_msi(qdev, pdev);
+ if (mhi_irq < 0) {
+ ret = mhi_irq;
+ goto cleanup_qdev;
+ }
+
+ qdev->mhi_cntrl = qaic_mhi_register_controller(pdev, qdev->bar_0, mhi_irq);
+ if (IS_ERR(qdev->mhi_cntrl)) {
+ ret = PTR_ERR(qdev->mhi_cntrl);
+ goto cleanup_qdev;
+ }
+
+ return 0;
+
+cleanup_qdev:
+ cleanup_qdev(qdev);
+ return ret;
+}
+
+static void qaic_pci_remove(struct pci_dev *pdev)
+{
+ struct qaic_device *qdev = pci_get_drvdata(pdev);
+
+ if (!qdev)
+ return;
+
+ qaic_dev_reset_clean_local_state(qdev, false);
+ qaic_mhi_free_controller(qdev->mhi_cntrl, link_up);
+ cleanup_qdev(qdev);
+}
+
+static void qaic_pci_shutdown(struct pci_dev *pdev)
+{
+ /* see qaic_exit for what link_up is doing */
+ link_up = true;
+ qaic_pci_remove(pdev);
+}
+
+static pci_ers_result_t qaic_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t error)
+{
+ return PCI_ERS_RESULT_NEED_RESET;
+}
+
+static void qaic_pci_reset_prepare(struct pci_dev *pdev)
+{
+ struct qaic_device *qdev = pci_get_drvdata(pdev);
+
+ qaic_notify_reset(qdev);
+ qaic_mhi_start_reset(qdev->mhi_cntrl);
+ qaic_dev_reset_clean_local_state(qdev, false);
+}
+
+static void qaic_pci_reset_done(struct pci_dev *pdev)
+{
+ struct qaic_device *qdev = pci_get_drvdata(pdev);
+
+ qdev->in_reset = false;
+ qaic_mhi_reset_done(qdev->mhi_cntrl);
+}
+
+static const struct mhi_device_id qaic_mhi_match_table[] = {
+ { .chan = "QAIC_CONTROL", },
+ {},
+};
+
+static struct mhi_driver qaic_mhi_driver = {
+ .id_table = qaic_mhi_match_table,
+ .remove = qaic_mhi_remove,
+ .probe = qaic_mhi_probe,
+ .ul_xfer_cb = qaic_mhi_ul_xfer_cb,
+ .dl_xfer_cb = qaic_mhi_dl_xfer_cb,
+ .driver = {
+ .name = "qaic_mhi",
+ },
+};
+
+static const struct pci_device_id qaic_ids[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_QCOM, PCI_DEV_AIC100), },
+ { }
+};
+MODULE_DEVICE_TABLE(pci, qaic_ids);
+
+static const struct pci_error_handlers qaic_pci_err_handler = {
+ .error_detected = qaic_pci_error_detected,
+ .reset_prepare = qaic_pci_reset_prepare,
+ .reset_done = qaic_pci_reset_done,
+};
+
+static struct pci_driver qaic_pci_driver = {
+ .name = QAIC_NAME,
+ .id_table = qaic_ids,
+ .probe = qaic_pci_probe,
+ .remove = qaic_pci_remove,
+ .shutdown = qaic_pci_shutdown,
+ .err_handler = &qaic_pci_err_handler,
+};
+
+static int __init qaic_init(void)
+{
+ int ret;
+
+ ret = mhi_driver_register(&qaic_mhi_driver);
+ if (ret) {
+ pr_debug("qaic: mhi_driver_register failed %d\n", ret);
+ return ret;
+ }
+
+ ret = pci_register_driver(&qaic_pci_driver);
+ if (ret) {
+ pr_debug("qaic: pci_register_driver failed %d\n", ret);
+ goto free_mhi;
+ }
+
+ ret = mhi_qaic_ctrl_init();
+ if (ret) {
+ pr_debug("qaic: mhi_qaic_ctrl_init failed %d\n", ret);
+ goto free_pci;
+ }
+
+ return 0;
+
+free_pci:
+ pci_unregister_driver(&qaic_pci_driver);
+free_mhi:
+ mhi_driver_unregister(&qaic_mhi_driver);
+ return ret;
+}
+
+static void __exit qaic_exit(void)
+{
+ /*
+ * We assume that qaic_pci_remove() is called due to a hotplug event
+ * which would mean that the link is down, and thus
+ * qaic_mhi_free_controller() should not try to access the device during
+ * cleanup.
+ * We call pci_unregister_driver() below, which also triggers
+ * qaic_pci_remove(), but since this is module exit, we expect the link
+ * to the device to be up, in which case qaic_mhi_free_controller()
+ * should try to access the device during cleanup to put the device in
+ * a sane state.
+ * For that reason, we set link_up here to let qaic_mhi_free_controller
+ * know the expected link state. Since the module is going to be
+ * removed at the end of this, we don't need to worry about
+ * reinitializing the link_up state after the cleanup is done.
+ */
+ link_up = true;
+ mhi_qaic_ctrl_deinit();
+ pci_unregister_driver(&qaic_pci_driver);
+ mhi_driver_unregister(&qaic_mhi_driver);
+}
+
+module_init(qaic_init);
+module_exit(qaic_exit);
+
+MODULE_AUTHOR(QAIC_DESC " Kernel Driver Team");
+MODULE_DESCRIPTION(QAIC_DESC " Accel Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/ast/ast_drv.c b/drivers/gpu/drm/ast/ast_drv.c
index 3a7af6d5aa79..e1224ef4ad83 100644
--- a/drivers/gpu/drm/ast/ast_drv.c
+++ b/drivers/gpu/drm/ast/ast_drv.c
@@ -89,27 +89,13 @@ static const struct pci_device_id ast_pciidlist[] = {
MODULE_DEVICE_TABLE(pci, ast_pciidlist);
-static int ast_remove_conflicting_framebuffers(struct pci_dev *pdev)
-{
- bool primary = false;
- resource_size_t base, size;
-
- base = pci_resource_start(pdev, 0);
- size = pci_resource_len(pdev, 0);
-#ifdef CONFIG_X86
- primary = pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW;
-#endif
-
- return drm_aperture_remove_conflicting_framebuffers(base, size, primary, &ast_driver);
-}
-
static int ast_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
struct ast_device *ast;
struct drm_device *dev;
int ret;
- ret = ast_remove_conflicting_framebuffers(pdev);
+ ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &ast_driver);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/bridge/fsl-ldb.c b/drivers/gpu/drm/bridge/fsl-ldb.c
index 450b352914f4..682623369498 100644
--- a/drivers/gpu/drm/bridge/fsl-ldb.c
+++ b/drivers/gpu/drm/bridge/fsl-ldb.c
@@ -84,10 +84,16 @@ struct fsl_ldb {
struct drm_bridge *panel_bridge;
struct clk *clk;
struct regmap *regmap;
- bool lvds_dual_link;
const struct fsl_ldb_devdata *devdata;
+ bool ch0_enabled;
+ bool ch1_enabled;
};
+static bool fsl_ldb_is_dual(const struct fsl_ldb *fsl_ldb)
+{
+ return (fsl_ldb->ch0_enabled && fsl_ldb->ch1_enabled);
+}
+
static inline struct fsl_ldb *to_fsl_ldb(struct drm_bridge *bridge)
{
return container_of(bridge, struct fsl_ldb, bridge);
@@ -95,7 +101,7 @@ static inline struct fsl_ldb *to_fsl_ldb(struct drm_bridge *bridge)
static unsigned long fsl_ldb_link_frequency(struct fsl_ldb *fsl_ldb, int clock)
{
- if (fsl_ldb->lvds_dual_link)
+ if (fsl_ldb_is_dual(fsl_ldb))
return clock * 3500;
else
return clock * 7000;
@@ -170,35 +176,28 @@ static void fsl_ldb_atomic_enable(struct drm_bridge *bridge,
configured_link_freq = clk_get_rate(fsl_ldb->clk);
if (configured_link_freq != requested_link_freq)
- dev_warn(fsl_ldb->dev, "Configured LDB clock (%lu Hz) does not match requested LVDS clock: %lu Hz",
+ dev_warn(fsl_ldb->dev, "Configured LDB clock (%lu Hz) does not match requested LVDS clock: %lu Hz\n",
configured_link_freq,
requested_link_freq);
clk_prepare_enable(fsl_ldb->clk);
/* Program LDB_CTRL */
- reg = LDB_CTRL_CH0_ENABLE;
+ reg = (fsl_ldb->ch0_enabled ? LDB_CTRL_CH0_ENABLE : 0) |
+ (fsl_ldb->ch1_enabled ? LDB_CTRL_CH1_ENABLE : 0) |
+ (fsl_ldb_is_dual(fsl_ldb) ? LDB_CTRL_SPLIT_MODE : 0);
- if (fsl_ldb->lvds_dual_link)
- reg |= LDB_CTRL_CH1_ENABLE | LDB_CTRL_SPLIT_MODE;
+ if (lvds_format_24bpp)
+ reg |= (fsl_ldb->ch0_enabled ? LDB_CTRL_CH0_DATA_WIDTH : 0) |
+ (fsl_ldb->ch1_enabled ? LDB_CTRL_CH1_DATA_WIDTH : 0);
- if (lvds_format_24bpp) {
- reg |= LDB_CTRL_CH0_DATA_WIDTH;
- if (fsl_ldb->lvds_dual_link)
- reg |= LDB_CTRL_CH1_DATA_WIDTH;
- }
+ if (lvds_format_jeida)
+ reg |= (fsl_ldb->ch0_enabled ? LDB_CTRL_CH0_BIT_MAPPING : 0) |
+ (fsl_ldb->ch1_enabled ? LDB_CTRL_CH1_BIT_MAPPING : 0);
- if (lvds_format_jeida) {
- reg |= LDB_CTRL_CH0_BIT_MAPPING;
- if (fsl_ldb->lvds_dual_link)
- reg |= LDB_CTRL_CH1_BIT_MAPPING;
- }
-
- if (mode->flags & DRM_MODE_FLAG_PVSYNC) {
- reg |= LDB_CTRL_DI0_VSYNC_POLARITY;
- if (fsl_ldb->lvds_dual_link)
- reg |= LDB_CTRL_DI1_VSYNC_POLARITY;
- }
+ if (mode->flags & DRM_MODE_FLAG_PVSYNC)
+ reg |= (fsl_ldb->ch0_enabled ? LDB_CTRL_DI0_VSYNC_POLARITY : 0) |
+ (fsl_ldb->ch1_enabled ? LDB_CTRL_DI1_VSYNC_POLARITY : 0);
regmap_write(fsl_ldb->regmap, fsl_ldb->devdata->ldb_ctrl, reg);
@@ -210,9 +209,8 @@ static void fsl_ldb_atomic_enable(struct drm_bridge *bridge,
/* Wait for VBG to stabilize. */
usleep_range(15, 20);
- reg |= LVDS_CTRL_CH0_EN;
- if (fsl_ldb->lvds_dual_link)
- reg |= LVDS_CTRL_CH1_EN;
+ reg |= (fsl_ldb->ch0_enabled ? LVDS_CTRL_CH0_EN : 0) |
+ (fsl_ldb->ch1_enabled ? LVDS_CTRL_CH1_EN : 0);
regmap_write(fsl_ldb->regmap, fsl_ldb->devdata->lvds_ctrl, reg);
}
@@ -265,7 +263,7 @@ fsl_ldb_mode_valid(struct drm_bridge *bridge,
{
struct fsl_ldb *fsl_ldb = to_fsl_ldb(bridge);
- if (mode->clock > (fsl_ldb->lvds_dual_link ? 160000 : 80000))
+ if (mode->clock > (fsl_ldb_is_dual(fsl_ldb) ? 160000 : 80000))
return MODE_CLOCK_HIGH;
return MODE_OK;
@@ -286,7 +284,7 @@ static int fsl_ldb_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct device_node *panel_node;
- struct device_node *port1, *port2;
+ struct device_node *remote1, *remote2;
struct drm_panel *panel;
struct fsl_ldb *fsl_ldb;
int dual_link;
@@ -311,10 +309,23 @@ static int fsl_ldb_probe(struct platform_device *pdev)
if (IS_ERR(fsl_ldb->regmap))
return PTR_ERR(fsl_ldb->regmap);
- /* Locate the panel DT node. */
- panel_node = of_graph_get_remote_node(dev->of_node, 1, 0);
- if (!panel_node)
- return -ENXIO;
+ /* Locate the remote ports and the panel node */
+ remote1 = of_graph_get_remote_node(dev->of_node, 1, 0);
+ remote2 = of_graph_get_remote_node(dev->of_node, 2, 0);
+ fsl_ldb->ch0_enabled = (remote1 != NULL);
+ fsl_ldb->ch1_enabled = (remote2 != NULL);
+ panel_node = of_node_get(remote1 ? remote1 : remote2);
+ of_node_put(remote1);
+ of_node_put(remote2);
+
+ if (!fsl_ldb->ch0_enabled && !fsl_ldb->ch1_enabled) {
+ of_node_put(panel_node);
+ return dev_err_probe(dev, -ENXIO, "No panel node found");
+ }
+
+ dev_dbg(dev, "Using %s\n",
+ fsl_ldb_is_dual(fsl_ldb) ? "dual-link mode" :
+ fsl_ldb->ch0_enabled ? "channel 0" : "channel 1");
panel = of_drm_find_panel(panel_node);
of_node_put(panel_node);
@@ -325,20 +336,26 @@ static int fsl_ldb_probe(struct platform_device *pdev)
if (IS_ERR(fsl_ldb->panel_bridge))
return PTR_ERR(fsl_ldb->panel_bridge);
- /* Determine whether this is dual-link configuration */
- port1 = of_graph_get_port_by_id(dev->of_node, 1);
- port2 = of_graph_get_port_by_id(dev->of_node, 2);
- dual_link = drm_of_lvds_get_dual_link_pixel_order(port1, port2);
- of_node_put(port1);
- of_node_put(port2);
- if (dual_link == DRM_LVDS_DUAL_LINK_EVEN_ODD_PIXELS) {
- dev_err(dev, "LVDS channel pixel swap not supported.\n");
- return -EINVAL;
- }
+ if (fsl_ldb_is_dual(fsl_ldb)) {
+ struct device_node *port1, *port2;
- if (dual_link == DRM_LVDS_DUAL_LINK_ODD_EVEN_PIXELS)
- fsl_ldb->lvds_dual_link = true;
+ port1 = of_graph_get_port_by_id(dev->of_node, 1);
+ port2 = of_graph_get_port_by_id(dev->of_node, 2);
+ dual_link = drm_of_lvds_get_dual_link_pixel_order(port1, port2);
+ of_node_put(port1);
+ of_node_put(port2);
+
+ if (dual_link < 0)
+ return dev_err_probe(dev, dual_link,
+ "Error getting dual link configuration\n");
+
+ /* Only DRM_LVDS_DUAL_LINK_ODD_EVEN_PIXELS is supported */
+ if (dual_link == DRM_LVDS_DUAL_LINK_EVEN_ODD_PIXELS) {
+ dev_err(dev, "LVDS channel pixel swap not supported.\n");
+ return -EINVAL;
+ }
+ }
platform_set_drvdata(pdev, fsl_ldb);
diff --git a/drivers/gpu/drm/bridge/lontium-lt8912b.c b/drivers/gpu/drm/bridge/lontium-lt8912b.c
index b40baced1331..13c131ade268 100644
--- a/drivers/gpu/drm/bridge/lontium-lt8912b.c
+++ b/drivers/gpu/drm/bridge/lontium-lt8912b.c
@@ -504,7 +504,6 @@ static int lt8912_attach_dsi(struct lt8912 *lt)
dsi->format = MIPI_DSI_FMT_RGB888;
dsi->mode_flags = MIPI_DSI_MODE_VIDEO |
- MIPI_DSI_MODE_VIDEO_BURST |
MIPI_DSI_MODE_LPM |
MIPI_DSI_MODE_NO_EOT_PACKET;
diff --git a/drivers/gpu/drm/bridge/parade-ps8640.c b/drivers/gpu/drm/bridge/parade-ps8640.c
index b823e55650b1..c3eb45179405 100644
--- a/drivers/gpu/drm/bridge/parade-ps8640.c
+++ b/drivers/gpu/drm/bridge/parade-ps8640.c
@@ -184,7 +184,7 @@ static int _ps8640_wait_hpd_asserted(struct ps8640 *ps_bridge, unsigned long wai
* actually connected to GPIO9).
*/
ret = regmap_read_poll_timeout(map, PAGE2_GPIO_H, status,
- status & PS_GPIO9, wait_us / 10, wait_us);
+ status & PS_GPIO9, 20000, wait_us);
/*
* The first time we see HPD go high after a reset we delay an extra
diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
index aa51c61a78c7..603bb3c51027 100644
--- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
@@ -1426,9 +1426,9 @@ void dw_hdmi_set_high_tmds_clock_ratio(struct dw_hdmi *hdmi,
/* Control for TMDS Bit Period/TMDS Clock-Period Ratio */
if (dw_hdmi_support_scdc(hdmi, display)) {
if (mtmdsclock > HDMI14_MAX_TMDSCLK)
- drm_scdc_set_high_tmds_clock_ratio(hdmi->ddc, 1);
+ drm_scdc_set_high_tmds_clock_ratio(&hdmi->connector, 1);
else
- drm_scdc_set_high_tmds_clock_ratio(hdmi->ddc, 0);
+ drm_scdc_set_high_tmds_clock_ratio(&hdmi->connector, 0);
}
}
EXPORT_SYMBOL_GPL(dw_hdmi_set_high_tmds_clock_ratio);
@@ -2116,7 +2116,7 @@ static void hdmi_av_composer(struct dw_hdmi *hdmi,
min_t(u8, bytes, SCDC_MIN_SOURCE_VERSION));
/* Enabled Scrambling in the Sink */
- drm_scdc_set_scrambling(hdmi->ddc, 1);
+ drm_scdc_set_scrambling(&hdmi->connector, 1);
/*
* To activate the scrambler feature, you must ensure
@@ -2132,7 +2132,7 @@ static void hdmi_av_composer(struct dw_hdmi *hdmi,
hdmi_writeb(hdmi, 0, HDMI_FC_SCRAMBLER_CTRL);
hdmi_writeb(hdmi, (u8)~HDMI_MC_SWRSTZ_TMDSSWRST_REQ,
HDMI_MC_SWRSTZ);
- drm_scdc_set_scrambling(hdmi->ddc, 0);
+ drm_scdc_set_scrambling(&hdmi->connector, 0);
}
}
diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c
index 6d16ec45ea61..91f7cb56a654 100644
--- a/drivers/gpu/drm/bridge/tc358767.c
+++ b/drivers/gpu/drm/bridge/tc358767.c
@@ -1896,10 +1896,10 @@ static int tc_mipi_dsi_host_attach(struct tc_data *tc)
"failed to create dsi device\n");
tc->dsi = dsi;
-
dsi->lanes = dsi_lanes;
dsi->format = MIPI_DSI_FMT_RGB888;
- dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_SYNC_PULSE;
+ dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST |
+ MIPI_DSI_MODE_LPM | MIPI_DSI_CLOCK_NON_CONTINUOUS;
ret = mipi_dsi_attach(dsi);
if (ret < 0) {
diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi83.c b/drivers/gpu/drm/bridge/ti-sn65dsi83.c
index 91ecfbe45bf9..75286c9afbb9 100644
--- a/drivers/gpu/drm/bridge/ti-sn65dsi83.c
+++ b/drivers/gpu/drm/bridge/ti-sn65dsi83.c
@@ -642,7 +642,9 @@ static int sn65dsi83_host_attach(struct sn65dsi83 *ctx)
dsi->lanes = dsi_lanes;
dsi->format = MIPI_DSI_FMT_RGB888;
- dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST;
+ dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST |
+ MIPI_DSI_MODE_VIDEO_NO_HFP | MIPI_DSI_MODE_VIDEO_NO_HBP |
+ MIPI_DSI_MODE_VIDEO_NO_HSA | MIPI_DSI_MODE_NO_EOT_PACKET;
ret = devm_mipi_dsi_attach(dev, dsi);
if (ret < 0) {
@@ -698,8 +700,10 @@ static int sn65dsi83_probe(struct i2c_client *client)
drm_bridge_add(&ctx->bridge);
ret = sn65dsi83_host_attach(ctx);
- if (ret)
+ if (ret) {
+ dev_err_probe(dev, ret, "failed to attach DSI host\n");
goto err_remove_bridge;
+ }
return 0;
diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
index 1e26fa63845a..7a748785c545 100644
--- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c
+++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
@@ -363,7 +363,7 @@ static int __maybe_unused ti_sn65dsi86_resume(struct device *dev)
/* td2: min 100 us after regulators before enabling the GPIO */
usleep_range(100, 110);
- gpiod_set_value(pdata->enable_gpio, 1);
+ gpiod_set_value_cansleep(pdata->enable_gpio, 1);
/*
* If we have a reference clock we can enable communication w/ the
@@ -386,7 +386,7 @@ static int __maybe_unused ti_sn65dsi86_suspend(struct device *dev)
if (pdata->refclk)
ti_sn65dsi86_disable_comms(pdata);
- gpiod_set_value(pdata->enable_gpio, 0);
+ gpiod_set_value_cansleep(pdata->enable_gpio, 0);
ret = regulator_bulk_disable(SN_REGULATOR_SUPPLY_NUM, pdata->supplies);
if (ret)
diff --git a/drivers/gpu/drm/display/drm_scdc_helper.c b/drivers/gpu/drm/display/drm_scdc_helper.c
index c3ad4ab2b456..6d2f244e5830 100644
--- a/drivers/gpu/drm/display/drm_scdc_helper.c
+++ b/drivers/gpu/drm/display/drm_scdc_helper.c
@@ -26,6 +26,8 @@
#include <linux/delay.h>
#include <drm/display/drm_scdc_helper.h>
+#include <drm/drm_connector.h>
+#include <drm/drm_device.h>
#include <drm/drm_print.h>
/**
@@ -140,7 +142,7 @@ EXPORT_SYMBOL(drm_scdc_write);
/**
* drm_scdc_get_scrambling_status - what is status of scrambling?
- * @adapter: I2C adapter for DDC channel
+ * @connector: connector
*
* Reads the scrambler status over SCDC, and checks the
* scrambling status.
@@ -148,14 +150,16 @@ EXPORT_SYMBOL(drm_scdc_write);
* Returns:
* True if the scrambling is enabled, false otherwise.
*/
-bool drm_scdc_get_scrambling_status(struct i2c_adapter *adapter)
+bool drm_scdc_get_scrambling_status(struct drm_connector *connector)
{
u8 status;
int ret;
- ret = drm_scdc_readb(adapter, SCDC_SCRAMBLER_STATUS, &status);
+ ret = drm_scdc_readb(connector->ddc, SCDC_SCRAMBLER_STATUS, &status);
if (ret < 0) {
- DRM_DEBUG_KMS("Failed to read scrambling status: %d\n", ret);
+ drm_dbg_kms(connector->dev,
+ "[CONNECTOR:%d:%s] Failed to read scrambling status: %d\n",
+ connector->base.id, connector->name, ret);
return false;
}
@@ -165,7 +169,7 @@ EXPORT_SYMBOL(drm_scdc_get_scrambling_status);
/**
* drm_scdc_set_scrambling - enable scrambling
- * @adapter: I2C adapter for DDC channel
+ * @connector: connector
* @enable: bool to indicate if scrambling is to be enabled/disabled
*
* Writes the TMDS config register over SCDC channel, and:
@@ -175,14 +179,17 @@ EXPORT_SYMBOL(drm_scdc_get_scrambling_status);
* Returns:
* True if scrambling is set/reset successfully, false otherwise.
*/
-bool drm_scdc_set_scrambling(struct i2c_adapter *adapter, bool enable)
+bool drm_scdc_set_scrambling(struct drm_connector *connector,
+ bool enable)
{
u8 config;
int ret;
- ret = drm_scdc_readb(adapter, SCDC_TMDS_CONFIG, &config);
+ ret = drm_scdc_readb(connector->ddc, SCDC_TMDS_CONFIG, &config);
if (ret < 0) {
- DRM_DEBUG_KMS("Failed to read TMDS config: %d\n", ret);
+ drm_dbg_kms(connector->dev,
+ "[CONNECTOR:%d:%s] Failed to read TMDS config: %d\n",
+ connector->base.id, connector->name, ret);
return false;
}
@@ -191,9 +198,11 @@ bool drm_scdc_set_scrambling(struct i2c_adapter *adapter, bool enable)
else
config &= ~SCDC_SCRAMBLING_ENABLE;
- ret = drm_scdc_writeb(adapter, SCDC_TMDS_CONFIG, config);
+ ret = drm_scdc_writeb(connector->ddc, SCDC_TMDS_CONFIG, config);
if (ret < 0) {
- DRM_DEBUG_KMS("Failed to enable scrambling: %d\n", ret);
+ drm_dbg_kms(connector->dev,
+ "[CONNECTOR:%d:%s] Failed to enable scrambling: %d\n",
+ connector->base.id, connector->name, ret);
return false;
}
@@ -203,7 +212,7 @@ EXPORT_SYMBOL(drm_scdc_set_scrambling);
/**
* drm_scdc_set_high_tmds_clock_ratio - set TMDS clock ratio
- * @adapter: I2C adapter for DDC channel
+ * @connector: connector
* @set: set or reset the high clock ratio
*
*
@@ -230,14 +239,17 @@ EXPORT_SYMBOL(drm_scdc_set_scrambling);
* Returns:
* True if write is successful, false otherwise.
*/
-bool drm_scdc_set_high_tmds_clock_ratio(struct i2c_adapter *adapter, bool set)
+bool drm_scdc_set_high_tmds_clock_ratio(struct drm_connector *connector,
+ bool set)
{
u8 config;
int ret;
- ret = drm_scdc_readb(adapter, SCDC_TMDS_CONFIG, &config);
+ ret = drm_scdc_readb(connector->ddc, SCDC_TMDS_CONFIG, &config);
if (ret < 0) {
- DRM_DEBUG_KMS("Failed to read TMDS config: %d\n", ret);
+ drm_dbg_kms(connector->dev,
+ "[CONNECTOR:%d:%s] Failed to read TMDS config: %d\n",
+ connector->base.id, connector->name, ret);
return false;
}
@@ -246,9 +258,11 @@ bool drm_scdc_set_high_tmds_clock_ratio(struct i2c_adapter *adapter, bool set)
else
config &= ~SCDC_TMDS_BIT_CLOCK_RATIO_BY_40;
- ret = drm_scdc_writeb(adapter, SCDC_TMDS_CONFIG, config);
+ ret = drm_scdc_writeb(connector->ddc, SCDC_TMDS_CONFIG, config);
if (ret < 0) {
- DRM_DEBUG_KMS("Failed to set TMDS clock ratio: %d\n", ret);
+ drm_dbg_kms(connector->dev,
+ "[CONNECTOR:%d:%s] Failed to set TMDS clock ratio: %d\n",
+ connector->base.id, connector->name, ret);
return false;
}
diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index d4d2a2ce40f8..2c2c9caf0be5 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -1528,6 +1528,12 @@ static void set_fence_deadline(struct drm_device *dev,
for_each_new_crtc_in_state (state, crtc, new_crtc_state, i) {
ktime_t v;
+ if (drm_atomic_crtc_needs_modeset(new_crtc_state))
+ continue;
+
+ if (!new_crtc_state->active)
+ continue;
+
if (drm_crtc_next_vblank_start(crtc, &v))
continue;
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 63ec95e86d0e..64458982be40 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -1537,6 +1537,27 @@ static void drm_fb_helper_fill_pixel_fmt(struct fb_var_screeninfo *var,
}
}
+static void __fill_var(struct fb_var_screeninfo *var,
+ struct drm_framebuffer *fb)
+{
+ int i;
+
+ var->xres_virtual = fb->width;
+ var->yres_virtual = fb->height;
+ var->accel_flags = FB_ACCELF_TEXT;
+ var->bits_per_pixel = drm_format_info_bpp(fb->format, 0);
+
+ var->height = var->width = 0;
+ var->left_margin = var->right_margin = 0;
+ var->upper_margin = var->lower_margin = 0;
+ var->hsync_len = var->vsync_len = 0;
+ var->sync = var->vmode = 0;
+ var->rotate = 0;
+ var->colorspace = 0;
+ for (i = 0; i < 4; i++)
+ var->reserved[i] = 0;
+}
+
/**
* drm_fb_helper_check_var - implementation for &fb_ops.fb_check_var
* @var: screeninfo to check
@@ -1589,6 +1610,23 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var,
return -EINVAL;
}
+ __fill_var(var, fb);
+
+ /*
+ * fb_pan_display() validates this, but fb_set_par() doesn't and just
+	 * falls over. Note that __fill_var above adjusts x/yres_virtual.
+ */
+ if (var->yoffset > var->yres_virtual - var->yres ||
+ var->xoffset > var->xres_virtual - var->xres)
+ return -EINVAL;
+
+ /* We neither support grayscale nor FOURCC (also stored in here). */
+ if (var->grayscale > 0)
+ return -EINVAL;
+
+ if (var->nonstd)
+ return -EINVAL;
+
/*
* Workaround for SDL 1.2, which is known to be setting all pixel format
* fields values to zero in some cases. We treat this situation as a
@@ -1604,11 +1642,6 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var,
}
/*
- * Likewise, bits_per_pixel should be rounded up to a supported value.
- */
- var->bits_per_pixel = bpp;
-
- /*
* drm fbdev emulation doesn't support changing the pixel format at all,
* so reject all pixel format changing requests.
*/
@@ -1638,11 +1671,6 @@ int drm_fb_helper_set_par(struct fb_info *info)
if (oops_in_progress)
return -EBUSY;
- if (var->pixclock != 0) {
- drm_err(fb_helper->dev, "PIXEL CLOCK SET\n");
- return -EINVAL;
- }
-
/*
* Normally we want to make sure that a kms master takes precedence over
* fbdev, to avoid fbdev flickering and occasionally stealing the
@@ -2036,12 +2064,9 @@ static void drm_fb_helper_fill_var(struct fb_info *info,
}
info->pseudo_palette = fb_helper->pseudo_palette;
- info->var.xres_virtual = fb->width;
- info->var.yres_virtual = fb->height;
- info->var.bits_per_pixel = drm_format_info_bpp(format, 0);
- info->var.accel_flags = FB_ACCELF_TEXT;
info->var.xoffset = 0;
info->var.yoffset = 0;
+ __fill_var(&info->var, fb);
info->var.activate = FB_ACTIVATE_NOW;
drm_fb_helper_fill_pixel_fmt(&info->var, format);
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index 149cd4ff6a3b..d29dafce9bb0 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -544,7 +544,8 @@ int drm_prime_handle_to_fd_ioctl(struct drm_device *dev, void *data,
* Optional pinning of buffers is handled at dma-buf attach and detach time in
* drm_gem_map_attach() and drm_gem_map_detach(). Backing storage itself is
* handled by drm_gem_map_dma_buf() and drm_gem_unmap_dma_buf(), which relies on
- * &drm_gem_object_funcs.get_sg_table.
+ * &drm_gem_object_funcs.get_sg_table. If &drm_gem_object_funcs.get_sg_table is
+ * unimplemented, exports into another device are rejected.
*
* For kernel-internal access there's drm_gem_dmabuf_vmap() and
* drm_gem_dmabuf_vunmap(). Userspace mmap support is provided by
@@ -583,6 +584,9 @@ int drm_gem_map_attach(struct dma_buf *dma_buf,
{
struct drm_gem_object *obj = dma_buf->priv;
+ if (!obj->funcs->get_sg_table)
+ return -ENOSYS;
+
return drm_gem_pin(obj);
}
EXPORT_SYMBOL(drm_gem_map_attach);
diff --git a/drivers/gpu/drm/drm_vblank.c b/drivers/gpu/drm/drm_vblank.c
index 299fa2a19a90..877e2067534f 100644
--- a/drivers/gpu/drm/drm_vblank.c
+++ b/drivers/gpu/drm/drm_vblank.c
@@ -996,10 +996,16 @@ EXPORT_SYMBOL(drm_crtc_vblank_count_and_time);
int drm_crtc_next_vblank_start(struct drm_crtc *crtc, ktime_t *vblanktime)
{
unsigned int pipe = drm_crtc_index(crtc);
- struct drm_vblank_crtc *vblank = &crtc->dev->vblank[pipe];
- struct drm_display_mode *mode = &vblank->hwmode;
+ struct drm_vblank_crtc *vblank;
+ struct drm_display_mode *mode;
u64 vblank_start;
+ if (!drm_dev_has_vblank(crtc->dev))
+ return -EINVAL;
+
+ vblank = &crtc->dev->vblank[pipe];
+ mode = &vblank->hwmode;
+
if (!vblank->framedur_ns || !vblank->linedur_ns)
return -EINVAL;
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index 73240cf78c8b..d8a9790f9d36 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -3988,8 +3988,8 @@ static int intel_hdmi_reset_link(struct intel_encoder *encoder,
ret = drm_scdc_readb(adapter, SCDC_TMDS_CONFIG, &config);
if (ret < 0) {
- drm_err(&dev_priv->drm, "Failed to read TMDS config: %d\n",
- ret);
+ drm_err(&dev_priv->drm, "[CONNECTOR:%d:%s] Failed to read TMDS config: %d\n",
+ connector->base.base.id, connector->base.name, ret);
return 0;
}
diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c
index c7e9e1fbed37..a690a5616506 100644
--- a/drivers/gpu/drm/i915/display/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/display/intel_hdmi.c
@@ -2646,11 +2646,8 @@ bool intel_hdmi_handle_sink_scrambling(struct intel_encoder *encoder,
bool scrambling)
{
struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
- struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
struct drm_scrambling *sink_scrambling =
&connector->display_info.hdmi.scdc.scrambling;
- struct i2c_adapter *adapter =
- intel_gmbus_get_adapter(dev_priv, intel_hdmi->ddc_bus);
if (!sink_scrambling->supported)
return true;
@@ -2661,9 +2658,8 @@ bool intel_hdmi_handle_sink_scrambling(struct intel_encoder *encoder,
str_yes_no(scrambling), high_tmds_clock_ratio ? 40 : 10);
/* Set TMDS bit clock ratio to 1/40 or 1/10, and enable/disable scrambling */
- return drm_scdc_set_high_tmds_clock_ratio(adapter,
- high_tmds_clock_ratio) &&
- drm_scdc_set_scrambling(adapter, scrambling);
+ return drm_scdc_set_high_tmds_clock_ratio(connector, high_tmds_clock_ratio) &&
+ drm_scdc_set_scrambling(connector, scrambling);
}
static u8 chv_port_to_ddc_pin(struct drm_i915_private *dev_priv, enum port port)
diff --git a/drivers/gpu/drm/lima/lima_drv.c b/drivers/gpu/drm/lima/lima_drv.c
index 7b8d7178d09a..39cab4a55f57 100644
--- a/drivers/gpu/drm/lima/lima_drv.c
+++ b/drivers/gpu/drm/lima/lima_drv.c
@@ -392,8 +392,10 @@ static int lima_pdev_probe(struct platform_device *pdev)
/* Allocate and initialize the DRM device. */
ddev = drm_dev_alloc(&lima_drm_driver, &pdev->dev);
- if (IS_ERR(ddev))
- return PTR_ERR(ddev);
+ if (IS_ERR(ddev)) {
+ err = PTR_ERR(ddev);
+ goto err_out0;
+ }
ddev->dev_private = ldev;
ldev->ddev = ddev;
diff --git a/drivers/gpu/drm/panel/panel-edp.c b/drivers/gpu/drm/panel/panel-edp.c
index 926906ca2304..e23ddab2126e 100644
--- a/drivers/gpu/drm/panel/panel-edp.c
+++ b/drivers/gpu/drm/panel/panel-edp.c
@@ -1879,6 +1879,7 @@ static const struct edp_panel_entry edp_panels[] = {
EDP_PANEL_ENTRY('B', 'O', 'E', 0x07d1, &boe_nv133fhm_n61.delay, "NV133FHM-N61"),
EDP_PANEL_ENTRY('B', 'O', 'E', 0x082d, &boe_nv133fhm_n61.delay, "NV133FHM-N62"),
EDP_PANEL_ENTRY('B', 'O', 'E', 0x094b, &delay_200_500_e50, "NT116WHM-N21"),
+ EDP_PANEL_ENTRY('B', 'O', 'E', 0x095f, &delay_200_500_e50, "NE135FBM-N41 v8.1"),
EDP_PANEL_ENTRY('B', 'O', 'E', 0x098d, &boe_nv110wtm_n61.delay, "NV110WTM-N61"),
EDP_PANEL_ENTRY('B', 'O', 'E', 0x09dd, &delay_200_500_e50, "NT116WHM-N21"),
EDP_PANEL_ENTRY('B', 'O', 'E', 0x0a5d, &delay_200_500_e50, "NV116WHM-N45"),
diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index 8af632740673..34af6724914f 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -2140,10 +2140,8 @@ static void tegra_sor_hdmi_disable_scrambling(struct tegra_sor *sor)
static void tegra_sor_hdmi_scdc_disable(struct tegra_sor *sor)
{
- struct i2c_adapter *ddc = sor->output.ddc;
-
- drm_scdc_set_high_tmds_clock_ratio(ddc, false);
- drm_scdc_set_scrambling(ddc, false);
+ drm_scdc_set_high_tmds_clock_ratio(&sor->output.connector, false);
+ drm_scdc_set_scrambling(&sor->output.connector, false);
tegra_sor_hdmi_disable_scrambling(sor);
}
@@ -2168,10 +2166,8 @@ static void tegra_sor_hdmi_enable_scrambling(struct tegra_sor *sor)
static void tegra_sor_hdmi_scdc_enable(struct tegra_sor *sor)
{
- struct i2c_adapter *ddc = sor->output.ddc;
-
- drm_scdc_set_high_tmds_clock_ratio(ddc, true);
- drm_scdc_set_scrambling(ddc, true);
+ drm_scdc_set_high_tmds_clock_ratio(&sor->output.connector, true);
+ drm_scdc_set_scrambling(&sor->output.connector, true);
tegra_sor_hdmi_enable_scrambling(sor);
}
@@ -2179,9 +2175,8 @@ static void tegra_sor_hdmi_scdc_enable(struct tegra_sor *sor)
static void tegra_sor_hdmi_scdc_work(struct work_struct *work)
{
struct tegra_sor *sor = container_of(work, struct tegra_sor, scdc.work);
- struct i2c_adapter *ddc = sor->output.ddc;
- if (!drm_scdc_get_scrambling_status(ddc)) {
+ if (!drm_scdc_get_scrambling_status(&sor->output.connector)) {
DRM_DEBUG_KMS("SCDC not scrambled\n");
tegra_sor_hdmi_scdc_enable(sor);
}
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index ca7744b852f5..4bca6b54520a 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -218,14 +218,21 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
prot = ttm_io_prot(bo, bo->resource, prot);
if (!bo->resource->bus.is_iomem) {
struct ttm_operation_ctx ctx = {
- .interruptible = false,
+ .interruptible = true,
.no_wait_gpu = false,
.force_alloc = true
};
ttm = bo->ttm;
- if (ttm_tt_populate(bdev, bo->ttm, &ctx))
- return VM_FAULT_OOM;
+ err = ttm_tt_populate(bdev, bo->ttm, &ctx);
+ if (err) {
+ if (err == -EINTR || err == -ERESTARTSYS ||
+ err == -EAGAIN)
+ return VM_FAULT_NOPAGE;
+
+ pr_debug("TTM fault hit %pe.\n", ERR_PTR(err));
+ return VM_FAULT_SIGBUS;
+ }
} else {
/* Iomem should not be marked encrypted */
prot = pgprot_decrypted(prot);
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index aa116a7bbae3..18c342a919a2 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -47,6 +47,11 @@
#include "ttm_module.h"
+#define TTM_MAX_ORDER (PMD_SHIFT - PAGE_SHIFT)
+#define __TTM_DIM_ORDER (TTM_MAX_ORDER + 1)
+/* Some architectures have a weird PMD_SHIFT */
+#define TTM_DIM_ORDER (__TTM_DIM_ORDER <= MAX_ORDER ? __TTM_DIM_ORDER : MAX_ORDER)
+
/**
* struct ttm_pool_dma - Helper object for coherent DMA mappings
*
@@ -65,11 +70,11 @@ module_param(page_pool_size, ulong, 0644);
static atomic_long_t allocated_pages;
-static struct ttm_pool_type global_write_combined[MAX_ORDER];
-static struct ttm_pool_type global_uncached[MAX_ORDER];
+static struct ttm_pool_type global_write_combined[TTM_DIM_ORDER];
+static struct ttm_pool_type global_uncached[TTM_DIM_ORDER];
-static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER];
-static struct ttm_pool_type global_dma32_uncached[MAX_ORDER];
+static struct ttm_pool_type global_dma32_write_combined[TTM_DIM_ORDER];
+static struct ttm_pool_type global_dma32_uncached[TTM_DIM_ORDER];
static spinlock_t shrinker_lock;
static struct list_head shrinker_list;
@@ -368,6 +373,43 @@ static int ttm_pool_page_allocated(struct ttm_pool *pool, unsigned int order,
}
/**
+ * ttm_pool_free_range() - Free a range of TTM pages
+ * @pool: The pool used for allocating.
+ * @tt: The struct ttm_tt holding the page pointers.
+ * @caching: The page caching mode used by the range.
+ * @start_page: index for first page to free.
+ * @end_page: index for last page to free + 1.
+ *
+ * During allocation, the ttm_tt page-vector may be populated with ranges of
+ * pages with different attributes if the allocation hit an error without
+ * being able to completely fulfill the allocation. This function can be used
+ * to free these individual ranges.
+ */
+static void ttm_pool_free_range(struct ttm_pool *pool, struct ttm_tt *tt,
+ enum ttm_caching caching,
+ pgoff_t start_page, pgoff_t end_page)
+{
+ struct page **pages = tt->pages;
+ unsigned int order;
+ pgoff_t i, nr;
+
+ for (i = start_page; i < end_page; i += nr, pages += nr) {
+ struct ttm_pool_type *pt = NULL;
+
+ order = ttm_pool_page_order(pool, *pages);
+ nr = (1UL << order);
+ if (tt->dma_address)
+ ttm_pool_unmap(pool, tt->dma_address[i], nr);
+
+ pt = ttm_pool_select_type(pool, caching, order);
+ if (pt)
+ ttm_pool_type_give(pt, *pages);
+ else
+ ttm_pool_free_page(pool, caching, order, *pages);
+ }
+}
+
+/**
* ttm_pool_alloc - Fill a ttm_tt object
*
* @pool: ttm_pool to use
@@ -382,12 +424,14 @@ static int ttm_pool_page_allocated(struct ttm_pool *pool, unsigned int order,
int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
struct ttm_operation_ctx *ctx)
{
- unsigned long num_pages = tt->num_pages;
+ pgoff_t num_pages = tt->num_pages;
dma_addr_t *dma_addr = tt->dma_address;
struct page **caching = tt->pages;
struct page **pages = tt->pages;
+ enum ttm_caching page_caching;
gfp_t gfp_flags = GFP_USER;
- unsigned int i, order;
+ pgoff_t caching_divide;
+ unsigned int order;
struct page *p;
int r;
@@ -405,11 +449,12 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
else
gfp_flags |= GFP_HIGHUSER;
- for (order = min_t(unsigned int, MAX_ORDER - 1, __fls(num_pages));
+ for (order = min_t(unsigned int, TTM_MAX_ORDER, __fls(num_pages));
num_pages;
order = min_t(unsigned int, order, __fls(num_pages))) {
struct ttm_pool_type *pt;
+ page_caching = tt->caching;
pt = ttm_pool_select_type(pool, tt->caching, order);
p = pt ? ttm_pool_type_take(pt) : NULL;
if (p) {
@@ -418,6 +463,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
if (r)
goto error_free_page;
+ caching = pages;
do {
r = ttm_pool_page_allocated(pool, order, p,
&dma_addr,
@@ -426,14 +472,15 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
if (r)
goto error_free_page;
+ caching = pages;
if (num_pages < (1 << order))
break;
p = ttm_pool_type_take(pt);
} while (p);
- caching = pages;
}
+ page_caching = ttm_cached;
while (num_pages >= (1 << order) &&
(p = ttm_pool_alloc_page(pool, gfp_flags, order))) {
@@ -442,6 +489,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
tt->caching);
if (r)
goto error_free_page;
+ caching = pages;
}
r = ttm_pool_page_allocated(pool, order, p, &dma_addr,
&num_pages, &pages);
@@ -468,15 +516,13 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
return 0;
error_free_page:
- ttm_pool_free_page(pool, tt->caching, order, p);
+ ttm_pool_free_page(pool, page_caching, order, p);
error_free_all:
num_pages = tt->num_pages - num_pages;
- for (i = 0; i < num_pages; ) {
- order = ttm_pool_page_order(pool, tt->pages[i]);
- ttm_pool_free_page(pool, tt->caching, order, tt->pages[i]);
- i += 1 << order;
- }
+ caching_divide = caching - tt->pages;
+ ttm_pool_free_range(pool, tt, tt->caching, 0, caching_divide);
+ ttm_pool_free_range(pool, tt, ttm_cached, caching_divide, num_pages);
return r;
}
@@ -492,27 +538,7 @@ EXPORT_SYMBOL(ttm_pool_alloc);
*/
void ttm_pool_free(struct ttm_pool *pool, struct ttm_tt *tt)
{
- unsigned int i;
-
- for (i = 0; i < tt->num_pages; ) {
- struct page *p = tt->pages[i];
- unsigned int order, num_pages;
- struct ttm_pool_type *pt;
-
- order = ttm_pool_page_order(pool, p);
- num_pages = 1ULL << order;
- if (tt->dma_address)
- ttm_pool_unmap(pool, tt->dma_address[i], num_pages);
-
- pt = ttm_pool_select_type(pool, tt->caching, order);
- if (pt)
- ttm_pool_type_give(pt, tt->pages[i]);
- else
- ttm_pool_free_page(pool, tt->caching, order,
- tt->pages[i]);
-
- i += num_pages;
- }
+ ttm_pool_free_range(pool, tt, tt->caching, 0, tt->num_pages);
while (atomic_long_read(&allocated_pages) > page_pool_size)
ttm_pool_shrink();
@@ -542,7 +568,7 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
if (use_dma_alloc) {
for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
- for (j = 0; j < MAX_ORDER; ++j)
+ for (j = 0; j < TTM_DIM_ORDER; ++j)
ttm_pool_type_init(&pool->caching[i].orders[j],
pool, i, j);
}
@@ -562,7 +588,7 @@ void ttm_pool_fini(struct ttm_pool *pool)
if (pool->use_dma_alloc) {
for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
- for (j = 0; j < MAX_ORDER; ++j)
+ for (j = 0; j < TTM_DIM_ORDER; ++j)
ttm_pool_type_fini(&pool->caching[i].orders[j]);
}
@@ -616,7 +642,7 @@ static void ttm_pool_debugfs_header(struct seq_file *m)
unsigned int i;
seq_puts(m, "\t ");
- for (i = 0; i < MAX_ORDER; ++i)
+ for (i = 0; i < TTM_DIM_ORDER; ++i)
seq_printf(m, " ---%2u---", i);
seq_puts(m, "\n");
}
@@ -627,7 +653,7 @@ static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt,
{
unsigned int i;
- for (i = 0; i < MAX_ORDER; ++i)
+ for (i = 0; i < TTM_DIM_ORDER; ++i)
seq_printf(m, " %8u", ttm_pool_type_count(&pt[i]));
seq_puts(m, "\n");
}
@@ -730,13 +756,16 @@ int ttm_pool_mgr_init(unsigned long num_pages)
{
unsigned int i;
+ BUILD_BUG_ON(TTM_DIM_ORDER > MAX_ORDER);
+ BUILD_BUG_ON(TTM_DIM_ORDER < 1);
+
if (!page_pool_size)
page_pool_size = num_pages;
spin_lock_init(&shrinker_lock);
INIT_LIST_HEAD(&shrinker_list);
- for (i = 0; i < MAX_ORDER; ++i) {
+ for (i = 0; i < TTM_DIM_ORDER; ++i) {
ttm_pool_type_init(&global_write_combined[i], NULL,
ttm_write_combined, i);
ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i);
@@ -769,7 +798,7 @@ void ttm_pool_mgr_fini(void)
{
unsigned int i;
- for (i = 0; i < MAX_ORDER; ++i) {
+ for (i = 0; i < TTM_DIM_ORDER; ++i) {
ttm_pool_type_fini(&global_write_combined[i]);
ttm_pool_type_fini(&global_uncached[i]);
diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c
index 464c3cc8e6fb..06713d8b82b5 100644
--- a/drivers/gpu/drm/vc4/vc4_hdmi.c
+++ b/drivers/gpu/drm/vc4/vc4_hdmi.c
@@ -885,7 +885,8 @@ static void vc4_hdmi_set_infoframes(struct drm_encoder *encoder)
static void vc4_hdmi_enable_scrambling(struct drm_encoder *encoder)
{
struct vc4_hdmi *vc4_hdmi = encoder_to_vc4_hdmi(encoder);
- struct drm_device *drm = vc4_hdmi->connector.dev;
+ struct drm_connector *connector = &vc4_hdmi->connector;
+ struct drm_device *drm = connector->dev;
const struct drm_display_mode *mode = &vc4_hdmi->saved_adjusted_mode;
unsigned long flags;
int idx;
@@ -903,8 +904,8 @@ static void vc4_hdmi_enable_scrambling(struct drm_encoder *encoder)
if (!drm_dev_enter(drm, &idx))
return;
- drm_scdc_set_high_tmds_clock_ratio(vc4_hdmi->ddc, true);
- drm_scdc_set_scrambling(vc4_hdmi->ddc, true);
+ drm_scdc_set_high_tmds_clock_ratio(connector, true);
+ drm_scdc_set_scrambling(connector, true);
spin_lock_irqsave(&vc4_hdmi->hw_lock, flags);
HDMI_WRITE(HDMI_SCRAMBLER_CTL, HDMI_READ(HDMI_SCRAMBLER_CTL) |
@@ -922,7 +923,8 @@ static void vc4_hdmi_enable_scrambling(struct drm_encoder *encoder)
static void vc4_hdmi_disable_scrambling(struct drm_encoder *encoder)
{
struct vc4_hdmi *vc4_hdmi = encoder_to_vc4_hdmi(encoder);
- struct drm_device *drm = vc4_hdmi->connector.dev;
+ struct drm_connector *connector = &vc4_hdmi->connector;
+ struct drm_device *drm = connector->dev;
unsigned long flags;
int idx;
@@ -944,8 +946,8 @@ static void vc4_hdmi_disable_scrambling(struct drm_encoder *encoder)
~VC5_HDMI_SCRAMBLER_CTL_ENABLE);
spin_unlock_irqrestore(&vc4_hdmi->hw_lock, flags);
- drm_scdc_set_scrambling(vc4_hdmi->ddc, false);
- drm_scdc_set_high_tmds_clock_ratio(vc4_hdmi->ddc, false);
+ drm_scdc_set_scrambling(connector, false);
+ drm_scdc_set_high_tmds_clock_ratio(connector, false);
drm_dev_exit(idx);
}
@@ -955,12 +957,13 @@ static void vc4_hdmi_scrambling_wq(struct work_struct *work)
struct vc4_hdmi *vc4_hdmi = container_of(to_delayed_work(work),
struct vc4_hdmi,
scrambling_work);
+ struct drm_connector *connector = &vc4_hdmi->connector;
- if (drm_scdc_get_scrambling_status(vc4_hdmi->ddc))
+ if (drm_scdc_get_scrambling_status(connector))
return;
- drm_scdc_set_high_tmds_clock_ratio(vc4_hdmi->ddc, true);
- drm_scdc_set_scrambling(vc4_hdmi->ddc, true);
+ drm_scdc_set_high_tmds_clock_ratio(connector, true);
+ drm_scdc_set_scrambling(connector, true);
queue_delayed_work(system_wq, &vc4_hdmi->scrambling_work,
msecs_to_jiffies(SCRAMBLING_POLLING_DELAY_MS));
diff --git a/drivers/staging/sm750fb/sm750.c b/drivers/staging/sm750fb/sm750.c
index effc7fcc3703..22ace3168723 100644
--- a/drivers/staging/sm750fb/sm750.c
+++ b/drivers/staging/sm750fb/sm750.c
@@ -989,20 +989,6 @@ release_fb:
return err;
}
-static int lynxfb_kick_out_firmware_fb(struct pci_dev *pdev)
-{
- resource_size_t base = pci_resource_start(pdev, 0);
- resource_size_t size = pci_resource_len(pdev, 0);
- bool primary = false;
-
-#ifdef CONFIG_X86
- primary = pdev->resource[PCI_ROM_RESOURCE].flags &
- IORESOURCE_ROM_SHADOW;
-#endif
-
- return aperture_remove_conflicting_devices(base, size, primary, "sm750_fb1");
-}
-
static int lynxfb_pci_probe(struct pci_dev *pdev,
const struct pci_device_id *ent)
{
@@ -1011,7 +997,7 @@ static int lynxfb_pci_probe(struct pci_dev *pdev,
int fbidx;
int err;
- err = lynxfb_kick_out_firmware_fb(pdev);
+ err = aperture_remove_conflicting_pci_devices(pdev, "sm750_fb1");
if (err)
return err;
diff --git a/drivers/video/aperture.c b/drivers/video/aperture.c
index 41e77de1ea82..b009468ffdff 100644
--- a/drivers/video/aperture.c
+++ b/drivers/video/aperture.c
@@ -20,7 +20,7 @@
* driver can be active at any given time. Many systems load a generic
* graphics driver, such as EFI-GOP or VESA, early during the boot process.
* During later boot stages, they replace the generic driver with a dedicated,
- * hardware-specific driver. To take over the device the dedicated driver
+ * hardware-specific driver. To take over the device, the dedicated driver
* first has to remove the generic driver. Aperture functions manage
* ownership of framebuffer memory and hand-over between drivers.
*
@@ -76,7 +76,7 @@
* generic EFI or VESA drivers, have to register themselves as owners of their
* framebuffer apertures. Ownership of the framebuffer memory is achieved
* by calling devm_aperture_acquire_for_platform_device(). If successful, the
- * driveris the owner of the framebuffer range. The function fails if the
+ * driver is the owner of the framebuffer range. The function fails if the
* framebuffer is already owned by another driver. See below for an example.
*
* .. code-block:: c
@@ -126,7 +126,7 @@
* et al for the registered framebuffer range, the aperture helpers call
* platform_device_unregister() and the generic driver unloads itself. The
* generic driver also has to provide a remove function to make this work.
- * Once hot unplugged fro mhardware, it may not access the device's
+ * Once hot unplugged from hardware, it may not access the device's
* registers, framebuffer memory, ROM, etc afterwards.
*/
@@ -203,7 +203,7 @@ static void aperture_detach_platform_device(struct device *dev)
/*
* Remove the device from the device hierarchy. This is the right thing
- * to do for firmware-based DRM drivers, such as EFI, VESA or VGA. After
+ * to do for firmware-based fb drivers, such as EFI, VESA or VGA. After
* the new driver takes over the hardware, the firmware device's state
* will be lost.
*
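
The comment fixes above also touch the documented flow for generic firmware drivers, which register as owners of their framebuffer range via devm_aperture_acquire_for_platform_device(). A rough sketch of that registration (not the code-block elided from the hunk above; the probe function and resource layout are assumptions):

#include <linux/aperture.h>
#include <linux/platform_device.h>

static int firmwarefb_probe(struct platform_device *pdev)
{
	struct resource *mem;
	int ret;

	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	if (!mem)
		return -EINVAL;

	/* Claim the framebuffer range; fails if another driver owns it. */
	ret = devm_aperture_acquire_for_platform_device(pdev, mem->start,
							resource_size(mem));
	if (ret)
		return ret;

	/* ... set up the generic framebuffer device ... */
	return 0;
}
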
diff --git a/drivers/video/fbdev/aty/radeon_base.c b/drivers/video/fbdev/aty/radeon_base.c
index 657064227de8..972c4bbedfa3 100644
--- a/drivers/video/fbdev/aty/radeon_base.c
+++ b/drivers/video/fbdev/aty/radeon_base.c
@@ -2238,14 +2238,6 @@ static const struct bin_attribute edid2_attr = {
.read = radeon_show_edid2,
};
-static int radeon_kick_out_firmware_fb(struct pci_dev *pdev)
-{
- resource_size_t base = pci_resource_start(pdev, 0);
- resource_size_t size = pci_resource_len(pdev, 0);
-
- return aperture_remove_conflicting_devices(base, size, false, KBUILD_MODNAME);
-}
-
static int radeonfb_pci_register(struct pci_dev *pdev,
const struct pci_device_id *ent)
{
@@ -2296,7 +2288,7 @@ static int radeonfb_pci_register(struct pci_dev *pdev,
rinfo->fb_base_phys = pci_resource_start (pdev, 0);
rinfo->mmio_base_phys = pci_resource_start (pdev, 2);
- ret = radeon_kick_out_firmware_fb(pdev);
+ ret = aperture_remove_conflicting_pci_devices(pdev, KBUILD_MODNAME);
if (ret)
goto err_release_fb;
diff --git a/include/drm/display/drm_scdc_helper.h b/include/drm/display/drm_scdc_helper.h
index ded01fd948b4..34600476a1b9 100644
--- a/include/drm/display/drm_scdc_helper.h
+++ b/include/drm/display/drm_scdc_helper.h
@@ -28,6 +28,7 @@
#include <drm/display/drm_scdc.h>
+struct drm_connector;
struct i2c_adapter;
ssize_t drm_scdc_read(struct i2c_adapter *adapter, u8 offset, void *buffer,
@@ -71,9 +72,9 @@ static inline int drm_scdc_writeb(struct i2c_adapter *adapter, u8 offset,
return drm_scdc_write(adapter, offset, &value, sizeof(value));
}
-bool drm_scdc_get_scrambling_status(struct i2c_adapter *adapter);
+bool drm_scdc_get_scrambling_status(struct drm_connector *connector);
-bool drm_scdc_set_scrambling(struct i2c_adapter *adapter, bool enable);
-bool drm_scdc_set_high_tmds_clock_ratio(struct i2c_adapter *adapter, bool set);
+bool drm_scdc_set_scrambling(struct drm_connector *connector, bool enable);
+bool drm_scdc_set_high_tmds_clock_ratio(struct drm_connector *connector, bool set);
#endif
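
With these prototypes, SCDC users pass the DRM connector instead of a raw I2C adapter and the helper resolves the DDC adapter from it, as the vc4 conversion above shows. A minimal sketch of the call pattern for an HDMI encoder that embeds its connector (struct my_hdmi is hypothetical):

#include <drm/drm_connector.h>
#include <drm/display/drm_scdc_helper.h>

struct my_hdmi {
	struct drm_connector connector;
	/* ... */
};

static void my_hdmi_enable_scrambling(struct my_hdmi *hdmi)
{
	struct drm_connector *connector = &hdmi->connector;

	/* Previously these calls took a struct i2c_adapter * (the DDC bus). */
	drm_scdc_set_high_tmds_clock_ratio(connector, true);
	drm_scdc_set_scrambling(connector, true);
}
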
diff --git a/include/drm/drm_gem_vram_helper.h b/include/drm/drm_gem_vram_helper.h
index d3e8920c0b64..f4aab64411d8 100644
--- a/include/drm/drm_gem_vram_helper.h
+++ b/include/drm/drm_gem_vram_helper.h
@@ -160,7 +160,9 @@ void drm_gem_vram_simple_display_pipe_cleanup_fb(
.debugfs_init = drm_vram_mm_debugfs_init, \
.dumb_create = drm_gem_vram_driver_dumb_create, \
.dumb_map_offset = drm_gem_ttm_dumb_map_offset, \
- .gem_prime_mmap = drm_gem_prime_mmap
+ .gem_prime_mmap = drm_gem_prime_mmap, \
+ .prime_handle_to_fd = drm_gem_prime_handle_to_fd, \
+ .prime_fd_to_handle = drm_gem_prime_fd_to_handle
/*
* VRAM memory manager
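
Because DRM_GEM_VRAM_DRIVER now also initializes the PRIME handle/fd hooks, drivers built on the VRAM helpers pick up self-import/export without listing those callbacks themselves. A hedged sketch of a driver structure using the macro (the "myvram" driver is hypothetical):

#include <drm/drm_drv.h>
#include <drm/drm_gem.h>
#include <drm/drm_gem_vram_helper.h>

DEFINE_DRM_GEM_FOPS(myvram_fops);

static const struct drm_driver myvram_driver = {
	.driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC,
	.fops = &myvram_fops,
	.name = "myvram",
	.desc = "hypothetical VRAM-helper based driver",
	.date = "20230406",
	.major = 1,
	.minor = 0,
	/* Expands to dumb-buffer, mmap and (now) PRIME fd/handle callbacks. */
	DRM_GEM_VRAM_DRIVER,
};
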
diff --git a/include/uapi/drm/qaic_accel.h b/include/uapi/drm/qaic_accel.h
new file mode 100644
index 000000000000..2d348744a853
--- /dev/null
+++ b/include/uapi/drm/qaic_accel.h
@@ -0,0 +1,397 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+ *
+ * Copyright (c) 2019-2020, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef QAIC_ACCEL_H_
+#define QAIC_ACCEL_H_
+
+#include "drm.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/* The length (4K) includes the len and count fields of qaic_manage_msg */
+#define QAIC_MANAGE_MAX_MSG_LENGTH SZ_4K
+
+/* semaphore flags */
+#define QAIC_SEM_INSYNCFENCE 2
+#define QAIC_SEM_OUTSYNCFENCE 1
+
+/* Semaphore commands */
+#define QAIC_SEM_NOP 0
+#define QAIC_SEM_INIT 1
+#define QAIC_SEM_INC 2
+#define QAIC_SEM_DEC 3
+#define QAIC_SEM_WAIT_EQUAL 4
+#define QAIC_SEM_WAIT_GT_EQ 5 /* Greater than or equal */
+#define QAIC_SEM_WAIT_GT_0 6 /* Greater than 0 */
+
+#define QAIC_TRANS_UNDEFINED 0
+#define QAIC_TRANS_PASSTHROUGH_FROM_USR 1
+#define QAIC_TRANS_PASSTHROUGH_TO_USR 2
+#define QAIC_TRANS_PASSTHROUGH_FROM_DEV 3
+#define QAIC_TRANS_PASSTHROUGH_TO_DEV 4
+#define QAIC_TRANS_DMA_XFER_FROM_USR 5
+#define QAIC_TRANS_DMA_XFER_TO_DEV 6
+#define QAIC_TRANS_ACTIVATE_FROM_USR 7
+#define QAIC_TRANS_ACTIVATE_FROM_DEV 8
+#define QAIC_TRANS_ACTIVATE_TO_DEV 9
+#define QAIC_TRANS_DEACTIVATE_FROM_USR 10
+#define QAIC_TRANS_DEACTIVATE_FROM_DEV 11
+#define QAIC_TRANS_STATUS_FROM_USR 12
+#define QAIC_TRANS_STATUS_TO_USR 13
+#define QAIC_TRANS_STATUS_FROM_DEV 14
+#define QAIC_TRANS_STATUS_TO_DEV 15
+#define QAIC_TRANS_TERMINATE_FROM_DEV 16
+#define QAIC_TRANS_TERMINATE_TO_DEV 17
+#define QAIC_TRANS_DMA_XFER_CONT 18
+#define QAIC_TRANS_VALIDATE_PARTITION_FROM_DEV 19
+#define QAIC_TRANS_VALIDATE_PARTITION_TO_DEV 20
+
+/**
+ * struct qaic_manage_trans_hdr - Header for a transaction in a manage message.
+ * @type: In. Identifies this transaction. See QAIC_TRANS_* defines.
+ * @len: In. Length of this transaction, including this header.
+ */
+struct qaic_manage_trans_hdr {
+ __u32 type;
+ __u32 len;
+};
+
+/**
+ * struct qaic_manage_trans_passthrough - Defines a passthrough transaction.
+ * @hdr: In. Header to identify this transaction.
+ * @data: In. Payload of this transaction. Opaque to the driver. Userspace must
+ * encode in little endian and align/pad to 64-bit.
+ */
+struct qaic_manage_trans_passthrough {
+ struct qaic_manage_trans_hdr hdr;
+ __u8 data[];
+};
+
+/**
+ * struct qaic_manage_trans_dma_xfer - Defines a DMA transfer transaction.
+ * @hdr: In. Header to identify this transaction.
+ * @tag: In. Identifies this transfer in other transactions. Opaque to the
+ * driver.
+ * @pad: Structure padding.
+ * @addr: In. Address of the data to DMA to the device.
+ * @size: In. Length of the data to DMA to the device.
+ */
+struct qaic_manage_trans_dma_xfer {
+ struct qaic_manage_trans_hdr hdr;
+ __u32 tag;
+ __u32 pad;
+ __u64 addr;
+ __u64 size;
+};
+
+/**
+ * struct qaic_manage_trans_activate_to_dev - Defines an activate request.
+ * @hdr: In. Header to identify this transaction.
+ * @queue_size: In. Number of elements for DBC request and response queues.
+ * @eventfd: Unused.
+ * @options: In. Device specific options for this activate.
+ * @pad: Structure padding. Must be 0.
+ */
+struct qaic_manage_trans_activate_to_dev {
+ struct qaic_manage_trans_hdr hdr;
+ __u32 queue_size;
+ __u32 eventfd;
+ __u32 options;
+ __u32 pad;
+};
+
+/**
+ * struct qaic_manage_trans_activate_from_dev - Defines an activate response.
+ * @hdr: Out. Header to identify this transaction.
+ * @status: Out. Return code of the request from the device.
+ * @dbc_id: Out. Id of the assigned DBC for a successful request.
+ * @options: Out. Device specific options for this activate.
+ */
+struct qaic_manage_trans_activate_from_dev {
+ struct qaic_manage_trans_hdr hdr;
+ __u32 status;
+ __u32 dbc_id;
+ __u64 options;
+};
+
+/**
+ * struct qaic_manage_trans_deactivate - Defines a deactivate request.
+ * @hdr: In. Header to identify this transaction.
+ * @dbc_id: In. Id of assigned DBC.
+ * @pad: Structure padding. Must be 0.
+ */
+struct qaic_manage_trans_deactivate {
+ struct qaic_manage_trans_hdr hdr;
+ __u32 dbc_id;
+ __u32 pad;
+};
+
+/**
+ * struct qaic_manage_trans_status_to_dev - Defines a status request.
+ * @hdr: In. Header to identify this transaction.
+ */
+struct qaic_manage_trans_status_to_dev {
+ struct qaic_manage_trans_hdr hdr;
+};
+
+/**
+ * struct qaic_manage_trans_status_from_dev - Defines a status response.
+ * @hdr: Out. Header to identify this transaction.
+ * @major: Out. NNC protocol version major number.
+ * @minor: Out. NNC protocol version minor number.
+ * @status: Out. Return code from device.
+ * @status_flags: Out. Flags from device. Bit 0 indicates if CRCs are required.
+ */
+struct qaic_manage_trans_status_from_dev {
+ struct qaic_manage_trans_hdr hdr;
+ __u16 major;
+ __u16 minor;
+ __u32 status;
+ __u64 status_flags;
+};
+
+/**
+ * struct qaic_manage_msg - Defines a message to the device.
+ * @len: In. Length of all the transactions contained within this message.
+ * @count: In. Number of transactions in this message.
+ * @data: In. Address to an array where the transactions can be found.
+ */
+struct qaic_manage_msg {
+ __u32 len;
+ __u32 count;
+ __u64 data;
+};
+
+/**
+ * struct qaic_create_bo - Defines a request to create a buffer object.
+ * @size: In. Size of the buffer in bytes.
+ * @handle: Out. GEM handle for the BO.
+ * @pad: Structure padding. Must be 0.
+ */
+struct qaic_create_bo {
+ __u64 size;
+ __u32 handle;
+ __u32 pad;
+};
+
+/**
+ * struct qaic_mmap_bo - Defines a request to prepare a BO for mmap().
+ * @handle: In. Handle of the GEM BO to prepare for mmap().
+ * @pad: Structure padding. Must be 0.
+ * @offset: Out. Offset value to provide to mmap().
+ */
+struct qaic_mmap_bo {
+ __u32 handle;
+ __u32 pad;
+ __u64 offset;
+};
+
+/**
+ * struct qaic_sem - Defines a semaphore command for a BO slice.
+ * @val: In. Only lower 12 bits are valid.
+ * @index: In. Only lower 5 bits are valid.
+ * @presync: In. 1 if presync operation, 0 if postsync.
+ * @cmd: In. One of QAIC_SEM_*.
+ * @flags: In. Bitfield. See QAIC_SEM_INSYNCFENCE and QAIC_SEM_OUTSYNCFENCE.
+ * @pad: Structure padding. Must be 0.
+ */
+struct qaic_sem {
+ __u16 val;
+ __u8 index;
+ __u8 presync;
+ __u8 cmd;
+ __u8 flags;
+ __u16 pad;
+};
+
+/**
+ * struct qaic_attach_slice_entry - Defines a single BO slice.
+ * @size: In. Size of this slice in bytes.
+ * @sem0: In. Semaphore command 0. Must be 0 if not valid.
+ * @sem1: In. Semaphore command 1. Must be 0 if not valid.
+ * @sem2: In. Semaphore command 2. Must be 0 if not valid.
+ * @sem3: In. Semaphore command 3. Must be 0 if not valid.
+ * @dev_addr: In. Device address this slice pushes to or pulls from.
+ * @db_addr: In. Address of the doorbell to ring.
+ * @db_data: In. Data to write to the doorbell.
+ * @db_len: In. Size of the doorbell data in bits - 32, 16, or 8. 0 is for
+ * inactive doorbells.
+ * @offset: In. Start of this slice as an offset from the start of the BO.
+ */
+struct qaic_attach_slice_entry {
+ __u64 size;
+ struct qaic_sem sem0;
+ struct qaic_sem sem1;
+ struct qaic_sem sem2;
+ struct qaic_sem sem3;
+ __u64 dev_addr;
+ __u64 db_addr;
+ __u32 db_data;
+ __u32 db_len;
+ __u64 offset;
+};
+
+/**
+ * struct qaic_attach_slice_hdr - Defines metadata for a set of BO slices.
+ * @count: In. Number of slices for this BO.
+ * @dbc_id: In. Associate the sliced BO with this DBC.
+ * @handle: In. GEM handle of the BO to slice.
+ * @dir: In. Direction of data flow. 1 = DMA_TO_DEVICE, 2 = DMA_FROM_DEVICE
+ * @size: In. Total length of the BO.
+ *	   If the BO is imported (DMABUF/PRIME), this size should not exceed
+ *	   the size of the DMABUF provided. If the BO is allocated using
+ *	   DRM_IOCTL_QAIC_CREATE_BO, this size should be exactly the same as
+ *	   the size provided during DRM_IOCTL_QAIC_CREATE_BO.
+ */
+struct qaic_attach_slice_hdr {
+ __u32 count;
+ __u32 dbc_id;
+ __u32 handle;
+ __u32 dir;
+ __u64 size;
+};
+
+/**
+ * struct qaic_attach_slice - Defines a set of BO slices.
+ * @hdr: In. Metadata of the set of slices.
+ * @data: In. Pointer to an array containing the slice definitions.
+ */
+struct qaic_attach_slice {
+ struct qaic_attach_slice_hdr hdr;
+ __u64 data;
+};
+
+/**
+ * struct qaic_execute_entry - Defines a BO to submit to the device.
+ * @handle: In. GEM handle of the BO to commit to the device.
+ * @dir: In. Direction of data. 1 = to device, 2 = from device.
+ */
+struct qaic_execute_entry {
+ __u32 handle;
+ __u32 dir;
+};
+
+/**
+ * struct qaic_partial_execute_entry - Defines a BO to resize and submit.
+ * @handle: In. GEM handle of the BO to commit to the device.
+ * @dir: In. Direction of data. 1 = to device, 2 = from device.
+ * @resize: In. New size of the BO. Must be <= the original BO size. 0 is
+ * short for no resize.
+ */
+struct qaic_partial_execute_entry {
+ __u32 handle;
+ __u32 dir;
+ __u64 resize;
+};
+
+/**
+ * struct qaic_execute_hdr - Defines metadata for BO submission.
+ * @count: In. Number of BOs to submit.
+ * @dbc_id: In. DBC to submit the BOs on.
+ */
+struct qaic_execute_hdr {
+ __u32 count;
+ __u32 dbc_id;
+};
+
+/**
+ * struct qaic_execute - Defines a list of BOs to submit to the device.
+ * @hdr: In. BO list metadata.
+ * @data: In. Pointer to an array of BOs to submit.
+ */
+struct qaic_execute {
+ struct qaic_execute_hdr hdr;
+ __u64 data;
+};
+
+/**
+ * struct qaic_wait - Defines a blocking wait for BO execution.
+ * @handle: In. GEM handle of the BO to wait on.
+ * @timeout: In. Maximum time in ms to wait for the BO.
+ * @dbc_id: In. DBC the BO is submitted to.
+ * @pad: Structure padding. Must be 0.
+ */
+struct qaic_wait {
+ __u32 handle;
+ __u32 timeout;
+ __u32 dbc_id;
+ __u32 pad;
+};
+
+/**
+ * struct qaic_perf_stats_hdr - Defines metadata for getting BO perf info.
+ * @count: In. Number of BOs requested.
+ * @pad: Structure padding. Must be 0.
+ * @dbc_id: In. DBC the BOs are associated with.
+ */
+struct qaic_perf_stats_hdr {
+ __u16 count;
+ __u16 pad;
+ __u32 dbc_id;
+};
+
+/**
+ * struct qaic_perf_stats - Defines a request for getting BO perf info.
+ * @hdr: In. Request metadata.
+ * @data: In. Pointer to array of stats structures that will receive the data.
+ */
+struct qaic_perf_stats {
+ struct qaic_perf_stats_hdr hdr;
+ __u64 data;
+};
+
+/**
+ * struct qaic_perf_stats_entry - Defines a BO perf info.
+ * @handle: In. GEM handle of the BO to get perf stats for.
+ * @queue_level_before: Out. Number of elements in the queue before this BO
+ * was submitted.
+ * @num_queue_element: Out. Number of elements added to the queue to submit
+ * this BO.
+ * @submit_latency_us: Out. Time taken by the driver to submit this BO.
+ * @device_latency_us: Out. Time taken by the device to execute this BO.
+ * @pad: Structure padding. Must be 0.
+ */
+struct qaic_perf_stats_entry {
+ __u32 handle;
+ __u32 queue_level_before;
+ __u32 num_queue_element;
+ __u32 submit_latency_us;
+ __u32 device_latency_us;
+ __u32 pad;
+};
+
+#define DRM_QAIC_MANAGE 0x00
+#define DRM_QAIC_CREATE_BO 0x01
+#define DRM_QAIC_MMAP_BO 0x02
+#define DRM_QAIC_ATTACH_SLICE_BO 0x03
+#define DRM_QAIC_EXECUTE_BO 0x04
+#define DRM_QAIC_PARTIAL_EXECUTE_BO 0x05
+#define DRM_QAIC_WAIT_BO 0x06
+#define DRM_QAIC_PERF_STATS_BO 0x07
+
+#define DRM_IOCTL_QAIC_MANAGE DRM_IOWR(DRM_COMMAND_BASE + DRM_QAIC_MANAGE, struct qaic_manage_msg)
+#define DRM_IOCTL_QAIC_CREATE_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_QAIC_CREATE_BO, struct qaic_create_bo)
+#define DRM_IOCTL_QAIC_MMAP_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_QAIC_MMAP_BO, struct qaic_mmap_bo)
+#define DRM_IOCTL_QAIC_ATTACH_SLICE_BO DRM_IOW(DRM_COMMAND_BASE + DRM_QAIC_ATTACH_SLICE_BO, struct qaic_attach_slice)
+#define DRM_IOCTL_QAIC_EXECUTE_BO DRM_IOW(DRM_COMMAND_BASE + DRM_QAIC_EXECUTE_BO, struct qaic_execute)
+#define DRM_IOCTL_QAIC_PARTIAL_EXECUTE_BO DRM_IOW(DRM_COMMAND_BASE + DRM_QAIC_PARTIAL_EXECUTE_BO, struct qaic_execute)
+#define DRM_IOCTL_QAIC_WAIT_BO DRM_IOW(DRM_COMMAND_BASE + DRM_QAIC_WAIT_BO, struct qaic_wait)
+#define DRM_IOCTL_QAIC_PERF_STATS_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_QAIC_PERF_STATS_BO, struct qaic_perf_stats)
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* QAIC_ACCEL_H_ */
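
For context on how this UAPI is meant to be exercised, here is a rough userspace sketch of the create/mmap path, assuming the QAIC device enumerates as /dev/accel/accel0 and that drm.h is on the include path (both assumptions; error handling is illustrative only):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>

#include "qaic_accel.h"	/* the header added above */

int main(void)
{
	void *buf;
	int fd;

	fd = open("/dev/accel/accel0", O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Allocate a 4 KiB buffer object on the device. */
	struct qaic_create_bo create = { .size = 4096 };
	if (ioctl(fd, DRM_IOCTL_QAIC_CREATE_BO, &create)) {
		perror("DRM_IOCTL_QAIC_CREATE_BO");
		goto out;
	}

	/* Ask the driver for the mmap() offset of that BO ... */
	struct qaic_mmap_bo map = { .handle = create.handle };
	if (ioctl(fd, DRM_IOCTL_QAIC_MMAP_BO, &map)) {
		perror("DRM_IOCTL_QAIC_MMAP_BO");
		goto out;
	}

	/* ... and map it into the process. */
	buf = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
		   fd, map.offset);
	if (buf == MAP_FAILED) {
		perror("mmap");
		goto out;
	}

	memset(buf, 0, 4096);
	munmap(buf, 4096);
out:
	close(fd);
	return 0;
}
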