Diffstat (limited to 'drivers/net/wireless/intel/iwlwifi/pcie')
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/pcie/internal.h |  36 +-
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c  | 139 ++--
-rw-r--r--  drivers/net/wireless/intel/iwlwifi/pcie/tx.c       | 311 +++---
3 files changed, 363 insertions(+), 123 deletions(-)
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
index f2bb34270ccf..b59de4f80b4b 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
@@ -603,6 +603,22 @@ struct iwl_tso_hdr_page {
u8 *pos;
};
+/*
+ * Note that we put this struct *last* in the page. By doing that, we ensure
+ * that no TB referencing this page can trigger the 32-bit boundary hardware
+ * bug.
+ */
+struct iwl_tso_page_info {
+ dma_addr_t dma_addr;
+ struct page *next;
+ refcount_t use_count;
+};
+
+#define IWL_TSO_PAGE_DATA_SIZE (PAGE_SIZE - sizeof(struct iwl_tso_page_info))
+#define IWL_TSO_PAGE_INFO(addr) \
+ ((struct iwl_tso_page_info *)(((unsigned long)addr & PAGE_MASK) + \
+ IWL_TSO_PAGE_DATA_SIZE))
+
int iwl_pcie_tx_init(struct iwl_trans *trans);
void iwl_pcie_tx_start(struct iwl_trans *trans, u32 scd_base_addr);
int iwl_pcie_tx_stop(struct iwl_trans *trans);
@@ -623,9 +639,23 @@ void iwl_trans_pcie_tx_reset(struct iwl_trans *trans);
int iwl_pcie_txq_alloc(struct iwl_trans *trans, struct iwl_txq *txq,
int slots_num, bool cmd_queue);
-struct iwl_tso_hdr_page *iwl_pcie_get_page_hdr(struct iwl_trans *trans,
- size_t len, struct sk_buff *skb);
-void iwl_pcie_free_tso_page(struct iwl_trans *trans, struct sk_buff *skb);
+dma_addr_t iwl_pcie_get_sgt_tb_phys(struct sg_table *sgt, void *addr);
+struct sg_table *iwl_pcie_prep_tso(struct iwl_trans *trans, struct sk_buff *skb,
+ struct iwl_cmd_meta *cmd_meta,
+ u8 **hdr, unsigned int hdr_room);
+
+void iwl_pcie_free_tso_pages(struct iwl_trans *trans, struct sk_buff *skb,
+ struct iwl_cmd_meta *cmd_meta);
+
+static inline dma_addr_t iwl_pcie_get_tso_page_phys(void *addr)
+{
+ dma_addr_t res;
+
+ res = IWL_TSO_PAGE_INFO(addr)->dma_addr;
+ res += (unsigned long)addr & ~PAGE_MASK;
+
+ return res;
+}
static inline dma_addr_t
iwl_txq_get_first_tb_dma(struct iwl_txq *txq, int idx)
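The helper above relies on the fixed page layout established by the
IWL_TSO_PAGE_* macros. A worked sketch, assuming a 64-bit build with
4 KiB pages (sizes are illustrative and depend on the configuration):

/*
 * sizeof(struct iwl_tso_page_info) = 8 (dma_addr_t) + 8 (struct page *)
 *                                  + 4 (refcount_t), padded to 24
 * IWL_TSO_PAGE_DATA_SIZE           = 4096 - 24 = 4072
 *
 *  page_address(page)                    page_address(page) + PAGE_SIZE
 *  |<--------- data: 4072 bytes -------->|<----- info: 24 bytes ----->|
 *  [ TSO headers / copied TBs / sg_table ][ struct iwl_tso_page_info  ]
 *
 * Because the whole page is DMA-mapped once, any virtual address in the
 * data area translates to a bus address by pointer arithmetic alone,
 * which is exactly what iwl_pcie_get_tso_page_phys() does.
 */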
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
index b897de1b9226..2e780fb2da42 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
@@ -19,8 +19,10 @@ static struct page *get_workaround_page(struct iwl_trans *trans,
struct sk_buff *skb)
{
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+ struct iwl_tso_page_info *info;
struct page **page_ptr;
struct page *ret;
+ dma_addr_t phys;
page_ptr = (void *)((u8 *)skb->cb + trans_pcie->txqs.page_offs);
@@ -28,8 +30,22 @@ static struct page *get_workaround_page(struct iwl_trans *trans,
if (!ret)
return NULL;
+ info = IWL_TSO_PAGE_INFO(page_address(ret));
+
+ /* Create a DMA mapping for the page */
+ phys = dma_map_page_attrs(trans->dev, ret, 0, PAGE_SIZE,
+ DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
+ if (unlikely(dma_mapping_error(trans->dev, phys))) {
+ __free_page(ret);
+ return NULL;
+ }
+
+ /* Store physical address and set use count */
+ info->dma_addr = phys;
+ refcount_set(&info->use_count, 1);
+
/* set the chaining pointer to the previous page if there is one */
- *(void **)((u8 *)page_address(ret) + PAGE_SIZE - sizeof(void *)) = *page_ptr;
+ info->next = *page_ptr;
*page_ptr = ret;
return ret;
@@ -45,7 +61,8 @@ static int iwl_txq_gen2_set_tb_with_wa(struct iwl_trans *trans,
struct sk_buff *skb,
struct iwl_tfh_tfd *tfd,
dma_addr_t phys, void *virt,
- u16 len, struct iwl_cmd_meta *meta)
+ u16 len, struct iwl_cmd_meta *meta,
+ bool unmap)
{
dma_addr_t oldphys = phys;
struct page *page;
@@ -76,7 +93,7 @@ static int iwl_txq_gen2_set_tb_with_wa(struct iwl_trans *trans,
* a new mapping for it so the device will not fail.
*/
- if (WARN_ON(len > PAGE_SIZE - sizeof(void *))) {
+ if (WARN_ON(len > IWL_TSO_PAGE_DATA_SIZE)) {
ret = -ENOBUFS;
goto unmap;
}
@@ -89,10 +106,27 @@ static int iwl_txq_gen2_set_tb_with_wa(struct iwl_trans *trans,
memcpy(page_address(page), virt, len);
- phys = dma_map_single(trans->dev, page_address(page), len,
- DMA_TO_DEVICE);
- if (unlikely(dma_mapping_error(trans->dev, phys)))
- return -ENOMEM;
+ /*
+ * This is a bit odd, but performance does not matter here; what
+ * matters are the expectations of the calling code and the TB
+ * cleanup function.
+ *
+ * As such, if unmap is set, then create another mapping for the TB
+ * entry as it will be unmapped later. On the other hand, if it is not
+ * set, then the TB entry will not be unmapped and instead we simply
+ * reference and sync the mapping that get_workaround_page() created.
+ */
+ if (unmap) {
+ phys = dma_map_single(trans->dev, page_address(page), len,
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(trans->dev, phys)))
+ return -ENOMEM;
+ } else {
+ phys = iwl_pcie_get_tso_page_phys(page_address(page));
+ dma_sync_single_for_device(trans->dev, phys, len,
+ DMA_TO_DEVICE);
+ }
+
ret = iwl_txq_gen2_set_tb(trans, tfd, phys, len);
if (ret < 0) {
/* unmap the new allocation as single */
@@ -100,6 +134,7 @@ static int iwl_txq_gen2_set_tb_with_wa(struct iwl_trans *trans,
meta = NULL;
goto unmap;
}
+
IWL_DEBUG_TX(trans,
"TB bug workaround: copied %d bytes from 0x%llx to 0x%llx\n",
len, (unsigned long long)oldphys,
@@ -107,6 +142,9 @@ static int iwl_txq_gen2_set_tb_with_wa(struct iwl_trans *trans,
ret = 0;
unmap:
+ if (!unmap)
+ goto trace;
+
if (meta)
dma_unmap_page(trans->dev, oldphys, len, DMA_TO_DEVICE);
else
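A condensed view of the two cleanup models the new 'unmap' argument
selects; a sketch for orientation, not part of the patch:

/*
 * unmap == true:  the copied TB gets its own dma_map_single() mapping,
 *                 and the regular TFD unmap path releases it TB by TB.
 *
 * unmap == false: the copied TB reuses the page-wide mapping created by
 *                 get_workaround_page(), so only a CPU-to-device sync is
 *                 needed; the mapping is torn down once, when the page's
 *                 use_count drops to zero.
 */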
@@ -119,7 +157,9 @@ trace:
static int iwl_txq_gen2_build_amsdu(struct iwl_trans *trans,
struct sk_buff *skb,
- struct iwl_tfh_tfd *tfd, int start_len,
+ struct iwl_tfh_tfd *tfd,
+ struct iwl_cmd_meta *out_meta,
+ int start_len,
u8 hdr_len,
struct iwl_device_tx_cmd *dev_cmd)
{
@@ -128,9 +168,10 @@ static int iwl_txq_gen2_build_amsdu(struct iwl_trans *trans,
struct ieee80211_hdr *hdr = (void *)skb->data;
unsigned int snap_ip_tcp_hdrlen, ip_hdrlen, total_len, hdr_room;
unsigned int mss = skb_shinfo(skb)->gso_size;
+ dma_addr_t start_hdr_phys;
u16 length, amsdu_pad;
u8 *start_hdr;
- struct iwl_tso_hdr_page *hdr_page;
+ struct sg_table *sgt;
struct tso_t tso;
trace_iwlwifi_dev_tx(trans->dev, skb, tfd, sizeof(*tfd),
@@ -146,11 +187,11 @@ static int iwl_txq_gen2_build_amsdu(struct iwl_trans *trans,
(3 + snap_ip_tcp_hdrlen + sizeof(struct ethhdr));
/* Our device supports 9 segments at most; it will fit in one page */
- hdr_page = iwl_pcie_get_page_hdr(trans, hdr_room, skb);
- if (!hdr_page)
+ sgt = iwl_pcie_prep_tso(trans, skb, out_meta, &start_hdr, hdr_room);
+ if (!sgt)
return -ENOMEM;
- start_hdr = hdr_page->pos;
+ start_hdr_phys = iwl_pcie_get_tso_page_phys(start_hdr);
/*
* Pull the ieee80211 header to be able to use TSO core,
@@ -172,36 +213,34 @@ static int iwl_txq_gen2_build_amsdu(struct iwl_trans *trans,
unsigned int data_left = min_t(unsigned int, mss, total_len);
unsigned int tb_len;
dma_addr_t tb_phys;
- u8 *subf_hdrs_start = hdr_page->pos;
+ u8 *pos_hdr = start_hdr;
total_len -= data_left;
- memset(hdr_page->pos, 0, amsdu_pad);
- hdr_page->pos += amsdu_pad;
+ memset(pos_hdr, 0, amsdu_pad);
+ pos_hdr += amsdu_pad;
amsdu_pad = (4 - (sizeof(struct ethhdr) + snap_ip_tcp_hdrlen +
data_left)) & 0x3;
- ether_addr_copy(hdr_page->pos, ieee80211_get_DA(hdr));
- hdr_page->pos += ETH_ALEN;
- ether_addr_copy(hdr_page->pos, ieee80211_get_SA(hdr));
- hdr_page->pos += ETH_ALEN;
+ ether_addr_copy(pos_hdr, ieee80211_get_DA(hdr));
+ pos_hdr += ETH_ALEN;
+ ether_addr_copy(pos_hdr, ieee80211_get_SA(hdr));
+ pos_hdr += ETH_ALEN;
length = snap_ip_tcp_hdrlen + data_left;
- *((__be16 *)hdr_page->pos) = cpu_to_be16(length);
- hdr_page->pos += sizeof(length);
+ *((__be16 *)pos_hdr) = cpu_to_be16(length);
+ pos_hdr += sizeof(length);
/*
* This will copy the SNAP as well which will be considered
* as MAC header.
*/
- tso_build_hdr(skb, hdr_page->pos, &tso, data_left, !total_len);
+ tso_build_hdr(skb, pos_hdr, &tso, data_left, !total_len);
- hdr_page->pos += snap_ip_tcp_hdrlen;
+ pos_hdr += snap_ip_tcp_hdrlen;
+
+ tb_len = pos_hdr - start_hdr;
+ tb_phys = iwl_pcie_get_tso_page_phys(start_hdr);
- tb_len = hdr_page->pos - start_hdr;
- tb_phys = dma_map_single(trans->dev, start_hdr,
- tb_len, DMA_TO_DEVICE);
- if (unlikely(dma_mapping_error(trans->dev, tb_phys)))
- goto out_err;
/*
* No need for _with_wa, this is from the TSO page and
* we leave some space at the end of it so can't hit
@@ -211,21 +250,24 @@ static int iwl_txq_gen2_build_amsdu(struct iwl_trans *trans,
trace_iwlwifi_dev_tx_tb(trans->dev, skb, start_hdr,
tb_phys, tb_len);
/* add this subframe's headers' length to the tx_cmd */
- le16_add_cpu(&tx_cmd->len, hdr_page->pos - subf_hdrs_start);
+ le16_add_cpu(&tx_cmd->len, tb_len);
/* prepare the start_hdr for the next subframe */
- start_hdr = hdr_page->pos;
+ start_hdr = pos_hdr;
/* put the payload */
while (data_left) {
int ret;
tb_len = min_t(unsigned int, tso.size, data_left);
- tb_phys = dma_map_single(trans->dev, tso.data,
- tb_len, DMA_TO_DEVICE);
+ tb_phys = iwl_pcie_get_sgt_tb_phys(sgt, tso.data);
+ /* Not a real mapping error, use direct comparison */
+ if (unlikely(tb_phys == DMA_MAPPING_ERROR))
+ goto out_err;
+
ret = iwl_txq_gen2_set_tb_with_wa(trans, skb, tfd,
tb_phys, tso.data,
- tb_len, NULL);
+ tb_len, NULL, false);
if (ret)
goto out_err;
@@ -234,6 +276,9 @@ static int iwl_txq_gen2_build_amsdu(struct iwl_trans *trans,
}
}
+ dma_sync_single_for_device(trans->dev, start_hdr_phys, hdr_room,
+ DMA_TO_DEVICE);
+
/* re-add the WiFi header */
skb_push(skb, hdr_len);
@@ -290,8 +335,8 @@ iwl_tfh_tfd *iwl_txq_gen2_build_tx_amsdu(struct iwl_trans *trans,
*/
iwl_txq_gen2_set_tb(trans, tfd, tb_phys, len);
- if (iwl_txq_gen2_build_amsdu(trans, skb, tfd, len + IWL_FIRST_TB_SIZE,
- hdr_len, dev_cmd))
+ if (iwl_txq_gen2_build_amsdu(trans, skb, tfd, out_meta,
+ len + IWL_FIRST_TB_SIZE, hdr_len, dev_cmd))
goto out_err;
/* building the A-MSDU might have changed this data, memcpy it now */
@@ -323,7 +368,7 @@ static int iwl_txq_gen2_tx_add_frags(struct iwl_trans *trans,
fragsz, DMA_TO_DEVICE);
ret = iwl_txq_gen2_set_tb_with_wa(trans, skb, tfd, tb_phys,
skb_frag_address(frag),
- fragsz, out_meta);
+ fragsz, out_meta, true);
if (ret)
return ret;
}
@@ -397,7 +442,7 @@ iwl_tfh_tfd *iwl_txq_gen2_build_tx(struct iwl_trans *trans,
tb2_len, DMA_TO_DEVICE);
ret = iwl_txq_gen2_set_tb_with_wa(trans, skb, tfd, tb_phys,
skb->data + hdr_len, tb2_len,
- NULL);
+ NULL, true);
if (ret)
goto out_err;
}
@@ -412,7 +457,8 @@ iwl_tfh_tfd *iwl_txq_gen2_build_tx(struct iwl_trans *trans,
skb_headlen(frag), DMA_TO_DEVICE);
ret = iwl_txq_gen2_set_tb_with_wa(trans, skb, tfd, tb_phys,
frag->data,
- skb_headlen(frag), NULL);
+ skb_headlen(frag), NULL,
+ true);
if (ret)
goto out_err;
if (iwl_txq_gen2_tx_add_frags(trans, frag, tfd, out_meta))
@@ -607,6 +653,10 @@ void iwl_txq_gen2_tfd_unmap(struct iwl_trans *trans,
return;
}
+ /* TB1 is mapped directly; the rest is the TSO page and SG list. */
+ if (meta->sg_offset)
+ num_tbs = 2;
+
/* first TB is never freed - it's the bidirectional DMA data */
for (i = 1; i < num_tbs; i++) {
if (meta->tbs & BIT(i))
@@ -722,7 +772,7 @@ int iwl_txq_gen2_tx(struct iwl_trans *trans, struct sk_buff *skb,
/* Set up first empty entry in queue's array of Tx/cmd buffers */
out_meta = &txq->entries[idx].meta;
- out_meta->flags = 0;
+ memset(out_meta, 0, sizeof(*out_meta));
tfd = iwl_txq_gen2_build_tfd(trans, txq, dev_cmd, skb, out_meta);
if (!tfd) {
@@ -771,17 +821,19 @@ static void iwl_txq_gen2_unmap(struct iwl_trans *trans, int txq_id)
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
struct iwl_txq *txq = trans_pcie->txqs.txq[txq_id];
- spin_lock_bh(&txq->lock);
+ spin_lock_bh(&txq->reclaim_lock);
+ spin_lock(&txq->lock);
while (txq->write_ptr != txq->read_ptr) {
IWL_DEBUG_TX_REPLY(trans, "Q %d Free %d\n",
txq_id, txq->read_ptr);
if (txq_id != trans_pcie->txqs.cmd.q_id) {
int idx = iwl_txq_get_cmd_index(txq, txq->read_ptr);
+ struct iwl_cmd_meta *cmd_meta = &txq->entries[idx].meta;
struct sk_buff *skb = txq->entries[idx].skb;
if (!WARN_ON_ONCE(!skb))
- iwl_pcie_free_tso_page(trans, skb);
+ iwl_pcie_free_tso_pages(trans, skb, cmd_meta);
}
iwl_txq_gen2_free_tfd(trans, txq);
txq->read_ptr = iwl_txq_inc_wrap(trans, txq->read_ptr);
@@ -793,7 +845,8 @@ static void iwl_txq_gen2_unmap(struct iwl_trans *trans, int txq_id)
iwl_op_mode_free_skb(trans->op_mode, skb);
}
- spin_unlock_bh(&txq->lock);
+ spin_unlock(&txq->lock);
+ spin_unlock_bh(&txq->reclaim_lock);
/* just in case - this queue may have been stopped */
iwl_trans_pcie_wake_queue(trans, txq);
@@ -1250,7 +1303,7 @@ int iwl_pcie_gen2_enqueue_hcmd(struct iwl_trans *trans,
out_cmd = txq->entries[idx].cmd;
out_meta = &txq->entries[idx].meta;
- /* re-initialize to NULL */
+ /* re-initialize; this also marks the SG list as unused */
memset(out_meta, 0, sizeof(*out_meta));
if (cmd->flags & CMD_WANT_SKB)
out_meta->source = cmd;
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
index f4452732417d..22d482ae53d9 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
@@ -209,7 +209,22 @@ static void iwl_pcie_clear_cmd_in_flight(struct iwl_trans *trans)
spin_unlock(&trans_pcie->reg_lock);
}
-void iwl_pcie_free_tso_page(struct iwl_trans *trans, struct sk_buff *skb)
+static void iwl_pcie_free_and_unmap_tso_page(struct iwl_trans *trans,
+ struct page *page)
+{
+ struct iwl_tso_page_info *info = IWL_TSO_PAGE_INFO(page_address(page));
+
+ /* Decrease internal use count and unmap/free page if needed */
+ if (refcount_dec_and_test(&info->use_count)) {
+ dma_unmap_page(trans->dev, info->dma_addr, PAGE_SIZE,
+ DMA_TO_DEVICE);
+
+ __free_page(page);
+ }
+}
+
+void iwl_pcie_free_tso_pages(struct iwl_trans *trans, struct sk_buff *skb,
+ struct iwl_cmd_meta *cmd_meta)
{
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
struct page **page_ptr;
@@ -220,11 +235,23 @@ void iwl_pcie_free_tso_page(struct iwl_trans *trans, struct sk_buff *skb)
*page_ptr = NULL;
while (next) {
+ struct iwl_tso_page_info *info;
struct page *tmp = next;
- next = *(void **)((u8 *)page_address(next) + PAGE_SIZE -
- sizeof(void *));
- __free_page(tmp);
+ info = IWL_TSO_PAGE_INFO(page_address(next));
+ next = info->next;
+
+ /* Unmap the scatter gather list that is on the last page */
+ if (!next && cmd_meta->sg_offset) {
+ struct sg_table *sgt;
+
+ sgt = (void *)((u8 *)page_address(tmp) +
+ cmd_meta->sg_offset);
+
+ dma_unmap_sgtable(trans->dev, sgt, DMA_TO_DEVICE, 0);
+ }
+
+ iwl_pcie_free_and_unmap_tso_page(trans, tmp);
}
}
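Together with the mapping side in get_workaround_page() and
iwl_pcie_get_page_hdr(), the page lifetime follows one pattern. A
minimal sketch using the DMA and refcount APIs as they appear in the
patch (example_tso_page_alloc() is a hypothetical name):

static struct page *example_tso_page_alloc(struct iwl_trans *trans)
{
	struct iwl_tso_page_info *info;
	struct page *page = alloc_page(GFP_ATOMIC);
	dma_addr_t phys;

	if (!page)
		return NULL;

	info = IWL_TSO_PAGE_INFO(page_address(page));

	/* map the whole page once; CPU-to-device syncs happen explicitly */
	phys = dma_map_page_attrs(trans->dev, page, 0, PAGE_SIZE,
				  DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
	if (unlikely(dma_mapping_error(trans->dev, phys))) {
		__free_page(page);
		return NULL;
	}

	info->dma_addr = phys;
	refcount_set(&info->use_count, 1);	/* creator's reference */

	/*
	 * Every additional user takes refcount_inc(&info->use_count);
	 * every release goes through iwl_pcie_free_and_unmap_tso_page(),
	 * which unmaps and frees on the final refcount_dec_and_test().
	 */
	return page;
}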
@@ -276,6 +303,10 @@ static void iwl_txq_gen1_tfd_unmap(struct iwl_trans *trans,
return;
}
+ /* TB1 is mapped directly; the rest is the TSO page and SG list. */
+ if (meta->sg_offset)
+ num_tbs = 2;
+
/* first TB is never freed - it's the bidirectional DMA data */
for (i = 1; i < num_tbs; i++) {
@@ -302,20 +333,21 @@ static void iwl_txq_gen1_tfd_unmap(struct iwl_trans *trans,
* iwl_txq_free_tfd - Free all chunks referenced by TFD [txq->q.read_ptr]
* @trans: transport private data
* @txq: tx queue
+ * @read_ptr: the TXQ read_ptr to free
*
* Does NOT advance any TFD circular buffer read/write indexes
* Does NOT free the TFD itself (which is within circular buffer)
*/
-static void iwl_txq_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq)
+static void iwl_txq_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq,
+ int read_ptr)
{
/* rd_ptr is bounded by TFD_QUEUE_SIZE_MAX and
* idx is bounded by n_window
*/
- int rd_ptr = txq->read_ptr;
- int idx = iwl_txq_get_cmd_index(txq, rd_ptr);
+ int idx = iwl_txq_get_cmd_index(txq, read_ptr);
struct sk_buff *skb;
- lockdep_assert_held(&txq->lock);
+ lockdep_assert_held(&txq->reclaim_lock);
if (!txq->entries)
return;
@@ -325,10 +357,10 @@ static void iwl_txq_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq)
*/
if (trans->trans_cfg->gen2)
iwl_txq_gen2_tfd_unmap(trans, &txq->entries[idx].meta,
- iwl_txq_get_tfd(trans, txq, rd_ptr));
+ iwl_txq_get_tfd(trans, txq, read_ptr));
else
iwl_txq_gen1_tfd_unmap(trans, &txq->entries[idx].meta,
- txq, rd_ptr);
+ txq, read_ptr);
/* free SKB */
skb = txq->entries[idx].skb;
@@ -356,20 +388,23 @@ static void iwl_pcie_txq_unmap(struct iwl_trans *trans, int txq_id)
return;
}
- spin_lock_bh(&txq->lock);
+ spin_lock_bh(&txq->reclaim_lock);
+ spin_lock(&txq->lock);
while (txq->write_ptr != txq->read_ptr) {
IWL_DEBUG_TX_REPLY(trans, "Q %d Free %d\n",
txq_id, txq->read_ptr);
if (txq_id != trans_pcie->txqs.cmd.q_id) {
struct sk_buff *skb = txq->entries[txq->read_ptr].skb;
+ struct iwl_cmd_meta *cmd_meta =
+ &txq->entries[txq->read_ptr].meta;
if (WARN_ON_ONCE(!skb))
continue;
- iwl_pcie_free_tso_page(trans, skb);
+ iwl_pcie_free_tso_pages(trans, skb, cmd_meta);
}
- iwl_txq_free_tfd(trans, txq);
+ iwl_txq_free_tfd(trans, txq, txq->read_ptr);
txq->read_ptr = iwl_txq_inc_wrap(trans, txq->read_ptr);
if (txq->read_ptr == txq->write_ptr &&
@@ -383,7 +418,8 @@ static void iwl_pcie_txq_unmap(struct iwl_trans *trans, int txq_id)
iwl_op_mode_free_skb(trans->op_mode, skb);
}
- spin_unlock_bh(&txq->lock);
+ spin_unlock(&txq->lock);
+ spin_unlock_bh(&txq->reclaim_lock);
/* just in case - this queue may have been stopped */
iwl_trans_pcie_wake_queue(trans, txq);
@@ -888,6 +924,7 @@ int iwl_txq_init(struct iwl_trans *trans, struct iwl_txq *txq,
return ret;
spin_lock_init(&txq->lock);
+ spin_lock_init(&txq->reclaim_lock);
if (cmd_queue) {
static struct lock_class_key iwl_txq_cmd_queue_lock_class;
@@ -1022,11 +1059,12 @@ static void iwl_txq_progress(struct iwl_txq *txq)
mod_timer(&txq->stuck_timer, jiffies + txq->wd_timeout);
}
-static inline bool iwl_txq_used(const struct iwl_txq *q, int i)
+static inline bool iwl_txq_used(const struct iwl_txq *q, int i,
+ int read_ptr, int write_ptr)
{
int index = iwl_txq_get_cmd_index(q, i);
- int r = iwl_txq_get_cmd_index(q, q->read_ptr);
- int w = iwl_txq_get_cmd_index(q, q->write_ptr);
+ int r = iwl_txq_get_cmd_index(q, read_ptr);
+ int w = iwl_txq_get_cmd_index(q, write_ptr);
return w >= r ?
(index >= r && index < w) :
@@ -1053,7 +1091,7 @@ static void iwl_pcie_cmdq_reclaim(struct iwl_trans *trans, int txq_id, int idx)
r = iwl_txq_get_cmd_index(txq, txq->read_ptr);
if (idx >= trans->trans_cfg->base_params->max_tfd_queue_size ||
- (!iwl_txq_used(txq, idx))) {
+ (!iwl_txq_used(txq, idx, txq->read_ptr, txq->write_ptr))) {
WARN_ONCE(test_bit(txq_id, trans_pcie->txqs.queue_used),
"%s: Read index for DMA queue txq id (%d), index %d is out of range [0-%d] %d %d.\n",
__func__, txq_id, idx,
@@ -1420,7 +1458,8 @@ int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans,
out_cmd = txq->entries[idx].cmd;
out_meta = &txq->entries[idx].meta;
- memset(out_meta, 0, sizeof(*out_meta)); /* re-initialize to NULL */
+ /* re-initialize; this also marks the SG list as unused */
+ memset(out_meta, 0, sizeof(*out_meta));
if (cmd->flags & CMD_WANT_SKB)
out_meta->source = cmd;
@@ -1702,12 +1741,15 @@ static int iwl_fill_data_tbs(struct iwl_trans *trans, struct sk_buff *skb,
}
#ifdef CONFIG_INET
-struct iwl_tso_hdr_page *iwl_pcie_get_page_hdr(struct iwl_trans *trans,
- size_t len, struct sk_buff *skb)
+static void *iwl_pcie_get_page_hdr(struct iwl_trans *trans,
+ size_t len, struct sk_buff *skb)
{
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
struct iwl_tso_hdr_page *p = this_cpu_ptr(trans_pcie->txqs.tso_hdr_page);
+ struct iwl_tso_page_info *info;
struct page **page_ptr;
+ dma_addr_t phys;
+ void *ret;
page_ptr = (void *)((u8 *)skb->cb + trans_pcie->txqs.page_offs);
@@ -1727,24 +1769,124 @@ struct iwl_tso_hdr_page *iwl_pcie_get_page_hdr(struct iwl_trans *trans,
*
* (see also get_workaround_page() in tx-gen2.c)
*/
- if (p->pos + len < (u8 *)page_address(p->page) + PAGE_SIZE -
- sizeof(void *))
+ if (((unsigned long)p->pos & ~PAGE_MASK) + len < IWL_TSO_PAGE_DATA_SIZE) {
+ info = IWL_TSO_PAGE_INFO(page_address(p->page));
goto out;
+ }
/* We don't have enough room on this page, get a new one. */
- __free_page(p->page);
+ iwl_pcie_free_and_unmap_tso_page(trans, p->page);
alloc:
p->page = alloc_page(GFP_ATOMIC);
if (!p->page)
return NULL;
p->pos = page_address(p->page);
+
+ info = IWL_TSO_PAGE_INFO(page_address(p->page));
+
/* set the chaining pointer to NULL */
- *(void **)((u8 *)page_address(p->page) + PAGE_SIZE - sizeof(void *)) = NULL;
+ info->next = NULL;
+
+ /* Create a DMA mapping for the page */
+ phys = dma_map_page_attrs(trans->dev, p->page, 0, PAGE_SIZE,
+ DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
+ if (unlikely(dma_mapping_error(trans->dev, phys))) {
+ __free_page(p->page);
+ p->page = NULL;
+
+ return NULL;
+ }
+
+ /* Store physical address and set use count */
+ info->dma_addr = phys;
+ refcount_set(&info->use_count, 1);
out:
*page_ptr = p->page;
- get_page(p->page);
- return p;
+ /* Return an internal reference for the caller */
+ refcount_inc(&info->use_count);
+ ret = p->pos;
+ p->pos += len;
+
+ return ret;
+}
+
+/**
+ * iwl_pcie_get_sgt_tb_phys - Find TB address in mapped SG list
+ * @sgt: scatter gather table
+ * @addr: Virtual address
+ *
+ * Find the entry that includes the given address and return the
+ * corresponding physical address for the TB entry.
+ *
+ * Returns: DMA address of the TB entry, or DMA_MAPPING_ERROR if the
+ * address is not covered by the SG list
+ */
+dma_addr_t iwl_pcie_get_sgt_tb_phys(struct sg_table *sgt, void *addr)
+{
+ struct scatterlist *sg;
+ int i;
+
+ for_each_sgtable_dma_sg(sgt, sg, i) {
+ if (addr >= sg_virt(sg) &&
+ (u8 *)addr < (u8 *)sg_virt(sg) + sg_dma_len(sg))
+ return sg_dma_address(sg) +
+ ((unsigned long)addr - (unsigned long)sg_virt(sg));
+ }
+
+ WARN_ON_ONCE(1);
+
+ return DMA_MAPPING_ERROR;
+}
+
+/**
+ * iwl_pcie_prep_tso - Prepare TSO page and SKB for sending
+ * @trans: transport private data
+ * @skb: the SKB to map
+ * @cmd_meta: command meta to store the scatter list information for unmapping
+ * @hdr: output argument for TSO headers
+ * @hdr_room: requested length for TSO headers
+ *
+ * Allocate space for a scatter gather list and TSO headers and map the SKB
+ * using the scatter gather list. The SKB is unmapped when the page is freed
+ * at the end of the operation.
+ *
+ * Returns: newly allocated and mapped scatter gather table, or NULL on error
+ */
+struct sg_table *iwl_pcie_prep_tso(struct iwl_trans *trans, struct sk_buff *skb,
+ struct iwl_cmd_meta *cmd_meta,
+ u8 **hdr, unsigned int hdr_room)
+{
+ struct sg_table *sgt;
+
+ if (WARN_ON_ONCE(skb_has_frag_list(skb)))
+ return NULL;
+
+ *hdr = iwl_pcie_get_page_hdr(trans,
+ hdr_room + __alignof__(struct sg_table) +
+ sizeof(struct sg_table) +
+ (skb_shinfo(skb)->nr_frags + 1) *
+ sizeof(struct scatterlist),
+ skb);
+ if (!*hdr)
+ return NULL;
+
+ sgt = (void *)PTR_ALIGN(*hdr + hdr_room, __alignof__(struct sg_table));
+ sgt->sgl = (void *)(sgt + 1);
+
+ sg_init_table(sgt->sgl, skb_shinfo(skb)->nr_frags + 1);
+
+ sgt->orig_nents = skb_to_sgvec(skb, sgt->sgl, 0, skb->len);
+ if (WARN_ON_ONCE(sgt->orig_nents <= 0))
+ return NULL;
+
+ /* And map the entire SKB */
+ if (dma_map_sgtable(trans->dev, sgt, DMA_TO_DEVICE, 0) < 0)
+ return NULL;
+
+ /* Store non-zero (i.e. valid) offset for unmapping */
+ cmd_meta->sg_offset = (unsigned long)sgt & ~PAGE_MASK;
+
+ return sgt;
}
static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb,
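The two helpers above are consumed together by the A-MSDU build paths
in tx.c and tx-gen2.c; a condensed usage sketch (hypothetical caller,
error handling trimmed):

static int example_build_amsdu_tbs(struct iwl_trans *trans,
				   struct sk_buff *skb,
				   struct iwl_cmd_meta *out_meta,
				   unsigned int hdr_room)
{
	struct sg_table *sgt;
	dma_addr_t tb_phys;
	u8 *start_hdr;

	/* one page holds the TSO headers, the sg_table and its entries */
	sgt = iwl_pcie_prep_tso(trans, skb, out_meta, &start_hdr, hdr_room);
	if (!sgt)
		return -ENOMEM;

	/* payload TBs are looked up in the already-mapped SG list */
	tb_phys = iwl_pcie_get_sgt_tb_phys(sgt, skb->data);
	if (unlikely(tb_phys == DMA_MAPPING_ERROR))
		return -EINVAL;

	/*
	 * ... build TBs from tb_phys; iwl_pcie_free_tso_pages() later
	 * unmaps the SG list via cmd_meta->sg_offset and drops the page
	 * references ...
	 */
	return 0;
}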
@@ -1759,8 +1901,9 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb,
unsigned int snap_ip_tcp_hdrlen, ip_hdrlen, total_len, hdr_room;
unsigned int mss = skb_shinfo(skb)->gso_size;
u16 length, iv_len, amsdu_pad;
- u8 *start_hdr;
- struct iwl_tso_hdr_page *hdr_page;
+ dma_addr_t start_hdr_phys;
+ u8 *start_hdr, *pos_hdr;
+ struct sg_table *sgt;
struct tso_t tso;
/* if the packet is protected, then it must be CCMP or GCMP */
@@ -1783,13 +1926,14 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb,
(3 + snap_ip_tcp_hdrlen + sizeof(struct ethhdr)) + iv_len;
/* Our device supports 9 segments at most; it will fit in one page */
- hdr_page = iwl_pcie_get_page_hdr(trans, hdr_room, skb);
- if (!hdr_page)
+ sgt = iwl_pcie_prep_tso(trans, skb, out_meta, &start_hdr, hdr_room);
+ if (!sgt)
return -ENOMEM;
- start_hdr = hdr_page->pos;
- memcpy(hdr_page->pos, skb->data + hdr_len, iv_len);
- hdr_page->pos += iv_len;
+ start_hdr_phys = iwl_pcie_get_tso_page_phys(start_hdr);
+ pos_hdr = start_hdr;
+ memcpy(pos_hdr, skb->data + hdr_len, iv_len);
+ pos_hdr += iv_len;
/*
* Pull the ieee80211 header + IV to be able to use TSO core,
@@ -1812,45 +1956,43 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb,
min_t(unsigned int, mss, total_len);
unsigned int hdr_tb_len;
dma_addr_t hdr_tb_phys;
- u8 *subf_hdrs_start = hdr_page->pos;
+ u8 *subf_hdrs_start = pos_hdr;
total_len -= data_left;
- memset(hdr_page->pos, 0, amsdu_pad);
- hdr_page->pos += amsdu_pad;
+ memset(pos_hdr, 0, amsdu_pad);
+ pos_hdr += amsdu_pad;
amsdu_pad = (4 - (sizeof(struct ethhdr) + snap_ip_tcp_hdrlen +
data_left)) & 0x3;
- ether_addr_copy(hdr_page->pos, ieee80211_get_DA(hdr));
- hdr_page->pos += ETH_ALEN;
- ether_addr_copy(hdr_page->pos, ieee80211_get_SA(hdr));
- hdr_page->pos += ETH_ALEN;
+ ether_addr_copy(pos_hdr, ieee80211_get_DA(hdr));
+ pos_hdr += ETH_ALEN;
+ ether_addr_copy(pos_hdr, ieee80211_get_SA(hdr));
+ pos_hdr += ETH_ALEN;
length = snap_ip_tcp_hdrlen + data_left;
- *((__be16 *)hdr_page->pos) = cpu_to_be16(length);
- hdr_page->pos += sizeof(length);
+ *((__be16 *)pos_hdr) = cpu_to_be16(length);
+ pos_hdr += sizeof(length);
/*
* This will copy the SNAP as well which will be considered
* as MAC header.
*/
- tso_build_hdr(skb, hdr_page->pos, &tso, data_left, !total_len);
+ tso_build_hdr(skb, pos_hdr, &tso, data_left, !total_len);
- hdr_page->pos += snap_ip_tcp_hdrlen;
+ pos_hdr += snap_ip_tcp_hdrlen;
+
+ hdr_tb_len = pos_hdr - start_hdr;
+ hdr_tb_phys = iwl_pcie_get_tso_page_phys(start_hdr);
- hdr_tb_len = hdr_page->pos - start_hdr;
- hdr_tb_phys = dma_map_single(trans->dev, start_hdr,
- hdr_tb_len, DMA_TO_DEVICE);
- if (unlikely(dma_mapping_error(trans->dev, hdr_tb_phys)))
- return -EINVAL;
iwl_pcie_txq_build_tfd(trans, txq, hdr_tb_phys,
hdr_tb_len, false);
trace_iwlwifi_dev_tx_tb(trans->dev, skb, start_hdr,
hdr_tb_phys, hdr_tb_len);
/* add this subframe's headers' length to the tx_cmd */
- le16_add_cpu(&tx_cmd->len, hdr_page->pos - subf_hdrs_start);
+ le16_add_cpu(&tx_cmd->len, pos_hdr - subf_hdrs_start);
/* prepare the start_hdr for the next subframe */
- start_hdr = hdr_page->pos;
+ start_hdr = pos_hdr;
/* put the payload */
while (data_left) {
@@ -1858,9 +2000,9 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb,
data_left);
dma_addr_t tb_phys;
- tb_phys = dma_map_single(trans->dev, tso.data,
- size, DMA_TO_DEVICE);
- if (unlikely(dma_mapping_error(trans->dev, tb_phys)))
+ tb_phys = iwl_pcie_get_sgt_tb_phys(sgt, tso.data);
+ /* Not a real mapping error, use direct comparison */
+ if (unlikely(tb_phys == DMA_MAPPING_ERROR))
return -EINVAL;
iwl_pcie_txq_build_tfd(trans, txq, tb_phys,
@@ -1873,6 +2015,9 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb,
}
}
+ dma_sync_single_for_device(trans->dev, start_hdr_phys, hdr_room,
+ DMA_TO_DEVICE);
+
/* re-add the WiFi header and IV */
skb_push(skb, hdr_len + iv_len);
@@ -2027,7 +2172,7 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb,
/* Set up first empty entry in queue's array of Tx/cmd buffers */
out_meta = &txq->entries[txq->write_ptr].meta;
- out_meta->flags = 0;
+ memset(out_meta, 0, sizeof(*out_meta));
/*
* The second TB (tb1) points to the remainder of the TX command
@@ -2144,12 +2289,12 @@ out_err:
}
static void iwl_txq_gen1_inval_byte_cnt_tbl(struct iwl_trans *trans,
- struct iwl_txq *txq)
+ struct iwl_txq *txq,
+ int read_ptr)
{
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
struct iwlagn_scd_bc_tbl *scd_bc_tbl = trans_pcie->txqs.scd_bc_tbls.addr;
int txq_id = txq->id;
- int read_ptr = txq->read_ptr;
u8 sta_id = 0;
__le16 bc_ent;
struct iwl_device_tx_cmd *dev_cmd = txq->entries[read_ptr].cmd;
@@ -2176,6 +2321,7 @@ void iwl_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn,
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
struct iwl_txq *txq = trans_pcie->txqs.txq[txq_id];
int tfd_num, read_ptr, last_to_free;
+ int txq_read_ptr, txq_write_ptr;
/* This function is not meant to release cmd queue*/
if (WARN_ON(txq_id == trans_pcie->txqs.cmd.q_id))
@@ -2186,8 +2332,14 @@ void iwl_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn,
tfd_num = iwl_txq_get_cmd_index(txq, ssn);
- spin_lock_bh(&txq->lock);
- read_ptr = iwl_txq_get_cmd_index(txq, txq->read_ptr);
+ spin_lock_bh(&txq->reclaim_lock);
+
+ spin_lock(&txq->lock);
+ txq_read_ptr = txq->read_ptr;
+ txq_write_ptr = txq->write_ptr;
+ spin_unlock(&txq->lock);
+
+ read_ptr = iwl_txq_get_cmd_index(txq, txq_read_ptr);
if (!test_bit(txq_id, trans_pcie->txqs.queue_used)) {
IWL_DEBUG_TX_QUEUES(trans, "Q %d inactive - ignoring idx %d\n",
@@ -2199,19 +2351,19 @@ void iwl_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn,
goto out;
IWL_DEBUG_TX_REPLY(trans, "[Q %d] %d (%d) -> %d (%d)\n",
- txq_id, read_ptr, txq->read_ptr, tfd_num, ssn);
+ txq_id, read_ptr, txq_read_ptr, tfd_num, ssn);
/* Since we free until index _not_ inclusive, the one before index is
* the last we will free. This one must be used
*/
last_to_free = iwl_txq_dec_wrap(trans, tfd_num);
- if (!iwl_txq_used(txq, last_to_free)) {
+ if (!iwl_txq_used(txq, last_to_free, txq_read_ptr, txq_write_ptr)) {
IWL_ERR(trans,
"%s: Read index for txq id (%d), last_to_free %d is out of range [0-%d] %d %d.\n",
__func__, txq_id, last_to_free,
trans->trans_cfg->base_params->max_tfd_queue_size,
- txq->write_ptr, txq->read_ptr);
+ txq_write_ptr, txq_read_ptr);
iwl_op_mode_time_point(trans->op_mode,
IWL_FW_INI_TIME_POINT_FAKE_TX,
@@ -2224,26 +2376,31 @@ void iwl_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn,
for (;
read_ptr != tfd_num;
- txq->read_ptr = iwl_txq_inc_wrap(trans, txq->read_ptr),
- read_ptr = iwl_txq_get_cmd_index(txq, txq->read_ptr)) {
+ txq_read_ptr = iwl_txq_inc_wrap(trans, txq_read_ptr),
+ read_ptr = iwl_txq_get_cmd_index(txq, txq_read_ptr)) {
+ struct iwl_cmd_meta *cmd_meta = &txq->entries[read_ptr].meta;
struct sk_buff *skb = txq->entries[read_ptr].skb;
if (WARN_ONCE(!skb, "no SKB at %d (%d) on queue %d\n",
- read_ptr, txq->read_ptr, txq_id))
+ read_ptr, txq_read_ptr, txq_id))
continue;
- iwl_pcie_free_tso_page(trans, skb);
+ iwl_pcie_free_tso_pages(trans, skb, cmd_meta);
__skb_queue_tail(skbs, skb);
txq->entries[read_ptr].skb = NULL;
if (!trans->trans_cfg->gen2)
- iwl_txq_gen1_inval_byte_cnt_tbl(trans, txq);
+ iwl_txq_gen1_inval_byte_cnt_tbl(trans, txq,
+ txq_read_ptr);
- iwl_txq_free_tfd(trans, txq);
+ iwl_txq_free_tfd(trans, txq, txq_read_ptr);
}
+ spin_lock(&txq->lock);
+ txq->read_ptr = txq_read_ptr;
+
iwl_txq_progress(txq);
if (iwl_txq_space(trans, txq) > txq->low_mark &&
@@ -2265,13 +2422,12 @@ void iwl_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn,
txq->overflow_tx = true;
/*
- * This is tricky: we are in reclaim path which is non
- * re-entrant, so noone will try to take the access the
- * txq data from that path. We stopped tx, so we can't
- * have tx as well. Bottom line, we can unlock and re-lock
- * later.
+ * This is tricky: we are in the reclaim path and are holding
+ * reclaim_lock, so no one will try to access the txq data
+ * from that path. We stopped tx, so we can't have tx as well.
+ * Bottom line, we can unlock and re-lock later.
*/
- spin_unlock_bh(&txq->lock);
+ spin_unlock(&txq->lock);
while ((skb = __skb_dequeue(&overflow_skbs))) {
struct iwl_device_tx_cmd *dev_cmd_ptr;
@@ -2290,12 +2446,13 @@ void iwl_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn,
if (iwl_txq_space(trans, txq) > txq->low_mark)
iwl_trans_pcie_wake_queue(trans, txq);
- spin_lock_bh(&txq->lock);
+ spin_lock(&txq->lock);
txq->overflow_tx = false;
}
+ spin_unlock(&txq->lock);
out:
- spin_unlock_bh(&txq->lock);
+ spin_unlock_bh(&txq->reclaim_lock);
}
/* Set wr_ptr of specific device and txq */
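The locking scheme the patch converges on uses reclaim_lock as the
outer lock, with txq->lock held only for short inner sections; a sketch
(example_reclaim_locking() is a hypothetical name):

static void example_reclaim_locking(struct iwl_txq *txq)
{
	/* outer: serializes reclaim against the unmap/free paths */
	spin_lock_bh(&txq->reclaim_lock);

	/* inner: short critical section to snapshot the queue pointers */
	spin_lock(&txq->lock);
	/* ... read txq->read_ptr / txq->write_ptr ... */
	spin_unlock(&txq->lock);

	/* TFDs can now be freed without holding txq->lock */

	spin_unlock_bh(&txq->reclaim_lock);
}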