aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/ip-sysctl.rst7
-rw-r--r--Documentation/networking/netdev-FAQ.rst16
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_main.c4
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c3
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c4
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c14
-rw-r--r--drivers/net/ethernet/freescale/enetc/Kconfig2
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/devlink.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c259
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c66
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c39
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c22
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rx.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/health.c22
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c3
-rw-r--r--drivers/net/phy/phy_device.c53
-rw-r--r--include/uapi/linux/pkt_cls.h2
-rw-r--r--net/mptcp/options.c10
-rw-r--r--net/mptcp/protocol.c55
-rw-r--r--net/mptcp/protocol.h18
-rw-r--r--net/mptcp/subflow.c83
-rw-r--r--net/sched/cls_flower.c39
29 files changed, 498 insertions, 252 deletions
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index fa544e9037b9..1b7f8debada6 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -630,16 +630,15 @@ tcp_rmem - vector of 3 INTEGERs: min, default, max
default: initial size of receive buffer used by TCP sockets.
This value overrides net.core.rmem_default used by other protocols.
- Default: 87380 bytes. This value results in window of 65535 with
- default setting of tcp_adv_win_scale and tcp_app_win:0 and a bit
- less for default tcp_app_win. See below about these variables.
+ Default: 131072 bytes.
+ This value results in an initial window of 65535.
max: maximal size of receive buffer allowed for automatically
selected receiver buffers for TCP socket. This value does not override
net.core.rmem_max. Calling setsockopt() with SO_RCVBUF disables
automatic tuning of that socket's receive buffer size, in which
case this value is ignored.
- Default: between 87380B and 6MB, depending on RAM size.
+ Default: between 131072 bytes and 6MB, depending on RAM size.
tcp_sack - BOOLEAN
Enable select acknowledgments (SACKS).
diff --git a/Documentation/networking/netdev-FAQ.rst b/Documentation/networking/netdev-FAQ.rst
index ae2ae37cd921..a64c01b52b4c 100644
--- a/Documentation/networking/netdev-FAQ.rst
+++ b/Documentation/networking/netdev-FAQ.rst
@@ -272,6 +272,22 @@ to the mailing list, e.g.::
Posting as one thread is discouraged because it confuses patchwork
(as of patchwork 2.2.2).
+Can I reproduce the checks from patchwork on my local machine?
+--------------------------------------------------------------
+
+Checks in patchwork are mostly simple wrappers around existing kernel
+scripts; the sources are available at:
+
+https://github.com/kuba-moo/nipa/tree/master/tests
+
+Running all the builds and checks locally is a pain, can I post my patches and have the patchwork bot validate them?
+--------------------------------------------------------------------------------------------------------------------
+
+No, you must ensure that your patches are ready by testing them locally
+before posting to the mailing list. The patchwork build bot instance
+gets overloaded very easily and netdev@vger really doesn't need more
+traffic if we can help it.
+
Any other tips to help ensure my net/net-next patch gets OK'd?
--------------------------------------------------------------
Attention to detail. Re-read your own work as if you were the
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c
index 8f70a3909929..4af0cd9530de 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c
@@ -71,8 +71,10 @@ static int aq_ndev_open(struct net_device *ndev)
goto err_exit;
err = aq_nic_start(aq_nic);
- if (err < 0)
+ if (err < 0) {
+ aq_nic_stop(aq_nic);
goto err_exit;
+ }
err_exit:
if (err < 0)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index d10e4f85dd11..1c96b7ba24f2 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -8856,9 +8856,10 @@ void bnxt_tx_disable(struct bnxt *bp)
txr->dev_state = BNXT_DEV_STATE_CLOSING;
}
}
+ /* Drop carrier first to prevent TX timeout */
+ netif_carrier_off(bp->dev);
/* Stop all TX queues */
netif_tx_disable(bp->dev);
- netif_carrier_off(bp->dev);
}
void bnxt_tx_enable(struct bnxt *bp)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
index 6b7b69ed62db..a9bcf887d2fb 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
@@ -472,8 +472,8 @@ static int bnxt_dl_info_get(struct devlink *dl, struct devlink_info_req *req,
if (BNXT_PF(bp) && !bnxt_hwrm_get_nvm_cfg_ver(bp, &nvm_cfg_ver)) {
u32 ver = nvm_cfg_ver.vu32;
- sprintf(buf, "%X.%X.%X", (ver >> 16) & 0xF, (ver >> 8) & 0xF,
- ver & 0xF);
+ sprintf(buf, "%d.%d.%d", (ver >> 16) & 0xf, (ver >> 8) & 0xf,
+ ver & 0xf);
rc = bnxt_dl_info_put(bp, req, BNXT_VERSION_STORED,
DEVLINK_INFO_VERSION_GENERIC_FW_PSID,
buf);
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index fb0bcd18ec0c..f1c2b3c7f7e9 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -399,10 +399,20 @@ static u32 dpaa2_eth_run_xdp(struct dpaa2_eth_priv *priv,
xdp.frame_sz = DPAA2_ETH_RX_BUF_RAW_SIZE;
err = xdp_do_redirect(priv->net_dev, &xdp, xdp_prog);
- if (unlikely(err))
+ if (unlikely(err)) {
+ addr = dma_map_page(priv->net_dev->dev.parent,
+ virt_to_page(vaddr), 0,
+ priv->rx_buf_size, DMA_BIDIRECTIONAL);
+ if (unlikely(dma_mapping_error(priv->net_dev->dev.parent, addr))) {
+ free_pages((unsigned long)vaddr, 0);
+ } else {
+ ch->buf_count++;
+ dpaa2_eth_xdp_release_buf(priv, ch, addr);
+ }
ch->stats.xdp_drop++;
- else
+ } else {
ch->stats.xdp_redirect++;
+ }
break;
}
diff --git a/drivers/net/ethernet/freescale/enetc/Kconfig b/drivers/net/ethernet/freescale/enetc/Kconfig
index d99ea0f4e4a6..ab92382c399a 100644
--- a/drivers/net/ethernet/freescale/enetc/Kconfig
+++ b/drivers/net/ethernet/freescale/enetc/Kconfig
@@ -27,7 +27,7 @@ config FSL_ENETC_VF
config FSL_ENETC_MDIO
tristate "ENETC MDIO driver"
- depends on PCI
+ depends on PCI && MDIO_DEVRES && MDIO_BUS
help
This driver supports NXP ENETC Central MDIO controller as a PCIe
physical function (PF) device.
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index a536fdbf05e1..621be6d2da97 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1353,10 +1353,8 @@ static int __ibmvnic_close(struct net_device *netdev)
adapter->state = VNIC_CLOSING;
rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN);
- if (rc)
- return rc;
adapter->state = VNIC_CLOSED;
- return 0;
+ return rc;
}
static int ibmvnic_close(struct net_device *netdev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index 3261d0dc1104..41474e42a819 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -128,6 +128,11 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change,
{
struct mlx5_core_dev *dev = devlink_priv(devlink);
+ if (mlx5_lag_is_active(dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "reload is unsupported in Lag mode\n");
+ return -EOPNOTSUPP;
+ }
+
switch (action) {
case DEVLINK_RELOAD_ACTION_DRIVER_REINIT:
mlx5_unload_one(dev, false);
@@ -273,6 +278,10 @@ static int mlx5_devlink_enable_roce_validate(struct devlink *devlink, u32 id,
NL_SET_ERR_MSG_MOD(extack, "Device doesn't support RoCE");
return -EOPNOTSUPP;
}
+ if (mlx5_core_is_mp_slave(dev) || mlx5_lag_is_active(dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "Multi port slave/Lag device can't configure RoCE");
+ return -EOPNOTSUPP;
+ }
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index 6bc6b48a56dc..24e2c0d955b9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -12,6 +12,7 @@
#include <net/flow_offload.h>
#include <net/netfilter/nf_flow_table.h>
#include <linux/workqueue.h>
+#include <linux/refcount.h>
#include <linux/xarray.h>
#include "lib/fs_chains.h"
@@ -51,11 +52,11 @@ struct mlx5_tc_ct_priv {
struct mlx5_flow_table *ct_nat;
struct mlx5_flow_table *post_ct;
struct mutex control_lock; /* guards parallel adds/dels */
- struct mutex shared_counter_lock;
struct mapping_ctx *zone_mapping;
struct mapping_ctx *labels_mapping;
enum mlx5_flow_namespace_type ns_type;
struct mlx5_fs_chains *chains;
+ spinlock_t ht_lock; /* protects ft entries */
};
struct mlx5_ct_flow {
@@ -124,6 +125,10 @@ struct mlx5_ct_counter {
bool is_shared;
};
+enum {
+ MLX5_CT_ENTRY_FLAG_VALID,
+};
+
struct mlx5_ct_entry {
struct rhash_head node;
struct rhash_head tuple_node;
@@ -134,6 +139,12 @@ struct mlx5_ct_entry {
struct mlx5_ct_tuple tuple;
struct mlx5_ct_tuple tuple_nat;
struct mlx5_ct_zone_rule zone_rules[2];
+
+ struct mlx5_tc_ct_priv *ct_priv;
+ struct work_struct work;
+
+ refcount_t refcnt;
+ unsigned long flags;
};
static const struct rhashtable_params cts_ht_params = {
@@ -740,6 +751,87 @@ err_attr:
return err;
}
+static bool
+mlx5_tc_ct_entry_valid(struct mlx5_ct_entry *entry)
+{
+ return test_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
+}
+
+static struct mlx5_ct_entry *
+mlx5_tc_ct_entry_get(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_tuple *tuple)
+{
+ struct mlx5_ct_entry *entry;
+
+ entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, tuple,
+ tuples_ht_params);
+ if (entry && mlx5_tc_ct_entry_valid(entry) &&
+ refcount_inc_not_zero(&entry->refcnt)) {
+ return entry;
+ } else if (!entry) {
+ entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht,
+ tuple, tuples_nat_ht_params);
+ if (entry && mlx5_tc_ct_entry_valid(entry) &&
+ refcount_inc_not_zero(&entry->refcnt))
+ return entry;
+ }
+
+ return entry ? ERR_PTR(-EINVAL) : NULL;
+}
+
+static void mlx5_tc_ct_entry_remove_from_tuples(struct mlx5_ct_entry *entry)
+{
+ struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;
+
+ rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
+ &entry->tuple_nat_node,
+ tuples_nat_ht_params);
+ rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node,
+ tuples_ht_params);
+}
+
+static void mlx5_tc_ct_entry_del(struct mlx5_ct_entry *entry)
+{
+ struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;
+
+ mlx5_tc_ct_entry_del_rules(ct_priv, entry);
+
+ spin_lock_bh(&ct_priv->ht_lock);
+ mlx5_tc_ct_entry_remove_from_tuples(entry);
+ spin_unlock_bh(&ct_priv->ht_lock);
+
+ mlx5_tc_ct_counter_put(ct_priv, entry);
+ kfree(entry);
+}
+
+static void
+mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
+{
+ if (!refcount_dec_and_test(&entry->refcnt))
+ return;
+
+ mlx5_tc_ct_entry_del(entry);
+}
+
+static void mlx5_tc_ct_entry_del_work(struct work_struct *work)
+{
+ struct mlx5_ct_entry *entry = container_of(work, struct mlx5_ct_entry, work);
+
+ mlx5_tc_ct_entry_del(entry);
+}
+
+static void
+__mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
+{
+ struct mlx5e_priv *priv;
+
+ if (!refcount_dec_and_test(&entry->refcnt))
+ return;
+
+ priv = netdev_priv(entry->ct_priv->netdev);
+ INIT_WORK(&entry->work, mlx5_tc_ct_entry_del_work);
+ queue_work(priv->wq, &entry->work);
+}
+
static struct mlx5_ct_counter *
mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv)
{
@@ -792,16 +884,26 @@ mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv,
}
/* Use the same counter as the reverse direction */
- mutex_lock(&ct_priv->shared_counter_lock);
- rev_entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, &rev_tuple,
- tuples_ht_params);
- if (rev_entry) {
- if (refcount_inc_not_zero(&rev_entry->counter->refcount)) {
- mutex_unlock(&ct_priv->shared_counter_lock);
- return rev_entry->counter;
- }
+ spin_lock_bh(&ct_priv->ht_lock);
+ rev_entry = mlx5_tc_ct_entry_get(ct_priv, &rev_tuple);
+
+ if (IS_ERR(rev_entry)) {
+ spin_unlock_bh(&ct_priv->ht_lock);
+ goto create_counter;
+ }
+
+ if (rev_entry && refcount_inc_not_zero(&rev_entry->counter->refcount)) {
+ ct_dbg("Using shared counter entry=0x%p rev=0x%p\n", entry, rev_entry);
+ shared_counter = rev_entry->counter;
+ spin_unlock_bh(&ct_priv->ht_lock);
+
+ mlx5_tc_ct_entry_put(rev_entry);
+ return shared_counter;
}
- mutex_unlock(&ct_priv->shared_counter_lock);
+
+ spin_unlock_bh(&ct_priv->ht_lock);
+
+create_counter:
shared_counter = mlx5_tc_ct_counter_create(ct_priv);
if (IS_ERR(shared_counter)) {
@@ -866,10 +968,14 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
if (!meta_action)
return -EOPNOTSUPP;
- entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie,
- cts_ht_params);
- if (entry)
- return 0;
+ spin_lock_bh(&ct_priv->ht_lock);
+ entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
+ if (entry && refcount_inc_not_zero(&entry->refcnt)) {
+ spin_unlock_bh(&ct_priv->ht_lock);
+ mlx5_tc_ct_entry_put(entry);
+ return -EEXIST;
+ }
+ spin_unlock_bh(&ct_priv->ht_lock);
entry = kzalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
@@ -878,6 +984,8 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
entry->tuple.zone = ft->zone;
entry->cookie = flow->cookie;
entry->restore_cookie = meta_action->ct_metadata.cookie;
+ refcount_set(&entry->refcnt, 2);
+ entry->ct_priv = ct_priv;
err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule);
if (err)
@@ -888,35 +996,40 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
if (err)
goto err_set;
- err = rhashtable_insert_fast(&ct_priv->ct_tuples_ht,
- &entry->tuple_node,
- tuples_ht_params);
+ spin_lock_bh(&ct_priv->ht_lock);
+
+ err = rhashtable_lookup_insert_fast(&ft->ct_entries_ht, &entry->node,
+ cts_ht_params);
+ if (err)
+ goto err_entries;
+
+ err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_ht,
+ &entry->tuple_node,
+ tuples_ht_params);
if (err)
goto err_tuple;
if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) {
- err = rhashtable_insert_fast(&ct_priv->ct_tuples_nat_ht,
- &entry->tuple_nat_node,
- tuples_nat_ht_params);
+ err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_nat_ht,
+ &entry->tuple_nat_node,
+ tuples_nat_ht_params);
if (err)
goto err_tuple_nat;
}
+ spin_unlock_bh(&ct_priv->ht_lock);
err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry,
ft->zone_restore_id);
if (err)
goto err_rules;
- err = rhashtable_insert_fast(&ft->ct_entries_ht, &entry->node,
- cts_ht_params);
- if (err)
- goto err_insert;
+ set_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
+ mlx5_tc_ct_entry_put(entry); /* this function reference */
return 0;
-err_insert:
- mlx5_tc_ct_entry_del_rules(ct_priv, entry);
err_rules:
+ spin_lock_bh(&ct_priv->ht_lock);
if (mlx5_tc_ct_entry_has_nat(entry))
rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
&entry->tuple_nat_node, tuples_nat_ht_params);
@@ -925,47 +1038,43 @@ err_tuple_nat:
&entry->tuple_node,
tuples_ht_params);
err_tuple:
+ rhashtable_remove_fast(&ft->ct_entries_ht,
+ &entry->node,
+ cts_ht_params);
+err_entries:
+ spin_unlock_bh(&ct_priv->ht_lock);
err_set:
kfree(entry);
- netdev_warn(ct_priv->netdev,
- "Failed to offload ct entry, err: %d\n", err);
+ if (err != -EEXIST)
+ netdev_warn(ct_priv->netdev, "Failed to offload ct entry, err: %d\n", err);
return err;
}
-static void
-mlx5_tc_ct_del_ft_entry(struct mlx5_tc_ct_priv *ct_priv,
- struct mlx5_ct_entry *entry)
-{
- mlx5_tc_ct_entry_del_rules(ct_priv, entry);
- mutex_lock(&ct_priv->shared_counter_lock);
- if (mlx5_tc_ct_entry_has_nat(entry))
- rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
- &entry->tuple_nat_node,
- tuples_nat_ht_params);
- rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node,
- tuples_ht_params);
- mutex_unlock(&ct_priv->shared_counter_lock);
- mlx5_tc_ct_counter_put(ct_priv, entry);
-
-}
-
static int
mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft,
struct flow_cls_offload *flow)
{
+ struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
unsigned long cookie = flow->cookie;
struct mlx5_ct_entry *entry;
- entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie,
- cts_ht_params);
- if (!entry)
+ spin_lock_bh(&ct_priv->ht_lock);
+ entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
+ if (!entry) {
+ spin_unlock_bh(&ct_priv->ht_lock);
return -ENOENT;
+ }
- mlx5_tc_ct_del_ft_entry(ft->ct_priv, entry);
- WARN_ON(rhashtable_remove_fast(&ft->ct_entries_ht,
- &entry->node,
- cts_ht_params));
- kfree(entry);
+ if (!mlx5_tc_ct_entry_valid(entry)) {
+ spin_unlock_bh(&ct_priv->ht_lock);
+ return -EINVAL;
+ }
+
+ rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params);
+ mlx5_tc_ct_entry_remove_from_tuples(entry);
+ spin_unlock_bh(&ct_priv->ht_lock);
+
+ mlx5_tc_ct_entry_put(entry);
return 0;
}
@@ -974,19 +1083,30 @@ static int
mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft,
struct flow_cls_offload *f)
{
+ struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
unsigned long cookie = f->cookie;
struct mlx5_ct_entry *entry;
u64 lastuse, packets, bytes;
- entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie,
- cts_ht_params);
- if (!entry)
+ spin_lock_bh(&ct_priv->ht_lock);
+ entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
+ if (!entry) {
+ spin_unlock_bh(&ct_priv->ht_lock);
return -ENOENT;
+ }
+
+ if (!mlx5_tc_ct_entry_valid(entry) || !refcount_inc_not_zero(&entry->refcnt)) {
+ spin_unlock_bh(&ct_priv->ht_lock);
+ return -EINVAL;
+ }
+
+ spin_unlock_bh(&ct_priv->ht_lock);
mlx5_fc_query_cached(entry->counter->counter, &bytes, &packets, &lastuse);
flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
FLOW_ACTION_HW_STATS_DELAYED);
+ mlx5_tc_ct_entry_put(entry);
return 0;
}
@@ -1478,11 +1598,9 @@ err_mapping:
static void
mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg)
{
- struct mlx5_tc_ct_priv *ct_priv = arg;
struct mlx5_ct_entry *entry = ptr;
- mlx5_tc_ct_del_ft_entry(ct_priv, entry);
- kfree(entry);
+ mlx5_tc_ct_entry_put(entry);
}
static void
@@ -1960,6 +2078,7 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
goto err_mapping_labels;
}
+ spin_lock_init(&ct_priv->ht_lock);
ct_priv->ns_type = ns_type;
ct_priv->chains = chains;
ct_priv->netdev = priv->netdev;
@@ -1994,7 +2113,6 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
idr_init(&ct_priv->fte_ids);
mutex_init(&ct_priv->control_lock);
- mutex_init(&ct_priv->shared_counter_lock);
rhashtable_init(&ct_priv->zone_ht, &zone_params);
rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params);
rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params);
@@ -2037,7 +2155,6 @@ mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
rhashtable_destroy(&ct_priv->zone_ht);
mutex_destroy(&ct_priv->control_lock);
- mutex_destroy(&ct_priv->shared_counter_lock);
idr_destroy(&ct_priv->fte_ids);
kfree(ct_priv);
}
@@ -2059,14 +2176,22 @@ mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone))
return false;
- entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, &tuple,
- tuples_ht_params);
- if (!entry)
- entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht,
- &tuple, tuples_nat_ht_params);
- if (!entry)
+ spin_lock(&ct_priv->ht_lock);
+
+ entry = mlx5_tc_ct_entry_get(ct_priv, &tuple);
+ if (!entry) {
+ spin_unlock(&ct_priv->ht_lock);
+ return false;
+ }
+
+ if (IS_ERR(entry)) {
+ spin_unlock(&ct_priv->ht_lock);
return false;
+ }
+ spin_unlock(&ct_priv->ht_lock);
tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie);
+ __mlx5_tc_ct_entry_put(entry);
+
return true;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
index d487e5e37162..8d991c3b7a50 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -83,7 +83,7 @@ static inline void mlx5e_xdp_tx_disable(struct mlx5e_priv *priv)
clear_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);
/* Let other device's napi(s) and XSK wakeups see our new state. */
- synchronize_rcu();
+ synchronize_net();
}
static inline bool mlx5e_xdp_tx_is_enabled(struct mlx5e_priv *priv)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
index d87c345878d3..f4bce1365639 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
@@ -111,7 +111,7 @@ err_free_cparam:
void mlx5e_close_xsk(struct mlx5e_channel *c)
{
clear_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
- synchronize_rcu(); /* Sync with the XSK wakeup and with NAPI. */
+ synchronize_net(); /* Sync with the XSK wakeup and with NAPI. */
mlx5e_close_rq(&c->xskrq);
mlx5e_close_cq(&c->xskrq.cq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
index 1fae7fab8297..ff81b69a59a9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
@@ -173,7 +173,7 @@ static inline bool mlx5e_accel_tx_eseg(struct mlx5e_priv *priv,
#endif
#if IS_ENABLED(CONFIG_GENEVE)
- if (skb->encapsulation)
+ if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
mlx5e_tx_tunnel_accel(skb, eseg, ihs);
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
index 6a1d82503ef8..d06532d0baa4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
@@ -57,6 +57,20 @@ struct mlx5e_ktls_offload_context_rx {
struct mlx5e_ktls_rx_resync_ctx resync;
};
+static bool mlx5e_ktls_priv_rx_put(struct mlx5e_ktls_offload_context_rx *priv_rx)
+{
+ if (!refcount_dec_and_test(&priv_rx->resync.refcnt))
+ return false;
+
+ kfree(priv_rx);
+ return true;
+}
+
+static void mlx5e_ktls_priv_rx_get(struct mlx5e_ktls_offload_context_rx *priv_rx)
+{
+ refcount_inc(&priv_rx->resync.refcnt);
+}
+
static int mlx5e_ktls_create_tir(struct mlx5_core_dev *mdev, u32 *tirn, u32 rqtn)
{
int err, inlen;
@@ -326,7 +340,7 @@ static void resync_handle_work(struct work_struct *work)
priv_rx = container_of(resync, struct mlx5e_ktls_offload_context_rx, resync);
if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) {
- refcount_dec(&resync->refcnt);
+ mlx5e_ktls_priv_rx_put(priv_rx);
return;
}
@@ -334,7 +348,7 @@ static void resync_handle_work(struct work_struct *work)
sq = &c->async_icosq;
if (resync_post_get_progress_params(sq, priv_rx))
- refcount_dec(&resync->refcnt);
+ mlx5e_ktls_priv_rx_put(priv_rx);
}
static void resync_init(struct mlx5e_ktls_rx_resync_ctx *resync,
@@ -377,7 +391,11 @@ unlock:
return err;
}
-/* Function is called with elevated refcount, it decreases it. */
+/* Function can be called with the refcount being either elevated or not.
+ * It decreases the refcount and may free the kTLS priv context.
+ * Refcount is not elevated only if tls_dev_del has been called, but GET_PSV was
+ * already in flight.
+ */
void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi,
struct mlx5e_icosq *sq)
{
@@ -410,7 +428,7 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi,
tls_offload_rx_resync_async_request_end(priv_rx->sk, cpu_to_be32(hw_seq));
priv_rx->stats->tls_resync_req_end++;
out:
- refcount_dec(&resync->refcnt);
+ mlx5e_ktls_priv_rx_put(priv_rx);
dma_unmap_single(dev, buf->dma_addr, PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE);
kfree(buf);
}
@@ -431,9 +449,9 @@ static bool resync_queue_get_psv(struct sock *sk)
return false;
resync = &priv_rx->resync;
- refcount_inc(&resync->refcnt);
+ mlx5e_ktls_priv_rx_get(priv_rx);
if (unlikely(!queue_work(resync->priv->tls->rx_wq, &resync->work)))
- refcount_dec(&resync->refcnt);
+ mlx5e_ktls_priv_rx_put(priv_rx);
return true;
}
@@ -625,31 +643,6 @@ err_create_key:
return err;
}
-/* Elevated refcount on the resync object means there are
- * outstanding operations (uncompleted GET_PSV WQEs) that
- * will read the resync / priv_rx objects once completed.
- * Wait for them to avoid use-after-free.
- */
-static void wait_for_resync(struct net_device *netdev,
- struct mlx5e_ktls_rx_resync_ctx *resync)
-{
-#define MLX5E_KTLS_RX_RESYNC_TIMEOUT 20000 /* msecs */
- unsigned long exp_time = jiffies + msecs_to_jiffies(MLX5E_KTLS_RX_RESYNC_TIMEOUT);
- unsigned int refcnt;
-
- do {
- refcnt = refcount_read(&resync->refcnt);
- if (refcnt == 1)
- return;
-
- msleep(20);
- } while (time_before(jiffies, exp_time));
-
- netdev_warn(netdev,
- "Failed waiting for kTLS RX resync refcnt to be released (%u).\n",
- refcnt);
-}
-
void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx)
{
struct mlx5e_ktls_offload_context_rx *priv_rx;
@@ -663,7 +656,7 @@ void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx)
priv_rx = mlx5e_get_ktls_rx_priv_ctx(tls_ctx);
set_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags);
mlx5e_set_ktls_rx_priv_ctx(tls_ctx, NULL);
- synchronize_rcu(); /* Sync with NAPI */
+ synchronize_net(); /* Sync with NAPI */
if (!cancel_work_sync(&priv_rx->rule.work))
/* completion is needed, as the priv_rx in the add flow
* is maintained on the wqe info (wi), not on the socket.
@@ -671,8 +664,7 @@ void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx)
wait_for_completion(&priv_rx->add_ctx);
resync = &priv_rx->resync;
if (cancel_work_sync(&resync->work))
- refcount_dec(&resync->refcnt);
- wait_for_resync(netdev, resync);
+ mlx5e_ktls_priv_rx_put(priv_rx);
priv_rx->stats->tls_del++;
if (priv_rx->rule.rule)
@@ -680,5 +672,9 @@ void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx)
mlx5_core_destroy_tir(mdev, priv_rx->tirn);
mlx5_ktls_destroy_key(mdev, priv_rx->key_id);
- kfree(priv_rx);
+ /* priv_rx should normally be freed here, but if there is an outstanding
+ * GET_PSV, deallocation will be delayed until the CQE for GET_PSV is
+ * processed.
+ */
+ mlx5e_ktls_priv_rx_put(priv_rx);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 302001d6661e..8612c388db7d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -525,7 +525,7 @@ static int mlx5e_get_coalesce(struct net_device *netdev,
#define MLX5E_MAX_COAL_FRAMES MLX5_MAX_CQ_COUNT
static void
-mlx5e_set_priv_channels_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal)
+mlx5e_set_priv_channels_tx_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal)
{
struct mlx5_core_dev *mdev = priv->mdev;
int tc;
@@ -540,6 +540,17 @@ mlx5e_set_priv_channels_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesc
coal->tx_coalesce_usecs,
coal->tx_max_coalesced_frames);
}
+ }
+}
+
+static void
+mlx5e_set_priv_channels_rx_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int i;
+
+ for (i = 0; i < priv->channels.num; ++i) {
+ struct mlx5e_channel *c = priv->channels.c[i];
mlx5_core_modify_cq_moderation(mdev, &c->rq.cq.mcq,
coal->rx_coalesce_usecs,
@@ -586,21 +597,9 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
tx_moder->pkts = coal->tx_max_coalesced_frames;
new_channels.params.tx_dim_enabled = !!coal->use_adaptive_tx_coalesce;
- if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
- priv->channels.params = new_channels.params;
- goto out;
- }
- /* we are opened */
-
reset_rx = !!coal->use_adaptive_rx_coalesce != priv->channels.params.rx_dim_enabled;
reset_tx = !!coal->use_adaptive_tx_coalesce != priv->channels.params.tx_dim_enabled;
- if (!reset_rx && !reset_tx) {
- mlx5e_set_priv_channels_coalesce(priv, coal);
- priv->channels.params = new_channels.params;
- goto out;
- }
-
if (reset_rx) {
u8 mode = MLX5E_GET_PFLAG(&new_channels.params,
MLX5E_PFLAG_RX_CQE_BASED_MODER);
@@ -614,6 +613,20 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
mlx5e_reset_tx_moderation(&new_channels.params, mode);
}
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+ priv->channels.params = new_channels.params;
+ goto out;
+ }
+
+ if (!reset_rx && !reset_tx) {
+ if (!coal->use_adaptive_rx_coalesce)
+ mlx5e_set_priv_channels_rx_coalesce(priv, coal);
+ if (!coal->use_adaptive_tx_coalesce)
+ mlx5e_set_priv_channels_tx_coalesce(priv, coal);
+ priv->channels.params = new_channels.params;
+ goto out;
+ }
+
err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL);
out:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 3fc7d18ac868..a2e0b548bf57 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -65,6 +65,7 @@
#include "en/devlink.h"
#include "lib/mlx5.h"
#include "en/ptp.h"
+#include "fpga/ipsec.h"
bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
{
@@ -106,7 +107,7 @@ bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev,
if (!mlx5e_check_fragmented_striding_rq_cap(mdev))
return false;
- if (MLX5_IPSEC_DEV(mdev))
+ if (mlx5_fpga_is_ipsec_device(mdev))
return false;
if (params->xdp_prog) {
@@ -914,7 +915,7 @@ void mlx5e_activate_rq(struct mlx5e_rq *rq)
void mlx5e_deactivate_rq(struct mlx5e_rq *rq)
{
clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
- synchronize_rcu(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. */
+ synchronize_net(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. */
}
void mlx5e_close_rq(struct mlx5e_rq *rq)
@@ -1348,7 +1349,7 @@ void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq)
struct mlx5_wq_cyc *wq = &sq->wq;
clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
- synchronize_rcu(); /* Sync with NAPI to prevent netif_tx_wake_queue. */
+ synchronize_net(); /* Sync with NAPI to prevent netif_tx_wake_queue. */
mlx5e_tx_disable_queue(sq->txq);
@@ -1423,7 +1424,7 @@ void mlx5e_activate_icosq(struct mlx5e_icosq *icosq)
void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq)
{
clear_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state);
- synchronize_rcu(); /* Sync with NAPI. */
+ synchronize_net(); /* Sync with NAPI. */
}
void mlx5e_close_icosq(struct mlx5e_icosq *sq)
@@ -1502,7 +1503,7 @@ void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq)
struct mlx5e_channel *c = sq->channel;
clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
- synchronize_rcu(); /* Sync with NAPI. */
+ synchronize_net(); /* Sync with NAPI. */
mlx5e_destroy_sq(c->mdev, sq->sqn);
mlx5e_free_xdpsq_descs(sq);
@@ -1826,12 +1827,12 @@ static int mlx5e_open_queues(struct mlx5e_channel *c,
mlx5e_build_create_cq_param(&ccp, c);
- err = mlx5e_open_cq(c->priv, icocq_moder, &cparam->icosq.cqp, &ccp,
+ err = mlx5e_open_cq(c->priv, icocq_moder, &cparam->async_icosq.cqp, &ccp,
&c->async_icosq.cq);
if (err)
return err;
- err = mlx5e_open_cq(c->priv, icocq_moder, &cparam->async_icosq.cqp, &ccp,
+ err = mlx5e_open_cq(c->priv, icocq_moder, &cparam->icosq.cqp, &ccp,
&c->icosq.cq);
if (err)
goto err_close_async_icosq_cq;
@@ -2069,7 +2070,7 @@ static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev,
int i;
#ifdef CONFIG_MLX5_EN_IPSEC
- if (MLX5_IPSEC_DEV(mdev))
+ if (mlx5_fpga_is_ipsec_device(mdev))
byte_count += MLX5E_METADATA_ETHER_LEN;
#endif
@@ -4455,8 +4456,9 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog)
return -EINVAL;
}
- if (MLX5_IPSEC_DEV(priv->mdev)) {
- netdev_warn(netdev, "can't set XDP with IPSec offload\n");
+ if (mlx5_fpga_is_ipsec_device(priv->mdev)) {
+ netdev_warn(netdev,
+ "XDP is not available on Innova cards with IPsec support\n");
return -EINVAL;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index ca4b55839a8a..4864deed9dc9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -1795,8 +1795,8 @@ int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool
rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe_mpwqe;
#ifdef CONFIG_MLX5_EN_IPSEC
- if (MLX5_IPSEC_DEV(mdev)) {
- netdev_err(netdev, "MPWQE RQ with IPSec offload not supported\n");
+ if (mlx5_fpga_is_ipsec_device(mdev)) {
+ netdev_err(netdev, "MPWQE RQ with Innova IPSec offload not supported\n");
return -EINVAL;
}
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index dd0bfbacad47..717fbaa6ce73 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -5040,7 +5040,7 @@ static int apply_police_params(struct mlx5e_priv *priv, u64 rate,
*/
if (rate) {
rate = (rate * BITS_PER_BYTE) + 500000;
- rate_mbps = max_t(u32, do_div(rate, 1000000), 1);
+ rate_mbps = max_t(u64, do_div(rate, 1000000), 1);
}
err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
index cc67366495b0..22bee4990232 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
@@ -124,7 +124,7 @@ struct mlx5_fpga_ipsec {
struct ida halloc;
};
-static bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev)
+bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev)
{
if (!mdev->fpga || !MLX5_CAP_GEN(mdev, fpga))
return false;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
index db88eb4c49e3..8931b5584477 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
@@ -43,6 +43,7 @@ u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev);
const struct mlx5_flow_cmds *
mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type);
void mlx5_fpga_ipsec_build_fs_cmds(void);
+bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev);
#else
static inline
const struct mlx5_accel_ipsec_ops *mlx5_fpga_ipsec_ops(struct mlx5_core_dev *mdev)
@@ -55,6 +56,7 @@ mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type)
}
static inline void mlx5_fpga_ipsec_build_fs_cmds(void) {};
+static inline bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev) { return false; }
#endif /* CONFIG_MLX5_FPGA_IPSEC */
#endif /* __MLX5_FPGA_IPSEC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 54523bed16cd..0c32c485eb58 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -190,6 +190,16 @@ static bool reset_fw_if_needed(struct mlx5_core_dev *dev)
return true;
}
+static void enter_error_state(struct mlx5_core_dev *dev, bool force)
+{
+ if (mlx5_health_check_fatal_sensors(dev) || force) { /* protected state setting */
+ dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
+ mlx5_cmd_flush(dev);
+ }
+
+ mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1);
+}
+
void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
{
bool err_detected = false;
@@ -208,12 +218,7 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
goto unlock;
}
- if (mlx5_health_check_fatal_sensors(dev) || force) { /* protected state setting */
- dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
- mlx5_cmd_flush(dev);
- }
-
- mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1);
+ enter_error_state(dev, force);
unlock:
mutex_unlock(&dev->intf_state_mutex);
}
@@ -613,7 +618,7 @@ static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work)
priv = container_of(health, struct mlx5_priv, health);
dev = container_of(priv, struct mlx5_core_dev, priv);
- mlx5_enter_error_state(dev, false);
+ enter_error_state(dev, false);
if (IS_ERR_OR_NULL(health->fw_fatal_reporter)) {
if (mlx5_health_try_recover(dev))
mlx5_core_err(dev, "health recovery failed\n");
@@ -707,8 +712,9 @@ static void poll_health(struct timer_list *t)
mlx5_core_err(dev, "Fatal error %u detected\n", fatal_error);
dev->priv.health.fatal_error = fatal_error;
print_health_info(dev);
+ dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
mlx5_trigger_health_work(dev);
- goto out;
+ return;
}
count = ioread32be(health->health_counter);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index ca6f2fc39ea0..ba1a4ae28097 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1396,7 +1396,8 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *id)
dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err);
pci_save_state(pdev);
- devlink_reload_enable(devlink);
+ if (!mlx5_core_is_mp_slave(dev))
+ devlink_reload_enable(devlink);
return 0;
err_load_one:
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 80c2e646c093..71169e7d6177 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -300,50 +300,22 @@ static int mdio_bus_phy_resume(struct device *dev)
phydev->suspended_by_mdio_bus = 0;
- ret = phy_resume(phydev);
+ ret = phy_init_hw(phydev);
if (ret < 0)
return ret;
-no_resume:
- if (phydev->attached_dev && phydev->adjust_link)
- phy_start_machine(phydev);
-
- return 0;
-}
-
-static int mdio_bus_phy_restore(struct device *dev)
-{
- struct phy_device *phydev = to_phy_device(dev);
- struct net_device *netdev = phydev->attached_dev;
- int ret;
-
- if (!netdev)
- return 0;
-
- ret = phy_init_hw(phydev);
+ ret = phy_resume(phydev);
if (ret < 0)
return ret;
-
+no_resume:
if (phydev->attached_dev && phydev->adjust_link)
phy_start_machine(phydev);
return 0;
}
-static const struct dev_pm_ops mdio_bus_phy_pm_ops = {
- .suspend = mdio_bus_phy_suspend,
- .resume = mdio_bus_phy_resume,
- .freeze = mdio_bus_phy_suspend,
- .thaw = mdio_bus_phy_resume,
- .restore = mdio_bus_phy_restore,
-};
-
-#define MDIO_BUS_PHY_PM_OPS (&mdio_bus_phy_pm_ops)
-
-#else
-
-#define MDIO_BUS_PHY_PM_OPS NULL
-
+static SIMPLE_DEV_PM_OPS(mdio_bus_phy_pm_ops, mdio_bus_phy_suspend,
+ mdio_bus_phy_resume);
#endif /* CONFIG_PM */
/**
@@ -554,7 +526,7 @@ static const struct device_type mdio_bus_phy_type = {
.name = "PHY",
.groups = phy_dev_groups,
.release = phy_device_release,
- .pm = MDIO_BUS_PHY_PM_OPS,
+ .pm = pm_ptr(&mdio_bus_phy_pm_ops),
};
static int phy_request_driver_module(struct phy_device *dev, u32 phy_id)
@@ -1143,10 +1115,19 @@ int phy_init_hw(struct phy_device *phydev)
if (ret < 0)
return ret;
- if (phydev->drv->config_init)
+ if (phydev->drv->config_init) {
ret = phydev->drv->config_init(phydev);
+ if (ret < 0)
+ return ret;
+ }
- return ret;
+ if (phydev->drv->config_intr) {
+ ret = phydev->drv->config_intr(phydev);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
}
EXPORT_SYMBOL(phy_init_hw);
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index ee95f42fb0ec..88f4bf0047e7 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -591,6 +591,8 @@ enum {
TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED = 1 << 1, /* Part of an existing connection. */
TCA_FLOWER_KEY_CT_FLAGS_RELATED = 1 << 2, /* Related to an established connection. */
TCA_FLOWER_KEY_CT_FLAGS_TRACKED = 1 << 3, /* Conntrack has occurred. */
+
+ __TCA_FLOWER_KEY_CT_FLAGS_MAX,
};
enum {
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index e0d21c0607e5..8fec3dabe109 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -498,8 +498,8 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+ u64 snd_data_fin_enable, ack_seq;
unsigned int dss_size = 0;
- u64 snd_data_fin_enable;
struct mptcp_ext *mpext;
unsigned int ack_size;
bool ret = false;
@@ -531,13 +531,14 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
return ret;
}
+ ack_seq = READ_ONCE(msk->ack_seq);
if (READ_ONCE(msk->use_64bit_ack)) {
ack_size = TCPOLEN_MPTCP_DSS_ACK64;
- opts->ext_copy.data_ack = READ_ONCE(msk->ack_seq);
+ opts->ext_copy.data_ack = ack_seq;
opts->ext_copy.ack64 = 1;
} else {
ack_size = TCPOLEN_MPTCP_DSS_ACK32;
- opts->ext_copy.data_ack32 = (uint32_t)READ_ONCE(msk->ack_seq);
+ opts->ext_copy.data_ack32 = (uint32_t)ack_seq;
opts->ext_copy.ack64 = 0;
}
opts->ext_copy.use_ack = 1;
@@ -879,8 +880,7 @@ static void ack_update_msk(struct mptcp_sock *msk,
msk->wnd_end = new_wnd_end;
/* this assumes mptcp_incoming_options() is invoked after tcp_ack() */
- if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt)) &&
- sk_stream_memory_free(ssk))
+ if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt)))
__mptcp_check_push(sk, ssk);
if (after64(new_snd_una, old_snd_una)) {
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index f998a077c7dd..06da6ad31c87 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -364,8 +364,6 @@ static void mptcp_check_data_fin_ack(struct sock *sk)
/* Look for an acknowledged DATA_FIN */
if (mptcp_pending_data_fin_ack(sk)) {
- mptcp_stop_timer(sk);
-
WRITE_ONCE(msk->snd_data_fin_enable, 0);
switch (sk->sk_state) {
@@ -459,7 +457,18 @@ static bool mptcp_subflow_cleanup_rbuf(struct sock *ssk)
static void mptcp_cleanup_rbuf(struct mptcp_sock *msk)
{
struct sock *ack_hint = READ_ONCE(msk->ack_hint);
+ int old_space = READ_ONCE(msk->old_wspace);
struct mptcp_subflow_context *subflow;
+ struct sock *sk = (struct sock *)msk;
+ bool cleanup;
+
+ /* this is a simple superset of what tcp_cleanup_rbuf() implements
+ * so that we don't have to acquire the ssk socket lock most of the time
+ * to do actually nothing
+ */
+ cleanup = __mptcp_space(sk) - old_space >= max(0, old_space);
+ if (!cleanup)
+ return;
/* if the hinted ssk is still active, try to use it */
if (likely(ack_hint)) {
@@ -1573,6 +1582,9 @@ out:
mptcp_set_timeout(sk, ssk);
tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
info.size_goal);
+ if (!mptcp_timer_pending(sk))
+ mptcp_reset_timer(sk);
+
if (msk->snd_data_fin_enable &&
msk->snd_nxt + 1 == msk->write_seq)
mptcp_schedule_work(sk);
@@ -1867,7 +1879,7 @@ static void __mptcp_splice_receive_queue(struct sock *sk)
skb_queue_splice_tail_init(&sk->sk_receive_queue, &msk->receive_queue);
}
-static bool __mptcp_move_skbs(struct mptcp_sock *msk, unsigned int rcv)
+static bool __mptcp_move_skbs(struct mptcp_sock *msk)
{
struct sock *sk = (struct sock *)msk;
unsigned int moved = 0;
@@ -1887,13 +1899,10 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk, unsigned int rcv)
slowpath = lock_sock_fast(ssk);
mptcp_data_lock(sk);
+ __mptcp_update_rmem(sk);
done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved);
mptcp_data_unlock(sk);
- if (moved && rcv) {
- WRITE_ONCE(msk->rmem_pending, min(rcv, moved));
- tcp_cleanup_rbuf(ssk, 1);
- WRITE_ONCE(msk->rmem_pending, 0);
- }
+ tcp_cleanup_rbuf(ssk, moved);
unlock_sock_fast(ssk, slowpath);
} while (!done);
@@ -1906,6 +1915,7 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk, unsigned int rcv)
ret |= __mptcp_ofo_queue(msk);
__mptcp_splice_receive_queue(sk);
mptcp_data_unlock(sk);
+ mptcp_cleanup_rbuf(msk);
}
if (ret)
mptcp_check_data_fin((struct sock *)msk);
@@ -1935,7 +1945,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
while (copied < len) {
- int bytes_read, old_space;
+ int bytes_read;
bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied);
if (unlikely(bytes_read < 0)) {
@@ -1946,14 +1956,11 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
copied += bytes_read;
- if (skb_queue_empty(&msk->receive_queue) &&
- __mptcp_move_skbs(msk, len - copied))
- continue;
-
/* be sure to advertise window change */
- old_space = READ_ONCE(msk->old_wspace);
- if ((tcp_space(sk) - old_space) >= old_space)
- mptcp_cleanup_rbuf(msk);
+ mptcp_cleanup_rbuf(msk);
+
+ if (skb_queue_empty(&msk->receive_queue) && __mptcp_move_skbs(msk))
+ continue;
/* only the master socket status is relevant here. The exit
* conditions mirror closely tcp_recvmsg()
@@ -1981,7 +1988,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
/* race breaker: the shutdown could be after the
* previous receive queue check
*/
- if (__mptcp_move_skbs(msk, len - copied))
+ if (__mptcp_move_skbs(msk))
continue;
break;
}
@@ -2014,7 +2021,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
/* .. race-breaker: ssk might have gotten new data
* after last __mptcp_move_skbs() returned false.
*/
- if (unlikely(__mptcp_move_skbs(msk, 0)))
+ if (unlikely(__mptcp_move_skbs(msk)))
set_bit(MPTCP_DATA_READY, &msk->flags);
} else if (unlikely(!test_bit(MPTCP_DATA_READY, &msk->flags))) {
/* data to read but mptcp_wait_data() cleared DATA_READY */
@@ -2299,6 +2306,7 @@ static void mptcp_worker(struct work_struct *work)
if (!test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
goto unlock;
+ __mptcp_clean_una(sk);
dfrag = mptcp_rtx_head(sk);
if (!dfrag)
goto unlock;
@@ -2959,6 +2967,8 @@ static void mptcp_release_cb(struct sock *sk)
mptcp_push_pending(sk, 0);
spin_lock_bh(&sk->sk_lock.slock);
}
+ if (test_and_clear_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags))
+ __mptcp_error_report(sk);
/* clear any wmem reservation and errors */
__mptcp_update_wmem(sk);
@@ -3319,7 +3329,7 @@ static __poll_t mptcp_check_writeable(struct mptcp_sock *msk)
struct sock *sk = (struct sock *)msk;
if (unlikely(sk->sk_shutdown & SEND_SHUTDOWN))
- return 0;
+ return EPOLLOUT | EPOLLWRNORM;
if (sk_stream_is_writeable(sk))
return EPOLLOUT | EPOLLWRNORM;
@@ -3352,9 +3362,16 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
mask |= mptcp_check_readable(msk);
mask |= mptcp_check_writeable(msk);
}
+ if (sk->sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE)
+ mask |= EPOLLHUP;
if (sk->sk_shutdown & RCV_SHUTDOWN)
mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
+ /* This barrier is coupled with smp_wmb() in tcp_reset() */
+ smp_rmb();
+ if (sk->sk_err)
+ mask |= EPOLLERR;
+
return mask;
}
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index d67de793d363..8d9f0ff10cb8 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -95,6 +95,7 @@
#define MPTCP_WORK_CLOSE_SUBFLOW 5
#define MPTCP_PUSH_PENDING 6
#define MPTCP_CLEAN_UNA 7
+#define MPTCP_ERROR_REPORT 8
static inline bool before64(__u64 seq1, __u64 seq2)
{
@@ -233,7 +234,6 @@ struct mptcp_sock {
u64 wnd_end;
unsigned long timer_ival;
u32 token;
- int rmem_pending;
int rmem_released;
unsigned long flags;
bool can_ack;
@@ -292,7 +292,7 @@ static inline struct mptcp_sock *mptcp_sk(const struct sock *sk)
static inline int __mptcp_space(const struct sock *sk)
{
- return tcp_space(sk) + READ_ONCE(mptcp_sk(sk)->rmem_pending);
+ return tcp_space(sk) + READ_ONCE(mptcp_sk(sk)->rmem_released);
}
static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk)
@@ -325,20 +325,13 @@ static inline struct mptcp_data_frag *mptcp_pending_tail(const struct sock *sk)
return list_last_entry(&msk->rtx_queue, struct mptcp_data_frag, list);
}
-static inline struct mptcp_data_frag *mptcp_rtx_tail(const struct sock *sk)
+static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- if (!before64(msk->snd_nxt, READ_ONCE(msk->snd_una)))
+ if (msk->snd_una == READ_ONCE(msk->snd_nxt))
return NULL;
- return list_last_entry(&msk->rtx_queue, struct mptcp_data_frag, list);
-}
-
-static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk)
-{
- struct mptcp_sock *msk = mptcp_sk(sk);
-
return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list);
}
@@ -414,6 +407,7 @@ struct mptcp_subflow_context {
void (*tcp_data_ready)(struct sock *sk);
void (*tcp_state_change)(struct sock *sk);
void (*tcp_write_space)(struct sock *sk);
+ void (*tcp_error_report)(struct sock *sk);
struct rcu_head rcu;
};
@@ -478,6 +472,7 @@ static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
sk->sk_data_ready = ctx->tcp_data_ready;
sk->sk_state_change = ctx->tcp_state_change;
sk->sk_write_space = ctx->tcp_write_space;
+ sk->sk_error_report = ctx->tcp_error_report;
inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops;
}
@@ -505,6 +500,7 @@ bool mptcp_finish_join(struct sock *sk);
bool mptcp_schedule_work(struct sock *sk);
void __mptcp_check_push(struct sock *sk, struct sock *ssk);
void __mptcp_data_acked(struct sock *sk);
+void __mptcp_error_report(struct sock *sk);
void mptcp_subflow_eof(struct sock *sk);
bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit);
void __mptcp_flush_join_list(struct mptcp_sock *msk);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 278cbe3e539e..8b2338dfdc80 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -92,7 +92,7 @@ static struct mptcp_sock *subflow_token_join_request(struct request_sock *req,
return msk;
}
-static int __subflow_init_req(struct request_sock *req, const struct sock *sk_listener)
+static void subflow_init_req(struct request_sock *req, const struct sock *sk_listener)
{
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
@@ -100,16 +100,6 @@ static int __subflow_init_req(struct request_sock *req, const struct sock *sk_li
subflow_req->mp_join = 0;
subflow_req->msk = NULL;
mptcp_token_init_request(req);
-
-#ifdef CONFIG_TCP_MD5SIG
- /* no MPTCP if MD5SIG is enabled on this socket or we may run out of
- * TCP option space.
- */
- if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info))
- return -EINVAL;
-#endif
-
- return 0;
}
/* Init mptcp request socket.
@@ -117,20 +107,23 @@ static int __subflow_init_req(struct request_sock *req, const struct sock *sk_li
* Returns an error code if a JOIN has failed and a TCP reset
* should be sent.
*/
-static int subflow_init_req(struct request_sock *req,
- const struct sock *sk_listener,
- struct sk_buff *skb)
+static int subflow_check_req(struct request_sock *req,
+ const struct sock *sk_listener,
+ struct sk_buff *skb)
{
struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
struct mptcp_options_received mp_opt;
- int ret;
pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);
- ret = __subflow_init_req(req, sk_listener);
- if (ret)
- return 0;
+#ifdef CONFIG_TCP_MD5SIG
+ /* no MPTCP if MD5SIG is enabled on this socket or we may run out of
+ * TCP option space.
+ */
+ if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info))
+ return -EINVAL;
+#endif
mptcp_get_options(skb, &mp_opt);
@@ -205,10 +198,7 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req,
struct mptcp_options_received mp_opt;
int err;
- err = __subflow_init_req(req, sk_listener);
- if (err)
- return err;
-
+ subflow_init_req(req, sk_listener);
mptcp_get_options(skb, &mp_opt);
if (mp_opt.mp_capable && mp_opt.mp_join)
@@ -248,12 +238,13 @@ static struct dst_entry *subflow_v4_route_req(const struct sock *sk,
int err;
tcp_rsk(req)->is_mptcp = 1;
+ subflow_init_req(req, sk);
dst = tcp_request_sock_ipv4_ops.route_req(sk, skb, fl, req);
if (!dst)
return NULL;
- err = subflow_init_req(req, sk, skb);
+ err = subflow_check_req(req, sk, skb);
if (err == 0)
return dst;
@@ -273,12 +264,13 @@ static struct dst_entry *subflow_v6_route_req(const struct sock *sk,
int err;
tcp_rsk(req)->is_mptcp = 1;
+ subflow_init_req(req, sk);
dst = tcp_request_sock_ipv6_ops.route_req(sk, skb, fl, req);
if (!dst)
return NULL;
- err = subflow_init_req(req, sk, skb);
+ err = subflow_check_req(req, sk, skb);
if (err == 0)
return dst;
@@ -1043,6 +1035,46 @@ static void subflow_write_space(struct sock *ssk)
/* we take action in __mptcp_clean_una() */
}
+void __mptcp_error_report(struct sock *sk)
+{
+ struct mptcp_subflow_context *subflow;
+ struct mptcp_sock *msk = mptcp_sk(sk);
+
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ int err = sock_error(ssk);
+
+ if (!err)
+ continue;
+
+ /* only propagate errors on fallen-back sockets or
+ * on MPC connect
+ */
+ if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk))
+ continue;
+
+ inet_sk_state_store(sk, inet_sk_state_load(ssk));
+ sk->sk_err = -err;
+
+ /* This barrier is coupled with smp_rmb() in mptcp_poll() */
+ smp_wmb();
+ sk->sk_error_report(sk);
+ break;
+ }
+}
+
+static void subflow_error_report(struct sock *ssk)
+{
+ struct sock *sk = mptcp_subflow_ctx(ssk)->conn;
+
+ mptcp_data_lock(sk);
+ if (!sock_owned_by_user(sk))
+ __mptcp_error_report(sk);
+ else
+ set_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags);
+ mptcp_data_unlock(sk);
+}
+
static struct inet_connection_sock_af_ops *
subflow_default_af_ops(struct sock *sk)
{
@@ -1352,9 +1384,11 @@ static int subflow_ulp_init(struct sock *sk)
ctx->tcp_data_ready = sk->sk_data_ready;
ctx->tcp_state_change = sk->sk_state_change;
ctx->tcp_write_space = sk->sk_write_space;
+ ctx->tcp_error_report = sk->sk_error_report;
sk->sk_data_ready = subflow_data_ready;
sk->sk_write_space = subflow_write_space;
sk->sk_state_change = subflow_state_change;
+ sk->sk_error_report = subflow_error_report;
out:
return err;
}
@@ -1407,6 +1441,7 @@ static void subflow_ulp_clone(const struct request_sock *req,
new_ctx->tcp_data_ready = old_ctx->tcp_data_ready;
new_ctx->tcp_state_change = old_ctx->tcp_state_change;
new_ctx->tcp_write_space = old_ctx->tcp_write_space;
+ new_ctx->tcp_error_report = old_ctx->tcp_error_report;
new_ctx->rel_write_seq = 1;
new_ctx->tcp_sock = newsk;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 84f932532db7..46c1b3e9f66a 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -30,6 +30,11 @@
#include <uapi/linux/netfilter/nf_conntrack_common.h>
+#define TCA_FLOWER_KEY_CT_FLAGS_MAX \
+ ((__TCA_FLOWER_KEY_CT_FLAGS_MAX - 1) << 1)
+#define TCA_FLOWER_KEY_CT_FLAGS_MASK \
+ (TCA_FLOWER_KEY_CT_FLAGS_MAX - 1)
+
struct fl_flow_key {
struct flow_dissector_key_meta meta;
struct flow_dissector_key_control control;
@@ -686,8 +691,10 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
[TCA_FLOWER_KEY_ENC_IP_TTL_MASK] = { .type = NLA_U8 },
[TCA_FLOWER_KEY_ENC_OPTS] = { .type = NLA_NESTED },
[TCA_FLOWER_KEY_ENC_OPTS_MASK] = { .type = NLA_NESTED },
- [TCA_FLOWER_KEY_CT_STATE] = { .type = NLA_U16 },
- [TCA_FLOWER_KEY_CT_STATE_MASK] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_CT_STATE] =
+ NLA_POLICY_MASK(NLA_U16, TCA_FLOWER_KEY_CT_FLAGS_MASK),
+ [TCA_FLOWER_KEY_CT_STATE_MASK] =
+ NLA_POLICY_MASK(NLA_U16, TCA_FLOWER_KEY_CT_FLAGS_MASK),
[TCA_FLOWER_KEY_CT_ZONE] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_CT_ZONE_MASK] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_CT_MARK] = { .type = NLA_U32 },
@@ -1390,12 +1397,33 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
return 0;
}
+static int fl_validate_ct_state(u16 state, struct nlattr *tb,
+ struct netlink_ext_ack *extack)
+{
+ if (state && !(state & TCA_FLOWER_KEY_CT_FLAGS_TRACKED)) {
+ NL_SET_ERR_MSG_ATTR(extack, tb,
+ "no trk, so no other flag can be set");
+ return -EINVAL;
+ }
+
+ if (state & TCA_FLOWER_KEY_CT_FLAGS_NEW &&
+ state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED) {
+ NL_SET_ERR_MSG_ATTR(extack, tb,
+ "new and est are mutually exclusive");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int fl_set_key_ct(struct nlattr **tb,
struct flow_dissector_key_ct *key,
struct flow_dissector_key_ct *mask,
struct netlink_ext_ack *extack)
{
if (tb[TCA_FLOWER_KEY_CT_STATE]) {
+ int err;
+
if (!IS_ENABLED(CONFIG_NF_CONNTRACK)) {
NL_SET_ERR_MSG(extack, "Conntrack isn't enabled");
return -EOPNOTSUPP;
@@ -1403,6 +1431,13 @@ static int fl_set_key_ct(struct nlattr **tb,
fl_set_key_val(tb, &key->ct_state, TCA_FLOWER_KEY_CT_STATE,
&mask->ct_state, TCA_FLOWER_KEY_CT_STATE_MASK,
sizeof(key->ct_state));
+
+ err = fl_validate_ct_state(mask->ct_state,
+ tb[TCA_FLOWER_KEY_CT_STATE_MASK],
+ extack);
+ if (err)
+ return err;
+
}
if (tb[TCA_FLOWER_KEY_CT_ZONE]) {
if (!IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES)) {