diff options
35 files changed, 349 insertions, 153 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 8696ead91480..bfb3d0931cba 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12903,7 +12903,7 @@ F: net/ipv4/nexthop.c NFC SUBSYSTEM M: Krzysztof Kozlowski <[email protected]> -L: [email protected] (moderated for non-subscribers) +L: [email protected] (subscribers-only) S: Maintained F: Documentation/devicetree/bindings/net/nfc/ @@ -12916,7 +12916,7 @@ F: net/nfc/ NFC VIRTUAL NCI DEVICE DRIVER M: Bongsu Jeon <[email protected]> -L: [email protected] (moderated for non-subscribers) +L: [email protected] (subscribers-only) S: Supported F: drivers/nfc/virtual_ncidev.c F: tools/testing/selftests/nci/ @@ -13214,7 +13214,7 @@ F: sound/soc/codecs/tfa9879* NXP-NCI NFC DRIVER R: Charles Gorand <[email protected]> -L: [email protected] (moderated for non-subscribers) +L: [email protected] (subscribers-only) S: Supported F: drivers/nfc/nxp-nci @@ -16141,7 +16141,7 @@ F: include/media/drv-intf/s3c_camif.h SAMSUNG S3FWRN5 NFC DRIVER M: Krzysztof Kozlowski <[email protected]> M: Krzysztof Opasiak <[email protected]> -L: [email protected] (moderated for non-subscribers) +L: [email protected] (subscribers-only) S: Maintained F: Documentation/devicetree/bindings/net/nfc/samsung,s3fwrn5.yaml F: drivers/nfc/s3fwrn5 @@ -18332,7 +18332,7 @@ F: sound/soc/codecs/tas571x* TI TRF7970A NFC DRIVER M: Mark Greer <[email protected]> -L: [email protected] (moderated for non-subscribers) +L: [email protected] (subscribers-only) S: Supported F: Documentation/devicetree/bindings/net/nfc/trf7970a.txt F: drivers/nfc/trf7970a.c diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 8360289813f0..d6513aef5cd4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1624,12 +1624,13 @@ static int mlx5e_set_fecparam(struct net_device *netdev, { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; + unsigned long fec_bitmap; u16 fec_policy = 0; int mode; int err; - if (bitmap_weight((unsigned long *)&fecparam->fec, - ETHTOOL_FEC_LLRS_BIT + 1) > 1) + bitmap_from_arr32(&fec_bitmap, &fecparam->fec, sizeof(fecparam->fec) * BITS_PER_BYTE); + if (bitmap_weight(&fec_bitmap, ETHTOOL_FEC_LLRS_BIT + 1) > 1) return -EOPNOTSUPP; for (mode = 0; mode < ARRAY_SIZE(pplm_fec_2_ethtool); mode++) { @@ -1893,6 +1894,13 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val if (curr_val == new_val) return 0; + if (new_val && !priv->profile->rx_ptp_support && + priv->tstamp.rx_filter != HWTSTAMP_FILTER_NONE) { + netdev_err(priv->netdev, + "Profile doesn't support enabling of CQE compression while hardware time-stamping is enabled.\n"); + return -EINVAL; + } + new_params = priv->channels.params; MLX5E_SET_PFLAG(&new_params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val); if (priv->tstamp.rx_filter != HWTSTAMP_FILTER_NONE) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index ad0f69480b9c..ec6bafe7a2e5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3858,6 +3858,16 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev, netdev_warn(netdev, "Disabling rxhash, not supported when CQE compress is active\n"); } + if (mlx5e_is_uplink_rep(priv)) { + features &= ~NETIF_F_HW_TLS_RX; + if (netdev->features & NETIF_F_HW_TLS_RX) + netdev_warn(netdev, "Disabling hw_tls_rx, not supported in switchdev mode\n"); + + features &= ~NETIF_F_HW_TLS_TX; + if (netdev->features & NETIF_F_HW_TLS_TX) + netdev_warn(netdev, "Disabling hw_tls_tx, not supported in switchdev mode\n"); + } + mutex_unlock(&priv->state_lock); return features; @@ -3974,11 +3984,45 @@ int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx) return mlx5e_ptp_rx_manage_fs(priv, set); } -int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) +static int mlx5e_hwstamp_config_no_ptp_rx(struct mlx5e_priv *priv, bool rx_filter) +{ + bool rx_cqe_compress_def = priv->channels.params.rx_cqe_compress_def; + int err; + + if (!rx_filter) + /* Reset CQE compression to Admin default */ + return mlx5e_modify_rx_cqe_compression_locked(priv, rx_cqe_compress_def); + + if (!MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS)) + return 0; + + /* Disable CQE compression */ + netdev_warn(priv->netdev, "Disabling RX cqe compression\n"); + err = mlx5e_modify_rx_cqe_compression_locked(priv, false); + if (err) + netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err); + + return err; +} + +static int mlx5e_hwstamp_config_ptp_rx(struct mlx5e_priv *priv, bool ptp_rx) { struct mlx5e_params new_params; + + if (ptp_rx == priv->channels.params.ptp_rx) + return 0; + + new_params = priv->channels.params; + new_params.ptp_rx = ptp_rx; + return mlx5e_safe_switch_params(priv, &new_params, mlx5e_ptp_rx_manage_fs_ctx, + &new_params.ptp_rx, true); +} + +int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) +{ struct hwtstamp_config config; bool rx_cqe_compress_def; + bool ptp_rx; int err; if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz) || @@ -3998,13 +4042,12 @@ int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) } mutex_lock(&priv->state_lock); - new_params = priv->channels.params; rx_cqe_compress_def = priv->channels.params.rx_cqe_compress_def; /* RX HW timestamp */ switch (config.rx_filter) { case HWTSTAMP_FILTER_NONE: - new_params.ptp_rx = false; + ptp_rx = false; break; case HWTSTAMP_FILTER_ALL: case HWTSTAMP_FILTER_SOME: @@ -4021,24 +4064,25 @@ int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr) case HWTSTAMP_FILTER_PTP_V2_SYNC: case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: case HWTSTAMP_FILTER_NTP_ALL: - new_params.ptp_rx = rx_cqe_compress_def; config.rx_filter = HWTSTAMP_FILTER_ALL; + /* ptp_rx is set if both HW TS is set and CQE + * compression is set + */ + ptp_rx = rx_cqe_compress_def; break; default: - mutex_unlock(&priv->state_lock); - return -ERANGE; + err = -ERANGE; + goto err_unlock; } - if (new_params.ptp_rx == priv->channels.params.ptp_rx) - goto out; + if (!priv->profile->rx_ptp_support) + err = mlx5e_hwstamp_config_no_ptp_rx(priv, + config.rx_filter != HWTSTAMP_FILTER_NONE); + else + err = mlx5e_hwstamp_config_ptp_rx(priv, ptp_rx); + if (err) + goto err_unlock; - err = mlx5e_safe_switch_params(priv, &new_params, mlx5e_ptp_rx_manage_fs_ctx, - &new_params.ptp_rx, true); - if (err) { - mutex_unlock(&priv->state_lock); - return err; - } -out: memcpy(&priv->tstamp, &config, sizeof(config)); mutex_unlock(&priv->state_lock); @@ -4047,6 +4091,9 @@ out: return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? -EFAULT : 0; +err_unlock: + mutex_unlock(&priv->state_lock); + return err; } int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 2c776e7a7692..dd64878e5b38 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2015,11 +2015,13 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, misc_parameters_3); struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct flow_dissector *dissector = rule->match.dissector; + enum fs_flow_table_type fs_type; u16 addr_type = 0; u8 ip_proto = 0; u8 *match_level; int err; + fs_type = mlx5e_is_eswitch_flow(flow) ? FS_FT_FDB : FS_FT_NIC_RX; match_level = outer_match_level; if (dissector->used_keys & @@ -2145,6 +2147,13 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, if (match.mask->vlan_id || match.mask->vlan_priority || match.mask->vlan_tpid) { + if (!MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, ft_field_support.outer_second_vid, + fs_type)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on CVLAN is not supported"); + return -EOPNOTSUPP; + } + if (match.key->vlan_tpid == htons(ETH_P_8021AD)) { MLX5_SET(fte_match_set_misc, misc_c, outer_second_svlan_tag, 1); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index db1e74280e57..d18a28a6e9a6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -219,7 +219,8 @@ esw_setup_slow_path_dest(struct mlx5_flow_destination *dest, struct mlx5_fs_chains *chains, int i) { - flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + if (mlx5_chains_ignore_flow_level_supported(chains)) + flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest[i].ft = mlx5_chains_get_tc_end_ft(chains); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index d5d57630015f..106b50e42b46 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -349,6 +349,9 @@ static void mlx5_sync_reset_abort_event(struct work_struct *work) reset_abort_work); struct mlx5_core_dev *dev = fw_reset->dev; + if (!test_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) + return; + mlx5_sync_reset_clear_reset_requested(dev, true); mlx5_core_warn(dev, "PCI Sync FW Update Reset Aborted.\n"); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c index 00ef10a1a9f8..20a4047f2737 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c @@ -107,7 +107,7 @@ bool mlx5_chains_prios_supported(struct mlx5_fs_chains *chains) return chains->flags & MLX5_CHAINS_AND_PRIOS_SUPPORTED; } -static bool mlx5_chains_ignore_flow_level_supported(struct mlx5_fs_chains *chains) +bool mlx5_chains_ignore_flow_level_supported(struct mlx5_fs_chains *chains) { return chains->flags & MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h index e96f345e7dae..d50bdb226cef 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h @@ -28,6 +28,7 @@ struct mlx5_chains_attr { bool mlx5_chains_prios_supported(struct mlx5_fs_chains *chains); +bool mlx5_chains_ignore_flow_level_supported(struct mlx5_fs_chains *chains); bool mlx5_chains_backwards_supported(struct mlx5_fs_chains *chains); u32 @@ -70,6 +71,10 @@ mlx5_chains_set_end_ft(struct mlx5_fs_chains *chains, #else /* CONFIG_MLX5_CLS_ACT */ +static inline bool +mlx5_chains_ignore_flow_level_supported(struct mlx5_fs_chains *chains) +{ return false; } + static inline struct mlx5_flow_table * mlx5_chains_get_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio, u32 level) { return ERR_PTR(-EOPNOTSUPP); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c index 1fbcd012bb85..7ccfd40586ce 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c @@ -112,7 +112,8 @@ int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn, int ret; ft_attr.table_type = MLX5_FLOW_TABLE_TYPE_FDB; - ft_attr.level = dmn->info.caps.max_ft_level - 2; + ft_attr.level = min_t(int, dmn->info.caps.max_ft_level - 2, + MLX5_FT_MAX_MULTIPATH_LEVEL); ft_attr.reformat_en = reformat_req; ft_attr.decap_en = reformat_req; diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index c84c8bf2bc20..fc99ad8e4a38 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -3815,6 +3815,7 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent) dev_err(&pdev->dev, "invalid sram_size %dB or board span %ldB\n", mgp->sram_size, mgp->board_span); + status = -EINVAL; goto abort_with_ioremap; } memcpy_fromio(mgp->eeprom_strings, diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 5d956a553434..c87202cbd3d6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1240,8 +1240,9 @@ static int stmmac_phy_setup(struct stmmac_priv *priv) priv->phylink_config.dev = &priv->dev->dev; priv->phylink_config.type = PHYLINK_NETDEV; priv->phylink_config.pcs_poll = true; - priv->phylink_config.ovr_an_inband = - priv->plat->mdio_bus_data->xpcs_an_inband; + if (priv->plat->mdio_bus_data) + priv->phylink_config.ovr_an_inband = + priv->plat->mdio_bus_data->xpcs_an_inband; if (!fwnode) fwnode = dev_fwnode(priv->device); @@ -7048,7 +7049,6 @@ error_mdio_register: stmmac_napi_del(ndev); error_hw_init: destroy_workqueue(priv->wq); - stmmac_bus_clks_config(priv, false); bitmap_free(priv->af_xdp_zc_qps); return ret; diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 9b6a4a875c55..fa407eb8b457 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -401,18 +401,13 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, /* If headroom is not 0, there is an offset between the beginning of the * data and the allocated space, otherwise the data and the allocated * space are aligned. + * + * Buffers with headroom use PAGE_SIZE as alloc size, see + * add_recvbuf_mergeable() + get_mergeable_buf_len() */ - if (headroom) { - /* Buffers with headroom use PAGE_SIZE as alloc size, - * see add_recvbuf_mergeable() + get_mergeable_buf_len() - */ - truesize = PAGE_SIZE; - tailroom = truesize - len - offset; - buf = page_address(page); - } else { - tailroom = truesize - len; - buf = p; - } + truesize = headroom ? PAGE_SIZE : truesize; + tailroom = truesize - len - headroom; + buf = p - headroom; len -= hdr_len; offset += hdr_padded_len; @@ -958,7 +953,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, put_page(page); head_skb = page_to_skb(vi, rq, xdp_page, offset, len, PAGE_SIZE, false, - metasize, headroom); + metasize, + VIRTIO_XDP_HEADROOM); return head_skb; } break; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 6d16eed6850e..eb86e80e4643 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1289,6 +1289,8 @@ enum mlx5_fc_bulk_alloc_bitmask { #define MLX5_FC_BULK_NUM_FCS(fc_enum) (MLX5_FC_BULK_SIZE_FACTOR * (fc_enum)) +#define MLX5_FT_MAX_MULTIPATH_LEVEL 63 + enum { MLX5_STEERING_FORMAT_CONNECTX_5 = 0, MLX5_STEERING_FORMAT_CONNECTX_6DX = 1, diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 27eeb613bb4e..0a5655e300b5 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1506,16 +1506,10 @@ struct nft_trans_chain { struct nft_trans_table { bool update; - u8 state; - u32 flags; }; #define nft_trans_table_update(trans) \ (((struct nft_trans_table *)trans->data)->update) -#define nft_trans_table_state(trans) \ - (((struct nft_trans_table *)trans->data)->state) -#define nft_trans_table_flags(trans) \ - (((struct nft_trans_table *)trans->data)->flags) struct nft_trans_elem { struct nft_set *set; diff --git a/include/net/tls.h b/include/net/tls.h index 3eccb525e8f7..8341a8d1e807 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -193,7 +193,11 @@ struct tls_offload_context_tx { (sizeof(struct tls_offload_context_tx) + TLS_DRIVER_STATE_SIZE_TX) enum tls_context_flags { - TLS_RX_SYNC_RUNNING = 0, + /* tls_device_down was called after the netdev went down, device state + * was released, and kTLS works in software, even though rx_conf is + * still TLS_HW (needed for transition). + */ + TLS_RX_DEV_DEGRADED = 0, /* Unlike RX where resync is driven entirely by the core in TX only * the driver knows when things went out of sync, so we need the flag * to be atomic. @@ -266,6 +270,7 @@ struct tls_context { /* cache cold stuff */ struct proto *sk_proto; + struct sock *sk; void (*sk_destruct)(struct sock *sk); @@ -448,6 +453,9 @@ static inline u16 tls_user_config(struct tls_context *ctx, bool tx) struct sk_buff * tls_validate_xmit_skb(struct sock *sk, struct net_device *dev, struct sk_buff *skb); +struct sk_buff * +tls_validate_xmit_skb_sw(struct sock *sk, struct net_device *dev, + struct sk_buff *skb); static inline bool tls_is_sk_tx_device_offloaded(struct sock *sk) { diff --git a/net/core/devlink.c b/net/core/devlink.c index 4eb969518ee0..051432ea4f69 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -705,7 +705,6 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg, case DEVLINK_PORT_FLAVOUR_PHYSICAL: case DEVLINK_PORT_FLAVOUR_CPU: case DEVLINK_PORT_FLAVOUR_DSA: - case DEVLINK_PORT_FLAVOUR_VIRTUAL: if (nla_put_u32(msg, DEVLINK_ATTR_PORT_NUMBER, attrs->phys.port_number)) return -EMSGSIZE; @@ -8631,7 +8630,6 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port, switch (attrs->flavour) { case DEVLINK_PORT_FLAVOUR_PHYSICAL: - case DEVLINK_PORT_FLAVOUR_VIRTUAL: if (!attrs->split) n = snprintf(name, len, "p%u", attrs->phys.port_number); else @@ -8679,6 +8677,8 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port, n = snprintf(name, len, "pf%usf%u", attrs->pci_sf.pf, attrs->pci_sf.sf); break; + case DEVLINK_PORT_FLAVOUR_VIRTUAL: + return -EOPNOTSUPP; } if (n >= len) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 714d5fa38546..3e84279c4123 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -4842,8 +4842,10 @@ static int rtnl_bridge_notify(struct net_device *dev) if (err < 0) goto errout; - if (!skb->len) + if (!skb->len) { + err = -EINVAL; goto errout; + } rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); return 0; diff --git a/net/core/sock.c b/net/core/sock.c index 958614ea16ed..946888afef88 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -815,10 +815,18 @@ void sock_set_rcvbuf(struct sock *sk, int val) } EXPORT_SYMBOL(sock_set_rcvbuf); +static void __sock_set_mark(struct sock *sk, u32 val) +{ + if (val != sk->sk_mark) { + sk->sk_mark = val; + sk_dst_reset(sk); + } +} + void sock_set_mark(struct sock *sk, u32 val) { lock_sock(sk); - sk->sk_mark = val; + __sock_set_mark(sk, val); release_sock(sk); } EXPORT_SYMBOL(sock_set_mark); @@ -1126,10 +1134,10 @@ set_sndbuf: case SO_MARK: if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { ret = -EPERM; - } else if (val != sk->sk_mark) { - sk->sk_mark = val; - sk_dst_reset(sk); + break; } + + __sock_set_mark(sk, val); break; case SO_RXQ_OVFL: diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c index 008c1ec6e20c..122ad5833fb1 100644 --- a/net/dsa/tag_8021q.c +++ b/net/dsa/tag_8021q.c @@ -64,7 +64,7 @@ #define DSA_8021Q_SUBVLAN_HI_SHIFT 9 #define DSA_8021Q_SUBVLAN_HI_MASK GENMASK(9, 9) #define DSA_8021Q_SUBVLAN_LO_SHIFT 4 -#define DSA_8021Q_SUBVLAN_LO_MASK GENMASK(4, 3) +#define DSA_8021Q_SUBVLAN_LO_MASK GENMASK(5, 4) #define DSA_8021Q_SUBVLAN_HI(x) (((x) & GENMASK(2, 2)) >> 2) #define DSA_8021Q_SUBVLAN_LO(x) ((x) & GENMASK(1, 0)) #define DSA_8021Q_SUBVLAN(x) \ diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index bc2f6ca97152..816d8aad5a68 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -886,7 +886,7 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d /* - * Copy BOOTP-supplied string if not already set. + * Copy BOOTP-supplied string */ static int __init ic_bootp_string(char *dest, char *src, int len, int max) { @@ -935,12 +935,15 @@ static void __init ic_do_bootp_ext(u8 *ext) } break; case 12: /* Host name */ - ic_bootp_string(utsname()->nodename, ext+1, *ext, - __NEW_UTS_LEN); - ic_host_name_set = 1; + if (!ic_host_name_set) { + ic_bootp_string(utsname()->nodename, ext+1, *ext, + __NEW_UTS_LEN); + ic_host_name_set = 1; + } break; case 15: /* Domain name (DNS) */ - ic_bootp_string(ic_domain, ext+1, *ext, sizeof(ic_domain)); + if (!ic_domain[0]) + ic_bootp_string(ic_domain, ext+1, *ext, sizeof(ic_domain)); break; case 17: /* Root path */ if (!root_server_path[0]) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index aa98294a3ad3..f7c8110ece5f 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -271,6 +271,9 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net, if (ipip6_tunnel_create(dev) < 0) goto failed_free; + if (!parms->name[0]) + strcpy(parms->name, dev->name); + return nt; failed_free: diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 6201965bd822..1c572c8daced 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1066,6 +1066,11 @@ out_error: goto partial_message; } + if (skb_has_frag_list(head)) { + kfree_skb_list(skb_shinfo(head)->frag_list); + skb_shinfo(head)->frag_list = NULL; + } + if (head != kcm->seq_skb) kfree_skb(head); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 2bc199549a88..5edc686faff1 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -947,6 +947,10 @@ static void __mptcp_update_wmem(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); +#ifdef CONFIG_LOCKDEP + WARN_ON_ONCE(!lockdep_is_held(&sk->sk_lock.slock)); +#endif + if (!msk->wmem_reserved) return; @@ -1085,10 +1089,20 @@ out: static void __mptcp_clean_una_wakeup(struct sock *sk) { +#ifdef CONFIG_LOCKDEP + WARN_ON_ONCE(!lockdep_is_held(&sk->sk_lock.slock)); +#endif __mptcp_clean_una(sk); mptcp_write_space(sk); } +static void mptcp_clean_una_wakeup(struct sock *sk) +{ + mptcp_data_lock(sk); + __mptcp_clean_una_wakeup(sk); + mptcp_data_unlock(sk); +} + static void mptcp_enter_memory_pressure(struct sock *sk) { struct mptcp_subflow_context *subflow; @@ -2299,7 +2313,7 @@ static void __mptcp_retrans(struct sock *sk) struct sock *ssk; int ret; - __mptcp_clean_una_wakeup(sk); + mptcp_clean_una_wakeup(sk); dfrag = mptcp_rtx_head(sk); if (!dfrag) { if (mptcp_data_fin_enabled(msk)) { diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index bde6be77ea73..ef3d037f984a 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -630,21 +630,20 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, /* if the sk is MP_CAPABLE, we try to fetch the client key */ if (subflow_req->mp_capable) { - if (TCP_SKB_CB(skb)->seq != subflow_req->ssn_offset + 1) { - /* here we can receive and accept an in-window, - * out-of-order pkt, which will not carry the MP_CAPABLE - * opt even on mptcp enabled paths - */ - goto create_msk; - } - + /* we can receive and accept an in-window, out-of-order pkt, + * which may not carry the MP_CAPABLE opt even on mptcp enabled + * paths: always try to extract the peer key, and fallback + * for packets missing it. + * Even OoO DSS packets coming legitly after dropped or + * reordered MPC will cause fallback, but we don't have other + * options. + */ mptcp_get_options(skb, &mp_opt); if (!mp_opt.mp_capable) { fallback = true; goto create_child; } -create_msk: new_msk = mptcp_sk_clone(listener->conn, &mp_opt, req); if (!new_msk) fallback = true; @@ -1012,21 +1011,11 @@ static bool subflow_check_data_avail(struct sock *ssk) status = get_mapping_status(ssk, msk); trace_subflow_check_data_avail(status, skb_peek(&ssk->sk_receive_queue)); - if (status == MAPPING_INVALID) { - ssk->sk_err = EBADMSG; - goto fatal; - } - if (status == MAPPING_DUMMY) { - __mptcp_do_fallback(msk); - skb = skb_peek(&ssk->sk_receive_queue); - subflow->map_valid = 1; - subflow->map_seq = READ_ONCE(msk->ack_seq); - subflow->map_data_len = skb->len; - subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - - subflow->ssn_offset; - subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL; - return true; - } + if (unlikely(status == MAPPING_INVALID)) + goto fallback; + + if (unlikely(status == MAPPING_DUMMY)) + goto fallback; if (status != MAPPING_OK) goto no_data; @@ -1039,10 +1028,8 @@ static bool subflow_check_data_avail(struct sock *ssk) * MP_CAPABLE-based mapping */ if (unlikely(!READ_ONCE(msk->can_ack))) { - if (!subflow->mpc_map) { - ssk->sk_err = EBADMSG; - goto fatal; - } + if (!subflow->mpc_map) + goto fallback; WRITE_ONCE(msk->remote_key, subflow->remote_key); WRITE_ONCE(msk->ack_seq, subflow->map_seq); WRITE_ONCE(msk->can_ack, true); @@ -1070,17 +1057,31 @@ static bool subflow_check_data_avail(struct sock *ssk) no_data: subflow_sched_work_if_closed(msk, ssk); return false; -fatal: - /* fatal protocol error, close the socket */ - /* This barrier is coupled with smp_rmb() in tcp_poll() */ - smp_wmb(); - ssk->sk_error_report(ssk); - tcp_set_state(ssk, TCP_CLOSE); - subflow->reset_transient = 0; - subflow->reset_reason = MPTCP_RST_EMPTCP; - tcp_send_active_reset(ssk, GFP_ATOMIC); - subflow->data_avail = 0; - return false; + +fallback: + /* RFC 8684 section 3.7. */ + if (subflow->mp_join || subflow->fully_established) { + /* fatal protocol error, close the socket. + * subflow_error_report() will introduce the appropriate barriers + */ + ssk->sk_err = EBADMSG; + ssk->sk_error_report(ssk); + tcp_set_state(ssk, TCP_CLOSE); + subflow->reset_transient = 0; + subflow->reset_reason = MPTCP_RST_EMPTCP; + tcp_send_active_reset(ssk, GFP_ATOMIC); + subflow->data_avail = 0; + return false; + } + + __mptcp_do_fallback(msk); + skb = skb_peek(&ssk->sk_receive_queue); + subflow->map_valid = 1; + subflow->map_seq = READ_ONCE(msk->ack_seq); + subflow->map_data_len = skb->len; + subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - subflow->ssn_offset; + subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL; + return true; } bool mptcp_subflow_data_available(struct sock *sk) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index d45dbcba8b49..c25097092a06 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1367,7 +1367,7 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, ip_vs_addr_copy(svc->af, &svc->addr, &u->addr); svc->port = u->port; svc->fwmark = u->fwmark; - svc->flags = u->flags; + svc->flags = u->flags & ~IP_VS_SVC_F_HASHED; svc->timeout = u->timeout * HZ; svc->netmask = u->netmask; svc->ipvs = ipvs; diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 89e5bac384d7..dc9ca12b0489 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -664,7 +664,7 @@ int nf_conntrack_proto_init(void) #if IS_ENABLED(CONFIG_IPV6) cleanup_sockopt: - nf_unregister_sockopt(&so_getorigdst6); + nf_unregister_sockopt(&so_getorigdst); #endif return ret; } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d63d2d8f769c..72bc759179ef 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -736,7 +736,8 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, goto nla_put_failure; if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) || - nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)) || + nla_put_be32(skb, NFTA_TABLE_FLAGS, + htonl(table->flags & NFT_TABLE_F_MASK)) || nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)) || nla_put_be64(skb, NFTA_TABLE_HANDLE, cpu_to_be64(table->handle), NFTA_TABLE_PAD)) @@ -947,20 +948,22 @@ err_register_hooks: static void nf_tables_table_disable(struct net *net, struct nft_table *table) { + table->flags &= ~NFT_TABLE_F_DORMANT; nft_table_disable(net, table, 0); + table->flags |= NFT_TABLE_F_DORMANT; } -enum { - NFT_TABLE_STATE_UNCHANGED = 0, - NFT_TABLE_STATE_DORMANT, - NFT_TABLE_STATE_WAKEUP -}; +#define __NFT_TABLE_F_INTERNAL (NFT_TABLE_F_MASK + 1) +#define __NFT_TABLE_F_WAS_DORMANT (__NFT_TABLE_F_INTERNAL << 0) +#define __NFT_TABLE_F_WAS_AWAKEN (__NFT_TABLE_F_INTERNAL << 1) +#define __NFT_TABLE_F_UPDATE (__NFT_TABLE_F_WAS_DORMANT | \ + __NFT_TABLE_F_WAS_AWAKEN) static int nf_tables_updtable(struct nft_ctx *ctx) { struct nft_trans *trans; u32 flags; - int ret = 0; + int ret; if (!ctx->nla[NFTA_TABLE_FLAGS]) return 0; @@ -985,21 +988,27 @@ static int nf_tables_updtable(struct nft_ctx *ctx) if ((flags & NFT_TABLE_F_DORMANT) && !(ctx->table->flags & NFT_TABLE_F_DORMANT)) { - nft_trans_table_state(trans) = NFT_TABLE_STATE_DORMANT; + ctx->table->flags |= NFT_TABLE_F_DORMANT; + if (!(ctx->table->flags & __NFT_TABLE_F_UPDATE)) + ctx->table->flags |= __NFT_TABLE_F_WAS_AWAKEN; } else if (!(flags & NFT_TABLE_F_DORMANT) && ctx->table->flags & NFT_TABLE_F_DORMANT) { - ret = nf_tables_table_enable(ctx->net, ctx->table); - if (ret >= 0) - nft_trans_table_state(trans) = NFT_TABLE_STATE_WAKEUP; + ctx->table->flags &= ~NFT_TABLE_F_DORMANT; + if (!(ctx->table->flags & __NFT_TABLE_F_UPDATE)) { + ret = nf_tables_table_enable(ctx->net, ctx->table); + if (ret < 0) + goto err_register_hooks; + + ctx->table->flags |= __NFT_TABLE_F_WAS_DORMANT; + } } - if (ret < 0) - goto err; - nft_trans_table_flags(trans) = flags; nft_trans_table_update(trans) = true; nft_trans_commit_list_add_tail(ctx->net, trans); + return 0; -err: + +err_register_hooks: nft_trans_destroy(trans); return ret; } @@ -1905,7 +1914,7 @@ static int nft_chain_parse_netdev(struct net *net, static int nft_chain_parse_hook(struct net *net, const struct nlattr * const nla[], struct nft_chain_hook *hook, u8 family, - bool autoload) + struct netlink_ext_ack *extack, bool autoload) { struct nftables_pernet *nft_net = nft_pernet(net); struct nlattr *ha[NFTA_HOOK_MAX + 1]; @@ -1935,8 +1944,10 @@ static int nft_chain_parse_hook(struct net *net, if (nla[NFTA_CHAIN_TYPE]) { type = nf_tables_chain_type_lookup(net, nla[NFTA_CHAIN_TYPE], family, autoload); - if (IS_ERR(type)) + if (IS_ERR(type)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TYPE]); return PTR_ERR(type); + } } if (hook->num >= NFT_MAX_HOOKS || !(type->hook_mask & (1 << hook->num))) return -EOPNOTSUPP; @@ -1945,8 +1956,11 @@ static int nft_chain_parse_hook(struct net *net, hook->priority <= NF_IP_PRI_CONNTRACK) return -EOPNOTSUPP; - if (!try_module_get(type->owner)) + if (!try_module_get(type->owner)) { + if (nla[NFTA_CHAIN_TYPE]) + NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TYPE]); return -ENOENT; + } hook->type = type; @@ -2057,7 +2071,8 @@ static int nft_chain_add(struct nft_table *table, struct nft_chain *chain) static u64 chain_id; static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, - u8 policy, u32 flags) + u8 policy, u32 flags, + struct netlink_ext_ack *extack) { const struct nlattr * const *nla = ctx->nla; struct nft_table *table = ctx->table; @@ -2079,7 +2094,8 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, if (flags & NFT_CHAIN_BINDING) return -EOPNOTSUPP; - err = nft_chain_parse_hook(net, nla, &hook, family, true); + err = nft_chain_parse_hook(net, nla, &hook, family, extack, + true); if (err < 0) return err; @@ -2234,7 +2250,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, return -EEXIST; } err = nft_chain_parse_hook(ctx->net, nla, &hook, ctx->family, - false); + extack, false); if (err < 0) return err; @@ -2447,7 +2463,7 @@ static int nf_tables_newchain(struct sk_buff *skb, const struct nfnl_info *info, extack); } - return nf_tables_addchain(&ctx, family, genmask, policy, flags); + return nf_tables_addchain(&ctx, family, genmask, policy, flags, extack); } static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info, @@ -3328,8 +3344,10 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info, if (n == NFT_RULE_MAXEXPRS) goto err1; err = nf_tables_expr_parse(&ctx, tmp, &expr_info[n]); - if (err < 0) + if (err < 0) { + NL_SET_BAD_ATTR(extack, tmp); goto err1; + } size += expr_info[n].ops->size; n++; } @@ -8547,10 +8565,14 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) switch (trans->msg_type) { case NFT_MSG_NEWTABLE: if (nft_trans_table_update(trans)) { - if (nft_trans_table_state(trans) == NFT_TABLE_STATE_DORMANT) + if (!(trans->ctx.table->flags & __NFT_TABLE_F_UPDATE)) { + nft_trans_destroy(trans); + break; + } + if (trans->ctx.table->flags & NFT_TABLE_F_DORMANT) nf_tables_table_disable(net, trans->ctx.table); - trans->ctx.table->flags = nft_trans_table_flags(trans); + trans->ctx.table->flags &= ~__NFT_TABLE_F_UPDATE; } else { nft_clear(net, trans->ctx.table); } @@ -8768,9 +8790,17 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) switch (trans->msg_type) { case NFT_MSG_NEWTABLE: if (nft_trans_table_update(trans)) { - if (nft_trans_table_state(trans) == NFT_TABLE_STATE_WAKEUP) + if (!(trans->ctx.table->flags & __NFT_TABLE_F_UPDATE)) { + nft_trans_destroy(trans); + break; + } + if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_DORMANT) { nf_tables_table_disable(net, trans->ctx.table); - + trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; + } else if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_AWAKEN) { + trans->ctx.table->flags &= ~NFT_TABLE_F_DORMANT; + } + trans->ctx.table->flags &= ~__NFT_TABLE_F_UPDATE; nft_trans_destroy(trans); } else { list_del_rcu(&trans->ctx.table->list); diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 322ac5dd5402..752b10cae524 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -380,10 +380,14 @@ static int nfnl_cthelper_update(const struct nlattr * const tb[], struct nf_conntrack_helper *helper) { + u32 size; int ret; - if (tb[NFCTH_PRIV_DATA_LEN]) - return -EBUSY; + if (tb[NFCTH_PRIV_DATA_LEN]) { + size = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN])); + if (size != helper->data_len) + return -EBUSY; + } if (tb[NFCTH_POLICY]) { ret = nfnl_cthelper_update_policy(helper, tb[NFCTH_POLICY]); diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 0592a9456084..337e22d8b40b 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -1217,7 +1217,7 @@ static void nft_ct_expect_obj_eval(struct nft_object *obj, struct nf_conn *ct; ct = nf_ct_get(pkt->skb, &ctinfo); - if (!ct || ctinfo == IP_CT_UNTRACKED) { + if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct)) { regs->verdict.code = NFT_BREAK; return; } diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 53dbe733f998..6cfd30fc0798 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -110,6 +110,7 @@ static int llcp_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) if (!llcp_sock->service_name) { nfc_llcp_local_put(llcp_sock->local); llcp_sock->local = NULL; + llcp_sock->dev = NULL; ret = -ENOMEM; goto put_dev; } @@ -119,6 +120,7 @@ static int llcp_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) llcp_sock->local = NULL; kfree(llcp_sock->service_name); llcp_sock->service_name = NULL; + llcp_sock->dev = NULL; ret = -EADDRINUSE; goto put_dev; } diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index ec7a1c438df9..18edd9ad1410 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -984,7 +984,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, */ cached = tcf_ct_skb_nfct_cached(net, skb, p->zone, force); if (!cached) { - if (!commit && tcf_ct_flow_table_lookup(p, skb, family)) { + if (tcf_ct_flow_table_lookup(p, skb, family)) { skip_add = true; goto do_nat; } @@ -1022,10 +1022,11 @@ do_nat: * even if the connection is already confirmed. */ nf_conntrack_confirm(skb); - } else if (!skip_add) { - tcf_ct_flow_table_process_conn(p->ct_ft, ct, ctinfo); } + if (!skip_add) + tcf_ct_flow_table_process_conn(p->ct_ft, ct, ctinfo); + out_push: skb_push_rcsum(skb, nh_ofs); @@ -1202,9 +1203,6 @@ static int tcf_ct_fill_params(struct net *net, sizeof(p->zone)); } - if (p->zone == NF_CT_DEFAULT_ZONE_ID) - return 0; - nf_ct_zone_init(&zone, p->zone, NF_CT_DEFAULT_ZONE_DIR, 0); tmpl = nf_ct_tmpl_alloc(net, &zone, GFP_KERNEL); if (!tmpl) { diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 76a6f8c2eec4..bd9f1567aa39 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -50,6 +50,7 @@ static void tls_device_gc_task(struct work_struct *work); static DECLARE_WORK(tls_device_gc_work, tls_device_gc_task); static LIST_HEAD(tls_device_gc_list); static LIST_HEAD(tls_device_list); +static LIST_HEAD(tls_device_down_list); static DEFINE_SPINLOCK(tls_device_lock); static void tls_device_free_ctx(struct tls_context *ctx) @@ -680,15 +681,13 @@ static void tls_device_resync_rx(struct tls_context *tls_ctx, struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx); struct net_device *netdev; - if (WARN_ON(test_and_set_bit(TLS_RX_SYNC_RUNNING, &tls_ctx->flags))) - return; - trace_tls_device_rx_resync_send(sk, seq, rcd_sn, rx_ctx->resync_type); + rcu_read_lock(); netdev = READ_ONCE(tls_ctx->netdev); if (netdev) netdev->tlsdev_ops->tls_dev_resync(netdev, sk, seq, rcd_sn, TLS_OFFLOAD_CTX_DIR_RX); - clear_bit_unlock(TLS_RX_SYNC_RUNNING, &tls_ctx->flags); + rcu_read_unlock(); TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXDEVICERESYNC); } @@ -761,6 +760,8 @@ void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq) if (tls_ctx->rx_conf != TLS_HW) return; + if (unlikely(test_bit(TLS_RX_DEV_DEGRADED, &tls_ctx->flags))) + return; prot = &tls_ctx->prot_info; rx_ctx = tls_offload_ctx_rx(tls_ctx); @@ -963,6 +964,17 @@ int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx, ctx->sw.decrypted |= is_decrypted; + if (unlikely(test_bit(TLS_RX_DEV_DEGRADED, &tls_ctx->flags))) { + if (likely(is_encrypted || is_decrypted)) + return 0; + + /* After tls_device_down disables the offload, the next SKB will + * likely have initial fragments decrypted, and final ones not + * decrypted. We need to reencrypt that single SKB. + */ + return tls_device_reencrypt(sk, skb); + } + /* Return immediately if the record is either entirely plaintext or * entirely ciphertext. Otherwise handle reencrypt partially decrypted * record. @@ -1292,6 +1304,26 @@ static int tls_device_down(struct net_device *netdev) spin_unlock_irqrestore(&tls_device_lock, flags); list_for_each_entry_safe(ctx, tmp, &list, list) { + /* Stop offloaded TX and switch to the fallback. + * tls_is_sk_tx_device_offloaded will return false. + */ + WRITE_ONCE(ctx->sk->sk_validate_xmit_skb, tls_validate_xmit_skb_sw); + + /* Stop the RX and TX resync. + * tls_dev_resync must not be called after tls_dev_del. + */ + WRITE_ONCE(ctx->netdev, NULL); + + /* Start skipping the RX resync logic completely. */ + set_bit(TLS_RX_DEV_DEGRADED, &ctx->flags); + + /* Sync with inflight packets. After this point: + * TX: no non-encrypted packets will be passed to the driver. + * RX: resync requests from the driver will be ignored. + */ + synchronize_net(); + + /* Release the offload context on the driver side. */ if (ctx->tx_conf == TLS_HW) netdev->tlsdev_ops->tls_dev_del(netdev, ctx, TLS_OFFLOAD_CTX_DIR_TX); @@ -1299,15 +1331,21 @@ static int tls_device_down(struct net_device *netdev) !test_bit(TLS_RX_DEV_CLOSED, &ctx->flags)) netdev->tlsdev_ops->tls_dev_del(netdev, ctx, TLS_OFFLOAD_CTX_DIR_RX); - WRITE_ONCE(ctx->netdev, NULL); - smp_mb__before_atomic(); /* pairs with test_and_set_bit() */ - while (test_bit(TLS_RX_SYNC_RUNNING, &ctx->flags)) - usleep_range(10, 200); + dev_put(netdev); - list_del_init(&ctx->list); - if (refcount_dec_and_test(&ctx->refcount)) - tls_device_free_ctx(ctx); + /* Move the context to a separate list for two reasons: + * 1. When the context is deallocated, list_del is called. + * 2. It's no longer an offloaded context, so we don't want to + * run offload-specific code on this context. + */ + spin_lock_irqsave(&tls_device_lock, flags); + list_move_tail(&ctx->list, &tls_device_down_list); + spin_unlock_irqrestore(&tls_device_lock, flags); + + /* Device contexts for RX and TX will be freed in on sk_destruct + * by tls_device_free_ctx. rx_conf and tx_conf stay in TLS_HW. + */ } up_write(&device_offload_lock); diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c index cacf040872c7..e40bedd112b6 100644 --- a/net/tls/tls_device_fallback.c +++ b/net/tls/tls_device_fallback.c @@ -431,6 +431,13 @@ struct sk_buff *tls_validate_xmit_skb(struct sock *sk, } EXPORT_SYMBOL_GPL(tls_validate_xmit_skb); +struct sk_buff *tls_validate_xmit_skb_sw(struct sock *sk, + struct net_device *dev, + struct sk_buff *skb) +{ + return tls_sw_fallback(sk, skb); +} + struct sk_buff *tls_encrypt_skb(struct sk_buff *skb) { return tls_sw_fallback(skb->sk, skb); diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 47b7c5334c34..fde56ff49163 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -636,6 +636,7 @@ struct tls_context *tls_ctx_create(struct sock *sk) mutex_init(&ctx->tx_lock); rcu_assign_pointer(icsk->icsk_ulp_data, ctx); ctx->sk_proto = READ_ONCE(sk->sk_prot); + ctx->sk = sk; return ctx; } diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 3c4cb72ed8a4..9ca5f1ba461e 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -501,6 +501,7 @@ do_transfer() local stat_ackrx_now_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") local stat_cookietx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent") local stat_cookierx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv") + local stat_ooo_now=$(get_mib_counter "${listener_ns}" "TcpExtTCPOFOQueue") expect_synrx=$((stat_synrx_last_l)) expect_ackrx=$((stat_ackrx_last_l)) @@ -518,10 +519,14 @@ do_transfer() "${stat_synrx_now_l}" "${expect_synrx}" 1>&2 retc=1 fi - if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ]; then - printf "[ FAIL ] lower MPC ACK rx (%d) than expected (%d)\n" \ - "${stat_ackrx_now_l}" "${expect_ackrx}" 1>&2 - rets=1 + if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} -a ${stat_ooo_now} -eq 0 ]; then + if [ ${stat_ooo_now} -eq 0 ]; then + printf "[ FAIL ] lower MPC ACK rx (%d) than expected (%d)\n" \ + "${stat_ackrx_now_l}" "${expect_ackrx}" 1>&2 + rets=1 + else + printf "[ Note ] fallback due to TCP OoO" + fi fi if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then |