 drivers/firewire/net.c                          |  14
 drivers/media/dvb-core/dvb_net.c                |   8
 drivers/message/fusion/mptlan.c                 |   2
 drivers/net/ethernet/cadence/macb_main.c        |   2
 drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c |  97
 drivers/net/phy/at803x.c                        |   2
 drivers/net/thunderbolt.c                       |   8
 drivers/staging/octeon/ethernet.c               |   2
 include/linux/bpf-cgroup.h                      |   1
 include/net/sch_generic.h                       |   2
 include/net/sock.h                              |  20
 include/net/tcp.h                               |   2
 net/core/xdp.c                                  |   2
 net/ipv4/af_inet.c                              |  18
 net/ipv4/inet_diag.c                            |   2
 net/ipv4/tcp.c                                  |  29
 net/ipv4/tcp_output.c                           |  22
 net/ipv6/af_inet6.c                             |  21
 net/ipv6/route.c                                |  24
 net/ipv6/tcp_ipv6.c                             |   1
 net/mptcp/protocol.c                            | 241
 net/mptcp/protocol.h                            |  15
 net/sched/em_meta.c                             |   2
 net/sched/sch_generic.c                         |  38
 24 files changed, 259 insertions(+), 316 deletions(-)
diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c
index 4c3fd2eed1da..dcc141068128 100644
--- a/drivers/firewire/net.c
+++ b/drivers/firewire/net.c
@@ -1443,8 +1443,8 @@ static int fwnet_probe(struct fw_unit *unit,
struct net_device *net;
bool allocated_netdev = false;
struct fwnet_device *dev;
+ union fwnet_hwaddr ha;
int ret;
- union fwnet_hwaddr *ha;
mutex_lock(&fwnet_device_mutex);
@@ -1491,12 +1491,12 @@ static int fwnet_probe(struct fw_unit *unit,
net->max_mtu = 4096U;
/* Set our hardware address while we're at it */
- ha = (union fwnet_hwaddr *)net->dev_addr;
- put_unaligned_be64(card->guid, &ha->uc.uniq_id);
- ha->uc.max_rec = dev->card->max_receive;
- ha->uc.sspd = dev->card->link_speed;
- put_unaligned_be16(dev->local_fifo >> 32, &ha->uc.fifo_hi);
- put_unaligned_be32(dev->local_fifo & 0xffffffff, &ha->uc.fifo_lo);
+ ha.uc.uniq_id = cpu_to_be64(card->guid);
+ ha.uc.max_rec = dev->card->max_receive;
+ ha.uc.sspd = dev->card->link_speed;
+ ha.uc.fifo_hi = cpu_to_be16(dev->local_fifo >> 32);
+ ha.uc.fifo_lo = cpu_to_be32(dev->local_fifo & 0xffffffff);
+ dev_addr_set(net, ha.u);
memset(net->broadcast, -1, net->addr_len);
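The hunk above is the template for the tree-wide netdev->dev_addr constification: compose the address in a local buffer, then publish it with dev_addr_set() rather than writing through net->dev_addr. dev_addr_set() copies dev->addr_len bytes, so the same helper serves this 16-byte firewire address and 6-byte Ethernet MACs (for which the eth_hw_addr_set() wrapper exists). A minimal sketch of the Ethernet flavour of the pattern; foo_assign_mac() is hypothetical, not code from this series:

#include <linux/etherdevice.h>
#include <linux/netdevice.h>
#include <linux/string.h>

static void foo_assign_mac(struct net_device *dev, const u8 *rom_mac)
{
        u8 addr[ETH_ALEN];

        if (rom_mac && is_valid_ether_addr(rom_mac)) {
                memcpy(addr, rom_mac, ETH_ALEN);
                eth_hw_addr_set(dev, addr);     /* copies ETH_ALEN bytes */
        } else {
                eth_hw_addr_random(dev);        /* random locally administered MAC */
        }
}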
diff --git a/drivers/media/dvb-core/dvb_net.c b/drivers/media/dvb-core/dvb_net.c
index dddebea644bb..8a2febf33ce2 100644
--- a/drivers/media/dvb-core/dvb_net.c
+++ b/drivers/media/dvb-core/dvb_net.c
@@ -1008,7 +1008,7 @@ static u8 mask_promisc[6]={0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
static int dvb_net_filter_sec_set(struct net_device *dev,
struct dmx_section_filter **secfilter,
- u8 *mac, u8 *mac_mask)
+ const u8 *mac, u8 *mac_mask)
{
struct dvb_net_priv *priv = netdev_priv(dev);
int ret;
@@ -1052,7 +1052,7 @@ static int dvb_net_feed_start(struct net_device *dev)
int ret = 0, i;
struct dvb_net_priv *priv = netdev_priv(dev);
struct dmx_demux *demux = priv->demux;
- unsigned char *mac = (unsigned char *) dev->dev_addr;
+ const unsigned char *mac = (const unsigned char *) dev->dev_addr;
netdev_dbg(dev, "rx_mode %i\n", priv->rx_mode);
mutex_lock(&priv->mutex);
@@ -1272,7 +1272,7 @@ static int dvb_net_set_mac (struct net_device *dev, void *p)
struct dvb_net_priv *priv = netdev_priv(dev);
struct sockaddr *addr=p;
- memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
+ eth_hw_addr_set(dev, addr->sa_data);
if (netif_running(dev))
schedule_work(&priv->restart_net_feed_wq);
@@ -1367,7 +1367,7 @@ static int dvb_net_add_if(struct dvb_net *dvbnet, u16 pid, u8 feedtype)
dvbnet->dvbdev->adapter->num, if_num);
net->addr_len = 6;
- memcpy(net->dev_addr, dvbnet->dvbdev->adapter->proposed_mac, 6);
+ eth_hw_addr_set(net, dvbnet->dvbdev->adapter->proposed_mac);
dvbnet->device[if_num] = net;
diff --git a/drivers/message/fusion/mptlan.c b/drivers/message/fusion/mptlan.c
index 3261cac762de..acdc257a900e 100644
--- a/drivers/message/fusion/mptlan.c
+++ b/drivers/message/fusion/mptlan.c
@@ -1350,7 +1350,7 @@ mpt_register_lan_device (MPT_ADAPTER *mpt_dev, int pnum)
HWaddr[5] = a[0];
dev->addr_len = FC_ALEN;
- memcpy(dev->dev_addr, HWaddr, FC_ALEN);
+ dev_addr_set(dev, HWaddr);
memset(dev->broadcast, 0xff, FC_ALEN);
/* The Tx queue is 127 deep on the 909.
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 309371abfe23..ffce528aa00e 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -901,7 +901,7 @@ static int macb_mdiobus_register(struct macb *bp)
* directly under the MAC node
*/
child = of_get_child_by_name(np, "mdio");
- if (np) {
+ if (child) {
int ret = of_mdiobus_register(bp->mii_bus, child);
of_node_put(child);
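The one-character fix above deserves a note: of_get_child_by_name() was called to look up an "mdio" subnode, but the result was tested against the parent np, so a missing child node could pass NULL to of_mdiobus_register(). A sketch of the intended lookup shape, with a hypothetical foo_mdio_register() standing in for the driver code:

#include <linux/of.h>
#include <linux/of_mdio.h>

static int foo_mdio_register(struct mii_bus *bus, struct device_node *np)
{
        struct device_node *child = of_get_child_by_name(np, "mdio");
        int ret;

        if (child) {
                /* PHYs live under a dedicated mdio subnode */
                ret = of_mdiobus_register(bus, child);
                of_node_put(child);     /* drop the ref taken by the lookup */
                return ret;
        }

        /* no subnode: scan the MAC node itself */
        return of_mdiobus_register(bus, np);
}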
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 8ddf58f379ac..587def69a6f7 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -6261,32 +6261,13 @@ static void mvpp2_phylink_validate(struct phylink_config *config,
struct mvpp2_port *port = mvpp2_phylink_to_port(config);
__ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
- /* Invalid combinations */
- switch (state->interface) {
- case PHY_INTERFACE_MODE_10GBASER:
- case PHY_INTERFACE_MODE_XAUI:
- if (!mvpp2_port_supports_xlg(port))
- goto empty_set;
- break;
- case PHY_INTERFACE_MODE_RGMII:
- case PHY_INTERFACE_MODE_RGMII_ID:
- case PHY_INTERFACE_MODE_RGMII_RXID:
- case PHY_INTERFACE_MODE_RGMII_TXID:
- if (!mvpp2_port_supports_rgmii(port))
- goto empty_set;
- break;
- case PHY_INTERFACE_MODE_1000BASEX:
- case PHY_INTERFACE_MODE_2500BASEX:
- /* When in 802.3z mode, we must have AN enabled:
- * Bit 2 Field InBandAnEn In-band Auto-Negotiation enable. ...
- * When <PortType> = 1 (1000BASE-X) this field must be set to 1.
- */
- if (!phylink_test(state->advertising, Autoneg))
- goto empty_set;
- break;
- default:
- break;
- }
+ /* When in 802.3z mode, we must have AN enabled:
+ * Bit 2 Field InBandAnEn In-band Auto-Negotiation enable. ...
+ * When <PortType> = 1 (1000BASE-X) this field must be set to 1.
+ */
+ if (phy_interface_mode_is_8023z(state->interface) &&
+ !phylink_test(state->advertising, Autoneg))
+ goto empty_set;
phylink_set(mask, Autoneg);
phylink_set_port_modes(mask);
@@ -6299,14 +6280,12 @@ static void mvpp2_phylink_validate(struct phylink_config *config,
switch (state->interface) {
case PHY_INTERFACE_MODE_10GBASER:
case PHY_INTERFACE_MODE_XAUI:
- case PHY_INTERFACE_MODE_NA:
if (mvpp2_port_supports_xlg(port)) {
phylink_set_10g_modes(mask);
phylink_set(mask, 10000baseKR_Full);
}
- if (state->interface != PHY_INTERFACE_MODE_NA)
- break;
- fallthrough;
+ break;
+
case PHY_INTERFACE_MODE_RGMII:
case PHY_INTERFACE_MODE_RGMII_ID:
case PHY_INTERFACE_MODE_RGMII_RXID:
@@ -6318,30 +6297,24 @@ static void mvpp2_phylink_validate(struct phylink_config *config,
phylink_set(mask, 100baseT_Full);
phylink_set(mask, 1000baseT_Full);
phylink_set(mask, 1000baseX_Full);
- if (state->interface != PHY_INTERFACE_MODE_NA)
- break;
- fallthrough;
+ break;
+
case PHY_INTERFACE_MODE_1000BASEX:
+ phylink_set(mask, 1000baseT_Full);
+ phylink_set(mask, 1000baseX_Full);
+ break;
+
case PHY_INTERFACE_MODE_2500BASEX:
- if (port->comphy ||
- state->interface != PHY_INTERFACE_MODE_2500BASEX) {
- phylink_set(mask, 1000baseT_Full);
- phylink_set(mask, 1000baseX_Full);
- }
- if (port->comphy ||
- state->interface == PHY_INTERFACE_MODE_2500BASEX) {
- phylink_set(mask, 2500baseT_Full);
- phylink_set(mask, 2500baseX_Full);
- }
+ phylink_set(mask, 2500baseT_Full);
+ phylink_set(mask, 2500baseX_Full);
break;
+
default:
goto empty_set;
}
linkmode_and(supported, supported, mask);
linkmode_and(state->advertising, state->advertising, mask);
-
- phylink_helper_basex_speed(state);
return;
empty_set:
@@ -6937,6 +6910,40 @@ static int mvpp2_port_probe(struct platform_device *pdev,
port->phylink_config.dev = &dev->dev;
port->phylink_config.type = PHYLINK_NETDEV;
+ if (mvpp2_port_supports_xlg(port)) {
+ __set_bit(PHY_INTERFACE_MODE_10GBASER,
+ port->phylink_config.supported_interfaces);
+ __set_bit(PHY_INTERFACE_MODE_XAUI,
+ port->phylink_config.supported_interfaces);
+ }
+
+ if (mvpp2_port_supports_rgmii(port))
+ phy_interface_set_rgmii(port->phylink_config.supported_interfaces);
+
+ if (comphy) {
+ /* If a COMPHY is present, we can support any of the
+ * serdes modes and switch between them.
+ */
+ __set_bit(PHY_INTERFACE_MODE_SGMII,
+ port->phylink_config.supported_interfaces);
+ __set_bit(PHY_INTERFACE_MODE_1000BASEX,
+ port->phylink_config.supported_interfaces);
+ __set_bit(PHY_INTERFACE_MODE_2500BASEX,
+ port->phylink_config.supported_interfaces);
+ } else if (phy_mode == PHY_INTERFACE_MODE_2500BASEX) {
+ /* No COMPHY, with only 2500BASE-X mode supported */
+ __set_bit(PHY_INTERFACE_MODE_2500BASEX,
+ port->phylink_config.supported_interfaces);
+ } else if (phy_mode == PHY_INTERFACE_MODE_1000BASEX ||
+ phy_mode == PHY_INTERFACE_MODE_SGMII) {
+ /* No COMPHY, we can switch between 1000BASE-X and SGMII
+ */
+ __set_bit(PHY_INTERFACE_MODE_1000BASEX,
+ port->phylink_config.supported_interfaces);
+ __set_bit(PHY_INTERFACE_MODE_SGMII,
+ port->phylink_config.supported_interfaces);
+ }
+
phylink = phylink_create(&port->phylink_config, port_fwnode,
phy_mode, &mvpp2_phylink_ops);
if (IS_ERR(phylink)) {
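Populating phylink_config.supported_interfaces in the probe path is what pays for the validate() simplification earlier in this file: once phylink knows the exact set of interface modes, it resolves PHY_INTERFACE_MODE_NA by iterating that set itself, so the callback loses the NA fallthrough chains, the per-port capability checks, and the phylink_helper_basex_speed() call. A minimal sketch for a hypothetical MAC driver; foo_fill_interfaces() and has_serdes are illustrative:

#include <linux/phy.h>
#include <linux/phylink.h>

static void foo_fill_interfaces(struct phylink_config *config, bool has_serdes)
{
        /* one bit per phy_interface_t mode the MAC can actually drive */
        phy_interface_set_rgmii(config->supported_interfaces);

        if (has_serdes) {
                __set_bit(PHY_INTERFACE_MODE_SGMII,
                          config->supported_interfaces);
                __set_bit(PHY_INTERFACE_MODE_1000BASEX,
                          config->supported_interfaces);
        }
}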
diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
index f1cbe1f6ddec..dae95d9a07e8 100644
--- a/drivers/net/phy/at803x.c
+++ b/drivers/net/phy/at803x.c
@@ -484,7 +484,7 @@ static int at803x_set_wol(struct phy_device *phydev,
static void at803x_get_wol(struct phy_device *phydev,
struct ethtool_wolinfo *wol)
{
- u32 value;
+ int value;
wol->supported = WAKE_MAGIC;
wol->wolopts = 0;
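The u32 to int change matters because phy_read_mmd() returns int, using negative errno values for MDIO failures; stored in a u32, a "value < 0" error check can never fire, and error bits can masquerade as WoL status. Illustrative shape of the fixed helper; the register address and enable bit below are assumptions for the example, not quoted from the driver:

#include <linux/ethtool.h>
#include <linux/phy.h>

static void foo_get_wol(struct phy_device *phydev, struct ethtool_wolinfo *wol)
{
        int value;      /* signed, so MDIO errors are visible */

        wol->supported = WAKE_MAGIC;
        wol->wolopts = 0;

        value = phy_read_mmd(phydev, MDIO_MMD_PCS, 0x8012 /* assumed WoL ctrl reg */);
        if (value < 0)
                return;         /* read failed: report WoL disarmed */

        if (value & BIT(5))     /* assumed WoL-enable bit */
                wol->wolopts |= WAKE_MAGIC;
}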
diff --git a/drivers/net/thunderbolt.c b/drivers/net/thunderbolt.c
index 9a6a8353e192..ff5d0e98a088 100644
--- a/drivers/net/thunderbolt.c
+++ b/drivers/net/thunderbolt.c
@@ -1202,17 +1202,19 @@ static void tbnet_generate_mac(struct net_device *dev)
{
const struct tbnet *net = netdev_priv(dev);
const struct tb_xdomain *xd = net->xd;
+ u8 addr[ETH_ALEN];
u8 phy_port;
u32 hash;
phy_port = tb_phy_port_from_link(TBNET_L0_PORT_NUM(xd->route));
/* Unicast and locally administered MAC */
- dev->dev_addr[0] = phy_port << 4 | 0x02;
+ addr[0] = phy_port << 4 | 0x02;
hash = jhash2((u32 *)xd->local_uuid, 4, 0);
- memcpy(dev->dev_addr + 1, &hash, sizeof(hash));
+ memcpy(addr + 1, &hash, sizeof(hash));
hash = jhash2((u32 *)xd->local_uuid, 4, hash);
- dev->dev_addr[5] = hash & 0xff;
+ addr[5] = hash & 0xff;
+ eth_hw_addr_set(dev, addr);
}
static int tbnet_probe(struct tb_service *svc, const struct tb_service_id *id)
diff --git a/drivers/staging/octeon/ethernet.c b/drivers/staging/octeon/ethernet.c
index 5d24c1b6663b..d5785c0d06b0 100644
--- a/drivers/staging/octeon/ethernet.c
+++ b/drivers/staging/octeon/ethernet.c
@@ -409,7 +409,7 @@ int cvm_oct_common_init(struct net_device *dev)
struct octeon_ethernet *priv = netdev_priv(dev);
int ret;
- ret = of_get_mac_address(priv->of_node, dev->dev_addr);
+ ret = of_get_ethdev_address(priv->of_node, dev);
if (ret)
eth_hw_addr_random(dev);
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 2746fd804216..3536ab432b30 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -517,6 +517,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
#define cgroup_bpf_enabled(atype) (0)
#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) ({ 0; })
+#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) ({ 0; })
#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
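The new stub mirrors the ones around it: with CONFIG_CGROUP_BPF disabled, every hook collapses to a statement expression that evaluates to 0 ("success"), so call sites such as the inet_getname()/inet6_getname() changes below compile unchanged without any #ifdef. The pattern in the abstract, with illustrative FOO names:

#ifdef CONFIG_FOO
int foo_run_hook(struct sock *sk, struct sockaddr *uaddr);
#define FOO_RUN_HOOK(sk, uaddr)         foo_run_hook(sk, uaddr)
#else
/* keep the call site a valid expression that always reports success */
#define FOO_RUN_HOOK(sk, uaddr)         ({ 0; })
#endif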
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index ada02c4a4f51..22179b2fda72 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -1302,7 +1302,7 @@ struct mini_Qdisc {
struct tcf_block *block;
struct gnet_stats_basic_sync __percpu *cpu_bstats;
struct gnet_stats_queue __percpu *cpu_qstats;
- struct rcu_head rcu;
+ unsigned long rcu_state;
};
static inline void mini_qdisc_bstats_cpu_update(struct mini_Qdisc *miniq,
diff --git a/include/net/sock.h b/include/net/sock.h
index ff4e62aa62e5..4f0280ad6c23 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1210,6 +1210,8 @@ struct proto {
unsigned int inuse_idx;
#endif
+ int (*forward_alloc_get)(const struct sock *sk);
+
bool (*stream_memory_free)(const struct sock *sk, int wake);
bool (*stream_memory_read)(const struct sock *sk);
/* Memory pressure */
@@ -1217,6 +1219,7 @@ struct proto {
void (*leave_memory_pressure)(struct sock *sk);
atomic_long_t *memory_allocated; /* Current allocated memory. */
struct percpu_counter *sockets_allocated; /* Current number of sockets. */
+
/*
* Pressure flag: try to collapse.
* Technical note: it is used by multiple contexts non atomically.
@@ -1294,6 +1297,14 @@ static inline void sk_refcnt_debug_release(const struct sock *sk)
INDIRECT_CALLABLE_DECLARE(bool tcp_stream_memory_free(const struct sock *sk, int wake));
+static inline int sk_forward_alloc_get(const struct sock *sk)
+{
+ if (!sk->sk_prot->forward_alloc_get)
+ return sk->sk_forward_alloc;
+
+ return sk->sk_prot->forward_alloc_get(sk);
+}
+
static inline bool __sk_stream_memory_free(const struct sock *sk, int wake)
{
if (READ_ONCE(sk->sk_wmem_queued) >= READ_ONCE(sk->sk_sndbuf))
@@ -1573,6 +1584,11 @@ static inline void sk_mem_charge(struct sock *sk, int size)
sk->sk_forward_alloc -= size;
}
+/* the following macros control memory reclaiming in sk_mem_uncharge()
+ */
+#define SK_RECLAIM_THRESHOLD (1 << 21)
+#define SK_RECLAIM_CHUNK (1 << 20)
+
static inline void sk_mem_uncharge(struct sock *sk, int size)
{
int reclaimable;
@@ -1589,8 +1605,8 @@ static inline void sk_mem_uncharge(struct sock *sk, int size)
* If we reach 2 MBytes, reclaim 1 MBytes right now, there is
* no need to hold that much forward allocation anyway.
*/
- if (unlikely(reclaimable >= 1 << 21))
- __sk_mem_reclaim(sk, 1 << 20);
+ if (unlikely(reclaimable >= SK_RECLAIM_THRESHOLD))
+ __sk_mem_reclaim(sk, SK_RECLAIM_CHUNK);
}
static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
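forward_alloc_get() gives protocols that keep part of their forward allocation outside sk->sk_forward_alloc (MPTCP, later in this series) a way to report the combined figure to generic consumers such as inet_diag and em_meta. The SK_RECLAIM_* names encode the policy already described in the comment below them: once more than 2 MiB (1 << 21) of unused forward allocation accumulates, 1 MiB (1 << 20) is handed back to the global accounting. An abstract sketch of the hook; foo_sock and priv_fwd_alloc are hypothetical:

#include <net/sock.h>

struct foo_sock {
        struct sock     sk;
        int             priv_fwd_alloc; /* protocol-private rx quota */
};

static int foo_forward_alloc_get(const struct sock *sk)
{
        /* report core plus protocol-private forward allocation */
        return sk->sk_forward_alloc +
               ((const struct foo_sock *)sk)->priv_fwd_alloc;
}

static struct proto foo_prot = {
        .name                   = "FOO",
        .forward_alloc_get      = foo_forward_alloc_get,
        /* remaining ops elided */
};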
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 701587af6852..8e8c5922a7b0 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -311,7 +311,7 @@ void tcp_shutdown(struct sock *sk, int how);
int tcp_v4_early_demux(struct sk_buff *skb);
int tcp_v4_rcv(struct sk_buff *skb);
-void tcp_remove_empty_skb(struct sock *sk, struct sk_buff *skb);
+void tcp_remove_empty_skb(struct sock *sk);
int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);
int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size);
diff --git a/net/core/xdp.c b/net/core/xdp.c
index cc92ccb38432..5ddc29f29bad 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -143,8 +143,6 @@ void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
if (xdp_rxq->reg_state == REG_STATE_UNUSED)
return;
- WARN(!(xdp_rxq->reg_state == REG_STATE_REGISTERED), "Driver BUG");
-
xdp_rxq_info_unreg_mem_model(xdp_rxq);
xdp_rxq->reg_state = REG_STATE_UNREGISTERED;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 8eb428387bac..0189e3cd4a7d 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -150,7 +150,7 @@ void inet_sock_destruct(struct sock *sk)
WARN_ON(atomic_read(&sk->sk_rmem_alloc));
WARN_ON(refcount_read(&sk->sk_wmem_alloc));
WARN_ON(sk->sk_wmem_queued);
- WARN_ON(sk->sk_forward_alloc);
+ WARN_ON(sk_forward_alloc_get(sk));
kfree(rcu_dereference_protected(inet->inet_opt, 1));
dst_release(rcu_dereference_protected(sk->sk_dst_cache, 1));
@@ -769,26 +769,28 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
DECLARE_SOCKADDR(struct sockaddr_in *, sin, uaddr);
sin->sin_family = AF_INET;
+ lock_sock(sk);
if (peer) {
if (!inet->inet_dport ||
(((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) &&
- peer == 1))
+ peer == 1)) {
+ release_sock(sk);
return -ENOTCONN;
+ }
sin->sin_port = inet->inet_dport;
sin->sin_addr.s_addr = inet->inet_daddr;
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
- CGROUP_INET4_GETPEERNAME,
- NULL);
+ BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin,
+ CGROUP_INET4_GETPEERNAME);
} else {
__be32 addr = inet->inet_rcv_saddr;
if (!addr)
addr = inet->inet_saddr;
sin->sin_port = inet->inet_sport;
sin->sin_addr.s_addr = addr;
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
- CGROUP_INET4_GETSOCKNAME,
- NULL);
+ BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin,
+ CGROUP_INET4_GETSOCKNAME);
}
+ release_sock(sk);
memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
return sizeof(*sin);
}
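From userspace the contract is unchanged: getpeername() on a TCP socket that is still in CLOSE or SYN-SENT fails with ENOTCONN; the difference is that the cgroup BPF hook now runs with the socket lock already held via lock_sock() instead of relying on the _LOCK macro variant to take it. A small runnable demonstration of the error path:

#include <netinet/in.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
        struct sockaddr_in peer;
        socklen_t len = sizeof(peer);
        int fd = socket(AF_INET, SOCK_STREAM, 0);

        if (fd < 0)
                return 1;
        /* never connected: the kernel returns -ENOTCONN */
        if (getpeername(fd, (struct sockaddr *)&peer, &len) < 0)
                perror("getpeername");  /* "Transport endpoint is not connected" */
        close(fd);
        return 0;
}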
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index ef7897226f08..c8fa6e7f7d12 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -271,7 +271,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
struct inet_diag_meminfo minfo = {
.idiag_rmem = sk_rmem_alloc_get(sk),
.idiag_wmem = READ_ONCE(sk->sk_wmem_queued),
- .idiag_fmem = sk->sk_forward_alloc,
+ .idiag_fmem = sk_forward_alloc_get(sk),
.idiag_tmem = sk_wmem_alloc_get(sk),
};
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index d0b848ff5c0f..7a7b9aa8f19a 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -658,10 +658,8 @@ void tcp_skb_entail(struct sock *sk, struct sk_buff *skb)
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
- skb->csum = 0;
tcb->seq = tcb->end_seq = tp->write_seq;
tcb->tcp_flags = TCPHDR_ACK;
- tcb->sacked = 0;
__skb_header_release(skb);
tcp_add_write_queue_tail(sk, skb);
sk_wmem_queued_add(sk, skb->truesize);
@@ -876,11 +874,7 @@ struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
}
if (likely(mem_scheduled)) {
skb_reserve(skb, MAX_TCP_HEADER);
- /*
- * Make sure that we have exactly size bytes
- * available to the caller, no more, no less.
- */
- skb->reserved_tailroom = skb->end - skb->tail - size;
+ skb->ip_summed = CHECKSUM_PARTIAL;
INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
return skb;
}
@@ -933,8 +927,10 @@ int tcp_send_mss(struct sock *sk, int *size_goal, int flags)
* importantly be able to generate EPOLLOUT for Edge Trigger epoll()
* users.
*/
-void tcp_remove_empty_skb(struct sock *sk, struct sk_buff *skb)
+void tcp_remove_empty_skb(struct sock *sk)
{
+ struct sk_buff *skb = tcp_write_queue_tail(sk);
+
if (skb && TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
tcp_unlink_write_queue(skb, sk);
if (tcp_write_queue_empty(sk))
@@ -996,7 +992,6 @@ new_segment:
skb->truesize += copy;
sk_wmem_queued_add(sk, copy);
sk_mem_charge(sk, copy);
- skb->ip_summed = CHECKSUM_PARTIAL;
WRITE_ONCE(tp->write_seq, tp->write_seq + copy);
TCP_SKB_CB(skb)->end_seq += copy;
tcp_skb_pcount_set(skb, 0);
@@ -1087,7 +1082,7 @@ out:
return copied;
do_error:
- tcp_remove_empty_skb(sk, tcp_write_queue_tail(sk));
+ tcp_remove_empty_skb(sk);
if (copied)
goto out;
out_err:
@@ -1292,7 +1287,6 @@ new_segment:
goto wait_for_space;
process_backlog++;
- skb->ip_summed = CHECKSUM_PARTIAL;
tcp_skb_entail(sk, skb);
copy = size_goal;
@@ -1309,14 +1303,7 @@ new_segment:
if (copy > msg_data_left(msg))
copy = msg_data_left(msg);
- /* Where to copy to? */
- if (skb_availroom(skb) > 0 && !zc) {
- /* We have some space in skb head. Superb! */
- copy = min_t(int, copy, skb_availroom(skb));
- err = skb_add_data_nocache(sk, skb, &msg->msg_iter, copy);
- if (err)
- goto do_fault;
- } else if (!zc) {
+ if (!zc) {
bool merge = true;
int i = skb_shinfo(skb)->nr_frags;
struct page_frag *pfrag = sk_page_frag(sk);
@@ -1415,9 +1402,7 @@ out_nopush:
return copied + copied_syn;
do_error:
- skb = tcp_write_queue_tail(sk);
-do_fault:
- tcp_remove_empty_skb(sk, skb);
+ tcp_remove_empty_skb(sk);
if (copied + copied_syn)
goto out;
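The common thread behind the dead stores removed here and in tcp_output.c below: every skb on a TCP write queue now comes from tcp_stream_alloc_skb(), which sets ip_summed = CHECKSUM_PARTIAL exactly once, and __alloc_skb() already zeroes the head of the skb, covering skb->csum and the cb[] area that holds TCP_SKB_CB(skb)->sacked:

/* from __alloc_skb(): everything up to the tail pointer starts zeroed */
memset(skb, 0, offsetof(struct sk_buff, tail));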
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index c0c55a8be8f7..6867e5db3e35 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -394,7 +394,6 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
skb->ip_summed = CHECKSUM_PARTIAL;
TCP_SKB_CB(skb)->tcp_flags = flags;
- TCP_SKB_CB(skb)->sacked = 0;
tcp_skb_pcount_set(skb, 1);
@@ -1590,8 +1589,6 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
skb_split(skb, buff, len);
- buff->ip_summed = CHECKSUM_PARTIAL;
-
buff->tstamp = skb->tstamp;
tcp_fragment_tstamp(skb, buff);
@@ -1676,7 +1673,6 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
delta_truesize = __pskb_trim_head(skb, len);
TCP_SKB_CB(skb)->seq += len;
- skb->ip_summed = CHECKSUM_PARTIAL;
if (delta_truesize) {
skb->truesize -= delta_truesize;
@@ -2142,12 +2138,8 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
TCP_SKB_CB(buff)->tcp_flags = flags;
- /* This packet was never sent out yet, so no SACK bits. */
- TCP_SKB_CB(buff)->sacked = 0;
-
tcp_skb_fragment_eor(skb, buff);
- buff->ip_summed = CHECKSUM_PARTIAL;
skb_split(skb, buff, len);
tcp_fragment_tstamp(skb, buff);
@@ -2401,9 +2393,6 @@ static int tcp_mtu_probe(struct sock *sk)
TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK;
- TCP_SKB_CB(nskb)->sacked = 0;
- nskb->csum = 0;
- nskb->ip_summed = CHECKSUM_PARTIAL;
tcp_insert_write_queue_before(nskb, skb, sk);
tcp_highest_sack_replace(sk, skb, nskb);
@@ -3045,13 +3034,9 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
- if (next_skb_size) {
- if (next_skb_size <= skb_availroom(skb))
- skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size),
- next_skb_size);
- else if (!tcp_skb_shift(skb, next_skb, 1, next_skb_size))
- return false;
- }
+ if (next_skb_size && !tcp_skb_shift(skb, next_skb, 1, next_skb_size))
+ return false;
+
tcp_highest_sack_replace(sk, next_skb, skb);
/* Update sequence range on original skb. */
@@ -3757,7 +3742,6 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
syn_data = tcp_stream_alloc_skb(sk, space, sk->sk_allocation, false);
if (!syn_data)
goto fallback;
- syn_data->ip_summed = CHECKSUM_PARTIAL;
memcpy(syn_data->cb, syn->cb, sizeof(syn->cb));
if (space) {
int copied = copy_from_iter(skb_put(syn_data, space), space,
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index b5878bb8e419..0c4da163535a 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -521,31 +521,32 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
sin->sin6_family = AF_INET6;
sin->sin6_flowinfo = 0;
sin->sin6_scope_id = 0;
+ lock_sock(sk);
if (peer) {
- if (!inet->inet_dport)
- return -ENOTCONN;
- if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) &&
- peer == 1)
+ if (!inet->inet_dport ||
+ (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) &&
+ peer == 1)) {
+ release_sock(sk);
return -ENOTCONN;
+ }
sin->sin6_port = inet->inet_dport;
sin->sin6_addr = sk->sk_v6_daddr;
if (np->sndflow)
sin->sin6_flowinfo = np->flow_label;
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
- CGROUP_INET6_GETPEERNAME,
- NULL);
+ BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin,
+ CGROUP_INET6_GETPEERNAME);
} else {
if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
sin->sin6_addr = np->saddr;
else
sin->sin6_addr = sk->sk_v6_rcv_saddr;
sin->sin6_port = inet->inet_sport;
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
- CGROUP_INET6_GETSOCKNAME,
- NULL);
+ BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin,
+ CGROUP_INET6_GETSOCKNAME);
}
sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr,
sk->sk_bound_dev_if);
+ release_sock(sk);
return sizeof(*sin);
}
EXPORT_SYMBOL(inet6_getname);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 9b9ef09382ab..3ae25b8ffbd6 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -6306,11 +6306,11 @@ static int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
static struct ctl_table ipv6_route_table_template[] = {
{
- .procname = "flush",
- .data = &init_net.ipv6.sysctl.flush_delay,
+ .procname = "max_size",
+ .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
.maxlen = sizeof(int),
- .mode = 0200,
- .proc_handler = ipv6_sysctl_rtcache_flush
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
},
{
.procname = "gc_thresh",
@@ -6320,11 +6320,11 @@ static struct ctl_table ipv6_route_table_template[] = {
.proc_handler = proc_dointvec,
},
{
- .procname = "max_size",
- .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
+ .procname = "flush",
+ .data = &init_net.ipv6.sysctl.flush_delay,
.maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
+ .mode = 0200,
+ .proc_handler = ipv6_sysctl_rtcache_flush
},
{
.procname = "gc_min_interval",
@@ -6396,10 +6396,10 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
GFP_KERNEL);
if (table) {
- table[0].data = &net->ipv6.sysctl.flush_delay;
- table[0].extra1 = net;
+ table[0].data = &net->ipv6.sysctl.ip6_rt_max_size;
table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
- table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
+ table[2].data = &net->ipv6.sysctl.flush_delay;
+ table[2].extra1 = net;
table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
@@ -6411,7 +6411,7 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
/* Don't export sysctls to unprivileged users */
if (net->user_ns != &init_user_ns)
- table[0].procname = NULL;
+ table[1].procname = NULL;
}
return table;
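Per-netns registration clones the template with kmemdup() and then patches entries by index, which is why a template reorder like the one above has to touch every table[i] reference in the init path in the same patch. The general shape, with a hypothetical foo_sysctl_init():

#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/user_namespace.h>
#include <net/net_namespace.h>

static struct ctl_table *foo_sysctl_init(struct net *net,
                                         const struct ctl_table *tmpl,
                                         size_t entries)
{
        struct ctl_table *table;

        table = kmemdup(tmpl, entries * sizeof(*tmpl), GFP_KERNEL);
        if (!table)
                return NULL;

        /* repoint each entry's .data at this netns' storage */
        table[0].data = &net->ipv6.sysctl.ip6_rt_max_size;      /* as above */

        /* hide privileged knobs from non-init user namespaces */
        if (net->user_ns != &init_user_ns)
                table[0].procname = NULL;

        return table;
}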
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index c678e778c1fb..2cc9b0e53ad1 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -969,7 +969,6 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
fl6.flowlabel = label;
buff->ip_summed = CHECKSUM_PARTIAL;
- buff->csum = 0;
__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index cd6b11c9b54d..b7e32e316738 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -126,6 +126,11 @@ static void mptcp_drop(struct sock *sk, struct sk_buff *skb)
__kfree_skb(skb);
}
+static void mptcp_rmem_charge(struct sock *sk, int size)
+{
+ mptcp_sk(sk)->rmem_fwd_alloc -= size;
+}
+
static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to,
struct sk_buff *from)
{
@@ -142,7 +147,7 @@ static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to,
MPTCP_SKB_CB(to)->end_seq = MPTCP_SKB_CB(from)->end_seq;
kfree_skb_partial(from, fragstolen);
atomic_add(delta, &sk->sk_rmem_alloc);
- sk_mem_charge(sk, delta);
+ mptcp_rmem_charge(sk, delta);
return true;
}
@@ -155,6 +160,44 @@ static bool mptcp_ooo_try_coalesce(struct mptcp_sock *msk, struct sk_buff *to,
return mptcp_try_coalesce((struct sock *)msk, to, from);
}
+static void __mptcp_rmem_reclaim(struct sock *sk, int amount)
+{
+ amount >>= SK_MEM_QUANTUM_SHIFT;
+ mptcp_sk(sk)->rmem_fwd_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
+ __sk_mem_reduce_allocated(sk, amount);
+}
+
+static void mptcp_rmem_uncharge(struct sock *sk, int size)
+{
+ struct mptcp_sock *msk = mptcp_sk(sk);
+ int reclaimable;
+
+ msk->rmem_fwd_alloc += size;
+ reclaimable = msk->rmem_fwd_alloc - sk_unused_reserved_mem(sk);
+
+ /* see sk_mem_uncharge() for the rationale behind the following schema */
+ if (unlikely(reclaimable >= SK_RECLAIM_THRESHOLD))
+ __mptcp_rmem_reclaim(sk, SK_RECLAIM_CHUNK);
+}
+
+static void mptcp_rfree(struct sk_buff *skb)
+{
+ unsigned int len = skb->truesize;
+ struct sock *sk = skb->sk;
+
+ atomic_sub(len, &sk->sk_rmem_alloc);
+ mptcp_rmem_uncharge(sk, len);
+}
+
+static void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk)
+{
+ skb_orphan(skb);
+ skb->sk = sk;
+ skb->destructor = mptcp_rfree;
+ atomic_add(skb->truesize, &sk->sk_rmem_alloc);
+ mptcp_rmem_charge(sk, skb->truesize);
+}
+
/* "inspired" by tcp_data_queue_ofo(), main differences:
* - use mptcp seqs
* - don't cope with sacks
@@ -267,7 +310,29 @@ merge_right:
end:
skb_condense(skb);
- skb_set_owner_r(skb, sk);
+ mptcp_set_owner_r(skb, sk);
+}
+
+static bool mptcp_rmem_schedule(struct sock *sk, struct sock *ssk, int size)
+{
+ struct mptcp_sock *msk = mptcp_sk(sk);
+ int amt, amount;
+
+ if (size < msk->rmem_fwd_alloc)
+ return true;
+
+ amt = sk_mem_pages(size);
+ amount = amt << SK_MEM_QUANTUM_SHIFT;
+ msk->rmem_fwd_alloc += amount;
+ if (!__sk_mem_raise_allocated(sk, size, amt, SK_MEM_RECV)) {
+ if (ssk->sk_forward_alloc < amount) {
+ msk->rmem_fwd_alloc -= amount;
+ return false;
+ }
+
+ ssk->sk_forward_alloc -= amount;
+ }
+ return true;
}
static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
@@ -285,15 +350,8 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
skb_orphan(skb);
/* try to fetch required memory from subflow */
- if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
- int amount = sk_mem_pages(skb->truesize) << SK_MEM_QUANTUM_SHIFT;
-
- if (ssk->sk_forward_alloc < amount)
- goto drop;
-
- ssk->sk_forward_alloc -= amount;
- sk->sk_forward_alloc += amount;
- }
+ if (!mptcp_rmem_schedule(sk, ssk, skb->truesize))
+ goto drop;
has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp;
@@ -313,7 +371,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
if (tail && mptcp_try_coalesce(sk, tail, skb))
return true;
- skb_set_owner_r(skb, sk);
+ mptcp_set_owner_r(skb, sk);
__skb_queue_tail(&sk->sk_receive_queue, skb);
return true;
} else if (after64(MPTCP_SKB_CB(skb)->map_seq, msk->ack_seq)) {
@@ -908,122 +966,20 @@ static bool mptcp_frag_can_collapse_to(const struct mptcp_sock *msk,
df->data_seq + df->data_len == msk->write_seq;
}
-static int mptcp_wmem_with_overhead(int size)
-{
- return size + ((sizeof(struct mptcp_data_frag) * size) >> PAGE_SHIFT);
-}
-
-static void __mptcp_wmem_reserve(struct sock *sk, int size)
-{
- int amount = mptcp_wmem_with_overhead(size);
- struct mptcp_sock *msk = mptcp_sk(sk);
-
- WARN_ON_ONCE(msk->wmem_reserved);
- if (WARN_ON_ONCE(amount < 0))
- amount = 0;
-
- if (amount <= sk->sk_forward_alloc)
- goto reserve;
-
- /* under memory pressure try to reserve at most a single page
- * otherwise try to reserve the full estimate and fallback
- * to a single page before entering the error path
- */
- if ((tcp_under_memory_pressure(sk) && amount > PAGE_SIZE) ||
- !sk_wmem_schedule(sk, amount)) {
- if (amount <= PAGE_SIZE)
- goto nomem;
-
- amount = PAGE_SIZE;
- if (!sk_wmem_schedule(sk, amount))
- goto nomem;
- }
-
-reserve:
- msk->wmem_reserved = amount;
- sk->sk_forward_alloc -= amount;
- return;
-
-nomem:
- /* we will wait for memory on next allocation */
- msk->wmem_reserved = -1;
-}
-
-static void __mptcp_update_wmem(struct sock *sk)
+static void __mptcp_mem_reclaim_partial(struct sock *sk)
{
- struct mptcp_sock *msk = mptcp_sk(sk);
+ int reclaimable = mptcp_sk(sk)->rmem_fwd_alloc - sk_unused_reserved_mem(sk);
lockdep_assert_held_once(&sk->sk_lock.slock);
- if (!msk->wmem_reserved)
- return;
-
- if (msk->wmem_reserved < 0)
- msk->wmem_reserved = 0;
- if (msk->wmem_reserved > 0) {
- sk->sk_forward_alloc += msk->wmem_reserved;
- msk->wmem_reserved = 0;
- }
-}
-
-static bool mptcp_wmem_alloc(struct sock *sk, int size)
-{
- struct mptcp_sock *msk = mptcp_sk(sk);
-
- /* check for pre-existing error condition */
- if (msk->wmem_reserved < 0)
- return false;
-
- if (msk->wmem_reserved >= size)
- goto account;
-
- mptcp_data_lock(sk);
- if (!sk_wmem_schedule(sk, size)) {
- mptcp_data_unlock(sk);
- return false;
- }
-
- sk->sk_forward_alloc -= size;
- msk->wmem_reserved += size;
- mptcp_data_unlock(sk);
-
-account:
- msk->wmem_reserved -= size;
- return true;
-}
-
-static void mptcp_wmem_uncharge(struct sock *sk, int size)
-{
- struct mptcp_sock *msk = mptcp_sk(sk);
-
- if (msk->wmem_reserved < 0)
- msk->wmem_reserved = 0;
- msk->wmem_reserved += size;
-}
-
-static void __mptcp_mem_reclaim_partial(struct sock *sk)
-{
- lockdep_assert_held_once(&sk->sk_lock.slock);
- __mptcp_update_wmem(sk);
+ __mptcp_rmem_reclaim(sk, reclaimable - 1);
sk_mem_reclaim_partial(sk);
}
static void mptcp_mem_reclaim_partial(struct sock *sk)
{
- struct mptcp_sock *msk = mptcp_sk(sk);
-
- /* if we are experiencing a transint allocation error,
- * the forward allocation memory has been already
- * released
- */
- if (msk->wmem_reserved < 0)
- return;
-
mptcp_data_lock(sk);
- sk->sk_forward_alloc += msk->wmem_reserved;
- sk_mem_reclaim_partial(sk);
- msk->wmem_reserved = sk->sk_forward_alloc;
- sk->sk_forward_alloc = 0;
+ __mptcp_mem_reclaim_partial(sk);
mptcp_data_unlock(sk);
}
@@ -1218,7 +1174,7 @@ static struct sk_buff *__mptcp_do_alloc_tx_skb(struct sock *sk, gfp_t gfp)
if (likely(skb)) {
if (likely(__mptcp_add_ext(skb, gfp))) {
skb_reserve(skb, MAX_TCP_HEADER);
- skb->reserved_tailroom = skb->end - skb->tail;
+ skb->ip_summed = CHECKSUM_PARTIAL;
INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
return skb;
}
@@ -1335,7 +1291,7 @@ alloc_skb:
u64 snd_una = READ_ONCE(msk->snd_una);
if (snd_una != msk->snd_nxt) {
- tcp_remove_empty_skb(ssk, tcp_write_queue_tail(ssk));
+ tcp_remove_empty_skb(ssk);
return 0;
}
@@ -1351,7 +1307,7 @@ alloc_skb:
copy = min_t(size_t, copy, info->limit - info->sent);
if (!sk_wmem_schedule(ssk, copy)) {
- tcp_remove_empty_skb(ssk, tcp_write_queue_tail(ssk));
+ tcp_remove_empty_skb(ssk);
return -ENOMEM;
}
@@ -1367,7 +1323,6 @@ alloc_skb:
skb->truesize += copy;
sk_wmem_queued_add(ssk, copy);
sk_mem_charge(ssk, copy);
- skb->ip_summed = CHECKSUM_PARTIAL;
WRITE_ONCE(tcp_sk(ssk)->write_seq, tcp_sk(ssk)->write_seq + copy);
TCP_SKB_CB(skb)->end_seq += copy;
tcp_skb_pcount_set(skb, 0);
@@ -1513,8 +1468,7 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
return NULL;
}
-static void mptcp_push_release(struct sock *sk, struct sock *ssk,
- struct mptcp_sendmsg_info *info)
+static void mptcp_push_release(struct sock *ssk, struct mptcp_sendmsg_info *info)
{
tcp_push(ssk, 0, info->mss_now, tcp_sk(ssk)->nonagle, info->size_goal);
release_sock(ssk);
@@ -1577,7 +1531,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
* the last round, release prev_ssk
*/
if (ssk != prev_ssk && prev_ssk)
- mptcp_push_release(sk, prev_ssk, &info);
+ mptcp_push_release(prev_ssk, &info);
if (!ssk)
goto out;
@@ -1590,7 +1544,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
if (ret <= 0) {
- mptcp_push_release(sk, ssk, &info);
+ mptcp_push_release(ssk, &info);
goto out;
}
@@ -1605,7 +1559,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
/* at this point we held the socket lock for the last subflow we used */
if (ssk)
- mptcp_push_release(sk, ssk, &info);
+ mptcp_push_release(ssk, &info);
out:
/* ensure the rtx timer is running */
@@ -1664,7 +1618,6 @@ out:
/* __mptcp_alloc_tx_skb could have released some wmem and we are
* not going to flush it via release_sock()
*/
- __mptcp_update_wmem(sk);
if (copied) {
tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
info.size_goal);
@@ -1701,7 +1654,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
/* silently ignore everything else */
msg->msg_flags &= MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL;
- mptcp_lock_sock(sk, __mptcp_wmem_reserve(sk, min_t(size_t, 1 << 20, len)));
+ lock_sock(sk);
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
@@ -1749,17 +1702,17 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
psize = min_t(size_t, psize, msg_data_left(msg));
total_ts = psize + frag_truesize;
- if (!mptcp_wmem_alloc(sk, total_ts))
+ if (!sk_wmem_schedule(sk, total_ts))
goto wait_for_memory;
if (copy_page_from_iter(dfrag->page, offset, psize,
&msg->msg_iter) != psize) {
- mptcp_wmem_uncharge(sk, psize + frag_truesize);
ret = -EFAULT;
goto out;
}
/* data successfully copied into the write queue */
+ sk->sk_forward_alloc -= total_ts;
copied += psize;
dfrag->data_len += psize;
frag_truesize += psize;
@@ -1956,7 +1909,7 @@ static void __mptcp_update_rmem(struct sock *sk)
return;
atomic_sub(msk->rmem_released, &sk->sk_rmem_alloc);
- sk_mem_uncharge(sk, msk->rmem_released);
+ mptcp_rmem_uncharge(sk, msk->rmem_released);
WRITE_ONCE(msk->rmem_released, 0);
}
@@ -2024,7 +1977,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
if (unlikely(flags & MSG_ERRQUEUE))
return inet_recv_error(sk, msg, len, addr_len);
- mptcp_lock_sock(sk, __mptcp_splice_receive_queue(sk));
+ lock_sock(sk);
if (unlikely(sk->sk_state == TCP_LISTEN)) {
copied = -ENOTCONN;
goto out_err;
@@ -2504,7 +2457,7 @@ static int __mptcp_init_sock(struct sock *sk)
__skb_queue_head_init(&msk->receive_queue);
msk->out_of_order_queue = RB_ROOT;
msk->first_pending = NULL;
- msk->wmem_reserved = 0;
+ msk->rmem_fwd_alloc = 0;
WRITE_ONCE(msk->rmem_released, 0);
msk->timer_ival = TCP_RTO_MIN;
@@ -2715,7 +2668,7 @@ static void __mptcp_destroy_sock(struct sock *sk)
sk->sk_prot->destroy(sk);
- WARN_ON_ONCE(msk->wmem_reserved);
+ WARN_ON_ONCE(msk->rmem_fwd_alloc);
WARN_ON_ONCE(msk->rmem_released);
sk_stream_kill_queues(sk);
xfrm_sk_free_policy(sk);
@@ -2948,8 +2901,14 @@ void mptcp_destroy_common(struct mptcp_sock *msk)
/* move to sk_receive_queue, sk_stream_kill_queues will purge it */
skb_queue_splice_tail_init(&msk->receive_queue, &sk->sk_receive_queue);
-
+ __skb_queue_purge(&sk->sk_receive_queue);
skb_rbtree_purge(&msk->out_of_order_queue);
+
+	/* move all the rx fwd alloc into the main sk fwd alloc; the final
+	 * memory reclaim in inet_sock_destruct() will dispose of it
+ */
+ sk->sk_forward_alloc += msk->rmem_fwd_alloc;
+ msk->rmem_fwd_alloc = 0;
mptcp_token_destroy(msk);
mptcp_pm_free_anno_list(msk);
}
@@ -3031,10 +2990,6 @@ static void mptcp_release_cb(struct sock *sk)
if (test_and_clear_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags))
__mptcp_error_report(sk);
- /* push_pending may touch wmem_reserved, ensure we do the cleanup
- * later
- */
- __mptcp_update_wmem(sk);
__mptcp_update_rmem(sk);
}
@@ -3184,6 +3139,11 @@ static void mptcp_shutdown(struct sock *sk, int how)
__mptcp_wr_shutdown(sk);
}
+static int mptcp_forward_alloc_get(const struct sock *sk)
+{
+ return sk->sk_forward_alloc + mptcp_sk(sk)->rmem_fwd_alloc;
+}
+
static struct proto mptcp_prot = {
.name = "MPTCP",
.owner = THIS_MODULE,
@@ -3201,6 +3161,7 @@ static struct proto mptcp_prot = {
.hash = mptcp_hash,
.unhash = mptcp_unhash,
.get_port = mptcp_get_port,
+ .forward_alloc_get = mptcp_forward_alloc_get,
.sockets_allocated = &mptcp_sockets_allocated,
.memory_allocated = &tcp_memory_allocated,
.memory_pressure = &tcp_memory_pressure,
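A worked example of the quantum arithmetic in mptcp_rmem_schedule() above, assuming 4 KiB pages (SK_MEM_QUANTUM == PAGE_SIZE): for an skb with truesize 6000, sk_mem_pages(6000) rounds up to 2 quanta, so amount = 2 << SK_MEM_QUANTUM_SHIFT = 8192 bytes is raised against the shared TCP memory accounting (or, if that fails, borrowed from the subflow's sk_forward_alloc). The 2192 bytes the skb does not consume stay in msk->rmem_fwd_alloc for later skbs, until mptcp_rmem_uncharge() sees more than SK_RECLAIM_THRESHOLD reclaimable and returns a SK_RECLAIM_CHUNK-sized slice to the pool.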
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 284fdcec067e..67a61ac48b20 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -227,7 +227,7 @@ struct mptcp_sock {
u64 ack_seq;
u64 rcv_wnd_sent;
u64 rcv_data_fin_seq;
- int wmem_reserved;
+ int rmem_fwd_alloc;
struct sock *last_snd;
int snd_burst;
int old_wspace;
@@ -272,19 +272,6 @@ struct mptcp_sock {
char ca_name[TCP_CA_NAME_MAX];
};
-#define mptcp_lock_sock(___sk, cb) do { \
- struct sock *__sk = (___sk); /* silence macro reuse warning */ \
- might_sleep(); \
- spin_lock_bh(&__sk->sk_lock.slock); \
- if (__sk->sk_lock.owned) \
- __lock_sock(__sk); \
- cb; \
- __sk->sk_lock.owned = 1; \
- spin_unlock(&__sk->sk_lock.slock); \
- mutex_acquire(&__sk->sk_lock.dep_map, 0, 0, _RET_IP_); \
- local_bh_enable(); \
-} while (0)
-
#define mptcp_data_lock(sk) spin_lock_bh(&(sk)->sk_lock.slock)
#define mptcp_data_unlock(sk) spin_unlock_bh(&(sk)->sk_lock.slock)
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 46254968d390..0a04468b7314 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -457,7 +457,7 @@ META_COLLECTOR(int_sk_fwd_alloc)
*err = -1;
return;
}
- dst->value = sk->sk_forward_alloc;
+ dst->value = sk_forward_alloc_get(sk);
}
META_COLLECTOR(int_sk_sndbuf)
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index b0ff0dff2773..3b0f62095803 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -1487,10 +1487,6 @@ void psched_ppscfg_precompute(struct psched_pktrate *r, u64 pktrate64)
}
EXPORT_SYMBOL(psched_ppscfg_precompute);
-static void mini_qdisc_rcu_func(struct rcu_head *head)
-{
-}
-
void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
struct tcf_proto *tp_head)
{
@@ -1503,28 +1499,30 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
if (!tp_head) {
RCU_INIT_POINTER(*miniqp->p_miniq, NULL);
- /* Wait for flying RCU callback before it is freed. */
- rcu_barrier();
- return;
- }
+ } else {
+ miniq = miniq_old != &miniqp->miniq1 ?
+ &miniqp->miniq1 : &miniqp->miniq2;
- miniq = !miniq_old || miniq_old == &miniqp->miniq2 ?
- &miniqp->miniq1 : &miniqp->miniq2;
+ /* We need to make sure that readers won't see the miniq
+ * we are about to modify. So ensure that at least one RCU
+ * grace period has elapsed since the miniq was made
+ * inactive.
+ */
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ cond_synchronize_rcu(miniq->rcu_state);
+ else if (!poll_state_synchronize_rcu(miniq->rcu_state))
+ synchronize_rcu_expedited();
- /* We need to make sure that readers won't see the miniq
- * we are about to modify. So wait until previous call_rcu callback
- * is done.
- */
- rcu_barrier();
- miniq->filter_list = tp_head;
- rcu_assign_pointer(*miniqp->p_miniq, miniq);
+ miniq->filter_list = tp_head;
+ rcu_assign_pointer(*miniqp->p_miniq, miniq);
+ }
if (miniq_old)
- /* This is counterpart of the rcu barriers above. We need to
+ /* This is the counterpart of the rcu sync above. We need to
* block potential new user of miniq_old until all readers
* are not seeing it.
*/
- call_rcu(&miniq_old->rcu, mini_qdisc_rcu_func);
+ miniq_old->rcu_state = start_poll_synchronize_rcu();
}
EXPORT_SYMBOL(mini_qdisc_pair_swap);
@@ -1543,6 +1541,8 @@ void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
miniqp->miniq1.cpu_qstats = qdisc->cpu_qstats;
miniqp->miniq2.cpu_bstats = qdisc->cpu_bstats;
miniqp->miniq2.cpu_qstats = qdisc->cpu_qstats;
+ miniqp->miniq1.rcu_state = get_state_synchronize_rcu();
+ miniqp->miniq2.rcu_state = miniqp->miniq1.rcu_state;
miniqp->p_miniq = p_miniq;
}
EXPORT_SYMBOL(mini_qdisc_pair_init);
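The unsigned long rcu_state added to struct mini_Qdisc holds a grace-period cookie from the polled RCU API, which replaces the old empty-callback call_rcu()/rcu_barrier() dance: retiring a miniq just records the cookie, and reuse only blocks if that grace period has not yet elapsed (with cond_synchronize_rcu() on PREEMPT_RT, where expedited grace periods would disturb latency). A standalone sketch of the pattern; foo_buf and its helpers are illustrative:

#include <linux/rcupdate.h>

struct foo_buf {
        unsigned long   rcu_state;      /* grace-period cookie */
        void            *payload;
};

/* retire: readers may still hold references; just note the GP state */
static void foo_retire(struct foo_buf *buf)
{
        buf->rcu_state = start_poll_synchronize_rcu();
}

/* reuse: safe to modify only once a full grace period has elapsed */
static void foo_reuse(struct foo_buf *buf, void *payload)
{
        if (!poll_state_synchronize_rcu(buf->rcu_state))
                synchronize_rcu_expedited();    /* GP still pending: wait */

        buf->payload = payload;
}

mini_qdisc_pair_init() seeds both cookies with get_state_synchronize_rcu() so the first swap has a valid state to poll.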