diff options
41 files changed, 713 insertions, 434 deletions
diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt index 1d6d02d6ba52..a5c784c89312 100644 --- a/Documentation/networking/timestamping.txt +++ b/Documentation/networking/timestamping.txt @@ -122,7 +122,7 @@ SOF_TIMESTAMPING_RAW_HARDWARE: 1.3.3 Timestamp Options -The interface supports one option +The interface supports the options SOF_TIMESTAMPING_OPT_ID: @@ -130,19 +130,36 @@ SOF_TIMESTAMPING_OPT_ID: have multiple concurrent timestamping requests outstanding. Packets can be reordered in the transmit path, for instance in the packet scheduler. In that case timestamps will be queued onto the error - queue out of order from the original send() calls. This option - embeds a counter that is incremented at send() time, to order - timestamps within a flow. + queue out of order from the original send() calls. It is not always + possible to uniquely match timestamps to the original send() calls + based on timestamp order or payload inspection alone, then. + + This option associates each packet at send() with a unique + identifier and returns that along with the timestamp. The identifier + is derived from a per-socket u32 counter (that wraps). For datagram + sockets, the counter increments with each sent packet. For stream + sockets, it increments with every byte. + + The counter starts at zero. It is initialized the first time that + the socket option is enabled. It is reset each time the option is + enabled after having been disabled. Resetting the counter does not + change the identifiers of existing packets in the system. This option is implemented only for transmit timestamps. There, the timestamp is always looped along with a struct sock_extended_err. The option modifies field ee_data to pass an id that is unique among all possibly concurrently outstanding timestamp requests for - that socket. In practice, it is a monotonically increasing u32 - (that wraps). + that socket. + + +SOF_TIMESTAMPING_OPT_CMSG: - In datagram sockets, the counter increments on each send call. In - stream sockets, it increments with every byte. + Support recv() cmsg for all timestamped packets. Control messages + are already supported unconditionally on all packets with receive + timestamps and on IPv6 packets with transmit timestamp. This option + extends them to IPv4 packets with transmit timestamp. One use case + is to correlate packets with their egress device, by enabling socket + option IP_PKTINFO simultaneously. 1.4 Bytestream Timestamps diff --git a/Documentation/networking/timestamping/txtimestamp.c b/Documentation/networking/timestamping/txtimestamp.c index b32fc2a07734..876f71c5625a 100644 --- a/Documentation/networking/timestamping/txtimestamp.c +++ b/Documentation/networking/timestamping/txtimestamp.c @@ -46,6 +46,7 @@ #include <netpacket/packet.h> #include <poll.h> #include <stdarg.h> +#include <stdbool.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> @@ -58,6 +59,14 @@ #include <time.h> #include <unistd.h> +/* ugly hack to work around netinet/in.h and linux/ipv6.h conflicts */ +#ifndef in6_pktinfo +struct in6_pktinfo { + struct in6_addr ipi6_addr; + int ipi6_ifindex; +}; +#endif + /* command line parameters */ static int cfg_proto = SOCK_STREAM; static int cfg_ipproto = IPPROTO_TCP; @@ -65,6 +74,8 @@ static int cfg_num_pkts = 4; static int do_ipv4 = 1; static int do_ipv6 = 1; static int cfg_payload_len = 10; +static bool cfg_show_payload; +static bool cfg_do_pktinfo; static uint16_t dest_port = 9000; static struct sockaddr_in daddr; @@ -131,6 +142,30 @@ static void print_timestamp(struct scm_timestamping *tss, int tstype, __print_timestamp(tsname, &tss->ts[0], tskey, payload_len); } +/* TODO: convert to check_and_print payload once API is stable */ +static void print_payload(char *data, int len) +{ + int i; + + if (len > 70) + len = 70; + + fprintf(stderr, "payload: "); + for (i = 0; i < len; i++) + fprintf(stderr, "%02hhx ", data[i]); + fprintf(stderr, "\n"); +} + +static void print_pktinfo(int family, int ifindex, void *saddr, void *daddr) +{ + char sa[INET6_ADDRSTRLEN], da[INET6_ADDRSTRLEN]; + + fprintf(stderr, " pktinfo: ifindex=%u src=%s dst=%s\n", + ifindex, + saddr ? inet_ntop(family, saddr, sa, sizeof(sa)) : "unknown", + daddr ? inet_ntop(family, daddr, da, sizeof(da)) : "unknown"); +} + static void __poll(int fd) { struct pollfd pollfd; @@ -156,10 +191,9 @@ static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len) cm->cmsg_type == SCM_TIMESTAMPING) { tss = (void *) CMSG_DATA(cm); } else if ((cm->cmsg_level == SOL_IP && - cm->cmsg_type == IP_RECVERR) || - (cm->cmsg_level == SOL_IPV6 && - cm->cmsg_type == IPV6_RECVERR)) { - + cm->cmsg_type == IP_RECVERR) || + (cm->cmsg_level == SOL_IPV6 && + cm->cmsg_type == IPV6_RECVERR)) { serr = (void *) CMSG_DATA(cm); if (serr->ee_errno != ENOMSG || serr->ee_origin != SO_EE_ORIGIN_TIMESTAMPING) { @@ -168,6 +202,16 @@ static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len) serr->ee_origin); serr = NULL; } + } else if (cm->cmsg_level == SOL_IP && + cm->cmsg_type == IP_PKTINFO) { + struct in_pktinfo *info = (void *) CMSG_DATA(cm); + print_pktinfo(AF_INET, info->ipi_ifindex, + &info->ipi_spec_dst, &info->ipi_addr); + } else if (cm->cmsg_level == SOL_IPV6 && + cm->cmsg_type == IPV6_PKTINFO) { + struct in6_pktinfo *info6 = (void *) CMSG_DATA(cm); + print_pktinfo(AF_INET6, info6->ipi6_ifindex, + NULL, &info6->ipi6_addr); } else fprintf(stderr, "unknown cmsg %d,%d\n", cm->cmsg_level, cm->cmsg_type); @@ -206,7 +250,11 @@ static int recv_errmsg(int fd) if (ret == -1 && errno != EAGAIN) error(1, errno, "recvmsg"); - __recv_errmsg_cmsg(&msg, ret); + if (ret > 0) { + __recv_errmsg_cmsg(&msg, ret); + if (cfg_show_payload) + print_payload(data, cfg_payload_len); + } free(data); return ret == -1; @@ -215,9 +263,9 @@ static int recv_errmsg(int fd) static void do_test(int family, unsigned int opt) { char *buf; - int fd, i, val, total_len; + int fd, i, val = 1, total_len; - if (family == IPPROTO_IPV6 && cfg_proto != SOCK_STREAM) { + if (family == AF_INET6 && cfg_proto != SOCK_STREAM) { /* due to lack of checksum generation code */ fprintf(stderr, "test: skipping datagram over IPv6\n"); return; @@ -239,7 +287,6 @@ static void do_test(int family, unsigned int opt) error(1, errno, "socket"); if (cfg_proto == SOCK_STREAM) { - val = 1; if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char*) &val, sizeof(val))) error(1, 0, "setsockopt no nagle"); @@ -253,7 +300,20 @@ static void do_test(int family, unsigned int opt) } } + if (cfg_do_pktinfo) { + if (family == AF_INET6) { + if (setsockopt(fd, SOL_IPV6, IPV6_RECVPKTINFO, + &val, sizeof(val))) + error(1, errno, "setsockopt pktinfo ipv6"); + } else { + if (setsockopt(fd, SOL_IP, IP_PKTINFO, + &val, sizeof(val))) + error(1, errno, "setsockopt pktinfo ipv4"); + } + } + opt |= SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_OPT_CMSG | SOF_TIMESTAMPING_OPT_ID; if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, (char *) &opt, sizeof(opt))) @@ -262,8 +322,6 @@ static void do_test(int family, unsigned int opt) for (i = 0; i < cfg_num_pkts; i++) { memset(&ts_prev, 0, sizeof(ts_prev)); memset(buf, 'a' + i, total_len); - buf[total_len - 2] = '\n'; - buf[total_len - 1] = '\0'; if (cfg_proto == SOCK_RAW) { struct udphdr *udph; @@ -324,11 +382,13 @@ static void __attribute__((noreturn)) usage(const char *filepath) " -4: only IPv4\n" " -6: only IPv6\n" " -h: show this message\n" + " -I: request PKTINFO\n" " -l N: send N bytes at a time\n" " -r: use raw\n" " -R: use raw (IP_HDRINCL)\n" " -p N: connect to port N\n" - " -u: use udp\n", + " -u: use udp\n" + " -x: show payload (up to 70 bytes)\n", filepath); exit(1); } @@ -338,7 +398,7 @@ static void parse_opt(int argc, char **argv) int proto_count = 0; char c; - while ((c = getopt(argc, argv, "46hl:p:rRu")) != -1) { + while ((c = getopt(argc, argv, "46hIl:p:rRux")) != -1) { switch (c) { case '4': do_ipv6 = 0; @@ -346,6 +406,9 @@ static void parse_opt(int argc, char **argv) case '6': do_ipv4 = 0; break; + case 'I': + cfg_do_pktinfo = true; + break; case 'r': proto_count++; cfg_proto = SOCK_RAW; @@ -367,6 +430,9 @@ static void parse_opt(int argc, char **argv) case 'p': dest_port = strtoul(optarg, NULL, 10); break; + case 'x': + cfg_show_payload = true; + break; case 'h': default: usage(argv[0]); diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index a2d1a9612c86..191a6a3ae6ca 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -517,6 +517,8 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) /* Just return here, no channel found */ return; + channel->rescind = true; + /* work is initialized for vmbus_process_rescind_offer() from * vmbus_process_offer() where the channel got created */ queue_work(channel->controlwq, &channel->work); diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 4c89b64aa9cf..5a80dd993761 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -159,6 +159,9 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) sizeof(*in), reg_mr_callback, mr, &mr->out); if (err) { + spin_lock_irq(&ent->lock); + ent->pending--; + spin_unlock_irq(&ent->lock); mlx5_ib_warn(dev, "create mkey failed %d\n", err); kfree(mr); break; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 0e2ef9fe0e29..1cae1c7132b4 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1011,9 +1011,14 @@ static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *recv } } else { spin_lock_irq(&send_cq->lock); + __acquire(&recv_cq->lock); } } else if (recv_cq) { spin_lock_irq(&recv_cq->lock); + __acquire(&send_cq->lock); + } else { + __acquire(&send_cq->lock); + __acquire(&recv_cq->lock); } } @@ -1033,10 +1038,15 @@ static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq, struct mlx5_ib_cq *re spin_unlock_irq(&recv_cq->lock); } } else { + __release(&recv_cq->lock); spin_unlock_irq(&send_cq->lock); } } else if (recv_cq) { + __release(&send_cq->lock); spin_unlock_irq(&recv_cq->lock); + } else { + __release(&recv_cq->lock); + __release(&send_cq->lock); } } @@ -2411,7 +2421,7 @@ static u8 get_fence(u8 fence, struct ib_send_wr *wr) static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, struct mlx5_wqe_ctrl_seg **ctrl, - struct ib_send_wr *wr, int *idx, + struct ib_send_wr *wr, unsigned *idx, int *size, int nreq) { int err = 0; @@ -2737,6 +2747,8 @@ out: if (bf->need_lock) spin_lock(&bf->lock); + else + __acquire(&bf->lock); /* TBD enable WC */ if (0 && nreq == 1 && bf->uuarn && inl && size > 1 && size <= bf->buf_size / 16) { @@ -2753,6 +2765,8 @@ out: bf->offset ^= bf->buf_size; if (bf->need_lock) spin_unlock(&bf->lock); + else + __release(&bf->lock); } spin_unlock_irqrestore(&qp->sq.lock, flags); diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig index c3e260c21734..888247ad9068 100644 --- a/drivers/net/ethernet/broadcom/Kconfig +++ b/drivers/net/ethernet/broadcom/Kconfig @@ -62,7 +62,6 @@ config BCM63XX_ENET config BCMGENET tristate "Broadcom GENET internal MAC support" - depends on OF select MII select PHYLIB select FIXED_PHY if BCMGENET=y diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index f2fadb053d52..adfef5ca0d55 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -42,6 +42,7 @@ #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/phy.h> +#include <linux/platform_data/bcmgenet.h> #include <asm/unaligned.h> @@ -2586,8 +2587,9 @@ static const struct of_device_id bcmgenet_match[] = { static int bcmgenet_probe(struct platform_device *pdev) { + struct bcmgenet_platform_data *pd = pdev->dev.platform_data; struct device_node *dn = pdev->dev.of_node; - const struct of_device_id *of_id; + const struct of_device_id *of_id = NULL; struct bcmgenet_priv *priv; struct net_device *dev; const void *macaddr; @@ -2601,9 +2603,11 @@ static int bcmgenet_probe(struct platform_device *pdev) return -ENOMEM; } - of_id = of_match_node(bcmgenet_match, dn); - if (!of_id) - return -EINVAL; + if (dn) { + of_id = of_match_node(bcmgenet_match, dn); + if (!of_id) + return -EINVAL; + } priv = netdev_priv(dev); priv->irq0 = platform_get_irq(pdev, 0); @@ -2615,11 +2619,15 @@ static int bcmgenet_probe(struct platform_device *pdev) goto err; } - macaddr = of_get_mac_address(dn); - if (!macaddr) { - dev_err(&pdev->dev, "can't find MAC address\n"); - err = -EINVAL; - goto err; + if (dn) { + macaddr = of_get_mac_address(dn); + if (!macaddr) { + dev_err(&pdev->dev, "can't find MAC address\n"); + err = -EINVAL; + goto err; + } + } else { + macaddr = pd->mac_address; } r = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -2659,7 +2667,10 @@ static int bcmgenet_probe(struct platform_device *pdev) priv->dev = dev; priv->pdev = pdev; - priv->version = (enum bcmgenet_version)of_id->data; + if (of_id) + priv->version = (enum bcmgenet_version)of_id->data; + else + priv->version = pd->genet_version; priv->clk = devm_clk_get(&priv->pdev->dev, "enet"); if (IS_ERR(priv->clk)) diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 933cd7e7cd33..446889cc3c6a 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -23,6 +23,7 @@ #include <linux/of.h> #include <linux/of_net.h> #include <linux/of_mdio.h> +#include <linux/platform_data/bcmgenet.h> #include "bcmgenet.h" @@ -312,22 +313,6 @@ static int bcmgenet_mii_probe(struct net_device *dev) u32 phy_flags; int ret; - if (priv->phydev) { - pr_info("PHY already attached\n"); - return 0; - } - - /* In the case of a fixed PHY, the DT node associated - * to the PHY is the Ethernet MAC DT node. - */ - if (!priv->phy_dn && of_phy_is_fixed_link(dn)) { - ret = of_phy_register_fixed_link(dn); - if (ret) - return ret; - - priv->phy_dn = of_node_get(dn); - } - /* Communicate the integrated PHY revision */ phy_flags = priv->gphy_rev; @@ -337,11 +322,39 @@ static int bcmgenet_mii_probe(struct net_device *dev) priv->old_duplex = -1; priv->old_pause = -1; - phydev = of_phy_connect(dev, priv->phy_dn, bcmgenet_mii_setup, - phy_flags, priv->phy_interface); - if (!phydev) { - pr_err("could not attach to PHY\n"); - return -ENODEV; + if (dn) { + if (priv->phydev) { + pr_info("PHY already attached\n"); + return 0; + } + + /* In the case of a fixed PHY, the DT node associated + * to the PHY is the Ethernet MAC DT node. + */ + if (!priv->phy_dn && of_phy_is_fixed_link(dn)) { + ret = of_phy_register_fixed_link(dn); + if (ret) + return ret; + + priv->phy_dn = of_node_get(dn); + } + + phydev = of_phy_connect(dev, priv->phy_dn, bcmgenet_mii_setup, + phy_flags, priv->phy_interface); + if (!phydev) { + pr_err("could not attach to PHY\n"); + return -ENODEV; + } + } else { + phydev = priv->phydev; + phydev->dev_flags = phy_flags; + + ret = phy_connect_direct(dev, phydev, bcmgenet_mii_setup, + priv->phy_interface); + if (ret) { + pr_err("could not attach to PHY\n"); + return -ENODEV; + } } priv->phydev = phydev; @@ -438,6 +451,75 @@ static int bcmgenet_mii_of_init(struct bcmgenet_priv *priv) return 0; } +static int bcmgenet_mii_pd_init(struct bcmgenet_priv *priv) +{ + struct device *kdev = &priv->pdev->dev; + struct bcmgenet_platform_data *pd = kdev->platform_data; + struct mii_bus *mdio = priv->mii_bus; + struct phy_device *phydev; + int ret; + + if (pd->phy_interface != PHY_INTERFACE_MODE_MOCA && pd->mdio_enabled) { + /* + * Internal or external PHY with MDIO access + */ + if (pd->phy_address >= 0 && pd->phy_address < PHY_MAX_ADDR) + mdio->phy_mask = ~(1 << pd->phy_address); + else + mdio->phy_mask = 0; + + ret = mdiobus_register(mdio); + if (ret) { + dev_err(kdev, "failed to register MDIO bus\n"); + return ret; + } + + if (pd->phy_address >= 0 && pd->phy_address < PHY_MAX_ADDR) + phydev = mdio->phy_map[pd->phy_address]; + else + phydev = phy_find_first(mdio); + + if (!phydev) { + dev_err(kdev, "failed to register PHY device\n"); + mdiobus_unregister(mdio); + return -ENODEV; + } + } else { + /* + * MoCA port or no MDIO access. + * Use fixed PHY to represent the link layer. + */ + struct fixed_phy_status fphy_status = { + .link = 1, + .speed = pd->phy_speed, + .duplex = pd->phy_duplex, + .pause = 0, + .asym_pause = 0, + }; + + phydev = fixed_phy_register(PHY_POLL, &fphy_status, NULL); + if (!phydev || IS_ERR(phydev)) { + dev_err(kdev, "failed to register fixed PHY device\n"); + return -ENODEV; + } + } + + priv->phydev = phydev; + priv->phy_interface = pd->phy_interface; + + return 0; +} + +static int bcmgenet_mii_bus_init(struct bcmgenet_priv *priv) +{ + struct device_node *dn = priv->pdev->dev.of_node; + + if (dn) + return bcmgenet_mii_of_init(priv); + else + return bcmgenet_mii_pd_init(priv); +} + int bcmgenet_mii_init(struct net_device *dev) { struct bcmgenet_priv *priv = netdev_priv(dev); @@ -447,7 +529,7 @@ int bcmgenet_mii_init(struct net_device *dev) if (ret) return ret; - ret = bcmgenet_mii_of_init(priv); + ret = bcmgenet_mii_bus_init(priv); if (ret) goto out_free; diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index e0ab7673afe7..9461ad8d837b 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -1017,9 +1017,8 @@ static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter, * to pad short packets (<= 32 bytes) to a 36-byte length. */ if (unlikely(!BEx_chip(adapter) && skb->len <= 32)) { - if (skb_padto(skb, 36)) + if (skb_put_padto(skb, 36)) return NULL; - skb->len = 36; } if (BEx_chip(adapter) || lancer_chip(adapter)) { diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 24f3986cfae2..862d1989ae1c 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -3136,12 +3136,8 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, * packets may get corrupted during padding by HW. * To WA this issue, pad all small packets manually. */ - if (skb->len < ETH_ZLEN) { - if (skb_pad(skb, ETH_ZLEN - skb->len)) - return NETDEV_TX_OK; - skb->len = ETH_ZLEN; - skb_set_tail_pointer(skb, ETH_ZLEN); - } + if (eth_skb_pad(skb)) + return NETDEV_TX_OK; mss = skb_shinfo(skb)->gso_size; /* The controller does a simple calculation to diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 370cfa275ddb..88936aa0029d 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -5554,12 +5554,8 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, /* The minimum packet size with TCTL.PSP set is 17 bytes so * pad skb in order to meet this minimum size requirement */ - if (unlikely(skb->len < 17)) { - if (skb_pad(skb, 17 - skb->len)) - return NETDEV_TX_OK; - skb->len = 17; - skb_set_tail_pointer(skb, 17); - } + if (skb_put_padto(skb, 17)) + return NETDEV_TX_OK; mss = skb_shinfo(skb)->gso_size; if (mss) { diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index 73457ede53ec..91516aed373e 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -578,14 +578,9 @@ static bool fm10k_cleanup_headers(struct fm10k_ring *rx_ring, if (skb_is_nonlinear(skb)) fm10k_pull_tail(rx_ring, rx_desc, skb); - /* if skb_pad returns an error the skb was freed */ - if (unlikely(skb->len < 60)) { - int pad_len = 60 - skb->len; - - if (skb_pad(skb, pad_len)) - return true; - __skb_put(skb, pad_len); - } + /* if eth_skb_pad returns an error the skb was freed */ + if (eth_skb_pad(skb)) + return true; return false; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 3195d82e4942..04b441460bbd 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2399,12 +2399,8 @@ netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev) /* hardware can't handle really short frames, hardware padding works * beyond this point */ - if (unlikely(skb->len < I40E_MIN_TX_LEN)) { - if (skb_pad(skb, I40E_MIN_TX_LEN - skb->len)) - return NETDEV_TX_OK; - skb->len = I40E_MIN_TX_LEN; - skb_set_tail_pointer(skb, I40E_MIN_TX_LEN); - } + if (skb_put_padto(skb, I40E_MIN_TX_LEN)) + return NETDEV_TX_OK; return i40e_xmit_frame_ring(skb, tx_ring); } diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 3c0221620c9d..f04ad13f7159 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -5091,12 +5091,8 @@ static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, /* The minimum packet size with TCTL.PSP set is 17 so pad the skb * in order to meet this minimum size requirement. */ - if (unlikely(skb->len < 17)) { - if (skb_pad(skb, 17 - skb->len)) - return NETDEV_TX_OK; - skb->len = 17; - skb_set_tail_pointer(skb, 17); - } + if (skb_put_padto(skb, 17)) + return NETDEV_TX_OK; return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb)); } @@ -6850,14 +6846,9 @@ static bool igb_cleanup_headers(struct igb_ring *rx_ring, if (skb_is_nonlinear(skb)) igb_pull_tail(rx_ring, rx_desc, skb); - /* if skb_pad returns an error the skb was freed */ - if (unlikely(skb->len < 60)) { - int pad_len = 60 - skb->len; - - if (skb_pad(skb, pad_len)) - return true; - __skb_put(skb, pad_len); - } + /* if eth_skb_pad returns an error the skb was freed */ + if (eth_skb_pad(skb)) + return true; return false; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 82d418729dd4..fbd52924ee34 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1774,14 +1774,9 @@ static bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring, return false; #endif - /* if skb_pad returns an error the skb was freed */ - if (unlikely(skb->len < 60)) { - int pad_len = 60 - skb->len; - - if (skb_pad(skb, pad_len)) - return true; - __skb_put(skb, pad_len); - } + /* if eth_skb_pad returns an error the skb was freed */ + if (eth_skb_pad(skb)) + return true; return false; } @@ -7334,12 +7329,8 @@ static netdev_tx_t __ixgbe_xmit_frame(struct sk_buff *skb, * The minimum packet size for olinfo paylen is 17 so pad the skb * in order to meet this minimum size requirement. */ - if (unlikely(skb->len < 17)) { - if (skb_pad(skb, 17 - skb->len)) - return NETDEV_TX_OK; - skb->len = 17; - skb_set_tail_pointer(skb, 17); - } + if (skb_put_padto(skb, 17)) + return NETDEV_TX_OK; tx_ring = ring ? ring : adapter->tx_ring[skb->queue_mapping]; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 3b0ddf757fb6..62a0d8e0f17d 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -616,14 +616,9 @@ static bool ixgbevf_cleanup_headers(struct ixgbevf_ring *rx_ring, if (skb_is_nonlinear(skb)) ixgbevf_pull_tail(rx_ring, skb); - /* if skb_pad returns an error the skb was freed */ - if (unlikely(skb->len < 60)) { - int pad_len = 60 - skb->len; - - if (skb_pad(skb, pad_len)) - return true; - __skb_put(skb, pad_len); - } + /* if eth_skb_pad returns an error the skb was freed */ + if (eth_skb_pad(skb)) + return true; return false; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 368c6c5ea014..a2853057c779 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1363,7 +1363,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev) goto err_map; } - if (cmd->log_sz + cmd->log_stride > PAGE_SHIFT) { + if (cmd->log_sz + cmd->log_stride > MLX5_ADAPTER_PAGE_SHIFT) { dev_err(&dev->pdev->dev, "command queue size overflow\n"); err = -EINVAL; goto err_map; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index dfd3ad0a39c1..ab684463780b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -225,8 +225,8 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq) case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; - mlx5_core_dbg(dev, "event %s(%d) arrived\n", - eqe_type_str(eqe->type), eqe->type); + mlx5_core_dbg(dev, "event %s(%d) arrived on resource 0x%x\n", + eqe_type_str(eqe->type), eqe->type, rsn); mlx5_rsc_event(dev, rsn, eqe->type); break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 71b10b210792..3f4525619a07 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -43,6 +43,7 @@ #include <linux/mlx5/qp.h> #include <linux/mlx5/srq.h> #include <linux/debugfs.h> +#include <linux/kmod.h> #include <linux/mlx5/mlx5_ifc.h> #include "mlx5_core.h" @@ -225,7 +226,7 @@ static int mlx5_enable_msix(struct mlx5_core_dev *dev) table->msix_arr[i].entry = i; nvec = pci_enable_msix_range(dev->pdev, table->msix_arr, - MLX5_EQ_VEC_COMP_BASE, nvec); + MLX5_EQ_VEC_COMP_BASE + 1, nvec); if (nvec < 0) return nvec; @@ -840,6 +841,8 @@ struct mlx5_core_event_handler { void *data); }; +#define MLX5_IB_MOD "mlx5_ib" + static int init_one(struct pci_dev *pdev, const struct pci_device_id *id) { @@ -878,6 +881,10 @@ static int init_one(struct pci_dev *pdev, goto out_init; } + err = request_module_nowait(MLX5_IB_MOD); + if (err) + pr_info("failed request module on %s\n", MLX5_IB_MOD); + return 0; out_init: @@ -896,8 +903,12 @@ static void remove_one(struct pci_dev *pdev) } static const struct pci_device_id mlx5_core_pci_table[] = { - { PCI_VDEVICE(MELLANOX, 4113) }, /* MT4113 Connect-IB */ + { PCI_VDEVICE(MELLANOX, 4113) }, /* Connect-IB */ + { PCI_VDEVICE(MELLANOX, 4114) }, /* Connect-IB VF */ { PCI_VDEVICE(MELLANOX, 4115) }, /* ConnectX-4 */ + { PCI_VDEVICE(MELLANOX, 4116) }, /* ConnectX-4 VF */ + { PCI_VDEVICE(MELLANOX, 4117) }, /* ConnectX-4LX */ + { PCI_VDEVICE(MELLANOX, 4118) }, /* ConnectX-4LX VF */ { 0, } }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c index 0a6348cefc01..06801d6f595e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c @@ -96,6 +96,7 @@ int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn) int err; memset(&in, 0, sizeof(in)); + memset(&out, 0, sizeof(out)); in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DEALLOC_UAR); in.uarn = cpu_to_be32(uarn); err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index 9e7e3f1dce3e..af099057f0e9 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -2913,16 +2913,11 @@ again: flags |= MXGEFW_FLAGS_SMALL; /* pad frames to at least ETH_ZLEN bytes */ - if (unlikely(skb->len < ETH_ZLEN)) { - if (skb_padto(skb, ETH_ZLEN)) { - /* The packet is gone, so we must - * return 0 */ - ss->stats.tx_dropped += 1; - return NETDEV_TX_OK; - } - /* adjust the len to account for the zero pad - * so that the nic can know how long it is */ - skb->len = ETH_ZLEN; + if (eth_skb_pad(skb)) { + /* The packet is gone, so we must + * return 0 */ + ss->stats.tx_dropped += 1; + return NETDEV_TX_OK; } } diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index cf154f74cba1..b9c2f33b463d 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -1377,6 +1377,16 @@ DECLARE_RTL_COND(rtl_ocp_tx_cond) return RTL_R8(IBISR0) & 0x02; } +static void rtl8168ep_stop_cmac(struct rtl8169_private *tp) +{ + void __iomem *ioaddr = tp->mmio_addr; + + RTL_W8(IBCR2, RTL_R8(IBCR2) & ~0x01); + rtl_msleep_loop_wait_low(tp, &rtl_ocp_tx_cond, 50, 2000); + RTL_W8(IBISR0, RTL_R8(IBISR0) | 0x20); + RTL_W8(IBCR0, RTL_R8(IBCR0) & ~0x01); +} + static void rtl8168dp_driver_start(struct rtl8169_private *tp) { rtl8168_oob_notify(tp, OOB_CMD_DRIVER_START); @@ -1417,12 +1427,7 @@ static void rtl8168dp_driver_stop(struct rtl8169_private *tp) static void rtl8168ep_driver_stop(struct rtl8169_private *tp) { - void __iomem *ioaddr = tp->mmio_addr; - - RTL_W8(IBCR2, RTL_R8(IBCR2) & ~0x01); - rtl_msleep_loop_wait_low(tp, &rtl_ocp_tx_cond, 50, 2000); - RTL_W8(IBISR0, RTL_R8(IBISR0) | 0x20); - RTL_W8(IBCR0, RTL_R8(IBCR0) & ~0x01); + rtl8168ep_stop_cmac(tp); ocp_write(tp, 0x01, 0x180, OOB_CMD_DRIVER_STOP); ocp_write(tp, 0x01, 0x30, ocp_read(tp, 0x01, 0x30) | 0x01); rtl_msleep_loop_wait_low(tp, &rtl_ep_ocp_read_cond, 10, 10); @@ -5934,7 +5939,6 @@ static void rtl_hw_start_8168g_1(struct rtl8169_private *tp) rtl_w0w1_eri(tp, 0xdc, ERIAR_MASK_0001, 0x01, 0x00, ERIAR_EXGMAC); rtl_eri_write(tp, 0x2f8, ERIAR_MASK_0011, 0x1d8f, ERIAR_EXGMAC); - RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb); RTL_W32(MISC, RTL_R32(MISC) & ~RXDV_GATED_EN); RTL_W8(MaxTxPacketSize, EarlySize); @@ -6027,7 +6031,6 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp) rtl_eri_write(tp, 0x5f0, ERIAR_MASK_0011, 0x4f87, ERIAR_EXGMAC); - RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb); RTL_W32(MISC, RTL_R32(MISC) & ~RXDV_GATED_EN); RTL_W8(MaxTxPacketSize, EarlySize); @@ -6091,6 +6094,8 @@ static void rtl_hw_start_8168ep(struct rtl8169_private *tp) void __iomem *ioaddr = tp->mmio_addr; struct pci_dev *pdev = tp->pci_dev; + rtl8168ep_stop_cmac(tp); + RTL_W32(TxConfig, RTL_R32(TxConfig) | TXCFG_AUTO_FIFO); rtl_eri_write(tp, 0xc8, ERIAR_MASK_0101, 0x00080002, ERIAR_EXGMAC); @@ -6109,7 +6114,6 @@ static void rtl_hw_start_8168ep(struct rtl8169_private *tp) rtl_eri_write(tp, 0x5f0, ERIAR_MASK_0011, 0x4f87, ERIAR_EXGMAC); - RTL_W8(ChipCmd, CmdTxEnb | CmdRxEnb); RTL_W32(MISC, RTL_R32(MISC) & ~RXDV_GATED_EN); RTL_W8(MaxTxPacketSize, EarlySize); @@ -6832,14 +6836,6 @@ err_out: return -EIO; } -static bool rtl_skb_pad(struct sk_buff *skb) -{ - if (skb_padto(skb, ETH_ZLEN)) - return false; - skb_put(skb, ETH_ZLEN - skb->len); - return true; -} - static bool rtl_test_hw_pad_bug(struct rtl8169_private *tp, struct sk_buff *skb) { return skb->len < ETH_ZLEN && tp->mac_version == RTL_GIGA_MAC_VER_34; @@ -6980,7 +6976,7 @@ static bool rtl8169_tso_csum_v2(struct rtl8169_private *tp, u8 ip_protocol; if (unlikely(rtl_test_hw_pad_bug(tp, skb))) - return skb_checksum_help(skb) == 0 && rtl_skb_pad(skb); + return !(skb_checksum_help(skb) || eth_skb_pad(skb)); if (transport_offset > TCPHO_MAX) { netif_warn(tp, tx_err, tp->dev, @@ -7015,7 +7011,7 @@ static bool rtl8169_tso_csum_v2(struct rtl8169_private *tp, opts[1] |= transport_offset << TCPHO_SHIFT; } else { if (unlikely(rtl_test_hw_pad_bug(tp, skb))) - return rtl_skb_pad(skb); + return !eth_skb_pad(skb); } return true; @@ -8005,6 +8001,12 @@ static void rtl_hw_init_8168g(struct rtl8169_private *tp) return; } +static void rtl_hw_init_8168ep(struct rtl8169_private *tp) +{ + rtl8168ep_stop_cmac(tp); + rtl_hw_init_8168g(tp); +} + static void rtl_hw_initialize(struct rtl8169_private *tp) { switch (tp->mac_version) { @@ -8017,12 +8019,13 @@ static void rtl_hw_initialize(struct rtl8169_private *tp) case RTL_GIGA_MAC_VER_46: case RTL_GIGA_MAC_VER_47: case RTL_GIGA_MAC_VER_48: + rtl_hw_init_8168g(tp); + break; case RTL_GIGA_MAC_VER_49: case RTL_GIGA_MAC_VER_50: case RTL_GIGA_MAC_VER_51: - rtl_hw_init_8168g(tp); + rtl_hw_init_8168ep(tp); break; - default: break; } diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index 904fd1ab5f6e..4aaa3240453a 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -6651,13 +6651,8 @@ static netdev_tx_t niu_start_xmit(struct sk_buff *skb, return NETDEV_TX_BUSY; } - if (skb->len < ETH_ZLEN) { - unsigned int pad_bytes = ETH_ZLEN - skb->len; - - if (skb_pad(skb, pad_bytes)) - goto out; - skb_put(skb, pad_bytes); - } + if (eth_skb_pad(skb)) + goto out; len = sizeof(struct tx_pkt_hdr) + 15; if (skb_headroom(skb) < len) { diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 6fc834e4306d..dd867e6cabd6 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -764,6 +764,9 @@ int netvsc_send(struct hv_device *device, out_channel = device->channel; packet->channel = out_channel; + if (out_channel->rescind) + return -ENODEV; + if (packet->page_buf_cnt) { ret = vmbus_sendpacket_pagebuffer(out_channel, packet->page_buf, diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 7b2c5d1e9bad..ec0c40a8f653 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -958,6 +958,9 @@ static int rndis_filter_close_device(struct rndis_device *dev) return 0; ret = rndis_filter_set_packet_filter(dev, 0); + if (ret == -ENODEV) + ret = 0; + if (ret == 0) dev->state = RNDIS_DEV_INITIALIZED; diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 733980fce8e3..41c891d05f04 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -392,4 +392,16 @@ static inline unsigned long compare_ether_header(const void *a, const void *b) #endif } +/** + * eth_skb_pad - Pad buffer to mininum number of octets for Ethernet frame + * @skb: Buffer to pad + * + * An Ethernet frame should have a minimum size of 60 bytes. This function + * takes short frames and pads them with zeros up to the 60 byte limit. + */ +static inline int eth_skb_pad(struct sk_buff *skb) +{ + return skb_put_padto(skb, ETH_ZLEN); +} + #endif /* _LINUX_ETHERDEVICE_H */ diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 08cfaff8a072..476c685ca6f9 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -650,6 +650,8 @@ struct vmbus_channel { u8 monitor_grp; u8 monitor_bit; + bool rescind; /* got rescind msg */ + u32 ringbuffer_gpadlhandle; /* Allocated memory for ring buffer */ diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 1d67fd32e71c..ea4f1c46f761 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -219,23 +219,15 @@ enum { }; enum { - MLX5_DEV_CAP_FLAG_RC = 1LL << 0, - MLX5_DEV_CAP_FLAG_UC = 1LL << 1, - MLX5_DEV_CAP_FLAG_UD = 1LL << 2, MLX5_DEV_CAP_FLAG_XRC = 1LL << 3, - MLX5_DEV_CAP_FLAG_SRQ = 1LL << 6, MLX5_DEV_CAP_FLAG_BAD_PKEY_CNTR = 1LL << 8, MLX5_DEV_CAP_FLAG_BAD_QKEY_CNTR = 1LL << 9, MLX5_DEV_CAP_FLAG_APM = 1LL << 17, MLX5_DEV_CAP_FLAG_ATOMIC = 1LL << 18, MLX5_DEV_CAP_FLAG_BLOCK_MCAST = 1LL << 23, - MLX5_DEV_CAP_FLAG_ON_DMND_PG = 1LL << 24, MLX5_DEV_CAP_FLAG_CQ_MODER = 1LL << 29, MLX5_DEV_CAP_FLAG_RESIZE_CQ = 1LL << 30, - MLX5_DEV_CAP_FLAG_RESIZE_SRQ = 1LL << 32, MLX5_DEV_CAP_FLAG_DCT = 1LL << 37, - MLX5_DEV_CAP_FLAG_REMOTE_FENCE = 1LL << 38, - MLX5_DEV_CAP_FLAG_TLP_HINTS = 1LL << 39, MLX5_DEV_CAP_FLAG_SIG_HAND_OVER = 1LL << 40, MLX5_DEV_CAP_FLAG_CMDIF_CSUM = 3LL << 46, }; diff --git a/include/linux/platform_data/bcmgenet.h b/include/linux/platform_data/bcmgenet.h new file mode 100644 index 000000000000..26af54321958 --- /dev/null +++ b/include/linux/platform_data/bcmgenet.h @@ -0,0 +1,18 @@ +#ifndef __LINUX_PLATFORM_DATA_BCMGENET_H__ +#define __LINUX_PLATFORM_DATA_BCMGENET_H__ + +#include <linux/types.h> +#include <linux/if_ether.h> +#include <linux/phy.h> + +struct bcmgenet_platform_data { + bool mdio_enabled; + phy_interface_t phy_interface; + int phy_address; + int phy_speed; + int phy_duplex; + u8 mac_address[ETH_ALEN]; + int genet_version; +}; + +#endif diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 372ad5e0dcb8..aa79b3c24f66 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -542,6 +542,15 @@ static inline void hlist_add_behind_rcu(struct hlist_node *n, pos = hlist_entry_safe(rcu_dereference_bh((pos)->member.next),\ typeof(*(pos)), member)) +/** + * hlist_for_each_entry_from_rcu - iterate over a hlist continuing from current point + * @pos: the type * to use as a loop cursor. + * @member: the name of the hlist_node within the struct. + */ +#define hlist_for_each_entry_from_rcu(pos, member) \ + for (; pos; \ + pos = hlist_entry_safe(rcu_dereference((pos)->member.next),\ + typeof(*(pos)), member)) #endif /* __KERNEL__ */ #endif diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7691ad5b4771..d1e2575000b9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2461,7 +2461,6 @@ static inline int skb_cow_head(struct sk_buff *skb, unsigned int headroom) * is untouched. Otherwise it is extended. Returns zero on * success. The skb is freed on error. */ - static inline int skb_padto(struct sk_buff *skb, unsigned int len) { unsigned int size = skb->len; @@ -2470,6 +2469,29 @@ static inline int skb_padto(struct sk_buff *skb, unsigned int len) return skb_pad(skb, len - size); } +/** + * skb_put_padto - increase size and pad an skbuff up to a minimal size + * @skb: buffer to pad + * @len: minimal length + * + * Pads up a buffer to ensure the trailing bytes exist and are + * blanked. If the buffer already contains sufficient data it + * is untouched. Otherwise it is extended. Returns zero on + * success. The skb is freed on error. + */ +static inline int skb_put_padto(struct sk_buff *skb, unsigned int len) +{ + unsigned int size = skb->len; + + if (unlikely(size < len)) { + len -= size; + if (skb_pad(skb, len)) + return -ENOMEM; + __skb_put(skb, len); + } + return 0; +} + static inline int skb_add_data(struct sk_buff *skb, char __user *from, int copy) { diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index ff354021bb69..edbc888ceb51 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -23,8 +23,9 @@ enum { SOF_TIMESTAMPING_OPT_ID = (1<<7), SOF_TIMESTAMPING_TX_SCHED = (1<<8), SOF_TIMESTAMPING_TX_ACK = (1<<9), + SOF_TIMESTAMPING_OPT_CMSG = (1<<10), - SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_TX_ACK, + SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_CMSG, SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) | SOF_TIMESTAMPING_LAST }; diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 3f167d2eeb94..80d78c51f65f 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -124,7 +124,7 @@ static struct bpf_test tests[] = { { { 0, 0xfffffffd } } }, { - "DIV_KX", + "DIV_MOD_KX", .u.insns = { BPF_STMT(BPF_LD | BPF_IMM, 8), BPF_STMT(BPF_ALU | BPF_DIV | BPF_K, 2), @@ -134,12 +134,18 @@ static struct bpf_test tests[] = { BPF_STMT(BPF_MISC | BPF_TAX, 0), BPF_STMT(BPF_LD | BPF_IMM, 0xffffffff), BPF_STMT(BPF_ALU | BPF_DIV | BPF_K, 0x70000000), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_LD | BPF_IMM, 0xffffffff), + BPF_STMT(BPF_ALU | BPF_MOD | BPF_X, 0), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_LD | BPF_IMM, 0xffffffff), + BPF_STMT(BPF_ALU | BPF_MOD | BPF_K, 0x70000000), BPF_STMT(BPF_ALU | BPF_ADD | BPF_X, 0), BPF_STMT(BPF_RET | BPF_A, 0) }, CLASSIC | FLAG_NO_DATA, { }, - { { 0, 0x40000001 } } + { { 0, 0x20000000 } } }, { "AND_OR_LSH_K", diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index b7826575d215..640f26c6a9fe 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -399,6 +399,22 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf kfree_skb(skb); } +static bool ipv4_pktinfo_prepare_errqueue(const struct sock *sk, + const struct sk_buff *skb, + int ee_origin) +{ + struct in_pktinfo *info = PKTINFO_SKB_CB(skb); + + if ((ee_origin != SO_EE_ORIGIN_TIMESTAMPING) || + (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG)) || + (!skb->dev)) + return false; + + info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr; + info->ipi_ifindex = skb->dev->ifindex; + return true; +} + /* * Handle MSG_ERRQUEUE */ @@ -414,6 +430,8 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) int err; int copied; + WARN_ON_ONCE(sk->sk_family == AF_INET6); + err = -EAGAIN; skb = sock_dequeue_err_skb(sk); if (skb == NULL) @@ -444,7 +462,9 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); sin = &errhdr.offender; sin->sin_family = AF_UNSPEC; - if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP) { + + if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP || + ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin)) { struct inet_sock *inet = inet_sk(sk); sin->sin_family = AF_INET; @@ -1049,7 +1069,7 @@ e_inval: } /** - * ipv4_pktinfo_prepare - transfert some info from rtable to skb + * ipv4_pktinfo_prepare - transfer some info from rtable to skb * @sk: socket * @skb: buffer * diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index b2d606833ce4..dd8e00634563 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -336,38 +336,45 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum) return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal, hash2_nulladdr); } -static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr, - unsigned short hnum, - __be16 sport, __be32 daddr, __be16 dport, int dif) +static inline int compute_score(struct sock *sk, struct net *net, + __be32 saddr, unsigned short hnum, __be16 sport, + __be32 daddr, __be16 dport, int dif) { - int score = -1; + int score; + struct inet_sock *inet; - if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum && - !ipv6_only_sock(sk)) { - struct inet_sock *inet = inet_sk(sk); + if (!net_eq(sock_net(sk), net) || + udp_sk(sk)->udp_port_hash != hnum || + ipv6_only_sock(sk)) + return -1; - score = (sk->sk_family == PF_INET ? 2 : 1); - if (inet->inet_rcv_saddr) { - if (inet->inet_rcv_saddr != daddr) - return -1; - score += 4; - } - if (inet->inet_daddr) { - if (inet->inet_daddr != saddr) - return -1; - score += 4; - } - if (inet->inet_dport) { - if (inet->inet_dport != sport) - return -1; - score += 4; - } - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - return -1; - score += 4; - } + score = (sk->sk_family == PF_INET) ? 2 : 1; + inet = inet_sk(sk); + + if (inet->inet_rcv_saddr) { + if (inet->inet_rcv_saddr != daddr) + return -1; + score += 4; + } + + if (inet->inet_daddr) { + if (inet->inet_daddr != saddr) + return -1; + score += 4; } + + if (inet->inet_dport) { + if (inet->inet_dport != sport) + return -1; + score += 4; + } + + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + return -1; + score += 4; + } + return score; } @@ -378,33 +385,39 @@ static inline int compute_score2(struct sock *sk, struct net *net, __be32 saddr, __be16 sport, __be32 daddr, unsigned int hnum, int dif) { - int score = -1; + int score; + struct inet_sock *inet; + + if (!net_eq(sock_net(sk), net) || + ipv6_only_sock(sk)) + return -1; - if (net_eq(sock_net(sk), net) && !ipv6_only_sock(sk)) { - struct inet_sock *inet = inet_sk(sk); + inet = inet_sk(sk); - if (inet->inet_rcv_saddr != daddr) + if (inet->inet_rcv_saddr != daddr || + inet->inet_num != hnum) + return -1; + + score = (sk->sk_family == PF_INET) ? 2 : 1; + + if (inet->inet_daddr) { + if (inet->inet_daddr != saddr) return -1; - if (inet->inet_num != hnum) + score += 4; + } + + if (inet->inet_dport) { + if (inet->inet_dport != sport) return -1; + score += 4; + } - score = (sk->sk_family == PF_INET ? 2 : 1); - if (inet->inet_daddr) { - if (inet->inet_daddr != saddr) - return -1; - score += 4; - } - if (inet->inet_dport) { - if (inet->inet_dport != sport) - return -1; - score += 4; - } - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - return -1; - score += 4; - } + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + return -1; + score += 4; } + return score; } diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index cc1139687fd7..2464a00e36ab 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -325,6 +325,16 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu) kfree_skb(skb); } +static void ip6_datagram_prepare_pktinfo_errqueue(struct sk_buff *skb) +{ + int ifindex = skb->dev ? skb->dev->ifindex : -1; + + if (skb->protocol == htons(ETH_P_IPV6)) + IP6CB(skb)->iif = ifindex; + else + PKTINFO_SKB_CB(skb)->ipi_ifindex = ifindex; +} + /* * Handle MSG_ERRQUEUE */ @@ -388,8 +398,12 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; sin->sin6_port = 0; - if (np->rxopt.all) + if (np->rxopt.all) { + if (serr->ee.ee_origin != SO_EE_ORIGIN_ICMP && + serr->ee.ee_origin != SO_EE_ORIGIN_ICMP6) + ip6_datagram_prepare_pktinfo_errqueue(skb); ip6_datagram_recv_common_ctl(sk, msg, skb); + } if (skb->protocol == htons(ETH_P_IPV6)) { sin->sin6_addr = ipv6_hdr(skb)->saddr; if (np->rxopt.all) @@ -491,7 +505,10 @@ void ip6_datagram_recv_common_ctl(struct sock *sk, struct msghdr *msg, ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr, &src_info.ipi6_addr); } - put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info); + + if (src_info.ipi6_ifindex >= 0) + put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, + sizeof(src_info), &src_info); } } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 7cfb5d745a2d..7f96432292ce 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -148,72 +148,85 @@ static inline int compute_score(struct sock *sk, struct net *net, const struct in6_addr *daddr, __be16 dport, int dif) { - int score = -1; + int score; + struct inet_sock *inet; - if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum && - sk->sk_family == PF_INET6) { - struct inet_sock *inet = inet_sk(sk); + if (!net_eq(sock_net(sk), net) || + udp_sk(sk)->udp_port_hash != hnum || + sk->sk_family != PF_INET6) + return -1; - score = 0; - if (inet->inet_dport) { - if (inet->inet_dport != sport) - return -1; - score++; - } - if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { - if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) - return -1; - score++; - } - if (!ipv6_addr_any(&sk->sk_v6_daddr)) { - if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr)) - return -1; - score++; - } - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - return -1; - score++; - } + score = 0; + inet = inet_sk(sk); + + if (inet->inet_dport) { + if (inet->inet_dport != sport) + return -1; + score++; } + + if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { + if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) + return -1; + score++; + } + + if (!ipv6_addr_any(&sk->sk_v6_daddr)) { + if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr)) + return -1; + score++; + } + + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + return -1; + score++; + } + return score; } #define SCORE2_MAX (1 + 1 + 1) static inline int compute_score2(struct sock *sk, struct net *net, - const struct in6_addr *saddr, __be16 sport, - const struct in6_addr *daddr, unsigned short hnum, - int dif) + const struct in6_addr *saddr, __be16 sport, + const struct in6_addr *daddr, + unsigned short hnum, int dif) { - int score = -1; + int score; + struct inet_sock *inet; - if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum && - sk->sk_family == PF_INET6) { - struct inet_sock *inet = inet_sk(sk); + if (!net_eq(sock_net(sk), net) || + udp_sk(sk)->udp_port_hash != hnum || + sk->sk_family != PF_INET6) + return -1; - if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) + if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) + return -1; + + score = 0; + inet = inet_sk(sk); + + if (inet->inet_dport) { + if (inet->inet_dport != sport) return -1; - score = 0; - if (inet->inet_dport) { - if (inet->inet_dport != sport) - return -1; - score++; - } - if (!ipv6_addr_any(&sk->sk_v6_daddr)) { - if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr)) - return -1; - score++; - } - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - return -1; - score++; - } + score++; } + + if (!ipv6_addr_any(&sk->sk_v6_daddr)) { + if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr)) + return -1; + score++; + } + + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + return -1; + score++; + } + return score; } - /* called with read_rcu_lock() */ static struct sock *udp6_lib_lookup2(struct net *net, const struct in6_addr *saddr, __be16 sport, diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 56248db75274..ba6083dca95b 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -38,39 +38,6 @@ #include "link.h" #include "name_distr.h" -/** - * struct publ_list - list of publications made by this node - * @list: circular list of publications - * @list_size: number of entries in list - */ -struct publ_list { - struct list_head list; - u32 size; -}; - -static struct publ_list publ_zone = { - .list = LIST_HEAD_INIT(publ_zone.list), - .size = 0, -}; - -static struct publ_list publ_cluster = { - .list = LIST_HEAD_INIT(publ_cluster.list), - .size = 0, -}; - -static struct publ_list publ_node = { - .list = LIST_HEAD_INIT(publ_node.list), - .size = 0, -}; - -static struct publ_list *publ_lists[] = { - NULL, - &publ_zone, /* publ_lists[TIPC_ZONE_SCOPE] */ - &publ_cluster, /* publ_lists[TIPC_CLUSTER_SCOPE] */ - &publ_node /* publ_lists[TIPC_NODE_SCOPE] */ -}; - - int sysctl_tipc_named_timeout __read_mostly = 2000; /** @@ -146,8 +113,8 @@ struct sk_buff *tipc_named_publish(struct publication *publ) struct sk_buff *buf; struct distr_item *item; - list_add_tail(&publ->local_list, &publ_lists[publ->scope]->list); - publ_lists[publ->scope]->size++; + list_add_tail_rcu(&publ->local_list, + &tipc_nametbl->publ_list[publ->scope]); if (publ->scope == TIPC_NODE_SCOPE) return NULL; @@ -172,7 +139,6 @@ struct sk_buff *tipc_named_withdraw(struct publication *publ) struct distr_item *item; list_del(&publ->local_list); - publ_lists[publ->scope]->size--; if (publ->scope == TIPC_NODE_SCOPE) return NULL; @@ -195,21 +161,17 @@ struct sk_buff *tipc_named_withdraw(struct publication *publ) * @pls: linked list of publication items to be packed into buffer chain */ static void named_distribute(struct sk_buff_head *list, u32 dnode, - struct publ_list *pls) + struct list_head *pls) { struct publication *publ; struct sk_buff *skb = NULL; struct distr_item *item = NULL; - uint dsz = pls->size * ITEM_SIZE; uint msg_dsz = (tipc_node_get_mtu(dnode, 0) / ITEM_SIZE) * ITEM_SIZE; - uint rem = dsz; - uint msg_rem = 0; + uint msg_rem = msg_dsz; - list_for_each_entry(publ, &pls->list, local_list) { + list_for_each_entry(publ, pls, local_list) { /* Prepare next buffer: */ if (!skb) { - msg_rem = min_t(uint, rem, msg_dsz); - rem -= msg_rem; skb = named_prepare_buf(PUBLICATION, msg_rem, dnode); if (!skb) { pr_warn("Bulk publication failure\n"); @@ -227,8 +189,14 @@ static void named_distribute(struct sk_buff_head *list, u32 dnode, if (!msg_rem) { __skb_queue_tail(list, skb); skb = NULL; + msg_rem = msg_dsz; } } + if (skb) { + msg_set_size(buf_msg(skb), INT_H_SIZE + (msg_dsz - msg_rem)); + skb_trim(skb, INT_H_SIZE + (msg_dsz - msg_rem)); + __skb_queue_tail(list, skb); + } } /** @@ -240,10 +208,12 @@ void tipc_named_node_up(u32 dnode) __skb_queue_head_init(&head); - read_lock_bh(&tipc_nametbl_lock); - named_distribute(&head, dnode, &publ_cluster); - named_distribute(&head, dnode, &publ_zone); - read_unlock_bh(&tipc_nametbl_lock); + rcu_read_lock(); + named_distribute(&head, dnode, + &tipc_nametbl->publ_list[TIPC_CLUSTER_SCOPE]); + named_distribute(&head, dnode, + &tipc_nametbl->publ_list[TIPC_ZONE_SCOPE]); + rcu_read_unlock(); tipc_link_xmit(&head, dnode, dnode); } @@ -290,12 +260,12 @@ static void tipc_publ_purge(struct publication *publ, u32 addr) { struct publication *p; - write_lock_bh(&tipc_nametbl_lock); + spin_lock_bh(&tipc_nametbl_lock); p = tipc_nametbl_remove_publ(publ->type, publ->lower, publ->node, publ->ref, publ->key); if (p) tipc_publ_unsubscribe(p, addr); - write_unlock_bh(&tipc_nametbl_lock); + spin_unlock_bh(&tipc_nametbl_lock); if (p != publ) { pr_err("Unable to remove publication from failed node\n" @@ -304,7 +274,7 @@ static void tipc_publ_purge(struct publication *publ, u32 addr) publ->key); } - kfree(p); + kfree_rcu(p, rcu); } void tipc_publ_notify(struct list_head *nsub_list, u32 addr) @@ -341,7 +311,7 @@ static bool tipc_update_nametbl(struct distr_item *i, u32 node, u32 dtype) ntohl(i->key)); if (publ) { tipc_publ_unsubscribe(publ, node); - kfree(publ); + kfree_rcu(publ, rcu); return true; } } else { @@ -406,14 +376,14 @@ void tipc_named_rcv(struct sk_buff *buf) u32 count = msg_data_sz(msg) / ITEM_SIZE; u32 node = msg_orignode(msg); - write_lock_bh(&tipc_nametbl_lock); + spin_lock_bh(&tipc_nametbl_lock); while (count--) { if (!tipc_update_nametbl(item, node, msg_type(msg))) tipc_named_add_backlog(item, msg_type(msg), node); item++; } tipc_named_process_backlog(); - write_unlock_bh(&tipc_nametbl_lock); + spin_unlock_bh(&tipc_nametbl_lock); kfree_skb(buf); } @@ -429,11 +399,12 @@ void tipc_named_reinit(void) struct publication *publ; int scope; - write_lock_bh(&tipc_nametbl_lock); + spin_lock_bh(&tipc_nametbl_lock); for (scope = TIPC_ZONE_SCOPE; scope <= TIPC_NODE_SCOPE; scope++) - list_for_each_entry(publ, &publ_lists[scope]->list, local_list) + list_for_each_entry_rcu(publ, &tipc_nametbl->publ_list[scope], + local_list) publ->node = tipc_own_addr; - write_unlock_bh(&tipc_nametbl_lock); + spin_unlock_bh(&tipc_nametbl_lock); } diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 772be1cd8bf6..aafa684c4db9 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -2,7 +2,7 @@ * net/tipc/name_table.c: TIPC name table code * * Copyright (c) 2000-2006, 2014, Ericsson AB - * Copyright (c) 2004-2008, 2010-2011, Wind River Systems + * Copyright (c) 2004-2008, 2010-2014, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -92,6 +92,7 @@ struct sub_seq { * @ns_list: links to adjacent name sequences in hash chain * @subscriptions: list of subscriptions for this 'type' * @lock: spinlock controlling access to publication lists of all sub-sequences + * @rcu: RCU callback head used for deferred freeing */ struct name_seq { u32 type; @@ -101,21 +102,11 @@ struct name_seq { struct hlist_node ns_list; struct list_head subscriptions; spinlock_t lock; + struct rcu_head rcu; }; -/** - * struct name_table - table containing all existing port name publications - * @types: pointer to fixed-sized array of name sequence lists, - * accessed via hashing on 'type'; name sequence lists are *not* sorted - * @local_publ_count: number of publications issued by this node - */ -struct name_table { - struct hlist_head *types; - u32 local_publ_count; -}; - -static struct name_table table; -DEFINE_RWLOCK(tipc_nametbl_lock); +struct name_table *tipc_nametbl; +DEFINE_SPINLOCK(tipc_nametbl_lock); static int hash(int x) { @@ -142,9 +133,7 @@ static struct publication *publ_create(u32 type, u32 lower, u32 upper, publ->node = node; publ->ref = port_ref; publ->key = key; - INIT_LIST_HEAD(&publ->local_list); INIT_LIST_HEAD(&publ->pport_list); - INIT_LIST_HEAD(&publ->nodesub_list); return publ; } @@ -179,22 +168,10 @@ static struct name_seq *tipc_nameseq_create(u32 type, struct hlist_head *seq_hea nseq->alloc = 1; INIT_HLIST_NODE(&nseq->ns_list); INIT_LIST_HEAD(&nseq->subscriptions); - hlist_add_head(&nseq->ns_list, seq_head); + hlist_add_head_rcu(&nseq->ns_list, seq_head); return nseq; } -/* - * nameseq_delete_empty - deletes a name sequence structure if now unused - */ -static void nameseq_delete_empty(struct name_seq *seq) -{ - if (!seq->first_free && list_empty(&seq->subscriptions)) { - hlist_del_init(&seq->ns_list); - kfree(seq->sseqs); - kfree(seq); - } -} - /** * nameseq_find_subseq - find sub-sequence (if any) matching a name instance * @@ -475,8 +452,8 @@ static struct name_seq *nametbl_find_seq(u32 type) struct hlist_head *seq_head; struct name_seq *ns; - seq_head = &table.types[hash(type)]; - hlist_for_each_entry(ns, seq_head, ns_list) { + seq_head = &tipc_nametbl->seq_hlist[hash(type)]; + hlist_for_each_entry_rcu(ns, seq_head, ns_list) { if (ns->type == type) return ns; } @@ -487,7 +464,9 @@ static struct name_seq *nametbl_find_seq(u32 type) struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper, u32 scope, u32 node, u32 port, u32 key) { + struct publication *publ; struct name_seq *seq = nametbl_find_seq(type); + int index = hash(type); if ((scope < TIPC_ZONE_SCOPE) || (scope > TIPC_NODE_SCOPE) || (lower > upper)) { @@ -497,12 +476,16 @@ struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper, } if (!seq) - seq = tipc_nameseq_create(type, &table.types[hash(type)]); + seq = tipc_nameseq_create(type, + &tipc_nametbl->seq_hlist[index]); if (!seq) return NULL; - return tipc_nameseq_insert_publ(seq, type, lower, upper, + spin_lock_bh(&seq->lock); + publ = tipc_nameseq_insert_publ(seq, type, lower, upper, scope, node, port, key); + spin_unlock_bh(&seq->lock); + return publ; } struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower, @@ -514,8 +497,16 @@ struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower, if (!seq) return NULL; + spin_lock_bh(&seq->lock); publ = tipc_nameseq_remove_publ(seq, lower, node, ref, key); - nameseq_delete_empty(seq); + if (!seq->first_free && list_empty(&seq->subscriptions)) { + hlist_del_init_rcu(&seq->ns_list); + kfree(seq->sseqs); + spin_unlock_bh(&seq->lock); + kfree_rcu(seq, rcu); + return publ; + } + spin_unlock_bh(&seq->lock); return publ; } @@ -544,14 +535,14 @@ u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *destnode) if (!tipc_in_scope(*destnode, tipc_own_addr)) return 0; - read_lock_bh(&tipc_nametbl_lock); + rcu_read_lock(); seq = nametbl_find_seq(type); if (unlikely(!seq)) goto not_found; + spin_lock_bh(&seq->lock); sseq = nameseq_find_subseq(seq, instance); if (unlikely(!sseq)) - goto not_found; - spin_lock_bh(&seq->lock); + goto no_match; info = sseq->info; /* Closest-First Algorithm */ @@ -601,7 +592,7 @@ u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *destnode) no_match: spin_unlock_bh(&seq->lock); not_found: - read_unlock_bh(&tipc_nametbl_lock); + rcu_read_unlock(); *destnode = node; return ref; } @@ -627,13 +618,12 @@ int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit, struct name_info *info; int res = 0; - read_lock_bh(&tipc_nametbl_lock); + rcu_read_lock(); seq = nametbl_find_seq(type); if (!seq) goto exit; spin_lock_bh(&seq->lock); - sseq = seq->sseqs + nameseq_locate_subseq(seq, lower); sseq_stop = seq->sseqs + seq->first_free; for (; sseq != sseq_stop; sseq++) { @@ -651,10 +641,9 @@ int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit, if (info->cluster_list_size != info->node_list_size) res = 1; } - spin_unlock_bh(&seq->lock); exit: - read_unlock_bh(&tipc_nametbl_lock); + rcu_read_unlock(); return res; } @@ -667,22 +656,23 @@ struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper, struct publication *publ; struct sk_buff *buf = NULL; - if (table.local_publ_count >= TIPC_MAX_PUBLICATIONS) { + spin_lock_bh(&tipc_nametbl_lock); + if (tipc_nametbl->local_publ_count >= TIPC_MAX_PUBLICATIONS) { pr_warn("Publication failed, local publication limit reached (%u)\n", TIPC_MAX_PUBLICATIONS); + spin_unlock_bh(&tipc_nametbl_lock); return NULL; } - write_lock_bh(&tipc_nametbl_lock); publ = tipc_nametbl_insert_publ(type, lower, upper, scope, tipc_own_addr, port_ref, key); if (likely(publ)) { - table.local_publ_count++; + tipc_nametbl->local_publ_count++; buf = tipc_named_publish(publ); /* Any pending external events? */ tipc_named_process_backlog(); } - write_unlock_bh(&tipc_nametbl_lock); + spin_unlock_bh(&tipc_nametbl_lock); if (buf) named_cluster_distribute(buf); @@ -695,27 +685,28 @@ struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper, int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key) { struct publication *publ; - struct sk_buff *buf; + struct sk_buff *skb = NULL; - write_lock_bh(&tipc_nametbl_lock); + spin_lock_bh(&tipc_nametbl_lock); publ = tipc_nametbl_remove_publ(type, lower, tipc_own_addr, ref, key); if (likely(publ)) { - table.local_publ_count--; - buf = tipc_named_withdraw(publ); + tipc_nametbl->local_publ_count--; + skb = tipc_named_withdraw(publ); /* Any pending external events? */ tipc_named_process_backlog(); - write_unlock_bh(&tipc_nametbl_lock); list_del_init(&publ->pport_list); - kfree(publ); + kfree_rcu(publ, rcu); + } else { + pr_err("Unable to remove local publication\n" + "(type=%u, lower=%u, ref=%u, key=%u)\n", + type, lower, ref, key); + } + spin_unlock_bh(&tipc_nametbl_lock); - if (buf) - named_cluster_distribute(buf); + if (skb) { + named_cluster_distribute(skb); return 1; } - write_unlock_bh(&tipc_nametbl_lock); - pr_err("Unable to remove local publication\n" - "(type=%u, lower=%u, ref=%u, key=%u)\n", - type, lower, ref, key); return 0; } @@ -725,12 +716,14 @@ int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key) void tipc_nametbl_subscribe(struct tipc_subscription *s) { u32 type = s->seq.type; + int index = hash(type); struct name_seq *seq; - write_lock_bh(&tipc_nametbl_lock); + spin_lock_bh(&tipc_nametbl_lock); seq = nametbl_find_seq(type); if (!seq) - seq = tipc_nameseq_create(type, &table.types[hash(type)]); + seq = tipc_nameseq_create(type, + &tipc_nametbl->seq_hlist[index]); if (seq) { spin_lock_bh(&seq->lock); tipc_nameseq_subscribe(seq, s); @@ -739,7 +732,7 @@ void tipc_nametbl_subscribe(struct tipc_subscription *s) pr_warn("Failed to create subscription for {%u,%u,%u}\n", s->seq.type, s->seq.lower, s->seq.upper); } - write_unlock_bh(&tipc_nametbl_lock); + spin_unlock_bh(&tipc_nametbl_lock); } /** @@ -749,18 +742,23 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *s) { struct name_seq *seq; - write_lock_bh(&tipc_nametbl_lock); + spin_lock_bh(&tipc_nametbl_lock); seq = nametbl_find_seq(s->seq.type); if (seq != NULL) { spin_lock_bh(&seq->lock); list_del_init(&s->nameseq_list); - spin_unlock_bh(&seq->lock); - nameseq_delete_empty(seq); + if (!seq->first_free && list_empty(&seq->subscriptions)) { + hlist_del_init_rcu(&seq->ns_list); + kfree(seq->sseqs); + spin_unlock_bh(&seq->lock); + kfree_rcu(seq, rcu); + } else { + spin_unlock_bh(&seq->lock); + } } - write_unlock_bh(&tipc_nametbl_lock); + spin_unlock_bh(&tipc_nametbl_lock); } - /** * subseq_list - print specified sub-sequence contents into the given buffer */ @@ -882,8 +880,8 @@ static int nametbl_list(char *buf, int len, u32 depth_info, lowbound = 0; upbound = ~0; for (i = 0; i < TIPC_NAMETBL_SIZE; i++) { - seq_head = &table.types[i]; - hlist_for_each_entry(seq, seq_head, ns_list) { + seq_head = &tipc_nametbl->seq_hlist[i]; + hlist_for_each_entry_rcu(seq, seq_head, ns_list) { ret += nameseq_list(seq, buf + ret, len - ret, depth, seq->type, lowbound, upbound, i); @@ -898,8 +896,8 @@ static int nametbl_list(char *buf, int len, u32 depth_info, } ret += nametbl_header(buf + ret, len - ret, depth); i = hash(type); - seq_head = &table.types[i]; - hlist_for_each_entry(seq, seq_head, ns_list) { + seq_head = &tipc_nametbl->seq_hlist[i]; + hlist_for_each_entry_rcu(seq, seq_head, ns_list) { if (seq->type == type) { ret += nameseq_list(seq, buf + ret, len - ret, depth, type, @@ -931,11 +929,11 @@ struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space) pb = TLV_DATA(rep_tlv); pb_len = ULTRA_STRING_MAX_LEN; argv = (struct tipc_name_table_query *)TLV_DATA(req_tlv_area); - read_lock_bh(&tipc_nametbl_lock); + rcu_read_lock(); str_len = nametbl_list(pb, pb_len, ntohl(argv->depth), ntohl(argv->type), ntohl(argv->lowbound), ntohl(argv->upbound)); - read_unlock_bh(&tipc_nametbl_lock); + rcu_read_unlock(); str_len += 1; /* for "\0" */ skb_put(buf, TLV_SPACE(str_len)); TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len); @@ -945,12 +943,18 @@ struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space) int tipc_nametbl_init(void) { - table.types = kcalloc(TIPC_NAMETBL_SIZE, sizeof(struct hlist_head), - GFP_ATOMIC); - if (!table.types) + int i; + + tipc_nametbl = kzalloc(sizeof(*tipc_nametbl), GFP_ATOMIC); + if (!tipc_nametbl) return -ENOMEM; - table.local_publ_count = 0; + for (i = 0; i < TIPC_NAMETBL_SIZE; i++) + INIT_HLIST_HEAD(&tipc_nametbl->seq_hlist[i]); + + INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_ZONE_SCOPE]); + INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_CLUSTER_SCOPE]); + INIT_LIST_HEAD(&tipc_nametbl->publ_list[TIPC_NODE_SCOPE]); return 0; } @@ -965,17 +969,19 @@ static void tipc_purge_publications(struct name_seq *seq) struct sub_seq *sseq; struct name_info *info; - if (!seq->sseqs) { - nameseq_delete_empty(seq); - return; - } + spin_lock_bh(&seq->lock); sseq = seq->sseqs; info = sseq->info; list_for_each_entry_safe(publ, safe, &info->zone_list, zone_list) { tipc_nametbl_remove_publ(publ->type, publ->lower, publ->node, publ->ref, publ->key); - kfree(publ); + kfree_rcu(publ, rcu); } + hlist_del_init_rcu(&seq->ns_list); + kfree(seq->sseqs); + spin_lock_bh(&seq->lock); + + kfree_rcu(seq, rcu); } void tipc_nametbl_stop(void) @@ -983,23 +989,24 @@ void tipc_nametbl_stop(void) u32 i; struct name_seq *seq; struct hlist_head *seq_head; - struct hlist_node *safe; /* Verify name table is empty and purge any lingering * publications, then release the name table */ - write_lock_bh(&tipc_nametbl_lock); + spin_lock_bh(&tipc_nametbl_lock); for (i = 0; i < TIPC_NAMETBL_SIZE; i++) { - if (hlist_empty(&table.types[i])) + if (hlist_empty(&tipc_nametbl->seq_hlist[i])) continue; - seq_head = &table.types[i]; - hlist_for_each_entry_safe(seq, safe, seq_head, ns_list) { + seq_head = &tipc_nametbl->seq_hlist[i]; + hlist_for_each_entry_rcu(seq, seq_head, ns_list) { tipc_purge_publications(seq); } } - kfree(table.types); - table.types = NULL; - write_unlock_bh(&tipc_nametbl_lock); + spin_unlock_bh(&tipc_nametbl_lock); + + synchronize_net(); + kfree(tipc_nametbl); + } static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg, @@ -1103,7 +1110,7 @@ static int __tipc_nl_seq_list(struct tipc_nl_msg *msg, u32 *last_type, u32 *last_lower, u32 *last_publ) { struct hlist_head *seq_head; - struct name_seq *seq; + struct name_seq *seq = NULL; int err; int i; @@ -1113,22 +1120,21 @@ static int __tipc_nl_seq_list(struct tipc_nl_msg *msg, u32 *last_type, i = 0; for (; i < TIPC_NAMETBL_SIZE; i++) { - seq_head = &table.types[i]; + seq_head = &tipc_nametbl->seq_hlist[i]; if (*last_type) { seq = nametbl_find_seq(*last_type); if (!seq) return -EPIPE; } else { - seq = hlist_entry_safe((seq_head)->first, - struct name_seq, ns_list); + hlist_for_each_entry_rcu(seq, seq_head, ns_list) + break; if (!seq) continue; } - hlist_for_each_entry_from(seq, ns_list) { + hlist_for_each_entry_from_rcu(seq, ns_list) { spin_lock_bh(&seq->lock); - err = __tipc_nl_subseq_list(msg, seq, last_lower, last_publ); @@ -1160,8 +1166,7 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb) msg.portid = NETLINK_CB(cb->skb).portid; msg.seq = cb->nlh->nlmsg_seq; - read_lock_bh(&tipc_nametbl_lock); - + rcu_read_lock(); err = __tipc_nl_seq_list(&msg, &last_type, &last_lower, &last_publ); if (!err) { done = 1; @@ -1174,8 +1179,7 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb) */ cb->prev_seq = 1; } - - read_unlock_bh(&tipc_nametbl_lock); + rcu_read_unlock(); cb->args[0] = last_type; cb->args[1] = last_lower; diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index c62877826655..5f0dee92010d 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -43,7 +43,9 @@ struct tipc_port_list; /* * TIPC name types reserved for internal TIPC use (both current and planned) */ -#define TIPC_ZM_SRV 3 /* zone master service name type */ +#define TIPC_ZM_SRV 3 /* zone master service name type */ +#define TIPC_PUBL_SCOPE_NUM (TIPC_NODE_SCOPE + 1) +#define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */ /** * struct publication - info about a published (name or) name sequence @@ -60,6 +62,7 @@ struct tipc_port_list; * @node_list: adjacent matching name seq publications with >= node scope * @cluster_list: adjacent matching name seq publications with >= cluster scope * @zone_list: adjacent matching name seq publications with >= zone scope + * @rcu: RCU callback head used for deferred freeing * * Note that the node list, cluster list, and zone list are circular lists. */ @@ -77,10 +80,23 @@ struct publication { struct list_head node_list; struct list_head cluster_list; struct list_head zone_list; + struct rcu_head rcu; }; +/** + * struct name_table - table containing all existing port name publications + * @seq_hlist: name sequence hash lists + * @publ_list: pulication lists + * @local_publ_count: number of publications issued by this node + */ +struct name_table { + struct hlist_head seq_hlist[TIPC_NAMETBL_SIZE]; + struct list_head publ_list[TIPC_PUBL_SCOPE_NUM]; + u32 local_publ_count; +}; -extern rwlock_t tipc_nametbl_lock; +extern spinlock_t tipc_nametbl_lock; +extern struct name_table *tipc_nametbl; int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb); diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 31b5cb232a43..0344206b984f 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -305,7 +305,6 @@ static int subscr_subscribe(struct tipc_subscr *s, kfree(sub); return -EINVAL; } - INIT_LIST_HEAD(&sub->nameseq_list); list_add(&sub->subscription_list, &subscriber->subscription_list); sub->subscriber = subscriber; sub->swap = swap; |