diff options
36 files changed, 1640 insertions, 162 deletions
diff --git a/Documentation/devicetree/bindings/net/nixge.txt b/Documentation/devicetree/bindings/net/nixge.txt new file mode 100644 index 000000000000..e55af7f0881a --- /dev/null +++ b/Documentation/devicetree/bindings/net/nixge.txt @@ -0,0 +1,32 @@ +* NI XGE Ethernet controller + +Required properties: +- compatible: Should be "ni,xge-enet-2.00" +- reg: Address and length of the register set for the device +- interrupts: Should contain tx and rx interrupt +- interrupt-names: Should be "rx" and "tx" +- phy-mode: See ethernet.txt file in the same directory. +- phy-handle: See ethernet.txt file in the same directory. +- nvmem-cells: Phandle of nvmem cell containing the MAC address +- nvmem-cell-names: Should be "address" + +Examples (10G generic PHY): + nixge0: ethernet@40000000 { + compatible = "ni,xge-enet-2.00"; + reg = <0x40000000 0x6000>; + + nvmem-cells = <ð1_addr>; + nvmem-cell-names = "address"; + + interrupts = <0 29 IRQ_TYPE_LEVEL_HIGH>, <0 30 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "rx", "tx"; + interrupt-parent = <&intc>; + + phy-mode = "xgmii"; + phy-handle = <ðernet_phy1>; + + ethernet_phy1: ethernet-phy@4 { + compatible = "ethernet-phy-ieee802.3-c45"; + reg = <4>; + }; + }; diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c index 5a52ec77940a..cc2966380c0c 100644 --- a/drivers/infiniband/core/roce_gid_mgmt.c +++ b/drivers/infiniband/core/roce_gid_mgmt.c @@ -403,10 +403,12 @@ static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev, * our feet */ rtnl_lock(); + down_read(&net_rwsem); for_each_net(net) for_each_netdev(net, ndev) if (is_eth_port_of_netdev(ib_dev, port, rdma_ndev, ndev)) add_netdev_ips(ib_dev, port, rdma_ndev, ndev); + up_read(&net_rwsem); rtnl_unlock(); } diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index b6cf4b6962f5..908218561fdd 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -129,6 +129,7 @@ config FEALNX source "drivers/net/ethernet/natsemi/Kconfig" source "drivers/net/ethernet/netronome/Kconfig" +source "drivers/net/ethernet/ni/Kconfig" source "drivers/net/ethernet/8390/Kconfig" config NET_NETX diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile index 3cdf01e96e0b..d732e9522b76 100644 --- a/drivers/net/ethernet/Makefile +++ b/drivers/net/ethernet/Makefile @@ -61,6 +61,7 @@ obj-$(CONFIG_NET_VENDOR_MYRI) += myricom/ obj-$(CONFIG_FEALNX) += fealnx.o obj-$(CONFIG_NET_VENDOR_NATSEMI) += natsemi/ obj-$(CONFIG_NET_VENDOR_NETRONOME) += netronome/ +obj-$(CONFIG_NET_VENDOR_NI) += ni/ obj-$(CONFIG_NET_NETX) += netx-eth.o obj-$(CONFIG_NET_VENDOR_NUVOTON) += nuvoton/ obj-$(CONFIG_NET_VENDOR_NVIDIA) += nvidia/ diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 8eef9fb6b1fe..2326cc219c46 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -1190,7 +1190,7 @@ static int bgmac_open(struct net_device *net_dev) bgmac_chip_init(bgmac); err = request_irq(bgmac->irq, bgmac_interrupt, IRQF_SHARED, - KBUILD_MODNAME, net_dev); + net_dev->name, net_dev); if (err < 0) { dev_err(bgmac->dev, "IRQ request error: %d!\n", err); bgmac_dma_cleanup(bgmac); @@ -1492,6 +1492,8 @@ int bgmac_enet_probe(struct bgmac *bgmac) struct net_device *net_dev = bgmac->net_dev; int err; + bgmac_chip_intrs_off(bgmac); + net_dev->irq = bgmac->irq; SET_NETDEV_DEV(net_dev, bgmac->dev); dev_set_drvdata(bgmac->dev, bgmac); diff --git a/drivers/net/ethernet/ni/Kconfig b/drivers/net/ethernet/ni/Kconfig new file mode 100644 index 000000000000..aa41e5f6e437 --- /dev/null +++ b/drivers/net/ethernet/ni/Kconfig @@ -0,0 +1,27 @@ +# +# National Instuments network device configuration +# + +config NET_VENDOR_NI + bool "National Instruments Devices" + default y + help + If you have a network (Ethernet) device belonging to this class, say Y. + + Note that the answer to this question doesn't directly affect the + kernel: saying N will just cause the configurator to skip all + the questions about National Instrument devices. + If you say Y, you will be asked for your specific device in the + following questions. + +if NET_VENDOR_NI + +config NI_XGE_MANAGEMENT_ENET + tristate "National Instruments XGE management enet support" + depends on ARCH_ZYNQ + select PHYLIB + help + Simple LAN device for debug or management purposes. Can + support either 10G or 1G PHYs via SFP+ ports. + +endif diff --git a/drivers/net/ethernet/ni/Makefile b/drivers/net/ethernet/ni/Makefile new file mode 100644 index 000000000000..99c664651c51 --- /dev/null +++ b/drivers/net/ethernet/ni/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_NI_XGE_MANAGEMENT_ENET) += nixge.o diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c new file mode 100644 index 000000000000..27364b7572fc --- /dev/null +++ b/drivers/net/ethernet/ni/nixge.c @@ -0,0 +1,1310 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2016-2017, National Instruments Corp. + * + * Author: Moritz Fischer <[email protected]> + */ + +#include <linux/etherdevice.h> +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/of_address.h> +#include <linux/of_mdio.h> +#include <linux/of_net.h> +#include <linux/of_platform.h> +#include <linux/of_irq.h> +#include <linux/skbuff.h> +#include <linux/phy.h> +#include <linux/mii.h> +#include <linux/nvmem-consumer.h> +#include <linux/ethtool.h> +#include <linux/iopoll.h> + +#define TX_BD_NUM 64 +#define RX_BD_NUM 128 + +/* Axi DMA Register definitions */ +#define XAXIDMA_TX_CR_OFFSET 0x00 /* Channel control */ +#define XAXIDMA_TX_SR_OFFSET 0x04 /* Status */ +#define XAXIDMA_TX_CDESC_OFFSET 0x08 /* Current descriptor pointer */ +#define XAXIDMA_TX_TDESC_OFFSET 0x10 /* Tail descriptor pointer */ + +#define XAXIDMA_RX_CR_OFFSET 0x30 /* Channel control */ +#define XAXIDMA_RX_SR_OFFSET 0x34 /* Status */ +#define XAXIDMA_RX_CDESC_OFFSET 0x38 /* Current descriptor pointer */ +#define XAXIDMA_RX_TDESC_OFFSET 0x40 /* Tail descriptor pointer */ + +#define XAXIDMA_CR_RUNSTOP_MASK 0x1 /* Start/stop DMA channel */ +#define XAXIDMA_CR_RESET_MASK 0x4 /* Reset DMA engine */ + +#define XAXIDMA_BD_CTRL_LENGTH_MASK 0x007FFFFF /* Requested len */ +#define XAXIDMA_BD_CTRL_TXSOF_MASK 0x08000000 /* First tx packet */ +#define XAXIDMA_BD_CTRL_TXEOF_MASK 0x04000000 /* Last tx packet */ +#define XAXIDMA_BD_CTRL_ALL_MASK 0x0C000000 /* All control bits */ + +#define XAXIDMA_DELAY_MASK 0xFF000000 /* Delay timeout counter */ +#define XAXIDMA_COALESCE_MASK 0x00FF0000 /* Coalesce counter */ + +#define XAXIDMA_DELAY_SHIFT 24 +#define XAXIDMA_COALESCE_SHIFT 16 + +#define XAXIDMA_IRQ_IOC_MASK 0x00001000 /* Completion intr */ +#define XAXIDMA_IRQ_DELAY_MASK 0x00002000 /* Delay interrupt */ +#define XAXIDMA_IRQ_ERROR_MASK 0x00004000 /* Error interrupt */ +#define XAXIDMA_IRQ_ALL_MASK 0x00007000 /* All interrupts */ + +/* Default TX/RX Threshold and waitbound values for SGDMA mode */ +#define XAXIDMA_DFT_TX_THRESHOLD 24 +#define XAXIDMA_DFT_TX_WAITBOUND 254 +#define XAXIDMA_DFT_RX_THRESHOLD 24 +#define XAXIDMA_DFT_RX_WAITBOUND 254 + +#define XAXIDMA_BD_STS_ACTUAL_LEN_MASK 0x007FFFFF /* Actual len */ +#define XAXIDMA_BD_STS_COMPLETE_MASK 0x80000000 /* Completed */ +#define XAXIDMA_BD_STS_DEC_ERR_MASK 0x40000000 /* Decode error */ +#define XAXIDMA_BD_STS_SLV_ERR_MASK 0x20000000 /* Slave error */ +#define XAXIDMA_BD_STS_INT_ERR_MASK 0x10000000 /* Internal err */ +#define XAXIDMA_BD_STS_ALL_ERR_MASK 0x70000000 /* All errors */ +#define XAXIDMA_BD_STS_RXSOF_MASK 0x08000000 /* First rx pkt */ +#define XAXIDMA_BD_STS_RXEOF_MASK 0x04000000 /* Last rx pkt */ +#define XAXIDMA_BD_STS_ALL_MASK 0xFC000000 /* All status bits */ + +#define NIXGE_REG_CTRL_OFFSET 0x4000 +#define NIXGE_REG_INFO 0x00 +#define NIXGE_REG_MAC_CTL 0x04 +#define NIXGE_REG_PHY_CTL 0x08 +#define NIXGE_REG_LED_CTL 0x0c +#define NIXGE_REG_MDIO_DATA 0x10 +#define NIXGE_REG_MDIO_ADDR 0x14 +#define NIXGE_REG_MDIO_OP 0x18 +#define NIXGE_REG_MDIO_CTRL 0x1c + +#define NIXGE_ID_LED_CTL_EN BIT(0) +#define NIXGE_ID_LED_CTL_VAL BIT(1) + +#define NIXGE_MDIO_CLAUSE45 BIT(12) +#define NIXGE_MDIO_CLAUSE22 0 +#define NIXGE_MDIO_OP(n) (((n) & 0x3) << 10) +#define NIXGE_MDIO_OP_ADDRESS 0 +#define NIXGE_MDIO_C45_WRITE BIT(0) +#define NIXGE_MDIO_C45_READ (BIT(1) | BIT(0)) +#define NIXGE_MDIO_C22_WRITE BIT(0) +#define NIXGE_MDIO_C22_READ BIT(1) +#define NIXGE_MDIO_ADDR(n) (((n) & 0x1f) << 5) +#define NIXGE_MDIO_MMD(n) (((n) & 0x1f) << 0) + +#define NIXGE_REG_MAC_LSB 0x1000 +#define NIXGE_REG_MAC_MSB 0x1004 + +/* Packet size info */ +#define NIXGE_HDR_SIZE 14 /* Size of Ethernet header */ +#define NIXGE_TRL_SIZE 4 /* Size of Ethernet trailer (FCS) */ +#define NIXGE_MTU 1500 /* Max MTU of an Ethernet frame */ +#define NIXGE_JUMBO_MTU 9000 /* Max MTU of a jumbo Eth. frame */ + +#define NIXGE_MAX_FRAME_SIZE (NIXGE_MTU + NIXGE_HDR_SIZE + NIXGE_TRL_SIZE) +#define NIXGE_MAX_JUMBO_FRAME_SIZE \ + (NIXGE_JUMBO_MTU + NIXGE_HDR_SIZE + NIXGE_TRL_SIZE) + +struct nixge_hw_dma_bd { + u32 next; + u32 reserved1; + u32 phys; + u32 reserved2; + u32 reserved3; + u32 reserved4; + u32 cntrl; + u32 status; + u32 app0; + u32 app1; + u32 app2; + u32 app3; + u32 app4; + u32 sw_id_offset; + u32 reserved5; + u32 reserved6; +}; + +struct nixge_tx_skb { + struct sk_buff *skb; + dma_addr_t mapping; + size_t size; + bool mapped_as_page; +}; + +struct nixge_priv { + struct net_device *ndev; + struct napi_struct napi; + struct device *dev; + + /* Connection to PHY device */ + struct device_node *phy_node; + phy_interface_t phy_mode; + + int link; + unsigned int speed; + unsigned int duplex; + + /* MDIO bus data */ + struct mii_bus *mii_bus; /* MII bus reference */ + + /* IO registers, dma functions and IRQs */ + void __iomem *ctrl_regs; + void __iomem *dma_regs; + + struct tasklet_struct dma_err_tasklet; + + int tx_irq; + int rx_irq; + u32 last_link; + + /* Buffer descriptors */ + struct nixge_hw_dma_bd *tx_bd_v; + struct nixge_tx_skb *tx_skb; + dma_addr_t tx_bd_p; + + struct nixge_hw_dma_bd *rx_bd_v; + dma_addr_t rx_bd_p; + u32 tx_bd_ci; + u32 tx_bd_tail; + u32 rx_bd_ci; + + u32 coalesce_count_rx; + u32 coalesce_count_tx; +}; + +static void nixge_dma_write_reg(struct nixge_priv *priv, off_t offset, u32 val) +{ + writel(val, priv->dma_regs + offset); +} + +static u32 nixge_dma_read_reg(const struct nixge_priv *priv, off_t offset) +{ + return readl(priv->dma_regs + offset); +} + +static void nixge_ctrl_write_reg(struct nixge_priv *priv, off_t offset, u32 val) +{ + writel(val, priv->ctrl_regs + offset); +} + +static u32 nixge_ctrl_read_reg(struct nixge_priv *priv, off_t offset) +{ + return readl(priv->ctrl_regs + offset); +} + +#define nixge_ctrl_poll_timeout(priv, addr, val, cond, sleep_us, timeout_us) \ + readl_poll_timeout((priv)->ctrl_regs + (addr), (val), (cond), \ + (sleep_us), (timeout_us)) + +#define nixge_dma_poll_timeout(priv, addr, val, cond, sleep_us, timeout_us) \ + readl_poll_timeout((priv)->dma_regs + (addr), (val), (cond), \ + (sleep_us), (timeout_us)) + +static void nixge_hw_dma_bd_release(struct net_device *ndev) +{ + struct nixge_priv *priv = netdev_priv(ndev); + int i; + + for (i = 0; i < RX_BD_NUM; i++) { + dma_unmap_single(ndev->dev.parent, priv->rx_bd_v[i].phys, + NIXGE_MAX_JUMBO_FRAME_SIZE, DMA_FROM_DEVICE); + dev_kfree_skb((struct sk_buff *) + (priv->rx_bd_v[i].sw_id_offset)); + } + + if (priv->rx_bd_v) + dma_free_coherent(ndev->dev.parent, + sizeof(*priv->rx_bd_v) * RX_BD_NUM, + priv->rx_bd_v, + priv->rx_bd_p); + + if (priv->tx_skb) + devm_kfree(ndev->dev.parent, priv->tx_skb); + + if (priv->tx_bd_v) + dma_free_coherent(ndev->dev.parent, + sizeof(*priv->tx_bd_v) * TX_BD_NUM, + priv->tx_bd_v, + priv->tx_bd_p); +} + +static int nixge_hw_dma_bd_init(struct net_device *ndev) +{ + struct nixge_priv *priv = netdev_priv(ndev); + struct sk_buff *skb; + u32 cr; + int i; + + /* Reset the indexes which are used for accessing the BDs */ + priv->tx_bd_ci = 0; + priv->tx_bd_tail = 0; + priv->rx_bd_ci = 0; + + /* Allocate the Tx and Rx buffer descriptors. */ + priv->tx_bd_v = dma_zalloc_coherent(ndev->dev.parent, + sizeof(*priv->tx_bd_v) * TX_BD_NUM, + &priv->tx_bd_p, GFP_KERNEL); + if (!priv->tx_bd_v) + goto out; + + priv->tx_skb = devm_kzalloc(ndev->dev.parent, + sizeof(*priv->tx_skb) * + TX_BD_NUM, + GFP_KERNEL); + if (!priv->tx_skb) + goto out; + + priv->rx_bd_v = dma_zalloc_coherent(ndev->dev.parent, + sizeof(*priv->rx_bd_v) * RX_BD_NUM, + &priv->rx_bd_p, GFP_KERNEL); + if (!priv->rx_bd_v) + goto out; + + for (i = 0; i < TX_BD_NUM; i++) { + priv->tx_bd_v[i].next = priv->tx_bd_p + + sizeof(*priv->tx_bd_v) * + ((i + 1) % TX_BD_NUM); + } + + for (i = 0; i < RX_BD_NUM; i++) { + priv->rx_bd_v[i].next = priv->rx_bd_p + + sizeof(*priv->rx_bd_v) * + ((i + 1) % RX_BD_NUM); + + skb = netdev_alloc_skb_ip_align(ndev, + NIXGE_MAX_JUMBO_FRAME_SIZE); + if (!skb) + goto out; + + priv->rx_bd_v[i].sw_id_offset = (u32)skb; + priv->rx_bd_v[i].phys = + dma_map_single(ndev->dev.parent, + skb->data, + NIXGE_MAX_JUMBO_FRAME_SIZE, + DMA_FROM_DEVICE); + priv->rx_bd_v[i].cntrl = NIXGE_MAX_JUMBO_FRAME_SIZE; + } + + /* Start updating the Rx channel control register */ + cr = nixge_dma_read_reg(priv, XAXIDMA_RX_CR_OFFSET); + /* Update the interrupt coalesce count */ + cr = ((cr & ~XAXIDMA_COALESCE_MASK) | + ((priv->coalesce_count_rx) << XAXIDMA_COALESCE_SHIFT)); + /* Update the delay timer count */ + cr = ((cr & ~XAXIDMA_DELAY_MASK) | + (XAXIDMA_DFT_RX_WAITBOUND << XAXIDMA_DELAY_SHIFT)); + /* Enable coalesce, delay timer and error interrupts */ + cr |= XAXIDMA_IRQ_ALL_MASK; + /* Write to the Rx channel control register */ + nixge_dma_write_reg(priv, XAXIDMA_RX_CR_OFFSET, cr); + + /* Start updating the Tx channel control register */ + cr = nixge_dma_read_reg(priv, XAXIDMA_TX_CR_OFFSET); + /* Update the interrupt coalesce count */ + cr = (((cr & ~XAXIDMA_COALESCE_MASK)) | + ((priv->coalesce_count_tx) << XAXIDMA_COALESCE_SHIFT)); + /* Update the delay timer count */ + cr = (((cr & ~XAXIDMA_DELAY_MASK)) | + (XAXIDMA_DFT_TX_WAITBOUND << XAXIDMA_DELAY_SHIFT)); + /* Enable coalesce, delay timer and error interrupts */ + cr |= XAXIDMA_IRQ_ALL_MASK; + /* Write to the Tx channel control register */ + nixge_dma_write_reg(priv, XAXIDMA_TX_CR_OFFSET, cr); + + /* Populate the tail pointer and bring the Rx Axi DMA engine out of + * halted state. This will make the Rx side ready for reception. + */ + nixge_dma_write_reg(priv, XAXIDMA_RX_CDESC_OFFSET, priv->rx_bd_p); + cr = nixge_dma_read_reg(priv, XAXIDMA_RX_CR_OFFSET); + nixge_dma_write_reg(priv, XAXIDMA_RX_CR_OFFSET, + cr | XAXIDMA_CR_RUNSTOP_MASK); + nixge_dma_write_reg(priv, XAXIDMA_RX_TDESC_OFFSET, priv->rx_bd_p + + (sizeof(*priv->rx_bd_v) * (RX_BD_NUM - 1))); + + /* Write to the RS (Run-stop) bit in the Tx channel control register. + * Tx channel is now ready to run. But only after we write to the + * tail pointer register that the Tx channel will start transmitting. + */ + nixge_dma_write_reg(priv, XAXIDMA_TX_CDESC_OFFSET, priv->tx_bd_p); + cr = nixge_dma_read_reg(priv, XAXIDMA_TX_CR_OFFSET); + nixge_dma_write_reg(priv, XAXIDMA_TX_CR_OFFSET, + cr | XAXIDMA_CR_RUNSTOP_MASK); + + return 0; +out: + nixge_hw_dma_bd_release(ndev); + return -ENOMEM; +} + +static void __nixge_device_reset(struct nixge_priv *priv, off_t offset) +{ + u32 status; + int err; + + /* Reset Axi DMA. This would reset NIXGE Ethernet core as well. + * The reset process of Axi DMA takes a while to complete as all + * pending commands/transfers will be flushed or completed during + * this reset process. + */ + nixge_dma_write_reg(priv, offset, XAXIDMA_CR_RESET_MASK); + err = nixge_dma_poll_timeout(priv, offset, status, + !(status & XAXIDMA_CR_RESET_MASK), 10, + 1000); + if (err) + netdev_err(priv->ndev, "%s: DMA reset timeout!\n", __func__); +} + +static void nixge_device_reset(struct net_device *ndev) +{ + struct nixge_priv *priv = netdev_priv(ndev); + + __nixge_device_reset(priv, XAXIDMA_TX_CR_OFFSET); + __nixge_device_reset(priv, XAXIDMA_RX_CR_OFFSET); + + if (nixge_hw_dma_bd_init(ndev)) + netdev_err(ndev, "%s: descriptor allocation failed\n", + __func__); + + netif_trans_update(ndev); +} + +static void nixge_handle_link_change(struct net_device *ndev) +{ + struct nixge_priv *priv = netdev_priv(ndev); + struct phy_device *phydev = ndev->phydev; + + if (phydev->link != priv->link || phydev->speed != priv->speed || + phydev->duplex != priv->duplex) { + priv->link = phydev->link; + priv->speed = phydev->speed; + priv->duplex = phydev->duplex; + phy_print_status(phydev); + } +} + +static void nixge_tx_skb_unmap(struct nixge_priv *priv, + struct nixge_tx_skb *tx_skb) +{ + if (tx_skb->mapping) { + if (tx_skb->mapped_as_page) + dma_unmap_page(priv->ndev->dev.parent, tx_skb->mapping, + tx_skb->size, DMA_TO_DEVICE); + else + dma_unmap_single(priv->ndev->dev.parent, + tx_skb->mapping, + tx_skb->size, DMA_TO_DEVICE); + tx_skb->mapping = 0; + } + + if (tx_skb->skb) { + dev_kfree_skb_any(tx_skb->skb); + tx_skb->skb = NULL; + } +} + +static void nixge_start_xmit_done(struct net_device *ndev) +{ + struct nixge_priv *priv = netdev_priv(ndev); + struct nixge_hw_dma_bd *cur_p; + struct nixge_tx_skb *tx_skb; + unsigned int status = 0; + u32 packets = 0; + u32 size = 0; + + cur_p = &priv->tx_bd_v[priv->tx_bd_ci]; + tx_skb = &priv->tx_skb[priv->tx_bd_ci]; + + status = cur_p->status; + + while (status & XAXIDMA_BD_STS_COMPLETE_MASK) { + nixge_tx_skb_unmap(priv, tx_skb); + cur_p->status = 0; + + size += status & XAXIDMA_BD_STS_ACTUAL_LEN_MASK; + packets++; + + ++priv->tx_bd_ci; + priv->tx_bd_ci %= TX_BD_NUM; + cur_p = &priv->tx_bd_v[priv->tx_bd_ci]; + tx_skb = &priv->tx_skb[priv->tx_bd_ci]; + status = cur_p->status; + } + + ndev->stats.tx_packets += packets; + ndev->stats.tx_bytes += size; + + if (packets) + netif_wake_queue(ndev); +} + +static int nixge_check_tx_bd_space(struct nixge_priv *priv, + int num_frag) +{ + struct nixge_hw_dma_bd *cur_p; + + cur_p = &priv->tx_bd_v[(priv->tx_bd_tail + num_frag) % TX_BD_NUM]; + if (cur_p->status & XAXIDMA_BD_STS_ALL_MASK) + return NETDEV_TX_BUSY; + return 0; +} + +static int nixge_start_xmit(struct sk_buff *skb, struct net_device *ndev) +{ + struct nixge_priv *priv = netdev_priv(ndev); + struct nixge_hw_dma_bd *cur_p; + struct nixge_tx_skb *tx_skb; + dma_addr_t tail_p; + skb_frag_t *frag; + u32 num_frag; + u32 ii; + + num_frag = skb_shinfo(skb)->nr_frags; + cur_p = &priv->tx_bd_v[priv->tx_bd_tail]; + tx_skb = &priv->tx_skb[priv->tx_bd_tail]; + + if (nixge_check_tx_bd_space(priv, num_frag)) { + if (!netif_queue_stopped(ndev)) + netif_stop_queue(ndev); + return NETDEV_TX_OK; + } + + cur_p->phys = dma_map_single(ndev->dev.parent, skb->data, + skb_headlen(skb), DMA_TO_DEVICE); + if (dma_mapping_error(ndev->dev.parent, cur_p->phys)) + goto drop; + + cur_p->cntrl = skb_headlen(skb) | XAXIDMA_BD_CTRL_TXSOF_MASK; + + tx_skb->skb = NULL; + tx_skb->mapping = cur_p->phys; + tx_skb->size = skb_headlen(skb); + tx_skb->mapped_as_page = false; + + for (ii = 0; ii < num_frag; ii++) { + ++priv->tx_bd_tail; + priv->tx_bd_tail %= TX_BD_NUM; + cur_p = &priv->tx_bd_v[priv->tx_bd_tail]; + tx_skb = &priv->tx_skb[priv->tx_bd_tail]; + frag = &skb_shinfo(skb)->frags[ii]; + + cur_p->phys = skb_frag_dma_map(ndev->dev.parent, frag, 0, + skb_frag_size(frag), + DMA_TO_DEVICE); + if (dma_mapping_error(ndev->dev.parent, cur_p->phys)) + goto frag_err; + + cur_p->cntrl = skb_frag_size(frag); + + tx_skb->skb = NULL; + tx_skb->mapping = cur_p->phys; + tx_skb->size = skb_frag_size(frag); + tx_skb->mapped_as_page = true; + } + + /* last buffer of the frame */ + tx_skb->skb = skb; + + cur_p->cntrl |= XAXIDMA_BD_CTRL_TXEOF_MASK; + cur_p->app4 = (unsigned long)skb; + + tail_p = priv->tx_bd_p + sizeof(*priv->tx_bd_v) * priv->tx_bd_tail; + /* Start the transfer */ + nixge_dma_write_reg(priv, XAXIDMA_TX_TDESC_OFFSET, tail_p); + ++priv->tx_bd_tail; + priv->tx_bd_tail %= TX_BD_NUM; + + return NETDEV_TX_OK; +frag_err: + for (; ii > 0; ii--) { + if (priv->tx_bd_tail) + priv->tx_bd_tail--; + else + priv->tx_bd_tail = TX_BD_NUM - 1; + + tx_skb = &priv->tx_skb[priv->tx_bd_tail]; + nixge_tx_skb_unmap(priv, tx_skb); + + cur_p = &priv->tx_bd_v[priv->tx_bd_tail]; + cur_p->status = 0; + } + dma_unmap_single(priv->ndev->dev.parent, + tx_skb->mapping, + tx_skb->size, DMA_TO_DEVICE); +drop: + ndev->stats.tx_dropped++; + return NETDEV_TX_OK; +} + +static int nixge_recv(struct net_device *ndev, int budget) +{ + struct nixge_priv *priv = netdev_priv(ndev); + struct sk_buff *skb, *new_skb; + struct nixge_hw_dma_bd *cur_p; + dma_addr_t tail_p = 0; + u32 packets = 0; + u32 length = 0; + u32 size = 0; + + cur_p = &priv->rx_bd_v[priv->rx_bd_ci]; + + while ((cur_p->status & XAXIDMA_BD_STS_COMPLETE_MASK && + budget > packets)) { + tail_p = priv->rx_bd_p + sizeof(*priv->rx_bd_v) * + priv->rx_bd_ci; + + skb = (struct sk_buff *)(cur_p->sw_id_offset); + + length = cur_p->status & XAXIDMA_BD_STS_ACTUAL_LEN_MASK; + if (length > NIXGE_MAX_JUMBO_FRAME_SIZE) + length = NIXGE_MAX_JUMBO_FRAME_SIZE; + + dma_unmap_single(ndev->dev.parent, cur_p->phys, + NIXGE_MAX_JUMBO_FRAME_SIZE, + DMA_FROM_DEVICE); + + skb_put(skb, length); + + skb->protocol = eth_type_trans(skb, ndev); + skb_checksum_none_assert(skb); + + /* For now mark them as CHECKSUM_NONE since + * we don't have offload capabilities + */ + skb->ip_summed = CHECKSUM_NONE; + + napi_gro_receive(&priv->napi, skb); + + size += length; + packets++; + + new_skb = netdev_alloc_skb_ip_align(ndev, + NIXGE_MAX_JUMBO_FRAME_SIZE); + if (!new_skb) + return packets; + + cur_p->phys = dma_map_single(ndev->dev.parent, new_skb->data, + NIXGE_MAX_JUMBO_FRAME_SIZE, + DMA_FROM_DEVICE); + if (dma_mapping_error(ndev->dev.parent, cur_p->phys)) { + /* FIXME: bail out and clean up */ + netdev_err(ndev, "Failed to map ...\n"); + } + cur_p->cntrl = NIXGE_MAX_JUMBO_FRAME_SIZE; + cur_p->status = 0; + cur_p->sw_id_offset = (u32)new_skb; + + ++priv->rx_bd_ci; + priv->rx_bd_ci %= RX_BD_NUM; + cur_p = &priv->rx_bd_v[priv->rx_bd_ci]; + } + + ndev->stats.rx_packets += packets; + ndev->stats.rx_bytes += size; + + if (tail_p) + nixge_dma_write_reg(priv, XAXIDMA_RX_TDESC_OFFSET, tail_p); + + return packets; +} + +static int nixge_poll(struct napi_struct *napi, int budget) +{ + struct nixge_priv *priv = container_of(napi, struct nixge_priv, napi); + int work_done; + u32 status, cr; + + work_done = 0; + + work_done = nixge_recv(priv->ndev, budget); + if (work_done < budget) { + napi_complete_done(napi, work_done); + status = nixge_dma_read_reg(priv, XAXIDMA_RX_SR_OFFSET); + + if (status & (XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK)) { + /* If there's more, reschedule, but clear */ + nixge_dma_write_reg(priv, XAXIDMA_RX_SR_OFFSET, status); + napi_reschedule(napi); + } else { + /* if not, turn on RX IRQs again ... */ + cr = nixge_dma_read_reg(priv, XAXIDMA_RX_CR_OFFSET); + cr |= (XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK); + nixge_dma_write_reg(priv, XAXIDMA_RX_CR_OFFSET, cr); + } + } + + return work_done; +} + +static irqreturn_t nixge_tx_irq(int irq, void *_ndev) +{ + struct nixge_priv *priv = netdev_priv(_ndev); + struct net_device *ndev = _ndev; + unsigned int status; + u32 cr; + + status = nixge_dma_read_reg(priv, XAXIDMA_TX_SR_OFFSET); + if (status & (XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK)) { + nixge_dma_write_reg(priv, XAXIDMA_TX_SR_OFFSET, status); + nixge_start_xmit_done(priv->ndev); + goto out; + } + if (!(status & XAXIDMA_IRQ_ALL_MASK)) { + netdev_err(ndev, "No interrupts asserted in Tx path\n"); + return IRQ_NONE; + } + if (status & XAXIDMA_IRQ_ERROR_MASK) { + netdev_err(ndev, "DMA Tx error 0x%x\n", status); + netdev_err(ndev, "Current BD is at: 0x%x\n", + (priv->tx_bd_v[priv->tx_bd_ci]).phys); + + cr = nixge_dma_read_reg(priv, XAXIDMA_TX_CR_OFFSET); + /* Disable coalesce, delay timer and error interrupts */ + cr &= (~XAXIDMA_IRQ_ALL_MASK); + /* Write to the Tx channel control register */ + nixge_dma_write_reg(priv, XAXIDMA_TX_CR_OFFSET, cr); + + cr = nixge_dma_read_reg(priv, XAXIDMA_RX_CR_OFFSET); + /* Disable coalesce, delay timer and error interrupts */ + cr &= (~XAXIDMA_IRQ_ALL_MASK); + /* Write to the Rx channel control register */ + nixge_dma_write_reg(priv, XAXIDMA_RX_CR_OFFSET, cr); + + tasklet_schedule(&priv->dma_err_tasklet); + nixge_dma_write_reg(priv, XAXIDMA_TX_SR_OFFSET, status); + } +out: + return IRQ_HANDLED; +} + +static irqreturn_t nixge_rx_irq(int irq, void *_ndev) +{ + struct nixge_priv *priv = netdev_priv(_ndev); + struct net_device *ndev = _ndev; + unsigned int status; + u32 cr; + + status = nixge_dma_read_reg(priv, XAXIDMA_RX_SR_OFFSET); + if (status & (XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK)) { + /* Turn of IRQs because NAPI */ + nixge_dma_write_reg(priv, XAXIDMA_RX_SR_OFFSET, status); + cr = nixge_dma_read_reg(priv, XAXIDMA_RX_CR_OFFSET); + cr &= ~(XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_DELAY_MASK); + nixge_dma_write_reg(priv, XAXIDMA_RX_CR_OFFSET, cr); + + if (napi_schedule_prep(&priv->napi)) + __napi_schedule(&priv->napi); + goto out; + } + if (!(status & XAXIDMA_IRQ_ALL_MASK)) { + netdev_err(ndev, "No interrupts asserted in Rx path\n"); + return IRQ_NONE; + } + if (status & XAXIDMA_IRQ_ERROR_MASK) { + netdev_err(ndev, "DMA Rx error 0x%x\n", status); + netdev_err(ndev, "Current BD is at: 0x%x\n", + (priv->rx_bd_v[priv->rx_bd_ci]).phys); + + cr = nixge_dma_read_reg(priv, XAXIDMA_TX_CR_OFFSET); + /* Disable coalesce, delay timer and error interrupts */ + cr &= (~XAXIDMA_IRQ_ALL_MASK); + /* Finally write to the Tx channel control register */ + nixge_dma_write_reg(priv, XAXIDMA_TX_CR_OFFSET, cr); + + cr = nixge_dma_read_reg(priv, XAXIDMA_RX_CR_OFFSET); + /* Disable coalesce, delay timer and error interrupts */ + cr &= (~XAXIDMA_IRQ_ALL_MASK); + /* write to the Rx channel control register */ + nixge_dma_write_reg(priv, XAXIDMA_RX_CR_OFFSET, cr); + + tasklet_schedule(&priv->dma_err_tasklet); + nixge_dma_write_reg(priv, XAXIDMA_RX_SR_OFFSET, status); + } +out: + return IRQ_HANDLED; +} + +static void nixge_dma_err_handler(unsigned long data) +{ + struct nixge_priv *lp = (struct nixge_priv *)data; + struct nixge_hw_dma_bd *cur_p; + struct nixge_tx_skb *tx_skb; + u32 cr, i; + + __nixge_device_reset(lp, XAXIDMA_TX_CR_OFFSET); + __nixge_device_reset(lp, XAXIDMA_RX_CR_OFFSET); + + for (i = 0; i < TX_BD_NUM; i++) { + cur_p = &lp->tx_bd_v[i]; + tx_skb = &lp->tx_skb[i]; + nixge_tx_skb_unmap(lp, tx_skb); + + cur_p->phys = 0; + cur_p->cntrl = 0; + cur_p->status = 0; + cur_p->app0 = 0; + cur_p->app1 = 0; + cur_p->app2 = 0; + cur_p->app3 = 0; + cur_p->app4 = 0; + cur_p->sw_id_offset = 0; + } + + for (i = 0; i < RX_BD_NUM; i++) { + cur_p = &lp->rx_bd_v[i]; + cur_p->status = 0; + cur_p->app0 = 0; + cur_p->app1 = 0; + cur_p->app2 = 0; + cur_p->app3 = 0; + cur_p->app4 = 0; + } + + lp->tx_bd_ci = 0; + lp->tx_bd_tail = 0; + lp->rx_bd_ci = 0; + + /* Start updating the Rx channel control register */ + cr = nixge_dma_read_reg(lp, XAXIDMA_RX_CR_OFFSET); + /* Update the interrupt coalesce count */ + cr = ((cr & ~XAXIDMA_COALESCE_MASK) | + (XAXIDMA_DFT_RX_THRESHOLD << XAXIDMA_COALESCE_SHIFT)); + /* Update the delay timer count */ + cr = ((cr & ~XAXIDMA_DELAY_MASK) | + (XAXIDMA_DFT_RX_WAITBOUND << XAXIDMA_DELAY_SHIFT)); + /* Enable coalesce, delay timer and error interrupts */ + cr |= XAXIDMA_IRQ_ALL_MASK; + /* Finally write to the Rx channel control register */ + nixge_dma_write_reg(lp, XAXIDMA_RX_CR_OFFSET, cr); + + /* Start updating the Tx channel control register */ + cr = nixge_dma_read_reg(lp, XAXIDMA_TX_CR_OFFSET); + /* Update the interrupt coalesce count */ + cr = (((cr & ~XAXIDMA_COALESCE_MASK)) | + (XAXIDMA_DFT_TX_THRESHOLD << XAXIDMA_COALESCE_SHIFT)); + /* Update the delay timer count */ + cr = (((cr & ~XAXIDMA_DELAY_MASK)) | + (XAXIDMA_DFT_TX_WAITBOUND << XAXIDMA_DELAY_SHIFT)); + /* Enable coalesce, delay timer and error interrupts */ + cr |= XAXIDMA_IRQ_ALL_MASK; + /* Finally write to the Tx channel control register */ + nixge_dma_write_reg(lp, XAXIDMA_TX_CR_OFFSET, cr); + + /* Populate the tail pointer and bring the Rx Axi DMA engine out of + * halted state. This will make the Rx side ready for reception. + */ + nixge_dma_write_reg(lp, XAXIDMA_RX_CDESC_OFFSET, lp->rx_bd_p); + cr = nixge_dma_read_reg(lp, XAXIDMA_RX_CR_OFFSET); + nixge_dma_write_reg(lp, XAXIDMA_RX_CR_OFFSET, + cr | XAXIDMA_CR_RUNSTOP_MASK); + nixge_dma_write_reg(lp, XAXIDMA_RX_TDESC_OFFSET, lp->rx_bd_p + + (sizeof(*lp->rx_bd_v) * (RX_BD_NUM - 1))); + + /* Write to the RS (Run-stop) bit in the Tx channel control register. + * Tx channel is now ready to run. But only after we write to the + * tail pointer register that the Tx channel will start transmitting + */ + nixge_dma_write_reg(lp, XAXIDMA_TX_CDESC_OFFSET, lp->tx_bd_p); + cr = nixge_dma_read_reg(lp, XAXIDMA_TX_CR_OFFSET); + nixge_dma_write_reg(lp, XAXIDMA_TX_CR_OFFSET, + cr | XAXIDMA_CR_RUNSTOP_MASK); +} + +static int nixge_open(struct net_device *ndev) +{ + struct nixge_priv *priv = netdev_priv(ndev); + struct phy_device *phy; + int ret; + + nixge_device_reset(ndev); + + phy = of_phy_connect(ndev, priv->phy_node, + &nixge_handle_link_change, 0, priv->phy_mode); + if (!phy) + return -ENODEV; + + phy_start(phy); + + /* Enable tasklets for Axi DMA error handling */ + tasklet_init(&priv->dma_err_tasklet, nixge_dma_err_handler, + (unsigned long)priv); + + napi_enable(&priv->napi); + + /* Enable interrupts for Axi DMA Tx */ + ret = request_irq(priv->tx_irq, nixge_tx_irq, 0, ndev->name, ndev); + if (ret) + goto err_tx_irq; + /* Enable interrupts for Axi DMA Rx */ + ret = request_irq(priv->rx_irq, nixge_rx_irq, 0, ndev->name, ndev); + if (ret) + goto err_rx_irq; + + netif_start_queue(ndev); + + return 0; + +err_rx_irq: + free_irq(priv->tx_irq, ndev); +err_tx_irq: + phy_stop(phy); + phy_disconnect(phy); + tasklet_kill(&priv->dma_err_tasklet); + netdev_err(ndev, "request_irq() failed\n"); + return ret; +} + +static int nixge_stop(struct net_device *ndev) +{ + struct nixge_priv *priv = netdev_priv(ndev); + u32 cr; + + netif_stop_queue(ndev); + napi_disable(&priv->napi); + + if (ndev->phydev) { + phy_stop(ndev->phydev); + phy_disconnect(ndev->phydev); + } + + cr = nixge_dma_read_reg(priv, XAXIDMA_RX_CR_OFFSET); + nixge_dma_write_reg(priv, XAXIDMA_RX_CR_OFFSET, + cr & (~XAXIDMA_CR_RUNSTOP_MASK)); + cr = nixge_dma_read_reg(priv, XAXIDMA_TX_CR_OFFSET); + nixge_dma_write_reg(priv, XAXIDMA_TX_CR_OFFSET, + cr & (~XAXIDMA_CR_RUNSTOP_MASK)); + + tasklet_kill(&priv->dma_err_tasklet); + + free_irq(priv->tx_irq, ndev); + free_irq(priv->rx_irq, ndev); + + nixge_hw_dma_bd_release(ndev); + + return 0; +} + +static int nixge_change_mtu(struct net_device *ndev, int new_mtu) +{ + if (netif_running(ndev)) + return -EBUSY; + + if ((new_mtu + NIXGE_HDR_SIZE + NIXGE_TRL_SIZE) > + NIXGE_MAX_JUMBO_FRAME_SIZE) + return -EINVAL; + + ndev->mtu = new_mtu; + + return 0; +} + +static s32 __nixge_hw_set_mac_address(struct net_device *ndev) +{ + struct nixge_priv *priv = netdev_priv(ndev); + + nixge_ctrl_write_reg(priv, NIXGE_REG_MAC_LSB, + (ndev->dev_addr[2]) << 24 | + (ndev->dev_addr[3] << 16) | + (ndev->dev_addr[4] << 8) | + (ndev->dev_addr[5] << 0)); + + nixge_ctrl_write_reg(priv, NIXGE_REG_MAC_MSB, + (ndev->dev_addr[1] | (ndev->dev_addr[0] << 8))); + + return 0; +} + +static int nixge_net_set_mac_address(struct net_device *ndev, void *p) +{ + int err; + + err = eth_mac_addr(ndev, p); + if (!err) + __nixge_hw_set_mac_address(ndev); + + return err; +} + +static const struct net_device_ops nixge_netdev_ops = { + .ndo_open = nixge_open, + .ndo_stop = nixge_stop, + .ndo_start_xmit = nixge_start_xmit, + .ndo_change_mtu = nixge_change_mtu, + .ndo_set_mac_address = nixge_net_set_mac_address, + .ndo_validate_addr = eth_validate_addr, +}; + +static void nixge_ethtools_get_drvinfo(struct net_device *ndev, + struct ethtool_drvinfo *ed) +{ + strlcpy(ed->driver, "nixge", sizeof(ed->driver)); + strlcpy(ed->bus_info, "platform", sizeof(ed->driver)); +} + +static int nixge_ethtools_get_coalesce(struct net_device *ndev, + struct ethtool_coalesce *ecoalesce) +{ + struct nixge_priv *priv = netdev_priv(ndev); + u32 regval = 0; + + regval = nixge_dma_read_reg(priv, XAXIDMA_RX_CR_OFFSET); + ecoalesce->rx_max_coalesced_frames = (regval & XAXIDMA_COALESCE_MASK) + >> XAXIDMA_COALESCE_SHIFT; + regval = nixge_dma_read_reg(priv, XAXIDMA_TX_CR_OFFSET); + ecoalesce->tx_max_coalesced_frames = (regval & XAXIDMA_COALESCE_MASK) + >> XAXIDMA_COALESCE_SHIFT; + return 0; +} + +static int nixge_ethtools_set_coalesce(struct net_device *ndev, + struct ethtool_coalesce *ecoalesce) +{ + struct nixge_priv *priv = netdev_priv(ndev); + + if (netif_running(ndev)) { + netdev_err(ndev, + "Please stop netif before applying configuration\n"); + return -EBUSY; + } + + if (ecoalesce->rx_coalesce_usecs || + ecoalesce->rx_coalesce_usecs_irq || + ecoalesce->rx_max_coalesced_frames_irq || + ecoalesce->tx_coalesce_usecs || + ecoalesce->tx_coalesce_usecs_irq || + ecoalesce->tx_max_coalesced_frames_irq || + ecoalesce->stats_block_coalesce_usecs || + ecoalesce->use_adaptive_rx_coalesce || + ecoalesce->use_adaptive_tx_coalesce || + ecoalesce->pkt_rate_low || + ecoalesce->rx_coalesce_usecs_low || + ecoalesce->rx_max_coalesced_frames_low || + ecoalesce->tx_coalesce_usecs_low || + ecoalesce->tx_max_coalesced_frames_low || + ecoalesce->pkt_rate_high || + ecoalesce->rx_coalesce_usecs_high || + ecoalesce->rx_max_coalesced_frames_high || + ecoalesce->tx_coalesce_usecs_high || + ecoalesce->tx_max_coalesced_frames_high || + ecoalesce->rate_sample_interval) + return -EOPNOTSUPP; + if (ecoalesce->rx_max_coalesced_frames) + priv->coalesce_count_rx = ecoalesce->rx_max_coalesced_frames; + if (ecoalesce->tx_max_coalesced_frames) + priv->coalesce_count_tx = ecoalesce->tx_max_coalesced_frames; + + return 0; +} + +static int nixge_ethtools_set_phys_id(struct net_device *ndev, + enum ethtool_phys_id_state state) +{ + struct nixge_priv *priv = netdev_priv(ndev); + u32 ctrl; + + ctrl = nixge_ctrl_read_reg(priv, NIXGE_REG_LED_CTL); + switch (state) { + case ETHTOOL_ID_ACTIVE: + ctrl |= NIXGE_ID_LED_CTL_EN; + /* Enable identification LED override*/ + nixge_ctrl_write_reg(priv, NIXGE_REG_LED_CTL, ctrl); + return 2; + + case ETHTOOL_ID_ON: + ctrl |= NIXGE_ID_LED_CTL_VAL; + nixge_ctrl_write_reg(priv, NIXGE_REG_LED_CTL, ctrl); + break; + + case ETHTOOL_ID_OFF: + ctrl &= ~NIXGE_ID_LED_CTL_VAL; + nixge_ctrl_write_reg(priv, NIXGE_REG_LED_CTL, ctrl); + break; + + case ETHTOOL_ID_INACTIVE: + /* Restore LED settings */ + ctrl &= ~NIXGE_ID_LED_CTL_EN; + nixge_ctrl_write_reg(priv, NIXGE_REG_LED_CTL, ctrl); + break; + } + + return 0; +} + +static const struct ethtool_ops nixge_ethtool_ops = { + .get_drvinfo = nixge_ethtools_get_drvinfo, + .get_coalesce = nixge_ethtools_get_coalesce, + .set_coalesce = nixge_ethtools_set_coalesce, + .set_phys_id = nixge_ethtools_set_phys_id, + .get_link_ksettings = phy_ethtool_get_link_ksettings, + .set_link_ksettings = phy_ethtool_set_link_ksettings, + .get_link = ethtool_op_get_link, +}; + +static int nixge_mdio_read(struct mii_bus *bus, int phy_id, int reg) +{ + struct nixge_priv *priv = bus->priv; + u32 status, tmp; + int err; + u16 device; + + if (reg & MII_ADDR_C45) { + device = (reg >> 16) & 0x1f; + + nixge_ctrl_write_reg(priv, NIXGE_REG_MDIO_ADDR, reg & 0xffff); + + tmp = NIXGE_MDIO_CLAUSE45 | NIXGE_MDIO_OP(NIXGE_MDIO_OP_ADDRESS) + | NIXGE_MDIO_ADDR(phy_id) | NIXGE_MDIO_MMD(device); + + nixge_ctrl_write_reg(priv, NIXGE_REG_MDIO_OP, tmp); + nixge_ctrl_write_reg(priv, NIXGE_REG_MDIO_CTRL, 1); + + err = nixge_ctrl_poll_timeout(priv, NIXGE_REG_MDIO_CTRL, status, + !status, 10, 1000); + if (err) { + dev_err(priv->dev, "timeout setting address"); + return err; + } + + tmp = NIXGE_MDIO_CLAUSE45 | NIXGE_MDIO_OP(NIXGE_MDIO_C45_READ) | + NIXGE_MDIO_ADDR(phy_id) | NIXGE_MDIO_MMD(device); + } else { + device = reg & 0x1f; + + tmp = NIXGE_MDIO_CLAUSE22 | NIXGE_MDIO_OP(NIXGE_MDIO_C22_READ) | + NIXGE_MDIO_ADDR(phy_id) | NIXGE_MDIO_MMD(device); + } + + nixge_ctrl_write_reg(priv, NIXGE_REG_MDIO_OP, tmp); + nixge_ctrl_write_reg(priv, NIXGE_REG_MDIO_CTRL, 1); + + err = nixge_ctrl_poll_timeout(priv, NIXGE_REG_MDIO_CTRL, status, + !status, 10, 1000); + if (err) { + dev_err(priv->dev, "timeout setting read command"); + return err; + } + + status = nixge_ctrl_read_reg(priv, NIXGE_REG_MDIO_DATA); + + return status; +} + +static int nixge_mdio_write(struct mii_bus *bus, int phy_id, int reg, u16 val) +{ + struct nixge_priv *priv = bus->priv; + u32 status, tmp; + u16 device; + int err; + + if (reg & MII_ADDR_C45) { + device = (reg >> 16) & 0x1f; + + nixge_ctrl_write_reg(priv, NIXGE_REG_MDIO_ADDR, reg & 0xffff); + + tmp = NIXGE_MDIO_CLAUSE45 | NIXGE_MDIO_OP(NIXGE_MDIO_OP_ADDRESS) + | NIXGE_MDIO_ADDR(phy_id) | NIXGE_MDIO_MMD(device); + + nixge_ctrl_write_reg(priv, NIXGE_REG_MDIO_OP, tmp); + nixge_ctrl_write_reg(priv, NIXGE_REG_MDIO_CTRL, 1); + + err = nixge_ctrl_poll_timeout(priv, NIXGE_REG_MDIO_CTRL, status, + !status, 10, 1000); + if (err) { + dev_err(priv->dev, "timeout setting address"); + return err; + } + + tmp = NIXGE_MDIO_CLAUSE45 | NIXGE_MDIO_OP(NIXGE_MDIO_C45_WRITE) + | NIXGE_MDIO_ADDR(phy_id) | NIXGE_MDIO_MMD(device); + + nixge_ctrl_write_reg(priv, NIXGE_REG_MDIO_DATA, val); + nixge_ctrl_write_reg(priv, NIXGE_REG_MDIO_OP, tmp); + err = nixge_ctrl_poll_timeout(priv, NIXGE_REG_MDIO_CTRL, status, + !status, 10, 1000); + if (err) + dev_err(priv->dev, "timeout setting write command"); + } else { + device = reg & 0x1f; + + tmp = NIXGE_MDIO_CLAUSE22 | + NIXGE_MDIO_OP(NIXGE_MDIO_C22_WRITE) | + NIXGE_MDIO_ADDR(phy_id) | NIXGE_MDIO_MMD(device); + + nixge_ctrl_write_reg(priv, NIXGE_REG_MDIO_DATA, val); + nixge_ctrl_write_reg(priv, NIXGE_REG_MDIO_OP, tmp); + nixge_ctrl_write_reg(priv, NIXGE_REG_MDIO_CTRL, 1); + + err = nixge_ctrl_poll_timeout(priv, NIXGE_REG_MDIO_CTRL, status, + !status, 10, 1000); + if (err) + dev_err(priv->dev, "timeout setting write command"); + } + + return err; +} + +static int nixge_mdio_setup(struct nixge_priv *priv, struct device_node *np) +{ + struct mii_bus *bus; + + bus = devm_mdiobus_alloc(priv->dev); + if (!bus) + return -ENOMEM; + + snprintf(bus->id, MII_BUS_ID_SIZE, "%s-mii", dev_name(priv->dev)); + bus->priv = priv; + bus->name = "nixge_mii_bus"; + bus->read = nixge_mdio_read; + bus->write = nixge_mdio_write; + bus->parent = priv->dev; + + priv->mii_bus = bus; + + return of_mdiobus_register(bus, np); +} + +static void *nixge_get_nvmem_address(struct device *dev) +{ + struct nvmem_cell *cell; + size_t cell_size; + char *mac; + + cell = nvmem_cell_get(dev, "address"); + if (IS_ERR(cell)) + return cell; + + mac = nvmem_cell_read(cell, &cell_size); + nvmem_cell_put(cell); + + return mac; +} + +static int nixge_probe(struct platform_device *pdev) +{ + struct nixge_priv *priv; + struct net_device *ndev; + struct resource *dmares; + const char *mac_addr; + int err; + + ndev = alloc_etherdev(sizeof(*priv)); + if (!ndev) + return -ENOMEM; + + platform_set_drvdata(pdev, ndev); + SET_NETDEV_DEV(ndev, &pdev->dev); + + ndev->features = NETIF_F_SG; + ndev->netdev_ops = &nixge_netdev_ops; + ndev->ethtool_ops = &nixge_ethtool_ops; + + /* MTU range: 64 - 9000 */ + ndev->min_mtu = 64; + ndev->max_mtu = NIXGE_JUMBO_MTU; + + mac_addr = nixge_get_nvmem_address(&pdev->dev); + if (mac_addr && is_valid_ether_addr(mac_addr)) + ether_addr_copy(ndev->dev_addr, mac_addr); + else + eth_hw_addr_random(ndev); + + priv = netdev_priv(ndev); + priv->ndev = ndev; + priv->dev = &pdev->dev; + + netif_napi_add(ndev, &priv->napi, nixge_poll, NAPI_POLL_WEIGHT); + + dmares = platform_get_resource(pdev, IORESOURCE_MEM, 0); + priv->dma_regs = devm_ioremap_resource(&pdev->dev, dmares); + if (IS_ERR(priv->dma_regs)) { + netdev_err(ndev, "failed to map dma regs\n"); + return PTR_ERR(priv->dma_regs); + } + priv->ctrl_regs = priv->dma_regs + NIXGE_REG_CTRL_OFFSET; + __nixge_hw_set_mac_address(ndev); + + priv->tx_irq = platform_get_irq_byname(pdev, "tx"); + if (priv->tx_irq < 0) { + netdev_err(ndev, "could not find 'tx' irq"); + return priv->tx_irq; + } + + priv->rx_irq = platform_get_irq_byname(pdev, "rx"); + if (priv->rx_irq < 0) { + netdev_err(ndev, "could not find 'rx' irq"); + return priv->rx_irq; + } + + priv->coalesce_count_rx = XAXIDMA_DFT_RX_THRESHOLD; + priv->coalesce_count_tx = XAXIDMA_DFT_TX_THRESHOLD; + + err = nixge_mdio_setup(priv, pdev->dev.of_node); + if (err) { + netdev_err(ndev, "error registering mdio bus"); + goto free_netdev; + } + + priv->phy_mode = of_get_phy_mode(pdev->dev.of_node); + if (priv->phy_mode < 0) { + netdev_err(ndev, "not find \"phy-mode\" property\n"); + err = -EINVAL; + goto unregister_mdio; + } + + priv->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0); + if (!priv->phy_node) { + netdev_err(ndev, "not find \"phy-handle\" property\n"); + err = -EINVAL; + goto unregister_mdio; + } + + err = register_netdev(priv->ndev); + if (err) { + netdev_err(ndev, "register_netdev() error (%i)\n", err); + goto unregister_mdio; + } + + return 0; + +unregister_mdio: + mdiobus_unregister(priv->mii_bus); + +free_netdev: + free_netdev(ndev); + + return err; +} + +static int nixge_remove(struct platform_device *pdev) +{ + struct net_device *ndev = platform_get_drvdata(pdev); + struct nixge_priv *priv = netdev_priv(ndev); + + unregister_netdev(ndev); + + mdiobus_unregister(priv->mii_bus); + + free_netdev(ndev); + + return 0; +} + +/* Match table for of_platform binding */ +static const struct of_device_id nixge_dt_ids[] = { + { .compatible = "ni,xge-enet-2.00", }, + {}, +}; +MODULE_DEVICE_TABLE(of, nixge_dt_ids); + +static struct platform_driver nixge_driver = { + .probe = nixge_probe, + .remove = nixge_remove, + .driver = { + .name = "nixge", + .of_match_table = of_match_ptr(nixge_dt_ids), + }, +}; +module_platform_driver(nixge_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("National Instruments XGE Management MAC"); +MODULE_AUTHOR("Moritz Fischer <[email protected]>"); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index f38d6a561a84..72217170b155 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -118,6 +118,7 @@ struct afs_call { bool ret_reply0; /* T if should return reply[0] on success */ bool upgrade; /* T to request service upgrade */ u16 service_id; /* Actual service ID (after upgrade) */ + unsigned int debug_id; /* Trace ID */ u32 operation_ID; /* operation ID for an incoming call */ u32 count; /* count for use in unmarshalling */ __be32 tmp; /* place to extract temporary data */ diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index e1126659f043..b819900916e6 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -131,6 +131,7 @@ static struct afs_call *afs_alloc_call(struct afs_net *net, call->type = type; call->net = net; + call->debug_id = atomic_inc_return(&rxrpc_debug_id); atomic_set(&call->usage, 1); INIT_WORK(&call->async_work, afs_process_async_call); init_waitqueue_head(&call->waitq); @@ -169,11 +170,12 @@ void afs_put_call(struct afs_call *call) afs_put_server(call->net, call->cm_server); afs_put_cb_interest(call->net, call->cbi); kfree(call->request); - kfree(call); - o = atomic_dec_return(&net->nr_outstanding_calls); trace_afs_call(call, afs_call_trace_free, 0, o, __builtin_return_address(0)); + kfree(call); + + o = atomic_dec_return(&net->nr_outstanding_calls); if (o == 0) wake_up_atomic_t(&net->nr_outstanding_calls); } @@ -378,7 +380,8 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, (async ? afs_wake_up_async_call : afs_wake_up_call_waiter), - call->upgrade); + call->upgrade, + call->debug_id); if (IS_ERR(rxcall)) { ret = PTR_ERR(rxcall); goto error_kill_call; @@ -727,7 +730,8 @@ void afs_charge_preallocation(struct work_struct *work) afs_wake_up_async_call, afs_rx_attach, (unsigned long)call, - GFP_KERNEL) < 0) + GFP_KERNEL, + call->debug_id) < 0) break; call = NULL; } diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index c7d1e4689325..5225832bd6ff 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -37,6 +37,7 @@ extern int rtnl_lock_killable(void); extern wait_queue_head_t netdev_unregistering_wq; extern struct rw_semaphore pernet_ops_rwsem; +extern struct rw_semaphore net_rwsem; #ifdef CONFIG_PROVE_LOCKING extern bool lockdep_rtnl_is_held(void); diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index 2b3a6eec4570..8ae8ee004258 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -31,6 +31,11 @@ enum rxrpc_call_completion { NR__RXRPC_CALL_COMPLETIONS }; +/* + * Debug ID counter for tracing. + */ +extern atomic_t rxrpc_debug_id; + typedef void (*rxrpc_notify_rx_t)(struct sock *, struct rxrpc_call *, unsigned long); typedef void (*rxrpc_notify_end_tx_t)(struct sock *, struct rxrpc_call *, @@ -50,7 +55,8 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *, s64, gfp_t, rxrpc_notify_rx_t, - bool); + bool, + unsigned int); int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *, struct msghdr *, size_t, rxrpc_notify_end_tx_t); @@ -63,7 +69,8 @@ void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *, struct sockaddr_rxrpc *); u64 rxrpc_kernel_get_rtt(struct socket *, struct rxrpc_call *); int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t, - rxrpc_user_attach_call_t, unsigned long, gfp_t); + rxrpc_user_attach_call_t, unsigned long, gfp_t, + unsigned int); void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64); int rxrpc_kernel_retry_call(struct socket *, struct rxrpc_call *, struct sockaddr_rxrpc *, struct key *); diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 1ab4f920f109..47e35cce3b64 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -291,6 +291,7 @@ static inline struct net *read_pnet(const possible_net_t *pnet) #endif } +/* Protected by net_rwsem */ #define for_each_net(VAR) \ list_for_each_entry(VAR, &net_namespace_list, list) diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 6b59c63a8e51..63815f66b274 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -133,8 +133,7 @@ TRACE_EVENT(afs_recv_data, TP_ARGS(call, count, offset, want_more, ret), TP_STRUCT__entry( - __field(struct rxrpc_call *, rxcall ) - __field(struct afs_call *, call ) + __field(unsigned int, call ) __field(enum afs_call_state, state ) __field(unsigned int, count ) __field(unsigned int, offset ) @@ -144,8 +143,7 @@ TRACE_EVENT(afs_recv_data, ), TP_fast_assign( - __entry->rxcall = call->rxcall; - __entry->call = call; + __entry->call = call->debug_id; __entry->state = call->state; __entry->unmarshall = call->unmarshall; __entry->count = count; @@ -154,8 +152,7 @@ TRACE_EVENT(afs_recv_data, __entry->ret = ret; ), - TP_printk("c=%p ac=%p s=%u u=%u %u/%u wm=%u ret=%d", - __entry->rxcall, + TP_printk("c=%08x s=%u u=%u %u/%u wm=%u ret=%d", __entry->call, __entry->state, __entry->unmarshall, __entry->offset, __entry->count, @@ -168,21 +165,18 @@ TRACE_EVENT(afs_notify_call, TP_ARGS(rxcall, call), TP_STRUCT__entry( - __field(struct rxrpc_call *, rxcall ) - __field(struct afs_call *, call ) + __field(unsigned int, call ) __field(enum afs_call_state, state ) __field(unsigned short, unmarshall ) ), TP_fast_assign( - __entry->rxcall = rxcall; - __entry->call = call; + __entry->call = call->debug_id; __entry->state = call->state; __entry->unmarshall = call->unmarshall; ), - TP_printk("c=%p ac=%p s=%u u=%u", - __entry->rxcall, + TP_printk("c=%08x s=%u u=%u", __entry->call, __entry->state, __entry->unmarshall) ); @@ -193,21 +187,18 @@ TRACE_EVENT(afs_cb_call, TP_ARGS(call), TP_STRUCT__entry( - __field(struct rxrpc_call *, rxcall ) - __field(struct afs_call *, call ) + __field(unsigned int, call ) __field(const char *, name ) __field(u32, op ) ), TP_fast_assign( - __entry->rxcall = call->rxcall; - __entry->call = call; + __entry->call = call->debug_id; __entry->name = call->type->name; __entry->op = call->operation_ID; ), - TP_printk("c=%p ac=%p %s o=%u", - __entry->rxcall, + TP_printk("c=%08x %s o=%u", __entry->call, __entry->name, __entry->op) @@ -220,7 +211,7 @@ TRACE_EVENT(afs_call, TP_ARGS(call, op, usage, outstanding, where), TP_STRUCT__entry( - __field(struct afs_call *, call ) + __field(unsigned int, call ) __field(int, op ) __field(int, usage ) __field(int, outstanding ) @@ -228,14 +219,14 @@ TRACE_EVENT(afs_call, ), TP_fast_assign( - __entry->call = call; + __entry->call = call->debug_id; __entry->op = op; __entry->usage = usage; __entry->outstanding = outstanding; __entry->where = where; ), - TP_printk("c=%p %s u=%d o=%d sp=%pSR", + TP_printk("c=%08x %s u=%d o=%d sp=%pSR", __entry->call, __print_symbolic(__entry->op, afs_call_traces), __entry->usage, @@ -249,13 +240,13 @@ TRACE_EVENT(afs_make_fs_call, TP_ARGS(call, fid), TP_STRUCT__entry( - __field(struct afs_call *, call ) + __field(unsigned int, call ) __field(enum afs_fs_operation, op ) __field_struct(struct afs_fid, fid ) ), TP_fast_assign( - __entry->call = call; + __entry->call = call->debug_id; __entry->op = call->operation_ID; if (fid) { __entry->fid = *fid; @@ -266,7 +257,7 @@ TRACE_EVENT(afs_make_fs_call, } ), - TP_printk("c=%p %06x:%06x:%06x %s", + TP_printk("c=%08x %06x:%06x:%06x %s", __entry->call, __entry->fid.vid, __entry->fid.vnode, @@ -280,16 +271,16 @@ TRACE_EVENT(afs_make_vl_call, TP_ARGS(call), TP_STRUCT__entry( - __field(struct afs_call *, call ) + __field(unsigned int, call ) __field(enum afs_vl_operation, op ) ), TP_fast_assign( - __entry->call = call; + __entry->call = call->debug_id; __entry->op = call->operation_ID; ), - TP_printk("c=%p %s", + TP_printk("c=%08x %s", __entry->call, __print_symbolic(__entry->op, afs_vl_operations)) ); @@ -300,20 +291,20 @@ TRACE_EVENT(afs_call_done, TP_ARGS(call), TP_STRUCT__entry( - __field(struct afs_call *, call ) + __field(unsigned int, call ) __field(struct rxrpc_call *, rx_call ) __field(int, ret ) __field(u32, abort_code ) ), TP_fast_assign( - __entry->call = call; + __entry->call = call->debug_id; __entry->rx_call = call->rxcall; __entry->ret = call->error; __entry->abort_code = call->abort_code; ), - TP_printk(" c=%p ret=%d ab=%d [%p]", + TP_printk(" c=%08x ret=%d ab=%d [%p]", __entry->call, __entry->ret, __entry->abort_code, @@ -327,7 +318,7 @@ TRACE_EVENT(afs_send_pages, TP_ARGS(call, msg, first, last, offset), TP_STRUCT__entry( - __field(struct afs_call *, call ) + __field(unsigned int, call ) __field(pgoff_t, first ) __field(pgoff_t, last ) __field(unsigned int, nr ) @@ -337,7 +328,7 @@ TRACE_EVENT(afs_send_pages, ), TP_fast_assign( - __entry->call = call; + __entry->call = call->debug_id; __entry->first = first; __entry->last = last; __entry->nr = msg->msg_iter.nr_segs; @@ -346,7 +337,7 @@ TRACE_EVENT(afs_send_pages, __entry->flags = msg->msg_flags; ), - TP_printk(" c=%p %lx-%lx-%lx b=%x o=%x f=%x", + TP_printk(" c=%08x %lx-%lx-%lx b=%x o=%x f=%x", __entry->call, __entry->first, __entry->first + __entry->nr - 1, __entry->last, __entry->bytes, __entry->offset, @@ -360,7 +351,7 @@ TRACE_EVENT(afs_sent_pages, TP_ARGS(call, first, last, cursor, ret), TP_STRUCT__entry( - __field(struct afs_call *, call ) + __field(unsigned int, call ) __field(pgoff_t, first ) __field(pgoff_t, last ) __field(pgoff_t, cursor ) @@ -368,14 +359,14 @@ TRACE_EVENT(afs_sent_pages, ), TP_fast_assign( - __entry->call = call; + __entry->call = call->debug_id; __entry->first = first; __entry->last = last; __entry->cursor = cursor; __entry->ret = ret; ), - TP_printk(" c=%p %lx-%lx c=%lx r=%d", + TP_printk(" c=%08x %lx-%lx c=%lx r=%d", __entry->call, __entry->first, __entry->last, __entry->cursor, __entry->ret) @@ -450,7 +441,7 @@ TRACE_EVENT(afs_call_state, TP_ARGS(call, from, to, ret, remote_abort), TP_STRUCT__entry( - __field(struct afs_call *, call ) + __field(unsigned int, call ) __field(enum afs_call_state, from ) __field(enum afs_call_state, to ) __field(int, ret ) @@ -458,14 +449,14 @@ TRACE_EVENT(afs_call_state, ), TP_fast_assign( - __entry->call = call; + __entry->call = call->debug_id; __entry->from = from; __entry->to = to; __entry->ret = ret; __entry->abort = remote_abort; ), - TP_printk("c=%p %u->%u r=%d ab=%d", + TP_printk("c=%08x %u->%u r=%d ab=%d", __entry->call, __entry->from, __entry->to, __entry->ret, __entry->abort) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 36cb50c111a6..2ea788f6f95d 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -400,6 +400,13 @@ enum rxrpc_congest_change { EM(RXRPC_ACK_IDLE, "IDL") \ E_(RXRPC_ACK__INVALID, "-?-") +#define rxrpc_completions \ + EM(RXRPC_CALL_SUCCEEDED, "Succeeded") \ + EM(RXRPC_CALL_REMOTELY_ABORTED, "RemoteAbort") \ + EM(RXRPC_CALL_LOCALLY_ABORTED, "LocalAbort") \ + EM(RXRPC_CALL_LOCAL_ERROR, "LocalError") \ + E_(RXRPC_CALL_NETWORK_ERROR, "NetError") + /* * Export enum symbols via userspace. */ @@ -420,6 +427,7 @@ rxrpc_rtt_rx_traces; rxrpc_timer_traces; rxrpc_propose_ack_traces; rxrpc_propose_ack_outcomes; +rxrpc_congest_modes; rxrpc_congest_changes; /* @@ -438,20 +446,20 @@ TRACE_EVENT(rxrpc_conn, TP_ARGS(conn, op, usage, where), TP_STRUCT__entry( - __field(struct rxrpc_connection *, conn ) - __field(int, op ) - __field(int, usage ) - __field(const void *, where ) + __field(unsigned int, conn ) + __field(int, op ) + __field(int, usage ) + __field(const void *, where ) ), TP_fast_assign( - __entry->conn = conn; + __entry->conn = conn->debug_id; __entry->op = op; __entry->usage = usage; __entry->where = where; ), - TP_printk("C=%p %s u=%d sp=%pSR", + TP_printk("C=%08x %s u=%d sp=%pSR", __entry->conn, __print_symbolic(__entry->op, rxrpc_conn_traces), __entry->usage, @@ -465,7 +473,7 @@ TRACE_EVENT(rxrpc_client, TP_ARGS(conn, channel, op), TP_STRUCT__entry( - __field(struct rxrpc_connection *, conn ) + __field(unsigned int, conn ) __field(u32, cid ) __field(int, channel ) __field(int, usage ) @@ -474,7 +482,7 @@ TRACE_EVENT(rxrpc_client, ), TP_fast_assign( - __entry->conn = conn; + __entry->conn = conn->debug_id; __entry->channel = channel; __entry->usage = atomic_read(&conn->usage); __entry->op = op; @@ -482,7 +490,7 @@ TRACE_EVENT(rxrpc_client, __entry->cs = conn->cache_state; ), - TP_printk("C=%p h=%2d %s %s i=%08x u=%d", + TP_printk("C=%08x h=%2d %s %s i=%08x u=%d", __entry->conn, __entry->channel, __print_symbolic(__entry->op, rxrpc_client_traces), @@ -498,7 +506,7 @@ TRACE_EVENT(rxrpc_call, TP_ARGS(call, op, usage, where, aux), TP_STRUCT__entry( - __field(struct rxrpc_call *, call ) + __field(unsigned int, call ) __field(int, op ) __field(int, usage ) __field(const void *, where ) @@ -506,14 +514,14 @@ TRACE_EVENT(rxrpc_call, ), TP_fast_assign( - __entry->call = call; + __entry->call = call->debug_id; __entry->op = op; __entry->usage = usage; __entry->where = where; __entry->aux = aux; ), - TP_printk("c=%p %s u=%d sp=%pSR a=%p", + TP_printk("c=%08x %s u=%d sp=%pSR a=%p", __entry->call, __print_symbolic(__entry->op, rxrpc_call_traces), __entry->usage, @@ -592,12 +600,13 @@ TRACE_EVENT(rxrpc_rx_done, ); TRACE_EVENT(rxrpc_abort, - TP_PROTO(const char *why, u32 cid, u32 call_id, rxrpc_seq_t seq, - int abort_code, int error), + TP_PROTO(unsigned int call_nr, const char *why, u32 cid, u32 call_id, + rxrpc_seq_t seq, int abort_code, int error), - TP_ARGS(why, cid, call_id, seq, abort_code, error), + TP_ARGS(call_nr, why, cid, call_id, seq, abort_code, error), TP_STRUCT__entry( + __field(unsigned int, call_nr ) __array(char, why, 4 ) __field(u32, cid ) __field(u32, call_id ) @@ -608,6 +617,7 @@ TRACE_EVENT(rxrpc_abort, TP_fast_assign( memcpy(__entry->why, why, 4); + __entry->call_nr = call_nr; __entry->cid = cid; __entry->call_id = call_id; __entry->abort_code = abort_code; @@ -615,18 +625,45 @@ TRACE_EVENT(rxrpc_abort, __entry->seq = seq; ), - TP_printk("%08x:%08x s=%u a=%d e=%d %s", + TP_printk("c=%08x %08x:%08x s=%u a=%d e=%d %s", + __entry->call_nr, __entry->cid, __entry->call_id, __entry->seq, __entry->abort_code, __entry->error, __entry->why) ); +TRACE_EVENT(rxrpc_call_complete, + TP_PROTO(struct rxrpc_call *call), + + TP_ARGS(call), + + TP_STRUCT__entry( + __field(unsigned int, call ) + __field(enum rxrpc_call_completion, compl ) + __field(int, error ) + __field(u32, abort_code ) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->compl = call->completion; + __entry->error = call->error; + __entry->abort_code = call->abort_code; + ), + + TP_printk("c=%08x %s r=%d ac=%d", + __entry->call, + __print_symbolic(__entry->compl, rxrpc_completions), + __entry->error, + __entry->abort_code) + ); + TRACE_EVENT(rxrpc_transmit, TP_PROTO(struct rxrpc_call *call, enum rxrpc_transmit_trace why), TP_ARGS(call, why), TP_STRUCT__entry( - __field(struct rxrpc_call *, call ) + __field(unsigned int, call ) __field(enum rxrpc_transmit_trace, why ) __field(rxrpc_seq_t, tx_hard_ack ) __field(rxrpc_seq_t, tx_top ) @@ -634,14 +671,14 @@ TRACE_EVENT(rxrpc_transmit, ), TP_fast_assign( - __entry->call = call; + __entry->call = call->debug_id; __entry->why = why; __entry->tx_hard_ack = call->tx_hard_ack; __entry->tx_top = call->tx_top; __entry->tx_winsize = call->tx_winsize; ), - TP_printk("c=%p %s f=%08x n=%u/%u", + TP_printk("c=%08x %s f=%08x n=%u/%u", __entry->call, __print_symbolic(__entry->why, rxrpc_transmit_traces), __entry->tx_hard_ack + 1, @@ -656,7 +693,7 @@ TRACE_EVENT(rxrpc_rx_data, TP_ARGS(call, seq, serial, flags, anno), TP_STRUCT__entry( - __field(struct rxrpc_call *, call ) + __field(unsigned int, call ) __field(rxrpc_seq_t, seq ) __field(rxrpc_serial_t, serial ) __field(u8, flags ) @@ -664,14 +701,14 @@ TRACE_EVENT(rxrpc_rx_data, ), TP_fast_assign( - __entry->call = call; + __entry->call = call->debug_id; __entry->seq = seq; __entry->serial = serial; __entry->flags = flags; __entry->anno = anno; ), - TP_printk("c=%p DATA %08x q=%08x fl=%02x a=%02x", + TP_printk("c=%08x DATA %08x q=%08x fl=%02x a=%02x", __entry->call, __entry->serial, __entry->seq, @@ -687,7 +724,7 @@ TRACE_EVENT(rxrpc_rx_ack, TP_ARGS(call, serial, ack_serial, first, prev, reason, n_acks), TP_STRUCT__entry( - __field(struct rxrpc_call *, call ) + __field(unsigned int, call ) __field(rxrpc_serial_t, serial ) __field(rxrpc_serial_t, ack_serial ) __field(rxrpc_seq_t, first ) @@ -697,7 +734,7 @@ TRACE_EVENT(rxrpc_rx_ack, ), TP_fast_assign( - __entry->call = call; + __entry->call = call->debug_id; __entry->serial = serial; __entry->ack_serial = ack_serial; __entry->first = first; @@ -706,7 +743,7 @@ TRACE_EVENT(rxrpc_rx_ack, __entry->n_acks = n_acks; ), - TP_printk("c=%p %08x %s r=%08x f=%08x p=%08x n=%u", + TP_printk("c=%08x %08x %s r=%08x f=%08x p=%08x n=%u", __entry->call, __entry->serial, __print_symbolic(__entry->reason, rxrpc_ack_names), @@ -723,18 +760,18 @@ TRACE_EVENT(rxrpc_rx_abort, TP_ARGS(call, serial, abort_code), TP_STRUCT__entry( - __field(struct rxrpc_call *, call ) + __field(unsigned int, call ) __field(rxrpc_serial_t, serial ) __field(u32, abort_code ) ), TP_fast_assign( - __entry->call = call; + __entry->call = call->debug_id; __entry->serial = serial; __entry->abort_code = abort_code; ), - TP_printk("c=%p ABORT %08x ac=%d", + TP_printk("c=%08x ABORT %08x ac=%d", __entry->call, __entry->serial, __entry->abort_code) @@ -747,20 +784,20 @@ TRACE_EVENT(rxrpc_rx_rwind_change, TP_ARGS(call, serial, rwind, wake), TP_STRUCT__entry( - __field(struct rxrpc_call *, call ) + __field(unsigned int, call ) __field(rxrpc_serial_t, serial ) __field(u32, rwind ) __field(bool, wake ) ), TP_fast_assign( - __entry->call = call; + __entry->call = call->debug_id; __entry->serial = serial; __entry->rwind = rwind; __entry->wake = wake; ), - TP_printk("c=%p %08x rw=%u%s", + TP_printk("c=%08x %08x rw=%u%s", __entry->call, __entry->serial, __entry->rwind, @@ -774,7 +811,7 @@ TRACE_EVENT(rxrpc_tx_data, TP_ARGS(call, seq, serial, flags, retrans, lose), TP_STRUCT__entry( - __field(struct rxrpc_call *, call ) + __field(unsigned int, call ) __field(rxrpc_seq_t, seq ) __field(rxrpc_serial_t, serial ) __field(u8, flags ) @@ -783,7 +820,7 @@ TRACE_EVENT(rxrpc_tx_data, ), TP_fast_assign( - __entry->call = call; + __entry->call = call->debug_id; __entry->seq = seq; __entry->serial = serial; __entry->flags = flags; @@ -791,7 +828,7 @@ TRACE_EVENT(rxrpc_tx_data, __entry->lose = lose; ), - TP_printk("c=%p DATA %08x q=%08x fl=%02x%s%s", + TP_printk("c=%08x DATA %08x q=%08x fl=%02x%s%s", __entry->call, __entry->serial, __entry->seq, @@ -808,7 +845,7 @@ TRACE_EVENT(rxrpc_tx_ack, TP_ARGS(call, serial, ack_first, ack_serial, reason, n_acks), TP_STRUCT__entry( - __field(struct rxrpc_call *, call ) + __field(unsigned int, call ) __field(rxrpc_serial_t, serial ) __field(rxrpc_seq_t, ack_first ) __field(rxrpc_serial_t, ack_serial ) @@ -817,7 +854,7 @@ TRACE_EVENT(rxrpc_tx_ack, ), TP_fast_assign( - __entry->call = call; + __entry->call = call ? call->debug_id : 0; __entry->serial = serial; __entry->ack_first = ack_first; __entry->ack_serial = ack_serial; @@ -825,7 +862,7 @@ TRACE_EVENT(rxrpc_tx_ack, __entry->n_acks = n_acks; ), - TP_printk(" c=%p ACK %08x %s f=%08x r=%08x n=%u", + TP_printk(" c=%08x ACK %08x %s f=%08x r=%08x n=%u", __entry->call, __entry->serial, __print_symbolic(__entry->reason, rxrpc_ack_names), @@ -841,7 +878,7 @@ TRACE_EVENT(rxrpc_receive, TP_ARGS(call, why, serial, seq), TP_STRUCT__entry( - __field(struct rxrpc_call *, call ) + __field(unsigned int, call ) __field(enum rxrpc_receive_trace, why ) __field(rxrpc_serial_t, serial ) __field(rxrpc_seq_t, seq ) @@ -850,7 +887,7 @@ TRACE_EVENT(rxrpc_receive, ), TP_fast_assign( - __entry->call = call; + __entry->call = call->debug_id; __entry->why = why; __entry->serial = serial; __entry->seq = seq; @@ -858,7 +895,7 @@ TRACE_EVENT(rxrpc_receive, __entry->top = call->rx_top; ), - TP_printk("c=%p %s r=%08x q=%08x w=%08x-%08x", + TP_printk("c=%08x %s r=%08x q=%08x w=%08x-%08x", __entry->call, __print_symbolic(__entry->why, rxrpc_receive_traces), __entry->serial, @@ -875,7 +912,7 @@ TRACE_EVENT(rxrpc_recvmsg, TP_ARGS(call, why, seq, offset, len, ret), TP_STRUCT__entry( - __field(struct rxrpc_call *, call ) + __field(unsigned int, call ) __field(enum rxrpc_recvmsg_trace, why ) __field(rxrpc_seq_t, seq ) __field(unsigned int, offset ) @@ -884,7 +921,7 @@ TRACE_EVENT(rxrpc_recvmsg, ), TP_fast_assign( - __entry->call = call; + __entry->call = call->debug_id; __entry->why = why; __entry->seq = seq; __entry->offset = offset; @@ -892,7 +929,7 @@ TRACE_EVENT(rxrpc_recvmsg, __entry->ret = ret; ), - TP_printk("c=%p %s q=%08x o=%u l=%u ret=%d", + TP_printk("c=%08x %s q=%08x o=%u l=%u ret=%d", __entry->call, __print_symbolic(__entry->why, rxrpc_recvmsg_traces), __entry->seq, @@ -908,18 +945,18 @@ TRACE_EVENT(rxrpc_rtt_tx, TP_ARGS(call, why, send_serial), TP_STRUCT__entry( - __field(struct rxrpc_call *, call ) + __field(unsigned int, call ) __field(enum rxrpc_rtt_tx_trace, why ) __field(rxrpc_serial_t, send_serial ) ), TP_fast_assign( - __entry->call = call; + __entry->call = call->debug_id; __entry->why = why; __entry->send_serial = send_serial; ), - TP_printk("c=%p %s sr=%08x", + TP_printk("c=%08x %s sr=%08x", __entry->call, __print_symbolic(__entry->why, rxrpc_rtt_tx_traces), __entry->send_serial) @@ -933,7 +970,7 @@ TRACE_EVENT(rxrpc_rtt_rx, TP_ARGS(call, why, send_serial, resp_serial, rtt, nr, avg), TP_STRUCT__entry( - __field(struct rxrpc_call *, call ) + __field(unsigned int, call ) __field(enum rxrpc_rtt_rx_trace, why ) __field(u8, nr ) __field(rxrpc_serial_t, send_serial ) @@ -943,7 +980,7 @@ TRACE_EVENT(rxrpc_rtt_rx, ), TP_fast_assign( - __entry->call = call; + __entry->call = call->debug_id; __entry->why = why; __entry->send_serial = send_serial; __entry->resp_serial = resp_serial; @@ -952,7 +989,7 @@ TRACE_EVENT(rxrpc_rtt_rx, __entry->avg = avg; ), - TP_printk("c=%p %s sr=%08x rr=%08x rtt=%lld nr=%u avg=%lld", + TP_printk("c=%08x %s sr=%08x rr=%08x rtt=%lld nr=%u avg=%lld", __entry->call, __print_symbolic(__entry->why, rxrpc_rtt_rx_traces), __entry->send_serial, @@ -969,7 +1006,7 @@ TRACE_EVENT(rxrpc_timer, TP_ARGS(call, why, now), TP_STRUCT__entry( - __field(struct rxrpc_call *, call ) + __field(unsigned int, call ) __field(enum rxrpc_timer_trace, why ) __field(long, now ) __field(long, ack_at ) @@ -983,7 +1020,7 @@ TRACE_EVENT(rxrpc_timer, ), TP_fast_assign( - __entry->call = call; + __entry->call = call->debug_id; __entry->why = why; __entry->now = now; __entry->ack_at = call->ack_at; @@ -995,7 +1032,7 @@ TRACE_EVENT(rxrpc_timer, __entry->timer = call->timer.expires; ), - TP_printk("c=%p %s a=%ld la=%ld r=%ld xr=%ld xq=%ld xt=%ld t=%ld", + TP_printk("c=%08x %s a=%ld la=%ld r=%ld xr=%ld xq=%ld xt=%ld t=%ld", __entry->call, __print_symbolic(__entry->why, rxrpc_timer_traces), __entry->ack_at - __entry->now, @@ -1038,7 +1075,7 @@ TRACE_EVENT(rxrpc_propose_ack, outcome), TP_STRUCT__entry( - __field(struct rxrpc_call *, call ) + __field(unsigned int, call ) __field(enum rxrpc_propose_ack_trace, why ) __field(rxrpc_serial_t, serial ) __field(u8, ack_reason ) @@ -1048,7 +1085,7 @@ TRACE_EVENT(rxrpc_propose_ack, ), TP_fast_assign( - __entry->call = call; + __entry->call = call->debug_id; __entry->why = why; __entry->serial = serial; __entry->ack_reason = ack_reason; @@ -1057,7 +1094,7 @@ TRACE_EVENT(rxrpc_propose_ack, __entry->outcome = outcome; ), - TP_printk("c=%p %s %s r=%08x i=%u b=%u%s", + TP_printk("c=%08x %s %s r=%08x i=%u b=%u%s", __entry->call, __print_symbolic(__entry->why, rxrpc_propose_ack_traces), __print_symbolic(__entry->ack_reason, rxrpc_ack_names), @@ -1074,20 +1111,20 @@ TRACE_EVENT(rxrpc_retransmit, TP_ARGS(call, seq, annotation, expiry), TP_STRUCT__entry( - __field(struct rxrpc_call *, call ) + __field(unsigned int, call ) __field(rxrpc_seq_t, seq ) __field(u8, annotation ) __field(s64, expiry ) ), TP_fast_assign( - __entry->call = call; + __entry->call = call->debug_id; __entry->seq = seq; __entry->annotation = annotation; __entry->expiry = expiry; ), - TP_printk("c=%p q=%x a=%02x xp=%lld", + TP_printk("c=%08x q=%x a=%02x xp=%lld", __entry->call, __entry->seq, __entry->annotation, @@ -1101,7 +1138,7 @@ TRACE_EVENT(rxrpc_congest, TP_ARGS(call, summary, ack_serial, change), TP_STRUCT__entry( - __field(struct rxrpc_call *, call ) + __field(unsigned int, call ) __field(enum rxrpc_congest_change, change ) __field(rxrpc_seq_t, hard_ack ) __field(rxrpc_seq_t, top ) @@ -1111,7 +1148,7 @@ TRACE_EVENT(rxrpc_congest, ), TP_fast_assign( - __entry->call = call; + __entry->call = call->debug_id; __entry->change = change; __entry->hard_ack = call->tx_hard_ack; __entry->top = call->tx_top; @@ -1120,7 +1157,7 @@ TRACE_EVENT(rxrpc_congest, memcpy(&__entry->sum, summary, sizeof(__entry->sum)); ), - TP_printk("c=%p r=%08x %s q=%08x %s cw=%u ss=%u nr=%u,%u nw=%u,%u r=%u b=%u u=%u d=%u l=%x%s%s%s", + TP_printk("c=%08x r=%08x %s q=%08x %s cw=%u ss=%u nr=%u,%u nw=%u,%u r=%u b=%u u=%u d=%u l=%x%s%s%s", __entry->call, __entry->ack_serial, __print_symbolic(__entry->sum.ack_reason, rxrpc_ack_names), @@ -1145,16 +1182,16 @@ TRACE_EVENT(rxrpc_disconnect_call, TP_ARGS(call), TP_STRUCT__entry( - __field(struct rxrpc_call *, call ) + __field(unsigned int, call ) __field(u32, abort_code ) ), TP_fast_assign( - __entry->call = call; + __entry->call = call->debug_id; __entry->abort_code = call->abort_code; ), - TP_printk("c=%p ab=%08x", + TP_printk("c=%08x ab=%08x", __entry->call, __entry->abort_code) ); @@ -1165,16 +1202,16 @@ TRACE_EVENT(rxrpc_improper_term, TP_ARGS(call), TP_STRUCT__entry( - __field(struct rxrpc_call *, call ) + __field(unsigned int, call ) __field(u32, abort_code ) ), TP_fast_assign( - __entry->call = call; + __entry->call = call->debug_id; __entry->abort_code = call->abort_code; ), - TP_printk("c=%p ab=%08x", + TP_printk("c=%08x ab=%08x", __entry->call, __entry->abort_code) ); @@ -1186,18 +1223,18 @@ TRACE_EVENT(rxrpc_rx_eproto, TP_ARGS(call, serial, why), TP_STRUCT__entry( - __field(struct rxrpc_call *, call ) + __field(unsigned int, call ) __field(rxrpc_serial_t, serial ) __field(const char *, why ) ), TP_fast_assign( - __entry->call = call; + __entry->call = call->debug_id; __entry->serial = serial; __entry->why = why; ), - TP_printk("c=%p EPROTO %08x %s", + TP_printk("c=%08x EPROTO %08x %s", __entry->call, __entry->serial, __entry->why) @@ -1209,26 +1246,49 @@ TRACE_EVENT(rxrpc_connect_call, TP_ARGS(call), TP_STRUCT__entry( - __field(struct rxrpc_call *, call ) + __field(unsigned int, call ) __field(unsigned long, user_call_ID ) __field(u32, cid ) __field(u32, call_id ) ), TP_fast_assign( - __entry->call = call; + __entry->call = call->debug_id; __entry->user_call_ID = call->user_call_ID; __entry->cid = call->cid; __entry->call_id = call->call_id; ), - TP_printk("c=%p u=%p %08x:%08x", + TP_printk("c=%08x u=%p %08x:%08x", __entry->call, (void *)__entry->user_call_ID, __entry->cid, __entry->call_id) ); +TRACE_EVENT(rxrpc_resend, + TP_PROTO(struct rxrpc_call *call, int ix), + + TP_ARGS(call, ix), + + TP_STRUCT__entry( + __field(unsigned int, call ) + __field(int, ix ) + __array(u8, anno, 64 ) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->ix = ix; + memcpy(__entry->anno, call->rxtx_annotations, 64); + ), + + TP_printk("c=%08x ix=%u a=%64phN", + __entry->call, + __entry->ix, + __entry->anno) + ); + #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/net/core/dev.c b/net/core/dev.c index e13807b5c84d..eca5458b2753 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1629,6 +1629,7 @@ int register_netdevice_notifier(struct notifier_block *nb) goto unlock; if (dev_boot_phase) goto unlock; + down_read(&net_rwsem); for_each_net(net) { for_each_netdev(net, dev) { err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev); @@ -1642,6 +1643,7 @@ int register_netdevice_notifier(struct notifier_block *nb) call_netdevice_notifier(nb, NETDEV_UP, dev); } } + up_read(&net_rwsem); unlock: rtnl_unlock(); @@ -1664,6 +1666,7 @@ rollback: } outroll: + up_read(&net_rwsem); raw_notifier_chain_unregister(&netdev_chain, nb); goto unlock; } @@ -1694,6 +1697,7 @@ int unregister_netdevice_notifier(struct notifier_block *nb) if (err) goto unlock; + down_read(&net_rwsem); for_each_net(net) { for_each_netdev(net, dev) { if (dev->flags & IFF_UP) { @@ -1704,6 +1708,7 @@ int unregister_netdevice_notifier(struct notifier_block *nb) call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev); } } + up_read(&net_rwsem); unlock: rtnl_unlock(); return err; diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c index 0c048bdeb016..614b985c92a4 100644 --- a/net/core/fib_notifier.c +++ b/net/core/fib_notifier.c @@ -33,6 +33,7 @@ static unsigned int fib_seq_sum(void) struct net *net; rtnl_lock(); + down_read(&net_rwsem); for_each_net(net) { rcu_read_lock(); list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) { @@ -43,6 +44,7 @@ static unsigned int fib_seq_sum(void) } rcu_read_unlock(); } + up_read(&net_rwsem); rtnl_unlock(); return fib_seq; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index b5796d17a302..7fdf321d4997 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -33,6 +33,10 @@ static struct list_head *first_device = &pernet_list; LIST_HEAD(net_namespace_list); EXPORT_SYMBOL_GPL(net_namespace_list); +/* Protects net_namespace_list. Nests iside rtnl_lock() */ +DECLARE_RWSEM(net_rwsem); +EXPORT_SYMBOL_GPL(net_rwsem); + struct net init_net = { .count = REFCOUNT_INIT(1), .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head), @@ -309,9 +313,9 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) if (error < 0) goto out_undo; } - rtnl_lock(); + down_write(&net_rwsem); list_add_tail_rcu(&net->list, &net_namespace_list); - rtnl_unlock(); + up_write(&net_rwsem); out: return error; @@ -450,7 +454,7 @@ static void unhash_nsid(struct net *net, struct net *last) * and this work is the only process, that may delete * a net from net_namespace_list. So, when the below * is executing, the list may only grow. Thus, we do not - * use for_each_net_rcu() or rtnl_lock(). + * use for_each_net_rcu() or net_rwsem. */ for_each_net(tmp) { int id; @@ -485,7 +489,7 @@ static void cleanup_net(struct work_struct *work) down_read(&pernet_ops_rwsem); /* Don't let anyone else find us. */ - rtnl_lock(); + down_write(&net_rwsem); llist_for_each_entry(net, net_kill_list, cleanup_list) list_del_rcu(&net->list); /* Cache last net. After we unlock rtnl, no one new net @@ -499,7 +503,7 @@ static void cleanup_net(struct work_struct *work) * useless anyway, as netns_ids are destroyed there. */ last = list_last_entry(&net_namespace_list, struct net, list); - rtnl_unlock(); + up_write(&net_rwsem); llist_for_each_entry(net, net_kill_list, cleanup_list) { unhash_nsid(net, last); @@ -900,6 +904,9 @@ static int __register_pernet_operations(struct list_head *list, list_add_tail(&ops->list, list); if (ops->init || (ops->id && ops->size)) { + /* We held write locked pernet_ops_rwsem, and parallel + * setup_net() and cleanup_net() are not possible. + */ for_each_net(net) { error = ops_init(ops, net); if (error) @@ -923,6 +930,7 @@ static void __unregister_pernet_operations(struct pernet_operations *ops) LIST_HEAD(net_exit_list); list_del(&ops->list); + /* See comment in __register_pernet_operations() */ for_each_net(net) list_add_tail(&net->exit_list, &net_exit_list); ops_exit_list(ops, &net_exit_list); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2d3949789cef..e86b28482ca7 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -418,9 +418,11 @@ void __rtnl_link_unregister(struct rtnl_link_ops *ops) { struct net *net; + down_read(&net_rwsem); for_each_net(net) { __rtnl_kill_links(net, ops); } + up_read(&net_rwsem); list_del(&ops->list); } EXPORT_SYMBOL_GPL(__rtnl_link_unregister); @@ -438,6 +440,9 @@ static void rtnl_lock_unregistering_all(void) for (;;) { unregistering = false; rtnl_lock(); + /* We held write locked pernet_ops_rwsem, and parallel + * setup_net() and cleanup_net() are not possible. + */ for_each_net(net) { if (net->dev_unreg_count > 0) { unregistering = true; diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 296d0b956bfe..97689012b357 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -654,7 +654,7 @@ static void esp_input_restore_header(struct sk_buff *skb) static void esp_input_set_header(struct sk_buff *skb, __be32 *seqhi) { struct xfrm_state *x = xfrm_input_state(skb); - struct ip_esp_hdr *esph = (struct ip_esp_hdr *)skb->data; + struct ip_esp_hdr *esph; /* For ESN we move the header forward by 4 bytes to * accomodate the high bits. We will move it back after diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index da5635fc52c2..7cf755ef9efb 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -138,6 +138,8 @@ static struct sk_buff *esp4_gso_segment(struct sk_buff *skb, if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle || (x->xso.dev != skb->dev)) esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK); + else if (!(features & NETIF_F_HW_ESP_TX_CSUM)) + esp_features = features & ~NETIF_F_CSUM_MASK; xo->flags |= XFRM_GSO_SEGMENT; diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index 3fd1ec775dc2..27f59b61f70f 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -165,6 +165,8 @@ static struct sk_buff *esp6_gso_segment(struct sk_buff *skb, if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle || (x->xso.dev != skb->dev)) esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK); + else if (!(features & NETIF_F_HW_ESP_TX_CSUM)) + esp_features = features & ~NETIF_F_CSUM_MASK; xo->flags |= XFRM_GSO_SEGMENT; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 705198de671d..41ff04ee2554 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1763,14 +1763,14 @@ nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data) { struct net *net; - rtnl_lock(); + down_read(&net_rwsem); for_each_net(net) { if (atomic_read(&net->ct.count) == 0) continue; __nf_ct_unconfirmed_destroy(net); nf_queue_nf_hook_drop(net); } - rtnl_unlock(); + up_read(&net_rwsem); /* Need to wait for netns cleanup worker to finish, if its * running -- it might have deleted a net namespace from diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index ef38e5aecd28..015e24e08909 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -2363,10 +2363,10 @@ static void __net_exit ovs_exit_net(struct net *dnet) list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node) __dp_destroy(dp); - rtnl_lock(); + down_read(&net_rwsem); for_each_net(net) list_vports_from_net(net, dnet, &head); - rtnl_unlock(); + up_read(&net_rwsem); /* Detach all vports from given namespace. */ list_for_each_entry_safe(vport, vport_next, &head, detach_list) { diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 9e1c2c6b6a67..ec5ec68be1aa 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -40,6 +40,7 @@ static const struct proto_ops rxrpc_rpc_ops; /* current debugging ID */ atomic_t rxrpc_debug_id; +EXPORT_SYMBOL(rxrpc_debug_id); /* count of skbs currently in use */ atomic_t rxrpc_n_tx_skbs, rxrpc_n_rx_skbs; @@ -267,6 +268,7 @@ static int rxrpc_listen(struct socket *sock, int backlog) * @gfp: The allocation constraints * @notify_rx: Where to send notifications instead of socket queue * @upgrade: Request service upgrade for call + * @debug_id: The debug ID for tracing to be assigned to the call * * Allow a kernel service to begin a call on the nominated socket. This just * sets up all the internal tracking structures and allocates connection and @@ -282,7 +284,8 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, s64 tx_total_len, gfp_t gfp, rxrpc_notify_rx_t notify_rx, - bool upgrade) + bool upgrade, + unsigned int debug_id) { struct rxrpc_conn_parameters cp; struct rxrpc_call_params p; @@ -314,7 +317,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, cp.exclusive = false; cp.upgrade = upgrade; cp.service_id = srx->srx_service; - call = rxrpc_new_client_call(rx, &cp, srx, &p, gfp); + call = rxrpc_new_client_call(rx, &cp, srx, &p, gfp, debug_id); /* The socket has been unlocked. */ if (!IS_ERR(call)) { call->notify_rx = notify_rx; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 416688381eb7..21cf164b6d85 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -691,7 +691,6 @@ struct rxrpc_send_params { * af_rxrpc.c */ extern atomic_t rxrpc_n_tx_skbs, rxrpc_n_rx_skbs; -extern atomic_t rxrpc_debug_id; extern struct workqueue_struct *rxrpc_workqueue; /* @@ -732,11 +731,12 @@ extern unsigned int rxrpc_max_call_lifetime; extern struct kmem_cache *rxrpc_call_jar; struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long); -struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *, gfp_t); +struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *, gfp_t, unsigned int); struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *, struct rxrpc_conn_parameters *, struct sockaddr_rxrpc *, - struct rxrpc_call_params *, gfp_t); + struct rxrpc_call_params *, gfp_t, + unsigned int); int rxrpc_retry_client_call(struct rxrpc_sock *, struct rxrpc_call *, struct rxrpc_conn_parameters *, @@ -778,6 +778,7 @@ static inline bool __rxrpc_set_call_completion(struct rxrpc_call *call, call->error = error; call->completion = compl, call->state = RXRPC_CALL_COMPLETE; + trace_rxrpc_call_complete(call); wake_up(&call->waitq); return true; } @@ -822,7 +823,7 @@ static inline bool __rxrpc_abort_call(const char *why, struct rxrpc_call *call, rxrpc_seq_t seq, u32 abort_code, int error) { - trace_rxrpc_abort(why, call->cid, call->call_id, seq, + trace_rxrpc_abort(call->debug_id, why, call->cid, call->call_id, seq, abort_code, error); return __rxrpc_set_call_completion(call, RXRPC_CALL_LOCALLY_ABORTED, abort_code, error); diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 3028298ca561..92ebd1d7e0bb 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -34,7 +34,8 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, struct rxrpc_backlog *b, rxrpc_notify_rx_t notify_rx, rxrpc_user_attach_call_t user_attach_call, - unsigned long user_call_ID, gfp_t gfp) + unsigned long user_call_ID, gfp_t gfp, + unsigned int debug_id) { const void *here = __builtin_return_address(0); struct rxrpc_call *call; @@ -94,7 +95,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, /* Now it gets complicated, because calls get registered with the * socket here, particularly if a user ID is preassigned by the user. */ - call = rxrpc_alloc_call(rx, gfp); + call = rxrpc_alloc_call(rx, gfp, debug_id); if (!call) return -ENOMEM; call->flags |= (1 << RXRPC_CALL_IS_SERVICE); @@ -174,7 +175,8 @@ int rxrpc_service_prealloc(struct rxrpc_sock *rx, gfp_t gfp) if (rx->discard_new_call) return 0; - while (rxrpc_service_prealloc_one(rx, b, NULL, NULL, 0, gfp) == 0) + while (rxrpc_service_prealloc_one(rx, b, NULL, NULL, 0, gfp, + atomic_inc_return(&rxrpc_debug_id)) == 0) ; return 0; @@ -347,7 +349,7 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, service_id == rx->second_service)) goto found_service; - trace_rxrpc_abort("INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + trace_rxrpc_abort(0, "INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, RX_INVALID_OPERATION, EOPNOTSUPP); skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT; skb->priority = RX_INVALID_OPERATION; @@ -358,7 +360,7 @@ found_service: spin_lock(&rx->incoming_lock); if (rx->sk.sk_state == RXRPC_SERVER_LISTEN_DISABLED || rx->sk.sk_state == RXRPC_CLOSE) { - trace_rxrpc_abort("CLS", sp->hdr.cid, sp->hdr.callNumber, + trace_rxrpc_abort(0, "CLS", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, RX_INVALID_OPERATION, ESHUTDOWN); skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT; skb->priority = RX_INVALID_OPERATION; @@ -635,6 +637,7 @@ out_discard: * @user_attach_call: Func to attach call to user_call_ID * @user_call_ID: The tag to attach to the preallocated call * @gfp: The allocation conditions. + * @debug_id: The tracing debug ID. * * Charge up the socket with preallocated calls, each with a user ID. A * function should be provided to effect the attachment from the user's side. @@ -645,7 +648,8 @@ out_discard: int rxrpc_kernel_charge_accept(struct socket *sock, rxrpc_notify_rx_t notify_rx, rxrpc_user_attach_call_t user_attach_call, - unsigned long user_call_ID, gfp_t gfp) + unsigned long user_call_ID, gfp_t gfp, + unsigned int debug_id) { struct rxrpc_sock *rx = rxrpc_sk(sock->sk); struct rxrpc_backlog *b = rx->backlog; @@ -655,6 +659,6 @@ int rxrpc_kernel_charge_accept(struct socket *sock, return rxrpc_service_prealloc_one(rx, b, notify_rx, user_attach_call, user_call_ID, - gfp); + gfp, debug_id); } EXPORT_SYMBOL(rxrpc_kernel_charge_accept); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index ad2ab1103189..6a62e42e1d8d 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -195,6 +195,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) * the packets in the Tx buffer we're going to resend and what the new * resend timeout will be. */ + trace_rxrpc_resend(call, (cursor + 1) & RXRPC_RXTX_BUFF_MASK); oldest = now; for (seq = cursor + 1; before_eq(seq, top); seq++) { ix = seq & RXRPC_RXTX_BUFF_MASK; diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 0b2db38dd32d..147657dfe757 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -99,7 +99,8 @@ found_extant_call: /* * allocate a new call */ -struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp) +struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp, + unsigned int debug_id) { struct rxrpc_call *call; @@ -138,7 +139,7 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp) spin_lock_init(&call->notify_lock); rwlock_init(&call->state_lock); atomic_set(&call->usage, 1); - call->debug_id = atomic_inc_return(&rxrpc_debug_id); + call->debug_id = debug_id; call->tx_total_len = -1; call->next_rx_timo = 20 * HZ; call->next_req_timo = 1 * HZ; @@ -166,14 +167,15 @@ nomem: */ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx, struct sockaddr_rxrpc *srx, - gfp_t gfp) + gfp_t gfp, + unsigned int debug_id) { struct rxrpc_call *call; ktime_t now; _enter(""); - call = rxrpc_alloc_call(rx, gfp); + call = rxrpc_alloc_call(rx, gfp, debug_id); if (!call) return ERR_PTR(-ENOMEM); call->state = RXRPC_CALL_CLIENT_AWAIT_CONN; @@ -214,7 +216,8 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, struct rxrpc_conn_parameters *cp, struct sockaddr_rxrpc *srx, struct rxrpc_call_params *p, - gfp_t gfp) + gfp_t gfp, + unsigned int debug_id) __releases(&rx->sk.sk_lock.slock) { struct rxrpc_call *call, *xcall; @@ -225,7 +228,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, _enter("%p,%lx", rx, p->user_call_ID); - call = rxrpc_alloc_client_call(rx, srx, gfp); + call = rxrpc_alloc_client_call(rx, srx, gfp, debug_id); if (IS_ERR(call)) { release_sock(&rx->sk); _leave(" = %ld", PTR_ERR(call)); diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index b1dfae107431..d2ec3fd593e8 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -160,7 +160,8 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, lockdep_is_held(&conn->channel_lock)); if (call) { if (compl == RXRPC_CALL_LOCALLY_ABORTED) - trace_rxrpc_abort("CON", call->cid, + trace_rxrpc_abort(call->debug_id, + "CON", call->cid, call->call_id, 0, abort_code, error); if (rxrpc_set_call_completion(call, compl, diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 6fc61400337f..2a868fdab0ae 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1307,21 +1307,21 @@ out_unlock: wrong_security: rcu_read_unlock(); - trace_rxrpc_abort("SEC", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + trace_rxrpc_abort(0, "SEC", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, RXKADINCONSISTENCY, EBADMSG); skb->priority = RXKADINCONSISTENCY; goto post_abort; reupgrade: rcu_read_unlock(); - trace_rxrpc_abort("UPG", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + trace_rxrpc_abort(0, "UPG", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, RX_PROTOCOL_ERROR, EBADMSG); goto protocol_error; bad_message_unlock: rcu_read_unlock(); bad_message: - trace_rxrpc_abort("BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + trace_rxrpc_abort(0, "BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, RX_PROTOCOL_ERROR, EBADMSG); protocol_error: skb->priority = RX_PROTOCOL_ERROR; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 09f2a3e05221..8503f279b467 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -579,7 +579,8 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, cp.exclusive = rx->exclusive | p->exclusive; cp.upgrade = p->upgrade; cp.service_id = srx->srx_service; - call = rxrpc_new_client_call(rx, &cp, srx, &p->call, GFP_KERNEL); + call = rxrpc_new_client_call(rx, &cp, srx, &p->call, GFP_KERNEL, + atomic_inc_return(&rxrpc_debug_id)); /* The socket is now unlocked */ _leave(" = %p\n", call); diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index 9efbfc753347..5e677dac2a0c 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -347,13 +347,13 @@ void wireless_nlevent_flush(void) struct sk_buff *skb; struct net *net; - ASSERT_RTNL(); - + down_read(&net_rwsem); for_each_net(net) { while ((skb = skb_dequeue(&net->wext_nlevents))) rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL); } + up_read(&net_rwsem); } EXPORT_SYMBOL_GPL(wireless_nlevent_flush); @@ -410,9 +410,7 @@ subsys_initcall(wireless_nlevent_init); /* Process events generated by the wireless layer or the driver. */ static void wireless_nlevent_process(struct work_struct *work) { - rtnl_lock(); wireless_nlevent_flush(); - rtnl_unlock(); } static DECLARE_WORK(wireless_nlevent_work, wireless_nlevent_process); diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 1472c0857975..44fc54dc013c 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -9,6 +9,7 @@ */ #include <linux/bottom_half.h> +#include <linux/cache.h> #include <linux/interrupt.h> #include <linux/slab.h> #include <linux/module.h> @@ -31,7 +32,7 @@ struct xfrm_trans_cb { #define XFRM_TRANS_SKB_CB(__skb) ((struct xfrm_trans_cb *)&((__skb)->cb[0])) -static struct kmem_cache *secpath_cachep __read_mostly; +static struct kmem_cache *secpath_cachep __ro_after_init; static DEFINE_SPINLOCK(xfrm_input_afinfo_lock); static struct xfrm_input_afinfo const __rcu *xfrm_input_afinfo[AF_INET6 + 1]; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 625b3fca5704..0e065db6c7c0 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -51,7 +51,7 @@ static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock); static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1] __read_mostly; -static struct kmem_cache *xfrm_dst_cache __read_mostly; +static struct kmem_cache *xfrm_dst_cache __ro_after_init; static __read_mostly seqcount_t xfrm_policy_hash_generation; static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr); @@ -1743,7 +1743,7 @@ static void xfrm_pcpu_work_fn(struct work_struct *work) void xfrm_policy_cache_flush(void) { struct xfrm_dst *old; - bool found = 0; + bool found = false; int cpu; might_sleep(); diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h index 1f173a7a4daa..a0b465316292 100644 --- a/security/selinux/include/xfrm.h +++ b/security/selinux/include/xfrm.h @@ -47,10 +47,10 @@ static inline void selinux_xfrm_notify_policyload(void) { struct net *net; - rtnl_lock(); + down_read(&net_rwsem); for_each_net(net) rt_genid_bump_all(net); - rtnl_unlock(); + up_read(&net_rwsem); } #else static inline int selinux_xfrm_enabled(void) |