diff options
91 files changed, 3820 insertions, 2345 deletions
diff --git a/Documentation/devicetree/bindings/net/rockchip-dwmac.txt b/Documentation/devicetree/bindings/net/rockchip-dwmac.txt index cccd945fc45b..95383c5131fc 100644 --- a/Documentation/devicetree/bindings/net/rockchip-dwmac.txt +++ b/Documentation/devicetree/bindings/net/rockchip-dwmac.txt @@ -3,8 +3,12 @@ Rockchip SoC RK3288 10/100/1000 Ethernet driver(GMAC) The device node has following properties. Required properties: - - compatible: Can be one of "rockchip,rk3228-gmac", "rockchip,rk3288-gmac", - "rockchip,rk3368-gmac" + - compatible: should be "rockchip,<name>-gamc" + "rockchip,rk3228-gmac": found on RK322x SoCs + "rockchip,rk3288-gmac": found on RK3288 SoCs + "rockchip,rk3366-gmac": found on RK3366 SoCs + "rockchip,rk3368-gmac": found on RK3368 SoCs + "rockchip,rk3399-gmac": found on RK3399 SoCs - reg: addresses and length of the register sets for the device. - interrupts: Should contain the GMAC interrupts. - interrupt-names: Should contain the interrupt names "macirq". diff --git a/MAINTAINERS b/MAINTAINERS index 734e03556e2c..9a1783547baf 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12285,6 +12285,7 @@ F: drivers/net/usb/smsc75xx.* USB SMSC95XX ETHERNET DRIVER M: Steve Glendinning <steve.glendinning@shawell.net> +M: Microchip Linux Driver Support <UNGLinuxDriver@microchip.com> L: netdev@vger.kernel.org S: Maintained F: drivers/net/usb/smsc95xx.* diff --git a/drivers/net/dsa/mv88e6xxx/Kconfig b/drivers/net/dsa/mv88e6xxx/Kconfig index ac77737bbd87..486668813e15 100644 --- a/drivers/net/dsa/mv88e6xxx/Kconfig +++ b/drivers/net/dsa/mv88e6xxx/Kconfig @@ -6,3 +6,14 @@ config NET_DSA_MV88E6XXX help This driver adds support for most of the Marvell 88E6xxx models of Ethernet switch chips, except 88E6060. + +config NET_DSA_MV88E6XXX_GLOBAL2 + bool "Switch Global 2 Registers support" + default y + depends on NET_DSA_MV88E6XXX + help + This registers set at internal SMI address 0x1C provides extended + features like EEPROM interface, trunking, cross-chip setup, etc. + + It is required on most chips. If the chip you compile the support for + doesn't have such registers set, say N here. In doubt, say Y. diff --git a/drivers/net/dsa/mv88e6xxx/Makefile b/drivers/net/dsa/mv88e6xxx/Makefile index 6e29a75ee2f7..697103934317 100644 --- a/drivers/net/dsa/mv88e6xxx/Makefile +++ b/drivers/net/dsa/mv88e6xxx/Makefile @@ -1 +1,3 @@ -obj-$(CONFIG_NET_DSA_MV88E6XXX) += chip.o +obj-$(CONFIG_NET_DSA_MV88E6XXX) += mv88e6xxx.o +mv88e6xxx-objs := chip.o +mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2.o diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index d6b0f78e9598..70a812d159c9 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -29,7 +29,9 @@ #include <linux/phy.h> #include <net/dsa.h> #include <net/switchdev.h> + #include "mv88e6xxx.h" +#include "global2.h" static void assert_reg_lock(struct mv88e6xxx_chip *chip) { @@ -182,8 +184,7 @@ static const struct mv88e6xxx_ops mv88e6xxx_smi_multi_chip_ops = { .write = mv88e6xxx_smi_multi_chip_write, }; -static int mv88e6xxx_read(struct mv88e6xxx_chip *chip, - int addr, int reg, u16 *val) +int mv88e6xxx_read(struct mv88e6xxx_chip *chip, int addr, int reg, u16 *val) { int err; @@ -199,8 +200,7 @@ static int mv88e6xxx_read(struct mv88e6xxx_chip *chip, return 0; } -static int mv88e6xxx_write(struct mv88e6xxx_chip *chip, - int addr, int reg, u16 val) +int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val) { int err; @@ -306,8 +306,7 @@ static int mv88e6xxx_serdes_write(struct mv88e6xxx_chip *chip, int reg, u16 val) reg, val); } -static int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg, - u16 mask) +int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg, u16 mask) { int i; @@ -330,8 +329,7 @@ static int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg, } /* Indirect write to single pointer-data register with an Update bit */ -static int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg, - u16 update) +int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg, u16 update) { u16 val; int err; @@ -2878,330 +2876,6 @@ static int mv88e6xxx_g1_setup(struct mv88e6xxx_chip *chip) return 0; } -static int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip, - int target, int port) -{ - u16 val = (target << 8) | (port & 0xf); - - return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_DEVICE_MAPPING, val); -} - -static int mv88e6xxx_g2_set_device_mapping(struct mv88e6xxx_chip *chip) -{ - int target, port; - int err; - - /* Initialize the routing port to the 32 possible target devices */ - for (target = 0; target < 32; ++target) { - port = 0xf; - - if (target < DSA_MAX_SWITCHES) { - port = chip->ds->rtable[target]; - if (port == DSA_RTABLE_NONE) - port = 0xf; - } - - err = mv88e6xxx_g2_device_mapping_write(chip, target, port); - if (err) - break; - } - - return err; -} - -static int mv88e6xxx_g2_trunk_mask_write(struct mv88e6xxx_chip *chip, int num, - bool hask, u16 mask) -{ - const u16 port_mask = BIT(chip->info->num_ports) - 1; - u16 val = (num << 12) | (mask & port_mask); - - if (hask) - val |= GLOBAL2_TRUNK_MASK_HASK; - - return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_TRUNK_MASK, val); -} - -static int mv88e6xxx_g2_trunk_mapping_write(struct mv88e6xxx_chip *chip, int id, - u16 map) -{ - const u16 port_mask = BIT(chip->info->num_ports) - 1; - u16 val = (id << 11) | (map & port_mask); - - return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_TRUNK_MAPPING, val); -} - -static int mv88e6xxx_g2_clear_trunk(struct mv88e6xxx_chip *chip) -{ - const u16 port_mask = BIT(chip->info->num_ports) - 1; - int i, err; - - /* Clear all eight possible Trunk Mask vectors */ - for (i = 0; i < 8; ++i) { - err = mv88e6xxx_g2_trunk_mask_write(chip, i, false, port_mask); - if (err) - return err; - } - - /* Clear all sixteen possible Trunk ID routing vectors */ - for (i = 0; i < 16; ++i) { - err = mv88e6xxx_g2_trunk_mapping_write(chip, i, 0); - if (err) - return err; - } - - return 0; -} - -static int mv88e6xxx_g2_clear_irl(struct mv88e6xxx_chip *chip) -{ - int port, err; - - /* Init all Ingress Rate Limit resources of all ports */ - for (port = 0; port < chip->info->num_ports; ++port) { - /* XXX newer chips (like 88E6390) have different 2-bit ops */ - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_IRL_CMD, - GLOBAL2_IRL_CMD_OP_INIT_ALL | - (port << 8)); - if (err) - break; - - /* Wait for the operation to complete */ - err = mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_IRL_CMD, - GLOBAL2_IRL_CMD_BUSY); - if (err) - break; - } - - return err; -} - -/* Indirect write to the Switch MAC/WoL/WoF register */ -static int mv88e6xxx_g2_switch_mac_write(struct mv88e6xxx_chip *chip, - unsigned int pointer, u8 data) -{ - u16 val = (pointer << 8) | data; - - return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_SWITCH_MAC, val); -} - -static int mv88e6xxx_g2_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr) -{ - int i, err; - - for (i = 0; i < 6; i++) { - err = mv88e6xxx_g2_switch_mac_write(chip, i, addr[i]); - if (err) - break; - } - - return err; -} - -static int mv88e6xxx_g2_pot_write(struct mv88e6xxx_chip *chip, int pointer, - u8 data) -{ - u16 val = (pointer << 8) | (data & 0x7); - - return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_PRIO_OVERRIDE, val); -} - -static int mv88e6xxx_g2_clear_pot(struct mv88e6xxx_chip *chip) -{ - int i, err; - - /* Clear all sixteen possible Priority Override entries */ - for (i = 0; i < 16; i++) { - err = mv88e6xxx_g2_pot_write(chip, i, 0); - if (err) - break; - } - - return err; -} - -static int mv88e6xxx_g2_eeprom_wait(struct mv88e6xxx_chip *chip) -{ - return mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_EEPROM_CMD, - GLOBAL2_EEPROM_CMD_BUSY | - GLOBAL2_EEPROM_CMD_RUNNING); -} - -static int mv88e6xxx_g2_eeprom_cmd(struct mv88e6xxx_chip *chip, u16 cmd) -{ - int err; - - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_EEPROM_CMD, cmd); - if (err) - return err; - - return mv88e6xxx_g2_eeprom_wait(chip); -} - -static int mv88e6xxx_g2_eeprom_read16(struct mv88e6xxx_chip *chip, - u8 addr, u16 *data) -{ - u16 cmd = GLOBAL2_EEPROM_CMD_OP_READ | addr; - int err; - - err = mv88e6xxx_g2_eeprom_wait(chip); - if (err) - return err; - - err = mv88e6xxx_g2_eeprom_cmd(chip, cmd); - if (err) - return err; - - return mv88e6xxx_read(chip, REG_GLOBAL2, GLOBAL2_EEPROM_DATA, data); -} - -static int mv88e6xxx_g2_eeprom_write16(struct mv88e6xxx_chip *chip, - u8 addr, u16 data) -{ - u16 cmd = GLOBAL2_EEPROM_CMD_OP_WRITE | addr; - int err; - - err = mv88e6xxx_g2_eeprom_wait(chip); - if (err) - return err; - - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_EEPROM_DATA, data); - if (err) - return err; - - return mv88e6xxx_g2_eeprom_cmd(chip, cmd); -} - -static int mv88e6xxx_g2_smi_phy_wait(struct mv88e6xxx_chip *chip) -{ - return mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_CMD, - GLOBAL2_SMI_PHY_CMD_BUSY); -} - -static int mv88e6xxx_g2_smi_phy_cmd(struct mv88e6xxx_chip *chip, u16 cmd) -{ - int err; - - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_CMD, cmd); - if (err) - return err; - - return mv88e6xxx_g2_smi_phy_wait(chip); -} - -static int mv88e6xxx_g2_smi_phy_read(struct mv88e6xxx_chip *chip, int addr, - int reg, u16 *val) -{ - u16 cmd = GLOBAL2_SMI_PHY_CMD_OP_22_READ_DATA | (addr << 5) | reg; - int err; - - err = mv88e6xxx_g2_smi_phy_wait(chip); - if (err) - return err; - - err = mv88e6xxx_g2_smi_phy_cmd(chip, cmd); - if (err) - return err; - - return mv88e6xxx_read(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_DATA, val); -} - -static int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, int addr, - int reg, u16 val) -{ - u16 cmd = GLOBAL2_SMI_PHY_CMD_OP_22_WRITE_DATA | (addr << 5) | reg; - int err; - - err = mv88e6xxx_g2_smi_phy_wait(chip); - if (err) - return err; - - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_DATA, val); - if (err) - return err; - - return mv88e6xxx_g2_smi_phy_cmd(chip, cmd); -} - -static const struct mv88e6xxx_ops mv88e6xxx_g2_smi_phy_ops = { - .read = mv88e6xxx_g2_smi_phy_read, - .write = mv88e6xxx_g2_smi_phy_write, -}; - -static int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip) -{ - u16 reg; - int err; - - if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_2X)) { - /* Consider the frames with reserved multicast destination - * addresses matching 01:80:c2:00:00:2x as MGMT. - */ - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_MGMT_EN_2X, - 0xffff); - if (err) - return err; - } - - if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_0X)) { - /* Consider the frames with reserved multicast destination - * addresses matching 01:80:c2:00:00:0x as MGMT. - */ - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_MGMT_EN_0X, - 0xffff); - if (err) - return err; - } - - /* Ignore removed tag data on doubly tagged packets, disable - * flow control messages, force flow control priority to the - * highest, and send all special multicast frames to the CPU - * port at the highest priority. - */ - reg = GLOBAL2_SWITCH_MGMT_FORCE_FLOW_CTRL_PRI | (0x7 << 4); - if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_0X) || - mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_2X)) - reg |= GLOBAL2_SWITCH_MGMT_RSVD2CPU | 0x7; - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_SWITCH_MGMT, reg); - if (err) - return err; - - /* Program the DSA routing table. */ - err = mv88e6xxx_g2_set_device_mapping(chip); - if (err) - return err; - - /* Clear all trunk masks and mapping. */ - err = mv88e6xxx_g2_clear_trunk(chip); - if (err) - return err; - - if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_IRL)) { - /* Disable ingress rate limiting by resetting all per port - * ingress rate limit resources to their initial state. - */ - err = mv88e6xxx_g2_clear_irl(chip); - if (err) - return err; - } - - if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_PVT)) { - /* Initialize Cross-chip Port VLAN Table to reset defaults */ - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_PVT_ADDR, - GLOBAL2_PVT_ADDR_OP_INIT_ONES); - if (err) - return err; - } - - if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_POT)) { - /* Clear the priority override table. */ - err = mv88e6xxx_g2_clear_pot(chip); - if (err) - return err; - } - - return 0; -} - static int mv88e6xxx_setup(struct dsa_switch *ds) { struct mv88e6xxx_chip *chip = ds->priv; @@ -3503,56 +3177,6 @@ static int mv88e6xxx_get_eeprom_len(struct dsa_switch *ds) return chip->eeprom_len; } -static int mv88e6xxx_get_eeprom16(struct mv88e6xxx_chip *chip, - struct ethtool_eeprom *eeprom, u8 *data) -{ - unsigned int offset = eeprom->offset; - unsigned int len = eeprom->len; - u16 val; - int err; - - eeprom->len = 0; - - if (offset & 1) { - err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val); - if (err) - return err; - - *data++ = (val >> 8) & 0xff; - - offset++; - len--; - eeprom->len++; - } - - while (len >= 2) { - err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val); - if (err) - return err; - - *data++ = val & 0xff; - *data++ = (val >> 8) & 0xff; - - offset += 2; - len -= 2; - eeprom->len += 2; - } - - if (len) { - err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val); - if (err) - return err; - - *data++ = val & 0xff; - - offset++; - len--; - eeprom->len++; - } - - return 0; -} - static int mv88e6xxx_get_eeprom(struct dsa_switch *ds, struct ethtool_eeprom *eeprom, u8 *data) { @@ -3562,7 +3186,7 @@ static int mv88e6xxx_get_eeprom(struct dsa_switch *ds, mutex_lock(&chip->reg_lock); if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_EEPROM16)) - err = mv88e6xxx_get_eeprom16(chip, eeprom, data); + err = mv88e6xxx_g2_get_eeprom16(chip, eeprom, data); else err = -EOPNOTSUPP; @@ -3576,72 +3200,6 @@ static int mv88e6xxx_get_eeprom(struct dsa_switch *ds, return 0; } -static int mv88e6xxx_set_eeprom16(struct mv88e6xxx_chip *chip, - struct ethtool_eeprom *eeprom, u8 *data) -{ - unsigned int offset = eeprom->offset; - unsigned int len = eeprom->len; - u16 val; - int err; - - /* Ensure the RO WriteEn bit is set */ - err = mv88e6xxx_read(chip, REG_GLOBAL2, GLOBAL2_EEPROM_CMD, &val); - if (err) - return err; - - if (!(val & GLOBAL2_EEPROM_CMD_WRITE_EN)) - return -EROFS; - - eeprom->len = 0; - - if (offset & 1) { - err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val); - if (err) - return err; - - val = (*data++ << 8) | (val & 0xff); - - err = mv88e6xxx_g2_eeprom_write16(chip, offset >> 1, val); - if (err) - return err; - - offset++; - len--; - eeprom->len++; - } - - while (len >= 2) { - val = *data++; - val |= *data++ << 8; - - err = mv88e6xxx_g2_eeprom_write16(chip, offset >> 1, val); - if (err) - return err; - - offset += 2; - len -= 2; - eeprom->len += 2; - } - - if (len) { - err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val); - if (err) - return err; - - val = (val & 0xff00) | *data++; - - err = mv88e6xxx_g2_eeprom_write16(chip, offset >> 1, val); - if (err) - return err; - - offset++; - len--; - eeprom->len++; - } - - return 0; -} - static int mv88e6xxx_set_eeprom(struct dsa_switch *ds, struct ethtool_eeprom *eeprom, u8 *data) { @@ -3654,7 +3212,7 @@ static int mv88e6xxx_set_eeprom(struct dsa_switch *ds, mutex_lock(&chip->reg_lock); if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_EEPROM16)) - err = mv88e6xxx_set_eeprom16(chip, eeprom, data); + err = mv88e6xxx_g2_set_eeprom16(chip, eeprom, data); else err = -EOPNOTSUPP; @@ -3886,6 +3444,10 @@ static int mv88e6xxx_detect(struct mv88e6xxx_chip *chip) /* Update the compatible info with the probed one */ chip->info = info; + err = mv88e6xxx_g2_require(chip); + if (err) + return err; + dev_info(chip->dev, "switch 0x%x detected: %s, revision %u\n", chip->info->prod_num, chip->info->name, rev); @@ -3907,6 +3469,11 @@ static struct mv88e6xxx_chip *mv88e6xxx_alloc_chip(struct device *dev) return chip; } +static const struct mv88e6xxx_ops mv88e6xxx_g2_smi_phy_ops = { + .read = mv88e6xxx_g2_smi_phy_read, + .write = mv88e6xxx_g2_smi_phy_write, +}; + static const struct mv88e6xxx_ops mv88e6xxx_phy_ops = { .read = mv88e6xxx_read, .write = mv88e6xxx_write, diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c new file mode 100644 index 000000000000..99ed028298ac --- /dev/null +++ b/drivers/net/dsa/mv88e6xxx/global2.c @@ -0,0 +1,471 @@ +/* + * Marvell 88E6xxx Switch Global 2 Registers support (device address 0x1C) + * + * Copyright (c) 2008 Marvell Semiconductor + * + * Copyright (c) 2016 Vivien Didelot <vivien.didelot@savoirfairelinux.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include "mv88e6xxx.h" +#include "global2.h" + +/* Offset 0x06: Device Mapping Table register */ + +static int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip, + int target, int port) +{ + u16 val = (target << 8) | (port & 0xf); + + return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_DEVICE_MAPPING, val); +} + +static int mv88e6xxx_g2_set_device_mapping(struct mv88e6xxx_chip *chip) +{ + int target, port; + int err; + + /* Initialize the routing port to the 32 possible target devices */ + for (target = 0; target < 32; ++target) { + port = 0xf; + + if (target < DSA_MAX_SWITCHES) { + port = chip->ds->rtable[target]; + if (port == DSA_RTABLE_NONE) + port = 0xf; + } + + err = mv88e6xxx_g2_device_mapping_write(chip, target, port); + if (err) + break; + } + + return err; +} + +/* Offset 0x07: Trunk Mask Table register */ + +static int mv88e6xxx_g2_trunk_mask_write(struct mv88e6xxx_chip *chip, int num, + bool hask, u16 mask) +{ + const u16 port_mask = BIT(chip->info->num_ports) - 1; + u16 val = (num << 12) | (mask & port_mask); + + if (hask) + val |= GLOBAL2_TRUNK_MASK_HASK; + + return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_TRUNK_MASK, val); +} + +/* Offset 0x08: Trunk Mapping Table register */ + +static int mv88e6xxx_g2_trunk_mapping_write(struct mv88e6xxx_chip *chip, int id, + u16 map) +{ + const u16 port_mask = BIT(chip->info->num_ports) - 1; + u16 val = (id << 11) | (map & port_mask); + + return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_TRUNK_MAPPING, val); +} + +static int mv88e6xxx_g2_clear_trunk(struct mv88e6xxx_chip *chip) +{ + const u16 port_mask = BIT(chip->info->num_ports) - 1; + int i, err; + + /* Clear all eight possible Trunk Mask vectors */ + for (i = 0; i < 8; ++i) { + err = mv88e6xxx_g2_trunk_mask_write(chip, i, false, port_mask); + if (err) + return err; + } + + /* Clear all sixteen possible Trunk ID routing vectors */ + for (i = 0; i < 16; ++i) { + err = mv88e6xxx_g2_trunk_mapping_write(chip, i, 0); + if (err) + return err; + } + + return 0; +} + +/* Offset 0x09: Ingress Rate Command register + * Offset 0x0A: Ingress Rate Data register + */ + +static int mv88e6xxx_g2_clear_irl(struct mv88e6xxx_chip *chip) +{ + int port, err; + + /* Init all Ingress Rate Limit resources of all ports */ + for (port = 0; port < chip->info->num_ports; ++port) { + /* XXX newer chips (like 88E6390) have different 2-bit ops */ + err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_IRL_CMD, + GLOBAL2_IRL_CMD_OP_INIT_ALL | + (port << 8)); + if (err) + break; + + /* Wait for the operation to complete */ + err = mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_IRL_CMD, + GLOBAL2_IRL_CMD_BUSY); + if (err) + break; + } + + return err; +} + +/* Offset 0x0D: Switch MAC/WoL/WoF register */ + +static int mv88e6xxx_g2_switch_mac_write(struct mv88e6xxx_chip *chip, + unsigned int pointer, u8 data) +{ + u16 val = (pointer << 8) | data; + + return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_SWITCH_MAC, val); +} + +int mv88e6xxx_g2_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr) +{ + int i, err; + + for (i = 0; i < 6; i++) { + err = mv88e6xxx_g2_switch_mac_write(chip, i, addr[i]); + if (err) + break; + } + + return err; +} + +/* Offset 0x0F: Priority Override Table */ + +static int mv88e6xxx_g2_pot_write(struct mv88e6xxx_chip *chip, int pointer, + u8 data) +{ + u16 val = (pointer << 8) | (data & 0x7); + + return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_PRIO_OVERRIDE, val); +} + +static int mv88e6xxx_g2_clear_pot(struct mv88e6xxx_chip *chip) +{ + int i, err; + + /* Clear all sixteen possible Priority Override entries */ + for (i = 0; i < 16; i++) { + err = mv88e6xxx_g2_pot_write(chip, i, 0); + if (err) + break; + } + + return err; +} + +/* Offset 0x14: EEPROM Command + * Offset 0x15: EEPROM Data + */ + +static int mv88e6xxx_g2_eeprom_wait(struct mv88e6xxx_chip *chip) +{ + return mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_EEPROM_CMD, + GLOBAL2_EEPROM_CMD_BUSY | + GLOBAL2_EEPROM_CMD_RUNNING); +} + +static int mv88e6xxx_g2_eeprom_cmd(struct mv88e6xxx_chip *chip, u16 cmd) +{ + int err; + + err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_EEPROM_CMD, cmd); + if (err) + return err; + + return mv88e6xxx_g2_eeprom_wait(chip); +} + +static int mv88e6xxx_g2_eeprom_read16(struct mv88e6xxx_chip *chip, + u8 addr, u16 *data) +{ + u16 cmd = GLOBAL2_EEPROM_CMD_OP_READ | addr; + int err; + + err = mv88e6xxx_g2_eeprom_wait(chip); + if (err) + return err; + + err = mv88e6xxx_g2_eeprom_cmd(chip, cmd); + if (err) + return err; + + return mv88e6xxx_read(chip, REG_GLOBAL2, GLOBAL2_EEPROM_DATA, data); +} + +static int mv88e6xxx_g2_eeprom_write16(struct mv88e6xxx_chip *chip, + u8 addr, u16 data) +{ + u16 cmd = GLOBAL2_EEPROM_CMD_OP_WRITE | addr; + int err; + + err = mv88e6xxx_g2_eeprom_wait(chip); + if (err) + return err; + + err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_EEPROM_DATA, data); + if (err) + return err; + + return mv88e6xxx_g2_eeprom_cmd(chip, cmd); +} + +int mv88e6xxx_g2_get_eeprom16(struct mv88e6xxx_chip *chip, + struct ethtool_eeprom *eeprom, u8 *data) +{ + unsigned int offset = eeprom->offset; + unsigned int len = eeprom->len; + u16 val; + int err; + + eeprom->len = 0; + + if (offset & 1) { + err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val); + if (err) + return err; + + *data++ = (val >> 8) & 0xff; + + offset++; + len--; + eeprom->len++; + } + + while (len >= 2) { + err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val); + if (err) + return err; + + *data++ = val & 0xff; + *data++ = (val >> 8) & 0xff; + + offset += 2; + len -= 2; + eeprom->len += 2; + } + + if (len) { + err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val); + if (err) + return err; + + *data++ = val & 0xff; + + offset++; + len--; + eeprom->len++; + } + + return 0; +} + +int mv88e6xxx_g2_set_eeprom16(struct mv88e6xxx_chip *chip, + struct ethtool_eeprom *eeprom, u8 *data) +{ + unsigned int offset = eeprom->offset; + unsigned int len = eeprom->len; + u16 val; + int err; + + /* Ensure the RO WriteEn bit is set */ + err = mv88e6xxx_read(chip, REG_GLOBAL2, GLOBAL2_EEPROM_CMD, &val); + if (err) + return err; + + if (!(val & GLOBAL2_EEPROM_CMD_WRITE_EN)) + return -EROFS; + + eeprom->len = 0; + + if (offset & 1) { + err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val); + if (err) + return err; + + val = (*data++ << 8) | (val & 0xff); + + err = mv88e6xxx_g2_eeprom_write16(chip, offset >> 1, val); + if (err) + return err; + + offset++; + len--; + eeprom->len++; + } + + while (len >= 2) { + val = *data++; + val |= *data++ << 8; + + err = mv88e6xxx_g2_eeprom_write16(chip, offset >> 1, val); + if (err) + return err; + + offset += 2; + len -= 2; + eeprom->len += 2; + } + + if (len) { + err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val); + if (err) + return err; + + val = (val & 0xff00) | *data++; + + err = mv88e6xxx_g2_eeprom_write16(chip, offset >> 1, val); + if (err) + return err; + + offset++; + len--; + eeprom->len++; + } + + return 0; +} + +/* Offset 0x18: SMI PHY Command Register + * Offset 0x19: SMI PHY Data Register + */ + +static int mv88e6xxx_g2_smi_phy_wait(struct mv88e6xxx_chip *chip) +{ + return mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_CMD, + GLOBAL2_SMI_PHY_CMD_BUSY); +} + +static int mv88e6xxx_g2_smi_phy_cmd(struct mv88e6xxx_chip *chip, u16 cmd) +{ + int err; + + err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_CMD, cmd); + if (err) + return err; + + return mv88e6xxx_g2_smi_phy_wait(chip); +} + +int mv88e6xxx_g2_smi_phy_read(struct mv88e6xxx_chip *chip, int addr, int reg, + u16 *val) +{ + u16 cmd = GLOBAL2_SMI_PHY_CMD_OP_22_READ_DATA | (addr << 5) | reg; + int err; + + err = mv88e6xxx_g2_smi_phy_wait(chip); + if (err) + return err; + + err = mv88e6xxx_g2_smi_phy_cmd(chip, cmd); + if (err) + return err; + + return mv88e6xxx_read(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_DATA, val); +} + +int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, int addr, int reg, + u16 val) +{ + u16 cmd = GLOBAL2_SMI_PHY_CMD_OP_22_WRITE_DATA | (addr << 5) | reg; + int err; + + err = mv88e6xxx_g2_smi_phy_wait(chip); + if (err) + return err; + + err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_SMI_PHY_DATA, val); + if (err) + return err; + + return mv88e6xxx_g2_smi_phy_cmd(chip, cmd); +} + +int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip) +{ + u16 reg; + int err; + + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_2X)) { + /* Consider the frames with reserved multicast destination + * addresses matching 01:80:c2:00:00:2x as MGMT. + */ + err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_MGMT_EN_2X, + 0xffff); + if (err) + return err; + } + + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_0X)) { + /* Consider the frames with reserved multicast destination + * addresses matching 01:80:c2:00:00:0x as MGMT. + */ + err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_MGMT_EN_0X, + 0xffff); + if (err) + return err; + } + + /* Ignore removed tag data on doubly tagged packets, disable + * flow control messages, force flow control priority to the + * highest, and send all special multicast frames to the CPU + * port at the highest priority. + */ + reg = GLOBAL2_SWITCH_MGMT_FORCE_FLOW_CTRL_PRI | (0x7 << 4); + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_0X) || + mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_2X)) + reg |= GLOBAL2_SWITCH_MGMT_RSVD2CPU | 0x7; + err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_SWITCH_MGMT, reg); + if (err) + return err; + + /* Program the DSA routing table. */ + err = mv88e6xxx_g2_set_device_mapping(chip); + if (err) + return err; + + /* Clear all trunk masks and mapping. */ + err = mv88e6xxx_g2_clear_trunk(chip); + if (err) + return err; + + if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_IRL)) { + /* Disable ingress rate limiting by resetting all per port + * ingress rate limit resources to their initial state. + */ + err = mv88e6xxx_g2_clear_irl(chip); + if (err) + return err; + } + + if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_PVT)) { + /* Initialize Cross-chip Port VLAN Table to reset defaults */ + err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_PVT_ADDR, + GLOBAL2_PVT_ADDR_OP_INIT_ONES); + if (err) + return err; + } + + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_POT)) { + /* Clear the priority override table. */ + err = mv88e6xxx_g2_clear_pot(chip); + if (err) + return err; + } + + return 0; +} diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h new file mode 100644 index 000000000000..c4bb9035ee3a --- /dev/null +++ b/drivers/net/dsa/mv88e6xxx/global2.h @@ -0,0 +1,88 @@ +/* + * Marvell 88E6xxx Switch Global 2 Registers support (device address 0x1C) + * + * Copyright (c) 2008 Marvell Semiconductor + * + * Copyright (c) 2016 Vivien Didelot <vivien.didelot@savoirfairelinux.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef _MV88E6XXX_GLOBAL2_H +#define _MV88E6XXX_GLOBAL2_H + +#include "mv88e6xxx.h" + +#ifdef CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 + +static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip) +{ + return 0; +} + +int mv88e6xxx_g2_smi_phy_read(struct mv88e6xxx_chip *chip, int addr, int reg, + u16 *val); +int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, int addr, int reg, + u16 val); +int mv88e6xxx_g2_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr); +int mv88e6xxx_g2_get_eeprom16(struct mv88e6xxx_chip *chip, + struct ethtool_eeprom *eeprom, u8 *data); +int mv88e6xxx_g2_set_eeprom16(struct mv88e6xxx_chip *chip, + struct ethtool_eeprom *eeprom, u8 *data); +int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip); + +#else /* !CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */ + +static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip) +{ + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_GLOBAL2)) { + dev_err(chip->dev, "this chip requires CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 enabled\n"); + return -EOPNOTSUPP; + } + + return 0; +} + +static inline int mv88e6xxx_g2_smi_phy_read(struct mv88e6xxx_chip *chip, + int addr, int reg, u16 *val) +{ + return -EOPNOTSUPP; +} + +static inline int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, + int addr, int reg, u16 val) +{ + return -EOPNOTSUPP; +} + +static inline int mv88e6xxx_g2_set_switch_mac(struct mv88e6xxx_chip *chip, + u8 *addr) +{ + return -EOPNOTSUPP; +} + +static inline int mv88e6xxx_g2_get_eeprom16(struct mv88e6xxx_chip *chip, + struct ethtool_eeprom *eeprom, + u8 *data) +{ + return -EOPNOTSUPP; +} + +static inline int mv88e6xxx_g2_set_eeprom16(struct mv88e6xxx_chip *chip, + struct ethtool_eeprom *eeprom, + u8 *data) +{ + return -EOPNOTSUPP; +} + +static inline int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip) +{ + return -EOPNOTSUPP; +} + +#endif /* CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */ + +#endif /* _MV88E6XXX_GLOBAL2_H */ diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h index e157d4f69864..52f3f5231f51 100644 --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h @@ -718,4 +718,10 @@ static inline bool mv88e6xxx_has(struct mv88e6xxx_chip *chip, return (chip->info->flags & flags) == flags; } +int mv88e6xxx_read(struct mv88e6xxx_chip *chip, int addr, int reg, u16 *val); +int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val); +int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg, + u16 update); +int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg, u16 mask); + #endif diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c index 2d812064731a..bddb198c0b74 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c +++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c @@ -887,6 +887,67 @@ static irqreturn_t cn23xx_interrupt_handler(void *dev) return IRQ_HANDLED; } +static void cn23xx_bar1_idx_setup(struct octeon_device *oct, u64 core_addr, + u32 idx, int valid) +{ + u64 bar1; + u64 reg_adr; + + if (!valid) { + reg_adr = lio_pci_readq( + oct, CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx)); + WRITE_ONCE(bar1, reg_adr); + lio_pci_writeq(oct, (READ_ONCE(bar1) & 0xFFFFFFFEULL), + CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx)); + reg_adr = lio_pci_readq( + oct, CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx)); + WRITE_ONCE(bar1, reg_adr); + return; + } + + /* The PEM(0..3)_BAR1_INDEX(0..15)[ADDR_IDX]<23:4> stores + * bits <41:22> of the Core Addr + */ + lio_pci_writeq(oct, (((core_addr >> 22) << 4) | PCI_BAR1_MASK), + CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx)); + + WRITE_ONCE(bar1, lio_pci_readq( + oct, CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx))); +} + +static void cn23xx_bar1_idx_write(struct octeon_device *oct, u32 idx, u32 mask) +{ + lio_pci_writeq(oct, mask, + CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx)); +} + +static u32 cn23xx_bar1_idx_read(struct octeon_device *oct, u32 idx) +{ + return (u32)lio_pci_readq( + oct, CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx)); +} + +/* always call with lock held */ +static u32 cn23xx_update_read_index(struct octeon_instr_queue *iq) +{ + u32 new_idx; + u32 last_done; + u32 pkt_in_done = readl(iq->inst_cnt_reg); + + last_done = pkt_in_done - iq->pkt_in_done; + iq->pkt_in_done = pkt_in_done; + + /* Modulo of the new index with the IQ size will give us + * the new index. The iq->reset_instr_cnt is always zero for + * cn23xx, so no extra adjustments are needed. + */ + new_idx = (iq->octeon_read_index + + (u32)(last_done & CN23XX_PKT_IN_DONE_CNT_MASK)) % + iq->max_count; + + return new_idx; +} + static void cn23xx_enable_pf_interrupt(struct octeon_device *oct, u8 intr_flag) { struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip; @@ -1063,6 +1124,11 @@ int setup_cn23xx_octeon_pf_device(struct octeon_device *oct) oct->fn_list.soft_reset = cn23xx_pf_soft_reset; oct->fn_list.setup_device_regs = cn23xx_setup_pf_device_regs; + oct->fn_list.update_iq_read_idx = cn23xx_update_read_index; + + oct->fn_list.bar1_idx_setup = cn23xx_bar1_idx_setup; + oct->fn_list.bar1_idx_write = cn23xx_bar1_idx_write; + oct->fn_list.bar1_idx_read = cn23xx_bar1_idx_read; oct->fn_list.enable_interrupt = cn23xx_enable_pf_interrupt; oct->fn_list.disable_interrupt = cn23xx_disable_pf_interrupt; diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h index 33b758992eb2..21b5c9051967 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h +++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h @@ -51,6 +51,8 @@ int setup_cn23xx_octeon_pf_device(struct octeon_device *oct); int validate_cn23xx_pf_config_info(struct octeon_device *oct, struct octeon_config *conf23xx); +u32 cn23xx_pf_get_oq_ticks(struct octeon_device *oct, u32 time_intr_in_us); + void cn23xx_dump_pf_initialized_regs(struct octeon_device *oct); int cn23xx_fw_loaded(struct octeon_device *oct); diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c index 809179c040a7..201eddb3013a 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_core.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c @@ -156,14 +156,19 @@ void liquidio_link_ctrl_cmd_completion(void *nctrl_ptr) dev_info(&oct->pci_dev->dev, "%s MTU Changed from %d to %d\n", netdev->name, netdev->mtu, nctrl->ncmd.s.param1); - rtnl_lock(); netdev->mtu = nctrl->ncmd.s.param1; - call_netdevice_notifiers(NETDEV_CHANGEMTU, netdev); - rtnl_unlock(); + queue_delayed_work(lio->link_status_wq.wq, + &lio->link_status_wq.wk.work, 0); break; case OCTNET_CMD_GPIO_ACCESS: netif_info(lio, probe, lio->netdev, "LED Flashing visual identification\n"); + + break; + + case OCTNET_CMD_ID_ACTIVE: + netif_info(lio, probe, lio->netdev, "LED Flashing visual identification\n"); + break; case OCTNET_CMD_LRO_ENABLE: diff --git a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c index f3ce7441cb5f..f163e0abbeb2 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c @@ -32,6 +32,7 @@ #include "octeon_network.h" #include "cn66xx_regs.h" #include "cn66xx_device.h" +#include "cn23xx_pf_device.h" static int octnet_get_link_stats(struct net_device *netdev); @@ -75,6 +76,7 @@ enum { #define ARRAY_LENGTH(a) (sizeof(a) / sizeof((a)[0])) #define OCT_ETHTOOL_REGDUMP_LEN 4096 +#define OCT_ETHTOOL_REGDUMP_LEN_23XX (4096 * 11) #define OCT_ETHTOOL_REGSVER 1 /* statistics of PF */ @@ -188,6 +190,10 @@ static const char oct_droq_stats_strings[][ETH_GSTRING_LEN] = { "buffer_alloc_failure", }; +/* LiquidIO driver private flags */ +static const char oct_priv_flags_strings[][ETH_GSTRING_LEN] = { +}; + #define OCTNIC_NCMD_AUTONEG_ON 0x1 #define OCTNIC_NCMD_PHY_ON 0x2 @@ -259,6 +265,13 @@ lio_ethtool_get_channels(struct net_device *dev, max_tx = CFG_GET_IQ_MAX_Q(conf6x); rx_count = CFG_GET_NUM_RXQS_NIC_IF(conf6x, lio->ifidx); tx_count = CFG_GET_NUM_TXQS_NIC_IF(conf6x, lio->ifidx); + } else if (OCTEON_CN23XX_PF(oct)) { + struct octeon_config *conf23 = CHIP_FIELD(oct, cn23xx_pf, conf); + + max_rx = CFG_GET_OQ_MAX_Q(conf23); + max_tx = CFG_GET_IQ_MAX_Q(conf23); + rx_count = CFG_GET_NUM_RXQS_NIC_IF(conf23, lio->ifidx); + tx_count = CFG_GET_NUM_TXQS_NIC_IF(conf23, lio->ifidx); } channel->max_rx = max_rx; @@ -331,6 +344,32 @@ static int octnet_gpio_access(struct net_device *netdev, int addr, int val) return 0; } +static int octnet_id_active(struct net_device *netdev, int val) +{ + struct lio *lio = GET_LIO(netdev); + struct octeon_device *oct = lio->oct_dev; + struct octnic_ctrl_pkt nctrl; + int ret = 0; + + memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt)); + + nctrl.ncmd.u64 = 0; + nctrl.ncmd.s.cmd = OCTNET_CMD_ID_ACTIVE; + nctrl.ncmd.s.param1 = val; + nctrl.iq_no = lio->linfo.txpciq[0].s.q_no; + nctrl.wait_time = 100; + nctrl.netpndev = (u64)netdev; + nctrl.cb_fn = liquidio_link_ctrl_cmd_completion; + + ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl); + if (ret < 0) { + dev_err(&oct->pci_dev->dev, "Failed to configure gpio value\n"); + return -EINVAL; + } + + return 0; +} + /* Callback for when mdio command response arrives */ static void octnet_mdio_resp_callback(struct octeon_device *oct, @@ -474,6 +513,11 @@ static int lio_set_phys_id(struct net_device *netdev, &value); if (ret) return ret; + } else if (oct->chip_id == OCTEON_CN23XX_PF_VID) { + octnet_id_active(netdev, LED_IDENTIFICATION_ON); + + /* returns 0 since updates are asynchronous */ + return 0; } else { return -EINVAL; } @@ -519,7 +563,10 @@ static int lio_set_phys_id(struct net_device *netdev, &lio->phy_beacon_val); if (ret) return ret; + } else if (oct->chip_id == OCTEON_CN23XX_PF_VID) { + octnet_id_active(netdev, LED_IDENTIFICATION_OFF); + return 0; } else { return -EINVAL; } @@ -548,6 +595,13 @@ lio_ethtool_get_ringparam(struct net_device *netdev, rx_max_pending = CN6XXX_MAX_OQ_DESCRIPTORS; rx_pending = CFG_GET_NUM_RX_DESCS_NIC_IF(conf6x, lio->ifidx); tx_pending = CFG_GET_NUM_TX_DESCS_NIC_IF(conf6x, lio->ifidx); + } else if (OCTEON_CN23XX_PF(oct)) { + struct octeon_config *conf23 = CHIP_FIELD(oct, cn23xx_pf, conf); + + tx_max_pending = CN23XX_MAX_IQ_DESCRIPTORS; + rx_max_pending = CN23XX_MAX_OQ_DESCRIPTORS; + rx_pending = CFG_GET_NUM_RX_DESCS_NIC_IF(conf23, lio->ifidx); + tx_pending = CFG_GET_NUM_TX_DESCS_NIC_IF(conf23, lio->ifidx); } if (lio->mtu > OCTNET_DEFAULT_FRM_SIZE - OCTNET_FRM_HEADER_SIZE) { @@ -608,6 +662,69 @@ lio_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) pause->rx_pause = oct->rx_pause; } +static int +lio_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) +{ + /* Notes: Not supporting any auto negotiation in these + * drivers. + */ + struct lio *lio = GET_LIO(netdev); + struct octeon_device *oct = lio->oct_dev; + struct octnic_ctrl_pkt nctrl; + struct oct_link_info *linfo = &lio->linfo; + + int ret = 0; + + if (oct->chip_id != OCTEON_CN23XX_PF_VID) + return -EINVAL; + + if (linfo->link.s.duplex == 0) { + /*no flow control for half duplex*/ + if (pause->rx_pause || pause->tx_pause) + return -EINVAL; + } + + /*do not support autoneg of link flow control*/ + if (pause->autoneg == AUTONEG_ENABLE) + return -EINVAL; + + memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt)); + + nctrl.ncmd.u64 = 0; + nctrl.ncmd.s.cmd = OCTNET_CMD_SET_FLOW_CTL; + nctrl.iq_no = lio->linfo.txpciq[0].s.q_no; + nctrl.wait_time = 100; + nctrl.netpndev = (u64)netdev; + nctrl.cb_fn = liquidio_link_ctrl_cmd_completion; + + if (pause->rx_pause) { + /*enable rx pause*/ + nctrl.ncmd.s.param1 = 1; + } else { + /*disable rx pause*/ + nctrl.ncmd.s.param1 = 0; + } + + if (pause->tx_pause) { + /*enable tx pause*/ + nctrl.ncmd.s.param2 = 1; + } else { + /*disable tx pause*/ + nctrl.ncmd.s.param2 = 0; + } + + ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl); + if (ret < 0) { + dev_err(&oct->pci_dev->dev, "Failed to set pause parameter\n"); + return -EINVAL; + } + + oct->rx_pause = pause->rx_pause; + oct->tx_pause = pause->tx_pause; + + return 0; +} + static void lio_get_ethtool_stats(struct net_device *netdev, struct ethtool_stats *stats __attribute__((unused)), @@ -875,6 +992,27 @@ lio_get_ethtool_stats(struct net_device *netdev, } } +static void lio_get_priv_flags_strings(struct lio *lio, u8 *data) +{ + struct octeon_device *oct_dev = lio->oct_dev; + int i; + + switch (oct_dev->chip_id) { + case OCTEON_CN23XX_PF_VID: + for (i = 0; i < ARRAY_SIZE(oct_priv_flags_strings); i++) { + sprintf(data, "%s", oct_priv_flags_strings[i]); + data += ETH_GSTRING_LEN; + } + break; + case OCTEON_CN68XX: + case OCTEON_CN66XX: + break; + default: + netif_info(lio, drv, lio->netdev, "Unknown Chip !!\n"); + break; + } +} + static void lio_get_strings(struct net_device *netdev, u32 stringset, u8 *data) { struct lio *lio = GET_LIO(netdev); @@ -914,12 +1052,31 @@ static void lio_get_strings(struct net_device *netdev, u32 stringset, u8 *data) } break; + case ETH_SS_PRIV_FLAGS: + lio_get_priv_flags_strings(lio, data); + break; default: netif_info(lio, drv, lio->netdev, "Unknown Stringset !!\n"); break; } } +static int lio_get_priv_flags_ss_count(struct lio *lio) +{ + struct octeon_device *oct_dev = lio->oct_dev; + + switch (oct_dev->chip_id) { + case OCTEON_CN23XX_PF_VID: + return ARRAY_SIZE(oct_priv_flags_strings); + case OCTEON_CN68XX: + case OCTEON_CN66XX: + return -EOPNOTSUPP; + default: + netif_info(lio, drv, lio->netdev, "Unknown Chip !!\n"); + return -EOPNOTSUPP; + } +} + static int lio_get_sset_count(struct net_device *netdev, int sset) { struct lio *lio = GET_LIO(netdev); @@ -930,6 +1087,8 @@ static int lio_get_sset_count(struct net_device *netdev, int sset) return (ARRAY_SIZE(oct_stats_strings) + ARRAY_SIZE(oct_iq_stats_strings) * oct_dev->num_iqs + ARRAY_SIZE(oct_droq_stats_strings) * oct_dev->num_oqs); + case ETH_SS_PRIV_FLAGS: + return lio_get_priv_flags_ss_count(lio); default: return -EOPNOTSUPP; } @@ -946,6 +1105,16 @@ static int lio_get_intr_coalesce(struct net_device *netdev, intrmod_cfg = &oct->intrmod; switch (oct->chip_id) { + case OCTEON_CN23XX_PF_VID: + if (!intrmod_cfg->rx_enable) { + intr_coal->rx_coalesce_usecs = intrmod_cfg->rx_usecs; + intr_coal->rx_max_coalesced_frames = + intrmod_cfg->rx_frames; + } + if (!intrmod_cfg->tx_enable) + intr_coal->tx_max_coalesced_frames = + intrmod_cfg->tx_frames; + break; case OCTEON_CN68XX: case OCTEON_CN66XX: { struct octeon_cn6xxx *cn6xxx = @@ -981,7 +1150,15 @@ static int lio_get_intr_coalesce(struct net_device *netdev, intr_coal->rx_coalesce_usecs_low = intrmod_cfg->rx_mintmr_trigger; intr_coal->rx_max_coalesced_frames_low = - intrmod_cfg->rx_mincnt_trigger; + intrmod_cfg->rx_mincnt_trigger; + } + if (OCTEON_CN23XX_PF(oct) && + (intrmod_cfg->tx_enable)) { + intr_coal->use_adaptive_tx_coalesce = intrmod_cfg->tx_enable; + intr_coal->tx_max_coalesced_frames_high = + intrmod_cfg->tx_maxcnt_trigger; + intr_coal->tx_max_coalesced_frames_low = + intrmod_cfg->tx_mincnt_trigger; } return 0; } @@ -1058,11 +1235,11 @@ static void octnet_nic_stats_callback(struct octeon_device *oct_dev, u32 status, void *ptr) { - struct octeon_soft_command *sc = (struct octeon_soft_command *)ptr; - struct oct_nic_stats_resp *resp = (struct oct_nic_stats_resp *) - sc->virtrptr; - struct oct_nic_stats_ctrl *ctrl = (struct oct_nic_stats_ctrl *) - sc->ctxptr; + struct octeon_soft_command *sc = (struct octeon_soft_command *)ptr; + struct oct_nic_stats_resp *resp = + (struct oct_nic_stats_resp *)sc->virtrptr; + struct oct_nic_stats_ctrl *ctrl = + (struct oct_nic_stats_ctrl *)sc->ctxptr; struct nic_rx_stats *rsp_rstats = &resp->stats.fromwire; struct nic_tx_stats *rsp_tstats = &resp->stats.fromhost; @@ -1312,6 +1489,27 @@ oct_cfg_rx_intrcnt(struct lio *lio, struct ethtool_coalesce *intr_coal) CFG_SET_OQ_INTR_PKT(cn6xxx->conf, rx_max_coalesced_frames); break; } + case OCTEON_CN23XX_PF_VID: { + int q_no; + + if (!intr_coal->rx_max_coalesced_frames) + rx_max_coalesced_frames = oct->intrmod.rx_frames; + else + rx_max_coalesced_frames = + intr_coal->rx_max_coalesced_frames; + for (q_no = 0; q_no < oct->num_oqs; q_no++) { + q_no += oct->sriov_info.pf_srn; + octeon_write_csr64( + oct, CN23XX_SLI_OQ_PKT_INT_LEVELS(q_no), + (octeon_read_csr64( + oct, CN23XX_SLI_OQ_PKT_INT_LEVELS(q_no)) & + (0x3fffff00000000UL)) | + rx_max_coalesced_frames); + /*consider setting resend bit*/ + } + oct->intrmod.rx_frames = rx_max_coalesced_frames; + break; + } default: return -EINVAL; } @@ -1344,6 +1542,27 @@ static int oct_cfg_rx_intrtime(struct lio *lio, CFG_SET_OQ_INTR_TIME(cn6xxx->conf, rx_coalesce_usecs); break; } + case OCTEON_CN23XX_PF_VID: { + u64 time_threshold; + int q_no; + + if (!intr_coal->rx_coalesce_usecs) + rx_coalesce_usecs = oct->intrmod.rx_usecs; + else + rx_coalesce_usecs = intr_coal->rx_coalesce_usecs; + time_threshold = + cn23xx_pf_get_oq_ticks(oct, (u32)rx_coalesce_usecs); + for (q_no = 0; q_no < oct->num_oqs; q_no++) { + q_no += oct->sriov_info.pf_srn; + octeon_write_csr64(oct, + CN23XX_SLI_OQ_PKT_INT_LEVELS(q_no), + (oct->intrmod.rx_frames | + (time_threshold << 32))); + /*consider writing to resend bit here*/ + } + oct->intrmod.rx_usecs = rx_coalesce_usecs; + break; + } default: return -EINVAL; } @@ -1356,12 +1575,37 @@ oct_cfg_tx_intrcnt(struct lio *lio, struct ethtool_coalesce *intr_coal __attribute__((unused))) { struct octeon_device *oct = lio->oct_dev; + u32 iq_intr_pkt; + void __iomem *inst_cnt_reg; + u64 val; /* Config Cnt based interrupt values */ switch (oct->chip_id) { case OCTEON_CN68XX: case OCTEON_CN66XX: break; + case OCTEON_CN23XX_PF_VID: { + int q_no; + + if (!intr_coal->tx_max_coalesced_frames) + iq_intr_pkt = CN23XX_DEF_IQ_INTR_THRESHOLD & + CN23XX_PKT_IN_DONE_WMARK_MASK; + else + iq_intr_pkt = intr_coal->tx_max_coalesced_frames & + CN23XX_PKT_IN_DONE_WMARK_MASK; + for (q_no = 0; q_no < oct->num_iqs; q_no++) { + inst_cnt_reg = (oct->instr_queue[q_no])->inst_cnt_reg; + val = readq(inst_cnt_reg); + /*clear wmark and count.dont want to write count back*/ + val = (val & 0xFFFF000000000000ULL) | + ((u64)iq_intr_pkt + << CN23XX_PKT_IN_DONE_WMARK_BIT_POS); + writeq(val, inst_cnt_reg); + /*consider setting resend bit*/ + } + oct->intrmod.tx_frames = iq_intr_pkt; + break; + } default: return -EINVAL; } @@ -1397,6 +1641,8 @@ static int lio_set_intr_coalesce(struct net_device *netdev, return -EINVAL; } break; + case OCTEON_CN23XX_PF_VID: + break; default: return -EINVAL; } @@ -1539,9 +1785,237 @@ static int lio_nway_reset(struct net_device *netdev) } /* Return register dump len. */ -static int lio_get_regs_len(struct net_device *dev __attribute__((unused))) +static int lio_get_regs_len(struct net_device *dev) { - return OCT_ETHTOOL_REGDUMP_LEN; + struct lio *lio = GET_LIO(dev); + struct octeon_device *oct = lio->oct_dev; + + switch (oct->chip_id) { + case OCTEON_CN23XX_PF_VID: + return OCT_ETHTOOL_REGDUMP_LEN_23XX; + default: + return OCT_ETHTOOL_REGDUMP_LEN; + } +} + +static int cn23xx_read_csr_reg(char *s, struct octeon_device *oct) +{ + u32 reg; + u8 pf_num = oct->pf_num; + int len = 0; + int i; + + /* PCI Window Registers */ + + len += sprintf(s + len, "\n\t Octeon CSR Registers\n\n"); + + /*0x29030 or 0x29040*/ + reg = CN23XX_SLI_PKT_MAC_RINFO64(oct->pcie_port, oct->pf_num); + len += sprintf(s + len, + "\n[%08x] (SLI_PKT_MAC%d_PF%d_RINFO): %016llx\n", + reg, oct->pcie_port, oct->pf_num, + (u64)octeon_read_csr64(oct, reg)); + + /*0x27080 or 0x27090*/ + reg = CN23XX_SLI_MAC_PF_INT_ENB64(oct->pcie_port, oct->pf_num); + len += + sprintf(s + len, "\n[%08x] (SLI_MAC%d_PF%d_INT_ENB): %016llx\n", + reg, oct->pcie_port, oct->pf_num, + (u64)octeon_read_csr64(oct, reg)); + + /*0x27000 or 0x27010*/ + reg = CN23XX_SLI_MAC_PF_INT_SUM64(oct->pcie_port, oct->pf_num); + len += + sprintf(s + len, "\n[%08x] (SLI_MAC%d_PF%d_INT_SUM): %016llx\n", + reg, oct->pcie_port, oct->pf_num, + (u64)octeon_read_csr64(oct, reg)); + + /*0x29120*/ + reg = 0x29120; + len += sprintf(s + len, "\n[%08x] (SLI_PKT_MEM_CTL): %016llx\n", reg, + (u64)octeon_read_csr64(oct, reg)); + + /*0x27300*/ + reg = 0x27300 + oct->pcie_port * CN23XX_MAC_INT_OFFSET + + (oct->pf_num) * CN23XX_PF_INT_OFFSET; + len += sprintf( + s + len, "\n[%08x] (SLI_MAC%d_PF%d_PKT_VF_INT): %016llx\n", reg, + oct->pcie_port, oct->pf_num, (u64)octeon_read_csr64(oct, reg)); + + /*0x27200*/ + reg = 0x27200 + oct->pcie_port * CN23XX_MAC_INT_OFFSET + + (oct->pf_num) * CN23XX_PF_INT_OFFSET; + len += sprintf(s + len, + "\n[%08x] (SLI_MAC%d_PF%d_PP_VF_INT): %016llx\n", + reg, oct->pcie_port, oct->pf_num, + (u64)octeon_read_csr64(oct, reg)); + + /*29130*/ + reg = CN23XX_SLI_PKT_CNT_INT; + len += sprintf(s + len, "\n[%08x] (SLI_PKT_CNT_INT): %016llx\n", reg, + (u64)octeon_read_csr64(oct, reg)); + + /*0x29140*/ + reg = CN23XX_SLI_PKT_TIME_INT; + len += sprintf(s + len, "\n[%08x] (SLI_PKT_TIME_INT): %016llx\n", reg, + (u64)octeon_read_csr64(oct, reg)); + + /*0x29160*/ + reg = 0x29160; + len += sprintf(s + len, "\n[%08x] (SLI_PKT_INT): %016llx\n", reg, + (u64)octeon_read_csr64(oct, reg)); + + /*0x29180*/ + reg = CN23XX_SLI_OQ_WMARK; + len += sprintf(s + len, "\n[%08x] (SLI_PKT_OUTPUT_WMARK): %016llx\n", + reg, (u64)octeon_read_csr64(oct, reg)); + + /*0x291E0*/ + reg = CN23XX_SLI_PKT_IOQ_RING_RST; + len += sprintf(s + len, "\n[%08x] (SLI_PKT_RING_RST): %016llx\n", reg, + (u64)octeon_read_csr64(oct, reg)); + + /*0x29210*/ + reg = CN23XX_SLI_GBL_CONTROL; + len += sprintf(s + len, + "\n[%08x] (SLI_PKT_GBL_CONTROL): %016llx\n", reg, + (u64)octeon_read_csr64(oct, reg)); + + /*0x29220*/ + reg = 0x29220; + len += sprintf(s + len, "\n[%08x] (SLI_PKT_BIST_STATUS): %016llx\n", + reg, (u64)octeon_read_csr64(oct, reg)); + + /*PF only*/ + if (pf_num == 0) { + /*0x29260*/ + reg = CN23XX_SLI_OUT_BP_EN_W1S; + len += sprintf(s + len, + "\n[%08x] (SLI_PKT_OUT_BP_EN_W1S): %016llx\n", + reg, (u64)octeon_read_csr64(oct, reg)); + } else if (pf_num == 1) { + /*0x29270*/ + reg = CN23XX_SLI_OUT_BP_EN2_W1S; + len += sprintf(s + len, + "\n[%08x] (SLI_PKT_OUT_BP_EN2_W1S): %016llx\n", + reg, (u64)octeon_read_csr64(oct, reg)); + } + + for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) { + reg = CN23XX_SLI_OQ_BUFF_INFO_SIZE(i); + len += + sprintf(s + len, "\n[%08x] (SLI_PKT%d_OUT_SIZE): %016llx\n", + reg, i, (u64)octeon_read_csr64(oct, reg)); + } + + /*0x10040*/ + for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++) { + reg = CN23XX_SLI_IQ_INSTR_COUNT64(i); + len += sprintf(s + len, + "\n[%08x] (SLI_PKT_IN_DONE%d_CNTS): %016llx\n", + reg, i, (u64)octeon_read_csr64(oct, reg)); + } + + /*0x10080*/ + for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) { + reg = CN23XX_SLI_OQ_PKTS_CREDIT(i); + len += sprintf(s + len, + "\n[%08x] (SLI_PKT%d_SLIST_BAOFF_DBELL): %016llx\n", + reg, i, (u64)octeon_read_csr64(oct, reg)); + } + + /*0x10090*/ + for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) { + reg = CN23XX_SLI_OQ_SIZE(i); + len += sprintf( + s + len, "\n[%08x] (SLI_PKT%d_SLIST_FIFO_RSIZE): %016llx\n", + reg, i, (u64)octeon_read_csr64(oct, reg)); + } + + /*0x10050*/ + for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) { + reg = CN23XX_SLI_OQ_PKT_CONTROL(i); + len += sprintf( + s + len, + "\n[%08x] (SLI_PKT%d__OUTPUT_CONTROL): %016llx\n", + reg, i, (u64)octeon_read_csr64(oct, reg)); + } + + /*0x10070*/ + for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) { + reg = CN23XX_SLI_OQ_BASE_ADDR64(i); + len += sprintf(s + len, + "\n[%08x] (SLI_PKT%d_SLIST_BADDR): %016llx\n", + reg, i, (u64)octeon_read_csr64(oct, reg)); + } + + /*0x100a0*/ + for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) { + reg = CN23XX_SLI_OQ_PKT_INT_LEVELS(i); + len += sprintf(s + len, + "\n[%08x] (SLI_PKT%d_INT_LEVELS): %016llx\n", + reg, i, (u64)octeon_read_csr64(oct, reg)); + } + + /*0x100b0*/ + for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) { + reg = CN23XX_SLI_OQ_PKTS_SENT(i); + len += sprintf(s + len, "\n[%08x] (SLI_PKT%d_CNTS): %016llx\n", + reg, i, (u64)octeon_read_csr64(oct, reg)); + } + + /*0x100c0*/ + for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) { + reg = 0x100c0 + i * CN23XX_OQ_OFFSET; + len += sprintf(s + len, + "\n[%08x] (SLI_PKT%d_ERROR_INFO): %016llx\n", + reg, i, (u64)octeon_read_csr64(oct, reg)); + + /*0x10000*/ + for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++) { + reg = CN23XX_SLI_IQ_PKT_CONTROL64(i); + len += sprintf( + s + len, + "\n[%08x] (SLI_PKT%d_INPUT_CONTROL): %016llx\n", + reg, i, (u64)octeon_read_csr64(oct, reg)); + } + + /*0x10010*/ + for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++) { + reg = CN23XX_SLI_IQ_BASE_ADDR64(i); + len += sprintf( + s + len, + "\n[%08x] (SLI_PKT%d_INSTR_BADDR): %016llx\n", reg, + i, (u64)octeon_read_csr64(oct, reg)); + } + + /*0x10020*/ + for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++) { + reg = CN23XX_SLI_IQ_DOORBELL(i); + len += sprintf( + s + len, + "\n[%08x] (SLI_PKT%d_INSTR_BAOFF_DBELL): %016llx\n", + reg, i, (u64)octeon_read_csr64(oct, reg)); + } + + /*0x10030*/ + for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++) { + reg = CN23XX_SLI_IQ_SIZE(i); + len += sprintf( + s + len, + "\n[%08x] (SLI_PKT%d_INSTR_FIFO_RSIZE): %016llx\n", + reg, i, (u64)octeon_read_csr64(oct, reg)); + } + + /*0x10040*/ + for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++) + reg = CN23XX_SLI_IQ_INSTR_COUNT64(i); + len += sprintf(s + len, + "\n[%08x] (SLI_PKT_IN_DONE%d_CNTS): %016llx\n", + reg, i, (u64)octeon_read_csr64(oct, reg)); + } + + return len; } static int cn6xxx_read_csr_reg(char *s, struct octeon_device *oct) @@ -1686,6 +2160,10 @@ static void lio_get_regs(struct net_device *dev, regs->version = OCT_ETHTOOL_REGSVER; switch (oct->chip_id) { + case OCTEON_CN23XX_PF_VID: + memset(regbuf, 0, OCT_ETHTOOL_REGDUMP_LEN_23XX); + len += cn23xx_read_csr_reg(regbuf + len, oct); + break; case OCTEON_CN68XX: case OCTEON_CN66XX: memset(regbuf, 0, OCT_ETHTOOL_REGDUMP_LEN); @@ -1727,6 +2205,7 @@ static const struct ethtool_ops lio_ethtool_ops = { .get_strings = lio_get_strings, .get_ethtool_stats = lio_get_ethtool_stats, .get_pauseparam = lio_get_pauseparam, + .set_pauseparam = lio_set_pauseparam, .get_regs_len = lio_get_regs_len, .get_regs = lio_get_regs, .get_msglevel = lio_get_msglevel, diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 866c075cc7ea..afc6f9dc8119 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -24,6 +24,7 @@ #include <linux/firmware.h> #include <linux/ptp_clock_kernel.h> #include <net/vxlan.h> +#include <linux/kthread.h> #include "liquidio_common.h" #include "octeon_droq.h" #include "octeon_iq.h" @@ -96,6 +97,14 @@ struct liquidio_if_cfg_resp { u64 status; }; +struct liquidio_rx_ctl_context { + int octeon_id; + + wait_queue_head_t wc; + + int cond; +}; + struct oct_link_status_resp { u64 rh; struct oct_link_info link_info; @@ -857,6 +866,52 @@ static void print_link_info(struct net_device *netdev) } /** + * \brief Routine to notify MTU change + * @param work work_struct data structure + */ +static void octnet_link_status_change(struct work_struct *work) +{ + struct cavium_wk *wk = (struct cavium_wk *)work; + struct lio *lio = (struct lio *)wk->ctxptr; + + rtnl_lock(); + call_netdevice_notifiers(NETDEV_CHANGEMTU, lio->netdev); + rtnl_unlock(); +} + +/** + * \brief Sets up the mtu status change work + * @param netdev network device + */ +static inline int setup_link_status_change_wq(struct net_device *netdev) +{ + struct lio *lio = GET_LIO(netdev); + struct octeon_device *oct = lio->oct_dev; + + lio->link_status_wq.wq = alloc_workqueue("link-status", + WQ_MEM_RECLAIM, 0); + if (!lio->link_status_wq.wq) { + dev_err(&oct->pci_dev->dev, "unable to create cavium link status wq\n"); + return -1; + } + INIT_DELAYED_WORK(&lio->link_status_wq.wk.work, + octnet_link_status_change); + lio->link_status_wq.wk.ctxptr = lio; + + return 0; +} + +static inline void cleanup_link_status_change_wq(struct net_device *netdev) +{ + struct lio *lio = GET_LIO(netdev); + + if (lio->link_status_wq.wq) { + cancel_delayed_work_sync(&lio->link_status_wq.wk.work); + destroy_workqueue(lio->link_status_wq.wq); + } +} + +/** * \brief Update link status * @param netdev network device * @param ls link status structure @@ -894,8 +949,6 @@ static void update_txq_status(struct octeon_device *oct, int iq_num) struct lio *lio; struct octeon_instr_queue *iq = oct->instr_queue[iq_num]; - /*octeon_update_iq_read_idx(oct, iq);*/ - netdev = oct->props[iq->ifidx].netdev; /* This is needed because the first IQ does not have @@ -948,8 +1001,7 @@ int liquidio_schedule_msix_droq_pkt_handler(struct octeon_droq *droq, u64 ret) * \brief Droq packet processor sceduler * @param oct octeon device */ -static -void liquidio_schedule_droq_pkt_handlers(struct octeon_device *oct) +static void liquidio_schedule_droq_pkt_handlers(struct octeon_device *oct) { struct octeon_device_priv *oct_priv = (struct octeon_device_priv *)oct->priv; @@ -1133,6 +1185,102 @@ static int octeon_setup_interrupt(struct octeon_device *oct) return 0; } +static int liquidio_watchdog(void *param) +{ + u64 wdog; + u16 mask_of_stuck_cores = 0; + u16 mask_of_crashed_cores = 0; + int core_num; + u8 core_is_stuck[LIO_MAX_CORES]; + u8 core_crashed[LIO_MAX_CORES]; + struct octeon_device *oct = param; + + memset(core_is_stuck, 0, sizeof(core_is_stuck)); + memset(core_crashed, 0, sizeof(core_crashed)); + + while (!kthread_should_stop()) { + mask_of_crashed_cores = + (u16)octeon_read_csr64(oct, CN23XX_SLI_SCRATCH2); + + for (core_num = 0; core_num < LIO_MAX_CORES; core_num++) { + if (!core_is_stuck[core_num]) { + wdog = lio_pci_readq(oct, CIU3_WDOG(core_num)); + + /* look at watchdog state field */ + wdog &= CIU3_WDOG_MASK; + if (wdog) { + /* this watchdog timer has expired */ + core_is_stuck[core_num] = + LIO_MONITOR_WDOG_EXPIRE; + mask_of_stuck_cores |= (1 << core_num); + } + } + + if (!core_crashed[core_num]) + core_crashed[core_num] = + (mask_of_crashed_cores >> core_num) & 1; + } + + if (mask_of_stuck_cores) { + for (core_num = 0; core_num < LIO_MAX_CORES; + core_num++) { + if (core_is_stuck[core_num] == 1) { + dev_err(&oct->pci_dev->dev, + "ERROR: Octeon core %d is stuck!\n", + core_num); + /* 2 means we have printk'd an error + * so no need to repeat the same printk + */ + core_is_stuck[core_num] = + LIO_MONITOR_CORE_STUCK_MSGD; + } + } + } + + if (mask_of_crashed_cores) { + for (core_num = 0; core_num < LIO_MAX_CORES; + core_num++) { + if (core_crashed[core_num] == 1) { + dev_err(&oct->pci_dev->dev, + "ERROR: Octeon core %d crashed! See oct-fwdump for details.\n", + core_num); + /* 2 means we have printk'd an error + * so no need to repeat the same printk + */ + core_crashed[core_num] = + LIO_MONITOR_CORE_STUCK_MSGD; + } + } + } +#ifdef CONFIG_MODULE_UNLOAD + if (mask_of_stuck_cores || mask_of_crashed_cores) { + /* make module refcount=0 so that rmmod will work */ + long refcount; + + refcount = module_refcount(THIS_MODULE); + + while (refcount > 0) { + module_put(THIS_MODULE); + refcount = module_refcount(THIS_MODULE); + } + + /* compensate for and withstand an unlikely (but still + * possible) race condition + */ + while (refcount < 0) { + try_module_get(THIS_MODULE); + refcount = module_refcount(THIS_MODULE); + } + } +#endif + /* sleep for two seconds */ + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(2 * HZ); + } + + return 0; +} + /** * \brief PCI probe handler * @param pdev PCI device structure @@ -1178,6 +1326,30 @@ liquidio_probe(struct pci_dev *pdev, return -ENOMEM; } + if (OCTEON_CN23XX_PF(oct_dev)) { + u64 scratch1; + u8 bus, device, function; + + scratch1 = octeon_read_csr64(oct_dev, CN23XX_SLI_SCRATCH1); + if (!(scratch1 & 4ULL)) { + /* Bit 2 of SLI_SCRATCH_1 is a flag that indicates that + * the lio watchdog kernel thread is running for this + * NIC. Each NIC gets one watchdog kernel thread. + */ + scratch1 |= 4ULL; + octeon_write_csr64(oct_dev, CN23XX_SLI_SCRATCH1, + scratch1); + + bus = pdev->bus->number; + device = PCI_SLOT(pdev->devfn); + function = PCI_FUNC(pdev->devfn); + oct_dev->watchdog_task = kthread_create( + liquidio_watchdog, oct_dev, + "liowd/%02hhx:%02hhx.%hhx", bus, device, function); + wake_up_process(oct_dev->watchdog_task); + } + } + oct_dev->rx_pause = 1; oct_dev->tx_pause = 1; @@ -1294,16 +1466,15 @@ static void octeon_destroy_resources(struct octeon_device *oct) octeon_delete_response_list(oct); /* fallthrough */ - case OCT_DEV_SC_BUFF_POOL_INIT_DONE: - octeon_free_sc_buffer_pool(oct); - - /* fallthrough */ case OCT_DEV_INSTR_QUEUE_INIT_DONE: for (i = 0; i < MAX_OCTEON_INSTR_QUEUES(oct); i++) { - if (!(oct->io_qmask.iq & (1ULL << i))) + if (!(oct->io_qmask.iq & BIT_ULL(i))) continue; octeon_delete_instr_queue(oct, i); } + /* fallthrough */ + case OCT_DEV_SC_BUFF_POOL_INIT_DONE: + octeon_free_sc_buffer_pool(oct); /* fallthrough */ case OCT_DEV_DISPATCH_INIT_DONE: @@ -1332,23 +1503,89 @@ static void octeon_destroy_resources(struct octeon_device *oct) } /** + * \brief Callback for rx ctrl + * @param status status of request + * @param buf pointer to resp structure + */ +static void rx_ctl_callback(struct octeon_device *oct, + u32 status, + void *buf) +{ + struct octeon_soft_command *sc = (struct octeon_soft_command *)buf; + struct liquidio_rx_ctl_context *ctx; + + ctx = (struct liquidio_rx_ctl_context *)sc->ctxptr; + + oct = lio_get_device(ctx->octeon_id); + if (status) + dev_err(&oct->pci_dev->dev, "rx ctl instruction failed. Status: %llx\n", + CVM_CAST64(status)); + WRITE_ONCE(ctx->cond, 1); + + /* This barrier is required to be sure that the response has been + * written fully before waking up the handler + */ + wmb(); + + wake_up_interruptible(&ctx->wc); +} + +/** * \brief Send Rx control command * @param lio per-network private data * @param start_stop whether to start or stop */ static void send_rx_ctrl_cmd(struct lio *lio, int start_stop) { - struct octnic_ctrl_pkt nctrl; + struct octeon_soft_command *sc; + struct liquidio_rx_ctl_context *ctx; + union octnet_cmd *ncmd; + int ctx_size = sizeof(struct liquidio_rx_ctl_context); + struct octeon_device *oct = (struct octeon_device *)lio->oct_dev; + int retval; - memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt)); + if (oct->props[lio->ifidx].rx_on == start_stop) + return; - nctrl.ncmd.s.cmd = OCTNET_CMD_RX_CTL; - nctrl.ncmd.s.param1 = start_stop; - nctrl.iq_no = lio->linfo.txpciq[0].s.q_no; - nctrl.netpndev = (u64)lio->netdev; + sc = (struct octeon_soft_command *) + octeon_alloc_soft_command(oct, OCTNET_CMD_SIZE, + 16, ctx_size); + + ncmd = (union octnet_cmd *)sc->virtdptr; + ctx = (struct liquidio_rx_ctl_context *)sc->ctxptr; + + WRITE_ONCE(ctx->cond, 0); + ctx->octeon_id = lio_get_device_id(oct); + init_waitqueue_head(&ctx->wc); + + ncmd->u64 = 0; + ncmd->s.cmd = OCTNET_CMD_RX_CTL; + ncmd->s.param1 = start_stop; + + octeon_swap_8B_data((u64 *)ncmd, (OCTNET_CMD_SIZE >> 3)); + + sc->iq_no = lio->linfo.txpciq[0].s.q_no; + + octeon_prepare_soft_command(oct, sc, OPCODE_NIC, + OPCODE_NIC_CMD, 0, 0, 0); - if (octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl) < 0) + sc->callback = rx_ctl_callback; + sc->callback_arg = sc; + sc->wait_time = 5000; + + retval = octeon_send_soft_command(oct, sc); + if (retval == IQ_SEND_FAILED) { netif_info(lio, rx_err, lio->netdev, "Failed to send RX Control message\n"); + } else { + /* Sleep on a wait queue till the cond flag indicates that the + * response arrived or timed-out. + */ + if (sleep_cond(&ctx->wc, &ctx->cond) == -EINTR) + return; + oct->props[lio->ifidx].rx_on = start_stop; + } + + octeon_free_soft_command(oct, sc); } /** @@ -1375,21 +1612,24 @@ static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx) dev_dbg(&oct->pci_dev->dev, "NIC device cleanup\n"); - send_rx_ctrl_cmd(lio, 0); - if (atomic_read(&lio->ifstate) & LIO_IFSTATE_RUNNING) - txqs_stop(netdev); + liquidio_stop(netdev); if (oct->props[lio->ifidx].napi_enabled == 1) { list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list) napi_disable(napi); oct->props[lio->ifidx].napi_enabled = 0; + + if (OCTEON_CN23XX_PF(oct)) + oct->droq[0]->ops.poll_mode = 0; } if (atomic_read(&lio->ifstate) & LIO_IFSTATE_REGISTERED) unregister_netdev(netdev); + cleanup_link_status_change_wq(netdev); + delete_glists(lio); free_netdev(netdev); @@ -1442,6 +1682,9 @@ static void liquidio_remove(struct pci_dev *pdev) dev_dbg(&oct_dev->pci_dev->dev, "Stopping device\n"); + if (oct_dev->watchdog_task) + kthread_stop(oct_dev->watchdog_task); + if (oct_dev->app_mode && (oct_dev->app_mode == CVM_DRV_NIC_APP)) liquidio_stop_nic_module(oct_dev); @@ -1941,7 +2184,7 @@ static void if_cfg_callback(struct octeon_device *oct, struct liquidio_if_cfg_context *ctx; resp = (struct liquidio_if_cfg_resp *)sc->virtrptr; - ctx = (struct liquidio_if_cfg_context *)sc->ctxptr; + ctx = (struct liquidio_if_cfg_context *)sc->ctxptr; oct = lio_get_device(ctx->octeon_id); if (resp->status) @@ -2134,11 +2377,14 @@ static void napi_schedule_wrapper(void *param) */ static void liquidio_napi_drv_callback(void *arg) { + struct octeon_device *oct; struct octeon_droq *droq = arg; int this_cpu = smp_processor_id(); - if (droq->cpu_id == this_cpu) { - napi_schedule(&droq->napi); + oct = droq->oct_dev; + + if (OCTEON_CN23XX_PF(oct) || droq->cpu_id == this_cpu) { + napi_schedule_irqoff(&droq->napi); } else { struct call_single_data *csd = &droq->csd; @@ -2267,6 +2513,14 @@ static inline int setup_io_queues(struct octeon_device *octeon_dev, octeon_register_droq_ops(octeon_dev, q_no, &droq_ops); } + if (OCTEON_CN23XX_PF(octeon_dev)) { + /* 23XX PF can receive control messages (via the first PF-owned + * droq) from the firmware even if the ethX interface is down, + * so that's why poll_mode must be off for the first droq. + */ + octeon_dev->droq[0]->ops.poll_mode = 0; + } + /* set up IQs. */ for (q = 0; q < lio->linfo.num_txpciq; q++) { num_tx_descs = CFG_GET_NUM_TX_DESCS_NIC_IF(octeon_get_conf @@ -2351,12 +2605,20 @@ static int liquidio_open(struct net_device *netdev) napi_enable(napi); oct->props[lio->ifidx].napi_enabled = 1; + + if (OCTEON_CN23XX_PF(oct)) + oct->droq[0]->ops.poll_mode = 1; } oct_ptp_open(netdev); ifstate_set(lio, LIO_IFSTATE_RUNNING); + /* Ready for link status updates */ + lio->intf_open = 1; + + netif_info(lio, ifup, lio->netdev, "Interface Open, ready for traffic\n"); + if (OCTEON_CN23XX_PF(oct)) { if (!oct->msix_on) if (setup_tx_poll_fn(netdev)) @@ -2368,14 +2630,9 @@ static int liquidio_open(struct net_device *netdev) start_txq(netdev); - netif_info(lio, ifup, lio->netdev, "Interface Open, ready for traffic\n"); - /* tell Octeon to start forwarding packets to host */ send_rx_ctrl_cmd(lio, 1); - /* Ready for link status updates */ - lio->intf_open = 1; - dev_info(&oct->pci_dev->dev, "%s interface is opened\n", netdev->name); @@ -2795,9 +3052,15 @@ static inline int send_nic_timestamp_pkt(struct octeon_device *oct, sc->callback_arg = finfo->skb; sc->iq_no = ndata->q_no; - len = (u32)((struct octeon_instr_ih2 *)(&sc->cmd.cmd2.ih2))->dlengsz; + if (OCTEON_CN23XX_PF(oct)) + len = (u32)((struct octeon_instr_ih3 *) + (&sc->cmd.cmd3.ih3))->dlengsz; + else + len = (u32)((struct octeon_instr_ih2 *) + (&sc->cmd.cmd2.ih2))->dlengsz; ring_doorbell = 1; + retval = octeon_send_command(oct, sc->iq_no, ring_doorbell, &sc->cmd, sc, len, ndata->reqtype); @@ -2929,7 +3192,10 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev) return NETDEV_TX_BUSY; } - ndata.cmd.cmd2.dptr = dptr; + if (OCTEON_CN23XX_PF(oct)) + ndata.cmd.cmd3.dptr = dptr; + else + ndata.cmd.cmd2.dptr = dptr; finfo->dptr = dptr; ndata.reqtype = REQTYPE_NORESP_NET; @@ -3004,15 +3270,23 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev) g->sg_size, DMA_TO_DEVICE); dptr = g->sg_dma_ptr; - ndata.cmd.cmd2.dptr = dptr; + if (OCTEON_CN23XX_PF(oct)) + ndata.cmd.cmd3.dptr = dptr; + else + ndata.cmd.cmd2.dptr = dptr; finfo->dptr = dptr; finfo->g = g; ndata.reqtype = REQTYPE_NORESP_NET_SG; } - irh = (struct octeon_instr_irh *)&ndata.cmd.cmd2.irh; - tx_info = (union tx_info *)&ndata.cmd.cmd2.ossp[0]; + if (OCTEON_CN23XX_PF(oct)) { + irh = (struct octeon_instr_irh *)&ndata.cmd.cmd3.irh; + tx_info = (union tx_info *)&ndata.cmd.cmd3.ossp[0]; + } else { + irh = (struct octeon_instr_irh *)&ndata.cmd.cmd2.irh; + tx_info = (union tx_info *)&ndata.cmd.cmd2.ossp[0]; + } if (skb_shinfo(skb)->gso_size) { tx_info->s.gso_size = skb_shinfo(skb)->gso_size; @@ -3505,7 +3779,11 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) /* Sleep on a wait queue till the cond flag indicates that the * response arrived or timed-out. */ - sleep_cond(&ctx->wc, &ctx->cond); + if (sleep_cond(&ctx->wc, &ctx->cond) == -EINTR) { + dev_err(&octeon_dev->pci_dev->dev, "Wait interrupted\n"); + goto setup_nic_wait_intr; + } + retval = resp->status; if (retval) { dev_err(&octeon_dev->pci_dev->dev, "iq/oq config failed\n"); @@ -3656,7 +3934,10 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) /* Register ethtool support */ liquidio_set_ethtool_ops(netdev); - octeon_dev->priv_flags = 0x0; + if (lio->oct_dev->chip_id == OCTEON_CN23XX_PF_VID) + octeon_dev->priv_flags = OCT_PRIV_FLAG_DEFAULT; + else + octeon_dev->priv_flags = 0x0; if (netdev->features & NETIF_F_LRO) liquidio_set_feature(netdev, OCTNET_CMD_LRO_ENABLE, @@ -3668,6 +3949,9 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) liquidio_set_feature(netdev, OCTNET_CMD_VERBOSE_ENABLE, 0); + if (setup_link_status_change_wq(netdev)) + goto setup_nic_dev_fail; + /* Register the network device with the OS */ if (register_netdev(netdev)) { dev_err(&octeon_dev->pci_dev->dev, "Device registration failed\n"); @@ -3703,6 +3987,8 @@ setup_nic_dev_fail: octeon_free_soft_command(octeon_dev, sc); +setup_nic_wait_intr: + while (i--) { dev_err(&octeon_dev->pci_dev->dev, "NIC ifidx:%d Setup failed\n", i); @@ -3732,8 +4018,7 @@ static int liquidio_init_nic_module(struct octeon_device *oct) /* run port_config command for each port */ oct->ifcount = num_nic_ports; - memset(oct->props, 0, - sizeof(struct octdev_props) * num_nic_ports); + memset(oct->props, 0, sizeof(struct octdev_props) * num_nic_ports); for (i = 0; i < MAX_OCTEON_LINKS; i++) oct->props[i].gmxport = -1; @@ -3749,7 +4034,7 @@ static int liquidio_init_nic_module(struct octeon_device *oct) /* Initialize interrupt moderation params */ intrmod_cfg = &((struct octeon_device *)oct)->intrmod; intrmod_cfg->rx_enable = 1; - intrmod_cfg->check_intrvl = LIO_INTRMOD_CHECK_INTERVAL; + intrmod_cfg->check_intrvl = LIO_INTRMOD_CHECK_INTERVAL; intrmod_cfg->maxpkt_ratethr = LIO_INTRMOD_MAXPKT_RATETHR; intrmod_cfg->minpkt_ratethr = LIO_INTRMOD_MINPKT_RATETHR; intrmod_cfg->rx_maxcnt_trigger = LIO_INTRMOD_RXMAXCNT_TRIGGER; @@ -3761,6 +4046,7 @@ static int liquidio_init_nic_module(struct octeon_device *oct) intrmod_cfg->tx_mincnt_trigger = LIO_INTRMOD_TXMINCNT_TRIGGER; intrmod_cfg->rx_frames = CFG_GET_OQ_INTR_PKT(octeon_get_conf(oct)); intrmod_cfg->rx_usecs = CFG_GET_OQ_INTR_TIME(octeon_get_conf(oct)); + intrmod_cfg->tx_frames = CFG_GET_IQ_INTR_PKT(octeon_get_conf(oct)); dev_dbg(&oct->pci_dev->dev, "Network interfaces ready\n"); return retval; diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h index 8ffd3b8c4d0f..0d990accb65e 100644 --- a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h +++ b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h @@ -232,6 +232,9 @@ static inline void add_sg_size(struct octeon_sg_entry *sg_entry, #define OCTNET_CMD_ADD_VLAN_FILTER 0x17 #define OCTNET_CMD_DEL_VLAN_FILTER 0x18 #define OCTNET_CMD_VXLAN_PORT_CONFIG 0x19 + +#define OCTNET_CMD_ID_ACTIVE 0x1a + #define OCTNET_CMD_VXLAN_PORT_ADD 0x0 #define OCTNET_CMD_VXLAN_PORT_DEL 0x1 #define OCTNET_CMD_RXCSUM_ENABLE 0x0 @@ -310,6 +313,13 @@ union octnet_cmd { #define OCTNET_CMD_SIZE (sizeof(union octnet_cmd)) +/*pkiih3 + irh + ossp[0] + ossp[1] + rdp + rptr = 40 bytes */ +#define LIO_SOFTCMDRESP_IH2 40 +#define LIO_SOFTCMDRESP_IH3 (40 + 8) + +#define LIO_PCICMD_O2 24 +#define LIO_PCICMD_O3 (24 + 8) + /* Instruction Header(DPI) - for OCTEON-III models */ struct octeon_instr_ih3 { #ifdef __BIG_ENDIAN_BITFIELD @@ -828,6 +838,8 @@ struct oct_link_stats { #define VITESSE_PHY_GPIO_DRIVEOFF 0x4 #define VITESSE_PHY_GPIO_HIGH 0x2 #define VITESSE_PHY_GPIO_LOW 0x3 +#define LED_IDENTIFICATION_ON 0x1 +#define LED_IDENTIFICATION_OFF 0x0 struct oct_mdio_cmd { u64 op; diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c index 52527638d413..586b68899b06 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c @@ -793,7 +793,6 @@ int octeon_setup_instr_queues(struct octeon_device *oct) union oct_txpciq txpciq; int numa_node = cpu_to_node(iq_no % num_online_cpus()); - /* this causes queue 0 to be default queue */ if (OCTEON_CN6XXX(oct)) num_descs = CFG_GET_NUM_DEF_TX_DESCS(CHIP_FIELD(oct, cn6xxx, conf)); @@ -816,6 +815,7 @@ int octeon_setup_instr_queues(struct octeon_device *oct) oct->instr_queue[0]->ifidx = 0; txpciq.u64 = 0; txpciq.s.q_no = iq_no; + txpciq.s.pkind = oct->pfvf_hsword.pkind; txpciq.s.use_qpg = 0; txpciq.s.qpg = 0; if (octeon_init_instr_queue(oct, txpciq, num_descs)) { @@ -835,7 +835,6 @@ int octeon_setup_output_queues(struct octeon_device *oct) u32 oq_no = 0; int numa_node = cpu_to_node(oq_no % num_online_cpus()); - /* this causes queue 0 to be default queue */ if (OCTEON_CN6XXX(oct)) { num_descs = CFG_GET_NUM_DEF_RX_DESCS(CHIP_FIELD(oct, cn6xxx, conf)); @@ -863,10 +862,10 @@ int octeon_setup_output_queues(struct octeon_device *oct) void octeon_set_io_queues_off(struct octeon_device *oct) { - /* Disable the i/p and o/p queues for this Octeon. */ - - octeon_write_csr(oct, CN6XXX_SLI_PKT_INSTR_ENB, 0); - octeon_write_csr(oct, CN6XXX_SLI_PKT_OUT_ENB, 0); + if (OCTEON_CN6XXX(oct)) { + octeon_write_csr(oct, CN6XXX_SLI_PKT_INSTR_ENB, 0); + octeon_write_csr(oct, CN6XXX_SLI_PKT_OUT_ENB, 0); + } } void octeon_set_droq_pkt_op(struct octeon_device *oct, @@ -876,14 +875,16 @@ void octeon_set_droq_pkt_op(struct octeon_device *oct, u32 reg_val = 0; /* Disable the i/p and o/p queues for this Octeon. */ - reg_val = octeon_read_csr(oct, CN6XXX_SLI_PKT_OUT_ENB); + if (OCTEON_CN6XXX(oct)) { + reg_val = octeon_read_csr(oct, CN6XXX_SLI_PKT_OUT_ENB); - if (enable) - reg_val = reg_val | (1 << q_no); - else - reg_val = reg_val & (~(1 << q_no)); + if (enable) + reg_val = reg_val | (1 << q_no); + else + reg_val = reg_val & (~(1 << q_no)); - octeon_write_csr(oct, CN6XXX_SLI_PKT_OUT_ENB, reg_val); + octeon_write_csr(oct, CN6XXX_SLI_PKT_OUT_ENB, reg_val); + } } int octeon_init_dispatch_list(struct octeon_device *oct) @@ -1100,6 +1101,12 @@ int octeon_core_drv_init(struct octeon_recv_info *recv_info, void *buf) } oct->fw_info.app_cap_flags = recv_pkt->rh.r_core_drv_init.app_cap_flags; oct->fw_info.app_mode = (u32)recv_pkt->rh.r_core_drv_init.app_mode; + oct->pfvf_hsword.app_mode = (u32)recv_pkt->rh.r_core_drv_init.app_mode; + + oct->pfvf_hsword.pkind = recv_pkt->rh.r_core_drv_init.pkind; + + for (i = 0; i < oct->num_iqs; i++) + oct->instr_queue[i]->txpciq.s.pkind = oct->pfvf_hsword.pkind; atomic_set(&oct->status, OCT_DEV_CORE_OK); @@ -1294,17 +1301,36 @@ int lio_get_device_id(void *dev) void lio_enable_irq(struct octeon_droq *droq, struct octeon_instr_queue *iq) { + u64 instr_cnt; + struct octeon_device *oct = NULL; + /* the whole thing needs to be atomic, ideally */ if (droq) { spin_lock_bh(&droq->lock); writel(droq->pkt_count, droq->pkts_sent_reg); droq->pkt_count = 0; spin_unlock_bh(&droq->lock); + oct = droq->oct_dev; } if (iq) { spin_lock_bh(&iq->lock); writel(iq->pkt_in_done, iq->inst_cnt_reg); iq->pkt_in_done = 0; spin_unlock_bh(&iq->lock); + oct = iq->oct_dev; + } + /*write resend. Writing RESEND in SLI_PKTX_CNTS should be enough + *to trigger tx interrupts as well, if they are pending. + */ + if (oct && OCTEON_CN23XX_PF(oct)) { + if (droq) + writeq(CN23XX_INTR_RESEND, droq->pkts_sent_reg); + /*we race with firmrware here. read and write the IN_DONE_CNTS*/ + else if (iq) { + instr_cnt = readq(iq->inst_cnt_reg); + writeq(((instr_cnt & 0xFFFFFFFF00000000ULL) | + CN23XX_INTR_RESEND), + iq->inst_cnt_reg); + } } } diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h index 99fc1d899208..da15c2ae9330 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h @@ -275,6 +275,7 @@ struct octdev_props { /* Each interface in the Octeon device has a network * device pointer (used for OS specific calls). */ + int rx_on; int napi_enabled; int gmxport; struct net_device *netdev; @@ -483,6 +484,8 @@ struct octeon_device { /* private flags to control driver-specific features through ethtool */ u32 priv_flags; + + void *watchdog_task; }; #define OCT_DRV_ONLINE 1 @@ -752,8 +755,15 @@ enum { OCT_PRIV_FLAG_TX_BYTES = 0, /* Tx interrupts by pending byte count */ }; -static inline void lio_set_priv_flag(struct octeon_device *octdev, u32 flag, - u32 val) +#define OCT_PRIV_FLAG_DEFAULT 0x0 + +static inline u32 lio_get_priv_flag(struct octeon_device *octdev, u32 flag) +{ + return !!(octdev->priv_flags & (0x1 << flag)); +} + +static inline void lio_set_priv_flag(struct octeon_device *octdev, + u32 flag, u32 val) { if (val) octdev->priv_flags |= (0x1 << flag); diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c index 5dfc23d70d05..f60e5320daf4 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c @@ -31,6 +31,7 @@ #include "octeon_network.h" #include "cn66xx_regs.h" #include "cn66xx_device.h" +#include "cn23xx_pf_device.h" #define CVM_MIN(d1, d2) (((d1) < (d2)) ? (d1) : (d2)) #define CVM_MAX(d1, d2) (((d1) > (d2)) ? (d1) : (d2)) @@ -262,6 +263,11 @@ int octeon_init_droq(struct octeon_device *oct, c_pkts_per_intr = (u32)CFG_GET_OQ_PKTS_PER_INTR(conf6x); c_refill_threshold = (u32)CFG_GET_OQ_REFILL_THRESHOLD(conf6x); + } else if (OCTEON_CN23XX_PF(oct)) { + struct octeon_config *conf23 = CHIP_FIELD(oct, cn23xx_pf, conf); + + c_pkts_per_intr = (u32)CFG_GET_OQ_PKTS_PER_INTR(conf23); + c_refill_threshold = (u32)CFG_GET_OQ_REFILL_THRESHOLD(conf23); } else { return 1; } @@ -567,7 +573,7 @@ octeon_droq_dispatch_pkt(struct octeon_device *oct, (unsigned int)rh->r.opcode, (unsigned int)rh->r.subcode); droq->stats.dropped_nodispatch++; - } /* else (dispatch_fn ... */ + } return cnt; } @@ -881,8 +887,11 @@ octeon_process_droq_poll_cmd(struct octeon_device *oct, u32 q_no, int cmd, return 0; } break; + case OCTEON_CN23XX_PF_VID: { + lio_enable_irq(oct->droq[q_no], oct->instr_queue[q_no]); + } + break; } - return 0; } diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_main.h b/drivers/net/ethernet/cavium/liquidio/octeon_main.h index ebeef95ed9b0..366298f7bcb2 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_main.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_main.h @@ -181,22 +181,26 @@ cnnic_numa_alloc_aligned_dma(u32 size, #define cnnic_free_aligned_dma(pci_dev, ptr, size, orig_ptr, dma_addr) \ free_pages(orig_ptr, get_order(size)) -static inline void +static inline int sleep_cond(wait_queue_head_t *wait_queue, int *condition) { + int errno = 0; wait_queue_t we; init_waitqueue_entry(&we, current); add_wait_queue(wait_queue, &we); while (!(READ_ONCE(*condition))) { set_current_state(TASK_INTERRUPTIBLE); - if (signal_pending(current)) + if (signal_pending(current)) { + errno = -EINTR; goto out; + } schedule(); } out: set_current_state(TASK_RUNNING); remove_wait_queue(wait_queue, &we); + return errno; } static inline void diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h index 7cfbdf9b039c..e5d1debd05ad 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h @@ -122,11 +122,21 @@ struct lio { /* work queue for txq status */ struct cavium_wq txq_status_wq; + + /* work queue for link status */ + struct cavium_wq link_status_wq; + }; #define LIO_SIZE (sizeof(struct lio)) #define GET_LIO(netdev) ((struct lio *)netdev_priv(netdev)) +#define CIU3_WDOG(c) (0x1010000020000ULL + (c << 3)) +#define CIU3_WDOG_MASK 12ULL +#define LIO_MONITOR_WDOG_EXPIRE 1 +#define LIO_MONITOR_CORE_STUCK_MSGD 2 +#define LIO_MAX_CORES 12 + /** * \brief Enable or disable feature * @param netdev pointer to network device diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_nic.c b/drivers/net/ethernet/cavium/liquidio/octeon_nic.c index 0c4013de599a..40ac1fe88956 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_nic.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_nic.c @@ -35,6 +35,7 @@ octeon_alloc_soft_command_resp(struct octeon_device *oct, u32 rdatasize) { struct octeon_soft_command *sc; + struct octeon_instr_ih3 *ih3; struct octeon_instr_ih2 *ih2; struct octeon_instr_irh *irh; struct octeon_instr_rdp *rdp; @@ -51,10 +52,19 @@ octeon_alloc_soft_command_resp(struct octeon_device *oct, /* Add in the response related fields. Opcode and Param are already * there. */ - ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2; - rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp; - irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh; - ih2->fsz = 40; /* irh + ossp[0] + ossp[1] + rdp + rptr = 40 bytes */ + if (OCTEON_CN23XX_PF(oct)) { + ih3 = (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3; + rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd3.rdp; + irh = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh; + /*pkiih3 + irh + ossp[0] + ossp[1] + rdp + rptr = 40 bytes */ + ih3->fsz = LIO_SOFTCMDRESP_IH3; + } else { + ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2; + rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp; + irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh; + /* irh + ossp[0] + ossp[1] + rdp + rptr = 40 bytes */ + ih2->fsz = LIO_SOFTCMDRESP_IH2; + } irh->rflag = 1; /* a response is required */ @@ -63,7 +73,10 @@ octeon_alloc_soft_command_resp(struct octeon_device *oct, *sc->status_word = COMPLETION_WORD_INIT; - sc->cmd.cmd2.rptr = sc->dmarptr; + if (OCTEON_CN23XX_PF(oct)) + sc->cmd.cmd3.rptr = sc->dmarptr; + else + sc->cmd.cmd2.rptr = sc->dmarptr; sc->wait_time = 1000; sc->timeout = jiffies + sc->wait_time; @@ -179,8 +192,8 @@ octnet_send_nic_ctrl_pkt(struct octeon_device *oct, retval = octeon_send_soft_command(oct, sc); if (retval == IQ_SEND_FAILED) { octeon_free_soft_command(oct, sc); - dev_err(&oct->pci_dev->dev, "%s soft command:%d send failed status: %x\n", - __func__, nctrl->ncmd.s.cmd, retval); + dev_err(&oct->pci_dev->dev, "%s pf_num:%d soft command:%d send failed status: %x\n", + __func__, oct->pf_num, nctrl->ncmd.s.cmd, retval); spin_unlock_bh(&oct->cmd_resp_wqlock); return -1; } diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_nic.h b/drivers/net/ethernet/cavium/liquidio/octeon_nic.h index e55400c91574..4b8da67b995f 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_nic.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_nic.h @@ -138,7 +138,7 @@ octnet_prepare_pci_cmd_o2(struct octeon_device *oct, /* assume that rflag is cleared so therefore front data will only have * irh and ossp[0], ossp[1] for a total of 32 bytes */ - ih2->fsz = 24; + ih2->fsz = LIO_PCICMD_O2; ih2->tagtype = ORDERED_TAG; ih2->grp = DEFAULT_POW_GRP; @@ -196,7 +196,7 @@ octnet_prepare_pci_cmd_o3(struct octeon_device *oct, */ ih3->pkind = oct->instr_queue[setup->s.iq_no]->txpciq.s.pkind; /*PKI IH*/ - ih3->fsz = 24 + 8; + ih3->fsz = LIO_PCICMD_O3; if (!setup->s.gather) { ih3->dlengsz = setup->s.u.datasize; diff --git a/drivers/net/ethernet/cavium/liquidio/request_manager.c b/drivers/net/ethernet/cavium/liquidio/request_manager.c index 3ee5d023c789..90866bb50033 100644 --- a/drivers/net/ethernet/cavium/liquidio/request_manager.c +++ b/drivers/net/ethernet/cavium/liquidio/request_manager.c @@ -30,6 +30,7 @@ #include "octeon_main.h" #include "octeon_network.h" #include "cn66xx_device.h" +#include "cn23xx_pf_device.h" #define INCR_INSTRQUEUE_PKT_COUNT(octeon_dev_ptr, iq_no, field, count) \ (octeon_dev_ptr->instr_queue[iq_no]->stats.field += count) @@ -71,7 +72,8 @@ int octeon_init_instr_queue(struct octeon_device *oct, if (OCTEON_CN6XXX(oct)) conf = &(CFG_GET_IQ_CFG(CHIP_FIELD(oct, cn6xxx, conf))); - + else if (OCTEON_CN23XX_PF(oct)) + conf = &(CFG_GET_IQ_CFG(CHIP_FIELD(oct, cn23xx_pf, conf))); if (!conf) { dev_err(&oct->pci_dev->dev, "Unsupported Chip %x\n", oct->chip_id); @@ -88,6 +90,7 @@ int octeon_init_instr_queue(struct octeon_device *oct, q_size = (u32)conf->instr_type * num_descs; iq = oct->instr_queue[iq_no]; + iq->oct_dev = oct; set_dev_node(&oct->pci_dev->dev, numa_node); @@ -181,6 +184,9 @@ int octeon_delete_instr_queue(struct octeon_device *oct, u32 iq_no) if (OCTEON_CN6XXX(oct)) desc_size = CFG_GET_IQ_INSTR_TYPE(CHIP_FIELD(oct, cn6xxx, conf)); + else if (OCTEON_CN23XX_PF(oct)) + desc_size = + CFG_GET_IQ_INSTR_TYPE(CHIP_FIELD(oct, cn23xx_pf, conf)); vfree(iq->request_list); @@ -383,7 +389,12 @@ lio_process_iq_request_list(struct octeon_device *oct, case REQTYPE_SOFT_COMMAND: sc = buf; - irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh; + if (OCTEON_CN23XX_PF(oct)) + irh = (struct octeon_instr_irh *) + &sc->cmd.cmd3.irh; + else + irh = (struct octeon_instr_irh *) + &sc->cmd.cmd2.irh; if (irh->rflag) { /* We're expecting a response from Octeon. * It's up to lio_process_ordered_list() to @@ -583,6 +594,8 @@ octeon_prepare_soft_command(struct octeon_device *oct, { struct octeon_config *oct_cfg; struct octeon_instr_ih2 *ih2; + struct octeon_instr_ih3 *ih3; + struct octeon_instr_pki_ih3 *pki_ih3; struct octeon_instr_irh *irh; struct octeon_instr_rdp *rdp; @@ -591,36 +604,88 @@ octeon_prepare_soft_command(struct octeon_device *oct, oct_cfg = octeon_get_conf(oct); - ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2; - ih2->tagtype = ATOMIC_TAG; - ih2->tag = LIO_CONTROL; - ih2->raw = 1; - ih2->grp = CFG_GET_CTRL_Q_GRP(oct_cfg); - - if (sc->datasize) { - ih2->dlengsz = sc->datasize; - ih2->rs = 1; - } - - irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh; - irh->opcode = opcode; - irh->subcode = subcode; - - /* opcode/subcode specific parameters (ossp) */ - irh->ossp = irh_ossp; - sc->cmd.cmd2.ossp[0] = ossp0; - sc->cmd.cmd2.ossp[1] = ossp1; - - if (sc->rdatasize) { - rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp; - rdp->pcie_port = oct->pcie_port; - rdp->rlen = sc->rdatasize; + if (OCTEON_CN23XX_PF(oct)) { + ih3 = (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3; + + ih3->pkind = oct->instr_queue[sc->iq_no]->txpciq.s.pkind; + + pki_ih3 = (struct octeon_instr_pki_ih3 *)&sc->cmd.cmd3.pki_ih3; + + pki_ih3->w = 1; + pki_ih3->raw = 1; + pki_ih3->utag = 1; + pki_ih3->uqpg = + oct->instr_queue[sc->iq_no]->txpciq.s.use_qpg; + pki_ih3->utt = 1; + pki_ih3->tag = LIO_CONTROL; + pki_ih3->tagtype = ATOMIC_TAG; + pki_ih3->qpg = + oct->instr_queue[sc->iq_no]->txpciq.s.qpg; + pki_ih3->pm = 0x7; + pki_ih3->sl = 8; + + if (sc->datasize) + ih3->dlengsz = sc->datasize; + + irh = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh; + irh->opcode = opcode; + irh->subcode = subcode; + + /* opcode/subcode specific parameters (ossp) */ + irh->ossp = irh_ossp; + sc->cmd.cmd3.ossp[0] = ossp0; + sc->cmd.cmd3.ossp[1] = ossp1; + + if (sc->rdatasize) { + rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd3.rdp; + rdp->pcie_port = oct->pcie_port; + rdp->rlen = sc->rdatasize; + + irh->rflag = 1; + /*PKI IH3*/ + /* pki_ih3 irh+ossp[0]+ossp[1]+rdp+rptr = 48 bytes */ + ih3->fsz = LIO_SOFTCMDRESP_IH3; + } else { + irh->rflag = 0; + /*PKI IH3*/ + /* pki_h3 + irh + ossp[0] + ossp[1] = 32 bytes */ + ih3->fsz = LIO_PCICMD_O3; + } - irh->rflag = 1; - ih2->fsz = 40; /* irh+ossp[0]+ossp[1]+rdp+rptr = 40 bytes */ } else { - irh->rflag = 0; - ih2->fsz = 24; /* irh + ossp[0] + ossp[1] = 24 bytes */ + ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2; + ih2->tagtype = ATOMIC_TAG; + ih2->tag = LIO_CONTROL; + ih2->raw = 1; + ih2->grp = CFG_GET_CTRL_Q_GRP(oct_cfg); + + if (sc->datasize) { + ih2->dlengsz = sc->datasize; + ih2->rs = 1; + } + + irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh; + irh->opcode = opcode; + irh->subcode = subcode; + + /* opcode/subcode specific parameters (ossp) */ + irh->ossp = irh_ossp; + sc->cmd.cmd2.ossp[0] = ossp0; + sc->cmd.cmd2.ossp[1] = ossp1; + + if (sc->rdatasize) { + rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp; + rdp->pcie_port = oct->pcie_port; + rdp->rlen = sc->rdatasize; + + irh->rflag = 1; + /* irh+ossp[0]+ossp[1]+rdp+rptr = 40 bytes */ + ih2->fsz = LIO_SOFTCMDRESP_IH2; + } else { + irh->rflag = 0; + /* irh + ossp[0] + ossp[1] = 24 bytes */ + ih2->fsz = LIO_PCICMD_O2; + } } } @@ -628,23 +693,39 @@ int octeon_send_soft_command(struct octeon_device *oct, struct octeon_soft_command *sc) { struct octeon_instr_ih2 *ih2; + struct octeon_instr_ih3 *ih3; struct octeon_instr_irh *irh; u32 len; - ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2; - if (ih2->dlengsz) { - WARN_ON(!sc->dmadptr); - sc->cmd.cmd2.dptr = sc->dmadptr; - } - irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh; - if (irh->rflag) { - WARN_ON(!sc->dmarptr); - WARN_ON(!sc->status_word); - *sc->status_word = COMPLETION_WORD_INIT; - - sc->cmd.cmd2.rptr = sc->dmarptr; + if (OCTEON_CN23XX_PF(oct)) { + ih3 = (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3; + if (ih3->dlengsz) { + WARN_ON(!sc->dmadptr); + sc->cmd.cmd3.dptr = sc->dmadptr; + } + irh = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh; + if (irh->rflag) { + WARN_ON(!sc->dmarptr); + WARN_ON(!sc->status_word); + *sc->status_word = COMPLETION_WORD_INIT; + sc->cmd.cmd3.rptr = sc->dmarptr; + } + len = (u32)ih3->dlengsz; + } else { + ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2; + if (ih2->dlengsz) { + WARN_ON(!sc->dmadptr); + sc->cmd.cmd2.dptr = sc->dmadptr; + } + irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh; + if (irh->rflag) { + WARN_ON(!sc->dmarptr); + WARN_ON(!sc->status_word); + *sc->status_word = COMPLETION_WORD_INIT; + sc->cmd.cmd2.rptr = sc->dmarptr; + } + len = (u32)ih2->dlengsz; } - len = (u32)ih2->dlengsz; if (sc->wait_time) sc->timeout = jiffies + sc->wait_time; diff --git a/drivers/net/ethernet/cavium/liquidio/response_manager.c b/drivers/net/ethernet/cavium/liquidio/response_manager.c index 709049e36627..be52178d8cb6 100644 --- a/drivers/net/ethernet/cavium/liquidio/response_manager.c +++ b/drivers/net/ethernet/cavium/liquidio/response_manager.c @@ -91,8 +91,13 @@ int lio_process_ordered_list(struct octeon_device *octeon_dev, sc = (struct octeon_soft_command *)ordered_sc_list-> head.next; - rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp; - rptr = sc->cmd.cmd2.rptr; + if (OCTEON_CN23XX_PF(octeon_dev)) { + rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd3.rdp; + rptr = sc->cmd.cmd3.rptr; + } else { + rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp; + rptr = sc->cmd.cmd2.rptr; + } status = OCTEON_REQUEST_PENDING; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 3f7b33aa5ec5..45955691edc7 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -788,6 +788,11 @@ struct uld_msix_info { char desc[IFNAMSIZ + 10]; }; +struct vf_info { + unsigned char vf_mac_addr[ETH_ALEN]; + bool pf_set_mac; +}; + struct adapter { void __iomem *regs; void __iomem *bar2; @@ -821,6 +826,9 @@ struct adapter { struct net_device *port[MAX_NPORTS]; u8 chan_map[NCHAN]; /* channel -> port map */ + struct vf_info *vfinfo; + u8 num_vfs; + u32 filter_mode; unsigned int l2t_start; unsigned int l2t_end; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 44019bdd526d..44cc9767936f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -3094,10 +3094,44 @@ static int dummy_open(struct net_device *dev) return 0; } +/* Fill MAC address that will be assigned by the FW */ +static void fill_vf_station_mac_addr(struct adapter *adap) +{ + unsigned int i; + u8 hw_addr[ETH_ALEN], macaddr[ETH_ALEN]; + int err; + u8 *na; + u16 a, b; + + err = t4_get_raw_vpd_params(adap, &adap->params.vpd); + if (!err) { + na = adap->params.vpd.na; + for (i = 0; i < ETH_ALEN; i++) + hw_addr[i] = (hex2val(na[2 * i + 0]) * 16 + + hex2val(na[2 * i + 1])); + a = (hw_addr[0] << 8) | hw_addr[1]; + b = (hw_addr[1] << 8) | hw_addr[2]; + a ^= b; + a |= 0x0200; /* locally assigned Ethernet MAC address */ + a &= ~0x0100; /* not a multicast Ethernet MAC address */ + macaddr[0] = a >> 8; + macaddr[1] = a & 0xff; + + for (i = 2; i < 5; i++) + macaddr[i] = hw_addr[i + 1]; + + for (i = 0; i < adap->num_vfs; i++) { + macaddr[5] = adap->pf * 16 + i; + ether_addr_copy(adap->vfinfo[i].vf_mac_addr, macaddr); + } + } +} + static int cxgb_set_vf_mac(struct net_device *dev, int vf, u8 *mac) { struct port_info *pi = netdev_priv(dev); struct adapter *adap = pi->adapter; + int ret; /* verify MAC addr is valid */ if (!is_valid_ether_addr(mac)) { @@ -3109,7 +3143,23 @@ static int cxgb_set_vf_mac(struct net_device *dev, int vf, u8 *mac) dev_info(pi->adapter->pdev_dev, "Setting MAC %pM on VF %d\n", mac, vf); - return t4_set_vf_mac_acl(adap, vf + 1, 1, mac); + ret = t4_set_vf_mac_acl(adap, vf + 1, 1, mac); + if (!ret) + ether_addr_copy(adap->vfinfo[vf].vf_mac_addr, mac); + return ret; +} + +static int cxgb_get_vf_config(struct net_device *dev, + int vf, struct ifla_vf_info *ivi) +{ + struct port_info *pi = netdev_priv(dev); + struct adapter *adap = pi->adapter; + + if (vf >= adap->num_vfs) + return -EINVAL; + ivi->vf = vf; + ether_addr_copy(ivi->mac, adap->vfinfo[vf].vf_mac_addr); + return 0; } #endif @@ -3259,6 +3309,7 @@ static const struct net_device_ops cxgb4_netdev_ops = { static const struct net_device_ops cxgb4_mgmt_netdev_ops = { .ndo_open = dummy_open, .ndo_set_vf_mac = cxgb_set_vf_mac, + .ndo_get_vf_config = cxgb_get_vf_config, }; #endif @@ -5116,6 +5167,10 @@ static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs) unregister_netdev(adap->port[0]); adap->port[0] = NULL; } + /* free VF resources */ + kfree(adap->vfinfo); + adap->vfinfo = NULL; + adap->num_vfs = 0; return num_vfs; } @@ -5124,10 +5179,16 @@ static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs) if (err) return err; + adap->num_vfs = num_vfs; err = config_mgmt_dev(pdev); if (err) return err; } + + adap->vfinfo = kcalloc(adap->num_vfs, + sizeof(struct vf_info), GFP_KERNEL); + if (adap->vfinfo) + fill_vf_station_mac_addr(adap); return num_vfs; } #endif @@ -5621,6 +5682,7 @@ static void remove_one(struct pci_dev *pdev) if (adapter->port[0]) unregister_netdev(adapter->port[0]); iounmap(adapter->regs); + kfree(adapter->vfinfo); kfree(adapter); pci_disable_sriov(pdev); pci_release_regions(pdev); diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index de451ee2ba75..15be54324e3d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -2729,7 +2729,7 @@ int t4_get_raw_vpd_params(struct adapter *adapter, struct vpd_params *p) out: vfree(vpd); - return ret; + return ret < 0 ? ret : 0; } /** diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 2dadfa961898..473977d6e9e8 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -895,17 +895,18 @@ release_desc: rxd->rxd2 = RX_DMA_PLEN0(ring->buf_size); ring->calc_idx = idx; + + done++; + } + + if (done) { /* make sure that all changes to the dma ring are flushed before * we continue */ wmb(); mtk_w32(eth, ring->calc_idx, MTK_PRX_CRX_IDX0); - done++; } - if (done < budget) - mtk_w32(eth, MTK_RX_DONE_INT, MTK_PDMA_INT_STATUS); - return done; } @@ -1024,10 +1025,13 @@ static int mtk_napi_rx(struct napi_struct *napi, int budget) struct mtk_eth *eth = container_of(napi, struct mtk_eth, rx_napi); u32 status, mask; int rx_done = 0; + int remain_budget = budget; mtk_handle_status_irq(eth); + +poll_again: mtk_w32(eth, MTK_RX_DONE_INT, MTK_PDMA_INT_STATUS); - rx_done = mtk_poll_rx(napi, budget, eth); + rx_done = mtk_poll_rx(napi, remain_budget, eth); if (unlikely(netif_msg_intr(eth))) { status = mtk_r32(eth, MTK_PDMA_INT_STATUS); @@ -1036,18 +1040,18 @@ static int mtk_napi_rx(struct napi_struct *napi, int budget) "done rx %d, intr 0x%08x/0x%x\n", rx_done, status, mask); } - - if (rx_done == budget) + if (rx_done == remain_budget) return budget; status = mtk_r32(eth, MTK_PDMA_INT_STATUS); - if (status & MTK_RX_DONE_INT) - return budget; - + if (status & MTK_RX_DONE_INT) { + remain_budget -= rx_done; + goto poll_again; + } napi_complete(napi); mtk_irq_enable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT); - return rx_done; + return rx_done + budget - remain_budget; } static int mtk_tx_alloc(struct mtk_eth *eth) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 4198e9bf89d0..31a41add5b4c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2642,12 +2642,16 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog) if (IS_ERR(prog)) return PTR_ERR(prog); } + mutex_lock(&mdev->state_lock); for (i = 0; i < priv->rx_ring_num; i++) { - /* This xchg is paired with READ_ONCE in the fastpath */ - old_prog = xchg(&priv->rx_ring[i]->xdp_prog, prog); + old_prog = rcu_dereference_protected( + priv->rx_ring[i]->xdp_prog, + lockdep_is_held(&mdev->state_lock)); + rcu_assign_pointer(priv->rx_ring[i]->xdp_prog, prog); if (old_prog) bpf_prog_put(old_prog); } + mutex_unlock(&mdev->state_lock); return 0; } @@ -2680,7 +2684,10 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog) priv->xdp_ring_num); for (i = 0; i < priv->rx_ring_num; i++) { - old_prog = xchg(&priv->rx_ring[i]->xdp_prog, prog); + old_prog = rcu_dereference_protected( + priv->rx_ring[i]->xdp_prog, + lockdep_is_held(&mdev->state_lock)); + rcu_assign_pointer(priv->rx_ring[i]->xdp_prog, prog); if (old_prog) bpf_prog_put(old_prog); } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 2040dad8611d..6758292311f4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -537,7 +537,9 @@ void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring = *pring; struct bpf_prog *old_prog; - old_prog = READ_ONCE(ring->xdp_prog); + old_prog = rcu_dereference_protected( + ring->xdp_prog, + lockdep_is_held(&mdev->state_lock)); if (old_prog) bpf_prog_put(old_prog); mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE); @@ -800,7 +802,9 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud if (budget <= 0) return polled; - xdp_prog = READ_ONCE(ring->xdp_prog); + /* Protect accesses to: ring->xdp_prog, priv->mac_hash list */ + rcu_read_lock(); + xdp_prog = rcu_dereference(ring->xdp_prog); doorbell_pending = 0; tx_index = (priv->tx_ring_num - priv->xdp_ring_num) + cq->ring; @@ -858,15 +862,11 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud /* Drop the packet, since HW loopback-ed it */ mac_hash = ethh->h_source[MLX4_EN_MAC_HASH_IDX]; bucket = &priv->mac_hash[mac_hash]; - rcu_read_lock(); hlist_for_each_entry_rcu(entry, bucket, hlist) { if (ether_addr_equal_64bits(entry->mac, - ethh->h_source)) { - rcu_read_unlock(); + ethh->h_source)) goto next; - } } - rcu_read_unlock(); } } @@ -1077,6 +1077,7 @@ consumed: } out: + rcu_read_unlock(); if (doorbell_pending) mlx4_en_xmit_doorbell(priv->tx_ring[tx_index]); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 2c2913dcae98..47867c49f91c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -340,7 +340,7 @@ struct mlx4_en_rx_ring { u8 fcs_del; void *buf; void *rx_info; - struct bpf_prog *xdp_prog; + struct bpf_prog __rcu *xdp_prog; struct mlx4_en_page_cache page_cache; unsigned long bytes; unsigned long packets; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 92105916ef40..3740a4417fa0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -30,6 +30,7 @@ #include <linux/delay.h> #include <linux/mfd/syscon.h> #include <linux/regmap.h> +#include <linux/pm_runtime.h> #include "stmmac_platform.h" @@ -301,6 +302,118 @@ static const struct rk_gmac_ops rk3288_ops = { .set_rmii_speed = rk3288_set_rmii_speed, }; +#define RK3366_GRF_SOC_CON6 0x0418 +#define RK3366_GRF_SOC_CON7 0x041c + +/* RK3366_GRF_SOC_CON6 */ +#define RK3366_GMAC_PHY_INTF_SEL_RGMII (GRF_BIT(9) | GRF_CLR_BIT(10) | \ + GRF_CLR_BIT(11)) +#define RK3366_GMAC_PHY_INTF_SEL_RMII (GRF_CLR_BIT(9) | GRF_CLR_BIT(10) | \ + GRF_BIT(11)) +#define RK3366_GMAC_FLOW_CTRL GRF_BIT(8) +#define RK3366_GMAC_FLOW_CTRL_CLR GRF_CLR_BIT(8) +#define RK3366_GMAC_SPEED_10M GRF_CLR_BIT(7) +#define RK3366_GMAC_SPEED_100M GRF_BIT(7) +#define RK3366_GMAC_RMII_CLK_25M GRF_BIT(3) +#define RK3366_GMAC_RMII_CLK_2_5M GRF_CLR_BIT(3) +#define RK3366_GMAC_CLK_125M (GRF_CLR_BIT(4) | GRF_CLR_BIT(5)) +#define RK3366_GMAC_CLK_25M (GRF_BIT(4) | GRF_BIT(5)) +#define RK3366_GMAC_CLK_2_5M (GRF_CLR_BIT(4) | GRF_BIT(5)) +#define RK3366_GMAC_RMII_MODE GRF_BIT(6) +#define RK3366_GMAC_RMII_MODE_CLR GRF_CLR_BIT(6) + +/* RK3366_GRF_SOC_CON7 */ +#define RK3366_GMAC_TXCLK_DLY_ENABLE GRF_BIT(7) +#define RK3366_GMAC_TXCLK_DLY_DISABLE GRF_CLR_BIT(7) +#define RK3366_GMAC_RXCLK_DLY_ENABLE GRF_BIT(15) +#define RK3366_GMAC_RXCLK_DLY_DISABLE GRF_CLR_BIT(15) +#define RK3366_GMAC_CLK_RX_DL_CFG(val) HIWORD_UPDATE(val, 0x7F, 8) +#define RK3366_GMAC_CLK_TX_DL_CFG(val) HIWORD_UPDATE(val, 0x7F, 0) + +static void rk3366_set_to_rgmii(struct rk_priv_data *bsp_priv, + int tx_delay, int rx_delay) +{ + struct device *dev = &bsp_priv->pdev->dev; + + if (IS_ERR(bsp_priv->grf)) { + dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); + return; + } + + regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6, + RK3366_GMAC_PHY_INTF_SEL_RGMII | + RK3366_GMAC_RMII_MODE_CLR); + regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON7, + RK3366_GMAC_RXCLK_DLY_ENABLE | + RK3366_GMAC_TXCLK_DLY_ENABLE | + RK3366_GMAC_CLK_RX_DL_CFG(rx_delay) | + RK3366_GMAC_CLK_TX_DL_CFG(tx_delay)); +} + +static void rk3366_set_to_rmii(struct rk_priv_data *bsp_priv) +{ + struct device *dev = &bsp_priv->pdev->dev; + + if (IS_ERR(bsp_priv->grf)) { + dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); + return; + } + + regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6, + RK3366_GMAC_PHY_INTF_SEL_RMII | RK3366_GMAC_RMII_MODE); +} + +static void rk3366_set_rgmii_speed(struct rk_priv_data *bsp_priv, int speed) +{ + struct device *dev = &bsp_priv->pdev->dev; + + if (IS_ERR(bsp_priv->grf)) { + dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); + return; + } + + if (speed == 10) + regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6, + RK3366_GMAC_CLK_2_5M); + else if (speed == 100) + regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6, + RK3366_GMAC_CLK_25M); + else if (speed == 1000) + regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6, + RK3366_GMAC_CLK_125M); + else + dev_err(dev, "unknown speed value for RGMII! speed=%d", speed); +} + +static void rk3366_set_rmii_speed(struct rk_priv_data *bsp_priv, int speed) +{ + struct device *dev = &bsp_priv->pdev->dev; + + if (IS_ERR(bsp_priv->grf)) { + dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); + return; + } + + if (speed == 10) { + regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6, + RK3366_GMAC_RMII_CLK_2_5M | + RK3366_GMAC_SPEED_10M); + } else if (speed == 100) { + regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6, + RK3366_GMAC_RMII_CLK_25M | + RK3366_GMAC_SPEED_100M); + } else { + dev_err(dev, "unknown speed value for RMII! speed=%d", speed); + } +} + +static const struct rk_gmac_ops rk3366_ops = { + .set_to_rgmii = rk3366_set_to_rgmii, + .set_to_rmii = rk3366_set_to_rmii, + .set_rgmii_speed = rk3366_set_rgmii_speed, + .set_rmii_speed = rk3366_set_rmii_speed, +}; + #define RK3368_GRF_SOC_CON15 0x043c #define RK3368_GRF_SOC_CON16 0x0440 @@ -413,6 +526,118 @@ static const struct rk_gmac_ops rk3368_ops = { .set_rmii_speed = rk3368_set_rmii_speed, }; +#define RK3399_GRF_SOC_CON5 0xc214 +#define RK3399_GRF_SOC_CON6 0xc218 + +/* RK3399_GRF_SOC_CON5 */ +#define RK3399_GMAC_PHY_INTF_SEL_RGMII (GRF_BIT(9) | GRF_CLR_BIT(10) | \ + GRF_CLR_BIT(11)) +#define RK3399_GMAC_PHY_INTF_SEL_RMII (GRF_CLR_BIT(9) | GRF_CLR_BIT(10) | \ + GRF_BIT(11)) +#define RK3399_GMAC_FLOW_CTRL GRF_BIT(8) +#define RK3399_GMAC_FLOW_CTRL_CLR GRF_CLR_BIT(8) +#define RK3399_GMAC_SPEED_10M GRF_CLR_BIT(7) +#define RK3399_GMAC_SPEED_100M GRF_BIT(7) +#define RK3399_GMAC_RMII_CLK_25M GRF_BIT(3) +#define RK3399_GMAC_RMII_CLK_2_5M GRF_CLR_BIT(3) +#define RK3399_GMAC_CLK_125M (GRF_CLR_BIT(4) | GRF_CLR_BIT(5)) +#define RK3399_GMAC_CLK_25M (GRF_BIT(4) | GRF_BIT(5)) +#define RK3399_GMAC_CLK_2_5M (GRF_CLR_BIT(4) | GRF_BIT(5)) +#define RK3399_GMAC_RMII_MODE GRF_BIT(6) +#define RK3399_GMAC_RMII_MODE_CLR GRF_CLR_BIT(6) + +/* RK3399_GRF_SOC_CON6 */ +#define RK3399_GMAC_TXCLK_DLY_ENABLE GRF_BIT(7) +#define RK3399_GMAC_TXCLK_DLY_DISABLE GRF_CLR_BIT(7) +#define RK3399_GMAC_RXCLK_DLY_ENABLE GRF_BIT(15) +#define RK3399_GMAC_RXCLK_DLY_DISABLE GRF_CLR_BIT(15) +#define RK3399_GMAC_CLK_RX_DL_CFG(val) HIWORD_UPDATE(val, 0x7F, 8) +#define RK3399_GMAC_CLK_TX_DL_CFG(val) HIWORD_UPDATE(val, 0x7F, 0) + +static void rk3399_set_to_rgmii(struct rk_priv_data *bsp_priv, + int tx_delay, int rx_delay) +{ + struct device *dev = &bsp_priv->pdev->dev; + + if (IS_ERR(bsp_priv->grf)) { + dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); + return; + } + + regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5, + RK3399_GMAC_PHY_INTF_SEL_RGMII | + RK3399_GMAC_RMII_MODE_CLR); + regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON6, + RK3399_GMAC_RXCLK_DLY_ENABLE | + RK3399_GMAC_TXCLK_DLY_ENABLE | + RK3399_GMAC_CLK_RX_DL_CFG(rx_delay) | + RK3399_GMAC_CLK_TX_DL_CFG(tx_delay)); +} + +static void rk3399_set_to_rmii(struct rk_priv_data *bsp_priv) +{ + struct device *dev = &bsp_priv->pdev->dev; + + if (IS_ERR(bsp_priv->grf)) { + dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); + return; + } + + regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5, + RK3399_GMAC_PHY_INTF_SEL_RMII | RK3399_GMAC_RMII_MODE); +} + +static void rk3399_set_rgmii_speed(struct rk_priv_data *bsp_priv, int speed) +{ + struct device *dev = &bsp_priv->pdev->dev; + + if (IS_ERR(bsp_priv->grf)) { + dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); + return; + } + + if (speed == 10) + regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5, + RK3399_GMAC_CLK_2_5M); + else if (speed == 100) + regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5, + RK3399_GMAC_CLK_25M); + else if (speed == 1000) + regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5, + RK3399_GMAC_CLK_125M); + else + dev_err(dev, "unknown speed value for RGMII! speed=%d", speed); +} + +static void rk3399_set_rmii_speed(struct rk_priv_data *bsp_priv, int speed) +{ + struct device *dev = &bsp_priv->pdev->dev; + + if (IS_ERR(bsp_priv->grf)) { + dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); + return; + } + + if (speed == 10) { + regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5, + RK3399_GMAC_RMII_CLK_2_5M | + RK3399_GMAC_SPEED_10M); + } else if (speed == 100) { + regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5, + RK3399_GMAC_RMII_CLK_25M | + RK3399_GMAC_SPEED_100M); + } else { + dev_err(dev, "unknown speed value for RMII! speed=%d", speed); + } +} + +static const struct rk_gmac_ops rk3399_ops = { + .set_to_rgmii = rk3399_set_to_rgmii, + .set_to_rmii = rk3399_set_to_rmii, + .set_rgmii_speed = rk3399_set_rgmii_speed, + .set_rmii_speed = rk3399_set_rmii_speed, +}; + static int gmac_clk_init(struct rk_priv_data *bsp_priv) { struct device *dev = &bsp_priv->pdev->dev; @@ -629,6 +854,16 @@ static struct rk_priv_data *rk_gmac_setup(struct platform_device *pdev, "rockchip,grf"); bsp_priv->pdev = pdev; + gmac_clk_init(bsp_priv); + + return bsp_priv; +} + +static int rk_gmac_powerup(struct rk_priv_data *bsp_priv) +{ + int ret; + struct device *dev = &bsp_priv->pdev->dev; + /*rmii or rgmii*/ if (bsp_priv->phy_iface == PHY_INTERFACE_MODE_RGMII) { dev_info(dev, "init for RGMII\n"); @@ -641,15 +876,6 @@ static struct rk_priv_data *rk_gmac_setup(struct platform_device *pdev, dev_err(dev, "NO interface defined!\n"); } - gmac_clk_init(bsp_priv); - - return bsp_priv; -} - -static int rk_gmac_powerup(struct rk_priv_data *bsp_priv) -{ - int ret; - ret = phy_power_on(bsp_priv, true); if (ret) return ret; @@ -658,11 +884,19 @@ static int rk_gmac_powerup(struct rk_priv_data *bsp_priv) if (ret) return ret; + pm_runtime_enable(dev); + pm_runtime_get_sync(dev); + return 0; } static void rk_gmac_powerdown(struct rk_priv_data *gmac) { + struct device *dev = &gmac->pdev->dev; + + pm_runtime_put_sync(dev); + pm_runtime_disable(dev); + phy_power_on(gmac, false); gmac_clk_enable(gmac, false); } @@ -760,7 +994,9 @@ static int rk_gmac_probe(struct platform_device *pdev) static const struct of_device_id rk_gmac_dwmac_match[] = { { .compatible = "rockchip,rk3228-gmac", .data = &rk3228_ops }, { .compatible = "rockchip,rk3288-gmac", .data = &rk3288_ops }, + { .compatible = "rockchip,rk3366-gmac", .data = &rk3366_ops }, { .compatible = "rockchip,rk3368-gmac", .data = &rk3368_ops }, + { .compatible = "rockchip,rk3399-gmac", .data = &rk3399_ops }, { } }; MODULE_DEVICE_TABLE(of, rk_gmac_dwmac_match); diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c index d300d536d06f..fa0cfda24fd9 100644 --- a/drivers/net/ethernet/ti/cpmac.c +++ b/drivers/net/ethernet/ti/cpmac.c @@ -546,7 +546,8 @@ fatal_error: static int cpmac_start_xmit(struct sk_buff *skb, struct net_device *dev) { - int queue, len; + int queue; + unsigned int len; struct cpmac_desc *desc; struct cpmac_priv *priv = netdev_priv(dev); @@ -556,7 +557,7 @@ static int cpmac_start_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(skb_padto(skb, ETH_ZLEN))) return NETDEV_TX_OK; - len = max(skb->len, ETH_ZLEN); + len = max_t(unsigned int, skb->len, ETH_ZLEN); queue = skb_get_queue_mapping(skb); netif_stop_subqueue(dev, queue); diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index c5544d36c54f..e7b516342678 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -50,6 +50,8 @@ * *****************************************************************************/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/sched.h> #include <linux/slab.h> #include <linux/init.h> @@ -108,23 +110,12 @@ /*****************************************************************************/ /* Debugging functions */ /*****************************************************************************/ -#define D__(lvl_, fmt, arg...) \ - do { \ - printk(lvl_ "[%d:%s]: " fmt "\n", \ - __LINE__, __func__, ## arg); \ - } while (0) - -#define D_(lvl, args...) \ - do { \ - if (lvl & debug) \ - D__(KERN_INFO, args); \ - } while (0) - -#define D1(args...) D_(0x01, ##args) -#define D2(args...) D_(0x02, ##args) -#define D3(args...) D_(0x04, ##args) -#define D4(args...) D_(0x08, ##args) -#define D5(args...) D_(0x10, ##args) +#define hso_dbg(lvl, fmt, ...) \ +do { \ + if ((lvl) & debug) \ + pr_info("[%d:%s] " fmt, \ + __LINE__, __func__, ##__VA_ARGS__); \ +} while (0) /*****************************************************************************/ /* Enumerators */ @@ -649,7 +640,7 @@ static int get_free_serial_index(void) } spin_unlock_irqrestore(&serial_table_lock, flags); - printk(KERN_ERR "%s: no free serial devices in table\n", __func__); + pr_err("%s: no free serial devices in table\n", __func__); return -1; } @@ -709,7 +700,8 @@ static void handle_usb_error(int status, const char *function, } /* log a meaningful explanation of an USB status */ - D1("%s: received USB status - %s (%d)", function, explanation, status); + hso_dbg(0x1, "%s: received USB status - %s (%d)\n", + function, explanation, status); } /* Network interface functions */ @@ -808,7 +800,7 @@ static netdev_tx_t hso_net_start_xmit(struct sk_buff *skb, DUMP1(skb->data, skb->len); /* Copy it from kernel memory to OUR memory */ memcpy(odev->mux_bulk_tx_buf, skb->data, skb->len); - D1("len: %d/%d", skb->len, MUX_BULK_TX_BUF_SIZE); + hso_dbg(0x1, "len: %d/%d\n", skb->len, MUX_BULK_TX_BUF_SIZE); /* Fill in the URB for shipping it out. */ usb_fill_bulk_urb(odev->mux_bulk_tx_urb, @@ -872,7 +864,7 @@ static void packetizeRx(struct hso_net *odev, unsigned char *ip_pkt, unsigned char *tmp_rx_buf; /* log if needed */ - D1("Rx %d bytes", count); + hso_dbg(0x1, "Rx %d bytes\n", count); DUMP(ip_pkt, min(128, (int)count)); while (count) { @@ -912,7 +904,7 @@ static void packetizeRx(struct hso_net *odev, unsigned char *ip_pkt, frame_len); if (!odev->skb_rx_buf) { /* We got no receive buffer. */ - D1("could not allocate memory"); + hso_dbg(0x1, "could not allocate memory\n"); odev->rx_parse_state = WAIT_SYNC; continue; } @@ -972,11 +964,11 @@ static void packetizeRx(struct hso_net *odev, unsigned char *ip_pkt, break; case WAIT_SYNC: - D1(" W_S"); + hso_dbg(0x1, " W_S\n"); count = 0; break; default: - D1(" "); + hso_dbg(0x1, "\n"); count--; break; } @@ -1020,7 +1012,7 @@ static void read_bulk_callback(struct urb *urb) /* Sanity check */ if (!odev || !test_bit(HSO_NET_RUNNING, &odev->flags)) { - D1("BULK IN callback but driver is not active!"); + hso_dbg(0x1, "BULK IN callback but driver is not active!\n"); return; } usb_mark_last_busy(urb->dev); @@ -1112,11 +1104,11 @@ static void _hso_serial_set_termios(struct tty_struct *tty, struct hso_serial *serial = tty->driver_data; if (!serial) { - printk(KERN_ERR "%s: no tty structures", __func__); + pr_err("%s: no tty structures", __func__); return; } - D4("port %d", serial->minor); + hso_dbg(0x8, "port %d\n", serial->minor); /* * Fix up unsupported bits @@ -1205,11 +1197,11 @@ static void hso_std_serial_read_bulk_callback(struct urb *urb) struct hso_serial *serial = urb->context; int status = urb->status; - D4("\n--- Got serial_read_bulk callback %02x ---", status); + hso_dbg(0x8, "--- Got serial_read_bulk callback %02x ---\n", status); /* sanity check */ if (!serial) { - D1("serial == NULL"); + hso_dbg(0x1, "serial == NULL\n"); return; } if (status) { @@ -1217,7 +1209,7 @@ static void hso_std_serial_read_bulk_callback(struct urb *urb) return; } - D1("Actual length = %d\n", urb->actual_length); + hso_dbg(0x1, "Actual length = %d\n", urb->actual_length); DUMP1(urb->transfer_buffer, urb->actual_length); /* Anyone listening? */ @@ -1266,7 +1258,7 @@ static int hso_serial_open(struct tty_struct *tty, struct file *filp) if (serial == NULL || serial->magic != HSO_SERIAL_MAGIC) { WARN_ON(1); tty->driver_data = NULL; - D1("Failed to open port"); + hso_dbg(0x1, "Failed to open port\n"); return -ENODEV; } @@ -1275,7 +1267,7 @@ static int hso_serial_open(struct tty_struct *tty, struct file *filp) if (result < 0) goto err_out; - D1("Opening %d", serial->minor); + hso_dbg(0x1, "Opening %d\n", serial->minor); /* setup */ tty->driver_data = serial; @@ -1298,7 +1290,7 @@ static int hso_serial_open(struct tty_struct *tty, struct file *filp) kref_get(&serial->parent->ref); } } else { - D1("Port was already open"); + hso_dbg(0x1, "Port was already open\n"); } usb_autopm_put_interface(serial->parent->interface); @@ -1317,7 +1309,7 @@ static void hso_serial_close(struct tty_struct *tty, struct file *filp) struct hso_serial *serial = tty->driver_data; u8 usb_gone; - D1("Closing serial port"); + hso_dbg(0x1, "Closing serial port\n"); /* Open failed, no close cleanup required */ if (serial == NULL) @@ -1357,7 +1349,7 @@ static int hso_serial_write(struct tty_struct *tty, const unsigned char *buf, /* sanity check */ if (serial == NULL) { - printk(KERN_ERR "%s: serial is NULL\n", __func__); + pr_err("%s: serial is NULL\n", __func__); return -ENODEV; } @@ -1412,8 +1404,8 @@ static void hso_serial_set_termios(struct tty_struct *tty, struct ktermios *old) unsigned long flags; if (old) - D5("Termios called with: cflags new[%d] - old[%d]", - tty->termios.c_cflag, old->c_cflag); + hso_dbg(0x16, "Termios called with: cflags new[%d] - old[%d]\n", + tty->termios.c_cflag, old->c_cflag); /* the actual setup */ spin_lock_irqsave(&serial->serial_lock, flags); @@ -1649,7 +1641,7 @@ static int hso_serial_tiocmget(struct tty_struct *tty) /* sanity check */ if (!serial) { - D1("no tty structures"); + hso_dbg(0x1, "no tty structures\n"); return -EINVAL; } spin_lock_irq(&serial->serial_lock); @@ -1682,7 +1674,7 @@ static int hso_serial_tiocmset(struct tty_struct *tty, /* sanity check */ if (!serial) { - D1("no tty structures"); + hso_dbg(0x1, "no tty structures\n"); return -EINVAL; } @@ -1721,7 +1713,7 @@ static int hso_serial_ioctl(struct tty_struct *tty, { struct hso_serial *serial = tty->driver_data; int ret = 0; - D4("IOCTL cmd: %d, arg: %ld", cmd, arg); + hso_dbg(0x8, "IOCTL cmd: %d, arg: %ld\n", cmd, arg); if (!serial) return -ENODEV; @@ -1783,7 +1775,7 @@ static int mux_device_request(struct hso_serial *serial, u8 type, u16 port, /* Sanity check */ if (!serial || !ctrl_urb || !ctrl_req) { - printk(KERN_ERR "%s: Wrong arguments\n", __func__); + pr_err("%s: Wrong arguments\n", __func__); return -EINVAL; } @@ -1808,9 +1800,9 @@ static int mux_device_request(struct hso_serial *serial, u8 type, u16 port, pipe = usb_sndctrlpipe(serial->parent->usb, 0); } /* syslog */ - D2("%s command (%02x) len: %d, port: %d", - type == USB_CDC_GET_ENCAPSULATED_RESPONSE ? "Read" : "Write", - ctrl_req->bRequestType, ctrl_req->wLength, port); + hso_dbg(0x2, "%s command (%02x) len: %d, port: %d\n", + type == USB_CDC_GET_ENCAPSULATED_RESPONSE ? "Read" : "Write", + ctrl_req->bRequestType, ctrl_req->wLength, port); /* Load ctrl urb */ ctrl_urb->transfer_flags = 0; @@ -1876,11 +1868,11 @@ static void intr_callback(struct urb *urb) handle_usb_error(status, __func__, NULL); return; } - D4("\n--- Got intr callback 0x%02X ---", status); + hso_dbg(0x8, "--- Got intr callback 0x%02X ---\n", status); /* what request? */ port_req = urb->transfer_buffer; - D4(" port_req = 0x%.2X\n", *port_req); + hso_dbg(0x8, "port_req = 0x%.2X\n", *port_req); /* loop over all muxed ports to find the one sending this */ for (i = 0; i < 8; i++) { /* max 8 channels on MUX */ @@ -1888,7 +1880,8 @@ static void intr_callback(struct urb *urb) serial = get_serial_by_shared_int_and_type(shared_int, (1 << i)); if (serial != NULL) { - D1("Pending read interrupt on port %d\n", i); + hso_dbg(0x1, "Pending read interrupt on port %d\n", + i); spin_lock(&serial->serial_lock); if (serial->rx_state == RX_IDLE && serial->port.count > 0) { @@ -1900,8 +1893,8 @@ static void intr_callback(struct urb *urb) } else serial->rx_state = RX_PENDING; } else { - D1("Already a read pending on " - "port %d or port not open\n", i); + hso_dbg(0x1, "Already a read pending on port %d or port not open\n", + i); } spin_unlock(&serial->serial_lock); } @@ -1933,7 +1926,7 @@ static void hso_std_serial_write_bulk_callback(struct urb *urb) /* sanity check */ if (!serial) { - D1("serial == NULL"); + hso_dbg(0x1, "serial == NULL\n"); return; } @@ -1948,7 +1941,7 @@ static void hso_std_serial_write_bulk_callback(struct urb *urb) tty_port_tty_wakeup(&serial->port); hso_kick_transmit(serial); - D1(" "); + hso_dbg(0x1, "\n"); } /* called for writing diag or CS serial port */ @@ -1996,8 +1989,8 @@ static void ctrl_callback(struct urb *urb) /* what request? */ req = (struct usb_ctrlrequest *)(urb->setup_packet); - D4("\n--- Got muxed ctrl callback 0x%02X ---", status); - D4("Actual length of urb = %d\n", urb->actual_length); + hso_dbg(0x8, "--- Got muxed ctrl callback 0x%02X ---\n", status); + hso_dbg(0x8, "Actual length of urb = %d\n", urb->actual_length); DUMP1(urb->transfer_buffer, urb->actual_length); if (req->bRequestType == @@ -2023,7 +2016,7 @@ static int put_rxbuf_data(struct urb *urb, struct hso_serial *serial) /* Sanity check */ if (urb == NULL || serial == NULL) { - D1("serial = NULL"); + hso_dbg(0x1, "serial = NULL\n"); return -2; } @@ -2035,7 +2028,7 @@ static int put_rxbuf_data(struct urb *urb, struct hso_serial *serial) } /* Push data to tty */ - D1("data to push to tty"); + hso_dbg(0x1, "data to push to tty\n"); count = tty_buffer_request_room(&serial->port, urb->actual_length); if (count >= urb->actual_length) { tty_insert_flip_string(&serial->port, urb->transfer_buffer, @@ -2415,7 +2408,7 @@ static void hso_net_init(struct net_device *net) { struct hso_net *hso_net = netdev_priv(net); - D1("sizeof hso_net is %d", (int)sizeof(*hso_net)); + hso_dbg(0x1, "sizeof hso_net is %zu\n", sizeof(*hso_net)); /* fill in the other fields */ net->netdev_ops = &hso_netdev_ops; @@ -3229,7 +3222,7 @@ static int __init hso_init(void) int result; /* put it in the log */ - printk(KERN_INFO "hso: %s\n", version); + pr_info("%s\n", version); /* Initialise the serial table semaphore and table */ spin_lock_init(&serial_table_lock); @@ -3260,16 +3253,15 @@ static int __init hso_init(void) /* register the tty driver */ result = tty_register_driver(tty_drv); if (result) { - printk(KERN_ERR "%s - tty_register_driver failed(%d)\n", - __func__, result); + pr_err("%s - tty_register_driver failed(%d)\n", + __func__, result); goto err_free_tty; } /* register this module as an usb driver */ result = usb_register(&hso_driver); if (result) { - printk(KERN_ERR "Could not register hso driver? error: %d\n", - result); + pr_err("Could not register hso driver - error: %d\n", result); goto err_unreg_tty; } @@ -3284,7 +3276,7 @@ err_free_tty: static void __exit hso_exit(void) { - printk(KERN_INFO "hso: unloaded\n"); + pr_info("unloaded\n"); tty_unregister_driver(tty_drv); put_tty_driver(tty_drv); @@ -3301,7 +3293,7 @@ MODULE_DESCRIPTION(MOD_DESCRIPTION); MODULE_LICENSE(MOD_LICENSE); /* change the debug level (eg: insmod hso.ko debug=0x04) */ -MODULE_PARM_DESC(debug, "Level of debug [0x01 | 0x02 | 0x04 | 0x08 | 0x10]"); +MODULE_PARM_DESC(debug, "debug level mask [0x01 | 0x02 | 0x04 | 0x08 | 0x10]"); module_param(debug, int, S_IRUGO | S_IWUSR); /* set the major tty number (eg: insmod hso.ko tty_major=245) */ diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index dc989a8b5afb..831aa33d078a 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -33,7 +33,7 @@ #include "smsc95xx.h" #define SMSC_CHIPNAME "smsc95xx" -#define SMSC_DRIVER_VERSION "1.0.4" +#define SMSC_DRIVER_VERSION "1.0.5" #define HS_USB_PKT_SIZE (512) #define FS_USB_PKT_SIZE (64) #define DEFAULT_HS_BURST_CAP_SIZE (16 * 1024 + 5 * HS_USB_PKT_SIZE) @@ -64,6 +64,7 @@ #define CARRIER_CHECK_DELAY (2 * HZ) struct smsc95xx_priv { + u32 chip_id; u32 mac_cr; u32 hash_hi; u32 hash_lo; @@ -71,6 +72,7 @@ struct smsc95xx_priv { spinlock_t mac_cr_lock; u8 features; u8 suspend_flags; + u8 mdix_ctrl; bool link_ok; struct delayed_work carrier_check; struct usbnet *dev; @@ -782,14 +784,113 @@ static int smsc95xx_ethtool_set_wol(struct net_device *net, return ret; } +static int get_mdix_status(struct net_device *net) +{ + struct usbnet *dev = netdev_priv(net); + u32 val; + int buf; + + buf = smsc95xx_mdio_read(dev->net, dev->mii.phy_id, SPECIAL_CTRL_STS); + if (buf & SPECIAL_CTRL_STS_OVRRD_AMDIX_) { + if (buf & SPECIAL_CTRL_STS_AMDIX_ENABLE_) + return ETH_TP_MDI_AUTO; + else if (buf & SPECIAL_CTRL_STS_AMDIX_STATE_) + return ETH_TP_MDI_X; + } else { + buf = smsc95xx_read_reg(dev, STRAP_STATUS, &val); + if (val & STRAP_STATUS_AMDIX_EN_) + return ETH_TP_MDI_AUTO; + } + + return ETH_TP_MDI; +} + +static void set_mdix_status(struct net_device *net, __u8 mdix_ctrl) +{ + struct usbnet *dev = netdev_priv(net); + struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]); + int buf; + + if ((pdata->chip_id == ID_REV_CHIP_ID_9500A_) || + (pdata->chip_id == ID_REV_CHIP_ID_9530_) || + (pdata->chip_id == ID_REV_CHIP_ID_89530_) || + (pdata->chip_id == ID_REV_CHIP_ID_9730_)) { + /* Extend Manual AutoMDIX timer for 9500A/9500Ai */ + buf = smsc95xx_mdio_read(dev->net, dev->mii.phy_id, + PHY_EDPD_CONFIG); + buf |= PHY_EDPD_CONFIG_EXT_CROSSOVER_; + smsc95xx_mdio_write(dev->net, dev->mii.phy_id, + PHY_EDPD_CONFIG, buf); + } + + if (mdix_ctrl == ETH_TP_MDI) { + buf = smsc95xx_mdio_read(dev->net, dev->mii.phy_id, + SPECIAL_CTRL_STS); + buf |= SPECIAL_CTRL_STS_OVRRD_AMDIX_; + buf &= ~(SPECIAL_CTRL_STS_AMDIX_ENABLE_ | + SPECIAL_CTRL_STS_AMDIX_STATE_); + smsc95xx_mdio_write(dev->net, dev->mii.phy_id, + SPECIAL_CTRL_STS, buf); + } else if (mdix_ctrl == ETH_TP_MDI_X) { + buf = smsc95xx_mdio_read(dev->net, dev->mii.phy_id, + SPECIAL_CTRL_STS); + buf |= SPECIAL_CTRL_STS_OVRRD_AMDIX_; + buf &= ~(SPECIAL_CTRL_STS_AMDIX_ENABLE_ | + SPECIAL_CTRL_STS_AMDIX_STATE_); + buf |= SPECIAL_CTRL_STS_AMDIX_STATE_; + smsc95xx_mdio_write(dev->net, dev->mii.phy_id, + SPECIAL_CTRL_STS, buf); + } else if (mdix_ctrl == ETH_TP_MDI_AUTO) { + buf = smsc95xx_mdio_read(dev->net, dev->mii.phy_id, + SPECIAL_CTRL_STS); + buf &= ~SPECIAL_CTRL_STS_OVRRD_AMDIX_; + buf &= ~(SPECIAL_CTRL_STS_AMDIX_ENABLE_ | + SPECIAL_CTRL_STS_AMDIX_STATE_); + buf |= SPECIAL_CTRL_STS_AMDIX_ENABLE_; + smsc95xx_mdio_write(dev->net, dev->mii.phy_id, + SPECIAL_CTRL_STS, buf); + } + pdata->mdix_ctrl = mdix_ctrl; +} + +static int smsc95xx_get_settings(struct net_device *net, + struct ethtool_cmd *cmd) +{ + struct usbnet *dev = netdev_priv(net); + struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]); + int retval; + + retval = usbnet_get_settings(net, cmd); + + cmd->eth_tp_mdix = pdata->mdix_ctrl; + cmd->eth_tp_mdix_ctrl = pdata->mdix_ctrl; + + return retval; +} + +static int smsc95xx_set_settings(struct net_device *net, + struct ethtool_cmd *cmd) +{ + struct usbnet *dev = netdev_priv(net); + struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]); + int retval; + + if (pdata->mdix_ctrl != cmd->eth_tp_mdix_ctrl) + set_mdix_status(net, cmd->eth_tp_mdix_ctrl); + + retval = usbnet_set_settings(net, cmd); + + return retval; +} + static const struct ethtool_ops smsc95xx_ethtool_ops = { .get_link = usbnet_get_link, .nway_reset = usbnet_nway_reset, .get_drvinfo = usbnet_get_drvinfo, .get_msglevel = usbnet_get_msglevel, .set_msglevel = usbnet_set_msglevel, - .get_settings = usbnet_get_settings, - .set_settings = usbnet_set_settings, + .get_settings = smsc95xx_get_settings, + .set_settings = smsc95xx_set_settings, .get_eeprom_len = smsc95xx_ethtool_get_eeprom_len, .get_eeprom = smsc95xx_ethtool_get_eeprom, .set_eeprom = smsc95xx_ethtool_set_eeprom, @@ -1194,6 +1295,8 @@ static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf) if (ret < 0) return ret; val >>= 16; + pdata->chip_id = val; + pdata->mdix_ctrl = get_mdix_status(dev->net); if ((val == ID_REV_CHIP_ID_9500A_) || (val == ID_REV_CHIP_ID_9530_) || (val == ID_REV_CHIP_ID_89530_) || (val == ID_REV_CHIP_ID_9730_)) diff --git a/drivers/net/usb/smsc95xx.h b/drivers/net/usb/smsc95xx.h index 526faa0c44e6..29a4d9efce7c 100644 --- a/drivers/net/usb/smsc95xx.h +++ b/drivers/net/usb/smsc95xx.h @@ -144,6 +144,14 @@ #define BURST_CAP (0x38) +#define STRAP_STATUS (0x3C) +#define STRAP_STATUS_PWR_SEL_ (0x00000020) +#define STRAP_STATUS_AMDIX_EN_ (0x00000010) +#define STRAP_STATUS_PORT_SWAP_ (0x00000008) +#define STRAP_STATUS_EEP_SIZE_ (0x00000004) +#define STRAP_STATUS_RMT_WKP_ (0x00000002) +#define STRAP_STATUS_EEP_DISABLE_ (0x00000001) + #define GPIO_WAKE (0x64) #define INT_EP_CTL (0x68) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index f605a3684a7f..199dec033cf8 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -287,7 +287,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, if (!net_eq(dev_net(vxlan->dev), vxlan->net) && nla_put_s32(skb, NDA_LINK_NETNSID, - peernet2id_alloc(dev_net(vxlan->dev), vxlan->net))) + peernet2id(dev_net(vxlan->dev), vxlan->net))) goto nla_put_failure; if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr)) @@ -2103,6 +2103,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, vni, md, flags, udp_sum); if (err < 0) { dst_release(ndst); + dev->stats.tx_errors++; return; } udp_tunnel6_xmit_skb(ndst, sk, skb, dev, diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 8b72ee710f95..fd82584acd48 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -343,7 +343,8 @@ int rhashtable_init(struct rhashtable *ht, struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht, const void *key, struct rhash_head *obj, - struct bucket_table *old_tbl); + struct bucket_table *old_tbl, + void **data); int rhashtable_insert_rehash(struct rhashtable *ht, struct bucket_table *tbl); void rhashtable_walk_enter(struct rhashtable *ht, @@ -563,8 +564,11 @@ restart: return NULL; } -/* Internal function, please use rhashtable_insert_fast() instead */ -static inline int __rhashtable_insert_fast( +/* Internal function, please use rhashtable_insert_fast() instead. This + * function returns the existing element already in hashes in there is a clash, + * otherwise it returns an error via ERR_PTR(). + */ +static inline void *__rhashtable_insert_fast( struct rhashtable *ht, const void *key, struct rhash_head *obj, const struct rhashtable_params params) { @@ -577,6 +581,7 @@ static inline int __rhashtable_insert_fast( spinlock_t *lock; unsigned int elasticity; unsigned int hash; + void *data = NULL; int err; restart: @@ -601,11 +606,14 @@ restart: new_tbl = rht_dereference_rcu(tbl->future_tbl, ht); if (unlikely(new_tbl)) { - tbl = rhashtable_insert_slow(ht, key, obj, new_tbl); + tbl = rhashtable_insert_slow(ht, key, obj, new_tbl, &data); if (!IS_ERR_OR_NULL(tbl)) goto slow_path; err = PTR_ERR(tbl); + if (err == -EEXIST) + err = 0; + goto out; } @@ -619,25 +627,25 @@ slow_path: err = rhashtable_insert_rehash(ht, tbl); rcu_read_unlock(); if (err) - return err; + return ERR_PTR(err); goto restart; } - err = -EEXIST; + err = 0; elasticity = ht->elasticity; rht_for_each(head, tbl, hash) { if (key && unlikely(!(params.obj_cmpfn ? params.obj_cmpfn(&arg, rht_obj(ht, head)) : - rhashtable_compare(&arg, rht_obj(ht, head))))) + rhashtable_compare(&arg, rht_obj(ht, head))))) { + data = rht_obj(ht, head); goto out; + } if (!--elasticity) goto slow_path; } - err = 0; - head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); RCU_INIT_POINTER(obj->next, head); @@ -652,7 +660,7 @@ out: spin_unlock_bh(lock); rcu_read_unlock(); - return err; + return err ? ERR_PTR(err) : data; } /** @@ -675,7 +683,13 @@ static inline int rhashtable_insert_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params) { - return __rhashtable_insert_fast(ht, NULL, obj, params); + void *ret; + + ret = __rhashtable_insert_fast(ht, NULL, obj, params); + if (IS_ERR(ret)) + return PTR_ERR(ret); + + return ret == NULL ? 0 : -EEXIST; } /** @@ -704,11 +718,15 @@ static inline int rhashtable_lookup_insert_fast( const struct rhashtable_params params) { const char *key = rht_obj(ht, obj); + void *ret; BUG_ON(ht->p.obj_hashfn); - return __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, - params); + ret = __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params); + if (IS_ERR(ret)) + return PTR_ERR(ret); + + return ret == NULL ? 0 : -EEXIST; } /** @@ -737,6 +755,32 @@ static inline int rhashtable_lookup_insert_key( struct rhashtable *ht, const void *key, struct rhash_head *obj, const struct rhashtable_params params) { + void *ret; + + BUG_ON(!ht->p.obj_hashfn || !key); + + ret = __rhashtable_insert_fast(ht, key, obj, params); + if (IS_ERR(ret)) + return PTR_ERR(ret); + + return ret == NULL ? 0 : -EEXIST; +} + +/** + * rhashtable_lookup_get_insert_key - lookup and insert object into hash table + * @ht: hash table + * @obj: pointer to hash head inside object + * @params: hash table parameters + * @data: pointer to element data already in hashes + * + * Just like rhashtable_lookup_insert_key(), but this function returns the + * object if it exists, NULL if it does not and the insertion was successful, + * and an ERR_PTR otherwise. + */ +static inline void *rhashtable_lookup_get_insert_key( + struct rhashtable *ht, const void *key, struct rhash_head *obj, + const struct rhashtable_params params) +{ BUG_ON(!ht->p.obj_hashfn || !key); return __rhashtable_insert_fast(ht, key, obj, params); diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 445b019c2078..50418052a520 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -42,7 +42,6 @@ union nf_conntrack_expect_proto { #include <linux/types.h> #include <linux/skbuff.h> -#include <linux/timer.h> #ifdef CONFIG_NETFILTER_DEBUG #define NF_CT_ASSERT(x) WARN_ON(!(x)) @@ -73,7 +72,7 @@ struct nf_conn_help { #include <net/netfilter/ipv6/nf_conntrack_ipv6.h> struct nf_conn { - /* Usage count in here is 1 for hash table/destruct timer, 1 per skb, + /* Usage count in here is 1 for hash table, 1 per skb, * plus 1 for any connection(s) we are `master' for * * Hint, SKB address this struct and refcnt via skb->nfct and @@ -96,8 +95,8 @@ struct nf_conn { /* Have we seen traffic both ways yet? (bitset) */ unsigned long status; - /* Timer function; drops refcnt when it goes off. */ - struct timer_list timeout; + /* jiffies32 when this ct is considered dead */ + u32 timeout; possible_net_t ct_net; @@ -220,21 +219,14 @@ static inline void nf_ct_refresh(struct nf_conn *ct, __nf_ct_refresh_acct(ct, 0, skb, extra_jiffies, 0); } -bool __nf_ct_kill_acct(struct nf_conn *ct, enum ip_conntrack_info ctinfo, - const struct sk_buff *skb, int do_acct); - /* kill conntrack and do accounting */ -static inline bool nf_ct_kill_acct(struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - const struct sk_buff *skb) -{ - return __nf_ct_kill_acct(ct, ctinfo, skb, 1); -} +bool nf_ct_kill_acct(struct nf_conn *ct, enum ip_conntrack_info ctinfo, + const struct sk_buff *skb); /* kill conntrack without accounting */ static inline bool nf_ct_kill(struct nf_conn *ct) { - return __nf_ct_kill_acct(ct, 0, NULL, 0); + return nf_ct_delete(ct, 0, 0); } /* These are for NAT. Icky. */ @@ -291,21 +283,55 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb) return skb->dev && skb->skb_iif && skb->dev->flags & IFF_LOOPBACK; } +#define nfct_time_stamp ((u32)(jiffies)) + /* jiffies until ct expires, 0 if already expired */ static inline unsigned long nf_ct_expires(const struct nf_conn *ct) { - long timeout = (long)ct->timeout.expires - (long)jiffies; + s32 timeout = ct->timeout - nfct_time_stamp; return timeout > 0 ? timeout : 0; } +static inline bool nf_ct_is_expired(const struct nf_conn *ct) +{ + return (__s32)(ct->timeout - nfct_time_stamp) <= 0; +} + +/* use after obtaining a reference count */ +static inline bool nf_ct_should_gc(const struct nf_conn *ct) +{ + return nf_ct_is_expired(ct) && nf_ct_is_confirmed(ct) && + !nf_ct_is_dying(ct); +} + struct kernel_param; int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp); int nf_conntrack_hash_resize(unsigned int hashsize); + +extern struct hlist_nulls_head *nf_conntrack_hash; extern unsigned int nf_conntrack_htable_size; +extern seqcount_t nf_conntrack_generation; extern unsigned int nf_conntrack_max; +/* must be called with rcu read lock held */ +static inline void +nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize) +{ + struct hlist_nulls_head *hptr; + unsigned int sequence, hsz; + + do { + sequence = read_seqcount_begin(&nf_conntrack_generation); + hsz = nf_conntrack_htable_size; + hptr = nf_conntrack_hash; + } while (read_seqcount_retry(&nf_conntrack_generation, sequence)); + + *hash = hptr; + *hsize = hsz; +} + struct nf_conn *nf_ct_tmpl_alloc(struct net *net, const struct nf_conntrack_zone *zone, gfp_t flags); diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 79d7ac5c9740..62e17d1319ff 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -51,8 +51,6 @@ bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, const struct nf_conntrack_l3proto *l3proto, const struct nf_conntrack_l4proto *l4proto); -void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize); - /* Find a connection corresponding to a tuple. */ struct nf_conntrack_tuple_hash * nf_conntrack_find_get(struct net *net, @@ -83,7 +81,6 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, #define CONNTRACK_LOCKS 1024 -extern struct hlist_nulls_head *nf_conntrack_hash; extern spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS]; void nf_conntrack_lock(spinlock_t *lock); diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index fa36447371c6..12d967b58726 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -12,12 +12,19 @@ #include <linux/netfilter/nf_conntrack_tuple_common.h> #include <net/netfilter/nf_conntrack_extend.h> +enum nf_ct_ecache_state { + NFCT_ECACHE_UNKNOWN, /* destroy event not sent */ + NFCT_ECACHE_DESTROY_FAIL, /* tried but failed to send destroy event */ + NFCT_ECACHE_DESTROY_SENT, /* sent destroy event after failure */ +}; + struct nf_conntrack_ecache { - unsigned long cache; /* bitops want long */ - unsigned long missed; /* missed events */ - u16 ctmask; /* bitmask of ct events to be delivered */ - u16 expmask; /* bitmask of expect events to be delivered */ - u32 portid; /* netlink portid of destroyer */ + unsigned long cache; /* bitops want long */ + unsigned long missed; /* missed events */ + u16 ctmask; /* bitmask of ct events to be delivered */ + u16 expmask; /* bitmask of expect events to be delivered */ + u32 portid; /* netlink portid of destroyer */ + enum nf_ct_ecache_state state; /* ecache state */ }; static inline struct nf_conntrack_ecache * diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 1a5fb36f165f..de629f1520df 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -134,14 +134,6 @@ void nf_ct_l4proto_pernet_unregister(struct net *net, int nf_ct_l4proto_register(struct nf_conntrack_l4proto *proto); void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *proto); -static inline void nf_ct_kfree_compat_sysctl_table(struct nf_proto_net *pn) -{ -#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) - kfree(pn->ctl_compat_table); - pn->ctl_compat_table = NULL; -#endif -} - /* Generic netlink helpers */ int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple); diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h index 83d855ba6af1..ee07dc8b0a7b 100644 --- a/include/net/netfilter/nf_log.h +++ b/include/net/netfilter/nf_log.h @@ -60,8 +60,7 @@ struct nf_logger { int nf_log_register(u_int8_t pf, struct nf_logger *logger); void nf_log_unregister(struct nf_logger *logger); -void nf_log_set(struct net *net, u_int8_t pf, - const struct nf_logger *logger); +int nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger); void nf_log_unset(struct net *net, const struct nf_logger *logger); int nf_log_bind_pf(struct net *net, u_int8_t pf, diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index f2f13399ce44..8972468bc94b 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -251,7 +251,8 @@ struct nft_set_ops { int (*insert)(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem); + const struct nft_set_elem *elem, + struct nft_set_ext **ext); void (*activate)(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem); diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 38b1a80517f0..e469e85de3f9 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -15,10 +15,6 @@ struct nf_proto_net { #ifdef CONFIG_SYSCTL struct ctl_table_header *ctl_table_header; struct ctl_table *ctl_table; -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - struct ctl_table_header *ctl_compat_header; - struct ctl_table *ctl_compat_table; -#endif #endif unsigned int users; }; @@ -58,10 +54,6 @@ struct nf_ip_net { struct nf_udp_net udp; struct nf_icmp_net icmp; struct nf_icmp_net icmpv6; -#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) - struct ctl_table_header *ctl_table_header; - struct ctl_table *ctl_table; -#endif }; struct ct_pcpu { diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index c674ba2563b7..28ce01d79707 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -724,6 +724,26 @@ enum nft_meta_keys { }; /** + * enum nft_hash_attributes - nf_tables hash expression netlink attributes + * + * @NFTA_HASH_SREG: source register (NLA_U32) + * @NFTA_HASH_DREG: destination register (NLA_U32) + * @NFTA_HASH_LEN: source data length (NLA_U32) + * @NFTA_HASH_MODULUS: modulus value (NLA_U32) + * @NFTA_HASH_SEED: seed value (NLA_U32) + */ +enum nft_hash_attributes { + NFTA_HASH_UNSPEC, + NFTA_HASH_SREG, + NFTA_HASH_DREG, + NFTA_HASH_LEN, + NFTA_HASH_MODULUS, + NFTA_HASH_SEED, + __NFTA_HASH_MAX, +}; +#define NFTA_HASH_MAX (__NFTA_HASH_MAX - 1) + +/** * enum nft_meta_attributes - nf_tables meta expression netlink attributes * * @NFTA_META_DREG: destination register (NLA_U32) @@ -880,6 +900,25 @@ enum nft_queue_attributes { #define NFT_QUEUE_FLAG_CPU_FANOUT 0x02 /* use current CPU (no hashing) */ #define NFT_QUEUE_FLAG_MASK 0x03 +enum nft_quota_flags { + NFT_QUOTA_F_INV = (1 << 0), +}; + +/** + * enum nft_quota_attributes - nf_tables quota expression netlink attributes + * + * @NFTA_QUOTA_BYTES: quota in bytes (NLA_U16) + * @NFTA_QUOTA_FLAGS: flags (NLA_U32) + */ +enum nft_quota_attributes { + NFTA_QUOTA_UNSPEC, + NFTA_QUOTA_BYTES, + NFTA_QUOTA_FLAGS, + NFTA_QUOTA_PAD, + __NFTA_QUOTA_MAX +}; +#define NFTA_QUOTA_MAX (__NFTA_QUOTA_MAX - 1) + /** * enum nft_reject_types - nf_tables reject expression reject types * @@ -1051,7 +1090,7 @@ enum nft_gen_attributes { * @NFTA_TRACE_NFPROTO: nf protocol processed (NLA_U32) * @NFTA_TRACE_POLICY: policy that decided fate of packet (NLA_U32) */ -enum nft_trace_attibutes { +enum nft_trace_attributes { NFTA_TRACE_UNSPEC, NFTA_TRACE_TABLE, NFTA_TRACE_CHAIN, @@ -1082,4 +1121,28 @@ enum nft_trace_types { __NFT_TRACETYPE_MAX }; #define NFT_TRACETYPE_MAX (__NFT_TRACETYPE_MAX - 1) + +/** + * enum nft_ng_attributes - nf_tables number generator expression netlink attributes + * + * @NFTA_NG_DREG: destination register (NLA_U32) + * @NFTA_NG_UNTIL: source value to increment the counter until reset (NLA_U32) + * @NFTA_NG_TYPE: operation type (NLA_U32) + */ +enum nft_ng_attributes { + NFTA_NG_UNSPEC, + NFTA_NG_DREG, + NFTA_NG_UNTIL, + NFTA_NG_TYPE, + __NFTA_NG_MAX +}; +#define NFTA_NG_MAX (__NFTA_NG_MAX - 1) + +enum nft_ng_types { + NFT_NG_INCREMENTAL, + NFT_NG_RANDOM, + __NFT_NG_MAX +}; +#define NFT_NG_MAX (__NFT_NG_MAX - 1) + #endif /* _LINUX_NF_TABLES_H */ diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 4320b92ef696..06c28728bb53 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -444,7 +444,8 @@ EXPORT_SYMBOL_GPL(rhashtable_insert_rehash); struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht, const void *key, struct rhash_head *obj, - struct bucket_table *tbl) + struct bucket_table *tbl, + void **data) { struct rhash_head *head; unsigned int hash; @@ -455,8 +456,11 @@ struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht, spin_lock_nested(rht_bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING); err = -EEXIST; - if (key && rhashtable_lookup_fast(ht, key, ht->p)) - goto exit; + if (key) { + *data = rhashtable_lookup_fast(ht, key, ht->p); + if (*data) + goto exit; + } err = -E2BIG; if (unlikely(rht_grow_above_max(ht, tbl))) diff --git a/net/bridge/netfilter/nf_log_bridge.c b/net/bridge/netfilter/nf_log_bridge.c index 5d9953a90929..1663df598545 100644 --- a/net/bridge/netfilter/nf_log_bridge.c +++ b/net/bridge/netfilter/nf_log_bridge.c @@ -50,8 +50,7 @@ static struct nf_logger nf_bridge_logger __read_mostly = { static int __net_init nf_log_bridge_net_init(struct net *net) { - nf_log_set(net, NFPROTO_BRIDGE, &nf_bridge_logger); - return 0; + return nf_log_set(net, NFPROTO_BRIDGE, &nf_bridge_logger); } static void __net_exit nf_log_bridge_net_exit(struct net *net) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 7a77dcabd4e8..42bdda0e616b 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -215,31 +215,29 @@ static void rtnl_net_notifyid(struct net *net, int cmd, int id); */ int peernet2id_alloc(struct net *net, struct net *peer) { - unsigned long flags; bool alloc; int id; - spin_lock_irqsave(&net->nsid_lock, flags); + spin_lock_bh(&net->nsid_lock); alloc = atomic_read(&peer->count) == 0 ? false : true; id = __peernet2id_alloc(net, peer, &alloc); - spin_unlock_irqrestore(&net->nsid_lock, flags); + spin_unlock_bh(&net->nsid_lock); if (alloc && id >= 0) rtnl_net_notifyid(net, RTM_NEWNSID, id); return id; } -EXPORT_SYMBOL(peernet2id_alloc); /* This function returns, if assigned, the id of a peer netns. */ int peernet2id(struct net *net, struct net *peer) { - unsigned long flags; int id; - spin_lock_irqsave(&net->nsid_lock, flags); + spin_lock_bh(&net->nsid_lock); id = __peernet2id(net, peer); - spin_unlock_irqrestore(&net->nsid_lock, flags); + spin_unlock_bh(&net->nsid_lock); return id; } +EXPORT_SYMBOL(peernet2id); /* This function returns true is the peer netns has an id assigned into the * current netns. @@ -251,18 +249,17 @@ bool peernet_has_id(struct net *net, struct net *peer) struct net *get_net_ns_by_id(struct net *net, int id) { - unsigned long flags; struct net *peer; if (id < 0) return NULL; rcu_read_lock(); - spin_lock_irqsave(&net->nsid_lock, flags); + spin_lock_bh(&net->nsid_lock); peer = idr_find(&net->netns_ids, id); if (peer) get_net(peer); - spin_unlock_irqrestore(&net->nsid_lock, flags); + spin_unlock_bh(&net->nsid_lock); rcu_read_unlock(); return peer; @@ -406,17 +403,17 @@ static void cleanup_net(struct work_struct *work) for_each_net(tmp) { int id; - spin_lock_irq(&tmp->nsid_lock); + spin_lock_bh(&tmp->nsid_lock); id = __peernet2id(tmp, net); if (id >= 0) idr_remove(&tmp->netns_ids, id); - spin_unlock_irq(&tmp->nsid_lock); + spin_unlock_bh(&tmp->nsid_lock); if (id >= 0) rtnl_net_notifyid(tmp, RTM_DELNSID, id); } - spin_lock_irq(&net->nsid_lock); + spin_lock_bh(&net->nsid_lock); idr_destroy(&net->netns_ids); - spin_unlock_irq(&net->nsid_lock); + spin_unlock_bh(&net->nsid_lock); } rtnl_unlock(); @@ -544,7 +541,6 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct nlattr *tb[NETNSA_MAX + 1]; - unsigned long flags; struct net *peer; int nsid, err; @@ -565,15 +561,15 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh) if (IS_ERR(peer)) return PTR_ERR(peer); - spin_lock_irqsave(&net->nsid_lock, flags); + spin_lock_bh(&net->nsid_lock); if (__peernet2id(net, peer) >= 0) { - spin_unlock_irqrestore(&net->nsid_lock, flags); + spin_unlock_bh(&net->nsid_lock); err = -EEXIST; goto out; } err = alloc_netid(net, peer, nsid); - spin_unlock_irqrestore(&net->nsid_lock, flags); + spin_unlock_bh(&net->nsid_lock); if (err >= 0) { rtnl_net_notifyid(net, RTM_NEWNSID, err); err = 0; @@ -695,11 +691,10 @@ static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb) .idx = 0, .s_idx = cb->args[0], }; - unsigned long flags; - spin_lock_irqsave(&net->nsid_lock, flags); + spin_lock_bh(&net->nsid_lock); idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb); - spin_unlock_irqrestore(&net->nsid_lock, flags); + spin_unlock_bh(&net->nsid_lock); cb->args[0] = net_cb.idx; return skb->len; diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index c187c60e3e0c..d613309e3e5d 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -25,17 +25,6 @@ config NF_CONNTRACK_IPV4 To compile it as a module, choose M here. If unsure, say N. -config NF_CONNTRACK_PROC_COMPAT - bool "proc/sysctl compatibility with old connection tracking" - depends on NF_CONNTRACK_PROCFS && NF_CONNTRACK_IPV4 - default y - help - This option enables /proc and sysctl compatibility with the old - layer 3 dependent connection tracking. This is needed to keep - old programs that have not been adapted to the new names working. - - If unsure, say Y. - if NF_TABLES config NF_TABLES_IPV4 diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 87b073da14c9..853328f8fd05 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -4,11 +4,6 @@ # objects for l3 independent conntrack nf_conntrack_ipv4-y := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o -ifeq ($(CONFIG_NF_CONNTRACK_PROC_COMPAT),y) -ifeq ($(CONFIG_PROC_FS),y) -nf_conntrack_ipv4-objs += nf_conntrack_l3proto_ipv4_compat.o -endif -endif # connection tracking obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index ae1a71a97132..870aebda2932 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -202,47 +202,6 @@ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { }, }; -#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) -static int log_invalid_proto_min = 0; -static int log_invalid_proto_max = 255; - -static struct ctl_table ip_ct_sysctl_table[] = { - { - .procname = "ip_conntrack_max", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "ip_conntrack_count", - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = proc_dointvec, - }, - { - .procname = "ip_conntrack_buckets", - .maxlen = sizeof(unsigned int), - .mode = 0444, - .proc_handler = proc_dointvec, - }, - { - .procname = "ip_conntrack_checksum", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "ip_conntrack_log_invalid", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &log_invalid_proto_min, - .extra2 = &log_invalid_proto_max, - }, - { } -}; -#endif /* CONFIG_SYSCTL && CONFIG_NF_CONNTRACK_PROC_COMPAT */ - /* Fast function for those who don't want to parse /proc (and I don't blame them). */ /* Reversing the socket's dst/src point of view gives us the reply @@ -350,20 +309,6 @@ static struct nf_sockopt_ops so_getorigdst = { static int ipv4_init_net(struct net *net) { -#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) - struct nf_ip_net *in = &net->ct.nf_ct_proto; - in->ctl_table = kmemdup(ip_ct_sysctl_table, - sizeof(ip_ct_sysctl_table), - GFP_KERNEL); - if (!in->ctl_table) - return -ENOMEM; - - in->ctl_table[0].data = &nf_conntrack_max; - in->ctl_table[1].data = &net->ct.count; - in->ctl_table[2].data = &nf_conntrack_htable_size; - in->ctl_table[3].data = &net->ct.sysctl_checksum; - in->ctl_table[4].data = &net->ct.sysctl_log_invalid; -#endif return 0; } @@ -380,9 +325,6 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { .nlattr_to_tuple = ipv4_nlattr_to_tuple, .nla_policy = ipv4_nla_policy, #endif -#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) - .ctl_table_path = "net/ipv4/netfilter", -#endif .init_net = ipv4_init_net, .me = THIS_MODULE, }; @@ -492,16 +434,7 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) goto cleanup_icmpv4; } -#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) - ret = nf_conntrack_ipv4_compat_init(); - if (ret < 0) - goto cleanup_proto; -#endif return ret; -#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) - cleanup_proto: - nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4); -#endif cleanup_icmpv4: nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp); cleanup_udp4: @@ -520,9 +453,6 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) static void __exit nf_conntrack_l3proto_ipv4_fini(void) { synchronize_net(); -#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) - nf_conntrack_ipv4_compat_fini(); -#endif nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4); nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp); nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4); diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c deleted file mode 100644 index 63923710f325..000000000000 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ /dev/null @@ -1,492 +0,0 @@ -/* ip_conntrack proc compat - based on ip_conntrack_standalone.c - * - * (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> - * (C) 2006-2010 Patrick McHardy <kaber@trash.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include <linux/types.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <linux/percpu.h> -#include <linux/security.h> -#include <net/net_namespace.h> - -#include <linux/netfilter.h> -#include <net/netfilter/nf_conntrack_core.h> -#include <net/netfilter/nf_conntrack_l3proto.h> -#include <net/netfilter/nf_conntrack_l4proto.h> -#include <net/netfilter/nf_conntrack_expect.h> -#include <net/netfilter/nf_conntrack_acct.h> -#include <linux/rculist_nulls.h> -#include <linux/export.h> - -struct ct_iter_state { - struct seq_net_private p; - struct hlist_nulls_head *hash; - unsigned int htable_size; - unsigned int bucket; -}; - -static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) -{ - struct ct_iter_state *st = seq->private; - struct hlist_nulls_node *n; - - for (st->bucket = 0; - st->bucket < st->htable_size; - st->bucket++) { - n = rcu_dereference( - hlist_nulls_first_rcu(&st->hash[st->bucket])); - if (!is_a_nulls(n)) - return n; - } - return NULL; -} - -static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, - struct hlist_nulls_node *head) -{ - struct ct_iter_state *st = seq->private; - - head = rcu_dereference(hlist_nulls_next_rcu(head)); - while (is_a_nulls(head)) { - if (likely(get_nulls_value(head) == st->bucket)) { - if (++st->bucket >= st->htable_size) - return NULL; - } - head = rcu_dereference( - hlist_nulls_first_rcu(&st->hash[st->bucket])); - } - return head; -} - -static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos) -{ - struct hlist_nulls_node *head = ct_get_first(seq); - - if (head) - while (pos && (head = ct_get_next(seq, head))) - pos--; - return pos ? NULL : head; -} - -static void *ct_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(RCU) -{ - struct ct_iter_state *st = seq->private; - - rcu_read_lock(); - - nf_conntrack_get_ht(&st->hash, &st->htable_size); - return ct_get_idx(seq, *pos); -} - -static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos) -{ - (*pos)++; - return ct_get_next(s, v); -} - -static void ct_seq_stop(struct seq_file *s, void *v) - __releases(RCU) -{ - rcu_read_unlock(); -} - -#ifdef CONFIG_NF_CONNTRACK_SECMARK -static void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) -{ - int ret; - u32 len; - char *secctx; - - ret = security_secid_to_secctx(ct->secmark, &secctx, &len); - if (ret) - return; - - seq_printf(s, "secctx=%s ", secctx); - - security_release_secctx(secctx, len); -} -#else -static inline void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) -{ -} -#endif - -static bool ct_seq_should_skip(const struct nf_conn *ct, - const struct net *net, - const struct nf_conntrack_tuple_hash *hash) -{ - /* we only want to print DIR_ORIGINAL */ - if (NF_CT_DIRECTION(hash)) - return true; - - if (nf_ct_l3num(ct) != AF_INET) - return true; - - if (!net_eq(nf_ct_net(ct), net)) - return true; - - return false; -} - -static int ct_seq_show(struct seq_file *s, void *v) -{ - struct nf_conntrack_tuple_hash *hash = v; - struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash); - const struct nf_conntrack_l3proto *l3proto; - const struct nf_conntrack_l4proto *l4proto; - int ret = 0; - - NF_CT_ASSERT(ct); - if (ct_seq_should_skip(ct, seq_file_net(s), hash)) - return 0; - - if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) - return 0; - - /* check if we raced w. object reuse */ - if (!nf_ct_is_confirmed(ct) || - ct_seq_should_skip(ct, seq_file_net(s), hash)) - goto release; - - l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct)); - NF_CT_ASSERT(l3proto); - l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); - NF_CT_ASSERT(l4proto); - - ret = -ENOSPC; - seq_printf(s, "%-8s %u %ld ", - l4proto->name, nf_ct_protonum(ct), - timer_pending(&ct->timeout) - ? (long)(ct->timeout.expires - jiffies)/HZ : 0); - - if (l4proto->print_conntrack) - l4proto->print_conntrack(s, ct); - - if (seq_has_overflowed(s)) - goto release; - - print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, - l3proto, l4proto); - - if (seq_has_overflowed(s)) - goto release; - - if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL)) - goto release; - - if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status))) - seq_printf(s, "[UNREPLIED] "); - - print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, - l3proto, l4proto); - - if (seq_has_overflowed(s)) - goto release; - - if (seq_print_acct(s, ct, IP_CT_DIR_REPLY)) - goto release; - - if (test_bit(IPS_ASSURED_BIT, &ct->status)) - seq_printf(s, "[ASSURED] "); - -#ifdef CONFIG_NF_CONNTRACK_MARK - seq_printf(s, "mark=%u ", ct->mark); -#endif - - ct_show_secctx(s, ct); - - seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use)); - - if (seq_has_overflowed(s)) - goto release; - - ret = 0; -release: - nf_ct_put(ct); - return ret; -} - -static const struct seq_operations ct_seq_ops = { - .start = ct_seq_start, - .next = ct_seq_next, - .stop = ct_seq_stop, - .show = ct_seq_show -}; - -static int ct_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &ct_seq_ops, - sizeof(struct ct_iter_state)); -} - -static const struct file_operations ct_file_ops = { - .owner = THIS_MODULE, - .open = ct_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - -/* expects */ -struct ct_expect_iter_state { - struct seq_net_private p; - unsigned int bucket; -}; - -static struct hlist_node *ct_expect_get_first(struct seq_file *seq) -{ - struct ct_expect_iter_state *st = seq->private; - struct hlist_node *n; - - for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { - n = rcu_dereference( - hlist_first_rcu(&nf_ct_expect_hash[st->bucket])); - if (n) - return n; - } - return NULL; -} - -static struct hlist_node *ct_expect_get_next(struct seq_file *seq, - struct hlist_node *head) -{ - struct ct_expect_iter_state *st = seq->private; - - head = rcu_dereference(hlist_next_rcu(head)); - while (head == NULL) { - if (++st->bucket >= nf_ct_expect_hsize) - return NULL; - head = rcu_dereference( - hlist_first_rcu(&nf_ct_expect_hash[st->bucket])); - } - return head; -} - -static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos) -{ - struct hlist_node *head = ct_expect_get_first(seq); - - if (head) - while (pos && (head = ct_expect_get_next(seq, head))) - pos--; - return pos ? NULL : head; -} - -static void *exp_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(RCU) -{ - rcu_read_lock(); - return ct_expect_get_idx(seq, *pos); -} - -static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - (*pos)++; - return ct_expect_get_next(seq, v); -} - -static void exp_seq_stop(struct seq_file *seq, void *v) - __releases(RCU) -{ - rcu_read_unlock(); -} - -static int exp_seq_show(struct seq_file *s, void *v) -{ - struct nf_conntrack_expect *exp; - const struct hlist_node *n = v; - - exp = hlist_entry(n, struct nf_conntrack_expect, hnode); - - if (!net_eq(nf_ct_net(exp->master), seq_file_net(s))) - return 0; - - if (exp->tuple.src.l3num != AF_INET) - return 0; - - if (exp->timeout.function) - seq_printf(s, "%ld ", timer_pending(&exp->timeout) - ? (long)(exp->timeout.expires - jiffies)/HZ : 0); - else - seq_printf(s, "- "); - - seq_printf(s, "proto=%u ", exp->tuple.dst.protonum); - - print_tuple(s, &exp->tuple, - __nf_ct_l3proto_find(exp->tuple.src.l3num), - __nf_ct_l4proto_find(exp->tuple.src.l3num, - exp->tuple.dst.protonum)); - seq_putc(s, '\n'); - - return 0; -} - -static const struct seq_operations exp_seq_ops = { - .start = exp_seq_start, - .next = exp_seq_next, - .stop = exp_seq_stop, - .show = exp_seq_show -}; - -static int exp_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &exp_seq_ops, - sizeof(struct ct_expect_iter_state)); -} - -static const struct file_operations ip_exp_file_ops = { - .owner = THIS_MODULE, - .open = exp_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - -static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) -{ - struct net *net = seq_file_net(seq); - int cpu; - - if (*pos == 0) - return SEQ_START_TOKEN; - - for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) { - if (!cpu_possible(cpu)) - continue; - *pos = cpu+1; - return per_cpu_ptr(net->ct.stat, cpu); - } - - return NULL; -} - -static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - struct net *net = seq_file_net(seq); - int cpu; - - for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { - if (!cpu_possible(cpu)) - continue; - *pos = cpu+1; - return per_cpu_ptr(net->ct.stat, cpu); - } - - return NULL; -} - -static void ct_cpu_seq_stop(struct seq_file *seq, void *v) -{ -} - -static int ct_cpu_seq_show(struct seq_file *seq, void *v) -{ - struct net *net = seq_file_net(seq); - unsigned int nr_conntracks = atomic_read(&net->ct.count); - const struct ip_conntrack_stat *st = v; - - if (v == SEQ_START_TOKEN) { - seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete search_restart\n"); - return 0; - } - - seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x " - "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", - nr_conntracks, - st->searched, - st->found, - st->new, - st->invalid, - st->ignore, - st->delete, - st->delete_list, - st->insert, - st->insert_failed, - st->drop, - st->early_drop, - st->error, - - st->expect_new, - st->expect_create, - st->expect_delete, - st->search_restart - ); - return 0; -} - -static const struct seq_operations ct_cpu_seq_ops = { - .start = ct_cpu_seq_start, - .next = ct_cpu_seq_next, - .stop = ct_cpu_seq_stop, - .show = ct_cpu_seq_show, -}; - -static int ct_cpu_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &ct_cpu_seq_ops, - sizeof(struct seq_net_private)); -} - -static const struct file_operations ct_cpu_seq_fops = { - .owner = THIS_MODULE, - .open = ct_cpu_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - -static int __net_init ip_conntrack_net_init(struct net *net) -{ - struct proc_dir_entry *proc, *proc_exp, *proc_stat; - - proc = proc_create("ip_conntrack", 0440, net->proc_net, &ct_file_ops); - if (!proc) - goto err1; - - proc_exp = proc_create("ip_conntrack_expect", 0440, net->proc_net, - &ip_exp_file_ops); - if (!proc_exp) - goto err2; - - proc_stat = proc_create("ip_conntrack", S_IRUGO, - net->proc_net_stat, &ct_cpu_seq_fops); - if (!proc_stat) - goto err3; - return 0; - -err3: - remove_proc_entry("ip_conntrack_expect", net->proc_net); -err2: - remove_proc_entry("ip_conntrack", net->proc_net); -err1: - return -ENOMEM; -} - -static void __net_exit ip_conntrack_net_exit(struct net *net) -{ - remove_proc_entry("ip_conntrack", net->proc_net_stat); - remove_proc_entry("ip_conntrack_expect", net->proc_net); - remove_proc_entry("ip_conntrack", net->proc_net); -} - -static struct pernet_operations ip_conntrack_net_ops = { - .init = ip_conntrack_net_init, - .exit = ip_conntrack_net_exit, -}; - -int __init nf_conntrack_ipv4_compat_init(void) -{ - return register_pernet_subsys(&ip_conntrack_net_ops); -} - -void __exit nf_conntrack_ipv4_compat_fini(void) -{ - unregister_pernet_subsys(&ip_conntrack_net_ops); -} diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index c567e1b5d799..4b5904bc2614 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -327,17 +327,6 @@ static struct ctl_table icmp_sysctl_table[] = { }, { } }; -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT -static struct ctl_table icmp_compat_sysctl_table[] = { - { - .procname = "ip_conntrack_icmp_timeout", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { } -}; -#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn, @@ -355,40 +344,14 @@ static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn, return 0; } -static int icmp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn, - struct nf_icmp_net *in) -{ -#ifdef CONFIG_SYSCTL -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - pn->ctl_compat_table = kmemdup(icmp_compat_sysctl_table, - sizeof(icmp_compat_sysctl_table), - GFP_KERNEL); - if (!pn->ctl_compat_table) - return -ENOMEM; - - pn->ctl_compat_table[0].data = &in->timeout; -#endif -#endif - return 0; -} - static int icmp_init_net(struct net *net, u_int16_t proto) { - int ret; struct nf_icmp_net *in = icmp_pernet(net); struct nf_proto_net *pn = &in->pn; in->timeout = nf_ct_icmp_timeout; - ret = icmp_kmemdup_compat_sysctl_table(pn, in); - if (ret < 0) - return ret; - - ret = icmp_kmemdup_sysctl_table(pn, in); - if (ret < 0) - nf_ct_kfree_compat_sysctl_table(pn); - - return ret; + return icmp_kmemdup_sysctl_table(pn, in); } static struct nf_proto_net *icmp_get_net_proto(struct net *net) diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c index ceb187308120..cf986e1c7bbd 100644 --- a/net/ipv4/netfilter/nf_dup_ipv4.c +++ b/net/ipv4/netfilter/nf_dup_ipv4.c @@ -74,21 +74,19 @@ void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum, nf_conntrack_get(skb->nfct); #endif /* - * If we are in PREROUTING/INPUT, the checksum must be recalculated - * since the length could have changed as a result of defragmentation. - * - * We also decrease the TTL to mitigate potential loops between two - * hosts. + * If we are in PREROUTING/INPUT, decrease the TTL to mitigate potential + * loops between two hosts. * * Set %IP_DF so that the original source is notified of a potentially * decreased MTU on the clone route. IPv6 does this too. + * + * IP header checksum will be recalculated at ip_local_out. */ iph = ip_hdr(skb); iph->frag_off |= htons(IP_DF); if (hooknum == NF_INET_PRE_ROUTING || hooknum == NF_INET_LOCAL_IN) --iph->ttl; - ip_send_check(iph); if (nf_dup_ipv4_route(net, skb, gw, oif)) { __this_cpu_write(nf_skb_duplicated, true); diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c index e7ad950cf9ef..8945c2653814 100644 --- a/net/ipv4/netfilter/nf_log_arp.c +++ b/net/ipv4/netfilter/nf_log_arp.c @@ -62,7 +62,7 @@ static void dump_arp_packet(struct nf_log_buf *m, /* If it's for Ethernet and the lengths are OK, then log the ARP * payload. */ - if (ah->ar_hrd != htons(1) || + if (ah->ar_hrd != htons(ARPHRD_ETHER) || ah->ar_hln != ETH_ALEN || ah->ar_pln != sizeof(__be32)) return; @@ -111,8 +111,7 @@ static struct nf_logger nf_arp_logger __read_mostly = { static int __net_init nf_log_arp_net_init(struct net *net) { - nf_log_set(net, NFPROTO_ARP, &nf_arp_logger); - return 0; + return nf_log_set(net, NFPROTO_ARP, &nf_arp_logger); } static void __net_exit nf_log_arp_net_exit(struct net *net) diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c index 076aadda0473..20f225593a8b 100644 --- a/net/ipv4/netfilter/nf_log_ipv4.c +++ b/net/ipv4/netfilter/nf_log_ipv4.c @@ -347,8 +347,7 @@ static struct nf_logger nf_ip_logger __read_mostly = { static int __net_init nf_log_ipv4_net_init(struct net *net) { - nf_log_set(net, NFPROTO_IPV4, &nf_ip_logger); - return 0; + return nf_log_set(net, NFPROTO_IPV4, &nf_ip_logger); } static void __net_exit nf_log_ipv4_net_exit(struct net *net) diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c index 8dd869642f45..c1bcf699a23d 100644 --- a/net/ipv6/netfilter/nf_log_ipv6.c +++ b/net/ipv6/netfilter/nf_log_ipv6.c @@ -379,8 +379,7 @@ static struct nf_logger nf_ip6_logger __read_mostly = { static int __net_init nf_log_ipv6_net_init(struct net *net) { - nf_log_set(net, NFPROTO_IPV6, &nf_ip6_logger); - return 0; + return nf_log_set(net, NFPROTO_IPV6, &nf_ip6_logger); } static void __net_exit nf_log_ipv6_net_exit(struct net *net) diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 9266ceebd112..e8d56d9a4df2 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -474,6 +474,12 @@ config NFT_META This option adds the "meta" expression that you can use to match and to set packet metainformation such as the packet mark. +config NFT_NUMGEN + tristate "Netfilter nf_tables number generator module" + help + This option adds the number generator expression used to perform + incremental counting and random numbers bound to a upper limit. + config NFT_CT depends on NF_CONNTRACK tristate "Netfilter nf_tables conntrack module" @@ -481,13 +487,13 @@ config NFT_CT This option adds the "meta" expression that you can use to match connection tracking information such as the flow state. -config NFT_RBTREE +config NFT_SET_RBTREE tristate "Netfilter nf_tables rbtree set module" help This option adds the "rbtree" set type (Red Black tree) that is used to build interval-based sets. -config NFT_HASH +config NFT_SET_HASH tristate "Netfilter nf_tables hash set module" help This option adds the "hash" set type that is used to build one-way @@ -542,6 +548,12 @@ config NFT_QUEUE This is required if you intend to use the userspace queueing infrastructure (also known as NFQUEUE) from nftables. +config NFT_QUOTA + tristate "Netfilter nf_tables quota module" + help + This option adds the "quota" expression that you can use to match + enforce bytes quotas. + config NFT_REJECT default m if NETFILTER_ADVANCED=n tristate "Netfilter nf_tables reject support" @@ -563,6 +575,12 @@ config NFT_COMPAT x_tables match/target extensions over the nf_tables framework. +config NFT_HASH + tristate "Netfilter nf_tables hash module" + help + This option adds the "hash" expression that you can use to perform + a hash operation on registers. + if NF_TABLES_NETDEV config NF_DUP_NETDEV diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 69134541d65b..0c8581100ac6 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -80,18 +80,21 @@ obj-$(CONFIG_NF_TABLES_NETDEV) += nf_tables_netdev.o obj-$(CONFIG_NFT_COMPAT) += nft_compat.o obj-$(CONFIG_NFT_EXTHDR) += nft_exthdr.o obj-$(CONFIG_NFT_META) += nft_meta.o +obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o obj-$(CONFIG_NFT_CT) += nft_ct.o obj-$(CONFIG_NFT_LIMIT) += nft_limit.o obj-$(CONFIG_NFT_NAT) += nft_nat.o obj-$(CONFIG_NFT_QUEUE) += nft_queue.o +obj-$(CONFIG_NFT_QUOTA) += nft_quota.o obj-$(CONFIG_NFT_REJECT) += nft_reject.o obj-$(CONFIG_NFT_REJECT_INET) += nft_reject_inet.o -obj-$(CONFIG_NFT_RBTREE) += nft_rbtree.o -obj-$(CONFIG_NFT_HASH) += nft_hash.o +obj-$(CONFIG_NFT_SET_RBTREE) += nft_set_rbtree.o +obj-$(CONFIG_NFT_SET_HASH) += nft_set_hash.o obj-$(CONFIG_NFT_COUNTER) += nft_counter.o obj-$(CONFIG_NFT_LOG) += nft_log.o obj-$(CONFIG_NFT_MASQ) += nft_masq.o obj-$(CONFIG_NFT_REDIR) += nft_redir.o +obj-$(CONFIG_NFT_HASH) += nft_hash.o # nf_tables netdev obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c index f04fd8df210b..fc230d99aa3b 100644 --- a/net/netfilter/ipvs/ip_vs_nfct.c +++ b/net/netfilter/ipvs/ip_vs_nfct.c @@ -281,13 +281,10 @@ void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp) h = nf_conntrack_find_get(cp->ipvs->net, &nf_ct_zone_dflt, &tuple); if (h) { ct = nf_ct_tuplehash_to_ctrack(h); - /* Show what happens instead of calling nf_ct_kill() */ - if (del_timer(&ct->timeout)) { - IP_VS_DBG(7, "%s: ct=%p, deleted conntrack timer for tuple=" + if (nf_ct_kill(ct)) { + IP_VS_DBG(7, "%s: ct=%p, deleted conntrack for tuple=" FMT_TUPLE "\n", __func__, ct, ARG_TUPLE(&tuple)); - if (ct->timeout.function) - ct->timeout.function(ct->timeout.data); } else { IP_VS_DBG(7, "%s: ct=%p, no conntrack timer for tuple=" FMT_TUPLE "\n", diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index dd2c43abf9e2..ac1db4019d5c 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -72,12 +72,24 @@ EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock); struct hlist_nulls_head *nf_conntrack_hash __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_hash); +struct conntrack_gc_work { + struct delayed_work dwork; + u32 last_bucket; + bool exiting; +}; + static __read_mostly struct kmem_cache *nf_conntrack_cachep; static __read_mostly spinlock_t nf_conntrack_locks_all_lock; -static __read_mostly seqcount_t nf_conntrack_generation; static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock); static __read_mostly bool nf_conntrack_locks_all; +#define GC_MAX_BUCKETS_DIV 64u +#define GC_MAX_BUCKETS 8192u +#define GC_INTERVAL (5 * HZ) +#define GC_MAX_EVICTS 256u + +static struct conntrack_gc_work conntrack_gc_work; + void nf_conntrack_lock(spinlock_t *lock) __acquires(lock) { spin_lock(lock); @@ -164,7 +176,7 @@ unsigned int nf_conntrack_htable_size __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); unsigned int nf_conntrack_max __read_mostly; -EXPORT_SYMBOL_GPL(nf_conntrack_max); +seqcount_t nf_conntrack_generation __read_mostly; DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked); EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked); @@ -372,7 +384,6 @@ destroy_conntrack(struct nf_conntrack *nfct) pr_debug("destroy_conntrack(%p)\n", ct); NF_CT_ASSERT(atomic_read(&nfct->use) == 0); - NF_CT_ASSERT(!timer_pending(&ct->timeout)); if (unlikely(nf_ct_is_template(ct))) { nf_ct_tmpl_free(ct); @@ -435,35 +446,30 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report) { struct nf_conn_tstamp *tstamp; + if (test_and_set_bit(IPS_DYING_BIT, &ct->status)) + return false; + tstamp = nf_conn_tstamp_find(ct); if (tstamp && tstamp->stop == 0) tstamp->stop = ktime_get_real_ns(); - if (nf_ct_is_dying(ct)) - goto delete; - if (nf_conntrack_event_report(IPCT_DESTROY, ct, portid, report) < 0) { - /* destroy event was not delivered */ + /* destroy event was not delivered. nf_ct_put will + * be done by event cache worker on redelivery. + */ nf_ct_delete_from_lists(ct); nf_conntrack_ecache_delayed_work(nf_ct_net(ct)); return false; } nf_conntrack_ecache_work(nf_ct_net(ct)); - set_bit(IPS_DYING_BIT, &ct->status); - delete: nf_ct_delete_from_lists(ct); nf_ct_put(ct); return true; } EXPORT_SYMBOL_GPL(nf_ct_delete); -static void death_by_timeout(unsigned long ul_conntrack) -{ - nf_ct_delete((struct nf_conn *)ul_conntrack, 0, 0); -} - static inline bool nf_ct_key_equal(struct nf_conntrack_tuple_hash *h, const struct nf_conntrack_tuple *tuple, @@ -481,22 +487,17 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h, net_eq(net, nf_ct_net(ct)); } -/* must be called with rcu read lock held */ -void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize) +/* caller must hold rcu readlock and none of the nf_conntrack_locks */ +static void nf_ct_gc_expired(struct nf_conn *ct) { - struct hlist_nulls_head *hptr; - unsigned int sequence, hsz; + if (!atomic_inc_not_zero(&ct->ct_general.use)) + return; - do { - sequence = read_seqcount_begin(&nf_conntrack_generation); - hsz = nf_conntrack_htable_size; - hptr = nf_conntrack_hash; - } while (read_seqcount_retry(&nf_conntrack_generation, sequence)); + if (nf_ct_should_gc(ct)) + nf_ct_kill(ct); - *hash = hptr; - *hsize = hsz; + nf_ct_put(ct); } -EXPORT_SYMBOL_GPL(nf_conntrack_get_ht); /* * Warning : @@ -510,16 +511,24 @@ ____nf_conntrack_find(struct net *net, const struct nf_conntrack_zone *zone, struct nf_conntrack_tuple_hash *h; struct hlist_nulls_head *ct_hash; struct hlist_nulls_node *n; - unsigned int bucket, sequence; + unsigned int bucket, hsize; begin: - do { - sequence = read_seqcount_begin(&nf_conntrack_generation); - bucket = scale_hash(hash); - ct_hash = nf_conntrack_hash; - } while (read_seqcount_retry(&nf_conntrack_generation, sequence)); + nf_conntrack_get_ht(&ct_hash, &hsize); + bucket = reciprocal_scale(hash, hsize); hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[bucket], hnnode) { + struct nf_conn *ct; + + ct = nf_ct_tuplehash_to_ctrack(h); + if (nf_ct_is_expired(ct)) { + nf_ct_gc_expired(ct); + continue; + } + + if (nf_ct_is_dying(ct)) + continue; + if (nf_ct_key_equal(h, tuple, zone, net)) { NF_CT_STAT_INC_ATOMIC(net, found); return h; @@ -618,7 +627,6 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) zone, net)) goto out; - add_timer(&ct->timeout); smp_wmb(); /* The caller holds a reference to this object */ atomic_set(&ct->ct_general.use, 2); @@ -771,8 +779,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) /* Timer relative to confirmation time, not original setting time, otherwise we'd get timer wrap in weird delay cases. */ - ct->timeout.expires += jiffies; - add_timer(&ct->timeout); + ct->timeout += nfct_time_stamp; atomic_inc(&ct->ct_general.use); ct->status |= IPS_CONFIRMED; @@ -823,29 +830,41 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_zone *zone; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_head *ct_hash; - unsigned int hash, sequence; + unsigned int hash, hsize; struct hlist_nulls_node *n; struct nf_conn *ct; zone = nf_ct_zone(ignored_conntrack); rcu_read_lock(); - do { - sequence = read_seqcount_begin(&nf_conntrack_generation); - hash = hash_conntrack(net, tuple); - ct_hash = nf_conntrack_hash; - } while (read_seqcount_retry(&nf_conntrack_generation, sequence)); + begin: + nf_conntrack_get_ht(&ct_hash, &hsize); + hash = __hash_conntrack(net, tuple, hsize); hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) { ct = nf_ct_tuplehash_to_ctrack(h); - if (ct != ignored_conntrack && - nf_ct_key_equal(h, tuple, zone, net)) { + + if (ct == ignored_conntrack) + continue; + + if (nf_ct_is_expired(ct)) { + nf_ct_gc_expired(ct); + continue; + } + + if (nf_ct_key_equal(h, tuple, zone, net)) { NF_CT_STAT_INC_ATOMIC(net, found); rcu_read_unlock(); return 1; } NF_CT_STAT_INC_ATOMIC(net, searched); } + + if (get_nulls_value(n) != hash) { + NF_CT_STAT_INC_ATOMIC(net, search_restart); + goto begin; + } + rcu_read_unlock(); return 0; @@ -867,6 +886,11 @@ static unsigned int early_drop_list(struct net *net, hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) { tmp = nf_ct_tuplehash_to_ctrack(h); + if (nf_ct_is_expired(tmp)) { + nf_ct_gc_expired(tmp); + continue; + } + if (test_bit(IPS_ASSURED_BIT, &tmp->status) || !net_eq(nf_ct_net(tmp), net) || nf_ct_is_dying(tmp)) @@ -884,7 +908,6 @@ static unsigned int early_drop_list(struct net *net, */ if (net_eq(nf_ct_net(tmp), net) && nf_ct_is_confirmed(tmp) && - del_timer(&tmp->timeout) && nf_ct_delete(tmp, 0, 0)) drops++; @@ -900,14 +923,11 @@ static noinline int early_drop(struct net *net, unsigned int _hash) for (i = 0; i < NF_CT_EVICTION_RANGE; i++) { struct hlist_nulls_head *ct_hash; - unsigned hash, sequence, drops; + unsigned int hash, hsize, drops; rcu_read_lock(); - do { - sequence = read_seqcount_begin(&nf_conntrack_generation); - hash = scale_hash(_hash++); - ct_hash = nf_conntrack_hash; - } while (read_seqcount_retry(&nf_conntrack_generation, sequence)); + nf_conntrack_get_ht(&ct_hash, &hsize); + hash = reciprocal_scale(_hash++, hsize); drops = early_drop_list(net, &ct_hash[hash]); rcu_read_unlock(); @@ -921,6 +941,69 @@ static noinline int early_drop(struct net *net, unsigned int _hash) return false; } +static void gc_worker(struct work_struct *work) +{ + unsigned int i, goal, buckets = 0, expired_count = 0; + unsigned long next_run = GC_INTERVAL; + unsigned int ratio, scanned = 0; + struct conntrack_gc_work *gc_work; + + gc_work = container_of(work, struct conntrack_gc_work, dwork.work); + + goal = min(nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV, GC_MAX_BUCKETS); + i = gc_work->last_bucket; + + do { + struct nf_conntrack_tuple_hash *h; + struct hlist_nulls_head *ct_hash; + struct hlist_nulls_node *n; + unsigned int hashsz; + struct nf_conn *tmp; + + i++; + rcu_read_lock(); + + nf_conntrack_get_ht(&ct_hash, &hashsz); + if (i >= hashsz) + i = 0; + + hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) { + tmp = nf_ct_tuplehash_to_ctrack(h); + + scanned++; + if (nf_ct_is_expired(tmp)) { + nf_ct_gc_expired(tmp); + expired_count++; + continue; + } + } + + /* could check get_nulls_value() here and restart if ct + * was moved to another chain. But given gc is best-effort + * we will just continue with next hash slot. + */ + rcu_read_unlock(); + cond_resched_rcu_qs(); + } while (++buckets < goal && + expired_count < GC_MAX_EVICTS); + + if (gc_work->exiting) + return; + + ratio = scanned ? expired_count * 100 / scanned : 0; + if (ratio >= 90) + next_run = 0; + + gc_work->last_bucket = i; + schedule_delayed_work(&gc_work->dwork, next_run); +} + +static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work) +{ + INIT_DELAYED_WORK(&gc_work->dwork, gc_worker); + gc_work->exiting = false; +} + static struct nf_conn * __nf_conntrack_alloc(struct net *net, const struct nf_conntrack_zone *zone, @@ -957,8 +1040,6 @@ __nf_conntrack_alloc(struct net *net, /* save hash for reusing when confirming */ *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash; ct->status = 0; - /* Don't set timer yet: wait for confirmation */ - setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct); write_pnet(&ct->ct_net, net); memset(&ct->__nfct_init_offset[0], 0, offsetof(struct nf_conn, proto) - @@ -1332,7 +1413,6 @@ void __nf_ct_refresh_acct(struct nf_conn *ct, unsigned long extra_jiffies, int do_acct) { - NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct); NF_CT_ASSERT(skb); /* Only update if this is not a fixed timeout */ @@ -1340,39 +1420,25 @@ void __nf_ct_refresh_acct(struct nf_conn *ct, goto acct; /* If not in hash table, timer will not be active yet */ - if (!nf_ct_is_confirmed(ct)) { - ct->timeout.expires = extra_jiffies; - } else { - unsigned long newtime = jiffies + extra_jiffies; - - /* Only update the timeout if the new timeout is at least - HZ jiffies from the old timeout. Need del_timer for race - avoidance (may already be dying). */ - if (newtime - ct->timeout.expires >= HZ) - mod_timer_pending(&ct->timeout, newtime); - } + if (nf_ct_is_confirmed(ct)) + extra_jiffies += nfct_time_stamp; + ct->timeout = extra_jiffies; acct: if (do_acct) nf_ct_acct_update(ct, ctinfo, skb->len); } EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct); -bool __nf_ct_kill_acct(struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - const struct sk_buff *skb, - int do_acct) +bool nf_ct_kill_acct(struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + const struct sk_buff *skb) { - if (do_acct) - nf_ct_acct_update(ct, ctinfo, skb->len); + nf_ct_acct_update(ct, ctinfo, skb->len); - if (del_timer(&ct->timeout)) { - ct->timeout.function((unsigned long)ct); - return true; - } - return false; + return nf_ct_delete(ct, 0, 0); } -EXPORT_SYMBOL_GPL(__nf_ct_kill_acct); +EXPORT_SYMBOL_GPL(nf_ct_kill_acct); #if IS_ENABLED(CONFIG_NF_CT_NETLINK) @@ -1505,11 +1571,8 @@ void nf_ct_iterate_cleanup(struct net *net, while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) { /* Time to push up daises... */ - if (del_timer(&ct->timeout)) - nf_ct_delete(ct, portid, report); - - /* ... else the timer will get him soon. */ + nf_ct_delete(ct, portid, report); nf_ct_put(ct); cond_resched(); } @@ -1545,6 +1608,7 @@ static int untrack_refs(void) void nf_conntrack_cleanup_start(void) { + conntrack_gc_work.exiting = true; RCU_INIT_POINTER(ip_ct_attach, NULL); } @@ -1554,6 +1618,7 @@ void nf_conntrack_cleanup_end(void) while (untrack_refs() > 0) schedule(); + cancel_delayed_work_sync(&conntrack_gc_work.dwork); nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size); nf_conntrack_proto_fini(); @@ -1828,6 +1893,10 @@ int nf_conntrack_init_start(void) } /* - and look it like as a confirmed connection */ nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED); + + conntrack_gc_work_init(&conntrack_gc_work); + schedule_delayed_work(&conntrack_gc_work.dwork, GC_INTERVAL); + return 0; err_proto: diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index d28011b42845..da9df2d56e66 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -49,8 +49,13 @@ static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu) hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) { struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); + struct nf_conntrack_ecache *e; - if (nf_ct_is_dying(ct)) + if (!nf_ct_is_confirmed(ct)) + continue; + + e = nf_ct_ecache_find(ct); + if (!e || e->state != NFCT_ECACHE_DESTROY_FAIL) continue; if (nf_conntrack_event(IPCT_DESTROY, ct)) { @@ -58,8 +63,7 @@ static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu) break; } - /* we've got the event delivered, now it's dying */ - set_bit(IPS_DYING_BIT, &ct->status); + e->state = NFCT_ECACHE_DESTROY_SENT; refs[evicted] = ct; if (++evicted >= ARRAY_SIZE(refs)) { @@ -130,7 +134,7 @@ int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, if (!e) goto out_unlock; - if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) { + if (nf_ct_is_confirmed(ct)) { struct nf_ct_event item = { .ct = ct, .portid = e->portid ? e->portid : portid, @@ -150,11 +154,13 @@ int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, * triggered by a process, we store the PORTID * to include it in the retransmission. */ - if (eventmask & (1 << IPCT_DESTROY) && - e->portid == 0 && portid != 0) - e->portid = portid; - else + if (eventmask & (1 << IPCT_DESTROY)) { + if (e->portid == 0 && portid != 0) + e->portid = portid; + e->state = NFCT_ECACHE_DESTROY_FAIL; + } else { e->missed |= eventmask; + } } else { e->missed &= ~missed; } diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 43147005bea3..b6934b5edf7a 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -237,7 +237,7 @@ static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd, } delim = data[0]; if (isdigit(delim) || delim < 33 || delim > 126 || data[2] != delim) { - pr_debug("try_eprt: invalid delimitter.\n"); + pr_debug("try_eprt: invalid delimiter.\n"); return 0; } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index fdfc71f416b7..c052b712c49f 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -149,10 +149,7 @@ nla_put_failure: static int ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct) { - long timeout = ((long)ct->timeout.expires - (long)jiffies) / HZ; - - if (timeout < 0) - timeout = 0; + long timeout = nf_ct_expires(ct) / HZ; if (nla_put_be32(skb, CTA_TIMEOUT, htonl(timeout))) goto nla_put_failure; @@ -818,14 +815,23 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) struct hlist_nulls_node *n; struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); u_int8_t l3proto = nfmsg->nfgen_family; - int res; + struct nf_conn *nf_ct_evict[8]; + int res, i; spinlock_t *lockp; last = (struct nf_conn *)cb->args[1]; + i = 0; local_bh_disable(); for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) { restart: + while (i) { + i--; + if (nf_ct_should_gc(nf_ct_evict[i])) + nf_ct_kill(nf_ct_evict[i]); + nf_ct_put(nf_ct_evict[i]); + } + lockp = &nf_conntrack_locks[cb->args[0] % CONNTRACK_LOCKS]; nf_conntrack_lock(lockp); if (cb->args[0] >= nf_conntrack_htable_size) { @@ -837,6 +843,13 @@ restart: if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) continue; ct = nf_ct_tuplehash_to_ctrack(h); + if (nf_ct_is_expired(ct)) { + if (i < ARRAY_SIZE(nf_ct_evict) && + atomic_inc_not_zero(&ct->ct_general.use)) + nf_ct_evict[i++] = ct; + continue; + } + if (!net_eq(net, nf_ct_net(ct))) continue; @@ -878,6 +891,13 @@ out: if (last) nf_ct_put(last); + while (i) { + i--; + if (nf_ct_should_gc(nf_ct_evict[i])) + nf_ct_kill(nf_ct_evict[i]); + nf_ct_put(nf_ct_evict[i]); + } + return skb->len; } @@ -1147,9 +1167,7 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl, } } - if (del_timer(&ct->timeout)) - nf_ct_delete(ct, NETLINK_CB(skb).portid, nlmsg_report(nlh)); - + nf_ct_delete(ct, NETLINK_CB(skb).portid, nlmsg_report(nlh)); nf_ct_put(ct); return 0; @@ -1517,11 +1535,10 @@ static int ctnetlink_change_timeout(struct nf_conn *ct, { u_int32_t timeout = ntohl(nla_get_be32(cda[CTA_TIMEOUT])); - if (!del_timer(&ct->timeout)) - return -ETIME; + ct->timeout = nfct_time_stamp + timeout * HZ; - ct->timeout.expires = jiffies + timeout * HZ; - add_timer(&ct->timeout); + if (test_bit(IPS_DYING_BIT, &ct->status)) + return -ETIME; return 0; } @@ -1719,9 +1736,8 @@ ctnetlink_create_conntrack(struct net *net, if (!cda[CTA_TIMEOUT]) goto err1; - ct->timeout.expires = ntohl(nla_get_be32(cda[CTA_TIMEOUT])); - ct->timeout.expires = jiffies + ct->timeout.expires * HZ; + ct->timeout = nfct_time_stamp + ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ; rcu_read_lock(); if (cda[CTA_HELP]) { diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 5588c7ae1ac2..f60a4755d71e 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -157,8 +157,7 @@ static int destroy_sibling_or_exp(struct net *net, struct nf_conn *ct, pr_debug("setting timeout of conntrack %p to 0\n", sibling); sibling->proto.gre.timeout = 0; sibling->proto.gre.stream_timeout = 0; - if (del_timer(&sibling->timeout)) - sibling->timeout.function((unsigned long)sibling); + nf_ct_kill(sibling); nf_ct_put(sibling); return 1; } else { diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index b65d5864b6d9..8d2c7d8c666a 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -159,54 +159,6 @@ static int kill_l4proto(struct nf_conn *i, void *data) nf_ct_l3num(i) == l4proto->l3proto; } -static struct nf_ip_net *nf_ct_l3proto_net(struct net *net, - struct nf_conntrack_l3proto *l3proto) -{ - if (l3proto->l3proto == PF_INET) - return &net->ct.nf_ct_proto; - else - return NULL; -} - -static int nf_ct_l3proto_register_sysctl(struct net *net, - struct nf_conntrack_l3proto *l3proto) -{ - int err = 0; - struct nf_ip_net *in = nf_ct_l3proto_net(net, l3proto); - /* nf_conntrack_l3proto_ipv6 doesn't support sysctl */ - if (in == NULL) - return 0; - -#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) - if (in->ctl_table != NULL) { - err = nf_ct_register_sysctl(net, - &in->ctl_table_header, - l3proto->ctl_table_path, - in->ctl_table); - if (err < 0) { - kfree(in->ctl_table); - in->ctl_table = NULL; - } - } -#endif - return err; -} - -static void nf_ct_l3proto_unregister_sysctl(struct net *net, - struct nf_conntrack_l3proto *l3proto) -{ - struct nf_ip_net *in = nf_ct_l3proto_net(net, l3proto); - - if (in == NULL) - return; -#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) - if (in->ctl_table_header != NULL) - nf_ct_unregister_sysctl(&in->ctl_table_header, - &in->ctl_table, - 0); -#endif -} - int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto) { int ret = 0; @@ -241,7 +193,7 @@ EXPORT_SYMBOL_GPL(nf_ct_l3proto_register); int nf_ct_l3proto_pernet_register(struct net *net, struct nf_conntrack_l3proto *proto) { - int ret = 0; + int ret; if (proto->init_net) { ret = proto->init_net(net); @@ -249,7 +201,7 @@ int nf_ct_l3proto_pernet_register(struct net *net, return ret; } - return nf_ct_l3proto_register_sysctl(net, proto); + return 0; } EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_register); @@ -272,8 +224,6 @@ EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister); void nf_ct_l3proto_pernet_unregister(struct net *net, struct nf_conntrack_l3proto *proto) { - nf_ct_l3proto_unregister_sysctl(net, proto); - /* Remove all contrack entries for this protocol */ nf_ct_iterate_cleanup(net, kill_l3proto, proto, 0, 0); } @@ -312,26 +262,6 @@ int nf_ct_l4proto_register_sysctl(struct net *net, } } } -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - if (l4proto->l3proto != AF_INET6 && pn->ctl_compat_table != NULL) { - if (err < 0) { - nf_ct_kfree_compat_sysctl_table(pn); - goto out; - } - err = nf_ct_register_sysctl(net, - &pn->ctl_compat_header, - "net/ipv4/netfilter", - pn->ctl_compat_table); - if (err == 0) - goto out; - - nf_ct_kfree_compat_sysctl_table(pn); - nf_ct_unregister_sysctl(&pn->ctl_table_header, - &pn->ctl_table, - pn->users); - } -out: -#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ return err; } @@ -346,13 +276,6 @@ void nf_ct_l4proto_unregister_sysctl(struct net *net, nf_ct_unregister_sysctl(&pn->ctl_table_header, &pn->ctl_table, pn->users); - -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - if (l4proto->l3proto != AF_INET6 && pn->ctl_compat_header != NULL) - nf_ct_unregister_sysctl(&pn->ctl_compat_header, - &pn->ctl_compat_table, - 0); -#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ } diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 399a38fd685a..a45bee52dccc 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -402,7 +402,8 @@ static bool dccp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, { struct dccp_hdr _hdr, *dh; - dh = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); + /* Actually only need first 4 bytes to get ports. */ + dh = skb_header_pointer(skb, dataoff, 4, &_hdr); if (dh == NULL) return false; diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index 86dc752e5349..d5868bad33a7 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -151,17 +151,6 @@ static struct ctl_table generic_sysctl_table[] = { }, { } }; -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT -static struct ctl_table generic_compat_sysctl_table[] = { - { - .procname = "ip_conntrack_generic_timeout", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { } -}; -#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ static int generic_kmemdup_sysctl_table(struct nf_proto_net *pn, @@ -179,40 +168,14 @@ static int generic_kmemdup_sysctl_table(struct nf_proto_net *pn, return 0; } -static int generic_kmemdup_compat_sysctl_table(struct nf_proto_net *pn, - struct nf_generic_net *gn) -{ -#ifdef CONFIG_SYSCTL -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - pn->ctl_compat_table = kmemdup(generic_compat_sysctl_table, - sizeof(generic_compat_sysctl_table), - GFP_KERNEL); - if (!pn->ctl_compat_table) - return -ENOMEM; - - pn->ctl_compat_table[0].data = &gn->timeout; -#endif -#endif - return 0; -} - static int generic_init_net(struct net *net, u_int16_t proto) { - int ret; struct nf_generic_net *gn = generic_pernet(net); struct nf_proto_net *pn = &gn->pn; gn->timeout = nf_ct_generic_timeout; - ret = generic_kmemdup_compat_sysctl_table(pn, gn); - if (ret < 0) - return ret; - - ret = generic_kmemdup_sysctl_table(pn, gn); - if (ret < 0) - nf_ct_kfree_compat_sysctl_table(pn); - - return ret; + return generic_kmemdup_sysctl_table(pn, gn); } static struct nf_proto_net *generic_get_net_proto(struct net *net) diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 1d7ab960a9e6..982ea62606c7 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -161,8 +161,8 @@ static bool sctp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, const struct sctphdr *hp; struct sctphdr _hdr; - /* Actually only need first 8 bytes. */ - hp = skb_header_pointer(skb, dataoff, 8, &_hdr); + /* Actually only need first 4 bytes to get ports. */ + hp = skb_header_pointer(skb, dataoff, 4, &_hdr); if (hp == NULL) return false; @@ -705,54 +705,6 @@ static struct ctl_table sctp_sysctl_table[] = { }, { } }; - -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT -static struct ctl_table sctp_compat_sysctl_table[] = { - { - .procname = "ip_conntrack_sctp_timeout_closed", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_sctp_timeout_cookie_wait", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_sctp_timeout_cookie_echoed", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_sctp_timeout_established", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_sctp_timeout_shutdown_sent", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_sctp_timeout_shutdown_recd", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_sctp_timeout_shutdown_ack_sent", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { } -}; -#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn, @@ -781,32 +733,8 @@ static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn, return 0; } -static int sctp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn, - struct sctp_net *sn) -{ -#ifdef CONFIG_SYSCTL -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - pn->ctl_compat_table = kmemdup(sctp_compat_sysctl_table, - sizeof(sctp_compat_sysctl_table), - GFP_KERNEL); - if (!pn->ctl_compat_table) - return -ENOMEM; - - pn->ctl_compat_table[0].data = &sn->timeouts[SCTP_CONNTRACK_CLOSED]; - pn->ctl_compat_table[1].data = &sn->timeouts[SCTP_CONNTRACK_COOKIE_WAIT]; - pn->ctl_compat_table[2].data = &sn->timeouts[SCTP_CONNTRACK_COOKIE_ECHOED]; - pn->ctl_compat_table[3].data = &sn->timeouts[SCTP_CONNTRACK_ESTABLISHED]; - pn->ctl_compat_table[4].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_SENT]; - pn->ctl_compat_table[5].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_RECD]; - pn->ctl_compat_table[6].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT]; -#endif -#endif - return 0; -} - static int sctp_init_net(struct net *net, u_int16_t proto) { - int ret; struct sctp_net *sn = sctp_pernet(net); struct nf_proto_net *pn = &sn->pn; @@ -817,18 +745,7 @@ static int sctp_init_net(struct net *net, u_int16_t proto) sn->timeouts[i] = sctp_timeouts[i]; } - if (proto == AF_INET) { - ret = sctp_kmemdup_compat_sysctl_table(pn, sn); - if (ret < 0) - return ret; - - ret = sctp_kmemdup_sysctl_table(pn, sn); - if (ret < 0) - nf_ct_kfree_compat_sysctl_table(pn); - } else - ret = sctp_kmemdup_sysctl_table(pn, sn); - - return ret; + return sctp_kmemdup_sysctl_table(pn, sn); } static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = { diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 70c8381641a7..69f687740c76 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -282,8 +282,8 @@ static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, const struct tcphdr *hp; struct tcphdr _hdr; - /* Actually only need first 8 bytes. */ - hp = skb_header_pointer(skb, dataoff, 8, &_hdr); + /* Actually only need first 4 bytes to get ports. */ + hp = skb_header_pointer(skb, dataoff, 4, &_hdr); if (hp == NULL) return false; @@ -1481,90 +1481,6 @@ static struct ctl_table tcp_sysctl_table[] = { }, { } }; - -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT -static struct ctl_table tcp_compat_sysctl_table[] = { - { - .procname = "ip_conntrack_tcp_timeout_syn_sent", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_tcp_timeout_syn_sent2", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_tcp_timeout_syn_recv", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_tcp_timeout_established", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_tcp_timeout_fin_wait", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_tcp_timeout_close_wait", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_tcp_timeout_last_ack", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_tcp_timeout_time_wait", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_tcp_timeout_close", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_tcp_timeout_max_retrans", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_tcp_loose", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "ip_conntrack_tcp_be_liberal", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "ip_conntrack_tcp_max_retrans", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { } -}; -#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn, @@ -1597,38 +1513,8 @@ static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn, return 0; } -static int tcp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn, - struct nf_tcp_net *tn) -{ -#ifdef CONFIG_SYSCTL -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - pn->ctl_compat_table = kmemdup(tcp_compat_sysctl_table, - sizeof(tcp_compat_sysctl_table), - GFP_KERNEL); - if (!pn->ctl_compat_table) - return -ENOMEM; - - pn->ctl_compat_table[0].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT]; - pn->ctl_compat_table[1].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT2]; - pn->ctl_compat_table[2].data = &tn->timeouts[TCP_CONNTRACK_SYN_RECV]; - pn->ctl_compat_table[3].data = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED]; - pn->ctl_compat_table[4].data = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT]; - pn->ctl_compat_table[5].data = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT]; - pn->ctl_compat_table[6].data = &tn->timeouts[TCP_CONNTRACK_LAST_ACK]; - pn->ctl_compat_table[7].data = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT]; - pn->ctl_compat_table[8].data = &tn->timeouts[TCP_CONNTRACK_CLOSE]; - pn->ctl_compat_table[9].data = &tn->timeouts[TCP_CONNTRACK_RETRANS]; - pn->ctl_compat_table[10].data = &tn->tcp_loose; - pn->ctl_compat_table[11].data = &tn->tcp_be_liberal; - pn->ctl_compat_table[12].data = &tn->tcp_max_retrans; -#endif -#endif - return 0; -} - static int tcp_init_net(struct net *net, u_int16_t proto) { - int ret; struct nf_tcp_net *tn = tcp_pernet(net); struct nf_proto_net *pn = &tn->pn; @@ -1643,18 +1529,7 @@ static int tcp_init_net(struct net *net, u_int16_t proto) tn->tcp_max_retrans = nf_ct_tcp_max_retrans; } - if (proto == AF_INET) { - ret = tcp_kmemdup_compat_sysctl_table(pn, tn); - if (ret < 0) - return ret; - - ret = tcp_kmemdup_sysctl_table(pn, tn); - if (ret < 0) - nf_ct_kfree_compat_sysctl_table(pn); - } else - ret = tcp_kmemdup_sysctl_table(pn, tn); - - return ret; + return tcp_kmemdup_sysctl_table(pn, tn); } static struct nf_proto_net *tcp_get_net_proto(struct net *net) diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 4fd040575ffe..20f35ed68030 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -44,8 +44,8 @@ static bool udp_pkt_to_tuple(const struct sk_buff *skb, const struct udphdr *hp; struct udphdr _hdr; - /* Actually only need first 8 bytes. */ - hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); + /* Actually only need first 4 bytes to get ports. */ + hp = skb_header_pointer(skb, dataoff, 4, &_hdr); if (hp == NULL) return false; @@ -218,23 +218,6 @@ static struct ctl_table udp_sysctl_table[] = { }, { } }; -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT -static struct ctl_table udp_compat_sysctl_table[] = { - { - .procname = "ip_conntrack_udp_timeout", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_udp_timeout_stream", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { } -}; -#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ static int udp_kmemdup_sysctl_table(struct nf_proto_net *pn, @@ -254,27 +237,8 @@ static int udp_kmemdup_sysctl_table(struct nf_proto_net *pn, return 0; } -static int udp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn, - struct nf_udp_net *un) -{ -#ifdef CONFIG_SYSCTL -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - pn->ctl_compat_table = kmemdup(udp_compat_sysctl_table, - sizeof(udp_compat_sysctl_table), - GFP_KERNEL); - if (!pn->ctl_compat_table) - return -ENOMEM; - - pn->ctl_compat_table[0].data = &un->timeouts[UDP_CT_UNREPLIED]; - pn->ctl_compat_table[1].data = &un->timeouts[UDP_CT_REPLIED]; -#endif -#endif - return 0; -} - static int udp_init_net(struct net *net, u_int16_t proto) { - int ret; struct nf_udp_net *un = udp_pernet(net); struct nf_proto_net *pn = &un->pn; @@ -285,18 +249,7 @@ static int udp_init_net(struct net *net, u_int16_t proto) un->timeouts[i] = udp_timeouts[i]; } - if (proto == AF_INET) { - ret = udp_kmemdup_compat_sysctl_table(pn, un); - if (ret < 0) - return ret; - - ret = udp_kmemdup_sysctl_table(pn, un); - if (ret < 0) - nf_ct_kfree_compat_sysctl_table(pn); - } else - ret = udp_kmemdup_sysctl_table(pn, un); - - return ret; + return udp_kmemdup_sysctl_table(pn, un); } static struct nf_proto_net *udp_get_net_proto(struct net *net) diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 9d692f5adb94..029206e8dec4 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -54,7 +54,8 @@ static bool udplite_pkt_to_tuple(const struct sk_buff *skb, const struct udphdr *hp; struct udphdr _hdr; - hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); + /* Actually only need first 4 bytes to get ports. */ + hp = skb_header_pointer(skb, dataoff, 4, &_hdr); if (hp == NULL) return false; diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 9f267c3ffb39..3d9a316a3c77 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -228,8 +228,7 @@ static int ct_seq_show(struct seq_file *s, void *v) seq_printf(s, "%-8s %u %-8s %u %ld ", l3proto->name, nf_ct_l3num(ct), l4proto->name, nf_ct_protonum(ct), - timer_pending(&ct->timeout) - ? (long)(ct->timeout.expires - jiffies)/HZ : 0); + nf_ct_expires(ct) / HZ); if (l4proto->print_conntrack) l4proto->print_conntrack(s, ct); diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index aa5847a16713..30a17d649a83 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -39,12 +39,12 @@ static struct nf_logger *__find_logger(int pf, const char *str_logger) return NULL; } -void nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger) +int nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger) { const struct nf_logger *log; - if (pf == NFPROTO_UNSPEC) - return; + if (pf == NFPROTO_UNSPEC || pf >= ARRAY_SIZE(net->nf.nf_loggers)) + return -EOPNOTSUPP; mutex_lock(&nf_log_mutex); log = nft_log_dereference(net->nf.nf_loggers[pf]); @@ -52,6 +52,8 @@ void nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger) rcu_assign_pointer(net->nf.nf_loggers[pf], logger); mutex_unlock(&nf_log_mutex); + + return 0; } EXPORT_SYMBOL(nf_log_set); diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index de31818417b8..81ae41f85d3a 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -565,16 +565,10 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data) * Else, when the conntrack is destoyed, nf_nat_cleanup_conntrack() * will delete entry from already-freed table. */ - if (!del_timer(&ct->timeout)) - return 1; - ct->status &= ~IPS_NAT_DONE_MASK; - rhashtable_remove_fast(&nf_nat_bysource_table, &ct->nat_bysource, nf_nat_bysource_params); - add_timer(&ct->timeout); - /* don't delete conntrack. Although that would make things a lot * simpler, we'd end up flushing all conntracks on nat rmmod. */ diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 7e1c876c7608..bd9715e5ff26 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1196,6 +1196,83 @@ static void nf_tables_chain_destroy(struct nft_chain *chain) } } +struct nft_chain_hook { + u32 num; + u32 priority; + const struct nf_chain_type *type; + struct net_device *dev; +}; + +static int nft_chain_parse_hook(struct net *net, + const struct nlattr * const nla[], + struct nft_af_info *afi, + struct nft_chain_hook *hook, bool create) +{ + struct nlattr *ha[NFTA_HOOK_MAX + 1]; + const struct nf_chain_type *type; + struct net_device *dev; + int err; + + err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK], + nft_hook_policy); + if (err < 0) + return err; + + if (ha[NFTA_HOOK_HOOKNUM] == NULL || + ha[NFTA_HOOK_PRIORITY] == NULL) + return -EINVAL; + + hook->num = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM])); + if (hook->num >= afi->nhooks) + return -EINVAL; + + hook->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY])); + + type = chain_type[afi->family][NFT_CHAIN_T_DEFAULT]; + if (nla[NFTA_CHAIN_TYPE]) { + type = nf_tables_chain_type_lookup(afi, nla[NFTA_CHAIN_TYPE], + create); + if (IS_ERR(type)) + return PTR_ERR(type); + } + if (!(type->hook_mask & (1 << hook->num))) + return -EOPNOTSUPP; + if (!try_module_get(type->owner)) + return -ENOENT; + + hook->type = type; + + hook->dev = NULL; + if (afi->flags & NFT_AF_NEEDS_DEV) { + char ifname[IFNAMSIZ]; + + if (!ha[NFTA_HOOK_DEV]) { + module_put(type->owner); + return -EOPNOTSUPP; + } + + nla_strlcpy(ifname, ha[NFTA_HOOK_DEV], IFNAMSIZ); + dev = dev_get_by_name(net, ifname); + if (!dev) { + module_put(type->owner); + return -ENOENT; + } + hook->dev = dev; + } else if (ha[NFTA_HOOK_DEV]) { + module_put(type->owner); + return -EOPNOTSUPP; + } + + return 0; +} + +static void nft_chain_release_hook(struct nft_chain_hook *hook) +{ + module_put(hook->type->owner); + if (hook->dev != NULL) + dev_put(hook->dev); +} + static int nf_tables_newchain(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -1206,10 +1283,8 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, struct nft_table *table; struct nft_chain *chain; struct nft_base_chain *basechain = NULL; - struct nlattr *ha[NFTA_HOOK_MAX + 1]; u8 genmask = nft_genmask_next(net); int family = nfmsg->nfgen_family; - struct net_device *dev = NULL; u8 policy = NF_ACCEPT; u64 handle = 0; unsigned int i; @@ -1273,6 +1348,37 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, if (nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; + if (nla[NFTA_CHAIN_HOOK]) { + struct nft_base_chain *basechain; + struct nft_chain_hook hook; + struct nf_hook_ops *ops; + + if (!(chain->flags & NFT_BASE_CHAIN)) + return -EBUSY; + + err = nft_chain_parse_hook(net, nla, afi, &hook, + create); + if (err < 0) + return err; + + basechain = nft_base_chain(chain); + if (basechain->type != hook.type) { + nft_chain_release_hook(&hook); + return -EBUSY; + } + + for (i = 0; i < afi->nops; i++) { + ops = &basechain->ops[i]; + if (ops->hooknum != hook.num || + ops->priority != hook.priority || + ops->dev != hook.dev) { + nft_chain_release_hook(&hook); + return -EBUSY; + } + } + nft_chain_release_hook(&hook); + } + if (nla[NFTA_CHAIN_HANDLE] && name) { struct nft_chain *chain2; @@ -1320,102 +1426,53 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, return -EOVERFLOW; if (nla[NFTA_CHAIN_HOOK]) { - const struct nf_chain_type *type; + struct nft_chain_hook hook; struct nf_hook_ops *ops; nf_hookfn *hookfn; - u32 hooknum, priority; - - type = chain_type[family][NFT_CHAIN_T_DEFAULT]; - if (nla[NFTA_CHAIN_TYPE]) { - type = nf_tables_chain_type_lookup(afi, - nla[NFTA_CHAIN_TYPE], - create); - if (IS_ERR(type)) - return PTR_ERR(type); - } - err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK], - nft_hook_policy); + err = nft_chain_parse_hook(net, nla, afi, &hook, create); if (err < 0) return err; - if (ha[NFTA_HOOK_HOOKNUM] == NULL || - ha[NFTA_HOOK_PRIORITY] == NULL) - return -EINVAL; - - hooknum = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM])); - if (hooknum >= afi->nhooks) - return -EINVAL; - priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY])); - - if (!(type->hook_mask & (1 << hooknum))) - return -EOPNOTSUPP; - if (!try_module_get(type->owner)) - return -ENOENT; - hookfn = type->hooks[hooknum]; - - if (afi->flags & NFT_AF_NEEDS_DEV) { - char ifname[IFNAMSIZ]; - - if (!ha[NFTA_HOOK_DEV]) { - module_put(type->owner); - return -EOPNOTSUPP; - } - - nla_strlcpy(ifname, ha[NFTA_HOOK_DEV], IFNAMSIZ); - dev = dev_get_by_name(net, ifname); - if (!dev) { - module_put(type->owner); - return -ENOENT; - } - } else if (ha[NFTA_HOOK_DEV]) { - module_put(type->owner); - return -EOPNOTSUPP; - } basechain = kzalloc(sizeof(*basechain), GFP_KERNEL); if (basechain == NULL) { - module_put(type->owner); - if (dev != NULL) - dev_put(dev); + nft_chain_release_hook(&hook); return -ENOMEM; } - if (dev != NULL) - strncpy(basechain->dev_name, dev->name, IFNAMSIZ); + if (hook.dev != NULL) + strncpy(basechain->dev_name, hook.dev->name, IFNAMSIZ); if (nla[NFTA_CHAIN_COUNTERS]) { stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]); if (IS_ERR(stats)) { - module_put(type->owner); + nft_chain_release_hook(&hook); kfree(basechain); - if (dev != NULL) - dev_put(dev); return PTR_ERR(stats); } basechain->stats = stats; } else { stats = netdev_alloc_pcpu_stats(struct nft_stats); if (stats == NULL) { - module_put(type->owner); + nft_chain_release_hook(&hook); kfree(basechain); - if (dev != NULL) - dev_put(dev); return -ENOMEM; } rcu_assign_pointer(basechain->stats, stats); } - basechain->type = type; + hookfn = hook.type->hooks[hook.num]; + basechain->type = hook.type; chain = &basechain->chain; for (i = 0; i < afi->nops; i++) { ops = &basechain->ops[i]; ops->pf = family; - ops->hooknum = hooknum; - ops->priority = priority; + ops->hooknum = hook.num; + ops->priority = hook.priority; ops->priv = chain; ops->hook = afi->hooks[ops->hooknum]; - ops->dev = dev; + ops->dev = hook.dev; if (hookfn) ops->hook = hookfn; if (afi->hook_ops_init) @@ -3426,12 +3483,12 @@ static int nft_setelem_parse_flags(const struct nft_set *set, } static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, - const struct nlattr *attr) + const struct nlattr *attr, u32 nlmsg_flags) { struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; struct nft_data_desc d1, d2; struct nft_set_ext_tmpl tmpl; - struct nft_set_ext *ext; + struct nft_set_ext *ext, *ext2; struct nft_set_elem elem; struct nft_set_binding *binding; struct nft_userdata *udata; @@ -3558,9 +3615,19 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, goto err4; ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK; - err = set->ops->insert(ctx->net, set, &elem); - if (err < 0) + err = set->ops->insert(ctx->net, set, &elem, &ext2); + if (err) { + if (err == -EEXIST) { + if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) && + nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) && + memcmp(nft_set_ext_data(ext), + nft_set_ext_data(ext2), set->dlen) != 0) + err = -EBUSY; + else if (!(nlmsg_flags & NLM_F_EXCL)) + err = 0; + } goto err5; + } nft_trans_elem(trans) = elem; list_add_tail(&trans->list, &ctx->net->nft.commit_list); @@ -3616,7 +3683,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, !atomic_add_unless(&set->nelems, 1, set->size + set->ndeact)) return -ENFILE; - err = nft_add_set_elem(&ctx, set, attr); + err = nft_add_set_elem(&ctx, set, attr, nlh->nlmsg_flags); if (err < 0) { atomic_dec(&set->nelems); break; diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 564fa7929ed5..764251d31e46 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -1,395 +1,136 @@ /* - * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net> + * Copyright (c) 2016 Laura Garcia <nevola@gmail.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. * - * Development of this code funded by Astaro AG (http://www.astaro.com/) */ #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> -#include <linux/list.h> -#include <linux/log2.h> -#include <linux/jhash.h> #include <linux/netlink.h> -#include <linux/workqueue.h> -#include <linux/rhashtable.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> - -/* We target a hash table size of 4, element hint is 75% of final size */ -#define NFT_HASH_ELEMENT_HINT 3 +#include <net/netfilter/nf_tables_core.h> +#include <linux/jhash.h> struct nft_hash { - struct rhashtable ht; - struct delayed_work gc_work; -}; - -struct nft_hash_elem { - struct rhash_head node; - struct nft_set_ext ext; + enum nft_registers sreg:8; + enum nft_registers dreg:8; + u8 len; + u32 modulus; + u32 seed; }; -struct nft_hash_cmp_arg { - const struct nft_set *set; - const u32 *key; - u8 genmask; -}; - -static const struct rhashtable_params nft_hash_params; - -static inline u32 nft_hash_key(const void *data, u32 len, u32 seed) -{ - const struct nft_hash_cmp_arg *arg = data; - - return jhash(arg->key, len, seed); -} - -static inline u32 nft_hash_obj(const void *data, u32 len, u32 seed) -{ - const struct nft_hash_elem *he = data; - - return jhash(nft_set_ext_key(&he->ext), len, seed); -} - -static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg, - const void *ptr) -{ - const struct nft_hash_cmp_arg *x = arg->key; - const struct nft_hash_elem *he = ptr; - - if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen)) - return 1; - if (nft_set_elem_expired(&he->ext)) - return 1; - if (!nft_set_elem_active(&he->ext, x->genmask)) - return 1; - return 0; -} - -static bool nft_hash_lookup(const struct net *net, const struct nft_set *set, - const u32 *key, const struct nft_set_ext **ext) -{ - struct nft_hash *priv = nft_set_priv(set); - const struct nft_hash_elem *he; - struct nft_hash_cmp_arg arg = { - .genmask = nft_genmask_cur(net), - .set = set, - .key = key, - }; - - he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); - if (he != NULL) - *ext = &he->ext; - - return !!he; -} - -static bool nft_hash_update(struct nft_set *set, const u32 *key, - void *(*new)(struct nft_set *, - const struct nft_expr *, - struct nft_regs *regs), - const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_set_ext **ext) -{ - struct nft_hash *priv = nft_set_priv(set); - struct nft_hash_elem *he; - struct nft_hash_cmp_arg arg = { - .genmask = NFT_GENMASK_ANY, - .set = set, - .key = key, - }; - - he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); - if (he != NULL) - goto out; - - he = new(set, expr, regs); - if (he == NULL) - goto err1; - if (rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node, - nft_hash_params)) - goto err2; -out: - *ext = &he->ext; - return true; - -err2: - nft_set_elem_destroy(set, he); -err1: - return false; -} - -static int nft_hash_insert(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem) -{ - struct nft_hash *priv = nft_set_priv(set); - struct nft_hash_elem *he = elem->priv; - struct nft_hash_cmp_arg arg = { - .genmask = nft_genmask_next(net), - .set = set, - .key = elem->key.val.data, - }; - - return rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node, - nft_hash_params); -} - -static void nft_hash_activate(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem) +static void nft_hash_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) { - struct nft_hash_elem *he = elem->priv; + struct nft_hash *priv = nft_expr_priv(expr); + const void *data = ®s->data[priv->sreg]; - nft_set_elem_change_active(net, set, &he->ext); - nft_set_elem_clear_busy(&he->ext); + regs->data[priv->dreg] = + reciprocal_scale(jhash(data, priv->len, priv->seed), + priv->modulus); } -static void *nft_hash_deactivate(const struct net *net, - const struct nft_set *set, - const struct nft_set_elem *elem) -{ - struct nft_hash *priv = nft_set_priv(set); - struct nft_hash_elem *he; - struct nft_hash_cmp_arg arg = { - .genmask = nft_genmask_next(net), - .set = set, - .key = elem->key.val.data, - }; - - rcu_read_lock(); - he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); - if (he != NULL) { - if (!nft_set_elem_mark_busy(&he->ext) || - !nft_is_active(net, &he->ext)) - nft_set_elem_change_active(net, set, &he->ext); - else - he = NULL; - } - rcu_read_unlock(); - - return he; -} - -static void nft_hash_remove(const struct nft_set *set, - const struct nft_set_elem *elem) -{ - struct nft_hash *priv = nft_set_priv(set); - struct nft_hash_elem *he = elem->priv; - - rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params); -} - -static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, - struct nft_set_iter *iter) -{ - struct nft_hash *priv = nft_set_priv(set); - struct nft_hash_elem *he; - struct rhashtable_iter hti; - struct nft_set_elem elem; - int err; - - err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL); - iter->err = err; - if (err) - return; - - err = rhashtable_walk_start(&hti); - if (err && err != -EAGAIN) { - iter->err = err; - goto out; - } - - while ((he = rhashtable_walk_next(&hti))) { - if (IS_ERR(he)) { - err = PTR_ERR(he); - if (err != -EAGAIN) { - iter->err = err; - goto out; - } - - continue; - } - - if (iter->count < iter->skip) - goto cont; - if (nft_set_elem_expired(&he->ext)) - goto cont; - if (!nft_set_elem_active(&he->ext, iter->genmask)) - goto cont; - - elem.priv = he; - - iter->err = iter->fn(ctx, set, iter, &elem); - if (iter->err < 0) - goto out; - -cont: - iter->count++; - } - -out: - rhashtable_walk_stop(&hti); - rhashtable_walk_exit(&hti); -} - -static void nft_hash_gc(struct work_struct *work) -{ - struct nft_set *set; - struct nft_hash_elem *he; - struct nft_hash *priv; - struct nft_set_gc_batch *gcb = NULL; - struct rhashtable_iter hti; - int err; - - priv = container_of(work, struct nft_hash, gc_work.work); - set = nft_set_container_of(priv); - - err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL); - if (err) - goto schedule; - - err = rhashtable_walk_start(&hti); - if (err && err != -EAGAIN) - goto out; - - while ((he = rhashtable_walk_next(&hti))) { - if (IS_ERR(he)) { - if (PTR_ERR(he) != -EAGAIN) - goto out; - continue; - } - - if (!nft_set_elem_expired(&he->ext)) - continue; - if (nft_set_elem_mark_busy(&he->ext)) - continue; - - gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC); - if (gcb == NULL) - goto out; - rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params); - atomic_dec(&set->nelems); - nft_set_gc_batch_add(gcb, he); - } -out: - rhashtable_walk_stop(&hti); - rhashtable_walk_exit(&hti); - - nft_set_gc_batch_complete(gcb); -schedule: - queue_delayed_work(system_power_efficient_wq, &priv->gc_work, - nft_set_gc_interval(set)); -} - -static unsigned int nft_hash_privsize(const struct nlattr * const nla[]) -{ - return sizeof(struct nft_hash); -} - -static const struct rhashtable_params nft_hash_params = { - .head_offset = offsetof(struct nft_hash_elem, node), - .hashfn = nft_hash_key, - .obj_hashfn = nft_hash_obj, - .obj_cmpfn = nft_hash_cmp, - .automatic_shrinking = true, +static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = { + [NFTA_HASH_SREG] = { .type = NLA_U32 }, + [NFTA_HASH_DREG] = { .type = NLA_U32 }, + [NFTA_HASH_LEN] = { .type = NLA_U32 }, + [NFTA_HASH_MODULUS] = { .type = NLA_U32 }, + [NFTA_HASH_SEED] = { .type = NLA_U32 }, }; -static int nft_hash_init(const struct nft_set *set, - const struct nft_set_desc *desc, +static int nft_hash_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, const struct nlattr * const tb[]) { - struct nft_hash *priv = nft_set_priv(set); - struct rhashtable_params params = nft_hash_params; - int err; + struct nft_hash *priv = nft_expr_priv(expr); + u32 len; - params.nelem_hint = desc->size ?: NFT_HASH_ELEMENT_HINT; - params.key_len = set->klen; + if (!tb[NFTA_HASH_SREG] || + !tb[NFTA_HASH_DREG] || + !tb[NFTA_HASH_LEN] || + !tb[NFTA_HASH_SEED] || + !tb[NFTA_HASH_MODULUS]) + return -EINVAL; - err = rhashtable_init(&priv->ht, ¶ms); - if (err < 0) - return err; + priv->sreg = nft_parse_register(tb[NFTA_HASH_SREG]); + priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]); - INIT_DEFERRABLE_WORK(&priv->gc_work, nft_hash_gc); - if (set->flags & NFT_SET_TIMEOUT) - queue_delayed_work(system_power_efficient_wq, &priv->gc_work, - nft_set_gc_interval(set)); - return 0; -} + len = ntohl(nla_get_be32(tb[NFTA_HASH_LEN])); + if (len == 0 || len > U8_MAX) + return -ERANGE; -static void nft_hash_elem_destroy(void *ptr, void *arg) -{ - nft_set_elem_destroy((const struct nft_set *)arg, ptr); -} + priv->len = len; -static void nft_hash_destroy(const struct nft_set *set) -{ - struct nft_hash *priv = nft_set_priv(set); + priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS])); + if (priv->modulus <= 1) + return -ERANGE; - cancel_delayed_work_sync(&priv->gc_work); - rhashtable_free_and_destroy(&priv->ht, nft_hash_elem_destroy, - (void *)set); + priv->seed = ntohl(nla_get_be32(tb[NFTA_HASH_SEED])); + + return nft_validate_register_load(priv->sreg, len) && + nft_validate_register_store(ctx, priv->dreg, NULL, + NFT_DATA_VALUE, sizeof(u32)); } -static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, - struct nft_set_estimate *est) +static int nft_hash_dump(struct sk_buff *skb, + const struct nft_expr *expr) { - unsigned int esize; + const struct nft_hash *priv = nft_expr_priv(expr); - esize = sizeof(struct nft_hash_elem); - if (desc->size) { - est->size = sizeof(struct nft_hash) + - roundup_pow_of_two(desc->size * 4 / 3) * - sizeof(struct nft_hash_elem *) + - desc->size * esize; - } else { - /* Resizing happens when the load drops below 30% or goes - * above 75%. The average of 52.5% load (approximated by 50%) - * is used for the size estimation of the hash buckets, - * meaning we calculate two buckets per element. - */ - est->size = esize + 2 * sizeof(struct nft_hash_elem *); - } + if (nft_dump_register(skb, NFTA_HASH_SREG, priv->sreg)) + goto nla_put_failure; + if (nft_dump_register(skb, NFTA_HASH_DREG, priv->dreg)) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_HASH_LEN, htonl(priv->len))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_HASH_MODULUS, htonl(priv->modulus))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_HASH_SEED, htonl(priv->seed))) + goto nla_put_failure; - est->class = NFT_SET_CLASS_O_1; + return 0; - return true; +nla_put_failure: + return -1; } -static struct nft_set_ops nft_hash_ops __read_mostly = { - .privsize = nft_hash_privsize, - .elemsize = offsetof(struct nft_hash_elem, ext), - .estimate = nft_hash_estimate, +static struct nft_expr_type nft_hash_type; +static const struct nft_expr_ops nft_hash_ops = { + .type = &nft_hash_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_hash)), + .eval = nft_hash_eval, .init = nft_hash_init, - .destroy = nft_hash_destroy, - .insert = nft_hash_insert, - .activate = nft_hash_activate, - .deactivate = nft_hash_deactivate, - .remove = nft_hash_remove, - .lookup = nft_hash_lookup, - .update = nft_hash_update, - .walk = nft_hash_walk, - .features = NFT_SET_MAP | NFT_SET_TIMEOUT, + .dump = nft_hash_dump, +}; + +static struct nft_expr_type nft_hash_type __read_mostly = { + .name = "hash", + .ops = &nft_hash_ops, + .policy = nft_hash_policy, + .maxattr = NFTA_HASH_MAX, .owner = THIS_MODULE, }; static int __init nft_hash_module_init(void) { - return nft_register_set(&nft_hash_ops); + return nft_register_expr(&nft_hash_type); } static void __exit nft_hash_module_exit(void) { - nft_unregister_set(&nft_hash_ops); + nft_unregister_expr(&nft_hash_type); } module_init(nft_hash_module_init); module_exit(nft_hash_module_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); -MODULE_ALIAS_NFT_SET(); +MODULE_AUTHOR("Laura Garcia <nevola@gmail.com>"); +MODULE_ALIAS_NFT_EXPR("hash"); diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c new file mode 100644 index 000000000000..294745ecb0fc --- /dev/null +++ b/net/netfilter/nft_numgen.c @@ -0,0 +1,192 @@ +/* + * Copyright (c) 2016 Laura Garcia <nevola@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <linux/static_key.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_core.h> + +static DEFINE_PER_CPU(struct rnd_state, nft_numgen_prandom_state); + +struct nft_ng_inc { + enum nft_registers dreg:8; + u32 until; + atomic_t counter; +}; + +static void nft_ng_inc_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_ng_inc *priv = nft_expr_priv(expr); + u32 nval, oval; + + do { + oval = atomic_read(&priv->counter); + nval = (oval + 1 < priv->until) ? oval + 1 : 0; + } while (atomic_cmpxchg(&priv->counter, oval, nval) != oval); + + memcpy(®s->data[priv->dreg], &priv->counter, sizeof(u32)); +} + +static const struct nla_policy nft_ng_policy[NFTA_NG_MAX + 1] = { + [NFTA_NG_DREG] = { .type = NLA_U32 }, + [NFTA_NG_UNTIL] = { .type = NLA_U32 }, + [NFTA_NG_TYPE] = { .type = NLA_U32 }, +}; + +static int nft_ng_inc_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_ng_inc *priv = nft_expr_priv(expr); + + priv->until = ntohl(nla_get_be32(tb[NFTA_NG_UNTIL])); + if (priv->until == 0) + return -ERANGE; + + priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]); + atomic_set(&priv->counter, 0); + + return nft_validate_register_store(ctx, priv->dreg, NULL, + NFT_DATA_VALUE, sizeof(u32)); +} + +static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg, + u32 until, enum nft_ng_types type) +{ + if (nft_dump_register(skb, NFTA_NG_DREG, dreg)) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_NG_UNTIL, htonl(until))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_NG_TYPE, htonl(type))) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +static int nft_ng_inc_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_ng_inc *priv = nft_expr_priv(expr); + + return nft_ng_dump(skb, priv->dreg, priv->until, NFT_NG_INCREMENTAL); +} + +struct nft_ng_random { + enum nft_registers dreg:8; + u32 until; +}; + +static void nft_ng_random_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_ng_random *priv = nft_expr_priv(expr); + struct rnd_state *state = this_cpu_ptr(&nft_numgen_prandom_state); + + regs->data[priv->dreg] = reciprocal_scale(prandom_u32_state(state), + priv->until); +} + +static int nft_ng_random_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_ng_random *priv = nft_expr_priv(expr); + + priv->until = ntohl(nla_get_be32(tb[NFTA_NG_UNTIL])); + if (priv->until == 0) + return -ERANGE; + + prandom_init_once(&nft_numgen_prandom_state); + + priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]); + + return nft_validate_register_store(ctx, priv->dreg, NULL, + NFT_DATA_VALUE, sizeof(u32)); +} + +static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_ng_random *priv = nft_expr_priv(expr); + + return nft_ng_dump(skb, priv->dreg, priv->until, NFT_NG_RANDOM); +} + +static struct nft_expr_type nft_ng_type; +static const struct nft_expr_ops nft_ng_inc_ops = { + .type = &nft_ng_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_ng_inc)), + .eval = nft_ng_inc_eval, + .init = nft_ng_inc_init, + .dump = nft_ng_inc_dump, +}; + +static const struct nft_expr_ops nft_ng_random_ops = { + .type = &nft_ng_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_ng_random)), + .eval = nft_ng_random_eval, + .init = nft_ng_random_init, + .dump = nft_ng_random_dump, +}; + +static const struct nft_expr_ops * +nft_ng_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) +{ + u32 type; + + if (!tb[NFTA_NG_DREG] || + !tb[NFTA_NG_UNTIL] || + !tb[NFTA_NG_TYPE]) + return ERR_PTR(-EINVAL); + + type = ntohl(nla_get_be32(tb[NFTA_NG_TYPE])); + + switch (type) { + case NFT_NG_INCREMENTAL: + return &nft_ng_inc_ops; + case NFT_NG_RANDOM: + return &nft_ng_random_ops; + } + + return ERR_PTR(-EINVAL); +} + +static struct nft_expr_type nft_ng_type __read_mostly = { + .name = "numgen", + .select_ops = &nft_ng_select_ops, + .policy = nft_ng_policy, + .maxattr = NFTA_NG_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_ng_module_init(void) +{ + return nft_register_expr(&nft_ng_type); +} + +static void __exit nft_ng_module_exit(void) +{ + nft_unregister_expr(&nft_ng_type); +} + +module_init(nft_ng_module_init); +module_exit(nft_ng_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Laura Garcia <nevola@gmail.com>"); +MODULE_ALIAS_NFT_EXPR("numgen"); diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c new file mode 100644 index 000000000000..6eafbf987ed9 --- /dev/null +++ b/net/netfilter/nft_quota.c @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2016 Pablo Neira Ayuso <pablo@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/atomic.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> + +struct nft_quota { + u64 quota; + bool invert; + atomic64_t remain; +}; + +static inline long nft_quota(struct nft_quota *priv, + const struct nft_pktinfo *pkt) +{ + return atomic64_sub_return(pkt->skb->len, &priv->remain); +} + +static void nft_quota_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_quota *priv = nft_expr_priv(expr); + + if (nft_quota(priv, pkt) < 0 && !priv->invert) + regs->verdict.code = NFT_BREAK; +} + +static const struct nla_policy nft_quota_policy[NFTA_QUOTA_MAX + 1] = { + [NFTA_QUOTA_BYTES] = { .type = NLA_U64 }, + [NFTA_QUOTA_FLAGS] = { .type = NLA_U32 }, +}; + +static int nft_quota_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_quota *priv = nft_expr_priv(expr); + u32 flags = 0; + u64 quota; + + if (!tb[NFTA_QUOTA_BYTES]) + return -EINVAL; + + quota = be64_to_cpu(nla_get_be64(tb[NFTA_QUOTA_BYTES])); + if (quota > S64_MAX) + return -EOVERFLOW; + + if (tb[NFTA_QUOTA_FLAGS]) { + flags = ntohl(nla_get_be32(tb[NFTA_QUOTA_FLAGS])); + if (flags & ~NFT_QUOTA_F_INV) + return -EINVAL; + } + + priv->quota = quota; + priv->invert = (flags & NFT_QUOTA_F_INV) ? true : false; + atomic64_set(&priv->remain, quota); + + return 0; +} + +static int nft_quota_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_quota *priv = nft_expr_priv(expr); + u32 flags = priv->invert ? NFT_QUOTA_F_INV : 0; + + if (nla_put_be64(skb, NFTA_QUOTA_BYTES, cpu_to_be64(priv->quota), + NFTA_QUOTA_PAD) || + nla_put_be32(skb, NFTA_QUOTA_FLAGS, htonl(flags))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_quota_type; +static const struct nft_expr_ops nft_quota_ops = { + .type = &nft_quota_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_quota)), + .eval = nft_quota_eval, + .init = nft_quota_init, + .dump = nft_quota_dump, +}; + +static struct nft_expr_type nft_quota_type __read_mostly = { + .name = "quota", + .ops = &nft_quota_ops, + .policy = nft_quota_policy, + .maxattr = NFTA_QUOTA_MAX, + .flags = NFT_EXPR_STATEFUL, + .owner = THIS_MODULE, +}; + +static int __init nft_quota_module_init(void) +{ + return nft_register_expr(&nft_quota_type); +} + +static void __exit nft_quota_module_exit(void) +{ + nft_unregister_expr(&nft_quota_type); +} + +module_init(nft_quota_module_init); +module_exit(nft_quota_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +MODULE_ALIAS_NFT_EXPR("quota"); diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c new file mode 100644 index 000000000000..3794cb2fc788 --- /dev/null +++ b/net/netfilter/nft_set_hash.c @@ -0,0 +1,404 @@ +/* + * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/list.h> +#include <linux/log2.h> +#include <linux/jhash.h> +#include <linux/netlink.h> +#include <linux/workqueue.h> +#include <linux/rhashtable.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> + +/* We target a hash table size of 4, element hint is 75% of final size */ +#define NFT_HASH_ELEMENT_HINT 3 + +struct nft_hash { + struct rhashtable ht; + struct delayed_work gc_work; +}; + +struct nft_hash_elem { + struct rhash_head node; + struct nft_set_ext ext; +}; + +struct nft_hash_cmp_arg { + const struct nft_set *set; + const u32 *key; + u8 genmask; +}; + +static const struct rhashtable_params nft_hash_params; + +static inline u32 nft_hash_key(const void *data, u32 len, u32 seed) +{ + const struct nft_hash_cmp_arg *arg = data; + + return jhash(arg->key, len, seed); +} + +static inline u32 nft_hash_obj(const void *data, u32 len, u32 seed) +{ + const struct nft_hash_elem *he = data; + + return jhash(nft_set_ext_key(&he->ext), len, seed); +} + +static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg, + const void *ptr) +{ + const struct nft_hash_cmp_arg *x = arg->key; + const struct nft_hash_elem *he = ptr; + + if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen)) + return 1; + if (nft_set_elem_expired(&he->ext)) + return 1; + if (!nft_set_elem_active(&he->ext, x->genmask)) + return 1; + return 0; +} + +static bool nft_hash_lookup(const struct net *net, const struct nft_set *set, + const u32 *key, const struct nft_set_ext **ext) +{ + struct nft_hash *priv = nft_set_priv(set); + const struct nft_hash_elem *he; + struct nft_hash_cmp_arg arg = { + .genmask = nft_genmask_cur(net), + .set = set, + .key = key, + }; + + he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); + if (he != NULL) + *ext = &he->ext; + + return !!he; +} + +static bool nft_hash_update(struct nft_set *set, const u32 *key, + void *(*new)(struct nft_set *, + const struct nft_expr *, + struct nft_regs *regs), + const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_set_ext **ext) +{ + struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_elem *he; + struct nft_hash_cmp_arg arg = { + .genmask = NFT_GENMASK_ANY, + .set = set, + .key = key, + }; + + he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); + if (he != NULL) + goto out; + + he = new(set, expr, regs); + if (he == NULL) + goto err1; + if (rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node, + nft_hash_params)) + goto err2; +out: + *ext = &he->ext; + return true; + +err2: + nft_set_elem_destroy(set, he); +err1: + return false; +} + +static int nft_hash_insert(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem, + struct nft_set_ext **ext) +{ + struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_elem *he = elem->priv; + struct nft_hash_cmp_arg arg = { + .genmask = nft_genmask_next(net), + .set = set, + .key = elem->key.val.data, + }; + struct nft_hash_elem *prev; + + prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node, + nft_hash_params); + if (IS_ERR(prev)) + return PTR_ERR(prev); + if (prev) { + *ext = &prev->ext; + return -EEXIST; + } + return 0; +} + +static void nft_hash_activate(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem) +{ + struct nft_hash_elem *he = elem->priv; + + nft_set_elem_change_active(net, set, &he->ext); + nft_set_elem_clear_busy(&he->ext); +} + +static void *nft_hash_deactivate(const struct net *net, + const struct nft_set *set, + const struct nft_set_elem *elem) +{ + struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_elem *he; + struct nft_hash_cmp_arg arg = { + .genmask = nft_genmask_next(net), + .set = set, + .key = elem->key.val.data, + }; + + rcu_read_lock(); + he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); + if (he != NULL) { + if (!nft_set_elem_mark_busy(&he->ext) || + !nft_is_active(net, &he->ext)) + nft_set_elem_change_active(net, set, &he->ext); + else + he = NULL; + } + rcu_read_unlock(); + + return he; +} + +static void nft_hash_remove(const struct nft_set *set, + const struct nft_set_elem *elem) +{ + struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_elem *he = elem->priv; + + rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params); +} + +static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, + struct nft_set_iter *iter) +{ + struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_elem *he; + struct rhashtable_iter hti; + struct nft_set_elem elem; + int err; + + err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL); + iter->err = err; + if (err) + return; + + err = rhashtable_walk_start(&hti); + if (err && err != -EAGAIN) { + iter->err = err; + goto out; + } + + while ((he = rhashtable_walk_next(&hti))) { + if (IS_ERR(he)) { + err = PTR_ERR(he); + if (err != -EAGAIN) { + iter->err = err; + goto out; + } + + continue; + } + + if (iter->count < iter->skip) + goto cont; + if (nft_set_elem_expired(&he->ext)) + goto cont; + if (!nft_set_elem_active(&he->ext, iter->genmask)) + goto cont; + + elem.priv = he; + + iter->err = iter->fn(ctx, set, iter, &elem); + if (iter->err < 0) + goto out; + +cont: + iter->count++; + } + +out: + rhashtable_walk_stop(&hti); + rhashtable_walk_exit(&hti); +} + +static void nft_hash_gc(struct work_struct *work) +{ + struct nft_set *set; + struct nft_hash_elem *he; + struct nft_hash *priv; + struct nft_set_gc_batch *gcb = NULL; + struct rhashtable_iter hti; + int err; + + priv = container_of(work, struct nft_hash, gc_work.work); + set = nft_set_container_of(priv); + + err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL); + if (err) + goto schedule; + + err = rhashtable_walk_start(&hti); + if (err && err != -EAGAIN) + goto out; + + while ((he = rhashtable_walk_next(&hti))) { + if (IS_ERR(he)) { + if (PTR_ERR(he) != -EAGAIN) + goto out; + continue; + } + + if (!nft_set_elem_expired(&he->ext)) + continue; + if (nft_set_elem_mark_busy(&he->ext)) + continue; + + gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC); + if (gcb == NULL) + goto out; + rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params); + atomic_dec(&set->nelems); + nft_set_gc_batch_add(gcb, he); + } +out: + rhashtable_walk_stop(&hti); + rhashtable_walk_exit(&hti); + + nft_set_gc_batch_complete(gcb); +schedule: + queue_delayed_work(system_power_efficient_wq, &priv->gc_work, + nft_set_gc_interval(set)); +} + +static unsigned int nft_hash_privsize(const struct nlattr * const nla[]) +{ + return sizeof(struct nft_hash); +} + +static const struct rhashtable_params nft_hash_params = { + .head_offset = offsetof(struct nft_hash_elem, node), + .hashfn = nft_hash_key, + .obj_hashfn = nft_hash_obj, + .obj_cmpfn = nft_hash_cmp, + .automatic_shrinking = true, +}; + +static int nft_hash_init(const struct nft_set *set, + const struct nft_set_desc *desc, + const struct nlattr * const tb[]) +{ + struct nft_hash *priv = nft_set_priv(set); + struct rhashtable_params params = nft_hash_params; + int err; + + params.nelem_hint = desc->size ?: NFT_HASH_ELEMENT_HINT; + params.key_len = set->klen; + + err = rhashtable_init(&priv->ht, ¶ms); + if (err < 0) + return err; + + INIT_DEFERRABLE_WORK(&priv->gc_work, nft_hash_gc); + if (set->flags & NFT_SET_TIMEOUT) + queue_delayed_work(system_power_efficient_wq, &priv->gc_work, + nft_set_gc_interval(set)); + return 0; +} + +static void nft_hash_elem_destroy(void *ptr, void *arg) +{ + nft_set_elem_destroy((const struct nft_set *)arg, ptr); +} + +static void nft_hash_destroy(const struct nft_set *set) +{ + struct nft_hash *priv = nft_set_priv(set); + + cancel_delayed_work_sync(&priv->gc_work); + rhashtable_free_and_destroy(&priv->ht, nft_hash_elem_destroy, + (void *)set); +} + +static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, + struct nft_set_estimate *est) +{ + unsigned int esize; + + esize = sizeof(struct nft_hash_elem); + if (desc->size) { + est->size = sizeof(struct nft_hash) + + roundup_pow_of_two(desc->size * 4 / 3) * + sizeof(struct nft_hash_elem *) + + desc->size * esize; + } else { + /* Resizing happens when the load drops below 30% or goes + * above 75%. The average of 52.5% load (approximated by 50%) + * is used for the size estimation of the hash buckets, + * meaning we calculate two buckets per element. + */ + est->size = esize + 2 * sizeof(struct nft_hash_elem *); + } + + est->class = NFT_SET_CLASS_O_1; + + return true; +} + +static struct nft_set_ops nft_hash_ops __read_mostly = { + .privsize = nft_hash_privsize, + .elemsize = offsetof(struct nft_hash_elem, ext), + .estimate = nft_hash_estimate, + .init = nft_hash_init, + .destroy = nft_hash_destroy, + .insert = nft_hash_insert, + .activate = nft_hash_activate, + .deactivate = nft_hash_deactivate, + .remove = nft_hash_remove, + .lookup = nft_hash_lookup, + .update = nft_hash_update, + .walk = nft_hash_walk, + .features = NFT_SET_MAP | NFT_SET_TIMEOUT, + .owner = THIS_MODULE, +}; + +static int __init nft_hash_module_init(void) +{ + return nft_register_set(&nft_hash_ops); +} + +static void __exit nft_hash_module_exit(void) +{ + nft_unregister_set(&nft_hash_ops); +} + +module_init(nft_hash_module_init); +module_exit(nft_hash_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_ALIAS_NFT_SET(); diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_set_rbtree.c index ffe9ae062d23..38b5bda242f8 100644 --- a/net/netfilter/nft_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -96,7 +96,8 @@ out: } static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, - struct nft_rbtree_elem *new) + struct nft_rbtree_elem *new, + struct nft_set_ext **ext) { struct nft_rbtree *priv = nft_set_priv(set); u8 genmask = nft_genmask_next(net); @@ -124,8 +125,10 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, else if (!nft_rbtree_interval_end(rbe) && nft_rbtree_interval_end(new)) p = &parent->rb_right; - else + else { + *ext = &rbe->ext; return -EEXIST; + } } } } @@ -135,13 +138,14 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, } static int nft_rbtree_insert(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem) + const struct nft_set_elem *elem, + struct nft_set_ext **ext) { struct nft_rbtree_elem *rbe = elem->priv; int err; spin_lock_bh(&nft_rbtree_lock); - err = __nft_rbtree_insert(net, set, rbe); + err = __nft_rbtree_insert(net, set, rbe, ext); spin_unlock_bh(&nft_rbtree_lock); return err; diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 188404b9b002..a3b8f697cfc5 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -233,10 +233,8 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par, return false; if (info->match_flags & XT_CONNTRACK_EXPIRES) { - unsigned long expires = 0; + unsigned long expires = nf_ct_expires(ct) / HZ; - if (timer_pending(&ct->timeout)) - expires = (ct->timeout.expires - jiffies) / HZ; if ((expires >= info->expires_min && expires <= info->expires_max) ^ !(info->invert_flags & XT_CONNTRACK_EXPIRES)) diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index e5f18988aee0..bb33598e4530 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -107,8 +107,8 @@ static int physdev_mt_check(const struct xt_mtchk_param *par) info->invert & XT_PHYSDEV_OP_BRIDGED) && par->hook_mask & ((1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) | (1 << NF_INET_POST_ROUTING))) { - pr_info("using --physdev-out and --physdev-is-out are only" - "supported in the FORWARD and POSTROUTING chains with" + pr_info("using --physdev-out and --physdev-is-out are only " + "supported in the FORWARD and POSTROUTING chains with " "bridged traffic.\n"); if (par->hook_mask & (1 << NF_INET_LOCAL_OUT)) return -EINVAL; diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index e054a748ff25..31045ef44a82 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1367,7 +1367,7 @@ static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info) if (ct_info->helper) module_put(ct_info->helper->me); if (ct_info->ct) - nf_ct_put(ct_info->ct); + nf_ct_tmpl_free(ct_info->ct); } void ovs_ct_init(struct net *net) diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index ae469b37d852..753f774cb46f 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -269,18 +269,19 @@ void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, u32 acked) * * RCU is locked, no other locks set */ -void tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, - struct tipc_msg *hdr) +int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, + struct tipc_msg *hdr) { struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq; struct sk_buff_head xmitq; + int rc = 0; __skb_queue_head_init(&xmitq); tipc_bcast_lock(net); if (msg_type(hdr) == STATE_MSG) { tipc_link_bc_ack_rcv(l, msg_bcast_ack(hdr), &xmitq); - tipc_link_bc_sync_rcv(l, hdr, &xmitq); + rc = tipc_link_bc_sync_rcv(l, hdr, &xmitq); } else { tipc_link_bc_init_rcv(l, hdr); } @@ -291,6 +292,7 @@ void tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, /* Any socket wakeup messages ? */ if (!skb_queue_empty(inputq)) tipc_sk_rcv(net, inputq); + return rc; } /* tipc_bcast_add_peer - add a peer node to broadcast link and bearer diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index d5e79b3767fd..5ffe34472ccd 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -56,8 +56,8 @@ int tipc_bcast_get_mtu(struct net *net); int tipc_bcast_xmit(struct net *net, struct sk_buff_head *list); int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb); void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, u32 acked); -void tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, - struct tipc_msg *hdr); +int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, + struct tipc_msg *hdr); int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg); int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]); int tipc_bclink_reset_stats(struct net *net); diff --git a/net/tipc/link.c b/net/tipc/link.c index 2c6e1b9e024b..b36e16cdc945 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -181,7 +181,10 @@ struct tipc_link { u16 acked; struct tipc_link *bc_rcvlink; struct tipc_link *bc_sndlink; - int nack_state; + unsigned long prev_retr; + u16 prev_from; + u16 prev_to; + u8 nack_state; bool bc_peer_is_up; /* Statistics */ @@ -202,6 +205,8 @@ enum { BC_NACK_SND_SUPPRESS, }; +#define TIPC_BC_RETR_LIMIT 10 /* [ms] */ + /* * Interval between NACKs when packets arrive out of order */ @@ -237,8 +242,8 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, u16 rcvgap, int tolerance, int priority, struct sk_buff_head *xmitq); static void link_print(struct tipc_link *l, const char *str); -static void tipc_link_build_nack_msg(struct tipc_link *l, - struct sk_buff_head *xmitq); +static int tipc_link_build_nack_msg(struct tipc_link *l, + struct sk_buff_head *xmitq); static void tipc_link_build_bc_init_msg(struct tipc_link *l, struct sk_buff_head *xmitq); static bool tipc_link_release_pkts(struct tipc_link *l, u16 to); @@ -367,6 +372,18 @@ int tipc_link_bc_peers(struct tipc_link *l) return l->ackers; } +u16 link_bc_rcv_gap(struct tipc_link *l) +{ + struct sk_buff *skb = skb_peek(&l->deferdq); + u16 gap = 0; + + if (more(l->snd_nxt, l->rcv_nxt)) + gap = l->snd_nxt - l->rcv_nxt; + if (skb) + gap = buf_seqno(skb) - l->rcv_nxt; + return gap; +} + void tipc_link_set_mtu(struct tipc_link *l, int mtu) { l->mtu = mtu; @@ -1135,7 +1152,10 @@ int tipc_link_build_state_msg(struct tipc_link *l, struct sk_buff_head *xmitq) if (((l->rcv_nxt ^ tipc_own_addr(l->net)) & 0xf) != 0xf) return 0; l->rcv_unacked = 0; - return TIPC_LINK_SND_BC_ACK; + + /* Use snd_nxt to store peer's snd_nxt in broadcast rcv link */ + l->snd_nxt = l->rcv_nxt; + return TIPC_LINK_SND_STATE; } /* Unicast ACK */ @@ -1164,17 +1184,26 @@ void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq) } /* tipc_link_build_nack_msg: prepare link nack message for transmission + * Note that sending of broadcast NACK is coordinated among nodes, to + * reduce the risk of NACK storms towards the sender */ -static void tipc_link_build_nack_msg(struct tipc_link *l, - struct sk_buff_head *xmitq) +static int tipc_link_build_nack_msg(struct tipc_link *l, + struct sk_buff_head *xmitq) { u32 def_cnt = ++l->stats.deferred_recv; + int match1, match2; - if (link_is_bc_rcvlink(l)) - return; + if (link_is_bc_rcvlink(l)) { + match1 = def_cnt & 0xf; + match2 = tipc_own_addr(l->net) & 0xf; + if (match1 == match2) + return TIPC_LINK_SND_STATE; + return 0; + } if ((skb_queue_len(&l->deferdq) == 1) || !(def_cnt % TIPC_NACK_INTV)) tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq); + return 0; } /* tipc_link_rcv - process TIPC packets/messages arriving from off-node @@ -1225,7 +1254,7 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, /* Defer delivery if sequence gap */ if (unlikely(seqno != rcv_nxt)) { __tipc_skb_queue_sorted(defq, seqno, skb); - tipc_link_build_nack_msg(l, xmitq); + rc |= tipc_link_build_nack_msg(l, xmitq); break; } @@ -1236,7 +1265,7 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, rc |= tipc_link_input(l, skb, l->inputq); if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN)) rc |= tipc_link_build_state_msg(l, xmitq); - if (unlikely(rc & ~TIPC_LINK_SND_BC_ACK)) + if (unlikely(rc & ~TIPC_LINK_SND_STATE)) break; } while ((skb = __skb_dequeue(defq))); @@ -1250,10 +1279,11 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, u16 rcvgap, int tolerance, int priority, struct sk_buff_head *xmitq) { + struct tipc_link *bcl = l->bc_rcvlink; struct sk_buff *skb; struct tipc_msg *hdr; struct sk_buff_head *dfq = &l->deferdq; - bool node_up = link_is_up(l->bc_rcvlink); + bool node_up = link_is_up(bcl); struct tipc_mon_state *mstate = &l->mon_state; int dlen = 0; void *data; @@ -1281,7 +1311,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, msg_set_net_plane(hdr, l->net_plane); msg_set_next_sent(hdr, l->snd_nxt); msg_set_ack(hdr, l->rcv_nxt - 1); - msg_set_bcast_ack(hdr, l->bc_rcvlink->rcv_nxt - 1); + msg_set_bcast_ack(hdr, bcl->rcv_nxt - 1); msg_set_last_bcast(hdr, l->bc_sndlink->snd_nxt - 1); msg_set_link_tolerance(hdr, tolerance); msg_set_linkprio(hdr, priority); @@ -1291,6 +1321,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, if (mtyp == STATE_MSG) { msg_set_seq_gap(hdr, rcvgap); + msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl)); msg_set_probe(hdr, probe); tipc_mon_prep(l->net, data, &dlen, mstate, l->bearer_id); msg_set_size(hdr, INT_H_SIZE + dlen); @@ -1573,51 +1604,107 @@ void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr) l->rcv_nxt = peers_snd_nxt; } +/* link_bc_retr eval()- check if the indicated range can be retransmitted now + * - Adjust permitted range if there is overlap with previous retransmission + */ +static bool link_bc_retr_eval(struct tipc_link *l, u16 *from, u16 *to) +{ + unsigned long elapsed = jiffies_to_msecs(jiffies - l->prev_retr); + + if (less(*to, *from)) + return false; + + /* New retransmission request */ + if ((elapsed > TIPC_BC_RETR_LIMIT) || + less(*to, l->prev_from) || more(*from, l->prev_to)) { + l->prev_from = *from; + l->prev_to = *to; + l->prev_retr = jiffies; + return true; + } + + /* Inside range of previous retransmit */ + if (!less(*from, l->prev_from) && !more(*to, l->prev_to)) + return false; + + /* Fully or partially outside previous range => exclude overlap */ + if (less(*from, l->prev_from)) { + *to = l->prev_from - 1; + l->prev_from = *from; + } + if (more(*to, l->prev_to)) { + *from = l->prev_to + 1; + l->prev_to = *to; + } + l->prev_retr = jiffies; + return true; +} + /* tipc_link_bc_sync_rcv - update rcv link according to peer's send state */ -void tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, - struct sk_buff_head *xmitq) +int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, + struct sk_buff_head *xmitq) { + struct tipc_link *snd_l = l->bc_sndlink; u16 peers_snd_nxt = msg_bc_snd_nxt(hdr); + u16 from = msg_bcast_ack(hdr) + 1; + u16 to = from + msg_bc_gap(hdr) - 1; + int rc = 0; if (!link_is_up(l)) - return; + return rc; if (!msg_peer_node_is_up(hdr)) - return; + return rc; /* Open when peer ackowledges our bcast init msg (pkt #1) */ if (msg_ack(hdr)) l->bc_peer_is_up = true; if (!l->bc_peer_is_up) - return; + return rc; + + l->stats.recv_nacks++; /* Ignore if peers_snd_nxt goes beyond receive window */ if (more(peers_snd_nxt, l->rcv_nxt + l->window)) - return; + return rc; + + if (link_bc_retr_eval(snd_l, &from, &to)) + rc = tipc_link_retrans(snd_l, from, to, xmitq); + + l->snd_nxt = peers_snd_nxt; + if (link_bc_rcv_gap(l)) + rc |= TIPC_LINK_SND_STATE; + + /* Return now if sender supports nack via STATE messages */ + if (l->peer_caps & TIPC_BCAST_STATE_NACK) + return rc; + + /* Otherwise, be backwards compatible */ if (!more(peers_snd_nxt, l->rcv_nxt)) { l->nack_state = BC_NACK_SND_CONDITIONAL; - return; + return 0; } /* Don't NACK if one was recently sent or peeked */ if (l->nack_state == BC_NACK_SND_SUPPRESS) { l->nack_state = BC_NACK_SND_UNCONDITIONAL; - return; + return 0; } /* Conditionally delay NACK sending until next synch rcv */ if (l->nack_state == BC_NACK_SND_CONDITIONAL) { l->nack_state = BC_NACK_SND_UNCONDITIONAL; if ((peers_snd_nxt - l->rcv_nxt) < TIPC_MIN_LINK_WIN) - return; + return 0; } /* Send NACK now but suppress next one */ tipc_link_build_bc_proto_msg(l, true, peers_snd_nxt, xmitq); l->nack_state = BC_NACK_SND_SUPPRESS; + return 0; } void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked, @@ -1654,6 +1741,8 @@ void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked, } /* tipc_link_bc_nack_rcv(): receive broadcast nack message + * This function is here for backwards compatibility, since + * no BCAST_PROTOCOL/STATE messages occur from TIPC v2.5. */ int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq) diff --git a/net/tipc/link.h b/net/tipc/link.h index d7e9d42fcb2d..d1bd1787a768 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -63,7 +63,7 @@ enum { enum { TIPC_LINK_UP_EVT = 1, TIPC_LINK_DOWN_EVT = (1 << 1), - TIPC_LINK_SND_BC_ACK = (1 << 2) + TIPC_LINK_SND_STATE = (1 << 2) }; /* Starting value for maximum packet size negotiation on unicast links @@ -138,8 +138,8 @@ void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked, void tipc_link_build_bc_sync_msg(struct tipc_link *l, struct sk_buff_head *xmitq); void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr); -void tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, - struct sk_buff_head *xmitq); +int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, + struct sk_buff_head *xmitq); int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq); #endif diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 7cf52fb39bee..c3832cdf2278 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -719,6 +719,16 @@ static inline char *msg_media_addr(struct tipc_msg *m) return (char *)&m->hdr[TIPC_MEDIA_INFO_OFFSET]; } +static inline u32 msg_bc_gap(struct tipc_msg *m) +{ + return msg_bits(m, 8, 0, 0x3ff); +} + +static inline void msg_set_bc_gap(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 8, 0, 0x3ff, n); +} + /* * Word 9 */ diff --git a/net/tipc/node.c b/net/tipc/node.c index 7e8b75fd1a02..7ef14e2d2356 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1262,6 +1262,34 @@ void tipc_node_broadcast(struct net *net, struct sk_buff *skb) kfree_skb(skb); } +static void tipc_node_bc_sync_rcv(struct tipc_node *n, struct tipc_msg *hdr, + int bearer_id, struct sk_buff_head *xmitq) +{ + struct tipc_link *ucl; + int rc; + + rc = tipc_bcast_sync_rcv(n->net, n->bc_entry.link, hdr); + + if (rc & TIPC_LINK_DOWN_EVT) { + tipc_bearer_reset_all(n->net); + return; + } + + if (!(rc & TIPC_LINK_SND_STATE)) + return; + + /* If probe message, a STATE response will be sent anyway */ + if (msg_probe(hdr)) + return; + + /* Produce a STATE message carrying broadcast NACK */ + tipc_node_read_lock(n); + ucl = n->links[bearer_id].link; + if (ucl) + tipc_link_build_state_msg(ucl, xmitq); + tipc_node_read_unlock(n); +} + /** * tipc_node_bc_rcv - process TIPC broadcast packet arriving from off-node * @net: the applicable net namespace @@ -1298,7 +1326,7 @@ static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id rc = tipc_bcast_rcv(net, be->link, skb); /* Broadcast ACKs are sent on a unicast link */ - if (rc & TIPC_LINK_SND_BC_ACK) { + if (rc & TIPC_LINK_SND_STATE) { tipc_node_read_lock(n); tipc_link_build_state_msg(le->link, &xmitq); tipc_node_read_unlock(n); @@ -1505,7 +1533,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) /* Ensure broadcast reception is in synch with peer's send state */ if (unlikely(usr == LINK_PROTOCOL)) - tipc_bcast_sync_rcv(net, n->bc_entry.link, hdr); + tipc_node_bc_sync_rcv(n, hdr, bearer_id, &xmitq); else if (unlikely(tipc_link_acked(n->bc_entry.link) != bc_ack)) tipc_bcast_ack_rcv(net, n->bc_entry.link, bc_ack); diff --git a/net/tipc/node.h b/net/tipc/node.h index 4578b34c7dca..39ef54c1f2ad 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -1,7 +1,7 @@ /* * net/tipc/node.h: Include file for TIPC node management routines * - * Copyright (c) 2000-2006, 2014-2015, Ericsson AB + * Copyright (c) 2000-2006, 2014-2016, Ericsson AB * Copyright (c) 2005, 2010-2014, Wind River Systems * All rights reserved. * @@ -45,11 +45,14 @@ /* Optional capabilities supported by this code version */ enum { - TIPC_BCAST_SYNCH = (1 << 1), - TIPC_BLOCK_FLOWCTL = (2 << 1) + TIPC_BCAST_SYNCH = (1 << 1), + TIPC_BCAST_STATE_NACK = (1 << 2), + TIPC_BLOCK_FLOWCTL = (1 << 3) }; -#define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH | TIPC_BLOCK_FLOWCTL) +#define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH | \ + TIPC_BCAST_STATE_NACK | \ + TIPC_BLOCK_FLOWCTL) #define INVALID_BEARER_ID -1 void tipc_node_stop(struct net *net); |