237 files changed, 10718 insertions(+), 4515 deletions(-)
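One recurring change in the ENA hunks below is the replacement of fixed msleep(ENA_POLL_MS) admin-queue polling with capped exponential backoff via the new ena_delay_exponential_backoff_us() helper. The following is a minimal stand-alone C sketch of that pattern, not the driver code itself: the 100 us / 5000 us clamps are taken from the patch (ENA_MIN_ADMIN_POLL_US / ENA_MAX_ADMIN_POLL_US), while sleep_us is a hypothetical stand-in for the kernel's usleep_range(delay_us, 2 * delay_us).

    #include <stdint.h>

    #define ENA_MIN_ADMIN_POLL_US 100u   /* lower clamp, from the patch */
    #define ENA_MAX_ADMIN_POLL_US 5000u  /* upper clamp, from the patch */

    /* Sleep for delay_us * 2^exp microseconds, clamped to the range above.
     * sleep_us() is a caller-supplied stand-in for usleep_range(). */
    static void delay_exponential_backoff_us(uint32_t exp, uint32_t delay_us,
                                             void (*sleep_us)(uint32_t us))
    {
        uint64_t scaled;

        if (delay_us < ENA_MIN_ADMIN_POLL_US)
            delay_us = ENA_MIN_ADMIN_POLL_US;

        /* widen before shifting so large exp values cannot overflow */
        scaled = (exp < 32) ? (uint64_t)delay_us << exp
                            : ENA_MAX_ADMIN_POLL_US;
        if (scaled > ENA_MAX_ADMIN_POLL_US)
            scaled = ENA_MAX_ADMIN_POLL_US;

        sleep_us((uint32_t)scaled);
    }

Each polling loop in the patch (the admin CQ wait, wait_for_reset_state(), the abort-completion wait) passes an incrementing exp starting at 0, so the per-iteration wait grows 100, 200, 400, ... microseconds and saturates at 5 ms, rather than sleeping a fixed 5 ms every pass.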
diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst index 2ad1b77a7182..42cd04bca548 100644 --- a/Documentation/admin-guide/sysctl/net.rst +++ b/Documentation/admin-guide/sysctl/net.rst @@ -339,7 +339,9 @@ settings from init_net and for IPv6 we reset all settings to default. If set to 1, both IPv4 and IPv6 settings are forced to inherit from current ones in init_net. If set to 2, both IPv4 and IPv6 settings are -forced to reset to their default values. +forced to reset to their default values. If set to 3, both IPv4 and IPv6 +settings are forced to inherit from current ones in the netns where this +new netns has been created. Default : 0 (for compatibility reasons) diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.txt deleted file mode 100644 index ecf027a9003a..000000000000 --- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.txt +++ /dev/null @@ -1,36 +0,0 @@ -Mediatek pericfg controller -=========================== - -The Mediatek pericfg controller provides various clocks and reset -outputs to the system. - -Required Properties: - -- compatible: Should be one of: - - "mediatek,mt2701-pericfg", "syscon" - - "mediatek,mt2712-pericfg", "syscon" - - "mediatek,mt7622-pericfg", "syscon" - - "mediatek,mt7623-pericfg", "mediatek,mt2701-pericfg", "syscon" - - "mediatek,mt7629-pericfg", "syscon" - - "mediatek,mt8135-pericfg", "syscon" - - "mediatek,mt8173-pericfg", "syscon" - - "mediatek,mt8183-pericfg", "syscon" -- #clock-cells: Must be 1 -- #reset-cells: Must be 1 - -The pericfg controller uses the common clk binding from -Documentation/devicetree/bindings/clock/clock-bindings.txt -The available clocks are defined in dt-bindings/clock/mt*-clk.h. -Also it uses the common reset controller binding from -Documentation/devicetree/bindings/reset/reset.txt. -The available reset outputs are defined in -dt-bindings/reset/mt*-resets.h - -Example: - -pericfg: power-controller@10003000 { - compatible = "mediatek,mt8173-pericfg", "syscon"; - reg = <0 0x10003000 0 0x1000>; - #clock-cells = <1>; - #reset-cells = <1>; -}; diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.yaml b/Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.yaml new file mode 100644 index 000000000000..55209a2baedc --- /dev/null +++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.yaml @@ -0,0 +1,64 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: "http://devicetree.org/schemas/arm/mediatek/mediatek,pericfg.yaml#" +$schema: "http://devicetree.org/meta-schemas/core.yaml#" + +title: MediaTek Peripheral Configuration Controller + +maintainers: + - Bartosz Golaszewski <[email protected]> + +description: + The Mediatek pericfg controller provides various clocks and reset outputs + to the system. 
+ +properties: + compatible: + oneOf: + - items: + - enum: + - mediatek,mt2701-pericfg + - mediatek,mt2712-pericfg + - mediatek,mt7622-pericfg + - mediatek,mt7629-pericfg + - mediatek,mt8135-pericfg + - mediatek,mt8173-pericfg + - mediatek,mt8183-pericfg + - mediatek,mt8516-pericfg + - const: syscon + - items: + # Special case for mt7623 for backward compatibility + - const: mediatek,mt7623-pericfg + - const: mediatek,mt2701-pericfg + - const: syscon + + reg: + maxItems: 1 + + '#clock-cells': + const: 1 + + '#reset-cells': + const: 1 + +required: + - compatible + - reg + +examples: + - | + pericfg@10003000 { + compatible = "mediatek,mt8173-pericfg", "syscon"; + reg = <0x10003000 0x1000>; + #clock-cells = <1>; + #reset-cells = <1>; + }; + + - | + pericfg@10003000 { + compatible = "mediatek,mt7623-pericfg", "mediatek,mt2701-pericfg", "syscon"; + reg = <0x10003000 0x1000>; + #clock-cells = <1>; + #reset-cells = <1>; + }; diff --git a/Documentation/devicetree/bindings/net/mediatek,eth-mac.yaml b/Documentation/devicetree/bindings/net/mediatek,eth-mac.yaml new file mode 100644 index 000000000000..f85d91a9d6e5 --- /dev/null +++ b/Documentation/devicetree/bindings/net/mediatek,eth-mac.yaml @@ -0,0 +1,89 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/mediatek,eth-mac.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: MediaTek STAR Ethernet MAC Controller + +maintainers: + - Bartosz Golaszewski <[email protected]> + +description: + This Ethernet MAC is used on the MT8* family of SoCs from MediaTek. + It's compliant with 802.3 standards and supports half- and full-duplex + modes with flow-control as well as CRC offloading and VLAN tags. + +allOf: + - $ref: "ethernet-controller.yaml#" + +properties: + compatible: + enum: + - mediatek,mt8516-eth + - mediatek,mt8518-eth + - mediatek,mt8175-eth + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + minItems: 3 + maxItems: 3 + + clock-names: + additionalItems: false + items: + - const: core + - const: reg + - const: trans + + mediatek,pericfg: + $ref: /schemas/types.yaml#definitions/phandle + description: + Phandle to the device containing the PERICFG register range. This is used + to control the MII mode. + + mdio: + type: object + description: + Creates and registers an MDIO bus. + +required: + - compatible + - reg + - interrupts + - clocks + - clock-names + - mediatek,pericfg + - phy-handle + +examples: + - | + #include <dt-bindings/interrupt-controller/arm-gic.h> + #include <dt-bindings/clock/mt8516-clk.h> + + ethernet: ethernet@11180000 { + compatible = "mediatek,mt8516-eth"; + reg = <0x11180000 0x1000>; + mediatek,pericfg = <&pericfg>; + interrupts = <GIC_SPI 111 IRQ_TYPE_LEVEL_LOW>; + clocks = <&topckgen CLK_TOP_RG_ETH>, + <&topckgen CLK_TOP_66M_ETH>, + <&topckgen CLK_TOP_133M_ETH>; + clock-names = "core", "reg", "trans"; + phy-handle = <ð_phy>; + phy-mode = "rmii"; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + eth_phy: ethernet-phy@0 { + reg = <0>; + }; + }; + }; diff --git a/Documentation/networking/devlink/ice.rst b/Documentation/networking/devlink/ice.rst index 4574352d6ff4..72ea8d295724 100644 --- a/Documentation/networking/devlink/ice.rst +++ b/Documentation/networking/devlink/ice.rst @@ -69,6 +69,17 @@ The ``ice`` driver reports the following versions - The version of the DDP package that is active in the device. 
Note that both the name (as reported by ``fw.app.name``) and version are required to uniquely identify the package. + * - ``fw.netlist`` + - running + - 1.1.2000-6.7.0 + - The version of the netlist module. This module defines the device's + Ethernet capabilities and default settings, and is used by the + management firmware as part of managing link and device + connectivity. + * - ``fw.netlist.build`` + - running + - 0xee16ced7 + - The first 4 bytes of the hash of the netlist module contents. Regions ======= diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index eed46b6aa07d..7e651ea33eab 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -454,10 +454,12 @@ Request contents: Kernel response contents: - ==================================== ====== ========================== + ==================================== ====== ============================ ``ETHTOOL_A_LINKSTATE_HEADER`` nested reply header ``ETHTOOL_A_LINKSTATE_LINK`` bool link state (up/down) - ==================================== ====== ========================== + ``ETHTOOL_A_LINKSTATE_SQI`` u32 Current Signal Quality Index + ``ETHTOOL_A_LINKSTATE_SQI_MAX`` u32 Max support SQI value + ==================================== ====== ============================ For most NIC drivers, the value of ``ETHTOOL_A_LINKSTATE_LINK`` returns carrier flag provided by ``netif_carrier_ok()`` but there are drivers which diff --git a/arch/arm64/boot/dts/mediatek/mt8516.dtsi b/arch/arm64/boot/dts/mediatek/mt8516.dtsi index 2f8adf042195..89af661e7f63 100644 --- a/arch/arm64/boot/dts/mediatek/mt8516.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8516.dtsi @@ -191,6 +191,11 @@ #clock-cells = <1>; }; + pericfg: pericfg@10003050 { + compatible = "mediatek,mt8516-pericfg", "syscon"; + reg = <0 0x10003050 0 0x1000>; + }; + apmixedsys: apmixedsys@10018000 { compatible = "mediatek,mt8516-apmixedsys", "syscon"; reg = <0 0x10018000 0 0x710>; @@ -401,6 +406,18 @@ status = "disabled"; }; + ethernet: ethernet@11180000 { + compatible = "mediatek,mt8516-eth"; + reg = <0 0x11180000 0 0x1000>; + mediatek,pericfg = <&pericfg>; + interrupts = <GIC_SPI 111 IRQ_TYPE_LEVEL_LOW>; + clocks = <&topckgen CLK_TOP_RG_ETH>, + <&topckgen CLK_TOP_66M_ETH>, + <&topckgen CLK_TOP_133M_ETH>; + clock-names = "core", "reg", "trans"; + status = "disabled"; + }; + rng: rng@1020c000 { compatible = "mediatek,mt8516-rng", "mediatek,mt7623-rng"; diff --git a/arch/arm64/boot/dts/mediatek/pumpkin-common.dtsi b/arch/arm64/boot/dts/mediatek/pumpkin-common.dtsi index a31093d7142b..dfceffe6950a 100644 --- a/arch/arm64/boot/dts/mediatek/pumpkin-common.dtsi +++ b/arch/arm64/boot/dts/mediatek/pumpkin-common.dtsi @@ -9,6 +9,7 @@ / { aliases { serial0 = &uart0; + ethernet0 = ðernet; }; chosen { @@ -166,6 +167,24 @@ status = "okay"; }; +ðernet { + pinctrl-names = "default"; + pinctrl-0 = <ðernet_pins_default>; + phy-handle = <ð_phy>; + phy-mode = "rmii"; + mac-address = [00 00 00 00 00 00]; + status = "okay"; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + eth_phy: ethernet-phy@0 { + reg = <0>; + }; + }; +}; + &usb0 { status = "okay"; dr_mode = "peripheral"; @@ -218,4 +237,19 @@ bias-pull-up; }; }; + + ethernet_pins_default: ethernet { + pins_ethernet { + pinmux = <MT8516_PIN_0_EINT0__FUNC_EXT_TXD0>, + <MT8516_PIN_1_EINT1__FUNC_EXT_TXD1>, + <MT8516_PIN_5_EINT5__FUNC_EXT_RXER>, + <MT8516_PIN_6_EINT6__FUNC_EXT_RXC>, + <MT8516_PIN_7_EINT7__FUNC_EXT_RXDV>, + 
<MT8516_PIN_8_EINT8__FUNC_EXT_RXD0>, + <MT8516_PIN_9_EINT9__FUNC_EXT_RXD1>, + <MT8516_PIN_12_EINT12__FUNC_EXT_TXEN>, + <MT8516_PIN_38_MRG_DI__FUNC_EXT_MDIO>, + <MT8516_PIN_39_MRG_DO__FUNC_EXT_MDC>; + }; + }; }; diff --git a/arch/s390/include/asm/ccwgroup.h b/arch/s390/include/asm/ccwgroup.h index 7293c139dd79..ad3acb1e882b 100644 --- a/arch/s390/include/asm/ccwgroup.h +++ b/arch/s390/include/asm/ccwgroup.h @@ -36,11 +36,6 @@ struct ccwgroup_device { * @set_online: function called when device is set online * @set_offline: function called when device is set offline * @shutdown: function called when device is shut down - * @prepare: prepare for pm state transition - * @complete: undo work done in @prepare - * @freeze: callback for freezing during hibernation snapshotting - * @thaw: undo work done in @freeze - * @restore: callback for restoring after hibernation * @driver: embedded driver structure * @ccw_driver: supported ccw_driver (optional) */ @@ -50,11 +45,6 @@ struct ccwgroup_driver { int (*set_online) (struct ccwgroup_device *); int (*set_offline) (struct ccwgroup_device *); void (*shutdown)(struct ccwgroup_device *); - int (*prepare) (struct ccwgroup_device *); - void (*complete) (struct ccwgroup_device *); - int (*freeze)(struct ccwgroup_device *); - int (*thaw) (struct ccwgroup_device *); - int (*restore)(struct ccwgroup_device *); struct device_driver driver; struct ccw_driver *ccw_driver; diff --git a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h index 7be3dcbf3d16..336742f6e3c3 100644 --- a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h +++ b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h @@ -768,8 +768,8 @@ enum ena_admin_os_type { ENA_ADMIN_OS_DPDK = 3, ENA_ADMIN_OS_FREEBSD = 4, ENA_ADMIN_OS_IPXE = 5, - ENA_ADMIN_OS_ESXI = 6, - ENA_ADMIN_OS_GROUPS_NUM = 6, + ENA_ADMIN_OS_ESXI = 6, + ENA_ADMIN_OS_GROUPS_NUM = 6, }; struct ena_admin_host_info { @@ -813,7 +813,8 @@ struct ena_admin_host_info { u16 reserved; - /* 1 :0 : reserved + /* 0 : reserved + * 1 : rx_offset * 2 : interrupt_moderation * 31:3 : reserved */ @@ -1124,6 +1125,8 @@ struct ena_admin_ena_mmio_req_read_less_resp { #define ENA_ADMIN_HOST_INFO_DEVICE_MASK GENMASK(7, 3) #define ENA_ADMIN_HOST_INFO_BUS_SHIFT 8 #define ENA_ADMIN_HOST_INFO_BUS_MASK GENMASK(15, 8) +#define ENA_ADMIN_HOST_INFO_RX_OFFSET_SHIFT 1 +#define ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK BIT(1) #define ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_SHIFT 2 #define ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_MASK BIT(2) @@ -1133,4 +1136,4 @@ struct ena_admin_ena_mmio_req_read_less_resp { /* aenq_link_change_desc */ #define ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK BIT(0) -#endif /*_ENA_ADMIN_H_ */ +#endif /* _ENA_ADMIN_H_ */ diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index b51bf62af11b..432f143559a1 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -62,7 +62,9 @@ #define ENA_REGS_ADMIN_INTR_MASK 1 -#define ENA_POLL_MS 5 +#define ENA_MIN_ADMIN_POLL_US 100 + +#define ENA_MAX_ADMIN_POLL_US 5000 /*****************************************************************************/ /*****************************************************************************/ @@ -200,17 +202,17 @@ static void comp_ctxt_release(struct ena_com_admin_queue *queue, static struct ena_comp_ctx *get_comp_ctxt(struct ena_com_admin_queue *queue, u16 command_id, bool capture) { - if (unlikely(!queue->comp_ctx)) { - 
pr_err("Completion context is NULL\n"); - return NULL; - } - if (unlikely(command_id >= queue->q_depth)) { pr_err("command id is larger than the queue size. cmd_id: %u queue size %d\n", command_id, queue->q_depth); return NULL; } + if (unlikely(!queue->comp_ctx)) { + pr_err("Completion context is NULL\n"); + return NULL; + } + if (unlikely(queue->comp_ctx[command_id].occupied && capture)) { pr_err("Completion context is occupied\n"); return NULL; @@ -375,7 +377,7 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev, io_sq->bounce_buf_ctrl.next_to_use = 0; size = io_sq->bounce_buf_ctrl.buffer_size * - io_sq->bounce_buf_ctrl.buffers_num; + io_sq->bounce_buf_ctrl.buffers_num; dev_node = dev_to_node(ena_dev->dmadev); set_dev_node(ena_dev->dmadev, ctx->numa_node); @@ -523,9 +525,6 @@ static int ena_com_comp_status_to_errno(u8 comp_status) if (unlikely(comp_status != 0)) pr_err("admin command failed[%u]\n", comp_status); - if (unlikely(comp_status > ENA_ADMIN_UNKNOWN_ERROR)) - return -EINVAL; - switch (comp_status) { case ENA_ADMIN_SUCCESS: return 0; @@ -540,7 +539,14 @@ static int ena_com_comp_status_to_errno(u8 comp_status) return -EINVAL; } - return 0; + return -EINVAL; +} + +static void ena_delay_exponential_backoff_us(u32 exp, u32 delay_us) +{ + delay_us = max_t(u32, ENA_MIN_ADMIN_POLL_US, delay_us); + delay_us = min_t(u32, delay_us * (1U << exp), ENA_MAX_ADMIN_POLL_US); + usleep_range(delay_us, 2 * delay_us); } static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_ctx, @@ -549,6 +555,7 @@ static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_c unsigned long flags = 0; unsigned long timeout; int ret; + u32 exp = 0; timeout = jiffies + usecs_to_jiffies(admin_queue->completion_timeout); @@ -572,7 +579,8 @@ static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_c goto err; } - msleep(ENA_POLL_MS); + ena_delay_exponential_backoff_us(exp++, + admin_queue->ena_dev->ena_min_poll_delay_us); } if (unlikely(comp_ctx->status == ENA_CMD_ABORTED)) { @@ -702,8 +710,7 @@ static int ena_com_config_llq_info(struct ena_com_dev *ena_dev, /* The desc list entry size should be whole multiply of 8 * This requirement comes from __iowrite64_copy() */ - pr_err("illegal entry size %d\n", - llq_info->desc_list_entry_size); + pr_err("illegal entry size %d\n", llq_info->desc_list_entry_size); return -EINVAL; } @@ -775,7 +782,7 @@ static int ena_com_wait_and_process_admin_cq_interrupts(struct ena_comp_ctx *com if (admin_queue->auto_polling) admin_queue->polling = true; } else { - pr_err("The ena device doesn't send a completion for the admin cmd %d status %d\n", + pr_err("The ena device didn't send a completion for the admin cmd %d status %d\n", comp_ctx->cmd_opcode, comp_ctx->status); } /* Check if shifted to polling mode. @@ -943,12 +950,13 @@ static void ena_com_io_queue_free(struct ena_com_dev *ena_dev, static int wait_for_reset_state(struct ena_com_dev *ena_dev, u32 timeout, u16 exp_state) { - u32 val, i; + u32 val, exp = 0; + unsigned long timeout_stamp; - /* Convert timeout from resolution of 100ms to ENA_POLL_MS */ - timeout = (timeout * 100) / ENA_POLL_MS; + /* Convert timeout from resolution of 100ms to us resolution. 
*/ + timeout_stamp = jiffies + usecs_to_jiffies(100 * 1000 * timeout); - for (i = 0; i < timeout; i++) { + while (1) { val = ena_com_reg_bar_read32(ena_dev, ENA_REGS_DEV_STS_OFF); if (unlikely(val == ENA_MMIO_READ_TIMEOUT)) { @@ -960,10 +968,11 @@ static int wait_for_reset_state(struct ena_com_dev *ena_dev, u32 timeout, exp_state) return 0; - msleep(ENA_POLL_MS); - } + if (time_is_before_jiffies(timeout_stamp)) + return -ETIME; - return -ETIME; + ena_delay_exponential_backoff_us(exp++, ena_dev->ena_min_poll_delay_us); + } } static bool ena_com_check_supported_feature_id(struct ena_com_dev *ena_dev, @@ -1284,13 +1293,9 @@ static int ena_com_ind_tbl_convert_to_device(struct ena_com_dev *ena_dev) static void ena_com_update_intr_delay_resolution(struct ena_com_dev *ena_dev, u16 intr_delay_resolution) { - /* Initial value of intr_delay_resolution might be 0 */ - u16 prev_intr_delay_resolution = - ena_dev->intr_delay_resolution ? - ena_dev->intr_delay_resolution : - ENA_DEFAULT_INTR_DELAY_RESOLUTION; + u16 prev_intr_delay_resolution = ena_dev->intr_delay_resolution; - if (!intr_delay_resolution) { + if (unlikely(!intr_delay_resolution)) { pr_err("Illegal intr_delay_resolution provided. Going to use default 1 usec resolution\n"); intr_delay_resolution = ENA_DEFAULT_INTR_DELAY_RESOLUTION; } @@ -1444,11 +1449,13 @@ void ena_com_wait_for_abort_completion(struct ena_com_dev *ena_dev) { struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; unsigned long flags = 0; + u32 exp = 0; spin_lock_irqsave(&admin_queue->q_lock, flags); while (atomic_read(&admin_queue->outstanding_cmds) != 0) { spin_unlock_irqrestore(&admin_queue->q_lock, flags); - msleep(ENA_POLL_MS); + ena_delay_exponential_backoff_us(exp++, + ena_dev->ena_min_poll_delay_us); spin_lock_irqsave(&admin_queue->q_lock, flags); } spin_unlock_irqrestore(&admin_queue->q_lock, flags); @@ -1796,6 +1803,7 @@ int ena_com_admin_init(struct ena_com_dev *ena_dev, if (ret) goto error; + admin_queue->ena_dev = ena_dev; admin_queue->running_state = true; return 0; @@ -2003,7 +2011,7 @@ void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data) struct ena_admin_aenq_entry *aenq_e; struct ena_admin_aenq_common_desc *aenq_common; struct ena_com_aenq *aenq = &dev->aenq; - unsigned long long timestamp; + u64 timestamp; ena_aenq_handler handler_cb; u16 masked_head, processed = 0; u8 phase; @@ -2021,9 +2029,8 @@ void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data) */ dma_rmb(); - timestamp = - (unsigned long long)aenq_common->timestamp_low | - ((unsigned long long)aenq_common->timestamp_high << 32); + timestamp = (u64)aenq_common->timestamp_low | + ((u64)aenq_common->timestamp_high << 32); pr_debug("AENQ! 
Group[%x] Syndrom[%x] timestamp: [%llus]\n", aenq_common->group, aenq_common->syndrom, timestamp); @@ -2053,8 +2060,7 @@ void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data) /* write the aenq doorbell after all AENQ descriptors were read */ mb(); - writel_relaxed((u32)aenq->head, - dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF); + writel_relaxed((u32)aenq->head, dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF); } int ena_com_dev_reset(struct ena_com_dev *ena_dev, @@ -2276,13 +2282,14 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev, enum ena_admin_hash_functions func, const u8 *key, u16 key_len, u32 init_val) { - struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_feature_rss_flow_hash_control *hash_key; struct ena_admin_get_feat_resp get_resp; - struct ena_admin_feature_rss_flow_hash_control *hash_key = - rss->hash_key; enum ena_admin_hash_functions old_func; + struct ena_rss *rss = &ena_dev->rss; int rc; + hash_key = rss->hash_key; + /* Make sure size is a mult of DWs */ if (unlikely(key_len & 0x3)) return -EINVAL; @@ -2294,7 +2301,7 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev, if (unlikely(rc)) return rc; - if (!((1 << func) & get_resp.u.flow_hash_func.supported_func)) { + if (!(BIT(func) & get_resp.u.flow_hash_func.supported_func)) { pr_err("Flow hash function %d isn't supported\n", func); return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h index 13a1b7812c46..bc187adf54e4 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.h +++ b/drivers/net/ethernet/amazon/ena/ena_com.h @@ -77,6 +77,8 @@ #define ENA_INTR_INITIAL_RX_INTERVAL_USECS 0 #define ENA_DEFAULT_INTR_DELAY_RESOLUTION 1 +#define ENA_HASH_KEY_SIZE 40 + #define ENA_HW_HINTS_NO_TIMEOUT 0xFFFF #define ENA_FEATURE_MAX_QUEUE_EXT_VER 1 @@ -237,6 +239,7 @@ struct ena_com_stats_admin { struct ena_com_admin_queue { void *q_dmadev; + struct ena_com_dev *ena_dev; spinlock_t q_lock; /* spinlock for the admin queue */ struct ena_comp_ctx *comp_ctx; @@ -349,6 +352,8 @@ struct ena_com_dev { struct ena_intr_moder_entry *intr_moder_tbl; struct ena_com_llq_info llq_info; + + u32 ena_min_poll_delay_us; }; struct ena_com_dev_get_features_ctx { @@ -393,7 +398,7 @@ struct ena_aenq_handlers { */ int ena_com_mmio_reg_read_request_init(struct ena_com_dev *ena_dev); -/* ena_com_set_mmio_read_mode - Enable/disable the mmio reg read mechanism +/* ena_com_set_mmio_read_mode - Enable/disable the indirect mmio reg read mechanism * @ena_dev: ENA communication layer struct * @readless_supported: readless mode (enable/disable) */ @@ -515,7 +520,7 @@ void ena_com_set_admin_auto_polling_mode(struct ena_com_dev *ena_dev, /* ena_com_admin_q_comp_intr_handler - admin queue interrupt handler * @ena_dev: ENA communication layer struct * - * This method go over the admin completion queue and wake up all the pending + * This method goes over the admin completion queue and wakes up all the pending * threads that wait on the commands wait event. * * @note: Should be called after MSI-X interrupt. @@ -525,7 +530,7 @@ void ena_com_admin_q_comp_intr_handler(struct ena_com_dev *ena_dev); /* ena_com_aenq_intr_handler - AENQ interrupt handler * @ena_dev: ENA communication layer struct * - * This method go over the async event notification queue and call the proper + * This method goes over the async event notification queue and calls the proper * aenq handler. 
*/ void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data); @@ -542,14 +547,14 @@ void ena_com_abort_admin_commands(struct ena_com_dev *ena_dev); /* ena_com_wait_for_abort_completion - Wait for admin commands abort. * @ena_dev: ENA communication layer struct * - * This method wait until all the outstanding admin commands will be completed. + * This method waits until all the outstanding admin commands are completed. */ void ena_com_wait_for_abort_completion(struct ena_com_dev *ena_dev); /* ena_com_validate_version - Validate the device parameters * @ena_dev: ENA communication layer struct * - * This method validate the device parameters are the same as the saved + * This method verifies the device parameters are the same as the saved * parameters in ena_dev. * This method is useful after device reset, to validate the device mac address * and the device offloads are the same as before the reset. @@ -689,7 +694,7 @@ int ena_com_set_hash_function(struct ena_com_dev *ena_dev); * * Retrieve the hash function from the device. * - * @note: If the caller called ena_com_fill_hash_function but didn't flash + * @note: If the caller called ena_com_fill_hash_function but didn't flush * it to the device, the new configuration will be lost. * * @return: 0 on Success and negative value otherwise. @@ -703,7 +708,7 @@ int ena_com_get_hash_function(struct ena_com_dev *ena_dev, * * Retrieve the hash key. * - * @note: If the caller called ena_com_fill_hash_key but didn't flash + * @note: If the caller called ena_com_fill_hash_key but didn't flush * it to the device, the new configuration will be lost. * * @return: 0 on Success and negative value otherwise. @@ -743,7 +748,7 @@ int ena_com_set_hash_ctrl(struct ena_com_dev *ena_dev); * * Retrieve the hash control from the device. * - * @note, If the caller called ena_com_fill_hash_ctrl but didn't flash + * @note: If the caller called ena_com_fill_hash_ctrl but didn't flush * it to the device, the new configuration will be lost. * * @return: 0 on Success and negative value otherwise. @@ -795,7 +800,7 @@ int ena_com_indirect_table_set(struct ena_com_dev *ena_dev); * * Retrieve the RSS indirection table from the device. * - * @note: If the caller called ena_com_indirect_table_fill_entry but didn't flash + * @note: If the caller called ena_com_indirect_table_fill_entry but didn't flush * it to the device, the new configuration will be lost. * * @return: 0 on Success and negative value otherwise. @@ -821,14 +826,14 @@ int ena_com_allocate_debug_area(struct ena_com_dev *ena_dev, /* ena_com_delete_debug_area - Free the debug area resources. * @ena_dev: ENA communication layer struct * - * Free the allocate debug area. + * Free the allocated debug area. */ void ena_com_delete_debug_area(struct ena_com_dev *ena_dev); /* ena_com_delete_host_info - Free the host info resources. * @ena_dev: ENA communication layer struct * - * Free the allocate host info. + * Free the allocated host info. */ void ena_com_delete_host_info(struct ena_com_dev *ena_dev); @@ -869,9 +874,9 @@ int ena_com_destroy_io_cq(struct ena_com_dev *ena_dev, * @cmd_completion: command completion return value. * @cmd_comp_size: command completion size. - * Submit an admin command and then wait until the device will return a + * Submit an admin command and then wait until the device returns a * completion. - * The completion will be copyed into cmd_comp. + * The completion will be copied into cmd_comp. * * @return - 0 on success, negative value on failure. 
*/ @@ -934,7 +939,7 @@ unsigned int ena_com_get_nonadaptive_moderation_interval_rx(struct ena_com_dev * /* ena_com_config_dev_mode - Configure the placement policy of the device. * @ena_dev: ENA communication layer struct * @llq_features: LLQ feature descriptor, retrieve via - * ena_com_get_dev_attr_feat. + * ena_com_get_dev_attr_feat. * @ena_llq_config: The default driver LLQ parameters configurations */ int ena_com_config_dev_mode(struct ena_com_dev *ena_dev, @@ -960,7 +965,7 @@ static inline void ena_com_disable_adaptive_moderation(struct ena_com_dev *ena_d * @intr_reg: interrupt register to update. * @rx_delay_interval: Rx interval in usecs * @tx_delay_interval: Tx interval in usecs - * @unmask: unask enable/disable + * @unmask: unmask enable/disable * * Prepare interrupt update register with the supplied parameters. */ diff --git a/drivers/net/ethernet/amazon/ena/ena_common_defs.h b/drivers/net/ethernet/amazon/ena/ena_common_defs.h index 23beb7e7ed7b..8a8ded0de9ac 100644 --- a/drivers/net/ethernet/amazon/ena/ena_common_defs.h +++ b/drivers/net/ethernet/amazon/ena/ena_common_defs.h @@ -45,4 +45,4 @@ struct ena_common_mem_addr { u16 reserved16; }; -#endif /*_ENA_COMMON_H_ */ +#endif /* _ENA_COMMON_H_ */ diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.c b/drivers/net/ethernet/amazon/ena/ena_eth_com.c index 2845ac277724..ec8ea25e988d 100644 --- a/drivers/net/ethernet/amazon/ena/ena_eth_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.c @@ -519,7 +519,7 @@ int ena_com_rx_pkt(struct ena_com_io_cq *io_cq, struct ena_eth_io_rx_cdesc_base *cdesc = NULL; u16 cdesc_idx = 0; u16 nb_hw_desc; - u16 i; + u16 i = 0; WARN(io_cq->direction != ENA_COM_IO_QUEUE_DIRECTION_RX, "wrong Q type"); @@ -538,13 +538,19 @@ int ena_com_rx_pkt(struct ena_com_io_cq *io_cq, return -ENOSPC; } - for (i = 0; i < nb_hw_desc; i++) { + cdesc = ena_com_rx_cdesc_idx_to_ptr(io_cq, cdesc_idx); + ena_rx_ctx->pkt_offset = cdesc->offset; + + do { + ena_buf[i].len = cdesc->length; + ena_buf[i].req_id = cdesc->req_id; + + if (++i >= nb_hw_desc) + break; + cdesc = ena_com_rx_cdesc_idx_to_ptr(io_cq, cdesc_idx + i); - ena_buf->len = cdesc->length; - ena_buf->req_id = cdesc->req_id; - ena_buf++; - } + } while (1); /* Update SQ head ptr */ io_sq->next_to_comp += nb_hw_desc; @@ -578,10 +584,10 @@ int ena_com_add_single_rx_desc(struct ena_com_io_sq *io_sq, desc->length = ena_buf->len; - desc->ctrl = ENA_ETH_IO_RX_DESC_FIRST_MASK; - desc->ctrl |= ENA_ETH_IO_RX_DESC_LAST_MASK; - desc->ctrl |= io_sq->phase & ENA_ETH_IO_RX_DESC_PHASE_MASK; - desc->ctrl |= ENA_ETH_IO_RX_DESC_COMP_REQ_MASK; + desc->ctrl = ENA_ETH_IO_RX_DESC_FIRST_MASK | + ENA_ETH_IO_RX_DESC_LAST_MASK | + (io_sq->phase & ENA_ETH_IO_RX_DESC_PHASE_MASK) | + ENA_ETH_IO_RX_DESC_COMP_REQ_MASK; desc->req_id = req_id; diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.h b/drivers/net/ethernet/amazon/ena/ena_eth_com.h index 77986c0ea52c..8b1afd3b32f2 100644 --- a/drivers/net/ethernet/amazon/ena/ena_eth_com.h +++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.h @@ -73,6 +73,7 @@ struct ena_com_rx_ctx { u32 hash; u16 descs; int max_bufs; + u8 pkt_offset; }; int ena_com_prepare_tx(struct ena_com_io_sq *io_sq, @@ -95,7 +96,7 @@ static inline void ena_com_unmask_intr(struct ena_com_io_cq *io_cq, writel(intr_reg->intr_control, io_cq->unmask_reg); } -static inline int ena_com_free_desc(struct ena_com_io_sq *io_sq) +static inline int ena_com_free_q_entries(struct ena_com_io_sq *io_sq) { u16 tail, next_to_comp, cnt; @@ -113,7 +114,7 @@ static inline bool 
ena_com_sq_have_enough_space(struct ena_com_io_sq *io_sq, int temp; if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) - return ena_com_free_desc(io_sq) >= required_buffers; + return ena_com_free_q_entries(io_sq) >= required_buffers; /* This calculation doesn't need to be 100% accurate. So to reduce * the calculation overhead just Subtract 2 lines from the free descs @@ -122,7 +123,7 @@ static inline bool ena_com_sq_have_enough_space(struct ena_com_io_sq *io_sq, */ temp = required_buffers / io_sq->llq_info.descs_per_entry + 2; - return ena_com_free_desc(io_sq) > temp; + return ena_com_free_q_entries(io_sq) > temp; } static inline bool ena_com_meta_desc_changed(struct ena_com_io_sq *io_sq, diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h b/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h index 00e0f056a741..d105c9c56192 100644 --- a/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h +++ b/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h @@ -264,7 +264,9 @@ struct ena_eth_io_rx_cdesc_base { u16 sub_qid; - u16 reserved; + u8 offset; + + u8 reserved; }; /* 8-word format */ @@ -412,4 +414,4 @@ struct ena_eth_io_numa_node_cfg_reg { #define ENA_ETH_IO_NUMA_NODE_CFG_REG_ENABLED_SHIFT 31 #define ENA_ETH_IO_NUMA_NODE_CFG_REG_ENABLED_MASK BIT(31) -#endif /*_ENA_ETH_IO_H_ */ +#endif /* _ENA_ETH_IO_H_ */ diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index 830d3711d6ee..e340b65af08c 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -206,7 +206,7 @@ int ena_get_sset_count(struct net_device *netdev, int sset) if (sset != ETH_SS_STATS) return -EOPNOTSUPP; - return adapter->num_io_queues * (ENA_STATS_ARRAY_TX + ENA_STATS_ARRAY_RX) + return adapter->num_io_queues * (ENA_STATS_ARRAY_TX + ENA_STATS_ARRAY_RX) + ENA_STATS_ARRAY_GLOBAL + ENA_STATS_ARRAY_ENA_COM; } @@ -260,7 +260,6 @@ static void ena_get_strings(struct net_device *netdev, u32 sset, u8 *data) for (i = 0; i < ENA_STATS_ARRAY_GLOBAL; i++) { ena_stats = &ena_stats_global_strings[i]; - memcpy(data, ena_stats->name, ETH_GSTRING_LEN); data += ETH_GSTRING_LEN; } @@ -307,10 +306,8 @@ static int ena_get_coalesce(struct net_device *net_dev, struct ena_adapter *adapter = netdev_priv(net_dev); struct ena_com_dev *ena_dev = adapter->ena_dev; - if (!ena_com_interrupt_moderation_supported(ena_dev)) { - /* the devie doesn't support interrupt moderation */ + if (!ena_com_interrupt_moderation_supported(ena_dev)) return -EOPNOTSUPP; - } coalesce->tx_coalesce_usecs = ena_com_get_nonadaptive_moderation_interval_tx(ena_dev) * @@ -326,7 +323,7 @@ static int ena_get_coalesce(struct net_device *net_dev, return 0; } -static void ena_update_tx_rings_intr_moderation(struct ena_adapter *adapter) +static void ena_update_tx_rings_nonadaptive_intr_moderation(struct ena_adapter *adapter) { unsigned int val; int i; @@ -337,7 +334,7 @@ static void ena_update_tx_rings_intr_moderation(struct ena_adapter *adapter) adapter->tx_ring[i].smoothed_interval = val; } -static void ena_update_rx_rings_intr_moderation(struct ena_adapter *adapter) +static void ena_update_rx_rings_nonadaptive_intr_moderation(struct ena_adapter *adapter) { unsigned int val; int i; @@ -355,24 +352,22 @@ static int ena_set_coalesce(struct net_device *net_dev, struct ena_com_dev *ena_dev = adapter->ena_dev; int rc; - if (!ena_com_interrupt_moderation_supported(ena_dev)) { - /* the devie doesn't support interrupt moderation */ + if 
(!ena_com_interrupt_moderation_supported(ena_dev)) return -EOPNOTSUPP; - } rc = ena_com_update_nonadaptive_moderation_interval_tx(ena_dev, coalesce->tx_coalesce_usecs); if (rc) return rc; - ena_update_tx_rings_intr_moderation(adapter); + ena_update_tx_rings_nonadaptive_intr_moderation(adapter); rc = ena_com_update_nonadaptive_moderation_interval_rx(ena_dev, coalesce->rx_coalesce_usecs); if (rc) return rc; - ena_update_rx_rings_intr_moderation(adapter); + ena_update_rx_rings_nonadaptive_intr_moderation(adapter); if (coalesce->use_adaptive_rx_coalesce && !ena_com_get_adaptive_moderation_enabled(ena_dev)) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 85b87ed02dd5..46865d5bd7e7 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1435,6 +1435,8 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page, rx_info->page_offset, len, ENA_PAGE_SIZE); + /* The offset is non zero only for the first buffer */ + rx_info->page_offset = 0; netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, "rx skb updated. len %d. data_len %d\n", @@ -1590,6 +1592,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, { u16 next_to_clean = rx_ring->next_to_clean; struct ena_com_rx_ctx ena_rx_ctx; + struct ena_rx_buffer *rx_info; struct ena_adapter *adapter; u32 res_budget, work_done; int rx_copybreak_pkt = 0; @@ -1614,6 +1617,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, ena_rx_ctx.ena_bufs = rx_ring->ena_bufs; ena_rx_ctx.max_bufs = rx_ring->sgl_size; ena_rx_ctx.descs = 0; + ena_rx_ctx.pkt_offset = 0; rc = ena_com_rx_pkt(rx_ring->ena_com_io_cq, rx_ring->ena_com_io_sq, &ena_rx_ctx); @@ -1623,6 +1627,9 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, if (unlikely(ena_rx_ctx.descs == 0)) break; + rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id]; + rx_info->page_offset = ena_rx_ctx.pkt_offset; + netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, "rx_poll: q %d got packet from ena. descs #: %d l3 proto %d l4 proto %d hash: %x\n", rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto, @@ -1684,7 +1691,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, rx_ring->next_to_clean = next_to_clean; - refill_required = ena_com_free_desc(rx_ring->ena_com_io_sq); + refill_required = ena_com_free_q_entries(rx_ring->ena_com_io_sq); refill_threshold = min_t(int, rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER, ENA_RX_REFILL_THRESH_PACKET); @@ -2235,7 +2242,7 @@ static int ena_rss_configure(struct ena_adapter *adapter) rc = ena_rss_init_default(adapter); if (rc && (rc != -EOPNOTSUPP)) { netif_err(adapter, ifup, adapter->netdev, - "Failed to init RSS rc: %d\n", rc); + "Failed to init RSS rc: %d\n", rc); return rc; } } @@ -2308,7 +2315,7 @@ static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid) if (rc) { netif_err(adapter, ifup, adapter->netdev, "Failed to create I/O TX queue num %d rc: %d\n", - qid, rc); + qid, rc); return rc; } @@ -2457,7 +2464,7 @@ static int create_queues_with_size_backoff(struct ena_adapter *adapter) * ones due to past queue allocation failures. 
*/ set_io_rings_size(adapter, adapter->requested_tx_ring_size, - adapter->requested_rx_ring_size); + adapter->requested_rx_ring_size); while (1) { if (ena_xdp_present(adapter)) { @@ -2498,7 +2505,7 @@ err_setup_tx: if (rc != -ENOMEM) { netif_err(adapter, ifup, adapter->netdev, "Queue creation failed with error code %d\n", - rc); + rc); return rc; } @@ -2521,7 +2528,7 @@ err_setup_tx: new_rx_ring_size = cur_rx_ring_size / 2; if (new_tx_ring_size < ENA_MIN_RING_SIZE || - new_rx_ring_size < ENA_MIN_RING_SIZE) { + new_rx_ring_size < ENA_MIN_RING_SIZE) { netif_err(adapter, ifup, adapter->netdev, "Queue creation failed with the smallest possible queue size of %d for both queues. Not retrying with smaller queues\n", ENA_MIN_RING_SIZE); @@ -3080,8 +3087,7 @@ static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb, return qid; } -static void ena_config_host_info(struct ena_com_dev *ena_dev, - struct pci_dev *pdev) +static void ena_config_host_info(struct ena_com_dev *ena_dev, struct pci_dev *pdev) { struct ena_admin_host_info *host_info; int rc; @@ -3111,6 +3117,7 @@ static void ena_config_host_info(struct ena_com_dev *ena_dev, host_info->num_cpus = num_online_cpus(); host_info->driver_supported_features = + ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK | ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_MASK; rc = ena_com_set_host_attributes(ena_dev); @@ -3686,8 +3693,7 @@ static void check_for_empty_rx_ring(struct ena_adapter *adapter) for (i = 0; i < adapter->num_io_queues; i++) { rx_ring = &adapter->rx_ring[i]; - refill_required = - ena_com_free_desc(rx_ring->ena_com_io_sq); + refill_required = ena_com_free_q_entries(rx_ring->ena_com_io_sq); if (unlikely(refill_required == (rx_ring->ring_size - 1))) { rx_ring->empty_rx_queue++; @@ -3825,11 +3831,11 @@ static void ena_timer_service(struct timer_list *t) mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ)); } -static int ena_calc_max_io_queue_num(struct pci_dev *pdev, +static u32 ena_calc_max_io_queue_num(struct pci_dev *pdev, struct ena_com_dev *ena_dev, struct ena_com_dev_get_features_ctx *get_feat_ctx) { - int io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues; + u32 io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues; if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { struct ena_admin_queue_ext_feature_fields *max_queue_ext = @@ -4115,8 +4121,8 @@ static int ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx) */ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { - struct ena_com_dev_get_features_ctx get_feat_ctx; struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 }; + struct ena_com_dev_get_features_ctx get_feat_ctx; struct ena_llq_configurations llq_config; struct ena_com_dev *ena_dev = NULL; struct ena_adapter *adapter; @@ -4160,6 +4166,8 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_free_region; } + ena_dev->ena_min_poll_delay_us = ENA_ADMIN_POLL_DELAY_US; + ena_dev->dmadev = &pdev->dev; rc = ena_device_init(ena_dev, pdev, &get_feat_ctx, &wd_state); @@ -4183,7 +4191,7 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) calc_queue_ctx.get_feat_ctx = &get_feat_ctx; calc_queue_ctx.pdev = pdev; - /* Initial Tx and RX interrupt delay. Assumes 1 usec granularity. + /* Initial TX and RX interrupt delay. Assumes 1 usec granularity. 
* Updated during device initialization with the real granularity */ ena_dev->intr_moder_tx_interval = ENA_INTR_INITIAL_TX_INTERVAL_USECS; @@ -4227,12 +4235,11 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) adapter->num_io_queues = max_num_io_queues; adapter->max_num_io_queues = max_num_io_queues; + adapter->last_monitored_tx_qid = 0; adapter->xdp_first_ring = 0; adapter->xdp_num_queues = 0; - adapter->last_monitored_tx_qid = 0; - adapter->rx_copybreak = ENA_DEFAULT_RX_COPYBREAK; adapter->wd_state = wd_state; diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index 680099afcccf..ba030d260940 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -50,12 +50,6 @@ #define DRV_MODULE_GEN_SUBMINOR 0 #define DRV_MODULE_NAME "ena" -#ifndef DRV_MODULE_GENERATION -#define DRV_MODULE_GENERATION \ - __stringify(DRV_MODULE_GEN_MAJOR) "." \ - __stringify(DRV_MODULE_GEN_MINOR) "." \ - __stringify(DRV_MODULE_GEN_SUBMINOR) "K" -#endif #define DEVICE_NAME "Elastic Network Adapter (ENA)" @@ -104,8 +98,6 @@ #define ENA_RX_RSS_TABLE_LOG_SIZE 7 #define ENA_RX_RSS_TABLE_SIZE (1 << ENA_RX_RSS_TABLE_LOG_SIZE) -#define ENA_HASH_KEY_SIZE 40 - /* The number of tx packet completions that will be handled each NAPI poll * cycle is ring_size / ENA_TX_POLL_BUDGET_DIVIDER. */ @@ -137,6 +129,8 @@ #define ENA_IO_IRQ_FIRST_IDX 1 #define ENA_IO_IRQ_IDX(q) (ENA_IO_IRQ_FIRST_IDX + (q)) +#define ENA_ADMIN_POLL_DELAY_US 100 + /* ENA device should send keep alive msg every 1 sec. * We wait for 6 sec just to be on the safe side. */ diff --git a/drivers/net/ethernet/amazon/ena/ena_regs_defs.h b/drivers/net/ethernet/amazon/ena/ena_regs_defs.h index 04fcafcc059c..b514bb1b855d 100644 --- a/drivers/net/ethernet/amazon/ena/ena_regs_defs.h +++ b/drivers/net/ethernet/amazon/ena/ena_regs_defs.h @@ -154,4 +154,4 @@ enum ena_regs_reset_reason_types { #define ENA_REGS_RSS_IND_ENTRY_UPDATE_CQ_IDX_SHIFT 16 #define ENA_REGS_RSS_IND_ENTRY_UPDATE_CQ_IDX_MASK 0xffff0000 -#endif /*_ENA_REGS_H_ */ +#endif /* _ENA_REGS_H_ */ diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c index 86fc77d85fda..743d3b13b39d 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c @@ -88,13 +88,13 @@ static const char aq_ethtool_stat_names[][ETH_GSTRING_LEN] = { "InDroppedDma", }; -static const char aq_ethtool_queue_stat_names[][ETH_GSTRING_LEN] = { - "Queue[%d] InPackets", - "Queue[%d] OutPackets", - "Queue[%d] Restarts", - "Queue[%d] InJumboPackets", - "Queue[%d] InLroPackets", - "Queue[%d] InErrors", +static const char * const aq_ethtool_queue_stat_names[] = { + "%sQueue[%d] InPackets", + "%sQueue[%d] OutPackets", + "%sQueue[%d] Restarts", + "%sQueue[%d] InJumboPackets", + "%sQueue[%d] InLroPackets", + "%sQueue[%d] InErrors", }; #if IS_ENABLED(CONFIG_MACSEC) @@ -166,7 +166,8 @@ static u32 aq_ethtool_n_stats(struct net_device *ndev) struct aq_nic_s *nic = netdev_priv(ndev); struct aq_nic_cfg_s *cfg = aq_nic_get_cfg(nic); u32 n_stats = ARRAY_SIZE(aq_ethtool_stat_names) + - ARRAY_SIZE(aq_ethtool_queue_stat_names) * cfg->vecs; + ARRAY_SIZE(aq_ethtool_queue_stat_names) * cfg->vecs * + cfg->tcs; #if IS_ENABLED(CONFIG_MACSEC) if (nic->macsec_cfg) { @@ -223,7 +224,7 @@ static void aq_ethtool_get_drvinfo(struct net_device *ndev, static void aq_ethtool_get_strings(struct net_device *ndev, u32 stringset, u8 
*data) { - struct aq_nic_s *aq_nic = netdev_priv(ndev); + struct aq_nic_s *nic = netdev_priv(ndev); struct aq_nic_cfg_s *cfg; u8 *p = data; int i, si; @@ -231,24 +232,35 @@ static void aq_ethtool_get_strings(struct net_device *ndev, int sa; #endif - cfg = aq_nic_get_cfg(aq_nic); + cfg = aq_nic_get_cfg(nic); switch (stringset) { - case ETH_SS_STATS: + case ETH_SS_STATS: { + const int stat_cnt = ARRAY_SIZE(aq_ethtool_queue_stat_names); + char tc_string[8]; + int tc; + + memset(tc_string, 0, sizeof(tc_string)); memcpy(p, aq_ethtool_stat_names, sizeof(aq_ethtool_stat_names)); p = p + sizeof(aq_ethtool_stat_names); - for (i = 0; i < cfg->vecs; i++) { - for (si = 0; - si < ARRAY_SIZE(aq_ethtool_queue_stat_names); - si++) { - snprintf(p, ETH_GSTRING_LEN, - aq_ethtool_queue_stat_names[si], i); - p += ETH_GSTRING_LEN; + + for (tc = 0; tc < cfg->tcs; tc++) { + if (cfg->is_qos) + snprintf(tc_string, 8, "TC%d ", tc); + + for (i = 0; i < cfg->vecs; i++) { + for (si = 0; si < stat_cnt; si++) { + snprintf(p, ETH_GSTRING_LEN, + aq_ethtool_queue_stat_names[si], + tc_string, + AQ_NIC_CFG_TCVEC2RING(cfg, tc, i)); + p += ETH_GSTRING_LEN; + } } } #if IS_ENABLED(CONFIG_MACSEC) - if (!aq_nic->macsec_cfg) + if (!nic->macsec_cfg) break; memcpy(p, aq_macsec_stat_names, sizeof(aq_macsec_stat_names)); @@ -256,7 +268,7 @@ static void aq_ethtool_get_strings(struct net_device *ndev, for (i = 0; i < AQ_MACSEC_MAX_SC; i++) { struct aq_macsec_txsc *aq_txsc; - if (!(test_bit(i, &aq_nic->macsec_cfg->txsc_idx_busy))) + if (!(test_bit(i, &nic->macsec_cfg->txsc_idx_busy))) continue; for (si = 0; @@ -266,7 +278,7 @@ static void aq_ethtool_get_strings(struct net_device *ndev, aq_macsec_txsc_stat_names[si], i); p += ETH_GSTRING_LEN; } - aq_txsc = &aq_nic->macsec_cfg->aq_txsc[i]; + aq_txsc = &nic->macsec_cfg->aq_txsc[i]; for (sa = 0; sa < MACSEC_NUM_AN; sa++) { if (!(test_bit(sa, &aq_txsc->tx_sa_idx_busy))) continue; @@ -283,10 +295,10 @@ static void aq_ethtool_get_strings(struct net_device *ndev, for (i = 0; i < AQ_MACSEC_MAX_SC; i++) { struct aq_macsec_rxsc *aq_rxsc; - if (!(test_bit(i, &aq_nic->macsec_cfg->rxsc_idx_busy))) + if (!(test_bit(i, &nic->macsec_cfg->rxsc_idx_busy))) continue; - aq_rxsc = &aq_nic->macsec_cfg->aq_rxsc[i]; + aq_rxsc = &nic->macsec_cfg->aq_rxsc[i]; for (sa = 0; sa < MACSEC_NUM_AN; sa++) { if (!(test_bit(sa, &aq_rxsc->rx_sa_idx_busy))) continue; @@ -302,6 +314,7 @@ static void aq_ethtool_get_strings(struct net_device *ndev, } #endif break; + } case ETH_SS_PRIV_FLAGS: memcpy(p, aq_ethtool_priv_flag_names, sizeof(aq_ethtool_priv_flag_names)); @@ -780,8 +793,6 @@ static int aq_set_ringparam(struct net_device *ndev, dev_close(ndev); } - aq_nic_free_vectors(aq_nic); - cfg->rxds = max(ring->rx_pending, hw_caps->rxds_min); cfg->rxds = min(cfg->rxds, hw_caps->rxds_max); cfg->rxds = ALIGN(cfg->rxds, AQ_HW_RXD_MULTIPLE); @@ -790,15 +801,10 @@ static int aq_set_ringparam(struct net_device *ndev, cfg->txds = min(cfg->txds, hw_caps->txds_max); cfg->txds = ALIGN(cfg->txds, AQ_HW_TXD_MULTIPLE); - for (aq_nic->aq_vecs = 0; aq_nic->aq_vecs < cfg->vecs; - aq_nic->aq_vecs++) { - aq_nic->aq_vec[aq_nic->aq_vecs] = - aq_vec_alloc(aq_nic, aq_nic->aq_vecs, cfg); - if (unlikely(!aq_nic->aq_vec[aq_nic->aq_vecs])) { - err = -ENOMEM; - goto err_exit; - } - } + err = aq_nic_realloc_vectors(aq_nic); + if (err) + goto err_exit; + if (ndev_running) err = dev_open(ndev, NULL); diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c index 03ff92bc4a7f..1bc4d33a0ce5 100644 --- 
a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c @@ -153,6 +153,8 @@ aq_check_approve_fvlan(struct aq_nic_s *aq_nic, struct aq_hw_rx_fltrs_s *rx_fltrs, struct ethtool_rx_flow_spec *fsp) { + struct aq_nic_cfg_s *cfg = &aq_nic->aq_nic_cfg; + if (fsp->location < AQ_RX_FIRST_LOC_FVLANID || fsp->location > AQ_RX_LAST_LOC_FVLANID) { netdev_err(aq_nic->ndev, @@ -170,10 +172,10 @@ aq_check_approve_fvlan(struct aq_nic_s *aq_nic, return -EINVAL; } - if (fsp->ring_cookie > aq_nic->aq_nic_cfg.num_rss_queues) { + if (fsp->ring_cookie > cfg->num_rss_queues * cfg->tcs) { netdev_err(aq_nic->ndev, "ethtool: queue number must be in range [0, %d]", - aq_nic->aq_nic_cfg.num_rss_queues - 1); + cfg->num_rss_queues * cfg->tcs - 1); return -EINVAL; } return 0; @@ -262,6 +264,7 @@ static bool __must_check aq_rule_is_not_correct(struct aq_nic_s *aq_nic, struct ethtool_rx_flow_spec *fsp) { + struct aq_nic_cfg_s *cfg = &aq_nic->aq_nic_cfg; bool rule_is_not_correct = false; if (!aq_nic) { @@ -274,11 +277,11 @@ aq_rule_is_not_correct(struct aq_nic_s *aq_nic, } else if (aq_check_filter(aq_nic, fsp)) { rule_is_not_correct = true; } else if (fsp->ring_cookie != RX_CLS_FLOW_DISC) { - if (fsp->ring_cookie >= aq_nic->aq_nic_cfg.num_rss_queues) { + if (fsp->ring_cookie >= cfg->num_rss_queues * cfg->tcs) { netdev_err(aq_nic->ndev, "ethtool: The specified action is invalid.\n" "Maximum allowable value action is %u.\n", - aq_nic->aq_nic_cfg.num_rss_queues - 1); + cfg->num_rss_queues * cfg->tcs - 1); rule_is_not_correct = true; } } diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h index 03fea9469f01..ed5b465bc664 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h @@ -18,6 +18,12 @@ #define AQ_HW_MAC_COUNTER_HZ 312500000ll #define AQ_HW_PHY_COUNTER_HZ 160000000ll +enum aq_tc_mode { + AQ_TC_MODE_INVALID = -1, + AQ_TC_MODE_8TCS, + AQ_TC_MODE_4TCS, +}; + #define AQ_RX_FIRST_LOC_FVLANID 0U #define AQ_RX_LAST_LOC_FVLANID 15U #define AQ_RX_FIRST_LOC_FETHERT 16U @@ -29,6 +35,9 @@ (AQ_RX_LAST_LOC_FVLANID - AQ_RX_FIRST_LOC_FVLANID + 1U) #define AQ_RX_QUEUE_NOT_ASSIGNED 0xFFU +/* Used for rate to Mbps conversion */ +#define AQ_MBPS_DIVISOR 125000 /* 1000000 / 8 */ + /* NIC H/W capabilities */ struct aq_hw_caps_s { u64 hw_features; @@ -46,7 +55,7 @@ struct aq_hw_caps_s { u32 mac_regs_count; u32 hw_alive_check_addr; u8 msix_irqs; - u8 tcs; + u8 tcs_max; u8 rxd_alignment; u8 rxd_size; u8 txd_alignment; @@ -118,8 +127,11 @@ struct aq_stats_s { #define AQ_HW_TXD_MULTIPLE 8U #define AQ_HW_RXD_MULTIPLE 8U +#define AQ_HW_QUEUES_MAX 32U #define AQ_HW_MULTICAST_ADDRESS_MAX 32U +#define AQ_HW_PTP_TC 2U + #define AQ_HW_LED_BLINK 0x2U #define AQ_HW_LED_DEFAULT 0x0U @@ -268,6 +280,8 @@ struct aq_hw_ops { int (*hw_rss_hash_set)(struct aq_hw_s *self, struct aq_rss_parameters *rss_params); + int (*hw_tc_rate_limit_set)(struct aq_hw_s *self); + int (*hw_get_regs)(struct aq_hw_s *self, const struct aq_hw_caps_s *aq_hw_caps, u32 *regs_buff); @@ -279,10 +293,6 @@ struct aq_hw_ops { int (*hw_set_offload)(struct aq_hw_s *self, struct aq_nic_cfg_s *aq_nic_cfg); - int (*hw_tx_tc_mode_get)(struct aq_hw_s *self, u32 *tc_mode); - - int (*hw_rx_tc_mode_get)(struct aq_hw_s *self, u32 *tc_mode); - int (*hw_ring_hwts_rx_fill)(struct aq_hw_s *self, struct aq_ring_s *aq_ring); diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.c 
b/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.c index 7dbf49adcea6..342c5179f846 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.c @@ -79,3 +79,29 @@ int aq_hw_err_from_flags(struct aq_hw_s *hw) err_exit: return err; } + +int aq_hw_num_tcs(struct aq_hw_s *hw) +{ + switch (hw->aq_nic_cfg->tc_mode) { + case AQ_TC_MODE_8TCS: + return 8; + case AQ_TC_MODE_4TCS: + return 4; + default: + break; + } + + return 1; +} + +int aq_hw_q_per_tc(struct aq_hw_s *hw) +{ + switch (hw->aq_nic_cfg->tc_mode) { + case AQ_TC_MODE_8TCS: + return 4; + case AQ_TC_MODE_4TCS: + return 8; + default: + return 4; + } +} diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.h index 9ef82d487e01..32aa5f2fb840 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.h @@ -34,5 +34,7 @@ u32 aq_hw_read_reg(struct aq_hw_s *hw, u32 reg); void aq_hw_write_reg(struct aq_hw_s *hw, u32 reg, u32 value); u64 aq_hw_read_reg64(struct aq_hw_s *hw, u32 reg); int aq_hw_err_from_flags(struct aq_hw_s *hw); +int aq_hw_num_tcs(struct aq_hw_s *hw); +int aq_hw_q_per_tc(struct aq_hw_s *hw); #endif /* AQ_HW_UTILS_H */ diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c b/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c index 91870ceaf3fe..4a6dfac857ca 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c @@ -478,7 +478,7 @@ static int aq_mdo_add_secy(struct macsec_context *ctx) set_bit(txsc_idx, &cfg->txsc_idx_busy); - return 0; + return ret; } static int aq_mdo_upd_secy(struct macsec_context *ctx) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c index 9fcab646cbd5..8a1da044e908 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c @@ -12,11 +12,13 @@ #include "aq_ethtool.h" #include "aq_ptp.h" #include "aq_filters.h" +#include "aq_hw_utils.h" #include <linux/netdevice.h> #include <linux/module.h> #include <linux/ip.h> #include <linux/udp.h> +#include <net/pkt_cls.h> MODULE_LICENSE("GPL v2"); MODULE_AUTHOR(AQ_CFG_DRV_AUTHOR); @@ -38,7 +40,7 @@ struct net_device *aq_ndev_alloc(void) struct net_device *ndev = NULL; struct aq_nic_s *aq_nic = NULL; - ndev = alloc_etherdev_mq(sizeof(struct aq_nic_s), AQ_CFG_VECS_MAX); + ndev = alloc_etherdev_mq(sizeof(struct aq_nic_s), AQ_HW_QUEUES_MAX); if (!ndev) return NULL; @@ -330,6 +332,73 @@ static int aq_ndo_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto, return 0; } +static int aq_validate_mqprio_opt(struct aq_nic_s *self, + struct tc_mqprio_qopt_offload *mqprio, + const unsigned int num_tc) +{ + const bool has_min_rate = !!(mqprio->flags & TC_MQPRIO_F_MIN_RATE); + struct aq_nic_cfg_s *aq_nic_cfg = aq_nic_get_cfg(self); + const unsigned int tcs_max = min_t(u8, aq_nic_cfg->aq_hw_caps->tcs_max, + AQ_CFG_TCS_MAX); + + if (num_tc > tcs_max) { + netdev_err(self->ndev, "Too many TCs requested\n"); + return -EOPNOTSUPP; + } + + if (num_tc != 0 && !is_power_of_2(num_tc)) { + netdev_err(self->ndev, "TC count should be power of 2\n"); + return -EOPNOTSUPP; + } + + if (has_min_rate && !ATL_HW_IS_CHIP_FEATURE(self->aq_hw, ANTIGUA)) { + netdev_err(self->ndev, "Min tx rate is not supported\n"); + return -EOPNOTSUPP; + } + + return 0; +} + +static int aq_ndo_setup_tc(struct net_device *dev, enum 
tc_setup_type type, + void *type_data) +{ + struct tc_mqprio_qopt_offload *mqprio = type_data; + struct aq_nic_s *aq_nic = netdev_priv(dev); + bool has_min_rate; + bool has_max_rate; + int err; + int i; + + if (type != TC_SETUP_QDISC_MQPRIO) + return -EOPNOTSUPP; + + has_min_rate = !!(mqprio->flags & TC_MQPRIO_F_MIN_RATE); + has_max_rate = !!(mqprio->flags & TC_MQPRIO_F_MAX_RATE); + + err = aq_validate_mqprio_opt(aq_nic, mqprio, mqprio->qopt.num_tc); + if (err) + return err; + + for (i = 0; i < mqprio->qopt.num_tc; i++) { + if (has_max_rate) { + u64 max_rate = mqprio->max_rate[i]; + + do_div(max_rate, AQ_MBPS_DIVISOR); + aq_nic_setup_tc_max_rate(aq_nic, i, (u32)max_rate); + } + + if (has_min_rate) { + u64 min_rate = mqprio->min_rate[i]; + + do_div(min_rate, AQ_MBPS_DIVISOR); + aq_nic_setup_tc_min_rate(aq_nic, i, (u32)min_rate); + } + } + + return aq_nic_setup_tc_mqprio(aq_nic, mqprio->qopt.num_tc, + mqprio->qopt.prio_tc_map); +} + static const struct net_device_ops aq_ndev_ops = { .ndo_open = aq_ndev_open, .ndo_stop = aq_ndev_close, @@ -341,6 +410,7 @@ static const struct net_device_ops aq_ndev_ops = { .ndo_do_ioctl = aq_ndev_ioctl, .ndo_vlan_rx_add_vid = aq_ndo_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = aq_ndo_vlan_rx_kill_vid, + .ndo_setup_tc = aq_ndo_setup_tc, }; static int __init aq_ndev_init_module(void) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 1c6d12deb47a..4435c6374f7e 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -26,6 +26,7 @@ #include <linux/ip.h> #include <linux/tcp.h> #include <net/ip.h> +#include <net/pkt_cls.h> static unsigned int aq_itr = AQ_CFG_INTERRUPT_MODERATION_AUTO; module_param_named(aq_itr, aq_itr, uint, 0644); @@ -64,10 +65,38 @@ static void aq_nic_rss_init(struct aq_nic_s *self, unsigned int num_rss_queues) rss_params->indirection_table[i] = i & (num_rss_queues - 1); } +/* Recalculate the number of vectors */ +static void aq_nic_cfg_update_num_vecs(struct aq_nic_s *self) +{ + struct aq_nic_cfg_s *cfg = &self->aq_nic_cfg; + + cfg->vecs = min(cfg->aq_hw_caps->vecs, AQ_CFG_VECS_DEF); + cfg->vecs = min(cfg->vecs, num_online_cpus()); + if (self->irqvecs > AQ_HW_SERVICE_IRQS) + cfg->vecs = min(cfg->vecs, self->irqvecs - AQ_HW_SERVICE_IRQS); + /* cfg->vecs should be power of 2 for RSS */ + cfg->vecs = rounddown_pow_of_two(cfg->vecs); + + if (ATL_HW_IS_CHIP_FEATURE(self->aq_hw, ANTIGUA)) { + if (cfg->tcs > 2) + cfg->vecs = min(cfg->vecs, 4U); + } + + if (cfg->vecs <= 4) + cfg->tc_mode = AQ_TC_MODE_8TCS; + else + cfg->tc_mode = AQ_TC_MODE_4TCS; + + /*rss rings */ + cfg->num_rss_queues = min(cfg->vecs, AQ_CFG_NUM_RSS_QUEUES_DEF); + aq_nic_rss_init(self, cfg->num_rss_queues); +} + /* Checks hw_caps and 'corrects' aq_nic_cfg in runtime */ void aq_nic_cfg_start(struct aq_nic_s *self) { struct aq_nic_cfg_s *cfg = &self->aq_nic_cfg; + int i; cfg->tcs = AQ_CFG_TCS_DEF; @@ -79,7 +108,6 @@ void aq_nic_cfg_start(struct aq_nic_s *self) cfg->rxpageorder = AQ_CFG_RX_PAGEORDER; cfg->is_rss = AQ_CFG_IS_RSS_DEF; - cfg->num_rss_queues = AQ_CFG_NUM_RSS_QUEUES_DEF; cfg->aq_rss.base_cpu_number = AQ_CFG_RSS_BASE_CPU_NUM_DEF; cfg->fc.req = AQ_CFG_FC_MODE; cfg->wol = AQ_CFG_WOL_MODES; @@ -89,29 +117,13 @@ void aq_nic_cfg_start(struct aq_nic_s *self) cfg->is_autoneg = AQ_CFG_IS_AUTONEG_DEF; cfg->is_lro = AQ_CFG_IS_LRO_DEF; + cfg->is_ptp = true; /*descriptors */ cfg->rxds = min(cfg->aq_hw_caps->rxds_max, AQ_CFG_RXDS_DEF); cfg->txds = 
min(cfg->aq_hw_caps->txds_max, AQ_CFG_TXDS_DEF); - /*rss rings */ - cfg->vecs = min(cfg->aq_hw_caps->vecs, AQ_CFG_VECS_DEF); - cfg->vecs = min(cfg->vecs, num_online_cpus()); - if (self->irqvecs > AQ_HW_SERVICE_IRQS) - cfg->vecs = min(cfg->vecs, self->irqvecs - AQ_HW_SERVICE_IRQS); - /* cfg->vecs should be power of 2 for RSS */ - if (cfg->vecs >= 8U) - cfg->vecs = 8U; - else if (cfg->vecs >= 4U) - cfg->vecs = 4U; - else if (cfg->vecs >= 2U) - cfg->vecs = 2U; - else - cfg->vecs = 1U; - - cfg->num_rss_queues = min(cfg->vecs, AQ_CFG_NUM_RSS_QUEUES_DEF); - - aq_nic_rss_init(self, cfg->num_rss_queues); + aq_nic_cfg_update_num_vecs(self); cfg->irq_type = aq_pci_func_get_irq_type(self); @@ -136,6 +148,9 @@ void aq_nic_cfg_start(struct aq_nic_s *self) cfg->is_vlan_rx_strip = !!(cfg->features & NETIF_F_HW_VLAN_CTAG_RX); cfg->is_vlan_tx_insert = !!(cfg->features & NETIF_F_HW_VLAN_CTAG_TX); cfg->is_vlan_force_promisc = true; + + for (i = 0; i < sizeof(cfg->prio_tc_map); i++) + cfg->prio_tc_map[i] = cfg->tcs * i / 8; } static int aq_nic_update_link_status(struct aq_nic_s *self) @@ -181,6 +196,9 @@ static int aq_nic_update_link_status(struct aq_nic_s *self) #if IS_ENABLED(CONFIG_MACSEC) aq_macsec_enable(self); #endif + if (self->aq_hw_ops->hw_tc_rate_limit_set) + self->aq_hw_ops->hw_tc_rate_limit_set(self->aq_hw); + netif_tx_wake_all_queues(self->ndev); } if (netif_carrier_ok(self->ndev) && !self->link_status.mbps) { @@ -399,21 +417,29 @@ int aq_nic_init(struct aq_nic_s *self) err = aq_phy_init(self->aq_hw); } - for (i = 0U, aq_vec = self->aq_vec[0]; - self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i]) + for (i = 0U; i < self->aq_vecs; i++) { + aq_vec = self->aq_vec[i]; + err = aq_vec_ring_alloc(aq_vec, self, i, + aq_nic_get_cfg(self)); + if (err) + goto err_exit; + aq_vec_init(aq_vec, self->aq_hw_ops, self->aq_hw); + } - err = aq_ptp_init(self, self->irqvecs - 1); - if (err < 0) - goto err_exit; + if (aq_nic_get_cfg(self)->is_ptp) { + err = aq_ptp_init(self, self->irqvecs - 1); + if (err < 0) + goto err_exit; - err = aq_ptp_ring_alloc(self); - if (err < 0) - goto err_exit; + err = aq_ptp_ring_alloc(self); + if (err < 0) + goto err_exit; - err = aq_ptp_ring_init(self); - if (err < 0) - goto err_exit; + err = aq_ptp_ring_init(self); + if (err < 0) + goto err_exit; + } netif_carrier_off(self->ndev); @@ -424,9 +450,12 @@ err_exit: int aq_nic_start(struct aq_nic_s *self) { struct aq_vec_s *aq_vec = NULL; + struct aq_nic_cfg_s *cfg; unsigned int i = 0U; int err = 0; + cfg = aq_nic_get_cfg(self); + err = self->aq_hw_ops->hw_multicast_list_set(self->aq_hw, self->mc_list.ar, self->mc_list.count); @@ -464,7 +493,7 @@ int aq_nic_start(struct aq_nic_s *self) timer_setup(&self->service_timer, aq_nic_service_timer_cb, 0); aq_nic_service_timer_cb(&self->service_timer); - if (self->aq_nic_cfg.is_polling) { + if (cfg->is_polling) { timer_setup(&self->polling_timer, aq_nic_polling_timer_cb, 0); mod_timer(&self->polling_timer, jiffies + AQ_CFG_POLLING_TIMER_INTERVAL); @@ -482,16 +511,16 @@ int aq_nic_start(struct aq_nic_s *self) if (err < 0) goto err_exit; - if (self->aq_nic_cfg.link_irq_vec) { + if (cfg->link_irq_vec) { int irqvec = pci_irq_vector(self->pdev, - self->aq_nic_cfg.link_irq_vec); + cfg->link_irq_vec); err = request_threaded_irq(irqvec, NULL, aq_linkstate_threaded_isr, IRQF_SHARED | IRQF_ONESHOT, self->ndev->name, self); if (err < 0) goto err_exit; - self->msix_entry_mask |= (1 << self->aq_nic_cfg.link_irq_vec); + self->msix_entry_mask |= (1 << cfg->link_irq_vec); } err = 
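The prio_tc_map default above spreads the eight 802.1p priorities evenly across the configured TCs with plain integer math (tcs * prio / 8). A small sketch showing the resulting maps for each power-of-two TC count:

#include <stdio.h>

int main(void)
{
    /* Default 802.1p priority -> TC spread used above: tcs * prio / 8 */
    for (unsigned int tcs = 1; tcs <= 8; tcs *= 2) {
        printf("tcs=%u:", tcs);
        for (unsigned int prio = 0; prio < 8; prio++)
            printf(" %u", tcs * prio / 8);
        printf("\n");
    }
    return 0;
}

For tcs=4, for example, this yields the map 0 0 1 1 2 2 3 3, i.e. two priorities per TC.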
self->aq_hw_ops->hw_irq_enable(self->aq_hw, @@ -500,14 +529,21 @@ int aq_nic_start(struct aq_nic_s *self) goto err_exit; } - err = netif_set_real_num_tx_queues(self->ndev, self->aq_vecs); + err = netif_set_real_num_tx_queues(self->ndev, + self->aq_vecs * cfg->tcs); if (err < 0) goto err_exit; - err = netif_set_real_num_rx_queues(self->ndev, self->aq_vecs); + err = netif_set_real_num_rx_queues(self->ndev, + self->aq_vecs * cfg->tcs); if (err < 0) goto err_exit; + for (i = 0; i < cfg->tcs; i++) { + u16 offset = self->aq_vecs * i; + + netdev_set_tc_queue(self->ndev, i, self->aq_vecs, offset); + } netif_tx_start_all_queues(self->ndev); err_exit: @@ -518,6 +554,8 @@ unsigned int aq_nic_map_skb(struct aq_nic_s *self, struct sk_buff *skb, struct aq_ring_s *ring) { unsigned int nr_frags = skb_shinfo(skb)->nr_frags; + struct aq_nic_cfg_s *cfg = aq_nic_get_cfg(self); + struct device *dev = aq_nic_get_dev(self); struct aq_ring_buff_s *first = NULL; u8 ipver = ip_hdr(skb)->version; struct aq_ring_buff_s *dx_buff; @@ -559,7 +597,7 @@ unsigned int aq_nic_map_skb(struct aq_nic_s *self, struct sk_buff *skb, need_context_tag = true; } - if (self->aq_nic_cfg.is_vlan_tx_insert && skb_vlan_tag_present(skb)) { + if (cfg->is_vlan_tx_insert && skb_vlan_tag_present(skb)) { dx_buff->vlan_tx_tag = skb_vlan_tag_get(skb); dx_buff->len_pkt = skb->len; dx_buff->is_vlan = 1U; @@ -574,12 +612,12 @@ unsigned int aq_nic_map_skb(struct aq_nic_s *self, struct sk_buff *skb, } dx_buff->len = skb_headlen(skb); - dx_buff->pa = dma_map_single(aq_nic_get_dev(self), + dx_buff->pa = dma_map_single(dev, skb->data, dx_buff->len, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(aq_nic_get_dev(self), dx_buff->pa))) { + if (unlikely(dma_mapping_error(dev, dx_buff->pa))) { ret = 0; goto exit; } @@ -611,13 +649,13 @@ unsigned int aq_nic_map_skb(struct aq_nic_s *self, struct sk_buff *skb, else buff_size = frag_len; - frag_pa = skb_frag_dma_map(aq_nic_get_dev(self), + frag_pa = skb_frag_dma_map(dev, frag, buff_offset, buff_size, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(aq_nic_get_dev(self), + if (unlikely(dma_mapping_error(dev, frag_pa))) goto mapping_error; @@ -651,12 +689,12 @@ mapping_error: if (!(dx_buff->is_gso_tcp || dx_buff->is_gso_udp) && !dx_buff->is_vlan && dx_buff->pa) { if (unlikely(dx_buff->is_sop)) { - dma_unmap_single(aq_nic_get_dev(self), + dma_unmap_single(dev, dx_buff->pa, dx_buff->len, DMA_TO_DEVICE); } else { - dma_unmap_page(aq_nic_get_dev(self), + dma_unmap_page(dev, dx_buff->pa, dx_buff->len, DMA_TO_DEVICE); @@ -670,15 +708,16 @@ exit: int aq_nic_xmit(struct aq_nic_s *self, struct sk_buff *skb) { - unsigned int vec = skb->queue_mapping % self->aq_nic_cfg.vecs; + struct aq_nic_cfg_s *cfg = aq_nic_get_cfg(self); + unsigned int vec = skb->queue_mapping % cfg->vecs; + unsigned int tc = skb->queue_mapping / cfg->vecs; struct aq_ring_s *ring = NULL; unsigned int frags = 0U; int err = NETDEV_TX_OK; - unsigned int tc = 0U; frags = skb_shinfo(skb)->nr_frags + 1; - ring = self->aq_ring_tx[AQ_NIC_TCVEC2RING(self, tc, vec)]; + ring = self->aq_ring_tx[AQ_NIC_CFG_TCVEC2RING(cfg, tc, vec)]; if (frags > AQ_CFG_SKB_FRAGS_MAX) { dev_kfree_skb_any(skb); @@ -687,13 +726,14 @@ int aq_nic_xmit(struct aq_nic_s *self, struct sk_buff *skb) aq_ring_update_queue_state(ring); - if (self->aq_nic_cfg.priv_flags & BIT(AQ_HW_LOOPBACK_DMA_NET)) { + if (cfg->priv_flags & BIT(AQ_HW_LOOPBACK_DMA_NET)) { err = NETDEV_TX_BUSY; goto err_exit; } /* Above status update may stop the queue. Check this. 
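The queue bring-up above exposes aq_vecs * tcs netdev queues and announces each TC's block with netdev_set_tc_queue(ndev, tc, aq_vecs, aq_vecs * tc); aq_nic_xmit() then recovers (tc, vec) from skb->queue_mapping with a divide and a modulo. A sketch verifying the encode/decode round trip, with 4 vectors and 8 TCs assumed for illustration:

#include <assert.h>
#include <stdio.h>

int main(void)
{
    const unsigned int vecs = 4, tcs = 8; /* hypothetical 8-TC config */

    /* netdev queues are laid out TC-major: queue = tc * vecs + vec,
     * matching the netdev_set_tc_queue() offsets above; aq_nic_xmit()
     * inverts this with queue_mapping / vecs and queue_mapping % vecs. */
    for (unsigned int tc = 0; tc < tcs; tc++)
        for (unsigned int vec = 0; vec < vecs; vec++) {
            unsigned int q = tc * vecs + vec;

            assert(q % vecs == vec && q / vecs == tc);
        }
    printf("%u tx queues exposed\n", vecs * tcs);
    return 0;
}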
*/ - if (__netif_subqueue_stopped(self->ndev, ring->idx)) { + if (__netif_subqueue_stopped(self->ndev, + AQ_NIC_RING2QMAP(self, ring->idx))) { err = NETDEV_TX_BUSY; goto err_exit; } @@ -823,6 +863,7 @@ u64 *aq_nic_get_stats(struct aq_nic_s *self, u64 *data) struct aq_stats_s *stats; unsigned int count = 0U; unsigned int i = 0U; + unsigned int tc; if (self->aq_fw_ops->update_stats) { mutex_lock(&self->fwreq_mutex); @@ -861,10 +902,13 @@ u64 *aq_nic_get_stats(struct aq_nic_s *self, u64 *data) data += i; - for (i = 0U, aq_vec = self->aq_vec[0]; - aq_vec && self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i]) { - data += count; - aq_vec_get_sw_stats(aq_vec, data, &count); + for (tc = 0U; tc < self->aq_nic_cfg.tcs; tc++) { + for (i = 0U, aq_vec = self->aq_vec[0]; + aq_vec && self->aq_vecs > i; + ++i, aq_vec = self->aq_vec[i]) { + data += count; + aq_vec_get_sw_stats(aq_vec, tc, data, &count); + } } data += count; @@ -1145,9 +1189,11 @@ void aq_nic_deinit(struct aq_nic_s *self, bool link_down) if (!self) goto err_exit; - for (i = 0U, aq_vec = self->aq_vec[0]; - self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i]) + for (i = 0U; i < self->aq_vecs; i++) { + aq_vec = self->aq_vec[i]; aq_vec_deinit(aq_vec); + aq_vec_ring_free(aq_vec); + } aq_ptp_unregister(self); aq_ptp_ring_deinit(self); @@ -1180,6 +1226,22 @@ void aq_nic_free_vectors(struct aq_nic_s *self) err_exit:; } +int aq_nic_realloc_vectors(struct aq_nic_s *self) +{ + struct aq_nic_cfg_s *cfg = aq_nic_get_cfg(self); + + aq_nic_free_vectors(self); + + for (self->aq_vecs = 0; self->aq_vecs < cfg->vecs; self->aq_vecs++) { + self->aq_vec[self->aq_vecs] = aq_vec_alloc(self, self->aq_vecs, + cfg); + if (unlikely(!self->aq_vec[self->aq_vecs])) + return -ENOMEM; + } + + return 0; +} + void aq_nic_shutdown(struct aq_nic_s *self) { int err = 0; @@ -1245,3 +1307,98 @@ void aq_nic_release_filter(struct aq_nic_s *self, enum aq_rx_filter_type type, break; } } + +int aq_nic_setup_tc_mqprio(struct aq_nic_s *self, u32 tcs, u8 *prio_tc_map) +{ + struct aq_nic_cfg_s *cfg = &self->aq_nic_cfg; + const unsigned int prev_vecs = cfg->vecs; + bool ndev_running; + int err = 0; + int i; + + /* if this is already the active configuration or + * a disable request (tcs is 0) and QoS is already disabled + */ + if (tcs == cfg->tcs || (tcs == 0 && !cfg->is_qos)) + return 0; + + ndev_running = netif_running(self->ndev); + if (ndev_running) + dev_close(self->ndev); + + cfg->tcs = tcs; + if (cfg->tcs == 0) + cfg->tcs = 1; + if (prio_tc_map) + memcpy(cfg->prio_tc_map, prio_tc_map, sizeof(cfg->prio_tc_map)); + else + for (i = 0; i < sizeof(cfg->prio_tc_map); i++) + cfg->prio_tc_map[i] = cfg->tcs * i / 8; + + cfg->is_qos = (tcs != 0 ?
true : false); + cfg->is_ptp = (cfg->tcs <= AQ_HW_PTP_TC); + if (!cfg->is_ptp) + netdev_warn(self->ndev, "%s\n", + "PTP is auto disabled due to requested TC count."); + + netdev_set_num_tc(self->ndev, cfg->tcs); + + /* Changing the number of TCs might change the number of vectors */ + aq_nic_cfg_update_num_vecs(self); + if (prev_vecs != cfg->vecs) { + err = aq_nic_realloc_vectors(self); + if (err) + goto err_exit; + } + + if (ndev_running) + err = dev_open(self->ndev, NULL); + +err_exit: + return err; +} + +int aq_nic_setup_tc_max_rate(struct aq_nic_s *self, const unsigned int tc, + const u32 max_rate) +{ + struct aq_nic_cfg_s *cfg = &self->aq_nic_cfg; + + if (tc >= AQ_CFG_TCS_MAX) + return -EINVAL; + + if (max_rate && max_rate < 10) { + netdev_warn(self->ndev, + "Setting %s to the minimum usable value of %dMbps.\n", + "max rate", 10); + cfg->tc_max_rate[tc] = 10; + } else { + cfg->tc_max_rate[tc] = max_rate; + } + + return 0; +} + +int aq_nic_setup_tc_min_rate(struct aq_nic_s *self, const unsigned int tc, + const u32 min_rate) +{ + struct aq_nic_cfg_s *cfg = &self->aq_nic_cfg; + + if (tc >= AQ_CFG_TCS_MAX) + return -EINVAL; + + if (min_rate) + set_bit(tc, &cfg->tc_min_rate_msk); + else + clear_bit(tc, &cfg->tc_min_rate_msk); + + if (min_rate && min_rate < 20) { + netdev_warn(self->ndev, + "Setting %s to the minimum usable value of %dMbps.\n", + "min rate", 20); + cfg->tc_min_rate[tc] = 20; + } else { + cfg->tc_min_rate[tc] = min_rate; + } + + return 0; +} diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h index 0663b8d0220d..2ab003065e62 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h @@ -59,8 +59,15 @@ struct aq_nic_cfg_s { bool is_polling; bool is_rss; bool is_lro; + bool is_qos; + bool is_ptp; + enum aq_tc_mode tc_mode; u32 priv_flags; u8 tcs; + u8 prio_tc_map[8]; + u32 tc_max_rate[AQ_CFG_TCS_MAX]; + unsigned long tc_min_rate_msk; + u32 tc_min_rate[AQ_CFG_TCS_MAX]; struct aq_rss_parameters aq_rss; u32 eee_speeds; }; @@ -77,8 +84,16 @@ struct aq_nic_cfg_s { #define AQ_NIC_WOL_MODES (WAKE_MAGIC |\ WAKE_PHY) -#define AQ_NIC_TCVEC2RING(_NIC_, _TC_, _VEC_) \ - ((_TC_) * AQ_CFG_TCS_MAX + (_VEC_)) +#define AQ_NIC_CFG_RING_PER_TC(_NIC_CFG_) \ + (((_NIC_CFG_)->tc_mode == AQ_TC_MODE_4TCS) ? 
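aq_nic_setup_tc_max_rate() and aq_nic_setup_tc_min_rate() above clamp any non-zero request up to hardware floors of 10 and 20 Mbps respectively, while 0 leaves the shaper or guarantee disabled for that TC. A minimal sketch of that clamping:

#include <stdio.h>

/* Floors used above: 10 Mbps for max rate, 20 Mbps for min rate;
 * 0 keeps the limit disabled for the TC. */
static unsigned int clamp_rate(unsigned int mbps, unsigned int floor)
{
    if (mbps && mbps < floor)
        return floor;
    return mbps;
}

int main(void)
{
    printf("max 3 -> %u\n", clamp_rate(3, 10));  /* bumped to 10 */
    printf("min 5 -> %u\n", clamp_rate(5, 20));  /* bumped to 20 */
    printf("min 0 -> %u\n", clamp_rate(0, 20));  /* stays disabled */
    return 0;
}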
8 : 4) + +#define AQ_NIC_CFG_TCVEC2RING(_NIC_CFG_, _TC_, _VEC_) \ + ((_TC_) * AQ_NIC_CFG_RING_PER_TC(_NIC_CFG_) + (_VEC_)) + +#define AQ_NIC_RING2QMAP(_NIC_, _ID_) \ + ((_ID_) / AQ_NIC_CFG_RING_PER_TC(&(_NIC_)->aq_nic_cfg) * \ + (_NIC_)->aq_vecs + \ + ((_ID_) % AQ_NIC_CFG_RING_PER_TC(&(_NIC_)->aq_nic_cfg))) struct aq_hw_rx_fl2 { struct aq_rx_filter_vlan aq_vlans[AQ_VLAN_MAX_FILTERS]; @@ -104,7 +119,7 @@ struct aq_nic_s { atomic_t flags; u32 msg_enable; struct aq_vec_s *aq_vec[AQ_CFG_VECS_MAX]; - struct aq_ring_s *aq_ring_tx[AQ_CFG_VECS_MAX * AQ_CFG_TCS_MAX]; + struct aq_ring_s *aq_ring_tx[AQ_HW_QUEUES_MAX]; struct aq_hw_s *aq_hw; struct net_device *ndev; unsigned int aq_vecs; @@ -164,6 +179,7 @@ void aq_nic_deinit(struct aq_nic_s *self, bool link_down); void aq_nic_set_power(struct aq_nic_s *self); void aq_nic_free_hot_resources(struct aq_nic_s *self); void aq_nic_free_vectors(struct aq_nic_s *self); +int aq_nic_realloc_vectors(struct aq_nic_s *self); int aq_nic_set_mtu(struct aq_nic_s *self, int new_mtu); int aq_nic_set_mac(struct aq_nic_s *self, struct net_device *ndev); int aq_nic_set_packet_filter(struct aq_nic_s *self, unsigned int flags); @@ -181,4 +197,9 @@ void aq_nic_shutdown(struct aq_nic_s *self); u8 aq_nic_reserve_filter(struct aq_nic_s *self, enum aq_rx_filter_type type); void aq_nic_release_filter(struct aq_nic_s *self, enum aq_rx_filter_type type, u32 location); +int aq_nic_setup_tc_mqprio(struct aq_nic_s *self, u32 tcs, u8 *prio_tc_map); +int aq_nic_setup_tc_max_rate(struct aq_nic_s *self, const unsigned int tc, + const u32 max_rate); +int aq_nic_setup_tc_min_rate(struct aq_nic_s *self, const unsigned int tc, + const u32 min_rate); #endif /* AQ_NIC_H */ diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c index d10fff8a8c71..41c0f560f95b 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c @@ -431,6 +431,9 @@ static int atl_resume_common(struct device *dev, bool deep) netif_tx_start_all_queues(nic->ndev); err_exit: + if (ret < 0) + aq_nic_deinit(nic, true); + rtnl_unlock(); return ret; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c index 58e8c641e8b3..599ced261b2a 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c @@ -945,26 +945,29 @@ void aq_ptp_ring_deinit(struct aq_nic_s *aq_nic) #define PTP_4TC_RING_IDX 16 #define PTP_HWST_RING_IDX 31 +/* Index must be 8 (8 TCs) or 16 (4 TCs). + * It depends on Traffic Class mode. + */ +static unsigned int ptp_ring_idx(const enum aq_tc_mode tc_mode) +{ + if (tc_mode == AQ_TC_MODE_8TCS) + return PTP_8TC_RING_IDX; + + return PTP_4TC_RING_IDX; +} + int aq_ptp_ring_alloc(struct aq_nic_s *aq_nic) { struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp; unsigned int tx_ring_idx, rx_ring_idx; struct aq_ring_s *hwts; - u32 tx_tc_mode, rx_tc_mode; struct aq_ring_s *ring; int err; if (!aq_ptp) return 0; - /* Index must to be 8 (8 TCs) or 16 (4 TCs). - * It depends from Traffic Class mode. 
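The macros above define the ring layout: AQ_NIC_CFG_TCVEC2RING() places ring (tc, vec) at tc * ring_per_tc + vec, and AQ_NIC_RING2QMAP() folds a ring index back into the linear netdev queue numbering. A sketch checking that the two stay consistent, assuming the 4-TC mode (8 rings per TC) with all 8 vectors in use:

#include <assert.h>
#include <stdio.h>

int main(void)
{
    /* hypothetical 4-TC mode: 8 rings per TC, 8 active vectors */
    const unsigned int ring_per_tc = 8, aq_vecs = 8, tcs = 4;

    for (unsigned int tc = 0; tc < tcs; tc++)
        for (unsigned int vec = 0; vec < aq_vecs; vec++) {
            unsigned int ring = tc * ring_per_tc + vec;      /* TCVEC2RING */
            unsigned int qmap = ring / ring_per_tc * aq_vecs /* RING2QMAP  */
                              + ring % ring_per_tc;

            /* The two agree because vec < ring_per_tc; with fewer vectors
             * than rings per TC the trailing ring slots are simply never
             * allocated. */
            assert(qmap == tc * aq_vecs + vec);
        }
    printf("ring<->queue mapping consistent\n");
    return 0;
}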
- */ - aq_nic->aq_hw_ops->hw_tx_tc_mode_get(aq_nic->aq_hw, &tx_tc_mode); - if (tx_tc_mode == 0) - tx_ring_idx = PTP_8TC_RING_IDX; - else - tx_ring_idx = PTP_4TC_RING_IDX; + tx_ring_idx = ptp_ring_idx(aq_nic->aq_nic_cfg.tc_mode); ring = aq_ring_tx_alloc(&aq_ptp->ptp_tx, aq_nic, tx_ring_idx, &aq_nic->aq_nic_cfg); @@ -973,11 +976,7 @@ int aq_ptp_ring_alloc(struct aq_nic_s *aq_nic) goto err_exit; } - aq_nic->aq_hw_ops->hw_rx_tc_mode_get(aq_nic->aq_hw, &rx_tc_mode); - if (rx_tc_mode == 0) - rx_ring_idx = PTP_8TC_RING_IDX; - else - rx_ring_idx = PTP_4TC_RING_IDX; + rx_ring_idx = ptp_ring_idx(aq_nic->aq_nic_cfg.tc_mode); ring = aq_ring_rx_alloc(&aq_ptp->ptp_rx, aq_nic, rx_ring_idx, &aq_nic->aq_nic_cfg); diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index bae95a618560..68fdb3994088 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -232,8 +232,11 @@ void aq_ring_queue_wake(struct aq_ring_s *ring) { struct net_device *ndev = aq_nic_get_ndev(ring->aq_nic); - if (__netif_subqueue_stopped(ndev, ring->idx)) { - netif_wake_subqueue(ndev, ring->idx); + if (__netif_subqueue_stopped(ndev, + AQ_NIC_RING2QMAP(ring->aq_nic, + ring->idx))) { + netif_wake_subqueue(ndev, + AQ_NIC_RING2QMAP(ring->aq_nic, ring->idx)); ring->stats.tx.queue_restarts++; } } @@ -242,8 +245,11 @@ void aq_ring_queue_stop(struct aq_ring_s *ring) { struct net_device *ndev = aq_nic_get_ndev(ring->aq_nic); - if (!__netif_subqueue_stopped(ndev, ring->idx)) - netif_stop_subqueue(ndev, ring->idx); + if (!__netif_subqueue_stopped(ndev, + AQ_NIC_RING2QMAP(ring->aq_nic, + ring->idx))) + netif_stop_subqueue(ndev, + AQ_NIC_RING2QMAP(ring->aq_nic, ring->idx)); } bool aq_ring_tx_clean(struct aq_ring_s *self) @@ -466,7 +472,10 @@ int aq_ring_rx_clean(struct aq_ring_s *self, buff->is_hash_l4 ? PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_NONE); /* Send all PTP traffic to 0 queue */ - skb_record_rx_queue(skb, is_ptp_ring ? 0 : self->idx); + skb_record_rx_queue(skb, + is_ptp_ring ? 
0 + : AQ_NIC_RING2QMAP(self->aq_nic, + self->idx)); ++self->stats.rx.packets; self->stats.rx.bytes += skb->len; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c index f40a427970dc..d1d43c8ce400 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c @@ -103,16 +103,11 @@ err_exit: struct aq_vec_s *aq_vec_alloc(struct aq_nic_s *aq_nic, unsigned int idx, struct aq_nic_cfg_s *aq_nic_cfg) { - struct aq_ring_s *ring = NULL; struct aq_vec_s *self = NULL; - unsigned int i = 0U; - int err = 0; self = kzalloc(sizeof(*self), GFP_KERNEL); - if (!self) { - err = -ENOMEM; + if (!self) goto err_exit; - } self->aq_nic = aq_nic; self->aq_ring_param.vec_idx = idx; @@ -128,10 +123,20 @@ struct aq_vec_s *aq_vec_alloc(struct aq_nic_s *aq_nic, unsigned int idx, netif_napi_add(aq_nic_get_ndev(aq_nic), &self->napi, aq_vec_poll, AQ_CFG_NAPI_WEIGHT); +err_exit: + return self; +} + +int aq_vec_ring_alloc(struct aq_vec_s *self, struct aq_nic_s *aq_nic, + unsigned int idx, struct aq_nic_cfg_s *aq_nic_cfg) +{ + struct aq_ring_s *ring = NULL; + unsigned int i = 0U; + int err = 0; + for (i = 0; i < aq_nic_cfg->tcs; ++i) { - unsigned int idx_ring = AQ_NIC_TCVEC2RING(self->nic, - self->tx_rings, - self->aq_ring_param.vec_idx); + const unsigned int idx_ring = AQ_NIC_CFG_TCVEC2RING(aq_nic_cfg, + i, idx); ring = aq_ring_tx_alloc(&self->ring[i][AQ_VEC_TX_ID], aq_nic, idx_ring, aq_nic_cfg); @@ -156,11 +161,11 @@ struct aq_vec_s *aq_vec_alloc(struct aq_nic_s *aq_nic, unsigned int idx, err_exit: if (err < 0) { - aq_vec_free(self); + aq_vec_ring_free(self); self = NULL; } - return self; + return err; } int aq_vec_init(struct aq_vec_s *self, const struct aq_hw_ops *aq_hw_ops, @@ -270,6 +275,18 @@ err_exit:; void aq_vec_free(struct aq_vec_s *self) { + if (!self) + goto err_exit; + + netif_napi_del(&self->napi); + + kfree(self); + +err_exit:; +} + +void aq_vec_ring_free(struct aq_vec_s *self) +{ struct aq_ring_s *ring = NULL; unsigned int i = 0U; @@ -279,13 +296,12 @@ void aq_vec_free(struct aq_vec_s *self) for (i = 0U, ring = self->ring[0]; self->tx_rings > i; ++i, ring = self->ring[i]) { aq_ring_free(&ring[AQ_VEC_TX_ID]); - aq_ring_free(&ring[AQ_VEC_RX_ID]); + if (i < self->rx_rings) + aq_ring_free(&ring[AQ_VEC_RX_ID]); } - netif_napi_del(&self->napi); - - kfree(self); - + self->tx_rings = 0; + self->rx_rings = 0; err_exit:; } @@ -333,16 +349,14 @@ cpumask_t *aq_vec_get_affinity_mask(struct aq_vec_s *self) return &self->aq_ring_param.affinity_mask; } -void aq_vec_add_stats(struct aq_vec_s *self, - struct aq_ring_stats_rx_s *stats_rx, - struct aq_ring_stats_tx_s *stats_tx) +static void aq_vec_add_stats(struct aq_vec_s *self, + const unsigned int tc, + struct aq_ring_stats_rx_s *stats_rx, + struct aq_ring_stats_tx_s *stats_tx) { - struct aq_ring_s *ring = NULL; - unsigned int r = 0U; + struct aq_ring_s *ring = self->ring[tc]; - for (r = 0U, ring = self->ring[0]; - self->tx_rings > r; ++r, ring = self->ring[r]) { - struct aq_ring_stats_tx_s *tx = &ring[AQ_VEC_TX_ID].stats.tx; + if (tc < self->rx_rings) { struct aq_ring_stats_rx_s *rx = &ring[AQ_VEC_RX_ID].stats.rx; stats_rx->packets += rx->packets; @@ -353,6 +367,10 @@ void aq_vec_add_stats(struct aq_vec_s *self, stats_rx->pg_losts += rx->pg_losts; stats_rx->pg_flips += rx->pg_flips; stats_rx->pg_reuses += rx->pg_reuses; + } + + if (tc < self->tx_rings) { + struct aq_ring_stats_tx_s *tx = &ring[AQ_VEC_TX_ID].stats.tx; stats_tx->packets += tx->packets; 
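With aq_vec_get_sw_stats() now taking a TC argument, aq_nic_get_stats() walks the software counters TC-major (all vectors of TC0, then all vectors of TC1, and so on) so the exported array lines up with per-TC ethtool strings. A sketch of that export order, with 2 TCs and 4 vectors assumed for illustration:

#include <stdio.h>

int main(void)
{
    const unsigned int tcs = 2, vecs = 4; /* hypothetical config */
    unsigned int slot = 0;

    /* Outer loop over TCs, inner over vectors, mirroring the nested
     * loops in aq_nic_get_stats() above. */
    for (unsigned int tc = 0; tc < tcs; tc++)
        for (unsigned int vec = 0; vec < vecs; vec++)
            printf("slot %2u: tc%u/vec%u counters\n", slot++, tc, vec);
    return 0;
}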
stats_tx->bytes += tx->bytes; @@ -361,7 +379,8 @@ void aq_vec_add_stats(struct aq_vec_s *self, } } -int aq_vec_get_sw_stats(struct aq_vec_s *self, u64 *data, unsigned int *p_count) +int aq_vec_get_sw_stats(struct aq_vec_s *self, const unsigned int tc, u64 *data, + unsigned int *p_count) { struct aq_ring_stats_rx_s stats_rx; struct aq_ring_stats_tx_s stats_tx; @@ -369,7 +388,8 @@ int aq_vec_get_sw_stats(struct aq_vec_s *self, u64 *data, unsigned int *p_count) memset(&stats_rx, 0U, sizeof(struct aq_ring_stats_rx_s)); memset(&stats_tx, 0U, sizeof(struct aq_ring_stats_tx_s)); - aq_vec_add_stats(self, &stats_rx, &stats_tx); + + aq_vec_add_stats(self, tc, &stats_rx, &stats_tx); /* This data should mimic aq_ethtool_queue_stat_names structure */ diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_vec.h b/drivers/net/ethernet/aquantia/atlantic/aq_vec.h index 0fe8e0904c7f..541af85e6510 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_vec.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_vec.h @@ -25,17 +25,17 @@ irqreturn_t aq_vec_isr(int irq, void *private); irqreturn_t aq_vec_isr_legacy(int irq, void *private); struct aq_vec_s *aq_vec_alloc(struct aq_nic_s *aq_nic, unsigned int idx, struct aq_nic_cfg_s *aq_nic_cfg); +int aq_vec_ring_alloc(struct aq_vec_s *self, struct aq_nic_s *aq_nic, + unsigned int idx, struct aq_nic_cfg_s *aq_nic_cfg); int aq_vec_init(struct aq_vec_s *self, const struct aq_hw_ops *aq_hw_ops, struct aq_hw_s *aq_hw); void aq_vec_deinit(struct aq_vec_s *self); void aq_vec_free(struct aq_vec_s *self); +void aq_vec_ring_free(struct aq_vec_s *self); int aq_vec_start(struct aq_vec_s *self); void aq_vec_stop(struct aq_vec_s *self); cpumask_t *aq_vec_get_affinity_mask(struct aq_vec_s *self); -int aq_vec_get_sw_stats(struct aq_vec_s *self, u64 *data, +int aq_vec_get_sw_stats(struct aq_vec_s *self, const unsigned int tc, u64 *data, unsigned int *p_count); -void aq_vec_add_stats(struct aq_vec_s *self, - struct aq_ring_stats_rx_s *stats_rx, - struct aq_ring_stats_tx_s *stats_tx); #endif /* AQ_VEC_H */ diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c index 1b0670a8ae33..a312864969af 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c @@ -21,7 +21,7 @@ .msix_irqs = 4U, \ .irq_mask = ~0U, \ .vecs = HW_ATL_A0_RSS_MAX, \ - .tcs = HW_ATL_A0_TC_MAX, \ + .tcs_max = HW_ATL_A0_TC_MAX, \ .rxd_alignment = 1U, \ .rxd_size = HW_ATL_A0_RXD_SIZE, \ .rxds_max = HW_ATL_A0_MAX_RXD, \ @@ -136,10 +136,10 @@ static int hw_atl_a0_hw_qos_set(struct aq_hw_s *self) hw_atl_tps_tx_pkt_shed_desc_tc_arb_mode_set(self, 0U); hw_atl_tps_tx_pkt_shed_data_arb_mode_set(self, 0U); - hw_atl_tps_tx_pkt_shed_tc_data_max_credit_set(self, 0xFFF, 0U); - hw_atl_tps_tx_pkt_shed_tc_data_weight_set(self, 0x64, 0U); - hw_atl_tps_tx_pkt_shed_desc_tc_max_credit_set(self, 0x50, 0U); - hw_atl_tps_tx_pkt_shed_desc_tc_weight_set(self, 0x1E, 0U); + hw_atl_tps_tx_pkt_shed_tc_data_max_credit_set(self, 0U, 0xFFF); + hw_atl_tps_tx_pkt_shed_tc_data_weight_set(self, 0U, 0x64); + hw_atl_tps_tx_pkt_shed_desc_tc_max_credit_set(self, 0U, 0x50); + hw_atl_tps_tx_pkt_shed_desc_tc_weight_set(self, 0U, 0x1E); /* Tx buf size */ buff_size = HW_ATL_A0_TXBUF_MAX; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index fa3cd7e9954b..14d79f70cad7 100644 --- 
a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -23,7 +23,7 @@ .msix_irqs = 8U, \ .irq_mask = ~0U, \ .vecs = HW_ATL_B0_RSS_MAX, \ - .tcs = HW_ATL_B0_TC_MAX, \ + .tcs_max = HW_ATL_B0_TC_MAX, \ .rxd_alignment = 1U, \ .rxd_size = HW_ATL_B0_RXD_SIZE, \ .rxds_max = HW_ATL_B0_MAX_RXD, \ @@ -46,7 +46,8 @@ NETIF_F_HW_VLAN_CTAG_RX | \ NETIF_F_HW_VLAN_CTAG_TX | \ NETIF_F_GSO_UDP_L4 | \ - NETIF_F_GSO_PARTIAL, \ + NETIF_F_GSO_PARTIAL | \ + NETIF_F_HW_TC, \ .hw_priv_flags = IFF_UNICAST_FLT, \ .flow_control = true, \ .mtu = HW_ATL_B0_MTU_JUMBO, \ @@ -114,12 +115,34 @@ static int hw_atl_b0_set_fc(struct aq_hw_s *self, u32 fc, u32 tc) return 0; } +static int hw_atl_b0_tc_ptp_set(struct aq_hw_s *self) +{ + /* Init TC2 for PTP_TX */ + hw_atl_tpb_tx_pkt_buff_size_per_tc_set(self, HW_ATL_B0_PTP_TXBUF_SIZE, + AQ_HW_PTP_TC); + + /* Init TC2 for PTP_RX */ + hw_atl_rpb_rx_pkt_buff_size_per_tc_set(self, HW_ATL_B0_PTP_RXBUF_SIZE, + AQ_HW_PTP_TC); + /* No flow control for PTP */ + hw_atl_rpb_rx_xoff_en_per_tc_set(self, 0U, AQ_HW_PTP_TC); + + return aq_hw_err_from_flags(self); +} + static int hw_atl_b0_hw_qos_set(struct aq_hw_s *self) { - unsigned int i_priority = 0U; - u32 buff_size = 0U; + struct aq_nic_cfg_s *cfg = self->aq_nic_cfg; + u32 tx_buff_size = HW_ATL_B0_TXBUF_MAX; + u32 rx_buff_size = HW_ATL_B0_RXBUF_MAX; + unsigned int prio = 0U; u32 tc = 0U; + if (cfg->is_ptp) { + tx_buff_size -= HW_ATL_B0_PTP_TXBUF_SIZE; + rx_buff_size -= HW_ATL_B0_PTP_RXBUF_SIZE; + } + /* TPS Descriptor rate init */ hw_atl_tps_tx_pkt_shed_desc_rate_curr_time_res_set(self, 0x0U); hw_atl_tps_tx_pkt_shed_desc_rate_lim_set(self, 0xA); @@ -127,63 +150,39 @@ static int hw_atl_b0_hw_qos_set(struct aq_hw_s *self) /* TPS VM init */ hw_atl_tps_tx_pkt_shed_desc_vm_arb_mode_set(self, 0U); - /* TPS TC credits init */ - hw_atl_tps_tx_pkt_shed_desc_tc_arb_mode_set(self, 0U); - hw_atl_tps_tx_pkt_shed_data_arb_mode_set(self, 0U); - - tc = 0; - - /* TX Packet Scheduler Data TC0 */ - hw_atl_tps_tx_pkt_shed_tc_data_max_credit_set(self, 0xFFF, tc); - hw_atl_tps_tx_pkt_shed_tc_data_weight_set(self, 0x64, tc); - hw_atl_tps_tx_pkt_shed_desc_tc_max_credit_set(self, 0x50, tc); - hw_atl_tps_tx_pkt_shed_desc_tc_weight_set(self, 0x1E, tc); - - /* Tx buf size TC0 */ - buff_size = HW_ATL_B0_TXBUF_MAX - HW_ATL_B0_PTP_TXBUF_SIZE; - - hw_atl_tpb_tx_pkt_buff_size_per_tc_set(self, buff_size, tc); - hw_atl_tpb_tx_buff_hi_threshold_per_tc_set(self, - (buff_size * - (1024 / 32U) * 66U) / - 100U, tc); - hw_atl_tpb_tx_buff_lo_threshold_per_tc_set(self, - (buff_size * - (1024 / 32U) * 50U) / - 100U, tc); - /* Init TC2 for PTP_TX */ - tc = 2; + tx_buff_size /= cfg->tcs; + rx_buff_size /= cfg->tcs; + for (tc = 0; tc < cfg->tcs; tc++) { + u32 threshold = 0U; - hw_atl_tpb_tx_pkt_buff_size_per_tc_set(self, HW_ATL_B0_PTP_TXBUF_SIZE, - tc); + /* Tx buf size TC0 */ + hw_atl_tpb_tx_pkt_buff_size_per_tc_set(self, tx_buff_size, tc); - /* QoS Rx buf size per TC */ - tc = 0; - buff_size = HW_ATL_B0_RXBUF_MAX - HW_ATL_B0_PTP_RXBUF_SIZE; + threshold = (tx_buff_size * (1024 / 32U) * 66U) / 100U; + hw_atl_tpb_tx_buff_hi_threshold_per_tc_set(self, threshold, tc); - hw_atl_rpb_rx_pkt_buff_size_per_tc_set(self, buff_size, tc); - hw_atl_rpb_rx_buff_hi_threshold_per_tc_set(self, - (buff_size * - (1024U / 32U) * 66U) / - 100U, tc); - hw_atl_rpb_rx_buff_lo_threshold_per_tc_set(self, - (buff_size * - (1024U / 32U) * 50U) / - 100U, tc); + threshold = (tx_buff_size * (1024 / 32U) * 50U) / 100U; + 
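The QoS loop above splits the packet buffer evenly across the configured TCs and programs high/low watermarks at 66% and 50% of each slice, converting from KiB to the hardware's 32-byte units. A worked sketch; the 160 KiB Tx buffer total and 8 KiB PTP carve-out are assumed values taken to match the driver's internal headers:

#include <stdio.h>

int main(void)
{
    /* assumed totals: 160 KiB Tx buffer, 8 KiB reserved for the PTP TC
     * when PTP is enabled (HW_ATL_B0_TXBUF_MAX / HW_ATL_B0_PTP_TXBUF_SIZE) */
    unsigned int tx_buff = 160 - 8;
    unsigned int tcs = 4;
    unsigned int per_tc = tx_buff / tcs; /* KiB per TC */

    /* thresholds are written in 32-byte units: KiB * (1024 / 32) */
    unsigned int hi = per_tc * (1024 / 32) * 66 / 100;
    unsigned int lo = per_tc * (1024 / 32) * 50 / 100;

    printf("per-TC buf %u KiB, hi %u, lo %u (32B units)\n", per_tc, hi, lo);
    return 0;
}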
hw_atl_tpb_tx_buff_lo_threshold_per_tc_set(self, threshold, tc); - hw_atl_b0_set_fc(self, self->aq_nic_cfg->fc.req, tc); + /* QoS Rx buf size per TC */ + hw_atl_rpb_rx_pkt_buff_size_per_tc_set(self, rx_buff_size, tc); - /* Init TC2 for PTP_RX */ - tc = 2; + threshold = (rx_buff_size * (1024U / 32U) * 66U) / 100U; + hw_atl_rpb_rx_buff_hi_threshold_per_tc_set(self, threshold, tc); - hw_atl_rpb_rx_pkt_buff_size_per_tc_set(self, HW_ATL_B0_PTP_RXBUF_SIZE, - tc); - /* No flow control for PTP */ - hw_atl_rpb_rx_xoff_en_per_tc_set(self, 0U, tc); + threshold = (rx_buff_size * (1024U / 32U) * 50U) / 100U; + hw_atl_rpb_rx_buff_lo_threshold_per_tc_set(self, threshold, tc); + + hw_atl_b0_set_fc(self, self->aq_nic_cfg->fc.req, tc); + } + + if (cfg->is_ptp) + hw_atl_b0_tc_ptp_set(self); /* QoS 802.1p priority -> TC mapping */ - for (i_priority = 8U; i_priority--;) - hw_atl_rpf_rpb_user_priority_tc_map_set(self, i_priority, 0U); + for (prio = 0; prio < 8; ++prio) + hw_atl_rpf_rpb_user_priority_tc_map_set(self, prio, + cfg->prio_tc_map[prio]); return aq_hw_err_from_flags(self); } @@ -311,10 +310,124 @@ int hw_atl_b0_hw_offload_set(struct aq_hw_s *self, return aq_hw_err_from_flags(self); } +static int hw_atl_b0_hw_init_tx_tc_rate_limit(struct aq_hw_s *self) +{ + static const u32 max_weight = BIT(HW_ATL_TPS_DATA_TCTWEIGHT_WIDTH) - 1; + /* Scale factor is based on the number of bits in fractional portion */ + static const u32 scale = BIT(HW_ATL_TPS_DESC_RATE_Y_WIDTH); + static const u32 frac_msk = HW_ATL_TPS_DESC_RATE_Y_MSK >> + HW_ATL_TPS_DESC_RATE_Y_SHIFT; + const u32 link_speed = self->aq_link_status.mbps; + struct aq_nic_cfg_s *nic_cfg = self->aq_nic_cfg; + unsigned long num_min_rated_tcs = 0; + u32 tc_weight[AQ_CFG_TCS_MAX]; + u32 fixed_max_credit; + u8 min_rate_msk = 0; + u32 sum_weight = 0; + int tc; + + /* By default max_credit is based upon MTU (in unit of 64b) */ + fixed_max_credit = nic_cfg->aq_hw_caps->mtu / 64; + + if (link_speed) { + min_rate_msk = nic_cfg->tc_min_rate_msk & + (BIT(nic_cfg->tcs) - 1); + num_min_rated_tcs = hweight8(min_rate_msk); + } + + /* First, calculate weights where min_rate is specified */ + if (num_min_rated_tcs) { + for (tc = 0; tc != nic_cfg->tcs; tc++) { + if (!nic_cfg->tc_min_rate[tc]) { + tc_weight[tc] = 0; + continue; + } + + tc_weight[tc] = (-1L + link_speed + + nic_cfg->tc_min_rate[tc] * + max_weight) / + link_speed; + tc_weight[tc] = min(tc_weight[tc], max_weight); + sum_weight += tc_weight[tc]; + } + } + + /* WSP, if min_rate is set for at least one TC. + * RR otherwise. + */ + hw_atl_tps_tx_pkt_shed_data_arb_mode_set(self, min_rate_msk ? 1U : 0U); + /* Data TC Arbiter takes precedence over Descriptor TC Arbiter, + * leave Descriptor TC Arbiter as RR. + */ + hw_atl_tps_tx_pkt_shed_desc_tc_arb_mode_set(self, 0U); + + hw_atl_tps_tx_desc_rate_mode_set(self, nic_cfg->is_qos ? 1U : 0U); + + for (tc = 0; tc != nic_cfg->tcs; tc++) { + const u32 en = (nic_cfg->tc_max_rate[tc] != 0) ? 
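The tc_weight computation above is a ceiling division, ceil(min_rate * max_weight / link_speed), spelled with the (-1 + link_speed + ...) numerator so integer math rounds up. A sketch of that arithmetic; 0x1FF here is only a placeholder for BIT(HW_ATL_TPS_DATA_TCTWEIGHT_WIDTH) - 1, whose real width comes from the register layout:

#include <stdio.h>

/* ceil(min_rate * max_weight / link_speed), as the expression above
 * computes via the (-1 + link_speed + ...) numerator trick */
static unsigned long tc_weight(unsigned long min_rate_mbps,
                               unsigned long link_mbps,
                               unsigned long max_weight)
{
    unsigned long w = (link_mbps - 1 + min_rate_mbps * max_weight) /
                      link_mbps;

    return w < max_weight ? w : max_weight;
}

int main(void)
{
    const unsigned long max_weight = 0x1FF; /* placeholder field maximum */

    /* 25% of a 10G link -> about a quarter of the weight range */
    printf("weight = %lu of %lu\n",
           tc_weight(2500, 10000, max_weight), max_weight);
    return 0;
}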
1U : 0U; + const u32 desc = AQ_NIC_CFG_TCVEC2RING(nic_cfg, tc, 0); + u32 weight, max_credit; + + hw_atl_tps_tx_pkt_shed_desc_tc_max_credit_set(self, tc, + fixed_max_credit); + hw_atl_tps_tx_pkt_shed_desc_tc_weight_set(self, tc, 0x1E); + + if (num_min_rated_tcs) { + weight = tc_weight[tc]; + + if (!weight && sum_weight < max_weight) + weight = (max_weight - sum_weight) / + (nic_cfg->tcs - num_min_rated_tcs); + else if (!weight) + weight = 0x64; + + max_credit = max(8 * weight, fixed_max_credit); + } else { + weight = 0x64; + max_credit = 0xFFF; + } + + hw_atl_tps_tx_pkt_shed_tc_data_weight_set(self, tc, weight); + hw_atl_tps_tx_pkt_shed_tc_data_max_credit_set(self, tc, + max_credit); + + hw_atl_tps_tx_desc_rate_en_set(self, desc, en); + + if (en) { + /* Nominal rate is always 10G */ + const u32 rate = 10000U * scale / + nic_cfg->tc_max_rate[tc]; + const u32 rate_int = rate >> + HW_ATL_TPS_DESC_RATE_Y_WIDTH; + const u32 rate_frac = rate & frac_msk; + + hw_atl_tps_tx_desc_rate_x_set(self, desc, rate_int); + hw_atl_tps_tx_desc_rate_y_set(self, desc, rate_frac); + } else { + /* A value of 1 indicates the queue is not + * rate controlled. + */ + hw_atl_tps_tx_desc_rate_x_set(self, desc, 1U); + hw_atl_tps_tx_desc_rate_y_set(self, desc, 0U); + } + } + for (tc = nic_cfg->tcs; tc != AQ_CFG_TCS_MAX; tc++) { + const u32 desc = AQ_NIC_CFG_TCVEC2RING(nic_cfg, tc, 0); + + hw_atl_tps_tx_desc_rate_en_set(self, desc, 0U); + hw_atl_tps_tx_desc_rate_x_set(self, desc, 1U); + hw_atl_tps_tx_desc_rate_y_set(self, desc, 0U); + } + + return aq_hw_err_from_flags(self); +} + static int hw_atl_b0_hw_init_tx_path(struct aq_hw_s *self) { + struct aq_nic_cfg_s *nic_cfg = self->aq_nic_cfg; + /* Tx TC/Queue number config */ - hw_atl_tpb_tps_tx_tc_mode_set(self, 1U); + hw_atl_tpb_tps_tx_tc_mode_set(self, nic_cfg->tc_mode); hw_atl_thm_lso_tcp_flag_of_first_pkt_set(self, 0x0FF6U); hw_atl_thm_lso_tcp_flag_of_middle_pkt_set(self, 0x0FF6U); @@ -334,20 +447,32 @@ static int hw_atl_b0_hw_init_tx_path(struct aq_hw_s *self) return aq_hw_err_from_flags(self); } +void hw_atl_b0_hw_init_rx_rss_ctrl1(struct aq_hw_s *self) +{ + struct aq_nic_cfg_s *cfg = self->aq_nic_cfg; + u32 rss_ctrl1 = HW_ATL_RSS_DISABLED; + + if (cfg->is_rss) + rss_ctrl1 = (cfg->tc_mode == AQ_TC_MODE_8TCS) ? + HW_ATL_RSS_ENABLED_8TCS_2INDEX_BITS : + HW_ATL_RSS_ENABLED_4TCS_3INDEX_BITS; + + hw_atl_reg_rx_flr_rss_control1set(self, rss_ctrl1); +} + static int hw_atl_b0_hw_init_rx_path(struct aq_hw_s *self) { struct aq_nic_cfg_s *cfg = self->aq_nic_cfg; int i; /* Rx TC/RSS number config */ - hw_atl_rpb_rpf_rx_traf_class_mode_set(self, 1U); + hw_atl_rpb_rpf_rx_traf_class_mode_set(self, cfg->tc_mode); /* Rx flow control */ hw_atl_rpb_rx_flow_ctl_mode_set(self, 1U); /* RSS Ring selection */ - hw_atl_reg_rx_flr_rss_control1set(self, cfg->is_rss ? 
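The per-TC shaper above programs 10000/max_rate (the nominal rate is always 10G) as a fixed-point divider: the integer part lands in the 10-bit rate_x field and the fraction in the 14-bit rate_y field. A worked sketch, with the 14-bit fractional width taken from HW_ATL_TPS_DESC_RATE_Y_WIDTH as used above:

#include <stdio.h>

int main(void)
{
    const unsigned int y_width = 14;          /* HW_ATL_TPS_DESC_RATE_Y_WIDTH */
    const unsigned int scale = 1u << y_width;
    const unsigned int frac_msk = scale - 1;
    unsigned int max_rate = 300;              /* Mbps cap for this TC */

    /* divider = 10000 / max_rate in fixed point: integer part -> rate_x,
     * fractional part -> rate_y */
    unsigned int rate = 10000u * scale / max_rate;

    printf("rate_x=%u rate_y=%u (10000/300 ~= 33.333)\n",
           rate >> y_width, rate & frac_msk);
    return 0;
}

For max_rate = 300 this prints rate_x=33 rate_y=5461, i.e. 33 + 5461/16384 ~= 33.333.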
- 0xB3333333U : 0x00000000U); + hw_atl_b0_hw_init_rx_rss_ctrl1(self); /* Multicast filters */ for (i = HW_ATL_B0_MAC_MAX; i--;) { @@ -1078,18 +1203,6 @@ int hw_atl_b0_hw_ring_rx_stop(struct aq_hw_s *self, struct aq_ring_s *ring) return aq_hw_err_from_flags(self); } -static int hw_atl_b0_tx_tc_mode_get(struct aq_hw_s *self, u32 *tc_mode) -{ - *tc_mode = hw_atl_tpb_tps_tx_tc_mode_get(self); - return aq_hw_err_from_flags(self); -} - -static int hw_atl_b0_rx_tc_mode_get(struct aq_hw_s *self, u32 *tc_mode) -{ - *tc_mode = hw_atl_rpb_rpf_rx_traf_class_mode_get(self); - return aq_hw_err_from_flags(self); -} - #define get_ptp_ts_val_u64(self, indx) \ ((u64)(hw_atl_pcs_ptp_clock_get(self, indx) & 0xffff)) @@ -1503,13 +1616,11 @@ const struct aq_hw_ops hw_atl_ops_b0 = { .hw_interrupt_moderation_set = hw_atl_b0_hw_interrupt_moderation_set, .hw_rss_set = hw_atl_b0_hw_rss_set, .hw_rss_hash_set = hw_atl_b0_hw_rss_hash_set, + .hw_tc_rate_limit_set = hw_atl_b0_hw_init_tx_tc_rate_limit, .hw_get_regs = hw_atl_utils_hw_get_regs, .hw_get_hw_stats = hw_atl_utils_get_hw_stats, .hw_get_fw_version = hw_atl_utils_get_fw_version, - .hw_tx_tc_mode_get = hw_atl_b0_tx_tc_mode_get, - .hw_rx_tc_mode_get = hw_atl_b0_rx_tc_mode_get, - .hw_ring_hwts_rx_fill = hw_atl_b0_hw_ring_hwts_rx_fill, .hw_ring_hwts_rx_receive = hw_atl_b0_hw_ring_hwts_rx_receive, diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h index b855459272ca..30f468f2084d 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h @@ -58,6 +58,8 @@ int hw_atl_b0_hw_ring_tx_head_update(struct aq_hw_s *self, int hw_atl_b0_hw_ring_tx_stop(struct aq_hw_s *self, struct aq_ring_s *ring); int hw_atl_b0_hw_ring_rx_stop(struct aq_hw_s *self, struct aq_ring_s *ring); +void hw_atl_b0_hw_init_rx_rss_ctrl1(struct aq_hw_s *self); + int hw_atl_b0_hw_mac_addr_set(struct aq_hw_s *self, u8 *mac_addr); int hw_atl_b0_hw_start(struct aq_hw_s *self); diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h index 7ab23a1751d3..cf460d61a45e 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h @@ -75,7 +75,7 @@ #define HW_ATL_B0_RSS_HASHKEY_BITS 320U #define HW_ATL_B0_TCRSS_4_8 1 -#define HW_ATL_B0_TC_MAX 1U +#define HW_ATL_B0_TC_MAX 8U #define HW_ATL_B0_RSS_MAX 8U #define HW_ATL_B0_LRO_RXD_MAX 16U @@ -151,6 +151,10 @@ #define HW_ATL_B0_MAX_RXD 8184U #define HW_ATL_B0_MAX_TXD 8184U +#define HW_ATL_RSS_DISABLED 0x00000000U +#define HW_ATL_RSS_ENABLED_8TCS_2INDEX_BITS 0xA2222222U +#define HW_ATL_RSS_ENABLED_4TCS_3INDEX_BITS 0x80003333U + /* HW layer capabilities */ #endif /* HW_ATL_B0_INTERNAL_H */ diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c index 9e2d01a6aac8..3c8e8047ea1e 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c @@ -754,7 +754,7 @@ void hw_atl_rpfl2_accept_all_mc_packets_set(struct aq_hw_s *aq_hw, } void hw_atl_rpf_rpb_user_priority_tc_map_set(struct aq_hw_s *aq_hw, - u32 user_priority_tc_map, u32 tc) + u32 user_priority, u32 tc) { /* register address for bitfield rx_tc_up{t}[2:0] */ static u32 rpf_rpb_rx_tc_upt_adr[8] = { @@ -773,10 +773,9 @@ void 
hw_atl_rpf_rpb_user_priority_tc_map_set(struct aq_hw_s *aq_hw, 0U, 4U, 8U, 12U, 16U, 20U, 24U, 28U }; - aq_hw_write_reg_bit(aq_hw, rpf_rpb_rx_tc_upt_adr[tc], - rpf_rpb_rx_tc_upt_msk[tc], - rpf_rpb_rx_tc_upt_shft[tc], - user_priority_tc_map); + aq_hw_write_reg_bit(aq_hw, rpf_rpb_rx_tc_upt_adr[user_priority], + rpf_rpb_rx_tc_upt_msk[user_priority], + rpf_rpb_rx_tc_upt_shft[user_priority], tc); } void hw_atl_rpf_rss_key_addr_set(struct aq_hw_s *aq_hw, u32 rss_key_addr) @@ -1464,8 +1463,8 @@ void hw_atl_tps_tx_pkt_shed_desc_tc_arb_mode_set(struct aq_hw_s *aq_hw, } void hw_atl_tps_tx_pkt_shed_desc_tc_max_credit_set(struct aq_hw_s *aq_hw, - u32 max_credit, - u32 tc) + const u32 tc, + const u32 max_credit) { aq_hw_write_reg_bit(aq_hw, HW_ATL_TPS_DESC_TCTCREDIT_MAX_ADR(tc), HW_ATL_TPS_DESC_TCTCREDIT_MAX_MSK, @@ -1474,13 +1473,13 @@ void hw_atl_tps_tx_pkt_shed_desc_tc_max_credit_set(struct aq_hw_s *aq_hw, } void hw_atl_tps_tx_pkt_shed_desc_tc_weight_set(struct aq_hw_s *aq_hw, - u32 tx_pkt_shed_desc_tc_weight, - u32 tc) + const u32 tc, + const u32 weight) { aq_hw_write_reg_bit(aq_hw, HW_ATL_TPS_DESC_TCTWEIGHT_ADR(tc), HW_ATL_TPS_DESC_TCTWEIGHT_MSK, HW_ATL_TPS_DESC_TCTWEIGHT_SHIFT, - tx_pkt_shed_desc_tc_weight); + weight); } void hw_atl_tps_tx_pkt_shed_desc_vm_arb_mode_set(struct aq_hw_s *aq_hw, @@ -1493,8 +1492,8 @@ void hw_atl_tps_tx_pkt_shed_desc_vm_arb_mode_set(struct aq_hw_s *aq_hw, } void hw_atl_tps_tx_pkt_shed_tc_data_max_credit_set(struct aq_hw_s *aq_hw, - u32 max_credit, - u32 tc) + const u32 tc, + const u32 max_credit) { aq_hw_write_reg_bit(aq_hw, HW_ATL_TPS_DATA_TCTCREDIT_MAX_ADR(tc), HW_ATL_TPS_DATA_TCTCREDIT_MAX_MSK, @@ -1503,13 +1502,49 @@ void hw_atl_tps_tx_pkt_shed_tc_data_max_credit_set(struct aq_hw_s *aq_hw, } void hw_atl_tps_tx_pkt_shed_tc_data_weight_set(struct aq_hw_s *aq_hw, - u32 tx_pkt_shed_tc_data_weight, - u32 tc) + const u32 tc, + const u32 weight) { aq_hw_write_reg_bit(aq_hw, HW_ATL_TPS_DATA_TCTWEIGHT_ADR(tc), HW_ATL_TPS_DATA_TCTWEIGHT_MSK, HW_ATL_TPS_DATA_TCTWEIGHT_SHIFT, - tx_pkt_shed_tc_data_weight); + weight); +} + +void hw_atl_tps_tx_desc_rate_mode_set(struct aq_hw_s *aq_hw, + const u32 rate_mode) +{ + aq_hw_write_reg_bit(aq_hw, HW_ATL_TPS_TX_DESC_RATE_MODE_ADR, + HW_ATL_TPS_TX_DESC_RATE_MODE_MSK, + HW_ATL_TPS_TX_DESC_RATE_MODE_SHIFT, + rate_mode); +} + +void hw_atl_tps_tx_desc_rate_en_set(struct aq_hw_s *aq_hw, const u32 desc, + const u32 enable) +{ + aq_hw_write_reg_bit(aq_hw, HW_ATL_TPS_DESC_RATE_EN_ADR(desc), + HW_ATL_TPS_DESC_RATE_EN_MSK, + HW_ATL_TPS_DESC_RATE_EN_SHIFT, + enable); +} + +void hw_atl_tps_tx_desc_rate_x_set(struct aq_hw_s *aq_hw, const u32 desc, + const u32 rate_int) +{ + aq_hw_write_reg_bit(aq_hw, HW_ATL_TPS_DESC_RATE_X_ADR(desc), + HW_ATL_TPS_DESC_RATE_X_MSK, + HW_ATL_TPS_DESC_RATE_X_SHIFT, + rate_int); +} + +void hw_atl_tps_tx_desc_rate_y_set(struct aq_hw_s *aq_hw, const u32 desc, + const u32 rate_frac) +{ + aq_hw_write_reg_bit(aq_hw, HW_ATL_TPS_DESC_RATE_Y_ADR(desc), + HW_ATL_TPS_DESC_RATE_Y_MSK, + HW_ATL_TPS_DESC_RATE_Y_SHIFT, + rate_frac); } /* tx */ diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h index b88cb84805d5..61a6f70c51cd 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h @@ -688,13 +688,13 @@ void hw_atl_tps_tx_pkt_shed_desc_tc_arb_mode_set(struct aq_hw_s *aq_hw, /* set tx packet scheduler descriptor tc max credit */ void 
hw_atl_tps_tx_pkt_shed_desc_tc_max_credit_set(struct aq_hw_s *aq_hw, - u32 max_credit, - u32 tc); + const u32 tc, + const u32 max_credit); /* set tx packet scheduler descriptor tc weight */ void hw_atl_tps_tx_pkt_shed_desc_tc_weight_set(struct aq_hw_s *aq_hw, - u32 tx_pkt_shed_desc_tc_weight, - u32 tc); + const u32 tc, + const u32 weight); /* set tx packet scheduler descriptor vm arbitration mode */ void hw_atl_tps_tx_pkt_shed_desc_vm_arb_mode_set(struct aq_hw_s *aq_hw, @@ -702,13 +702,29 @@ void hw_atl_tps_tx_pkt_shed_desc_vm_arb_mode_set(struct aq_hw_s *aq_hw, /* set tx packet scheduler tc data max credit */ void hw_atl_tps_tx_pkt_shed_tc_data_max_credit_set(struct aq_hw_s *aq_hw, - u32 max_credit, - u32 tc); + const u32 tc, + const u32 max_credit); /* set tx packet scheduler tc data weight */ void hw_atl_tps_tx_pkt_shed_tc_data_weight_set(struct aq_hw_s *aq_hw, - u32 tx_pkt_shed_tc_data_weight, - u32 tc); + const u32 tc, + const u32 weight); + +/* set tx descriptor rate mode */ +void hw_atl_tps_tx_desc_rate_mode_set(struct aq_hw_s *aq_hw, + const u32 rate_mode); + +/* set tx packet scheduler descriptor rate enable */ +void hw_atl_tps_tx_desc_rate_en_set(struct aq_hw_s *aq_hw, const u32 desc, + const u32 enable); + +/* set tx packet scheduler descriptor rate integral value */ +void hw_atl_tps_tx_desc_rate_x_set(struct aq_hw_s *aq_hw, const u32 desc, + const u32 rate_int); + +/* set tx packet scheduler descriptor rate fractional value */ +void hw_atl_tps_tx_desc_rate_y_set(struct aq_hw_s *aq_hw, const u32 desc, + const u32 rate_frac); /* tx */ diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h index 18de2f7b8959..06220792daf1 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h @@ -2038,6 +2038,42 @@ /* default value of bitfield lso_tcp_flag_mid[b:0] */ #define HW_ATL_THM_LSO_TCP_FLAG_MID_DEFAULT 0x0 +/* tx tx_tc_mode bitfield definitions + * preprocessor definitions for the bitfield "tx_tc_mode". + * port="pif_tpb_tx_tc_mode_i,pif_tps_tx_tc_mode_i" + */ + +/* register address for bitfield tx_tc_mode */ +#define HW_ATL_TPB_TX_TC_MODE_ADDR 0x00007900 +/* bitmask for bitfield tx_tc_mode */ +#define HW_ATL_TPB_TX_TC_MODE_MSK 0x00000100 +/* inverted bitmask for bitfield tx_tc_mode */ +#define HW_ATL_TPB_TX_TC_MODE_MSKN 0xFFFFFEFF +/* lower bit position of bitfield tx_tc_mode */ +#define HW_ATL_TPB_TX_TC_MODE_SHIFT 8 +/* width of bitfield tx_tc_mode */ +#define HW_ATL_TPB_TX_TC_MODE_WIDTH 1 +/* default value of bitfield tx_tc_mode */ +#define HW_ATL_TPB_TX_TC_MODE_DEFAULT 0x0 + +/* tx tx_desc_rate_mode bitfield definitions + * preprocessor definitions for the bitfield "tx_desc_rate_mode". 
+ * port="pif_tps_desc_rate_mode_i" + */ + +/* register address for bitfield tx_desc_rate_mode */ +#define HW_ATL_TPS_TX_DESC_RATE_MODE_ADR 0x00007900 +/* bitmask for bitfield tx_desc_rate_mode */ +#define HW_ATL_TPS_TX_DESC_RATE_MODE_MSK 0x00000080 +/* inverted bitmask for bitfield tx_desc_rate_mode */ +#define HW_ATL_TPS_TX_DESC_RATE_MODE_MSKN 0xFFFFFF7F +/* lower bit position of bitfield tx_desc_rate_mode */ +#define HW_ATL_TPS_TX_DESC_RATE_MODE_SHIFT 7 +/* width of bitfield tx_desc_rate_mode */ +#define HW_ATL_TPS_TX_DESC_RATE_MODE_WIDTH 1 +/* default value of bitfield tx_desc_rate_mode */ +#define HW_ATL_TPS_TX_DESC_RATE_MODE_DEFAULT 0x0 + /* tx tx_buf_en bitfield definitions * preprocessor definitions for the bitfield "tx_buf_en". * port="pif_tpb_tx_buf_en_i" @@ -2056,19 +2092,6 @@ /* default value of bitfield tx_buf_en */ #define HW_ATL_TPB_TX_BUF_EN_DEFAULT 0x0 -/* register address for bitfield tx_tc_mode */ -#define HW_ATL_TPB_TX_TC_MODE_ADDR 0x00007900 -/* bitmask for bitfield tx_tc_mode */ -#define HW_ATL_TPB_TX_TC_MODE_MSK 0x00000100 -/* inverted bitmask for bitfield tx_tc_mode */ -#define HW_ATL_TPB_TX_TC_MODE_MSKN 0xFFFFFEFF -/* lower bit position of bitfield tx_tc_mode */ -#define HW_ATL_TPB_TX_TC_MODE_SHIFT 8 -/* width of bitfield tx_tc_mode */ -#define HW_ATL_TPB_TX_TC_MODE_WIDTH 1 -/* default value of bitfield tx_tc_mode */ -#define HW_ATL_TPB_TX_TC_MODE_DEFAULT 0x0 - /* tx tx{b}_hi_thresh[c:0] bitfield definitions * preprocessor definitions for the bitfield "tx{b}_hi_thresh[c:0]". * parameter: buffer {b} | stride size 0x10 | range [0, 7] @@ -2270,6 +2293,58 @@ /* default value of bitfield data_tc_arb_mode */ #define HW_ATL_TPS_DATA_TC_ARB_MODE_DEFAULT 0x0 +/* tx desc{r}_rate_en bitfield definitions + * preprocessor definitions for the bitfield "desc{r}_rate_en". + * port="pif_tps_desc_rate_en_i[0]" + */ + +/* register address for bitfield desc{r}_rate_en */ +#define HW_ATL_TPS_DESC_RATE_EN_ADR(desc) (0x00007408 + (desc) * 0x10) +/* bitmask for bitfield desc{r}_rate_en */ +#define HW_ATL_TPS_DESC_RATE_EN_MSK 0x80000000 +/* inverted bitmask for bitfield desc{r}_rate_en */ +#define HW_ATL_TPS_DESC_RATE_EN_MSKN 0x7FFFFFFF +/* lower bit position of bitfield desc{r}_rate_en */ +#define HW_ATL_TPS_DESC_RATE_EN_SHIFT 31 +/* width of bitfield desc{r}_rate_en */ +#define HW_ATL_TPS_DESC_RATE_EN_WIDTH 1 +/* default value of bitfield desc{r}_rate_en */ +#define HW_ATL_TPS_DESC_RATE_EN_DEFAULT 0x0 + +/* tx desc{r}_rate_x bitfield definitions + * preprocessor definitions for the bitfield "desc{r}_rate_x". + * port="pif_tps_desc0_rate_x" + */ +/* register address for bitfield desc{r}_rate_x */ +#define HW_ATL_TPS_DESC_RATE_X_ADR(desc) (0x00007408 + (desc) * 0x10) +/* bitmask for bitfield desc{r}_rate_x */ +#define HW_ATL_TPS_DESC_RATE_X_MSK 0x03FF0000 +/* inverted bitmask for bitfield desc{r}_rate_x */ +#define HW_ATL_TPS_DESC_RATE_X_MSKN 0xFC00FFFF +/* lower bit position of bitfield desc{r}_rate_x */ +#define HW_ATL_TPS_DESC_RATE_X_SHIFT 16 +/* width of bitfield desc{r}_rate_x */ +#define HW_ATL_TPS_DESC_RATE_X_WIDTH 10 +/* default value of bitfield desc{r}_rate_x */ +#define HW_ATL_TPS_DESC_RATE_X_DEFAULT 0x0 + +/* tx desc{r}_rate_y bitfield definitions + * preprocessor definitions for the bitfield "desc{r}_rate_y". 
+ * port="pif_tps_desc0_rate_y" + */ +/* register address for bitfield desc{r}_rate_y */ +#define HW_ATL_TPS_DESC_RATE_Y_ADR(desc) (0x00007408 + (desc) * 0x10) +/* bitmask for bitfield desc{r}_rate_y */ +#define HW_ATL_TPS_DESC_RATE_Y_MSK 0x00003FFF +/* inverted bitmask for bitfield desc{r}_rate_y */ +#define HW_ATL_TPS_DESC_RATE_Y_MSKN 0xFFFFC000 +/* lower bit position of bitfield desc{r}_rate_y */ +#define HW_ATL_TPS_DESC_RATE_Y_SHIFT 0 +/* width of bitfield desc{r}_rate_y */ +#define HW_ATL_TPS_DESC_RATE_Y_WIDTH 14 +/* default value of bitfield desc{r}_rate_y */ +#define HW_ATL_TPS_DESC_RATE_Y_DEFAULT 0x0 + /* tx desc_rate_ta_rst bitfield definitions * preprocessor definitions for the bitfield "desc_rate_ta_rst". * port="pif_tps_desc_rate_ta_rst_i" diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c index 6f2b33ae3d06..8df9d4ef36f0 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c @@ -10,6 +10,7 @@ #include "hw_atl/hw_atl_b0.h" #include "hw_atl/hw_atl_utils.h" #include "hw_atl/hw_atl_llh.h" +#include "hw_atl/hw_atl_llh_internal.h" #include "hw_atl2_utils.h" #include "hw_atl2_llh.h" #include "hw_atl2_internal.h" @@ -23,7 +24,7 @@ static int hw_atl2_act_rslvr_table_set(struct aq_hw_s *self, u8 location, .msix_irqs = 8U, \ .irq_mask = ~0U, \ .vecs = HW_ATL2_RSS_MAX, \ - .tcs = HW_ATL2_TC_MAX, \ + .tcs_max = HW_ATL2_TC_MAX, \ .rxd_alignment = 1U, \ .rxd_size = HW_ATL2_RXD_SIZE, \ .rxds_max = HW_ATL2_MAX_RXD, \ @@ -47,7 +48,8 @@ static int hw_atl2_act_rslvr_table_set(struct aq_hw_s *self, u8 location, NETIF_F_HW_VLAN_CTAG_RX | \ NETIF_F_HW_VLAN_CTAG_TX | \ NETIF_F_GSO_UDP_L4 | \ - NETIF_F_GSO_PARTIAL, \ + NETIF_F_GSO_PARTIAL | \ + NETIF_F_HW_TC, \ .hw_priv_flags = IFF_UNICAST_FLT, \ .flow_control = true, \ .mtu = HW_ATL2_MTU_JUMBO, \ @@ -91,16 +93,49 @@ static int hw_atl2_hw_reset(struct aq_hw_s *self) static int hw_atl2_hw_queue_to_tc_map_set(struct aq_hw_s *self) { - if (!hw_atl_rpb_rpf_rx_traf_class_mode_get(self)) { - aq_hw_write_reg(self, HW_ATL2_RX_Q_TC_MAP_ADR(0), 0x11110000); - aq_hw_write_reg(self, HW_ATL2_RX_Q_TC_MAP_ADR(8), 0x33332222); - aq_hw_write_reg(self, HW_ATL2_RX_Q_TC_MAP_ADR(16), 0x55554444); - aq_hw_write_reg(self, HW_ATL2_RX_Q_TC_MAP_ADR(24), 0x77776666); - } else { - aq_hw_write_reg(self, HW_ATL2_RX_Q_TC_MAP_ADR(0), 0x00000000); - aq_hw_write_reg(self, HW_ATL2_RX_Q_TC_MAP_ADR(8), 0x11111111); - aq_hw_write_reg(self, HW_ATL2_RX_Q_TC_MAP_ADR(16), 0x22222222); - aq_hw_write_reg(self, HW_ATL2_RX_Q_TC_MAP_ADR(24), 0x33333333); + struct aq_nic_cfg_s *cfg = self->aq_nic_cfg; + unsigned int tcs, q_per_tc; + unsigned int tc, q; + u32 rx_map = 0; + u32 tx_map = 0; + + hw_atl2_tpb_tx_tc_q_rand_map_en_set(self, 1U); + + switch (cfg->tc_mode) { + case AQ_TC_MODE_8TCS: + tcs = 8; + q_per_tc = 4; + break; + case AQ_TC_MODE_4TCS: + tcs = 4; + q_per_tc = 8; + break; + default: + return -EINVAL; + } + + for (tc = 0; tc != tcs; tc++) { + unsigned int tc_q_offset = tc * q_per_tc; + + for (q = tc_q_offset; q != tc_q_offset + q_per_tc; q++) { + rx_map |= tc << HW_ATL2_RX_Q_TC_MAP_SHIFT(q); + if (HW_ATL2_RX_Q_TC_MAP_ADR(q) != + HW_ATL2_RX_Q_TC_MAP_ADR(q + 1)) { + aq_hw_write_reg(self, + HW_ATL2_RX_Q_TC_MAP_ADR(q), + rx_map); + rx_map = 0; + } + + tx_map |= tc << HW_ATL2_TX_Q_TC_MAP_SHIFT(q); + if (HW_ATL2_TX_Q_TC_MAP_ADR(q) != + HW_ATL2_TX_Q_TC_MAP_ADR(q + 1)) { + aq_hw_write_reg(self, + HW_ATL2_TX_Q_TC_MAP_ADR(q), + tx_map); + tx_map 
= 0; + } + } } return aq_hw_err_from_flags(self); @@ -112,7 +147,6 @@ static int hw_atl2_hw_qos_set(struct aq_hw_s *self) u32 tx_buff_size = HW_ATL2_TXBUF_MAX; u32 rx_buff_size = HW_ATL2_RXBUF_MAX; unsigned int prio = 0U; - u32 threshold = 0U; u32 tc = 0U; /* TPS Descriptor rate init */ @@ -122,42 +156,36 @@ static int hw_atl2_hw_qos_set(struct aq_hw_s *self) /* TPS VM init */ hw_atl_tps_tx_pkt_shed_desc_vm_arb_mode_set(self, 0U); - /* TPS TC credits init */ - hw_atl_tps_tx_pkt_shed_desc_tc_arb_mode_set(self, 0U); - hw_atl_tps_tx_pkt_shed_data_arb_mode_set(self, 0U); + tx_buff_size /= cfg->tcs; + rx_buff_size /= cfg->tcs; + for (tc = 0; tc < cfg->tcs; tc++) { + u32 threshold = 0U; - tc = 0; + /* Tx buf size TC0 */ + hw_atl_tpb_tx_pkt_buff_size_per_tc_set(self, tx_buff_size, tc); - /* TX Packet Scheduler Data TC0 */ - hw_atl2_tps_tx_pkt_shed_tc_data_max_credit_set(self, 0xFFF0, tc); - hw_atl2_tps_tx_pkt_shed_tc_data_weight_set(self, 0x640, tc); - hw_atl_tps_tx_pkt_shed_desc_tc_max_credit_set(self, 0x50, tc); - hw_atl_tps_tx_pkt_shed_desc_tc_weight_set(self, 0x1E, tc); + threshold = (tx_buff_size * (1024 / 32U) * 66U) / 100U; + hw_atl_tpb_tx_buff_hi_threshold_per_tc_set(self, threshold, tc); - /* Tx buf size TC0 */ - hw_atl_tpb_tx_pkt_buff_size_per_tc_set(self, tx_buff_size, tc); + threshold = (tx_buff_size * (1024 / 32U) * 50U) / 100U; + hw_atl_tpb_tx_buff_lo_threshold_per_tc_set(self, threshold, tc); - threshold = (tx_buff_size * (1024 / 32U) * 66U) / 100U; - hw_atl_tpb_tx_buff_hi_threshold_per_tc_set(self, threshold, tc); + /* QoS Rx buf size per TC */ + hw_atl_rpb_rx_pkt_buff_size_per_tc_set(self, rx_buff_size, tc); - threshold = (tx_buff_size * (1024 / 32U) * 50U) / 100U; - hw_atl_tpb_tx_buff_lo_threshold_per_tc_set(self, threshold, tc); + threshold = (rx_buff_size * (1024U / 32U) * 66U) / 100U; + hw_atl_rpb_rx_buff_hi_threshold_per_tc_set(self, threshold, tc); - /* QoS Rx buf size per TC */ - hw_atl_rpb_rx_pkt_buff_size_per_tc_set(self, rx_buff_size, tc); - - threshold = (rx_buff_size * (1024U / 32U) * 66U) / 100U; - hw_atl_rpb_rx_buff_hi_threshold_per_tc_set(self, threshold, tc); - - threshold = (rx_buff_size * (1024U / 32U) * 50U) / 100U; - hw_atl_rpb_rx_buff_lo_threshold_per_tc_set(self, threshold, tc); + threshold = (rx_buff_size * (1024U / 32U) * 50U) / 100U; + hw_atl_rpb_rx_buff_lo_threshold_per_tc_set(self, threshold, tc); + } /* QoS 802.1p priority -> TC mapping */ for (prio = 0; prio < 8; ++prio) hw_atl_rpf_rpb_user_priority_tc_map_set(self, prio, - cfg->tcs * prio / 8); + cfg->prio_tc_map[prio]); - /* ATL2 Apply legacy ring to TC mapping */ + /* ATL2 Apply ring to TC mapping */ hw_atl2_hw_queue_to_tc_map_set(self); return aq_hw_err_from_flags(self); @@ -166,19 +194,149 @@ static int hw_atl2_hw_qos_set(struct aq_hw_s *self) static int hw_atl2_hw_rss_set(struct aq_hw_s *self, struct aq_rss_parameters *rss_params) { - u8 *indirection_table = rss_params->indirection_table; + u8 *indirection_table = rss_params->indirection_table; + const u32 num_tcs = aq_hw_num_tcs(self); + u32 rpf_redir2_enable; + int tc; int i; - for (i = HW_ATL2_RSS_REDIRECTION_MAX; i--;) - hw_atl2_new_rpf_rss_redir_set(self, 0, i, indirection_table[i]); + rpf_redir2_enable = num_tcs > 4 ? 
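The ATL2 RSS path above programs a redirection entry per (TC, slot): the TC's queue-block base plus the indirection value, i.e. tc * q_per_tc + table[i], with the second redirection table enabled once more than 4 TCs are in play. A sketch of the resulting tables; the indirection contents are hypothetical:

#include <stdio.h>

int main(void)
{
    const unsigned int num_tcs = 8, q_per_tc = 4, rss_max = 8;
    unsigned int table[8] = { 0, 1, 2, 3, 0, 1, 2, 3 }; /* hypothetical */

    /* one redirection entry per (tc, slot): per-TC queue block base
     * plus the RSS index, exactly tc * q_per_tc + table[i] above */
    for (unsigned int tc = 0; tc < num_tcs; tc++) {
        printf("tc%u:", tc);
        for (unsigned int i = 0; i < rss_max; i++)
            printf(" %2u", tc * q_per_tc + table[i]);
        printf("\n");
    }
    return 0;
}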
1 : 0; + + hw_atl2_rpf_redirection_table2_select_set(self, rpf_redir2_enable); + + for (i = HW_ATL2_RSS_REDIRECTION_MAX; i--;) { + for (tc = 0; tc != num_tcs; tc++) { + hw_atl2_new_rpf_rss_redir_set(self, tc, i, + tc * + aq_hw_q_per_tc(self) + + indirection_table[i]); + } + } + + return aq_hw_err_from_flags(self); +} + +static int hw_atl2_hw_init_tx_tc_rate_limit(struct aq_hw_s *self) +{ + static const u32 max_weight = BIT(HW_ATL2_TPS_DATA_TCTWEIGHT_WIDTH) - 1; + /* Scale factor is based on the number of bits in fractional portion */ + static const u32 scale = BIT(HW_ATL_TPS_DESC_RATE_Y_WIDTH); + static const u32 frac_msk = HW_ATL_TPS_DESC_RATE_Y_MSK >> + HW_ATL_TPS_DESC_RATE_Y_SHIFT; + const u32 link_speed = self->aq_link_status.mbps; + struct aq_nic_cfg_s *nic_cfg = self->aq_nic_cfg; + unsigned long num_min_rated_tcs = 0; + u32 tc_weight[AQ_CFG_TCS_MAX]; + u32 fixed_max_credit_4b; + u32 fixed_max_credit; + u8 min_rate_msk = 0; + u32 sum_weight = 0; + int tc; + + /* By default max_credit is based upon MTU (in unit of 64b) */ + fixed_max_credit = nic_cfg->aq_hw_caps->mtu / 64; + /* in unit of 4b */ + fixed_max_credit_4b = nic_cfg->aq_hw_caps->mtu / 4; + + if (link_speed) { + min_rate_msk = nic_cfg->tc_min_rate_msk & + (BIT(nic_cfg->tcs) - 1); + num_min_rated_tcs = hweight8(min_rate_msk); + } + + /* First, calculate weights where min_rate is specified */ + if (num_min_rated_tcs) { + for (tc = 0; tc != nic_cfg->tcs; tc++) { + if (!nic_cfg->tc_min_rate[tc]) { + tc_weight[tc] = 0; + continue; + } + + tc_weight[tc] = (-1L + link_speed + + nic_cfg->tc_min_rate[tc] * + max_weight) / + link_speed; + tc_weight[tc] = min(tc_weight[tc], max_weight); + sum_weight += tc_weight[tc]; + } + } + + /* WSP, if min_rate is set for at least one TC. + * RR otherwise. + */ + hw_atl2_tps_tx_pkt_shed_data_arb_mode_set(self, min_rate_msk ? 1U : 0U); + /* Data TC Arbiter takes precedence over Descriptor TC Arbiter, + * leave Descriptor TC Arbiter as RR. + */ + hw_atl_tps_tx_pkt_shed_desc_tc_arb_mode_set(self, 0U); + + hw_atl_tps_tx_desc_rate_mode_set(self, nic_cfg->is_qos ? 1U : 0U); + + for (tc = 0; tc != nic_cfg->tcs; tc++) { + const u32 en = (nic_cfg->tc_max_rate[tc] != 0) ? 1U : 0U; + const u32 desc = AQ_NIC_CFG_TCVEC2RING(nic_cfg, tc, 0); + u32 weight, max_credit; + + hw_atl_tps_tx_pkt_shed_desc_tc_max_credit_set(self, tc, + fixed_max_credit); + hw_atl_tps_tx_pkt_shed_desc_tc_weight_set(self, tc, 0x1E); + + if (num_min_rated_tcs) { + weight = tc_weight[tc]; + + if (!weight && sum_weight < max_weight) + weight = (max_weight - sum_weight) / + (nic_cfg->tcs - num_min_rated_tcs); + else if (!weight) + weight = 0x640; + + max_credit = max(2 * weight, fixed_max_credit_4b); + } else { + weight = 0x640; + max_credit = 0xFFF0; + } + + hw_atl2_tps_tx_pkt_shed_tc_data_weight_set(self, tc, weight); + hw_atl2_tps_tx_pkt_shed_tc_data_max_credit_set(self, tc, + max_credit); + + hw_atl_tps_tx_desc_rate_en_set(self, desc, en); + + if (en) { + /* Nominal rate is always 10G */ + const u32 rate = 10000U * scale / + nic_cfg->tc_max_rate[tc]; + const u32 rate_int = rate >> + HW_ATL_TPS_DESC_RATE_Y_WIDTH; + const u32 rate_frac = rate & frac_msk; + + hw_atl_tps_tx_desc_rate_x_set(self, desc, rate_int); + hw_atl_tps_tx_desc_rate_y_set(self, desc, rate_frac); + } else { + /* A value of 1 indicates the queue is not + * rate controlled. 
+ */ + hw_atl_tps_tx_desc_rate_x_set(self, desc, 1U); + hw_atl_tps_tx_desc_rate_y_set(self, desc, 0U); + } + } + for (tc = nic_cfg->tcs; tc != AQ_CFG_TCS_MAX; tc++) { + const u32 desc = AQ_NIC_CFG_TCVEC2RING(nic_cfg, tc, 0); + + hw_atl_tps_tx_desc_rate_en_set(self, desc, 0U); + hw_atl_tps_tx_desc_rate_x_set(self, desc, 1U); + hw_atl_tps_tx_desc_rate_y_set(self, desc, 0U); + } return aq_hw_err_from_flags(self); } static int hw_atl2_hw_init_tx_path(struct aq_hw_s *self) { + struct aq_nic_cfg_s *nic_cfg = self->aq_nic_cfg; + /* Tx TC/RSS number config */ - hw_atl_tpb_tps_tx_tc_mode_set(self, 1U); + hw_atl_tpb_tps_tx_tc_mode_set(self, nic_cfg->tc_mode); hw_atl_thm_lso_tcp_flag_of_first_pkt_set(self, 0x0FF6U); hw_atl_thm_lso_tcp_flag_of_middle_pkt_set(self, 0x0FF6U); @@ -201,13 +359,29 @@ static int hw_atl2_hw_init_tx_path(struct aq_hw_s *self) static void hw_atl2_hw_init_new_rx_filters(struct aq_hw_s *self) { struct hw_atl2_priv *priv = (struct hw_atl2_priv *)self->priv; + u8 *prio_tc_map = self->aq_nic_cfg->prio_tc_map; + u16 action; u8 index; + int i; + /* Action Resolver Table (ART) is used by RPF to decide which action + * to take with a packet based upon input tag and tag mask, where: + * - input tag is a combination of 3-bit VLAN Prio (PCP) and + * 29-bit concatenation of all tags from filter block; + * - tag mask is a mask used for matching against input tag. + * The input_tag is compared with all the Requested_tags in the + * Record table to find a match. The Action field of the selected + * matching REC entry is used for further processing. If multiple + * entries match, the Action field of the lowest REC entry is selected. + */ hw_atl2_rpf_act_rslvr_section_en_set(self, 0xFFFF); hw_atl2_rpfl2_uc_flr_tag_set(self, HW_ATL2_RPF_TAG_BASE_UC, HW_ATL2_MAC_UC); hw_atl2_rpfl2_bc_flr_tag_set(self, HW_ATL2_RPF_TAG_BASE_UC); + /* FW reserves the beginning of ART, thus all driver entries must + * start from the offset specified in FW caps. + */ index = priv->art_base_index + HW_ATL2_RPF_L2_PROMISC_OFF_INDEX; hw_atl2_act_rslvr_table_set(self, index, 0, HW_ATL2_RPF_TAG_UC_MASK | @@ -220,33 +394,17 @@ static void hw_atl2_hw_init_new_rx_filters(struct aq_hw_s *self) HW_ATL2_RPF_TAG_UNTAG_MASK, HW_ATL2_ACTION_DROP); - index = priv->art_base_index + HW_ATL2_RPF_VLAN_INDEX; - hw_atl2_act_rslvr_table_set(self, index, HW_ATL2_RPF_TAG_BASE_VLAN, - HW_ATL2_RPF_TAG_VLAN_MASK, - HW_ATL2_ACTION_ASSIGN_TC(0)); - - index = priv->art_base_index + HW_ATL2_RPF_MAC_INDEX; - hw_atl2_act_rslvr_table_set(self, index, HW_ATL2_RPF_TAG_BASE_UC, - HW_ATL2_RPF_TAG_UC_MASK, - HW_ATL2_ACTION_ASSIGN_TC(0)); - - index = priv->art_base_index + HW_ATL2_RPF_ALLMC_INDEX; - hw_atl2_act_rslvr_table_set(self, index, HW_ATL2_RPF_TAG_BASE_ALLMC, - HW_ATL2_RPF_TAG_ALLMC_MASK, - HW_ATL2_ACTION_ASSIGN_TC(0)); + /* Configure ART to map a given VLAN Prio (PCP) to the TC index for + * the RSS redirection table.
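+ * One ART entry is written per PCP value (0-7), matching only the + * 3-bit PCP field of the input tag.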
+ */ + for (i = 0; i < 8; i++) { + action = HW_ATL2_ACTION_ASSIGN_TC(prio_tc_map[i]); - index = priv->art_base_index + HW_ATL2_RPF_UNTAG_INDEX; - hw_atl2_act_rslvr_table_set(self, index, HW_ATL2_RPF_TAG_UNTAG_MASK, - HW_ATL2_RPF_TAG_UNTAG_MASK, - HW_ATL2_ACTION_ASSIGN_TC(0)); - - index = priv->art_base_index + HW_ATL2_RPF_VLAN_PROMISC_ON_INDEX; - hw_atl2_act_rslvr_table_set(self, index, 0, HW_ATL2_RPF_TAG_VLAN_MASK, - HW_ATL2_ACTION_DISABLE); - - index = priv->art_base_index + HW_ATL2_RPF_L2_PROMISC_ON_INDEX; - hw_atl2_act_rslvr_table_set(self, index, 0, HW_ATL2_RPF_TAG_UC_MASK, - HW_ATL2_ACTION_DISABLE); + index = priv->art_base_index + HW_ATL2_RPF_PCP_TO_TC_INDEX + i; + hw_atl2_act_rslvr_table_set(self, index, + i << HW_ATL2_RPF_TAG_PCP_OFFSET, + HW_ATL2_RPF_TAG_PCP_MASK, action); + } } static void hw_atl2_hw_new_rx_filter_vlan_promisc(struct aq_hw_s *self, @@ -309,7 +467,7 @@ static int hw_atl2_hw_init_rx_path(struct aq_hw_s *self) int i; /* Rx TC/RSS number config */ - hw_atl_rpb_rpf_rx_traf_class_mode_set(self, 1U); + hw_atl_rpb_rpf_rx_traf_class_mode_set(self, cfg->tc_mode); /* Rx flow control */ hw_atl_rpb_rx_flow_ctl_mode_set(self, 1U); @@ -317,9 +475,7 @@ static int hw_atl2_hw_init_rx_path(struct aq_hw_s *self) hw_atl2_rpf_rss_hash_type_set(self, HW_ATL2_RPF_RSS_HASH_TYPE_ALL); /* RSS Ring selection */ - hw_atl_reg_rx_flr_rss_control1set(self, cfg->is_rss ? - HW_ATL_RSS_ENABLED_3INDEX_BITS : - HW_ATL_RSS_DISABLED); + hw_atl_b0_hw_init_rx_rss_ctrl1(self); /* Multicast filters */ for (i = HW_ATL2_MAC_MAX; i--;) { @@ -678,6 +834,7 @@ const struct aq_hw_ops hw_atl2_ops = { .hw_interrupt_moderation_set = hw_atl2_hw_interrupt_moderation_set, .hw_rss_set = hw_atl2_hw_rss_set, .hw_rss_hash_set = hw_atl_b0_hw_rss_hash_set, + .hw_tc_rate_limit_set = hw_atl2_hw_init_tx_tc_rate_limit, .hw_get_hw_stats = hw_atl2_utils_get_hw_stats, .hw_get_fw_version = hw_atl2_utils_get_fw_version, .hw_set_offload = hw_atl_b0_hw_offload_set, diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_internal.h index e66b3583bfe9..5a89bb8722f9 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_internal.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_internal.h @@ -31,7 +31,7 @@ #define HW_ATL2_RSS_REDIRECTION_MAX 64U -#define HW_ATL2_TC_MAX 1U +#define HW_ATL2_TC_MAX 8U #define HW_ATL2_RSS_MAX 8U #define HW_ATL2_INTR_MODER_MAX 0x1FF @@ -82,13 +82,6 @@ enum HW_ATL2_RPF_ART_INDEX { HW_ATL2_RPF_VLAN_USER_INDEX = HW_ATL2_RPF_ET_PCP_USER_INDEX + 16, HW_ATL2_RPF_PCP_TO_TC_INDEX = HW_ATL2_RPF_VLAN_USER_INDEX + HW_ATL_VLAN_MAX_FILTERS, - HW_ATL2_RPF_VLAN_INDEX = HW_ATL2_RPF_PCP_TO_TC_INDEX + - AQ_CFG_TCS_MAX, - HW_ATL2_RPF_MAC_INDEX, - HW_ATL2_RPF_ALLMC_INDEX, - HW_ATL2_RPF_UNTAG_INDEX, - HW_ATL2_RPF_VLAN_PROMISC_ON_INDEX, - HW_ATL2_RPF_L2_PROMISC_ON_INDEX, }; #define HW_ATL2_ACTION(ACTION, RSS, INDEX, VALID) \ @@ -124,9 +117,6 @@ enum HW_ATL2_RPF_RSS_HASH_TYPE { HW_ATL2_RPF_RSS_HASH_TYPE_IPV6_EX_UDP, }; -#define HW_ATL_RSS_DISABLED 0x00000000U -#define HW_ATL_RSS_ENABLED_3INDEX_BITS 0xB3333333U - #define HW_ATL_MCAST_FLT_ANY_TO_HOST 0x00010FFFU struct hw_atl2_priv { diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.c index e779d70fde66..cd954b11d24a 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.c @@ -7,6 +7,14 @@ #include 
"hw_atl2_llh_internal.h" #include "aq_hw_utils.h" +void hw_atl2_rpf_redirection_table2_select_set(struct aq_hw_s *aq_hw, + u32 select) +{ + aq_hw_write_reg_bit(aq_hw, HW_ATL2_RPF_PIF_RPF_REDIR2_ENI_ADR, + HW_ATL2_RPF_PIF_RPF_REDIR2_ENI_MSK, + HW_ATL2_RPF_PIF_RPF_REDIR2_ENI_SHIFT, select); +} + void hw_atl2_rpf_rss_hash_type_set(struct aq_hw_s *aq_hw, u32 rss_hash_type) { aq_hw_write_reg_bit(aq_hw, HW_ATL2_RPF_PIF_RPF_RSS_HASH_TYPEI_ADR, @@ -60,6 +68,15 @@ void hw_atl2_rpf_vlan_flr_tag_set(struct aq_hw_s *aq_hw, u32 tag, u32 filter) /* TX */ +void hw_atl2_tpb_tx_tc_q_rand_map_en_set(struct aq_hw_s *aq_hw, + const u32 tc_q_rand_map_en) +{ + aq_hw_write_reg_bit(aq_hw, HW_ATL2_TPB_TX_TC_Q_RAND_MAP_EN_ADR, + HW_ATL2_TPB_TX_TC_Q_RAND_MAP_EN_MSK, + HW_ATL2_TPB_TX_TC_Q_RAND_MAP_EN_SHIFT, + tc_q_rand_map_en); +} + void hw_atl2_tpb_tx_buf_clk_gate_en_set(struct aq_hw_s *aq_hw, u32 clk_gate_en) { aq_hw_write_reg_bit(aq_hw, HW_ATL2_TPB_TX_BUF_CLK_GATE_EN_ADR, @@ -76,9 +93,18 @@ void hw_atl2_reg_tx_intr_moder_ctrl_set(struct aq_hw_s *aq_hw, tx_intr_moderation_ctl); } +void hw_atl2_tps_tx_pkt_shed_data_arb_mode_set(struct aq_hw_s *aq_hw, + const u32 data_arb_mode) +{ + aq_hw_write_reg_bit(aq_hw, HW_ATL2_TPS_DATA_TC_ARB_MODE_ADR, + HW_ATL2_TPS_DATA_TC_ARB_MODE_MSK, + HW_ATL2_TPS_DATA_TC_ARB_MODE_SHIFT, + data_arb_mode); +} + void hw_atl2_tps_tx_pkt_shed_tc_data_max_credit_set(struct aq_hw_s *aq_hw, - u32 max_credit, - u32 tc) + const u32 tc, + const u32 max_credit) { aq_hw_write_reg_bit(aq_hw, HW_ATL2_TPS_DATA_TCTCREDIT_MAX_ADR(tc), HW_ATL2_TPS_DATA_TCTCREDIT_MAX_MSK, @@ -87,13 +113,13 @@ void hw_atl2_tps_tx_pkt_shed_tc_data_max_credit_set(struct aq_hw_s *aq_hw, } void hw_atl2_tps_tx_pkt_shed_tc_data_weight_set(struct aq_hw_s *aq_hw, - u32 tx_pkt_shed_tc_data_weight, - u32 tc) + const u32 tc, + const u32 weight) { aq_hw_write_reg_bit(aq_hw, HW_ATL2_TPS_DATA_TCTWEIGHT_ADR(tc), HW_ATL2_TPS_DATA_TCTWEIGHT_MSK, HW_ATL2_TPS_DATA_TCTWEIGHT_SHIFT, - tx_pkt_shed_tc_data_weight); + weight); } u32 hw_atl2_get_hw_version(struct aq_hw_s *aq_hw) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.h index 8c6d78a64d42..98c7a4621297 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.h @@ -15,6 +15,10 @@ void hw_atl2_reg_tx_intr_moder_ctrl_set(struct aq_hw_s *aq_hw, u32 tx_intr_moderation_ctl, u32 queue); +/* Set Redirection Table 2 Select */ +void hw_atl2_rpf_redirection_table2_select_set(struct aq_hw_s *aq_hw, + u32 select); + /** Set RSS HASH type */ void hw_atl2_rpf_rss_hash_type_set(struct aq_hw_s *aq_hw, u32 rss_hash_type); @@ -34,18 +38,25 @@ void hw_atl2_new_rpf_rss_redir_set(struct aq_hw_s *aq_hw, u32 tc, u32 index, /* Set VLAN filter tag */ void hw_atl2_rpf_vlan_flr_tag_set(struct aq_hw_s *aq_hw, u32 tag, u32 filter); +/* set tx random TC-queue mapping enable bit */ +void hw_atl2_tpb_tx_tc_q_rand_map_en_set(struct aq_hw_s *aq_hw, + const u32 tc_q_rand_map_en); + /* set tx buffer clock gate enable */ void hw_atl2_tpb_tx_buf_clk_gate_en_set(struct aq_hw_s *aq_hw, u32 clk_gate_en); +void hw_atl2_tps_tx_pkt_shed_data_arb_mode_set(struct aq_hw_s *aq_hw, + const u32 data_arb_mode); + /* set tx packet scheduler tc data max credit */ void hw_atl2_tps_tx_pkt_shed_tc_data_max_credit_set(struct aq_hw_s *aq_hw, - u32 max_credit, - u32 tc); + const u32 tc, + const u32 max_credit); /* set tx packet scheduler tc data weight */ void 
hw_atl2_tps_tx_pkt_shed_tc_data_weight_set(struct aq_hw_s *aq_hw, - u32 tx_pkt_shed_tc_data_weight, - u32 tc); + const u32 tc, + const u32 weight); u32 hw_atl2_get_hw_version(struct aq_hw_s *aq_hw); diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh_internal.h index cde9e9d2836d..e34c5cda061e 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh_internal.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh_internal.h @@ -6,6 +6,16 @@ #ifndef HW_ATL2_LLH_INTERNAL_H #define HW_ATL2_LLH_INTERNAL_H +/* RX pif_rpf_redir_2_en_i Bitfield Definitions + * PORT="pif_rpf_redir_2_en_i" + */ +#define HW_ATL2_RPF_PIF_RPF_REDIR2_ENI_ADR 0x000054C8 +#define HW_ATL2_RPF_PIF_RPF_REDIR2_ENI_MSK 0x00001000 +#define HW_ATL2_RPF_PIF_RPF_REDIR2_ENI_MSKN 0xFFFFEFFF +#define HW_ATL2_RPF_PIF_RPF_REDIR2_ENI_SHIFT 12 +#define HW_ATL2_RPF_PIF_RPF_REDIR2_ENI_WIDTH 1 +#define HW_ATL2_RPF_PIF_RPF_REDIR2_ENI_DEFAULT 0x0 + /* RX pif_rpf_rss_hash_type_i Bitfield Definitions */ #define HW_ATL2_RPF_PIF_RPF_RSS_HASH_TYPEI_ADR 0x000054C8 @@ -122,6 +132,24 @@ /* Default value of bitfield rx_q{Q}_tc_map[2:0] */ #define HW_ATL2_RX_Q_TC_MAP_DEFAULT 0x0 +/* tx tx_tc_q_rand_map_en bitfield definitions + * preprocessor definitions for the bitfield "tx_tc_q_rand_map_en". + * port="pif_tpb_tx_tc_q_rand_map_en_i" + */ + +/* register address for bitfield tx_tc_q_rand_map_en */ +#define HW_ATL2_TPB_TX_TC_Q_RAND_MAP_EN_ADR 0x00007900 +/* bitmask for bitfield tx_tc_q_rand_map_en */ +#define HW_ATL2_TPB_TX_TC_Q_RAND_MAP_EN_MSK 0x00000200 +/* inverted bitmask for bitfield tx_tc_q_rand_map_en */ +#define HW_ATL2_TPB_TX_TC_Q_RAND_MAP_EN_MSKN 0xFFFFFDFF +/* lower bit position of bitfield tx_tc_q_rand_map_en */ +#define HW_ATL2_TPB_TX_TC_Q_RAND_MAP_EN_SHIFT 9 +/* width of bitfield tx_tc_q_rand_map_en */ +#define HW_ATL2_TPB_TX_TC_Q_RAND_MAP_EN_WIDTH 1 +/* default value of bitfield tx_tc_q_rand_map_en */ +#define HW_ATL2_TPB_TX_TC_Q_RAND_MAP_EN_DEFAULT 0x0 + /* tx tx_buffer_clk_gate_en bitfield definitions * preprocessor definitions for the bitfield "tx_buffer_clk_gate_en". * port="pif_tpb_tx_buffer_clk_gate_en_i" @@ -140,42 +168,77 @@ /* default value of bitfield tx_buffer_clk_gate_en */ #define HW_ATL2_TPB_TX_BUF_CLK_GATE_EN_DEFAULT 0x0 -/* tx data_tc{t}_credit_max[b:0] bitfield definitions - * preprocessor definitions for the bitfield "data_tc{t}_credit_max[b:0]". +/* tx tx_q_tc_map{q} bitfield definitions + * preprocessor definitions for the bitfield "tx_q_tc_map{q}". + * parameter: queue {q} | bit-level stride | range [0, 31] + * port="pif_tpb_tx_q_tc_map0_i[2:0]" + */ + +/* register address for bitfield tx_q_tc_map{q} */ +#define HW_ATL2_TX_Q_TC_MAP_ADR(queue) \ + (((queue) < 32) ? 0x0000799C + ((queue) / 4) * 4 : 0) +/* lower bit position of bitfield tx_q_tc_map{q} */ +#define HW_ATL2_TX_Q_TC_MAP_SHIFT(queue) \ + (((queue) < 32) ? ((queue) * 8) % 32 : 0) +/* width of bitfield tx_q_tc_map{q} */ +#define HW_ATL2_TX_Q_TC_MAP_WIDTH 3 +/* default value of bitfield tx_q_tc_map{q} */ +#define HW_ATL2_TX_Q_TC_MAP_DEFAULT 0x0 + +/* tx data_tc_arb_mode bitfield definitions + * preprocessor definitions for the bitfield "data_tc_arb_mode". 
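+ * Per the driver usage in hw_atl2_hw_init_tx_tc_rate_limit(), 0 selects + * round-robin arbitration and 1 selects weighted strict priority (WSP).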
+ * port="pif_tps_data_tc_arb_mode_i" + */ + +/* register address for bitfield data_tc_arb_mode */ +#define HW_ATL2_TPS_DATA_TC_ARB_MODE_ADR 0x00007100 +/* bitmask for bitfield data_tc_arb_mode */ +#define HW_ATL2_TPS_DATA_TC_ARB_MODE_MSK 0x00000003 +/* inverted bitmask for bitfield data_tc_arb_mode */ +#define HW_ATL2_TPS_DATA_TC_ARB_MODE_MSKN 0xfffffffc +/* lower bit position of bitfield data_tc_arb_mode */ +#define HW_ATL2_TPS_DATA_TC_ARB_MODE_SHIFT 0 +/* width of bitfield data_tc_arb_mode */ +#define HW_ATL2_TPS_DATA_TC_ARB_MODE_WIDTH 2 +/* default value of bitfield data_tc_arb_mode */ +#define HW_ATL2_TPS_DATA_TC_ARB_MODE_DEFAULT 0x0 + +/* tx data_tc{t}_credit_max[f:0] bitfield definitions + * preprocessor definitions for the bitfield "data_tc{t}_credit_max[f:0]". * parameter: tc {t} | stride size 0x4 | range [0, 7] - * port="pif_tps_data_tc0_credit_max_i[11:0]" + * port="pif_tps_data_tc0_credit_max_i[15:0]" */ -/* register address for bitfield data_tc{t}_credit_max[b:0] */ +/* register address for bitfield data_tc{t}_credit_max[f:0] */ #define HW_ATL2_TPS_DATA_TCTCREDIT_MAX_ADR(tc) (0x00007110 + (tc) * 0x4) -/* bitmask for bitfield data_tc{t}_credit_max[b:0] */ -#define HW_ATL2_TPS_DATA_TCTCREDIT_MAX_MSK 0x0fff0000 -/* inverted bitmask for bitfield data_tc{t}_credit_max[b:0] */ -#define HW_ATL2_TPS_DATA_TCTCREDIT_MAX_MSKN 0xf000ffff -/* lower bit position of bitfield data_tc{t}_credit_max[b:0] */ +/* bitmask for bitfield data_tc{t}_credit_max[f:0] */ +#define HW_ATL2_TPS_DATA_TCTCREDIT_MAX_MSK 0xffff0000 +/* inverted bitmask for bitfield data_tc{t}_credit_max[f:0] */ +#define HW_ATL2_TPS_DATA_TCTCREDIT_MAX_MSKN 0x0000ffff +/* lower bit position of bitfield data_tc{t}_credit_max[f:0] */ #define HW_ATL2_TPS_DATA_TCTCREDIT_MAX_SHIFT 16 -/* width of bitfield data_tc{t}_credit_max[b:0] */ -#define HW_ATL2_TPS_DATA_TCTCREDIT_MAX_WIDTH 12 -/* default value of bitfield data_tc{t}_credit_max[b:0] */ +/* width of bitfield data_tc{t}_credit_max[f:0] */ +#define HW_ATL2_TPS_DATA_TCTCREDIT_MAX_WIDTH 16 +/* default value of bitfield data_tc{t}_credit_max[f:0] */ #define HW_ATL2_TPS_DATA_TCTCREDIT_MAX_DEFAULT 0x0 -/* tx data_tc{t}_weight[8:0] bitfield definitions - * preprocessor definitions for the bitfield "data_tc{t}_weight[8:0]". +/* tx data_tc{t}_weight[e:0] bitfield definitions + * preprocessor definitions for the bitfield "data_tc{t}_weight[e:0]". 
* parameter: tc {t} | stride size 0x4 | range [0, 7] - * port="pif_tps_data_tc0_weight_i[8:0]" + * port="pif_tps_data_tc0_weight_i[14:0]" */ -/* register address for bitfield data_tc{t}_weight[8:0] */ +/* register address for bitfield data_tc{t}_weight[e:0] */ #define HW_ATL2_TPS_DATA_TCTWEIGHT_ADR(tc) (0x00007110 + (tc) * 0x4) -/* bitmask for bitfield data_tc{t}_weight[8:0] */ -#define HW_ATL2_TPS_DATA_TCTWEIGHT_MSK 0x000001ff -/* inverted bitmask for bitfield data_tc{t}_weight[8:0] */ -#define HW_ATL2_TPS_DATA_TCTWEIGHT_MSKN 0xfffffe00 -/* lower bit position of bitfield data_tc{t}_weight[8:0] */ +/* bitmask for bitfield data_tc{t}_weight[e:0] */ +#define HW_ATL2_TPS_DATA_TCTWEIGHT_MSK 0x00007fff +/* inverted bitmask for bitfield data_tc{t}_weight[e:0] */ +#define HW_ATL2_TPS_DATA_TCTWEIGHT_MSKN 0xffff8000 +/* lower bit position of bitfield data_tc{t}_weight[e:0] */ #define HW_ATL2_TPS_DATA_TCTWEIGHT_SHIFT 0 -/* width of bitfield data_tc{t}_weight[8:0] */ -#define HW_ATL2_TPS_DATA_TCTWEIGHT_WIDTH 9 -/* default value of bitfield data_tc{t}_weight[8:0] */ +/* width of bitfield data_tc{t}_weight[e:0] */ +#define HW_ATL2_TPS_DATA_TCTWEIGHT_WIDTH 15 +/* default value of bitfield data_tc{t}_weight[e:0] */ #define HW_ATL2_TPS_DATA_TCTWEIGHT_DEFAULT 0x0 /* tx interrupt moderation control register definitions diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index fc1405a8ed74..5a41801acb6a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -60,6 +60,7 @@ #define CH_WARN(adap, fmt, ...) dev_warn(adap->pdev_dev, fmt, ## __VA_ARGS__) extern struct list_head adapter_list; +extern struct list_head uld_list; extern struct mutex uld_mutex; /* Suspend an Ethernet Tx queue with fewer available descriptors than this. @@ -822,6 +823,13 @@ struct sge_uld_txq_info { u16 ntxq; /* # of egress uld queues */ }; +/* struct to maintain ULD list to reallocate ULD resources on hotplug */ +struct cxgb4_uld_list { + struct cxgb4_uld_info uld_info; + struct list_head list_node; + enum cxgb4_uld uld_type; +}; + enum sge_eosw_state { CXGB4_EO_STATE_CLOSED = 0, /* Not ready to accept traffic */ CXGB4_EO_STATE_FLOWC_OPEN_SEND, /* Send FLOWC open request */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index 7818c392da50..c3dd50b45c48 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@ -1813,12 +1813,8 @@ static int mps_tcam_show(struct seq_file *seq, void *v) /* Inner header lookup */ if (lookup_type && (lookup_type != DATALKPTYPE_M)) { seq_printf(seq, - "%3u %02x:%02x:%02x:%02x:%02x:%02x " - "%012llx %06x %06x - - %3c" - " 'I' %4x " - "%3c %#x%4u%4d", idx, addr[0], - addr[1], addr[2], addr[3], - addr[4], addr[5], + "%3u %pM %012llx %06x %06x - - %3c 'I' %4x %3c %#x%4u%4d", + idx, addr, (unsigned long long)mask, vniy, (vnix | vniy), dip_hit ? 
'Y' : 'N', @@ -1830,10 +1826,8 @@ static int mps_tcam_show(struct seq_file *seq, void *v) T6_VF_G(cls_lo) : -1); } else { seq_printf(seq, - "%3u %02x:%02x:%02x:%02x:%02x:%02x " - "%012llx - - ", - idx, addr[0], addr[1], addr[2], - addr[3], addr[4], addr[5], + "%3u %pM %012llx - - ", + idx, addr, (unsigned long long)mask); if (vlan_vld) @@ -1851,10 +1845,8 @@ static int mps_tcam_show(struct seq_file *seq, void *v) T6_VF_G(cls_lo) : -1); } } else - seq_printf(seq, "%3u %02x:%02x:%02x:%02x:%02x:%02x " - "%012llx%3c %#x%4u%4d", - idx, addr[0], addr[1], addr[2], addr[3], - addr[4], addr[5], (unsigned long long)mask, + seq_printf(seq, "%3u %pM %012llx%3c %#x%4u%4d", + idx, addr, (unsigned long long)mask, (cls_lo & SRAM_VLD_F) ? 'Y' : 'N', PORTMAP_G(cls_hi), PF_G(cls_lo), diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index d05c2371d8c7..7a0414f379be 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -180,6 +180,7 @@ static struct dentry *cxgb4_debugfs_root; LIST_HEAD(adapter_list); DEFINE_MUTEX(uld_mutex); +LIST_HEAD(uld_list); static int cfg_queues(struct adapter *adap); @@ -6519,11 +6520,8 @@ fw_attach_fail: /* PCIe EEH recovery on powerpc platforms needs fundamental reset */ pdev->needs_freset = 1; - if (is_uld(adapter)) { - mutex_lock(&uld_mutex); - list_add_tail(&adapter->list_node, &adapter_list); - mutex_unlock(&uld_mutex); - } + if (is_uld(adapter)) + cxgb4_uld_enable(adapter); if (!is_t4(adapter->params.chip)) cxgb4_ptp_init(adapter); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c index e65b52375dd8..6b1d3df4b9ba 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c @@ -681,6 +681,74 @@ static void cxgb4_set_ktls_feature(struct adapter *adap, bool enable) } #endif +static void cxgb4_uld_alloc_resources(struct adapter *adap, + enum cxgb4_uld type, + const struct cxgb4_uld_info *p) +{ + int ret = 0; + + if ((type == CXGB4_ULD_CRYPTO && !is_pci_uld(adap)) || + (type != CXGB4_ULD_CRYPTO && !is_offload(adap))) + return; + if (type == CXGB4_ULD_ISCSIT && is_t4(adap->params.chip)) + return; + ret = cfg_queues_uld(adap, type, p); + if (ret) + goto out; + ret = setup_sge_queues_uld(adap, type, p->lro); + if (ret) + goto free_queues; + if (adap->flags & CXGB4_USING_MSIX) { + ret = request_msix_queue_irqs_uld(adap, type); + if (ret) + goto free_rxq; + } + if (adap->flags & CXGB4_FULL_INIT_DONE) + enable_rx_uld(adap, type); +#ifdef CONFIG_CHELSIO_TLS_DEVICE + /* send mbox to enable ktls related settings. 
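+ * This is done only when the FW advertises FW_CAPS_CONFIG_TX_TLS_HW.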
*/ + if (type == CXGB4_ULD_CRYPTO && + (adap->params.crypto & FW_CAPS_CONFIG_TX_TLS_HW)) + cxgb4_set_ktls_feature(adap, 1); +#endif + if (adap->uld[type].add) + goto free_irq; + ret = setup_sge_txq_uld(adap, type, p); + if (ret) + goto free_irq; + adap->uld[type] = *p; + ret = uld_attach(adap, type); + if (ret) + goto free_txq; + return; +free_txq: + release_sge_txq_uld(adap, type); +free_irq: + if (adap->flags & CXGB4_FULL_INIT_DONE) + quiesce_rx_uld(adap, type); + if (adap->flags & CXGB4_USING_MSIX) + free_msix_queue_irqs_uld(adap, type); +free_rxq: + free_sge_queues_uld(adap, type); +free_queues: + free_queues_uld(adap, type); +out: + dev_warn(adap->pdev_dev, + "ULD registration failed for uld type %d\n", type); +} + +void cxgb4_uld_enable(struct adapter *adap) +{ + struct cxgb4_uld_list *uld_entry; + + mutex_lock(&uld_mutex); + list_add_tail(&adap->list_node, &adapter_list); + list_for_each_entry(uld_entry, &uld_list, list_node) + cxgb4_uld_alloc_resources(adap, uld_entry->uld_type, + &uld_entry->uld_info); + mutex_unlock(&uld_mutex); +} + /* cxgb4_register_uld - register an upper-layer driver * @type: the ULD type * @p: the ULD methods @@ -691,63 +759,23 @@ static void cxgb4_set_ktls_feature(struct adapter *adap, bool enable) void cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p) { + struct cxgb4_uld_list *uld_entry; struct adapter *adap; - int ret = 0; if (type >= CXGB4_ULD_MAX) return; + uld_entry = kzalloc(sizeof(*uld_entry), GFP_KERNEL); + if (!uld_entry) + return; + + memcpy(&uld_entry->uld_info, p, sizeof(struct cxgb4_uld_info)); mutex_lock(&uld_mutex); - list_for_each_entry(adap, &adapter_list, list_node) { - if ((type == CXGB4_ULD_CRYPTO && !is_pci_uld(adap)) || - (type != CXGB4_ULD_CRYPTO && !is_offload(adap))) - continue; - if (type == CXGB4_ULD_ISCSIT && is_t4(adap->params.chip)) - continue; - ret = cfg_queues_uld(adap, type, p); - if (ret) - goto out; - ret = setup_sge_queues_uld(adap, type, p->lro); - if (ret) - goto free_queues; - if (adap->flags & CXGB4_USING_MSIX) { - ret = request_msix_queue_irqs_uld(adap, type); - if (ret) - goto free_rxq; - } - if (adap->flags & CXGB4_FULL_INIT_DONE) - enable_rx_uld(adap, type); -#ifdef CONFIG_CHELSIO_TLS_DEVICE - /* send mbox to enable ktls related settings. 
*/ - if (type == CXGB4_ULD_CRYPTO && - (adap->params.crypto & FW_CAPS_CONFIG_TX_TLS_HW)) - cxgb4_set_ktls_feature(adap, 1); -#endif - if (adap->uld[type].add) - goto free_irq; - ret = setup_sge_txq_uld(adap, type, p); - if (ret) - goto free_irq; - adap->uld[type] = *p; - ret = uld_attach(adap, type); - if (ret) - goto free_txq; - continue; -free_txq: - release_sge_txq_uld(adap, type); -free_irq: - if (adap->flags & CXGB4_FULL_INIT_DONE) - quiesce_rx_uld(adap, type); - if (adap->flags & CXGB4_USING_MSIX) - free_msix_queue_irqs_uld(adap, type); -free_rxq: - free_sge_queues_uld(adap, type); -free_queues: - free_queues_uld(adap, type); -out: - dev_warn(adap->pdev_dev, - "ULD registration failed for uld type %d\n", type); - } + list_for_each_entry(adap, &adapter_list, list_node) + cxgb4_uld_alloc_resources(adap, type, p); + + uld_entry->uld_type = type; + list_add_tail(&uld_entry->list_node, &uld_list); mutex_unlock(&uld_mutex); return; } @@ -761,6 +789,7 @@ EXPORT_SYMBOL(cxgb4_register_uld); */ int cxgb4_unregister_uld(enum cxgb4_uld type) { + struct cxgb4_uld_list *uld_entry, *tmp; struct adapter *adap; if (type >= CXGB4_ULD_MAX) @@ -783,6 +812,13 @@ int cxgb4_unregister_uld(enum cxgb4_uld type) cxgb4_set_ktls_feature(adap, 0); #endif } + + list_for_each_entry_safe(uld_entry, tmp, &uld_list, list_node) { + if (uld_entry->uld_type == type) { + list_del(&uld_entry->list_node); + kfree(uld_entry); + } + } mutex_unlock(&uld_mutex); return 0; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index 16796785eea3..085fa1424f9a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -327,6 +327,7 @@ enum cxgb4_control { CXGB4_CONTROL_DB_DROP, }; +struct adapter; struct pci_dev; struct l2t_data; struct net_device; @@ -465,6 +466,7 @@ struct cxgb4_uld_info { int (*tx_handler)(struct sk_buff *skb, struct net_device *dev); }; +void cxgb4_uld_enable(struct adapter *adap); void cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p); int cxgb4_unregister_uld(enum cxgb4_uld type); int cxgb4_ofld_send(struct net_device *dev, struct sk_buff *skb); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 5fbaa51b38bc..fe3806d54630 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -273,13 +273,43 @@ static int dpaa2_eth_xdp_flush(struct dpaa2_eth_priv *priv, return total_enqueued; } -static int xdp_enqueue(struct dpaa2_eth_priv *priv, struct dpaa2_fd *fd, - void *buf_start, u16 queue_id) +static void xdp_tx_flush(struct dpaa2_eth_priv *priv, + struct dpaa2_eth_channel *ch, + struct dpaa2_eth_fq *fq) +{ + struct rtnl_link_stats64 *percpu_stats; + struct dpaa2_fd *fds; + int enqueued, i; + + percpu_stats = this_cpu_ptr(priv->percpu_stats); + + // enqueue the array of XDP_TX frames + enqueued = dpaa2_eth_xdp_flush(priv, fq, &fq->xdp_tx_fds); + + /* update statistics */ + percpu_stats->tx_packets += enqueued; + fds = fq->xdp_tx_fds.fds; + for (i = 0; i < enqueued; i++) { + percpu_stats->tx_bytes += dpaa2_fd_get_len(&fds[i]); + ch->stats.xdp_tx++; + } + for (i = enqueued; i < fq->xdp_tx_fds.num; i++) { + xdp_release_buf(priv, ch, dpaa2_fd_get_addr(&fds[i])); + percpu_stats->tx_errors++; + ch->stats.xdp_tx_err++; + } + fq->xdp_tx_fds.num = 0; +} + +static void xdp_enqueue(struct dpaa2_eth_priv *priv, + struct dpaa2_eth_channel *ch, + struct dpaa2_fd *fd, + 
void *buf_start, u16 queue_id) { - struct dpaa2_eth_fq *fq; struct dpaa2_faead *faead; + struct dpaa2_fd *dest_fd; + struct dpaa2_eth_fq *fq; u32 ctrl, frc; - int i, err; /* Mark the egress frame hardware annotation area as valid */ frc = dpaa2_fd_get_frc(fd); @@ -296,13 +326,13 @@ static int xdp_enqueue(struct dpaa2_eth_priv *priv, struct dpaa2_fd *fd, faead->conf_fqid = 0; fq = &priv->fq[queue_id]; - for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) { - err = priv->enqueue(priv, fq, fd, 0, 1, NULL); - if (err != -EBUSY) - break; - } + dest_fd = &fq->xdp_tx_fds.fds[fq->xdp_tx_fds.num++]; + memcpy(dest_fd, fd, sizeof(*dest_fd)); - return err; + if (fq->xdp_tx_fds.num < DEV_MAP_BULK_SIZE) + return; + + xdp_tx_flush(priv, ch, fq); } static u32 run_xdp(struct dpaa2_eth_priv *priv, @@ -311,14 +341,11 @@ static u32 run_xdp(struct dpaa2_eth_priv *priv, struct dpaa2_fd *fd, void *vaddr) { dma_addr_t addr = dpaa2_fd_get_addr(fd); - struct rtnl_link_stats64 *percpu_stats; struct bpf_prog *xdp_prog; struct xdp_buff xdp; u32 xdp_act = XDP_PASS; int err; - percpu_stats = this_cpu_ptr(priv->percpu_stats); - rcu_read_lock(); xdp_prog = READ_ONCE(ch->xdp.prog); @@ -344,16 +371,7 @@ static u32 run_xdp(struct dpaa2_eth_priv *priv, case XDP_PASS: break; case XDP_TX: - err = xdp_enqueue(priv, fd, vaddr, rx_fq->flowid); - if (err) { - xdp_release_buf(priv, ch, addr); - percpu_stats->tx_errors++; - ch->stats.xdp_tx_err++; - } else { - percpu_stats->tx_packets++; - percpu_stats->tx_bytes += dpaa2_fd_get_len(fd); - ch->stats.xdp_tx++; - } + xdp_enqueue(priv, ch, fd, vaddr, rx_fq->flowid); break; default: bpf_warn_invalid_xdp_action(xdp_act); @@ -1175,6 +1193,7 @@ static int dpaa2_eth_poll(struct napi_struct *napi, int budget) int store_cleaned, work_done; struct list_head rx_list; int retries = 0; + u16 flowid; int err; ch = container_of(napi, struct dpaa2_eth_channel, napi); @@ -1197,6 +1216,7 @@ static int dpaa2_eth_poll(struct napi_struct *napi, int budget) break; if (fq->type == DPAA2_RX_FQ) { rx_cleaned += store_cleaned; + flowid = fq->flowid; } else { txconf_cleaned += store_cleaned; /* We have a single Tx conf FQ on this channel */ @@ -1239,6 +1259,8 @@ out: if (ch->xdp.res & XDP_REDIRECT) xdp_do_flush_map(); + else if (rx_cleaned && ch->xdp.res & XDP_TX) + xdp_tx_flush(priv, ch, &priv->fq[flowid]); return work_done; } diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h index 42f0a7a80afa..0581fbf1f98c 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h @@ -334,6 +334,7 @@ struct dpaa2_eth_fq { struct dpaa2_eth_fq_stats stats; struct dpaa2_eth_xdp_fds xdp_redirect_fds; + struct dpaa2_eth_xdp_fds xdp_tx_fds; }; struct dpaa2_eth_ch_xdp { diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 05bc6e216bca..d9fa4600f745 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -542,8 +542,13 @@ void e1000_reinit_locked(struct e1000_adapter *adapter) WARN_ON(in_interrupt()); while (test_and_set_bit(__E1000_RESETTING, &adapter->flags)) msleep(1); - e1000_down(adapter); - e1000_up(adapter); + + /* only run the task if not already down */ + if (!test_bit(__E1000_DOWN, &adapter->flags)) { + e1000_down(adapter); + e1000_up(adapter); + } + clear_bit(__E1000_RESETTING, &adapter->flags); } @@ -1433,10 +1438,15 @@ int e1000_close(struct net_device *netdev) struct e1000_hw *hw = 
&adapter->hw; int count = E1000_CHECK_RESET_COUNT; - while (test_bit(__E1000_RESETTING, &adapter->flags) && count--) + while (test_and_set_bit(__E1000_RESETTING, &adapter->flags) && count--) usleep_range(10000, 20000); - WARN_ON(test_bit(__E1000_RESETTING, &adapter->flags)); + WARN_ON(count < 0); + + /* signal that we're down so that the reset task will no longer run */ + set_bit(__E1000_DOWN, &adapter->flags); + clear_bit(__E1000_RESETTING, &adapter->flags); + e1000_down(adapter); e1000_power_down_phy(adapter); e1000_free_irq(adapter); diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile index 29c6c6743450..2055e61eaf24 100644 --- a/drivers/net/ethernet/intel/ice/Makefile +++ b/drivers/net/ethernet/intel/ice/Makefile @@ -17,6 +17,7 @@ ice-y := ice_main.o \ ice_lib.o \ ice_txrx_lib.o \ ice_txrx.o \ + ice_fltr.o \ ice_flex_pipe.o \ ice_flow.o \ ice_devlink.o \ diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 5c11448bfbb3..58d0d6436c7f 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -37,6 +37,10 @@ #include <net/devlink.h> #include <net/ipv6.h> #include <net/xdp_sock.h> +#include <net/geneve.h> +#include <net/gre.h> +#include <net/udp_tunnel.h> +#include <net/vxlan.h> #include "ice_devids.h" #include "ice_type.h" #include "ice_txrx.h" @@ -244,8 +248,8 @@ struct ice_vsi { u32 tx_busy; u32 rx_buf_failed; u32 rx_page_failed; - int num_q_vectors; - int base_vector; /* IRQ base for OS reserved vectors */ + u16 num_q_vectors; + u16 base_vector; /* IRQ base for OS reserved vectors */ enum ice_vsi_type type; u16 vsi_num; /* HW (absolute) index of this VSI */ u16 idx; /* software index in pf->vsi[] */ @@ -341,6 +345,7 @@ enum ice_pf_flags { ICE_FLAG_FW_LLDP_AGENT, ICE_FLAG_ETHTOOL_CTXT, /* set when ethtool holds RTNL lock */ ICE_FLAG_LEGACY_RX, + ICE_FLAG_VF_TRUE_PROMISC_ENA, ICE_FLAG_MDD_AUTO_RESET_VF, ICE_PF_FLAGS_NBITS /* must be last */ }; @@ -366,7 +371,7 @@ struct ice_pf { struct ice_sw *first_sw; /* first switch created by firmware */ /* Virtchnl/SR-IOV config info */ struct ice_vf *vf; - int num_alloc_vfs; /* actual number of VFs allocated */ + u16 num_alloc_vfs; /* actual number of VFs allocated */ u16 num_vfs_supported; /* num VFs supported for this PF */ u16 num_qps_per_vf; u16 num_msix_per_vf; @@ -385,11 +390,11 @@ struct ice_pf { struct mutex tc_mutex; /* lock to protect TC changes */ u32 msg_enable; u32 hw_csum_rx_error; - u32 oicr_idx; /* Other interrupt cause MSIX vector index */ - u32 num_avail_sw_msix; /* remaining MSIX SW vectors left unclaimed */ + u16 oicr_idx; /* Other interrupt cause MSIX vector index */ + u16 num_avail_sw_msix; /* remaining MSIX SW vectors left unclaimed */ u16 max_pf_txqs; /* Total Tx queues PF wide */ u16 max_pf_rxqs; /* Total Rx queues PF wide */ - u32 num_lan_msix; /* Total MSIX vectors for base driver */ + u16 num_lan_msix; /* Total MSIX vectors for base driver */ u16 num_lan_tx; /* num LAN Tx queues setup */ u16 num_lan_rx; /* num LAN Rx queues setup */ u16 next_vsi; /* Next free slot in pf->vsi[] - 0-based! 
*/ @@ -523,6 +528,8 @@ int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size); int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset); void ice_print_link_msg(struct ice_vsi *vsi, bool isup); +const char *ice_stat_str(enum ice_status stat_err); +const char *ice_aq_str(enum ice_aq_err aq_err); int ice_open(struct net_device *netdev); int ice_stop(struct net_device *netdev); diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 2381b4014ed6..979e9c6254af 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -541,7 +541,7 @@ struct ice_sw_rule_lkup_rx_tx { #define ICE_SINGLE_ACT_OTHER_ACTS 0x3 #define ICE_SINGLE_OTHER_ACT_IDENTIFIER_S 17 #define ICE_SINGLE_OTHER_ACT_IDENTIFIER_M \ - (0x3 << \ ICE_SINGLE_OTHER_ACT_IDENTIFIER_S) + (0x3 << ICE_SINGLE_OTHER_ACT_IDENTIFIER_S) /* Bit 17:18 - Defines other actions */ /* Other action = 0 - Mirror VSI */ @@ -1264,6 +1264,33 @@ struct ice_aqc_nvm_checksum { u8 rsvd2[12]; }; +/* The result of netlist NVM read comes in a TLV format. The actual data + * (netlist header) starts from word offset 1 (byte 2). The FW strips + * out the type field from the TLV header so all the netlist fields + * should adjust their offset value by 1 word (2 bytes) in order to map + * their correct location. + */ +#define ICE_AQC_NVM_LINK_TOPO_NETLIST_MOD_ID 0x11B +#define ICE_AQC_NVM_LINK_TOPO_NETLIST_LEN_OFFSET 1 +#define ICE_AQC_NVM_LINK_TOPO_NETLIST_LEN 2 /* In bytes */ +#define ICE_AQC_NVM_NETLIST_NODE_COUNT_OFFSET 2 +#define ICE_AQC_NVM_NETLIST_NODE_COUNT_LEN 2 /* In bytes */ +#define ICE_AQC_NVM_NETLIST_NODE_COUNT_M ICE_M(0x3FF, 0) +#define ICE_AQC_NVM_NETLIST_ID_BLK_START_OFFSET 5 +#define ICE_AQC_NVM_NETLIST_ID_BLK_LEN 0x30 /* In words */ + +/* netlist ID block field offsets (word offsets) */ +#define ICE_AQC_NVM_NETLIST_ID_BLK_MAJOR_VER_LOW 2 +#define ICE_AQC_NVM_NETLIST_ID_BLK_MAJOR_VER_HIGH 3 +#define ICE_AQC_NVM_NETLIST_ID_BLK_MINOR_VER_LOW 4 +#define ICE_AQC_NVM_NETLIST_ID_BLK_MINOR_VER_HIGH 5 +#define ICE_AQC_NVM_NETLIST_ID_BLK_TYPE_LOW 6 +#define ICE_AQC_NVM_NETLIST_ID_BLK_TYPE_HIGH 7 +#define ICE_AQC_NVM_NETLIST_ID_BLK_REV_LOW 8 +#define ICE_AQC_NVM_NETLIST_ID_BLK_REV_HIGH 9 +#define ICE_AQC_NVM_NETLIST_ID_BLK_SHA_HASH 0xA +#define ICE_AQC_NVM_NETLIST_ID_BLK_CUST_VER 0x2F + /** * Send to PF command (indirect 0x0801) ID is only used by PF * diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 433eb72b1c85..f066befadd39 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -25,7 +25,7 @@ static int __ice_vsi_get_qs_contig(struct ice_qs_cfg *qs_cfg) bitmap_set(qs_cfg->pf_map, offset, qs_cfg->q_count); for (i = 0; i < qs_cfg->q_count; i++) - qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = i + offset; + qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = (u16)(i + offset); mutex_unlock(qs_cfg->qs_mutex); return 0; @@ -48,7 +48,7 @@ static int __ice_vsi_get_qs_sc(struct ice_qs_cfg *qs_cfg) if (index >= qs_cfg->pf_map_size) goto err_scatter; set_bit(index, qs_cfg->pf_map); - qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = index; + qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = (u16)index; } mutex_unlock(qs_cfg->qs_mutex); @@ -97,7 +97,7 @@ static int ice_pf_rxq_wait(struct ice_pf *pf, int pf_q, bool ena) * We allocate one q_vector and set default value for ITR 
setting associated * with this q_vector. If allocation fails we return -ENOMEM. */ -static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, int v_idx) +static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, u16 v_idx) { struct ice_pf *pf = vsi->back; struct ice_q_vector *q_vector; @@ -376,7 +376,7 @@ int ice_setup_rx_ctx(struct ice_ring *ring) /* Max packet size for this queue - must not be set to a larger value * than 5 x DBUF */ - rlan_ctx.rxmax = min_t(u16, vsi->max_frame, + rlan_ctx.rxmax = min_t(u32, vsi->max_frame, chain_len * ring->rx_buf_len); /* Rx queue threshold in units of 64 */ @@ -453,7 +453,7 @@ int __ice_vsi_get_qs(struct ice_qs_cfg *qs_cfg) if (ret) { /* contig failed, so try with scatter approach */ qs_cfg->mapping_mode = ICE_VSI_MAP_SCATTER; - qs_cfg->q_count = min_t(u16, qs_cfg->q_count, + qs_cfg->q_count = min_t(unsigned int, qs_cfg->q_count, qs_cfg->scatter_count); ret = __ice_vsi_get_qs_sc(qs_cfg); } @@ -526,7 +526,8 @@ int ice_vsi_wait_one_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx) int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi) { struct device *dev = ice_pf_to_dev(vsi->back); - int v_idx, err; + u16 v_idx; + int err; if (vsi->q_vectors[0]) { dev_dbg(dev, "VSI %d has existing q_vectors\n", vsi->vsi_num); @@ -562,7 +563,7 @@ err_out: void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi) { int q_vectors = vsi->num_q_vectors; - int tx_rings_rem, rx_rings_rem; + u16 tx_rings_rem, rx_rings_rem; int v_id; /* initially assigning remaining rings count to VSIs num queue value */ @@ -571,10 +572,12 @@ void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi) for (v_id = 0; v_id < q_vectors; v_id++) { struct ice_q_vector *q_vector = vsi->q_vectors[v_id]; - int tx_rings_per_v, rx_rings_per_v, q_id, q_base; + u8 tx_rings_per_v, rx_rings_per_v; + u16 q_id, q_base; /* Tx rings mapping to vector */ - tx_rings_per_v = DIV_ROUND_UP(tx_rings_rem, q_vectors - v_id); + tx_rings_per_v = (u8)DIV_ROUND_UP(tx_rings_rem, + q_vectors - v_id); q_vector->num_ring_tx = tx_rings_per_v; q_vector->tx.ring = NULL; q_vector->tx.itr_idx = ICE_TX_ITR; @@ -590,7 +593,8 @@ void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi) tx_rings_rem -= tx_rings_per_v; /* Rx rings mapping to vector */ - rx_rings_per_v = DIV_ROUND_UP(rx_rings_rem, q_vectors - v_id); + rx_rings_per_v = (u8)DIV_ROUND_UP(rx_rings_rem, + q_vectors - v_id); q_vector->num_ring_rx = rx_rings_per_v; q_vector->rx.ring = NULL; q_vector->rx.itr_idx = ICE_RX_ITR; @@ -662,8 +666,8 @@ ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring, status = ice_ena_vsi_txq(vsi->port_info, vsi->idx, tc, ring->q_handle, 1, qg_buf, buf_len, NULL); if (status) { - dev_err(ice_pf_to_dev(pf), "Failed to set LAN Tx queue context, error: %d\n", - status); + dev_err(ice_pf_to_dev(pf), "Failed to set LAN Tx queue context, error: %s\n", + ice_stat_str(status)); return -ENODEV; } @@ -832,8 +836,8 @@ ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, } else if (status == ICE_ERR_DOES_NOT_EXIST) { dev_dbg(ice_pf_to_dev(vsi->back), "LAN Tx queues do not exist, nothing to disable\n"); } else if (status) { - dev_err(ice_pf_to_dev(vsi->back), "Failed to disable LAN Tx queues, error: %d\n", - status); + dev_err(ice_pf_to_dev(vsi->back), "Failed to disable LAN Tx queues, error: %s\n", + ice_stat_str(status)); return -ENODEV; } diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 2c0d8fd3d5cd..1a613199d6cb 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ 
b/drivers/net/ethernet/intel/ice/ice_common.c @@ -746,6 +746,7 @@ enum ice_status ice_init_hw(struct ice_hw *hw) status = ice_init_hw_tbls(hw); if (status) goto err_unroll_fltr_mgmt_struct; + mutex_init(&hw->tnl_lock); return 0; err_unroll_fltr_mgmt_struct: @@ -775,6 +776,7 @@ void ice_deinit_hw(struct ice_hw *hw) ice_sched_clear_agg(hw); ice_free_seg(hw); ice_free_hw_tbls(hw); + mutex_destroy(&hw->tnl_lock); if (hw->port_info) { devm_kfree(ice_hw_to_dev(hw), hw->port_info); diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c index dd946866d7b8..9a865962296d 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.c +++ b/drivers/net/ethernet/intel/ice/ice_controlq.c @@ -1128,7 +1128,7 @@ ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq, } memcpy(&e->desc, desc, sizeof(e->desc)); datalen = le16_to_cpu(desc->datalen); - e->msg_len = min(datalen, e->buf_len); + e->msg_len = min_t(u16, datalen, e->buf_len); if (e->msg_buf && e->msg_len) memcpy(e->msg_buf, cq->rq.r.rq_bi[desc_idx].va, e->msg_len); diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c index 7bea09363b42..3c7f604c0c49 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c @@ -63,6 +63,64 @@ u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg *dcbcfg) } /** + * ice_is_pfc_causing_hung_q + * @pf: pointer to PF structure + * @txqueue: Tx queue which is supposedly hung + * + * Find out if PFC is causing the hung queue; returns true if it is, + * false otherwise + */ +bool ice_is_pfc_causing_hung_q(struct ice_pf *pf, unsigned int txqueue) +{ + u8 num_tcs = 0, i, tc, up_mapped_tc, up_in_tc = 0; + u64 ref_prio_xoff[ICE_MAX_UP]; + struct ice_vsi *vsi; + u32 up2tc; + + vsi = ice_get_main_vsi(pf); + if (!vsi) + return false; + + ice_for_each_traffic_class(i) + if (vsi->tc_cfg.ena_tc & BIT(i)) + num_tcs++; + + /* first find out the TC to which the hung queue belongs */ + for (tc = 0; tc < num_tcs - 1; tc++) + if (ice_find_q_in_range(vsi->tc_cfg.tc_info[tc].qoffset, + vsi->tc_cfg.tc_info[tc + 1].qoffset, + txqueue)) + break; + + /* Build a bitmap of all UPs associated with the suspect hung queue's + * TC, so that we can check their counters for an increment. + */ + up2tc = rd32(&pf->hw, PRTDCB_TUP2TC); + for (i = 0; i < ICE_MAX_UP; i++) { + up_mapped_tc = (up2tc >> (i * 3)) & 0x7; + if (up_mapped_tc == tc) + up_in_tc |= BIT(i); + } + + /* Even though the hung queue's TC is PFC-enabled, the Tx timeout can + * still be legitimate. So to make sure the timeout really is caused by + * a PFC storm, check whether the counters are incrementing.
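+ * A further rise in priority_xoff_rx for any of those UPs means the + * link partner really is pausing this traffic class.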
+ */ + for (i = 0; i < ICE_MAX_UP; i++) + if (up_in_tc & BIT(i)) + ref_prio_xoff[i] = pf->stats.priority_xoff_rx[i]; + + ice_update_dcb_stats(pf); + + for (i = 0; i < ICE_MAX_UP; i++) + if (up_in_tc & BIT(i)) + if (pf->stats.priority_xoff_rx[i] > ref_prio_xoff[i]) + return true; + + return false; +} + +/** * ice_dcb_get_mode - gets the DCB mode * @port_info: pointer to port info structure * @host: if set it's HOST if not it's MANAGED @@ -526,16 +584,21 @@ static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool ets_willing, bool locked) */ static bool ice_dcb_tc_contig(u8 *prio_table) { - u8 max_tc = 0; + bool found_empty = false; + u8 used_tc = 0; int i; - for (i = 0; i < CEE_DCBX_MAX_PRIO; i++) { - u8 cur_tc = prio_table[i]; + /* Create a bitmap of used TCs */ + for (i = 0; i < CEE_DCBX_MAX_PRIO; i++) + used_tc |= BIT(prio_table[i]); - if (cur_tc > max_tc) - return false; - else if (cur_tc == max_tc) - max_tc++; + for (i = 0; i < CEE_DCBX_MAX_PRIO; i++) { + if (used_tc & BIT(i)) { + if (found_empty) + return false; + } else { + found_empty = true; + } } return true; diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h index 37680e815b02..7c42324494d2 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h @@ -17,6 +17,8 @@ void ice_dcb_rebuild(struct ice_pf *pf); u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg *dcbcfg); u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg); +void ice_vsi_set_dcb_tc_cfg(struct ice_vsi *vsi); +bool ice_is_pfc_causing_hung_q(struct ice_pf *pf, unsigned int txqueue); u8 ice_dcb_get_tc(struct ice_vsi *vsi, int queue_index); int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked); @@ -32,6 +34,20 @@ void ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, struct ice_rq_event_info *event); void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc); + +/** + * ice_find_q_in_range + * @low: start of queue range for a TC i.e. 
offset of TC + * @high: start of queue for next TC + * @tx_q: hung_queue/tx_queue + * + * finds if queue 'tx_q' falls between the two offsets of any given TC + */ +static inline bool ice_find_q_in_range(u16 low, u16 high, unsigned int tx_q) +{ + return (tx_q >= low) && (tx_q < high); +} + static inline void ice_set_cgd_num(struct ice_tlan_ctx *tlan_ctx, struct ice_ring *ring) { @@ -79,6 +95,13 @@ ice_tx_prepare_vlan_flags_dcb(struct ice_ring __always_unused *tx_ring, return 0; } +static inline bool +ice_is_pfc_causing_hung_q(struct ice_pf __always_unused *pf, + unsigned int __always_unused txqueue) +{ + return false; +} + #define ice_update_dcb_stats(pf) do {} while (0) #define ice_pf_dcb_recfg(pf) do {} while (0) #define ice_vsi_cfg_dcb_rings(vsi) do {} while (0) diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c index c6833944b90a..a73d06e06b5d 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.c +++ b/drivers/net/ethernet/intel/ice/ice_devlink.c @@ -105,6 +105,27 @@ static int ice_info_ddp_pkg_version(struct ice_pf *pf, char *buf, size_t len) return 0; } +static int ice_info_netlist_ver(struct ice_pf *pf, char *buf, size_t len) +{ + struct ice_netlist_ver_info *netlist = &pf->hw.netlist_ver; + + /* The netlist version fields are BCD formatted */ + snprintf(buf, len, "%x.%x.%x-%x.%x.%x", netlist->major, netlist->minor, + netlist->type >> 16, netlist->type & 0xFFFF, netlist->rev, + netlist->cust_ver); + + return 0; +} + +static int ice_info_netlist_build(struct ice_pf *pf, char *buf, size_t len) +{ + struct ice_netlist_ver_info *netlist = &pf->hw.netlist_ver; + + snprintf(buf, len, "0x%08x", netlist->hash); + + return 0; +} + #define fixed(key, getter) { ICE_VERSION_FIXED, key, getter } #define running(key, getter) { ICE_VERSION_RUNNING, key, getter } @@ -128,6 +149,8 @@ static const struct ice_devlink_version { running(DEVLINK_INFO_VERSION_GENERIC_FW_BUNDLE_ID, ice_info_eetrack), running("fw.app.name", ice_info_ddp_pkg_name), running(DEVLINK_INFO_VERSION_GENERIC_FW_APP, ice_info_ddp_pkg_version), + running("fw.netlist", ice_info_netlist_ver), + running("fw.netlist.build", ice_info_netlist_build), }; /** diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 593fb37bd59e..9fb82c993df9 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -5,6 +5,7 @@ #include "ice.h" #include "ice_flow.h" +#include "ice_fltr.h" #include "ice_lib.h" #include "ice_dcb_lib.h" @@ -157,6 +158,8 @@ struct ice_priv_flag { static const struct ice_priv_flag ice_gstrings_priv_flags[] = { ICE_PRIV_FLAG("link-down-on-close", ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA), ICE_PRIV_FLAG("fw-lldp-agent", ICE_FLAG_FW_LLDP_AGENT), + ICE_PRIV_FLAG("vf-true-promisc-support", + ICE_FLAG_VF_TRUE_PROMISC_ENA), ICE_PRIV_FLAG("mdd-auto-reset-vf", ICE_FLAG_MDD_AUTO_RESET_VF), ICE_PRIV_FLAG("legacy-rx", ICE_FLAG_LEGACY_RX), }; @@ -273,8 +276,9 @@ ice_get_eeprom(struct net_device *netdev, struct ethtool_eeprom *eeprom, status = ice_acquire_nvm(hw, ICE_RES_READ); if (status) { - dev_err(dev, "ice_acquire_nvm failed, err %d aq_err %d\n", - status, hw->adminq.sq_last_status); + dev_err(dev, "ice_acquire_nvm failed, err %s aq_err %s\n", + ice_stat_str(status), + ice_aq_str(hw->adminq.sq_last_status)); ret = -EIO; goto out; } @@ -282,8 +286,9 @@ ice_get_eeprom(struct net_device *netdev, struct ethtool_eeprom *eeprom, status = ice_read_flat_nvm(hw, eeprom->offset, &eeprom->len, 
buf, false); if (status) { - dev_err(dev, "ice_read_flat_nvm failed, err %d aq_err %d\n", - status, hw->adminq.sq_last_status); + dev_err(dev, "ice_read_flat_nvm failed, err %s aq_err %s\n", + ice_stat_str(status), + ice_aq_str(hw->adminq.sq_last_status)); ret = -EIO; goto release; } @@ -332,7 +337,8 @@ static u64 ice_link_test(struct net_device *netdev) netdev_info(netdev, "link test\n"); status = ice_get_link_status(np->vsi->port_info, &link_up); if (status) { - netdev_err(netdev, "link query error, status = %d\n", status); + netdev_err(netdev, "link query error, status = %s\n", + ice_stat_str(status)); return 1; } @@ -671,7 +677,6 @@ static u64 ice_loopback_test(struct net_device *netdev) struct ice_ring *tx_ring, *rx_ring; u8 broadcast[ETH_ALEN], ret = 0; int num_frames, valid_frames; - LIST_HEAD(tmp_list); struct device *dev; u8 *tx_frame; int i; @@ -707,16 +712,11 @@ static u64 ice_loopback_test(struct net_device *netdev) /* Test VSI needs to receive broadcast packets */ eth_broadcast_addr(broadcast); - if (ice_add_mac_to_list(test_vsi, &tmp_list, broadcast)) { + if (ice_fltr_add_mac(test_vsi, broadcast, ICE_FWD_TO_VSI)) { ret = 5; goto lbtest_mac_dis; } - if (ice_add_mac(&pf->hw, &tmp_list)) { - ret = 6; - goto free_mac_list; - } - if (ice_lbtest_create_frame(pf, &tx_frame, ICE_LB_FRAME_SIZE)) { ret = 7; goto remove_mac_filters; @@ -739,10 +739,8 @@ static u64 ice_loopback_test(struct net_device *netdev) lbtest_free_frame: devm_kfree(dev, tx_frame); remove_mac_filters: - if (ice_remove_mac(&pf->hw, &tmp_list)) + if (ice_fltr_remove_mac(test_vsi, broadcast, ICE_FWD_TO_VSI)) netdev_err(netdev, "Could not remove MAC filter for the test VSI\n"); -free_mac_list: - ice_free_fltr_list(dev, &tmp_list); lbtest_mac_dis: /* Disable MAC loopback after the test is completed. 
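* This undoes the loopback mode that was enabled at the start of the test.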
*/ if (ice_aq_set_mac_loopback(&pf->hw, false, NULL)) @@ -1158,8 +1156,9 @@ static int ice_nway_reset(struct net_device *netdev) status = ice_aq_set_link_restart_an(pi, false, NULL); if (status) { - netdev_info(netdev, "link restart failed, err %d aq_err %d\n", - status, pi->hw->adminq.sq_last_status); + netdev_info(netdev, "link restart failed, err %s aq_err %s\n", + ice_stat_str(status), + ice_aq_str(pi->hw->adminq.sq_last_status)); return -EIO; } @@ -1308,6 +1307,16 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags) ice_down(vsi); ice_up(vsi); } + /* don't allow modification of this flag when a single VF is in + * promiscuous mode because it's not supported + */ + if (test_bit(ICE_FLAG_VF_TRUE_PROMISC_ENA, change_flags) && + ice_is_any_vf_in_promisc(pf)) { + dev_err(dev, "Changing vf-true-promisc-support flag while VF(s) are in promiscuous mode not supported\n"); + /* toggle bit back to previous state */ + change_bit(ICE_FLAG_VF_TRUE_PROMISC_ENA, pf->flags); + ret = -EAGAIN; + } clear_bit(ICE_FLAG_ETHTOOL_CTXT, pf->flags); return ret; } @@ -2450,8 +2459,8 @@ ice_set_rss_hash_opt(struct ice_vsi *vsi, struct ethtool_rxnfc *nfc) status = ice_add_rss_cfg(&pf->hw, vsi->idx, hashed_flds, hdrs); if (status) { - dev_dbg(dev, "ice_add_rss_cfg failed, vsi num = %d, error = %d\n", - vsi->vsi_num, status); + dev_dbg(dev, "ice_add_rss_cfg failed, vsi num = %d, error = %s\n", + vsi->vsi_num, ice_stat_str(status)); return -EINVAL; } @@ -2593,7 +2602,7 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring) struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; int i, timeout = 50, err = 0; - u32 new_rx_cnt, new_tx_cnt; + u16 new_rx_cnt, new_tx_cnt; if (ring->tx_pending > ICE_MAX_NUM_DESC || ring->tx_pending < ICE_MIN_NUM_DESC || @@ -2645,8 +2654,8 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring) if (ice_is_xdp_ena_vsi(vsi)) for (i = 0; i < vsi->num_xdp_txq; i++) vsi->xdp_rings[i]->count = new_tx_cnt; - vsi->num_tx_desc = new_tx_cnt; - vsi->num_rx_desc = new_rx_cnt; + vsi->num_tx_desc = (u16)new_tx_cnt; + vsi->num_rx_desc = (u16)new_rx_cnt; netdev_dbg(netdev, "Link is down, descriptor count change happens when link is brought up\n"); goto done; } @@ -2952,31 +2961,22 @@ ice_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) status = ice_set_fc(pi, &aq_failures, link_up); if (aq_failures & ICE_SET_FC_AQ_FAIL_GET) { - netdev_info(netdev, "Set fc failed on the get_phy_capabilities call with err %d aq_err %d\n", - status, hw->adminq.sq_last_status); + netdev_info(netdev, "Set fc failed on the get_phy_capabilities call with err %s aq_err %s\n", + ice_stat_str(status), + ice_aq_str(hw->adminq.sq_last_status)); err = -EAGAIN; } else if (aq_failures & ICE_SET_FC_AQ_FAIL_SET) { - netdev_info(netdev, "Set fc failed on the set_phy_config call with err %d aq_err %d\n", - status, hw->adminq.sq_last_status); + netdev_info(netdev, "Set fc failed on the set_phy_config call with err %s aq_err %s\n", + ice_stat_str(status), + ice_aq_str(hw->adminq.sq_last_status)); err = -EAGAIN; } else if (aq_failures & ICE_SET_FC_AQ_FAIL_UPDATE) { - netdev_info(netdev, "Set fc failed on the get_link_info call with err %d aq_err %d\n", - status, hw->adminq.sq_last_status); + netdev_info(netdev, "Set fc failed on the get_link_info call with err %s aq_err %s\n", + ice_stat_str(status), + ice_aq_str(hw->adminq.sq_last_status)); err = -EAGAIN; } - if (!test_bit(__ICE_DOWN, pf->state)) { - /* Give it a little more time to try to come 
back. If still - * down, restart autoneg link or reinitialize the interface. - */ - msleep(75); - if (!test_bit(__ICE_DOWN, pf->state)) - return ice_nway_reset(netdev); - - ice_down(vsi); - ice_up(vsi); - } - return err; } @@ -3227,8 +3227,9 @@ static int ice_vsi_set_dflt_rss_lut(struct ice_vsi *vsi, int req_rss_size) status = ice_aq_set_rss_lut(hw, vsi->idx, vsi->rss_lut_type, lut, vsi->rss_table_size); if (status) { - dev_err(dev, "Cannot set RSS lut, err %d aq_err %d\n", - status, hw->adminq.rq_last_status); + dev_err(dev, "Cannot set RSS lut, err %s aq_err %s\n", + ice_stat_str(status), + ice_aq_str(hw->adminq.rq_last_status)); err = -EIO; } diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c index e7a2671222d2..38c37f506257 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c @@ -5,6 +5,15 @@ #include "ice_flex_pipe.h" #include "ice_flow.h" +/* To support tunneling entries by PF, the package will append the PF number to + * the label; for example TNL_VXLAN_PF0, TNL_VXLAN_PF1, TNL_VXLAN_PF2, etc. + */ +static const struct ice_tunnel_type_scan tnls[] = { + { TNL_VXLAN, "TNL_VXLAN_PF" }, + { TNL_GENEVE, "TNL_GENEVE_PF" }, + { TNL_LAST, "" } }; + static const u32 ice_sect_lkup[ICE_BLK_COUNT][ICE_SECT_COUNT] = { /* SWITCH */ { @@ -239,6 +248,268 @@ ice_pkg_enum_section(struct ice_seg *ice_seg, struct ice_pkg_enum *state, return state->sect; } +/** + * ice_pkg_enum_entry + * @ice_seg: pointer to the ice segment (or NULL on subsequent calls) + * @state: pointer to the enum state + * @sect_type: section type to enumerate + * @offset: pointer to variable that receives the offset in the table (optional) + * @handler: function that handles access to the entries of the section type + * + * This function will enumerate all the entries in a particular section type in + * the ice segment. The first call is made with the ice_seg parameter non-NULL; + * on subsequent calls, ice_seg is set to NULL, which continues the enumeration. + * When the function returns a NULL pointer, then the end of the entries has + * been reached. + * + * Since each section may have a different header and entry size, the handler + * function is needed to determine the number and location of entries in each + * section. + * + * The offset parameter is optional, but should be used for sections that + * contain an offset for each section table. For such cases, the section handler + * function must return the appropriate offset + index to give the absolute + * offset for each entry. For example, if the base for a section's header + * indicates a base offset of 10, and the index for the entry is 2, then the + * section handler function should set the offset to 10 + 2 = 12.
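+ * Callers typically loop, passing ice_seg on the first call and NULL on + * subsequent calls, until NULL is returned.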
+ */ +static void * +ice_pkg_enum_entry(struct ice_seg *ice_seg, struct ice_pkg_enum *state, + u32 sect_type, u32 *offset, + void *(*handler)(u32 sect_type, void *section, + u32 index, u32 *offset)) +{ + void *entry; + + if (ice_seg) { + if (!handler) + return NULL; + + if (!ice_pkg_enum_section(ice_seg, state, sect_type)) + return NULL; + + state->entry_idx = 0; + state->handler = handler; + } else { + state->entry_idx++; + } + + if (!state->handler) + return NULL; + + /* get entry */ + entry = state->handler(state->sect_type, state->sect, state->entry_idx, + offset); + if (!entry) { + /* end of a section, look for another section of this type */ + if (!ice_pkg_enum_section(NULL, state, 0)) + return NULL; + + state->entry_idx = 0; + entry = state->handler(state->sect_type, state->sect, + state->entry_idx, offset); + } + + return entry; +} + +/** + * ice_boost_tcam_handler + * @sect_type: section type + * @section: pointer to section + * @index: index of the boost TCAM entry to be returned + * @offset: pointer to receive absolute offset, always 0 for boost TCAM sections + * + * This is a callback function that can be passed to ice_pkg_enum_entry. + * Handles enumeration of individual boost TCAM entries. + */ +static void * +ice_boost_tcam_handler(u32 sect_type, void *section, u32 index, u32 *offset) +{ + struct ice_boost_tcam_section *boost; + + if (!section) + return NULL; + + if (sect_type != ICE_SID_RXPARSER_BOOST_TCAM) + return NULL; + + if (index > ICE_MAX_BST_TCAMS_IN_BUF) + return NULL; + + if (offset) + *offset = 0; + + boost = section; + if (index >= le16_to_cpu(boost->count)) + return NULL; + + return boost->tcam + index; +} + +/** + * ice_find_boost_entry + * @ice_seg: pointer to the ice segment (non-NULL) + * @addr: Boost TCAM address of entry to search for + * @entry: returns pointer to the entry + * + * Finds a particular Boost TCAM entry and returns a pointer to that entry + * if it is found. The ice_seg parameter must not be NULL since the first call + * to ice_pkg_enum_entry requires a pointer to an actual ice_segment structure. + */ +static enum ice_status +ice_find_boost_entry(struct ice_seg *ice_seg, u16 addr, + struct ice_boost_tcam_entry **entry) +{ + struct ice_boost_tcam_entry *tcam; + struct ice_pkg_enum state; + + memset(&state, 0, sizeof(state)); + + if (!ice_seg) + return ICE_ERR_PARAM; + + do { + tcam = ice_pkg_enum_entry(ice_seg, &state, + ICE_SID_RXPARSER_BOOST_TCAM, NULL, + ice_boost_tcam_handler); + if (tcam && le16_to_cpu(tcam->addr) == addr) { + *entry = tcam; + return 0; + } + + ice_seg = NULL; + } while (tcam); + + *entry = NULL; + return ICE_ERR_CFG; +} + +/** + * ice_label_enum_handler + * @sect_type: section type + * @section: pointer to section + * @index: index of the label entry to be returned + * @offset: pointer to receive absolute offset, always zero for label sections + * + * This is a callback function that can be passed to ice_pkg_enum_entry. + * Handles enumeration of individual label entries. 
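+ *
+ * ice_enum_labels() below is the caller in this file; it plugs the handler
+ * into the entry enumerator roughly as:
+ *
+ *	label = ice_pkg_enum_entry(ice_seg, state, type, NULL,
+ *				   ice_label_enum_handler);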
+ */ +static void * +ice_label_enum_handler(u32 __always_unused sect_type, void *section, u32 index, + u32 *offset) +{ + struct ice_label_section *labels; + + if (!section) + return NULL; + + if (index > ICE_MAX_LABELS_IN_BUF) + return NULL; + + if (offset) + *offset = 0; + + labels = section; + if (index >= le16_to_cpu(labels->count)) + return NULL; + + return labels->label + index; +} + +/** + * ice_enum_labels + * @ice_seg: pointer to the ice segment (NULL on subsequent calls) + * @type: the section type that will contain the label (0 on subsequent calls) + * @state: ice_pkg_enum structure that will hold the state of the enumeration + * @value: pointer to a value that will return the label's value if found + * + * Enumerates a list of labels in the package. The caller will call + * ice_enum_labels(ice_seg, type, ...) to start the enumeration, then call + * ice_enum_labels(NULL, 0, ...) to continue. When the function returns NULL, + * the end of the list has been reached. + */ +static char * +ice_enum_labels(struct ice_seg *ice_seg, u32 type, struct ice_pkg_enum *state, + u16 *value) +{ + struct ice_label *label; + + /* Check for valid label section on first call */ + if (type && !(type >= ICE_SID_LBL_FIRST && type <= ICE_SID_LBL_LAST)) + return NULL; + + label = ice_pkg_enum_entry(ice_seg, state, type, NULL, + ice_label_enum_handler); + if (!label) + return NULL; + + *value = le16_to_cpu(label->value); + return label->name; +} + +/** + * ice_init_pkg_hints + * @hw: pointer to the HW structure + * @ice_seg: pointer to the segment of the package scan (non-NULL) + * + * This function will scan the package and save off relevant information + * (hints or metadata) for driver use. The ice_seg parameter must not be NULL + * since the first call to ice_enum_labels requires a pointer to an actual + * ice_seg structure. + */ +static void ice_init_pkg_hints(struct ice_hw *hw, struct ice_seg *ice_seg) +{ + struct ice_pkg_enum state; + char *label_name; + u16 val; + int i; + + memset(&hw->tnl, 0, sizeof(hw->tnl)); + memset(&state, 0, sizeof(state)); + + if (!ice_seg) + return; + + label_name = ice_enum_labels(ice_seg, ICE_SID_LBL_RXPARSER_TMEM, &state, + &val); + + while (label_name && hw->tnl.count < ICE_TUNNEL_MAX_ENTRIES) { + for (i = 0; tnls[i].type != TNL_LAST; i++) { + size_t len = strlen(tnls[i].label_prefix); + + /* Look for matching label start, before continuing */ + if (strncmp(label_name, tnls[i].label_prefix, len)) + continue; + + /* Make sure this label matches our PF. Note that the PF + * character ('0' - '7') will be located where our + * prefix string's null terminator is located.
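+ * For example, with the "TNL_VXLAN_PF" prefix (strlen 12),
+ * the label "TNL_VXLAN_PF3" yields label_name[len] == '3',
+ * so the entry is recorded only when this instance runs as
+ * PF 3.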
+ */ + if ((label_name[len] - '0') == hw->pf_id) { + hw->tnl.tbl[hw->tnl.count].type = tnls[i].type; + hw->tnl.tbl[hw->tnl.count].valid = false; + hw->tnl.tbl[hw->tnl.count].in_use = false; + hw->tnl.tbl[hw->tnl.count].marked = false; + hw->tnl.tbl[hw->tnl.count].boost_addr = val; + hw->tnl.tbl[hw->tnl.count].port = 0; + hw->tnl.count++; + break; + } + } + + label_name = ice_enum_labels(NULL, 0, &state, &val); + } + + /* Cache the appropriate boost TCAM entry pointers */ + for (i = 0; i < hw->tnl.count; i++) { + ice_find_boost_entry(ice_seg, hw->tnl.tbl[i].boost_addr, + &hw->tnl.tbl[i].boost_entry); + if (hw->tnl.tbl[i].boost_entry) + hw->tnl.tbl[i].valid = true; + } +} + /* Key creation */ #define ICE_DC_KEY 0x1 /* don't care */ @@ -1050,7 +1321,8 @@ enum ice_status ice_init_pkg(struct ice_hw *hw, u8 *buf, u32 len) return ICE_ERR_CFG; } - /* download package */ + /* initialize package hints and then download package */ + ice_init_pkg_hints(hw, seg); status = ice_download_pkg(hw, seg); if (status == ICE_ERR_AQ_NO_WORK) { ice_debug(hw, ICE_DBG_INIT, @@ -1292,6 +1564,256 @@ static struct ice_buf *ice_pkg_buf(struct ice_buf_build *bld) return &bld->buf; } +/** + * ice_tunnel_port_in_use_hlpr - helper function to determine tunnel usage + * @hw: pointer to the HW structure + * @port: port to search for + * @index: optionally returns index + * + * Returns whether a port is already in use as a tunnel, and optionally its + * index + */ +static bool ice_tunnel_port_in_use_hlpr(struct ice_hw *hw, u16 port, u16 *index) +{ + u16 i; + + for (i = 0; i < hw->tnl.count && i < ICE_TUNNEL_MAX_ENTRIES; i++) + if (hw->tnl.tbl[i].in_use && hw->tnl.tbl[i].port == port) { + if (index) + *index = i; + return true; + } + + return false; +} + +/** + * ice_tunnel_port_in_use + * @hw: pointer to the HW structure + * @port: port to search for + * @index: optionally returns index + * + * Returns whether a port is already in use as a tunnel, and optionally its + * index + */ +bool ice_tunnel_port_in_use(struct ice_hw *hw, u16 port, u16 *index) +{ + bool res; + + mutex_lock(&hw->tnl_lock); + res = ice_tunnel_port_in_use_hlpr(hw, port, index); + mutex_unlock(&hw->tnl_lock); + + return res; +} + +/** + * ice_find_free_tunnel_entry + * @hw: pointer to the HW structure + * @type: tunnel type + * @index: optionally returns index + * + * Returns whether there is a free tunnel entry, and optionally its index + */ +static bool +ice_find_free_tunnel_entry(struct ice_hw *hw, enum ice_tunnel_type type, + u16 *index) +{ + u16 i; + + for (i = 0; i < hw->tnl.count && i < ICE_TUNNEL_MAX_ENTRIES; i++) + if (hw->tnl.tbl[i].valid && !hw->tnl.tbl[i].in_use && + hw->tnl.tbl[i].type == type) { + if (index) + *index = i; + return true; + } + + return false; +} + +/** + * ice_create_tunnel + * @hw: pointer to the HW structure + * @type: type of tunnel + * @port: port of tunnel to create + * + * Create a tunnel by updating the parse graph in the parser. We do that by + * creating a package buffer with the tunnel info and issuing an update package + * command. 
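+ *
+ * A typical call, using the IANA-assigned VXLAN UDP port as an example
+ * value (the caller supplies whatever port is being offloaded):
+ *
+ *	status = ice_create_tunnel(hw, TNL_VXLAN, 4789);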
+ */ +enum ice_status +ice_create_tunnel(struct ice_hw *hw, enum ice_tunnel_type type, u16 port) +{ + struct ice_boost_tcam_section *sect_rx, *sect_tx; + enum ice_status status = ICE_ERR_MAX_LIMIT; + struct ice_buf_build *bld; + u16 index; + + mutex_lock(&hw->tnl_lock); + + if (ice_tunnel_port_in_use_hlpr(hw, port, &index)) { + hw->tnl.tbl[index].ref++; + status = 0; + goto ice_create_tunnel_end; + } + + if (!ice_find_free_tunnel_entry(hw, type, &index)) { + status = ICE_ERR_OUT_OF_RANGE; + goto ice_create_tunnel_end; + } + + bld = ice_pkg_buf_alloc(hw); + if (!bld) { + status = ICE_ERR_NO_MEMORY; + goto ice_create_tunnel_end; + } + + /* allocate 2 sections, one for Rx parser, one for Tx parser */ + if (ice_pkg_buf_reserve_section(bld, 2)) + goto ice_create_tunnel_err; + + sect_rx = ice_pkg_buf_alloc_section(bld, ICE_SID_RXPARSER_BOOST_TCAM, + sizeof(*sect_rx)); + if (!sect_rx) + goto ice_create_tunnel_err; + sect_rx->count = cpu_to_le16(1); + + sect_tx = ice_pkg_buf_alloc_section(bld, ICE_SID_TXPARSER_BOOST_TCAM, + sizeof(*sect_tx)); + if (!sect_tx) + goto ice_create_tunnel_err; + sect_tx->count = cpu_to_le16(1); + + /* copy original boost entry to update package buffer */ + memcpy(sect_rx->tcam, hw->tnl.tbl[index].boost_entry, + sizeof(*sect_rx->tcam)); + + /* over-write the never-match dest port key bits with the encoded port + * bits + */ + ice_set_key((u8 *)§_rx->tcam[0].key, sizeof(sect_rx->tcam[0].key), + (u8 *)&port, NULL, NULL, NULL, + (u16)offsetof(struct ice_boost_key_value, hv_dst_port_key), + sizeof(sect_rx->tcam[0].key.key.hv_dst_port_key)); + + /* exact copy of entry to Tx section entry */ + memcpy(sect_tx->tcam, sect_rx->tcam, sizeof(*sect_tx->tcam)); + + status = ice_update_pkg(hw, ice_pkg_buf(bld), 1); + if (!status) { + hw->tnl.tbl[index].port = port; + hw->tnl.tbl[index].in_use = true; + hw->tnl.tbl[index].ref = 1; + } + +ice_create_tunnel_err: + ice_pkg_buf_free(hw, bld); + +ice_create_tunnel_end: + mutex_unlock(&hw->tnl_lock); + + return status; +} + +/** + * ice_destroy_tunnel + * @hw: pointer to the HW structure + * @port: port of tunnel to destroy (ignored if the all parameter is true) + * @all: flag that states to destroy all tunnels + * + * Destroys a tunnel or all tunnels by creating an update package buffer + * targeting the specific updates requested and then performing an update + * package. 
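+ *
+ * Illustrative calls (the port value is only an example; port is ignored
+ * when all is true):
+ *
+ *	status = ice_destroy_tunnel(hw, 4789, false);
+ *	status = ice_destroy_tunnel(hw, 0, true);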
+ */ +enum ice_status ice_destroy_tunnel(struct ice_hw *hw, u16 port, bool all) +{ + struct ice_boost_tcam_section *sect_rx, *sect_tx; + enum ice_status status = ICE_ERR_MAX_LIMIT; + struct ice_buf_build *bld; + u16 count = 0; + u16 index; + u16 size; + u16 i; + + mutex_lock(&hw->tnl_lock); + + if (!all && ice_tunnel_port_in_use_hlpr(hw, port, &index)) + if (hw->tnl.tbl[index].ref > 1) { + hw->tnl.tbl[index].ref--; + status = 0; + goto ice_destroy_tunnel_end; + } + + /* determine count */ + for (i = 0; i < hw->tnl.count && i < ICE_TUNNEL_MAX_ENTRIES; i++) + if (hw->tnl.tbl[i].valid && hw->tnl.tbl[i].in_use && + (all || hw->tnl.tbl[i].port == port)) + count++; + + if (!count) { + status = ICE_ERR_PARAM; + goto ice_destroy_tunnel_end; + } + + /* size of section - there is at least one entry */ + size = struct_size(sect_rx, tcam, count - 1); + + bld = ice_pkg_buf_alloc(hw); + if (!bld) { + status = ICE_ERR_NO_MEMORY; + goto ice_destroy_tunnel_end; + } + + /* allocate 2 sections, one for Rx parser, one for Tx parser */ + if (ice_pkg_buf_reserve_section(bld, 2)) + goto ice_destroy_tunnel_err; + + sect_rx = ice_pkg_buf_alloc_section(bld, ICE_SID_RXPARSER_BOOST_TCAM, + size); + if (!sect_rx) + goto ice_destroy_tunnel_err; + sect_rx->count = cpu_to_le16(1); + + sect_tx = ice_pkg_buf_alloc_section(bld, ICE_SID_TXPARSER_BOOST_TCAM, + size); + if (!sect_tx) + goto ice_destroy_tunnel_err; + sect_tx->count = cpu_to_le16(1); + + /* copy original boost entry to update package buffer, one copy to Rx + * section, another copy to the Tx section + */ + for (i = 0; i < hw->tnl.count && i < ICE_TUNNEL_MAX_ENTRIES; i++) + if (hw->tnl.tbl[i].valid && hw->tnl.tbl[i].in_use && + (all || hw->tnl.tbl[i].port == port)) { + memcpy(sect_rx->tcam + i, hw->tnl.tbl[i].boost_entry, + sizeof(*sect_rx->tcam)); + memcpy(sect_tx->tcam + i, hw->tnl.tbl[i].boost_entry, + sizeof(*sect_tx->tcam)); + hw->tnl.tbl[i].marked = true; + } + + status = ice_update_pkg(hw, ice_pkg_buf(bld), 1); + if (!status) + for (i = 0; i < hw->tnl.count && + i < ICE_TUNNEL_MAX_ENTRIES; i++) + if (hw->tnl.tbl[i].marked) { + hw->tnl.tbl[i].ref = 0; + hw->tnl.tbl[i].port = 0; + hw->tnl.tbl[i].in_use = false; + hw->tnl.tbl[i].marked = false; + } + +ice_destroy_tunnel_err: + ice_pkg_buf_free(hw, bld); + +ice_destroy_tunnel_end: + mutex_unlock(&hw->tnl_lock); + + return status; +} + /* PTG Management */ /** @@ -1807,9 +2329,10 @@ ice_find_prof_id(struct ice_hw *hw, enum ice_block blk, struct ice_fv_word *fv, u8 *prof_id) { struct ice_es *es = &hw->blk[blk].es; - u16 off, i; + u16 off; + u8 i; - for (i = 0; i < es->count; i++) { + for (i = 0; i < (u8)es->count; i++) { off = i * es->fvw; if (memcmp(&es->t[off], fv, es->fvw * sizeof(*fv))) @@ -2939,7 +3462,7 @@ ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[], DECLARE_BITMAP(ptgs_used, ICE_XLT1_CNT); struct ice_prof_map *prof; enum ice_status status; - u32 byte = 0; + u8 byte = 0; u8 prof_id; bitmap_zero(ptgs_used, ICE_XLT1_CNT); @@ -2974,7 +3497,7 @@ ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[], /* build list of ptgs */ while (bytes && prof->ptg_cnt < ICE_MAX_PTG_PER_PROFILE) { - u32 bit; + u8 bit; if (!ptypes[byte]) { bytes--; @@ -3008,7 +3531,7 @@ ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[], break; /* nothing left in byte, then exit */ - m = ~((1 << (bit + 1)) - 1); + m = ~(u8)((1 << (bit + 1)) - 1); if (!(ptypes[byte] & m)) break; } @@ -3705,8 +4228,10 @@ ice_add_prof_id_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsig, u64 
hdl, t->tcam[i].prof_id, t->tcam[i].ptg, vsig, 0, 0, vl_msk, dc_msk, nm_msk); - if (status) + if (status) { + devm_kfree(ice_hw_to_dev(hw), p); goto err_ice_add_prof_id_vsig; + } /* log change */ list_add(&p->list_entry, chg); diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h index c7b5e1a6ea2b..70db213c9fe3 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h @@ -19,6 +19,11 @@ #define ICE_PKG_CNT 4 enum ice_status +ice_create_tunnel(struct ice_hw *hw, enum ice_tunnel_type type, u16 port); +enum ice_status ice_destroy_tunnel(struct ice_hw *hw, u16 port, bool all); +bool ice_tunnel_port_in_use(struct ice_hw *hw, u16 port, u16 *index); + +enum ice_status ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[], struct ice_fv_word *es); enum ice_status diff --git a/drivers/net/ethernet/intel/ice/ice_flex_type.h b/drivers/net/ethernet/intel/ice/ice_flex_type.h index 0fb3fe3ff3ea..249fb66fc230 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_type.h +++ b/drivers/net/ethernet/intel/ice/ice_flex_type.h @@ -149,6 +149,7 @@ struct ice_buf_hdr { #define ICE_SID_CDID_REDIR_RSS 48 #define ICE_SID_RXPARSER_BOOST_TCAM 56 +#define ICE_SID_TXPARSER_BOOST_TCAM 66 #define ICE_SID_XLT0_PE 80 #define ICE_SID_XLT_KEY_BUILDER_PE 81 @@ -291,6 +292,38 @@ struct ice_pkg_enum { void *(*handler)(u32 sect_type, void *section, u32 index, u32 *offset); }; +/* Tunnel enabling */ + +enum ice_tunnel_type { + TNL_VXLAN = 0, + TNL_GENEVE, + TNL_LAST = 0xFF, + TNL_ALL = 0xFF, +}; + +struct ice_tunnel_type_scan { + enum ice_tunnel_type type; + const char *label_prefix; +}; + +struct ice_tunnel_entry { + enum ice_tunnel_type type; + u16 boost_addr; + u16 port; + u16 ref; + struct ice_boost_tcam_entry *boost_entry; + u8 valid; + u8 in_use; + u8 marked; +}; + +#define ICE_TUNNEL_MAX_ENTRIES 16 + +struct ice_tunnel_table { + struct ice_tunnel_entry tbl[ICE_TUNNEL_MAX_ENTRIES]; + u16 count; +}; + struct ice_pkg_es { __le16 count; __le16 offset; diff --git a/drivers/net/ethernet/intel/ice/ice_flow.c b/drivers/net/ethernet/intel/ice/ice_flow.c index 3de862a3c789..07875db08c3f 100644 --- a/drivers/net/ethernet/intel/ice/ice_flow.c +++ b/drivers/net/ethernet/intel/ice/ice_flow.c @@ -42,7 +42,10 @@ struct ice_flow_field_info ice_flds_info[ICE_FLOW_FIELD_IDX_MAX] = { ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_SCTP, 0, sizeof(__be16)), /* ICE_FLOW_FIELD_IDX_SCTP_DST_PORT */ ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_SCTP, 2, sizeof(__be16)), - + /* GRE */ + /* ICE_FLOW_FIELD_IDX_GRE_KEYID */ + ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_GRE, 12, + sizeof_field(struct gre_full_hdr, key)), }; /* Bitmaps indicating relevant packet types for a particular protocol header @@ -134,6 +137,18 @@ static const u32 ice_ptypes_sctp_il[] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000, }; +/* Packet types for packets with an Outermost/First GRE header */ +static const u32 ice_ptypes_gre_of[] = { + 0x00000000, 0xBFBF7800, 0x000001DF, 0xFEFDE000, + 0x0000017E, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, +}; + /* Manage parameters and info. 
used during the creation of a flow profile */ struct ice_flow_prof_params { enum ice_block blk; @@ -225,6 +240,12 @@ ice_flow_proc_seg_hdrs(struct ice_flow_prof_params *params) src = (const unsigned long *)ice_ptypes_sctp_il; bitmap_and(params->ptypes, params->ptypes, src, ICE_FLOW_PTYPE_MAX); + } else if (hdrs & ICE_FLOW_SEG_HDR_GRE) { + if (!i) { + src = (const unsigned long *)ice_ptypes_gre_of; + bitmap_and(params->ptypes, params->ptypes, + src, ICE_FLOW_PTYPE_MAX); + } } } @@ -275,6 +296,9 @@ ice_flow_xtract_fld(struct ice_hw *hw, struct ice_flow_prof_params *params, case ICE_FLOW_FIELD_IDX_SCTP_DST_PORT: prot_id = ICE_PROT_SCTP_IL; break; + case ICE_FLOW_FIELD_IDX_GRE_KEYID: + prot_id = ICE_PROT_GRE_OF; + break; default: return ICE_ERR_NOT_IMPL; } @@ -945,6 +969,7 @@ ice_add_rss_list(struct ice_hw *hw, u16 vsi_handle, struct ice_flow_prof *prof) #define ICE_FLOW_PROF_ENCAP_M (BIT_ULL(ICE_FLOW_PROF_ENCAP_S)) #define ICE_RSS_OUTER_HEADERS 1 +#define ICE_RSS_INNER_HEADERS 2 /* Flow profile ID format: * [0:31] - Packet match fields @@ -1085,6 +1110,9 @@ ice_add_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u64 hashed_flds, mutex_lock(&hw->rss_locks); status = ice_add_rss_cfg_sync(hw, vsi_handle, hashed_flds, addl_hdrs, ICE_RSS_OUTER_HEADERS); + if (!status) + status = ice_add_rss_cfg_sync(hw, vsi_handle, hashed_flds, + addl_hdrs, ICE_RSS_INNER_HEADERS); mutex_unlock(&hw->rss_locks); return status; @@ -1238,6 +1266,12 @@ enum ice_status ice_replay_rss_cfg(struct ice_hw *hw, u16 vsi_handle) ICE_RSS_OUTER_HEADERS); if (status) break; + status = ice_add_rss_cfg_sync(hw, vsi_handle, + r->hashed_flds, + r->packet_hdr, + ICE_RSS_INNER_HEADERS); + if (status) + break; } } mutex_unlock(&hw->rss_locks); diff --git a/drivers/net/ethernet/intel/ice/ice_flow.h b/drivers/net/ethernet/intel/ice/ice_flow.h index 5558627bd5eb..00f2b7a9feed 100644 --- a/drivers/net/ethernet/intel/ice/ice_flow.h +++ b/drivers/net/ethernet/intel/ice/ice_flow.h @@ -43,6 +43,7 @@ enum ice_flow_seg_hdr { ICE_FLOW_SEG_HDR_TCP = 0x00000040, ICE_FLOW_SEG_HDR_UDP = 0x00000080, ICE_FLOW_SEG_HDR_SCTP = 0x00000100, + ICE_FLOW_SEG_HDR_GRE = 0x00000200, }; enum ice_flow_field { @@ -58,6 +59,8 @@ enum ice_flow_field { ICE_FLOW_FIELD_IDX_UDP_DST_PORT, ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT, ICE_FLOW_FIELD_IDX_SCTP_DST_PORT, + /* GRE */ + ICE_FLOW_FIELD_IDX_GRE_KEYID, /* The total number of enums must not exceed 64 */ ICE_FLOW_FIELD_IDX_MAX }; diff --git a/drivers/net/ethernet/intel/ice/ice_fltr.c b/drivers/net/ethernet/intel/ice/ice_fltr.c new file mode 100644 index 000000000000..2418d4fff037 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_fltr.c @@ -0,0 +1,397 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2018-2020, Intel Corporation. 
*/ + +#include "ice.h" +#include "ice_fltr.h" + +/** + * ice_fltr_free_list - free filter lists helper + * @dev: pointer to the device struct + * @h: pointer to the list head to be freed + * + * Helper function to free filter lists previously created using + * ice_fltr_add_mac_to_list + */ +void ice_fltr_free_list(struct device *dev, struct list_head *h) +{ + struct ice_fltr_list_entry *e, *tmp; + + list_for_each_entry_safe(e, tmp, h, list_entry) { + list_del(&e->list_entry); + devm_kfree(dev, e); + } +} + +/** + * ice_fltr_add_entry_to_list - allocate and add filter entry to list + * @dev: pointer to device needed by alloc function + * @info: filter info struct that gets added to the passed in list + * @list: pointer to the list which contains MAC filter entries + */ +static int +ice_fltr_add_entry_to_list(struct device *dev, struct ice_fltr_info *info, + struct list_head *list) +{ + struct ice_fltr_list_entry *entry; + + entry = devm_kzalloc(dev, sizeof(*entry), GFP_ATOMIC); + if (!entry) + return -ENOMEM; + + entry->fltr_info = *info; + + INIT_LIST_HEAD(&entry->list_entry); + list_add(&entry->list_entry, list); + + return 0; +} + +/** + * ice_fltr_add_mac_list - add list of MAC filters + * @vsi: pointer to VSI struct + * @list: list of filters + */ +enum ice_status +ice_fltr_add_mac_list(struct ice_vsi *vsi, struct list_head *list) +{ + return ice_add_mac(&vsi->back->hw, list); +} + +/** + * ice_fltr_remove_mac_list - remove list of MAC filters + * @vsi: pointer to VSI struct + * @list: list of filters + */ +enum ice_status +ice_fltr_remove_mac_list(struct ice_vsi *vsi, struct list_head *list) +{ + return ice_remove_mac(&vsi->back->hw, list); +} + +/** + * ice_fltr_add_vlan_list - add list of VLAN filters + * @vsi: pointer to VSI struct + * @list: list of filters + */ +static enum ice_status +ice_fltr_add_vlan_list(struct ice_vsi *vsi, struct list_head *list) +{ + return ice_add_vlan(&vsi->back->hw, list); +} + +/** + * ice_fltr_remove_vlan_list - remove list of VLAN filters + * @vsi: pointer to VSI struct + * @list: list of filters + */ +static enum ice_status +ice_fltr_remove_vlan_list(struct ice_vsi *vsi, struct list_head *list) +{ + return ice_remove_vlan(&vsi->back->hw, list); +} + +/** + * ice_fltr_add_eth_list - add list of ethertype filters + * @vsi: pointer to VSI struct + * @list: list of filters + */ +static enum ice_status +ice_fltr_add_eth_list(struct ice_vsi *vsi, struct list_head *list) +{ + return ice_add_eth_mac(&vsi->back->hw, list); +} + +/** + * ice_fltr_remove_eth_list - remove list of ethertype filters + * @vsi: pointer to VSI struct + * @list: list of filters + */ +static enum ice_status +ice_fltr_remove_eth_list(struct ice_vsi *vsi, struct list_head *list) +{ + return ice_remove_eth_mac(&vsi->back->hw, list); +} + +/** + * ice_fltr_remove_all - remove all filters associated with VSI + * @vsi: pointer to VSI struct + */ +void ice_fltr_remove_all(struct ice_vsi *vsi) +{ + ice_remove_vsi_fltr(&vsi->back->hw, vsi->idx); +} + +/** + * ice_fltr_add_mac_to_list - add MAC filter info to existing list + * @vsi: pointer to VSI struct + * @list: list to add filter info to + * @mac: MAC address to add + * @action: filter action + */ +int +ice_fltr_add_mac_to_list(struct ice_vsi *vsi, struct list_head *list, + const u8 *mac, enum ice_sw_fwd_act_type action) +{ + struct ice_fltr_info info = { 0 }; + + info.flag = ICE_FLTR_TX; + info.src_id = ICE_SRC_ID_VSI; + info.lkup_type = ICE_SW_LKUP_MAC; + info.fltr_act = action; + info.vsi_handle = vsi->idx; +
ether_addr_copy(info.l_data.mac.mac_addr, mac); + + return ice_fltr_add_entry_to_list(ice_pf_to_dev(vsi->back), &info, + list); +} + +/** + * ice_fltr_add_vlan_to_list - add VLAN filter info to existing list + * @vsi: pointer to VSI struct + * @list: list to add filter info to + * @vlan_id: VLAN ID to add + * @action: filter action + */ +static int +ice_fltr_add_vlan_to_list(struct ice_vsi *vsi, struct list_head *list, + u16 vlan_id, enum ice_sw_fwd_act_type action) +{ + struct ice_fltr_info info = { 0 }; + + info.flag = ICE_FLTR_TX; + info.src_id = ICE_SRC_ID_VSI; + info.lkup_type = ICE_SW_LKUP_VLAN; + info.fltr_act = action; + info.vsi_handle = vsi->idx; + info.l_data.vlan.vlan_id = vlan_id; + + return ice_fltr_add_entry_to_list(ice_pf_to_dev(vsi->back), &info, + list); +} + +/** + * ice_fltr_add_eth_to_list - add ethertype filter info to existing list + * @vsi: pointer to VSI struct + * @list: list to add filter info to + * @ethertype: ethertype of packet that matches filter + * @flag: filter direction, Tx or Rx + * @action: filter action + */ +static int +ice_fltr_add_eth_to_list(struct ice_vsi *vsi, struct list_head *list, + u16 ethertype, u16 flag, + enum ice_sw_fwd_act_type action) +{ + struct ice_fltr_info info = { 0 }; + + info.flag = flag; + info.lkup_type = ICE_SW_LKUP_ETHERTYPE; + info.fltr_act = action; + info.vsi_handle = vsi->idx; + info.l_data.ethertype_mac.ethertype = ethertype; + + if (flag == ICE_FLTR_TX) + info.src_id = ICE_SRC_ID_VSI; + else + info.src_id = ICE_SRC_ID_LPORT; + + return ice_fltr_add_entry_to_list(ice_pf_to_dev(vsi->back), &info, + list); +} + +/** + * ice_fltr_prepare_mac - add or remove MAC rule + * @vsi: pointer to VSI struct + * @mac: MAC address to add + * @action: action to be performed on filter match + * @mac_action: pointer to add or remove MAC function + */ +static enum ice_status +ice_fltr_prepare_mac(struct ice_vsi *vsi, const u8 *mac, + enum ice_sw_fwd_act_type action, + enum ice_status (*mac_action)(struct ice_vsi *, + struct list_head *)) +{ + enum ice_status result; + LIST_HEAD(tmp_list); + + if (ice_fltr_add_mac_to_list(vsi, &tmp_list, mac, action)) { + ice_fltr_free_list(ice_pf_to_dev(vsi->back), &tmp_list); + return ICE_ERR_NO_MEMORY; + } + + result = mac_action(vsi, &tmp_list); + ice_fltr_free_list(ice_pf_to_dev(vsi->back), &tmp_list); + return result; +} + +/** + * ice_fltr_prepare_mac_and_broadcast - add or remove MAC and broadcast filter + * @vsi: pointer to VSI struct + * @mac: MAC address to add + * @action: action to be performed on filter match + * @mac_action: pointer to add or remove MAC function + */ +static enum ice_status +ice_fltr_prepare_mac_and_broadcast(struct ice_vsi *vsi, const u8 *mac, + enum ice_sw_fwd_act_type action, + enum ice_status(*mac_action) + (struct ice_vsi *, struct list_head *)) +{ + u8 broadcast[ETH_ALEN]; + enum ice_status result; + LIST_HEAD(tmp_list); + + eth_broadcast_addr(broadcast); + if (ice_fltr_add_mac_to_list(vsi, &tmp_list, mac, action) || + ice_fltr_add_mac_to_list(vsi, &tmp_list, broadcast, action)) { + ice_fltr_free_list(ice_pf_to_dev(vsi->back), &tmp_list); + return ICE_ERR_NO_MEMORY; + } + + result = mac_action(vsi, &tmp_list); + ice_fltr_free_list(ice_pf_to_dev(vsi->back), &tmp_list); + return result; +} + +/** + * ice_fltr_prepare_vlan - add or remove VLAN filter + * @vsi: pointer to VSI struct + * @vlan_id: VLAN ID to add + * @action: action to be performed on filter match + * @vlan_action: pointer to add or remove VLAN function + */ +static enum ice_status
+ice_fltr_prepare_vlan(struct ice_vsi *vsi, u16 vlan_id, + enum ice_sw_fwd_act_type action, + enum ice_status (*vlan_action)(struct ice_vsi *, + struct list_head *)) +{ + enum ice_status result; + LIST_HEAD(tmp_list); + + if (ice_fltr_add_vlan_to_list(vsi, &tmp_list, vlan_id, action)) + return ICE_ERR_NO_MEMORY; + + result = vlan_action(vsi, &tmp_list); + ice_fltr_free_list(ice_pf_to_dev(vsi->back), &tmp_list); + return result; +} + +/** + * ice_fltr_prepare_eth - add or remove ethertype filter + * @vsi: pointer to VSI struct + * @ethertype: ethertype of packet to be filtered + * @flag: direction of packet, Tx or Rx + * @action: action to be performed on filter match + * @eth_action: pointer to add or remove ethertype function + */ +static enum ice_status +ice_fltr_prepare_eth(struct ice_vsi *vsi, u16 ethertype, u16 flag, + enum ice_sw_fwd_act_type action, + enum ice_status (*eth_action)(struct ice_vsi *, + struct list_head *)) +{ + enum ice_status result; + LIST_HEAD(tmp_list); + + if (ice_fltr_add_eth_to_list(vsi, &tmp_list, ethertype, flag, action)) + return ICE_ERR_NO_MEMORY; + + result = eth_action(vsi, &tmp_list); + ice_fltr_free_list(ice_pf_to_dev(vsi->back), &tmp_list); + return result; +} + +/** + * ice_fltr_add_mac - add single MAC filter + * @vsi: pointer to VSI struct + * @mac: MAC to add + * @action: action to be performed on filter match + */ +enum ice_status ice_fltr_add_mac(struct ice_vsi *vsi, const u8 *mac, + enum ice_sw_fwd_act_type action) +{ + return ice_fltr_prepare_mac(vsi, mac, action, ice_fltr_add_mac_list); +} + +/** + * ice_fltr_add_mac_and_broadcast - add single MAC and broadcast + * @vsi: pointer to VSI struct + * @mac: MAC to add + * @action: action to be performed on filter match + */ +enum ice_status +ice_fltr_add_mac_and_broadcast(struct ice_vsi *vsi, const u8 *mac, + enum ice_sw_fwd_act_type action) +{ + return ice_fltr_prepare_mac_and_broadcast(vsi, mac, action, + ice_fltr_add_mac_list); +} + +/** + * ice_fltr_remove_mac - remove MAC filter + * @vsi: pointer to VSI struct + * @mac: filter MAC to remove + * @action: action to remove + */ +enum ice_status ice_fltr_remove_mac(struct ice_vsi *vsi, const u8 *mac, + enum ice_sw_fwd_act_type action) +{ + return ice_fltr_prepare_mac(vsi, mac, action, ice_fltr_remove_mac_list); +} + +/** + * ice_fltr_add_vlan - add single VLAN filter + * @vsi: pointer to VSI struct + * @vlan_id: VLAN ID to add + * @action: action to be performed on filter match + */ +enum ice_status ice_fltr_add_vlan(struct ice_vsi *vsi, u16 vlan_id, + enum ice_sw_fwd_act_type action) +{ + return ice_fltr_prepare_vlan(vsi, vlan_id, action, + ice_fltr_add_vlan_list); +} + +/** + * ice_fltr_remove_vlan - remove VLAN filter + * @vsi: pointer to VSI struct + * @vlan_id: filter VLAN to remove + * @action: action to remove + */ +enum ice_status ice_fltr_remove_vlan(struct ice_vsi *vsi, u16 vlan_id, + enum ice_sw_fwd_act_type action) +{ + return ice_fltr_prepare_vlan(vsi, vlan_id, action, + ice_fltr_remove_vlan_list); +} + +/** + * ice_fltr_add_eth - add specific ethertype filter + * @vsi: pointer to VSI struct + * @ethertype: ethertype of filter + * @flag: direction of packet to be filtered, Tx or Rx + * @action: action to be performed on filter match + */ +enum ice_status ice_fltr_add_eth(struct ice_vsi *vsi, u16 ethertype, u16 flag, + enum ice_sw_fwd_act_type action) +{ + return ice_fltr_prepare_eth(vsi, ethertype, flag, action, + ice_fltr_add_eth_list); +} + +/** + * ice_fltr_remove_eth - remove ethertype filter + * @vsi: pointer to VSI struct + *
@ethertype: ethertype of filter + * @flag: direction of filter + * @action: action to remove + */ +enum ice_status ice_fltr_remove_eth(struct ice_vsi *vsi, u16 ethertype, + u16 flag, enum ice_sw_fwd_act_type action) +{ + return ice_fltr_prepare_eth(vsi, ethertype, flag, action, + ice_fltr_remove_eth_list); +} diff --git a/drivers/net/ethernet/intel/ice/ice_fltr.h b/drivers/net/ethernet/intel/ice/ice_fltr.h new file mode 100644 index 000000000000..361cb4da9b43 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_fltr.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2018-2020, Intel Corporation. */ + +#ifndef _ICE_FLTR_H_ +#define _ICE_FLTR_H_ + +void ice_fltr_free_list(struct device *dev, struct list_head *h); +enum ice_status +ice_fltr_add_mac_to_list(struct ice_vsi *vsi, struct list_head *list, + const u8 *mac, enum ice_sw_fwd_act_type action); +enum ice_status +ice_fltr_add_mac(struct ice_vsi *vsi, const u8 *mac, + enum ice_sw_fwd_act_type action); +enum ice_status +ice_fltr_add_mac_and_broadcast(struct ice_vsi *vsi, const u8 *mac, + enum ice_sw_fwd_act_type action); +enum ice_status +ice_fltr_add_mac_list(struct ice_vsi *vsi, struct list_head *list); +enum ice_status +ice_fltr_remove_mac(struct ice_vsi *vsi, const u8 *mac, + enum ice_sw_fwd_act_type action); +enum ice_status +ice_fltr_remove_mac_list(struct ice_vsi *vsi, struct list_head *list); + +enum ice_status +ice_fltr_add_vlan(struct ice_vsi *vsi, u16 vid, + enum ice_sw_fwd_act_type action); +enum ice_status +ice_fltr_remove_vlan(struct ice_vsi *vsi, u16 vid, + enum ice_sw_fwd_act_type action); + +enum ice_status +ice_fltr_add_eth(struct ice_vsi *vsi, u16 ethertype, u16 flag, + enum ice_sw_fwd_act_type action); +enum ice_status +ice_fltr_remove_eth(struct ice_vsi *vsi, u16 ethertype, u16 flag, + enum ice_sw_fwd_act_type action); +void ice_fltr_remove_all(struct ice_vsi *vsi); +#endif diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index 1d37a9f02c1c..bc48eda67c81 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -58,6 +58,7 @@ #define PRTDCB_GENS 0x00083020 #define PRTDCB_GENS_DCBX_STATUS_S 0 #define PRTDCB_GENS_DCBX_STATUS_M ICE_M(0x7, 0) +#define PRTDCB_TUP2TC 0x001D26C0 /* Reset Source: CORER */ #define GL_PREEXT_L2_PMASK0(_i) (0x0020F0FC + ((_i) * 4)) #define GL_PREEXT_L2_PMASK1(_i) (0x0020F108 + ((_i) * 4)) #define GLFLXP_RXDID_FLX_WRD_0(_i) (0x0045c800 + ((_i) * 4)) diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h index 878e125d8b42..5d61acdec7ed 100644 --- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h +++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h @@ -262,6 +262,12 @@ enum ice_rx_flex_desc_status_error_0_bits { ICE_RX_FLEX_DESC_STATUS0_LAST /* this entry must be last!!! */ }; +enum ice_rx_flex_desc_status_error_1_bits { + /* Note: These are predefined bit offsets */ + ICE_RX_FLEX_DESC_STATUS1_NAT_S = 4, + ICE_RX_FLEX_DESC_STATUS1_LAST /* this entry must be last!!! 
*/ +}; + #define ICE_RXQ_CTX_SIZE_DWORDS 8 #define ICE_RXQ_CTX_SZ (ICE_RXQ_CTX_SIZE_DWORDS * sizeof(u32)) #define ICE_TX_CMPLTNQ_CTX_SIZE_DWORDS 22 @@ -413,6 +419,25 @@ enum ice_tx_ctx_desc_cmd_bits { ICE_TX_CTX_DESC_RESERVED = 0x40 }; +enum ice_tx_ctx_desc_eipt_offload { + ICE_TX_CTX_EIPT_NONE = 0x0, + ICE_TX_CTX_EIPT_IPV6 = 0x1, + ICE_TX_CTX_EIPT_IPV4_NO_CSUM = 0x2, + ICE_TX_CTX_EIPT_IPV4 = 0x3 +}; + +#define ICE_TXD_CTX_QW0_EIPLEN_S 2 + +#define ICE_TXD_CTX_QW0_L4TUNT_S 9 + +#define ICE_TXD_CTX_UDP_TUNNELING BIT_ULL(ICE_TXD_CTX_QW0_L4TUNT_S) +#define ICE_TXD_CTX_GRE_TUNNELING (0x2ULL << ICE_TXD_CTX_QW0_L4TUNT_S) + +#define ICE_TXD_CTX_QW0_NATLEN_S 12 + +#define ICE_TXD_CTX_QW0_L4T_CS_S 23 +#define ICE_TXD_CTX_QW0_L4T_CS_M BIT_ULL(ICE_TXD_CTX_QW0_L4T_CS_S) + #define ICE_LAN_TXQ_MAX_QGRPS 127 #define ICE_LAN_TXQ_MAX_QDIS 1023 diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 2f256bf45efc..c73c977f6967 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -5,6 +5,7 @@ #include "ice_base.h" #include "ice_flow.h" #include "ice_lib.h" +#include "ice_fltr.h" #include "ice_dcb_lib.h" /** @@ -37,7 +38,8 @@ const char *ice_vsi_type_str(enum ice_vsi_type vsi_type) */ static int ice_vsi_ctrl_all_rx_rings(struct ice_vsi *vsi, bool ena) { - int i, ret = 0; + int ret = 0; + u16 i; for (i = 0; i < vsi->num_rxq; i++) ice_vsi_ctrl_one_rx_ring(vsi, ena, i, false); @@ -248,8 +250,8 @@ void ice_vsi_delete(struct ice_vsi *vsi) status = ice_free_vsi(&pf->hw, vsi->idx, ctxt, false, NULL); if (status) - dev_err(ice_pf_to_dev(pf), "Failed to delete VSI %i in FW - error: %d\n", - vsi->vsi_num, status); + dev_err(ice_pf_to_dev(pf), "Failed to delete VSI %i in FW - error: %s\n", + vsi->vsi_num, ice_stat_str(status)); kfree(ctxt); } @@ -521,8 +523,8 @@ static void ice_vsi_clean_rss_flow_fld(struct ice_vsi *vsi) status = ice_rem_vsi_rss_cfg(&pf->hw, vsi->idx); if (status) - dev_dbg(ice_pf_to_dev(pf), "ice_rem_vsi_rss_cfg failed for vsi = %d, error = %d\n", - vsi->vsi_num, status); + dev_dbg(ice_pf_to_dev(pf), "ice_rem_vsi_rss_cfg failed for vsi = %d, error = %s\n", + vsi->vsi_num, ice_stat_str(status)); } /** @@ -565,8 +567,8 @@ static void ice_vsi_set_rss_params(struct ice_vsi *vsi) switch (vsi->type) { case ICE_VSI_PF: /* PF VSI will inherit RSS instance of PF */ - vsi->rss_table_size = cap->rss_table_size; - vsi->rss_size = min_t(int, num_online_cpus(), + vsi->rss_table_size = (u16)cap->rss_table_size; + vsi->rss_size = min_t(u16, num_online_cpus(), BIT(cap->rss_table_entry_width)); vsi->rss_lut_type = ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF; break; @@ -684,15 +686,15 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) max_rss = ICE_MAX_LG_RSS_QS; else max_rss = ICE_MAX_RSS_QS_PER_VF; - qcount_rx = min_t(int, rx_numq_tc, max_rss); + qcount_rx = min_t(u16, rx_numq_tc, max_rss); if (!vsi->req_rxq) - qcount_rx = min_t(int, qcount_rx, + qcount_rx = min_t(u16, qcount_rx, vsi->rss_size); } } /* find the (rounded up) power-of-2 of qcount */ - pow = order_base_2(qcount_rx); + pow = (u16)order_base_2(qcount_rx); ice_for_each_traffic_class(i) { if (!(vsi->tc_cfg.ena_tc & BIT(i))) { @@ -941,7 +943,7 @@ int ice_free_res(struct ice_res_tracker *res, u16 index, u16 id) */ static int ice_search_res(struct ice_res_tracker *res, u16 needed, u16 id) { - int start = 0, end = 0; + u16 start = 0, end = 0; if (needed > res->end) return -ENOMEM; @@ -1024,6 +1026,7 @@ static int ice_vsi_setup_vector_base(struct 
ice_vsi *vsi) struct ice_pf *pf = vsi->back; struct device *dev; u16 num_q_vectors; + int base; dev = ice_pf_to_dev(pf); /* SRIOV doesn't grab irq_tracker entries for each VSI */ @@ -1038,14 +1041,15 @@ static int ice_vsi_setup_vector_base(struct ice_vsi *vsi) num_q_vectors = vsi->num_q_vectors; /* reserve slots from OS requested IRQs */ - vsi->base_vector = ice_get_res(pf, pf->irq_tracker, num_q_vectors, - vsi->idx); - if (vsi->base_vector < 0) { + base = ice_get_res(pf, pf->irq_tracker, num_q_vectors, vsi->idx); + + if (base < 0) { dev_err(dev, "%d MSI-X interrupts available. %s %d failed to get %d MSI-X vectors\n", ice_get_free_res_count(pf->irq_tracker), ice_vsi_type_str(vsi->type), vsi->idx, num_q_vectors); return -ENOENT; } + vsi->base_vector = (u16)base; pf->num_avail_sw_msix -= num_q_vectors; return 0; @@ -1085,7 +1089,7 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi) { struct ice_pf *pf = vsi->back; struct device *dev; - int i; + u16 i; dev = ice_pf_to_dev(pf); /* Allocate Tx rings */ @@ -1178,7 +1182,7 @@ static int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi) u8 *lut; dev = ice_pf_to_dev(pf); - vsi->rss_size = min_t(int, vsi->rss_size, vsi->num_rxq); + vsi->rss_size = min_t(u16, vsi->rss_size, vsi->num_rxq); lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); if (!lut) @@ -1193,7 +1197,8 @@ static int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi) vsi->rss_table_size); if (status) { - dev_err(dev, "set_rss_lut failed, error %d\n", status); + dev_err(dev, "set_rss_lut failed, error %s\n", + ice_stat_str(status)); err = -EIO; goto ice_vsi_cfg_rss_exit; } @@ -1215,7 +1220,8 @@ static int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi) status = ice_aq_set_rss_key(&pf->hw, vsi->idx, key); if (status) { - dev_err(dev, "set_rss_key failed, error %d\n", status); + dev_err(dev, "set_rss_key failed, error %s\n", + ice_stat_str(status)); err = -EIO; } @@ -1248,8 +1254,8 @@ static void ice_vsi_set_vf_rss_flow_fld(struct ice_vsi *vsi) status = ice_add_avf_rss_cfg(&pf->hw, vsi->idx, ICE_DEFAULT_RSS_HENA); if (status) - dev_dbg(dev, "ice_add_avf_rss_cfg failed for vsi = %d, error = %d\n", - vsi->vsi_num, status); + dev_dbg(dev, "ice_add_avf_rss_cfg failed for vsi = %d, error = %s\n", + vsi->vsi_num, ice_stat_str(status)); } /** @@ -1281,91 +1287,57 @@ static void ice_vsi_set_rss_flow_fld(struct ice_vsi *vsi) status = ice_add_rss_cfg(hw, vsi_handle, ICE_FLOW_HASH_IPV4, ICE_FLOW_SEG_HDR_IPV4); if (status) - dev_dbg(dev, "ice_add_rss_cfg failed for ipv4 flow, vsi = %d, error = %d\n", - vsi_num, status); + dev_dbg(dev, "ice_add_rss_cfg failed for ipv4 flow, vsi = %d, error = %s\n", + vsi_num, ice_stat_str(status)); /* configure RSS for IPv6 with input set IPv6 src/dst */ status = ice_add_rss_cfg(hw, vsi_handle, ICE_FLOW_HASH_IPV6, ICE_FLOW_SEG_HDR_IPV6); if (status) - dev_dbg(dev, "ice_add_rss_cfg failed for ipv6 flow, vsi = %d, error = %d\n", - vsi_num, status); + dev_dbg(dev, "ice_add_rss_cfg failed for ipv6 flow, vsi = %d, error = %s\n", + vsi_num, ice_stat_str(status)); /* configure RSS for tcp4 with input set IP src/dst, TCP src/dst */ status = ice_add_rss_cfg(hw, vsi_handle, ICE_HASH_TCP_IPV4, ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_IPV4); if (status) - dev_dbg(dev, "ice_add_rss_cfg failed for tcp4 flow, vsi = %d, error = %d\n", - vsi_num, status); + dev_dbg(dev, "ice_add_rss_cfg failed for tcp4 flow, vsi = %d, error = %s\n", + vsi_num, ice_stat_str(status)); /* configure RSS for udp4 with input set IP src/dst, UDP src/dst */ status = ice_add_rss_cfg(hw, vsi_handle, ICE_HASH_UDP_IPV4, 
ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_IPV4); if (status) - dev_dbg(dev, "ice_add_rss_cfg failed for udp4 flow, vsi = %d, error = %d\n", - vsi_num, status); + dev_dbg(dev, "ice_add_rss_cfg failed for udp4 flow, vsi = %d, error = %s\n", + vsi_num, ice_stat_str(status)); /* configure RSS for sctp4 with input set IP src/dst */ status = ice_add_rss_cfg(hw, vsi_handle, ICE_FLOW_HASH_IPV4, ICE_FLOW_SEG_HDR_SCTP | ICE_FLOW_SEG_HDR_IPV4); if (status) - dev_dbg(dev, "ice_add_rss_cfg failed for sctp4 flow, vsi = %d, error = %d\n", - vsi_num, status); + dev_dbg(dev, "ice_add_rss_cfg failed for sctp4 flow, vsi = %d, error = %s\n", + vsi_num, ice_stat_str(status)); /* configure RSS for tcp6 with input set IPv6 src/dst, TCP src/dst */ status = ice_add_rss_cfg(hw, vsi_handle, ICE_HASH_TCP_IPV6, ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_IPV6); if (status) - dev_dbg(dev, "ice_add_rss_cfg failed for tcp6 flow, vsi = %d, error = %d\n", - vsi_num, status); + dev_dbg(dev, "ice_add_rss_cfg failed for tcp6 flow, vsi = %d, error = %s\n", + vsi_num, ice_stat_str(status)); /* configure RSS for udp6 with input set IPv6 src/dst, UDP src/dst */ status = ice_add_rss_cfg(hw, vsi_handle, ICE_HASH_UDP_IPV6, ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_IPV6); if (status) - dev_dbg(dev, "ice_add_rss_cfg failed for udp6 flow, vsi = %d, error = %d\n", - vsi_num, status); + dev_dbg(dev, "ice_add_rss_cfg failed for udp6 flow, vsi = %d, error = %s\n", + vsi_num, ice_stat_str(status)); /* configure RSS for sctp6 with input set IPv6 src/dst */ status = ice_add_rss_cfg(hw, vsi_handle, ICE_FLOW_HASH_IPV6, ICE_FLOW_SEG_HDR_SCTP | ICE_FLOW_SEG_HDR_IPV6); if (status) - dev_dbg(dev, "ice_add_rss_cfg failed for sctp6 flow, vsi = %d, error = %d\n", - vsi_num, status); -} - -/** - * ice_add_mac_to_list - Add a MAC address filter entry to the list - * @vsi: the VSI to be forwarded to - * @add_list: pointer to the list which contains MAC filter entries - * @macaddr: the MAC address to be added. - * - * Adds MAC address filter entry to the temp list - * - * Returns 0 on success or ENOMEM on failure. 
- */ -int -ice_add_mac_to_list(struct ice_vsi *vsi, struct list_head *add_list, - const u8 *macaddr) -{ - struct ice_fltr_list_entry *tmp; - struct ice_pf *pf = vsi->back; - - tmp = devm_kzalloc(ice_pf_to_dev(pf), sizeof(*tmp), GFP_ATOMIC); - if (!tmp) - return -ENOMEM; - - tmp->fltr_info.flag = ICE_FLTR_TX; - tmp->fltr_info.src_id = ICE_SRC_ID_VSI; - tmp->fltr_info.lkup_type = ICE_SW_LKUP_MAC; - tmp->fltr_info.fltr_act = ICE_FWD_TO_VSI; - tmp->fltr_info.vsi_handle = vsi->idx; - ether_addr_copy(tmp->fltr_info.l_data.mac.mac_addr, macaddr); - - INIT_LIST_HEAD(&tmp->list_entry); - list_add(&tmp->list_entry, add_list); - - return 0; + dev_dbg(dev, "ice_add_rss_cfg failed for sctp6 flow, vsi = %d, error = %s\n", + vsi_num, ice_stat_str(status)); } /** @@ -1415,54 +1387,21 @@ void ice_update_eth_stats(struct ice_vsi *vsi) } /** - * ice_free_fltr_list - free filter lists helper - * @dev: pointer to the device struct - * @h: pointer to the list head to be freed - * - * Helper function to free filter lists previously created using - * ice_add_mac_to_list - */ -void ice_free_fltr_list(struct device *dev, struct list_head *h) -{ - struct ice_fltr_list_entry *e, *tmp; - - list_for_each_entry_safe(e, tmp, h, list_entry) { - list_del(&e->list_entry); - devm_kfree(dev, e); - } -} - -/** * ice_vsi_add_vlan - Add VSI membership for given VLAN * @vsi: the VSI being configured * @vid: VLAN ID to be added + * @action: filter action to be performed on match */ -int ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid) +int +ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid, enum ice_sw_fwd_act_type action) { - struct ice_fltr_list_entry *tmp; struct ice_pf *pf = vsi->back; - LIST_HEAD(tmp_add_list); - enum ice_status status; struct device *dev; int err = 0; dev = ice_pf_to_dev(pf); - tmp = devm_kzalloc(dev, sizeof(*tmp), GFP_KERNEL); - if (!tmp) - return -ENOMEM; - - tmp->fltr_info.lkup_type = ICE_SW_LKUP_VLAN; - tmp->fltr_info.fltr_act = ICE_FWD_TO_VSI; - tmp->fltr_info.flag = ICE_FLTR_TX; - tmp->fltr_info.src_id = ICE_SRC_ID_VSI; - tmp->fltr_info.vsi_handle = vsi->idx; - tmp->fltr_info.l_data.vlan.vlan_id = vid; - INIT_LIST_HEAD(&tmp->list_entry); - list_add(&tmp->list_entry, &tmp_add_list); - - status = ice_add_vlan(&pf->hw, &tmp_add_list); - if (!status) { + if (!ice_fltr_add_vlan(vsi, vid, action)) { vsi->num_vlan++; } else { err = -ENODEV; @@ -1470,7 +1409,6 @@ int ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid) vsi->vsi_num); } - ice_free_fltr_list(dev, &tmp_add_list); return err; } @@ -1483,41 +1421,25 @@ int ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid) */ int ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid) { - struct ice_fltr_list_entry *list; struct ice_pf *pf = vsi->back; - LIST_HEAD(tmp_add_list); enum ice_status status; struct device *dev; int err = 0; dev = ice_pf_to_dev(pf); - list = devm_kzalloc(dev, sizeof(*list), GFP_KERNEL); - if (!list) - return -ENOMEM; - - list->fltr_info.lkup_type = ICE_SW_LKUP_VLAN; - list->fltr_info.vsi_handle = vsi->idx; - list->fltr_info.fltr_act = ICE_FWD_TO_VSI; - list->fltr_info.l_data.vlan.vlan_id = vid; - list->fltr_info.flag = ICE_FLTR_TX; - list->fltr_info.src_id = ICE_SRC_ID_VSI; - INIT_LIST_HEAD(&list->list_entry); - list_add(&list->list_entry, &tmp_add_list); - - status = ice_remove_vlan(&pf->hw, &tmp_add_list); + status = ice_fltr_remove_vlan(vsi, vid, ICE_FWD_TO_VSI); if (!status) { vsi->num_vlan--; } else if (status == ICE_ERR_DOES_NOT_EXIST) { - dev_dbg(dev, "Failed to remove VLAN %d on VSI %i, it does not exist, status: %d\n", - vid, vsi->vsi_num, status); + 
dev_dbg(dev, "Failed to remove VLAN %d on VSI %i, it does not exist, status: %s\n", + vid, vsi->vsi_num, ice_stat_str(status)); } else { - dev_err(dev, "Error removing VLAN %d on vsi %i error: %d\n", - vid, vsi->vsi_num, status); + dev_err(dev, "Error removing VLAN %d on vsi %i error: %s\n", + vid, vsi->vsi_num, ice_stat_str(status)); err = -EIO; } - ice_free_fltr_list(dev, &tmp_add_list); return err; } @@ -1671,7 +1593,7 @@ void ice_vsi_cfg_msix(struct ice_vsi *vsi) { struct ice_pf *pf = vsi->back; struct ice_hw *hw = &pf->hw; - u32 txq = 0, rxq = 0; + u16 txq = 0, rxq = 0; int i, q; for (i = 0; i < vsi->num_q_vectors; i++) { @@ -1737,8 +1659,9 @@ int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi) status = ice_update_vsi(hw, vsi->idx, ctxt, NULL); if (status) { - dev_err(ice_pf_to_dev(vsi->back), "update VSI for VLAN insert failed, err %d aq_err %d\n", - status, hw->adminq.sq_last_status); + dev_err(ice_pf_to_dev(vsi->back), "update VSI for VLAN insert failed, err %s aq_err %s\n", + ice_stat_str(status), + ice_aq_str(hw->adminq.sq_last_status)); ret = -EIO; goto out; } @@ -1783,8 +1706,9 @@ int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena) status = ice_update_vsi(hw, vsi->idx, ctxt, NULL); if (status) { - dev_err(ice_pf_to_dev(vsi->back), "update VSI for VLAN strip failed, ena = %d err %d aq_err %d\n", - ena, status, hw->adminq.sq_last_status); + dev_err(ice_pf_to_dev(vsi->back), "update VSI for VLAN strip failed, ena = %d err %s aq_err %s\n", + ena, ice_stat_str(status), + ice_aq_str(hw->adminq.sq_last_status)); ret = -EIO; goto out; } @@ -1922,9 +1846,10 @@ int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc) status = ice_update_vsi(&pf->hw, vsi->idx, ctxt, NULL); if (status) { - netdev_err(vsi->netdev, "%sabling VLAN pruning on VSI handle: %d, VSI HW ID: %d failed, err = %d, aq_err = %d\n", - ena ? "En" : "Dis", vsi->idx, vsi->vsi_num, status, - pf->hw.adminq.sq_last_status); + netdev_err(vsi->netdev, "%sabling VLAN pruning on VSI handle: %d, VSI HW ID: %d failed, err = %s, aq_err = %s\n", + ena ? 
"En" : "Dis", vsi->idx, vsi->vsi_num, + ice_stat_str(status), + ice_aq_str(pf->hw.adminq.sq_last_status)); goto err_out; } @@ -1991,47 +1916,6 @@ clear_reg_idx: } /** - * ice_vsi_add_rem_eth_mac - Program VSI ethertype based filter with rule - * @vsi: the VSI being configured - * @add_rule: boolean value to add or remove ethertype filter rule - */ -static void -ice_vsi_add_rem_eth_mac(struct ice_vsi *vsi, bool add_rule) -{ - struct ice_fltr_list_entry *list; - struct ice_pf *pf = vsi->back; - LIST_HEAD(tmp_add_list); - enum ice_status status; - struct device *dev; - - dev = ice_pf_to_dev(pf); - list = devm_kzalloc(dev, sizeof(*list), GFP_KERNEL); - if (!list) - return; - - list->fltr_info.lkup_type = ICE_SW_LKUP_ETHERTYPE; - list->fltr_info.fltr_act = ICE_DROP_PACKET; - list->fltr_info.flag = ICE_FLTR_TX; - list->fltr_info.src_id = ICE_SRC_ID_VSI; - list->fltr_info.vsi_handle = vsi->idx; - list->fltr_info.l_data.ethertype_mac.ethertype = vsi->ethtype; - - INIT_LIST_HEAD(&list->list_entry); - list_add(&list->list_entry, &tmp_add_list); - - if (add_rule) - status = ice_add_eth_mac(&pf->hw, &tmp_add_list); - else - status = ice_remove_eth_mac(&pf->hw, &tmp_add_list); - - if (status) - dev_err(dev, "Failure Adding or Removing Ethertype on VSI %i error: %d\n", - vsi->vsi_num, status); - - ice_free_fltr_list(dev, &tmp_add_list); -} - -/** * ice_cfg_sw_lldp - Config switch rules for LLDP packet handling * @vsi: the VSI being configured * @tx: bool to determine Tx or Rx rule @@ -2039,45 +1923,25 @@ ice_vsi_add_rem_eth_mac(struct ice_vsi *vsi, bool add_rule) */ void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create) { - struct ice_fltr_list_entry *list; + enum ice_status (*eth_fltr)(struct ice_vsi *v, u16 type, u16 flag, + enum ice_sw_fwd_act_type act); struct ice_pf *pf = vsi->back; - LIST_HEAD(tmp_add_list); enum ice_status status; struct device *dev; dev = ice_pf_to_dev(pf); - list = devm_kzalloc(dev, sizeof(*list), GFP_KERNEL); - if (!list) - return; + eth_fltr = create ? ice_fltr_add_eth : ice_fltr_remove_eth; - list->fltr_info.lkup_type = ICE_SW_LKUP_ETHERTYPE; - list->fltr_info.vsi_handle = vsi->idx; - list->fltr_info.l_data.ethertype_mac.ethertype = ETH_P_LLDP; - - if (tx) { - list->fltr_info.fltr_act = ICE_DROP_PACKET; - list->fltr_info.flag = ICE_FLTR_TX; - list->fltr_info.src_id = ICE_SRC_ID_VSI; - } else { - list->fltr_info.fltr_act = ICE_FWD_TO_VSI; - list->fltr_info.flag = ICE_FLTR_RX; - list->fltr_info.src_id = ICE_SRC_ID_LPORT; - } - - INIT_LIST_HEAD(&list->list_entry); - list_add(&list->list_entry, &tmp_add_list); - - if (create) - status = ice_add_eth_mac(&pf->hw, &tmp_add_list); + if (tx) + status = eth_fltr(vsi, ETH_P_LLDP, ICE_FLTR_TX, + ICE_DROP_PACKET); else - status = ice_remove_eth_mac(&pf->hw, &tmp_add_list); + status = eth_fltr(vsi, ETH_P_LLDP, ICE_FLTR_RX, ICE_FWD_TO_VSI); if (status) - dev_err(dev, "Fail %s %s LLDP rule on VSI %i error: %d\n", + dev_err(dev, "Fail %s %s LLDP rule on VSI %i error: %s\n", create ? "adding" : "removing", tx ? "TX" : "RX", - vsi->vsi_num, status); - - ice_free_fltr_list(dev, &tmp_add_list); + vsi->vsi_num, ice_stat_str(status)); } /** @@ -2164,7 +2028,7 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, * so this handles those cases (i.e. adding the PF to a bridge * without the 8021q module loaded). 
*/ - ret = ice_vsi_add_vlan(vsi, 0); + ret = ice_vsi_add_vlan(vsi, 0, ICE_FWD_TO_VSI); if (ret) goto unroll_clear_rings; @@ -2223,8 +2087,8 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc, max_txqs); if (status) { - dev_err(dev, "VSI %d failed lan queue config, error %d\n", - vsi->vsi_num, status); + dev_err(dev, "VSI %d failed lan queue config, error %s\n", + vsi->vsi_num, ice_stat_str(status)); goto unroll_vector_base; } @@ -2239,9 +2103,8 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, */ if (!ice_is_safe_mode(pf)) if (vsi->type == ICE_VSI_PF) { - ice_vsi_add_rem_eth_mac(vsi, true); - - /* Tx LLDP packets */ + ice_fltr_add_eth(vsi, ETH_P_PAUSE, ICE_FLTR_TX, + ICE_DROP_PACKET); ice_cfg_sw_lldp(vsi, true, true); } @@ -2558,7 +2421,8 @@ int ice_vsi_release(struct ice_vsi *vsi) if (!ice_is_safe_mode(pf)) { if (vsi->type == ICE_VSI_PF) { - ice_vsi_add_rem_eth_mac(vsi, false); + ice_fltr_remove_eth(vsi, ETH_P_PAUSE, ICE_FLTR_TX, + ICE_DROP_PACKET); ice_cfg_sw_lldp(vsi, true, false); /* The Rx rule will only exist to remove if the LLDP FW * engine is currently stopped @@ -2568,7 +2432,7 @@ int ice_vsi_release(struct ice_vsi *vsi) } } - ice_remove_vsi_fltr(&pf->hw, vsi->idx); + ice_fltr_remove_all(vsi); ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx); ice_vsi_delete(vsi); ice_vsi_free_q_vectors(vsi); @@ -2814,8 +2678,8 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc, max_txqs); if (status) { - dev_err(ice_pf_to_dev(pf), "VSI %d failed lan queue config, error %d\n", - vsi->vsi_num, status); + dev_err(ice_pf_to_dev(pf), "VSI %d failed lan queue config, error %s\n", + vsi->vsi_num, ice_stat_str(status)); if (init_vsi) { ret = -EIO; goto err_vectors; @@ -2924,8 +2788,8 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc) max_txqs); if (status) { - dev_err(dev, "VSI %d failed TC config, error %d\n", - vsi->vsi_num, status); + dev_err(dev, "VSI %d failed TC config, error %s\n", + vsi->vsi_num, ice_stat_str(status)); ret = -EIO; goto out; } @@ -2985,36 +2849,6 @@ void ice_update_rx_ring_stats(struct ice_ring *rx_ring, u64 pkts, u64 bytes) } /** - * ice_vsi_cfg_mac_fltr - Add or remove a MAC address filter for a VSI - * @vsi: the VSI being configured MAC filter - * @macaddr: the MAC address to be added. 
- * @set: Add or delete a MAC filter - * - * Adds or removes MAC address filter entry for VF VSI - */ -enum ice_status -ice_vsi_cfg_mac_fltr(struct ice_vsi *vsi, const u8 *macaddr, bool set) -{ - LIST_HEAD(tmp_add_list); - enum ice_status status; - - /* Update MAC filter list to be added or removed for a VSI */ - if (ice_add_mac_to_list(vsi, &tmp_add_list, macaddr)) { - status = ICE_ERR_NO_MEMORY; - goto cfg_mac_fltr_exit; - } - - if (set) - status = ice_add_mac(&vsi->back->hw, &tmp_add_list); - else - status = ice_remove_mac(&vsi->back->hw, &tmp_add_list); - -cfg_mac_fltr_exit: - ice_free_fltr_list(ice_pf_to_dev(vsi->back), &tmp_add_list); - return status; -} - -/** * ice_is_dflt_vsi_in_use - check if the default forwarding VSI is being used * @sw: switch to check if its default forwarding VSI is free * @@ -3079,8 +2913,8 @@ int ice_set_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi) status = ice_cfg_dflt_vsi(&vsi->back->hw, vsi->idx, true, ICE_FLTR_RX); if (status) { - dev_err(dev, "Failed to set VSI %d as the default forwarding VSI, error %d\n", - vsi->vsi_num, status); + dev_err(dev, "Failed to set VSI %d as the default forwarding VSI, error %s\n", + vsi->vsi_num, ice_stat_str(status)); return -EIO; } @@ -3118,8 +2952,8 @@ int ice_clear_dflt_vsi(struct ice_sw *sw) status = ice_cfg_dflt_vsi(&dflt_vsi->back->hw, dflt_vsi->idx, false, ICE_FLTR_RX); if (status) { - dev_err(dev, "Failed to clear the default forwarding VSI %d, error %d\n", - dflt_vsi->vsi_num, status); + dev_err(dev, "Failed to clear the default forwarding VSI %d, error %s\n", + dflt_vsi->vsi_num, ice_stat_str(status)); return -EIO; } diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index 04ca00799364..9746de9b25fe 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -8,12 +8,6 @@ const char *ice_vsi_type_str(enum ice_vsi_type vsi_type); -int -ice_add_mac_to_list(struct ice_vsi *vsi, struct list_head *add_list, - const u8 *macaddr); - -void ice_free_fltr_list(struct device *dev, struct list_head *h); - void ice_update_eth_stats(struct ice_vsi *vsi); int ice_vsi_cfg_rxqs(struct ice_vsi *vsi); @@ -22,7 +16,8 @@ int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi); void ice_vsi_cfg_msix(struct ice_vsi *vsi); -int ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid); +int +ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid, enum ice_sw_fwd_act_type action); int ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 5b190c257124..c3e5c4334e26 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -8,6 +8,7 @@ #include "ice.h" #include "ice_base.h" #include "ice_lib.h" +#include "ice_fltr.h" #include "ice_dcb_lib.h" #include "ice_dcb_nl.h" #include "ice_devlink.h" @@ -133,38 +134,24 @@ static void ice_check_for_hang_subtask(struct ice_pf *pf) static int ice_init_mac_fltr(struct ice_pf *pf) { enum ice_status status; - u8 broadcast[ETH_ALEN]; struct ice_vsi *vsi; + u8 *perm_addr; vsi = ice_get_main_vsi(pf); if (!vsi) return -EINVAL; - /* To add a MAC filter, first add the MAC to a list and then - * pass the list to ice_add_mac. 
- */ - - /* Add a unicast MAC filter so the VSI can get its packets */ - status = ice_vsi_cfg_mac_fltr(vsi, vsi->port_info->mac.perm_addr, true); - if (status) - goto unregister; - - /* VSI needs to receive broadcast traffic, so add the broadcast - * MAC address to the list as well. - */ - eth_broadcast_addr(broadcast); - status = ice_vsi_cfg_mac_fltr(vsi, broadcast, true); - if (status) - goto unregister; + perm_addr = vsi->port_info->mac.perm_addr; + status = ice_fltr_add_mac_and_broadcast(vsi, perm_addr, ICE_FWD_TO_VSI); + if (!status) + return 0; - return 0; -unregister: /* We aren't useful with no MAC filters, so unregister if we * had an error */ - if (status && vsi->netdev->reg_state == NETREG_REGISTERED) { - dev_err(ice_pf_to_dev(pf), "Could not add MAC filters error %d. Unregistering device\n", - status); + if (vsi->netdev->reg_state == NETREG_REGISTERED) { + dev_err(ice_pf_to_dev(pf), "Could not add MAC filters error %s. Unregistering device\n", + ice_stat_str(status)); unregister_netdev(vsi->netdev); free_netdev(vsi->netdev); vsi->netdev = NULL; @@ -188,7 +175,8 @@ static int ice_add_mac_to_sync_list(struct net_device *netdev, const u8 *addr) struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; - if (ice_add_mac_to_list(vsi, &vsi->tmp_sync_list, addr)) + if (ice_fltr_add_mac_to_list(vsi, &vsi->tmp_sync_list, addr, + ICE_FWD_TO_VSI)) return -EINVAL; return 0; @@ -209,7 +197,8 @@ static int ice_add_mac_to_unsync_list(struct net_device *netdev, const u8 *addr) struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; - if (ice_add_mac_to_list(vsi, &vsi->tmp_unsync_list, addr)) + if (ice_fltr_add_mac_to_list(vsi, &vsi->tmp_unsync_list, addr, + ICE_FWD_TO_VSI)) return -EINVAL; return 0; @@ -307,8 +296,8 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) } /* Remove MAC addresses in the unsync list */ - status = ice_remove_mac(hw, &vsi->tmp_unsync_list); - ice_free_fltr_list(dev, &vsi->tmp_unsync_list); + status = ice_fltr_remove_mac_list(vsi, &vsi->tmp_unsync_list); + ice_fltr_free_list(dev, &vsi->tmp_unsync_list); if (status) { netdev_err(netdev, "Failed to delete MAC filters\n"); /* if we failed because of alloc failures, just bail */ @@ -319,8 +308,8 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) } /* Add MAC addresses in the sync list */ - status = ice_add_mac(hw, &vsi->tmp_sync_list); - ice_free_fltr_list(dev, &vsi->tmp_sync_list); + status = ice_fltr_add_mac_list(vsi, &vsi->tmp_sync_list); + ice_fltr_free_list(dev, &vsi->tmp_sync_list); /* If filter is added successfully or already exists, do not go into * 'if' condition and report it as error. Instead continue processing * rest of the function. 
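The hunks above show the recurring shape of this refactor: instead of hand-building a temporary filter list around ice_add_mac()/ice_remove_mac() and freeing it afterwards, callers go through the new ice_fltr_* wrappers, which take the VSI and an explicit forwarding action. A minimal sketch of the new calling convention, using only helpers visible in this series (surrounding declarations and error handling abbreviated):

	/* old: ice_add_mac_to_list() + ice_add_mac() + ice_free_fltr_list() */
	/* new: one call per address, forwarding action made explicit */
	status = ice_fltr_add_mac(vsi, mac, ICE_FWD_TO_VSI);
	if (status && status != ICE_ERR_ALREADY_EXISTS)
		dev_err(dev, "add MAC failed: %s\n", ice_stat_str(status));
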
@@ -357,7 +346,8 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) vsi->current_netdev_flags &= ~IFF_ALLMULTI; goto out_promisc; } - } else if (!(vsi->current_netdev_flags & IFF_ALLMULTI)) { + } else { + /* !(vsi->current_netdev_flags & IFF_ALLMULTI) */ if (vsi->vlan_ena) promisc_m = ICE_MCAST_VLAN_PROMISC_BITS; else @@ -1017,8 +1007,8 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) if (ret == ICE_ERR_AQ_NO_WORK) break; if (ret) { - dev_err(dev, "%s Receive Queue event error %d\n", qtype, - ret); + dev_err(dev, "%s Receive Queue event error %s\n", qtype, + ice_stat_str(ret)); break; } @@ -1809,8 +1799,8 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog) status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc, max_txqs); if (status) { - dev_err(dev, "Failed VSI LAN queue config for XDP, error:%d\n", - status); + dev_err(dev, "Failed VSI LAN queue config for XDP, error: %s\n", + ice_stat_str(status)); goto clear_xdp_rings; } ice_vsi_assign_bpf_prog(vsi, prog); @@ -2137,10 +2127,8 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) } ret = IRQ_HANDLED; - if (!test_bit(__ICE_DOWN, pf->state)) { - ice_service_task_schedule(pf); - ice_irq_dynamic_ena(hw, NULL, NULL); - } + ice_service_task_schedule(pf); + ice_irq_dynamic_ena(hw, NULL, NULL); return ret; } @@ -2247,7 +2235,7 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf) return oicr_idx; pf->num_avail_sw_msix -= 1; - pf->oicr_idx = oicr_idx; + pf->oicr_idx = (u16)oicr_idx; err = devm_request_irq(dev, pf->msix_entries[pf->oicr_idx].vector, ice_misc_intr, 0, pf->int_name, pf); @@ -2342,13 +2330,27 @@ static void ice_set_netdev_features(struct net_device *netdev) NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; - tso_features = NETIF_F_TSO | + tso_features = NETIF_F_TSO | + NETIF_F_TSO_ECN | + NETIF_F_TSO6 | + NETIF_F_GSO_GRE | + NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_GRE_CSUM | + NETIF_F_GSO_UDP_TUNNEL_CSUM | + NETIF_F_GSO_PARTIAL | + NETIF_F_GSO_IPXIP4 | + NETIF_F_GSO_IPXIP6 | NETIF_F_GSO_UDP_L4; + netdev->gso_partial_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM | + NETIF_F_GSO_GRE_CSUM; /* set features that user can change */ netdev->hw_features = dflt_features | csumo_features | vlano_features | tso_features; + /* add support for HW_CSUM on packets with MPLS header */ + netdev->mpls_features = NETIF_F_HW_CSUM; + /* enable features */ netdev->features |= netdev->hw_features; /* encap and VLAN devices inherit default, csumo and tso features */ @@ -2509,7 +2511,7 @@ ice_vlan_rx_add_vid(struct net_device *netdev, __always_unused __be16 proto, /* Add a switch rule for this VLAN ID so its corresponding VLAN tagged * packets aren't pruned by the device's internal switch on Rx */ - ret = ice_vsi_add_vlan(vsi, vid); + ret = ice_vsi_add_vlan(vsi, vid, ICE_FWD_TO_VSI); if (!ret) { vsi->vlan_ena = true; set_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags); @@ -2630,7 +2632,8 @@ unroll_vsi_setup: static u16 ice_get_avail_q_count(unsigned long *pf_qmap, struct mutex *lock, u16 size) { - u16 count = 0, bit; + unsigned long bit; + u16 count = 0; mutex_lock(lock); for_each_clear_bit(bit, pf_qmap, size) @@ -2869,8 +2872,8 @@ static int ice_init_interrupt_scheme(struct ice_pf *pf) } /* populate SW interrupts pool with number of OS granted IRQs. 
*/ - pf->num_avail_sw_msix = vectors; - pf->irq_tracker->num_entries = vectors; + pf->num_avail_sw_msix = (u16)vectors; + pf->irq_tracker->num_entries = (u16)vectors; pf->irq_tracker->end = pf->irq_tracker->num_entries; return 0; @@ -2902,9 +2905,9 @@ int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx) } if (new_tx) - vsi->req_txq = new_tx; + vsi->req_txq = (u16)new_tx; if (new_rx) - vsi->req_rxq = new_rx; + vsi->req_rxq = (u16)new_rx; /* set for the next time the netdev is started */ if (!netif_running(vsi->netdev)) { @@ -3298,9 +3301,6 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) goto err_init_interrupt_unroll; } - /* Driver is mostly up */ - clear_bit(__ICE_DOWN, pf->state); - /* In case of MSIX we are going to setup the misc vector right here * to handle admin queue events etc. In case of legacy and MSI * the misc functionality and queue processing is combined in @@ -3356,9 +3356,9 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) ice_verify_cacheline_size(pf); - /* If no DDP driven features have to be setup, return here */ + /* If no DDP driven features have to be setup, we are done with probe */ if (ice_is_safe_mode(pf)) - return 0; + goto probe_done; /* initialize DDP driven features */ @@ -3373,6 +3373,9 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) /* print PCI link speed and width */ pcie_print_link_status(pf->pdev); +probe_done: + /* ready to go, so clear down state bit */ + clear_bit(__ICE_DOWN, pf->state); return 0; err_alloc_sw_unroll: @@ -3705,25 +3708,24 @@ static int ice_set_mac_address(struct net_device *netdev, void *pi) return -EBUSY; } - /* When we change the MAC address we also have to change the MAC address - * based filter rules that were created previously for the old MAC - * address. So first, we remove the old filter rule using ice_remove_mac - * and then create a new filter rule using ice_add_mac via - * ice_vsi_cfg_mac_fltr function call for both add and/or remove - * filters. - */ - status = ice_vsi_cfg_mac_fltr(vsi, netdev->dev_addr, false); - if (status) { + /* Clean up old MAC filter. Not an error if old filter doesn't exist */ + status = ice_fltr_remove_mac(vsi, netdev->dev_addr, ICE_FWD_TO_VSI); + if (status && status != ICE_ERR_DOES_NOT_EXIST) { err = -EADDRNOTAVAIL; goto err_update_filters; } - status = ice_vsi_cfg_mac_fltr(vsi, mac, true); - if (status) { - err = -EADDRNOTAVAIL; - goto err_update_filters; + /* Add filter for new MAC. If filter exists, just return success */ + status = ice_fltr_add_mac(vsi, mac, ICE_FWD_TO_VSI); + if (status == ICE_ERR_ALREADY_EXISTS) { + netdev_dbg(netdev, "filter for MAC %pM already exists\n", mac); + return 0; } + /* error if the new filter addition failed */ + if (status) + err = -EADDRNOTAVAIL; + err_update_filters: if (err) { netdev_err(netdev, "can't set MAC %pM. filter update failed\n", @@ -3740,8 +3742,8 @@ err_update_filters: flags = ICE_AQC_MAN_MAC_UPDATE_LAA_WOL; status = ice_aq_manage_mac_write(hw, mac, flags, NULL); if (status) { - netdev_err(netdev, "can't set MAC %pM. write to firmware failed error %d\n", - mac, status); + netdev_err(netdev, "can't set MAC %pM. 
write to firmware failed error %s\n", + mac, ice_stat_str(status)); } return 0; } @@ -3805,8 +3807,8 @@ ice_set_tx_maxrate(struct net_device *netdev, int queue_index, u32 maxrate) status = ice_cfg_q_bw_lmt(vsi->port_info, vsi->idx, tc, q_handle, ICE_MAX_BW, maxrate * 1000); if (status) { - netdev_err(netdev, "Unable to set Tx max rate, error %d\n", - status); + netdev_err(netdev, "Unable to set Tx max rate, error %s\n", + ice_stat_str(status)); return -EIO; } @@ -4604,8 +4606,9 @@ static int ice_vsi_rebuild_by_type(struct ice_pf *pf, enum ice_vsi_type type) /* replay filters for the VSI */ status = ice_replay_vsi(&pf->hw, vsi->idx); if (status) { - dev_err(dev, "replay VSI failed, status %d, VSI index %d, type %s\n", - status, vsi->idx, ice_vsi_type_str(type)); + dev_err(dev, "replay VSI failed, status %s, VSI index %d, type %s\n", + ice_stat_str(status), vsi->idx, + ice_vsi_type_str(type)); return -EIO; } @@ -4674,7 +4677,8 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) ret = ice_init_all_ctrlq(hw); if (ret) { - dev_err(dev, "control queues init failed %d\n", ret); + dev_err(dev, "control queues init failed %s\n", + ice_stat_str(ret)); goto err_init_ctrlq; } @@ -4690,7 +4694,8 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) ret = ice_clear_pf_cfg(hw); if (ret) { - dev_err(dev, "clear PF configuration failed %d\n", ret); + dev_err(dev, "clear PF configuration failed %s\n", + ice_stat_str(ret)); goto err_init_ctrlq; } @@ -4704,7 +4709,7 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) ret = ice_get_caps(hw); if (ret) { - dev_err(dev, "ice_get_caps failed %d\n", ret); + dev_err(dev, "ice_get_caps failed %s\n", ice_stat_str(ret)); goto err_init_ctrlq; } @@ -4746,8 +4751,8 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) /* tell the firmware we are up */ ret = ice_send_version(pf); if (ret) { - dev_err(dev, "Rebuild failed due to error sending driver version: %d\n", - ret); + dev_err(dev, "Rebuild failed due to error sending driver version: %s\n", + ice_stat_str(ret)); goto err_vsi_rebuild; } @@ -4859,6 +4864,112 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu) } /** + * ice_aq_str - convert AQ err code to a string + * @aq_err: the AQ error code to convert + */ +const char *ice_aq_str(enum ice_aq_err aq_err) +{ + switch (aq_err) { + case ICE_AQ_RC_OK: + return "OK"; + case ICE_AQ_RC_EPERM: + return "ICE_AQ_RC_EPERM"; + case ICE_AQ_RC_ENOENT: + return "ICE_AQ_RC_ENOENT"; + case ICE_AQ_RC_ENOMEM: + return "ICE_AQ_RC_ENOMEM"; + case ICE_AQ_RC_EBUSY: + return "ICE_AQ_RC_EBUSY"; + case ICE_AQ_RC_EEXIST: + return "ICE_AQ_RC_EEXIST"; + case ICE_AQ_RC_EINVAL: + return "ICE_AQ_RC_EINVAL"; + case ICE_AQ_RC_ENOSPC: + return "ICE_AQ_RC_ENOSPC"; + case ICE_AQ_RC_ENOSYS: + return "ICE_AQ_RC_ENOSYS"; + case ICE_AQ_RC_ENOSEC: + return "ICE_AQ_RC_ENOSEC"; + case ICE_AQ_RC_EBADSIG: + return "ICE_AQ_RC_EBADSIG"; + case ICE_AQ_RC_ESVN: + return "ICE_AQ_RC_ESVN"; + case ICE_AQ_RC_EBADMAN: + return "ICE_AQ_RC_EBADMAN"; + case ICE_AQ_RC_EBADBUF: + return "ICE_AQ_RC_EBADBUF"; + } + + return "ICE_AQ_RC_UNKNOWN"; +} + +/** + * ice_stat_str - convert status err code to a string + * @stat_err: the status error code to convert + */ +const char *ice_stat_str(enum ice_status stat_err) +{ + switch (stat_err) { + case ICE_SUCCESS: + return "OK"; + case ICE_ERR_PARAM: + return "ICE_ERR_PARAM"; + case ICE_ERR_NOT_IMPL: + return "ICE_ERR_NOT_IMPL"; + case ICE_ERR_NOT_READY: + return 
"ICE_ERR_NOT_READY"; + case ICE_ERR_NOT_SUPPORTED: + return "ICE_ERR_NOT_SUPPORTED"; + case ICE_ERR_BAD_PTR: + return "ICE_ERR_BAD_PTR"; + case ICE_ERR_INVAL_SIZE: + return "ICE_ERR_INVAL_SIZE"; + case ICE_ERR_DEVICE_NOT_SUPPORTED: + return "ICE_ERR_DEVICE_NOT_SUPPORTED"; + case ICE_ERR_RESET_FAILED: + return "ICE_ERR_RESET_FAILED"; + case ICE_ERR_FW_API_VER: + return "ICE_ERR_FW_API_VER"; + case ICE_ERR_NO_MEMORY: + return "ICE_ERR_NO_MEMORY"; + case ICE_ERR_CFG: + return "ICE_ERR_CFG"; + case ICE_ERR_OUT_OF_RANGE: + return "ICE_ERR_OUT_OF_RANGE"; + case ICE_ERR_ALREADY_EXISTS: + return "ICE_ERR_ALREADY_EXISTS"; + case ICE_ERR_NVM_CHECKSUM: + return "ICE_ERR_NVM_CHECKSUM"; + case ICE_ERR_BUF_TOO_SHORT: + return "ICE_ERR_BUF_TOO_SHORT"; + case ICE_ERR_NVM_BLANK_MODE: + return "ICE_ERR_NVM_BLANK_MODE"; + case ICE_ERR_IN_USE: + return "ICE_ERR_IN_USE"; + case ICE_ERR_MAX_LIMIT: + return "ICE_ERR_MAX_LIMIT"; + case ICE_ERR_RESET_ONGOING: + return "ICE_ERR_RESET_ONGOING"; + case ICE_ERR_HW_TABLE: + return "ICE_ERR_HW_TABLE"; + case ICE_ERR_DOES_NOT_EXIST: + return "ICE_ERR_DOES_NOT_EXIST"; + case ICE_ERR_AQ_ERROR: + return "ICE_ERR_AQ_ERROR"; + case ICE_ERR_AQ_TIMEOUT: + return "ICE_ERR_AQ_TIMEOUT"; + case ICE_ERR_AQ_FULL: + return "ICE_ERR_AQ_FULL"; + case ICE_ERR_AQ_NO_WORK: + return "ICE_ERR_AQ_NO_WORK"; + case ICE_ERR_AQ_EMPTY: + return "ICE_ERR_AQ_EMPTY"; + } + + return "ICE_ERR_UNKNOWN"; +} + +/** * ice_set_rss - Set RSS keys and lut * @vsi: Pointer to VSI structure * @seed: RSS hash seed @@ -4882,8 +4993,9 @@ int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) status = ice_aq_set_rss_key(hw, vsi->idx, buf); if (status) { - dev_err(dev, "Cannot set RSS key, err %d aq_err %d\n", - status, hw->adminq.rq_last_status); + dev_err(dev, "Cannot set RSS key, err %s aq_err %s\n", + ice_stat_str(status), + ice_aq_str(hw->adminq.rq_last_status)); return -EIO; } } @@ -4892,8 +5004,9 @@ int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) status = ice_aq_set_rss_lut(hw, vsi->idx, vsi->rss_lut_type, lut, lut_size); if (status) { - dev_err(dev, "Cannot set RSS lut, err %d aq_err %d\n", - status, hw->adminq.rq_last_status); + dev_err(dev, "Cannot set RSS lut, err %s aq_err %s\n", + ice_stat_str(status), + ice_aq_str(hw->adminq.rq_last_status)); return -EIO; } } @@ -4924,8 +5037,9 @@ int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) status = ice_aq_get_rss_key(hw, vsi->idx, buf); if (status) { - dev_err(dev, "Cannot get RSS key, err %d aq_err %d\n", - status, hw->adminq.rq_last_status); + dev_err(dev, "Cannot get RSS key, err %s aq_err %s\n", + ice_stat_str(status), + ice_aq_str(hw->adminq.rq_last_status)); return -EIO; } } @@ -4934,8 +5048,9 @@ int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) status = ice_aq_get_rss_lut(hw, vsi->idx, vsi->rss_lut_type, lut, lut_size); if (status) { - dev_err(dev, "Cannot get RSS lut, err %d aq_err %d\n", - status, hw->adminq.rq_last_status); + dev_err(dev, "Cannot get RSS lut, err %s aq_err %s\n", + ice_stat_str(status), + ice_aq_str(hw->adminq.rq_last_status)); return -EIO; } } @@ -5002,8 +5117,9 @@ static int ice_vsi_update_bridge_mode(struct ice_vsi *vsi, u16 bmode) status = ice_update_vsi(hw, vsi->idx, ctxt, NULL); if (status) { - dev_err(ice_pf_to_dev(vsi->back), "update VSI for bridge mode failed, bmode = %d err %d aq_err %d\n", - bmode, status, hw->adminq.sq_last_status); + dev_err(ice_pf_to_dev(vsi->back), "update VSI for bridge mode failed, bmode = %d err %s aq_err %s\n", + bmode, 
ice_stat_str(status), + ice_aq_str(hw->adminq.sq_last_status)); ret = -EIO; goto out; } @@ -5072,8 +5188,9 @@ ice_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, */ status = ice_update_sw_rule_bridge_mode(hw); if (status) { - netdev_err(dev, "switch rule update failed, mode = %d err %d aq_err %d\n", - mode, status, hw->adminq.sq_last_status); + netdev_err(dev, "switch rule update failed, mode = %d err %s aq_err %s\n", + mode, ice_stat_str(status), + ice_aq_str(hw->adminq.sq_last_status)); /* revert hw->evb_veb */ hw->evb_veb = (pf_sw->bridge_mode == BRIDGE_MODE_VEB); return -EIO; @@ -5100,6 +5217,16 @@ static void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue) pf->tx_timeout_count++; + /* Check if PFC is enabled for the TC to which the queue belongs + * to. If yes then Tx timeout is not caused by a hung queue, no + * need to reset and rebuild + */ + if (ice_is_pfc_causing_hung_q(pf, txqueue)) { + dev_info(ice_pf_to_dev(pf), "Fake Tx hang detected on queue %u, timeout caused by PFC storm\n", + txqueue); + return; + } + /* now that we have an index, find the tx_ring struct */ for (i = 0; i < vsi->num_txq; i++) if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc) @@ -5158,6 +5285,70 @@ static void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue) } /** + * ice_udp_tunnel_add - Get notifications about UDP tunnel ports that come up + * @netdev: This physical port's netdev + * @ti: Tunnel endpoint information + */ +static void +ice_udp_tunnel_add(struct net_device *netdev, struct udp_tunnel_info *ti) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + enum ice_tunnel_type tnl_type; + u16 port = ntohs(ti->port); + enum ice_status status; + + switch (ti->type) { + case UDP_TUNNEL_TYPE_VXLAN: + tnl_type = TNL_VXLAN; + break; + case UDP_TUNNEL_TYPE_GENEVE: + tnl_type = TNL_GENEVE; + break; + default: + netdev_err(netdev, "Unknown tunnel type\n"); + return; + } + + status = ice_create_tunnel(&pf->hw, tnl_type, port); + if (status == ICE_ERR_OUT_OF_RANGE) + netdev_info(netdev, "Max tunneled UDP ports reached, port %d not added\n", + port); + else if (status) + netdev_err(netdev, "Error adding UDP tunnel - %s\n", + ice_stat_str(status)); +} + +/** + * ice_udp_tunnel_del - Get notifications about UDP tunnel ports that go away + * @netdev: This physical port's netdev + * @ti: Tunnel endpoint information + */ +static void +ice_udp_tunnel_del(struct net_device *netdev, struct udp_tunnel_info *ti) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + u16 port = ntohs(ti->port); + enum ice_status status; + bool retval; + + retval = ice_tunnel_port_in_use(&pf->hw, port, NULL); + if (!retval) { + netdev_info(netdev, "port %d not found in UDP tunnels list\n", + port); + return; + } + + status = ice_destroy_tunnel(&pf->hw, port, false); + if (status) + netdev_err(netdev, "error deleting port %d from UDP tunnels list\n", + port); +} + +/** * ice_open - Called when a network interface becomes active * @netdev: network interface device structure * @@ -5173,14 +5364,20 @@ int ice_open(struct net_device *netdev) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; struct ice_port_info *pi; int err; - if (test_bit(__ICE_NEEDS_RESTART, vsi->back->state)) { + if (test_bit(__ICE_NEEDS_RESTART, pf->state)) { netdev_err(netdev, "driver needs to be unloaded and reloaded\n"); return 
-EIO; } + if (test_bit(__ICE_DOWN, pf->state)) { + netdev_err(netdev, "device is not ready yet\n"); + return -EBUSY; + } + netif_carrier_off(netdev); pi = vsi->port_info; @@ -5213,6 +5410,10 @@ int ice_open(struct net_device *netdev) if (err) netdev_err(netdev, "Failed to open VSI 0x%04X on switch 0x%04X\n", vsi->vsi_num, vsi->vsw->sw_id); + + /* Update existing tunnels information */ + udp_tunnel_get_rx_info(netdev); + return err; } @@ -5263,21 +5464,21 @@ ice_features_check(struct sk_buff *skb, features &= ~NETIF_F_GSO_MASK; len = skb_network_header(skb) - skb->data; - if (len & ~(ICE_TXD_MACLEN_MAX)) + if (len > ICE_TXD_MACLEN_MAX || len & 0x1) goto out_rm_features; len = skb_transport_header(skb) - skb_network_header(skb); - if (len & ~(ICE_TXD_IPLEN_MAX)) + if (len > ICE_TXD_IPLEN_MAX || len & 0x1) goto out_rm_features; if (skb->encapsulation) { len = skb_inner_network_header(skb) - skb_transport_header(skb); - if (len & ~(ICE_TXD_L4LEN_MAX)) + if (len > ICE_TXD_L4LEN_MAX || len & 0x1) goto out_rm_features; len = skb_inner_transport_header(skb) - skb_inner_network_header(skb); - if (len & ~(ICE_TXD_IPLEN_MAX)) + if (len > ICE_TXD_IPLEN_MAX || len & 0x1) goto out_rm_features; } @@ -5326,4 +5527,6 @@ static const struct net_device_ops ice_netdev_ops = { .ndo_bpf = ice_xdp, .ndo_xdp_xmit = ice_xdp_xmit, .ndo_xsk_wakeup = ice_xsk_wakeup, + .ndo_udp_tunnel_add = ice_udp_tunnel_add, + .ndo_udp_tunnel_del = ice_udp_tunnel_del, }; diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c index 8beb675d676b..7c2a06892bbb 100644 --- a/drivers/net/ethernet/intel/ice/ice_nvm.c +++ b/drivers/net/ethernet/intel/ice/ice_nvm.c @@ -367,6 +367,87 @@ static enum ice_status ice_get_orom_ver_info(struct ice_hw *hw) } /** + * ice_get_netlist_ver_info + * @hw: pointer to the HW struct + * + * Get the netlist version information + */ +static enum ice_status ice_get_netlist_ver_info(struct ice_hw *hw) +{ + struct ice_netlist_ver_info *ver = &hw->netlist_ver; + enum ice_status ret; + u32 id_blk_start; + __le16 raw_data; + u16 data, i; + u16 *buff; + + ret = ice_acquire_nvm(hw, ICE_RES_READ); + if (ret) + return ret; + buff = kcalloc(ICE_AQC_NVM_NETLIST_ID_BLK_LEN, sizeof(*buff), + GFP_KERNEL); + if (!buff) { + ret = ICE_ERR_NO_MEMORY; + goto exit_no_mem; + } + + /* read module length */ + ret = ice_aq_read_nvm(hw, ICE_AQC_NVM_LINK_TOPO_NETLIST_MOD_ID, + ICE_AQC_NVM_LINK_TOPO_NETLIST_LEN_OFFSET * 2, + ICE_AQC_NVM_LINK_TOPO_NETLIST_LEN, &raw_data, + false, false, NULL); + if (ret) + goto exit_error; + + data = le16_to_cpu(raw_data); + /* exit if length is = 0 */ + if (!data) + goto exit_error; + + /* read node count */ + ret = ice_aq_read_nvm(hw, ICE_AQC_NVM_LINK_TOPO_NETLIST_MOD_ID, + ICE_AQC_NVM_NETLIST_NODE_COUNT_OFFSET * 2, + ICE_AQC_NVM_NETLIST_NODE_COUNT_LEN, &raw_data, + false, false, NULL); + if (ret) + goto exit_error; + data = le16_to_cpu(raw_data) & ICE_AQC_NVM_NETLIST_NODE_COUNT_M; + + /* netlist ID block starts from offset 4 + node count * 2 */ + id_blk_start = ICE_AQC_NVM_NETLIST_ID_BLK_START_OFFSET + data * 2; + + /* read the entire netlist ID block */ + ret = ice_aq_read_nvm(hw, ICE_AQC_NVM_LINK_TOPO_NETLIST_MOD_ID, + id_blk_start * 2, + ICE_AQC_NVM_NETLIST_ID_BLK_LEN * 2, buff, false, + false, NULL); + if (ret) + goto exit_error; + + for (i = 0; i < ICE_AQC_NVM_NETLIST_ID_BLK_LEN; i++) + buff[i] = le16_to_cpu(((__force __le16 *)buff)[i]); + + ver->major = (buff[ICE_AQC_NVM_NETLIST_ID_BLK_MAJOR_VER_HIGH] << 16) | + 
buff[ICE_AQC_NVM_NETLIST_ID_BLK_MAJOR_VER_LOW]; + ver->minor = (buff[ICE_AQC_NVM_NETLIST_ID_BLK_MINOR_VER_HIGH] << 16) | + buff[ICE_AQC_NVM_NETLIST_ID_BLK_MINOR_VER_LOW]; + ver->type = (buff[ICE_AQC_NVM_NETLIST_ID_BLK_TYPE_HIGH] << 16) | + buff[ICE_AQC_NVM_NETLIST_ID_BLK_TYPE_LOW]; + ver->rev = (buff[ICE_AQC_NVM_NETLIST_ID_BLK_REV_HIGH] << 16) | + buff[ICE_AQC_NVM_NETLIST_ID_BLK_REV_LOW]; + ver->cust_ver = buff[ICE_AQC_NVM_NETLIST_ID_BLK_CUST_VER]; + /* Read the left most 4 bytes of SHA */ + ver->hash = buff[ICE_AQC_NVM_NETLIST_ID_BLK_SHA_HASH + 15] << 16 | + buff[ICE_AQC_NVM_NETLIST_ID_BLK_SHA_HASH + 14]; + +exit_error: + kfree(buff); +exit_no_mem: + ice_release_nvm(hw); + return ret; +} + +/** * ice_discover_flash_size - Discover the available flash size. * @hw: pointer to the HW struct * @@ -515,6 +596,11 @@ enum ice_status ice_init_nvm(struct ice_hw *hw) return status; } + /* read the netlist version information */ + status = ice_get_netlist_ver_info(hw); + if (status) + ice_debug(hw, ICE_DBG_INIT, "Failed to read netlist info.\n"); + return 0; } diff --git a/drivers/net/ethernet/intel/ice/ice_protocol_type.h b/drivers/net/ethernet/intel/ice/ice_protocol_type.h index 71647566964e..678db6bf7f57 100644 --- a/drivers/net/ethernet/intel/ice/ice_protocol_type.h +++ b/drivers/net/ethernet/intel/ice/ice_protocol_type.h @@ -18,6 +18,7 @@ enum ice_prot_id { ICE_PROT_IPV6_IL = 41, ICE_PROT_TCP_IL = 49, ICE_PROT_UDP_IL_OR_S = 53, + ICE_PROT_GRE_OF = 64, ICE_PROT_SCTP_IL = 96, ICE_PROT_META_ID = 255, /* when offset == metadata */ ICE_PROT_INVALID = 255 /* when offset == ICE_FV_OFFSET_INVAL */ diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index eae707ddf8e8..d63acd2fcf79 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -1917,7 +1917,7 @@ ice_sched_update_elem(struct ice_hw *hw, struct ice_sched_node *node, */ static enum ice_status ice_sched_cfg_node_bw_alloc(struct ice_hw *hw, struct ice_sched_node *node, - enum ice_rl_type rl_type, u8 bw_alloc) + enum ice_rl_type rl_type, u16 bw_alloc) { struct ice_aqc_txsched_elem_data buf; struct ice_aqc_txsched_elem *data; diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 51825a203e35..7d88944de31a 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -593,8 +593,8 @@ enum ice_status ice_get_initial_sw_cfg(struct ice_hw *hw) ICE_AQC_GET_SW_CONF_RESP_IS_VF) is_vf = true; - res_type = le16_to_cpu(ele->vsi_port_num) >> - ICE_AQC_GET_SW_CONF_RESP_TYPE_S; + res_type = (u8)(le16_to_cpu(ele->vsi_port_num) >> + ICE_AQC_GET_SW_CONF_RESP_TYPE_S); if (res_type == ICE_AQC_GET_SW_CONF_RESP_VSI) { /* FW VSI is not needed. Just continue. 
*/ @@ -1618,12 +1618,12 @@ ice_add_mac(struct ice_hw *hw, struct list_head *m_list) struct ice_aqc_sw_rules_elem *s_rule, *r_iter; struct ice_fltr_list_entry *m_list_itr; struct list_head *rule_head; - u16 elem_sent, total_elem_left; + u16 total_elem_left, s_rule_size; struct ice_switch_info *sw; struct mutex *rule_lock; /* Lock to protect filter rule list */ enum ice_status status = 0; u16 num_unicast = 0; - u16 s_rule_size; + u8 elem_sent; if (!m_list || !hw) return ICE_ERR_PARAM; @@ -1707,8 +1707,8 @@ ice_add_mac(struct ice_hw *hw, struct list_head *m_list) total_elem_left -= elem_sent) { struct ice_aqc_sw_rules_elem *entry = r_iter; - elem_sent = min(total_elem_left, - (u16)(ICE_AQ_MAX_BUF_LEN / s_rule_size)); + elem_sent = min_t(u8, total_elem_left, + (ICE_AQ_MAX_BUF_LEN / s_rule_size)); status = ice_aq_sw_rules(hw, entry, elem_sent * s_rule_size, elem_sent, ice_aqc_opc_add_sw_rules, NULL); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 69b21b436f9a..0d90e32efab9 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -819,7 +819,7 @@ static struct sk_buff * ice_build_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, struct xdp_buff *xdp) { - unsigned int metasize = xdp->data - xdp->data_meta; + u8 metasize = xdp->data - xdp->data_meta; #if (PAGE_SIZE < 8192) unsigned int truesize = ice_rx_pg_size(rx_ring) / 2; #else @@ -934,7 +934,7 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, */ static void ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf) { - u32 ntc = rx_ring->next_to_clean + 1; + u16 ntc = rx_ring->next_to_clean + 1; /* fetch, update, and store next to clean */ ntc = (ntc < rx_ring->count) ? ntc : 0; @@ -1544,7 +1544,7 @@ int ice_napi_poll(struct napi_struct *napi, int budget) * don't allow the budget to go below 1 because that would exit * polling early. */ - budget_per_ring = max(budget / q_vector->num_ring_rx, 1); + budget_per_ring = max_t(int, budget / q_vector->num_ring_rx, 1); else /* Max of 1 Rx ring in this q_vector so give it the budget */ budget_per_ring = budget; @@ -1680,7 +1680,8 @@ ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first, */ while (unlikely(size > ICE_MAX_DATA_PER_TXD)) { tx_desc->cmd_type_offset_bsz = - build_ctob(td_cmd, td_offset, max_data, td_tag); + ice_build_ctob(td_cmd, td_offset, max_data, + td_tag); tx_desc++; i++; @@ -1700,8 +1701,8 @@ ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first, if (likely(!data_len)) break; - tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset, - size, td_tag); + tx_desc->cmd_type_offset_bsz = ice_build_ctob(td_cmd, td_offset, + size, td_tag); tx_desc++; i++; @@ -1732,8 +1733,8 @@ ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first, /* write last descriptor with RS and EOP bits */ td_cmd |= (u64)ICE_TXD_LAST_DESC_CMD; - tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset, size, - td_tag); + tx_desc->cmd_type_offset_bsz = + ice_build_ctob(td_cmd, td_offset, size, td_tag); /* Force memory writes to complete before letting h/w know there * are new descriptors to fetch. 
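The comment ending the ice_tx_map() hunk refers to the usual descriptor/tail ordering rule: all descriptor writes must be globally visible before the tail register write lets hardware fetch them. A minimal sketch of that idiom as Intel Ethernet drivers conventionally spell it (the wmb()/writel() pairing here is the standard pattern, not a line quoted from this hunk):

	/* force descriptor writes to memory before hardware reads the tail */
	wmb();
	writel(i, tx_ring->tail);
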
@@ -1807,12 +1808,94 @@ int ice_tx_csum(struct ice_tx_buf *first, struct ice_tx_offload_params *off) l2_len = ip.hdr - skb->data; offset = (l2_len / 2) << ICE_TX_DESC_LEN_MACLEN_S; - if (skb->encapsulation) - return -1; + protocol = vlan_get_protocol(skb); + + if (protocol == htons(ETH_P_IP)) + first->tx_flags |= ICE_TX_FLAGS_IPV4; + else if (protocol == htons(ETH_P_IPV6)) + first->tx_flags |= ICE_TX_FLAGS_IPV6; + + if (skb->encapsulation) { + bool gso_ena = false; + u32 tunnel = 0; + + /* define outer network header type */ + if (first->tx_flags & ICE_TX_FLAGS_IPV4) { + tunnel |= (first->tx_flags & ICE_TX_FLAGS_TSO) ? + ICE_TX_CTX_EIPT_IPV4 : + ICE_TX_CTX_EIPT_IPV4_NO_CSUM; + l4_proto = ip.v4->protocol; + } else if (first->tx_flags & ICE_TX_FLAGS_IPV6) { + tunnel |= ICE_TX_CTX_EIPT_IPV6; + exthdr = ip.hdr + sizeof(*ip.v6); + l4_proto = ip.v6->nexthdr; + if (l4.hdr != exthdr) + ipv6_skip_exthdr(skb, exthdr - skb->data, + &l4_proto, &frag_off); + } + + /* define outer transport */ + switch (l4_proto) { + case IPPROTO_UDP: + tunnel |= ICE_TXD_CTX_UDP_TUNNELING; + first->tx_flags |= ICE_TX_FLAGS_TUNNEL; + break; + case IPPROTO_GRE: + tunnel |= ICE_TXD_CTX_GRE_TUNNELING; + first->tx_flags |= ICE_TX_FLAGS_TUNNEL; + break; + case IPPROTO_IPIP: + case IPPROTO_IPV6: + first->tx_flags |= ICE_TX_FLAGS_TUNNEL; + l4.hdr = skb_inner_network_header(skb); + break; + default: + if (first->tx_flags & ICE_TX_FLAGS_TSO) + return -1; + + skb_checksum_help(skb); + return 0; + } + + /* compute outer L3 header size */ + tunnel |= ((l4.hdr - ip.hdr) / 4) << + ICE_TXD_CTX_QW0_EIPLEN_S; + + /* switch IP header pointer from outer to inner header */ + ip.hdr = skb_inner_network_header(skb); + + /* compute tunnel header size */ + tunnel |= ((ip.hdr - l4.hdr) / 2) << + ICE_TXD_CTX_QW0_NATLEN_S; + + gso_ena = skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL; + /* indicate if we need to offload outer UDP header */ + if ((first->tx_flags & ICE_TX_FLAGS_TSO) && !gso_ena && + (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)) + tunnel |= ICE_TXD_CTX_QW0_L4T_CS_M; + + /* record tunnel offload values */ + off->cd_tunnel_params |= tunnel; + + /* set DTYP=1 to indicate that it's an Tx context descriptor + * in IPsec tunnel mode with Tx offloads in Quad word 1 + */ + off->cd_qw1 |= (u64)ICE_TX_DESC_DTYPE_CTX; + + /* switch L4 header pointer from outer to inner */ + l4.hdr = skb_inner_transport_header(skb); + l4_proto = 0; + + /* reset type as we transition from outer to inner headers */ + first->tx_flags &= ~(ICE_TX_FLAGS_IPV4 | ICE_TX_FLAGS_IPV6); + if (ip.v4->version == 4) + first->tx_flags |= ICE_TX_FLAGS_IPV4; + if (ip.v6->version == 6) + first->tx_flags |= ICE_TX_FLAGS_IPV6; + } /* Enable IP checksum offloads */ - protocol = vlan_get_protocol(skb); - if (protocol == htons(ETH_P_IP)) { + if (first->tx_flags & ICE_TX_FLAGS_IPV4) { l4_proto = ip.v4->protocol; /* the stack computes the IP header already, the only time we * need the hardware to recompute it is in the case of TSO. 
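In the tunnel path added above, the Tx context descriptor encodes the outer IP header length in 4-byte words (EIPLEN) and the span from the outer L4 header to the inner IP header in 2-byte words (NATLEN). A worked example under assumed textbook header sizes, a 20-byte outer IPv4 header followed by an 8-byte UDP header and an 8-byte VXLAN header:

	u32 tunnel = 0;

	tunnel |= (20 / 4) << ICE_TXD_CTX_QW0_EIPLEN_S;	/* outer IPv4: 5 words */
	tunnel |= (16 / 2) << ICE_TXD_CTX_QW0_NATLEN_S;	/* UDP + VXLAN: 8 words */
	tunnel |= ICE_TXD_CTX_UDP_TUNNELING;
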
@@ -1822,7 +1905,7 @@ int ice_tx_csum(struct ice_tx_buf *first, struct ice_tx_offload_params *off) else cmd |= ICE_TX_DESC_CMD_IIPT_IPV4; - } else if (protocol == htons(ETH_P_IPV6)) { + } else if (first->tx_flags & ICE_TX_FLAGS_IPV6) { cmd |= ICE_TX_DESC_CMD_IIPT_IPV6; exthdr = ip.hdr + sizeof(*ip.v6); l4_proto = ip.v6->nexthdr; @@ -1944,7 +2027,8 @@ int ice_tso(struct ice_tx_buf *first, struct ice_tx_offload_params *off) unsigned char *hdr; } l4; u64 cd_mss, cd_tso_len; - u32 paylen, l4_start; + u32 paylen; + u8 l4_start; int err; if (skb->ip_summed != CHECKSUM_PARTIAL) @@ -1969,8 +2053,42 @@ int ice_tso(struct ice_tx_buf *first, struct ice_tx_offload_params *off) ip.v6->payload_len = 0; } + if (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE | + SKB_GSO_GRE_CSUM | + SKB_GSO_IPXIP4 | + SKB_GSO_IPXIP6 | + SKB_GSO_UDP_TUNNEL | + SKB_GSO_UDP_TUNNEL_CSUM)) { + if (!(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) && + (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)) { + l4.udp->len = 0; + + /* determine offset of outer transport header */ + l4_start = (u8)(l4.hdr - skb->data); + + /* remove payload length from outer checksum */ + paylen = skb->len - l4_start; + csum_replace_by_diff(&l4.udp->check, + (__force __wsum)htonl(paylen)); + } + + /* reset pointers to inner headers */ + + /* cppcheck-suppress unreadVariable */ + ip.hdr = skb_inner_network_header(skb); + l4.hdr = skb_inner_transport_header(skb); + + /* initialize inner IP header fields */ + if (ip.v4->version == 4) { + ip.v4->tot_len = 0; + ip.v4->check = 0; + } else { + ip.v6->payload_len = 0; + } + } + /* determine offset of transport header */ - l4_start = l4.hdr - skb->data; + l4_start = (u8)(l4.hdr - skb->data); /* remove payload length from checksum */ paylen = skb->len - l4_start; @@ -1979,12 +2097,12 @@ int ice_tso(struct ice_tx_buf *first, struct ice_tx_offload_params *off) csum_replace_by_diff(&l4.udp->check, (__force __wsum)htonl(paylen)); /* compute length of UDP segmentation header */ - off->header_len = sizeof(l4.udp) + l4_start; + off->header_len = (u8)sizeof(l4.udp) + l4_start; } else { csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(paylen)); /* compute length of TCP segmentation header */ - off->header_len = (l4.tcp->doff * 4) + l4_start; + off->header_len = (u8)((l4.tcp->doff * 4) + l4_start); } /* update gso_segs and bytecount */ @@ -2215,7 +2333,7 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring) if (offload.cd_qw1 & ICE_TX_DESC_DTYPE_CTX) { struct ice_tx_ctx_desc *cdesc; - int i = tx_ring->next_to_use; + u16 i = tx_ring->next_to_use; /* grab the next descriptor */ cdesc = ICE_TX_CTX_DESC(tx_ring, i); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index d0fd2173854f..5f5a6ce2b7e5 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -113,6 +113,9 @@ static inline int ice_skb_pad(void) #define ICE_TX_FLAGS_TSO BIT(0) #define ICE_TX_FLAGS_HW_VLAN BIT(1) #define ICE_TX_FLAGS_SW_VLAN BIT(2) +#define ICE_TX_FLAGS_IPV4 BIT(5) +#define ICE_TX_FLAGS_IPV6 BIT(6) +#define ICE_TX_FLAGS_TUNNEL BIT(7) #define ICE_TX_FLAGS_VLAN_M 0xffff0000 #define ICE_TX_FLAGS_VLAN_PR_M 0xe0000000 #define ICE_TX_FLAGS_VLAN_PR_S 29 diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c index 6da048a6ca7c..1ba97172d8d0 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c @@ -8,7 +8,7 @@ * @rx_ring: ring to 
bump * @val: new head index */ -void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val) +void ice_release_rx_desc(struct ice_ring *rx_ring, u16 val) { u16 prev_ntu = rx_ring->next_to_use & ~0x7; @@ -84,12 +84,17 @@ ice_rx_csum(struct ice_ring *ring, struct sk_buff *skb, union ice_32b_rx_flex_desc *rx_desc, u8 ptype) { struct ice_rx_ptype_decoded decoded; - u32 rx_error, rx_status; + u16 rx_error, rx_status; + u16 rx_stat_err1; bool ipv4, ipv6; rx_status = le16_to_cpu(rx_desc->wb.status_error0); - rx_error = rx_status; + rx_error = rx_status & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) | + BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S) | + BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S) | + BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S)); + rx_stat_err1 = le16_to_cpu(rx_desc->wb.status_error1); decoded = ice_decode_rx_desc_ptype(ptype); /* Start with CHECKSUM_NONE and by default csum_level = 0 */ @@ -125,6 +130,18 @@ ice_rx_csum(struct ice_ring *ring, struct sk_buff *skb, if (rx_error & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S)) goto checksum_fail; + /* check for outer UDP checksum error in tunneled packets */ + if ((rx_stat_err1 & BIT(ICE_RX_FLEX_DESC_STATUS1_NAT_S)) && + (rx_error & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S))) + goto checksum_fail; + + /* If there is an outer header present that might contain a checksum + * we need to bump the checksum level by 1 to reflect the fact that + * we are indicating we validated the inner checksum. + */ + if (decoded.tunnel_type >= ICE_RX_PTYPE_TUNNEL_IP_GRENAT) + skb->csum_level = 1; + /* Only report checksum unnecessary for TCP, UDP, or SCTP */ switch (decoded.inner_prot) { case ICE_RX_PTYPE_INNER_PROT_TCP: @@ -215,8 +232,8 @@ int ice_xmit_xdp_ring(void *data, u16 size, struct ice_ring *xdp_ring) tx_desc = ICE_TX_DESC(xdp_ring, i); tx_desc->buf_addr = cpu_to_le64(dma); - tx_desc->cmd_type_offset_bsz = build_ctob(ICE_TXD_LAST_DESC_CMD, 0, - size, 0); + tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TXD_LAST_DESC_CMD, 0, + size, 0); /* Make certain all of the status bits have been updated * before next_to_watch is written. 
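The ice_rx_csum() hunk above is the receive-side mirror of the Tx tunnel work: when the decoded ptype indicates an encapsulated frame, the driver has validated the inner checksum, so it raises csum_level to tell the stack that one encapsulation layer was covered. Condensed from the hunk (the final CHECKSUM_UNNECESSARY assignment is the function's normal success path, per the "Only report checksum unnecessary" comment):

	if (decoded.tunnel_type >= ICE_RX_PTYPE_TUNNEL_IP_GRENAT)
		skb->csum_level = 1;	/* inner checksum was validated */
	skb->ip_summed = CHECKSUM_UNNECESSARY;
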
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h index ba9164dad9ae..58ff58f0f972 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h @@ -22,7 +22,7 @@ ice_test_staterr(union ice_32b_rx_flex_desc *rx_desc, const u16 stat_err_bits) } static inline __le64 -build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag) +ice_build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag) { return cpu_to_le64(ICE_TX_DESC_DTYPE_DATA | (td_cmd << ICE_TXD_QW1_CMD_S) | @@ -49,7 +49,7 @@ static inline void ice_xdp_ring_update_tail(struct ice_ring *xdp_ring) void ice_finalize_xdp_rx(struct ice_ring *rx_ring, unsigned int xdp_res); int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_ring *xdp_ring); int ice_xmit_xdp_ring(void *data, u16 size, struct ice_ring *xdp_ring); -void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val); +void ice_release_rx_desc(struct ice_ring *rx_ring, u16 val); void ice_process_skb_fields(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 4ce5f92fca4a..9f6578eb4672 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -259,6 +259,16 @@ struct ice_nvm_info { #define ICE_NVM_VER_LEN 32 +/* netlist version information */ +struct ice_netlist_ver_info { + u32 major; /* major high/low */ + u32 minor; /* minor high/low */ + u32 type; /* type high/low */ + u32 rev; /* revision high/low */ + u32 hash; /* SHA-1 hash word */ + u16 cust_ver; /* customer version */ +}; + /* Max number of port to queue branches w.r.t topology */ #define ICE_MAX_TRAFFIC_CLASS 8 #define ICE_TXSCHED_MAX_BRANCHES ICE_MAX_TRAFFIC_CLASS @@ -491,8 +501,8 @@ struct ice_hw { u16 max_burst_size; /* driver sets this value */ /* Tx Scheduler values */ - u16 num_tx_sched_layers; - u16 num_tx_sched_phys_layers; + u8 num_tx_sched_layers; + u8 num_tx_sched_phys_layers; u8 flattened_layers; u8 max_cgds; u8 sw_entry_point_layer; @@ -506,6 +516,7 @@ struct ice_hw { struct ice_nvm_info nvm; struct ice_hw_dev_caps dev_caps; /* device capabilities */ struct ice_hw_func_caps func_caps; /* function capabilities */ + struct ice_netlist_ver_info netlist_ver; /* netlist version info */ struct ice_switch_info *switch_info; /* switch filter lists */ @@ -568,6 +579,10 @@ struct ice_hw { u8 *pkg_copy; u32 pkg_size; + /* tunneling info */ + struct mutex tnl_lock; + struct ice_tunnel_table tnl; + /* HW block tables */ struct ice_blk_info blk[ICE_BLK_COUNT]; struct mutex fl_profs_locks[ICE_BLK_COUNT]; /* lock fltr profiles */ @@ -592,6 +607,8 @@ struct ice_eth_stats { u64 tx_errors; /* tepc */ }; +#define ICE_MAX_UP 8 + /* Statistics collected by the MAC */ struct ice_hw_port_stats { /* eth stats collected by the port */ diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index 15191a325918..62c100d47592 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -4,16 +4,18 @@ #include "ice.h" #include "ice_base.h" #include "ice_lib.h" +#include "ice_fltr.h" /** * ice_validate_vf_id - helper to check if VF ID is valid * @pf: pointer to the PF structure * @vf_id: the ID of the VF to check */ -static int ice_validate_vf_id(struct ice_pf *pf, int vf_id) +static int ice_validate_vf_id(struct ice_pf *pf, u16 vf_id) { + /* 
vf_id range is only valid for 0-255, and should always be unsigned */ if (vf_id >= pf->num_alloc_vfs) { - dev_err(ice_pf_to_dev(pf), "Invalid VF ID: %d\n", vf_id); + dev_err(ice_pf_to_dev(pf), "Invalid VF ID: %u\n", vf_id); return -EINVAL; } return 0; @@ -27,7 +29,7 @@ static int ice_validate_vf_id(struct ice_pf *pf, int vf_id) static int ice_check_vf_init(struct ice_pf *pf, struct ice_vf *vf) { if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) { - dev_err(ice_pf_to_dev(pf), "VF ID: %d in reset. Try again.\n", + dev_err(ice_pf_to_dev(pf), "VF ID: %u in reset. Try again.\n", vf->vf_id); return -EBUSY; } @@ -35,6 +37,37 @@ static int ice_check_vf_init(struct ice_pf *pf, struct ice_vf *vf) } /** + * ice_err_to_virt_err - translate errors for VF return code + * @ice_err: error return code + */ +static enum virtchnl_status_code ice_err_to_virt_err(enum ice_status ice_err) +{ + switch (ice_err) { + case ICE_SUCCESS: + return VIRTCHNL_STATUS_SUCCESS; + case ICE_ERR_BAD_PTR: + case ICE_ERR_INVAL_SIZE: + case ICE_ERR_DEVICE_NOT_SUPPORTED: + case ICE_ERR_PARAM: + case ICE_ERR_CFG: + return VIRTCHNL_STATUS_ERR_PARAM; + case ICE_ERR_NO_MEMORY: + return VIRTCHNL_STATUS_ERR_NO_MEMORY; + case ICE_ERR_NOT_READY: + case ICE_ERR_RESET_FAILED: + case ICE_ERR_FW_API_VER: + case ICE_ERR_AQ_ERROR: + case ICE_ERR_AQ_TIMEOUT: + case ICE_ERR_AQ_FULL: + case ICE_ERR_AQ_NO_WORK: + case ICE_ERR_AQ_EMPTY: + return VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR; + default: + return VIRTCHNL_STATUS_ERR_NOT_SUPPORTED; + } +} + +/** * ice_vc_vf_broadcast - Broadcast a message to all VFs on PF * @pf: pointer to the PF structure * @v_opcode: operation code @@ -337,7 +370,7 @@ void ice_free_vfs(struct ice_pf *pf) * before this function ever gets called. */ if (!pci_vfs_assigned(pf->pdev)) { - int vf_id; + unsigned int vf_id; /* Acknowledge VFLR for all VFs. Without this, VFs will fail to * work correctly when SR-IOV gets re-enabled. 
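ice_err_to_virt_err() folds the driver's entire status space into the few virtchnl codes a VF can meaningfully act on. Typical use, matching the promiscuous-mode handler added later in this patch:

	status = ice_vf_set_vsi_promisc(vf, vsi, promisc_m, rm_promisc);
	if (status)
		/* e.g. ICE_ERR_AQ_TIMEOUT becomes
		 * VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR
		 */
		v_ret = ice_err_to_virt_err(status);
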
@@ -368,9 +401,9 @@ static void ice_trigger_vf_reset(struct ice_vf *vf, bool is_vflr, bool is_pfr) { struct ice_pf *pf = vf->pf; u32 reg, reg_idx, bit_idx; + unsigned int vf_abs_id, i; struct device *dev; struct ice_hw *hw; - int vf_abs_id, i; dev = ice_pf_to_dev(pf); hw = &pf->hw; @@ -418,7 +451,7 @@ static void ice_trigger_vf_reset(struct ice_vf *vf, bool is_vflr, bool is_pfr) if ((reg & VF_TRANS_PENDING_M) == 0) break; - dev_err(dev, "VF %d PCI transactions stuck\n", vf->vf_id); + dev_err(dev, "VF %u PCI transactions stuck\n", vf->vf_id); udelay(ICE_PCI_CIAD_WAIT_DELAY_US); } } @@ -460,8 +493,9 @@ static int ice_vsi_manage_pvid(struct ice_vsi *vsi, u16 pvid_info, bool enable) status = ice_update_vsi(hw, vsi->idx, ctxt, NULL); if (status) { - dev_info(ice_hw_to_dev(hw), "update VSI for port VLAN failed, err %d aq_err %d\n", - status, hw->adminq.sq_last_status); + dev_info(ice_hw_to_dev(hw), "update VSI for port VLAN failed, err %s aq_err %s\n", + ice_stat_str(status), + ice_aq_str(hw->adminq.sq_last_status)); ret = -EIO; goto out; } @@ -515,7 +549,6 @@ static int ice_calc_vf_first_vector_idx(struct ice_pf *pf, struct ice_vf *vf) static int ice_alloc_vsi_res(struct ice_vf *vf) { struct ice_pf *pf = vf->pf; - LIST_HEAD(tmp_add_list); u8 broadcast[ETH_ALEN]; struct ice_vsi *vsi; struct device *dev; @@ -537,7 +570,8 @@ static int ice_alloc_vsi_res(struct ice_vf *vf) /* Check if port VLAN exist before, and restore it accordingly */ if (vf->port_vlan_info) { ice_vsi_manage_pvid(vsi, vf->port_vlan_info, true); - if (ice_vsi_add_vlan(vsi, vf->port_vlan_info & VLAN_VID_MASK)) + if (ice_vsi_add_vlan(vsi, vf->port_vlan_info & VLAN_VID_MASK, + ICE_FWD_TO_VSI)) dev_warn(ice_pf_to_dev(pf), "Failed to add Port VLAN %d filter for VF %d\n", vf->port_vlan_info & VLAN_VID_MASK, vf->vf_id); } else { @@ -546,27 +580,23 @@ static int ice_alloc_vsi_res(struct ice_vf *vf) * untagged broadcast/multicast traffic seen on the VF * interface. */ - if (ice_vsi_add_vlan(vsi, 0)) + if (ice_vsi_add_vlan(vsi, 0, ICE_FWD_TO_VSI)) dev_warn(ice_pf_to_dev(pf), "Failed to add VLAN 0 filter for VF %d, MDD events will trigger. Reset the VF, disable spoofchk, or enable 8021q module on the guest\n", vf->vf_id); } - eth_broadcast_addr(broadcast); - - status = ice_add_mac_to_list(vsi, &tmp_add_list, broadcast); - if (status) - goto ice_alloc_vsi_res_exit; - if (is_valid_ether_addr(vf->dflt_lan_addr.addr)) { - status = ice_add_mac_to_list(vsi, &tmp_add_list, - vf->dflt_lan_addr.addr); + status = ice_fltr_add_mac(vsi, vf->dflt_lan_addr.addr, + ICE_FWD_TO_VSI); if (status) goto ice_alloc_vsi_res_exit; } - status = ice_add_mac(&pf->hw, &tmp_add_list); + eth_broadcast_addr(broadcast); + status = ice_fltr_add_mac(vsi, broadcast, ICE_FWD_TO_VSI); if (status) - dev_err(dev, "could not add mac filters error %d\n", status); + dev_err(dev, "could not add mac filters error %d\n", + status); else vf->num_mac = 1; @@ -577,7 +607,6 @@ static int ice_alloc_vsi_res(struct ice_vf *vf) * more vectors. 
*/ ice_alloc_vsi_res_exit: - ice_free_fltr_list(dev, &tmp_add_list); return status; } @@ -1483,7 +1512,7 @@ int ice_sriov_configure(struct pci_dev *pdev, int num_vfs) void ice_process_vflr_event(struct ice_pf *pf) { struct ice_hw *hw = &pf->hw; - int vf_id; + unsigned int vf_id; u32 reg; if (!test_and_clear_bit(__ICE_VFLR_EVENT_PENDING, pf->state) || @@ -1524,7 +1553,7 @@ static void ice_vc_reset_vf(struct ice_vf *vf) */ static struct ice_vf *ice_get_vf_from_pfq(struct ice_pf *pf, u16 pfq) { - int vf_id; + unsigned int vf_id; ice_for_each_vf(pf, vf_id) { struct ice_vf *vf = &pf->vf[vf_id]; @@ -1628,8 +1657,9 @@ ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode, aq_ret = ice_aq_send_msg_to_vf(&pf->hw, vf->vf_id, v_opcode, v_retval, msg, msglen, NULL); if (aq_ret && pf->hw.mailboxq.sq_last_status != ICE_AQ_RC_ENOSYS) { - dev_info(dev, "Unable to send the message to VF %d ret %d aq_err %d\n", - vf->vf_id, aq_ret, pf->hw.mailboxq.sq_last_status); + dev_info(dev, "Unable to send the message to VF %d ret %s aq_err %s\n", + vf->vf_id, ice_stat_str(aq_ret), + ice_aq_str(pf->hw.mailboxq.sq_last_status)); return -EIO; } @@ -2044,8 +2074,9 @@ int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena) status = ice_update_vsi(&pf->hw, vf_vsi->idx, ctx, NULL); if (status) { - dev_err(dev, "Failed to %sable spoofchk on VF %d VSI %d\n error %d\n", - ena ? "en" : "dis", vf->vf_id, vf_vsi->vsi_num, status); + dev_err(dev, "Failed to %sable spoofchk on VF %d VSI %d\n error %s\n", + ena ? "en" : "dis", vf->vf_id, vf_vsi->vsi_num, + ice_stat_str(status)); ret = -EIO; goto out; } @@ -2060,6 +2091,174 @@ out: } /** + * ice_is_any_vf_in_promisc - check if any VF(s) are in promiscuous mode + * @pf: PF structure for accessing VF(s) + * + * Return false if no VF(s) are in unicast and/or multicast promiscuous mode, + * else return true + */ +bool ice_is_any_vf_in_promisc(struct ice_pf *pf) +{ + int vf_idx; + + ice_for_each_vf(pf, vf_idx) { + struct ice_vf *vf = &pf->vf[vf_idx]; + + /* found a VF that has promiscuous mode configured */ + if (test_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states) || + test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) + return true; + } + + return false; +} + +/** + * ice_vc_cfg_promiscuous_mode_msg + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + * + * called from the VF to configure VF VSIs promiscuous mode + */ +static int ice_vc_cfg_promiscuous_mode_msg(struct ice_vf *vf, u8 *msg) +{ + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + struct virtchnl_promisc_info *info = + (struct virtchnl_promisc_info *)msg; + struct ice_pf *pf = vf->pf; + struct ice_vsi *vsi; + struct device *dev; + bool rm_promisc; + int ret = 0; + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (!ice_vc_isvalid_vsi_id(vf, info->vsi_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + vsi = pf->vsi[vf->lan_vsi_idx]; + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + dev = ice_pf_to_dev(pf); + if (!test_bit(ICE_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps)) { + dev_err(dev, "Unprivileged VF %d is attempting to configure promiscuous mode\n", + vf->vf_id); + /* Leave v_ret alone, lie to the VF on purpose. 
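Reporting success here is deliberate: v_ret stays VIRTCHNL_STATUS_SUCCESS, so an unprivileged VF cannot tell that the host ignored its request.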
*/ + goto error_param; + } + + rm_promisc = !(info->flags & FLAG_VF_UNICAST_PROMISC) && + !(info->flags & FLAG_VF_MULTICAST_PROMISC); + + if (vsi->num_vlan || vf->port_vlan_info) { + struct ice_vsi *pf_vsi = ice_get_main_vsi(pf); + struct net_device *pf_netdev; + + if (!pf_vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + pf_netdev = pf_vsi->netdev; + + ret = ice_set_vf_spoofchk(pf_netdev, vf->vf_id, rm_promisc); + if (ret) { + dev_err(dev, "Failed to update spoofchk to %s for VF %d VSI %d when setting promiscuous mode\n", + rm_promisc ? "ON" : "OFF", vf->vf_id, + vsi->vsi_num); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + } + + ret = ice_cfg_vlan_pruning(vsi, true, !rm_promisc); + if (ret) { + dev_err(dev, "Failed to configure VLAN pruning in promiscuous mode\n"); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + } + + if (!test_bit(ICE_FLAG_VF_TRUE_PROMISC_ENA, pf->flags)) { + bool set_dflt_vsi = !!(info->flags & FLAG_VF_UNICAST_PROMISC); + + if (set_dflt_vsi && !ice_is_dflt_vsi_in_use(pf->first_sw)) + /* only attempt to set the default forwarding VSI if + * it's not currently set + */ + ret = ice_set_dflt_vsi(pf->first_sw, vsi); + else if (!set_dflt_vsi && + ice_is_vsi_dflt_vsi(pf->first_sw, vsi)) + /* only attempt to free the default forwarding VSI if we + * are the owner + */ + ret = ice_clear_dflt_vsi(pf->first_sw); + + if (ret) { + dev_err(dev, "%sable VF %d as the default VSI failed, error %d\n", + set_dflt_vsi ? "en" : "dis", vf->vf_id, ret); + v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR; + goto error_param; + } + } else { + enum ice_status status; + u8 promisc_m; + + if (info->flags & FLAG_VF_UNICAST_PROMISC) { + if (vf->port_vlan_info || vsi->num_vlan) + promisc_m = ICE_UCAST_VLAN_PROMISC_BITS; + else + promisc_m = ICE_UCAST_PROMISC_BITS; + } else if (info->flags & FLAG_VF_MULTICAST_PROMISC) { + if (vf->port_vlan_info || vsi->num_vlan) + promisc_m = ICE_MCAST_VLAN_PROMISC_BITS; + else + promisc_m = ICE_MCAST_PROMISC_BITS; + } else { + if (vf->port_vlan_info || vsi->num_vlan) + promisc_m = ICE_UCAST_VLAN_PROMISC_BITS; + else + promisc_m = ICE_UCAST_PROMISC_BITS; + } + + /* Configure multicast/unicast with or without VLAN promiscuous + * mode + */ + status = ice_vf_set_vsi_promisc(vf, vsi, promisc_m, rm_promisc); + if (status) { + dev_err(dev, "%sable Tx/Rx filter promiscuous mode on VF-%d failed, error: %s\n", + rm_promisc ? "dis" : "en", vf->vf_id, + ice_stat_str(status)); + v_ret = ice_err_to_virt_err(status); + goto error_param; + } else { + dev_dbg(dev, "%sable Tx/Rx filter promiscuous mode on VF-%d succeeded\n", + rm_promisc ? 
"dis" : "en", vf->vf_id); + } + } + + if (info->flags & FLAG_VF_MULTICAST_PROMISC) + set_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states); + else + clear_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states); + + if (info->flags & FLAG_VF_UNICAST_PROMISC) + set_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states); + else + clear_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states); + +error_param: + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE, + v_ret, NULL, 0); +} + +/** * ice_vc_get_stats_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer @@ -2604,14 +2803,14 @@ ice_vc_add_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi, u8 *mac_addr) return -EPERM; } - status = ice_vsi_cfg_mac_fltr(vsi, mac_addr, true); + status = ice_fltr_add_mac(vsi, mac_addr, ICE_FWD_TO_VSI); if (status == ICE_ERR_ALREADY_EXISTS) { dev_err(dev, "MAC %pM already exists for VF %d\n", mac_addr, vf->vf_id); return -EEXIST; } else if (status) { - dev_err(dev, "Failed to add MAC %pM for VF %d\n, error %d\n", - mac_addr, vf->vf_id, status); + dev_err(dev, "Failed to add MAC %pM for VF %d\n, error %s\n", + mac_addr, vf->vf_id, ice_stat_str(status)); return -EIO; } @@ -2641,14 +2840,14 @@ ice_vc_del_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi, u8 *mac_addr) ether_addr_equal(mac_addr, vf->dflt_lan_addr.addr)) return 0; - status = ice_vsi_cfg_mac_fltr(vsi, mac_addr, false); + status = ice_fltr_remove_mac(vsi, mac_addr, ICE_FWD_TO_VSI); if (status == ICE_ERR_DOES_NOT_EXIST) { dev_err(dev, "MAC %pM does not exist for VF %d\n", mac_addr, vf->vf_id); return -ENOENT; } else if (status) { - dev_err(dev, "Failed to delete MAC %pM for VF %d, error %d\n", - mac_addr, vf->vf_id, status); + dev_err(dev, "Failed to delete MAC %pM for VF %d, error %s\n", + mac_addr, vf->vf_id, ice_stat_str(status)); return -EIO; } @@ -2885,7 +3084,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, /* add VLAN 0 filter back when transitioning from port VLAN to * no port VLAN. No change to old port VLAN on failure. 
*/ - ret = ice_vsi_add_vlan(vsi, 0); + ret = ice_vsi_add_vlan(vsi, 0, ICE_FWD_TO_VSI); if (ret) return ret; ret = ice_vsi_manage_pvid(vsi, 0, false); @@ -2898,7 +3097,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, vlan_id, qos, vf_id); /* add VLAN filter for the port VLAN */ - ret = ice_vsi_add_vlan(vsi, vlan_id); + ret = ice_vsi_add_vlan(vsi, vlan_id, ICE_FWD_TO_VSI); if (ret) return ret; } @@ -2992,8 +3191,9 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) goto error_param; } - if (test_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states) || - test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) + if ((test_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states) || + test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) && + test_bit(ICE_FLAG_VF_TRUE_PROMISC_ENA, pf->flags)) vlan_promisc = true; if (add_v) { @@ -3018,7 +3218,7 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) if (!vid) continue; - status = ice_vsi_add_vlan(vsi, vid); + status = ice_vsi_add_vlan(vsi, vid, ICE_FWD_TO_VSI); if (status) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; @@ -3317,6 +3517,9 @@ error_handler: case VIRTCHNL_OP_GET_STATS: err = ice_vc_get_stats_msg(vf, msg); break; + case VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE: + err = ice_vc_cfg_promiscuous_mode_msg(vf, msg); + break; case VIRTCHNL_OP_ADD_VLAN: err = ice_vc_add_vlan_msg(vf, msg); break; diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h index 3f9464269bd2..474293ff4fe5 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h @@ -64,7 +64,7 @@ struct ice_mdd_vf_events { struct ice_vf { struct ice_pf *pf; - s16 vf_id; /* VF ID in the PF space */ + u16 vf_id; /* VF ID in the PF space */ u16 lan_vsi_idx; /* index into PF struct */ /* first vector index of this VF in the PF space */ int first_vector_idx; @@ -128,6 +128,7 @@ void ice_set_vf_state_qs_dis(struct ice_vf *vf); int ice_get_vf_stats(struct net_device *netdev, int vf_id, struct ifla_vf_stats *vf_stats); +bool ice_is_any_vf_in_promisc(struct ice_pf *pf); void ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event); void ice_print_vfs_mdd_events(struct ice_pf *pf); @@ -219,5 +220,10 @@ ice_get_vf_stats(struct net_device __always_unused *netdev, { return -EOPNOTSUPP; } + +static inline bool ice_is_any_vf_in_promisc(struct ice_pf __always_unused *pf) +{ + return false; +} #endif /* CONFIG_PCI_IOV */ #endif /* _ICE_VIRTCHNL_PF_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index a73f6c3c70a4..b6f928c9e9c9 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -693,8 +693,8 @@ static bool ice_xmit_zc(struct ice_ring *xdp_ring, int budget) tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_to_use); tx_desc->buf_addr = cpu_to_le64(dma); - tx_desc->cmd_type_offset_bsz = build_ctob(ICE_TXD_LAST_DESC_CMD, - 0, desc.len, 0); + tx_desc->cmd_type_offset_bsz = + ice_build_ctob(ICE_TXD_LAST_DESC_CMD, 0, desc.len, 0); xdp_ring->next_to_use++; if (xdp_ring->next_to_use == xdp_ring->count) diff --git a/drivers/net/ethernet/intel/igc/Makefile b/drivers/net/ethernet/intel/igc/Makefile index 3652f211f351..1c3051db9085 100644 --- a/drivers/net/ethernet/intel/igc/Makefile +++ b/drivers/net/ethernet/intel/igc/Makefile @@ -8,4 +8,4 @@ obj-$(CONFIG_IGC) += igc.o igc-objs := igc_main.o igc_mac.o igc_i225.o 
igc_base.o igc_nvm.o igc_phy.o \ -igc_ethtool.o igc_ptp.o igc_dump.o igc_tsn.o +igc_diag.o igc_ethtool.o igc_ptp.o igc_dump.o igc_tsn.o diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 8ddc39482a8e..fcc6261d7f67 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -16,8 +16,7 @@ #include "igc_hw.h" -/* forward declaration */ -void igc_set_ethtool_ops(struct net_device *); +void igc_ethtool_set_ops(struct net_device *); /* Transmit and receive queues */ #define IGC_MAX_RX_QUEUES 4 @@ -26,9 +25,14 @@ void igc_set_ethtool_ops(struct net_device *); #define MAX_Q_VECTORS 8 #define MAX_STD_JUMBO_FRAME_SIZE 9216 -#define MAX_ETYPE_FILTER (4 - 1) +#define MAX_ETYPE_FILTER 8 #define IGC_RETA_SIZE 128 +enum igc_mac_filter_type { + IGC_MAC_FILTER_TYPE_DST = 0, + IGC_MAC_FILTER_TYPE_SRC +}; + struct igc_tx_queue_stats { u64 packets; u64 bytes; @@ -183,21 +187,20 @@ struct igc_adapter { u32 rss_queues; u32 rss_indir_tbl_init; - /* RX network flow classification support */ - struct hlist_head nfc_filter_list; - unsigned int nfc_filter_count; - - /* lock for RX network flow classification filter */ - spinlock_t nfc_lock; - bool etype_bitmap[MAX_ETYPE_FILTER]; - - struct igc_mac_addr *mac_table; + /* Any access to elements in nfc_rule_list is protected by the + * nfc_rule_lock. + */ + spinlock_t nfc_rule_lock; + struct hlist_head nfc_rule_list; + unsigned int nfc_rule_count; u8 rss_indir_tbl[IGC_RETA_SIZE]; unsigned long link_check_timeout; struct igc_info ei; + u32 test_icr; + struct ptp_clock *ptp_clock; struct ptp_clock_info ptp_caps; struct work_struct ptp_tx_work; @@ -215,6 +218,8 @@ struct igc_adapter { void igc_up(struct igc_adapter *adapter); void igc_down(struct igc_adapter *adapter); +int igc_open(struct net_device *netdev); +int igc_close(struct net_device *netdev); int igc_setup_tx_resources(struct igc_ring *ring); int igc_setup_rx_resources(struct igc_ring *ring); void igc_free_tx_resources(struct igc_ring *ring); @@ -227,10 +232,16 @@ void igc_write_rss_indir_tbl(struct igc_adapter *adapter); bool igc_has_link(struct igc_adapter *adapter); void igc_reset(struct igc_adapter *adapter); int igc_set_spd_dplx(struct igc_adapter *adapter, u32 spd, u8 dplx); -int igc_add_mac_filter(struct igc_adapter *adapter, const u8 *addr, - const s8 queue, const u8 flags); -int igc_del_mac_filter(struct igc_adapter *adapter, const u8 *addr, - const u8 flags); +int igc_add_mac_filter(struct igc_adapter *adapter, + enum igc_mac_filter_type type, const u8 *addr, + int queue); +int igc_del_mac_filter(struct igc_adapter *adapter, + enum igc_mac_filter_type type, const u8 *addr); +int igc_add_vlan_prio_filter(struct igc_adapter *adapter, int prio, + int queue); +void igc_del_vlan_prio_filter(struct igc_adapter *adapter, int prio); +int igc_add_etype_filter(struct igc_adapter *adapter, u16 etype, int queue); +int igc_del_etype_filter(struct igc_adapter *adapter, u16 etype); void igc_update_stats(struct igc_adapter *adapter); /* igc_dump declarations */ @@ -441,40 +452,22 @@ enum igc_filter_match_flags { IGC_FILTER_FLAG_DST_MAC_ADDR = 0x8, }; -/* RX network flow classification data structure */ -struct igc_nfc_input { - /* Byte layout in order, all values with MSB first: - * match_flags - 1 byte - * etype - 2 bytes - * vlan_tci - 2 bytes - */ +struct igc_nfc_filter { u8 match_flags; - __be16 etype; - __be16 vlan_tci; + u16 etype; + u16 vlan_tci; u8 src_addr[ETH_ALEN]; u8 dst_addr[ETH_ALEN]; }; -struct igc_nfc_filter { +struct 
igc_nfc_rule { struct hlist_node nfc_node; - struct igc_nfc_input filter; - unsigned long cookie; - u16 etype_reg_index; + struct igc_nfc_filter filter; u16 sw_idx; u16 action; }; -struct igc_mac_addr { - u8 addr[ETH_ALEN]; - s8 queue; - u8 state; /* bitmask */ -}; - -#define IGC_MAC_STATE_DEFAULT 0x1 -#define IGC_MAC_STATE_IN_USE 0x2 -#define IGC_MAC_STATE_SRC_ADDR 0x4 - -#define IGC_MAX_RXNFC_FILTERS 16 +#define IGC_MAX_RXNFC_RULES 16 /* igc_desc_unused - calculate if we have unused descriptors */ static inline u16 igc_desc_unused(const struct igc_ring *ring) @@ -550,12 +543,11 @@ static inline s32 igc_read_phy_reg(struct igc_hw *hw, u32 offset, u16 *data) return 0; } -/* forward declaration */ void igc_reinit_locked(struct igc_adapter *); -int igc_add_filter(struct igc_adapter *adapter, - struct igc_nfc_filter *input); -int igc_erase_filter(struct igc_adapter *adapter, - struct igc_nfc_filter *input); +int igc_enable_nfc_rule(struct igc_adapter *adapter, + const struct igc_nfc_rule *rule); +int igc_disable_nfc_rule(struct igc_adapter *adapter, + const struct igc_nfc_rule *rule); void igc_ptp_init(struct igc_adapter *adapter); void igc_ptp_reset(struct igc_adapter *adapter); diff --git a/drivers/net/ethernet/intel/igc/igc_base.c b/drivers/net/ethernet/intel/igc/igc_base.c index f7fb18d8d8f5..cc5a6cf531c7 100644 --- a/drivers/net/ethernet/intel/igc/igc_base.c +++ b/drivers/net/ethernet/intel/igc/igc_base.c @@ -26,7 +26,7 @@ static s32 igc_reset_hw_base(struct igc_hw *hw) */ ret_val = igc_disable_pcie_master(hw); if (ret_val) - hw_dbg("PCI-E Master disable polling has failed.\n"); + hw_dbg("PCI-E Master disable polling has failed\n"); hw_dbg("Masking off all interrupts\n"); wr32(IGC_IMC, 0xffffffff); @@ -177,7 +177,7 @@ static s32 igc_init_phy_params_base(struct igc_hw *hw) */ ret_val = hw->phy.ops.reset(hw); if (ret_val) { - hw_dbg("Error resetting the PHY.\n"); + hw_dbg("Error resetting the PHY\n"); goto out; } @@ -367,7 +367,7 @@ void igc_rx_fifo_flush_base(struct igc_hw *hw) } if (ms_wait == 10) - pr_debug("Queue disable timed out after 10ms\n"); + hw_dbg("Queue disable timed out after 10ms\n"); /* Clear RLPML, RCTL.SBP, RFCTL.LEF, and set RCTL.LPE so that all * incoming packets are rejected. Set enable and wait 2ms so that diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index af0c03d77a39..45b567587ca9 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -47,7 +47,6 @@ /* Loop limit on how long we wait for auto-negotiation to complete */ #define COPPER_LINK_UP_LIMIT 10 #define PHY_AUTO_NEG_LIMIT 45 -#define PHY_FORCE_LIMIT 20 /* Number of 100 microseconds we wait for PCI Express master disable */ #define MASTER_DISABLE_TIMEOUT 800 @@ -63,6 +62,9 @@ * (RAR[15]) for our directed address used by controllers with * manageability enabled, allowing us room for 15 multicast addresses. */ +#define IGC_RAH_RAH_MASK 0x0000FFFF +#define IGC_RAH_ASEL_MASK 0x00030000 +#define IGC_RAH_ASEL_SRC_ADDR BIT(16) #define IGC_RAH_QSEL_MASK 0x000C0000 #define IGC_RAH_QSEL_SHIFT 18 #define IGC_RAH_QSEL_ENABLE BIT(28) @@ -164,11 +166,6 @@ /* For checksumming, the sum of all words in the NVM should equal 0xBABA. 
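The igc.h hunk above introduces a typed MAC-filter API (IGC_MAC_FILTER_TYPE_DST/IGC_MAC_FILTER_TYPE_SRC) plus VLAN-priority and EtherType filter helpers, and igc_defines.h adds the RAH ASEL bits that back the source-address variant. An illustration, not taken from the patch, of how a caller might combine them; the peer address, queue number and the function itself are invented for the example:

/* Illustration only: steer frames from one peer and all LLDP frames
 * to RX queue 1 using the new helpers.
 */
static int igc_example_steering(struct igc_adapter *adapter)
{
	static const u8 peer[ETH_ALEN] = { 0x02, 0x11, 0x22, 0x33, 0x44, 0x55 };
	int err;

	/* source-address match: sets IGC_RAH_ASEL_SRC_ADDR under the hood */
	err = igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_SRC, peer, 1);
	if (err)
		return err;

	/* EtherType match: LLDP is 0x88cc */
	err = igc_add_etype_filter(adapter, ETH_P_LLDP, 1);
	if (err)
		igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_SRC, peer);

	return err;
}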
*/ #define NVM_SUM 0xBABA - -#define NVM_PBA_OFFSET_0 8 -#define NVM_PBA_OFFSET_1 9 -#define NVM_RESERVED_WORD 0xFFFF -#define NVM_PBA_PTR_GUARD 0xFAFA #define NVM_WORD_SIZE_BASE_SHIFT 6 /* Collision related configuration parameters */ @@ -250,7 +247,6 @@ /* Interrupt Cause Set */ #define IGC_ICS_LSC IGC_ICR_LSC /* Link Status Change */ #define IGC_ICS_RXDMT0 IGC_ICR_RXDMT0 /* rx desc min. threshold */ -#define IGC_ICS_DRSTA IGC_ICR_DRSTA /* Device Reset Aserted */ #define IGC_ICR_DOUTSYNC 0x10000000 /* NIC DMA out of sync */ #define IGC_EITR_CNT_IGNR 0x80000000 /* Don't reset counters on write */ @@ -390,9 +386,6 @@ #define IGC_TSICR_INTERRUPTS IGC_TSICR_TXTS -/* PTP Queue Filter */ -#define IGC_ETQF_1588 BIT(30) - #define IGC_FTQF_VF_BP 0x00008000 #define IGC_FTQF_1588_TIME_STAMP 0x08000000 #define IGC_FTQF_MASK 0xF0000000 @@ -514,9 +507,9 @@ #define IGC_MAX_MAC_HDR_LEN 127 #define IGC_MAX_NETWORK_HDR_LEN 511 -#define IGC_VLAPQF_QUEUE_SEL(_n, q_idx) ((q_idx) << ((_n) * 4)) -#define IGC_VLAPQF_P_VALID(_n) (0x1 << (3 + (_n) * 4)) -#define IGC_VLAPQF_QUEUE_MASK 0x03 +#define IGC_VLANPQF_QSEL(_n, q_idx) ((q_idx) << ((_n) * 4)) +#define IGC_VLANPQF_VALID(_n) (0x1 << (3 + (_n) * 4)) +#define IGC_VLANPQF_QUEUE_MASK 0x03 #define IGC_ADVTXD_MACLEN_SHIFT 9 /* Adv ctxt desc mac len shift */ #define IGC_ADVTXD_TUCMD_IPV4 0x00000400 /* IP Packet Type:1=IPv4 */ diff --git a/drivers/net/ethernet/intel/igc/igc_diag.c b/drivers/net/ethernet/intel/igc/igc_diag.c new file mode 100644 index 000000000000..cc621970c0cd --- /dev/null +++ b/drivers/net/ethernet/intel/igc/igc_diag.c @@ -0,0 +1,186 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Intel Corporation */ + +#include "igc.h" +#include "igc_diag.h" + +static struct igc_reg_test reg_test[] = { + { IGC_FCAL, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { IGC_FCAH, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF }, + { IGC_FCT, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF }, + { IGC_RDBAH(0), 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { IGC_RDBAL(0), 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFF80 }, + { IGC_RDLEN(0), 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF }, + { IGC_RDT(0), 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { IGC_FCRTH, 1, PATTERN_TEST, 0x0003FFF0, 0x0003FFF0 }, + { IGC_FCTTV, 1, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { IGC_TIPG, 1, PATTERN_TEST, 0x3FFFFFFF, 0x3FFFFFFF }, + { IGC_TDBAH(0), 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { IGC_TDBAL(0), 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFF80 }, + { IGC_TDLEN(0), 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF }, + { IGC_TDT(0), 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { IGC_RCTL, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 }, + { IGC_RCTL, 1, SET_READ_TEST, 0x04CFB2FE, 0x003FFFFB }, + { IGC_RCTL, 1, SET_READ_TEST, 0x04CFB2FE, 0xFFFFFFFF }, + { IGC_TCTL, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 }, + { IGC_RA, 16, TABLE64_TEST_LO, + 0xFFFFFFFF, 0xFFFFFFFF }, + { IGC_RA, 16, TABLE64_TEST_HI, + 0x900FFFFF, 0xFFFFFFFF }, + { IGC_MTA, 128, TABLE32_TEST, + 0xFFFFFFFF, 0xFFFFFFFF }, + { 0, 0, 0, 0} +}; + +static bool reg_pattern_test(struct igc_adapter *adapter, u64 *data, int reg, + u32 mask, u32 write) +{ + struct igc_hw *hw = &adapter->hw; + u32 pat, val, before; + static const u32 test_pattern[] = { + 0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF + }; + + for (pat = 0; pat < ARRAY_SIZE(test_pattern); pat++) { + before = rd32(reg); + wr32(reg, test_pattern[pat] & write); + val = rd32(reg); + if (val != (test_pattern[pat] & write & mask)) { + netdev_err(adapter->netdev, + "pattern test reg %04X failed: got 
0x%08X expected 0x%08X\n", + reg, val, test_pattern[pat] & write & mask); + *data = reg; + wr32(reg, before); + return false; + } + wr32(reg, before); + } + return true; +} + +static bool reg_set_and_check(struct igc_adapter *adapter, u64 *data, int reg, + u32 mask, u32 write) +{ + struct igc_hw *hw = &adapter->hw; + u32 val, before; + + before = rd32(reg); + wr32(reg, write & mask); + val = rd32(reg); + if ((write & mask) != (val & mask)) { + netdev_err(adapter->netdev, + "set/check reg %04X test failed: got 0x%08X expected 0x%08X\n", + reg, (val & mask), (write & mask)); + *data = reg; + wr32(reg, before); + return false; + } + wr32(reg, before); + return true; +} + +bool igc_reg_test(struct igc_adapter *adapter, u64 *data) +{ + struct igc_reg_test *test = reg_test; + struct igc_hw *hw = &adapter->hw; + u32 value, before, after; + u32 i, toggle, b = false; + + /* Because the status register is such a special case, + * we handle it separately from the rest of the register + * tests. Some bits are read-only, some toggle, and some + * are writeable. + */ + toggle = 0x6800D3; + before = rd32(IGC_STATUS); + value = before & toggle; + wr32(IGC_STATUS, toggle); + after = rd32(IGC_STATUS) & toggle; + if (value != after) { + netdev_err(adapter->netdev, + "failed STATUS register test got: 0x%08X expected: 0x%08X\n", + after, value); + *data = 1; + return false; + } + /* restore previous status */ + wr32(IGC_STATUS, before); + + /* Perform the remainder of the register test, looping through + * the test table until we either fail or reach the null entry. + */ + while (test->reg) { + for (i = 0; i < test->array_len; i++) { + switch (test->test_type) { + case PATTERN_TEST: + b = reg_pattern_test(adapter, data, + test->reg + (i * 0x40), + test->mask, + test->write); + break; + case SET_READ_TEST: + b = reg_set_and_check(adapter, data, + test->reg + (i * 0x40), + test->mask, + test->write); + break; + case TABLE64_TEST_LO: + b = reg_pattern_test(adapter, data, + test->reg + (i * 8), + test->mask, + test->write); + break; + case TABLE64_TEST_HI: + b = reg_pattern_test(adapter, data, + test->reg + 4 + (i * 8), + test->mask, + test->write); + break; + case TABLE32_TEST: + b = reg_pattern_test(adapter, data, + test->reg + (i * 4), + test->mask, + test->write); + break; + } + if (!b) + return false; + } + test++; + } + *data = 0; + return true; +} + +bool igc_eeprom_test(struct igc_adapter *adapter, u64 *data) +{ + struct igc_hw *hw = &adapter->hw; + + *data = 0; + + if (hw->nvm.ops.validate(hw) != IGC_SUCCESS) { + *data = 1; + return false; + } + + return true; +} + +bool igc_link_test(struct igc_adapter *adapter, u64 *data) +{ + bool link_up; + + *data = 0; + + /* add delay to give enough time for autonegotiation to finish */ + if (adapter->hw.mac.autoneg) + ssleep(5); + + link_up = igc_has_link(adapter); + if (!link_up) { + *data = 1; + return false; + } + + return true; +} diff --git a/drivers/net/ethernet/intel/igc/igc_diag.h b/drivers/net/ethernet/intel/igc/igc_diag.h new file mode 100644 index 000000000000..600658e33bec --- /dev/null +++ b/drivers/net/ethernet/intel/igc/igc_diag.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2020 Intel Corporation */ + +bool igc_reg_test(struct igc_adapter *adapter, u64 *data); +bool igc_eeprom_test(struct igc_adapter *adapter, u64 *data); +bool igc_link_test(struct igc_adapter *adapter, u64 *data); + +struct igc_reg_test { + u16 reg; + u8 array_len; + u8 test_type; + u32 mask; + u32 write; +};
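reg_pattern_test() above compares the read-back value against pattern & write & mask, so bits outside either mask can never fail the test. A worked example of that arithmetic using the IGC_RDBAL(0) row of reg_test[] (mask = write = 0xFFFFFF80, so the seven hard-wired-zero low address bits are ignored); standalone userspace C, not driver code:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	const uint32_t mask = 0xFFFFFF80, write = 0xFFFFFF80;
	const uint32_t pat = 0x5A5A5A5A;	/* first entry of test_pattern[] */
	uint32_t wrote = pat & write;		/* value the test writes */
	uint32_t expect = pat & write & mask;	/* value it must read back */

	assert(wrote == 0x5A5A5A00);
	assert(expect == 0x5A5A5A00);	/* low 7 bits masked away */
	return 0;
}

For reference, the dispatch in igc_reg_test() above maps each test type to a stride: PATTERN_TEST and SET_READ_TEST step 0x40 bytes per array element (the per-queue register copies such as IGC_RDBAL(n)), TABLE32_TEST steps 4 bytes, and TABLE64_TEST_LO/TABLE64_TEST_HI step 8 bytes with HI offset by 4 (the RAL/RAH halves of the 16 IGC_RA entries).

+ +/* In the hardware, registers are laid out 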
either singly, in arrays + * spaced 0x40 bytes apart, or in contiguous tables. We assume + * most tests take place on arrays or single registers (handled + * as a single-element array) and special-case the tables. + * Table tests are always pattern tests. + * + * We also make provision for some required setup steps by specifying + * registers to be written without any read-back testing. + */ + +#define PATTERN_TEST 1 +#define SET_READ_TEST 2 +#define TABLE32_TEST 3 +#define TABLE64_TEST_LO 4 +#define TABLE64_TEST_HI 5 diff --git a/drivers/net/ethernet/intel/igc/igc_dump.c b/drivers/net/ethernet/intel/igc/igc_dump.c index 657ab50ae296..4b9ec7d0b727 100644 --- a/drivers/net/ethernet/intel/igc/igc_dump.c +++ b/drivers/net/ethernet/intel/igc/igc_dump.c @@ -35,10 +35,6 @@ static const struct igc_reg_info igc_reg_info_tbl[] = { {IGC_TDH(0), "TDH"}, {IGC_TDT(0), "TDT"}, {IGC_TXDCTL(0), "TXDCTL"}, - {IGC_TDFH, "TDFH"}, - {IGC_TDFT, "TDFT"}, - {IGC_TDFHS, "TDFHS"}, - {IGC_TDFPC, "TDFPC"}, /* List Terminator */ {} @@ -47,6 +43,7 @@ static const struct igc_reg_info igc_reg_info_tbl[] = { /* igc_regdump - register printout routine */ static void igc_regdump(struct igc_hw *hw, struct igc_reg_info *reginfo) { + struct net_device *dev = igc_get_hw_dev(hw); int n = 0; char rname[16]; u32 regs[8]; @@ -101,13 +98,14 @@ static void igc_regdump(struct igc_hw *hw, struct igc_reg_info *reginfo) regs[n] = rd32(IGC_TXDCTL(n)); break; default: - pr_info("%-15s %08x\n", reginfo->name, rd32(reginfo->ofs)); + netdev_info(dev, "%-15s %08x\n", reginfo->name, + rd32(reginfo->ofs)); return; } snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]"); - pr_info("%-15s %08x %08x %08x %08x\n", rname, regs[0], regs[1], - regs[2], regs[3]); + netdev_info(dev, "%-15s %08x %08x %08x %08x\n", rname, regs[0], regs[1], + regs[2], regs[3]); } /* igc_rings_dump - Tx-rings and Rx-rings */ @@ -125,39 +123,34 @@ void igc_rings_dump(struct igc_adapter *adapter) if (!netif_msg_hw(adapter)) return; - /* Print netdevice Info */ - if (netdev) { - dev_info(&adapter->pdev->dev, "Net device Info\n"); - pr_info("Device Name state trans_start\n"); - pr_info("%-15s %016lX %016lX\n", netdev->name, - netdev->state, dev_trans_start(netdev)); - } + netdev_info(netdev, "Device info: state %016lX trans_start %016lX\n", + netdev->state, dev_trans_start(netdev)); /* Print TX Ring Summary */ - if (!netdev || !netif_running(netdev)) + if (!netif_running(netdev)) goto exit; - dev_info(&adapter->pdev->dev, "TX Rings Summary\n"); - pr_info("Queue [NTU] [NTC] [bi(ntc)->dma ] leng ntw timestamp\n"); + netdev_info(netdev, "TX Rings Summary\n"); + netdev_info(netdev, "Queue [NTU] [NTC] [bi(ntc)->dma ] leng ntw timestamp\n"); for (n = 0; n < adapter->num_tx_queues; n++) { struct igc_tx_buffer *buffer_info; tx_ring = adapter->tx_ring[n]; buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean]; - pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n", - n, tx_ring->next_to_use, tx_ring->next_to_clean, - (u64)dma_unmap_addr(buffer_info, dma), - dma_unmap_len(buffer_info, len), - buffer_info->next_to_watch, - (u64)buffer_info->time_stamp); + netdev_info(netdev, "%5d %5X %5X %016llX %04X %p %016llX\n", + n, tx_ring->next_to_use, tx_ring->next_to_clean, + (u64)dma_unmap_addr(buffer_info, dma), + dma_unmap_len(buffer_info, len), + buffer_info->next_to_watch, + (u64)buffer_info->time_stamp); } /* Print TX Rings */ if (!netif_msg_tx_done(adapter)) goto rx_ring_summary; - dev_info(&adapter->pdev->dev, "TX Rings Dump\n"); + netdev_info(netdev, "TX Rings Dump\n"); /* Transmit 
Descriptor Formats * @@ -172,10 +165,11 @@ void igc_rings_dump(struct igc_adapter *adapter) for (n = 0; n < adapter->num_tx_queues; n++) { tx_ring = adapter->tx_ring[n]; - pr_info("------------------------------------\n"); - pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index); - pr_info("------------------------------------\n"); - pr_info("T [desc] [address 63:0 ] [PlPOCIStDDM Ln] [bi->dma ] leng ntw timestamp bi->skb\n"); + netdev_info(netdev, "------------------------------------\n"); + netdev_info(netdev, "TX QUEUE INDEX = %d\n", + tx_ring->queue_index); + netdev_info(netdev, "------------------------------------\n"); + netdev_info(netdev, "T [desc] [address 63:0 ] [PlPOCIStDDM Ln] [bi->dma ] leng ntw timestamp bi->skb\n"); for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) { const char *next_desc; @@ -194,14 +188,14 @@ void igc_rings_dump(struct igc_adapter *adapter) else next_desc = ""; - pr_info("T [0x%03X] %016llX %016llX %016llX %04X %p %016llX %p%s\n", - i, le64_to_cpu(u0->a), - le64_to_cpu(u0->b), - (u64)dma_unmap_addr(buffer_info, dma), - dma_unmap_len(buffer_info, len), - buffer_info->next_to_watch, - (u64)buffer_info->time_stamp, - buffer_info->skb, next_desc); + netdev_info(netdev, "T [0x%03X] %016llX %016llX %016llX %04X %p %016llX %p%s\n", + i, le64_to_cpu(u0->a), + le64_to_cpu(u0->b), + (u64)dma_unmap_addr(buffer_info, dma), + dma_unmap_len(buffer_info, len), + buffer_info->next_to_watch, + (u64)buffer_info->time_stamp, + buffer_info->skb, next_desc); if (netif_msg_pktdata(adapter) && buffer_info->skb) print_hex_dump(KERN_INFO, "", @@ -214,19 +208,19 @@ void igc_rings_dump(struct igc_adapter *adapter) /* Print RX Rings Summary */ rx_ring_summary: - dev_info(&adapter->pdev->dev, "RX Rings Summary\n"); - pr_info("Queue [NTU] [NTC]\n"); + netdev_info(netdev, "RX Rings Summary\n"); + netdev_info(netdev, "Queue [NTU] [NTC]\n"); for (n = 0; n < adapter->num_rx_queues; n++) { rx_ring = adapter->rx_ring[n]; - pr_info(" %5d %5X %5X\n", - n, rx_ring->next_to_use, rx_ring->next_to_clean); + netdev_info(netdev, "%5d %5X %5X\n", n, rx_ring->next_to_use, + rx_ring->next_to_clean); } /* Print RX Rings */ if (!netif_msg_rx_status(adapter)) goto exit; - dev_info(&adapter->pdev->dev, "RX Rings Dump\n"); + netdev_info(netdev, "RX Rings Dump\n"); /* Advanced Receive Descriptor (Read) Format * 63 1 0 @@ -251,11 +245,12 @@ rx_ring_summary: for (n = 0; n < adapter->num_rx_queues; n++) { rx_ring = adapter->rx_ring[n]; - pr_info("------------------------------------\n"); - pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index); - pr_info("------------------------------------\n"); - pr_info("R [desc] [ PktBuf A0] [ HeadBuf DD] [bi->dma ] [bi->skb] <-- Adv Rx Read format\n"); - pr_info("RWB[desc] [PcsmIpSHl PtRs] [vl er S cks ln] ---------------- [bi->skb] <-- Adv Rx Write-Back format\n"); + netdev_info(netdev, "------------------------------------\n"); + netdev_info(netdev, "RX QUEUE INDEX = %d\n", + rx_ring->queue_index); + netdev_info(netdev, "------------------------------------\n"); + netdev_info(netdev, "R [desc] [ PktBuf A0] [ HeadBuf DD] [bi->dma ] [bi->skb] <-- Adv Rx Read format\n"); + netdev_info(netdev, "RWB[desc] [PcsmIpSHl PtRs] [vl er S cks ln] ---------------- [bi->skb] <-- Adv Rx Write-Back format\n"); for (i = 0; i < rx_ring->count; i++) { const char *next_desc; @@ -275,18 +270,18 @@ rx_ring_summary: if (staterr & IGC_RXD_STAT_DD) { /* Descriptor Done */ - pr_info("%s[0x%03X] %016llX %016llX ---------------- %s\n", - "RWB", i, - le64_to_cpu(u0->a), - le64_to_cpu(u0->b), - 
next_desc); + netdev_info(netdev, "%s[0x%03X] %016llX %016llX ---------------- %s\n", + "RWB", i, + le64_to_cpu(u0->a), + le64_to_cpu(u0->b), + next_desc); } else { - pr_info("%s[0x%03X] %016llX %016llX %016llX %s\n", - "R ", i, - le64_to_cpu(u0->a), - le64_to_cpu(u0->b), - (u64)buffer_info->dma, - next_desc); + netdev_info(netdev, "%s[0x%03X] %016llX %016llX %016llX %s\n", + "R ", i, + le64_to_cpu(u0->a), + le64_to_cpu(u0->b), + (u64)buffer_info->dma, + next_desc); if (netif_msg_pktdata(adapter) && buffer_info->dma && buffer_info->page) { @@ -314,8 +309,8 @@ void igc_regs_dump(struct igc_adapter *adapter) struct igc_reg_info *reginfo; /* Print Registers */ - dev_info(&adapter->pdev->dev, "Register Dump\n"); - pr_info(" Register Name Value\n"); + netdev_info(adapter->netdev, "Register Dump\n"); + netdev_info(adapter->netdev, "Register Name Value\n"); for (reginfo = (struct igc_reg_info *)igc_reg_info_tbl; reginfo->name; reginfo++) { igc_regdump(hw, reginfo); diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index 0a8c4a7412a4..66e0760a8f9e 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -6,6 +6,7 @@ #include <linux/pm_runtime.h> #include "igc.h" +#include "igc_diag.h" /* forward declaration */ struct igc_stats { @@ -123,8 +124,8 @@ static const char igc_priv_flags_strings[][ETH_GSTRING_LEN] = { #define IGC_PRIV_FLAGS_STR_LEN ARRAY_SIZE(igc_priv_flags_strings) -static void igc_get_drvinfo(struct net_device *netdev, - struct ethtool_drvinfo *drvinfo) +static void igc_ethtool_get_drvinfo(struct net_device *netdev, + struct ethtool_drvinfo *drvinfo) { struct igc_adapter *adapter = netdev_priv(netdev); @@ -138,13 +139,13 @@ static void igc_get_drvinfo(struct net_device *netdev, drvinfo->n_priv_flags = IGC_PRIV_FLAGS_STR_LEN; } -static int igc_get_regs_len(struct net_device *netdev) +static int igc_ethtool_get_regs_len(struct net_device *netdev) { return IGC_REGS_LEN * sizeof(u32); } -static void igc_get_regs(struct net_device *netdev, - struct ethtool_regs *regs, void *p) +static void igc_ethtool_get_regs(struct net_device *netdev, + struct ethtool_regs *regs, void *p) { struct igc_adapter *adapter = netdev_priv(netdev); struct igc_hw *hw = &adapter->hw; @@ -315,9 +316,15 @@ static void igc_get_regs(struct net_device *netdev, regs_buff[172 + i] = rd32(IGC_RAL(i)); for (i = 0; i < 16; i++) regs_buff[188 + i] = rd32(IGC_RAH(i)); + + regs_buff[204] = rd32(IGC_VLANPQF); + + for (i = 0; i < 8; i++) + regs_buff[205 + i] = rd32(IGC_ETQF(i)); } -static void igc_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) +static void igc_ethtool_get_wol(struct net_device *netdev, + struct ethtool_wolinfo *wol) { struct igc_adapter *adapter = netdev_priv(netdev); @@ -348,7 +355,8 @@ static void igc_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) wol->wolopts |= WAKE_PHY; } -static int igc_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) +static int igc_ethtool_set_wol(struct net_device *netdev, + struct ethtool_wolinfo *wol) { struct igc_adapter *adapter = netdev_priv(netdev); @@ -376,21 +384,21 @@ static int igc_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) return 0; } -static u32 igc_get_msglevel(struct net_device *netdev) +static u32 igc_ethtool_get_msglevel(struct net_device *netdev) { struct igc_adapter *adapter = netdev_priv(netdev); return adapter->msg_enable; } -static void igc_set_msglevel(struct net_device *netdev, u32 data) 
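Everything igc_rings_dump()/igc_regs_dump() print above is gated by netif_msg_* bits in adapter->msg_enable, which the msglevel ethtool hooks being renamed here expose to userspace. A small sketch of the gating; the wrapper function is invented for the example, while netif_msg_hw() and igc_rings_dump() are existing names:

/* Illustration only: how msg_enable gates the dumps above. Userspace
 * would normally set the bit with "ethtool -s ethX msglvl hw on"
 * rather than from driver code.
 */
static void igc_example_dump(struct igc_adapter *adapter)
{
	adapter->msg_enable |= NETIF_MSG_HW;

	if (netif_msg_hw(adapter))	/* now true */
		igc_rings_dump(adapter);
}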
+static void igc_ethtool_set_msglevel(struct net_device *netdev, u32 data) { struct igc_adapter *adapter = netdev_priv(netdev); adapter->msg_enable = data; } -static int igc_nway_reset(struct net_device *netdev) +static int igc_ethtool_nway_reset(struct net_device *netdev) { struct igc_adapter *adapter = netdev_priv(netdev); @@ -399,7 +407,7 @@ static int igc_nway_reset(struct net_device *netdev) return 0; } -static u32 igc_get_link(struct net_device *netdev) +static u32 igc_ethtool_get_link(struct net_device *netdev) { struct igc_adapter *adapter = netdev_priv(netdev); struct igc_mac_info *mac = &adapter->hw.mac; @@ -416,15 +424,15 @@ static u32 igc_get_link(struct net_device *netdev) return igc_has_link(adapter); } -static int igc_get_eeprom_len(struct net_device *netdev) +static int igc_ethtool_get_eeprom_len(struct net_device *netdev) { struct igc_adapter *adapter = netdev_priv(netdev); return adapter->hw.nvm.word_size * 2; } -static int igc_get_eeprom(struct net_device *netdev, - struct ethtool_eeprom *eeprom, u8 *bytes) +static int igc_ethtool_get_eeprom(struct net_device *netdev, + struct ethtool_eeprom *eeprom, u8 *bytes) { struct igc_adapter *adapter = netdev_priv(netdev); struct igc_hw *hw = &adapter->hw; @@ -470,8 +478,8 @@ static int igc_get_eeprom(struct net_device *netdev, return ret_val; } -static int igc_set_eeprom(struct net_device *netdev, - struct ethtool_eeprom *eeprom, u8 *bytes) +static int igc_ethtool_set_eeprom(struct net_device *netdev, + struct ethtool_eeprom *eeprom, u8 *bytes) { struct igc_adapter *adapter = netdev_priv(netdev); struct igc_hw *hw = &adapter->hw; @@ -538,8 +546,8 @@ static int igc_set_eeprom(struct net_device *netdev, return ret_val; } -static void igc_get_ringparam(struct net_device *netdev, - struct ethtool_ringparam *ring) +static void igc_ethtool_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) { struct igc_adapter *adapter = netdev_priv(netdev); @@ -549,8 +557,8 @@ static void igc_get_ringparam(struct net_device *netdev, ring->tx_pending = adapter->tx_ring_count; } -static int igc_set_ringparam(struct net_device *netdev, - struct ethtool_ringparam *ring) +static int igc_ethtool_set_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) { struct igc_adapter *adapter = netdev_priv(netdev); struct igc_ring *temp_ring; @@ -664,8 +672,8 @@ clear_reset: return err; } -static void igc_get_pauseparam(struct net_device *netdev, - struct ethtool_pauseparam *pause) +static void igc_ethtool_get_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pause) { struct igc_adapter *adapter = netdev_priv(netdev); struct igc_hw *hw = &adapter->hw; @@ -683,8 +691,8 @@ static void igc_get_pauseparam(struct net_device *netdev, } } -static int igc_set_pauseparam(struct net_device *netdev, - struct ethtool_pauseparam *pause) +static int igc_ethtool_set_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pause) { struct igc_adapter *adapter = netdev_priv(netdev); struct igc_hw *hw = &adapter->hw; @@ -723,7 +731,8 @@ static int igc_set_pauseparam(struct net_device *netdev, return retval; } -static void igc_get_strings(struct net_device *netdev, u32 stringset, u8 *data) +static void igc_ethtool_get_strings(struct net_device *netdev, u32 stringset, + u8 *data) { struct igc_adapter *adapter = netdev_priv(netdev); u8 *p = data; @@ -774,7 +783,7 @@ static void igc_get_strings(struct net_device *netdev, u32 stringset, u8 *data) } } -static int igc_get_sset_count(struct net_device *netdev, int sset) 
+static int igc_ethtool_get_sset_count(struct net_device *netdev, int sset) { switch (sset) { case ETH_SS_STATS: @@ -788,7 +797,7 @@ static int igc_get_sset_count(struct net_device *netdev, int sset) } } -static void igc_get_ethtool_stats(struct net_device *netdev, +static void igc_ethtool_get_stats(struct net_device *netdev, struct ethtool_stats *stats, u64 *data) { struct igc_adapter *adapter = netdev_priv(netdev); @@ -844,8 +853,8 @@ static void igc_get_ethtool_stats(struct net_device *netdev, spin_unlock(&adapter->stats64_lock); } -static int igc_get_coalesce(struct net_device *netdev, - struct ethtool_coalesce *ec) +static int igc_ethtool_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec) { struct igc_adapter *adapter = netdev_priv(netdev); @@ -864,8 +873,8 @@ static int igc_get_coalesce(struct net_device *netdev, return 0; } -static int igc_set_coalesce(struct net_device *netdev, - struct ethtool_coalesce *ec) +static int igc_ethtool_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec) { struct igc_adapter *adapter = netdev_priv(netdev); int i; @@ -922,16 +931,15 @@ static int igc_set_coalesce(struct net_device *netdev, } #define ETHER_TYPE_FULL_MASK ((__force __be16)~0) -static int igc_get_ethtool_nfc_entry(struct igc_adapter *adapter, - struct ethtool_rxnfc *cmd) +static int igc_ethtool_get_nfc_rule(struct igc_adapter *adapter, + struct ethtool_rxnfc *cmd) { struct ethtool_rx_flow_spec *fsp = &cmd->fs; - struct igc_nfc_filter *rule = NULL; + struct igc_nfc_rule *rule = NULL; - /* report total rule count */ - cmd->data = IGC_MAX_RXNFC_FILTERS; + cmd->data = IGC_MAX_RXNFC_RULES; - hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) { + hlist_for_each_entry(rule, &adapter->nfc_rule_list, nfc_node) { if (fsp->location <= rule->sw_idx) break; } @@ -939,51 +947,48 @@ static int igc_get_ethtool_nfc_entry(struct igc_adapter *adapter, if (!rule || fsp->location != rule->sw_idx) return -EINVAL; - if (rule->filter.match_flags) { - fsp->flow_type = ETHER_FLOW; - fsp->ring_cookie = rule->action; - if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) { - fsp->h_u.ether_spec.h_proto = rule->filter.etype; - fsp->m_u.ether_spec.h_proto = ETHER_TYPE_FULL_MASK; - } - if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) { - fsp->flow_type |= FLOW_EXT; - fsp->h_ext.vlan_tci = rule->filter.vlan_tci; - fsp->m_ext.vlan_tci = htons(VLAN_PRIO_MASK); - } - if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) { - ether_addr_copy(fsp->h_u.ether_spec.h_dest, - rule->filter.dst_addr); - /* As we only support matching by the full - * mask, return the mask to userspace - */ - eth_broadcast_addr(fsp->m_u.ether_spec.h_dest); - } - if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) { - ether_addr_copy(fsp->h_u.ether_spec.h_source, - rule->filter.src_addr); - /* As we only support matching by the full - * mask, return the mask to userspace - */ - eth_broadcast_addr(fsp->m_u.ether_spec.h_source); - } + if (!rule->filter.match_flags) + return -EINVAL; - return 0; + fsp->flow_type = ETHER_FLOW; + fsp->ring_cookie = rule->action; + + if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) { + fsp->h_u.ether_spec.h_proto = htons(rule->filter.etype); + fsp->m_u.ether_spec.h_proto = ETHER_TYPE_FULL_MASK; + } + + if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) { + fsp->flow_type |= FLOW_EXT; + fsp->h_ext.vlan_tci = htons(rule->filter.vlan_tci); + fsp->m_ext.vlan_tci = htons(VLAN_PRIO_MASK); + } + + if (rule->filter.match_flags & 
IGC_FILTER_FLAG_DST_MAC_ADDR) { + ether_addr_copy(fsp->h_u.ether_spec.h_dest, + rule->filter.dst_addr); + eth_broadcast_addr(fsp->m_u.ether_spec.h_dest); } - return -EINVAL; + + if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) { + ether_addr_copy(fsp->h_u.ether_spec.h_source, + rule->filter.src_addr); + eth_broadcast_addr(fsp->m_u.ether_spec.h_source); + } + + return 0; } -static int igc_get_ethtool_nfc_all(struct igc_adapter *adapter, - struct ethtool_rxnfc *cmd, - u32 *rule_locs) +static int igc_ethtool_get_nfc_rules(struct igc_adapter *adapter, + struct ethtool_rxnfc *cmd, + u32 *rule_locs) { - struct igc_nfc_filter *rule; + struct igc_nfc_rule *rule; int cnt = 0; - /* report total rule count */ - cmd->data = IGC_MAX_RXNFC_FILTERS; + cmd->data = IGC_MAX_RXNFC_RULES; - hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) { + hlist_for_each_entry(rule, &adapter->nfc_rule_list, nfc_node) { if (cnt == cmd->rule_cnt) return -EMSGSIZE; rule_locs[cnt] = rule->sw_idx; @@ -995,8 +1000,8 @@ static int igc_get_ethtool_nfc_all(struct igc_adapter *adapter, return 0; } -static int igc_get_rss_hash_opts(struct igc_adapter *adapter, - struct ethtool_rxnfc *cmd) +static int igc_ethtool_get_rss_hash_opts(struct igc_adapter *adapter, + struct ethtool_rxnfc *cmd) { cmd->data = 0; @@ -1045,41 +1050,33 @@ static int igc_get_rss_hash_opts(struct igc_adapter *adapter, return 0; } -static int igc_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd, - u32 *rule_locs) +static int igc_ethtool_get_rxnfc(struct net_device *dev, + struct ethtool_rxnfc *cmd, u32 *rule_locs) { struct igc_adapter *adapter = netdev_priv(dev); - int ret = -EOPNOTSUPP; switch (cmd->cmd) { case ETHTOOL_GRXRINGS: cmd->data = adapter->num_rx_queues; - ret = 0; - break; + return 0; case ETHTOOL_GRXCLSRLCNT: - cmd->rule_cnt = adapter->nfc_filter_count; - ret = 0; - break; + cmd->rule_cnt = adapter->nfc_rule_count; + return 0; case ETHTOOL_GRXCLSRULE: - ret = igc_get_ethtool_nfc_entry(adapter, cmd); - break; + return igc_ethtool_get_nfc_rule(adapter, cmd); case ETHTOOL_GRXCLSRLALL: - ret = igc_get_ethtool_nfc_all(adapter, cmd, rule_locs); - break; + return igc_ethtool_get_nfc_rules(adapter, cmd, rule_locs); case ETHTOOL_GRXFH: - ret = igc_get_rss_hash_opts(adapter, cmd); - break; + return igc_ethtool_get_rss_hash_opts(adapter, cmd); default: - break; + return -EOPNOTSUPP; } - - return ret; } #define UDP_RSS_FLAGS (IGC_FLAG_RSS_FIELD_IPV4_UDP | \ IGC_FLAG_RSS_FIELD_IPV6_UDP) -static int igc_set_rss_hash_opt(struct igc_adapter *adapter, - struct ethtool_rxnfc *nfc) +static int igc_ethtool_set_rss_hash_opt(struct igc_adapter *adapter, + struct ethtool_rxnfc *nfc) { u32 flags = adapter->flags; @@ -1154,8 +1151,8 @@ static int igc_set_rss_hash_opt(struct igc_adapter *adapter, if ((flags & UDP_RSS_FLAGS) && !(adapter->flags & UDP_RSS_FLAGS)) - dev_err(&adapter->pdev->dev, - "enabling UDP RSS: fragmented packets may arrive out of order to the stack above\n"); + netdev_err(adapter->netdev, + "Enabling UDP RSS: fragmented packets may arrive out of order to the stack above\n"); adapter->flags = flags; @@ -1180,175 +1177,78 @@ static int igc_set_rss_hash_opt(struct igc_adapter *adapter, return 0; } -static int igc_rxnfc_write_etype_filter(struct igc_adapter *adapter, - struct igc_nfc_filter *input) +int igc_enable_nfc_rule(struct igc_adapter *adapter, + const struct igc_nfc_rule *rule) { - struct igc_hw *hw = &adapter->hw; - u8 i; - u32 etqf; - u16 etype; - - /* find an empty etype filter register */ - for (i = 0; i < 
MAX_ETYPE_FILTER; ++i) { - if (!adapter->etype_bitmap[i]) - break; - } - if (i == MAX_ETYPE_FILTER) { - dev_err(&adapter->pdev->dev, "ethtool -N: etype filters are all used.\n"); - return -EINVAL; - } - - adapter->etype_bitmap[i] = true; - - etqf = rd32(IGC_ETQF(i)); - etype = ntohs(input->filter.etype & ETHER_TYPE_FULL_MASK); - - etqf |= IGC_ETQF_FILTER_ENABLE; - etqf &= ~IGC_ETQF_ETYPE_MASK; - etqf |= (etype & IGC_ETQF_ETYPE_MASK); - - etqf &= ~IGC_ETQF_QUEUE_MASK; - etqf |= ((input->action << IGC_ETQF_QUEUE_SHIFT) - & IGC_ETQF_QUEUE_MASK); - etqf |= IGC_ETQF_QUEUE_ENABLE; - - wr32(IGC_ETQF(i), etqf); - - input->etype_reg_index = i; - - return 0; -} - -static int igc_rxnfc_write_vlan_prio_filter(struct igc_adapter *adapter, - struct igc_nfc_filter *input) -{ - struct igc_hw *hw = &adapter->hw; - u8 vlan_priority; - u16 queue_index; - u32 vlapqf; - - vlapqf = rd32(IGC_VLAPQF); - vlan_priority = (ntohs(input->filter.vlan_tci) & VLAN_PRIO_MASK) - >> VLAN_PRIO_SHIFT; - queue_index = (vlapqf >> (vlan_priority * 4)) & IGC_VLAPQF_QUEUE_MASK; - - /* check whether this vlan prio is already set */ - if (vlapqf & IGC_VLAPQF_P_VALID(vlan_priority) && - queue_index != input->action) { - dev_err(&adapter->pdev->dev, "ethtool rxnfc set vlan prio filter failed.\n"); - return -EEXIST; - } - - vlapqf |= IGC_VLAPQF_P_VALID(vlan_priority); - vlapqf |= IGC_VLAPQF_QUEUE_SEL(vlan_priority, input->action); - - wr32(IGC_VLAPQF, vlapqf); - - return 0; -} - -int igc_add_filter(struct igc_adapter *adapter, struct igc_nfc_filter *input) -{ - struct igc_hw *hw = &adapter->hw; int err = -EINVAL; - if (hw->mac.type == igc_i225 && - !(input->filter.match_flags & ~IGC_FILTER_FLAG_SRC_MAC_ADDR)) { - dev_err(&adapter->pdev->dev, - "i225 doesn't support flow classification rules specifying only source addresses.\n"); - return -EOPNOTSUPP; - } - - if (input->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) { - err = igc_rxnfc_write_etype_filter(adapter, input); + if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) { + err = igc_add_etype_filter(adapter, rule->filter.etype, + rule->action); if (err) return err; } - if (input->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) { - err = igc_add_mac_filter(adapter, input->filter.dst_addr, - input->action, 0); + if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) { + err = igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_SRC, + rule->filter.src_addr, rule->action); if (err) return err; } - if (input->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) { - err = igc_add_mac_filter(adapter, input->filter.src_addr, - input->action, - IGC_MAC_STATE_SRC_ADDR); + if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) { + err = igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, + rule->filter.dst_addr, rule->action); if (err) return err; } - if (input->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) - err = igc_rxnfc_write_vlan_prio_filter(adapter, input); - - return err; -} - -static void igc_clear_etype_filter_regs(struct igc_adapter *adapter, - u16 reg_index) -{ - struct igc_hw *hw = &adapter->hw; - u32 etqf = rd32(IGC_ETQF(reg_index)); - - etqf &= ~IGC_ETQF_QUEUE_ENABLE; - etqf &= ~IGC_ETQF_QUEUE_MASK; - etqf &= ~IGC_ETQF_FILTER_ENABLE; - - wr32(IGC_ETQF(reg_index), etqf); - - adapter->etype_bitmap[reg_index] = false; -} - -static void igc_clear_vlan_prio_filter(struct igc_adapter *adapter, - u16 vlan_tci) -{ - struct igc_hw *hw = &adapter->hw; - u8 vlan_priority; - u32 vlapqf; + if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) { + int prio = 
(rule->filter.vlan_tci & VLAN_PRIO_MASK) >> + VLAN_PRIO_SHIFT; - vlan_priority = (vlan_tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; - - vlapqf = rd32(IGC_VLAPQF); - vlapqf &= ~IGC_VLAPQF_P_VALID(vlan_priority); - vlapqf &= ~IGC_VLAPQF_QUEUE_SEL(vlan_priority, - IGC_VLAPQF_QUEUE_MASK); + err = igc_add_vlan_prio_filter(adapter, prio, rule->action); + if (err) + return err; + } - wr32(IGC_VLAPQF, vlapqf); + return 0; } -int igc_erase_filter(struct igc_adapter *adapter, struct igc_nfc_filter *input) +int igc_disable_nfc_rule(struct igc_adapter *adapter, + const struct igc_nfc_rule *rule) { - if (input->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) - igc_clear_etype_filter_regs(adapter, - input->etype_reg_index); + if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) + igc_del_etype_filter(adapter, rule->filter.etype); - if (input->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) - igc_clear_vlan_prio_filter(adapter, - ntohs(input->filter.vlan_tci)); + if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) { + int prio = (rule->filter.vlan_tci & VLAN_PRIO_MASK) >> + VLAN_PRIO_SHIFT; + igc_del_vlan_prio_filter(adapter, prio); + } - if (input->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) - igc_del_mac_filter(adapter, input->filter.src_addr, - IGC_MAC_STATE_SRC_ADDR); + if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) + igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_SRC, + rule->filter.src_addr); - if (input->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) - igc_del_mac_filter(adapter, input->filter.dst_addr, 0); + if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) + igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, + rule->filter.dst_addr); return 0; } -static int igc_update_ethtool_nfc_entry(struct igc_adapter *adapter, - struct igc_nfc_filter *input, - u16 sw_idx) +static int igc_ethtool_update_nfc_rule(struct igc_adapter *adapter, + struct igc_nfc_rule *input, + u16 sw_idx) { - struct igc_nfc_filter *rule, *parent; + struct igc_nfc_rule *rule, *parent; int err = -EINVAL; parent = NULL; rule = NULL; - hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) { + hlist_for_each_entry(rule, &adapter->nfc_rule_list, nfc_node) { /* hash found, or no matching entry */ if (rule->sw_idx >= sw_idx) break; @@ -1358,11 +1258,11 @@ static int igc_update_ethtool_nfc_entry(struct igc_adapter *adapter, /* if there is an old rule occupying our place remove it */ if (rule && rule->sw_idx == sw_idx) { if (!input) - err = igc_erase_filter(adapter, rule); + err = igc_disable_nfc_rule(adapter, rule); hlist_del(&rule->nfc_node); kfree(rule); - adapter->nfc_filter_count--; + adapter->nfc_rule_count--; } /* If no input this was a delete, err should be 0 if a rule was @@ -1378,21 +1278,21 @@ static int igc_update_ethtool_nfc_entry(struct igc_adapter *adapter, if (parent) hlist_add_behind(&input->nfc_node, &parent->nfc_node); else - hlist_add_head(&input->nfc_node, &adapter->nfc_filter_list); + hlist_add_head(&input->nfc_node, &adapter->nfc_rule_list); /* update counts */ - adapter->nfc_filter_count++; + adapter->nfc_rule_count++; return 0; } -static int igc_add_ethtool_nfc_entry(struct igc_adapter *adapter, - struct ethtool_rxnfc *cmd) +static int igc_ethtool_add_nfc_rule(struct igc_adapter *adapter, + struct ethtool_rxnfc *cmd) { struct net_device *netdev = adapter->netdev; struct ethtool_rx_flow_spec *fsp = (struct ethtool_rx_flow_spec *)&cmd->fs; - struct igc_nfc_filter *input, *rule; + struct igc_nfc_rule *rule, *tmp; int err = 0; if (!(netdev->hw_features & 
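Both igc_enable_nfc_rule() and igc_disable_nfc_rule() above derive the VLANPQF slot the same way: mask the PCP bits out of the TCI and shift them down. A worked example of the extraction (standalone userspace C; VLAN_PRIO_MASK and VLAN_PRIO_SHIFT are redefined locally with their <linux/if_vlan.h> values):

#include <assert.h>
#include <stdint.h>

#define VLAN_PRIO_MASK	0xe000	/* as in <linux/if_vlan.h> */
#define VLAN_PRIO_SHIFT	13

int main(void)
{
	const uint16_t tci = 0xa005;	/* PCP 5, DEI 0, VID 5 */
	int prio = (tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;

	assert(prio == 5);	/* index programmed into VLANPQF */
	return 0;
}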
NETIF_F_NTUPLE)) @@ -1403,115 +1303,121 @@ static int igc_add_ethtool_nfc_entry(struct igc_adapter *adapter, */ if (fsp->ring_cookie == RX_CLS_FLOW_DISC || fsp->ring_cookie >= adapter->num_rx_queues) { - dev_err(&adapter->pdev->dev, "ethtool -N: The specified action is invalid\n"); + netdev_err(netdev, + "ethtool -N: The specified action is invalid\n"); return -EINVAL; } /* Don't allow indexes to exist outside of available space */ - if (fsp->location >= IGC_MAX_RXNFC_FILTERS) { - dev_err(&adapter->pdev->dev, "Location out of range\n"); + if (fsp->location >= IGC_MAX_RXNFC_RULES) { + netdev_err(netdev, "Location out of range\n"); return -EINVAL; } if ((fsp->flow_type & ~FLOW_EXT) != ETHER_FLOW) return -EINVAL; - input = kzalloc(sizeof(*input), GFP_KERNEL); - if (!input) + rule = kzalloc(sizeof(*rule), GFP_KERNEL); + if (!rule) return -ENOMEM; if (fsp->m_u.ether_spec.h_proto == ETHER_TYPE_FULL_MASK) { - input->filter.etype = fsp->h_u.ether_spec.h_proto; - input->filter.match_flags = IGC_FILTER_FLAG_ETHER_TYPE; + rule->filter.etype = ntohs(fsp->h_u.ether_spec.h_proto); + rule->filter.match_flags = IGC_FILTER_FLAG_ETHER_TYPE; } - /* Only support matching addresses by the full mask */ + /* Both source and destination address filters only support the full + * mask. + */ if (is_broadcast_ether_addr(fsp->m_u.ether_spec.h_source)) { - input->filter.match_flags |= IGC_FILTER_FLAG_SRC_MAC_ADDR; - ether_addr_copy(input->filter.src_addr, + rule->filter.match_flags |= IGC_FILTER_FLAG_SRC_MAC_ADDR; + ether_addr_copy(rule->filter.src_addr, fsp->h_u.ether_spec.h_source); } - /* Only support matching addresses by the full mask */ if (is_broadcast_ether_addr(fsp->m_u.ether_spec.h_dest)) { - input->filter.match_flags |= IGC_FILTER_FLAG_DST_MAC_ADDR; - ether_addr_copy(input->filter.dst_addr, + rule->filter.match_flags |= IGC_FILTER_FLAG_DST_MAC_ADDR; + ether_addr_copy(rule->filter.dst_addr, fsp->h_u.ether_spec.h_dest); } + if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR && + rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) { + netdev_dbg(netdev, "Filters with both dst and src are not supported\n"); + err = -EOPNOTSUPP; + goto err_out; + } + if ((fsp->flow_type & FLOW_EXT) && fsp->m_ext.vlan_tci) { if (fsp->m_ext.vlan_tci != htons(VLAN_PRIO_MASK)) { - err = -EINVAL; + netdev_dbg(netdev, "VLAN mask not supported\n"); + err = -EOPNOTSUPP; goto err_out; } - input->filter.vlan_tci = fsp->h_ext.vlan_tci; - input->filter.match_flags |= IGC_FILTER_FLAG_VLAN_TCI; + rule->filter.vlan_tci = ntohs(fsp->h_ext.vlan_tci); + rule->filter.match_flags |= IGC_FILTER_FLAG_VLAN_TCI; } - input->action = fsp->ring_cookie; - input->sw_idx = fsp->location; + rule->action = fsp->ring_cookie; + rule->sw_idx = fsp->location; - spin_lock(&adapter->nfc_lock); + spin_lock(&adapter->nfc_rule_lock); - hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) { - if (!memcmp(&input->filter, &rule->filter, - sizeof(input->filter))) { + hlist_for_each_entry(tmp, &adapter->nfc_rule_list, nfc_node) { + if (!memcmp(&rule->filter, &tmp->filter, + sizeof(rule->filter))) { err = -EEXIST; - dev_err(&adapter->pdev->dev, - "ethtool: this filter is already set\n"); + netdev_err(netdev, + "ethtool: this filter is already set\n"); goto err_out_w_lock; } } - err = igc_add_filter(adapter, input); + err = igc_enable_nfc_rule(adapter, rule); if (err) goto err_out_w_lock; - igc_update_ethtool_nfc_entry(adapter, input, input->sw_idx); + igc_ethtool_update_nfc_rule(adapter, rule, rule->sw_idx); - spin_unlock(&adapter->nfc_lock); + 
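For context, rules reaching the rewritten igc_ethtool_add_nfc_rule() path come from standard ethtool(8) invocations. The commands below only illustrate rule shapes the validation above accepts; the interface name, queue numbers and address are examples, and the syntax follows the ethtool manual rather than anything added by this patch:

/* Example userspace usage, illustration only:
 *
 *   # EtherType match: steer IPv4 frames to RX queue 2
 *   ethtool -N eth0 flow-type ether proto 0x0800 action 2
 *
 *   # destination-MAC match: only a full (all-ones) mask is accepted
 *   ethtool -N eth0 flow-type ether dst 02:11:22:33:44:55 action 1
 */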
spin_unlock(&adapter->nfc_rule_lock); return 0; err_out_w_lock: - spin_unlock(&adapter->nfc_lock); + spin_unlock(&adapter->nfc_rule_lock); err_out: - kfree(input); + kfree(rule); return err; } -static int igc_del_ethtool_nfc_entry(struct igc_adapter *adapter, - struct ethtool_rxnfc *cmd) +static int igc_ethtool_del_nfc_rule(struct igc_adapter *adapter, + struct ethtool_rxnfc *cmd) { struct ethtool_rx_flow_spec *fsp = (struct ethtool_rx_flow_spec *)&cmd->fs; int err; - spin_lock(&adapter->nfc_lock); - err = igc_update_ethtool_nfc_entry(adapter, NULL, fsp->location); - spin_unlock(&adapter->nfc_lock); + spin_lock(&adapter->nfc_rule_lock); + err = igc_ethtool_update_nfc_rule(adapter, NULL, fsp->location); + spin_unlock(&adapter->nfc_rule_lock); return err; } -static int igc_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) +static int igc_ethtool_set_rxnfc(struct net_device *dev, + struct ethtool_rxnfc *cmd) { struct igc_adapter *adapter = netdev_priv(dev); - int ret = -EOPNOTSUPP; switch (cmd->cmd) { case ETHTOOL_SRXFH: - ret = igc_set_rss_hash_opt(adapter, cmd); - break; + return igc_ethtool_set_rss_hash_opt(adapter, cmd); case ETHTOOL_SRXCLSRLINS: - ret = igc_add_ethtool_nfc_entry(adapter, cmd); - break; + return igc_ethtool_add_nfc_rule(adapter, cmd); case ETHTOOL_SRXCLSRLDEL: - ret = igc_del_ethtool_nfc_entry(adapter, cmd); + return igc_ethtool_del_nfc_rule(adapter, cmd); default: - break; + return -EOPNOTSUPP; } - - return ret; } void igc_write_rss_indir_tbl(struct igc_adapter *adapter) @@ -1536,13 +1442,13 @@ void igc_write_rss_indir_tbl(struct igc_adapter *adapter) } } -static u32 igc_get_rxfh_indir_size(struct net_device *netdev) +static u32 igc_ethtool_get_rxfh_indir_size(struct net_device *netdev) { return IGC_RETA_SIZE; } -static int igc_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, - u8 *hfunc) +static int igc_ethtool_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, + u8 *hfunc) { struct igc_adapter *adapter = netdev_priv(netdev); int i; @@ -1557,8 +1463,8 @@ static int igc_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, return 0; } -static int igc_set_rxfh(struct net_device *netdev, const u32 *indir, - const u8 *key, const u8 hfunc) +static int igc_ethtool_set_rxfh(struct net_device *netdev, const u32 *indir, + const u8 *key, const u8 hfunc) { struct igc_adapter *adapter = netdev_priv(netdev); u32 num_queues; @@ -1586,18 +1492,13 @@ static int igc_set_rxfh(struct net_device *netdev, const u32 *indir, return 0; } -static unsigned int igc_max_channels(struct igc_adapter *adapter) -{ - return igc_get_max_rss_queues(adapter); -} - -static void igc_get_channels(struct net_device *netdev, - struct ethtool_channels *ch) +static void igc_ethtool_get_channels(struct net_device *netdev, + struct ethtool_channels *ch) { struct igc_adapter *adapter = netdev_priv(netdev); /* Report maximum channels */ - ch->max_combined = igc_max_channels(adapter); + ch->max_combined = igc_get_max_rss_queues(adapter); /* Report info for other vector */ if (adapter->flags & IGC_FLAG_HAS_MSIX) { @@ -1608,8 +1509,8 @@ static void igc_get_channels(struct net_device *netdev, ch->combined_count = adapter->rss_queues; } -static int igc_set_channels(struct net_device *netdev, - struct ethtool_channels *ch) +static int igc_ethtool_set_channels(struct net_device *netdev, + struct ethtool_channels *ch) { struct igc_adapter *adapter = netdev_priv(netdev); unsigned int count = ch->combined_count; @@ -1624,7 +1525,7 @@ static int igc_set_channels(struct net_device *netdev, return 
-EINVAL; /* Verify the number of channels doesn't exceed hw limits */ - max_combined = igc_max_channels(adapter); + max_combined = igc_get_max_rss_queues(adapter); if (count > max_combined) return -EINVAL; @@ -1641,8 +1542,8 @@ static int igc_set_channels(struct net_device *netdev, return 0; } -static int igc_get_ts_info(struct net_device *dev, - struct ethtool_ts_info *info) +static int igc_ethtool_get_ts_info(struct net_device *dev, + struct ethtool_ts_info *info) { struct igc_adapter *adapter = netdev_priv(dev); @@ -1674,7 +1575,7 @@ static int igc_get_ts_info(struct net_device *dev, } } -static u32 igc_get_priv_flags(struct net_device *netdev) +static u32 igc_ethtool_get_priv_flags(struct net_device *netdev) { struct igc_adapter *adapter = netdev_priv(netdev); u32 priv_flags = 0; @@ -1685,7 +1586,7 @@ static u32 igc_get_priv_flags(struct net_device *netdev) return priv_flags; } -static int igc_set_priv_flags(struct net_device *netdev, u32 priv_flags) +static int igc_ethtool_set_priv_flags(struct net_device *netdev, u32 priv_flags) { struct igc_adapter *adapter = netdev_priv(netdev); unsigned int flags = adapter->flags; @@ -1720,8 +1621,8 @@ static void igc_ethtool_complete(struct net_device *netdev) pm_runtime_put(&adapter->pdev->dev); } -static int igc_get_link_ksettings(struct net_device *netdev, - struct ethtool_link_ksettings *cmd) +static int igc_ethtool_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *cmd) { struct igc_adapter *adapter = netdev_priv(netdev); struct igc_hw *hw = &adapter->hw; @@ -1827,10 +1728,12 @@ static int igc_get_link_ksettings(struct net_device *netdev, return 0; } -static int igc_set_link_ksettings(struct net_device *netdev, - const struct ethtool_link_ksettings *cmd) +static int +igc_ethtool_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *cmd) { struct igc_adapter *adapter = netdev_priv(netdev); + struct net_device *dev = adapter->netdev; struct igc_hw *hw = &adapter->hw; u32 advertising; @@ -1838,8 +1741,7 @@ static int igc_set_link_ksettings(struct net_device *netdev, * cannot be changed */ if (igc_check_reset_block(hw)) { - dev_err(&adapter->pdev->dev, - "Cannot change link characteristics when reset is active.\n"); + netdev_err(dev, "Cannot change link characteristics when reset is active\n"); return -EINVAL; } @@ -1850,7 +1752,7 @@ static int igc_set_link_ksettings(struct net_device *netdev, if (cmd->base.eth_tp_mdix_ctrl) { if (cmd->base.eth_tp_mdix_ctrl != ETH_TP_MDI_AUTO && cmd->base.autoneg != AUTONEG_ENABLE) { - dev_err(&adapter->pdev->dev, "forcing MDI/MDI-X state is not supported when link speed and/or duplex are forced\n"); + netdev_err(dev, "Forcing MDI/MDI-X state is not supported when link speed and/or duplex are forced\n"); return -EINVAL; } } @@ -1867,9 +1769,7 @@ static int igc_set_link_ksettings(struct net_device *netdev, if (adapter->fc_autoneg) hw->fc.requested_mode = igc_fc_default; } else { - /* calling this overrides forced MDI setting */ - dev_info(&adapter->pdev->dev, - "Force mode currently not supported\n"); + netdev_info(dev, "Force mode currently not supported\n"); } /* MDI-X => 2; MDI => 1; Auto => 3 */ @@ -1896,46 +1796,105 @@ static int igc_set_link_ksettings(struct net_device *netdev, return 0; } +static void igc_ethtool_diag_test(struct net_device *netdev, + struct ethtool_test *eth_test, u64 *data) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + bool if_running = netif_running(netdev); + + if (eth_test->flags == ETH_TEST_FL_OFFLINE) { + 
netdev_info(adapter->netdev, "Offline testing starting\n"); + set_bit(__IGC_TESTING, &adapter->state); + + /* Link test performed before hardware reset so autoneg doesn't + * interfere with test result + */ + if (!igc_link_test(adapter, &data[TEST_LINK])) + eth_test->flags |= ETH_TEST_FL_FAILED; + + if (if_running) + igc_close(netdev); + else + igc_reset(adapter); + + netdev_info(adapter->netdev, "Register testing starting\n"); + if (!igc_reg_test(adapter, &data[TEST_REG])) + eth_test->flags |= ETH_TEST_FL_FAILED; + + igc_reset(adapter); + + netdev_info(adapter->netdev, "EEPROM testing starting\n"); + if (!igc_eeprom_test(adapter, &data[TEST_EEP])) + eth_test->flags |= ETH_TEST_FL_FAILED; + + igc_reset(adapter); + + /* loopback and interrupt tests + * will be implemented in the future + */ + data[TEST_LOOP] = 0; + data[TEST_IRQ] = 0; + + clear_bit(__IGC_TESTING, &adapter->state); + if (if_running) + igc_open(netdev); + } else { + netdev_info(adapter->netdev, "Online testing starting\n"); + + /* register, eeprom, intr and loopback tests not run online */ + data[TEST_REG] = 0; + data[TEST_EEP] = 0; + data[TEST_IRQ] = 0; + data[TEST_LOOP] = 0; + + if (!igc_link_test(adapter, &data[TEST_LINK])) + eth_test->flags |= ETH_TEST_FL_FAILED; + } + + msleep_interruptible(4 * 1000); +}
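The new igc_ethtool_diag_test() above is exposed through the .self_test hook added at the bottom of the ops table that follows. For orientation, the standard way to drive it from userspace (plain ethtool(8) usage, not something this patch adds); the TEST_REG/TEST_EEP/TEST_IRQ/TEST_LOOP/TEST_LINK indices are assumed to be defined earlier in igc_ethtool.c, outside the visible hunks:

/* Example invocation, illustration only:
 *
 *   ethtool -t eth0 offline   # link + register + EEPROM tests
 *   ethtool -t eth0 online    # link test only
 *
 * Each data[] slot reports 0 on pass; on failure it holds 1 (or the
 * offending register offset for the register test).
 */

+ static const struct ethtool_ops igc_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS, - .get_drvinfo = igc_get_drvinfo, - .get_regs_len = igc_get_regs_len, - .get_regs = igc_get_regs, - .get_wol = igc_get_wol, - .set_wol = igc_set_wol, - .get_msglevel = igc_get_msglevel, - .set_msglevel = igc_set_msglevel, - .nway_reset = igc_nway_reset, - .get_link = igc_get_link, - .get_eeprom_len = igc_get_eeprom_len, - .get_eeprom = igc_get_eeprom, - .set_eeprom = igc_set_eeprom, - .get_ringparam = igc_get_ringparam, - .set_ringparam = igc_set_ringparam, - .get_pauseparam = igc_get_pauseparam, - .set_pauseparam = igc_set_pauseparam, - .get_strings = igc_get_strings, - .get_sset_count = igc_get_sset_count, - .get_ethtool_stats = igc_get_ethtool_stats, - .get_coalesce = igc_get_coalesce, - .set_coalesce = igc_set_coalesce, - .get_rxnfc = igc_get_rxnfc, - .set_rxnfc = igc_set_rxnfc, - .get_rxfh_indir_size = igc_get_rxfh_indir_size, - .get_rxfh = igc_get_rxfh, - .set_rxfh = igc_set_rxfh, - .get_ts_info = igc_get_ts_info, - .get_channels = igc_get_channels, - .set_channels = igc_set_channels, - .get_priv_flags = igc_get_priv_flags, - .set_priv_flags = igc_set_priv_flags, + .get_drvinfo = igc_ethtool_get_drvinfo, + .get_regs_len = igc_ethtool_get_regs_len, + .get_regs = igc_ethtool_get_regs, + .get_wol = igc_ethtool_get_wol, + .set_wol = igc_ethtool_set_wol, + .get_msglevel = igc_ethtool_get_msglevel, + .set_msglevel = igc_ethtool_set_msglevel, + .nway_reset = igc_ethtool_nway_reset, + .get_link = igc_ethtool_get_link, + .get_eeprom_len = igc_ethtool_get_eeprom_len, + .get_eeprom = igc_ethtool_get_eeprom, + .set_eeprom = igc_ethtool_set_eeprom, + .get_ringparam = igc_ethtool_get_ringparam, + .set_ringparam = igc_ethtool_set_ringparam, + .get_pauseparam = igc_ethtool_get_pauseparam, + .set_pauseparam = igc_ethtool_set_pauseparam, + .get_strings = igc_ethtool_get_strings, + .get_sset_count = igc_ethtool_get_sset_count, + .get_ethtool_stats = igc_ethtool_get_stats, + .get_coalesce = igc_ethtool_get_coalesce, + .set_coalesce = igc_ethtool_set_coalesce, + .get_rxnfc = igc_ethtool_get_rxnfc, + .set_rxnfc = igc_ethtool_set_rxnfc, + .get_rxfh_indir_size = igc_ethtool_get_rxfh_indir_size, + .get_rxfh = igc_ethtool_get_rxfh, + .set_rxfh 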
= igc_ethtool_set_rxfh, + .get_ts_info = igc_ethtool_get_ts_info, + .get_channels = igc_ethtool_get_channels, + .set_channels = igc_ethtool_set_channels, + .get_priv_flags = igc_ethtool_get_priv_flags, + .set_priv_flags = igc_ethtool_set_priv_flags, .begin = igc_ethtool_begin, .complete = igc_ethtool_complete, - .get_link_ksettings = igc_get_link_ksettings, - .set_link_ksettings = igc_set_link_ksettings, + .get_link_ksettings = igc_ethtool_get_link_ksettings, + .set_link_ksettings = igc_ethtool_set_link_ksettings, + .self_test = igc_ethtool_diag_test, }; -void igc_set_ethtool_ops(struct net_device *netdev) +void igc_ethtool_set_ops(struct net_device *netdev) { netdev->ethtool_ops = &igc_ethtool_ops; } diff --git a/drivers/net/ethernet/intel/igc/igc_mac.c b/drivers/net/ethernet/intel/igc/igc_mac.c index 12aa6b5fcb5d..89445ab02a98 100644 --- a/drivers/net/ethernet/intel/igc/igc_mac.c +++ b/drivers/net/ethernet/intel/igc/igc_mac.c @@ -307,12 +307,8 @@ void igc_clear_hw_cntrs_base(struct igc_hw *hw) rd32(IGC_ICTXQMTC); rd32(IGC_ICRXDMTC); - rd32(IGC_CBTMPC); - rd32(IGC_HTDPMC); - rd32(IGC_CBRMPC); rd32(IGC_RPTHC); rd32(IGC_HGPTC); - rd32(IGC_HTCBDPC); rd32(IGC_HGORCL); rd32(IGC_HGORCH); rd32(IGC_HGOTCL); diff --git a/drivers/net/ethernet/intel/igc/igc_mac.h b/drivers/net/ethernet/intel/igc/igc_mac.h index 832cccec87cd..b5963f86defb 100644 --- a/drivers/net/ethernet/intel/igc/igc_mac.h +++ b/drivers/net/ethernet/intel/igc/igc_mac.h @@ -8,10 +8,6 @@ #include "igc_phy.h" #include "igc_defines.h" -#ifndef IGC_REMOVED -#define IGC_REMOVED(a) (0) -#endif /* IGC_REMOVED */ - /* forward declaration */ s32 igc_disable_pcie_master(struct igc_hw *hw); s32 igc_check_for_copper_link(struct igc_hw *hw); diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 9d5f8287c704..f48d6127a220 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -76,7 +76,7 @@ static void igc_power_down_link(struct igc_adapter *adapter) void igc_reset(struct igc_adapter *adapter) { - struct pci_dev *pdev = adapter->pdev; + struct net_device *dev = adapter->netdev; struct igc_hw *hw = &adapter->hw; struct igc_fc_info *fc = &hw->fc; u32 pba, hwm; @@ -103,7 +103,7 @@ void igc_reset(struct igc_adapter *adapter) hw->mac.ops.reset_hw(hw); if (hw->mac.ops.init_hw(hw)) - dev_err(&pdev->dev, "Hardware Error\n"); + netdev_err(dev, "Error on hardware initialization\n"); if (!netif_running(adapter->netdev)) igc_power_down_link(adapter); @@ -288,6 +288,7 @@ static void igc_clean_all_tx_rings(struct igc_adapter *adapter) */ int igc_setup_tx_resources(struct igc_ring *tx_ring) { + struct net_device *ndev = tx_ring->netdev; struct device *dev = tx_ring->dev; int size = 0; @@ -313,8 +314,7 @@ int igc_setup_tx_resources(struct igc_ring *tx_ring) err: vfree(tx_ring->tx_buffer_info); - dev_err(dev, - "Unable to allocate memory for the transmit descriptor ring\n"); + netdev_err(ndev, "Unable to allocate memory for Tx descriptor ring\n"); return -ENOMEM; } @@ -326,14 +326,13 @@ err: */ static int igc_setup_all_tx_resources(struct igc_adapter *adapter) { - struct pci_dev *pdev = adapter->pdev; + struct net_device *dev = adapter->netdev; int i, err = 0; for (i = 0; i < adapter->num_tx_queues; i++) { err = igc_setup_tx_resources(adapter->tx_ring[i]); if (err) { - dev_err(&pdev->dev, - "Allocation for Tx Queue %u failed\n", i); + netdev_err(dev, "Error on Tx queue %u setup\n", i); for (i--; i >= 0; i--) igc_free_tx_resources(adapter->tx_ring[i]); break; @@ 
-444,6 +443,7 @@ static void igc_free_all_rx_resources(struct igc_adapter *adapter) */ int igc_setup_rx_resources(struct igc_ring *rx_ring) { + struct net_device *ndev = rx_ring->netdev; struct device *dev = rx_ring->dev; int size, desc_len; @@ -473,8 +473,7 @@ int igc_setup_rx_resources(struct igc_ring *rx_ring) err: vfree(rx_ring->rx_buffer_info); rx_ring->rx_buffer_info = NULL; - dev_err(dev, - "Unable to allocate memory for the receive descriptor ring\n"); + netdev_err(ndev, "Unable to allocate memory for Rx descriptor ring\n"); return -ENOMEM; } @@ -487,14 +486,13 @@ err: */ static int igc_setup_all_rx_resources(struct igc_adapter *adapter) { - struct pci_dev *pdev = adapter->pdev; + struct net_device *dev = adapter->netdev; int i, err = 0; for (i = 0; i < adapter->num_rx_queues; i++) { err = igc_setup_rx_resources(adapter->rx_ring[i]); if (err) { - dev_err(&pdev->dev, - "Allocation for Rx Queue %u failed\n", i); + netdev_err(dev, "Error on Rx queue %u setup\n", i); for (i--; i >= 0; i--) igc_free_rx_resources(adapter->rx_ring[i]); break; @@ -768,12 +766,14 @@ static void igc_setup_tctl(struct igc_adapter *adapter) * igc_set_mac_filter_hw() - Set MAC address filter in hardware * @adapter: Pointer to adapter where the filter should be set * @index: Filter index - * @addr: Destination MAC address + * @type: MAC address filter type (source or destination) + * @addr: MAC address * @queue: If non-negative, queue assignment feature is enabled and frames * matching the filter are enqueued onto 'queue'. Otherwise, queue * assignment is disabled. */ static void igc_set_mac_filter_hw(struct igc_adapter *adapter, int index, + enum igc_mac_filter_type type, const u8 *addr, int queue) { struct net_device *dev = adapter->netdev; @@ -786,6 +786,11 @@ static void igc_set_mac_filter_hw(struct igc_adapter *adapter, int index, ral = le32_to_cpup((__le32 *)(addr)); rah = le16_to_cpup((__le16 *)(addr + 4)); + if (type == IGC_MAC_FILTER_TYPE_SRC) { + rah &= ~IGC_RAH_ASEL_MASK; + rah |= IGC_RAH_ASEL_SRC_ADDR; + } + if (queue >= 0) { rah &= ~IGC_RAH_QSEL_MASK; rah |= (queue << IGC_RAH_QSEL_SHIFT); @@ -822,17 +827,12 @@ static void igc_clear_mac_filter_hw(struct igc_adapter *adapter, int index) /* Set default MAC address for the PF in the first RAR entry */ static void igc_set_default_mac_filter(struct igc_adapter *adapter) { - struct igc_mac_addr *mac_table = &adapter->mac_table[0]; struct net_device *dev = adapter->netdev; u8 *addr = adapter->hw.mac.addr; netdev_dbg(dev, "Set default MAC address filter: address %pM", addr); - ether_addr_copy(mac_table->addr, addr); - mac_table->state = IGC_MAC_STATE_DEFAULT | IGC_MAC_STATE_IN_USE; - mac_table->queue = -1; - - igc_set_mac_filter_hw(adapter, 0, addr, mac_table->queue); + igc_set_mac_filter_hw(adapter, 0, IGC_MAC_FILTER_TYPE_DST, addr, -1); } /** @@ -1196,7 +1196,7 @@ static int igc_tx_map(struct igc_ring *tx_ring, return 0; dma_error: - dev_err(tx_ring->dev, "TX DMA map failed\n"); + netdev_err(tx_ring->netdev, "TX DMA map failed\n"); tx_buffer = &tx_ring->tx_buffer_info[i]; /* clear dma mappings for failed tx_buffer_info map */ @@ -1459,8 +1459,8 @@ static void igc_rx_checksum(struct igc_ring *ring, IGC_RXD_STAT_UDPCS)) skb->ip_summed = CHECKSUM_UNNECESSARY; - dev_dbg(ring->dev, "cksum success: bits %08X\n", - le32_to_cpu(rx_desc->wb.upper.status_error)); + netdev_dbg(ring->netdev, "cksum success: bits %08X\n", + le32_to_cpu(rx_desc->wb.upper.status_error)); } static inline void igc_rx_hash(struct igc_ring *ring, @@ -2122,27 +2122,27 @@ static bool 
igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget) (adapter->tx_timeout_factor * HZ)) && !(rd32(IGC_STATUS) & IGC_STATUS_TXOFF)) { /* detected Tx unit hang */ - dev_err(tx_ring->dev, - "Detected Tx Unit Hang\n" - " Tx Queue <%d>\n" - " TDH <%x>\n" - " TDT <%x>\n" - " next_to_use <%x>\n" - " next_to_clean <%x>\n" - "buffer_info[next_to_clean]\n" - " time_stamp <%lx>\n" - " next_to_watch <%p>\n" - " jiffies <%lx>\n" - " desc.status <%x>\n", - tx_ring->queue_index, - rd32(IGC_TDH(tx_ring->reg_idx)), - readl(tx_ring->tail), - tx_ring->next_to_use, - tx_ring->next_to_clean, - tx_buffer->time_stamp, - tx_buffer->next_to_watch, - jiffies, - tx_buffer->next_to_watch->wb.status); + netdev_err(tx_ring->netdev, + "Detected Tx Unit Hang\n" + " Tx Queue <%d>\n" + " TDH <%x>\n" + " TDT <%x>\n" + " next_to_use <%x>\n" + " next_to_clean <%x>\n" + "buffer_info[next_to_clean]\n" + " time_stamp <%lx>\n" + " next_to_watch <%p>\n" + " jiffies <%lx>\n" + " desc.status <%x>\n", + tx_ring->queue_index, + rd32(IGC_TDH(tx_ring->reg_idx)), + readl(tx_ring->tail), + tx_ring->next_to_use, + tx_ring->next_to_clean, + tx_buffer->time_stamp, + tx_buffer->next_to_watch, + jiffies, + tx_buffer->next_to_watch->wb.status); netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index); @@ -2174,34 +2174,38 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget) return !!budget; } -static void igc_nfc_filter_restore(struct igc_adapter *adapter) +static void igc_restore_nfc_rules(struct igc_adapter *adapter) { - struct igc_nfc_filter *rule; + struct igc_nfc_rule *rule; - spin_lock(&adapter->nfc_lock); + spin_lock(&adapter->nfc_rule_lock); - hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) - igc_add_filter(adapter, rule); + hlist_for_each_entry(rule, &adapter->nfc_rule_list, nfc_node) + igc_enable_nfc_rule(adapter, rule); - spin_unlock(&adapter->nfc_lock); + spin_unlock(&adapter->nfc_rule_lock); } -static int igc_find_mac_filter(struct igc_adapter *adapter, const u8 *addr, - u8 flags) +static int igc_find_mac_filter(struct igc_adapter *adapter, + enum igc_mac_filter_type type, const u8 *addr) { - int max_entries = adapter->hw.mac.rar_entry_count; - struct igc_mac_addr *entry; + struct igc_hw *hw = &adapter->hw; + int max_entries = hw->mac.rar_entry_count; + u32 ral, rah; int i; for (i = 0; i < max_entries; i++) { - entry = &adapter->mac_table[i]; + ral = rd32(IGC_RAL(i)); + rah = rd32(IGC_RAH(i)); - if (!(entry->state & IGC_MAC_STATE_IN_USE)) + if (!(rah & IGC_RAH_AV)) + continue; + if (!!(rah & IGC_RAH_ASEL_SRC_ADDR) != type) continue; - if (!ether_addr_equal(addr, entry->addr)) + if ((rah & IGC_RAH_RAH_MASK) != + le16_to_cpup((__le16 *)(addr + 4))) continue; - if ((entry->state & IGC_MAC_STATE_SRC_ADDR) != - (flags & IGC_MAC_STATE_SRC_ADDR)) + if (ral != le32_to_cpup((__le32 *)(addr))) continue; return i; @@ -2212,14 +2216,15 @@ static int igc_find_mac_filter(struct igc_adapter *adapter, const u8 *addr, static int igc_get_avail_mac_filter_slot(struct igc_adapter *adapter) { - int max_entries = adapter->hw.mac.rar_entry_count; - struct igc_mac_addr *entry; + struct igc_hw *hw = &adapter->hw; + int max_entries = hw->mac.rar_entry_count; + u32 rah; int i; for (i = 0; i < max_entries; i++) { - entry = &adapter->mac_table[i]; + rah = rd32(IGC_RAH(i)); - if (!(entry->state & IGC_MAC_STATE_IN_USE)) + if (!(rah & IGC_RAH_AV)) return i; } @@ -2229,105 +2234,239 @@ static int igc_get_avail_mac_filter_slot(struct igc_adapter *adapter) /** * igc_add_mac_filter() - Add MAC address filter * 
@adapter: Pointer to adapter where the filter should be added + * @type: MAC address filter type (source or destination) * @addr: MAC address * @queue: If non-negative, queue assignment feature is enabled and frames * matching the filter are enqueued onto 'queue'. Otherwise, queue * assignment is disabled. - * @flags: Set IGC_MAC_STATE_SRC_ADDR bit to indicate @address is a source - * address * * Return: 0 in case of success, negative errno code otherwise. */ -int igc_add_mac_filter(struct igc_adapter *adapter, const u8 *addr, - const s8 queue, const u8 flags) +int igc_add_mac_filter(struct igc_adapter *adapter, + enum igc_mac_filter_type type, const u8 *addr, + int queue) { struct net_device *dev = adapter->netdev; int index; if (!is_valid_ether_addr(addr)) return -EINVAL; - if (flags & IGC_MAC_STATE_SRC_ADDR) - return -ENOTSUPP; - index = igc_find_mac_filter(adapter, addr, flags); + index = igc_find_mac_filter(adapter, type, addr); if (index >= 0) - goto update_queue_assignment; + goto update_filter; index = igc_get_avail_mac_filter_slot(adapter); if (index < 0) return -ENOSPC; - netdev_dbg(dev, "Add MAC address filter: index %d address %pM queue %d", - index, addr, queue); - - ether_addr_copy(adapter->mac_table[index].addr, addr); - adapter->mac_table[index].state |= IGC_MAC_STATE_IN_USE | flags; -update_queue_assignment: - adapter->mac_table[index].queue = queue; + netdev_dbg(dev, "Add MAC address filter: index %d type %s address %pM queue %d\n", + index, type == IGC_MAC_FILTER_TYPE_DST ? "dst" : "src", + addr, queue); - igc_set_mac_filter_hw(adapter, index, addr, queue); +update_filter: + igc_set_mac_filter_hw(adapter, index, type, addr, queue); return 0; } /** * igc_del_mac_filter() - Delete MAC address filter * @adapter: Pointer to adapter where the filter should be deleted from + * @type: MAC address filter type (source or destination) * @addr: MAC address - * @flags: Set IGC_MAC_STATE_SRC_ADDR bit to indicate @address is a source - * address * * Return: 0 in case of success, negative errno code otherwise. */ -int igc_del_mac_filter(struct igc_adapter *adapter, const u8 *addr, - const u8 flags) +int igc_del_mac_filter(struct igc_adapter *adapter, + enum igc_mac_filter_type type, const u8 *addr) { struct net_device *dev = adapter->netdev; - struct igc_mac_addr *entry; int index; if (!is_valid_ether_addr(addr)) return -EINVAL; - index = igc_find_mac_filter(adapter, addr, flags); + index = igc_find_mac_filter(adapter, type, addr); if (index < 0) return -ENOENT; - entry = &adapter->mac_table[index]; - - if (entry->state & IGC_MAC_STATE_DEFAULT) { + if (index == 0) { /* If this is the default filter, we don't actually delete it. * We just reset to its default value i.e. disable queue * assignment. */ netdev_dbg(dev, "Disable default MAC filter queue assignment"); - entry->queue = -1; - igc_set_mac_filter_hw(adapter, 0, addr, entry->queue); + igc_set_mac_filter_hw(adapter, 0, type, addr, -1); } else { - netdev_dbg(dev, "Delete MAC address filter: index %d address %pM", - index, addr); + netdev_dbg(dev, "Delete MAC address filter: index %d type %s address %pM\n", + index, + type == IGC_MAC_FILTER_TYPE_DST ? 
"dst" : "src", + addr); - entry->state = 0; - entry->queue = -1; - memset(entry->addr, 0, ETH_ALEN); igc_clear_mac_filter_hw(adapter, index); } return 0; } +/** + * igc_add_vlan_prio_filter() - Add VLAN priority filter + * @adapter: Pointer to adapter where the filter should be added + * @prio: VLAN priority value + * @queue: Queue number which matching frames are assigned to + * + * Return: 0 in case of success, negative errno code otherwise. + */ +int igc_add_vlan_prio_filter(struct igc_adapter *adapter, int prio, int queue) +{ + struct net_device *dev = adapter->netdev; + struct igc_hw *hw = &adapter->hw; + u32 vlanpqf; + + vlanpqf = rd32(IGC_VLANPQF); + + if (vlanpqf & IGC_VLANPQF_VALID(prio)) { + netdev_dbg(dev, "VLAN priority filter already in use\n"); + return -EEXIST; + } + + vlanpqf |= IGC_VLANPQF_QSEL(prio, queue); + vlanpqf |= IGC_VLANPQF_VALID(prio); + + wr32(IGC_VLANPQF, vlanpqf); + + netdev_dbg(dev, "Add VLAN priority filter: prio %d queue %d\n", + prio, queue); + return 0; +} + +/** + * igc_del_vlan_prio_filter() - Delete VLAN priority filter + * @adapter: Pointer to adapter where the filter should be deleted from + * @prio: VLAN priority value + */ +void igc_del_vlan_prio_filter(struct igc_adapter *adapter, int prio) +{ + struct igc_hw *hw = &adapter->hw; + u32 vlanpqf; + + vlanpqf = rd32(IGC_VLANPQF); + + vlanpqf &= ~IGC_VLANPQF_VALID(prio); + vlanpqf &= ~IGC_VLANPQF_QSEL(prio, IGC_VLANPQF_QUEUE_MASK); + + wr32(IGC_VLANPQF, vlanpqf); + + netdev_dbg(adapter->netdev, "Delete VLAN priority filter: prio %d\n", + prio); +} + +static int igc_get_avail_etype_filter_slot(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + int i; + + for (i = 0; i < MAX_ETYPE_FILTER; i++) { + u32 etqf = rd32(IGC_ETQF(i)); + + if (!(etqf & IGC_ETQF_FILTER_ENABLE)) + return i; + } + + return -1; +} + +/** + * igc_add_etype_filter() - Add ethertype filter + * @adapter: Pointer to adapter where the filter should be added + * @etype: Ethertype value + * @queue: If non-negative, queue assignment feature is enabled and frames + * matching the filter are enqueued onto 'queue'. Otherwise, queue + * assignment is disabled. + * + * Return: 0 in case of success, negative errno code otherwise. + */ +int igc_add_etype_filter(struct igc_adapter *adapter, u16 etype, int queue) +{ + struct igc_hw *hw = &adapter->hw; + int index; + u32 etqf; + + index = igc_get_avail_etype_filter_slot(adapter); + if (index < 0) + return -ENOSPC; + + etqf = rd32(IGC_ETQF(index)); + + etqf &= ~IGC_ETQF_ETYPE_MASK; + etqf |= etype; + + if (queue >= 0) { + etqf &= ~IGC_ETQF_QUEUE_MASK; + etqf |= (queue << IGC_ETQF_QUEUE_SHIFT); + etqf |= IGC_ETQF_QUEUE_ENABLE; + } + + etqf |= IGC_ETQF_FILTER_ENABLE; + + wr32(IGC_ETQF(index), etqf); + + netdev_dbg(adapter->netdev, "Add ethertype filter: etype %04x queue %d\n", + etype, queue); + return 0; +} + +static int igc_find_etype_filter(struct igc_adapter *adapter, u16 etype) +{ + struct igc_hw *hw = &adapter->hw; + int i; + + for (i = 0; i < MAX_ETYPE_FILTER; i++) { + u32 etqf = rd32(IGC_ETQF(i)); + + if ((etqf & IGC_ETQF_ETYPE_MASK) == etype) + return i; + } + + return -1; +} + +/** + * igc_del_etype_filter() - Delete ethertype filter + * @adapter: Pointer to adapter where the filter should be deleted from + * @etype: Ethertype value + * + * Return: 0 in case of success, negative errno code otherwise. 
+ */ +int igc_del_etype_filter(struct igc_adapter *adapter, u16 etype) +{ + struct igc_hw *hw = &adapter->hw; + int index; + + index = igc_find_etype_filter(adapter, etype); + if (index < 0) + return -ENOENT; + + wr32(IGC_ETQF(index), 0); + + netdev_dbg(adapter->netdev, "Delete ethertype filter: etype %04x\n", + etype); + return 0; +} + static int igc_uc_sync(struct net_device *netdev, const unsigned char *addr) { struct igc_adapter *adapter = netdev_priv(netdev); - return igc_add_mac_filter(adapter, addr, -1, 0); + return igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, addr, -1); } static int igc_uc_unsync(struct net_device *netdev, const unsigned char *addr) { struct igc_adapter *adapter = netdev_priv(netdev); - return igc_del_mac_filter(adapter, addr, 0); + return igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, addr); } /** @@ -2398,7 +2537,7 @@ static void igc_configure(struct igc_adapter *adapter) igc_setup_rctl(adapter); igc_set_default_mac_filter(adapter); - igc_nfc_filter_restore(adapter); + igc_restore_nfc_rules(adapter); igc_configure_tx(adapter); igc_configure_rx(adapter); @@ -2592,12 +2731,7 @@ void igc_set_flag_queue_pairs(struct igc_adapter *adapter, unsigned int igc_get_max_rss_queues(struct igc_adapter *adapter) { - unsigned int max_rss_queues; - - /* Determine the maximum number of RSS queues supported. */ - max_rss_queues = IGC_MAX_RX_QUEUES; - - return max_rss_queues; + return IGC_MAX_RX_QUEUES; } static void igc_init_queue_configuration(struct igc_adapter *adapter) @@ -3238,14 +3372,14 @@ err_out: */ static int igc_init_interrupt_scheme(struct igc_adapter *adapter, bool msix) { - struct pci_dev *pdev = adapter->pdev; + struct net_device *dev = adapter->netdev; int err = 0; igc_set_interrupt_capability(adapter, msix); err = igc_alloc_q_vectors(adapter); if (err) { - dev_err(&pdev->dev, "Unable to allocate memory for vectors\n"); + netdev_err(dev, "Unable to allocate memory for vectors\n"); goto err_alloc_q_vectors; } @@ -3272,8 +3406,6 @@ static int igc_sw_init(struct igc_adapter *adapter) struct pci_dev *pdev = adapter->pdev; struct igc_hw *hw = &adapter->hw; - int size = sizeof(struct igc_mac_addr) * hw->mac.rar_entry_count; - pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word); /* set default ring sizes */ @@ -3292,20 +3424,16 @@ static int igc_sw_init(struct igc_adapter *adapter) VLAN_HLEN; adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; - spin_lock_init(&adapter->nfc_lock); + spin_lock_init(&adapter->nfc_rule_lock); spin_lock_init(&adapter->stats64_lock); /* Assume MSI-X interrupts, will be checked during IRQ allocation */ adapter->flags |= IGC_FLAG_HAS_MSIX; - adapter->mac_table = kzalloc(size, GFP_ATOMIC); - if (!adapter->mac_table) - return -ENOMEM; - igc_init_queue_configuration(adapter); /* This call may decrease the number of queues */ if (igc_init_interrupt_scheme(adapter, true)) { - dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); + netdev_err(netdev, "Unable to allocate memory for queues\n"); return -ENOMEM; } @@ -3523,16 +3651,16 @@ void igc_update_stats(struct igc_adapter *adapter) adapter->stats.mgpdc += rd32(IGC_MGTPDC); } -static void igc_nfc_filter_exit(struct igc_adapter *adapter) +static void igc_nfc_rule_exit(struct igc_adapter *adapter) { - struct igc_nfc_filter *rule; + struct igc_nfc_rule *rule; - spin_lock(&adapter->nfc_lock); + spin_lock(&adapter->nfc_rule_lock); - hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) - igc_erase_filter(adapter, rule); + hlist_for_each_entry(rule, 
&adapter->nfc_rule_list, nfc_node) + igc_disable_nfc_rule(adapter, rule); - spin_unlock(&adapter->nfc_lock); + spin_unlock(&adapter->nfc_rule_lock); } /** @@ -3553,7 +3681,7 @@ void igc_down(struct igc_adapter *adapter) wr32(IGC_RCTL, rctl & ~IGC_RCTL_EN); /* flush and sleep below */ - igc_nfc_filter_exit(adapter); + igc_nfc_rule_exit(adapter); /* set trans_start so we don't get spurious watchdogs during reset */ netif_trans_update(netdev); @@ -3648,8 +3776,7 @@ static int igc_change_mtu(struct net_device *netdev, int new_mtu) if (netif_running(netdev)) igc_down(adapter); - netdev_dbg(netdev, "changing MTU from %d to %d\n", - netdev->mtu, new_mtu); + netdev_dbg(netdev, "changing MTU from %d to %d\n", netdev->mtu, new_mtu); netdev->mtu = new_mtu; if (netif_running(netdev)) @@ -3706,17 +3833,17 @@ static int igc_set_features(struct net_device *netdev, if (!(features & NETIF_F_NTUPLE)) { struct hlist_node *node2; - struct igc_nfc_filter *rule; + struct igc_nfc_rule *rule; - spin_lock(&adapter->nfc_lock); + spin_lock(&adapter->nfc_rule_lock); hlist_for_each_entry_safe(rule, node2, - &adapter->nfc_filter_list, nfc_node) { - igc_erase_filter(adapter, rule); + &adapter->nfc_rule_list, nfc_node) { + igc_disable_nfc_rule(adapter, rule); hlist_del(&rule->nfc_node); kfree(rule); } - spin_unlock(&adapter->nfc_lock); - adapter->nfc_filter_count = 0; + spin_unlock(&adapter->nfc_rule_lock); + adapter->nfc_rule_count = 0; } netdev->features = features; @@ -4006,8 +4133,7 @@ static void igc_watchdog_task(struct work_struct *work) ctrl = rd32(IGC_CTRL); /* Link status message must follow this format */ netdev_info(netdev, - "igc: %s NIC Link is Up %d Mbps %s Duplex, Flow Control: %s\n", - netdev->name, + "NIC Link is Up %d Mbps %s Duplex, Flow Control: %s\n", adapter->link_speed, adapter->link_duplex == FULL_DUPLEX ? 
"Full" : "Half", @@ -4045,10 +4171,10 @@ retry_read_status: retry_count--; goto retry_read_status; } else if (!retry_count) { - dev_err(&adapter->pdev->dev, "exceed max 2 second\n"); + netdev_err(netdev, "exceed max 2 second\n"); } } else { - dev_err(&adapter->pdev->dev, "read 1000Base-T Status Reg\n"); + netdev_err(netdev, "read 1000Base-T Status Reg\n"); } no_wait: netif_carrier_on(netdev); @@ -4064,8 +4190,7 @@ no_wait: adapter->link_duplex = 0; /* Links status message must follow this format */ - netdev_info(netdev, "igc: %s NIC Link is Down\n", - netdev->name); + netdev_info(netdev, "NIC Link is Down\n"); netif_carrier_off(netdev); /* link state has changed, schedule phy info update */ @@ -4283,8 +4408,7 @@ static int igc_request_irq(struct igc_adapter *adapter) netdev->name, adapter); if (err) - dev_err(&pdev->dev, "Error %d getting interrupt\n", - err); + netdev_err(netdev, "Error %d getting interrupt\n", err); request_done: return err; @@ -4386,7 +4510,7 @@ err_setup_tx: return err; } -static int igc_open(struct net_device *netdev) +int igc_open(struct net_device *netdev) { return __igc_open(netdev, false); } @@ -4428,7 +4552,7 @@ static int __igc_close(struct net_device *netdev, bool suspending) return 0; } -static int igc_close(struct net_device *netdev) +int igc_close(struct net_device *netdev) { if (netif_device_present(netdev) || netdev->dismantle) return __igc_close(netdev, false); @@ -4665,9 +4789,6 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg) u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr); u32 value = 0; - if (IGC_REMOVED(hw_addr)) - return ~value; - value = readl(&hw_addr[reg]); /* reads should not return all F's */ @@ -4686,7 +4807,6 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg) int igc_set_spd_dplx(struct igc_adapter *adapter, u32 spd, u8 dplx) { - struct pci_dev *pdev = adapter->pdev; struct igc_mac_info *mac = &adapter->hw.mac; mac->autoneg = 0; @@ -4731,7 +4851,7 @@ int igc_set_spd_dplx(struct igc_adapter *adapter, u32 spd, u8 dplx) return 0; err_inval: - dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n"); + netdev_err(adapter->netdev, "Unsupported Speed/Duplex configuration\n"); return -EINVAL; } @@ -4812,7 +4932,7 @@ static int igc_probe(struct pci_dev *pdev, hw->hw_addr = adapter->io_addr; netdev->netdev_ops = &igc_netdev_ops; - igc_set_ethtool_ops(netdev); + igc_ethtool_set_ops(netdev); netdev->watchdog_timeo = 5 * HZ; netdev->mem_start = pci_resource_start(pdev, 0); @@ -4838,6 +4958,7 @@ static int igc_probe(struct pci_dev *pdev, netdev->features |= NETIF_F_SG; netdev->features |= NETIF_F_TSO; netdev->features |= NETIF_F_TSO6; + netdev->features |= NETIF_F_TSO_ECN; netdev->features |= NETIF_F_RXCSUM; netdev->features |= NETIF_F_HW_CSUM; netdev->features |= NETIF_F_SCTP_CRC; @@ -4876,8 +4997,7 @@ static int igc_probe(struct pci_dev *pdev, if (igc_get_flash_presence_i225(hw)) { if (hw->nvm.ops.validate(hw) < 0) { - dev_err(&pdev->dev, - "The NVM Checksum Is Not Valid\n"); + dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n"); err = -EIO; goto err_eeprom; } @@ -5011,7 +5131,6 @@ static void igc_remove(struct pci_dev *pdev) pci_iounmap(pdev, adapter->io_addr); pci_release_mem_regions(pdev); - kfree(adapter->mac_table); free_netdev(netdev); pci_disable_pcie_error_reporting(pdev); @@ -5140,8 +5259,7 @@ static int __maybe_unused igc_resume(struct device *dev) return -ENODEV; err = pci_enable_device_mem(pdev); if (err) { - dev_err(&pdev->dev, - "igc: Cannot enable PCI device from suspend\n"); + netdev_err(netdev, "Cannot enable PCI device from suspend\n"); 
return err; } pci_set_master(pdev); @@ -5150,7 +5268,7 @@ static int __maybe_unused igc_resume(struct device *dev) pci_enable_wake(pdev, PCI_D3cold, 0); if (igc_init_interrupt_scheme(adapter, true)) { - dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); + netdev_err(netdev, "Unable to allocate memory for queues\n"); return -ENOMEM; } @@ -5254,8 +5372,7 @@ static pci_ers_result_t igc_io_slot_reset(struct pci_dev *pdev) pci_ers_result_t result; if (pci_enable_device_mem(pdev)) { - dev_err(&pdev->dev, - "Could not re-enable PCI device after reset.\n"); + netdev_err(netdev, "Could not re-enable PCI device after reset\n"); result = PCI_ERS_RESULT_DISCONNECT; } else { pci_set_master(pdev); @@ -5294,7 +5411,7 @@ static void igc_io_resume(struct pci_dev *pdev) rtnl_lock(); if (netif_running(netdev)) { if (igc_open(netdev)) { - dev_err(&pdev->dev, "igc_open failed after reset\n"); + netdev_err(netdev, "igc_open failed after reset\n"); return; } } @@ -5341,7 +5458,6 @@ static struct pci_driver igc_driver = { int igc_reinit_queues(struct igc_adapter *adapter) { struct net_device *netdev = adapter->netdev; - struct pci_dev *pdev = adapter->pdev; int err = 0; if (netif_running(netdev)) @@ -5350,7 +5466,7 @@ int igc_reinit_queues(struct igc_adapter *adapter) igc_reset_interrupt_capability(adapter); if (igc_init_interrupt_scheme(adapter, true)) { - dev_err(&pdev->dev, "Unable to allocate memory for queues\n"); + netdev_err(netdev, "Unable to allocate memory for queues\n"); return -ENOMEM; } diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index f99c514ad0f4..0d746f8588c8 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -305,7 +305,6 @@ static int igc_ptp_set_timestamp_mode(struct igc_adapter *adapter, struct igc_hw *hw = &adapter->hw; u32 tsync_rx_cfg = 0; bool is_l4 = false; - bool is_l2 = false; u32 regval; /* reserved for future extensions */ @@ -346,7 +345,6 @@ static int igc_ptp_set_timestamp_mode(struct igc_adapter *adapter, case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: tsync_rx_ctl |= IGC_TSYNCRXCTL_TYPE_EVENT_V2; config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; - is_l2 = true; is_l4 = true; break; case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: @@ -370,7 +368,6 @@ static int igc_ptp_set_timestamp_mode(struct igc_adapter *adapter, tsync_rx_ctl |= IGC_TSYNCRXCTL_TYPE_ALL; tsync_rx_ctl |= IGC_TSYNCRXCTL_RXSYNSIG; config->rx_filter = HWTSTAMP_FILTER_ALL; - is_l2 = true; is_l4 = true; if (hw->mac.type == igc_i225) { @@ -405,15 +402,6 @@ static int igc_ptp_set_timestamp_mode(struct igc_adapter *adapter, /* define which PTP packets are time stamped */ wr32(IGC_TSYNCRXCFG, tsync_rx_cfg); - /* define ethertype filter for timestamped packets */ - if (is_l2) - wr32(IGC_ETQF(3), - (IGC_ETQF_FILTER_ENABLE | /* enable filter */ - IGC_ETQF_1588 | /* enable timestamping */ - ETH_P_1588)); /* 1588 eth protocol type */ - else - wr32(IGC_ETQF(3), 0); - /* L4 Queue Filter[3]: filter by destination port and protocol */ if (is_l4) { u32 ftqf = (IPPROTO_UDP /* UDP */ @@ -466,7 +454,7 @@ void igc_ptp_tx_hang(struct igc_adapter *adapter) * interrupt */ rd32(IGC_TXSTMPH); - dev_warn(&adapter->pdev->dev, "clearing Tx timestamp hang\n"); + netdev_warn(adapter->netdev, "Clearing Tx timestamp hang\n"); } } @@ -529,7 +517,7 @@ static void igc_ptp_tx_work(struct work_struct *work) * interrupt */ rd32(IGC_TXSTMPH); - dev_warn(&adapter->pdev->dev, "clearing Tx timestamp hang\n"); + netdev_warn(adapter->netdev, "Clearing Tx 
timestamp hang\n"); return; } @@ -626,10 +614,9 @@ void igc_ptp_init(struct igc_adapter *adapter) &adapter->pdev->dev); if (IS_ERR(adapter->ptp_clock)) { adapter->ptp_clock = NULL; - dev_err(&adapter->pdev->dev, "ptp_clock_register failed\n"); + netdev_err(netdev, "ptp_clock_register failed\n"); } else if (adapter->ptp_clock) { - dev_info(&adapter->pdev->dev, "added PHC on %s\n", - adapter->netdev->name); + netdev_info(netdev, "PHC added\n"); adapter->ptp_flags |= IGC_PTP_ENABLED; } } @@ -666,8 +653,7 @@ void igc_ptp_stop(struct igc_adapter *adapter) if (adapter->ptp_clock) { ptp_clock_unregister(adapter->ptp_clock); - dev_info(&adapter->pdev->dev, "removed PHC on %s\n", - adapter->netdev->name); + netdev_info(adapter->netdev, "PHC removed\n"); adapter->ptp_flags &= ~IGC_PTP_ENABLED; } } diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h index 6093cde2351c..7f999cfc9b39 100644 --- a/drivers/net/ethernet/intel/igc/igc_regs.h +++ b/drivers/net/ethernet/intel/igc/igc_regs.h @@ -17,11 +17,6 @@ /* Internal Packet Buffer Size Registers */ #define IGC_RXPBS 0x02404 /* Rx Packet Buffer Size - RW */ #define IGC_TXPBS 0x03404 /* Tx Packet Buffer Size - RW */ -#define IGC_TDFH 0x03410 /* Tx Data FIFO Head - RW */ -#define IGC_TDFT 0x03418 /* Tx Data FIFO Tail - RW */ -#define IGC_TDFHS 0x03420 /* Tx Data FIFO Head Saved - RW */ -#define IGC_TDFTS 0x03428 /* Tx Data FIFO Tail Saved - RW */ -#define IGC_TDFPC 0x03430 /* Tx Data FIFO Packet Count - RW */ /* NVM Register Descriptions */ #define IGC_EERD 0x12014 /* EEprom mode read - RW */ @@ -35,10 +30,6 @@ #define IGC_FCRTL 0x02160 /* FC Receive Threshold Low - RW */ #define IGC_FCRTH 0x02168 /* FC Receive Threshold High - RW */ #define IGC_FCRTV 0x02460 /* FC Refresh Timer Value - RW */ -#define IGC_FCSTS 0x02464 /* FC Status - RO */ - -/* PCIe Register Description */ -#define IGC_GCR 0x05B00 /* PCIe control- RW */ /* Semaphore registers */ #define IGC_SW_FW_SYNC 0x05B5C /* SW-FW Synchronization - RW */ @@ -49,6 +40,7 @@ #define IGC_FACTPS 0x05B30 /* Interrupt Register Description */ +#define IGC_EICR 0x01580 /* Ext. Interrupt Cause read - W0 */ #define IGC_EICS 0x01520 /* Ext. Interrupt Cause Set - W0 */ #define IGC_EIMS 0x01524 /* Ext. Interrupt Mask Set/Read - RW */ #define IGC_EIMC 0x01528 /* Ext. 
Interrupt Mask Clear - WO */ @@ -76,13 +68,6 @@ #define IGC_ICRXDMTC 0x04120 /* Rx Descriptor Min Threshold Count */ #define IGC_ICRXOC 0x04124 /* Receiver Overrun Count */ -#define IGC_CBTMPC 0x0402C /* Circuit Breaker TX Packet Count */ -#define IGC_HTDPMC 0x0403C /* Host Transmit Discarded Packets */ -#define IGC_CBRMPC 0x040FC /* Circuit Breaker RX Packet Count */ -#define IGC_RPTHC 0x04104 /* Rx Packets To Host */ -#define IGC_HGPTC 0x04118 /* Host Good Packets TX Count */ -#define IGC_HTCBDPC 0x04124 /* Host TX Circ.Breaker Drop Count */ - /* MSI-X Table Register Descriptions */ #define IGC_PBACL 0x05B68 /* MSIx PBA Clear - R/W 1 to clear */ @@ -119,10 +104,11 @@ #define IGC_RLPML 0x05004 /* Rx Long Packet Max Length */ #define IGC_RFCTL 0x05008 /* Receive Filter Control*/ #define IGC_MTA 0x05200 /* Multicast Table Array - RW Array */ +#define IGC_RA 0x05400 /* Receive Address - RW Array */ #define IGC_UTA 0x0A000 /* Unicast Table Array - RW */ #define IGC_RAL(_n) (0x05400 + ((_n) * 0x08)) #define IGC_RAH(_n) (0x05404 + ((_n) * 0x08)) -#define IGC_VLAPQF 0x055B0 /* VLAN Priority Queue Filter VLAPQF */ +#define IGC_VLANPQF 0x055B0 /* VLAN Priority Queue Filter - RW */ /* Transmit Register Descriptions */ #define IGC_TCTL 0x00400 /* Tx Control - RW */ @@ -138,9 +124,6 @@ #define IGC_MMDAC 13 /* MMD Access Control */ #define IGC_MMDAAD 14 /* MMD Access Address/Data */ -/* Good transmitted packets counter registers */ -#define IGC_PQGPTC(_n) (0x010014 + (0x100 * (_n))) - /* Statistics Register Descriptions */ #define IGC_CRCERRS 0x04000 /* CRC Error Count - R/clr */ #define IGC_ALGNERRC 0x04004 /* Alignment Error Count - R/clr */ @@ -213,7 +196,6 @@ #define IGC_HGOTCL 0x04130 /* Host Good Octets Transmit Count Low */ #define IGC_HGOTCH 0x04134 /* Host Good Octets Transmit Count High */ #define IGC_LENERRS 0x04138 /* Length Errors Count */ -#define IGC_HRMPC 0x0A018 /* Header Redirection Missed Packet Count */ /* Time sync registers */ #define IGC_TSICR 0x0B66C /* Time Sync Interrupt Cause */ @@ -229,8 +211,6 @@ #define IGC_FTQF(_n) (0x059E0 + (4 * (_n))) /* 5-tuple Queue Fltr */ -#define IGC_RXPBS 0x02404 /* Rx Packet Buffer Size - RW */ - /* Transmit Scheduling Registers */ #define IGC_TQAVCTRL 0x3570 #define IGC_TXQCTL(_n) (0x3344 + 0x4 * (_n)) @@ -277,8 +257,7 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg); #define wr32(reg, val) \ do { \ u8 __iomem *hw_addr = READ_ONCE((hw)->hw_addr); \ - if (!IGC_REMOVED(hw_addr)) \ - writel((val), &hw_addr[(reg)]); \ + writel((val), &hw_addr[(reg)]); \ } while (0) #define rd32(reg) (igc_rd32(hw, reg)) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 41d2a0eac5fa..37947949345c 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -3566,10 +3566,6 @@ static void mvneta_start_dev(struct mvneta_port *pp) MVNETA_CAUSE_LINK_CHANGE); phylink_start(pp->phylink); - - /* We may have called phy_speed_down before */ - phy_speed_up(pp->dev->phydev); - netif_tx_start_all_queues(pp->dev); } @@ -3577,9 +3573,6 @@ static void mvneta_stop_dev(struct mvneta_port *pp) { unsigned int cpu; - if (device_may_wakeup(&pp->dev->dev)) - phy_speed_down(pp->dev->phydev, false); - phylink_stop(pp->phylink); if (!pp->neta_armada3700) { @@ -4052,10 +4045,6 @@ static int mvneta_mdio_probe(struct mvneta_port *pp) phylink_ethtool_get_wol(pp->phylink, &wol); device_set_wakeup_capable(&pp->dev->dev, !!wol.supported); - /* PHY WoL may be enabled but device wakeup disabled */ - if (wol.supported) - 
device_set_wakeup_enable(&pp->dev->dev, !!wol.wolopts); - return err; } diff --git a/drivers/net/ethernet/mediatek/Kconfig b/drivers/net/ethernet/mediatek/Kconfig index 4968352ba188..500c15e7ea4a 100644 --- a/drivers/net/ethernet/mediatek/Kconfig +++ b/drivers/net/ethernet/mediatek/Kconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only config NET_VENDOR_MEDIATEK - bool "MediaTek ethernet driver" + bool "MediaTek devices" depends on ARCH_MEDIATEK || SOC_MT7621 || SOC_MT7620 ---help--- If you have a Mediatek SoC with ethernet, say Y. @@ -14,4 +14,11 @@ config NET_MEDIATEK_SOC This driver supports the gigabit ethernet MACs in the MediaTek SoC family. +config NET_MEDIATEK_STAR_EMAC + tristate "MediaTek STAR Ethernet MAC support" + select PHYLIB + help + This driver supports the ethernet MAC IP first used on + MediaTek MT85** SoCs. + endif #NET_VENDOR_MEDIATEK diff --git a/drivers/net/ethernet/mediatek/Makefile b/drivers/net/ethernet/mediatek/Makefile index 2d8362f9341b..3a777b4a6cd3 100644 --- a/drivers/net/ethernet/mediatek/Makefile +++ b/drivers/net/ethernet/mediatek/Makefile @@ -3,5 +3,6 @@ # Makefile for the Mediatek SoCs built-in ethernet macs # -obj-$(CONFIG_NET_MEDIATEK_SOC) += mtk_eth.o +obj-$(CONFIG_NET_MEDIATEK_SOC) += mtk_eth.o mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_eth_path.o +obj-$(CONFIG_NET_MEDIATEK_STAR_EMAC) += mtk_star_emac.o diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c new file mode 100644 index 000000000000..789c77af501f --- /dev/null +++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c @@ -0,0 +1,1678 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2020 MediaTek Corporation + * Copyright (c) 2020 BayLibre SAS + * + * Author: Bartosz Golaszewski <[email protected]> + */ + +#include <linux/bits.h> +#include <linux/clk.h> +#include <linux/compiler.h> +#include <linux/dma-mapping.h> +#include <linux/etherdevice.h> +#include <linux/kernel.h> +#include <linux/mfd/syscon.h> +#include <linux/mii.h> +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/of.h> +#include <linux/of_mdio.h> +#include <linux/of_net.h> +#include <linux/platform_device.h> +#include <linux/pm.h> +#include <linux/regmap.h> +#include <linux/skbuff.h> +#include <linux/spinlock.h> +#include <linux/workqueue.h> + +#define MTK_STAR_DRVNAME "mtk_star_emac" + +#define MTK_STAR_WAIT_TIMEOUT 300 +#define MTK_STAR_MAX_FRAME_SIZE 1514 +#define MTK_STAR_SKB_ALIGNMENT 16 +#define MTK_STAR_NAPI_WEIGHT 64 +#define MTK_STAR_HASHTABLE_MC_LIMIT 256 +#define MTK_STAR_HASHTABLE_SIZE_MAX 512 + +/* Normally we'd use NET_IP_ALIGN but on arm64 its value is 0 and it doesn't + * work for this controller. 
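Concretely, the two bytes compensate for the 14-byte Ethernet header: receive the frame at a 16-byte boundary plus MTK_STAR_IP_ALIGN and the IP header behind it comes out naturally aligned. Schematically, for a 16-byte-aligned buffer:

        buf + 0x00:  2 bytes of headroom (MTK_STAR_IP_ALIGN)
        buf + 0x02:  Ethernet header, 14 bytes
        buf + 0x10:  IP header, aligned again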
+ */ +#define MTK_STAR_IP_ALIGN 2 + +static const char *const mtk_star_clk_names[] = { "core", "reg", "trans" }; +#define MTK_STAR_NCLKS ARRAY_SIZE(mtk_star_clk_names) + +/* PHY Control Register 0 */ +#define MTK_STAR_REG_PHY_CTRL0 0x0000 +#define MTK_STAR_BIT_PHY_CTRL0_WTCMD BIT(13) +#define MTK_STAR_BIT_PHY_CTRL0_RDCMD BIT(14) +#define MTK_STAR_BIT_PHY_CTRL0_RWOK BIT(15) +#define MTK_STAR_MSK_PHY_CTRL0_PREG GENMASK(12, 8) +#define MTK_STAR_OFF_PHY_CTRL0_PREG 8 +#define MTK_STAR_MSK_PHY_CTRL0_RWDATA GENMASK(31, 16) +#define MTK_STAR_OFF_PHY_CTRL0_RWDATA 16 + +/* PHY Control Register 1 */ +#define MTK_STAR_REG_PHY_CTRL1 0x0004 +#define MTK_STAR_BIT_PHY_CTRL1_LINK_ST BIT(0) +#define MTK_STAR_BIT_PHY_CTRL1_AN_EN BIT(8) +#define MTK_STAR_OFF_PHY_CTRL1_FORCE_SPD 9 +#define MTK_STAR_VAL_PHY_CTRL1_FORCE_SPD_10M 0x00 +#define MTK_STAR_VAL_PHY_CTRL1_FORCE_SPD_100M 0x01 +#define MTK_STAR_VAL_PHY_CTRL1_FORCE_SPD_1000M 0x02 +#define MTK_STAR_BIT_PHY_CTRL1_FORCE_DPX BIT(11) +#define MTK_STAR_BIT_PHY_CTRL1_FORCE_FC_RX BIT(12) +#define MTK_STAR_BIT_PHY_CTRL1_FORCE_FC_TX BIT(13) + +/* MAC Configuration Register */ +#define MTK_STAR_REG_MAC_CFG 0x0008 +#define MTK_STAR_OFF_MAC_CFG_IPG 10 +#define MTK_STAR_VAL_MAC_CFG_IPG_96BIT GENMASK(4, 0) +#define MTK_STAR_BIT_MAC_CFG_MAXLEN_1522 BIT(16) +#define MTK_STAR_BIT_MAC_CFG_AUTO_PAD BIT(19) +#define MTK_STAR_BIT_MAC_CFG_CRC_STRIP BIT(20) +#define MTK_STAR_BIT_MAC_CFG_VLAN_STRIP BIT(22) +#define MTK_STAR_BIT_MAC_CFG_NIC_PD BIT(31) + +/* Flow-Control Configuration Register */ +#define MTK_STAR_REG_FC_CFG 0x000c +#define MTK_STAR_BIT_FC_CFG_BP_EN BIT(7) +#define MTK_STAR_BIT_FC_CFG_UC_PAUSE_DIR BIT(8) +#define MTK_STAR_OFF_FC_CFG_SEND_PAUSE_TH 16 +#define MTK_STAR_MSK_FC_CFG_SEND_PAUSE_TH GENMASK(27, 16) +#define MTK_STAR_VAL_FC_CFG_SEND_PAUSE_TH_2K 0x800 + +/* ARL Configuration Register */ +#define MTK_STAR_REG_ARL_CFG 0x0010 +#define MTK_STAR_BIT_ARL_CFG_HASH_ALG BIT(0) +#define MTK_STAR_BIT_ARL_CFG_MISC_MODE BIT(4) + +/* MAC High and Low Bytes Registers */ +#define MTK_STAR_REG_MY_MAC_H 0x0014 +#define MTK_STAR_REG_MY_MAC_L 0x0018 + +/* Hash Table Control Register */ +#define MTK_STAR_REG_HASH_CTRL 0x001c +#define MTK_STAR_MSK_HASH_CTRL_HASH_BIT_ADDR GENMASK(8, 0) +#define MTK_STAR_BIT_HASH_CTRL_HASH_BIT_DATA BIT(12) +#define MTK_STAR_BIT_HASH_CTRL_ACC_CMD BIT(13) +#define MTK_STAR_BIT_HASH_CTRL_CMD_START BIT(14) +#define MTK_STAR_BIT_HASH_CTRL_BIST_OK BIT(16) +#define MTK_STAR_BIT_HASH_CTRL_BIST_DONE BIT(17) +#define MTK_STAR_BIT_HASH_CTRL_BIST_EN BIT(31) + +/* TX DMA Control Register */ +#define MTK_STAR_REG_TX_DMA_CTRL 0x0034 +#define MTK_STAR_BIT_TX_DMA_CTRL_START BIT(0) +#define MTK_STAR_BIT_TX_DMA_CTRL_STOP BIT(1) +#define MTK_STAR_BIT_TX_DMA_CTRL_RESUME BIT(2) + +/* RX DMA Control Register */ +#define MTK_STAR_REG_RX_DMA_CTRL 0x0038 +#define MTK_STAR_BIT_RX_DMA_CTRL_START BIT(0) +#define MTK_STAR_BIT_RX_DMA_CTRL_STOP BIT(1) +#define MTK_STAR_BIT_RX_DMA_CTRL_RESUME BIT(2) + +/* DMA Address Registers */ +#define MTK_STAR_REG_TX_DPTR 0x003c +#define MTK_STAR_REG_RX_DPTR 0x0040 +#define MTK_STAR_REG_TX_BASE_ADDR 0x0044 +#define MTK_STAR_REG_RX_BASE_ADDR 0x0048 + +/* Interrupt Status Register */ +#define MTK_STAR_REG_INT_STS 0x0050 +#define MTK_STAR_REG_INT_STS_PORT_STS_CHG BIT(2) +#define MTK_STAR_REG_INT_STS_MIB_CNT_TH BIT(3) +#define MTK_STAR_BIT_INT_STS_FNRC BIT(6) +#define MTK_STAR_BIT_INT_STS_TNTC BIT(8) + +/* Interrupt Mask Register */ +#define MTK_STAR_REG_INT_MASK 0x0054 +#define MTK_STAR_BIT_INT_MASK_FNRC BIT(6) + +/* Misc. 
Config Register */ +#define MTK_STAR_REG_TEST1 0x005c +#define MTK_STAR_BIT_TEST1_RST_HASH_MBIST BIT(31) + +/* Extended Configuration Register */ +#define MTK_STAR_REG_EXT_CFG 0x0060 +#define MTK_STAR_OFF_EXT_CFG_SND_PAUSE_RLS 16 +#define MTK_STAR_MSK_EXT_CFG_SND_PAUSE_RLS GENMASK(26, 16) +#define MTK_STAR_VAL_EXT_CFG_SND_PAUSE_RLS_1K 0x400 + +/* EthSys Configuration Register */ +#define MTK_STAR_REG_SYS_CONF 0x0094 +#define MTK_STAR_BIT_MII_PAD_OUT_ENABLE BIT(0) +#define MTK_STAR_BIT_EXT_MDC_MODE BIT(1) +#define MTK_STAR_BIT_SWC_MII_MODE BIT(2) + +/* MAC Clock Configuration Register */ +#define MTK_STAR_REG_MAC_CLK_CONF 0x00ac +#define MTK_STAR_MSK_MAC_CLK_CONF GENMASK(7, 0) +#define MTK_STAR_BIT_CLK_DIV_10 0x0a + +/* Counter registers. */ +#define MTK_STAR_REG_C_RXOKPKT 0x0100 +#define MTK_STAR_REG_C_RXOKBYTE 0x0104 +#define MTK_STAR_REG_C_RXRUNT 0x0108 +#define MTK_STAR_REG_C_RXLONG 0x010c +#define MTK_STAR_REG_C_RXDROP 0x0110 +#define MTK_STAR_REG_C_RXCRC 0x0114 +#define MTK_STAR_REG_C_RXARLDROP 0x0118 +#define MTK_STAR_REG_C_RXVLANDROP 0x011c +#define MTK_STAR_REG_C_RXCSERR 0x0120 +#define MTK_STAR_REG_C_RXPAUSE 0x0124 +#define MTK_STAR_REG_C_TXOKPKT 0x0128 +#define MTK_STAR_REG_C_TXOKBYTE 0x012c +#define MTK_STAR_REG_C_TXPAUSECOL 0x0130 +#define MTK_STAR_REG_C_TXRTY 0x0134 +#define MTK_STAR_REG_C_TXSKIP 0x0138 +#define MTK_STAR_REG_C_TX_ARP 0x013c +#define MTK_STAR_REG_C_RX_RERR 0x01d8 +#define MTK_STAR_REG_C_RX_UNI 0x01dc +#define MTK_STAR_REG_C_RX_MULTI 0x01e0 +#define MTK_STAR_REG_C_RX_BROAD 0x01e4 +#define MTK_STAR_REG_C_RX_ALIGNERR 0x01e8 +#define MTK_STAR_REG_C_TX_UNI 0x01ec +#define MTK_STAR_REG_C_TX_MULTI 0x01f0 +#define MTK_STAR_REG_C_TX_BROAD 0x01f4 +#define MTK_STAR_REG_C_TX_TIMEOUT 0x01f8 +#define MTK_STAR_REG_C_TX_LATECOL 0x01fc +#define MTK_STAR_REG_C_RX_LENGTHERR 0x0214 +#define MTK_STAR_REG_C_RX_TWIST 0x0218 + +/* Ethernet CFG Control */ +#define MTK_PERICFG_REG_NIC_CFG_CON 0x03c4 +#define MTK_PERICFG_MSK_NIC_CFG_CON_CFG_MII GENMASK(3, 0) +#define MTK_PERICFG_BIT_NIC_CFG_CON_RMII BIT(0) + +/* Represents the actual structure of descriptors used by the MAC. We can + * reuse the same structure for both TX and RX - the layout is the same, only + * the flags differ slightly. + */ +struct mtk_star_ring_desc { + /* Contains both the status flags as well as packet length. */ + u32 status; + u32 data_ptr; + u32 vtag; + u32 reserved; +}; + +#define MTK_STAR_DESC_MSK_LEN GENMASK(15, 0) +#define MTK_STAR_DESC_BIT_RX_CRCE BIT(24) +#define MTK_STAR_DESC_BIT_RX_OSIZE BIT(25) +#define MTK_STAR_DESC_BIT_INT BIT(27) +#define MTK_STAR_DESC_BIT_LS BIT(28) +#define MTK_STAR_DESC_BIT_FS BIT(29) +#define MTK_STAR_DESC_BIT_EOR BIT(30) +#define MTK_STAR_DESC_BIT_COWN BIT(31) + +/* Helper structure for storing data read from/written to descriptors in order + * to limit reads from/writes to DMA memory. 
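Since the status word multiplexes the frame length with the flag bits defined above, decoding a completed descriptor is a pair of masks. A worked example with an invented value:

        u32 len, flags;

        /* e.g. status == 0xb0000040: COWN | FS | LS set, 64-byte frame */
        len   = status & MTK_STAR_DESC_MSK_LEN;    /* 0x40       */
        flags = status & ~MTK_STAR_DESC_MSK_LEN;   /* 0xb0000000 */

The helper structure below caches exactly this split, so the uncached descriptor memory is read once per completion instead of once per field.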
+ */ +struct mtk_star_ring_desc_data { + unsigned int len; + unsigned int flags; + dma_addr_t dma_addr; + struct sk_buff *skb; +}; + +#define MTK_STAR_RING_NUM_DESCS 128 +#define MTK_STAR_NUM_TX_DESCS MTK_STAR_RING_NUM_DESCS +#define MTK_STAR_NUM_RX_DESCS MTK_STAR_RING_NUM_DESCS +#define MTK_STAR_NUM_DESCS_TOTAL (MTK_STAR_RING_NUM_DESCS * 2) +#define MTK_STAR_DMA_SIZE \ + (MTK_STAR_NUM_DESCS_TOTAL * sizeof(struct mtk_star_ring_desc)) + +struct mtk_star_ring { + struct mtk_star_ring_desc *descs; + struct sk_buff *skbs[MTK_STAR_RING_NUM_DESCS]; + dma_addr_t dma_addrs[MTK_STAR_RING_NUM_DESCS]; + unsigned int head; + unsigned int tail; +}; + +struct mtk_star_priv { + struct net_device *ndev; + + struct regmap *regs; + struct regmap *pericfg; + + struct clk_bulk_data clks[MTK_STAR_NCLKS]; + + void *ring_base; + struct mtk_star_ring_desc *descs_base; + dma_addr_t dma_addr; + struct mtk_star_ring tx_ring; + struct mtk_star_ring rx_ring; + + struct mii_bus *mii; + struct napi_struct napi; + + struct device_node *phy_node; + phy_interface_t phy_intf; + struct phy_device *phydev; + unsigned int link; + int speed; + int duplex; + int pause; + + /* Protects against concurrent descriptor access. */ + spinlock_t lock; + + struct rtnl_link_stats64 stats; + struct work_struct stats_work; +}; + +static struct device *mtk_star_get_dev(struct mtk_star_priv *priv) +{ + return priv->ndev->dev.parent; +} + +static const struct regmap_config mtk_star_regmap_config = { + .reg_bits = 32, + .val_bits = 32, + .reg_stride = 4, + .disable_locking = true, +}; + +static void mtk_star_ring_init(struct mtk_star_ring *ring, + struct mtk_star_ring_desc *descs) +{ + memset(ring, 0, sizeof(*ring)); + ring->descs = descs; + ring->head = 0; + ring->tail = 0; +} + +static int mtk_star_ring_pop_tail(struct mtk_star_ring *ring, + struct mtk_star_ring_desc_data *desc_data) +{ + struct mtk_star_ring_desc *desc = &ring->descs[ring->tail]; + unsigned int status; + + status = READ_ONCE(desc->status); + dma_rmb(); /* Make sure we read the status bits before checking it. */ + + if (!(status & MTK_STAR_DESC_BIT_COWN)) + return -1; + + desc_data->len = status & MTK_STAR_DESC_MSK_LEN; + desc_data->flags = status & ~MTK_STAR_DESC_MSK_LEN; + desc_data->dma_addr = ring->dma_addrs[ring->tail]; + desc_data->skb = ring->skbs[ring->tail]; + + ring->dma_addrs[ring->tail] = 0; + ring->skbs[ring->tail] = NULL; + + status &= MTK_STAR_DESC_BIT_COWN | MTK_STAR_DESC_BIT_EOR; + + WRITE_ONCE(desc->data_ptr, 0); + WRITE_ONCE(desc->status, status); + + ring->tail = (ring->tail + 1) % MTK_STAR_RING_NUM_DESCS; + + return 0; +} + +static void mtk_star_ring_push_head(struct mtk_star_ring *ring, + struct mtk_star_ring_desc_data *desc_data, + unsigned int flags) +{ + struct mtk_star_ring_desc *desc = &ring->descs[ring->head]; + unsigned int status; + + status = READ_ONCE(desc->status); + + ring->skbs[ring->head] = desc_data->skb; + ring->dma_addrs[ring->head] = desc_data->dma_addr; + + status |= desc_data->len; + if (flags) + status |= flags; + + WRITE_ONCE(desc->data_ptr, desc_data->dma_addr); + WRITE_ONCE(desc->status, status); + status &= ~MTK_STAR_DESC_BIT_COWN; + /* Flush previous modifications before ownership change. 
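The pop/push pair here implements the usual producer/consumer ownership handshake: COWN set means the CPU owns the descriptor, COWN clear hands it to the DMA engine, and the bit may only change after every other field is in place. Boiled down to its core (a sketch of the surrounding logic, not new behaviour):

        /* producer: publish the fields, then transfer ownership */
        WRITE_ONCE(desc->data_ptr, dma_addr);
        WRITE_ONCE(desc->status, status | MTK_STAR_DESC_BIT_COWN);
        dma_wmb();                            /* fields before ownership */
        WRITE_ONCE(desc->status, status & ~MTK_STAR_DESC_BIT_COWN);

        /* consumer: test ownership, then trust the fields */
        status = READ_ONCE(desc->status);
        dma_rmb();                            /* ownership before fields */
        if (status & MTK_STAR_DESC_BIT_COWN)
                len = status & MTK_STAR_DESC_MSK_LEN;  /* safe to read */

dma_wmb()/dma_rmb() suffice because the descriptors live in coherent DMA memory; the heavier mb()/wmb() variants are only needed when MMIO accesses must be ordered too.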
*/ + dma_wmb(); + WRITE_ONCE(desc->status, status); + + ring->head = (ring->head + 1) % MTK_STAR_RING_NUM_DESCS; +} + +static void +mtk_star_ring_push_head_rx(struct mtk_star_ring *ring, + struct mtk_star_ring_desc_data *desc_data) +{ + mtk_star_ring_push_head(ring, desc_data, 0); +} + +static void +mtk_star_ring_push_head_tx(struct mtk_star_ring *ring, + struct mtk_star_ring_desc_data *desc_data) +{ + static const unsigned int flags = MTK_STAR_DESC_BIT_FS | + MTK_STAR_DESC_BIT_LS | + MTK_STAR_DESC_BIT_INT; + + mtk_star_ring_push_head(ring, desc_data, flags); +} + +static unsigned int mtk_star_ring_num_used_descs(struct mtk_star_ring *ring) +{ + return abs(ring->head - ring->tail); +} + +static bool mtk_star_ring_full(struct mtk_star_ring *ring) +{ + return mtk_star_ring_num_used_descs(ring) == MTK_STAR_RING_NUM_DESCS; +} + +static bool mtk_star_ring_descs_available(struct mtk_star_ring *ring) +{ + return mtk_star_ring_num_used_descs(ring) > 0; +} + +static dma_addr_t mtk_star_dma_map_rx(struct mtk_star_priv *priv, + struct sk_buff *skb) +{ + struct device *dev = mtk_star_get_dev(priv); + + /* Data pointer for the RX DMA descriptor must be aligned to 4N + 2. */ + return dma_map_single(dev, skb_tail_pointer(skb) - 2, + skb_tailroom(skb), DMA_FROM_DEVICE); +} + +static void mtk_star_dma_unmap_rx(struct mtk_star_priv *priv, + struct mtk_star_ring_desc_data *desc_data) +{ + struct device *dev = mtk_star_get_dev(priv); + + dma_unmap_single(dev, desc_data->dma_addr, + skb_tailroom(desc_data->skb), DMA_FROM_DEVICE); +} + +static dma_addr_t mtk_star_dma_map_tx(struct mtk_star_priv *priv, + struct sk_buff *skb) +{ + struct device *dev = mtk_star_get_dev(priv); + + return dma_map_single(dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE); +} + +static void mtk_star_dma_unmap_tx(struct mtk_star_priv *priv, + struct mtk_star_ring_desc_data *desc_data) +{ + struct device *dev = mtk_star_get_dev(priv); + + return dma_unmap_single(dev, desc_data->dma_addr, + skb_headlen(desc_data->skb), DMA_TO_DEVICE); +} + +static void mtk_star_nic_disable_pd(struct mtk_star_priv *priv) +{ + regmap_update_bits(priv->regs, MTK_STAR_REG_MAC_CFG, + MTK_STAR_BIT_MAC_CFG_NIC_PD, 0); +} + +/* Unmask the three interrupts we care about, mask all others. 
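Note the polarity: MTK_STAR_REG_INT_MASK is a mask register, so writing ~val unmasks exactly the bits in val, and masking everything is a plain ~0 write. The per-source enable/disable helpers below then carry the standard NAPI handshake, whose completion side (in the poll callback, outside this excerpt) conventionally looks like:

        /* sketch of the canonical poll-side re-arm */
        if (received < budget) {
                napi_complete_done(napi, received);
                mtk_star_intr_enable(priv);   /* re-arm Rx/Tx sources */
        }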
*/ +static void mtk_star_intr_enable(struct mtk_star_priv *priv) +{ + unsigned int val = MTK_STAR_BIT_INT_STS_TNTC | + MTK_STAR_BIT_INT_STS_FNRC | + MTK_STAR_REG_INT_STS_MIB_CNT_TH; + + regmap_write(priv->regs, MTK_STAR_REG_INT_MASK, ~val); +} + +static void mtk_star_intr_disable(struct mtk_star_priv *priv) +{ + regmap_write(priv->regs, MTK_STAR_REG_INT_MASK, ~0); +} + +static void mtk_star_intr_enable_tx(struct mtk_star_priv *priv) +{ + regmap_update_bits(priv->regs, MTK_STAR_REG_INT_MASK, + MTK_STAR_BIT_INT_STS_TNTC, 0); +} + +static void mtk_star_intr_enable_rx(struct mtk_star_priv *priv) +{ + regmap_update_bits(priv->regs, MTK_STAR_REG_INT_MASK, + MTK_STAR_BIT_INT_STS_FNRC, 0); +} + +static void mtk_star_intr_enable_stats(struct mtk_star_priv *priv) +{ + regmap_update_bits(priv->regs, MTK_STAR_REG_INT_MASK, + MTK_STAR_REG_INT_STS_MIB_CNT_TH, 0); +} + +static void mtk_star_intr_disable_tx(struct mtk_star_priv *priv) +{ + regmap_update_bits(priv->regs, MTK_STAR_REG_INT_MASK, + MTK_STAR_BIT_INT_STS_TNTC, + MTK_STAR_BIT_INT_STS_TNTC); +} + +static void mtk_star_intr_disable_rx(struct mtk_star_priv *priv) +{ + regmap_update_bits(priv->regs, MTK_STAR_REG_INT_MASK, + MTK_STAR_BIT_INT_STS_FNRC, + MTK_STAR_BIT_INT_STS_FNRC); +} + +static void mtk_star_intr_disable_stats(struct mtk_star_priv *priv) +{ + regmap_update_bits(priv->regs, MTK_STAR_REG_INT_MASK, + MTK_STAR_REG_INT_STS_MIB_CNT_TH, + MTK_STAR_REG_INT_STS_MIB_CNT_TH); +} + +static unsigned int mtk_star_intr_read(struct mtk_star_priv *priv) +{ + unsigned int val; + + regmap_read(priv->regs, MTK_STAR_REG_INT_STS, &val); + + return val; +} + +static unsigned int mtk_star_intr_ack_all(struct mtk_star_priv *priv) +{ + unsigned int val; + + val = mtk_star_intr_read(priv); + regmap_write(priv->regs, MTK_STAR_REG_INT_STS, val); + + return val; +} + +static void mtk_star_dma_init(struct mtk_star_priv *priv) +{ + struct mtk_star_ring_desc *desc; + unsigned int val; + int i; + + priv->descs_base = (struct mtk_star_ring_desc *)priv->ring_base; + + for (i = 0; i < MTK_STAR_NUM_DESCS_TOTAL; i++) { + desc = &priv->descs_base[i]; + + memset(desc, 0, sizeof(*desc)); + desc->status = MTK_STAR_DESC_BIT_COWN; + if ((i == MTK_STAR_NUM_TX_DESCS - 1) || + (i == MTK_STAR_NUM_DESCS_TOTAL - 1)) + desc->status |= MTK_STAR_DESC_BIT_EOR; + } + + mtk_star_ring_init(&priv->tx_ring, priv->descs_base); + mtk_star_ring_init(&priv->rx_ring, + priv->descs_base + MTK_STAR_NUM_TX_DESCS); + + /* Set DMA pointers. */ + val = (unsigned int)priv->dma_addr; + regmap_write(priv->regs, MTK_STAR_REG_TX_BASE_ADDR, val); + regmap_write(priv->regs, MTK_STAR_REG_TX_DPTR, val); + + val += sizeof(struct mtk_star_ring_desc) * MTK_STAR_NUM_TX_DESCS; + regmap_write(priv->regs, MTK_STAR_REG_RX_BASE_ADDR, val); + regmap_write(priv->regs, MTK_STAR_REG_RX_DPTR, val); +} + +static void mtk_star_dma_start(struct mtk_star_priv *priv) +{ + regmap_update_bits(priv->regs, MTK_STAR_REG_TX_DMA_CTRL, + MTK_STAR_BIT_TX_DMA_CTRL_START, + MTK_STAR_BIT_TX_DMA_CTRL_START); + regmap_update_bits(priv->regs, MTK_STAR_REG_RX_DMA_CTRL, + MTK_STAR_BIT_RX_DMA_CTRL_START, + MTK_STAR_BIT_RX_DMA_CTRL_START); +} + +static void mtk_star_dma_stop(struct mtk_star_priv *priv) +{ + regmap_write(priv->regs, MTK_STAR_REG_TX_DMA_CTRL, + MTK_STAR_BIT_TX_DMA_CTRL_STOP); + regmap_write(priv->regs, MTK_STAR_REG_RX_DMA_CTRL, + MTK_STAR_BIT_RX_DMA_CTRL_STOP); +} + +static void mtk_star_dma_disable(struct mtk_star_priv *priv) +{ + int i; + + mtk_star_dma_stop(priv); + + /* Take back all descriptors. 
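One arithmetic note on mtk_star_dma_init() above: both rings share a single coherent allocation, so the Rx base is simply the Tx base plus the Tx ring's footprint. With the sizes defined earlier, that offset is fixed:

        /* 128 Tx descriptors * 16 bytes each == 0x800 */
        dma_addr_t rx_base = priv->dma_addr +
                MTK_STAR_NUM_TX_DESCS * sizeof(struct mtk_star_ring_desc);

which is exactly the val += step feeding MTK_STAR_REG_RX_BASE_ADDR.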
*/ + for (i = 0; i < MTK_STAR_NUM_DESCS_TOTAL; i++) + priv->descs_base[i].status |= MTK_STAR_DESC_BIT_COWN; +} + +static void mtk_star_dma_resume_rx(struct mtk_star_priv *priv) +{ + regmap_update_bits(priv->regs, MTK_STAR_REG_RX_DMA_CTRL, + MTK_STAR_BIT_RX_DMA_CTRL_RESUME, + MTK_STAR_BIT_RX_DMA_CTRL_RESUME); +} + +static void mtk_star_dma_resume_tx(struct mtk_star_priv *priv) +{ + regmap_update_bits(priv->regs, MTK_STAR_REG_TX_DMA_CTRL, + MTK_STAR_BIT_TX_DMA_CTRL_RESUME, + MTK_STAR_BIT_TX_DMA_CTRL_RESUME); +} + +static void mtk_star_set_mac_addr(struct net_device *ndev) +{ + struct mtk_star_priv *priv = netdev_priv(ndev); + u8 *mac_addr = ndev->dev_addr; + unsigned int high, low; + + high = mac_addr[0] << 8 | mac_addr[1] << 0; + low = mac_addr[2] << 24 | mac_addr[3] << 16 | + mac_addr[4] << 8 | mac_addr[5]; + + regmap_write(priv->regs, MTK_STAR_REG_MY_MAC_H, high); + regmap_write(priv->regs, MTK_STAR_REG_MY_MAC_L, low); +} + +static void mtk_star_reset_counters(struct mtk_star_priv *priv) +{ + static const unsigned int counter_regs[] = { + MTK_STAR_REG_C_RXOKPKT, + MTK_STAR_REG_C_RXOKBYTE, + MTK_STAR_REG_C_RXRUNT, + MTK_STAR_REG_C_RXLONG, + MTK_STAR_REG_C_RXDROP, + MTK_STAR_REG_C_RXCRC, + MTK_STAR_REG_C_RXARLDROP, + MTK_STAR_REG_C_RXVLANDROP, + MTK_STAR_REG_C_RXCSERR, + MTK_STAR_REG_C_RXPAUSE, + MTK_STAR_REG_C_TXOKPKT, + MTK_STAR_REG_C_TXOKBYTE, + MTK_STAR_REG_C_TXPAUSECOL, + MTK_STAR_REG_C_TXRTY, + MTK_STAR_REG_C_TXSKIP, + MTK_STAR_REG_C_TX_ARP, + MTK_STAR_REG_C_RX_RERR, + MTK_STAR_REG_C_RX_UNI, + MTK_STAR_REG_C_RX_MULTI, + MTK_STAR_REG_C_RX_BROAD, + MTK_STAR_REG_C_RX_ALIGNERR, + MTK_STAR_REG_C_TX_UNI, + MTK_STAR_REG_C_TX_MULTI, + MTK_STAR_REG_C_TX_BROAD, + MTK_STAR_REG_C_TX_TIMEOUT, + MTK_STAR_REG_C_TX_LATECOL, + MTK_STAR_REG_C_RX_LENGTHERR, + MTK_STAR_REG_C_RX_TWIST, + }; + + unsigned int i, val; + + for (i = 0; i < ARRAY_SIZE(counter_regs); i++) + regmap_read(priv->regs, counter_regs[i], &val); +} + +static void mtk_star_update_stat(struct mtk_star_priv *priv, + unsigned int reg, u64 *stat) +{ + unsigned int val; + + regmap_read(priv->regs, reg, &val); + *stat += val; +} + +/* Try to get as many stats as possible from the internal registers instead + * of tracking them ourselves. + */ +static void mtk_star_update_stats(struct mtk_star_priv *priv) +{ + struct rtnl_link_stats64 *stats = &priv->stats; + + /* OK packets and bytes. */ + mtk_star_update_stat(priv, MTK_STAR_REG_C_RXOKPKT, &stats->rx_packets); + mtk_star_update_stat(priv, MTK_STAR_REG_C_TXOKPKT, &stats->tx_packets); + mtk_star_update_stat(priv, MTK_STAR_REG_C_RXOKBYTE, &stats->rx_bytes); + mtk_star_update_stat(priv, MTK_STAR_REG_C_TXOKBYTE, &stats->tx_bytes); + + /* RX & TX multicast. */ + mtk_star_update_stat(priv, MTK_STAR_REG_C_RX_MULTI, &stats->multicast); + mtk_star_update_stat(priv, MTK_STAR_REG_C_TX_MULTI, &stats->multicast); + + /* Collisions. */ + mtk_star_update_stat(priv, MTK_STAR_REG_C_TXPAUSECOL, + &stats->collisions); + mtk_star_update_stat(priv, MTK_STAR_REG_C_TX_LATECOL, + &stats->collisions); + mtk_star_update_stat(priv, MTK_STAR_REG_C_RXRUNT, &stats->collisions); + + /* RX Errors. 
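Looking back at mtk_star_set_mac_addr() in this hunk: the station address is split across the two registers, the first two octets in MY_MAC_H and the remaining four in MY_MAC_L, most significant byte first. A worked example for the invented address 00:11:22:33:44:55:

        high = 0x0011;       /* mac[0] << 8 | mac[1]        */
        low  = 0x22334455;   /* mac[2] << 24 | ... | mac[5] */

so updating the address at runtime is just the two regmap_write() calls the helper issues.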
*/ + mtk_star_update_stat(priv, MTK_STAR_REG_C_RX_LENGTHERR, + &stats->rx_length_errors); + mtk_star_update_stat(priv, MTK_STAR_REG_C_RXLONG, + &stats->rx_over_errors); + mtk_star_update_stat(priv, MTK_STAR_REG_C_RXCRC, &stats->rx_crc_errors); + mtk_star_update_stat(priv, MTK_STAR_REG_C_RX_ALIGNERR, + &stats->rx_frame_errors); + mtk_star_update_stat(priv, MTK_STAR_REG_C_RXDROP, + &stats->rx_fifo_errors); + /* Sum of the general RX error counter + all of the above. */ + mtk_star_update_stat(priv, MTK_STAR_REG_C_RX_RERR, &stats->rx_errors); + stats->rx_errors += stats->rx_length_errors; + stats->rx_errors += stats->rx_over_errors; + stats->rx_errors += stats->rx_crc_errors; + stats->rx_errors += stats->rx_frame_errors; + stats->rx_errors += stats->rx_fifo_errors; +} + +/* This runs in process context and parallel TX and RX paths executing in + * napi context may result in losing some stats data but this should happen + * seldom enough to be acceptable. + */ +static void mtk_star_update_stats_work(struct work_struct *work) +{ + struct mtk_star_priv *priv = container_of(work, struct mtk_star_priv, + stats_work); + + mtk_star_update_stats(priv); + mtk_star_reset_counters(priv); + mtk_star_intr_enable_stats(priv); +} + +static struct sk_buff *mtk_star_alloc_skb(struct net_device *ndev) +{ + uintptr_t tail, offset; + struct sk_buff *skb; + + skb = dev_alloc_skb(MTK_STAR_MAX_FRAME_SIZE); + if (!skb) + return NULL; + + /* Align to 16 bytes. */ + tail = (uintptr_t)skb_tail_pointer(skb); + if (tail & (MTK_STAR_SKB_ALIGNMENT - 1)) { + offset = tail & (MTK_STAR_SKB_ALIGNMENT - 1); + skb_reserve(skb, MTK_STAR_SKB_ALIGNMENT - offset); + } + + /* Ensure 16-byte alignment of the skb pointer: eth_type_trans() will + * extract the Ethernet header (14 bytes) so we need two more bytes. 
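A worked pass through the two reserves (buffer address invented): suppose dev_alloc_skb() hands back a buffer whose tail pointer sits at 0x8006. Then:

        offset = 0x8006 & (MTK_STAR_SKB_ALIGNMENT - 1);   /* 0x6            */
        skb_reserve(skb, MTK_STAR_SKB_ALIGNMENT - offset);/* tail -> 0x8010 */
        skb_reserve(skb, MTK_STAR_IP_ALIGN);              /* data -> 0x8012 */

mtk_star_dma_map_rx() later maps data - 2 == 0x8010 for the hardware, and the IP header behind the 14-byte Ethernet header lands at 0x8020, so both stay 16-byte aligned.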
+ */ + skb_reserve(skb, MTK_STAR_IP_ALIGN); + + return skb; +} + +static int mtk_star_prepare_rx_skbs(struct net_device *ndev) +{ + struct mtk_star_priv *priv = netdev_priv(ndev); + struct mtk_star_ring *ring = &priv->rx_ring; + struct device *dev = mtk_star_get_dev(priv); + struct mtk_star_ring_desc *desc; + struct sk_buff *skb; + dma_addr_t dma_addr; + int i; + + for (i = 0; i < MTK_STAR_NUM_RX_DESCS; i++) { + skb = mtk_star_alloc_skb(ndev); + if (!skb) + return -ENOMEM; + + dma_addr = mtk_star_dma_map_rx(priv, skb); + if (dma_mapping_error(dev, dma_addr)) { + dev_kfree_skb(skb); + return -ENOMEM; + } + + desc = &ring->descs[i]; + desc->data_ptr = dma_addr; + desc->status |= skb_tailroom(skb) & MTK_STAR_DESC_MSK_LEN; + desc->status &= ~MTK_STAR_DESC_BIT_COWN; + ring->skbs[i] = skb; + ring->dma_addrs[i] = dma_addr; + } + + return 0; +} + +static void +mtk_star_ring_free_skbs(struct mtk_star_priv *priv, struct mtk_star_ring *ring, + void (*unmap_func)(struct mtk_star_priv *, + struct mtk_star_ring_desc_data *)) +{ + struct mtk_star_ring_desc_data desc_data; + struct mtk_star_ring_desc *desc; + int i; + + for (i = 0; i < MTK_STAR_RING_NUM_DESCS; i++) { + if (!ring->dma_addrs[i]) + continue; + + desc = &ring->descs[i]; + + desc_data.dma_addr = ring->dma_addrs[i]; + desc_data.skb = ring->skbs[i]; + + unmap_func(priv, &desc_data); + dev_kfree_skb(desc_data.skb); + } +} + +static void mtk_star_free_rx_skbs(struct mtk_star_priv *priv) +{ + struct mtk_star_ring *ring = &priv->rx_ring; + + mtk_star_ring_free_skbs(priv, ring, mtk_star_dma_unmap_rx); +} + +static void mtk_star_free_tx_skbs(struct mtk_star_priv *priv) +{ + struct mtk_star_ring *ring = &priv->tx_ring; + + mtk_star_ring_free_skbs(priv, ring, mtk_star_dma_unmap_tx); +} + +/* All processing for TX and RX happens in the napi poll callback. */ +static irqreturn_t mtk_star_handle_irq(int irq, void *data) +{ + struct mtk_star_priv *priv; + struct net_device *ndev; + bool need_napi = false; + unsigned int status; + + ndev = data; + priv = netdev_priv(ndev); + + if (netif_running(ndev)) { + status = mtk_star_intr_read(priv); + + if (status & MTK_STAR_BIT_INT_STS_TNTC) { + mtk_star_intr_disable_tx(priv); + need_napi = true; + } + + if (status & MTK_STAR_BIT_INT_STS_FNRC) { + mtk_star_intr_disable_rx(priv); + need_napi = true; + } + + if (need_napi) + napi_schedule(&priv->napi); + + /* One of the counters reached 0x8000000 - update stats and + * reset all counters. + */ + if (unlikely(status & MTK_STAR_REG_INT_STS_MIB_CNT_TH)) { + mtk_star_intr_disable_stats(priv); + schedule_work(&priv->stats_work); + } + + mtk_star_intr_ack_all(priv); + } + + return IRQ_HANDLED; +} + +/* Wait for the completion of any previous command - CMD_START bit must be + * cleared by hardware. + */ +static int mtk_star_hash_wait_cmd_start(struct mtk_star_priv *priv) +{ + unsigned int val; + + return regmap_read_poll_timeout_atomic(priv->regs, + MTK_STAR_REG_HASH_CTRL, val, + !(val & MTK_STAR_BIT_HASH_CTRL_CMD_START), + 10, MTK_STAR_WAIT_TIMEOUT); +} + +static int mtk_star_hash_wait_ok(struct mtk_star_priv *priv) +{ + unsigned int val; + int ret; + + /* Wait for BIST_DONE bit. */ + ret = regmap_read_poll_timeout_atomic(priv->regs, + MTK_STAR_REG_HASH_CTRL, val, + val & MTK_STAR_BIT_HASH_CTRL_BIST_DONE, + 10, MTK_STAR_WAIT_TIMEOUT); + if (ret) + return ret; + + /* Check the BIST_OK bit. 
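Both waits in this block build on regmap_read_poll_timeout_atomic(), which re-reads the register until the condition holds or the time budget runs out, returning 0 on success and -ETIMEDOUT otherwise (which the callers simply propagate). Its shape, with the arguments used here:

        ret = regmap_read_poll_timeout_atomic(priv->regs,
                        MTK_STAR_REG_HASH_CTRL, val,
                        val & MTK_STAR_BIT_HASH_CTRL_BIST_DONE,
                        10,                     /* us between reads */
                        MTK_STAR_WAIT_TIMEOUT); /* total budget, us */

The _atomic variant busy-waits instead of sleeping, which keeps these helpers usable in atomic context.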
*/ + regmap_read(priv->regs, MTK_STAR_REG_HASH_CTRL, &val); + if (!(val & MTK_STAR_BIT_HASH_CTRL_BIST_OK)) + return -EIO; + + return 0; +} + +static int mtk_star_set_hashbit(struct mtk_star_priv *priv, + unsigned int hash_addr) +{ + unsigned int val; + int ret; + + ret = mtk_star_hash_wait_cmd_start(priv); + if (ret) + return ret; + + val = hash_addr & MTK_STAR_MSK_HASH_CTRL_HASH_BIT_ADDR; + val |= MTK_STAR_BIT_HASH_CTRL_ACC_CMD; + val |= MTK_STAR_BIT_HASH_CTRL_CMD_START; + val |= MTK_STAR_BIT_HASH_CTRL_BIST_EN; + val |= MTK_STAR_BIT_HASH_CTRL_HASH_BIT_DATA; + regmap_write(priv->regs, MTK_STAR_REG_HASH_CTRL, val); + + return mtk_star_hash_wait_ok(priv); +} + +static int mtk_star_reset_hash_table(struct mtk_star_priv *priv) +{ + int ret; + + ret = mtk_star_hash_wait_cmd_start(priv); + if (ret) + return ret; + + regmap_update_bits(priv->regs, MTK_STAR_REG_HASH_CTRL, + MTK_STAR_BIT_HASH_CTRL_BIST_EN, + MTK_STAR_BIT_HASH_CTRL_BIST_EN); + regmap_update_bits(priv->regs, MTK_STAR_REG_TEST1, + MTK_STAR_BIT_TEST1_RST_HASH_MBIST, + MTK_STAR_BIT_TEST1_RST_HASH_MBIST); + + return mtk_star_hash_wait_ok(priv); +} + +static void mtk_star_phy_config(struct mtk_star_priv *priv) +{ + unsigned int val; + + if (priv->speed == SPEED_1000) + val = MTK_STAR_VAL_PHY_CTRL1_FORCE_SPD_1000M; + else if (priv->speed == SPEED_100) + val = MTK_STAR_VAL_PHY_CTRL1_FORCE_SPD_100M; + else + val = MTK_STAR_VAL_PHY_CTRL1_FORCE_SPD_10M; + val <<= MTK_STAR_OFF_PHY_CTRL1_FORCE_SPD; + + val |= MTK_STAR_BIT_PHY_CTRL1_AN_EN; + val |= MTK_STAR_BIT_PHY_CTRL1_FORCE_FC_RX; + val |= MTK_STAR_BIT_PHY_CTRL1_FORCE_FC_TX; + /* Only full-duplex supported for now. */ + val |= MTK_STAR_BIT_PHY_CTRL1_FORCE_DPX; + + regmap_write(priv->regs, MTK_STAR_REG_PHY_CTRL1, val); + + if (priv->pause) { + val = MTK_STAR_VAL_FC_CFG_SEND_PAUSE_TH_2K; + val <<= MTK_STAR_OFF_FC_CFG_SEND_PAUSE_TH; + val |= MTK_STAR_BIT_FC_CFG_UC_PAUSE_DIR; + } else { + val = 0; + } + + regmap_update_bits(priv->regs, MTK_STAR_REG_FC_CFG, + MTK_STAR_MSK_FC_CFG_SEND_PAUSE_TH | + MTK_STAR_BIT_FC_CFG_UC_PAUSE_DIR, val); + + if (priv->pause) { + val = MTK_STAR_VAL_EXT_CFG_SND_PAUSE_RLS_1K; + val <<= MTK_STAR_OFF_EXT_CFG_SND_PAUSE_RLS; + } else { + val = 0; + } + + regmap_update_bits(priv->regs, MTK_STAR_REG_EXT_CFG, + MTK_STAR_MSK_EXT_CFG_SND_PAUSE_RLS, val); +} + +static void mtk_star_adjust_link(struct net_device *ndev) +{ + struct mtk_star_priv *priv = netdev_priv(ndev); + struct phy_device *phydev = priv->phydev; + bool new_state = false; + + if (phydev->link) { + if (!priv->link) { + priv->link = phydev->link; + new_state = true; + } + + if (priv->speed != phydev->speed) { + priv->speed = phydev->speed; + new_state = true; + } + + if (priv->pause != phydev->pause) { + priv->pause = phydev->pause; + new_state = true; + } + } else { + if (priv->link) { + priv->link = phydev->link; + new_state = true; + } + } + + if (new_state) { + if (phydev->link) + mtk_star_phy_config(priv); + + phy_print_status(ndev->phydev); + } +} + +static void mtk_star_init_config(struct mtk_star_priv *priv) +{ + unsigned int val; + + val = (MTK_STAR_BIT_MII_PAD_OUT_ENABLE | + MTK_STAR_BIT_EXT_MDC_MODE | + MTK_STAR_BIT_SWC_MII_MODE); + + regmap_write(priv->regs, MTK_STAR_REG_SYS_CONF, val); + regmap_update_bits(priv->regs, MTK_STAR_REG_MAC_CLK_CONF, + MTK_STAR_MSK_MAC_CLK_CONF, + MTK_STAR_BIT_CLK_DIV_10); +} + +static void mtk_star_set_mode_rmii(struct mtk_star_priv *priv) +{ + regmap_update_bits(priv->pericfg, MTK_PERICFG_REG_NIC_CFG_CON, + MTK_PERICFG_MSK_NIC_CFG_CON_CFG_MII, + 
MTK_PERICFG_BIT_NIC_CFG_CON_RMII); +} + +static int mtk_star_enable(struct net_device *ndev) +{ + struct mtk_star_priv *priv = netdev_priv(ndev); + unsigned int val; + int ret; + + mtk_star_nic_disable_pd(priv); + mtk_star_intr_disable(priv); + mtk_star_dma_stop(priv); + + mtk_star_set_mac_addr(ndev); + + /* Configure the MAC */ + val = MTK_STAR_VAL_MAC_CFG_IPG_96BIT; + val <<= MTK_STAR_OFF_MAC_CFG_IPG; + val |= MTK_STAR_BIT_MAC_CFG_MAXLEN_1522; + val |= MTK_STAR_BIT_MAC_CFG_AUTO_PAD; + val |= MTK_STAR_BIT_MAC_CFG_CRC_STRIP; + regmap_write(priv->regs, MTK_STAR_REG_MAC_CFG, val); + + /* Enable Hash Table BIST and reset it */ + ret = mtk_star_reset_hash_table(priv); + if (ret) + return ret; + + /* Setup the hashing algorithm */ + regmap_update_bits(priv->regs, MTK_STAR_REG_ARL_CFG, + MTK_STAR_BIT_ARL_CFG_HASH_ALG | + MTK_STAR_BIT_ARL_CFG_MISC_MODE, 0); + + /* Don't strip VLAN tags */ + regmap_update_bits(priv->regs, MTK_STAR_REG_MAC_CFG, + MTK_STAR_BIT_MAC_CFG_VLAN_STRIP, 0); + + /* Setup DMA */ + mtk_star_dma_init(priv); + + ret = mtk_star_prepare_rx_skbs(ndev); + if (ret) + goto err_out; + + /* Request the interrupt */ + ret = request_irq(ndev->irq, mtk_star_handle_irq, + IRQF_TRIGGER_FALLING, ndev->name, ndev); + if (ret) + goto err_free_skbs; + + napi_enable(&priv->napi); + + mtk_star_intr_ack_all(priv); + mtk_star_intr_enable(priv); + + /* Connect to and start PHY */ + priv->phydev = of_phy_connect(ndev, priv->phy_node, + mtk_star_adjust_link, 0, priv->phy_intf); + if (!priv->phydev) { + netdev_err(ndev, "failed to connect to PHY\n"); + goto err_free_irq; + } + + mtk_star_dma_start(priv); + phy_start(priv->phydev); + netif_start_queue(ndev); + + return 0; + +err_free_irq: + free_irq(ndev->irq, ndev); +err_free_skbs: + mtk_star_free_rx_skbs(priv); +err_out: + return ret; +} + +static void mtk_star_disable(struct net_device *ndev) +{ + struct mtk_star_priv *priv = netdev_priv(ndev); + + netif_stop_queue(ndev); + napi_disable(&priv->napi); + mtk_star_intr_disable(priv); + mtk_star_dma_disable(priv); + mtk_star_intr_ack_all(priv); + phy_stop(priv->phydev); + phy_disconnect(priv->phydev); + free_irq(ndev->irq, ndev); + mtk_star_free_rx_skbs(priv); + mtk_star_free_tx_skbs(priv); +} + +static int mtk_star_netdev_open(struct net_device *ndev) +{ + return mtk_star_enable(ndev); +} + +static int mtk_star_netdev_stop(struct net_device *ndev) +{ + mtk_star_disable(ndev); + + return 0; +} + +static int mtk_star_netdev_ioctl(struct net_device *ndev, + struct ifreq *req, int cmd) +{ + if (!netif_running(ndev)) + return -EINVAL; + + return phy_mii_ioctl(ndev->phydev, req, cmd); +} + +static int mtk_star_netdev_start_xmit(struct sk_buff *skb, + struct net_device *ndev) +{ + struct mtk_star_priv *priv = netdev_priv(ndev); + struct mtk_star_ring *ring = &priv->tx_ring; + struct device *dev = mtk_star_get_dev(priv); + struct mtk_star_ring_desc_data desc_data; + + desc_data.dma_addr = mtk_star_dma_map_tx(priv, skb); + if (dma_mapping_error(dev, desc_data.dma_addr)) + goto err_drop_packet; + + desc_data.skb = skb; + desc_data.len = skb->len; + + spin_lock_bh(&priv->lock); + + mtk_star_ring_push_head_tx(ring, &desc_data); + + netdev_sent_queue(ndev, skb->len); + + if (mtk_star_ring_full(ring)) + netif_stop_queue(ndev); + + spin_unlock_bh(&priv->lock); + + mtk_star_dma_resume_tx(priv); + + return NETDEV_TX_OK; + +err_drop_packet: + dev_kfree_skb(skb); + ndev->stats.tx_dropped++; + /* The skb has been freed and counted as dropped, so the stack must not + * requeue it: report it as consumed. + */ + return NETDEV_TX_OK; +} + +/* Returns the number of bytes sent or a negative number on the first + * descriptor owned by DMA.
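+ * mtk_star_tx_complete_all() relies on the negative return value to stop + * reclaiming once it reaches a descriptor the hardware still owns.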
+ */ +static int mtk_star_tx_complete_one(struct mtk_star_priv *priv) +{ + struct mtk_star_ring *ring = &priv->tx_ring; + struct mtk_star_ring_desc_data desc_data; + int ret; + + ret = mtk_star_ring_pop_tail(ring, &desc_data); + if (ret) + return ret; + + mtk_star_dma_unmap_tx(priv, &desc_data); + ret = desc_data.skb->len; + dev_kfree_skb_irq(desc_data.skb); + + return ret; +} + +static void mtk_star_tx_complete_all(struct mtk_star_priv *priv) +{ + struct mtk_star_ring *ring = &priv->tx_ring; + struct net_device *ndev = priv->ndev; + int ret, pkts_compl, bytes_compl; + bool wake = false; + + spin_lock(&priv->lock); + + for (pkts_compl = 0, bytes_compl = 0;; + pkts_compl++, bytes_compl += ret, wake = true) { + if (!mtk_star_ring_descs_available(ring)) + break; + + ret = mtk_star_tx_complete_one(priv); + if (ret < 0) + break; + } + + netdev_completed_queue(ndev, pkts_compl, bytes_compl); + + if (wake && netif_queue_stopped(ndev)) + netif_wake_queue(ndev); + + mtk_star_intr_enable_tx(priv); + + spin_unlock(&priv->lock); +} + +static void mtk_star_netdev_get_stats64(struct net_device *ndev, + struct rtnl_link_stats64 *stats) +{ + struct mtk_star_priv *priv = netdev_priv(ndev); + + mtk_star_update_stats(priv); + + memcpy(stats, &priv->stats, sizeof(*stats)); +} + +static void mtk_star_set_rx_mode(struct net_device *ndev) +{ + struct mtk_star_priv *priv = netdev_priv(ndev); + struct netdev_hw_addr *hw_addr; + unsigned int hash_addr, i; + int ret; + + if (ndev->flags & IFF_PROMISC) { + regmap_update_bits(priv->regs, MTK_STAR_REG_ARL_CFG, + MTK_STAR_BIT_ARL_CFG_MISC_MODE, + MTK_STAR_BIT_ARL_CFG_MISC_MODE); + } else if (netdev_mc_count(ndev) > MTK_STAR_HASHTABLE_MC_LIMIT || + ndev->flags & IFF_ALLMULTI) { + for (i = 0; i < MTK_STAR_HASHTABLE_SIZE_MAX; i++) { + ret = mtk_star_set_hashbit(priv, i); + if (ret) + goto hash_fail; + } + } else { + /* Clear previous settings. */ + ret = mtk_star_reset_hash_table(priv); + if (ret) + goto hash_fail; + + netdev_for_each_mc_addr(hw_addr, ndev) { + hash_addr = (hw_addr->addr[0] & 0x01) << 8; + hash_addr += hw_addr->addr[5]; + ret = mtk_star_set_hashbit(priv, hash_addr); + if (ret) + goto hash_fail; + } + } + + return; + +hash_fail: + if (ret == -ETIMEDOUT) + netdev_err(ndev, "setting hash bit timed out\n"); + else + /* Should be -EIO */ + netdev_err(ndev, "unable to set hash bit"); +} + +static const struct net_device_ops mtk_star_netdev_ops = { + .ndo_open = mtk_star_netdev_open, + .ndo_stop = mtk_star_netdev_stop, + .ndo_start_xmit = mtk_star_netdev_start_xmit, + .ndo_get_stats64 = mtk_star_netdev_get_stats64, + .ndo_set_rx_mode = mtk_star_set_rx_mode, + .ndo_do_ioctl = mtk_star_netdev_ioctl, + .ndo_set_mac_address = eth_mac_addr, + .ndo_validate_addr = eth_validate_addr, +}; + +static void mtk_star_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *info) +{ + strlcpy(info->driver, MTK_STAR_DRVNAME, sizeof(info->driver)); +} + +/* TODO Add ethtool stats. 
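+ * (e.g. expose the MTK_STAR_REG_C_* MIB counters gathered in + * mtk_star_update_stats() through .get_ethtool_stats).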
*/ +static const struct ethtool_ops mtk_star_ethtool_ops = { + .get_drvinfo = mtk_star_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_link_ksettings = phy_ethtool_get_link_ksettings, + .set_link_ksettings = phy_ethtool_set_link_ksettings, +}; + +static int mtk_star_receive_packet(struct mtk_star_priv *priv) +{ + struct mtk_star_ring *ring = &priv->rx_ring; + struct device *dev = mtk_star_get_dev(priv); + struct mtk_star_ring_desc_data desc_data; + struct net_device *ndev = priv->ndev; + struct sk_buff *curr_skb, *new_skb; + dma_addr_t new_dma_addr; + int ret; + + spin_lock(&priv->lock); + ret = mtk_star_ring_pop_tail(ring, &desc_data); + spin_unlock(&priv->lock); + if (ret) + return -1; + + curr_skb = desc_data.skb; + + if ((desc_data.flags & MTK_STAR_DESC_BIT_RX_CRCE) || + (desc_data.flags & MTK_STAR_DESC_BIT_RX_OSIZE)) { + /* Error packet -> drop and reuse skb. */ + new_skb = curr_skb; + goto push_new_skb; + } + + /* Prepare new skb before receiving the current one. Reuse the current + * skb if we fail at any point. + */ + new_skb = mtk_star_alloc_skb(ndev); + if (!new_skb) { + ndev->stats.rx_dropped++; + new_skb = curr_skb; + goto push_new_skb; + } + + new_dma_addr = mtk_star_dma_map_rx(priv, new_skb); + if (dma_mapping_error(dev, new_dma_addr)) { + ndev->stats.rx_dropped++; + dev_kfree_skb(new_skb); + new_skb = curr_skb; + netdev_err(ndev, "DMA mapping error of RX descriptor\n"); + goto push_new_skb; + } + + /* We can't fail anymore at this point: it's safe to unmap the skb. */ + mtk_star_dma_unmap_rx(priv, &desc_data); + + skb_put(desc_data.skb, desc_data.len); + desc_data.skb->ip_summed = CHECKSUM_NONE; + desc_data.skb->protocol = eth_type_trans(desc_data.skb, ndev); + desc_data.skb->dev = ndev; + netif_receive_skb(desc_data.skb); + + /* Only overwrite the descriptor address on the success path: on the + * reuse paths above, desc_data.dma_addr still holds the current skb's + * mapping while new_dma_addr is uninitialized. + */ + desc_data.dma_addr = new_dma_addr; + +push_new_skb: + desc_data.len = skb_tailroom(new_skb); + desc_data.skb = new_skb; + + spin_lock(&priv->lock); + mtk_star_ring_push_head_rx(ring, &desc_data); + spin_unlock(&priv->lock); + + return 0; +} + +static int mtk_star_process_rx(struct mtk_star_priv *priv, int budget) +{ + int received, ret; + + for (received = 0, ret = 0; received < budget && ret == 0; received++) + ret = mtk_star_receive_packet(priv); + + mtk_star_dma_resume_rx(priv); + + return received; +} + +static int mtk_star_poll(struct napi_struct *napi, int budget) +{ + struct mtk_star_priv *priv; + int received = 0; + + priv = container_of(napi, struct mtk_star_priv, napi); + + /* Clean-up all TX descriptors. */ + mtk_star_tx_complete_all(priv); + /* Receive up to $budget packets.
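+ * Receiving fewer than $budget packets means the RX ring is drained: + * complete the NAPI poll and re-enable the RX interrupt.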
*/ + received = mtk_star_process_rx(priv, budget); + + if (received < budget) { + napi_complete_done(napi, received); + mtk_star_intr_enable_rx(priv); + } + + return received; +} + +static void mtk_star_mdio_rwok_clear(struct mtk_star_priv *priv) +{ + regmap_write(priv->regs, MTK_STAR_REG_PHY_CTRL0, + MTK_STAR_BIT_PHY_CTRL0_RWOK); +} + +static int mtk_star_mdio_rwok_wait(struct mtk_star_priv *priv) +{ + unsigned int val; + + return regmap_read_poll_timeout(priv->regs, MTK_STAR_REG_PHY_CTRL0, + val, val & MTK_STAR_BIT_PHY_CTRL0_RWOK, + 10, MTK_STAR_WAIT_TIMEOUT); +} + +static int mtk_star_mdio_read(struct mii_bus *mii, int phy_id, int regnum) +{ + struct mtk_star_priv *priv = mii->priv; + unsigned int val, data; + int ret; + + if (regnum & MII_ADDR_C45) + return -EOPNOTSUPP; + + mtk_star_mdio_rwok_clear(priv); + + val = (regnum << MTK_STAR_OFF_PHY_CTRL0_PREG); + val &= MTK_STAR_MSK_PHY_CTRL0_PREG; + val |= MTK_STAR_BIT_PHY_CTRL0_RDCMD; + + regmap_write(priv->regs, MTK_STAR_REG_PHY_CTRL0, val); + + ret = mtk_star_mdio_rwok_wait(priv); + if (ret) + return ret; + + regmap_read(priv->regs, MTK_STAR_REG_PHY_CTRL0, &data); + + data &= MTK_STAR_MSK_PHY_CTRL0_RWDATA; + data >>= MTK_STAR_OFF_PHY_CTRL0_RWDATA; + + return data; +} + +static int mtk_star_mdio_write(struct mii_bus *mii, int phy_id, + int regnum, u16 data) +{ + struct mtk_star_priv *priv = mii->priv; + unsigned int val; + + if (regnum & MII_ADDR_C45) + return -EOPNOTSUPP; + + mtk_star_mdio_rwok_clear(priv); + + val = data; + val <<= MTK_STAR_OFF_PHY_CTRL0_RWDATA; + val &= MTK_STAR_MSK_PHY_CTRL0_RWDATA; + regnum <<= MTK_STAR_OFF_PHY_CTRL0_PREG; + regnum &= MTK_STAR_MSK_PHY_CTRL0_PREG; + val |= regnum; + val |= MTK_STAR_BIT_PHY_CTRL0_WTCMD; + + regmap_write(priv->regs, MTK_STAR_REG_PHY_CTRL0, val); + + return mtk_star_mdio_rwok_wait(priv); +} + +static int mtk_star_mdio_init(struct net_device *ndev) +{ + struct mtk_star_priv *priv = netdev_priv(ndev); + struct device *dev = mtk_star_get_dev(priv); + struct device_node *of_node, *mdio_node; + int ret; + + of_node = dev->of_node; + + mdio_node = of_get_child_by_name(of_node, "mdio"); + if (!mdio_node) + return -ENODEV; + + if (!of_device_is_available(mdio_node)) { + ret = -ENODEV; + goto out_put_node; + } + + priv->mii = devm_mdiobus_alloc(dev); + if (!priv->mii) { + ret = -ENOMEM; + goto out_put_node; + } + + snprintf(priv->mii->id, MII_BUS_ID_SIZE, "%s", dev_name(dev)); + priv->mii->name = "mtk-mac-mdio"; + priv->mii->parent = dev; + priv->mii->read = mtk_star_mdio_read; + priv->mii->write = mtk_star_mdio_write; + priv->mii->priv = priv; + + ret = of_mdiobus_register(priv->mii, mdio_node); + +out_put_node: + of_node_put(mdio_node); + return ret; +} + +static int mtk_star_suspend(struct device *dev) +{ + struct mtk_star_priv *priv; + struct net_device *ndev; + + ndev = dev_get_drvdata(dev); + priv = netdev_priv(ndev); + + if (netif_running(ndev)) + mtk_star_disable(ndev); + + clk_bulk_disable_unprepare(MTK_STAR_NCLKS, priv->clks); + + return 0; +} + +static int mtk_star_resume(struct device *dev) +{ + struct mtk_star_priv *priv; + struct net_device *ndev; + int ret; + + ndev = dev_get_drvdata(dev); + priv = netdev_priv(ndev); + + ret = clk_bulk_prepare_enable(MTK_STAR_NCLKS, priv->clks); + if (ret) + return ret; + + if (netif_running(ndev)) { + ret = mtk_star_enable(ndev); + if (ret) + clk_bulk_disable_unprepare(MTK_STAR_NCLKS, priv->clks); + } + + return ret; +} + +static void mtk_star_clk_disable_unprepare(void *data) +{ + struct mtk_star_priv *priv = data; + + 
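/* Devres action registered in probe(): undoes clk_bulk_prepare_enable(). */ +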
clk_bulk_disable_unprepare(MTK_STAR_NCLKS, priv->clks); +} + +static void mtk_star_mdiobus_unregister(void *data) +{ + struct mtk_star_priv *priv = data; + + mdiobus_unregister(priv->mii); +} + +static void mtk_star_unregister_netdev(void *data) +{ + struct net_device *ndev = data; + + unregister_netdev(ndev); +} + +static int mtk_star_probe(struct platform_device *pdev) +{ + struct device_node *of_node; + struct mtk_star_priv *priv; + struct net_device *ndev; + struct device *dev; + void __iomem *base; + int ret, i; + + dev = &pdev->dev; + of_node = dev->of_node; + + ndev = devm_alloc_etherdev(dev, sizeof(*priv)); + if (!ndev) + return -ENOMEM; + + priv = netdev_priv(ndev); + priv->ndev = ndev; + SET_NETDEV_DEV(ndev, dev); + platform_set_drvdata(pdev, ndev); + + ndev->min_mtu = ETH_ZLEN; + ndev->max_mtu = MTK_STAR_MAX_FRAME_SIZE; + + spin_lock_init(&priv->lock); + INIT_WORK(&priv->stats_work, mtk_star_update_stats_work); + + base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(base)) + return PTR_ERR(base); + + /* We won't be checking the return values of regmap read & write + * functions. They can only fail for mmio if there's a clock attached + * to regmap which is not the case here. + */ + priv->regs = devm_regmap_init_mmio(dev, base, + &mtk_star_regmap_config); + if (IS_ERR(priv->regs)) + return PTR_ERR(priv->regs); + + priv->pericfg = syscon_regmap_lookup_by_phandle(of_node, + "mediatek,pericfg"); + if (IS_ERR(priv->pericfg)) { + dev_err(dev, "Failed to lookup the PERICFG syscon\n"); + return PTR_ERR(priv->pericfg); + } + + ndev->irq = platform_get_irq(pdev, 0); + if (ndev->irq < 0) + return ndev->irq; + + for (i = 0; i < MTK_STAR_NCLKS; i++) + priv->clks[i].id = mtk_star_clk_names[i]; + ret = devm_clk_bulk_get(dev, MTK_STAR_NCLKS, priv->clks); + if (ret) + return ret; + + ret = clk_bulk_prepare_enable(MTK_STAR_NCLKS, priv->clks); + if (ret) + return ret; + + ret = devm_add_action_or_reset(dev, + mtk_star_clk_disable_unprepare, priv); + if (ret) + return ret; + + ret = of_get_phy_mode(of_node, &priv->phy_intf); + if (ret) { + return ret; + } else if (priv->phy_intf != PHY_INTERFACE_MODE_RMII) { + dev_err(dev, "unsupported phy mode: %s\n", + phy_modes(priv->phy_intf)); + return -EINVAL; + } + + priv->phy_node = of_parse_phandle(of_node, "phy-handle", 0); + if (!priv->phy_node) { + dev_err(dev, "failed to retrieve the phy handle from device tree\n"); + return -ENODEV; + } + + mtk_star_set_mode_rmii(priv); + + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); + if (ret) { + dev_err(dev, "unsupported DMA mask\n"); + return ret; + } + + priv->ring_base = dmam_alloc_coherent(dev, MTK_STAR_DMA_SIZE, + &priv->dma_addr, + GFP_KERNEL | GFP_DMA); + if (!priv->ring_base) + return -ENOMEM; + + mtk_star_nic_disable_pd(priv); + mtk_star_init_config(priv); + + ret = mtk_star_mdio_init(ndev); + if (ret) + return ret; + + ret = devm_add_action_or_reset(dev, mtk_star_mdiobus_unregister, priv); + if (ret) + return ret; + + ret = eth_platform_get_mac_address(dev, ndev->dev_addr); + if (ret || !is_valid_ether_addr(ndev->dev_addr)) + eth_hw_addr_random(ndev); + + ndev->netdev_ops = &mtk_star_netdev_ops; + ndev->ethtool_ops = &mtk_star_ethtool_ops; + + netif_napi_add(ndev, &priv->napi, mtk_star_poll, MTK_STAR_NAPI_WEIGHT); + + ret = register_netdev(ndev); + if (ret) + return ret; + + ret = devm_add_action_or_reset(dev, mtk_star_unregister_netdev, ndev); + if (ret) + return ret; + + return 0; +} + +static const struct of_device_id mtk_star_of_match[] = { + { .compatible = "mediatek,mt8516-eth", 
}, + { .compatible = "mediatek,mt8518-eth", }, + { .compatible = "mediatek,mt8175-eth", }, + { } +}; +MODULE_DEVICE_TABLE(of, mtk_star_of_match); + +static SIMPLE_DEV_PM_OPS(mtk_star_pm_ops, + mtk_star_suspend, mtk_star_resume); + +static struct platform_driver mtk_star_driver = { + .driver = { + .name = MTK_STAR_DRVNAME, + .pm = &mtk_star_pm_ops, + .of_match_table = of_match_ptr(mtk_star_of_match), + }, + .probe = mtk_star_probe, +}; +module_platform_driver(mtk_star_driver); + +MODULE_AUTHOR("Bartosz Golaszewski <[email protected]>"); +MODULE_DESCRIPTION("Mediatek STAR Ethernet MAC Driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 0a02b804b2fe..4906aee6798d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -36,7 +36,6 @@ #include <linux/etherdevice.h> #include <linux/timecounter.h> #include <linux/net_tstamp.h> -#include <linux/ptp_clock_kernel.h> #include <linux/crash_dump.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/qp.h> @@ -53,6 +52,7 @@ #include "wq.h" #include "mlx5_core.h" #include "en_stats.h" +#include "en/dcbnl.h" #include "en/fs.h" #include "lib/hv_vhca.h" @@ -69,8 +69,6 @@ struct page_pool; #define MLX5E_HW2SW_MTU(params, hwmtu) ((hwmtu) - ((params)->hard_mtu)) #define MLX5E_SW2HW_MTU(params, swmtu) ((swmtu) + ((params)->hard_mtu)) -#define MLX5E_MAX_PRIORITY 8 -#define MLX5E_MAX_DSCP 64 #define MLX5E_MAX_NUM_TC 8 #define MLX5_RX_HEADROOM NET_SKB_PAD @@ -243,10 +241,6 @@ enum mlx5e_priv_flag { #define MLX5E_GET_PFLAG(params, pflag) (!!((params)->pflags & (BIT(pflag)))) -#ifdef CONFIG_MLX5_CORE_EN_DCB -#define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */ -#endif - struct mlx5e_params { u8 log_sq_size; u8 rq_wq_type; @@ -271,42 +265,6 @@ struct mlx5e_params { int hard_mtu; }; -#ifdef CONFIG_MLX5_CORE_EN_DCB -struct mlx5e_cee_config { - /* bw pct for priority group */ - u8 pg_bw_pct[CEE_DCBX_MAX_PGS]; - u8 prio_to_pg_map[CEE_DCBX_MAX_PRIO]; - bool pfc_setting[CEE_DCBX_MAX_PRIO]; - bool pfc_enable; -}; - -enum { - MLX5_DCB_CHG_RESET, - MLX5_DCB_NO_CHG, - MLX5_DCB_CHG_NO_RESET, -}; - -struct mlx5e_dcbx { - enum mlx5_dcbx_oper_mode mode; - struct mlx5e_cee_config cee_cfg; /* pending configuration */ - u8 dscp_app_cnt; - - /* The only setting that cannot be read from FW */ - u8 tc_tsa[IEEE_8021QAZ_MAX_TCS]; - u8 cap; - - /* Buffer configuration */ - bool manual_buffer; - u32 cable_len; - u32 xoff; -}; - -struct mlx5e_dcbx_dp { - u8 dscp2prio[MLX5E_MAX_DSCP]; - u8 trust_state; -}; -#endif - enum { MLX5E_RQ_STATE_ENABLED, MLX5E_RQ_STATE_RECOVERING, @@ -1064,13 +1022,6 @@ static inline bool mlx5_tx_swp_supported(struct mlx5_core_dev *mdev) } extern const struct ethtool_ops mlx5e_ethtool_ops; -#ifdef CONFIG_MLX5_CORE_EN_DCB -extern const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops; -int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets); -void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv); -void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv); -void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv); -#endif int mlx5e_create_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir, u32 *in); @@ -1078,7 +1029,8 @@ void mlx5e_destroy_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir); int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev); void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev); -int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb); +int 
mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb, + bool enable_mc_lb); /* common netdev helpers */ void mlx5e_create_q_counters(struct mlx5e_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h b/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h new file mode 100644 index 000000000000..7be6b2d36b60 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies. */ + +#ifndef __MLX5E_DCBNL_H__ +#define __MLX5E_DCBNL_H__ + +#ifdef CONFIG_MLX5_CORE_EN_DCB + +#define MLX5E_MAX_PRIORITY (8) + +struct mlx5e_cee_config { + /* bw pct for priority group */ + u8 pg_bw_pct[CEE_DCBX_MAX_PGS]; + u8 prio_to_pg_map[CEE_DCBX_MAX_PRIO]; + bool pfc_setting[CEE_DCBX_MAX_PRIO]; + bool pfc_enable; +}; + +struct mlx5e_dcbx { + enum mlx5_dcbx_oper_mode mode; + struct mlx5e_cee_config cee_cfg; /* pending configuration */ + u8 dscp_app_cnt; + + /* The only setting that cannot be read from FW */ + u8 tc_tsa[IEEE_8021QAZ_MAX_TCS]; + u8 cap; + + /* Buffer configuration */ + bool manual_buffer; + u32 cable_len; + u32 xoff; +}; + +#define MLX5E_MAX_DSCP (64) + +struct mlx5e_dcbx_dp { + u8 dscp2prio[MLX5E_MAX_DSCP]; + u8 trust_state; +}; + +void mlx5e_dcbnl_build_netdev(struct net_device *netdev); +void mlx5e_dcbnl_build_rep_netdev(struct net_device *netdev); +void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv); +void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv); +void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv); +#else +static inline void mlx5e_dcbnl_build_netdev(struct net_device *netdev) {} +static inline void mlx5e_dcbnl_build_rep_netdev(struct net_device *netdev) {} +static inline void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv) {} +static inline void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv) {} +static inline void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv) {} +#endif + +#endif /* __MLX5E_DCBNL_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 5568ded97e0b..98263f00ee43 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -24,6 +24,7 @@ #define MLX5_CT_ZONE_MASK GENMASK(MLX5_CT_ZONE_BITS - 1, 0) #define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1) #define MLX5_CT_STATE_TRK_BIT BIT(2) +#define MLX5_CT_STATE_NAT_BIT BIT(3) #define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen * 8) #define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0) @@ -61,6 +62,15 @@ struct mlx5_ct_zone_rule { bool nat; }; +struct mlx5_tc_ct_pre { + struct mlx5_flow_table *fdb; + struct mlx5_flow_group *flow_grp; + struct mlx5_flow_group *miss_grp; + struct mlx5_flow_handle *flow_rule; + struct mlx5_flow_handle *miss_rule; + struct mlx5_modify_hdr *modify_hdr; +}; + struct mlx5_ct_ft { struct rhash_head node; u16 zone; @@ -68,6 +78,8 @@ struct mlx5_ct_ft { struct nf_flowtable *nf_ft; struct mlx5_tc_ct_priv *ct_priv; struct rhashtable ct_entries_ht; + struct mlx5_tc_ct_pre pre_ct; + struct mlx5_tc_ct_pre pre_ct_nat; }; struct mlx5_ct_entry { @@ -426,6 +438,7 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_eswitch *esw = ct_priv->esw; struct mlx5_modify_hdr *mod_hdr; struct flow_action_entry *meta; + u16 ct_state = 0; int err; meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule); @@ -444,11 +457,13 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, 
&mod_acts); if (err) goto err_mapping; + + ct_state |= MLX5_CT_STATE_NAT_BIT; } + ct_state |= MLX5_CT_STATE_ESTABLISHED_BIT | MLX5_CT_STATE_TRK_BIT; err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts, - (MLX5_CT_STATE_ESTABLISHED_BIT | - MLX5_CT_STATE_TRK_BIT), + ct_state, meta->ct_metadata.mark, meta->ct_metadata.labels[0], tupleid); @@ -791,6 +806,238 @@ mlx5_tc_ct_parse_action(struct mlx5e_priv *priv, return 0; } +static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft, + struct mlx5_tc_ct_pre *pre_ct, + bool nat) +{ + struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv; + struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {}; + struct mlx5_core_dev *dev = ct_priv->esw->dev; + struct mlx5_flow_table *fdb = pre_ct->fdb; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_modify_hdr *mod_hdr; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + u32 ctstate; + u16 zone; + int err; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + zone = ct_ft->zone & MLX5_CT_ZONE_MASK; + err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ZONE_TO_REG, zone); + if (err) { + ct_dbg("Failed to set zone register mapping"); + goto err_mapping; + } + + mod_hdr = mlx5_modify_header_alloc(dev, + MLX5_FLOW_NAMESPACE_FDB, + pre_mod_acts.num_actions, + pre_mod_acts.actions); + + if (IS_ERR(mod_hdr)) { + err = PTR_ERR(mod_hdr); + ct_dbg("Failed to create pre ct mod hdr"); + goto err_mapping; + } + pre_ct->modify_hdr = mod_hdr; + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + flow_act.modify_hdr = mod_hdr; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + + /* add flow rule */ + mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, + zone, MLX5_CT_ZONE_MASK); + ctstate = MLX5_CT_STATE_TRK_BIT; + if (nat) + ctstate |= MLX5_CT_STATE_NAT_BIT; + mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate); + + dest.ft = ct_priv->post_ct; + rule = mlx5_add_flow_rules(fdb, spec, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + ct_dbg("Failed to add pre ct flow rule zone %d", zone); + goto err_flow_rule; + } + pre_ct->flow_rule = rule; + + /* add miss rule */ + memset(spec, 0, sizeof(*spec)); + dest.ft = nat ? 
ct_priv->ct_nat : ct_priv->ct; + rule = mlx5_add_flow_rules(fdb, spec, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + ct_dbg("Failed to add pre ct miss rule zone %d", zone); + goto err_miss_rule; + } + pre_ct->miss_rule = rule; + + dealloc_mod_hdr_actions(&pre_mod_acts); + kvfree(spec); + return 0; + +err_miss_rule: + mlx5_del_flow_rules(pre_ct->flow_rule); +err_flow_rule: + mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr); +err_mapping: + dealloc_mod_hdr_actions(&pre_mod_acts); + kvfree(spec); + return err; +} + +static void +tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft, + struct mlx5_tc_ct_pre *pre_ct) +{ + struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv; + struct mlx5_core_dev *dev = ct_priv->esw->dev; + + mlx5_del_flow_rules(pre_ct->flow_rule); + mlx5_del_flow_rules(pre_ct->miss_rule); + mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr); +} + +static int +mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft, + struct mlx5_tc_ct_pre *pre_ct, + bool nat) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv; + struct mlx5_core_dev *dev = ct_priv->esw->dev; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *g; + u32 metadata_reg_c_2_mask; + u32 *flow_group_in; + void *misc; + int err; + + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + if (!ns) { + err = -EOPNOTSUPP; + ct_dbg("Failed to get FDB flow namespace"); + return err; + } + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED; + ft_attr.prio = FDB_TC_OFFLOAD; + ft_attr.max_fte = 2; + ft_attr.level = 1; + ft = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + ct_dbg("Failed to create pre ct table"); + goto out_free; + } + pre_ct->fdb = ft; + + /* create flow group */ + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS_2); + + misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, + match_criteria.misc_parameters_2); + + metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK; + metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16); + if (nat) + metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16); + + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2, + metadata_reg_c_2_mask); + + g = mlx5_create_flow_group(ft, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + ct_dbg("Failed to create pre ct group"); + goto err_flow_grp; + } + pre_ct->flow_grp = g; + + /* create miss group */ + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); + g = mlx5_create_flow_group(ft, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + ct_dbg("Failed to create pre ct miss group"); + goto err_miss_grp; + } + pre_ct->miss_grp = g; + + err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat); + if (err) + goto err_add_rules; + + kvfree(flow_group_in); + return 0; + +err_add_rules: + mlx5_destroy_flow_group(pre_ct->miss_grp); +err_miss_grp: + mlx5_destroy_flow_group(pre_ct->flow_grp); +err_flow_grp: + mlx5_destroy_flow_table(ft); +out_free: + kvfree(flow_group_in); + return err; +} + +static void +mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft, + struct mlx5_tc_ct_pre *pre_ct) 
+{ + tc_ct_pre_ct_del_rules(ct_ft, pre_ct); + mlx5_destroy_flow_group(pre_ct->miss_grp); + mlx5_destroy_flow_group(pre_ct->flow_grp); + mlx5_destroy_flow_table(pre_ct->fdb); +} + +static int +mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft) +{ + int err; + + err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct, false); + if (err) + return err; + + err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct_nat, true); + if (err) + goto err_pre_ct_nat; + + return 0; + +err_pre_ct_nat: + mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct); + return err; +} + +static void +mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft) +{ + mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct_nat); + mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct); +} + static struct mlx5_ct_ft * mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone, struct nf_flowtable *nf_ft) @@ -813,6 +1060,10 @@ mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone, ft->ct_priv = ct_priv; refcount_set(&ft->refcount, 1); + err = mlx5_tc_ct_alloc_pre_ct_tables(ft); + if (err) + goto err_alloc_pre_ct; + err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params); if (err) goto err_init; @@ -834,6 +1085,8 @@ err_add_cb: err_insert: rhashtable_destroy(&ft->ct_entries_ht); err_init: + mlx5_tc_ct_free_pre_ct_tables(ft); +err_alloc_pre_ct: kfree(ft); return ERR_PTR(err); } @@ -859,21 +1112,40 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) rhashtable_free_and_destroy(&ft->ct_entries_ht, mlx5_tc_ct_flush_ft_entry, ct_priv); + mlx5_tc_ct_free_pre_ct_tables(ft); kfree(ft); } /* We translate the tc filter with CT action to the following HW model: * - * +-------------------+ +--------------------+ +--------------+ - * + pre_ct (tc chain) +----->+ CT (nat or no nat) +--->+ post_ct +-----> - * + original match + | + tuple + zone match + | + fte_id match + | - * +-------------------+ | +--------------------+ | +--------------+ | - * v v v - * set chain miss mapping set mark original - * set fte_id set label filter - * set zone set established actions - * set tunnel_id do nat (if needed) - * do decap + * +---------------------+ + * + fdb prio (tc chain) + + * + original match + + * +---------------------+ + * | set chain miss mapping + * | set fte_id + * | set tunnel_id + * | do decap + * v + * +---------------------+ + * + pre_ct/pre_ct_nat + if matches +---------------------+ + * + zone+nat match +---------------->+ post_ct (see below) + + * +---------------------+ set zone +---------------------+ + * | set zone + * v + * +--------------------+ + * + CT (nat or no nat) + + * + tuple + zone match + + * +--------------------+ + * | set mark + * | set label + * | set established + * | do nat (if needed) + * v + * +--------------+ + * + post_ct + original filter actions + * + fte_id match +------------------------> + * +--------------+ */ static int __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, @@ -888,7 +1160,7 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, struct mlx5_flow_spec *post_ct_spec = NULL; struct mlx5_eswitch *esw = ct_priv->esw; struct mlx5_esw_flow_attr *pre_ct_attr; - struct mlx5_modify_hdr *mod_hdr; + struct mlx5_modify_hdr *mod_hdr; struct mlx5_flow_handle *rule; struct mlx5_ct_flow *ct_flow; int chain_mapping = 0, err; @@ -951,14 +1223,6 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, goto err_mapping; } - err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts, ZONE_TO_REG, - attr->ct_attr.zone & - MLX5_CT_ZONE_MASK); - if (err) { - ct_dbg("Failed to set zone register mapping"); - goto err_mapping; - } - err = 
mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts, FTEID_TO_REG, fte_id); if (err) { @@ -1018,7 +1282,7 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv, /* Change original rule point to ct table */ pre_ct_attr->dest_chain = 0; - pre_ct_attr->dest_ft = nat ? ct_priv->ct_nat : ct_priv->ct; + pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.fdb : ft->pre_ct.fdb; ct_flow->pre_ct_rule = mlx5_eswitch_add_offloaded_rule(esw, orig_spec, pre_ct_attr); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index dce2bbbf9109..bfd3e1161bc6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -6,25 +6,6 @@ #include "en.h" -#define MLX5E_SQ_NOPS_ROOM (MLX5_SEND_WQE_MAX_WQEBBS - 1) -#define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\ - MLX5E_SQ_NOPS_ROOM) - -#ifndef CONFIG_MLX5_EN_TLS -#define MLX5E_SQ_TLS_ROOM (0) -#else -/* TLS offload requires additional stop_room for: - * - a resync SKB. - * kTLS offload requires fixed additional stop_room for: - * - a static params WQE, and a progress params WQE. - * The additional MTU-depending room for the resync DUMP WQEs - * will be calculated and added in runtime. - */ -#define MLX5E_SQ_TLS_ROOM \ - (MLX5_SEND_WQE_MAX_WQEBBS + \ - MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS) -#endif - #define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start)) enum mlx5e_icosq_wqe_type { @@ -331,4 +312,25 @@ mlx5e_set_eseg_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg, } } +static inline u16 mlx5e_stop_room_for_wqe(u16 wqe_size) +{ + BUILD_BUG_ON(PAGE_SIZE / MLX5_SEND_WQE_BB < MLX5_SEND_WQE_MAX_WQEBBS); + + /* A WQE must not cross the page boundary, hence two conditions: + * 1. Its size must not exceed the page size. + * 2. If the WQE size is X, and the space remaining in a page is less + * than X, this space needs to be padded with NOPs. So, one WQE of + * size X may require up to X-1 WQEBBs of padding, which makes the + * stop room of X-1 + X. + * WQE size is also limited by the hardware limit. 
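+ * For example, the largest WQE with X = MLX5_SEND_WQE_MAX_WQEBBS needs a + * stop room of X + (X - 1) = 2 * X - 1 WQEBBs, which is the value + * returned below.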
+ */ + + if (__builtin_constant_p(wqe_size)) + BUILD_BUG_ON(wqe_size > MLX5_SEND_WQE_MAX_WQEBBS); + else + WARN_ON_ONCE(wqe_size > MLX5_SEND_WQE_MAX_WQEBBS); + + return wqe_size * 2 - 1; +} + #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index a2a194525b15..3bea1d4be53b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -240,8 +240,10 @@ enum { static int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq) { if (unlikely(!sq->mpwqe.wqe)) { + const u16 stop_room = mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS); + if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, - MLX5E_XDPSQ_STOP_ROOM))) { + stop_room))) { /* SQ is full, ring doorbell */ mlx5e_xmit_xdp_doorbell(sq); sq->stats->full++; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h index 2e4e117aeb49..ca48c293151b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -40,8 +40,6 @@ (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) #define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */) -#define MLX5E_XDPSQ_STOP_ROOM (MLX5E_SQ_STOP_ROOM) - #define MLX5E_XDP_INLINE_WQE_SZ_THRSD (256 - sizeof(struct mlx5_wqe_inline_seg)) #define MLX5E_XDP_INLINE_WQE_MAX_DS_CNT \ DIV_ROUND_UP(MLX5E_XDP_INLINE_WQE_SZ_THRSD, MLX5_SEND_WQE_DS) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c index 46725cd743a3..417a2d9dd248 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c @@ -4,6 +4,19 @@ #include "en.h" #include "en_accel/ktls.h" +u16 mlx5e_ktls_get_stop_room(struct mlx5e_txqsq *sq) +{ + u16 num_dumps, stop_room = 0; + + num_dumps = mlx5e_ktls_dumps_num_wqes(sq, MAX_SKB_FRAGS, TLS_MAX_PAYLOAD_SIZE); + + stop_room += mlx5e_stop_room_for_wqe(MLX5E_KTLS_STATIC_WQEBBS); + stop_room += mlx5e_stop_room_for_wqe(MLX5E_KTLS_PROGRESS_WQEBBS); + stop_room += num_dumps * mlx5e_stop_room_for_wqe(MLX5E_KTLS_DUMP_WQEBBS); + + return stop_room; +} + static int mlx5e_ktls_create_tis(struct mlx5_core_dev *mdev, u32 *tisn) { u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h index dabbc5f226ce..c6180892cfcb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h @@ -102,15 +102,16 @@ bool mlx5e_ktls_handle_tx_skb(struct tls_context *tls_ctx, struct mlx5e_txqsq *s void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi, u32 *dma_fifo_cc); +u16 mlx5e_ktls_get_stop_room(struct mlx5e_txqsq *sq); + static inline u8 -mlx5e_ktls_dumps_num_wqebbs(struct mlx5e_txqsq *sq, unsigned int nfrags, - unsigned int sync_len) +mlx5e_ktls_dumps_num_wqes(struct mlx5e_txqsq *sq, unsigned int nfrags, + unsigned int sync_len) { /* Given the MTU and sync_len, calculates an upper bound for the - * number of WQEBBs needed for the TX resync DUMP WQEs of a record. + * number of DUMP WQEs needed for the TX resync of a record. 
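+ * (one DUMP WQE per page fragment plus one per hw_mtu-sized chunk of + * the sync_len bytes being replayed).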
*/ - return MLX5E_KTLS_DUMP_WQEBBS * - (nfrags + DIV_ROUND_UP(sync_len, sq->hw_mtu)); + return nfrags + DIV_ROUND_UP(sync_len, sq->hw_mtu); } #else @@ -122,7 +123,6 @@ static inline void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi, u32 *dma_fifo_cc) {} - #endif #endif /* __MLX5E_TLS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c index fba561ffe1d4..c27e9a609d51 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c @@ -240,3 +240,17 @@ void mlx5e_tls_cleanup(struct mlx5e_priv *priv) kfree(tls); priv->tls = NULL; } + +u16 mlx5e_tls_get_stop_room(struct mlx5e_txqsq *sq) +{ + struct mlx5_core_dev *mdev = sq->channel->mdev; + + if (!mlx5_accel_is_tls_device(mdev)) + return 0; + + if (MLX5_CAP_GEN(mdev, tls_tx)) + return mlx5e_ktls_get_stop_room(sq); + + /* Resync SKB. */ + return mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h index 9015f3f7792d..9219bdb2786e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h @@ -94,6 +94,8 @@ int mlx5e_tls_get_count(struct mlx5e_priv *priv); int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data); int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data); +u16 mlx5e_tls_get_stop_room(struct mlx5e_txqsq *sq); + #else static inline void mlx5e_tls_build_netdev(struct mlx5e_priv *priv) @@ -108,6 +110,11 @@ static inline int mlx5e_tls_get_count(struct mlx5e_priv *priv) { return 0; } static inline int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data) { return 0; } static inline int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data) { return 0; } +static inline u16 mlx5e_tls_get_stop_room(struct mlx5e_txqsq *sq) +{ + return 0; +} + #endif #endif /* __MLX5E_TLS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index af3228b3f303..1e42c7ae621b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -141,10 +141,12 @@ void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev) memset(res, 0, sizeof(*res)); } -int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb) +int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb, + bool enable_mc_lb) { struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_tir *tir; + u8 lb_flags = 0; int err = 0; u32 tirn = 0; int inlen; @@ -158,8 +160,13 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb) } if (enable_uc_lb) - MLX5_SET(modify_tir_in, in, ctx.self_lb_block, - MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST); + lb_flags = MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST; + + if (enable_mc_lb) + lb_flags |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST; + + if (lb_flags) + MLX5_SET(modify_tir_in, in, ctx.self_lb_block, lb_flags); MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 47874d34156b..ec7b332d74c2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -35,6 +35,8 @@ #include "en/port.h" #include "en/port_buffer.h" +#define 
MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */ + #define MLX5E_100MB (100000) #define MLX5E_1GB (1000000) @@ -49,6 +51,12 @@ enum { MLX5E_LOWEST_PRIO_GROUP = 0, }; +enum { + MLX5_DCB_CHG_RESET, + MLX5_DCB_NO_CHG, + MLX5_DCB_CHG_NO_RESET, +}; + #define MLX5_DSCP_SUPPORTED(mdev) (MLX5_CAP_GEN(mdev, qcam_reg) && \ MLX5_CAP_QCAM_REG(mdev, qpts) && \ MLX5_CAP_QCAM_REG(mdev, qpdpm)) @@ -238,7 +246,7 @@ static void mlx5e_build_tc_tx_bw(struct ieee_ets *ets, u8 *tc_tx_bw, * Report both group #0 and #1 as ETS type. * All the tcs in group #0 will be reported with 0% BW. */ -int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets) +static int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets) { struct mlx5_core_dev *mdev = priv->mdev; u8 tc_tx_bw[IEEE_8021QAZ_MAX_TCS]; @@ -1009,6 +1017,24 @@ const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops = { .setpfcstate = mlx5e_dcbnl_setpfcstate, }; +void mlx5e_dcbnl_build_netdev(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + + if (MLX5_CAP_GEN(mdev, vport_group_manager) && MLX5_CAP_GEN(mdev, qos)) + netdev->dcbnl_ops = &mlx5e_dcbnl_ops; +} + +void mlx5e_dcbnl_build_rep_netdev(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + + if (MLX5_CAP_GEN(mdev, qos)) + netdev->dcbnl_ops = &mlx5e_dcbnl_ops; +} + static void mlx5e_dcbnl_query_dcbx_mode(struct mlx5e_priv *priv, enum mlx5_dcbx_oper_mode *mode) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 4041132723a3..8b86c6ded302 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -66,7 +66,6 @@ #include "en/devlink.h" #include "lib/mlx5.h" - bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) { bool striding_rq_umr = MLX5_CAP_GEN(mdev, striding_rq) && @@ -1103,6 +1102,22 @@ static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa) return 0; } +static int mlx5e_calc_sq_stop_room(struct mlx5e_txqsq *sq, u8 log_sq_size) +{ + int sq_size = 1 << log_sq_size; + + sq->stop_room = mlx5e_tls_get_stop_room(sq); + sq->stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS); + + if (WARN_ON(sq->stop_room >= sq_size)) { + netdev_err(sq->channel->netdev, "Stop room %hu is bigger than the SQ size %d\n", + sq->stop_room, sq_size); + return -ENOSPC; + } + + return 0; +} + static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work); static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, int txq_ix, @@ -1127,20 +1142,16 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, sq->min_inline_mode = params->tx_min_inline_mode; sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); sq->stats = &c->priv->channel_stats[c->ix].sq[tc]; - sq->stop_room = MLX5E_SQ_STOP_ROOM; INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert)) set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state); if (MLX5_IPSEC_DEV(c->priv->mdev)) set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); -#ifdef CONFIG_MLX5_EN_TLS - if (mlx5_accel_is_tls_device(c->priv->mdev)) { + if (mlx5_accel_is_tls_device(c->priv->mdev)) set_bit(MLX5E_SQ_STATE_TLS, &sq->state); - sq->stop_room += MLX5E_SQ_TLS_ROOM + - mlx5e_ktls_dumps_num_wqebbs(sq, MAX_SKB_FRAGS, - TLS_MAX_PAYLOAD_SIZE); - } -#endif + err = mlx5e_calc_sq_stop_room(sq, params->log_sq_size); + if (err) + return err; 
param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); @@ -4896,10 +4907,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->netdev_ops = &mlx5e_netdev_ops; -#ifdef CONFIG_MLX5_CORE_EN_DCB - if (MLX5_CAP_GEN(mdev, vport_group_manager) && MLX5_CAP_GEN(mdev, qos)) - netdev->dcbnl_ops = &mlx5e_dcbnl_ops; -#endif + mlx5e_dcbnl_build_netdev(netdev); netdev->watchdog_timeo = 15 * HZ; @@ -5187,9 +5195,7 @@ static int mlx5e_init_nic_tx(struct mlx5e_priv *priv) return err; } -#ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_dcbnl_initialize(priv); -#endif return 0; } @@ -5216,9 +5222,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) mlx5e_hv_vhca_stats_create(priv); if (netdev->reg_state != NETREG_REGISTERED) return; -#ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_dcbnl_init_app(priv); -#endif queue_work(priv->wq, &priv->set_rx_mode_work); @@ -5233,10 +5237,8 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; -#ifdef CONFIG_MLX5_CORE_EN_DCB if (priv->netdev->reg_state == NETREG_REGISTERED) mlx5e_dcbnl_delete_app(priv); -#endif rtnl_lock(); if (netif_running(priv->netdev)) @@ -5256,7 +5258,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) int mlx5e_update_nic_rx(struct mlx5e_priv *priv) { - return mlx5e_refresh_tirs(priv, false); + return mlx5e_refresh_tirs(priv, false, false); } static const struct mlx5e_profile mlx5e_nic_profile = { @@ -5533,9 +5535,7 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) mlx5e_devlink_port_type_eth_set(priv); -#ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_dcbnl_init_app(priv); -#endif return priv; err_devlink_port_unregister: @@ -5558,9 +5558,7 @@ static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv) } #endif priv = vpriv; -#ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_dcbnl_delete_app(priv); -#endif unregister_netdev(priv->netdev); mlx5e_devlink_port_unregister(priv); mlx5e_detach(mdev, vpriv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 1eac7a53d56f..52351c105627 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1544,10 +1544,7 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev) /* we want a persistent mac for the uplink rep */ mlx5_query_mac_address(mdev, netdev->dev_addr); netdev->ethtool_ops = &mlx5e_uplink_rep_ethtool_ops; -#ifdef CONFIG_MLX5_CORE_EN_DCB - if (MLX5_CAP_GEN(mdev, qos)) - netdev->dcbnl_ops = &mlx5e_dcbnl_ops; -#endif + mlx5e_dcbnl_build_rep_netdev(netdev); } else { netdev->netdev_ops = &mlx5e_netdev_ops_rep; eth_hw_addr_random(netdev); @@ -1929,10 +1926,8 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv) mlx5_lag_add(mdev, netdev); priv->events_nb.notifier_call = uplink_rep_async_event; mlx5_notifier_register(mdev, &priv->events_nb); -#ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_dcbnl_initialize(priv); mlx5e_dcbnl_init_app(priv); -#endif } static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) @@ -1940,9 +1935,7 @@ static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_rep_priv *rpriv = priv->ppriv; -#ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_dcbnl_delete_app(priv); -#endif mlx5_notifier_unregister(mdev, &priv->events_nb); cancel_work_sync(&rpriv->uplink_priv.reoffload_flows_work); mlx5_lag_remove(mdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index d7b24e8905f1..fdba52136c5d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1513,6 +1513,7 @@ out: #ifdef CONFIG_MLX5_CORE_IPOIB +#define MLX5_IB_GRH_SGID_OFFSET 8 #define MLX5_IB_GRH_DGID_OFFSET 24 #define MLX5_GID_SIZE 16 @@ -1526,6 +1527,7 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, struct net_device *netdev; struct mlx5e_priv *priv; char *pseudo_header; + u32 flags_rqpn; u32 qpn; u8 *dgid; u8 g; @@ -1547,7 +1549,8 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, tstamp = &priv->tstamp; stats = &priv->channel_stats[rq->ix].rq; - g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3; + flags_rqpn = be32_to_cpu(cqe->flags_rqpn); + g = (flags_rqpn >> 28) & 3; dgid = skb->data + MLX5_IB_GRH_DGID_OFFSET; if ((!g) || dgid[0] != 0xff) skb->pkt_type = PACKET_HOST; @@ -1556,9 +1559,15 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, else skb->pkt_type = PACKET_MULTICAST; - /* TODO: IB/ipoib: Allow mcast packets from other VFs - * 68996a6e760e5c74654723eeb57bf65628ae87f4 + /* Drop packets that this interface sent, ie multicast packets + * that the HCA has replicated. */ + if (g && (qpn == (flags_rqpn & 0xffffff)) && + (memcmp(netdev->dev_addr + 4, skb->data + MLX5_IB_GRH_SGID_OFFSET, + MLX5_GID_SIZE) == 0)) { + skb->dev = NULL; + return; + } skb_pull(skb, MLX5_IB_GRH_BYTES); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c index bbff8d8ded76..46790216ce86 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c @@ -234,7 +234,7 @@ static int mlx5e_test_loopback_setup(struct mlx5e_priv *priv, return err; } - err = mlx5e_refresh_tirs(priv, true); + err = mlx5e_refresh_tirs(priv, true, false); if (err) goto out; @@ -263,7 +263,7 @@ static void mlx5e_test_loopback_cleanup(struct mlx5e_priv *priv, mlx5_nic_vport_update_local_lb(priv->mdev, false); dev_remove_pack(&lbtp->pt); - mlx5e_refresh_tirs(priv, false); + mlx5e_refresh_tirs(priv, false, false); } #define MLX5E_LB_VERIFY_TIMEOUT (msecs_to_jiffies(200)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 2da45e9b9b6d..52af6023a4b4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1755,11 +1755,13 @@ skip_search: list_for_each_entry(iter, match_head, list) { g = iter->g; - if (!g->node.active) - continue; - nested_down_write_ref_node(&g->node, FS_LOCK_PARENT); + if (!g->node.active) { + up_write_ref_node(&g->node, false); + continue; + } + err = insert_fte(g, fte); if (err) { up_write_ref_node(&g->node, false); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 035bd21e5d4e..7db70b6ccc07 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -262,6 +262,11 @@ void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, u32 qpn) mlx5_cmd_exec_in(mdev, destroy_qp, in); } +int mlx5i_update_nic_rx(struct mlx5e_priv *priv) +{ + return mlx5e_refresh_tirs(priv, true, true); +} + int mlx5i_create_tis(struct mlx5_core_dev *mdev, u32 underlay_qpn, u32 *tisn) { u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; @@ -456,7 +461,7 @@ static const struct mlx5e_profile 
mlx5i_nic_profile = { .cleanup_rx = mlx5i_cleanup_rx, .enable = NULL, /* mlx5i_enable */ .disable = NULL, /* mlx5i_disable */ - .update_rx = mlx5e_update_nic_rx, + .update_rx = mlx5i_update_nic_rx, .update_stats = NULL, /* mlx5i_update_stats */ .update_carrier = NULL, /* no HW update in IB link */ .rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h index c4aa47018c0e..79071a15c4ca 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h @@ -92,6 +92,8 @@ int mlx5i_init(struct mlx5_core_dev *mdev, void *ppriv); void mlx5i_cleanup(struct mlx5e_priv *priv); +int mlx5i_update_nic_rx(struct mlx5e_priv *priv); + /* Get child interface nic profile */ const struct mlx5e_profile *mlx5i_pkey_get_profile(void); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c index b9af37ad40bf..f70367018862 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c @@ -347,7 +347,7 @@ static const struct mlx5e_profile mlx5i_pkey_nic_profile = { .cleanup_rx = mlx5i_pkey_cleanup_rx, .enable = NULL, .disable = NULL, - .update_rx = mlx5e_update_nic_rx, + .update_rx = mlx5i_update_nic_rx, .update_stats = NULL, .rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe, .rx_handlers.handle_rx_cqe_mpwqe = NULL, /* Not supported */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index 43f97601b500..ef0706d15a5b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -32,6 +32,7 @@ #include <linux/clocksource.h> #include <linux/highmem.h> +#include <linux/ptp_clock_kernel.h> #include <rdma/mlx5-abi.h> #include "lib/eq.h" #include "en.h" @@ -66,6 +67,26 @@ enum { MLX5_MTPPS_FS_ENH_OUT_PER_ADJ = BIT(0x7), }; +static u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev, + struct ptp_system_timestamp *sts) +{ + u32 timer_h, timer_h1, timer_l; + + timer_h = ioread32be(&dev->iseg->internal_timer_h); + ptp_read_system_prets(sts); + timer_l = ioread32be(&dev->iseg->internal_timer_l); + ptp_read_system_postts(sts); + timer_h1 = ioread32be(&dev->iseg->internal_timer_h); + if (timer_h != timer_h1) { + /* wrap around */ + ptp_read_system_prets(sts); + timer_l = ioread32be(&dev->iseg->internal_timer_l); + ptp_read_system_postts(sts); + } + + return (u64)timer_l | (u64)timer_h1 << 32; +} + static u64 read_internal_timer(const struct cyclecounter *cc) { struct mlx5_clock *clock = container_of(cc, struct mlx5_clock, cycles); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 742ba012c234..4d2e1e982460 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -672,26 +672,6 @@ int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id) return mlx5_cmd_exec_in(dev, disable_hca, in); } -u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev, - struct ptp_system_timestamp *sts) -{ - u32 timer_h, timer_h1, timer_l; - - timer_h = ioread32be(&dev->iseg->internal_timer_h); - ptp_read_system_prets(sts); - timer_l = ioread32be(&dev->iseg->internal_timer_l); - ptp_read_system_postts(sts); - timer_h1 = ioread32be(&dev->iseg->internal_timer_h); - if 
(timer_h != timer_h1) { - /* wrap around */ - ptp_read_system_prets(sts); - timer_l = ioread32be(&dev->iseg->internal_timer_l); - ptp_read_system_postts(sts); - } - - return (u64)timer_l | (u64)timer_h1 << 32; -} - static int mlx5_core_set_issi(struct mlx5_core_dev *dev) { u32 query_out[MLX5_ST_SZ_DW(query_issi_out)] = {}; @@ -1217,10 +1197,9 @@ int mlx5_load_one(struct mlx5_core_dev *dev, bool boot) mlx5_register_device(dev); set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); -out: - mutex_unlock(&dev->intf_state_mutex); - return err; + mutex_unlock(&dev->intf_state_mutex); + return 0; err_devlink_reg: mlx5_unload(dev); @@ -1230,17 +1209,15 @@ err_load: function_teardown: mlx5_function_teardown(dev, boot); dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; +out: mutex_unlock(&dev->intf_state_mutex); - return err; } void mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup) { - if (cleanup) { + if (cleanup) mlx5_unregister_device(dev); - mlx5_drain_health_wq(dev); - } mutex_lock(&dev->intf_state_mutex); if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { @@ -1383,6 +1360,7 @@ static void remove_one(struct pci_dev *pdev) mlx5_crdump_disable(dev); mlx5_devlink_unregister(devlink); + mlx5_drain_health_wq(dev); mlx5_unload_one(dev, true); mlx5_pci_close(dev); mlx5_mdev_uninit(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index a8fb43a85d1d..fc1649dac11b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -38,7 +38,6 @@ #include <linux/sched.h> #include <linux/if_link.h> #include <linux/firmware.h> -#include <linux/ptp_clock_kernel.h> #include <linux/mlx5/cq.h> #include <linux/mlx5/fs.h> #include <linux/mlx5/driver.h> @@ -141,8 +140,6 @@ int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, u32 element_id); int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages); -u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev, - struct ptp_system_timestamp *sts); void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev); void mlx5_cmd_flush(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 8ce78f42dfc0..5ddd18639a1e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -156,15 +156,21 @@ static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id, return err; } -static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr) +static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr, u16 func_id) { - struct fw_page *fp; + struct fw_page *fp = NULL; + struct fw_page *iter; unsigned n; - if (list_empty(&dev->priv.free_list)) + list_for_each_entry(iter, &dev->priv.free_list, list) { + if (iter->func_id != func_id) + continue; + fp = iter; + } + + if (list_empty(&dev->priv.free_list) || !fp) return -ENOMEM; - fp = list_entry(dev->priv.free_list.next, struct fw_page, list); n = find_first_bit(&fp->bitmask, 8 * sizeof(fp->bitmask)); if (n >= MLX5_NUM_4K_IN_PAGE) { mlx5_core_warn(dev, "alloc 4k bug\n"); @@ -182,35 +188,35 @@ static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr) #define MLX5_U64_4K_PAGE_MASK ((~(u64)0U) << PAGE_SHIFT) -static void free_fwp(struct mlx5_core_dev *dev, struct fw_page *fwp) +static void free_fwp(struct mlx5_core_dev *dev, 
struct fw_page *fwp, + bool in_free_list) { - int n = (fwp->addr & ~MLX5_U64_4K_PAGE_MASK) >> MLX5_ADAPTER_PAGE_SHIFT; - - fwp->free_count++; - set_bit(n, &fwp->bitmask); - if (fwp->free_count == MLX5_NUM_4K_IN_PAGE) { - rb_erase(&fwp->rb_node, &dev->priv.page_root); - if (fwp->free_count != 1) - list_del(&fwp->list); - dma_unmap_page(dev->device, fwp->addr & MLX5_U64_4K_PAGE_MASK, - PAGE_SIZE, DMA_BIDIRECTIONAL); - __free_page(fwp->page); - kfree(fwp); - } else if (fwp->free_count == 1) { - list_add(&fwp->list, &dev->priv.free_list); - } + rb_erase(&fwp->rb_node, &dev->priv.page_root); + if (in_free_list) + list_del(&fwp->list); + dma_unmap_page(dev->device, fwp->addr & MLX5_U64_4K_PAGE_MASK, + PAGE_SIZE, DMA_BIDIRECTIONAL); + __free_page(fwp->page); + kfree(fwp); } -static void free_addr(struct mlx5_core_dev *dev, u64 addr) +static void free_4k(struct mlx5_core_dev *dev, u64 addr) { struct fw_page *fwp; + int n; fwp = find_fw_page(dev, addr & MLX5_U64_4K_PAGE_MASK); if (!fwp) { mlx5_core_warn_rl(dev, "page not found\n"); return; } - free_fwp(dev, fwp); + n = (addr & ~MLX5_U64_4K_PAGE_MASK) >> MLX5_ADAPTER_PAGE_SHIFT; + fwp->free_count++; + set_bit(n, &fwp->bitmask); + if (fwp->free_count == MLX5_NUM_4K_IN_PAGE) + free_fwp(dev, fwp, fwp->free_count != 1); + else if (fwp->free_count == 1) + list_add(&fwp->list, &dev->priv.free_list); } static int alloc_system_page(struct mlx5_core_dev *dev, u16 func_id) @@ -295,7 +301,7 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, for (i = 0; i < npages; i++) { retry: - err = alloc_4k(dev, &addr); + err = alloc_4k(dev, &addr, func_id); if (err) { if (err == -ENOMEM) err = alloc_system_page(dev, func_id); @@ -334,7 +340,7 @@ retry: out_4k: for (i--; i >= 0; i--) - free_addr(dev, MLX5_GET64(manage_pages_in, in, pas[i])); + free_4k(dev, MLX5_GET64(manage_pages_in, in, pas[i])); out_free: kvfree(in); if (notify_fail) @@ -355,8 +361,8 @@ static void release_all_pages(struct mlx5_core_dev *dev, u32 func_id, p = rb_next(p); if (fwp->func_id != func_id) continue; - free_fwp(dev, fwp); - npages++; + npages += (MLX5_NUM_4K_IN_PAGE - fwp->free_count); + free_fwp(dev, fwp, fwp->free_count); } dev->priv.fw_pages -= npages; @@ -440,7 +446,7 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, } for (i = 0; i < num_claimed; i++) - free_addr(dev, MLX5_GET64(manage_pages_out, out, pas[i])); + free_4k(dev, MLX5_GET64(manage_pages_out, out, pas[i])); if (nclaimed) *nclaimed = num_claimed; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index b286fe158820..51e1b3930c56 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -30,14 +30,14 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, return -EOPNOTSUPP; act = flow_action_first_entry_get(flow_action); - if (act->hw_stats == FLOW_ACTION_HW_STATS_ANY || - act->hw_stats == FLOW_ACTION_HW_STATS_IMMEDIATE) { + if (act->hw_stats & FLOW_ACTION_HW_STATS_DISABLED) { + /* Nothing to do */ + } else if (act->hw_stats & FLOW_ACTION_HW_STATS_IMMEDIATE) { /* Count action is inserted first */ err = mlxsw_sp_acl_rulei_act_count(mlxsw_sp, rulei, extack); if (err) return err; - } else if (act->hw_stats != FLOW_ACTION_HW_STATS_DISABLED && - act->hw_stats != FLOW_ACTION_HW_STATS_DONT_CARE) { + } else { NL_SET_ERR_MSG_MOD(extack, "Unsupported action HW stats type"); return -EOPNOTSUPP; } diff --git 
a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c index fbf714d027d8..3a13b17cd1b8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c @@ -12,6 +12,25 @@ #include "spectrum.h" #include "spectrum_trap.h" +struct mlxsw_sp_trap_policer_item { + struct devlink_trap_policer policer; + u16 hw_id; +}; + +struct mlxsw_sp_trap_group_item { + struct devlink_trap_group group; + u16 hw_group_id; + u8 priority; + u8 tc; +}; + +#define MLXSW_SP_TRAP_LISTENERS_MAX 3 + +struct mlxsw_sp_trap_item { + struct devlink_trap trap; + struct mlxsw_listener listeners_arr[MLXSW_SP_TRAP_LISTENERS_MAX]; +}; + /* All driver-specific traps must be documented in * Documentation/networking/devlink/mlxsw.rst */ @@ -176,136 +195,255 @@ static void mlxsw_sp_rx_exception_listener(struct sk_buff *skb, u8 local_port, 1 << MLXSW_REG_QPCR_LOWEST_CBS) /* Ordered by policer identifier */ -static const struct devlink_trap_policer mlxsw_sp_trap_policers_arr[] = { - MLXSW_SP_TRAP_POLICER(1, 10 * 1024, 128), -}; - -static const struct devlink_trap_group mlxsw_sp_trap_groups_arr[] = { - DEVLINK_TRAP_GROUP_GENERIC(L2_DROPS, 1), - DEVLINK_TRAP_GROUP_GENERIC(L3_DROPS, 1), - DEVLINK_TRAP_GROUP_GENERIC(TUNNEL_DROPS, 1), - DEVLINK_TRAP_GROUP_GENERIC(ACL_DROPS, 1), +static const struct mlxsw_sp_trap_policer_item +mlxsw_sp_trap_policer_items_arr[] = { + { + .policer = MLXSW_SP_TRAP_POLICER(1, 10 * 1024, 128), + }, }; -static const struct devlink_trap mlxsw_sp_traps_arr[] = { - MLXSW_SP_TRAP_DROP(SMAC_MC, L2_DROPS), - MLXSW_SP_TRAP_DROP(VLAN_TAG_MISMATCH, L2_DROPS), - MLXSW_SP_TRAP_DROP(INGRESS_VLAN_FILTER, L2_DROPS), - MLXSW_SP_TRAP_DROP(INGRESS_STP_FILTER, L2_DROPS), - MLXSW_SP_TRAP_DROP(EMPTY_TX_LIST, L2_DROPS), - MLXSW_SP_TRAP_DROP(PORT_LOOPBACK_FILTER, L2_DROPS), - MLXSW_SP_TRAP_DROP(BLACKHOLE_ROUTE, L3_DROPS), - MLXSW_SP_TRAP_DROP(NON_IP_PACKET, L3_DROPS), - MLXSW_SP_TRAP_DROP(UC_DIP_MC_DMAC, L3_DROPS), - MLXSW_SP_TRAP_DROP(DIP_LB, L3_DROPS), - MLXSW_SP_TRAP_DROP(SIP_MC, L3_DROPS), - MLXSW_SP_TRAP_DROP(SIP_LB, L3_DROPS), - MLXSW_SP_TRAP_DROP(CORRUPTED_IP_HDR, L3_DROPS), - MLXSW_SP_TRAP_DROP(IPV4_SIP_BC, L3_DROPS), - MLXSW_SP_TRAP_DROP(IPV6_MC_DIP_RESERVED_SCOPE, L3_DROPS), - MLXSW_SP_TRAP_DROP(IPV6_MC_DIP_INTERFACE_LOCAL_SCOPE, L3_DROPS), - MLXSW_SP_TRAP_EXCEPTION(MTU_ERROR, L3_DROPS), - MLXSW_SP_TRAP_EXCEPTION(TTL_ERROR, L3_DROPS), - MLXSW_SP_TRAP_EXCEPTION(RPF, L3_DROPS), - MLXSW_SP_TRAP_EXCEPTION(REJECT_ROUTE, L3_DROPS), - MLXSW_SP_TRAP_EXCEPTION(UNRESOLVED_NEIGH, L3_DROPS), - MLXSW_SP_TRAP_EXCEPTION(IPV4_LPM_UNICAST_MISS, L3_DROPS), - MLXSW_SP_TRAP_EXCEPTION(IPV6_LPM_UNICAST_MISS, L3_DROPS), - MLXSW_SP_TRAP_DRIVER_DROP(IRIF_DISABLED, L3_DROPS), - MLXSW_SP_TRAP_DRIVER_DROP(ERIF_DISABLED, L3_DROPS), - MLXSW_SP_TRAP_DROP(NON_ROUTABLE, L3_DROPS), - MLXSW_SP_TRAP_EXCEPTION(DECAP_ERROR, TUNNEL_DROPS), - MLXSW_SP_TRAP_DROP(OVERLAY_SMAC_MC, TUNNEL_DROPS), - MLXSW_SP_TRAP_DROP_EXT(INGRESS_FLOW_ACTION_DROP, ACL_DROPS, - DEVLINK_TRAP_METADATA_TYPE_F_FA_COOKIE), - MLXSW_SP_TRAP_DROP_EXT(EGRESS_FLOW_ACTION_DROP, ACL_DROPS, - DEVLINK_TRAP_METADATA_TYPE_F_FA_COOKIE), +static const struct mlxsw_sp_trap_group_item mlxsw_sp_trap_group_items_arr[] = { + { + .group = DEVLINK_TRAP_GROUP_GENERIC(L2_DROPS, 1), + .hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_L2_DISCARDS, + .priority = 0, + .tc = 1, + }, + { + .group = DEVLINK_TRAP_GROUP_GENERIC(L3_DROPS, 1), + .hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_L3_DISCARDS, + .priority = 0, 
+ .tc = 1, + }, + { + .group = DEVLINK_TRAP_GROUP_GENERIC(TUNNEL_DROPS, 1), + .hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_TUNNEL_DISCARDS, + .priority = 0, + .tc = 1, + }, + { + .group = DEVLINK_TRAP_GROUP_GENERIC(ACL_DROPS, 1), + .hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_ACL_DISCARDS, + .priority = 0, + .tc = 1, + }, }; -static const struct mlxsw_listener mlxsw_sp_listeners_arr[] = { - MLXSW_SP_RXL_DISCARD(ING_PACKET_SMAC_MC, L2_DISCARDS), - MLXSW_SP_RXL_DISCARD(ING_SWITCH_VTAG_ALLOW, L2_DISCARDS), - MLXSW_SP_RXL_DISCARD(ING_SWITCH_VLAN, L2_DISCARDS), - MLXSW_SP_RXL_DISCARD(ING_SWITCH_STP, L2_DISCARDS), - MLXSW_SP_RXL_DISCARD(LOOKUP_SWITCH_UC, L2_DISCARDS), - MLXSW_SP_RXL_DISCARD(LOOKUP_SWITCH_MC_NULL, L2_DISCARDS), - MLXSW_SP_RXL_DISCARD(LOOKUP_SWITCH_LB, L2_DISCARDS), - MLXSW_SP_RXL_DISCARD(ROUTER2, L3_DISCARDS), - MLXSW_SP_RXL_DISCARD(ING_ROUTER_NON_IP_PACKET, L3_DISCARDS), - MLXSW_SP_RXL_DISCARD(ING_ROUTER_UC_DIP_MC_DMAC, L3_DISCARDS), - MLXSW_SP_RXL_DISCARD(ING_ROUTER_DIP_LB, L3_DISCARDS), - MLXSW_SP_RXL_DISCARD(ING_ROUTER_SIP_MC, L3_DISCARDS), - MLXSW_SP_RXL_DISCARD(ING_ROUTER_SIP_LB, L3_DISCARDS), - MLXSW_SP_RXL_DISCARD(ING_ROUTER_CORRUPTED_IP_HDR, L3_DISCARDS), - MLXSW_SP_RXL_DISCARD(ING_ROUTER_IPV4_SIP_BC, L3_DISCARDS), - MLXSW_SP_RXL_DISCARD(IPV6_MC_DIP_RESERVED_SCOPE, L3_DISCARDS), - MLXSW_SP_RXL_DISCARD(IPV6_MC_DIP_INTERFACE_LOCAL_SCOPE, L3_DISCARDS), - MLXSW_SP_RXL_EXCEPTION(MTUERROR, L3_DISCARDS, TRAP_TO_CPU), - MLXSW_SP_RXL_EXCEPTION(TTLERROR, L3_DISCARDS, TRAP_TO_CPU), - MLXSW_SP_RXL_EXCEPTION(RPF, L3_DISCARDS, TRAP_TO_CPU), - MLXSW_SP_RXL_EXCEPTION(RTR_INGRESS1, L3_DISCARDS, TRAP_TO_CPU), - MLXSW_SP_RXL_EXCEPTION(HOST_MISS_IPV4, L3_DISCARDS, TRAP_TO_CPU), - MLXSW_SP_RXL_EXCEPTION(HOST_MISS_IPV6, L3_DISCARDS, TRAP_TO_CPU), - MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER3, L3_DISCARDS, - TRAP_EXCEPTION_TO_CPU), - MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER_LPM4, L3_DISCARDS, - TRAP_EXCEPTION_TO_CPU), - MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER_LPM6, L3_DISCARDS, - TRAP_EXCEPTION_TO_CPU), - MLXSW_SP_RXL_DISCARD(ROUTER_IRIF_EN, L3_DISCARDS), - MLXSW_SP_RXL_DISCARD(ROUTER_ERIF_EN, L3_DISCARDS), - MLXSW_SP_RXL_DISCARD(NON_ROUTABLE, L3_DISCARDS), - MLXSW_SP_RXL_EXCEPTION(DECAP_ECN0, TUNNEL_DISCARDS, - TRAP_EXCEPTION_TO_CPU), - MLXSW_SP_RXL_EXCEPTION(IPIP_DECAP_ERROR, TUNNEL_DISCARDS, - TRAP_EXCEPTION_TO_CPU), - MLXSW_SP_RXL_EXCEPTION(DISCARD_DEC_PKT, TUNNEL_DISCARDS, - TRAP_EXCEPTION_TO_CPU), - MLXSW_SP_RXL_DISCARD(OVERLAY_SMAC_MC, TUNNEL_DISCARDS), - MLXSW_SP_RXL_ACL_DISCARD(INGRESS_ACL, ACL_DISCARDS, DUMMY), - MLXSW_SP_RXL_ACL_DISCARD(EGRESS_ACL, ACL_DISCARDS, DUMMY), -}; - -/* Mapping between hardware trap and devlink trap. Multiple hardware traps can - * be mapped to the same devlink trap. Order is according to - * 'mlxsw_sp_listeners_arr'. 
- */ -static const u16 mlxsw_sp_listener_devlink_map[] = { - DEVLINK_TRAP_GENERIC_ID_SMAC_MC, - DEVLINK_TRAP_GENERIC_ID_VLAN_TAG_MISMATCH, - DEVLINK_TRAP_GENERIC_ID_INGRESS_VLAN_FILTER, - DEVLINK_TRAP_GENERIC_ID_INGRESS_STP_FILTER, - DEVLINK_TRAP_GENERIC_ID_EMPTY_TX_LIST, - DEVLINK_TRAP_GENERIC_ID_EMPTY_TX_LIST, - DEVLINK_TRAP_GENERIC_ID_PORT_LOOPBACK_FILTER, - DEVLINK_TRAP_GENERIC_ID_BLACKHOLE_ROUTE, - DEVLINK_TRAP_GENERIC_ID_NON_IP_PACKET, - DEVLINK_TRAP_GENERIC_ID_UC_DIP_MC_DMAC, - DEVLINK_TRAP_GENERIC_ID_DIP_LB, - DEVLINK_TRAP_GENERIC_ID_SIP_MC, - DEVLINK_TRAP_GENERIC_ID_SIP_LB, - DEVLINK_TRAP_GENERIC_ID_CORRUPTED_IP_HDR, - DEVLINK_TRAP_GENERIC_ID_IPV4_SIP_BC, - DEVLINK_TRAP_GENERIC_ID_IPV6_MC_DIP_RESERVED_SCOPE, - DEVLINK_TRAP_GENERIC_ID_IPV6_MC_DIP_INTERFACE_LOCAL_SCOPE, - DEVLINK_TRAP_GENERIC_ID_MTU_ERROR, - DEVLINK_TRAP_GENERIC_ID_TTL_ERROR, - DEVLINK_TRAP_GENERIC_ID_RPF, - DEVLINK_TRAP_GENERIC_ID_REJECT_ROUTE, - DEVLINK_TRAP_GENERIC_ID_UNRESOLVED_NEIGH, - DEVLINK_TRAP_GENERIC_ID_UNRESOLVED_NEIGH, - DEVLINK_TRAP_GENERIC_ID_UNRESOLVED_NEIGH, - DEVLINK_TRAP_GENERIC_ID_IPV4_LPM_UNICAST_MISS, - DEVLINK_TRAP_GENERIC_ID_IPV6_LPM_UNICAST_MISS, - DEVLINK_MLXSW_TRAP_ID_IRIF_DISABLED, - DEVLINK_MLXSW_TRAP_ID_ERIF_DISABLED, - DEVLINK_TRAP_GENERIC_ID_NON_ROUTABLE, - DEVLINK_TRAP_GENERIC_ID_DECAP_ERROR, - DEVLINK_TRAP_GENERIC_ID_DECAP_ERROR, - DEVLINK_TRAP_GENERIC_ID_DECAP_ERROR, - DEVLINK_TRAP_GENERIC_ID_OVERLAY_SMAC_MC, - DEVLINK_TRAP_GENERIC_ID_INGRESS_FLOW_ACTION_DROP, - DEVLINK_TRAP_GENERIC_ID_EGRESS_FLOW_ACTION_DROP, +static const struct mlxsw_sp_trap_item mlxsw_sp_trap_items_arr[] = { + { + .trap = MLXSW_SP_TRAP_DROP(SMAC_MC, L2_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_DISCARD(ING_PACKET_SMAC_MC, L2_DISCARDS), + }, + }, + { + .trap = MLXSW_SP_TRAP_DROP(VLAN_TAG_MISMATCH, L2_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_DISCARD(ING_SWITCH_VTAG_ALLOW, + L2_DISCARDS), + }, + }, + { + .trap = MLXSW_SP_TRAP_DROP(INGRESS_VLAN_FILTER, L2_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_DISCARD(ING_SWITCH_VLAN, L2_DISCARDS), + }, + }, + { + .trap = MLXSW_SP_TRAP_DROP(INGRESS_STP_FILTER, L2_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_DISCARD(ING_SWITCH_STP, L2_DISCARDS), + }, + }, + { + .trap = MLXSW_SP_TRAP_DROP(EMPTY_TX_LIST, L2_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_DISCARD(LOOKUP_SWITCH_UC, L2_DISCARDS), + MLXSW_SP_RXL_DISCARD(LOOKUP_SWITCH_MC_NULL, L2_DISCARDS), + }, + }, + { + .trap = MLXSW_SP_TRAP_DROP(PORT_LOOPBACK_FILTER, L2_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_DISCARD(LOOKUP_SWITCH_LB, L2_DISCARDS), + }, + }, + { + .trap = MLXSW_SP_TRAP_DROP(BLACKHOLE_ROUTE, L3_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_DISCARD(ROUTER2, L3_DISCARDS), + }, + }, + { + .trap = MLXSW_SP_TRAP_DROP(NON_IP_PACKET, L3_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_DISCARD(ING_ROUTER_NON_IP_PACKET, + L3_DISCARDS), + }, + }, + { + .trap = MLXSW_SP_TRAP_DROP(UC_DIP_MC_DMAC, L3_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_DISCARD(ING_ROUTER_UC_DIP_MC_DMAC, + L3_DISCARDS), + }, + }, + { + .trap = MLXSW_SP_TRAP_DROP(DIP_LB, L3_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_DISCARD(ING_ROUTER_DIP_LB, L3_DISCARDS), + }, + }, + { + .trap = MLXSW_SP_TRAP_DROP(SIP_MC, L3_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_DISCARD(ING_ROUTER_SIP_MC, L3_DISCARDS), + }, + }, + { + .trap = MLXSW_SP_TRAP_DROP(SIP_LB, L3_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_DISCARD(ING_ROUTER_SIP_LB, L3_DISCARDS), + }, + }, + { + .trap = MLXSW_SP_TRAP_DROP(CORRUPTED_IP_HDR, L3_DROPS), + .listeners_arr = { + 
MLXSW_SP_RXL_DISCARD(ING_ROUTER_CORRUPTED_IP_HDR, + L3_DISCARDS), + }, + }, + { + .trap = MLXSW_SP_TRAP_DROP(IPV4_SIP_BC, L3_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_DISCARD(ING_ROUTER_IPV4_SIP_BC, + L3_DISCARDS), + }, + }, + { + .trap = MLXSW_SP_TRAP_DROP(IPV6_MC_DIP_RESERVED_SCOPE, + L3_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_DISCARD(IPV6_MC_DIP_RESERVED_SCOPE, + L3_DISCARDS), + }, + }, + { + .trap = MLXSW_SP_TRAP_DROP(IPV6_MC_DIP_INTERFACE_LOCAL_SCOPE, + L3_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_DISCARD(IPV6_MC_DIP_INTERFACE_LOCAL_SCOPE, + L3_DISCARDS), + }, + }, + { + .trap = MLXSW_SP_TRAP_EXCEPTION(MTU_ERROR, L3_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_EXCEPTION(MTUERROR, L3_DISCARDS, + TRAP_TO_CPU), + }, + }, + { + .trap = MLXSW_SP_TRAP_EXCEPTION(TTL_ERROR, L3_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_EXCEPTION(TTLERROR, L3_DISCARDS, + TRAP_TO_CPU), + }, + }, + { + .trap = MLXSW_SP_TRAP_EXCEPTION(RPF, L3_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_EXCEPTION(RPF, L3_DISCARDS, TRAP_TO_CPU), + }, + }, + { + .trap = MLXSW_SP_TRAP_EXCEPTION(REJECT_ROUTE, L3_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_EXCEPTION(RTR_INGRESS1, L3_DISCARDS, + TRAP_TO_CPU), + }, + }, + { + .trap = MLXSW_SP_TRAP_EXCEPTION(UNRESOLVED_NEIGH, L3_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_EXCEPTION(HOST_MISS_IPV4, L3_DISCARDS, + TRAP_TO_CPU), + MLXSW_SP_RXL_EXCEPTION(HOST_MISS_IPV6, L3_DISCARDS, + TRAP_TO_CPU), + MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER3, L3_DISCARDS, + TRAP_EXCEPTION_TO_CPU), + }, + }, + { + .trap = MLXSW_SP_TRAP_EXCEPTION(IPV4_LPM_UNICAST_MISS, + L3_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER_LPM4, L3_DISCARDS, + TRAP_EXCEPTION_TO_CPU), + }, + }, + { + .trap = MLXSW_SP_TRAP_EXCEPTION(IPV6_LPM_UNICAST_MISS, + L3_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER_LPM6, L3_DISCARDS, + TRAP_EXCEPTION_TO_CPU), + }, + }, + { + .trap = MLXSW_SP_TRAP_DRIVER_DROP(IRIF_DISABLED, L3_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_DISCARD(ROUTER_IRIF_EN, L3_DISCARDS), + }, + }, + { + .trap = MLXSW_SP_TRAP_DRIVER_DROP(ERIF_DISABLED, L3_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_DISCARD(ROUTER_ERIF_EN, L3_DISCARDS), + }, + }, + { + .trap = MLXSW_SP_TRAP_DROP(NON_ROUTABLE, L3_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_DISCARD(NON_ROUTABLE, L3_DISCARDS), + }, + }, + { + .trap = MLXSW_SP_TRAP_EXCEPTION(DECAP_ERROR, TUNNEL_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_EXCEPTION(DECAP_ECN0, TUNNEL_DISCARDS, + TRAP_EXCEPTION_TO_CPU), + MLXSW_SP_RXL_EXCEPTION(IPIP_DECAP_ERROR, + TUNNEL_DISCARDS, + TRAP_EXCEPTION_TO_CPU), + MLXSW_SP_RXL_EXCEPTION(DISCARD_DEC_PKT, TUNNEL_DISCARDS, + TRAP_EXCEPTION_TO_CPU), + }, + }, + { + .trap = MLXSW_SP_TRAP_DROP(OVERLAY_SMAC_MC, TUNNEL_DROPS), + .listeners_arr = { + MLXSW_SP_RXL_DISCARD(OVERLAY_SMAC_MC, TUNNEL_DISCARDS), + }, + }, + { + .trap = MLXSW_SP_TRAP_DROP_EXT(INGRESS_FLOW_ACTION_DROP, + ACL_DROPS, + DEVLINK_TRAP_METADATA_TYPE_F_FA_COOKIE), + .listeners_arr = { + MLXSW_SP_RXL_ACL_DISCARD(INGRESS_ACL, ACL_DISCARDS, + DUMMY), + }, + }, + { + .trap = MLXSW_SP_TRAP_DROP_EXT(EGRESS_FLOW_ACTION_DROP, + ACL_DROPS, + DEVLINK_TRAP_METADATA_TYPE_F_FA_COOKIE), + .listeners_arr = { + MLXSW_SP_RXL_ACL_DISCARD(EGRESS_ACL, ACL_DISCARDS, + DUMMY), + }, + }, }; #define MLXSW_SP_THIN_POLICER_ID (MLXSW_REG_HTGT_TRAP_GROUP_MAX + 1) @@ -313,12 +451,40 @@ static const u16 mlxsw_sp_listener_devlink_map[] = { static struct mlxsw_sp_trap_policer_item * mlxsw_sp_trap_policer_item_lookup(struct mlxsw_sp *mlxsw_sp, u32 id) { - struct 
mlxsw_sp_trap_policer_item *policer_item; struct mlxsw_sp_trap *trap = mlxsw_sp->trap; + int i; - list_for_each_entry(policer_item, &trap->policer_item_list, list) { - if (policer_item->id == id) - return policer_item; + for (i = 0; i < trap->policers_count; i++) { + if (trap->policer_items_arr[i].policer.id == id) + return &trap->policer_items_arr[i]; + } + + return NULL; +} + +static struct mlxsw_sp_trap_group_item * +mlxsw_sp_trap_group_item_lookup(struct mlxsw_sp *mlxsw_sp, u16 id) +{ + struct mlxsw_sp_trap *trap = mlxsw_sp->trap; + int i; + + for (i = 0; i < trap->groups_count; i++) { + if (trap->group_items_arr[i].group.id == id) + return &trap->group_items_arr[i]; + } + + return NULL; +} + +static struct mlxsw_sp_trap_item * +mlxsw_sp_trap_item_lookup(struct mlxsw_sp *mlxsw_sp, u16 id) +{ + struct mlxsw_sp_trap *trap = mlxsw_sp->trap; + int i; + + for (i = 0; i < trap->traps_count; i++) { + if (trap->trap_items_arr[i].trap.id == id) + return &trap->trap_items_arr[i]; } return NULL; @@ -346,78 +512,210 @@ static int mlxsw_sp_trap_dummy_group_init(struct mlxsw_sp *mlxsw_sp) return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(htgt), htgt_pl); } -static int mlxsw_sp_trap_policers_init(struct mlxsw_sp *mlxsw_sp) +static int mlxsw_sp_trap_policer_items_arr_init(struct mlxsw_sp *mlxsw_sp) { - struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + size_t elem_size = sizeof(struct mlxsw_sp_trap_policer_item); + u64 arr_size = ARRAY_SIZE(mlxsw_sp_trap_policer_items_arr); struct mlxsw_sp_trap *trap = mlxsw_sp->trap; u64 free_policers = 0; - u32 last_id = 0; - int err, i; + u32 last_id; + int i; for_each_clear_bit(i, trap->policers_usage, trap->max_policers) free_policers++; - if (ARRAY_SIZE(mlxsw_sp_trap_policers_arr) > free_policers) { + if (arr_size > free_policers) { dev_err(mlxsw_sp->bus_info->dev, "Exceeded number of supported packet trap policers\n"); return -ENOBUFS; } - trap->policers_arr = kcalloc(free_policers, - sizeof(struct devlink_trap_policer), - GFP_KERNEL); - if (!trap->policers_arr) + trap->policer_items_arr = kcalloc(free_policers, elem_size, GFP_KERNEL); + if (!trap->policer_items_arr) return -ENOMEM; trap->policers_count = free_policers; - for (i = 0; i < free_policers; i++) { - const struct devlink_trap_policer *policer; - - if (i < ARRAY_SIZE(mlxsw_sp_trap_policers_arr)) { - policer = &mlxsw_sp_trap_policers_arr[i]; - trap->policers_arr[i] = *policer; - last_id = policer->id; - } else { - /* Use parameters set for first policer and override - * relevant ones. - */ - policer = &mlxsw_sp_trap_policers_arr[0]; - trap->policers_arr[i] = *policer; - trap->policers_arr[i].id = ++last_id; - trap->policers_arr[i].init_rate = 1; - trap->policers_arr[i].init_burst = 16; - } + /* Initialize policer items array with pre-defined policers. */ + memcpy(trap->policer_items_arr, mlxsw_sp_trap_policer_items_arr, + elem_size * arr_size); + + /* Initialize policer items array with the rest of the available + * policers. + */ + last_id = mlxsw_sp_trap_policer_items_arr[arr_size - 1].policer.id; + for (i = arr_size; i < trap->policers_count; i++) { + const struct mlxsw_sp_trap_policer_item *policer_item; + + /* Use parameters set for first policer and override + * relevant ones. 
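+ * (Each remaining entry thus keeps the first policer's parameters but gets
+ * a consecutive ID and minimal defaults: init_rate 1, init_burst 16.)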
+ */ + policer_item = &mlxsw_sp_trap_policer_items_arr[0]; + trap->policer_items_arr[i] = *policer_item; + trap->policer_items_arr[i].policer.id = ++last_id; + trap->policer_items_arr[i].policer.init_rate = 1; + trap->policer_items_arr[i].policer.init_burst = 16; } - INIT_LIST_HEAD(&trap->policer_item_list); + return 0; +} + +static void mlxsw_sp_trap_policer_items_arr_fini(struct mlxsw_sp *mlxsw_sp) +{ + kfree(mlxsw_sp->trap->policer_items_arr); +} - err = devlink_trap_policers_register(devlink, trap->policers_arr, - trap->policers_count); +static int mlxsw_sp_trap_policers_init(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + const struct mlxsw_sp_trap_policer_item *policer_item; + struct mlxsw_sp_trap *trap = mlxsw_sp->trap; + int err, i; + + err = mlxsw_sp_trap_policer_items_arr_init(mlxsw_sp); if (err) - goto err_trap_policers_register; + return err; + + for (i = 0; i < trap->policers_count; i++) { + policer_item = &trap->policer_items_arr[i]; + err = devlink_trap_policers_register(devlink, + &policer_item->policer, 1); + if (err) + goto err_trap_policer_register; + } return 0; -err_trap_policers_register: - kfree(trap->policers_arr); +err_trap_policer_register: + for (i--; i >= 0; i--) { + policer_item = &trap->policer_items_arr[i]; + devlink_trap_policers_unregister(devlink, + &policer_item->policer, 1); + } + mlxsw_sp_trap_policer_items_arr_fini(mlxsw_sp); return err; } static void mlxsw_sp_trap_policers_fini(struct mlxsw_sp *mlxsw_sp) { struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + const struct mlxsw_sp_trap_policer_item *policer_item; struct mlxsw_sp_trap *trap = mlxsw_sp->trap; + int i; - devlink_trap_policers_unregister(devlink, trap->policers_arr, - trap->policers_count); - WARN_ON(!list_empty(&trap->policer_item_list)); - kfree(trap->policers_arr); + for (i = trap->policers_count - 1; i >= 0; i--) { + policer_item = &trap->policer_items_arr[i]; + devlink_trap_policers_unregister(devlink, + &policer_item->policer, 1); + } + mlxsw_sp_trap_policer_items_arr_fini(mlxsw_sp); } -int mlxsw_sp_devlink_traps_init(struct mlxsw_sp *mlxsw_sp) +static int mlxsw_sp_trap_groups_init(struct mlxsw_sp *mlxsw_sp) { - size_t groups_count = ARRAY_SIZE(mlxsw_sp_trap_groups_arr); struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + const struct mlxsw_sp_trap_group_item *group_item; + struct mlxsw_sp_trap *trap = mlxsw_sp->trap; + int err, i; + + trap->group_items_arr = kmemdup(mlxsw_sp_trap_group_items_arr, + sizeof(mlxsw_sp_trap_group_items_arr), + GFP_KERNEL); + if (!trap->group_items_arr) + return -ENOMEM; + + trap->groups_count = ARRAY_SIZE(mlxsw_sp_trap_group_items_arr); + + for (i = 0; i < trap->groups_count; i++) { + group_item = &trap->group_items_arr[i]; + err = devlink_trap_groups_register(devlink, &group_item->group, + 1); + if (err) + goto err_trap_group_register; + } + + return 0; + +err_trap_group_register: + for (i--; i >= 0; i--) { + group_item = &trap->group_items_arr[i]; + devlink_trap_groups_unregister(devlink, &group_item->group, 1); + } + kfree(trap->group_items_arr); + return err; +} + +static void mlxsw_sp_trap_groups_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + struct mlxsw_sp_trap *trap = mlxsw_sp->trap; + int i; + + for (i = trap->groups_count - 1; i >= 0; i--) { + const struct mlxsw_sp_trap_group_item *group_item; + + group_item = &trap->group_items_arr[i]; + devlink_trap_groups_unregister(devlink, &group_item->group, 1); + } + 
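+ /* Mirrors the error unwind in mlxsw_sp_trap_groups_init(): groups are
+ * unregistered in reverse order before the items array is freed.
+ */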
kfree(trap->group_items_arr); +} + +static bool +mlxsw_sp_trap_listener_is_valid(const struct mlxsw_listener *listener) +{ + return listener->trap_id != 0; +} + +static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + struct mlxsw_sp_trap *trap = mlxsw_sp->trap; + const struct mlxsw_sp_trap_item *trap_item; + int err, i; + + trap->trap_items_arr = kmemdup(mlxsw_sp_trap_items_arr, + sizeof(mlxsw_sp_trap_items_arr), + GFP_KERNEL); + if (!trap->trap_items_arr) + return -ENOMEM; + + trap->traps_count = ARRAY_SIZE(mlxsw_sp_trap_items_arr); + + for (i = 0; i < trap->traps_count; i++) { + trap_item = &trap->trap_items_arr[i]; + err = devlink_traps_register(devlink, &trap_item->trap, 1, + mlxsw_sp); + if (err) + goto err_trap_register; + } + + return 0; + +err_trap_register: + for (i--; i >= 0; i--) { + trap_item = &trap->trap_items_arr[i]; + devlink_traps_unregister(devlink, &trap_item->trap, 1); + } + kfree(trap->trap_items_arr); + return err; +} + +static void mlxsw_sp_traps_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + struct mlxsw_sp_trap *trap = mlxsw_sp->trap; + int i; + + for (i = trap->traps_count - 1; i >= 0; i--) { + const struct mlxsw_sp_trap_item *trap_item; + + trap_item = &trap->trap_items_arr[i]; + devlink_traps_unregister(devlink, &trap_item->trap, 1); + } + kfree(trap->trap_items_arr); +} + +int mlxsw_sp_devlink_traps_init(struct mlxsw_sp *mlxsw_sp) +{ int err; err = mlxsw_sp_trap_cpu_policers_set(mlxsw_sp); @@ -428,59 +726,52 @@ int mlxsw_sp_devlink_traps_init(struct mlxsw_sp *mlxsw_sp) if (err) return err; - if (WARN_ON(ARRAY_SIZE(mlxsw_sp_listener_devlink_map) != - ARRAY_SIZE(mlxsw_sp_listeners_arr))) - return -EINVAL; - err = mlxsw_sp_trap_policers_init(mlxsw_sp); if (err) return err; - err = devlink_trap_groups_register(devlink, mlxsw_sp_trap_groups_arr, - groups_count); + err = mlxsw_sp_trap_groups_init(mlxsw_sp); if (err) - goto err_trap_groups_register; + goto err_trap_groups_init; - err = devlink_traps_register(devlink, mlxsw_sp_traps_arr, - ARRAY_SIZE(mlxsw_sp_traps_arr), mlxsw_sp); + err = mlxsw_sp_traps_init(mlxsw_sp); if (err) - goto err_traps_register; + goto err_traps_init; return 0; -err_traps_register: - devlink_trap_groups_unregister(devlink, mlxsw_sp_trap_groups_arr, - groups_count); -err_trap_groups_register: +err_traps_init: + mlxsw_sp_trap_groups_fini(mlxsw_sp); +err_trap_groups_init: mlxsw_sp_trap_policers_fini(mlxsw_sp); return err; } void mlxsw_sp_devlink_traps_fini(struct mlxsw_sp *mlxsw_sp) { - size_t groups_count = ARRAY_SIZE(mlxsw_sp_trap_groups_arr); - struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); - - devlink_traps_unregister(devlink, mlxsw_sp_traps_arr, - ARRAY_SIZE(mlxsw_sp_traps_arr)); - devlink_trap_groups_unregister(devlink, mlxsw_sp_trap_groups_arr, - groups_count); + mlxsw_sp_traps_fini(mlxsw_sp); + mlxsw_sp_trap_groups_fini(mlxsw_sp); mlxsw_sp_trap_policers_fini(mlxsw_sp); } int mlxsw_sp_trap_init(struct mlxsw_core *mlxsw_core, const struct devlink_trap *trap, void *trap_ctx) { + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + const struct mlxsw_sp_trap_item *trap_item; int i; - for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener_devlink_map); i++) { + trap_item = mlxsw_sp_trap_item_lookup(mlxsw_sp, trap->id); + if (WARN_ON(!trap_item)) + return -EINVAL; + + for (i = 0; i < MLXSW_SP_TRAP_LISTENERS_MAX; i++) { const struct mlxsw_listener *listener; int err; - if (mlxsw_sp_listener_devlink_map[i] != 
trap->id) + listener = &trap_item->listeners_arr[i]; + if (!mlxsw_sp_trap_listener_is_valid(listener)) continue; - listener = &mlxsw_sp_listeners_arr[i]; - err = mlxsw_core_trap_register(mlxsw_core, listener, trap_ctx); if (err) return err; @@ -492,15 +783,20 @@ int mlxsw_sp_trap_init(struct mlxsw_core *mlxsw_core, void mlxsw_sp_trap_fini(struct mlxsw_core *mlxsw_core, const struct devlink_trap *trap, void *trap_ctx) { + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + const struct mlxsw_sp_trap_item *trap_item; int i; - for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener_devlink_map); i++) { + trap_item = mlxsw_sp_trap_item_lookup(mlxsw_sp, trap->id); + if (WARN_ON(!trap_item)) + return; + + for (i = MLXSW_SP_TRAP_LISTENERS_MAX - 1; i >= 0; i--) { const struct mlxsw_listener *listener; - if (mlxsw_sp_listener_devlink_map[i] != trap->id) + listener = &trap_item->listeners_arr[i]; + if (!mlxsw_sp_trap_listener_is_valid(listener)) continue; - listener = &mlxsw_sp_listeners_arr[i]; - mlxsw_core_trap_unregister(mlxsw_core, listener, trap_ctx); } } @@ -509,16 +805,23 @@ int mlxsw_sp_trap_action_set(struct mlxsw_core *mlxsw_core, const struct devlink_trap *trap, enum devlink_trap_action action) { + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + const struct mlxsw_sp_trap_item *trap_item; int i; - for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener_devlink_map); i++) { + trap_item = mlxsw_sp_trap_item_lookup(mlxsw_sp, trap->id); + if (WARN_ON(!trap_item)) + return -EINVAL; + + for (i = 0; i < MLXSW_SP_TRAP_LISTENERS_MAX; i++) { const struct mlxsw_listener *listener; bool enabled; int err; - if (mlxsw_sp_listener_devlink_map[i] != trap->id) + listener = &trap_item->listeners_arr[i]; + if (!mlxsw_sp_trap_listener_is_valid(listener)) continue; - listener = &mlxsw_sp_listeners_arr[i]; + switch (action) { case DEVLINK_TRAP_ACTION_DROP: enabled = false; @@ -544,33 +847,12 @@ __mlxsw_sp_trap_group_init(struct mlxsw_core *mlxsw_core, { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); u16 hw_policer_id = MLXSW_REG_HTGT_INVALID_POLICER; + const struct mlxsw_sp_trap_group_item *group_item; char htgt_pl[MLXSW_REG_HTGT_LEN]; - u8 priority, tc, group_id; - - switch (group->id) { - case DEVLINK_TRAP_GROUP_GENERIC_ID_L2_DROPS: - group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_L2_DISCARDS; - priority = 0; - tc = 1; - break; - case DEVLINK_TRAP_GROUP_GENERIC_ID_L3_DROPS: - group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_L3_DISCARDS; - priority = 0; - tc = 1; - break; - case DEVLINK_TRAP_GROUP_GENERIC_ID_TUNNEL_DROPS: - group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_TUNNEL_DISCARDS; - priority = 0; - tc = 1; - break; - case DEVLINK_TRAP_GROUP_GENERIC_ID_ACL_DROPS: - group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_ACL_DISCARDS; - priority = 0; - tc = 1; - break; - default: + + group_item = mlxsw_sp_trap_group_item_lookup(mlxsw_sp, group->id); + if (WARN_ON(!group_item)) return -EINVAL; - } if (policer_id) { struct mlxsw_sp_trap_policer_item *policer_item; @@ -582,7 +864,8 @@ __mlxsw_sp_trap_group_init(struct mlxsw_core *mlxsw_core, hw_policer_id = policer_item->hw_id; } - mlxsw_reg_htgt_pack(htgt_pl, group_id, hw_policer_id, priority, tc); + mlxsw_reg_htgt_pack(htgt_pl, group_item->hw_group_id, hw_policer_id, + group_item->priority, group_item->tc); return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl); } @@ -602,10 +885,10 @@ int mlxsw_sp_trap_group_set(struct mlxsw_core *mlxsw_core, return __mlxsw_sp_trap_group_init(mlxsw_core, group, policer_id); } -static struct mlxsw_sp_trap_policer_item * 
-mlxsw_sp_trap_policer_item_init(struct mlxsw_sp *mlxsw_sp, u32 id) +static int +mlxsw_sp_trap_policer_item_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_trap_policer_item *policer_item) { - struct mlxsw_sp_trap_policer_item *policer_item; struct mlxsw_sp_trap *trap = mlxsw_sp->trap; u16 hw_id; @@ -615,27 +898,19 @@ mlxsw_sp_trap_policer_item_init(struct mlxsw_sp *mlxsw_sp, u32 id) */ hw_id = find_first_zero_bit(trap->policers_usage, trap->max_policers); if (WARN_ON(hw_id == trap->max_policers)) - return ERR_PTR(-ENOBUFS); - - policer_item = kzalloc(sizeof(*policer_item), GFP_KERNEL); - if (!policer_item) - return ERR_PTR(-ENOMEM); + return -ENOBUFS; __set_bit(hw_id, trap->policers_usage); policer_item->hw_id = hw_id; - policer_item->id = id; - list_add_tail(&policer_item->list, &trap->policer_item_list); - return policer_item; + return 0; } static void mlxsw_sp_trap_policer_item_fini(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_trap_policer_item *policer_item) { - list_del(&policer_item->list); __clear_bit(policer_item->hw_id, mlxsw_sp->trap->policers_usage); - kfree(policer_item); } static int mlxsw_sp_trap_policer_bs(u64 burst, u8 *p_burst_size, @@ -678,9 +953,13 @@ int mlxsw_sp_trap_policer_init(struct mlxsw_core *mlxsw_core, struct mlxsw_sp_trap_policer_item *policer_item; int err; - policer_item = mlxsw_sp_trap_policer_item_init(mlxsw_sp, policer->id); - if (IS_ERR(policer_item)) - return PTR_ERR(policer_item); + policer_item = mlxsw_sp_trap_policer_item_lookup(mlxsw_sp, policer->id); + if (WARN_ON(!policer_item)) + return -EINVAL; + + err = mlxsw_sp_trap_policer_item_init(mlxsw_sp, policer_item); + if (err) + return err; err = __mlxsw_sp_trap_policer_set(mlxsw_sp, policer_item->hw_id, policer->init_rate, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.h index 8c54897ba173..759146897b3a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.h @@ -8,17 +8,17 @@ #include <net/devlink.h> struct mlxsw_sp_trap { - struct devlink_trap_policer *policers_arr; /* Registered policers */ + struct mlxsw_sp_trap_policer_item *policer_items_arr; u64 policers_count; /* Number of registered policers */ - struct list_head policer_item_list; + + struct mlxsw_sp_trap_group_item *group_items_arr; + u64 groups_count; /* Number of registered groups */ + + struct mlxsw_sp_trap_item *trap_items_arr; + u64 traps_count; /* Number of registered traps */ + u64 max_policers; unsigned long policers_usage[]; /* Usage bitmap */ }; -struct mlxsw_sp_trap_policer_item { - u16 hw_id; - u32 id; - struct list_head list; /* Member of policer_item_list */ -}; - #endif diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index 1c76e1592ca2..ff844e5cc41f 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -209,7 +209,7 @@ nfp_fl_output(struct nfp_app *app, struct nfp_fl_output *output, NFP_FL_OUT_FLAGS_USE_TUN); output->port = cpu_to_be32(NFP_FL_PORT_TYPE_TUN | tun_type); } else if (netif_is_lag_master(out_dev) && - priv->flower_ext_feats & NFP_FL_FEATS_LAG) { + priv->flower_en_feats & NFP_FL_ENABLE_LAG) { int gid; output->flags = cpu_to_be16(tmp_flags); @@ -956,7 +956,7 @@ nfp_flower_output_action(struct nfp_app *app, *a_len += sizeof(struct nfp_fl_output); - if (priv->flower_ext_feats & NFP_FL_FEATS_LAG) { + if (priv->flower_en_feats & NFP_FL_ENABLE_LAG) 
{ /* nfp_fl_pre_lag returns -err or size of prelag action added. * This will be 0 if it is not egressing to a lag dev. */ diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c index a595ddb92bff..a050cb898782 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c @@ -264,7 +264,7 @@ nfp_flower_cmsg_process_one_rx(struct nfp_app *app, struct sk_buff *skb) nfp_flower_cmsg_portmod_rx(app, skb); break; case NFP_FLOWER_CMSG_TYPE_MERGE_HINT: - if (app_priv->flower_ext_feats & NFP_FL_FEATS_FLOW_MERGE) { + if (app_priv->flower_en_feats & NFP_FL_ENABLE_FLOW_MERGE) { nfp_flower_cmsg_merge_hint_rx(app, skb); break; } @@ -285,7 +285,7 @@ nfp_flower_cmsg_process_one_rx(struct nfp_app *app, struct sk_buff *skb) nfp_flower_stats_rlim_reply(app, skb); break; case NFP_FLOWER_CMSG_TYPE_LAG_CONFIG: - if (app_priv->flower_ext_feats & NFP_FL_FEATS_LAG) { + if (app_priv->flower_en_feats & NFP_FL_ENABLE_LAG) { skb_stored = nfp_flower_lag_unprocessed_msg(app, skb); break; } diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c index d8ad9346a26a..d054553c75e0 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.c +++ b/drivers/net/ethernet/netronome/nfp/flower/main.c @@ -665,6 +665,77 @@ err_clear_nn: return err; } +static void nfp_flower_wait_host_bit(struct nfp_app *app) +{ + unsigned long err_at; + u64 feat; + int err; + + /* Wait for HOST_ACK flag bit to propagate */ + err_at = jiffies + msecs_to_jiffies(100); + do { + feat = nfp_rtsym_read_le(app->pf->rtbl, + "_abi_flower_combined_features_global", + &err); + if (time_is_before_eq_jiffies(err_at)) { + nfp_warn(app->cpp, + "HOST_ACK bit not propagated in FW.\n"); + break; + } + usleep_range(1000, 2000); + } while (!err && !(feat & NFP_FL_FEATS_HOST_ACK)); + + if (err) + nfp_warn(app->cpp, + "Could not read global features entry from FW\n"); +} + +static int nfp_flower_sync_feature_bits(struct nfp_app *app) +{ + struct nfp_flower_priv *app_priv = app->priv; + int err; + + /* Tell the firmware of the host supported features. */ + err = nfp_rtsym_write_le(app->pf->rtbl, "_abi_flower_host_mask", + app_priv->flower_ext_feats | + NFP_FL_FEATS_HOST_ACK); + if (!err) + nfp_flower_wait_host_bit(app); + else if (err != -ENOENT) + return err; + + /* Tell the firmware that the driver supports lag. */ + err = nfp_rtsym_write_le(app->pf->rtbl, + "_abi_flower_balance_sync_enable", 1); + if (!err) { + app_priv->flower_ext_feats |= NFP_FL_ENABLE_LAG; + nfp_flower_lag_init(&app_priv->nfp_lag); + } else if (err == -ENOENT) { + nfp_warn(app->cpp, "LAG not supported by FW.\n"); + } else { + return err; + } + + if (app_priv->flower_ext_feats & NFP_FL_FEATS_FLOW_MOD) { + /* Tell the firmware that the driver supports flow merging. 
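+ * Attempted only when the firmware advertises NFP_FL_FEATS_FLOW_MOD;
+ * on success the FLOW_MERGE enable bit is set and internal ports are
+ * initialized.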
*/ + err = nfp_rtsym_write_le(app->pf->rtbl, + "_abi_flower_merge_hint_enable", 1); + if (!err) { + app_priv->flower_ext_feats |= NFP_FL_ENABLE_FLOW_MERGE; + nfp_flower_internal_port_init(app_priv); + } else if (err == -ENOENT) { + nfp_warn(app->cpp, + "Flow merge not supported by FW.\n"); + } else { + return err; + } + } else { + nfp_warn(app->cpp, "Flow mod/merge not supported by FW.\n"); + } + + return 0; +} + static int nfp_flower_init(struct nfp_app *app) { u64 version, features, ctx_count, num_mems; @@ -753,35 +824,11 @@ static int nfp_flower_init(struct nfp_app *app) if (err) app_priv->flower_ext_feats = 0; else - app_priv->flower_ext_feats = features; + app_priv->flower_ext_feats = features & NFP_FL_FEATS_HOST; - /* Tell the firmware that the driver supports lag. */ - err = nfp_rtsym_write_le(app->pf->rtbl, - "_abi_flower_balance_sync_enable", 1); - if (!err) { - app_priv->flower_ext_feats |= NFP_FL_FEATS_LAG; - nfp_flower_lag_init(&app_priv->nfp_lag); - } else if (err == -ENOENT) { - nfp_warn(app->cpp, "LAG not supported by FW.\n"); - } else { - goto err_cleanup_metadata; - } - - if (app_priv->flower_ext_feats & NFP_FL_FEATS_FLOW_MOD) { - /* Tell the firmware that the driver supports flow merging. */ - err = nfp_rtsym_write_le(app->pf->rtbl, - "_abi_flower_merge_hint_enable", 1); - if (!err) { - app_priv->flower_ext_feats |= NFP_FL_FEATS_FLOW_MERGE; - nfp_flower_internal_port_init(app_priv); - } else if (err == -ENOENT) { - nfp_warn(app->cpp, "Flow merge not supported by FW.\n"); - } else { - goto err_lag_clean; - } - } else { - nfp_warn(app->cpp, "Flow mod/merge not supported by FW.\n"); - } + err = nfp_flower_sync_feature_bits(app); + if (err) + goto err_cleanup; if (app_priv->flower_ext_feats & NFP_FL_FEATS_VF_RLIM) nfp_flower_qos_init(app); @@ -792,10 +839,9 @@ static int nfp_flower_init(struct nfp_app *app) return 0; -err_lag_clean: - if (app_priv->flower_ext_feats & NFP_FL_FEATS_LAG) +err_cleanup: + if (app_priv->flower_ext_feats & NFP_FL_ENABLE_LAG) nfp_flower_lag_cleanup(&app_priv->nfp_lag); -err_cleanup_metadata: nfp_flower_metadata_cleanup(app); err_free_app_priv: vfree(app->priv); @@ -813,10 +859,10 @@ static void nfp_flower_clean(struct nfp_app *app) if (app_priv->flower_ext_feats & NFP_FL_FEATS_VF_RLIM) nfp_flower_qos_cleanup(app); - if (app_priv->flower_ext_feats & NFP_FL_FEATS_LAG) + if (app_priv->flower_en_feats & NFP_FL_ENABLE_LAG) nfp_flower_lag_cleanup(&app_priv->nfp_lag); - if (app_priv->flower_ext_feats & NFP_FL_FEATS_FLOW_MERGE) + if (app_priv->flower_en_feats & NFP_FL_ENABLE_FLOW_MERGE) nfp_flower_internal_port_cleanup(app_priv); nfp_flower_metadata_cleanup(app); @@ -886,7 +932,7 @@ static int nfp_flower_start(struct nfp_app *app) struct nfp_flower_priv *app_priv = app->priv; int err; - if (app_priv->flower_ext_feats & NFP_FL_FEATS_LAG) { + if (app_priv->flower_en_feats & NFP_FL_ENABLE_LAG) { err = nfp_flower_lag_reset(&app_priv->nfp_lag); if (err) return err; @@ -907,7 +953,7 @@ nfp_flower_netdev_event(struct nfp_app *app, struct net_device *netdev, struct nfp_flower_priv *app_priv = app->priv; int ret; - if (app_priv->flower_ext_feats & NFP_FL_FEATS_LAG) { + if (app_priv->flower_en_feats & NFP_FL_ENABLE_LAG) { ret = nfp_flower_lag_netdev_event(app_priv, netdev, event, ptr); if (ret & NOTIFY_STOP_MASK) return ret; diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index d55d0d33bc45..59abea2a39ad 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ 
b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -44,8 +44,20 @@ struct nfp_app; #define NFP_FL_FEATS_FLOW_MOD BIT(5) #define NFP_FL_FEATS_PRE_TUN_RULES BIT(6) #define NFP_FL_FEATS_IPV6_TUN BIT(7) -#define NFP_FL_FEATS_FLOW_MERGE BIT(30) -#define NFP_FL_FEATS_LAG BIT(31) +#define NFP_FL_FEATS_HOST_ACK BIT(31) + +#define NFP_FL_ENABLE_FLOW_MERGE BIT(0) +#define NFP_FL_ENABLE_LAG BIT(1) + +#define NFP_FL_FEATS_HOST \ + (NFP_FL_FEATS_GENEVE | \ + NFP_FL_NBI_MTU_SETTING | \ + NFP_FL_FEATS_GENEVE_OPT | \ + NFP_FL_FEATS_VLAN_PCP | \ + NFP_FL_FEATS_VF_RLIM | \ + NFP_FL_FEATS_FLOW_MOD | \ + NFP_FL_FEATS_PRE_TUN_RULES | \ + NFP_FL_FEATS_IPV6_TUN) struct nfp_fl_mask_id { struct circ_buf mask_id_free_list; @@ -145,6 +157,7 @@ struct nfp_fl_internal_ports { * @mask_id_seed: Seed used for mask hash table * @flower_version: HW version of flower * @flower_ext_feats: Bitmap of extra features the HW supports + * @flower_en_feats: Bitmap of features enabled by HW * @stats_ids: List of free stats ids * @mask_ids: List of free mask ids * @mask_table: Hash table used to store masks @@ -180,6 +193,7 @@ struct nfp_flower_priv { u32 mask_id_seed; u64 flower_version; u64 flower_ext_feats; + u8 flower_en_feats; struct nfp_fl_stats_id stats_ids; struct nfp_fl_mask_id mask_ids; DECLARE_HASHTABLE(mask_table, NFP_FLOWER_MASK_HASH_BITS); @@ -346,7 +360,7 @@ nfp_flower_internal_port_can_offload(struct nfp_app *app, { struct nfp_flower_priv *app_priv = app->priv; - if (!(app_priv->flower_ext_feats & NFP_FL_FEATS_FLOW_MERGE)) + if (!(app_priv->flower_en_feats & NFP_FL_ENABLE_FLOW_MERGE)) return false; if (!netdev->rtnl_link_ops) return false; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index a5aa3219d112..6eb9fb9a1814 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -1438,8 +1438,7 @@ static int nfp_net_set_channels(struct net_device *netdev, unsigned int total_rx, total_tx; /* Reject unsupported */ - if (!channel->combined_count || - channel->other_count != NFP_NET_NON_Q_VECTORS || + if (channel->other_count != NFP_NET_NON_Q_VECTORS || (channel->rx_count && channel->tx_count)) return -EINVAL; diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index 1a636bad717d..7b76667acaba 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -110,6 +110,7 @@ struct src_ent { ALIGNED_TYPE_SIZE(union conn_context, p_hwfn) #define SRQ_CXT_SIZE (sizeof(struct rdma_srq_context)) +#define XRC_SRQ_CXT_SIZE (sizeof(struct rdma_xrc_srq_context)) #define TYPE0_TASK_CXT_SIZE(p_hwfn) \ ALIGNED_TYPE_SIZE(union type0_task_context, p_hwfn) @@ -293,18 +294,40 @@ static struct qed_tid_seg *qed_cxt_tid_seg_info(struct qed_hwfn *p_hwfn, return NULL; } -static void qed_cxt_set_srq_count(struct qed_hwfn *p_hwfn, u32 num_srqs) +static void qed_cxt_set_srq_count(struct qed_hwfn *p_hwfn, + u32 num_srqs, u32 num_xrc_srqs) { struct qed_cxt_mngr *p_mgr = p_hwfn->p_cxt_mngr; p_mgr->srq_count = num_srqs; + p_mgr->xrc_srq_count = num_xrc_srqs; } -u32 qed_cxt_get_srq_count(struct qed_hwfn *p_hwfn) +u32 qed_cxt_get_ilt_page_size(struct qed_hwfn *p_hwfn, + enum ilt_clients ilt_client) +{ + struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr; + struct qed_ilt_client_cfg *p_cli = &p_mngr->clients[ilt_client]; + + return ILT_PAGE_IN_BYTES(p_cli->p_size.val); +} + +static u32 qed_cxt_xrc_srqs_per_page(struct qed_hwfn 
*p_hwfn) +{ + u32 page_size; + + page_size = qed_cxt_get_ilt_page_size(p_hwfn, ILT_CLI_TSDM); + return page_size / XRC_SRQ_CXT_SIZE; +} + +u32 qed_cxt_get_total_srq_count(struct qed_hwfn *p_hwfn) { struct qed_cxt_mngr *p_mgr = p_hwfn->p_cxt_mngr; + u32 total_srqs; + + total_srqs = p_mgr->srq_count + p_mgr->xrc_srq_count; - return p_mgr->srq_count; + return total_srqs; } /* set the iids count per protocol */ @@ -692,7 +715,7 @@ int qed_cxt_cfg_ilt_compute(struct qed_hwfn *p_hwfn, u32 *line_count) } /* TSDM (SRQ CONTEXT) */ - total = qed_cxt_get_srq_count(p_hwfn); + total = qed_cxt_get_total_srq_count(p_hwfn); if (total) { p_cli = qed_cxt_set_cli(&p_mngr->clients[ILT_CLI_TSDM]); @@ -1962,11 +1985,9 @@ static void qed_rdma_set_pf_params(struct qed_hwfn *p_hwfn, struct qed_rdma_pf_params *p_params, u32 num_tasks) { - u32 num_cons, num_qps, num_srqs; + u32 num_cons, num_qps; enum protocol_type proto; - num_srqs = min_t(u32, QED_RDMA_MAX_SRQS, p_params->num_srqs); - if (p_hwfn->mcp_info->func_info.protocol == QED_PCI_ETH_RDMA) { DP_NOTICE(p_hwfn, "Current day drivers don't support RoCE & iWARP simultaneously on the same PF. Default to RoCE-only\n"); @@ -1989,6 +2010,8 @@ static void qed_rdma_set_pf_params(struct qed_hwfn *p_hwfn, } if (num_cons && num_tasks) { + u32 num_srqs, num_xrc_srqs; + qed_cxt_set_proto_cid_count(p_hwfn, proto, num_cons, 0); /* Deliberatly passing ROCE for tasks id. This is because @@ -1997,7 +2020,13 @@ static void qed_rdma_set_pf_params(struct qed_hwfn *p_hwfn, qed_cxt_set_proto_tid_count(p_hwfn, PROTOCOLID_ROCE, QED_CXT_ROCE_TID_SEG, 1, num_tasks, false); - qed_cxt_set_srq_count(p_hwfn, num_srqs); + + num_srqs = min_t(u32, QED_RDMA_MAX_SRQS, p_params->num_srqs); + + /* XRC SRQs populate a single ILT page */ + num_xrc_srqs = qed_cxt_xrc_srqs_per_page(p_hwfn); + + qed_cxt_set_srq_count(p_hwfn, num_srqs, num_xrc_srqs); } else { DP_INFO(p_hwfn->cdev, "RDMA personality used without setting params!\n"); @@ -2163,10 +2192,17 @@ qed_cxt_dynamic_ilt_alloc(struct qed_hwfn *p_hwfn, p_blk = &p_cli->pf_blks[CDUC_BLK]; break; case QED_ELEM_SRQ: + /* The first ILT page is not used for regular SRQs. Skip it. 
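+ * XRC SRQ contexts occupy that page, so regular SRQ iids are shifted
+ * up by xrc_srq_count before the ILT lookup.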
*/ + iid += p_hwfn->p_cxt_mngr->xrc_srq_count; p_cli = &p_hwfn->p_cxt_mngr->clients[ILT_CLI_TSDM]; elem_size = SRQ_CXT_SIZE; p_blk = &p_cli->pf_blks[SRQ_BLK]; break; + case QED_ELEM_XRC_SRQ: + p_cli = &p_hwfn->p_cxt_mngr->clients[ILT_CLI_TSDM]; + elem_size = XRC_SRQ_CXT_SIZE; + p_blk = &p_cli->pf_blks[SRQ_BLK]; + break; case QED_ELEM_TASK: p_cli = &p_hwfn->p_cxt_mngr->clients[ILT_CLI_CDUT]; elem_size = TYPE1_TASK_CXT_SIZE(p_hwfn); @@ -2386,8 +2422,12 @@ int qed_cxt_free_proto_ilt(struct qed_hwfn *p_hwfn, enum protocol_type proto) return rc; /* Free TSDM CXT */ - rc = qed_cxt_free_ilt_range(p_hwfn, QED_ELEM_SRQ, 0, - qed_cxt_get_srq_count(p_hwfn)); + rc = qed_cxt_free_ilt_range(p_hwfn, QED_ELEM_XRC_SRQ, 0, + p_hwfn->p_cxt_mngr->xrc_srq_count); + + rc = qed_cxt_free_ilt_range(p_hwfn, QED_ELEM_SRQ, + p_hwfn->p_cxt_mngr->xrc_srq_count, + p_hwfn->p_cxt_mngr->srq_count); return rc; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.h b/drivers/net/ethernet/qlogic/qed/qed_cxt.h index c4e815f6cabd..ce08ae8d8498 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.h +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.h @@ -82,7 +82,8 @@ int qed_cxt_get_tid_mem_info(struct qed_hwfn *p_hwfn, enum qed_cxt_elem_type { QED_ELEM_CXT, QED_ELEM_SRQ, - QED_ELEM_TASK + QED_ELEM_TASK, + QED_ELEM_XRC_SRQ, }; u32 qed_cxt_get_proto_cid_count(struct qed_hwfn *p_hwfn, @@ -235,7 +236,6 @@ u32 qed_cxt_get_proto_tid_count(struct qed_hwfn *p_hwfn, enum protocol_type type); u32 qed_cxt_get_proto_cid_start(struct qed_hwfn *p_hwfn, enum protocol_type type); -u32 qed_cxt_get_srq_count(struct qed_hwfn *p_hwfn); int qed_cxt_free_proto_ilt(struct qed_hwfn *p_hwfn, enum protocol_type proto); #define QED_CTX_WORKING_MEM 0 @@ -358,6 +358,7 @@ struct qed_cxt_mngr { /* total number of SRQ's for this hwfn */ u32 srq_count; + u32 xrc_srq_count; /* Maximal number of L2 steering filters */ u32 arfs_count; @@ -372,4 +373,9 @@ u16 qed_get_cdut_num_vf_init_pages(struct qed_hwfn *p_hwfn); u16 qed_get_cdut_num_pf_work_pages(struct qed_hwfn *p_hwfn); u16 qed_get_cdut_num_vf_work_pages(struct qed_hwfn *p_hwfn); +u32 qed_cxt_get_ilt_page_size(struct qed_hwfn *p_hwfn, + enum ilt_clients ilt_client); + +u32 qed_cxt_get_total_srq_count(struct qed_hwfn *p_hwfn); + #endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 6e857468e993..1eebf30fa798 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -2269,6 +2269,7 @@ int qed_resc_alloc(struct qed_dev *cdev) /* EQ */ n_eqes = qed_chain_get_capacity(&p_hwfn->p_spq->chain); if (QED_IS_RDMA_PERSONALITY(p_hwfn)) { + u32 n_srq = qed_cxt_get_total_srq_count(p_hwfn); enum protocol_type rdma_proto; if (QED_IS_ROCE_PERSONALITY(p_hwfn)) @@ -2279,7 +2280,10 @@ int qed_resc_alloc(struct qed_dev *cdev) num_cons = qed_cxt_get_proto_cid_count(p_hwfn, rdma_proto, NULL) * 2; - n_eqes += num_cons + 2 * MAX_NUM_VFS_BB; + /* EQ should be able to get events from all SRQ's + * at the same time + */ + n_eqes += num_cons + 2 * MAX_NUM_VFS_BB + n_srq; } else if (p_hwfn->hw_info.personality == QED_PCI_ISCSI) { num_cons = qed_cxt_get_proto_cid_count(p_hwfn, diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c index 38b1f402f7ed..50985871cd3d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c @@ -212,13 +212,22 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn) goto free_rdma_port; } + /* Allocate bit map for XRC 
Domains */ + rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->xrcd_map, + QED_RDMA_MAX_XRCDS, "XRCD"); + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Failed to allocate xrcd_map,rc = %d\n", rc); + goto free_pd_map; + } + /* Allocate DPI bitmap */ rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->dpi_map, p_hwfn->dpi_count, "DPI"); if (rc) { DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Failed to allocate DPI bitmap, rc = %d\n", rc); - goto free_pd_map; + goto free_xrcd_map; } /* Allocate bitmap for cq's. The maximum number of CQs is bound to @@ -271,14 +280,27 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn) goto free_cid_map; } + /* The first SRQ follows the last XRC SRQ. This means that the + * SRQ IDs start from an offset equals to max_xrc_srqs. + */ + p_rdma_info->srq_id_offset = p_hwfn->p_cxt_mngr->xrc_srq_count; + rc = qed_rdma_bmap_alloc(p_hwfn, + &p_rdma_info->xrc_srq_map, + p_hwfn->p_cxt_mngr->xrc_srq_count, "XRC SRQ"); + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Failed to allocate xrc srq bitmap, rc = %d\n", rc); + goto free_real_cid_map; + } + /* Allocate bitmap for srqs */ - p_rdma_info->num_srqs = qed_cxt_get_srq_count(p_hwfn); + p_rdma_info->num_srqs = p_hwfn->p_cxt_mngr->srq_count; rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->srq_map, p_rdma_info->num_srqs, "SRQ"); if (rc) { DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Failed to allocate srq bitmap, rc = %d\n", rc); - goto free_real_cid_map; + goto free_xrc_srq_map; } if (QED_IS_IWARP_PERSONALITY(p_hwfn)) @@ -292,6 +314,8 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn) free_srq_map: kfree(p_rdma_info->srq_map.bitmap); +free_xrc_srq_map: + kfree(p_rdma_info->xrc_srq_map.bitmap); free_real_cid_map: kfree(p_rdma_info->real_cid_map.bitmap); free_cid_map: @@ -304,6 +328,8 @@ free_cq_map: kfree(p_rdma_info->cq_map.bitmap); free_dpi_map: kfree(p_rdma_info->dpi_map.bitmap); +free_xrcd_map: + kfree(p_rdma_info->xrcd_map.bitmap); free_pd_map: kfree(p_rdma_info->pd_map.bitmap); free_rdma_port: @@ -377,6 +403,7 @@ static void qed_rdma_resc_free(struct qed_hwfn *p_hwfn) qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->tid_map, 1); qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->srq_map, 1); qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->real_cid_map, 1); + qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->xrc_srq_map, 1); kfree(p_rdma_info->port); kfree(p_rdma_info->dev); @@ -612,7 +639,10 @@ static int qed_rdma_start_fw(struct qed_hwfn *p_hwfn, p_params_header->cnq_start_offset = (u8)RESC_START(p_hwfn, QED_RDMA_CNQ_RAM); p_params_header->num_cnqs = params->desired_cnq; - + p_params_header->first_reg_srq_id = + cpu_to_le16(p_hwfn->p_rdma_info->srq_id_offset); + p_params_header->reg_srq_base_addr = + cpu_to_le32(qed_cxt_get_ilt_page_size(p_hwfn, ILT_CLI_TSDM)); if (params->cq_mode == QED_RDMA_CQ_MODE_16_BITS) p_params_header->cq_ring_mode = 1; else @@ -983,6 +1013,41 @@ static void qed_rdma_free_pd(void *rdma_cxt, u16 pd) spin_unlock_bh(&p_hwfn->p_rdma_info->lock); } +static int qed_rdma_alloc_xrcd(void *rdma_cxt, u16 *xrcd_id) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + u32 returned_id; + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Alloc XRCD\n"); + + spin_lock_bh(&p_hwfn->p_rdma_info->lock); + rc = qed_rdma_bmap_alloc_id(p_hwfn, + &p_hwfn->p_rdma_info->xrcd_map, + &returned_id); + spin_unlock_bh(&p_hwfn->p_rdma_info->lock); + if (rc) { + DP_NOTICE(p_hwfn, "Failed in allocating xrcd id\n"); + return rc; + } + + *xrcd_id = (u16)returned_id; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Alloc XRCD - done, rc = %d\n", rc); + return 
rc; +} + +static void qed_rdma_free_xrcd(void *rdma_cxt, u16 xrcd_id) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "xrcd_id = %08x\n", xrcd_id); + + spin_lock_bh(&p_hwfn->p_rdma_info->lock); + qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->xrcd_map, xrcd_id); + spin_unlock_bh(&p_hwfn->p_rdma_info->lock); +} + static enum qed_rdma_toggle_bit qed_rdma_toggle_bit_create_resize_cq(struct qed_hwfn *p_hwfn, u16 icid) { @@ -1306,6 +1371,8 @@ qed_rdma_create_qp(void *rdma_cxt, qp->resp_offloaded = false; qp->e2e_flow_control_en = qp->use_srq ? false : true; qp->stats_queue = in_params->stats_queue; + qp->qp_type = in_params->qp_type; + qp->xrcd_id = in_params->xrcd_id; if (QED_IS_IWARP_PERSONALITY(p_hwfn)) { rc = qed_iwarp_create_qp(p_hwfn, qp, out_params); @@ -1418,6 +1485,18 @@ static int qed_rdma_modify_qp(void *rdma_cxt, qp->cur_state); } + switch (qp->qp_type) { + case QED_RDMA_QP_TYPE_XRC_INI: + qp->has_req = 1; + break; + case QED_RDMA_QP_TYPE_XRC_TGT: + qp->has_resp = 1; + break; + default: + qp->has_req = 1; + qp->has_resp = 1; + } + if (QED_IS_IWARP_PERSONALITY(p_hwfn)) { enum qed_iwarp_qp_state new_state = qed_roce2iwarp_state(qp->cur_state); @@ -1657,6 +1736,15 @@ static void *qed_rdma_get_rdma_ctx(struct qed_dev *cdev) return QED_AFFIN_HWFN(cdev); } +static struct qed_bmap *qed_rdma_get_srq_bmap(struct qed_hwfn *p_hwfn, + bool is_xrc) +{ + if (is_xrc) + return &p_hwfn->p_rdma_info->xrc_srq_map; + + return &p_hwfn->p_rdma_info->srq_map; +} + static int qed_rdma_modify_srq(void *rdma_cxt, struct qed_rdma_modify_srq_in_params *in_params) { @@ -1686,8 +1774,8 @@ static int qed_rdma_modify_srq(void *rdma_cxt, if (rc) return rc; - DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "modified SRQ id = %x", - in_params->srq_id); + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "modified SRQ id = %x, is_xrc=%u\n", + in_params->srq_id, in_params->is_xrc); return rc; } @@ -1702,6 +1790,7 @@ qed_rdma_destroy_srq(void *rdma_cxt, struct qed_spq_entry *p_ent; struct qed_bmap *bmap; u16 opaque_fid; + u16 offset; int rc; opaque_fid = p_hwfn->hw_info.opaque_fid; @@ -1723,14 +1812,16 @@ qed_rdma_destroy_srq(void *rdma_cxt, if (rc) return rc; - bmap = &p_hwfn->p_rdma_info->srq_map; + bmap = qed_rdma_get_srq_bmap(p_hwfn, in_params->is_xrc); + offset = (in_params->is_xrc) ? 0 : p_hwfn->p_rdma_info->srq_id_offset; spin_lock_bh(&p_hwfn->p_rdma_info->lock); - qed_bmap_release_id(p_hwfn, bmap, in_params->srq_id); + qed_bmap_release_id(p_hwfn, bmap, in_params->srq_id - offset); spin_unlock_bh(&p_hwfn->p_rdma_info->lock); - DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "SRQ destroyed Id = %x", - in_params->srq_id); + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "XRC/SRQ destroyed Id = %x, is_xrc=%u\n", + in_params->srq_id, in_params->is_xrc); return rc; } @@ -1748,24 +1839,26 @@ qed_rdma_create_srq(void *rdma_cxt, u16 opaque_fid, srq_id; struct qed_bmap *bmap; u32 returned_id; + u16 offset; int rc; - bmap = &p_hwfn->p_rdma_info->srq_map; + bmap = qed_rdma_get_srq_bmap(p_hwfn, in_params->is_xrc); spin_lock_bh(&p_hwfn->p_rdma_info->lock); rc = qed_rdma_bmap_alloc_id(p_hwfn, bmap, &returned_id); spin_unlock_bh(&p_hwfn->p_rdma_info->lock); if (rc) { - DP_NOTICE(p_hwfn, "failed to allocate srq id\n"); + DP_NOTICE(p_hwfn, + "failed to allocate xrc/srq id (is_xrc=%u)\n", + in_params->is_xrc); return rc; } - elem_type = QED_ELEM_SRQ; + elem_type = (in_params->is_xrc) ? 
(QED_ELEM_XRC_SRQ) : (QED_ELEM_SRQ); rc = qed_cxt_dynamic_ilt_alloc(p_hwfn, elem_type, returned_id); if (rc) goto err; - /* returned id is no greater than u16 */ - srq_id = (u16)returned_id; + opaque_fid = p_hwfn->hw_info.opaque_fid; opaque_fid = p_hwfn->hw_info.opaque_fid; @@ -1782,20 +1875,34 @@ qed_rdma_create_srq(void *rdma_cxt, DMA_REGPAIR_LE(p_ramrod->pbl_base_addr, in_params->pbl_base_addr); p_ramrod->pages_in_srq_pbl = cpu_to_le16(in_params->num_pages); p_ramrod->pd_id = cpu_to_le16(in_params->pd_id); - p_ramrod->srq_id.srq_idx = cpu_to_le16(srq_id); p_ramrod->srq_id.opaque_fid = cpu_to_le16(opaque_fid); p_ramrod->page_size = cpu_to_le16(in_params->page_size); DMA_REGPAIR_LE(p_ramrod->producers_addr, in_params->prod_pair_addr); + offset = (in_params->is_xrc) ? 0 : p_hwfn->p_rdma_info->srq_id_offset; + srq_id = (u16)returned_id + offset; + p_ramrod->srq_id.srq_idx = cpu_to_le16(srq_id); + if (in_params->is_xrc) { + SET_FIELD(p_ramrod->flags, + RDMA_SRQ_CREATE_RAMROD_DATA_XRC_FLAG, 1); + SET_FIELD(p_ramrod->flags, + RDMA_SRQ_CREATE_RAMROD_DATA_RESERVED_KEY_EN, + in_params->reserved_key_en); + p_ramrod->xrc_srq_cq_cid = + cpu_to_le32((p_hwfn->hw_info.opaque_fid << 16) | + in_params->cq_cid); + p_ramrod->xrc_domain = cpu_to_le16(in_params->xrcd_id); + } rc = qed_spq_post(p_hwfn, p_ent, NULL); if (rc) goto err; out_params->srq_id = srq_id; - DP_VERBOSE(p_hwfn, QED_MSG_RDMA, - "SRQ created Id = %x\n", out_params->srq_id); - + DP_VERBOSE(p_hwfn, + QED_MSG_RDMA, + "XRC/SRQ created Id = %x (is_xrc=%u)\n", + out_params->srq_id, in_params->is_xrc); return rc; err: @@ -1961,6 +2068,8 @@ static const struct qed_rdma_ops qed_rdma_ops_pass = { .rdma_cnq_prod_update = &qed_rdma_cnq_prod_update, .rdma_alloc_pd = &qed_rdma_alloc_pd, .rdma_dealloc_pd = &qed_rdma_free_pd, + .rdma_alloc_xrcd = &qed_rdma_alloc_xrcd, + .rdma_dealloc_xrcd = &qed_rdma_free_xrcd, .rdma_create_cq = &qed_rdma_create_cq, .rdma_destroy_cq = &qed_rdma_destroy_cq, .rdma_create_qp = &qed_rdma_create_qp, diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.h b/drivers/net/ethernet/qlogic/qed/qed_rdma.h index 3689fe3e5935..5a7ebc764bb6 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.h +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.h @@ -63,6 +63,11 @@ #define QED_RDMA_MAX_CQE_32_BIT (0x7FFFFFFF - 1) #define QED_RDMA_MAX_CQE_16_BIT (0x7FFF - 1) +/* Up to 2^16 XRC Domains are supported, but the actual number of supported XRC + * SRQs is much smaller so there's no need to have that many domains. 
+ */ +#define QED_RDMA_MAX_XRCDS (roundup_pow_of_two(RDMA_MAX_XRC_SRQS)) + enum qed_rdma_toggle_bit { QED_RDMA_TOGGLE_BIT_CLEAR = 0, QED_RDMA_TOGGLE_BIT_SET = 1 @@ -81,9 +86,11 @@ struct qed_rdma_info { struct qed_bmap cq_map; struct qed_bmap pd_map; + struct qed_bmap xrcd_map; struct qed_bmap tid_map; struct qed_bmap qp_map; struct qed_bmap srq_map; + struct qed_bmap xrc_srq_map; struct qed_bmap cid_map; struct qed_bmap tcp_cid_map; struct qed_bmap real_cid_map; @@ -111,6 +118,7 @@ struct qed_rdma_qp { u32 qpid; u16 icid; enum qed_roce_qp_state cur_state; + enum qed_rdma_qp_type qp_type; enum qed_iwarp_qp_state iwarp_state; bool use_srq; bool signal_all; @@ -153,18 +161,21 @@ struct qed_rdma_qp { dma_addr_t orq_phys_addr; u8 orq_num_pages; bool req_offloaded; + bool has_req; /* responder */ u8 max_rd_atomic_resp; u32 rq_psn; u16 rq_cq_id; u16 rq_num_pages; + u16 xrcd_id; dma_addr_t rq_pbl_ptr; void *irq; dma_addr_t irq_phys_addr; u8 irq_num_pages; bool resp_offloaded; u32 cq_prod; + bool has_resp; u8 remote_mac_addr[6]; u8 local_mac_addr[6]; @@ -174,6 +185,14 @@ struct qed_rdma_qp { struct qed_iwarp_ep *ep; }; +static inline bool qed_rdma_is_xrc_qp(struct qed_rdma_qp *qp) +{ + if (qp->qp_type == QED_RDMA_QP_TYPE_XRC_TGT || + qp->qp_type == QED_RDMA_QP_TYPE_XRC_INI) + return true; + + return false; +} #if IS_ENABLED(CONFIG_QED_RDMA) void qed_rdma_dpm_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); void qed_rdma_dpm_conf(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c index 475b89903f46..46a4d09eacef 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.c +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c @@ -254,6 +254,9 @@ static int qed_roce_sp_create_responder(struct qed_hwfn *p_hwfn, int rc; u8 tc; + if (!qp->has_resp) + return 0; + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid); /* Allocate DMA-able memory for IRQ */ @@ -315,6 +318,10 @@ static int qed_roce_sp_create_responder(struct qed_hwfn *p_hwfn, ROCE_CREATE_QP_RESP_RAMROD_DATA_MIN_RNR_NAK_TIMER, qp->min_rnr_nak_timer); + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_XRC_FLAG, + qed_rdma_is_xrc_qp(qp)); + p_ramrod->max_ird = qp->max_rd_atomic_resp; p_ramrod->traffic_class = qp->traffic_class_tos; p_ramrod->hop_limit = qp->hop_limit_ttl; @@ -335,6 +342,7 @@ static int qed_roce_sp_create_responder(struct qed_hwfn *p_hwfn, p_ramrod->qp_handle_for_cqe.lo = cpu_to_le32(qp->qp_handle.lo); p_ramrod->cq_cid = cpu_to_le32((p_hwfn->hw_info.opaque_fid << 16) | qp->rq_cq_id); + p_ramrod->xrc_domain = cpu_to_le16(qp->xrcd_id); tc = qed_roce_get_qp_tc(p_hwfn, qp); regular_latency_queue = qed_get_cm_pq_idx_ofld_mtc(p_hwfn, tc); @@ -395,6 +403,9 @@ static int qed_roce_sp_create_requester(struct qed_hwfn *p_hwfn, int rc; u8 tc; + if (!qp->has_req) + return 0; + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid); /* Allocate DMA-able memory for ORQ */ @@ -444,6 +455,10 @@ static int qed_roce_sp_create_requester(struct qed_hwfn *p_hwfn, ROCE_CREATE_QP_REQ_RAMROD_DATA_RNR_NAK_CNT, qp->rnr_retry_cnt); + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_REQ_RAMROD_DATA_XRC_FLAG, + qed_rdma_is_xrc_qp(qp)); + p_ramrod->max_ord = qp->max_rd_atomic_req; p_ramrod->traffic_class = qp->traffic_class_tos; p_ramrod->hop_limit = qp->hop_limit_ttl; @@ -517,6 +532,9 @@ static int qed_roce_sp_modify_responder(struct qed_hwfn *p_hwfn, struct qed_spq_entry *p_ent; int rc; + if (!qp->has_resp) + return 0; + DP_VERBOSE(p_hwfn, 
QED_MSG_RDMA, "icid = %08x\n", qp->icid); if (move_to_err && !qp->resp_offloaded) @@ -611,6 +629,9 @@ static int qed_roce_sp_modify_requester(struct qed_hwfn *p_hwfn, struct qed_spq_entry *p_ent; int rc; + if (!qp->has_req) + return 0; + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid); if (move_to_err && !(qp->req_offloaded)) @@ -705,6 +726,11 @@ static int qed_roce_sp_destroy_qp_responder(struct qed_hwfn *p_hwfn, dma_addr_t ramrod_res_phys; int rc; + if (!qp->has_resp) { + *cq_prod = 0; + return 0; + } + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid); *cq_prod = qp->cq_prod; @@ -785,6 +811,9 @@ static int qed_roce_sp_destroy_qp_requester(struct qed_hwfn *p_hwfn, dma_addr_t ramrod_res_phys; int rc = -ENOMEM; + if (!qp->has_req) + return 0; + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid); if (!qp->req_offloaded) diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index 60d342f82fb3..e291e6ac40cb 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -2054,10 +2054,9 @@ static void cp_remove_one (struct pci_dev *pdev) free_netdev(dev); } -#ifdef CONFIG_PM -static int cp_suspend (struct pci_dev *pdev, pm_message_t state) +static int __maybe_unused cp_suspend(struct device *device) { - struct net_device *dev = pci_get_drvdata(pdev); + struct net_device *dev = dev_get_drvdata(device); struct cp_private *cp = netdev_priv(dev); unsigned long flags; @@ -2075,16 +2074,14 @@ static int cp_suspend (struct pci_dev *pdev, pm_message_t state) spin_unlock_irqrestore (&cp->lock, flags); - pci_save_state(pdev); - pci_enable_wake(pdev, pci_choose_state(pdev, state), cp->wol_enabled); - pci_set_power_state(pdev, pci_choose_state(pdev, state)); + device_set_wakeup_enable(device, cp->wol_enabled); return 0; } -static int cp_resume (struct pci_dev *pdev) +static int __maybe_unused cp_resume(struct device *device) { - struct net_device *dev = pci_get_drvdata (pdev); + struct net_device *dev = dev_get_drvdata(device); struct cp_private *cp = netdev_priv(dev); unsigned long flags; @@ -2093,10 +2090,6 @@ static int cp_resume (struct pci_dev *pdev) netif_device_attach (dev); - pci_set_power_state(pdev, PCI_D0); - pci_restore_state(pdev); - pci_enable_wake(pdev, PCI_D0, 0); - /* FIXME: sh*t may happen if the Rx ring buffer is depleted */ cp_init_rings_index (cp); cp_init_hw (cp); @@ -2111,7 +2104,6 @@ static int cp_resume (struct pci_dev *pdev) return 0; } -#endif /* CONFIG_PM */ static const struct pci_device_id cp_pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_REALTEK, PCI_DEVICE_ID_REALTEK_8139), }, @@ -2120,15 +2112,14 @@ static const struct pci_device_id cp_pci_tbl[] = { }; MODULE_DEVICE_TABLE(pci, cp_pci_tbl); +static SIMPLE_DEV_PM_OPS(cp_pm_ops, cp_suspend, cp_resume); + static struct pci_driver cp_driver = { .name = DRV_NAME, .id_table = cp_pci_tbl, .probe = cp_init_one, .remove = cp_remove_one, -#ifdef CONFIG_PM - .resume = cp_resume, - .suspend = cp_suspend, -#endif + .driver.pm = &cp_pm_ops, }; module_pci_driver(cp_driver); diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c index 5caeb8368eab..227139d42227 100644 --- a/drivers/net/ethernet/realtek/8139too.c +++ b/drivers/net/ethernet/realtek/8139too.c @@ -2603,17 +2603,13 @@ static void rtl8139_set_rx_mode (struct net_device *dev) spin_unlock_irqrestore (&tp->lock, flags); } -#ifdef CONFIG_PM - -static int rtl8139_suspend (struct pci_dev *pdev, pm_message_t state) +static int __maybe_unused 
rtl8139_suspend(struct device *device) { - struct net_device *dev = pci_get_drvdata (pdev); + struct net_device *dev = dev_get_drvdata(device); struct rtl8139_private *tp = netdev_priv(dev); void __iomem *ioaddr = tp->mmio_addr; unsigned long flags; - pci_save_state (pdev); - if (!netif_running (dev)) return 0; @@ -2631,38 +2627,30 @@ static int rtl8139_suspend (struct pci_dev *pdev, pm_message_t state) spin_unlock_irqrestore (&tp->lock, flags); - pci_set_power_state (pdev, PCI_D3hot); - return 0; } - -static int rtl8139_resume (struct pci_dev *pdev) +static int __maybe_unused rtl8139_resume(struct device *device) { - struct net_device *dev = pci_get_drvdata (pdev); + struct net_device *dev = dev_get_drvdata(device); - pci_restore_state (pdev); if (!netif_running (dev)) return 0; - pci_set_power_state (pdev, PCI_D0); + rtl8139_init_ring (dev); rtl8139_hw_start (dev); netif_device_attach (dev); return 0; } -#endif /* CONFIG_PM */ - +static SIMPLE_DEV_PM_OPS(rtl8139_pm_ops, rtl8139_suspend, rtl8139_resume); static struct pci_driver rtl8139_pci_driver = { .name = DRV_NAME, .id_table = rtl8139_pci_tbl, .probe = rtl8139_init_one, .remove = rtl8139_remove_one, -#ifdef CONFIG_PM - .suspend = rtl8139_suspend, - .resume = rtl8139_resume, -#endif /* CONFIG_PM */ + .driver.pm = &rtl8139_pm_ops, }; diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 97a7e27fff16..79817d4ffa47 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -1871,6 +1871,15 @@ static int rtl_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec) RTL_W16(tp, IntrMitigate, w); + /* Meaning of PktCntrDisable bit changed from RTL8168e-vl */ + if (rtl_is_8168evl_up(tp)) { + if (!rx_fr && !tx_fr) + /* disable packet counter */ + tp->cp_cmd |= PktCntrDisable; + else + tp->cp_cmd &= ~PktCntrDisable; + } + tp->cp_cmd = (tp->cp_cmd & ~INTT_MASK) | cp01; RTL_W16(tp, CPlusCmd, tp->cp_cmd); rtl_pci_commit(tp); @@ -3824,15 +3833,14 @@ static int rtl8169_change_mtu(struct net_device *dev, int new_mtu) return 0; } -static inline void rtl8169_mark_to_asic(struct RxDesc *desc) +static void rtl8169_mark_to_asic(struct RxDesc *desc) { u32 eor = le32_to_cpu(desc->opts1) & RingEnd; desc->opts2 = 0; /* Force memory writes to complete before releasing descriptor */ dma_wmb(); - - desc->opts1 = cpu_to_le32(DescOwn | eor | R8169_RX_BUF_SIZE); + WRITE_ONCE(desc->opts1, cpu_to_le32(DescOwn | eor | R8169_RX_BUF_SIZE)); } static struct page *rtl8169_alloc_rx_data(struct rtl8169_private *tp, @@ -4413,15 +4421,17 @@ static inline void rtl8169_rx_csum(struct sk_buff *skb, u32 opts1) static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, u32 budget) { - unsigned int cur_rx, rx_left; - unsigned int count; + unsigned int cur_rx, rx_left, count; + struct device *d = tp_to_dev(tp); cur_rx = tp->cur_rx; for (rx_left = min(budget, NUM_RX_DESC); rx_left > 0; rx_left--, cur_rx++) { - unsigned int entry = cur_rx % NUM_RX_DESC; - const void *rx_buf = page_address(tp->Rx_databuff[entry]); + unsigned int pkt_size, entry = cur_rx % NUM_RX_DESC; struct RxDesc *desc = tp->RxDescArray + entry; + struct sk_buff *skb; + const void *rx_buf; + dma_addr_t addr; u32 status; status = le32_to_cpu(desc->opts1); @@ -4443,62 +4453,57 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, u32 budget dev->stats.rx_length_errors++; if (status & RxCRC) dev->stats.rx_crc_errors++; - if (status & (RxRUNT | RxCRC) && !(status & RxRWT) && - 
dev->features & NETIF_F_RXALL) { - goto process_pkt; - } - } else { - unsigned int pkt_size; - struct sk_buff *skb; - -process_pkt: - pkt_size = status & GENMASK(13, 0); - if (likely(!(dev->features & NETIF_F_RXFCS))) - pkt_size -= ETH_FCS_LEN; - /* - * The driver does not support incoming fragmented - * frames. They are seen as a symptom of over-mtu - * sized frames. - */ - if (unlikely(rtl8169_fragmented_frame(status))) { - dev->stats.rx_dropped++; - dev->stats.rx_length_errors++; - goto release_descriptor; - } - skb = napi_alloc_skb(&tp->napi, pkt_size); - if (unlikely(!skb)) { - dev->stats.rx_dropped++; + if (!(dev->features & NETIF_F_RXALL)) goto release_descriptor; - } + else if (status & RxRWT || !(status & (RxRUNT | RxCRC))) + goto release_descriptor; + } - dma_sync_single_for_cpu(tp_to_dev(tp), - le64_to_cpu(desc->addr), - pkt_size, DMA_FROM_DEVICE); - prefetch(rx_buf); - skb_copy_to_linear_data(skb, rx_buf, pkt_size); - skb->tail += pkt_size; - skb->len = pkt_size; + pkt_size = status & GENMASK(13, 0); + if (likely(!(dev->features & NETIF_F_RXFCS))) + pkt_size -= ETH_FCS_LEN; - dma_sync_single_for_device(tp_to_dev(tp), - le64_to_cpu(desc->addr), - pkt_size, DMA_FROM_DEVICE); + /* The driver does not support incoming fragmented frames. + * They are seen as a symptom of over-mtu sized frames. + */ + if (unlikely(rtl8169_fragmented_frame(status))) { + dev->stats.rx_dropped++; + dev->stats.rx_length_errors++; + goto release_descriptor; + } - rtl8169_rx_csum(skb, status); - skb->protocol = eth_type_trans(skb, dev); + skb = napi_alloc_skb(&tp->napi, pkt_size); + if (unlikely(!skb)) { + dev->stats.rx_dropped++; + goto release_descriptor; + } - rtl8169_rx_vlan_tag(desc, skb); + addr = le64_to_cpu(desc->addr); + rx_buf = page_address(tp->Rx_databuff[entry]); - if (skb->pkt_type == PACKET_MULTICAST) - dev->stats.multicast++; + dma_sync_single_for_cpu(d, addr, pkt_size, DMA_FROM_DEVICE); + prefetch(rx_buf); + skb_copy_to_linear_data(skb, rx_buf, pkt_size); + skb->tail += pkt_size; + skb->len = pkt_size; + dma_sync_single_for_device(d, addr, pkt_size, DMA_FROM_DEVICE); - napi_gro_receive(&tp->napi, skb); + rtl8169_rx_csum(skb, status); + skb->protocol = eth_type_trans(skb, dev); + + rtl8169_rx_vlan_tag(desc, skb); + + if (skb->pkt_type == PACKET_MULTICAST) + dev->stats.multicast++; + + napi_gro_receive(&tp->napi, skb); + + u64_stats_update_begin(&tp->rx_stats.syncp); + tp->rx_stats.packets++; + tp->rx_stats.bytes += pkt_size; + u64_stats_update_end(&tp->rx_stats.syncp); - u64_stats_update_begin(&tp->rx_stats.syncp); - tp->rx_stats.packets++; - tp->rx_stats.bytes += pkt_size; - u64_stats_update_end(&tp->rx_stats.syncp); - } release_descriptor: rtl8169_mark_to_asic(desc); } @@ -4990,7 +4995,6 @@ static void rtl_remove_one(struct pci_dev *pdev) netif_napi_del(&tp->napi); unregister_netdev(dev); - mdiobus_unregister(tp->phydev->mdio.bus); rtl_release_firmware(tp); diff --git a/drivers/net/ethernet/seeq/ether3.c b/drivers/net/ethernet/seeq/ether3.c index 128ee7cda1ed..65c98837ec45 100644 --- a/drivers/net/ethernet/seeq/ether3.c +++ b/drivers/net/ethernet/seeq/ether3.c @@ -610,12 +610,9 @@ static int ether3_rx(struct net_device *dev, unsigned int maxcnt) ether3_readbuffer(dev, addrs+2, 12); if (next_ptr < RX_START || next_ptr >= RX_END) { - int i; printk("%s: bad next pointer @%04X: ", dev->name, priv(dev)->rx_head); printk("%02X %02X %02X %02X ", next_ptr >> 8, next_ptr & 255, status & 255, status >> 8); - for (i = 2; i < 14; i++) - printk("%02X ", addrs[i]); - printk("\n"); + printk("%pM %pM\n", 
addrs + 2, addrs + 8); next_ptr = priv(dev)->rx_head; break; } diff --git a/drivers/net/ethernet/ti/am65-cpts.h b/drivers/net/ethernet/ti/am65-cpts.h index 98c1960b20b9..cf9fbc28fd03 100644 --- a/drivers/net/ethernet/ti/am65-cpts.h +++ b/drivers/net/ethernet/ti/am65-cpts.h @@ -55,18 +55,18 @@ static inline void am65_cpts_rx_enable(struct am65_cpts *cpts, bool en) { } -static s64 am65_cpts_ns_gettime(struct am65_cpts *cpts) +static inline s64 am65_cpts_ns_gettime(struct am65_cpts *cpts) { return 0; } -static int am65_cpts_estf_enable(struct am65_cpts *cpts, - int idx, struct am65_cpts_estf_cfg *cfg) +static inline int am65_cpts_estf_enable(struct am65_cpts *cpts, int idx, + struct am65_cpts_estf_cfg *cfg) { return 0; } -static void am65_cpts_estf_disable(struct am65_cpts *cpts, int idx) +static inline void am65_cpts_estf_disable(struct am65_cpts *cpts, int idx) { } #endif diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index 66570609c845..d343dc94cb48 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -238,11 +238,6 @@ static void gsi_irq_ieob_enable(struct gsi *gsi, u32 evt_ring_id) iowrite32(val, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_MSK_OFFSET); } -static void gsi_isr_ieob_clear(struct gsi *gsi, u32 mask) -{ - iowrite32(mask, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_CLR_OFFSET); -} - static void gsi_irq_ieob_disable(struct gsi *gsi, u32 evt_ring_id) { u32 val; @@ -777,7 +772,6 @@ static void gsi_channel_deprogram(struct gsi_channel *channel) int gsi_channel_start(struct gsi *gsi, u32 channel_id) { struct gsi_channel *channel = &gsi->channel[channel_id]; - u32 evt_ring_id = channel->evt_ring_id; int ret; mutex_lock(&gsi->mutex); @@ -786,9 +780,6 @@ int gsi_channel_start(struct gsi *gsi, u32 channel_id) mutex_unlock(&gsi->mutex); - /* Clear the channel's event ring interrupt in case it's pending */ - gsi_isr_ieob_clear(gsi, BIT(evt_ring_id)); - gsi_channel_thaw(channel); return ret; @@ -1093,7 +1084,7 @@ static void gsi_isr_ieob(struct gsi *gsi) u32 event_mask; event_mask = ioread32(gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_OFFSET); - gsi_isr_ieob_clear(gsi, event_mask); + iowrite32(event_mask, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_CLR_OFFSET); while (event_mask) { u32 evt_ring_id = __ffs(event_mask); diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c index e0b1fe3c34f9..76d5108b8403 100644 --- a/drivers/net/ipa/ipa_main.c +++ b/drivers/net/ipa/ipa_main.c @@ -933,8 +933,8 @@ static int ipa_resume(struct device *dev) } static const struct dev_pm_ops ipa_pm_ops = { - .suspend_noirq = ipa_suspend, - .resume_noirq = ipa_resume, + .suspend = ipa_suspend, + .resume = ipa_resume, }; static struct platform_driver ipa_driver = { diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 8cd8d188542a..cd271de9609b 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -356,7 +356,7 @@ static int bcm54811_config_init(struct phy_device *phydev) BCM54612E_LED4_CLK125OUT_EN | reg); if (err < 0) return err; - } + } return err; } diff --git a/drivers/net/phy/dp83869.c b/drivers/net/phy/dp83869.c index 7996a4aea8d2..cfb22a21a2e6 100644 --- a/drivers/net/phy/dp83869.c +++ b/drivers/net/phy/dp83869.c @@ -65,7 +65,9 @@ #define DP83869_RGMII_RX_CLK_DELAY_EN BIT(0) /* STRAP_STS1 bits */ +#define DP83869_STRAP_OP_MODE_MASK GENMASK(2, 0) #define DP83869_STRAP_STS1_RESERVED BIT(11) +#define DP83869_STRAP_MIRROR_ENABLED BIT(12) /* PHYCTRL bits */ #define DP83869_RX_FIFO_SHIFT 12 @@ -160,6 +162,20 @@ static int dp83869_config_port_mirroring(struct 
phy_device *phydev) DP83869_CFG3_PORT_MIRROR_EN); } +static int dp83869_set_strapped_mode(struct phy_device *phydev) +{ + struct dp83869_private *dp83869 = phydev->priv; + int val; + + val = phy_read_mmd(phydev, DP83869_DEVADDR, DP83869_STRAP_STS1); + if (val < 0) + return val; + + dp83869->mode = val & DP83869_STRAP_OP_MODE_MASK; + + return 0; +} + #ifdef CONFIG_OF_MDIO static int dp83869_of_init(struct phy_device *phydev) { @@ -184,6 +200,10 @@ static int dp83869_of_init(struct phy_device *phydev) if (dp83869->mode < DP83869_RGMII_COPPER_ETHERNET || dp83869->mode > DP83869_SGMII_COPPER_ETHERNET) return -EINVAL; + } else { + ret = dp83869_set_strapped_mode(phydev); + if (ret) + return ret; } if (of_property_read_bool(of_node, "ti,max-output-impedance")) @@ -191,10 +211,18 @@ static int dp83869_of_init(struct phy_device *phydev) else if (of_property_read_bool(of_node, "ti,min-output-impedance")) dp83869->io_impedance = DP83869_IO_MUX_CFG_IO_IMPEDANCE_MIN; - if (of_property_read_bool(of_node, "enet-phy-lane-swap")) + if (of_property_read_bool(of_node, "enet-phy-lane-swap")) { dp83869->port_mirroring = DP83869_PORT_MIRRORING_EN; - else - dp83869->port_mirroring = DP83869_PORT_MIRRORING_DIS; + } else { + /* If the lane swap is not in the DT then check the straps */ + ret = phy_read_mmd(phydev, DP83869_DEVADDR, DP83869_STRAP_STS1); + if (ret < 0) + return ret; + if (ret & DP83869_STRAP_MIRROR_ENABLED) + dp83869->port_mirroring = DP83869_PORT_MIRRORING_EN; + else + dp83869->port_mirroring = DP83869_PORT_MIRRORING_DIS; + } if (of_property_read_u32(of_node, "rx-fifo-depth", &dp83869->rx_fifo_depth)) @@ -209,7 +237,7 @@ static int dp83869_of_init(struct phy_device *phydev) #else static int dp83869_of_init(struct phy_device *phydev) { - return 0; + return dp83869_set_strapped_mode(phydev); } #endif /* CONFIG_OF_MDIO */ diff --git a/drivers/net/phy/nxp-tja11xx.c b/drivers/net/phy/nxp-tja11xx.c index 0d4f9067ca71..1e79c30ca81a 100644 --- a/drivers/net/phy/nxp-tja11xx.c +++ b/drivers/net/phy/nxp-tja11xx.c @@ -53,6 +53,8 @@ #define MII_COMMSTAT 23 #define MII_COMMSTAT_LINK_UP BIT(15) +#define MII_COMMSTAT_SQI_STATE GENMASK(7, 5) +#define MII_COMMSTAT_SQI_MAX 7 #define MII_GENSTAT 24 #define MII_GENSTAT_PLL_LOCKED BIT(14) @@ -329,6 +331,22 @@ static int tja11xx_read_status(struct phy_device *phydev) return 0; } +static int tja11xx_get_sqi(struct phy_device *phydev) +{ + int ret; + + ret = phy_read(phydev, MII_COMMSTAT); + if (ret < 0) + return ret; + + return FIELD_GET(MII_COMMSTAT_SQI_STATE, ret); +} + +static int tja11xx_get_sqi_max(struct phy_device *phydev) +{ + return MII_COMMSTAT_SQI_MAX; +} + static int tja11xx_get_sset_count(struct phy_device *phydev) { return ARRAY_SIZE(tja11xx_hw_stats); @@ -683,6 +701,8 @@ static struct phy_driver tja11xx_driver[] = { .config_aneg = tja11xx_config_aneg, .config_init = tja11xx_config_init, .read_status = tja11xx_read_status, + .get_sqi = tja11xx_get_sqi, + .get_sqi_max = tja11xx_get_sqi_max, .suspend = genphy_suspend, .resume = genphy_resume, .set_loopback = genphy_loopback, @@ -699,6 +719,8 @@ static struct phy_driver tja11xx_driver[] = { .config_aneg = tja11xx_config_aneg, .config_init = tja11xx_config_init, .read_status = tja11xx_read_status, + .get_sqi = tja11xx_get_sqi, + .get_sqi_max = tja11xx_get_sqi_max, .suspend = genphy_suspend, .resume = genphy_resume, .set_loopback = genphy_loopback, @@ -715,6 +737,8 @@ static struct phy_driver tja11xx_driver[] = { .config_aneg = tja11xx_config_aneg, .config_init = tja11xx_config_init, .read_status = 
tja11xx_read_status, + .get_sqi = tja11xx_get_sqi, + .get_sqi_max = tja11xx_get_sqi_max, .match_phy_device = tja1102_p0_match_phy_device, .suspend = genphy_suspend, .resume = genphy_resume, @@ -736,6 +760,8 @@ static struct phy_driver tja11xx_driver[] = { .config_aneg = tja11xx_config_aneg, .config_init = tja11xx_config_init, .read_status = tja11xx_read_status, + .get_sqi = tja11xx_get_sqi, + .get_sqi_max = tja11xx_get_sqi_max, .match_phy_device = tja1102_p1_match_phy_device, .suspend = genphy_suspend, .resume = genphy_resume, diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index d4bbf79dab6c..d584701187db 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -58,13 +58,13 @@ static const char *phy_state_to_str(enum phy_state st) static void phy_link_up(struct phy_device *phydev) { - phydev->phy_link_change(phydev, true, true); + phydev->phy_link_change(phydev, true); phy_led_trigger_change_speed(phydev); } -static void phy_link_down(struct phy_device *phydev, bool do_carrier) +static void phy_link_down(struct phy_device *phydev) { - phydev->phy_link_change(phydev, false, do_carrier); + phydev->phy_link_change(phydev, false); phy_led_trigger_change_speed(phydev); } @@ -524,7 +524,7 @@ int phy_start_cable_test(struct phy_device *phydev, goto out; /* Mark the carrier down until the test is complete */ - phy_link_down(phydev, true); + phy_link_down(phydev); netif_testing_on(dev); err = phydev->drv->cable_test_start(phydev); @@ -595,7 +595,7 @@ static int phy_check_link_status(struct phy_device *phydev) phy_link_up(phydev); } else if (!phydev->link && phydev->state != PHY_NOLINK) { phydev->state = PHY_NOLINK; - phy_link_down(phydev, true); + phy_link_down(phydev); } return 0; @@ -999,7 +999,7 @@ void phy_state_machine(struct work_struct *work) case PHY_HALTED: if (phydev->link) { phydev->link = 0; - phy_link_down(phydev, true); + phy_link_down(phydev); } do_suspend = true; break; diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index c3a107cf578e..7481135d27ab 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -916,16 +916,14 @@ struct phy_device *phy_find_first(struct mii_bus *bus) } EXPORT_SYMBOL(phy_find_first); -static void phy_link_change(struct phy_device *phydev, bool up, bool do_carrier) +static void phy_link_change(struct phy_device *phydev, bool up) { struct net_device *netdev = phydev->attached_dev; - if (do_carrier) { - if (up) - netif_carrier_on(netdev); - else - netif_carrier_off(netdev); - } + if (up) + netif_carrier_on(netdev); + else + netif_carrier_off(netdev); phydev->adjust_link(netdev); if (phydev->mii_ts && phydev->mii_ts->link_state) phydev->mii_ts->link_state(phydev->mii_ts, phydev); diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 0f23bec431c1..b6b1f77bba58 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -803,8 +803,7 @@ void phylink_destroy(struct phylink *pl) } EXPORT_SYMBOL_GPL(phylink_destroy); -static void phylink_phy_change(struct phy_device *phydev, bool up, - bool do_carrier) +static void phylink_phy_change(struct phy_device *phydev, bool up) { struct phylink *pl = phydev->phylink; bool tx_pause, rx_pause; diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index a5b415fed11e..3e88fbef2d4a 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -26,6 +26,7 @@ #include <net/netns/generic.h> #include <net/tun_proto.h> #include <net/vxlan.h> +#include <net/nexthop.h> #if IS_ENABLED(CONFIG_IPV6) #include <net/ip6_tunnel.h> @@ -78,6 
+79,9 @@ struct vxlan_fdb { u16 state; /* see ndm_state */ __be32 vni; u16 flags; /* see ndm_flags and below */ + struct list_head nh_list; + struct nexthop __rcu *nh; + struct vxlan_dev *vdev; }; #define NTF_VXLAN_ADDED_BY_USER 0x100 @@ -174,11 +178,15 @@ static inline struct hlist_head *vs_head(struct net *net, __be16 port) */ static inline struct vxlan_rdst *first_remote_rcu(struct vxlan_fdb *fdb) { + if (rcu_access_pointer(fdb->nh)) + return NULL; return list_entry_rcu(fdb->remotes.next, struct vxlan_rdst, list); } static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb) { + if (rcu_access_pointer(fdb->nh)) + return NULL; return list_first_entry(&fdb->remotes, struct vxlan_rdst, list); } @@ -251,9 +259,10 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, { unsigned long now = jiffies; struct nda_cacheinfo ci; + bool send_ip, send_eth; struct nlmsghdr *nlh; + struct nexthop *nh; struct ndmsg *ndm; - bool send_ip, send_eth; nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags); if (nlh == NULL) @@ -264,16 +273,21 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, send_eth = send_ip = true; + nh = rcu_dereference_rtnl(fdb->nh); if (type == RTM_GETNEIGH) { - send_ip = !vxlan_addr_any(&rdst->remote_ip); + if (rdst) { + send_ip = !vxlan_addr_any(&rdst->remote_ip); + ndm->ndm_family = send_ip ? rdst->remote_ip.sa.sa_family : AF_INET; + } else if (nh) { + ndm->ndm_family = nexthop_get_family(nh); + } send_eth = !is_zero_ether_addr(fdb->eth_addr); - ndm->ndm_family = send_ip ? rdst->remote_ip.sa.sa_family : AF_INET; } else ndm->ndm_family = AF_BRIDGE; ndm->ndm_state = fdb->state; ndm->ndm_ifindex = vxlan->dev->ifindex; ndm->ndm_flags = fdb->flags; - if (rdst->offloaded) + if (rdst && rdst->offloaded) ndm->ndm_flags |= NTF_OFFLOADED; ndm->ndm_type = RTN_UNICAST; @@ -284,23 +298,30 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr)) goto nla_put_failure; + if (nh) { + if (nla_put_u32(skb, NDA_NH_ID, nh->id)) + goto nla_put_failure; + } else if (rdst) { + if (send_ip && vxlan_nla_put_addr(skb, NDA_DST, + &rdst->remote_ip)) + goto nla_put_failure; + + if (rdst->remote_port && + rdst->remote_port != vxlan->cfg.dst_port && + nla_put_be16(skb, NDA_PORT, rdst->remote_port)) + goto nla_put_failure; + if (rdst->remote_vni != vxlan->default_dst.remote_vni && + nla_put_u32(skb, NDA_VNI, be32_to_cpu(rdst->remote_vni))) + goto nla_put_failure; + if (rdst->remote_ifindex && + nla_put_u32(skb, NDA_IFINDEX, rdst->remote_ifindex)) + goto nla_put_failure; + } - if (send_ip && vxlan_nla_put_addr(skb, NDA_DST, &rdst->remote_ip)) - goto nla_put_failure; - - if (rdst->remote_port && rdst->remote_port != vxlan->cfg.dst_port && - nla_put_be16(skb, NDA_PORT, rdst->remote_port)) - goto nla_put_failure; - if (rdst->remote_vni != vxlan->default_dst.remote_vni && - nla_put_u32(skb, NDA_VNI, be32_to_cpu(rdst->remote_vni))) - goto nla_put_failure; if ((vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) && fdb->vni && nla_put_u32(skb, NDA_SRC_VNI, be32_to_cpu(fdb->vni))) goto nla_put_failure; - if (rdst->remote_ifindex && - nla_put_u32(skb, NDA_IFINDEX, rdst->remote_ifindex)) - goto nla_put_failure; ci.ndm_used = jiffies_to_clock_t(now - fdb->used); ci.ndm_confirmed = 0; @@ -401,7 +422,7 @@ static int vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb, { int err; - if (swdev_notify) { + if (swdev_notify && rd) { switch (type) { case RTM_NEWNEIGH: err = 
vxlan_fdb_switchdev_call_notifiers(vxlan, fdb, rd, @@ -793,8 +814,9 @@ static int vxlan_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff) return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr)); } -static struct vxlan_fdb *vxlan_fdb_alloc(const u8 *mac, __u16 state, - __be32 src_vni, __u16 ndm_flags) +static struct vxlan_fdb *vxlan_fdb_alloc(struct vxlan_dev *vxlan, const u8 *mac, + __u16 state, __be32 src_vni, + __u16 ndm_flags) { struct vxlan_fdb *f; @@ -805,6 +827,9 @@ static struct vxlan_fdb *vxlan_fdb_alloc(const u8 *mac, __u16 state, f->flags = ndm_flags; f->updated = f->used = jiffies; f->vni = src_vni; + f->nh = NULL; + f->vdev = vxlan; + INIT_LIST_HEAD(&f->nh_list); INIT_LIST_HEAD(&f->remotes); memcpy(f->eth_addr, mac, ETH_ALEN); @@ -819,11 +844,78 @@ static void vxlan_fdb_insert(struct vxlan_dev *vxlan, const u8 *mac, vxlan_fdb_head(vxlan, mac, src_vni)); } +static int vxlan_fdb_nh_update(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb, + u32 nhid, struct netlink_ext_ack *extack) +{ + struct nexthop *old_nh = rtnl_dereference(fdb->nh); + struct nh_group *nhg; + struct nexthop *nh; + int err = -EINVAL; + + if (old_nh && old_nh->id == nhid) + return 0; + + nh = nexthop_find_by_id(vxlan->net, nhid); + if (!nh) { + NL_SET_ERR_MSG(extack, "Nexthop id does not exist"); + goto err_inval; + } + + if (nh) { + if (!nexthop_get(nh)) { + NL_SET_ERR_MSG(extack, "Nexthop has been deleted"); + nh = NULL; + goto err_inval; + } + if (!nh->is_fdb_nh) { + NL_SET_ERR_MSG(extack, "Nexthop is not a fdb nexthop"); + goto err_inval; + } + + if (!nh->is_group || !nh->nh_grp->mpath) { + NL_SET_ERR_MSG(extack, "Nexthop is not a multipath group"); + goto err_inval; + } + + /* check nexthop group family */ + nhg = rtnl_dereference(nh->nh_grp); + switch (vxlan->default_dst.remote_ip.sa.sa_family) { + case AF_INET: + if (!nhg->has_v4) { + err = -EAFNOSUPPORT; + NL_SET_ERR_MSG(extack, "Nexthop group family not supported"); + goto err_inval; + } + break; + case AF_INET6: + if (nhg->has_v4) { + err = -EAFNOSUPPORT; + NL_SET_ERR_MSG(extack, "Nexthop group family not supported"); + goto err_inval; + } + } + } + + if (old_nh) { + list_del_rcu(&fdb->nh_list); + nexthop_put(old_nh); + } + rcu_assign_pointer(fdb->nh, nh); + list_add_tail_rcu(&fdb->nh_list, &nh->fdb_list); + return 1; + +err_inval: + if (nh) + nexthop_put(nh); + return err; +} + static int vxlan_fdb_create(struct vxlan_dev *vxlan, const u8 *mac, union vxlan_addr *ip, __u16 state, __be16 port, __be32 src_vni, __be32 vni, __u32 ifindex, __u16 ndm_flags, - struct vxlan_fdb **fdb) + u32 nhid, struct vxlan_fdb **fdb, + struct netlink_ext_ack *extack) { struct vxlan_rdst *rd = NULL; struct vxlan_fdb *f; @@ -834,24 +926,37 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan, return -ENOSPC; netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip); - f = vxlan_fdb_alloc(mac, state, src_vni, ndm_flags); + f = vxlan_fdb_alloc(vxlan, mac, state, src_vni, ndm_flags); if (!f) return -ENOMEM; - rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd); - if (rc < 0) { - kfree(f); - return rc; - } + if (nhid) + rc = vxlan_fdb_nh_update(vxlan, f, nhid, extack); + else + rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd); + if (rc < 0) + goto errout; *fdb = f; return 0; + +errout: + kfree(f); + return rc; } static void __vxlan_fdb_free(struct vxlan_fdb *f) { struct vxlan_rdst *rd, *nd; + struct nexthop *nh; + + nh = rcu_dereference_raw(f->nh); + if (nh) { + rcu_assign_pointer(f->nh, NULL); + list_del_rcu(&f->nh_list); + nexthop_put(nh); + } 
list_for_each_entry_safe(rd, nd, &f->remotes, list) { dst_cache_destroy(&rd->dst_cache); @@ -875,12 +980,18 @@ static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f, netdev_dbg(vxlan->dev, "delete %pM\n", f->eth_addr); --vxlan->addrcnt; - if (do_notify) - list_for_each_entry(rd, &f->remotes, list) - vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH, + if (do_notify) { + if (rcu_access_pointer(f->nh)) + vxlan_fdb_notify(vxlan, f, NULL, RTM_DELNEIGH, swdev_notify, NULL); + else + list_for_each_entry(rd, &f->remotes, list) + vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH, + swdev_notify, NULL); + } hlist_del_rcu(&f->hlist); + f->vdev = NULL; call_rcu(&f->rcu, vxlan_fdb_free); } @@ -897,7 +1008,7 @@ static int vxlan_fdb_update_existing(struct vxlan_dev *vxlan, __u16 state, __u16 flags, __be16 port, __be32 vni, __u32 ifindex, __u16 ndm_flags, - struct vxlan_fdb *f, + struct vxlan_fdb *f, u32 nhid, bool swdev_notify, struct netlink_ext_ack *extack) { @@ -908,6 +1019,18 @@ static int vxlan_fdb_update_existing(struct vxlan_dev *vxlan, int rc = 0; int err; + if (nhid && !rcu_access_pointer(f->nh)) { + NL_SET_ERR_MSG(extack, + "Cannot replace an existing non nexthop fdb with a nexthop"); + return -EOPNOTSUPP; + } + + if (nhid && (flags & NLM_F_APPEND)) { + NL_SET_ERR_MSG(extack, + "Cannot append to a nexthop fdb"); + return -EOPNOTSUPP; + } + /* Do not allow an externally learned entry to take over an entry added * by the user. */ @@ -929,10 +1052,17 @@ static int vxlan_fdb_update_existing(struct vxlan_dev *vxlan, /* Only change unicasts */ if (!(is_multicast_ether_addr(f->eth_addr) || is_zero_ether_addr(f->eth_addr))) { - rc = vxlan_fdb_replace(f, ip, port, vni, - ifindex, &oldrd); + if (nhid) { + rc = vxlan_fdb_nh_update(vxlan, f, nhid, extack); + if (rc < 0) + return rc; + } else { + rc = vxlan_fdb_replace(f, ip, port, vni, + ifindex, &oldrd); + } notify |= rc; } else { + NL_SET_ERR_MSG(extack, "Cannot replace non-unicast fdb entries"); return -EOPNOTSUPP; } } @@ -962,6 +1092,8 @@ static int vxlan_fdb_update_existing(struct vxlan_dev *vxlan, return 0; err_notify: + if (nhid) + return err; if ((flags & NLM_F_REPLACE) && rc) *rd = oldrd; else if ((flags & NLM_F_APPEND) && rc) { @@ -975,7 +1107,7 @@ static int vxlan_fdb_update_create(struct vxlan_dev *vxlan, const u8 *mac, union vxlan_addr *ip, __u16 state, __u16 flags, __be16 port, __be32 src_vni, __be32 vni, - __u32 ifindex, __u16 ndm_flags, + __u32 ifindex, __u16 ndm_flags, u32 nhid, bool swdev_notify, struct netlink_ext_ack *extack) { @@ -990,7 +1122,7 @@ static int vxlan_fdb_update_create(struct vxlan_dev *vxlan, netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip); rc = vxlan_fdb_create(vxlan, mac, ip, state, port, src_vni, - vni, ifindex, fdb_flags, &f); + vni, ifindex, fdb_flags, nhid, &f, extack); if (rc < 0) return rc; @@ -1012,7 +1144,7 @@ static int vxlan_fdb_update(struct vxlan_dev *vxlan, const u8 *mac, union vxlan_addr *ip, __u16 state, __u16 flags, __be16 port, __be32 src_vni, __be32 vni, - __u32 ifindex, __u16 ndm_flags, + __u32 ifindex, __u16 ndm_flags, u32 nhid, bool swdev_notify, struct netlink_ext_ack *extack) { @@ -1028,14 +1160,15 @@ static int vxlan_fdb_update(struct vxlan_dev *vxlan, return vxlan_fdb_update_existing(vxlan, ip, state, flags, port, vni, ifindex, ndm_flags, f, - swdev_notify, extack); + nhid, swdev_notify, extack); } else { if (!(flags & NLM_F_CREATE)) return -ENOENT; return vxlan_fdb_update_create(vxlan, mac, ip, state, flags, port, src_vni, vni, ifindex, - ndm_flags, swdev_notify, extack); + 
ndm_flags, nhid, swdev_notify, + extack); } } @@ -1049,7 +1182,7 @@ static void vxlan_fdb_dst_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f, static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan, union vxlan_addr *ip, __be16 *port, __be32 *src_vni, - __be32 *vni, u32 *ifindex) + __be32 *vni, u32 *ifindex, u32 *nhid) { struct net *net = dev_net(vxlan->dev); int err; @@ -1109,6 +1242,11 @@ static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan, *ifindex = 0; } + if (tb[NDA_NH_ID]) + *nhid = nla_get_u32(tb[NDA_NH_ID]); + else + *nhid = 0; + return 0; } @@ -1123,7 +1261,7 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], union vxlan_addr ip; __be16 port; __be32 src_vni, vni; - u32 ifindex; + u32 ifindex, nhid; u32 hash_index; int err; @@ -1133,10 +1271,11 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], return -EINVAL; } - if (tb[NDA_DST] == NULL) + if (!tb || (!tb[NDA_DST] && !tb[NDA_NH_ID])) return -EINVAL; - err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &src_vni, &vni, &ifindex); + err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &src_vni, &vni, &ifindex, + &nhid); if (err) return err; @@ -1148,7 +1287,7 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], err = vxlan_fdb_update(vxlan, addr, &ip, ndm->ndm_state, flags, port, src_vni, vni, ifindex, ndm->ndm_flags | NTF_VXLAN_ADDED_BY_USER, - true, extack); + nhid, true, extack); spin_unlock_bh(&vxlan->hash_lock[hash_index]); return err; @@ -1159,8 +1298,8 @@ static int __vxlan_fdb_delete(struct vxlan_dev *vxlan, __be16 port, __be32 src_vni, __be32 vni, u32 ifindex, bool swdev_notify) { - struct vxlan_fdb *f; struct vxlan_rdst *rd = NULL; + struct vxlan_fdb *f; int err = -ENOENT; f = vxlan_find_mac(vxlan, addr, src_vni); @@ -1195,12 +1334,13 @@ static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], struct vxlan_dev *vxlan = netdev_priv(dev); union vxlan_addr ip; __be32 src_vni, vni; - __be16 port; - u32 ifindex; + u32 ifindex, nhid; u32 hash_index; + __be16 port; int err; - err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &src_vni, &vni, &ifindex); + err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &src_vni, &vni, &ifindex, + &nhid); if (err) return err; @@ -1228,6 +1368,17 @@ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) { struct vxlan_rdst *rd; + if (rcu_access_pointer(f->nh)) { + err = vxlan_fdb_info(skb, vxlan, f, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWNEIGH, + NLM_F_MULTI, NULL); + if (err < 0) + goto out; + continue; + } + list_for_each_entry_rcu(rd, &f->remotes, list) { if (*idx < cb->args[2]) goto skip; @@ -1311,6 +1462,10 @@ static bool vxlan_snoop(struct net_device *dev, if (f->state & (NUD_PERMANENT | NUD_NOARP)) return true; + /* Don't override an fdb with nexthop with a learnt entry */ + if (rcu_access_pointer(f->nh)) + return true; + if (net_ratelimit()) netdev_info(dev, "%pM migrated from %pIS to %pIS\n", @@ -1333,7 +1488,7 @@ static bool vxlan_snoop(struct net_device *dev, vxlan->cfg.dst_port, vni, vxlan->default_dst.remote_vni, - ifindex, NTF_SELF, true, NULL); + ifindex, NTF_SELF, 0, true, NULL); spin_unlock(&vxlan->hash_lock[hash_index]); } @@ -2616,6 +2771,38 @@ tx_error: kfree_skb(skb); } +static void vxlan_xmit_nh(struct sk_buff *skb, struct net_device *dev, + struct vxlan_fdb *f, __be32 vni, bool did_rsc) +{ + struct vxlan_rdst nh_rdst; + struct nexthop *nh; + bool do_xmit; + u32 hash; + + memset(&nh_rdst, 0, 
sizeof(struct vxlan_rdst)); + hash = skb_get_hash(skb); + + rcu_read_lock(); + nh = rcu_dereference(f->nh); + if (!nh) { + rcu_read_unlock(); + goto drop; + } + do_xmit = vxlan_fdb_nh_path_select(nh, hash, &nh_rdst); + rcu_read_unlock(); + + if (likely(do_xmit)) + vxlan_xmit_one(skb, dev, vni, &nh_rdst, did_rsc); + else + goto drop; + + return; + +drop: + dev->stats.tx_dropped++; + dev_kfree_skb(skb); +} + /* Transmit local packets over Vxlan * * Outer IP header inherits ECN and DF from inner header. @@ -2692,22 +2879,27 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) } } - list_for_each_entry_rcu(rdst, &f->remotes, list) { - struct sk_buff *skb1; + if (rcu_access_pointer(f->nh)) { + vxlan_xmit_nh(skb, dev, f, + (vni ? : vxlan->default_dst.remote_vni), did_rsc); + } else { + list_for_each_entry_rcu(rdst, &f->remotes, list) { + struct sk_buff *skb1; - if (!fdst) { - fdst = rdst; - continue; + if (!fdst) { + fdst = rdst; + continue; + } + skb1 = skb_clone(skb, GFP_ATOMIC); + if (skb1) + vxlan_xmit_one(skb1, dev, vni, rdst, did_rsc); } - skb1 = skb_clone(skb, GFP_ATOMIC); - if (skb1) - vxlan_xmit_one(skb1, dev, vni, rdst, did_rsc); + if (fdst) + vxlan_xmit_one(skb, dev, vni, fdst, did_rsc); + else + kfree_skb(skb); } - if (fdst) - vxlan_xmit_one(skb, dev, vni, fdst, did_rsc); - else - kfree_skb(skb); return NETDEV_TX_OK; } @@ -3615,7 +3807,7 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev, dst->remote_vni, dst->remote_vni, dst->remote_ifindex, - NTF_SELF, &f); + NTF_SELF, 0, &f, extack); if (err) return err; } @@ -4013,7 +4205,7 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], vxlan->cfg.dst_port, conf.vni, conf.vni, conf.remote_ifindex, - NTF_SELF, true, extack); + NTF_SELF, 0, true, extack); if (err) { spin_unlock_bh(&vxlan->hash_lock[hash_index]); netdev_adjacent_change_abort(dst->remote_dev, @@ -4335,7 +4527,7 @@ vxlan_fdb_external_learn_add(struct net_device *dev, fdb_info->remote_vni, fdb_info->remote_ifindex, NTF_USE | NTF_SELF | NTF_EXT_LEARNED, - false, extack); + 0, false, extack); spin_unlock_bh(&vxlan->hash_lock[hash_index]); return err; @@ -4410,6 +4602,25 @@ static struct notifier_block vxlan_switchdev_notifier_block __read_mostly = { .notifier_call = vxlan_switchdev_event, }; +static int vxlan_nexthop_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct nexthop *nh = ptr; + struct vxlan_fdb *fdb, *tmp; + + if (!nh || event != NEXTHOP_EVENT_DEL) + return NOTIFY_DONE; + + list_for_each_entry_safe(fdb, tmp, &nh->fdb_list, nh_list) + vxlan_fdb_destroy(fdb->vdev, fdb, false, false); + + return NOTIFY_DONE; +} + +static struct notifier_block vxlan_nexthop_notifier_block __read_mostly = { + .notifier_call = vxlan_nexthop_event, +}; + static __net_init int vxlan_init_net(struct net *net) { struct vxlan_net *vn = net_generic(net, vxlan_net_id); @@ -4421,7 +4632,7 @@ static __net_init int vxlan_init_net(struct net *net) for (h = 0; h < PORT_HASH_SIZE; ++h) INIT_HLIST_HEAD(&vn->sock_list[h]); - return 0; + return register_nexthop_notifier(net, &vxlan_nexthop_notifier_block); } static void vxlan_destroy_tunnels(struct net *net, struct list_head *head) @@ -4454,6 +4665,8 @@ static void __net_exit vxlan_exit_batch_net(struct list_head *net_list) rtnl_lock(); list_for_each_entry(net, net_list, exit_list) + unregister_nexthop_notifier(net, &vxlan_nexthop_notifier_block); + list_for_each_entry(net, net_list, exit_list) vxlan_destroy_tunnels(net, &list); unregister_netdevice_many(&list); 
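Note on the vxlan hunks above: an FDB entry can now resolve through a nexthop group instead of its own remote list. vxlan_xmit_nh() hashes the skb once with skb_get_hash() and lets vxlan_fdb_nh_path_select() pick a single group member, so every packet of a flow keeps hitting the same remote VTEP while distinct flows spread across the group. Below is a minimal userspace model of that weighted ECMP pick -- plain modulo bucketing rather than the kernel's precomputed per-member upper bounds (the distribution is the same), and all names here (model_remote, select_remote) are hypothetical, not part of the patch:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical model of one member of an ECMP nexthop group. */
struct model_remote {
	const char *vtep_ip;
	unsigned int weight;
};

/*
 * Pick a group member for a flow hash.  The kernel compares the hash
 * against per-member upper bounds precomputed from the weights; the
 * plain weighted buckets here yield the same distribution.
 */
static const struct model_remote *
select_remote(const struct model_remote *grp, int n, uint32_t flow_hash)
{
	unsigned int total = 0, slot;
	int i;

	for (i = 0; i < n; i++)
		total += grp[i].weight;

	slot = flow_hash % total;
	for (i = 0; i < n; i++) {
		if (slot < grp[i].weight)
			return &grp[i];
		slot -= grp[i].weight;
	}
	return &grp[n - 1];	/* not reached with positive weights */
}

int main(void)
{
	static const struct model_remote group[] = {
		{ "10.0.0.1", 1 },	/* gets ~1/3 of the flows */
		{ "10.0.0.2", 2 },	/* gets ~2/3 of the flows */
	};
	uint32_t hash;

	/* Every packet of a flow shares a hash, hence a VTEP. */
	for (hash = 0; hash < 6; hash++)
		printf("flow hash %u -> %s\n", hash,
		       select_remote(group, 2, hash)->vtep_ip);
	return 0;
}

Hashing per flow rather than per packet is what keeps reordering out of the picture: load spreads between flows, never inside one.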
diff --git a/drivers/s390/net/ctcm_main.c b/drivers/s390/net/ctcm_main.c index 437a6d822105..d06809eac16d 100644 --- a/drivers/s390/net/ctcm_main.c +++ b/drivers/s390/net/ctcm_main.c @@ -1698,43 +1698,6 @@ static void ctcm_remove_device(struct ccwgroup_device *cgdev) put_device(&cgdev->dev); } -static int ctcm_pm_suspend(struct ccwgroup_device *gdev) -{ - struct ctcm_priv *priv = dev_get_drvdata(&gdev->dev); - - if (gdev->state == CCWGROUP_OFFLINE) - return 0; - netif_device_detach(priv->channel[CTCM_READ]->netdev); - ctcm_close(priv->channel[CTCM_READ]->netdev); - if (!wait_event_timeout(priv->fsm->wait_q, - fsm_getstate(priv->fsm) == DEV_STATE_STOPPED, CTCM_TIME_5_SEC)) { - netif_device_attach(priv->channel[CTCM_READ]->netdev); - return -EBUSY; - } - ccw_device_set_offline(gdev->cdev[1]); - ccw_device_set_offline(gdev->cdev[0]); - return 0; -} - -static int ctcm_pm_resume(struct ccwgroup_device *gdev) -{ - struct ctcm_priv *priv = dev_get_drvdata(&gdev->dev); - int rc; - - if (gdev->state == CCWGROUP_OFFLINE) - return 0; - rc = ccw_device_set_online(gdev->cdev[1]); - if (rc) - goto err_out; - rc = ccw_device_set_online(gdev->cdev[0]); - if (rc) - goto err_out; - ctcm_open(priv->channel[CTCM_READ]->netdev); -err_out: - netif_device_attach(priv->channel[CTCM_READ]->netdev); - return rc; -} - static struct ccw_device_id ctcm_ids[] = { {CCW_DEVICE(0x3088, 0x08), .driver_info = ctcm_channel_type_parallel}, {CCW_DEVICE(0x3088, 0x1e), .driver_info = ctcm_channel_type_ficon}, @@ -1764,9 +1727,6 @@ static struct ccwgroup_driver ctcm_group_driver = { .remove = ctcm_remove_device, .set_online = ctcm_new_device, .set_offline = ctcm_shutdown_device, - .freeze = ctcm_pm_suspend, - .thaw = ctcm_pm_resume, - .restore = ctcm_pm_resume, }; static ssize_t group_store(struct device_driver *ddrv, const char *buf, diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c index 8f08b0a2917c..440219bcaa2b 100644 --- a/drivers/s390/net/lcs.c +++ b/drivers/s390/net/lcs.c @@ -2296,60 +2296,6 @@ lcs_remove_device(struct ccwgroup_device *ccwgdev) put_device(&ccwgdev->dev); } -static int lcs_pm_suspend(struct lcs_card *card) -{ - if (card->dev) - netif_device_detach(card->dev); - lcs_set_allowed_threads(card, 0); - lcs_wait_for_threads(card, 0xffffffff); - if (card->state != DEV_STATE_DOWN) - __lcs_shutdown_device(card->gdev, 1); - return 0; -} - -static int lcs_pm_resume(struct lcs_card *card) -{ - int rc = 0; - - if (card->state == DEV_STATE_RECOVER) - rc = lcs_new_device(card->gdev); - if (card->dev) - netif_device_attach(card->dev); - if (rc) { - dev_warn(&card->gdev->dev, "The lcs device driver " - "failed to recover the device\n"); - } - return rc; -} - -static int lcs_prepare(struct ccwgroup_device *gdev) -{ - return 0; -} - -static void lcs_complete(struct ccwgroup_device *gdev) -{ - return; -} - -static int lcs_freeze(struct ccwgroup_device *gdev) -{ - struct lcs_card *card = dev_get_drvdata(&gdev->dev); - return lcs_pm_suspend(card); -} - -static int lcs_thaw(struct ccwgroup_device *gdev) -{ - struct lcs_card *card = dev_get_drvdata(&gdev->dev); - return lcs_pm_resume(card); -} - -static int lcs_restore(struct ccwgroup_device *gdev) -{ - struct lcs_card *card = dev_get_drvdata(&gdev->dev); - return lcs_pm_resume(card); -} - static struct ccw_device_id lcs_ids[] = { {CCW_DEVICE(0x3088, 0x08), .driver_info = lcs_channel_type_parallel}, {CCW_DEVICE(0x3088, 0x1f), .driver_info = lcs_channel_type_2216}, @@ -2382,11 +2328,6 @@ static struct ccwgroup_driver lcs_group_driver = { .remove = lcs_remove_device, 
.set_online = lcs_new_device, .set_offline = lcs_shutdown_device, - .prepare = lcs_prepare, - .complete = lcs_complete, - .freeze = lcs_freeze, - .thaw = lcs_thaw, - .restore = lcs_restore, }; static ssize_t group_store(struct device_driver *ddrv, const char *buf, diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c index 5ce2424ca729..260860cf3aa1 100644 --- a/drivers/s390/net/netiucv.c +++ b/drivers/s390/net/netiucv.c @@ -112,27 +112,10 @@ DECLARE_PER_CPU(char[256], iucv_dbf_txt_buf); */ #define PRINTK_HEADER " iucv: " /* for debugging */ -/* dummy device to make sure netiucv_pm functions are called */ -static struct device *netiucv_dev; - -static int netiucv_pm_prepare(struct device *); -static void netiucv_pm_complete(struct device *); -static int netiucv_pm_freeze(struct device *); -static int netiucv_pm_restore_thaw(struct device *); - -static const struct dev_pm_ops netiucv_pm_ops = { - .prepare = netiucv_pm_prepare, - .complete = netiucv_pm_complete, - .freeze = netiucv_pm_freeze, - .thaw = netiucv_pm_restore_thaw, - .restore = netiucv_pm_restore_thaw, -}; - static struct device_driver netiucv_driver = { .owner = THIS_MODULE, .name = "netiucv", .bus = &iucv_bus, - .pm = &netiucv_pm_ops, }; static int netiucv_callback_connreq(struct iucv_path *, u8 *, u8 *); @@ -213,7 +196,6 @@ struct netiucv_priv { fsm_instance *fsm; struct iucv_connection *conn; struct device *dev; - int pm_state; }; /** @@ -1275,72 +1257,6 @@ static int netiucv_close(struct net_device *dev) return 0; } -static int netiucv_pm_prepare(struct device *dev) -{ - IUCV_DBF_TEXT(trace, 3, __func__); - return 0; -} - -static void netiucv_pm_complete(struct device *dev) -{ - IUCV_DBF_TEXT(trace, 3, __func__); - return; -} - -/** - * netiucv_pm_freeze() - Freeze PM callback - * @dev: netiucv device - * - * close open netiucv interfaces - */ -static int netiucv_pm_freeze(struct device *dev) -{ - struct netiucv_priv *priv = dev_get_drvdata(dev); - struct net_device *ndev = NULL; - int rc = 0; - - IUCV_DBF_TEXT(trace, 3, __func__); - if (priv && priv->conn) - ndev = priv->conn->netdev; - if (!ndev) - goto out; - netif_device_detach(ndev); - priv->pm_state = fsm_getstate(priv->fsm); - rc = netiucv_close(ndev); -out: - return rc; -} - -/** - * netiucv_pm_restore_thaw() - Thaw and restore PM callback - * @dev: netiucv device - * - * re-open netiucv interfaces closed during freeze - */ -static int netiucv_pm_restore_thaw(struct device *dev) -{ - struct netiucv_priv *priv = dev_get_drvdata(dev); - struct net_device *ndev = NULL; - int rc = 0; - - IUCV_DBF_TEXT(trace, 3, __func__); - if (priv && priv->conn) - ndev = priv->conn->netdev; - if (!ndev) - goto out; - switch (priv->pm_state) { - case DEV_STATE_RUNNING: - case DEV_STATE_STARTWAIT: - rc = netiucv_open(ndev); - break; - default: - break; - } - netif_device_attach(ndev); -out: - return rc; -} - /** * Start transmission of a packet. * Called from generic network device layer. 
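The s390 drivers in this stretch (ctcm, lcs, netiucv here, qeth and smsgiucv below) delete their legacy freeze/thaw/restore callbacks outright, while the Realtek drivers earlier in the series migrate to generic dev_pm_ops instead. A sketch of that migration pattern, reduced to its moving parts -- the foo_* names are hypothetical, and the detach/attach calls stand in for whatever device-specific work a real driver does:

#include <linux/device.h>
#include <linux/netdevice.h>
#include <linux/pci.h>
#include <linux/pm.h>

/*
 * __maybe_unused replaces the old #ifdef CONFIG_PM fencing: with sleep
 * support configured out, the functions are simply never referenced
 * and the compiler stays quiet.
 */
static int __maybe_unused foo_suspend(struct device *device)
{
	struct net_device *dev = dev_get_drvdata(device);

	netif_device_detach(dev);	/* device-specific quiescing here */
	return 0;
}

static int __maybe_unused foo_resume(struct device *device)
{
	struct net_device *dev = dev_get_drvdata(device);

	netif_device_attach(dev);	/* device-specific re-init here */
	return 0;
}

/* Wires suspend/resume into the system-sleep slots of a dev_pm_ops. */
static SIMPLE_DEV_PM_OPS(foo_pm_ops, foo_suspend, foo_resume);

static struct pci_driver foo_driver = {
	.name		= "foo",
	/* .id_table, .probe and .remove elided for brevity */
	.driver.pm	= &foo_pm_ops,
};

With .driver.pm wired up, the PCI core saves and restores config space and selects power states itself, which is why the converted 8139cp/8139too callbacks above could drop their pci_save_state()/pci_set_power_state() calls along with the CONFIG_PM ifdefs.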
@@ -2156,7 +2072,6 @@ static void __exit netiucv_exit(void) netiucv_unregister_device(dev); } - device_unregister(netiucv_dev); driver_unregister(&netiucv_driver); iucv_unregister(&netiucv_handler, 1); iucv_unregister_dbf_views(); @@ -2182,27 +2097,10 @@ static int __init netiucv_init(void) IUCV_DBF_TEXT_(setup, 2, "ret %d from driver_register\n", rc); goto out_iucv; } - /* establish dummy device */ - netiucv_dev = kzalloc(sizeof(struct device), GFP_KERNEL); - if (!netiucv_dev) { - rc = -ENOMEM; - goto out_driver; - } - dev_set_name(netiucv_dev, "netiucv"); - netiucv_dev->bus = &iucv_bus; - netiucv_dev->parent = iucv_root; - netiucv_dev->release = (void (*)(struct device *))kfree; - netiucv_dev->driver = &netiucv_driver; - rc = device_register(netiucv_dev); - if (rc) { - put_device(netiucv_dev); - goto out_driver; - } + netiucv_banner(); return rc; -out_driver: - driver_unregister(&netiucv_driver); out_iucv: iucv_unregister(&netiucv_handler, 1); out_dbf: diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index db8e069be3a0..18a0fb75a710 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -6434,32 +6434,6 @@ static void qeth_core_shutdown(struct ccwgroup_device *gdev) qdio_free(CARD_DDEV(card)); } -static int qeth_suspend(struct ccwgroup_device *gdev) -{ - struct qeth_card *card = dev_get_drvdata(&gdev->dev); - - qeth_set_allowed_threads(card, 0, 1); - wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0); - if (gdev->state == CCWGROUP_OFFLINE) - return 0; - - qeth_set_offline(card, false); - return 0; -} - -static int qeth_resume(struct ccwgroup_device *gdev) -{ - struct qeth_card *card = dev_get_drvdata(&gdev->dev); - int rc; - - rc = qeth_set_online(card); - - qeth_set_allowed_threads(card, 0xffffffff, 0); - if (rc) - dev_warn(&card->gdev->dev, "The qeth device driver failed to recover an error on the device\n"); - return rc; -} - static ssize_t group_store(struct device_driver *ddrv, const char *buf, size_t count) { @@ -6496,11 +6470,6 @@ static struct ccwgroup_driver qeth_core_ccwgroup_driver = { .set_online = qeth_core_set_online, .set_offline = qeth_core_set_offline, .shutdown = qeth_core_shutdown, - .prepare = NULL, - .complete = NULL, - .freeze = qeth_suspend, - .thaw = qeth_resume, - .restore = qeth_resume, }; struct qeth_card *qeth_get_card_by_busid(char *bus_id) diff --git a/drivers/s390/net/smsgiucv.c b/drivers/s390/net/smsgiucv.c index 066b5c3aaae6..c84ec2fbf99b 100644 --- a/drivers/s390/net/smsgiucv.c +++ b/drivers/s390/net/smsgiucv.c @@ -29,12 +29,9 @@ MODULE_AUTHOR MODULE_DESCRIPTION ("Linux for S/390 IUCV special message driver"); static struct iucv_path *smsg_path; -/* dummy device used as trigger for PM functions */ -static struct device *smsg_dev; static DEFINE_SPINLOCK(smsg_list_lock); static LIST_HEAD(smsg_list); -static int iucv_path_connected; static int smsg_path_pending(struct iucv_path *, u8 *, u8 *); static void smsg_message_pending(struct iucv_path *, struct iucv_message *); @@ -124,60 +121,15 @@ void smsg_unregister_callback(const char *prefix, kfree(cb); } -static int smsg_pm_freeze(struct device *dev) -{ -#ifdef CONFIG_PM_DEBUG - printk(KERN_WARNING "smsg_pm_freeze\n"); -#endif - if (smsg_path && iucv_path_connected) { - iucv_path_sever(smsg_path, NULL); - iucv_path_connected = 0; - } - return 0; -} - -static int smsg_pm_restore_thaw(struct device *dev) -{ - int rc; - -#ifdef CONFIG_PM_DEBUG - printk(KERN_WARNING "smsg_pm_restore_thaw\n"); -#endif - if (smsg_path && 
!iucv_path_connected) { - memset(smsg_path, 0, sizeof(*smsg_path)); - smsg_path->msglim = 255; - smsg_path->flags = 0; - rc = iucv_path_connect(smsg_path, &smsg_handler, "*MSG ", - NULL, NULL, NULL); -#ifdef CONFIG_PM_DEBUG - if (rc) - printk(KERN_ERR - "iucv_path_connect returned with rc %i\n", rc); -#endif - if (!rc) - iucv_path_connected = 1; - cpcmd("SET SMSG IUCV", NULL, 0, NULL); - } - return 0; -} - -static const struct dev_pm_ops smsg_pm_ops = { - .freeze = smsg_pm_freeze, - .thaw = smsg_pm_restore_thaw, - .restore = smsg_pm_restore_thaw, -}; - static struct device_driver smsg_driver = { .owner = THIS_MODULE, .name = SMSGIUCV_DRV_NAME, .bus = &iucv_bus, - .pm = &smsg_pm_ops, }; static void __exit smsg_exit(void) { cpcmd("SET SMSG OFF", NULL, 0, NULL); - device_unregister(smsg_dev); iucv_unregister(&smsg_handler, 1); driver_unregister(&smsg_driver); } @@ -205,27 +157,10 @@ static int __init smsg_init(void) NULL, NULL, NULL); if (rc) goto out_free_path; - else - iucv_path_connected = 1; - smsg_dev = kzalloc(sizeof(struct device), GFP_KERNEL); - if (!smsg_dev) { - rc = -ENOMEM; - goto out_free_path; - } - dev_set_name(smsg_dev, "smsg_iucv"); - smsg_dev->bus = &iucv_bus; - smsg_dev->parent = iucv_root; - smsg_dev->release = (void (*)(struct device *))kfree; - smsg_dev->driver = &smsg_driver; - rc = device_register(smsg_dev); - if (rc) - goto out_put; cpcmd("SET SMSG IUCV", NULL, 0, NULL); return 0; -out_put: - put_device(smsg_dev); out_free_path: iucv_path_free(smsg_path); smsg_path = NULL; diff --git a/include/linux/igmp.h b/include/linux/igmp.h index faa6586a5783..64ce8cd1cfaf 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -123,7 +123,7 @@ extern int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf,int ifindex); extern int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, struct ip_msfilter __user *optval, int __user *optlen); extern int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, - struct group_filter __user *optval, int __user *optlen); + struct sockaddr_storage __user *p); extern int ip_mc_sf_allow(struct sock *sk, __be32 local, __be32 rmt, int dif, int sdif); extern void ip_mc_init_dev(struct in_device *); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6a8f8daef09d..a18f8fdf4260 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -53,6 +53,7 @@ struct netpoll_info; struct device; struct phy_device; struct dsa_port; +struct ip_tunnel_parm; struct macsec_context; struct macsec_ops; @@ -1274,6 +1275,9 @@ struct netdev_net_notifier { * Get devlink port instance associated with a given netdev. * Called with a reference on the netdevice and devlink locks only, * rtnl_lock is not held. + * int (*ndo_tunnel_ctl)(struct net_device *dev, struct ip_tunnel_parm *p, + * int cmd); + * Add, change, delete or get information on an IPv4 tunnel. 
*/ struct net_device_ops { int (*ndo_init)(struct net_device *dev); @@ -1479,6 +1483,8 @@ struct net_device_ops { int (*ndo_xsk_wakeup)(struct net_device *dev, u32 queue_id, u32 flags); struct devlink_port * (*ndo_get_devlink_port)(struct net_device *dev); + int (*ndo_tunnel_ctl)(struct net_device *dev, + struct ip_tunnel_parm *p, int cmd); }; /** diff --git a/include/linux/phy.h b/include/linux/phy.h index 5d8ff5428010..2bcdf19ed3b4 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -543,7 +543,7 @@ struct phy_device { u8 mdix; u8 mdix_ctrl; - void (*phy_link_change)(struct phy_device *, bool up, bool do_carrier); + void (*phy_link_change)(struct phy_device *phydev, bool up); void (*adjust_link)(struct net_device *dev); #if IS_ENABLED(CONFIG_MACSEC) @@ -723,6 +723,8 @@ struct phy_driver { struct ethtool_tunable *tuna, const void *data); int (*set_loopback)(struct phy_device *dev, bool enable); + int (*get_sqi)(struct phy_device *dev); + int (*get_sqi_max)(struct phy_device *dev); }; #define to_phy_driver(d) container_of(to_mdio_common_driver(d), \ struct phy_driver, mdiodrv) diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h index 74efca15fde7..f93edd5750a5 100644 --- a/include/linux/qed/qed_rdma_if.h +++ b/include/linux/qed/qed_rdma_if.h @@ -53,6 +53,13 @@ enum qed_roce_qp_state { QED_ROCE_QP_STATE_SQE }; +enum qed_rdma_qp_type { + QED_RDMA_QP_TYPE_RC, + QED_RDMA_QP_TYPE_XRC_INI, + QED_RDMA_QP_TYPE_XRC_TGT, + QED_RDMA_QP_TYPE_INVAL = 0xffff, +}; + enum qed_rdma_tid_type { QED_RDMA_TID_REGISTERED_MR, QED_RDMA_TID_FMR, @@ -291,6 +298,12 @@ struct qed_rdma_create_srq_in_params { u16 num_pages; u16 pd_id; u16 page_size; + + /* XRC related only */ + bool reserved_key_en; + bool is_xrc; + u32 cq_cid; + u16 xrcd_id; }; struct qed_rdma_destroy_cq_in_params { @@ -319,7 +332,9 @@ struct qed_rdma_create_qp_in_params { u16 rq_num_pages; u64 rq_pbl_ptr; u16 srq_id; + u16 xrcd_id; u8 stats_queue; + enum qed_rdma_qp_type qp_type; }; struct qed_rdma_create_qp_out_params { @@ -429,11 +444,13 @@ struct qed_rdma_create_srq_out_params { struct qed_rdma_destroy_srq_in_params { u16 srq_id; + bool is_xrc; }; struct qed_rdma_modify_srq_in_params { u32 wqe_limit; u16 srq_id; + bool is_xrc; }; struct qed_rdma_stats_out_params { @@ -611,6 +628,8 @@ struct qed_rdma_ops { int (*rdma_set_rdma_int)(struct qed_dev *cdev, u16 cnt); int (*rdma_alloc_pd)(void *rdma_cxt, u16 *pd); void (*rdma_dealloc_pd)(void *rdma_cxt, u16 pd); + int (*rdma_alloc_xrcd)(void *rdma_cxt, u16 *xrcd); + void (*rdma_dealloc_xrcd)(void *rdma_cxt, u16 xrcd); int (*rdma_create_cq)(void *rdma_cxt, struct qed_rdma_create_cq_in_params *params, u16 *icid); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 3000c526f552..531843952809 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4165,7 +4165,7 @@ struct skb_ext { char data[] __aligned(8); }; -struct skb_ext *__skb_ext_alloc(void); +struct skb_ext *__skb_ext_alloc(gfp_t flags); void *__skb_ext_set(struct sk_buff *skb, enum skb_ext_id id, struct skb_ext *ext); void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id); diff --git a/include/net/compat.h b/include/net/compat.h index e341260642fe..f241666117d8 100644 --- a/include/net/compat.h +++ b/include/net/compat.h @@ -30,6 +30,24 @@ struct compat_cmsghdr { compat_int_t cmsg_type; }; +struct compat_rtentry { + u32 rt_pad1; + struct sockaddr rt_dst; /* target address */ + struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */ + struct sockaddr rt_genmask; /* target 
network mask (IP) */ + unsigned short rt_flags; + short rt_pad2; + u32 rt_pad3; + unsigned char rt_tos; + unsigned char rt_class; + short rt_pad4; + short rt_metric; /* +1 for binary compatibility! */ + compat_uptr_t rt_dev; /* forcing the device at add */ + u32 rt_mtu; /* per route MTU/Window */ + u32 rt_window; /* Window clamping */ + unsigned short rt_irtt; /* Initial RTT */ +}; + #else /* defined(CONFIG_COMPAT) */ /* * To avoid compiler warnings: @@ -49,11 +67,28 @@ int put_cmsg_compat(struct msghdr*, int, int, int, void *); int cmsghdr_from_user_compat_to_kern(struct msghdr *, struct sock *, unsigned char *, int); -int compat_mc_setsockopt(struct sock *, int, int, char __user *, unsigned int, - int (*)(struct sock *, int, int, char __user *, - unsigned int)); -int compat_mc_getsockopt(struct sock *, int, int, char __user *, int __user *, - int (*)(struct sock *, int, int, char __user *, - int __user *)); +struct compat_group_req { + __u32 gr_interface; + struct __kernel_sockaddr_storage gr_group + __aligned(4); +} __packed; + +struct compat_group_source_req { + __u32 gsr_interface; + struct __kernel_sockaddr_storage gsr_group + __aligned(4); + struct __kernel_sockaddr_storage gsr_source + __aligned(4); +} __packed; + +struct compat_group_filter { + __u32 gf_interface; + struct __kernel_sockaddr_storage gf_group + __aligned(4); + __u32 gf_fmode; + __u32 gf_numsrc; + struct __kernel_sockaddr_storage gf_slist[1] + __aligned(4); +} __packed; #endif /* NET_COMPAT_H */ diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 4001ffb04f0d..95d633785ef9 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -168,10 +168,11 @@ enum flow_action_hw_stats_bit { FLOW_ACTION_HW_STATS_IMMEDIATE_BIT, FLOW_ACTION_HW_STATS_DELAYED_BIT, FLOW_ACTION_HW_STATS_DISABLED_BIT, + + FLOW_ACTION_HW_STATS_NUM_BITS }; enum flow_action_hw_stats { - FLOW_ACTION_HW_STATS_DONT_CARE = 0, FLOW_ACTION_HW_STATS_IMMEDIATE = BIT(FLOW_ACTION_HW_STATS_IMMEDIATE_BIT), FLOW_ACTION_HW_STATS_DELAYED = BIT(FLOW_ACTION_HW_STATS_DELAYED_BIT), @@ -179,6 +180,7 @@ enum flow_action_hw_stats { FLOW_ACTION_HW_STATS_DELAYED, FLOW_ACTION_HW_STATS_DISABLED = BIT(FLOW_ACTION_HW_STATS_DISABLED_BIT), + FLOW_ACTION_HW_STATS_DONT_CARE = BIT(FLOW_ACTION_HW_STATS_NUM_BITS) - 1, }; typedef void (*action_destr)(void *priv); @@ -340,11 +342,12 @@ __flow_action_hw_stats_check(const struct flow_action *action, return false; action_entry = flow_action_first_entry_get(action); - if (action_entry->hw_stats == FLOW_ACTION_HW_STATS_DONT_CARE) - return true; + + /* Zero is not a legal value for hw_stats, catch anyone passing it */ + WARN_ON_ONCE(!action_entry->hw_stats); if (!check_allow_bit && - action_entry->hw_stats != FLOW_ACTION_HW_STATS_ANY) { + ~action_entry->hw_stats & FLOW_ACTION_HW_STATS_ANY) { NL_SET_ERR_MSG_MOD(extack, "Driver supports only default HW stats type \"any\""); return false; } else if (check_allow_bit && diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index fdaf975e3331..3f615a29766e 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -65,6 +65,7 @@ struct fib6_config { struct nl_info fc_nlinfo; struct nlattr *fc_encap; u16 fc_encap_type; + bool fc_is_fdb; }; struct fib6_node { diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index e525f003e619..2a5277758379 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -118,7 +118,8 @@ void ip6_route_init_special_entries(void); int ip6_route_init(void); void ip6_route_cleanup(void); -int 
ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg); +int ipv6_route_ioctl(struct net *net, unsigned int cmd, + struct in6_rtmsg *rtmsg); int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack); diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 236503a50759..076e5d7db7d3 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -269,7 +269,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, const u8 protocol); void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const u8 proto, int tunnel_hlen); -int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd); +int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd); +int ip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict); int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 955badd1e8ff..39a00d3ef5e2 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1115,6 +1115,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); int inet6_getname(struct socket *sock, struct sockaddr *uaddr, int peer); int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); +int inet6_compat_ioctl(struct socket *sock, unsigned int cmd, + unsigned long arg); int inet6_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk); @@ -1134,9 +1136,10 @@ struct group_filter; int ip6_mc_source(int add, int omode, struct sock *sk, struct group_source_req *pgsr); -int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf); +int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, + struct sockaddr_storage *list); int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, - struct group_filter __user *optval, int __user *optlen); + struct sockaddr_storage __user *p); #ifdef CONFIG_PROC_FS int ac6_proc_init(struct net *net); diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h index 14a490246be9..9259ce2b22f3 100644 --- a/include/net/iucv/af_iucv.h +++ b/include/net/iucv/af_iucv.h @@ -158,12 +158,4 @@ struct iucv_sock_list { atomic_t autobind_name; }; -__poll_t iucv_sock_poll(struct file *file, struct socket *sock, - poll_table *wait); -void iucv_sock_link(struct iucv_sock_list *l, struct sock *s); -void iucv_sock_unlink(struct iucv_sock_list *l, struct sock *s); -void iucv_accept_enqueue(struct sock *parent, struct sock *sk); -void iucv_accept_unlink(struct sock *sk); -struct sock *iucv_accept_dequeue(struct sock *parent, struct socket *newsock); - #endif /* __IUCV_H */ diff --git a/include/net/mptcp.h b/include/net/mptcp.h index c4a6ef4ba35b..46d0487d2b22 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -16,7 +16,10 @@ struct seq_file; /* MPTCP sk_buff extension data */ struct mptcp_ext { - u64 data_ack; + union { + u64 data_ack; + u32 data_ack32; + }; u64 data_seq; u32 subflow_seq; u16 data_len; diff --git a/include/net/netns/nexthop.h b/include/net/netns/nexthop.h index c712ee5eebd9..1937476c94a0 100644 --- a/include/net/netns/nexthop.h +++ b/include/net/netns/nexthop.h @@ -14,5 +14,6 @@ struct netns_nexthop { unsigned int seq; /* protected by rtnl_mutex */ u32 last_id_allocated; + struct atomic_notifier_head notifier_chain; }; #endif diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 
c440ccc861fc..4c951680f6f9 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -10,6 +10,7 @@ #define __LINUX_NEXTHOP_H #include <linux/netdevice.h> +#include <linux/notifier.h> #include <linux/route.h> #include <linux/types.h> #include <net/ip_fib.h> @@ -26,6 +27,7 @@ struct nh_config { u8 nh_family; u8 nh_protocol; u8 nh_blackhole; + u8 nh_fdb; u32 nh_flags; int nh_ifindex; @@ -52,6 +54,7 @@ struct nh_info { u8 family; bool reject_nh; + bool fdb_nh; union { struct fib_nh_common fib_nhc; @@ -80,6 +83,7 @@ struct nexthop { struct rb_node rb_node; /* entry on netns rbtree */ struct list_head fi_list; /* v4 entries using nh */ struct list_head f6i_list; /* v6 entries using nh */ + struct list_head fdb_list; /* fdb entries using this nh */ struct list_head grp_list; /* nh group entries using this nh */ struct net *net; @@ -88,6 +92,7 @@ struct nexthop { u8 protocol; /* app managing this nh */ u8 nh_flags; bool is_group; + bool is_fdb_nh; refcount_t refcnt; struct rcu_head rcu; @@ -98,6 +103,17 @@ struct nexthop { }; }; +enum nexthop_event_type { + NEXTHOP_EVENT_ADD, + NEXTHOP_EVENT_DEL +}; + +int call_nexthop_notifier(struct notifier_block *nb, struct net *net, + enum nexthop_event_type event_type, + struct nexthop *nh); +int register_nexthop_notifier(struct net *net, struct notifier_block *nb); +int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb); + /* caller is holding rcu or rtnl; no reference taken to nexthop */ struct nexthop *nexthop_find_by_id(struct net *net, u32 id); void nexthop_free_rcu(struct rcu_head *head); @@ -304,4 +320,32 @@ static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash) int nexthop_for_each_fib6_nh(struct nexthop *nh, int (*cb)(struct fib6_nh *nh, void *arg), void *arg); + +static inline int nexthop_get_family(struct nexthop *nh) +{ + struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info); + + return nhi->family; +} + +static inline +struct fib_nh_common *nexthop_fdb_nhc(struct nexthop *nh) +{ + struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info); + + return &nhi->fib_nhc; +} + +static inline struct fib_nh_common *nexthop_path_fdb_result(struct nexthop *nh, + int hash) +{ + struct nh_info *nhi; + struct nexthop *nhp; + + nhp = nexthop_select_path(nh, hash); + if (unlikely(!nhp)) + return NULL; + nhi = rcu_dereference(nhp->nh_info); + return &nhi->fib_nhc; +} #endif diff --git a/include/net/switchdev.h b/include/net/switchdev.h index ae7aeb0d1f9c..db519957e134 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -62,7 +62,6 @@ struct switchdev_attr { #if IS_ENABLED(CONFIG_BRIDGE_MRP) u8 mrp_port_state; /* MRP_PORT_STATE */ u8 mrp_port_role; /* MRP_PORT_ROLE */ - u8 mrp_ring_state; /* MRP_RING_STATE */ #endif } u; }; diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 373aadcfea21..3a41627cbdfe 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -7,6 +7,7 @@ #include <net/dst_metadata.h> #include <net/rtnetlink.h> #include <net/switchdev.h> +#include <net/nexthop.h> #define IANA_VXLAN_UDP_PORT 4789 @@ -487,4 +488,28 @@ static inline void vxlan_flag_attr_error(int attrtype, #undef VXLAN_FLAG } +static inline bool vxlan_fdb_nh_path_select(struct nexthop *nh, + int hash, + struct vxlan_rdst *rdst) +{ + struct fib_nh_common *nhc; + + nhc = nexthop_path_fdb_result(nh, hash); + if (unlikely(!nhc)) + return false; + + switch (nhc->nhc_gw_family) { + case AF_INET: + rdst->remote_ip.sin.sin_addr.s_addr = nhc->nhc_gw.ipv4; + rdst->remote_ip.sa.sa_family = AF_INET; + break; + 
case AF_INET6: + rdst->remote_ip.sin6.sin6_addr = nhc->nhc_gw.ipv6; + rdst->remote_ip.sa.sa_family = AF_INET6; + break; + } + + return true; +} + #endif diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 2881af411f76..e6f109b76c9a 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -232,6 +232,8 @@ enum { ETHTOOL_A_LINKSTATE_UNSPEC, ETHTOOL_A_LINKSTATE_HEADER, /* nest - _A_HEADER_* */ ETHTOOL_A_LINKSTATE_LINK, /* u8 */ + ETHTOOL_A_LINKSTATE_SQI, /* u32 */ + ETHTOOL_A_LINKSTATE_SQI_MAX, /* u32 */ /* add new constants above here */ __ETHTOOL_A_LINKSTATE_CNT, diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index cd144e3099a3..eefcda8ca44e 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -29,6 +29,7 @@ enum { NDA_LINK_NETNSID, NDA_SRC_VNI, NDA_PROTOCOL, /* Originator of entry */ + NDA_NH_ID, __NDA_MAX }; diff --git a/include/uapi/linux/nexthop.h b/include/uapi/linux/nexthop.h index 7b61867e9848..2d4a1e784cf0 100644 --- a/include/uapi/linux/nexthop.h +++ b/include/uapi/linux/nexthop.h @@ -49,6 +49,9 @@ enum { NHA_GROUPS, /* flag; only return nexthop groups in dump */ NHA_MASTER, /* u32; only return nexthops with given master dev */ + NHA_FDB, /* flag; nexthop belongs to a bridge fdb */ + /* if NHA_FDB is added, OIF, BLACKHOLE, ENCAP cannot be set */ + __NHA_MAX, }; diff --git a/include/uapi/linux/psample.h b/include/uapi/linux/psample.h index ce1116cff53d..aea26ab1431c 100644 --- a/include/uapi/linux/psample.h +++ b/include/uapi/linux/psample.h @@ -11,6 +11,7 @@ enum { PSAMPLE_ATTR_GROUP_SEQ, PSAMPLE_ATTR_SAMPLE_RATE, PSAMPLE_ATTR_DATA, + PSAMPLE_ATTR_TUNNEL, /* commands attributes */ PSAMPLE_ATTR_GROUP_REFCOUNT, @@ -25,6 +26,27 @@ enum psample_command { PSAMPLE_CMD_DEL_GROUP, }; +enum psample_tunnel_key_attr { + PSAMPLE_TUNNEL_KEY_ATTR_ID, /* be64 Tunnel ID */ + PSAMPLE_TUNNEL_KEY_ATTR_IPV4_SRC, /* be32 src IP address. */ + PSAMPLE_TUNNEL_KEY_ATTR_IPV4_DST, /* be32 dst IP address. */ + PSAMPLE_TUNNEL_KEY_ATTR_TOS, /* u8 Tunnel IP ToS. */ + PSAMPLE_TUNNEL_KEY_ATTR_TTL, /* u8 Tunnel IP TTL. */ + PSAMPLE_TUNNEL_KEY_ATTR_DONT_FRAGMENT, /* No argument, set DF. */ + PSAMPLE_TUNNEL_KEY_ATTR_CSUM, /* No argument. CSUM packet. */ + PSAMPLE_TUNNEL_KEY_ATTR_OAM, /* No argument. OAM frame. */ + PSAMPLE_TUNNEL_KEY_ATTR_GENEVE_OPTS, /* Array of Geneve options. */ + PSAMPLE_TUNNEL_KEY_ATTR_TP_SRC, /* be16 src Transport Port. */ + PSAMPLE_TUNNEL_KEY_ATTR_TP_DST, /* be16 dst Transport Port. */ + PSAMPLE_TUNNEL_KEY_ATTR_VXLAN_OPTS, /* Nested VXLAN opts* */ + PSAMPLE_TUNNEL_KEY_ATTR_IPV6_SRC, /* struct in6_addr src IPv6 address. */ + PSAMPLE_TUNNEL_KEY_ATTR_IPV6_DST, /* struct in6_addr dst IPv6 address. */ + PSAMPLE_TUNNEL_KEY_ATTR_PAD, + PSAMPLE_TUNNEL_KEY_ATTR_ERSPAN_OPTS, /* struct erspan_metadata */ + PSAMPLE_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE, /* No argument. IPV4_INFO_BRIDGE mode.*/ + __PSAMPLE_TUNNEL_KEY_ATTR_MAX +}; + /* Can be overridden at runtime by module option */ #define PSAMPLE_ATTR_MAX (__PSAMPLE_ATTR_MAX - 1) diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index b41375d4d295..15787e8c0629 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -57,6 +57,7 @@ #include <net/sock.h> #include <net/tcp_states.h> #include <net/route.h> +#include <net/compat.h> #include <linux/atalk.h> #include <linux/highmem.h> @@ -867,6 +868,24 @@ static int atif_ioctl(int cmd, void __user *arg) return copy_to_user(arg, &atreq, sizeof(atreq)) ? 
-EFAULT : 0; } +static int atrtr_ioctl_addrt(struct rtentry *rt) +{ + struct net_device *dev = NULL; + + if (rt->rt_dev) { + char name[IFNAMSIZ]; + + if (copy_from_user(name, rt->rt_dev, IFNAMSIZ-1)) + return -EFAULT; + name[IFNAMSIZ-1] = '\0'; + + dev = __dev_get_by_name(&init_net, name); + if (!dev) + return -ENODEV; + } + return atrtr_create(rt, dev); +} + /* Routing ioctl() calls */ static int atrtr_ioctl(unsigned int cmd, void __user *arg) { @@ -882,19 +901,8 @@ static int atrtr_ioctl(unsigned int cmd, void __user *arg) return atrtr_delete(&((struct sockaddr_at *) &rt.rt_dst)->sat_addr); - case SIOCADDRT: { - struct net_device *dev = NULL; - if (rt.rt_dev) { - char name[IFNAMSIZ]; - if (copy_from_user(name, rt.rt_dev, IFNAMSIZ-1)) - return -EFAULT; - name[IFNAMSIZ-1] = '\0'; - dev = __dev_get_by_name(&init_net, name); - if (!dev) - return -ENODEV; - } - return atrtr_create(&rt, dev); - } + case SIOCADDRT: + return atrtr_ioctl_addrt(&rt); } return -EINVAL; } @@ -1832,20 +1840,58 @@ static int atalk_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) #ifdef CONFIG_COMPAT +static int atalk_compat_routing_ioctl(struct sock *sk, unsigned int cmd, + struct compat_rtentry __user *ur) +{ + compat_uptr_t rtdev; + struct rtentry rt; + + if (copy_from_user(&rt.rt_dst, &ur->rt_dst, + 3 * sizeof(struct sockaddr)) || + get_user(rt.rt_flags, &ur->rt_flags) || + get_user(rt.rt_metric, &ur->rt_metric) || + get_user(rt.rt_mtu, &ur->rt_mtu) || + get_user(rt.rt_window, &ur->rt_window) || + get_user(rt.rt_irtt, &ur->rt_irtt) || + get_user(rtdev, &ur->rt_dev)) + return -EFAULT; + + switch (cmd) { + case SIOCDELRT: + if (rt.rt_dst.sa_family != AF_APPLETALK) + return -EINVAL; + return atrtr_delete(&((struct sockaddr_at *) + &rt.rt_dst)->sat_addr); + + case SIOCADDRT: + rt.rt_dev = compat_ptr(rtdev); + return atrtr_ioctl_addrt(&rt); + default: + return -EINVAL; + } +} static int atalk_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { + void __user *argp = compat_ptr(arg); + struct sock *sk = sock->sk; + + switch (cmd) { + case SIOCADDRT: + case SIOCDELRT: + return atalk_compat_routing_ioctl(sk, cmd, argp); /* * SIOCATALKDIFADDR is a SIOCPROTOPRIVATE ioctl number, so we * cannot handle it in common code. The data we access in ifreq * here is compatible, so we can simply call the native * handler. 
*/ - if (cmd == SIOCATALKDIFADDR) - return atalk_ioctl(sock, cmd, (unsigned long)compat_ptr(arg)); - - return -ENOIOCTLCMD; + case SIOCATALKDIFADDR: + return atalk_ioctl(sock, cmd, (unsigned long)argp); + default: + return -ENOIOCTLCMD; + } } -#endif +#endif /* CONFIG_COMPAT */ static const struct net_proto_family atalk_family_ops = { diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c index d955b683aa7c..838ebf0cabbf 100644 --- a/net/atm/ioctl.c +++ b/net/atm/ioctl.c @@ -56,6 +56,8 @@ static int do_vcc_ioctl(struct socket *sock, unsigned int cmd, int error; struct list_head *pos; void __user *argp = (void __user *)arg; + void __user *buf; + int __user *len; vcc = ATM_SD(sock); switch (cmd) { @@ -162,7 +164,49 @@ static int do_vcc_ioctl(struct socket *sock, unsigned int cmd, if (error != -ENOIOCTLCMD) goto done; - error = atm_dev_ioctl(cmd, argp, compat); + if (cmd == ATM_GETNAMES) { + if (IS_ENABLED(CONFIG_COMPAT) && compat) { +#ifdef CONFIG_COMPAT + struct compat_atm_iobuf __user *ciobuf = argp; + compat_uptr_t cbuf; + len = &ciobuf->length; + if (get_user(cbuf, &ciobuf->buffer)) + return -EFAULT; + buf = compat_ptr(cbuf); +#endif + } else { + struct atm_iobuf __user *iobuf = argp; + len = &iobuf->length; + if (get_user(buf, &iobuf->buffer)) + return -EFAULT; + } + error = atm_getnames(buf, len); + } else { + int number; + + if (IS_ENABLED(CONFIG_COMPAT) && compat) { +#ifdef CONFIG_COMPAT + struct compat_atmif_sioc __user *csioc = argp; + compat_uptr_t carg; + + len = &csioc->length; + if (get_user(carg, &csioc->arg)) + return -EFAULT; + buf = compat_ptr(carg); + if (get_user(number, &csioc->number)) + return -EFAULT; +#endif + } else { + struct atmif_sioc __user *sioc = argp; + + len = &sioc->length; + if (get_user(buf, &sioc->arg)) + return -EFAULT; + if (get_user(number, &sioc->number)) + return -EFAULT; + } + error = atm_dev_ioctl(cmd, buf, len, number, compat); + } done: return error; @@ -230,61 +274,25 @@ static struct { static int do_atm_iobuf(struct socket *sock, unsigned int cmd, unsigned long arg) { - struct atm_iobuf __user *iobuf; - struct compat_atm_iobuf __user *iobuf32; + struct compat_atm_iobuf __user *iobuf32 = compat_ptr(arg); u32 data; - void __user *datap; - int len, err; - - iobuf = compat_alloc_user_space(sizeof(*iobuf)); - iobuf32 = compat_ptr(arg); - if (get_user(len, &iobuf32->length) || - get_user(data, &iobuf32->buffer)) + if (get_user(data, &iobuf32->buffer)) return -EFAULT; - datap = compat_ptr(data); - if (put_user(len, &iobuf->length) || - put_user(datap, &iobuf->buffer)) - return -EFAULT; - - err = do_vcc_ioctl(sock, cmd, (unsigned long) iobuf, 0); - if (!err) { - if (copy_in_user(&iobuf32->length, &iobuf->length, - sizeof(int))) - err = -EFAULT; - } - - return err; + return atm_getnames(&iobuf32->length, compat_ptr(data)); } static int do_atmif_sioc(struct socket *sock, unsigned int cmd, unsigned long arg) { - struct atmif_sioc __user *sioc; - struct compat_atmif_sioc __user *sioc32; + struct compat_atmif_sioc __user *sioc32 = compat_ptr(arg); + int number; u32 data; - void __user *datap; - int err; - - sioc = compat_alloc_user_space(sizeof(*sioc)); - sioc32 = compat_ptr(arg); - if (copy_in_user(&sioc->number, &sioc32->number, 2 * sizeof(int)) || - get_user(data, &sioc32->arg)) + if (get_user(data, &sioc32->arg) || get_user(number, &sioc32->number)) return -EFAULT; - datap = compat_ptr(data); - if (put_user(datap, &sioc->arg)) - return -EFAULT; - - err = do_vcc_ioctl(sock, cmd, (unsigned long) sioc, 0); - - if (!err) { - if (copy_in_user(&sioc32->length, 
&sioc->length, - sizeof(int))) - err = -EFAULT; - } - return err; + return atm_dev_ioctl(cmd, compat_ptr(data), &sioc32->length, number, 0); } static int do_atm_ioctl(struct socket *sock, unsigned int cmd32, diff --git a/net/atm/resources.c b/net/atm/resources.c index 889349c6d90d..94bdc6527ee8 100644 --- a/net/atm/resources.c +++ b/net/atm/resources.c @@ -193,88 +193,48 @@ static int fetch_stats(struct atm_dev *dev, struct atm_dev_stats __user *arg, return error ? -EFAULT : 0; } -int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat) +int atm_getnames(void __user *buf, int __user *iobuf_len) { - void __user *buf; - int error, len, number, size = 0; + int error, len, size = 0; struct atm_dev *dev; struct list_head *p; int *tmp_buf, *tmp_p; - int __user *sioc_len; - int __user *iobuf_len; - switch (cmd) { - case ATM_GETNAMES: - if (IS_ENABLED(CONFIG_COMPAT) && compat) { -#ifdef CONFIG_COMPAT - struct compat_atm_iobuf __user *ciobuf = arg; - compat_uptr_t cbuf; - iobuf_len = &ciobuf->length; - if (get_user(cbuf, &ciobuf->buffer)) - return -EFAULT; - buf = compat_ptr(cbuf); -#endif - } else { - struct atm_iobuf __user *iobuf = arg; - iobuf_len = &iobuf->length; - if (get_user(buf, &iobuf->buffer)) - return -EFAULT; - } - if (get_user(len, iobuf_len)) - return -EFAULT; - mutex_lock(&atm_dev_mutex); - list_for_each(p, &atm_devs) - size += sizeof(int); - if (size > len) { - mutex_unlock(&atm_dev_mutex); - return -E2BIG; - } - tmp_buf = kmalloc(size, GFP_ATOMIC); - if (!tmp_buf) { - mutex_unlock(&atm_dev_mutex); - return -ENOMEM; - } - tmp_p = tmp_buf; - list_for_each(p, &atm_devs) { - dev = list_entry(p, struct atm_dev, dev_list); - *tmp_p++ = dev->number; - } + if (get_user(len, iobuf_len)) + return -EFAULT; + mutex_lock(&atm_dev_mutex); + list_for_each(p, &atm_devs) + size += sizeof(int); + if (size > len) { mutex_unlock(&atm_dev_mutex); - error = ((copy_to_user(buf, tmp_buf, size)) || - put_user(size, iobuf_len)) - ? -EFAULT : 0; - kfree(tmp_buf); - return error; - default: - break; + return -E2BIG; } - - if (IS_ENABLED(CONFIG_COMPAT) && compat) { -#ifdef CONFIG_COMPAT - struct compat_atmif_sioc __user *csioc = arg; - compat_uptr_t carg; - - sioc_len = &csioc->length; - if (get_user(carg, &csioc->arg)) - return -EFAULT; - buf = compat_ptr(carg); - - if (get_user(len, &csioc->length)) - return -EFAULT; - if (get_user(number, &csioc->number)) - return -EFAULT; -#endif - } else { - struct atmif_sioc __user *sioc = arg; - - sioc_len = &sioc->length; - if (get_user(buf, &sioc->arg)) - return -EFAULT; - if (get_user(len, &sioc->length)) - return -EFAULT; - if (get_user(number, &sioc->number)) - return -EFAULT; + tmp_buf = kmalloc(size, GFP_ATOMIC); + if (!tmp_buf) { + mutex_unlock(&atm_dev_mutex); + return -ENOMEM; + } + tmp_p = tmp_buf; + list_for_each(p, &atm_devs) { + dev = list_entry(p, struct atm_dev, dev_list); + *tmp_p++ = dev->number; } + mutex_unlock(&atm_dev_mutex); + error = ((copy_to_user(buf, tmp_buf, size)) || + put_user(size, iobuf_len)) + ? 
-EFAULT : 0; + kfree(tmp_buf); + return error; +} + +int atm_dev_ioctl(unsigned int cmd, void __user *buf, int __user *sioc_len, + int number, int compat) +{ + int error, len, size = 0; + struct atm_dev *dev; + + if (get_user(len, sioc_len)) + return -EFAULT; dev = try_then_request_module(atm_dev_lookup(number), "atm-device-%d", number); diff --git a/net/atm/resources.h b/net/atm/resources.h index 048232e4d4c6..4a0839e92ff3 100644 --- a/net/atm/resources.h +++ b/net/atm/resources.h @@ -14,8 +14,9 @@ extern struct list_head atm_devs; extern struct mutex atm_dev_mutex; -int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat); - +int atm_getnames(void __user *buf, int __user *iobuf_len); +int atm_dev_ioctl(unsigned int cmd, void __user *buf, int __user *sioc_len, + int number, int compat); #ifdef CONFIG_PROC_FS diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index ccb535c77e5d..8bdabc03b0b2 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -135,9 +135,6 @@ static ssize_t batadv_socket_read(struct file *file, char __user *buf, if (!buf || count < sizeof(struct batadv_icmp_packet)) return -EINVAL; - if (!access_ok(buf, count)) - return -EFAULT; - error = wait_event_interruptible(socket_client->queue_wait, socket_client->queue_len); diff --git a/net/bridge/br_mrp.c b/net/bridge/br_mrp.c index d7bc09de4c13..528d767eb026 100644 --- a/net/bridge/br_mrp.c +++ b/net/bridge/br_mrp.c @@ -37,6 +37,26 @@ static struct br_mrp *br_mrp_find_id(struct net_bridge *br, u32 ring_id) return res; } +static bool br_mrp_unique_ifindex(struct net_bridge *br, u32 ifindex) +{ + struct br_mrp *mrp; + + list_for_each_entry_rcu(mrp, &br->mrp_list, list, + lockdep_rtnl_is_held()) { + struct net_bridge_port *p; + + p = rtnl_dereference(mrp->p_port); + if (p && p->dev->ifindex == ifindex) + return false; + + p = rtnl_dereference(mrp->s_port); + if (p && p->dev->ifindex == ifindex) + return false; + } + + return true; +} + static struct br_mrp *br_mrp_find_port(struct net_bridge *br, struct net_bridge_port *p) { @@ -203,6 +223,7 @@ out: static void br_mrp_del_impl(struct net_bridge *br, struct br_mrp *mrp) { struct net_bridge_port *p; + u8 state; /* Stop sending MRP_Test frames */ cancel_delayed_work_sync(&mrp->test_work); @@ -214,20 +235,24 @@ static void br_mrp_del_impl(struct net_bridge *br, struct br_mrp *mrp) p = rtnl_dereference(mrp->p_port); if (p) { spin_lock_bh(&br->lock); - p->state = BR_STATE_FORWARDING; + state = netif_running(br->dev) ? + BR_STATE_FORWARDING : BR_STATE_DISABLED; + p->state = state; p->flags &= ~BR_MRP_AWARE; spin_unlock_bh(&br->lock); - br_mrp_port_switchdev_set_state(p, BR_STATE_FORWARDING); + br_mrp_port_switchdev_set_state(p, state); rcu_assign_pointer(mrp->p_port, NULL); } p = rtnl_dereference(mrp->s_port); if (p) { spin_lock_bh(&br->lock); - p->state = BR_STATE_FORWARDING; + state = netif_running(br->dev) ? 
+ BR_STATE_FORWARDING : BR_STATE_DISABLED; + p->state = state; p->flags &= ~BR_MRP_AWARE; spin_unlock_bh(&br->lock); - br_mrp_port_switchdev_set_state(p, BR_STATE_FORWARDING); + br_mrp_port_switchdev_set_state(p, state); rcu_assign_pointer(mrp->s_port, NULL); } @@ -255,6 +280,11 @@ int br_mrp_add(struct net_bridge *br, struct br_mrp_instance *instance) !br_mrp_get_port(br, instance->s_ifindex)) return -EINVAL; + /* It is not possible to have the same port be part of multiple rings */ + if (!br_mrp_unique_ifindex(br, instance->p_ifindex) || + !br_mrp_unique_ifindex(br, instance->s_ifindex)) + return -EINVAL; + mrp = kzalloc(sizeof(*mrp), GFP_KERNEL); if (!mrp) return -ENOMEM; diff --git a/net/compat.c b/net/compat.c index 69fc6d1e4e6e..afd7b444e0bf 100644 --- a/net/compat.c +++ b/net/compat.c @@ -448,200 +448,6 @@ COMPAT_SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname, return __compat_sys_getsockopt(fd, level, optname, optval, optlen); } -struct compat_group_req { - __u32 gr_interface; - struct __kernel_sockaddr_storage gr_group - __aligned(4); -} __packed; - -struct compat_group_source_req { - __u32 gsr_interface; - struct __kernel_sockaddr_storage gsr_group - __aligned(4); - struct __kernel_sockaddr_storage gsr_source - __aligned(4); -} __packed; - -struct compat_group_filter { - __u32 gf_interface; - struct __kernel_sockaddr_storage gf_group - __aligned(4); - __u32 gf_fmode; - __u32 gf_numsrc; - struct __kernel_sockaddr_storage gf_slist[1] - __aligned(4); -} __packed; - -#define __COMPAT_GF0_SIZE (sizeof(struct compat_group_filter) - \ - sizeof(struct __kernel_sockaddr_storage)) - - -int compat_mc_setsockopt(struct sock *sock, int level, int optname, - char __user *optval, unsigned int optlen, - int (*setsockopt)(struct sock *, int, int, char __user *, unsigned int)) -{ - char __user *koptval = optval; - int koptlen = optlen; - - switch (optname) { - case MCAST_JOIN_GROUP: - case MCAST_LEAVE_GROUP: - { - struct compat_group_req __user *gr32 = (void __user *)optval; - struct group_req __user *kgr = - compat_alloc_user_space(sizeof(struct group_req)); - u32 interface; - - if (!access_ok(gr32, sizeof(*gr32)) || - !access_ok(kgr, sizeof(struct group_req)) || - __get_user(interface, &gr32->gr_interface) || - __put_user(interface, &kgr->gr_interface) || - copy_in_user(&kgr->gr_group, &gr32->gr_group, - sizeof(kgr->gr_group))) - return -EFAULT; - koptval = (char __user *)kgr; - koptlen = sizeof(struct group_req); - break; - } - case MCAST_JOIN_SOURCE_GROUP: - case MCAST_LEAVE_SOURCE_GROUP: - case MCAST_BLOCK_SOURCE: - case MCAST_UNBLOCK_SOURCE: - { - struct compat_group_source_req __user *gsr32 = (void __user *)optval; - struct group_source_req __user *kgsr = compat_alloc_user_space( - sizeof(struct group_source_req)); - u32 interface; - - if (!access_ok(gsr32, sizeof(*gsr32)) || - !access_ok(kgsr, - sizeof(struct group_source_req)) || - __get_user(interface, &gsr32->gsr_interface) || - __put_user(interface, &kgsr->gsr_interface) || - copy_in_user(&kgsr->gsr_group, &gsr32->gsr_group, - sizeof(kgsr->gsr_group)) || - copy_in_user(&kgsr->gsr_source, &gsr32->gsr_source, - sizeof(kgsr->gsr_source))) - return -EFAULT; - koptval = (char __user *)kgsr; - koptlen = sizeof(struct group_source_req); - break; - } - case MCAST_MSFILTER: - { - struct compat_group_filter __user *gf32 = (void __user *)optval; - struct group_filter __user *kgf; - u32 interface, fmode, numsrc; - - if (!access_ok(gf32, __COMPAT_GF0_SIZE) || - __get_user(interface, &gf32->gf_interface) || - __get_user(fmode, 
&gf32->gf_fmode) || - __get_user(numsrc, &gf32->gf_numsrc)) - return -EFAULT; - koptlen = optlen + sizeof(struct group_filter) - - sizeof(struct compat_group_filter); - if (koptlen < GROUP_FILTER_SIZE(numsrc)) - return -EINVAL; - kgf = compat_alloc_user_space(koptlen); - if (!access_ok(kgf, koptlen) || - __put_user(interface, &kgf->gf_interface) || - __put_user(fmode, &kgf->gf_fmode) || - __put_user(numsrc, &kgf->gf_numsrc) || - copy_in_user(&kgf->gf_group, &gf32->gf_group, - sizeof(kgf->gf_group)) || - (numsrc && copy_in_user(kgf->gf_slist, gf32->gf_slist, - numsrc * sizeof(kgf->gf_slist[0])))) - return -EFAULT; - koptval = (char __user *)kgf; - break; - } - - default: - break; - } - return setsockopt(sock, level, optname, koptval, koptlen); -} -EXPORT_SYMBOL(compat_mc_setsockopt); - -int compat_mc_getsockopt(struct sock *sock, int level, int optname, - char __user *optval, int __user *optlen, - int (*getsockopt)(struct sock *, int, int, char __user *, int __user *)) -{ - struct compat_group_filter __user *gf32 = (void __user *)optval; - struct group_filter __user *kgf; - int __user *koptlen; - u32 interface, fmode, numsrc; - int klen, ulen, err; - - if (optname != MCAST_MSFILTER) - return getsockopt(sock, level, optname, optval, optlen); - - koptlen = compat_alloc_user_space(sizeof(*koptlen)); - if (!access_ok(optlen, sizeof(*optlen)) || - __get_user(ulen, optlen)) - return -EFAULT; - - /* adjust len for pad */ - klen = ulen + sizeof(*kgf) - sizeof(*gf32); - - if (klen < GROUP_FILTER_SIZE(0)) - return -EINVAL; - - if (!access_ok(koptlen, sizeof(*koptlen)) || - __put_user(klen, koptlen)) - return -EFAULT; - - /* have to allow space for previous compat_alloc_user_space, too */ - kgf = compat_alloc_user_space(klen+sizeof(*optlen)); - - if (!access_ok(gf32, __COMPAT_GF0_SIZE) || - __get_user(interface, &gf32->gf_interface) || - __get_user(fmode, &gf32->gf_fmode) || - __get_user(numsrc, &gf32->gf_numsrc) || - __put_user(interface, &kgf->gf_interface) || - __put_user(fmode, &kgf->gf_fmode) || - __put_user(numsrc, &kgf->gf_numsrc) || - copy_in_user(&kgf->gf_group, &gf32->gf_group, sizeof(kgf->gf_group))) - return -EFAULT; - - err = getsockopt(sock, level, optname, (char __user *)kgf, koptlen); - if (err) - return err; - - if (!access_ok(koptlen, sizeof(*koptlen)) || - __get_user(klen, koptlen)) - return -EFAULT; - - ulen = klen - (sizeof(*kgf)-sizeof(*gf32)); - - if (!access_ok(optlen, sizeof(*optlen)) || - __put_user(ulen, optlen)) - return -EFAULT; - - if (!access_ok(kgf, klen) || - !access_ok(gf32, ulen) || - __get_user(interface, &kgf->gf_interface) || - __get_user(fmode, &kgf->gf_fmode) || - __get_user(numsrc, &kgf->gf_numsrc) || - __put_user(interface, &gf32->gf_interface) || - __put_user(fmode, &gf32->gf_fmode) || - __put_user(numsrc, &gf32->gf_numsrc)) - return -EFAULT; - if (numsrc) { - int copylen; - - klen -= GROUP_FILTER_SIZE(0); - copylen = numsrc * sizeof(gf32->gf_slist[0]); - if (copylen > klen) - copylen = klen; - if (copy_in_user(gf32->gf_slist, kgf->gf_slist, copylen)) - return -EFAULT; - } - return err; -} -EXPORT_SYMBOL(compat_mc_getsockopt); - - /* Argument list sizes for compat_sys_socketcall */ #define AL(x) ((x) * sizeof(u32)) static unsigned char nas[21] = { diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index e951b743bed3..e64941c526b1 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -8,6 +8,7 @@ struct flow_rule *flow_rule_alloc(unsigned int num_actions) { struct flow_rule *rule; + int i; rule = kzalloc(struct_size(rule, 
action.entries, num_actions), GFP_KERNEL); @@ -15,6 +16,11 @@ struct flow_rule *flow_rule_alloc(unsigned int num_actions) return NULL; rule->action.num_entries = num_actions; + /* Pre-fill each action hw_stats with DONT_CARE. + * Caller can override this if it wants stats for a given action. + */ + for (i = 0; i < num_actions; i++) + rule->action.entries[i].hw_stats = FLOW_ACTION_HW_STATS_DONT_CARE; return rule; } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index b607ea602774..37e4dba62460 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1771,6 +1771,7 @@ static struct neigh_table *neigh_find_table(int family) } const struct nla_policy nda_policy[NDA_MAX+1] = { + [NDA_UNSPEC] = { .strict_start_type = NDA_NH_ID }, [NDA_DST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, [NDA_LLADDR] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, [NDA_CACHEINFO] = { .len = sizeof(struct nda_cacheinfo) }, @@ -1781,6 +1782,7 @@ const struct nla_policy nda_policy[NDA_MAX+1] = { [NDA_IFINDEX] = { .type = NLA_U32 }, [NDA_MASTER] = { .type = NLA_U32 }, [NDA_PROTOCOL] = { .type = NLA_U8 }, + [NDA_NH_ID] = { .type = NLA_U32 }, }; static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1bf0c3d278e7..b8afefe6f6b6 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3727,7 +3727,6 @@ int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb) return 0; } -EXPORT_SYMBOL_GPL(skb_gro_receive_list); /** * skb_segment - Perform protocol segmentation on skb. @@ -4191,7 +4190,6 @@ done: NAPI_GRO_CB(skb)->same_flow = 1; return 0; } -EXPORT_SYMBOL_GPL(skb_gro_receive); #ifdef CONFIG_SKB_EXTENSIONS #define SKB_EXT_ALIGN_VALUE 8 @@ -6087,13 +6085,15 @@ static void *skb_ext_get_ptr(struct skb_ext *ext, enum skb_ext_id id) /** * __skb_ext_alloc - allocate a new skb extensions storage * + * @flags: See kmalloc(). + * * Returns the newly allocated pointer. The pointer can later be attached to a * skb via __skb_ext_set(). * Note: caller must handle the skb_ext as opaque data. 
*/ -struct skb_ext *__skb_ext_alloc(void) +struct skb_ext *__skb_ext_alloc(gfp_t flags) { - struct skb_ext *new = kmem_cache_alloc(skbuff_ext_cache, GFP_ATOMIC); + struct skb_ext *new = kmem_cache_alloc(skbuff_ext_cache, flags); if (new) { memset(new->offset, 0, sizeof(new->offset)); @@ -6188,7 +6188,7 @@ void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id) } else { newoff = SKB_EXT_CHUNKSIZEOF(*new); - new = __skb_ext_alloc(); + new = __skb_ext_alloc(GFP_ATOMIC); if (!new) return NULL; } diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 0ddb13a6282b..b109cc8a6dd8 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -23,6 +23,7 @@ #include <net/pkt_sched.h> static int two __maybe_unused = 2; +static int three = 3; static int min_sndbuf = SOCK_MIN_SNDBUF; static int min_rcvbuf = SOCK_MIN_RCVBUF; static int max_skb_frags = MAX_SKB_FRAGS; @@ -39,6 +40,7 @@ EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net); * IPv6: reset all settings to default * 1 - Both inherit all current settings from init_net * 2 - Both reset all settings to default + * 3 - Both inherit all settings from current netns */ int sysctl_devconf_inherit_init_net __read_mostly; EXPORT_SYMBOL(sysctl_devconf_inherit_init_net); @@ -553,7 +555,7 @@ static struct ctl_table net_core_table[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, - .extra2 = &two, + .extra2 = &three, }, { .procname = "high_order_alloc_disable", diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 1e5e08cc0bfc..650187d68851 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -1082,6 +1082,7 @@ static const struct proto_ops inet6_dccp_ops = { .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, #ifdef CONFIG_COMPAT + .compat_ioctl = inet6_compat_ioctl, .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, #endif diff --git a/net/ethtool/channels.c b/net/ethtool/channels.c index 658a8580b464..9ef54cdcf662 100644 --- a/net/ethtool/channels.c +++ b/net/ethtool/channels.c @@ -129,13 +129,13 @@ int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info) { struct nlattr *tb[ETHTOOL_A_CHANNELS_MAX + 1]; unsigned int from_channel, old_total, i; + bool mod = false, mod_combined = false; struct ethtool_channels channels = {}; struct ethnl_req_info req_info = {}; const struct nlattr *err_attr; const struct ethtool_ops *ops; struct net_device *dev; u32 max_rx_in_use = 0; - bool mod = false; int ret; ret = nlmsg_parse(info->nlhdr, GENL_HDRLEN, tb, @@ -170,7 +170,8 @@ int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info) ethnl_update_u32(&channels.other_count, tb[ETHTOOL_A_CHANNELS_OTHER_COUNT], &mod); ethnl_update_u32(&channels.combined_count, - tb[ETHTOOL_A_CHANNELS_COMBINED_COUNT], &mod); + tb[ETHTOOL_A_CHANNELS_COMBINED_COUNT], &mod_combined); + mod |= mod_combined; ret = 0; if (!mod) goto out_ops; @@ -193,6 +194,21 @@ int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info) goto out_ops; } + /* ensure there is at least one RX and one TX channel */ + if (!channels.combined_count && !channels.rx_count) + err_attr = tb[ETHTOOL_A_CHANNELS_RX_COUNT]; + else if (!channels.combined_count && !channels.tx_count) + err_attr = tb[ETHTOOL_A_CHANNELS_TX_COUNT]; + else + err_attr = NULL; + if (err_attr) { + if (mod_combined) + err_attr = tb[ETHTOOL_A_CHANNELS_COMBINED_COUNT]; + ret = -EINVAL; + NL_SET_ERR_MSG_ATTR(info->extack, err_attr, "requested channel counts would result in no RX or TX channel being configured"); 
+ goto out_ops; + } + /* ensure the new Rx count fits within the configured Rx flow * indirection table settings */ diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 74892623bacd..31e0b4e88a9d 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1669,6 +1669,12 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev, dev->ethtool_ops->get_channels(dev, &curr); + if (channels.rx_count == curr.rx_count && + channels.tx_count == curr.tx_count && + channels.combined_count == curr.combined_count && + channels.other_count == curr.other_count) + return 0; + /* ensure new counts are within the maximums */ if (channels.rx_count > curr.max_rx || channels.tx_count > curr.max_tx || @@ -1676,6 +1682,11 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev, channels.other_count > curr.max_other) return -EINVAL; + /* ensure there is at least one RX and one TX channel */ + if (!channels.combined_count && + (!channels.rx_count || !channels.tx_count)) + return -EINVAL; + /* ensure the new Rx count fits within the configured Rx flow * indirection table settings */ if (netif_is_rxfh_configured(dev) && diff --git a/net/ethtool/linkstate.c b/net/ethtool/linkstate.c index 2740cde0a182..7f47ba89054e 100644 --- a/net/ethtool/linkstate.c +++ b/net/ethtool/linkstate.c @@ -2,6 +2,7 @@ #include "netlink.h" #include "common.h" +#include <linux/phy.h> struct linkstate_req_info { struct ethnl_req_info base; @@ -10,6 +11,8 @@ struct linkstate_req_info { struct linkstate_reply_data { struct ethnl_reply_data base; int link; + int sqi; + int sqi_max; }; #define LINKSTATE_REPDATA(__reply_base) \ @@ -20,8 +23,46 @@ linkstate_get_policy[ETHTOOL_A_LINKSTATE_MAX + 1] = { [ETHTOOL_A_LINKSTATE_UNSPEC] = { .type = NLA_REJECT }, [ETHTOOL_A_LINKSTATE_HEADER] = { .type = NLA_NESTED }, [ETHTOOL_A_LINKSTATE_LINK] = { .type = NLA_REJECT }, + [ETHTOOL_A_LINKSTATE_SQI] = { .type = NLA_REJECT }, + [ETHTOOL_A_LINKSTATE_SQI_MAX] = { .type = NLA_REJECT }, }; +static int linkstate_get_sqi(struct net_device *dev) +{ + struct phy_device *phydev = dev->phydev; + int ret; + + if (!phydev) + return -EOPNOTSUPP; + + mutex_lock(&phydev->lock); + if (!phydev->drv || !phydev->drv->get_sqi) + ret = -EOPNOTSUPP; + else + ret = phydev->drv->get_sqi(phydev); + mutex_unlock(&phydev->lock); + + return ret; +} + +static int linkstate_get_sqi_max(struct net_device *dev) +{ + struct phy_device *phydev = dev->phydev; + int ret; + + if (!phydev) + return -EOPNOTSUPP; + + mutex_lock(&phydev->lock); + if (!phydev->drv || !phydev->drv->get_sqi_max) + ret = -EOPNOTSUPP; + else + ret = phydev->drv->get_sqi_max(phydev); + mutex_unlock(&phydev->lock); + + return ret; +} + static int linkstate_prepare_data(const struct ethnl_req_info *req_base, struct ethnl_reply_data *reply_base, struct genl_info *info) @@ -34,6 +75,19 @@ static int linkstate_prepare_data(const struct ethnl_req_info *req_base, if (ret < 0) return ret; data->link = __ethtool_get_link(dev); + + ret = linkstate_get_sqi(dev); + if (ret < 0 && ret != -EOPNOTSUPP) + return ret; + + data->sqi = ret; + + ret = linkstate_get_sqi_max(dev); + if (ret < 0 && ret != -EOPNOTSUPP) + return ret; + + data->sqi_max = ret; + ethnl_ops_complete(dev); return 0; @@ -42,8 +96,19 @@ static int linkstate_prepare_data(const struct ethnl_req_info *req_base, static int linkstate_reply_size(const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { - return nla_total_size(sizeof(u8)) /* LINKSTATE_LINK */ + struct linkstate_reply_data *data = 
LINKSTATE_REPDATA(reply_base); + int len; + + len = nla_total_size(sizeof(u8)) /* LINKSTATE_LINK */ + 0; + + if (data->sqi != -EOPNOTSUPP) + len += nla_total_size(sizeof(u32)); + + if (data->sqi_max != -EOPNOTSUPP) + len += nla_total_size(sizeof(u32)); + + return len; } static int linkstate_fill_reply(struct sk_buff *skb, @@ -56,6 +121,14 @@ static int linkstate_fill_reply(struct sk_buff *skb, nla_put_u8(skb, ETHTOOL_A_LINKSTATE_LINK, !!data->link)) return -EMSGSIZE; + if (data->sqi != -EOPNOTSUPP && + nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI, data->sqi)) + return -EMSGSIZE; + + if (data->sqi_max != -EOPNOTSUPP && + nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI_MAX, data->sqi_max)) + return -EMSGSIZE; + return 0; } diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 8f5c8c9409d3..02aa5cb3a4fd 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -116,6 +116,7 @@ #include <linux/mroute.h> #endif #include <net/l3mdev.h> +#include <net/compat.h> #include <trace/events/sock.h> @@ -974,17 +975,42 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) EXPORT_SYMBOL(inet_ioctl); #ifdef CONFIG_COMPAT +static int inet_compat_routing_ioctl(struct sock *sk, unsigned int cmd, + struct compat_rtentry __user *ur) +{ + compat_uptr_t rtdev; + struct rtentry rt; + + if (copy_from_user(&rt.rt_dst, &ur->rt_dst, + 3 * sizeof(struct sockaddr)) || + get_user(rt.rt_flags, &ur->rt_flags) || + get_user(rt.rt_metric, &ur->rt_metric) || + get_user(rt.rt_mtu, &ur->rt_mtu) || + get_user(rt.rt_window, &ur->rt_window) || + get_user(rt.rt_irtt, &ur->rt_irtt) || + get_user(rtdev, &ur->rt_dev)) + return -EFAULT; + + rt.rt_dev = compat_ptr(rtdev); + return ip_rt_ioctl(sock_net(sk), cmd, &rt); +} + static int inet_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { + void __user *argp = compat_ptr(arg); struct sock *sk = sock->sk; - int err = -ENOIOCTLCMD; - - if (sk->sk_prot->compat_ioctl) - err = sk->sk_prot->compat_ioctl(sk, cmd, arg); - return err; + switch (cmd) { + case SIOCADDRT: + case SIOCDELRT: + return inet_compat_routing_ioctl(sk, cmd, argp); + default: + if (!sk->sk_prot->compat_ioctl) + return -ENOIOCTLCMD; + return sk->sk_prot->compat_ioctl(sk, cmd, arg); + } } -#endif +#endif /* CONFIG_COMPAT */ const struct proto_ops inet_stream_ops = { .family = PF_INET, diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index fc94f82f82c7..f048d0a188b7 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -2666,11 +2666,24 @@ static __net_init int devinet_init_net(struct net *net) tbl[0].extra2 = net; #endif - if ((!IS_ENABLED(CONFIG_SYSCTL) || - sysctl_devconf_inherit_init_net != 2) && - !net_eq(net, &init_net)) { - memcpy(all, init_net.ipv4.devconf_all, sizeof(ipv4_devconf)); - memcpy(dflt, init_net.ipv4.devconf_dflt, sizeof(ipv4_devconf_dflt)); + if (!net_eq(net, &init_net)) { + if (IS_ENABLED(CONFIG_SYSCTL) && + sysctl_devconf_inherit_init_net == 3) { + /* copy from the current netns */ + memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all, + sizeof(ipv4_devconf)); + memcpy(dflt, + current->nsproxy->net_ns->ipv4.devconf_dflt, + sizeof(ipv4_devconf_dflt)); + } else if (!IS_ENABLED(CONFIG_SYSCTL) || + sysctl_devconf_inherit_init_net != 2) { + /* inherit == 0 or 1: copy from init_net */ + memcpy(all, init_net.ipv4.devconf_all, + sizeof(ipv4_devconf)); + memcpy(dflt, init_net.ipv4.devconf_dflt, + sizeof(ipv4_devconf_dflt)); + } + /* else inherit == 2: use compiled values */ } #ifdef CONFIG_SYSCTL diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 
47f0502b2101..7b272bbed2b4 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2565,9 +2565,9 @@ done: } int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, - struct group_filter __user *optval, int __user *optlen) + struct sockaddr_storage __user *p) { - int err, i, count, copycount; + int i, count, copycount; struct sockaddr_in *psin; __be32 addr; struct ip_mc_socklist *pmc; @@ -2583,37 +2583,29 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, if (!ipv4_is_multicast(addr)) return -EINVAL; - err = -EADDRNOTAVAIL; - for_each_pmc_rtnl(inet, pmc) { if (pmc->multi.imr_multiaddr.s_addr == addr && pmc->multi.imr_ifindex == gsf->gf_interface) break; } if (!pmc) /* must have a prior join */ - goto done; + return -EADDRNOTAVAIL; gsf->gf_fmode = pmc->sfmode; psl = rtnl_dereference(pmc->sflist); count = psl ? psl->sl_count : 0; copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc; gsf->gf_numsrc = count; - if (put_user(GROUP_FILTER_SIZE(copycount), optlen) || - copy_to_user(optval, gsf, GROUP_FILTER_SIZE(0))) { - return -EFAULT; - } - for (i = 0; i < copycount; i++) { + for (i = 0; i < copycount; i++, p++) { struct sockaddr_storage ss; psin = (struct sockaddr_in *)&ss; memset(&ss, 0, sizeof(ss)); psin->sin_family = AF_INET; psin->sin_addr.s_addr = psl->sl_addr[i]; - if (copy_to_user(&optval->gf_slist[i], &ss, sizeof(ss))) + if (copy_to_user(p, &ss, sizeof(ss))) return -EFAULT; } return 0; -done: - return err; } /* diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 0ce9b91ff55c..4e31f23e4117 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -768,45 +768,37 @@ static void ipgre_link_update(struct net_device *dev, bool set_mtu) } } -static int ipgre_tunnel_ioctl(struct net_device *dev, - struct ifreq *ifr, int cmd) +static int ipgre_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, + int cmd) { - struct ip_tunnel_parm p; int err; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) - return -EFAULT; - if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) { - if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || - p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) || - ((p.i_flags | p.o_flags) & (GRE_VERSION | GRE_ROUTING))) + if (p->iph.version != 4 || p->iph.protocol != IPPROTO_GRE || + p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF)) || + ((p->i_flags | p->o_flags) & (GRE_VERSION | GRE_ROUTING))) return -EINVAL; } - p.i_flags = gre_flags_to_tnl_flags(p.i_flags); - p.o_flags = gre_flags_to_tnl_flags(p.o_flags); + p->i_flags = gre_flags_to_tnl_flags(p->i_flags); + p->o_flags = gre_flags_to_tnl_flags(p->o_flags); - err = ip_tunnel_ioctl(dev, &p, cmd); + err = ip_tunnel_ctl(dev, p, cmd); if (err) return err; if (cmd == SIOCCHGTUNNEL) { struct ip_tunnel *t = netdev_priv(dev); - t->parms.i_flags = p.i_flags; - t->parms.o_flags = p.o_flags; + t->parms.i_flags = p->i_flags; + t->parms.o_flags = p->o_flags; if (strcmp(dev->rtnl_link_ops->kind, "erspan")) ipgre_link_update(dev, true); } - p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags); - p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags); - - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) - return -EFAULT; - + p->i_flags = gre_tnl_flags_to_gre_flags(p->i_flags); + p->o_flags = gre_tnl_flags_to_gre_flags(p->o_flags); return 0; } @@ -924,10 +916,11 @@ static const struct net_device_ops ipgre_netdev_ops = { .ndo_stop = ipgre_close, #endif .ndo_start_xmit = ipgre_xmit, - .ndo_do_ioctl = ipgre_tunnel_ioctl, + .ndo_do_ioctl = ip_tunnel_ioctl, .ndo_change_mtu = ip_tunnel_change_mtu, 
.ndo_get_stats64 = ip_tunnel_get_stats64, .ndo_get_iflink = ip_tunnel_get_iflink, + .ndo_tunnel_ctl = ipgre_tunnel_ctl, }; #define GRE_FEATURES (NETIF_F_SG | \ diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 8206047d70b6..a2469bc57cfe 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -587,6 +587,86 @@ static bool setsockopt_needs_rtnl(int optname) return false; } +static int set_mcast_msfilter(struct sock *sk, int ifindex, + int numsrc, int fmode, + struct sockaddr_storage *group, + struct sockaddr_storage *list) +{ + int msize = IP_MSFILTER_SIZE(numsrc); + struct ip_msfilter *msf; + struct sockaddr_in *psin; + int err, i; + + msf = kmalloc(msize, GFP_KERNEL); + if (!msf) + return -ENOBUFS; + + psin = (struct sockaddr_in *)group; + if (psin->sin_family != AF_INET) + goto Eaddrnotavail; + msf->imsf_multiaddr = psin->sin_addr.s_addr; + msf->imsf_interface = 0; + msf->imsf_fmode = fmode; + msf->imsf_numsrc = numsrc; + for (i = 0; i < numsrc; ++i) { + psin = (struct sockaddr_in *)&list[i]; + + if (psin->sin_family != AF_INET) + goto Eaddrnotavail; + msf->imsf_slist[i] = psin->sin_addr.s_addr; + } + err = ip_mc_msfilter(sk, msf, ifindex); + kfree(msf); + return err; + +Eaddrnotavail: + kfree(msf); + return -EADDRNOTAVAIL; +} + +static int do_mcast_group_source(struct sock *sk, int optname, + struct group_source_req *greqs) +{ + struct ip_mreq_source mreqs; + struct sockaddr_in *psin; + int omode, add, err; + + if (greqs->gsr_group.ss_family != AF_INET || + greqs->gsr_source.ss_family != AF_INET) + return -EADDRNOTAVAIL; + + psin = (struct sockaddr_in *)&greqs->gsr_group; + mreqs.imr_multiaddr = psin->sin_addr.s_addr; + psin = (struct sockaddr_in *)&greqs->gsr_source; + mreqs.imr_sourceaddr = psin->sin_addr.s_addr; + mreqs.imr_interface = 0; /* use index for mc_source */ + + if (optname == MCAST_BLOCK_SOURCE) { + omode = MCAST_EXCLUDE; + add = 1; + } else if (optname == MCAST_UNBLOCK_SOURCE) { + omode = MCAST_EXCLUDE; + add = 0; + } else if (optname == MCAST_JOIN_SOURCE_GROUP) { + struct ip_mreqn mreq; + + psin = (struct sockaddr_in *)&greqs->gsr_group; + mreq.imr_multiaddr = psin->sin_addr; + mreq.imr_address.s_addr = 0; + mreq.imr_ifindex = greqs->gsr_interface; + err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE); + if (err && err != -EADDRINUSE) + return err; + greqs->gsr_interface = mreq.imr_ifindex; + omode = MCAST_INCLUDE; + add = 1; + } else /* MCAST_LEAVE_SOURCE_GROUP */ { + omode = MCAST_INCLUDE; + add = 0; + } + return ip_mc_source(add, omode, sk, &mreqs, greqs->gsr_interface); +} + static int do_ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { @@ -1029,9 +1109,6 @@ static int do_ip_setsockopt(struct sock *sk, int level, case MCAST_UNBLOCK_SOURCE: { struct group_source_req greqs; - struct ip_mreq_source mreqs; - struct sockaddr_in *psin; - int omode, add; if (optlen != sizeof(struct group_source_req)) goto e_inval; @@ -1039,50 +1116,12 @@ static int do_ip_setsockopt(struct sock *sk, int level, err = -EFAULT; break; } - if (greqs.gsr_group.ss_family != AF_INET || - greqs.gsr_source.ss_family != AF_INET) { - err = -EADDRNOTAVAIL; - break; - } - psin = (struct sockaddr_in *)&greqs.gsr_group; - mreqs.imr_multiaddr = psin->sin_addr.s_addr; - psin = (struct sockaddr_in *)&greqs.gsr_source; - mreqs.imr_sourceaddr = psin->sin_addr.s_addr; - mreqs.imr_interface = 0; /* use index for mc_source */ - - if (optname == MCAST_BLOCK_SOURCE) { - omode = MCAST_EXCLUDE; - add = 1; - } else if (optname == 
MCAST_UNBLOCK_SOURCE) { - omode = MCAST_EXCLUDE; - add = 0; - } else if (optname == MCAST_JOIN_SOURCE_GROUP) { - struct ip_mreqn mreq; - - psin = (struct sockaddr_in *)&greqs.gsr_group; - mreq.imr_multiaddr = psin->sin_addr; - mreq.imr_address.s_addr = 0; - mreq.imr_ifindex = greqs.gsr_interface; - err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE); - if (err && err != -EADDRINUSE) - break; - greqs.gsr_interface = mreq.imr_ifindex; - omode = MCAST_INCLUDE; - add = 1; - } else /* MCAST_LEAVE_SOURCE_GROUP */ { - omode = MCAST_INCLUDE; - add = 0; - } - err = ip_mc_source(add, omode, sk, &mreqs, - greqs.gsr_interface); + err = do_mcast_group_source(sk, optname, &greqs); break; } case MCAST_MSFILTER: { - struct sockaddr_in *psin; - struct ip_msfilter *msf = NULL; struct group_filter *gsf = NULL; - int msize, i, ifindex; if (optlen < GROUP_FILTER_SIZE(0)) goto e_inval; @@ -1095,7 +1134,6 @@ static int do_ip_setsockopt(struct sock *sk, int level, err = PTR_ERR(gsf); break; } - /* numsrc >= (4G-140)/128 overflow in 32 bits */ if (gsf->gf_numsrc >= 0x1ffffff || gsf->gf_numsrc > net->ipv4.sysctl_igmp_max_msf) { @@ -1106,36 +1144,10 @@ static int do_ip_setsockopt(struct sock *sk, int level, err = -EINVAL; goto mc_msf_out; } - msize = IP_MSFILTER_SIZE(gsf->gf_numsrc); - msf = kmalloc(msize, GFP_KERNEL); - if (!msf) { - err = -ENOBUFS; - goto mc_msf_out; - } - ifindex = gsf->gf_interface; - psin = (struct sockaddr_in *)&gsf->gf_group; - if (psin->sin_family != AF_INET) { - err = -EADDRNOTAVAIL; - goto mc_msf_out; - } - msf->imsf_multiaddr = psin->sin_addr.s_addr; - msf->imsf_interface = 0; - msf->imsf_fmode = gsf->gf_fmode; - msf->imsf_numsrc = gsf->gf_numsrc; - err = -EADDRNOTAVAIL; - for (i = 0; i < gsf->gf_numsrc; ++i) { - psin = (struct sockaddr_in *)&gsf->gf_slist[i]; - - if (psin->sin_family != AF_INET) - goto mc_msf_out; - msf->imsf_slist[i] = psin->sin_addr.s_addr; - } - kfree(gsf); - gsf = NULL; - - err = ip_mc_msfilter(sk, msf, ifindex); + err = set_mcast_msfilter(sk, gsf->gf_interface, + gsf->gf_numsrc, gsf->gf_fmode, + &gsf->gf_group, gsf->gf_slist); mc_msf_out: - kfree(msf); kfree(gsf); break; } @@ -1272,9 +1284,113 @@ int compat_ip_setsockopt(struct sock *sk, int level, int optname, if (level != SOL_IP) return -ENOPROTOOPT; - if (optname >= MCAST_JOIN_GROUP && optname <= MCAST_MSFILTER) - return compat_mc_setsockopt(sk, level, optname, optval, optlen, - ip_setsockopt); + switch (optname) { + case MCAST_JOIN_GROUP: + case MCAST_LEAVE_GROUP: + { + struct compat_group_req __user *gr32 = (void __user *)optval; + struct group_req greq; + struct sockaddr_in *psin = (struct sockaddr_in *)&greq.gr_group; + struct ip_mreqn mreq; + + if (optlen < sizeof(struct compat_group_req)) + return -EINVAL; + + if (get_user(greq.gr_interface, &gr32->gr_interface) || + copy_from_user(&greq.gr_group, &gr32->gr_group, + sizeof(greq.gr_group))) + return -EFAULT; + + if (psin->sin_family != AF_INET) + return -EINVAL; + + memset(&mreq, 0, sizeof(mreq)); + mreq.imr_multiaddr = psin->sin_addr; + mreq.imr_ifindex = greq.gr_interface; + + rtnl_lock(); + lock_sock(sk); + if (optname == MCAST_JOIN_GROUP) + err = ip_mc_join_group(sk, &mreq); + else + err = ip_mc_leave_group(sk, &mreq); + release_sock(sk); + rtnl_unlock(); + return err; + } + case MCAST_JOIN_SOURCE_GROUP: + case MCAST_LEAVE_SOURCE_GROUP: + case MCAST_BLOCK_SOURCE: + case MCAST_UNBLOCK_SOURCE: + { + struct compat_group_source_req __user *gsr32 = (void __user *)optval; + struct group_source_req greqs; + + if (optlen != sizeof(struct 
compat_group_source_req)) + return -EINVAL; + + if (get_user(greqs.gsr_interface, &gsr32->gsr_interface) || + copy_from_user(&greqs.gsr_group, &gsr32->gsr_group, + sizeof(greqs.gsr_group)) || + copy_from_user(&greqs.gsr_source, &gsr32->gsr_source, + sizeof(greqs.gsr_source))) + return -EFAULT; + + rtnl_lock(); + lock_sock(sk); + err = do_mcast_group_source(sk, optname, &greqs); + release_sock(sk); + rtnl_unlock(); + return err; + } + case MCAST_MSFILTER: + { + const int size0 = offsetof(struct compat_group_filter, gf_slist); + struct compat_group_filter *gf32; + void *p; + int n; + + if (optlen < size0) + return -EINVAL; + if (optlen > sysctl_optmem_max - 4) + return -ENOBUFS; + + p = kmalloc(optlen + 4, GFP_KERNEL); + if (!p) + return -ENOMEM; + gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */ + if (copy_from_user(gf32, optval, optlen)) { + err = -EFAULT; + goto mc_msf_out; + } + + n = gf32->gf_numsrc; + /* numsrc >= (4G-140)/128 overflow in 32 bits */ + if (n >= 0x1ffffff) { + err = -ENOBUFS; + goto mc_msf_out; + } + if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen) { + err = -EINVAL; + goto mc_msf_out; + } + + rtnl_lock(); + lock_sock(sk); + /* numsrc >= (4G-140)/128 overflow in 32 bits */ + if (n > sock_net(sk)->ipv4.sysctl_igmp_max_msf) + err = -ENOBUFS; + else + err = set_mcast_msfilter(sk, gf32->gf_interface, + n, gf32->gf_fmode, + &gf32->gf_group, gf32->gf_slist); + release_sock(sk); + rtnl_unlock(); +mc_msf_out: + kfree(p); + return err; + } + } err = do_ip_setsockopt(sk, level, optname, optval, optlen); #ifdef CONFIG_NETFILTER @@ -1465,19 +1581,28 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, } case MCAST_MSFILTER: { + struct group_filter __user *p = (void __user *)optval; struct group_filter gsf; + const int size0 = offsetof(struct group_filter, gf_slist); + int num; - if (len < GROUP_FILTER_SIZE(0)) { + if (len < size0) { err = -EINVAL; goto out; } - if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) { + if (copy_from_user(&gsf, p, size0)) { err = -EFAULT; goto out; } - err = ip_mc_gsfget(sk, &gsf, - (struct group_filter __user *)optval, - optlen); + num = gsf.gf_numsrc; + err = ip_mc_gsfget(sk, &gsf, p->gf_slist); + if (err) + goto out; + if (gsf.gf_numsrc < num) + num = gsf.gf_numsrc; + if (put_user(GROUP_FILTER_SIZE(num), optlen) || + copy_to_user(p, &gsf, size0)) + err = -EFAULT; goto out; } case IP_MULTICAST_ALL: @@ -1590,9 +1715,47 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname, { int err; - if (optname == MCAST_MSFILTER) - return compat_mc_getsockopt(sk, level, optname, optval, optlen, - ip_getsockopt); + if (optname == MCAST_MSFILTER) { + const int size0 = offsetof(struct compat_group_filter, gf_slist); + struct compat_group_filter __user *p = (void __user *)optval; + struct compat_group_filter gf32; + struct group_filter gf; + int ulen, err; + int num; + + if (level != SOL_IP) + return -EOPNOTSUPP; + + if (get_user(ulen, optlen)) + return -EFAULT; + + if (ulen < size0) + return -EINVAL; + + if (copy_from_user(&gf32, p, size0)) + return -EFAULT; + + gf.gf_interface = gf32.gf_interface; + gf.gf_fmode = gf32.gf_fmode; + num = gf.gf_numsrc = gf32.gf_numsrc; + gf.gf_group = gf32.gf_group; + + rtnl_lock(); + lock_sock(sk); + err = ip_mc_gsfget(sk, &gf, p->gf_slist); + release_sock(sk); + rtnl_unlock(); + if (err) + return err; + if (gf.gf_numsrc < num) + num = gf.gf_numsrc; + ulen = GROUP_FILTER_SIZE(num) - (sizeof(gf) - sizeof(gf32)); + if (put_user(ulen, optlen) || + put_user(gf.gf_fmode, 
&p->gf_fmode) || + put_user(gf.gf_numsrc, &p->gf_numsrc)) + return -EFAULT; + return 0; + } err = do_ip_getsockopt(sk, level, optname, optval, optlen, MSG_CMSG_COMPAT); diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index cd4b84310d92..f4f1d11eab50 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -860,7 +860,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn, netdev_state_change(dev); } -int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) +int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) { int err = 0; struct ip_tunnel *t = netdev_priv(dev); @@ -960,6 +960,20 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) done: return err; } +EXPORT_SYMBOL_GPL(ip_tunnel_ctl); + +int ip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + struct ip_tunnel_parm p; + int err; + + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + return -EFAULT; + err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd); + if (!err && copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + return -EFAULT; + return err; +} EXPORT_SYMBOL_GPL(ip_tunnel_ioctl); int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 1b4e6f298648..c8974360a99f 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -378,38 +378,31 @@ static int vti4_err(struct sk_buff *skb, u32 info) } static int -vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +vti_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) { int err = 0; - struct ip_tunnel_parm p; - - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) - return -EFAULT; if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) { - if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP || - p.iph.ihl != 5) + if (p->iph.version != 4 || p->iph.protocol != IPPROTO_IPIP || + p->iph.ihl != 5) return -EINVAL; } - if (!(p.i_flags & GRE_KEY)) - p.i_key = 0; - if (!(p.o_flags & GRE_KEY)) - p.o_key = 0; + if (!(p->i_flags & GRE_KEY)) + p->i_key = 0; + if (!(p->o_flags & GRE_KEY)) + p->o_key = 0; - p.i_flags = VTI_ISVTI; + p->i_flags = VTI_ISVTI; - err = ip_tunnel_ioctl(dev, &p, cmd); + err = ip_tunnel_ctl(dev, p, cmd); if (err) return err; if (cmd != SIOCDELTUNNEL) { - p.i_flags |= GRE_KEY; - p.o_flags |= GRE_KEY; + p->i_flags |= GRE_KEY; + p->o_flags |= GRE_KEY; } - - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) - return -EFAULT; return 0; } @@ -417,10 +410,11 @@ static const struct net_device_ops vti_netdev_ops = { .ndo_init = vti_tunnel_init, .ndo_uninit = ip_tunnel_uninit, .ndo_start_xmit = vti_tunnel_xmit, - .ndo_do_ioctl = vti_tunnel_ioctl, + .ndo_do_ioctl = ip_tunnel_ioctl, .ndo_change_mtu = ip_tunnel_change_mtu, .ndo_get_stats64 = ip_tunnel_get_stats64, .ndo_get_iflink = ip_tunnel_get_iflink, + .ndo_tunnel_ctl = vti_tunnel_ctl, }; static void vti_tunnel_setup(struct net_device *dev) diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 2f01cf6fa0de..df663baf2516 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -327,41 +327,29 @@ static bool ipip_tunnel_ioctl_verify_protocol(u8 ipproto) } static int -ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +ipip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) { - int err = 0; - struct ip_tunnel_parm p; - - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) - return -EFAULT; - if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) { - if 
(p.iph.version != 4 || - !ipip_tunnel_ioctl_verify_protocol(p.iph.protocol) || - p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) + if (p->iph.version != 4 || + !ipip_tunnel_ioctl_verify_protocol(p->iph.protocol) || + p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF))) return -EINVAL; } - p.i_key = p.o_key = 0; - p.i_flags = p.o_flags = 0; - err = ip_tunnel_ioctl(dev, &p, cmd); - if (err) - return err; - - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) - return -EFAULT; - - return 0; + p->i_key = p->o_key = 0; + p->i_flags = p->o_flags = 0; + return ip_tunnel_ctl(dev, p, cmd); } static const struct net_device_ops ipip_netdev_ops = { .ndo_init = ipip_tunnel_init, .ndo_uninit = ip_tunnel_uninit, .ndo_start_xmit = ipip_tunnel_xmit, - .ndo_do_ioctl = ipip_tunnel_ioctl, + .ndo_do_ioctl = ip_tunnel_ioctl, .ndo_change_mtu = ip_tunnel_change_mtu, .ndo_get_stats64 = ip_tunnel_get_stats64, .ndo_get_iflink = ip_tunnel_get_iflink, + .ndo_tunnel_ctl = ipip_tunnel_ctl, }; #define IPIP_FEATURES (NETIF_F_SG | \ diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 5c218db2dede..d3e9b80a57de 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -421,37 +421,6 @@ static void ipmr_free_table(struct mr_table *mrt) /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ -static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) -{ - struct net *net = dev_net(dev); - - dev_close(dev); - - dev = __dev_get_by_name(net, "tunl0"); - if (dev) { - const struct net_device_ops *ops = dev->netdev_ops; - struct ifreq ifr; - struct ip_tunnel_parm p; - - memset(&p, 0, sizeof(p)); - p.iph.daddr = v->vifc_rmt_addr.s_addr; - p.iph.saddr = v->vifc_lcl_addr.s_addr; - p.iph.version = 4; - p.iph.ihl = 5; - p.iph.protocol = IPPROTO_IPIP; - sprintf(p.name, "dvmrp%d", v->vifc_vifi); - ifr.ifr_ifru.ifru_data = (__force void __user *)&p; - - if (ops->ndo_do_ioctl) { - mm_segment_t oldfs = get_fs(); - - set_fs(KERNEL_DS); - ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL); - set_fs(oldfs); - } - } -} - /* Initialize ipmr pimreg/tunnel in_device */ static bool ipmr_init_vif_indev(const struct net_device *dev) { @@ -471,51 +440,52 @@ static bool ipmr_init_vif_indev(const struct net_device *dev) static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) { - struct net_device *dev; - - dev = __dev_get_by_name(net, "tunl0"); + struct net_device *tunnel_dev, *new_dev; + struct ip_tunnel_parm p = { }; + int err; - if (dev) { - const struct net_device_ops *ops = dev->netdev_ops; - int err; - struct ifreq ifr; - struct ip_tunnel_parm p; + tunnel_dev = __dev_get_by_name(net, "tunl0"); + if (!tunnel_dev) + goto out; - memset(&p, 0, sizeof(p)); - p.iph.daddr = v->vifc_rmt_addr.s_addr; - p.iph.saddr = v->vifc_lcl_addr.s_addr; - p.iph.version = 4; - p.iph.ihl = 5; - p.iph.protocol = IPPROTO_IPIP; - sprintf(p.name, "dvmrp%d", v->vifc_vifi); - ifr.ifr_ifru.ifru_data = (__force void __user *)&p; + p.iph.daddr = v->vifc_rmt_addr.s_addr; + p.iph.saddr = v->vifc_lcl_addr.s_addr; + p.iph.version = 4; + p.iph.ihl = 5; + p.iph.protocol = IPPROTO_IPIP; + sprintf(p.name, "dvmrp%d", v->vifc_vifi); - if (ops->ndo_do_ioctl) { - mm_segment_t oldfs = get_fs(); + if (!tunnel_dev->netdev_ops->ndo_tunnel_ctl) + goto out; + err = tunnel_dev->netdev_ops->ndo_tunnel_ctl(tunnel_dev, &p, + SIOCADDTUNNEL); + if (err) + goto out; - set_fs(KERNEL_DS); - err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL); - set_fs(oldfs); - } else { - err = -EOPNOTSUPP; - } - dev = NULL; - - if (err == 0 && - (dev = 
__dev_get_by_name(net, p.name)) != NULL) { - dev->flags |= IFF_MULTICAST; - if (!ipmr_init_vif_indev(dev)) - goto failure; - if (dev_open(dev, NULL)) - goto failure; - dev_hold(dev); - } - } - return dev; + new_dev = __dev_get_by_name(net, p.name); + if (!new_dev) + goto out; -failure: - unregister_netdevice(dev); - return NULL; + new_dev->flags |= IFF_MULTICAST; + if (!ipmr_init_vif_indev(new_dev)) + goto out_unregister; + if (dev_open(new_dev, NULL)) + goto out_unregister; + dev_hold(new_dev); + err = dev_set_allmulti(new_dev, 1); + if (err) { + dev_close(new_dev); + tunnel_dev->netdev_ops->ndo_tunnel_ctl(tunnel_dev, &p, + SIOCDELTUNNEL); + dev_put(new_dev); + new_dev = ERR_PTR(err); + } + return new_dev; + +out_unregister: + unregister_netdevice(new_dev); +out: + return ERR_PTR(-ENOBUFS); } #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) @@ -867,14 +837,8 @@ static int vif_add(struct net *net, struct mr_table *mrt, break; case VIFF_TUNNEL: dev = ipmr_new_tunnel(net, vifc); - if (!dev) - return -ENOBUFS; - err = dev_set_allmulti(dev, 1); - if (err) { - ipmr_del_tunnel(dev, vifc); - dev_put(dev); - return err; - } + if (IS_ERR(dev)) + return PTR_ERR(dev); break; case VIFF_USE_IFINDEX: case 0: diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 3957364d556c..c337e73e02dd 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -33,8 +33,20 @@ static const struct nla_policy rtm_nh_policy[NHA_MAX + 1] = { [NHA_ENCAP] = { .type = NLA_NESTED }, [NHA_GROUPS] = { .type = NLA_FLAG }, [NHA_MASTER] = { .type = NLA_U32 }, + [NHA_FDB] = { .type = NLA_FLAG }, }; +static int call_nexthop_notifiers(struct net *net, + enum fib_event_type event_type, + struct nexthop *nh) +{ + int err; + + err = atomic_notifier_call_chain(&net->nexthop.notifier_chain, + event_type, nh); + return notifier_to_errno(err); +} + static unsigned int nh_dev_hashfn(unsigned int val) { unsigned int mask = NH_DEV_HASHSIZE - 1; @@ -107,6 +119,7 @@ static struct nexthop *nexthop_alloc(void) INIT_LIST_HEAD(&nh->fi_list); INIT_LIST_HEAD(&nh->f6i_list); INIT_LIST_HEAD(&nh->grp_list); + INIT_LIST_HEAD(&nh->fdb_list); } return nh; } @@ -227,6 +240,9 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh, if (nla_put_u32(skb, NHA_ID, nh->id)) goto nla_put_failure; + if (nh->is_fdb_nh && nla_put_flag(skb, NHA_FDB)) + goto nla_put_failure; + if (nh->is_group) { struct nh_group *nhg = rtnl_dereference(nh->nh_grp); @@ -241,7 +257,7 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh, if (nla_put_flag(skb, NHA_BLACKHOLE)) goto nla_put_failure; goto out; - } else { + } else if (!nh->is_fdb_nh) { const struct net_device *dev; dev = nhi->fib_nhc.nhc_dev; @@ -387,12 +403,35 @@ static bool valid_group_nh(struct nexthop *nh, unsigned int npaths, return true; } +static int nh_check_attr_fdb_group(struct nexthop *nh, u8 *nh_family, + struct netlink_ext_ack *extack) +{ + struct nh_info *nhi; + + if (!nh->is_fdb_nh) { + NL_SET_ERR_MSG(extack, "FDB nexthop group can only have fdb nexthops"); + return -EINVAL; + } + + nhi = rtnl_dereference(nh->nh_info); + if (*nh_family == AF_UNSPEC) { + *nh_family = nhi->family; + } else if (*nh_family != nhi->family) { + NL_SET_ERR_MSG(extack, "FDB nexthop group cannot have mixed family nexthops"); + return -EINVAL; + } + + return 0; +} + static int nh_check_attr_group(struct net *net, struct nlattr *tb[], struct netlink_ext_ack *extack) { unsigned int len = nla_len(tb[NHA_GROUP]); + u8 nh_family = AF_UNSPEC; struct nexthop_grp *nhg; unsigned int i, j; + u8 nhg_fdb = 
0; if (len & (sizeof(struct nexthop_grp) - 1)) { NL_SET_ERR_MSG(extack, @@ -421,6 +460,8 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[], } } + if (tb[NHA_FDB]) + nhg_fdb = 1; nhg = nla_data(tb[NHA_GROUP]); for (i = 0; i < len; ++i) { struct nexthop *nh; @@ -432,11 +473,20 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[], } if (!valid_group_nh(nh, len, extack)) return -EINVAL; + + if (nhg_fdb && nh_check_attr_fdb_group(nh, &nh_family, extack)) + return -EINVAL; + + if (!nhg_fdb && nh->is_fdb_nh) { + NL_SET_ERR_MSG(extack, "Non FDB nexthop group cannot have fdb nexthops"); + return -EINVAL; + } } for (i = NHA_GROUP + 1; i < __NHA_MAX; ++i) { if (!tb[i]) continue; - + if (tb[NHA_FDB]) + continue; NL_SET_ERR_MSG(extack, "No other attributes can be set in nexthop groups"); return -EINVAL; @@ -495,6 +545,9 @@ struct nexthop *nexthop_select_path(struct nexthop *nh, int hash) if (hash > atomic_read(&nhge->upper_bound)) continue; + if (nhge->nh->is_fdb_nh) + return nhge->nh; + /* nexthops always check if it is good and does * not rely on a sysctl for this behavior */ @@ -564,6 +617,11 @@ int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg, { struct nh_info *nhi; + if (nh->is_fdb_nh) { + NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop"); + return -EINVAL; + } + /* fib6_src is unique to a fib6_info and limits the ability to cache * routes in fib6_nh within a nexthop that is potentially shared * across multiple fib entries. If the config wants to use source @@ -640,6 +698,12 @@ int fib_check_nexthop(struct nexthop *nh, u8 scope, { int err = 0; + if (nh->is_fdb_nh) { + NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop"); + err = -EINVAL; + goto out; + } + if (nh->is_group) { struct nh_group *nhg; @@ -773,6 +837,8 @@ static void __remove_nexthop_fib(struct net *net, struct nexthop *nh) bool do_flush = false; struct fib_info *fi; + call_nexthop_notifiers(net, NEXTHOP_EVENT_DEL, nh); + list_for_each_entry(fi, &nh->fi_list, nh_list) { fi->fib_flags |= RTNH_F_DEAD; do_flush = true; @@ -1125,6 +1191,9 @@ static struct nexthop *nexthop_create_group(struct net *net, nh_group_rebalance(nhg); } + if (cfg->nh_fdb) + nh->is_fdb_nh = 1; + rcu_assign_pointer(nh->nh_grp, nhg); return nh; @@ -1152,7 +1221,7 @@ static int nh_create_ipv4(struct net *net, struct nexthop *nh, .fc_encap = cfg->nh_encap, .fc_encap_type = cfg->nh_encap_type, }; - u32 tb_id = l3mdev_fib_table(cfg->dev); + u32 tb_id = (cfg->dev ? 
l3mdev_fib_table(cfg->dev) : RT_TABLE_MAIN); int err; err = fib_nh_init(net, fib_nh, &fib_cfg, 1, extack); @@ -1161,6 +1230,9 @@ static int nh_create_ipv4(struct net *net, struct nexthop *nh, goto out; } + if (nh->is_fdb_nh) + goto out; + /* sets nh_dev if successful */ err = fib_check_nh(net, fib_nh, tb_id, 0, extack); if (!err) { @@ -1186,6 +1258,7 @@ static int nh_create_ipv6(struct net *net, struct nexthop *nh, .fc_flags = cfg->nh_flags, .fc_encap = cfg->nh_encap, .fc_encap_type = cfg->nh_encap_type, + .fc_is_fdb = cfg->nh_fdb, }; int err; @@ -1227,6 +1300,9 @@ static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg, nhi->family = cfg->nh_family; nhi->fib_nhc.nhc_scope = RT_SCOPE_LINK; + if (cfg->nh_fdb) + nh->is_fdb_nh = 1; + if (cfg->nh_blackhole) { nhi->reject_nh = 1; cfg->nh_ifindex = net->loopback_dev->ifindex; @@ -1248,7 +1324,8 @@ static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg, } /* add the entry to the device based hash */ - nexthop_devhash_add(net, nhi); + if (!nh->is_fdb_nh) + nexthop_devhash_add(net, nhi); rcu_assign_pointer(nh->nh_info, nhi); @@ -1352,6 +1429,19 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb, if (tb[NHA_ID]) cfg->nh_id = nla_get_u32(tb[NHA_ID]); + if (tb[NHA_FDB]) { + if (tb[NHA_OIF] || tb[NHA_BLACKHOLE] || + tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE]) { + NL_SET_ERR_MSG(extack, "Fdb attribute can not be used with encap, oif or blackhole"); + goto out; + } + if (nhm->nh_flags) { + NL_SET_ERR_MSG(extack, "Unsupported nexthop flags in ancillary header"); + goto out; + } + cfg->nh_fdb = nla_get_flag(tb[NHA_FDB]); + } + if (tb[NHA_GROUP]) { if (nhm->nh_family != AF_UNSPEC) { NL_SET_ERR_MSG(extack, "Invalid family for group"); @@ -1375,8 +1465,8 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb, if (tb[NHA_BLACKHOLE]) { if (tb[NHA_GATEWAY] || tb[NHA_OIF] || - tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE]) { - NL_SET_ERR_MSG(extack, "Blackhole attribute can not be used with gateway or oif"); + tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE] || tb[NHA_FDB]) { + NL_SET_ERR_MSG(extack, "Blackhole attribute can not be used with gateway, oif, encap or fdb"); goto out; } @@ -1385,26 +1475,28 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb, goto out; } - if (!tb[NHA_OIF]) { - NL_SET_ERR_MSG(extack, "Device attribute required for non-blackhole nexthops"); + if (!cfg->nh_fdb && !tb[NHA_OIF]) { + NL_SET_ERR_MSG(extack, "Device attribute required for non-blackhole and non-fdb nexthops"); goto out; } - cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]); - if (cfg->nh_ifindex) - cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex); + if (!cfg->nh_fdb && tb[NHA_OIF]) { + cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]); + if (cfg->nh_ifindex) + cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex); - if (!cfg->dev) { - NL_SET_ERR_MSG(extack, "Invalid device index"); - goto out; - } else if (!(cfg->dev->flags & IFF_UP)) { - NL_SET_ERR_MSG(extack, "Nexthop device is not up"); - err = -ENETDOWN; - goto out; - } else if (!netif_carrier_ok(cfg->dev)) { - NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down"); - err = -ENETDOWN; - goto out; + if (!cfg->dev) { + NL_SET_ERR_MSG(extack, "Invalid device index"); + goto out; + } else if (!(cfg->dev->flags & IFF_UP)) { + NL_SET_ERR_MSG(extack, "Nexthop device is not up"); + err = -ENETDOWN; + goto out; + } else if (!netif_carrier_ok(cfg->dev)) { + NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down"); + err = -ENETDOWN; + goto out; + } } err = -EINVAL; @@ -1633,7 
+1725,7 @@ static bool nh_dump_filtered(struct nexthop *nh, int dev_idx, int master_idx, static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx, int *master_idx, bool *group_filter, - struct netlink_callback *cb) + bool *fdb_filter, struct netlink_callback *cb) { struct netlink_ext_ack *extack = cb->extack; struct nlattr *tb[NHA_MAX + 1]; @@ -1670,6 +1762,9 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx, case NHA_GROUPS: *group_filter = true; break; + case NHA_FDB: + *fdb_filter = true; + break; default: NL_SET_ERR_MSG(extack, "Unsupported attribute in dump request"); return -EINVAL; @@ -1688,17 +1783,17 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx, /* rtnl */ static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb) { + bool group_filter = false, fdb_filter = false; struct nhmsg *nhm = nlmsg_data(cb->nlh); int dev_filter_idx = 0, master_idx = 0; struct net *net = sock_net(skb->sk); struct rb_root *root = &net->nexthop.rb_root; - bool group_filter = false; struct rb_node *node; int idx = 0, s_idx; int err; err = nh_valid_dump_req(cb->nlh, &dev_filter_idx, &master_idx, - &group_filter, cb); + &group_filter, &fdb_filter, cb); if (err < 0) return err; @@ -1783,6 +1878,19 @@ static struct notifier_block nh_netdev_notifier = { .notifier_call = nh_netdev_event, }; +int register_nexthop_notifier(struct net *net, struct notifier_block *nb) +{ + return atomic_notifier_chain_register(&net->nexthop.notifier_chain, nb); +} +EXPORT_SYMBOL(register_nexthop_notifier); + +int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&net->nexthop.notifier_chain, + nb); +} +EXPORT_SYMBOL(unregister_nexthop_notifier); + static void __net_exit nexthop_net_exit(struct net *net) { rtnl_lock(); @@ -1799,6 +1907,7 @@ static int __net_init nexthop_net_init(struct net *net) net->nexthop.devhash = kzalloc(sz, GFP_KERNEL); if (!net->nexthop.devhash) return -ENOMEM; + ATOMIC_INIT_NOTIFIER_HEAD(&net->nexthop.notifier_chain); return 0; } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index fd885f06c4ed..09cfbf5dd7ce 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2783,6 +2783,33 @@ put: in6_dev_put(in6_dev); } +static int addrconf_set_sit_dstaddr(struct net *net, struct net_device *dev, + struct in6_ifreq *ireq) +{ + struct ip_tunnel_parm p = { }; + int err; + + if (!(ipv6_addr_type(&ireq->ifr6_addr) & IPV6_ADDR_COMPATv4)) + return -EADDRNOTAVAIL; + + p.iph.daddr = ireq->ifr6_addr.s6_addr32[3]; + p.iph.version = 4; + p.iph.ihl = 5; + p.iph.protocol = IPPROTO_IPV6; + p.iph.ttl = 64; + + if (!dev->netdev_ops->ndo_tunnel_ctl) + return -EOPNOTSUPP; + err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, SIOCADDTUNNEL); + if (err) + return err; + + dev = __dev_get_by_name(net, p.name); + if (!dev) + return -ENOBUFS; + return dev_open(dev, NULL); +} + /* * Set destination address. 
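The helper added just above replaces the old pattern of faking a userspace ioctl with set_fs(KERNEL_DS): addrconf_set_sit_dstaddr() now calls the device's ndo_tunnel_ctl directly with a kernel-space ip_tunnel_parm. From userspace the flow is still SIOCSIFDSTADDR with an IPv4-compatible IPv6 address. A hedged sketch of that call (the interface name "sit0" and the address are placeholders; the operation ultimately requires CAP_NET_ADMIN):

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <net/if.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <linux/ipv6.h>		/* struct in6_ifreq */

int main(void)
{
	struct in6_ifreq ireq;
	int fd = socket(AF_INET6, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;
	memset(&ireq, 0, sizeof(ireq));
	ireq.ifr6_ifindex = if_nametoindex("sit0");
	/* ::192.0.2.1 is IPV6_ADDR_COMPATv4; the kernel takes the low
	 * 32 bits as the IPv4 tunnel destination (see s6_addr32[3]). */
	inet_pton(AF_INET6, "::192.0.2.1", &ireq.ifr6_addr);
	if (ioctl(fd, SIOCSIFDSTADDR, &ireq) < 0)
		perror("SIOCSIFDSTADDR");
	close(fd);
	return 0;
}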
* Special case for SIT interfaces where we create a new "virtual" @@ -2790,61 +2817,19 @@ put: */ int addrconf_set_dstaddr(struct net *net, void __user *arg) { - struct in6_ifreq ireq; struct net_device *dev; - int err = -EINVAL; - - rtnl_lock(); + struct in6_ifreq ireq; + int err = -ENODEV; - err = -EFAULT; + if (!IS_ENABLED(CONFIG_IPV6_SIT)) + return -ENODEV; if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) - goto err_exit; + return -EFAULT; + rtnl_lock(); dev = __dev_get_by_index(net, ireq.ifr6_ifindex); - - err = -ENODEV; - if (!dev) - goto err_exit; - -#if IS_ENABLED(CONFIG_IPV6_SIT) - if (dev->type == ARPHRD_SIT) { - const struct net_device_ops *ops = dev->netdev_ops; - struct ifreq ifr; - struct ip_tunnel_parm p; - - err = -EADDRNOTAVAIL; - if (!(ipv6_addr_type(&ireq.ifr6_addr) & IPV6_ADDR_COMPATv4)) - goto err_exit; - - memset(&p, 0, sizeof(p)); - p.iph.daddr = ireq.ifr6_addr.s6_addr32[3]; - p.iph.saddr = 0; - p.iph.version = 4; - p.iph.ihl = 5; - p.iph.protocol = IPPROTO_IPV6; - p.iph.ttl = 64; - ifr.ifr_ifru.ifru_data = (__force void __user *)&p; - - if (ops->ndo_do_ioctl) { - mm_segment_t oldfs = get_fs(); - - set_fs(KERNEL_DS); - err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL); - set_fs(oldfs); - } else - err = -EOPNOTSUPP; - - if (err == 0) { - err = -ENOBUFS; - dev = __dev_get_by_name(net, p.name); - if (!dev) - goto err_exit; - err = dev_open(dev, NULL); - } - } -#endif - -err_exit: + if (dev && dev->type == ARPHRD_SIT) + err = addrconf_set_sit_dstaddr(net, dev, &ireq); rtnl_unlock(); return err; } @@ -6991,9 +6976,26 @@ static int __net_init addrconf_init_net(struct net *net) goto err_alloc_dflt; if (IS_ENABLED(CONFIG_SYSCTL) && - sysctl_devconf_inherit_init_net == 1 && !net_eq(net, &init_net)) { - memcpy(all, init_net.ipv6.devconf_all, sizeof(ipv6_devconf)); - memcpy(dflt, init_net.ipv6.devconf_dflt, sizeof(ipv6_devconf_dflt)); + !net_eq(net, &init_net)) { + switch (sysctl_devconf_inherit_init_net) { + case 1: /* copy from init_net */ + memcpy(all, init_net.ipv6.devconf_all, + sizeof(ipv6_devconf)); + memcpy(dflt, init_net.ipv6.devconf_dflt, + sizeof(ipv6_devconf_dflt)); + break; + case 3: /* copy from the current netns */ + memcpy(all, current->nsproxy->net_ns->ipv6.devconf_all, + sizeof(ipv6_devconf)); + memcpy(dflt, + current->nsproxy->net_ns->ipv6.devconf_dflt, + sizeof(ipv6_devconf_dflt)); + break; + case 0: + case 2: + /* use compiled values */ + break; + } } /* these will be inherited by all namespaces */ diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 3b6fcc0c321a..0625a97a8894 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -60,6 +60,7 @@ #include <net/calipso.h> #include <net/seg6.h> #include <net/rpl.h> +#include <net/compat.h> #include <linux/uaccess.h> #include <linux/mroute6.h> @@ -545,21 +546,25 @@ EXPORT_SYMBOL(inet6_getname); int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { + void __user *argp = (void __user *)arg; struct sock *sk = sock->sk; struct net *net = sock_net(sk); switch (cmd) { case SIOCADDRT: - case SIOCDELRT: - - return ipv6_route_ioctl(net, cmd, (void __user *)arg); + case SIOCDELRT: { + struct in6_rtmsg rtmsg; + if (copy_from_user(&rtmsg, argp, sizeof(rtmsg))) + return -EFAULT; + return ipv6_route_ioctl(net, cmd, &rtmsg); + } case SIOCSIFADDR: - return addrconf_add_ifaddr(net, (void __user *) arg); + return addrconf_add_ifaddr(net, argp); case SIOCDIFADDR: - return addrconf_del_ifaddr(net, (void __user *) arg); + return addrconf_del_ifaddr(net, argp); case SIOCSIFDSTADDR: - 
return addrconf_set_dstaddr(net, (void __user *) arg); + return addrconf_set_dstaddr(net, argp); default: if (!sk->sk_prot->ioctl) return -ENOIOCTLCMD; @@ -570,6 +575,56 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) } EXPORT_SYMBOL(inet6_ioctl); +#ifdef CONFIG_COMPAT +struct compat_in6_rtmsg { + struct in6_addr rtmsg_dst; + struct in6_addr rtmsg_src; + struct in6_addr rtmsg_gateway; + u32 rtmsg_type; + u16 rtmsg_dst_len; + u16 rtmsg_src_len; + u32 rtmsg_metric; + u32 rtmsg_info; + u32 rtmsg_flags; + s32 rtmsg_ifindex; +}; + +static int inet6_compat_routing_ioctl(struct sock *sk, unsigned int cmd, + struct compat_in6_rtmsg __user *ur) +{ + struct in6_rtmsg rt; + + if (copy_from_user(&rt.rtmsg_dst, &ur->rtmsg_dst, + 3 * sizeof(struct in6_addr)) || + get_user(rt.rtmsg_type, &ur->rtmsg_type) || + get_user(rt.rtmsg_dst_len, &ur->rtmsg_dst_len) || + get_user(rt.rtmsg_src_len, &ur->rtmsg_src_len) || + get_user(rt.rtmsg_metric, &ur->rtmsg_metric) || + get_user(rt.rtmsg_info, &ur->rtmsg_info) || + get_user(rt.rtmsg_flags, &ur->rtmsg_flags) || + get_user(rt.rtmsg_ifindex, &ur->rtmsg_ifindex)) + return -EFAULT; + + + return ipv6_route_ioctl(sock_net(sk), cmd, &rt); +} + +int inet6_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + void __user *argp = compat_ptr(arg); + struct sock *sk = sock->sk; + + switch (cmd) { + case SIOCADDRT: + case SIOCDELRT: + return inet6_compat_routing_ioctl(sk, cmd, argp); + default: + return -ENOIOCTLCMD; + } +} +EXPORT_SYMBOL_GPL(inet6_compat_ioctl); +#endif /* CONFIG_COMPAT */ + INDIRECT_CALLABLE_DECLARE(int udpv6_sendmsg(struct sock *, struct msghdr *, size_t)); int inet6_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) @@ -631,6 +686,7 @@ const struct proto_ops inet6_stream_ops = { .read_sock = tcp_read_sock, .peek_len = tcp_peek_len, #ifdef CONFIG_COMPAT + .compat_ioctl = inet6_compat_ioctl, .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, #endif @@ -659,6 +715,7 @@ const struct proto_ops inet6_dgram_ops = { .sendpage = sock_no_sendpage, .set_peek_off = sk_set_peek_off, #ifdef CONFIG_COMPAT + .compat_ioctl = inet6_compat_ioctl, .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, #endif diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 4703b09808d0..821d96c720b9 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -89,6 +89,11 @@ struct ip6_tnl_net { struct ip6_tnl __rcu *collect_md_tun; }; +static inline int ip6_tnl_mpls_supported(void) +{ + return IS_ENABLED(CONFIG_MPLS); +} + static struct net_device_stats *ip6_get_stats(struct net_device *dev) { struct pcpu_sw_netstats tmp, sum = { 0 }; @@ -718,6 +723,20 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; } +static int +mplsip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) +{ + __u32 rel_info = ntohl(info); + int err, rel_msg = 0; + u8 rel_type = type; + u8 rel_code = code; + + err = ip6_tnl_err(skb, IPPROTO_MPLS, opt, &rel_type, &rel_code, + &rel_msg, &rel_info, offset); + return err; +} + static int ip4ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t, const struct ipv6hdr *ipv6h, struct sk_buff *skb) @@ -740,6 +759,14 @@ static int ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t, return IP6_ECN_decapsulate(ipv6h, skb); } +static inline int mplsip6_dscp_ecn_decapsulate(const struct ip6_tnl *t, + const struct ipv6hdr *ipv6h, + struct 
sk_buff *skb) +{ + /* ECN is not supported in AF_MPLS */ + return 0; +} + __u32 ip6_tnl_get_cap(struct ip6_tnl *t, const struct in6_addr *laddr, const struct in6_addr *raddr) @@ -901,6 +928,11 @@ static const struct tnl_ptk_info tpi_v4 = { .proto = htons(ETH_P_IP), }; +static const struct tnl_ptk_info tpi_mpls = { + /* no tunnel info required for mplsip6. */ + .proto = htons(ETH_P_MPLS_UC), +}; + static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto, const struct tnl_ptk_info *tpi, int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t, @@ -958,6 +990,12 @@ static int ip6ip6_rcv(struct sk_buff *skb) ip6ip6_dscp_ecn_decapsulate); } +static int mplsip6_rcv(struct sk_buff *skb) +{ + return ipxip6_rcv(skb, IPPROTO_MPLS, &tpi_mpls, + mplsip6_dscp_ecn_decapsulate); +} + struct ipv6_tel_txoption { struct ipv6_txoptions ops; __u8 dst_opt[8]; @@ -1232,6 +1270,8 @@ route_lookup: ipv6_push_frag_opts(skb, &opt.ops, &proto); } + skb_set_inner_ipproto(skb, proto); + skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); @@ -1253,22 +1293,22 @@ tx_err_dst_release: EXPORT_SYMBOL(ip6_tnl_xmit); static inline int -ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) +ipxip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, + u8 protocol) { struct ip6_tnl *t = netdev_priv(dev); + struct ipv6hdr *ipv6h; const struct iphdr *iph; int encap_limit = -1; + __u16 offset; struct flowi6 fl6; - __u8 dsfield; + __u8 dsfield, orig_dsfield; __u32 mtu; u8 tproto; int err; - iph = ip_hdr(skb); - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - tproto = READ_ONCE(t->parms.proto); - if (tproto != IPPROTO_IPIP && tproto != 0) + if (tproto != protocol && tproto != 0) return -1; if (t->parms.collect_md) { @@ -1281,129 +1321,100 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) return -1; key = &tun_info->key; memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_IPIP; + fl6.flowi6_proto = protocol; fl6.saddr = key->u.ipv6.src; fl6.daddr = key->u.ipv6.dst; fl6.flowlabel = key->label; dsfield = key->tos; + switch (protocol) { + case IPPROTO_IPIP: + iph = ip_hdr(skb); + orig_dsfield = ipv4_get_dsfield(iph); + break; + case IPPROTO_IPV6: + ipv6h = ipv6_hdr(skb); + orig_dsfield = ipv6_get_dsfield(ipv6h); + break; + default: + orig_dsfield = dsfield; + break; + } } else { if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) encap_limit = t->parms.encap_limit; + if (protocol == IPPROTO_IPV6) { + offset = ip6_tnl_parse_tlv_enc_lim(skb, + skb_network_header(skb)); + /* ip6_tnl_parse_tlv_enc_lim() might have + * reallocated skb->head + */ + if (offset > 0) { + struct ipv6_tlv_tnl_enc_lim *tel; - memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_IPIP; - - if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) - dsfield = ipv4_get_dsfield(iph); - else - dsfield = ip6_tclass(t->parms.flowinfo); - if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) - fl6.flowi6_mark = skb->mark; - else - fl6.flowi6_mark = t->parms.fwmark; - } - - fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); - dsfield = INET_ECN_encapsulate(dsfield, ipv4_get_dsfield(iph)); - - if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) - return -1; - - skb_set_inner_ipproto(skb, IPPROTO_IPIP); - - err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, - IPPROTO_IPIP); - if (err != 0) { - /* XXX: send ICMP error even if DF is not set. 
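Both legacy transmit paths being removed here, and the merged ipxip6_tnl_xmit() that replaces them, derive the outer traffic class from the inner header and then run it through INET_ECN_encapsulate(). A standalone sketch of that helper's behavior, paraphrased from include/net/inet_ecn.h rather than taken from this patch: the outer header inherits the inner ECN codepoint, except that an inner CE mark is encapsulated as ECT(0).

#include <stdio.h>

#define INET_ECN_MASK	3
#define INET_ECN_ECT_0	2
#define INET_ECN_CE	3

static unsigned char ecn_encapsulate(unsigned char outer, unsigned char inner)
{
	outer &= ~INET_ECN_MASK;
	outer |= ((inner & INET_ECN_MASK) == INET_ECN_CE) ?
		 INET_ECN_ECT_0 : (inner & INET_ECN_MASK);
	return outer;
}

int main(void)
{
	/* A CE-marked inner packet yields an ECT(0) outer header, so a
	 * congestion mark is not blindly copied to the outer tunnel. */
	printf("0x%02x\n", ecn_encapsulate(0x28, INET_ECN_CE)); /* 0x2a */
	return 0;
}

This is also why the new mplsip6_dscp_ecn_decapsulate() above is a no-op: ECN is not defined for AF_MPLS, so there is no inner codepoint to propagate on decapsulation.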
*/ - if (err == -EMSGSIZE) - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, - htonl(mtu)); - return -1; - } - - return 0; -} - -static inline int -ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) -{ - struct ip6_tnl *t = netdev_priv(dev); - struct ipv6hdr *ipv6h; - int encap_limit = -1; - __u16 offset; - struct flowi6 fl6; - __u8 dsfield; - __u32 mtu; - u8 tproto; - int err; - - ipv6h = ipv6_hdr(skb); - tproto = READ_ONCE(t->parms.proto); - if ((tproto != IPPROTO_IPV6 && tproto != 0) || - ip6_tnl_addr_conflict(t, ipv6h)) - return -1; - - if (t->parms.collect_md) { - struct ip_tunnel_info *tun_info; - const struct ip_tunnel_key *key; - - tun_info = skb_tunnel_info(skb); - if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || - ip_tunnel_info_af(tun_info) != AF_INET6)) - return -1; - key = &tun_info->key; - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_IPV6; - fl6.saddr = key->u.ipv6.src; - fl6.daddr = key->u.ipv6.dst; - fl6.flowlabel = key->label; - dsfield = key->tos; - } else { - offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); - /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */ - ipv6h = ipv6_hdr(skb); - if (offset > 0) { - struct ipv6_tlv_tnl_enc_lim *tel; - - tel = (void *)&skb_network_header(skb)[offset]; - if (tel->encap_limit == 0) { - icmpv6_send(skb, ICMPV6_PARAMPROB, - ICMPV6_HDR_FIELD, offset + 2); - return -1; + tel = (void *)&skb_network_header(skb)[offset]; + if (tel->encap_limit == 0) { + icmpv6_send(skb, ICMPV6_PARAMPROB, + ICMPV6_HDR_FIELD, offset + 2); + return -1; + } + encap_limit = tel->encap_limit - 1; } - encap_limit = tel->encap_limit - 1; - } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) { - encap_limit = t->parms.encap_limit; } memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_IPV6; + fl6.flowi6_proto = protocol; - if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) - dsfield = ipv6_get_dsfield(ipv6h); - else - dsfield = ip6_tclass(t->parms.flowinfo); - if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) - fl6.flowlabel |= ip6_flowlabel(ipv6h); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; else fl6.flowi6_mark = t->parms.fwmark; + switch (protocol) { + case IPPROTO_IPIP: + iph = ip_hdr(skb); + orig_dsfield = ipv4_get_dsfield(iph); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) + dsfield = orig_dsfield; + else + dsfield = ip6_tclass(t->parms.flowinfo); + break; + case IPPROTO_IPV6: + ipv6h = ipv6_hdr(skb); + orig_dsfield = ipv6_get_dsfield(ipv6h); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) + dsfield = orig_dsfield; + else + dsfield = ip6_tclass(t->parms.flowinfo); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) + fl6.flowlabel |= ip6_flowlabel(ipv6h); + break; + default: + orig_dsfield = dsfield = ip6_tclass(t->parms.flowinfo); + break; + } } fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); - dsfield = INET_ECN_encapsulate(dsfield, ipv6_get_dsfield(ipv6h)); + dsfield = INET_ECN_encapsulate(dsfield, orig_dsfield); if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; - skb_set_inner_ipproto(skb, IPPROTO_IPV6); - err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, - IPPROTO_IPV6); + protocol); if (err != 0) { + /* XXX: send ICMP error even if DF is not set. 
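In the merged transmit path, the -EMSGSIZE handling just below selects the PMTU signal by inner protocol: inner IPv4 gets an ICMP Fragmentation Needed, inner IPv6 gets an ICMPv6 Packet Too Big, and MPLS, which has no equivalent message, is dropped without feedback. A minimal illustrative restatement of that mapping (protocol numbers hard-coded only to keep the sketch self-contained):

#include <stdio.h>

enum pmtu_feedback { PMTU_NONE, PMTU_ICMP_FRAG_NEEDED, PMTU_ICMPV6_PKT_TOOBIG };

static enum pmtu_feedback pmtu_feedback_for(unsigned char inner_proto)
{
	switch (inner_proto) {
	case 4:		/* IPPROTO_IPIP: reflect ICMP_FRAG_NEEDED */
		return PMTU_ICMP_FRAG_NEEDED;
	case 41:	/* IPPROTO_IPV6: reflect ICMPV6_PKT_TOOBIG */
		return PMTU_ICMPV6_PKT_TOOBIG;
	default:	/* IPPROTO_MPLS (137) and others: silent drop */
		return PMTU_NONE;
	}
}

int main(void)
{
	printf("%d\n", pmtu_feedback_for(137)); /* 0: MPLS gets no PMTU signal */
	return 0;
}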
*/ if (err == -EMSGSIZE) - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + switch (protocol) { + case IPPROTO_IPIP: + icmp_send(skb, ICMP_DEST_UNREACH, + ICMP_FRAG_NEEDED, htonl(mtu)); + break; + case IPPROTO_IPV6: + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + break; + default: + break; + } return -1; } @@ -1415,6 +1426,7 @@ ip6_tnl_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); struct net_device_stats *stats = &t->dev->stats; + u8 ipproto; int ret; if (!pskb_inet_may_pull(skb)) @@ -1422,15 +1434,21 @@ ip6_tnl_start_xmit(struct sk_buff *skb, struct net_device *dev) switch (skb->protocol) { case htons(ETH_P_IP): - ret = ip4ip6_tnl_xmit(skb, dev); + ipproto = IPPROTO_IPIP; break; case htons(ETH_P_IPV6): - ret = ip6ip6_tnl_xmit(skb, dev); + if (ip6_tnl_addr_conflict(t, ipv6_hdr(skb))) + goto tx_err; + ipproto = IPPROTO_IPV6; + break; + case htons(ETH_P_MPLS_UC): + ipproto = IPPROTO_MPLS; break; default: goto tx_err; } + ret = ipxip6_tnl_xmit(skb, dev, ipproto); if (ret < 0) goto tx_err; @@ -2218,6 +2236,12 @@ static struct xfrm6_tunnel ip6ip6_handler __read_mostly = { .priority = 1, }; +static struct xfrm6_tunnel mplsip6_handler __read_mostly = { + .handler = mplsip6_rcv, + .err_handler = mplsip6_err, + .priority = 1, +}; + static void __net_exit ip6_tnl_destroy_tunnels(struct net *net, struct list_head *list) { struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); @@ -2332,6 +2356,15 @@ static int __init ip6_tunnel_init(void) pr_err("%s: can't register ip6ip6\n", __func__); goto out_ip6ip6; } + + if (ip6_tnl_mpls_supported()) { + err = xfrm6_tunnel_register(&mplsip6_handler, AF_MPLS); + if (err < 0) { + pr_err("%s: can't register mplsip6\n", __func__); + goto out_mplsip6; + } + } + err = rtnl_link_register(&ip6_link_ops); if (err < 0) goto rtnl_link_failed; @@ -2339,6 +2372,9 @@ static int __init ip6_tunnel_init(void) return 0; rtnl_link_failed: + if (ip6_tnl_mpls_supported()) + xfrm6_tunnel_deregister(&mplsip6_handler, AF_MPLS); +out_mplsip6: xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6); out_ip6ip6: xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET); @@ -2361,6 +2397,9 @@ static void __exit ip6_tunnel_cleanup(void) if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6)) pr_info("%s: can't deregister ip6ip6\n", __func__); + if (ip6_tnl_mpls_supported() && + xfrm6_tunnel_deregister(&mplsip6_handler, AF_MPLS)) + pr_info("%s: can't deregister mplsip6\n", __func__); unregister_pernet_device(&ip6_tnl_net_ops); } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index a0e50cc57e54..e10258c2210e 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -136,6 +136,41 @@ static bool setsockopt_needs_rtnl(int optname) return false; } +static int do_ipv6_mcast_group_source(struct sock *sk, int optname, + struct group_source_req *greqs) +{ + int omode, add; + + if (greqs->gsr_group.ss_family != AF_INET6 || + greqs->gsr_source.ss_family != AF_INET6) + return -EADDRNOTAVAIL; + + if (optname == MCAST_BLOCK_SOURCE) { + omode = MCAST_EXCLUDE; + add = 1; + } else if (optname == MCAST_UNBLOCK_SOURCE) { + omode = MCAST_EXCLUDE; + add = 0; + } else if (optname == MCAST_JOIN_SOURCE_GROUP) { + struct sockaddr_in6 *psin6; + int retv; + + psin6 = (struct sockaddr_in6 *)&greqs->gsr_group; + retv = ipv6_sock_mc_join_ssm(sk, greqs->gsr_interface, + &psin6->sin6_addr, + MCAST_INCLUDE); + /* prior join w/ different source is ok */ + if (retv && retv != -EADDRINUSE) + return retv; + omode = MCAST_INCLUDE; + add = 1; + } else /* 
MCAST_LEAVE_SOURCE_GROUP */ { + omode = MCAST_INCLUDE; + add = 0; + } + return ip6_mc_source(add, omode, sk, greqs); +} + static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { @@ -715,7 +750,6 @@ done: case MCAST_UNBLOCK_SOURCE: { struct group_source_req greqs; - int omode, add; if (optlen < sizeof(struct group_source_req)) goto e_inval; @@ -723,34 +757,7 @@ done: retv = -EFAULT; break; } - if (greqs.gsr_group.ss_family != AF_INET6 || - greqs.gsr_source.ss_family != AF_INET6) { - retv = -EADDRNOTAVAIL; - break; - } - if (optname == MCAST_BLOCK_SOURCE) { - omode = MCAST_EXCLUDE; - add = 1; - } else if (optname == MCAST_UNBLOCK_SOURCE) { - omode = MCAST_EXCLUDE; - add = 0; - } else if (optname == MCAST_JOIN_SOURCE_GROUP) { - struct sockaddr_in6 *psin6; - - psin6 = (struct sockaddr_in6 *)&greqs.gsr_group; - retv = ipv6_sock_mc_join_ssm(sk, greqs.gsr_interface, - &psin6->sin6_addr, - MCAST_INCLUDE); - /* prior join w/ different source is ok */ - if (retv && retv != -EADDRINUSE) - break; - omode = MCAST_INCLUDE; - add = 1; - } else /* MCAST_LEAVE_SOURCE_GROUP */ { - omode = MCAST_INCLUDE; - add = 0; - } - retv = ip6_mc_source(add, omode, sk, &greqs); + retv = do_ipv6_mcast_group_source(sk, optname, &greqs); break; } case MCAST_MSFILTER: @@ -780,7 +787,7 @@ done: retv = -EINVAL; break; } - retv = ip6_mc_msfilter(sk, gsf); + retv = ip6_mc_msfilter(sk, gsf, gsf->gf_slist); kfree(gsf); break; @@ -973,9 +980,110 @@ int compat_ipv6_setsockopt(struct sock *sk, int level, int optname, if (level != SOL_IPV6) return -ENOPROTOOPT; - if (optname >= MCAST_JOIN_GROUP && optname <= MCAST_MSFILTER) - return compat_mc_setsockopt(sk, level, optname, optval, optlen, - ipv6_setsockopt); + switch (optname) { + case MCAST_JOIN_GROUP: + case MCAST_LEAVE_GROUP: + { + struct compat_group_req __user *gr32 = (void __user *)optval; + struct group_req greq; + struct sockaddr_in6 *psin6 = (struct sockaddr_in6 *)&greq.gr_group; + + if (optlen < sizeof(struct compat_group_req)) + return -EINVAL; + + if (get_user(greq.gr_interface, &gr32->gr_interface) || + copy_from_user(&greq.gr_group, &gr32->gr_group, + sizeof(greq.gr_group))) + return -EFAULT; + + if (greq.gr_group.ss_family != AF_INET6) + return -EADDRNOTAVAIL; + + rtnl_lock(); + lock_sock(sk); + if (optname == MCAST_JOIN_GROUP) + err = ipv6_sock_mc_join(sk, greq.gr_interface, + &psin6->sin6_addr); + else + err = ipv6_sock_mc_drop(sk, greq.gr_interface, + &psin6->sin6_addr); + release_sock(sk); + rtnl_unlock(); + return err; + } + case MCAST_JOIN_SOURCE_GROUP: + case MCAST_LEAVE_SOURCE_GROUP: + case MCAST_BLOCK_SOURCE: + case MCAST_UNBLOCK_SOURCE: + { + struct compat_group_source_req __user *gsr32 = (void __user *)optval; + struct group_source_req greqs; + + if (optlen < sizeof(struct compat_group_source_req)) + return -EINVAL; + + if (get_user(greqs.gsr_interface, &gsr32->gsr_interface) || + copy_from_user(&greqs.gsr_group, &gsr32->gsr_group, + sizeof(greqs.gsr_group)) || + copy_from_user(&greqs.gsr_source, &gsr32->gsr_source, + sizeof(greqs.gsr_source))) + return -EFAULT; + + rtnl_lock(); + lock_sock(sk); + err = do_ipv6_mcast_group_source(sk, optname, &greqs); + release_sock(sk); + rtnl_unlock(); + return err; + } + case MCAST_MSFILTER: + { + const int size0 = offsetof(struct compat_group_filter, gf_slist); + struct compat_group_filter *gf32; + void *p; + int n; + + if (optlen < size0) + return -EINVAL; + if (optlen > sysctl_optmem_max - 4) + return -ENOBUFS; + + p = kmalloc(optlen + 4, GFP_KERNEL); + if (!p) 
+ return -ENOMEM; + + gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */ + if (copy_from_user(gf32, optval, optlen)) { + err = -EFAULT; + goto mc_msf_out; + } + + n = gf32->gf_numsrc; + /* numsrc >= (4G-140)/128 overflow in 32 bits */ + if (n >= 0x1ffffffU || + n > sysctl_mld_max_msf) { + err = -ENOBUFS; + goto mc_msf_out; + } + if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen) { + err = -EINVAL; + goto mc_msf_out; + } + + rtnl_lock(); + lock_sock(sk); + err = ip6_mc_msfilter(sk, &(struct group_filter){ + .gf_interface = gf32->gf_interface, + .gf_group = gf32->gf_group, + .gf_fmode = gf32->gf_fmode, + .gf_numsrc = gf32->gf_numsrc}, gf32->gf_slist); + release_sock(sk); + rtnl_unlock(); +mc_msf_out: + kfree(p); + return err; + } + } err = do_ipv6_setsockopt(sk, level, optname, optval, optlen); #ifdef CONFIG_NETFILTER @@ -1048,18 +1156,28 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, break; case MCAST_MSFILTER: { + struct group_filter __user *p = (void __user *)optval; struct group_filter gsf; + const int size0 = offsetof(struct group_filter, gf_slist); + int num; int err; - if (len < GROUP_FILTER_SIZE(0)) + if (len < size0) return -EINVAL; - if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) + if (copy_from_user(&gsf, p, size0)) return -EFAULT; if (gsf.gf_group.ss_family != AF_INET6) return -EADDRNOTAVAIL; + num = gsf.gf_numsrc; lock_sock(sk); - err = ip6_mc_msfget(sk, &gsf, - (struct group_filter __user *)optval, optlen); + err = ip6_mc_msfget(sk, &gsf, p->gf_slist); + if (!err) { + if (num > gsf.gf_numsrc) + num = gsf.gf_numsrc; + if (put_user(GROUP_FILTER_SIZE(num), optlen) || + copy_to_user(p, &gsf, size0)) + err = -EFAULT; + } release_sock(sk); return err; } @@ -1428,9 +1546,44 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname, if (level != SOL_IPV6) return -ENOPROTOOPT; - if (optname == MCAST_MSFILTER) - return compat_mc_getsockopt(sk, level, optname, optval, optlen, - ipv6_getsockopt); + if (optname == MCAST_MSFILTER) { + const int size0 = offsetof(struct compat_group_filter, gf_slist); + struct compat_group_filter __user *p = (void __user *)optval; + struct compat_group_filter gf32; + struct group_filter gf; + int ulen, err; + int num; + + if (get_user(ulen, optlen)) + return -EFAULT; + + if (ulen < size0) + return -EINVAL; + + if (copy_from_user(&gf32, p, size0)) + return -EFAULT; + + gf.gf_interface = gf32.gf_interface; + gf.gf_fmode = gf32.gf_fmode; + num = gf.gf_numsrc = gf32.gf_numsrc; + gf.gf_group = gf32.gf_group; + + if (gf.gf_group.ss_family != AF_INET6) + return -EADDRNOTAVAIL; + lock_sock(sk); + err = ip6_mc_msfget(sk, &gf, p->gf_slist); + release_sock(sk); + if (err) + return err; + if (num > gf.gf_numsrc) + num = gf.gf_numsrc; + ulen = GROUP_FILTER_SIZE(num) - (sizeof(gf)-sizeof(gf32)); + if (put_user(ulen, optlen) || + put_user(gf.gf_fmode, &p->gf_fmode) || + put_user(gf.gf_numsrc, &p->gf_numsrc)) + return -EFAULT; + return 0; + } err = do_ipv6_getsockopt(sk, level, optname, optval, optlen, MSG_CMSG_COMPAT); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index eaa4c2cc2fbb..7e12d2114158 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -457,7 +457,8 @@ done: return err; } -int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) +int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, + struct sockaddr_storage *list) { const struct in6_addr *group; struct ipv6_mc_socklist *pmc; @@ -509,10 +510,10 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) goto done; 
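The compat setsockopt path above rebuilds a native struct group_filter because the 32-bit layout differs: on 64-bit kernels, gf_group (a sockaddr_storage) is padded to 8-byte alignment, while the compat layout places it 4 bytes earlier. That is the point of kmalloc(optlen + 4) with gf32 = p + 4: shifting the compat structure by 4 bytes inside an 8-byte-aligned allocation puts gf_group and gf_slist on their native alignment. Native userspace is unaffected; for reference, a hedged example of installing an IPv6 include-mode source filter (the interface name and addresses are placeholders, and the group must be joined first, since the kernel requires a prior join):

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <net/if.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <sys/socket.h>

int main(void)
{
	int fd = socket(AF_INET6, SOCK_DGRAM, 0);
	unsigned int ifindex = if_nametoindex("eth0");
	struct sockaddr_in6 *sin6;
	struct group_filter *gsf;
	struct group_req greq;
	socklen_t len = GROUP_FILTER_SIZE(1);

	if (fd < 0 || !ifindex)
		return 1;

	/* MCAST_MSFILTER requires a prior join of the group. */
	memset(&greq, 0, sizeof(greq));
	greq.gr_interface = ifindex;
	sin6 = (struct sockaddr_in6 *)&greq.gr_group;
	sin6->sin6_family = AF_INET6;
	inet_pton(AF_INET6, "ff15::1", &sin6->sin6_addr);
	if (setsockopt(fd, IPPROTO_IPV6, MCAST_JOIN_GROUP,
		       &greq, sizeof(greq)) < 0)
		return 1;

	/* Accept traffic for ff15::1 only from 2001:db8::1. */
	gsf = calloc(1, len);
	if (!gsf)
		return 1;
	gsf->gf_interface = ifindex;
	gsf->gf_fmode = MCAST_INCLUDE;
	gsf->gf_numsrc = 1;
	sin6 = (struct sockaddr_in6 *)&gsf->gf_group;
	sin6->sin6_family = AF_INET6;
	inet_pton(AF_INET6, "ff15::1", &sin6->sin6_addr);
	sin6 = (struct sockaddr_in6 *)&gsf->gf_slist[0];
	sin6->sin6_family = AF_INET6;
	inet_pton(AF_INET6, "2001:db8::1", &sin6->sin6_addr);
	if (setsockopt(fd, IPPROTO_IPV6, MCAST_MSFILTER, gsf, len) < 0)
		perror("MCAST_MSFILTER");

	free(gsf);
	close(fd);
	return 0;
}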
} newpsl->sl_max = newpsl->sl_count = gsf->gf_numsrc; - for (i = 0; i < newpsl->sl_count; ++i) { + for (i = 0; i < newpsl->sl_count; ++i, ++list) { struct sockaddr_in6 *psin6; - psin6 = (struct sockaddr_in6 *)&gsf->gf_slist[i]; + psin6 = (struct sockaddr_in6 *)list; newpsl->sl_addr[i] = psin6->sin6_addr; } err = ip6_mc_add_src(idev, group, gsf->gf_fmode, @@ -547,7 +548,7 @@ done: } int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, - struct group_filter __user *optval, int __user *optlen) + struct sockaddr_storage *p) { int err, i, count, copycount; const struct in6_addr *group; @@ -592,14 +593,10 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc; gsf->gf_numsrc = count; - if (put_user(GROUP_FILTER_SIZE(copycount), optlen) || - copy_to_user(optval, gsf, GROUP_FILTER_SIZE(0))) { - return -EFAULT; - } /* changes to psl require the socket lock, and a write lock * on pmc->sflock. We have the socket lock so reading here is safe. */ - for (i = 0; i < copycount; i++) { + for (i = 0; i < copycount; i++, p++) { struct sockaddr_in6 *psin6; struct sockaddr_storage ss; @@ -607,7 +604,7 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, memset(&ss, 0, sizeof(ss)); psin6->sin6_family = AF_INET6; psin6->sin6_addr = psl->sl_addr[i]; - if (copy_to_user(&optval->gf_slist[i], &ss, sizeof(ss))) + if (copy_to_user(p, &ss, sizeof(ss))) return -EFAULT; } return 0; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 0028aa1d7869..8ef5a7b30524 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1377,6 +1377,7 @@ const struct proto_ops inet6_sockraw_ops = { .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, #ifdef CONFIG_COMPAT + .compat_ioctl = inet6_compat_ioctl, .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, #endif diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a8b4add0b545..82cbb46a2a4f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3421,6 +3421,11 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, #ifdef CONFIG_IPV6_ROUTER_PREF fib6_nh->last_probe = jiffies; #endif + if (cfg->fc_is_fdb) { + fib6_nh->fib_nh_gw6 = cfg->fc_gateway; + fib6_nh->fib_nh_gw_family = AF_INET6; + return 0; + } err = -ENODEV; if (cfg->fc_ifindex) { @@ -4336,41 +4341,29 @@ static void rtmsg_to_fib6_config(struct net *net, }; } -int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) +int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg) { struct fib6_config cfg; - struct in6_rtmsg rtmsg; int err; - switch (cmd) { - case SIOCADDRT: /* Add a route */ - case SIOCDELRT: /* Delete a route */ - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - return -EPERM; - err = copy_from_user(&rtmsg, arg, - sizeof(struct in6_rtmsg)); - if (err) - return -EFAULT; + if (cmd != SIOCADDRT && cmd != SIOCDELRT) + return -EINVAL; + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + return -EPERM; - rtmsg_to_fib6_config(net, &rtmsg, &cfg); + rtmsg_to_fib6_config(net, rtmsg, &cfg); - rtnl_lock(); - switch (cmd) { - case SIOCADDRT: - err = ip6_route_add(&cfg, GFP_KERNEL, NULL); - break; - case SIOCDELRT: - err = ip6_route_del(&cfg, NULL); - break; - default: - err = -EINVAL; - } - rtnl_unlock(); - - return err; + rtnl_lock(); + switch (cmd) { + case SIOCADDRT: + err = ip6_route_add(&cfg, GFP_KERNEL, NULL); + break; + case SIOCDELRT: + err = ip6_route_del(&cfg, NULL); + break; } - - return -EINVAL; + rtnl_unlock(); + return err; } /* diff --git 
a/net/ipv6/sit.c b/net/ipv6/sit.c index 98954830c40b..1fbb4dfbb191 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -83,6 +83,13 @@ struct sit_net { struct net_device *fb_tunnel_dev; }; +static inline struct sit_net *dev_to_sit_net(struct net_device *dev) +{ + struct ip_tunnel *t = netdev_priv(dev); + + return net_generic(t->net, sit_net_id); +} + /* * Must be invoked with rcu_read_lock */ @@ -291,14 +298,18 @@ __ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr) } -static int ipip6_tunnel_get_prl(struct ip_tunnel *t, - struct ip_tunnel_prl __user *a) +static int ipip6_tunnel_get_prl(struct net_device *dev, struct ifreq *ifr) { + struct ip_tunnel_prl __user *a = ifr->ifr_ifru.ifru_data; + struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_prl kprl, *kp; struct ip_tunnel_prl_entry *prl; unsigned int cmax, c = 0, ca, len; int ret = 0; + if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) + return -EINVAL; + if (copy_from_user(&kprl, a, sizeof(kprl))) return -EFAULT; cmax = kprl.datalen / sizeof(kprl); @@ -441,6 +452,35 @@ out: return err; } +static int ipip6_tunnel_prl_ctl(struct net_device *dev, struct ifreq *ifr, + int cmd) +{ + struct ip_tunnel *t = netdev_priv(dev); + struct ip_tunnel_prl prl; + int err; + + if (!ns_capable(t->net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) + return -EINVAL; + + if (copy_from_user(&prl, ifr->ifr_ifru.ifru_data, sizeof(prl))) + return -EFAULT; + + switch (cmd) { + case SIOCDELPRL: + err = ipip6_tunnel_del_prl(t, &prl); + break; + case SIOCADDPRL: + case SIOCCHGPRL: + err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL); + break; + } + dst_cache_reset(&t->dst_cache); + netdev_state_change(dev); + return err; +} + static int isatap_chksrc(struct sk_buff *skb, const struct iphdr *iph, struct ip_tunnel *t) { @@ -1151,7 +1191,53 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t, netdev_state_change(t->dev); return 0; } -#endif + +static int +ipip6_tunnel_get6rd(struct net_device *dev, struct ifreq *ifr) +{ + struct ip_tunnel *t = netdev_priv(dev); + struct ip_tunnel_6rd ip6rd; + struct ip_tunnel_parm p; + + if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) { + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + return -EFAULT; + t = ipip6_tunnel_locate(t->net, &p, 0); + } + if (!t) + t = netdev_priv(dev); + + ip6rd.prefix = t->ip6rd.prefix; + ip6rd.relay_prefix = t->ip6rd.relay_prefix; + ip6rd.prefixlen = t->ip6rd.prefixlen; + ip6rd.relay_prefixlen = t->ip6rd.relay_prefixlen; + if (copy_to_user(ifr->ifr_ifru.ifru_data, &ip6rd, sizeof(ip6rd))) + return -EFAULT; + return 0; +} + +static int +ipip6_tunnel_6rdctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + struct ip_tunnel *t = netdev_priv(dev); + struct ip_tunnel_6rd ip6rd; + int err; + + if (!ns_capable(t->net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + if (copy_from_user(&ip6rd, ifr->ifr_ifru.ifru_data, sizeof(ip6rd))) + return -EFAULT; + + if (cmd != SIOCDEL6RD) { + err = ipip6_tunnel_update_6rd(t, &ip6rd); + if (err < 0) + return err; + } else + ipip6_tunnel_clone_6rd(dev, dev_to_sit_net(dev)); + return 0; +} + +#endif /* CONFIG_IPV6_SIT_6RD */ static bool ipip6_valid_ip_proto(u8 ipproto) { @@ -1164,185 +1250,145 @@ static bool ipip6_valid_ip_proto(u8 ipproto) } static int -ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +__ipip6_tunnel_ioctl_validate(struct net *net, struct ip_tunnel_parm *p) +{ + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + if 
(!ipip6_valid_ip_proto(p->iph.protocol)) + return -EINVAL; + if (p->iph.version != 4 || + p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF))) + return -EINVAL; + + if (p->iph.ttl) + p->iph.frag_off |= htons(IP_DF); + return 0; +} + +static int +ipip6_tunnel_get(struct net_device *dev, struct ip_tunnel_parm *p) { - int err = 0; - struct ip_tunnel_parm p; - struct ip_tunnel_prl prl; struct ip_tunnel *t = netdev_priv(dev); - struct net *net = t->net; - struct sit_net *sitn = net_generic(net, sit_net_id); -#ifdef CONFIG_IPV6_SIT_6RD - struct ip_tunnel_6rd ip6rd; -#endif - switch (cmd) { - case SIOCGETTUNNEL: -#ifdef CONFIG_IPV6_SIT_6RD - case SIOCGET6RD: -#endif - if (dev == sitn->fb_tunnel_dev) { - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { - err = -EFAULT; - break; - } - t = ipip6_tunnel_locate(net, &p, 0); - if (!t) - t = netdev_priv(dev); - } + if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) + t = ipip6_tunnel_locate(t->net, p, 0); + if (!t) + t = netdev_priv(dev); + memcpy(p, &t->parms, sizeof(*p)); + return 0; +} - err = -EFAULT; - if (cmd == SIOCGETTUNNEL) { - memcpy(&p, &t->parms, sizeof(p)); - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, - sizeof(p))) - goto done; -#ifdef CONFIG_IPV6_SIT_6RD +static int +ipip6_tunnel_add(struct net_device *dev, struct ip_tunnel_parm *p) +{ + struct ip_tunnel *t = netdev_priv(dev); + int err; + + err = __ipip6_tunnel_ioctl_validate(t->net, p); + if (err) + return err; + + t = ipip6_tunnel_locate(t->net, p, 1); + if (!t) + return -ENOBUFS; + return 0; +} + +static int +ipip6_tunnel_change(struct net_device *dev, struct ip_tunnel_parm *p) +{ + struct ip_tunnel *t = netdev_priv(dev); + int err; + + err = __ipip6_tunnel_ioctl_validate(t->net, p); + if (err) + return err; + + t = ipip6_tunnel_locate(t->net, p, 0); + if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) { + if (!t) + return -ENOENT; + } else { + if (t) { + if (t->dev != dev) + return -EEXIST; } else { - ip6rd.prefix = t->ip6rd.prefix; - ip6rd.relay_prefix = t->ip6rd.relay_prefix; - ip6rd.prefixlen = t->ip6rd.prefixlen; - ip6rd.relay_prefixlen = t->ip6rd.relay_prefixlen; - if (copy_to_user(ifr->ifr_ifru.ifru_data, &ip6rd, - sizeof(ip6rd))) - goto done; -#endif + if (((dev->flags & IFF_POINTOPOINT) && !p->iph.daddr) || + (!(dev->flags & IFF_POINTOPOINT) && p->iph.daddr)) + return -EINVAL; + t = netdev_priv(dev); } - err = 0; - break; - case SIOCADDTUNNEL: - case SIOCCHGTUNNEL: - err = -EPERM; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - goto done; + ipip6_tunnel_update(t, p, t->fwmark); + } - err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) - goto done; - - err = -EINVAL; - if (!ipip6_valid_ip_proto(p.iph.protocol)) - goto done; - if (p.iph.version != 4 || - p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) - goto done; - if (p.iph.ttl) - p.iph.frag_off |= htons(IP_DF); - - t = ipip6_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL); - - if (dev != sitn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { - if (t) { - if (t->dev != dev) { - err = -EEXIST; - break; - } - } else { - if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) || - (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) { - err = -EINVAL; - break; - } - t = netdev_priv(dev); - } + return 0; +} - ipip6_tunnel_update(t, &p, t->fwmark); - } +static int +ipip6_tunnel_del(struct net_device *dev, struct ip_tunnel_parm *p) +{ + struct ip_tunnel *t = netdev_priv(dev); - if (t) { - err = 0; - if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) - err = -EFAULT; - } else - err = (cmd == 
SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); - break; + if (!ns_capable(t->net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) { + t = ipip6_tunnel_locate(t->net, p, 0); + if (!t) + return -ENOENT; + if (t == netdev_priv(dev_to_sit_net(dev)->fb_tunnel_dev)) + return -EPERM; + dev = t->dev; + } + unregister_netdevice(dev); + return 0; +} +static int +ipip6_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) +{ + switch (cmd) { + case SIOCGETTUNNEL: + return ipip6_tunnel_get(dev, p); + case SIOCADDTUNNEL: + return ipip6_tunnel_add(dev, p); + case SIOCCHGTUNNEL: + return ipip6_tunnel_change(dev, p); case SIOCDELTUNNEL: - err = -EPERM; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - goto done; - - if (dev == sitn->fb_tunnel_dev) { - err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) - goto done; - err = -ENOENT; - t = ipip6_tunnel_locate(net, &p, 0); - if (!t) - goto done; - err = -EPERM; - if (t == netdev_priv(sitn->fb_tunnel_dev)) - goto done; - dev = t->dev; - } - unregister_netdevice(dev); - err = 0; - break; + return ipip6_tunnel_del(dev, p); + default: + return -EINVAL; + } +} +static int +ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + switch (cmd) { + case SIOCGETTUNNEL: + case SIOCADDTUNNEL: + case SIOCCHGTUNNEL: + case SIOCDELTUNNEL: + return ip_tunnel_ioctl(dev, ifr, cmd); case SIOCGETPRL: - err = -EINVAL; - if (dev == sitn->fb_tunnel_dev) - goto done; - err = ipip6_tunnel_get_prl(t, ifr->ifr_ifru.ifru_data); - break; - + return ipip6_tunnel_get_prl(dev, ifr); case SIOCADDPRL: case SIOCDELPRL: case SIOCCHGPRL: - err = -EPERM; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - goto done; - err = -EINVAL; - if (dev == sitn->fb_tunnel_dev) - goto done; - err = -EFAULT; - if (copy_from_user(&prl, ifr->ifr_ifru.ifru_data, sizeof(prl))) - goto done; - - switch (cmd) { - case SIOCDELPRL: - err = ipip6_tunnel_del_prl(t, &prl); - break; - case SIOCADDPRL: - case SIOCCHGPRL: - err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL); - break; - } - dst_cache_reset(&t->dst_cache); - netdev_state_change(dev); - break; - + return ipip6_tunnel_prl_ctl(dev, ifr, cmd); #ifdef CONFIG_IPV6_SIT_6RD + case SIOCGET6RD: + return ipip6_tunnel_get6rd(dev, ifr); case SIOCADD6RD: case SIOCCHG6RD: case SIOCDEL6RD: - err = -EPERM; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - goto done; - - err = -EFAULT; - if (copy_from_user(&ip6rd, ifr->ifr_ifru.ifru_data, - sizeof(ip6rd))) - goto done; - - if (cmd != SIOCDEL6RD) { - err = ipip6_tunnel_update_6rd(t, &ip6rd); - if (err < 0) - goto done; - } else - ipip6_tunnel_clone_6rd(dev, sitn); - - err = 0; - break; + return ipip6_tunnel_6rdctl(dev, ifr, cmd); #endif - default: - err = -EINVAL; + return -EINVAL; } - -done: - return err; } static const struct net_device_ops ipip6_netdev_ops = { @@ -1352,6 +1398,7 @@ static const struct net_device_ops ipip6_netdev_ops = { .ndo_do_ioctl = ipip6_tunnel_ioctl, .ndo_get_stats64 = ip_tunnel_get_stats64, .ndo_get_iflink = ip_tunnel_get_iflink, + .ndo_tunnel_ctl = ipip6_tunnel_ctl, }; static void ipip6_dev_free(struct net_device *dev) diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c index 21e7b95ddbfa..06c02ebe6b9b 100644 --- a/net/ipv6/tunnel6.c +++ b/net/ipv6/tunnel6.c @@ -21,8 +21,14 @@ static struct xfrm6_tunnel __rcu *tunnel6_handlers __read_mostly; static struct xfrm6_tunnel __rcu *tunnel46_handlers __read_mostly; +static struct xfrm6_tunnel __rcu *tunnelmpls6_handlers __read_mostly; static 
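The sit.c rework above ends with ipip6_tunnel_ioctl() forwarding the four classic tunnel commands to ip_tunnel_ioctl() while the new .ndo_tunnel_ctl hook points at ipip6_tunnel_ctl(). A sketch of how the generic helper pairs with the hook, assuming the simplified shape of the net/ipv4/ip_tunnel.c helper: the core performs the one user copy in each direction, so the driver callback only ever sees a kernel-space ip_tunnel_parm.

int ip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
	struct ip_tunnel_parm p;
	int err;

	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
		return -EFAULT;
	/* driver-specific handling, e.g. ipip6_tunnel_ctl() */
	err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd);
	if (!err && copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
		return -EFAULT;
	return err;
}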
DEFINE_MUTEX(tunnel6_mutex); +static inline int xfrm6_tunnel_mpls_supported(void) +{ + return IS_ENABLED(CONFIG_MPLS); +} + int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family) { struct xfrm6_tunnel __rcu **pprev; @@ -32,8 +38,21 @@ int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family) mutex_lock(&tunnel6_mutex); - for (pprev = (family == AF_INET6) ? &tunnel6_handlers : &tunnel46_handlers; - (t = rcu_dereference_protected(*pprev, + switch (family) { + case AF_INET6: + pprev = &tunnel6_handlers; + break; + case AF_INET: + pprev = &tunnel46_handlers; + break; + case AF_MPLS: + pprev = &tunnelmpls6_handlers; + break; + default: + goto err; + } + + for (; (t = rcu_dereference_protected(*pprev, lockdep_is_held(&tunnel6_mutex))) != NULL; pprev = &t->next) { if (t->priority > priority) @@ -62,8 +81,21 @@ int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family) mutex_lock(&tunnel6_mutex); - for (pprev = (family == AF_INET6) ? &tunnel6_handlers : &tunnel46_handlers; - (t = rcu_dereference_protected(*pprev, + switch (family) { + case AF_INET6: + pprev = &tunnel6_handlers; + break; + case AF_INET: + pprev = &tunnel46_handlers; + break; + case AF_MPLS: + pprev = &tunnelmpls6_handlers; + break; + default: + goto err; + } + + for (; (t = rcu_dereference_protected(*pprev, lockdep_is_held(&tunnel6_mutex))) != NULL; pprev = &t->next) { if (t == handler) { @@ -73,6 +105,7 @@ int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family) } } +err: mutex_unlock(&tunnel6_mutex); synchronize_net(); @@ -86,6 +119,24 @@ EXPORT_SYMBOL(xfrm6_tunnel_deregister); handler != NULL; \ handler = rcu_dereference(handler->next)) \ +static int tunnelmpls6_rcv(struct sk_buff *skb) +{ + struct xfrm6_tunnel *handler; + + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) + goto drop; + + for_each_tunnel_rcu(tunnelmpls6_handlers, handler) + if (!handler->handler(skb)) + return 0; + + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); + +drop: + kfree_skb(skb); + return 0; +} + static int tunnel6_rcv(struct sk_buff *skb) { struct xfrm6_tunnel *handler; @@ -146,6 +197,18 @@ static int tunnel46_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return -ENOENT; } +static int tunnelmpls6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) +{ + struct xfrm6_tunnel *handler; + + for_each_tunnel_rcu(tunnelmpls6_handlers, handler) + if (!handler->err_handler(skb, opt, type, code, offset, info)) + return 0; + + return -ENOENT; +} + static const struct inet6_protocol tunnel6_protocol = { .handler = tunnel6_rcv, .err_handler = tunnel6_err, @@ -158,6 +221,12 @@ static const struct inet6_protocol tunnel46_protocol = { .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; +static const struct inet6_protocol tunnelmpls6_protocol = { + .handler = tunnelmpls6_rcv, + .err_handler = tunnelmpls6_err, + .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, +}; + static int __init tunnel6_init(void) { if (inet6_add_protocol(&tunnel6_protocol, IPPROTO_IPV6)) { @@ -169,6 +238,13 @@ static int __init tunnel6_init(void) inet6_del_protocol(&tunnel6_protocol, IPPROTO_IPV6); return -EAGAIN; } + if (xfrm6_tunnel_mpls_supported() && + inet6_add_protocol(&tunnelmpls6_protocol, IPPROTO_MPLS)) { + pr_err("%s: can't add protocol\n", __func__); + inet6_del_protocol(&tunnel6_protocol, IPPROTO_IPV6); + inet6_del_protocol(&tunnel46_protocol, IPPROTO_IPIP); + return -EAGAIN; + } return 0; } @@ -178,6 +254,9 @@ static void 
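xfrm6_tunnel_register() and xfrm6_tunnel_deregister() above now select the handler list with identical family switches. A sketch of a shared lookup the two switches could be folded into (hypothetical helper; a NULL return maps to the err path in both callers):

static struct xfrm6_tunnel __rcu **
tunnel6_handler_list(unsigned short family)
{
	switch (family) {
	case AF_INET6:
		return &tunnel6_handlers;
	case AF_INET:
		return &tunnel46_handlers;
	case AF_MPLS:
		return &tunnelmpls6_handlers;
	default:
		return NULL;	/* unknown family: caller bails out */
	}
}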
__exit tunnel6_fini(void) pr_err("%s: can't remove protocol\n", __func__); if (inet6_del_protocol(&tunnel6_protocol, IPPROTO_IPV6)) pr_err("%s: can't remove protocol\n", __func__); + if (xfrm6_tunnel_mpls_supported() && + inet6_del_protocol(&tunnelmpls6_protocol, IPPROTO_MPLS)) + pr_err("%s: can't remove protocol\n", __func__); } module_init(tunnel6_init); diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index c4bdcbc84b07..ee0add15497d 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -16,6 +16,7 @@ #include <linux/module.h> #include <linux/netdevice.h> #include <linux/types.h> +#include <linux/limits.h> #include <linux/list.h> #include <linux/errno.h> #include <linux/kernel.h> @@ -36,8 +37,6 @@ static char iucv_userid[80]; -static const struct proto_ops iucv_sock_ops; - static struct proto iucv_proto = { .name = "AF_IUCV", .owner = THIS_MODULE, @@ -85,14 +84,11 @@ do { \ __ret; \ }) +static struct sock *iucv_accept_dequeue(struct sock *parent, + struct socket *newsock); static void iucv_sock_kill(struct sock *sk); static void iucv_sock_close(struct sock *sk); -static void iucv_sever_path(struct sock *, int); -static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev); -static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock, - struct sk_buff *skb, u8 flags); static void afiucv_hs_callback_txnotify(struct sk_buff *, enum iucv_tx_notify); /* Call Back functions */ @@ -127,110 +123,6 @@ static inline void low_nmcpy(unsigned char *dst, char *src) memcpy(&dst[8], src, 8); } -static int afiucv_pm_prepare(struct device *dev) -{ -#ifdef CONFIG_PM_DEBUG - printk(KERN_WARNING "afiucv_pm_prepare\n"); -#endif - return 0; -} - -static void afiucv_pm_complete(struct device *dev) -{ -#ifdef CONFIG_PM_DEBUG - printk(KERN_WARNING "afiucv_pm_complete\n"); -#endif -} - -/** - * afiucv_pm_freeze() - Freeze PM callback - * @dev: AFIUCV dummy device - * - * Sever all established IUCV communication pathes - */ -static int afiucv_pm_freeze(struct device *dev) -{ - struct iucv_sock *iucv; - struct sock *sk; - -#ifdef CONFIG_PM_DEBUG - printk(KERN_WARNING "afiucv_pm_freeze\n"); -#endif - read_lock(&iucv_sk_list.lock); - sk_for_each(sk, &iucv_sk_list.head) { - iucv = iucv_sk(sk); - switch (sk->sk_state) { - case IUCV_DISCONN: - case IUCV_CLOSING: - case IUCV_CONNECTED: - iucv_sever_path(sk, 0); - break; - case IUCV_OPEN: - case IUCV_BOUND: - case IUCV_LISTEN: - case IUCV_CLOSED: - default: - break; - } - skb_queue_purge(&iucv->send_skb_q); - skb_queue_purge(&iucv->backlog_skb_q); - } - read_unlock(&iucv_sk_list.lock); - return 0; -} - -/** - * afiucv_pm_restore_thaw() - Thaw and restore PM callback - * @dev: AFIUCV dummy device - * - * socket clean up after freeze - */ -static int afiucv_pm_restore_thaw(struct device *dev) -{ - struct sock *sk; - -#ifdef CONFIG_PM_DEBUG - printk(KERN_WARNING "afiucv_pm_restore_thaw\n"); -#endif - read_lock(&iucv_sk_list.lock); - sk_for_each(sk, &iucv_sk_list.head) { - switch (sk->sk_state) { - case IUCV_CONNECTED: - sk->sk_err = EPIPE; - sk->sk_state = IUCV_DISCONN; - sk->sk_state_change(sk); - break; - case IUCV_DISCONN: - case IUCV_CLOSING: - case IUCV_LISTEN: - case IUCV_BOUND: - case IUCV_OPEN: - default: - break; - } - } - read_unlock(&iucv_sk_list.lock); - return 0; -} - -static const struct dev_pm_ops afiucv_pm_ops = { - .prepare = afiucv_pm_prepare, - .complete = afiucv_pm_complete, - .freeze = afiucv_pm_freeze, - .thaw = afiucv_pm_restore_thaw, - .restore = 
afiucv_pm_restore_thaw, -}; - -static struct device_driver af_iucv_driver = { - .owner = THIS_MODULE, - .name = "afiucv", - .bus = NULL, - .pm = &afiucv_pm_ops, -}; - -/* dummy device used as trigger for PM functions */ -static struct device *af_iucv_dev; - /** * iucv_msg_length() - Returns the length of an iucv message. * @msg: Pointer to struct iucv_message, MUST NOT be NULL @@ -435,6 +327,20 @@ static void iucv_sock_cleanup_listen(struct sock *parent) parent->sk_state = IUCV_CLOSED; } +static void iucv_sock_link(struct iucv_sock_list *l, struct sock *sk) +{ + write_lock_bh(&l->lock); + sk_add_node(sk, &l->head); + write_unlock_bh(&l->lock); +} + +static void iucv_sock_unlink(struct iucv_sock_list *l, struct sock *sk) +{ + write_lock_bh(&l->lock); + sk_del_node_init(sk); + write_unlock_bh(&l->lock); +} + /* Kill socket (only if zapped and orphaned) */ static void iucv_sock_kill(struct sock *sk) { @@ -607,53 +513,7 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio, return sk; } -/* Create an IUCV socket */ -static int iucv_sock_create(struct net *net, struct socket *sock, int protocol, - int kern) -{ - struct sock *sk; - - if (protocol && protocol != PF_IUCV) - return -EPROTONOSUPPORT; - - sock->state = SS_UNCONNECTED; - - switch (sock->type) { - case SOCK_STREAM: - sock->ops = &iucv_sock_ops; - break; - case SOCK_SEQPACKET: - /* currently, proto ops can handle both sk types */ - sock->ops = &iucv_sock_ops; - break; - default: - return -ESOCKTNOSUPPORT; - } - - sk = iucv_sock_alloc(sock, protocol, GFP_KERNEL, kern); - if (!sk) - return -ENOMEM; - - iucv_sock_init(sk, NULL); - - return 0; -} - -void iucv_sock_link(struct iucv_sock_list *l, struct sock *sk) -{ - write_lock_bh(&l->lock); - sk_add_node(sk, &l->head); - write_unlock_bh(&l->lock); -} - -void iucv_sock_unlink(struct iucv_sock_list *l, struct sock *sk) -{ - write_lock_bh(&l->lock); - sk_del_node_init(sk); - write_unlock_bh(&l->lock); -} - -void iucv_accept_enqueue(struct sock *parent, struct sock *sk) +static void iucv_accept_enqueue(struct sock *parent, struct sock *sk) { unsigned long flags; struct iucv_sock *par = iucv_sk(parent); @@ -666,7 +526,7 @@ void iucv_accept_enqueue(struct sock *parent, struct sock *sk) sk_acceptq_added(parent); } -void iucv_accept_unlink(struct sock *sk) +static void iucv_accept_unlink(struct sock *sk) { unsigned long flags; struct iucv_sock *par = iucv_sk(iucv_sk(sk)->parent); @@ -679,7 +539,8 @@ void iucv_accept_unlink(struct sock *sk) sock_put(sk); } -struct sock *iucv_accept_dequeue(struct sock *parent, struct socket *newsock) +static struct sock *iucv_accept_dequeue(struct sock *parent, + struct socket *newsock) { struct iucv_sock *isk, *n; struct sock *sk; @@ -1100,7 +961,6 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg, /* initialize defaults */ cmsg_done = 0; /* check for duplicate headers */ - txmsg.class = 0; /* iterate over control messages */ for_each_cmsghdr(cmsg, msg) { @@ -1511,8 +1371,8 @@ static inline __poll_t iucv_accept_poll(struct sock *parent) return 0; } -__poll_t iucv_sock_poll(struct file *file, struct socket *sock, - poll_table *wait) +static __poll_t iucv_sock_poll(struct file *file, struct socket *sock, + poll_table *wait) { struct sock *sk = sock->sk; __poll_t mask = 0; @@ -1664,7 +1524,7 @@ static int iucv_sock_setsockopt(struct socket *sock, int level, int optname, switch (sk->sk_state) { case IUCV_OPEN: case IUCV_BOUND: - if (val < 1 || val > (u16)(~0)) + if (val < 1 || val > U16_MAX) rc = -EINVAL; else 
iucv->msglimit = val; @@ -2396,6 +2256,35 @@ static const struct proto_ops iucv_sock_ops = { .getsockopt = iucv_sock_getsockopt, }; +static int iucv_sock_create(struct net *net, struct socket *sock, int protocol, + int kern) +{ + struct sock *sk; + + if (protocol && protocol != PF_IUCV) + return -EPROTONOSUPPORT; + + sock->state = SS_UNCONNECTED; + + switch (sock->type) { + case SOCK_STREAM: + case SOCK_SEQPACKET: + /* currently, proto ops can handle both sk types */ + sock->ops = &iucv_sock_ops; + break; + default: + return -ESOCKTNOSUPPORT; + } + + sk = iucv_sock_alloc(sock, protocol, GFP_KERNEL, kern); + if (!sk) + return -ENOMEM; + + iucv_sock_init(sk, NULL); + + return 0; +} + static const struct net_proto_family iucv_sock_family_ops = { .family = AF_IUCV, .owner = THIS_MODULE, @@ -2409,45 +2298,11 @@ static struct packet_type iucv_packet_type = { static int afiucv_iucv_init(void) { - int err; - - err = pr_iucv->iucv_register(&af_iucv_handler, 0); - if (err) - goto out; - /* establish dummy device */ - af_iucv_driver.bus = pr_iucv->bus; - err = driver_register(&af_iucv_driver); - if (err) - goto out_iucv; - af_iucv_dev = kzalloc(sizeof(struct device), GFP_KERNEL); - if (!af_iucv_dev) { - err = -ENOMEM; - goto out_driver; - } - dev_set_name(af_iucv_dev, "af_iucv"); - af_iucv_dev->bus = pr_iucv->bus; - af_iucv_dev->parent = pr_iucv->root; - af_iucv_dev->release = (void (*)(struct device *))kfree; - af_iucv_dev->driver = &af_iucv_driver; - err = device_register(af_iucv_dev); - if (err) - goto out_iucv_dev; - return 0; - -out_iucv_dev: - put_device(af_iucv_dev); -out_driver: - driver_unregister(&af_iucv_driver); -out_iucv: - pr_iucv->iucv_unregister(&af_iucv_handler, 0); -out: - return err; + return pr_iucv->iucv_register(&af_iucv_handler, 0); } static void afiucv_iucv_exit(void) { - device_unregister(af_iucv_dev); - driver_unregister(&af_iucv_driver); pr_iucv->iucv_unregister(&af_iucv_handler, 0); } diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 9a2d023842fe..19250a0c85d3 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -67,32 +67,9 @@ static int iucv_bus_match(struct device *dev, struct device_driver *drv) return 0; } -enum iucv_pm_states { - IUCV_PM_INITIAL = 0, - IUCV_PM_FREEZING = 1, - IUCV_PM_THAWING = 2, - IUCV_PM_RESTORING = 3, -}; -static enum iucv_pm_states iucv_pm_state; - -static int iucv_pm_prepare(struct device *); -static void iucv_pm_complete(struct device *); -static int iucv_pm_freeze(struct device *); -static int iucv_pm_thaw(struct device *); -static int iucv_pm_restore(struct device *); - -static const struct dev_pm_ops iucv_pm_ops = { - .prepare = iucv_pm_prepare, - .complete = iucv_pm_complete, - .freeze = iucv_pm_freeze, - .thaw = iucv_pm_thaw, - .restore = iucv_pm_restore, -}; - struct bus_type iucv_bus = { .name = "iucv", .match = iucv_bus_match, - .pm = &iucv_pm_ops, }; EXPORT_SYMBOL(iucv_bus); @@ -435,31 +412,6 @@ static void iucv_block_cpu(void *data) } /** - * iucv_block_cpu_almost - * @data: unused - * - * Allow connection-severed interrupts only on this cpu. 
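The setsockopt hunk above replaces the opaque (u16)(~0) bound with U16_MAX, which is why <linux/limits.h> joins the includes. A minimal sketch of the check, assuming msglimit stays a u16-sized field:

#include <linux/limits.h>

static int iucv_check_msglimit(int val)
{
	if (val < 1 || val > U16_MAX)	/* must fit the u16 msglimit */
		return -EINVAL;
	return 0;
}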
- */ -static void iucv_block_cpu_almost(void *data) -{ - int cpu = smp_processor_id(); - union iucv_param *parm; - - /* Allow iucv control interrupts only */ - parm = iucv_param_irq[cpu]; - memset(parm, 0, sizeof(union iucv_param)); - parm->set_mask.ipmask = 0x08; - iucv_call_b2f0(IUCV_SETMASK, parm); - /* Allow iucv-severed interrupt only */ - memset(parm, 0, sizeof(union iucv_param)); - parm->set_mask.ipmask = 0x20; - iucv_call_b2f0(IUCV_SETCONTROLMASK, parm); - - /* Clear indication that iucv interrupts are allowed for this cpu. */ - cpumask_clear_cpu(cpu, &iucv_irq_cpumask); -} - -/** * iucv_declare_cpu * @data: unused * @@ -1834,146 +1786,6 @@ static void iucv_external_interrupt(struct ext_code ext_code, spin_unlock(&iucv_queue_lock); } -static int iucv_pm_prepare(struct device *dev) -{ - int rc = 0; - -#ifdef CONFIG_PM_DEBUG - printk(KERN_INFO "iucv_pm_prepare\n"); -#endif - if (dev->driver && dev->driver->pm && dev->driver->pm->prepare) - rc = dev->driver->pm->prepare(dev); - return rc; -} - -static void iucv_pm_complete(struct device *dev) -{ -#ifdef CONFIG_PM_DEBUG - printk(KERN_INFO "iucv_pm_complete\n"); -#endif - if (dev->driver && dev->driver->pm && dev->driver->pm->complete) - dev->driver->pm->complete(dev); -} - -/** - * iucv_path_table_empty() - determine if iucv path table is empty - * - * Returns 0 if there are still iucv pathes defined - * 1 if there are no iucv pathes defined - */ -static int iucv_path_table_empty(void) -{ - int i; - - for (i = 0; i < iucv_max_pathid; i++) { - if (iucv_path_table[i]) - return 0; - } - return 1; -} - -/** - * iucv_pm_freeze() - Freeze PM callback - * @dev: iucv-based device - * - * disable iucv interrupts - * invoke callback function of the iucv-based driver - * shut down iucv, if no iucv-pathes are established anymore - */ -static int iucv_pm_freeze(struct device *dev) -{ - int cpu; - struct iucv_irq_list *p, *n; - int rc = 0; - -#ifdef CONFIG_PM_DEBUG - printk(KERN_WARNING "iucv_pm_freeze\n"); -#endif - if (iucv_pm_state != IUCV_PM_FREEZING) { - for_each_cpu(cpu, &iucv_irq_cpumask) - smp_call_function_single(cpu, iucv_block_cpu_almost, - NULL, 1); - cancel_work_sync(&iucv_work); - list_for_each_entry_safe(p, n, &iucv_work_queue, list) { - list_del_init(&p->list); - iucv_sever_pathid(p->data.ippathid, - iucv_error_no_listener); - kfree(p); - } - } - iucv_pm_state = IUCV_PM_FREEZING; - if (dev->driver && dev->driver->pm && dev->driver->pm->freeze) - rc = dev->driver->pm->freeze(dev); - if (iucv_path_table_empty()) - iucv_disable(); - return rc; -} - -/** - * iucv_pm_thaw() - Thaw PM callback - * @dev: iucv-based device - * - * make iucv ready for use again: allocate path table, declare interrupt buffers - * and enable iucv interrupts - * invoke callback function of the iucv-based driver - */ -static int iucv_pm_thaw(struct device *dev) -{ - int rc = 0; - -#ifdef CONFIG_PM_DEBUG - printk(KERN_WARNING "iucv_pm_thaw\n"); -#endif - iucv_pm_state = IUCV_PM_THAWING; - if (!iucv_path_table) { - rc = iucv_enable(); - if (rc) - goto out; - } - if (cpumask_empty(&iucv_irq_cpumask)) { - if (iucv_nonsmp_handler) - /* enable interrupts on one cpu */ - iucv_allow_cpu(NULL); - else - /* enable interrupts on all cpus */ - iucv_setmask_mp(); - } - if (dev->driver && dev->driver->pm && dev->driver->pm->thaw) - rc = dev->driver->pm->thaw(dev); -out: - return rc; -} - -/** - * iucv_pm_restore() - Restore PM callback - * @dev: iucv-based device - * - * make iucv ready for use again: allocate path table, declare interrupt buffers - * and enable iucv 
interrupts - * invoke callback function of the iucv-based driver - */ -static int iucv_pm_restore(struct device *dev) -{ - int rc = 0; - -#ifdef CONFIG_PM_DEBUG - printk(KERN_WARNING "iucv_pm_restore %p\n", iucv_path_table); -#endif - if ((iucv_pm_state != IUCV_PM_RESTORING) && iucv_path_table) - pr_warn("Suspending Linux did not completely close all IUCV connections\n"); - iucv_pm_state = IUCV_PM_RESTORING; - if (cpumask_empty(&iucv_irq_cpumask)) { - rc = iucv_query_maxconn(); - rc = iucv_enable(); - if (rc) - goto out; - } - if (dev->driver && dev->driver->pm && dev->driver->pm->restore) - rc = dev->driver->pm->restore(dev); -out: - return rc; -} - struct iucv_interface iucv_if = { .message_receive = iucv_message_receive, .__message_receive = __iucv_message_receive, diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index d148766f40d1..fdfef926c591 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -758,6 +758,7 @@ static const struct proto_ops l2tp_ip6_ops = { .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, #ifdef CONFIG_COMPAT + .compat_ioctl = inet6_compat_ioctl, .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, #endif diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index a42e4ed5ab0e..fd30ea61336e 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1593,7 +1593,8 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, dev->type == ARPHRD_IPGRE || dev->type == ARPHRD_IP6GRE || dev->type == ARPHRD_SIT || - dev->type == ARPHRD_TUNNEL) { + dev->type == ARPHRD_TUNNEL || + dev->type == ARPHRD_TUNNEL6) { mdev = mpls_add_dev(dev); if (IS_ERR(mdev)) return notifier_from_errno(PTR_ERR(mdev)); diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 45497af23906..ece6f92cf7d1 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -516,7 +516,16 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, return ret; } - ack_size = TCPOLEN_MPTCP_DSS_ACK64; + if (subflow->use_64bit_ack) { + ack_size = TCPOLEN_MPTCP_DSS_ACK64; + opts->ext_copy.data_ack = msk->ack_seq; + opts->ext_copy.ack64 = 1; + } else { + ack_size = TCPOLEN_MPTCP_DSS_ACK32; + opts->ext_copy.data_ack32 = (uint32_t)(msk->ack_seq); + opts->ext_copy.ack64 = 0; + } + opts->ext_copy.use_ack = 1; /* Add kind/length/subtype/flag overhead if mapping is not populated */ if (dss_size == 0) @@ -524,10 +533,6 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, dss_size += ack_size; - opts->ext_copy.data_ack = msk->ack_seq; - opts->ext_copy.ack64 = 1; - opts->ext_copy.use_ack = 1; - *size = ALIGN(dss_size, 4); return true; } @@ -986,8 +991,13 @@ mp_capable_done: u8 flags = 0; if (mpext->use_ack) { - len += TCPOLEN_MPTCP_DSS_ACK64; - flags = MPTCP_DSS_HAS_ACK | MPTCP_DSS_ACK64; + flags = MPTCP_DSS_HAS_ACK; + if (mpext->ack64) { + len += TCPOLEN_MPTCP_DSS_ACK64; + flags |= MPTCP_DSS_ACK64; + } else { + len += TCPOLEN_MPTCP_DSS_ACK32; + } } if (mpext->use_map) { @@ -1004,8 +1014,13 @@ mp_capable_done: *ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags); if (mpext->use_ack) { - put_unaligned_be64(mpext->data_ack, ptr); - ptr += 2; + if (mpext->ack64) { + put_unaligned_be64(mpext->data_ack, ptr); + ptr += 2; + } else { + put_unaligned_be32(mpext->data_ack32, ptr); + ptr += 1; + } } if (mpext->use_map) { diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 1f52a0fa31ed..ba9d3d5c625f 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -367,8 +367,10 @@ 
static void mptcp_stop_timer(struct sock *sk) static bool mptcp_ext_cache_refill(struct mptcp_sock *msk) { + const struct sock *sk = (const struct sock *)msk; + if (!msk->cached_ext) - msk->cached_ext = __skb_ext_alloc(); + msk->cached_ext = __skb_ext_alloc(sk->sk_allocation); return !!msk->cached_ext; } @@ -510,20 +512,6 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, * fooled into a warning if we don't init here */ pfrag = sk_page_frag(sk); - while ((!retransmission && !mptcp_page_frag_refill(ssk, pfrag)) || - !mptcp_ext_cache_refill(msk)) { - ret = sk_stream_wait_memory(ssk, timeo); - if (ret) - return ret; - - /* if sk_stream_wait_memory() sleeps snd_una can change - * significantly, refresh the rtx queue - */ - mptcp_clean_una(sk); - - if (unlikely(__mptcp_needs_tcp_fallback(msk))) - return 0; - } if (!retransmission) { write_seq = &msk->write_seq; page = pfrag->page; @@ -590,7 +578,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, * access the skb after the sendpages call */ ret = do_tcp_sendpages(ssk, page, offset, psize, - msg->msg_flags | MSG_SENDPAGE_NOTLAST); + msg->msg_flags | MSG_SENDPAGE_NOTLAST | MSG_DONTWAIT); if (ret <= 0) return ret; @@ -653,6 +641,15 @@ out: return ret; } +static void mptcp_nospace(struct mptcp_sock *msk, struct socket *sock) +{ + clear_bit(MPTCP_SEND_SPACE, &msk->flags); + smp_mb__after_atomic(); /* msk->flags is changed by write_space cb */ + + /* enables sk->write_space() callbacks */ + set_bit(SOCK_NOSPACE, &sock->flags); +} + static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow; @@ -660,19 +657,17 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) sock_owned_by_me((const struct sock *)msk); + if (!mptcp_ext_cache_refill(msk)) + return NULL; + mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); if (!sk_stream_memory_free(ssk)) { struct socket *sock = ssk->sk_socket; - if (sock) { - clear_bit(MPTCP_SEND_SPACE, &msk->flags); - smp_mb__after_atomic(); - - /* enables sk->write_space() callbacks */ - set_bit(SOCK_NOSPACE, &sock->flags); - } + if (sock) + mptcp_nospace(msk, sock); return NULL; } @@ -698,22 +693,19 @@ static void ssk_check_wmem(struct mptcp_sock *msk, struct sock *ssk) return; sock = READ_ONCE(ssk->sk_socket); - - if (sock) { - clear_bit(MPTCP_SEND_SPACE, &msk->flags); - smp_mb__after_atomic(); - /* set NOSPACE only after clearing SEND_SPACE flag */ - set_bit(SOCK_NOSPACE, &sock->flags); - } + if (sock) + mptcp_nospace(msk, sock); } static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { int mss_now = 0, size_goal = 0, ret = 0; struct mptcp_sock *msk = mptcp_sk(sk); + struct page_frag *pfrag; struct socket *ssock; size_t copied = 0; struct sock *ssk; + bool tx_ok; long timeo; if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL)) @@ -738,11 +730,29 @@ fallback: return ret >= 0 ? ret + copied : (copied ? copied : ret); } + pfrag = sk_page_frag(sk); +restart: mptcp_clean_una(sk); +wait_for_sndbuf: __mptcp_flush_join_list(msk); ssk = mptcp_subflow_get_send(msk); - while (!sk_stream_memory_free(sk) || !ssk) { + while (!sk_stream_memory_free(sk) || + !ssk || + !mptcp_page_frag_refill(ssk, pfrag)) { + if (ssk) { + /* make sure retransmit timer is + * running before we wait for memory. + * + * The retransmit timer might be needed + * to make the peer send an up-to-date + * MPTCP Ack. 
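The options.c hunks above size and emit the DSS data_ack at either width, keyed off the new use_64bit_ack subflow bit that subflow.c sets further below. A condensed sketch of the emit step (names follow the hunk; the option header and length bookkeeping are elided):

static __be32 *mptcp_write_data_ack(__be32 *ptr, u64 data_ack, bool ack64)
{
	if (ack64) {
		put_unaligned_be64(data_ack, ptr);
		ptr += 2;	/* 8 bytes of option space */
	} else {
		put_unaligned_be32((u32)data_ack, ptr);
		ptr += 1;	/* 4 bytes */
	}
	return ptr;
}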
+ */ + mptcp_set_timeout(sk, ssk); + if (!mptcp_timer_pending(sk)) + mptcp_reset_timer(sk); + } + ret = sk_stream_wait_memory(sk, &timeo); if (ret) goto out; @@ -759,11 +769,18 @@ fallback: pr_debug("conn_list->subflow=%p", ssk); lock_sock(ssk); - while (msg_data_left(msg)) { + tx_ok = msg_data_left(msg); + while (tx_ok) { ret = mptcp_sendmsg_frag(sk, ssk, msg, NULL, &timeo, &mss_now, &size_goal); - if (ret < 0) + if (ret < 0) { + if (ret == -EAGAIN && timeo > 0) { + mptcp_set_timeout(sk, ssk); + release_sock(ssk); + goto restart; + } break; + } if (ret == 0 && unlikely(__mptcp_needs_tcp_fallback(msk))) { /* Can happen for passive sockets: * 3WHS negotiated MPTCP, but first packet after is @@ -777,6 +794,50 @@ fallback: } copied += ret; + + tx_ok = msg_data_left(msg); + if (!tx_ok) + break; + + if (!sk_stream_memory_free(ssk) || + !mptcp_page_frag_refill(ssk, pfrag) || + !mptcp_ext_cache_refill(msk)) { + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + tcp_push(ssk, msg->msg_flags, mss_now, + tcp_sk(ssk)->nonagle, size_goal); + mptcp_set_timeout(sk, ssk); + release_sock(ssk); + goto restart; + } + + /* memory is charged to mptcp level socket as well, i.e. + * if msg is very large, mptcp socket may run out of buffer + * space. mptcp_clean_una() will release data that has + * been acked at mptcp level in the mean time, so there is + * a good chance we can continue sending data right away. + * + * Normally, when the tcp subflow can accept more data, then + * so can the MPTCP socket. However, we need to cope with + * peers that might lag behind in their MPTCP-level + * acknowledgements, i.e. data might have been acked at + * tcp level only. So, we must also check the MPTCP socket + * limits before we send more data. + */ + if (unlikely(!sk_stream_memory_free(sk))) { + tcp_push(ssk, msg->msg_flags, mss_now, + tcp_sk(ssk)->nonagle, size_goal); + mptcp_clean_una(sk); + if (!sk_stream_memory_free(sk)) { + /* can't send more for now, need to wait for + * MPTCP-level ACKs from peer. + * + * Wakeup will happen via mptcp_clean_una(). 
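The comment above explains why the sender must respect both levels of accounting: the TCP subflow may have room while the MPTCP socket is still waiting on MPTCP-level ACKs. A compact model of the double check (hypothetical helper; the real loop also refills the page frag and the extension cache):

static bool mptcp_can_send_more(const struct sock *sk,	/* mptcp socket */
				const struct sock *ssk)	/* tcp subflow */
{
	return sk_stream_memory_free(ssk) &&
	       sk_stream_memory_free(sk);
}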
+ */ + mptcp_set_timeout(sk, ssk); + release_sock(ssk); + goto wait_for_sndbuf; + } + } } mptcp_set_timeout(sk, ssk); @@ -1094,7 +1155,7 @@ static void mptcp_worker(struct work_struct *work) { struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work); struct sock *ssk, *sk = &msk->sk.icsk_inet.sk; - int orig_len, orig_offset, ret, mss_now = 0, size_goal = 0; + int orig_len, orig_offset, mss_now = 0, size_goal = 0; struct mptcp_data_frag *dfrag; u64 orig_write_seq; size_t copied = 0; @@ -1116,6 +1177,9 @@ static void mptcp_worker(struct work_struct *work) if (!dfrag) goto unlock; + if (!mptcp_ext_cache_refill(msk)) + goto reset_unlock; + ssk = mptcp_subflow_get_retrans(msk); if (!ssk) goto reset_unlock; @@ -1127,8 +1191,8 @@ static void mptcp_worker(struct work_struct *work) orig_offset = dfrag->offset; orig_write_seq = dfrag->data_seq; while (dfrag->data_len > 0) { - ret = mptcp_sendmsg_frag(sk, ssk, &msg, dfrag, &timeo, &mss_now, - &size_goal); + int ret = mptcp_sendmsg_frag(sk, ssk, &msg, dfrag, &timeo, + &mss_now, &size_goal); if (ret < 0) break; @@ -1136,6 +1200,9 @@ static void mptcp_worker(struct work_struct *work) copied += ret; dfrag->data_len -= ret; dfrag->offset += ret; + + if (!mptcp_ext_cache_refill(msk)) + break; } if (copied) tcp_push(ssk, msg.msg_flags, mss_now, tcp_sk(ssk)->nonagle, @@ -2001,6 +2068,7 @@ static const struct proto_ops mptcp_v6_stream_ops = { .mmap = sock_no_mmap, .sendpage = inet_sendpage, #ifdef CONFIG_COMPAT + .compat_ioctl = inet6_compat_ioctl, .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, #endif diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index e4ca6320ce76..f5adca93e8fb 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -290,6 +290,7 @@ struct mptcp_subflow_context { data_avail : 1, rx_eof : 1, data_fin_tx_enable : 1, + use_64bit_ack : 1, /* Set when we received a 64-bit DSN */ can_ack : 1; /* only after processing the remote a key */ u64 data_fin_tx_seq; u32 remote_nonce; diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 53c75b0e5dce..0020d356233d 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -667,9 +667,11 @@ static enum mapping_status get_mapping_status(struct sock *ssk) if (!mpext->dsn64) { map_seq = expand_seq(subflow->map_seq, subflow->map_data_len, mpext->data_seq); + subflow->use_64bit_ack = 0; pr_debug("expanded seq=%llu", subflow->map_seq); } else { map_seq = mpext->data_seq; + subflow->use_64bit_ack = 1; } if (subflow->map_valid) { diff --git a/net/psample/psample.c b/net/psample/psample.c index 6f2fbc6b9eb2..34a74043840b 100644 --- a/net/psample/psample.c +++ b/net/psample/psample.c @@ -14,6 +14,8 @@ #include <net/genetlink.h> #include <net/psample.h> #include <linux/spinlock.h> +#include <net/ip_tunnels.h> +#include <net/dst_metadata.h> #define PSAMPLE_MAX_PACKET_SIZE 0xffff @@ -207,10 +209,155 @@ void psample_group_put(struct psample_group *group) } EXPORT_SYMBOL_GPL(psample_group_put); +static int __psample_ip_tun_to_nlattr(struct sk_buff *skb, + struct ip_tunnel_info *tun_info) +{ + unsigned short tun_proto = ip_tunnel_info_af(tun_info); + const void *tun_opts = ip_tunnel_info_opts(tun_info); + const struct ip_tunnel_key *tun_key = &tun_info->key; + int tun_opts_len = tun_info->options_len; + + if (tun_key->tun_flags & TUNNEL_KEY && + nla_put_be64(skb, PSAMPLE_TUNNEL_KEY_ATTR_ID, tun_key->tun_id, + PSAMPLE_TUNNEL_KEY_ATTR_PAD)) + return -EMSGSIZE; + + if (tun_info->mode & IP_TUNNEL_INFO_BRIDGE && + nla_put_flag(skb, 
PSAMPLE_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE)) + return -EMSGSIZE; + + switch (tun_proto) { + case AF_INET: + if (tun_key->u.ipv4.src && + nla_put_in_addr(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV4_SRC, + tun_key->u.ipv4.src)) + return -EMSGSIZE; + if (tun_key->u.ipv4.dst && + nla_put_in_addr(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV4_DST, + tun_key->u.ipv4.dst)) + return -EMSGSIZE; + break; + case AF_INET6: + if (!ipv6_addr_any(&tun_key->u.ipv6.src) && + nla_put_in6_addr(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV6_SRC, + &tun_key->u.ipv6.src)) + return -EMSGSIZE; + if (!ipv6_addr_any(&tun_key->u.ipv6.dst) && + nla_put_in6_addr(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV6_DST, + &tun_key->u.ipv6.dst)) + return -EMSGSIZE; + break; + } + if (tun_key->tos && + nla_put_u8(skb, PSAMPLE_TUNNEL_KEY_ATTR_TOS, tun_key->tos)) + return -EMSGSIZE; + if (nla_put_u8(skb, PSAMPLE_TUNNEL_KEY_ATTR_TTL, tun_key->ttl)) + return -EMSGSIZE; + if ((tun_key->tun_flags & TUNNEL_DONT_FRAGMENT) && + nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_DONT_FRAGMENT)) + return -EMSGSIZE; + if ((tun_key->tun_flags & TUNNEL_CSUM) && + nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_CSUM)) + return -EMSGSIZE; + if (tun_key->tp_src && + nla_put_be16(skb, PSAMPLE_TUNNEL_KEY_ATTR_TP_SRC, tun_key->tp_src)) + return -EMSGSIZE; + if (tun_key->tp_dst && + nla_put_be16(skb, PSAMPLE_TUNNEL_KEY_ATTR_TP_DST, tun_key->tp_dst)) + return -EMSGSIZE; + if ((tun_key->tun_flags & TUNNEL_OAM) && + nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_OAM)) + return -EMSGSIZE; + if (tun_opts_len) { + if (tun_key->tun_flags & TUNNEL_GENEVE_OPT && + nla_put(skb, PSAMPLE_TUNNEL_KEY_ATTR_GENEVE_OPTS, + tun_opts_len, tun_opts)) + return -EMSGSIZE; + else if (tun_key->tun_flags & TUNNEL_ERSPAN_OPT && + nla_put(skb, PSAMPLE_TUNNEL_KEY_ATTR_ERSPAN_OPTS, + tun_opts_len, tun_opts)) + return -EMSGSIZE; + } + + return 0; +} + +static int psample_ip_tun_to_nlattr(struct sk_buff *skb, + struct ip_tunnel_info *tun_info) +{ + struct nlattr *nla; + int err; + + nla = nla_nest_start_noflag(skb, PSAMPLE_ATTR_TUNNEL); + if (!nla) + return -EMSGSIZE; + + err = __psample_ip_tun_to_nlattr(skb, tun_info); + if (err) { + nla_nest_cancel(skb, nla); + return err; + } + + nla_nest_end(skb, nla); + + return 0; +} + +static int psample_tunnel_meta_len(struct ip_tunnel_info *tun_info) +{ + unsigned short tun_proto = ip_tunnel_info_af(tun_info); + const struct ip_tunnel_key *tun_key = &tun_info->key; + int tun_opts_len = tun_info->options_len; + int sum = 0; + + if (tun_key->tun_flags & TUNNEL_KEY) + sum += nla_total_size(sizeof(u64)); + + if (tun_info->mode & IP_TUNNEL_INFO_BRIDGE) + sum += nla_total_size(0); + + switch (tun_proto) { + case AF_INET: + if (tun_key->u.ipv4.src) + sum += nla_total_size(sizeof(u32)); + if (tun_key->u.ipv4.dst) + sum += nla_total_size(sizeof(u32)); + break; + case AF_INET6: + if (!ipv6_addr_any(&tun_key->u.ipv6.src)) + sum += nla_total_size(sizeof(struct in6_addr)); + if (!ipv6_addr_any(&tun_key->u.ipv6.dst)) + sum += nla_total_size(sizeof(struct in6_addr)); + break; + } + if (tun_key->tos) + sum += nla_total_size(sizeof(u8)); + sum += nla_total_size(sizeof(u8)); /* TTL */ + if (tun_key->tun_flags & TUNNEL_DONT_FRAGMENT) + sum += nla_total_size(0); + if (tun_key->tun_flags & TUNNEL_CSUM) + sum += nla_total_size(0); + if (tun_key->tp_src) + sum += nla_total_size(sizeof(u16)); + if (tun_key->tp_dst) + sum += nla_total_size(sizeof(u16)); + if (tun_key->tun_flags & TUNNEL_OAM) + sum += nla_total_size(0); + if (tun_opts_len) { + if (tun_key->tun_flags & TUNNEL_GENEVE_OPT) + sum += nla_total_size(tun_opts_len); + 
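psample's new tunnel dump follows the usual netlink sizing discipline: every nla_put_*() in __psample_ip_tun_to_nlattr() above has a matching nla_total_size() term in psample_tunnel_meta_len(), whose tail continues below, so the skb is sized before any attribute is written. An illustrative fragment of that pairing, showing just two of the terms:

/* sizing side: must mirror the emit side, attribute for attribute */
static int tunnel_meta_len_fragment(const struct ip_tunnel_key *tun_key)
{
	int sum = 0;

	if (tun_key->tun_flags & TUNNEL_KEY)
		sum += nla_total_size(sizeof(u64));	/* ..._ATTR_ID */
	sum += nla_total_size(sizeof(u8));		/* ..._ATTR_TTL, always emitted */
	return sum;
}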
else if (tun_key->tun_flags & TUNNEL_ERSPAN_OPT) + sum += nla_total_size(tun_opts_len); + } + + return sum; +} + void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, u32 trunc_size, int in_ifindex, int out_ifindex, u32 sample_rate) { + struct ip_tunnel_info *tun_info; struct sk_buff *nl_skb; int data_len; int meta_len; @@ -224,6 +371,10 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, nla_total_size(sizeof(u32)) + /* group_num */ nla_total_size(sizeof(u32)); /* seq */ + tun_info = skb_tunnel_info(skb); + if (tun_info) + meta_len += psample_tunnel_meta_len(tun_info); + data_len = min(skb->len, trunc_size); if (meta_len + nla_total_size(data_len) > PSAMPLE_MAX_PACKET_SIZE) data_len = PSAMPLE_MAX_PACKET_SIZE - meta_len - NLA_HDRLEN @@ -278,6 +429,12 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, goto error; } + if (tun_info) { + ret = psample_ip_tun_to_nlattr(nl_skb, tun_info); + if (unlikely(ret < 0)) + goto error; + } + genlmsg_end(nl_skb, data); genlmsg_multicast_netns(&psample_nl_family, group->net, nl_skb, 0, PSAMPLE_NL_MCGRP_SAMPLE, GFP_ATOMIC); diff --git a/net/rds/info.c b/net/rds/info.c index 03f6fd56d237..b6b46a8214a0 100644 --- a/net/rds/info.c +++ b/net/rds/info.c @@ -162,7 +162,6 @@ int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval, struct rds_info_lengths lens; unsigned long nr_pages = 0; unsigned long start; - unsigned long i; rds_info_func func; struct page **pages = NULL; int ret; @@ -193,7 +192,7 @@ int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval, ret = -ENOMEM; goto out; } - ret = get_user_pages_fast(start, nr_pages, FOLL_WRITE, pages); + ret = pin_user_pages_fast(start, nr_pages, FOLL_WRITE, pages); if (ret != nr_pages) { if (ret > 0) nr_pages = ret; @@ -235,8 +234,8 @@ call_func: ret = -EFAULT; out: - for (i = 0; pages && i < nr_pages; i++) - put_page(pages[i]); + if (pages) + unpin_user_pages(pages, nr_pages); kfree(pages); return ret; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index c87af430107a..ccfa0ab3e7f4 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -1032,6 +1032,7 @@ static const struct proto_ops inet6_seqpacket_ops = { .recvmsg = inet_recvmsg, .mmap = sock_no_mmap, #ifdef CONFIG_COMPAT + .compat_ioctl = inet6_compat_ioctl, .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, #endif diff --git a/net/socket.c b/net/socket.c index 1c9a7260a41d..80422fc3c836 100644 --- a/net/socket.c +++ b/net/socket.c @@ -3366,94 +3366,6 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd, return err; } -struct rtentry32 { - u32 rt_pad1; - struct sockaddr rt_dst; /* target address */ - struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */ - struct sockaddr rt_genmask; /* target network mask (IP) */ - unsigned short rt_flags; - short rt_pad2; - u32 rt_pad3; - unsigned char rt_tos; - unsigned char rt_class; - short rt_pad4; - short rt_metric; /* +1 for binary compatibility! 
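The rds/info.c conversion above swaps get_user_pages_fast() for pin_user_pages_fast(); pages pinned through the pin API must be released with unpin_user_pages() rather than per-page put_page(). A sketch of the paired usage under the same FOLL_WRITE assumption:

static int pin_fill_unpin(unsigned long start, int nr_pages,
			  struct page **pages)
{
	int pinned;

	pinned = pin_user_pages_fast(start, nr_pages, FOLL_WRITE, pages);
	if (pinned <= 0)
		return pinned ? pinned : -EFAULT;
	/* ... write into the pinned pages ... */
	unpin_user_pages(pages, pinned);	/* one unpin per pinned page */
	return 0;
}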
*/ - /* char * */ u32 rt_dev; /* forcing the device at add */ - u32 rt_mtu; /* per route MTU/Window */ - u32 rt_window; /* Window clamping */ - unsigned short rt_irtt; /* Initial RTT */ -}; - -struct in6_rtmsg32 { - struct in6_addr rtmsg_dst; - struct in6_addr rtmsg_src; - struct in6_addr rtmsg_gateway; - u32 rtmsg_type; - u16 rtmsg_dst_len; - u16 rtmsg_src_len; - u32 rtmsg_metric; - u32 rtmsg_info; - u32 rtmsg_flags; - s32 rtmsg_ifindex; -}; - -static int routing_ioctl(struct net *net, struct socket *sock, - unsigned int cmd, void __user *argp) -{ - int ret; - void *r = NULL; - struct in6_rtmsg r6; - struct rtentry r4; - char devname[16]; - u32 rtdev; - mm_segment_t old_fs = get_fs(); - - if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */ - struct in6_rtmsg32 __user *ur6 = argp; - ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst), - 3 * sizeof(struct in6_addr)); - ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type)); - ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len)); - ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len)); - ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric)); - ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info)); - ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags)); - ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex)); - - r = (void *) &r6; - } else { /* ipv4 */ - struct rtentry32 __user *ur4 = argp; - ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst), - 3 * sizeof(struct sockaddr)); - ret |= get_user(r4.rt_flags, &(ur4->rt_flags)); - ret |= get_user(r4.rt_metric, &(ur4->rt_metric)); - ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu)); - ret |= get_user(r4.rt_window, &(ur4->rt_window)); - ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt)); - ret |= get_user(rtdev, &(ur4->rt_dev)); - if (rtdev) { - ret |= copy_from_user(devname, compat_ptr(rtdev), 15); - r4.rt_dev = (char __user __force *)devname; - devname[15] = 0; - } else - r4.rt_dev = NULL; - - r = (void *) &r4; - } - - if (ret) { - ret = -EFAULT; - goto out; - } - - set_fs(KERNEL_DS); - ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r); - set_fs(old_fs); - -out: - return ret; -} - /* Since old style bridge ioctl's endup using SIOCDEVPRIVATE * for some operations; this forces use of the newer bridge-utils that * use compatible ioctls @@ -3492,9 +3404,6 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCGIFMAP: case SIOCSIFMAP: return compat_sioc_ifmap(net, cmd, argp); - case SIOCADDRT: - case SIOCDELRT: - return routing_ioctl(net, sock, cmd, argp); case SIOCGSTAMP_OLD: case SIOCGSTAMPNS_OLD: if (!sock->ops->gettstamp) diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh index 26044e397157..b32ba5fec59d 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh @@ -107,7 +107,7 @@ ingress_flow_action_drop_test() RET=0 - devlink_trap_drop_test ingress_flow_action_drop acl_drops $swp2 101 + devlink_trap_drop_test ingress_flow_action_drop $swp2 101 log_test "ingress_flow_action_drop" @@ -132,7 +132,7 @@ egress_flow_action_drop_test() RET=0 - devlink_trap_drop_test egress_flow_action_drop acl_drops $swp2 102 + devlink_trap_drop_test egress_flow_action_drop $swp2 102 log_test "egress_flow_action_drop" diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh index 
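With routing_ioctl() and its set_fs(KERNEL_DS) trick removed from socket.c above, compat SIOCADDRT/SIOCDELRT handling moves next to the protocol code. A sketch of the IPv6 side, modeled on the replacement this series adds to the inet6 ioctl path (struct layout simplified; the kernel uses compat_* types):

struct compat_in6_rtmsg {
	struct in6_addr	rtmsg_dst;
	struct in6_addr	rtmsg_src;
	struct in6_addr	rtmsg_gateway;
	u32		rtmsg_type;
	u16		rtmsg_dst_len;
	u16		rtmsg_src_len;
	u32		rtmsg_metric;
	u32		rtmsg_info;
	u32		rtmsg_flags;
	s32		rtmsg_ifindex;
};

static int compat_route_ioctl(struct net *net, unsigned int cmd,
			      struct compat_in6_rtmsg __user *ur)
{
	struct in6_rtmsg rt;

	/* dst, src and gateway are laid out back to back in both structs */
	if (copy_from_user(&rt.rtmsg_dst, &ur->rtmsg_dst,
			   3 * sizeof(struct in6_addr)) ||
	    get_user(rt.rtmsg_type, &ur->rtmsg_type) ||
	    get_user(rt.rtmsg_dst_len, &ur->rtmsg_dst_len) ||
	    get_user(rt.rtmsg_src_len, &ur->rtmsg_src_len) ||
	    get_user(rt.rtmsg_metric, &ur->rtmsg_metric) ||
	    get_user(rt.rtmsg_info, &ur->rtmsg_info) ||
	    get_user(rt.rtmsg_flags, &ur->rtmsg_flags) ||
	    get_user(rt.rtmsg_ifindex, &ur->rtmsg_ifindex))
		return -EFAULT;

	return ipv6_route_ioctl(net, cmd, &rt);
}

No more mm_segment_t games: the user copy happens once, then the same kernel-struct ipv6_route_ioctl() shown in the route.c hunk does the work.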
e7aecb065409..a4c2812e9807 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh @@ -96,7 +96,6 @@ source_mac_is_multicast_test() { local trap_name="source_mac_is_multicast" local smac=01:02:03:04:05:06 - local group_name="l2_drops" local mz_pid tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \ @@ -107,7 +106,7 @@ source_mac_is_multicast_test() RET=0 - devlink_trap_drop_test $trap_name $group_name $swp2 101 + devlink_trap_drop_test $trap_name $swp2 101 log_test "Source MAC is multicast" @@ -118,7 +117,6 @@ __vlan_tag_mismatch_test() { local trap_name="vlan_tag_mismatch" local dmac=de:ad:be:ef:13:37 - local group_name="l2_drops" local opt=$1; shift local mz_pid @@ -132,7 +130,7 @@ __vlan_tag_mismatch_test() $MZ $h1 "$opt" -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $swp2 101 + devlink_trap_drop_test $trap_name $swp2 101 # Add PVID and make sure packets are no longer dropped. bridge vlan add vid 1 dev $swp1 pvid untagged master @@ -140,7 +138,7 @@ __vlan_tag_mismatch_test() devlink_trap_stats_idle_test $trap_name check_err $? "Trap stats not idle when packets should not be dropped" - devlink_trap_group_stats_idle_test $group_name + devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name) check_err $? "Trap group stats not idle with when packets should not be dropped" tc_check_packets "dev $swp2 egress" 101 0 @@ -179,7 +177,6 @@ ingress_vlan_filter_test() { local trap_name="ingress_vlan_filter" local dmac=de:ad:be:ef:13:37 - local group_name="l2_drops" local mz_pid local vid=10 @@ -193,7 +190,7 @@ ingress_vlan_filter_test() $MZ $h1 -Q $vid -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $swp2 101 + devlink_trap_drop_test $trap_name $swp2 101 # Add the VLAN on the bridge port and make sure packets are no longer # dropped. @@ -202,7 +199,7 @@ ingress_vlan_filter_test() devlink_trap_stats_idle_test $trap_name check_err $? "Trap stats not idle when packets should not be dropped" - devlink_trap_group_stats_idle_test $group_name + devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name) check_err $? "Trap group stats not idle with when packets should not be dropped" tc_check_packets "dev $swp2 egress" 101 0 @@ -222,7 +219,6 @@ __ingress_stp_filter_test() { local trap_name="ingress_spanning_tree_filter" local dmac=de:ad:be:ef:13:37 - local group_name="l2_drops" local state=$1; shift local mz_pid local vid=20 @@ -237,7 +233,7 @@ __ingress_stp_filter_test() $MZ $h1 -Q $vid -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $swp2 101 + devlink_trap_drop_test $trap_name $swp2 101 # Change STP state to forwarding and make sure packets are no longer # dropped. @@ -246,7 +242,7 @@ __ingress_stp_filter_test() devlink_trap_stats_idle_test $trap_name check_err $? "Trap stats not idle when packets should not be dropped" - devlink_trap_group_stats_idle_test $group_name + devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name) check_err $? 
"Trap group stats not idle with when packets should not be dropped" tc_check_packets "dev $swp2 egress" 101 0 @@ -292,7 +288,6 @@ port_list_is_empty_uc_test() { local trap_name="port_list_is_empty" local dmac=de:ad:be:ef:13:37 - local group_name="l2_drops" local mz_pid # Disable unicast flooding on both ports, so that packets cannot egress @@ -308,7 +303,7 @@ port_list_is_empty_uc_test() $MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $swp2 101 + devlink_trap_drop_test $trap_name $swp2 101 # Allow packets to be flooded to one port. ip link set dev $swp2 type bridge_slave flood on @@ -316,7 +311,7 @@ port_list_is_empty_uc_test() devlink_trap_stats_idle_test $trap_name check_err $? "Trap stats not idle when packets should not be dropped" - devlink_trap_group_stats_idle_test $group_name + devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name) check_err $? "Trap group stats not idle with when packets should not be dropped" tc_check_packets "dev $swp2 egress" 101 0 @@ -335,7 +330,6 @@ port_list_is_empty_mc_test() { local trap_name="port_list_is_empty" local dmac=01:00:5e:00:00:01 - local group_name="l2_drops" local dip=239.0.0.1 local mz_pid @@ -354,7 +348,7 @@ port_list_is_empty_mc_test() $MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -B $dip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $swp2 101 + devlink_trap_drop_test $trap_name $swp2 101 # Allow packets to be flooded to one port. ip link set dev $swp2 type bridge_slave mcast_flood on @@ -362,7 +356,7 @@ port_list_is_empty_mc_test() devlink_trap_stats_idle_test $trap_name check_err $? "Trap stats not idle when packets should not be dropped" - devlink_trap_group_stats_idle_test $group_name + devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name) check_err $? "Trap group stats not idle with when packets should not be dropped" tc_check_packets "dev $swp2 egress" 101 0 @@ -387,7 +381,6 @@ port_loopback_filter_uc_test() { local trap_name="port_loopback_filter" local dmac=de:ad:be:ef:13:37 - local group_name="l2_drops" local mz_pid # Make sure packets can only egress the input port. @@ -401,7 +394,7 @@ port_loopback_filter_uc_test() $MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $swp2 101 + devlink_trap_drop_test $trap_name $swp2 101 # Allow packets to be flooded. ip link set dev $swp2 type bridge_slave flood on @@ -409,7 +402,7 @@ port_loopback_filter_uc_test() devlink_trap_stats_idle_test $trap_name check_err $? "Trap stats not idle when packets should not be dropped" - devlink_trap_group_stats_idle_test $group_name + devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name) check_err $? "Trap group stats not idle with when packets should not be dropped" tc_check_packets "dev $swp2 egress" 101 0 diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh index 616f47d86a61..f5abb1ebd392 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh @@ -161,7 +161,6 @@ ping_check() non_ip_test() { local trap_name="non_ip" - local group_name="l3_drops" local mz_pid RET=0 @@ -176,7 +175,7 @@ non_ip_test() 00:00 de:ad:be:ef" & mz_pid=$! 
- devlink_trap_drop_test $trap_name $group_name $rp2 101 + devlink_trap_drop_test $trap_name $rp2 101 log_test "Non IP" @@ -190,7 +189,6 @@ __uc_dip_over_mc_dmac_test() local dip=$1; shift local flags=${1:-""}; shift local trap_name="uc_dip_over_mc_dmac" - local group_name="l3_drops" local dmac=01:02:03:04:05:06 local mz_pid @@ -206,7 +204,7 @@ __uc_dip_over_mc_dmac_test() -B $dip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $rp2 101 + devlink_trap_drop_test $trap_name $rp2 101 log_test "Unicast destination IP over multicast destination MAC: $desc" @@ -227,7 +225,6 @@ __sip_is_loopback_test() local dip=$1; shift local flags=${1:-""}; shift local trap_name="sip_is_loopback_address" - local group_name="l3_drops" local mz_pid RET=0 @@ -242,7 +239,7 @@ __sip_is_loopback_test() -b $rp1mac -B $dip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $rp2 101 + devlink_trap_drop_test $trap_name $rp2 101 log_test "Source IP is loopback address: $desc" @@ -262,7 +259,6 @@ __dip_is_loopback_test() local dip=$1; shift local flags=${1:-""}; shift local trap_name="dip_is_loopback_address" - local group_name="l3_drops" local mz_pid RET=0 @@ -277,7 +273,7 @@ __dip_is_loopback_test() -B $dip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $rp2 101 + devlink_trap_drop_test $trap_name $rp2 101 log_test "Destination IP is loopback address: $desc" @@ -298,7 +294,6 @@ __sip_is_mc_test() local dip=$1; shift local flags=${1:-""}; shift local trap_name="sip_is_mc" - local group_name="l3_drops" local mz_pid RET=0 @@ -313,7 +308,7 @@ __sip_is_mc_test() -b $rp1mac -B $dip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $rp2 101 + devlink_trap_drop_test $trap_name $rp2 101 log_test "Source IP is multicast: $desc" @@ -329,7 +324,6 @@ sip_is_mc_test() ipv4_sip_is_limited_bc_test() { local trap_name="ipv4_sip_is_limited_bc" - local group_name="l3_drops" local sip=255.255.255.255 local mz_pid @@ -345,7 +339,7 @@ ipv4_sip_is_limited_bc_test() -B $h2_ipv4 -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $rp2 101 + devlink_trap_drop_test $trap_name $rp2 101 log_test "IPv4 source IP is limited broadcast" @@ -382,7 +376,6 @@ __ipv4_header_corrupted_test() local ihl=$1; shift local checksum=$1; shift local trap_name="ip_header_corrupted" - local group_name="l3_drops" local payload local mz_pid @@ -399,7 +392,7 @@ __ipv4_header_corrupted_test() $MZ $h1 -c 0 -d 1msec -a $h1mac -b $rp1mac -q p=$payload & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $rp2 101 + devlink_trap_drop_test $trap_name $rp2 101 log_test "IP header corrupted: $desc: IPv4" @@ -429,7 +422,6 @@ __ipv6_header_corrupted_test() local desc=$1; shift local ipver=$1; shift local trap_name="ip_header_corrupted" - local group_name="l3_drops" local payload local mz_pid @@ -446,7 +438,7 @@ __ipv6_header_corrupted_test() $MZ $h1 -c 0 -d 1msec -a $h1mac -b $rp1mac -q p=$payload & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $rp2 101 + devlink_trap_drop_test $trap_name $rp2 101 log_test "IP header corrupted: $desc: IPv6" @@ -469,7 +461,6 @@ ip_header_corrupted_test() ipv6_mc_dip_reserved_scope_test() { local trap_name="ipv6_mc_dip_reserved_scope" - local group_name="l3_drops" local dip=FF00:: local mz_pid @@ -485,7 +476,7 @@ ipv6_mc_dip_reserved_scope_test() "33:33:00:00:00:00" -B $dip -d 1msec -q & mz_pid=$! 
- devlink_trap_drop_test $trap_name $group_name $rp2 101 + devlink_trap_drop_test $trap_name $rp2 101 log_test "IPv6 multicast destination IP reserved scope" @@ -495,7 +486,6 @@ ipv6_mc_dip_reserved_scope_test() ipv6_mc_dip_interface_local_scope_test() { local trap_name="ipv6_mc_dip_interface_local_scope" - local group_name="l3_drops" local dip=FF01:: local mz_pid @@ -511,7 +501,7 @@ ipv6_mc_dip_interface_local_scope_test() "33:33:00:00:00:00" -B $dip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $rp2 101 + devlink_trap_drop_test $trap_name $rp2 101 log_test "IPv6 multicast destination IP interface-local scope" @@ -526,7 +516,6 @@ __blackhole_route_test() local dip=$1; shift local ip_proto=${1:-"icmp"}; shift local trap_name="blackhole_route" - local group_name="l3_drops" local mz_pid RET=0 @@ -542,7 +531,7 @@ __blackhole_route_test() -B $dip -d 1msec -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $rp2 101 + devlink_trap_drop_test $trap_name $rp2 101 log_test "Blackhole route: IPv$flags" devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101 @@ -558,7 +547,6 @@ blackhole_route_test() irif_disabled_test() { local trap_name="irif_disabled" - local group_name="l3_drops" local t0_packets t0_bytes local t1_packets t1_bytes local mz_pid @@ -613,7 +601,6 @@ irif_disabled_test() erif_disabled_test() { local trap_name="erif_disabled" - local group_name="l3_drops" local t0_packets t0_bytes local t1_packets t1_bytes local mz_pid diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh index 2bc6df42d597..1fedfc9da434 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh @@ -169,7 +169,6 @@ trap_action_check() mtu_value_is_too_small_test() { local trap_name="mtu_value_is_too_small" - local group_name="l3_drops" local expected_action="trap" local mz_pid @@ -191,7 +190,7 @@ mtu_value_is_too_small_test() -B 198.51.100.1 -q & mz_pid=$! - devlink_trap_exception_test $trap_name $group_name + devlink_trap_exception_test $trap_name tc_check_packets_hitting "dev $h1 ingress" 101 check_err $? "Packets were not received by h1" @@ -208,7 +207,6 @@ __ttl_value_is_too_small_test() { local ttl_val=$1; shift local trap_name="ttl_value_is_too_small" - local group_name="l3_drops" local expected_action="trap" local mz_pid @@ -227,7 +225,7 @@ __ttl_value_is_too_small_test() -b $rp1mac -B 198.51.100.1 -q & mz_pid=$! - devlink_trap_exception_test $trap_name $group_name + devlink_trap_exception_test $trap_name tc_check_packets_hitting "dev $h1 ingress" 101 check_err $? "Packets were not received by h1" @@ -271,7 +269,6 @@ __mc_reverse_path_forwarding_test() local proto=$1; shift local flags=${1:-""}; shift local trap_name="mc_reverse_path_forwarding" - local group_name="l3_drops" local expected_action="trap" local mz_pid @@ -292,7 +289,7 @@ __mc_reverse_path_forwarding_test() mz_pid=$! - devlink_trap_exception_test $trap_name $group_name + devlink_trap_exception_test $trap_name tc_check_packets "dev $rp2 egress" 101 0 check_err $? "Packets were not dropped" @@ -322,7 +319,6 @@ __reject_route_test() local unreachable=$1; shift local flags=${1:-""}; shift local trap_name="reject_route" - local group_name="l3_drops" local expected_action="trap" local mz_pid @@ -341,7 +337,7 @@ __reject_route_test() -B $dst_ip -q & mz_pid=$!
- devlink_trap_exception_test $trap_name $group_name + devlink_trap_exception_test $trap_name tc_check_packets_hitting "dev $h1 ingress" 101 check_err $? "ICMP packet was not received by h1" @@ -370,7 +366,6 @@ __host_miss_test() local desc=$1; shift local dip=$1; shift local trap_name="unresolved_neigh" - local group_name="l3_drops" local expected_action="trap" local mz_pid @@ -405,7 +400,6 @@ __invalid_nexthop_test() local subnet=$1; shift local via_add=$1; shift local trap_name="unresolved_neigh" - local group_name="l3_drops" local expected_action="trap" local mz_pid @@ -494,7 +488,6 @@ vrf_without_routes_destroy() ipv4_lpm_miss_test() { local trap_name="ipv4_lpm_miss" - local group_name="l3_drops" local expected_action="trap" local mz_pid @@ -511,7 +504,7 @@ ipv4_lpm_miss_test() -B 203.0.113.1 -q & mz_pid=$! - devlink_trap_exception_test $trap_name $group_name + devlink_trap_exception_test $trap_name log_test "LPM miss: IPv4" @@ -522,7 +515,6 @@ ipv4_lpm_miss_test() ipv6_lpm_miss_test() { local trap_name="ipv6_lpm_miss" - local group_name="l3_drops" local expected_action="trap" local mz_pid @@ -539,7 +531,7 @@ ipv6_lpm_miss_test() -B 2001:db8::1 -q & mz_pid=$! - devlink_trap_exception_test $trap_name $group_name + devlink_trap_exception_test $trap_name log_test "LPM miss: IPv6" diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh index 039629bb92a3..8817851da7a9 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh @@ -140,7 +140,6 @@ ecn_payload_get() ecn_decap_test() { local trap_name="decap_error" - local group_name="tunnel_drops" local desc=$1; shift local ecn_desc=$1; shift local outer_tos=$1; shift @@ -161,7 +160,7 @@ ecn_decap_test() mz_pid=$! - devlink_trap_exception_test $trap_name $group_name + devlink_trap_exception_test $trap_name tc_check_packets "dev $swp1 egress" 101 0 check_err $? "Packets were not dropped" @@ -200,7 +199,6 @@ ipip_payload_get() no_matching_tunnel_test() { local trap_name="decap_error" - local group_name="tunnel_drops" local desc=$1; shift local sip=$1; shift local mz_pid @@ -218,7 +216,7 @@ no_matching_tunnel_test() -A $sip -B 192.0.2.65 -t ip len=48,proto=47,p=$payload -q & mz_pid=$! - devlink_trap_exception_test $trap_name $group_name + devlink_trap_exception_test $trap_name tc_check_packets "dev $swp1 egress" 101 0 check_err $? "Packets were not dropped" diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh index e11a416323cf..10e0f3dbc930 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh @@ -159,7 +159,6 @@ ecn_payload_get() ecn_decap_test() { local trap_name="decap_error" - local group_name="tunnel_drops" local desc=$1; shift local ecn_desc=$1; shift local outer_tos=$1; shift @@ -177,7 +176,7 @@ ecn_decap_test() -t udp sp=12345,dp=$VXPORT,tos=$outer_tos,p=$payload -q & mz_pid=$! - devlink_trap_exception_test $trap_name $group_name + devlink_trap_exception_test $trap_name tc_check_packets "dev $swp1 egress" 101 0 check_err $?
"Packets were not dropped" @@ -228,7 +227,6 @@ short_payload_get() corrupted_packet_test() { local trap_name="decap_error" - local group_name="tunnel_drops" local desc=$1; shift local payload_get=$1; shift local mz_pid @@ -246,7 +244,7 @@ corrupted_packet_test() -B 192.0.2.17 -t udp sp=12345,dp=$VXPORT,p=$payload -q & mz_pid=$! - devlink_trap_exception_test $trap_name $group_name + devlink_trap_exception_test $trap_name tc_check_packets "dev $swp1 egress" 101 0 check_err $? "Packets were not dropped" @@ -297,7 +295,6 @@ mc_smac_payload_get() overlay_smac_is_mc_test() { local trap_name="overlay_smac_is_mc" - local group_name="tunnel_drops" local mz_pid RET=0 @@ -314,7 +311,7 @@ overlay_smac_is_mc_test() -B 192.0.2.17 -t udp sp=12345,dp=$VXPORT,p=$payload -q & mz_pid=$! - devlink_trap_drop_test $trap_name $group_name $swp1 101 + devlink_trap_drop_test $trap_name $swp1 101 log_test "Overlay source MAC is multicast" diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index dd0e5fec6367..51f8e9afe6ae 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -19,8 +19,8 @@ ret=0 ksft_skip=4 # all tests in this script. Can be overridden with -t option -IPV4_TESTS="ipv4_fcnal ipv4_grp_fcnal ipv4_withv6_fcnal ipv4_fcnal_runtime ipv4_compat_mode" -IPV6_TESTS="ipv6_fcnal ipv6_grp_fcnal ipv6_fcnal_runtime ipv6_compat_mode" +IPV4_TESTS="ipv4_fcnal ipv4_grp_fcnal ipv4_withv6_fcnal ipv4_fcnal_runtime ipv4_compat_mode ipv4_fdb_grp_fcnal" +IPV6_TESTS="ipv6_fcnal ipv6_grp_fcnal ipv6_fcnal_runtime ipv6_compat_mode ipv6_fdb_grp_fcnal" ALL_TESTS="basic ${IPV4_TESTS} ${IPV6_TESTS}" TESTS="${ALL_TESTS}" @@ -146,6 +146,7 @@ setup() create_ns remote IP="ip -netns me" + BRIDGE="bridge -netns me" set -e $IP li add veth1 type veth peer name veth2 $IP li set veth1 up @@ -280,6 +281,161 @@ stop_ip_monitor() return $rc } +check_nexthop_fdb_support() +{ + $IP nexthop help 2>&1 | grep -q fdb + if [ $? -ne 0 ]; then + echo "SKIP: iproute2 too old, missing fdb nexthop support" + return $ksft_skip + fi +} + +ipv6_fdb_grp_fcnal() +{ + local rc + + echo + echo "IPv6 fdb groups functional" + echo "--------------------------" + + check_nexthop_fdb_support + if [ $? -eq $ksft_skip ]; then + return $ksft_skip + fi + + # create group with multiple nexthops + run_cmd "$IP nexthop add id 61 via 2001:db8:91::2 fdb" + run_cmd "$IP nexthop add id 62 via 2001:db8:91::3 fdb" + run_cmd "$IP nexthop add id 102 group 61/62 fdb" + check_nexthop "id 102" "id 102 group 61/62 fdb" + log_test $? 0 "Fdb Nexthop group with multiple nexthops" + + ## get nexthop group + run_cmd "$IP nexthop get id 102" + check_nexthop "id 102" "id 102 group 61/62 fdb" + log_test $? 0 "Get Fdb nexthop group by id" + + # fdb nexthop group can only contain fdb nexthops + run_cmd "$IP nexthop add id 63 via 2001:db8:91::4" + run_cmd "$IP nexthop add id 64 via 2001:db8:91::5" + run_cmd "$IP nexthop add id 103 group 63/64 fdb" + log_test $? 2 "Fdb Nexthop group with non-fdb nexthops" + + # Non fdb nexthop group can not contain fdb nexthops + run_cmd "$IP nexthop add id 65 via 2001:db8:91::5 fdb" + run_cmd "$IP nexthop add id 66 via 2001:db8:91::6 fdb" + run_cmd "$IP nexthop add id 104 group 65/66" + log_test $? 2 "Non-Fdb Nexthop group with fdb nexthops" + + # fdb nexthop cannot have blackhole + run_cmd "$IP nexthop add id 67 blackhole fdb" + log_test $? 
2 "Fdb Nexthop with blackhole" + + # fdb nexthop with oif + run_cmd "$IP nexthop add id 68 via 2001:db8:91::7 dev veth1 fdb" + log_test $? 2 "Fdb Nexthop with oif" + + # fdb nexthop with onlink + run_cmd "$IP nexthop add id 68 via 2001:db8:91::7 onlink fdb" + log_test $? 2 "Fdb Nexthop with onlink" + + # fdb nexthop with encap + run_cmd "$IP nexthop add id 69 encap mpls 101 via 2001:db8:91::8 dev veth1 fdb" + log_test $? 2 "Fdb Nexthop with encap" + + run_cmd "$IP link add name vx10 type vxlan id 1010 local 2001:db8:91::9 remote 2001:db8:91::10 dstport 4789 nolearning noudpcsum tos inherit ttl 100" + run_cmd "$BRIDGE fdb add 02:02:00:00:00:13 dev vx10 nhid 102 self" + log_test $? 0 "Fdb mac add with nexthop group" + + ## fdb nexthops can only reference nexthop groups and not nexthops + run_cmd "$BRIDGE fdb add 02:02:00:00:00:14 dev vx10 nhid 61 self" + log_test $? 255 "Fdb mac add with nexthop" + + run_cmd "$IP -6 ro add 2001:db8:101::1/128 nhid 66" + log_test $? 2 "Route add with fdb nexthop" + + run_cmd "$IP -6 ro add 2001:db8:101::1/128 nhid 103" + log_test $? 2 "Route add with fdb nexthop group" + + run_cmd "$IP nexthop del id 102" + log_test $? 0 "Fdb nexthop delete" + + $IP link del dev vx10 +} + +ipv4_fdb_grp_fcnal() +{ + local rc + + echo + echo "IPv4 fdb groups functional" + echo "--------------------------" + + check_nexthop_fdb_support + if [ $? -eq $ksft_skip ]; then + return $ksft_skip + fi + + # create group with multiple nexthops + run_cmd "$IP nexthop add id 12 via 172.16.1.2 fdb" + run_cmd "$IP nexthop add id 13 via 172.16.1.3 fdb" + run_cmd "$IP nexthop add id 102 group 12/13 fdb" + check_nexthop "id 102" "id 102 group 12/13 fdb" + log_test $? 0 "Fdb Nexthop group with multiple nexthops" + + # get nexthop group + run_cmd "$IP nexthop get id 102" + check_nexthop "id 102" "id 102 group 12/13 fdb" + log_test $? 0 "Get Fdb nexthop group by id" + + # fdb nexthop group can only contain fdb nexthops + run_cmd "$IP nexthop add id 14 via 172.16.1.2" + run_cmd "$IP nexthop add id 15 via 172.16.1.3" + run_cmd "$IP nexthop add id 103 group 14/15 fdb" + log_test $? 2 "Fdb Nexthop group with non-fdb nexthops" + + # Non fdb nexthop group can not contain fdb nexthops + run_cmd "$IP nexthop add id 16 via 172.16.1.2 fdb" + run_cmd "$IP nexthop add id 17 via 172.16.1.3 fdb" + run_cmd "$IP nexthop add id 104 group 14/15" + log_test $? 2 "Non-Fdb Nexthop group with fdb nexthops" + + # fdb nexthop cannot have blackhole + run_cmd "$IP nexthop add id 18 blackhole fdb" + log_test $? 2 "Fdb Nexthop with blackhole" + + # fdb nexthop with oif + run_cmd "$IP nexthop add id 16 via 172.16.1.2 dev veth1 fdb" + log_test $? 2 "Fdb Nexthop with oif" + + # fdb nexthop with onlink + run_cmd "$IP nexthop add id 16 via 172.16.1.2 onlink fdb" + log_test $? 2 "Fdb Nexthop with onlink" + + # fdb nexthop with encap + run_cmd "$IP nexthop add id 17 encap mpls 101 via 172.16.1.2 dev veth1 fdb" + log_test $? 2 "Fdb Nexthop with encap" + + run_cmd "$IP link add name vx10 type vxlan id 1010 local 10.0.0.1 remote 10.0.0.2 dstport 4789 nolearning noudpcsum tos inherit ttl 100" + run_cmd "$BRIDGE fdb add 02:02:00:00:00:13 dev vx10 nhid 102 self" + log_test $? 0 "Fdb mac add with nexthop group" + + # fdb nexthops can only reference nexthop groups and not nexthops + run_cmd "$BRIDGE fdb add 02:02:00:00:00:14 dev vx10 nhid 12 self" + log_test $? 255 "Fdb mac add with nexthop" + + run_cmd "$IP ro add 172.16.0.0/22 nhid 15" + log_test $? 
2 "Route add with fdb nexthop" + + run_cmd "$IP ro add 172.16.0.0/22 nhid 103" + log_test $? 2 "Route add with fdb nexthop group" + + run_cmd "$IP nexthop del id 102" + log_test $? 0 "Fdb nexthop delete" + + $IP link del dev vx10 +} + ################################################################################ # basic operations (add, delete, replace) on nexthops and nexthop groups # @@ -965,7 +1121,7 @@ ipv6_compat_mode() log_test $? 0 "IPv6 compat mode on - route add notification" # route dump should contain expanded nexthops - check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 122 metric 1024 pref medium nexthop via 2001:db8:91::2 dev veth1 weight 1 nexthop via 2001:db8:91::3 dev veth1 weight 1" + check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 122 metric 1024 nexthop via 2001:db8:91::2 dev veth1 weight 1 nexthop via 2001:db8:91::3 dev veth1 weight 1" log_test $? 0 "IPv6 compat mode on - route dump" # change in nexthop group should generate route notification @@ -992,7 +1148,7 @@ ipv6_compat_mode() log_test $? 0 "IPv6 compat mode off - route add notification" # route dump should not contain expanded nexthops - check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 122 metric 1024 pref medium" + check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 122 metric 1024" log_test $? 0 "IPv6 compat mode off - route dump" # change in nexthop group should not generate route notification diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh index 155d48bd4d9e..e27236109235 100644 --- a/tools/testing/selftests/net/forwarding/devlink_lib.sh +++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh @@ -365,7 +365,9 @@ devlink_trap_group_stats_idle_test() devlink_trap_exception_test() { local trap_name=$1; shift - local group_name=$1; shift + local group_name + + group_name=$(devlink_trap_group_get $trap_name) devlink_trap_stats_idle_test $trap_name check_fail $? "Trap stats idle when packets should have been trapped" @@ -377,9 +379,11 @@ devlink_trap_exception_test() devlink_trap_drop_test() { local trap_name=$1; shift - local group_name=$1; shift local dev=$1; shift local handle=$1; shift + local group_name + + group_name=$(devlink_trap_group_get $trap_name) # This is the common part of all the tests. It checks that stats are # initially idle, then non-idle after changing the trap action and @@ -390,7 +394,6 @@ devlink_trap_drop_test() devlink_trap_group_stats_idle_test $group_name check_err $? "Trap group stats not idle with initial drop action" - devlink_trap_action_set $trap_name "trap" devlink_trap_stats_idle_test $trap_name check_fail $? "Trap stats idle after setting action to trap" |