diff options
273 files changed, 11128 insertions, 3251 deletions
diff --git a/Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt b/Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt new file mode 100644 index 000000000000..2aaef567c5be --- /dev/null +++ b/Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt @@ -0,0 +1,23 @@ +* MCR20A IEEE 802.15.4 * + +Required properties: + - compatible: should be "nxp,mcr20a" + - spi-max-frequency: maximal bus speed, should be set to a frequency + lower than 9000000 depends sync or async operation mode + - reg: the chipselect index + - interrupts: the interrupt generated by the device. Non high-level + can occur deadlocks while handling isr. + +Optional properties: + - rst_b-gpio: GPIO spec for the RST_B pin + +Example: + + mcr20a@0 { + compatible = "nxp,mcr20a"; + spi-max-frequency = <9000000>; + reg = <0>; + interrupts = <17 2>; + interrupt-parent = <&gpio>; + rst_b-gpio = <&gpio 27 1> + }; diff --git a/Documentation/devicetree/bindings/net/sff,sfp.txt b/Documentation/devicetree/bindings/net/sff,sfp.txt index f1c441bedf68..929591d52ed6 100644 --- a/Documentation/devicetree/bindings/net/sff,sfp.txt +++ b/Documentation/devicetree/bindings/net/sff,sfp.txt @@ -33,6 +33,10 @@ Optional Properties: Select (AKA RS1) output gpio signal (SFP+ only), low: low Tx rate, high: high Tx rate. Must not be present for SFF modules +- maximum-power-milliwatt : Maximum module power consumption + Specifies the maximum power consumption allowable by a module in the + slot, in milli-Watts. Presently, modules can be up to 1W, 1.5W or 2W. + Example #1: Direct serdes to SFP connection sfp_eth3: sfp-eth3 { @@ -40,6 +44,7 @@ sfp_eth3: sfp-eth3 { i2c-bus = <&sfp_1g_i2c>; los-gpios = <&cpm_gpio2 22 GPIO_ACTIVE_HIGH>; mod-def0-gpios = <&cpm_gpio2 21 GPIO_ACTIVE_LOW>; + maximum-power-milliwatt = <1000>; pinctrl-names = "default"; pinctrl-0 = <&cpm_sfp_1g_pins &cps_sfp_1g_pins>; tx-disable-gpios = <&cps_gpio1 24 GPIO_ACTIVE_HIGH>; diff --git a/MAINTAINERS b/MAINTAINERS index 93a12af4f180..e0b39004edc0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8592,6 +8592,15 @@ S: Maintained F: Documentation/ABI/testing/sysfs-bus-iio-potentiometer-mcp4531 F: drivers/iio/potentiometer/mcp4531.c +MCR20A IEEE-802.15.4 RADIO DRIVER +M: Xue Liu <liuxuenetmail@gmail.com> +L: linux-wpan@vger.kernel.org +W: https://github.com/xueliu/mcr20a-linux +S: Maintained +F: drivers/net/ieee802154/mcr20a.c +F: drivers/net/ieee802154/mcr20a.h +F: Documentation/devicetree/bindings/net/ieee802154/mcr20a.txt + MEASUREMENT COMPUTING CIO-DAC IIO DRIVER M: William Breathitt Gray <vilhelm.gray@gmail.com> L: linux-iio@vger.kernel.org diff --git a/arch/arm/boot/dts/armada-370-rd.dts b/arch/arm/boot/dts/armada-370-rd.dts index 8b2fa9a49967..c28afb242393 100644 --- a/arch/arm/boot/dts/armada-370-rd.dts +++ b/arch/arm/boot/dts/armada-370-rd.dts @@ -56,6 +56,7 @@ /dts-v1/; #include <dt-bindings/input/input.h> +#include <dt-bindings/interrupt-controller/irq.h> #include <dt-bindings/gpio/gpio.h> #include "armada-370.dtsi" @@ -243,6 +244,8 @@ #address-cells = <1>; #size-cells = <0>; reg = <0x10>; + interrupt-controller; + #interrupt-cells = <2>; ports { #address-cells = <1>; @@ -278,6 +281,35 @@ }; }; }; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + switchphy0: switchphy@0 { + reg = <0>; + interrupt-parent = <&switch>; + interrupts = <0 IRQ_TYPE_LEVEL_HIGH>; + }; + + switchphy1: switchphy@1 { + reg = <1>; + interrupt-parent = <&switch>; + interrupts = <1 IRQ_TYPE_LEVEL_HIGH>; + }; + + switchphy2: switchphy@2 { + reg = <2>; + interrupt-parent = <&switch>; + interrupts = <2 IRQ_TYPE_LEVEL_HIGH>; + }; + + switchphy3: switchphy@3 { + reg = <3>; + interrupt-parent = <&switch>; + interrupts = <3 IRQ_TYPE_LEVEL_HIGH>; + }; + }; }; }; diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c index d3d435248a24..c73eb8209555 100644 --- a/arch/m68k/mac/config.c +++ b/arch/m68k/mac/config.c @@ -1088,6 +1088,10 @@ int __init mac_platform_init(void) macintosh_config->expansion_type == MAC_EXP_PDS_COMM) platform_device_register_simple("macsonic", -1, NULL, 0); + if (macintosh_config->expansion_type == MAC_EXP_PDS || + macintosh_config->expansion_type == MAC_EXP_PDS_COMM) + platform_device_register_simple("mac89x0", -1, NULL, 0); + if (macintosh_config->ether_type == MAC_ETHER_MACE) platform_device_register_simple("macmace", -1, NULL, 0); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 45e4eb5bcbb2..cbf94d44f3d5 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -61,7 +61,12 @@ static bool is_imm8(int value) static bool is_simm32(s64 value) { - return value == (s64) (s32) value; + return value == (s64)(s32)value; +} + +static bool is_uimm32(u64 value) +{ + return value == (u64)(u32)value; } /* mov dst, src */ @@ -212,7 +217,7 @@ struct jit_context { /* emit x64 prologue code for BPF program and check it's size. * bpf_tail_call helper will skip it while jumping into another program */ -static void emit_prologue(u8 **pprog, u32 stack_depth) +static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) { u8 *prog = *pprog; int cnt = 0; @@ -247,18 +252,21 @@ static void emit_prologue(u8 **pprog, u32 stack_depth) /* mov qword ptr [rbp+24],r15 */ EMIT4(0x4C, 0x89, 0x7D, 24); - /* Clear the tail call counter (tail_call_cnt): for eBPF tail calls - * we need to reset the counter to 0. It's done in two instructions, - * resetting rax register to 0 (xor on eax gets 0 extended), and - * moving it to the counter location. - */ + if (!ebpf_from_cbpf) { + /* Clear the tail call counter (tail_call_cnt): for eBPF tail + * calls we need to reset the counter to 0. It's done in two + * instructions, resetting rax register to 0, and moving it + * to the counter location. + */ + + /* xor eax, eax */ + EMIT2(0x31, 0xc0); + /* mov qword ptr [rbp+32], rax */ + EMIT4(0x48, 0x89, 0x45, 32); - /* xor eax, eax */ - EMIT2(0x31, 0xc0); - /* mov qword ptr [rbp+32], rax */ - EMIT4(0x48, 0x89, 0x45, 32); + BUILD_BUG_ON(cnt != PROLOGUE_SIZE); + } - BUILD_BUG_ON(cnt != PROLOGUE_SIZE); *pprog = prog; } @@ -356,6 +364,86 @@ static void emit_load_skb_data_hlen(u8 **pprog) *pprog = prog; } +static void emit_mov_imm32(u8 **pprog, bool sign_propagate, + u32 dst_reg, const u32 imm32) +{ + u8 *prog = *pprog; + u8 b1, b2, b3; + int cnt = 0; + + /* optimization: if imm32 is positive, use 'mov %eax, imm32' + * (which zero-extends imm32) to save 2 bytes. + */ + if (sign_propagate && (s32)imm32 < 0) { + /* 'mov %rax, imm32' sign extends imm32 */ + b1 = add_1mod(0x48, dst_reg); + b2 = 0xC7; + b3 = 0xC0; + EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32); + goto done; + } + + /* optimization: if imm32 is zero, use 'xor %eax, %eax' + * to save 3 bytes. + */ + if (imm32 == 0) { + if (is_ereg(dst_reg)) + EMIT1(add_2mod(0x40, dst_reg, dst_reg)); + b2 = 0x31; /* xor */ + b3 = 0xC0; + EMIT2(b2, add_2reg(b3, dst_reg, dst_reg)); + goto done; + } + + /* mov %eax, imm32 */ + if (is_ereg(dst_reg)) + EMIT1(add_1mod(0x40, dst_reg)); + EMIT1_off32(add_1reg(0xB8, dst_reg), imm32); +done: + *pprog = prog; +} + +static void emit_mov_imm64(u8 **pprog, u32 dst_reg, + const u32 imm32_hi, const u32 imm32_lo) +{ + u8 *prog = *pprog; + int cnt = 0; + + if (is_uimm32(((u64)imm32_hi << 32) | (u32)imm32_lo)) { + /* For emitting plain u32, where sign bit must not be + * propagated LLVM tends to load imm64 over mov32 + * directly, so save couple of bytes by just doing + * 'mov %eax, imm32' instead. + */ + emit_mov_imm32(&prog, false, dst_reg, imm32_lo); + } else { + /* movabsq %rax, imm64 */ + EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg)); + EMIT(imm32_lo, 4); + EMIT(imm32_hi, 4); + } + + *pprog = prog; +} + +static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg) +{ + u8 *prog = *pprog; + int cnt = 0; + + if (is64) { + /* mov dst, src */ + EMIT_mov(dst_reg, src_reg); + } else { + /* mov32 dst, src */ + if (is_ereg(dst_reg) || is_ereg(src_reg)) + EMIT1(add_2mod(0x40, dst_reg, src_reg)); + EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg)); + } + + *pprog = prog; +} + static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, int oldproglen, struct jit_context *ctx) { @@ -369,7 +457,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, int proglen = 0; u8 *prog = temp; - emit_prologue(&prog, bpf_prog->aux->stack_depth); + emit_prologue(&prog, bpf_prog->aux->stack_depth, + bpf_prog_was_classic(bpf_prog)); if (seen_ld_abs) emit_load_skb_data_hlen(&prog); @@ -378,7 +467,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, const s32 imm32 = insn->imm; u32 dst_reg = insn->dst_reg; u32 src_reg = insn->src_reg; - u8 b1 = 0, b2 = 0, b3 = 0; + u8 b2 = 0, b3 = 0; s64 jmp_offset; u8 jmp_cond; bool reload_skb_data; @@ -414,16 +503,11 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg)); break; - /* mov dst, src */ case BPF_ALU64 | BPF_MOV | BPF_X: - EMIT_mov(dst_reg, src_reg); - break; - - /* mov32 dst, src */ case BPF_ALU | BPF_MOV | BPF_X: - if (is_ereg(dst_reg) || is_ereg(src_reg)) - EMIT1(add_2mod(0x40, dst_reg, src_reg)); - EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg)); + emit_mov_reg(&prog, + BPF_CLASS(insn->code) == BPF_ALU64, + dst_reg, src_reg); break; /* neg dst */ @@ -486,58 +570,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, break; case BPF_ALU64 | BPF_MOV | BPF_K: - /* optimization: if imm32 is positive, - * use 'mov eax, imm32' (which zero-extends imm32) - * to save 2 bytes - */ - if (imm32 < 0) { - /* 'mov rax, imm32' sign extends imm32 */ - b1 = add_1mod(0x48, dst_reg); - b2 = 0xC7; - b3 = 0xC0; - EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32); - break; - } - case BPF_ALU | BPF_MOV | BPF_K: - /* optimization: if imm32 is zero, use 'xor <dst>,<dst>' - * to save 3 bytes. - */ - if (imm32 == 0) { - if (is_ereg(dst_reg)) - EMIT1(add_2mod(0x40, dst_reg, dst_reg)); - b2 = 0x31; /* xor */ - b3 = 0xC0; - EMIT2(b2, add_2reg(b3, dst_reg, dst_reg)); - break; - } - - /* mov %eax, imm32 */ - if (is_ereg(dst_reg)) - EMIT1(add_1mod(0x40, dst_reg)); - EMIT1_off32(add_1reg(0xB8, dst_reg), imm32); + emit_mov_imm32(&prog, BPF_CLASS(insn->code) == BPF_ALU64, + dst_reg, imm32); break; case BPF_LD | BPF_IMM | BPF_DW: - /* optimization: if imm64 is zero, use 'xor <dst>,<dst>' - * to save 7 bytes. - */ - if (insn[0].imm == 0 && insn[1].imm == 0) { - b1 = add_2mod(0x48, dst_reg, dst_reg); - b2 = 0x31; /* xor */ - b3 = 0xC0; - EMIT3(b1, b2, add_2reg(b3, dst_reg, dst_reg)); - - insn++; - i++; - break; - } - - /* movabsq %rax, imm64 */ - EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg)); - EMIT(insn[0].imm, 4); - EMIT(insn[1].imm, 4); - + emit_mov_imm64(&prog, dst_reg, insn[1].imm, insn[0].imm); insn++; i++; break; @@ -594,36 +633,38 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, case BPF_ALU | BPF_MUL | BPF_X: case BPF_ALU64 | BPF_MUL | BPF_K: case BPF_ALU64 | BPF_MUL | BPF_X: - EMIT1(0x50); /* push rax */ - EMIT1(0x52); /* push rdx */ + { + bool is64 = BPF_CLASS(insn->code) == BPF_ALU64; + + if (dst_reg != BPF_REG_0) + EMIT1(0x50); /* push rax */ + if (dst_reg != BPF_REG_3) + EMIT1(0x52); /* push rdx */ /* mov r11, dst_reg */ EMIT_mov(AUX_REG, dst_reg); if (BPF_SRC(insn->code) == BPF_X) - /* mov rax, src_reg */ - EMIT_mov(BPF_REG_0, src_reg); + emit_mov_reg(&prog, is64, BPF_REG_0, src_reg); else - /* mov rax, imm32 */ - EMIT3_off32(0x48, 0xC7, 0xC0, imm32); + emit_mov_imm32(&prog, is64, BPF_REG_0, imm32); - if (BPF_CLASS(insn->code) == BPF_ALU64) + if (is64) EMIT1(add_1mod(0x48, AUX_REG)); else if (is_ereg(AUX_REG)) EMIT1(add_1mod(0x40, AUX_REG)); /* mul(q) r11 */ EMIT2(0xF7, add_1reg(0xE0, AUX_REG)); - /* mov r11, rax */ - EMIT_mov(AUX_REG, BPF_REG_0); - - EMIT1(0x5A); /* pop rdx */ - EMIT1(0x58); /* pop rax */ - - /* mov dst_reg, r11 */ - EMIT_mov(dst_reg, AUX_REG); + if (dst_reg != BPF_REG_3) + EMIT1(0x5A); /* pop rdx */ + if (dst_reg != BPF_REG_0) { + /* mov dst_reg, rax */ + EMIT_mov(dst_reg, BPF_REG_0); + EMIT1(0x58); /* pop rax */ + } break; - + } /* shifts */ case BPF_ALU | BPF_LSH | BPF_K: case BPF_ALU | BPF_RSH | BPF_K: @@ -641,7 +682,11 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, case BPF_RSH: b3 = 0xE8; break; case BPF_ARSH: b3 = 0xF8; break; } - EMIT3(0xC1, add_1reg(b3, dst_reg), imm32); + + if (imm32 == 1) + EMIT2(0xD1, add_1reg(b3, dst_reg)); + else + EMIT3(0xC1, add_1reg(b3, dst_reg), imm32); break; case BPF_ALU | BPF_LSH | BPF_X: diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index e66963ca58bd..3ae32d1ddd27 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -4549,6 +4549,7 @@ static struct pernet_operations cma_pernet_operations = { .exit = cma_exit_net, .id = &cma_pernet_id, .size = sizeof(struct cma_pernet), + .async = true, }; static int __init cma_init(void) diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index bc6299697dda..d42b922bede8 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -2,3 +2,4 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o +mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c new file mode 100644 index 000000000000..61cc3d7db257 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -0,0 +1,189 @@ +/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ +/* + * Copyright (c) 2018 Mellanox Technologies. All rights reserved. + */ + +#include "ib_rep.h" + +static const struct mlx5_ib_profile rep_profile = { + STAGE_CREATE(MLX5_IB_STAGE_INIT, + mlx5_ib_stage_init_init, + mlx5_ib_stage_init_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB, + mlx5_ib_stage_rep_flow_db_init, + NULL), + STAGE_CREATE(MLX5_IB_STAGE_CAPS, + mlx5_ib_stage_caps_init, + NULL), + STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB, + mlx5_ib_stage_rep_non_default_cb, + NULL), + STAGE_CREATE(MLX5_IB_STAGE_ROCE, + mlx5_ib_stage_rep_roce_init, + mlx5_ib_stage_rep_roce_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES, + mlx5_ib_stage_dev_res_init, + mlx5_ib_stage_dev_res_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_COUNTERS, + mlx5_ib_stage_counters_init, + mlx5_ib_stage_counters_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_BFREG, + mlx5_ib_stage_bfrag_init, + mlx5_ib_stage_bfrag_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_IB_REG, + mlx5_ib_stage_ib_reg_init, + mlx5_ib_stage_ib_reg_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_UMR_RESOURCES, + mlx5_ib_stage_umr_res_init, + mlx5_ib_stage_umr_res_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR, + mlx5_ib_stage_class_attr_init, + NULL), +}; + +static int +mlx5_ib_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) +{ + return 0; +} + +static void +mlx5_ib_nic_rep_unload(struct mlx5_eswitch_rep *rep) +{ + rep->rep_if[REP_IB].priv = NULL; +} + +static int +mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) +{ + struct mlx5_ib_dev *ibdev; + + ibdev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*ibdev)); + if (!ibdev) + return -ENOMEM; + + ibdev->rep = rep; + ibdev->mdev = dev; + ibdev->num_ports = max(MLX5_CAP_GEN(dev, num_ports), + MLX5_CAP_GEN(dev, num_vhca_ports)); + if (!__mlx5_ib_add(ibdev, &rep_profile)) + return -EINVAL; + + rep->rep_if[REP_IB].priv = ibdev; + + return 0; +} + +static void +mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep) +{ + struct mlx5_ib_dev *dev; + + if (!rep->rep_if[REP_IB].priv) + return; + + dev = mlx5_ib_rep_to_dev(rep); + __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX); + rep->rep_if[REP_IB].priv = NULL; +} + +static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep) +{ + return mlx5_ib_rep_to_dev(rep); +} + +static void mlx5_ib_rep_register_vf_vports(struct mlx5_ib_dev *dev) +{ + struct mlx5_eswitch *esw = dev->mdev->priv.eswitch; + int total_vfs = MLX5_TOTAL_VPORTS(dev->mdev); + int vport; + + for (vport = 1; vport < total_vfs; vport++) { + struct mlx5_eswitch_rep_if rep_if = {}; + + rep_if.load = mlx5_ib_vport_rep_load; + rep_if.unload = mlx5_ib_vport_rep_unload; + rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev; + mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_IB); + } +} + +static void mlx5_ib_rep_unregister_vf_vports(struct mlx5_ib_dev *dev) +{ + struct mlx5_eswitch *esw = dev->mdev->priv.eswitch; + int total_vfs = MLX5_TOTAL_VPORTS(dev->mdev); + int vport; + + for (vport = 1; vport < total_vfs; vport++) + mlx5_eswitch_unregister_vport_rep(esw, vport, REP_IB); +} + +void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev) +{ + struct mlx5_eswitch *esw = dev->mdev->priv.eswitch; + struct mlx5_eswitch_rep_if rep_if = {}; + + rep_if.load = mlx5_ib_nic_rep_load; + rep_if.unload = mlx5_ib_nic_rep_unload; + rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev; + rep_if.priv = dev; + + mlx5_eswitch_register_vport_rep(esw, 0, &rep_if, REP_IB); + + mlx5_ib_rep_register_vf_vports(dev); +} + +void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev) +{ + struct mlx5_eswitch *esw = dev->mdev->priv.eswitch; + + mlx5_ib_rep_unregister_vf_vports(dev); /* VFs vports */ + mlx5_eswitch_unregister_vport_rep(esw, 0, REP_IB); /* UPLINK PF*/ +} + +u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw) +{ + return mlx5_eswitch_mode(esw); +} + +struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw, + int vport_index) +{ + return mlx5_eswitch_get_proto_dev(esw, vport_index, REP_IB); +} + +struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw, + int vport_index) +{ + return mlx5_eswitch_get_proto_dev(esw, vport_index, REP_ETH); +} + +struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw) +{ + return mlx5_eswitch_uplink_get_proto_dev(esw, REP_IB); +} + +struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, int vport) +{ + return mlx5_eswitch_vport_rep(esw, vport); +} + +int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, + struct mlx5_ib_sq *sq) +{ + struct mlx5_flow_handle *flow_rule; + struct mlx5_eswitch *esw = dev->mdev->priv.eswitch; + + if (!dev->rep) + return 0; + + flow_rule = + mlx5_eswitch_add_send_to_vport_rule(esw, + dev->rep->vport, + sq->base.mqp.qpn); + if (IS_ERR(flow_rule)) + return PTR_ERR(flow_rule); + sq->flow_rule = flow_rule; + + return 0; +} diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h new file mode 100644 index 000000000000..046fd942fd46 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/ib_rep.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ +/* + * Copyright (c) 2018 Mellanox Technologies. All rights reserved. + */ + +#ifndef __MLX5_IB_REP_H__ +#define __MLX5_IB_REP_H__ + +#include <linux/mlx5/eswitch.h> +#include "mlx5_ib.h" + +#ifdef CONFIG_MLX5_ESWITCH +u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw); +struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw, + int vport_index); +struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw); +struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, + int vport_index); +void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev); +void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev); +int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, + struct mlx5_ib_sq *sq); +struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw, + int vport_index); +#else /* CONFIG_MLX5_ESWITCH */ +static inline u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw) +{ + return SRIOV_NONE; +} + +static inline +struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw, + int vport_index) +{ + return NULL; +} + +static inline +struct mlx5_ib_dev *mlx5_ib_get_uplink_ibdev(struct mlx5_eswitch *esw) +{ + return NULL; +} + +static inline +struct mlx5_eswitch_rep *mlx5_ib_vport_rep(struct mlx5_eswitch *esw, + int vport_index) +{ + return NULL; +} + +static inline void mlx5_ib_register_vport_reps(struct mlx5_ib_dev *dev) {} +static inline void mlx5_ib_unregister_vport_reps(struct mlx5_ib_dev *dev) {} +static inline int create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, + struct mlx5_ib_sq *sq) +{ + return 0; +} + +static inline +struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw, + int vport_index) +{ + return NULL; +} +#endif + +static inline +struct mlx5_ib_dev *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep) +{ + return (struct mlx5_ib_dev *)rep->rep_if[REP_IB].priv; +} +#endif /* __MLX5_IB_REP_H__ */ diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 4236c8086820..ee55d7d64554 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -57,6 +57,7 @@ #include <linux/in.h> #include <linux/etherdevice.h> #include "mlx5_ib.h" +#include "ib_rep.h" #include "cmd.h" #define DRIVER_NAME "mlx5_ib" @@ -130,7 +131,7 @@ static int get_port_state(struct ib_device *ibdev, int ret; memset(&attr, 0, sizeof(attr)); - ret = mlx5_ib_query_port(ibdev, port_num, &attr); + ret = ibdev->query_port(ibdev, port_num, &attr); if (!ret) *state = attr.state; return ret; @@ -154,10 +155,19 @@ static int mlx5_netdev_event(struct notifier_block *this, case NETDEV_REGISTER: case NETDEV_UNREGISTER: write_lock(&roce->netdev_lock); - - if (ndev->dev.parent == &mdev->pdev->dev) - roce->netdev = (event == NETDEV_UNREGISTER) ? + if (ibdev->rep) { + struct mlx5_eswitch *esw = ibdev->mdev->priv.eswitch; + struct net_device *rep_ndev; + + rep_ndev = mlx5_ib_get_rep_netdev(esw, + ibdev->rep->vport); + if (rep_ndev == ndev) + roce->netdev = (event == NETDEV_UNREGISTER) ? NULL : ndev; + } else if (ndev->dev.parent == &ibdev->mdev->pdev->dev) { + roce->netdev = (event == NETDEV_UNREGISTER) ? + NULL : ndev; + } write_unlock(&roce->netdev_lock); break; @@ -1268,6 +1278,22 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, return ret; } +static int mlx5_ib_rep_query_port(struct ib_device *ibdev, u8 port, + struct ib_port_attr *props) +{ + int ret; + + /* Only link layer == ethernet is valid for representors */ + ret = mlx5_query_port_roce(ibdev, port, props); + if (ret || !props) + return ret; + + /* We don't support GIDS */ + props->gid_tbl_len = 0; + + return ret; +} + static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid) { @@ -2631,7 +2657,7 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) ibflow); struct mlx5_ib_flow_handler *iter, *tmp; - mutex_lock(&dev->flow_db.lock); + mutex_lock(&dev->flow_db->lock); list_for_each_entry_safe(iter, tmp, &handler->list, list) { mlx5_del_flow_rules(iter->rule); @@ -2642,7 +2668,7 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) mlx5_del_flow_rules(handler->rule); put_flow_table(dev, handler->prio, true); - mutex_unlock(&dev->flow_db.lock); + mutex_unlock(&dev->flow_db->lock); kfree(handler); @@ -2691,7 +2717,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, MLX5_FLOW_NAMESPACE_BYPASS); num_entries = MLX5_FS_MAX_ENTRIES; num_groups = MLX5_FS_MAX_TYPES; - prio = &dev->flow_db.prios[priority]; + prio = &dev->flow_db->prios[priority]; } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { ns = mlx5_get_flow_namespace(dev->mdev, @@ -2699,7 +2725,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, build_leftovers_ft_param(&priority, &num_entries, &num_groups); - prio = &dev->flow_db.prios[MLX5_IB_FLOW_LEFTOVERS_PRIO]; + prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO]; } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { if (!MLX5_CAP_FLOWTABLE(dev->mdev, allow_sniffer_and_nic_rx_shared_tir)) @@ -2709,7 +2735,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, MLX5_FLOW_NAMESPACE_SNIFFER_RX : MLX5_FLOW_NAMESPACE_SNIFFER_TX); - prio = &dev->flow_db.sniffer[ft_type]; + prio = &dev->flow_db->sniffer[ft_type]; priority = 0; num_entries = 1; num_groups = 1; @@ -2802,6 +2828,18 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, if (!flow_is_multicast_only(flow_attr)) set_underlay_qp(dev, spec, underlay_qpn); + if (dev->rep) { + void *misc; + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, + dev->rep->vport); + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + } + spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria); if (is_drop) { flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; @@ -2999,7 +3037,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, if (!dst) return ERR_PTR(-ENOMEM); - mutex_lock(&dev->flow_db.lock); + mutex_lock(&dev->flow_db->lock); ft_prio = get_flow_table(dev, flow_attr, MLX5_IB_FT_RX); if (IS_ERR(ft_prio)) { @@ -3048,7 +3086,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, goto destroy_ft; } - mutex_unlock(&dev->flow_db.lock); + mutex_unlock(&dev->flow_db->lock); kfree(dst); return &handler->ibflow; @@ -3058,7 +3096,7 @@ destroy_ft: if (ft_prio_tx) put_flow_table(dev, ft_prio_tx, false); unlock: - mutex_unlock(&dev->flow_db.lock); + mutex_unlock(&dev->flow_db->lock); kfree(dst); kfree(handler); return ERR_PTR(err); @@ -3772,6 +3810,25 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num, return 0; } +static int mlx5_port_rep_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET; + + err = ib_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET; + + return 0; +} + static void get_dev_fw_str(struct ib_device *ibdev, char *str) { struct mlx5_ib_dev *dev = @@ -3802,7 +3859,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev) goto err_destroy_vport_lag; } - dev->flow_db.lag_demux_ft = ft; + dev->flow_db->lag_demux_ft = ft; return 0; err_destroy_vport_lag: @@ -3814,9 +3871,9 @@ static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; - if (dev->flow_db.lag_demux_ft) { - mlx5_destroy_flow_table(dev->flow_db.lag_demux_ft); - dev->flow_db.lag_demux_ft = NULL; + if (dev->flow_db->lag_demux_ft) { + mlx5_destroy_flow_table(dev->flow_db->lag_demux_ft); + dev->flow_db->lag_demux_ft = NULL; mlx5_cmd_destroy_vport_lag(mdev); } @@ -3848,14 +3905,10 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev, u8 port_num) { int err; - err = mlx5_add_netdev_notifier(dev, port_num); - if (err) - return err; - if (MLX5_CAP_GEN(dev->mdev, roce)) { err = mlx5_nic_vport_enable_roce(dev->mdev); if (err) - goto err_unregister_netdevice_notifier; + return err; } err = mlx5_eth_lag_init(dev); @@ -3868,8 +3921,6 @@ err_disable_roce: if (MLX5_CAP_GEN(dev->mdev, roce)) mlx5_nic_vport_disable_roce(dev->mdev); -err_unregister_netdevice_notifier: - mlx5_remove_netdev_notifier(dev, port_num); return err; } @@ -4503,7 +4554,7 @@ static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev) mlx5_nic_vport_disable_roce(dev->mdev); } -static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) +void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) { mlx5_ib_cleanup_multiport_master(dev); #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING @@ -4512,7 +4563,7 @@ static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) kfree(dev->port); } -static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) +int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; const char *name; @@ -4564,7 +4615,6 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) dev->mdev->priv.eq_table.num_comp_vectors; dev->ib_dev.dev.parent = &mdev->pdev->dev; - mutex_init(&dev->flow_db.lock); mutex_init(&dev->cap_mask_mutex); INIT_LIST_HEAD(&dev->qp_list); spin_lock_init(&dev->reset_flow_resource_lock); @@ -4585,7 +4635,38 @@ err_free_port: return -ENOMEM; } -static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) +static int mlx5_ib_stage_flow_db_init(struct mlx5_ib_dev *dev) +{ + dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL); + + if (!dev->flow_db) + return -ENOMEM; + + mutex_init(&dev->flow_db->lock); + + return 0; +} + +int mlx5_ib_stage_rep_flow_db_init(struct mlx5_ib_dev *dev) +{ + struct mlx5_ib_dev *nic_dev; + + nic_dev = mlx5_ib_get_uplink_ibdev(dev->mdev->priv.eswitch); + + if (!nic_dev) + return -EINVAL; + + dev->flow_db = nic_dev->flow_db; + + return 0; +} + +static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev) +{ + kfree(dev->flow_db); +} + +int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; int err; @@ -4626,7 +4707,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ); dev->ib_dev.query_device = mlx5_ib_query_device; - dev->ib_dev.query_port = mlx5_ib_query_port; dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer; dev->ib_dev.query_gid = mlx5_ib_query_gid; dev->ib_dev.add_gid = mlx5_ib_add_gid; @@ -4669,7 +4749,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) dev->ib_dev.alloc_mr = mlx5_ib_alloc_mr; dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg; dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status; - dev->ib_dev.get_port_immutable = mlx5_port_immutable; dev->ib_dev.get_dev_fw_str = get_dev_fw_str; dev->ib_dev.get_vector_affinity = mlx5_ib_get_vector_affinity; if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads)) @@ -4720,6 +4799,80 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) return 0; } +static int mlx5_ib_stage_non_default_cb(struct mlx5_ib_dev *dev) +{ + dev->ib_dev.get_port_immutable = mlx5_port_immutable; + dev->ib_dev.query_port = mlx5_ib_query_port; + + return 0; +} + +int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev) +{ + dev->ib_dev.get_port_immutable = mlx5_port_rep_immutable; + dev->ib_dev.query_port = mlx5_ib_rep_query_port; + + return 0; +} + +static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev, + u8 port_num) +{ + int i; + + for (i = 0; i < dev->num_ports; i++) { + dev->roce[i].dev = dev; + dev->roce[i].native_port_num = i + 1; + dev->roce[i].last_port_state = IB_PORT_DOWN; + } + + dev->ib_dev.get_netdev = mlx5_ib_get_netdev; + dev->ib_dev.create_wq = mlx5_ib_create_wq; + dev->ib_dev.modify_wq = mlx5_ib_modify_wq; + dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq; + dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table; + dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table; + + dev->ib_dev.uverbs_ex_cmd_mask |= + (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | + (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | + (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | + (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | + (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); + + return mlx5_add_netdev_notifier(dev, port_num); +} + +static void mlx5_ib_stage_common_roce_cleanup(struct mlx5_ib_dev *dev) +{ + u8 port_num = mlx5_core_native_port_num(dev->mdev) - 1; + + mlx5_remove_netdev_notifier(dev, port_num); +} + +int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev) +{ + struct mlx5_core_dev *mdev = dev->mdev; + enum rdma_link_layer ll; + int port_type_cap; + int err = 0; + u8 port_num; + + port_num = mlx5_core_native_port_num(dev->mdev) - 1; + port_type_cap = MLX5_CAP_GEN(mdev, port_type); + ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); + + if (ll == IB_LINK_LAYER_ETHERNET) + err = mlx5_ib_stage_common_roce_init(dev, port_num); + + return err; +} + +void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev) +{ + mlx5_ib_stage_common_roce_cleanup(dev); +} + static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; @@ -4727,37 +4880,26 @@ static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev) int port_type_cap; u8 port_num; int err; - int i; port_num = mlx5_core_native_port_num(dev->mdev) - 1; port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); if (ll == IB_LINK_LAYER_ETHERNET) { - for (i = 0; i < dev->num_ports; i++) { - dev->roce[i].dev = dev; - dev->roce[i].native_port_num = i + 1; - dev->roce[i].last_port_state = IB_PORT_DOWN; - } + err = mlx5_ib_stage_common_roce_init(dev, port_num); + if (err) + return err; - dev->ib_dev.get_netdev = mlx5_ib_get_netdev; - dev->ib_dev.create_wq = mlx5_ib_create_wq; - dev->ib_dev.modify_wq = mlx5_ib_modify_wq; - dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq; - dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table; - dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table; - dev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); err = mlx5_enable_eth(dev, port_num); if (err) - return err; + goto cleanup; } return 0; +cleanup: + mlx5_ib_stage_common_roce_cleanup(dev); + + return err; } static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev) @@ -4773,16 +4915,16 @@ static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev) if (ll == IB_LINK_LAYER_ETHERNET) { mlx5_disable_eth(dev); - mlx5_remove_netdev_notifier(dev, port_num); + mlx5_ib_stage_common_roce_cleanup(dev); } } -static int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev) +int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev) { return create_dev_resources(&dev->devr); } -static void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev) +void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev) { destroy_dev_resources(&dev->devr); } @@ -4794,7 +4936,7 @@ static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev) return mlx5_ib_odp_init_one(dev); } -static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev) +int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev) { if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) { dev->ib_dev.get_hw_stats = mlx5_ib_get_hw_stats; @@ -4806,7 +4948,7 @@ static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev) return 0; } -static void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev) +void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev) { if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) mlx5_ib_dealloc_counters(dev); @@ -4837,7 +4979,7 @@ static void mlx5_ib_stage_uar_cleanup(struct mlx5_ib_dev *dev) mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar); } -static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev) +int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev) { int err; @@ -4852,28 +4994,28 @@ static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev) return err; } -static void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev) +void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev) { mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg); mlx5_free_bfreg(dev->mdev, &dev->bfreg); } -static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) +int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) { return ib_register_device(&dev->ib_dev, NULL); } -static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev) +void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev) { ib_unregister_device(&dev->ib_dev); } -static int mlx5_ib_stage_umr_res_init(struct mlx5_ib_dev *dev) +int mlx5_ib_stage_umr_res_init(struct mlx5_ib_dev *dev) { return create_umr_res(dev); } -static void mlx5_ib_stage_umr_res_cleanup(struct mlx5_ib_dev *dev) +void mlx5_ib_stage_umr_res_cleanup(struct mlx5_ib_dev *dev) { destroy_umrc_res(dev); } @@ -4890,7 +5032,7 @@ static void mlx5_ib_stage_delay_drop_cleanup(struct mlx5_ib_dev *dev) cancel_delay_drop(dev); } -static int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev) +int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev) { int err; int i; @@ -4905,9 +5047,21 @@ static int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev) return 0; } -static void __mlx5_ib_remove(struct mlx5_ib_dev *dev, - const struct mlx5_ib_profile *profile, - int stage) +static int mlx5_ib_stage_rep_reg_init(struct mlx5_ib_dev *dev) +{ + mlx5_ib_register_vport_reps(dev); + + return 0; +} + +static void mlx5_ib_stage_rep_reg_cleanup(struct mlx5_ib_dev *dev) +{ + mlx5_ib_unregister_vport_reps(dev); +} + +void __mlx5_ib_remove(struct mlx5_ib_dev *dev, + const struct mlx5_ib_profile *profile, + int stage) { /* Number of stages to cleanup */ while (stage) { @@ -4921,23 +5075,14 @@ static void __mlx5_ib_remove(struct mlx5_ib_dev *dev, static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num); -static void *__mlx5_ib_add(struct mlx5_core_dev *mdev, - const struct mlx5_ib_profile *profile) +void *__mlx5_ib_add(struct mlx5_ib_dev *dev, + const struct mlx5_ib_profile *profile) { - struct mlx5_ib_dev *dev; int err; int i; printk_once(KERN_INFO "%s", mlx5_version); - dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev)); - if (!dev) - return NULL; - - dev->mdev = mdev; - dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports), - MLX5_CAP_GEN(mdev, num_vhca_ports)); - for (i = 0; i < MLX5_IB_STAGE_MAX; i++) { if (profile->stage[i].init) { err = profile->stage[i].init(dev); @@ -4961,9 +5106,15 @@ static const struct mlx5_ib_profile pf_profile = { STAGE_CREATE(MLX5_IB_STAGE_INIT, mlx5_ib_stage_init_init, mlx5_ib_stage_init_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB, + mlx5_ib_stage_flow_db_init, + mlx5_ib_stage_flow_db_cleanup), STAGE_CREATE(MLX5_IB_STAGE_CAPS, mlx5_ib_stage_caps_init, NULL), + STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB, + mlx5_ib_stage_non_default_cb, + NULL), STAGE_CREATE(MLX5_IB_STAGE_ROCE, mlx5_ib_stage_roce_init, mlx5_ib_stage_roce_cleanup), @@ -4999,6 +5150,48 @@ static const struct mlx5_ib_profile pf_profile = { NULL), }; +static const struct mlx5_ib_profile nic_rep_profile = { + STAGE_CREATE(MLX5_IB_STAGE_INIT, + mlx5_ib_stage_init_init, + mlx5_ib_stage_init_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB, + mlx5_ib_stage_flow_db_init, + mlx5_ib_stage_flow_db_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_CAPS, + mlx5_ib_stage_caps_init, + NULL), + STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB, + mlx5_ib_stage_rep_non_default_cb, + NULL), + STAGE_CREATE(MLX5_IB_STAGE_ROCE, + mlx5_ib_stage_rep_roce_init, + mlx5_ib_stage_rep_roce_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES, + mlx5_ib_stage_dev_res_init, + mlx5_ib_stage_dev_res_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_COUNTERS, + mlx5_ib_stage_counters_init, + mlx5_ib_stage_counters_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_UAR, + mlx5_ib_stage_uar_init, + mlx5_ib_stage_uar_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_BFREG, + mlx5_ib_stage_bfrag_init, + mlx5_ib_stage_bfrag_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_IB_REG, + mlx5_ib_stage_ib_reg_init, + mlx5_ib_stage_ib_reg_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_UMR_RESOURCES, + mlx5_ib_stage_umr_res_init, + mlx5_ib_stage_umr_res_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_CLASS_ATTR, + mlx5_ib_stage_class_attr_init, + NULL), + STAGE_CREATE(MLX5_IB_STAGE_REP_REG, + mlx5_ib_stage_rep_reg_init, + mlx5_ib_stage_rep_reg_cleanup), +}; + static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num) { struct mlx5_ib_multiport_info *mpi; @@ -5044,8 +5237,11 @@ static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num) static void *mlx5_ib_add(struct mlx5_core_dev *mdev) { enum rdma_link_layer ll; + struct mlx5_ib_dev *dev; int port_type_cap; + printk_once(KERN_INFO "%s", mlx5_version); + port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); @@ -5055,7 +5251,22 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) return mlx5_ib_add_slave_port(mdev, port_num); } - return __mlx5_ib_add(mdev, &pf_profile); + dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev)); + if (!dev) + return NULL; + + dev->mdev = mdev; + dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports), + MLX5_CAP_GEN(mdev, num_vhca_ports)); + + if (MLX5_VPORT_MANAGER(mdev) && + mlx5_ib_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) { + dev->rep = mlx5_ib_vport_rep(mdev->priv.eswitch, 0); + + return __mlx5_ib_add(dev, &nic_rep_profile); + } + + return __mlx5_ib_add(dev, &pf_profile); } static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index eafb9751daf6..e0bad28e0f09 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -343,6 +343,7 @@ struct mlx5_ib_sq { struct mlx5_ib_wq *sq; struct mlx5_ib_ubuffer ubuffer; struct mlx5_db *doorbell; + struct mlx5_flow_handle *flow_rule; u32 tisn; u8 state; }; @@ -731,7 +732,9 @@ struct mlx5_ib_delay_drop { enum mlx5_ib_stages { MLX5_IB_STAGE_INIT, + MLX5_IB_STAGE_FLOW_DB, MLX5_IB_STAGE_CAPS, + MLX5_IB_STAGE_NON_DEFAULT_CB, MLX5_IB_STAGE_ROCE, MLX5_IB_STAGE_DEVICE_RESOURCES, MLX5_IB_STAGE_ODP, @@ -743,6 +746,7 @@ enum mlx5_ib_stages { MLX5_IB_STAGE_UMR_RESOURCES, MLX5_IB_STAGE_DELAY_DROP, MLX5_IB_STAGE_CLASS_ATTR, + MLX5_IB_STAGE_REP_REG, MLX5_IB_STAGE_MAX, }; @@ -797,7 +801,7 @@ struct mlx5_ib_dev { struct srcu_struct mr_srcu; u32 null_mkey; #endif - struct mlx5_ib_flow_db flow_db; + struct mlx5_ib_flow_db *flow_db; /* protect resources needed as part of reset flow */ spinlock_t reset_flow_resource_lock; struct list_head qp_list; @@ -807,6 +811,7 @@ struct mlx5_ib_dev { struct mlx5_sq_bfreg fp_bfreg; struct mlx5_ib_delay_drop delay_drop; const struct mlx5_ib_profile *profile; + struct mlx5_eswitch_rep *rep; /* protect the user_td */ struct mutex lb_mutex; @@ -1049,6 +1054,31 @@ static inline void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ +/* Needed for rep profile */ +int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev); +void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev); +int mlx5_ib_stage_rep_flow_db_init(struct mlx5_ib_dev *dev); +int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev); +int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev); +int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev); +void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev); +int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev); +void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev); +int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev); +void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev); +int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev); +void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev); +int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev); +void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev); +int mlx5_ib_stage_umr_res_init(struct mlx5_ib_dev *dev); +void mlx5_ib_stage_umr_res_cleanup(struct mlx5_ib_dev *dev); +int mlx5_ib_stage_class_attr_init(struct mlx5_ib_dev *dev); +void __mlx5_ib_remove(struct mlx5_ib_dev *dev, + const struct mlx5_ib_profile *profile, + int stage); +void *__mlx5_ib_add(struct mlx5_ib_dev *dev, + const struct mlx5_ib_profile *profile); + int mlx5_ib_get_vf_config(struct ib_device *device, int vf, u8 port, struct ifla_vf_info *info); int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf, diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 556e015678de..a5fad3e87ff7 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -587,7 +587,7 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c) static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev) { - if (!mlx5_debugfs_root) + if (!mlx5_debugfs_root || dev->rep) return; debugfs_remove_recursive(dev->cache.root); @@ -600,7 +600,7 @@ static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev) struct mlx5_cache_ent *ent; int i; - if (!mlx5_debugfs_root) + if (!mlx5_debugfs_root || dev->rep) return 0; cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root); @@ -690,6 +690,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) MLX5_IB_UMR_OCTOWORD; ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT; if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) && + !dev->rep && mlx5_core_is_pf(dev->mdev)) ent->limit = dev->mdev->profile->mr_cache[i].limit; else diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 39d24bf694a8..5663530ea5fd 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -36,6 +36,7 @@ #include <rdma/ib_user_verbs.h> #include <linux/mlx5/fs.h> #include "mlx5_ib.h" +#include "ib_rep.h" /* not supported currently */ static int wq_signature; @@ -1082,6 +1083,13 @@ static void destroy_raw_packet_qp_tis(struct mlx5_ib_dev *dev, mlx5_core_destroy_tis(dev->mdev, sq->tisn); } +static void destroy_flow_rule_vport_sq(struct mlx5_ib_dev *dev, + struct mlx5_ib_sq *sq) +{ + if (sq->flow_rule) + mlx5_del_flow_rules(sq->flow_rule); +} + static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, struct mlx5_ib_sq *sq, void *qpin, struct ib_pd *pd) @@ -1145,8 +1153,15 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, if (err) goto err_umem; + err = create_flow_rule_vport_sq(dev, sq); + if (err) + goto err_flow; + return 0; +err_flow: + mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp); + err_umem: ib_umem_release(sq->ubuffer.umem); sq->ubuffer.umem = NULL; @@ -1157,6 +1172,7 @@ err_umem: static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev, struct mlx5_ib_sq *sq) { + destroy_flow_rule_vport_sq(dev, sq); mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp); ib_umem_release(sq->ubuffer.umem); } @@ -1263,6 +1279,10 @@ static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev, if (tunnel_offload_en) MLX5_SET(tirc, tirc, tunneled_offload_en, 1); + if (dev->rep) + MLX5_SET(tirc, tirc, self_lb_block, + MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_); + err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn); kvfree(in); @@ -1554,6 +1574,10 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields); create_tir: + if (dev->rep) + MLX5_SET(tirc, tirc, self_lb_block, + MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_); + err = mlx5_core_create_tir(dev->mdev, in, inlen, &qp->rss_qp.tirn); if (err) diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index d88b78a17440..08b85215c2be 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -149,6 +149,7 @@ config MACVTAP config IPVLAN tristate "IP-VLAN support" depends on INET + depends on IPV6 || !IPV6 depends on NETFILTER select NET_L3_MASTER_DEV ---help--- diff --git a/drivers/net/Space.c b/drivers/net/Space.c index 64333ec999ac..3afda6561434 100644 --- a/drivers/net/Space.c +++ b/drivers/net/Space.c @@ -114,9 +114,6 @@ static struct devprobe2 m68k_probes[] __initdata = { #ifdef CONFIG_MVME147_NET /* MVME147 internal Ethernet */ {mvme147lance_probe, 0}, #endif -#ifdef CONFIG_MAC89x0 - {mac89x0_probe, 0}, -#endif {NULL, 0}, }; diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index c669554d70bb..4c19d23dd282 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4791,6 +4791,7 @@ static struct pernet_operations bond_net_ops = { .exit = bond_net_exit, .id = &bond_net_id, .size = sizeof(struct bond_net), + .async = true, }; static int __init bonding_init(void) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index e1b5c5c66fce..24486f96dd39 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -253,9 +253,8 @@ static void mv88e6xxx_g1_irq_unmask(struct irq_data *d) chip->g1_irq.masked &= ~(1 << n); } -static irqreturn_t mv88e6xxx_g1_irq_thread_fn(int irq, void *dev_id) +static irqreturn_t mv88e6xxx_g1_irq_thread_work(struct mv88e6xxx_chip *chip) { - struct mv88e6xxx_chip *chip = dev_id; unsigned int nhandled = 0; unsigned int sub_irq; unsigned int n; @@ -280,6 +279,13 @@ out: return (nhandled > 0 ? IRQ_HANDLED : IRQ_NONE); } +static irqreturn_t mv88e6xxx_g1_irq_thread_fn(int irq, void *dev_id) +{ + struct mv88e6xxx_chip *chip = dev_id; + + return mv88e6xxx_g1_irq_thread_work(chip); +} + static void mv88e6xxx_g1_irq_bus_lock(struct irq_data *d) { struct mv88e6xxx_chip *chip = irq_data_get_irq_chip_data(d); @@ -335,7 +341,7 @@ static const struct irq_domain_ops mv88e6xxx_g1_irq_domain_ops = { .xlate = irq_domain_xlate_twocell, }; -static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip) +static void mv88e6xxx_g1_irq_free_common(struct mv88e6xxx_chip *chip) { int irq, virq; u16 mask; @@ -344,8 +350,6 @@ static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip) mask &= ~GENMASK(chip->g1_irq.nirqs, 0); mv88e6xxx_g1_write(chip, MV88E6XXX_G1_CTL1, mask); - free_irq(chip->irq, chip); - for (irq = 0; irq < chip->g1_irq.nirqs; irq++) { virq = irq_find_mapping(chip->g1_irq.domain, irq); irq_dispose_mapping(virq); @@ -354,7 +358,14 @@ static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip) irq_domain_remove(chip->g1_irq.domain); } -static int mv88e6xxx_g1_irq_setup(struct mv88e6xxx_chip *chip) +static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip) +{ + mv88e6xxx_g1_irq_free(chip); + + free_irq(chip->irq, chip); +} + +static int mv88e6xxx_g1_irq_setup_common(struct mv88e6xxx_chip *chip) { int err, irq, virq; u16 reg, mask; @@ -387,13 +398,6 @@ static int mv88e6xxx_g1_irq_setup(struct mv88e6xxx_chip *chip) if (err) goto out_disable; - err = request_threaded_irq(chip->irq, NULL, - mv88e6xxx_g1_irq_thread_fn, - IRQF_ONESHOT | IRQF_TRIGGER_FALLING, - dev_name(chip->dev), chip); - if (err) - goto out_disable; - return 0; out_disable: @@ -411,6 +415,62 @@ out_mapping: return err; } +static int mv88e6xxx_g1_irq_setup(struct mv88e6xxx_chip *chip) +{ + int err; + + err = mv88e6xxx_g1_irq_setup_common(chip); + if (err) + return err; + + err = request_threaded_irq(chip->irq, NULL, + mv88e6xxx_g1_irq_thread_fn, + IRQF_ONESHOT | IRQF_TRIGGER_FALLING, + dev_name(chip->dev), chip); + if (err) + mv88e6xxx_g1_irq_free_common(chip); + + return err; +} + +static void mv88e6xxx_irq_poll(struct kthread_work *work) +{ + struct mv88e6xxx_chip *chip = container_of(work, + struct mv88e6xxx_chip, + irq_poll_work.work); + mv88e6xxx_g1_irq_thread_work(chip); + + kthread_queue_delayed_work(chip->kworker, &chip->irq_poll_work, + msecs_to_jiffies(100)); +} + +static int mv88e6xxx_irq_poll_setup(struct mv88e6xxx_chip *chip) +{ + int err; + + err = mv88e6xxx_g1_irq_setup_common(chip); + if (err) + return err; + + kthread_init_delayed_work(&chip->irq_poll_work, + mv88e6xxx_irq_poll); + + chip->kworker = kthread_create_worker(0, dev_name(chip->dev)); + if (IS_ERR(chip->kworker)) + return PTR_ERR(chip->kworker); + + kthread_queue_delayed_work(chip->kworker, &chip->irq_poll_work, + msecs_to_jiffies(100)); + + return 0; +} + +static void mv88e6xxx_irq_poll_free(struct mv88e6xxx_chip *chip) +{ + kthread_cancel_delayed_work_sync(&chip->irq_poll_work); + kthread_destroy_worker(chip->kworker); +} + int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg, u16 mask) { int i; @@ -4034,33 +4094,34 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev) goto out; } - if (chip->irq > 0) { - /* Has to be performed before the MDIO bus is created, - * because the PHYs will link there interrupts to these - * interrupt controllers - */ - mutex_lock(&chip->reg_lock); + /* Has to be performed before the MDIO bus is created, because + * the PHYs will link there interrupts to these interrupt + * controllers + */ + mutex_lock(&chip->reg_lock); + if (chip->irq > 0) err = mv88e6xxx_g1_irq_setup(chip); - mutex_unlock(&chip->reg_lock); - - if (err) - goto out; - - if (chip->info->g2_irqs > 0) { - err = mv88e6xxx_g2_irq_setup(chip); - if (err) - goto out_g1_irq; - } + else + err = mv88e6xxx_irq_poll_setup(chip); + mutex_unlock(&chip->reg_lock); - err = mv88e6xxx_g1_atu_prob_irq_setup(chip); - if (err) - goto out_g2_irq; + if (err) + goto out; - err = mv88e6xxx_g1_vtu_prob_irq_setup(chip); + if (chip->info->g2_irqs > 0) { + err = mv88e6xxx_g2_irq_setup(chip); if (err) - goto out_g1_atu_prob_irq; + goto out_g1_irq; } + err = mv88e6xxx_g1_atu_prob_irq_setup(chip); + if (err) + goto out_g2_irq; + + err = mv88e6xxx_g1_vtu_prob_irq_setup(chip); + if (err) + goto out_g1_atu_prob_irq; + err = mv88e6xxx_mdios_register(chip, np); if (err) goto out_g1_vtu_prob_irq; @@ -4074,20 +4135,19 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev) out_mdio: mv88e6xxx_mdios_unregister(chip); out_g1_vtu_prob_irq: - if (chip->irq > 0) - mv88e6xxx_g1_vtu_prob_irq_free(chip); + mv88e6xxx_g1_vtu_prob_irq_free(chip); out_g1_atu_prob_irq: - if (chip->irq > 0) - mv88e6xxx_g1_atu_prob_irq_free(chip); + mv88e6xxx_g1_atu_prob_irq_free(chip); out_g2_irq: - if (chip->info->g2_irqs > 0 && chip->irq > 0) + if (chip->info->g2_irqs > 0) mv88e6xxx_g2_irq_free(chip); out_g1_irq: - if (chip->irq > 0) { - mutex_lock(&chip->reg_lock); + mutex_lock(&chip->reg_lock); + if (chip->irq > 0) mv88e6xxx_g1_irq_free(chip); - mutex_unlock(&chip->reg_lock); - } + else + mv88e6xxx_irq_poll_free(chip); + mutex_unlock(&chip->reg_lock); out: return err; } diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h index 97d7915f32c7..d6a1391dc268 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.h +++ b/drivers/net/dsa/mv88e6xxx/chip.h @@ -15,6 +15,7 @@ #include <linux/if_vlan.h> #include <linux/irq.h> #include <linux/gpio/consumer.h> +#include <linux/kthread.h> #include <linux/phy.h> #include <linux/ptp_clock_kernel.h> #include <linux/timecounter.h> @@ -245,6 +246,8 @@ struct mv88e6xxx_chip { int watchdog_irq; int atu_prob_irq; int vtu_prob_irq; + struct kthread_worker *kworker; + struct kthread_delayed_work irq_poll_work; /* GPIO resources */ u8 gpio_data[2]; diff --git a/drivers/net/ethernet/apple/macmace.c b/drivers/net/ethernet/apple/macmace.c index f17a160dbff2..137cbb470af2 100644 --- a/drivers/net/ethernet/apple/macmace.c +++ b/drivers/net/ethernet/apple/macmace.c @@ -247,8 +247,8 @@ static int mace_probe(struct platform_device *pdev) dev->netdev_ops = &mace_netdev_ops; dev->watchdog_timeo = TX_TIMEOUT; - printk(KERN_INFO "%s: 68K MACE, hardware address %pM\n", - dev->name, dev->dev_addr); + pr_info("Onboard MACE, hardware address %pM, chip revision 0x%04X\n", + dev->dev_addr, mp->chipid); err = register_netdev(dev); if (!err) @@ -589,7 +589,6 @@ static irqreturn_t mace_interrupt(int irq, void *dev_id) else if (fs & (UFLO|LCOL|RTRY)) { ++dev->stats.tx_aborted_errors; if (mb->xmtfs & UFLO) { - printk(KERN_ERR "%s: DMA underrun.\n", dev->name); dev->stats.tx_fifo_errors++; mace_txdma_reset(dev); } @@ -644,10 +643,8 @@ static void mace_dma_rx_frame(struct net_device *dev, struct mace_frame *mf) if (frame_status & (RS_OFLO | RS_CLSN | RS_FRAMERR | RS_FCSERR)) { dev->stats.rx_errors++; - if (frame_status & RS_OFLO) { - printk(KERN_DEBUG "%s: fifo overflow.\n", dev->name); + if (frame_status & RS_OFLO) dev->stats.rx_fifo_errors++; - } if (frame_status & RS_CLSN) dev->stats.collisions++; if (frame_status & RS_FRAMERR) @@ -770,18 +767,4 @@ static struct platform_driver mac_mace_driver = { }, }; -static int __init mac_mace_init_module(void) -{ - if (!MACH_IS_MAC) - return -ENODEV; - - return platform_driver_register(&mac_mace_driver); -} - -static void __exit mac_mace_cleanup_module(void) -{ - platform_driver_unregister(&mac_mace_driver); -} - -module_init(mac_mace_init_module); -module_exit(mac_mace_cleanup_module); +module_platform_driver(mac_mace_driver); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index 3177b0c9bd2d..db92f1858060 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -1335,12 +1335,6 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, return ret; } - /* Clear out any old resources being used by the filter before - * we start constructing the new filter. - */ - if (f->valid) - clear_filter(adapter, f); - if (is_t6(adapter->params.chip) && fs->type && ipv6_addr_type((const struct in6_addr *)fs->val.lip) != IPV6_ADDR_ANY) { diff --git a/drivers/net/ethernet/cirrus/mac89x0.c b/drivers/net/ethernet/cirrus/mac89x0.c index 977d4c2c759d..3f8fe8fd79cc 100644 --- a/drivers/net/ethernet/cirrus/mac89x0.c +++ b/drivers/net/ethernet/cirrus/mac89x0.c @@ -56,21 +56,11 @@ local_irq_{dis,en}able() */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + static const char version[] = "cs89x0.c:v1.02 11/26/96 Russell Nelson <nelson@crynwr.com>\n"; -/* ======================= configure the driver here ======================= */ - -/* use 0 for production, 1 for verification, >2 for debug */ -#ifndef NET_DEBUG -#define NET_DEBUG 0 -#endif - -/* ======================= end of configuration ======================= */ - - -/* Always include 'config.h' first in case the user wants to turn on - or override something. */ #include <linux/module.h> /* @@ -93,6 +83,7 @@ static const char version[] = #include <linux/errno.h> #include <linux/init.h> #include <linux/netdevice.h> +#include <linux/platform_device.h> #include <linux/etherdevice.h> #include <linux/skbuff.h> #include <linux/delay.h> @@ -105,24 +96,22 @@ static const char version[] = #include "cs89x0.h" -static unsigned int net_debug = NET_DEBUG; +static int debug = -1; +module_param(debug, int, 0); +MODULE_PARM_DESC(debug, "debug message level"); /* Information that need to be kept for each board. */ struct net_local { + int msg_enable; int chip_type; /* one of: CS8900, CS8920, CS8920M */ char chip_revision; /* revision letter of the chip ('A'...) */ int send_cmd; /* the propercommand used to send a packet. */ int rx_mode; int curr_rx_cfg; int send_underrun; /* keep track of how many underruns in a row we get */ - struct sk_buff *skb; }; /* Index to functions, as function prototypes. */ - -#if 0 -extern void reset_chip(struct net_device *dev); -#endif static int net_open(struct net_device *dev); static int net_send_packet(struct sk_buff *skb, struct net_device *dev); static irqreturn_t net_interrupt(int irq, void *dev_id); @@ -132,10 +121,6 @@ static int net_close(struct net_device *dev); static struct net_device_stats *net_get_stats(struct net_device *dev); static int set_mac_address(struct net_device *dev, void *addr); - -/* Example routines you must write ;->. */ -#define tx_done(dev) 1 - /* For reading/writing registers ISA-style */ static inline int readreg_io(struct net_device *dev, int portno) @@ -176,12 +161,10 @@ static const struct net_device_ops mac89x0_netdev_ops = { /* Probe for the CS8900 card in slot E. We won't bother looking anywhere else until we have a really good reason to do so. */ -struct net_device * __init mac89x0_probe(int unit) +static int mac89x0_device_probe(struct platform_device *pdev) { struct net_device *dev; - static int once_is_enough; struct net_local *lp; - static unsigned version_printed; int i, slot; unsigned rev_type = 0; unsigned long ioaddr; @@ -189,21 +172,9 @@ struct net_device * __init mac89x0_probe(int unit) int err = -ENODEV; struct nubus_rsrc *fres; - if (!MACH_IS_MAC) - return ERR_PTR(-ENODEV); - dev = alloc_etherdev(sizeof(struct net_local)); if (!dev) - return ERR_PTR(-ENOMEM); - - if (unit >= 0) { - sprintf(dev->name, "eth%d", unit); - netdev_boot_setup_check(dev); - } - - if (once_is_enough) - goto out; - once_is_enough = 1; + return -ENOMEM; /* We might have to parameterize this later */ slot = 0xE; @@ -230,9 +201,13 @@ struct net_device * __init mac89x0_probe(int unit) if (sig != swab16(CHIP_EISA_ID_SIG)) goto out; + SET_NETDEV_DEV(dev, &pdev->dev); + /* Initialize the net_device structure. */ lp = netdev_priv(dev); + lp->msg_enable = netif_msg_init(debug, 0); + /* Fill in the 'dev' fields. */ dev->base_addr = ioaddr; dev->mem_start = (unsigned long) @@ -255,19 +230,16 @@ struct net_device * __init mac89x0_probe(int unit) if (lp->chip_type != CS8900 && lp->chip_revision >= 'C') lp->send_cmd = TX_NOW; - if (net_debug && version_printed++ == 0) - printk(version); + netif_dbg(lp, drv, dev, "%s", version); - printk(KERN_INFO "%s: cs89%c0%s rev %c found at %#8lx", - dev->name, - lp->chip_type==CS8900?'0':'2', - lp->chip_type==CS8920M?"M":"", - lp->chip_revision, - dev->base_addr); + pr_info("cs89%c0%s rev %c found at %#8lx\n", + lp->chip_type == CS8900 ? '0' : '2', + lp->chip_type == CS8920M ? "M" : "", + lp->chip_revision, dev->base_addr); /* Try to read the MAC address */ if ((readreg(dev, PP_SelfST) & (EEPROM_PRESENT | EEPROM_OK)) == 0) { - printk("\nmac89x0: No EEPROM, giving up now.\n"); + pr_info("No EEPROM, giving up now.\n"); goto out1; } else { for (i = 0; i < ETH_ALEN; i += 2) { @@ -282,39 +254,23 @@ struct net_device * __init mac89x0_probe(int unit) /* print the IRQ and ethernet address. */ - printk(" IRQ %d ADDR %pM\n", dev->irq, dev->dev_addr); + pr_info("MAC %pM, IRQ %d\n", dev->dev_addr, dev->irq); dev->netdev_ops = &mac89x0_netdev_ops; err = register_netdev(dev); if (err) goto out1; - return NULL; + + platform_set_drvdata(pdev, dev); + return 0; out1: nubus_writew(0, dev->base_addr + ADD_PORT); out: free_netdev(dev); - return ERR_PTR(err); + return err; } -#if 0 -/* This is useful for something, but I don't know what yet. */ -void __init reset_chip(struct net_device *dev) -{ - int reset_start_time; - - writereg(dev, PP_SelfCTL, readreg(dev, PP_SelfCTL) | POWER_ON_RESET); - - /* wait 30 ms */ - msleep_interruptible(30); - - /* Wait until the chip is reset */ - reset_start_time = jiffies; - while( (readreg(dev, PP_SelfST) & INIT_DONE) == 0 && jiffies - reset_start_time < 2) - ; -} -#endif - /* Open/initialize the board. This is called (in the current kernel) sometime after booting when the 'ifconfig' program is run. @@ -374,11 +330,9 @@ net_send_packet(struct sk_buff *skb, struct net_device *dev) struct net_local *lp = netdev_priv(dev); unsigned long flags; - if (net_debug > 3) - printk("%s: sent %d byte packet of type %x\n", - dev->name, skb->len, - (skb->data[ETH_ALEN+ETH_ALEN] << 8) - | skb->data[ETH_ALEN+ETH_ALEN+1]); + netif_dbg(lp, tx_queued, dev, "sent %d byte packet of type %x\n", + skb->len, skb->data[ETH_ALEN + ETH_ALEN] << 8 | + skb->data[ETH_ALEN + ETH_ALEN + 1]); /* keep the upload from being interrupted, since we ask the chip to start transmitting before the @@ -416,11 +370,6 @@ static irqreturn_t net_interrupt(int irq, void *dev_id) struct net_local *lp; int ioaddr, status; - if (dev == NULL) { - printk ("net_interrupt(): irq %d for unknown device.\n", irq); - return IRQ_NONE; - } - ioaddr = dev->base_addr; lp = netdev_priv(dev); @@ -432,7 +381,7 @@ static irqreturn_t net_interrupt(int irq, void *dev_id) faster than you can read them off, you're screwed. Hasta la vista, baby! */ while ((status = swab16(nubus_readw(dev->base_addr + ISQ_PORT)))) { - if (net_debug > 4)printk("%s: event=%04x\n", dev->name, status); + netif_dbg(lp, intr, dev, "status=%04x\n", status); switch(status & ISQ_EVENT_MASK) { case ISQ_RECEIVER_EVENT: /* Got a packet(s). */ @@ -462,7 +411,7 @@ static irqreturn_t net_interrupt(int irq, void *dev_id) netif_wake_queue(dev); } if (status & TX_UNDERRUN) { - if (net_debug > 0) printk("%s: transmit underrun\n", dev->name); + netif_dbg(lp, tx_err, dev, "transmit underrun\n"); lp->send_underrun++; if (lp->send_underrun == 3) lp->send_cmd = TX_AFTER_381; else if (lp->send_underrun == 6) lp->send_cmd = TX_AFTER_ALL; @@ -483,6 +432,7 @@ static irqreturn_t net_interrupt(int irq, void *dev_id) static void net_rx(struct net_device *dev) { + struct net_local *lp = netdev_priv(dev); struct sk_buff *skb; int status, length; @@ -506,7 +456,6 @@ net_rx(struct net_device *dev) /* Malloc up new buffer. */ skb = alloc_skb(length, GFP_ATOMIC); if (skb == NULL) { - printk("%s: Memory squeeze, dropping packet.\n", dev->name); dev->stats.rx_dropped++; return; } @@ -515,10 +464,9 @@ net_rx(struct net_device *dev) skb_copy_to_linear_data(skb, (void *)(dev->mem_start + PP_RxFrame), length); - if (net_debug > 3)printk("%s: received %d byte packet of type %x\n", - dev->name, length, - (skb->data[ETH_ALEN+ETH_ALEN] << 8) - | skb->data[ETH_ALEN+ETH_ALEN+1]); + netif_dbg(lp, rx_status, dev, "received %d byte packet of type %x\n", + length, skb->data[ETH_ALEN + ETH_ALEN] << 8 | + skb->data[ETH_ALEN + ETH_ALEN + 1]); skb->protocol=eth_type_trans(skb,dev); netif_rx(skb); @@ -594,7 +542,7 @@ static int set_mac_address(struct net_device *dev, void *addr) return -EADDRNOTAVAIL; memcpy(dev->dev_addr, saddr->sa_data, ETH_ALEN); - printk("%s: Setting MAC address to %pM\n", dev->name, dev->dev_addr); + netdev_info(dev, "Setting MAC address to %pM\n", dev->dev_addr); /* set the Ethernet address */ for (i=0; i < ETH_ALEN/2; i++) @@ -603,32 +551,24 @@ static int set_mac_address(struct net_device *dev, void *addr) return 0; } -#ifdef MODULE - -static struct net_device *dev_cs89x0; -static int debug; - -module_param(debug, int, 0); -MODULE_PARM_DESC(debug, "CS89[02]0 debug level (0-5)"); MODULE_LICENSE("GPL"); -int __init -init_module(void) +static int mac89x0_device_remove(struct platform_device *pdev) { - net_debug = debug; - dev_cs89x0 = mac89x0_probe(-1); - if (IS_ERR(dev_cs89x0)) { - printk(KERN_WARNING "mac89x0.c: No card found\n"); - return PTR_ERR(dev_cs89x0); - } + struct net_device *dev = platform_get_drvdata(pdev); + + unregister_netdev(dev); + nubus_writew(0, dev->base_addr + ADD_PORT); + free_netdev(dev); return 0; } -void -cleanup_module(void) -{ - unregister_netdev(dev_cs89x0); - nubus_writew(0, dev_cs89x0->base_addr + ADD_PORT); - free_netdev(dev_cs89x0); -} -#endif /* MODULE */ +static struct platform_driver mac89x0_platform_driver = { + .probe = mac89x0_device_probe, + .remove = mac89x0_device_remove, + .driver = { + .name = "mac89x0", + }, +}; + +module_platform_driver(mac89x0_platform_driver); diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 1a49297224ed..ff92ab1daeb8 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -19,7 +19,7 @@ #include "be.h" #include "be_cmds.h" -char *be_misconfig_evt_port_state[] = { +const char * const be_misconfig_evt_port_state[] = { "Physical Link is functional", "Optics faulted/incorrectly installed/not installed - Reseat optics. If issue not resolved, replace.", "Optics of two types installed – Remove one optic or install matching pair of optics.", diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h index 09da2d82c2f0..e8b43cf44b6f 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.h +++ b/drivers/net/ethernet/emulex/benet/be_cmds.h @@ -201,7 +201,7 @@ enum { phy_state == BE_PHY_UNQUALIFIED || \ phy_state == BE_PHY_UNCERTIFIED) -extern char *be_misconfig_evt_port_state[]; +extern const char * const be_misconfig_evt_port_state[]; /* async event indicating misconfigured port */ struct be_async_event_misconfig_port { diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index a998c36c5e61..159dc2df878d 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -454,6 +454,16 @@ static void dpaa_set_rx_mode(struct net_device *net_dev) err); } + if (!!(net_dev->flags & IFF_ALLMULTI) != priv->mac_dev->allmulti) { + priv->mac_dev->allmulti = !priv->mac_dev->allmulti; + err = priv->mac_dev->set_allmulti(priv->mac_dev->fman_mac, + priv->mac_dev->allmulti); + if (err < 0) + netif_err(priv, drv, net_dev, + "mac_dev->set_allmulti() = %d\n", + err); + } + err = priv->mac_dev->set_multi(net_dev, priv->mac_dev); if (err < 0) netif_err(priv, drv, net_dev, "mac_dev->set_multi() = %d\n", @@ -1916,8 +1926,10 @@ static int skb_to_sg_fd(struct dpaa_priv *priv, goto csum_failed; } + /* SGT[0] is used by the linear part */ sgt = (struct qm_sg_entry *)(sgt_buf + priv->tx_headroom); - qm_sg_entry_set_len(&sgt[0], skb_headlen(skb)); + frag_len = skb_headlen(skb); + qm_sg_entry_set_len(&sgt[0], frag_len); sgt[0].bpid = FSL_DPAA_BPID_INV; sgt[0].offset = 0; addr = dma_map_single(dev, skb->data, @@ -1930,9 +1942,9 @@ static int skb_to_sg_fd(struct dpaa_priv *priv, qm_sg_entry_set64(&sgt[0], addr); /* populate the rest of SGT entries */ - frag = &skb_shinfo(skb)->frags[0]; - frag_len = frag->size; - for (i = 1; i <= nr_frags; i++, frag++) { + for (i = 0; i < nr_frags; i++) { + frag = &skb_shinfo(skb)->frags[i]; + frag_len = frag->size; WARN_ON(!skb_frag_page(frag)); addr = skb_frag_dma_map(dev, frag, 0, frag_len, dma_dir); @@ -1942,15 +1954,16 @@ static int skb_to_sg_fd(struct dpaa_priv *priv, goto sg_map_failed; } - qm_sg_entry_set_len(&sgt[i], frag_len); - sgt[i].bpid = FSL_DPAA_BPID_INV; - sgt[i].offset = 0; + qm_sg_entry_set_len(&sgt[i + 1], frag_len); + sgt[i + 1].bpid = FSL_DPAA_BPID_INV; + sgt[i + 1].offset = 0; /* keep the offset in the address */ - qm_sg_entry_set64(&sgt[i], addr); - frag_len = frag->size; + qm_sg_entry_set64(&sgt[i + 1], addr); } - qm_sg_entry_set_f(&sgt[i - 1], frag_len); + + /* Set the final bit in the last used entry of the SGT */ + qm_sg_entry_set_f(&sgt[nr_frags], frag_len); qm_fd_set_sg(fd, priv->tx_headroom, skb->len); @@ -2052,19 +2065,23 @@ static int dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev) /* MAX_SKB_FRAGS is equal or larger than our dpaa_SGT_MAX_ENTRIES; * make sure we don't feed FMan with more fragments than it supports. */ - if (nonlinear && - likely(skb_shinfo(skb)->nr_frags < DPAA_SGT_MAX_ENTRIES)) { - /* Just create a S/G fd based on the skb */ - err = skb_to_sg_fd(priv, skb, &fd); - percpu_priv->tx_frag_skbuffs++; - } else { + if (unlikely(nonlinear && + (skb_shinfo(skb)->nr_frags >= DPAA_SGT_MAX_ENTRIES))) { /* If the egress skb contains more fragments than we support * we have no choice but to linearize it ourselves. */ - if (unlikely(nonlinear) && __skb_linearize(skb)) + if (__skb_linearize(skb)) goto enomem; - /* Finally, create a contig FD from this skb */ + nonlinear = skb_is_nonlinear(skb); + } + + if (nonlinear) { + /* Just create a S/G fd based on the skb */ + err = skb_to_sg_fd(priv, skb, &fd); + percpu_priv->tx_frag_skbuffs++; + } else { + /* Create a contig FD from this skb */ err = skb_to_contig_fd(priv, skb, &fd, &offset); } if (unlikely(err < 0)) @@ -2201,14 +2218,8 @@ static enum qman_cb_dqrr_result rx_error_dqrr(struct qman_portal *portal, if (dpaa_eth_napi_schedule(percpu_priv, portal)) return qman_cb_dqrr_stop; - if (dpaa_eth_refill_bpools(priv)) - /* Unable to refill the buffer pool due to insufficient - * system memory. Just release the frame back into the pool, - * otherwise we'll soon end up with an empty buffer pool. - */ - dpaa_fd_release(net_dev, &dq->fd); - else - dpaa_rx_error(net_dev, priv, percpu_priv, &dq->fd, fq->fqid); + dpaa_eth_refill_bpools(priv); + dpaa_rx_error(net_dev, priv, percpu_priv, &dq->fd, fq->fqid); return qman_cb_dqrr_consume; } diff --git a/drivers/net/ethernet/freescale/fman/fman_dtsec.c b/drivers/net/ethernet/freescale/fman/fman_dtsec.c index ea43b4974149..9a581faaa742 100644 --- a/drivers/net/ethernet/freescale/fman/fman_dtsec.c +++ b/drivers/net/ethernet/freescale/fman/fman_dtsec.c @@ -1117,6 +1117,25 @@ int dtsec_add_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr) return 0; } +int dtsec_set_allmulti(struct fman_mac *dtsec, bool enable) +{ + u32 tmp; + struct dtsec_regs __iomem *regs = dtsec->regs; + + if (!is_init_done(dtsec->dtsec_drv_param)) + return -EINVAL; + + tmp = ioread32be(®s->rctrl); + if (enable) + tmp |= RCTRL_MPROM; + else + tmp &= ~RCTRL_MPROM; + + iowrite32be(tmp, ®s->rctrl); + + return 0; +} + int dtsec_del_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr) { struct dtsec_regs __iomem *regs = dtsec->regs; diff --git a/drivers/net/ethernet/freescale/fman/fman_dtsec.h b/drivers/net/ethernet/freescale/fman/fman_dtsec.h index c4467c072058..1a689adf5a22 100644 --- a/drivers/net/ethernet/freescale/fman/fman_dtsec.h +++ b/drivers/net/ethernet/freescale/fman/fman_dtsec.h @@ -55,5 +55,6 @@ int dtsec_set_exception(struct fman_mac *dtsec, int dtsec_add_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr); int dtsec_del_hash_mac_address(struct fman_mac *dtsec, enet_addr_t *eth_addr); int dtsec_get_version(struct fman_mac *dtsec, u32 *mac_version); +int dtsec_set_allmulti(struct fman_mac *dtsec, bool enable); #endif /* __DTSEC_H */ diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.c b/drivers/net/ethernet/freescale/fman/fman_memac.c index c0296880feba..446a97b792e3 100644 --- a/drivers/net/ethernet/freescale/fman/fman_memac.c +++ b/drivers/net/ethernet/freescale/fman/fman_memac.c @@ -350,6 +350,7 @@ struct fman_mac { struct fman_rev_info fm_rev_info; bool basex_if; struct phy_device *pcsphy; + bool allmulti_enabled; }; static void add_addr_in_paddr(struct memac_regs __iomem *regs, u8 *adr, @@ -940,6 +941,29 @@ int memac_add_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr) return 0; } +int memac_set_allmulti(struct fman_mac *memac, bool enable) +{ + u32 entry; + struct memac_regs __iomem *regs = memac->regs; + + if (!is_init_done(memac->memac_drv_param)) + return -EINVAL; + + if (enable) { + for (entry = 0; entry < HASH_TABLE_SIZE; entry++) + iowrite32be(entry | HASH_CTRL_MCAST_EN, + ®s->hashtable_ctrl); + } else { + for (entry = 0; entry < HASH_TABLE_SIZE; entry++) + iowrite32be(entry & ~HASH_CTRL_MCAST_EN, + ®s->hashtable_ctrl); + } + + memac->allmulti_enabled = enable; + + return 0; +} + int memac_del_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr) { struct memac_regs __iomem *regs = memac->regs; @@ -963,8 +987,12 @@ int memac_del_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr) break; } } - if (list_empty(&memac->multicast_addr_hash->lsts[hash])) - iowrite32be(hash & ~HASH_CTRL_MCAST_EN, ®s->hashtable_ctrl); + + if (!memac->allmulti_enabled) { + if (list_empty(&memac->multicast_addr_hash->lsts[hash])) + iowrite32be(hash & ~HASH_CTRL_MCAST_EN, + ®s->hashtable_ctrl); + } return 0; } diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.h b/drivers/net/ethernet/freescale/fman/fman_memac.h index c4a66469a907..b5a50338ed9a 100644 --- a/drivers/net/ethernet/freescale/fman/fman_memac.h +++ b/drivers/net/ethernet/freescale/fman/fman_memac.h @@ -57,5 +57,6 @@ int memac_set_exception(struct fman_mac *memac, enum fman_mac_exceptions exception, bool enable); int memac_add_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr); int memac_del_hash_mac_address(struct fman_mac *memac, enet_addr_t *eth_addr); +int memac_set_allmulti(struct fman_mac *memac, bool enable); #endif /* __MEMAC_H */ diff --git a/drivers/net/ethernet/freescale/fman/fman_tgec.c b/drivers/net/ethernet/freescale/fman/fman_tgec.c index 4b0f3a50b293..284735d4ebe9 100644 --- a/drivers/net/ethernet/freescale/fman/fman_tgec.c +++ b/drivers/net/ethernet/freescale/fman/fman_tgec.c @@ -217,6 +217,7 @@ struct fman_mac { struct tgec_cfg *cfg; void *fm; struct fman_rev_info fm_rev_info; + bool allmulti_enabled; }; static void set_mac_address(struct tgec_regs __iomem *regs, u8 *adr) @@ -564,6 +565,29 @@ int tgec_add_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr) return 0; } +int tgec_set_allmulti(struct fman_mac *tgec, bool enable) +{ + u32 entry; + struct tgec_regs __iomem *regs = tgec->regs; + + if (!is_init_done(tgec->cfg)) + return -EINVAL; + + if (enable) { + for (entry = 0; entry < TGEC_HASH_TABLE_SIZE; entry++) + iowrite32be(entry | TGEC_HASH_MCAST_EN, + ®s->hashtable_ctrl); + } else { + for (entry = 0; entry < TGEC_HASH_TABLE_SIZE; entry++) + iowrite32be(entry & ~TGEC_HASH_MCAST_EN, + ®s->hashtable_ctrl); + } + + tgec->allmulti_enabled = enable; + + return 0; +} + int tgec_del_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr) { struct tgec_regs __iomem *regs = tgec->regs; @@ -591,9 +615,12 @@ int tgec_del_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr) break; } } - if (list_empty(&tgec->multicast_addr_hash->lsts[hash])) - iowrite32be((hash & ~TGEC_HASH_MCAST_EN), - ®s->hashtable_ctrl); + + if (!tgec->allmulti_enabled) { + if (list_empty(&tgec->multicast_addr_hash->lsts[hash])) + iowrite32be((hash & ~TGEC_HASH_MCAST_EN), + ®s->hashtable_ctrl); + } return 0; } diff --git a/drivers/net/ethernet/freescale/fman/fman_tgec.h b/drivers/net/ethernet/freescale/fman/fman_tgec.h index 514bba9f47ce..cbbd3b422a98 100644 --- a/drivers/net/ethernet/freescale/fman/fman_tgec.h +++ b/drivers/net/ethernet/freescale/fman/fman_tgec.h @@ -51,5 +51,6 @@ int tgec_set_exception(struct fman_mac *tgec, int tgec_add_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr); int tgec_del_hash_mac_address(struct fman_mac *tgec, enet_addr_t *eth_addr); int tgec_get_version(struct fman_mac *tgec, u32 *mac_version); +int tgec_set_allmulti(struct fman_mac *tgec, bool enable); #endif /* __TGEC_H */ diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c index 88c0a0636b44..4829dcd9e077 100644 --- a/drivers/net/ethernet/freescale/fman/mac.c +++ b/drivers/net/ethernet/freescale/fman/mac.c @@ -470,6 +470,7 @@ static void setup_dtsec(struct mac_device *mac_dev) mac_dev->set_tx_pause = dtsec_set_tx_pause_frames; mac_dev->set_rx_pause = dtsec_accept_rx_pause_frames; mac_dev->set_exception = dtsec_set_exception; + mac_dev->set_allmulti = dtsec_set_allmulti; mac_dev->set_multi = set_multi; mac_dev->start = start; mac_dev->stop = stop; @@ -488,6 +489,7 @@ static void setup_tgec(struct mac_device *mac_dev) mac_dev->set_tx_pause = tgec_set_tx_pause_frames; mac_dev->set_rx_pause = tgec_accept_rx_pause_frames; mac_dev->set_exception = tgec_set_exception; + mac_dev->set_allmulti = tgec_set_allmulti; mac_dev->set_multi = set_multi; mac_dev->start = start; mac_dev->stop = stop; @@ -506,6 +508,7 @@ static void setup_memac(struct mac_device *mac_dev) mac_dev->set_tx_pause = memac_set_tx_pause_frames; mac_dev->set_rx_pause = memac_accept_rx_pause_frames; mac_dev->set_exception = memac_set_exception; + mac_dev->set_allmulti = memac_set_allmulti; mac_dev->set_multi = set_multi; mac_dev->start = start; mac_dev->stop = stop; diff --git a/drivers/net/ethernet/freescale/fman/mac.h b/drivers/net/ethernet/freescale/fman/mac.h index eefb3357e304..b520cec120ee 100644 --- a/drivers/net/ethernet/freescale/fman/mac.h +++ b/drivers/net/ethernet/freescale/fman/mac.h @@ -59,6 +59,7 @@ struct mac_device { bool rx_pause_active; bool tx_pause_active; bool promisc; + bool allmulti; int (*init)(struct mac_device *mac_dev); int (*start)(struct mac_device *mac_dev); @@ -66,6 +67,7 @@ struct mac_device { void (*adjust_link)(struct mac_device *mac_dev); int (*set_promisc)(struct fman_mac *mac_dev, bool enable); int (*change_addr)(struct fman_mac *mac_dev, enet_addr_t *enet_addr); + int (*set_allmulti)(struct fman_mac *mac_dev, bool enable); int (*set_multi)(struct net_device *net_dev, struct mac_device *mac_dev); int (*set_rx_pause)(struct fman_mac *mac_dev, bool en); diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 5a86a916492c..765407179fdd 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -111,7 +111,7 @@ static int ibmvnic_poll(struct napi_struct *napi, int data); static void send_map_query(struct ibmvnic_adapter *adapter); static void send_request_map(struct ibmvnic_adapter *, dma_addr_t, __be32, u8); static void send_request_unmap(struct ibmvnic_adapter *, u8); -static void send_login(struct ibmvnic_adapter *adapter); +static int send_login(struct ibmvnic_adapter *adapter); static void send_cap_queries(struct ibmvnic_adapter *adapter); static int init_sub_crqs(struct ibmvnic_adapter *); static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter); @@ -809,8 +809,11 @@ static int ibmvnic_login(struct net_device *netdev) } reinit_completion(&adapter->init_done); - send_login(adapter); - if (!wait_for_completion_timeout(&adapter->init_done, + rc = send_login(adapter); + if (rc) { + dev_err(dev, "Unable to attempt device login\n"); + return rc; + } else if (!wait_for_completion_timeout(&adapter->init_done, timeout)) { dev_err(dev, "Login timeout\n"); return -1; @@ -845,8 +848,6 @@ static void release_resources(struct ibmvnic_adapter *adapter) release_tx_pools(adapter); release_rx_pools(adapter); - release_stats_token(adapter); - release_stats_buffers(adapter); release_error_buffers(adapter); release_napi(adapter); release_login_rsp_buffer(adapter); @@ -974,14 +975,6 @@ static int init_resources(struct ibmvnic_adapter *adapter) if (rc) return rc; - rc = init_stats_buffers(adapter); - if (rc) - return rc; - - rc = init_stats_token(adapter); - if (rc) - return rc; - adapter->vpd = kzalloc(sizeof(*adapter->vpd), GFP_KERNEL); if (!adapter->vpd) return -ENOMEM; @@ -1091,6 +1084,7 @@ static int ibmvnic_open(struct net_device *netdev) static void clean_rx_pools(struct ibmvnic_adapter *adapter) { struct ibmvnic_rx_pool *rx_pool; + struct ibmvnic_rx_buff *rx_buff; u64 rx_entries; int rx_scrqs; int i, j; @@ -1104,14 +1098,15 @@ static void clean_rx_pools(struct ibmvnic_adapter *adapter) /* Free any remaining skbs in the rx buffer pools */ for (i = 0; i < rx_scrqs; i++) { rx_pool = &adapter->rx_pool[i]; - if (!rx_pool) + if (!rx_pool || !rx_pool->rx_buff) continue; netdev_dbg(adapter->netdev, "Cleaning rx_pool[%d]\n", i); for (j = 0; j < rx_entries; j++) { - if (rx_pool->rx_buff[j].skb) { - dev_kfree_skb_any(rx_pool->rx_buff[j].skb); - rx_pool->rx_buff[j].skb = NULL; + rx_buff = &rx_pool->rx_buff[j]; + if (rx_buff && rx_buff->skb) { + dev_kfree_skb_any(rx_buff->skb); + rx_buff->skb = NULL; } } } @@ -1120,6 +1115,7 @@ static void clean_rx_pools(struct ibmvnic_adapter *adapter) static void clean_tx_pools(struct ibmvnic_adapter *adapter) { struct ibmvnic_tx_pool *tx_pool; + struct ibmvnic_tx_buff *tx_buff; u64 tx_entries; int tx_scrqs; int i, j; @@ -1133,14 +1129,15 @@ static void clean_tx_pools(struct ibmvnic_adapter *adapter) /* Free any remaining skbs in the tx buffer pools */ for (i = 0; i < tx_scrqs; i++) { tx_pool = &adapter->tx_pool[i]; - if (!tx_pool) + if (!tx_pool && !tx_pool->tx_buff) continue; netdev_dbg(adapter->netdev, "Cleaning tx_pool[%d]\n", i); for (j = 0; j < tx_entries; j++) { - if (tx_pool->tx_buff[j].skb) { - dev_kfree_skb_any(tx_pool->tx_buff[j].skb); - tx_pool->tx_buff[j].skb = NULL; + tx_buff = &tx_pool->tx_buff[j]; + if (tx_buff && tx_buff->skb) { + dev_kfree_skb_any(tx_buff->skb); + tx_buff->skb = NULL; } } } @@ -1482,6 +1479,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) if ((*hdrs >> 7) & 1) { build_hdr_descs_arr(tx_buff, &num_entries, *hdrs); tx_crq.v1.n_crq_elem = num_entries; + tx_buff->num_entries = num_entries; tx_buff->indir_arr[0] = tx_crq; tx_buff->indir_dma = dma_map_single(dev, tx_buff->indir_arr, sizeof(tx_buff->indir_arr), @@ -1500,6 +1498,7 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) (u64)tx_buff->indir_dma, (u64)num_entries); } else { + tx_buff->num_entries = num_entries; lpar_rc = send_subcrq(adapter, handle_array[queue_num], &tx_crq); } @@ -1532,11 +1531,10 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) if (atomic_add_return(num_entries, &tx_scrq->used) >= adapter->req_tx_entries_per_subcrq) { - netdev_info(netdev, "Stopping queue %d\n", queue_num); + netdev_dbg(netdev, "Stopping queue %d\n", queue_num); netif_stop_subqueue(netdev, queue_num); } - tx_buff->num_entries = num_entries; tx_packets++; tx_bytes += skb->len; txq->trans_start = jiffies; @@ -2546,8 +2544,8 @@ restart_loop: __netif_subqueue_stopped(adapter->netdev, scrq->pool_index)) { netif_wake_subqueue(adapter->netdev, scrq->pool_index); - netdev_info(adapter->netdev, "Started queue %d\n", - scrq->pool_index); + netdev_dbg(adapter->netdev, "Started queue %d\n", + scrq->pool_index); } } @@ -3079,7 +3077,7 @@ static void vnic_add_client_data(struct ibmvnic_adapter *adapter, strncpy(&vlcd->name, adapter->netdev->name, len); } -static void send_login(struct ibmvnic_adapter *adapter) +static int send_login(struct ibmvnic_adapter *adapter) { struct ibmvnic_login_rsp_buffer *login_rsp_buffer; struct ibmvnic_login_buffer *login_buffer; @@ -3095,6 +3093,12 @@ static void send_login(struct ibmvnic_adapter *adapter) struct vnic_login_client_data *vlcd; int i; + if (!adapter->tx_scrq || !adapter->rx_scrq) { + netdev_err(adapter->netdev, + "RX or TX queues are not allocated, device login failed\n"); + return -1; + } + release_login_rsp_buffer(adapter); client_data_len = vnic_client_data_len(adapter); @@ -3192,7 +3196,7 @@ static void send_login(struct ibmvnic_adapter *adapter) crq.login.len = cpu_to_be32(buffer_size); ibmvnic_send_crq(adapter, &crq); - return; + return 0; buf_rsp_map_failed: kfree(login_rsp_buffer); @@ -3201,7 +3205,7 @@ buf_rsp_alloc_failed: buf_map_failed: kfree(login_buffer); buf_alloc_failed: - return; + return -1; } static void send_request_map(struct ibmvnic_adapter *adapter, dma_addr_t addr, @@ -4430,6 +4434,14 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter) release_crq_queue(adapter); } + rc = init_stats_buffers(adapter); + if (rc) + return rc; + + rc = init_stats_token(adapter); + if (rc) + return rc; + return rc; } @@ -4537,6 +4549,9 @@ static int ibmvnic_remove(struct vio_dev *dev) release_sub_crqs(adapter, 1); release_crq_queue(adapter); + release_stats_token(adapter); + release_stats_buffers(adapter); + adapter->state = VNIC_REMOVED; mutex_unlock(&adapter->reset_lock); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_common.c b/drivers/net/ethernet/intel/fm10k/fm10k_common.c index 736a9f087bc9..c58a5377a287 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_common.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_common.c @@ -1,5 +1,5 @@ /* Intel(R) Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2017 Intel Corporation. + * Copyright(c) 2013 - 2018 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -262,6 +262,7 @@ s32 fm10k_stop_hw_generic(struct fm10k_hw *hw) * fm10k_read_hw_stats_32b - Reads value of 32-bit registers * @hw: pointer to the hardware structure * @addr: address of register containing a 32-bit value + * @stat: pointer to structure holding hw stat information * * Function reads the content of the register and returns the delta * between the base and the current value. @@ -281,6 +282,7 @@ u32 fm10k_read_hw_stats_32b(struct fm10k_hw *hw, u32 addr, * fm10k_read_hw_stats_48b - Reads value of 48-bit registers * @hw: pointer to the hardware structure * @addr: address of register containing the lower 32-bit value + * @stat: pointer to structure holding hw stat information * * Function reads the content of 2 registers, combined to represent a 48-bit * statistical value. Extra processing is required to handle overflowing. @@ -461,7 +463,6 @@ void fm10k_update_hw_stats_q(struct fm10k_hw *hw, struct fm10k_hw_stats_q *q, /** * fm10k_unbind_hw_stats_q - Unbind the queue counters from their queues - * @hw: pointer to the hardware structure * @q: pointer to the ring of hardware statistics queue * @idx: index pointing to the start of the ring iteration * @count: number of queues to iterate over diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index 8e12aae065d8..2c93d719438f 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -28,13 +28,13 @@ #include "fm10k.h" -#define DRV_VERSION "0.22.1-k" +#define DRV_VERSION "0.23.4-k" #define DRV_SUMMARY "Intel(R) Ethernet Switch Host Interface Driver" const char fm10k_driver_version[] = DRV_VERSION; char fm10k_driver_name[] = "fm10k"; static const char fm10k_driver_string[] = DRV_SUMMARY; static const char fm10k_copyright[] = - "Copyright(c) 2013 - 2017 Intel Corporation."; + "Copyright(c) 2013 - 2018 Intel Corporation."; MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>"); MODULE_DESCRIPTION(DRV_SUMMARY); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index a38ae5c54da3..75c99aed3c41 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -1,5 +1,5 @@ /* Intel(R) Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2017 Intel Corporation. + * Copyright(c) 2013 - 2018 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -486,7 +486,7 @@ static void fm10k_insert_tunnel_port(struct list_head *ports, /** * fm10k_udp_tunnel_add - * @netdev: network interface device structure + * @dev: network interface device structure * @ti: Tunnel endpoint information * * This function is called when a new UDP tunnel port has been added. @@ -518,8 +518,8 @@ static void fm10k_udp_tunnel_add(struct net_device *dev, /** * fm10k_udp_tunnel_del - * @netdev: network interface device structure - * @ti: Tunnel endpoint information + * @dev: network interface device structure + * @ti: Tunnel end point information * * This function is called when a new UDP tunnel port is deleted. The freed * port will be removed from the list, then we reprogram the offloaded port @@ -803,7 +803,7 @@ int fm10k_queue_vlan_request(struct fm10k_intfc *interface, * @glort: the target glort for this update * @addr: the address to update * @vid: the vid to update - * @sync: whether to add or remove + * @set: whether to add or remove * * This function queues up a MAC request for sending to the switch manager. * A separate thread monitors the queue and sends updates to the switch diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index a434fecfdfeb..50f53e403ef5 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -1,5 +1,5 @@ /* Intel(R) Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2017 Intel Corporation. + * Copyright(c) 2013 - 2018 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -29,7 +29,7 @@ static const struct fm10k_info *fm10k_info_tbl[] = { [fm10k_device_vf] = &fm10k_vf_info, }; -/** +/* * fm10k_pci_tbl - PCI Device ID Table * * Wildcard entries (PCI_ANY_ID) should come last @@ -211,7 +211,7 @@ static void fm10k_start_service_event(struct fm10k_intfc *interface) /** * fm10k_service_timer - Timer Call-back - * @data: pointer to interface cast into an unsigned long + * @t: pointer to timer data **/ static void fm10k_service_timer(struct timer_list *t) { @@ -649,7 +649,7 @@ void fm10k_update_stats(struct fm10k_intfc *interface) /** * fm10k_watchdog_flush_tx - flush queues on host not ready - * @interface - pointer to the device interface structure + * @interface: pointer to the device interface structure **/ static void fm10k_watchdog_flush_tx(struct fm10k_intfc *interface) { @@ -679,7 +679,7 @@ static void fm10k_watchdog_flush_tx(struct fm10k_intfc *interface) /** * fm10k_watchdog_subtask - check and bring link up - * @interface - pointer to the device interface structure + * @interface: pointer to the device interface structure **/ static void fm10k_watchdog_subtask(struct fm10k_intfc *interface) { @@ -703,7 +703,7 @@ static void fm10k_watchdog_subtask(struct fm10k_intfc *interface) /** * fm10k_check_hang_subtask - check for hung queues and dropped interrupts - * @interface - pointer to the device interface structure + * @interface: pointer to the device interface structure * * This function serves two purposes. First it strobes the interrupt lines * in order to make certain interrupts are occurring. Secondly it sets the @@ -1995,6 +1995,7 @@ skip_tx_dma_drain: /** * fm10k_sw_init - Initialize general software structures * @interface: host interface private structure to initialize + * @ent: PCI device ID entry * * fm10k_sw_init initializes the interface private data structure. * Fields are initialized based on PCI device information and diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c index d6406fc31ffb..bee192fe2ffb 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c @@ -1,5 +1,5 @@ /* Intel(R) Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2017 Intel Corporation. + * Copyright(c) 2013 - 2018 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -1180,7 +1180,7 @@ s32 fm10k_iov_msg_msix_pf(struct fm10k_hw *hw, u32 **results, /** * fm10k_iov_select_vid - Select correct default VLAN ID - * @hw: Pointer to hardware structure + * @vf_info: pointer to VF information structure * @vid: VLAN ID to correct * * Will report an error if the VLAN ID is out of range. For VID = 0, it will diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c index f8e87bf086b9..9d0d31da426b 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_tlv.c @@ -1,5 +1,5 @@ /* Intel(R) Ethernet Switch Host Interface Driver - * Copyright(c) 2013 - 2016 Intel Corporation. + * Copyright(c) 2013 - 2018 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -120,6 +120,7 @@ static s32 fm10k_tlv_attr_get_null_string(u32 *attr, unsigned char *string) * @msg: Pointer to message block * @attr_id: Attribute ID * @mac_addr: MAC address to be stored + * @vlan: VLAN to be stored * * This function will reorder a MAC address to be CPU endian and store it * in the attribute buffer. It will return success if provided with a @@ -155,8 +156,8 @@ s32 fm10k_tlv_attr_put_mac_vlan(u32 *msg, u16 attr_id, /** * fm10k_tlv_attr_get_mac_vlan - Get MAC/VLAN stored in attribute * @attr: Pointer to attribute - * @attr_id: Attribute ID * @mac_addr: location of buffer to store MAC address + * @vlan: location of buffer to store VLAN * * This function pulls the MAC address back out of the attribute and will * place it in the array pointed by by mac_addr. It will return success @@ -549,7 +550,7 @@ static s32 fm10k_tlv_attr_parse(u32 *attr, u32 **results, * @hw: Pointer to hardware structure * @msg: Pointer to message * @mbx: Pointer to mailbox information structure - * @func: Function array containing list of message handling functions + * @data: Pointer to message handler data structure * * This function should be the first function called upon receiving a * message. The handler will identify the message type and call the correct diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index a852775d3059..0dfc52772c45 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -1914,6 +1914,43 @@ enum i40e_aq_phy_type { I40E_PHY_TYPE_DEFAULT = 0xFF, }; +#define I40E_PHY_TYPES_BITMASK (BIT_ULL(I40E_PHY_TYPE_SGMII) | \ + BIT_ULL(I40E_PHY_TYPE_1000BASE_KX) | \ + BIT_ULL(I40E_PHY_TYPE_10GBASE_KX4) | \ + BIT_ULL(I40E_PHY_TYPE_10GBASE_KR) | \ + BIT_ULL(I40E_PHY_TYPE_40GBASE_KR4) | \ + BIT_ULL(I40E_PHY_TYPE_XAUI) | \ + BIT_ULL(I40E_PHY_TYPE_XFI) | \ + BIT_ULL(I40E_PHY_TYPE_SFI) | \ + BIT_ULL(I40E_PHY_TYPE_XLAUI) | \ + BIT_ULL(I40E_PHY_TYPE_XLPPI) | \ + BIT_ULL(I40E_PHY_TYPE_40GBASE_CR4_CU) | \ + BIT_ULL(I40E_PHY_TYPE_10GBASE_CR1_CU) | \ + BIT_ULL(I40E_PHY_TYPE_10GBASE_AOC) | \ + BIT_ULL(I40E_PHY_TYPE_40GBASE_AOC) | \ + BIT_ULL(I40E_PHY_TYPE_UNRECOGNIZED) | \ + BIT_ULL(I40E_PHY_TYPE_UNSUPPORTED) | \ + BIT_ULL(I40E_PHY_TYPE_100BASE_TX) | \ + BIT_ULL(I40E_PHY_TYPE_1000BASE_T) | \ + BIT_ULL(I40E_PHY_TYPE_10GBASE_T) | \ + BIT_ULL(I40E_PHY_TYPE_10GBASE_SR) | \ + BIT_ULL(I40E_PHY_TYPE_10GBASE_LR) | \ + BIT_ULL(I40E_PHY_TYPE_10GBASE_SFPP_CU) | \ + BIT_ULL(I40E_PHY_TYPE_10GBASE_CR1) | \ + BIT_ULL(I40E_PHY_TYPE_40GBASE_CR4) | \ + BIT_ULL(I40E_PHY_TYPE_40GBASE_SR4) | \ + BIT_ULL(I40E_PHY_TYPE_40GBASE_LR4) | \ + BIT_ULL(I40E_PHY_TYPE_1000BASE_SX) | \ + BIT_ULL(I40E_PHY_TYPE_1000BASE_LX) | \ + BIT_ULL(I40E_PHY_TYPE_1000BASE_T_OPTICAL) | \ + BIT_ULL(I40E_PHY_TYPE_20GBASE_KR2) | \ + BIT_ULL(I40E_PHY_TYPE_25GBASE_KR) | \ + BIT_ULL(I40E_PHY_TYPE_25GBASE_CR) | \ + BIT_ULL(I40E_PHY_TYPE_25GBASE_SR) | \ + BIT_ULL(I40E_PHY_TYPE_25GBASE_LR) | \ + BIT_ULL(I40E_PHY_TYPE_25GBASE_AOC) | \ + BIT_ULL(I40E_PHY_TYPE_25GBASE_ACC)) + #define I40E_LINK_SPEED_100MB_SHIFT 0x1 #define I40E_LINK_SPEED_1000MB_SHIFT 0x2 #define I40E_LINK_SPEED_10GB_SHIFT 0x3 diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 0dcbbda164c4..89807e32a898 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -230,6 +230,8 @@ static const struct i40e_priv_flags i40e_gstrings_priv_flags[] = { I40E_PRIV_FLAG("flow-director-atr", I40E_FLAG_FD_ATR_ENABLED, 0), I40E_PRIV_FLAG("veb-stats", I40E_FLAG_VEB_STATS_ENABLED, 0), I40E_PRIV_FLAG("hw-atr-eviction", I40E_FLAG_HW_ATR_EVICT_ENABLED, 0), + I40E_PRIV_FLAG("link-down-on-close", + I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED, 0), I40E_PRIV_FLAG("legacy-rx", I40E_FLAG_LEGACY_RX, 0), I40E_PRIV_FLAG("disable-source-pruning", I40E_FLAG_SOURCE_PRUNING_DISABLED, 0), diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index f6d37456f3b7..be9a1467a1a1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -6547,6 +6547,75 @@ int i40e_up(struct i40e_vsi *vsi) } /** + * i40e_force_link_state - Force the link status + * @pf: board private structure + * @is_up: whether the link state should be forced up or down + **/ +static i40e_status i40e_force_link_state(struct i40e_pf *pf, bool is_up) +{ + struct i40e_aq_get_phy_abilities_resp abilities; + struct i40e_aq_set_phy_config config = {0}; + struct i40e_hw *hw = &pf->hw; + i40e_status err; + u64 mask; + + /* Get the current phy config */ + err = i40e_aq_get_phy_capabilities(hw, false, false, &abilities, + NULL); + if (err) { + dev_err(&pf->pdev->dev, + "failed to get phy cap., ret = %s last_status = %s\n", + i40e_stat_str(hw, err), + i40e_aq_str(hw, hw->aq.asq_last_status)); + return err; + } + + /* If link needs to go up, but was not forced to go down, + * no need for a flap + */ + if (is_up && abilities.phy_type != 0) + return I40E_SUCCESS; + + /* To force link we need to set bits for all supported PHY types, + * but there are now more than 32, so we need to split the bitmap + * across two fields. + */ + mask = I40E_PHY_TYPES_BITMASK; + config.phy_type = is_up ? cpu_to_le32((u32)(mask & 0xffffffff)) : 0; + config.phy_type_ext = is_up ? (u8)((mask >> 32) & 0xff) : 0; + /* Copy the old settings, except of phy_type */ + config.abilities = abilities.abilities; + config.link_speed = abilities.link_speed; + config.eee_capability = abilities.eee_capability; + config.eeer = abilities.eeer_val; + config.low_power_ctrl = abilities.d3_lpan; + err = i40e_aq_set_phy_config(hw, &config, NULL); + + if (err) { + dev_err(&pf->pdev->dev, + "set phy config ret = %s last_status = %s\n", + i40e_stat_str(&pf->hw, err), + i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); + return err; + } + + /* Update the link info */ + err = i40e_update_link_info(hw); + if (err) { + /* Wait a little bit (on 40G cards it sometimes takes a really + * long time for link to come back from the atomic reset) + * and try once more + */ + msleep(1000); + i40e_update_link_info(hw); + } + + i40e_aq_set_link_restart_an(hw, true, NULL); + + return I40E_SUCCESS; +} + +/** * i40e_down - Shutdown the connection processing * @vsi: the VSI being stopped **/ @@ -6563,6 +6632,9 @@ void i40e_down(struct i40e_vsi *vsi) } i40e_vsi_disable_irq(vsi); i40e_vsi_stop_rings(vsi); + if (vsi->type == I40E_VSI_MAIN && + vsi->back->flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED) + i40e_force_link_state(vsi->back, false); i40e_napi_disable_all(vsi); for (i = 0; i < vsi->num_queue_pairs; i++) { @@ -7524,6 +7596,9 @@ int i40e_open(struct net_device *netdev) netif_carrier_off(netdev); + if (i40e_force_link_state(pf, true)) + return -EAGAIN; + err = i40e_vsi_open(vsi); if (err) return err; diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 1ec9b1d8023d..97cfe944b568 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -708,16 +708,22 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring) /** * i40e_get_tx_pending - how many tx descriptors not processed * @tx_ring: the ring of descriptors + * @in_sw: use SW variables * * Since there is no access to the ring head register * in XL710, we need to use our local copies **/ -u32 i40e_get_tx_pending(struct i40e_ring *ring) +u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw) { u32 head, tail; - head = i40e_get_head(ring); - tail = readl(ring->tail); + if (!in_sw) { + head = i40e_get_head(ring); + tail = readl(ring->tail); + } else { + head = ring->next_to_clean; + tail = ring->next_to_use; + } if (head != tail) return (head < tail) ? @@ -774,7 +780,7 @@ void i40e_detect_recover_hung(struct i40e_vsi *vsi) */ smp_rmb(); tx_ring->tx_stats.prev_pkt_ctr = - i40e_get_tx_pending(tx_ring) ? packets : -1; + i40e_get_tx_pending(tx_ring, true) ? packets : -1; } } } @@ -898,7 +904,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, * them to be written back in case we stay in NAPI. * In this mode on X722 we do not enable Interrupt. */ - unsigned int j = i40e_get_tx_pending(tx_ring); + unsigned int j = i40e_get_tx_pending(tx_ring, false); if (budget && ((j / WB_STRIDE) == 0) && (j > 0) && diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index f75a8fe68fcf..3c80ea784389 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -505,7 +505,7 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring); void i40e_free_rx_resources(struct i40e_ring *rx_ring); int i40e_napi_poll(struct napi_struct *napi, int budget); void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector); -u32 i40e_get_tx_pending(struct i40e_ring *ring); +u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw); void i40e_detect_recover_hung(struct i40e_vsi *vsi); int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size); bool __i40e_chk_linearize(struct sk_buff *skb); diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 5cca083da93c..e23975c67417 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -3062,7 +3062,7 @@ static struct i40e_vsi *i40e_find_vsi_from_seid(struct i40e_vf *vf, u16 seid) for (i = 0; i < vf->num_tc ; i++) { vsi = i40e_find_vsi_from_id(pf, vf->ch[i].vsi_id); - if (vsi->seid == seid) + if (vsi && vsi->seid == seid) return vsi; } return NULL; @@ -3146,8 +3146,8 @@ static int i40e_vc_del_cloud_filter(struct i40e_vf *vf, u8 *msg) dev_info(&pf->pdev->dev, "VF %d: Invalid input, can't apply cloud filter\n", vf->vf_id); - aq_ret = I40E_ERR_PARAM; - goto err; + aq_ret = I40E_ERR_PARAM; + goto err; } memset(&cfilter, 0, sizeof(cfilter)); diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index eb8f3e327f6b..e088d23eb083 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -196,7 +196,7 @@ void i40evf_detect_recover_hung(struct i40e_vsi *vsi) */ smp_rmb(); tx_ring->tx_stats.prev_pkt_ctr = - i40evf_get_tx_pending(tx_ring, false) ? packets : -1; + i40evf_get_tx_pending(tx_ring, true) ? packets : -1; } } } diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 4955ce3ab6a2..dae121877935 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -815,13 +815,11 @@ i40evf_mac_filter *i40evf_add_filter(struct i40evf_adapter *adapter, if (!macaddr) return NULL; - spin_lock_bh(&adapter->mac_vlan_list_lock); - f = i40evf_find_filter(adapter, macaddr); if (!f) { f = kzalloc(sizeof(*f), GFP_ATOMIC); if (!f) - goto clearout; + return f; ether_addr_copy(f->macaddr, macaddr); @@ -832,8 +830,6 @@ i40evf_mac_filter *i40evf_add_filter(struct i40evf_adapter *adapter, f->remove = false; } -clearout: - spin_unlock_bh(&adapter->mac_vlan_list_lock); return f; } @@ -868,9 +864,10 @@ static int i40evf_set_mac(struct net_device *netdev, void *p) adapter->aq_required |= I40EVF_FLAG_AQ_DEL_MAC_FILTER; } + f = i40evf_add_filter(adapter, addr->sa_data); + spin_unlock_bh(&adapter->mac_vlan_list_lock); - f = i40evf_add_filter(adapter, addr->sa_data); if (f) { ether_addr_copy(hw->mac.addr, addr->sa_data); ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr); @@ -2493,6 +2490,7 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter, u16 addr_type = 0; u16 n_proto = 0; int i = 0; + struct virtchnl_filter *vf = &filter->f; if (f->dissector->used_keys & ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) | @@ -2540,7 +2538,7 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter, return -EINVAL; if (n_proto == ETH_P_IPV6) { /* specify flow type as TCP IPv6 */ - filter->f.flow_type = VIRTCHNL_TCP_V6_FLOW; + vf->flow_type = VIRTCHNL_TCP_V6_FLOW; } if (key->ip_proto != IPPROTO_TCP) { @@ -2585,9 +2583,8 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter, is_multicast_ether_addr(key->dst)) { /* set the mask if a valid dst_mac address */ for (i = 0; i < ETH_ALEN; i++) - filter->f.mask.tcp_spec.dst_mac[i] |= - 0xff; - ether_addr_copy(filter->f.data.tcp_spec.dst_mac, + vf->mask.tcp_spec.dst_mac[i] |= 0xff; + ether_addr_copy(vf->data.tcp_spec.dst_mac, key->dst); } @@ -2596,9 +2593,8 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter, is_multicast_ether_addr(key->src)) { /* set the mask if a valid dst_mac address */ for (i = 0; i < ETH_ALEN; i++) - filter->f.mask.tcp_spec.src_mac[i] |= - 0xff; - ether_addr_copy(filter->f.data.tcp_spec.src_mac, + vf->mask.tcp_spec.src_mac[i] |= 0xff; + ether_addr_copy(vf->data.tcp_spec.src_mac, key->src); } } @@ -2622,8 +2618,8 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter, return I40E_ERR_CONFIG; } } - filter->f.mask.tcp_spec.vlan_id |= cpu_to_be16(0xffff); - filter->f.data.tcp_spec.vlan_id = cpu_to_be16(key->vlan_id); + vf->mask.tcp_spec.vlan_id |= cpu_to_be16(0xffff); + vf->data.tcp_spec.vlan_id = cpu_to_be16(key->vlan_id); } if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) { @@ -2670,14 +2666,12 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter, return I40E_ERR_CONFIG; } if (key->dst) { - filter->f.mask.tcp_spec.dst_ip[0] |= - cpu_to_be32(0xffffffff); - filter->f.data.tcp_spec.dst_ip[0] = key->dst; + vf->mask.tcp_spec.dst_ip[0] |= cpu_to_be32(0xffffffff); + vf->data.tcp_spec.dst_ip[0] = key->dst; } if (key->src) { - filter->f.mask.tcp_spec.src_ip[0] |= - cpu_to_be32(0xffffffff); - filter->f.data.tcp_spec.src_ip[0] = key->src; + vf->mask.tcp_spec.src_ip[0] |= cpu_to_be32(0xffffffff); + vf->data.tcp_spec.src_ip[0] = key->src; } } @@ -2710,22 +2704,14 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter, if (!ipv6_addr_any(&mask->dst) || !ipv6_addr_any(&mask->src)) field_flags |= I40EVF_CLOUD_FIELD_IIP; - if (key->dst.s6_addr) { - for (i = 0; i < 4; i++) - filter->f.mask.tcp_spec.dst_ip[i] |= - cpu_to_be32(0xffffffff); - memcpy(&filter->f.data.tcp_spec.dst_ip, - &key->dst.s6_addr32, - sizeof(filter->f.data.tcp_spec.dst_ip)); - } - if (key->src.s6_addr) { - for (i = 0; i < 4; i++) - filter->f.mask.tcp_spec.src_ip[i] |= - cpu_to_be32(0xffffffff); - memcpy(&filter->f.data.tcp_spec.src_ip, - &key->src.s6_addr32, - sizeof(filter->f.data.tcp_spec.src_ip)); - } + for (i = 0; i < 4; i++) + vf->mask.tcp_spec.dst_ip[i] |= cpu_to_be32(0xffffffff); + memcpy(&vf->data.tcp_spec.dst_ip, &key->dst.s6_addr32, + sizeof(vf->data.tcp_spec.dst_ip)); + for (i = 0; i < 4; i++) + vf->mask.tcp_spec.src_ip[i] |= cpu_to_be32(0xffffffff); + memcpy(&vf->data.tcp_spec.src_ip, &key->src.s6_addr32, + sizeof(vf->data.tcp_spec.src_ip)); } if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) { struct flow_dissector_key_ports *key = @@ -2757,16 +2743,16 @@ static int i40evf_parse_cls_flower(struct i40evf_adapter *adapter, } } if (key->dst) { - filter->f.mask.tcp_spec.dst_port |= cpu_to_be16(0xffff); - filter->f.data.tcp_spec.dst_port = key->dst; + vf->mask.tcp_spec.dst_port |= cpu_to_be16(0xffff); + vf->data.tcp_spec.dst_port = key->dst; } if (key->src) { - filter->f.mask.tcp_spec.src_port |= cpu_to_be16(0xffff); - filter->f.data.tcp_spec.src_port = key->dst; + vf->mask.tcp_spec.src_port |= cpu_to_be16(0xffff); + vf->data.tcp_spec.src_port = key->dst; } } - filter->f.field_flags = field_flags; + vf->field_flags = field_flags; return 0; } @@ -3040,7 +3026,12 @@ static int i40evf_open(struct net_device *netdev) if (err) goto err_req_irq; + spin_lock_bh(&adapter->mac_vlan_list_lock); + i40evf_add_filter(adapter, adapter->hw.mac.addr); + + spin_unlock_bh(&adapter->mac_vlan_list_lock); + i40evf_configure(adapter); i40evf_up_complete(adapter); diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c index 6134b61e0938..3c76c817ca1a 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c @@ -1048,24 +1048,28 @@ void i40evf_disable_channels(struct i40evf_adapter *adapter) * Print the cloud filter **/ static void i40evf_print_cloud_filter(struct i40evf_adapter *adapter, - struct virtchnl_filter f) + struct virtchnl_filter *f) { - switch (f.flow_type) { + switch (f->flow_type) { case VIRTCHNL_TCP_V4_FLOW: dev_info(&adapter->pdev->dev, "dst_mac: %pM src_mac: %pM vlan_id: %hu dst_ip: %pI4 src_ip %pI4 dst_port %hu src_port %hu\n", - &f.data.tcp_spec.dst_mac, &f.data.tcp_spec.src_mac, - ntohs(f.data.tcp_spec.vlan_id), - &f.data.tcp_spec.dst_ip[0], &f.data.tcp_spec.src_ip[0], - ntohs(f.data.tcp_spec.dst_port), - ntohs(f.data.tcp_spec.src_port)); + &f->data.tcp_spec.dst_mac, + &f->data.tcp_spec.src_mac, + ntohs(f->data.tcp_spec.vlan_id), + &f->data.tcp_spec.dst_ip[0], + &f->data.tcp_spec.src_ip[0], + ntohs(f->data.tcp_spec.dst_port), + ntohs(f->data.tcp_spec.src_port)); break; case VIRTCHNL_TCP_V6_FLOW: dev_info(&adapter->pdev->dev, "dst_mac: %pM src_mac: %pM vlan_id: %hu dst_ip: %pI6 src_ip %pI6 dst_port %hu src_port %hu\n", - &f.data.tcp_spec.dst_mac, &f.data.tcp_spec.src_mac, - ntohs(f.data.tcp_spec.vlan_id), - &f.data.tcp_spec.dst_ip, &f.data.tcp_spec.src_ip, - ntohs(f.data.tcp_spec.dst_port), - ntohs(f.data.tcp_spec.src_port)); + &f->data.tcp_spec.dst_mac, + &f->data.tcp_spec.src_mac, + ntohs(f->data.tcp_spec.vlan_id), + &f->data.tcp_spec.dst_ip, + &f->data.tcp_spec.src_ip, + ntohs(f->data.tcp_spec.dst_port), + ntohs(f->data.tcp_spec.src_port)); break; } } @@ -1303,7 +1307,7 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, i40evf_stat_str(&adapter->hw, v_retval)); i40evf_print_cloud_filter(adapter, - cf->f); + &cf->f); list_del(&cf->list); kfree(cf); adapter->num_cloud_filters--; @@ -1322,7 +1326,7 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, i40evf_stat_str(&adapter->hw, v_retval)); i40evf_print_cloud_filter(adapter, - cf->f); + &cf->f); } } } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 221f15803480..89edb9fae6f0 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -3059,6 +3059,8 @@ static int ixgbe_set_rxfh(struct net_device *netdev, const u32 *indir, for (i = 0; i < reta_entries; i++) adapter->rss_indir_tbl[i] = indir[i]; + + ixgbe_store_reta(adapter); } /* Fill out the rss hash key */ @@ -3067,8 +3069,6 @@ static int ixgbe_set_rxfh(struct net_device *netdev, const u32 *indir, ixgbe_store_key(adapter); } - ixgbe_store_reta(adapter); - return 0; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c index 4242f0213e46..ed4cbe94c355 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c @@ -58,7 +58,6 @@ static bool ixgbe_cache_ring_dcb_sriov(struct ixgbe_adapter *adapter) return false; /* start at VMDq register offset for SR-IOV enabled setups */ - pool = 0; reg_idx = vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask); for (i = 0, pool = 0; i < adapter->num_rx_queues; i++, reg_idx++) { /* If we are greater than indices move to next pool */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 0da5aa2c8aba..b032091022a8 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -7703,7 +7703,8 @@ static void ixgbe_service_task(struct work_struct *work) if (test_bit(__IXGBE_PTP_RUNNING, &adapter->state)) { ixgbe_ptp_overflow_check(adapter); - ixgbe_ptp_rx_hang(adapter); + if (adapter->flags & IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER) + ixgbe_ptp_rx_hang(adapter); ixgbe_ptp_tx_hang(adapter); } diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c index 4400e49090b4..e7623fed42da 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c +++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c @@ -94,6 +94,13 @@ static const char ixgbe_gstrings_test[][ETH_GSTRING_LEN] = { #define IXGBEVF_TEST_LEN (sizeof(ixgbe_gstrings_test) / ETH_GSTRING_LEN) +static const char ixgbevf_priv_flags_strings[][ETH_GSTRING_LEN] = { +#define IXGBEVF_PRIV_FLAGS_LEGACY_RX BIT(0) + "legacy-rx", +}; + +#define IXGBEVF_PRIV_FLAGS_STR_LEN ARRAY_SIZE(ixgbevf_priv_flags_strings) + static int ixgbevf_get_link_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *cmd) { @@ -241,6 +248,8 @@ static void ixgbevf_get_drvinfo(struct net_device *netdev, sizeof(drvinfo->version)); strlcpy(drvinfo->bus_info, pci_name(adapter->pdev), sizeof(drvinfo->bus_info)); + + drvinfo->n_priv_flags = IXGBEVF_PRIV_FLAGS_STR_LEN; } static void ixgbevf_get_ringparam(struct net_device *netdev, @@ -392,6 +401,8 @@ static int ixgbevf_get_sset_count(struct net_device *netdev, int stringset) return IXGBEVF_TEST_LEN; case ETH_SS_STATS: return IXGBEVF_STATS_LEN; + case ETH_SS_PRIV_FLAGS: + return IXGBEVF_PRIV_FLAGS_STR_LEN; default: return -EINVAL; } @@ -496,6 +507,10 @@ static void ixgbevf_get_strings(struct net_device *netdev, u32 stringset, p += ETH_GSTRING_LEN; } break; + case ETH_SS_PRIV_FLAGS: + memcpy(data, ixgbevf_priv_flags_strings, + IXGBEVF_PRIV_FLAGS_STR_LEN * ETH_GSTRING_LEN); + break; } } @@ -888,6 +903,37 @@ static int ixgbevf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, return err; } +static u32 ixgbevf_get_priv_flags(struct net_device *netdev) +{ + struct ixgbevf_adapter *adapter = netdev_priv(netdev); + u32 priv_flags = 0; + + if (adapter->flags & IXGBEVF_FLAGS_LEGACY_RX) + priv_flags |= IXGBEVF_PRIV_FLAGS_LEGACY_RX; + + return priv_flags; +} + +static int ixgbevf_set_priv_flags(struct net_device *netdev, u32 priv_flags) +{ + struct ixgbevf_adapter *adapter = netdev_priv(netdev); + unsigned int flags = adapter->flags; + + flags &= ~IXGBEVF_FLAGS_LEGACY_RX; + if (priv_flags & IXGBEVF_PRIV_FLAGS_LEGACY_RX) + flags |= IXGBEVF_FLAGS_LEGACY_RX; + + if (flags != adapter->flags) { + adapter->flags = flags; + + /* reset interface to repopulate queues */ + if (netif_running(netdev)) + ixgbevf_reinit_locked(adapter); + } + + return 0; +} + static const struct ethtool_ops ixgbevf_ethtool_ops = { .get_drvinfo = ixgbevf_get_drvinfo, .get_regs_len = ixgbevf_get_regs_len, @@ -909,6 +955,8 @@ static const struct ethtool_ops ixgbevf_ethtool_ops = { .get_rxfh_key_size = ixgbevf_get_rxfh_key_size, .get_rxfh = ixgbevf_get_rxfh, .get_link_ksettings = ixgbevf_get_link_ksettings, + .get_priv_flags = ixgbevf_get_priv_flags, + .set_priv_flags = ixgbevf_set_priv_flags, }; void ixgbevf_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index f6952425c87d..f65ca156af2d 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -89,19 +89,15 @@ struct ixgbevf_rx_queue_stats { }; enum ixgbevf_ring_state_t { + __IXGBEVF_RX_3K_BUFFER, + __IXGBEVF_RX_BUILD_SKB_ENABLED, __IXGBEVF_TX_DETECT_HANG, __IXGBEVF_HANG_CHECK_ARMED, }; -#define check_for_tx_hang(ring) \ - test_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state) -#define set_check_for_tx_hang(ring) \ - set_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state) -#define clear_check_for_tx_hang(ring) \ - clear_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state) - struct ixgbevf_ring { struct ixgbevf_ring *next; + struct ixgbevf_q_vector *q_vector; /* backpointer to q_vector */ struct net_device *netdev; struct device *dev; void *desc; /* descriptor ring memory */ @@ -133,7 +129,7 @@ struct ixgbevf_ring { */ u16 reg_idx; int queue_index; /* needed for multiqueue queue management */ -}; +} ____cacheline_internodealigned_in_smp; /* How many Rx Buffers do we bundle into one write to the hardware ? */ #define IXGBEVF_RX_BUFFER_WRITE 16 /* Must be power of 2 */ @@ -156,12 +152,20 @@ struct ixgbevf_ring { /* Supported Rx Buffer Sizes */ #define IXGBEVF_RXBUFFER_256 256 /* Used for packet split */ #define IXGBEVF_RXBUFFER_2048 2048 +#define IXGBEVF_RXBUFFER_3072 3072 #define IXGBEVF_RX_HDR_SIZE IXGBEVF_RXBUFFER_256 -#define IXGBEVF_RX_BUFSZ IXGBEVF_RXBUFFER_2048 #define MAXIMUM_ETHERNET_VLAN_SIZE (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN) +#define IXGBEVF_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN) +#if (PAGE_SIZE < 8192) +#define IXGBEVF_MAX_FRAME_BUILD_SKB \ + (SKB_WITH_OVERHEAD(IXGBEVF_RXBUFFER_2048) - IXGBEVF_SKB_PAD) +#else +#define IXGBEVF_MAX_FRAME_BUILD_SKB IXGBEVF_RXBUFFER_2048 +#endif + #define IXGBE_TX_FLAGS_CSUM BIT(0) #define IXGBE_TX_FLAGS_VLAN BIT(1) #define IXGBE_TX_FLAGS_TSO BIT(2) @@ -170,6 +174,50 @@ struct ixgbevf_ring { #define IXGBE_TX_FLAGS_VLAN_PRIO_MASK 0x0000e000 #define IXGBE_TX_FLAGS_VLAN_SHIFT 16 +#define ring_uses_large_buffer(ring) \ + test_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state) +#define set_ring_uses_large_buffer(ring) \ + set_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state) +#define clear_ring_uses_large_buffer(ring) \ + clear_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state) + +#define ring_uses_build_skb(ring) \ + test_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state) +#define set_ring_build_skb_enabled(ring) \ + set_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state) +#define clear_ring_build_skb_enabled(ring) \ + clear_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state) + +static inline unsigned int ixgbevf_rx_bufsz(struct ixgbevf_ring *ring) +{ +#if (PAGE_SIZE < 8192) + if (ring_uses_large_buffer(ring)) + return IXGBEVF_RXBUFFER_3072; + + if (ring_uses_build_skb(ring)) + return IXGBEVF_MAX_FRAME_BUILD_SKB; +#endif + return IXGBEVF_RXBUFFER_2048; +} + +static inline unsigned int ixgbevf_rx_pg_order(struct ixgbevf_ring *ring) +{ +#if (PAGE_SIZE < 8192) + if (ring_uses_large_buffer(ring)) + return 1; +#endif + return 0; +} + +#define ixgbevf_rx_pg_size(_ring) (PAGE_SIZE << ixgbevf_rx_pg_order(_ring)) + +#define check_for_tx_hang(ring) \ + test_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state) +#define set_check_for_tx_hang(ring) \ + set_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state) +#define clear_check_for_tx_hang(ring) \ + clear_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state) + struct ixgbevf_ring_container { struct ixgbevf_ring *ring; /* pointer to linked list of rings */ unsigned int total_bytes; /* total bytes processed this int */ @@ -194,7 +242,11 @@ struct ixgbevf_q_vector { u16 itr; /* Interrupt throttle rate written to EITR */ struct napi_struct napi; struct ixgbevf_ring_container rx, tx; + struct rcu_head rcu; /* to avoid race with update stats on free */ char name[IFNAMSIZ + 9]; + + /* for dynamic allocation of rings associated with this q_vector */ + struct ixgbevf_ring ring[0] ____cacheline_internodealigned_in_smp; #ifdef CONFIG_NET_RX_BUSY_POLL unsigned int state; #define IXGBEVF_QV_STATE_IDLE 0 @@ -331,6 +383,8 @@ struct ixgbevf_adapter { u32 *rss_key; u8 rss_indir_tbl[IXGBEVF_X550_VFRETA_SIZE]; + u32 flags; +#define IXGBEVF_FLAGS_LEGACY_RX BIT(1) }; enum ixbgevf_state_t { diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 9b3d43d28106..f37307131eb6 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -130,6 +130,9 @@ static void ixgbevf_service_event_complete(struct ixgbevf_adapter *adapter) static void ixgbevf_queue_reset_subtask(struct ixgbevf_adapter *adapter); static void ixgbevf_set_itr(struct ixgbevf_q_vector *q_vector); static void ixgbevf_free_all_rx_resources(struct ixgbevf_adapter *adapter); +static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer); +static void ixgbevf_reuse_rx_page(struct ixgbevf_ring *rx_ring, + struct ixgbevf_rx_buffer *old_buff); static void ixgbevf_remove_adapter(struct ixgbe_hw *hw) { @@ -527,6 +530,49 @@ static void ixgbevf_process_skb_fields(struct ixgbevf_ring *rx_ring, skb->protocol = eth_type_trans(skb, rx_ring->netdev); } +static +struct ixgbevf_rx_buffer *ixgbevf_get_rx_buffer(struct ixgbevf_ring *rx_ring, + const unsigned int size) +{ + struct ixgbevf_rx_buffer *rx_buffer; + + rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; + prefetchw(rx_buffer->page); + + /* we are reusing so sync this buffer for CPU use */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_buffer->dma, + rx_buffer->page_offset, + size, + DMA_FROM_DEVICE); + + rx_buffer->pagecnt_bias--; + + return rx_buffer; +} + +static void ixgbevf_put_rx_buffer(struct ixgbevf_ring *rx_ring, + struct ixgbevf_rx_buffer *rx_buffer) +{ + if (ixgbevf_can_reuse_rx_page(rx_buffer)) { + /* hand second half of page back to the ring */ + ixgbevf_reuse_rx_page(rx_ring, rx_buffer); + } else { + /* We are not reusing the buffer so unmap it and free + * any references we are holding to it + */ + dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, + ixgbevf_rx_pg_size(rx_ring), + DMA_FROM_DEVICE, + IXGBEVF_RX_DMA_ATTR); + __page_frag_cache_drain(rx_buffer->page, + rx_buffer->pagecnt_bias); + } + + /* clear contents of rx_buffer */ + rx_buffer->page = NULL; +} + /** * ixgbevf_is_non_eop - process handling of non-EOP buffers * @rx_ring: Rx ring being processed @@ -554,32 +600,38 @@ static bool ixgbevf_is_non_eop(struct ixgbevf_ring *rx_ring, return true; } +static inline unsigned int ixgbevf_rx_offset(struct ixgbevf_ring *rx_ring) +{ + return ring_uses_build_skb(rx_ring) ? IXGBEVF_SKB_PAD : 0; +} + static bool ixgbevf_alloc_mapped_page(struct ixgbevf_ring *rx_ring, struct ixgbevf_rx_buffer *bi) { struct page *page = bi->page; - dma_addr_t dma = bi->dma; + dma_addr_t dma; /* since we are recycling buffers we should seldom need to alloc */ if (likely(page)) return true; /* alloc new page for storage */ - page = dev_alloc_page(); + page = dev_alloc_pages(ixgbevf_rx_pg_order(rx_ring)); if (unlikely(!page)) { rx_ring->rx_stats.alloc_rx_page_failed++; return false; } /* map page for use */ - dma = dma_map_page_attrs(rx_ring->dev, page, 0, PAGE_SIZE, + dma = dma_map_page_attrs(rx_ring->dev, page, 0, + ixgbevf_rx_pg_size(rx_ring), DMA_FROM_DEVICE, IXGBEVF_RX_DMA_ATTR); /* if mapping failed free memory back to system since * there isn't much point in holding memory we can't use */ if (dma_mapping_error(rx_ring->dev, dma)) { - __free_page(page); + __free_pages(page, ixgbevf_rx_pg_order(rx_ring)); rx_ring->rx_stats.alloc_rx_page_failed++; return false; @@ -587,7 +639,7 @@ static bool ixgbevf_alloc_mapped_page(struct ixgbevf_ring *rx_ring, bi->dma = dma; bi->page = page; - bi->page_offset = 0; + bi->page_offset = ixgbevf_rx_offset(rx_ring); bi->pagecnt_bias = 1; rx_ring->rx_stats.alloc_rx_page++; @@ -621,7 +673,7 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring, /* sync the buffer for use by the device */ dma_sync_single_range_for_device(rx_ring->dev, bi->dma, bi->page_offset, - IXGBEVF_RX_BUFSZ, + ixgbevf_rx_bufsz(rx_ring), DMA_FROM_DEVICE); /* Refresh the desc even if pkt_addr didn't change @@ -734,11 +786,10 @@ static inline bool ixgbevf_page_is_reserved(struct page *page) return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page); } -static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer, - struct page *page, - const unsigned int truesize) +static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer) { - unsigned int pagecnt_bias = rx_buffer->pagecnt_bias--; + unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; + struct page *page = rx_buffer->page; /* avoid re-using remote pages */ if (unlikely(ixgbevf_page_is_reserved(page))) @@ -746,17 +797,13 @@ static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer, #if (PAGE_SIZE < 8192) /* if we are only owner of page we can reuse it */ - if (unlikely(page_ref_count(page) != pagecnt_bias)) + if (unlikely((page_ref_count(page) - pagecnt_bias) > 1)) return false; - - /* flip page offset to other buffer */ - rx_buffer->page_offset ^= IXGBEVF_RX_BUFSZ; - #else - /* move offset up to the next cache line */ - rx_buffer->page_offset += truesize; +#define IXGBEVF_LAST_OFFSET \ + (SKB_WITH_OVERHEAD(PAGE_SIZE) - IXGBEVF_RXBUFFER_2048) - if (rx_buffer->page_offset > (PAGE_SIZE - IXGBEVF_RX_BUFSZ)) + if (rx_buffer->page_offset > IXGBEVF_LAST_OFFSET) return false; #endif @@ -765,7 +812,7 @@ static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer, * the pagecnt_bias and page count so that we fully restock the * number of references the driver holds. */ - if (unlikely(pagecnt_bias == 1)) { + if (unlikely(!pagecnt_bias)) { page_ref_add(page, USHRT_MAX); rx_buffer->pagecnt_bias = USHRT_MAX; } @@ -777,127 +824,81 @@ static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer, * ixgbevf_add_rx_frag - Add contents of Rx buffer to sk_buff * @rx_ring: rx descriptor ring to transact packets on * @rx_buffer: buffer containing page to add - * @rx_desc: descriptor containing length of buffer written by hardware * @skb: sk_buff to place the data into + * @size: size of buffer to be added * * This function will add the data contained in rx_buffer->page to the skb. - * This is done either through a direct copy if the data in the buffer is - * less than the skb header size, otherwise it will just attach the page as - * a frag to the skb. - * - * The function will then update the page offset if necessary and return - * true if the buffer can be reused by the adapter. **/ -static bool ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring, +static void ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring, struct ixgbevf_rx_buffer *rx_buffer, - u16 size, - union ixgbe_adv_rx_desc *rx_desc, - struct sk_buff *skb) + struct sk_buff *skb, + unsigned int size) { - struct page *page = rx_buffer->page; - unsigned char *va = page_address(page) + rx_buffer->page_offset; #if (PAGE_SIZE < 8192) - unsigned int truesize = IXGBEVF_RX_BUFSZ; + unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2; #else - unsigned int truesize = ALIGN(size, L1_CACHE_BYTES); + unsigned int truesize = ring_uses_build_skb(rx_ring) ? + SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size) : + SKB_DATA_ALIGN(size); +#endif + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, + rx_buffer->page_offset, size, truesize); +#if (PAGE_SIZE < 8192) + rx_buffer->page_offset ^= truesize; +#else + rx_buffer->page_offset += truesize; #endif - unsigned int pull_len; - - if (unlikely(skb_is_nonlinear(skb))) - goto add_tail_frag; - - if (likely(size <= IXGBEVF_RX_HDR_SIZE)) { - memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long))); - - /* page is not reserved, we can reuse buffer as is */ - if (likely(!ixgbevf_page_is_reserved(page))) - return true; - - /* this page cannot be reused so discard it */ - return false; - } - - /* we need the header to contain the greater of either ETH_HLEN or - * 60 bytes if the skb->len is less than 60 for skb_pad. - */ - pull_len = eth_get_headlen(va, IXGBEVF_RX_HDR_SIZE); - - /* align pull length to size of long to optimize memcpy performance */ - memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long))); - - /* update all of the pointers */ - va += pull_len; - size -= pull_len; - -add_tail_frag: - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, - (unsigned long)va & ~PAGE_MASK, size, truesize); - - return ixgbevf_can_reuse_rx_page(rx_buffer, page, truesize); } -static struct sk_buff *ixgbevf_fetch_rx_buffer(struct ixgbevf_ring *rx_ring, - union ixgbe_adv_rx_desc *rx_desc, - struct sk_buff *skb) +static +struct sk_buff *ixgbevf_construct_skb(struct ixgbevf_ring *rx_ring, + struct ixgbevf_rx_buffer *rx_buffer, + union ixgbe_adv_rx_desc *rx_desc, + unsigned int size) { - struct ixgbevf_rx_buffer *rx_buffer; - struct page *page; - u16 size = le16_to_cpu(rx_desc->wb.upper.length); - - rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; - page = rx_buffer->page; - prefetchw(page); - - /* we are reusing so sync this buffer for CPU use */ - dma_sync_single_range_for_cpu(rx_ring->dev, - rx_buffer->dma, - rx_buffer->page_offset, - size, - DMA_FROM_DEVICE); - - if (likely(!skb)) { - void *page_addr = page_address(page) + - rx_buffer->page_offset; + void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; +#if (PAGE_SIZE < 8192) + unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2; +#else + unsigned int truesize = SKB_DATA_ALIGN(size); +#endif + unsigned int headlen; + struct sk_buff *skb; - /* prefetch first cache line of first page */ - prefetch(page_addr); + /* prefetch first cache line of first page */ + prefetch(va); #if L1_CACHE_BYTES < 128 - prefetch(page_addr + L1_CACHE_BYTES); + prefetch(va + L1_CACHE_BYTES); #endif - /* allocate a skb to store the frags */ - skb = netdev_alloc_skb_ip_align(rx_ring->netdev, - IXGBEVF_RX_HDR_SIZE); - if (unlikely(!skb)) { - rx_ring->rx_stats.alloc_rx_buff_failed++; - return NULL; - } + /* allocate a skb to store the frags */ + skb = napi_alloc_skb(&rx_ring->q_vector->napi, IXGBEVF_RX_HDR_SIZE); + if (unlikely(!skb)) + return NULL; - /* we will be copying header into skb->data in - * pskb_may_pull so it is in our interest to prefetch - * it now to avoid a possible cache miss - */ - prefetchw(skb->data); - } + /* Determine available headroom for copy */ + headlen = size; + if (headlen > IXGBEVF_RX_HDR_SIZE) + headlen = eth_get_headlen(va, IXGBEVF_RX_HDR_SIZE); - /* pull page into skb */ - if (ixgbevf_add_rx_frag(rx_ring, rx_buffer, size, rx_desc, skb)) { - /* hand second half of page back to the ring */ - ixgbevf_reuse_rx_page(rx_ring, rx_buffer); + /* align pull length to size of long to optimize memcpy performance */ + memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long))); + + /* update all of the pointers */ + size -= headlen; + if (size) { + skb_add_rx_frag(skb, 0, rx_buffer->page, + (va + headlen) - page_address(rx_buffer->page), + size, truesize); +#if (PAGE_SIZE < 8192) + rx_buffer->page_offset ^= truesize; +#else + rx_buffer->page_offset += truesize; +#endif } else { - /* We are not reusing the buffer so unmap it and free - * any references we are holding to it - */ - dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, - PAGE_SIZE, DMA_FROM_DEVICE, - IXGBEVF_RX_DMA_ATTR); - __page_frag_cache_drain(page, rx_buffer->pagecnt_bias); + rx_buffer->pagecnt_bias++; } - /* clear contents of buffer_info */ - rx_buffer->dma = 0; - rx_buffer->page = NULL; - return skb; } @@ -909,6 +910,44 @@ static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter, IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, qmask); } +static struct sk_buff *ixgbevf_build_skb(struct ixgbevf_ring *rx_ring, + struct ixgbevf_rx_buffer *rx_buffer, + union ixgbe_adv_rx_desc *rx_desc, + unsigned int size) +{ + void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; +#if (PAGE_SIZE < 8192) + unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2; +#else + unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + + SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size); +#endif + struct sk_buff *skb; + + /* prefetch first cache line of first page */ + prefetch(va); +#if L1_CACHE_BYTES < 128 + prefetch(va + L1_CACHE_BYTES); +#endif + + /* build an skb to around the page buffer */ + skb = build_skb(va - IXGBEVF_SKB_PAD, truesize); + if (unlikely(!skb)) + return NULL; + + /* update pointers within the skb to store the data */ + skb_reserve(skb, IXGBEVF_SKB_PAD); + __skb_put(skb, size); + + /* update buffer offset */ +#if (PAGE_SIZE < 8192) + rx_buffer->page_offset ^= truesize; +#else + rx_buffer->page_offset += truesize; +#endif + + return skb; +} static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, struct ixgbevf_ring *rx_ring, int budget) @@ -919,6 +958,8 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, while (likely(total_rx_packets < budget)) { union ixgbe_adv_rx_desc *rx_desc; + struct ixgbevf_rx_buffer *rx_buffer; + unsigned int size; /* return some buffers to hardware, one at a time is too slow */ if (cleaned_count >= IXGBEVF_RX_BUFFER_WRITE) { @@ -927,8 +968,8 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, } rx_desc = IXGBEVF_RX_DESC(rx_ring, rx_ring->next_to_clean); - - if (!rx_desc->wb.upper.length) + size = le16_to_cpu(rx_desc->wb.upper.length); + if (!size) break; /* This memory barrier is needed to keep us from reading @@ -937,15 +978,26 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, */ rmb(); + rx_buffer = ixgbevf_get_rx_buffer(rx_ring, size); + /* retrieve a buffer from the ring */ - skb = ixgbevf_fetch_rx_buffer(rx_ring, rx_desc, skb); + if (skb) + ixgbevf_add_rx_frag(rx_ring, rx_buffer, skb, size); + else if (ring_uses_build_skb(rx_ring)) + skb = ixgbevf_build_skb(rx_ring, rx_buffer, + rx_desc, size); + else + skb = ixgbevf_construct_skb(rx_ring, rx_buffer, + rx_desc, size); /* exit if we failed to retrieve a buffer */ if (!skb) { rx_ring->rx_stats.alloc_rx_buff_failed++; + rx_buffer->pagecnt_bias++; break; } + ixgbevf_put_rx_buffer(rx_ring, rx_buffer); cleaned_count++; /* fetch next buffer in frame if non-eop */ @@ -1260,85 +1312,6 @@ static irqreturn_t ixgbevf_msix_clean_rings(int irq, void *data) return IRQ_HANDLED; } -static inline void map_vector_to_rxq(struct ixgbevf_adapter *a, int v_idx, - int r_idx) -{ - struct ixgbevf_q_vector *q_vector = a->q_vector[v_idx]; - - a->rx_ring[r_idx]->next = q_vector->rx.ring; - q_vector->rx.ring = a->rx_ring[r_idx]; - q_vector->rx.count++; -} - -static inline void map_vector_to_txq(struct ixgbevf_adapter *a, int v_idx, - int t_idx) -{ - struct ixgbevf_q_vector *q_vector = a->q_vector[v_idx]; - - a->tx_ring[t_idx]->next = q_vector->tx.ring; - q_vector->tx.ring = a->tx_ring[t_idx]; - q_vector->tx.count++; -} - -/** - * ixgbevf_map_rings_to_vectors - Maps descriptor rings to vectors - * @adapter: board private structure to initialize - * - * This function maps descriptor rings to the queue-specific vectors - * we were allotted through the MSI-X enabling code. Ideally, we'd have - * one vector per ring/queue, but on a constrained vector budget, we - * group the rings as "efficiently" as possible. You would add new - * mapping configurations in here. - **/ -static int ixgbevf_map_rings_to_vectors(struct ixgbevf_adapter *adapter) -{ - int q_vectors; - int v_start = 0; - int rxr_idx = 0, txr_idx = 0; - int rxr_remaining = adapter->num_rx_queues; - int txr_remaining = adapter->num_tx_queues; - int i, j; - int rqpv, tqpv; - - q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; - - /* The ideal configuration... - * We have enough vectors to map one per queue. - */ - if (q_vectors == adapter->num_rx_queues + adapter->num_tx_queues) { - for (; rxr_idx < rxr_remaining; v_start++, rxr_idx++) - map_vector_to_rxq(adapter, v_start, rxr_idx); - - for (; txr_idx < txr_remaining; v_start++, txr_idx++) - map_vector_to_txq(adapter, v_start, txr_idx); - return 0; - } - - /* If we don't have enough vectors for a 1-to-1 - * mapping, we'll have to group them so there are - * multiple queues per vector. - */ - /* Re-adjusting *qpv takes care of the remainder. */ - for (i = v_start; i < q_vectors; i++) { - rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - i); - for (j = 0; j < rqpv; j++) { - map_vector_to_rxq(adapter, i, rxr_idx); - rxr_idx++; - rxr_remaining--; - } - } - for (i = v_start; i < q_vectors; i++) { - tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - i); - for (j = 0; j < tqpv; j++) { - map_vector_to_txq(adapter, i, txr_idx); - txr_idx++; - txr_remaining--; - } - } - - return 0; -} - /** * ixgbevf_request_msix_irqs - Initialize MSI-X interrupts * @adapter: board private structure @@ -1411,20 +1384,6 @@ free_queue_irqs: return err; } -static inline void ixgbevf_reset_q_vectors(struct ixgbevf_adapter *adapter) -{ - int i, q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; - - for (i = 0; i < q_vectors; i++) { - struct ixgbevf_q_vector *q_vector = adapter->q_vector[i]; - - q_vector->rx.ring = NULL; - q_vector->tx.ring = NULL; - q_vector->rx.count = 0; - q_vector->tx.count = 0; - } -} - /** * ixgbevf_request_irq - initialize interrupts * @adapter: board private structure @@ -1464,8 +1423,6 @@ static void ixgbevf_free_irq(struct ixgbevf_adapter *adapter) free_irq(adapter->msix_entries[i].vector, adapter->q_vector[i]); } - - ixgbevf_reset_q_vectors(adapter); } /** @@ -1587,7 +1544,8 @@ static void ixgbevf_configure_tx(struct ixgbevf_adapter *adapter) #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2 -static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter, int index) +static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *ring, int index) { struct ixgbe_hw *hw = &adapter->hw; u32 srrctl; @@ -1595,7 +1553,10 @@ static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter, int index) srrctl = IXGBE_SRRCTL_DROP_EN; srrctl |= IXGBEVF_RX_HDR_SIZE << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT; - srrctl |= IXGBEVF_RX_BUFSZ >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; + if (ring_uses_large_buffer(ring)) + srrctl |= IXGBEVF_RXBUFFER_3072 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; + else + srrctl |= IXGBEVF_RXBUFFER_2048 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(index), srrctl); @@ -1767,10 +1728,21 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, ring->next_to_use = 0; ring->next_to_alloc = 0; - ixgbevf_configure_srrctl(adapter, reg_idx); + ixgbevf_configure_srrctl(adapter, ring, reg_idx); + + /* RXDCTL.RLPML does not work on 82599 */ + if (adapter->hw.mac.type != ixgbe_mac_82599_vf) { + rxdctl &= ~(IXGBE_RXDCTL_RLPMLMASK | + IXGBE_RXDCTL_RLPML_EN); - /* allow any size packet since we can handle overflow */ - rxdctl &= ~IXGBE_RXDCTL_RLPML_EN; +#if (PAGE_SIZE < 8192) + /* Limit the maximum frame size so we don't overrun the skb */ + if (ring_uses_build_skb(ring) && + !ring_uses_large_buffer(ring)) + rxdctl |= IXGBEVF_MAX_FRAME_BUILD_SKB | + IXGBE_RXDCTL_RLPML_EN; +#endif + } rxdctl |= IXGBE_RXDCTL_ENABLE | IXGBE_RXDCTL_VME; IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(reg_idx), rxdctl); @@ -1779,6 +1751,29 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, ixgbevf_alloc_rx_buffers(ring, ixgbevf_desc_unused(ring)); } +static void ixgbevf_set_rx_buffer_len(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *rx_ring) +{ + struct net_device *netdev = adapter->netdev; + unsigned int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN; + + /* set build_skb and buffer size flags */ + clear_ring_build_skb_enabled(rx_ring); + clear_ring_uses_large_buffer(rx_ring); + + if (adapter->flags & IXGBEVF_FLAGS_LEGACY_RX) + return; + + set_ring_build_skb_enabled(rx_ring); + +#if (PAGE_SIZE < 8192) + if (max_frame <= IXGBEVF_MAX_FRAME_BUILD_SKB) + return; + + set_ring_uses_large_buffer(rx_ring); +#endif +} + /** * ixgbevf_configure_rx - Configure 82599 VF Receive Unit after Reset * @adapter: board private structure @@ -1806,8 +1801,12 @@ static void ixgbevf_configure_rx(struct ixgbevf_adapter *adapter) /* Setup the HW Rx Head and Tail Descriptor Pointers and * the Base and Length of the Rx Descriptor Ring */ - for (i = 0; i < adapter->num_rx_queues; i++) - ixgbevf_configure_rx_ring(adapter, adapter->rx_ring[i]); + for (i = 0; i < adapter->num_rx_queues; i++) { + struct ixgbevf_ring *rx_ring = adapter->rx_ring[i]; + + ixgbevf_set_rx_buffer_len(adapter, rx_ring); + ixgbevf_configure_rx_ring(adapter, rx_ring); + } } static int ixgbevf_vlan_rx_add_vid(struct net_device *netdev, @@ -2136,13 +2135,13 @@ static void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring) dma_sync_single_range_for_cpu(rx_ring->dev, rx_buffer->dma, rx_buffer->page_offset, - IXGBEVF_RX_BUFSZ, + ixgbevf_rx_bufsz(rx_ring), DMA_FROM_DEVICE); /* free resources associated with mapping */ dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, - PAGE_SIZE, + ixgbevf_rx_pg_size(rx_ring), DMA_FROM_DEVICE, IXGBEVF_RX_DMA_ATTR); @@ -2405,105 +2404,171 @@ static void ixgbevf_set_num_queues(struct ixgbevf_adapter *adapter) } /** - * ixgbevf_alloc_queues - Allocate memory for all rings + * ixgbevf_set_interrupt_capability - set MSI-X or FAIL if not supported + * @adapter: board private structure to initialize + * + * Attempt to configure the interrupts using the best available + * capabilities of the hardware and the kernel. + **/ +static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter) +{ + int vector, v_budget; + + /* It's easy to be greedy for MSI-X vectors, but it really + * doesn't do us much good if we have a lot more vectors + * than CPU's. So let's be conservative and only ask for + * (roughly) the same number of vectors as there are CPU's. + * The default is to use pairs of vectors. + */ + v_budget = max(adapter->num_rx_queues, adapter->num_tx_queues); + v_budget = min_t(int, v_budget, num_online_cpus()); + v_budget += NON_Q_VECTORS; + + adapter->msix_entries = kcalloc(v_budget, + sizeof(struct msix_entry), GFP_KERNEL); + if (!adapter->msix_entries) + return -ENOMEM; + + for (vector = 0; vector < v_budget; vector++) + adapter->msix_entries[vector].entry = vector; + + /* A failure in MSI-X entry allocation isn't fatal, but the VF driver + * does not support any other modes, so we will simply fail here. Note + * that we clean up the msix_entries pointer else-where. + */ + return ixgbevf_acquire_msix_vectors(adapter, v_budget); +} + +static void ixgbevf_add_ring(struct ixgbevf_ring *ring, + struct ixgbevf_ring_container *head) +{ + ring->next = head->ring; + head->ring = ring; + head->count++; +} + +/** + * ixgbevf_alloc_q_vector - Allocate memory for a single interrupt vector * @adapter: board private structure to initialize + * @v_idx: index of vector in adapter struct + * @txr_count: number of Tx rings for q vector + * @txr_idx: index of first Tx ring to assign + * @rxr_count: number of Rx rings for q vector + * @rxr_idx: index of first Rx ring to assign * - * We allocate one ring per queue at run-time since we don't know the - * number of queues at compile-time. The polling_netdev array is - * intended for Multiqueue, but should work fine with a single queue. + * We allocate one q_vector. If allocation fails we return -ENOMEM. **/ -static int ixgbevf_alloc_queues(struct ixgbevf_adapter *adapter) +static int ixgbevf_alloc_q_vector(struct ixgbevf_adapter *adapter, int v_idx, + int txr_count, int txr_idx, + int rxr_count, int rxr_idx) { + struct ixgbevf_q_vector *q_vector; struct ixgbevf_ring *ring; - int rx = 0, tx = 0; + int ring_count, size; + + ring_count = txr_count + rxr_count; + size = sizeof(*q_vector) + (sizeof(*ring) * ring_count); + + /* allocate q_vector and rings */ + q_vector = kzalloc(size, GFP_KERNEL); + if (!q_vector) + return -ENOMEM; + + /* initialize NAPI */ + netif_napi_add(adapter->netdev, &q_vector->napi, ixgbevf_poll, 64); + + /* tie q_vector and adapter together */ + adapter->q_vector[v_idx] = q_vector; + q_vector->adapter = adapter; + q_vector->v_idx = v_idx; - for (; tx < adapter->num_tx_queues; tx++) { - ring = kzalloc(sizeof(*ring), GFP_KERNEL); - if (!ring) - goto err_allocation; + /* initialize pointer to rings */ + ring = q_vector->ring; + while (txr_count) { + /* assign generic ring traits */ ring->dev = &adapter->pdev->dev; ring->netdev = adapter->netdev; + + /* configure backlink on ring */ + ring->q_vector = q_vector; + + /* update q_vector Tx values */ + ixgbevf_add_ring(ring, &q_vector->tx); + + /* apply Tx specific ring traits */ ring->count = adapter->tx_ring_count; - ring->queue_index = tx; - ring->reg_idx = tx; + ring->queue_index = txr_idx; + ring->reg_idx = txr_idx; - adapter->tx_ring[tx] = ring; - } + /* assign ring to adapter */ + adapter->tx_ring[txr_idx] = ring; + + /* update count and index */ + txr_count--; + txr_idx++; - for (; rx < adapter->num_rx_queues; rx++) { - ring = kzalloc(sizeof(*ring), GFP_KERNEL); - if (!ring) - goto err_allocation; + /* push pointer to next ring */ + ring++; + } + while (rxr_count) { + /* assign generic ring traits */ ring->dev = &adapter->pdev->dev; ring->netdev = adapter->netdev; + /* configure backlink on ring */ + ring->q_vector = q_vector; + + /* update q_vector Rx values */ + ixgbevf_add_ring(ring, &q_vector->rx); + + /* apply Rx specific ring traits */ ring->count = adapter->rx_ring_count; - ring->queue_index = rx; - ring->reg_idx = rx; + ring->queue_index = rxr_idx; + ring->reg_idx = rxr_idx; - adapter->rx_ring[rx] = ring; - } + /* assign ring to adapter */ + adapter->rx_ring[rxr_idx] = ring; - return 0; + /* update count and index */ + rxr_count--; + rxr_idx++; -err_allocation: - while (tx) { - kfree(adapter->tx_ring[--tx]); - adapter->tx_ring[tx] = NULL; + /* push pointer to next ring */ + ring++; } - while (rx) { - kfree(adapter->rx_ring[--rx]); - adapter->rx_ring[rx] = NULL; - } - return -ENOMEM; + return 0; } /** - * ixgbevf_set_interrupt_capability - set MSI-X or FAIL if not supported + * ixgbevf_free_q_vector - Free memory allocated for specific interrupt vector * @adapter: board private structure to initialize + * @v_idx: index of vector in adapter struct * - * Attempt to configure the interrupts using the best available - * capabilities of the hardware and the kernel. + * This function frees the memory allocated to the q_vector. In addition if + * NAPI is enabled it will delete any references to the NAPI struct prior + * to freeing the q_vector. **/ -static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter) +static void ixgbevf_free_q_vector(struct ixgbevf_adapter *adapter, int v_idx) { - struct net_device *netdev = adapter->netdev; - int err; - int vector, v_budget; - - /* It's easy to be greedy for MSI-X vectors, but it really - * doesn't do us much good if we have a lot more vectors - * than CPU's. So let's be conservative and only ask for - * (roughly) the same number of vectors as there are CPU's. - * The default is to use pairs of vectors. - */ - v_budget = max(adapter->num_rx_queues, adapter->num_tx_queues); - v_budget = min_t(int, v_budget, num_online_cpus()); - v_budget += NON_Q_VECTORS; - - /* A failure in MSI-X entry allocation isn't fatal, but it does - * mean we disable MSI-X capabilities of the adapter. - */ - adapter->msix_entries = kcalloc(v_budget, - sizeof(struct msix_entry), GFP_KERNEL); - if (!adapter->msix_entries) - return -ENOMEM; + struct ixgbevf_q_vector *q_vector = adapter->q_vector[v_idx]; + struct ixgbevf_ring *ring; - for (vector = 0; vector < v_budget; vector++) - adapter->msix_entries[vector].entry = vector; + ixgbevf_for_each_ring(ring, q_vector->tx) + adapter->tx_ring[ring->queue_index] = NULL; - err = ixgbevf_acquire_msix_vectors(adapter, v_budget); - if (err) - return err; + ixgbevf_for_each_ring(ring, q_vector->rx) + adapter->rx_ring[ring->queue_index] = NULL; - err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues); - if (err) - return err; + adapter->q_vector[v_idx] = NULL; + netif_napi_del(&q_vector->napi); - return netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues); + /* ixgbevf_get_stats() might access the rings on this vector, + * we must wait a grace period before freeing it. + */ + kfree_rcu(q_vector, rcu); } /** @@ -2515,35 +2580,53 @@ static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter) **/ static int ixgbevf_alloc_q_vectors(struct ixgbevf_adapter *adapter) { - int q_idx, num_q_vectors; - struct ixgbevf_q_vector *q_vector; + int q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; + int rxr_remaining = adapter->num_rx_queues; + int txr_remaining = adapter->num_tx_queues; + int rxr_idx = 0, txr_idx = 0, v_idx = 0; + int err; + + if (q_vectors >= (rxr_remaining + txr_remaining)) { + for (; rxr_remaining; v_idx++, q_vectors--) { + int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors); + + err = ixgbevf_alloc_q_vector(adapter, v_idx, + 0, 0, rqpv, rxr_idx); + if (err) + goto err_out; - num_q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; + /* update counts and index */ + rxr_remaining -= rqpv; + rxr_idx += rqpv; + } + } + + for (; q_vectors; v_idx++, q_vectors--) { + int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors); + int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors); - for (q_idx = 0; q_idx < num_q_vectors; q_idx++) { - q_vector = kzalloc(sizeof(struct ixgbevf_q_vector), GFP_KERNEL); - if (!q_vector) + err = ixgbevf_alloc_q_vector(adapter, v_idx, + tqpv, txr_idx, + rqpv, rxr_idx); + + if (err) goto err_out; - q_vector->adapter = adapter; - q_vector->v_idx = q_idx; - netif_napi_add(adapter->netdev, &q_vector->napi, - ixgbevf_poll, 64); - adapter->q_vector[q_idx] = q_vector; + + /* update counts and index */ + rxr_remaining -= rqpv; + rxr_idx += rqpv; + txr_remaining -= tqpv; + txr_idx += tqpv; } return 0; err_out: - while (q_idx) { - q_idx--; - q_vector = adapter->q_vector[q_idx]; -#ifdef CONFIG_NET_RX_BUSY_POLL - napi_hash_del(&q_vector->napi); -#endif - netif_napi_del(&q_vector->napi); - kfree(q_vector); - adapter->q_vector[q_idx] = NULL; + while (v_idx) { + v_idx--; + ixgbevf_free_q_vector(adapter, v_idx); } + return -ENOMEM; } @@ -2557,17 +2640,11 @@ err_out: **/ static void ixgbevf_free_q_vectors(struct ixgbevf_adapter *adapter) { - int q_idx, num_q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; - - for (q_idx = 0; q_idx < num_q_vectors; q_idx++) { - struct ixgbevf_q_vector *q_vector = adapter->q_vector[q_idx]; + int q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; - adapter->q_vector[q_idx] = NULL; -#ifdef CONFIG_NET_RX_BUSY_POLL - napi_hash_del(&q_vector->napi); -#endif - netif_napi_del(&q_vector->napi); - kfree(q_vector); + while (q_vectors) { + q_vectors--; + ixgbevf_free_q_vector(adapter, q_vectors); } } @@ -2611,12 +2688,6 @@ static int ixgbevf_init_interrupt_scheme(struct ixgbevf_adapter *adapter) goto err_alloc_q_vectors; } - err = ixgbevf_alloc_queues(adapter); - if (err) { - pr_err("Unable to allocate memory for queues\n"); - goto err_alloc_queues; - } - hw_dbg(&adapter->hw, "Multiqueue %s: Rx Queue count = %u, Tx Queue count = %u\n", (adapter->num_rx_queues > 1) ? "Enabled" : "Disabled", adapter->num_rx_queues, adapter->num_tx_queues); @@ -2624,8 +2695,6 @@ static int ixgbevf_init_interrupt_scheme(struct ixgbevf_adapter *adapter) set_bit(__IXGBEVF_DOWN, &adapter->state); return 0; -err_alloc_queues: - ixgbevf_free_q_vectors(adapter); err_alloc_q_vectors: ixgbevf_reset_interrupt_capability(adapter); err_set_interrupt: @@ -2641,17 +2710,6 @@ err_set_interrupt: **/ static void ixgbevf_clear_interrupt_scheme(struct ixgbevf_adapter *adapter) { - int i; - - for (i = 0; i < adapter->num_tx_queues; i++) { - kfree(adapter->tx_ring[i]); - adapter->tx_ring[i] = NULL; - } - for (i = 0; i < adapter->num_rx_queues; i++) { - kfree(adapter->rx_ring[i]); - adapter->rx_ring[i] = NULL; - } - adapter->num_tx_queues = 0; adapter->num_rx_queues = 0; @@ -3088,9 +3146,14 @@ static int ixgbevf_setup_all_tx_resources(struct ixgbevf_adapter *adapter) if (!err) continue; hw_dbg(&adapter->hw, "Allocation for Tx Queue %u failed\n", i); - break; + goto err_setup_tx; } + return 0; +err_setup_tx: + /* rewind the index freeing the rings as we go */ + while (i--) + ixgbevf_free_tx_resources(adapter->tx_ring[i]); return err; } @@ -3148,8 +3211,14 @@ static int ixgbevf_setup_all_rx_resources(struct ixgbevf_adapter *adapter) if (!err) continue; hw_dbg(&adapter->hw, "Allocation for Rx Queue %u failed\n", i); - break; + goto err_setup_rx; } + + return 0; +err_setup_rx: + /* rewind the index freeing the rings as we go */ + while (i--) + ixgbevf_free_rx_resources(adapter->rx_ring[i]); return err; } @@ -3244,28 +3313,31 @@ int ixgbevf_open(struct net_device *netdev) ixgbevf_configure(adapter); - /* Map the Tx/Rx rings to the vectors we were allotted. - * if request_irq will be called in this function map_rings - * must be called *before* up_complete - */ - ixgbevf_map_rings_to_vectors(adapter); - err = ixgbevf_request_irq(adapter); if (err) goto err_req_irq; + /* Notify the stack of the actual queue counts. */ + err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues); + if (err) + goto err_set_queues; + + err = netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues); + if (err) + goto err_set_queues; + ixgbevf_up_complete(adapter); return 0; +err_set_queues: + ixgbevf_free_irq(adapter); err_req_irq: - ixgbevf_down(adapter); -err_setup_rx: ixgbevf_free_all_rx_resources(adapter); -err_setup_tx: +err_setup_rx: ixgbevf_free_all_tx_resources(adapter); +err_setup_tx: ixgbevf_reset(adapter); - err_setup_reset: return err; @@ -3707,11 +3779,10 @@ static int ixgbevf_maybe_stop_tx(struct ixgbevf_ring *tx_ring, int size) return __ixgbevf_maybe_stop_tx(tx_ring, size); } -static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev) +static int ixgbevf_xmit_frame_ring(struct sk_buff *skb, + struct ixgbevf_ring *tx_ring) { - struct ixgbevf_adapter *adapter = netdev_priv(netdev); struct ixgbevf_tx_buffer *first; - struct ixgbevf_ring *tx_ring; int tso; u32 tx_flags = 0; u16 count = TXD_USE_COUNT(skb_headlen(skb)); @@ -3726,8 +3797,6 @@ static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev) return NETDEV_TX_OK; } - tx_ring = adapter->tx_ring[skb->queue_mapping]; - /* need: 1 descriptor per page * PAGE_SIZE/IXGBE_MAX_DATA_PER_TXD, * + 1 desc for skb_headlen/IXGBE_MAX_DATA_PER_TXD, * + 2 desc gap to keep tail from touching head, @@ -3780,6 +3849,29 @@ out_drop: return NETDEV_TX_OK; } +static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev) +{ + struct ixgbevf_adapter *adapter = netdev_priv(netdev); + struct ixgbevf_ring *tx_ring; + + if (skb->len <= 0) { + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + + /* The minimum packet size for olinfo paylen is 17 so pad the skb + * in order to meet this minimum size requirement. + */ + if (skb->len < 17) { + if (skb_padto(skb, 17)) + return NETDEV_TX_OK; + skb->len = 17; + } + + tx_ring = adapter->tx_ring[skb->queue_mapping]; + return ixgbevf_xmit_frame_ring(skb, tx_ring); +} + /** * ixgbevf_set_mac - Change the Ethernet Address of the NIC * @netdev: network interface device structure @@ -3839,6 +3931,9 @@ static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu) /* must set new MTU before calling down or up */ netdev->mtu = new_mtu; + if (netif_running(netdev)) + ixgbevf_reinit_locked(adapter); + return 0; } @@ -3917,17 +4012,11 @@ static int ixgbevf_resume(struct pci_dev *pdev) rtnl_lock(); err = ixgbevf_init_interrupt_scheme(adapter); + if (!err && netif_running(netdev)) + err = ixgbevf_open(netdev); rtnl_unlock(); - if (err) { - dev_err(&pdev->dev, "Cannot initialize interrupts\n"); + if (err) return err; - } - - if (netif_running(netdev)) { - err = ixgbevf_open(netdev); - if (err) - return err; - } netif_device_attach(netdev); @@ -3953,6 +4042,7 @@ static void ixgbevf_get_stats(struct net_device *netdev, stats->multicast = adapter->stats.vfmprc - adapter->stats.base_vfmprc; + rcu_read_lock(); for (i = 0; i < adapter->num_rx_queues; i++) { ring = adapter->rx_ring[i]; do { @@ -3974,6 +4064,7 @@ static void ixgbevf_get_stats(struct net_device *netdev, stats->tx_bytes += bytes; stats->tx_packets += packets; } + rcu_read_unlock(); } #define IXGBEVF_MAX_MAC_HDR_LEN 127 diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 5a1668cdb461..9418f6eed086 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -65,6 +65,10 @@ #define MVPP2_RXQ_PACKET_OFFSET_MASK 0x70000000 #define MVPP2_RXQ_DISABLE_MASK BIT(31) +/* Top Registers */ +#define MVPP2_MH_REG(port) (0x5040 + 4 * (port)) +#define MVPP2_DSA_EXTENDED BIT(5) + /* Parser Registers */ #define MVPP2_PRS_INIT_LOOKUP_REG 0x1000 #define MVPP2_PRS_PORT_LU_MAX 0xf @@ -473,6 +477,7 @@ #define MVPP2_ETH_TYPE_LEN 2 #define MVPP2_PPPOE_HDR_SIZE 8 #define MVPP2_VLAN_TAG_LEN 4 +#define MVPP2_VLAN_TAG_EDSA_LEN 8 /* Lbtd 802.3 type */ #define MVPP2_IP_LBDT_TYPE 0xfffa @@ -609,35 +614,64 @@ enum mvpp2_tag_type { #define MVPP2_PRS_TCAM_LU_BYTE 20 #define MVPP2_PRS_TCAM_EN_OFFS(offs) ((offs) + 2) #define MVPP2_PRS_TCAM_INV_WORD 5 + +#define MVPP2_PRS_VID_TCAM_BYTE 2 + +/* There is a TCAM range reserved for VLAN filtering entries, range size is 33 + * 10 VLAN ID filter entries per port + * 1 default VLAN filter entry per port + * It is assumed that there are 3 ports for filter, not including loopback port + */ +#define MVPP2_PRS_VLAN_FILT_MAX 11 +#define MVPP2_PRS_VLAN_FILT_RANGE_SIZE 33 + +#define MVPP2_PRS_VLAN_FILT_MAX_ENTRY (MVPP2_PRS_VLAN_FILT_MAX - 2) +#define MVPP2_PRS_VLAN_FILT_DFLT_ENTRY (MVPP2_PRS_VLAN_FILT_MAX - 1) + /* Tcam entries ID */ #define MVPP2_PE_DROP_ALL 0 #define MVPP2_PE_FIRST_FREE_TID 1 -#define MVPP2_PE_LAST_FREE_TID (MVPP2_PRS_TCAM_SRAM_SIZE - 31) + +/* VLAN filtering range */ +#define MVPP2_PE_VID_FILT_RANGE_END (MVPP2_PRS_TCAM_SRAM_SIZE - 31) +#define MVPP2_PE_VID_FILT_RANGE_START (MVPP2_PE_VID_FILT_RANGE_END - \ + MVPP2_PRS_VLAN_FILT_RANGE_SIZE + 1) +#define MVPP2_PE_LAST_FREE_TID (MVPP2_PE_VID_FILT_RANGE_START - 1) #define MVPP2_PE_IP6_EXT_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 30) #define MVPP2_PE_MAC_MC_IP6 (MVPP2_PRS_TCAM_SRAM_SIZE - 29) #define MVPP2_PE_IP6_ADDR_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 28) #define MVPP2_PE_IP4_ADDR_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 27) #define MVPP2_PE_LAST_DEFAULT_FLOW (MVPP2_PRS_TCAM_SRAM_SIZE - 26) -#define MVPP2_PE_FIRST_DEFAULT_FLOW (MVPP2_PRS_TCAM_SRAM_SIZE - 19) -#define MVPP2_PE_EDSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 18) -#define MVPP2_PE_EDSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 17) -#define MVPP2_PE_DSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 16) -#define MVPP2_PE_DSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 15) -#define MVPP2_PE_ETYPE_EDSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 14) -#define MVPP2_PE_ETYPE_EDSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 13) -#define MVPP2_PE_ETYPE_DSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 12) -#define MVPP2_PE_ETYPE_DSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 11) -#define MVPP2_PE_MH_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 10) -#define MVPP2_PE_DSA_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 9) -#define MVPP2_PE_IP6_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 8) -#define MVPP2_PE_IP4_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 7) -#define MVPP2_PE_ETH_TYPE_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 6) +#define MVPP2_PE_FIRST_DEFAULT_FLOW (MVPP2_PRS_TCAM_SRAM_SIZE - 21) +#define MVPP2_PE_EDSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 20) +#define MVPP2_PE_EDSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 19) +#define MVPP2_PE_DSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 18) +#define MVPP2_PE_DSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 17) +#define MVPP2_PE_ETYPE_EDSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 16) +#define MVPP2_PE_ETYPE_EDSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 15) +#define MVPP2_PE_ETYPE_DSA_TAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 14) +#define MVPP2_PE_ETYPE_DSA_UNTAGGED (MVPP2_PRS_TCAM_SRAM_SIZE - 13) +#define MVPP2_PE_MH_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 12) +#define MVPP2_PE_DSA_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 11) +#define MVPP2_PE_IP6_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 10) +#define MVPP2_PE_IP4_PROTO_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 9) +#define MVPP2_PE_ETH_TYPE_UN (MVPP2_PRS_TCAM_SRAM_SIZE - 8) +#define MVPP2_PE_VID_FLTR_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 7) +#define MVPP2_PE_VID_EDSA_FLTR_DEFAULT (MVPP2_PRS_TCAM_SRAM_SIZE - 6) #define MVPP2_PE_VLAN_DBL (MVPP2_PRS_TCAM_SRAM_SIZE - 5) #define MVPP2_PE_VLAN_NONE (MVPP2_PRS_TCAM_SRAM_SIZE - 4) #define MVPP2_PE_MAC_MC_ALL (MVPP2_PRS_TCAM_SRAM_SIZE - 3) #define MVPP2_PE_MAC_PROMISCUOUS (MVPP2_PRS_TCAM_SRAM_SIZE - 2) #define MVPP2_PE_MAC_NON_PROMISCUOUS (MVPP2_PRS_TCAM_SRAM_SIZE - 1) +#define MVPP2_PRS_VID_PORT_FIRST(port) (MVPP2_PE_VID_FILT_RANGE_START + \ + ((port) * MVPP2_PRS_VLAN_FILT_MAX)) +#define MVPP2_PRS_VID_PORT_LAST(port) (MVPP2_PRS_VID_PORT_FIRST(port) \ + + MVPP2_PRS_VLAN_FILT_MAX_ENTRY) +/* Index of default vid filter for given port */ +#define MVPP2_PRS_VID_PORT_DFLT(port) (MVPP2_PRS_VID_PORT_FIRST(port) \ + + MVPP2_PRS_VLAN_FILT_DFLT_ENTRY) + /* Sram structure * The fields are represented by MVPP2_PRS_TCAM_DATA_REG(3)->(0). */ @@ -725,6 +759,7 @@ enum mvpp2_tag_type { #define MVPP2_PRS_IPV6_EXT_AH_L4_AI_BIT BIT(4) #define MVPP2_PRS_SINGLE_VLAN_AI 0 #define MVPP2_PRS_DBL_VLAN_AI_BIT BIT(7) +#define MVPP2_PRS_EDSA_VID_AI_BIT BIT(0) /* DSA/EDSA type */ #define MVPP2_PRS_TAGGED true @@ -747,6 +782,7 @@ enum mvpp2_prs_lookup { MVPP2_PRS_LU_MAC, MVPP2_PRS_LU_DSA, MVPP2_PRS_LU_VLAN, + MVPP2_PRS_LU_VID, MVPP2_PRS_LU_L2, MVPP2_PRS_LU_PPPOE, MVPP2_PRS_LU_IP4, @@ -1662,6 +1698,14 @@ static void mvpp2_prs_match_etype(struct mvpp2_prs_entry *pe, int offset, mvpp2_prs_tcam_data_byte_set(pe, offset + 1, ethertype & 0xff, 0xff); } +/* Set vid in tcam sw entry */ +static void mvpp2_prs_match_vid(struct mvpp2_prs_entry *pe, int offset, + unsigned short vid) +{ + mvpp2_prs_tcam_data_byte_set(pe, offset + 0, (vid & 0xf00) >> 8, 0xf); + mvpp2_prs_tcam_data_byte_set(pe, offset + 1, vid & 0xff, 0xff); +} + /* Set bits in sram sw entry */ static void mvpp2_prs_sram_bits_set(struct mvpp2_prs_entry *pe, int bit_num, int val) @@ -2029,24 +2073,30 @@ static void mvpp2_prs_dsa_tag_set(struct mvpp2 *priv, int port, bool add, mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_DSA); pe.index = tid; - /* Shift 4 bytes if DSA tag or 8 bytes in case of EDSA tag*/ - mvpp2_prs_sram_shift_set(&pe, shift, - MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); - /* Update shadow table */ mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_DSA); if (tagged) { /* Set tagged bit in DSA tag */ mvpp2_prs_tcam_data_byte_set(&pe, 0, - MVPP2_PRS_TCAM_DSA_TAGGED_BIT, - MVPP2_PRS_TCAM_DSA_TAGGED_BIT); - /* Clear all ai bits for next iteration */ - mvpp2_prs_sram_ai_update(&pe, 0, - MVPP2_PRS_SRAM_AI_MASK); - /* If packet is tagged continue check vlans */ - mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VLAN); + MVPP2_PRS_TCAM_DSA_TAGGED_BIT, + MVPP2_PRS_TCAM_DSA_TAGGED_BIT); + + /* Set ai bits for next iteration */ + if (extend) + mvpp2_prs_sram_ai_update(&pe, 1, + MVPP2_PRS_SRAM_AI_MASK); + else + mvpp2_prs_sram_ai_update(&pe, 0, + MVPP2_PRS_SRAM_AI_MASK); + + /* If packet is tagged continue check vid filtering */ + mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VID); } else { + /* Shift 4 bytes for DSA tag or 8 bytes for EDSA tag*/ + mvpp2_prs_sram_shift_set(&pe, shift, + MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); + /* Set result info bits to 'no vlans' */ mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_VLAN_NONE, MVPP2_PRS_RI_VLAN_MASK); @@ -2231,10 +2281,9 @@ static int mvpp2_prs_vlan_add(struct mvpp2 *priv, unsigned short tpid, int ai, mvpp2_prs_match_etype(pe, 0, tpid); - mvpp2_prs_sram_next_lu_set(pe, MVPP2_PRS_LU_L2); - /* Shift 4 bytes - skip 1 vlan tag */ - mvpp2_prs_sram_shift_set(pe, MVPP2_VLAN_TAG_LEN, - MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); + /* VLAN tag detected, proceed with VID filtering */ + mvpp2_prs_sram_next_lu_set(pe, MVPP2_PRS_LU_VID); + /* Clear all ai bits for next iteration */ mvpp2_prs_sram_ai_update(pe, 0, MVPP2_PRS_SRAM_AI_MASK); @@ -2375,8 +2424,8 @@ static int mvpp2_prs_double_vlan_add(struct mvpp2 *priv, unsigned short tpid1, mvpp2_prs_match_etype(pe, 4, tpid2); mvpp2_prs_sram_next_lu_set(pe, MVPP2_PRS_LU_VLAN); - /* Shift 8 bytes - skip 2 vlan tags */ - mvpp2_prs_sram_shift_set(pe, 2 * MVPP2_VLAN_TAG_LEN, + /* Shift 4 bytes - skip outer vlan tag */ + mvpp2_prs_sram_shift_set(pe, MVPP2_VLAN_TAG_LEN, MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); mvpp2_prs_sram_ri_update(pe, MVPP2_PRS_RI_VLAN_DOUBLE, MVPP2_PRS_RI_VLAN_MASK); @@ -2755,6 +2804,62 @@ static void mvpp2_prs_dsa_init(struct mvpp2 *priv) mvpp2_prs_hw_write(priv, &pe); } +/* Initialize parser entries for VID filtering */ +static void mvpp2_prs_vid_init(struct mvpp2 *priv) +{ + struct mvpp2_prs_entry pe; + + memset(&pe, 0, sizeof(pe)); + + /* Set default vid entry */ + pe.index = MVPP2_PE_VID_FLTR_DEFAULT; + mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID); + + mvpp2_prs_tcam_ai_update(&pe, 0, MVPP2_PRS_EDSA_VID_AI_BIT); + + /* Skip VLAN header - Set offset to 4 bytes */ + mvpp2_prs_sram_shift_set(&pe, MVPP2_VLAN_TAG_LEN, + MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); + + /* Clear all ai bits for next iteration */ + mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK); + + mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2); + + /* Unmask all ports */ + mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK); + + /* Update shadow table and hw entry */ + mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID); + mvpp2_prs_hw_write(priv, &pe); + + /* Set default vid entry for extended DSA*/ + memset(&pe, 0, sizeof(pe)); + + /* Set default vid entry */ + pe.index = MVPP2_PE_VID_EDSA_FLTR_DEFAULT; + mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID); + + mvpp2_prs_tcam_ai_update(&pe, MVPP2_PRS_EDSA_VID_AI_BIT, + MVPP2_PRS_EDSA_VID_AI_BIT); + + /* Skip VLAN header - Set offset to 8 bytes */ + mvpp2_prs_sram_shift_set(&pe, MVPP2_VLAN_TAG_EDSA_LEN, + MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); + + /* Clear all ai bits for next iteration */ + mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK); + + mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2); + + /* Unmask all ports */ + mvpp2_prs_tcam_port_map_set(&pe, MVPP2_PRS_PORT_MASK); + + /* Update shadow table and hw entry */ + mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID); + mvpp2_prs_hw_write(priv, &pe); +} + /* Match basic ethertypes */ static int mvpp2_prs_etype_init(struct mvpp2 *priv) { @@ -3023,7 +3128,8 @@ static int mvpp2_prs_vlan_init(struct platform_device *pdev, struct mvpp2 *priv) mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VLAN); pe.index = MVPP2_PE_VLAN_DBL; - mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2); + mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_VID); + /* Clear ai for next iterations */ mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK); mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_VLAN_DOUBLE, @@ -3386,6 +3492,192 @@ static int mvpp2_prs_ip6_init(struct mvpp2 *priv) return 0; } +/* Find tcam entry with matched pair <vid,port> */ +static int mvpp2_prs_vid_range_find(struct mvpp2 *priv, int pmap, u16 vid, + u16 mask) +{ + unsigned char byte[2], enable[2]; + struct mvpp2_prs_entry pe; + u16 rvid, rmask; + int tid; + + /* Go through the all entries with MVPP2_PRS_LU_VID */ + for (tid = MVPP2_PE_VID_FILT_RANGE_START; + tid <= MVPP2_PE_VID_FILT_RANGE_END; tid++) { + if (!priv->prs_shadow[tid].valid || + priv->prs_shadow[tid].lu != MVPP2_PRS_LU_VID) + continue; + + pe.index = tid; + + mvpp2_prs_hw_read(priv, &pe); + mvpp2_prs_tcam_data_byte_get(&pe, 2, &byte[0], &enable[0]); + mvpp2_prs_tcam_data_byte_get(&pe, 3, &byte[1], &enable[1]); + + rvid = ((byte[0] & 0xf) << 8) + byte[1]; + rmask = ((enable[0] & 0xf) << 8) + enable[1]; + + if (rvid != vid || rmask != mask) + continue; + + return tid; + } + + return 0; +} + +/* Write parser entry for VID filtering */ +static int mvpp2_prs_vid_entry_add(struct mvpp2_port *port, u16 vid) +{ + unsigned int vid_start = MVPP2_PE_VID_FILT_RANGE_START + + port->id * MVPP2_PRS_VLAN_FILT_MAX; + unsigned int mask = 0xfff, reg_val, shift; + struct mvpp2 *priv = port->priv; + struct mvpp2_prs_entry pe; + int tid; + + /* Scan TCAM and see if entry with this <vid,port> already exist */ + tid = mvpp2_prs_vid_range_find(priv, (1 << port->id), vid, mask); + + reg_val = mvpp2_read(priv, MVPP2_MH_REG(port->id)); + if (reg_val & MVPP2_DSA_EXTENDED) + shift = MVPP2_VLAN_TAG_EDSA_LEN; + else + shift = MVPP2_VLAN_TAG_LEN; + + /* No such entry */ + if (!tid) { + memset(&pe, 0, sizeof(pe)); + + /* Go through all entries from first to last in vlan range */ + tid = mvpp2_prs_tcam_first_free(priv, vid_start, + vid_start + + MVPP2_PRS_VLAN_FILT_MAX_ENTRY); + + /* There isn't room for a new VID filter */ + if (tid < 0) + return tid; + + mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID); + pe.index = tid; + + /* Mask all ports */ + mvpp2_prs_tcam_port_map_set(&pe, 0); + } else { + mvpp2_prs_hw_read(priv, &pe); + } + + /* Enable the current port */ + mvpp2_prs_tcam_port_set(&pe, port->id, true); + + /* Continue - set next lookup */ + mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2); + + /* Skip VLAN header - Set offset to 4 or 8 bytes */ + mvpp2_prs_sram_shift_set(&pe, shift, MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); + + /* Set match on VID */ + mvpp2_prs_match_vid(&pe, MVPP2_PRS_VID_TCAM_BYTE, vid); + + /* Clear all ai bits for next iteration */ + mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK); + + /* Update shadow table */ + mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID); + mvpp2_prs_hw_write(priv, &pe); + + return 0; +} + +/* Write parser entry for VID filtering */ +static void mvpp2_prs_vid_entry_remove(struct mvpp2_port *port, u16 vid) +{ + struct mvpp2 *priv = port->priv; + int tid; + + /* Scan TCAM and see if entry with this <vid,port> already exist */ + tid = mvpp2_prs_vid_range_find(priv, (1 << port->id), vid, 0xfff); + + /* No such entry */ + if (tid) + return; + + mvpp2_prs_hw_inv(priv, tid); + priv->prs_shadow[tid].valid = false; +} + +/* Remove all existing VID filters on this port */ +static void mvpp2_prs_vid_remove_all(struct mvpp2_port *port) +{ + struct mvpp2 *priv = port->priv; + int tid; + + for (tid = MVPP2_PRS_VID_PORT_FIRST(port->id); + tid <= MVPP2_PRS_VID_PORT_LAST(port->id); tid++) { + if (priv->prs_shadow[tid].valid) + mvpp2_prs_vid_entry_remove(port, tid); + } +} + +/* Remove VID filering entry for this port */ +static void mvpp2_prs_vid_disable_filtering(struct mvpp2_port *port) +{ + unsigned int tid = MVPP2_PRS_VID_PORT_DFLT(port->id); + struct mvpp2 *priv = port->priv; + + /* Invalidate the guard entry */ + mvpp2_prs_hw_inv(priv, tid); + + priv->prs_shadow[tid].valid = false; +} + +/* Add guard entry that drops packets when no VID is matched on this port */ +static void mvpp2_prs_vid_enable_filtering(struct mvpp2_port *port) +{ + unsigned int tid = MVPP2_PRS_VID_PORT_DFLT(port->id); + struct mvpp2 *priv = port->priv; + unsigned int reg_val, shift; + struct mvpp2_prs_entry pe; + + if (priv->prs_shadow[tid].valid) + return; + + memset(&pe, 0, sizeof(pe)); + + pe.index = tid; + + reg_val = mvpp2_read(priv, MVPP2_MH_REG(port->id)); + if (reg_val & MVPP2_DSA_EXTENDED) + shift = MVPP2_VLAN_TAG_EDSA_LEN; + else + shift = MVPP2_VLAN_TAG_LEN; + + mvpp2_prs_tcam_lu_set(&pe, MVPP2_PRS_LU_VID); + + /* Mask all ports */ + mvpp2_prs_tcam_port_map_set(&pe, 0); + + /* Update port mask */ + mvpp2_prs_tcam_port_set(&pe, port->id, true); + + /* Continue - set next lookup */ + mvpp2_prs_sram_next_lu_set(&pe, MVPP2_PRS_LU_L2); + + /* Skip VLAN header - Set offset to 4 or 8 bytes */ + mvpp2_prs_sram_shift_set(&pe, shift, MVPP2_PRS_SRAM_OP_SEL_SHIFT_ADD); + + /* Drop VLAN packets that don't belong to any VIDs on this port */ + mvpp2_prs_sram_ri_update(&pe, MVPP2_PRS_RI_DROP_MASK, + MVPP2_PRS_RI_DROP_MASK); + + /* Clear all ai bits for next iteration */ + mvpp2_prs_sram_ai_update(&pe, 0, MVPP2_PRS_SRAM_AI_MASK); + + /* Update shadow table */ + mvpp2_prs_shadow_set(priv, pe.index, MVPP2_PRS_LU_VID); + mvpp2_prs_hw_write(priv, &pe); +} + /* Parser default initialization */ static int mvpp2_prs_default_init(struct platform_device *pdev, struct mvpp2 *priv) @@ -3429,6 +3721,8 @@ static int mvpp2_prs_default_init(struct platform_device *pdev, mvpp2_prs_dsa_init(priv); + mvpp2_prs_vid_init(priv); + err = mvpp2_prs_etype_init(priv); if (err) return err; @@ -7153,6 +7447,12 @@ retry: } } } + + /* Disable VLAN filtering in promiscuous mode */ + if (dev->flags & IFF_PROMISC) + mvpp2_prs_vid_disable_filtering(port); + else + mvpp2_prs_vid_enable_filtering(port); } static int mvpp2_set_mac_address(struct net_device *dev, void *p) @@ -7292,6 +7592,48 @@ static int mvpp2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return ret; } +static int mvpp2_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) +{ + struct mvpp2_port *port = netdev_priv(dev); + int ret; + + ret = mvpp2_prs_vid_entry_add(port, vid); + if (ret) + netdev_err(dev, "rx-vlan-filter offloading cannot accept more than %d VIDs per port\n", + MVPP2_PRS_VLAN_FILT_MAX - 1); + return ret; +} + +static int mvpp2_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) +{ + struct mvpp2_port *port = netdev_priv(dev); + + mvpp2_prs_vid_entry_remove(port, vid); + return 0; +} + +static int mvpp2_set_features(struct net_device *dev, + netdev_features_t features) +{ + netdev_features_t changed = dev->features ^ features; + struct mvpp2_port *port = netdev_priv(dev); + + if (changed & NETIF_F_HW_VLAN_CTAG_FILTER) { + if (features & NETIF_F_HW_VLAN_CTAG_FILTER) { + mvpp2_prs_vid_enable_filtering(port); + } else { + /* Invalidate all registered VID filters for this + * port + */ + mvpp2_prs_vid_remove_all(port); + + mvpp2_prs_vid_disable_filtering(port); + } + } + + return 0; +} + /* Ethtool methods */ /* Set interrupt coalescing for ethtools */ @@ -7433,6 +7775,9 @@ static const struct net_device_ops mvpp2_netdev_ops = { .ndo_change_mtu = mvpp2_change_mtu, .ndo_get_stats64 = mvpp2_get_stats64, .ndo_do_ioctl = mvpp2_ioctl, + .ndo_vlan_rx_add_vid = mvpp2_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = mvpp2_vlan_rx_kill_vid, + .ndo_set_features = mvpp2_set_features, }; static const struct ethtool_ops mvpp2_eth_tool_ops = { @@ -7945,7 +8290,8 @@ static int mvpp2_port_probe(struct platform_device *pdev, features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO; dev->features = features | NETIF_F_RXCSUM; - dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO; + dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO | + NETIF_F_HW_VLAN_CTAG_FILTER; dev->vlan_features |= features; dev->gso_max_segs = MVPP2_MAX_TSO_SEGS; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index ebc1f566a4d9..9a7a2f05ab35 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -199,6 +199,10 @@ static const char main_strings[][ETH_GSTRING_LEN] = { "rx_xdp_drop", "rx_xdp_tx", "rx_xdp_tx_full", + + /* phy statistics */ + "rx_packets_phy", "rx_bytes_phy", + "tx_packets_phy", "tx_bytes_phy", }; static const char mlx4_en_test_names[][ETH_GSTRING_LEN]= { @@ -411,6 +415,10 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev, if (bitmap_iterator_test(&it)) data[index++] = ((unsigned long *)&priv->xdp_stats)[i]; + for (i = 0; i < NUM_PHY_STATS; i++, bitmap_iterator_inc(&it)) + if (bitmap_iterator_test(&it)) + data[index++] = ((unsigned long *)&priv->phy_stats)[i]; + for (i = 0; i < priv->tx_ring_num[TX]; i++) { data[index++] = priv->tx_ring[TX][i]->packets; data[index++] = priv->tx_ring[TX][i]->bytes; @@ -490,6 +498,12 @@ static void mlx4_en_get_strings(struct net_device *dev, strcpy(data + (index++) * ETH_GSTRING_LEN, main_strings[strings]); + for (i = 0; i < NUM_PHY_STATS; i++, strings++, + bitmap_iterator_inc(&it)) + if (bitmap_iterator_test(&it)) + strcpy(data + (index++) * ETH_GSTRING_LEN, + main_strings[strings]); + for (i = 0; i < priv->tx_ring_num[TX]; i++) { sprintf(data + (index++) * ETH_GSTRING_LEN, "tx%d_packets", i); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 8fc51bc29003..e0adac4a9a19 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -3256,6 +3256,10 @@ void mlx4_en_set_stats_bitmap(struct mlx4_dev *dev, bitmap_set(stats_bitmap->bitmap, last_i, NUM_XDP_STATS); last_i += NUM_XDP_STATS; + + if (!mlx4_is_slave(dev)) + bitmap_set(stats_bitmap->bitmap, last_i, NUM_PHY_STATS); + last_i += NUM_PHY_STATS; } int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, @@ -3630,10 +3634,6 @@ int mlx4_en_reset_config(struct net_device *dev, mlx4_en_stop_port(dev, 1); } - en_warn(priv, "Changing device configuration rx filter(%x) rx vlan(%x)\n", - ts_config.rx_filter, - !!(features & NETIF_F_HW_VLAN_CTAG_RX)); - mlx4_en_safe_replace_resources(priv, tmp); if (DEV_FEATURE_CHANGED(dev, features, NETIF_F_HW_VLAN_CTAG_RX)) { diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.c b/drivers/net/ethernet/mellanox/mlx4/en_port.c index 1fa4849a6f56..0158b88bea5b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_port.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_port.c @@ -275,19 +275,31 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) priv->port_stats.xmit_more += READ_ONCE(ring->xmit_more); } - if (mlx4_is_master(mdev->dev)) { - stats->rx_packets = en_stats_adder(&mlx4_en_stats->RTOT_prio_0, - &mlx4_en_stats->RTOT_prio_1, - NUM_PRIORITIES); - stats->tx_packets = en_stats_adder(&mlx4_en_stats->TTOT_prio_0, - &mlx4_en_stats->TTOT_prio_1, - NUM_PRIORITIES); - stats->rx_bytes = en_stats_adder(&mlx4_en_stats->ROCT_prio_0, - &mlx4_en_stats->ROCT_prio_1, - NUM_PRIORITIES); - stats->tx_bytes = en_stats_adder(&mlx4_en_stats->TOCT_prio_0, - &mlx4_en_stats->TOCT_prio_1, - NUM_PRIORITIES); + if (!mlx4_is_slave(mdev->dev)) { + struct mlx4_en_phy_stats *p_stats = &priv->phy_stats; + + p_stats->rx_packets_phy = + en_stats_adder(&mlx4_en_stats->RTOT_prio_0, + &mlx4_en_stats->RTOT_prio_1, + NUM_PRIORITIES); + p_stats->tx_packets_phy = + en_stats_adder(&mlx4_en_stats->TTOT_prio_0, + &mlx4_en_stats->TTOT_prio_1, + NUM_PRIORITIES); + p_stats->rx_bytes_phy = + en_stats_adder(&mlx4_en_stats->ROCT_prio_0, + &mlx4_en_stats->ROCT_prio_1, + NUM_PRIORITIES); + p_stats->tx_bytes_phy = + en_stats_adder(&mlx4_en_stats->TOCT_prio_0, + &mlx4_en_stats->TOCT_prio_1, + NUM_PRIORITIES); + if (mlx4_is_master(mdev->dev)) { + stats->rx_packets = p_stats->rx_packets_phy; + stats->tx_packets = p_stats->tx_packets_phy; + stats->rx_bytes = p_stats->rx_bytes_phy; + stats->tx_bytes = p_stats->tx_bytes_phy; + } } /* net device stats */ diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index b4d144e67514..c2c6bd7578fd 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -649,6 +649,12 @@ static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va, return get_fixed_ipv4_csum(hw_checksum, skb, hdr); } +#if IS_ENABLED(CONFIG_IPV6) +#define MLX4_CQE_STATUS_IP_ANY (MLX4_CQE_STATUS_IPV4 | MLX4_CQE_STATUS_IPV6) +#else +#define MLX4_CQE_STATUS_IP_ANY (MLX4_CQE_STATUS_IPV4) +#endif + int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget) { struct mlx4_en_priv *priv = netdev_priv(dev); @@ -662,12 +668,9 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud int polled = 0; int index; - if (unlikely(!priv->port_up)) + if (unlikely(!priv->port_up || budget <= 0)) return 0; - if (unlikely(budget <= 0)) - return polled; - ring = priv->rx_ring[cq_ring]; /* Protect accesses to: ring->xdp_prog, priv->mac_hash list */ @@ -838,12 +841,7 @@ xdp_drop_no_cnt: ring->csum_ok++; } else { if (!(priv->flags & MLX4_EN_FLAG_RX_CSUM_NON_TCP_UDP && - (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 | -#if IS_ENABLED(CONFIG_IPV6) - MLX4_CQE_STATUS_IPV6)))) -#else - 0)))) -#endif + (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IP_ANY)))) goto csum_none; if (check_csum(cqe, skb, va, dev->features)) goto csum_none; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index f470ae37d937..f7c81133594f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -608,6 +608,7 @@ struct mlx4_en_priv { struct mlx4_en_flow_stats_tx tx_flowstats; struct mlx4_en_port_stats port_stats; struct mlx4_en_xdp_stats xdp_stats; + struct mlx4_en_phy_stats phy_stats; struct mlx4_en_stats_bitmap stats_bitmap; struct list_head mc_list; struct list_head curr_list; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h index aab28eb27a30..86b6051da8ec 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h @@ -63,6 +63,14 @@ struct mlx4_en_xdp_stats { #define NUM_XDP_STATS 3 }; +struct mlx4_en_phy_stats { + unsigned long rx_packets_phy; + unsigned long rx_bytes_phy; + unsigned long tx_packets_phy; + unsigned long tx_bytes_phy; +#define NUM_PHY_STATS 4 +}; + #define NUM_MAIN_STATS 21 #define MLX4_NUM_PRIORITIES 8 @@ -116,7 +124,7 @@ enum { #define NUM_ALL_STATS (NUM_MAIN_STATS + NUM_PORT_STATS + NUM_PKT_STATS + \ NUM_FLOW_STATS + NUM_PERF_STATS + NUM_PF_STATS + \ - NUM_XDP_STATS) + NUM_XDP_STATS + NUM_PHY_STATS) #define MLX4_FIND_NETDEV_STAT(n) (offsetof(struct net_device_stats, n) / \ sizeof(((struct net_device_stats *)0)->n)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index 17b723218b0c..b994b80d5714 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -337,6 +337,14 @@ void mlx5_unregister_interface(struct mlx5_interface *intf) } EXPORT_SYMBOL(mlx5_unregister_interface); +void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol) +{ + mutex_lock(&mlx5_intf_mutex); + mlx5_remove_dev_by_protocol(mdev, protocol); + mlx5_add_dev_by_protocol(mdev, protocol); + mutex_unlock(&mlx5_intf_mutex); +} + void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol) { struct mlx5_priv *priv = &mdev->priv; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 363d8dcb7f17..ea4b255380a2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1156,6 +1156,15 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep) kfree(ppriv); /* mlx5e_rep_priv */ } +static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_rep_priv *rpriv; + + rpriv = mlx5e_rep_to_rep_priv(rep); + + return rpriv->netdev; +} + static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; @@ -1168,6 +1177,7 @@ static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv) rep_if.load = mlx5e_vport_rep_load; rep_if.unload = mlx5e_vport_rep_unload; + rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev; mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_ETH); } } @@ -1195,6 +1205,7 @@ void mlx5e_register_vport_reps(struct mlx5e_priv *priv) rep_if.load = mlx5e_nic_rep_load; rep_if.unload = mlx5e_nic_rep_unload; + rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev; rep_if.priv = rpriv; INIT_LIST_HEAD(&rpriv->vport_sqs_list); mlx5_eswitch_register_vport_rep(esw, 0, &rep_if, REP_ETH); /* UPLINK PF vport*/ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index c2b1d7d351fc..77b7272eaaa8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1619,10 +1619,14 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode); esw->mode = mode; - if (mode == SRIOV_LEGACY) + if (mode == SRIOV_LEGACY) { err = esw_create_legacy_fdb_table(esw, nvfs + 1); - else + } else { + mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); + err = esw_offloads_init(esw, nvfs + 1); + } + if (err) goto abort; @@ -1644,12 +1648,17 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) abort: esw->mode = SRIOV_NONE; + + if (mode == SRIOV_OFFLOADS) + mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); + return err; } void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) { struct esw_mc_addr *mc_promisc; + int old_mode; int nvports; int i; @@ -1675,7 +1684,11 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) else if (esw->mode == SRIOV_OFFLOADS) esw_offloads_cleanup(esw, nvports); + old_mode = esw->mode; esw->mode = SRIOV_NONE; + + if (old_mode == SRIOV_OFFLOADS) + mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); } int mlx5_eswitch_init(struct mlx5_core_dev *dev) @@ -2175,3 +2188,9 @@ free_out: kvfree(out); return err; } + +u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw) +{ + return esw->mode; +} +EXPORT_SYMBOL_GPL(mlx5_eswitch_mode); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 2fa037066b2f..98d2177d0806 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -37,19 +37,9 @@ #include <linux/if_link.h> #include <net/devlink.h> #include <linux/mlx5/device.h> +#include <linux/mlx5/eswitch.h> #include "lib/mpfs.h" -enum { - SRIOV_NONE, - SRIOV_LEGACY, - SRIOV_OFFLOADS -}; - -enum { - REP_ETH, - NUM_REP_TYPES, -}; - #ifdef CONFIG_MLX5_ESWITCH #define MLX5_MAX_UC_PER_VPORT(dev) \ @@ -139,29 +129,13 @@ struct mlx5_eswitch_fdb { struct mlx5_flow_table *fdb; struct mlx5_flow_group *send_to_vport_grp; struct mlx5_flow_group *miss_grp; - struct mlx5_flow_handle *miss_rule; + struct mlx5_flow_handle *miss_rule_uni; + struct mlx5_flow_handle *miss_rule_multi; int vlan_push_pop_refcount; } offloads; }; }; -struct mlx5_eswitch_rep; -struct mlx5_eswitch_rep_if { - int (*load)(struct mlx5_core_dev *dev, - struct mlx5_eswitch_rep *rep); - void (*unload)(struct mlx5_eswitch_rep *rep); - void *priv; - bool valid; -}; - -struct mlx5_eswitch_rep { - struct mlx5_eswitch_rep_if rep_if[NUM_REP_TYPES]; - u16 vport; - u8 hw_id[ETH_ALEN]; - u16 vlan; - u32 vlan_refcount; -}; - struct mlx5_esw_offload { struct mlx5_flow_table *ft_offloads; struct mlx5_flow_group *vport_rx_group; @@ -231,9 +205,6 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, int vport, struct ifla_vf_stats *vf_stats); -struct mlx5_flow_handle * -mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, - u32 sqn); void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule); struct mlx5_flow_spec; @@ -278,13 +249,6 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode); int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode); int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap); int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap); -void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, - int vport_index, - struct mlx5_eswitch_rep_if *rep_if, - u8 rep_type); -void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, - int vport_index, - u8 rep_type); void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type); int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 99f583a15cc3..0a8303c1b52f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -338,6 +338,7 @@ out: kvfree(spec); return flow_rule; } +EXPORT_SYMBOL(mlx5_eswitch_add_send_to_vport_rule); void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule) { @@ -350,7 +351,11 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) struct mlx5_flow_destination dest = {}; struct mlx5_flow_handle *flow_rule = NULL; struct mlx5_flow_spec *spec; + void *headers_c; + void *headers_v; int err = 0; + u8 *dmac_c; + u8 *dmac_v; spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) { @@ -358,6 +363,13 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) goto out; } + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers); + dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, + outer_headers.dmac_47_16); + dmac_c[0] = 0x01; + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; dest.vport_num = 0; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; @@ -366,11 +378,28 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) &flow_act, &dest, 1); if (IS_ERR(flow_rule)) { err = PTR_ERR(flow_rule); - esw_warn(esw->dev, "FDB: Failed to add miss flow rule err %d\n", err); + esw_warn(esw->dev, "FDB: Failed to add unicast miss flow rule err %d\n", err); goto out; } - esw->fdb_table.offloads.miss_rule = flow_rule; + esw->fdb_table.offloads.miss_rule_uni = flow_rule; + + headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers); + dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, + outer_headers.dmac_47_16); + dmac_v[0] = 0x01; + flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.fdb, spec, + &flow_act, &dest, 1); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + esw_warn(esw->dev, "FDB: Failed to add multicast miss flow rule err %d\n", err); + mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni); + goto out; + } + + esw->fdb_table.offloads.miss_rule_multi = flow_rule; + out: kvfree(spec); return err; @@ -426,6 +455,7 @@ static void esw_destroy_offloads_fast_fdb_table(struct mlx5_eswitch *esw) } #define MAX_PF_SQ 256 +#define MAX_SQ_NVPORTS 32 static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) { @@ -438,6 +468,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) struct mlx5_flow_group *g; void *match_criteria; u32 *flow_group_in; + u8 *dmac; esw_debug(esw->dev, "Create offloads FDB Tables\n"); flow_group_in = kvzalloc(inlen, GFP_KERNEL); @@ -455,7 +486,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) if (err) goto fast_fdb_err; - table_size = nvports + MAX_PF_SQ + 1; + table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + 2; ft_attr.max_fte = table_size; ft_attr.prio = FDB_SLOW_PATH; @@ -478,7 +509,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn); MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port); - ix = nvports + MAX_PF_SQ; + ix = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ; MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix - 1); @@ -492,10 +523,16 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) /* create miss group */ memset(flow_group_in, 0, inlen); - MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, 0); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_OUTER_HEADERS); + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, + match_criteria); + dmac = MLX5_ADDR_OF(fte_match_param, match_criteria, + outer_headers.dmac_47_16); + dmac[0] = 0x01; MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + 2); g = mlx5_create_flow_group(fdb, flow_group_in); if (IS_ERR(g)) { @@ -531,7 +568,8 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) return; esw_debug(esw->dev, "Destroy offloads FDB Tables\n"); - mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule); + mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_multi); + mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni); mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); @@ -789,14 +827,9 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) { int err; - /* disable PF RoCE so missed packets don't go through RoCE steering */ - mlx5_dev_list_lock(); - mlx5_remove_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); - mlx5_dev_list_unlock(); - err = esw_create_offloads_fdb_tables(esw, nvports); if (err) - goto create_fdb_err; + return err; err = esw_create_offloads_table(esw); if (err) @@ -821,12 +854,6 @@ create_fg_err: create_ft_err: esw_destroy_offloads_fdb_tables(esw); -create_fdb_err: - /* enable back PF RoCE */ - mlx5_dev_list_lock(); - mlx5_add_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); - mlx5_dev_list_unlock(); - return err; } @@ -844,9 +871,7 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw) } /* enable back PF RoCE */ - mlx5_dev_list_lock(); - mlx5_add_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); - mlx5_dev_list_unlock(); + mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); return err; } @@ -1160,10 +1185,12 @@ void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, rep_if->load = __rep_if->load; rep_if->unload = __rep_if->unload; + rep_if->get_proto_dev = __rep_if->get_proto_dev; rep_if->priv = __rep_if->priv; rep_if->valid = true; } +EXPORT_SYMBOL(mlx5_eswitch_register_vport_rep); void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, int vport_index, u8 rep_type) @@ -1178,6 +1205,7 @@ void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, rep->rep_if[rep_type].valid = false; } +EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_rep); void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type) { @@ -1188,3 +1216,35 @@ void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type) rep = &offloads->vport_reps[UPLINK_REP_INDEX]; return rep->rep_if[rep_type].priv; } + +void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw, + int vport, + u8 rep_type) +{ + struct mlx5_esw_offload *offloads = &esw->offloads; + struct mlx5_eswitch_rep *rep; + + if (vport == FDB_UPLINK_VPORT) + vport = UPLINK_REP_INDEX; + + rep = &offloads->vport_reps[vport]; + + if (rep->rep_if[rep_type].valid && + rep->rep_if[rep_type].get_proto_dev) + return rep->rep_if[rep_type].get_proto_dev(rep); + return NULL; +} +EXPORT_SYMBOL(mlx5_eswitch_get_proto_dev); + +void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type) +{ + return mlx5_eswitch_get_proto_dev(esw, UPLINK_REP_INDEX, rep_type); +} +EXPORT_SYMBOL(mlx5_eswitch_uplink_get_proto_dev); + +struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw, + int vport) +{ + return &esw->offloads.vport_reps[vport]; +} +EXPORT_SYMBOL(mlx5_eswitch_vport_rep); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 23e17ac0cba5..4e25f2b2e0bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -43,12 +43,6 @@ #define DRIVER_NAME "mlx5_core" #define DRIVER_VERSION "5.0-0" -#define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs(mdev->pdev)) -#define MLX5_VPORT_MANAGER(mdev) \ - (MLX5_CAP_GEN(mdev, vport_group_manager) && \ - (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \ - mlx5_core_is_pf(mdev)) - extern uint mlx5_core_debug_mask; #define mlx5_core_dbg(__dev, format, ...) \ @@ -207,4 +201,5 @@ static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev) int mlx5_lag_allow(struct mlx5_core_dev *dev); int mlx5_lag_forbid(struct mlx5_core_dev *dev); +void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol); #endif /* __MLX5_CORE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig index d56eea310509..93d97b4676eb 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig +++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig @@ -78,6 +78,10 @@ config MLXSW_SPECTRUM depends on IPV6 || IPV6=n select PARMAN select MLXFW + depends on NET_IPGRE + depends on !(MLXSW_CORE=y && NET_IPGRE=m) + depends on IPV6_GRE + depends on !(MLXSW_CORE=y && IPV6_GRE=m) default m ---help--- This driver supports Mellanox Technologies Spectrum Ethernet diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c index b698fb481b2e..ba338428ffd1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c @@ -1,6 +1,6 @@ /* * drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c - * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017, 2018 Mellanox Technologies. All rights reserved. * Copyright (c) 2017 Jiri Pirko <jiri@mellanox.com> * * Redistribution and use in source and binary forms, with or without @@ -838,7 +838,6 @@ struct mlxsw_afa_mirror { struct mlxsw_afa_resource resource; int span_id; u8 local_in_port; - u8 local_out_port; bool ingress; }; @@ -848,7 +847,7 @@ mlxsw_afa_mirror_destroy(struct mlxsw_afa_block *block, { block->afa->ops->mirror_del(block->afa->ops_priv, mirror->local_in_port, - mirror->local_out_port, + mirror->span_id, mirror->ingress); kfree(mirror); } @@ -864,9 +863,8 @@ mlxsw_afa_mirror_destructor(struct mlxsw_afa_block *block, } static struct mlxsw_afa_mirror * -mlxsw_afa_mirror_create(struct mlxsw_afa_block *block, - u8 local_in_port, u8 local_out_port, - bool ingress) +mlxsw_afa_mirror_create(struct mlxsw_afa_block *block, u8 local_in_port, + const struct net_device *out_dev, bool ingress) { struct mlxsw_afa_mirror *mirror; int err; @@ -876,13 +874,12 @@ mlxsw_afa_mirror_create(struct mlxsw_afa_block *block, return ERR_PTR(-ENOMEM); err = block->afa->ops->mirror_add(block->afa->ops_priv, - local_in_port, local_out_port, + local_in_port, out_dev, ingress, &mirror->span_id); if (err) goto err_mirror_add; mirror->ingress = ingress; - mirror->local_out_port = local_out_port; mirror->local_in_port = local_in_port; mirror->resource.destructor = mlxsw_afa_mirror_destructor; mlxsw_afa_resource_add(block, &mirror->resource); @@ -909,13 +906,13 @@ mlxsw_afa_block_append_allocated_mirror(struct mlxsw_afa_block *block, } int -mlxsw_afa_block_append_mirror(struct mlxsw_afa_block *block, - u8 local_in_port, u8 local_out_port, bool ingress) +mlxsw_afa_block_append_mirror(struct mlxsw_afa_block *block, u8 local_in_port, + const struct net_device *out_dev, bool ingress) { struct mlxsw_afa_mirror *mirror; int err; - mirror = mlxsw_afa_mirror_create(block, local_in_port, local_out_port, + mirror = mlxsw_afa_mirror_create(block, local_in_port, out_dev, ingress); if (IS_ERR(mirror)) return PTR_ERR(mirror); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h index 43132293475c..6dd601703c99 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h @@ -36,6 +36,7 @@ #define _MLXSW_CORE_ACL_FLEX_ACTIONS_H #include <linux/types.h> +#include <linux/netdevice.h> struct mlxsw_afa; struct mlxsw_afa_block; @@ -48,9 +49,10 @@ struct mlxsw_afa_ops { void (*kvdl_fwd_entry_del)(void *priv, u32 kvdl_index); int (*counter_index_get)(void *priv, unsigned int *p_counter_index); void (*counter_index_put)(void *priv, unsigned int counter_index); - int (*mirror_add)(void *priv, u8 locol_in_port, u8 local_out_port, + int (*mirror_add)(void *priv, u8 local_in_port, + const struct net_device *out_dev, bool ingress, int *p_span_id); - void (*mirror_del)(void *priv, u8 locol_in_port, u8 local_out_port, + void (*mirror_del)(void *priv, u8 local_in_port, int span_id, bool ingress); }; @@ -70,7 +72,8 @@ int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id); int mlxsw_afa_block_append_trap_and_forward(struct mlxsw_afa_block *block, u16 trap_id); int mlxsw_afa_block_append_mirror(struct mlxsw_afa_block *block, - u8 local_in_port, u8 local_out_port, + u8 local_in_port, + const struct net_device *out_dev, bool ingress); int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block, u8 local_port, bool in_port); diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 0e08be41c8e0..cb5f77f09f8e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -1,11 +1,11 @@ /* * drivers/net/ethernet/mellanox/mlxsw/reg.h - * Copyright (c) 2015-2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved. * Copyright (c) 2015-2016 Ido Schimmel <idosch@mellanox.com> * Copyright (c) 2015 Elad Raz <eladr@mellanox.com> * Copyright (c) 2015-2017 Jiri Pirko <jiri@mellanox.com> * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com> - * Copyright (c) 2017 Petr Machata <petrm@mellanox.com> + * Copyright (c) 2017-2018 Petr Machata <petrm@mellanox.com> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -6772,8 +6772,104 @@ MLXSW_ITEM32(reg, mpat, qos, 0x04, 26, 1); */ MLXSW_ITEM32(reg, mpat, be, 0x04, 25, 1); +enum mlxsw_reg_mpat_span_type { + /* Local SPAN Ethernet. + * The original packet is not encapsulated. + */ + MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH = 0x0, + + /* Encapsulated Remote SPAN Ethernet L3 GRE. + * The packet is encapsulated with GRE header. + */ + MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3 = 0x3, +}; + +/* reg_mpat_span_type + * SPAN type. + * Access: RW + */ +MLXSW_ITEM32(reg, mpat, span_type, 0x04, 0, 4); + +/* Remote SPAN - Ethernet VLAN + * - - - - - - - - - - - - - - + */ + +/* reg_mpat_eth_rspan_vid + * Encapsulation header VLAN ID. + * Access: RW + */ +MLXSW_ITEM32(reg, mpat, eth_rspan_vid, 0x18, 0, 12); + +/* Encapsulated Remote SPAN - Ethernet L2 + * - - - - - - - - - - - - - - - - - - - + */ + +enum mlxsw_reg_mpat_eth_rspan_version { + MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER = 15, +}; + +/* reg_mpat_eth_rspan_version + * RSPAN mirror header version. + * Access: RW + */ +MLXSW_ITEM32(reg, mpat, eth_rspan_version, 0x10, 18, 4); + +/* reg_mpat_eth_rspan_mac + * Destination MAC address. + * Access: RW + */ +MLXSW_ITEM_BUF(reg, mpat, eth_rspan_mac, 0x12, 6); + +/* reg_mpat_eth_rspan_tp + * Tag Packet. Indicates whether the mirroring header should be VLAN tagged. + * Access: RW + */ +MLXSW_ITEM32(reg, mpat, eth_rspan_tp, 0x18, 16, 1); + +/* Encapsulated Remote SPAN - Ethernet L3 + * - - - - - - - - - - - - - - - - - - - + */ + +enum mlxsw_reg_mpat_eth_rspan_protocol { + MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV4, + MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV6, +}; + +/* reg_mpat_eth_rspan_protocol + * SPAN encapsulation protocol. + * Access: RW + */ +MLXSW_ITEM32(reg, mpat, eth_rspan_protocol, 0x18, 24, 4); + +/* reg_mpat_eth_rspan_ttl + * Encapsulation header Time-to-Live/HopLimit. + * Access: RW + */ +MLXSW_ITEM32(reg, mpat, eth_rspan_ttl, 0x1C, 4, 8); + +/* reg_mpat_eth_rspan_smac + * Source MAC address + * Access: RW + */ +MLXSW_ITEM_BUF(reg, mpat, eth_rspan_smac, 0x22, 6); + +/* reg_mpat_eth_rspan_dip* + * Destination IP address. The IP version is configured by protocol. + * Access: RW + */ +MLXSW_ITEM32(reg, mpat, eth_rspan_dip4, 0x4C, 0, 32); +MLXSW_ITEM_BUF(reg, mpat, eth_rspan_dip6, 0x40, 16); + +/* reg_mpat_eth_rspan_sip* + * Source IP address. The IP version is configured by protocol. + * Access: RW + */ +MLXSW_ITEM32(reg, mpat, eth_rspan_sip4, 0x5C, 0, 32); +MLXSW_ITEM_BUF(reg, mpat, eth_rspan_sip6, 0x50, 16); + static inline void mlxsw_reg_mpat_pack(char *payload, u8 pa_id, - u16 system_port, bool e) + u16 system_port, bool e, + enum mlxsw_reg_mpat_span_type span_type) { MLXSW_REG_ZERO(mpat, payload); mlxsw_reg_mpat_pa_id_set(payload, pa_id); @@ -6781,6 +6877,49 @@ static inline void mlxsw_reg_mpat_pack(char *payload, u8 pa_id, mlxsw_reg_mpat_e_set(payload, e); mlxsw_reg_mpat_qos_set(payload, 1); mlxsw_reg_mpat_be_set(payload, 1); + mlxsw_reg_mpat_span_type_set(payload, span_type); +} + +static inline void mlxsw_reg_mpat_eth_rspan_pack(char *payload, u16 vid) +{ + mlxsw_reg_mpat_eth_rspan_vid_set(payload, vid); +} + +static inline void +mlxsw_reg_mpat_eth_rspan_l2_pack(char *payload, + enum mlxsw_reg_mpat_eth_rspan_version version, + const char *mac, + bool tp) +{ + mlxsw_reg_mpat_eth_rspan_version_set(payload, version); + mlxsw_reg_mpat_eth_rspan_mac_memcpy_to(payload, mac); + mlxsw_reg_mpat_eth_rspan_tp_set(payload, tp); +} + +static inline void +mlxsw_reg_mpat_eth_rspan_l3_ipv4_pack(char *payload, u8 ttl, + const char *smac, + u32 sip, u32 dip) +{ + mlxsw_reg_mpat_eth_rspan_ttl_set(payload, ttl); + mlxsw_reg_mpat_eth_rspan_smac_memcpy_to(payload, smac); + mlxsw_reg_mpat_eth_rspan_protocol_set(payload, + MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV4); + mlxsw_reg_mpat_eth_rspan_sip4_set(payload, sip); + mlxsw_reg_mpat_eth_rspan_dip4_set(payload, dip); +} + +static inline void +mlxsw_reg_mpat_eth_rspan_l3_ipv6_pack(char *payload, u8 ttl, + const char *smac, + struct in6_addr sip, struct in6_addr dip) +{ + mlxsw_reg_mpat_eth_rspan_ttl_set(payload, ttl); + mlxsw_reg_mpat_eth_rspan_smac_memcpy_to(payload, smac); + mlxsw_reg_mpat_eth_rspan_protocol_set(payload, + MLXSW_REG_MPAT_ETH_RSPAN_PROTOCOL_IPV6); + mlxsw_reg_mpat_eth_rspan_sip6_memcpy_to(payload, (void *)&sip); + mlxsw_reg_mpat_eth_rspan_dip6_memcpy_to(payload, (void *)&dip); } /* MPAR - Monitoring Port Analyzer Register diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index bfde93910f82..7c6204f701ae 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1,6 +1,6 @@ /* * drivers/net/ethernet/mellanox/mlxsw/spectrum.c - * Copyright (c) 2015-2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved. * Copyright (c) 2015-2017 Jiri Pirko <jiri@mellanox.com> * Copyright (c) 2015 Ido Schimmel <idosch@mellanox.com> * Copyright (c) 2015 Elad Raz <eladr@mellanox.com> @@ -1040,6 +1040,16 @@ mlxsw_sp_port_get_hw_xstats(struct net_device *dev, xstats->tail_drop[i] = mlxsw_reg_ppcnt_tc_no_buffer_discard_uc_get(ppcnt_pl); } + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + err = mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_PRIO_CNT, + i, ppcnt_pl); + if (err) + continue; + + xstats->tx_packets[i] = mlxsw_reg_ppcnt_tx_frames_get(ppcnt_pl); + xstats->tx_bytes[i] = mlxsw_reg_ppcnt_tx_octets_get(ppcnt_pl); + } } static void update_stats_cache(struct work_struct *work) @@ -1258,7 +1268,6 @@ mlxsw_sp_port_add_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress) { enum mlxsw_sp_span_type span_type; - struct mlxsw_sp_port *to_port; struct net_device *to_dev; to_dev = tcf_mirred_dev(a); @@ -1267,17 +1276,10 @@ mlxsw_sp_port_add_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port, return -EINVAL; } - if (!mlxsw_sp_port_dev_check(to_dev)) { - netdev_err(mlxsw_sp_port->dev, "Cannot mirror to a non-spectrum port"); - return -EOPNOTSUPP; - } - to_port = netdev_priv(to_dev); - - mirror->to_local_port = to_port->local_port; mirror->ingress = ingress; span_type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS; - return mlxsw_sp_span_mirror_add(mlxsw_sp_port, to_port, span_type, - true); + return mlxsw_sp_span_mirror_add(mlxsw_sp_port, to_dev, span_type, + true, &mirror->span_id); } static void @@ -1288,7 +1290,7 @@ mlxsw_sp_port_del_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port, span_type = mirror->ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS; - mlxsw_sp_span_mirror_del(mlxsw_sp_port, mirror->to_local_port, + mlxsw_sp_span_mirror_del(mlxsw_sp_port, mirror->span_id, span_type, true); } @@ -3675,14 +3677,24 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, goto err_afa_init; } + err = mlxsw_sp_span_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to init span system\n"); + goto err_span_init; + } + + /* Initialize router after SPAN is initialized, so that the FIB and + * neighbor event handlers can issue SPAN respin. + */ err = mlxsw_sp_router_init(mlxsw_sp); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize router\n"); goto err_router_init; } - /* Initialize netdevice notifier after router is initialized, so that - * the event handler can use router structures. + /* Initialize netdevice notifier after router and SPAN is initialized, + * so that the event handler can use router structures and call SPAN + * respin. */ mlxsw_sp->netdevice_nb.notifier_call = mlxsw_sp_netdevice_event; err = register_netdevice_notifier(&mlxsw_sp->netdevice_nb); @@ -3691,12 +3703,6 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, goto err_netdev_notifier; } - err = mlxsw_sp_span_init(mlxsw_sp); - if (err) { - dev_err(mlxsw_sp->bus_info->dev, "Failed to init span system\n"); - goto err_span_init; - } - err = mlxsw_sp_acl_init(mlxsw_sp); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize ACL\n"); @@ -3722,12 +3728,12 @@ err_ports_create: err_dpipe_init: mlxsw_sp_acl_fini(mlxsw_sp); err_acl_init: - mlxsw_sp_span_fini(mlxsw_sp); -err_span_init: unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb); err_netdev_notifier: mlxsw_sp_router_fini(mlxsw_sp); err_router_init: + mlxsw_sp_span_fini(mlxsw_sp); +err_span_init: mlxsw_sp_afa_fini(mlxsw_sp); err_afa_init: mlxsw_sp_counter_pool_fini(mlxsw_sp); @@ -3753,9 +3759,9 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) mlxsw_sp_ports_remove(mlxsw_sp); mlxsw_sp_dpipe_fini(mlxsw_sp); mlxsw_sp_acl_fini(mlxsw_sp); - mlxsw_sp_span_fini(mlxsw_sp); unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb); mlxsw_sp_router_fini(mlxsw_sp); + mlxsw_sp_span_fini(mlxsw_sp); mlxsw_sp_afa_fini(mlxsw_sp); mlxsw_sp_counter_pool_fini(mlxsw_sp); mlxsw_sp_switchdev_fini(mlxsw_sp); @@ -4639,10 +4645,18 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *nb, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct mlxsw_sp_span_entry *span_entry; struct mlxsw_sp *mlxsw_sp; int err = 0; mlxsw_sp = container_of(nb, struct mlxsw_sp, netdevice_nb); + if (event == NETDEV_UNREGISTER) { + span_entry = mlxsw_sp_span_entry_find_by_port(mlxsw_sp, dev); + if (span_entry) + mlxsw_sp_span_entry_invalidate(mlxsw_sp, span_entry); + } + mlxsw_sp_span_respin(mlxsw_sp); + if (mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev)) err = mlxsw_sp_netdevice_ipip_ol_event(mlxsw_sp, dev, event, ptr); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 675e03a892ed..d5e711d8ad71 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -124,7 +124,7 @@ enum mlxsw_sp_port_mall_action_type { }; struct mlxsw_sp_port_mall_mirror_tc_entry { - u8 to_local_port; + int span_id; bool ingress; }; @@ -210,6 +210,8 @@ struct mlxsw_sp_port_xstats { u64 wred_drop[TC_MAX_QUEUE]; u64 tail_drop[TC_MAX_QUEUE]; u64 backlog[TC_MAX_QUEUE]; + u64 tx_bytes[IEEE_8021QAZ_MAX_TCS]; + u64 tx_packets[IEEE_8021QAZ_MAX_TCS]; }; struct mlxsw_sp_port { @@ -247,6 +249,7 @@ struct mlxsw_sp_port { struct mlxsw_sp_port_sample *sample; struct list_head vlans_list; struct mlxsw_sp_qdisc *root_qdisc; + struct mlxsw_sp_qdisc *tclass_qdiscs; unsigned acl_rule_count; struct mlxsw_sp_acl_block *ing_acl_block; struct mlxsw_sp_acl_block *eg_acl_block; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index 0897a5435cc2..21ed27ae51e3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -572,7 +572,6 @@ int mlxsw_sp_acl_rulei_act_mirror(struct mlxsw_sp *mlxsw_sp, struct net_device *out_dev) { struct mlxsw_sp_acl_block_binding *binding; - struct mlxsw_sp_port *out_port; struct mlxsw_sp_port *in_port; if (!list_is_singular(&block->binding_list)) @@ -581,16 +580,10 @@ int mlxsw_sp_acl_rulei_act_mirror(struct mlxsw_sp *mlxsw_sp, binding = list_first_entry(&block->binding_list, struct mlxsw_sp_acl_block_binding, list); in_port = binding->mlxsw_sp_port; - if (!mlxsw_sp_port_dev_check(out_dev)) - return -EINVAL; - - out_port = netdev_priv(out_dev); - if (out_port->mlxsw_sp != mlxsw_sp) - return -EINVAL; return mlxsw_afa_block_append_mirror(rulei->act_block, in_port->local_port, - out_port->local_port, + out_dev, binding->ingress); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c index f7e61cecc42b..510ce48d87f7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c @@ -1,6 +1,6 @@ /* * drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c - * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017, 2018 Mellanox Technologies. All rights reserved. * Copyright (c) 2017 Jiri Pirko <jiri@mellanox.com> * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com> * @@ -126,40 +126,23 @@ mlxsw_sp_act_counter_index_put(void *priv, unsigned int counter_index) } static int -mlxsw_sp_act_mirror_add(void *priv, u8 local_in_port, u8 local_out_port, +mlxsw_sp_act_mirror_add(void *priv, u8 local_in_port, + const struct net_device *out_dev, bool ingress, int *p_span_id) { - struct mlxsw_sp_port *in_port, *out_port; - struct mlxsw_sp_span_entry *span_entry; + struct mlxsw_sp_port *in_port; struct mlxsw_sp *mlxsw_sp = priv; enum mlxsw_sp_span_type type; - int err; type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS; - out_port = mlxsw_sp->ports[local_out_port]; in_port = mlxsw_sp->ports[local_in_port]; - err = mlxsw_sp_span_mirror_add(in_port, out_port, type, false); - if (err) - return err; - - span_entry = mlxsw_sp_span_entry_find(mlxsw_sp, local_out_port); - if (!span_entry) { - err = -ENOENT; - goto err_span_entry_find; - } - - *p_span_id = span_entry->id; - return 0; - -err_span_entry_find: - mlxsw_sp_span_mirror_del(in_port, local_out_port, type, false); - return err; + return mlxsw_sp_span_mirror_add(in_port, out_dev, type, + false, p_span_id); } static void -mlxsw_sp_act_mirror_del(void *priv, u8 local_in_port, u8 local_out_port, - bool ingress) +mlxsw_sp_act_mirror_del(void *priv, u8 local_in_port, int span_id, bool ingress) { struct mlxsw_sp *mlxsw_sp = priv; struct mlxsw_sp_port *in_port; @@ -168,7 +151,7 @@ mlxsw_sp_act_mirror_del(void *priv, u8 local_in_port, u8 local_out_port, type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS; in_port = mlxsw_sp->ports[local_in_port]; - mlxsw_sp_span_mirror_del(in_port, local_out_port, type, false); + mlxsw_sp_span_mirror_del(in_port, span_id, type, false); } static const struct mlxsw_afa_ops mlxsw_sp_act_afa_ops = { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c index a1c4b1e63f8d..98d896c14b87 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c @@ -1,7 +1,7 @@ /* * drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c - * Copyright (c) 2017 Mellanox Technologies. All rights reserved. - * Copyright (c) 2017 Petr Machata <petrm@mellanox.com> + * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017-2018 Petr Machata <petrm@mellanox.com> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,6 +33,7 @@ */ #include <net/ip_tunnels.h> +#include <net/ip6_tunnel.h> #include "spectrum_ipip.h" @@ -44,6 +45,14 @@ mlxsw_sp_ipip_netdev_parms4(const struct net_device *ol_dev) return tun->parms; } +struct __ip6_tnl_parm +mlxsw_sp_ipip_netdev_parms6(const struct net_device *ol_dev) +{ + struct ip6_tnl *tun = netdev_priv(ol_dev); + + return tun->parms; +} + static bool mlxsw_sp_ipip_parms4_has_ikey(struct ip_tunnel_parm parms) { return !!(parms.i_flags & TUNNEL_KEY); @@ -73,23 +82,37 @@ mlxsw_sp_ipip_parms4_saddr(struct ip_tunnel_parm parms) } static union mlxsw_sp_l3addr +mlxsw_sp_ipip_parms6_saddr(struct __ip6_tnl_parm parms) +{ + return (union mlxsw_sp_l3addr) { .addr6 = parms.laddr }; +} + +static union mlxsw_sp_l3addr mlxsw_sp_ipip_parms4_daddr(struct ip_tunnel_parm parms) { return (union mlxsw_sp_l3addr) { .addr4 = parms.iph.daddr }; } +static union mlxsw_sp_l3addr +mlxsw_sp_ipip_parms6_daddr(struct __ip6_tnl_parm parms) +{ + return (union mlxsw_sp_l3addr) { .addr6 = parms.raddr }; +} + union mlxsw_sp_l3addr mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto, const struct net_device *ol_dev) { struct ip_tunnel_parm parms4; + struct __ip6_tnl_parm parms6; switch (proto) { case MLXSW_SP_L3_PROTO_IPV4: parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev); return mlxsw_sp_ipip_parms4_saddr(parms4); case MLXSW_SP_L3_PROTO_IPV6: - break; + parms6 = mlxsw_sp_ipip_netdev_parms6(ol_dev); + return mlxsw_sp_ipip_parms6_saddr(parms6); } WARN_ON(1); @@ -109,19 +132,28 @@ mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto, const struct net_device *ol_dev) { struct ip_tunnel_parm parms4; + struct __ip6_tnl_parm parms6; switch (proto) { case MLXSW_SP_L3_PROTO_IPV4: parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev); return mlxsw_sp_ipip_parms4_daddr(parms4); case MLXSW_SP_L3_PROTO_IPV6: - break; + parms6 = mlxsw_sp_ipip_netdev_parms6(ol_dev); + return mlxsw_sp_ipip_parms6_daddr(parms6); } WARN_ON(1); return (union mlxsw_sp_l3addr) {0}; } +bool mlxsw_sp_l3addr_is_zero(union mlxsw_sp_l3addr addr) +{ + union mlxsw_sp_l3addr naddr = {0}; + + return !memcmp(&addr, &naddr, sizeof(naddr)); +} + static int mlxsw_sp_ipip_nexthop_update_gre4(struct mlxsw_sp *mlxsw_sp, u32 adj_index, struct mlxsw_sp_ipip_entry *ipip_entry) @@ -215,15 +247,14 @@ static bool mlxsw_sp_ipip_tunnel_complete(enum mlxsw_sp_l3proto proto, { union mlxsw_sp_l3addr saddr = mlxsw_sp_ipip_netdev_saddr(proto, ol_dev); union mlxsw_sp_l3addr daddr = mlxsw_sp_ipip_netdev_daddr(proto, ol_dev); - union mlxsw_sp_l3addr naddr = {0}; /* Tunnels with unset local or remote address are valid in Linux and * used for lightweight tunnels (LWT) and Non-Broadcast Multi-Access * (NBMA) tunnels. In principle these can be offloaded, but the driver * currently doesn't support this. So punt. */ - return memcmp(&saddr, &naddr, sizeof(naddr)) && - memcmp(&daddr, &naddr, sizeof(naddr)); + return !mlxsw_sp_l3addr_is_zero(saddr) && + !mlxsw_sp_l3addr_is_zero(daddr); } static bool mlxsw_sp_ipip_can_offload_gre4(const struct mlxsw_sp *mlxsw_sp, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h index a4ff5737eccc..6909d867bb59 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h @@ -1,7 +1,7 @@ /* * drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h - * Copyright (c) 2017 Mellanox Technologies. All rights reserved. - * Copyright (c) 2017 Petr Machata <petrm@mellanox.com> + * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017-2018 Petr Machata <petrm@mellanox.com> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -41,11 +41,15 @@ struct ip_tunnel_parm mlxsw_sp_ipip_netdev_parms4(const struct net_device *ol_dev); +struct __ip6_tnl_parm +mlxsw_sp_ipip_netdev_parms6(const struct net_device *ol_dev); union mlxsw_sp_l3addr mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto, const struct net_device *ol_dev); +bool mlxsw_sp_l3addr_is_zero(union mlxsw_sp_l3addr addr); + enum mlxsw_sp_ipip_type { MLXSW_SP_IPIP_TYPE_GRE4, MLXSW_SP_IPIP_TYPE_MAX, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c index d27fa57ad3c3..059eb3214328 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c @@ -270,6 +270,8 @@ static int mlxsw_sp_kvdl_part_init(struct mlxsw_sp *mlxsw_sp, case MLXSW_SP_KVDL_PART_LARGE_CHUNKS: resource_id = MLXSW_SP_RESOURCE_KVD_LINEAR_LARGE_CHUNKS; break; + default: + return -EINVAL; } err = devlink_resource_size_get(devlink, resource_id, &resource_size); @@ -278,7 +280,7 @@ static int mlxsw_sp_kvdl_part_init(struct mlxsw_sp *mlxsw_sp, resource_size = info->end_index - info->start_index + 1; } - nr_entries = resource_size / info->alloc_size; + nr_entries = div_u64(resource_size, info->alloc_size); usage_size = BITS_TO_LONGS(nr_entries) * sizeof(unsigned long); part = kzalloc(sizeof(*part) + usage_size, GFP_KERNEL); if (!part) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c index d20b143de3b4..978a3c70653a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c @@ -126,8 +126,8 @@ mlxsw_sp_mr_route_ivif_in_evifs(const struct mlxsw_sp_mr_route *mr_route) switch (mr_route->mr_table->proto) { case MLXSW_SP_L3_PROTO_IPV4: - ivif = mr_route->mfc4->mfc_parent; - return mr_route->mfc4->mfc_un.res.ttls[ivif] != 255; + ivif = mr_route->mfc4->_c.mfc_parent; + return mr_route->mfc4->_c.mfc_un.res.ttls[ivif] != 255; case MLXSW_SP_L3_PROTO_IPV6: /* fall through */ default: @@ -364,7 +364,7 @@ mlxsw_sp_mr_route4_create(struct mlxsw_sp_mr_table *mr_table, mr_route->mfc4 = mfc; mr_route->mr_table = mr_table; for (i = 0; i < MAXVIFS; i++) { - if (mfc->mfc_un.res.ttls[i] != 255) { + if (mfc->_c.mfc_un.res.ttls[i] != 255) { err = mlxsw_sp_mr_route_evif_link(mr_route, &mr_table->vifs[i]); if (err) @@ -374,7 +374,8 @@ mlxsw_sp_mr_route4_create(struct mlxsw_sp_mr_table *mr_table, mr_route->min_mtu = mr_table->vifs[i].dev->mtu; } } - mlxsw_sp_mr_route_ivif_link(mr_route, &mr_table->vifs[mfc->mfc_parent]); + mlxsw_sp_mr_route_ivif_link(mr_route, + &mr_table->vifs[mfc->_c.mfc_parent]); mr_route->route_action = mlxsw_sp_mr_route_action(mr_route); return mr_route; @@ -418,9 +419,9 @@ static void mlxsw_sp_mr_mfc_offload_set(struct mlxsw_sp_mr_route *mr_route, switch (mr_route->mr_table->proto) { case MLXSW_SP_L3_PROTO_IPV4: if (offload) - mr_route->mfc4->mfc_flags |= MFC_OFFLOAD; + mr_route->mfc4->_c.mfc_flags |= MFC_OFFLOAD; else - mr_route->mfc4->mfc_flags &= ~MFC_OFFLOAD; + mr_route->mfc4->_c.mfc_flags &= ~MFC_OFFLOAD; break; case MLXSW_SP_L3_PROTO_IPV6: /* fall through */ @@ -943,10 +944,10 @@ static void mlxsw_sp_mr_route_stats_update(struct mlxsw_sp *mlxsw_sp, switch (mr_route->mr_table->proto) { case MLXSW_SP_L3_PROTO_IPV4: - if (mr_route->mfc4->mfc_un.res.pkt != packets) - mr_route->mfc4->mfc_un.res.lastuse = jiffies; - mr_route->mfc4->mfc_un.res.pkt = packets; - mr_route->mfc4->mfc_un.res.bytes = bytes; + if (mr_route->mfc4->_c.mfc_un.res.pkt != packets) + mr_route->mfc4->_c.mfc_un.res.lastuse = jiffies; + mr_route->mfc4->_c.mfc_un.res.pkt = packets; + mr_route->mfc4->_c.mfc_un.res.bytes = bytes; break; case MLXSW_SP_L3_PROTO_IPV6: /* fall through */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index 0b7670459051..91262b0573e3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -42,6 +42,8 @@ #include "reg.h" #define MLXSW_SP_PRIO_BAND_TO_TCLASS(band) (IEEE_8021QAZ_MAX_TCS - band - 1) +#define MLXSW_SP_PRIO_CHILD_TO_TCLASS(child) \ + MLXSW_SP_PRIO_BAND_TO_TCLASS((child - 1)) enum mlxsw_sp_qdisc_type { MLXSW_SP_QDISC_NO_QDISC, @@ -76,6 +78,7 @@ struct mlxsw_sp_qdisc_ops { struct mlxsw_sp_qdisc { u32 handle; u8 tclass_num; + u8 prio_bitmap; union { struct red_stats red; } xstats_base; @@ -99,6 +102,44 @@ mlxsw_sp_qdisc_compare(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, u32 handle, mlxsw_sp_qdisc->handle == handle; } +static struct mlxsw_sp_qdisc * +mlxsw_sp_qdisc_find(struct mlxsw_sp_port *mlxsw_sp_port, u32 parent, + bool root_only) +{ + int tclass, child_index; + + if (parent == TC_H_ROOT) + return mlxsw_sp_port->root_qdisc; + + if (root_only || !mlxsw_sp_port->root_qdisc || + !mlxsw_sp_port->root_qdisc->ops || + TC_H_MAJ(parent) != mlxsw_sp_port->root_qdisc->handle || + TC_H_MIN(parent) > IEEE_8021QAZ_MAX_TCS) + return NULL; + + child_index = TC_H_MIN(parent); + tclass = MLXSW_SP_PRIO_CHILD_TO_TCLASS(child_index); + return &mlxsw_sp_port->tclass_qdiscs[tclass]; +} + +static struct mlxsw_sp_qdisc * +mlxsw_sp_qdisc_find_by_handle(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle) +{ + int i; + + if (mlxsw_sp_port->root_qdisc->handle == handle) + return mlxsw_sp_port->root_qdisc; + + if (mlxsw_sp_port->root_qdisc->handle == TC_H_UNSPEC) + return NULL; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + if (mlxsw_sp_port->tclass_qdiscs[i].handle == handle) + return &mlxsw_sp_port->tclass_qdiscs[i]; + + return NULL; +} + static int mlxsw_sp_qdisc_destroy(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) @@ -185,6 +226,23 @@ mlxsw_sp_qdisc_get_xstats(struct mlxsw_sp_port *mlxsw_sp_port, return -EOPNOTSUPP; } +static void +mlxsw_sp_qdisc_bstats_per_priority_get(struct mlxsw_sp_port_xstats *xstats, + u8 prio_bitmap, u64 *tx_packets, + u64 *tx_bytes) +{ + int i; + + *tx_packets = 0; + *tx_bytes = 0; + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + if (prio_bitmap & BIT(i)) { + *tx_packets += xstats->tx_packets[i]; + *tx_bytes += xstats->tx_bytes[i]; + } + } +} + static int mlxsw_sp_tclass_congestion_enable(struct mlxsw_sp_port *mlxsw_sp_port, int tclass_num, u32 min, u32 max, @@ -230,17 +288,16 @@ mlxsw_sp_setup_tc_qdisc_red_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port, u8 tclass_num = mlxsw_sp_qdisc->tclass_num; struct mlxsw_sp_qdisc_stats *stats_base; struct mlxsw_sp_port_xstats *xstats; - struct rtnl_link_stats64 *stats; struct red_stats *red_base; xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; - stats = &mlxsw_sp_port->periodic_hw_stats.stats; stats_base = &mlxsw_sp_qdisc->stats_base; red_base = &mlxsw_sp_qdisc->xstats_base.red; - stats_base->tx_packets = stats->tx_packets; - stats_base->tx_bytes = stats->tx_bytes; - + mlxsw_sp_qdisc_bstats_per_priority_get(xstats, + mlxsw_sp_qdisc->prio_bitmap, + &stats_base->tx_packets, + &stats_base->tx_bytes); red_base->prob_mark = xstats->ecn; red_base->prob_drop = xstats->wred_drop[tclass_num]; red_base->pdrop = xstats->tail_drop[tclass_num]; @@ -255,6 +312,12 @@ static int mlxsw_sp_qdisc_red_destroy(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) { + struct mlxsw_sp_qdisc *root_qdisc = mlxsw_sp_port->root_qdisc; + + if (root_qdisc != mlxsw_sp_qdisc) + root_qdisc->stats_base.backlog -= + mlxsw_sp_qdisc->stats_base.backlog; + return mlxsw_sp_tclass_congestion_disable(mlxsw_sp_port, mlxsw_sp_qdisc->tclass_num); } @@ -319,6 +382,7 @@ mlxsw_sp_qdisc_red_unoffload(struct mlxsw_sp_port *mlxsw_sp_port, backlog = mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp, mlxsw_sp_qdisc->stats_base.backlog); p->qstats->backlog -= backlog; + mlxsw_sp_qdisc->stats_base.backlog = 0; } static int @@ -357,14 +421,16 @@ mlxsw_sp_qdisc_get_red_stats(struct mlxsw_sp_port *mlxsw_sp_port, u8 tclass_num = mlxsw_sp_qdisc->tclass_num; struct mlxsw_sp_qdisc_stats *stats_base; struct mlxsw_sp_port_xstats *xstats; - struct rtnl_link_stats64 *stats; xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; - stats = &mlxsw_sp_port->periodic_hw_stats.stats; stats_base = &mlxsw_sp_qdisc->stats_base; - tx_bytes = stats->tx_bytes - stats_base->tx_bytes; - tx_packets = stats->tx_packets - stats_base->tx_packets; + mlxsw_sp_qdisc_bstats_per_priority_get(xstats, + mlxsw_sp_qdisc->prio_bitmap, + &tx_packets, &tx_bytes); + tx_bytes = tx_bytes - stats_base->tx_bytes; + tx_packets = tx_packets - stats_base->tx_packets; + overlimits = xstats->wred_drop[tclass_num] + xstats->ecn - stats_base->overlimits; drops = xstats->wred_drop[tclass_num] + xstats->tail_drop[tclass_num] - @@ -406,11 +472,10 @@ int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, { struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; - if (p->parent != TC_H_ROOT) + mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, false); + if (!mlxsw_sp_qdisc) return -EOPNOTSUPP; - mlxsw_sp_qdisc = mlxsw_sp_port->root_qdisc; - if (p->command == TC_RED_REPLACE) return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle, mlxsw_sp_qdisc, @@ -441,9 +506,13 @@ mlxsw_sp_qdisc_prio_destroy(struct mlxsw_sp_port *mlxsw_sp_port, { int i; - for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, MLXSW_SP_PORT_DEFAULT_TCLASS); + mlxsw_sp_qdisc_destroy(mlxsw_sp_port, + &mlxsw_sp_port->tclass_qdiscs[i]); + mlxsw_sp_port->tclass_qdiscs[i].prio_bitmap = 0; + } return 0; } @@ -467,16 +536,41 @@ mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port, void *params) { struct tc_prio_qopt_offload_params *p = params; - int tclass, i; + struct mlxsw_sp_qdisc *child_qdisc; + int tclass, i, band, backlog; + u8 old_priomap; int err; - for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { - tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(p->priomap[i]); - err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, tclass); - if (err) - return err; + for (band = 0; band < p->bands; band++) { + tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band); + child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass]; + old_priomap = child_qdisc->prio_bitmap; + child_qdisc->prio_bitmap = 0; + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + if (p->priomap[i] == band) { + child_qdisc->prio_bitmap |= BIT(i); + if (BIT(i) & old_priomap) + continue; + err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, + i, tclass); + if (err) + return err; + } + } + if (old_priomap != child_qdisc->prio_bitmap && + child_qdisc->ops && child_qdisc->ops->clean_stats) { + backlog = child_qdisc->stats_base.backlog; + child_qdisc->ops->clean_stats(mlxsw_sp_port, + child_qdisc); + child_qdisc->stats_base.backlog = backlog; + } + } + for (; band < IEEE_8021QAZ_MAX_TCS; band++) { + tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band); + child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass]; + child_qdisc->prio_bitmap = 0; + mlxsw_sp_qdisc_destroy(mlxsw_sp_port, child_qdisc); } - return 0; } @@ -513,6 +607,7 @@ mlxsw_sp_qdisc_get_prio_stats(struct mlxsw_sp_port *mlxsw_sp_port, for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { drops += xstats->tail_drop[i]; + drops += xstats->wred_drop[i]; backlog += xstats->backlog[i]; } drops = drops - stats_base->drops; @@ -548,8 +643,10 @@ mlxsw_sp_setup_tc_qdisc_prio_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port, stats_base->tx_bytes = stats->tx_bytes; stats_base->drops = 0; - for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { stats_base->drops += xstats->tail_drop[i]; + stats_base->drops += xstats->wred_drop[i]; + } mlxsw_sp_qdisc->stats_base.backlog = 0; } @@ -564,15 +661,48 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_prio = { .clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats, }; +/* Grafting is not supported in mlxsw. It will result in un-offloading of the + * grafted qdisc as well as the qdisc in the qdisc new location. + * (However, if the graft is to the location where the qdisc is already at, it + * will be ignored completely and won't cause un-offloading). + */ +static int +mlxsw_sp_qdisc_prio_graft(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct tc_prio_qopt_offload_graft_params *p) +{ + int tclass_num = MLXSW_SP_PRIO_BAND_TO_TCLASS(p->band); + struct mlxsw_sp_qdisc *old_qdisc; + + /* Check if the grafted qdisc is already in its "new" location. If so - + * nothing needs to be done. + */ + if (p->band < IEEE_8021QAZ_MAX_TCS && + mlxsw_sp_port->tclass_qdiscs[tclass_num].handle == p->child_handle) + return 0; + + /* See if the grafted qdisc is already offloaded on any tclass. If so, + * unoffload it. + */ + old_qdisc = mlxsw_sp_qdisc_find_by_handle(mlxsw_sp_port, + p->child_handle); + if (old_qdisc) + mlxsw_sp_qdisc_destroy(mlxsw_sp_port, old_qdisc); + + mlxsw_sp_qdisc_destroy(mlxsw_sp_port, + &mlxsw_sp_port->tclass_qdiscs[tclass_num]); + return -EOPNOTSUPP; +} + int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, struct tc_prio_qopt_offload *p) { struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; - if (p->parent != TC_H_ROOT) + mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, true); + if (!mlxsw_sp_qdisc) return -EOPNOTSUPP; - mlxsw_sp_qdisc = mlxsw_sp_port->root_qdisc; if (p->command == TC_PRIO_REPLACE) return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle, mlxsw_sp_qdisc, @@ -589,6 +719,9 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, case TC_PRIO_STATS: return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc, &p->stats); + case TC_PRIO_GRAFT: + return mlxsw_sp_qdisc_prio_graft(mlxsw_sp_port, mlxsw_sp_qdisc, + &p->graft_params); default: return -EOPNOTSUPP; } @@ -596,17 +729,36 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port) { - mlxsw_sp_port->root_qdisc = kzalloc(sizeof(*mlxsw_sp_port->root_qdisc), - GFP_KERNEL); - if (!mlxsw_sp_port->root_qdisc) - return -ENOMEM; + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; + int i; + mlxsw_sp_qdisc = kzalloc(sizeof(*mlxsw_sp_qdisc), GFP_KERNEL); + if (!mlxsw_sp_qdisc) + goto err_root_qdisc_init; + + mlxsw_sp_port->root_qdisc = mlxsw_sp_qdisc; + mlxsw_sp_port->root_qdisc->prio_bitmap = 0xff; mlxsw_sp_port->root_qdisc->tclass_num = MLXSW_SP_PORT_DEFAULT_TCLASS; + mlxsw_sp_qdisc = kzalloc(sizeof(*mlxsw_sp_qdisc) * IEEE_8021QAZ_MAX_TCS, + GFP_KERNEL); + if (!mlxsw_sp_qdisc) + goto err_tclass_qdiscs_init; + + mlxsw_sp_port->tclass_qdiscs = mlxsw_sp_qdisc; + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + mlxsw_sp_port->tclass_qdiscs[i].tclass_num = i; + return 0; + +err_tclass_qdiscs_init: + kfree(mlxsw_sp_port->root_qdisc); +err_root_qdisc_init: + return -ENOMEM; } void mlxsw_sp_tc_qdisc_fini(struct mlxsw_sp_port *mlxsw_sp_port) { + kfree(mlxsw_sp_port->tclass_qdiscs); kfree(mlxsw_sp_port->root_qdisc); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 05146970c19c..69f16c605b9d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -70,6 +70,7 @@ #include "spectrum_mr.h" #include "spectrum_mr_tcam.h" #include "spectrum_router.h" +#include "spectrum_span.h" struct mlxsw_sp_fib; struct mlxsw_sp_vr; @@ -2330,6 +2331,8 @@ static void mlxsw_sp_router_neigh_event_work(struct work_struct *work) read_unlock_bh(&n->lock); rtnl_lock(); + mlxsw_sp_span_respin(mlxsw_sp); + entry_connected = nud_state & NUD_VALID && !dead; neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n); if (!entry_connected && !neigh_entry) @@ -5589,6 +5592,8 @@ static void mlxsw_sp_router_fib4_event_work(struct work_struct *work) /* Protect internal structures from changes */ rtnl_lock(); + mlxsw_sp_span_respin(mlxsw_sp); + switch (fib_work->event) { case FIB_EVENT_ENTRY_REPLACE: /* fall through */ case FIB_EVENT_ENTRY_APPEND: /* fall through */ @@ -5631,6 +5636,8 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work) int err; rtnl_lock(); + mlxsw_sp_span_respin(mlxsw_sp); + switch (fib_work->event) { case FIB_EVENT_ENTRY_REPLACE: /* fall through */ case FIB_EVENT_ENTRY_ADD: diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c index c3bec37d71ed..f537e1de11d9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c @@ -1,6 +1,7 @@ /* * drivers/net/ethernet/mellanox/mlxsw/mlxsw_span.c * Copyright (c) 2018 Mellanox Technologies. All rights reserved. + * Copyright (c) 2018 Petr Machata <petrm@mellanox.com> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -32,9 +33,14 @@ */ #include <linux/list.h> +#include <net/arp.h> +#include <net/gre.h> +#include <net/ndisc.h> +#include <net/ip6_tunnel.h> #include "spectrum.h" #include "spectrum_span.h" +#include "spectrum_ipip.h" int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp) { @@ -51,8 +57,12 @@ int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp) if (!mlxsw_sp->span.entries) return -ENOMEM; - for (i = 0; i < mlxsw_sp->span.entries_count; i++) - INIT_LIST_HEAD(&mlxsw_sp->span.entries[i].bound_ports_list); + for (i = 0; i < mlxsw_sp->span.entries_count; i++) { + struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i]; + + INIT_LIST_HEAD(&curr->bound_ports_list); + curr->id = i; + } return 0; } @@ -69,80 +79,460 @@ void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp) kfree(mlxsw_sp->span.entries); } -static struct mlxsw_sp_span_entry * -mlxsw_sp_span_entry_create(struct mlxsw_sp_port *port) +static int +mlxsw_sp_span_entry_phys_parms(const struct net_device *to_dev, + struct mlxsw_sp_span_parms *sparmsp) { - struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; - struct mlxsw_sp_span_entry *span_entry; + sparmsp->dest_port = netdev_priv(to_dev); + return 0; +} + +static int +mlxsw_sp_span_entry_phys_configure(struct mlxsw_sp_span_entry *span_entry, + struct mlxsw_sp_span_parms sparms) +{ + struct mlxsw_sp_port *dest_port = sparms.dest_port; + struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp; + u8 local_port = dest_port->local_port; + char mpat_pl[MLXSW_REG_MPAT_LEN]; + int pa_id = span_entry->id; + + /* Create a new port analayzer entry for local_port. */ + mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true, + MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl); +} + +static void +mlxsw_sp_span_entry_deconfigure_common(struct mlxsw_sp_span_entry *span_entry, + enum mlxsw_reg_mpat_span_type span_type) +{ + struct mlxsw_sp_port *dest_port = span_entry->parms.dest_port; + struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp; + u8 local_port = dest_port->local_port; + char mpat_pl[MLXSW_REG_MPAT_LEN]; + int pa_id = span_entry->id; + + mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, false, span_type); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl); +} + +static void +mlxsw_sp_span_entry_phys_deconfigure(struct mlxsw_sp_span_entry *span_entry) +{ + mlxsw_sp_span_entry_deconfigure_common(span_entry, + MLXSW_REG_MPAT_SPAN_TYPE_LOCAL_ETH); +} + +static const +struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_phys = { + .can_handle = mlxsw_sp_port_dev_check, + .parms = mlxsw_sp_span_entry_phys_parms, + .configure = mlxsw_sp_span_entry_phys_configure, + .deconfigure = mlxsw_sp_span_entry_phys_deconfigure, +}; + +static struct net_device * +mlxsw_sp_span_gretap4_route(const struct net_device *to_dev, + __be32 *saddrp, __be32 *daddrp) +{ + struct ip_tunnel *tun = netdev_priv(to_dev); + struct net_device *dev = NULL; + struct ip_tunnel_parm parms; + struct rtable *rt = NULL; + struct flowi4 fl4; + + /* We assume "dev" stays valid after rt is put. */ + ASSERT_RTNL(); + + parms = mlxsw_sp_ipip_netdev_parms4(to_dev); + ip_tunnel_init_flow(&fl4, parms.iph.protocol, *daddrp, *saddrp, + 0, 0, parms.link, tun->fwmark); + + rt = ip_route_output_key(tun->net, &fl4); + if (IS_ERR(rt)) + return NULL; + + if (rt->rt_type != RTN_UNICAST) + goto out; + + dev = rt->dst.dev; + *saddrp = fl4.saddr; + *daddrp = rt->rt_gateway; + +out: + ip_rt_put(rt); + return dev; +} + +static int mlxsw_sp_span_dmac(struct neigh_table *tbl, + const void *pkey, + struct net_device *l3edev, + unsigned char dmac[ETH_ALEN]) +{ + struct neighbour *neigh = neigh_lookup(tbl, pkey, l3edev); + int err = 0; + + if (!neigh) { + neigh = neigh_create(tbl, pkey, l3edev); + if (IS_ERR(neigh)) + return PTR_ERR(neigh); + } + + neigh_event_send(neigh, NULL); + + read_lock_bh(&neigh->lock); + if ((neigh->nud_state & NUD_VALID) && !neigh->dead) + memcpy(dmac, neigh->ha, ETH_ALEN); + else + err = -ENOENT; + read_unlock_bh(&neigh->lock); + + neigh_release(neigh); + return err; +} + +static int +mlxsw_sp_span_entry_unoffloadable(struct mlxsw_sp_span_parms *sparmsp) +{ + sparmsp->dest_port = NULL; + return 0; +} + +static int +mlxsw_sp_span_entry_tunnel_parms_common(struct net_device *l3edev, + union mlxsw_sp_l3addr saddr, + union mlxsw_sp_l3addr daddr, + union mlxsw_sp_l3addr gw, + __u8 ttl, + struct neigh_table *tbl, + struct mlxsw_sp_span_parms *sparmsp) +{ + unsigned char dmac[ETH_ALEN]; + + if (mlxsw_sp_l3addr_is_zero(gw)) + gw = daddr; + + if (!l3edev || !mlxsw_sp_port_dev_check(l3edev) || + mlxsw_sp_span_dmac(tbl, &gw, l3edev, dmac)) + return mlxsw_sp_span_entry_unoffloadable(sparmsp); + + sparmsp->dest_port = netdev_priv(l3edev); + sparmsp->ttl = ttl; + memcpy(sparmsp->dmac, dmac, ETH_ALEN); + memcpy(sparmsp->smac, l3edev->dev_addr, ETH_ALEN); + sparmsp->saddr = saddr; + sparmsp->daddr = daddr; + return 0; +} + +static int +mlxsw_sp_span_entry_gretap4_parms(const struct net_device *to_dev, + struct mlxsw_sp_span_parms *sparmsp) +{ + struct ip_tunnel_parm tparm = mlxsw_sp_ipip_netdev_parms4(to_dev); + union mlxsw_sp_l3addr saddr = { .addr4 = tparm.iph.saddr }; + union mlxsw_sp_l3addr daddr = { .addr4 = tparm.iph.daddr }; + bool inherit_tos = tparm.iph.tos & 0x1; + bool inherit_ttl = !tparm.iph.ttl; + union mlxsw_sp_l3addr gw = daddr; + struct net_device *l3edev; + + if (!(to_dev->flags & IFF_UP) || + /* Reject tunnels with GRE keys, checksums, etc. */ + tparm.i_flags || tparm.o_flags || + /* Require a fixed TTL and a TOS copied from the mirrored packet. */ + inherit_ttl || !inherit_tos || + /* A destination address may not be "any". */ + mlxsw_sp_l3addr_is_zero(daddr)) + return mlxsw_sp_span_entry_unoffloadable(sparmsp); + + l3edev = mlxsw_sp_span_gretap4_route(to_dev, &saddr.addr4, &gw.addr4); + return mlxsw_sp_span_entry_tunnel_parms_common(l3edev, saddr, daddr, gw, + tparm.iph.ttl, + &arp_tbl, sparmsp); +} + +static int +mlxsw_sp_span_entry_gretap4_configure(struct mlxsw_sp_span_entry *span_entry, + struct mlxsw_sp_span_parms sparms) +{ + struct mlxsw_sp_port *dest_port = sparms.dest_port; + struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp; + u8 local_port = dest_port->local_port; + char mpat_pl[MLXSW_REG_MPAT_LEN]; + int pa_id = span_entry->id; + + /* Create a new port analayzer entry for local_port. */ + mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true, + MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3); + mlxsw_reg_mpat_eth_rspan_l2_pack(mpat_pl, + MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER, + sparms.dmac, false); + mlxsw_reg_mpat_eth_rspan_l3_ipv4_pack(mpat_pl, + sparms.ttl, sparms.smac, + be32_to_cpu(sparms.saddr.addr4), + be32_to_cpu(sparms.daddr.addr4)); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl); +} + +static void +mlxsw_sp_span_entry_gretap4_deconfigure(struct mlxsw_sp_span_entry *span_entry) +{ + mlxsw_sp_span_entry_deconfigure_common(span_entry, + MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3); +} + +static const struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_gretap4 = { + .can_handle = is_gretap_dev, + .parms = mlxsw_sp_span_entry_gretap4_parms, + .configure = mlxsw_sp_span_entry_gretap4_configure, + .deconfigure = mlxsw_sp_span_entry_gretap4_deconfigure, +}; + +static struct net_device * +mlxsw_sp_span_gretap6_route(const struct net_device *to_dev, + struct in6_addr *saddrp, + struct in6_addr *daddrp) +{ + struct ip6_tnl *t = netdev_priv(to_dev); + struct flowi6 fl6 = t->fl.u.ip6; + struct net_device *dev = NULL; + struct dst_entry *dst; + struct rt6_info *rt6; + + /* We assume "dev" stays valid after dst is released. */ + ASSERT_RTNL(); + + fl6.flowi6_mark = t->parms.fwmark; + if (!ip6_tnl_xmit_ctl(t, &fl6.saddr, &fl6.daddr)) + return NULL; + + dst = ip6_route_output(t->net, NULL, &fl6); + if (!dst || dst->error) + goto out; + + rt6 = container_of(dst, struct rt6_info, dst); + + dev = dst->dev; + *saddrp = fl6.saddr; + *daddrp = rt6->rt6i_gateway; + +out: + dst_release(dst); + return dev; +} + +static int +mlxsw_sp_span_entry_gretap6_parms(const struct net_device *to_dev, + struct mlxsw_sp_span_parms *sparmsp) +{ + struct __ip6_tnl_parm tparm = mlxsw_sp_ipip_netdev_parms6(to_dev); + bool inherit_tos = tparm.flags & IP6_TNL_F_USE_ORIG_TCLASS; + union mlxsw_sp_l3addr saddr = { .addr6 = tparm.laddr }; + union mlxsw_sp_l3addr daddr = { .addr6 = tparm.raddr }; + bool inherit_ttl = !tparm.hop_limit; + union mlxsw_sp_l3addr gw = daddr; + struct net_device *l3edev; + + if (!(to_dev->flags & IFF_UP) || + /* Reject tunnels with GRE keys, checksums, etc. */ + tparm.i_flags || tparm.o_flags || + /* Require a fixed TTL and a TOS copied from the mirrored packet. */ + inherit_ttl || !inherit_tos || + /* A destination address may not be "any". */ + mlxsw_sp_l3addr_is_zero(daddr)) + return mlxsw_sp_span_entry_unoffloadable(sparmsp); + + l3edev = mlxsw_sp_span_gretap6_route(to_dev, &saddr.addr6, &gw.addr6); + return mlxsw_sp_span_entry_tunnel_parms_common(l3edev, saddr, daddr, gw, + tparm.hop_limit, + &nd_tbl, sparmsp); +} + +static int +mlxsw_sp_span_entry_gretap6_configure(struct mlxsw_sp_span_entry *span_entry, + struct mlxsw_sp_span_parms sparms) +{ + struct mlxsw_sp_port *dest_port = sparms.dest_port; + struct mlxsw_sp *mlxsw_sp = dest_port->mlxsw_sp; + u8 local_port = dest_port->local_port; char mpat_pl[MLXSW_REG_MPAT_LEN]; - u8 local_port = port->local_port; - int index; + int pa_id = span_entry->id; + + /* Create a new port analayzer entry for local_port. */ + mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, true, + MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3); + mlxsw_reg_mpat_eth_rspan_l2_pack(mpat_pl, + MLXSW_REG_MPAT_ETH_RSPAN_VERSION_NO_HEADER, + sparms.dmac, false); + mlxsw_reg_mpat_eth_rspan_l3_ipv6_pack(mpat_pl, sparms.ttl, sparms.smac, + sparms.saddr.addr6, + sparms.daddr.addr6); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl); +} + +static void +mlxsw_sp_span_entry_gretap6_deconfigure(struct mlxsw_sp_span_entry *span_entry) +{ + mlxsw_sp_span_entry_deconfigure_common(span_entry, + MLXSW_REG_MPAT_SPAN_TYPE_REMOTE_ETH_L3); +} + +static const +struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_gretap6 = { + .can_handle = is_ip6gretap_dev, + .parms = mlxsw_sp_span_entry_gretap6_parms, + .configure = mlxsw_sp_span_entry_gretap6_configure, + .deconfigure = mlxsw_sp_span_entry_gretap6_deconfigure, +}; + +static const +struct mlxsw_sp_span_entry_ops *const mlxsw_sp_span_entry_types[] = { + &mlxsw_sp_span_entry_ops_phys, + &mlxsw_sp_span_entry_ops_gretap4, + &mlxsw_sp_span_entry_ops_gretap6, +}; + +static int +mlxsw_sp_span_entry_nop_parms(const struct net_device *to_dev, + struct mlxsw_sp_span_parms *sparmsp) +{ + return mlxsw_sp_span_entry_unoffloadable(sparmsp); +} + +static int +mlxsw_sp_span_entry_nop_configure(struct mlxsw_sp_span_entry *span_entry, + struct mlxsw_sp_span_parms sparms) +{ + return 0; +} + +static void +mlxsw_sp_span_entry_nop_deconfigure(struct mlxsw_sp_span_entry *span_entry) +{ +} + +static const struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_nop = { + .parms = mlxsw_sp_span_entry_nop_parms, + .configure = mlxsw_sp_span_entry_nop_configure, + .deconfigure = mlxsw_sp_span_entry_nop_deconfigure, +}; + +static void +mlxsw_sp_span_entry_configure(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_span_entry *span_entry, + struct mlxsw_sp_span_parms sparms) +{ + if (sparms.dest_port) { + if (sparms.dest_port->mlxsw_sp != mlxsw_sp) { + netdev_err(span_entry->to_dev, "Cannot mirror to %s, which belongs to a different mlxsw instance", + sparms.dest_port->dev->name); + sparms.dest_port = NULL; + } else if (span_entry->ops->configure(span_entry, sparms)) { + netdev_err(span_entry->to_dev, "Failed to offload mirror to %s", + sparms.dest_port->dev->name); + sparms.dest_port = NULL; + } + } + + span_entry->parms = sparms; +} + +static void +mlxsw_sp_span_entry_deconfigure(struct mlxsw_sp_span_entry *span_entry) +{ + if (span_entry->parms.dest_port) + span_entry->ops->deconfigure(span_entry); +} + +static struct mlxsw_sp_span_entry * +mlxsw_sp_span_entry_create(struct mlxsw_sp *mlxsw_sp, + const struct net_device *to_dev, + const struct mlxsw_sp_span_entry_ops *ops, + struct mlxsw_sp_span_parms sparms) +{ + struct mlxsw_sp_span_entry *span_entry = NULL; int i; - int err; /* find a free entry to use */ - index = -1; for (i = 0; i < mlxsw_sp->span.entries_count; i++) { if (!mlxsw_sp->span.entries[i].ref_count) { - index = i; span_entry = &mlxsw_sp->span.entries[i]; break; } } - if (index < 0) - return NULL; - - /* create a new port analayzer entry for local_port */ - mlxsw_reg_mpat_pack(mpat_pl, index, local_port, true); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl); - if (err) + if (!span_entry) return NULL; - span_entry->id = index; + span_entry->ops = ops; span_entry->ref_count = 1; - span_entry->local_port = local_port; + span_entry->to_dev = to_dev; + mlxsw_sp_span_entry_configure(mlxsw_sp, span_entry, sparms); + return span_entry; } -static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_span_entry *span_entry) +static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp_span_entry *span_entry) { - u8 local_port = span_entry->local_port; - char mpat_pl[MLXSW_REG_MPAT_LEN]; - int pa_id = span_entry->id; - - mlxsw_reg_mpat_pack(mpat_pl, pa_id, local_port, false); - mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpat), mpat_pl); + mlxsw_sp_span_entry_deconfigure(span_entry); } struct mlxsw_sp_span_entry * -mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port) +mlxsw_sp_span_entry_find_by_port(struct mlxsw_sp *mlxsw_sp, + const struct net_device *to_dev) { int i; for (i = 0; i < mlxsw_sp->span.entries_count; i++) { struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i]; - if (curr->ref_count && curr->local_port == local_port) + if (curr->ref_count && curr->to_dev == to_dev) return curr; } return NULL; } +void mlxsw_sp_span_entry_invalidate(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_span_entry *span_entry) +{ + mlxsw_sp_span_entry_deconfigure(span_entry); + span_entry->ops = &mlxsw_sp_span_entry_ops_nop; +} + static struct mlxsw_sp_span_entry * -mlxsw_sp_span_entry_get(struct mlxsw_sp_port *port) +mlxsw_sp_span_entry_find_by_id(struct mlxsw_sp *mlxsw_sp, int span_id) +{ + int i; + + for (i = 0; i < mlxsw_sp->span.entries_count; i++) { + struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i]; + + if (curr->ref_count && curr->id == span_id) + return curr; + } + return NULL; +} + +static struct mlxsw_sp_span_entry * +mlxsw_sp_span_entry_get(struct mlxsw_sp *mlxsw_sp, + const struct net_device *to_dev, + const struct mlxsw_sp_span_entry_ops *ops, + struct mlxsw_sp_span_parms sparms) { struct mlxsw_sp_span_entry *span_entry; - span_entry = mlxsw_sp_span_entry_find(port->mlxsw_sp, - port->local_port); + span_entry = mlxsw_sp_span_entry_find_by_port(mlxsw_sp, to_dev); if (span_entry) { /* Already exists, just take a reference */ span_entry->ref_count++; return span_entry; } - return mlxsw_sp_span_entry_create(port); + return mlxsw_sp_span_entry_create(mlxsw_sp, to_dev, ops, sparms); } static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp, @@ -150,7 +540,7 @@ static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp, { WARN_ON(!span_entry->ref_count); if (--span_entry->ref_count == 0) - mlxsw_sp_span_entry_destroy(mlxsw_sp, span_entry); + mlxsw_sp_span_entry_destroy(span_entry); return 0; } @@ -312,15 +702,41 @@ mlxsw_sp_span_inspected_port_del(struct mlxsw_sp_port *port, kfree(inspected_port); } +static const struct mlxsw_sp_span_entry_ops * +mlxsw_sp_span_entry_ops(struct mlxsw_sp *mlxsw_sp, + const struct net_device *to_dev) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(mlxsw_sp_span_entry_types); ++i) + if (mlxsw_sp_span_entry_types[i]->can_handle(to_dev)) + return mlxsw_sp_span_entry_types[i]; + + return NULL; +} + int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from, - struct mlxsw_sp_port *to, - enum mlxsw_sp_span_type type, bool bind) + const struct net_device *to_dev, + enum mlxsw_sp_span_type type, bool bind, + int *p_span_id) { struct mlxsw_sp *mlxsw_sp = from->mlxsw_sp; + const struct mlxsw_sp_span_entry_ops *ops; + struct mlxsw_sp_span_parms sparms = {0}; struct mlxsw_sp_span_entry *span_entry; int err; - span_entry = mlxsw_sp_span_entry_get(to); + ops = mlxsw_sp_span_entry_ops(mlxsw_sp, to_dev); + if (!ops) { + netdev_err(to_dev, "Cannot mirror to %s", to_dev->name); + return -EOPNOTSUPP; + } + + err = ops->parms(to_dev, &sparms); + if (err) + return err; + + span_entry = mlxsw_sp_span_entry_get(mlxsw_sp, to_dev, ops, sparms); if (!span_entry) return -ENOENT; @@ -331,6 +747,7 @@ int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from, if (err) goto err_port_bind; + *p_span_id = span_entry->id; return 0; err_port_bind: @@ -338,13 +755,12 @@ err_port_bind: return err; } -void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, u8 destination_port, +void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, int span_id, enum mlxsw_sp_span_type type, bool bind) { struct mlxsw_sp_span_entry *span_entry; - span_entry = mlxsw_sp_span_entry_find(from->mlxsw_sp, - destination_port); + span_entry = mlxsw_sp_span_entry_find_by_id(from->mlxsw_sp, span_id); if (!span_entry) { netdev_err(from->dev, "no span entry found\n"); return; @@ -354,3 +770,27 @@ void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, u8 destination_port, span_entry->id); mlxsw_sp_span_inspected_port_del(from, span_entry, type, bind); } + +void mlxsw_sp_span_respin(struct mlxsw_sp *mlxsw_sp) +{ + int i; + int err; + + ASSERT_RTNL(); + for (i = 0; i < mlxsw_sp->span.entries_count; i++) { + struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i]; + struct mlxsw_sp_span_parms sparms = {0}; + + if (!curr->ref_count) + continue; + + err = curr->ops->parms(curr->to_dev, &sparms); + if (err) + continue; + + if (memcmp(&sparms, &curr->parms, sizeof(sparms))) { + mlxsw_sp_span_entry_deconfigure(curr); + mlxsw_sp_span_entry_configure(mlxsw_sp, curr, sparms); + } + } +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h index 069050e385ff..948aceb512c5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h @@ -35,6 +35,9 @@ #define _MLXSW_SPECTRUM_SPAN_H #include <linux/types.h> +#include <linux/if_ether.h> + +#include "spectrum_router.h" struct mlxsw_sp; struct mlxsw_sp_port; @@ -50,23 +53,51 @@ struct mlxsw_sp_span_inspected_port { u8 local_port; }; +struct mlxsw_sp_span_parms { + struct mlxsw_sp_port *dest_port; /* NULL for unoffloaded SPAN. */ + unsigned int ttl; + unsigned char dmac[ETH_ALEN]; + unsigned char smac[ETH_ALEN]; + union mlxsw_sp_l3addr daddr; + union mlxsw_sp_l3addr saddr; +}; + +struct mlxsw_sp_span_entry_ops; + struct mlxsw_sp_span_entry { - u8 local_port; + const struct net_device *to_dev; + const struct mlxsw_sp_span_entry_ops *ops; + struct mlxsw_sp_span_parms parms; struct list_head bound_ports_list; int ref_count; int id; }; +struct mlxsw_sp_span_entry_ops { + bool (*can_handle)(const struct net_device *to_dev); + int (*parms)(const struct net_device *to_dev, + struct mlxsw_sp_span_parms *sparmsp); + int (*configure)(struct mlxsw_sp_span_entry *span_entry, + struct mlxsw_sp_span_parms sparms); + void (*deconfigure)(struct mlxsw_sp_span_entry *span_entry); +}; + int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_span_respin(struct mlxsw_sp *mlxsw_sp); int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from, - struct mlxsw_sp_port *to, - enum mlxsw_sp_span_type type, bool bind); -void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, u8 destination_port, + const struct net_device *to_dev, + enum mlxsw_sp_span_type type, + bool bind, int *p_span_id); +void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, int span_id, enum mlxsw_sp_span_type type, bool bind); struct mlxsw_sp_span_entry * -mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port); +mlxsw_sp_span_entry_find_by_port(struct mlxsw_sp *mlxsw_sp, + const struct net_device *to_dev); + +void mlxsw_sp_span_entry_invalidate(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_span_entry *span_entry); int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index f9f53af04fe1..917663adf925 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1882,14 +1882,10 @@ mlxsw_sp_bridge_8021d_port_join(struct mlxsw_sp_bridge_device *bridge_device, struct netlink_ext_ack *extack) { struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + struct net_device *dev = bridge_port->dev; u16 vid; - if (!is_vlan_dev(bridge_port->dev)) { - NL_SET_ERR_MSG_MOD(extack, "Only VLAN devices can be enslaved to a VLAN-unaware bridge"); - return -EINVAL; - } - vid = vlan_dev_vlan_id(bridge_port->dev); - + vid = is_vlan_dev(dev) ? vlan_dev_vlan_id(dev) : 1; mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid); if (WARN_ON(!mlxsw_sp_port_vlan)) return -EINVAL; @@ -1912,8 +1908,10 @@ mlxsw_sp_bridge_8021d_port_leave(struct mlxsw_sp_bridge_device *bridge_device, struct mlxsw_sp_port *mlxsw_sp_port) { struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; - u16 vid = vlan_dev_vlan_id(bridge_port->dev); + struct net_device *dev = bridge_port->dev; + u16 vid; + vid = is_vlan_dev(dev) ? vlan_dev_vlan_id(dev) : 1; mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid); if (WARN_ON(!mlxsw_sp_port_vlan)) return; diff --git a/drivers/net/ethernet/natsemi/jazzsonic.c b/drivers/net/ethernet/natsemi/jazzsonic.c index d5b28884e21e..51fa82b429a3 100644 --- a/drivers/net/ethernet/natsemi/jazzsonic.c +++ b/drivers/net/ethernet/natsemi/jazzsonic.c @@ -60,14 +60,6 @@ do { \ *((volatile unsigned int *)dev->base_addr+(reg)) = (val); \ } while (0) - -/* use 0 for production, 1 for verification, >1 for debug */ -#ifdef SONIC_DEBUG -static unsigned int sonic_debug = SONIC_DEBUG; -#else -static unsigned int sonic_debug = 1; -#endif - /* * We cannot use station (ethernet) address prefixes to detect the * sonic controller since these are board manufacturer depended. @@ -117,7 +109,6 @@ static const struct net_device_ops sonic_netdev_ops = { static int sonic_probe1(struct net_device *dev) { - static unsigned version_printed; unsigned int silicon_revision; unsigned int val; struct sonic_local *lp = netdev_priv(dev); @@ -133,26 +124,17 @@ static int sonic_probe1(struct net_device *dev) * the expected location. */ silicon_revision = SONIC_READ(SONIC_SR); - if (sonic_debug > 1) - printk("SONIC Silicon Revision = 0x%04x\n",silicon_revision); - i = 0; while (known_revisions[i] != 0xffff && known_revisions[i] != silicon_revision) i++; if (known_revisions[i] == 0xffff) { - printk("SONIC ethernet controller not found (0x%4x)\n", - silicon_revision); + pr_info("SONIC ethernet controller not found (0x%4x)\n", + silicon_revision); goto out; } - if (sonic_debug && version_printed++ == 0) - printk(version); - - printk(KERN_INFO "%s: Sonic ethernet found at 0x%08lx, ", - dev_name(lp->device), dev->base_addr); - /* * Put the sonic into software reset, then * retrieve and print the ethernet address. @@ -245,12 +227,16 @@ static int jazz_sonic_probe(struct platform_device *pdev) err = sonic_probe1(dev); if (err) goto out; + + pr_info("SONIC ethernet @%08lx, MAC %pM, IRQ %d\n", + dev->base_addr, dev->dev_addr, dev->irq); + + sonic_msg_init(dev); + err = register_netdev(dev); if (err) goto out1; - printk("%s: MAC %pM IRQ %d\n", dev->name, dev->dev_addr, dev->irq); - return 0; out1: @@ -262,8 +248,6 @@ out: } MODULE_DESCRIPTION("Jazz SONIC ethernet driver"); -module_param(sonic_debug, int, 0); -MODULE_PARM_DESC(sonic_debug, "jazzsonic debug level (1-4)"); MODULE_ALIAS("platform:jazzsonic"); #include "sonic.c" diff --git a/drivers/net/ethernet/natsemi/macsonic.c b/drivers/net/ethernet/natsemi/macsonic.c index b922ab5cedea..0937fc2a928e 100644 --- a/drivers/net/ethernet/natsemi/macsonic.c +++ b/drivers/net/ethernet/natsemi/macsonic.c @@ -60,8 +60,6 @@ #include <asm/macints.h> #include <asm/mac_via.h> -static char mac_sonic_string[] = "macsonic"; - #include "sonic.h" /* These should basically be bus-size and endian independent (since @@ -72,15 +70,6 @@ static char mac_sonic_string[] = "macsonic"; #define SONIC_WRITE(reg,val) (nubus_writew(val, dev->base_addr + (reg * 4) \ + lp->reg_offset)) -/* use 0 for production, 1 for verification, >1 for debug */ -#ifdef SONIC_DEBUG -static unsigned int sonic_debug = SONIC_DEBUG; -#else -static unsigned int sonic_debug = 1; -#endif - -static int sonic_version_printed; - /* For onboard SONIC */ #define ONBOARD_SONIC_REGISTERS 0x50F0A000 #define ONBOARD_SONIC_PROM_BASE 0x50f08000 @@ -313,11 +302,6 @@ static int mac_onboard_sonic_probe(struct net_device *dev) int sr; bool commslot = macintosh_config->expansion_type == MAC_EXP_PDS_COMM; - if (!MACH_IS_MAC) - return -ENODEV; - - printk(KERN_INFO "Checking for internal Macintosh ethernet (SONIC).. "); - /* Bogus probing, on the models which may or may not have Ethernet (BTW, the Ethernet *is* always at the same address, and nothing else lives there, at least if Apple's @@ -327,13 +311,11 @@ static int mac_onboard_sonic_probe(struct net_device *dev) card_present = hwreg_present((void*)ONBOARD_SONIC_REGISTERS); if (!card_present) { - printk("none.\n"); + pr_info("Onboard/comm-slot SONIC not found\n"); return -ENODEV; } } - printk("yes\n"); - /* Danger! My arms are flailing wildly! You *must* set lp->reg_offset * and dev->base_addr before using SONIC_READ() or SONIC_WRITE() */ dev->base_addr = ONBOARD_SONIC_REGISTERS; @@ -342,18 +324,10 @@ static int mac_onboard_sonic_probe(struct net_device *dev) else dev->irq = IRQ_NUBUS_9; - if (!sonic_version_printed) { - printk(KERN_INFO "%s", version); - sonic_version_printed = 1; - } - printk(KERN_INFO "%s: onboard / comm-slot SONIC at 0x%08lx\n", - dev_name(lp->device), dev->base_addr); - /* The PowerBook's SONIC is 16 bit always. */ if (macintosh_config->ident == MAC_MODEL_PB520) { lp->reg_offset = 0; lp->dma_bitmode = SONIC_BITMODE16; - sr = SONIC_READ(SONIC_SR); } else if (commslot) { /* Some of the comm-slot cards are 16 bit. But some of them are not. The 32-bit cards use offset 2 and @@ -370,22 +344,21 @@ static int mac_onboard_sonic_probe(struct net_device *dev) else { lp->dma_bitmode = SONIC_BITMODE16; lp->reg_offset = 0; - sr = SONIC_READ(SONIC_SR); } } else { /* All onboard cards are at offset 2 with 32 bit DMA. */ lp->reg_offset = 2; lp->dma_bitmode = SONIC_BITMODE32; - sr = SONIC_READ(SONIC_SR); } - printk(KERN_INFO - "%s: revision 0x%04x, using %d bit DMA and register offset %d\n", - dev_name(lp->device), sr, lp->dma_bitmode?32:16, lp->reg_offset); -#if 0 /* This is sometimes useful to find out how MacOS configured the card. */ - printk(KERN_INFO "%s: DCR: 0x%04x, DCR2: 0x%04x\n", dev_name(lp->device), - SONIC_READ(SONIC_DCR) & 0xffff, SONIC_READ(SONIC_DCR2) & 0xffff); -#endif + pr_info("Onboard/comm-slot SONIC, revision 0x%04x, %d bit DMA, register offset %d\n", + SONIC_READ(SONIC_SR), lp->dma_bitmode ? 32 : 16, + lp->reg_offset); + + /* This is sometimes useful to find out how MacOS configured the card */ + pr_debug("%s: DCR=0x%04x, DCR2=0x%04x\n", __func__, + SONIC_READ(SONIC_DCR) & 0xffff, + SONIC_READ(SONIC_DCR2) & 0xffff); /* Software reset, then initialize control registers. */ SONIC_WRITE(SONIC_CMD, SONIC_CR_RST); @@ -406,11 +379,14 @@ static int mac_onboard_sonic_probe(struct net_device *dev) /* Now look for the MAC address. */ mac_onboard_sonic_ethernet_addr(dev); + pr_info("SONIC ethernet @%08lx, MAC %pM, IRQ %d\n", + dev->base_addr, dev->dev_addr, dev->irq); + /* Shared init code */ return macsonic_init(dev); } -static int mac_nubus_sonic_ethernet_addr(struct net_device *dev, +static int mac_sonic_nubus_ethernet_addr(struct net_device *dev, unsigned long prom_addr, int id) { int i; @@ -449,70 +425,49 @@ static int macsonic_ident(struct nubus_rsrc *fres) return -1; } -static int mac_nubus_sonic_probe(struct net_device *dev) +static int mac_sonic_nubus_probe_board(struct nubus_board *board, int id, + struct net_device *dev) { - static int slots; - struct nubus_rsrc *ndev = NULL; struct sonic_local* lp = netdev_priv(dev); unsigned long base_addr, prom_addr; u16 sonic_dcr; - int id = -1; int reg_offset, dma_bitmode; - /* Find the first SONIC that hasn't been initialized already */ - for_each_func_rsrc(ndev) { - if (ndev->category != NUBUS_CAT_NETWORK || - ndev->type != NUBUS_TYPE_ETHERNET) - continue; - - /* Have we seen it already? */ - if (slots & (1<<ndev->board->slot)) - continue; - slots |= 1<<ndev->board->slot; - - /* Is it one of ours? */ - if ((id = macsonic_ident(ndev)) != -1) - break; - } - - if (ndev == NULL) - return -ENODEV; - switch (id) { case MACSONIC_DUODOCK: - base_addr = ndev->board->slot_addr + DUODOCK_SONIC_REGISTERS; - prom_addr = ndev->board->slot_addr + DUODOCK_SONIC_PROM_BASE; + base_addr = board->slot_addr + DUODOCK_SONIC_REGISTERS; + prom_addr = board->slot_addr + DUODOCK_SONIC_PROM_BASE; sonic_dcr = SONIC_DCR_EXBUS | SONIC_DCR_RFT0 | SONIC_DCR_RFT1 | SONIC_DCR_TFT0; reg_offset = 2; dma_bitmode = SONIC_BITMODE32; break; case MACSONIC_APPLE: - base_addr = ndev->board->slot_addr + APPLE_SONIC_REGISTERS; - prom_addr = ndev->board->slot_addr + APPLE_SONIC_PROM_BASE; + base_addr = board->slot_addr + APPLE_SONIC_REGISTERS; + prom_addr = board->slot_addr + APPLE_SONIC_PROM_BASE; sonic_dcr = SONIC_DCR_BMS | SONIC_DCR_RFT1 | SONIC_DCR_TFT0; reg_offset = 0; dma_bitmode = SONIC_BITMODE32; break; case MACSONIC_APPLE16: - base_addr = ndev->board->slot_addr + APPLE_SONIC_REGISTERS; - prom_addr = ndev->board->slot_addr + APPLE_SONIC_PROM_BASE; + base_addr = board->slot_addr + APPLE_SONIC_REGISTERS; + prom_addr = board->slot_addr + APPLE_SONIC_PROM_BASE; sonic_dcr = SONIC_DCR_EXBUS | SONIC_DCR_RFT1 | SONIC_DCR_TFT0 | SONIC_DCR_PO1 | SONIC_DCR_BMS; reg_offset = 0; dma_bitmode = SONIC_BITMODE16; break; case MACSONIC_DAYNALINK: - base_addr = ndev->board->slot_addr + APPLE_SONIC_REGISTERS; - prom_addr = ndev->board->slot_addr + DAYNALINK_PROM_BASE; + base_addr = board->slot_addr + APPLE_SONIC_REGISTERS; + prom_addr = board->slot_addr + DAYNALINK_PROM_BASE; sonic_dcr = SONIC_DCR_RFT1 | SONIC_DCR_TFT0 | SONIC_DCR_PO1 | SONIC_DCR_BMS; reg_offset = 0; dma_bitmode = SONIC_BITMODE16; break; case MACSONIC_DAYNA: - base_addr = ndev->board->slot_addr + DAYNA_SONIC_REGISTERS; - prom_addr = ndev->board->slot_addr + DAYNA_SONIC_MAC_ADDR; + base_addr = board->slot_addr + DAYNA_SONIC_REGISTERS; + prom_addr = board->slot_addr + DAYNA_SONIC_MAC_ADDR; sonic_dcr = SONIC_DCR_BMS | SONIC_DCR_RFT1 | SONIC_DCR_TFT0 | SONIC_DCR_PO1; reg_offset = 0; @@ -528,21 +483,16 @@ static int mac_nubus_sonic_probe(struct net_device *dev) dev->base_addr = base_addr; lp->reg_offset = reg_offset; lp->dma_bitmode = dma_bitmode; - dev->irq = SLOT2IRQ(ndev->board->slot); + dev->irq = SLOT2IRQ(board->slot); - if (!sonic_version_printed) { - printk(KERN_INFO "%s", version); - sonic_version_printed = 1; - } - printk(KERN_INFO "%s: %s in slot %X\n", - dev_name(lp->device), ndev->board->name, ndev->board->slot); - printk(KERN_INFO "%s: revision 0x%04x, using %d bit DMA and register offset %d\n", - dev_name(lp->device), SONIC_READ(SONIC_SR), dma_bitmode?32:16, reg_offset); + dev_info(&board->dev, "%s, revision 0x%04x, %d bit DMA, register offset %d\n", + board->name, SONIC_READ(SONIC_SR), + lp->dma_bitmode ? 32 : 16, lp->reg_offset); -#if 0 /* This is sometimes useful to find out how MacOS configured the card. */ - printk(KERN_INFO "%s: DCR: 0x%04x, DCR2: 0x%04x\n", dev_name(lp->device), - SONIC_READ(SONIC_DCR) & 0xffff, SONIC_READ(SONIC_DCR2) & 0xffff); -#endif + /* This is sometimes useful to find out how MacOS configured the card */ + dev_dbg(&board->dev, "%s: DCR=0x%04x, DCR2=0x%04x\n", __func__, + SONIC_READ(SONIC_DCR) & 0xffff, + SONIC_READ(SONIC_DCR2) & 0xffff); /* Software reset, then initialize control registers. */ SONIC_WRITE(SONIC_CMD, SONIC_CR_RST); @@ -557,14 +507,17 @@ static int mac_nubus_sonic_probe(struct net_device *dev) SONIC_WRITE(SONIC_ISR, 0x7fff); /* Now look for the MAC address. */ - if (mac_nubus_sonic_ethernet_addr(dev, prom_addr, id) != 0) + if (mac_sonic_nubus_ethernet_addr(dev, prom_addr, id) != 0) return -ENODEV; + dev_info(&board->dev, "SONIC ethernet @%08lx, MAC %pM, IRQ %d\n", + dev->base_addr, dev->dev_addr, dev->irq); + /* Shared init code */ return macsonic_init(dev); } -static int mac_sonic_probe(struct platform_device *pdev) +static int mac_sonic_platform_probe(struct platform_device *pdev) { struct net_device *dev; struct sonic_local *lp; @@ -579,22 +532,16 @@ static int mac_sonic_probe(struct platform_device *pdev) SET_NETDEV_DEV(dev, &pdev->dev); platform_set_drvdata(pdev, dev); - /* This will catch fatal stuff like -ENOMEM as well as success */ err = mac_onboard_sonic_probe(dev); - if (err == 0) - goto found; - if (err != -ENODEV) - goto out; - err = mac_nubus_sonic_probe(dev); if (err) goto out; -found: + + sonic_msg_init(dev); + err = register_netdev(dev); if (err) goto out; - printk("%s: MAC %pM IRQ %d\n", dev->name, dev->dev_addr, dev->irq); - return 0; out: @@ -604,13 +551,11 @@ out: } MODULE_DESCRIPTION("Macintosh SONIC ethernet driver"); -module_param(sonic_debug, int, 0); -MODULE_PARM_DESC(sonic_debug, "macsonic debug level (1-4)"); MODULE_ALIAS("platform:macsonic"); #include "sonic.c" -static int mac_sonic_device_remove(struct platform_device *pdev) +static int mac_sonic_platform_remove(struct platform_device *pdev) { struct net_device *dev = platform_get_drvdata(pdev); struct sonic_local* lp = netdev_priv(dev); @@ -623,12 +568,105 @@ static int mac_sonic_device_remove(struct platform_device *pdev) return 0; } -static struct platform_driver mac_sonic_driver = { - .probe = mac_sonic_probe, - .remove = mac_sonic_device_remove, - .driver = { - .name = mac_sonic_string, +static struct platform_driver mac_sonic_platform_driver = { + .probe = mac_sonic_platform_probe, + .remove = mac_sonic_platform_remove, + .driver = { + .name = "macsonic", + }, +}; + +static int mac_sonic_nubus_probe(struct nubus_board *board) +{ + struct net_device *ndev; + struct sonic_local *lp; + struct nubus_rsrc *fres; + int id = -1; + int err; + + /* The platform driver will handle a PDS or Comm Slot card (even if + * it has a pseudoslot declaration ROM). + */ + if (macintosh_config->expansion_type == MAC_EXP_PDS_COMM) + return -ENODEV; + + for_each_board_func_rsrc(board, fres) { + if (fres->category != NUBUS_CAT_NETWORK || + fres->type != NUBUS_TYPE_ETHERNET) + continue; + + id = macsonic_ident(fres); + if (id != -1) + break; + } + if (!fres) + return -ENODEV; + + ndev = alloc_etherdev(sizeof(struct sonic_local)); + if (!ndev) + return -ENOMEM; + + lp = netdev_priv(ndev); + lp->device = &board->dev; + SET_NETDEV_DEV(ndev, &board->dev); + + err = mac_sonic_nubus_probe_board(board, id, ndev); + if (err) + goto out; + + sonic_msg_init(ndev); + + err = register_netdev(ndev); + if (err) + goto out; + + nubus_set_drvdata(board, ndev); + + return 0; + +out: + free_netdev(ndev); + return err; +} + +static int mac_sonic_nubus_remove(struct nubus_board *board) +{ + struct net_device *ndev = nubus_get_drvdata(board); + struct sonic_local *lp = netdev_priv(ndev); + + unregister_netdev(ndev); + dma_free_coherent(lp->device, + SIZEOF_SONIC_DESC * SONIC_BUS_SCALE(lp->dma_bitmode), + lp->descriptors, lp->descriptors_laddr); + free_netdev(ndev); + + return 0; +} + +static struct nubus_driver mac_sonic_nubus_driver = { + .probe = mac_sonic_nubus_probe, + .remove = mac_sonic_nubus_remove, + .driver = { + .name = "macsonic-nubus", + .owner = THIS_MODULE, }, }; -module_platform_driver(mac_sonic_driver); +static int perr, nerr; + +static int __init mac_sonic_init(void) +{ + perr = platform_driver_register(&mac_sonic_platform_driver); + nerr = nubus_driver_register(&mac_sonic_nubus_driver); + return 0; +} +module_init(mac_sonic_init); + +static void __exit mac_sonic_exit(void) +{ + if (!perr) + platform_driver_unregister(&mac_sonic_platform_driver); + if (!nerr) + nubus_driver_unregister(&mac_sonic_nubus_driver); +} +module_exit(mac_sonic_exit); diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index 612c7a44b26c..7ed08486ae23 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -33,7 +33,21 @@ * the NetBSD file "sys/arch/mac68k/dev/if_sn.c". */ +static unsigned int version_printed; +static int sonic_debug = -1; +module_param(sonic_debug, int, 0); +MODULE_PARM_DESC(sonic_debug, "debug message level"); + +static void sonic_msg_init(struct net_device *dev) +{ + struct sonic_local *lp = netdev_priv(dev); + + lp->msg_enable = netif_msg_init(sonic_debug, 0); + + if (version_printed++ == 0) + netif_dbg(lp, drv, dev, "%s", version); +} /* * Open/initialize the SONIC controller. @@ -47,8 +61,7 @@ static int sonic_open(struct net_device *dev) struct sonic_local *lp = netdev_priv(dev); int i; - if (sonic_debug > 2) - printk("sonic_open: initializing sonic driver.\n"); + netif_dbg(lp, ifup, dev, "%s: initializing sonic driver\n", __func__); for (i = 0; i < SONIC_NUM_RRS; i++) { struct sk_buff *skb = netdev_alloc_skb(dev, SONIC_RBSIZE + 2); @@ -95,8 +108,7 @@ static int sonic_open(struct net_device *dev) netif_start_queue(dev); - if (sonic_debug > 2) - printk("sonic_open: Initialization done.\n"); + netif_dbg(lp, ifup, dev, "%s: Initialization done\n", __func__); return 0; } @@ -110,8 +122,7 @@ static int sonic_close(struct net_device *dev) struct sonic_local *lp = netdev_priv(dev); int i; - if (sonic_debug > 2) - printk("sonic_close\n"); + netif_dbg(lp, ifdown, dev, "%s\n", __func__); netif_stop_queue(dev); @@ -205,8 +216,7 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev) int length; int entry = lp->next_tx; - if (sonic_debug > 2) - printk("sonic_send_packet: skb=%p, dev=%p\n", skb, dev); + netif_dbg(lp, tx_queued, dev, "%s: skb=%p\n", __func__, skb); length = skb->len; if (length < ETH_ZLEN) { @@ -252,14 +262,12 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev) lp->next_tx = (entry + 1) & SONIC_TDS_MASK; if (lp->tx_skb[lp->next_tx] != NULL) { /* The ring is full, the ISR has yet to process the next TD. */ - if (sonic_debug > 3) - printk("%s: stopping queue\n", dev->name); + netif_dbg(lp, tx_queued, dev, "%s: stopping queue\n", __func__); netif_stop_queue(dev); /* after this packet, wait for ISR to free up some TDAs */ } else netif_start_queue(dev); - if (sonic_debug > 2) - printk("sonic_send_packet: issuing Tx command\n"); + netif_dbg(lp, tx_queued, dev, "%s: issuing Tx command\n", __func__); SONIC_WRITE(SONIC_CMD, SONIC_CR_TXP); @@ -281,8 +289,7 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id) do { if (status & SONIC_INT_PKTRX) { - if (sonic_debug > 2) - printk("%s: packet rx\n", dev->name); + netif_dbg(lp, intr, dev, "%s: packet rx\n", __func__); sonic_rx(dev); /* got packet(s) */ SONIC_WRITE(SONIC_ISR, SONIC_INT_PKTRX); /* clear the interrupt */ } @@ -299,8 +306,7 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id) * still being allocated by sonic_send_packet (status clear & tx_skb[entry] clear) */ - if (sonic_debug > 2) - printk("%s: tx done\n", dev->name); + netif_dbg(lp, intr, dev, "%s: tx done\n", __func__); while (lp->tx_skb[entry] != NULL) { if ((td_status = sonic_tda_get(dev, entry, SONIC_TD_STATUS)) == 0) @@ -346,20 +352,20 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id) * check error conditions */ if (status & SONIC_INT_RFO) { - if (sonic_debug > 1) - printk("%s: rx fifo overrun\n", dev->name); + netif_dbg(lp, rx_err, dev, "%s: rx fifo overrun\n", + __func__); lp->stats.rx_fifo_errors++; SONIC_WRITE(SONIC_ISR, SONIC_INT_RFO); /* clear the interrupt */ } if (status & SONIC_INT_RDE) { - if (sonic_debug > 1) - printk("%s: rx descriptors exhausted\n", dev->name); + netif_dbg(lp, rx_err, dev, "%s: rx descriptors exhausted\n", + __func__); lp->stats.rx_dropped++; SONIC_WRITE(SONIC_ISR, SONIC_INT_RDE); /* clear the interrupt */ } if (status & SONIC_INT_RBAE) { - if (sonic_debug > 1) - printk("%s: rx buffer area exceeded\n", dev->name); + netif_dbg(lp, rx_err, dev, "%s: rx buffer area exceeded\n", + __func__); lp->stats.rx_dropped++; SONIC_WRITE(SONIC_ISR, SONIC_INT_RBAE); /* clear the interrupt */ } @@ -380,8 +386,9 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id) /* transmit error */ if (status & SONIC_INT_TXER) { - if ((SONIC_READ(SONIC_TCR) & SONIC_TCR_FU) && (sonic_debug > 2)) - printk(KERN_ERR "%s: tx fifo underrun\n", dev->name); + if (SONIC_READ(SONIC_TCR) & SONIC_TCR_FU) + netif_dbg(lp, tx_err, dev, "%s: tx fifo underrun\n", + __func__); SONIC_WRITE(SONIC_ISR, SONIC_INT_TXER); /* clear the interrupt */ } @@ -475,8 +482,8 @@ static void sonic_rx(struct net_device *dev) if (lp->cur_rwp >= lp->rra_end) lp->cur_rwp = lp->rra_laddr & 0xffff; SONIC_WRITE(SONIC_RWP, lp->cur_rwp); if (SONIC_READ(SONIC_ISR) & SONIC_INT_RBE) { - if (sonic_debug > 2) - printk("%s: rx buffer exhausted\n", dev->name); + netif_dbg(lp, rx_err, dev, "%s: rx buffer exhausted\n", + __func__); SONIC_WRITE(SONIC_ISR, SONIC_INT_RBE); /* clear the flag */ } } else @@ -542,9 +549,8 @@ static void sonic_multicast_list(struct net_device *dev) (netdev_mc_count(dev) > 15)) { rcr |= SONIC_RCR_AMC; } else { - if (sonic_debug > 2) - printk("sonic_multicast_list: mc_count %d\n", - netdev_mc_count(dev)); + netif_dbg(lp, ifup, dev, "%s: mc_count %d\n", __func__, + netdev_mc_count(dev)); sonic_set_cam_enable(dev, 1); /* always enable our own address */ i = 1; netdev_for_each_mc_addr(ha, dev) { @@ -562,8 +568,7 @@ static void sonic_multicast_list(struct net_device *dev) } } - if (sonic_debug > 2) - printk("sonic_multicast_list: setting RCR=%x\n", rcr); + netif_dbg(lp, ifup, dev, "%s: setting RCR=%x\n", __func__, rcr); SONIC_WRITE(SONIC_RCR, rcr); } @@ -596,8 +601,8 @@ static int sonic_init(struct net_device *dev) /* * initialize the receive resource area */ - if (sonic_debug > 2) - printk("sonic_init: initialize receive resource area\n"); + netif_dbg(lp, ifup, dev, "%s: initialize receive resource area\n", + __func__); for (i = 0; i < SONIC_NUM_RRS; i++) { u16 bufadr_l = (unsigned long)lp->rx_laddr[i] & 0xffff; @@ -622,8 +627,7 @@ static int sonic_init(struct net_device *dev) SONIC_WRITE(SONIC_EOBC, (SONIC_RBSIZE >> 1) - (lp->dma_bitmode ? 2 : 1)); /* load the resource pointers */ - if (sonic_debug > 3) - printk("sonic_init: issuing RRRA command\n"); + netif_dbg(lp, ifup, dev, "%s: issuing RRRA command\n", __func__); SONIC_WRITE(SONIC_CMD, SONIC_CR_RRRA); i = 0; @@ -632,16 +636,17 @@ static int sonic_init(struct net_device *dev) break; } - if (sonic_debug > 2) - printk("sonic_init: status=%x i=%d\n", SONIC_READ(SONIC_CMD), i); + netif_dbg(lp, ifup, dev, "%s: status=%x, i=%d\n", __func__, + SONIC_READ(SONIC_CMD), i); /* * Initialize the receive descriptors so that they * become a circular linked list, ie. let the last * descriptor point to the first again. */ - if (sonic_debug > 2) - printk("sonic_init: initialize receive descriptors\n"); + netif_dbg(lp, ifup, dev, "%s: initialize receive descriptors\n", + __func__); + for (i=0; i<SONIC_NUM_RDS; i++) { sonic_rda_put(dev, i, SONIC_RD_STATUS, 0); sonic_rda_put(dev, i, SONIC_RD_PKTLEN, 0); @@ -664,8 +669,9 @@ static int sonic_init(struct net_device *dev) /* * initialize transmit descriptors */ - if (sonic_debug > 2) - printk("sonic_init: initialize transmit descriptors\n"); + netif_dbg(lp, ifup, dev, "%s: initialize transmit descriptors\n", + __func__); + for (i = 0; i < SONIC_NUM_TDS; i++) { sonic_tda_put(dev, i, SONIC_TD_STATUS, 0); sonic_tda_put(dev, i, SONIC_TD_CONFIG, 0); @@ -712,10 +718,8 @@ static int sonic_init(struct net_device *dev) if (SONIC_READ(SONIC_ISR) & SONIC_INT_LCD) break; } - if (sonic_debug > 2) { - printk("sonic_init: CMD=%x, ISR=%x\n, i=%d", - SONIC_READ(SONIC_CMD), SONIC_READ(SONIC_ISR), i); - } + netif_dbg(lp, ifup, dev, "%s: CMD=%x, ISR=%x, i=%d\n", __func__, + SONIC_READ(SONIC_CMD), SONIC_READ(SONIC_ISR), i); /* * enable receiver, disable loopback @@ -731,9 +735,8 @@ static int sonic_init(struct net_device *dev) if ((cmd & SONIC_CR_RXEN) == 0 || (cmd & SONIC_CR_STP) == 0) printk(KERN_ERR "sonic_init: failed, status=%x\n", cmd); - if (sonic_debug > 2) - printk("sonic_init: new status=%x\n", - SONIC_READ(SONIC_CMD)); + netif_dbg(lp, ifup, dev, "%s: new status=%x\n", __func__, + SONIC_READ(SONIC_CMD)); return 0; } diff --git a/drivers/net/ethernet/natsemi/sonic.h b/drivers/net/ethernet/natsemi/sonic.h index 421b1a283fed..2b27f7049acb 100644 --- a/drivers/net/ethernet/natsemi/sonic.h +++ b/drivers/net/ethernet/natsemi/sonic.h @@ -319,6 +319,7 @@ struct sonic_local { unsigned int eol_rx; unsigned int eol_tx; /* last unacked transmit packet */ unsigned int next_tx; /* next free TD */ + int msg_enable; struct device *device; /* generic device */ struct net_device_stats stats; }; @@ -336,6 +337,7 @@ static struct net_device_stats *sonic_get_stats(struct net_device *dev); static void sonic_multicast_list(struct net_device *dev); static int sonic_init(struct net_device *dev); static void sonic_tx_timeout(struct net_device *dev); +static void sonic_msg_init(struct net_device *dev); /* Internal inlines for reading/writing DMA buffers. Note that bus size and endianness matter here, whereas they don't for registers, diff --git a/drivers/net/ethernet/natsemi/xtsonic.c b/drivers/net/ethernet/natsemi/xtsonic.c index 1817deea98a4..e1b886e87a76 100644 --- a/drivers/net/ethernet/natsemi/xtsonic.c +++ b/drivers/net/ethernet/natsemi/xtsonic.c @@ -73,14 +73,6 @@ extern void xtboard_get_ether_addr(unsigned char *buf); #define SONIC_WRITE(reg,val) \ *((volatile unsigned int *)dev->base_addr+reg) = val - -/* Use 0 for production, 1 for verification, and >2 for debug */ -#ifdef SONIC_DEBUG -static unsigned int sonic_debug = SONIC_DEBUG; -#else -static unsigned int sonic_debug = 1; -#endif - /* * We cannot use station (ethernet) address prefixes to detect the * sonic controller since these are board manufacturer depended. @@ -130,7 +122,6 @@ static const struct net_device_ops xtsonic_netdev_ops = { static int __init sonic_probe1(struct net_device *dev) { - static unsigned version_printed = 0; unsigned int silicon_revision; struct sonic_local *lp = netdev_priv(dev); unsigned int base_addr = dev->base_addr; @@ -146,23 +137,17 @@ static int __init sonic_probe1(struct net_device *dev) * the expected location. */ silicon_revision = SONIC_READ(SONIC_SR); - if (sonic_debug > 1) - printk("SONIC Silicon Revision = 0x%04x\n",silicon_revision); - i = 0; while ((known_revisions[i] != 0xffff) && (known_revisions[i] != silicon_revision)) i++; if (known_revisions[i] == 0xffff) { - printk("SONIC ethernet controller not found (0x%4x)\n", - silicon_revision); + pr_info("SONIC ethernet controller not found (0x%4x)\n", + silicon_revision); return -ENODEV; } - if (sonic_debug && version_printed++ == 0) - printk(version); - /* * Put the sonic into software reset, then retrieve ethernet address. * Note: we are assuming that the boot-loader has initialized the cam. @@ -273,12 +258,15 @@ int xtsonic_probe(struct platform_device *pdev) if ((err = sonic_probe1(dev))) goto out; + + pr_info("SONIC ethernet @%08lx, MAC %pM, IRQ %d\n", + dev->base_addr, dev->dev_addr, dev->irq); + + sonic_msg_init(dev); + if ((err = register_netdev(dev))) goto out1; - printk("%s: SONIC ethernet @%08lx, MAC %pM, IRQ %d\n", dev->name, - dev->base_addr, dev->dev_addr, dev->irq); - return 0; out1: @@ -290,8 +278,6 @@ out: } MODULE_DESCRIPTION("Xtensa XT2000 SONIC ethernet driver"); -module_param(sonic_debug, int, 0); -MODULE_PARM_DESC(sonic_debug, "xtsonic debug level (1-4)"); #include "sonic.c" diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c index ca4a81dc1ace..03ad4eeac7f8 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c @@ -1784,7 +1784,7 @@ enum qed_iwarp_mpa_pkt_type { /* fpdu can be fragmented over maximum 3 bds: header, partial mpa, unaligned */ #define QED_IWARP_MAX_BDS_PER_FPDU 3 -char *pkt_type_str[] = { +static const char * const pkt_type_str[] = { "QED_IWARP_MPA_PKT_PACKED", "QED_IWARP_MPA_PKT_PARTIAL", "QED_IWARP_MPA_PKT_UNALIGNED" diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 96db3283eecf..6fd13334aa28 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -735,11 +735,6 @@ struct ring_info { u8 __pad[sizeof(void *) - sizeof(u32)]; }; -enum features { - RTL_FEATURE_MSI = (1 << 0), - RTL_FEATURE_GMII = (1 << 1), -}; - struct rtl8169_counters { __le64 tx_packets; __le64 rx_packets; @@ -7847,7 +7842,7 @@ static int rtl8169_close(struct net_device *dev) cancel_work_sync(&tp->wk.work); - free_irq(pdev->irq, dev); + pci_free_irq(pdev, 0, dev); dma_free_coherent(&pdev->dev, R8169_RX_RING_BYTES, tp->RxDescArray, tp->RxPhyAddr); @@ -7866,7 +7861,7 @@ static void rtl8169_netpoll(struct net_device *dev) { struct rtl8169_private *tp = netdev_priv(dev); - rtl8169_interrupt(tp->pci_dev->irq, dev); + rtl8169_interrupt(pci_irq_vector(tp->pci_dev, 0), dev); } #endif @@ -7903,9 +7898,8 @@ static int rtl_open(struct net_device *dev) rtl_request_firmware(tp); - retval = request_irq(pdev->irq, rtl8169_interrupt, - (tp->features & RTL_FEATURE_MSI) ? 0 : IRQF_SHARED, - dev->name, dev); + retval = pci_request_irq(pdev, 0, rtl8169_interrupt, NULL, dev, + dev->name); if (retval < 0) goto err_release_fw_2; @@ -8235,7 +8229,7 @@ static const struct rtl_cfg_info { unsigned int region; unsigned int align; u16 event_slow; - unsigned features; + unsigned int has_gmii:1; const struct rtl_coalesce_info *coalesce_info; u8 default_ver; } rtl_cfg_infos [] = { @@ -8244,7 +8238,7 @@ static const struct rtl_cfg_info { .region = 1, .align = 0, .event_slow = SYSErr | LinkChg | RxOverflow | RxFIFOOver, - .features = RTL_FEATURE_GMII, + .has_gmii = 1, .coalesce_info = rtl_coalesce_info_8169, .default_ver = RTL_GIGA_MAC_VER_01, }, @@ -8253,7 +8247,7 @@ static const struct rtl_cfg_info { .region = 2, .align = 8, .event_slow = SYSErr | LinkChg | RxOverflow, - .features = RTL_FEATURE_GMII | RTL_FEATURE_MSI, + .has_gmii = 1, .coalesce_info = rtl_coalesce_info_8168_8136, .default_ver = RTL_GIGA_MAC_VER_11, }, @@ -8263,32 +8257,26 @@ static const struct rtl_cfg_info { .align = 8, .event_slow = SYSErr | LinkChg | RxOverflow | RxFIFOOver | PCSTimeout, - .features = RTL_FEATURE_MSI, .coalesce_info = rtl_coalesce_info_8168_8136, .default_ver = RTL_GIGA_MAC_VER_13, } }; -/* Cfg9346_Unlock assumed. */ -static unsigned rtl_try_msi(struct rtl8169_private *tp, - const struct rtl_cfg_info *cfg) +static int rtl_alloc_irq(struct rtl8169_private *tp) { void __iomem *ioaddr = tp->mmio_addr; - unsigned msi = 0; - u8 cfg2; + unsigned int flags; - cfg2 = RTL_R8(Config2) & ~MSIEnable; - if (cfg->features & RTL_FEATURE_MSI) { - if (pci_enable_msi(tp->pci_dev)) { - netif_info(tp, hw, tp->dev, "no MSI. Back to INTx.\n"); - } else { - cfg2 |= MSIEnable; - msi = RTL_FEATURE_MSI; - } + if (tp->mac_version <= RTL_GIGA_MAC_VER_06) { + RTL_W8(Cfg9346, Cfg9346_Unlock); + RTL_W8(Config2, RTL_R8(Config2) & ~MSIEnable); + RTL_W8(Cfg9346, Cfg9346_Lock); + flags = PCI_IRQ_LEGACY; + } else { + flags = PCI_IRQ_ALL_TYPES; } - if (tp->mac_version <= RTL_GIGA_MAC_VER_06) - RTL_W8(Config2, cfg2); - return msi; + + return pci_alloc_irq_vectors(tp->pci_dev, 1, 1, flags); } DECLARE_RTL_COND(rtl_link_list_ready_cond) @@ -8402,7 +8390,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) mii->mdio_write = rtl_mdio_write; mii->phy_id_mask = 0x1f; mii->reg_num_mask = 0x1f; - mii->supports_gmii = !!(cfg->features & RTL_FEATURE_GMII); + mii->supports_gmii = cfg->has_gmii; /* disable ASPM completely as that cause random device stop working * problems as well as full system hangs for some PCIe devices users */ @@ -8497,9 +8485,11 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) chipset = tp->mac_version; tp->txd_version = rtl_chip_infos[chipset].txd_version; - RTL_W8(Cfg9346, Cfg9346_Unlock); - tp->features |= rtl_try_msi(tp, cfg); - RTL_W8(Cfg9346, Cfg9346_Lock); + rc = rtl_alloc_irq(tp); + if (rc < 0) { + netif_err(tp, probe, dev, "Can't allocate interrupt\n"); + return rc; + } /* override BIOS settings, use userspace tools to enable WOL */ __rtl8169_set_wol(tp, 0); @@ -8618,7 +8608,8 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) netif_info(tp, probe, dev, "%s at 0x%p, %pM, XID %08x IRQ %d\n", rtl_chip_infos[chipset].name, ioaddr, dev->dev_addr, - (u32)(RTL_R32(TxConfig) & 0x9cf0f8ff), pdev->irq); + (u32)(RTL_R32(TxConfig) & 0x9cf0f8ff), + pci_irq_vector(pdev, 0)); if (rtl_chip_infos[chipset].jumbo_max != JUMBO_1K) { netif_info(tp, probe, dev, "jumbo features [frames: %d bytes, " "tx checksumming: %s]\n", diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index d7d5a6d15219..d3e1bc05ca9c 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -123,8 +123,8 @@ static const u16 sh_eth_offset_gigabit[SH_ETH_MAX_REGISTER_OFFSET] = { [TSU_FWSL0] = 0x0030, [TSU_FWSL1] = 0x0034, [TSU_FWSLC] = 0x0038, - [TSU_QTAG0] = 0x0040, - [TSU_QTAG1] = 0x0044, + [TSU_QTAGM0] = 0x0040, + [TSU_QTAGM1] = 0x0044, [TSU_FWSR] = 0x0050, [TSU_FWINMK] = 0x0054, [TSU_ADQT0] = 0x0048, @@ -752,6 +752,7 @@ static struct sh_eth_cpu_data sh7757_data = { .rpadir = 1, .rpadir_value = 2 << 16, .rtrate = 1, + .dual_port = 1, }; #define SH_GIGA_ETH_BASE 0xfee00000UL @@ -830,6 +831,7 @@ static struct sh_eth_cpu_data sh7757_data_giga = { .no_trimd = 1, .no_ade = 1, .tsu = 1, + .dual_port = 1, }; /* SH7734 */ @@ -900,6 +902,7 @@ static struct sh_eth_cpu_data sh7763_data = { .tsu = 1, .irq_flags = IRQF_SHARED, .magic = 1, + .dual_port = 1, }; static struct sh_eth_cpu_data sh7619_data = { @@ -932,6 +935,7 @@ static struct sh_eth_cpu_data sh771x_data = { EESIPR_RRFIP | EESIPR_RTLFIP | EESIPR_RTSFIP | EESIPR_PREIP | EESIPR_CERFIP, .tsu = 1, + .dual_port = 1, }; static void sh_eth_set_default_cpu_data(struct sh_eth_cpu_data *cd) @@ -2097,8 +2101,6 @@ static size_t __sh_eth_get_regs(struct net_device *ndev, u32 *buf) add_tsu_reg(TSU_FWSL0); add_tsu_reg(TSU_FWSL1); add_tsu_reg(TSU_FWSLC); - add_tsu_reg(TSU_QTAG0); - add_tsu_reg(TSU_QTAG1); add_tsu_reg(TSU_QTAGM0); add_tsu_reg(TSU_QTAGM1); add_tsu_reg(TSU_FWSR); @@ -2917,7 +2919,7 @@ static int sh_eth_vlan_rx_kill_vid(struct net_device *ndev, /* SuperH's TSU register init function */ static void sh_eth_tsu_init(struct sh_eth_private *mdp) { - if (sh_eth_is_rz_fast_ether(mdp)) { + if (!mdp->cd->dual_port) { sh_eth_tsu_write(mdp, 0, TSU_TEN); /* Disable all CAM entry */ sh_eth_tsu_write(mdp, TSU_FWSLC_POSTENU | TSU_FWSLC_POSTENL, TSU_FWSLC); /* Enable POST registers */ @@ -2934,13 +2936,8 @@ static void sh_eth_tsu_init(struct sh_eth_private *mdp) sh_eth_tsu_write(mdp, 0, TSU_FWSL0); sh_eth_tsu_write(mdp, 0, TSU_FWSL1); sh_eth_tsu_write(mdp, TSU_FWSLC_POSTENU | TSU_FWSLC_POSTENL, TSU_FWSLC); - if (sh_eth_is_gether(mdp)) { - sh_eth_tsu_write(mdp, 0, TSU_QTAG0); /* Disable QTAG(0->1) */ - sh_eth_tsu_write(mdp, 0, TSU_QTAG1); /* Disable QTAG(1->0) */ - } else { - sh_eth_tsu_write(mdp, 0, TSU_QTAGM0); /* Disable QTAG(0->1) */ - sh_eth_tsu_write(mdp, 0, TSU_QTAGM1); /* Disable QTAG(1->0) */ - } + sh_eth_tsu_write(mdp, 0, TSU_QTAGM0); /* Disable QTAG(0->1) */ + sh_eth_tsu_write(mdp, 0, TSU_QTAGM1); /* Disable QTAG(1->0) */ sh_eth_tsu_write(mdp, 0, TSU_FWSR); /* all interrupt status clear */ sh_eth_tsu_write(mdp, 0, TSU_FWINMK); /* Disable all interrupt */ sh_eth_tsu_write(mdp, 0, TSU_TEN); /* Disable all CAM entry */ diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h index a6753ccba711..5bbaf9e56e92 100644 --- a/drivers/net/ethernet/renesas/sh_eth.h +++ b/drivers/net/ethernet/renesas/sh_eth.h @@ -118,8 +118,8 @@ enum { TSU_FWSL0, TSU_FWSL1, TSU_FWSLC, - TSU_QTAG0, - TSU_QTAG1, + TSU_QTAG0, /* Same as TSU_QTAGM0 */ + TSU_QTAG1, /* Same as TSU_QTAGM1 */ TSU_QTAGM0, TSU_QTAGM1, TSU_FWSR, @@ -509,6 +509,7 @@ struct sh_eth_cpu_data { unsigned rmiimode:1; /* EtherC has RMIIMODE register */ unsigned rtrate:1; /* EtherC has RTRATE register */ unsigned magic:1; /* EtherC has ECMR.MPDE and ECSR.MPD */ + unsigned dual_port:1; /* Dual EtherC/E-DMAC */ }; struct sh_eth_private { diff --git a/drivers/net/ethernet/sfc/falcon/enum.h b/drivers/net/ethernet/sfc/falcon/enum.h index 30a1136fc909..4824fcf5c3d4 100644 --- a/drivers/net/ethernet/sfc/falcon/enum.h +++ b/drivers/net/ethernet/sfc/falcon/enum.h @@ -81,7 +81,6 @@ enum ef4_loopback_mode { (1 << LOOPBACK_XAUI) | \ (1 << LOOPBACK_GMII) | \ (1 << LOOPBACK_SGMII) | \ - (1 << LOOPBACK_SGMII) | \ (1 << LOOPBACK_XGBR) | \ (1 << LOOPBACK_XFI) | \ (1 << LOOPBACK_XAUI_FAR) | \ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index c728ffa095de..2a6521d33e43 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -389,6 +389,8 @@ static void dwmac4_rd_prepare_tso_tx_desc(struct dma_desc *p, int is_fs, static void dwmac4_release_tx_desc(struct dma_desc *p, int mode) { + p->des0 = 0; + p->des1 = 0; p->des2 = 0; p->des3 = 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index c8d86d77e03d..a9856a8bf8ad 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1844,6 +1844,11 @@ static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue) if (unlikely(status & tx_dma_own)) break; + /* Make sure descriptor fields are read after reading + * the own bit. + */ + dma_rmb(); + /* Just consider the last segment and ...*/ if (likely(!(status & tx_not_ls))) { /* ... verify the status error condition */ @@ -2983,14 +2988,21 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) tcp_hdrlen(skb) / 4, (skb->len - proto_hdr_len)); /* If context desc is used to change MSS */ - if (mss_desc) + if (mss_desc) { + /* Make sure that first descriptor has been completely + * written, including its own bit. This is because MSS is + * actually before first descriptor, so we need to make + * sure that MSS's own bit is the last thing written. + */ + dma_wmb(); priv->hw->desc->set_tx_owner(mss_desc); + } /* The own bit must be the latest setting done when prepare the * descriptor and then barrier is needed to make sure that * all is coherent before granting the DMA engine. */ - dma_wmb(); + wmb(); if (netif_msg_pktdata(priv)) { pr_info("%s: curr=%d dirty=%d f=%d, e=%d, f_p=%p, nfrags %d\n", @@ -3214,7 +3226,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) * descriptor and then barrier is needed to make sure that * all is coherent before granting the DMA engine. */ - dma_wmb(); + wmb(); } netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len); diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index b919e89a9b93..516dd59249d7 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1694,6 +1694,7 @@ static struct pernet_operations geneve_net_ops = { .exit_batch = geneve_exit_batch_net, .id = &geneve_net_id, .size = sizeof(struct geneve_net), + .async = true, }; static int __init geneve_init_module(void) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index f38e32a7ec9c..127edd23018f 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -1325,6 +1325,7 @@ static struct pernet_operations gtp_net_ops = { .exit = gtp_net_exit, .id = >p_net_id, .size = sizeof(struct gtp_net), + .async = true, }; static int __init gtp_init(void) diff --git a/drivers/net/ieee802154/Kconfig b/drivers/net/ieee802154/Kconfig index 303ba4133920..8782f5655e3f 100644 --- a/drivers/net/ieee802154/Kconfig +++ b/drivers/net/ieee802154/Kconfig @@ -104,3 +104,14 @@ config IEEE802154_CA8210_DEBUGFS exposes a debugfs node for each CA8210 instance which allows direct use of the Cascoda API, exposing the 802.15.4 MAC management entities. + +config IEEE802154_MCR20A + tristate "MCR20A transceiver driver" + depends on IEEE802154_DRIVERS && MAC802154 + depends on SPI + ---help--- + Say Y here to enable the MCR20A SPI 802.15.4 wireless + controller. + + This driver can also be built as a module. To do so, say M here. + the module will be called 'mcr20a'. diff --git a/drivers/net/ieee802154/Makefile b/drivers/net/ieee802154/Makefile index bea1de5e726c..104744d5a668 100644 --- a/drivers/net/ieee802154/Makefile +++ b/drivers/net/ieee802154/Makefile @@ -6,3 +6,4 @@ obj-$(CONFIG_IEEE802154_CC2520) += cc2520.o obj-$(CONFIG_IEEE802154_ATUSB) += atusb.o obj-$(CONFIG_IEEE802154_ADF7242) += adf7242.o obj-$(CONFIG_IEEE802154_CA8210) += ca8210.o +obj-$(CONFIG_IEEE802154_MCR20A) += mcr20a.o diff --git a/drivers/net/ieee802154/mcr20a.c b/drivers/net/ieee802154/mcr20a.c new file mode 100644 index 000000000000..d9eb22a52551 --- /dev/null +++ b/drivers/net/ieee802154/mcr20a.c @@ -0,0 +1,1413 @@ +/* + * Driver for NXP MCR20A 802.15.4 Wireless-PAN Networking controller + * + * Copyright (C) 2018 Xue Liu <liuxuenetmail@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/gpio.h> +#include <linux/spi/spi.h> +#include <linux/workqueue.h> +#include <linux/interrupt.h> +#include <linux/skbuff.h> +#include <linux/of_gpio.h> +#include <linux/regmap.h> +#include <linux/ieee802154.h> +#include <linux/debugfs.h> + +#include <net/mac802154.h> +#include <net/cfg802154.h> + +#include <linux/device.h> + +#include "mcr20a.h" + +#define SPI_COMMAND_BUFFER 3 + +#define REGISTER_READ BIT(7) +#define REGISTER_WRITE (0 << 7) +#define REGISTER_ACCESS (0 << 6) +#define PACKET_BUFF_BURST_ACCESS BIT(6) +#define PACKET_BUFF_BYTE_ACCESS BIT(5) + +#define MCR20A_WRITE_REG(x) (x) +#define MCR20A_READ_REG(x) (REGISTER_READ | (x)) +#define MCR20A_BURST_READ_PACKET_BUF (0xC0) +#define MCR20A_BURST_WRITE_PACKET_BUF (0x40) + +#define MCR20A_CMD_REG 0x80 +#define MCR20A_CMD_REG_MASK 0x3f +#define MCR20A_CMD_WRITE 0x40 +#define MCR20A_CMD_FB 0x20 + +/* Number of Interrupt Request Status Register */ +#define MCR20A_IRQSTS_NUM 2 /* only IRQ_STS1 and IRQ_STS2 */ + +/* MCR20A CCA Type */ +enum { + MCR20A_CCA_ED, // energy detect - CCA bit not active, + // not to be used for T and CCCA sequences + MCR20A_CCA_MODE1, // energy detect - CCA bit ACTIVE + MCR20A_CCA_MODE2, // 802.15.4 compliant signal detect - CCA bit ACTIVE + MCR20A_CCA_MODE3 +}; + +enum { + MCR20A_XCVSEQ_IDLE = 0x00, + MCR20A_XCVSEQ_RX = 0x01, + MCR20A_XCVSEQ_TX = 0x02, + MCR20A_XCVSEQ_CCA = 0x03, + MCR20A_XCVSEQ_TR = 0x04, + MCR20A_XCVSEQ_CCCA = 0x05, +}; + +/* IEEE-802.15.4 defined constants (2.4 GHz logical channels) */ +#define MCR20A_MIN_CHANNEL (11) +#define MCR20A_MAX_CHANNEL (26) +#define MCR20A_CHANNEL_SPACING (5) + +/* MCR20A CCA Threshold constans */ +#define MCR20A_MIN_CCA_THRESHOLD (0x6EU) +#define MCR20A_MAX_CCA_THRESHOLD (0x00U) + +/* version 0C */ +#define MCR20A_OVERWRITE_VERSION (0x0C) + +/* MCR20A PLL configurations */ +static const u8 PLL_INT[16] = { + /* 2405 */ 0x0B, /* 2410 */ 0x0B, /* 2415 */ 0x0B, + /* 2420 */ 0x0B, /* 2425 */ 0x0B, /* 2430 */ 0x0B, + /* 2435 */ 0x0C, /* 2440 */ 0x0C, /* 2445 */ 0x0C, + /* 2450 */ 0x0C, /* 2455 */ 0x0C, /* 2460 */ 0x0C, + /* 2465 */ 0x0D, /* 2470 */ 0x0D, /* 2475 */ 0x0D, + /* 2480 */ 0x0D +}; + +static const u8 PLL_FRAC[16] = { + /* 2405 */ 0x28, /* 2410 */ 0x50, /* 2415 */ 0x78, + /* 2420 */ 0xA0, /* 2425 */ 0xC8, /* 2430 */ 0xF0, + /* 2435 */ 0x18, /* 2440 */ 0x40, /* 2445 */ 0x68, + /* 2450 */ 0x90, /* 2455 */ 0xB8, /* 2460 */ 0xE0, + /* 2465 */ 0x08, /* 2470 */ 0x30, /* 2475 */ 0x58, + /* 2480 */ 0x80 +}; + +static const struct reg_sequence mar20a_iar_overwrites[] = { + { IAR_MISC_PAD_CTRL, 0x02 }, + { IAR_VCO_CTRL1, 0xB3 }, + { IAR_VCO_CTRL2, 0x07 }, + { IAR_PA_TUNING, 0x71 }, + { IAR_CHF_IBUF, 0x2F }, + { IAR_CHF_QBUF, 0x2F }, + { IAR_CHF_IRIN, 0x24 }, + { IAR_CHF_QRIN, 0x24 }, + { IAR_CHF_IL, 0x24 }, + { IAR_CHF_QL, 0x24 }, + { IAR_CHF_CC1, 0x32 }, + { IAR_CHF_CCL, 0x1D }, + { IAR_CHF_CC2, 0x2D }, + { IAR_CHF_IROUT, 0x24 }, + { IAR_CHF_QROUT, 0x24 }, + { IAR_PA_CAL, 0x28 }, + { IAR_AGC_THR1, 0x55 }, + { IAR_AGC_THR2, 0x2D }, + { IAR_ATT_RSSI1, 0x5F }, + { IAR_ATT_RSSI2, 0x8F }, + { IAR_RSSI_OFFSET, 0x61 }, + { IAR_CHF_PMA_GAIN, 0x03 }, + { IAR_CCA1_THRESH, 0x50 }, + { IAR_CORR_NVAL, 0x13 }, + { IAR_ACKDELAY, 0x3D }, +}; + +#define MCR20A_VALID_CHANNELS (0x07FFF800) + +struct mcr20a_platform_data { + int rst_gpio; +}; + +#define MCR20A_MAX_BUF (127) + +#define printdev(X) (&X->spi->dev) + +/* regmap information for Direct Access Register (DAR) access */ +#define MCR20A_DAR_WRITE 0x01 +#define MCR20A_DAR_READ 0x00 +#define MCR20A_DAR_NUMREGS 0x3F + +/* regmap information for Indirect Access Register (IAR) access */ +#define MCR20A_IAR_ACCESS 0x80 +#define MCR20A_IAR_NUMREGS 0xBEFF + +/* Read/Write SPI Commands for DAR and IAR registers. */ +#define MCR20A_READSHORT(reg) ((reg) << 1) +#define MCR20A_WRITESHORT(reg) ((reg) << 1 | 1) +#define MCR20A_READLONG(reg) (1 << 15 | (reg) << 5) +#define MCR20A_WRITELONG(reg) (1 << 15 | (reg) << 5 | 1 << 4) + +/* Type definitions for link configuration of instantiable layers */ +#define MCR20A_PHY_INDIRECT_QUEUE_SIZE (12) + +static bool +mcr20a_dar_writeable(struct device *dev, unsigned int reg) +{ + switch (reg) { + case DAR_IRQ_STS1: + case DAR_IRQ_STS2: + case DAR_IRQ_STS3: + case DAR_PHY_CTRL1: + case DAR_PHY_CTRL2: + case DAR_PHY_CTRL3: + case DAR_PHY_CTRL4: + case DAR_SRC_CTRL: + case DAR_SRC_ADDRS_SUM_LSB: + case DAR_SRC_ADDRS_SUM_MSB: + case DAR_T3CMP_LSB: + case DAR_T3CMP_MSB: + case DAR_T3CMP_USB: + case DAR_T2PRIMECMP_LSB: + case DAR_T2PRIMECMP_MSB: + case DAR_T1CMP_LSB: + case DAR_T1CMP_MSB: + case DAR_T1CMP_USB: + case DAR_T2CMP_LSB: + case DAR_T2CMP_MSB: + case DAR_T2CMP_USB: + case DAR_T4CMP_LSB: + case DAR_T4CMP_MSB: + case DAR_T4CMP_USB: + case DAR_PLL_INT0: + case DAR_PLL_FRAC0_LSB: + case DAR_PLL_FRAC0_MSB: + case DAR_PA_PWR: + /* no DAR_ACM */ + case DAR_OVERWRITE_VER: + case DAR_CLK_OUT_CTRL: + case DAR_PWR_MODES: + return true; + default: + return false; + } +} + +static bool +mcr20a_dar_readable(struct device *dev, unsigned int reg) +{ + bool rc; + + /* all writeable are also readable */ + rc = mcr20a_dar_writeable(dev, reg); + if (rc) + return rc; + + /* readonly regs */ + switch (reg) { + case DAR_RX_FRM_LEN: + case DAR_CCA1_ED_FNL: + case DAR_EVENT_TMR_LSB: + case DAR_EVENT_TMR_MSB: + case DAR_EVENT_TMR_USB: + case DAR_TIMESTAMP_LSB: + case DAR_TIMESTAMP_MSB: + case DAR_TIMESTAMP_USB: + case DAR_SEQ_STATE: + case DAR_LQI_VALUE: + case DAR_RSSI_CCA_CONT: + return true; + default: + return false; + } +} + +static bool +mcr20a_dar_volatile(struct device *dev, unsigned int reg) +{ + /* can be changed during runtime */ + switch (reg) { + case DAR_IRQ_STS1: + case DAR_IRQ_STS2: + case DAR_IRQ_STS3: + /* use them in spi_async and regmap so it's volatile */ + return true; + default: + return false; + } +} + +static bool +mcr20a_dar_precious(struct device *dev, unsigned int reg) +{ + /* don't clear irq line on read */ + switch (reg) { + case DAR_IRQ_STS1: + case DAR_IRQ_STS2: + case DAR_IRQ_STS3: + return true; + default: + return false; + } +} + +static const struct regmap_config mcr20a_dar_regmap = { + .name = "mcr20a_dar", + .reg_bits = 8, + .val_bits = 8, + .write_flag_mask = REGISTER_ACCESS | REGISTER_WRITE, + .read_flag_mask = REGISTER_ACCESS | REGISTER_READ, + .cache_type = REGCACHE_RBTREE, + .writeable_reg = mcr20a_dar_writeable, + .readable_reg = mcr20a_dar_readable, + .volatile_reg = mcr20a_dar_volatile, + .precious_reg = mcr20a_dar_precious, + .fast_io = true, + .can_multi_write = true, +}; + +static bool +mcr20a_iar_writeable(struct device *dev, unsigned int reg) +{ + switch (reg) { + case IAR_XTAL_TRIM: + case IAR_PMC_LP_TRIM: + case IAR_MACPANID0_LSB: + case IAR_MACPANID0_MSB: + case IAR_MACSHORTADDRS0_LSB: + case IAR_MACSHORTADDRS0_MSB: + case IAR_MACLONGADDRS0_0: + case IAR_MACLONGADDRS0_8: + case IAR_MACLONGADDRS0_16: + case IAR_MACLONGADDRS0_24: + case IAR_MACLONGADDRS0_32: + case IAR_MACLONGADDRS0_40: + case IAR_MACLONGADDRS0_48: + case IAR_MACLONGADDRS0_56: + case IAR_RX_FRAME_FILTER: + case IAR_PLL_INT1: + case IAR_PLL_FRAC1_LSB: + case IAR_PLL_FRAC1_MSB: + case IAR_MACPANID1_LSB: + case IAR_MACPANID1_MSB: + case IAR_MACSHORTADDRS1_LSB: + case IAR_MACSHORTADDRS1_MSB: + case IAR_MACLONGADDRS1_0: + case IAR_MACLONGADDRS1_8: + case IAR_MACLONGADDRS1_16: + case IAR_MACLONGADDRS1_24: + case IAR_MACLONGADDRS1_32: + case IAR_MACLONGADDRS1_40: + case IAR_MACLONGADDRS1_48: + case IAR_MACLONGADDRS1_56: + case IAR_DUAL_PAN_CTRL: + case IAR_DUAL_PAN_DWELL: + case IAR_CCA1_THRESH: + case IAR_CCA1_ED_OFFSET_COMP: + case IAR_LQI_OFFSET_COMP: + case IAR_CCA_CTRL: + case IAR_CCA2_CORR_PEAKS: + case IAR_CCA2_CORR_THRESH: + case IAR_TMR_PRESCALE: + case IAR_ANT_PAD_CTRL: + case IAR_MISC_PAD_CTRL: + case IAR_BSM_CTRL: + case IAR_RNG: + case IAR_RX_WTR_MARK: + case IAR_SOFT_RESET: + case IAR_TXDELAY: + case IAR_ACKDELAY: + case IAR_CORR_NVAL: + case IAR_ANT_AGC_CTRL: + case IAR_AGC_THR1: + case IAR_AGC_THR2: + case IAR_PA_CAL: + case IAR_ATT_RSSI1: + case IAR_ATT_RSSI2: + case IAR_RSSI_OFFSET: + case IAR_XTAL_CTRL: + case IAR_CHF_PMA_GAIN: + case IAR_CHF_IBUF: + case IAR_CHF_QBUF: + case IAR_CHF_IRIN: + case IAR_CHF_QRIN: + case IAR_CHF_IL: + case IAR_CHF_QL: + case IAR_CHF_CC1: + case IAR_CHF_CCL: + case IAR_CHF_CC2: + case IAR_CHF_IROUT: + case IAR_CHF_QROUT: + case IAR_PA_TUNING: + case IAR_VCO_CTRL1: + case IAR_VCO_CTRL2: + return true; + default: + return false; + } +} + +static bool +mcr20a_iar_readable(struct device *dev, unsigned int reg) +{ + bool rc; + + /* all writeable are also readable */ + rc = mcr20a_iar_writeable(dev, reg); + if (rc) + return rc; + + /* readonly regs */ + switch (reg) { + case IAR_PART_ID: + case IAR_DUAL_PAN_STS: + case IAR_RX_BYTE_COUNT: + case IAR_FILTERFAIL_CODE1: + case IAR_FILTERFAIL_CODE2: + case IAR_RSSI: + return true; + default: + return false; + } +} + +static bool +mcr20a_iar_volatile(struct device *dev, unsigned int reg) +{ +/* can be changed during runtime */ + switch (reg) { + case IAR_DUAL_PAN_STS: + case IAR_RX_BYTE_COUNT: + case IAR_FILTERFAIL_CODE1: + case IAR_FILTERFAIL_CODE2: + case IAR_RSSI: + return true; + default: + return false; + } +} + +static const struct regmap_config mcr20a_iar_regmap = { + .name = "mcr20a_iar", + .reg_bits = 16, + .val_bits = 8, + .write_flag_mask = REGISTER_ACCESS | REGISTER_WRITE | IAR_INDEX, + .read_flag_mask = REGISTER_ACCESS | REGISTER_READ | IAR_INDEX, + .cache_type = REGCACHE_RBTREE, + .writeable_reg = mcr20a_iar_writeable, + .readable_reg = mcr20a_iar_readable, + .volatile_reg = mcr20a_iar_volatile, + .fast_io = true, +}; + +struct mcr20a_local { + struct spi_device *spi; + + struct ieee802154_hw *hw; + struct mcr20a_platform_data *pdata; + struct regmap *regmap_dar; + struct regmap *regmap_iar; + + u8 *buf; + + bool is_tx; + + /* for writing tx buffer */ + struct spi_message tx_buf_msg; + u8 tx_header[1]; + /* burst buffer write command */ + struct spi_transfer tx_xfer_header; + u8 tx_len[1]; + /* len of tx packet */ + struct spi_transfer tx_xfer_len; + /* data of tx packet */ + struct spi_transfer tx_xfer_buf; + struct sk_buff *tx_skb; + + /* for read length rxfifo */ + struct spi_message reg_msg; + u8 reg_cmd[1]; + u8 reg_data[MCR20A_IRQSTS_NUM]; + struct spi_transfer reg_xfer_cmd; + struct spi_transfer reg_xfer_data; + + /* receive handling */ + struct spi_message rx_buf_msg; + u8 rx_header[1]; + struct spi_transfer rx_xfer_header; + u8 rx_lqi[1]; + struct spi_transfer rx_xfer_lqi; + u8 rx_buf[MCR20A_MAX_BUF]; + struct spi_transfer rx_xfer_buf; + + /* isr handling for reading intstat */ + struct spi_message irq_msg; + u8 irq_header[1]; + u8 irq_data[MCR20A_IRQSTS_NUM]; + struct spi_transfer irq_xfer_data; + struct spi_transfer irq_xfer_header; +}; + +static void +mcr20a_write_tx_buf_complete(void *context) +{ + struct mcr20a_local *lp = context; + int ret; + + dev_dbg(printdev(lp), "%s\n", __func__); + + lp->reg_msg.complete = NULL; + lp->reg_cmd[0] = MCR20A_WRITE_REG(DAR_PHY_CTRL1); + lp->reg_data[0] = MCR20A_XCVSEQ_TX; + lp->reg_xfer_data.len = 1; + + ret = spi_async(lp->spi, &lp->reg_msg); + if (ret) + dev_err(printdev(lp), "failed to set SEQ TX\n"); +} + +static int +mcr20a_xmit(struct ieee802154_hw *hw, struct sk_buff *skb) +{ + struct mcr20a_local *lp = hw->priv; + + dev_dbg(printdev(lp), "%s\n", __func__); + + lp->tx_skb = skb; + + print_hex_dump_debug("mcr20a tx: ", DUMP_PREFIX_OFFSET, 16, 1, + skb->data, skb->len, 0); + + lp->is_tx = 1; + + lp->reg_msg.complete = NULL; + lp->reg_cmd[0] = MCR20A_WRITE_REG(DAR_PHY_CTRL1); + lp->reg_data[0] = MCR20A_XCVSEQ_IDLE; + lp->reg_xfer_data.len = 1; + + return spi_async(lp->spi, &lp->reg_msg); +} + +static int +mcr20a_ed(struct ieee802154_hw *hw, u8 *level) +{ + WARN_ON(!level); + *level = 0xbe; + return 0; +} + +static int +mcr20a_set_channel(struct ieee802154_hw *hw, u8 page, u8 channel) +{ + struct mcr20a_local *lp = hw->priv; + int ret; + + dev_dbg(printdev(lp), "%s\n", __func__); + + /* freqency = ((PLL_INT+64) + (PLL_FRAC/65536)) * 32 MHz */ + ret = regmap_write(lp->regmap_dar, DAR_PLL_INT0, PLL_INT[channel - 11]); + if (ret) + return ret; + ret = regmap_write(lp->regmap_dar, DAR_PLL_FRAC0_LSB, 0x00); + if (ret) + return ret; + ret = regmap_write(lp->regmap_dar, DAR_PLL_FRAC0_MSB, + PLL_FRAC[channel - 11]); + if (ret) + return ret; + + return 0; +} + +static int +mcr20a_start(struct ieee802154_hw *hw) +{ + struct mcr20a_local *lp = hw->priv; + int ret; + + dev_dbg(printdev(lp), "%s\n", __func__); + + /* No slotted operation */ + dev_dbg(printdev(lp), "no slotted operation\n"); + ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1, + DAR_PHY_CTRL1_SLOTTED, 0x0); + + /* enable irq */ + enable_irq(lp->spi->irq); + + /* Unmask SEQ interrupt */ + ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL2, + DAR_PHY_CTRL2_SEQMSK, 0x0); + + /* Start the RX sequence */ + dev_dbg(printdev(lp), "start the RX sequence\n"); + ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1, + DAR_PHY_CTRL1_XCVSEQ_MASK, MCR20A_XCVSEQ_RX); + + return 0; +} + +static void +mcr20a_stop(struct ieee802154_hw *hw) +{ + struct mcr20a_local *lp = hw->priv; + + dev_dbg(printdev(lp), "%s\n", __func__); + + /* stop all running sequence */ + regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1, + DAR_PHY_CTRL1_XCVSEQ_MASK, MCR20A_XCVSEQ_IDLE); + + /* disable irq */ + disable_irq(lp->spi->irq); +} + +static int +mcr20a_set_hw_addr_filt(struct ieee802154_hw *hw, + struct ieee802154_hw_addr_filt *filt, + unsigned long changed) +{ + struct mcr20a_local *lp = hw->priv; + + dev_dbg(printdev(lp), "%s\n", __func__); + + if (changed & IEEE802154_AFILT_SADDR_CHANGED) { + u16 addr = le16_to_cpu(filt->short_addr); + + regmap_write(lp->regmap_iar, IAR_MACSHORTADDRS0_LSB, addr); + regmap_write(lp->regmap_iar, IAR_MACSHORTADDRS0_MSB, addr >> 8); + } + + if (changed & IEEE802154_AFILT_PANID_CHANGED) { + u16 pan = le16_to_cpu(filt->pan_id); + + regmap_write(lp->regmap_iar, IAR_MACPANID0_LSB, pan); + regmap_write(lp->regmap_iar, IAR_MACPANID0_MSB, pan >> 8); + } + + if (changed & IEEE802154_AFILT_IEEEADDR_CHANGED) { + u8 addr[8], i; + + memcpy(addr, &filt->ieee_addr, 8); + for (i = 0; i < 8; i++) + regmap_write(lp->regmap_iar, + IAR_MACLONGADDRS0_0 + i, addr[i]); + } + + if (changed & IEEE802154_AFILT_PANC_CHANGED) { + if (filt->pan_coord) { + regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4, + DAR_PHY_CTRL4_PANCORDNTR0, 0x10); + } else { + regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4, + DAR_PHY_CTRL4_PANCORDNTR0, 0x00); + } + } + + return 0; +} + +/* -30 dBm to 10 dBm */ +#define MCR20A_MAX_TX_POWERS 0x14 +static const s32 mcr20a_powers[MCR20A_MAX_TX_POWERS + 1] = { + -3000, -2800, -2600, -2400, -2200, -2000, -1800, -1600, -1400, + -1200, -1000, -800, -600, -400, -200, 0, 200, 400, 600, 800, 1000 +}; + +static int +mcr20a_set_txpower(struct ieee802154_hw *hw, s32 mbm) +{ + struct mcr20a_local *lp = hw->priv; + u32 i; + + dev_dbg(printdev(lp), "%s(%d)\n", __func__, mbm); + + for (i = 0; i < lp->hw->phy->supported.tx_powers_size; i++) { + if (lp->hw->phy->supported.tx_powers[i] == mbm) + return regmap_write(lp->regmap_dar, DAR_PA_PWR, + ((i + 8) & 0x1F)); + } + + return -EINVAL; +} + +#define MCR20A_MAX_ED_LEVELS MCR20A_MIN_CCA_THRESHOLD +static s32 mcr20a_ed_levels[MCR20A_MAX_ED_LEVELS + 1]; + +static int +mcr20a_set_cca_mode(struct ieee802154_hw *hw, + const struct wpan_phy_cca *cca) +{ + struct mcr20a_local *lp = hw->priv; + unsigned int cca_mode = 0xff; + bool cca_mode_and = false; + int ret; + + dev_dbg(printdev(lp), "%s\n", __func__); + + /* mapping 802.15.4 to driver spec */ + switch (cca->mode) { + case NL802154_CCA_ENERGY: + cca_mode = MCR20A_CCA_MODE1; + break; + case NL802154_CCA_CARRIER: + cca_mode = MCR20A_CCA_MODE2; + break; + case NL802154_CCA_ENERGY_CARRIER: + switch (cca->opt) { + case NL802154_CCA_OPT_ENERGY_CARRIER_AND: + cca_mode = MCR20A_CCA_MODE3; + cca_mode_and = true; + break; + case NL802154_CCA_OPT_ENERGY_CARRIER_OR: + cca_mode = MCR20A_CCA_MODE3; + cca_mode_and = false; + break; + default: + return -EINVAL; + } + break; + default: + return -EINVAL; + } + ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4, + DAR_PHY_CTRL4_CCATYPE_MASK, + cca_mode << DAR_PHY_CTRL4_CCATYPE_SHIFT); + if (ret < 0) + return ret; + + if (cca_mode == MCR20A_CCA_MODE3) { + if (cca_mode_and) { + ret = regmap_update_bits(lp->regmap_iar, IAR_CCA_CTRL, + IAR_CCA_CTRL_CCA3_AND_NOT_OR, + 0x08); + } else { + ret = regmap_update_bits(lp->regmap_iar, + IAR_CCA_CTRL, + IAR_CCA_CTRL_CCA3_AND_NOT_OR, + 0x00); + } + if (ret < 0) + return ret; + } + + return ret; +} + +static int +mcr20a_set_cca_ed_level(struct ieee802154_hw *hw, s32 mbm) +{ + struct mcr20a_local *lp = hw->priv; + u32 i; + + dev_dbg(printdev(lp), "%s\n", __func__); + + for (i = 0; i < hw->phy->supported.cca_ed_levels_size; i++) { + if (hw->phy->supported.cca_ed_levels[i] == mbm) + return regmap_write(lp->regmap_iar, IAR_CCA1_THRESH, i); + } + + return 0; +} + +static int +mcr20a_set_promiscuous_mode(struct ieee802154_hw *hw, const bool on) +{ + struct mcr20a_local *lp = hw->priv; + int ret; + u8 rx_frame_filter_reg = 0x0; + u8 val; + + dev_dbg(printdev(lp), "%s(%d)\n", __func__, on); + + if (on) { + /* All frame types accepted*/ + val |= DAR_PHY_CTRL4_PROMISCUOUS; + rx_frame_filter_reg &= ~(IAR_RX_FRAME_FLT_FRM_VER); + rx_frame_filter_reg |= (IAR_RX_FRAME_FLT_ACK_FT | + IAR_RX_FRAME_FLT_NS_FT); + + ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4, + DAR_PHY_CTRL4_PROMISCUOUS, + DAR_PHY_CTRL4_PROMISCUOUS); + if (ret < 0) + return ret; + + ret = regmap_write(lp->regmap_iar, IAR_RX_FRAME_FILTER, + rx_frame_filter_reg); + if (ret < 0) + return ret; + } else { + ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL4, + DAR_PHY_CTRL4_PROMISCUOUS, 0x0); + if (ret < 0) + return ret; + + ret = regmap_write(lp->regmap_iar, IAR_RX_FRAME_FILTER, + IAR_RX_FRAME_FLT_FRM_VER | + IAR_RX_FRAME_FLT_BEACON_FT | + IAR_RX_FRAME_FLT_DATA_FT | + IAR_RX_FRAME_FLT_CMD_FT); + if (ret < 0) + return ret; + } + + return 0; +} + +static const struct ieee802154_ops mcr20a_hw_ops = { + .owner = THIS_MODULE, + .xmit_async = mcr20a_xmit, + .ed = mcr20a_ed, + .set_channel = mcr20a_set_channel, + .start = mcr20a_start, + .stop = mcr20a_stop, + .set_hw_addr_filt = mcr20a_set_hw_addr_filt, + .set_txpower = mcr20a_set_txpower, + .set_cca_mode = mcr20a_set_cca_mode, + .set_cca_ed_level = mcr20a_set_cca_ed_level, + .set_promiscuous_mode = mcr20a_set_promiscuous_mode, +}; + +static int +mcr20a_request_rx(struct mcr20a_local *lp) +{ + dev_dbg(printdev(lp), "%s\n", __func__); + + /* Start the RX sequence */ + regmap_update_bits_async(lp->regmap_dar, DAR_PHY_CTRL1, + DAR_PHY_CTRL1_XCVSEQ_MASK, MCR20A_XCVSEQ_RX); + + return 0; +} + +static void +mcr20a_handle_rx_read_buf_complete(void *context) +{ + struct mcr20a_local *lp = context; + u8 len = lp->reg_data[0] & DAR_RX_FRAME_LENGTH_MASK; + struct sk_buff *skb; + + dev_dbg(printdev(lp), "%s\n", __func__); + + dev_dbg(printdev(lp), "RX is done\n"); + + if (!ieee802154_is_valid_psdu_len(len)) { + dev_vdbg(&lp->spi->dev, "corrupted frame received\n"); + len = IEEE802154_MTU; + } + + len = len - 2; /* get rid of frame check field */ + + skb = dev_alloc_skb(len); + if (!skb) + return; + + memcpy(skb_put(skb, len), lp->rx_buf, len); + ieee802154_rx_irqsafe(lp->hw, skb, lp->rx_lqi[0]); + + print_hex_dump_debug("mcr20a rx: ", DUMP_PREFIX_OFFSET, 16, 1, + lp->rx_buf, len, 0); + pr_debug("mcr20a rx: lqi: %02hhx\n", lp->rx_lqi[0]); + + /* start RX sequence */ + mcr20a_request_rx(lp); +} + +static void +mcr20a_handle_rx_read_len_complete(void *context) +{ + struct mcr20a_local *lp = context; + u8 len; + int ret; + + dev_dbg(printdev(lp), "%s\n", __func__); + + /* get the length of received frame */ + len = lp->reg_data[0] & DAR_RX_FRAME_LENGTH_MASK; + dev_dbg(printdev(lp), "frame len : %d\n", len); + + /* prepare to read the rx buf */ + lp->rx_buf_msg.complete = mcr20a_handle_rx_read_buf_complete; + lp->rx_header[0] = MCR20A_BURST_READ_PACKET_BUF; + lp->rx_xfer_buf.len = len; + + ret = spi_async(lp->spi, &lp->rx_buf_msg); + if (ret) + dev_err(printdev(lp), "failed to read rx buffer length\n"); +} + +static int +mcr20a_handle_rx(struct mcr20a_local *lp) +{ + dev_dbg(printdev(lp), "%s\n", __func__); + lp->reg_msg.complete = mcr20a_handle_rx_read_len_complete; + lp->reg_cmd[0] = MCR20A_READ_REG(DAR_RX_FRM_LEN); + lp->reg_xfer_data.len = 1; + + return spi_async(lp->spi, &lp->reg_msg); +} + +static int +mcr20a_handle_tx_complete(struct mcr20a_local *lp) +{ + dev_dbg(printdev(lp), "%s\n", __func__); + + ieee802154_xmit_complete(lp->hw, lp->tx_skb, false); + + return mcr20a_request_rx(lp); +} + +static int +mcr20a_handle_tx(struct mcr20a_local *lp) +{ + int ret; + + dev_dbg(printdev(lp), "%s\n", __func__); + + /* write tx buffer */ + lp->tx_header[0] = MCR20A_BURST_WRITE_PACKET_BUF; + /* add 2 bytes of FCS */ + lp->tx_len[0] = lp->tx_skb->len + 2; + lp->tx_xfer_buf.tx_buf = lp->tx_skb->data; + /* add 1 byte psduLength */ + lp->tx_xfer_buf.len = lp->tx_skb->len + 1; + + ret = spi_async(lp->spi, &lp->tx_buf_msg); + if (ret) { + dev_err(printdev(lp), "SPI write Failed for TX buf\n"); + return ret; + } + + return 0; +} + +static void +mcr20a_irq_clean_complete(void *context) +{ + struct mcr20a_local *lp = context; + u8 seq_state = lp->irq_data[DAR_IRQ_STS1] & DAR_PHY_CTRL1_XCVSEQ_MASK; + + dev_dbg(printdev(lp), "%s\n", __func__); + + enable_irq(lp->spi->irq); + + dev_dbg(printdev(lp), "IRQ STA1 (%02x) STA2 (%02x)\n", + lp->irq_data[DAR_IRQ_STS1], lp->irq_data[DAR_IRQ_STS2]); + + switch (seq_state) { + /* TX IRQ, RX IRQ and SEQ IRQ */ + case (0x03): + if (lp->is_tx) { + lp->is_tx = 0; + dev_dbg(printdev(lp), "TX is done. No ACK\n"); + mcr20a_handle_tx_complete(lp); + } + break; + case (0x05): + /* rx is starting */ + dev_dbg(printdev(lp), "RX is starting\n"); + mcr20a_handle_rx(lp); + break; + case (0x07): + if (lp->is_tx) { + /* tx is done */ + lp->is_tx = 0; + dev_dbg(printdev(lp), "TX is done. Get ACK\n"); + mcr20a_handle_tx_complete(lp); + } else { + /* rx is starting */ + dev_dbg(printdev(lp), "RX is starting\n"); + mcr20a_handle_rx(lp); + } + break; + case (0x01): + if (lp->is_tx) { + dev_dbg(printdev(lp), "TX is starting\n"); + mcr20a_handle_tx(lp); + } else { + dev_dbg(printdev(lp), "MCR20A is stop\n"); + } + break; + } +} + +static void mcr20a_irq_status_complete(void *context) +{ + int ret; + struct mcr20a_local *lp = context; + + dev_dbg(printdev(lp), "%s\n", __func__); + regmap_update_bits_async(lp->regmap_dar, DAR_PHY_CTRL1, + DAR_PHY_CTRL1_XCVSEQ_MASK, MCR20A_XCVSEQ_IDLE); + + lp->reg_msg.complete = mcr20a_irq_clean_complete; + lp->reg_cmd[0] = MCR20A_WRITE_REG(DAR_IRQ_STS1); + memcpy(lp->reg_data, lp->irq_data, MCR20A_IRQSTS_NUM); + lp->reg_xfer_data.len = MCR20A_IRQSTS_NUM; + + ret = spi_async(lp->spi, &lp->reg_msg); + + if (ret) + dev_err(printdev(lp), "failed to clean irq status\n"); +} + +static irqreturn_t mcr20a_irq_isr(int irq, void *data) +{ + struct mcr20a_local *lp = data; + int ret; + + disable_irq_nosync(irq); + + lp->irq_header[0] = MCR20A_READ_REG(DAR_IRQ_STS1); + /* read IRQSTSx */ + ret = spi_async(lp->spi, &lp->irq_msg); + if (ret) { + enable_irq(irq); + return IRQ_NONE; + } + + return IRQ_HANDLED; +} + +static int mcr20a_get_platform_data(struct spi_device *spi, + struct mcr20a_platform_data *pdata) +{ + int ret = 0; + + if (!spi->dev.of_node) + return -EINVAL; + + pdata->rst_gpio = of_get_named_gpio(spi->dev.of_node, "rst_b-gpio", 0); + dev_dbg(&spi->dev, "rst_b-gpio: %d\n", pdata->rst_gpio); + + return ret; +} + +static void mcr20a_hw_setup(struct mcr20a_local *lp) +{ + u8 i; + struct ieee802154_hw *hw = lp->hw; + struct wpan_phy *phy = lp->hw->phy; + + dev_dbg(printdev(lp), "%s\n", __func__); + + phy->symbol_duration = 16; + phy->lifs_period = 40; + phy->sifs_period = 12; + + hw->flags = IEEE802154_HW_TX_OMIT_CKSUM | + IEEE802154_HW_AFILT | + IEEE802154_HW_PROMISCUOUS; + + phy->flags = WPAN_PHY_FLAG_TXPOWER | WPAN_PHY_FLAG_CCA_ED_LEVEL | + WPAN_PHY_FLAG_CCA_MODE; + + phy->supported.cca_modes = BIT(NL802154_CCA_ENERGY) | + BIT(NL802154_CCA_CARRIER) | BIT(NL802154_CCA_ENERGY_CARRIER); + phy->supported.cca_opts = BIT(NL802154_CCA_OPT_ENERGY_CARRIER_AND) | + BIT(NL802154_CCA_OPT_ENERGY_CARRIER_OR); + + /* initiating cca_ed_levels */ + for (i = MCR20A_MAX_CCA_THRESHOLD; i < MCR20A_MIN_CCA_THRESHOLD + 1; + ++i) { + mcr20a_ed_levels[i] = -i * 100; + } + + phy->supported.cca_ed_levels = mcr20a_ed_levels; + phy->supported.cca_ed_levels_size = ARRAY_SIZE(mcr20a_ed_levels); + + phy->cca.mode = NL802154_CCA_ENERGY; + + phy->supported.channels[0] = MCR20A_VALID_CHANNELS; + phy->current_page = 0; + /* MCR20A default reset value */ + phy->current_channel = 20; + phy->symbol_duration = 16; + phy->supported.tx_powers = mcr20a_powers; + phy->supported.tx_powers_size = ARRAY_SIZE(mcr20a_powers); + phy->cca_ed_level = phy->supported.cca_ed_levels[75]; + phy->transmit_power = phy->supported.tx_powers[0x0F]; +} + +static void +mcr20a_setup_tx_spi_messages(struct mcr20a_local *lp) +{ + spi_message_init(&lp->tx_buf_msg); + lp->tx_buf_msg.context = lp; + lp->tx_buf_msg.complete = mcr20a_write_tx_buf_complete; + + lp->tx_xfer_header.len = 1; + lp->tx_xfer_header.tx_buf = lp->tx_header; + + lp->tx_xfer_len.len = 1; + lp->tx_xfer_len.tx_buf = lp->tx_len; + + spi_message_add_tail(&lp->tx_xfer_header, &lp->tx_buf_msg); + spi_message_add_tail(&lp->tx_xfer_len, &lp->tx_buf_msg); + spi_message_add_tail(&lp->tx_xfer_buf, &lp->tx_buf_msg); +} + +static void +mcr20a_setup_rx_spi_messages(struct mcr20a_local *lp) +{ + spi_message_init(&lp->reg_msg); + lp->reg_msg.context = lp; + + lp->reg_xfer_cmd.len = 1; + lp->reg_xfer_cmd.tx_buf = lp->reg_cmd; + lp->reg_xfer_cmd.rx_buf = lp->reg_cmd; + + lp->reg_xfer_data.rx_buf = lp->reg_data; + lp->reg_xfer_data.tx_buf = lp->reg_data; + + spi_message_add_tail(&lp->reg_xfer_cmd, &lp->reg_msg); + spi_message_add_tail(&lp->reg_xfer_data, &lp->reg_msg); + + spi_message_init(&lp->rx_buf_msg); + lp->rx_buf_msg.context = lp; + lp->rx_buf_msg.complete = mcr20a_handle_rx_read_buf_complete; + lp->rx_xfer_header.len = 1; + lp->rx_xfer_header.tx_buf = lp->rx_header; + lp->rx_xfer_header.rx_buf = lp->rx_header; + + lp->rx_xfer_buf.rx_buf = lp->rx_buf; + + lp->rx_xfer_lqi.len = 1; + lp->rx_xfer_lqi.rx_buf = lp->rx_lqi; + + spi_message_add_tail(&lp->rx_xfer_header, &lp->rx_buf_msg); + spi_message_add_tail(&lp->rx_xfer_buf, &lp->rx_buf_msg); + spi_message_add_tail(&lp->rx_xfer_lqi, &lp->rx_buf_msg); +} + +static void +mcr20a_setup_irq_spi_messages(struct mcr20a_local *lp) +{ + spi_message_init(&lp->irq_msg); + lp->irq_msg.context = lp; + lp->irq_msg.complete = mcr20a_irq_status_complete; + lp->irq_xfer_header.len = 1; + lp->irq_xfer_header.tx_buf = lp->irq_header; + lp->irq_xfer_header.rx_buf = lp->irq_header; + + lp->irq_xfer_data.len = MCR20A_IRQSTS_NUM; + lp->irq_xfer_data.rx_buf = lp->irq_data; + + spi_message_add_tail(&lp->irq_xfer_header, &lp->irq_msg); + spi_message_add_tail(&lp->irq_xfer_data, &lp->irq_msg); +} + +static int +mcr20a_phy_init(struct mcr20a_local *lp) +{ + u8 index; + unsigned int phy_reg = 0; + int ret; + + dev_dbg(printdev(lp), "%s\n", __func__); + + /* Disable Tristate on COCO MISO for SPI reads */ + ret = regmap_write(lp->regmap_iar, IAR_MISC_PAD_CTRL, 0x02); + if (ret) + goto err_ret; + + /* Clear all PP IRQ bits in IRQSTS1 to avoid unexpected interrupts + * immediately after init + */ + ret = regmap_write(lp->regmap_dar, DAR_IRQ_STS1, 0xEF); + if (ret) + goto err_ret; + + /* Clear all PP IRQ bits in IRQSTS2 */ + ret = regmap_write(lp->regmap_dar, DAR_IRQ_STS2, + DAR_IRQSTS2_ASM_IRQ | DAR_IRQSTS2_PB_ERR_IRQ | + DAR_IRQSTS2_WAKE_IRQ); + if (ret) + goto err_ret; + + /* Disable all timer interrupts */ + ret = regmap_write(lp->regmap_dar, DAR_IRQ_STS3, 0xFF); + if (ret) + goto err_ret; + + /* PHY_CTRL1 : default HW settings + AUTOACK enabled */ + ret = regmap_update_bits(lp->regmap_dar, DAR_PHY_CTRL1, + DAR_PHY_CTRL1_AUTOACK, DAR_PHY_CTRL1_AUTOACK); + + /* PHY_CTRL2 : disable all interrupts */ + ret = regmap_write(lp->regmap_dar, DAR_PHY_CTRL2, 0xFF); + if (ret) + goto err_ret; + + /* PHY_CTRL3 : disable all timers and remaining interrupts */ + ret = regmap_write(lp->regmap_dar, DAR_PHY_CTRL3, + DAR_PHY_CTRL3_ASM_MSK | DAR_PHY_CTRL3_PB_ERR_MSK | + DAR_PHY_CTRL3_WAKE_MSK); + if (ret) + goto err_ret; + + /* SRC_CTRL : enable Acknowledge Frame Pending and + * Source Address Matching Enable + */ + ret = regmap_write(lp->regmap_dar, DAR_SRC_CTRL, + DAR_SRC_CTRL_ACK_FRM_PND | + (DAR_SRC_CTRL_INDEX << DAR_SRC_CTRL_INDEX_SHIFT)); + if (ret) + goto err_ret; + + /* RX_FRAME_FILTER */ + /* FRM_VER[1:0] = b11. Accept FrameVersion 0 and 1 packets */ + ret = regmap_write(lp->regmap_iar, IAR_RX_FRAME_FILTER, + IAR_RX_FRAME_FLT_FRM_VER | + IAR_RX_FRAME_FLT_BEACON_FT | + IAR_RX_FRAME_FLT_DATA_FT | + IAR_RX_FRAME_FLT_CMD_FT); + if (ret) + goto err_ret; + + dev_info(printdev(lp), "MCR20A DAR overwrites version: 0x%02x\n", + MCR20A_OVERWRITE_VERSION); + + /* Overwrites direct registers */ + ret = regmap_write(lp->regmap_dar, DAR_OVERWRITE_VER, + MCR20A_OVERWRITE_VERSION); + if (ret) + goto err_ret; + + /* Overwrites indirect registers */ + ret = regmap_multi_reg_write(lp->regmap_iar, mar20a_iar_overwrites, + ARRAY_SIZE(mar20a_iar_overwrites)); + if (ret) + goto err_ret; + + /* Clear HW indirect queue */ + dev_dbg(printdev(lp), "clear HW indirect queue\n"); + for (index = 0; index < MCR20A_PHY_INDIRECT_QUEUE_SIZE; index++) { + phy_reg = (u8)(((index & DAR_SRC_CTRL_INDEX) << + DAR_SRC_CTRL_INDEX_SHIFT) + | (DAR_SRC_CTRL_SRCADDR_EN) + | (DAR_SRC_CTRL_INDEX_DISABLE)); + ret = regmap_write(lp->regmap_dar, DAR_SRC_CTRL, phy_reg); + if (ret) + goto err_ret; + phy_reg = 0; + } + + /* Assign HW Indirect hash table to PAN0 */ + ret = regmap_read(lp->regmap_iar, IAR_DUAL_PAN_CTRL, &phy_reg); + if (ret) + goto err_ret; + + /* Clear current lvl */ + phy_reg &= ~IAR_DUAL_PAN_CTRL_DUAL_PAN_SAM_LVL_MSK; + + /* Set new lvl */ + phy_reg |= MCR20A_PHY_INDIRECT_QUEUE_SIZE << + IAR_DUAL_PAN_CTRL_DUAL_PAN_SAM_LVL_SHIFT; + ret = regmap_write(lp->regmap_iar, IAR_DUAL_PAN_CTRL, phy_reg); + if (ret) + goto err_ret; + + /* Set CCA threshold to -75 dBm */ + ret = regmap_write(lp->regmap_iar, IAR_CCA1_THRESH, 0x4B); + if (ret) + goto err_ret; + + /* Set prescaller to obtain 1 symbol (16us) timebase */ + ret = regmap_write(lp->regmap_iar, IAR_TMR_PRESCALE, 0x05); + if (ret) + goto err_ret; + + /* Enable autodoze mode. */ + ret = regmap_update_bits(lp->regmap_dar, DAR_PWR_MODES, + DAR_PWR_MODES_AUTODOZE, + DAR_PWR_MODES_AUTODOZE); + if (ret) + goto err_ret; + + /* Disable clk_out */ + ret = regmap_update_bits(lp->regmap_dar, DAR_CLK_OUT_CTRL, + DAR_CLK_OUT_CTRL_EN, 0x0); + if (ret) + goto err_ret; + + return 0; + +err_ret: + return ret; +} + +static int +mcr20a_probe(struct spi_device *spi) +{ + struct ieee802154_hw *hw; + struct mcr20a_local *lp; + struct mcr20a_platform_data *pdata; + int irq_type; + int ret = -ENOMEM; + + dev_dbg(&spi->dev, "%s\n", __func__); + + if (!spi->irq) { + dev_err(&spi->dev, "no IRQ specified\n"); + return -EINVAL; + } + + pdata = kmalloc(sizeof(*pdata), GFP_KERNEL); + if (!pdata) + return -ENOMEM; + + /* set mcr20a platform data */ + ret = mcr20a_get_platform_data(spi, pdata); + if (ret < 0) { + dev_crit(&spi->dev, "mcr20a_get_platform_data failed.\n"); + return ret; + } + + /* init reset gpio */ + if (gpio_is_valid(pdata->rst_gpio)) { + ret = devm_gpio_request_one(&spi->dev, pdata->rst_gpio, + GPIOF_OUT_INIT_HIGH, "reset"); + if (ret) + return ret; + } + + /* reset mcr20a */ + if (gpio_is_valid(pdata->rst_gpio)) { + usleep_range(10, 20); + gpio_set_value_cansleep(pdata->rst_gpio, 0); + usleep_range(10, 20); + gpio_set_value_cansleep(pdata->rst_gpio, 1); + usleep_range(120, 240); + } + + /* allocate ieee802154_hw and private data */ + hw = ieee802154_alloc_hw(sizeof(*lp), &mcr20a_hw_ops); + if (!hw) { + dev_crit(&spi->dev, "ieee802154_alloc_hw failed\n"); + return -ENOMEM; + } + + /* init mcr20a local data */ + lp = hw->priv; + lp->hw = hw; + lp->spi = spi; + lp->spi->dev.platform_data = pdata; + lp->pdata = pdata; + + /* init ieee802154_hw */ + hw->parent = &spi->dev; + ieee802154_random_extended_addr(&hw->phy->perm_extended_addr); + + /* init buf */ + lp->buf = devm_kzalloc(&spi->dev, SPI_COMMAND_BUFFER, GFP_KERNEL); + + if (!lp->buf) + return -ENOMEM; + + mcr20a_setup_tx_spi_messages(lp); + mcr20a_setup_rx_spi_messages(lp); + mcr20a_setup_irq_spi_messages(lp); + + /* setup regmap */ + lp->regmap_dar = devm_regmap_init_spi(spi, &mcr20a_dar_regmap); + if (IS_ERR(lp->regmap_dar)) { + ret = PTR_ERR(lp->regmap_dar); + dev_err(&spi->dev, "Failed to allocate dar map: %d\n", + ret); + goto free_dev; + } + + lp->regmap_iar = devm_regmap_init_spi(spi, &mcr20a_iar_regmap); + if (IS_ERR(lp->regmap_iar)) { + ret = PTR_ERR(lp->regmap_iar); + dev_err(&spi->dev, "Failed to allocate iar map: %d\n", ret); + goto free_dev; + } + + mcr20a_hw_setup(lp); + + spi_set_drvdata(spi, lp); + + ret = mcr20a_phy_init(lp); + if (ret < 0) { + dev_crit(&spi->dev, "mcr20a_phy_init failed\n"); + goto free_dev; + } + + irq_type = irq_get_trigger_type(spi->irq); + if (!irq_type) + irq_type = IRQF_TRIGGER_FALLING; + + ret = devm_request_irq(&spi->dev, spi->irq, mcr20a_irq_isr, + irq_type, dev_name(&spi->dev), lp); + if (ret) { + dev_err(&spi->dev, "could not request_irq for mcr20a\n"); + ret = -ENODEV; + goto free_dev; + } + + /* disable_irq by default and wait for starting hardware */ + disable_irq(spi->irq); + + ret = ieee802154_register_hw(hw); + if (ret) { + dev_crit(&spi->dev, "ieee802154_register_hw failed\n"); + goto free_dev; + } + + return ret; + +free_dev: + ieee802154_free_hw(lp->hw); + + return ret; +} + +static int mcr20a_remove(struct spi_device *spi) +{ + struct mcr20a_local *lp = spi_get_drvdata(spi); + + dev_dbg(&spi->dev, "%s\n", __func__); + + ieee802154_unregister_hw(lp->hw); + ieee802154_free_hw(lp->hw); + + return 0; +} + +static const struct of_device_id mcr20a_of_match[] = { + { .compatible = "nxp,mcr20a", }, + { }, +}; +MODULE_DEVICE_TABLE(of, mcr20a_of_match); + +static const struct spi_device_id mcr20a_device_id[] = { + { .name = "mcr20a", }, + { }, +}; +MODULE_DEVICE_TABLE(spi, mcr20a_device_id); + +static struct spi_driver mcr20a_driver = { + .id_table = mcr20a_device_id, + .driver = { + .of_match_table = of_match_ptr(mcr20a_of_match), + .name = "mcr20a", + }, + .probe = mcr20a_probe, + .remove = mcr20a_remove, +}; + +module_spi_driver(mcr20a_driver); + +MODULE_DESCRIPTION("MCR20A Transceiver Driver"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Xue Liu <liuxuenetmail@gmail>"); diff --git a/drivers/net/ieee802154/mcr20a.h b/drivers/net/ieee802154/mcr20a.h new file mode 100644 index 000000000000..6da4fd00b3c5 --- /dev/null +++ b/drivers/net/ieee802154/mcr20a.h @@ -0,0 +1,498 @@ +/* + * Driver for NXP MCR20A 802.15.4 Wireless-PAN Networking controller + * + * Copyright (C) 2018 Xue Liu <liuxuenetmail@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#ifndef _MCR20A_H +#define _MCR20A_H + +/* Direct Accress Register */ +#define DAR_IRQ_STS1 0x00 +#define DAR_IRQ_STS2 0x01 +#define DAR_IRQ_STS3 0x02 +#define DAR_PHY_CTRL1 0x03 +#define DAR_PHY_CTRL2 0x04 +#define DAR_PHY_CTRL3 0x05 +#define DAR_RX_FRM_LEN 0x06 +#define DAR_PHY_CTRL4 0x07 +#define DAR_SRC_CTRL 0x08 +#define DAR_SRC_ADDRS_SUM_LSB 0x09 +#define DAR_SRC_ADDRS_SUM_MSB 0x0A +#define DAR_CCA1_ED_FNL 0x0B +#define DAR_EVENT_TMR_LSB 0x0C +#define DAR_EVENT_TMR_MSB 0x0D +#define DAR_EVENT_TMR_USB 0x0E +#define DAR_TIMESTAMP_LSB 0x0F +#define DAR_TIMESTAMP_MSB 0x10 +#define DAR_TIMESTAMP_USB 0x11 +#define DAR_T3CMP_LSB 0x12 +#define DAR_T3CMP_MSB 0x13 +#define DAR_T3CMP_USB 0x14 +#define DAR_T2PRIMECMP_LSB 0x15 +#define DAR_T2PRIMECMP_MSB 0x16 +#define DAR_T1CMP_LSB 0x17 +#define DAR_T1CMP_MSB 0x18 +#define DAR_T1CMP_USB 0x19 +#define DAR_T2CMP_LSB 0x1A +#define DAR_T2CMP_MSB 0x1B +#define DAR_T2CMP_USB 0x1C +#define DAR_T4CMP_LSB 0x1D +#define DAR_T4CMP_MSB 0x1E +#define DAR_T4CMP_USB 0x1F +#define DAR_PLL_INT0 0x20 +#define DAR_PLL_FRAC0_LSB 0x21 +#define DAR_PLL_FRAC0_MSB 0x22 +#define DAR_PA_PWR 0x23 +#define DAR_SEQ_STATE 0x24 +#define DAR_LQI_VALUE 0x25 +#define DAR_RSSI_CCA_CONT 0x26 +/*------------------ 0x27 */ +#define DAR_ASM_CTRL1 0x28 +#define DAR_ASM_CTRL2 0x29 +#define DAR_ASM_DATA_0 0x2A +#define DAR_ASM_DATA_1 0x2B +#define DAR_ASM_DATA_2 0x2C +#define DAR_ASM_DATA_3 0x2D +#define DAR_ASM_DATA_4 0x2E +#define DAR_ASM_DATA_5 0x2F +#define DAR_ASM_DATA_6 0x30 +#define DAR_ASM_DATA_7 0x31 +#define DAR_ASM_DATA_8 0x32 +#define DAR_ASM_DATA_9 0x33 +#define DAR_ASM_DATA_A 0x34 +#define DAR_ASM_DATA_B 0x35 +#define DAR_ASM_DATA_C 0x36 +#define DAR_ASM_DATA_D 0x37 +#define DAR_ASM_DATA_E 0x38 +#define DAR_ASM_DATA_F 0x39 +/*----------------------- 0x3A */ +#define DAR_OVERWRITE_VER 0x3B +#define DAR_CLK_OUT_CTRL 0x3C +#define DAR_PWR_MODES 0x3D +#define IAR_INDEX 0x3E +#define IAR_DATA 0x3F + +/* Indirect Resgister Memory */ +#define IAR_PART_ID 0x00 +#define IAR_XTAL_TRIM 0x01 +#define IAR_PMC_LP_TRIM 0x02 +#define IAR_MACPANID0_LSB 0x03 +#define IAR_MACPANID0_MSB 0x04 +#define IAR_MACSHORTADDRS0_LSB 0x05 +#define IAR_MACSHORTADDRS0_MSB 0x06 +#define IAR_MACLONGADDRS0_0 0x07 +#define IAR_MACLONGADDRS0_8 0x08 +#define IAR_MACLONGADDRS0_16 0x09 +#define IAR_MACLONGADDRS0_24 0x0A +#define IAR_MACLONGADDRS0_32 0x0B +#define IAR_MACLONGADDRS0_40 0x0C +#define IAR_MACLONGADDRS0_48 0x0D +#define IAR_MACLONGADDRS0_56 0x0E +#define IAR_RX_FRAME_FILTER 0x0F +#define IAR_PLL_INT1 0x10 +#define IAR_PLL_FRAC1_LSB 0x11 +#define IAR_PLL_FRAC1_MSB 0x12 +#define IAR_MACPANID1_LSB 0x13 +#define IAR_MACPANID1_MSB 0x14 +#define IAR_MACSHORTADDRS1_LSB 0x15 +#define IAR_MACSHORTADDRS1_MSB 0x16 +#define IAR_MACLONGADDRS1_0 0x17 +#define IAR_MACLONGADDRS1_8 0x18 +#define IAR_MACLONGADDRS1_16 0x19 +#define IAR_MACLONGADDRS1_24 0x1A +#define IAR_MACLONGADDRS1_32 0x1B +#define IAR_MACLONGADDRS1_40 0x1C +#define IAR_MACLONGADDRS1_48 0x1D +#define IAR_MACLONGADDRS1_56 0x1E +#define IAR_DUAL_PAN_CTRL 0x1F +#define IAR_DUAL_PAN_DWELL 0x20 +#define IAR_DUAL_PAN_STS 0x21 +#define IAR_CCA1_THRESH 0x22 +#define IAR_CCA1_ED_OFFSET_COMP 0x23 +#define IAR_LQI_OFFSET_COMP 0x24 +#define IAR_CCA_CTRL 0x25 +#define IAR_CCA2_CORR_PEAKS 0x26 +#define IAR_CCA2_CORR_THRESH 0x27 +#define IAR_TMR_PRESCALE 0x28 +/*-------------------- 0x29 */ +#define IAR_GPIO_DATA 0x2A +#define IAR_GPIO_DIR 0x2B +#define IAR_GPIO_PUL_EN 0x2C +#define IAR_GPIO_PUL_SEL 0x2D +#define IAR_GPIO_DS 0x2E +/*------------------ 0x2F */ +#define IAR_ANT_PAD_CTRL 0x30 +#define IAR_MISC_PAD_CTRL 0x31 +#define IAR_BSM_CTRL 0x32 +/*------------------- 0x33 */ +#define IAR_RNG 0x34 +#define IAR_RX_BYTE_COUNT 0x35 +#define IAR_RX_WTR_MARK 0x36 +#define IAR_SOFT_RESET 0x37 +#define IAR_TXDELAY 0x38 +#define IAR_ACKDELAY 0x39 +#define IAR_SEQ_MGR_CTRL 0x3A +#define IAR_SEQ_MGR_STS 0x3B +#define IAR_SEQ_T_STS 0x3C +#define IAR_ABORT_STS 0x3D +#define IAR_CCCA_BUSY_CNT 0x3E +#define IAR_SRC_ADDR_CHECKSUM1 0x3F +#define IAR_SRC_ADDR_CHECKSUM2 0x40 +#define IAR_SRC_TBL_VALID1 0x41 +#define IAR_SRC_TBL_VALID2 0x42 +#define IAR_FILTERFAIL_CODE1 0x43 +#define IAR_FILTERFAIL_CODE2 0x44 +#define IAR_SLOT_PRELOAD 0x45 +/*-------------------- 0x46 */ +#define IAR_CORR_VT 0x47 +#define IAR_SYNC_CTRL 0x48 +#define IAR_PN_LSB_0 0x49 +#define IAR_PN_LSB_1 0x4A +#define IAR_PN_MSB_0 0x4B +#define IAR_PN_MSB_1 0x4C +#define IAR_CORR_NVAL 0x4D +#define IAR_TX_MODE_CTRL 0x4E +#define IAR_SNF_THR 0x4F +#define IAR_FAD_THR 0x50 +#define IAR_ANT_AGC_CTRL 0x51 +#define IAR_AGC_THR1 0x52 +#define IAR_AGC_THR2 0x53 +#define IAR_AGC_HYS 0x54 +#define IAR_AFC 0x55 +/*------------------- 0x56 */ +/*------------------- 0x57 */ +#define IAR_PHY_STS 0x58 +#define IAR_RX_MAX_CORR 0x59 +#define IAR_RX_MAX_PREAMBLE 0x5A +#define IAR_RSSI 0x5B +/*------------------- 0x5C */ +/*------------------- 0x5D */ +#define IAR_PLL_DIG_CTRL 0x5E +#define IAR_VCO_CAL 0x5F +#define IAR_VCO_BEST_DIFF 0x60 +#define IAR_VCO_BIAS 0x61 +#define IAR_KMOD_CTRL 0x62 +#define IAR_KMOD_CAL 0x63 +#define IAR_PA_CAL 0x64 +#define IAR_PA_PWRCAL 0x65 +#define IAR_ATT_RSSI1 0x66 +#define IAR_ATT_RSSI2 0x67 +#define IAR_RSSI_OFFSET 0x68 +#define IAR_RSSI_SLOPE 0x69 +#define IAR_RSSI_CAL1 0x6A +#define IAR_RSSI_CAL2 0x6B +/*------------------- 0x6C */ +/*------------------- 0x6D */ +#define IAR_XTAL_CTRL 0x6E +#define IAR_XTAL_COMP_MIN 0x6F +#define IAR_XTAL_COMP_MAX 0x70 +#define IAR_XTAL_GM 0x71 +/*------------------- 0x72 */ +/*------------------- 0x73 */ +#define IAR_LNA_TUNE 0x74 +#define IAR_LNA_AGCGAIN 0x75 +/*------------------- 0x76 */ +/*------------------- 0x77 */ +#define IAR_CHF_PMA_GAIN 0x78 +#define IAR_CHF_IBUF 0x79 +#define IAR_CHF_QBUF 0x7A +#define IAR_CHF_IRIN 0x7B +#define IAR_CHF_QRIN 0x7C +#define IAR_CHF_IL 0x7D +#define IAR_CHF_QL 0x7E +#define IAR_CHF_CC1 0x7F +#define IAR_CHF_CCL 0x80 +#define IAR_CHF_CC2 0x81 +#define IAR_CHF_IROUT 0x82 +#define IAR_CHF_QROUT 0x83 +/*------------------- 0x84 */ +/*------------------- 0x85 */ +#define IAR_RSSI_CTRL 0x86 +/*------------------- 0x87 */ +/*------------------- 0x88 */ +#define IAR_PA_BIAS 0x89 +#define IAR_PA_TUNING 0x8A +/*------------------- 0x8B */ +/*------------------- 0x8C */ +#define IAR_PMC_HP_TRIM 0x8D +#define IAR_VREGA_TRIM 0x8E +/*------------------- 0x8F */ +/*------------------- 0x90 */ +#define IAR_VCO_CTRL1 0x91 +#define IAR_VCO_CTRL2 0x92 +/*------------------- 0x93 */ +/*------------------- 0x94 */ +#define IAR_ANA_SPARE_OUT1 0x95 +#define IAR_ANA_SPARE_OUT2 0x96 +#define IAR_ANA_SPARE_IN 0x97 +#define IAR_MISCELLANEOUS 0x98 +/*------------------- 0x99 */ +#define IAR_SEQ_MGR_OVRD0 0x9A +#define IAR_SEQ_MGR_OVRD1 0x9B +#define IAR_SEQ_MGR_OVRD2 0x9C +#define IAR_SEQ_MGR_OVRD3 0x9D +#define IAR_SEQ_MGR_OVRD4 0x9E +#define IAR_SEQ_MGR_OVRD5 0x9F +#define IAR_SEQ_MGR_OVRD6 0xA0 +#define IAR_SEQ_MGR_OVRD7 0xA1 +/*------------------- 0xA2 */ +#define IAR_TESTMODE_CTRL 0xA3 +#define IAR_DTM_CTRL1 0xA4 +#define IAR_DTM_CTRL2 0xA5 +#define IAR_ATM_CTRL1 0xA6 +#define IAR_ATM_CTRL2 0xA7 +#define IAR_ATM_CTRL3 0xA8 +/*------------------- 0xA9 */ +#define IAR_LIM_FE_TEST_CTRL 0xAA +#define IAR_CHF_TEST_CTRL 0xAB +#define IAR_VCO_TEST_CTRL 0xAC +#define IAR_PLL_TEST_CTRL 0xAD +#define IAR_PA_TEST_CTRL 0xAE +#define IAR_PMC_TEST_CTRL 0xAF +#define IAR_SCAN_DTM_PROTECT_1 0xFE +#define IAR_SCAN_DTM_PROTECT_0 0xFF + +/* IRQSTS1 bits */ +#define DAR_IRQSTS1_RX_FRM_PEND BIT(7) +#define DAR_IRQSTS1_PLL_UNLOCK_IRQ BIT(6) +#define DAR_IRQSTS1_FILTERFAIL_IRQ BIT(5) +#define DAR_IRQSTS1_RXWTRMRKIRQ BIT(4) +#define DAR_IRQSTS1_CCAIRQ BIT(3) +#define DAR_IRQSTS1_RXIRQ BIT(2) +#define DAR_IRQSTS1_TXIRQ BIT(1) +#define DAR_IRQSTS1_SEQIRQ BIT(0) + +/* IRQSTS2 bits */ +#define DAR_IRQSTS2_CRCVALID BIT(7) +#define DAR_IRQSTS2_CCA BIT(6) +#define DAR_IRQSTS2_SRCADDR BIT(5) +#define DAR_IRQSTS2_PI BIT(4) +#define DAR_IRQSTS2_TMRSTATUS BIT(3) +#define DAR_IRQSTS2_ASM_IRQ BIT(2) +#define DAR_IRQSTS2_PB_ERR_IRQ BIT(1) +#define DAR_IRQSTS2_WAKE_IRQ BIT(0) + +/* IRQSTS3 bits */ +#define DAR_IRQSTS3_TMR4MSK BIT(7) +#define DAR_IRQSTS3_TMR3MSK BIT(6) +#define DAR_IRQSTS3_TMR2MSK BIT(5) +#define DAR_IRQSTS3_TMR1MSK BIT(4) +#define DAR_IRQSTS3_TMR4IRQ BIT(3) +#define DAR_IRQSTS3_TMR3IRQ BIT(2) +#define DAR_IRQSTS3_TMR2IRQ BIT(1) +#define DAR_IRQSTS3_TMR1IRQ BIT(0) + +/* PHY_CTRL1 bits */ +#define DAR_PHY_CTRL1_TMRTRIGEN BIT(7) +#define DAR_PHY_CTRL1_SLOTTED BIT(6) +#define DAR_PHY_CTRL1_CCABFRTX BIT(5) +#define DAR_PHY_CTRL1_CCABFRTX_SHIFT 5 +#define DAR_PHY_CTRL1_RXACKRQD BIT(4) +#define DAR_PHY_CTRL1_AUTOACK BIT(3) +#define DAR_PHY_CTRL1_XCVSEQ_MASK 0x07 + +/* PHY_CTRL2 bits */ +#define DAR_PHY_CTRL2_CRC_MSK BIT(7) +#define DAR_PHY_CTRL2_PLL_UNLOCK_MSK BIT(6) +#define DAR_PHY_CTRL2_FILTERFAIL_MSK BIT(5) +#define DAR_PHY_CTRL2_RX_WMRK_MSK BIT(4) +#define DAR_PHY_CTRL2_CCAMSK BIT(3) +#define DAR_PHY_CTRL2_RXMSK BIT(2) +#define DAR_PHY_CTRL2_TXMSK BIT(1) +#define DAR_PHY_CTRL2_SEQMSK BIT(0) + +/* PHY_CTRL3 bits */ +#define DAR_PHY_CTRL3_TMR4CMP_EN BIT(7) +#define DAR_PHY_CTRL3_TMR3CMP_EN BIT(6) +#define DAR_PHY_CTRL3_TMR2CMP_EN BIT(5) +#define DAR_PHY_CTRL3_TMR1CMP_EN BIT(4) +#define DAR_PHY_CTRL3_ASM_MSK BIT(2) +#define DAR_PHY_CTRL3_PB_ERR_MSK BIT(1) +#define DAR_PHY_CTRL3_WAKE_MSK BIT(0) + +/* RX_FRM_LEN bits */ +#define DAR_RX_FRAME_LENGTH_MASK (0x7F) + +/* PHY_CTRL4 bits */ +#define DAR_PHY_CTRL4_TRCV_MSK BIT(7) +#define DAR_PHY_CTRL4_TC3TMOUT BIT(6) +#define DAR_PHY_CTRL4_PANCORDNTR0 BIT(5) +#define DAR_PHY_CTRL4_CCATYPE (3) +#define DAR_PHY_CTRL4_CCATYPE_SHIFT (3) +#define DAR_PHY_CTRL4_CCATYPE_MASK (0x18) +#define DAR_PHY_CTRL4_TMRLOAD BIT(2) +#define DAR_PHY_CTRL4_PROMISCUOUS BIT(1) +#define DAR_PHY_CTRL4_TC2PRIME_EN BIT(0) + +/* SRC_CTRL bits */ +#define DAR_SRC_CTRL_INDEX (0x0F) +#define DAR_SRC_CTRL_INDEX_SHIFT (4) +#define DAR_SRC_CTRL_ACK_FRM_PND BIT(3) +#define DAR_SRC_CTRL_SRCADDR_EN BIT(2) +#define DAR_SRC_CTRL_INDEX_EN BIT(1) +#define DAR_SRC_CTRL_INDEX_DISABLE BIT(0) + +/* DAR_ASM_CTRL1 bits */ +#define DAR_ASM_CTRL1_CLEAR BIT(7) +#define DAR_ASM_CTRL1_START BIT(6) +#define DAR_ASM_CTRL1_SELFTST BIT(5) +#define DAR_ASM_CTRL1_CTR BIT(4) +#define DAR_ASM_CTRL1_CBC BIT(3) +#define DAR_ASM_CTRL1_AES BIT(2) +#define DAR_ASM_CTRL1_LOAD_MAC BIT(1) + +/* DAR_ASM_CTRL2 bits */ +#define DAR_ASM_CTRL2_DATA_REG_TYPE_SEL (7) +#define DAR_ASM_CTRL2_DATA_REG_TYPE_SEL_SHIFT (5) +#define DAR_ASM_CTRL2_TSTPAS BIT(1) + +/* DAR_CLK_OUT_CTRL bits */ +#define DAR_CLK_OUT_CTRL_EXTEND BIT(7) +#define DAR_CLK_OUT_CTRL_HIZ BIT(6) +#define DAR_CLK_OUT_CTRL_SR BIT(5) +#define DAR_CLK_OUT_CTRL_DS BIT(4) +#define DAR_CLK_OUT_CTRL_EN BIT(3) +#define DAR_CLK_OUT_CTRL_DIV (7) + +/* DAR_PWR_MODES bits */ +#define DAR_PWR_MODES_XTAL_READY BIT(5) +#define DAR_PWR_MODES_XTALEN BIT(4) +#define DAR_PWR_MODES_ASM_CLK_EN BIT(3) +#define DAR_PWR_MODES_AUTODOZE BIT(1) +#define DAR_PWR_MODES_PMC_MODE BIT(0) + +/* RX_FRAME_FILTER bits */ +#define IAR_RX_FRAME_FLT_FRM_VER (0xC0) +#define IAR_RX_FRAME_FLT_FRM_VER_SHIFT (6) +#define IAR_RX_FRAME_FLT_ACTIVE_PROMISCUOUS BIT(5) +#define IAR_RX_FRAME_FLT_NS_FT BIT(4) +#define IAR_RX_FRAME_FLT_CMD_FT BIT(3) +#define IAR_RX_FRAME_FLT_ACK_FT BIT(2) +#define IAR_RX_FRAME_FLT_DATA_FT BIT(1) +#define IAR_RX_FRAME_FLT_BEACON_FT BIT(0) + +/* DUAL_PAN_CTRL bits */ +#define IAR_DUAL_PAN_CTRL_DUAL_PAN_SAM_LVL_MSK (0xF0) +#define IAR_DUAL_PAN_CTRL_DUAL_PAN_SAM_LVL_SHIFT (4) +#define IAR_DUAL_PAN_CTRL_CURRENT_NETWORK BIT(3) +#define IAR_DUAL_PAN_CTRL_PANCORDNTR1 BIT(2) +#define IAR_DUAL_PAN_CTRL_DUAL_PAN_AUTO BIT(1) +#define IAR_DUAL_PAN_CTRL_ACTIVE_NETWORK BIT(0) + +/* DUAL_PAN_STS bits */ +#define IAR_DUAL_PAN_STS_RECD_ON_PAN1 BIT(7) +#define IAR_DUAL_PAN_STS_RECD_ON_PAN0 BIT(6) +#define IAR_DUAL_PAN_STS_DUAL_PAN_REMAIN (0x3F) + +/* CCA_CTRL bits */ +#define IAR_CCA_CTRL_AGC_FRZ_EN BIT(6) +#define IAR_CCA_CTRL_CONT_RSSI_EN BIT(5) +#define IAR_CCA_CTRL_LQI_RSSI_NOT_CORR BIT(4) +#define IAR_CCA_CTRL_CCA3_AND_NOT_OR BIT(3) +#define IAR_CCA_CTRL_POWER_COMP_EN_LQI BIT(2) +#define IAR_CCA_CTRL_POWER_COMP_EN_ED BIT(1) +#define IAR_CCA_CTRL_POWER_COMP_EN_CCA1 BIT(0) + +/* ANT_PAD_CTRL bits */ +#define IAR_ANT_PAD_CTRL_ANTX_POL (0x0F) +#define IAR_ANT_PAD_CTRL_ANTX_POL_SHIFT (4) +#define IAR_ANT_PAD_CTRL_ANTX_CTRLMODE BIT(3) +#define IAR_ANT_PAD_CTRL_ANTX_HZ BIT(2) +#define IAR_ANT_PAD_CTRL_ANTX_EN (3) + +/* MISC_PAD_CTRL bits */ +#define IAR_MISC_PAD_CTRL_MISO_HIZ_EN BIT(3) +#define IAR_MISC_PAD_CTRL_IRQ_B_OD BIT(2) +#define IAR_MISC_PAD_CTRL_NON_GPIO_DS BIT(1) +#define IAR_MISC_PAD_CTRL_ANTX_CURR (1) + +/* ANT_AGC_CTRL bits */ +#define IAR_ANT_AGC_CTRL_FAD_EN_SHIFT (0) +#define IAR_ANT_AGC_CTRL_FAD_EN_MASK (1) +#define IAR_ANT_AGC_CTRL_ANTX_SHIFT (1) +#define IAR_ANT_AGC_CTRL_ANTX_MASK BIT(AR_ANT_AGC_CTRL_ANTX_SHIFT) + +/* BSM_CTRL bits */ +#define BSM_CTRL_BSM_EN (1) + +/* SOFT_RESET bits */ +#define IAR_SOFT_RESET_SOG_RST BIT(7) +#define IAR_SOFT_RESET_REGS_RST BIT(4) +#define IAR_SOFT_RESET_PLL_RST BIT(3) +#define IAR_SOFT_RESET_TX_RST BIT(2) +#define IAR_SOFT_RESET_RX_RST BIT(1) +#define IAR_SOFT_RESET_SEQ_MGR_RST BIT(0) + +/* SEQ_MGR_CTRL bits */ +#define IAR_SEQ_MGR_CTRL_SEQ_STATE_CTRL (3) +#define IAR_SEQ_MGR_CTRL_SEQ_STATE_CTRL_SHIFT (6) +#define IAR_SEQ_MGR_CTRL_NO_RX_RECYCLE BIT(5) +#define IAR_SEQ_MGR_CTRL_LATCH_PREAMBLE BIT(4) +#define IAR_SEQ_MGR_CTRL_EVENT_TMR_DO_NOT_LATCH BIT(3) +#define IAR_SEQ_MGR_CTRL_CLR_NEW_SEQ_INHIBIT BIT(2) +#define IAR_SEQ_MGR_CTRL_PSM_LOCK_DIS BIT(1) +#define IAR_SEQ_MGR_CTRL_PLL_ABORT_OVRD BIT(0) + +/* SEQ_MGR_STS bits */ +#define IAR_SEQ_MGR_STS_TMR2_SEQ_TRIG_ARMED BIT(7) +#define IAR_SEQ_MGR_STS_RX_MODE BIT(6) +#define IAR_SEQ_MGR_STS_RX_TIMEOUT_PENDING BIT(5) +#define IAR_SEQ_MGR_STS_NEW_SEQ_INHIBIT BIT(4) +#define IAR_SEQ_MGR_STS_SEQ_IDLE BIT(3) +#define IAR_SEQ_MGR_STS_XCVSEQ_ACTUAL (7) + +/* ABORT_STS bits */ +#define IAR_ABORT_STS_PLL_ABORTED BIT(2) +#define IAR_ABORT_STS_TC3_ABORTED BIT(1) +#define IAR_ABORT_STS_SW_ABORTED BIT(0) + +/* IAR_FILTERFAIL_CODE2 bits */ +#define IAR_FILTERFAIL_CODE2_PAN_SEL BIT(7) +#define IAR_FILTERFAIL_CODE2_9_8 (3) + +/* PHY_STS bits */ +#define IAR_PHY_STS_PLL_UNLOCK BIT(7) +#define IAR_PHY_STS_PLL_LOCK_ERR BIT(6) +#define IAR_PHY_STS_PLL_LOCK BIT(5) +#define IAR_PHY_STS_CRCVALID BIT(3) +#define IAR_PHY_STS_FILTERFAIL_FLAG_SEL BIT(2) +#define IAR_PHY_STS_SFD_DET BIT(1) +#define IAR_PHY_STS_PREAMBLE_DET BIT(0) + +/* TESTMODE_CTRL bits */ +#define IAR_TEST_MODE_CTRL_HOT_ANT BIT(4) +#define IAR_TEST_MODE_CTRL_IDEAL_RSSI_EN BIT(3) +#define IAR_TEST_MODE_CTRL_IDEAL_PFC_EN BIT(2) +#define IAR_TEST_MODE_CTRL_CONTINUOUS_EN BIT(1) +#define IAR_TEST_MODE_CTRL_FPGA_EN BIT(0) + +/* DTM_CTRL1 bits */ +#define IAR_DTM_CTRL1_ATM_LOCKED BIT(7) +#define IAR_DTM_CTRL1_DTM_EN BIT(6) +#define IAR_DTM_CTRL1_PAGE5 BIT(5) +#define IAR_DTM_CTRL1_PAGE4 BIT(4) +#define IAR_DTM_CTRL1_PAGE3 BIT(3) +#define IAR_DTM_CTRL1_PAGE2 BIT(2) +#define IAR_DTM_CTRL1_PAGE1 BIT(1) +#define IAR_DTM_CTRL1_PAGE0 BIT(0) + +/* TX_MODE_CTRL */ +#define IAR_TX_MODE_CTRL_TX_INV BIT(4) +#define IAR_TX_MODE_CTRL_BT_EN BIT(3) +#define IAR_TX_MODE_CTRL_DTS2 BIT(2) +#define IAR_TX_MODE_CTRL_DTS1 BIT(1) +#define IAR_TX_MODE_CTRL_DTS0 BIT(0) + +#define TX_MODE_CTRL_DTS_MASK (7) + +#endif /* _MCR20A_H */ diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h index 5166575a164d..a115f12bf130 100644 --- a/drivers/net/ipvlan/ipvlan.h +++ b/drivers/net/ipvlan/ipvlan.h @@ -74,6 +74,7 @@ struct ipvl_dev { DECLARE_BITMAP(mac_filters, IPVLAN_MAC_FILTER_SIZE); netdev_features_t sfeatures; u32 msg_enable; + spinlock_t addrs_lock; }; struct ipvl_addr { diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 1b5dc200b573..17daebd19e65 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -109,25 +109,33 @@ void ipvlan_ht_addr_del(struct ipvl_addr *addr) struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan, const void *iaddr, bool is_v6) { - struct ipvl_addr *addr; + struct ipvl_addr *addr, *ret = NULL; - list_for_each_entry(addr, &ipvlan->addrs, anode) - if (addr_equal(is_v6, addr, iaddr)) - return addr; - return NULL; + rcu_read_lock(); + list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) { + if (addr_equal(is_v6, addr, iaddr)) { + ret = addr; + break; + } + } + rcu_read_unlock(); + return ret; } bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6) { struct ipvl_dev *ipvlan; + bool ret = false; - ASSERT_RTNL(); - - list_for_each_entry(ipvlan, &port->ipvlans, pnode) { - if (ipvlan_find_addr(ipvlan, iaddr, is_v6)) - return true; + rcu_read_lock(); + list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) { + if (ipvlan_find_addr(ipvlan, iaddr, is_v6)) { + ret = true; + break; + } } - return false; + rcu_read_unlock(); + return ret; } static void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int *type) @@ -498,8 +506,8 @@ static int ipvlan_process_outbound(struct sk_buff *skb) /* In this mode we dont care about multicast and broadcast traffic */ if (is_multicast_ether_addr(ethh->h_dest)) { - pr_warn_ratelimited("Dropped {multi|broad}cast of type= [%x]\n", - ntohs(skb->protocol)); + pr_debug_ratelimited("Dropped {multi|broad}cast of type=[%x]\n", + ntohs(skb->protocol)); kfree_skb(skb); goto out; } diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 67c91ceda979..3efc1c92c6a7 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -227,8 +227,10 @@ static int ipvlan_open(struct net_device *dev) else dev->flags &= ~IFF_NOARP; - list_for_each_entry(addr, &ipvlan->addrs, anode) + rcu_read_lock(); + list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) ipvlan_ht_addr_add(ipvlan, addr); + rcu_read_unlock(); return dev_uc_add(phy_dev, phy_dev->dev_addr); } @@ -244,8 +246,10 @@ static int ipvlan_stop(struct net_device *dev) dev_uc_del(phy_dev, phy_dev->dev_addr); - list_for_each_entry(addr, &ipvlan->addrs, anode) + rcu_read_lock(); + list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) ipvlan_ht_addr_del(addr); + rcu_read_unlock(); return 0; } @@ -588,6 +592,7 @@ int ipvlan_link_new(struct net *src_net, struct net_device *dev, ipvlan->sfeatures = IPVLAN_FEATURES; ipvlan_adjust_mtu(ipvlan, phy_dev); INIT_LIST_HEAD(&ipvlan->addrs); + spin_lock_init(&ipvlan->addrs_lock); /* TODO Probably put random address here to be presented to the * world but keep using the physical-dev address for the outgoing @@ -665,11 +670,13 @@ void ipvlan_link_delete(struct net_device *dev, struct list_head *head) struct ipvl_dev *ipvlan = netdev_priv(dev); struct ipvl_addr *addr, *next; + spin_lock_bh(&ipvlan->addrs_lock); list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) { ipvlan_ht_addr_del(addr); - list_del(&addr->anode); + list_del_rcu(&addr->anode); kfree_rcu(addr, rcu); } + spin_unlock_bh(&ipvlan->addrs_lock); ida_simple_remove(&ipvlan->port->ida, dev->dev_id); list_del_rcu(&ipvlan->pnode); @@ -760,8 +767,7 @@ static int ipvlan_device_event(struct notifier_block *unused, if (dev->reg_state != NETREG_UNREGISTERING) break; - list_for_each_entry_safe(ipvlan, next, &port->ipvlans, - pnode) + list_for_each_entry_safe(ipvlan, next, &port->ipvlans, pnode) ipvlan->dev->rtnl_link_ops->dellink(ipvlan->dev, &lst_kill); unregister_netdevice_many(&lst_kill); @@ -793,6 +799,7 @@ static int ipvlan_device_event(struct notifier_block *unused, return NOTIFY_DONE; } +/* the caller must held the addrs lock */ static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6) { struct ipvl_addr *addr; @@ -811,7 +818,8 @@ static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6) addr->atype = IPVL_IPV6; #endif } - list_add_tail(&addr->anode, &ipvlan->addrs); + + list_add_tail_rcu(&addr->anode, &ipvlan->addrs); /* If the interface is not up, the address will be added to the hash * list by ipvlan_open. @@ -826,15 +834,17 @@ static void ipvlan_del_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6) { struct ipvl_addr *addr; + spin_lock_bh(&ipvlan->addrs_lock); addr = ipvlan_find_addr(ipvlan, iaddr, is_v6); - if (!addr) + if (!addr) { + spin_unlock_bh(&ipvlan->addrs_lock); return; + } ipvlan_ht_addr_del(addr); - list_del(&addr->anode); + list_del_rcu(&addr->anode); + spin_unlock_bh(&ipvlan->addrs_lock); kfree_rcu(addr, rcu); - - return; } static bool ipvlan_is_valid_dev(const struct net_device *dev) @@ -853,14 +863,17 @@ static bool ipvlan_is_valid_dev(const struct net_device *dev) #if IS_ENABLED(CONFIG_IPV6) static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr) { - if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true)) { + int ret = -EINVAL; + + spin_lock_bh(&ipvlan->addrs_lock); + if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true)) netif_err(ipvlan, ifup, ipvlan->dev, "Failed to add IPv6=%pI6c addr for %s intf\n", ip6_addr, ipvlan->dev->name); - return -EINVAL; - } - - return ipvlan_add_addr(ipvlan, ip6_addr, true); + else + ret = ipvlan_add_addr(ipvlan, ip6_addr, true); + spin_unlock_bh(&ipvlan->addrs_lock); + return ret; } static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr) @@ -899,10 +912,6 @@ static int ipvlan_addr6_validator_event(struct notifier_block *unused, struct net_device *dev = (struct net_device *)i6vi->i6vi_dev->dev; struct ipvl_dev *ipvlan = netdev_priv(dev); - /* FIXME IPv6 autoconf calls us from bh without RTNL */ - if (in_softirq()) - return NOTIFY_DONE; - if (!ipvlan_is_valid_dev(dev)) return NOTIFY_DONE; @@ -922,14 +931,17 @@ static int ipvlan_addr6_validator_event(struct notifier_block *unused, static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr) { - if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false)) { + int ret = -EINVAL; + + spin_lock_bh(&ipvlan->addrs_lock); + if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false)) netif_err(ipvlan, ifup, ipvlan->dev, "Failed to add IPv4=%pI4 on %s intf.\n", ip4_addr, ipvlan->dev->name); - return -EINVAL; - } - - return ipvlan_add_addr(ipvlan, ip4_addr, false); + else + ret = ipvlan_add_addr(ipvlan, ip4_addr, false); + spin_unlock_bh(&ipvlan->addrs_lock); + return ret; } static void ipvlan_del_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr) @@ -1024,6 +1036,7 @@ static struct pernet_operations ipvlan_net_ops = { .id = &ipvlan_netid, .size = sizeof(struct ipvlan_netns), .exit = ipvlan_ns_exit, + .async = true, }; static int __init ipvlan_init_module(void) diff --git a/drivers/net/phy/aquantia.c b/drivers/net/phy/aquantia.c index e8ae50e1255e..319edc9c8ec7 100644 --- a/drivers/net/phy/aquantia.c +++ b/drivers/net/phy/aquantia.c @@ -38,14 +38,6 @@ static int aquantia_config_aneg(struct phy_device *phydev) return 0; } -static int aquantia_aneg_done(struct phy_device *phydev) -{ - int reg; - - reg = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_STAT1); - return (reg < 0) ? reg : (reg & BMSR_ANEGCOMPLETE); -} - static int aquantia_config_intr(struct phy_device *phydev) { int err; @@ -125,7 +117,7 @@ static struct phy_driver aquantia_driver[] = { .name = "Aquantia AQ1202", .features = PHY_AQUANTIA_FEATURES, .flags = PHY_HAS_INTERRUPT, - .aneg_done = aquantia_aneg_done, + .aneg_done = genphy_c45_aneg_done, .config_aneg = aquantia_config_aneg, .config_intr = aquantia_config_intr, .ack_interrupt = aquantia_ack_interrupt, @@ -137,7 +129,7 @@ static struct phy_driver aquantia_driver[] = { .name = "Aquantia AQ2104", .features = PHY_AQUANTIA_FEATURES, .flags = PHY_HAS_INTERRUPT, - .aneg_done = aquantia_aneg_done, + .aneg_done = genphy_c45_aneg_done, .config_aneg = aquantia_config_aneg, .config_intr = aquantia_config_intr, .ack_interrupt = aquantia_ack_interrupt, @@ -149,7 +141,7 @@ static struct phy_driver aquantia_driver[] = { .name = "Aquantia AQR105", .features = PHY_AQUANTIA_FEATURES, .flags = PHY_HAS_INTERRUPT, - .aneg_done = aquantia_aneg_done, + .aneg_done = genphy_c45_aneg_done, .config_aneg = aquantia_config_aneg, .config_intr = aquantia_config_intr, .ack_interrupt = aquantia_ack_interrupt, @@ -161,7 +153,7 @@ static struct phy_driver aquantia_driver[] = { .name = "Aquantia AQR106", .features = PHY_AQUANTIA_FEATURES, .flags = PHY_HAS_INTERRUPT, - .aneg_done = aquantia_aneg_done, + .aneg_done = genphy_c45_aneg_done, .config_aneg = aquantia_config_aneg, .config_intr = aquantia_config_intr, .ack_interrupt = aquantia_ack_interrupt, @@ -173,7 +165,7 @@ static struct phy_driver aquantia_driver[] = { .name = "Aquantia AQR107", .features = PHY_AQUANTIA_FEATURES, .flags = PHY_HAS_INTERRUPT, - .aneg_done = aquantia_aneg_done, + .aneg_done = genphy_c45_aneg_done, .config_aneg = aquantia_config_aneg, .config_intr = aquantia_config_intr, .ack_interrupt = aquantia_ack_interrupt, @@ -185,7 +177,7 @@ static struct phy_driver aquantia_driver[] = { .name = "Aquantia AQR405", .features = PHY_AQUANTIA_FEATURES, .flags = PHY_HAS_INTERRUPT, - .aneg_done = aquantia_aneg_done, + .aneg_done = genphy_c45_aneg_done, .config_aneg = aquantia_config_aneg, .config_intr = aquantia_config_intr, .ack_interrupt = aquantia_ack_interrupt, diff --git a/drivers/net/phy/cortina.c b/drivers/net/phy/cortina.c index 9442db221834..8022cd317f62 100644 --- a/drivers/net/phy/cortina.c +++ b/drivers/net/phy/cortina.c @@ -30,14 +30,6 @@ static int cortina_read_reg(struct phy_device *phydev, u16 regnum) MII_ADDR_C45 | regnum); } -static int cortina_config_aneg(struct phy_device *phydev) -{ - phydev->supported = SUPPORTED_10000baseT_Full; - phydev->advertising = SUPPORTED_10000baseT_Full; - - return 0; -} - static int cortina_read_status(struct phy_device *phydev) { int gpio_int_status, ret = 0; @@ -61,11 +53,6 @@ err: return ret; } -static int cortina_soft_reset(struct phy_device *phydev) -{ - return 0; -} - static int cortina_probe(struct phy_device *phydev) { u32 phy_id = 0; @@ -101,9 +88,10 @@ static struct phy_driver cortina_driver[] = { .phy_id = PHY_ID_CS4340, .phy_id_mask = 0xffffffff, .name = "Cortina CS4340", - .config_aneg = cortina_config_aneg, + .config_init = gen10g_config_init, + .config_aneg = gen10g_config_aneg, .read_status = cortina_read_status, - .soft_reset = cortina_soft_reset, + .soft_reset = gen10g_no_soft_reset, .probe = cortina_probe, }, }; diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index 8a0bd98fdec7..4f1efa02a930 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -71,15 +71,6 @@ static int mv3310_probe(struct phy_device *phydev) return 0; } -/* - * Resetting the MV88X3310 causes it to become non-responsive. Avoid - * setting the reset bit(s). - */ -static int mv3310_soft_reset(struct phy_device *phydev) -{ - return 0; -} - static int mv3310_config_init(struct phy_device *phydev) { __ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = { 0, }; @@ -377,7 +368,7 @@ static struct phy_driver mv3310_drivers[] = { SUPPORTED_10000baseT_Full | SUPPORTED_Backplane, .probe = mv3310_probe, - .soft_reset = mv3310_soft_reset, + .soft_reset = gen10g_no_soft_reset, .config_init = mv3310_config_init, .config_aneg = mv3310_config_aneg, .aneg_done = mv3310_aneg_done, diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index a4576859afae..0017eddc24db 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -268,12 +268,13 @@ EXPORT_SYMBOL_GPL(genphy_c45_read_mdix); /* The gen10g_* functions are the old Clause 45 stub */ -static int gen10g_config_aneg(struct phy_device *phydev) +int gen10g_config_aneg(struct phy_device *phydev) { return 0; } +EXPORT_SYMBOL_GPL(gen10g_config_aneg); -static int gen10g_read_status(struct phy_device *phydev) +int gen10g_read_status(struct phy_device *phydev) { u32 mmd_mask = phydev->c45_ids.devices_in_package; int ret; @@ -291,14 +292,16 @@ static int gen10g_read_status(struct phy_device *phydev) return 0; } +EXPORT_SYMBOL_GPL(gen10g_read_status); -static int gen10g_soft_reset(struct phy_device *phydev) +int gen10g_no_soft_reset(struct phy_device *phydev) { /* Do nothing for now */ return 0; } +EXPORT_SYMBOL_GPL(gen10g_no_soft_reset); -static int gen10g_config_init(struct phy_device *phydev) +int gen10g_config_init(struct phy_device *phydev) { /* Temporarily just say we support everything */ phydev->supported = SUPPORTED_10000baseT_Full; @@ -306,22 +309,25 @@ static int gen10g_config_init(struct phy_device *phydev) return 0; } +EXPORT_SYMBOL_GPL(gen10g_config_init); -static int gen10g_suspend(struct phy_device *phydev) +int gen10g_suspend(struct phy_device *phydev) { return 0; } +EXPORT_SYMBOL_GPL(gen10g_suspend); -static int gen10g_resume(struct phy_device *phydev) +int gen10g_resume(struct phy_device *phydev) { return 0; } +EXPORT_SYMBOL_GPL(gen10g_resume); struct phy_driver genphy_10g_driver = { .phy_id = 0xffffffff, .phy_id_mask = 0xffffffff, .name = "Generic 10G PHY", - .soft_reset = gen10g_soft_reset, + .soft_reset = gen10g_no_soft_reset, .config_init = gen10g_config_init, .features = 0, .config_aneg = gen10g_config_aneg, diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 6ac8b29b2dc3..7d1724072325 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -679,7 +679,6 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy) mutex_lock(&phy->lock); mutex_lock(&pl->state_mutex); - pl->netdev->phydev = phy; pl->phydev = phy; linkmode_copy(pl->supported, supported); linkmode_copy(pl->link_config.advertising, config.advertising); @@ -817,7 +816,6 @@ void phylink_disconnect_phy(struct phylink *pl) if (phy) { mutex_lock(&phy->lock); mutex_lock(&pl->state_mutex); - pl->netdev->phydev = NULL; pl->phydev = NULL; mutex_unlock(&pl->state_mutex); mutex_unlock(&phy->lock); @@ -1584,25 +1582,14 @@ static int phylink_sfp_module_insert(void *upstream, bool changed; u8 port; - sfp_parse_support(pl->sfp_bus, id, support); - port = sfp_parse_port(pl->sfp_bus, id, support); - iface = sfp_parse_interface(pl->sfp_bus, id); - ASSERT_RTNL(); - switch (iface) { - case PHY_INTERFACE_MODE_SGMII: - case PHY_INTERFACE_MODE_1000BASEX: - case PHY_INTERFACE_MODE_2500BASEX: - case PHY_INTERFACE_MODE_10GKR: - break; - default: - return -EINVAL; - } + sfp_parse_support(pl->sfp_bus, id, support); + port = sfp_parse_port(pl->sfp_bus, id, support); memset(&config, 0, sizeof(config)); linkmode_copy(config.advertising, support); - config.interface = iface; + config.interface = PHY_INTERFACE_MODE_NA; config.speed = SPEED_UNKNOWN; config.duplex = DUPLEX_UNKNOWN; config.pause = MLO_PAUSE_AN; @@ -1611,6 +1598,22 @@ static int phylink_sfp_module_insert(void *upstream, /* Ignore errors if we're expecting a PHY to attach later */ ret = phylink_validate(pl, support, &config); if (ret) { + netdev_err(pl->netdev, "validation with support %*pb failed: %d\n", + __ETHTOOL_LINK_MODE_MASK_NBITS, support, ret); + return ret; + } + + iface = sfp_select_interface(pl->sfp_bus, id, config.advertising); + if (iface == PHY_INTERFACE_MODE_NA) { + netdev_err(pl->netdev, + "selection of interface failed, advertisment %*pb\n", + __ETHTOOL_LINK_MODE_MASK_NBITS, config.advertising); + return -EINVAL; + } + + config.interface = iface; + ret = phylink_validate(pl, support, &config); + if (ret) { netdev_err(pl->netdev, "validation of %s/%s with support %*pb failed: %d\n", phylink_an_mode_str(MLO_AN_INBAND), phy_modes(config.interface), diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c index 8961209ee949..3d4ff5d0d2a6 100644 --- a/drivers/net/phy/sfp-bus.c +++ b/drivers/net/phy/sfp-bus.c @@ -106,68 +106,6 @@ int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id, EXPORT_SYMBOL_GPL(sfp_parse_port); /** - * sfp_parse_interface() - Parse the phy_interface_t - * @bus: a pointer to the &struct sfp_bus structure for the sfp module - * @id: a pointer to the module's &struct sfp_eeprom_id - * - * Derive the phy_interface_t mode for the information found in the - * module's identifying EEPROM. There is no standard or defined way - * to derive this information, so we use some heuristics. - * - * If the encoding is 64b66b, then the module must be >= 10G, so - * return %PHY_INTERFACE_MODE_10GKR. - * - * If it's 8b10b, then it's 1G or slower. If it's definitely a fibre - * module, return %PHY_INTERFACE_MODE_1000BASEX mode, otherwise return - * %PHY_INTERFACE_MODE_SGMII mode. - * - * If the encoding is not known, return %PHY_INTERFACE_MODE_NA. - */ -phy_interface_t sfp_parse_interface(struct sfp_bus *bus, - const struct sfp_eeprom_id *id) -{ - phy_interface_t iface; - - /* Setting the serdes link mode is guesswork: there's no field in - * the EEPROM which indicates what mode should be used. - * - * If the module wants 64b66b, then it must be >= 10G. - * - * If it's a gigabit-only fiber module, it probably does not have - * a PHY, so switch to 802.3z negotiation mode. Otherwise, switch - * to SGMII mode (which is required to support non-gigabit speeds). - */ - switch (id->base.encoding) { - case SFP_ENCODING_8472_64B66B: - iface = PHY_INTERFACE_MODE_10GKR; - break; - - case SFP_ENCODING_8B10B: - if (!id->base.e1000_base_t && - !id->base.e100_base_lx && - !id->base.e100_base_fx) - iface = PHY_INTERFACE_MODE_1000BASEX; - else - iface = PHY_INTERFACE_MODE_SGMII; - break; - - default: - if (id->base.e1000_base_cx) { - iface = PHY_INTERFACE_MODE_1000BASEX; - break; - } - - iface = PHY_INTERFACE_MODE_NA; - dev_err(bus->sfp_dev, - "SFP module encoding does not support 8b10b nor 64b66b\n"); - break; - } - - return iface; -} -EXPORT_SYMBOL_GPL(sfp_parse_interface); - -/** * sfp_parse_support() - Parse the eeprom id for supported link modes * @bus: a pointer to the &struct sfp_bus structure for the sfp module * @id: a pointer to the module's &struct sfp_eeprom_id @@ -180,10 +118,7 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, unsigned long *support) { unsigned int br_min, br_nom, br_max; - - phylink_set(support, Autoneg); - phylink_set(support, Pause); - phylink_set(support, Asym_Pause); + __ETHTOOL_DECLARE_LINK_MODE_MASK(modes) = { 0, }; /* Decode the bitrate information to MBd */ br_min = br_nom = br_max = 0; @@ -201,20 +136,20 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, /* Set ethtool support from the compliance fields. */ if (id->base.e10g_base_sr) - phylink_set(support, 10000baseSR_Full); + phylink_set(modes, 10000baseSR_Full); if (id->base.e10g_base_lr) - phylink_set(support, 10000baseLR_Full); + phylink_set(modes, 10000baseLR_Full); if (id->base.e10g_base_lrm) - phylink_set(support, 10000baseLRM_Full); + phylink_set(modes, 10000baseLRM_Full); if (id->base.e10g_base_er) - phylink_set(support, 10000baseER_Full); + phylink_set(modes, 10000baseER_Full); if (id->base.e1000_base_sx || id->base.e1000_base_lx || id->base.e1000_base_cx) - phylink_set(support, 1000baseX_Full); + phylink_set(modes, 1000baseX_Full); if (id->base.e1000_base_t) { - phylink_set(support, 1000baseT_Half); - phylink_set(support, 1000baseT_Full); + phylink_set(modes, 1000baseT_Half); + phylink_set(modes, 1000baseT_Full); } /* 1000Base-PX or 1000Base-BX10 */ @@ -228,20 +163,20 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, if ((id->base.sfp_ct_passive || id->base.sfp_ct_active) && br_nom) { /* This may look odd, but some manufacturers use 12000MBd */ if (br_min <= 12000 && br_max >= 10300) - phylink_set(support, 10000baseCR_Full); + phylink_set(modes, 10000baseCR_Full); if (br_min <= 3200 && br_max >= 3100) - phylink_set(support, 2500baseX_Full); + phylink_set(modes, 2500baseX_Full); if (br_min <= 1300 && br_max >= 1200) - phylink_set(support, 1000baseX_Full); + phylink_set(modes, 1000baseX_Full); } if (id->base.sfp_ct_passive) { if (id->base.passive.sff8431_app_e) - phylink_set(support, 10000baseCR_Full); + phylink_set(modes, 10000baseCR_Full); } if (id->base.sfp_ct_active) { if (id->base.active.sff8431_app_e || id->base.active.sff8431_lim) { - phylink_set(support, 10000baseCR_Full); + phylink_set(modes, 10000baseCR_Full); } } @@ -249,18 +184,18 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, case 0x00: /* Unspecified */ break; case 0x02: /* 100Gbase-SR4 or 25Gbase-SR */ - phylink_set(support, 100000baseSR4_Full); - phylink_set(support, 25000baseSR_Full); + phylink_set(modes, 100000baseSR4_Full); + phylink_set(modes, 25000baseSR_Full); break; case 0x03: /* 100Gbase-LR4 or 25Gbase-LR */ case 0x04: /* 100Gbase-ER4 or 25Gbase-ER */ - phylink_set(support, 100000baseLR4_ER4_Full); + phylink_set(modes, 100000baseLR4_ER4_Full); break; case 0x0b: /* 100Gbase-CR4 or 25Gbase-CR CA-L */ case 0x0c: /* 25Gbase-CR CA-S */ case 0x0d: /* 25Gbase-CR CA-N */ - phylink_set(support, 100000baseCR4_Full); - phylink_set(support, 25000baseCR_Full); + phylink_set(modes, 100000baseCR4_Full); + phylink_set(modes, 25000baseCR_Full); break; default: dev_warn(bus->sfp_dev, @@ -274,13 +209,70 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, id->base.fc_speed_200 || id->base.fc_speed_400) { if (id->base.br_nominal >= 31) - phylink_set(support, 2500baseX_Full); + phylink_set(modes, 2500baseX_Full); if (id->base.br_nominal >= 12) - phylink_set(support, 1000baseX_Full); + phylink_set(modes, 1000baseX_Full); } + + /* If we haven't discovered any modes that this module supports, try + * the encoding and bitrate to determine supported modes. Some BiDi + * modules (eg, 1310nm/1550nm) are not 1000BASE-BX compliant due to + * the differing wavelengths, so do not set any transceiver bits. + */ + if (bitmap_empty(modes, __ETHTOOL_LINK_MODE_MASK_NBITS)) { + /* If the encoding and bit rate allows 1000baseX */ + if (id->base.encoding == SFP_ENCODING_8B10B && br_nom && + br_min <= 1300 && br_max >= 1200) + phylink_set(modes, 1000baseX_Full); + } + + bitmap_or(support, support, modes, __ETHTOOL_LINK_MODE_MASK_NBITS); + + phylink_set(support, Autoneg); + phylink_set(support, Pause); + phylink_set(support, Asym_Pause); } EXPORT_SYMBOL_GPL(sfp_parse_support); +/** + * sfp_select_interface() - Select appropriate phy_interface_t mode + * @bus: a pointer to the &struct sfp_bus structure for the sfp module + * @id: a pointer to the module's &struct sfp_eeprom_id + * @link_modes: ethtool link modes mask + * + * Derive the phy_interface_t mode for the information found in the + * module's identifying EEPROM and the link modes mask. There is no + * standard or defined way to derive this information, so we decide + * based upon the link mode mask. + */ +phy_interface_t sfp_select_interface(struct sfp_bus *bus, + const struct sfp_eeprom_id *id, + unsigned long *link_modes) +{ + if (phylink_test(link_modes, 10000baseCR_Full) || + phylink_test(link_modes, 10000baseSR_Full) || + phylink_test(link_modes, 10000baseLR_Full) || + phylink_test(link_modes, 10000baseLRM_Full) || + phylink_test(link_modes, 10000baseER_Full)) + return PHY_INTERFACE_MODE_10GKR; + + if (phylink_test(link_modes, 2500baseX_Full)) + return PHY_INTERFACE_MODE_2500BASEX; + + if (id->base.e1000_base_t || + id->base.e100_base_lx || + id->base.e100_base_fx) + return PHY_INTERFACE_MODE_SGMII; + + if (phylink_test(link_modes, 1000baseX_Full)) + return PHY_INTERFACE_MODE_1000BASEX; + + dev_warn(bus->sfp_dev, "Unable to ascertain link mode\n"); + + return PHY_INTERFACE_MODE_NA; +} +EXPORT_SYMBOL_GPL(sfp_select_interface); + static LIST_HEAD(sfp_buses); static DEFINE_MUTEX(sfp_mutex); diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 6c7d9289078d..83bf4959b043 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -42,6 +42,7 @@ enum { SFP_MOD_EMPTY = 0, SFP_MOD_PROBE, + SFP_MOD_HPOWER, SFP_MOD_PRESENT, SFP_MOD_ERROR, @@ -86,6 +87,7 @@ static const enum gpiod_flags gpio_flags[] = { * access the I2C EEPROM. However, Avago modules require 300ms. */ #define T_PROBE_INIT msecs_to_jiffies(300) +#define T_HPOWER_LEVEL msecs_to_jiffies(300) #define T_PROBE_RETRY msecs_to_jiffies(100) /* SFP modules appear to always have their PHY configured for bus address @@ -110,10 +112,12 @@ struct sfp { struct sfp_bus *sfp_bus; struct phy_device *mod_phy; const struct sff_data *type; + u32 max_power_mW; unsigned int (*get_state)(struct sfp *); void (*set_state)(struct sfp *, unsigned int); int (*read)(struct sfp *, bool, u8, void *, size_t); + int (*write)(struct sfp *, bool, u8, void *, size_t); struct gpio_desc *gpio[GPIO_MAX]; @@ -201,10 +205,11 @@ static void sfp_gpio_set_state(struct sfp *sfp, unsigned int state) } } -static int sfp__i2c_read(struct i2c_adapter *i2c, u8 bus_addr, u8 dev_addr, - void *buf, size_t len) +static int sfp_i2c_read(struct sfp *sfp, bool a2, u8 dev_addr, void *buf, + size_t len) { struct i2c_msg msgs[2]; + u8 bus_addr = a2 ? 0x51 : 0x50; int ret; msgs[0].addr = bus_addr; @@ -216,17 +221,38 @@ static int sfp__i2c_read(struct i2c_adapter *i2c, u8 bus_addr, u8 dev_addr, msgs[1].len = len; msgs[1].buf = buf; - ret = i2c_transfer(i2c, msgs, ARRAY_SIZE(msgs)); + ret = i2c_transfer(sfp->i2c, msgs, ARRAY_SIZE(msgs)); if (ret < 0) return ret; return ret == ARRAY_SIZE(msgs) ? len : 0; } -static int sfp_i2c_read(struct sfp *sfp, bool a2, u8 addr, void *buf, - size_t len) +static int sfp_i2c_write(struct sfp *sfp, bool a2, u8 dev_addr, void *buf, + size_t len) { - return sfp__i2c_read(sfp->i2c, a2 ? 0x51 : 0x50, addr, buf, len); + struct i2c_msg msgs[1]; + u8 bus_addr = a2 ? 0x51 : 0x50; + int ret; + + msgs[0].addr = bus_addr; + msgs[0].flags = 0; + msgs[0].len = 1 + len; + msgs[0].buf = kmalloc(1 + len, GFP_KERNEL); + if (!msgs[0].buf) + return -ENOMEM; + + msgs[0].buf[0] = dev_addr; + memcpy(&msgs[0].buf[1], buf, len); + + ret = i2c_transfer(sfp->i2c, msgs, ARRAY_SIZE(msgs)); + + kfree(msgs[0].buf); + + if (ret < 0) + return ret; + + return ret == ARRAY_SIZE(msgs) ? len : 0; } static int sfp_i2c_configure(struct sfp *sfp, struct i2c_adapter *i2c) @@ -239,6 +265,7 @@ static int sfp_i2c_configure(struct sfp *sfp, struct i2c_adapter *i2c) sfp->i2c = i2c; sfp->read = sfp_i2c_read; + sfp->write = sfp_i2c_write; i2c_mii = mdio_i2c_alloc(sfp->dev, i2c); if (IS_ERR(i2c_mii)) @@ -274,6 +301,11 @@ static int sfp_read(struct sfp *sfp, bool a2, u8 addr, void *buf, size_t len) return sfp->read(sfp, a2, addr, buf, len); } +static int sfp_write(struct sfp *sfp, bool a2, u8 addr, void *buf, size_t len) +{ + return sfp->write(sfp, a2, addr, buf, len); +} + static unsigned int sfp_check(void *buf, size_t len) { u8 *p, check; @@ -462,21 +494,83 @@ static void sfp_sm_mod_init(struct sfp *sfp) sfp_sm_probe_phy(sfp); } +static int sfp_sm_mod_hpower(struct sfp *sfp) +{ + u32 power; + u8 val; + int err; + + power = 1000; + if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_POWER_DECL)) + power = 1500; + if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_HIGH_POWER_LEVEL)) + power = 2000; + + if (sfp->id.ext.sff8472_compliance == SFP_SFF8472_COMPLIANCE_NONE && + (sfp->id.ext.diagmon & (SFP_DIAGMON_DDM | SFP_DIAGMON_ADDRMODE)) != + SFP_DIAGMON_DDM) { + /* The module appears not to implement bus address 0xa2, + * or requires an address change sequence, so assume that + * the module powers up in the indicated power mode. + */ + if (power > sfp->max_power_mW) { + dev_err(sfp->dev, + "Host does not support %u.%uW modules\n", + power / 1000, (power / 100) % 10); + return -EINVAL; + } + return 0; + } + + if (power > sfp->max_power_mW) { + dev_warn(sfp->dev, + "Host does not support %u.%uW modules, module left in power mode 1\n", + power / 1000, (power / 100) % 10); + return 0; + } + + if (power <= 1000) + return 0; + + err = sfp_read(sfp, true, SFP_EXT_STATUS, &val, sizeof(val)); + if (err != sizeof(val)) { + dev_err(sfp->dev, "Failed to read EEPROM: %d\n", err); + err = -EAGAIN; + goto err; + } + + val |= BIT(0); + + err = sfp_write(sfp, true, SFP_EXT_STATUS, &val, sizeof(val)); + if (err != sizeof(val)) { + dev_err(sfp->dev, "Failed to write EEPROM: %d\n", err); + err = -EAGAIN; + goto err; + } + + dev_info(sfp->dev, "Module switched to %u.%uW power level\n", + power / 1000, (power / 100) % 10); + return T_HPOWER_LEVEL; + +err: + return err; +} + static int sfp_sm_mod_probe(struct sfp *sfp) { /* SFP module inserted - read I2C data */ struct sfp_eeprom_id id; u8 check; - int err; + int ret; - err = sfp_read(sfp, false, 0, &id, sizeof(id)); - if (err < 0) { - dev_err(sfp->dev, "failed to read EEPROM: %d\n", err); + ret = sfp_read(sfp, false, 0, &id, sizeof(id)); + if (ret < 0) { + dev_err(sfp->dev, "failed to read EEPROM: %d\n", ret); return -EAGAIN; } - if (err != sizeof(id)) { - dev_err(sfp->dev, "EEPROM short read: %d\n", err); + if (ret != sizeof(id)) { + dev_err(sfp->dev, "EEPROM short read: %d\n", ret); return -EAGAIN; } @@ -521,7 +615,11 @@ static int sfp_sm_mod_probe(struct sfp *sfp) dev_warn(sfp->dev, "module address swap to access page 0xA2 is not supported.\n"); - return sfp_module_insert(sfp->sfp_bus, &sfp->id); + ret = sfp_module_insert(sfp->sfp_bus, &sfp->id); + if (ret < 0) + return ret; + + return sfp_sm_mod_hpower(sfp); } static void sfp_sm_mod_remove(struct sfp *sfp) @@ -560,17 +658,25 @@ static void sfp_sm_event(struct sfp *sfp, unsigned int event) if (event == SFP_E_REMOVE) { sfp_sm_ins_next(sfp, SFP_MOD_EMPTY, 0); } else if (event == SFP_E_TIMEOUT) { - int err = sfp_sm_mod_probe(sfp); + int val = sfp_sm_mod_probe(sfp); - if (err == 0) + if (val == 0) sfp_sm_ins_next(sfp, SFP_MOD_PRESENT, 0); - else if (err == -EAGAIN) - sfp_sm_set_timer(sfp, T_PROBE_RETRY); - else + else if (val > 0) + sfp_sm_ins_next(sfp, SFP_MOD_HPOWER, val); + else if (val != -EAGAIN) sfp_sm_ins_next(sfp, SFP_MOD_ERROR, 0); + else + sfp_sm_set_timer(sfp, T_PROBE_RETRY); } break; + case SFP_MOD_HPOWER: + if (event == SFP_E_TIMEOUT) { + sfp_sm_ins_next(sfp, SFP_MOD_PRESENT, 0); + break; + } + /* fallthrough */ case SFP_MOD_PRESENT: case SFP_MOD_ERROR: if (event == SFP_E_REMOVE) { @@ -889,6 +995,14 @@ static int sfp_probe(struct platform_device *pdev) if (!(sfp->gpio[GPIO_MODDEF0])) sfp->get_state = sff_gpio_get_state; + device_property_read_u32(&pdev->dev, "maximum-power-milliwatt", + &sfp->max_power_mW); + if (!sfp->max_power_mW) + sfp->max_power_mW = 1000; + + dev_info(sfp->dev, "Host maximum power %u.%uW\n", + sfp->max_power_mW / 1000, (sfp->max_power_mW / 100) % 10); + sfp->sfp_bus = sfp_register_socket(sfp->dev, sfp, &sfp_module_ops); if (!sfp->sfp_bus) return -ENOMEM; diff --git a/drivers/net/phy/teranetics.c b/drivers/net/phy/teranetics.c index fb2cef764e9a..22f3bdd8206c 100644 --- a/drivers/net/phy/teranetics.c +++ b/drivers/net/phy/teranetics.c @@ -34,39 +34,17 @@ MODULE_LICENSE("GPL v2"); MDIO_PHYXS_LNSTAT_SYNC3 | \ MDIO_PHYXS_LNSTAT_ALIGN) -static int teranetics_config_init(struct phy_device *phydev) -{ - phydev->supported = SUPPORTED_10000baseT_Full; - phydev->advertising = SUPPORTED_10000baseT_Full; - - return 0; -} - -static int teranetics_soft_reset(struct phy_device *phydev) -{ - return 0; -} - static int teranetics_aneg_done(struct phy_device *phydev) { - int reg; - /* auto negotiation state can only be checked when using copper * port, if using fiber port, just lie it's done. */ - if (!phy_read_mmd(phydev, MDIO_MMD_VEND1, 93)) { - reg = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_STAT1); - return (reg < 0) ? reg : (reg & BMSR_ANEGCOMPLETE); - } + if (!phy_read_mmd(phydev, MDIO_MMD_VEND1, 93)) + return genphy_c45_aneg_done(phydev); return 1; } -static int teranetics_config_aneg(struct phy_device *phydev) -{ - return 0; -} - static int teranetics_read_status(struct phy_device *phydev) { int reg; @@ -102,10 +80,10 @@ static struct phy_driver teranetics_driver[] = { .phy_id = PHY_ID_TN2020, .phy_id_mask = 0xffffffff, .name = "Teranetics TN2020", - .soft_reset = teranetics_soft_reset, + .soft_reset = gen10g_no_soft_reset, .aneg_done = teranetics_aneg_done, - .config_init = teranetics_config_init, - .config_aneg = teranetics_config_aneg, + .config_init = gen10g_config_init, + .config_aneg = gen10g_config_aneg, .read_status = teranetics_read_status, .match_phy_device = teranetics_match_phy_device, }, diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 255a5def56e9..a393c1dff7dc 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -971,6 +971,7 @@ static struct pernet_operations ppp_net_ops = { .exit = ppp_exit_net, .id = &ppp_net_id, .size = sizeof(struct ppp_net), + .async = true, }; static int ppp_unit_register(struct ppp *ppp, int unit, bool ifname_is_set) diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index bd89d1c559ce..c10e6181a2f0 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -1161,6 +1161,7 @@ static struct pernet_operations pppoe_net_ops = { .exit = pppoe_exit_net, .id = &pppoe_net_id, .size = sizeof(struct pppoe_net), + .async = true, }; static int __init pppoe_init(void) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index a468439969df..5dd781e65958 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1105,14 +1105,15 @@ static void team_port_disable_netpoll(struct team_port *port) } #endif -static int team_upper_dev_link(struct team *team, struct team_port *port) +static int team_upper_dev_link(struct team *team, struct team_port *port, + struct netlink_ext_ack *extack) { struct netdev_lag_upper_info lag_upper_info; int err; lag_upper_info.tx_type = team->mode->lag_tx_type; err = netdev_master_upper_dev_link(port->dev, team->dev, NULL, - &lag_upper_info, NULL); + &lag_upper_info, extack); if (err) return err; port->dev->priv_flags |= IFF_TEAM_PORT; @@ -1129,7 +1130,8 @@ static void __team_port_change_port_added(struct team_port *port, bool linkup); static int team_dev_type_check_change(struct net_device *dev, struct net_device *port_dev); -static int team_port_add(struct team *team, struct net_device *port_dev) +static int team_port_add(struct team *team, struct net_device *port_dev, + struct netlink_ext_ack *extack) { struct net_device *dev = team->dev; struct team_port *port; @@ -1137,12 +1139,14 @@ static int team_port_add(struct team *team, struct net_device *port_dev) int err; if (port_dev->flags & IFF_LOOPBACK) { + NL_SET_ERR_MSG(extack, "Loopback device can't be added as a team port"); netdev_err(dev, "Device %s is loopback device. Loopback devices can't be added as a team port\n", portname); return -EINVAL; } if (team_port_exists(port_dev)) { + NL_SET_ERR_MSG(extack, "Device is already a port of a team device"); netdev_err(dev, "Device %s is already a port " "of a team device\n", portname); return -EBUSY; @@ -1150,6 +1154,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev) if (port_dev->features & NETIF_F_VLAN_CHALLENGED && vlan_uses_dev(dev)) { + NL_SET_ERR_MSG(extack, "Device is VLAN challenged and team device has VLAN set up"); netdev_err(dev, "Device %s is VLAN challenged and team device has VLAN set up\n", portname); return -EPERM; @@ -1160,6 +1165,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev) return err; if (port_dev->flags & IFF_UP) { + NL_SET_ERR_MSG(extack, "Device is up. Set it down before adding it as a team port"); netdev_err(dev, "Device %s is up. Set it down before adding it as a team port\n", portname); return -EBUSY; @@ -1227,7 +1233,7 @@ static int team_port_add(struct team *team, struct net_device *port_dev) goto err_handler_register; } - err = team_upper_dev_link(team, port); + err = team_upper_dev_link(team, port, extack); if (err) { netdev_err(dev, "Device %s failed to set upper link\n", portname); @@ -1921,7 +1927,7 @@ static int team_add_slave(struct net_device *dev, struct net_device *port_dev, int err; mutex_lock(&team->lock); - err = team_port_add(team, port_dev); + err = team_port_add(team, port_dev, extack); mutex_unlock(&team->lock); if (!err) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 9ce0182223a0..e459e601c57f 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1434,6 +1434,7 @@ static struct pernet_operations vrf_net_ops __net_initdata = { .init = vrf_netns_init, .id = &vrf_net_id, .size = sizeof(bool), + .async = true, }; static int __init vrf_init_module(void) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index fab7a4db249e..aa5f034d6ad1 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -3752,6 +3752,7 @@ static struct pernet_operations vxlan_net_ops = { .exit_batch = vxlan_exit_batch_net, .id = &vxlan_net_id, .size = sizeof(struct vxlan_net), + .async = true, }; static int __init vxlan_init_module(void) diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c index b1cf7c6f407a..ef5887037b22 100644 --- a/drivers/net/xen-netback/rx.c +++ b/drivers/net/xen-netback/rx.c @@ -419,7 +419,7 @@ static void xenvif_rx_extra_slot(struct xenvif_queue *queue, BUG(); } -void xenvif_rx_skb(struct xenvif_queue *queue) +static void xenvif_rx_skb(struct xenvif_queue *queue) { struct xenvif_pkt_state pkt; diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 9c36d614bf89..2dee4e03ff1c 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -709,6 +709,7 @@ static struct pernet_operations lockd_net_ops = { .exit = lockd_exit_net, .id = &lockd_net_id, .size = sizeof(struct lockd_net), + .async = true, }; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 7d893543cf3b..6c3083c992e5 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2122,6 +2122,7 @@ static struct pernet_operations nfs_net_ops = { .exit = nfs_net_exit, .id = &nfs_net_id, .size = sizeof(struct nfs_net), + .async = true, }; /* diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c index 5be08f02a76b..8c743a405df6 100644 --- a/fs/nfs_common/grace.c +++ b/fs/nfs_common/grace.c @@ -118,6 +118,7 @@ static struct pernet_operations grace_net_ops = { .exit = grace_exit_net, .id = &grace_net_id, .size = sizeof(struct list_head), + .async = true, }; static int __init diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index a7f16e0f8d68..8a4566691c8f 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -96,7 +96,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \ ({ \ int __ret = 0; \ - if (cgroup_bpf_enabled && sk) { \ + if (cgroup_bpf_enabled) { \ __ret = __cgroup_bpf_run_filter_sk(sk, \ BPF_CGROUP_INET_SOCK_CREATE); \ } \ diff --git a/include/linux/filter.h b/include/linux/filter.h index 276932d75975..fdb691b520c0 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -20,7 +20,6 @@ #include <linux/set_memory.h> #include <linux/kallsyms.h> -#include <net/xdp.h> #include <net/sch_generic.h> #include <uapi/linux/filter.h> @@ -30,6 +29,7 @@ struct sk_buff; struct sock; struct seccomp_data; struct bpf_prog_aux; +struct xdp_rxq_info; /* ArgX, context and stack frame pointer register positions. Note, * Arg1, Arg2, Arg3, etc are used as argument mappings of function diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index bfea26af6de5..4814cad7456e 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1224,6 +1224,12 @@ static inline int mlx5_core_is_pf(struct mlx5_core_dev *dev) return !(dev->priv.pci_dev_data & MLX5_PCI_DEV_IS_VF); } +#define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs((mdev)->pdev)) +#define MLX5_VPORT_MANAGER(mdev) \ + (MLX5_CAP_GEN(mdev, vport_group_manager) && \ + (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \ + mlx5_core_is_pf(mdev)) + static inline int mlx5_get_gid_table_len(u16 param) { if (param > 4) { diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h new file mode 100644 index 000000000000..d3c9db492b30 --- /dev/null +++ b/include/linux/mlx5/eswitch.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ +/* + * Copyright (c) 2018 Mellanox Technologies. All rights reserved. + */ + +#ifndef _MLX5_ESWITCH_ +#define _MLX5_ESWITCH_ + +#include <linux/mlx5/driver.h> + +enum { + SRIOV_NONE, + SRIOV_LEGACY, + SRIOV_OFFLOADS +}; + +enum { + REP_ETH, + REP_IB, + NUM_REP_TYPES, +}; + +struct mlx5_eswitch_rep; +struct mlx5_eswitch_rep_if { + int (*load)(struct mlx5_core_dev *dev, + struct mlx5_eswitch_rep *rep); + void (*unload)(struct mlx5_eswitch_rep *rep); + void *(*get_proto_dev)(struct mlx5_eswitch_rep *rep); + void *priv; + bool valid; +}; + +struct mlx5_eswitch_rep { + struct mlx5_eswitch_rep_if rep_if[NUM_REP_TYPES]; + u16 vport; + u8 hw_id[ETH_ALEN]; + u16 vlan; + u32 vlan_refcount; +}; + +void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, + int vport_index, + struct mlx5_eswitch_rep_if *rep_if, + u8 rep_type); +void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, + int vport_index, + u8 rep_type); +void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw, + int vport, + u8 rep_type); +struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw, + int vport); +void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type); +u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw); +struct mlx5_flow_handle * +mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, + int vport, u32 sqn); +#endif diff --git a/include/linux/mroute.h b/include/linux/mroute.h index 5396521a776a..7ed82e4f11b3 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -4,11 +4,10 @@ #include <linux/in.h> #include <linux/pim.h> -#include <linux/rhashtable.h> -#include <net/sock.h> #include <net/fib_rules.h> #include <net/fib_notifier.h> #include <uapi/linux/mroute.h> +#include <linux/mroute_base.h> #ifdef CONFIG_IP_MROUTE static inline int ip_mroute_opt(int opt) @@ -56,18 +55,6 @@ static inline bool ipmr_rule_default(const struct fib_rule *rule) } #endif -struct vif_device { - struct net_device *dev; /* Device we are using */ - struct netdev_phys_item_id dev_parent_id; /* Device parent ID */ - unsigned long bytes_in,bytes_out; - unsigned long pkt_in,pkt_out; /* Statistics */ - unsigned long rate_limit; /* Traffic shaping (NI) */ - unsigned char threshold; /* TTL threshold */ - unsigned short flags; /* Control flags */ - __be32 local,remote; /* Addresses(remote for tunnels)*/ - int link; /* Physical interface index */ -}; - struct vif_entry_notifier_info { struct fib_notifier_info info; struct net_device *dev; @@ -78,34 +65,6 @@ struct vif_entry_notifier_info { #define VIFF_STATIC 0x8000 -#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL) - -struct mr_table { - struct list_head list; - possible_net_t net; - u32 id; - struct sock __rcu *mroute_sk; - struct timer_list ipmr_expire_timer; - struct list_head mfc_unres_queue; - struct vif_device vif_table[MAXVIFS]; - struct rhltable mfc_hash; - struct list_head mfc_cache_list; - int maxvif; - atomic_t cache_resolve_queue_len; - bool mroute_do_assert; - bool mroute_do_pim; - int mroute_reg_vif_num; -}; - -/* mfc_flags: - * MFC_STATIC - the entry was added statically (not by a routing daemon) - * MFC_OFFLOAD - the entry was offloaded to the hardware - */ -enum { - MFC_STATIC = BIT(0), - MFC_OFFLOAD = BIT(1), -}; - struct mfc_cache_cmp_arg { __be32 mfc_mcastgrp; __be32 mfc_origin; @@ -113,28 +72,13 @@ struct mfc_cache_cmp_arg { /** * struct mfc_cache - multicast routing entries - * @mnode: rhashtable list + * @_c: Common multicast routing information; has to be first [for casting] * @mfc_mcastgrp: destination multicast group address * @mfc_origin: source address * @cmparg: used for rhashtable comparisons - * @mfc_parent: source interface (iif) - * @mfc_flags: entry flags - * @expires: unresolved entry expire time - * @unresolved: unresolved cached skbs - * @last_assert: time of last assert - * @minvif: minimum VIF id - * @maxvif: maximum VIF id - * @bytes: bytes that have passed for this entry - * @pkt: packets that have passed for this entry - * @wrong_if: number of wrong source interface hits - * @lastuse: time of last use of the group (traffic or update) - * @ttls: OIF TTL threshold array - * @refcount: reference count for this entry - * @list: global entry list - * @rcu: used for entry destruction */ struct mfc_cache { - struct rhlist_head mnode; + struct mr_mfc _c; union { struct { __be32 mfc_mcastgrp; @@ -142,28 +86,6 @@ struct mfc_cache { }; struct mfc_cache_cmp_arg cmparg; }; - vifi_t mfc_parent; - int mfc_flags; - - union { - struct { - unsigned long expires; - struct sk_buff_head unresolved; - } unres; - struct { - unsigned long last_assert; - int minvif; - int maxvif; - unsigned long bytes; - unsigned long pkt; - unsigned long wrong_if; - unsigned long lastuse; - unsigned char ttls[MAXVIFS]; - refcount_t refcount; - } res; - } mfc_un; - struct list_head list; - struct rcu_head rcu; }; struct mfc_entry_notifier_info { @@ -187,12 +109,12 @@ static inline void ipmr_cache_free(struct mfc_cache *mfc_cache) static inline void ipmr_cache_put(struct mfc_cache *c) { - if (refcount_dec_and_test(&c->mfc_un.res.refcount)) + if (refcount_dec_and_test(&c->_c.mfc_un.res.refcount)) ipmr_cache_free(c); } static inline void ipmr_cache_hold(struct mfc_cache *c) { - refcount_inc(&c->mfc_un.res.refcount); + refcount_inc(&c->_c.mfc_un.res.refcount); } #endif diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h index 3014c52bfd86..1ac38e6819f5 100644 --- a/include/linux/mroute6.h +++ b/include/linux/mroute6.h @@ -7,6 +7,7 @@ #include <linux/skbuff.h> /* for struct sk_buff_head */ #include <net/net_namespace.h> #include <uapi/linux/mroute6.h> +#include <linux/mroute_base.h> #ifdef CONFIG_IPV6_MROUTE static inline int ip6_mroute_opt(int opt) @@ -62,57 +63,24 @@ static inline void ip6_mr_cleanup(void) } #endif -struct mif_device { - struct net_device *dev; /* Device we are using */ - unsigned long bytes_in,bytes_out; - unsigned long pkt_in,pkt_out; /* Statistics */ - unsigned long rate_limit; /* Traffic shaping (NI) */ - unsigned char threshold; /* TTL threshold */ - unsigned short flags; /* Control flags */ - int link; /* Physical interface index */ -}; - #define VIFF_STATIC 0x8000 -struct mfc6_cache { - struct list_head list; - struct in6_addr mf6c_mcastgrp; /* Group the entry belongs to */ - struct in6_addr mf6c_origin; /* Source of packet */ - mifi_t mf6c_parent; /* Source interface */ - int mfc_flags; /* Flags on line */ +struct mfc6_cache_cmp_arg { + struct in6_addr mf6c_mcastgrp; + struct in6_addr mf6c_origin; +}; +struct mfc6_cache { + struct mr_mfc _c; union { struct { - unsigned long expires; - struct sk_buff_head unresolved; /* Unresolved buffers */ - } unres; - struct { - unsigned long last_assert; - int minvif; - int maxvif; - unsigned long bytes; - unsigned long pkt; - unsigned long wrong_if; - unsigned long lastuse; - unsigned char ttls[MAXMIFS]; /* TTL thresholds */ - } res; - } mfc_un; + struct in6_addr mf6c_mcastgrp; + struct in6_addr mf6c_origin; + }; + struct mfc6_cache_cmp_arg cmparg; + }; }; -#define MFC_STATIC 1 -#define MFC_NOTIFY 2 - -#define MFC6_LINES 64 - -#define MFC6_HASH(a, g) (((__force u32)(a)->s6_addr32[0] ^ \ - (__force u32)(a)->s6_addr32[1] ^ \ - (__force u32)(a)->s6_addr32[2] ^ \ - (__force u32)(a)->s6_addr32[3] ^ \ - (__force u32)(g)->s6_addr32[0] ^ \ - (__force u32)(g)->s6_addr32[1] ^ \ - (__force u32)(g)->s6_addr32[2] ^ \ - (__force u32)(g)->s6_addr32[3]) % MFC6_LINES) - #define MFC_ASSERT_THRESH (3*HZ) /* Maximal freq. of asserts */ struct rtmsg; @@ -120,12 +88,12 @@ extern int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, u32 portid); #ifdef CONFIG_IPV6_MROUTE -extern struct sock *mroute6_socket(struct net *net, struct sk_buff *skb); +bool mroute6_is_socket(struct net *net, struct sk_buff *skb); extern int ip6mr_sk_done(struct sock *sk); #else -static inline struct sock *mroute6_socket(struct net *net, struct sk_buff *skb) +static inline bool mroute6_is_socket(struct net *net, struct sk_buff *skb) { - return NULL; + return false; } static inline int ip6mr_sk_done(struct sock *sk) { diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h new file mode 100644 index 000000000000..c2560cb50f1d --- /dev/null +++ b/include/linux/mroute_base.h @@ -0,0 +1,346 @@ +#ifndef __LINUX_MROUTE_BASE_H +#define __LINUX_MROUTE_BASE_H + +#include <linux/netdevice.h> +#include <linux/rhashtable.h> +#include <linux/spinlock.h> +#include <net/net_namespace.h> +#include <net/sock.h> + +/** + * struct vif_device - interface representor for multicast routing + * @dev: network device being used + * @bytes_in: statistic; bytes ingressing + * @bytes_out: statistic; bytes egresing + * @pkt_in: statistic; packets ingressing + * @pkt_out: statistic; packets egressing + * @rate_limit: Traffic shaping (NI) + * @threshold: TTL threshold + * @flags: Control flags + * @link: Physical interface index + * @dev_parent_id: device parent id + * @local: Local address + * @remote: Remote address for tunnels + */ +struct vif_device { + struct net_device *dev; + unsigned long bytes_in, bytes_out; + unsigned long pkt_in, pkt_out; + unsigned long rate_limit; + unsigned char threshold; + unsigned short flags; + int link; + + /* Currently only used by ipmr */ + struct netdev_phys_item_id dev_parent_id; + __be32 local, remote; +}; + +#ifndef MAXVIFS +/* This one is nasty; value is defined in uapi using different symbols for + * mroute and morute6 but both map into same 32. + */ +#define MAXVIFS 32 +#endif + +#define VIF_EXISTS(_mrt, _idx) (!!((_mrt)->vif_table[_idx].dev)) + +/* mfc_flags: + * MFC_STATIC - the entry was added statically (not by a routing daemon) + * MFC_OFFLOAD - the entry was offloaded to the hardware + */ +enum { + MFC_STATIC = BIT(0), + MFC_OFFLOAD = BIT(1), +}; + +/** + * struct mr_mfc - common multicast routing entries + * @mnode: rhashtable list + * @mfc_parent: source interface (iif) + * @mfc_flags: entry flags + * @expires: unresolved entry expire time + * @unresolved: unresolved cached skbs + * @last_assert: time of last assert + * @minvif: minimum VIF id + * @maxvif: maximum VIF id + * @bytes: bytes that have passed for this entry + * @pkt: packets that have passed for this entry + * @wrong_if: number of wrong source interface hits + * @lastuse: time of last use of the group (traffic or update) + * @ttls: OIF TTL threshold array + * @refcount: reference count for this entry + * @list: global entry list + * @rcu: used for entry destruction + */ +struct mr_mfc { + struct rhlist_head mnode; + unsigned short mfc_parent; + int mfc_flags; + + union { + struct { + unsigned long expires; + struct sk_buff_head unresolved; + } unres; + struct { + unsigned long last_assert; + int minvif; + int maxvif; + unsigned long bytes; + unsigned long pkt; + unsigned long wrong_if; + unsigned long lastuse; + unsigned char ttls[MAXVIFS]; + refcount_t refcount; + } res; + } mfc_un; + struct list_head list; + struct rcu_head rcu; +}; + +struct mr_table; + +/** + * struct mr_table_ops - callbacks and info for protocol-specific ops + * @rht_params: parameters for accessing the MFC hash + * @cmparg_any: a hash key to be used for matching on (*,*) routes + */ +struct mr_table_ops { + const struct rhashtable_params *rht_params; + void *cmparg_any; +}; + +/** + * struct mr_table - a multicast routing table + * @list: entry within a list of multicast routing tables + * @net: net where this table belongs + * @ops: protocol specific operations + * @id: identifier of the table + * @mroute_sk: socket associated with the table + * @ipmr_expire_timer: timer for handling unresolved routes + * @mfc_unres_queue: list of unresolved MFC entries + * @vif_table: array containing all possible vifs + * @mfc_hash: Hash table of all resolved routes for easy lookup + * @mfc_cache_list: list of resovled routes for possible traversal + * @maxvif: Identifier of highest value vif currently in use + * @cache_resolve_queue_len: current size of unresolved queue + * @mroute_do_assert: Whether to inform userspace on wrong ingress + * @mroute_do_pim: Whether to receive IGMP PIMv1 + * @mroute_reg_vif_num: PIM-device vif index + */ +struct mr_table { + struct list_head list; + possible_net_t net; + struct mr_table_ops ops; + u32 id; + struct sock __rcu *mroute_sk; + struct timer_list ipmr_expire_timer; + struct list_head mfc_unres_queue; + struct vif_device vif_table[MAXVIFS]; + struct rhltable mfc_hash; + struct list_head mfc_cache_list; + int maxvif; + atomic_t cache_resolve_queue_len; + bool mroute_do_assert; + bool mroute_do_pim; + int mroute_reg_vif_num; +}; + +#ifdef CONFIG_IP_MROUTE_COMMON +void vif_device_init(struct vif_device *v, + struct net_device *dev, + unsigned long rate_limit, + unsigned char threshold, + unsigned short flags, + unsigned short get_iflink_mask); + +struct mr_table * +mr_table_alloc(struct net *net, u32 id, + struct mr_table_ops *ops, + void (*expire_func)(struct timer_list *t), + void (*table_set)(struct mr_table *mrt, + struct net *net)); + +/* These actually return 'struct mr_mfc *', but to avoid need for explicit + * castings they simply return void. + */ +void *mr_mfc_find_parent(struct mr_table *mrt, + void *hasharg, int parent); +void *mr_mfc_find_any_parent(struct mr_table *mrt, int vifi); +void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg); + +int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, + struct mr_mfc *c, struct rtmsg *rtm); +int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb, + struct mr_table *(*iter)(struct net *net, + struct mr_table *mrt), + int (*fill)(struct mr_table *mrt, + struct sk_buff *skb, + u32 portid, u32 seq, struct mr_mfc *c, + int cmd, int flags), + spinlock_t *lock); +#else +static inline void vif_device_init(struct vif_device *v, + struct net_device *dev, + unsigned long rate_limit, + unsigned char threshold, + unsigned short flags, + unsigned short get_iflink_mask) +{ +} + +static inline void * +mr_table_alloc(struct net *net, u32 id, + struct mr_table_ops *ops, + void (*expire_func)(struct timer_list *t), + void (*table_set)(struct mr_table *mrt, + struct net *net)) +{ + return NULL; +} + +static inline void *mr_mfc_find_parent(struct mr_table *mrt, + void *hasharg, int parent) +{ + return NULL; +} + +static inline void *mr_mfc_find_any_parent(struct mr_table *mrt, + int vifi) +{ + return NULL; +} + +static inline struct mr_mfc *mr_mfc_find_any(struct mr_table *mrt, + int vifi, void *hasharg) +{ + return NULL; +} + +static inline int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, + struct mr_mfc *c, struct rtmsg *rtm) +{ + return -EINVAL; +} + +static inline int +mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb, + struct mr_table *(*iter)(struct net *net, + struct mr_table *mrt), + int (*fill)(struct mr_table *mrt, + struct sk_buff *skb, + u32 portid, u32 seq, struct mr_mfc *c, + int cmd, int flags), + spinlock_t *lock) +{ + return -EINVAL; +} +#endif + +static inline void *mr_mfc_find(struct mr_table *mrt, void *hasharg) +{ + return mr_mfc_find_parent(mrt, hasharg, -1); +} + +#ifdef CONFIG_PROC_FS +struct mr_vif_iter { + struct seq_net_private p; + struct mr_table *mrt; + int ct; +}; + +struct mr_mfc_iter { + struct seq_net_private p; + struct mr_table *mrt; + struct list_head *cache; + + /* Lock protecting the mr_table's unresolved queue */ + spinlock_t *lock; +}; + +#ifdef CONFIG_IP_MROUTE_COMMON +void *mr_vif_seq_idx(struct net *net, struct mr_vif_iter *iter, loff_t pos); +void *mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos); + +static inline void *mr_vif_seq_start(struct seq_file *seq, loff_t *pos) +{ + return *pos ? mr_vif_seq_idx(seq_file_net(seq), + seq->private, *pos - 1) + : SEQ_START_TOKEN; +} + +/* These actually return 'struct mr_mfc *', but to avoid need for explicit + * castings they simply return void. + */ +void *mr_mfc_seq_idx(struct net *net, + struct mr_mfc_iter *it, loff_t pos); +void *mr_mfc_seq_next(struct seq_file *seq, void *v, + loff_t *pos); + +static inline void *mr_mfc_seq_start(struct seq_file *seq, loff_t *pos, + struct mr_table *mrt, spinlock_t *lock) +{ + struct mr_mfc_iter *it = seq->private; + + it->mrt = mrt; + it->cache = NULL; + it->lock = lock; + + return *pos ? mr_mfc_seq_idx(seq_file_net(seq), + seq->private, *pos - 1) + : SEQ_START_TOKEN; +} + +static inline void mr_mfc_seq_stop(struct seq_file *seq, void *v) +{ + struct mr_mfc_iter *it = seq->private; + struct mr_table *mrt = it->mrt; + + if (it->cache == &mrt->mfc_unres_queue) + spin_unlock_bh(it->lock); + else if (it->cache == &mrt->mfc_cache_list) + rcu_read_unlock(); +} +#else +static inline void *mr_vif_seq_idx(struct net *net, struct mr_vif_iter *iter, + loff_t pos) +{ + return NULL; +} + +static inline void *mr_vif_seq_next(struct seq_file *seq, + void *v, loff_t *pos) +{ + return NULL; +} + +static inline void *mr_vif_seq_start(struct seq_file *seq, loff_t *pos) +{ + return NULL; +} + +static inline void *mr_mfc_seq_idx(struct net *net, + struct mr_mfc_iter *it, loff_t pos) +{ + return NULL; +} + +static inline void *mr_mfc_seq_next(struct seq_file *seq, void *v, + loff_t *pos) +{ + return NULL; +} + +static inline void *mr_mfc_seq_start(struct seq_file *seq, loff_t *pos, + struct mr_table *mrt, spinlock_t *lock) +{ + return NULL; +} + +static inline void mr_mfc_seq_stop(struct seq_file *seq, void *v) +{ +} +#endif +#endif +#endif diff --git a/include/linux/phy.h b/include/linux/phy.h index 5a0c3e53e7c2..6e38c699b753 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -994,6 +994,14 @@ int genphy_c45_pma_setup_forced(struct phy_device *phydev); int genphy_c45_an_disable_aneg(struct phy_device *phydev); int genphy_c45_read_mdix(struct phy_device *phydev); +/* The gen10g_* functions are the old Clause 45 stub */ +int gen10g_config_aneg(struct phy_device *phydev); +int gen10g_read_status(struct phy_device *phydev); +int gen10g_no_soft_reset(struct phy_device *phydev); +int gen10g_config_init(struct phy_device *phydev); +int gen10g_suspend(struct phy_device *phydev); +int gen10g_resume(struct phy_device *phydev); + static inline int phy_read_status(struct phy_device *phydev) { if (!phydev->drv) diff --git a/include/linux/sfp.h b/include/linux/sfp.h index e724d5a3dd80..ebce9e24906a 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -422,10 +422,11 @@ struct sfp_upstream_ops { #if IS_ENABLED(CONFIG_SFP) int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id, unsigned long *support); -phy_interface_t sfp_parse_interface(struct sfp_bus *bus, - const struct sfp_eeprom_id *id); void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, unsigned long *support); +phy_interface_t sfp_select_interface(struct sfp_bus *bus, + const struct sfp_eeprom_id *id, + unsigned long *link_modes); int sfp_get_module_info(struct sfp_bus *bus, struct ethtool_modinfo *modinfo); int sfp_get_module_eeprom(struct sfp_bus *bus, struct ethtool_eeprom *ee, @@ -444,18 +445,19 @@ static inline int sfp_parse_port(struct sfp_bus *bus, return PORT_OTHER; } -static inline phy_interface_t sfp_parse_interface(struct sfp_bus *bus, - const struct sfp_eeprom_id *id) -{ - return PHY_INTERFACE_MODE_NA; -} - static inline void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, unsigned long *support) { } +static inline phy_interface_t sfp_select_interface(struct sfp_bus *bus, + const struct sfp_eeprom_id *id, + unsigned long *link_modes) +{ + return PHY_INTERFACE_MODE_NA; +} + static inline int sfp_get_module_info(struct sfp_bus *bus, struct ethtool_modinfo *modinfo) { diff --git a/include/net/Space.h b/include/net/Space.h index 336da258885a..9cce0d80d37a 100644 --- a/include/net/Space.h +++ b/include/net/Space.h @@ -20,7 +20,6 @@ struct net_device *cs89x0_probe(int unit); struct net_device *mvme147lance_probe(int unit); struct net_device *tc515_probe(int unit); struct net_device *lance_probe(int unit); -struct net_device *mac89x0_probe(int unit); struct net_device *cops_probe(int unit); struct net_device *ltpc_probe(void); diff --git a/include/net/ethoc.h b/include/net/ethoc.h index bb7f467da7fc..29ba069a1d93 100644 --- a/include/net/ethoc.h +++ b/include/net/ethoc.h @@ -21,4 +21,3 @@ struct ethoc_platform_data { }; #endif /* !LINUX_NET_ETHOC_H */ - diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index b3d216249240..1c9e17c11953 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -27,7 +27,7 @@ struct fib_rule { u8 action; u8 l3mdev; u8 proto; - /* 1 byte hole, try to use */ + u8 ip_proto; u32 target; __be64 tun_id; struct fib_rule __rcu *ctarget; @@ -40,6 +40,8 @@ struct fib_rule { char iifname[IFNAMSIZ]; char oifname[IFNAMSIZ]; struct fib_kuid_range uid_range; + struct fib_rule_port_range sport_range; + struct fib_rule_port_range dport_range; struct rcu_head rcu; }; @@ -110,7 +112,11 @@ struct fib_rule_notifier_info { [FRA_GOTO] = { .type = NLA_U32 }, \ [FRA_L3MDEV] = { .type = NLA_U8 }, \ [FRA_UID_RANGE] = { .len = sizeof(struct fib_rule_uid_range) }, \ - [FRA_PROTOCOL] = { .type = NLA_U8 } + [FRA_PROTOCOL] = { .type = NLA_U8 }, \ + [FRA_IP_PROTO] = { .type = NLA_U8 }, \ + [FRA_SPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }, \ + [FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) } + static inline void fib_rule_get(struct fib_rule *rule) { @@ -144,6 +150,38 @@ static inline u32 frh_get_table(struct fib_rule_hdr *frh, struct nlattr **nla) return frh->table; } +static inline bool fib_rule_port_range_set(const struct fib_rule_port_range *range) +{ + return range->start != 0 && range->end != 0; +} + +static inline bool fib_rule_port_inrange(const struct fib_rule_port_range *a, + __be16 port) +{ + return ntohs(port) >= a->start && + ntohs(port) <= a->end; +} + +static inline bool fib_rule_port_range_valid(const struct fib_rule_port_range *a) +{ + return a->start != 0 && a->end != 0 && a->end < 0xffff && + a->start <= a->end; +} + +static inline bool fib_rule_port_range_compare(struct fib_rule_port_range *a, + struct fib_rule_port_range *b) +{ + return a->start == b->start && + a->end == b->end; +} + +static inline bool fib_rule_requires_fldissect(struct fib_rule *rule) +{ + return rule->ip_proto || + fib_rule_port_range_set(&rule->sport_range) || + fib_rule_port_range_set(&rule->dport_range); +} + struct fib_rules_ops *fib_rules_register(const struct fib_rules_ops *, struct net *); void fib_rules_unregister(struct fib_rules_ops *); diff --git a/include/net/flow.h b/include/net/flow.h index f1624fd5b1d0..64e7ee9cb980 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -125,7 +125,7 @@ static inline void flowi4_update_output(struct flowi4 *fl4, int oif, __u8 tos, fl4->daddr = daddr; fl4->saddr = saddr; } - + struct flowi6 { struct flowi_common __fl_common; diff --git a/include/net/gre.h b/include/net/gre.h index f90585decbce..797142eee9cd 100644 --- a/include/net/gre.h +++ b/include/net/gre.h @@ -37,6 +37,9 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name, int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, bool *csum_err, __be16 proto, int nhs); +bool is_gretap_dev(const struct net_device *dev); +bool is_ip6gretap_dev(const struct net_device *dev); + static inline int gre_calc_hlen(__be16 o_flags) { int addend = 4; diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index c1a93ce35e62..b68fea022a82 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -49,9 +49,9 @@ struct inet_connection_sock_af_ops { u16 net_header_len; u16 net_frag_header_len; u16 sockaddr_len; - int (*setsockopt)(struct sock *sk, int level, int optname, + int (*setsockopt)(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen); - int (*getsockopt)(struct sock *sk, int level, int optname, + int (*getsockopt)(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); #ifdef CONFIG_COMPAT int (*compat_setsockopt)(struct sock *sk, @@ -67,7 +67,7 @@ struct inet_connection_sock_af_ops { /** inet_connection_sock - INET connection oriented sock * - * @icsk_accept_queue: FIFO of established children + * @icsk_accept_queue: FIFO of established children * @icsk_bind_hash: Bind node * @icsk_timeout: Timeout * @icsk_retransmit_timer: Resend (no ack) @@ -122,7 +122,7 @@ struct inet_connection_sock { unsigned long timeout; /* Currently scheduled timeout */ __u32 lrcvtime; /* timestamp of last received data packet */ __u16 last_seg_size; /* Size of last incoming segment */ - __u16 rcv_mss; /* MSS used for delayed ACK decisions */ + __u16 rcv_mss; /* MSS used for delayed ACK decisions */ } icsk_ack; struct { int enabled; @@ -201,7 +201,7 @@ extern const char inet_csk_timer_bug_msg[]; static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what) { struct inet_connection_sock *icsk = inet_csk(sk); - + if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { icsk->icsk_pending = 0; #ifdef INET_CSK_CLEAR_TIMERS diff --git a/include/net/ip.h b/include/net/ip.h index 746abff9ce51..fe63ba95d12b 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -186,15 +186,15 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); void ip4_datagram_release_cb(struct sock *sk); struct ip_reply_arg { - struct kvec iov[1]; + struct kvec iov[1]; int flags; __wsum csum; int csumoffset; /* u16 offset of csum in iov[0].iov_base */ - /* -1 if not needed */ + /* -1 if not needed */ int bound_dev_if; u8 tos; kuid_t uid; -}; +}; #define IP_REPLY_ARG_NOSRCCHECK 1 @@ -577,13 +577,13 @@ int ip_frag_mem(struct net *net); /* * Functions provided by ip_forward.c */ - + int ip_forward(struct sk_buff *skb); - + /* * Functions provided by ip_options.c */ - + void ip_options_build(struct sk_buff *skb, struct ip_options *opt, __be32 daddr, struct rtable *rt, int is_frag); diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 34ec321d6a03..8d906a35b534 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -415,6 +415,24 @@ void fib6_rules_cleanup(void); bool fib6_rule_default(const struct fib_rule *rule); int fib6_rules_dump(struct net *net, struct notifier_block *nb); unsigned int fib6_rules_seq_read(struct net *net); + +static inline bool fib6_rules_early_flow_dissect(struct net *net, + struct sk_buff *skb, + struct flowi6 *fl6, + struct flow_keys *flkeys) +{ + unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP; + + if (!net->ipv6.fib6_rules_require_fldissect) + return false; + + skb_flow_dissect_flow_keys(skb, flkeys, flag); + fl6->fl6_sport = flkeys->ports.src; + fl6->fl6_dport = flkeys->ports.dst; + fl6->flowi6_proto = flkeys->basic.ip_proto; + + return true; +} #else static inline int fib6_rules_init(void) { @@ -436,5 +454,12 @@ static inline unsigned int fib6_rules_seq_read(struct net *net) { return 0; } +static inline bool fib6_rules_early_flow_dissect(struct net *net, + struct sk_buff *skb, + struct flowi6 *fl6, + struct flow_keys *flkeys) +{ + return false; +} #endif #endif diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 27d23a65f3cd..da2bde5fda8f 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -127,7 +127,8 @@ static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt, struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr, int oif, int flags); -u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb); +u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb, + struct flow_keys *hkeys); struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6); @@ -266,4 +267,5 @@ static inline bool rt6_duplicate_nexthop(struct rt6_info *a, struct rt6_info *b) ipv6_addr_equal(&a->rt6i_gateway, &b->rt6i_gateway) && !lwtunnel_cmp_encap(a->dst.lwtstate, b->dst.lwtstate); } + #endif diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index f80524396c06..8812582a94d5 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -157,7 +157,7 @@ struct fib_result_nl { unsigned char nh_sel; unsigned char type; unsigned char scope; - int err; + int err; }; #ifdef CONFIG_IP_ROUTE_MULTIPATH @@ -293,6 +293,13 @@ static inline unsigned int fib4_rules_seq_read(struct net *net) return 0; } +static inline bool fib4_rules_early_flow_dissect(struct net *net, + struct sk_buff *skb, + struct flowi4 *fl4, + struct flow_keys *flkeys) +{ + return false; +} #else /* CONFIG_IP_MULTIPLE_TABLES */ int __net_init fib4_rules_init(struct net *net); void __net_exit fib4_rules_exit(struct net *net); @@ -341,6 +348,24 @@ bool fib4_rule_default(const struct fib_rule *rule); int fib4_rules_dump(struct net *net, struct notifier_block *nb); unsigned int fib4_rules_seq_read(struct net *net); +static inline bool fib4_rules_early_flow_dissect(struct net *net, + struct sk_buff *skb, + struct flowi4 *fl4, + struct flow_keys *flkeys) +{ + unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP; + + if (!net->ipv4.fib_rules_require_fldissect) + return false; + + skb_flow_dissect_flow_keys(skb, flkeys, flag); + fl4->fl4_sport = flkeys->ports.src; + fl4->fl4_dport = flkeys->ports.dst; + fl4->flowi4_proto = flkeys->basic.ip_proto; + + return true; +} + #endif /* CONFIG_IP_MULTIPLE_TABLES */ /* Exported by fib_frontend.c */ @@ -371,7 +396,7 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags); #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4, - const struct sk_buff *skb); + const struct sk_buff *skb, struct flow_keys *flkeys); #endif void fib_select_multipath(struct fib_result *res, int hash); void fib_select_path(struct net *net, struct fib_result *res, diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 1f16773cfd76..cbe5addb9293 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -254,6 +254,22 @@ static inline __be32 tunnel_id_to_key32(__be64 tun_id) #ifdef CONFIG_INET +static inline void ip_tunnel_init_flow(struct flowi4 *fl4, + int proto, + __be32 daddr, __be32 saddr, + __be32 key, __u8 tos, int oif, + __u32 mark) +{ + memset(fl4, 0, sizeof(*fl4)); + fl4->flowi4_oif = oif; + fl4->daddr = daddr; + fl4->saddr = saddr; + fl4->flowi4_tos = tos; + fl4->flowi4_proto = proto; + fl4->fl4_gre_key = key; + fl4->flowi4_mark = mark; +} + int ip_tunnel_init(struct net_device *dev); void ip_tunnel_uninit(struct net_device *dev); void ip_tunnel_dellink(struct net_device *dev, struct list_head *head); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 7a98cd583c73..cabd3cdd4015 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -105,8 +105,8 @@ #define IPV6_ADDR_ANY 0x0000U -#define IPV6_ADDR_UNICAST 0x0001U -#define IPV6_ADDR_MULTICAST 0x0002U +#define IPV6_ADDR_UNICAST 0x0001U +#define IPV6_ADDR_MULTICAST 0x0002U #define IPV6_ADDR_LOOPBACK 0x0010U #define IPV6_ADDR_LINKLOCAL 0x0020U @@ -447,7 +447,7 @@ ipv6_masked_addr_cmp(const struct in6_addr *a1, const struct in6_addr *m, #endif } -static inline void ipv6_addr_prefix(struct in6_addr *pfx, +static inline void ipv6_addr_prefix(struct in6_addr *pfx, const struct in6_addr *addr, int plen) { @@ -496,7 +496,7 @@ static inline void __ipv6_addr_set_half(__be32 *addr, addr[1] = wl; } -static inline void ipv6_addr_set(struct in6_addr *addr, +static inline void ipv6_addr_set(struct in6_addr *addr, __be32 w1, __be32 w2, __be32 w3, __be32 w4) { @@ -732,7 +732,7 @@ static inline int __ipv6_addr_diff32(const void *token1, const void *token2, int } /* - * we should *never* get to this point since that + * we should *never* get to this point since that * would mean the addrs are equal * * However, we do get to it 8) And exacly, when diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 44668c29701a..3a970e429ab6 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -52,6 +52,7 @@ struct netns_ipv4 { #ifdef CONFIG_IP_MULTIPLE_TABLES struct fib_rules_ops *rules_ops; bool fib_has_custom_rules; + unsigned int fib_rules_require_fldissect; struct fib_table __rcu *fib_main; struct fib_table __rcu *fib_default; #endif diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 987cc4569cb8..e286fda09fcf 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -71,7 +71,8 @@ struct netns_ipv6 { unsigned int ip6_rt_gc_expire; unsigned long ip6_rt_last_gc; #ifdef CONFIG_IPV6_MULTIPLE_TABLES - bool fib6_has_custom_rules; + unsigned int fib6_rules_require_fldissect; + bool fib6_has_custom_rules; struct rt6_info *ip6_prohibit_entry; struct rt6_info *ip6_blk_hole_entry; struct fib6_table *fib6_local_tbl; @@ -84,7 +85,7 @@ struct netns_ipv6 { struct sock *mc_autojoin_sk; #ifdef CONFIG_IPV6_MROUTE #ifndef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES - struct mr6_table *mrt6; + struct mr_table *mrt6; #else struct list_head mr6_tables; struct fib_rules_ops *mr6_rules_ops; diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 87406252f0a3..e828d31be5da 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -806,6 +806,7 @@ enum tc_prio_command { TC_PRIO_REPLACE, TC_PRIO_DESTROY, TC_PRIO_STATS, + TC_PRIO_GRAFT, }; struct tc_prio_qopt_offload_params { @@ -818,6 +819,11 @@ struct tc_prio_qopt_offload_params { struct gnet_stats_queue *qstats; }; +struct tc_prio_qopt_offload_graft_params { + u8 band; + u32 child_handle; +}; + struct tc_prio_qopt_offload { enum tc_prio_command command; u32 handle; @@ -825,6 +831,8 @@ struct tc_prio_qopt_offload { union { struct tc_prio_qopt_offload_params replace_params; struct tc_qopt_offload_stats stats; + struct tc_prio_qopt_offload_graft_params graft_params; }; }; + #endif diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index e2ab13687fb9..d4907b584b38 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -540,7 +540,7 @@ static inline bool skb_skip_tc_classify(struct sk_buff *skb) return false; } -/* Reset all TX qdiscs greater then index of a device. */ +/* Reset all TX qdiscs greater than index of a device. */ static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i) { struct Qdisc *qdisc; diff --git a/include/net/tcp.h b/include/net/tcp.h index 92b06c6e7732..9c9b3768b350 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -511,8 +511,6 @@ __u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mss); #endif /* tcp_output.c */ -u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, - int min_tso_segs); void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, int nonagle); int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs); @@ -981,8 +979,8 @@ struct tcp_congestion_ops { u32 (*undo_cwnd)(struct sock *sk); /* hook for packet ack accounting (optional) */ void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample); - /* suggest number of segments for each skb to transmit (optional) */ - u32 (*tso_segs_goal)(struct sock *sk); + /* override sysctl_tcp_min_tso_segs */ + u32 (*min_tso_segs)(struct sock *sk); /* returns the multiplier used in tcp_sndbuf_expand (optional) */ u32 (*sndbuf_expand)(struct sock *sk); /* call when packets are delivered to update cwnd and pacing rate, diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 7d2077665c0b..aa027ba1d032 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1267,12 +1267,12 @@ static inline void xfrm_sk_free_policy(struct sock *sk) static inline void xfrm_sk_free_policy(struct sock *sk) {} static inline int xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk) { return 0; } -static inline int xfrm6_route_forward(struct sk_buff *skb) { return 1; } -static inline int xfrm4_route_forward(struct sk_buff *skb) { return 1; } +static inline int xfrm6_route_forward(struct sk_buff *skb) { return 1; } +static inline int xfrm4_route_forward(struct sk_buff *skb) { return 1; } static inline int xfrm6_policy_check(struct sock *sk, int dir, struct sk_buff *skb) -{ - return 1; -} +{ + return 1; +} static inline int xfrm4_policy_check(struct sock *sk, int dir, struct sk_buff *skb) { return 1; @@ -1356,7 +1356,7 @@ __xfrm6_state_addr_check(const struct xfrm_state *x, { if (ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)&x->id.daddr) && (ipv6_addr_equal((struct in6_addr *)saddr, (struct in6_addr *)&x->props.saddr) || - ipv6_addr_any((struct in6_addr *)saddr) || + ipv6_addr_any((struct in6_addr *)saddr) || ipv6_addr_any((struct in6_addr *)&x->props.saddr))) return 1; return 0; @@ -1666,7 +1666,7 @@ int xfrm_user_policy(struct sock *sk, int optname, static inline int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen) { return -ENOPROTOOPT; -} +} static inline int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) { diff --git a/include/uapi/linux/errqueue.h b/include/uapi/linux/errqueue.h index 28812eda4209..dc64cfaf13da 100644 --- a/include/uapi/linux/errqueue.h +++ b/include/uapi/linux/errqueue.h @@ -20,13 +20,11 @@ struct sock_extended_err { #define SO_EE_ORIGIN_ICMP6 3 #define SO_EE_ORIGIN_TXSTATUS 4 #define SO_EE_ORIGIN_ZEROCOPY 5 -#define SO_EE_ORIGIN_ZCOOKIE 6 #define SO_EE_ORIGIN_TIMESTAMPING SO_EE_ORIGIN_TXSTATUS #define SO_EE_OFFENDER(ee) ((struct sockaddr*)((ee)+1)) #define SO_EE_CODE_ZEROCOPY_COPIED 1 -#define SO_EE_ORIGIN_MAX_ZCOOKIES 8 /** * struct scm_timestamping - timestamps exposed through cmsg diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h index 77d90ae38114..232df14e1287 100644 --- a/include/uapi/linux/fib_rules.h +++ b/include/uapi/linux/fib_rules.h @@ -35,6 +35,11 @@ struct fib_rule_uid_range { __u32 end; }; +struct fib_rule_port_range { + __u16 start; + __u16 end; +}; + enum { FRA_UNSPEC, FRA_DST, /* destination address */ @@ -59,6 +64,9 @@ enum { FRA_L3MDEV, /* iif or oif is l3mdev goto its table */ FRA_UID_RANGE, /* UID range */ FRA_PROTOCOL, /* Originator of the rule */ + FRA_IP_PROTO, /* ip proto */ + FRA_SPORT_RANGE, /* sport */ + FRA_DPORT_RANGE, /* dport */ __FRA_MAX }; diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h index 12e3bca32cad..a66b213de3d7 100644 --- a/include/uapi/linux/rds.h +++ b/include/uapi/linux/rds.h @@ -104,6 +104,7 @@ #define RDS_CMSG_MASKED_ATOMIC_CSWP 9 #define RDS_CMSG_RXPATH_LATENCY 11 #define RDS_CMSG_ZCOPY_COOKIE 12 +#define RDS_CMSG_ZCOPY_COMPLETION 13 #define RDS_INFO_FIRST 10000 #define RDS_INFO_COUNTERS 10000 @@ -317,6 +318,12 @@ struct rds_rdma_notify { #define RDS_RDMA_DROPPED 3 #define RDS_RDMA_OTHER_ERROR 4 +#define RDS_MAX_ZCOOKIES 8 +struct rds_zcopy_cookies { + __u32 num; + __u32 cookies[RDS_MAX_ZCOOKIES]; +}; + /* * Common set of flags for all RDMA related structs */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5fb69a85d967..3c74b163eaeb 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -508,10 +508,6 @@ err: static const int caller_saved[CALLER_SAVED_REGS] = { BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5 }; -#define CALLEE_SAVED_REGS 5 -static const int callee_saved[CALLEE_SAVED_REGS] = { - BPF_REG_6, BPF_REG_7, BPF_REG_8, BPF_REG_9 -}; static void __mark_reg_not_init(struct bpf_reg_state *reg); diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index bad01b14a4ad..bd0ed39f65fb 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -729,6 +729,7 @@ static struct pernet_operations vlan_net_ops = { .exit = vlan_exit_net, .id = &vlan_net_id, .size = sizeof(struct vlan_net), + .async = true, }; static int __init vlan_proto_init(void) diff --git a/net/bridge/br.c b/net/bridge/br.c index 6bf06e756df2..7770481a6506 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -188,6 +188,7 @@ static void __net_exit br_net_exit(struct net *net) static struct pernet_operations br_net_ops = { .exit = br_net_exit, + .async = true, }; static const struct stp_proto br_stp_proto = { diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 27f1d4f2114a..484f54150525 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -967,6 +967,7 @@ static struct pernet_operations brnf_net_ops __read_mostly = { .exit = brnf_exit_net, .id = &brnf_net_id, .size = sizeof(struct brnf_net), + .async = true, }; static struct notifier_block brnf_notifier __read_mostly = { diff --git a/net/can/bcm.c b/net/can/bcm.c index ac5e5e34fee3..26730d39e048 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1717,6 +1717,7 @@ static void canbcm_pernet_exit(struct net *net) static struct pernet_operations canbcm_pernet_ops __read_mostly = { .init = canbcm_pernet_init, .exit = canbcm_pernet_exit, + .async = true, }; static int __init bcm_module_init(void) diff --git a/net/core/dev.c b/net/core/dev.c index 5bdcc5a161fe..40fb3aed5df2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2378,7 +2378,7 @@ EXPORT_SYMBOL(netdev_set_num_tc); /* * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues - * greater then real_num_tx_queues stale skbs on the qdisc must be flushed. + * greater than real_num_tx_queues stale skbs on the qdisc must be flushed. */ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) { diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index a6aea805a0a2..f6f04fc0f629 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -33,6 +33,10 @@ bool fib_rule_matchall(const struct fib_rule *rule) if (!uid_eq(rule->uid_range.start, fib_kuid_range_unset.start) || !uid_eq(rule->uid_range.end, fib_kuid_range_unset.end)) return false; + if (fib_rule_port_range_set(&rule->sport_range)) + return false; + if (fib_rule_port_range_set(&rule->dport_range)) + return false; return true; } EXPORT_SYMBOL_GPL(fib_rule_matchall); @@ -221,6 +225,26 @@ static int nla_put_uid_range(struct sk_buff *skb, struct fib_kuid_range *range) return nla_put(skb, FRA_UID_RANGE, sizeof(out), &out); } +static int nla_get_port_range(struct nlattr *pattr, + struct fib_rule_port_range *port_range) +{ + const struct fib_rule_port_range *pr = nla_data(pattr); + + if (!fib_rule_port_range_valid(pr)) + return -EINVAL; + + port_range->start = pr->start; + port_range->end = pr->end; + + return 0; +} + +static int nla_put_port_range(struct sk_buff *skb, int attrtype, + struct fib_rule_port_range *range) +{ + return nla_put(skb, attrtype, sizeof(*range), range); +} + static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, struct flowi *fl, int flags, struct fib_lookup_arg *arg) @@ -425,6 +449,17 @@ static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh, !uid_eq(r->uid_range.end, rule->uid_range.end)) continue; + if (r->ip_proto != rule->ip_proto) + continue; + + if (!fib_rule_port_range_compare(&r->sport_range, + &rule->sport_range)) + continue; + + if (!fib_rule_port_range_compare(&r->dport_range, + &rule->dport_range)) + continue; + if (!ops->compare(r, frh, tb)) continue; return 1; @@ -569,6 +604,23 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, rule->uid_range = fib_kuid_range_unset; } + if (tb[FRA_IP_PROTO]) + rule->ip_proto = nla_get_u8(tb[FRA_IP_PROTO]); + + if (tb[FRA_SPORT_RANGE]) { + err = nla_get_port_range(tb[FRA_SPORT_RANGE], + &rule->sport_range); + if (err) + goto errout_free; + } + + if (tb[FRA_DPORT_RANGE]) { + err = nla_get_port_range(tb[FRA_DPORT_RANGE], + &rule->dport_range); + if (err) + goto errout_free; + } + if ((nlh->nlmsg_flags & NLM_F_EXCL) && rule_exists(ops, frh, tb, rule)) { err = -EEXIST; @@ -634,6 +686,8 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, { struct net *net = sock_net(skb->sk); struct fib_rule_hdr *frh = nlmsg_data(nlh); + struct fib_rule_port_range sprange = {0, 0}; + struct fib_rule_port_range dprange = {0, 0}; struct fib_rules_ops *ops = NULL; struct fib_rule *rule, *r; struct nlattr *tb[FRA_MAX+1]; @@ -667,6 +721,20 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, range = fib_kuid_range_unset; } + if (tb[FRA_SPORT_RANGE]) { + err = nla_get_port_range(tb[FRA_SPORT_RANGE], + &sprange); + if (err) + goto errout; + } + + if (tb[FRA_DPORT_RANGE]) { + err = nla_get_port_range(tb[FRA_DPORT_RANGE], + &dprange); + if (err) + goto errout; + } + list_for_each_entry(rule, &ops->rules_list, list) { if (tb[FRA_PROTOCOL] && (rule->proto != nla_get_u8(tb[FRA_PROTOCOL]))) @@ -712,6 +780,18 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, !uid_eq(rule->uid_range.end, range.end))) continue; + if (tb[FRA_IP_PROTO] && + (rule->ip_proto != nla_get_u8(tb[FRA_IP_PROTO]))) + continue; + + if (fib_rule_port_range_set(&sprange) && + !fib_rule_port_range_compare(&rule->sport_range, &sprange)) + continue; + + if (fib_rule_port_range_set(&dprange) && + !fib_rule_port_range_compare(&rule->dport_range, &dprange)) + continue; + if (!ops->compare(rule, frh, tb)) continue; @@ -790,7 +870,10 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops, + nla_total_size(4) /* FRA_FWMASK */ + nla_total_size_64bit(8) /* FRA_TUN_ID */ + nla_total_size(sizeof(struct fib_kuid_range)) - + nla_total_size(1); /* FRA_PROTOCOL */ + + nla_total_size(1) /* FRA_PROTOCOL */ + + nla_total_size(1) /* FRA_IP_PROTO */ + + nla_total_size(sizeof(struct fib_rule_port_range)) /* FRA_SPORT_RANGE */ + + nla_total_size(sizeof(struct fib_rule_port_range)); /* FRA_DPORT_RANGE */ if (ops->nlmsg_payload) payload += ops->nlmsg_payload(rule); @@ -855,7 +938,12 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, (rule->l3mdev && nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev)) || (uid_range_set(&rule->uid_range) && - nla_put_uid_range(skb, &rule->uid_range))) + nla_put_uid_range(skb, &rule->uid_range)) || + (fib_rule_port_range_set(&rule->sport_range) && + nla_put_port_range(skb, FRA_SPORT_RANGE, &rule->sport_range)) || + (fib_rule_port_range_set(&rule->dport_range) && + nla_put_port_range(skb, FRA_DPORT_RANGE, &rule->dport_range)) || + (rule->ip_proto && nla_put_u8(skb, FRA_IP_PROTO, rule->ip_proto))) goto nla_put_failure; if (rule->suppress_ifgroup != -1) { diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 27a55236ad64..690e78c6af45 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -362,7 +362,7 @@ static void dec_net_namespaces(struct ucounts *ucounts) dec_ucount(ucounts, UCOUNT_NET_NAMESPACES); } -static struct kmem_cache *net_cachep; +static struct kmem_cache *net_cachep __ro_after_init; static struct workqueue_struct *netns_wq; static struct net *net_alloc(void) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1a7485a2cdfa..96d36b81a3a5 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -77,8 +77,8 @@ #include <linux/capability.h> #include <linux/user_namespace.h> -struct kmem_cache *skbuff_head_cache __read_mostly; -static struct kmem_cache *skbuff_fclone_cache __read_mostly; +struct kmem_cache *skbuff_head_cache __ro_after_init; +static struct kmem_cache *skbuff_fclone_cache __ro_after_init; int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS; EXPORT_SYMBOL(sysctl_max_skb_frags); diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index 974765b7d92a..e4f305320519 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -104,6 +104,7 @@ static void lowpan_setup(struct net_device *ldev) /* We need an ipv6hdr as minimum len when calling xmit */ ldev->hard_header_len = sizeof(struct ipv6hdr); ldev->flags = IFF_BROADCAST | IFF_MULTICAST; + ldev->priv_flags |= IFF_NO_QUEUE; ldev->netdev_ops = &lowpan_netdev_ops; ldev->header_ops = &lowpan_header_ops; diff --git a/net/ieee802154/core.c b/net/ieee802154/core.c index cb7176cd4cd6..9104943c15ba 100644 --- a/net/ieee802154/core.c +++ b/net/ieee802154/core.c @@ -345,6 +345,7 @@ static void __net_exit cfg802154_pernet_exit(struct net *net) static struct pernet_operations cfg802154_pernet_ops = { .exit = cfg802154_pernet_exit, + .async = true, }; static int __init wpan_phy_class_init(void) diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index f48fe6fc7e8c..80dad301361d 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -212,9 +212,14 @@ config NET_IPGRE_BROADCAST Network), but can be distributed all over the Internet. If you want to do that, say Y here and to "IP multicast routing" below. +config IP_MROUTE_COMMON + bool + depends on IP_MROUTE || IPV6_MROUTE + config IP_MROUTE bool "IP: multicast routing" depends on IP_MULTICAST + select IP_MROUTE_COMMON help This is used if you want your machine to act as a router for IP packets that have several destination addresses. It is needed on the diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 47a0a6649a9d..a07b7dd06def 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o obj-$(CONFIG_IP_MROUTE) += ipmr.o +obj-$(CONFIG_IP_MROUTE_COMMON) += ipmr_base.o obj-$(CONFIG_NET_IPIP) += ipip.o gre-y := gre_demux.o obj-$(CONFIG_NET_FOU) += fou.o diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 35d646a62ad4..737d11bc8838 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -182,6 +182,17 @@ static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) if (r->tos && (r->tos != fl4->flowi4_tos)) return 0; + if (rule->ip_proto && (rule->ip_proto != fl4->flowi4_proto)) + return 0; + + if (fib_rule_port_range_set(&rule->sport_range) && + !fib_rule_port_inrange(&rule->sport_range, fl4->fl4_sport)) + return 0; + + if (fib_rule_port_range_set(&rule->dport_range) && + !fib_rule_port_inrange(&rule->dport_range, fl4->fl4_dport)) + return 0; + return 1; } @@ -244,6 +255,9 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, } #endif + if (fib_rule_requires_fldissect(rule)) + net->ipv4.fib_rules_require_fldissect++; + rule4->src_len = frh->src_len; rule4->srcmask = inet_make_mask(rule4->src_len); rule4->dst_len = frh->dst_len; @@ -272,6 +286,10 @@ static int fib4_rule_delete(struct fib_rule *rule) net->ipv4.fib_num_tclassid_users--; #endif net->ipv4.fib_has_custom_rules = true; + + if (net->ipv4.fib_rules_require_fldissect && + fib_rule_requires_fldissect(rule)) + net->ipv4.fib_rules_require_fldissect--; errout: return err; } @@ -389,6 +407,7 @@ int __net_init fib4_rules_init(struct net *net) goto fail; net->ipv4.rules_ops = ops; net->ipv4.fib_has_custom_rules = false; + net->ipv4.fib_rules_require_fldissect = 0; return 0; fail: diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index cd46d7666598..181b0d8d589c 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -171,7 +171,7 @@ static void free_nh_exceptions(struct fib_nh *nh) fnhe = rcu_dereference_protected(hash[i].chain, 1); while (fnhe) { struct fib_nh_exception *next; - + next = rcu_dereference_protected(fnhe->fnhe_next, 1); rt_fibinfo_free(&fnhe->fnhe_rth_input); @@ -1770,7 +1770,7 @@ void fib_select_path(struct net *net, struct fib_result *res, #ifdef CONFIG_IP_ROUTE_MULTIPATH if (res->fi->fib_nhs > 1) { - int h = fib_multipath_hash(res->fi, fl4, skb); + int h = fib_multipath_hash(res->fi, fl4, skb, NULL); fib_select_multipath(res, h); } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 5530cd6fdbc7..62243a8abf92 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -50,6 +50,7 @@ #define VERSION "0.409" +#include <linux/cache.h> #include <linux/uaccess.h> #include <linux/bitops.h> #include <linux/types.h> @@ -191,8 +192,8 @@ static size_t tnode_free_size; */ static const int sync_pages = 128; -static struct kmem_cache *fn_alias_kmem __read_mostly; -static struct kmem_cache *trie_leaf_kmem __read_mostly; +static struct kmem_cache *fn_alias_kmem __ro_after_init; +static struct kmem_cache *trie_leaf_kmem __ro_after_init; static inline struct tnode *tn_info(struct key_vector *kv) { diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 914d56928578..1f04bd91fc2e 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -6,6 +6,7 @@ * Authors: Andrey V. Savochkin <saw@msu.ru> */ +#include <linux/cache.h> #include <linux/module.h> #include <linux/types.h> #include <linux/slab.h> @@ -51,7 +52,7 @@ * daddr: unchangeable */ -static struct kmem_cache *peer_cachep __read_mostly; +static struct kmem_cache *peer_cachep __ro_after_init; void inet_peer_base_init(struct inet_peer_base *bp) { diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 45d97e9b2759..0fe1d69b5df4 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1044,6 +1044,7 @@ static struct pernet_operations ipgre_net_ops = { .exit_batch = ipgre_exit_batch_net, .id = &ipgre_net_id, .size = sizeof(struct ip_tunnel_net), + .async = true, }; static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[], @@ -1322,6 +1323,12 @@ static void ipgre_tap_setup(struct net_device *dev) ip_tunnel_setup(dev, gre_tap_net_id); } +bool is_gretap_dev(const struct net_device *dev) +{ + return dev->netdev_ops == &gre_tap_netdev_ops; +} +EXPORT_SYMBOL_GPL(is_gretap_dev); + static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) @@ -1623,6 +1630,7 @@ static struct pernet_operations ipgre_tap_net_ops = { .exit_batch = ipgre_tap_exit_batch_net, .id = &gre_tap_net_id, .size = sizeof(struct ip_tunnel_net), + .async = true, }; static int __net_init erspan_init_net(struct net *net) @@ -1641,6 +1649,7 @@ static struct pernet_operations erspan_net_ops = { .exit_batch = erspan_exit_batch_net, .id = &erspan_net_id, .size = sizeof(struct ip_tunnel_net), + .async = true, }; static int __init ipgre_init(void) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index d786a8441bce..b2117d89bc83 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -290,22 +290,6 @@ failed: return ERR_PTR(err); } -static inline void init_tunnel_flow(struct flowi4 *fl4, - int proto, - __be32 daddr, __be32 saddr, - __be32 key, __u8 tos, int oif, - __u32 mark) -{ - memset(fl4, 0, sizeof(*fl4)); - fl4->flowi4_oif = oif; - fl4->daddr = daddr; - fl4->saddr = saddr; - fl4->flowi4_tos = tos; - fl4->flowi4_proto = proto; - fl4->fl4_gre_key = key; - fl4->flowi4_mark = mark; -} - static int ip_tunnel_bind_dev(struct net_device *dev) { struct net_device *tdev = NULL; @@ -322,10 +306,10 @@ static int ip_tunnel_bind_dev(struct net_device *dev) struct flowi4 fl4; struct rtable *rt; - init_tunnel_flow(&fl4, iph->protocol, iph->daddr, - iph->saddr, tunnel->parms.o_key, - RT_TOS(iph->tos), tunnel->parms.link, - tunnel->fwmark); + ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr, + iph->saddr, tunnel->parms.o_key, + RT_TOS(iph->tos), tunnel->parms.link, + tunnel->fwmark); rt = ip_route_output_key(tunnel->net, &fl4); if (!IS_ERR(rt)) { @@ -581,8 +565,8 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto) else if (skb->protocol == htons(ETH_P_IPV6)) tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph); } - init_tunnel_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0, - RT_TOS(tos), tunnel->parms.link, tunnel->fwmark); + ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0, + RT_TOS(tos), tunnel->parms.link, tunnel->fwmark); if (tunnel->encap.type != TUNNEL_ENCAP_NONE) goto tx_error; rt = ip_route_output_key(tunnel->net, &fl4); @@ -711,14 +695,14 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, } if (tunnel->fwmark) { - init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr, - tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link, - tunnel->fwmark); + ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr, + tunnel->parms.o_key, RT_TOS(tos), + tunnel->parms.link, tunnel->fwmark); } else { - init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr, - tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link, - skb->mark); + ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr, + tunnel->parms.o_key, RT_TOS(tos), + tunnel->parms.link, skb->mark); } if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 51b1669334fe..b10bf563afd9 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -454,6 +454,7 @@ static struct pernet_operations vti_net_ops = { .exit_batch = vti_exit_batch_net, .id = &vti_net_id, .size = sizeof(struct ip_tunnel_net), + .async = true, }; static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[], diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index c891235b4966..9c5a4d164f09 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -669,6 +669,7 @@ static struct pernet_operations ipip_net_ops = { .exit_batch = ipip_exit_batch_net, .id = &ipip_net_id, .size = sizeof(struct ip_tunnel_net), + .async = true, }; static int __init ipip_init(void) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 7c7ac9d32e77..d752a70855d8 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -28,9 +28,9 @@ #include <linux/uaccess.h> #include <linux/types.h> +#include <linux/cache.h> #include <linux/capability.h> #include <linux/errno.h> -#include <linux/timer.h> #include <linux/mm.h> #include <linux/kernel.h> #include <linux/fcntl.h> @@ -52,7 +52,6 @@ #include <net/protocol.h> #include <linux/skbuff.h> #include <net/route.h> -#include <net/sock.h> #include <net/icmp.h> #include <net/udp.h> #include <net/raw.h> @@ -96,7 +95,7 @@ static DEFINE_SPINLOCK(mfc_unres_lock); * In this case data path is free of exclusive locks at all. */ -static struct kmem_cache *mrt_cachep __read_mostly; +static struct kmem_cache *mrt_cachep __ro_after_init; static struct mr_table *ipmr_new_table(struct net *net, u32 id); static void ipmr_free_table(struct mr_table *mrt); @@ -106,8 +105,6 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt, struct mfc_cache *cache, int local); static int ipmr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, vifi_t vifi, int assert); -static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, - struct mfc_cache *c, struct rtmsg *rtm); static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, int cmd); static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt); @@ -118,6 +115,23 @@ static void ipmr_expire_process(struct timer_list *t); #define ipmr_for_each_table(mrt, net) \ list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list) +static struct mr_table *ipmr_mr_table_iter(struct net *net, + struct mr_table *mrt) +{ + struct mr_table *ret; + + if (!mrt) + ret = list_entry_rcu(net->ipv4.mr_tables.next, + struct mr_table, list); + else + ret = list_entry_rcu(mrt->list.next, + struct mr_table, list); + + if (&ret->list == &net->ipv4.mr_tables) + return NULL; + return ret; +} + static struct mr_table *ipmr_get_table(struct net *net, u32 id) { struct mr_table *mrt; @@ -285,6 +299,14 @@ EXPORT_SYMBOL(ipmr_rule_default); #define ipmr_for_each_table(mrt, net) \ for (mrt = net->ipv4.mrt; mrt; mrt = NULL) +static struct mr_table *ipmr_mr_table_iter(struct net *net, + struct mr_table *mrt) +{ + if (!mrt) + return net->ipv4.mrt; + return NULL; +} + static struct mr_table *ipmr_get_table(struct net *net, u32 id) { return net->ipv4.mrt; @@ -344,7 +366,7 @@ static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg, } static const struct rhashtable_params ipmr_rht_params = { - .head_offset = offsetof(struct mfc_cache, mnode), + .head_offset = offsetof(struct mr_mfc, mnode), .key_offset = offsetof(struct mfc_cache, cmparg), .key_len = sizeof(struct mfc_cache_cmp_arg), .nelem_hint = 3, @@ -353,6 +375,24 @@ static const struct rhashtable_params ipmr_rht_params = { .automatic_shrinking = true, }; +static void ipmr_new_table_set(struct mr_table *mrt, + struct net *net) +{ +#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES + list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables); +#endif +} + +static struct mfc_cache_cmp_arg ipmr_mr_table_ops_cmparg_any = { + .mfc_mcastgrp = htonl(INADDR_ANY), + .mfc_origin = htonl(INADDR_ANY), +}; + +static struct mr_table_ops ipmr_mr_table_ops = { + .rht_params = &ipmr_rht_params, + .cmparg_any = &ipmr_mr_table_ops_cmparg_any, +}; + static struct mr_table *ipmr_new_table(struct net *net, u32 id) { struct mr_table *mrt; @@ -365,23 +405,8 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id) if (mrt) return mrt; - mrt = kzalloc(sizeof(*mrt), GFP_KERNEL); - if (!mrt) - return ERR_PTR(-ENOMEM); - write_pnet(&mrt->net, net); - mrt->id = id; - - rhltable_init(&mrt->mfc_hash, &ipmr_rht_params); - INIT_LIST_HEAD(&mrt->mfc_cache_list); - INIT_LIST_HEAD(&mrt->mfc_unres_queue); - - timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0); - - mrt->mroute_reg_vif_num = -1; -#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES - list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables); -#endif - return mrt; + return mr_table_alloc(net, id, &ipmr_mr_table_ops, + ipmr_expire_process, ipmr_new_table_set); } static void ipmr_free_table(struct mr_table *mrt) @@ -760,14 +785,14 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify, static void ipmr_cache_free_rcu(struct rcu_head *head) { - struct mfc_cache *c = container_of(head, struct mfc_cache, rcu); + struct mr_mfc *c = container_of(head, struct mr_mfc, rcu); - kmem_cache_free(mrt_cachep, c); + kmem_cache_free(mrt_cachep, (struct mfc_cache *)c); } void ipmr_cache_free(struct mfc_cache *c) { - call_rcu(&c->rcu, ipmr_cache_free_rcu); + call_rcu(&c->_c.rcu, ipmr_cache_free_rcu); } EXPORT_SYMBOL(ipmr_cache_free); @@ -782,7 +807,7 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) atomic_dec(&mrt->cache_resolve_queue_len); - while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) { + while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved))) { if (ip_hdr(skb)->version == 0) { struct nlmsghdr *nlh = skb_pull(skb, sizeof(struct iphdr)); @@ -806,9 +831,9 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) static void ipmr_expire_process(struct timer_list *t) { struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer); - unsigned long now; + struct mr_mfc *c, *next; unsigned long expires; - struct mfc_cache *c, *next; + unsigned long now; if (!spin_trylock(&mfc_unres_lock)) { mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10); @@ -830,8 +855,8 @@ static void ipmr_expire_process(struct timer_list *t) } list_del(&c->list); - mroute_netlink_event(mrt, c, RTM_DELROUTE); - ipmr_destroy_unres(mrt, c); + mroute_netlink_event(mrt, (struct mfc_cache *)c, RTM_DELROUTE); + ipmr_destroy_unres(mrt, (struct mfc_cache *)c); } if (!list_empty(&mrt->mfc_unres_queue)) @@ -842,7 +867,7 @@ out: } /* Fill oifs list. It is called under write locked mrt_lock. */ -static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache, +static void ipmr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache, unsigned char *ttls) { int vifi; @@ -944,6 +969,10 @@ static int vif_add(struct net *net, struct mr_table *mrt, ip_rt_multicast_event(in_dev); /* Fill in the VIF structures */ + vif_device_init(v, dev, vifc->vifc_rate_limit, + vifc->vifc_threshold, + vifc->vifc_flags | (!mrtsock ? VIFF_STATIC : 0), + (VIFF_TUNNEL | VIFF_REGISTER)); attr.orig_dev = dev; if (!switchdev_port_attr_get(dev, &attr)) { @@ -952,20 +981,9 @@ static int vif_add(struct net *net, struct mr_table *mrt, } else { v->dev_parent_id.id_len = 0; } - v->rate_limit = vifc->vifc_rate_limit; + v->local = vifc->vifc_lcl_addr.s_addr; v->remote = vifc->vifc_rmt_addr.s_addr; - v->flags = vifc->vifc_flags; - if (!mrtsock) - v->flags |= VIFF_STATIC; - v->threshold = vifc->vifc_threshold; - v->bytes_in = 0; - v->bytes_out = 0; - v->pkt_in = 0; - v->pkt_out = 0; - v->link = dev->ifindex; - if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER)) - v->link = dev_get_iflink(dev); /* And finish update writing critical data */ write_lock_bh(&mrt_lock); @@ -988,33 +1006,8 @@ static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt, .mfc_mcastgrp = mcastgrp, .mfc_origin = origin }; - struct rhlist_head *tmp, *list; - struct mfc_cache *c; - - list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params); - rhl_for_each_entry_rcu(c, tmp, list, mnode) - return c; - - return NULL; -} - -/* Look for a (*,*,oif) entry */ -static struct mfc_cache *ipmr_cache_find_any_parent(struct mr_table *mrt, - int vifi) -{ - struct mfc_cache_cmp_arg arg = { - .mfc_mcastgrp = htonl(INADDR_ANY), - .mfc_origin = htonl(INADDR_ANY) - }; - struct rhlist_head *tmp, *list; - struct mfc_cache *c; - list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params); - rhl_for_each_entry_rcu(c, tmp, list, mnode) - if (c->mfc_un.res.ttls[vifi] < 255) - return c; - - return NULL; + return mr_mfc_find(mrt, &arg); } /* Look for a (*,G) entry */ @@ -1025,25 +1018,10 @@ static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt, .mfc_mcastgrp = mcastgrp, .mfc_origin = htonl(INADDR_ANY) }; - struct rhlist_head *tmp, *list; - struct mfc_cache *c, *proxy; if (mcastgrp == htonl(INADDR_ANY)) - goto skip; - - list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params); - rhl_for_each_entry_rcu(c, tmp, list, mnode) { - if (c->mfc_un.res.ttls[vifi] < 255) - return c; - - /* It's ok if the vifi is part of the static tree */ - proxy = ipmr_cache_find_any_parent(mrt, c->mfc_parent); - if (proxy && proxy->mfc_un.res.ttls[vifi] < 255) - return c; - } - -skip: - return ipmr_cache_find_any_parent(mrt, vifi); + return mr_mfc_find_any_parent(mrt, vifi); + return mr_mfc_find_any(mrt, vifi, &arg); } /* Look for a (S,G,iif) entry if parent != -1 */ @@ -1055,15 +1033,8 @@ static struct mfc_cache *ipmr_cache_find_parent(struct mr_table *mrt, .mfc_mcastgrp = mcastgrp, .mfc_origin = origin, }; - struct rhlist_head *tmp, *list; - struct mfc_cache *c; - - list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params); - rhl_for_each_entry_rcu(c, tmp, list, mnode) - if (parent == -1 || parent == c->mfc_parent) - return c; - return NULL; + return mr_mfc_find_parent(mrt, &arg, parent); } /* Allocate a multicast cache entry */ @@ -1072,9 +1043,9 @@ static struct mfc_cache *ipmr_cache_alloc(void) struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); if (c) { - c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; - c->mfc_un.res.minvif = MAXVIFS; - refcount_set(&c->mfc_un.res.refcount, 1); + c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; + c->_c.mfc_un.res.minvif = MAXVIFS; + refcount_set(&c->_c.mfc_un.res.refcount, 1); } return c; } @@ -1084,8 +1055,8 @@ static struct mfc_cache *ipmr_cache_alloc_unres(void) struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); if (c) { - skb_queue_head_init(&c->mfc_un.unres.unresolved); - c->mfc_un.unres.expires = jiffies + 10*HZ; + skb_queue_head_init(&c->_c.mfc_un.unres.unresolved); + c->_c.mfc_un.unres.expires = jiffies + 10 * HZ; } return c; } @@ -1098,12 +1069,13 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, struct nlmsgerr *e; /* Play the pending entries through our router */ - while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) { + while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) { if (ip_hdr(skb)->version == 0) { struct nlmsghdr *nlh = skb_pull(skb, sizeof(struct iphdr)); - if (__ipmr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) { + if (mr_fill_mroute(mrt, skb, &c->_c, + nlmsg_data(nlh)) > 0) { nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh; } else { @@ -1211,7 +1183,7 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, int err; spin_lock_bh(&mfc_unres_lock); - list_for_each_entry(c, &mrt->mfc_unres_queue, list) { + list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) { if (c->mfc_mcastgrp == iph->daddr && c->mfc_origin == iph->saddr) { found = true; @@ -1230,12 +1202,13 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, } /* Fill in the new cache entry */ - c->mfc_parent = -1; + c->_c.mfc_parent = -1; c->mfc_origin = iph->saddr; c->mfc_mcastgrp = iph->daddr; /* Reflect first query at mrouted. */ err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE); + if (err < 0) { /* If the report failed throw the cache entry out - Brad Parker @@ -1248,15 +1221,16 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, } atomic_inc(&mrt->cache_resolve_queue_len); - list_add(&c->list, &mrt->mfc_unres_queue); + list_add(&c->_c.list, &mrt->mfc_unres_queue); mroute_netlink_event(mrt, c, RTM_NEWROUTE); if (atomic_read(&mrt->cache_resolve_queue_len) == 1) - mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires); + mod_timer(&mrt->ipmr_expire_timer, + c->_c.mfc_un.unres.expires); } /* See if we can append the packet */ - if (c->mfc_un.unres.unresolved.qlen > 3) { + if (c->_c.mfc_un.unres.unresolved.qlen > 3) { kfree_skb(skb); err = -ENOBUFS; } else { @@ -1264,7 +1238,7 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, skb->dev = dev; skb->skb_iif = dev->ifindex; } - skb_queue_tail(&c->mfc_un.unres.unresolved, skb); + skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb); err = 0; } @@ -1286,8 +1260,8 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent) rcu_read_unlock(); if (!c) return -ENOENT; - rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params); - list_del_rcu(&c->list); + rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ipmr_rht_params); + list_del_rcu(&c->_c.list); call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id); mroute_netlink_event(mrt, c, RTM_DELROUTE); ipmr_cache_put(c); @@ -1299,6 +1273,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, struct mfcctl *mfc, int mrtsock, int parent) { struct mfc_cache *uc, *c; + struct mr_mfc *_uc; bool found; int ret; @@ -1312,10 +1287,10 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, rcu_read_unlock(); if (c) { write_lock_bh(&mrt_lock); - c->mfc_parent = mfc->mfcc_parent; - ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls); + c->_c.mfc_parent = mfc->mfcc_parent; + ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls); if (!mrtsock) - c->mfc_flags |= MFC_STATIC; + c->_c.mfc_flags |= MFC_STATIC; write_unlock_bh(&mrt_lock); call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c, mrt->id); @@ -1333,28 +1308,29 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, c->mfc_origin = mfc->mfcc_origin.s_addr; c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr; - c->mfc_parent = mfc->mfcc_parent; - ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls); + c->_c.mfc_parent = mfc->mfcc_parent; + ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls); if (!mrtsock) - c->mfc_flags |= MFC_STATIC; + c->_c.mfc_flags |= MFC_STATIC; - ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->mnode, + ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode, ipmr_rht_params); if (ret) { pr_err("ipmr: rhtable insert error %d\n", ret); ipmr_cache_free(c); return ret; } - list_add_tail_rcu(&c->list, &mrt->mfc_cache_list); + list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list); /* Check to see if we resolved a queued list. If so we * need to send on the frames and tidy up. */ found = false; spin_lock_bh(&mfc_unres_lock); - list_for_each_entry(uc, &mrt->mfc_unres_queue, list) { + list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) { + uc = (struct mfc_cache *)_uc; if (uc->mfc_origin == c->mfc_origin && uc->mfc_mcastgrp == c->mfc_mcastgrp) { - list_del(&uc->list); + list_del(&_uc->list); atomic_dec(&mrt->cache_resolve_queue_len); found = true; break; @@ -1377,7 +1353,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, static void mroute_clean_tables(struct mr_table *mrt, bool all) { struct net *net = read_pnet(&mrt->net); - struct mfc_cache *c, *tmp; + struct mr_mfc *c, *tmp; + struct mfc_cache *cache; LIST_HEAD(list); int i; @@ -1395,18 +1372,20 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all) continue; rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params); list_del_rcu(&c->list); - call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, + cache = (struct mfc_cache *)c; + call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, cache, mrt->id); - mroute_netlink_event(mrt, c, RTM_DELROUTE); - ipmr_cache_put(c); + mroute_netlink_event(mrt, cache, RTM_DELROUTE); + ipmr_cache_put(cache); } if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { spin_lock_bh(&mfc_unres_lock); list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) { list_del(&c->list); - mroute_netlink_event(mrt, c, RTM_DELROUTE); - ipmr_destroy_unres(mrt, c); + cache = (struct mfc_cache *)c; + mroute_netlink_event(mrt, cache, RTM_DELROUTE); + ipmr_destroy_unres(mrt, cache); } spin_unlock_bh(&mfc_unres_lock); } @@ -1698,9 +1677,9 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) rcu_read_lock(); c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr); if (c) { - sr.pktcnt = c->mfc_un.res.pkt; - sr.bytecnt = c->mfc_un.res.bytes; - sr.wrong_if = c->mfc_un.res.wrong_if; + sr.pktcnt = c->_c.mfc_un.res.pkt; + sr.bytecnt = c->_c.mfc_un.res.bytes; + sr.wrong_if = c->_c.mfc_un.res.wrong_if; rcu_read_unlock(); if (copy_to_user(arg, &sr, sizeof(sr))) @@ -1772,9 +1751,9 @@ int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) rcu_read_lock(); c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr); if (c) { - sr.pktcnt = c->mfc_un.res.pkt; - sr.bytecnt = c->mfc_un.res.bytes; - sr.wrong_if = c->mfc_un.res.wrong_if; + sr.pktcnt = c->_c.mfc_un.res.pkt; + sr.bytecnt = c->_c.mfc_un.res.bytes; + sr.wrong_if = c->_c.mfc_un.res.wrong_if; rcu_read_unlock(); if (copy_to_user(arg, &sr, sizeof(sr))) @@ -1998,26 +1977,26 @@ static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev) /* "local" means that we should preserve one skb (for local delivery) */ static void ip_mr_forward(struct net *net, struct mr_table *mrt, struct net_device *dev, struct sk_buff *skb, - struct mfc_cache *cache, int local) + struct mfc_cache *c, int local) { int true_vifi = ipmr_find_vif(mrt, dev); int psend = -1; int vif, ct; - vif = cache->mfc_parent; - cache->mfc_un.res.pkt++; - cache->mfc_un.res.bytes += skb->len; - cache->mfc_un.res.lastuse = jiffies; + vif = c->_c.mfc_parent; + c->_c.mfc_un.res.pkt++; + c->_c.mfc_un.res.bytes += skb->len; + c->_c.mfc_un.res.lastuse = jiffies; - if (cache->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) { + if (c->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) { struct mfc_cache *cache_proxy; /* For an (*,G) entry, we only check that the incomming * interface is part of the static tree. */ - cache_proxy = ipmr_cache_find_any_parent(mrt, vif); + cache_proxy = mr_mfc_find_any_parent(mrt, vif); if (cache_proxy && - cache_proxy->mfc_un.res.ttls[true_vifi] < 255) + cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) goto forward; } @@ -2038,7 +2017,7 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt, goto dont_forward; } - cache->mfc_un.res.wrong_if++; + c->_c.mfc_un.res.wrong_if++; if (true_vifi >= 0 && mrt->mroute_do_assert && /* pimsm uses asserts, when switching from RPT to SPT, @@ -2047,10 +2026,11 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt, * large chunk of pimd to kernel. Ough... --ANK */ (mrt->mroute_do_pim || - cache->mfc_un.res.ttls[true_vifi] < 255) && + c->_c.mfc_un.res.ttls[true_vifi] < 255) && time_after(jiffies, - cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { - cache->mfc_un.res.last_assert = jiffies; + c->_c.mfc_un.res.last_assert + + MFC_ASSERT_THRESH)) { + c->_c.mfc_un.res.last_assert = jiffies; ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF); } goto dont_forward; @@ -2061,33 +2041,33 @@ forward: mrt->vif_table[vif].bytes_in += skb->len; /* Forward the frame */ - if (cache->mfc_origin == htonl(INADDR_ANY) && - cache->mfc_mcastgrp == htonl(INADDR_ANY)) { + if (c->mfc_origin == htonl(INADDR_ANY) && + c->mfc_mcastgrp == htonl(INADDR_ANY)) { if (true_vifi >= 0 && - true_vifi != cache->mfc_parent && + true_vifi != c->_c.mfc_parent && ip_hdr(skb)->ttl > - cache->mfc_un.res.ttls[cache->mfc_parent]) { + c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) { /* It's an (*,*) entry and the packet is not coming from * the upstream: forward the packet to the upstream * only. */ - psend = cache->mfc_parent; + psend = c->_c.mfc_parent; goto last_forward; } goto dont_forward; } - for (ct = cache->mfc_un.res.maxvif - 1; - ct >= cache->mfc_un.res.minvif; ct--) { + for (ct = c->_c.mfc_un.res.maxvif - 1; + ct >= c->_c.mfc_un.res.minvif; ct--) { /* For (*,G) entry, don't forward to the incoming interface */ - if ((cache->mfc_origin != htonl(INADDR_ANY) || + if ((c->mfc_origin != htonl(INADDR_ANY) || ct != true_vifi) && - ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) { + ip_hdr(skb)->ttl > c->_c.mfc_un.res.ttls[ct]) { if (psend != -1) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) ipmr_queue_xmit(net, mrt, true_vifi, - skb2, cache, psend); + skb2, c, psend); } psend = ct; } @@ -2099,9 +2079,9 @@ last_forward: if (skb2) ipmr_queue_xmit(net, mrt, true_vifi, skb2, - cache, psend); + c, psend); } else { - ipmr_queue_xmit(net, mrt, true_vifi, skb, cache, psend); + ipmr_queue_xmit(net, mrt, true_vifi, skb, c, psend); return; } } @@ -2299,62 +2279,6 @@ drop: } #endif -static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, - struct mfc_cache *c, struct rtmsg *rtm) -{ - struct rta_mfc_stats mfcs; - struct nlattr *mp_attr; - struct rtnexthop *nhp; - unsigned long lastuse; - int ct; - - /* If cache is unresolved, don't try to parse IIF and OIF */ - if (c->mfc_parent >= MAXVIFS) { - rtm->rtm_flags |= RTNH_F_UNRESOLVED; - return -ENOENT; - } - - if (VIF_EXISTS(mrt, c->mfc_parent) && - nla_put_u32(skb, RTA_IIF, mrt->vif_table[c->mfc_parent].dev->ifindex) < 0) - return -EMSGSIZE; - - if (c->mfc_flags & MFC_OFFLOAD) - rtm->rtm_flags |= RTNH_F_OFFLOAD; - - if (!(mp_attr = nla_nest_start(skb, RTA_MULTIPATH))) - return -EMSGSIZE; - - for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { - if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) { - if (!(nhp = nla_reserve_nohdr(skb, sizeof(*nhp)))) { - nla_nest_cancel(skb, mp_attr); - return -EMSGSIZE; - } - - nhp->rtnh_flags = 0; - nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; - nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex; - nhp->rtnh_len = sizeof(*nhp); - } - } - - nla_nest_end(skb, mp_attr); - - lastuse = READ_ONCE(c->mfc_un.res.lastuse); - lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0; - - mfcs.mfcs_packets = c->mfc_un.res.pkt; - mfcs.mfcs_bytes = c->mfc_un.res.bytes; - mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if; - if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) || - nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse), - RTA_PAD)) - return -EMSGSIZE; - - rtm->rtm_type = RTN_MULTICAST; - return 1; -} - int ipmr_get_route(struct net *net, struct sk_buff *skb, __be32 saddr, __be32 daddr, struct rtmsg *rtm, u32 portid) @@ -2412,7 +2336,7 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb, } read_lock(&mrt_lock); - err = __ipmr_fill_mroute(mrt, skb, cache, rtm); + err = mr_fill_mroute(mrt, skb, &cache->_c, rtm); read_unlock(&mrt_lock); rcu_read_unlock(); return err; @@ -2440,7 +2364,7 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, goto nla_put_failure; rtm->rtm_type = RTN_MULTICAST; rtm->rtm_scope = RT_SCOPE_UNIVERSE; - if (c->mfc_flags & MFC_STATIC) + if (c->_c.mfc_flags & MFC_STATIC) rtm->rtm_protocol = RTPROT_STATIC; else rtm->rtm_protocol = RTPROT_MROUTED; @@ -2449,7 +2373,7 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, if (nla_put_in_addr(skb, RTA_SRC, c->mfc_origin) || nla_put_in_addr(skb, RTA_DST, c->mfc_mcastgrp)) goto nla_put_failure; - err = __ipmr_fill_mroute(mrt, skb, c, rtm); + err = mr_fill_mroute(mrt, skb, &c->_c, rtm); /* do not break the dump if cache is unresolved */ if (err < 0 && err != -ENOENT) goto nla_put_failure; @@ -2462,6 +2386,14 @@ nla_put_failure: return -EMSGSIZE; } +static int _ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, + u32 portid, u32 seq, struct mr_mfc *c, int cmd, + int flags) +{ + return ipmr_fill_mroute(mrt, skb, portid, seq, (struct mfc_cache *)c, + cmd, flags); +} + static size_t mroute_msgsize(bool unresolved, int maxvif) { size_t len = @@ -2490,7 +2422,8 @@ static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, struct sk_buff *skb; int err = -ENOBUFS; - skb = nlmsg_new(mroute_msgsize(mfc->mfc_parent >= MAXVIFS, mrt->maxvif), + skb = nlmsg_new(mroute_msgsize(mfc->_c.mfc_parent >= MAXVIFS, + mrt->maxvif), GFP_ATOMIC); if (!skb) goto errout; @@ -2634,62 +2567,8 @@ errout_free: static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = sock_net(skb->sk); - struct mr_table *mrt; - struct mfc_cache *mfc; - unsigned int t = 0, s_t; - unsigned int e = 0, s_e; - - s_t = cb->args[0]; - s_e = cb->args[1]; - - rcu_read_lock(); - ipmr_for_each_table(mrt, net) { - if (t < s_t) - goto next_table; - list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) { - if (e < s_e) - goto next_entry; - if (ipmr_fill_mroute(mrt, skb, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - mfc, RTM_NEWROUTE, - NLM_F_MULTI) < 0) - goto done; -next_entry: - e++; - } - e = 0; - s_e = 0; - - spin_lock_bh(&mfc_unres_lock); - list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) { - if (e < s_e) - goto next_entry2; - if (ipmr_fill_mroute(mrt, skb, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - mfc, RTM_NEWROUTE, - NLM_F_MULTI) < 0) { - spin_unlock_bh(&mfc_unres_lock); - goto done; - } -next_entry2: - e++; - } - spin_unlock_bh(&mfc_unres_lock); - e = 0; - s_e = 0; -next_table: - t++; - } -done: - rcu_read_unlock(); - - cb->args[1] = e; - cb->args[0] = t; - - return skb->len; + return mr_rtm_dumproute(skb, cb, ipmr_mr_table_iter, + _ipmr_fill_mroute, &mfc_unres_lock); } static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = { @@ -2946,31 +2825,11 @@ out: /* The /proc interfaces to multicast routing : * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif */ -struct ipmr_vif_iter { - struct seq_net_private p; - struct mr_table *mrt; - int ct; -}; - -static struct vif_device *ipmr_vif_seq_idx(struct net *net, - struct ipmr_vif_iter *iter, - loff_t pos) -{ - struct mr_table *mrt = iter->mrt; - - for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) { - if (!VIF_EXISTS(mrt, iter->ct)) - continue; - if (pos-- == 0) - return &mrt->vif_table[iter->ct]; - } - return NULL; -} static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos) __acquires(mrt_lock) { - struct ipmr_vif_iter *iter = seq->private; + struct mr_vif_iter *iter = seq->private; struct net *net = seq_file_net(seq); struct mr_table *mrt; @@ -2981,26 +2840,7 @@ static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos) iter->mrt = mrt; read_lock(&mrt_lock); - return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1) - : SEQ_START_TOKEN; -} - -static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - struct ipmr_vif_iter *iter = seq->private; - struct net *net = seq_file_net(seq); - struct mr_table *mrt = iter->mrt; - - ++*pos; - if (v == SEQ_START_TOKEN) - return ipmr_vif_seq_idx(net, iter, 0); - - while (++iter->ct < mrt->maxvif) { - if (!VIF_EXISTS(mrt, iter->ct)) - continue; - return &mrt->vif_table[iter->ct]; - } - return NULL; + return mr_vif_seq_start(seq, pos); } static void ipmr_vif_seq_stop(struct seq_file *seq, void *v) @@ -3011,7 +2851,7 @@ static void ipmr_vif_seq_stop(struct seq_file *seq, void *v) static int ipmr_vif_seq_show(struct seq_file *seq, void *v) { - struct ipmr_vif_iter *iter = seq->private; + struct mr_vif_iter *iter = seq->private; struct mr_table *mrt = iter->mrt; if (v == SEQ_START_TOKEN) { @@ -3019,7 +2859,8 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v) "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n"); } else { const struct vif_device *vif = v; - const char *name = vif->dev ? vif->dev->name : "none"; + const char *name = vif->dev ? + vif->dev->name : "none"; seq_printf(seq, "%2td %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", @@ -3033,7 +2874,7 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v) static const struct seq_operations ipmr_vif_seq_ops = { .start = ipmr_vif_seq_start, - .next = ipmr_vif_seq_next, + .next = mr_vif_seq_next, .stop = ipmr_vif_seq_stop, .show = ipmr_vif_seq_show, }; @@ -3041,7 +2882,7 @@ static const struct seq_operations ipmr_vif_seq_ops = { static int ipmr_vif_open(struct inode *inode, struct file *file) { return seq_open_net(inode, file, &ipmr_vif_seq_ops, - sizeof(struct ipmr_vif_iter)); + sizeof(struct mr_vif_iter)); } static const struct file_operations ipmr_vif_fops = { @@ -3051,40 +2892,8 @@ static const struct file_operations ipmr_vif_fops = { .release = seq_release_net, }; -struct ipmr_mfc_iter { - struct seq_net_private p; - struct mr_table *mrt; - struct list_head *cache; -}; - -static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net, - struct ipmr_mfc_iter *it, loff_t pos) -{ - struct mr_table *mrt = it->mrt; - struct mfc_cache *mfc; - - rcu_read_lock(); - it->cache = &mrt->mfc_cache_list; - list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) - if (pos-- == 0) - return mfc; - rcu_read_unlock(); - - spin_lock_bh(&mfc_unres_lock); - it->cache = &mrt->mfc_unres_queue; - list_for_each_entry(mfc, it->cache, list) - if (pos-- == 0) - return mfc; - spin_unlock_bh(&mfc_unres_lock); - - it->cache = NULL; - return NULL; -} - - static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) { - struct ipmr_mfc_iter *it = seq->private; struct net *net = seq_file_net(seq); struct mr_table *mrt; @@ -3092,54 +2901,7 @@ static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) if (!mrt) return ERR_PTR(-ENOENT); - it->mrt = mrt; - it->cache = NULL; - return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1) - : SEQ_START_TOKEN; -} - -static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - struct ipmr_mfc_iter *it = seq->private; - struct net *net = seq_file_net(seq); - struct mr_table *mrt = it->mrt; - struct mfc_cache *mfc = v; - - ++*pos; - - if (v == SEQ_START_TOKEN) - return ipmr_mfc_seq_idx(net, seq->private, 0); - - if (mfc->list.next != it->cache) - return list_entry(mfc->list.next, struct mfc_cache, list); - - if (it->cache == &mrt->mfc_unres_queue) - goto end_of_list; - - /* exhausted cache_array, show unresolved */ - rcu_read_unlock(); - it->cache = &mrt->mfc_unres_queue; - - spin_lock_bh(&mfc_unres_lock); - if (!list_empty(it->cache)) - return list_first_entry(it->cache, struct mfc_cache, list); - -end_of_list: - spin_unlock_bh(&mfc_unres_lock); - it->cache = NULL; - - return NULL; -} - -static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) -{ - struct ipmr_mfc_iter *it = seq->private; - struct mr_table *mrt = it->mrt; - - if (it->cache == &mrt->mfc_unres_queue) - spin_unlock_bh(&mfc_unres_lock); - else if (it->cache == &mrt->mfc_cache_list) - rcu_read_unlock(); + return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock); } static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) @@ -3151,26 +2913,26 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) "Group Origin Iif Pkts Bytes Wrong Oifs\n"); } else { const struct mfc_cache *mfc = v; - const struct ipmr_mfc_iter *it = seq->private; + const struct mr_mfc_iter *it = seq->private; const struct mr_table *mrt = it->mrt; seq_printf(seq, "%08X %08X %-3hd", (__force u32) mfc->mfc_mcastgrp, (__force u32) mfc->mfc_origin, - mfc->mfc_parent); + mfc->_c.mfc_parent); if (it->cache != &mrt->mfc_unres_queue) { seq_printf(seq, " %8lu %8lu %8lu", - mfc->mfc_un.res.pkt, - mfc->mfc_un.res.bytes, - mfc->mfc_un.res.wrong_if); - for (n = mfc->mfc_un.res.minvif; - n < mfc->mfc_un.res.maxvif; n++) { + mfc->_c.mfc_un.res.pkt, + mfc->_c.mfc_un.res.bytes, + mfc->_c.mfc_un.res.wrong_if); + for (n = mfc->_c.mfc_un.res.minvif; + n < mfc->_c.mfc_un.res.maxvif; n++) { if (VIF_EXISTS(mrt, n) && - mfc->mfc_un.res.ttls[n] < 255) + mfc->_c.mfc_un.res.ttls[n] < 255) seq_printf(seq, " %2d:%-3d", - n, mfc->mfc_un.res.ttls[n]); + n, mfc->_c.mfc_un.res.ttls[n]); } } else { /* unresolved mfc_caches don't contain @@ -3185,15 +2947,15 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) static const struct seq_operations ipmr_mfc_seq_ops = { .start = ipmr_mfc_seq_start, - .next = ipmr_mfc_seq_next, - .stop = ipmr_mfc_seq_stop, + .next = mr_mfc_seq_next, + .stop = mr_mfc_seq_stop, .show = ipmr_mfc_seq_show, }; static int ipmr_mfc_open(struct inode *inode, struct file *file) { return seq_open_net(inode, file, &ipmr_mfc_seq_ops, - sizeof(struct ipmr_mfc_iter)); + sizeof(struct mr_mfc_iter)); } static const struct file_operations ipmr_mfc_fops = { @@ -3229,7 +2991,7 @@ static int ipmr_dump(struct net *net, struct notifier_block *nb) ipmr_for_each_table(mrt, net) { struct vif_device *v = &mrt->vif_table[0]; - struct mfc_cache *mfc; + struct mr_mfc *mfc; int vifi; /* Notifiy on table VIF entries */ @@ -3246,7 +3008,8 @@ static int ipmr_dump(struct net *net, struct notifier_block *nb) /* Notify on table MFC entries */ list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) call_ipmr_mfc_entry_notifier(nb, net, - FIB_EVENT_ENTRY_ADD, mfc, + FIB_EVENT_ENTRY_ADD, + (struct mfc_cache *)mfc, mrt->id); } diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c new file mode 100644 index 000000000000..8ba55bfda817 --- /dev/null +++ b/net/ipv4/ipmr_base.c @@ -0,0 +1,323 @@ +/* Linux multicast routing support + * Common logic shared by IPv4 [ipmr] and IPv6 [ip6mr] implementation + */ + +#include <linux/mroute_base.h> + +/* Sets everything common except 'dev', since that is done under locking */ +void vif_device_init(struct vif_device *v, + struct net_device *dev, + unsigned long rate_limit, + unsigned char threshold, + unsigned short flags, + unsigned short get_iflink_mask) +{ + v->dev = NULL; + v->bytes_in = 0; + v->bytes_out = 0; + v->pkt_in = 0; + v->pkt_out = 0; + v->rate_limit = rate_limit; + v->flags = flags; + v->threshold = threshold; + if (v->flags & get_iflink_mask) + v->link = dev_get_iflink(dev); + else + v->link = dev->ifindex; +} +EXPORT_SYMBOL(vif_device_init); + +struct mr_table * +mr_table_alloc(struct net *net, u32 id, + struct mr_table_ops *ops, + void (*expire_func)(struct timer_list *t), + void (*table_set)(struct mr_table *mrt, + struct net *net)) +{ + struct mr_table *mrt; + + mrt = kzalloc(sizeof(*mrt), GFP_KERNEL); + if (!mrt) + return NULL; + mrt->id = id; + write_pnet(&mrt->net, net); + + mrt->ops = *ops; + rhltable_init(&mrt->mfc_hash, mrt->ops.rht_params); + INIT_LIST_HEAD(&mrt->mfc_cache_list); + INIT_LIST_HEAD(&mrt->mfc_unres_queue); + + timer_setup(&mrt->ipmr_expire_timer, expire_func, 0); + + mrt->mroute_reg_vif_num = -1; + table_set(mrt, net); + return mrt; +} +EXPORT_SYMBOL(mr_table_alloc); + +void *mr_mfc_find_parent(struct mr_table *mrt, void *hasharg, int parent) +{ + struct rhlist_head *tmp, *list; + struct mr_mfc *c; + + list = rhltable_lookup(&mrt->mfc_hash, hasharg, *mrt->ops.rht_params); + rhl_for_each_entry_rcu(c, tmp, list, mnode) + if (parent == -1 || parent == c->mfc_parent) + return c; + + return NULL; +} +EXPORT_SYMBOL(mr_mfc_find_parent); + +void *mr_mfc_find_any_parent(struct mr_table *mrt, int vifi) +{ + struct rhlist_head *tmp, *list; + struct mr_mfc *c; + + list = rhltable_lookup(&mrt->mfc_hash, mrt->ops.cmparg_any, + *mrt->ops.rht_params); + rhl_for_each_entry_rcu(c, tmp, list, mnode) + if (c->mfc_un.res.ttls[vifi] < 255) + return c; + + return NULL; +} +EXPORT_SYMBOL(mr_mfc_find_any_parent); + +void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg) +{ + struct rhlist_head *tmp, *list; + struct mr_mfc *c, *proxy; + + list = rhltable_lookup(&mrt->mfc_hash, hasharg, *mrt->ops.rht_params); + rhl_for_each_entry_rcu(c, tmp, list, mnode) { + if (c->mfc_un.res.ttls[vifi] < 255) + return c; + + /* It's ok if the vifi is part of the static tree */ + proxy = mr_mfc_find_any_parent(mrt, c->mfc_parent); + if (proxy && proxy->mfc_un.res.ttls[vifi] < 255) + return c; + } + + return mr_mfc_find_any_parent(mrt, vifi); +} +EXPORT_SYMBOL(mr_mfc_find_any); + +#ifdef CONFIG_PROC_FS +void *mr_vif_seq_idx(struct net *net, struct mr_vif_iter *iter, loff_t pos) +{ + struct mr_table *mrt = iter->mrt; + + for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) { + if (!VIF_EXISTS(mrt, iter->ct)) + continue; + if (pos-- == 0) + return &mrt->vif_table[iter->ct]; + } + return NULL; +} +EXPORT_SYMBOL(mr_vif_seq_idx); + +void *mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct mr_vif_iter *iter = seq->private; + struct net *net = seq_file_net(seq); + struct mr_table *mrt = iter->mrt; + + ++*pos; + if (v == SEQ_START_TOKEN) + return mr_vif_seq_idx(net, iter, 0); + + while (++iter->ct < mrt->maxvif) { + if (!VIF_EXISTS(mrt, iter->ct)) + continue; + return &mrt->vif_table[iter->ct]; + } + return NULL; +} +EXPORT_SYMBOL(mr_vif_seq_next); + +void *mr_mfc_seq_idx(struct net *net, + struct mr_mfc_iter *it, loff_t pos) +{ + struct mr_table *mrt = it->mrt; + struct mr_mfc *mfc; + + rcu_read_lock(); + it->cache = &mrt->mfc_cache_list; + list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) + if (pos-- == 0) + return mfc; + rcu_read_unlock(); + + spin_lock_bh(it->lock); + it->cache = &mrt->mfc_unres_queue; + list_for_each_entry(mfc, it->cache, list) + if (pos-- == 0) + return mfc; + spin_unlock_bh(it->lock); + + it->cache = NULL; + return NULL; +} +EXPORT_SYMBOL(mr_mfc_seq_idx); + +void *mr_mfc_seq_next(struct seq_file *seq, void *v, + loff_t *pos) +{ + struct mr_mfc_iter *it = seq->private; + struct net *net = seq_file_net(seq); + struct mr_table *mrt = it->mrt; + struct mr_mfc *c = v; + + ++*pos; + + if (v == SEQ_START_TOKEN) + return mr_mfc_seq_idx(net, seq->private, 0); + + if (c->list.next != it->cache) + return list_entry(c->list.next, struct mr_mfc, list); + + if (it->cache == &mrt->mfc_unres_queue) + goto end_of_list; + + /* exhausted cache_array, show unresolved */ + rcu_read_unlock(); + it->cache = &mrt->mfc_unres_queue; + + spin_lock_bh(it->lock); + if (!list_empty(it->cache)) + return list_first_entry(it->cache, struct mr_mfc, list); + +end_of_list: + spin_unlock_bh(it->lock); + it->cache = NULL; + + return NULL; +} +EXPORT_SYMBOL(mr_mfc_seq_next); +#endif + +int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, + struct mr_mfc *c, struct rtmsg *rtm) +{ + struct rta_mfc_stats mfcs; + struct nlattr *mp_attr; + struct rtnexthop *nhp; + unsigned long lastuse; + int ct; + + /* If cache is unresolved, don't try to parse IIF and OIF */ + if (c->mfc_parent >= MAXVIFS) { + rtm->rtm_flags |= RTNH_F_UNRESOLVED; + return -ENOENT; + } + + if (VIF_EXISTS(mrt, c->mfc_parent) && + nla_put_u32(skb, RTA_IIF, + mrt->vif_table[c->mfc_parent].dev->ifindex) < 0) + return -EMSGSIZE; + + if (c->mfc_flags & MFC_OFFLOAD) + rtm->rtm_flags |= RTNH_F_OFFLOAD; + + mp_attr = nla_nest_start(skb, RTA_MULTIPATH); + if (!mp_attr) + return -EMSGSIZE; + + for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { + if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) { + struct vif_device *vif; + + nhp = nla_reserve_nohdr(skb, sizeof(*nhp)); + if (!nhp) { + nla_nest_cancel(skb, mp_attr); + return -EMSGSIZE; + } + + nhp->rtnh_flags = 0; + nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; + vif = &mrt->vif_table[ct]; + nhp->rtnh_ifindex = vif->dev->ifindex; + nhp->rtnh_len = sizeof(*nhp); + } + } + + nla_nest_end(skb, mp_attr); + + lastuse = READ_ONCE(c->mfc_un.res.lastuse); + lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0; + + mfcs.mfcs_packets = c->mfc_un.res.pkt; + mfcs.mfcs_bytes = c->mfc_un.res.bytes; + mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if; + if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) || + nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse), + RTA_PAD)) + return -EMSGSIZE; + + rtm->rtm_type = RTN_MULTICAST; + return 1; +} +EXPORT_SYMBOL(mr_fill_mroute); + +int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb, + struct mr_table *(*iter)(struct net *net, + struct mr_table *mrt), + int (*fill)(struct mr_table *mrt, + struct sk_buff *skb, + u32 portid, u32 seq, struct mr_mfc *c, + int cmd, int flags), + spinlock_t *lock) +{ + unsigned int t = 0, e = 0, s_t = cb->args[0], s_e = cb->args[1]; + struct net *net = sock_net(skb->sk); + struct mr_table *mrt; + struct mr_mfc *mfc; + + rcu_read_lock(); + for (mrt = iter(net, NULL); mrt; mrt = iter(net, mrt)) { + if (t < s_t) + goto next_table; + list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) { + if (e < s_e) + goto next_entry; + if (fill(mrt, skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, mfc, + RTM_NEWROUTE, NLM_F_MULTI) < 0) + goto done; +next_entry: + e++; + } + e = 0; + s_e = 0; + + spin_lock_bh(lock); + list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) { + if (e < s_e) + goto next_entry2; + if (fill(mrt, skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, mfc, + RTM_NEWROUTE, NLM_F_MULTI) < 0) { + spin_unlock_bh(lock); + goto done; + } +next_entry2: + e++; + } + spin_unlock_bh(lock); + e = 0; + s_e = 0; +next_table: + t++; + } +done: + rcu_read_unlock(); + + cb->args[1] = e; + cb->args[0] = t; + + return skb->len; +} +EXPORT_SYMBOL(mr_rtm_dumproute); diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 4b02ab39ebc5..08b3e48f44fc 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -840,6 +840,7 @@ static struct pernet_operations clusterip_net_ops = { .exit = clusterip_net_exit, .id = &clusterip_net_id, .size = sizeof(struct clusterip_net), + .async = true, }; static int __init clusterip_tg_init(void) diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index a0d3ad60a411..57244b62a4fc 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -118,6 +118,7 @@ static void __net_exit defrag4_net_exit(struct net *net) static struct pernet_operations defrag4_net_ops = { .exit = defrag4_net_exit, + .async = true, }; static int __init nf_defrag_init(void) diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index fdabc70283b6..d97e83b2dd33 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -556,4 +556,3 @@ int __init ip_misc_proc_init(void) { return register_pernet_subsys(&ip_proc_ops); } - diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 26eefa2eaa44..3bb686dac273 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1783,7 +1783,7 @@ static void ip_multipath_l3_keys(const struct sk_buff *skb, /* if skb is set it will be used and fl4 can be NULL */ int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4, - const struct sk_buff *skb) + const struct sk_buff *skb, struct flow_keys *flkeys) { struct net *net = fi->fib_net; struct flow_keys hash_keys; @@ -1810,14 +1810,23 @@ int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4, if (skb->l4_hash) return skb_get_hash_raw(skb) >> 1; memset(&hash_keys, 0, sizeof(hash_keys)); - skb_flow_dissect_flow_keys(skb, &keys, flag); - hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; - hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; - hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; - hash_keys.ports.src = keys.ports.src; - hash_keys.ports.dst = keys.ports.dst; - hash_keys.basic.ip_proto = keys.basic.ip_proto; + if (flkeys) { + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src; + hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst; + hash_keys.ports.src = flkeys->ports.src; + hash_keys.ports.dst = flkeys->ports.dst; + hash_keys.basic.ip_proto = flkeys->basic.ip_proto; + } else { + skb_flow_dissect_flow_keys(skb, &keys, flag); + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; + hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; + hash_keys.ports.src = keys.ports.src; + hash_keys.ports.dst = keys.ports.dst; + hash_keys.basic.ip_proto = keys.basic.ip_proto; + } } else { memset(&hash_keys, 0, sizeof(hash_keys)); hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; @@ -1838,11 +1847,12 @@ int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4, static int ip_mkroute_input(struct sk_buff *skb, struct fib_result *res, struct in_device *in_dev, - __be32 daddr, __be32 saddr, u32 tos) + __be32 daddr, __be32 saddr, u32 tos, + struct flow_keys *hkeys) { #ifdef CONFIG_IP_ROUTE_MULTIPATH if (res->fi && res->fi->fib_nhs > 1) { - int h = fib_multipath_hash(res->fi, NULL, skb); + int h = fib_multipath_hash(res->fi, NULL, skb, hkeys); fib_select_multipath(res, h); } @@ -1868,13 +1878,14 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, struct fib_result *res) { struct in_device *in_dev = __in_dev_get_rcu(dev); + struct flow_keys *flkeys = NULL, _flkeys; + struct net *net = dev_net(dev); struct ip_tunnel_info *tun_info; - struct flowi4 fl4; + int err = -EINVAL; unsigned int flags = 0; u32 itag = 0; struct rtable *rth; - int err = -EINVAL; - struct net *net = dev_net(dev); + struct flowi4 fl4; bool do_cache; /* IP on this device is disabled. */ @@ -1933,6 +1944,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, fl4.daddr = daddr; fl4.saddr = saddr; fl4.flowi4_uid = sock_net_uid(net, NULL); + + if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys)) + flkeys = &_flkeys; + err = fib_lookup(net, &fl4, res, 0); if (err != 0) { if (!IN_DEV_FORWARD(in_dev)) @@ -1958,7 +1973,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (res->type != RTN_UNICAST) goto martian_destination; - err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos); + err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys); out: return err; brd_input: diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index a471f696e13c..c92014cb1e16 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -97,10 +97,9 @@ struct bbr { packet_conservation:1, /* use packet conservation? */ restore_cwnd:1, /* decided to revert cwnd to old value */ round_start:1, /* start of packet-timed tx->ack round? */ - tso_segs_goal:7, /* segments we want in each skb we send */ idle_restart:1, /* restarting after idle? */ probe_rtt_round_done:1, /* a BBR_PROBE_RTT round at 4 pkts? */ - unused:5, + unused:12, lt_is_sampling:1, /* taking long-term ("LT") samples now? */ lt_rtt_cnt:7, /* round trips in long-term interval */ lt_use_bw:1; /* use lt_bw as our bw estimate? */ @@ -261,23 +260,25 @@ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain) sk->sk_pacing_rate = rate; } -/* Return count of segments we want in the skbs we send, or 0 for default. */ -static u32 bbr_tso_segs_goal(struct sock *sk) +/* override sysctl_tcp_min_tso_segs */ +static u32 bbr_min_tso_segs(struct sock *sk) { - struct bbr *bbr = inet_csk_ca(sk); - - return bbr->tso_segs_goal; + return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2; } -static void bbr_set_tso_segs_goal(struct sock *sk) +static u32 bbr_tso_segs_goal(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - struct bbr *bbr = inet_csk_ca(sk); - u32 min_segs; + u32 segs, bytes; + + /* Sort of tcp_tso_autosize() but ignoring + * driver provided sk_gso_max_size. + */ + bytes = min_t(u32, sk->sk_pacing_rate >> sk->sk_pacing_shift, + GSO_MAX_SIZE - 1 - MAX_TCP_HEADER); + segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk)); - min_segs = sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2; - bbr->tso_segs_goal = min(tcp_tso_autosize(sk, tp->mss_cache, min_segs), - 0x7FU); + return min(segs, 0x7FU); } /* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */ @@ -348,7 +349,7 @@ static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain) cwnd = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT; /* Allow enough full-sized skbs in flight to utilize end systems. */ - cwnd += 3 * bbr->tso_segs_goal; + cwnd += 3 * bbr_tso_segs_goal(sk); /* Reduce delayed ACKs by rounding up cwnd to the next even number. */ cwnd = (cwnd + 1) & ~1U; @@ -824,7 +825,6 @@ static void bbr_main(struct sock *sk, const struct rate_sample *rs) bw = bbr_bw(sk); bbr_set_pacing_rate(sk, bw, bbr->pacing_gain); - bbr_set_tso_segs_goal(sk); bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain); } @@ -834,7 +834,6 @@ static void bbr_init(struct sock *sk) struct bbr *bbr = inet_csk_ca(sk); bbr->prior_cwnd = 0; - bbr->tso_segs_goal = 0; /* default segs per skb until first ACK */ bbr->rtt_cnt = 0; bbr->next_rtt_delivered = 0; bbr->prev_ca_state = TCP_CA_Open; @@ -936,7 +935,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = { .undo_cwnd = bbr_undo_cwnd, .cwnd_event = bbr_cwnd_event, .ssthresh = bbr_ssthresh, - .tso_segs_goal = bbr_tso_segs_goal, + .min_tso_segs = bbr_min_tso_segs, .get_info = bbr_get_info, .set_state = bbr_set_state, }; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 49d043de3476..383cac0ff0ec 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1703,8 +1703,8 @@ static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp, /* Return how many segs we'd like on a TSO packet, * to send one TSO packet per ms */ -u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, - int min_tso_segs) +static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, + int min_tso_segs) { u32 bytes, segs; @@ -1720,7 +1720,6 @@ u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, return segs; } -EXPORT_SYMBOL(tcp_tso_autosize); /* Return the number of segments we want in the skb we are transmitting. * See if congestion control module wants to decide; otherwise, autosize. @@ -1728,11 +1727,13 @@ EXPORT_SYMBOL(tcp_tso_autosize); static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) { const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; - u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0; + u32 min_tso, tso_segs; - if (!tso_segs) - tso_segs = tcp_tso_autosize(sk, mss_now, - sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); + min_tso = ca_ops->min_tso_segs ? + ca_ops->min_tso_segs(sk) : + sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs; + + tso_segs = tcp_tso_autosize(sk, mss_now, min_tso); return min_t(u32, tso_segs, sk->sk_gso_max_segs); } diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c index ec35eaa5c029..c0630013c1ae 100644 --- a/net/ipv4/tunnel4.c +++ b/net/ipv4/tunnel4.c @@ -90,7 +90,7 @@ EXPORT_SYMBOL(xfrm4_tunnel_deregister); for (handler = rcu_dereference(head); \ handler != NULL; \ handler = rcu_dereference(handler->next)) \ - + static int tunnel4_rcv(struct sk_buff *skb) { struct xfrm_tunnel *handler; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 796ac4115485..0c752dc3f93b 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -379,4 +379,3 @@ void __init xfrm4_init(void) xfrm4_protocol_init(); register_pernet_subsys(&xfrm4_net_ops); } - diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index ea71e4b0ab7a..6794ddf0547c 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -278,6 +278,7 @@ config IPV6_SUBTREES config IPV6_MROUTE bool "IPv6: multicast routing" depends on IPV6 + select IP_MROUTE_COMMON ---help--- Experimental support for IPv6 multicast forwarding. If unsure, say N. diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4facfe0b1888..b5fd116c046a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1459,6 +1459,21 @@ static bool ipv6_use_optimistic_addr(struct net *net, #endif } +static bool ipv6_allow_optimistic_dad(struct net *net, + struct inet6_dev *idev) +{ +#ifdef CONFIG_IPV6_OPTIMISTIC_DAD + if (!idev) + return false; + if (!net->ipv6.devconf_all->optimistic_dad && !idev->cnf.optimistic_dad) + return false; + + return true; +#else + return false; +#endif +} + static int ipv6_get_saddr_eval(struct net *net, struct ipv6_saddr_score *score, struct ipv6_saddr_dst *dst, @@ -1968,6 +1983,8 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) spin_lock_bh(&ifp->lock); addrconf_del_dad_work(ifp); ifp->flags |= IFA_F_TENTATIVE; + if (dad_failed) + ifp->flags &= ~IFA_F_OPTIMISTIC; spin_unlock_bh(&ifp->lock); if (dad_failed) ipv6_ifa_notify(0, ifp); @@ -4501,6 +4518,9 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags, (ifp->flags & IFA_F_TEMPORARY || ifp->prefix_len != 64)) return -EINVAL; + if (!(ifp->flags & IFA_F_TENTATIVE) || ifp->flags & IFA_F_DADFAILED) + ifa_flags &= ~IFA_F_OPTIMISTIC; + timeout = addrconf_timeout_fixup(valid_lft, HZ); if (addrconf_finite_timeout(timeout)) { expires = jiffies_to_clock_t(timeout * HZ); @@ -4574,6 +4594,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, struct in6_addr *pfx, *peer_pfx; struct inet6_ifaddr *ifa; struct net_device *dev; + struct inet6_dev *idev; u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME; u32 ifa_flags; int err; @@ -4607,7 +4628,19 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, /* We ignore other flags so far. */ ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR | - IFA_F_NOPREFIXROUTE | IFA_F_MCAUTOJOIN; + IFA_F_NOPREFIXROUTE | IFA_F_MCAUTOJOIN | IFA_F_OPTIMISTIC; + + idev = ipv6_find_idev(dev); + if (IS_ERR(idev)) + return PTR_ERR(idev); + + if (!ipv6_allow_optimistic_dad(net, idev)) + ifa_flags &= ~IFA_F_OPTIMISTIC; + + if (ifa_flags & IFA_F_NODAD && ifa_flags & IFA_F_OPTIMISTIC) { + NL_SET_ERR_MSG(extack, "IFA_F_NODAD and IFA_F_OPTIMISTIC are mutually exclusive"); + return -EINVAL; + } ifa = ipv6_get_ifaddr(net, pfx, dev, 1); if (!ifa) { diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 8e085cc05aeb..d7d0abc7fd0e 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -552,4 +552,3 @@ void ac6_proc_exit(struct net *net) remove_proc_entry("anycast6", net->proc_net); } #endif - diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index 11025f8d124b..b643f5ce6c80 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -279,4 +279,3 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, return nexthdr; } EXPORT_SYMBOL(ipv6_find_hdr); - diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 95a2c9e8699a..04e5f523e50f 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -223,6 +223,17 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) if (r->tclass && r->tclass != ip6_tclass(fl6->flowlabel)) return 0; + if (rule->ip_proto && (rule->ip_proto != fl6->flowi6_proto)) + return 0; + + if (fib_rule_port_range_set(&rule->sport_range) && + !fib_rule_port_inrange(&rule->sport_range, fl6->fl6_sport)) + return 0; + + if (fib_rule_port_range_set(&rule->dport_range) && + !fib_rule_port_inrange(&rule->dport_range, fl6->fl6_dport)) + return 0; + return 1; } @@ -258,12 +269,26 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, rule6->dst.plen = frh->dst_len; rule6->tclass = frh->tos; + if (fib_rule_requires_fldissect(rule)) + net->ipv6.fib6_rules_require_fldissect++; + net->ipv6.fib6_has_custom_rules = true; err = 0; errout: return err; } +static int fib6_rule_delete(struct fib_rule *rule) +{ + struct net *net = rule->fr_net; + + if (net->ipv6.fib6_rules_require_fldissect && + fib_rule_requires_fldissect(rule)) + net->ipv6.fib6_rules_require_fldissect--; + + return 0; +} + static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, struct nlattr **tb) { @@ -323,6 +348,7 @@ static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = { .match = fib6_rule_match, .suppress = fib6_rule_suppress, .configure = fib6_rule_configure, + .delete = fib6_rule_delete, .compare = fib6_rule_compare, .fill = fib6_rule_fill, .nlmsg_payload = fib6_rule_nlmsg_payload, @@ -350,6 +376,7 @@ static int __net_init fib6_rules_net_init(struct net *net) goto out_fib6_rules_ops; net->ipv6.fib6_rules_ops = ops; + net->ipv6.fib6_rules_require_fldissect = 0; out: return err; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 4fa4f1b150a4..b0778d323b6e 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -522,7 +522,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, fl6.fl6_icmp_type = type; fl6.fl6_icmp_code = code; fl6.flowi6_uid = sock_net_uid(net, NULL); - fl6.mp_hash = rt6_multipath_hash(&fl6, skb); + fl6.mp_hash = rt6_multipath_hash(&fl6, skb, NULL); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index 44c39c5f0638..e438699f000f 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -613,6 +613,7 @@ static struct pernet_operations ila_net_ops = { .exit = ila_exit_net, .id = &ila_net_id, .size = sizeof(struct ila_net), + .async = true, }; static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 3c353125546d..4f150a394387 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1517,6 +1517,7 @@ static struct pernet_operations ip6gre_net_ops = { .exit_batch = ip6gre_exit_batch_net, .id = &ip6gre_net_id, .size = sizeof(struct ip6gre_net), + .async = true, }; static int ip6gre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[], @@ -1784,6 +1785,12 @@ static void ip6gre_tap_setup(struct net_device *dev) netif_keep_dst(dev); } +bool is_ip6gretap_dev(const struct net_device *dev) +{ + return dev->netdev_ops == &ip6gre_tap_netdev_ops; +} +EXPORT_SYMBOL_GPL(is_ip6gretap_dev); + static bool ip6gre_netlink_encap_parms(struct nlattr *data[], struct ip_tunnel_encap *ipencap) { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 997c7f19ad62..a6eb0e699b15 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -71,7 +71,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) && - ((mroute6_socket(net, skb) && + ((mroute6_is_socket(net, skb) && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) || ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr, &ipv6_hdr(skb)->saddr))) { diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 4b15fe928278..869e2e6750f7 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -2250,6 +2250,7 @@ static struct pernet_operations ip6_tnl_net_ops = { .exit_batch = ip6_tnl_exit_batch_net, .id = &ip6_tnl_net_id, .size = sizeof(struct ip6_tnl_net), + .async = true, }; /** diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index fa3ae1cb50d3..c617ea17faa8 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -1148,6 +1148,7 @@ static struct pernet_operations vti6_net_ops = { .exit_batch = vti6_exit_batch_net, .id = &vti6_net_id, .size = sizeof(struct vti6_net), + .async = true, }; static struct xfrm6_protocol vti_esp6_protocol __read_mostly = { diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 295eb5ecaee5..2a38f9de45d3 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -20,7 +20,6 @@ #include <linux/types.h> #include <linux/sched.h> #include <linux/errno.h> -#include <linux/timer.h> #include <linux/mm.h> #include <linux/kernel.h> #include <linux/fcntl.h> @@ -32,11 +31,9 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/init.h> -#include <linux/slab.h> #include <linux/compat.h> #include <net/protocol.h> #include <linux/skbuff.h> -#include <net/sock.h> #include <net/raw.h> #include <linux/notifier.h> #include <linux/if_arp.h> @@ -54,30 +51,12 @@ #include <net/ip6_checksum.h> #include <linux/netconf.h> -struct mr6_table { - struct list_head list; - possible_net_t net; - u32 id; - struct sock *mroute6_sk; - struct timer_list ipmr_expire_timer; - struct list_head mfc6_unres_queue; - struct list_head mfc6_cache_array[MFC6_LINES]; - struct mif_device vif6_table[MAXMIFS]; - int maxvif; - atomic_t cache_resolve_queue_len; - bool mroute_do_assert; - bool mroute_do_pim; -#ifdef CONFIG_IPV6_PIMSM_V2 - int mroute_reg_vif_num; -#endif -}; - struct ip6mr_rule { struct fib_rule common; }; struct ip6mr_result { - struct mr6_table *mrt; + struct mr_table *mrt; }; /* Big lock, protecting vif table, mrt cache and mroute socket state. @@ -86,11 +65,7 @@ struct ip6mr_result { static DEFINE_RWLOCK(mrt_lock); -/* - * Multicast router control variables - */ - -#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL) +/* Multicast router control variables */ /* Special spinlock for queue of unresolved entries */ static DEFINE_SPINLOCK(mfc_unres_lock); @@ -105,30 +80,45 @@ static DEFINE_SPINLOCK(mfc_unres_lock); static struct kmem_cache *mrt_cachep __read_mostly; -static struct mr6_table *ip6mr_new_table(struct net *net, u32 id); -static void ip6mr_free_table(struct mr6_table *mrt); +static struct mr_table *ip6mr_new_table(struct net *net, u32 id); +static void ip6mr_free_table(struct mr_table *mrt); -static void ip6_mr_forward(struct net *net, struct mr6_table *mrt, +static void ip6_mr_forward(struct net *net, struct mr_table *mrt, struct sk_buff *skb, struct mfc6_cache *cache); -static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt, +static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, mifi_t mifi, int assert); -static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, - struct mfc6_cache *c, struct rtmsg *rtm); -static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc, +static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc, int cmd); -static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt); +static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt); static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb); -static void mroute_clean_tables(struct mr6_table *mrt, bool all); +static void mroute_clean_tables(struct mr_table *mrt, bool all); static void ipmr_expire_process(struct timer_list *t); #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES #define ip6mr_for_each_table(mrt, net) \ list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list) -static struct mr6_table *ip6mr_get_table(struct net *net, u32 id) +static struct mr_table *ip6mr_mr_table_iter(struct net *net, + struct mr_table *mrt) +{ + struct mr_table *ret; + + if (!mrt) + ret = list_entry_rcu(net->ipv6.mr6_tables.next, + struct mr_table, list); + else + ret = list_entry_rcu(mrt->list.next, + struct mr_table, list); + + if (&ret->list == &net->ipv6.mr6_tables) + return NULL; + return ret; +} + +static struct mr_table *ip6mr_get_table(struct net *net, u32 id) { - struct mr6_table *mrt; + struct mr_table *mrt; ip6mr_for_each_table(mrt, net) { if (mrt->id == id) @@ -138,7 +128,7 @@ static struct mr6_table *ip6mr_get_table(struct net *net, u32 id) } static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6, - struct mr6_table **mrt) + struct mr_table **mrt) { int err; struct ip6mr_result res; @@ -159,7 +149,7 @@ static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp, int flags, struct fib_lookup_arg *arg) { struct ip6mr_result *res = arg->result; - struct mr6_table *mrt; + struct mr_table *mrt; switch (rule->action) { case FR_ACT_TO_TBL: @@ -227,7 +217,7 @@ static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = { static int __net_init ip6mr_rules_init(struct net *net) { struct fib_rules_ops *ops; - struct mr6_table *mrt; + struct mr_table *mrt; int err; ops = fib_rules_register(&ip6mr_rules_ops_template, net); @@ -258,7 +248,7 @@ err1: static void __net_exit ip6mr_rules_exit(struct net *net) { - struct mr6_table *mrt, *next; + struct mr_table *mrt, *next; rtnl_lock(); list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) { @@ -272,13 +262,21 @@ static void __net_exit ip6mr_rules_exit(struct net *net) #define ip6mr_for_each_table(mrt, net) \ for (mrt = net->ipv6.mrt6; mrt; mrt = NULL) -static struct mr6_table *ip6mr_get_table(struct net *net, u32 id) +static struct mr_table *ip6mr_mr_table_iter(struct net *net, + struct mr_table *mrt) +{ + if (!mrt) + return net->ipv6.mrt6; + return NULL; +} + +static struct mr_table *ip6mr_get_table(struct net *net, u32 id) { return net->ipv6.mrt6; } static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6, - struct mr6_table **mrt) + struct mr_table **mrt) { *mrt = net->ipv6.mrt6; return 0; @@ -299,112 +297,75 @@ static void __net_exit ip6mr_rules_exit(struct net *net) } #endif -static struct mr6_table *ip6mr_new_table(struct net *net, u32 id) +static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg, + const void *ptr) { - struct mr6_table *mrt; - unsigned int i; + const struct mfc6_cache_cmp_arg *cmparg = arg->key; + struct mfc6_cache *c = (struct mfc6_cache *)ptr; - mrt = ip6mr_get_table(net, id); - if (mrt) - return mrt; - - mrt = kzalloc(sizeof(*mrt), GFP_KERNEL); - if (!mrt) - return NULL; - mrt->id = id; - write_pnet(&mrt->net, net); - - /* Forwarding cache */ - for (i = 0; i < MFC6_LINES; i++) - INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]); - - INIT_LIST_HEAD(&mrt->mfc6_unres_queue); + return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) || + !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin); +} - timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0); +static const struct rhashtable_params ip6mr_rht_params = { + .head_offset = offsetof(struct mr_mfc, mnode), + .key_offset = offsetof(struct mfc6_cache, cmparg), + .key_len = sizeof(struct mfc6_cache_cmp_arg), + .nelem_hint = 3, + .locks_mul = 1, + .obj_cmpfn = ip6mr_hash_cmp, + .automatic_shrinking = true, +}; -#ifdef CONFIG_IPV6_PIMSM_V2 - mrt->mroute_reg_vif_num = -1; -#endif +static void ip6mr_new_table_set(struct mr_table *mrt, + struct net *net) +{ #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables); #endif - return mrt; } -static void ip6mr_free_table(struct mr6_table *mrt) -{ - del_timer_sync(&mrt->ipmr_expire_timer); - mroute_clean_tables(mrt, true); - kfree(mrt); -} - -#ifdef CONFIG_PROC_FS - -struct ipmr_mfc_iter { - struct seq_net_private p; - struct mr6_table *mrt; - struct list_head *cache; - int ct; +static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = { + .mf6c_origin = IN6ADDR_ANY_INIT, + .mf6c_mcastgrp = IN6ADDR_ANY_INIT, }; +static struct mr_table_ops ip6mr_mr_table_ops = { + .rht_params = &ip6mr_rht_params, + .cmparg_any = &ip6mr_mr_table_ops_cmparg_any, +}; -static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net, - struct ipmr_mfc_iter *it, loff_t pos) +static struct mr_table *ip6mr_new_table(struct net *net, u32 id) { - struct mr6_table *mrt = it->mrt; - struct mfc6_cache *mfc; - - read_lock(&mrt_lock); - for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) { - it->cache = &mrt->mfc6_cache_array[it->ct]; - list_for_each_entry(mfc, it->cache, list) - if (pos-- == 0) - return mfc; - } - read_unlock(&mrt_lock); + struct mr_table *mrt; - spin_lock_bh(&mfc_unres_lock); - it->cache = &mrt->mfc6_unres_queue; - list_for_each_entry(mfc, it->cache, list) - if (pos-- == 0) - return mfc; - spin_unlock_bh(&mfc_unres_lock); + mrt = ip6mr_get_table(net, id); + if (mrt) + return mrt; - it->cache = NULL; - return NULL; + return mr_table_alloc(net, id, &ip6mr_mr_table_ops, + ipmr_expire_process, ip6mr_new_table_set); } -/* - * The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif - */ - -struct ipmr_vif_iter { - struct seq_net_private p; - struct mr6_table *mrt; - int ct; -}; - -static struct mif_device *ip6mr_vif_seq_idx(struct net *net, - struct ipmr_vif_iter *iter, - loff_t pos) +static void ip6mr_free_table(struct mr_table *mrt) { - struct mr6_table *mrt = iter->mrt; - - for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) { - if (!MIF_EXISTS(mrt, iter->ct)) - continue; - if (pos-- == 0) - return &mrt->vif6_table[iter->ct]; - } - return NULL; + del_timer_sync(&mrt->ipmr_expire_timer); + mroute_clean_tables(mrt, true); + rhltable_destroy(&mrt->mfc_hash); + kfree(mrt); } +#ifdef CONFIG_PROC_FS +/* The /proc interfaces to multicast routing + * /proc/ip6_mr_cache /proc/ip6_mr_vif + */ + static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos) __acquires(mrt_lock) { - struct ipmr_vif_iter *iter = seq->private; + struct mr_vif_iter *iter = seq->private; struct net *net = seq_file_net(seq); - struct mr6_table *mrt; + struct mr_table *mrt; mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); if (!mrt) @@ -413,26 +374,7 @@ static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos) iter->mrt = mrt; read_lock(&mrt_lock); - return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1) - : SEQ_START_TOKEN; -} - -static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - struct ipmr_vif_iter *iter = seq->private; - struct net *net = seq_file_net(seq); - struct mr6_table *mrt = iter->mrt; - - ++*pos; - if (v == SEQ_START_TOKEN) - return ip6mr_vif_seq_idx(net, iter, 0); - - while (++iter->ct < mrt->maxvif) { - if (!MIF_EXISTS(mrt, iter->ct)) - continue; - return &mrt->vif6_table[iter->ct]; - } - return NULL; + return mr_vif_seq_start(seq, pos); } static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v) @@ -443,19 +385,19 @@ static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v) static int ip6mr_vif_seq_show(struct seq_file *seq, void *v) { - struct ipmr_vif_iter *iter = seq->private; - struct mr6_table *mrt = iter->mrt; + struct mr_vif_iter *iter = seq->private; + struct mr_table *mrt = iter->mrt; if (v == SEQ_START_TOKEN) { seq_puts(seq, "Interface BytesIn PktsIn BytesOut PktsOut Flags\n"); } else { - const struct mif_device *vif = v; + const struct vif_device *vif = v; const char *name = vif->dev ? vif->dev->name : "none"; seq_printf(seq, "%2td %-10s %8ld %7ld %8ld %7ld %05X\n", - vif - mrt->vif6_table, + vif - mrt->vif_table, name, vif->bytes_in, vif->pkt_in, vif->bytes_out, vif->pkt_out, vif->flags); @@ -465,7 +407,7 @@ static int ip6mr_vif_seq_show(struct seq_file *seq, void *v) static const struct seq_operations ip6mr_vif_seq_ops = { .start = ip6mr_vif_seq_start, - .next = ip6mr_vif_seq_next, + .next = mr_vif_seq_next, .stop = ip6mr_vif_seq_stop, .show = ip6mr_vif_seq_show, }; @@ -473,7 +415,7 @@ static const struct seq_operations ip6mr_vif_seq_ops = { static int ip6mr_vif_open(struct inode *inode, struct file *file) { return seq_open_net(inode, file, &ip6mr_vif_seq_ops, - sizeof(struct ipmr_vif_iter)); + sizeof(struct mr_vif_iter)); } static const struct file_operations ip6mr_vif_fops = { @@ -485,72 +427,14 @@ static const struct file_operations ip6mr_vif_fops = { static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) { - struct ipmr_mfc_iter *it = seq->private; struct net *net = seq_file_net(seq); - struct mr6_table *mrt; + struct mr_table *mrt; mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); if (!mrt) return ERR_PTR(-ENOENT); - it->mrt = mrt; - it->cache = NULL; - return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1) - : SEQ_START_TOKEN; -} - -static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - struct mfc6_cache *mfc = v; - struct ipmr_mfc_iter *it = seq->private; - struct net *net = seq_file_net(seq); - struct mr6_table *mrt = it->mrt; - - ++*pos; - - if (v == SEQ_START_TOKEN) - return ipmr_mfc_seq_idx(net, seq->private, 0); - - if (mfc->list.next != it->cache) - return list_entry(mfc->list.next, struct mfc6_cache, list); - - if (it->cache == &mrt->mfc6_unres_queue) - goto end_of_list; - - BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]); - - while (++it->ct < MFC6_LINES) { - it->cache = &mrt->mfc6_cache_array[it->ct]; - if (list_empty(it->cache)) - continue; - return list_first_entry(it->cache, struct mfc6_cache, list); - } - - /* exhausted cache_array, show unresolved */ - read_unlock(&mrt_lock); - it->cache = &mrt->mfc6_unres_queue; - it->ct = 0; - - spin_lock_bh(&mfc_unres_lock); - if (!list_empty(it->cache)) - return list_first_entry(it->cache, struct mfc6_cache, list); - - end_of_list: - spin_unlock_bh(&mfc_unres_lock); - it->cache = NULL; - - return NULL; -} - -static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) -{ - struct ipmr_mfc_iter *it = seq->private; - struct mr6_table *mrt = it->mrt; - - if (it->cache == &mrt->mfc6_unres_queue) - spin_unlock_bh(&mfc_unres_lock); - else if (it->cache == &mrt->mfc6_cache_array[it->ct]) - read_unlock(&mrt_lock); + return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock); } static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) @@ -564,25 +448,25 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) "Iif Pkts Bytes Wrong Oifs\n"); } else { const struct mfc6_cache *mfc = v; - const struct ipmr_mfc_iter *it = seq->private; - struct mr6_table *mrt = it->mrt; + const struct mr_mfc_iter *it = seq->private; + struct mr_table *mrt = it->mrt; seq_printf(seq, "%pI6 %pI6 %-3hd", &mfc->mf6c_mcastgrp, &mfc->mf6c_origin, - mfc->mf6c_parent); + mfc->_c.mfc_parent); - if (it->cache != &mrt->mfc6_unres_queue) { + if (it->cache != &mrt->mfc_unres_queue) { seq_printf(seq, " %8lu %8lu %8lu", - mfc->mfc_un.res.pkt, - mfc->mfc_un.res.bytes, - mfc->mfc_un.res.wrong_if); - for (n = mfc->mfc_un.res.minvif; - n < mfc->mfc_un.res.maxvif; n++) { - if (MIF_EXISTS(mrt, n) && - mfc->mfc_un.res.ttls[n] < 255) + mfc->_c.mfc_un.res.pkt, + mfc->_c.mfc_un.res.bytes, + mfc->_c.mfc_un.res.wrong_if); + for (n = mfc->_c.mfc_un.res.minvif; + n < mfc->_c.mfc_un.res.maxvif; n++) { + if (VIF_EXISTS(mrt, n) && + mfc->_c.mfc_un.res.ttls[n] < 255) seq_printf(seq, - " %2d:%-3d", - n, mfc->mfc_un.res.ttls[n]); + " %2d:%-3d", n, + mfc->_c.mfc_un.res.ttls[n]); } } else { /* unresolved mfc_caches don't contain @@ -597,15 +481,15 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) static const struct seq_operations ipmr_mfc_seq_ops = { .start = ipmr_mfc_seq_start, - .next = ipmr_mfc_seq_next, - .stop = ipmr_mfc_seq_stop, + .next = mr_mfc_seq_next, + .stop = mr_mfc_seq_stop, .show = ipmr_mfc_seq_show, }; static int ipmr_mfc_open(struct inode *inode, struct file *file) { return seq_open_net(inode, file, &ipmr_mfc_seq_ops, - sizeof(struct ipmr_mfc_iter)); + sizeof(struct mr_mfc_iter)); } static const struct file_operations ip6mr_mfc_fops = { @@ -624,7 +508,7 @@ static int pim6_rcv(struct sk_buff *skb) struct ipv6hdr *encap; struct net_device *reg_dev = NULL; struct net *net = dev_net(skb->dev); - struct mr6_table *mrt; + struct mr_table *mrt; struct flowi6 fl6 = { .flowi6_iif = skb->dev->ifindex, .flowi6_mark = skb->mark, @@ -658,7 +542,7 @@ static int pim6_rcv(struct sk_buff *skb) read_lock(&mrt_lock); if (reg_vif_num >= 0) - reg_dev = mrt->vif6_table[reg_vif_num].dev; + reg_dev = mrt->vif_table[reg_vif_num].dev; if (reg_dev) dev_hold(reg_dev); read_unlock(&mrt_lock); @@ -693,7 +577,7 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) { struct net *net = dev_net(dev); - struct mr6_table *mrt; + struct mr_table *mrt; struct flowi6 fl6 = { .flowi6_oif = dev->ifindex, .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX, @@ -736,7 +620,7 @@ static void reg_vif_setup(struct net_device *dev) dev->features |= NETIF_F_NETNS_LOCAL; } -static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt) +static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt) { struct net_device *dev; char name[IFNAMSIZ]; @@ -773,17 +657,17 @@ failure: * Delete a VIF entry */ -static int mif6_delete(struct mr6_table *mrt, int vifi, int notify, +static int mif6_delete(struct mr_table *mrt, int vifi, int notify, struct list_head *head) { - struct mif_device *v; + struct vif_device *v; struct net_device *dev; struct inet6_dev *in6_dev; if (vifi < 0 || vifi >= mrt->maxvif) return -EADDRNOTAVAIL; - v = &mrt->vif6_table[vifi]; + v = &mrt->vif_table[vifi]; write_lock_bh(&mrt_lock); dev = v->dev; @@ -802,7 +686,7 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, int notify, if (vifi + 1 == mrt->maxvif) { int tmp; for (tmp = vifi - 1; tmp >= 0; tmp--) { - if (MIF_EXISTS(mrt, tmp)) + if (VIF_EXISTS(mrt, tmp)) break; } mrt->maxvif = tmp + 1; @@ -827,23 +711,30 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, int notify, return 0; } +static inline void ip6mr_cache_free_rcu(struct rcu_head *head) +{ + struct mr_mfc *c = container_of(head, struct mr_mfc, rcu); + + kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c); +} + static inline void ip6mr_cache_free(struct mfc6_cache *c) { - kmem_cache_free(mrt_cachep, c); + call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu); } /* Destroy an unresolved cache entry, killing queued skbs and reporting error to netlink readers. */ -static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c) +static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c) { struct net *net = read_pnet(&mrt->net); struct sk_buff *skb; atomic_dec(&mrt->cache_resolve_queue_len); - while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) { + while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) { if (ipv6_hdr(skb)->version == 0) { struct nlmsghdr *nlh = skb_pull(skb, sizeof(struct ipv6hdr)); @@ -862,13 +753,13 @@ static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c) /* Timer process for all the unresolved queue. */ -static void ipmr_do_expire_process(struct mr6_table *mrt) +static void ipmr_do_expire_process(struct mr_table *mrt) { unsigned long now = jiffies; unsigned long expires = 10 * HZ; - struct mfc6_cache *c, *next; + struct mr_mfc *c, *next; - list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) { + list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) { if (time_after(c->mfc_un.unres.expires, now)) { /* not yet... */ unsigned long interval = c->mfc_un.unres.expires - now; @@ -878,24 +769,24 @@ static void ipmr_do_expire_process(struct mr6_table *mrt) } list_del(&c->list); - mr6_netlink_event(mrt, c, RTM_DELROUTE); - ip6mr_destroy_unres(mrt, c); + mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE); + ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c); } - if (!list_empty(&mrt->mfc6_unres_queue)) + if (!list_empty(&mrt->mfc_unres_queue)) mod_timer(&mrt->ipmr_expire_timer, jiffies + expires); } static void ipmr_expire_process(struct timer_list *t) { - struct mr6_table *mrt = from_timer(mrt, t, ipmr_expire_timer); + struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer); if (!spin_trylock(&mfc_unres_lock)) { mod_timer(&mrt->ipmr_expire_timer, jiffies + 1); return; } - if (!list_empty(&mrt->mfc6_unres_queue)) + if (!list_empty(&mrt->mfc_unres_queue)) ipmr_do_expire_process(mrt); spin_unlock(&mfc_unres_lock); @@ -903,7 +794,8 @@ static void ipmr_expire_process(struct timer_list *t) /* Fill oifs list. It is called under write locked mrt_lock. */ -static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache, +static void ip6mr_update_thresholds(struct mr_table *mrt, + struct mr_mfc *cache, unsigned char *ttls) { int vifi; @@ -913,7 +805,7 @@ static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *ca memset(cache->mfc_un.res.ttls, 255, MAXMIFS); for (vifi = 0; vifi < mrt->maxvif; vifi++) { - if (MIF_EXISTS(mrt, vifi) && + if (VIF_EXISTS(mrt, vifi) && ttls[vifi] && ttls[vifi] < 255) { cache->mfc_un.res.ttls[vifi] = ttls[vifi]; if (cache->mfc_un.res.minvif > vifi) @@ -925,17 +817,17 @@ static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *ca cache->mfc_un.res.lastuse = jiffies; } -static int mif6_add(struct net *net, struct mr6_table *mrt, +static int mif6_add(struct net *net, struct mr_table *mrt, struct mif6ctl *vifc, int mrtsock) { int vifi = vifc->mif6c_mifi; - struct mif_device *v = &mrt->vif6_table[vifi]; + struct vif_device *v = &mrt->vif_table[vifi]; struct net_device *dev; struct inet6_dev *in6_dev; int err; /* Is vif busy ? */ - if (MIF_EXISTS(mrt, vifi)) + if (VIF_EXISTS(mrt, vifi)) return -EADDRINUSE; switch (vifc->mif6c_flags) { @@ -980,21 +872,10 @@ static int mif6_add(struct net *net, struct mr6_table *mrt, dev->ifindex, &in6_dev->cnf); } - /* - * Fill in the VIF structures - */ - v->rate_limit = vifc->vifc_rate_limit; - v->flags = vifc->mif6c_flags; - if (!mrtsock) - v->flags |= VIFF_STATIC; - v->threshold = vifc->vifc_threshold; - v->bytes_in = 0; - v->bytes_out = 0; - v->pkt_in = 0; - v->pkt_out = 0; - v->link = dev->ifindex; - if (v->flags & MIFF_REGISTER) - v->link = dev_get_iflink(dev); + /* Fill in the VIF structures */ + vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold, + vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0), + MIFF_REGISTER); /* And finish update writing critical data */ write_lock_bh(&mrt_lock); @@ -1009,75 +890,56 @@ static int mif6_add(struct net *net, struct mr6_table *mrt, return 0; } -static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt, +static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt, const struct in6_addr *origin, const struct in6_addr *mcastgrp) { - int line = MFC6_HASH(mcastgrp, origin); - struct mfc6_cache *c; - - list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) { - if (ipv6_addr_equal(&c->mf6c_origin, origin) && - ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) - return c; - } - return NULL; -} - -/* Look for a (*,*,oif) entry */ -static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt, - mifi_t mifi) -{ - int line = MFC6_HASH(&in6addr_any, &in6addr_any); - struct mfc6_cache *c; - - list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) - if (ipv6_addr_any(&c->mf6c_origin) && - ipv6_addr_any(&c->mf6c_mcastgrp) && - (c->mfc_un.res.ttls[mifi] < 255)) - return c; + struct mfc6_cache_cmp_arg arg = { + .mf6c_origin = *origin, + .mf6c_mcastgrp = *mcastgrp, + }; - return NULL; + return mr_mfc_find(mrt, &arg); } /* Look for a (*,G) entry */ -static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt, +static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt, struct in6_addr *mcastgrp, mifi_t mifi) { - int line = MFC6_HASH(mcastgrp, &in6addr_any); - struct mfc6_cache *c, *proxy; + struct mfc6_cache_cmp_arg arg = { + .mf6c_origin = in6addr_any, + .mf6c_mcastgrp = *mcastgrp, + }; if (ipv6_addr_any(mcastgrp)) - goto skip; - - list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) - if (ipv6_addr_any(&c->mf6c_origin) && - ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) { - if (c->mfc_un.res.ttls[mifi] < 255) - return c; - - /* It's ok if the mifi is part of the static tree */ - proxy = ip6mr_cache_find_any_parent(mrt, - c->mf6c_parent); - if (proxy && proxy->mfc_un.res.ttls[mifi] < 255) - return c; - } + return mr_mfc_find_any_parent(mrt, mifi); + return mr_mfc_find_any(mrt, mifi, &arg); +} -skip: - return ip6mr_cache_find_any_parent(mrt, mifi); +/* Look for a (S,G,iif) entry if parent != -1 */ +static struct mfc6_cache * +ip6mr_cache_find_parent(struct mr_table *mrt, + const struct in6_addr *origin, + const struct in6_addr *mcastgrp, + int parent) +{ + struct mfc6_cache_cmp_arg arg = { + .mf6c_origin = *origin, + .mf6c_mcastgrp = *mcastgrp, + }; + + return mr_mfc_find_parent(mrt, &arg, parent); } -/* - * Allocate a multicast cache entry - */ +/* Allocate a multicast cache entry */ static struct mfc6_cache *ip6mr_cache_alloc(void) { struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); if (!c) return NULL; - c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; - c->mfc_un.res.minvif = MAXMIFS; + c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; + c->_c.mfc_un.res.minvif = MAXMIFS; return c; } @@ -1086,8 +948,8 @@ static struct mfc6_cache *ip6mr_cache_alloc_unres(void) struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); if (!c) return NULL; - skb_queue_head_init(&c->mfc_un.unres.unresolved); - c->mfc_un.unres.expires = jiffies + 10 * HZ; + skb_queue_head_init(&c->_c.mfc_un.unres.unresolved); + c->_c.mfc_un.unres.expires = jiffies + 10 * HZ; return c; } @@ -1095,7 +957,7 @@ static struct mfc6_cache *ip6mr_cache_alloc_unres(void) * A cache entry has gone into a resolved state from queued */ -static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt, +static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt, struct mfc6_cache *uc, struct mfc6_cache *c) { struct sk_buff *skb; @@ -1104,12 +966,13 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt, * Play the pending entries through our router */ - while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) { + while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) { if (ipv6_hdr(skb)->version == 0) { struct nlmsghdr *nlh = skb_pull(skb, sizeof(struct ipv6hdr)); - if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) { + if (mr_fill_mroute(mrt, skb, &c->_c, + nlmsg_data(nlh)) > 0) { nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh; } else { nlh->nlmsg_type = NLMSG_ERROR; @@ -1129,9 +992,10 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt, * Called under mrt_lock. */ -static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt, +static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, mifi_t mifi, int assert) { + struct sock *mroute6_sk; struct sk_buff *skb; struct mrt6msg *msg; int ret; @@ -1201,17 +1065,19 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt, skb->ip_summed = CHECKSUM_UNNECESSARY; } - if (!mrt->mroute6_sk) { + rcu_read_lock(); + mroute6_sk = rcu_dereference(mrt->mroute_sk); + if (!mroute6_sk) { + rcu_read_unlock(); kfree_skb(skb); return -EINVAL; } mrt6msg_netlink_event(mrt, skb); - /* - * Deliver to user space multicast routing algorithms - */ - ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb); + /* Deliver to user space multicast routing algorithms */ + ret = sock_queue_rcv_skb(mroute6_sk, skb); + rcu_read_unlock(); if (ret < 0) { net_warn_ratelimited("mroute6: pending queue full, dropping entries\n"); kfree_skb(skb); @@ -1220,19 +1086,16 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt, return ret; } -/* - * Queue a packet for resolution. It gets locked cache entry! - */ - -static int -ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb) +/* Queue a packet for resolution. It gets locked cache entry! */ +static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi, + struct sk_buff *skb) { + struct mfc6_cache *c; bool found = false; int err; - struct mfc6_cache *c; spin_lock_bh(&mfc_unres_lock); - list_for_each_entry(c, &mrt->mfc6_unres_queue, list) { + list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) { if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) && ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) { found = true; @@ -1253,10 +1116,8 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb) return -ENOBUFS; } - /* - * Fill in the new cache entry - */ - c->mf6c_parent = -1; + /* Fill in the new cache entry */ + c->_c.mfc_parent = -1; c->mf6c_origin = ipv6_hdr(skb)->saddr; c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr; @@ -1276,20 +1137,18 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb) } atomic_inc(&mrt->cache_resolve_queue_len); - list_add(&c->list, &mrt->mfc6_unres_queue); + list_add(&c->_c.list, &mrt->mfc_unres_queue); mr6_netlink_event(mrt, c, RTM_NEWROUTE); ipmr_do_expire_process(mrt); } - /* - * See if we can append the packet - */ - if (c->mfc_un.unres.unresolved.qlen > 3) { + /* See if we can append the packet */ + if (c->_c.mfc_un.unres.unresolved.qlen > 3) { kfree_skb(skb); err = -ENOBUFS; } else { - skb_queue_tail(&c->mfc_un.unres.unresolved, skb); + skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb); err = 0; } @@ -1301,29 +1160,24 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb) * MFC6 cache manipulation by user space */ -static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc, +static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc, int parent) { - int line; - struct mfc6_cache *c, *next; - - line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr); + struct mfc6_cache *c; - list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) { - if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) && - ipv6_addr_equal(&c->mf6c_mcastgrp, - &mfc->mf6cc_mcastgrp.sin6_addr) && - (parent == -1 || parent == c->mf6c_parent)) { - write_lock_bh(&mrt_lock); - list_del(&c->list); - write_unlock_bh(&mrt_lock); + /* The entries are added/deleted only under RTNL */ + rcu_read_lock(); + c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr, + &mfc->mf6cc_mcastgrp.sin6_addr, parent); + rcu_read_unlock(); + if (!c) + return -ENOENT; + rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params); + list_del_rcu(&c->_c.list); - mr6_netlink_event(mrt, c, RTM_DELROUTE); - ip6mr_cache_free(c); - return 0; - } - } - return -ENOENT; + mr6_netlink_event(mrt, c, RTM_DELROUTE); + ip6mr_cache_free(c); + return 0; } static int ip6mr_device_event(struct notifier_block *this, @@ -1331,15 +1185,15 @@ static int ip6mr_device_event(struct notifier_block *this, { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); - struct mr6_table *mrt; - struct mif_device *v; + struct mr_table *mrt; + struct vif_device *v; int ct; if (event != NETDEV_UNREGISTER) return NOTIFY_DONE; ip6mr_for_each_table(mrt, net) { - v = &mrt->vif6_table[0]; + v = &mrt->vif_table[0]; for (ct = 0; ct < mrt->maxvif; ct++, v++) { if (v->dev == dev) mif6_delete(mrt, ct, 1, NULL); @@ -1453,14 +1307,14 @@ void ip6_mr_cleanup(void) kmem_cache_destroy(mrt_cachep); } -static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, +static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt, struct mf6cctl *mfc, int mrtsock, int parent) { - bool found = false; - int line; - struct mfc6_cache *uc, *c; unsigned char ttls[MAXMIFS]; - int i; + struct mfc6_cache *uc, *c; + struct mr_mfc *_uc; + bool found; + int i, err; if (mfc->mf6cc_parent >= MAXMIFS) return -ENFILE; @@ -1469,27 +1323,19 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, for (i = 0; i < MAXMIFS; i++) { if (IF_ISSET(i, &mfc->mf6cc_ifset)) ttls[i] = 1; - - } - - line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr); - - list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) { - if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) && - ipv6_addr_equal(&c->mf6c_mcastgrp, - &mfc->mf6cc_mcastgrp.sin6_addr) && - (parent == -1 || parent == mfc->mf6cc_parent)) { - found = true; - break; - } } - if (found) { + /* The entries are added/deleted only under RTNL */ + rcu_read_lock(); + c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr, + &mfc->mf6cc_mcastgrp.sin6_addr, parent); + rcu_read_unlock(); + if (c) { write_lock_bh(&mrt_lock); - c->mf6c_parent = mfc->mf6cc_parent; - ip6mr_update_thresholds(mrt, c, ttls); + c->_c.mfc_parent = mfc->mf6cc_parent; + ip6mr_update_thresholds(mrt, &c->_c, ttls); if (!mrtsock) - c->mfc_flags |= MFC_STATIC; + c->_c.mfc_flags |= MFC_STATIC; write_unlock_bh(&mrt_lock); mr6_netlink_event(mrt, c, RTM_NEWROUTE); return 0; @@ -1505,31 +1351,36 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, c->mf6c_origin = mfc->mf6cc_origin.sin6_addr; c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr; - c->mf6c_parent = mfc->mf6cc_parent; - ip6mr_update_thresholds(mrt, c, ttls); + c->_c.mfc_parent = mfc->mf6cc_parent; + ip6mr_update_thresholds(mrt, &c->_c, ttls); if (!mrtsock) - c->mfc_flags |= MFC_STATIC; + c->_c.mfc_flags |= MFC_STATIC; - write_lock_bh(&mrt_lock); - list_add(&c->list, &mrt->mfc6_cache_array[line]); - write_unlock_bh(&mrt_lock); + err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode, + ip6mr_rht_params); + if (err) { + pr_err("ip6mr: rhtable insert error %d\n", err); + ip6mr_cache_free(c); + return err; + } + list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list); - /* - * Check to see if we resolved a queued list. If so we - * need to send on the frames and tidy up. + /* Check to see if we resolved a queued list. If so we + * need to send on the frames and tidy up. */ found = false; spin_lock_bh(&mfc_unres_lock); - list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) { + list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) { + uc = (struct mfc6_cache *)_uc; if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) && ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) { - list_del(&uc->list); + list_del(&_uc->list); atomic_dec(&mrt->cache_resolve_queue_len); found = true; break; } } - if (list_empty(&mrt->mfc6_unres_queue)) + if (list_empty(&mrt->mfc_unres_queue)) del_timer(&mrt->ipmr_expire_timer); spin_unlock_bh(&mfc_unres_lock); @@ -1545,61 +1396,54 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, * Close the multicast socket, and clear the vif tables etc */ -static void mroute_clean_tables(struct mr6_table *mrt, bool all) +static void mroute_clean_tables(struct mr_table *mrt, bool all) { - int i; + struct mr_mfc *c, *tmp; LIST_HEAD(list); - struct mfc6_cache *c, *next; + int i; - /* - * Shut down all active vif entries - */ + /* Shut down all active vif entries */ for (i = 0; i < mrt->maxvif; i++) { - if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC)) + if (!all && (mrt->vif_table[i].flags & VIFF_STATIC)) continue; mif6_delete(mrt, i, 0, &list); } unregister_netdevice_many(&list); - /* - * Wipe the cache - */ - for (i = 0; i < MFC6_LINES; i++) { - list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) { - if (!all && (c->mfc_flags & MFC_STATIC)) - continue; - write_lock_bh(&mrt_lock); - list_del(&c->list); - write_unlock_bh(&mrt_lock); - - mr6_netlink_event(mrt, c, RTM_DELROUTE); - ip6mr_cache_free(c); - } + /* Wipe the cache */ + list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) { + if (!all && (c->mfc_flags & MFC_STATIC)) + continue; + rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params); + list_del_rcu(&c->list); + mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE); + ip6mr_cache_free((struct mfc6_cache *)c); } if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { spin_lock_bh(&mfc_unres_lock); - list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) { + list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) { list_del(&c->list); - mr6_netlink_event(mrt, c, RTM_DELROUTE); - ip6mr_destroy_unres(mrt, c); + mr6_netlink_event(mrt, (struct mfc6_cache *)c, + RTM_DELROUTE); + ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c); } spin_unlock_bh(&mfc_unres_lock); } } -static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk) +static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk) { int err = 0; struct net *net = sock_net(sk); rtnl_lock(); write_lock_bh(&mrt_lock); - if (likely(mrt->mroute6_sk == NULL)) { - mrt->mroute6_sk = sk; - net->ipv6.devconf_all->mc_forwarding++; - } else { + if (rtnl_dereference(mrt->mroute_sk)) { err = -EADDRINUSE; + } else { + rcu_assign_pointer(mrt->mroute_sk, sk); + net->ipv6.devconf_all->mc_forwarding++; } write_unlock_bh(&mrt_lock); @@ -1617,7 +1461,7 @@ int ip6mr_sk_done(struct sock *sk) { int err = -EACCES; struct net *net = sock_net(sk); - struct mr6_table *mrt; + struct mr_table *mrt; if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num != IPPROTO_ICMPV6) @@ -1625,9 +1469,9 @@ int ip6mr_sk_done(struct sock *sk) rtnl_lock(); ip6mr_for_each_table(mrt, net) { - if (sk == mrt->mroute6_sk) { + if (sk == rtnl_dereference(mrt->mroute_sk)) { write_lock_bh(&mrt_lock); - mrt->mroute6_sk = NULL; + RCU_INIT_POINTER(mrt->mroute_sk, NULL); net->ipv6.devconf_all->mc_forwarding--; write_unlock_bh(&mrt_lock); inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, @@ -1641,13 +1485,14 @@ int ip6mr_sk_done(struct sock *sk) } } rtnl_unlock(); + synchronize_rcu(); return err; } -struct sock *mroute6_socket(struct net *net, struct sk_buff *skb) +bool mroute6_is_socket(struct net *net, struct sk_buff *skb) { - struct mr6_table *mrt; + struct mr_table *mrt; struct flowi6 fl6 = { .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX, .flowi6_oif = skb->dev->ifindex, @@ -1657,8 +1502,9 @@ struct sock *mroute6_socket(struct net *net, struct sk_buff *skb) if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0) return NULL; - return mrt->mroute6_sk; + return rcu_access_pointer(mrt->mroute_sk); } +EXPORT_SYMBOL(mroute6_is_socket); /* * Socket options and virtual interface manipulation. The whole @@ -1674,7 +1520,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns struct mf6cctl mfc; mifi_t mifi; struct net *net = sock_net(sk); - struct mr6_table *mrt; + struct mr_table *mrt; if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num != IPPROTO_ICMPV6) @@ -1685,7 +1531,8 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns return -ENOENT; if (optname != MRT6_INIT) { - if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN)) + if (sk != rcu_access_pointer(mrt->mroute_sk) && + !ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EACCES; } @@ -1707,7 +1554,8 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns if (vif.mif6c_mifi >= MAXMIFS) return -ENFILE; rtnl_lock(); - ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk); + ret = mif6_add(net, mrt, &vif, + sk == rtnl_dereference(mrt->mroute_sk)); rtnl_unlock(); return ret; @@ -1742,7 +1590,9 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns ret = ip6mr_mfc_delete(mrt, &mfc, parent); else ret = ip6mr_mfc_add(net, mrt, &mfc, - sk == mrt->mroute6_sk, parent); + sk == + rtnl_dereference(mrt->mroute_sk), + parent); rtnl_unlock(); return ret; @@ -1794,7 +1644,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns /* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */ if (v != RT_TABLE_DEFAULT && v >= 100000000) return -EINVAL; - if (sk == mrt->mroute6_sk) + if (sk == rcu_access_pointer(mrt->mroute_sk)) return -EBUSY; rtnl_lock(); @@ -1825,7 +1675,7 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int olr; int val; struct net *net = sock_net(sk); - struct mr6_table *mrt; + struct mr_table *mrt; if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num != IPPROTO_ICMPV6) @@ -1873,10 +1723,10 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg) { struct sioc_sg_req6 sr; struct sioc_mif_req6 vr; - struct mif_device *vif; + struct vif_device *vif; struct mfc6_cache *c; struct net *net = sock_net(sk); - struct mr6_table *mrt; + struct mr_table *mrt; mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); if (!mrt) @@ -1889,8 +1739,8 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg) if (vr.mifi >= mrt->maxvif) return -EINVAL; read_lock(&mrt_lock); - vif = &mrt->vif6_table[vr.mifi]; - if (MIF_EXISTS(mrt, vr.mifi)) { + vif = &mrt->vif_table[vr.mifi]; + if (VIF_EXISTS(mrt, vr.mifi)) { vr.icount = vif->pkt_in; vr.ocount = vif->pkt_out; vr.ibytes = vif->bytes_in; @@ -1907,19 +1757,19 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg) if (copy_from_user(&sr, arg, sizeof(sr))) return -EFAULT; - read_lock(&mrt_lock); + rcu_read_lock(); c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr); if (c) { - sr.pktcnt = c->mfc_un.res.pkt; - sr.bytecnt = c->mfc_un.res.bytes; - sr.wrong_if = c->mfc_un.res.wrong_if; - read_unlock(&mrt_lock); + sr.pktcnt = c->_c.mfc_un.res.pkt; + sr.bytecnt = c->_c.mfc_un.res.bytes; + sr.wrong_if = c->_c.mfc_un.res.wrong_if; + rcu_read_unlock(); if (copy_to_user(arg, &sr, sizeof(sr))) return -EFAULT; return 0; } - read_unlock(&mrt_lock); + rcu_read_unlock(); return -EADDRNOTAVAIL; default: return -ENOIOCTLCMD; @@ -1947,10 +1797,10 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) { struct compat_sioc_sg_req6 sr; struct compat_sioc_mif_req6 vr; - struct mif_device *vif; + struct vif_device *vif; struct mfc6_cache *c; struct net *net = sock_net(sk); - struct mr6_table *mrt; + struct mr_table *mrt; mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); if (!mrt) @@ -1963,8 +1813,8 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) if (vr.mifi >= mrt->maxvif) return -EINVAL; read_lock(&mrt_lock); - vif = &mrt->vif6_table[vr.mifi]; - if (MIF_EXISTS(mrt, vr.mifi)) { + vif = &mrt->vif_table[vr.mifi]; + if (VIF_EXISTS(mrt, vr.mifi)) { vr.icount = vif->pkt_in; vr.ocount = vif->pkt_out; vr.ibytes = vif->bytes_in; @@ -1981,19 +1831,19 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) if (copy_from_user(&sr, arg, sizeof(sr))) return -EFAULT; - read_lock(&mrt_lock); + rcu_read_lock(); c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr); if (c) { - sr.pktcnt = c->mfc_un.res.pkt; - sr.bytecnt = c->mfc_un.res.bytes; - sr.wrong_if = c->mfc_un.res.wrong_if; - read_unlock(&mrt_lock); + sr.pktcnt = c->_c.mfc_un.res.pkt; + sr.bytecnt = c->_c.mfc_un.res.bytes; + sr.wrong_if = c->_c.mfc_un.res.wrong_if; + rcu_read_unlock(); if (copy_to_user(arg, &sr, sizeof(sr))) return -EFAULT; return 0; } - read_unlock(&mrt_lock); + rcu_read_unlock(); return -EADDRNOTAVAIL; default: return -ENOIOCTLCMD; @@ -2014,11 +1864,11 @@ static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct * Processing handlers for ip6mr_forward */ -static int ip6mr_forward2(struct net *net, struct mr6_table *mrt, +static int ip6mr_forward2(struct net *net, struct mr_table *mrt, struct sk_buff *skb, struct mfc6_cache *c, int vifi) { struct ipv6hdr *ipv6h; - struct mif_device *vif = &mrt->vif6_table[vifi]; + struct vif_device *vif = &mrt->vif_table[vifi]; struct net_device *dev; struct dst_entry *dst; struct flowi6 fl6; @@ -2088,46 +1938,50 @@ out_free: return 0; } -static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev) +static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev) { int ct; for (ct = mrt->maxvif - 1; ct >= 0; ct--) { - if (mrt->vif6_table[ct].dev == dev) + if (mrt->vif_table[ct].dev == dev) break; } return ct; } -static void ip6_mr_forward(struct net *net, struct mr6_table *mrt, - struct sk_buff *skb, struct mfc6_cache *cache) +static void ip6_mr_forward(struct net *net, struct mr_table *mrt, + struct sk_buff *skb, struct mfc6_cache *c) { int psend = -1; int vif, ct; int true_vifi = ip6mr_find_vif(mrt, skb->dev); - vif = cache->mf6c_parent; - cache->mfc_un.res.pkt++; - cache->mfc_un.res.bytes += skb->len; - cache->mfc_un.res.lastuse = jiffies; + vif = c->_c.mfc_parent; + c->_c.mfc_un.res.pkt++; + c->_c.mfc_un.res.bytes += skb->len; + c->_c.mfc_un.res.lastuse = jiffies; - if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) { + if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) { struct mfc6_cache *cache_proxy; /* For an (*,G) entry, we only check that the incoming * interface is part of the static tree. */ - cache_proxy = ip6mr_cache_find_any_parent(mrt, vif); + rcu_read_lock(); + cache_proxy = mr_mfc_find_any_parent(mrt, vif); if (cache_proxy && - cache_proxy->mfc_un.res.ttls[true_vifi] < 255) + cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) { + rcu_read_unlock(); goto forward; + } + rcu_read_unlock(); } /* * Wrong interface: drop packet and (maybe) send PIM assert. */ - if (mrt->vif6_table[vif].dev != skb->dev) { - cache->mfc_un.res.wrong_if++; + if (mrt->vif_table[vif].dev != skb->dev) { + c->_c.mfc_un.res.wrong_if++; if (true_vifi >= 0 && mrt->mroute_do_assert && /* pimsm uses asserts, when switching from RPT to SPT, @@ -2136,52 +1990,55 @@ static void ip6_mr_forward(struct net *net, struct mr6_table *mrt, large chunk of pimd to kernel. Ough... --ANK */ (mrt->mroute_do_pim || - cache->mfc_un.res.ttls[true_vifi] < 255) && + c->_c.mfc_un.res.ttls[true_vifi] < 255) && time_after(jiffies, - cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { - cache->mfc_un.res.last_assert = jiffies; + c->_c.mfc_un.res.last_assert + + MFC_ASSERT_THRESH)) { + c->_c.mfc_un.res.last_assert = jiffies; ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF); } goto dont_forward; } forward: - mrt->vif6_table[vif].pkt_in++; - mrt->vif6_table[vif].bytes_in += skb->len; + mrt->vif_table[vif].pkt_in++; + mrt->vif_table[vif].bytes_in += skb->len; /* * Forward the frame */ - if (ipv6_addr_any(&cache->mf6c_origin) && - ipv6_addr_any(&cache->mf6c_mcastgrp)) { + if (ipv6_addr_any(&c->mf6c_origin) && + ipv6_addr_any(&c->mf6c_mcastgrp)) { if (true_vifi >= 0 && - true_vifi != cache->mf6c_parent && + true_vifi != c->_c.mfc_parent && ipv6_hdr(skb)->hop_limit > - cache->mfc_un.res.ttls[cache->mf6c_parent]) { + c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) { /* It's an (*,*) entry and the packet is not coming from * the upstream: forward the packet to the upstream * only. */ - psend = cache->mf6c_parent; + psend = c->_c.mfc_parent; goto last_forward; } goto dont_forward; } - for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) { + for (ct = c->_c.mfc_un.res.maxvif - 1; + ct >= c->_c.mfc_un.res.minvif; ct--) { /* For (*,G) entry, don't forward to the incoming interface */ - if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) && - ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) { + if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) && + ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) { if (psend != -1) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) - ip6mr_forward2(net, mrt, skb2, cache, psend); + ip6mr_forward2(net, mrt, skb2, + c, psend); } psend = ct; } } last_forward: if (psend != -1) { - ip6mr_forward2(net, mrt, skb, cache, psend); + ip6mr_forward2(net, mrt, skb, c, psend); return; } @@ -2198,7 +2055,7 @@ int ip6_mr_input(struct sk_buff *skb) { struct mfc6_cache *cache; struct net *net = dev_net(skb->dev); - struct mr6_table *mrt; + struct mr_table *mrt; struct flowi6 fl6 = { .flowi6_iif = skb->dev->ifindex, .flowi6_mark = skb->mark, @@ -2248,66 +2105,11 @@ int ip6_mr_input(struct sk_buff *skb) return 0; } - -static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, - struct mfc6_cache *c, struct rtmsg *rtm) -{ - struct rta_mfc_stats mfcs; - struct nlattr *mp_attr; - struct rtnexthop *nhp; - unsigned long lastuse; - int ct; - - /* If cache is unresolved, don't try to parse IIF and OIF */ - if (c->mf6c_parent >= MAXMIFS) { - rtm->rtm_flags |= RTNH_F_UNRESOLVED; - return -ENOENT; - } - - if (MIF_EXISTS(mrt, c->mf6c_parent) && - nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0) - return -EMSGSIZE; - mp_attr = nla_nest_start(skb, RTA_MULTIPATH); - if (!mp_attr) - return -EMSGSIZE; - - for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { - if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) { - nhp = nla_reserve_nohdr(skb, sizeof(*nhp)); - if (!nhp) { - nla_nest_cancel(skb, mp_attr); - return -EMSGSIZE; - } - - nhp->rtnh_flags = 0; - nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; - nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex; - nhp->rtnh_len = sizeof(*nhp); - } - } - - nla_nest_end(skb, mp_attr); - - lastuse = READ_ONCE(c->mfc_un.res.lastuse); - lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0; - - mfcs.mfcs_packets = c->mfc_un.res.pkt; - mfcs.mfcs_bytes = c->mfc_un.res.bytes; - mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if; - if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) || - nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse), - RTA_PAD)) - return -EMSGSIZE; - - rtm->rtm_type = RTN_MULTICAST; - return 1; -} - int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, u32 portid) { int err; - struct mr6_table *mrt; + struct mr_table *mrt; struct mfc6_cache *cache; struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); @@ -2368,15 +2170,12 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, return err; } - if (rtm->rtm_flags & RTM_F_NOTIFY) - cache->mfc_flags |= MFC_NOTIFY; - - err = __ip6mr_fill_mroute(mrt, skb, cache, rtm); + err = mr_fill_mroute(mrt, skb, &cache->_c, rtm); read_unlock(&mrt_lock); return err; } -static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, +static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, u32 portid, u32 seq, struct mfc6_cache *c, int cmd, int flags) { @@ -2398,7 +2197,7 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, goto nla_put_failure; rtm->rtm_type = RTN_MULTICAST; rtm->rtm_scope = RT_SCOPE_UNIVERSE; - if (c->mfc_flags & MFC_STATIC) + if (c->_c.mfc_flags & MFC_STATIC) rtm->rtm_protocol = RTPROT_STATIC; else rtm->rtm_protocol = RTPROT_MROUTED; @@ -2407,7 +2206,7 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) || nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp)) goto nla_put_failure; - err = __ip6mr_fill_mroute(mrt, skb, c, rtm); + err = mr_fill_mroute(mrt, skb, &c->_c, rtm); /* do not break the dump if cache is unresolved */ if (err < 0 && err != -ENOENT) goto nla_put_failure; @@ -2420,6 +2219,14 @@ nla_put_failure: return -EMSGSIZE; } +static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, + u32 portid, u32 seq, struct mr_mfc *c, + int cmd, int flags) +{ + return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c, + cmd, flags); +} + static int mr6_msgsize(bool unresolved, int maxvif) { size_t len = @@ -2441,14 +2248,14 @@ static int mr6_msgsize(bool unresolved, int maxvif) return len; } -static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc, +static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc, int cmd) { struct net *net = read_pnet(&mrt->net); struct sk_buff *skb; int err = -ENOBUFS; - skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif), + skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif), GFP_ATOMIC); if (!skb) goto errout; @@ -2483,7 +2290,7 @@ static size_t mrt6msg_netlink_msgsize(size_t payloadlen) return len; } -static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt) +static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt) { struct net *net = read_pnet(&mrt->net); struct nlmsghdr *nlh; @@ -2533,65 +2340,6 @@ errout: static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = sock_net(skb->sk); - struct mr6_table *mrt; - struct mfc6_cache *mfc; - unsigned int t = 0, s_t; - unsigned int h = 0, s_h; - unsigned int e = 0, s_e; - - s_t = cb->args[0]; - s_h = cb->args[1]; - s_e = cb->args[2]; - - read_lock(&mrt_lock); - ip6mr_for_each_table(mrt, net) { - if (t < s_t) - goto next_table; - if (t > s_t) - s_h = 0; - for (h = s_h; h < MFC6_LINES; h++) { - list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) { - if (e < s_e) - goto next_entry; - if (ip6mr_fill_mroute(mrt, skb, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - mfc, RTM_NEWROUTE, - NLM_F_MULTI) < 0) - goto done; -next_entry: - e++; - } - e = s_e = 0; - } - spin_lock_bh(&mfc_unres_lock); - list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) { - if (e < s_e) - goto next_entry2; - if (ip6mr_fill_mroute(mrt, skb, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - mfc, RTM_NEWROUTE, - NLM_F_MULTI) < 0) { - spin_unlock_bh(&mfc_unres_lock); - goto done; - } -next_entry2: - e++; - } - spin_unlock_bh(&mfc_unres_lock); - e = s_e = 0; - s_h = 0; -next_table: - t++; - } -done: - read_unlock(&mrt_lock); - - cb->args[2] = e; - cb->args[1] = h; - cb->args[0] = t; - - return skb->len; + return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter, + _ip6mr_fill_mroute, &mfc_unres_lock); } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 24535169663d..4d780c7f0130 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -1415,4 +1415,3 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname, } EXPORT_SYMBOL(compat_ipv6_getsockopt); #endif - diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index c87b48359e8f..32f98bc06900 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -103,6 +103,7 @@ static void __net_exit defrag6_net_exit(struct net *net) static struct pernet_operations defrag6_net_ops = { .exit = defrag6_net_exit, + .async = true, }; static int __init nf_defrag_init(void) diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index b8858c546f41..1678cf037688 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -355,4 +355,3 @@ void ipv6_misc_proc_exit(void) { unregister_pernet_subsys(&ipv6_proc_ops); } - diff --git a/net/ipv6/route.c b/net/ipv6/route.c index aa709b644945..e2bb40824c85 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -460,7 +460,7 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match, * case it will always be non-zero. Otherwise now is the time to do it. */ if (!fl6->mp_hash) - fl6->mp_hash = rt6_multipath_hash(fl6, NULL); + fl6->mp_hash = rt6_multipath_hash(fl6, NULL, NULL); if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound)) return match; @@ -1786,10 +1786,12 @@ struct dst_entry *ip6_route_input_lookup(struct net *net, EXPORT_SYMBOL_GPL(ip6_route_input_lookup); static void ip6_multipath_l3_keys(const struct sk_buff *skb, - struct flow_keys *keys) + struct flow_keys *keys, + struct flow_keys *flkeys) { const struct ipv6hdr *outer_iph = ipv6_hdr(skb); const struct ipv6hdr *key_iph = outer_iph; + struct flow_keys *_flkeys = flkeys; const struct ipv6hdr *inner_iph; const struct icmp6hdr *icmph; struct ipv6hdr _inner_iph; @@ -1811,22 +1813,31 @@ static void ip6_multipath_l3_keys(const struct sk_buff *skb, goto out; key_iph = inner_iph; + _flkeys = NULL; out: memset(keys, 0, sizeof(*keys)); keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; - keys->addrs.v6addrs.src = key_iph->saddr; - keys->addrs.v6addrs.dst = key_iph->daddr; - keys->tags.flow_label = ip6_flowinfo(key_iph); - keys->basic.ip_proto = key_iph->nexthdr; + if (_flkeys) { + keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src; + keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst; + keys->tags.flow_label = _flkeys->tags.flow_label; + keys->basic.ip_proto = _flkeys->basic.ip_proto; + } else { + keys->addrs.v6addrs.src = key_iph->saddr; + keys->addrs.v6addrs.dst = key_iph->daddr; + keys->tags.flow_label = ip6_flowinfo(key_iph); + keys->basic.ip_proto = key_iph->nexthdr; + } } /* if skb is set it will be used and fl6 can be NULL */ -u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb) +u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb, + struct flow_keys *flkeys) { struct flow_keys hash_keys; if (skb) { - ip6_multipath_l3_keys(skb, &hash_keys); + ip6_multipath_l3_keys(skb, &hash_keys, flkeys); return flow_hash_from_keys(&hash_keys) >> 1; } @@ -1847,12 +1858,17 @@ void ip6_route_input(struct sk_buff *skb) .flowi6_mark = skb->mark, .flowi6_proto = iph->nexthdr, }; + struct flow_keys *flkeys = NULL, _flkeys; tun_info = skb_tunnel_info(skb); if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX)) fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id; + + if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys)) + flkeys = &_flkeys; + if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6)) - fl6.mp_hash = rt6_multipath_hash(&fl6, skb); + fl6.mp_hash = rt6_multipath_hash(&fl6, skb, flkeys); skb_dst_drop(skb); skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags)); } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 3a1775a62973..182db078f01e 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1878,6 +1878,7 @@ static struct pernet_operations sit_net_ops = { .exit_batch = sit_exit_batch_net, .id = &sit_net_id, .size = sizeof(struct sit_net), + .async = true, }; static void __exit sit_cleanup(void) diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index b15075a5c227..16f434791763 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -196,4 +196,3 @@ void xfrm6_state_fini(void) { xfrm_state_unregister_afinfo(&xfrm6_state_afinfo); } - diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index f85f0d7480ac..a9673619e0e9 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -353,6 +353,7 @@ static struct pernet_operations xfrm6_tunnel_net_ops = { .exit = xfrm6_tunnel_net_exit, .id = &xfrm6_tunnel_net_id, .size = sizeof(struct xfrm6_tunnel_net), + .async = true, }; static int __init xfrm6_tunnel_init(void) diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c index 9d5649e4e8b7..2c1c8b3e4452 100644 --- a/net/kcm/kcmproc.c +++ b/net/kcm/kcmproc.c @@ -433,6 +433,7 @@ static void kcm_proc_exit_net(struct net *net) static struct pernet_operations kcm_net_ops = { .init = kcm_proc_init_net, .exit = kcm_proc_exit_net, + .async = true, }; int __init kcm_proc_init(void) diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 435594648dac..a6cd0712e063 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -2015,6 +2015,7 @@ static struct pernet_operations kcm_net_ops = { .exit = kcm_exit_net, .id = &kcm_net_id, .size = sizeof(struct kcm_net), + .async = true, }; static int __init kcm_init(void) diff --git a/net/key/af_key.c b/net/key/af_key.c index 7e2e7188e7f4..3ac08ab26207 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3863,6 +3863,7 @@ static struct pernet_operations pfkey_net_ops = { .exit = pfkey_net_exit, .id = &pfkey_net_id, .size = sizeof(struct netns_pfkey), + .async = true, }; static void __exit ipsec_pfkey_exit(void) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 99a03c72db4f..0c4f49a6a0cb 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1770,6 +1770,7 @@ static struct pernet_operations pppol2tp_net_ops = { .init = pppol2tp_init_net, .exit = pppol2tp_exit_net, .id = &pppol2tp_net_id, + .async = true, }; /***************************************************************************** diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index d625179de485..6a340c94c4b8 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -604,6 +604,7 @@ static void __net_exit __ip_vs_lblc_exit(struct net *net) { } static struct pernet_operations ip_vs_lblc_ops = { .init = __ip_vs_lblc_init, .exit = __ip_vs_lblc_exit, + .async = true, }; static int __init ip_vs_lblc_init(void) diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 84c57b62a588..0627881128da 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -789,6 +789,7 @@ static void __net_exit __ip_vs_lblcr_exit(struct net *net) { } static struct pernet_operations ip_vs_lblcr_ops = { .init = __ip_vs_lblcr_init, .exit = __ip_vs_lblcr_exit, + .async = true, }; static int __init ip_vs_lblcr_init(void) diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index 92139a087260..64b875e452ca 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -398,6 +398,7 @@ static struct pernet_operations synproxy_net_ops = { .exit = synproxy_net_exit, .id = &synproxy_net_id, .size = sizeof(struct synproxy_net), + .async = true, }; static int __init synproxy_core_init(void) diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 66f5aca62a08..db2fe0911740 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -1345,6 +1345,7 @@ static struct pernet_operations hashlimit_net_ops = { .exit = hashlimit_net_exit, .id = &hashlimit_net_id, .size = sizeof(struct hashlimit_net), + .async = true, }; static int __init hashlimit_mt_init(void) diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 6d232d18faff..19efdb757944 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -687,6 +687,7 @@ static struct pernet_operations recent_net_ops = { .exit = recent_net_exit, .id = &recent_net_id, .size = sizeof(struct recent_net), + .async = true, }; static struct xt_match recent_mt_reg[] __read_mostly = { diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index 77787512fc32..9454e8393793 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -342,6 +342,7 @@ static struct pernet_operations phonet_net_ops = { .exit = phonet_exit_net, .id = &phonet_net_id, .size = sizeof(struct phonet_net), + .async = true, }; /* Initialize Phonet devices list */ diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index a937f18896ae..f7126108a811 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -77,6 +77,7 @@ static int rds_release(struct socket *sock) rds_send_drop_to(rs, NULL); rds_rdma_drop_keys(rs); rds_notify_queue_get(rs, NULL); + __skb_queue_purge(&rs->rs_zcookie_queue); spin_lock_bh(&rds_sock_lock); list_del_init(&rs->rs_item); @@ -144,7 +145,7 @@ static int rds_getname(struct socket *sock, struct sockaddr *uaddr, * - to signal that a previously congested destination may have become * uncongested * - A notification has been queued to the socket (this can be a congestion - * update, or a RDMA completion). + * update, or a RDMA completion, or a MSG_ZEROCOPY completion). * * EPOLLOUT is asserted if there is room on the send queue. This does not mean * however, that the next sendmsg() call will succeed. If the application tries @@ -178,7 +179,8 @@ static __poll_t rds_poll(struct file *file, struct socket *sock, spin_unlock(&rs->rs_lock); } if (!list_empty(&rs->rs_recv_queue) || - !list_empty(&rs->rs_notify_queue)) + !list_empty(&rs->rs_notify_queue) || + !skb_queue_empty(&rs->rs_zcookie_queue)) mask |= (EPOLLIN | EPOLLRDNORM); if (rs->rs_snd_bytes < rds_sk_sndbuf(rs)) mask |= (EPOLLOUT | EPOLLWRNORM); @@ -513,6 +515,7 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol) INIT_LIST_HEAD(&rs->rs_recv_queue); INIT_LIST_HEAD(&rs->rs_notify_queue); INIT_LIST_HEAD(&rs->rs_cong_list); + skb_queue_head_init(&rs->rs_zcookie_queue); spin_lock_init(&rs->rs_rdma_lock); rs->rs_rdma_keys = RB_ROOT; rs->rs_rx_traces = 0; diff --git a/net/rds/message.c b/net/rds/message.c index 651834513481..116cf87ccb89 100644 --- a/net/rds/message.c +++ b/net/rds/message.c @@ -58,32 +58,26 @@ EXPORT_SYMBOL_GPL(rds_message_addref); static inline bool skb_zcookie_add(struct sk_buff *skb, u32 cookie) { - struct sock_exterr_skb *serr = SKB_EXT_ERR(skb); - int ncookies; - u32 *ptr; + struct rds_zcopy_cookies *ck = (struct rds_zcopy_cookies *)skb->cb; + int ncookies = ck->num; - if (serr->ee.ee_origin != SO_EE_ORIGIN_ZCOOKIE) + if (ncookies == RDS_MAX_ZCOOKIES) return false; - ncookies = serr->ee.ee_data; - if (ncookies == SO_EE_ORIGIN_MAX_ZCOOKIES) - return false; - ptr = skb_put(skb, sizeof(u32)); - *ptr = cookie; - serr->ee.ee_data = ++ncookies; + ck->cookies[ncookies] = cookie; + ck->num = ++ncookies; return true; } static void rds_rm_zerocopy_callback(struct rds_sock *rs, struct rds_znotifier *znotif) { - struct sock *sk = rds_rs_to_sk(rs); struct sk_buff *skb, *tail; - struct sock_exterr_skb *serr; unsigned long flags; struct sk_buff_head *q; u32 cookie = znotif->z_cookie; + struct rds_zcopy_cookies *ck; - q = &sk->sk_error_queue; + q = &rs->rs_zcookie_queue; spin_lock_irqsave(&q->lock, flags); tail = skb_peek_tail(q); @@ -91,22 +85,19 @@ static void rds_rm_zerocopy_callback(struct rds_sock *rs, spin_unlock_irqrestore(&q->lock, flags); mm_unaccount_pinned_pages(&znotif->z_mmp); consume_skb(rds_skb_from_znotifier(znotif)); - sk->sk_error_report(sk); + /* caller invokes rds_wake_sk_sleep() */ return; } skb = rds_skb_from_znotifier(znotif); - serr = SKB_EXT_ERR(skb); - memset(&serr->ee, 0, sizeof(serr->ee)); - serr->ee.ee_errno = 0; - serr->ee.ee_origin = SO_EE_ORIGIN_ZCOOKIE; - serr->ee.ee_info = 0; + ck = (struct rds_zcopy_cookies *)skb->cb; + memset(ck, 0, sizeof(*ck)); WARN_ON(!skb_zcookie_add(skb, cookie)); __skb_queue_tail(q, skb); spin_unlock_irqrestore(&q->lock, flags); - sk->sk_error_report(sk); + /* caller invokes rds_wake_sk_sleep() */ mm_unaccount_pinned_pages(&znotif->z_mmp); } @@ -129,6 +120,7 @@ static void rds_message_purge(struct rds_message *rm) if (rm->data.op_mmp_znotifier) { zcopy = true; rds_rm_zerocopy_callback(rs, rm->data.op_mmp_znotifier); + rds_wake_sk_sleep(rs); rm->data.op_mmp_znotifier = NULL; } sock_put(rds_rs_to_sk(rs)); @@ -362,10 +354,12 @@ int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from, int total_copied = 0; struct sk_buff *skb; - skb = alloc_skb(SO_EE_ORIGIN_MAX_ZCOOKIES * sizeof(u32), - GFP_KERNEL); + skb = alloc_skb(0, GFP_KERNEL); if (!skb) return -ENOMEM; + BUILD_BUG_ON(sizeof(skb->cb) < + max_t(int, sizeof(struct rds_znotifier), + sizeof(struct rds_zcopy_cookies))); rm->data.op_mmp_znotifier = RDS_ZCOPY_SKB(skb); if (mm_account_pinned_pages(&rm->data.op_mmp_znotifier->z_mmp, length)) { diff --git a/net/rds/rds.h b/net/rds/rds.h index 31cd38852050..33b16353d8f3 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -603,6 +603,8 @@ struct rds_sock { /* Socket receive path trace points*/ u8 rs_rx_traces; u8 rs_rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX]; + + struct sk_buff_head rs_zcookie_queue; }; static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk) diff --git a/net/rds/recv.c b/net/rds/recv.c index b080961464df..d50747725221 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -577,6 +577,32 @@ out: return ret; } +static bool rds_recvmsg_zcookie(struct rds_sock *rs, struct msghdr *msg) +{ + struct sk_buff *skb; + struct sk_buff_head *q = &rs->rs_zcookie_queue; + struct rds_zcopy_cookies *done; + + if (!msg->msg_control) + return false; + + if (!sock_flag(rds_rs_to_sk(rs), SOCK_ZEROCOPY) || + msg->msg_controllen < CMSG_SPACE(sizeof(*done))) + return false; + + skb = skb_dequeue(q); + if (!skb) + return false; + done = (struct rds_zcopy_cookies *)skb->cb; + if (put_cmsg(msg, SOL_RDS, RDS_CMSG_ZCOPY_COMPLETION, sizeof(*done), + done)) { + skb_queue_head(q, skb); + return false; + } + consume_skb(skb); + return true; +} + int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int msg_flags) { @@ -611,7 +637,9 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, if (!rds_next_incoming(rs, &inc)) { if (nonblock) { - ret = -EAGAIN; + bool reaped = rds_recvmsg_zcookie(rs, msg); + + ret = reaped ? 0 : -EAGAIN; break; } @@ -660,6 +688,7 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, ret = -EFAULT; goto out; } + rds_recvmsg_zcookie(rs, msg); rds_stats_inc(s_recv_delivered); diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index cb3c5d403c88..da72e0cf2b1f 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -413,6 +413,7 @@ static struct pernet_operations bpf_net_ops = { .exit_batch = bpf_exit_net, .id = &bpf_net_id, .size = sizeof(struct tc_action_net), + .async = true, }; static int __init bpf_init_module(void) diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index e4b880fa51fe..371e5e4ab3e2 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -222,6 +222,7 @@ static struct pernet_operations connmark_net_ops = { .exit_batch = connmark_exit_net, .id = &connmark_net_id, .size = sizeof(struct tc_action_net), + .async = true, }; static int __init connmark_init_module(void) diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index d5c2e528d150..1fb1f1f6a555 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -677,6 +677,7 @@ static struct pernet_operations csum_net_ops = { .exit_batch = csum_exit_net, .id = &csum_net_id, .size = sizeof(struct tc_action_net), + .async = true, }; MODULE_DESCRIPTION("Checksum updating actions"); diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index f072bcf33760..74563254e676 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -247,6 +247,7 @@ static struct pernet_operations gact_net_ops = { .exit_batch = gact_exit_net, .id = &gact_net_id, .size = sizeof(struct tc_action_net), + .async = true, }; MODULE_AUTHOR("Jamal Hadi Salim(2002-4)"); diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index a5994cf0512b..555b1caeff72 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -870,6 +870,7 @@ static struct pernet_operations ife_net_ops = { .exit_batch = ife_exit_net, .id = &ife_net_id, .size = sizeof(struct tc_action_net), + .async = true, }; static int __init ife_init_module(void) diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 9784629090ad..10866717f88e 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -349,6 +349,7 @@ static struct pernet_operations ipt_net_ops = { .exit_batch = ipt_exit_net, .id = &ipt_net_id, .size = sizeof(struct tc_action_net), + .async = true, }; static int tcf_xt_walker(struct net *net, struct sk_buff *skb, @@ -399,6 +400,7 @@ static struct pernet_operations xt_net_ops = { .exit_batch = xt_exit_net, .id = &xt_net_id, .size = sizeof(struct tc_action_net), + .async = true, }; MODULE_AUTHOR("Jamal Hadi Salim(2002-13)"); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index fd34015331ab..64c86579c3d9 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -353,6 +353,7 @@ static struct pernet_operations mirred_net_ops = { .exit_batch = mirred_exit_net, .id = &mirred_net_id, .size = sizeof(struct tc_action_net), + .async = true, }; MODULE_AUTHOR("Jamal Hadi Salim(2002)"); diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 4b5848b6c252..b1bc757f6491 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -323,6 +323,7 @@ static struct pernet_operations nat_net_ops = { .exit_batch = nat_exit_net, .id = &nat_net_id, .size = sizeof(struct tc_action_net), + .async = true, }; MODULE_DESCRIPTION("Stateless NAT actions"); diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 094303c27c5e..5e8cc8f63acd 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -465,6 +465,7 @@ static struct pernet_operations pedit_net_ops = { .exit_batch = pedit_exit_net, .id = &pedit_net_id, .size = sizeof(struct tc_action_net), + .async = true, }; MODULE_AUTHOR("Jamal Hadi Salim(2002-4)"); diff --git a/net/sched/act_police.c b/net/sched/act_police.c index ff55bd6c7db0..51fe4fe343f7 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -347,6 +347,7 @@ static struct pernet_operations police_net_ops = { .exit_batch = police_exit_net, .id = &police_net_id, .size = sizeof(struct tc_action_net), + .async = true, }; static int __init police_init_module(void) diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 9765145aaf40..238dfd27e995 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -248,6 +248,7 @@ static struct pernet_operations sample_net_ops = { .exit_batch = sample_exit_net, .id = &sample_net_id, .size = sizeof(struct tc_action_net), + .async = true, }; static int __init sample_init_module(void) diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 8244e221fe4f..91816d73f3f3 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -216,6 +216,7 @@ static struct pernet_operations simp_net_ops = { .exit_batch = simp_exit_net, .id = &simp_net_id, .size = sizeof(struct tc_action_net), + .async = true, }; MODULE_AUTHOR("Jamal Hadi Salim(2005)"); diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index ddf69fc01bdf..7971510fe61b 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -253,6 +253,7 @@ static struct pernet_operations skbedit_net_ops = { .exit_batch = skbedit_exit_net, .id = &skbedit_net_id, .size = sizeof(struct tc_action_net), + .async = true, }; MODULE_AUTHOR("Alexander Duyck, <alexander.h.duyck@intel.com>"); diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index a406f191cb84..febec75f4f7a 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -278,6 +278,7 @@ static struct pernet_operations skbmod_net_ops = { .exit_batch = skbmod_exit_net, .id = &skbmod_net_id, .size = sizeof(struct tc_action_net), + .async = true, }; MODULE_AUTHOR("Jamal Hadi Salim, <jhs@mojatatu.com>"); diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 41ff9d0e5c62..9169b7e78ada 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -337,6 +337,7 @@ static struct pernet_operations tunnel_key_net_ops = { .exit_batch = tunnel_key_exit_net, .id = &tunnel_key_net_id, .size = sizeof(struct tc_action_net), + .async = true, }; static int __init tunnel_key_init_module(void) diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 71411a255f04..c2ee7fd51cc9 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -313,6 +313,7 @@ static struct pernet_operations vlan_net_ops = { .exit_batch = vlan_exit_net, .id = &vlan_net_id, .size = sizeof(struct tc_action_net), + .async = true, }; static int __init vlan_init_module(void) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 9d1a8bbf8152..19f9f421d5b7 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1618,6 +1618,7 @@ static struct pernet_operations tcf_net_ops = { .exit = tcf_net_exit, .id = &tcf_net_id, .size = sizeof(struct tcf_net), + .async = true, }; static int __init tc_filter_init(void) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 27e672c12492..68f9d942bed4 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -739,6 +739,7 @@ static u32 qdisc_alloc_handle(struct net_device *dev) void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n, unsigned int len) { + bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED; const struct Qdisc_class_ops *cops; unsigned long cl; u32 parentid; @@ -760,8 +761,12 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n, * If child was empty even before update then backlog * counter is screwed and we skip notification because * parent class is already passive. + * + * If the original child was offloaded then it is allowed + * to be seem as empty, so the parent is notified anyway. */ - notify = !sch->q.qlen && !WARN_ON_ONCE(!n); + notify = !sch->q.qlen && !WARN_ON_ONCE(!n && + !qdisc_is_offloaded); /* TODO: perform the search on a per txq basis */ sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid)); if (sch == NULL) { diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index efbf51f35778..222e53d3d27a 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -142,9 +142,8 @@ prio_reset(struct Qdisc *sch) sch->q.qlen = 0; } -static int prio_offload(struct Qdisc *sch, bool enable) +static int prio_offload(struct Qdisc *sch, struct tc_prio_qopt *qopt) { - struct prio_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); struct tc_prio_qopt_offload opt = { .handle = sch->handle, @@ -154,10 +153,10 @@ static int prio_offload(struct Qdisc *sch, bool enable) if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) return -EOPNOTSUPP; - if (enable) { + if (qopt) { opt.command = TC_PRIO_REPLACE; - opt.replace_params.bands = q->bands; - memcpy(&opt.replace_params.priomap, q->prio2band, + opt.replace_params.bands = qopt->bands; + memcpy(&opt.replace_params.priomap, qopt->priomap, TC_PRIO_MAX + 1); opt.replace_params.qstats = &sch->qstats; } else { @@ -174,7 +173,7 @@ prio_destroy(struct Qdisc *sch) struct prio_sched_data *q = qdisc_priv(sch); tcf_block_put(q->block); - prio_offload(sch, false); + prio_offload(sch, NULL); for (prio = 0; prio < q->bands; prio++) qdisc_destroy(q->queues[prio]); } @@ -211,6 +210,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt, } } + prio_offload(sch, qopt); sch_tree_lock(sch); q->bands = qopt->bands; memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); @@ -230,7 +230,6 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt, } sch_tree_unlock(sch); - prio_offload(sch, true); return 0; } @@ -309,12 +308,44 @@ static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, struct Qdisc **old, struct netlink_ext_ack *extack) { struct prio_sched_data *q = qdisc_priv(sch); + struct tc_prio_qopt_offload graft_offload; + struct net_device *dev = qdisc_dev(sch); unsigned long band = arg - 1; + bool any_qdisc_is_offloaded; + int err; if (new == NULL) new = &noop_qdisc; *old = qdisc_replace(sch, new, &q->queues[band]); + + if (!tc_can_offload(dev)) + return 0; + + graft_offload.handle = sch->handle; + graft_offload.parent = sch->parent; + graft_offload.graft_params.band = band; + graft_offload.graft_params.child_handle = new->handle; + graft_offload.command = TC_PRIO_GRAFT; + + err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_PRIO, + &graft_offload); + + /* Don't report error if the graft is part of destroy operation. */ + if (err && new != &noop_qdisc) { + /* Don't report error if the parent, the old child and the new + * one are not offloaded. + */ + any_qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED; + any_qdisc_is_offloaded |= new->flags & TCQ_F_OFFLOADED; + if (*old) + any_qdisc_is_offloaded |= (*old)->flags & + TCQ_F_OFFLOADED; + + if (any_qdisc_is_offloaded) + NL_SET_ERR_MSG(extack, "Offloading graft operation failed."); + } + return 0; } diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 38ae22b65e77..26684e086750 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -7,7 +7,6 @@ * applicable with RoCE-cards only * * Initial restrictions: - * - non-blocking connect postponed * - IPv6 support postponed * - support for alternate links postponed * - partial support for non-blocking sockets only @@ -24,7 +23,6 @@ #include <linux/module.h> #include <linux/socket.h> -#include <linux/inetdevice.h> #include <linux/workqueue.h> #include <linux/in.h> #include <linux/sched/signal.h> @@ -273,46 +271,7 @@ static void smc_copy_sock_settings_to_smc(struct smc_sock *smc) smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC); } -/* determine subnet and mask of internal TCP socket */ -int smc_netinfo_by_tcpsk(struct socket *clcsock, - __be32 *subnet, u8 *prefix_len) -{ - struct dst_entry *dst = sk_dst_get(clcsock->sk); - struct in_device *in_dev; - struct sockaddr_in addr; - int rc = -ENOENT; - - if (!dst) { - rc = -ENOTCONN; - goto out; - } - if (!dst->dev) { - rc = -ENODEV; - goto out_rel; - } - - /* get address to which the internal TCP socket is bound */ - kernel_getsockname(clcsock, (struct sockaddr *)&addr); - /* analyze IPv4 specific data of net_device belonging to TCP socket */ - rcu_read_lock(); - in_dev = __in_dev_get_rcu(dst->dev); - for_ifa(in_dev) { - if (!inet_ifa_match(addr.sin_addr.s_addr, ifa)) - continue; - *prefix_len = inet_mask_len(ifa->ifa_mask); - *subnet = ifa->ifa_address & ifa->ifa_mask; - rc = 0; - break; - } endfor_ifa(in_dev); - rcu_read_unlock(); - -out_rel: - dst_release(dst); -out: - return rc; -} - -static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid) +static int smc_clnt_conf_first_link(struct smc_sock *smc) { struct smc_link_group *lgr = smc->conn.lgr; struct smc_link *link; @@ -332,6 +291,9 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid) return rc; } + if (link->llc_confirm_rc) + return SMC_CLC_DECL_RMBE_EC; + rc = smc_ib_modify_qp_rts(link); if (rc) return SMC_CLC_DECL_INTERR; @@ -346,11 +308,33 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid) /* send CONFIRM LINK response over RoCE fabric */ rc = smc_llc_send_confirm_link(link, link->smcibdev->mac[link->ibport - 1], - gid, SMC_LLC_RESP); + &link->smcibdev->gid[link->ibport - 1], + SMC_LLC_RESP); if (rc < 0) return SMC_CLC_DECL_TCL; - return rc; + /* receive ADD LINK request from server over RoCE fabric */ + rest = wait_for_completion_interruptible_timeout(&link->llc_add, + SMC_LLC_WAIT_TIME); + if (rest <= 0) { + struct smc_clc_msg_decline dclc; + + rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc), + SMC_CLC_DECLINE); + return rc; + } + + /* send add link reject message, only one link supported for now */ + rc = smc_llc_send_add_link(link, + link->smcibdev->mac[link->ibport - 1], + &link->smcibdev->gid[link->ibport - 1], + SMC_LLC_RESP); + if (rc < 0) + return SMC_CLC_DECL_TCL; + + link->state = SMC_LNK_ACTIVE; + + return 0; } static void smc_conn_save_peer_info(struct smc_sock *smc, @@ -372,19 +356,9 @@ static void smc_link_save_peer_info(struct smc_link *link, link->peer_mtu = clc->qp_mtu; } -static void smc_lgr_forget(struct smc_link_group *lgr) -{ - spin_lock_bh(&smc_lgr_list.lock); - /* do not use this link group for new connections */ - if (!list_empty(&lgr->list)) - list_del_init(&lgr->list); - spin_unlock_bh(&smc_lgr_list.lock); -} - /* setup for RDMA connection of client */ static int smc_connect_rdma(struct smc_sock *smc) { - struct sockaddr_in *inaddr = (struct sockaddr_in *)smc->addr; struct smc_clc_msg_accept_confirm aclc; int local_contact = SMC_FIRST_CONTACT; struct smc_ib_device *smcibdev; @@ -438,8 +412,8 @@ static int smc_connect_rdma(struct smc_sock *smc) srv_first_contact = aclc.hdr.flag; mutex_lock(&smc_create_lgr_pending); - local_contact = smc_conn_create(smc, inaddr->sin_addr.s_addr, smcibdev, - ibport, &aclc.lcl, srv_first_contact); + local_contact = smc_conn_create(smc, smcibdev, ibport, &aclc.lcl, + srv_first_contact); if (local_contact < 0) { rc = local_contact; if (rc == -ENOMEM) @@ -498,8 +472,7 @@ static int smc_connect_rdma(struct smc_sock *smc) if (local_contact == SMC_FIRST_CONTACT) { /* QP confirmation over RoCE fabric */ - reason_code = smc_clnt_conf_first_link( - smc, &smcibdev->gid[ibport - 1]); + reason_code = smc_clnt_conf_first_link(smc); if (reason_code < 0) { rc = reason_code; goto out_err_unlock; @@ -558,7 +531,6 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr, goto out_err; if (addr->sa_family != AF_INET) goto out_err; - smc->addr = addr; /* needed for nonblocking connect */ lock_sock(sk); switch (sk->sk_state) { @@ -748,9 +720,34 @@ static int smc_serv_conf_first_link(struct smc_sock *smc) rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc), SMC_CLC_DECLINE); + return rc; } - return rc; + if (link->llc_confirm_resp_rc) + return SMC_CLC_DECL_RMBE_EC; + + /* send ADD LINK request to client over the RoCE fabric */ + rc = smc_llc_send_add_link(link, + link->smcibdev->mac[link->ibport - 1], + &link->smcibdev->gid[link->ibport - 1], + SMC_LLC_REQ); + if (rc < 0) + return SMC_CLC_DECL_TCL; + + /* receive ADD LINK response from client over the RoCE fabric */ + rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp, + SMC_LLC_WAIT_TIME); + if (rest <= 0) { + struct smc_clc_msg_decline dclc; + + rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc), + SMC_CLC_DECLINE); + return rc; + } + + link->state = SMC_LNK_ACTIVE; + + return 0; } /* setup for RDMA connection of server */ @@ -766,7 +763,6 @@ static void smc_listen_work(struct work_struct *work) struct sock *newsmcsk = &new_smc->sk; struct smc_clc_msg_proposal *pclc; struct smc_ib_device *smcibdev; - struct sockaddr_in peeraddr; u8 buf[SMC_CLC_MAX_LEN]; struct smc_link *link; int reason_code = 0; @@ -808,7 +804,7 @@ static void smc_listen_work(struct work_struct *work) } /* determine subnet and mask from internal TCP socket */ - rc = smc_netinfo_by_tcpsk(newclcsock, &subnet, &prefix_len); + rc = smc_clc_netinfo_by_tcpsk(newclcsock, &subnet, &prefix_len); if (rc) { reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */ goto decline_rdma; @@ -822,13 +818,10 @@ static void smc_listen_work(struct work_struct *work) goto decline_rdma; } - /* get address of the peer connected to the internal TCP socket */ - kernel_getpeername(newclcsock, (struct sockaddr *)&peeraddr); - /* allocate connection / link group */ mutex_lock(&smc_create_lgr_pending); - local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr, - smcibdev, ibport, &pclc->lcl, 0); + local_contact = smc_conn_create(new_smc, smcibdev, ibport, &pclc->lcl, + 0); if (local_contact < 0) { rc = local_contact; if (rc == -ENOMEM) diff --git a/net/smc/smc.h b/net/smc/smc.h index 9518986c97b1..268cdf11533c 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -172,7 +172,6 @@ struct smc_sock { /* smc sock container */ struct sock sk; struct socket *clcsock; /* internal tcp socket */ struct smc_connection conn; /* smc connection */ - struct sockaddr *addr; /* inet connect address */ struct smc_sock *listen_smc; /* listen parent */ struct work_struct tcp_listen_work;/* handle tcp socket accepts */ struct work_struct smc_listen_work;/* prepare new accept socket */ @@ -263,10 +262,8 @@ static inline bool using_ipsec(struct smc_sock *smc) struct smc_clc_msg_local; -int smc_netinfo_by_tcpsk(struct socket *clcsock, __be32 *subnet, - u8 *prefix_len); void smc_conn_free(struct smc_connection *conn); -int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr, +int smc_conn_create(struct smc_sock *smc, struct smc_ib_device *smcibdev, u8 ibport, struct smc_clc_msg_local *lcl, int srv_first_contact); struct sock *smc_accept_dequeue(struct sock *parent, struct socket *new_sock); diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 8ac51583a063..874c5a75d6dd 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -11,6 +11,7 @@ */ #include <linux/in.h> +#include <linux/inetdevice.h> #include <linux/if_ether.h> #include <linux/sched/signal.h> @@ -22,6 +23,9 @@ #include "smc_clc.h" #include "smc_ib.h" +/* eye catcher "SMCR" EBCDIC for CLC messages */ +static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'}; + /* check if received message has a correct header length and contains valid * heading and trailing eyecatchers */ @@ -70,6 +74,45 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm) return true; } +/* determine subnet and mask of internal TCP socket */ +int smc_clc_netinfo_by_tcpsk(struct socket *clcsock, + __be32 *subnet, u8 *prefix_len) +{ + struct dst_entry *dst = sk_dst_get(clcsock->sk); + struct in_device *in_dev; + struct sockaddr_in addr; + int rc = -ENOENT; + + if (!dst) { + rc = -ENOTCONN; + goto out; + } + if (!dst->dev) { + rc = -ENODEV; + goto out_rel; + } + + /* get address to which the internal TCP socket is bound */ + kernel_getsockname(clcsock, (struct sockaddr *)&addr); + /* analyze IPv4 specific data of net_device belonging to TCP socket */ + rcu_read_lock(); + in_dev = __in_dev_get_rcu(dst->dev); + for_ifa(in_dev) { + if (!inet_ifa_match(addr.sin_addr.s_addr, ifa)) + continue; + *prefix_len = inet_mask_len(ifa->ifa_mask); + *subnet = ifa->ifa_address & ifa->ifa_mask; + rc = 0; + break; + } endfor_ifa(in_dev); + rcu_read_unlock(); + +out_rel: + dst_release(dst); +out: + return rc; +} + /* Wait for data on the tcp-socket, analyze received data * Returns: * 0 if success and it was not a decline that we received. @@ -211,8 +254,8 @@ int smc_clc_send_proposal(struct smc_sock *smc, memset(&pclc_prfx, 0, sizeof(pclc_prfx)); /* determine subnet and mask from internal TCP socket */ - rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet, - &pclc_prfx.prefix_len); + rc = smc_clc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet, + &pclc_prfx.prefix_len); if (rc) return SMC_CLC_DECL_CNFERR; /* configuration error */ pclc_prfx.ipv6_prefixes_cnt = 0; diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index c145a0f36a68..20e048beac30 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -22,9 +22,6 @@ #define SMC_CLC_CONFIRM 0x03 #define SMC_CLC_DECLINE 0x04 -/* eye catcher "SMCR" EBCDIC for CLC messages */ -static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'}; - #define SMC_CLC_V1 0x1 /* SMC version */ #define CLC_WAIT_TIME (6 * HZ) /* max. wait time on clcsock */ #define SMC_CLC_DECL_MEM 0x01010000 /* insufficient memory resources */ @@ -36,6 +33,7 @@ static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'}; #define SMC_CLC_DECL_INTERR 0x99990000 /* internal error */ #define SMC_CLC_DECL_TCL 0x02040000 /* timeout w4 QP confirm */ #define SMC_CLC_DECL_SEND 0x07000000 /* sending problem */ +#define SMC_CLC_DECL_RMBE_EC 0x08000000 /* peer has eyecatcher in RMBE */ struct smc_clc_msg_hdr { /* header1 of clc messages */ u8 eyecatcher[4]; /* eye catcher */ @@ -124,9 +122,8 @@ smc_clc_proposal_get_prefix(struct smc_clc_msg_proposal *pclc) ((u8 *)pclc + sizeof(*pclc) + ntohs(pclc->iparea_offset)); } -struct smc_sock; -struct smc_ib_device; - +int smc_clc_netinfo_by_tcpsk(struct socket *clcsock, __be32 *subnet, + u8 *prefix_len); int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, u8 expected_type); int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 2424c7100aaf..702ce5f85e97 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -144,7 +144,7 @@ free: } /* create a new SMC link group */ -static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr, +static int smc_lgr_create(struct smc_sock *smc, struct smc_ib_device *smcibdev, u8 ibport, char *peer_systemid, unsigned short vlan_id) { @@ -161,7 +161,6 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr, } lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT; lgr->sync_err = false; - lgr->daddr = peer_in_addr; memcpy(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN); lgr->vlan_id = vlan_id; rwlock_init(&lgr->sndbufs_lock); @@ -177,6 +176,7 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr, lnk = &lgr->lnk[SMC_SINGLE_LINK]; /* initialize link */ + lnk->state = SMC_LNK_ACTIVATING; lnk->smcibdev = smcibdev; lnk->ibport = ibport; lnk->path_mtu = smcibdev->pattr[ibport - 1].active_mtu; @@ -198,6 +198,8 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr, goto destroy_qp; init_completion(&lnk->llc_confirm); init_completion(&lnk->llc_confirm_resp); + init_completion(&lnk->llc_add); + init_completion(&lnk->llc_add_resp); smc->conn.lgr = lgr; rwlock_init(&lgr->conns_lock); @@ -306,6 +308,15 @@ void smc_lgr_free(struct smc_link_group *lgr) kfree(lgr); } +void smc_lgr_forget(struct smc_link_group *lgr) +{ + spin_lock_bh(&smc_lgr_list.lock); + /* do not use this link group for new connections */ + if (!list_empty(&lgr->list)) + list_del_init(&lgr->list); + spin_unlock_bh(&smc_lgr_list.lock); +} + /* terminate linkgroup abnormally */ void smc_lgr_terminate(struct smc_link_group *lgr) { @@ -313,15 +324,7 @@ void smc_lgr_terminate(struct smc_link_group *lgr) struct smc_sock *smc; struct rb_node *node; - spin_lock_bh(&smc_lgr_list.lock); - if (list_empty(&lgr->list)) { - /* termination already triggered */ - spin_unlock_bh(&smc_lgr_list.lock); - return; - } - /* do not use this link group for new connections */ - list_del_init(&lgr->list); - spin_unlock_bh(&smc_lgr_list.lock); + smc_lgr_forget(lgr); write_lock_bh(&lgr->conns_lock); node = rb_first(&lgr->conns_all); @@ -400,7 +403,7 @@ static int smc_link_determine_gid(struct smc_link_group *lgr) } /* create a new SMC connection (and a new link group if necessary) */ -int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr, +int smc_conn_create(struct smc_sock *smc, struct smc_ib_device *smcibdev, u8 ibport, struct smc_clc_msg_local *lcl, int srv_first_contact) { @@ -457,7 +460,7 @@ int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr, create: if (local_contact == SMC_FIRST_CONTACT) { - rc = smc_lgr_create(smc, peer_in_addr, smcibdev, ibport, + rc = smc_lgr_create(smc, smcibdev, ibport, lcl->id_for_peer, vlan_id); if (rc) goto out; @@ -698,27 +701,55 @@ static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr) return -ENOSPC; } -/* save rkey and dma_addr received from peer during clc handshake */ -int smc_rmb_rtoken_handling(struct smc_connection *conn, - struct smc_clc_msg_accept_confirm *clc) +/* add a new rtoken from peer */ +int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey) { - u64 dma_addr = be64_to_cpu(clc->rmb_dma_addr); - struct smc_link_group *lgr = conn->lgr; - u32 rkey = ntohl(clc->rmb_rkey); + u64 dma_addr = be64_to_cpu(nw_vaddr); + u32 rkey = ntohl(nw_rkey); int i; for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) { if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) && (lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) && test_bit(i, lgr->rtokens_used_mask)) { - conn->rtoken_idx = i; + /* already in list */ + return i; + } + } + i = smc_rmb_reserve_rtoken_idx(lgr); + if (i < 0) + return i; + lgr->rtokens[i][SMC_SINGLE_LINK].rkey = rkey; + lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = dma_addr; + return i; +} + +/* delete an rtoken */ +int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey) +{ + u32 rkey = ntohl(nw_rkey); + int i; + + for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) { + if (lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey && + test_bit(i, lgr->rtokens_used_mask)) { + lgr->rtokens[i][SMC_SINGLE_LINK].rkey = 0; + lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = 0; + + clear_bit(i, lgr->rtokens_used_mask); return 0; } } - conn->rtoken_idx = smc_rmb_reserve_rtoken_idx(lgr); + return -ENOENT; +} + +/* save rkey and dma_addr received from peer during clc handshake */ +int smc_rmb_rtoken_handling(struct smc_connection *conn, + struct smc_clc_msg_accept_confirm *clc) +{ + conn->rtoken_idx = smc_rtoken_add(conn->lgr, clc->rmb_dma_addr, + clc->rmb_rkey); if (conn->rtoken_idx < 0) return conn->rtoken_idx; - lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey = rkey; - lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].dma_addr = dma_addr; return 0; } diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index fe691bf9af91..07e2a393e6d9 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -32,6 +32,12 @@ enum smc_lgr_role { /* possible roles of a link group */ SMC_SERV /* server */ }; +enum smc_link_state { /* possible states of a link */ + SMC_LNK_INACTIVE, /* link is inactive */ + SMC_LNK_ACTIVATING, /* link is being activated */ + SMC_LNK_ACTIVE /* link is active */ +}; + #define SMC_WR_BUF_SIZE 48 /* size of work request buffer */ struct smc_wr_buf { @@ -87,8 +93,14 @@ struct smc_link { u8 peer_mac[ETH_ALEN]; /* = gid[8:10||13:15] */ u8 peer_gid[sizeof(union ib_gid)]; /* gid of peer*/ u8 link_id; /* unique # within link group */ + + enum smc_link_state state; /* state of link */ struct completion llc_confirm; /* wait for rx of conf link */ struct completion llc_confirm_resp; /* wait 4 rx of cnf lnk rsp */ + int llc_confirm_rc; /* rc from confirm link msg */ + int llc_confirm_resp_rc; /* rc from conf_resp msg */ + struct completion llc_add; /* wait for rx of add link */ + struct completion llc_add_resp; /* wait for rx of add link rsp*/ }; /* For now we just allow one parallel link per link group. The SMC protocol @@ -124,7 +136,6 @@ struct smc_rtoken { /* address/key of remote RMB */ struct smc_link_group { struct list_head list; enum smc_lgr_role role; /* client or server */ - __be32 daddr; /* destination ip address */ struct smc_link lnk[SMC_LINKS_PER_LGR_MAX]; /* smc link */ char peer_systemid[SMC_SYSTEMID_LEN]; /* unique system_id of peer */ @@ -186,10 +197,13 @@ struct smc_sock; struct smc_clc_msg_accept_confirm; void smc_lgr_free(struct smc_link_group *lgr); +void smc_lgr_forget(struct smc_link_group *lgr); void smc_lgr_terminate(struct smc_link_group *lgr); int smc_buf_create(struct smc_sock *smc); int smc_rmb_rtoken_handling(struct smc_connection *conn, struct smc_clc_msg_accept_confirm *clc); +int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey); +int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey); void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn); void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn); void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn); diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 92fe4cc8c82c..54e8d6dc9201 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -4,9 +4,6 @@ * * Link Layer Control (LLC) * - * For now, we only support the necessary "confirm link" functionality - * which happens for the first RoCE link after successful CLC handshake. - * * Copyright IBM Corp. 2016 * * Author(s): Klaus Wacker <Klaus.Wacker@de.ibm.com> @@ -21,6 +18,122 @@ #include "smc_clc.h" #include "smc_llc.h" +#define SMC_LLC_DATA_LEN 40 + +struct smc_llc_hdr { + struct smc_wr_rx_hdr common; + u8 length; /* 44 */ +#if defined(__BIG_ENDIAN_BITFIELD) + u8 reserved:4, + add_link_rej_rsn:4; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u8 add_link_rej_rsn:4, + reserved:4; +#endif + u8 flags; +}; + +#define SMC_LLC_FLAG_NO_RMBE_EYEC 0x03 + +struct smc_llc_msg_confirm_link { /* type 0x01 */ + struct smc_llc_hdr hd; + u8 sender_mac[ETH_ALEN]; + u8 sender_gid[SMC_GID_SIZE]; + u8 sender_qp_num[3]; + u8 link_num; + u8 link_uid[SMC_LGR_ID_SIZE]; + u8 max_links; + u8 reserved[9]; +}; + +#define SMC_LLC_FLAG_ADD_LNK_REJ 0x40 +#define SMC_LLC_REJ_RSN_NO_ALT_PATH 1 + +#define SMC_LLC_ADD_LNK_MAX_LINKS 2 + +struct smc_llc_msg_add_link { /* type 0x02 */ + struct smc_llc_hdr hd; + u8 sender_mac[ETH_ALEN]; + u8 reserved2[2]; + u8 sender_gid[SMC_GID_SIZE]; + u8 sender_qp_num[3]; + u8 link_num; + u8 flags2; /* QP mtu */ + u8 initial_psn[3]; + u8 reserved[8]; +}; + +#define SMC_LLC_FLAG_DEL_LINK_ALL 0x40 +#define SMC_LLC_FLAG_DEL_LINK_ORDERLY 0x20 + +struct smc_llc_msg_del_link { /* type 0x04 */ + struct smc_llc_hdr hd; + u8 link_num; + __be32 reason; + u8 reserved[35]; +} __packed; /* format defined in RFC7609 */ + +struct smc_llc_msg_test_link { /* type 0x07 */ + struct smc_llc_hdr hd; + u8 user_data[16]; + u8 reserved[24]; +}; + +struct smc_rmb_rtoken { + union { + u8 num_rkeys; /* first rtoken byte of CONFIRM LINK msg */ + /* is actually the num of rtokens, first */ + /* rtoken is always for the current link */ + u8 link_id; /* link id of the rtoken */ + }; + __be32 rmb_key; + __be64 rmb_vaddr; +} __packed; /* format defined in RFC7609 */ + +#define SMC_LLC_RKEYS_PER_MSG 3 + +struct smc_llc_msg_confirm_rkey { /* type 0x06 */ + struct smc_llc_hdr hd; + struct smc_rmb_rtoken rtoken[SMC_LLC_RKEYS_PER_MSG]; + u8 reserved; +}; + +struct smc_llc_msg_confirm_rkey_cont { /* type 0x08 */ + struct smc_llc_hdr hd; + u8 num_rkeys; + struct smc_rmb_rtoken rtoken[SMC_LLC_RKEYS_PER_MSG]; +}; + +#define SMC_LLC_DEL_RKEY_MAX 8 +#define SMC_LLC_FLAG_RKEY_NEG 0x20 + +struct smc_llc_msg_delete_rkey { /* type 0x09 */ + struct smc_llc_hdr hd; + u8 num_rkeys; + u8 err_mask; + u8 reserved[2]; + __be32 rkey[8]; + u8 reserved2[4]; +}; + +union smc_llc_msg { + struct smc_llc_msg_confirm_link confirm_link; + struct smc_llc_msg_add_link add_link; + struct smc_llc_msg_del_link delete_link; + + struct smc_llc_msg_confirm_rkey confirm_rkey; + struct smc_llc_msg_confirm_rkey_cont confirm_rkey_cont; + struct smc_llc_msg_delete_rkey delete_rkey; + + struct smc_llc_msg_test_link test_link; + struct { + struct smc_llc_hdr hdr; + u8 data[SMC_LLC_DATA_LEN]; + } raw; +}; + +#define SMC_LLC_FLAG_RESP 0x80 + /********************************** send *************************************/ struct smc_llc_tx_pend { @@ -87,6 +200,7 @@ int smc_llc_send_confirm_link(struct smc_link *link, u8 mac[], memset(confllc, 0, sizeof(*confllc)); confllc->hd.common.type = SMC_LLC_CONFIRM_LINK; confllc->hd.length = sizeof(struct smc_llc_msg_confirm_link); + confllc->hd.flags |= SMC_LLC_FLAG_NO_RMBE_EYEC; if (reqresp == SMC_LLC_RESP) confllc->hd.flags |= SMC_LLC_FLAG_RESP; memcpy(confllc->sender_mac, mac, ETH_ALEN); @@ -94,7 +208,104 @@ int smc_llc_send_confirm_link(struct smc_link *link, u8 mac[], hton24(confllc->sender_qp_num, link->roce_qp->qp_num); /* confllc->link_num = SMC_SINGLE_LINK; already done by memset above */ memcpy(confllc->link_uid, lgr->id, SMC_LGR_ID_SIZE); - confllc->max_links = SMC_LINKS_PER_LGR_MAX; + confllc->max_links = SMC_LLC_ADD_LNK_MAX_LINKS; /* enforce peer resp. */ + /* send llc message */ + rc = smc_wr_tx_send(link, pend); + return rc; +} + +/* send ADD LINK request or response */ +int smc_llc_send_add_link(struct smc_link *link, u8 mac[], + union ib_gid *gid, + enum smc_llc_reqresp reqresp) +{ + struct smc_llc_msg_add_link *addllc; + struct smc_wr_tx_pend_priv *pend; + struct smc_wr_buf *wr_buf; + int rc; + + rc = smc_llc_add_pending_send(link, &wr_buf, &pend); + if (rc) + return rc; + addllc = (struct smc_llc_msg_add_link *)wr_buf; + memset(addllc, 0, sizeof(*addllc)); + addllc->hd.common.type = SMC_LLC_ADD_LINK; + addllc->hd.length = sizeof(struct smc_llc_msg_add_link); + if (reqresp == SMC_LLC_RESP) { + addllc->hd.flags |= SMC_LLC_FLAG_RESP; + /* always reject more links for now */ + addllc->hd.flags |= SMC_LLC_FLAG_ADD_LNK_REJ; + addllc->hd.add_link_rej_rsn = SMC_LLC_REJ_RSN_NO_ALT_PATH; + } + memcpy(addllc->sender_mac, mac, ETH_ALEN); + memcpy(addllc->sender_gid, gid, SMC_GID_SIZE); + /* send llc message */ + rc = smc_wr_tx_send(link, pend); + return rc; +} + +/* send DELETE LINK request or response */ +int smc_llc_send_delete_link(struct smc_link *link, + enum smc_llc_reqresp reqresp) +{ + struct smc_llc_msg_del_link *delllc; + struct smc_wr_tx_pend_priv *pend; + struct smc_wr_buf *wr_buf; + int rc; + + rc = smc_llc_add_pending_send(link, &wr_buf, &pend); + if (rc) + return rc; + delllc = (struct smc_llc_msg_del_link *)wr_buf; + memset(delllc, 0, sizeof(*delllc)); + delllc->hd.common.type = SMC_LLC_DELETE_LINK; + delllc->hd.length = sizeof(struct smc_llc_msg_add_link); + if (reqresp == SMC_LLC_RESP) + delllc->hd.flags |= SMC_LLC_FLAG_RESP; + /* DEL_LINK_ALL because only 1 link supported */ + delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL; + delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY; + delllc->link_num = link->link_id; + /* send llc message */ + rc = smc_wr_tx_send(link, pend); + return rc; +} + +/* send LLC test link request or response */ +int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16], + enum smc_llc_reqresp reqresp) +{ + struct smc_llc_msg_test_link *testllc; + struct smc_wr_tx_pend_priv *pend; + struct smc_wr_buf *wr_buf; + int rc; + + rc = smc_llc_add_pending_send(link, &wr_buf, &pend); + if (rc) + return rc; + testllc = (struct smc_llc_msg_test_link *)wr_buf; + memset(testllc, 0, sizeof(*testllc)); + testllc->hd.common.type = SMC_LLC_TEST_LINK; + testllc->hd.length = sizeof(struct smc_llc_msg_test_link); + if (reqresp == SMC_LLC_RESP) + testllc->hd.flags |= SMC_LLC_FLAG_RESP; + memcpy(testllc->user_data, user_data, sizeof(testllc->user_data)); + /* send llc message */ + rc = smc_wr_tx_send(link, pend); + return rc; +} + +/* send a prepared message */ +static int smc_llc_send_message(struct smc_link *link, void *llcbuf, int llclen) +{ + struct smc_wr_tx_pend_priv *pend; + struct smc_wr_buf *wr_buf; + int rc; + + rc = smc_llc_add_pending_send(link, &wr_buf, &pend); + if (rc) + return rc; + memcpy(wr_buf, llcbuf, llclen); /* send llc message */ rc = smc_wr_tx_send(link, pend); return rc; @@ -106,19 +317,156 @@ static void smc_llc_rx_confirm_link(struct smc_link *link, struct smc_llc_msg_confirm_link *llc) { struct smc_link_group *lgr; + int conf_rc; lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]); + + /* RMBE eyecatchers are not supported */ + if (llc->hd.flags & SMC_LLC_FLAG_NO_RMBE_EYEC) + conf_rc = 0; + else + conf_rc = ENOTSUPP; + if (llc->hd.flags & SMC_LLC_FLAG_RESP) { - if (lgr->role == SMC_SERV) + if (lgr->role == SMC_SERV && + link->state == SMC_LNK_ACTIVATING) { + link->llc_confirm_resp_rc = conf_rc; complete(&link->llc_confirm_resp); + } } else { - if (lgr->role == SMC_CLNT) { + if (lgr->role == SMC_CLNT && + link->state == SMC_LNK_ACTIVATING) { + link->llc_confirm_rc = conf_rc; link->link_id = llc->link_num; complete(&link->llc_confirm); } } } +static void smc_llc_rx_add_link(struct smc_link *link, + struct smc_llc_msg_add_link *llc) +{ + struct smc_link_group *lgr = container_of(link, struct smc_link_group, + lnk[SMC_SINGLE_LINK]); + + if (llc->hd.flags & SMC_LLC_FLAG_RESP) { + if (link->state == SMC_LNK_ACTIVATING) + complete(&link->llc_add_resp); + } else { + if (link->state == SMC_LNK_ACTIVATING) { + complete(&link->llc_add); + return; + } + + if (lgr->role == SMC_SERV) { + smc_llc_send_add_link(link, + link->smcibdev->mac[link->ibport - 1], + &link->smcibdev->gid[link->ibport - 1], + SMC_LLC_REQ); + + } else { + smc_llc_send_add_link(link, + link->smcibdev->mac[link->ibport - 1], + &link->smcibdev->gid[link->ibport - 1], + SMC_LLC_RESP); + } + } +} + +static void smc_llc_rx_delete_link(struct smc_link *link, + struct smc_llc_msg_del_link *llc) +{ + struct smc_link_group *lgr = container_of(link, struct smc_link_group, + lnk[SMC_SINGLE_LINK]); + + if (llc->hd.flags & SMC_LLC_FLAG_RESP) { + if (lgr->role == SMC_SERV) + smc_lgr_terminate(lgr); + } else { + if (lgr->role == SMC_SERV) { + smc_lgr_forget(lgr); + smc_llc_send_delete_link(link, SMC_LLC_REQ); + } else { + smc_llc_send_delete_link(link, SMC_LLC_RESP); + smc_lgr_terminate(lgr); + } + } +} + +static void smc_llc_rx_test_link(struct smc_link *link, + struct smc_llc_msg_test_link *llc) +{ + if (llc->hd.flags & SMC_LLC_FLAG_RESP) { + /* unused as long as we don't send this type of msg */ + } else { + smc_llc_send_test_link(link, llc->user_data, SMC_LLC_RESP); + } +} + +static void smc_llc_rx_confirm_rkey(struct smc_link *link, + struct smc_llc_msg_confirm_rkey *llc) +{ + struct smc_link_group *lgr; + int rc; + + lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]); + + if (llc->hd.flags & SMC_LLC_FLAG_RESP) { + /* unused as long as we don't send this type of msg */ + } else { + rc = smc_rtoken_add(lgr, + llc->rtoken[0].rmb_vaddr, + llc->rtoken[0].rmb_key); + + /* ignore rtokens for other links, we have only one link */ + + llc->hd.flags |= SMC_LLC_FLAG_RESP; + if (rc < 0) + llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG; + smc_llc_send_message(link, (void *)llc, sizeof(*llc)); + } +} + +static void smc_llc_rx_confirm_rkey_cont(struct smc_link *link, + struct smc_llc_msg_confirm_rkey_cont *llc) +{ + if (llc->hd.flags & SMC_LLC_FLAG_RESP) { + /* unused as long as we don't send this type of msg */ + } else { + /* ignore rtokens for other links, we have only one link */ + llc->hd.flags |= SMC_LLC_FLAG_RESP; + smc_llc_send_message(link, (void *)llc, sizeof(*llc)); + } +} + +static void smc_llc_rx_delete_rkey(struct smc_link *link, + struct smc_llc_msg_delete_rkey *llc) +{ + struct smc_link_group *lgr; + u8 err_mask = 0; + int i, max; + + lgr = container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]); + + if (llc->hd.flags & SMC_LLC_FLAG_RESP) { + /* unused as long as we don't send this type of msg */ + } else { + max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX); + for (i = 0; i < max; i++) { + if (smc_rtoken_delete(lgr, llc->rkey[i])) + err_mask |= 1 << (SMC_LLC_DEL_RKEY_MAX - 1 - i); + } + + if (err_mask) { + llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG; + llc->err_mask = err_mask; + } + + llc->hd.flags |= SMC_LLC_FLAG_RESP; + smc_llc_send_message(link, (void *)llc, sizeof(*llc)); + } +} + static void smc_llc_rx_handler(struct ib_wc *wc, void *buf) { struct smc_link *link = (struct smc_link *)wc->qp->qp_context; @@ -128,8 +476,30 @@ static void smc_llc_rx_handler(struct ib_wc *wc, void *buf) return; /* short message */ if (llc->raw.hdr.length != sizeof(*llc)) return; /* invalid message */ - if (llc->raw.hdr.common.type == SMC_LLC_CONFIRM_LINK) + + switch (llc->raw.hdr.common.type) { + case SMC_LLC_TEST_LINK: + smc_llc_rx_test_link(link, &llc->test_link); + break; + case SMC_LLC_CONFIRM_LINK: smc_llc_rx_confirm_link(link, &llc->confirm_link); + break; + case SMC_LLC_ADD_LINK: + smc_llc_rx_add_link(link, &llc->add_link); + break; + case SMC_LLC_DELETE_LINK: + smc_llc_rx_delete_link(link, &llc->delete_link); + break; + case SMC_LLC_CONFIRM_RKEY: + smc_llc_rx_confirm_rkey(link, &llc->confirm_rkey); + break; + case SMC_LLC_CONFIRM_RKEY_CONT: + smc_llc_rx_confirm_rkey_cont(link, &llc->confirm_rkey_cont); + break; + case SMC_LLC_DELETE_RKEY: + smc_llc_rx_delete_rkey(link, &llc->delete_rkey); + break; + } } /***************************** init, exit, misc ******************************/ @@ -140,6 +510,30 @@ static struct smc_wr_rx_handler smc_llc_rx_handlers[] = { .type = SMC_LLC_CONFIRM_LINK }, { + .handler = smc_llc_rx_handler, + .type = SMC_LLC_TEST_LINK + }, + { + .handler = smc_llc_rx_handler, + .type = SMC_LLC_ADD_LINK + }, + { + .handler = smc_llc_rx_handler, + .type = SMC_LLC_DELETE_LINK + }, + { + .handler = smc_llc_rx_handler, + .type = SMC_LLC_CONFIRM_RKEY + }, + { + .handler = smc_llc_rx_handler, + .type = SMC_LLC_CONFIRM_RKEY_CONT + }, + { + .handler = smc_llc_rx_handler, + .type = SMC_LLC_DELETE_RKEY + }, + { .handler = NULL, } }; diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h index 51b27ce90dbd..e4a7d5e234d5 100644 --- a/net/smc/smc_llc.h +++ b/net/smc/smc_llc.h @@ -18,6 +18,7 @@ #define SMC_LLC_FLAG_RESP 0x80 #define SMC_LLC_WAIT_FIRST_TIME (5 * HZ) +#define SMC_LLC_WAIT_TIME (2 * HZ) enum smc_llc_reqresp { SMC_LLC_REQ, @@ -26,39 +27,23 @@ enum smc_llc_reqresp { enum smc_llc_msg_type { SMC_LLC_CONFIRM_LINK = 0x01, -}; - -#define SMC_LLC_DATA_LEN 40 - -struct smc_llc_hdr { - struct smc_wr_rx_hdr common; - u8 length; /* 44 */ - u8 reserved; - u8 flags; -}; - -struct smc_llc_msg_confirm_link { /* type 0x01 */ - struct smc_llc_hdr hd; - u8 sender_mac[ETH_ALEN]; - u8 sender_gid[SMC_GID_SIZE]; - u8 sender_qp_num[3]; - u8 link_num; - u8 link_uid[SMC_LGR_ID_SIZE]; - u8 max_links; - u8 reserved[9]; -}; - -union smc_llc_msg { - struct smc_llc_msg_confirm_link confirm_link; - struct { - struct smc_llc_hdr hdr; - u8 data[SMC_LLC_DATA_LEN]; - } raw; + SMC_LLC_ADD_LINK = 0x02, + SMC_LLC_DELETE_LINK = 0x04, + SMC_LLC_CONFIRM_RKEY = 0x06, + SMC_LLC_TEST_LINK = 0x07, + SMC_LLC_CONFIRM_RKEY_CONT = 0x08, + SMC_LLC_DELETE_RKEY = 0x09, }; /* transmit */ int smc_llc_send_confirm_link(struct smc_link *lnk, u8 mac[], union ib_gid *gid, enum smc_llc_reqresp reqresp); +int smc_llc_send_add_link(struct smc_link *link, u8 mac[], union ib_gid *gid, + enum smc_llc_reqresp reqresp); +int smc_llc_send_delete_link(struct smc_link *link, + enum smc_llc_reqresp reqresp); +int smc_llc_send_test_link(struct smc_link *lnk, u8 user_data[16], + enum smc_llc_reqresp reqresp); int smc_llc_init(void) __init; #endif /* SMC_LLC_H */ diff --git a/net/socket.c b/net/socket.c index ab58e57c09ca..d9a1ac233b35 100644 --- a/net/socket.c +++ b/net/socket.c @@ -233,7 +233,7 @@ static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen, return __put_user(klen, ulen); } -static struct kmem_cache *sock_inode_cachep __read_mostly; +static struct kmem_cache *sock_inode_cachep __ro_after_init; static struct inode *sock_alloc_inode(struct super_block *sb) { @@ -2289,10 +2289,12 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, if (!sock) return err; - err = sock_error(sock->sk); - if (err) { - datagrams = err; - goto out_put; + if (likely(!(flags & MSG_ERRQUEUE))) { + err = sock_error(sock->sk); + if (err) { + datagrams = err; + goto out_put; + } } entry = mmsg; diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index ec3fc8d88e87..2c2a587e0942 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -43,6 +43,7 @@ hostprogs-y += xdp_redirect_cpu hostprogs-y += xdp_monitor hostprogs-y += xdp_rxq_info hostprogs-y += syscall_tp +hostprogs-y += cpustat # Libbpf dependencies LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o @@ -93,6 +94,7 @@ xdp_redirect_cpu-objs := bpf_load.o $(LIBBPF) xdp_redirect_cpu_user.o xdp_monitor-objs := bpf_load.o $(LIBBPF) xdp_monitor_user.o xdp_rxq_info-objs := bpf_load.o $(LIBBPF) xdp_rxq_info_user.o syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o +cpustat-objs := bpf_load.o $(LIBBPF) cpustat_user.o # Tell kbuild to always build the programs always := $(hostprogs-y) @@ -144,6 +146,7 @@ always += xdp_monitor_kern.o always += xdp_rxq_info_kern.o always += xdp2skb_meta_kern.o always += syscall_tp_kern.o +always += cpustat_kern.o HOSTCFLAGS += -I$(objtree)/usr/include HOSTCFLAGS += -I$(srctree)/tools/lib/ @@ -188,6 +191,7 @@ HOSTLOADLIBES_xdp_redirect_cpu += -lelf HOSTLOADLIBES_xdp_monitor += -lelf HOSTLOADLIBES_xdp_rxq_info += -lelf HOSTLOADLIBES_syscall_tp += -lelf +HOSTLOADLIBES_cpustat += -lelf # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: # make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang diff --git a/samples/bpf/cpustat_kern.c b/samples/bpf/cpustat_kern.c new file mode 100644 index 000000000000..68c84da065b1 --- /dev/null +++ b/samples/bpf/cpustat_kern.c @@ -0,0 +1,281 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/version.h> +#include <linux/ptrace.h> +#include <uapi/linux/bpf.h> +#include "bpf_helpers.h" + +/* + * The CPU number, cstate number and pstate number are based + * on 96boards Hikey with octa CA53 CPUs. + * + * Every CPU have three idle states for cstate: + * WFI, CPU_OFF, CLUSTER_OFF + * + * Every CPU have 5 operating points: + * 208MHz, 432MHz, 729MHz, 960MHz, 1200MHz + * + * This code is based on these assumption and other platforms + * need to adjust these definitions. + */ +#define MAX_CPU 8 +#define MAX_PSTATE_ENTRIES 5 +#define MAX_CSTATE_ENTRIES 3 + +static int cpu_opps[] = { 208000, 432000, 729000, 960000, 1200000 }; + +/* + * my_map structure is used to record cstate and pstate index and + * timestamp (Idx, Ts), when new event incoming we need to update + * combination for new state index and timestamp (Idx`, Ts`). + * + * Based on (Idx, Ts) and (Idx`, Ts`) we can calculate the time + * interval for the previous state: Duration(Idx) = Ts` - Ts. + * + * Every CPU has one below array for recording state index and + * timestamp, and record for cstate and pstate saperately: + * + * +--------------------------+ + * | cstate timestamp | + * +--------------------------+ + * | cstate index | + * +--------------------------+ + * | pstate timestamp | + * +--------------------------+ + * | pstate index | + * +--------------------------+ + */ +#define MAP_OFF_CSTATE_TIME 0 +#define MAP_OFF_CSTATE_IDX 1 +#define MAP_OFF_PSTATE_TIME 2 +#define MAP_OFF_PSTATE_IDX 3 +#define MAP_OFF_NUM 4 + +struct bpf_map_def SEC("maps") my_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(u64), + .max_entries = MAX_CPU * MAP_OFF_NUM, +}; + +/* cstate_duration records duration time for every idle state per CPU */ +struct bpf_map_def SEC("maps") cstate_duration = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(u64), + .max_entries = MAX_CPU * MAX_CSTATE_ENTRIES, +}; + +/* pstate_duration records duration time for every operating point per CPU */ +struct bpf_map_def SEC("maps") pstate_duration = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(u64), + .max_entries = MAX_CPU * MAX_PSTATE_ENTRIES, +}; + +/* + * The trace events for cpu_idle and cpu_frequency are taken from: + * /sys/kernel/debug/tracing/events/power/cpu_idle/format + * /sys/kernel/debug/tracing/events/power/cpu_frequency/format + * + * These two events have same format, so define one common structure. + */ +struct cpu_args { + u64 pad; + u32 state; + u32 cpu_id; +}; + +/* calculate pstate index, returns MAX_PSTATE_ENTRIES for failure */ +static u32 find_cpu_pstate_idx(u32 frequency) +{ + u32 i; + + for (i = 0; i < sizeof(cpu_opps) / sizeof(u32); i++) { + if (frequency == cpu_opps[i]) + return i; + } + + return i; +} + +SEC("tracepoint/power/cpu_idle") +int bpf_prog1(struct cpu_args *ctx) +{ + u64 *cts, *pts, *cstate, *pstate, prev_state, cur_ts, delta; + u32 key, cpu, pstate_idx; + u64 *val; + + if (ctx->cpu_id > MAX_CPU) + return 0; + + cpu = ctx->cpu_id; + + key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_TIME; + cts = bpf_map_lookup_elem(&my_map, &key); + if (!cts) + return 0; + + key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX; + cstate = bpf_map_lookup_elem(&my_map, &key); + if (!cstate) + return 0; + + key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME; + pts = bpf_map_lookup_elem(&my_map, &key); + if (!pts) + return 0; + + key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX; + pstate = bpf_map_lookup_elem(&my_map, &key); + if (!pstate) + return 0; + + prev_state = *cstate; + *cstate = ctx->state; + + if (!*cts) { + *cts = bpf_ktime_get_ns(); + return 0; + } + + cur_ts = bpf_ktime_get_ns(); + delta = cur_ts - *cts; + *cts = cur_ts; + + /* + * When state doesn't equal to (u32)-1, the cpu will enter + * one idle state; for this case we need to record interval + * for the pstate. + * + * OPP2 + * +---------------------+ + * OPP1 | | + * ---------+ | + * | Idle state + * +--------------- + * + * |<- pstate duration ->| + * ^ ^ + * pts cur_ts + */ + if (ctx->state != (u32)-1) { + + /* record pstate after have first cpu_frequency event */ + if (!*pts) + return 0; + + delta = cur_ts - *pts; + + pstate_idx = find_cpu_pstate_idx(*pstate); + if (pstate_idx >= MAX_PSTATE_ENTRIES) + return 0; + + key = cpu * MAX_PSTATE_ENTRIES + pstate_idx; + val = bpf_map_lookup_elem(&pstate_duration, &key); + if (val) + __sync_fetch_and_add((long *)val, delta); + + /* + * When state equal to (u32)-1, the cpu just exits from one + * specific idle state; for this case we need to record + * interval for the pstate. + * + * OPP2 + * -----------+ + * | OPP1 + * | +----------- + * | Idle state | + * +---------------------+ + * + * |<- cstate duration ->| + * ^ ^ + * cts cur_ts + */ + } else { + + key = cpu * MAX_CSTATE_ENTRIES + prev_state; + val = bpf_map_lookup_elem(&cstate_duration, &key); + if (val) + __sync_fetch_and_add((long *)val, delta); + } + + /* Update timestamp for pstate as new start time */ + if (*pts) + *pts = cur_ts; + + return 0; +} + +SEC("tracepoint/power/cpu_frequency") +int bpf_prog2(struct cpu_args *ctx) +{ + u64 *pts, *cstate, *pstate, prev_state, cur_ts, delta; + u32 key, cpu, pstate_idx; + u64 *val; + + cpu = ctx->cpu_id; + + key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME; + pts = bpf_map_lookup_elem(&my_map, &key); + if (!pts) + return 0; + + key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX; + pstate = bpf_map_lookup_elem(&my_map, &key); + if (!pstate) + return 0; + + key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX; + cstate = bpf_map_lookup_elem(&my_map, &key); + if (!cstate) + return 0; + + prev_state = *pstate; + *pstate = ctx->state; + + if (!*pts) { + *pts = bpf_ktime_get_ns(); + return 0; + } + + cur_ts = bpf_ktime_get_ns(); + delta = cur_ts - *pts; + *pts = cur_ts; + + /* When CPU is in idle, bail out to skip pstate statistics */ + if (*cstate != (u32)(-1)) + return 0; + + /* + * The cpu changes to another different OPP (in below diagram + * change frequency from OPP3 to OPP1), need recording interval + * for previous frequency OPP3 and update timestamp as start + * time for new frequency OPP1. + * + * OPP3 + * +---------------------+ + * OPP2 | | + * ---------+ | + * | OPP1 + * +--------------- + * + * |<- pstate duration ->| + * ^ ^ + * pts cur_ts + */ + pstate_idx = find_cpu_pstate_idx(*pstate); + if (pstate_idx >= MAX_PSTATE_ENTRIES) + return 0; + + key = cpu * MAX_PSTATE_ENTRIES + pstate_idx; + val = bpf_map_lookup_elem(&pstate_duration, &key); + if (val) + __sync_fetch_and_add((long *)val, delta); + + return 0; +} + +char _license[] SEC("license") = "GPL"; +u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/cpustat_user.c b/samples/bpf/cpustat_user.c new file mode 100644 index 000000000000..2b4cd1ae57c5 --- /dev/null +++ b/samples/bpf/cpustat_user.c @@ -0,0 +1,219 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include <sched.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> +#include <linux/bpf.h> +#include <locale.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <sys/wait.h> + +#include "libbpf.h" +#include "bpf_load.h" + +#define MAX_CPU 8 +#define MAX_PSTATE_ENTRIES 5 +#define MAX_CSTATE_ENTRIES 3 +#define MAX_STARS 40 + +#define CPUFREQ_MAX_SYSFS_PATH "/sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq" +#define CPUFREQ_LOWEST_FREQ "208000" +#define CPUFREQ_HIGHEST_FREQ "12000000" + +struct cpu_stat_data { + unsigned long cstate[MAX_CSTATE_ENTRIES]; + unsigned long pstate[MAX_PSTATE_ENTRIES]; +}; + +static struct cpu_stat_data stat_data[MAX_CPU]; + +static void cpu_stat_print(void) +{ + int i, j; + char state_str[sizeof("cstate-9")]; + struct cpu_stat_data *data; + + /* Clear screen */ + printf("\033[2J"); + + /* Header */ + printf("\nCPU states statistics:\n"); + printf("%-10s ", "state(ms)"); + + for (i = 0; i < MAX_CSTATE_ENTRIES; i++) { + sprintf(state_str, "cstate-%d", i); + printf("%-11s ", state_str); + } + + for (i = 0; i < MAX_PSTATE_ENTRIES; i++) { + sprintf(state_str, "pstate-%d", i); + printf("%-11s ", state_str); + } + + printf("\n"); + + for (j = 0; j < MAX_CPU; j++) { + data = &stat_data[j]; + + printf("CPU-%-6d ", j); + for (i = 0; i < MAX_CSTATE_ENTRIES; i++) + printf("%-11ld ", data->cstate[i] / 1000000); + + for (i = 0; i < MAX_PSTATE_ENTRIES; i++) + printf("%-11ld ", data->pstate[i] / 1000000); + + printf("\n"); + } +} + +static void cpu_stat_update(int cstate_fd, int pstate_fd) +{ + unsigned long key, value; + int c, i; + + for (c = 0; c < MAX_CPU; c++) { + for (i = 0; i < MAX_CSTATE_ENTRIES; i++) { + key = c * MAX_CSTATE_ENTRIES + i; + bpf_map_lookup_elem(cstate_fd, &key, &value); + stat_data[c].cstate[i] = value; + } + + for (i = 0; i < MAX_PSTATE_ENTRIES; i++) { + key = c * MAX_PSTATE_ENTRIES + i; + bpf_map_lookup_elem(pstate_fd, &key, &value); + stat_data[c].pstate[i] = value; + } + } +} + +/* + * This function is copied from 'idlestat' tool function + * idlestat_wake_all() in idlestate.c. + * + * It sets the self running task affinity to cpus one by one so can wake up + * the specific CPU to handle scheduling; this results in all cpus can be + * waken up once and produce ftrace event 'trace_cpu_idle'. + */ +static int cpu_stat_inject_cpu_idle_event(void) +{ + int rcpu, i, ret; + cpu_set_t cpumask; + cpu_set_t original_cpumask; + + ret = sysconf(_SC_NPROCESSORS_CONF); + if (ret < 0) + return -1; + + rcpu = sched_getcpu(); + if (rcpu < 0) + return -1; + + /* Keep track of the CPUs we will run on */ + sched_getaffinity(0, sizeof(original_cpumask), &original_cpumask); + + for (i = 0; i < ret; i++) { + + /* Pointless to wake up ourself */ + if (i == rcpu) + continue; + + /* Pointless to wake CPUs we will not run on */ + if (!CPU_ISSET(i, &original_cpumask)) + continue; + + CPU_ZERO(&cpumask); + CPU_SET(i, &cpumask); + + sched_setaffinity(0, sizeof(cpumask), &cpumask); + } + + /* Enable all the CPUs of the original mask */ + sched_setaffinity(0, sizeof(original_cpumask), &original_cpumask); + return 0; +} + +/* + * It's possible to have no any frequency change for long time and cannot + * get ftrace event 'trace_cpu_frequency' for long period, this introduces + * big deviation for pstate statistics. + * + * To solve this issue, below code forces to set 'scaling_max_freq' to 208MHz + * for triggering ftrace event 'trace_cpu_frequency' and then recovery back to + * the maximum frequency value 1.2GHz. + */ +static int cpu_stat_inject_cpu_frequency_event(void) +{ + int len, fd; + + fd = open(CPUFREQ_MAX_SYSFS_PATH, O_WRONLY); + if (fd < 0) { + printf("failed to open scaling_max_freq, errno=%d\n", errno); + return fd; + } + + len = write(fd, CPUFREQ_LOWEST_FREQ, strlen(CPUFREQ_LOWEST_FREQ)); + if (len < 0) { + printf("failed to open scaling_max_freq, errno=%d\n", errno); + goto err; + } + + len = write(fd, CPUFREQ_HIGHEST_FREQ, strlen(CPUFREQ_HIGHEST_FREQ)); + if (len < 0) { + printf("failed to open scaling_max_freq, errno=%d\n", errno); + goto err; + } + +err: + close(fd); + return len; +} + +static void int_exit(int sig) +{ + cpu_stat_inject_cpu_idle_event(); + cpu_stat_inject_cpu_frequency_event(); + cpu_stat_update(map_fd[1], map_fd[2]); + cpu_stat_print(); + exit(0); +} + +int main(int argc, char **argv) +{ + char filename[256]; + int ret; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + ret = cpu_stat_inject_cpu_idle_event(); + if (ret < 0) + return 1; + + ret = cpu_stat_inject_cpu_frequency_event(); + if (ret < 0) + return 1; + + signal(SIGINT, int_exit); + signal(SIGTERM, int_exit); + + while (1) { + cpu_stat_update(map_fd[1], map_fd[2]); + cpu_stat_print(); + sleep(5); + } + + return 0; +} diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c index d54e91eb6cbf..b701b5c21342 100644 --- a/samples/bpf/xdp_redirect_user.c +++ b/samples/bpf/xdp_redirect_user.c @@ -20,6 +20,7 @@ #include <string.h> #include <unistd.h> #include <libgen.h> +#include <sys/resource.h> #include "bpf_load.h" #include "bpf_util.h" @@ -75,6 +76,7 @@ static void usage(const char *prog) int main(int argc, char **argv) { + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; const char *optstr = "SN"; char filename[256]; int ret, opt, key = 0; @@ -98,6 +100,11 @@ int main(int argc, char **argv) return 1; } + if (setrlimit(RLIMIT_MEMLOCK, &r)) { + perror("setrlimit(RLIMIT_MEMLOCK)"); + return 1; + } + ifindex_in = strtoul(argv[optind], NULL, 0); ifindex_out = strtoul(argv[optind + 1], NULL, 0); printf("input: %d output: %d\n", ifindex_in, ifindex_out); diff --git a/samples/sockmap/Makefile b/samples/sockmap/Makefile index 73f1da4d116c..9bf2881bd11b 100644 --- a/samples/sockmap/Makefile +++ b/samples/sockmap/Makefile @@ -2,7 +2,7 @@ hostprogs-y := sockmap # Libbpf dependencies -LIBBPF := ../../tools/lib/bpf/bpf.o +LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o HOSTCFLAGS += -I$(objtree)/usr/include HOSTCFLAGS += -I$(srctree)/tools/lib/ diff --git a/samples/sockmap/sockmap_user.c b/samples/sockmap/sockmap_user.c index 7c25c0c112bc..95a54a89a532 100644 --- a/samples/sockmap/sockmap_user.c +++ b/samples/sockmap/sockmap_user.c @@ -566,6 +566,7 @@ run: else fprintf(stderr, "unknown test\n"); out: + bpf_prog_detach2(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS); close(s1); close(s2); close(p1); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 8644d864e3c1..b4d7b6242a40 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -6743,6 +6743,7 @@ static void __net_exit selinux_nf_unregister(struct net *net) static struct pernet_operations selinux_net_ops = { .init = selinux_nf_register, .exit = selinux_nf_unregister, + .async = true, }; static int __init selinux_nf_ip_init(void) diff --git a/security/smack/smack_netfilter.c b/security/smack/smack_netfilter.c index e36d17835d4f..3f29c03162ca 100644 --- a/security/smack/smack_netfilter.c +++ b/security/smack/smack_netfilter.c @@ -89,6 +89,7 @@ static void __net_exit smack_nf_unregister(struct net *net) static struct pernet_operations smack_net_ops = { .init = smack_nf_register, .exit = smack_nf_unregister, + .async = true, }; static int __init smack_nf_ip_init(void) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 5c43c187f27c..8567a858b789 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -35,12 +35,14 @@ TEST_GEN_PROGS_EXTENDED = test_libbpf_open include ../lib.mk -BPFOBJ := $(OUTPUT)/libbpf.a cgroup_helpers.c +BPFOBJ := $(OUTPUT)/libbpf.a $(TEST_GEN_PROGS): $(BPFOBJ) $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a +$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c + .PHONY: force # force a rebuild of BPFOBJ when its dependencies are updated diff --git a/tools/testing/selftests/bpf/test_tcpbpf_user.c b/tools/testing/selftests/bpf/test_tcpbpf_user.c index 95a370f3d378..5d73db416460 100644 --- a/tools/testing/selftests/bpf/test_tcpbpf_user.c +++ b/tools/testing/selftests/bpf/test_tcpbpf_user.c @@ -11,6 +11,8 @@ #include <linux/ptrace.h> #include <linux/bpf.h> #include <sys/ioctl.h> +#include <sys/time.h> +#include <sys/resource.h> #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> @@ -42,6 +44,7 @@ static int bpf_find_map(const char *test, struct bpf_object *obj, int main(int argc, char **argv) { + struct rlimit limit = { RLIM_INFINITY, RLIM_INFINITY }; const char *file = "test_tcpbpf_kern.o"; struct tcpbpf_globals g = {0}; int cg_fd, prog_fd, map_fd; @@ -54,6 +57,9 @@ int main(int argc, char **argv) int pid; int rv; + if (setrlimit(RLIMIT_MEMLOCK, &limit) < 0) + perror("Unable to lift memlock rlimit"); + if (argc > 1 && strcmp(argv[1], "-d") == 0) debug_flag = true; diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index c73592fa3d41..2164d218322e 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -57,6 +57,9 @@ #define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS (1 << 0) #define F_LOAD_WITH_STRICT_ALIGNMENT (1 << 1) +#define UNPRIV_SYSCTL "kernel/unprivileged_bpf_disabled" +static bool unpriv_disabled = false; + struct bpf_test { const char *descr; struct bpf_insn insns[MAX_INSNS]; @@ -11163,6 +11166,95 @@ static struct bpf_test tests[] = { .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, + { + "jit: lsh, rsh, arsh by 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_1, 0xff), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 1), + BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x3fc, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 1), + BPF_ALU32_IMM(BPF_RSH, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0xff, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x7f, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 2, + }, + { + "jit: mov32 for ldimm64, 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_LD_IMM64(BPF_REG_1, 0xfeffffffffffffffULL), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 32), + BPF_LD_IMM64(BPF_REG_2, 0xfeffffffULL), + BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 2, + }, + { + "jit: mov32 for ldimm64, 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_IMM64(BPF_REG_1, 0x1ffffffffULL), + BPF_LD_IMM64(BPF_REG_2, 0xffffffffULL), + BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 2, + }, + { + "jit: various mul tests", + .insns = { + BPF_LD_IMM64(BPF_REG_2, 0xeeff0d413122ULL), + BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL), + BPF_LD_IMM64(BPF_REG_1, 0xefefefULL), + BPF_ALU64_REG(BPF_MUL, BPF_REG_0, BPF_REG_1), + BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL), + BPF_ALU64_REG(BPF_MUL, BPF_REG_3, BPF_REG_1), + BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV32_REG(BPF_REG_2, BPF_REG_2), + BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL), + BPF_ALU32_REG(BPF_MUL, BPF_REG_0, BPF_REG_1), + BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL), + BPF_ALU32_REG(BPF_MUL, BPF_REG_3, BPF_REG_1), + BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LD_IMM64(BPF_REG_0, 0x952a7bbcULL), + BPF_LD_IMM64(BPF_REG_1, 0xfefefeULL), + BPF_LD_IMM64(BPF_REG_2, 0xeeff0d413122ULL), + BPF_ALU32_REG(BPF_MUL, BPF_REG_2, BPF_REG_1), + BPF_JMP_REG(BPF_JEQ, BPF_REG_2, BPF_REG_0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 2, + }, + }; static int probe_filter_length(const struct bpf_insn *fp) @@ -11317,7 +11409,8 @@ static void do_test_single(struct bpf_test *test, bool unpriv, goto fail_log; } if (!strstr(bpf_vlog, expected_err) && !reject_from_alignment) { - printf("FAIL\nUnexpected error message!\n"); + printf("FAIL\nUnexpected error message!\n\tEXP: %s\n\tRES: %s\n", + expected_err, bpf_vlog); goto fail_log; } } @@ -11401,9 +11494,20 @@ out: return ret; } +static void get_unpriv_disabled() +{ + char buf[2]; + FILE *fd; + + fd = fopen("/proc/sys/"UNPRIV_SYSCTL, "r"); + if (fgets(buf, 2, fd) == buf && atoi(buf)) + unpriv_disabled = true; + fclose(fd); +} + static int do_test(bool unpriv, unsigned int from, unsigned int to) { - int i, passes = 0, errors = 0; + int i, passes = 0, errors = 0, skips = 0; for (i = from; i < to; i++) { struct bpf_test *test = &tests[i]; @@ -11411,7 +11515,10 @@ static int do_test(bool unpriv, unsigned int from, unsigned int to) /* Program types that are not supported by non-root we * skip right away. */ - if (!test->prog_type) { + if (!test->prog_type && unpriv_disabled) { + printf("#%d/u %s SKIP\n", i, test->descr); + skips++; + } else if (!test->prog_type) { if (!unpriv) set_admin(false); printf("#%d/u %s ", i, test->descr); @@ -11420,13 +11527,17 @@ static int do_test(bool unpriv, unsigned int from, unsigned int to) set_admin(true); } - if (!unpriv) { + if (unpriv) { + printf("#%d/p %s SKIP\n", i, test->descr); + skips++; + } else { printf("#%d/p %s ", i, test->descr); do_test_single(test, false, &passes, &errors); } } - printf("Summary: %d PASSED, %d FAILED\n", passes, errors); + printf("Summary: %d PASSED, %d SKIPPED, %d FAILED\n", passes, + skips, errors); return errors ? EXIT_FAILURE : EXIT_SUCCESS; } @@ -11454,6 +11565,13 @@ int main(int argc, char **argv) } } + get_unpriv_disabled(); + if (unpriv && unpriv_disabled) { + printf("Cannot run as unprivileged user with sysctl %s.\n", + UNPRIV_SYSCTL); + return EXIT_FAILURE; + } + setrlimit(RLIMIT_MEMLOCK, unpriv ? &rlim : &rinf); return do_test(unpriv, from, to); } diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index d7c30d366935..229a038966e3 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -5,7 +5,7 @@ CFLAGS = -Wall -Wl,--no-as-needed -O2 -g CFLAGS += -I../../../../usr/include/ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh -TEST_PROGS += fib_tests.sh +TEST_PROGS += fib_tests.sh fib-onlink-tests.sh TEST_GEN_FILES = socket TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa diff --git a/tools/testing/selftests/net/forwarding/.gitignore b/tools/testing/selftests/net/forwarding/.gitignore new file mode 100644 index 000000000000..a793eef5b876 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/.gitignore @@ -0,0 +1 @@ +forwarding.config diff --git a/tools/testing/selftests/net/forwarding/README b/tools/testing/selftests/net/forwarding/README new file mode 100644 index 000000000000..4a0964c42860 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/README @@ -0,0 +1,56 @@ +Motivation +========== + +One of the nice things about network namespaces is that they allow one +to easily create and test complex environments. + +Unfortunately, these namespaces can not be used with actual switching +ASICs, as their ports can not be migrated to other network namespaces +(NETIF_F_NETNS_LOCAL) and most of them probably do not support the +L1-separation provided by namespaces. + +However, a similar kind of flexibility can be achieved by using VRFs and +by looping the switch ports together. For example: + + br0 + + + vrf-h1 | vrf-h2 + + +---+----+ + + | | | | + 192.0.2.1/24 + + + + 192.0.2.2/24 + swp1 swp2 swp3 swp4 + + + + + + | | | | + +--------+ +--------+ + +The VRFs act as lightweight namespaces representing hosts connected to +the switch. + +This approach for testing switch ASICs has several advantages over the +traditional method that requires multiple physical machines, to name a +few: + +1. Only the device under test (DUT) is being tested without noise from +other system. + +2. Ability to easily provision complex topologies. Testing bridging +between 4-ports LAGs or 8-way ECMP requires many physical links that are +not always available. With the VRF-based approach one merely needs to +loopback more ports. + +These tests are written with switch ASICs in mind, but they can be run +on any Linux box using veth pairs to emulate physical loopbacks. + +Guidelines for Writing Tests +============================ + +o Where possible, reuse an existing topology for different tests instead + of recreating the same topology. +o Where possible, IPv6 and IPv4 addresses shall conform to RFC 3849 and + RFC 5737, respectively. +o Where possible, tests shall be written so that they can be reused by + multiple topologies and added to lib.sh. +o Checks shall be added to lib.sh for any external dependencies. +o Code shall be checked using ShellCheck [1] prior to submission. + +1. https://www.shellcheck.net/ diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh new file mode 100755 index 000000000000..75d922438bc9 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh @@ -0,0 +1,88 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +NUM_NETIFS=4 +CHECK_TC="yes" +source lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64 +} + +h2_destroy() +{ + simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64 +} + +switch_create() +{ + # 10 Seconds ageing time. + ip link add dev br0 type bridge vlan_filtering 1 ageing_time 1000 \ + mcast_snooping 0 + + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br0 + + ip link set dev br0 up + ip link set dev $swp1 up + ip link set dev $swp2 up +} + +switch_destroy() +{ + ip link set dev $swp2 down + ip link set dev $swp1 down + + ip link del dev br0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + + h1_create + h2_create + + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +ping_test $h1 192.0.2.2 +ping6_test $h1 2001:db8:1::2 +learning_test "br0" $swp1 $h1 $h2 +flood_test $swp2 $h1 $h2 + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config new file mode 100644 index 000000000000..5cd2aed97958 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/config @@ -0,0 +1,12 @@ +CONFIG_BRIDGE=m +CONFIG_VLAN_8021Q=m +CONFIG_BRIDGE_VLAN_FILTERING=y +CONFIG_NET_L3_MASTER_DEV=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_NET_VRF=m +CONFIG_BPF_SYSCALL=y +CONFIG_CGROUP_BPF=y +CONFIG_NET_CLS_FLOWER=m +CONFIG_NET_SCH_INGRESS=m +CONFIG_NET_ACT_GACT=m +CONFIG_VETH=m diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample new file mode 100644 index 000000000000..ab235c124f20 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample @@ -0,0 +1,31 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +############################################################################## +# Topology description. p1 looped back to p2, p3 to p4 and so on. +declare -A NETIFS + +NETIFS[p1]=veth0 +NETIFS[p2]=veth1 +NETIFS[p3]=veth2 +NETIFS[p4]=veth3 +NETIFS[p5]=veth4 +NETIFS[p6]=veth5 +NETIFS[p7]=veth6 +NETIFS[p8]=veth7 + +############################################################################## +# Defines + +# IPv4 ping utility name +PING=ping +# IPv6 ping utility name. Some distributions use 'ping' for IPv6. +PING6=ping6 +# Packet generator. Some distributions use 'mz'. +MZ=mausezahn +# Time to wait after interfaces participating in the test are all UP +WAIT_TIME=5 +# Whether to pause on failure or not. +PAUSE_ON_FAIL=no +# Whether to pause on cleanup or not. +PAUSE_ON_CLEANUP=no diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh new file mode 100644 index 000000000000..d0af52109360 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -0,0 +1,540 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +############################################################################## +# Defines + +# Can be overridden by the configuration file. +PING=${PING:=ping} +PING6=${PING6:=ping6} +MZ=${MZ:=mausezahn} +WAIT_TIME=${WAIT_TIME:=5} +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} +PAUSE_ON_CLEANUP=${PAUSE_ON_CLEANUP:=no} + +if [[ -f forwarding.config ]]; then + source forwarding.config +fi + +############################################################################## +# Sanity checks + +check_tc_version() +{ + tc -j &> /dev/null + if [[ $? -ne 0 ]]; then + echo "SKIP: iproute2 too old; tc is missing JSON support" + exit 1 + fi + + tc filter help 2>&1 | grep block &> /dev/null + if [[ $? -ne 0 ]]; then + echo "SKIP: iproute2 too old; tc is missing shared block support" + exit 1 + fi +} + +if [[ "$(id -u)" -ne 0 ]]; then + echo "SKIP: need root privileges" + exit 0 +fi + +if [[ "$CHECK_TC" = "yes" ]]; then + check_tc_version +fi + +if [[ ! -x "$(command -v jq)" ]]; then + echo "SKIP: jq not installed" + exit 1 +fi + +if [[ ! -x "$(command -v $MZ)" ]]; then + echo "SKIP: $MZ not installed" + exit 0 +fi + +if [[ ! -v NUM_NETIFS ]]; then + echo "SKIP: importer does not define \"NUM_NETIFS\"" + exit 0 +fi + +############################################################################## +# Command line options handling + +count=0 + +while [[ $# -gt 0 ]]; do + if [[ "$count" -eq "0" ]]; then + unset NETIFS + declare -A NETIFS + fi + count=$((count + 1)) + NETIFS[p$count]="$1" + shift +done + +############################################################################## +# Network interfaces configuration + +for i in $(eval echo {1..$NUM_NETIFS}); do + ip link show dev ${NETIFS[p$i]} &> /dev/null + if [[ $? -ne 0 ]]; then + echo "SKIP: could not find all required interfaces" + exit 0 + fi +done + +############################################################################## +# Helpers + +# Exit status to return at the end. Set in case one of the tests fails. +EXIT_STATUS=0 +# Per-test return value. Clear at the beginning of each test. +RET=0 + +check_err() +{ + local err=$1 + local msg=$2 + + if [[ $RET -eq 0 && $err -ne 0 ]]; then + RET=$err + retmsg=$msg + fi +} + +check_fail() +{ + local err=$1 + local msg=$2 + + if [[ $RET -eq 0 && $err -eq 0 ]]; then + RET=1 + retmsg=$msg + fi +} + +log_test() +{ + local test_name=$1 + local opt_str=$2 + + if [[ $# -eq 2 ]]; then + opt_str="($opt_str)" + fi + + if [[ $RET -ne 0 ]]; then + EXIT_STATUS=1 + printf "TEST: %-60s [FAIL]\n" "$test_name $opt_str" + if [[ ! -z "$retmsg" ]]; then + printf "\t%s\n" "$retmsg" + fi + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo "Hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + return 1 + fi + + printf "TEST: %-60s [PASS]\n" "$test_name $opt_str" + return 0 +} + +log_info() +{ + local msg=$1 + + echo "INFO: $msg" +} + +setup_wait() +{ + for i in $(eval echo {1..$NUM_NETIFS}); do + while true; do + ip link show dev ${NETIFS[p$i]} up \ + | grep 'state UP' &> /dev/null + if [[ $? -ne 0 ]]; then + sleep 1 + else + break + fi + done + done + + # Make sure links are ready. + sleep $WAIT_TIME +} + +pre_cleanup() +{ + if [ "${PAUSE_ON_CLEANUP}" = "yes" ]; then + echo "Pausing before cleanup, hit any key to continue" + read + fi +} + +vrf_prepare() +{ + ip -4 rule add pref 32765 table local + ip -4 rule del pref 0 + ip -6 rule add pref 32765 table local + ip -6 rule del pref 0 +} + +vrf_cleanup() +{ + ip -6 rule add pref 0 table local + ip -6 rule del pref 32765 + ip -4 rule add pref 0 table local + ip -4 rule del pref 32765 +} + +__last_tb_id=0 +declare -A __TB_IDS + +__vrf_td_id_assign() +{ + local vrf_name=$1 + + __last_tb_id=$((__last_tb_id + 1)) + __TB_IDS[$vrf_name]=$__last_tb_id + return $__last_tb_id +} + +__vrf_td_id_lookup() +{ + local vrf_name=$1 + + return ${__TB_IDS[$vrf_name]} +} + +vrf_create() +{ + local vrf_name=$1 + local tb_id + + __vrf_td_id_assign $vrf_name + tb_id=$? + + ip link add dev $vrf_name type vrf table $tb_id + ip -4 route add table $tb_id unreachable default metric 4278198272 + ip -6 route add table $tb_id unreachable default metric 4278198272 +} + +vrf_destroy() +{ + local vrf_name=$1 + local tb_id + + __vrf_td_id_lookup $vrf_name + tb_id=$? + + ip -6 route del table $tb_id unreachable default metric 4278198272 + ip -4 route del table $tb_id unreachable default metric 4278198272 + ip link del dev $vrf_name +} + +__addr_add_del() +{ + local if_name=$1 + local add_del=$2 + local array + + shift + shift + array=("${@}") + + for addrstr in "${array[@]}"; do + ip address $add_del $addrstr dev $if_name + done +} + +simple_if_init() +{ + local if_name=$1 + local vrf_name + local array + + shift + vrf_name=v$if_name + array=("${@}") + + vrf_create $vrf_name + ip link set dev $if_name master $vrf_name + ip link set dev $vrf_name up + ip link set dev $if_name up + + __addr_add_del $if_name add "${array[@]}" +} + +simple_if_fini() +{ + local if_name=$1 + local vrf_name + local array + + shift + vrf_name=v$if_name + array=("${@}") + + __addr_add_del $if_name del "${array[@]}" + + ip link set dev $if_name down + vrf_destroy $vrf_name +} + +master_name_get() +{ + local if_name=$1 + + ip -j link show dev $if_name | jq -r '.[]["master"]' +} + +link_stats_tx_packets_get() +{ + local if_name=$1 + + ip -j -s link show dev $if_name | jq '.[]["stats64"]["tx"]["packets"]' +} + +mac_get() +{ + local if_name=$1 + + ip -j link show dev $if_name | jq -r '.[]["address"]' +} + +bridge_ageing_time_get() +{ + local bridge=$1 + local ageing_time + + # Need to divide by 100 to convert to seconds. + ageing_time=$(ip -j -d link show dev $bridge \ + | jq '.[]["linkinfo"]["info_data"]["ageing_time"]') + echo $((ageing_time / 100)) +} + +forwarding_enable() +{ + ipv4_fwd=$(sysctl -n net.ipv4.conf.all.forwarding) + ipv6_fwd=$(sysctl -n net.ipv6.conf.all.forwarding) + + sysctl -q -w net.ipv4.conf.all.forwarding=1 + sysctl -q -w net.ipv6.conf.all.forwarding=1 +} + +forwarding_restore() +{ + sysctl -q -w net.ipv6.conf.all.forwarding=$ipv6_fwd + sysctl -q -w net.ipv4.conf.all.forwarding=$ipv4_fwd +} + +tc_offload_check() +{ + for i in $(eval echo {1..$NUM_NETIFS}); do + ethtool -k ${NETIFS[p$i]} \ + | grep "hw-tc-offload: on" &> /dev/null + if [[ $? -ne 0 ]]; then + return 1 + fi + done + + return 0 +} + +############################################################################## +# Tests + +ping_test() +{ + local if_name=$1 + local dip=$2 + local vrf_name + + RET=0 + + vrf_name=$(master_name_get $if_name) + ip vrf exec $vrf_name $PING $dip -c 10 -i 0.1 -w 2 &> /dev/null + check_err $? + log_test "ping" +} + +ping6_test() +{ + local if_name=$1 + local dip=$2 + local vrf_name + + RET=0 + + vrf_name=$(master_name_get $if_name) + ip vrf exec $vrf_name $PING6 $dip -c 10 -i 0.1 -w 2 &> /dev/null + check_err $? + log_test "ping6" +} + +learning_test() +{ + local bridge=$1 + local br_port1=$2 # Connected to `host1_if`. + local host1_if=$3 + local host2_if=$4 + local mac=de:ad:be:ef:13:37 + local ageing_time + + RET=0 + + bridge -j fdb show br $bridge brport $br_port1 \ + | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null + check_fail $? "Found FDB record when should not" + + # Disable unknown unicast flooding on `br_port1` to make sure + # packets are only forwarded through the port after a matching + # FDB entry was installed. + bridge link set dev $br_port1 flood off + + tc qdisc add dev $host1_if ingress + tc filter add dev $host1_if ingress protocol ip pref 1 handle 101 \ + flower dst_mac $mac action drop + + $MZ $host2_if -c 1 -p 64 -b $mac -t ip -q + sleep 1 + + tc -j -s filter show dev $host1_if ingress \ + | jq -e ".[] | select(.options.handle == 101) \ + | select(.options.actions[0].stats.packets == 1)" &> /dev/null + check_fail $? "Packet reached second host when should not" + + $MZ $host1_if -c 1 -p 64 -a $mac -t ip -q + sleep 1 + + bridge -j fdb show br $bridge brport $br_port1 \ + | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null + check_err $? "Did not find FDB record when should" + + $MZ $host2_if -c 1 -p 64 -b $mac -t ip -q + sleep 1 + + tc -j -s filter show dev $host1_if ingress \ + | jq -e ".[] | select(.options.handle == 101) \ + | select(.options.actions[0].stats.packets == 1)" &> /dev/null + check_err $? "Packet did not reach second host when should" + + # Wait for 10 seconds after the ageing time to make sure FDB + # record was aged-out. + ageing_time=$(bridge_ageing_time_get $bridge) + sleep $((ageing_time + 10)) + + bridge -j fdb show br $bridge brport $br_port1 \ + | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null + check_fail $? "Found FDB record when should not" + + bridge link set dev $br_port1 learning off + + $MZ $host1_if -c 1 -p 64 -a $mac -t ip -q + sleep 1 + + bridge -j fdb show br $bridge brport $br_port1 \ + | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null + check_fail $? "Found FDB record when should not" + + bridge link set dev $br_port1 learning on + + tc filter del dev $host1_if ingress protocol ip pref 1 handle 101 flower + tc qdisc del dev $host1_if ingress + + bridge link set dev $br_port1 flood on + + log_test "FDB learning" +} + +flood_test_do() +{ + local should_flood=$1 + local mac=$2 + local ip=$3 + local host1_if=$4 + local host2_if=$5 + local err=0 + + # Add an ACL on `host2_if` which will tell us whether the packet + # was flooded to it or not. + tc qdisc add dev $host2_if ingress + tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \ + flower dst_mac $mac action drop + + $MZ $host1_if -c 1 -p 64 -b $mac -B $ip -t ip -q + sleep 1 + + tc -j -s filter show dev $host2_if ingress \ + | jq -e ".[] | select(.options.handle == 101) \ + | select(.options.actions[0].stats.packets == 1)" &> /dev/null + if [[ $? -ne 0 && $should_flood == "true" || \ + $? -eq 0 && $should_flood == "false" ]]; then + err=1 + fi + + tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower + tc qdisc del dev $host2_if ingress + + return $err +} + +flood_unicast_test() +{ + local br_port=$1 + local host1_if=$2 + local host2_if=$3 + local mac=de:ad:be:ef:13:37 + local ip=192.0.2.100 + + RET=0 + + bridge link set dev $br_port flood off + + flood_test_do false $mac $ip $host1_if $host2_if + check_err $? "Packet flooded when should not" + + bridge link set dev $br_port flood on + + flood_test_do true $mac $ip $host1_if $host2_if + check_err $? "Packet was not flooded when should" + + log_test "Unknown unicast flood" +} + +flood_multicast_test() +{ + local br_port=$1 + local host1_if=$2 + local host2_if=$3 + local mac=01:00:5e:00:00:01 + local ip=239.0.0.1 + + RET=0 + + bridge link set dev $br_port mcast_flood off + + flood_test_do false $mac $ip $host1_if $host2_if + check_err $? "Packet flooded when should not" + + bridge link set dev $br_port mcast_flood on + + flood_test_do true $mac $ip $host1_if $host2_if + check_err $? "Packet was not flooded when should" + + log_test "Unregistered multicast flood" +} + +flood_test() +{ + # `br_port` is connected to `host2_if` + local br_port=$1 + local host1_if=$2 + local host2_if=$3 + + flood_unicast_test $br_port $host1_if $host2_if + flood_multicast_test $br_port $host1_if $host2_if +} diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh new file mode 100755 index 000000000000..cc6a14abfa87 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/router.sh @@ -0,0 +1,125 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +NUM_NETIFS=4 +source lib.sh + +h1_create() +{ + vrf_create "vrf-h1" + ip link set dev $h1 master vrf-h1 + + ip link set dev vrf-h1 up + ip link set dev $h1 up + + ip address add 192.0.2.2/24 dev $h1 + ip address add 2001:db8:1::2/64 dev $h1 + + ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1 + ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1 +} + +h1_destroy() +{ + ip route del 2001:db8:2::/64 vrf vrf-h1 + ip route del 198.51.100.0/24 vrf vrf-h1 + + ip address del 2001:db8:1::2/64 dev $h1 + ip address del 192.0.2.2/24 dev $h1 + + ip link set dev $h1 down + vrf_destroy "vrf-h1" +} + +h2_create() +{ + vrf_create "vrf-h2" + ip link set dev $h2 master vrf-h2 + + ip link set dev vrf-h2 up + ip link set dev $h2 up + + ip address add 198.51.100.2/24 dev $h2 + ip address add 2001:db8:2::2/64 dev $h2 + + ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1 + ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1 +} + +h2_destroy() +{ + ip route del 2001:db8:1::/64 vrf vrf-h2 + ip route del 192.0.2.0/24 vrf vrf-h2 + + ip address del 2001:db8:2::2/64 dev $h2 + ip address del 198.51.100.2/24 dev $h2 + + ip link set dev $h2 down + vrf_destroy "vrf-h2" +} + +router_create() +{ + ip link set dev $rp1 up + ip link set dev $rp2 up + + ip address add 192.0.2.1/24 dev $rp1 + ip address add 2001:db8:1::1/64 dev $rp1 + + ip address add 198.51.100.1/24 dev $rp2 + ip address add 2001:db8:2::1/64 dev $rp2 +} + +router_destroy() +{ + ip address del 2001:db8:2::1/64 dev $rp2 + ip address del 198.51.100.1/24 dev $rp2 + + ip address del 2001:db8:1::1/64 dev $rp1 + ip address del 192.0.2.1/24 dev $rp1 + + ip link set dev $rp2 down + ip link set dev $rp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rp1=${NETIFS[p2]} + + rp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + + h1_create + h2_create + + router_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +ping_test $h1 198.51.100.2 +ping6_test $h1 2001:db8:2::2 + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh new file mode 100755 index 000000000000..55595305a604 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/router_multipath.sh @@ -0,0 +1,332 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +NUM_NETIFS=8 +source lib.sh + +h1_create() +{ + vrf_create "vrf-h1" + ip link set dev $h1 master vrf-h1 + + ip link set dev vrf-h1 up + ip link set dev $h1 up + + ip address add 192.0.2.2/24 dev $h1 + ip address add 2001:db8:1::2/64 dev $h1 + + ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1 + ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1 +} + +h1_destroy() +{ + ip route del 2001:db8:2::/64 vrf vrf-h1 + ip route del 198.51.100.0/24 vrf vrf-h1 + + ip address del 2001:db8:1::2/64 dev $h1 + ip address del 192.0.2.2/24 dev $h1 + + ip link set dev $h1 down + vrf_destroy "vrf-h1" +} + +h2_create() +{ + vrf_create "vrf-h2" + ip link set dev $h2 master vrf-h2 + + ip link set dev vrf-h2 up + ip link set dev $h2 up + + ip address add 198.51.100.2/24 dev $h2 + ip address add 2001:db8:2::2/64 dev $h2 + + ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1 + ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1 +} + +h2_destroy() +{ + ip route del 2001:db8:1::/64 vrf vrf-h2 + ip route del 192.0.2.0/24 vrf vrf-h2 + + ip address del 2001:db8:2::2/64 dev $h2 + ip address del 198.51.100.2/24 dev $h2 + + ip link set dev $h2 down + vrf_destroy "vrf-h2" +} + +router1_create() +{ + vrf_create "vrf-r1" + ip link set dev $rp11 master vrf-r1 + ip link set dev $rp12 master vrf-r1 + ip link set dev $rp13 master vrf-r1 + + ip link set dev vrf-r1 up + ip link set dev $rp11 up + ip link set dev $rp12 up + ip link set dev $rp13 up + + ip address add 192.0.2.1/24 dev $rp11 + ip address add 2001:db8:1::1/64 dev $rp11 + + ip address add 169.254.2.12/24 dev $rp12 + ip address add fe80:2::12/64 dev $rp12 + + ip address add 169.254.3.13/24 dev $rp13 + ip address add fe80:3::13/64 dev $rp13 + + ip route add 198.51.100.0/24 vrf vrf-r1 \ + nexthop via 169.254.2.22 dev $rp12 \ + nexthop via 169.254.3.23 dev $rp13 + ip route add 2001:db8:2::/64 vrf vrf-r1 \ + nexthop via fe80:2::22 dev $rp12 \ + nexthop via fe80:3::23 dev $rp13 +} + +router1_destroy() +{ + ip route del 2001:db8:2::/64 vrf vrf-r1 + ip route del 198.51.100.0/24 vrf vrf-r1 + + ip address del fe80:3::13/64 dev $rp13 + ip address del 169.254.3.13/24 dev $rp13 + + ip address del fe80:2::12/64 dev $rp12 + ip address del 169.254.2.12/24 dev $rp12 + + ip address del 2001:db8:1::1/64 dev $rp11 + ip address del 192.0.2.1/24 dev $rp11 + + ip link set dev $rp13 down + ip link set dev $rp12 down + ip link set dev $rp11 down + + vrf_destroy "vrf-r1" +} + +router2_create() +{ + vrf_create "vrf-r2" + ip link set dev $rp21 master vrf-r2 + ip link set dev $rp22 master vrf-r2 + ip link set dev $rp23 master vrf-r2 + + ip link set dev vrf-r2 up + ip link set dev $rp21 up + ip link set dev $rp22 up + ip link set dev $rp23 up + + ip address add 198.51.100.1/24 dev $rp21 + ip address add 2001:db8:2::1/64 dev $rp21 + + ip address add 169.254.2.22/24 dev $rp22 + ip address add fe80:2::22/64 dev $rp22 + + ip address add 169.254.3.23/24 dev $rp23 + ip address add fe80:3::23/64 dev $rp23 + + ip route add 192.0.2.0/24 vrf vrf-r2 \ + nexthop via 169.254.2.12 dev $rp22 \ + nexthop via 169.254.3.13 dev $rp23 + ip route add 2001:db8:1::/64 vrf vrf-r2 \ + nexthop via fe80:2::12 dev $rp22 \ + nexthop via fe80:3::13 dev $rp23 +} + +router2_destroy() +{ + ip route del 2001:db8:1::/64 vrf vrf-r2 + ip route del 192.0.2.0/24 vrf vrf-r2 + + ip address del fe80:3::23/64 dev $rp23 + ip address del 169.254.3.23/24 dev $rp23 + + ip address del fe80:2::22/64 dev $rp22 + ip address del 169.254.2.22/24 dev $rp22 + + ip address del 2001:db8:2::1/64 dev $rp21 + ip address del 198.51.100.1/24 dev $rp21 + + ip link set dev $rp23 down + ip link set dev $rp22 down + ip link set dev $rp21 down + + vrf_destroy "vrf-r2" +} + +multipath_eval() +{ + local desc="$1" + local weight_rp12=$2 + local weight_rp13=$3 + local packets_rp12=$4 + local packets_rp13=$5 + local weights_ratio packets_ratio diff + + RET=0 + + if [[ "$packets_rp12" -eq "0" || "$packets_rp13" -eq "0" ]]; then + check_err 1 "Packet difference is 0" + log_test "Multipath" + log_info "Expected ratio $weights_ratio" + return + fi + + if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then + weights_ratio=$(echo "scale=2; $weight_rp12 / $weight_rp13" \ + | bc -l) + packets_ratio=$(echo "scale=2; $packets_rp12 / $packets_rp13" \ + | bc -l) + else + weights_ratio=$(echo "scale=2; $weight_rp13 / $weight_rp12" | \ + bc -l) + packets_ratio=$(echo "scale=2; $packets_rp13 / $packets_rp12" | \ + bc -l) + fi + + diff=$(echo $weights_ratio - $packets_ratio | bc -l) + diff=${diff#-} + + test "$(echo "$diff / $weights_ratio > 0.1" | bc -l)" -eq 0 + check_err $? "Too large discrepancy between expected and measured ratios" + log_test "$desc" + log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio" +} + +multipath4_test() +{ + local desc="$1" + local weight_rp12=$2 + local weight_rp13=$3 + local t0_rp12 t0_rp13 t1_rp12 t1_rp13 + local packets_rp12 packets_rp13 + local hash_policy + + # Transmit multiple flows from h1 to h2 and make sure they are + # distributed between both multipath links (rp12 and rp13) + # according to the configured weights. + hash_policy=$(sysctl -n net.ipv4.fib_multipath_hash_policy) + sysctl -q -w net.ipv4.fib_multipath_hash_policy=1 + ip route replace 198.51.100.0/24 vrf vrf-r1 \ + nexthop via 169.254.2.22 dev $rp12 weight $weight_rp12 \ + nexthop via 169.254.3.23 dev $rp13 weight $weight_rp13 + + t0_rp12=$(link_stats_tx_packets_get $rp12) + t0_rp13=$(link_stats_tx_packets_get $rp13) + + ip vrf exec vrf-h1 $MZ -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ + -d 1msec -t udp "sp=1024,dp=0-32768" + + t1_rp12=$(link_stats_tx_packets_get $rp12) + t1_rp13=$(link_stats_tx_packets_get $rp13) + + let "packets_rp12 = $t1_rp12 - $t0_rp12" + let "packets_rp13 = $t1_rp13 - $t0_rp13" + multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13 + + # Restore settings. + ip route replace 198.51.100.0/24 vrf vrf-r1 \ + nexthop via 169.254.2.22 dev $rp12 \ + nexthop via 169.254.3.23 dev $rp13 + sysctl -q -w net.ipv4.fib_multipath_hash_policy=$hash_policy +} + +multipath6_test() +{ + local desc="$1" + local weight_rp12=$2 + local weight_rp13=$3 + local t0_rp12 t0_rp13 t1_rp12 t1_rp13 + local packets_rp12 packets_rp13 + + ip route replace 2001:db8:2::/64 vrf vrf-r1 \ + nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \ + nexthop via fe80:3::23 dev $rp13 weight $weight_rp13 + + t0_rp12=$(link_stats_tx_packets_get $rp12) + t0_rp13=$(link_stats_tx_packets_get $rp13) + + # Generate 16384 echo requests, each with a random flow label. + for _ in $(seq 1 16384); do + ip vrf exec vrf-h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null + done + + t1_rp12=$(link_stats_tx_packets_get $rp12) + t1_rp13=$(link_stats_tx_packets_get $rp13) + + let "packets_rp12 = $t1_rp12 - $t0_rp12" + let "packets_rp13 = $t1_rp13 - $t0_rp13" + multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13 + + ip route replace 2001:db8:2::/64 vrf vrf-r1 \ + nexthop via fe80:2::22 dev $rp12 \ + nexthop via fe80:3::23 dev $rp13 +} + +multipath_test() +{ + log_info "Running IPv4 multipath tests" + multipath4_test "ECMP" 1 1 + multipath4_test "Weighted MP 2:1" 2 1 + multipath4_test "Weighted MP 11:45" 11 45 + + log_info "Running IPv6 multipath tests" + multipath6_test "ECMP" 1 1 + multipath6_test "Weighted MP 2:1" 2 1 + multipath6_test "Weighted MP 11:45" 11 45 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rp11=${NETIFS[p2]} + + rp12=${NETIFS[p3]} + rp22=${NETIFS[p4]} + + rp13=${NETIFS[p5]} + rp23=${NETIFS[p6]} + + rp21=${NETIFS[p7]} + h2=${NETIFS[p8]} + + vrf_prepare + + h1_create + h2_create + + router1_create + router2_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router2_destroy + router1_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +ping_test $h1 198.51.100.2 +ping6_test $h1 2001:db8:2::2 +multipath_test + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh new file mode 100755 index 000000000000..8ab5cf0a960b --- /dev/null +++ b/tools/testing/selftests/net/forwarding/tc_actions.sh @@ -0,0 +1,195 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +NUM_NETIFS=4 +source tc_common.sh +source lib.sh + +tcflags="skip_hw" + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/24 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/24 + tc qdisc add dev $h2 clsact +} + +h2_destroy() +{ + tc qdisc del dev $h2 clsact + simple_if_fini $h2 192.0.2.2/24 +} + +switch_create() +{ + simple_if_init $swp1 192.0.2.2/24 + tc qdisc add dev $swp1 clsact + + simple_if_init $swp2 192.0.2.1/24 +} + +switch_destroy() +{ + simple_if_fini $swp2 192.0.2.1/24 + + tc qdisc del dev $swp1 clsact + simple_if_fini $swp1 192.0.2.2/24 +} + +mirred_egress_redirect_test() +{ + RET=0 + + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + $tcflags dst_ip 192.0.2.2 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? "Matched without redirect rule inserted" + + tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \ + $tcflags dst_ip 192.0.2.2 action mirred egress redirect \ + dev $swp2 + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_err $? "Did not match incoming redirected packet" + + tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + + log_test "mirred egress redirect ($tcflags)" +} + +gact_drop_and_ok_test() +{ + RET=0 + + tc filter add dev $swp1 ingress protocol ip pref 2 handle 102 flower \ + skip_hw dst_ip 192.0.2.2 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $swp1 ingress" 102 1 + check_err $? "Packet was not dropped" + + tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \ + $tcflags dst_ip 192.0.2.2 action ok + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $swp1 ingress" 101 1 + check_err $? "Did not see trapped packet" + + tc filter del dev $swp1 ingress protocol ip pref 2 handle 102 flower + tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower + + log_test "gact drop and ok ($tcflags)" +} + +gact_trap_test() +{ + RET=0 + + tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \ + skip_hw dst_ip 192.0.2.2 action drop + tc filter add dev $swp1 ingress protocol ip pref 3 handle 103 flower \ + $tcflags dst_ip 192.0.2.2 action mirred egress redirect \ + dev $swp2 + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $swp1 ingress" 101 1 + check_fail $? "Saw packet without trap rule inserted" + + tc filter add dev $swp1 ingress protocol ip pref 2 handle 102 flower \ + $tcflags dst_ip 192.0.2.2 action trap + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $swp1 ingress" 102 1 + check_err $? "Packet was not trapped" + + tc_check_packets "dev $swp1 ingress" 101 1 + check_err $? "Did not see trapped packet" + + tc filter del dev $swp1 ingress protocol ip pref 3 handle 103 flower + tc filter del dev $swp1 ingress protocol ip pref 2 handle 102 flower + tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower + + log_test "trap ($tcflags)" +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + h1mac=$(mac_get $h1) + h2mac=$(mac_get $h2) + + swp1origmac=$(mac_get $swp1) + swp2origmac=$(mac_get $swp2) + ip link set $swp1 address $h2mac + ip link set $swp2 address $h1mac + + vrf_prepare + + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + + vrf_cleanup + + ip link set $swp2 address $swp2origmac + ip link set $swp1 address $swp1origmac +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +gact_drop_and_ok_test +mirred_egress_redirect_test + +tc_offload_check +if [[ $? -ne 0 ]]; then + log_info "Could not test offloaded functionality" +else + tcflags="skip_sw" + gact_drop_and_ok_test + mirred_egress_redirect_test + gact_trap_test +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tc_chains.sh b/tools/testing/selftests/net/forwarding/tc_chains.sh new file mode 100755 index 000000000000..2fd15226974b --- /dev/null +++ b/tools/testing/selftests/net/forwarding/tc_chains.sh @@ -0,0 +1,122 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +NUM_NETIFS=2 +source tc_common.sh +source lib.sh + +tcflags="skip_hw" + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/24 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/24 + tc qdisc add dev $h2 clsact +} + +h2_destroy() +{ + tc qdisc del dev $h2 clsact + simple_if_fini $h2 192.0.2.2/24 +} + +unreachable_chain_test() +{ + RET=0 + + tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \ + flower $tcflags dst_mac $h2mac action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 1101 1 + check_fail $? "matched on filter in unreachable chain" + + tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \ + flower + + log_test "unreachable chain ($tcflags)" +} + +gact_goto_chain_test() +{ + RET=0 + + tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \ + flower $tcflags dst_mac $h2mac action drop + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags dst_mac $h2mac action drop + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + $tcflags dst_mac $h2mac action goto chain 1 + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 102 1 + check_fail $? "Matched on a wrong filter" + + tc_check_packets "dev $h2 ingress" 101 1 + check_err $? "Did not match on correct filter with goto chain action" + + tc_check_packets "dev $h2 ingress" 1101 1 + check_err $? "Did not match on correct filter in chain 1" + + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \ + flower + + log_test "gact goto chain ($tcflags)" +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + h2=${NETIFS[p2]} + h1mac=$(mac_get $h1) + h2mac=$(mac_get $h2) + + vrf_prepare + + h1_create + h2_create +} + +cleanup() +{ + pre_cleanup + + h2_destroy + h1_destroy + + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +unreachable_chain_test +gact_goto_chain_test + +tc_offload_check +if [[ $? -ne 0 ]]; then + log_info "Could not test offloaded functionality" +else + tcflags="skip_sw" + unreachable_chain_test + gact_goto_chain_test +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tc_common.sh b/tools/testing/selftests/net/forwarding/tc_common.sh new file mode 100644 index 000000000000..9d3b64a2a264 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/tc_common.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +CHECK_TC="yes" + +tc_check_packets() +{ + local id=$1 + local handle=$2 + local count=$3 + local ret + + output="$(tc -j -s filter show $id)" + # workaround the jq bug which causes jq to return 0 in case input is "" + ret=$? + if [[ $ret -ne 0 ]]; then + return $ret + fi + echo $output | \ + jq -e ".[] \ + | select(.options.handle == $handle) \ + | select(.options.actions[0].stats.packets == $count)" \ + &> /dev/null + return $? +} diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh new file mode 100755 index 000000000000..032b882adfc0 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/tc_flower.sh @@ -0,0 +1,196 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +NUM_NETIFS=2 +source tc_common.sh +source lib.sh + +tcflags="skip_hw" + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 198.51.100.1/24 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/24 198.51.100.1/24 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/24 198.51.100.2/24 + tc qdisc add dev $h2 clsact +} + +h2_destroy() +{ + tc qdisc del dev $h2 clsact + simple_if_fini $h2 192.0.2.2/24 198.51.100.2/24 +} + +match_dst_mac_test() +{ + local dummy_mac=de:ad:be:ef:aa:aa + + RET=0 + + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + $tcflags dst_mac $dummy_mac action drop + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags dst_mac $h2mac action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? "Matched on a wrong filter" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct filter" + + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + + log_test "dst_mac match ($tcflags)" +} + +match_src_mac_test() +{ + local dummy_mac=de:ad:be:ef:aa:aa + + RET=0 + + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + $tcflags src_mac $dummy_mac action drop + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags src_mac $h1mac action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? "Matched on a wrong filter" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct filter" + + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + + log_test "src_mac match ($tcflags)" +} + +match_dst_ip_test() +{ + RET=0 + + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + $tcflags dst_ip 198.51.100.2 action drop + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags dst_ip 192.0.2.2 action drop + tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \ + $tcflags dst_ip 192.0.2.0/24 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? "Matched on a wrong filter" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct filter" + + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 103 1 + check_err $? "Did not match on correct filter with mask" + + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower + + log_test "dst_ip match ($tcflags)" +} + +match_src_ip_test() +{ + RET=0 + + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + $tcflags src_ip 198.51.100.1 action drop + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags src_ip 192.0.2.1 action drop + tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \ + $tcflags src_ip 192.0.2.0/24 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? "Matched on a wrong filter" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct filter" + + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 103 1 + check_err $? "Did not match on correct filter with mask" + + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower + + log_test "src_ip match ($tcflags)" +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + h2=${NETIFS[p2]} + h1mac=$(mac_get $h1) + h2mac=$(mac_get $h2) + + vrf_prepare + + h1_create + h2_create +} + +cleanup() +{ + pre_cleanup + + h2_destroy + h1_destroy + + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +match_dst_mac_test +match_src_mac_test +match_dst_ip_test +match_src_ip_test + +tc_offload_check +if [[ $? -ne 0 ]]; then + log_info "Could not test offloaded functionality" +else + tcflags="skip_sw" + match_dst_mac_test + match_src_mac_test + match_dst_ip_test + match_src_ip_test +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tc_shblocks.sh b/tools/testing/selftests/net/forwarding/tc_shblocks.sh new file mode 100755 index 000000000000..077b98048ef4 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/tc_shblocks.sh @@ -0,0 +1,122 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +NUM_NETIFS=4 +source tc_common.sh +source lib.sh + +tcflags="skip_hw" + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/24 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.1/24 +} + +h2_destroy() +{ + simple_if_fini $h2 192.0.2.1/24 +} + +switch_create() +{ + simple_if_init $swp1 192.0.2.2/24 + tc qdisc add dev $swp1 ingress_block 22 egress_block 23 clsact + + simple_if_init $swp2 192.0.2.2/24 + tc qdisc add dev $swp2 ingress_block 22 egress_block 23 clsact +} + +switch_destroy() +{ + tc qdisc del dev $swp2 clsact + simple_if_fini $swp2 192.0.2.2/24 + + tc qdisc del dev $swp1 clsact + simple_if_fini $swp1 192.0.2.2/24 +} + +shared_block_test() +{ + RET=0 + + tc filter add block 22 protocol ip pref 1 handle 101 flower \ + $tcflags dst_ip 192.0.2.2 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $swmac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "block 22" 101 1 + check_err $? "Did not match first incoming packet on a block" + + $MZ $h2 -c 1 -p 64 -a $h2mac -b $swmac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "block 22" 101 2 + check_err $? "Did not match second incoming packet on a block" + + tc filter del block 22 protocol ip pref 1 handle 101 flower + + log_test "shared block ($tcflags)" +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + h1mac=$(mac_get $h1) + h2mac=$(mac_get $h2) + + swmac=$(mac_get $swp1) + swp2origmac=$(mac_get $swp2) + ip link set $swp2 address $swmac + + vrf_prepare + + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + + vrf_cleanup + + ip link set $swp2 address $swp2origmac +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +shared_block_test + +tc_offload_check +if [[ $? -ne 0 ]]; then + log_info "Could not test offloaded functionality" +else + tcflags="skip_sw" + shared_block_test +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c index 5cc2a53bb71c..406cc70c571d 100644 --- a/tools/testing/selftests/net/msg_zerocopy.c +++ b/tools/testing/selftests/net/msg_zerocopy.c @@ -344,27 +344,53 @@ static int do_setup_tx(int domain, int type, int protocol) return fd; } -static int do_process_zerocopy_cookies(struct sock_extended_err *serr, - uint32_t *ckbuf, size_t nbytes) +static uint32_t do_process_zerocopy_cookies(struct rds_zcopy_cookies *ck) { - int ncookies, i; + int i; - if (serr->ee_errno != 0) - error(1, 0, "serr: wrong error code: %u", serr->ee_errno); - ncookies = serr->ee_data; - if (ncookies > SO_EE_ORIGIN_MAX_ZCOOKIES) + if (ck->num > RDS_MAX_ZCOOKIES) error(1, 0, "Returned %d cookies, max expected %d\n", - ncookies, SO_EE_ORIGIN_MAX_ZCOOKIES); - if (nbytes != ncookies * sizeof(uint32_t)) - error(1, 0, "Expected %d cookies, got %ld\n", - ncookies, nbytes/sizeof(uint32_t)); - for (i = 0; i < ncookies; i++) + ck->num, RDS_MAX_ZCOOKIES); + for (i = 0; i < ck->num; i++) if (cfg_verbose >= 2) - fprintf(stderr, "%d\n", ckbuf[i]); - return ncookies; + fprintf(stderr, "%d\n", ck->cookies[i]); + return ck->num; } -static bool do_recv_completion(int fd) +static bool do_recvmsg_completion(int fd) +{ + char cmsgbuf[CMSG_SPACE(sizeof(struct rds_zcopy_cookies))]; + struct rds_zcopy_cookies *ck; + struct cmsghdr *cmsg; + struct msghdr msg; + bool ret = false; + + memset(&msg, 0, sizeof(msg)); + msg.msg_control = cmsgbuf; + msg.msg_controllen = sizeof(cmsgbuf); + + if (recvmsg(fd, &msg, MSG_DONTWAIT)) + return ret; + + if (msg.msg_flags & MSG_CTRUNC) + error(1, errno, "recvmsg notification: truncated"); + + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) { + if (cmsg->cmsg_level == SOL_RDS && + cmsg->cmsg_type == RDS_CMSG_ZCOPY_COMPLETION) { + + ck = (struct rds_zcopy_cookies *)CMSG_DATA(cmsg); + completions += do_process_zerocopy_cookies(ck); + ret = true; + break; + } + error(0, 0, "ignoring cmsg at level %d type %d\n", + cmsg->cmsg_level, cmsg->cmsg_type); + } + return ret; +} + +static bool do_recv_completion(int fd, int domain) { struct sock_extended_err *serr; struct msghdr msg = {}; @@ -372,17 +398,13 @@ static bool do_recv_completion(int fd) uint32_t hi, lo, range; int ret, zerocopy; char control[100]; - uint32_t ckbuf[SO_EE_ORIGIN_MAX_ZCOOKIES]; - struct iovec iov; + + if (domain == PF_RDS) + return do_recvmsg_completion(fd); msg.msg_control = control; msg.msg_controllen = sizeof(control); - iov.iov_base = ckbuf; - iov.iov_len = (SO_EE_ORIGIN_MAX_ZCOOKIES * sizeof(ckbuf[0])); - msg.msg_iov = &iov; - msg.msg_iovlen = 1; - ret = recvmsg(fd, &msg, MSG_ERRQUEUE); if (ret == -1 && errno == EAGAIN) return false; @@ -402,10 +424,6 @@ static bool do_recv_completion(int fd) serr = (void *) CMSG_DATA(cm); - if (serr->ee_origin == SO_EE_ORIGIN_ZCOOKIE) { - completions += do_process_zerocopy_cookies(serr, ckbuf, ret); - return true; - } if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) error(1, 0, "serr: wrong origin: %u", serr->ee_origin); if (serr->ee_errno != 0) @@ -440,20 +458,20 @@ static bool do_recv_completion(int fd) } /* Read all outstanding messages on the errqueue */ -static void do_recv_completions(int fd) +static void do_recv_completions(int fd, int domain) { - while (do_recv_completion(fd)) {} + while (do_recv_completion(fd, domain)) {} } /* Wait for all remaining completions on the errqueue */ -static void do_recv_remaining_completions(int fd) +static void do_recv_remaining_completions(int fd, int domain) { int64_t tstop = gettimeofday_ms() + cfg_waittime_ms; while (completions < expected_completions && gettimeofday_ms() < tstop) { - if (do_poll(fd, POLLERR)) - do_recv_completions(fd); + if (do_poll(fd, domain == PF_RDS ? POLLIN : POLLERR)) + do_recv_completions(fd, domain); } if (completions < expected_completions) @@ -534,13 +552,13 @@ static void do_tx(int domain, int type, int protocol) while (!do_poll(fd, POLLOUT)) { if (cfg_zerocopy) - do_recv_completions(fd); + do_recv_completions(fd, domain); } } while (gettimeofday_ms() < tstop); if (cfg_zerocopy) - do_recv_remaining_completions(fd); + do_recv_remaining_completions(fd, domain); if (close(fd)) error(1, errno, "close"); @@ -631,40 +649,6 @@ static void do_flush_datagram(int fd, int type) bytes += cfg_payload_len; } - -static void do_recvmsg(int fd) -{ - int ret, off = 0; - char *buf; - struct iovec iov; - struct msghdr msg; - struct sockaddr_storage din; - - buf = calloc(cfg_payload_len, sizeof(char)); - iov.iov_base = buf; - iov.iov_len = cfg_payload_len; - - memset(&msg, 0, sizeof(msg)); - msg.msg_name = &din; - msg.msg_namelen = sizeof(din); - msg.msg_iov = &iov; - msg.msg_iovlen = 1; - - ret = recvmsg(fd, &msg, MSG_TRUNC); - - if (ret == -1) - error(1, errno, "recv"); - if (ret != cfg_payload_len) - error(1, 0, "recv: ret=%u != %u", ret, cfg_payload_len); - - if (memcmp(buf + off, payload, ret)) - error(1, 0, "recv: data mismatch"); - - free(buf); - packets++; - bytes += cfg_payload_len; -} - static void do_rx(int domain, int type, int protocol) { uint64_t tstop; @@ -676,8 +660,6 @@ static void do_rx(int domain, int type, int protocol) do { if (type == SOCK_STREAM) do_flush_tcp(fd); - else if (domain == PF_RDS) - do_recvmsg(fd); else do_flush_datagram(fd, type); diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index b3754b9aa302..7b50775abaf6 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -15,6 +15,7 @@ import importlib import json import subprocess import time +import traceback from collections import OrderedDict from string import Template @@ -23,6 +24,13 @@ from tdc_helper import * import TdcPlugin + +class PluginMgrTestFail(Exception): + def __init__(self, stage, output, message): + self.stage = stage + self.output = output + self.message = message + class PluginMgr: def __init__(self, argparser): super().__init__() @@ -135,7 +143,7 @@ def exec_cmd(args, pm, stage, command): return proc, foutput -def prepare_env(args, pm, stage, prefix, cmdlist): +def prepare_env(args, pm, stage, prefix, cmdlist, output = None): """ Execute the setup/teardown commands for a test case. Optionally terminate test execution if the command fails. @@ -164,7 +172,9 @@ def prepare_env(args, pm, stage, prefix, cmdlist): print("\n{} *** Aborting test run.".format(prefix), file=sys.stderr) print("\n\n{} *** stdout ***".format(proc.stdout), file=sys.stderr) print("\n\n{} *** stderr ***".format(proc.stderr), file=sys.stderr) - raise Exception('"{}" did not complete successfully'.format(prefix)) + raise PluginMgrTestFail( + stage, output, + '"{}" did not complete successfully'.format(prefix)) def run_one_test(pm, args, index, tidx): result = True @@ -194,8 +204,11 @@ def run_one_test(pm, args, index, tidx): match_pattern = re.compile( str(tidx["matchPattern"]), re.DOTALL | re.MULTILINE) (p, procout) = exec_cmd(args, pm, 'verify', tidx["verifyCmd"]) - match_index = re.findall(match_pattern, procout) - if len(match_index) != int(tidx["matchCount"]): + if procout: + match_index = re.findall(match_pattern, procout) + if len(match_index) != int(tidx["matchCount"]): + result = False + elif int(tidx["matchCount"]) != 0: result = False if not result: @@ -204,9 +217,12 @@ def run_one_test(pm, args, index, tidx): tap += tresult if result == False: - tap += procout + if procout: + tap += procout + else: + tap += 'No output!\n' - prepare_env(args, pm, 'teardown', '-----> teardown stage', tidx['teardown']) + prepare_env(args, pm, 'teardown', '-----> teardown stage', tidx['teardown'], procout) pm.call_post_case() index += 1 @@ -227,30 +243,70 @@ def test_runner(pm, args, filtered_tests): index = 1 tap = str(index) + ".." + str(tcount) + "\n" badtest = None + stage = None + emergency_exit = False + emergency_exit_message = '' - pm.call_pre_suite(tcount, [tidx['id'] for tidx in testlist]) - + try: + pm.call_pre_suite(tcount, [tidx['id'] for tidx in testlist]) + except Exception as ee: + ex_type, ex, ex_tb = sys.exc_info() + print('Exception {} {} (caught in pre_suite).'. + format(ex_type, ex)) + # when the extra print statements are uncommented, + # the traceback does not appear between them + # (it appears way earlier in the tdc.py output) + # so don't bother ... + # print('--------------------(') + # print('traceback') + traceback.print_tb(ex_tb) + # print('--------------------)') + emergency_exit_message = 'EMERGENCY EXIT, call_pre_suite failed with exception {} {}\n'.format(ex_type, ex) + emergency_exit = True + stage = 'pre-SUITE' + + if emergency_exit: + pm.call_post_suite(index) + return emergency_exit_message if args.verbose > 1: - print('Run tests here') + print('give test rig 2 seconds to stabilize') + time.sleep(2) for tidx in testlist: if "flower" in tidx["category"] and args.device == None: + if args.verbose > 1: + print('Not executing test {} {} because DEV2 not defined'. + format(tidx['id'], tidx['name'])) continue try: badtest = tidx # in case it goes bad tap += run_one_test(pm, args, index, tidx) - except Exception as ee: - print('Exception {} (caught in test_runner, running test {} {} {})'. - format(ee, index, tidx['id'], tidx['name'])) + except PluginMgrTestFail as pmtf: + ex_type, ex, ex_tb = sys.exc_info() + stage = pmtf.stage + message = pmtf.message + output = pmtf.output + print(message) + print('Exception {} {} (caught in test_runner, running test {} {} {} stage {})'. + format(ex_type, ex, index, tidx['id'], tidx['name'], stage)) + print('---------------') + print('traceback') + traceback.print_tb(ex_tb) + print('---------------') + if stage == 'teardown': + print('accumulated output for this test:') + if pmtf.output: + print(pmtf.output) + print('---------------') break index += 1 # if we failed in setup or teardown, - # fill in the remaining tests with not ok + # fill in the remaining tests with ok-skipped count = index tap += 'about to flush the tap output if tests need to be skipped\n' if tcount + 1 != index: for tidx in testlist[index - 1:]: - msg = 'skipped - previous setup or teardown failed' + msg = 'skipped - previous {} failed'.format(stage) tap += 'ok {} - {} # {} {} {}\n'.format( count, tidx['id'], msg, index, badtest.get('id', '--Unknown--')) count += 1 @@ -347,9 +403,9 @@ def check_default_settings(args, remaining, pm): global NAMES if args.path != None: - NAMES['TC'] = args.path + NAMES['TC'] = args.path if args.device != None: - NAMES['DEV2'] = args.device + NAMES['DEV2'] = args.device if not os.path.isfile(NAMES['TC']): print("The specified tc path " + NAMES['TC'] + " does not exist.") exit(1) @@ -389,7 +445,7 @@ def generate_case_ids(alltests): for c in alltests: if (c["id"] == ""): while True: - newid = str('%04x' % random.randrange(16**4)) + newid = str('{:04x}'.format(random.randrange(16**4))) if (does_id_exist(alltests, newid)): continue else: diff --git a/tools/testing/selftests/tc-testing/tdc_batch.py b/tools/testing/selftests/tc-testing/tdc_batch.py index 707c6bfef689..52fa539dc662 100755 --- a/tools/testing/selftests/tc-testing/tdc_batch.py +++ b/tools/testing/selftests/tc-testing/tdc_batch.py @@ -49,13 +49,13 @@ index = 0 for i in range(0x100): for j in range(0x100): for k in range(0x100): - mac = ("%02x:%02x:%02x" % (i, j, k)) + mac = ("{:02x}:{:02x}:{:02x}".format(i, j, k)) src_mac = "e4:11:00:" + mac dst_mac = "e4:12:00:" + mac - cmd = ("filter add dev %s %s protocol ip parent ffff: flower %s " - "src_mac %s dst_mac %s action drop %s" % + cmd = ("filter add dev {} {} protocol ip parent ffff: flower {} " + "src_mac {} dst_mac {} action drop {}".format (device, prio, skip, src_mac, dst_mac, share_action)) - file.write("%s\n" % cmd) + file.write("{}\n".format(cmd)) index += 1 if index >= number: file.close() |