diff options
429 files changed, 17284 insertions, 5220 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-net-batman-adv b/Documentation/ABI/testing/sysfs-class-net-batman-adv index 518f6a1dbc0c..898106849e27 100644 --- a/Documentation/ABI/testing/sysfs-class-net-batman-adv +++ b/Documentation/ABI/testing/sysfs-class-net-batman-adv @@ -1,19 +1,10 @@ -What: /sys/class/net/<iface>/batman-adv/throughput_override -Date: Feb 2014 -Contact: Antonio Quartulli <[email protected]> -description: - Defines the throughput value to be used by B.A.T.M.A.N. V - when estimating the link throughput using this interface. - If the value is set to 0 then batman-adv will try to - estimate the throughput by itself. - What: /sys/class/net/<iface>/batman-adv/elp_interval Date: Feb 2014 Contact: Linus Lüssing <[email protected]> Description: Defines the interval in milliseconds in which batman - sends its probing packets for link quality measurements. + emits probing packets for neighbor sensing (ELP). What: /sys/class/net/<iface>/batman-adv/iface_status Date: May 2010 @@ -28,3 +19,12 @@ Description: The /sys/class/net/<iface>/batman-adv/mesh_iface file displays the batman mesh interface this <iface> currently is associated with. + +What: /sys/class/net/<iface>/batman-adv/throughput_override +Date: Feb 2014 +Contact: Antonio Quartulli <[email protected]> +description: + Defines the throughput value to be used by B.A.T.M.A.N. V + when estimating the link throughput using this interface. + If the value is set to 0 then batman-adv will try to + estimate the throughput by itself. diff --git a/Documentation/devicetree/bindings/net/keystone-netcp.txt b/Documentation/devicetree/bindings/net/keystone-netcp.txt index b30ab6b5cbfa..04ba1dc34fd6 100644 --- a/Documentation/devicetree/bindings/net/keystone-netcp.txt +++ b/Documentation/devicetree/bindings/net/keystone-netcp.txt @@ -2,7 +2,7 @@ This document describes the device tree bindings associated with the keystone network coprocessor(NetCP) driver support. The network coprocessor (NetCP) is a hardware accelerator that processes -Ethernet packets. NetCP has a gigabit Ethernet (GbE) subsytem with a ethernet +Ethernet packets. NetCP has a gigabit Ethernet (GbE) subsystem with a ethernet switch sub-module to send and receive packets. NetCP also includes a packet accelerator (PA) module to perform packet classification operations such as header matching, and packet modification operations such as checksum diff --git a/Documentation/networking/nf_conntrack-sysctl.txt b/Documentation/networking/nf_conntrack-sysctl.txt index f55599c62c9d..4fb51d32fccc 100644 --- a/Documentation/networking/nf_conntrack-sysctl.txt +++ b/Documentation/networking/nf_conntrack-sysctl.txt @@ -7,12 +7,13 @@ nf_conntrack_acct - BOOLEAN Enable connection tracking flow accounting. 64-bit byte and packet counters per flow are added. -nf_conntrack_buckets - INTEGER (read-only) +nf_conntrack_buckets - INTEGER Size of hash table. If not specified as parameter during module loading, the default size is calculated by dividing total memory by 16384 to determine the number of buckets but the hash table will never have fewer than 32 and limited to 16384 buckets. For systems with more than 4GB of memory it will be 65536 buckets. + This sysctl is only writeable in the initial net namespace. nf_conntrack_checksum - BOOLEAN 0 - disabled diff --git a/MAINTAINERS b/MAINTAINERS index d8c078491e55..6374be26dde3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2297,6 +2297,7 @@ S: Maintained F: Documentation/ABI/testing/sysfs-class-net-batman-adv F: Documentation/ABI/testing/sysfs-class-net-mesh F: Documentation/networking/batman-adv.txt +F: include/uapi/linux/batman_adv.h F: net/batman-adv/ BAYCOM/HDLCDRV DRIVERS FOR AX.25 @@ -5437,6 +5438,15 @@ F: include/uapi/linux/if_hippi.h F: net/802/hippi.c F: drivers/net/hippi/ +HISILICON NETWORK SUBSYSTEM DRIVER +M: Yisen Zhuang <[email protected]> +M: Salil Mehta <[email protected]> +W: http://www.hisilicon.com +S: Maintained +F: drivers/net/ethernet/hisilicon/ +F: Documentation/devicetree/bindings/net/hisilicon*.txt + HISILICON SAS Controller M: John Garry <[email protected]> W: http://www.hisilicon.com @@ -9549,7 +9559,7 @@ M: Florian Fainelli <[email protected]> S: Maintained RDC R6040 FAST ETHERNET DRIVER -M: Florian Fainelli <[email protected]> +M: Florian Fainelli <[email protected]> S: Maintained F: drivers/net/ethernet/rdc/r6040.c @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 7 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Psychotic Stoned Sheep # *DOCUMENTATION* diff --git a/arch/arc/Makefile b/arch/arc/Makefile index d4df6be66d58..85814e74677d 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -66,8 +66,6 @@ endif endif -cflags-$(CONFIG_ARC_DW2_UNWIND) += -fasynchronous-unwind-tables - # By default gcc 4.8 generates dwarf4 which kernel unwinder can't grok ifeq ($(atleast_gcc48),y) cflags-$(CONFIG_ARC_DW2_UNWIND) += -gdwarf-2 diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c index e0efff15a5ae..b9192a653b7e 100644 --- a/arch/arc/kernel/stacktrace.c +++ b/arch/arc/kernel/stacktrace.c @@ -142,7 +142,7 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs, * prelogue is setup (callee regs saved and then fp set and not other * way around */ - pr_warn("CONFIG_ARC_DW2_UNWIND needs to be enabled\n"); + pr_warn_once("CONFIG_ARC_DW2_UNWIND needs to be enabled\n"); return 0; #endif diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 893941ec98dc..f1bde7c4e736 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -263,6 +263,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) kvm_timer_vcpu_terminate(vcpu); kvm_vgic_vcpu_destroy(vcpu); kvm_pmu_vcpu_destroy(vcpu); + kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, vcpu); } diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index a6b611f1da43..f53816744d60 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -24,7 +24,7 @@ struct mm_struct; struct vm_area_struct; #define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_NO_READ | \ - _CACHE_CACHABLE_NONCOHERENT) + _page_cachable_default) #define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_WRITE | \ _page_cachable_default) #define PAGE_COPY __pgprot(_PAGE_PRESENT | _PAGE_NO_EXEC | \ @@ -476,7 +476,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) pte.pte_low &= (_PAGE_MODIFIED | _PAGE_ACCESSED | _PFNX_MASK); pte.pte_high &= (_PFN_MASK | _CACHE_MASK); pte.pte_low |= pgprot_val(newprot) & ~_PFNX_MASK; - pte.pte_high |= pgprot_val(newprot) & ~_PFN_MASK; + pte.pte_high |= pgprot_val(newprot) & ~(_PFN_MASK | _CACHE_MASK); return pte; } #elif defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32) @@ -491,7 +491,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #else static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { - return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot)); + return __pte((pte_val(pte) & _PAGE_CHG_MASK) | + (pgprot_val(newprot) & ~_PAGE_CHG_MASK)); } #endif @@ -632,7 +633,8 @@ static inline struct page *pmd_page(pmd_t pmd) static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) { - pmd_val(pmd) = (pmd_val(pmd) & _PAGE_CHG_MASK) | pgprot_val(newprot); + pmd_val(pmd) = (pmd_val(pmd) & _PAGE_CHG_MASK) | + (pgprot_val(newprot) & ~_PAGE_CHG_MASK); return pmd; } diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 88a5ecaa157b..ab84c89c9e98 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -230,6 +230,7 @@ extern unsigned long __kernel_virt_size; #define KERN_VIRT_SIZE __kernel_virt_size extern struct page *vmemmap; extern unsigned long ioremap_bot; +extern unsigned long pci_io_base; #endif /* __ASSEMBLY__ */ #include <asm/book3s/64/hash.h> diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index b5f73cb5eeb6..d70101e1e25c 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -647,7 +647,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus, pci_unlock_rescan_remove(); } } else if (frozen_bus) { - eeh_pe_dev_traverse(pe, eeh_rmv_device, &rmv_data); + eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data); } /* diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 3759df52bd67..a5ae49a2dcc4 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -47,7 +47,6 @@ static int __init pcibios_init(void) printk(KERN_INFO "PCI: Probing PCI hardware\n"); - pci_io_base = ISA_IO_BASE; /* For now, override phys_mem_access_prot. If we need it,g * later, we may move that initialization to each ppc_md */ diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index e2f12cbcade9..0b93893424f5 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1505,6 +1505,16 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) current->thread.regs = regs - 1; } +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + /* + * Clear any transactional state, we're exec()ing. The cause is + * not important as there will never be a recheckpoint so it's not + * user visible. + */ + if (MSR_TM_SUSPENDED(mfmsr())) + tm_reclaim_current(0); +#endif + memset(regs->gpr, 0, sizeof(regs->gpr)); regs->ctr = 0; regs->link = 0; diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index bf8f34a58670..b7019b559ddb 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -110,17 +110,11 @@ _GLOBAL(tm_reclaim) std r3, STK_PARAM(R3)(r1) SAVE_NVGPRS(r1) - /* We need to setup MSR for VSX register save instructions. Here we - * also clear the MSR RI since when we do the treclaim, we won't have a - * valid kernel pointer for a while. We clear RI here as it avoids - * adding another mtmsr closer to the treclaim. This makes the region - * maked as non-recoverable wider than it needs to be but it saves on - * inserting another mtmsrd later. - */ + /* We need to setup MSR for VSX register save instructions. */ mfmsr r14 mr r15, r14 ori r15, r15, MSR_FP - li r16, MSR_RI + li r16, 0 ori r16, r16, MSR_EE /* IRQs hard off */ andc r15, r15, r16 oris r15, r15, MSR_VEC@h @@ -176,7 +170,17 @@ dont_backup_fp: 1: tdeqi r6, 0 EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0 - /* The moment we treclaim, ALL of our GPRs will switch + /* Clear MSR RI since we are about to change r1, EE is already off. */ + li r4, 0 + mtmsrd r4, 1 + + /* + * BE CAREFUL HERE: + * At this point we can't take an SLB miss since we have MSR_RI + * off. Load only to/from the stack/paca which are in SLB bolted regions + * until we turn MSR RI back on. + * + * The moment we treclaim, ALL of our GPRs will switch * to user register state. (FPRs, CCR etc. also!) * Use an sprg and a tm_scratch in the PACA to shuffle. */ @@ -197,6 +201,11 @@ dont_backup_fp: /* Store the PPR in r11 and reset to decent value */ std r11, GPR11(r1) /* Temporary stash */ + + /* Reset MSR RI so we can take SLB faults again */ + li r11, MSR_RI + mtmsrd r11, 1 + mfspr r11, SPRN_PPR HMT_MEDIUM @@ -397,11 +406,6 @@ restore_gprs: ld r5, THREAD_TM_DSCR(r3) ld r6, THREAD_TM_PPR(r3) - /* Clear the MSR RI since we are about to change R1. EE is already off - */ - li r4, 0 - mtmsrd r4, 1 - REST_GPR(0, r7) /* GPR0 */ REST_2GPRS(2, r7) /* GPR2-3 */ REST_GPR(4, r7) /* GPR4 */ @@ -439,10 +443,33 @@ restore_gprs: ld r6, _CCR(r7) mtcr r6 - REST_GPR(1, r7) /* GPR1 */ - REST_GPR(5, r7) /* GPR5-7 */ REST_GPR(6, r7) - ld r7, GPR7(r7) + + /* + * Store r1 and r5 on the stack so that we can access them + * after we clear MSR RI. + */ + + REST_GPR(5, r7) + std r5, -8(r1) + ld r5, GPR1(r7) + std r5, -16(r1) + + REST_GPR(7, r7) + + /* Clear MSR RI since we are about to change r1. EE is already off */ + li r5, 0 + mtmsrd r5, 1 + + /* + * BE CAREFUL HERE: + * At this point we can't take an SLB miss since we have MSR_RI + * off. Load only to/from the stack/paca which are in SLB bolted regions + * until we turn MSR RI back on. + */ + + ld r5, -8(r1) + ld r1, -16(r1) /* Commit register state as checkpointed state: */ TRECHKPT diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 5b22ba0b58bc..2971ea18c768 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -922,6 +922,10 @@ void __init hash__early_init_mmu(void) vmemmap = (struct page *)H_VMEMMAP_BASE; ioremap_bot = IOREMAP_BASE; +#ifdef CONFIG_PCI + pci_io_base = ISA_IO_BASE; +#endif + /* Initialize the MMU Hash table and create the linear mapping * of memory. Has to be done before SLB initialization as this is * currently where the page size encoding is obtained. diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index e58707deef5c..7931e1496f0d 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -328,6 +328,11 @@ void __init radix__early_init_mmu(void) __vmalloc_end = RADIX_VMALLOC_END; vmemmap = (struct page *)RADIX_VMEMMAP_BASE; ioremap_bot = IOREMAP_BASE; + +#ifdef CONFIG_PCI + pci_io_base = ISA_IO_BASE; +#endif + /* * For now radix also use the same frag size */ diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index fdcc04020636..7c1c89598688 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -69,29 +69,22 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift) } static __always_inline -u64 pvclock_get_nsec_offset(const struct pvclock_vcpu_time_info *src) -{ - u64 delta = rdtsc_ordered() - src->tsc_timestamp; - return pvclock_scale_delta(delta, src->tsc_to_system_mul, - src->tsc_shift); -} - -static __always_inline unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src, cycle_t *cycles, u8 *flags) { unsigned version; - cycle_t ret, offset; - u8 ret_flags; + cycle_t offset; + u64 delta; version = src->version; + /* Make the latest version visible */ + smp_rmb(); - offset = pvclock_get_nsec_offset(src); - ret = src->system_time + offset; - ret_flags = src->flags; - - *cycles = ret; - *flags = ret_flags; + delta = rdtsc_ordered() - src->tsc_timestamp; + offset = pvclock_scale_delta(delta, src->tsc_to_system_mul, + src->tsc_shift); + *cycles = src->system_time + offset; + *flags = src->flags; return version; } diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 99bfc025111d..06c58ce46762 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -61,11 +61,16 @@ void pvclock_resume(void) u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src) { unsigned version; - cycle_t ret; u8 flags; do { - version = __pvclock_read_cycles(src, &ret, &flags); + version = src->version; + /* Make the latest version visible */ + smp_rmb(); + + flags = src->flags; + /* Make sure that the version double-check is last. */ + smp_rmb(); } while ((src->version & 1) || version != src->version); return flags & valid_flags; @@ -80,6 +85,8 @@ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) do { version = __pvclock_read_cycles(src, &ret, &flags); + /* Make sure that the version double-check is last. */ + smp_rmb(); } while ((src->version & 1) || version != src->version); if (unlikely((flags & PVCLOCK_GUEST_STOPPED) != 0)) { diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index bbb5b283ff63..a397200281c1 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1310,7 +1310,8 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu) /* __delay is delay_tsc whenever the hardware has TSC, thus always. */ if (guest_tsc < tsc_deadline) - __delay(tsc_deadline - guest_tsc); + __delay(min(tsc_deadline - guest_tsc, + nsec_to_cycles(vcpu, lapic_timer_advance_ns))); } static void start_apic_timer(struct kvm_lapic *apic) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 003618e324ce..64a79f271276 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6671,7 +6671,13 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu, /* Checks for #GP/#SS exceptions. */ exn = false; - if (is_protmode(vcpu)) { + if (is_long_mode(vcpu)) { + /* Long mode: #GP(0)/#SS(0) if the memory address is in a + * non-canonical form. This is the only check on the memory + * destination for long mode! + */ + exn = is_noncanonical_address(*ret); + } else if (is_protmode(vcpu)) { /* Protected mode: apply checks for segment validity in the * following order: * - segment type check (#GP(0) may be thrown) @@ -6688,17 +6694,10 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu, * execute-only code segment */ exn = ((s.type & 0xa) == 8); - } - if (exn) { - kvm_queue_exception_e(vcpu, GP_VECTOR, 0); - return 1; - } - if (is_long_mode(vcpu)) { - /* Long mode: #GP(0)/#SS(0) if the memory address is in a - * non-canonical form. This is an only check for long mode. - */ - exn = is_noncanonical_address(*ret); - } else if (is_protmode(vcpu)) { + if (exn) { + kvm_queue_exception_e(vcpu, GP_VECTOR, 0); + return 1; + } /* Protected mode: #GP(0)/#SS(0) if the segment is unusable. */ exn = (s.unusable != 0); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 902d9da12392..7da5dd2057a9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1244,12 +1244,6 @@ static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0); static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz); static unsigned long max_tsc_khz; -static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) -{ - return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult, - vcpu->arch.virtual_tsc_shift); -} - static u32 adjust_tsc_khz(u32 khz, s32 ppm) { u64 v = (u64)khz * (1000000 + ppm); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 7ce3634ab5fe..a82ca466b62e 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -2,6 +2,7 @@ #define ARCH_X86_KVM_X86_H #include <linux/kvm_host.h> +#include <asm/pvclock.h> #include "kvm_cache_regs.h" #define MSR_IA32_CR_PAT_DEFAULT 0x0007040600070406ULL @@ -195,6 +196,12 @@ extern unsigned int lapic_timer_advance_ns; extern struct static_key kvm_no_apic_vcpu; +static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) +{ + return pvclock_scale_delta(nsec, vcpu->arch.virtual_tsc_mult, + vcpu->arch.virtual_tsc_shift); +} + /* Same "calling convention" as do_div: * - divide (n << 32) by base * - put result in n diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index 2215fc847fa9..ac6ddcc080d4 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -928,7 +928,7 @@ static ssize_t format_show(struct device *dev, { struct acpi_nfit_control_region *dcr = to_nfit_dcr(dev); - return sprintf(buf, "0x%04x\n", be16_to_cpu(dcr->code)); + return sprintf(buf, "0x%04x\n", le16_to_cpu(dcr->code)); } static DEVICE_ATTR_RO(format); @@ -961,8 +961,8 @@ static ssize_t format1_show(struct device *dev, continue; if (nfit_dcr->dcr->code == dcr->code) continue; - rc = sprintf(buf, "%#x\n", - be16_to_cpu(nfit_dcr->dcr->code)); + rc = sprintf(buf, "0x%04x\n", + le16_to_cpu(nfit_dcr->dcr->code)); break; } if (rc != ENXIO) @@ -1131,11 +1131,11 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, /* * Until standardization materializes we need to consider up to 3 - * different command sets. Note, that checking for function0 (bit0) - * tells us if any commands are reachable through this uuid. + * different command sets. Note, that checking for zero functions + * tells us if any commands might be reachable through this uuid. */ for (i = NVDIMM_FAMILY_INTEL; i <= NVDIMM_FAMILY_HPE2; i++) - if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 1)) + if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 0)) break; /* limit the supported commands to those that are publicly documented */ diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h index 11cb38348aef..02b9ea1e8d2e 100644 --- a/drivers/acpi/nfit.h +++ b/drivers/acpi/nfit.h @@ -53,12 +53,12 @@ enum nfit_uuids { }; /* - * Region format interface codes are stored as an array of bytes in the - * NFIT DIMM Control Region structure + * Region format interface codes are stored with the interface as the + * LSB and the function as the MSB. */ -#define NFIT_FIC_BYTE cpu_to_be16(0x101) /* byte-addressable energy backed */ -#define NFIT_FIC_BLK cpu_to_be16(0x201) /* block-addressable non-energy backed */ -#define NFIT_FIC_BYTEN cpu_to_be16(0x301) /* byte-addressable non-energy backed */ +#define NFIT_FIC_BYTE cpu_to_le16(0x101) /* byte-addressable energy backed */ +#define NFIT_FIC_BLK cpu_to_le16(0x201) /* block-addressable non-energy backed */ +#define NFIT_FIC_BYTEN cpu_to_le16(0x301) /* byte-addressable non-energy backed */ enum { NFIT_BLK_READ_FLUSH = 1, diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c index 8fc7323ed3e8..4ed4061813e6 100644 --- a/drivers/acpi/pci_link.c +++ b/drivers/acpi/pci_link.c @@ -839,7 +839,7 @@ void acpi_penalize_isa_irq(int irq, int active) { if ((irq >= 0) && (irq < ARRAY_SIZE(acpi_isa_irq_penalty))) acpi_isa_irq_penalty[irq] = acpi_irq_get_penalty(irq) + - active ? PIRQ_PENALTY_ISA_USED : PIRQ_PENALTY_PCI_USING; + (active ? PIRQ_PENALTY_ISA_USED : PIRQ_PENALTY_PCI_USING); } bool acpi_isa_irq_available(int irq) diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c index 22c09952e177..b4de130f2d57 100644 --- a/drivers/acpi/utils.c +++ b/drivers/acpi/utils.c @@ -680,9 +680,6 @@ bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 funcs) u64 mask = 0; union acpi_object *obj; - if (funcs == 0) - return false; - obj = acpi_evaluate_dsm(handle, uuid, rev, 0, NULL); if (!obj) return false; @@ -695,6 +692,9 @@ bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 funcs) mask |= (((u64)obj->buffer.pointer[i]) << (i * 8)); ACPI_FREE(obj); + if (funcs == 0) + return true; + /* * Bit 0 indicates whether there's support for any functions other than * function 0 for the specified UUID and revision. diff --git a/drivers/atm/horizon.c b/drivers/atm/horizon.c index 527bbd595e37..5fc81e240c24 100644 --- a/drivers/atm/horizon.c +++ b/drivers/atm/horizon.c @@ -2795,9 +2795,7 @@ static int hrz_probe(struct pci_dev *pci_dev, dev->atm_dev->ci_range.vpi_bits = vpi_bits; dev->atm_dev->ci_range.vci_bits = 10-vpi_bits; - init_timer(&dev->housekeeping); - dev->housekeeping.function = do_housekeeping; - dev->housekeeping.data = (unsigned long) dev; + setup_timer(&dev->housekeeping, do_housekeeping, (unsigned long) dev); mod_timer(&dev->housekeeping, jiffies); out: diff --git a/drivers/clk/clk-oxnas.c b/drivers/clk/clk-oxnas.c index efba7d4dbcfc..79bcb2e42060 100644 --- a/drivers/clk/clk-oxnas.c +++ b/drivers/clk/clk-oxnas.c @@ -144,9 +144,9 @@ static int oxnas_stdclk_probe(struct platform_device *pdev) return -ENOMEM; regmap = syscon_node_to_regmap(of_get_parent(np)); - if (!regmap) { + if (IS_ERR(regmap)) { dev_err(&pdev->dev, "failed to have parent regmap\n"); - return -EINVAL; + return PTR_ERR(regmap); } for (i = 0; i < ARRAY_SIZE(clk_oxnas_init); i++) { diff --git a/drivers/clk/rockchip/clk-cpu.c b/drivers/clk/rockchip/clk-cpu.c index 4bb130cd0062..05b3d73bfefa 100644 --- a/drivers/clk/rockchip/clk-cpu.c +++ b/drivers/clk/rockchip/clk-cpu.c @@ -321,9 +321,9 @@ struct clk *rockchip_clk_register_cpuclk(const char *name, } cclk = clk_register(NULL, &cpuclk->hw); - if (IS_ERR(clk)) { + if (IS_ERR(cclk)) { pr_err("%s: could not register cpuclk %s\n", __func__, name); - ret = PTR_ERR(clk); + ret = PTR_ERR(cclk); goto free_rate_table; } diff --git a/drivers/clk/rockchip/clk-mmc-phase.c b/drivers/clk/rockchip/clk-mmc-phase.c index bc856f21f6b2..077fcdc7908b 100644 --- a/drivers/clk/rockchip/clk-mmc-phase.c +++ b/drivers/clk/rockchip/clk-mmc-phase.c @@ -41,8 +41,6 @@ static unsigned long rockchip_mmc_recalc(struct clk_hw *hw, #define ROCKCHIP_MMC_DEGREE_MASK 0x3 #define ROCKCHIP_MMC_DELAYNUM_OFFSET 2 #define ROCKCHIP_MMC_DELAYNUM_MASK (0xff << ROCKCHIP_MMC_DELAYNUM_OFFSET) -#define ROCKCHIP_MMC_INIT_STATE_RESET 0x1 -#define ROCKCHIP_MMC_INIT_STATE_SHIFT 1 #define PSECS_PER_SEC 1000000000000LL @@ -154,6 +152,7 @@ struct clk *rockchip_clk_register_mmc(const char *name, return ERR_PTR(-ENOMEM); init.name = name; + init.flags = 0; init.num_parents = num_parents; init.parent_names = parent_names; init.ops = &rockchip_mmc_clk_ops; @@ -162,15 +161,6 @@ struct clk *rockchip_clk_register_mmc(const char *name, mmc_clock->reg = reg; mmc_clock->shift = shift; - /* - * Assert init_state to soft reset the CLKGEN - * for mmc tuning phase and degree - */ - if (mmc_clock->shift == ROCKCHIP_MMC_INIT_STATE_SHIFT) - writel(HIWORD_UPDATE(ROCKCHIP_MMC_INIT_STATE_RESET, - ROCKCHIP_MMC_INIT_STATE_RESET, - mmc_clock->shift), mmc_clock->reg); - clk = clk_register(NULL, &mmc_clock->hw); if (IS_ERR(clk)) kfree(mmc_clock); diff --git a/drivers/clk/rockchip/clk-rk3399.c b/drivers/clk/rockchip/clk-rk3399.c index 291543f52caa..8059a8d3ea36 100644 --- a/drivers/clk/rockchip/clk-rk3399.c +++ b/drivers/clk/rockchip/clk-rk3399.c @@ -832,9 +832,9 @@ static struct rockchip_clk_branch rk3399_clk_branches[] __initdata = { RK3399_CLKGATE_CON(13), 1, GFLAGS), /* perihp */ - GATE(0, "cpll_aclk_perihp_src", "gpll", CLK_IGNORE_UNUSED, + GATE(0, "cpll_aclk_perihp_src", "cpll", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(5), 0, GFLAGS), - GATE(0, "gpll_aclk_perihp_src", "cpll", CLK_IGNORE_UNUSED, + GATE(0, "gpll_aclk_perihp_src", "gpll", CLK_IGNORE_UNUSED, RK3399_CLKGATE_CON(5), 1, GFLAGS), COMPOSITE(ACLK_PERIHP, "aclk_perihp", mux_aclk_perihp_p, CLK_IGNORE_UNUSED, RK3399_CLKSEL_CON(14), 7, 1, MFLAGS, 0, 5, DFLAGS, @@ -1466,6 +1466,8 @@ static struct rockchip_clk_branch rk3399_clk_pmu_branches[] __initdata = { static const char *const rk3399_cru_critical_clocks[] __initconst = { "aclk_cci_pre", + "aclk_gic", + "aclk_gic_noc", "pclk_perilp0", "pclk_perilp0", "hclk_perilp0", @@ -1508,6 +1510,7 @@ static void __init rk3399_clk_init(struct device_node *np) ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS); if (IS_ERR(ctx)) { pr_err("%s: rockchip clk init failed\n", __func__); + iounmap(reg_base); return; } @@ -1553,6 +1556,7 @@ static void __init rk3399_pmu_clk_init(struct device_node *np) ctx = rockchip_clk_init(np, reg_base, CLKPMU_NR_CLKS); if (IS_ERR(ctx)) { pr_err("%s: rockchip pmu clk init failed\n", __func__); + iounmap(reg_base); return; } diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c index b02f9c606e0b..a782ce87715c 100644 --- a/drivers/connector/cn_proc.c +++ b/drivers/connector/cn_proc.c @@ -22,7 +22,6 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include <linux/module.h> #include <linux/kernel.h> #include <linux/ktime.h> #include <linux/init.h> @@ -390,5 +389,4 @@ static int __init cn_proc_init(void) } return 0; } - -module_init(cn_proc_init); +device_initcall(cn_proc_init); diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 3646b143bbf5..0bb44d5b5df4 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -79,15 +79,16 @@ static const struct of_device_id machines[] __initconst = { static int __init cpufreq_dt_platdev_init(void) { struct device_node *np = of_find_node_by_path("/"); + const struct of_device_id *match; if (!np) return -ENODEV; - if (!of_match_node(machines, np)) + match = of_match_node(machines, np); + of_node_put(np); + if (!match) return -ENODEV; - of_node_put(of_root); - return PTR_ERR_OR_ZERO(platform_device_register_simple("cpufreq-dt", -1, NULL, 0)); } diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 9009295f5134..5617c7087d77 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2261,6 +2261,10 @@ int cpufreq_update_policy(unsigned int cpu) * -> ask driver for current freq and notify governors about a change */ if (cpufreq_driver->get && !cpufreq_driver->setpolicy) { + if (cpufreq_suspended) { + ret = -EAGAIN; + goto unlock; + } new_policy.cur = cpufreq_update_current_freq(policy); if (WARN_ON(!new_policy.cur)) { ret = -EIO; diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index fe9dc17ea873..1fa1a32928d7 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1400,6 +1400,9 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num) { struct cpudata *cpu = all_cpu_data[cpu_num]; + if (cpu->update_util_set) + return; + /* Prevent intel_pstate_update_util() from using stale data. */ cpu->sample.time = 0; cpufreq_add_update_util_hook(cpu_num, &cpu->update_util, @@ -1440,8 +1443,6 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) if (!policy->cpuinfo.max_freq) return -ENODEV; - intel_pstate_clear_update_util_hook(policy->cpu); - pr_debug("set_policy cpuinfo.max %u policy->max %u\n", policy->cpuinfo.max_freq, policy->max); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index e19520c4b4b6..d9c88d13f8db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -1106,6 +1106,10 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work) if (fences == 0 && handles == 0) { if (adev->pm.dpm_enabled) { amdgpu_dpm_enable_uvd(adev, false); + /* just work around for uvd clock remain high even + * when uvd dpm disabled on Polaris10 */ + if (adev->asic_type == CHIP_POLARIS10) + amdgpu_asic_set_uvd_clocks(adev, 0, 0); } else { amdgpu_asic_set_uvd_clocks(adev, 0, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 1a5cbaff1e34..b2ebd4fef6cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -47,6 +47,8 @@ #include "dce/dce_10_0_d.h" #include "dce/dce_10_0_sh_mask.h" +#include "smu/smu_7_1_3_d.h" + #define GFX8_NUM_GFX_RINGS 1 #define GFX8_NUM_COMPUTE_RINGS 8 @@ -693,6 +695,7 @@ static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) amdgpu_program_register_sequence(adev, polaris10_golden_common_all, (const u32)ARRAY_SIZE(polaris10_golden_common_all)); + WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C); break; case CHIP_CARRIZO: amdgpu_program_register_sequence(adev, diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c index 64ee78f7d41e..ec2a7ada346a 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c @@ -98,6 +98,7 @@ #define PCIE_BUS_CLK 10000 #define TCLK (PCIE_BUS_CLK / 10) +#define CEILING_UCHAR(double) ((double-(uint8_t)(double)) > 0 ? (uint8_t)(double+1) : (uint8_t)(double)) static const uint16_t polaris10_clock_stretcher_lookup_table[2][4] = { {600, 1050, 3, 0}, {600, 1050, 6, 1} }; @@ -1422,22 +1423,19 @@ static int polaris10_populate_smc_acpi_level(struct pp_hwmgr *hwmgr, table->ACPILevel.Flags &= ~PPSMC_SWSTATE_FLAG_DC; - if (!data->sclk_dpm_key_disabled) { - /* Get MinVoltage and Frequency from DPM0, - * already converted to SMC_UL */ - sclk_frequency = data->dpm_table.sclk_table.dpm_levels[0].value; - result = polaris10_get_dependency_volt_by_clk(hwmgr, - table_info->vdd_dep_on_sclk, - table->ACPILevel.SclkFrequency, - &table->ACPILevel.MinVoltage, &mvdd); - PP_ASSERT_WITH_CODE((0 == result), - "Cannot find ACPI VDDC voltage value " - "in Clock Dependency Table", ); - } else { - sclk_frequency = data->vbios_boot_state.sclk_bootup_value; - table->ACPILevel.MinVoltage = - data->vbios_boot_state.vddc_bootup_value * VOLTAGE_SCALE; - } + + /* Get MinVoltage and Frequency from DPM0, + * already converted to SMC_UL */ + sclk_frequency = data->dpm_table.sclk_table.dpm_levels[0].value; + result = polaris10_get_dependency_volt_by_clk(hwmgr, + table_info->vdd_dep_on_sclk, + sclk_frequency, + &table->ACPILevel.MinVoltage, &mvdd); + PP_ASSERT_WITH_CODE((0 == result), + "Cannot find ACPI VDDC voltage value " + "in Clock Dependency Table", + ); + result = polaris10_calculate_sclk_params(hwmgr, sclk_frequency, &(table->ACPILevel.SclkSetting)); PP_ASSERT_WITH_CODE(result == 0, "Error retrieving Engine Clock dividers from VBIOS.", return result); @@ -1462,24 +1460,18 @@ static int polaris10_populate_smc_acpi_level(struct pp_hwmgr *hwmgr, CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Fcw1_frac); CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Sclk_ss_slew_rate); - if (!data->mclk_dpm_key_disabled) { - /* Get MinVoltage and Frequency from DPM0, already converted to SMC_UL */ - table->MemoryACPILevel.MclkFrequency = - data->dpm_table.mclk_table.dpm_levels[0].value; - result = polaris10_get_dependency_volt_by_clk(hwmgr, - table_info->vdd_dep_on_mclk, - table->MemoryACPILevel.MclkFrequency, - &table->MemoryACPILevel.MinVoltage, &mvdd); - PP_ASSERT_WITH_CODE((0 == result), - "Cannot find ACPI VDDCI voltage value " - "in Clock Dependency Table", - ); - } else { - table->MemoryACPILevel.MclkFrequency = - data->vbios_boot_state.mclk_bootup_value; - table->MemoryACPILevel.MinVoltage = - data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE; - } + + /* Get MinVoltage and Frequency from DPM0, already converted to SMC_UL */ + table->MemoryACPILevel.MclkFrequency = + data->dpm_table.mclk_table.dpm_levels[0].value; + result = polaris10_get_dependency_volt_by_clk(hwmgr, + table_info->vdd_dep_on_mclk, + table->MemoryACPILevel.MclkFrequency, + &table->MemoryACPILevel.MinVoltage, &mvdd); + PP_ASSERT_WITH_CODE((0 == result), + "Cannot find ACPI VDDCI voltage value " + "in Clock Dependency Table", + ); us_mvdd = 0; if ((POLARIS10_VOLTAGE_CONTROL_NONE == data->mvdd_control) || @@ -1524,6 +1516,7 @@ static int polaris10_populate_smc_vce_level(struct pp_hwmgr *hwmgr, struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table = table_info->mm_dep_table; struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend); + uint32_t vddci; table->VceLevelCount = (uint8_t)(mm_table->count); table->VceBootLevel = 0; @@ -1533,9 +1526,18 @@ static int polaris10_populate_smc_vce_level(struct pp_hwmgr *hwmgr, table->VceLevel[count].MinVoltage = 0; table->VceLevel[count].MinVoltage |= (mm_table->entries[count].vddc * VOLTAGE_SCALE) << VDDC_SHIFT; + + if (POLARIS10_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) + vddci = (uint32_t)phm_find_closest_vddci(&(data->vddci_voltage_table), + mm_table->entries[count].vddc - VDDC_VDDCI_DELTA); + else if (POLARIS10_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control) + vddci = mm_table->entries[count].vddc - VDDC_VDDCI_DELTA; + else + vddci = (data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE) << VDDCI_SHIFT; + + table->VceLevel[count].MinVoltage |= - ((mm_table->entries[count].vddc - data->vddc_vddci_delta) * - VOLTAGE_SCALE) << VDDCI_SHIFT; + (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT; table->VceLevel[count].MinVoltage |= 1 << PHASES_SHIFT; /*retrieve divider value for VBIOS */ @@ -1564,6 +1566,7 @@ static int polaris10_populate_smc_samu_level(struct pp_hwmgr *hwmgr, struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table = table_info->mm_dep_table; struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend); + uint32_t vddci; table->SamuBootLevel = 0; table->SamuLevelCount = (uint8_t)(mm_table->count); @@ -1574,8 +1577,16 @@ static int polaris10_populate_smc_samu_level(struct pp_hwmgr *hwmgr, table->SamuLevel[count].Frequency = mm_table->entries[count].samclock; table->SamuLevel[count].MinVoltage |= (mm_table->entries[count].vddc * VOLTAGE_SCALE) << VDDC_SHIFT; - table->SamuLevel[count].MinVoltage |= ((mm_table->entries[count].vddc - - data->vddc_vddci_delta) * VOLTAGE_SCALE) << VDDCI_SHIFT; + + if (POLARIS10_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) + vddci = (uint32_t)phm_find_closest_vddci(&(data->vddci_voltage_table), + mm_table->entries[count].vddc - VDDC_VDDCI_DELTA); + else if (POLARIS10_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control) + vddci = mm_table->entries[count].vddc - VDDC_VDDCI_DELTA; + else + vddci = (data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE) << VDDCI_SHIFT; + + table->SamuLevel[count].MinVoltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT; table->SamuLevel[count].MinVoltage |= 1 << PHASES_SHIFT; /* retrieve divider value for VBIOS */ @@ -1658,6 +1669,7 @@ static int polaris10_populate_smc_uvd_level(struct pp_hwmgr *hwmgr, struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table = table_info->mm_dep_table; struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend); + uint32_t vddci; table->UvdLevelCount = (uint8_t)(mm_table->count); table->UvdBootLevel = 0; @@ -1668,8 +1680,16 @@ static int polaris10_populate_smc_uvd_level(struct pp_hwmgr *hwmgr, table->UvdLevel[count].DclkFrequency = mm_table->entries[count].dclk; table->UvdLevel[count].MinVoltage |= (mm_table->entries[count].vddc * VOLTAGE_SCALE) << VDDC_SHIFT; - table->UvdLevel[count].MinVoltage |= ((mm_table->entries[count].vddc - - data->vddc_vddci_delta) * VOLTAGE_SCALE) << VDDCI_SHIFT; + + if (POLARIS10_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control) + vddci = (uint32_t)phm_find_closest_vddci(&(data->vddci_voltage_table), + mm_table->entries[count].vddc - VDDC_VDDCI_DELTA); + else if (POLARIS10_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control) + vddci = mm_table->entries[count].vddc - VDDC_VDDCI_DELTA; + else + vddci = (data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE) << VDDCI_SHIFT; + + table->UvdLevel[count].MinVoltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT; table->UvdLevel[count].MinVoltage |= 1 << PHASES_SHIFT; /* retrieve divider value for VBIOS */ @@ -1690,8 +1710,8 @@ static int polaris10_populate_smc_uvd_level(struct pp_hwmgr *hwmgr, CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].VclkFrequency); CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].DclkFrequency); CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].MinVoltage); - } + return result; } @@ -1787,24 +1807,32 @@ static int polaris10_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr) ro = efuse * (max -min)/255 + min; - /* Populate Sclk_CKS_masterEn0_7 and Sclk_voltageOffset */ + /* Populate Sclk_CKS_masterEn0_7 and Sclk_voltageOffset + * there is a little difference in calculating + * volt_with_cks with windows */ for (i = 0; i < sclk_table->count; i++) { data->smc_state_table.Sclk_CKS_masterEn0_7 |= sclk_table->entries[i].cks_enable << i; - - volt_without_cks = (uint32_t)(((ro - 40) * 1000 - 2753594 - sclk_table->entries[i].clk/100 * 136418 /1000) / \ - (sclk_table->entries[i].clk/100 * 1132925 /10000 - 242418)/100); - - volt_with_cks = (uint32_t)((ro * 1000 -2396351 - sclk_table->entries[i].clk/100 * 329021/1000) / \ - (sclk_table->entries[i].clk/10000 * 649434 /1000 - 18005)/10); + if (hwmgr->chip_id == CHIP_POLARIS10) { + volt_without_cks = (uint32_t)((2753594000 + (sclk_table->entries[i].clk/100) * 136418 -(ro - 70) * 1000000) / \ + (2424180 - (sclk_table->entries[i].clk/100) * 1132925/1000)); + volt_with_cks = (uint32_t)((279720200 + sclk_table->entries[i].clk * 3232 - (ro - 65) * 100000000) / \ + (252248000 - sclk_table->entries[i].clk/100 * 115764)); + } else { + volt_without_cks = (uint32_t)((2416794800 + (sclk_table->entries[i].clk/100) * 1476925/10 -(ro - 50) * 1000000) / \ + (2625416 - (sclk_table->entries[i].clk/100) * 12586807/10000)); + volt_with_cks = (uint32_t)((2999656000 + sclk_table->entries[i].clk * 392803/100 - (ro - 44) * 1000000) / \ + (3422454 - sclk_table->entries[i].clk/100 * 18886376/10000)); + } if (volt_without_cks >= volt_with_cks) - volt_offset = (uint8_t)(((volt_without_cks - volt_with_cks + - sclk_table->entries[i].cks_voffset) * 100 / 625) + 1); + volt_offset = (uint8_t)CEILING_UCHAR((volt_without_cks - volt_with_cks + + sclk_table->entries[i].cks_voffset) * 100 / 625); data->smc_state_table.Sclk_voltageOffset[i] = volt_offset; } + data->smc_state_table.LdoRefSel = (table_info->cac_dtp_table->ucCKS_LDO_REFSEL != 0) ? table_info->cac_dtp_table->ucCKS_LDO_REFSEL : 6; /* Populate CKS Lookup Table */ if (stretch_amount == 1 || stretch_amount == 2 || stretch_amount == 5) stretch_amount2 = 0; @@ -2487,6 +2515,8 @@ int polaris10_enable_dpm_tasks(struct pp_hwmgr *hwmgr) PP_ASSERT_WITH_CODE((0 == tmp_result), "Failed to enable VR hot GPIO interrupt!", result = tmp_result); + smum_send_msg_to_smc(hwmgr->smumgr, (PPSMC_Msg)PPSMC_HasDisplay); + tmp_result = polaris10_enable_sclk_control(hwmgr); PP_ASSERT_WITH_CODE((0 == tmp_result), "Failed to enable SCLK control!", result = tmp_result); @@ -2913,6 +2943,31 @@ static int polaris10_set_private_data_based_on_pptable(struct pp_hwmgr *hwmgr) return 0; } +int polaris10_patch_voltage_workaround(struct pp_hwmgr *hwmgr) +{ + struct phm_ppt_v1_information *table_info = + (struct phm_ppt_v1_information *)(hwmgr->pptable); + struct phm_ppt_v1_clock_voltage_dependency_table *dep_mclk_table = + table_info->vdd_dep_on_mclk; + struct phm_ppt_v1_voltage_lookup_table *lookup_table = + table_info->vddc_lookup_table; + uint32_t i; + + if (hwmgr->chip_id == CHIP_POLARIS10 && hwmgr->hw_revision == 0xC7) { + if (lookup_table->entries[dep_mclk_table->entries[dep_mclk_table->count-1].vddInd].us_vdd >= 1000) + return 0; + + for (i = 0; i < lookup_table->count; i++) { + if (lookup_table->entries[i].us_vdd < 0xff01 && lookup_table->entries[i].us_vdd >= 1000) { + dep_mclk_table->entries[dep_mclk_table->count-1].vddInd = (uint8_t) i; + return 0; + } + } + } + return 0; +} + + int polaris10_hwmgr_backend_init(struct pp_hwmgr *hwmgr) { struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend); @@ -2990,6 +3045,7 @@ int polaris10_hwmgr_backend_init(struct pp_hwmgr *hwmgr) polaris10_set_features_platform_caps(hwmgr); + polaris10_patch_voltage_workaround(hwmgr); polaris10_init_dpm_defaults(hwmgr); /* Get leakage voltage based on leakage ID. */ @@ -4359,6 +4415,15 @@ static int polaris10_notify_link_speed_change_after_state_change( return 0; } +static int polaris10_notify_smc_display(struct pp_hwmgr *hwmgr) +{ + struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend); + + smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, + (PPSMC_Msg)PPSMC_MSG_SetVBITimeout, data->frame_time_x2); + return (smum_send_msg_to_smc(hwmgr->smumgr, (PPSMC_Msg)PPSMC_HasDisplay) == 0) ? 0 : -EINVAL; +} + static int polaris10_set_power_state_tasks(struct pp_hwmgr *hwmgr, const void *input) { int tmp_result, result = 0; @@ -4407,6 +4472,11 @@ static int polaris10_set_power_state_tasks(struct pp_hwmgr *hwmgr, const void *i "Failed to program memory timing parameters!", result = tmp_result); + tmp_result = polaris10_notify_smc_display(hwmgr); + PP_ASSERT_WITH_CODE((0 == tmp_result), + "Failed to notify smc display settings!", + result = tmp_result); + tmp_result = polaris10_unfreeze_sclk_mclk_dpm(hwmgr); PP_ASSERT_WITH_CODE((0 == tmp_result), "Failed to unfreeze SCLK MCLK DPM!", @@ -4441,6 +4511,7 @@ static int polaris10_set_max_fan_pwm_output(struct pp_hwmgr *hwmgr, uint16_t us_ PPSMC_MSG_SetFanPwmMax, us_max_fan_pwm); } + int polaris10_notify_smc_display_change(struct pp_hwmgr *hwmgr, bool has_display) { PPSMC_Msg msg = has_display ? (PPSMC_Msg)PPSMC_HasDisplay : (PPSMC_Msg)PPSMC_NoDisplay; @@ -4460,8 +4531,6 @@ int polaris10_notify_smc_display_config_after_ps_adjustment(struct pp_hwmgr *hwm if (num_active_displays > 1) /* to do && (pHwMgr->pPECI->displayConfiguration.bMultiMonitorInSync != TRUE)) */ polaris10_notify_smc_display_change(hwmgr, false); - else - polaris10_notify_smc_display_change(hwmgr, true); return 0; } @@ -4502,6 +4571,8 @@ int polaris10_program_display_gap(struct pp_hwmgr *hwmgr) frame_time_in_us = 1000000 / refresh_rate; pre_vbi_time_in_us = frame_time_in_us - 200 - mode_info.vblank_time_us; + data->frame_time_x2 = frame_time_in_us * 2 / 100; + display_gap2 = pre_vbi_time_in_us * (ref_clock / 100); cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixCG_DISPLAY_GAP_CNTL2, display_gap2); @@ -4510,8 +4581,6 @@ int polaris10_program_display_gap(struct pp_hwmgr *hwmgr) cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, data->soft_regs_start + offsetof(SMU74_SoftRegisters, VBlankTimeout), (frame_time_in_us - pre_vbi_time_in_us)); - polaris10_notify_smc_display_change(hwmgr, num_active_displays != 0); - return 0; } @@ -4623,7 +4692,7 @@ int polaris10_upload_mc_firmware(struct pp_hwmgr *hwmgr) return 0; } - data->need_long_memory_training = true; + data->need_long_memory_training = false; /* * PPMCME_FirmwareDescriptorEntry *pfd = NULL; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.h index d717789441f5..afc3434822d1 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.h @@ -315,6 +315,7 @@ struct polaris10_hwmgr { uint32_t avfs_vdroop_override_setting; bool apply_avfs_cks_off_voltage; + uint32_t frame_time_x2; }; /* To convert to Q8.8 format for firmware */ diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h index 28f571449495..77e8e33d5870 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h @@ -411,6 +411,8 @@ struct phm_cac_tdp_table { uint8_t ucVr_I2C_Line; uint8_t ucPlx_I2C_address; uint8_t ucPlx_I2C_Line; + uint32_t usBoostPowerLimit; + uint8_t ucCKS_LDO_REFSEL; }; struct phm_ppm_table { diff --git a/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h index d41d37ab5b7c..b8f4b73c322e 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h +++ b/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h @@ -392,6 +392,8 @@ typedef uint16_t PPSMC_Result; #define PPSMC_MSG_SetGpuPllDfsForSclk ((uint16_t) 0x300) #define PPSMC_MSG_Didt_Block_Function ((uint16_t) 0x301) +#define PPSMC_MSG_SetVBITimeout ((uint16_t) 0x306) + #define PPSMC_MSG_SecureSRBMWrite ((uint16_t) 0x600) #define PPSMC_MSG_SecureSRBMRead ((uint16_t) 0x601) #define PPSMC_MSG_SetAddress ((uint16_t) 0x800) diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu74_discrete.h b/drivers/gpu/drm/amd/powerplay/inc/smu74_discrete.h index b85ff5400e57..899d6d8108c2 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu74_discrete.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu74_discrete.h @@ -270,7 +270,8 @@ struct SMU74_Discrete_DpmTable { uint8_t BootPhases; uint8_t VRHotLevel; - uint8_t Reserved1[3]; + uint8_t LdoRefSel; + uint8_t Reserved1[2]; uint16_t FanStartTemperature; uint16_t FanStopTemperature; uint16_t MaxVoltage; diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 32690332d441..103546834b60 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2365,16 +2365,16 @@ static int i915_ppgtt_info(struct seq_file *m, void *data) task = get_pid_task(file->pid, PIDTYPE_PID); if (!task) { ret = -ESRCH; - goto out_put; + goto out_unlock; } seq_printf(m, "\nproc: %s\n", task->comm); put_task_struct(task); idr_for_each(&file_priv->context_idr, per_file_ctx, (void *)(unsigned long)m); } +out_unlock: mutex_unlock(&dev->filelist_mutex); -out_put: intel_runtime_pm_put(dev_priv); mutex_unlock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 56a1637c864f..04452cf3eae8 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -8447,16 +8447,16 @@ static void lpt_reset_fdi_mphy(struct drm_i915_private *dev_priv) tmp |= FDI_MPHY_IOSFSB_RESET_CTL; I915_WRITE(SOUTH_CHICKEN2, tmp); - if (wait_for_atomic_us(I915_READ(SOUTH_CHICKEN2) & - FDI_MPHY_IOSFSB_RESET_STATUS, 100)) + if (wait_for_us(I915_READ(SOUTH_CHICKEN2) & + FDI_MPHY_IOSFSB_RESET_STATUS, 100)) DRM_ERROR("FDI mPHY reset assert timeout\n"); tmp = I915_READ(SOUTH_CHICKEN2); tmp &= ~FDI_MPHY_IOSFSB_RESET_CTL; I915_WRITE(SOUTH_CHICKEN2, tmp); - if (wait_for_atomic_us((I915_READ(SOUTH_CHICKEN2) & - FDI_MPHY_IOSFSB_RESET_STATUS) == 0, 100)) + if (wait_for_us((I915_READ(SOUTH_CHICKEN2) & + FDI_MPHY_IOSFSB_RESET_STATUS) == 0, 100)) DRM_ERROR("FDI mPHY reset de-assert timeout\n"); } @@ -9440,8 +9440,8 @@ static void hsw_disable_lcpll(struct drm_i915_private *dev_priv, val |= LCPLL_CD_SOURCE_FCLK; I915_WRITE(LCPLL_CTL, val); - if (wait_for_atomic_us(I915_READ(LCPLL_CTL) & - LCPLL_CD_SOURCE_FCLK_DONE, 1)) + if (wait_for_us(I915_READ(LCPLL_CTL) & + LCPLL_CD_SOURCE_FCLK_DONE, 1)) DRM_ERROR("Switching to FCLK failed\n"); val = I915_READ(LCPLL_CTL); @@ -9514,8 +9514,8 @@ static void hsw_restore_lcpll(struct drm_i915_private *dev_priv) val &= ~LCPLL_CD_SOURCE_FCLK; I915_WRITE(LCPLL_CTL, val); - if (wait_for_atomic_us((I915_READ(LCPLL_CTL) & - LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1)) + if (wait_for_us((I915_READ(LCPLL_CTL) & + LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1)) DRM_ERROR("Switching back to LCPLL failed\n"); } diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 79cf2d5f5a20..40745e38d438 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -663,7 +663,7 @@ intel_dp_aux_wait_done(struct intel_dp *intel_dp, bool has_aux_irq) done = wait_event_timeout(dev_priv->gmbus_wait_queue, C, msecs_to_jiffies_timeout(10)); else - done = wait_for_atomic(C, 10) == 0; + done = wait_for(C, 10) == 0; if (!done) DRM_ERROR("dp aux hw did not signal timeout (has irq: %i)!\n", has_aux_irq); @@ -4899,13 +4899,15 @@ static void intel_edp_panel_vdd_sanitize(struct intel_dp *intel_dp) void intel_dp_encoder_reset(struct drm_encoder *encoder) { - struct intel_dp *intel_dp; + struct drm_i915_private *dev_priv = to_i915(encoder->dev); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + + if (!HAS_DDI(dev_priv)) + intel_dp->DP = I915_READ(intel_dp->output_reg); if (to_intel_encoder(encoder)->type != INTEL_OUTPUT_EDP) return; - intel_dp = enc_to_intel_dp(encoder); - pps_lock(intel_dp); /* diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index baf6f5584cbd..58f60b27837e 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -1377,8 +1377,8 @@ static void bxt_ddi_pll_enable(struct drm_i915_private *dev_priv, I915_WRITE(BXT_PORT_PLL_ENABLE(port), temp); POSTING_READ(BXT_PORT_PLL_ENABLE(port)); - if (wait_for_atomic_us((I915_READ(BXT_PORT_PLL_ENABLE(port)) & - PORT_PLL_LOCK), 200)) + if (wait_for_us((I915_READ(BXT_PORT_PLL_ENABLE(port)) & PORT_PLL_LOCK), + 200)) DRM_ERROR("PLL %d not locked\n", port); /* diff --git a/drivers/iio/accel/kxsd9.c b/drivers/iio/accel/kxsd9.c index 923f56598d4b..3a9f106787d2 100644 --- a/drivers/iio/accel/kxsd9.c +++ b/drivers/iio/accel/kxsd9.c @@ -81,7 +81,7 @@ static int kxsd9_write_scale(struct iio_dev *indio_dev, int micro) mutex_lock(&st->buf_lock); ret = spi_w8r8(st->us, KXSD9_READ(KXSD9_REG_CTRL_C)); - if (ret) + if (ret < 0) goto error_ret; st->tx[0] = KXSD9_WRITE(KXSD9_REG_CTRL_C); st->tx[1] = (ret & ~KXSD9_FS_MASK) | i; @@ -163,7 +163,7 @@ static int kxsd9_read_raw(struct iio_dev *indio_dev, break; case IIO_CHAN_INFO_SCALE: ret = spi_w8r8(st->us, KXSD9_READ(KXSD9_REG_CTRL_C)); - if (ret) + if (ret < 0) goto error_ret; *val2 = kxsd9_micro_scales[ret & KXSD9_FS_MASK]; ret = IIO_VAL_INT_PLUS_MICRO; diff --git a/drivers/iio/adc/ad7266.c b/drivers/iio/adc/ad7266.c index 21e19b60e2b9..2123f0ac2e2a 100644 --- a/drivers/iio/adc/ad7266.c +++ b/drivers/iio/adc/ad7266.c @@ -396,8 +396,8 @@ static int ad7266_probe(struct spi_device *spi) st = iio_priv(indio_dev); - st->reg = devm_regulator_get(&spi->dev, "vref"); - if (!IS_ERR_OR_NULL(st->reg)) { + st->reg = devm_regulator_get_optional(&spi->dev, "vref"); + if (!IS_ERR(st->reg)) { ret = regulator_enable(st->reg); if (ret) return ret; @@ -408,6 +408,9 @@ static int ad7266_probe(struct spi_device *spi) st->vref_mv = ret / 1000; } else { + /* Any other error indicates that the regulator does exist */ + if (PTR_ERR(st->reg) != -ENODEV) + return PTR_ERR(st->reg); /* Use internal reference */ st->vref_mv = 2500; } diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_acpi.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_acpi.c index f62b8bd9ad7e..dd6fc6d21f9d 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_acpi.c +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_acpi.c @@ -56,6 +56,7 @@ static int asus_acpi_get_sensor_info(struct acpi_device *adev, int i; acpi_status status; union acpi_object *cpm; + int ret; status = acpi_evaluate_object(adev->handle, "CNF0", NULL, &buffer); if (ACPI_FAILURE(status)) @@ -82,10 +83,10 @@ static int asus_acpi_get_sensor_info(struct acpi_device *adev, } } } - + ret = cpm->package.count; kfree(buffer.pointer); - return cpm->package.count; + return ret; } static int acpi_i2c_check_resource(struct acpi_resource *ares, void *data) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index b48ad85315dc..dad63f038bb8 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1528,21 +1528,18 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, { struct mlx5_flow_table *ft = ft_prio->flow_table; struct mlx5_ib_flow_handler *handler; + struct mlx5_flow_spec *spec; void *ib_flow = flow_attr + 1; - u8 match_criteria_enable = 0; unsigned int spec_index; - u32 *match_c; - u32 *match_v; u32 action; int err = 0; if (!is_valid_attr(flow_attr)) return ERR_PTR(-EINVAL); - match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); - match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + spec = mlx5_vzalloc(sizeof(*spec)); handler = kzalloc(sizeof(*handler), GFP_KERNEL); - if (!handler || !match_c || !match_v) { + if (!handler || !spec) { err = -ENOMEM; goto free; } @@ -1550,7 +1547,8 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, INIT_LIST_HEAD(&handler->list); for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { - err = parse_flow_attr(match_c, match_v, ib_flow); + err = parse_flow_attr(spec->match_criteria, + spec->match_value, ib_flow); if (err < 0) goto free; @@ -1558,11 +1556,11 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, } /* Outer header support only */ - match_criteria_enable = (!outer_header_zero(match_c)) << 0; + spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria)) + << 0; action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST : MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; - handler->rule = mlx5_add_flow_rule(ft, match_criteria_enable, - match_c, match_v, + handler->rule = mlx5_add_flow_rule(ft, spec, action, MLX5_FS_DEFAULT_FLOW_TAG, dst); @@ -1578,8 +1576,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, free: if (err) kfree(handler); - kfree(match_c); - kfree(match_v); + kvfree(spec); return err ? ERR_PTR(err) : handler; } diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 9e0034196e10..d091defc3426 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -1107,13 +1107,13 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, break; } + devid = e->devid; DUMP_printk(" DEV_ACPI_HID(%s[%s])\t\tdevid: %02x:%02x.%x\n", hid, uid, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid)); - devid = e->devid; flags = e->flags; ret = add_acpi_hid_device(hid, uid, &devid, false); diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 10700945994e..cfe410eedaf0 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -4607,7 +4607,7 @@ static void free_all_cpu_cached_iovas(unsigned int cpu) if (!iommu) continue; - for (did = 0; did < 0xffff; did++) { + for (did = 0; did < cap_ndoms(iommu->cap); did++) { domain = get_iommu_domain(iommu, did); if (!domain) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index ba764a0835d3..e23001bfcfee 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -420,8 +420,10 @@ retry: /* Try replenishing IOVAs by flushing rcache. */ flushed_rcache = true; + preempt_disable(); for_each_online_cpu(cpu) free_cpu_cached_iovas(cpu, iovad); + preempt_enable(); goto retry; } @@ -749,7 +751,7 @@ static bool __iova_rcache_insert(struct iova_domain *iovad, bool can_insert = false; unsigned long flags; - cpu_rcache = this_cpu_ptr(rcache->cpu_rcaches); + cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches); spin_lock_irqsave(&cpu_rcache->lock, flags); if (!iova_magazine_full(cpu_rcache->loaded)) { @@ -779,6 +781,7 @@ static bool __iova_rcache_insert(struct iova_domain *iovad, iova_magazine_push(cpu_rcache->loaded, iova_pfn); spin_unlock_irqrestore(&cpu_rcache->lock, flags); + put_cpu_ptr(rcache->cpu_rcaches); if (mag_to_free) { iova_magazine_free_pfns(mag_to_free, iovad); @@ -812,7 +815,7 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache, bool has_pfn = false; unsigned long flags; - cpu_rcache = this_cpu_ptr(rcache->cpu_rcaches); + cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches); spin_lock_irqsave(&cpu_rcache->lock, flags); if (!iova_magazine_empty(cpu_rcache->loaded)) { @@ -834,6 +837,7 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache, iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn); spin_unlock_irqrestore(&cpu_rcache->lock, flags); + put_cpu_ptr(rcache->cpu_rcaches); return iova_pfn; } diff --git a/drivers/mfd/max77620.c b/drivers/mfd/max77620.c index 199d261990be..f32fbb8e8129 100644 --- a/drivers/mfd/max77620.c +++ b/drivers/mfd/max77620.c @@ -203,6 +203,7 @@ static int max77620_get_fps_period_reg_value(struct max77620_chip *chip, break; case MAX77620: fps_min_period = MAX77620_FPS_PERIOD_MIN_US; + break; default: return -EINVAL; } @@ -236,6 +237,7 @@ static int max77620_config_fps(struct max77620_chip *chip, break; case MAX77620: fps_max_period = MAX77620_FPS_PERIOD_MAX_US; + break; default: return -EINVAL; } diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index ca81f46ea1aa..edc70ffad660 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -101,11 +101,14 @@ enum ad_link_speed_type { #define MAC_ADDRESS_EQUAL(A, B) \ ether_addr_equal_64bits((const u8 *)A, (const u8 *)B) -static struct mac_addr null_mac_addr = { { 0, 0, 0, 0, 0, 0 } }; +static const u8 null_mac_addr[ETH_ALEN + 2] __long_aligned = { + 0, 0, 0, 0, 0, 0 +}; static u16 ad_ticks_per_sec; static const int ad_delta_in_ticks = (AD_TIMER_INTERVAL * HZ) / 1000; -static const u8 lacpdu_mcast_addr[ETH_ALEN] = MULTICAST_LACPDU_ADDR; +static const u8 lacpdu_mcast_addr[ETH_ALEN + 2] __long_aligned = + MULTICAST_LACPDU_ADDR; /* ================= main 802.3ad protocol functions ================== */ static int ad_lacpdu_send(struct port *port); @@ -1739,7 +1742,7 @@ static void ad_clear_agg(struct aggregator *aggregator) aggregator->is_individual = false; aggregator->actor_admin_aggregator_key = 0; aggregator->actor_oper_aggregator_key = 0; - aggregator->partner_system = null_mac_addr; + eth_zero_addr(aggregator->partner_system.mac_addr_value); aggregator->partner_system_priority = 0; aggregator->partner_oper_aggregator_key = 0; aggregator->receive_state = 0; @@ -1761,7 +1764,7 @@ static void ad_initialize_agg(struct aggregator *aggregator) if (aggregator) { ad_clear_agg(aggregator); - aggregator->aggregator_mac_address = null_mac_addr; + eth_zero_addr(aggregator->aggregator_mac_address.mac_addr_value); aggregator->aggregator_identifier = 0; aggregator->slave = NULL; } diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index c5ac160a8ae9..551f0f8dead3 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -42,13 +42,10 @@ -#ifndef __long_aligned -#define __long_aligned __attribute__((aligned((sizeof(long))))) -#endif -static const u8 mac_bcast[ETH_ALEN] __long_aligned = { +static const u8 mac_bcast[ETH_ALEN + 2] __long_aligned = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; -static const u8 mac_v6_allmcast[ETH_ALEN] __long_aligned = { +static const u8 mac_v6_allmcast[ETH_ALEN + 2] __long_aligned = { 0x33, 0x33, 0x00, 0x00, 0x00, 0x01 }; static const int alb_delta_in_ticks = HZ / ALB_TIMER_TICKS_PER_SEC; diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 90157e20357e..b571ed9fd63d 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1584,6 +1584,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) } /* check for initial state */ + new_slave->link = BOND_LINK_NOCHANGE; if (bond->params.miimon) { if (bond_check_dev_link(bond, slave_dev, 0) == BMSR_LSTATUS) { if (bond->params.updelay) { @@ -4137,6 +4138,8 @@ static const struct net_device_ops bond_netdev_ops = { .ndo_add_slave = bond_enslave, .ndo_del_slave = bond_release, .ndo_fix_features = bond_fix_features, + .ndo_neigh_construct = netdev_default_l2upper_neigh_construct, + .ndo_neigh_destroy = netdev_default_l2upper_neigh_destroy, .ndo_bridge_setlink = switchdev_port_bridge_setlink, .ndo_bridge_getlink = switchdev_port_bridge_getlink, .ndo_bridge_dellink = switchdev_port_bridge_dellink, diff --git a/drivers/net/ethernet/arc/emac.h b/drivers/net/ethernet/arc/emac.h index ca562bc034c3..e4feb712d4f2 100644 --- a/drivers/net/ethernet/arc/emac.h +++ b/drivers/net/ethernet/arc/emac.h @@ -134,7 +134,6 @@ struct arc_emac_priv { /* Devices */ struct device *dev; - struct phy_device *phy_dev; struct mii_bus *bus; struct arc_emac_mdio_bus_data bus_data; diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c index a3a9392a4954..586bedac457d 100644 --- a/drivers/net/ethernet/arc/emac_main.c +++ b/drivers/net/ethernet/arc/emac_main.c @@ -47,7 +47,7 @@ static inline int arc_emac_tx_avail(struct arc_emac_priv *priv) static void arc_emac_adjust_link(struct net_device *ndev) { struct arc_emac_priv *priv = netdev_priv(ndev); - struct phy_device *phy_dev = priv->phy_dev; + struct phy_device *phy_dev = ndev->phydev; unsigned int reg, state_changed = 0; if (priv->link != phy_dev->link) { @@ -80,46 +80,6 @@ static void arc_emac_adjust_link(struct net_device *ndev) } /** - * arc_emac_get_settings - Get PHY settings. - * @ndev: Pointer to net_device structure. - * @cmd: Pointer to ethtool_cmd structure. - * - * This implements ethtool command for getting PHY settings. If PHY could - * not be found, the function returns -ENODEV. This function calls the - * relevant PHY ethtool API to get the PHY settings. - * Issue "ethtool ethX" under linux prompt to execute this function. - */ -static int arc_emac_get_settings(struct net_device *ndev, - struct ethtool_cmd *cmd) -{ - struct arc_emac_priv *priv = netdev_priv(ndev); - - return phy_ethtool_gset(priv->phy_dev, cmd); -} - -/** - * arc_emac_set_settings - Set PHY settings as passed in the argument. - * @ndev: Pointer to net_device structure. - * @cmd: Pointer to ethtool_cmd structure. - * - * This implements ethtool command for setting various PHY settings. If PHY - * could not be found, the function returns -ENODEV. This function calls the - * relevant PHY ethtool API to set the PHY. - * Issue e.g. "ethtool -s ethX speed 1000" under linux prompt to execute this - * function. - */ -static int arc_emac_set_settings(struct net_device *ndev, - struct ethtool_cmd *cmd) -{ - struct arc_emac_priv *priv = netdev_priv(ndev); - - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - return phy_ethtool_sset(priv->phy_dev, cmd); -} - -/** * arc_emac_get_drvinfo - Get EMAC driver information. * @ndev: Pointer to net_device structure. * @info: Pointer to ethtool_drvinfo structure. @@ -137,10 +97,10 @@ static void arc_emac_get_drvinfo(struct net_device *ndev, } static const struct ethtool_ops arc_emac_ethtool_ops = { - .get_settings = arc_emac_get_settings, - .set_settings = arc_emac_set_settings, .get_drvinfo = arc_emac_get_drvinfo, .get_link = ethtool_op_get_link, + .get_link_ksettings = phy_ethtool_get_link_ksettings, + .set_link_ksettings = phy_ethtool_set_link_ksettings, }; #define FIRST_OR_LAST_MASK (FIRST_MASK | LAST_MASK) @@ -403,7 +363,7 @@ static void arc_emac_poll_controller(struct net_device *dev) static int arc_emac_open(struct net_device *ndev) { struct arc_emac_priv *priv = netdev_priv(ndev); - struct phy_device *phy_dev = priv->phy_dev; + struct phy_device *phy_dev = ndev->phydev; int i; phy_dev->autoneg = AUTONEG_ENABLE; @@ -474,7 +434,7 @@ static int arc_emac_open(struct net_device *ndev) /* Enable EMAC */ arc_reg_or(priv, R_CTRL, EN_MASK); - phy_start_aneg(priv->phy_dev); + phy_start_aneg(ndev->phydev); netif_start_queue(ndev); @@ -772,6 +732,7 @@ int arc_emac_probe(struct net_device *ndev, int interface) struct device *dev = ndev->dev.parent; struct resource res_regs; struct device_node *phy_node; + struct phy_device *phydev = NULL; struct arc_emac_priv *priv; const char *mac_addr; unsigned int id, clock_frequency, irq; @@ -887,16 +848,16 @@ int arc_emac_probe(struct net_device *ndev, int interface) goto out_clken; } - priv->phy_dev = of_phy_connect(ndev, phy_node, arc_emac_adjust_link, 0, - interface); - if (!priv->phy_dev) { + phydev = of_phy_connect(ndev, phy_node, arc_emac_adjust_link, 0, + interface); + if (!phydev) { dev_err(dev, "of_phy_connect() failed\n"); err = -ENODEV; goto out_mdio; } dev_info(dev, "connected to %s phy with id 0x%x\n", - priv->phy_dev->drv->name, priv->phy_dev->phy_id); + phydev->drv->name, phydev->phy_id); netif_napi_add(ndev, &priv->napi, arc_emac_poll, ARC_EMAC_NAPI_WEIGHT); @@ -910,8 +871,7 @@ int arc_emac_probe(struct net_device *ndev, int interface) out_netif_api: netif_napi_del(&priv->napi); - phy_disconnect(priv->phy_dev); - priv->phy_dev = NULL; + phy_disconnect(phydev); out_mdio: arc_mdio_remove(priv); out_clken: @@ -925,8 +885,7 @@ int arc_emac_remove(struct net_device *ndev) { struct arc_emac_priv *priv = netdev_priv(ndev); - phy_disconnect(priv->phy_dev); - priv->phy_dev = NULL; + phy_disconnect(ndev->phydev); arc_mdio_remove(priv); unregister_netdev(ndev); netif_napi_del(&priv->napi); diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 834afbb51aff..b2d30863caeb 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -370,7 +370,7 @@ static void bcm_sysport_get_stats(struct net_device *dev, else p = (char *)priv; p += s->stat_offset; - data[i] = *(u32 *)p; + data[i] = *(unsigned long *)p; } } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 673f4d62e73e..70b148a10ec8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3414,7 +3414,8 @@ static int bnxt_hwrm_vnic_cfg(struct bnxt *bp, u16 vnic_id) bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VNIC_CFG, -1, -1); /* Only RSS support for now TBD: COS & LB */ req.enables = cpu_to_le32(VNIC_CFG_REQ_ENABLES_DFLT_RING_GRP | - VNIC_CFG_REQ_ENABLES_RSS_RULE); + VNIC_CFG_REQ_ENABLES_RSS_RULE | + VNIC_CFG_REQ_ENABLES_MRU); req.rss_rule = cpu_to_le16(vnic->fw_rss_cos_lb_ctx); req.cos_rule = cpu_to_le16(0xffff); if (vnic->flags & BNXT_VNIC_RSS_FLAG) @@ -3951,7 +3952,7 @@ static int bnxt_hwrm_stat_ctx_alloc(struct bnxt *bp) bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_STAT_CTX_ALLOC, -1, -1); - req.update_period_ms = cpu_to_le32(1000); + req.update_period_ms = cpu_to_le32(bp->stats_coal_ticks / 1000); mutex_lock(&bp->hwrm_cmd_lock); for (i = 0; i < bp->cp_nr_rings; i++) { @@ -4025,6 +4026,7 @@ int bnxt_hwrm_func_qcaps(struct bnxt *bp) pf->fw_fid = le16_to_cpu(resp->fid); pf->port_id = le16_to_cpu(resp->port_id); + bp->dev->dev_port = pf->port_id; memcpy(pf->mac_addr, resp->mac_address, ETH_ALEN); memcpy(bp->dev->dev_addr, pf->mac_addr, ETH_ALEN); pf->max_rsscos_ctxs = le16_to_cpu(resp->max_rsscos_ctx); @@ -4315,6 +4317,16 @@ static int bnxt_alloc_rfs_vnics(struct bnxt *bp) #endif } +/* Allow PF and VF with default VLAN to be in promiscuous mode */ +static bool bnxt_promisc_ok(struct bnxt *bp) +{ +#ifdef CONFIG_BNXT_SRIOV + if (BNXT_VF(bp) && !bp->vf.vlan) + return false; +#endif + return true; +} + static int bnxt_cfg_rx_mode(struct bnxt *); static bool bnxt_mc_list_updated(struct bnxt *, u32 *); @@ -4380,7 +4392,7 @@ static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init) vnic->rx_mask = CFA_L2_SET_RX_MASK_REQ_MASK_BCAST; - if ((bp->dev->flags & IFF_PROMISC) && BNXT_PF(bp)) + if ((bp->dev->flags & IFF_PROMISC) && bnxt_promisc_ok(bp)) vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS; if (bp->dev->flags & IFF_ALLMULTI) { @@ -5295,12 +5307,19 @@ static int bnxt_open(struct net_device *dev) struct bnxt *bp = netdev_priv(dev); int rc = 0; - rc = bnxt_hwrm_func_reset(bp); - if (rc) { - netdev_err(bp->dev, "hwrm chip reset failure rc: %x\n", - rc); - rc = -1; - return rc; + if (!test_bit(BNXT_STATE_FN_RST_DONE, &bp->state)) { + rc = bnxt_hwrm_func_reset(bp); + if (rc) { + netdev_err(bp->dev, "hwrm chip reset failure rc: %x\n", + rc); + rc = -EBUSY; + return rc; + } + /* Do func_reset during the 1st PF open only to prevent killing + * the VFs when the PF is brought down and up. + */ + if (BNXT_PF(bp)) + set_bit(BNXT_STATE_FN_RST_DONE, &bp->state); } return __bnxt_open_nic(bp, true, true); } @@ -5520,8 +5539,7 @@ static void bnxt_set_rx_mode(struct net_device *dev) CFA_L2_SET_RX_MASK_REQ_MASK_MCAST | CFA_L2_SET_RX_MASK_REQ_MASK_ALL_MCAST); - /* Only allow PF to be in promiscuous mode */ - if ((dev->flags & IFF_PROMISC) && BNXT_PF(bp)) + if ((dev->flags & IFF_PROMISC) && bnxt_promisc_ok(bp)) mask |= CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS; uc_update = bnxt_uc_list_updated(bp); @@ -5976,6 +5994,8 @@ static int bnxt_init_board(struct pci_dev *pdev, struct net_device *dev) bp->tx_coal_ticks_irq = 2; bp->tx_coal_bufs_irq = 2; + bp->stats_coal_ticks = BNXT_DEF_STATS_COAL_TICKS; + init_timer(&bp->timer); bp->timer.data = (unsigned long)bp; bp->timer.function = bnxt_timer; @@ -6041,7 +6061,7 @@ static int bnxt_change_mtu(struct net_device *dev, int new_mtu) { struct bnxt *bp = netdev_priv(dev); - if (new_mtu < 60 || new_mtu > 9000) + if (new_mtu < 60 || new_mtu > 9500) return -EINVAL; if (netif_running(dev)) @@ -6676,6 +6696,7 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state) { struct net_device *netdev = pci_get_drvdata(pdev); + struct bnxt *bp = netdev_priv(netdev); netdev_info(netdev, "PCI I/O error detected\n"); @@ -6690,6 +6711,8 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev, if (netif_running(netdev)) bnxt_close(netdev); + /* So that func_reset will be done during slot_reset */ + clear_bit(BNXT_STATE_FN_RST_DONE, &bp->state); pci_disable_device(pdev); rtnl_unlock(); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 927ece9c408a..2313e37e6eb5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -11,10 +11,10 @@ #define BNXT_H #define DRV_MODULE_NAME "bnxt_en" -#define DRV_MODULE_VERSION "1.2.0" +#define DRV_MODULE_VERSION "1.3.0" #define DRV_VER_MAJ 1 -#define DRV_VER_MIN 0 +#define DRV_VER_MIN 3 #define DRV_VER_UPD 0 struct tx_bd { @@ -359,7 +359,8 @@ struct rx_tpa_end_cmp { RX_TPA_END_CMP_FLAGS_PLACEMENT_ANY_GRO) #define TPA_END_GRO_TS(rx_tpa_end) \ - ((rx_tpa_end)->rx_tpa_end_cmp_tsdelta & cpu_to_le32(RX_TPA_END_GRO_TS)) + (!!((rx_tpa_end)->rx_tpa_end_cmp_tsdelta & \ + cpu_to_le32(RX_TPA_END_GRO_TS))) struct rx_tpa_end_cmp_ext { __le32 rx_tpa_end_cmp_dup_acks; @@ -753,8 +754,8 @@ struct bnxt_vf_info { struct bnxt_pf_info { #define BNXT_FIRST_PF_FID 1 #define BNXT_FIRST_VF_FID 128 - u32 fw_fid; - u8 port_id; + u16 fw_fid; + u16 port_id; u8 mac_addr[ETH_ALEN]; u16 max_rsscos_ctxs; u16 max_cp_rings; @@ -1017,6 +1018,7 @@ struct bnxt { unsigned long state; #define BNXT_STATE_OPEN 0 #define BNXT_STATE_IN_SP_TASK 1 +#define BNXT_STATE_FN_RST_DONE 2 struct bnxt_irq *irq_tbl; u8 mac_addr[ETH_ALEN]; @@ -1065,6 +1067,11 @@ struct bnxt { #define BNXT_USEC_TO_COAL_TIMER(x) ((x) * 25 / 2) + u32 stats_coal_ticks; +#define BNXT_DEF_STATS_COAL_TICKS 1000000 +#define BNXT_MIN_STATS_COAL_TICKS 250000 +#define BNXT_MAX_STATS_COAL_TICKS 1000000 + struct work_struct sp_task; unsigned long sp_event; #define BNXT_RX_MASK_SP_EVENT 0 diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index d7ab2d7982c2..0f7dd861ab4d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -56,6 +56,8 @@ static int bnxt_get_coalesce(struct net_device *dev, coal->tx_coalesce_usecs_irq = bp->tx_coal_ticks_irq; coal->tx_max_coalesced_frames_irq = bp->tx_coal_bufs_irq; + coal->stats_block_coalesce_usecs = bp->stats_coal_ticks; + return 0; } @@ -63,6 +65,7 @@ static int bnxt_set_coalesce(struct net_device *dev, struct ethtool_coalesce *coal) { struct bnxt *bp = netdev_priv(dev); + bool update_stats = false; int rc = 0; bp->rx_coal_ticks = coal->rx_coalesce_usecs; @@ -76,8 +79,26 @@ static int bnxt_set_coalesce(struct net_device *dev, bp->tx_coal_ticks_irq = coal->tx_coalesce_usecs_irq; bp->tx_coal_bufs_irq = coal->tx_max_coalesced_frames_irq; - if (netif_running(dev)) - rc = bnxt_hwrm_set_coal(bp); + if (bp->stats_coal_ticks != coal->stats_block_coalesce_usecs) { + u32 stats_ticks = coal->stats_block_coalesce_usecs; + + stats_ticks = clamp_t(u32, stats_ticks, + BNXT_MIN_STATS_COAL_TICKS, + BNXT_MAX_STATS_COAL_TICKS); + stats_ticks = rounddown(stats_ticks, BNXT_MIN_STATS_COAL_TICKS); + bp->stats_coal_ticks = stats_ticks; + update_stats = true; + } + + if (netif_running(dev)) { + if (update_stats) { + rc = bnxt_close_nic(bp, true, false); + if (!rc) + rc = bnxt_open_nic(bp, true, false); + } else { + rc = bnxt_hwrm_set_coal(bp); + } + } return rc; } @@ -961,7 +982,7 @@ static int bnxt_set_pauseparam(struct net_device *dev, struct bnxt_link_info *link_info = &bp->link_info; if (!BNXT_SINGLE_PF(bp)) - return rc; + return -EOPNOTSUPP; if (epause->autoneg) { if (!(link_info->autoneg & BNXT_AUTONEG_SPEED)) @@ -1059,6 +1080,8 @@ static int bnxt_firmware_reset(struct net_device *dev, case BNX_DIR_TYPE_APE_FW: case BNX_DIR_TYPE_APE_PATCH: req.embedded_proc_type = FW_RESET_REQ_EMBEDDED_PROC_TYPE_MGMT; + /* Self-reset APE upon next PCIe reset: */ + req.selfrst_status = FW_RESET_REQ_SELFRST_STATUS_SELFRSTPCIERST; break; case BNX_DIR_TYPE_KONG_FW: case BNX_DIR_TYPE_KONG_PATCH: @@ -1092,9 +1115,27 @@ static int bnxt_flash_firmware(struct net_device *dev, case BNX_DIR_TYPE_BOOTCODE_2: code_type = CODE_BOOT; break; + case BNX_DIR_TYPE_CHIMP_PATCH: + code_type = CODE_CHIMP_PATCH; + break; case BNX_DIR_TYPE_APE_FW: code_type = CODE_MCTP_PASSTHRU; break; + case BNX_DIR_TYPE_APE_PATCH: + code_type = CODE_APE_PATCH; + break; + case BNX_DIR_TYPE_KONG_FW: + code_type = CODE_KONG_FW; + break; + case BNX_DIR_TYPE_KONG_PATCH: + code_type = CODE_KONG_PATCH; + break; + case BNX_DIR_TYPE_BONO_FW: + code_type = CODE_BONO_FW; + break; + case BNX_DIR_TYPE_BONO_PATCH: + code_type = CODE_BONO_PATCH; + break; default: netdev_err(dev, "Unsupported directory entry type: %u\n", dir_type); @@ -1149,6 +1190,8 @@ static bool bnxt_dir_type_is_ape_bin_format(u16 dir_type) case BNX_DIR_TYPE_APE_PATCH: case BNX_DIR_TYPE_KONG_FW: case BNX_DIR_TYPE_KONG_PATCH: + case BNX_DIR_TYPE_BONO_FW: + case BNX_DIR_TYPE_BONO_PATCH: return true; } @@ -1186,7 +1229,8 @@ static int bnxt_flash_firmware_from_file(struct net_device *dev, const struct firmware *fw; int rc; - if (bnxt_dir_type_is_executable(dir_type) == false) + if (dir_type != BNX_DIR_TYPE_UPDATE && + bnxt_dir_type_is_executable(dir_type) == false) return -EINVAL; rc = request_firmware(&fw, filename, &dev->dev); @@ -1483,7 +1527,7 @@ static int bnxt_set_eee(struct net_device *dev, struct ethtool_eee *edata) int rc = 0; if (!BNXT_SINGLE_PF(bp)) - return 0; + return -EOPNOTSUPP; if (!(bp->flags & BNXT_FLAG_EEE_CAP)) return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_fw_hdr.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_fw_hdr.h index 461675caaacd..82bf44ab811b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_fw_hdr.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_fw_hdr.h @@ -70,6 +70,7 @@ enum SUPPORTED_CODE { CODE_KONG_PATCH, /* 18 - KONG Patch firmware */ CODE_BONO_FW, /* 19 - BONO firmware */ CODE_BONO_PATCH, /* 20 - BONO Patch firmware */ + CODE_CHIMP_PATCH, /* 21 - ChiMP Patch firmware */ MAX_CODE_TYPE, }; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h index 05e3c49a7677..517567f6d651 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h @@ -105,6 +105,7 @@ struct hwrm_async_event_cmpl { #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED (0x4UL << 0) #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_NOT_ALLOWED (0x5UL << 0) #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE (0x6UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_PHY_CFG_CHANGE (0x7UL << 0) #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_UNLOAD (0x10UL << 0) #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_LOAD (0x11UL << 0) #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_UNLOAD (0x20UL << 0) @@ -484,12 +485,12 @@ struct hwrm_async_event_cmpl_hwrm_error { #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA1_TIMESTAMP 0x1UL }; -/* HW Resource Manager Specification 1.2.2 */ +/* HW Resource Manager Specification 1.3.0 */ #define HWRM_VERSION_MAJOR 1 -#define HWRM_VERSION_MINOR 2 -#define HWRM_VERSION_UPDATE 2 +#define HWRM_VERSION_MINOR 3 +#define HWRM_VERSION_UPDATE 0 -#define HWRM_VERSION_STR "1.2.2" +#define HWRM_VERSION_STR "1.3.0" /* * Following is the signature for HWRM message field that indicates not * applicable (All F's). Need to cast it the size of the field if needed. @@ -611,6 +612,9 @@ struct cmd_nums { #define HWRM_FWD_RESP (0xd2UL) #define HWRM_FWD_ASYNC_EVENT_CMPL (0xd3UL) #define HWRM_TEMP_MONITOR_QUERY (0xe0UL) + #define HWRM_WOL_FILTER_ALLOC (0xf0UL) + #define HWRM_WOL_FILTER_FREE (0xf1UL) + #define HWRM_WOL_FILTER_QCFG (0xf2UL) #define HWRM_DBG_READ_DIRECT (0xff10UL) #define HWRM_DBG_READ_INDIRECT (0xff11UL) #define HWRM_DBG_WRITE_DIRECT (0xff12UL) @@ -1020,6 +1024,10 @@ struct hwrm_func_qcaps_output { #define FUNC_QCAPS_RESP_FLAGS_PUSH_MODE_SUPPORTED 0x1UL #define FUNC_QCAPS_RESP_FLAGS_GLOBAL_MSIX_AUTOMASKING 0x2UL #define FUNC_QCAPS_RESP_FLAGS_PTP_SUPPORTED 0x4UL + #define FUNC_QCAPS_RESP_FLAGS_ROCE_V1_SUPPORTED 0x8UL + #define FUNC_QCAPS_RESP_FLAGS_ROCE_V2_SUPPORTED 0x10UL + #define FUNC_QCAPS_RESP_FLAGS_WOL_MAGICPKT_SUPPORTED 0x20UL + #define FUNC_QCAPS_RESP_FLAGS_WOL_BMP_SUPPORTED 0x40UL u8 mac_address[6]; __le16 max_rsscos_ctx; __le16 max_cmpl_rings; @@ -1066,8 +1074,9 @@ struct hwrm_func_qcfg_output { __le16 fid; __le16 port_id; __le16 vlan; - u8 unused_0; - u8 unused_1; + __le16 flags; + #define FUNC_QCFG_RESP_FLAGS_OOB_WOL_MAGICPKT_ENABLED 0x1UL + #define FUNC_QCFG_RESP_FLAGS_OOB_WOL_BMP_ENABLED 0x2UL u8 mac_address[6]; __le16 pci_id; __le16 alloc_rsscos_ctx; @@ -1086,23 +1095,23 @@ struct hwrm_func_qcfg_output { #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_5 (0x3UL << 0) #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR2_0 (0x4UL << 0) #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_UNKNOWN (0xffUL << 0) - u8 unused_2; + u8 unused_0; __le16 dflt_vnic_id; - u8 unused_3; - u8 unused_4; + u8 unused_1; + u8 unused_2; __le32 min_bw; __le32 max_bw; u8 evb_mode; #define FUNC_QCFG_RESP_EVB_MODE_NO_EVB (0x0UL << 0) #define FUNC_QCFG_RESP_EVB_MODE_VEB (0x1UL << 0) #define FUNC_QCFG_RESP_EVB_MODE_VEPA (0x2UL << 0) - u8 unused_5; - __le16 unused_6; + u8 unused_3; + __le16 unused_4; __le32 alloc_mcast_filters; __le32 alloc_hw_ring_grps; + u8 unused_5; + u8 unused_6; u8 unused_7; - u8 unused_8; - u8 unused_9; u8 valid; }; @@ -1410,8 +1419,8 @@ struct hwrm_func_buf_rgtr_input { #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_4K (0xcUL << 0) #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_8K (0xdUL << 0) #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_64K (0x10UL << 0) - #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_2M (0x16UL << 0) - #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_4M (0x17UL << 0) + #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_2M (0x15UL << 0) + #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_4M (0x16UL << 0) #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_1G (0x1eUL << 0) __le16 req_buf_len; __le16 resp_buf_len; @@ -1499,6 +1508,12 @@ struct hwrm_port_phy_cfg_input { #define PORT_PHY_CFG_REQ_FLAGS_EEE_DISABLE 0x20UL #define PORT_PHY_CFG_REQ_FLAGS_EEE_TX_LPI_ENABLE 0x40UL #define PORT_PHY_CFG_REQ_FLAGS_EEE_TX_LPI_DISABLE 0x80UL + #define PORT_PHY_CFG_REQ_FLAGS_FEC_AUTONEG_ENABLE 0x100UL + #define PORT_PHY_CFG_REQ_FLAGS_FEC_AUTONEG_DISABLE 0x200UL + #define PORT_PHY_CFG_REQ_FLAGS_FEC_CLAUSE74_ENABLE 0x400UL + #define PORT_PHY_CFG_REQ_FLAGS_FEC_CLAUSE74_DISABLE 0x800UL + #define PORT_PHY_CFG_REQ_FLAGS_FEC_CLAUSE91_ENABLE 0x1000UL + #define PORT_PHY_CFG_REQ_FLAGS_FEC_CLAUSE91_DISABLE 0x2000UL __le32 enables; #define PORT_PHY_CFG_REQ_ENABLES_AUTO_MODE 0x1UL #define PORT_PHY_CFG_REQ_ENABLES_AUTO_DUPLEX 0x2UL @@ -1815,13 +1830,22 @@ struct hwrm_port_phy_qcfg_output { #define PORT_PHY_QCFG_RESP_XCVR_IDENTIFIER_TYPE_QSFP (0xcUL << 24) #define PORT_PHY_QCFG_RESP_XCVR_IDENTIFIER_TYPE_QSFPPLUS (0xdUL << 24) #define PORT_PHY_QCFG_RESP_XCVR_IDENTIFIER_TYPE_QSFP28 (0x11UL << 24) - __le32 unused_1; + __le16 fec_cfg; + #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_NONE_SUPPORTED 0x1UL + #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_AUTONEG_SUPPORTED 0x2UL + #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_AUTONEG_ENABLED 0x4UL + #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_CLAUSE74_SUPPORTED 0x8UL + #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_CLAUSE74_ENABLED 0x10UL + #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_CLAUSE91_SUPPORTED 0x20UL + #define PORT_PHY_QCFG_RESP_FEC_CFG_FEC_CLAUSE91_ENABLED 0x40UL + u8 unused_1; + u8 unused_2; char phy_vendor_name[16]; char phy_vendor_partnumber[16]; - __le32 unused_2; - u8 unused_3; + __le32 unused_3; u8 unused_4; u8 unused_5; + u8 unused_6; u8 valid; }; @@ -1842,6 +1866,8 @@ struct hwrm_port_mac_cfg_input { #define PORT_MAC_CFG_REQ_FLAGS_PTP_RX_TS_CAPTURE_DISABLE 0x20UL #define PORT_MAC_CFG_REQ_FLAGS_PTP_TX_TS_CAPTURE_ENABLE 0x40UL #define PORT_MAC_CFG_REQ_FLAGS_PTP_TX_TS_CAPTURE_DISABLE 0x80UL + #define PORT_MAC_CFG_REQ_FLAGS_OOB_WOL_ENABLE 0x100UL + #define PORT_MAC_CFG_REQ_FLAGS_OOB_WOL_DISABLE 0x200UL __le32 enables; #define PORT_MAC_CFG_REQ_ENABLES_IPG 0x1UL #define PORT_MAC_CFG_REQ_ENABLES_LPBK 0x2UL @@ -2127,6 +2153,7 @@ struct hwrm_port_phy_i2c_read_output { u8 valid; }; +/* hwrm_queue_qportcfg */ /* Input (24 bytes) */ struct hwrm_queue_qportcfg_input { __le16 req_type; @@ -2382,7 +2409,7 @@ struct hwrm_queue_cos2bw_cfg_input { #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_SP (0x0UL << 0) #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_ETS (0x1UL << 0) #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_RESERVED_LAST (0xffffUL << 0) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0) u8 queue_id0_pri_lvl; u8 queue_id0_bw_weight; u8 queue_id1; @@ -2392,7 +2419,7 @@ struct hwrm_queue_cos2bw_cfg_input { #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_SP (0x0UL << 0) #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_ETS (0x1UL << 0) #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_RESERVED_LAST (0xffffUL << 0) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0) u8 queue_id1_pri_lvl; u8 queue_id1_bw_weight; u8 queue_id2; @@ -2402,7 +2429,7 @@ struct hwrm_queue_cos2bw_cfg_input { #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_SP (0x0UL << 0) #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_ETS (0x1UL << 0) #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_RESERVED_LAST (0xffffUL << 0) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0) u8 queue_id2_pri_lvl; u8 queue_id2_bw_weight; u8 queue_id3; @@ -2412,7 +2439,7 @@ struct hwrm_queue_cos2bw_cfg_input { #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_SP (0x0UL << 0) #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_ETS (0x1UL << 0) #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_RESERVED_LAST (0xffffUL << 0) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0) u8 queue_id3_pri_lvl; u8 queue_id3_bw_weight; u8 queue_id4; @@ -2422,7 +2449,7 @@ struct hwrm_queue_cos2bw_cfg_input { #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_SP (0x0UL << 0) #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_ETS (0x1UL << 0) #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_RESERVED_LAST (0xffffUL << 0) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0) u8 queue_id4_pri_lvl; u8 queue_id4_bw_weight; u8 queue_id5; @@ -2432,7 +2459,7 @@ struct hwrm_queue_cos2bw_cfg_input { #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_SP (0x0UL << 0) #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_ETS (0x1UL << 0) #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_RESERVED_LAST (0xffffUL << 0) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0) u8 queue_id5_pri_lvl; u8 queue_id5_bw_weight; u8 queue_id6; @@ -2442,7 +2469,7 @@ struct hwrm_queue_cos2bw_cfg_input { #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_SP (0x0UL << 0) #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_ETS (0x1UL << 0) #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_RESERVED_LAST (0xffffUL << 0) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0) u8 queue_id6_pri_lvl; u8 queue_id6_bw_weight; u8 queue_id7; @@ -2452,7 +2479,7 @@ struct hwrm_queue_cos2bw_cfg_input { #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_SP (0x0UL << 0) #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_ETS (0x1UL << 0) #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_RESERVED_LAST (0xffffUL << 0) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0) u8 queue_id7_pri_lvl; u8 queue_id7_bw_weight; u8 unused_1[5]; @@ -3150,7 +3177,7 @@ struct hwrm_cfa_l2_filter_cfg_output { }; /* hwrm_cfa_l2_set_rx_mask */ -/* Input (40 bytes) */ +/* Input (56 bytes) */ struct hwrm_cfa_l2_set_rx_mask_input { __le16 req_type; __le16 cmpl_ring; @@ -3165,9 +3192,15 @@ struct hwrm_cfa_l2_set_rx_mask_input { #define CFA_L2_SET_RX_MASK_REQ_MASK_BCAST 0x8UL #define CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS 0x10UL #define CFA_L2_SET_RX_MASK_REQ_MASK_OUTERMOST 0x20UL + #define CFA_L2_SET_RX_MASK_REQ_MASK_VLANONLY 0x40UL + #define CFA_L2_SET_RX_MASK_REQ_MASK_VLAN_NONVLAN 0x80UL + #define CFA_L2_SET_RX_MASK_REQ_MASK_ANYVLAN_NONVLAN 0x100UL __le64 mc_tbl_addr; __le32 num_mc_entries; __le32 unused_0; + __le64 vlan_tag_tbl_addr; + __le32 num_vlan_tags; + __le32 unused_1; }; /* Output (16 bytes) */ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_nvm_defs.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_nvm_defs.h index 40a7b0e09612..73f2249555b5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_nvm_defs.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_nvm_defs.h @@ -13,6 +13,7 @@ enum bnxt_nvm_directory_type { BNX_DIR_TYPE_UNUSED = 0, BNX_DIR_TYPE_PKG_LOG = 1, + BNX_DIR_TYPE_UPDATE = 2, BNX_DIR_TYPE_CHIMP_PATCH = 3, BNX_DIR_TYPE_BOOTCODE = 4, BNX_DIR_TYPE_VPD = 5, diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 541456398dfb..76ed6df0fe53 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -450,34 +450,6 @@ static inline void bcmgenet_rdma_ring_writel(struct bcmgenet_priv *priv, genet_dma_ring_regs[r]); } -static int bcmgenet_get_settings(struct net_device *dev, - struct ethtool_cmd *cmd) -{ - struct bcmgenet_priv *priv = netdev_priv(dev); - - if (!netif_running(dev)) - return -EINVAL; - - if (!priv->phydev) - return -ENODEV; - - return phy_ethtool_gset(priv->phydev, cmd); -} - -static int bcmgenet_set_settings(struct net_device *dev, - struct ethtool_cmd *cmd) -{ - struct bcmgenet_priv *priv = netdev_priv(dev); - - if (!netif_running(dev)) - return -EINVAL; - - if (!priv->phydev) - return -ENODEV; - - return phy_ethtool_sset(priv->phydev, cmd); -} - static int bcmgenet_set_rx_csum(struct net_device *dev, netdev_features_t wanted) { @@ -941,7 +913,7 @@ static int bcmgenet_get_eee(struct net_device *dev, struct ethtool_eee *e) e->eee_active = p->eee_active; e->tx_lpi_timer = bcmgenet_umac_readl(priv, UMAC_EEE_LPI_TIMER); - return phy_ethtool_get_eee(priv->phydev, e); + return phy_ethtool_get_eee(dev->phydev, e); } static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e) @@ -958,7 +930,7 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e) if (!p->eee_enabled) { bcmgenet_eee_enable_set(dev, false); } else { - ret = phy_init_eee(priv->phydev, 0); + ret = phy_init_eee(dev->phydev, 0); if (ret) { netif_err(priv, hw, dev, "EEE initialization failed\n"); return ret; @@ -968,14 +940,12 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e) bcmgenet_eee_enable_set(dev, true); } - return phy_ethtool_set_eee(priv->phydev, e); + return phy_ethtool_set_eee(dev->phydev, e); } static int bcmgenet_nway_reset(struct net_device *dev) { - struct bcmgenet_priv *priv = netdev_priv(dev); - - return genphy_restart_aneg(priv->phydev); + return genphy_restart_aneg(dev->phydev); } /* standard ethtool support functions. */ @@ -983,8 +953,6 @@ static struct ethtool_ops bcmgenet_ethtool_ops = { .get_strings = bcmgenet_get_strings, .get_sset_count = bcmgenet_get_sset_count, .get_ethtool_stats = bcmgenet_get_ethtool_stats, - .get_settings = bcmgenet_get_settings, - .set_settings = bcmgenet_set_settings, .get_drvinfo = bcmgenet_get_drvinfo, .get_link = ethtool_op_get_link, .get_msglevel = bcmgenet_get_msglevel, @@ -996,18 +964,21 @@ static struct ethtool_ops bcmgenet_ethtool_ops = { .nway_reset = bcmgenet_nway_reset, .get_coalesce = bcmgenet_get_coalesce, .set_coalesce = bcmgenet_set_coalesce, + .get_link_ksettings = phy_ethtool_get_link_ksettings, + .set_link_ksettings = phy_ethtool_set_link_ksettings, }; /* Power down the unimac, based on mode. */ static int bcmgenet_power_down(struct bcmgenet_priv *priv, enum bcmgenet_power_mode mode) { + struct net_device *ndev = priv->dev; int ret = 0; u32 reg; switch (mode) { case GENET_POWER_CABLE_SENSE: - phy_detach(priv->phydev); + phy_detach(ndev->phydev); break; case GENET_POWER_WOL_MAGIC: @@ -1068,7 +1039,6 @@ static void bcmgenet_power_up(struct bcmgenet_priv *priv, /* ioctl handle special commands that are not present in ethtool. */ static int bcmgenet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { - struct bcmgenet_priv *priv = netdev_priv(dev); int val = 0; if (!netif_running(dev)) @@ -1078,10 +1048,10 @@ static int bcmgenet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSMIIREG: - if (!priv->phydev) + if (!dev->phydev) val = -ENODEV; else - val = phy_mii_ioctl(priv->phydev, rq, cmd); + val = phy_mii_ioctl(dev->phydev, rq, cmd); break; default: @@ -2464,6 +2434,7 @@ static void bcmgenet_irq_task(struct work_struct *work) { struct bcmgenet_priv *priv = container_of( work, struct bcmgenet_priv, bcmgenet_irq_work); + struct net_device *ndev = priv->dev; netif_dbg(priv, intr, priv->dev, "%s\n", __func__); @@ -2476,7 +2447,7 @@ static void bcmgenet_irq_task(struct work_struct *work) /* Link UP/DOWN event */ if (priv->irq0_stat & UMAC_IRQ_LINK_EVENT) { - phy_mac_interrupt(priv->phydev, + phy_mac_interrupt(ndev->phydev, !!(priv->irq0_stat & UMAC_IRQ_LINK_UP)); priv->irq0_stat &= ~UMAC_IRQ_LINK_EVENT; } @@ -2838,7 +2809,7 @@ static void bcmgenet_netif_start(struct net_device *dev) /* Monitor link interrupts now */ bcmgenet_link_intr_enable(priv); - phy_start(priv->phydev); + phy_start(dev->phydev); } static int bcmgenet_open(struct net_device *dev) @@ -2937,7 +2908,7 @@ static void bcmgenet_netif_stop(struct net_device *dev) struct bcmgenet_priv *priv = netdev_priv(dev); netif_tx_stop_all_queues(dev); - phy_stop(priv->phydev); + phy_stop(dev->phydev); bcmgenet_intr_disable(priv); bcmgenet_disable_rx_napi(priv); bcmgenet_disable_tx_napi(priv); @@ -2963,7 +2934,7 @@ static int bcmgenet_close(struct net_device *dev) bcmgenet_netif_stop(dev); /* Really kill the PHY state machine and disconnect from it */ - phy_disconnect(priv->phydev); + phy_disconnect(dev->phydev); /* Disable MAC receive */ umac_enable_set(priv, CMD_RX_EN, false); @@ -3522,7 +3493,7 @@ static int bcmgenet_suspend(struct device *d) bcmgenet_netif_stop(dev); - phy_suspend(priv->phydev); + phy_suspend(dev->phydev); netif_device_detach(dev); @@ -3586,7 +3557,7 @@ static int bcmgenet_resume(struct device *d) if (priv->wolopts) clk_disable_unprepare(priv->clk_wol); - phy_init_hw(priv->phydev); + phy_init_hw(dev->phydev); /* Speed settings must be restored */ bcmgenet_mii_config(priv->dev); @@ -3619,7 +3590,7 @@ static int bcmgenet_resume(struct device *d) netif_device_attach(dev); - phy_resume(priv->phydev); + phy_resume(dev->phydev); if (priv->eee.eee_enabled) bcmgenet_eee_enable_set(dev, true); diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 1e2dc34d331a..0f0868c56f05 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -597,7 +597,6 @@ struct bcmgenet_priv { /* MDIO bus variables */ wait_queue_head_t wq; - struct phy_device *phydev; bool internal_phy; struct device_node *phy_dn; struct device_node *mdio_dn; diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 457c3bc8cfff..e907acd81da9 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -86,7 +86,7 @@ static int bcmgenet_mii_write(struct mii_bus *bus, int phy_id, void bcmgenet_mii_setup(struct net_device *dev) { struct bcmgenet_priv *priv = netdev_priv(dev); - struct phy_device *phydev = priv->phydev; + struct phy_device *phydev = dev->phydev; u32 reg, cmd_bits = 0; bool status_changed = false; @@ -183,9 +183,9 @@ void bcmgenet_mii_reset(struct net_device *dev) if (GENET_IS_V4(priv)) return; - if (priv->phydev) { - phy_init_hw(priv->phydev); - phy_start_aneg(priv->phydev); + if (dev->phydev) { + phy_init_hw(dev->phydev); + phy_start_aneg(dev->phydev); } } @@ -236,6 +236,7 @@ static void bcmgenet_internal_phy_setup(struct net_device *dev) static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv) { + struct net_device *ndev = priv->dev; u32 reg; /* Speed settings are set in bcmgenet_mii_setup() */ @@ -244,14 +245,14 @@ static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv) bcmgenet_sys_writel(priv, reg, SYS_PORT_CTRL); if (priv->hw_params->flags & GENET_HAS_MOCA_LINK_DET) - fixed_phy_set_link_update(priv->phydev, + fixed_phy_set_link_update(ndev->phydev, bcmgenet_fixed_phy_link_update); } int bcmgenet_mii_config(struct net_device *dev) { struct bcmgenet_priv *priv = netdev_priv(dev); - struct phy_device *phydev = priv->phydev; + struct phy_device *phydev = dev->phydev; struct device *kdev = &priv->pdev->dev; const char *phy_name = NULL; u32 id_mode_dis = 0; @@ -302,7 +303,7 @@ int bcmgenet_mii_config(struct net_device *dev) * capabilities, use that knowledge to also configure the * Reverse MII interface correctly. */ - if ((priv->phydev->supported & PHY_BASIC_FEATURES) == + if ((phydev->supported & PHY_BASIC_FEATURES) == PHY_BASIC_FEATURES) port_ctrl = PORT_MODE_EXT_RVMII_25; else @@ -371,7 +372,7 @@ int bcmgenet_mii_probe(struct net_device *dev) return -ENODEV; } } else { - phydev = priv->phydev; + phydev = dev->phydev; phydev->dev_flags = phy_flags; ret = phy_connect_direct(dev, phydev, bcmgenet_mii_setup, @@ -382,8 +383,6 @@ int bcmgenet_mii_probe(struct net_device *dev) } } - priv->phydev = phydev; - /* Configure port multiplexer based on what the probed PHY device since * reading the 'max-speed' property determines the maximum supported * PHY speed which is needed for bcmgenet_mii_config() to configure @@ -391,7 +390,7 @@ int bcmgenet_mii_probe(struct net_device *dev) */ ret = bcmgenet_mii_config(dev); if (ret) { - phy_disconnect(priv->phydev); + phy_disconnect(phydev); return ret; } @@ -401,7 +400,7 @@ int bcmgenet_mii_probe(struct net_device *dev) * Ethernet MAC ISRs */ if (priv->internal_phy) - priv->phydev->irq = PHY_IGNORE_INTERRUPT; + phydev->irq = PHY_IGNORE_INTERRUPT; return 0; } @@ -606,7 +605,6 @@ static int bcmgenet_mii_pd_init(struct bcmgenet_priv *priv) } - priv->phydev = phydev; priv->phy_interface = pd->phy_interface; return 0; diff --git a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c index d35864ada9a3..c03d37016a48 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c +++ b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c @@ -19,26 +19,16 @@ * This file may also be available under a different license from Cavium. * Contact Cavium, Inc. for more information **********************************************************************/ -#include <linux/version.h> -#include <linux/types.h> -#include <linux/list.h> -#include <linux/interrupt.h> #include <linux/pci.h> -#include <linux/kthread.h> #include <linux/netdevice.h> -#include "octeon_config.h" #include "liquidio_common.h" #include "octeon_droq.h" #include "octeon_iq.h" #include "response_manager.h" #include "octeon_device.h" -#include "octeon_nic.h" #include "octeon_main.h" -#include "octeon_network.h" #include "cn66xx_regs.h" #include "cn66xx_device.h" -#include "liquidio_image.h" -#include "octeon_mem_ops.h" int lio_cn6xxx_soft_reset(struct octeon_device *oct) { @@ -74,9 +64,9 @@ void lio_cn6xxx_enable_error_reporting(struct octeon_device *oct) u32 val; pci_read_config_dword(oct->pci_dev, CN6XXX_PCIE_DEVCTL, &val); - if (val & 0x000f0000) { + if (val & 0x000c0000) { dev_err(&oct->pci_dev->dev, "PCI-E Link error detected: 0x%08x\n", - val & 0x000f0000); + val & 0x000c0000); } val |= 0xf; /* Enable Link error reporting */ @@ -229,7 +219,7 @@ void lio_cn6xxx_setup_global_output_regs(struct octeon_device *oct) /* / Select Packet count instead of bytes for SLI_PKTi_CNTS[CNT] */ octeon_write_csr(oct, CN6XXX_SLI_PKT_OUT_BMODE, 0); - /* / Select ES,RO,NS setting from register for Output Queue Packet + /* Select ES, RO, NS setting from register for Output Queue Packet * Address */ octeon_write_csr(oct, CN6XXX_SLI_PKT_DPADDR, 0xFFFFFFFF); @@ -547,14 +537,14 @@ static void lio_cn6xxx_get_pcie_qlmport(struct octeon_device *oct) dev_dbg(&oct->pci_dev->dev, "Using PCIE Port %d\n", oct->pcie_port); } -void +static void lio_cn6xxx_process_pcie_error_intr(struct octeon_device *oct, u64 intr64) { dev_err(&oct->pci_dev->dev, "Error Intr: 0x%016llx\n", CVM_CAST64(intr64)); } -int lio_cn6xxx_process_droq_intr_regs(struct octeon_device *oct) +static int lio_cn6xxx_process_droq_intr_regs(struct octeon_device *oct) { struct octeon_droq *droq; int oq_no; @@ -579,7 +569,7 @@ int lio_cn6xxx_process_droq_intr_regs(struct octeon_device *oct) continue; droq = oct->droq[oq_no]; - pkt_count = octeon_droq_check_hw_for_pkts(oct, droq); + pkt_count = octeon_droq_check_hw_for_pkts(droq); if (pkt_count) { oct->droq_intr |= (1ULL << oq_no); if (droq->ops.poll_mode) { diff --git a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.h b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.h index fe2932cb7ed8..28c47224221a 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.h +++ b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.h @@ -82,8 +82,6 @@ void lio_cn6xxx_setup_iq_regs(struct octeon_device *oct, u32 iq_no); void lio_cn6xxx_setup_oq_regs(struct octeon_device *oct, u32 oq_no); void lio_cn6xxx_enable_io_queues(struct octeon_device *oct); void lio_cn6xxx_disable_io_queues(struct octeon_device *oct); -void lio_cn6xxx_process_pcie_error_intr(struct octeon_device *oct, u64 intr64); -int lio_cn6xxx_process_droq_intr_regs(struct octeon_device *oct); irqreturn_t lio_cn6xxx_process_interrupt_regs(void *dev); void lio_cn6xxx_reinit_regs(struct octeon_device *oct); void lio_cn6xxx_bar1_idx_setup(struct octeon_device *oct, u64 core_addr, diff --git a/drivers/net/ethernet/cavium/liquidio/cn68xx_device.c b/drivers/net/ethernet/cavium/liquidio/cn68xx_device.c index 8e830d0c0754..29755bc68f12 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn68xx_device.c +++ b/drivers/net/ethernet/cavium/liquidio/cn68xx_device.c @@ -19,28 +19,17 @@ * This file may also be available under a different license from Cavium. * Contact Cavium, Inc. for more information **********************************************************************/ -#include <linux/version.h> -#include <linux/types.h> -#include <linux/list.h> -#include <linux/interrupt.h> #include <linux/pci.h> -#include <linux/kthread.h> #include <linux/netdevice.h> -#include "octeon_config.h" #include "liquidio_common.h" #include "octeon_droq.h" #include "octeon_iq.h" #include "response_manager.h" #include "octeon_device.h" -#include "octeon_nic.h" #include "octeon_main.h" -#include "octeon_network.h" #include "cn66xx_regs.h" #include "cn66xx_device.h" #include "cn68xx_regs.h" -#include "cn68xx_device.h" -#include "liquidio_image.h" -#include "octeon_mem_ops.h" static void lio_cn68xx_set_dpi_regs(struct octeon_device *oct) { @@ -129,7 +118,7 @@ static inline void lio_cn68xx_vendor_message_fix(struct octeon_device *oct) pci_write_config_dword(oct->pci_dev, CN6XXX_PCIE_FLTMSK, val); } -int lio_is_210nv(struct octeon_device *oct) +static int lio_is_210nv(struct octeon_device *oct) { u64 mio_qlm4_cfg = lio_pci_readq(oct, CN6XXX_MIO_QLM4_CFG); diff --git a/drivers/net/ethernet/cavium/liquidio/cn68xx_device.h b/drivers/net/ethernet/cavium/liquidio/cn68xx_device.h index d4e1c9fb0bf2..ea7bdcce6044 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn68xx_device.h +++ b/drivers/net/ethernet/cavium/liquidio/cn68xx_device.h @@ -28,6 +28,5 @@ #define __CN68XX_DEVICE_H__ int lio_setup_cn68xx_octeon_device(struct octeon_device *oct); -int lio_is_210nv(struct octeon_device *oct); #endif diff --git a/drivers/net/ethernet/cavium/liquidio/cn68xx_regs.h b/drivers/net/ethernet/cavium/liquidio/cn68xx_regs.h index 38cddbd107b6..d45a0f4aaf1f 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn68xx_regs.h +++ b/drivers/net/ethernet/cavium/liquidio/cn68xx_regs.h @@ -29,7 +29,6 @@ #ifndef __CN68XX_REGS_H__ #define __CN68XX_REGS_H__ -#include "cn66xx_regs.h" /*###################### REQUEST QUEUE #########################*/ diff --git a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c index 03bfa9771e4d..289eb8907922 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c @@ -19,13 +19,9 @@ * This file may also be available under a different license from Cavium. * Contact Cavium, Inc. for more information **********************************************************************/ -#include <linux/version.h> #include <linux/netdevice.h> #include <linux/net_tstamp.h> -#include <linux/ethtool.h> -#include <linux/dma-mapping.h> #include <linux/pci.h> -#include "octeon_config.h" #include "liquidio_common.h" #include "octeon_droq.h" #include "octeon_iq.h" @@ -36,9 +32,6 @@ #include "octeon_network.h" #include "cn66xx_regs.h" #include "cn66xx_device.h" -#include "cn68xx_regs.h" -#include "cn68xx_device.h" -#include "liquidio_image.h" static int octnet_get_link_stats(struct net_device *netdev); @@ -106,6 +99,7 @@ static const char oct_stats_strings[][ETH_GSTRING_LEN] = { "tx_tso", "tx_tso_packets", "tx_tso_err", + "tx_vxlan", "mac_tx_total_pkts", "mac_tx_total_bytes", @@ -129,6 +123,9 @@ static const char oct_stats_strings[][ETH_GSTRING_LEN] = { "rx_err_link", "rx_err_drop", + "rx_vxlan", + "rx_vxlan_err", + "rx_lro_pkts", "rx_lro_bytes", "rx_total_lro", @@ -167,6 +164,7 @@ static const char oct_iq_stats_strings[][ETH_GSTRING_LEN] = { "fw_bytes_sent", "tso", + "vxlan", "txq_restart", }; @@ -186,6 +184,7 @@ static const char oct_droq_stats_strings[][ETH_GSTRING_LEN] = { "fw_bytes_received", "fw_dropped_nodispatch", + "vxlan", "buffer_alloc_failure", }; @@ -340,20 +339,18 @@ static void octnet_mdio_resp_callback(struct octeon_device *oct, u32 status, void *buf) { - struct oct_mdio_cmd_resp *mdio_cmd_rsp; struct oct_mdio_cmd_context *mdio_cmd_ctx; struct octeon_soft_command *sc = (struct octeon_soft_command *)buf; - mdio_cmd_rsp = (struct oct_mdio_cmd_resp *)sc->virtrptr; mdio_cmd_ctx = (struct oct_mdio_cmd_context *)sc->ctxptr; oct = lio_get_device(mdio_cmd_ctx->octeon_id); if (status) { dev_err(&oct->pci_dev->dev, "MIDO instruction failed. Status: %llx\n", CVM_CAST64(status)); - ACCESS_ONCE(mdio_cmd_ctx->cond) = -1; + WRITE_ONCE(mdio_cmd_ctx->cond, -1); } else { - ACCESS_ONCE(mdio_cmd_ctx->cond) = 1; + WRITE_ONCE(mdio_cmd_ctx->cond, 1); } wake_up_interruptible(&mdio_cmd_ctx->wc); } @@ -384,7 +381,7 @@ octnet_mdio45_access(struct lio *lio, int op, int loc, int *value) mdio_cmd_rsp = (struct oct_mdio_cmd_resp *)sc->virtrptr; mdio_cmd = (struct oct_mdio_cmd *)sc->virtdptr; - ACCESS_ONCE(mdio_cmd_ctx->cond) = 0; + WRITE_ONCE(mdio_cmd_ctx->cond, 0); mdio_cmd_ctx->octeon_id = lio_get_device_id(oct_dev); mdio_cmd->op = op; mdio_cmd->mdio_addr = loc; @@ -423,7 +420,7 @@ octnet_mdio45_access(struct lio *lio, int op, int loc, int *value) octeon_swap_8B_data((u64 *)(&mdio_cmd_rsp->resp), sizeof(struct oct_mdio_cmd) / 8); - if (ACCESS_ONCE(mdio_cmd_ctx->cond) == 1) { + if (READ_ONCE(mdio_cmd_ctx->cond) == 1) { if (!op) *value = mdio_cmd_rsp->resp.value1; } else { @@ -467,18 +464,16 @@ static int lio_set_phys_id(struct net_device *netdev, /* Configure Beacon values */ value = LIO68XX_LED_BEACON_CFGON; - ret = - octnet_mdio45_access(lio, 1, - LIO68XX_LED_BEACON_ADDR, - &value); + ret = octnet_mdio45_access(lio, 1, + LIO68XX_LED_BEACON_ADDR, + &value); if (ret) return ret; value = LIO68XX_LED_CTRL_CFGON; - ret = - octnet_mdio45_access(lio, 1, - LIO68XX_LED_CTRL_ADDR, - &value); + ret = octnet_mdio45_access(lio, 1, + LIO68XX_LED_CTRL_ADDR, + &value); if (ret) return ret; } else { @@ -557,7 +552,7 @@ lio_ethtool_get_ringparam(struct net_device *netdev, tx_pending = CFG_GET_NUM_TX_DESCS_NIC_IF(conf6x, lio->ifidx); } - if (lio->mtu > OCTNET_DEFAULT_FRM_SIZE) { + if (lio->mtu > OCTNET_DEFAULT_FRM_SIZE - OCTNET_FRM_HEADER_SIZE) { ering->rx_pending = 0; ering->rx_max_pending = 0; ering->rx_mini_pending = 0; @@ -617,7 +612,8 @@ lio_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) static void lio_get_ethtool_stats(struct net_device *netdev, - struct ethtool_stats *stats, u64 *data) + struct ethtool_stats *stats __attribute__((unused)), + u64 *data) { struct lio *lio = GET_LIO(netdev); struct octeon_device *oct_dev = lio->oct_dev; @@ -675,6 +671,10 @@ lio_get_ethtool_stats(struct net_device *netdev, *fw_err_tso */ data[i++] = CVM_CAST64(oct_dev->link_stats.fromhost.fw_err_tso); + /*per_core_stats[cvmx_get_core_num()].link_stats[idx].fromhost. + *fw_tx_vxlan + */ + data[i++] = CVM_CAST64(oct_dev->link_stats.fromhost.fw_tx_vxlan); /* mac tx statistics */ /*CVMX_BGXX_CMRX_TX_STAT5 */ @@ -729,6 +729,15 @@ lio_get_ethtool_stats(struct net_device *netdev, */ data[i++] = CVM_CAST64(oct_dev->link_stats.fromwire.fw_err_drop); + /*per_core_stats[cvmx_get_core_num()].link_stats[lro_ctx->ifidx]. + *fromwire.fw_rx_vxlan + */ + data[i++] = CVM_CAST64(oct_dev->link_stats.fromwire.fw_rx_vxlan); + /*per_core_stats[cvmx_get_core_num()].link_stats[lro_ctx->ifidx]. + *fromwire.fw_rx_vxlan_err + */ + data[i++] = CVM_CAST64(oct_dev->link_stats.fromwire.fw_rx_vxlan_err); + /* LRO */ /*per_core_stats[cvmx_get_core_num()].link_stats[ifidx].fromwire. *fw_lro_pkts @@ -822,6 +831,8 @@ lio_get_ethtool_stats(struct net_device *netdev, /*tso request*/ data[i++] = CVM_CAST64(oct_dev->instr_queue[j]->stats.tx_gso); + /*vxlan request*/ + data[i++] = CVM_CAST64(oct_dev->instr_queue[j]->stats.tx_vxlan); /*txq restart*/ data[i++] = CVM_CAST64(oct_dev->instr_queue[j]->stats.tx_restart); @@ -858,6 +869,9 @@ lio_get_ethtool_stats(struct net_device *netdev, CVM_CAST64(oct_dev->droq[j]->stats.bytes_received); data[i++] = CVM_CAST64(oct_dev->droq[j]->stats.dropped_nodispatch); + + data[i++] = + CVM_CAST64(oct_dev->droq[j]->stats.rx_vxlan); data[i++] = CVM_CAST64(oct_dev->droq[j]->stats.rx_alloc_failure); } @@ -945,7 +959,6 @@ static int lio_get_intr_coalesce(struct net_device *netdev, intr_coal->rx_max_coalesced_frames = CFG_GET_OQ_INTR_PKT(cn6xxx->conf); } - iq = oct->instr_queue[lio->linfo.txpciq[0].s.q_no]; intr_coal->tx_max_coalesced_frames = iq->fill_threshold; break; @@ -1043,7 +1056,7 @@ static int octnet_set_intrmod_cfg(struct lio *lio, return 0; } -void +static void octnet_nic_stats_callback(struct octeon_device *oct_dev, u32 status, void *ptr) { @@ -1083,6 +1096,9 @@ octnet_nic_stats_callback(struct octeon_device *oct_dev, rstats->fw_err_pko = rsp_rstats->fw_err_pko; rstats->fw_err_link = rsp_rstats->fw_err_link; rstats->fw_err_drop = rsp_rstats->fw_err_drop; + rstats->fw_rx_vxlan = rsp_rstats->fw_rx_vxlan; + rstats->fw_rx_vxlan_err = rsp_rstats->fw_rx_vxlan_err; + /* Number of packets that are LROed */ rstats->fw_lro_pkts = rsp_rstats->fw_lro_pkts; /* Number of octets that are LROed */ @@ -1127,6 +1143,8 @@ octnet_nic_stats_callback(struct octeon_device *oct_dev, tstats->fw_tso = rsp_tstats->fw_tso; tstats->fw_tso_fwd = rsp_tstats->fw_tso_fwd; tstats->fw_err_tso = rsp_tstats->fw_err_tso; + tstats->fw_tx_vxlan = rsp_tstats->fw_tx_vxlan; + resp->status = 1; } else { resp->status = -1; @@ -1523,7 +1541,7 @@ static int lio_nway_reset(struct net_device *netdev) } /* Return register dump len. */ -static int lio_get_regs_len(struct net_device *dev) +static int lio_get_regs_len(struct net_device *dev __attribute__((unused))) { return OCT_ETHTOOL_REGDUMP_LEN; } @@ -1667,13 +1685,12 @@ static void lio_get_regs(struct net_device *dev, int len = 0; struct octeon_device *oct = lio->oct_dev; - memset(regbuf, 0, OCT_ETHTOOL_REGDUMP_LEN); regs->version = OCT_ETHTOOL_REGSVER; switch (oct->chip_id) { - /* case OCTEON_CN73XX: Todo */ case OCTEON_CN68XX: case OCTEON_CN66XX: + memset(regbuf, 0, OCT_ETHTOOL_REGDUMP_LEN); len += cn6xxx_read_csr_reg(regbuf + len, oct); len += cn6xxx_read_config_reg(regbuf + len, oct); break; diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 1a584ebde42c..20d6942edf40 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -20,24 +20,12 @@ * Contact Cavium, Inc. for more information **********************************************************************/ #include <linux/version.h> -#include <linux/module.h> -#include <linux/crc32.h> -#include <linux/dma-mapping.h> #include <linux/pci.h> -#include <linux/pci_ids.h> -#include <linux/ip.h> -#include <net/ip.h> -#include <linux/ipv6.h> #include <linux/net_tstamp.h> #include <linux/if_vlan.h> #include <linux/firmware.h> -#include <linux/ethtool.h> #include <linux/ptp_clock_kernel.h> -#include <linux/types.h> -#include <linux/list.h> -#include <linux/workqueue.h> -#include <linux/interrupt.h> -#include "octeon_config.h" +#include <net/vxlan.h> #include "liquidio_common.h" #include "octeon_droq.h" #include "octeon_iq.h" @@ -48,7 +36,6 @@ #include "octeon_network.h" #include "cn66xx_regs.h" #include "cn66xx_device.h" -#include "cn68xx_regs.h" #include "cn68xx_device.h" #include "liquidio_image.h" @@ -251,8 +238,7 @@ static int lio_wait_for_oq_pkts(struct octeon_device *oct) for (i = 0; i < MAX_OCTEON_OUTPUT_QUEUES(oct); i++) { if (!(oct->io_qmask.oq & (1ULL << i))) continue; - pkt_cnt += octeon_droq_check_hw_for_pkts(oct, - oct->droq[i]); + pkt_cnt += octeon_droq_check_hw_for_pkts(oct->droq[i]); } if (pkt_cnt > 0) { pending_pkts += pkt_cnt; @@ -507,7 +493,8 @@ static pci_ers_result_t liquidio_pcie_error_detected(struct pci_dev *pdev, * \brief mmio handler * @param pdev Pointer to PCI device */ -static pci_ers_result_t liquidio_pcie_mmio_enabled(struct pci_dev *pdev) +static pci_ers_result_t liquidio_pcie_mmio_enabled( + struct pci_dev *pdev __attribute__((unused))) { /* We should never hit this since we never ask for a reset for a Fatal * Error. We always return DISCONNECT in io_error above. @@ -523,7 +510,8 @@ static pci_ers_result_t liquidio_pcie_mmio_enabled(struct pci_dev *pdev) * Restart the card from scratch, as if from a cold-boot. Implementation * resembles the first-half of the octeon_resume routine. */ -static pci_ers_result_t liquidio_pcie_slot_reset(struct pci_dev *pdev) +static pci_ers_result_t liquidio_pcie_slot_reset( + struct pci_dev *pdev __attribute__((unused))) { /* We should never hit this since we never ask for a reset for a Fatal * Error. We always return DISCONNECT in io_error above. @@ -540,7 +528,7 @@ static pci_ers_result_t liquidio_pcie_slot_reset(struct pci_dev *pdev) * its OK to resume normal operation. Implementation resembles the * second-half of the octeon_resume routine. */ -static void liquidio_pcie_resume(struct pci_dev *pdev) +static void liquidio_pcie_resume(struct pci_dev *pdev __attribute__((unused))) { /* Nothing to be done here. */ } @@ -551,7 +539,8 @@ static void liquidio_pcie_resume(struct pci_dev *pdev) * @param pdev Pointer to PCI device * @param state state to suspend to */ -static int liquidio_suspend(struct pci_dev *pdev, pm_message_t state) +static int liquidio_suspend(struct pci_dev *pdev __attribute__((unused)), + pm_message_t state __attribute__((unused))) { return 0; } @@ -560,7 +549,7 @@ static int liquidio_suspend(struct pci_dev *pdev, pm_message_t state) * \brief called when resuming * @param pdev Pointer to PCI device */ -static int liquidio_resume(struct pci_dev *pdev) +static int liquidio_resume(struct pci_dev *pdev __attribute__((unused))) { return 0; } @@ -1104,7 +1093,9 @@ static int octeon_setup_interrupt(struct octeon_device *oct) * @param pdev PCI device structure * @param ent unused */ -static int liquidio_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +static int +liquidio_probe(struct pci_dev *pdev, + const struct pci_device_id *ent __attribute__((unused))) { struct octeon_device *oct_dev = NULL; struct handshake *hs; @@ -1267,7 +1258,7 @@ static void octeon_destroy_resources(struct octeon_device *oct) /* Nothing to be done here either */ break; - } /* end switch(oct->status) */ + } /* end switch (oct->status) */ tasklet_kill(&oct_priv->droq_tasklet); } @@ -1724,8 +1715,10 @@ static int liquidio_ptp_settime(struct ptp_clock_info *ptp, * @param rq request * @param on is it on */ -static int liquidio_ptp_enable(struct ptp_clock_info *ptp, - struct ptp_clock_request *rq, int on) +static int +liquidio_ptp_enable(struct ptp_clock_info *ptp __attribute__((unused)), + struct ptp_clock_request *rq __attribute__((unused)), + int on __attribute__((unused))) { return -EOPNOTSUPP; } @@ -1866,7 +1859,7 @@ static int octeon_setup_droq(struct octeon_device *oct, int q_no, int num_descs, * @param buf pointer to resp structure */ static void if_cfg_callback(struct octeon_device *oct, - u32 status, + u32 status __attribute__((unused)), void *buf) { struct octeon_soft_command *sc = (struct octeon_soft_command *)buf; @@ -1880,7 +1873,7 @@ static void if_cfg_callback(struct octeon_device *oct, if (resp->status) dev_err(&oct->pci_dev->dev, "nic if cfg instruction failed. Status: %llx\n", CVM_CAST64(resp->status)); - ACCESS_ONCE(ctx->cond) = 1; + WRITE_ONCE(ctx->cond, 1); snprintf(oct->fw_info.liquidio_firmware_version, 32, "%s", resp->cfg_info.liquidio_firmware_version); @@ -1900,7 +1893,8 @@ static void if_cfg_callback(struct octeon_device *oct, * @returns selected queue number */ static u16 select_q(struct net_device *dev, struct sk_buff *skb, - void *accel_priv, select_queue_fallback_t fallback) + void *accel_priv __attribute__((unused)), + select_queue_fallback_t fallback __attribute__((unused))) { u32 qindex = 0; struct lio *lio; @@ -1920,7 +1914,7 @@ static u16 select_q(struct net_device *dev, struct sk_buff *skb, * @param arg - farg registered in droq_ops */ static void -liquidio_push_packet(u32 octeon_id, +liquidio_push_packet(u32 octeon_id __attribute__((unused)), void *skbuff, u32 len, union octeon_rh *rh, @@ -2000,14 +1994,25 @@ liquidio_push_packet(u32 octeon_id, } skb->protocol = eth_type_trans(skb, skb->dev); - if ((netdev->features & NETIF_F_RXCSUM) && - (rh->r_dh.csum_verified == CNNIC_CSUM_VERIFIED)) + (((rh->r_dh.encap_on) && + (rh->r_dh.csum_verified & CNNIC_TUN_CSUM_VERIFIED)) || + (!(rh->r_dh.encap_on) && + (rh->r_dh.csum_verified & CNNIC_CSUM_VERIFIED)))) /* checksum has already been verified */ skb->ip_summed = CHECKSUM_UNNECESSARY; else skb->ip_summed = CHECKSUM_NONE; + /* Setting Encapsulation field on basis of status received + * from the firmware + */ + if (rh->r_dh.encap_on) { + skb->encapsulation = 1; + skb->csum_level = 1; + droq->stats.rx_vxlan++; + } + /* inbound VLAN tag */ if ((netdev->features & NETIF_F_HW_VLAN_CTAG_RX) && (rh->r_dh.vlan != 0)) { @@ -2120,7 +2125,7 @@ static int liquidio_napi_poll(struct napi_struct *napi, int budget) /** * \brief Setup input and output queues * @param octeon_dev octeon device - * @param net_device Net device + * @param ifidx Interface Index * * Note: Queues are with respect to the octeon device. Thus * an input queue is for egress packets, and output queues @@ -2331,7 +2336,6 @@ static int liquidio_stop(struct net_device *netdev) } dev_info(&oct->pci_dev->dev, "%s interface is stopped\n", netdev->name); - module_put(THIS_MODULE); return 0; } @@ -2342,6 +2346,7 @@ void liquidio_link_ctrl_cmd_completion(void *nctrl_ptr) struct net_device *netdev = (struct net_device *)nctrl->netpndev; struct lio *lio = GET_LIO(netdev); struct octeon_device *oct = lio->oct_dev; + u8 *mac; switch (nctrl->ncmd.s.cmd) { case OCTNET_CMD_CHANGE_DEVFLAGS: @@ -2349,22 +2354,24 @@ void liquidio_link_ctrl_cmd_completion(void *nctrl_ptr) break; case OCTNET_CMD_CHANGE_MACADDR: - /* If command is successful, change the MACADDR. */ - netif_info(lio, probe, lio->netdev, " MACAddr changed to 0x%llx\n", - CVM_CAST64(nctrl->udd[0])); - dev_info(&oct->pci_dev->dev, "%s MACAddr changed to 0x%llx\n", - netdev->name, CVM_CAST64(nctrl->udd[0])); - memcpy(netdev->dev_addr, ((u8 *)&nctrl->udd[0]) + 2, ETH_ALEN); + mac = ((u8 *)&nctrl->udd[0]) + 2; + netif_info(lio, probe, lio->netdev, + "%s %2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x\n", + "MACAddr changed to", mac[0], mac[1], + mac[2], mac[3], mac[4], mac[5]); break; case OCTNET_CMD_CHANGE_MTU: /* If command is successful, change the MTU. */ netif_info(lio, probe, lio->netdev, " MTU Changed from %d to %d\n", - netdev->mtu, nctrl->ncmd.s.param2); + netdev->mtu, nctrl->ncmd.s.param1); dev_info(&oct->pci_dev->dev, "%s MTU Changed from %d to %d\n", netdev->name, netdev->mtu, - nctrl->ncmd.s.param2); - netdev->mtu = nctrl->ncmd.s.param2; + nctrl->ncmd.s.param1); + rtnl_lock(); + netdev->mtu = nctrl->ncmd.s.param1; + call_netdevice_notifiers(NETDEV_CHANGEMTU, netdev); + rtnl_unlock(); break; case OCTNET_CMD_GPIO_ACCESS: @@ -2410,6 +2417,55 @@ void liquidio_link_ctrl_cmd_completion(void *nctrl_ptr) netdev->name); break; + /* Case to handle "OCTNET_CMD_TNL_RX_CSUM_CTL" + * Command passed by NIC driver + */ + case OCTNET_CMD_TNL_RX_CSUM_CTL: + if (nctrl->ncmd.s.param1 == OCTNET_CMD_RXCSUM_ENABLE) { + netif_info(lio, probe, lio->netdev, + "%s RX Checksum Offload Enabled\n", + netdev->name); + } else if (nctrl->ncmd.s.param1 == + OCTNET_CMD_RXCSUM_DISABLE) { + netif_info(lio, probe, lio->netdev, + "%s RX Checksum Offload Disabled\n", + netdev->name); + } + break; + + /* Case to handle "OCTNET_CMD_TNL_TX_CSUM_CTL" + * Command passed by NIC driver + */ + case OCTNET_CMD_TNL_TX_CSUM_CTL: + if (nctrl->ncmd.s.param1 == OCTNET_CMD_TXCSUM_ENABLE) { + netif_info(lio, probe, lio->netdev, + "%s TX Checksum Offload Enabled\n", + netdev->name); + } else if (nctrl->ncmd.s.param1 == + OCTNET_CMD_TXCSUM_DISABLE) { + netif_info(lio, probe, lio->netdev, + "%s TX Checksum Offload Disabled\n", + netdev->name); + } + break; + + /* Case to handle "OCTNET_CMD_VXLAN_PORT_CONFIG" + * Command passed by NIC driver + */ + case OCTNET_CMD_VXLAN_PORT_CONFIG: + if (nctrl->ncmd.s.more == OCTNET_CMD_VXLAN_PORT_ADD) { + netif_info(lio, probe, lio->netdev, + "%s VxLAN Destination UDP PORT:%d ADDED\n", + netdev->name, + nctrl->ncmd.s.param1); + } else if (nctrl->ncmd.s.more == + OCTNET_CMD_VXLAN_PORT_DEL) { + netif_info(lio, probe, lio->netdev, + "%s VxLAN Destination UDP PORT:%d DELETED\n", + netdev->name, + nctrl->ncmd.s.param1); + } + break; case OCTNET_CMD_SET_FLOW_CTL: netif_info(lio, probe, lio->netdev, "Set RX/TX flow control parameters\n"); @@ -2465,7 +2521,7 @@ static void liquidio_set_mcast_list(struct net_device *netdev) struct octnic_ctrl_pkt nctrl; struct netdev_hw_addr *ha; u64 *mc; - int ret, i; + int ret; int mc_count = min(netdev_mc_count(netdev), MAX_OCTEON_MULTICAST_ADDR); memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt)); @@ -2481,7 +2537,6 @@ static void liquidio_set_mcast_list(struct net_device *netdev) nctrl.cb_fn = liquidio_link_ctrl_cmd_completion; /* copy all the addresses into the udd */ - i = 0; mc = &nctrl.udd[0]; netdev_for_each_mc_addr(ha, netdev) { *mc = 0; @@ -2604,18 +2659,16 @@ static int liquidio_change_mtu(struct net_device *netdev, int new_mtu) struct lio *lio = GET_LIO(netdev); struct octeon_device *oct = lio->oct_dev; struct octnic_ctrl_pkt nctrl; - int max_frm_size = new_mtu + OCTNET_FRM_HEADER_SIZE; int ret = 0; - /* Limit the MTU to make sure the ethernet packets are between 64 bytes - * and 65535 bytes + /* Limit the MTU to make sure the ethernet packets are between 68 bytes + * and 16000 bytes */ - if ((max_frm_size < OCTNET_MIN_FRM_SIZE) || - (max_frm_size > OCTNET_MAX_FRM_SIZE)) { + if ((new_mtu < LIO_MIN_MTU_SIZE) || + (new_mtu > LIO_MAX_MTU_SIZE)) { dev_err(&oct->pci_dev->dev, "Invalid MTU: %d\n", new_mtu); dev_err(&oct->pci_dev->dev, "Valid range %d and %d\n", - (OCTNET_MIN_FRM_SIZE - OCTNET_FRM_HEADER_SIZE), - (OCTNET_MAX_FRM_SIZE - OCTNET_FRM_HEADER_SIZE)); + LIO_MIN_MTU_SIZE, LIO_MAX_MTU_SIZE); return -EINVAL; } @@ -2646,7 +2699,7 @@ static int liquidio_change_mtu(struct net_device *netdev, int new_mtu) * @param ifr interface request * @param cmd command */ -static int hwtstamp_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) +static int hwtstamp_ioctl(struct net_device *netdev, struct ifreq *ifr) { struct hwtstamp_config conf; struct lio *lio = GET_LIO(netdev); @@ -2707,7 +2760,7 @@ static int liquidio_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) { switch (cmd) { case SIOCSHWTSTAMP: - return hwtstamp_ioctl(netdev, ifr, cmd); + return hwtstamp_ioctl(netdev, ifr); default: return -EOPNOTSUPP; } @@ -2886,12 +2939,12 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev) /* defer sending if queue is full */ stats->tx_iq_busy++; netif_info(lio, tx_err, lio->netdev, "Transmit failed iq:%d full\n", - ndata.q_no); + lio->txq); return NETDEV_TX_BUSY; } } /* pr_info(" XMIT - valid Qs: %d, 1st Q no: %d, cpu: %d, q_no:%d\n", - * lio->linfo.num_txpciq, lio->txq, cpu, ndata.q_no ); + * lio->linfo.num_txpciq, lio->txq, cpu, ndata.q_no); */ ndata.datasize = skb->len; @@ -2899,9 +2952,14 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev) cmdsetup.u64 = 0; cmdsetup.s.iq_no = iq_no; - if (skb->ip_summed == CHECKSUM_PARTIAL) - cmdsetup.s.transport_csum = 1; - + if (skb->ip_summed == CHECKSUM_PARTIAL) { + if (skb->encapsulation) { + cmdsetup.s.tnl_csum = 1; + stats->tx_vxlan++; + } else { + cmdsetup.s.transport_csum = 1; + } + } if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; cmdsetup.s.timestamp = 1; @@ -2910,6 +2968,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev) if (skb_shinfo(skb)->nr_frags == 0) { cmdsetup.s.u.datasize = skb->len; octnet_prepare_pci_cmd(oct, &ndata.cmd, &cmdsetup, tag); + /* Offload checksum calculation for TCP/UDP packets */ dptr = dma_map_single(&oct->pci_dev->dev, skb->data, @@ -3124,6 +3183,72 @@ static int liquidio_vlan_rx_kill_vid(struct net_device *netdev, return ret; } +/** Sending command to enable/disable RX checksum offload + * @param netdev pointer to network device + * @param command OCTNET_CMD_TNL_RX_CSUM_CTL + * @param rx_cmd_bit OCTNET_CMD_RXCSUM_ENABLE/ + * OCTNET_CMD_RXCSUM_DISABLE + * @returns SUCCESS or FAILURE + */ +int liquidio_set_rxcsum_command(struct net_device *netdev, int command, + u8 rx_cmd) +{ + struct lio *lio = GET_LIO(netdev); + struct octeon_device *oct = lio->oct_dev; + struct octnic_ctrl_pkt nctrl; + int ret = 0; + + nctrl.ncmd.u64 = 0; + nctrl.ncmd.s.cmd = command; + nctrl.ncmd.s.param1 = rx_cmd; + nctrl.iq_no = lio->linfo.txpciq[0].s.q_no; + nctrl.wait_time = 100; + nctrl.netpndev = (u64)netdev; + nctrl.cb_fn = liquidio_link_ctrl_cmd_completion; + + ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl); + if (ret < 0) { + dev_err(&oct->pci_dev->dev, + "DEVFLAGS RXCSUM change failed in core(ret:0x%x)\n", + ret); + } + return ret; +} + +/** Sending command to add/delete VxLAN UDP port to firmware + * @param netdev pointer to network device + * @param command OCTNET_CMD_VXLAN_PORT_CONFIG + * @param vxlan_port VxLAN port to be added or deleted + * @param vxlan_cmd_bit OCTNET_CMD_VXLAN_PORT_ADD, + * OCTNET_CMD_VXLAN_PORT_DEL + * @returns SUCCESS or FAILURE + */ +static int liquidio_vxlan_port_command(struct net_device *netdev, int command, + u16 vxlan_port, u8 vxlan_cmd_bit) +{ + struct lio *lio = GET_LIO(netdev); + struct octeon_device *oct = lio->oct_dev; + struct octnic_ctrl_pkt nctrl; + int ret = 0; + + nctrl.ncmd.u64 = 0; + nctrl.ncmd.s.cmd = command; + nctrl.ncmd.s.more = vxlan_cmd_bit; + nctrl.ncmd.s.param1 = vxlan_port; + nctrl.iq_no = lio->linfo.txpciq[0].s.q_no; + nctrl.wait_time = 100; + nctrl.netpndev = (u64)netdev; + nctrl.cb_fn = liquidio_link_ctrl_cmd_completion; + + ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl); + if (ret < 0) { + dev_err(&oct->pci_dev->dev, + "VxLAN port add/delete failed in core (ret:0x%x)\n", + ret); + } + return ret; +} + int liquidio_set_feature(struct net_device *netdev, int cmd, u16 param1) { struct lio *lio = GET_LIO(netdev); @@ -3204,9 +3329,48 @@ static int liquidio_set_features(struct net_device *netdev, liquidio_set_feature(netdev, OCTNET_CMD_LRO_DISABLE, OCTNIC_LROIPV4 | OCTNIC_LROIPV6); + /* Sending command to firmware to enable/disable RX checksum + * offload settings using ethtool + */ + if (!(netdev->features & NETIF_F_RXCSUM) && + (lio->enc_dev_capability & NETIF_F_RXCSUM) && + (features & NETIF_F_RXCSUM)) + liquidio_set_rxcsum_command(netdev, + OCTNET_CMD_TNL_RX_CSUM_CTL, + OCTNET_CMD_RXCSUM_ENABLE); + else if ((netdev->features & NETIF_F_RXCSUM) && + (lio->enc_dev_capability & NETIF_F_RXCSUM) && + !(features & NETIF_F_RXCSUM)) + liquidio_set_rxcsum_command(netdev, OCTNET_CMD_TNL_RX_CSUM_CTL, + OCTNET_CMD_RXCSUM_DISABLE); + return 0; } +static void liquidio_add_vxlan_port(struct net_device *netdev, + struct udp_tunnel_info *ti) +{ + if (ti->type != UDP_TUNNEL_TYPE_VXLAN) + return; + + liquidio_vxlan_port_command(netdev, + OCTNET_CMD_VXLAN_PORT_CONFIG, + htons(ti->port), + OCTNET_CMD_VXLAN_PORT_ADD); +} + +static void liquidio_del_vxlan_port(struct net_device *netdev, + struct udp_tunnel_info *ti) +{ + if (ti->type != UDP_TUNNEL_TYPE_VXLAN) + return; + + liquidio_vxlan_port_command(netdev, + OCTNET_CMD_VXLAN_PORT_CONFIG, + htons(ti->port), + OCTNET_CMD_VXLAN_PORT_DEL); +} + static struct net_device_ops lionetdevops = { .ndo_open = liquidio_open, .ndo_stop = liquidio_stop, @@ -3222,6 +3386,8 @@ static struct net_device_ops lionetdevops = { .ndo_do_ioctl = liquidio_ioctl, .ndo_fix_features = liquidio_fix_features, .ndo_set_features = liquidio_set_features, + .ndo_udp_tunnel_add = liquidio_add_vxlan_port, + .ndo_udp_tunnel_del = liquidio_del_vxlan_port, }; /** \brief Entry point for the liquidio module @@ -3323,7 +3489,6 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) struct liquidio_if_cfg_resp *resp; struct octdev_props *props; int retval, num_iqueues, num_oqueues; - int num_cpus = num_online_cpus(); union oct_nic_if_cfg if_cfg; unsigned int base_queue; unsigned int gmx_port_id; @@ -3365,14 +3530,11 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) gmx_port_id = CFG_GET_GMXID_NIC_IF(octeon_get_conf(octeon_dev), i); ifidx_or_pfnum = i; - if (num_iqueues > num_cpus) - num_iqueues = num_cpus; - if (num_oqueues > num_cpus) - num_oqueues = num_cpus; + dev_dbg(&octeon_dev->pci_dev->dev, "requesting config for interface %d, iqs %d, oqs %d\n", ifidx_or_pfnum, num_iqueues, num_oqueues); - ACCESS_ONCE(ctx->cond) = 0; + WRITE_ONCE(ctx->cond, 0); ctx->octeon_id = lio_get_device_id(octeon_dev); init_waitqueue_head(&ctx->wc); @@ -3390,7 +3552,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) sc->callback = if_cfg_callback; sc->callback_arg = sc; - sc->wait_time = 1000; + sc->wait_time = 3000; retval = octeon_send_soft_command(octeon_dev, sc); if (retval == IQ_SEND_FAILED) { @@ -3479,6 +3641,22 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) | NETIF_F_LRO; netif_set_gso_max_size(netdev, OCTNIC_GSO_MAX_SIZE); + /* Copy of transmit encapsulation capabilities: + * TSO, TSO6, Checksums for this device + */ + lio->enc_dev_capability = NETIF_F_IP_CSUM + | NETIF_F_IPV6_CSUM + | NETIF_F_GSO_UDP_TUNNEL + | NETIF_F_HW_CSUM | NETIF_F_SG + | NETIF_F_RXCSUM + | NETIF_F_TSO | NETIF_F_TSO6 + | NETIF_F_LRO; + + netdev->hw_enc_features = (lio->enc_dev_capability & + ~NETIF_F_LRO); + + lio->dev_capability |= NETIF_F_GSO_UDP_TUNNEL; + netdev->vlan_features = lio->dev_capability; /* Add any unchangeable hw features */ lio->dev_capability |= NETIF_F_HW_VLAN_CTAG_FILTER | @@ -3538,8 +3716,8 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) octeon_dev->priv_flags = 0x0; if (netdev->features & NETIF_F_LRO) - liquidio_set_feature(netdev, OCTNET_CMD_LRO_ENABLE, - OCTNIC_LROIPV4 | OCTNIC_LROIPV6); + liquidio_set_feature(netdev, OCTNET_CMD_LRO_ENABLE, + OCTNIC_LROIPV4 | OCTNIC_LROIPV6); liquidio_set_feature(netdev, OCTNET_CMD_ENABLE_VLAN_FILTER, 0); @@ -3561,6 +3739,15 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) ifstate_set(lio, LIO_IFSTATE_REGISTERED); + /* Sending command to firmware to enable Rx checksum offload + * by default at the time of setup of Liquidio driver for + * this device + */ + liquidio_set_rxcsum_command(netdev, OCTNET_CMD_TNL_RX_CSUM_CTL, + OCTNET_CMD_RXCSUM_ENABLE); + liquidio_set_feature(netdev, OCTNET_CMD_TNL_TX_CSUM_CTL, + OCTNET_CMD_TXCSUM_ENABLE); + dev_dbg(&octeon_dev->pci_dev->dev, "NIC ifidx:%d Setup successful\n", i); @@ -3771,6 +3958,7 @@ static int octeon_device_init(struct octeon_device *octeon_dev) /* Release any previously allocated queues */ for (j = 0; j < octeon_dev->num_oqs; j++) octeon_delete_droq(octeon_dev, j); + return 1; } atomic_set(&octeon_dev->status, OCT_DEV_DROQ_INIT_DONE); @@ -3793,7 +3981,8 @@ static int octeon_device_init(struct octeon_device *octeon_dev) /* Setup the interrupt handler and record the INT SUM register address */ - octeon_setup_interrupt(octeon_dev); + if (octeon_setup_interrupt(octeon_dev)) + return 1; /* Enable Octeon device interrupts */ octeon_dev->fn_list.enable_interrupt(octeon_dev->chip); diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h index 5aa01f427d4a..199a8b9c7dc5 100644 --- a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h +++ b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h @@ -34,6 +34,7 @@ #define LIQUIDIO_MICRO_VERSION ".1" #define LIQUIDIO_PACKAGE "" #define LIQUIDIO_VERSION "1.4.1" + #define CONTROL_IQ 0 /** Tag types used by Octeon cores in its work. */ enum octeon_tag_type { @@ -216,6 +217,13 @@ static inline void add_sg_size(struct octeon_sg_entry *sg_entry, #define OCTNET_CMD_ENABLE_VLAN_FILTER 0x16 #define OCTNET_CMD_ADD_VLAN_FILTER 0x17 #define OCTNET_CMD_DEL_VLAN_FILTER 0x18 +#define OCTNET_CMD_VXLAN_PORT_CONFIG 0x19 +#define OCTNET_CMD_VXLAN_PORT_ADD 0x0 +#define OCTNET_CMD_VXLAN_PORT_DEL 0x1 +#define OCTNET_CMD_RXCSUM_ENABLE 0x0 +#define OCTNET_CMD_RXCSUM_DISABLE 0x1 +#define OCTNET_CMD_TXCSUM_ENABLE 0x0 +#define OCTNET_CMD_TXCSUM_DISABLE 0x1 /* RX(packets coming from wire) Checksum verification flags */ /* TCP/UDP csum */ @@ -288,7 +296,7 @@ union octnet_cmd { #define OCTNET_CMD_SIZE (sizeof(union octnet_cmd)) -/* Instruction Header (DPI - CN23xx) - for OCTEON-III models */ +/* Instruction Header(DPI) - for OCTEON-III models */ struct octeon_instr_ih3 { #ifdef __BIG_ENDIAN_BITFIELD @@ -338,7 +346,7 @@ struct octeon_instr_ih3 { #endif }; -/* Optional PKI Instruction Header(PKI IH) - for OCTEON CN23XX models */ +/* Optional PKI Instruction Header(PKI IH) - for OCTEON-III models */ /** BIG ENDIAN format. */ struct octeon_instr_pki_ih3 { #ifdef __BIG_ENDIAN_BITFIELD @@ -533,6 +541,8 @@ union octeon_rh { u64 priority:3; u64 csum_verified:3; /** checksum verified. */ u64 has_hwtstamp:1; /** Has hardware timestamp. 1 = yes. */ + u64 encap_on:1; + u64 has_hash:1; /** Has hash (rth or rss). 1 = yes. */ } r_dh; struct { u64 opcode:4; @@ -542,7 +552,8 @@ union octeon_rh { u64 num_gmx_ports:8; u64 max_nic_ports:10; u64 app_cap_flags:4; - u64 app_mode:16; + u64 app_mode:8; + u64 pkind:8; } r_core_drv_init; struct { u64 opcode:4; @@ -562,6 +573,8 @@ union octeon_rh { u64 opcode:4; } r; struct { + u64 has_hash:1; /** Has hash (rth or rss). 1 = yes. */ + u64 encap_on:1; u64 has_hwtstamp:1; /** 1 = has hwtstamp */ u64 csum_verified:3; /** checksum verified. */ u64 priority:3; @@ -572,7 +585,8 @@ union octeon_rh { u64 opcode:4; } r_dh; struct { - u64 app_mode:16; + u64 pkind:8; + u64 app_mode:8; u64 app_cap_flags:4; u64 max_nic_ports:10; u64 num_gmx_ports:8; @@ -630,9 +644,11 @@ union oct_link_status { u64 autoneg:1; u64 if_mode:5; u64 pause:1; - u64 reserved:16; + u64 flashing:1; + u64 reserved:15; #else - u64 reserved:16; + u64 reserved:15; + u64 flashing:1; u64 pause:1; u64 if_mode:5; u64 autoneg:1; @@ -736,6 +752,8 @@ struct nic_rx_stats { u64 fw_err_pko; u64 fw_err_link; u64 fw_err_drop; + u64 fw_rx_vxlan; + u64 fw_rx_vxlan_err; /* LRO */ u64 fw_lro_pkts; /* Number of packets that are LROed */ @@ -776,6 +794,7 @@ struct nic_tx_stats { u64 fw_err_tso; u64 fw_tso; /* number of tso requests */ u64 fw_tso_fwd; /* number of packets segmented in tso */ + u64 fw_tx_vxlan; }; struct oct_link_stats { @@ -856,9 +875,9 @@ union oct_nic_if_cfg { u64 num_iqueues:16; u64 num_oqueues:16; u64 gmx_port_id:8; - u64 reserved:8; + u64 vf_id:8; #else - u64 reserved:8; + u64 vf_id:8; u64 gmx_port_id:8; u64 num_oqueues:16; u64 num_iqueues:16; diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_config.h b/drivers/net/ethernet/cavium/liquidio/octeon_config.h index 4b8c948400be..b3396e3a8bab 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_config.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_config.h @@ -226,7 +226,7 @@ struct octeon_oq_config { */ u64 refill_threshold:16; - /** If set, the Output queue uses info-pointer mode. (Default: 1 ) */ + /** If set, the Output queue uses info-pointer mode. (Default: 1) */ u64 info_ptr:32; /* Max number of OQs available */ @@ -236,7 +236,7 @@ struct octeon_oq_config { /* Max number of OQs available */ u64 max_oqs:8; - /** If set, the Output queue uses info-pointer mode. (Default: 1 ) */ + /** If set, the Output queue uses info-pointer mode. (Default: 1) */ u64 info_ptr:32; /** The number of buffers that were consumed during packet processing by diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_console.c b/drivers/net/ethernet/cavium/liquidio/octeon_console.c index 466147e409c9..bbb50ea66f16 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_console.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_console.c @@ -23,27 +23,14 @@ /** * @file octeon_console.c */ -#include <linux/version.h> -#include <linux/types.h> -#include <linux/list.h> -#include <linux/interrupt.h> #include <linux/pci.h> -#include <linux/kthread.h> #include <linux/netdevice.h> -#include "octeon_config.h" #include "liquidio_common.h" #include "octeon_droq.h" #include "octeon_iq.h" #include "response_manager.h" #include "octeon_device.h" -#include "octeon_nic.h" #include "octeon_main.h" -#include "octeon_network.h" -#include "cn66xx_regs.h" -#include "cn66xx_device.h" -#include "cn68xx_regs.h" -#include "cn68xx_device.h" -#include "liquidio_image.h" #include "octeon_mem_ops.h" static void octeon_remote_lock(void); @@ -51,6 +38,8 @@ static void octeon_remote_unlock(void); static u64 cvmx_bootmem_phy_named_block_find(struct octeon_device *oct, const char *name, u32 flags); +static int octeon_console_read(struct octeon_device *oct, u32 console_num, + char *buffer, u32 buf_size); #define MIN(a, b) min((a), (b)) #define CAST_ULL(v) ((u64)(v)) @@ -170,8 +159,8 @@ struct octeon_pci_console_desc { offsetof(struct cvmx_bootmem_desc, field), \ SIZEOF_FIELD(struct cvmx_bootmem_desc, field)) -#define __cvmx_bootmem_lock(flags) -#define __cvmx_bootmem_unlock(flags) +#define __cvmx_bootmem_lock(flags) (flags = flags) +#define __cvmx_bootmem_unlock(flags) (flags = flags) /** * This macro returns a member of the @@ -234,7 +223,7 @@ static void CVMX_BOOTMEM_NAMED_GET_NAME(struct octeon_device *oct, u32 len) { addr += offsetof(struct cvmx_bootmem_named_block_desc, name); - octeon_pci_read_core_mem(oct, addr, str, len); + octeon_pci_read_core_mem(oct, addr, (u8 *)str, len); str[len] = 0; } @@ -323,6 +312,9 @@ static u64 cvmx_bootmem_phy_named_block_find(struct octeon_device *oct, if (name && named_size) { char *name_tmp = kmalloc(name_length + 1, GFP_KERNEL); + if (!name_tmp) + break; + CVMX_BOOTMEM_NAMED_GET_NAME(oct, named_addr, name_tmp, name_length); @@ -383,7 +375,7 @@ static void octeon_remote_unlock(void) int octeon_console_send_cmd(struct octeon_device *oct, char *cmd_str, u32 wait_hundredths) { - u32 len = strlen(cmd_str); + u32 len = (u32)strlen(cmd_str); dev_dbg(&oct->pci_dev->dev, "sending \"%s\" to bootloader\n", cmd_str); @@ -440,8 +432,7 @@ int octeon_wait_for_bootloader(struct octeon_device *oct, } static void octeon_console_handle_result(struct octeon_device *oct, - size_t console_num, - char *buffer, s32 bytes_read) + size_t console_num) { struct octeon_console *console; @@ -492,7 +483,7 @@ static void check_console(struct work_struct *work) struct octeon_console *console; struct cavium_wk *wk = (struct cavium_wk *)work; struct octeon_device *oct = (struct octeon_device *)wk->ctxptr; - size_t console_num = wk->ctxul; + u32 console_num = (u32)wk->ctxul; u32 delay; console = &oct->console[console_num]; @@ -505,20 +496,17 @@ static void check_console(struct work_struct *work) */ bytes_read = octeon_console_read(oct, console_num, console_buffer, - sizeof(console_buffer) - 1, 0); + sizeof(console_buffer) - 1); if (bytes_read > 0) { total_read += bytes_read; - if (console->waiting) { - octeon_console_handle_result(oct, console_num, - console_buffer, - bytes_read); - } + if (console->waiting) + octeon_console_handle_result(oct, console_num); if (octeon_console_debug_enabled(console_num)) { output_console_line(oct, console, console_num, console_buffer, bytes_read); } } else if (bytes_read < 0) { - dev_err(&oct->pci_dev->dev, "Error reading console %lu, ret=%d\n", + dev_err(&oct->pci_dev->dev, "Error reading console %u, ret=%d\n", console_num, bytes_read); } @@ -530,7 +518,7 @@ static void check_console(struct work_struct *work) */ if (octeon_console_debug_enabled(console_num) && (total_read == 0) && (console->leftover[0])) { - dev_info(&oct->pci_dev->dev, "%lu: %s\n", + dev_info(&oct->pci_dev->dev, "%u: %s\n", console_num, console->leftover); console->leftover[0] = '\0'; } @@ -675,8 +663,8 @@ static inline int octeon_console_avail_bytes(u32 buffer_size, octeon_console_free_bytes(buffer_size, wr_idx, rd_idx); } -int octeon_console_read(struct octeon_device *oct, u32 console_num, - char *buffer, u32 buf_size, u32 flags) +static int octeon_console_read(struct octeon_device *oct, u32 console_num, + char *buffer, u32 buf_size) { int bytes_to_read; u32 rd_idx, wr_idx; @@ -712,7 +700,7 @@ int octeon_console_read(struct octeon_device *oct, u32 console_num, bytes_to_read = console->buffer_size - rd_idx; octeon_pci_read_core_mem(oct, console->output_base_addr + rd_idx, - buffer, bytes_to_read); + (u8 *)buffer, bytes_to_read); octeon_write_device_mem32(oct, console->addr + offsetof(struct octeon_pci_console, output_read_index), diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c index 337220721632..0eb504a4379a 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c @@ -19,27 +19,19 @@ * This file may also be available under a different license from Cavium. * Contact Cavium, Inc. for more information **********************************************************************/ -#include <linux/types.h> -#include <linux/list.h> -#include <linux/interrupt.h> #include <linux/pci.h> #include <linux/crc32.h> -#include <linux/kthread.h> #include <linux/netdevice.h> #include <linux/vmalloc.h> -#include "octeon_config.h" #include "liquidio_common.h" #include "octeon_droq.h" #include "octeon_iq.h" #include "response_manager.h" #include "octeon_device.h" -#include "octeon_nic.h" #include "octeon_main.h" #include "octeon_network.h" #include "cn66xx_regs.h" #include "cn66xx_device.h" -#include "cn68xx_regs.h" -#include "cn68xx_device.h" #include "liquidio_image.h" #include "octeon_mem_ops.h" @@ -448,10 +440,10 @@ static struct octeon_config_ptr { }; static char oct_dev_state_str[OCT_DEV_STATES + 1][32] = { - "BEGIN", "PCI-MAP-DONE", "DISPATCH-INIT-DONE", + "BEGIN", "PCI-MAP-DONE", "DISPATCH-INIT-DONE", "IQ-INIT-DONE", "SCBUFF-POOL-INIT-DONE", "RESPLIST-INIT-DONE", "DROQ-INIT-DONE", "IO-QUEUES-INIT-DONE", "CONSOLE-INIT-DONE", - "HOST-READY", "CORE-READY", "RUNNING", "IN-RESET", + "HOST-READY", "CORE-READY", "RUNNING", "IN-RESET", "INVALID" }; @@ -652,16 +644,16 @@ int octeon_download_firmware(struct octeon_device *oct, const u8 *data, void octeon_free_device_mem(struct octeon_device *oct) { - u32 i; + int i; for (i = 0; i < MAX_OCTEON_OUTPUT_QUEUES(oct); i++) { - /* could check mask as well */ - vfree(oct->droq[i]); + if (oct->io_qmask.oq & (1ULL << i)) + vfree(oct->droq[i]); } for (i = 0; i < MAX_OCTEON_INSTR_QUEUES(oct); i++) { - /* could check mask as well */ - vfree(oct->instr_queue[i]); + if (oct->io_qmask.iq & (1ULL << i)) + vfree(oct->instr_queue[i]); } i = oct->octeon_id; @@ -752,13 +744,11 @@ struct octeon_device *octeon_allocate_device(u32 pci_id, /* this function is only for setting up the first queue */ int octeon_setup_instr_queues(struct octeon_device *oct) { - u32 num_iqs = 0; u32 num_descs = 0; u32 iq_no = 0; union oct_txpciq txpciq; int numa_node = cpu_to_node(iq_no % num_online_cpus()); - num_iqs = 1; /* this causes queue 0 to be default queue */ if (OCTEON_CN6XXX(oct)) num_descs = @@ -793,13 +783,11 @@ int octeon_setup_instr_queues(struct octeon_device *oct) int octeon_setup_output_queues(struct octeon_device *oct) { - u32 num_oqs = 0; u32 num_descs = 0; u32 desc_size = 0; u32 oq_no = 0; int numa_node = cpu_to_node(oq_no % num_online_cpus()); - num_oqs = 1; /* this causes queue 0 to be default queue */ if (OCTEON_CN6XXX(oct)) { num_descs = @@ -1019,79 +1007,6 @@ octeon_register_dispatch_fn(struct octeon_device *oct, return 0; } -/* octeon_unregister_dispatch_fn - * Parameters: - * oct - octeon device - * opcode - driver should unregister the function for this opcode - * subcode - driver should unregister the function for this subcode - * Description: - * Unregister the function set for this opcode+subcode. - * Returns: - * Success: 0 - * Failure: 1 - * Locks: - * No locks are held. - */ -int -octeon_unregister_dispatch_fn(struct octeon_device *oct, u16 opcode, - u16 subcode) -{ - int retval = 0; - u32 idx; - struct list_head *dispatch, *dfree = NULL, *tmp2; - u16 combined_opcode = OPCODE_SUBCODE(opcode, subcode); - - idx = combined_opcode & OCTEON_OPCODE_MASK; - - spin_lock_bh(&oct->dispatch.lock); - - if (oct->dispatch.count == 0) { - spin_unlock_bh(&oct->dispatch.lock); - dev_err(&oct->pci_dev->dev, - "No dispatch functions registered for this device\n"); - return 1; - } - - if (oct->dispatch.dlist[idx].opcode == combined_opcode) { - dispatch = &oct->dispatch.dlist[idx].list; - if (dispatch->next != dispatch) { - dispatch = dispatch->next; - oct->dispatch.dlist[idx].opcode = - ((struct octeon_dispatch *)dispatch)->opcode; - oct->dispatch.dlist[idx].dispatch_fn = - ((struct octeon_dispatch *) - dispatch)->dispatch_fn; - oct->dispatch.dlist[idx].arg = - ((struct octeon_dispatch *)dispatch)->arg; - list_del(dispatch); - dfree = dispatch; - } else { - oct->dispatch.dlist[idx].opcode = 0; - oct->dispatch.dlist[idx].dispatch_fn = NULL; - oct->dispatch.dlist[idx].arg = NULL; - } - } else { - retval = 1; - list_for_each_safe(dispatch, tmp2, - &(oct->dispatch.dlist[idx]. - list)) { - if (((struct octeon_dispatch *)dispatch)->opcode == - combined_opcode) { - list_del(dispatch); - dfree = dispatch; - retval = 0; - } - } - } - - if (!retval) - oct->dispatch.count--; - - spin_unlock_bh(&oct->dispatch.lock); - vfree(dfree); - return retval; -} - int octeon_core_drv_init(struct octeon_recv_info *recv_info, void *buf) { u32 i; diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h index b4e566dea008..01edfb404346 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h @@ -221,7 +221,7 @@ struct octeon_fn_list { /* Structure for named memory blocks * Number of descriptors - * available can be changed without affecting compatiblity, + * available can be changed without affecting compatibility, * but name length changes require a bump in the bootmem * descriptor version * Note: This structure must be naturally 64 bit aligned, as a single @@ -254,7 +254,7 @@ struct oct_fw_info { struct cavium_wk { struct delayed_work work; void *ctxptr; - size_t ctxul; + u64 ctxul; }; struct cavium_wq { @@ -585,8 +585,7 @@ int octeon_add_console(struct octeon_device *oct, u32 console_num); int octeon_console_write(struct octeon_device *oct, u32 console_num, char *buffer, u32 write_request_size, u32 flags); int octeon_console_write_avail(struct octeon_device *oct, u32 console_num); -int octeon_console_read(struct octeon_device *oct, u32 console_num, - char *buffer, u32 buf_size, u32 flags); + int octeon_console_read_avail(struct octeon_device *oct, u32 console_num); /** Removes all attached consoles. */ diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c index d9bb2f7e0836..e0afe4c1fd01 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c @@ -19,30 +19,18 @@ * This file may also be available under a different license from Cavium. * Contact Cavium, Inc. for more information **********************************************************************/ -#include <linux/version.h> -#include <linux/types.h> -#include <linux/list.h> #include <linux/pci.h> -#include <linux/kthread.h> #include <linux/netdevice.h> #include <linux/vmalloc.h> -#include "octeon_config.h" #include "liquidio_common.h" #include "octeon_droq.h" #include "octeon_iq.h" #include "response_manager.h" #include "octeon_device.h" -#include "octeon_nic.h" #include "octeon_main.h" #include "octeon_network.h" #include "cn66xx_regs.h" #include "cn66xx_device.h" -#include "cn68xx_regs.h" -#include "cn68xx_device.h" -#include "liquidio_image.h" -#include "octeon_mem_ops.h" - -/* #define CAVIUM_ONLY_PERF_MODE */ #define CVM_MIN(d1, d2) (((d1) < (d2)) ? (d1) : (d2)) #define CVM_MAX(d1, d2) (((d1) > (d2)) ? (d1) : (d2)) @@ -104,8 +92,12 @@ static inline void *octeon_get_dispatch_arg(struct octeon_device *octeon_dev, return fn_arg; } -u32 octeon_droq_check_hw_for_pkts(struct octeon_device *oct, - struct octeon_droq *droq) +/** Check for packets on Droq. This function should be called with + * lock held. + * @param droq - Droq on which count is checked. + * @return Returns packet count. + */ +u32 octeon_droq_check_hw_for_pkts(struct octeon_droq *droq) { u32 pkt_count = 0; @@ -196,7 +188,6 @@ octeon_droq_setup_ring_buffers(struct octeon_device *oct, droq->recv_buf_list[i].buffer = buf; droq->recv_buf_list[i].data = get_rbd(buf); - droq->info_list[i].length = 0; /* map ring buffers into memory */ @@ -569,7 +560,9 @@ octeon_droq_dispatch_pkt(struct octeon_device *oct, droq->stats.dropped_nomem++; } } else { - dev_err(&oct->pci_dev->dev, "DROQ: No dispatch function\n"); + dev_err(&oct->pci_dev->dev, "DROQ: No dispatch function (opcode %u/%u)\n", + (unsigned int)rh->r.opcode, + (unsigned int)rh->r.subcode); droq->stats.dropped_nodispatch++; } /* else (dispatch_fn ... */ @@ -654,6 +647,7 @@ octeon_droq_fast_process_packets(struct octeon_device *oct, pg_info->page = NULL; droq->recv_buf_list[droq->read_idx].buffer = NULL; + INCR_INDEX_BY1(droq->read_idx, droq->max_count); droq->refill_count++; } else { @@ -748,7 +742,7 @@ octeon_droq_process_packets(struct octeon_device *oct, if (pkt_count > budget) pkt_count = budget; - /* Grab the lock */ + /* Grab the droq lock */ spin_lock(&droq->lock); pkts_processed = octeon_droq_fast_process_packets(oct, droq, pkt_count); @@ -810,7 +804,7 @@ octeon_droq_process_poll_pkts(struct octeon_device *oct, total_pkts_processed += pkts_processed; - octeon_droq_check_hw_for_pkts(oct, droq); + octeon_droq_check_hw_for_pkts(droq); } spin_unlock(&droq->lock); @@ -834,18 +828,6 @@ octeon_process_droq_poll_cmd(struct octeon_device *oct, u32 q_no, int cmd, u32 arg) { struct octeon_droq *droq; - struct octeon_config *oct_cfg = NULL; - - oct_cfg = octeon_get_conf(oct); - - if (!oct_cfg) - return -EINVAL; - - if (q_no >= CFG_GET_OQ_MAX_Q(oct_cfg)) { - dev_err(&oct->pci_dev->dev, "%s: droq id (%d) exceeds MAX (%d)\n", - __func__, q_no, (oct->num_oqs - 1)); - return -EINVAL; - } droq = oct->droq[q_no]; diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h index 1ca9c4f05702..5a6fb9113bbd 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h @@ -121,6 +121,9 @@ struct oct_droq_stats { /** Num of Packets dropped due to receive path failures. */ u64 rx_dropped; + /** Num of vxlan packets received; */ + u64 rx_vxlan; + /** Num of failures of recv_buffer_alloc() */ u64 rx_alloc_failure; @@ -413,24 +416,9 @@ int octeon_register_dispatch_fn(struct octeon_device *oct, u16 subcode, octeon_dispatch_fn_t fn, void *fn_arg); -/** Remove registration for an opcode/subcode. This will delete the mapping for - * an opcode/subcode. The dispatch function will be unregistered and will no - * longer be called if a packet with the opcode/subcode arrives in the driver - * output queues. - * @param oct - the octeon device to unregister from. - * @param opcode - the opcode to be unregistered. - * @param subcode - the subcode to be unregistered. - * - * @return Success: 0; Failure: 1 - */ -int octeon_unregister_dispatch_fn(struct octeon_device *oct, - u16 opcode, - u16 subcode); - void octeon_droq_print_stats(void); -u32 octeon_droq_check_hw_for_pkts(struct octeon_device *oct, - struct octeon_droq *droq); +u32 octeon_droq_check_hw_for_pkts(struct octeon_droq *droq); int octeon_create_droq(struct octeon_device *oct, u32 q_no, u32 num_descs, u32 desc_size, void *app_ctx); diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h index caa2b4f30717..ff4b1d6f007b 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h @@ -66,6 +66,7 @@ struct oct_iq_stats { u64 tx_dropped;/**< Numof pkts dropped dueto xmitpath errors. */ u64 tx_tot_bytes;/**< Total count of bytes sento to network. */ u64 tx_gso; /* count of tso */ + u64 tx_vxlan; /* tunnel */ u64 tx_dmamap_fail; u64 tx_restart; /*u64 tx_timeout_count;*/ @@ -98,7 +99,7 @@ struct octeon_instr_queue { u32 rsvd:17; - /* Controls the periodic flushing of iq */ + /* Controls whether extra flushing of IQ is done on Tx */ u32 do_auto_flush:1; u32 status:8; diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_main.h b/drivers/net/ethernet/cavium/liquidio/octeon_main.h index 0ff3efc67b84..bc14e4c27332 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_main.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_main.h @@ -174,7 +174,7 @@ sleep_cond(wait_queue_head_t *wait_queue, int *condition) init_waitqueue_entry(&we, current); add_wait_queue(wait_queue, &we); - while (!(ACCESS_ONCE(*condition))) { + while (!(READ_ONCE(*condition))) { set_current_state(TASK_INTERRUPTIBLE); if (signal_pending(current)) goto out; diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c b/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c index 5aecef870377..95a4bbedf557 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c @@ -19,43 +19,29 @@ * This file may also be available under a different license from Cavium. * Contact Cavium, Inc. for more information **********************************************************************/ -#include <linux/version.h> -#include <linux/types.h> -#include <linux/list.h> -#include <linux/interrupt.h> #include <linux/pci.h> -#include <linux/kthread.h> #include <linux/netdevice.h> -#include "octeon_config.h" #include "liquidio_common.h" #include "octeon_droq.h" #include "octeon_iq.h" #include "response_manager.h" #include "octeon_device.h" -#include "octeon_nic.h" -#include "octeon_main.h" -#include "octeon_network.h" -#include "cn66xx_regs.h" -#include "cn66xx_device.h" -#include "cn68xx_regs.h" -#include "cn68xx_device.h" -#include "liquidio_image.h" -#include "octeon_mem_ops.h" #define MEMOPS_IDX MAX_BAR1_MAP_INDEX +#ifdef __BIG_ENDIAN_BITFIELD static inline void -octeon_toggle_bar1_swapmode(struct octeon_device *oct __attribute__((unused)), - u32 idx __attribute__((unused))) +octeon_toggle_bar1_swapmode(struct octeon_device *oct, u32 idx) { -#ifdef __BIG_ENDIAN_BITFIELD u32 mask; mask = oct->fn_list.bar1_idx_read(oct, idx); mask = (mask & 0x2) ? (mask & ~2) : (mask | 2); oct->fn_list.bar1_idx_write(oct, idx, mask); -#endif } +#else +#define octeon_toggle_bar1_swapmode(oct, idx) (oct = oct) +#endif static void octeon_pci_fastwrite(struct octeon_device *oct, u8 __iomem *mapped_addr, diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h index b481edc56c6e..fb820dc7fcb7 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h @@ -30,6 +30,9 @@ #include <linux/dma-mapping.h> #include <linux/ptp_clock_kernel.h> +#define LIO_MAX_MTU_SIZE (OCTNET_MAX_FRM_SIZE - OCTNET_FRM_HEADER_SIZE) +#define LIO_MIN_MTU_SIZE 68 + struct oct_nic_stats_resp { u64 rh; struct oct_link_stats stats; @@ -96,6 +99,12 @@ struct lio { /** Copy of Interface capabilities: TSO, TSO6, LRO, Chescksums . */ u64 dev_capability; + /* Copy of transmit encapsulation capabilities: + * TSO, TSO6, Checksums for this device for Kernel + * 3.10.0 onwards + */ + u64 enc_dev_capability; + /** Copy of beacaon reg in phy */ u32 phy_beacon_val; @@ -115,7 +124,6 @@ struct lio { /* work queue for txq status */ struct cavium_wq txq_status_wq; - }; #define LIO_SIZE (sizeof(struct lio)) @@ -351,7 +359,7 @@ lio_map_ring_info(struct octeon_droq *droq, u32 i) dma_addr = dma_map_single(&oct->pci_dev->dev, &droq->info_list[i], OCT_DROQ_INFO_SIZE, DMA_FROM_DEVICE); - BUG_ON(dma_mapping_error(&oct->pci_dev->dev, dma_addr)); + WARN_ON(dma_mapping_error(&oct->pci_dev->dev, dma_addr)); return (u64)dma_addr; } diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_nic.c b/drivers/net/ethernet/cavium/liquidio/octeon_nic.c index 36f1970a860e..166727be928f 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_nic.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_nic.c @@ -19,14 +19,9 @@ * This file may also be available under a different license from Cavium. * Contact Cavium, Inc. for more information **********************************************************************/ -#include <linux/version.h> -#include <linux/types.h> -#include <linux/list.h> #include <linux/interrupt.h> #include <linux/pci.h> -#include <linux/kthread.h> #include <linux/netdevice.h> -#include "octeon_config.h" #include "liquidio_common.h" #include "octeon_droq.h" #include "octeon_iq.h" @@ -34,13 +29,6 @@ #include "octeon_device.h" #include "octeon_nic.h" #include "octeon_main.h" -#include "octeon_network.h" -#include "cn66xx_regs.h" -#include "cn66xx_device.h" -#include "cn68xx_regs.h" -#include "cn68xx_device.h" -#include "liquidio_image.h" -#include "octeon_mem_ops.h" void * octeon_alloc_soft_command_resp(struct octeon_device *oct, diff --git a/drivers/net/ethernet/cavium/liquidio/request_manager.c b/drivers/net/ethernet/cavium/liquidio/request_manager.c index 7eafa75ac095..d32492f185ff 100644 --- a/drivers/net/ethernet/cavium/liquidio/request_manager.c +++ b/drivers/net/ethernet/cavium/liquidio/request_manager.c @@ -19,28 +19,17 @@ * This file may also be available under a different license from Cavium. * Contact Cavium, Inc. for more information **********************************************************************/ -#include <linux/version.h> -#include <linux/types.h> -#include <linux/list.h> -#include <linux/interrupt.h> #include <linux/pci.h> -#include <linux/kthread.h> #include <linux/netdevice.h> #include <linux/vmalloc.h> -#include "octeon_config.h" #include "liquidio_common.h" #include "octeon_droq.h" #include "octeon_iq.h" #include "response_manager.h" #include "octeon_device.h" -#include "octeon_nic.h" #include "octeon_main.h" #include "octeon_network.h" -#include "cn66xx_regs.h" #include "cn66xx_device.h" -#include "cn68xx_regs.h" -#include "cn68xx_device.h" -#include "liquidio_image.h" #define INCR_INSTRQUEUE_PKT_COUNT(octeon_dev_ptr, iq_no, field, count) \ (octeon_dev_ptr->instr_queue[iq_no]->stats.field += count) @@ -301,40 +290,8 @@ static inline void __copy_cmd_into_iq(struct octeon_instr_queue *iq, memcpy(iqptr, cmd, cmdsize); } -static inline int -__post_command(struct octeon_device *octeon_dev __attribute__((unused)), - struct octeon_instr_queue *iq, - u32 force_db __attribute__((unused)), u8 *cmd) -{ - u32 index = -1; - - /* This ensures that the read index does not wrap around to the same - * position if queue gets full before Octeon could fetch any instr. - */ - if (atomic_read(&iq->instr_pending) >= (s32)(iq->max_count - 1)) - return -1; - - __copy_cmd_into_iq(iq, cmd); - - /* "index" is returned, host_write_index is modified. */ - index = iq->host_write_index; - INCR_INDEX_BY1(iq->host_write_index, iq->max_count); - iq->fill_cnt++; - - /* Flush the command into memory. We need to be sure the data is in - * memory before indicating that the instruction is pending. - */ - wmb(); - - atomic_inc(&iq->instr_pending); - - return index; -} - static inline struct iq_post_status -__post_command2(struct octeon_device *octeon_dev __attribute__((unused)), - struct octeon_instr_queue *iq, - u32 force_db __attribute__((unused)), u8 *cmd) +__post_command2(struct octeon_instr_queue *iq, u8 *cmd) { struct iq_post_status st; @@ -392,6 +349,7 @@ __add_to_request_list(struct octeon_instr_queue *iq, iq->request_list[idx].reqtype = reqtype; } +/* Can only run in process context */ int lio_process_iq_request_list(struct octeon_device *oct, struct octeon_instr_queue *iq, u32 napi_budget) @@ -403,6 +361,7 @@ lio_process_iq_request_list(struct octeon_device *oct, unsigned int pkts_compl = 0, bytes_compl = 0; struct octeon_soft_command *sc; struct octeon_instr_irh *irh; + unsigned long flags; while (old != iq->octeon_read_index) { reqtype = iq->request_list[old].reqtype; @@ -432,17 +391,22 @@ lio_process_iq_request_list(struct octeon_device *oct, * command response list because we expect * a response from Octeon. */ - spin_lock_bh(&oct->response_list - [OCTEON_ORDERED_SC_LIST].lock); + spin_lock_irqsave + (&oct->response_list + [OCTEON_ORDERED_SC_LIST].lock, + flags); atomic_inc(&oct->response_list [OCTEON_ORDERED_SC_LIST]. pending_req_count); list_add_tail(&sc->node, &oct->response_list [OCTEON_ORDERED_SC_LIST].head); - spin_unlock_bh(&oct->response_list - [OCTEON_ORDERED_SC_LIST].lock); + spin_unlock_irqrestore + (&oct->response_list + [OCTEON_ORDERED_SC_LIST].lock, + flags); } else { if (sc->callback) { + /* This callback must not sleep */ sc->callback(oct, OCTEON_REQUEST_DONE, sc->callback_arg); } @@ -559,11 +523,12 @@ static void check_db_timeout(struct work_struct *work) { struct cavium_wk *wk = (struct cavium_wk *)work; struct octeon_device *oct = (struct octeon_device *)wk->ctxptr; - unsigned long iq_no = wk->ctxul; + u64 iq_no = wk->ctxul; struct cavium_wq *db_wq = &oct->check_db_wq[iq_no]; + u32 delay = 10; __check_db_timeout(oct, iq_no); - queue_delayed_work(db_wq->wq, &db_wq->wk.work, msecs_to_jiffies(1)); + queue_delayed_work(db_wq->wq, &db_wq->wk.work, msecs_to_jiffies(delay)); } int @@ -579,7 +544,7 @@ octeon_send_command(struct octeon_device *oct, u32 iq_no, */ spin_lock_bh(&iq->post_lock); - st = __post_command2(oct, iq, force_db, cmd); + st = __post_command2(iq, cmd); if (st.status != IQ_SEND_FAILED) { octeon_report_sent_bytes_to_bql(buf, reqtype); @@ -587,7 +552,7 @@ octeon_send_command(struct octeon_device *oct, u32 iq_no, INCR_INSTRQUEUE_PKT_COUNT(oct, iq_no, bytes_sent, datasize); INCR_INSTRQUEUE_PKT_COUNT(oct, iq_no, instr_posted, 1); - if (iq->fill_cnt >= iq->fill_threshold || force_db) + if (force_db) ring_doorbell(oct, iq); } else { INCR_INSTRQUEUE_PKT_COUNT(oct, iq_no, instr_dropped, 1); @@ -618,8 +583,8 @@ octeon_prepare_soft_command(struct octeon_device *oct, struct octeon_instr_irh *irh; struct octeon_instr_rdp *rdp; - BUG_ON(opcode > 15); - BUG_ON(subcode > 127); + WARN_ON(opcode > 15); + WARN_ON(subcode > 127); oct_cfg = octeon_get_conf(oct); @@ -661,7 +626,6 @@ int octeon_send_soft_command(struct octeon_device *oct, { struct octeon_instr_ih2 *ih2; struct octeon_instr_irh *irh; - struct octeon_instr_rdp *rdp; u32 len; ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2; @@ -671,12 +635,10 @@ int octeon_send_soft_command(struct octeon_device *oct, } irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh; if (irh->rflag) { - BUG_ON(!sc->dmarptr); - BUG_ON(!sc->status_word); + WARN_ON(!sc->dmarptr); + WARN_ON(!sc->status_word); *sc->status_word = COMPLETION_WORD_INIT; - rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp; - sc->cmd.cmd2.rptr = sc->dmarptr; } len = (u32)ih2->dlengsz; @@ -720,7 +682,7 @@ int octeon_free_sc_buffer_pool(struct octeon_device *oct) struct list_head *tmp, *tmp2; struct octeon_soft_command *sc; - spin_lock(&oct->sc_buf_pool.lock); + spin_lock_bh(&oct->sc_buf_pool.lock); list_for_each_safe(tmp, tmp2, &oct->sc_buf_pool.head) { list_del(tmp); @@ -732,7 +694,7 @@ int octeon_free_sc_buffer_pool(struct octeon_device *oct) INIT_LIST_HEAD(&oct->sc_buf_pool.head); - spin_unlock(&oct->sc_buf_pool.lock); + spin_unlock_bh(&oct->sc_buf_pool.lock); return 0; } @@ -748,13 +710,13 @@ struct octeon_soft_command *octeon_alloc_soft_command(struct octeon_device *oct, struct octeon_soft_command *sc = NULL; struct list_head *tmp; - BUG_ON((offset + datasize + rdatasize + ctxsize) > + WARN_ON((offset + datasize + rdatasize + ctxsize) > SOFT_COMMAND_BUFFER_SIZE); - spin_lock(&oct->sc_buf_pool.lock); + spin_lock_bh(&oct->sc_buf_pool.lock); if (list_empty(&oct->sc_buf_pool.head)) { - spin_unlock(&oct->sc_buf_pool.lock); + spin_unlock_bh(&oct->sc_buf_pool.lock); return NULL; } @@ -765,7 +727,7 @@ struct octeon_soft_command *octeon_alloc_soft_command(struct octeon_device *oct, atomic_inc(&oct->sc_buf_pool.alloc_buf_count); - spin_unlock(&oct->sc_buf_pool.lock); + spin_unlock_bh(&oct->sc_buf_pool.lock); sc = (struct octeon_soft_command *)tmp; @@ -795,7 +757,7 @@ struct octeon_soft_command *octeon_alloc_soft_command(struct octeon_device *oct, offset = (offset + datasize + 127) & 0xffffff80; if (rdatasize) { - BUG_ON(rdatasize < 16); + WARN_ON(rdatasize < 16); sc->virtrptr = (u8 *)sc + offset; sc->dmarptr = dma_addr + offset; sc->rdatasize = rdatasize; @@ -808,11 +770,11 @@ struct octeon_soft_command *octeon_alloc_soft_command(struct octeon_device *oct, void octeon_free_soft_command(struct octeon_device *oct, struct octeon_soft_command *sc) { - spin_lock(&oct->sc_buf_pool.lock); + spin_lock_bh(&oct->sc_buf_pool.lock); list_add_tail(&sc->node, &oct->sc_buf_pool.head); atomic_dec(&oct->sc_buf_pool.alloc_buf_count); - spin_unlock(&oct->sc_buf_pool.lock); + spin_unlock_bh(&oct->sc_buf_pool.lock); } diff --git a/drivers/net/ethernet/cavium/liquidio/response_manager.c b/drivers/net/ethernet/cavium/liquidio/response_manager.c index c93210f99dda..709049e36627 100644 --- a/drivers/net/ethernet/cavium/liquidio/response_manager.c +++ b/drivers/net/ethernet/cavium/liquidio/response_manager.c @@ -19,28 +19,14 @@ * This file may also be available under a different license from Cavium. * Contact Cavium, Inc. for more information **********************************************************************/ -#include <linux/version.h> -#include <linux/types.h> -#include <linux/list.h> -#include <linux/interrupt.h> -#include <linux/dma-mapping.h> #include <linux/pci.h> -#include <linux/kthread.h> #include <linux/netdevice.h> -#include "octeon_config.h" #include "liquidio_common.h" #include "octeon_droq.h" #include "octeon_iq.h" #include "response_manager.h" #include "octeon_device.h" -#include "octeon_nic.h" #include "octeon_main.h" -#include "octeon_network.h" -#include "cn66xx_regs.h" -#include "cn66xx_device.h" -#include "cn68xx_regs.h" -#include "cn68xx_device.h" -#include "liquidio_image.h" static void oct_poll_req_completion(struct work_struct *work); @@ -66,7 +52,7 @@ int octeon_setup_response_list(struct octeon_device *oct) INIT_DELAYED_WORK(&cwq->wk.work, oct_poll_req_completion); cwq->wk.ctxptr = oct; oct->cmd_resp_state = OCT_DRV_ONLINE; - queue_delayed_work(cwq->wq, &cwq->wk.work, msecs_to_jiffies(100)); + queue_delayed_work(cwq->wq, &cwq->wk.work, msecs_to_jiffies(50)); return ret; } @@ -176,6 +162,5 @@ static void oct_poll_req_completion(struct work_struct *work) struct cavium_wq *cwq = &oct->dma_comp_wq; lio_process_ordered_list(oct, 0); - - queue_delayed_work(cwq->wq, &cwq->wk.work, msecs_to_jiffies(100)); + queue_delayed_work(cwq->wq, &cwq->wk.work, msecs_to_jiffies(50)); } diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c index 388cd799d9ed..e8bc15bcde70 100644 --- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c +++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c @@ -146,7 +146,6 @@ struct octeon_mgmt { struct device *dev; struct napi_struct napi; struct tasklet_struct tx_clean_tasklet; - struct phy_device *phydev; struct device_node *phy_np; resource_size_t mix_phys; resource_size_t mix_size; @@ -787,14 +786,12 @@ static int octeon_mgmt_ioctl_hwtstamp(struct net_device *netdev, static int octeon_mgmt_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd) { - struct octeon_mgmt *p = netdev_priv(netdev); - switch (cmd) { case SIOCSHWTSTAMP: return octeon_mgmt_ioctl_hwtstamp(netdev, rq, cmd); default: - if (p->phydev) - return phy_mii_ioctl(p->phydev, rq, cmd); + if (netdev->phydev) + return phy_mii_ioctl(netdev->phydev, rq, cmd); return -EINVAL; } } @@ -836,16 +833,18 @@ static void octeon_mgmt_enable_link(struct octeon_mgmt *p) static void octeon_mgmt_update_link(struct octeon_mgmt *p) { + struct net_device *ndev = p->netdev; + struct phy_device *phydev = ndev->phydev; union cvmx_agl_gmx_prtx_cfg prtx_cfg; prtx_cfg.u64 = cvmx_read_csr(p->agl + AGL_GMX_PRT_CFG); - if (!p->phydev->link) + if (!phydev->link) prtx_cfg.s.duplex = 1; else - prtx_cfg.s.duplex = p->phydev->duplex; + prtx_cfg.s.duplex = phydev->duplex; - switch (p->phydev->speed) { + switch (phydev->speed) { case 10: prtx_cfg.s.speed = 0; prtx_cfg.s.slottime = 0; @@ -871,7 +870,7 @@ static void octeon_mgmt_update_link(struct octeon_mgmt *p) prtx_cfg.s.speed_msb = 0; /* Only matters for half-duplex */ prtx_cfg.s.slottime = 1; - prtx_cfg.s.burst = p->phydev->duplex; + prtx_cfg.s.burst = phydev->duplex; } break; case 0: /* No link */ @@ -894,9 +893,9 @@ static void octeon_mgmt_update_link(struct octeon_mgmt *p) /* MII (both speeds) and RGMII 1000 speed. */ agl_clk.s.clk_cnt = 1; if (prtx_ctl.s.mode == 0) { /* RGMII mode */ - if (p->phydev->speed == 10) + if (phydev->speed == 10) agl_clk.s.clk_cnt = 50; - else if (p->phydev->speed == 100) + else if (phydev->speed == 100) agl_clk.s.clk_cnt = 5; } cvmx_write_csr(p->agl + AGL_GMX_TX_CLK, agl_clk.u64); @@ -906,39 +905,40 @@ static void octeon_mgmt_update_link(struct octeon_mgmt *p) static void octeon_mgmt_adjust_link(struct net_device *netdev) { struct octeon_mgmt *p = netdev_priv(netdev); + struct phy_device *phydev = netdev->phydev; unsigned long flags; int link_changed = 0; - if (!p->phydev) + if (!phydev) return; spin_lock_irqsave(&p->lock, flags); - if (!p->phydev->link && p->last_link) + if (!phydev->link && p->last_link) link_changed = -1; - if (p->phydev->link - && (p->last_duplex != p->phydev->duplex - || p->last_link != p->phydev->link - || p->last_speed != p->phydev->speed)) { + if (phydev->link && + (p->last_duplex != phydev->duplex || + p->last_link != phydev->link || + p->last_speed != phydev->speed)) { octeon_mgmt_disable_link(p); link_changed = 1; octeon_mgmt_update_link(p); octeon_mgmt_enable_link(p); } - p->last_link = p->phydev->link; - p->last_speed = p->phydev->speed; - p->last_duplex = p->phydev->duplex; + p->last_link = phydev->link; + p->last_speed = phydev->speed; + p->last_duplex = phydev->duplex; spin_unlock_irqrestore(&p->lock, flags); if (link_changed != 0) { if (link_changed > 0) { pr_info("%s: Link is up - %d/%s\n", netdev->name, - p->phydev->speed, - DUPLEX_FULL == p->phydev->duplex ? + phydev->speed, + phydev->duplex == DUPLEX_FULL ? "Full" : "Half"); } else { pr_info("%s: Link is down\n", netdev->name); @@ -949,6 +949,7 @@ static void octeon_mgmt_adjust_link(struct net_device *netdev) static int octeon_mgmt_init_phy(struct net_device *netdev) { struct octeon_mgmt *p = netdev_priv(netdev); + struct phy_device *phydev = NULL; if (octeon_is_simulation() || p->phy_np == NULL) { /* No PHYs in the simulator. */ @@ -956,11 +957,11 @@ static int octeon_mgmt_init_phy(struct net_device *netdev) return 0; } - p->phydev = of_phy_connect(netdev, p->phy_np, - octeon_mgmt_adjust_link, 0, - PHY_INTERFACE_MODE_MII); + phydev = of_phy_connect(netdev, p->phy_np, + octeon_mgmt_adjust_link, 0, + PHY_INTERFACE_MODE_MII); - if (!p->phydev) + if (!phydev) return -ENODEV; return 0; @@ -1080,9 +1081,9 @@ static int octeon_mgmt_open(struct net_device *netdev) } /* Set the mode of the interface, RGMII/MII. */ - if (OCTEON_IS_MODEL(OCTEON_CN6XXX) && p->phydev) { + if (OCTEON_IS_MODEL(OCTEON_CN6XXX) && netdev->phydev) { union cvmx_agl_prtx_ctl agl_prtx_ctl; - int rgmii_mode = (p->phydev->supported & + int rgmii_mode = (netdev->phydev->supported & (SUPPORTED_1000baseT_Half | SUPPORTED_1000baseT_Full)) != 0; agl_prtx_ctl.u64 = cvmx_read_csr(p->agl_prt_ctl); @@ -1205,7 +1206,7 @@ static int octeon_mgmt_open(struct net_device *netdev) /* Configure the port duplex, speed and enables */ octeon_mgmt_disable_link(p); - if (p->phydev) + if (netdev->phydev) octeon_mgmt_update_link(p); octeon_mgmt_enable_link(p); @@ -1214,9 +1215,9 @@ static int octeon_mgmt_open(struct net_device *netdev) /* PHY is not present in simulator. The carrier is enabled * while initializing the phy for simulator, leave it enabled. */ - if (p->phydev) { + if (netdev->phydev) { netif_carrier_off(netdev); - phy_start_aneg(p->phydev); + phy_start_aneg(netdev->phydev); } netif_wake_queue(netdev); @@ -1244,9 +1245,8 @@ static int octeon_mgmt_stop(struct net_device *netdev) napi_disable(&p->napi); netif_stop_queue(netdev); - if (p->phydev) - phy_disconnect(p->phydev); - p->phydev = NULL; + if (netdev->phydev) + phy_disconnect(netdev->phydev); netif_carrier_off(netdev); @@ -1346,50 +1346,23 @@ static void octeon_mgmt_get_drvinfo(struct net_device *netdev, strlcpy(info->bus_info, "N/A", sizeof(info->bus_info)); } -static int octeon_mgmt_get_settings(struct net_device *netdev, - struct ethtool_cmd *cmd) -{ - struct octeon_mgmt *p = netdev_priv(netdev); - - if (p->phydev) - return phy_ethtool_gset(p->phydev, cmd); - - return -EOPNOTSUPP; -} - -static int octeon_mgmt_set_settings(struct net_device *netdev, - struct ethtool_cmd *cmd) -{ - struct octeon_mgmt *p = netdev_priv(netdev); - - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - if (p->phydev) - return phy_ethtool_sset(p->phydev, cmd); - - return -EOPNOTSUPP; -} - static int octeon_mgmt_nway_reset(struct net_device *dev) { - struct octeon_mgmt *p = netdev_priv(dev); - if (!capable(CAP_NET_ADMIN)) return -EPERM; - if (p->phydev) - return phy_start_aneg(p->phydev); + if (dev->phydev) + return phy_start_aneg(dev->phydev); return -EOPNOTSUPP; } static const struct ethtool_ops octeon_mgmt_ethtool_ops = { .get_drvinfo = octeon_mgmt_get_drvinfo, - .get_settings = octeon_mgmt_get_settings, - .set_settings = octeon_mgmt_set_settings, .nway_reset = octeon_mgmt_nway_reset, .get_link = ethtool_op_get_link, + .get_link_ksettings = phy_ethtool_get_link_ksettings, + .set_link_ksettings = phy_ethtool_set_link_ksettings, }; static const struct net_device_ops octeon_mgmt_ops = { diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h index c4b262ca7d43..2accab386323 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h @@ -36,8 +36,8 @@ #define __T4FW_VERSION_H__ #define T4FW_VERSION_MAJOR 0x01 -#define T4FW_VERSION_MINOR 0x0E -#define T4FW_VERSION_MICRO 0x04 +#define T4FW_VERSION_MINOR 0x0F +#define T4FW_VERSION_MICRO 0x25 #define T4FW_VERSION_BUILD 0x00 #define T4FW_MIN_VERSION_MAJOR 0x01 @@ -45,8 +45,8 @@ #define T4FW_MIN_VERSION_MICRO 0x00 #define T5FW_VERSION_MAJOR 0x01 -#define T5FW_VERSION_MINOR 0x0E -#define T5FW_VERSION_MICRO 0x04 +#define T5FW_VERSION_MINOR 0x0F +#define T5FW_VERSION_MICRO 0x25 #define T5FW_VERSION_BUILD 0x00 #define T5FW_MIN_VERSION_MAJOR 0x00 @@ -54,8 +54,8 @@ #define T5FW_MIN_VERSION_MICRO 0x00 #define T6FW_VERSION_MAJOR 0x01 -#define T6FW_VERSION_MINOR 0x0E -#define T6FW_VERSION_MICRO 0x04 +#define T6FW_VERSION_MINOR 0x0F +#define T6FW_VERSION_MICRO 0x25 #define T6FW_VERSION_BUILD 0x00 #define T6FW_MIN_VERSION_MAJOR 0x00 diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 1873c74638cd..1f16e73f6d0c 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -3251,8 +3251,9 @@ static void be_msix_disable(struct be_adapter *adapter) static int be_msix_enable(struct be_adapter *adapter) { - unsigned int i, num_vec, max_roce_eqs; + unsigned int i, max_roce_eqs; struct device *dev = &adapter->pdev->dev; + int num_vec; /* If RoCE is supported, program the max number of vectors that * could be used for NIC and RoCE, else, just program the number diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h index 3869322690ac..e093cbf26c8c 100644 --- a/drivers/net/ethernet/hisilicon/hns/hnae.h +++ b/drivers/net/ethernet/hisilicon/hns/hnae.h @@ -363,6 +363,14 @@ enum hnae_port_type { HNAE_PORT_DEBUG }; +/* mac media type */ +enum hnae_media_type { + HNAE_MEDIA_TYPE_UNKNOWN = 0, + HNAE_MEDIA_TYPE_FIBER, + HNAE_MEDIA_TYPE_COPPER, + HNAE_MEDIA_TYPE_BACKPLANE, +}; + /* This struct defines the operation on the handle. * * get_handle(): (mandatory) @@ -525,6 +533,7 @@ struct hnae_handle { u32 eport_id; u32 dport_id; /* v2 tx bd should fill the dport_id */ enum hnae_port_type port_type; + enum hnae_media_type media_type; struct list_head node; /* list to hnae_ae_dev->handle_list */ struct hnae_buf_ops *bops; /* operation for the buffer */ struct hnae_queue **qs; /* array base of all queues */ diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c index 835521bf1bbc..e28d960997af 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c @@ -134,6 +134,7 @@ struct hnae_handle *hns_ae_get_handle(struct hnae_ae_dev *dev, ae_handle->phy_dev = vf_cb->mac_cb->phy_dev; ae_handle->if_support = vf_cb->mac_cb->if_support; ae_handle->port_type = vf_cb->mac_cb->mac_type; + ae_handle->media_type = vf_cb->mac_cb->media_type; ae_handle->dport_id = port_id; return ae_handle; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c index c526558e6367..3fb87e233c49 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c @@ -56,20 +56,6 @@ static const enum mac_mode g_mac_mode_1000[] = { [PHY_INTERFACE_MODE_RTBI] = MAC_MODE_RTBI_1000 }; -static enum mac_mode hns_mac_dev_to_enet_if(const struct hns_mac_cb *mac_cb) -{ - switch (mac_cb->max_speed) { - case MAC_SPEED_100: - return g_mac_mode_100[mac_cb->phy_if]; - case MAC_SPEED_1000: - return g_mac_mode_1000[mac_cb->phy_if]; - case MAC_SPEED_10000: - return MAC_MODE_XGMII_10000; - default: - return MAC_MODE_MII_100; - } -} - static enum mac_mode hns_get_enet_interface(const struct hns_mac_cb *mac_cb) { switch (mac_cb->max_speed) { @@ -134,7 +120,6 @@ void hns_mac_adjust_link(struct hns_mac_cb *mac_cb, int speed, int duplex) mac_cb->speed = speed; mac_cb->half_duplex = !duplex; - mac_ctrl_drv->mac_mode = hns_mac_dev_to_enet_if(mac_cb); if (mac_ctrl_drv->adjust_link) { ret = mac_ctrl_drv->adjust_link(mac_ctrl_drv, @@ -748,6 +733,18 @@ static void hns_mac_register_phy(struct hns_mac_cb *mac_cb) mac_cb->mac_id, addr); } +#define MAC_MEDIA_TYPE_MAX_LEN 16 + +static const struct { + enum hnae_media_type value; + const char *name; +} media_type_defs[] = { + {HNAE_MEDIA_TYPE_UNKNOWN, "unknown" }, + {HNAE_MEDIA_TYPE_FIBER, "fiber" }, + {HNAE_MEDIA_TYPE_COPPER, "copper" }, + {HNAE_MEDIA_TYPE_BACKPLANE, "backplane" }, +}; + /** *hns_mac_get_info - get mac information from device node *@mac_cb: mac device @@ -759,10 +756,13 @@ static int hns_mac_get_info(struct hns_mac_cb *mac_cb) struct device_node *np; struct regmap *syscon; struct of_phandle_args cpld_args; + const char *media_type; + u32 i; u32 ret; mac_cb->link = false; mac_cb->half_duplex = false; + mac_cb->media_type = HNAE_MEDIA_TYPE_UNKNOWN; mac_cb->speed = mac_phy_to_speed[mac_cb->phy_if]; mac_cb->max_speed = mac_cb->speed; @@ -864,6 +864,17 @@ static int hns_mac_get_info(struct hns_mac_cb *mac_cb) mac_cb->mac_id); } + if (!fwnode_property_read_string(mac_cb->fw_port, "media-type", + &media_type)) { + for (i = 0; i < ARRAY_SIZE(media_type_defs); i++) { + if (!strncmp(media_type_defs[i].name, media_type, + MAC_MEDIA_TYPE_MAX_LEN)) { + mac_cb->media_type = media_type_defs[i].value; + break; + } + } + } + return 0; } diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h index 05a6e8f7a419..4cbdf14f5c16 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h @@ -335,6 +335,7 @@ struct hns_mac_cb { u64 txpkt_for_led; u64 rxpkt_for_led; enum hnae_port_type mac_type; + enum hnae_media_type media_type; phy_interface_t phy_if; enum hnae_loop loop_mode; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index 67e8e1323205..2ef4277d00b3 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -114,9 +114,9 @@ int hns_dsaf_get_cfg(struct dsaf_device *dsaf_dev) dsaf_dev->sc_base = devm_ioremap_resource(&pdev->dev, res); - if (!dsaf_dev->sc_base) { + if (IS_ERR(dsaf_dev->sc_base)) { dev_err(dsaf_dev->dev, "subctrl can not map!\n"); - return -ENOMEM; + return PTR_ERR(dsaf_dev->sc_base); } res = platform_get_resource(pdev, IORESOURCE_MEM, @@ -128,9 +128,9 @@ int hns_dsaf_get_cfg(struct dsaf_device *dsaf_dev) dsaf_dev->sds_base = devm_ioremap_resource(&pdev->dev, res); - if (!dsaf_dev->sds_base) { + if (IS_ERR(dsaf_dev->sds_base)) { dev_err(dsaf_dev->dev, "serdes-ctrl can not map!\n"); - return -ENOMEM; + return PTR_ERR(dsaf_dev->sds_base); } } else { dsaf_dev->sub_ctrl = syscon; @@ -146,9 +146,9 @@ int hns_dsaf_get_cfg(struct dsaf_device *dsaf_dev) } } dsaf_dev->ppe_base = devm_ioremap_resource(&pdev->dev, res); - if (!dsaf_dev->ppe_base) { + if (IS_ERR(dsaf_dev->ppe_base)) { dev_err(dsaf_dev->dev, "ppe-base resource can not map!\n"); - return -ENOMEM; + return PTR_ERR(dsaf_dev->ppe_base); } dsaf_dev->ppe_paddr = res->start; @@ -165,9 +165,9 @@ int hns_dsaf_get_cfg(struct dsaf_device *dsaf_dev) } } dsaf_dev->io_base = devm_ioremap_resource(&pdev->dev, res); - if (!dsaf_dev->io_base) { + if (IS_ERR(dsaf_dev->io_base)) { dev_err(dsaf_dev->dev, "dsaf-base resource can not map!\n"); - return -ENOMEM; + return PTR_ERR(dsaf_dev->io_base); } } @@ -2540,45 +2540,45 @@ static char *hns_dsaf_get_node_stats_strings(char *data, int node, bool is_ver1 = AE_IS_VER1(dsaf_dev->dsaf_ver); snprintf(buff, ETH_GSTRING_LEN, "innod%d_pad_drop_pkts", node); - buff = buff + ETH_GSTRING_LEN; + buff += ETH_GSTRING_LEN; snprintf(buff, ETH_GSTRING_LEN, "innod%d_manage_pkts", node); - buff = buff + ETH_GSTRING_LEN; + buff += ETH_GSTRING_LEN; snprintf(buff, ETH_GSTRING_LEN, "innod%d_rx_pkts", node); - buff = buff + ETH_GSTRING_LEN; + buff += ETH_GSTRING_LEN; snprintf(buff, ETH_GSTRING_LEN, "innod%d_rx_pkt_id", node); - buff = buff + ETH_GSTRING_LEN; + buff += ETH_GSTRING_LEN; snprintf(buff, ETH_GSTRING_LEN, "innod%d_rx_pause_frame", node); - buff = buff + ETH_GSTRING_LEN; + buff += ETH_GSTRING_LEN; snprintf(buff, ETH_GSTRING_LEN, "innod%d_release_buf_num", node); - buff = buff + ETH_GSTRING_LEN; + buff += ETH_GSTRING_LEN; snprintf(buff, ETH_GSTRING_LEN, "innod%d_sbm_drop_pkts", node); - buff = buff + ETH_GSTRING_LEN; + buff += ETH_GSTRING_LEN; snprintf(buff, ETH_GSTRING_LEN, "innod%d_crc_false_pkts", node); - buff = buff + ETH_GSTRING_LEN; + buff += ETH_GSTRING_LEN; snprintf(buff, ETH_GSTRING_LEN, "innod%d_bp_drop_pkts", node); - buff = buff + ETH_GSTRING_LEN; + buff += ETH_GSTRING_LEN; snprintf(buff, ETH_GSTRING_LEN, "innod%d_lookup_rslt_drop_pkts", node); - buff = buff + ETH_GSTRING_LEN; + buff += ETH_GSTRING_LEN; snprintf(buff, ETH_GSTRING_LEN, "innod%d_local_rslt_fail_pkts", node); - buff = buff + ETH_GSTRING_LEN; + buff += ETH_GSTRING_LEN; snprintf(buff, ETH_GSTRING_LEN, "innod%d_vlan_drop_pkts", node); - buff = buff + ETH_GSTRING_LEN; + buff += ETH_GSTRING_LEN; snprintf(buff, ETH_GSTRING_LEN, "innod%d_stp_drop_pkts", node); - buff = buff + ETH_GSTRING_LEN; - if ((node < DSAF_SERVICE_NW_NUM) && (!is_ver1)) { - for (i = 0; i < DSAF_PRIO_NR; i++) { - snprintf(buff, ETH_GSTRING_LEN, - "inod%d_pfc_prio%d_pkts", node, i); - buff = buff + ETH_GSTRING_LEN; - } + buff += ETH_GSTRING_LEN; + if (node < DSAF_SERVICE_NW_NUM && !is_ver1) { for (i = 0; i < DSAF_PRIO_NR; i++) { - snprintf(buff, ETH_GSTRING_LEN, - "onod%d_pfc_prio%d_pkts", node, i); - buff = buff + ETH_GSTRING_LEN; + snprintf(buff + 0 * ETH_GSTRING_LEN * DSAF_PRIO_NR, + ETH_GSTRING_LEN, "inod%d_pfc_prio%d_pkts", + node, i); + snprintf(buff + 1 * ETH_GSTRING_LEN * DSAF_PRIO_NR, + ETH_GSTRING_LEN, "onod%d_pfc_prio%d_pkts", + node, i); + buff += ETH_GSTRING_LEN; } + buff += 1 * DSAF_PRIO_NR * ETH_GSTRING_LEN; } snprintf(buff, ETH_GSTRING_LEN, "onnod%d_tx_pkts", node); - buff = buff + ETH_GSTRING_LEN; + buff += ETH_GSTRING_LEN; return buff; } @@ -2604,10 +2604,10 @@ static u64 *hns_dsaf_get_node_stats(struct dsaf_device *ddev, u64 *data, p[10] = hw_stats->local_addr_false; p[11] = hw_stats->vlan_drop; p[12] = hw_stats->stp_drop; - if ((node_num < DSAF_SERVICE_NW_NUM) && (!is_ver1)) { + if (node_num < DSAF_SERVICE_NW_NUM && !is_ver1) { for (i = 0; i < DSAF_PRIO_NR; i++) { - p[13 + i] = hw_stats->rx_pfc[i]; - p[13 + i + DSAF_PRIO_NR] = hw_stats->tx_pfc[i]; + p[13 + i + 0 * DSAF_PRIO_NR] = hw_stats->rx_pfc[i]; + p[13 + i + 1 * DSAF_PRIO_NR] = hw_stats->tx_pfc[i]; } p[29] = hw_stats->tx_pkts; return &p[30]; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c index 8473287d4c8b..611b67b6f450 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c @@ -253,10 +253,9 @@ static void hns_dsaf_ge_srst_by_port(struct dsaf_device *dsaf_dev, u32 port, reg_val_1 = 0x1 << port; port_rst_off = dsaf_dev->mac_cb[port]->port_rst_off; /* there is difference between V1 and V2 in register.*/ - if (AE_IS_VER1(dsaf_dev->dsaf_ver)) - reg_val_2 = 0x1041041 << port_rst_off; - else - reg_val_2 = 0x2082082 << port_rst_off; + reg_val_2 = AE_IS_VER1(dsaf_dev->dsaf_ver) ? + 0x1041041 : 0x2082082; + reg_val_2 <<= port_rst_off; if (!dereset) { dsaf_write_sub(dsaf_dev, DSAF_SUB_SC_GE_RESET_REQ1_REG, @@ -272,12 +271,11 @@ static void hns_dsaf_ge_srst_by_port(struct dsaf_device *dsaf_dev, u32 port, reg_val_1); } } else { - reg_val_1 = 0x15540 << dsaf_dev->reset_offset; + reg_val_1 = 0x15540; + reg_val_2 = AE_IS_VER1(dsaf_dev->dsaf_ver) ? 0x100 : 0x40; - if (AE_IS_VER1(dsaf_dev->dsaf_ver)) - reg_val_2 = 0x100 << dsaf_dev->reset_offset; - else - reg_val_2 = 0x40 << dsaf_dev->reset_offset; + reg_val_1 <<= dsaf_dev->reset_offset; + reg_val_2 <<= dsaf_dev->reset_offset; if (!dereset) { dsaf_write_sub(dsaf_dev, DSAF_SUB_SC_GE_RESET_REQ1_REG, diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index d5297ecfe4a5..d7e1f8c7ae92 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -762,13 +762,13 @@ static int hns_nic_rx_poll_one(struct hns_nic_ring_data *ring_data, recv_pkts = 0, recv_bds = 0, clean_count = 0; recv: while (recv_pkts < budget && recv_bds < num) { - /* reuse or realloc buffers*/ + /* reuse or realloc buffers */ if (clean_count >= RCB_NOF_ALLOC_RX_BUFF_ONCE) { hns_nic_alloc_rx_buffers(ring_data, clean_count); clean_count = 0; } - /* poll one pkt*/ + /* poll one pkt */ err = hns_nic_poll_rx_skb(ring_data, &skb, &bnum); if (unlikely(!skb)) /* this fault cannot be repaired */ goto out; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index a395ca1405c3..ab33487a5321 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -165,13 +165,21 @@ static int hns_nic_get_settings(struct net_device *net_dev, cmd->advertising |= ADVERTISED_10000baseKR_Full; } - if (h->port_type == HNAE_PORT_SERVICE) { + switch (h->media_type) { + case HNAE_MEDIA_TYPE_FIBER: cmd->port = PORT_FIBRE; - cmd->supported |= SUPPORTED_Pause; - } else { + break; + case HNAE_MEDIA_TYPE_COPPER: cmd->port = PORT_TP; + break; + case HNAE_MEDIA_TYPE_UNKNOWN: + default: + break; } + if (!(AE_IS_VER1(priv->enet_ver) && h->port_type == HNAE_PORT_DEBUG)) + cmd->supported |= SUPPORTED_Pause; + cmd->transceiver = XCVR_EXTERNAL; cmd->mdio_support = (ETH_MDIO_SUPPORTS_C45 | ETH_MDIO_SUPPORTS_C22); hns_get_mdix_mode(net_dev, cmd); diff --git a/drivers/net/ethernet/hisilicon/hns_mdio.c b/drivers/net/ethernet/hisilicon/hns_mdio.c index 761a32fceceb..33f4c483af0f 100644 --- a/drivers/net/ethernet/hisilicon/hns_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns_mdio.c @@ -37,9 +37,19 @@ #define MDIO_TIMEOUT 1000000 +struct hns_mdio_sc_reg { + u16 mdio_clk_en; + u16 mdio_clk_dis; + u16 mdio_reset_req; + u16 mdio_reset_dreq; + u16 mdio_clk_st; + u16 mdio_reset_st; +}; + struct hns_mdio_device { void *vbase; /* mdio reg base address */ struct regmap *subctrl_vbase; + struct hns_mdio_sc_reg sc_reg; }; /* mdio reg */ @@ -93,7 +103,6 @@ enum mdio_c45_op_seq { #define MDIO_SC_CLK_DIS 0x33C #define MDIO_SC_RESET_REQ 0xA38 #define MDIO_SC_RESET_DREQ 0xA3C -#define MDIO_SC_CTRL 0x2010 #define MDIO_SC_CLK_ST 0x531C #define MDIO_SC_RESET_ST 0x5A1C @@ -353,6 +362,7 @@ static int hns_mdio_read(struct mii_bus *bus, int phy_id, int regnum) static int hns_mdio_reset(struct mii_bus *bus) { struct hns_mdio_device *mdio_dev = (struct hns_mdio_device *)bus->priv; + const struct hns_mdio_sc_reg *sc_reg; int ret; if (dev_of_node(bus->parent)) { @@ -361,9 +371,10 @@ static int hns_mdio_reset(struct mii_bus *bus) return -ENODEV; } + sc_reg = &mdio_dev->sc_reg; /* 1. reset req, and read reset st check */ - ret = mdio_sc_cfg_reg_write(mdio_dev, MDIO_SC_RESET_REQ, 0x1, - MDIO_SC_RESET_ST, 0x1, + ret = mdio_sc_cfg_reg_write(mdio_dev, sc_reg->mdio_reset_req, + 0x1, sc_reg->mdio_reset_st, 0x1, MDIO_CHECK_SET_ST); if (ret) { dev_err(&bus->dev, "MDIO reset fail\n"); @@ -371,8 +382,8 @@ static int hns_mdio_reset(struct mii_bus *bus) } /* 2. dis clk, and read clk st check */ - ret = mdio_sc_cfg_reg_write(mdio_dev, MDIO_SC_CLK_DIS, - 0x1, MDIO_SC_CLK_ST, 0x1, + ret = mdio_sc_cfg_reg_write(mdio_dev, sc_reg->mdio_clk_dis, + 0x1, sc_reg->mdio_clk_st, 0x1, MDIO_CHECK_CLR_ST); if (ret) { dev_err(&bus->dev, "MDIO dis clk fail\n"); @@ -380,8 +391,8 @@ static int hns_mdio_reset(struct mii_bus *bus) } /* 3. reset dreq, and read reset st check */ - ret = mdio_sc_cfg_reg_write(mdio_dev, MDIO_SC_RESET_DREQ, 0x1, - MDIO_SC_RESET_ST, 0x1, + ret = mdio_sc_cfg_reg_write(mdio_dev, sc_reg->mdio_reset_dreq, + 0x1, sc_reg->mdio_reset_st, 0x1, MDIO_CHECK_CLR_ST); if (ret) { dev_err(&bus->dev, "MDIO dis clk fail\n"); @@ -389,8 +400,8 @@ static int hns_mdio_reset(struct mii_bus *bus) } /* 4. en clk, and read clk st check */ - ret = mdio_sc_cfg_reg_write(mdio_dev, MDIO_SC_CLK_EN, - 0x1, MDIO_SC_CLK_ST, 0x1, + ret = mdio_sc_cfg_reg_write(mdio_dev, sc_reg->mdio_clk_en, + 0x1, sc_reg->mdio_clk_st, 0x1, MDIO_CHECK_SET_ST); if (ret) dev_err(&bus->dev, "MDIO en clk fail\n"); @@ -458,13 +469,54 @@ static int hns_mdio_probe(struct platform_device *pdev) snprintf(new_bus->id, MII_BUS_ID_SIZE, "%s-%s", "Mii", dev_name(&pdev->dev)); if (dev_of_node(&pdev->dev)) { - mdio_dev->subctrl_vbase = syscon_node_to_regmap( - of_parse_phandle(pdev->dev.of_node, - "subctrl-vbase", 0)); - if (IS_ERR(mdio_dev->subctrl_vbase)) { - dev_warn(&pdev->dev, "no syscon hisilicon,peri-c-subctrl\n"); + struct of_phandle_args reg_args; + + ret = of_parse_phandle_with_fixed_args(pdev->dev.of_node, + "subctrl-vbase", + 4, + 0, + ®_args); + if (!ret) { + mdio_dev->subctrl_vbase = + syscon_node_to_regmap(reg_args.np); + if (IS_ERR(mdio_dev->subctrl_vbase)) { + dev_warn(&pdev->dev, "syscon_node_to_regmap error\n"); + mdio_dev->subctrl_vbase = NULL; + } else { + if (reg_args.args_count == 4) { + mdio_dev->sc_reg.mdio_clk_en = + (u16)reg_args.args[0]; + mdio_dev->sc_reg.mdio_clk_dis = + (u16)reg_args.args[0] + 4; + mdio_dev->sc_reg.mdio_reset_req = + (u16)reg_args.args[1]; + mdio_dev->sc_reg.mdio_reset_dreq = + (u16)reg_args.args[1] + 4; + mdio_dev->sc_reg.mdio_clk_st = + (u16)reg_args.args[2]; + mdio_dev->sc_reg.mdio_reset_st = + (u16)reg_args.args[3]; + } else { + /* for compatible */ + mdio_dev->sc_reg.mdio_clk_en = + MDIO_SC_CLK_EN; + mdio_dev->sc_reg.mdio_clk_dis = + MDIO_SC_CLK_DIS; + mdio_dev->sc_reg.mdio_reset_req = + MDIO_SC_RESET_REQ; + mdio_dev->sc_reg.mdio_reset_dreq = + MDIO_SC_RESET_DREQ; + mdio_dev->sc_reg.mdio_clk_st = + MDIO_SC_CLK_ST; + mdio_dev->sc_reg.mdio_reset_st = + MDIO_SC_RESET_ST; + } + } + } else { + dev_warn(&pdev->dev, "find syscon ret = %#x\n", ret); mdio_dev->subctrl_vbase = NULL; } + ret = of_mdiobus_register(new_bus, pdev->dev.of_node); } else if (is_acpi_node(pdev->dev.fwnode)) { /* Clear all the IRQ properties */ diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 73f745205a1c..41f32c0b341e 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -154,16 +154,6 @@ void __ew32(struct e1000_hw *hw, unsigned long reg, u32 val) writel(val, hw->hw_addr + reg); } -static bool e1000e_vlan_used(struct e1000_adapter *adapter) -{ - u16 vid; - - for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID) - return true; - - return false; -} - /** * e1000_regdump - register printout routine * @hw: pointer to the HW structure @@ -3453,8 +3443,7 @@ static void e1000e_set_rx_mode(struct net_device *netdev) ew32(RCTL, rctl); - if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX || - e1000e_vlan_used(adapter)) + if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) e1000e_vlan_strip_enable(adapter); else e1000e_vlan_strip_disable(adapter); @@ -4363,7 +4352,8 @@ static cycle_t e1000e_cyclecounter_read(const struct cyclecounter *cc) time_delta = systim_next - systim; temp = time_delta; - rem = do_div(temp, incvalue); + /* VMWare users have seen incvalue of zero, don't div / 0 */ + rem = incvalue ? do_div(temp, incvalue) : (time_delta != 0); systim = systim_next; @@ -6926,6 +6916,14 @@ static netdev_features_t e1000_fix_features(struct net_device *netdev, if ((hw->mac.type >= e1000_pch2lan) && (netdev->mtu > ETH_DATA_LEN)) features &= ~NETIF_F_RXFCS; + /* Since there is no support for separate Rx/Tx vlan accel + * enable/disable make sure Tx flag is always in same state as Rx. + */ + if (features & NETIF_F_HW_VLAN_CTAG_RX) + features |= NETIF_F_HW_VLAN_CTAG_TX; + else + features &= ~NETIF_F_HW_VLAN_CTAG_TX; + return features; } diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h index fcf106e545c5..e98b86bf0ca1 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k.h @@ -406,7 +406,7 @@ static inline u16 fm10k_desc_unused(struct fm10k_ring *ring) (&(((union fm10k_rx_desc *)((R)->desc))[i])) #define FM10K_MAX_TXD_PWR 14 -#define FM10K_MAX_DATA_PER_TXD BIT(FM10K_MAX_TXD_PWR) +#define FM10K_MAX_DATA_PER_TXD (1u << FM10K_MAX_TXD_PWR) /* Tx Descriptors needed, worst case */ #define TXD_USE_COUNT(S) DIV_ROUND_UP((S), FM10K_MAX_DATA_PER_TXD) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c index 9c0d87503977..9b5195435c87 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c @@ -983,9 +983,10 @@ void fm10k_write_reta(struct fm10k_intfc *interface, const u32 *indir) /* generate a new table if we weren't given one */ for (j = 0; j < 4; j++) { if (indir) - n = indir[i + j]; + n = indir[4 * i + j]; else - n = ethtool_rxfh_indir_default(i + j, rss_i); + n = ethtool_rxfh_indir_default(4 * i + j, + rss_i); table[j] = n; } diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index 0e166e9c90c8..a9ccc1eb3ea4 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -56,7 +56,7 @@ static int __init fm10k_init_module(void) pr_info("%s\n", fm10k_copyright); /* create driver workqueue */ - fm10k_workqueue = create_workqueue("fm10k"); + fm10k_workqueue = alloc_workqueue("fm10k", WQ_MEM_RECLAIM, 0); fm10k_dbg_init(); @@ -77,7 +77,6 @@ static void __exit fm10k_exit_module(void) fm10k_dbg_exit(); /* destroy driver workqueue */ - flush_workqueue(fm10k_workqueue); destroy_workqueue(fm10k_workqueue); } module_exit(fm10k_exit_module); @@ -272,7 +271,7 @@ static bool fm10k_add_rx_frag(struct fm10k_rx_buffer *rx_buffer, #if (PAGE_SIZE < 8192) unsigned int truesize = FM10K_RX_BUFSZ; #else - unsigned int truesize = SKB_DATA_ALIGN(size); + unsigned int truesize = ALIGN(size, 512); #endif unsigned int pull_len; diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index b9609afa5ca3..5387b3a96489 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -445,6 +445,7 @@ struct igb_adapter { unsigned long ptp_tx_start; unsigned long last_rx_ptp_check; unsigned long last_rx_timestamp; + unsigned int ptp_flags; spinlock_t tmreg_lock; struct cyclecounter cc; struct timecounter tc; @@ -474,12 +475,15 @@ struct igb_adapter { u16 eee_advert; }; +/* flags controlling PTP/1588 function */ +#define IGB_PTP_ENABLED BIT(0) +#define IGB_PTP_OVERFLOW_CHECK BIT(1) + #define IGB_FLAG_HAS_MSI BIT(0) #define IGB_FLAG_DCA_ENABLED BIT(1) #define IGB_FLAG_QUAD_PORT_A BIT(2) #define IGB_FLAG_QUEUE_PAIRS BIT(3) #define IGB_FLAG_DMAC BIT(4) -#define IGB_FLAG_PTP BIT(5) #define IGB_FLAG_RSS_FIELD_IPV4_UDP BIT(6) #define IGB_FLAG_RSS_FIELD_IPV6_UDP BIT(7) #define IGB_FLAG_WOL_SUPPORTED BIT(8) @@ -546,6 +550,7 @@ void igb_set_fw_version(struct igb_adapter *); void igb_ptp_init(struct igb_adapter *adapter); void igb_ptp_stop(struct igb_adapter *adapter); void igb_ptp_reset(struct igb_adapter *adapter); +void igb_ptp_suspend(struct igb_adapter *adapter); void igb_ptp_rx_hang(struct igb_adapter *adapter); void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb); void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, unsigned char *va, diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index ef3d642f5ff2..9bcba42abb91 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2027,7 +2027,8 @@ void igb_reset(struct igb_adapter *adapter) wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE); /* Re-enable PTP, where applicable. */ - igb_ptp_reset(adapter); + if (adapter->ptp_flags & IGB_PTP_ENABLED) + igb_ptp_reset(adapter); igb_get_phy_info(hw); } @@ -6855,12 +6856,12 @@ static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer, **/ static bool igb_add_rx_frag(struct igb_ring *rx_ring, struct igb_rx_buffer *rx_buffer, + unsigned int size, union e1000_adv_rx_desc *rx_desc, struct sk_buff *skb) { struct page *page = rx_buffer->page; unsigned char *va = page_address(page) + rx_buffer->page_offset; - unsigned int size = le16_to_cpu(rx_desc->wb.upper.length); #if (PAGE_SIZE < 8192) unsigned int truesize = IGB_RX_BUFSZ; #else @@ -6912,6 +6913,7 @@ static struct sk_buff *igb_fetch_rx_buffer(struct igb_ring *rx_ring, union e1000_adv_rx_desc *rx_desc, struct sk_buff *skb) { + unsigned int size = le16_to_cpu(rx_desc->wb.upper.length); struct igb_rx_buffer *rx_buffer; struct page *page; @@ -6947,11 +6949,11 @@ static struct sk_buff *igb_fetch_rx_buffer(struct igb_ring *rx_ring, dma_sync_single_range_for_cpu(rx_ring->dev, rx_buffer->dma, rx_buffer->page_offset, - IGB_RX_BUFSZ, + size, DMA_FROM_DEVICE); /* pull page into skb */ - if (igb_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) { + if (igb_add_rx_frag(rx_ring, rx_buffer, size, rx_desc, skb)) { /* hand second half of page back to the ring */ igb_reuse_rx_page(rx_ring, rx_buffer); } else { @@ -7527,6 +7529,8 @@ static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake, if (netif_running(netdev)) __igb_close(netdev, true); + igb_ptp_suspend(adapter); + igb_clear_interrupt_scheme(adapter); #ifdef CONFIG_PM diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index f097c5a8ab93..e61b647f5f2a 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -684,6 +684,7 @@ void igb_ptp_rx_hang(struct igb_adapter *adapter) u32 tsyncrxctl = rd32(E1000_TSYNCRXCTL); unsigned long rx_event; + /* Other hardware uses per-packet timestamps */ if (hw->mac.type != e1000_82576) return; @@ -1042,6 +1043,13 @@ int igb_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr) -EFAULT : 0; } +/** + * igb_ptp_init - Initialize PTP functionality + * @adapter: Board private structure + * + * This function is called at device probe to initialize the PTP + * functionality. + */ void igb_ptp_init(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; @@ -1064,8 +1072,7 @@ void igb_ptp_init(struct igb_adapter *adapter) adapter->cc.mask = CYCLECOUNTER_MASK(64); adapter->cc.mult = 1; adapter->cc.shift = IGB_82576_TSYNC_SHIFT; - /* Dial the nominal frequency. */ - wr32(E1000_TIMINCA, INCPERIOD_82576 | INCVALUE_82576); + adapter->ptp_flags |= IGB_PTP_OVERFLOW_CHECK; break; case e1000_82580: case e1000_i354: @@ -1084,8 +1091,7 @@ void igb_ptp_init(struct igb_adapter *adapter) adapter->cc.mask = CYCLECOUNTER_MASK(IGB_NBITS_82580); adapter->cc.mult = 1; adapter->cc.shift = 0; - /* Enable the timer functions by clearing bit 31. */ - wr32(E1000_TSAUXC, 0x0); + adapter->ptp_flags |= IGB_PTP_OVERFLOW_CHECK; break; case e1000_i210: case e1000_i211: @@ -1110,44 +1116,24 @@ void igb_ptp_init(struct igb_adapter *adapter) adapter->ptp_caps.settime64 = igb_ptp_settime_i210; adapter->ptp_caps.enable = igb_ptp_feature_enable_i210; adapter->ptp_caps.verify = igb_ptp_verify_pin; - /* Enable the timer functions by clearing bit 31. */ - wr32(E1000_TSAUXC, 0x0); break; default: adapter->ptp_clock = NULL; return; } - wrfl(); - spin_lock_init(&adapter->tmreg_lock); INIT_WORK(&adapter->ptp_tx_work, igb_ptp_tx_work); - /* Initialize the clock and overflow work for devices that need it. */ - if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) { - struct timespec64 ts = ktime_to_timespec64(ktime_get_real()); - - igb_ptp_settime_i210(&adapter->ptp_caps, &ts); - } else { - timecounter_init(&adapter->tc, &adapter->cc, - ktime_to_ns(ktime_get_real())); - + if (adapter->ptp_flags & IGB_PTP_OVERFLOW_CHECK) INIT_DELAYED_WORK(&adapter->ptp_overflow_work, igb_ptp_overflow_check); - schedule_delayed_work(&adapter->ptp_overflow_work, - IGB_SYSTIM_OVERFLOW_PERIOD); - } - - /* Initialize the time sync interrupts for devices that support it. */ - if (hw->mac.type >= e1000_82580) { - wr32(E1000_TSIM, TSYNC_INTERRUPTS); - wr32(E1000_IMS, E1000_IMS_TS); - } - adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF; + igb_ptp_reset(adapter); + adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps, &adapter->pdev->dev); if (IS_ERR(adapter->ptp_clock)) { @@ -1156,32 +1142,24 @@ void igb_ptp_init(struct igb_adapter *adapter) } else { dev_info(&adapter->pdev->dev, "added PHC on %s\n", adapter->netdev->name); - adapter->flags |= IGB_FLAG_PTP; + adapter->ptp_flags |= IGB_PTP_ENABLED; } } /** - * igb_ptp_stop - Disable PTP device and stop the overflow check. - * @adapter: Board private structure. + * igb_ptp_suspend - Disable PTP work items and prepare for suspend + * @adapter: Board private structure * - * This function stops the PTP support and cancels the delayed work. - **/ -void igb_ptp_stop(struct igb_adapter *adapter) + * This function stops the overflow check work and PTP Tx timestamp work, and + * will prepare the device for OS suspend. + */ +void igb_ptp_suspend(struct igb_adapter *adapter) { - switch (adapter->hw.mac.type) { - case e1000_82576: - case e1000_82580: - case e1000_i354: - case e1000_i350: - cancel_delayed_work_sync(&adapter->ptp_overflow_work); - break; - case e1000_i210: - case e1000_i211: - /* No delayed work to cancel. */ - break; - default: + if (!(adapter->ptp_flags & IGB_PTP_ENABLED)) return; - } + + if (adapter->ptp_flags & IGB_PTP_OVERFLOW_CHECK) + cancel_delayed_work_sync(&adapter->ptp_overflow_work); cancel_work_sync(&adapter->ptp_tx_work); if (adapter->ptp_tx_skb) { @@ -1189,12 +1167,23 @@ void igb_ptp_stop(struct igb_adapter *adapter) adapter->ptp_tx_skb = NULL; clear_bit_unlock(__IGB_PTP_TX_IN_PROGRESS, &adapter->state); } +} + +/** + * igb_ptp_stop - Disable PTP device and stop the overflow check. + * @adapter: Board private structure. + * + * This function stops the PTP support and cancels the delayed work. + **/ +void igb_ptp_stop(struct igb_adapter *adapter) +{ + igb_ptp_suspend(adapter); if (adapter->ptp_clock) { ptp_clock_unregister(adapter->ptp_clock); dev_info(&adapter->pdev->dev, "removed PHC on %s\n", adapter->netdev->name); - adapter->flags &= ~IGB_FLAG_PTP; + adapter->ptp_flags &= ~IGB_PTP_ENABLED; } } @@ -1209,9 +1198,6 @@ void igb_ptp_reset(struct igb_adapter *adapter) struct e1000_hw *hw = &adapter->hw; unsigned long flags; - if (!(adapter->flags & IGB_FLAG_PTP)) - return; - /* reset the tstamp_config */ igb_ptp_set_timestamp_mode(adapter, &adapter->tstamp_config); @@ -1248,4 +1234,10 @@ void igb_ptp_reset(struct igb_adapter *adapter) } out: spin_unlock_irqrestore(&adapter->tmreg_lock, flags); + + wrfl(); + + if (adapter->ptp_flags & IGB_PTP_OVERFLOW_CHECK) + schedule_delayed_work(&adapter->ptp_overflow_work, + IGB_SYSTIM_OVERFLOW_PERIOD); } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 59b771b9b354..8a8450788124 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -2991,10 +2991,15 @@ static int ixgbe_get_ts_info(struct net_device *dev, { struct ixgbe_adapter *adapter = netdev_priv(dev); + /* we always support timestamping disabled */ + info->rx_filters = BIT(HWTSTAMP_FILTER_NONE); + switch (adapter->hw.mac.type) { case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: + info->rx_filters |= BIT(HWTSTAMP_FILTER_ALL); + /* fallthrough */ case ixgbe_mac_X540: case ixgbe_mac_82599EB: info->so_timestamping = @@ -3014,8 +3019,7 @@ static int ixgbe_get_ts_info(struct net_device *dev, BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON); - info->rx_filters = - BIT(HWTSTAMP_FILTER_NONE) | + info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V1_L4_SYNC) | BIT(HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ) | BIT(HWTSTAMP_FILTER_PTP_V2_EVENT); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 468fa9ddfa06..fd5a761c68f3 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -8300,14 +8300,53 @@ int ixgbe_setup_tc(struct net_device *dev, u8 tc) static int ixgbe_delete_clsu32(struct ixgbe_adapter *adapter, struct tc_cls_u32_offload *cls) { + u32 hdl = cls->knode.handle; u32 uhtid = TC_U32_USERHTID(cls->knode.handle); - u32 loc; - int err; + u32 loc = cls->knode.handle & 0xfffff; + int err = 0, i, j; + struct ixgbe_jump_table *jump = NULL; + + if (loc > IXGBE_MAX_HW_ENTRIES) + return -EINVAL; if ((uhtid != 0x800) && (uhtid >= IXGBE_MAX_LINK_HANDLE)) return -EINVAL; - loc = cls->knode.handle & 0xfffff; + /* Clear this filter in the link data it is associated with */ + if (uhtid != 0x800) { + jump = adapter->jump_tables[uhtid]; + if (!jump) + return -EINVAL; + if (!test_bit(loc - 1, jump->child_loc_map)) + return -EINVAL; + clear_bit(loc - 1, jump->child_loc_map); + } + + /* Check if the filter being deleted is a link */ + for (i = 1; i < IXGBE_MAX_LINK_HANDLE; i++) { + jump = adapter->jump_tables[i]; + if (jump && jump->link_hdl == hdl) { + /* Delete filters in the hardware in the child hash + * table associated with this link + */ + for (j = 0; j < IXGBE_MAX_HW_ENTRIES; j++) { + if (!test_bit(j, jump->child_loc_map)) + continue; + spin_lock(&adapter->fdir_perfect_lock); + err = ixgbe_update_ethtool_fdir_entry(adapter, + NULL, + j + 1); + spin_unlock(&adapter->fdir_perfect_lock); + clear_bit(j, jump->child_loc_map); + } + /* Remove resources for this link */ + kfree(jump->input); + kfree(jump->mask); + kfree(jump); + adapter->jump_tables[i] = NULL; + return err; + } + } spin_lock(&adapter->fdir_perfect_lock); err = ixgbe_update_ethtool_fdir_entry(adapter, NULL, loc); @@ -8541,6 +8580,18 @@ static int ixgbe_configure_clsu32(struct ixgbe_adapter *adapter, if (!test_bit(link_uhtid - 1, &adapter->tables)) return err; + /* Multiple filters as links to the same hash table are not + * supported. To add a new filter with the same next header + * but different match/jump conditions, create a new hash table + * and link to it. + */ + if (adapter->jump_tables[link_uhtid] && + (adapter->jump_tables[link_uhtid])->link_hdl) { + e_err(drv, "Link filter exists for link: %x\n", + link_uhtid); + return err; + } + for (i = 0; nexthdr[i].jump; i++) { if (nexthdr[i].o != cls->knode.sel->offoff || nexthdr[i].s != cls->knode.sel->offshift || @@ -8562,6 +8613,8 @@ static int ixgbe_configure_clsu32(struct ixgbe_adapter *adapter, } jump->input = input; jump->mask = mask; + jump->link_hdl = cls->knode.handle; + err = ixgbe_clsu32_build_input(input, mask, cls, field_ptr, &nexthdr[i]); if (!err) { @@ -8589,6 +8642,20 @@ static int ixgbe_configure_clsu32(struct ixgbe_adapter *adapter, if ((adapter->jump_tables[uhtid])->mask) memcpy(mask, (adapter->jump_tables[uhtid])->mask, sizeof(*mask)); + + /* Lookup in all child hash tables if this location is already + * filled with a filter + */ + for (i = 1; i < IXGBE_MAX_LINK_HANDLE; i++) { + struct ixgbe_jump_table *link = adapter->jump_tables[i]; + + if (link && (test_bit(loc - 1, link->child_loc_map))) { + e_err(drv, "Filter exists in location: %x\n", + loc); + err = -EINVAL; + goto err_out; + } + } } err = ixgbe_clsu32_build_input(input, mask, cls, field_ptr, NULL); if (err) @@ -8620,6 +8687,9 @@ static int ixgbe_configure_clsu32(struct ixgbe_adapter *adapter, ixgbe_update_ethtool_fdir_entry(adapter, input, input->sw_idx); spin_unlock(&adapter->fdir_perfect_lock); + if ((uhtid != 0x800) && (adapter->jump_tables[uhtid])) + set_bit(loc - 1, (adapter->jump_tables[uhtid])->child_loc_map); + kfree(mask); return err; err_out_w_lock: diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h index a8bed3d887f7..538a1c5475b6 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_model.h @@ -42,8 +42,12 @@ struct ixgbe_jump_table { struct ixgbe_mat_field *mat; struct ixgbe_fdir_filter *input; union ixgbe_atr_input *mask; + u32 link_hdl; + unsigned long child_loc_map[32]; }; +#define IXGBE_MAX_HW_ENTRIES 2045 + static inline int ixgbe_mat_prgm_sip(struct ixgbe_fdir_filter *input, union ixgbe_atr_input *mask, u32 val, u32 m) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index c5caacdd193d..8618599dfd6f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -954,6 +954,7 @@ static int ixgbe_set_vf_macvlan_msg(struct ixgbe_adapter *adapter, struct ixgbe_hw *hw = &adapter->hw; hw->mac.ops.set_mac_anti_spoofing(hw, false, vf); + hw->mac.ops.set_vlan_anti_spoofing(hw, false, vf); } } diff --git a/drivers/net/ethernet/intel/ixgbevf/mbx.c b/drivers/net/ethernet/intel/ixgbevf/mbx.c index 61a80da8b6f0..2819abc454c7 100644 --- a/drivers/net/ethernet/intel/ixgbevf/mbx.c +++ b/drivers/net/ethernet/intel/ixgbevf/mbx.c @@ -85,7 +85,7 @@ static s32 ixgbevf_poll_for_ack(struct ixgbe_hw *hw) static s32 ixgbevf_read_posted_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size) { struct ixgbe_mbx_info *mbx = &hw->mbx; - s32 ret_val = -IXGBE_ERR_MBX; + s32 ret_val = IXGBE_ERR_MBX; if (!mbx->ops.read) goto out; @@ -111,7 +111,7 @@ out: static s32 ixgbevf_write_posted_mbx(struct ixgbe_hw *hw, u32 *msg, u16 size) { struct ixgbe_mbx_info *mbx = &hw->mbx; - s32 ret_val = -IXGBE_ERR_MBX; + s32 ret_val = IXGBE_ERR_MBX; /* exit if either we can't write or there isn't a defined timeout */ if (!mbx->ops.write || !mbx->timeout) diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c index 0d2f8e934c59..91e09d68b7e2 100644 --- a/drivers/net/ethernet/lantiq_etop.c +++ b/drivers/net/ethernet/lantiq_etop.c @@ -102,7 +102,6 @@ struct ltq_etop_priv { struct resource *res; struct mii_bus *mii_bus; - struct phy_device *phydev; struct ltq_etop_chan ch[MAX_DMA_CHAN]; int tx_free[MAX_DMA_CHAN >> 1]; @@ -305,34 +304,16 @@ ltq_etop_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) } static int -ltq_etop_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) -{ - struct ltq_etop_priv *priv = netdev_priv(dev); - - return phy_ethtool_gset(priv->phydev, cmd); -} - -static int -ltq_etop_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) -{ - struct ltq_etop_priv *priv = netdev_priv(dev); - - return phy_ethtool_sset(priv->phydev, cmd); -} - -static int ltq_etop_nway_reset(struct net_device *dev) { - struct ltq_etop_priv *priv = netdev_priv(dev); - - return phy_start_aneg(priv->phydev); + return phy_start_aneg(dev->phydev); } static const struct ethtool_ops ltq_etop_ethtool_ops = { .get_drvinfo = ltq_etop_get_drvinfo, - .get_settings = ltq_etop_get_settings, - .set_settings = ltq_etop_set_settings, .nway_reset = ltq_etop_nway_reset, + .get_link_ksettings = phy_ethtool_get_link_ksettings, + .set_link_ksettings = phy_ethtool_set_link_ksettings, }; static int @@ -401,7 +382,6 @@ ltq_etop_mdio_probe(struct net_device *dev) | SUPPORTED_TP); phydev->advertising = phydev->supported; - priv->phydev = phydev; phy_attached_info(phydev); return 0; @@ -450,7 +430,7 @@ ltq_etop_mdio_cleanup(struct net_device *dev) { struct ltq_etop_priv *priv = netdev_priv(dev); - phy_disconnect(priv->phydev); + phy_disconnect(dev->phydev); mdiobus_unregister(priv->mii_bus); mdiobus_free(priv->mii_bus); } @@ -469,7 +449,7 @@ ltq_etop_open(struct net_device *dev) ltq_dma_open(&ch->dma); napi_enable(&ch->napi); } - phy_start(priv->phydev); + phy_start(dev->phydev); netif_tx_start_all_queues(dev); return 0; } @@ -481,7 +461,7 @@ ltq_etop_stop(struct net_device *dev) int i; netif_tx_stop_all_queues(dev); - phy_stop(priv->phydev); + phy_stop(dev->phydev); for (i = 0; i < MAX_DMA_CHAN; i++) { struct ltq_etop_chan *ch = &priv->ch[i]; @@ -556,10 +536,8 @@ ltq_etop_change_mtu(struct net_device *dev, int new_mtu) static int ltq_etop_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { - struct ltq_etop_priv *priv = netdev_priv(dev); - /* TODO: mii-toll reports "No MII transceiver present!." ?!*/ - return phy_mii_ioctl(priv->phydev, rq, cmd); + return phy_mii_ioctl(dev->phydev, rq, cmd); } static int diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index a6d26d351dfc..d5d263bda333 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -3458,6 +3458,8 @@ static int mvneta_open(struct net_device *dev) return 0; err_free_irq: + unregister_cpu_notifier(&pp->cpu_notifier); + on_each_cpu(mvneta_percpu_disable, pp, true); free_percpu_irq(pp->dev->irq, pp->ports); err_cleanup_txqs: mvneta_cleanup_txqs(pp); diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index d1cdc2d76151..760f3d71eda3 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -328,22 +328,24 @@ static void mtk_mdio_cleanup(struct mtk_eth *eth) static inline void mtk_irq_disable(struct mtk_eth *eth, u32 mask) { + unsigned long flags; u32 val; + spin_lock_irqsave(ð->irq_lock, flags); val = mtk_r32(eth, MTK_QDMA_INT_MASK); mtk_w32(eth, val & ~mask, MTK_QDMA_INT_MASK); - /* flush write */ - mtk_r32(eth, MTK_QDMA_INT_MASK); + spin_unlock_irqrestore(ð->irq_lock, flags); } static inline void mtk_irq_enable(struct mtk_eth *eth, u32 mask) { + unsigned long flags; u32 val; + spin_lock_irqsave(ð->irq_lock, flags); val = mtk_r32(eth, MTK_QDMA_INT_MASK); mtk_w32(eth, val | mask, MTK_QDMA_INT_MASK); - /* flush write */ - mtk_r32(eth, MTK_QDMA_INT_MASK); + spin_unlock_irqrestore(ð->irq_lock, flags); } static int mtk_set_mac_address(struct net_device *dev, void *p) @@ -798,7 +800,7 @@ drop: } static int mtk_poll_rx(struct napi_struct *napi, int budget, - struct mtk_eth *eth, u32 rx_intr) + struct mtk_eth *eth) { struct mtk_rx_ring *ring = ð->rx_ring; int idx = ring->calc_idx; @@ -886,22 +888,22 @@ release_desc: } if (done < budget) - mtk_w32(eth, rx_intr, MTK_QMTK_INT_STATUS); + mtk_w32(eth, MTK_RX_DONE_INT, MTK_QMTK_INT_STATUS); return done; } -static int mtk_poll_tx(struct mtk_eth *eth, int budget, bool *tx_again) +static int mtk_poll_tx(struct mtk_eth *eth, int budget) { struct mtk_tx_ring *ring = ð->tx_ring; struct mtk_tx_dma *desc; struct sk_buff *skb; struct mtk_tx_buf *tx_buf; - int total = 0, done[MTK_MAX_DEVS]; + unsigned int done[MTK_MAX_DEVS]; unsigned int bytes[MTK_MAX_DEVS]; u32 cpu, dma; static int condition; - int i; + int total = 0, i; memset(done, 0, sizeof(done)); memset(bytes, 0, sizeof(bytes)); @@ -952,15 +954,6 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget, bool *tx_again) total += done[i]; } - /* read hw index again make sure no new tx packet */ - if (cpu != dma || cpu != mtk_r32(eth, MTK_QTX_DRX_PTR)) - *tx_again = true; - else - mtk_w32(eth, MTK_TX_DONE_INT, MTK_QMTK_INT_STATUS); - - if (!total) - return 0; - if (mtk_queue_stopped(eth) && (atomic_read(&ring->free_count) > ring->thresh)) mtk_wake_queue(eth); @@ -968,49 +961,75 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget, bool *tx_again) return total; } -static int mtk_poll(struct napi_struct *napi, int budget) +static void mtk_handle_status_irq(struct mtk_eth *eth) { - struct mtk_eth *eth = container_of(napi, struct mtk_eth, rx_napi); - u32 status, status2, mask, tx_intr, rx_intr, status_intr; - int tx_done, rx_done; - bool tx_again = false; + u32 status2 = mtk_r32(eth, MTK_INT_STATUS2); - status = mtk_r32(eth, MTK_QMTK_INT_STATUS); - status2 = mtk_r32(eth, MTK_INT_STATUS2); - tx_intr = MTK_TX_DONE_INT; - rx_intr = MTK_RX_DONE_INT; - status_intr = (MTK_GDM1_AF | MTK_GDM2_AF); - tx_done = 0; - rx_done = 0; - tx_again = 0; + if (unlikely(status2 & (MTK_GDM1_AF | MTK_GDM2_AF))) { + mtk_stats_update(eth); + mtk_w32(eth, (MTK_GDM1_AF | MTK_GDM2_AF), + MTK_INT_STATUS2); + } +} - if (status & tx_intr) - tx_done = mtk_poll_tx(eth, budget, &tx_again); +static int mtk_napi_tx(struct napi_struct *napi, int budget) +{ + struct mtk_eth *eth = container_of(napi, struct mtk_eth, tx_napi); + u32 status, mask; + int tx_done = 0; - if (status & rx_intr) - rx_done = mtk_poll_rx(napi, budget, eth, rx_intr); + mtk_handle_status_irq(eth); + mtk_w32(eth, MTK_TX_DONE_INT, MTK_QMTK_INT_STATUS); + tx_done = mtk_poll_tx(eth, budget); - if (unlikely(status2 & status_intr)) { - mtk_stats_update(eth); - mtk_w32(eth, status_intr, MTK_INT_STATUS2); + if (unlikely(netif_msg_intr(eth))) { + status = mtk_r32(eth, MTK_QMTK_INT_STATUS); + mask = mtk_r32(eth, MTK_QDMA_INT_MASK); + dev_info(eth->dev, + "done tx %d, intr 0x%08x/0x%x\n", + tx_done, status, mask); } + if (tx_done == budget) + return budget; + + status = mtk_r32(eth, MTK_QMTK_INT_STATUS); + if (status & MTK_TX_DONE_INT) + return budget; + + napi_complete(napi); + mtk_irq_enable(eth, MTK_TX_DONE_INT); + + return tx_done; +} + +static int mtk_napi_rx(struct napi_struct *napi, int budget) +{ + struct mtk_eth *eth = container_of(napi, struct mtk_eth, rx_napi); + u32 status, mask; + int rx_done = 0; + + mtk_handle_status_irq(eth); + mtk_w32(eth, MTK_RX_DONE_INT, MTK_QMTK_INT_STATUS); + rx_done = mtk_poll_rx(napi, budget, eth); + if (unlikely(netif_msg_intr(eth))) { + status = mtk_r32(eth, MTK_QMTK_INT_STATUS); mask = mtk_r32(eth, MTK_QDMA_INT_MASK); - netdev_info(eth->netdev[0], - "done tx %d, rx %d, intr 0x%08x/0x%x\n", - tx_done, rx_done, status, mask); + dev_info(eth->dev, + "done rx %d, intr 0x%08x/0x%x\n", + rx_done, status, mask); } - if (tx_again || rx_done == budget) + if (rx_done == budget) return budget; status = mtk_r32(eth, MTK_QMTK_INT_STATUS); - if (status & (tx_intr | rx_intr)) + if (status & MTK_RX_DONE_INT) return budget; napi_complete(napi); - mtk_irq_enable(eth, tx_intr | rx_intr); + mtk_irq_enable(eth, MTK_RX_DONE_INT); return rx_done; } @@ -1246,22 +1265,26 @@ static void mtk_tx_timeout(struct net_device *dev) schedule_work(ð->pending_work); } -static irqreturn_t mtk_handle_irq(int irq, void *_eth) +static irqreturn_t mtk_handle_irq_rx(int irq, void *_eth) { struct mtk_eth *eth = _eth; - u32 status; - status = mtk_r32(eth, MTK_QMTK_INT_STATUS); - if (unlikely(!status)) - return IRQ_NONE; + if (likely(napi_schedule_prep(ð->rx_napi))) { + __napi_schedule(ð->rx_napi); + mtk_irq_disable(eth, MTK_RX_DONE_INT); + } - if (likely(status & (MTK_RX_DONE_INT | MTK_TX_DONE_INT))) { - if (likely(napi_schedule_prep(ð->rx_napi))) - __napi_schedule(ð->rx_napi); - } else { - mtk_w32(eth, status, MTK_QMTK_INT_STATUS); + return IRQ_HANDLED; +} + +static irqreturn_t mtk_handle_irq_tx(int irq, void *_eth) +{ + struct mtk_eth *eth = _eth; + + if (likely(napi_schedule_prep(ð->tx_napi))) { + __napi_schedule(ð->tx_napi); + mtk_irq_disable(eth, MTK_TX_DONE_INT); } - mtk_irq_disable(eth, (MTK_RX_DONE_INT | MTK_TX_DONE_INT)); return IRQ_HANDLED; } @@ -1274,7 +1297,7 @@ static void mtk_poll_controller(struct net_device *dev) u32 int_mask = MTK_TX_DONE_INT | MTK_RX_DONE_INT; mtk_irq_disable(eth, int_mask); - mtk_handle_irq(dev->irq, dev); + mtk_handle_irq_rx(eth->irq[2], dev); mtk_irq_enable(eth, int_mask); } #endif @@ -1310,6 +1333,7 @@ static int mtk_open(struct net_device *dev) if (err) return err; + napi_enable(ð->tx_napi); napi_enable(ð->rx_napi); mtk_irq_enable(eth, MTK_TX_DONE_INT | MTK_RX_DONE_INT); } @@ -1358,6 +1382,7 @@ static int mtk_stop(struct net_device *dev) return 0; mtk_irq_disable(eth, MTK_TX_DONE_INT | MTK_RX_DONE_INT); + napi_disable(ð->tx_napi); napi_disable(ð->rx_napi); mtk_stop_dma(eth, MTK_QDMA_GLO_CFG); @@ -1395,7 +1420,11 @@ static int __init mtk_hw_init(struct mtk_eth *eth) /* Enable RX VLan Offloading */ mtk_w32(eth, 1, MTK_CDMP_EG_CTRL); - err = devm_request_irq(eth->dev, eth->irq, mtk_handle_irq, 0, + err = devm_request_irq(eth->dev, eth->irq[1], mtk_handle_irq_tx, 0, + dev_name(eth->dev), eth); + if (err) + return err; + err = devm_request_irq(eth->dev, eth->irq[2], mtk_handle_irq_rx, 0, dev_name(eth->dev), eth); if (err) return err; @@ -1411,7 +1440,11 @@ static int __init mtk_hw_init(struct mtk_eth *eth) mtk_w32(eth, 0, MTK_RST_GL); /* FE int grouping */ - mtk_w32(eth, 0, MTK_FE_INT_GRP); + mtk_w32(eth, MTK_TX_DONE_INT, MTK_PDMA_INT_GRP1); + mtk_w32(eth, MTK_RX_DONE_INT, MTK_PDMA_INT_GRP2); + mtk_w32(eth, MTK_TX_DONE_INT, MTK_QDMA_INT_GRP1); + mtk_w32(eth, MTK_RX_DONE_INT, MTK_QDMA_INT_GRP2); + mtk_w32(eth, 0x21021000, MTK_FE_INT_GRP); for (i = 0; i < 2; i++) { u32 val = mtk_r32(eth, MTK_GDMA_FWD_CFG(i)); @@ -1459,7 +1492,8 @@ static void mtk_uninit(struct net_device *dev) phy_disconnect(mac->phy_dev); mtk_mdio_cleanup(eth); mtk_irq_disable(eth, ~0); - free_irq(dev->irq, dev); + free_irq(eth->irq[1], dev); + free_irq(eth->irq[2], dev); } static int mtk_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) @@ -1733,10 +1767,10 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np) dev_err(eth->dev, "error bringing up device\n"); goto free_netdev; } - eth->netdev[id]->irq = eth->irq; + eth->netdev[id]->irq = eth->irq[0]; netif_info(eth, probe, eth->netdev[id], "mediatek frame engine at 0x%08lx, irq %d\n", - eth->netdev[id]->base_addr, eth->netdev[id]->irq); + eth->netdev[id]->base_addr, eth->irq[0]); return 0; @@ -1753,6 +1787,7 @@ static int mtk_probe(struct platform_device *pdev) struct mtk_soc_data *soc; struct mtk_eth *eth; int err; + int i; match = of_match_device(of_mtk_match, &pdev->dev); soc = (struct mtk_soc_data *)match->data; @@ -1766,6 +1801,7 @@ static int mtk_probe(struct platform_device *pdev) return PTR_ERR(eth->base); spin_lock_init(ð->page_lock); + spin_lock_init(ð->irq_lock); eth->ethsys = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, "mediatek,ethsys"); @@ -1787,10 +1823,12 @@ static int mtk_probe(struct platform_device *pdev) return PTR_ERR(eth->rstc); } - eth->irq = platform_get_irq(pdev, 0); - if (eth->irq < 0) { - dev_err(&pdev->dev, "no IRQ resource found\n"); - return -ENXIO; + for (i = 0; i < 3; i++) { + eth->irq[i] = platform_get_irq(pdev, i); + if (eth->irq[i] < 0) { + dev_err(&pdev->dev, "no IRQ%d resource found\n", i); + return -ENXIO; + } } eth->clk_ethif = devm_clk_get(&pdev->dev, "ethif"); @@ -1831,7 +1869,9 @@ static int mtk_probe(struct platform_device *pdev) * for NAPI to work */ init_dummy_netdev(ð->dummy_dev); - netif_napi_add(ð->dummy_dev, ð->rx_napi, mtk_poll, + netif_napi_add(ð->dummy_dev, ð->tx_napi, mtk_napi_tx, + MTK_NAPI_WEIGHT); + netif_napi_add(ð->dummy_dev, ð->rx_napi, mtk_napi_rx, MTK_NAPI_WEIGHT); platform_set_drvdata(pdev, eth); @@ -1852,6 +1892,7 @@ static int mtk_remove(struct platform_device *pdev) clk_disable_unprepare(eth->clk_gp1); clk_disable_unprepare(eth->clk_gp2); + netif_napi_del(ð->tx_napi); netif_napi_del(ð->rx_napi); mtk_cleanup(eth); platform_set_drvdata(pdev, NULL); @@ -1869,7 +1910,6 @@ static struct platform_driver mtk_driver = { .remove = mtk_remove, .driver = { .name = "mtk_soc_eth", - .owner = THIS_MODULE, .of_match_table = of_mtk_match, }, }; diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index a5eb7c62306b..f82e3acb947b 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -68,6 +68,10 @@ /* Unicast Filter MAC Address Register - High */ #define MTK_GDMA_MAC_ADRH(x) (0x50C + (x * 0x1000)) +/* PDMA Interrupt grouping registers */ +#define MTK_PDMA_INT_GRP1 0xa50 +#define MTK_PDMA_INT_GRP2 0xa54 + /* QDMA TX Queue Configuration Registers */ #define MTK_QTX_CFG(x) (0x1800 + (x * 0x10)) #define QDMA_RES_THRES 4 @@ -125,6 +129,11 @@ #define MTK_TX_DONE_INT (MTK_TX_DONE_INT0 | MTK_TX_DONE_INT1 | \ MTK_TX_DONE_INT2 | MTK_TX_DONE_INT3) +/* QDMA Interrupt grouping registers */ +#define MTK_QDMA_INT_GRP1 0x1a20 +#define MTK_QDMA_INT_GRP2 0x1a24 +#define MTK_RLS_DONE_INT BIT(0) + /* QDMA Interrupt Status Register */ #define MTK_QDMA_INT_MASK 0x1A1C @@ -356,7 +365,8 @@ struct mtk_rx_ring { * @dma_refcnt: track how many netdevs are using the DMA engine * @tx_ring: Pointer to the memore holding info about the TX ring * @rx_ring: Pointer to the memore holding info about the RX ring - * @rx_napi: The NAPI struct + * @tx_napi: The TX NAPI struct + * @rx_napi: The RX NAPI struct * @scratch_ring: Newer SoCs need memory for a second HW managed TX ring * @phy_scratch_ring: physical address of scratch_ring * @scratch_head: The scratch memory that scratch_ring points to. @@ -373,10 +383,11 @@ struct mtk_eth { void __iomem *base; struct reset_control *rstc; spinlock_t page_lock; + spinlock_t irq_lock; struct net_device dummy_dev; struct net_device *netdev[MTK_MAX_DEVS]; struct mtk_mac *mac[MTK_MAX_DEVS]; - int irq; + int irq[3]; u32 msg_enable; unsigned long sysclk; struct regmap *ethsys; @@ -384,6 +395,7 @@ struct mtk_eth { atomic_t dma_refcnt; struct mtk_tx_ring tx_ring; struct mtk_rx_ring rx_ring; + struct napi_struct tx_napi; struct napi_struct rx_napi; struct mtk_tx_dma *scratch_ring; dma_addr_t phy_scratch_ring; diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c index dec77d6f0ac9..7ae1cdad9bf0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/intf.c +++ b/drivers/net/ethernet/mellanox/mlx4/intf.c @@ -147,7 +147,7 @@ int mlx4_do_bond(struct mlx4_dev *dev, bool enable) if (enable) { dev->flags |= MLX4_FLAG_BONDED; } else { - ret = mlx4_virt2phy_port_map(dev, 1, 2); + ret = mlx4_virt2phy_port_map(dev, 1, 2); if (ret) { mlx4_err(dev, "Fail to reset port map\n"); return ret; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index b673a5fc6b6c..75dd2e3d3059 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -2600,7 +2600,7 @@ static int mlx4_setup_hca(struct mlx4_dev *dev) err = mlx4_init_uar_table(dev); if (err) { mlx4_err(dev, "Failed to initialize user access region table, aborting\n"); - return err; + return err; } err = mlx4_uar_alloc(dev, &priv->driver_uar); diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index f2d0920018a5..94b891c118c1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -618,8 +618,8 @@ static int remove_promisc_qp(struct mlx4_dev *dev, u8 port, err = mlx4_READ_ENTRY(dev, entry->index, mailbox); - if (err) - goto out_mailbox; + if (err) + goto out_mailbox; members_count = be32_to_cpu(mgm->members_count) & 0xffffff; @@ -657,8 +657,8 @@ static int remove_promisc_qp(struct mlx4_dev *dev, u8 port, err = mlx4_WRITE_ENTRY(dev, entry->index, mailbox); - if (err) - goto out_mailbox; + if (err) + goto out_mailbox; } } } diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index 93195191f45b..395b5463cfd9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -248,7 +248,7 @@ static void mlx4_free_mtt_range(struct mlx4_dev *dev, u32 offset, int order) offset, order); return; } - __mlx4_free_mtt_range(dev, offset, order); + __mlx4_free_mtt_range(dev, offset, order); } void mlx4_mtt_cleanup(struct mlx4_dev *dev, struct mlx4_mtt *mtt) diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index cd9b2b28df88..8b81114bdc72 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -2372,16 +2372,15 @@ static int mpt_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, __mlx4_mpt_release(dev, index); break; case RES_OP_MAP_ICM: - index = get_param_l(&in_param); - id = index & mpt_mask(dev); - err = mr_res_start_move_to(dev, slave, id, - RES_MPT_RESERVED, &mpt); - if (err) - return err; - - __mlx4_mpt_free_icm(dev, mpt->key); - res_end_move(dev, slave, RES_MPT, id); + index = get_param_l(&in_param); + id = index & mpt_mask(dev); + err = mr_res_start_move_to(dev, slave, id, + RES_MPT_RESERVED, &mpt); + if (err) return err; + + __mlx4_mpt_free_icm(dev, mpt->key); + res_end_move(dev, slave, RES_MPT, id); break; default: err = -EINVAL; @@ -4253,9 +4252,8 @@ int mlx4_UPDATE_QP_wrapper(struct mlx4_dev *dev, int slave, (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB)) && !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB)) { - mlx4_warn(dev, - "Src check LB for slave %d isn't supported\n", - slave); + mlx4_warn(dev, "Src check LB for slave %d isn't supported\n", + slave); return -ENOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 1cf722eba607..aae46884bf93 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -4,6 +4,7 @@ config MLX5_CORE tristate "Mellanox Technologies ConnectX-4 and Connect-IB core driver" + depends on MAY_USE_DEVLINK depends on PCI default n ---help--- diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index c4f450f1c658..05cc1effc13c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -5,9 +5,9 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ fs_counters.o rl.o -mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o \ - en_main.o en_fs.o en_ethtool.o en_tx.o en_rx.o \ - en_rx_am.o en_txrx.o en_clock.o vxlan.o en_tc.o \ - en_arfs.o +mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \ + en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \ + en_rx.o en_rx_am.o en_txrx.o en_clock.o vxlan.o \ + en_tc.o en_arfs.o en_rep.o en_fs_ethtool.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 0b4986268cc9..d6e2a1cae19a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -295,6 +295,12 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_DESTROY_FLOW_GROUP: case MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY: case MLX5_CMD_OP_DEALLOC_FLOW_COUNTER: + case MLX5_CMD_OP_2ERR_QP: + case MLX5_CMD_OP_2RST_QP: + case MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT: + case MLX5_CMD_OP_MODIFY_FLOW_TABLE: + case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: + case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT: return MLX5_CMD_STAT_OK; case MLX5_CMD_OP_QUERY_HCA_CAP: @@ -321,8 +327,6 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_RTR2RTS_QP: case MLX5_CMD_OP_RTS2RTS_QP: case MLX5_CMD_OP_SQERR2RTS_QP: - case MLX5_CMD_OP_2ERR_QP: - case MLX5_CMD_OP_2RST_QP: case MLX5_CMD_OP_QUERY_QP: case MLX5_CMD_OP_SQD_RTS_QP: case MLX5_CMD_OP_INIT2INIT_QP: @@ -342,7 +346,6 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT: case MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT: case MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT: - case MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT: case MLX5_CMD_OP_QUERY_ROCE_ADDRESS: case MLX5_CMD_OP_SET_ROCE_ADDRESS: case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT: @@ -390,11 +393,12 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_CREATE_RQT: case MLX5_CMD_OP_MODIFY_RQT: case MLX5_CMD_OP_QUERY_RQT: + case MLX5_CMD_OP_CREATE_FLOW_TABLE: case MLX5_CMD_OP_QUERY_FLOW_TABLE: case MLX5_CMD_OP_CREATE_FLOW_GROUP: case MLX5_CMD_OP_QUERY_FLOW_GROUP: - case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: + case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY: case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: case MLX5_CMD_OP_QUERY_FLOW_COUNTER: @@ -602,11 +606,36 @@ static void dump_command(struct mlx5_core_dev *dev, pr_debug("\n"); } +static u16 msg_to_opcode(struct mlx5_cmd_msg *in) +{ + struct mlx5_inbox_hdr *hdr = (struct mlx5_inbox_hdr *)(in->first.data); + + return be16_to_cpu(hdr->opcode); +} + +static void cb_timeout_handler(struct work_struct *work) +{ + struct delayed_work *dwork = container_of(work, struct delayed_work, + work); + struct mlx5_cmd_work_ent *ent = container_of(dwork, + struct mlx5_cmd_work_ent, + cb_timeout_work); + struct mlx5_core_dev *dev = container_of(ent->cmd, struct mlx5_core_dev, + cmd); + + ent->ret = -ETIMEDOUT; + mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n", + mlx5_command_str(msg_to_opcode(ent->in)), + msg_to_opcode(ent->in)); + mlx5_cmd_comp_handler(dev, 1UL << ent->idx); +} + static void cmd_work_handler(struct work_struct *work) { struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work); struct mlx5_cmd *cmd = ent->cmd; struct mlx5_core_dev *dev = container_of(cmd, struct mlx5_core_dev, cmd); + unsigned long cb_timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC); struct mlx5_cmd_layout *lay; struct semaphore *sem; unsigned long flags; @@ -647,6 +676,9 @@ static void cmd_work_handler(struct work_struct *work) dump_command(dev, ent, 1); ent->ts1 = ktime_get_ns(); + if (ent->callback) + schedule_delayed_work(&ent->cb_timeout_work, cb_timeout); + /* ring doorbell after the descriptor is valid */ mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx); wmb(); @@ -691,13 +723,6 @@ static const char *deliv_status_to_str(u8 status) } } -static u16 msg_to_opcode(struct mlx5_cmd_msg *in) -{ - struct mlx5_inbox_hdr *hdr = (struct mlx5_inbox_hdr *)(in->first.data); - - return be16_to_cpu(hdr->opcode); -} - static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) { unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC); @@ -706,13 +731,13 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) if (cmd->mode == CMD_MODE_POLLING) { wait_for_completion(&ent->done); - err = ent->ret; - } else { - if (!wait_for_completion_timeout(&ent->done, timeout)) - err = -ETIMEDOUT; - else - err = 0; + } else if (!wait_for_completion_timeout(&ent->done, timeout)) { + ent->ret = -ETIMEDOUT; + mlx5_cmd_comp_handler(dev, 1UL << ent->idx); } + + err = ent->ret; + if (err == -ETIMEDOUT) { mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n", mlx5_command_str(msg_to_opcode(ent->in)), @@ -761,6 +786,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, if (!callback) init_completion(&ent->done); + INIT_DELAYED_WORK(&ent->cb_timeout_work, cb_timeout_handler); INIT_WORK(&ent->work, cmd_work_handler); if (page_queue) { cmd_work_handler(&ent->work); @@ -770,28 +796,26 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, goto out_free; } - if (!callback) { - err = wait_func(dev, ent); - if (err == -ETIMEDOUT) - goto out; - - ds = ent->ts2 - ent->ts1; - op = be16_to_cpu(((struct mlx5_inbox_hdr *)in->first.data)->opcode); - if (op < ARRAY_SIZE(cmd->stats)) { - stats = &cmd->stats[op]; - spin_lock_irq(&stats->lock); - stats->sum += ds; - ++stats->n; - spin_unlock_irq(&stats->lock); - } - mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME, - "fw exec time for %s is %lld nsec\n", - mlx5_command_str(op), ds); - *status = ent->status; - free_cmd(ent); - } + if (callback) + goto out; - return err; + err = wait_func(dev, ent); + if (err == -ETIMEDOUT) + goto out_free; + + ds = ent->ts2 - ent->ts1; + op = be16_to_cpu(((struct mlx5_inbox_hdr *)in->first.data)->opcode); + if (op < ARRAY_SIZE(cmd->stats)) { + stats = &cmd->stats[op]; + spin_lock_irq(&stats->lock); + stats->sum += ds; + ++stats->n; + spin_unlock_irq(&stats->lock); + } + mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME, + "fw exec time for %s is %lld nsec\n", + mlx5_command_str(op), ds); + *status = ent->status; out_free: free_cmd(ent); @@ -1181,41 +1205,30 @@ err_dbg: return err; } -void mlx5_cmd_use_events(struct mlx5_core_dev *dev) +static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode) { struct mlx5_cmd *cmd = &dev->cmd; int i; for (i = 0; i < cmd->max_reg_cmds; i++) down(&cmd->sem); - down(&cmd->pages_sem); - flush_workqueue(cmd->wq); - - cmd->mode = CMD_MODE_EVENTS; + cmd->mode = mode; up(&cmd->pages_sem); for (i = 0; i < cmd->max_reg_cmds; i++) up(&cmd->sem); } -void mlx5_cmd_use_polling(struct mlx5_core_dev *dev) +void mlx5_cmd_use_events(struct mlx5_core_dev *dev) { - struct mlx5_cmd *cmd = &dev->cmd; - int i; - - for (i = 0; i < cmd->max_reg_cmds; i++) - down(&cmd->sem); - - down(&cmd->pages_sem); - - flush_workqueue(cmd->wq); - cmd->mode = CMD_MODE_POLLING; + mlx5_cmd_change_mod(dev, CMD_MODE_EVENTS); +} - up(&cmd->pages_sem); - for (i = 0; i < cmd->max_reg_cmds; i++) - up(&cmd->sem); +void mlx5_cmd_use_polling(struct mlx5_core_dev *dev) +{ + mlx5_cmd_change_mod(dev, CMD_MODE_POLLING); } static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg) @@ -1251,6 +1264,8 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec) struct semaphore *sem; ent = cmd->ent_arr[i]; + if (ent->callback) + cancel_delayed_work(&ent->cb_timeout_work); if (ent->page_queue) sem = &cmd->pages_sem; else diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index b97511bf4c7b..4cbd452fec25 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -44,6 +44,7 @@ #include <linux/mlx5/vport.h> #include <linux/mlx5/transobj.h> #include <linux/rhashtable.h> +#include <net/switchdev.h> #include "wq.h" #include "mlx5_core.h" #include "en_stats.h" @@ -163,7 +164,6 @@ enum mlx5e_priv_flag { #ifdef CONFIG_MLX5_CORE_EN_DCB #define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */ -#define MLX5E_MIN_BW_ALLOC 1 /* Min percentage of BW allocation */ #endif struct mlx5e_cq_moder { @@ -214,6 +214,7 @@ struct mlx5e_tstamp { enum { MLX5E_RQ_STATE_POST_WQES_ENABLE, MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, + MLX5E_RQ_STATE_FLUSH_TIMEOUT, MLX5E_RQ_STATE_AM, }; @@ -245,6 +246,8 @@ typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq *rq, typedef int (*mlx5e_fp_alloc_wqe)(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); +typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq *rq, u16 ix); + struct mlx5e_dma_info { struct page *page; dma_addr_t addr; @@ -290,6 +293,7 @@ struct mlx5e_rq { struct mlx5e_cq cq; mlx5e_fp_handle_rx_cqe handle_rx_cqe; mlx5e_fp_alloc_wqe alloc_wqe; + mlx5e_fp_dealloc_wqe dealloc_wqe; unsigned long state; int ix; @@ -356,6 +360,7 @@ struct mlx5e_sq_dma { enum { MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, MLX5E_SQ_STATE_BF_ENABLE, + MLX5E_SQ_STATE_TX_TIMEOUT, }; struct mlx5e_ico_wqe_info { @@ -543,8 +548,24 @@ enum { MLX5E_ARFS_FT_LEVEL }; +struct mlx5e_ethtool_table { + struct mlx5_flow_table *ft; + int num_rules; +}; + +#define ETHTOOL_NUM_L3_L4_FTS 7 +#define ETHTOOL_NUM_L2_FTS 4 + +struct mlx5e_ethtool_steering { + struct mlx5e_ethtool_table l3_l4_ft[ETHTOOL_NUM_L3_L4_FTS]; + struct mlx5e_ethtool_table l2_ft[ETHTOOL_NUM_L2_FTS]; + struct list_head rules; + int tot_num_rules; +}; + struct mlx5e_flow_steering { struct mlx5_flow_namespace *ns; + struct mlx5e_ethtool_steering ethtool; struct mlx5e_tc_table tc; struct mlx5e_vlan_table vlan; struct mlx5e_l2_table l2; @@ -552,9 +573,15 @@ struct mlx5e_flow_steering { struct mlx5e_arfs_tables arfs; }; -struct mlx5e_direct_tir { - u32 tirn; +struct mlx5e_rqt { u32 rqtn; + bool enabled; +}; + +struct mlx5e_tir { + u32 tirn; + struct mlx5e_rqt rqt; + struct list_head list; }; enum { @@ -562,6 +589,22 @@ enum { MLX5E_NIC_PRIO }; +struct mlx5e_profile { + void (*init)(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, void *ppriv); + void (*cleanup)(struct mlx5e_priv *priv); + int (*init_rx)(struct mlx5e_priv *priv); + void (*cleanup_rx)(struct mlx5e_priv *priv); + int (*init_tx)(struct mlx5e_priv *priv); + void (*cleanup_tx)(struct mlx5e_priv *priv); + void (*enable)(struct mlx5e_priv *priv); + void (*disable)(struct mlx5e_priv *priv); + void (*update_stats)(struct mlx5e_priv *priv); + int (*max_nch)(struct mlx5_core_dev *mdev); + int max_tc; +}; + struct mlx5e_priv { /* priv data path fields - start */ struct mlx5e_sq **txq_to_sq_map; @@ -570,18 +613,14 @@ struct mlx5e_priv { unsigned long state; struct mutex state_lock; /* Protects Interface state */ - struct mlx5_uar cq_uar; - u32 pdn; - u32 tdn; - struct mlx5_core_mkey mkey; struct mlx5_core_mkey umr_mkey; struct mlx5e_rq drop_rq; struct mlx5e_channel **channel; u32 tisn[MLX5E_MAX_NUM_TC]; - u32 indir_rqtn; - u32 indir_tirn[MLX5E_NUM_INDIR_TIRS]; - struct mlx5e_direct_tir direct_tir[MLX5E_MAX_NUM_CHANNELS]; + struct mlx5e_rqt indir_rqt; + struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS]; + struct mlx5e_tir direct_tir[MLX5E_MAX_NUM_CHANNELS]; u32 tx_rates[MLX5E_MAX_NUM_SQS]; struct mlx5e_flow_steering fs; @@ -591,6 +630,7 @@ struct mlx5e_priv { struct workqueue_struct *wq; struct work_struct update_carrier_work; struct work_struct set_rx_mode_work; + struct work_struct tx_timeout_work; struct delayed_work update_stats_work; u32 pflags; @@ -599,6 +639,8 @@ struct mlx5e_priv { struct mlx5e_stats stats; struct mlx5e_tstamp tstamp; u16 q_counter; + const struct mlx5e_profile *profile; + void *ppriv; }; enum mlx5e_link_mode { @@ -647,12 +689,16 @@ void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event); int mlx5e_napi_poll(struct napi_struct *napi, int budget); bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); +void mlx5e_free_tx_descs(struct mlx5e_sq *sq); +void mlx5e_free_rx_descs(struct mlx5e_rq *rq); void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq); int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); +void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix); +void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix); void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq); void mlx5e_complete_rx_linear_mpwqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, @@ -680,6 +726,16 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv); void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv); void mlx5e_init_l2_addr(struct mlx5e_priv *priv); void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft); +int mlx5e_ethtool_get_flow(struct mlx5e_priv *priv, struct ethtool_rxnfc *info, + int location); +int mlx5e_ethtool_get_all_flows(struct mlx5e_priv *priv, + struct ethtool_rxnfc *info, u32 *rule_locs); +int mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv, + struct ethtool_rx_flow_spec *fs); +int mlx5e_ethtool_flow_remove(struct mlx5e_priv *priv, + int location); +void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv); +void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv); void mlx5e_set_rx_mode_work(struct work_struct *work); void mlx5e_fill_hwstamp(struct mlx5e_tstamp *clock, u64 timestamp, @@ -788,5 +844,39 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, #endif u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev); +int mlx5e_create_tir(struct mlx5_core_dev *mdev, + struct mlx5e_tir *tir, u32 *in, int inlen); +void mlx5e_destroy_tir(struct mlx5_core_dev *mdev, + struct mlx5e_tir *tir); +int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev); +void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev); +int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5_core_dev *mdev); + +struct mlx5_eswitch_rep; +int mlx5e_vport_rep_load(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep); +void mlx5e_vport_rep_unload(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep); +int mlx5e_nic_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep); +void mlx5e_nic_rep_unload(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep); +int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv); +void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv); +int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr); + +int mlx5e_create_direct_rqts(struct mlx5e_priv *priv); +void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt); +int mlx5e_create_direct_tirs(struct mlx5e_priv *priv); +void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv); +int mlx5e_create_tises(struct mlx5e_priv *priv); +void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv); +int mlx5e_close(struct net_device *netdev); +int mlx5e_open(struct net_device *netdev); +void mlx5e_update_stats_work(struct work_struct *work); +void *mlx5e_create_netdev(struct mlx5_core_dev *mdev, + const struct mlx5e_profile *profile, void *ppriv); +void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv); +struct rtnl_link_stats64 * +mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); #endif /* __MLX5_EN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index 3515e78ba68f..a8cb38789774 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -93,14 +93,14 @@ static enum mlx5e_traffic_types arfs_get_tt(enum arfs_type type) static int arfs_disable(struct mlx5e_priv *priv) { struct mlx5_flow_destination dest; - u32 *tirn = priv->indir_tirn; + struct mlx5e_tir *tir = priv->indir_tir; int err = 0; int tt; int i; dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; for (i = 0; i < ARFS_NUM_TYPES; i++) { - dest.tir_num = tirn[i]; + dest.tir_num = tir[i].tirn; tt = arfs_get_tt(i); /* Modify ttc rules destination to bypass the aRFS tables*/ err = mlx5_modify_rule_destination(priv->fs.ttc.rules[tt], @@ -175,15 +175,12 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, { struct arfs_table *arfs_t = &priv->fs.arfs.arfs_tables[type]; struct mlx5_flow_destination dest; - u8 match_criteria_enable = 0; - u32 *tirn = priv->indir_tirn; - u32 *match_criteria; - u32 *match_value; + struct mlx5e_tir *tir = priv->indir_tir; + struct mlx5_flow_spec *spec; int err = 0; - match_value = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); - match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); - if (!match_value || !match_criteria) { + spec = mlx5_vzalloc(sizeof(*spec)); + if (!spec) { netdev_err(priv->netdev, "%s: alloc failed\n", __func__); err = -ENOMEM; goto out; @@ -192,24 +189,23 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; switch (type) { case ARFS_IPV4_TCP: - dest.tir_num = tirn[MLX5E_TT_IPV4_TCP]; + dest.tir_num = tir[MLX5E_TT_IPV4_TCP].tirn; break; case ARFS_IPV4_UDP: - dest.tir_num = tirn[MLX5E_TT_IPV4_UDP]; + dest.tir_num = tir[MLX5E_TT_IPV4_UDP].tirn; break; case ARFS_IPV6_TCP: - dest.tir_num = tirn[MLX5E_TT_IPV6_TCP]; + dest.tir_num = tir[MLX5E_TT_IPV6_TCP].tirn; break; case ARFS_IPV6_UDP: - dest.tir_num = tirn[MLX5E_TT_IPV6_UDP]; + dest.tir_num = tir[MLX5E_TT_IPV6_UDP].tirn; break; default: err = -EINVAL; goto out; } - arfs_t->default_rule = mlx5_add_flow_rule(arfs_t->ft.t, match_criteria_enable, - match_criteria, match_value, + arfs_t->default_rule = mlx5_add_flow_rule(arfs_t->ft.t, spec, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, MLX5_FS_DEFAULT_FLOW_TAG, &dest); @@ -220,8 +216,7 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, __func__, type); } out: - kvfree(match_criteria); - kvfree(match_value); + kvfree(spec); return err; } @@ -475,23 +470,20 @@ static struct mlx5_flow_rule *arfs_add_rule(struct mlx5e_priv *priv, struct mlx5_flow_rule *rule = NULL; struct mlx5_flow_destination dest; struct arfs_table *arfs_table; - u8 match_criteria_enable = 0; + struct mlx5_flow_spec *spec; struct mlx5_flow_table *ft; - u32 *match_criteria; - u32 *match_value; int err = 0; - match_value = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); - match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); - if (!match_value || !match_criteria) { + spec = mlx5_vzalloc(sizeof(*spec)); + if (!spec) { netdev_err(priv->netdev, "%s: alloc failed\n", __func__); err = -ENOMEM; goto out; } - match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, match_criteria, + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype); - MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, ntohs(tuple->etype)); arfs_table = arfs_get_table(arfs, tuple->ip_proto, tuple->etype); if (!arfs_table) { @@ -501,59 +493,58 @@ static struct mlx5_flow_rule *arfs_add_rule(struct mlx5e_priv *priv, ft = arfs_table->ft.t; if (tuple->ip_proto == IPPROTO_TCP) { - MLX5_SET_TO_ONES(fte_match_param, match_criteria, + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.tcp_dport); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.tcp_sport); - MLX5_SET(fte_match_param, match_value, outer_headers.tcp_dport, + MLX5_SET(fte_match_param, spec->match_value, outer_headers.tcp_dport, ntohs(tuple->dst_port)); - MLX5_SET(fte_match_param, match_value, outer_headers.tcp_sport, + MLX5_SET(fte_match_param, spec->match_value, outer_headers.tcp_sport, ntohs(tuple->src_port)); } else { - MLX5_SET_TO_ONES(fte_match_param, match_criteria, + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.udp_dport); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.udp_sport); - MLX5_SET(fte_match_param, match_value, outer_headers.udp_dport, + MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_dport, ntohs(tuple->dst_port)); - MLX5_SET(fte_match_param, match_value, outer_headers.udp_sport, + MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_sport, ntohs(tuple->src_port)); } if (tuple->etype == htons(ETH_P_IP)) { - memcpy(MLX5_ADDR_OF(fte_match_param, match_value, + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4), &tuple->src_ipv4, 4); - memcpy(MLX5_ADDR_OF(fte_match_param, match_value, + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4), &tuple->dst_ipv4, 4); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4); } else { - memcpy(MLX5_ADDR_OF(fte_match_param, match_value, + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), &tuple->src_ipv6, 16); - memcpy(MLX5_ADDR_OF(fte_match_param, match_value, + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), &tuple->dst_ipv6, 16); - memset(MLX5_ADDR_OF(fte_match_param, match_criteria, + memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), 0xff, 16); - memset(MLX5_ADDR_OF(fte_match_param, match_criteria, + memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 0xff, 16); } dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; dest.tir_num = priv->direct_tir[arfs_rule->rxq].tirn; - rule = mlx5_add_flow_rule(ft, match_criteria_enable, match_criteria, - match_value, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + rule = mlx5_add_flow_rule(ft, spec, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, MLX5_FS_DEFAULT_FLOW_TAG, &dest); if (IS_ERR(rule)) { @@ -563,8 +554,7 @@ static struct mlx5_flow_rule *arfs_add_rule(struct mlx5e_priv *priv, } out: - kvfree(match_criteria); - kvfree(match_value); + kvfree(spec); return err ? ERR_PTR(err) : rule; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c new file mode 100644 index 000000000000..673043ccd76c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "en.h" + +/* mlx5e global resources should be placed in this file. + * Global resources are common to all the netdevices crated on the same nic. + */ + +int mlx5e_create_tir(struct mlx5_core_dev *mdev, + struct mlx5e_tir *tir, u32 *in, int inlen) +{ + int err; + + err = mlx5_core_create_tir(mdev, in, inlen, &tir->tirn); + if (err) + return err; + + list_add(&tir->list, &mdev->mlx5e_res.td.tirs_list); + + return 0; +} + +void mlx5e_destroy_tir(struct mlx5_core_dev *mdev, + struct mlx5e_tir *tir) +{ + mlx5_core_destroy_tir(mdev, tir->tirn); + list_del(&tir->list); +} + +static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, + struct mlx5_core_mkey *mkey) +{ + struct mlx5_create_mkey_mbox_in *in; + int err; + + in = mlx5_vzalloc(sizeof(*in)); + if (!in) + return -ENOMEM; + + in->seg.flags = MLX5_PERM_LOCAL_WRITE | + MLX5_PERM_LOCAL_READ | + MLX5_ACCESS_MODE_PA; + in->seg.flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64); + in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); + + err = mlx5_core_create_mkey(mdev, mkey, in, sizeof(*in), NULL, NULL, + NULL); + + kvfree(in); + + return err; +} + +int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev) +{ + struct mlx5e_resources *res = &mdev->mlx5e_res; + int err; + + err = mlx5_alloc_map_uar(mdev, &res->cq_uar, false); + if (err) { + mlx5_core_err(mdev, "alloc_map uar failed, %d\n", err); + return err; + } + + err = mlx5_core_alloc_pd(mdev, &res->pdn); + if (err) { + mlx5_core_err(mdev, "alloc pd failed, %d\n", err); + goto err_unmap_free_uar; + } + + err = mlx5_core_alloc_transport_domain(mdev, &res->td.tdn); + if (err) { + mlx5_core_err(mdev, "alloc td failed, %d\n", err); + goto err_dealloc_pd; + } + + err = mlx5e_create_mkey(mdev, res->pdn, &res->mkey); + if (err) { + mlx5_core_err(mdev, "create mkey failed, %d\n", err); + goto err_dealloc_transport_domain; + } + + INIT_LIST_HEAD(&mdev->mlx5e_res.td.tirs_list); + + return 0; + +err_dealloc_transport_domain: + mlx5_core_dealloc_transport_domain(mdev, res->td.tdn); +err_dealloc_pd: + mlx5_core_dealloc_pd(mdev, res->pdn); +err_unmap_free_uar: + mlx5_unmap_free_uar(mdev, &res->cq_uar); + + return err; +} + +void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev) +{ + struct mlx5e_resources *res = &mdev->mlx5e_res; + + mlx5_core_destroy_mkey(mdev, &res->mkey); + mlx5_core_dealloc_transport_domain(mdev, res->td.tdn); + mlx5_core_dealloc_pd(mdev, res->pdn); + mlx5_unmap_free_uar(mdev, &res->cq_uar); +} + +int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5_core_dev *mdev) +{ + struct mlx5e_tir *tir; + void *in; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_tir_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1); + + list_for_each_entry(tir, &mdev->mlx5e_res.td.tirs_list, list) { + err = mlx5_core_modify_tir(mdev, tir->tirn, in, inlen); + if (err) + return err; + } + + kvfree(in); + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index e6883132b555..caa9a3ccc3f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -96,7 +96,7 @@ static void mlx5e_build_tc_tx_bw(struct ieee_ets *ets, u8 *tc_tx_bw, tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC; break; case IEEE_8021QAZ_TSA_ETS: - tc_tx_bw[i] = ets->tc_tx_bw[i] ?: MLX5E_MIN_BW_ALLOC; + tc_tx_bw[i] = ets->tc_tx_bw[i]; break; } } @@ -140,8 +140,12 @@ static int mlx5e_dbcnl_validate_ets(struct ieee_ets *ets) /* Validate Bandwidth Sum */ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { - if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) + if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) { + if (!ets->tc_tx_bw[i]) + return -EINVAL; + bw_sum += ets->tc_tx_bw[i]; + } } if (bw_sum != 0 && bw_sum != 100) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index b29684d9fcd6..4a3757e60441 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -139,6 +139,18 @@ static unsigned long mlx5e_query_pfc_combined(struct mlx5e_priv *priv) return err ? 0 : pfc_en_tx | pfc_en_rx; } +static bool mlx5e_query_global_pause_combined(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u32 rx_pause; + u32 tx_pause; + int err; + + err = mlx5_query_port_pause(mdev, &rx_pause, &tx_pause); + + return err ? false : rx_pause | tx_pause; +} + #define MLX5E_NUM_Q_CNTRS(priv) (NUM_Q_COUNTERS * (!!priv->q_counter)) #define MLX5E_NUM_RQ_STATS(priv) \ (NUM_RQ_STATS * priv->params.num_channels * \ @@ -147,8 +159,8 @@ static unsigned long mlx5e_query_pfc_combined(struct mlx5e_priv *priv) (NUM_SQ_STATS * priv->params.num_channels * priv->params.num_tc * \ test_bit(MLX5E_STATE_OPENED, &priv->state)) #define MLX5E_NUM_PFC_COUNTERS(priv) \ - (hweight8(mlx5e_query_pfc_combined(priv)) * \ - NUM_PPORT_PER_PRIO_PFC_COUNTERS) + ((mlx5e_query_global_pause_combined(priv) + hweight8(mlx5e_query_pfc_combined(priv))) * \ + NUM_PPORT_PER_PRIO_PFC_COUNTERS) static int mlx5e_get_sset_count(struct net_device *dev, int sset) { @@ -210,8 +222,18 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data) pfc_combined = mlx5e_query_pfc_combined(priv); for_each_set_bit(prio, &pfc_combined, NUM_PPORT_PRIO) { for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { + char pfc_string[ETH_GSTRING_LEN]; + + snprintf(pfc_string, sizeof(pfc_string), "prio%d", prio); sprintf(data + (idx++) * ETH_GSTRING_LEN, - pport_per_prio_pfc_stats_desc[i].format, prio); + pport_per_prio_pfc_stats_desc[i].format, pfc_string); + } + } + + if (mlx5e_query_global_pause_combined(priv)) { + for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { + sprintf(data + (idx++) * ETH_GSTRING_LEN, + pport_per_prio_pfc_stats_desc[i].format, "global"); } } @@ -306,6 +328,13 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev, } } + if (mlx5e_query_global_pause_combined(priv)) { + for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[0], + pport_per_prio_pfc_stats_desc, 0); + } + } + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) return; @@ -876,7 +905,7 @@ static void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen) mlx5e_build_tir_ctx_hash(tirc, priv); for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) - mlx5_core_modify_tir(mdev, priv->indir_tirn[i], in, inlen); + mlx5_core_modify_tir(mdev, priv->indir_tir[i].tirn, in, inlen); } static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, @@ -898,7 +927,7 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, mutex_lock(&priv->state_lock); if (indir) { - u32 rqtn = priv->indir_rqtn; + u32 rqtn = priv->indir_rqt.rqtn; memcpy(priv->params.indirection_rqt, indir, sizeof(priv->params.indirection_rqt)); @@ -931,6 +960,15 @@ static int mlx5e_get_rxnfc(struct net_device *netdev, case ETHTOOL_GRXRINGS: info->data = priv->params.num_channels; break; + case ETHTOOL_GRXCLSRLCNT: + info->rule_cnt = priv->fs.ethtool.tot_num_rules; + break; + case ETHTOOL_GRXCLSRULE: + err = mlx5e_ethtool_get_flow(priv, info, info->fs.location); + break; + case ETHTOOL_GRXCLSRLALL: + err = mlx5e_ethtool_get_all_flows(priv, info, rule_locs); + break; default: err = -EOPNOTSUPP; break; @@ -1368,6 +1406,26 @@ static u32 mlx5e_get_priv_flags(struct net_device *netdev) return priv->pflags; } +static int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) +{ + int err = 0; + struct mlx5e_priv *priv = netdev_priv(dev); + + switch (cmd->cmd) { + case ETHTOOL_SRXCLSRLINS: + err = mlx5e_ethtool_flow_replace(priv, &cmd->fs); + break; + case ETHTOOL_SRXCLSRLDEL: + err = mlx5e_ethtool_flow_remove(priv, cmd->fs.location); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + const struct ethtool_ops mlx5e_ethtool_ops = { .get_drvinfo = mlx5e_get_drvinfo, .get_link = ethtool_op_get_link, @@ -1387,6 +1445,7 @@ const struct ethtool_ops mlx5e_ethtool_ops = { .get_rxfh = mlx5e_get_rxfh, .set_rxfh = mlx5e_set_rxfh, .get_rxnfc = mlx5e_get_rxnfc, + .set_rxnfc = mlx5e_set_rxnfc, .get_tunable = mlx5e_get_tunable, .set_tunable = mlx5e_set_tunable, .get_pauseparam = mlx5e_get_pauseparam, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index b32740092854..1587a9fd5724 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -156,19 +156,18 @@ enum mlx5e_vlan_rule_type { static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, enum mlx5e_vlan_rule_type rule_type, - u16 vid, u32 *mc, u32 *mv) + u16 vid, struct mlx5_flow_spec *spec) { struct mlx5_flow_table *ft = priv->fs.vlan.ft.t; struct mlx5_flow_destination dest; - u8 match_criteria_enable = 0; struct mlx5_flow_rule **rule_p; int err = 0; dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest.ft = priv->fs.l2.ft.t; - match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.vlan_tag); + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.vlan_tag); switch (rule_type) { case MLX5E_VLAN_RULE_TYPE_UNTAGGED: @@ -176,17 +175,19 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, break; case MLX5E_VLAN_RULE_TYPE_ANY_VID: rule_p = &priv->fs.vlan.any_vlan_rule; - MLX5_SET(fte_match_param, mv, outer_headers.vlan_tag, 1); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.vlan_tag, 1); break; default: /* MLX5E_VLAN_RULE_TYPE_MATCH_VID */ rule_p = &priv->fs.vlan.active_vlans_rule[vid]; - MLX5_SET(fte_match_param, mv, outer_headers.vlan_tag, 1); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.first_vid); - MLX5_SET(fte_match_param, mv, outer_headers.first_vid, vid); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.vlan_tag, 1); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.first_vid); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, + vid); break; } - *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, + *rule_p = mlx5_add_flow_rule(ft, spec, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, MLX5_FS_DEFAULT_FLOW_TAG, &dest); @@ -203,27 +204,21 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, static int mlx5e_add_vlan_rule(struct mlx5e_priv *priv, enum mlx5e_vlan_rule_type rule_type, u16 vid) { - u32 *match_criteria; - u32 *match_value; + struct mlx5_flow_spec *spec; int err = 0; - match_value = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); - match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); - if (!match_value || !match_criteria) { + spec = mlx5_vzalloc(sizeof(*spec)); + if (!spec) { netdev_err(priv->netdev, "%s: alloc failed\n", __func__); - err = -ENOMEM; - goto add_vlan_rule_out; + return -ENOMEM; } if (rule_type == MLX5E_VLAN_RULE_TYPE_MATCH_VID) mlx5e_vport_context_update_vlans(priv); - err = __mlx5e_add_vlan_rule(priv, rule_type, vid, match_criteria, - match_value); + err = __mlx5e_add_vlan_rule(priv, rule_type, vid, spec); -add_vlan_rule_out: - kvfree(match_criteria); - kvfree(match_value); + kvfree(spec); return err; } @@ -598,32 +593,27 @@ static struct mlx5_flow_rule *mlx5e_generate_ttc_rule(struct mlx5e_priv *priv, u8 proto) { struct mlx5_flow_rule *rule; - u8 match_criteria_enable = 0; - u32 *match_criteria; - u32 *match_value; + struct mlx5_flow_spec *spec; int err = 0; - match_value = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); - match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); - if (!match_value || !match_criteria) { + spec = mlx5_vzalloc(sizeof(*spec)); + if (!spec) { netdev_err(priv->netdev, "%s: alloc failed\n", __func__); - err = -ENOMEM; - goto out; + return ERR_PTR(-ENOMEM); } if (proto) { - match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.ip_protocol); - MLX5_SET(fte_match_param, match_value, outer_headers.ip_protocol, proto); + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, proto); } if (etype) { - match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.ethertype); - MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, etype); + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype); } - rule = mlx5_add_flow_rule(ft, match_criteria_enable, - match_criteria, match_value, + rule = mlx5_add_flow_rule(ft, spec, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, MLX5_FS_DEFAULT_FLOW_TAG, dest); @@ -631,9 +621,8 @@ static struct mlx5_flow_rule *mlx5e_generate_ttc_rule(struct mlx5e_priv *priv, err = PTR_ERR(rule); netdev_err(priv->netdev, "%s: add rule failed\n", __func__); } -out: - kvfree(match_criteria); - kvfree(match_value); + + kvfree(spec); return err ? ERR_PTR(err) : rule; } @@ -655,7 +644,7 @@ static int mlx5e_generate_ttc_table_rules(struct mlx5e_priv *priv) if (tt == MLX5E_TT_ANY) dest.tir_num = priv->direct_tir[0].tirn; else - dest.tir_num = priv->indir_tirn[tt]; + dest.tir_num = priv->indir_tir[tt].tirn; rules[tt] = mlx5e_generate_ttc_rule(priv, ft, &dest, ttc_rules[tt].etype, ttc_rules[tt].proto); @@ -792,24 +781,20 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, { struct mlx5_flow_table *ft = priv->fs.l2.ft.t; struct mlx5_flow_destination dest; - u8 match_criteria_enable = 0; - u32 *match_criteria; - u32 *match_value; + struct mlx5_flow_spec *spec; int err = 0; u8 *mc_dmac; u8 *mv_dmac; - match_value = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); - match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); - if (!match_value || !match_criteria) { + spec = mlx5_vzalloc(sizeof(*spec)); + if (!spec) { netdev_err(priv->netdev, "%s: alloc failed\n", __func__); - err = -ENOMEM; - goto add_l2_rule_out; + return -ENOMEM; } - mc_dmac = MLX5_ADDR_OF(fte_match_param, match_criteria, + mc_dmac = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers.dmac_47_16); - mv_dmac = MLX5_ADDR_OF(fte_match_param, match_value, + mv_dmac = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers.dmac_47_16); dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; @@ -817,13 +802,13 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, switch (type) { case MLX5E_FULLMATCH: - match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; eth_broadcast_addr(mc_dmac); ether_addr_copy(mv_dmac, ai->addr); break; case MLX5E_ALLMULTI: - match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; mc_dmac[0] = 0x01; mv_dmac[0] = 0x01; break; @@ -832,8 +817,7 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, break; } - ai->rule = mlx5_add_flow_rule(ft, match_criteria_enable, match_criteria, - match_value, + ai->rule = mlx5_add_flow_rule(ft, spec, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, MLX5_FS_DEFAULT_FLOW_TAG, &dest); if (IS_ERR(ai->rule)) { @@ -843,9 +827,7 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, ai->rule = NULL; } -add_l2_rule_out: - kvfree(match_criteria); - kvfree(match_value); + kvfree(spec); return err; } @@ -1102,6 +1084,8 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv) goto err_destroy_l2_table; } + mlx5e_ethtool_init_steering(priv); + return 0; err_destroy_l2_table: @@ -1121,4 +1105,5 @@ void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv) mlx5e_destroy_l2_table(priv); mlx5e_destroy_ttc_table(priv); mlx5e_arfs_destroy_tables(priv); + mlx5e_ethtool_cleanup_steering(priv); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c new file mode 100644 index 000000000000..d17c24227900 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -0,0 +1,586 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/mlx5/fs.h> +#include "en.h" + +struct mlx5e_ethtool_rule { + struct list_head list; + struct ethtool_rx_flow_spec flow_spec; + struct mlx5_flow_rule *rule; + struct mlx5e_ethtool_table *eth_ft; +}; + +static void put_flow_table(struct mlx5e_ethtool_table *eth_ft) +{ + if (!--eth_ft->num_rules) { + mlx5_destroy_flow_table(eth_ft->ft); + eth_ft->ft = NULL; + } +} + +#define MLX5E_ETHTOOL_L3_L4_PRIO 0 +#define MLX5E_ETHTOOL_L2_PRIO (MLX5E_ETHTOOL_L3_L4_PRIO + ETHTOOL_NUM_L3_L4_FTS) +#define MLX5E_ETHTOOL_NUM_ENTRIES 64000 +#define MLX5E_ETHTOOL_NUM_GROUPS 10 +static struct mlx5e_ethtool_table *get_flow_table(struct mlx5e_priv *priv, + struct ethtool_rx_flow_spec *fs, + int num_tuples) +{ + struct mlx5e_ethtool_table *eth_ft; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *ft; + int max_tuples; + int table_size; + int prio; + + switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) { + case TCP_V4_FLOW: + case UDP_V4_FLOW: + max_tuples = ETHTOOL_NUM_L3_L4_FTS; + prio = MLX5E_ETHTOOL_L3_L4_PRIO + (max_tuples - num_tuples); + eth_ft = &priv->fs.ethtool.l3_l4_ft[prio]; + break; + case IP_USER_FLOW: + max_tuples = ETHTOOL_NUM_L3_L4_FTS; + prio = MLX5E_ETHTOOL_L3_L4_PRIO + (max_tuples - num_tuples); + eth_ft = &priv->fs.ethtool.l3_l4_ft[prio]; + break; + case ETHER_FLOW: + max_tuples = ETHTOOL_NUM_L2_FTS; + prio = max_tuples - num_tuples; + eth_ft = &priv->fs.ethtool.l2_ft[prio]; + prio += MLX5E_ETHTOOL_L2_PRIO; + break; + default: + return ERR_PTR(-EINVAL); + } + + eth_ft->num_rules++; + if (eth_ft->ft) + return eth_ft; + + ns = mlx5_get_flow_namespace(priv->mdev, + MLX5_FLOW_NAMESPACE_ETHTOOL); + if (!ns) + return ERR_PTR(-ENOTSUPP); + + table_size = min_t(u32, BIT(MLX5_CAP_FLOWTABLE(priv->mdev, + flow_table_properties_nic_receive.log_max_ft_size)), + MLX5E_ETHTOOL_NUM_ENTRIES); + ft = mlx5_create_auto_grouped_flow_table(ns, prio, + table_size, + MLX5E_ETHTOOL_NUM_GROUPS, 0); + if (IS_ERR(ft)) + return (void *)ft; + + eth_ft->ft = ft; + return eth_ft; +} + +static void mask_spec(u8 *mask, u8 *val, size_t size) +{ + unsigned int i; + + for (i = 0; i < size; i++, mask++, val++) + *((u8 *)val) = *((u8 *)mask) & *((u8 *)val); +} + +static void set_ips(void *outer_headers_v, void *outer_headers_c, __be32 ip4src_m, + __be32 ip4src_v, __be32 ip4dst_m, __be32 ip4dst_v) +{ + if (ip4src_m) { + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + &ip4src_v, sizeof(ip4src_v)); + memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4), + 0xff, sizeof(ip4src_m)); + } + if (ip4dst_m) { + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &ip4dst_v, sizeof(ip4dst_v)); + memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + 0xff, sizeof(ip4dst_m)); + } + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + ethertype, ETH_P_IP); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + ethertype, 0xffff); +} + +static int set_flow_attrs(u32 *match_c, u32 *match_v, + struct ethtool_rx_flow_spec *fs) +{ + void *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_c, + outer_headers); + void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v, + outer_headers); + u32 flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT); + struct ethtool_tcpip4_spec *l4_mask; + struct ethtool_tcpip4_spec *l4_val; + struct ethtool_usrip4_spec *l3_mask; + struct ethtool_usrip4_spec *l3_val; + struct ethhdr *eth_val; + struct ethhdr *eth_mask; + + switch (flow_type) { + case TCP_V4_FLOW: + l4_mask = &fs->m_u.tcp_ip4_spec; + l4_val = &fs->h_u.tcp_ip4_spec; + set_ips(outer_headers_v, outer_headers_c, l4_mask->ip4src, + l4_val->ip4src, l4_mask->ip4dst, l4_val->ip4dst); + + if (l4_mask->psrc) { + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport, + 0xffff); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_sport, + ntohs(l4_val->psrc)); + } + if (l4_mask->pdst) { + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport, + 0xffff); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_dport, + ntohs(l4_val->pdst)); + } + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol, + 0xffff); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol, + IPPROTO_TCP); + break; + case UDP_V4_FLOW: + l4_mask = &fs->m_u.tcp_ip4_spec; + l4_val = &fs->h_u.tcp_ip4_spec; + set_ips(outer_headers_v, outer_headers_c, l4_mask->ip4src, + l4_val->ip4src, l4_mask->ip4dst, l4_val->ip4dst); + + if (l4_mask->psrc) { + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_sport, + 0xffff); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_sport, + ntohs(l4_val->psrc)); + } + if (l4_mask->pdst) { + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_dport, + 0xffff); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_dport, + ntohs(l4_val->pdst)); + } + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol, + 0xffff); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol, + IPPROTO_UDP); + break; + case IP_USER_FLOW: + l3_mask = &fs->m_u.usr_ip4_spec; + l3_val = &fs->h_u.usr_ip4_spec; + set_ips(outer_headers_v, outer_headers_c, l3_mask->ip4src, + l3_val->ip4src, l3_mask->ip4dst, l3_val->ip4dst); + break; + case ETHER_FLOW: + eth_mask = &fs->m_u.ether_spec; + eth_val = &fs->h_u.ether_spec; + + mask_spec((u8 *)eth_mask, (u8 *)eth_val, sizeof(*eth_mask)); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, + outer_headers_c, smac_47_16), + eth_mask->h_source); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, + outer_headers_v, smac_47_16), + eth_val->h_source); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, + outer_headers_c, dmac_47_16), + eth_mask->h_dest); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, + outer_headers_v, dmac_47_16), + eth_val->h_dest); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ethertype, + ntohs(eth_mask->h_proto)); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ethertype, + ntohs(eth_val->h_proto)); + break; + default: + return -EINVAL; + } + + if ((fs->flow_type & FLOW_EXT) && + (fs->m_ext.vlan_tci & cpu_to_be16(VLAN_VID_MASK))) { + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + vlan_tag, 1); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + vlan_tag, 1); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + first_vid, 0xfff); + MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + first_vid, ntohs(fs->h_ext.vlan_tci)); + } + if (fs->flow_type & FLOW_MAC_EXT && + !is_zero_ether_addr(fs->m_ext.h_dest)) { + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, + outer_headers_c, dmac_47_16), + fs->m_ext.h_dest); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, + outer_headers_v, dmac_47_16), + fs->h_ext.h_dest); + } + + return 0; +} + +static void add_rule_to_list(struct mlx5e_priv *priv, + struct mlx5e_ethtool_rule *rule) +{ + struct mlx5e_ethtool_rule *iter; + struct list_head *head = &priv->fs.ethtool.rules; + + list_for_each_entry(iter, &priv->fs.ethtool.rules, list) { + if (iter->flow_spec.location > rule->flow_spec.location) + break; + head = &iter->list; + } + priv->fs.ethtool.tot_num_rules++; + list_add(&rule->list, head); +} + +static bool outer_header_zero(u32 *match_criteria) +{ + int size = MLX5_ST_SZ_BYTES(fte_match_param); + char *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_criteria, + outer_headers); + + return outer_headers_c[0] == 0 && !memcmp(outer_headers_c, + outer_headers_c + 1, + size - 1); +} + +static struct mlx5_flow_rule *add_ethtool_flow_rule(struct mlx5e_priv *priv, + struct mlx5_flow_table *ft, + struct ethtool_rx_flow_spec *fs) +{ + struct mlx5_flow_destination *dst = NULL; + struct mlx5_flow_spec *spec; + struct mlx5_flow_rule *rule; + int err = 0; + u32 action; + + spec = mlx5_vzalloc(sizeof(*spec)); + if (!spec) + return ERR_PTR(-ENOMEM); + err = set_flow_attrs(spec->match_criteria, spec->match_value, + fs); + if (err) + goto free; + + if (fs->ring_cookie == RX_CLS_FLOW_DISC) { + action = MLX5_FLOW_CONTEXT_ACTION_DROP; + } else { + dst = kzalloc(sizeof(*dst), GFP_KERNEL); + if (!dst) { + err = -ENOMEM; + goto free; + } + + dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dst->tir_num = priv->direct_tir[fs->ring_cookie].tirn; + action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + } + + spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria)); + rule = mlx5_add_flow_rule(ft, spec, action, + MLX5_FS_DEFAULT_FLOW_TAG, dst); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(priv->netdev, "%s: failed to add ethtool steering rule: %d\n", + __func__, err); + goto free; + } +free: + kvfree(spec); + kfree(dst); + return err ? ERR_PTR(err) : rule; +} + +static void del_ethtool_rule(struct mlx5e_priv *priv, + struct mlx5e_ethtool_rule *eth_rule) +{ + if (eth_rule->rule) + mlx5_del_flow_rule(eth_rule->rule); + list_del(ð_rule->list); + priv->fs.ethtool.tot_num_rules--; + put_flow_table(eth_rule->eth_ft); + kfree(eth_rule); +} + +static struct mlx5e_ethtool_rule *find_ethtool_rule(struct mlx5e_priv *priv, + int location) +{ + struct mlx5e_ethtool_rule *iter; + + list_for_each_entry(iter, &priv->fs.ethtool.rules, list) { + if (iter->flow_spec.location == location) + return iter; + } + return NULL; +} + +static struct mlx5e_ethtool_rule *get_ethtool_rule(struct mlx5e_priv *priv, + int location) +{ + struct mlx5e_ethtool_rule *eth_rule; + + eth_rule = find_ethtool_rule(priv, location); + if (eth_rule) + del_ethtool_rule(priv, eth_rule); + + eth_rule = kzalloc(sizeof(*eth_rule), GFP_KERNEL); + if (!eth_rule) + return ERR_PTR(-ENOMEM); + + add_rule_to_list(priv, eth_rule); + return eth_rule; +} + +#define MAX_NUM_OF_ETHTOOL_RULES BIT(10) + +#define all_ones(field) (field == (__force typeof(field))-1) +#define all_zeros_or_all_ones(field) \ + ((field) == 0 || (field) == (__force typeof(field))-1) + +static int validate_flow(struct mlx5e_priv *priv, + struct ethtool_rx_flow_spec *fs) +{ + struct ethtool_tcpip4_spec *l4_mask; + struct ethtool_usrip4_spec *l3_mask; + struct ethhdr *eth_mask; + int num_tuples = 0; + + if (fs->location >= MAX_NUM_OF_ETHTOOL_RULES) + return -EINVAL; + + if (fs->ring_cookie >= priv->params.num_channels && + fs->ring_cookie != RX_CLS_FLOW_DISC) + return -EINVAL; + + switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) { + case ETHER_FLOW: + eth_mask = &fs->m_u.ether_spec; + if (!is_zero_ether_addr(eth_mask->h_dest)) + num_tuples++; + if (!is_zero_ether_addr(eth_mask->h_source)) + num_tuples++; + if (eth_mask->h_proto) + num_tuples++; + break; + case TCP_V4_FLOW: + case UDP_V4_FLOW: + if (fs->m_u.tcp_ip4_spec.tos) + return -EINVAL; + l4_mask = &fs->m_u.tcp_ip4_spec; + if (l4_mask->ip4src) { + if (!all_ones(l4_mask->ip4src)) + return -EINVAL; + num_tuples++; + } + if (l4_mask->ip4dst) { + if (!all_ones(l4_mask->ip4dst)) + return -EINVAL; + num_tuples++; + } + if (l4_mask->psrc) { + if (!all_ones(l4_mask->psrc)) + return -EINVAL; + num_tuples++; + } + if (l4_mask->pdst) { + if (!all_ones(l4_mask->pdst)) + return -EINVAL; + num_tuples++; + } + /* Flow is TCP/UDP */ + num_tuples++; + break; + case IP_USER_FLOW: + l3_mask = &fs->m_u.usr_ip4_spec; + if (l3_mask->l4_4_bytes || l3_mask->tos || l3_mask->proto || + fs->h_u.usr_ip4_spec.ip_ver != ETH_RX_NFC_IP4) + return -EINVAL; + if (l3_mask->ip4src) { + if (!all_ones(l3_mask->ip4src)) + return -EINVAL; + num_tuples++; + } + if (l3_mask->ip4dst) { + if (!all_ones(l3_mask->ip4dst)) + return -EINVAL; + num_tuples++; + } + /* Flow is IPv4 */ + num_tuples++; + break; + default: + return -EINVAL; + } + if ((fs->flow_type & FLOW_EXT)) { + if (fs->m_ext.vlan_etype || + (fs->m_ext.vlan_tci != cpu_to_be16(VLAN_VID_MASK))) + return -EINVAL; + + if (fs->m_ext.vlan_tci) { + if (be16_to_cpu(fs->h_ext.vlan_tci) >= VLAN_N_VID) + return -EINVAL; + } + num_tuples++; + } + + if (fs->flow_type & FLOW_MAC_EXT && + !is_zero_ether_addr(fs->m_ext.h_dest)) + num_tuples++; + + return num_tuples; +} + +int mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv, + struct ethtool_rx_flow_spec *fs) +{ + struct mlx5e_ethtool_table *eth_ft; + struct mlx5e_ethtool_rule *eth_rule; + struct mlx5_flow_rule *rule; + int num_tuples; + int err; + + num_tuples = validate_flow(priv, fs); + if (num_tuples <= 0) { + netdev_warn(priv->netdev, "%s: flow is not valid\n", __func__); + return -EINVAL; + } + + eth_ft = get_flow_table(priv, fs, num_tuples); + if (IS_ERR(eth_ft)) + return PTR_ERR(eth_ft); + + eth_rule = get_ethtool_rule(priv, fs->location); + if (IS_ERR(eth_rule)) { + put_flow_table(eth_ft); + return PTR_ERR(eth_rule); + } + + eth_rule->flow_spec = *fs; + eth_rule->eth_ft = eth_ft; + if (!eth_ft->ft) { + err = -EINVAL; + goto del_ethtool_rule; + } + rule = add_ethtool_flow_rule(priv, eth_ft->ft, fs); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + goto del_ethtool_rule; + } + + eth_rule->rule = rule; + + return 0; + +del_ethtool_rule: + del_ethtool_rule(priv, eth_rule); + + return err; +} + +int mlx5e_ethtool_flow_remove(struct mlx5e_priv *priv, + int location) +{ + struct mlx5e_ethtool_rule *eth_rule; + int err = 0; + + if (location >= MAX_NUM_OF_ETHTOOL_RULES) + return -ENOSPC; + + eth_rule = find_ethtool_rule(priv, location); + if (!eth_rule) { + err = -ENOENT; + goto out; + } + + del_ethtool_rule(priv, eth_rule); +out: + return err; +} + +int mlx5e_ethtool_get_flow(struct mlx5e_priv *priv, struct ethtool_rxnfc *info, + int location) +{ + struct mlx5e_ethtool_rule *eth_rule; + + if (location < 0 || location >= MAX_NUM_OF_ETHTOOL_RULES) + return -EINVAL; + + list_for_each_entry(eth_rule, &priv->fs.ethtool.rules, list) { + if (eth_rule->flow_spec.location == location) { + info->fs = eth_rule->flow_spec; + return 0; + } + } + + return -ENOENT; +} + +int mlx5e_ethtool_get_all_flows(struct mlx5e_priv *priv, struct ethtool_rxnfc *info, + u32 *rule_locs) +{ + int location = 0; + int idx = 0; + int err = 0; + + while ((!err || err == -ENOENT) && idx < info->rule_cnt) { + err = mlx5e_ethtool_get_flow(priv, info, location); + if (!err) + rule_locs[idx++] = location; + location++; + } + return err; +} + +void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv) +{ + struct mlx5e_ethtool_rule *iter; + struct mlx5e_ethtool_rule *temp; + + list_for_each_entry_safe(iter, temp, &priv->fs.ethtool.rules, list) + del_ethtool_rule(priv, iter); +} + +void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv) +{ + INIT_LIST_HEAD(&priv->fs.ethtool.rules); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a64ce5df5810..611ab550136e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -39,6 +39,13 @@ #include "eswitch.h" #include "vxlan.h" +enum { + MLX5_EN_QP_FLUSH_TIMEOUT_MS = 5000, + MLX5_EN_QP_FLUSH_MSLEEP_QUANT = 20, + MLX5_EN_QP_FLUSH_MAX_ITER = MLX5_EN_QP_FLUSH_TIMEOUT_MS / + MLX5_EN_QP_FLUSH_MSLEEP_QUANT, +}; + struct mlx5e_rq_param { u32 rqc[MLX5_ST_SZ_DW(rqc)]; struct mlx5_wq_param wq; @@ -76,10 +83,13 @@ static void mlx5e_update_carrier(struct mlx5e_priv *priv) port_state = mlx5_query_vport_state(mdev, MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0); - if (port_state == VPORT_STATE_UP) + if (port_state == VPORT_STATE_UP) { + netdev_info(priv->netdev, "Link up\n"); netif_carrier_on(priv->netdev); - else + } else { + netdev_info(priv->netdev, "Link down\n"); netif_carrier_off(priv->netdev); + } } static void mlx5e_update_carrier_work(struct work_struct *work) @@ -93,6 +103,26 @@ static void mlx5e_update_carrier_work(struct work_struct *work) mutex_unlock(&priv->state_lock); } +static void mlx5e_tx_timeout_work(struct work_struct *work) +{ + struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, + tx_timeout_work); + int err; + + rtnl_lock(); + mutex_lock(&priv->state_lock); + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto unlock; + mlx5e_close_locked(priv->netdev); + err = mlx5e_open_locked(priv->netdev); + if (err) + netdev_err(priv->netdev, "mlx5e_open_locked failed recovering from a tx_timeout, err(%d).\n", + err); +unlock: + mutex_unlock(&priv->state_lock); + rtnl_unlock(); +} + static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) { struct mlx5e_sw_stats *s = &priv->stats.sw; @@ -226,14 +256,14 @@ void mlx5e_update_stats(struct mlx5e_priv *priv) mlx5e_update_sw_counters(priv); } -static void mlx5e_update_stats_work(struct work_struct *work) +void mlx5e_update_stats_work(struct work_struct *work) { struct delayed_work *dwork = to_delayed_work(work); struct mlx5e_priv *priv = container_of(dwork, struct mlx5e_priv, update_stats_work); mutex_lock(&priv->state_lock); if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { - mlx5e_update_stats(priv); + priv->profile->update_stats(priv); queue_delayed_work(priv->wq, dwork, msecs_to_jiffies(MLX5E_UPDATE_STATS_INTERVAL)); } @@ -307,6 +337,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, } rq->handle_rx_cqe = mlx5e_handle_rx_cqe_mpwrq; rq->alloc_wqe = mlx5e_alloc_rx_mpwqe; + rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz); rq->mpwqe_num_strides = BIT(priv->params.mpwqe_log_num_strides); @@ -322,6 +353,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, } rq->handle_rx_cqe = mlx5e_handle_rx_cqe; rq->alloc_wqe = mlx5e_alloc_rx_wqe; + rq->dealloc_wqe = mlx5e_dealloc_rx_wqe; rq->wqe_sz = (priv->params.lro_en) ? priv->params.lro_wqe_sz : @@ -533,12 +565,19 @@ err_destroy_rq: static void mlx5e_close_rq(struct mlx5e_rq *rq) { + int tout = 0; + int err; + clear_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state); napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */ - mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR); - while (!mlx5_wq_ll_is_empty(&rq->wq)) - msleep(20); + err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR); + while (!mlx5_wq_ll_is_empty(&rq->wq) && !err && + tout++ < MLX5_EN_QP_FLUSH_MAX_ITER) + msleep(MLX5_EN_QP_FLUSH_MSLEEP_QUANT); + + if (err || tout == MLX5_EN_QP_FLUSH_MAX_ITER) + set_bit(MLX5E_RQ_STATE_FLUSH_TIMEOUT, &rq->state); /* avoid destroying rq before mlx5e_poll_rx_cq() is done with it */ napi_synchronize(&rq->channel->napi); @@ -546,6 +585,7 @@ static void mlx5e_close_rq(struct mlx5e_rq *rq) cancel_work_sync(&rq->am.work); mlx5e_disable_rq(rq); + mlx5e_free_rx_descs(rq); mlx5e_destroy_rq(rq); } @@ -800,6 +840,9 @@ static inline void netif_tx_disable_queue(struct netdev_queue *txq) static void mlx5e_close_sq(struct mlx5e_sq *sq) { + int tout = 0; + int err; + if (sq->txq) { clear_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state); /* prevent netif_tx_wake_queue */ @@ -810,16 +853,24 @@ static void mlx5e_close_sq(struct mlx5e_sq *sq) if (mlx5e_sq_has_room_for(sq, 1)) mlx5e_send_nop(sq, true); - mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR, - false, 0); + err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, + MLX5_SQC_STATE_ERR, false, 0); + if (err) + set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state); } - while (sq->cc != sq->pc) /* wait till sq is empty */ - msleep(20); + /* wait till sq is empty, unless a TX timeout occurred on this SQ */ + while (sq->cc != sq->pc && + !test_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state)) { + msleep(MLX5_EN_QP_FLUSH_MSLEEP_QUANT); + if (tout++ > MLX5_EN_QP_FLUSH_MAX_ITER) + set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state); + } /* avoid destroying sq before mlx5e_poll_tx_cq() is done with it */ napi_synchronize(&sq->channel->napi); + mlx5e_free_tx_descs(sq); mlx5e_disable_sq(sq); mlx5e_destroy_sq(sq); } @@ -858,7 +909,7 @@ static int mlx5e_create_cq(struct mlx5e_channel *c, mcq->comp = mlx5e_completion_event; mcq->event = mlx5e_cq_error_event; mcq->irqn = irqn; - mcq->uar = &priv->cq_uar; + mcq->uar = &mdev->mlx5e_res.cq_uar; for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) { struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i); @@ -1036,7 +1087,7 @@ static void mlx5e_build_channeltc_to_txq_map(struct mlx5e_priv *priv, int ix) { int i; - for (i = 0; i < MLX5E_MAX_NUM_TC; i++) + for (i = 0; i < priv->profile->max_tc; i++) priv->channeltc_to_txq_map[ix][i] = ix + i * priv->params.num_channels; } @@ -1136,7 +1187,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->cpu = cpu; c->pdev = &priv->mdev->pdev->dev; c->netdev = priv->netdev; - c->mkey_be = cpu_to_be32(priv->mkey.key); + c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); c->num_tc = priv->params.num_tc; if (priv->params.rx_am_enabled) @@ -1252,7 +1303,7 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv, MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe))); MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size); - MLX5_SET(wq, wq, pd, priv->pdn); + MLX5_SET(wq, wq, pd, priv->mdev->mlx5e_res.pdn); MLX5_SET(rqc, rqc, counter_set_id, priv->q_counter); param->wq.buf_numa_node = dev_to_node(&priv->mdev->pdev->dev); @@ -1277,7 +1328,7 @@ static void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, void *wq = MLX5_ADDR_OF(sqc, sqc, wq); MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); - MLX5_SET(wq, wq, pd, priv->pdn); + MLX5_SET(wq, wq, pd, priv->mdev->mlx5e_res.pdn); param->wq.buf_numa_node = dev_to_node(&priv->mdev->pdev->dev); } @@ -1299,7 +1350,7 @@ static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, { void *cqc = param->cqc; - MLX5_SET(cqc, cqc, uar_page, priv->cq_uar.index); + MLX5_SET(cqc, cqc, uar_page, priv->mdev->mlx5e_res.cq_uar.index); } static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, @@ -1486,7 +1537,8 @@ static void mlx5e_fill_direct_rqt_rqn(struct mlx5e_priv *priv, void *rqtc, MLX5_SET(rqtc, rqtc, rq_num[0], rqn); } -static int mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, int ix, u32 *rqtn) +static int mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, + int ix, struct mlx5e_rqt *rqt) { struct mlx5_core_dev *mdev = priv->mdev; void *rqtc; @@ -1509,34 +1561,36 @@ static int mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, int ix, u32 *rqtn) else mlx5e_fill_direct_rqt_rqn(priv, rqtc, ix); - err = mlx5_core_create_rqt(mdev, in, inlen, rqtn); + err = mlx5_core_create_rqt(mdev, in, inlen, &rqt->rqtn); + if (!err) + rqt->enabled = true; kvfree(in); return err; } -static void mlx5e_destroy_rqt(struct mlx5e_priv *priv, u32 rqtn) +void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt) { - mlx5_core_destroy_rqt(priv->mdev, rqtn); + rqt->enabled = false; + mlx5_core_destroy_rqt(priv->mdev, rqt->rqtn); } -static int mlx5e_create_rqts(struct mlx5e_priv *priv) +static int mlx5e_create_indirect_rqts(struct mlx5e_priv *priv) { - int nch = mlx5e_get_max_num_channels(priv->mdev); - u32 *rqtn; + struct mlx5e_rqt *rqt = &priv->indir_rqt; + + return mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, 0, rqt); +} + +int mlx5e_create_direct_rqts(struct mlx5e_priv *priv) +{ + struct mlx5e_rqt *rqt; int err; int ix; - /* Indirect RQT */ - rqtn = &priv->indir_rqtn; - err = mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, 0, rqtn); - if (err) - return err; - - /* Direct RQTs */ - for (ix = 0; ix < nch; ix++) { - rqtn = &priv->direct_tir[ix].rqtn; - err = mlx5e_create_rqt(priv, 1 /*size */, ix, rqtn); + for (ix = 0; ix < priv->profile->max_nch(priv->mdev); ix++) { + rqt = &priv->direct_tir[ix].rqt; + err = mlx5e_create_rqt(priv, 1 /*size */, ix, rqt); if (err) goto err_destroy_rqts; } @@ -1545,24 +1599,11 @@ static int mlx5e_create_rqts(struct mlx5e_priv *priv) err_destroy_rqts: for (ix--; ix >= 0; ix--) - mlx5e_destroy_rqt(priv, priv->direct_tir[ix].rqtn); - - mlx5e_destroy_rqt(priv, priv->indir_rqtn); + mlx5e_destroy_rqt(priv, &priv->direct_tir[ix].rqt); return err; } -static void mlx5e_destroy_rqts(struct mlx5e_priv *priv) -{ - int nch = mlx5e_get_max_num_channels(priv->mdev); - int i; - - for (i = 0; i < nch; i++) - mlx5e_destroy_rqt(priv, priv->direct_tir[i].rqtn); - - mlx5e_destroy_rqt(priv, priv->indir_rqtn); -} - int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, int ix) { struct mlx5_core_dev *mdev = priv->mdev; @@ -1598,10 +1639,15 @@ static void mlx5e_redirect_rqts(struct mlx5e_priv *priv) u32 rqtn; int ix; - rqtn = priv->indir_rqtn; - mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, 0); + if (priv->indir_rqt.enabled) { + rqtn = priv->indir_rqt.rqtn; + mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, 0); + } + for (ix = 0; ix < priv->params.num_channels; ix++) { - rqtn = priv->direct_tir[ix].rqtn; + if (!priv->direct_tir[ix].rqt.enabled) + continue; + rqtn = priv->direct_tir[ix].rqt.rqtn; mlx5e_redirect_rqt(priv, rqtn, 1, ix); } } @@ -1661,13 +1707,13 @@ static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv) mlx5e_build_tir_ctx_lro(tirc, priv); for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { - err = mlx5_core_modify_tir(mdev, priv->indir_tirn[tt], in, + err = mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, inlen); if (err) goto free_in; } - for (ix = 0; ix < mlx5e_get_max_num_channels(mdev); ix++) { + for (ix = 0; ix < priv->profile->max_nch(priv->mdev); ix++) { err = mlx5_core_modify_tir(mdev, priv->direct_tir[ix].tirn, in, inlen); if (err) @@ -1680,40 +1726,6 @@ free_in: return err; } -static int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5e_priv *priv) -{ - void *in; - int inlen; - int err; - int i; - - inlen = MLX5_ST_SZ_BYTES(modify_tir_in); - in = mlx5_vzalloc(inlen); - if (!in) - return -ENOMEM; - - MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1); - - for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) { - err = mlx5_core_modify_tir(priv->mdev, priv->indir_tirn[i], in, - inlen); - if (err) - return err; - } - - for (i = 0; i < priv->params.num_channels; i++) { - err = mlx5_core_modify_tir(priv->mdev, - priv->direct_tir[i].tirn, in, - inlen); - if (err) - return err; - } - - kvfree(in); - - return 0; -} - static int mlx5e_set_mtu(struct mlx5e_priv *priv, u16 mtu) { struct mlx5_core_dev *mdev = priv->mdev; @@ -1775,13 +1787,17 @@ static void mlx5e_netdev_set_tcs(struct net_device *netdev) netdev_set_num_tc(netdev, ntc); + /* Map netdev TCs to offset 0 + * We have our own UP to TXQ mapping for QoS + */ for (tc = 0; tc < ntc; tc++) - netdev_set_tc_queue(netdev, tc, nch, tc * nch); + netdev_set_tc_queue(netdev, tc, nch, 0); } int mlx5e_open_locked(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; int num_txqs; int err; @@ -1804,7 +1820,7 @@ int mlx5e_open_locked(struct net_device *netdev) goto err_clear_state_opened_flag; } - err = mlx5e_refresh_tirs_self_loopback_enable(priv); + err = mlx5e_refresh_tirs_self_loopback_enable(priv->mdev); if (err) { netdev_err(netdev, "%s: mlx5e_refresh_tirs_self_loopback_enable failed, %d\n", __func__, err); @@ -1817,9 +1833,14 @@ int mlx5e_open_locked(struct net_device *netdev) #ifdef CONFIG_RFS_ACCEL priv->netdev->rx_cpu_rmap = priv->mdev->rmap; #endif + if (priv->profile->update_stats) + queue_delayed_work(priv->wq, &priv->update_stats_work, 0); - queue_delayed_work(priv->wq, &priv->update_stats_work, 0); - + if (MLX5_CAP_GEN(mdev, vport_group_manager)) { + err = mlx5e_add_sqs_fwd_rules(priv); + if (err) + goto err_close_channels; + } return 0; err_close_channels: @@ -1829,7 +1850,7 @@ err_clear_state_opened_flag: return err; } -static int mlx5e_open(struct net_device *netdev) +int mlx5e_open(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); int err; @@ -1844,6 +1865,7 @@ static int mlx5e_open(struct net_device *netdev) int mlx5e_close_locked(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; /* May already be CLOSED in case a previous configuration operation * (e.g RX/TX queue size change) that involves close&open failed. @@ -1853,6 +1875,9 @@ int mlx5e_close_locked(struct net_device *netdev) clear_bit(MLX5E_STATE_OPENED, &priv->state); + if (MLX5_CAP_GEN(mdev, vport_group_manager)) + mlx5e_remove_sqs_fwd_rules(priv); + mlx5e_timestamp_cleanup(priv); netif_carrier_off(priv->netdev); mlx5e_redirect_rqts(priv); @@ -1861,7 +1886,7 @@ int mlx5e_close_locked(struct net_device *netdev) return 0; } -static int mlx5e_close(struct net_device *netdev) +int mlx5e_close(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); int err; @@ -1920,7 +1945,7 @@ static int mlx5e_create_drop_cq(struct mlx5e_priv *priv, mcq->comp = mlx5e_completion_event; mcq->event = mlx5e_cq_error_event; mcq->irqn = irqn; - mcq->uar = &priv->cq_uar; + mcq->uar = &mdev->mlx5e_res.cq_uar; cq->priv = priv; @@ -1986,7 +2011,7 @@ static int mlx5e_create_tis(struct mlx5e_priv *priv, int tc) memset(in, 0, sizeof(in)); MLX5_SET(tisc, tisc, prio, tc << 1); - MLX5_SET(tisc, tisc, transport_domain, priv->tdn); + MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.td.tdn); return mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]); } @@ -1996,12 +2021,12 @@ static void mlx5e_destroy_tis(struct mlx5e_priv *priv, int tc) mlx5_core_destroy_tis(priv->mdev, priv->tisn[tc]); } -static int mlx5e_create_tises(struct mlx5e_priv *priv) +int mlx5e_create_tises(struct mlx5e_priv *priv) { int err; int tc; - for (tc = 0; tc < MLX5E_MAX_NUM_TC; tc++) { + for (tc = 0; tc < priv->profile->max_tc; tc++) { err = mlx5e_create_tis(priv, tc); if (err) goto err_close_tises; @@ -2016,11 +2041,11 @@ err_close_tises: return err; } -static void mlx5e_destroy_tises(struct mlx5e_priv *priv) +void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv) { int tc; - for (tc = 0; tc < MLX5E_MAX_NUM_TC; tc++) + for (tc = 0; tc < priv->profile->max_tc; tc++) mlx5e_destroy_tis(priv, tc); } @@ -2029,7 +2054,7 @@ static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, { void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); - MLX5_SET(tirc, tirc, transport_domain, priv->tdn); + MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn); #define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\ MLX5_HASH_FIELD_SEL_DST_IP) @@ -2046,7 +2071,7 @@ static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, mlx5e_build_tir_ctx_lro(tirc, priv); MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); - MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqtn); + MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn); mlx5e_build_tir_ctx_hash(tirc, priv); switch (tt) { @@ -2136,7 +2161,7 @@ static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, u32 rqtn) { - MLX5_SET(tirc, tirc, transport_domain, priv->tdn); + MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn); mlx5e_build_tir_ctx_lro(tirc, priv); @@ -2145,15 +2170,13 @@ static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8); } -static int mlx5e_create_tirs(struct mlx5e_priv *priv) +static int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv) { - int nch = mlx5e_get_max_num_channels(priv->mdev); + struct mlx5e_tir *tir; void *tirc; int inlen; - u32 *tirn; int err; u32 *in; - int ix; int tt; inlen = MLX5_ST_SZ_BYTES(create_tir_in); @@ -2161,25 +2184,51 @@ static int mlx5e_create_tirs(struct mlx5e_priv *priv) if (!in) return -ENOMEM; - /* indirect tirs */ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { memset(in, 0, inlen); - tirn = &priv->indir_tirn[tt]; + tir = &priv->indir_tir[tt]; tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); mlx5e_build_indir_tir_ctx(priv, tirc, tt); - err = mlx5_core_create_tir(priv->mdev, in, inlen, tirn); + err = mlx5e_create_tir(priv->mdev, tir, in, inlen); if (err) goto err_destroy_tirs; } - /* direct tirs */ + kvfree(in); + + return 0; + +err_destroy_tirs: + for (tt--; tt >= 0; tt--) + mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[tt]); + + kvfree(in); + + return err; +} + +int mlx5e_create_direct_tirs(struct mlx5e_priv *priv) +{ + int nch = priv->profile->max_nch(priv->mdev); + struct mlx5e_tir *tir; + void *tirc; + int inlen; + int err; + u32 *in; + int ix; + + inlen = MLX5_ST_SZ_BYTES(create_tir_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + for (ix = 0; ix < nch; ix++) { memset(in, 0, inlen); - tirn = &priv->direct_tir[ix].tirn; + tir = &priv->direct_tir[ix]; tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); mlx5e_build_direct_tir_ctx(priv, tirc, - priv->direct_tir[ix].rqtn); - err = mlx5_core_create_tir(priv->mdev, in, inlen, tirn); + priv->direct_tir[ix].rqt.rqtn); + err = mlx5e_create_tir(priv->mdev, tir, in, inlen); if (err) goto err_destroy_ch_tirs; } @@ -2190,27 +2239,28 @@ static int mlx5e_create_tirs(struct mlx5e_priv *priv) err_destroy_ch_tirs: for (ix--; ix >= 0; ix--) - mlx5_core_destroy_tir(priv->mdev, priv->direct_tir[ix].tirn); - -err_destroy_tirs: - for (tt--; tt >= 0; tt--) - mlx5_core_destroy_tir(priv->mdev, priv->indir_tirn[tt]); + mlx5e_destroy_tir(priv->mdev, &priv->direct_tir[ix]); kvfree(in); return err; } -static void mlx5e_destroy_tirs(struct mlx5e_priv *priv) +static void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv) { - int nch = mlx5e_get_max_num_channels(priv->mdev); int i; - for (i = 0; i < nch; i++) - mlx5_core_destroy_tir(priv->mdev, priv->direct_tir[i].tirn); - for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) - mlx5_core_destroy_tir(priv->mdev, priv->indir_tirn[i]); + mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[i]); +} + +void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv) +{ + int nch = priv->profile->max_nch(priv->mdev); + int i; + + for (i = 0; i < nch; i++) + mlx5e_destroy_tir(priv->mdev, &priv->direct_tir[i]); } int mlx5e_modify_rqs_vsd(struct mlx5e_priv *priv, bool vsd) @@ -2284,7 +2334,7 @@ mqprio: return mlx5e_setup_tc(dev, tc->tc); } -static struct rtnl_link_stats64 * +struct rtnl_link_stats64 * mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct mlx5e_priv *priv = netdev_priv(dev); @@ -2713,6 +2763,29 @@ static netdev_features_t mlx5e_features_check(struct sk_buff *skb, return features; } +static void mlx5e_tx_timeout(struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + bool sched_work = false; + int i; + + netdev_err(dev, "TX timeout detected\n"); + + for (i = 0; i < priv->params.num_channels * priv->params.num_tc; i++) { + struct mlx5e_sq *sq = priv->txq_to_sq_map[i]; + + if (!netif_tx_queue_stopped(netdev_get_tx_queue(dev, i))) + continue; + sched_work = true; + set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state); + netdev_err(dev, "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x\n", + i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc); + } + + if (sched_work && test_bit(MLX5E_STATE_OPENED, &priv->state)) + schedule_work(&priv->tx_timeout_work); +} + static const struct net_device_ops mlx5e_netdev_ops_basic = { .ndo_open = mlx5e_open, .ndo_stop = mlx5e_close, @@ -2731,6 +2804,7 @@ static const struct net_device_ops mlx5e_netdev_ops_basic = { #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = mlx5e_rx_flow_steer, #endif + .ndo_tx_timeout = mlx5e_tx_timeout, }; static const struct net_device_ops mlx5e_netdev_ops_sriov = { @@ -2761,6 +2835,7 @@ static const struct net_device_ops mlx5e_netdev_ops_sriov = { .ndo_get_vf_config = mlx5e_get_vf_config, .ndo_set_vf_link_state = mlx5e_set_vf_link_state, .ndo_get_vf_stats = mlx5e_get_vf_stats, + .ndo_tx_timeout = mlx5e_tx_timeout, }; static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) @@ -2892,9 +2967,10 @@ void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE; } -static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, - struct net_device *netdev, - int num_channels) +static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, + void *ppriv) { struct mlx5e_priv *priv = netdev_priv(netdev); u32 link_speed = 0; @@ -2963,7 +3039,7 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, sizeof(priv->params.toeplitz_hash_key)); mlx5e_build_default_indir_rqt(mdev, priv->params.indirection_rqt, - MLX5E_INDIR_RQT_SIZE, num_channels); + MLX5E_INDIR_RQT_SIZE, profile->max_nch(mdev)); priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; @@ -2974,7 +3050,9 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, priv->mdev = mdev; priv->netdev = netdev; - priv->params.num_channels = num_channels; + priv->params.num_channels = profile->max_nch(mdev); + priv->profile = profile; + priv->ppriv = ppriv; #ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_ets_init(priv); @@ -2984,6 +3062,7 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work); INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work); + INIT_WORK(&priv->tx_timeout_work, mlx5e_tx_timeout_work); INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work); } @@ -2999,7 +3078,11 @@ static void mlx5e_set_netdev_dev_addr(struct net_device *netdev) } } -static void mlx5e_build_netdev(struct net_device *netdev) +static const struct switchdev_ops mlx5e_switchdev_ops = { + .switchdev_port_attr_get = mlx5e_attr_get, +}; + +static void mlx5e_build_nic_netdev(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; @@ -3080,31 +3163,11 @@ static void mlx5e_build_netdev(struct net_device *netdev) netdev->priv_flags |= IFF_UNICAST_FLT; mlx5e_set_netdev_dev_addr(netdev); -} -static int mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn, - struct mlx5_core_mkey *mkey) -{ - struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5_create_mkey_mbox_in *in; - int err; - - in = mlx5_vzalloc(sizeof(*in)); - if (!in) - return -ENOMEM; - - in->seg.flags = MLX5_PERM_LOCAL_WRITE | - MLX5_PERM_LOCAL_READ | - MLX5_ACCESS_MODE_PA; - in->seg.flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64); - in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); - - err = mlx5_core_create_mkey(mdev, mkey, in, sizeof(*in), NULL, NULL, - NULL); - - kvfree(in); - - return err; +#ifdef CONFIG_NET_SWITCHDEV + if (MLX5_CAP_GEN(mdev, vport_group_manager)) + netdev->switchdev_ops = &mlx5e_switchdev_ops; +#endif } static void mlx5e_create_q_counter(struct mlx5e_priv *priv) @@ -3134,7 +3197,7 @@ static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv) struct mlx5_mkey_seg *mkc; int inlen = sizeof(*in); u64 npages = - mlx5e_get_max_num_channels(mdev) * MLX5_CHANNEL_MAX_NUM_MTTS; + priv->profile->max_nch(mdev) * MLX5_CHANNEL_MAX_NUM_MTTS; int err; in = mlx5_vzalloc(inlen); @@ -3149,7 +3212,7 @@ static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv) MLX5_ACCESS_MODE_MTT; mkc->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); - mkc->flags_pd = cpu_to_be32(priv->pdn); + mkc->flags_pd = cpu_to_be32(mdev->mlx5e_res.pdn); mkc->len = cpu_to_be64(npages << PAGE_SHIFT); mkc->xlt_oct_size = cpu_to_be32(mlx5e_get_mtt_octw(npages)); mkc->log2_page_size = PAGE_SHIFT; @@ -3162,160 +3225,233 @@ static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv) return err; } -static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev) +static void mlx5e_nic_init(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, + void *ppriv) { - struct net_device *netdev; - struct mlx5e_priv *priv; - int nch = mlx5e_get_max_num_channels(mdev); - int err; - - if (mlx5e_check_required_hca_cap(mdev)) - return NULL; + struct mlx5e_priv *priv = netdev_priv(netdev); - netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), - nch * MLX5E_MAX_NUM_TC, - nch); - if (!netdev) { - mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n"); - return NULL; - } + mlx5e_build_nic_netdev_priv(mdev, netdev, profile, ppriv); + mlx5e_build_nic_netdev(netdev); + mlx5e_vxlan_init(priv); +} - mlx5e_build_netdev_priv(mdev, netdev, nch); - mlx5e_build_netdev(netdev); +static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_eswitch *esw = mdev->priv.eswitch; - netif_carrier_off(netdev); + mlx5e_vxlan_cleanup(priv); - priv = netdev_priv(netdev); + if (MLX5_CAP_GEN(mdev, vport_group_manager)) + mlx5_eswitch_unregister_vport_rep(esw, 0); +} - priv->wq = create_singlethread_workqueue("mlx5e"); - if (!priv->wq) - goto err_free_netdev; +static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int err; + int i; - err = mlx5_alloc_map_uar(mdev, &priv->cq_uar, false); + err = mlx5e_create_indirect_rqts(priv); if (err) { - mlx5_core_err(mdev, "alloc_map uar failed, %d\n", err); - goto err_destroy_wq; + mlx5_core_warn(mdev, "create indirect rqts failed, %d\n", err); + return err; } - err = mlx5_core_alloc_pd(mdev, &priv->pdn); + err = mlx5e_create_direct_rqts(priv); if (err) { - mlx5_core_err(mdev, "alloc pd failed, %d\n", err); - goto err_unmap_free_uar; + mlx5_core_warn(mdev, "create direct rqts failed, %d\n", err); + goto err_destroy_indirect_rqts; } - err = mlx5_core_alloc_transport_domain(mdev, &priv->tdn); + err = mlx5e_create_indirect_tirs(priv); if (err) { - mlx5_core_err(mdev, "alloc td failed, %d\n", err); - goto err_dealloc_pd; + mlx5_core_warn(mdev, "create indirect tirs failed, %d\n", err); + goto err_destroy_direct_rqts; } - err = mlx5e_create_mkey(priv, priv->pdn, &priv->mkey); + err = mlx5e_create_direct_tirs(priv); if (err) { - mlx5_core_err(mdev, "create mkey failed, %d\n", err); - goto err_dealloc_transport_domain; + mlx5_core_warn(mdev, "create direct tirs failed, %d\n", err); + goto err_destroy_indirect_tirs; } - err = mlx5e_create_umr_mkey(priv); + err = mlx5e_create_flow_steering(priv); if (err) { - mlx5_core_err(mdev, "create umr mkey failed, %d\n", err); - goto err_destroy_mkey; + mlx5_core_warn(mdev, "create flow steering failed, %d\n", err); + goto err_destroy_direct_tirs; } + err = mlx5e_tc_init(priv); + if (err) + goto err_destroy_flow_steering; + + return 0; + +err_destroy_flow_steering: + mlx5e_destroy_flow_steering(priv); +err_destroy_direct_tirs: + mlx5e_destroy_direct_tirs(priv); +err_destroy_indirect_tirs: + mlx5e_destroy_indirect_tirs(priv); +err_destroy_direct_rqts: + for (i = 0; i < priv->profile->max_nch(mdev); i++) + mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt); +err_destroy_indirect_rqts: + mlx5e_destroy_rqt(priv, &priv->indir_rqt); + return err; +} + +static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv) +{ + int i; + + mlx5e_tc_cleanup(priv); + mlx5e_destroy_flow_steering(priv); + mlx5e_destroy_direct_tirs(priv); + mlx5e_destroy_indirect_tirs(priv); + for (i = 0; i < priv->profile->max_nch(priv->mdev); i++) + mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt); + mlx5e_destroy_rqt(priv, &priv->indir_rqt); +} + +static int mlx5e_init_nic_tx(struct mlx5e_priv *priv) +{ + int err; + err = mlx5e_create_tises(priv); if (err) { - mlx5_core_warn(mdev, "create tises failed, %d\n", err); - goto err_destroy_umr_mkey; + mlx5_core_warn(priv->mdev, "create tises failed, %d\n", err); + return err; } - err = mlx5e_open_drop_rq(priv); - if (err) { - mlx5_core_err(mdev, "open drop rq failed, %d\n", err); - goto err_destroy_tises; +#ifdef CONFIG_MLX5_CORE_EN_DCB + mlx5e_dcbnl_ieee_setets_core(priv, &priv->params.ets); +#endif + return 0; +} + +static void mlx5e_nic_enable(struct mlx5e_priv *priv) +{ + struct net_device *netdev = priv->netdev; + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_eswitch *esw = mdev->priv.eswitch; + struct mlx5_eswitch_rep rep; + + if (mlx5e_vxlan_allowed(mdev)) { + rtnl_lock(); + udp_tunnel_get_rx_info(netdev); + rtnl_unlock(); } - err = mlx5e_create_rqts(priv); - if (err) { - mlx5_core_warn(mdev, "create rqts failed, %d\n", err); - goto err_close_drop_rq; + mlx5e_enable_async_events(priv); + queue_work(priv->wq, &priv->set_rx_mode_work); + + if (MLX5_CAP_GEN(mdev, vport_group_manager)) { + rep.load = mlx5e_nic_rep_load; + rep.unload = mlx5e_nic_rep_unload; + rep.vport = 0; + rep.priv_data = priv; + mlx5_eswitch_register_vport_rep(esw, &rep); } +} - err = mlx5e_create_tirs(priv); - if (err) { - mlx5_core_warn(mdev, "create tirs failed, %d\n", err); - goto err_destroy_rqts; +static void mlx5e_nic_disable(struct mlx5e_priv *priv) +{ + queue_work(priv->wq, &priv->set_rx_mode_work); + mlx5e_disable_async_events(priv); +} + +static const struct mlx5e_profile mlx5e_nic_profile = { + .init = mlx5e_nic_init, + .cleanup = mlx5e_nic_cleanup, + .init_rx = mlx5e_init_nic_rx, + .cleanup_rx = mlx5e_cleanup_nic_rx, + .init_tx = mlx5e_init_nic_tx, + .cleanup_tx = mlx5e_cleanup_nic_tx, + .enable = mlx5e_nic_enable, + .disable = mlx5e_nic_disable, + .update_stats = mlx5e_update_stats, + .max_nch = mlx5e_get_max_num_channels, + .max_tc = MLX5E_MAX_NUM_TC, +}; + +void *mlx5e_create_netdev(struct mlx5_core_dev *mdev, + const struct mlx5e_profile *profile, void *ppriv) +{ + struct net_device *netdev; + struct mlx5e_priv *priv; + int nch = profile->max_nch(mdev); + int err; + + netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), + nch * profile->max_tc, + nch); + if (!netdev) { + mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n"); + return NULL; } - err = mlx5e_create_flow_steering(priv); + profile->init(mdev, netdev, profile, ppriv); + + netif_carrier_off(netdev); + + priv = netdev_priv(netdev); + + priv->wq = create_singlethread_workqueue("mlx5e"); + if (!priv->wq) + goto err_free_netdev; + + err = mlx5e_create_umr_mkey(priv); if (err) { - mlx5_core_warn(mdev, "create flow steering failed, %d\n", err); - goto err_destroy_tirs; + mlx5_core_err(mdev, "create umr mkey failed, %d\n", err); + goto err_destroy_wq; } - mlx5e_create_q_counter(priv); - - mlx5e_init_l2_addr(priv); + err = profile->init_tx(priv); + if (err) + goto err_destroy_umr_mkey; - mlx5e_vxlan_init(priv); + err = mlx5e_open_drop_rq(priv); + if (err) { + mlx5_core_err(mdev, "open drop rq failed, %d\n", err); + goto err_cleanup_tx; + } - err = mlx5e_tc_init(priv); + err = profile->init_rx(priv); if (err) - goto err_dealloc_q_counters; + goto err_close_drop_rq; -#ifdef CONFIG_MLX5_CORE_EN_DCB - mlx5e_dcbnl_ieee_setets_core(priv, &priv->params.ets); -#endif + mlx5e_create_q_counter(priv); + + mlx5e_init_l2_addr(priv); err = register_netdev(netdev); if (err) { mlx5_core_err(mdev, "register_netdev failed, %d\n", err); - goto err_tc_cleanup; - } - - if (mlx5e_vxlan_allowed(mdev)) { - rtnl_lock(); - udp_tunnel_get_rx_info(netdev); - rtnl_unlock(); + goto err_dealloc_q_counters; } - mlx5e_enable_async_events(priv); - queue_work(priv->wq, &priv->set_rx_mode_work); + if (profile->enable) + profile->enable(priv); return priv; -err_tc_cleanup: - mlx5e_tc_cleanup(priv); - err_dealloc_q_counters: mlx5e_destroy_q_counter(priv); - mlx5e_destroy_flow_steering(priv); - -err_destroy_tirs: - mlx5e_destroy_tirs(priv); - -err_destroy_rqts: - mlx5e_destroy_rqts(priv); + profile->cleanup_rx(priv); err_close_drop_rq: mlx5e_close_drop_rq(priv); -err_destroy_tises: - mlx5e_destroy_tises(priv); +err_cleanup_tx: + profile->cleanup_tx(priv); err_destroy_umr_mkey: mlx5_core_destroy_mkey(mdev, &priv->umr_mkey); -err_destroy_mkey: - mlx5_core_destroy_mkey(mdev, &priv->mkey); - -err_dealloc_transport_domain: - mlx5_core_dealloc_transport_domain(mdev, priv->tdn); - -err_dealloc_pd: - mlx5_core_dealloc_pd(mdev, priv->pdn); - -err_unmap_free_uar: - mlx5_unmap_free_uar(mdev, &priv->cq_uar); - err_destroy_wq: destroy_workqueue(priv->wq); @@ -3325,15 +3461,59 @@ err_free_netdev: return NULL; } -static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, void *vpriv) +static void mlx5e_register_vport_rep(struct mlx5_core_dev *mdev) { - struct mlx5e_priv *priv = vpriv; + struct mlx5_eswitch *esw = mdev->priv.eswitch; + int total_vfs = MLX5_TOTAL_VPORTS(mdev); + int vport; + + if (!MLX5_CAP_GEN(mdev, vport_group_manager)) + return; + + for (vport = 1; vport < total_vfs; vport++) { + struct mlx5_eswitch_rep rep; + + rep.load = mlx5e_vport_rep_load; + rep.unload = mlx5e_vport_rep_unload; + rep.vport = vport; + mlx5_eswitch_register_vport_rep(esw, &rep); + } +} + +static void *mlx5e_add(struct mlx5_core_dev *mdev) +{ + struct mlx5_eswitch *esw = mdev->priv.eswitch; + void *ppriv = NULL; + void *ret; + + if (mlx5e_check_required_hca_cap(mdev)) + return NULL; + + if (mlx5e_create_mdev_resources(mdev)) + return NULL; + + mlx5e_register_vport_rep(mdev); + + if (MLX5_CAP_GEN(mdev, vport_group_manager)) + ppriv = &esw->offloads.vport_reps[0]; + + ret = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, ppriv); + if (!ret) { + mlx5e_destroy_mdev_resources(mdev); + return NULL; + } + return ret; +} + +void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv) +{ + const struct mlx5e_profile *profile = priv->profile; struct net_device *netdev = priv->netdev; set_bit(MLX5E_STATE_DESTROYING, &priv->state); + if (profile->disable) + profile->disable(priv); - queue_work(priv->wq, &priv->set_rx_mode_work); - mlx5e_disable_async_events(priv); flush_workqueue(priv->wq); if (test_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &mdev->intf_state)) { netif_device_detach(netdev); @@ -3342,26 +3522,35 @@ static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, void *vpriv) unregister_netdev(netdev); } - mlx5e_tc_cleanup(priv); - mlx5e_vxlan_cleanup(priv); mlx5e_destroy_q_counter(priv); - mlx5e_destroy_flow_steering(priv); - mlx5e_destroy_tirs(priv); - mlx5e_destroy_rqts(priv); + profile->cleanup_rx(priv); mlx5e_close_drop_rq(priv); - mlx5e_destroy_tises(priv); + profile->cleanup_tx(priv); mlx5_core_destroy_mkey(priv->mdev, &priv->umr_mkey); - mlx5_core_destroy_mkey(priv->mdev, &priv->mkey); - mlx5_core_dealloc_transport_domain(priv->mdev, priv->tdn); - mlx5_core_dealloc_pd(priv->mdev, priv->pdn); - mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar); cancel_delayed_work_sync(&priv->update_stats_work); destroy_workqueue(priv->wq); + if (profile->cleanup) + profile->cleanup(priv); if (!test_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &mdev->intf_state)) free_netdev(netdev); } +static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv) +{ + struct mlx5_eswitch *esw = mdev->priv.eswitch; + int total_vfs = MLX5_TOTAL_VPORTS(mdev); + struct mlx5e_priv *priv = vpriv; + int vport; + + mlx5e_destroy_netdev(mdev, priv); + + for (vport = 1; vport < total_vfs; vport++) + mlx5_eswitch_unregister_vport_rep(esw, vport); + + mlx5e_destroy_mdev_resources(mdev); +} + static void *mlx5e_get_netdev(void *vpriv) { struct mlx5e_priv *priv = vpriv; @@ -3370,8 +3559,8 @@ static void *mlx5e_get_netdev(void *vpriv) } static struct mlx5_interface mlx5e_interface = { - .add = mlx5e_create_netdev, - .remove = mlx5e_destroy_netdev, + .add = mlx5e_add, + .remove = mlx5e_remove, .event = mlx5e_async_event, .protocol = MLX5_INTERFACE_PROTOCOL_ETH, .get_dev = mlx5e_get_netdev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c new file mode 100644 index 000000000000..5ef02f02a1d5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -0,0 +1,394 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <generated/utsrelease.h> +#include <linux/mlx5/fs.h> +#include <net/switchdev.h> + +#include "eswitch.h" +#include "en.h" + +static const char mlx5e_rep_driver_name[] = "mlx5e_rep"; + +static void mlx5e_rep_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + strlcpy(drvinfo->driver, mlx5e_rep_driver_name, + sizeof(drvinfo->driver)); + strlcpy(drvinfo->version, UTS_RELEASE, sizeof(drvinfo->version)); +} + +static const struct counter_desc sw_rep_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_bytes) }, +}; + +#define NUM_VPORT_REP_COUNTERS ARRAY_SIZE(sw_rep_stats_desc) + +static void mlx5e_rep_get_strings(struct net_device *dev, + u32 stringset, uint8_t *data) +{ + int i; + + switch (stringset) { + case ETH_SS_STATS: + for (i = 0; i < NUM_VPORT_REP_COUNTERS; i++) + strcpy(data + (i * ETH_GSTRING_LEN), + sw_rep_stats_desc[i].format); + break; + } +} + +static void mlx5e_update_sw_rep_counters(struct mlx5e_priv *priv) +{ + struct mlx5e_sw_stats *s = &priv->stats.sw; + struct mlx5e_rq_stats *rq_stats; + struct mlx5e_sq_stats *sq_stats; + int i, j; + + memset(s, 0, sizeof(*s)); + for (i = 0; i < priv->params.num_channels; i++) { + rq_stats = &priv->channel[i]->rq.stats; + + s->rx_packets += rq_stats->packets; + s->rx_bytes += rq_stats->bytes; + + for (j = 0; j < priv->params.num_tc; j++) { + sq_stats = &priv->channel[i]->sq[j].stats; + + s->tx_packets += sq_stats->packets; + s->tx_bytes += sq_stats->bytes; + } + } +} + +static void mlx5e_rep_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int i; + + if (!data) + return; + + mutex_lock(&priv->state_lock); + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + mlx5e_update_sw_rep_counters(priv); + mutex_unlock(&priv->state_lock); + + for (i = 0; i < NUM_VPORT_REP_COUNTERS; i++) + data[i] = MLX5E_READ_CTR64_CPU(&priv->stats.sw, + sw_rep_stats_desc, i); +} + +static int mlx5e_rep_get_sset_count(struct net_device *dev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return NUM_VPORT_REP_COUNTERS; + default: + return -EOPNOTSUPP; + } +} + +static const struct ethtool_ops mlx5e_rep_ethtool_ops = { + .get_drvinfo = mlx5e_rep_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_strings = mlx5e_rep_get_strings, + .get_sset_count = mlx5e_rep_get_sset_count, + .get_ethtool_stats = mlx5e_rep_get_ethtool_stats, +}; + +int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + u8 mac[ETH_ALEN]; + + if (esw->mode == SRIOV_NONE) + return -EOPNOTSUPP; + + switch (attr->id) { + case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: + mlx5_query_nic_vport_mac_address(priv->mdev, 0, mac); + attr->u.ppid.id_len = ETH_ALEN; + memcpy(&attr->u.ppid.id, &mac, ETH_ALEN); + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + +int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) + +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_channel *c; + int n, tc, err, num_sqs = 0; + u16 *sqs; + + sqs = kcalloc(priv->params.num_channels * priv->params.num_tc, sizeof(u16), GFP_KERNEL); + if (!sqs) + return -ENOMEM; + + for (n = 0; n < priv->params.num_channels; n++) { + c = priv->channel[n]; + for (tc = 0; tc < c->num_tc; tc++) + sqs[num_sqs++] = c->sq[tc].sqn; + } + + err = mlx5_eswitch_sqs2vport_start(esw, rep, sqs, num_sqs); + + kfree(sqs); + return err; +} + +int mlx5e_nic_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_priv *priv = rep->priv_data; + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + return mlx5e_add_sqs_fwd_rules(priv); + return 0; +} + +void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_eswitch_rep *rep = priv->ppriv; + + mlx5_eswitch_sqs2vport_stop(esw, rep); +} + +void mlx5e_nic_rep_unload(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_priv *priv = rep->priv_data; + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + mlx5e_remove_sqs_fwd_rules(priv); +} + +static int mlx5e_rep_get_phys_port_name(struct net_device *dev, + char *buf, size_t len) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_eswitch_rep *rep = priv->ppriv; + int ret; + + ret = snprintf(buf, len, "%d", rep->vport - 1); + if (ret >= len) + return -EOPNOTSUPP; + + return 0; +} + +static const struct switchdev_ops mlx5e_rep_switchdev_ops = { + .switchdev_port_attr_get = mlx5e_attr_get, +}; + +static const struct net_device_ops mlx5e_netdev_ops_rep = { + .ndo_open = mlx5e_open, + .ndo_stop = mlx5e_close, + .ndo_start_xmit = mlx5e_xmit, + .ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name, + .ndo_get_stats64 = mlx5e_get_stats, +}; + +static void mlx5e_build_rep_netdev_priv(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, + void *ppriv) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + u8 cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? + MLX5_CQ_PERIOD_MODE_START_FROM_CQE : + MLX5_CQ_PERIOD_MODE_START_FROM_EQE; + + priv->params.log_sq_size = + MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; + priv->params.rq_wq_type = MLX5_WQ_TYPE_LINKED_LIST; + priv->params.log_rq_size = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE; + + priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type, + BIT(priv->params.log_rq_size)); + + priv->params.rx_am_enabled = MLX5_CAP_GEN(mdev, cq_moderation); + mlx5e_set_rx_cq_mode_params(&priv->params, cq_period_mode); + + priv->params.tx_max_inline = mlx5e_get_max_inline_cap(mdev); + priv->params.num_tc = 1; + + priv->params.lro_wqe_sz = + MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; + + priv->mdev = mdev; + priv->netdev = netdev; + priv->params.num_channels = profile->max_nch(mdev); + priv->profile = profile; + priv->ppriv = ppriv; + + mutex_init(&priv->state_lock); + + INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work); +} + +static void mlx5e_build_rep_netdev(struct net_device *netdev) +{ + netdev->netdev_ops = &mlx5e_netdev_ops_rep; + + netdev->watchdog_timeo = 15 * HZ; + + netdev->ethtool_ops = &mlx5e_rep_ethtool_ops; + +#ifdef CONFIG_NET_SWITCHDEV + netdev->switchdev_ops = &mlx5e_rep_switchdev_ops; +#endif + + netdev->features |= NETIF_F_VLAN_CHALLENGED; + + eth_hw_addr_random(netdev); +} + +static void mlx5e_init_rep(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, + void *ppriv) +{ + mlx5e_build_rep_netdev_priv(mdev, netdev, profile, ppriv); + mlx5e_build_rep_netdev(netdev); +} + +static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_flow_rule *flow_rule; + int err; + int i; + + err = mlx5e_create_direct_rqts(priv); + if (err) { + mlx5_core_warn(mdev, "create direct rqts failed, %d\n", err); + return err; + } + + err = mlx5e_create_direct_tirs(priv); + if (err) { + mlx5_core_warn(mdev, "create direct tirs failed, %d\n", err); + goto err_destroy_direct_rqts; + } + + flow_rule = mlx5_eswitch_create_vport_rx_rule(esw, + rep->vport, + priv->direct_tir[0].tirn); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + goto err_destroy_direct_tirs; + } + rep->vport_rx_rule = flow_rule; + + return 0; + +err_destroy_direct_tirs: + mlx5e_destroy_direct_tirs(priv); +err_destroy_direct_rqts: + for (i = 0; i < priv->params.num_channels; i++) + mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt); + return err; +} + +static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch_rep *rep = priv->ppriv; + int i; + + mlx5_del_flow_rule(rep->vport_rx_rule); + mlx5e_destroy_direct_tirs(priv); + for (i = 0; i < priv->params.num_channels; i++) + mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt); +} + +static int mlx5e_init_rep_tx(struct mlx5e_priv *priv) +{ + int err; + + err = mlx5e_create_tises(priv); + if (err) { + mlx5_core_warn(priv->mdev, "create tises failed, %d\n", err); + return err; + } + return 0; +} + +static int mlx5e_get_rep_max_num_channels(struct mlx5_core_dev *mdev) +{ +#define MLX5E_PORT_REPRESENTOR_NCH 1 + return MLX5E_PORT_REPRESENTOR_NCH; +} + +static struct mlx5e_profile mlx5e_rep_profile = { + .init = mlx5e_init_rep, + .init_rx = mlx5e_init_rep_rx, + .cleanup_rx = mlx5e_cleanup_rep_rx, + .init_tx = mlx5e_init_rep_tx, + .cleanup_tx = mlx5e_cleanup_nic_tx, + .update_stats = mlx5e_update_sw_rep_counters, + .max_nch = mlx5e_get_rep_max_num_channels, + .max_tc = 1, +}; + +int mlx5e_vport_rep_load(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep) +{ + rep->priv_data = mlx5e_create_netdev(esw->dev, &mlx5e_rep_profile, rep); + if (!rep->priv_data) { + pr_warn("Failed to create representor for vport %d\n", + rep->vport); + return -EINVAL; + } + return 0; +} + +void mlx5e_vport_rep_unload(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_priv *priv = rep->priv_data; + + mlx5e_destroy_netdev(esw->dev, priv); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 022acc2e8922..9f2a16a507e0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -212,6 +212,20 @@ err_free_skb: return -ENOMEM; } +void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix) +{ + struct sk_buff *skb = rq->skb[ix]; + + if (skb) { + rq->skb[ix] = NULL; + dma_unmap_single(rq->pdev, + *((dma_addr_t *)skb->cb), + rq->wqe_sz, + DMA_FROM_DEVICE); + dev_kfree_skb(skb); + } +} + static inline int mlx5e_mpwqe_strides_per_page(struct mlx5e_rq *rq) { return rq->mpwqe_num_strides >> MLX5_MPWRQ_WQE_PAGE_ORDER; @@ -574,6 +588,30 @@ int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) return 0; } +void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) +{ + struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; + + wi->free_wqe(rq, wi); +} + +void mlx5e_free_rx_descs(struct mlx5e_rq *rq) +{ + struct mlx5_wq_ll *wq = &rq->wq; + struct mlx5e_rx_wqe *wqe; + __be16 wqe_ix_be; + u16 wqe_ix; + + while (!mlx5_wq_ll_is_empty(wq)) { + wqe_ix_be = *wq->tail_next; + wqe_ix = be16_to_cpu(wqe_ix_be); + wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_ix); + rq->dealloc_wqe(rq, wqe_ix); + mlx5_wq_ll_pop(&rq->wq, wqe_ix_be, + &wqe->next.next_wqe_index); + } +} + #define RQ_CANNOT_POST(rq) \ (!test_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state) || \ test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state)) @@ -878,6 +916,9 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq); int work_done = 0; + if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH_TIMEOUT, &rq->state))) + return 0; + if (cq->decmprs_left) work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index fcd490cc5610..7b9d8a989b52 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -151,6 +151,22 @@ static const struct counter_desc vport_stats_desc[] = { VPORT_COUNTER_OFF(transmitted_eth_broadcast.packets) }, { "tx_vport_broadcast_bytes", VPORT_COUNTER_OFF(transmitted_eth_broadcast.octets) }, + { "rx_vport_rdma_unicast_packets", + VPORT_COUNTER_OFF(received_ib_unicast.packets) }, + { "rx_vport_rdma_unicast_bytes", + VPORT_COUNTER_OFF(received_ib_unicast.octets) }, + { "tx_vport_rdma_unicast_packets", + VPORT_COUNTER_OFF(transmitted_ib_unicast.packets) }, + { "tx_vport_rdma_unicast_bytes", + VPORT_COUNTER_OFF(transmitted_ib_unicast.octets) }, + { "rx_vport_rdma_multicast_packets", + VPORT_COUNTER_OFF(received_ib_multicast.packets) }, + { "rx_vport_rdma_multicast_bytes", + VPORT_COUNTER_OFF(received_ib_multicast.octets) }, + { "tx_vport_rdma_multicast_packets", + VPORT_COUNTER_OFF(transmitted_ib_multicast.packets) }, + { "tx_vport_rdma_multicast_bytes", + VPORT_COUNTER_OFF(transmitted_ib_multicast.octets) }, }; #define PPORT_802_3_OFF(c) \ @@ -238,11 +254,12 @@ static const struct counter_desc pport_per_prio_traffic_stats_desc[] = { }; static const struct counter_desc pport_per_prio_pfc_stats_desc[] = { - { "rx_prio%d_pause", PPORT_PER_PRIO_OFF(rx_pause) }, - { "rx_prio%d_pause_duration", PPORT_PER_PRIO_OFF(rx_pause_duration) }, - { "tx_prio%d_pause", PPORT_PER_PRIO_OFF(tx_pause) }, - { "tx_prio%d_pause_duration", PPORT_PER_PRIO_OFF(tx_pause_duration) }, - { "rx_prio%d_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) }, + /* %s is "global" or "prio{i}" */ + { "rx_%s_pause", PPORT_PER_PRIO_OFF(rx_pause) }, + { "rx_%s_pause_duration", PPORT_PER_PRIO_OFF(rx_pause_duration) }, + { "tx_%s_pause", PPORT_PER_PRIO_OFF(tx_pause) }, + { "tx_%s_pause_duration", PPORT_PER_PRIO_OFF(tx_pause_duration) }, + { "rx_%s_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) }, }; struct mlx5e_rq_stats { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 704c3d30493e..3261e8b1286e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -50,7 +50,7 @@ struct mlx5e_tc_flow { #define MLX5E_TC_TABLE_NUM_GROUPS 4 static struct mlx5_flow_rule *mlx5e_tc_add_flow(struct mlx5e_priv *priv, - u32 *match_c, u32 *match_v, + struct mlx5_flow_spec *spec, u32 action, u32 flow_tag) { struct mlx5_core_dev *dev = priv->mdev; @@ -88,8 +88,8 @@ static struct mlx5_flow_rule *mlx5e_tc_add_flow(struct mlx5e_priv *priv, table_created = true; } - rule = mlx5_add_flow_rule(priv->fs.tc.t, MLX5_MATCH_OUTER_HEADERS, - match_c, match_v, + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + rule = mlx5_add_flow_rule(priv->fs.tc.t, spec, action, flow_tag, &dest); @@ -126,12 +126,13 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, } } -static int parse_cls_flower(struct mlx5e_priv *priv, - u32 *match_c, u32 *match_v, +static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, struct tc_cls_flower_offload *f) { - void *headers_c = MLX5_ADDR_OF(fte_match_param, match_c, outer_headers); - void *headers_v = MLX5_ADDR_OF(fte_match_param, match_v, outer_headers); + void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers); + void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + outer_headers); u16 addr_type = 0; u8 ip_proto = 0; @@ -342,12 +343,11 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, struct tc_cls_flower_offload *f) { struct mlx5e_tc_table *tc = &priv->fs.tc; - u32 *match_c; - u32 *match_v; int err = 0; u32 flow_tag; u32 action; struct mlx5e_tc_flow *flow; + struct mlx5_flow_spec *spec; struct mlx5_flow_rule *old = NULL; flow = rhashtable_lookup_fast(&tc->ht, &f->cookie, @@ -357,16 +357,15 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, else flow = kzalloc(sizeof(*flow), GFP_KERNEL); - match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); - match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); - if (!match_c || !match_v || !flow) { + spec = mlx5_vzalloc(sizeof(*spec)); + if (!spec || !flow) { err = -ENOMEM; goto err_free; } flow->cookie = f->cookie; - err = parse_cls_flower(priv, match_c, match_v, f); + err = parse_cls_flower(priv, spec, f); if (err < 0) goto err_free; @@ -379,8 +378,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, if (err) goto err_free; - flow->rule = mlx5e_tc_add_flow(priv, match_c, match_v, action, - flow_tag); + flow->rule = mlx5e_tc_add_flow(priv, spec, action, flow_tag); if (IS_ERR(flow->rule)) { err = PTR_ERR(flow->rule); goto err_hash_del; @@ -398,8 +396,7 @@ err_free: if (!old) kfree(flow); out: - kfree(match_c); - kfree(match_v); + kvfree(spec); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 5a750b9cd006..5740b465ef84 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -110,8 +110,20 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, { struct mlx5e_priv *priv = netdev_priv(dev); int channel_ix = fallback(dev, skb); - int up = (netdev_get_num_tc(dev) && skb_vlan_tag_present(skb)) ? - skb->vlan_tci >> VLAN_PRIO_SHIFT : 0; + int up = 0; + + if (!netdev_get_num_tc(dev)) + return channel_ix; + + if (skb_vlan_tag_present(skb)) + up = skb->vlan_tci >> VLAN_PRIO_SHIFT; + + /* channel_ix can be larger than num_channels since + * dev->num_real_tx_queues = num_channels * num_tc + */ + if (channel_ix >= priv->params.num_channels) + channel_ix = reciprocal_scale(channel_ix, + priv->params.num_channels); return priv->channeltc_to_txq_map[channel_ix][up]; } @@ -123,7 +135,7 @@ static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq, * headers and occur before the data gather. * Therefore these headers must be copied into the WQE */ -#define MLX5E_MIN_INLINE ETH_HLEN +#define MLX5E_MIN_INLINE (ETH_HLEN + VLAN_HLEN) if (bf) { u16 ihs = skb_headlen(skb); @@ -135,7 +147,7 @@ static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq, return skb_headlen(skb); } - return MLX5E_MIN_INLINE; + return max(skb_network_offset(skb), MLX5E_MIN_INLINE); } static inline void mlx5e_tx_skb_pull_inline(unsigned char **skb_data, @@ -341,6 +353,35 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) return mlx5e_sq_xmit(sq, skb); } +void mlx5e_free_tx_descs(struct mlx5e_sq *sq) +{ + struct mlx5e_tx_wqe_info *wi; + struct sk_buff *skb; + u16 ci; + int i; + + while (sq->cc != sq->pc) { + ci = sq->cc & sq->wq.sz_m1; + skb = sq->skb[ci]; + wi = &sq->wqe_info[ci]; + + if (!skb) { /* nop */ + sq->cc++; + continue; + } + + for (i = 0; i < wi->num_dma; i++) { + struct mlx5e_sq_dma *dma = + mlx5e_dma_get(sq, sq->dma_fifo_cc++); + + mlx5e_tx_dma_unmap(sq->pdev, dma); + } + + dev_kfree_skb_any(skb); + sq->cc += wi->num_wqebbs; + } +} + bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) { struct mlx5e_sq *sq; @@ -352,6 +393,9 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) sq = container_of(cq, struct mlx5e_sq, cq); + if (unlikely(test_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state))) + return false; + npkts = 0; nbytes = 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index aebbd6ccb9fe..f6d667797ee1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -40,17 +40,6 @@ #define UPLINK_VPORT 0xFFFF -#define MLX5_DEBUG_ESWITCH_MASK BIT(3) - -#define esw_info(dev, format, ...) \ - pr_info("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__) - -#define esw_warn(dev, format, ...) \ - pr_warn("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__) - -#define esw_debug(dev, format, ...) \ - mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__) - enum { MLX5_ACTION_NONE = 0, MLX5_ACTION_ADD = 1, @@ -92,6 +81,9 @@ enum { MC_ADDR_CHANGE | \ PROMISC_CHANGE) +int esw_offloads_init(struct mlx5_eswitch *esw, int nvports); +void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports); + static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, u32 events_mask) { @@ -337,25 +329,23 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, MLX5_MATCH_OUTER_HEADERS); struct mlx5_flow_rule *flow_rule = NULL; struct mlx5_flow_destination dest; + struct mlx5_flow_spec *spec; void *mv_misc = NULL; void *mc_misc = NULL; u8 *dmac_v = NULL; u8 *dmac_c = NULL; - u32 *match_v; - u32 *match_c; if (rx_rule) match_header |= MLX5_MATCH_MISC_PARAMETERS; - match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); - match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); - if (!match_v || !match_c) { + + spec = mlx5_vzalloc(sizeof(*spec)); + if (!spec) { pr_warn("FDB: Failed to alloc match parameters\n"); - goto out; + return NULL; } - - dmac_v = MLX5_ADDR_OF(fte_match_param, match_v, + dmac_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers.dmac_47_16); - dmac_c = MLX5_ADDR_OF(fte_match_param, match_c, + dmac_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers.dmac_47_16); if (match_header & MLX5_MATCH_OUTER_HEADERS) { @@ -364,8 +354,10 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, } if (match_header & MLX5_MATCH_MISC_PARAMETERS) { - mv_misc = MLX5_ADDR_OF(fte_match_param, match_v, misc_parameters); - mc_misc = MLX5_ADDR_OF(fte_match_param, match_c, misc_parameters); + mv_misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + mc_misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters); MLX5_SET(fte_match_set_misc, mv_misc, source_port, UPLINK_VPORT); MLX5_SET_TO_ONES(fte_match_set_misc, mc_misc, source_port); } @@ -376,11 +368,9 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, esw_debug(esw->dev, "\tFDB add rule dmac_v(%pM) dmac_c(%pM) -> vport(%d)\n", dmac_v, dmac_c, vport); + spec->match_criteria_enable = match_header; flow_rule = - mlx5_add_flow_rule(esw->fdb_table.fdb, - match_header, - match_c, - match_v, + mlx5_add_flow_rule(esw->fdb_table.fdb, spec, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, 0, &dest); if (IS_ERR(flow_rule)) { @@ -389,9 +379,8 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, dmac_v, dmac_c, vport, PTR_ERR(flow_rule)); flow_rule = NULL; } -out: - kfree(match_v); - kfree(match_c); + + kvfree(spec); return flow_rule; } @@ -428,7 +417,7 @@ esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u32 vport) return __esw_fdb_set_vport_rule(esw, vport, true, mac_c, mac_v); } -static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) +static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw, int nvports) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_core_dev *dev = esw->dev; @@ -479,7 +468,7 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) esw_warn(dev, "Failed to create flow group err(%d)\n", err); goto out; } - esw->fdb_table.addr_grp = g; + esw->fdb_table.legacy.addr_grp = g; /* Allmulti group : One rule that forwards any mcast traffic */ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, @@ -494,7 +483,7 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) esw_warn(dev, "Failed to create allmulti flow group err(%d)\n", err); goto out; } - esw->fdb_table.allmulti_grp = g; + esw->fdb_table.legacy.allmulti_grp = g; /* Promiscuous group : * One rule that forward all unmatched traffic from previous groups @@ -511,17 +500,17 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) esw_warn(dev, "Failed to create promisc flow group err(%d)\n", err); goto out; } - esw->fdb_table.promisc_grp = g; + esw->fdb_table.legacy.promisc_grp = g; out: if (err) { - if (!IS_ERR_OR_NULL(esw->fdb_table.allmulti_grp)) { - mlx5_destroy_flow_group(esw->fdb_table.allmulti_grp); - esw->fdb_table.allmulti_grp = NULL; + if (!IS_ERR_OR_NULL(esw->fdb_table.legacy.allmulti_grp)) { + mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp); + esw->fdb_table.legacy.allmulti_grp = NULL; } - if (!IS_ERR_OR_NULL(esw->fdb_table.addr_grp)) { - mlx5_destroy_flow_group(esw->fdb_table.addr_grp); - esw->fdb_table.addr_grp = NULL; + if (!IS_ERR_OR_NULL(esw->fdb_table.legacy.addr_grp)) { + mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp); + esw->fdb_table.legacy.addr_grp = NULL; } if (!IS_ERR_OR_NULL(esw->fdb_table.fdb)) { mlx5_destroy_flow_table(esw->fdb_table.fdb); @@ -533,20 +522,20 @@ out: return err; } -static void esw_destroy_fdb_table(struct mlx5_eswitch *esw) +static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw) { if (!esw->fdb_table.fdb) return; esw_debug(esw->dev, "Destroy FDB Table\n"); - mlx5_destroy_flow_group(esw->fdb_table.promisc_grp); - mlx5_destroy_flow_group(esw->fdb_table.allmulti_grp); - mlx5_destroy_flow_group(esw->fdb_table.addr_grp); + mlx5_destroy_flow_group(esw->fdb_table.legacy.promisc_grp); + mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp); + mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp); mlx5_destroy_flow_table(esw->fdb_table.fdb); esw->fdb_table.fdb = NULL; - esw->fdb_table.addr_grp = NULL; - esw->fdb_table.allmulti_grp = NULL; - esw->fdb_table.promisc_grp = NULL; + esw->fdb_table.legacy.addr_grp = NULL; + esw->fdb_table.legacy.allmulti_grp = NULL; + esw->fdb_table.legacy.promisc_grp = NULL; } /* E-Switch vport UC/MC lists management */ @@ -578,7 +567,8 @@ static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) if (err) goto abort; - if (esw->fdb_table.fdb) /* SRIOV is enabled: Forward UC MAC to vport */ + /* SRIOV is enabled: Forward UC MAC to vport */ + if (esw->fdb_table.fdb && esw->mode == SRIOV_LEGACY) vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport); esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM index:%d fr(%p)\n", @@ -1300,9 +1290,8 @@ static void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw, static int esw_vport_ingress_config(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { + struct mlx5_flow_spec *spec; u8 smac[ETH_ALEN]; - u32 *match_v; - u32 *match_c; int err = 0; u8 *smac_v; @@ -1336,9 +1325,8 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, "vport[%d] configure ingress rules, vlan(%d) qos(%d)\n", vport->vport, vport->vlan, vport->qos); - match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); - match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); - if (!match_v || !match_c) { + spec = mlx5_vzalloc(sizeof(*spec)); + if (!spec) { err = -ENOMEM; esw_warn(esw->dev, "vport[%d] configure ingress rules failed, err(%d)\n", vport->vport, err); @@ -1346,22 +1334,20 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, } if (vport->vlan || vport->qos) - MLX5_SET_TO_ONES(fte_match_param, match_c, outer_headers.vlan_tag); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.vlan_tag); if (vport->spoofchk) { - MLX5_SET_TO_ONES(fte_match_param, match_c, outer_headers.smac_47_16); - MLX5_SET_TO_ONES(fte_match_param, match_c, outer_headers.smac_15_0); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_47_16); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_15_0); smac_v = MLX5_ADDR_OF(fte_match_param, - match_v, + spec->match_value, outer_headers.smac_47_16); ether_addr_copy(smac_v, smac); } + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; vport->ingress.allow_rule = - mlx5_add_flow_rule(vport->ingress.acl, - MLX5_MATCH_OUTER_HEADERS, - match_c, - match_v, + mlx5_add_flow_rule(vport->ingress.acl, spec, MLX5_FLOW_CONTEXT_ACTION_ALLOW, 0, NULL); if (IS_ERR(vport->ingress.allow_rule)) { @@ -1372,13 +1358,9 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, goto out; } - memset(match_c, 0, MLX5_ST_SZ_BYTES(fte_match_param)); - memset(match_v, 0, MLX5_ST_SZ_BYTES(fte_match_param)); + memset(spec, 0, sizeof(*spec)); vport->ingress.drop_rule = - mlx5_add_flow_rule(vport->ingress.acl, - 0, - match_c, - match_v, + mlx5_add_flow_rule(vport->ingress.acl, spec, MLX5_FLOW_CONTEXT_ACTION_DROP, 0, NULL); if (IS_ERR(vport->ingress.drop_rule)) { @@ -1392,17 +1374,14 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, out: if (err) esw_vport_cleanup_ingress_rules(esw, vport); - - kfree(match_v); - kfree(match_c); + kvfree(spec); return err; } static int esw_vport_egress_config(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { - u32 *match_v; - u32 *match_c; + struct mlx5_flow_spec *spec; int err = 0; esw_vport_cleanup_egress_rules(esw, vport); @@ -1418,9 +1397,8 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, "vport[%d] configure egress rules, vlan(%d) qos(%d)\n", vport->vport, vport->vlan, vport->qos); - match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); - match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); - if (!match_v || !match_c) { + spec = mlx5_vzalloc(sizeof(*spec)); + if (!spec) { err = -ENOMEM; esw_warn(esw->dev, "vport[%d] configure egress rules failed, err(%d)\n", vport->vport, err); @@ -1428,16 +1406,14 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, } /* Allowed vlan rule */ - MLX5_SET_TO_ONES(fte_match_param, match_c, outer_headers.vlan_tag); - MLX5_SET_TO_ONES(fte_match_param, match_v, outer_headers.vlan_tag); - MLX5_SET_TO_ONES(fte_match_param, match_c, outer_headers.first_vid); - MLX5_SET(fte_match_param, match_v, outer_headers.first_vid, vport->vlan); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.vlan_tag); + MLX5_SET_TO_ONES(fte_match_param, spec->match_value, outer_headers.vlan_tag); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.first_vid); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vport->vlan); + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; vport->egress.allowed_vlan = - mlx5_add_flow_rule(vport->egress.acl, - MLX5_MATCH_OUTER_HEADERS, - match_c, - match_v, + mlx5_add_flow_rule(vport->egress.acl, spec, MLX5_FLOW_CONTEXT_ACTION_ALLOW, 0, NULL); if (IS_ERR(vport->egress.allowed_vlan)) { @@ -1449,13 +1425,9 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, } /* Drop others rule (star rule) */ - memset(match_c, 0, MLX5_ST_SZ_BYTES(fte_match_param)); - memset(match_v, 0, MLX5_ST_SZ_BYTES(fte_match_param)); + memset(spec, 0, sizeof(*spec)); vport->egress.drop_rule = - mlx5_add_flow_rule(vport->egress.acl, - 0, - match_c, - match_v, + mlx5_add_flow_rule(vport->egress.acl, spec, MLX5_FLOW_CONTEXT_ACTION_DROP, 0, NULL); if (IS_ERR(vport->egress.drop_rule)) { @@ -1465,8 +1437,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, vport->egress.drop_rule = NULL; } out: - kfree(match_v); - kfree(match_c); + kvfree(spec); return err; } @@ -1540,10 +1511,10 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) } /* Public E-Switch API */ -int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs) +int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { int err; - int i; + int i, enabled_events; if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) @@ -1561,16 +1532,20 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs) if (!MLX5_CAP_ESW_EGRESS_ACL(esw->dev, ft_support)) esw_warn(esw->dev, "E-Switch engress ACL is not supported by FW\n"); - esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d)\n", nvfs); - + esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode); + esw->mode = mode; esw_disable_vport(esw, 0); - err = esw_create_fdb_table(esw, nvfs + 1); + if (mode == SRIOV_LEGACY) + err = esw_create_legacy_fdb_table(esw, nvfs + 1); + else + err = esw_offloads_init(esw, nvfs + 1); if (err) goto abort; + enabled_events = (mode == SRIOV_LEGACY) ? SRIOV_VPORT_EVENTS : UC_ADDR_CHANGE; for (i = 0; i <= nvfs; i++) - esw_enable_vport(esw, i, SRIOV_VPORT_EVENTS); + esw_enable_vport(esw, i, enabled_events); esw_info(esw->dev, "SRIOV enabled: active vports(%d)\n", esw->enabled_vports); @@ -1584,16 +1559,18 @@ abort: void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) { struct esw_mc_addr *mc_promisc; + int nvports; int i; if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) return; - esw_info(esw->dev, "disable SRIOV: active vports(%d)\n", - esw->enabled_vports); + esw_info(esw->dev, "disable SRIOV: active vports(%d) mode(%d)\n", + esw->enabled_vports, esw->mode); mc_promisc = esw->mc_promisc; + nvports = esw->enabled_vports; for (i = 0; i < esw->total_vports; i++) esw_disable_vport(esw, i); @@ -1601,8 +1578,12 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) if (mc_promisc && mc_promisc->uplink_rule) mlx5_del_flow_rule(mc_promisc->uplink_rule); - esw_destroy_fdb_table(esw); + if (esw->mode == SRIOV_LEGACY) + esw_destroy_legacy_fdb_table(esw); + else if (esw->mode == SRIOV_OFFLOADS) + esw_offloads_cleanup(esw, nvports); + esw->mode = SRIOV_NONE; /* VPORT 0 (PF) must be enabled back with non-sriov configuration */ esw_enable_vport(esw, 0, UC_ADDR_CHANGE); } @@ -1660,6 +1641,14 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) goto abort; } + esw->offloads.vport_reps = + kzalloc(total_vports * sizeof(struct mlx5_eswitch_rep), + GFP_KERNEL); + if (!esw->offloads.vport_reps) { + err = -ENOMEM; + goto abort; + } + mutex_init(&esw->state_lock); for (vport_num = 0; vport_num < total_vports; vport_num++) { @@ -1673,6 +1662,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) esw->total_vports = total_vports; esw->enabled_vports = 0; + esw->mode = SRIOV_NONE; dev->priv.eswitch = esw; esw_enable_vport(esw, 0, UC_ADDR_CHANGE); @@ -1683,6 +1673,7 @@ abort: destroy_workqueue(esw->work_queue); kfree(esw->l2_table.bitmap); kfree(esw->vports); + kfree(esw->offloads.vport_reps); kfree(esw); return err; } @@ -1700,6 +1691,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) destroy_workqueue(esw->work_queue); kfree(esw->l2_table.bitmap); kfree(esw->mc_promisc); + kfree(esw->offloads.vport_reps); kfree(esw->vports); kfree(esw); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index fd6800256d4a..7b45e6a6efb8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -35,6 +35,7 @@ #include <linux/if_ether.h> #include <linux/if_link.h> +#include <net/devlink.h> #include <linux/mlx5/device.h> #define MLX5_MAX_UC_PER_VPORT(dev) \ @@ -46,6 +47,8 @@ #define MLX5_L2_ADDR_HASH_SIZE (BIT(BITS_PER_BYTE)) #define MLX5_L2_ADDR_HASH(addr) (addr[5]) +#define FDB_UPLINK_VPORT 0xffff + /* L2 -mac address based- hash helpers */ struct l2addr_node { struct hlist_node hlist; @@ -134,9 +137,48 @@ struct mlx5_l2_table { struct mlx5_eswitch_fdb { void *fdb; - struct mlx5_flow_group *addr_grp; - struct mlx5_flow_group *allmulti_grp; - struct mlx5_flow_group *promisc_grp; + union { + struct legacy_fdb { + struct mlx5_flow_group *addr_grp; + struct mlx5_flow_group *allmulti_grp; + struct mlx5_flow_group *promisc_grp; + } legacy; + + struct offloads_fdb { + struct mlx5_flow_group *send_to_vport_grp; + struct mlx5_flow_group *miss_grp; + struct mlx5_flow_rule *miss_rule; + } offloads; + }; +}; + +enum { + SRIOV_NONE, + SRIOV_LEGACY, + SRIOV_OFFLOADS +}; + +struct mlx5_esw_sq { + struct mlx5_flow_rule *send_to_vport_rule; + struct list_head list; +}; + +struct mlx5_eswitch_rep { + int (*load)(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep); + void (*unload)(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep); + u16 vport; + struct mlx5_flow_rule *vport_rx_rule; + void *priv_data; + struct list_head vport_sqs_list; + bool valid; +}; + +struct mlx5_esw_offload { + struct mlx5_flow_table *ft_offloads; + struct mlx5_flow_group *vport_rx_group; + struct mlx5_eswitch_rep *vport_reps; }; struct mlx5_eswitch { @@ -153,13 +195,15 @@ struct mlx5_eswitch { */ struct mutex state_lock; struct esw_mc_addr *mc_promisc; + struct mlx5_esw_offload offloads; + int mode; }; /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe); -int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs); +int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode); void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw); int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, int vport, u8 mac[ETH_ALEN]); @@ -177,4 +221,30 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, int vport, struct ifla_vf_stats *vf_stats); +struct mlx5_flow_rule * +mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn); + +int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep, + u16 *sqns_array, int sqns_num); +void mlx5_eswitch_sqs2vport_stop(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep); + +int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode); +int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode); +void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep); +void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, + int vport); + +#define MLX5_DEBUG_ESWITCH_MASK BIT(3) + +#define esw_info(dev, format, ...) \ + pr_info("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__) + +#define esw_warn(dev, format, ...) \ + pr_warn("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__) + +#define esw_debug(dev, format, ...) \ + mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__) #endif /* __MLX5_ESWITCH_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c new file mode 100644 index 000000000000..1842dfb4636b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -0,0 +1,561 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/etherdevice.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/mlx5_ifc.h> +#include <linux/mlx5/vport.h> +#include <linux/mlx5/fs.h> +#include "mlx5_core.h" +#include "eswitch.h" + +static struct mlx5_flow_rule * +mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn) +{ + struct mlx5_flow_destination dest; + struct mlx5_flow_rule *flow_rule; + struct mlx5_flow_spec *spec; + void *misc; + + spec = mlx5_vzalloc(sizeof(*spec)); + if (!spec) { + esw_warn(esw->dev, "FDB: Failed to alloc match parameters\n"); + flow_rule = ERR_PTR(-ENOMEM); + goto out; + } + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn); + MLX5_SET(fte_match_set_misc, misc, source_port, 0x0); /* source vport is 0 */ + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport_num = vport; + + flow_rule = mlx5_add_flow_rule(esw->fdb_table.fdb, spec, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + 0, &dest); + if (IS_ERR(flow_rule)) + esw_warn(esw->dev, "FDB: Failed to add send to vport rule err %ld\n", PTR_ERR(flow_rule)); +out: + kvfree(spec); + return flow_rule; +} + +void mlx5_eswitch_sqs2vport_stop(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep) +{ + struct mlx5_esw_sq *esw_sq, *tmp; + + if (esw->mode != SRIOV_OFFLOADS) + return; + + list_for_each_entry_safe(esw_sq, tmp, &rep->vport_sqs_list, list) { + mlx5_del_flow_rule(esw_sq->send_to_vport_rule); + list_del(&esw_sq->list); + kfree(esw_sq); + } +} + +int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep, + u16 *sqns_array, int sqns_num) +{ + struct mlx5_flow_rule *flow_rule; + struct mlx5_esw_sq *esw_sq; + int vport; + int err; + int i; + + if (esw->mode != SRIOV_OFFLOADS) + return 0; + + vport = rep->vport == 0 ? + FDB_UPLINK_VPORT : rep->vport; + + for (i = 0; i < sqns_num; i++) { + esw_sq = kzalloc(sizeof(*esw_sq), GFP_KERNEL); + if (!esw_sq) { + err = -ENOMEM; + goto out_err; + } + + /* Add re-inject rule to the PF/representor sqs */ + flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw, + vport, + sqns_array[i]); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + kfree(esw_sq); + goto out_err; + } + esw_sq->send_to_vport_rule = flow_rule; + list_add(&esw_sq->list, &rep->vport_sqs_list); + } + return 0; + +out_err: + mlx5_eswitch_sqs2vport_stop(esw, rep); + return err; +} + +static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) +{ + struct mlx5_flow_destination dest; + struct mlx5_flow_rule *flow_rule = NULL; + struct mlx5_flow_spec *spec; + int err = 0; + + spec = mlx5_vzalloc(sizeof(*spec)); + if (!spec) { + esw_warn(esw->dev, "FDB: Failed to alloc match parameters\n"); + err = -ENOMEM; + goto out; + } + + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport_num = 0; + + flow_rule = mlx5_add_flow_rule(esw->fdb_table.fdb, spec, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + 0, &dest); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + esw_warn(esw->dev, "FDB: Failed to add miss flow rule err %d\n", err); + goto out; + } + + esw->fdb_table.offloads.miss_rule = flow_rule; +out: + kvfree(spec); + return err; +} + +#define MAX_PF_SQ 256 + +static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_table *fdb = NULL; + struct mlx5_flow_group *g; + u32 *flow_group_in; + void *match_criteria; + int table_size, ix, err = 0; + + flow_group_in = mlx5_vzalloc(inlen); + if (!flow_group_in) + return -ENOMEM; + + root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + if (!root_ns) { + esw_warn(dev, "Failed to get FDB flow namespace\n"); + goto ns_err; + } + + esw_debug(dev, "Create offloads FDB table, log_max_size(%d)\n", + MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); + + table_size = nvports + MAX_PF_SQ + 1; + fdb = mlx5_create_flow_table(root_ns, 0, table_size, 0); + if (IS_ERR(fdb)) { + err = PTR_ERR(fdb); + esw_warn(dev, "Failed to create FDB Table err %d\n", err); + goto fdb_err; + } + esw->fdb_table.fdb = fdb; + + /* create send-to-vport group */ + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS); + + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + + MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port); + + ix = nvports + MAX_PF_SQ; + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix - 1); + + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create send-to-vport flow group err(%d)\n", err); + goto send_vport_err; + } + esw->fdb_table.offloads.send_to_vport_grp = g; + + /* create miss group */ + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, 0); + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + 1); + + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create miss flow group err(%d)\n", err); + goto miss_err; + } + esw->fdb_table.offloads.miss_grp = g; + + err = esw_add_fdb_miss_rule(esw); + if (err) + goto miss_rule_err; + + return 0; + +miss_rule_err: + mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); +miss_err: + mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); +send_vport_err: + mlx5_destroy_flow_table(fdb); +fdb_err: +ns_err: + kvfree(flow_group_in); + return err; +} + +static void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw) +{ + if (!esw->fdb_table.fdb) + return; + + esw_debug(esw->dev, "Destroy offloads FDB Table\n"); + mlx5_del_flow_rule(esw->fdb_table.offloads.miss_rule); + mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); + mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); + + mlx5_destroy_flow_table(esw->fdb_table.fdb); +} + +static int esw_create_offloads_table(struct mlx5_eswitch *esw) +{ + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *ft_offloads; + struct mlx5_core_dev *dev = esw->dev; + int err = 0; + + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_OFFLOADS); + if (!ns) { + esw_warn(esw->dev, "Failed to get offloads flow namespace\n"); + return -ENOMEM; + } + + ft_offloads = mlx5_create_flow_table(ns, 0, dev->priv.sriov.num_vfs + 2, 0); + if (IS_ERR(ft_offloads)) { + err = PTR_ERR(ft_offloads); + esw_warn(esw->dev, "Failed to create offloads table, err %d\n", err); + return err; + } + + esw->offloads.ft_offloads = ft_offloads; + return 0; +} + +static void esw_destroy_offloads_table(struct mlx5_eswitch *esw) +{ + struct mlx5_esw_offload *offloads = &esw->offloads; + + mlx5_destroy_flow_table(offloads->ft_offloads); +} + +static int esw_create_vport_rx_group(struct mlx5_eswitch *esw) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *g; + struct mlx5_priv *priv = &esw->dev->priv; + u32 *flow_group_in; + void *match_criteria, *misc; + int err = 0; + int nvports = priv->sriov.num_vfs + 2; + + flow_group_in = mlx5_vzalloc(inlen); + if (!flow_group_in) + return -ENOMEM; + + /* create vport rx group */ + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS); + + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + misc = MLX5_ADDR_OF(fte_match_param, match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, nvports - 1); + + g = mlx5_create_flow_group(esw->offloads.ft_offloads, flow_group_in); + + if (IS_ERR(g)) { + err = PTR_ERR(g); + mlx5_core_warn(esw->dev, "Failed to create vport rx group err %d\n", err); + goto out; + } + + esw->offloads.vport_rx_group = g; +out: + kfree(flow_group_in); + return err; +} + +static void esw_destroy_vport_rx_group(struct mlx5_eswitch *esw) +{ + mlx5_destroy_flow_group(esw->offloads.vport_rx_group); +} + +struct mlx5_flow_rule * +mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn) +{ + struct mlx5_flow_destination dest; + struct mlx5_flow_rule *flow_rule; + struct mlx5_flow_spec *spec; + void *misc; + + spec = mlx5_vzalloc(sizeof(*spec)); + if (!spec) { + esw_warn(esw->dev, "Failed to alloc match parameters\n"); + flow_rule = ERR_PTR(-ENOMEM); + goto out; + } + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, vport); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dest.tir_num = tirn; + + flow_rule = mlx5_add_flow_rule(esw->offloads.ft_offloads, spec, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + 0, &dest); + if (IS_ERR(flow_rule)) { + esw_warn(esw->dev, "fs offloads: Failed to add vport rx rule err %ld\n", PTR_ERR(flow_rule)); + goto out; + } + +out: + kvfree(spec); + return flow_rule; +} + +static int esw_offloads_start(struct mlx5_eswitch *esw) +{ + int err, num_vfs = esw->dev->priv.sriov.num_vfs; + + if (esw->mode != SRIOV_LEGACY) { + esw_warn(esw->dev, "Can't set offloads mode, SRIOV legacy not enabled\n"); + return -EINVAL; + } + + mlx5_eswitch_disable_sriov(esw); + err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS); + if (err) + esw_warn(esw->dev, "Failed set eswitch to offloads, err %d\n", err); + return err; +} + +int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) +{ + struct mlx5_eswitch_rep *rep; + int vport; + int err; + + err = esw_create_offloads_fdb_table(esw, nvports); + if (err) + return err; + + err = esw_create_offloads_table(esw); + if (err) + goto create_ft_err; + + err = esw_create_vport_rx_group(esw); + if (err) + goto create_fg_err; + + for (vport = 0; vport < nvports; vport++) { + rep = &esw->offloads.vport_reps[vport]; + if (!rep->valid) + continue; + + err = rep->load(esw, rep); + if (err) + goto err_reps; + } + return 0; + +err_reps: + for (vport--; vport >= 0; vport--) { + rep = &esw->offloads.vport_reps[vport]; + if (!rep->valid) + continue; + rep->unload(esw, rep); + } + esw_destroy_vport_rx_group(esw); + +create_fg_err: + esw_destroy_offloads_table(esw); + +create_ft_err: + esw_destroy_offloads_fdb_table(esw); + return err; +} + +static int esw_offloads_stop(struct mlx5_eswitch *esw) +{ + int err, num_vfs = esw->dev->priv.sriov.num_vfs; + + mlx5_eswitch_disable_sriov(esw); + err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY); + if (err) + esw_warn(esw->dev, "Failed set eswitch legacy mode. err %d\n", err); + + return err; +} + +void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports) +{ + struct mlx5_eswitch_rep *rep; + int vport; + + for (vport = 0; vport < nvports; vport++) { + rep = &esw->offloads.vport_reps[vport]; + if (!rep->valid) + continue; + rep->unload(esw, rep); + } + + esw_destroy_vport_rx_group(esw); + esw_destroy_offloads_table(esw); + esw_destroy_offloads_fdb_table(esw); +} + +static int mlx5_esw_mode_from_devlink(u16 mode, u16 *mlx5_mode) +{ + switch (mode) { + case DEVLINK_ESWITCH_MODE_LEGACY: + *mlx5_mode = SRIOV_LEGACY; + break; + case DEVLINK_ESWITCH_MODE_SWITCHDEV: + *mlx5_mode = SRIOV_OFFLOADS; + break; + default: + return -EINVAL; + } + + return 0; +} + +int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode) +{ + struct mlx5_core_dev *dev; + u16 cur_mlx5_mode, mlx5_mode = 0; + + dev = devlink_priv(devlink); + + if (!MLX5_CAP_GEN(dev, vport_group_manager)) + return -EOPNOTSUPP; + + cur_mlx5_mode = dev->priv.eswitch->mode; + + if (cur_mlx5_mode == SRIOV_NONE) + return -EOPNOTSUPP; + + if (mlx5_esw_mode_from_devlink(mode, &mlx5_mode)) + return -EINVAL; + + if (cur_mlx5_mode == mlx5_mode) + return 0; + + if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) + return esw_offloads_start(dev->priv.eswitch); + else if (mode == DEVLINK_ESWITCH_MODE_LEGACY) + return esw_offloads_stop(dev->priv.eswitch); + else + return -EINVAL; +} + +int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) +{ + struct mlx5_core_dev *dev; + + dev = devlink_priv(devlink); + + if (!MLX5_CAP_GEN(dev, vport_group_manager)) + return -EOPNOTSUPP; + + if (dev->priv.eswitch->mode == SRIOV_NONE) + return -EOPNOTSUPP; + + *mode = dev->priv.eswitch->mode; + + return 0; +} + +void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep) +{ + struct mlx5_esw_offload *offloads = &esw->offloads; + + memcpy(&offloads->vport_reps[rep->vport], rep, + sizeof(struct mlx5_eswitch_rep)); + + INIT_LIST_HEAD(&offloads->vport_reps[rep->vport].vport_sqs_list); + offloads->vport_reps[rep->vport].valid = true; +} + +void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, + int vport) +{ + struct mlx5_esw_offload *offloads = &esw->offloads; + struct mlx5_eswitch_rep *rep; + + rep = &offloads->vport_reps[vport]; + + if (esw->mode == SRIOV_OFFLOADS && esw->vports[vport].enabled) + rep->unload(esw, rep); + + offloads->vport_reps[vport].valid = false; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index e912a3d2505e..b0a130479085 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -67,13 +67,21 @@ #define FS_REQUIRED_CAPS(...) {.arr_sz = INIT_CAPS_ARRAY_SIZE(__VA_ARGS__), \ .caps = (long[]) {__VA_ARGS__} } +#define FS_CHAINING_CAPS FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en), \ + FS_CAP(flow_table_properties_nic_receive.modify_root), \ + FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), \ + FS_CAP(flow_table_properties_nic_receive.flow_table_modify)) + #define LEFTOVERS_NUM_LEVELS 1 #define LEFTOVERS_NUM_PRIOS 1 #define BY_PASS_PRIO_NUM_LEVELS 1 -#define BY_PASS_MIN_LEVEL (KERNEL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\ +#define BY_PASS_MIN_LEVEL (ETHTOOL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\ LEFTOVERS_NUM_PRIOS) +#define ETHTOOL_PRIO_NUM_LEVELS 1 +#define ETHTOOL_NUM_PRIOS 10 +#define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS) /* Vlan, mac, ttc, aRFS */ #define KERNEL_NIC_PRIO_NUM_LEVELS 4 #define KERNEL_NIC_NUM_PRIOS 1 @@ -83,6 +91,11 @@ #define ANCHOR_NUM_LEVELS 1 #define ANCHOR_NUM_PRIOS 1 #define ANCHOR_MIN_LEVEL (BY_PASS_MIN_LEVEL + 1) + +#define OFFLOADS_MAX_FT 1 +#define OFFLOADS_NUM_PRIOS 1 +#define OFFLOADS_MIN_LEVEL (ANCHOR_MIN_LEVEL + 1) + struct node_caps { size_t arr_sz; long *caps; @@ -98,24 +111,24 @@ static struct init_tree_node { int num_levels; } root_fs = { .type = FS_TYPE_NAMESPACE, - .ar_size = 4, + .ar_size = 6, .children = (struct init_tree_node[]) { ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, - FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en), - FS_CAP(flow_table_properties_nic_receive.modify_root), - FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), - FS_CAP(flow_table_properties_nic_receive.flow_table_modify)), + FS_CHAINING_CAPS, ADD_NS(ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS, BY_PASS_PRIO_NUM_LEVELS))), + ADD_PRIO(0, OFFLOADS_MIN_LEVEL, 0, {}, + ADD_NS(ADD_MULTIPLE_PRIO(OFFLOADS_NUM_PRIOS, OFFLOADS_MAX_FT))), + ADD_PRIO(0, ETHTOOL_MIN_LEVEL, 0, + FS_CHAINING_CAPS, + ADD_NS(ADD_MULTIPLE_PRIO(ETHTOOL_NUM_PRIOS, + ETHTOOL_PRIO_NUM_LEVELS))), ADD_PRIO(0, KERNEL_MIN_LEVEL, 0, {}, ADD_NS(ADD_MULTIPLE_PRIO(1, 1), ADD_MULTIPLE_PRIO(KERNEL_NIC_NUM_PRIOS, KERNEL_NIC_PRIO_NUM_LEVELS))), ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, - FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en), - FS_CAP(flow_table_properties_nic_receive.modify_root), - FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), - FS_CAP(flow_table_properties_nic_receive.flow_table_modify)), + FS_CHAINING_CAPS, ADD_NS(ADD_MULTIPLE_PRIO(LEFTOVERS_NUM_PRIOS, LEFTOVERS_NUM_LEVELS))), ADD_PRIO(0, ANCHOR_MIN_LEVEL, 0, {}, ADD_NS(ADD_MULTIPLE_PRIO(ANCHOR_NUM_PRIOS, ANCHOR_NUM_LEVELS))), @@ -1152,9 +1165,7 @@ static bool dest_is_valid(struct mlx5_flow_destination *dest, static struct mlx5_flow_rule * _mlx5_add_flow_rule(struct mlx5_flow_table *ft, - u8 match_criteria_enable, - u32 *match_criteria, - u32 *match_value, + struct mlx5_flow_spec *spec, u32 action, u32 flow_tag, struct mlx5_flow_destination *dest) @@ -1168,22 +1179,23 @@ _mlx5_add_flow_rule(struct mlx5_flow_table *ft, nested_lock_ref_node(&ft->node, FS_MUTEX_GRANDPARENT); fs_for_each_fg(g, ft) if (compare_match_criteria(g->mask.match_criteria_enable, - match_criteria_enable, + spec->match_criteria_enable, g->mask.match_criteria, - match_criteria)) { - rule = add_rule_fg(g, match_value, + spec->match_criteria)) { + rule = add_rule_fg(g, spec->match_value, action, flow_tag, dest); if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOSPC) goto unlock; } - g = create_autogroup(ft, match_criteria_enable, match_criteria); + g = create_autogroup(ft, spec->match_criteria_enable, + spec->match_criteria); if (IS_ERR(g)) { rule = (void *)g; goto unlock; } - rule = add_rule_fg(g, match_value, + rule = add_rule_fg(g, spec->match_value, action, flow_tag, dest); if (IS_ERR(rule)) { /* Remove assumes refcount > 0 and autogroup creates a group @@ -1207,9 +1219,7 @@ static bool fwd_next_prio_supported(struct mlx5_flow_table *ft) struct mlx5_flow_rule * mlx5_add_flow_rule(struct mlx5_flow_table *ft, - u8 match_criteria_enable, - u32 *match_criteria, - u32 *match_value, + struct mlx5_flow_spec *spec, u32 action, u32 flow_tag, struct mlx5_flow_destination *dest) @@ -1240,8 +1250,7 @@ mlx5_add_flow_rule(struct mlx5_flow_table *ft, } } - rule = _mlx5_add_flow_rule(ft, match_criteria_enable, match_criteria, - match_value, action, flow_tag, dest); + rule = _mlx5_add_flow_rule(ft, spec, action, flow_tag, dest); if (sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { if (!IS_ERR_OR_NULL(rule) && @@ -1359,40 +1368,47 @@ void mlx5_destroy_flow_group(struct mlx5_flow_group *fg) struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type) { - struct mlx5_flow_root_namespace *root_ns = dev->priv.root_ns; + struct mlx5_flow_steering *steering = dev->priv.steering; + struct mlx5_flow_root_namespace *root_ns; int prio; struct fs_prio *fs_prio; struct mlx5_flow_namespace *ns; - if (!root_ns) + if (!steering) return NULL; switch (type) { case MLX5_FLOW_NAMESPACE_BYPASS: + case MLX5_FLOW_NAMESPACE_OFFLOADS: + case MLX5_FLOW_NAMESPACE_ETHTOOL: case MLX5_FLOW_NAMESPACE_KERNEL: case MLX5_FLOW_NAMESPACE_LEFTOVERS: case MLX5_FLOW_NAMESPACE_ANCHOR: prio = type; break; case MLX5_FLOW_NAMESPACE_FDB: - if (dev->priv.fdb_root_ns) - return &dev->priv.fdb_root_ns->ns; + if (steering->fdb_root_ns) + return &steering->fdb_root_ns->ns; else return NULL; case MLX5_FLOW_NAMESPACE_ESW_EGRESS: - if (dev->priv.esw_egress_root_ns) - return &dev->priv.esw_egress_root_ns->ns; + if (steering->esw_egress_root_ns) + return &steering->esw_egress_root_ns->ns; else return NULL; case MLX5_FLOW_NAMESPACE_ESW_INGRESS: - if (dev->priv.esw_ingress_root_ns) - return &dev->priv.esw_ingress_root_ns->ns; + if (steering->esw_ingress_root_ns) + return &steering->esw_ingress_root_ns->ns; else return NULL; default: return NULL; } + root_ns = steering->root_ns; + if (!root_ns) + return NULL; + fs_prio = find_prio(&root_ns->ns, prio); if (!fs_prio) return NULL; @@ -1478,13 +1494,13 @@ static bool has_required_caps(struct mlx5_core_dev *dev, struct node_caps *caps) return true; } -static int init_root_tree_recursive(struct mlx5_core_dev *dev, +static int init_root_tree_recursive(struct mlx5_flow_steering *steering, struct init_tree_node *init_node, struct fs_node *fs_parent_node, struct init_tree_node *init_parent_node, int prio) { - int max_ft_level = MLX5_CAP_FLOWTABLE(dev, + int max_ft_level = MLX5_CAP_FLOWTABLE(steering->dev, flow_table_properties_nic_receive. max_ft_level); struct mlx5_flow_namespace *fs_ns; @@ -1495,7 +1511,7 @@ static int init_root_tree_recursive(struct mlx5_core_dev *dev, if (init_node->type == FS_TYPE_PRIO) { if ((init_node->min_ft_level > max_ft_level) || - !has_required_caps(dev, &init_node->caps)) + !has_required_caps(steering->dev, &init_node->caps)) return 0; fs_get_obj(fs_ns, fs_parent_node); @@ -1516,7 +1532,7 @@ static int init_root_tree_recursive(struct mlx5_core_dev *dev, } prio = 0; for (i = 0; i < init_node->ar_size; i++) { - err = init_root_tree_recursive(dev, &init_node->children[i], + err = init_root_tree_recursive(steering, &init_node->children[i], base, init_node, prio); if (err) return err; @@ -1529,7 +1545,7 @@ static int init_root_tree_recursive(struct mlx5_core_dev *dev, return 0; } -static int init_root_tree(struct mlx5_core_dev *dev, +static int init_root_tree(struct mlx5_flow_steering *steering, struct init_tree_node *init_node, struct fs_node *fs_parent_node) { @@ -1539,7 +1555,7 @@ static int init_root_tree(struct mlx5_core_dev *dev, fs_get_obj(fs_ns, fs_parent_node); for (i = 0; i < init_node->ar_size; i++) { - err = init_root_tree_recursive(dev, &init_node->children[i], + err = init_root_tree_recursive(steering, &init_node->children[i], &fs_ns->node, init_node, i); if (err) @@ -1548,7 +1564,7 @@ static int init_root_tree(struct mlx5_core_dev *dev, return 0; } -static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_core_dev *dev, +static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_flow_steering *steering, enum fs_flow_table_type table_type) { @@ -1560,7 +1576,7 @@ static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_core_dev *dev if (!root_ns) return NULL; - root_ns->dev = dev; + root_ns->dev = steering->dev; root_ns->table_type = table_type; ns = &root_ns->ns; @@ -1615,212 +1631,126 @@ static void set_prio_attrs(struct mlx5_flow_root_namespace *root_ns) #define ANCHOR_PRIO 0 #define ANCHOR_SIZE 1 #define ANCHOR_LEVEL 0 -static int create_anchor_flow_table(struct mlx5_core_dev - *dev) +static int create_anchor_flow_table(struct mlx5_flow_steering *steering) { struct mlx5_flow_namespace *ns = NULL; struct mlx5_flow_table *ft; - ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_ANCHOR); + ns = mlx5_get_flow_namespace(steering->dev, MLX5_FLOW_NAMESPACE_ANCHOR); if (!ns) return -EINVAL; ft = mlx5_create_flow_table(ns, ANCHOR_PRIO, ANCHOR_SIZE, ANCHOR_LEVEL); if (IS_ERR(ft)) { - mlx5_core_err(dev, "Failed to create last anchor flow table"); + mlx5_core_err(steering->dev, "Failed to create last anchor flow table"); return PTR_ERR(ft); } return 0; } -static int init_root_ns(struct mlx5_core_dev *dev) +static int init_root_ns(struct mlx5_flow_steering *steering) { - dev->priv.root_ns = create_root_ns(dev, FS_FT_NIC_RX); - if (IS_ERR_OR_NULL(dev->priv.root_ns)) + steering->root_ns = create_root_ns(steering, FS_FT_NIC_RX); + if (IS_ERR_OR_NULL(steering->root_ns)) goto cleanup; - if (init_root_tree(dev, &root_fs, &dev->priv.root_ns->ns.node)) + if (init_root_tree(steering, &root_fs, &steering->root_ns->ns.node)) goto cleanup; - set_prio_attrs(dev->priv.root_ns); + set_prio_attrs(steering->root_ns); - if (create_anchor_flow_table(dev)) + if (create_anchor_flow_table(steering)) goto cleanup; return 0; cleanup: - mlx5_cleanup_fs(dev); + mlx5_cleanup_fs(steering->dev); return -ENOMEM; } -static void cleanup_single_prio_root_ns(struct mlx5_core_dev *dev, - struct mlx5_flow_root_namespace *root_ns) +static void clean_tree(struct fs_node *node) { - struct fs_node *prio; - - if (!root_ns) - return; + if (node) { + struct fs_node *iter; + struct fs_node *temp; - if (!list_empty(&root_ns->ns.node.children)) { - prio = list_first_entry(&root_ns->ns.node.children, - struct fs_node, - list); - if (tree_remove_node(prio)) - mlx5_core_warn(dev, - "Flow steering priority wasn't destroyed, refcount > 1\n"); + list_for_each_entry_safe(iter, temp, &node->children, list) + clean_tree(iter); + tree_remove_node(node); } - if (tree_remove_node(&root_ns->ns.node)) - mlx5_core_warn(dev, - "Flow steering namespace wasn't destroyed, refcount > 1\n"); - root_ns = NULL; -} - -static void destroy_flow_tables(struct fs_prio *prio) -{ - struct mlx5_flow_table *iter; - struct mlx5_flow_table *tmp; - - fs_for_each_ft_safe(iter, tmp, prio) - mlx5_destroy_flow_table(iter); } -static void cleanup_root_ns(struct mlx5_core_dev *dev) +static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns) { - struct mlx5_flow_root_namespace *root_ns = dev->priv.root_ns; - struct fs_prio *iter_prio; - - if (!MLX5_CAP_GEN(dev, nic_flow_table)) - return; - if (!root_ns) return; - /* stage 1 */ - fs_for_each_prio(iter_prio, &root_ns->ns) { - struct fs_node *node; - struct mlx5_flow_namespace *iter_ns; - - fs_for_each_ns_or_ft(node, iter_prio) { - if (node->type == FS_TYPE_FLOW_TABLE) - continue; - fs_get_obj(iter_ns, node); - while (!list_empty(&iter_ns->node.children)) { - struct fs_prio *obj_iter_prio2; - struct fs_node *iter_prio2 = - list_first_entry(&iter_ns->node.children, - struct fs_node, - list); - - fs_get_obj(obj_iter_prio2, iter_prio2); - destroy_flow_tables(obj_iter_prio2); - if (tree_remove_node(iter_prio2)) { - mlx5_core_warn(dev, - "Priority %d wasn't destroyed, refcount > 1\n", - obj_iter_prio2->prio); - return; - } - } - } - } - - /* stage 2 */ - fs_for_each_prio(iter_prio, &root_ns->ns) { - while (!list_empty(&iter_prio->node.children)) { - struct fs_node *iter_ns = - list_first_entry(&iter_prio->node.children, - struct fs_node, - list); - if (tree_remove_node(iter_ns)) { - mlx5_core_warn(dev, - "Namespace wasn't destroyed, refcount > 1\n"); - return; - } - } - } - - /* stage 3 */ - while (!list_empty(&root_ns->ns.node.children)) { - struct fs_prio *obj_prio_node; - struct fs_node *prio_node = - list_first_entry(&root_ns->ns.node.children, - struct fs_node, - list); - - fs_get_obj(obj_prio_node, prio_node); - if (tree_remove_node(prio_node)) { - mlx5_core_warn(dev, - "Priority %d wasn't destroyed, refcount > 1\n", - obj_prio_node->prio); - return; - } - } - - if (tree_remove_node(&root_ns->ns.node)) { - mlx5_core_warn(dev, - "root namespace wasn't destroyed, refcount > 1\n"); - return; - } - - dev->priv.root_ns = NULL; + clean_tree(&root_ns->ns.node); } void mlx5_cleanup_fs(struct mlx5_core_dev *dev) { + struct mlx5_flow_steering *steering = dev->priv.steering; + if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) return; - cleanup_root_ns(dev); - cleanup_single_prio_root_ns(dev, dev->priv.fdb_root_ns); - cleanup_single_prio_root_ns(dev, dev->priv.esw_egress_root_ns); - cleanup_single_prio_root_ns(dev, dev->priv.esw_ingress_root_ns); + cleanup_root_ns(steering->root_ns); + cleanup_root_ns(steering->esw_egress_root_ns); + cleanup_root_ns(steering->esw_ingress_root_ns); + cleanup_root_ns(steering->fdb_root_ns); mlx5_cleanup_fc_stats(dev); + kfree(steering); } -static int init_fdb_root_ns(struct mlx5_core_dev *dev) +static int init_fdb_root_ns(struct mlx5_flow_steering *steering) { struct fs_prio *prio; - dev->priv.fdb_root_ns = create_root_ns(dev, FS_FT_FDB); - if (!dev->priv.fdb_root_ns) + steering->fdb_root_ns = create_root_ns(steering, FS_FT_FDB); + if (!steering->fdb_root_ns) return -ENOMEM; /* Create single prio */ - prio = fs_create_prio(&dev->priv.fdb_root_ns->ns, 0, 1); + prio = fs_create_prio(&steering->fdb_root_ns->ns, 0, 1); if (IS_ERR(prio)) { - cleanup_single_prio_root_ns(dev, dev->priv.fdb_root_ns); + cleanup_root_ns(steering->fdb_root_ns); + steering->fdb_root_ns = NULL; return PTR_ERR(prio); } else { return 0; } } -static int init_egress_acl_root_ns(struct mlx5_core_dev *dev) +static int init_ingress_acl_root_ns(struct mlx5_flow_steering *steering) { struct fs_prio *prio; - dev->priv.esw_egress_root_ns = create_root_ns(dev, FS_FT_ESW_EGRESS_ACL); - if (!dev->priv.esw_egress_root_ns) + steering->esw_egress_root_ns = create_root_ns(steering, FS_FT_ESW_EGRESS_ACL); + if (!steering->esw_egress_root_ns) return -ENOMEM; /* create 1 prio*/ - prio = fs_create_prio(&dev->priv.esw_egress_root_ns->ns, 0, MLX5_TOTAL_VPORTS(dev)); + prio = fs_create_prio(&steering->esw_egress_root_ns->ns, 0, + MLX5_TOTAL_VPORTS(steering->dev)); if (IS_ERR(prio)) return PTR_ERR(prio); else return 0; } -static int init_ingress_acl_root_ns(struct mlx5_core_dev *dev) +static int init_egress_acl_root_ns(struct mlx5_flow_steering *steering) { struct fs_prio *prio; - dev->priv.esw_ingress_root_ns = create_root_ns(dev, FS_FT_ESW_INGRESS_ACL); - if (!dev->priv.esw_ingress_root_ns) + steering->esw_ingress_root_ns = create_root_ns(steering, FS_FT_ESW_INGRESS_ACL); + if (!steering->esw_ingress_root_ns) return -ENOMEM; /* create 1 prio*/ - prio = fs_create_prio(&dev->priv.esw_ingress_root_ns->ns, 0, MLX5_TOTAL_VPORTS(dev)); + prio = fs_create_prio(&steering->esw_ingress_root_ns->ns, 0, + MLX5_TOTAL_VPORTS(steering->dev)); if (IS_ERR(prio)) return PTR_ERR(prio); else @@ -1829,6 +1759,7 @@ static int init_ingress_acl_root_ns(struct mlx5_core_dev *dev) int mlx5_init_fs(struct mlx5_core_dev *dev) { + struct mlx5_flow_steering *steering; int err = 0; if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) @@ -1838,26 +1769,32 @@ int mlx5_init_fs(struct mlx5_core_dev *dev) if (err) return err; + steering = kzalloc(sizeof(*steering), GFP_KERNEL); + if (!steering) + return -ENOMEM; + steering->dev = dev; + dev->priv.steering = steering; + if (MLX5_CAP_GEN(dev, nic_flow_table) && MLX5_CAP_FLOWTABLE_NIC_RX(dev, ft_support)) { - err = init_root_ns(dev); + err = init_root_ns(steering); if (err) goto err; } if (MLX5_CAP_GEN(dev, eswitch_flow_table)) { if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, ft_support)) { - err = init_fdb_root_ns(dev); + err = init_fdb_root_ns(steering); if (err) goto err; } if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) { - err = init_egress_acl_root_ns(dev); + err = init_egress_acl_root_ns(steering); if (err) goto err; } if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) { - err = init_ingress_acl_root_ns(dev); + err = init_ingress_acl_root_ns(steering); if (err) goto err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index aa41a7314691..d7ba91a1eea2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -55,6 +55,14 @@ enum fs_fte_status { FS_FTE_STATUS_EXISTING = 1UL << 0, }; +struct mlx5_flow_steering { + struct mlx5_core_dev *dev; + struct mlx5_flow_root_namespace *root_ns; + struct mlx5_flow_root_namespace *fdb_root_ns; + struct mlx5_flow_root_namespace *esw_egress_root_ns; + struct mlx5_flow_root_namespace *esw_ingress_root_ns; +}; + struct fs_node { struct list_head list; struct list_head children; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 42d16b9458e4..96a59463ae65 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -108,15 +108,21 @@ static int in_fatal(struct mlx5_core_dev *dev) void mlx5_enter_error_state(struct mlx5_core_dev *dev) { + mutex_lock(&dev->intf_state_mutex); if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) - return; + goto unlock; mlx5_core_err(dev, "start\n"); - if (pci_channel_offline(dev->pdev) || in_fatal(dev)) + if (pci_channel_offline(dev->pdev) || in_fatal(dev)) { dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; + trigger_cmd_completions(dev); + } mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 0); mlx5_core_err(dev, "end\n"); + +unlock: + mutex_unlock(&dev->intf_state_mutex); } static void mlx5_handle_bad_state(struct mlx5_core_dev *dev) @@ -245,7 +251,6 @@ static void poll_health(unsigned long data) u32 count; if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { - trigger_cmd_completions(dev); mod_timer(&health->timer, get_next_poll_jiffies()); return; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 1f3b6d6a852e..4f491d43e77d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -51,6 +51,7 @@ #ifdef CONFIG_RFS_ACCEL #include <linux/cpu_rmap.h> #endif +#include <net/devlink.h> #include "mlx5_core.h" #include "fs_core.h" #ifdef CONFIG_MLX5_CORE_EN @@ -1315,19 +1316,28 @@ struct mlx5_core_event_handler { void *data); }; +static const struct devlink_ops mlx5_devlink_ops = { +#ifdef CONFIG_MLX5_CORE_EN + .eswitch_mode_set = mlx5_devlink_eswitch_mode_set, + .eswitch_mode_get = mlx5_devlink_eswitch_mode_get, +#endif +}; static int init_one(struct pci_dev *pdev, const struct pci_device_id *id) { struct mlx5_core_dev *dev; + struct devlink *devlink; struct mlx5_priv *priv; int err; - dev = kzalloc(sizeof(*dev), GFP_KERNEL); - if (!dev) { + devlink = devlink_alloc(&mlx5_devlink_ops, sizeof(*dev)); + if (!devlink) { dev_err(&pdev->dev, "kzalloc failed\n"); return -ENOMEM; } + + dev = devlink_priv(devlink); priv = &dev->priv; priv->pci_dev_data = id->driver_data; @@ -1364,15 +1374,21 @@ static int init_one(struct pci_dev *pdev, goto clean_health; } + err = devlink_register(devlink, &pdev->dev); + if (err) + goto clean_load; + return 0; +clean_load: + mlx5_unload_one(dev, priv); clean_health: mlx5_health_cleanup(dev); close_pci: mlx5_pci_close(dev, priv); clean_dev: pci_set_drvdata(pdev, NULL); - kfree(dev); + devlink_free(devlink); return err; } @@ -1380,8 +1396,10 @@ clean_dev: static void remove_one(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct devlink *devlink = priv_to_devlink(dev); struct mlx5_priv *priv = &dev->priv; + devlink_unregister(devlink); if (mlx5_unload_one(dev, priv)) { dev_err(&dev->pdev->dev, "mlx5_unload_one failed\n"); mlx5_health_cleanup(dev); @@ -1390,7 +1408,7 @@ static void remove_one(struct pci_dev *pdev) mlx5_health_cleanup(dev); mlx5_pci_close(dev, priv); pci_set_drvdata(pdev, NULL); - kfree(dev); + devlink_free(devlink); } static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, @@ -1432,46 +1450,31 @@ void mlx5_disable_device(struct mlx5_core_dev *dev) mlx5_pci_err_detected(dev->pdev, 0); } -/* wait for the device to show vital signs. For now we check - * that we can read the device ID and that the health buffer - * shows a non zero value which is different than 0xffffffff +/* wait for the device to show vital signs by waiting + * for the health counter to start counting. */ -static void wait_vital(struct pci_dev *pdev) +static int wait_vital(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); struct mlx5_core_health *health = &dev->priv.health; const int niter = 100; + u32 last_count = 0; u32 count; - u16 did; int i; - /* Wait for firmware to be ready after reset */ - msleep(1000); - for (i = 0; i < niter; i++) { - if (pci_read_config_word(pdev, 2, &did)) { - dev_warn(&pdev->dev, "failed reading config word\n"); - break; - } - if (did == pdev->device) { - dev_info(&pdev->dev, "device ID correctly read after %d iterations\n", i); - break; - } - msleep(50); - } - if (i == niter) - dev_warn(&pdev->dev, "%s-%d: could not read device ID\n", __func__, __LINE__); - for (i = 0; i < niter; i++) { count = ioread32be(health->health_counter); if (count && count != 0xffffffff) { - dev_info(&pdev->dev, "Counter value 0x%x after %d iterations\n", count, i); - break; + if (last_count && last_count != count) { + dev_info(&pdev->dev, "Counter value 0x%x after %d iterations\n", count, i); + return 0; + } + last_count = count; } msleep(50); } - if (i == niter) - dev_warn(&pdev->dev, "%s-%d: could not read device ID\n", __func__, __LINE__); + return -ETIMEDOUT; } static void mlx5_pci_resume(struct pci_dev *pdev) @@ -1483,7 +1486,11 @@ static void mlx5_pci_resume(struct pci_dev *pdev) dev_info(&pdev->dev, "%s was called\n", __func__); pci_save_state(pdev); - wait_vital(pdev); + err = wait_vital(pdev); + if (err) { + dev_err(&pdev->dev, "%s: wait_vital timed out\n", __func__); + return; + } err = mlx5_load_one(dev, priv); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 9eeee0545f1c..32dea3524cee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -345,7 +345,6 @@ retry: func_id, npages, err); goto out_4k; } - dev->priv.fw_pages += npages; err = mlx5_cmd_status_to_err(&out.hdr); if (err) { @@ -373,6 +372,33 @@ out_free: return err; } +static int reclaim_pages_cmd(struct mlx5_core_dev *dev, + struct mlx5_manage_pages_inbox *in, int in_size, + struct mlx5_manage_pages_outbox *out, int out_size) +{ + struct fw_page *fwp; + struct rb_node *p; + u32 npages; + u32 i = 0; + + if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) + return mlx5_cmd_exec_check_status(dev, (u32 *)in, in_size, + (u32 *)out, out_size); + + npages = be32_to_cpu(in->num_entries); + + p = rb_first(&dev->priv.page_root); + while (p && i < npages) { + fwp = rb_entry(p, struct fw_page, rb_node); + out->pas[i] = cpu_to_be64(fwp->addr); + p = rb_next(p); + i++; + } + + out->num_entries = cpu_to_be32(i); + return 0; +} + static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, int *nclaimed) { @@ -398,15 +424,9 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, in.func_id = cpu_to_be16(func_id); in.num_entries = cpu_to_be32(npages); mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen); - err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen); + err = reclaim_pages_cmd(dev, &in, sizeof(in), out, outlen); if (err) { - mlx5_core_err(dev, "failed reclaiming pages\n"); - goto out_free; - } - dev->priv.fw_pages -= npages; - - if (out->hdr.status) { - err = mlx5_cmd_status_to_err(&out->hdr); + mlx5_core_err(dev, "failed reclaiming pages: err %d\n", err); goto out_free; } @@ -417,13 +437,15 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, err = -EINVAL; goto out_free; } - if (nclaimed) - *nclaimed = num_claimed; for (i = 0; i < num_claimed; i++) { addr = be64_to_cpu(out->pas[i]); free_4k(dev, addr); } + + if (nclaimed) + *nclaimed = num_claimed; + dev->priv.fw_pages -= num_claimed; if (func_id) dev->priv.vfs_pages -= num_claimed; @@ -514,14 +536,10 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) p = rb_first(&dev->priv.page_root); if (p) { fwp = rb_entry(p, struct fw_page, rb_node); - if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { - free_4k(dev, fwp->addr); - nclaimed = 1; - } else { - err = reclaim_pages(dev, fwp->func_id, - optimal_reclaimed_pages(), - &nclaimed); - } + err = reclaim_pages(dev, fwp->func_id, + optimal_reclaimed_pages(), + &nclaimed); + if (err) { mlx5_core_warn(dev, "failed reclaiming pages (%d)\n", err); @@ -536,6 +554,13 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) } } while (p); + WARN(dev->priv.fw_pages, + "FW pages counter is %d after reclaiming all pages\n", + dev->priv.fw_pages); + WARN(dev->priv.vfs_pages, + "VFs FW pages counter is %d after reclaiming all pages\n", + dev->priv.vfs_pages); + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index d6a3f412ba9f..b380a6bc1f85 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -167,7 +167,7 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) mlx5_core_init_vfs(dev, num_vfs); #ifdef CONFIG_MLX5_CORE_EN - mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs); + mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY); #endif return num_vfs; @@ -209,7 +209,8 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev) mlx5_core_init_vfs(dev, cur_vfs); #ifdef CONFIG_MLX5_CORE_EN if (cur_vfs) - mlx5_eswitch_enable_sriov(dev->priv.eswitch, cur_vfs); + mlx5_eswitch_enable_sriov(dev->priv.eswitch, cur_vfs, + SRIOV_LEGACY); #endif enable_vfs(dev, cur_vfs); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index daf44cd4c566..91846dfcbe9c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -513,7 +513,6 @@ int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev, { int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); void *nic_vport_context; - u8 *guid; void *in; int err; @@ -535,8 +534,6 @@ int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev, nic_vport_context = MLX5_ADDR_OF(modify_nic_vport_context_in, in, nic_vport_context); - guid = MLX5_ADDR_OF(nic_vport_context, nic_vport_context, - node_guid); MLX5_SET64(nic_vport_context, nic_vport_context, node_guid, node_guid); err = mlx5_modify_nic_vport_context(mdev, in, inlen); diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index 9b5ebf84c051..d20ae1838a64 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -7,5 +7,6 @@ obj-$(CONFIG_MLXSW_SWITCHX2) += mlxsw_switchx2.o mlxsw_switchx2-objs := switchx2.o obj-$(CONFIG_MLXSW_SPECTRUM) += mlxsw_spectrum.o mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \ - spectrum_switchdev.o + spectrum_switchdev.o spectrum_router.o \ + spectrum_kvdl.o mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/cmd.h b/drivers/net/ethernet/mellanox/mlxsw/cmd.h index cd63b8263688..f9cd6e3f7709 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/cmd.h +++ b/drivers/net/ethernet/mellanox/mlxsw/cmd.h @@ -607,6 +607,24 @@ MLXSW_ITEM32(cmd_mbox, config_profile, */ MLXSW_ITEM32(cmd_mbox, config_profile, set_ar_sec, 0x0C, 15, 1); +/* cmd_mbox_config_set_kvd_linear_size + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_kvd_linear_size, 0x0C, 24, 1); + +/* cmd_mbox_config_set_kvd_hash_single_size + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_kvd_hash_single_size, 0x0C, 25, 1); + +/* cmd_mbox_config_set_kvd_hash_double_size + * Capability bit. Setting a bit to 1 configures the profile + * according to the mailbox contents. + */ +MLXSW_ITEM32(cmd_mbox, config_profile, set_kvd_hash_double_size, 0x0C, 26, 1); + /* cmd_mbox_config_profile_max_vepa_channels * Maximum number of VEPA channels per port (0 through 16) * 0 - multi-channel VEPA is disabled @@ -733,6 +751,31 @@ MLXSW_ITEM32(cmd_mbox, config_profile, adaptive_routing_group_cap, 0x4C, 0, 16); */ MLXSW_ITEM32(cmd_mbox, config_profile, arn, 0x50, 31, 1); +/* cmd_mbox_config_kvd_linear_size + * KVD Linear Size + * Valid for Spectrum only + * Allowed values are 128*N where N=0 or higher + */ +MLXSW_ITEM32(cmd_mbox, config_profile, kvd_linear_size, 0x54, 0, 24); + +/* cmd_mbox_config_kvd_hash_single_size + * KVD Hash single-entries size + * Valid for Spectrum only + * Allowed values are 128*N where N=0 or higher + * Must be greater or equal to cap_min_kvd_hash_single_size + * Must be smaller or equal to cap_kvd_size - kvd_linear_size + */ +MLXSW_ITEM32(cmd_mbox, config_profile, kvd_hash_single_size, 0x58, 0, 24); + +/* cmd_mbox_config_kvd_hash_double_size + * KVD Hash double-entries size (units of single-size entries) + * Valid for Spectrum only + * Allowed values are 128*N where N=0 or higher + * Must be either 0 or greater or equal to cap_min_kvd_hash_double_size + * Must be smaller or equal to cap_kvd_size - kvd_linear_size + */ +MLXSW_ITEM32(cmd_mbox, config_profile, kvd_hash_double_size, 0x5C, 0, 24); + /* cmd_mbox_config_profile_swid_config_mask * Modify Switch Partition Configuration mask. When set, the configu- * ration value for the Switch Partition are taken from the mailbox. diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 436bc49df6ab..2fe385cce203 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -190,7 +190,8 @@ struct mlxsw_config_profile { used_max_ib_mc:1, used_max_pkey:1, used_ar_sec:1, - used_adaptive_routing_group_cap:1; + used_adaptive_routing_group_cap:1, + used_kvd_sizes:1; u8 max_vepa_channels; u16 max_lag; u16 max_port_per_lag; @@ -211,6 +212,9 @@ struct mlxsw_config_profile { u8 ar_sec; u16 adaptive_routing_group_cap; u8 arn; + u32 kvd_linear_size; + u32 kvd_hash_single_size; + u32 kvd_hash_double_size; struct mlxsw_swid_config swid_config[MLXSW_CONFIG_PROFILE_SWID_COUNT]; }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 7f4173c8eda3..ddbc9f22278d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -1255,6 +1255,20 @@ static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox, mlxsw_cmd_mbox_config_profile_adaptive_routing_group_cap_set( mbox, profile->adaptive_routing_group_cap); } + if (profile->used_kvd_sizes) { + mlxsw_cmd_mbox_config_profile_set_kvd_linear_size_set( + mbox, 1); + mlxsw_cmd_mbox_config_profile_kvd_linear_size_set( + mbox, profile->kvd_linear_size); + mlxsw_cmd_mbox_config_profile_set_kvd_hash_single_size_set( + mbox, 1); + mlxsw_cmd_mbox_config_profile_kvd_hash_single_size_set( + mbox, profile->kvd_hash_single_size); + mlxsw_cmd_mbox_config_profile_set_kvd_hash_double_size_set( + mbox, 1); + mlxsw_cmd_mbox_config_profile_kvd_hash_double_size_set( + mbox, profile->kvd_hash_double_size); + } for (i = 0; i < MLXSW_CONFIG_PROFILE_SWID_COUNT; i++) mlxsw_pci_config_profile_swid_config(mlxsw_pci, mbox, i, diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 1977e7a5c530..0cc148566677 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -1,9 +1,10 @@ /* * drivers/net/ethernet/mellanox/mlxsw/reg.h * Copyright (c) 2015 Mellanox Technologies. All rights reserved. - * Copyright (c) 2015 Ido Schimmel <[email protected]> + * Copyright (c) 2015-2016 Ido Schimmel <[email protected]> * Copyright (c) 2015 Elad Raz <[email protected]> - * Copyright (c) 2015 Jiri Pirko <[email protected]> + * Copyright (c) 2015-2016 Jiri Pirko <[email protected]> + * Copyright (c) 2016 Yotam Gigi <[email protected]> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -386,7 +387,9 @@ enum mlxsw_reg_sfd_rec_action { /* forward and trap, trap_id is FDB_TRAP */ MLXSW_REG_SFD_REC_ACTION_MIRROR_TO_CPU = 1, /* trap and do not forward, trap_id is FDB_TRAP */ - MLXSW_REG_SFD_REC_ACTION_TRAP = 3, + MLXSW_REG_SFD_REC_ACTION_TRAP = 2, + /* forward to IP router */ + MLXSW_REG_SFD_REC_ACTION_FORWARD_IP_ROUTER = 3, MLXSW_REG_SFD_REC_ACTION_DISCARD_ERROR = 15, }; @@ -3186,6 +3189,1183 @@ static inline void mlxsw_reg_hpkt_pack(char *payload, u8 action, u16 trap_id) mlxsw_reg_hpkt_ctrl_set(payload, MLXSW_REG_HPKT_CTRL_PACKET_DEFAULT); } +/* RGCR - Router General Configuration Register + * -------------------------------------------- + * The register is used for setting up the router configuration. + */ +#define MLXSW_REG_RGCR_ID 0x8001 +#define MLXSW_REG_RGCR_LEN 0x28 + +static const struct mlxsw_reg_info mlxsw_reg_rgcr = { + .id = MLXSW_REG_RGCR_ID, + .len = MLXSW_REG_RGCR_LEN, +}; + +/* reg_rgcr_ipv4_en + * IPv4 router enable. + * Access: RW + */ +MLXSW_ITEM32(reg, rgcr, ipv4_en, 0x00, 31, 1); + +/* reg_rgcr_ipv6_en + * IPv6 router enable. + * Access: RW + */ +MLXSW_ITEM32(reg, rgcr, ipv6_en, 0x00, 30, 1); + +/* reg_rgcr_max_router_interfaces + * Defines the maximum number of active router interfaces for all virtual + * routers. + * Access: RW + */ +MLXSW_ITEM32(reg, rgcr, max_router_interfaces, 0x10, 0, 16); + +/* reg_rgcr_usp + * Update switch priority and packet color. + * 0 - Preserve the value of Switch Priority and packet color. + * 1 - Recalculate the value of Switch Priority and packet color. + * Access: RW + * + * Note: Not supported by SwitchX and SwitchX-2. + */ +MLXSW_ITEM32(reg, rgcr, usp, 0x18, 20, 1); + +/* reg_rgcr_pcp_rw + * Indicates how to handle the pcp_rewrite_en value: + * 0 - Preserve the value of pcp_rewrite_en. + * 2 - Disable PCP rewrite. + * 3 - Enable PCP rewrite. + * Access: RW + * + * Note: Not supported by SwitchX and SwitchX-2. + */ +MLXSW_ITEM32(reg, rgcr, pcp_rw, 0x18, 16, 2); + +/* reg_rgcr_activity_dis + * Activity disable: + * 0 - Activity will be set when an entry is hit (default). + * 1 - Activity will not be set when an entry is hit. + * + * Bit 0 - Disable activity bit in Router Algorithmic LPM Unicast Entry + * (RALUE). + * Bit 1 - Disable activity bit in Router Algorithmic LPM Unicast Host + * Entry (RAUHT). + * Bits 2:7 are reserved. + * Access: RW + * + * Note: Not supported by SwitchX, SwitchX-2 and Switch-IB. + */ +MLXSW_ITEM32(reg, rgcr, activity_dis, 0x20, 0, 8); + +static inline void mlxsw_reg_rgcr_pack(char *payload, bool ipv4_en) +{ + MLXSW_REG_ZERO(rgcr, payload); + mlxsw_reg_rgcr_ipv4_en_set(payload, ipv4_en); +} + +/* RITR - Router Interface Table Register + * -------------------------------------- + * The register is used to configure the router interface table. + */ +#define MLXSW_REG_RITR_ID 0x8002 +#define MLXSW_REG_RITR_LEN 0x40 + +static const struct mlxsw_reg_info mlxsw_reg_ritr = { + .id = MLXSW_REG_RITR_ID, + .len = MLXSW_REG_RITR_LEN, +}; + +/* reg_ritr_enable + * Enables routing on the router interface. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, enable, 0x00, 31, 1); + +/* reg_ritr_ipv4 + * IPv4 routing enable. Enables routing of IPv4 traffic on the router + * interface. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, ipv4, 0x00, 29, 1); + +/* reg_ritr_ipv6 + * IPv6 routing enable. Enables routing of IPv6 traffic on the router + * interface. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, ipv6, 0x00, 28, 1); + +enum mlxsw_reg_ritr_if_type { + MLXSW_REG_RITR_VLAN_IF, + MLXSW_REG_RITR_FID_IF, + MLXSW_REG_RITR_SP_IF, +}; + +/* reg_ritr_type + * Router interface type. + * 0 - VLAN interface. + * 1 - FID interface. + * 2 - Sub-port interface. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, type, 0x00, 23, 3); + +enum { + MLXSW_REG_RITR_RIF_CREATE, + MLXSW_REG_RITR_RIF_DEL, +}; + +/* reg_ritr_op + * Opcode: + * 0 - Create or edit RIF. + * 1 - Delete RIF. + * Reserved for SwitchX-2. For Spectrum, editing of interface properties + * is not supported. An interface must be deleted and re-created in order + * to update properties. + * Access: WO + */ +MLXSW_ITEM32(reg, ritr, op, 0x00, 20, 2); + +/* reg_ritr_rif + * Router interface index. A pointer to the Router Interface Table. + * Access: Index + */ +MLXSW_ITEM32(reg, ritr, rif, 0x00, 0, 16); + +/* reg_ritr_ipv4_fe + * IPv4 Forwarding Enable. + * Enables routing of IPv4 traffic on the router interface. When disabled, + * forwarding is blocked but local traffic (traps and IP2ME) will be enabled. + * Not supported in SwitchX-2. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, ipv4_fe, 0x04, 29, 1); + +/* reg_ritr_ipv6_fe + * IPv6 Forwarding Enable. + * Enables routing of IPv6 traffic on the router interface. When disabled, + * forwarding is blocked but local traffic (traps and IP2ME) will be enabled. + * Not supported in SwitchX-2. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, ipv6_fe, 0x04, 28, 1); + +/* reg_ritr_virtual_router + * Virtual router ID associated with the router interface. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, virtual_router, 0x04, 0, 16); + +/* reg_ritr_mtu + * Router interface MTU. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, mtu, 0x34, 0, 16); + +/* reg_ritr_if_swid + * Switch partition ID. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, if_swid, 0x08, 24, 8); + +/* reg_ritr_if_mac + * Router interface MAC address. + * In Spectrum, all MAC addresses must have the same 38 MSBits. + * Access: RW + */ +MLXSW_ITEM_BUF(reg, ritr, if_mac, 0x12, 6); + +/* VLAN Interface */ + +/* reg_ritr_vlan_if_vid + * VLAN ID. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, vlan_if_vid, 0x08, 0, 12); + +/* FID Interface */ + +/* reg_ritr_fid_if_fid + * Filtering ID. Used to connect a bridge to the router. Only FIDs from + * the vFID range are supported. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, fid_if_fid, 0x08, 0, 16); + +static inline void mlxsw_reg_ritr_fid_set(char *payload, + enum mlxsw_reg_ritr_if_type rif_type, + u16 fid) +{ + if (rif_type == MLXSW_REG_RITR_FID_IF) + mlxsw_reg_ritr_fid_if_fid_set(payload, fid); + else + mlxsw_reg_ritr_vlan_if_vid_set(payload, fid); +} + +/* Sub-port Interface */ + +/* reg_ritr_sp_if_lag + * LAG indication. When this bit is set the system_port field holds the + * LAG identifier. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, sp_if_lag, 0x08, 24, 1); + +/* reg_ritr_sp_system_port + * Port unique indentifier. When lag bit is set, this field holds the + * lag_id in bits 0:9. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, sp_if_system_port, 0x08, 0, 16); + +/* reg_ritr_sp_if_vid + * VLAN ID. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, sp_if_vid, 0x18, 0, 12); + +static inline void mlxsw_reg_ritr_rif_pack(char *payload, u16 rif) +{ + MLXSW_REG_ZERO(ritr, payload); + mlxsw_reg_ritr_rif_set(payload, rif); +} + +static inline void mlxsw_reg_ritr_sp_if_pack(char *payload, bool lag, + u16 system_port, u16 vid) +{ + mlxsw_reg_ritr_sp_if_lag_set(payload, lag); + mlxsw_reg_ritr_sp_if_system_port_set(payload, system_port); + mlxsw_reg_ritr_sp_if_vid_set(payload, vid); +} + +static inline void mlxsw_reg_ritr_pack(char *payload, bool enable, + enum mlxsw_reg_ritr_if_type type, + u16 rif, u16 mtu, const char *mac) +{ + bool op = enable ? MLXSW_REG_RITR_RIF_CREATE : MLXSW_REG_RITR_RIF_DEL; + + MLXSW_REG_ZERO(ritr, payload); + mlxsw_reg_ritr_enable_set(payload, enable); + mlxsw_reg_ritr_ipv4_set(payload, 1); + mlxsw_reg_ritr_type_set(payload, type); + mlxsw_reg_ritr_op_set(payload, op); + mlxsw_reg_ritr_rif_set(payload, rif); + mlxsw_reg_ritr_ipv4_fe_set(payload, 1); + mlxsw_reg_ritr_mtu_set(payload, mtu); + mlxsw_reg_ritr_if_mac_memcpy_to(payload, mac); +} + +/* RATR - Router Adjacency Table Register + * -------------------------------------- + * The RATR register is used to configure the Router Adjacency (next-hop) + * Table. + */ +#define MLXSW_REG_RATR_ID 0x8008 +#define MLXSW_REG_RATR_LEN 0x2C + +static const struct mlxsw_reg_info mlxsw_reg_ratr = { + .id = MLXSW_REG_RATR_ID, + .len = MLXSW_REG_RATR_LEN, +}; + +enum mlxsw_reg_ratr_op { + /* Read */ + MLXSW_REG_RATR_OP_QUERY_READ = 0, + /* Read and clear activity */ + MLXSW_REG_RATR_OP_QUERY_READ_CLEAR = 2, + /* Write Adjacency entry */ + MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY = 1, + /* Write Adjacency entry only if the activity is cleared. + * The write may not succeed if the activity is set. There is not + * direct feedback if the write has succeeded or not, however + * the get will reveal the actual entry (SW can compare the get + * response to the set command). + */ + MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY_ON_ACTIVITY = 3, +}; + +/* reg_ratr_op + * Note that Write operation may also be used for updating + * counter_set_type and counter_index. In this case all other + * fields must not be updated. + * Access: OP + */ +MLXSW_ITEM32(reg, ratr, op, 0x00, 28, 4); + +/* reg_ratr_v + * Valid bit. Indicates if the adjacency entry is valid. + * Note: the device may need some time before reusing an invalidated + * entry. During this time the entry can not be reused. It is + * recommended to use another entry before reusing an invalidated + * entry (e.g. software can put it at the end of the list for + * reusing). Trying to access an invalidated entry not yet cleared + * by the device results with failure indicating "Try Again" status. + * When valid is '0' then egress_router_interface,trap_action, + * adjacency_parameters and counters are reserved + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, v, 0x00, 24, 1); + +/* reg_ratr_a + * Activity. Set for new entries. Set if a packet lookup has hit on + * the specific entry. To clear the a bit, use "clear activity". + * Access: RO + */ +MLXSW_ITEM32(reg, ratr, a, 0x00, 16, 1); + +/* reg_ratr_adjacency_index_low + * Bits 15:0 of index into the adjacency table. + * For SwitchX and SwitchX-2, the adjacency table is linear and + * used for adjacency entries only. + * For Spectrum, the index is to the KVD linear. + * Access: Index + */ +MLXSW_ITEM32(reg, ratr, adjacency_index_low, 0x04, 0, 16); + +/* reg_ratr_egress_router_interface + * Range is 0 .. cap_max_router_interfaces - 1 + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, egress_router_interface, 0x08, 0, 16); + +enum mlxsw_reg_ratr_trap_action { + MLXSW_REG_RATR_TRAP_ACTION_NOP, + MLXSW_REG_RATR_TRAP_ACTION_TRAP, + MLXSW_REG_RATR_TRAP_ACTION_MIRROR_TO_CPU, + MLXSW_REG_RATR_TRAP_ACTION_MIRROR, + MLXSW_REG_RATR_TRAP_ACTION_DISCARD_ERRORS, +}; + +/* reg_ratr_trap_action + * see mlxsw_reg_ratr_trap_action + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, trap_action, 0x0C, 28, 4); + +enum mlxsw_reg_ratr_trap_id { + MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS0 = 0, + MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS1 = 1, +}; + +/* reg_ratr_adjacency_index_high + * Bits 23:16 of the adjacency_index. + * Access: Index + */ +MLXSW_ITEM32(reg, ratr, adjacency_index_high, 0x0C, 16, 8); + +/* reg_ratr_trap_id + * Trap ID to be reported to CPU. + * Trap-ID is RTR_EGRESS0 or RTR_EGRESS1. + * For trap_action of NOP, MIRROR and DISCARD_ERROR + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, trap_id, 0x0C, 0, 8); + +/* reg_ratr_eth_destination_mac + * MAC address of the destination next-hop. + * Access: RW + */ +MLXSW_ITEM_BUF(reg, ratr, eth_destination_mac, 0x12, 6); + +static inline void +mlxsw_reg_ratr_pack(char *payload, + enum mlxsw_reg_ratr_op op, bool valid, + u32 adjacency_index, u16 egress_rif) +{ + MLXSW_REG_ZERO(ratr, payload); + mlxsw_reg_ratr_op_set(payload, op); + mlxsw_reg_ratr_v_set(payload, valid); + mlxsw_reg_ratr_adjacency_index_low_set(payload, adjacency_index); + mlxsw_reg_ratr_adjacency_index_high_set(payload, adjacency_index >> 16); + mlxsw_reg_ratr_egress_router_interface_set(payload, egress_rif); +} + +static inline void mlxsw_reg_ratr_eth_entry_pack(char *payload, + const char *dest_mac) +{ + mlxsw_reg_ratr_eth_destination_mac_memcpy_to(payload, dest_mac); +} + +/* RALTA - Router Algorithmic LPM Tree Allocation Register + * ------------------------------------------------------- + * RALTA is used to allocate the LPM trees of the SHSPM method. + */ +#define MLXSW_REG_RALTA_ID 0x8010 +#define MLXSW_REG_RALTA_LEN 0x04 + +static const struct mlxsw_reg_info mlxsw_reg_ralta = { + .id = MLXSW_REG_RALTA_ID, + .len = MLXSW_REG_RALTA_LEN, +}; + +/* reg_ralta_op + * opcode (valid for Write, must be 0 on Read) + * 0 - allocate a tree + * 1 - deallocate a tree + * Access: OP + */ +MLXSW_ITEM32(reg, ralta, op, 0x00, 28, 2); + +enum mlxsw_reg_ralxx_protocol { + MLXSW_REG_RALXX_PROTOCOL_IPV4, + MLXSW_REG_RALXX_PROTOCOL_IPV6, +}; + +/* reg_ralta_protocol + * Protocol. + * Deallocation opcode: Reserved. + * Access: RW + */ +MLXSW_ITEM32(reg, ralta, protocol, 0x00, 24, 4); + +/* reg_ralta_tree_id + * An identifier (numbered from 1..cap_shspm_max_trees-1) representing + * the tree identifier (managed by software). + * Note that tree_id 0 is allocated for a default-route tree. + * Access: Index + */ +MLXSW_ITEM32(reg, ralta, tree_id, 0x00, 0, 8); + +static inline void mlxsw_reg_ralta_pack(char *payload, bool alloc, + enum mlxsw_reg_ralxx_protocol protocol, + u8 tree_id) +{ + MLXSW_REG_ZERO(ralta, payload); + mlxsw_reg_ralta_op_set(payload, !alloc); + mlxsw_reg_ralta_protocol_set(payload, protocol); + mlxsw_reg_ralta_tree_id_set(payload, tree_id); +} + +/* RALST - Router Algorithmic LPM Structure Tree Register + * ------------------------------------------------------ + * RALST is used to set and query the structure of an LPM tree. + * The structure of the tree must be sorted as a sorted binary tree, while + * each node is a bin that is tagged as the length of the prefixes the lookup + * will refer to. Therefore, bin X refers to a set of entries with prefixes + * of X bits to match with the destination address. The bin 0 indicates + * the default action, when there is no match of any prefix. + */ +#define MLXSW_REG_RALST_ID 0x8011 +#define MLXSW_REG_RALST_LEN 0x104 + +static const struct mlxsw_reg_info mlxsw_reg_ralst = { + .id = MLXSW_REG_RALST_ID, + .len = MLXSW_REG_RALST_LEN, +}; + +/* reg_ralst_root_bin + * The bin number of the root bin. + * 0<root_bin=<(length of IP address) + * For a default-route tree configure 0xff + * Access: RW + */ +MLXSW_ITEM32(reg, ralst, root_bin, 0x00, 16, 8); + +/* reg_ralst_tree_id + * Tree identifier numbered from 1..(cap_shspm_max_trees-1). + * Access: Index + */ +MLXSW_ITEM32(reg, ralst, tree_id, 0x00, 0, 8); + +#define MLXSW_REG_RALST_BIN_NO_CHILD 0xff +#define MLXSW_REG_RALST_BIN_OFFSET 0x04 +#define MLXSW_REG_RALST_BIN_COUNT 128 + +/* reg_ralst_left_child_bin + * Holding the children of the bin according to the stored tree's structure. + * For trees composed of less than 4 blocks, the bins in excess are reserved. + * Note that tree_id 0 is allocated for a default-route tree, bins are 0xff + * Access: RW + */ +MLXSW_ITEM16_INDEXED(reg, ralst, left_child_bin, 0x04, 8, 8, 0x02, 0x00, false); + +/* reg_ralst_right_child_bin + * Holding the children of the bin according to the stored tree's structure. + * For trees composed of less than 4 blocks, the bins in excess are reserved. + * Note that tree_id 0 is allocated for a default-route tree, bins are 0xff + * Access: RW + */ +MLXSW_ITEM16_INDEXED(reg, ralst, right_child_bin, 0x04, 0, 8, 0x02, 0x00, + false); + +static inline void mlxsw_reg_ralst_pack(char *payload, u8 root_bin, u8 tree_id) +{ + MLXSW_REG_ZERO(ralst, payload); + + /* Initialize all bins to have no left or right child */ + memset(payload + MLXSW_REG_RALST_BIN_OFFSET, + MLXSW_REG_RALST_BIN_NO_CHILD, MLXSW_REG_RALST_BIN_COUNT * 2); + + mlxsw_reg_ralst_root_bin_set(payload, root_bin); + mlxsw_reg_ralst_tree_id_set(payload, tree_id); +} + +static inline void mlxsw_reg_ralst_bin_pack(char *payload, u8 bin_number, + u8 left_child_bin, + u8 right_child_bin) +{ + int bin_index = bin_number - 1; + + mlxsw_reg_ralst_left_child_bin_set(payload, bin_index, left_child_bin); + mlxsw_reg_ralst_right_child_bin_set(payload, bin_index, + right_child_bin); +} + +/* RALTB - Router Algorithmic LPM Tree Binding Register + * ---------------------------------------------------- + * RALTB is used to bind virtual router and protocol to an allocated LPM tree. + */ +#define MLXSW_REG_RALTB_ID 0x8012 +#define MLXSW_REG_RALTB_LEN 0x04 + +static const struct mlxsw_reg_info mlxsw_reg_raltb = { + .id = MLXSW_REG_RALTB_ID, + .len = MLXSW_REG_RALTB_LEN, +}; + +/* reg_raltb_virtual_router + * Virtual Router ID + * Range is 0..cap_max_virtual_routers-1 + * Access: Index + */ +MLXSW_ITEM32(reg, raltb, virtual_router, 0x00, 16, 16); + +/* reg_raltb_protocol + * Protocol. + * Access: Index + */ +MLXSW_ITEM32(reg, raltb, protocol, 0x00, 12, 4); + +/* reg_raltb_tree_id + * Tree to be used for the {virtual_router, protocol} + * Tree identifier numbered from 1..(cap_shspm_max_trees-1). + * By default, all Unicast IPv4 and IPv6 are bound to tree_id 0. + * Access: RW + */ +MLXSW_ITEM32(reg, raltb, tree_id, 0x00, 0, 8); + +static inline void mlxsw_reg_raltb_pack(char *payload, u16 virtual_router, + enum mlxsw_reg_ralxx_protocol protocol, + u8 tree_id) +{ + MLXSW_REG_ZERO(raltb, payload); + mlxsw_reg_raltb_virtual_router_set(payload, virtual_router); + mlxsw_reg_raltb_protocol_set(payload, protocol); + mlxsw_reg_raltb_tree_id_set(payload, tree_id); +} + +/* RALUE - Router Algorithmic LPM Unicast Entry Register + * ----------------------------------------------------- + * RALUE is used to configure and query LPM entries that serve + * the Unicast protocols. + */ +#define MLXSW_REG_RALUE_ID 0x8013 +#define MLXSW_REG_RALUE_LEN 0x38 + +static const struct mlxsw_reg_info mlxsw_reg_ralue = { + .id = MLXSW_REG_RALUE_ID, + .len = MLXSW_REG_RALUE_LEN, +}; + +/* reg_ralue_protocol + * Protocol. + * Access: Index + */ +MLXSW_ITEM32(reg, ralue, protocol, 0x00, 24, 4); + +enum mlxsw_reg_ralue_op { + /* Read operation. If entry doesn't exist, the operation fails. */ + MLXSW_REG_RALUE_OP_QUERY_READ = 0, + /* Clear on read operation. Used to read entry and + * clear Activity bit. + */ + MLXSW_REG_RALUE_OP_QUERY_CLEAR = 1, + /* Write operation. Used to write a new entry to the table. All RW + * fields are written for new entry. Activity bit is set + * for new entries. + */ + MLXSW_REG_RALUE_OP_WRITE_WRITE = 0, + /* Update operation. Used to update an existing route entry and + * only update the RW fields that are detailed in the field + * op_u_mask. If entry doesn't exist, the operation fails. + */ + MLXSW_REG_RALUE_OP_WRITE_UPDATE = 1, + /* Clear activity. The Activity bit (the field a) is cleared + * for the entry. + */ + MLXSW_REG_RALUE_OP_WRITE_CLEAR = 2, + /* Delete operation. Used to delete an existing entry. If entry + * doesn't exist, the operation fails. + */ + MLXSW_REG_RALUE_OP_WRITE_DELETE = 3, +}; + +/* reg_ralue_op + * Operation. + * Access: OP + */ +MLXSW_ITEM32(reg, ralue, op, 0x00, 20, 3); + +/* reg_ralue_a + * Activity. Set for new entries. Set if a packet lookup has hit on the + * specific entry, only if the entry is a route. To clear the a bit, use + * "clear activity" op. + * Enabled by activity_dis in RGCR + * Access: RO + */ +MLXSW_ITEM32(reg, ralue, a, 0x00, 16, 1); + +/* reg_ralue_virtual_router + * Virtual Router ID + * Range is 0..cap_max_virtual_routers-1 + * Access: Index + */ +MLXSW_ITEM32(reg, ralue, virtual_router, 0x04, 16, 16); + +#define MLXSW_REG_RALUE_OP_U_MASK_ENTRY_TYPE BIT(0) +#define MLXSW_REG_RALUE_OP_U_MASK_BMP_LEN BIT(1) +#define MLXSW_REG_RALUE_OP_U_MASK_ACTION BIT(2) + +/* reg_ralue_op_u_mask + * opcode update mask. + * On read operation, this field is reserved. + * This field is valid for update opcode, otherwise - reserved. + * This field is a bitmask of the fields that should be updated. + * Access: WO + */ +MLXSW_ITEM32(reg, ralue, op_u_mask, 0x04, 8, 3); + +/* reg_ralue_prefix_len + * Number of bits in the prefix of the LPM route. + * Note that for IPv6 prefixes, if prefix_len>64 the entry consumes + * two entries in the physical HW table. + * Access: Index + */ +MLXSW_ITEM32(reg, ralue, prefix_len, 0x08, 0, 8); + +/* reg_ralue_dip* + * The prefix of the route or of the marker that the object of the LPM + * is compared with. The most significant bits of the dip are the prefix. + * The list significant bits must be '0' if the prefix_len is smaller + * than 128 for IPv6 or smaller than 32 for IPv4. + * IPv4 address uses bits dip[31:0] and bits dip[127:32] are reserved. + * Access: Index + */ +MLXSW_ITEM32(reg, ralue, dip4, 0x18, 0, 32); + +enum mlxsw_reg_ralue_entry_type { + MLXSW_REG_RALUE_ENTRY_TYPE_MARKER_ENTRY = 1, + MLXSW_REG_RALUE_ENTRY_TYPE_ROUTE_ENTRY = 2, + MLXSW_REG_RALUE_ENTRY_TYPE_MARKER_AND_ROUTE_ENTRY = 3, +}; + +/* reg_ralue_entry_type + * Entry type. + * Note - for Marker entries, the action_type and action fields are reserved. + * Access: RW + */ +MLXSW_ITEM32(reg, ralue, entry_type, 0x1C, 30, 2); + +/* reg_ralue_bmp_len + * The best match prefix length in the case that there is no match for + * longer prefixes. + * If (entry_type != MARKER_ENTRY), bmp_len must be equal to prefix_len + * Note for any update operation with entry_type modification this + * field must be set. + * Access: RW + */ +MLXSW_ITEM32(reg, ralue, bmp_len, 0x1C, 16, 8); + +enum mlxsw_reg_ralue_action_type { + MLXSW_REG_RALUE_ACTION_TYPE_REMOTE, + MLXSW_REG_RALUE_ACTION_TYPE_LOCAL, + MLXSW_REG_RALUE_ACTION_TYPE_IP2ME, +}; + +/* reg_ralue_action_type + * Action Type + * Indicates how the IP address is connected. + * It can be connected to a local subnet through local_erif or can be + * on a remote subnet connected through a next-hop router, + * or transmitted to the CPU. + * Reserved when entry_type = MARKER_ENTRY + * Access: RW + */ +MLXSW_ITEM32(reg, ralue, action_type, 0x1C, 0, 2); + +enum mlxsw_reg_ralue_trap_action { + MLXSW_REG_RALUE_TRAP_ACTION_NOP, + MLXSW_REG_RALUE_TRAP_ACTION_TRAP, + MLXSW_REG_RALUE_TRAP_ACTION_MIRROR_TO_CPU, + MLXSW_REG_RALUE_TRAP_ACTION_MIRROR, + MLXSW_REG_RALUE_TRAP_ACTION_DISCARD_ERROR, +}; + +/* reg_ralue_trap_action + * Trap action. + * For IP2ME action, only NOP and MIRROR are possible. + * Access: RW + */ +MLXSW_ITEM32(reg, ralue, trap_action, 0x20, 28, 4); + +/* reg_ralue_trap_id + * Trap ID to be reported to CPU. + * Trap ID is RTR_INGRESS0 or RTR_INGRESS1. + * For trap_action of NOP, MIRROR and DISCARD_ERROR, trap_id is reserved. + * Access: RW + */ +MLXSW_ITEM32(reg, ralue, trap_id, 0x20, 0, 9); + +/* reg_ralue_adjacency_index + * Points to the first entry of the group-based ECMP. + * Only relevant in case of REMOTE action. + * Access: RW + */ +MLXSW_ITEM32(reg, ralue, adjacency_index, 0x24, 0, 24); + +/* reg_ralue_ecmp_size + * Amount of sequential entries starting + * from the adjacency_index (the number of ECMPs). + * The valid range is 1-64, 512, 1024, 2048 and 4096. + * Reserved when trap_action is TRAP or DISCARD_ERROR. + * Only relevant in case of REMOTE action. + * Access: RW + */ +MLXSW_ITEM32(reg, ralue, ecmp_size, 0x28, 0, 13); + +/* reg_ralue_local_erif + * Egress Router Interface. + * Only relevant in case of LOCAL action. + * Access: RW + */ +MLXSW_ITEM32(reg, ralue, local_erif, 0x24, 0, 16); + +/* reg_ralue_v + * Valid bit for the tunnel_ptr field. + * If valid = 0 then trap to CPU as IP2ME trap ID. + * If valid = 1 and the packet format allows NVE or IPinIP tunnel + * decapsulation then tunnel decapsulation is done. + * If valid = 1 and packet format does not allow NVE or IPinIP tunnel + * decapsulation then trap as IP2ME trap ID. + * Only relevant in case of IP2ME action. + * Access: RW + */ +MLXSW_ITEM32(reg, ralue, v, 0x24, 31, 1); + +/* reg_ralue_tunnel_ptr + * Tunnel Pointer for NVE or IPinIP tunnel decapsulation. + * For Spectrum, pointer to KVD Linear. + * Only relevant in case of IP2ME action. + * Access: RW + */ +MLXSW_ITEM32(reg, ralue, tunnel_ptr, 0x24, 0, 24); + +static inline void mlxsw_reg_ralue_pack(char *payload, + enum mlxsw_reg_ralxx_protocol protocol, + enum mlxsw_reg_ralue_op op, + u16 virtual_router, u8 prefix_len) +{ + MLXSW_REG_ZERO(ralue, payload); + mlxsw_reg_ralue_protocol_set(payload, protocol); + mlxsw_reg_ralue_virtual_router_set(payload, virtual_router); + mlxsw_reg_ralue_prefix_len_set(payload, prefix_len); + mlxsw_reg_ralue_entry_type_set(payload, + MLXSW_REG_RALUE_ENTRY_TYPE_ROUTE_ENTRY); + mlxsw_reg_ralue_bmp_len_set(payload, prefix_len); +} + +static inline void mlxsw_reg_ralue_pack4(char *payload, + enum mlxsw_reg_ralxx_protocol protocol, + enum mlxsw_reg_ralue_op op, + u16 virtual_router, u8 prefix_len, + u32 dip) +{ + mlxsw_reg_ralue_pack(payload, protocol, op, virtual_router, prefix_len); + mlxsw_reg_ralue_dip4_set(payload, dip); +} + +static inline void +mlxsw_reg_ralue_act_remote_pack(char *payload, + enum mlxsw_reg_ralue_trap_action trap_action, + u16 trap_id, u32 adjacency_index, u16 ecmp_size) +{ + mlxsw_reg_ralue_action_type_set(payload, + MLXSW_REG_RALUE_ACTION_TYPE_REMOTE); + mlxsw_reg_ralue_trap_action_set(payload, trap_action); + mlxsw_reg_ralue_trap_id_set(payload, trap_id); + mlxsw_reg_ralue_adjacency_index_set(payload, adjacency_index); + mlxsw_reg_ralue_ecmp_size_set(payload, ecmp_size); +} + +static inline void +mlxsw_reg_ralue_act_local_pack(char *payload, + enum mlxsw_reg_ralue_trap_action trap_action, + u16 trap_id, u16 local_erif) +{ + mlxsw_reg_ralue_action_type_set(payload, + MLXSW_REG_RALUE_ACTION_TYPE_LOCAL); + mlxsw_reg_ralue_trap_action_set(payload, trap_action); + mlxsw_reg_ralue_trap_id_set(payload, trap_id); + mlxsw_reg_ralue_local_erif_set(payload, local_erif); +} + +static inline void +mlxsw_reg_ralue_act_ip2me_pack(char *payload) +{ + mlxsw_reg_ralue_action_type_set(payload, + MLXSW_REG_RALUE_ACTION_TYPE_IP2ME); +} + +/* RAUHT - Router Algorithmic LPM Unicast Host Table Register + * ---------------------------------------------------------- + * The RAUHT register is used to configure and query the Unicast Host table in + * devices that implement the Algorithmic LPM. + */ +#define MLXSW_REG_RAUHT_ID 0x8014 +#define MLXSW_REG_RAUHT_LEN 0x74 + +static const struct mlxsw_reg_info mlxsw_reg_rauht = { + .id = MLXSW_REG_RAUHT_ID, + .len = MLXSW_REG_RAUHT_LEN, +}; + +enum mlxsw_reg_rauht_type { + MLXSW_REG_RAUHT_TYPE_IPV4, + MLXSW_REG_RAUHT_TYPE_IPV6, +}; + +/* reg_rauht_type + * Access: Index + */ +MLXSW_ITEM32(reg, rauht, type, 0x00, 24, 2); + +enum mlxsw_reg_rauht_op { + MLXSW_REG_RAUHT_OP_QUERY_READ = 0, + /* Read operation */ + MLXSW_REG_RAUHT_OP_QUERY_CLEAR_ON_READ = 1, + /* Clear on read operation. Used to read entry and clear + * activity bit. + */ + MLXSW_REG_RAUHT_OP_WRITE_ADD = 0, + /* Add. Used to write a new entry to the table. All R/W fields are + * relevant for new entry. Activity bit is set for new entries. + */ + MLXSW_REG_RAUHT_OP_WRITE_UPDATE = 1, + /* Update action. Used to update an existing route entry and + * only update the following fields: + * trap_action, trap_id, mac, counter_set_type, counter_index + */ + MLXSW_REG_RAUHT_OP_WRITE_CLEAR_ACTIVITY = 2, + /* Clear activity. A bit is cleared for the entry. */ + MLXSW_REG_RAUHT_OP_WRITE_DELETE = 3, + /* Delete entry */ + MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL = 4, + /* Delete all host entries on a RIF. In this command, dip + * field is reserved. + */ +}; + +/* reg_rauht_op + * Access: OP + */ +MLXSW_ITEM32(reg, rauht, op, 0x00, 20, 3); + +/* reg_rauht_a + * Activity. Set for new entries. Set if a packet lookup has hit on + * the specific entry. + * To clear the a bit, use "clear activity" op. + * Enabled by activity_dis in RGCR + * Access: RO + */ +MLXSW_ITEM32(reg, rauht, a, 0x00, 16, 1); + +/* reg_rauht_rif + * Router Interface + * Access: Index + */ +MLXSW_ITEM32(reg, rauht, rif, 0x00, 0, 16); + +/* reg_rauht_dip* + * Destination address. + * Access: Index + */ +MLXSW_ITEM32(reg, rauht, dip4, 0x1C, 0x0, 32); + +enum mlxsw_reg_rauht_trap_action { + MLXSW_REG_RAUHT_TRAP_ACTION_NOP, + MLXSW_REG_RAUHT_TRAP_ACTION_TRAP, + MLXSW_REG_RAUHT_TRAP_ACTION_MIRROR_TO_CPU, + MLXSW_REG_RAUHT_TRAP_ACTION_MIRROR, + MLXSW_REG_RAUHT_TRAP_ACTION_DISCARD_ERRORS, +}; + +/* reg_rauht_trap_action + * Access: RW + */ +MLXSW_ITEM32(reg, rauht, trap_action, 0x60, 28, 4); + +enum mlxsw_reg_rauht_trap_id { + MLXSW_REG_RAUHT_TRAP_ID_RTR_EGRESS0, + MLXSW_REG_RAUHT_TRAP_ID_RTR_EGRESS1, +}; + +/* reg_rauht_trap_id + * Trap ID to be reported to CPU. + * Trap-ID is RTR_EGRESS0 or RTR_EGRESS1. + * For trap_action of NOP, MIRROR and DISCARD_ERROR, + * trap_id is reserved. + * Access: RW + */ +MLXSW_ITEM32(reg, rauht, trap_id, 0x60, 0, 9); + +/* reg_rauht_counter_set_type + * Counter set type for flow counters + * Access: RW + */ +MLXSW_ITEM32(reg, rauht, counter_set_type, 0x68, 24, 8); + +/* reg_rauht_counter_index + * Counter index for flow counters + * Access: RW + */ +MLXSW_ITEM32(reg, rauht, counter_index, 0x68, 0, 24); + +/* reg_rauht_mac + * MAC address. + * Access: RW + */ +MLXSW_ITEM_BUF(reg, rauht, mac, 0x6E, 6); + +static inline void mlxsw_reg_rauht_pack(char *payload, + enum mlxsw_reg_rauht_op op, u16 rif, + const char *mac) +{ + MLXSW_REG_ZERO(rauht, payload); + mlxsw_reg_rauht_op_set(payload, op); + mlxsw_reg_rauht_rif_set(payload, rif); + mlxsw_reg_rauht_mac_memcpy_to(payload, mac); +} + +static inline void mlxsw_reg_rauht_pack4(char *payload, + enum mlxsw_reg_rauht_op op, u16 rif, + const char *mac, u32 dip) +{ + mlxsw_reg_rauht_pack(payload, op, rif, mac); + mlxsw_reg_rauht_dip4_set(payload, dip); +} + +/* RALEU - Router Algorithmic LPM ECMP Update Register + * --------------------------------------------------- + * The register enables updating the ECMP section in the action for multiple + * LPM Unicast entries in a single operation. The update is executed to + * all entries of a {virtual router, protocol} tuple using the same ECMP group. + */ +#define MLXSW_REG_RALEU_ID 0x8015 +#define MLXSW_REG_RALEU_LEN 0x28 + +static const struct mlxsw_reg_info mlxsw_reg_raleu = { + .id = MLXSW_REG_RALEU_ID, + .len = MLXSW_REG_RALEU_LEN, +}; + +/* reg_raleu_protocol + * Protocol. + * Access: Index + */ +MLXSW_ITEM32(reg, raleu, protocol, 0x00, 24, 4); + +/* reg_raleu_virtual_router + * Virtual Router ID + * Range is 0..cap_max_virtual_routers-1 + * Access: Index + */ +MLXSW_ITEM32(reg, raleu, virtual_router, 0x00, 0, 16); + +/* reg_raleu_adjacency_index + * Adjacency Index used for matching on the existing entries. + * Access: Index + */ +MLXSW_ITEM32(reg, raleu, adjacency_index, 0x10, 0, 24); + +/* reg_raleu_ecmp_size + * ECMP Size used for matching on the existing entries. + * Access: Index + */ +MLXSW_ITEM32(reg, raleu, ecmp_size, 0x14, 0, 13); + +/* reg_raleu_new_adjacency_index + * New Adjacency Index. + * Access: WO + */ +MLXSW_ITEM32(reg, raleu, new_adjacency_index, 0x20, 0, 24); + +/* reg_raleu_new_ecmp_size + * New ECMP Size. + * Access: WO + */ +MLXSW_ITEM32(reg, raleu, new_ecmp_size, 0x24, 0, 13); + +static inline void mlxsw_reg_raleu_pack(char *payload, + enum mlxsw_reg_ralxx_protocol protocol, + u16 virtual_router, + u32 adjacency_index, u16 ecmp_size, + u32 new_adjacency_index, + u16 new_ecmp_size) +{ + MLXSW_REG_ZERO(raleu, payload); + mlxsw_reg_raleu_protocol_set(payload, protocol); + mlxsw_reg_raleu_virtual_router_set(payload, virtual_router); + mlxsw_reg_raleu_adjacency_index_set(payload, adjacency_index); + mlxsw_reg_raleu_ecmp_size_set(payload, ecmp_size); + mlxsw_reg_raleu_new_adjacency_index_set(payload, new_adjacency_index); + mlxsw_reg_raleu_new_ecmp_size_set(payload, new_ecmp_size); +} + +/* RAUHTD - Router Algorithmic LPM Unicast Host Table Dump Register + * ---------------------------------------------------------------- + * The RAUHTD register allows dumping entries from the Router Unicast Host + * Table. For a given session an entry is dumped no more than one time. The + * first RAUHTD access after reset is a new session. A session ends when the + * num_rec response is smaller than num_rec request or for IPv4 when the + * num_entries is smaller than 4. The clear activity affect the current session + * or the last session if a new session has not started. + */ +#define MLXSW_REG_RAUHTD_ID 0x8018 +#define MLXSW_REG_RAUHTD_BASE_LEN 0x20 +#define MLXSW_REG_RAUHTD_REC_LEN 0x20 +#define MLXSW_REG_RAUHTD_REC_MAX_NUM 32 +#define MLXSW_REG_RAUHTD_LEN (MLXSW_REG_RAUHTD_BASE_LEN + \ + MLXSW_REG_RAUHTD_REC_MAX_NUM * MLXSW_REG_RAUHTD_REC_LEN) +#define MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC 4 + +static const struct mlxsw_reg_info mlxsw_reg_rauhtd = { + .id = MLXSW_REG_RAUHTD_ID, + .len = MLXSW_REG_RAUHTD_LEN, +}; + +#define MLXSW_REG_RAUHTD_FILTER_A BIT(0) +#define MLXSW_REG_RAUHTD_FILTER_RIF BIT(3) + +/* reg_rauhtd_filter_fields + * if a bit is '0' then the relevant field is ignored and dump is done + * regardless of the field value + * Bit0 - filter by activity: entry_a + * Bit3 - filter by entry rip: entry_rif + * Access: Index + */ +MLXSW_ITEM32(reg, rauhtd, filter_fields, 0x00, 0, 8); + +enum mlxsw_reg_rauhtd_op { + MLXSW_REG_RAUHTD_OP_DUMP, + MLXSW_REG_RAUHTD_OP_DUMP_AND_CLEAR, +}; + +/* reg_rauhtd_op + * Access: OP + */ +MLXSW_ITEM32(reg, rauhtd, op, 0x04, 24, 2); + +/* reg_rauhtd_num_rec + * At request: number of records requested + * At response: number of records dumped + * For IPv4, each record has 4 entries at request and up to 4 entries + * at response + * Range is 0..MLXSW_REG_RAUHTD_REC_MAX_NUM + * Access: Index + */ +MLXSW_ITEM32(reg, rauhtd, num_rec, 0x04, 0, 8); + +/* reg_rauhtd_entry_a + * Dump only if activity has value of entry_a + * Reserved if filter_fields bit0 is '0' + * Access: Index + */ +MLXSW_ITEM32(reg, rauhtd, entry_a, 0x08, 16, 1); + +enum mlxsw_reg_rauhtd_type { + MLXSW_REG_RAUHTD_TYPE_IPV4, + MLXSW_REG_RAUHTD_TYPE_IPV6, +}; + +/* reg_rauhtd_type + * Dump only if record type is: + * 0 - IPv4 + * 1 - IPv6 + * Access: Index + */ +MLXSW_ITEM32(reg, rauhtd, type, 0x08, 0, 4); + +/* reg_rauhtd_entry_rif + * Dump only if RIF has value of entry_rif + * Reserved if filter_fields bit3 is '0' + * Access: Index + */ +MLXSW_ITEM32(reg, rauhtd, entry_rif, 0x0C, 0, 16); + +static inline void mlxsw_reg_rauhtd_pack(char *payload, + enum mlxsw_reg_rauhtd_type type) +{ + MLXSW_REG_ZERO(rauhtd, payload); + mlxsw_reg_rauhtd_filter_fields_set(payload, MLXSW_REG_RAUHTD_FILTER_A); + mlxsw_reg_rauhtd_op_set(payload, MLXSW_REG_RAUHTD_OP_DUMP_AND_CLEAR); + mlxsw_reg_rauhtd_num_rec_set(payload, MLXSW_REG_RAUHTD_REC_MAX_NUM); + mlxsw_reg_rauhtd_entry_a_set(payload, 1); + mlxsw_reg_rauhtd_type_set(payload, type); +} + +/* reg_rauhtd_ipv4_rec_num_entries + * Number of valid entries in this record: + * 0 - 1 valid entry + * 1 - 2 valid entries + * 2 - 3 valid entries + * 3 - 4 valid entries + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_rec_num_entries, + MLXSW_REG_RAUHTD_BASE_LEN, 28, 2, + MLXSW_REG_RAUHTD_REC_LEN, 0x00, false); + +/* reg_rauhtd_rec_type + * Record type. + * 0 - IPv4 + * 1 - IPv6 + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, rauhtd, rec_type, MLXSW_REG_RAUHTD_BASE_LEN, 24, 2, + MLXSW_REG_RAUHTD_REC_LEN, 0x00, false); + +#define MLXSW_REG_RAUHTD_IPV4_ENT_LEN 0x8 + +/* reg_rauhtd_ipv4_ent_a + * Activity. Set for new entries. Set if a packet lookup has hit on the + * specific entry. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_ent_a, MLXSW_REG_RAUHTD_BASE_LEN, 16, 1, + MLXSW_REG_RAUHTD_IPV4_ENT_LEN, 0x00, false); + +/* reg_rauhtd_ipv4_ent_rif + * Router interface. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_ent_rif, MLXSW_REG_RAUHTD_BASE_LEN, 0, + 16, MLXSW_REG_RAUHTD_IPV4_ENT_LEN, 0x00, false); + +/* reg_rauhtd_ipv4_ent_dip + * Destination IPv4 address. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_ent_dip, MLXSW_REG_RAUHTD_BASE_LEN, 0, + 32, MLXSW_REG_RAUHTD_IPV4_ENT_LEN, 0x04, false); + +static inline void mlxsw_reg_rauhtd_ent_ipv4_unpack(char *payload, + int ent_index, u16 *p_rif, + u32 *p_dip) +{ + *p_rif = mlxsw_reg_rauhtd_ipv4_ent_rif_get(payload, ent_index); + *p_dip = mlxsw_reg_rauhtd_ipv4_ent_dip_get(payload, ent_index); +} + /* MFCR - Management Fan Control Register * -------------------------------------- * This register controls the settings of the Fan Speed PWM mechanism. @@ -3924,6 +5104,26 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id) return "HTGT"; case MLXSW_REG_HPKT_ID: return "HPKT"; + case MLXSW_REG_RGCR_ID: + return "RGCR"; + case MLXSW_REG_RITR_ID: + return "RITR"; + case MLXSW_REG_RATR_ID: + return "RATR"; + case MLXSW_REG_RALTA_ID: + return "RALTA"; + case MLXSW_REG_RALST_ID: + return "RALST"; + case MLXSW_REG_RALTB_ID: + return "RALTB"; + case MLXSW_REG_RALUE_ID: + return "RALUE"; + case MLXSW_REG_RAUHT_ID: + return "RAUHT"; + case MLXSW_REG_RALEU_ID: + return "RALEU"; + case MLXSW_REG_RAUHTD_ID: + return "RAUHTD"; case MLXSW_REG_MFCR_ID: return "MFCR"; case MLXSW_REG_MFSC_ID: diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index a453fffaa1a4..c812513e079d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -51,6 +51,7 @@ #include <linux/list.h> #include <linux/notifier.h> #include <linux/dcbnl.h> +#include <linux/inetdevice.h> #include <net/switchdev.h> #include <generated/utsrelease.h> @@ -210,23 +211,6 @@ static int mlxsw_sp_port_dev_addr_init(struct mlxsw_sp_port *mlxsw_sp_port) return mlxsw_sp_port_dev_addr_set(mlxsw_sp_port, addr); } -static int mlxsw_sp_port_stp_state_set(struct mlxsw_sp_port *mlxsw_sp_port, - u16 vid, enum mlxsw_reg_spms_state state) -{ - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - char *spms_pl; - int err; - - spms_pl = kmalloc(MLXSW_REG_SPMS_LEN, GFP_KERNEL); - if (!spms_pl) - return -ENOMEM; - mlxsw_reg_spms_pack(spms_pl, mlxsw_sp_port->local_port); - mlxsw_reg_spms_vid_pack(spms_pl, vid, state); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spms), spms_pl); - kfree(spms_pl); - return err; -} - static int mlxsw_sp_port_mtu_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mtu) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; @@ -637,87 +621,6 @@ static int mlxsw_sp_port_vlan_mode_trans(struct mlxsw_sp_port *mlxsw_sp_port) return 0; } -static struct mlxsw_sp_fid * -mlxsw_sp_vfid_find(const struct mlxsw_sp *mlxsw_sp, u16 vid) -{ - struct mlxsw_sp_fid *f; - - list_for_each_entry(f, &mlxsw_sp->port_vfids.list, list) { - if (f->vid == vid) - return f; - } - - return NULL; -} - -static u16 mlxsw_sp_avail_vfid_get(const struct mlxsw_sp *mlxsw_sp) -{ - return find_first_zero_bit(mlxsw_sp->port_vfids.mapped, - MLXSW_SP_VFID_PORT_MAX); -} - -static int mlxsw_sp_vfid_op(struct mlxsw_sp *mlxsw_sp, u16 fid, bool create) -{ - char sfmr_pl[MLXSW_REG_SFMR_LEN]; - - mlxsw_reg_sfmr_pack(sfmr_pl, !create, fid, 0); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); -} - -static void mlxsw_sp_vport_vfid_leave(struct mlxsw_sp_port *mlxsw_sp_vport); - -static struct mlxsw_sp_fid *mlxsw_sp_vfid_create(struct mlxsw_sp *mlxsw_sp, - u16 vid) -{ - struct device *dev = mlxsw_sp->bus_info->dev; - struct mlxsw_sp_fid *f; - u16 vfid, fid; - int err; - - vfid = mlxsw_sp_avail_vfid_get(mlxsw_sp); - if (vfid == MLXSW_SP_VFID_PORT_MAX) { - dev_err(dev, "No available vFIDs\n"); - return ERR_PTR(-ERANGE); - } - - fid = mlxsw_sp_vfid_to_fid(vfid); - err = mlxsw_sp_vfid_op(mlxsw_sp, fid, true); - if (err) { - dev_err(dev, "Failed to create FID=%d\n", fid); - return ERR_PTR(err); - } - - f = kzalloc(sizeof(*f), GFP_KERNEL); - if (!f) - goto err_allocate_vfid; - - f->leave = mlxsw_sp_vport_vfid_leave; - f->fid = fid; - f->vid = vid; - - list_add(&f->list, &mlxsw_sp->port_vfids.list); - set_bit(vfid, mlxsw_sp->port_vfids.mapped); - - return f; - -err_allocate_vfid: - mlxsw_sp_vfid_op(mlxsw_sp, fid, false); - return ERR_PTR(-ENOMEM); -} - -static void mlxsw_sp_vfid_destroy(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fid *f) -{ - u16 vfid = mlxsw_sp_fid_to_vfid(f->fid); - - clear_bit(vfid, mlxsw_sp->port_vfids.mapped); - list_del(&f->list); - - mlxsw_sp_vfid_op(mlxsw_sp, f->fid, false); - - kfree(f); -} - static struct mlxsw_sp_port * mlxsw_sp_port_vport_create(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid) { @@ -750,72 +653,12 @@ static void mlxsw_sp_port_vport_destroy(struct mlxsw_sp_port *mlxsw_sp_vport) kfree(mlxsw_sp_vport); } -static int mlxsw_sp_vport_fid_map(struct mlxsw_sp_port *mlxsw_sp_vport, u16 fid, - bool valid) -{ - enum mlxsw_reg_svfa_mt mt = MLXSW_REG_SVFA_MT_PORT_VID_TO_FID; - u16 vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport); - - return mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport, mt, valid, fid, - vid); -} - -static int mlxsw_sp_vport_vfid_join(struct mlxsw_sp_port *mlxsw_sp_vport) -{ - u16 vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport); - struct mlxsw_sp_fid *f; - int err; - - f = mlxsw_sp_vfid_find(mlxsw_sp_vport->mlxsw_sp, vid); - if (!f) { - f = mlxsw_sp_vfid_create(mlxsw_sp_vport->mlxsw_sp, vid); - if (IS_ERR(f)) - return PTR_ERR(f); - } - - if (!f->ref_count) { - err = mlxsw_sp_vport_flood_set(mlxsw_sp_vport, f->fid, true); - if (err) - goto err_vport_flood_set; - } - - err = mlxsw_sp_vport_fid_map(mlxsw_sp_vport, f->fid, true); - if (err) - goto err_vport_fid_map; - - mlxsw_sp_vport_fid_set(mlxsw_sp_vport, f); - f->ref_count++; - - return 0; - -err_vport_fid_map: - if (!f->ref_count) - mlxsw_sp_vport_flood_set(mlxsw_sp_vport, f->fid, false); -err_vport_flood_set: - if (!f->ref_count) - mlxsw_sp_vfid_destroy(mlxsw_sp_vport->mlxsw_sp, f); - return err; -} - -static void mlxsw_sp_vport_vfid_leave(struct mlxsw_sp_port *mlxsw_sp_vport) -{ - struct mlxsw_sp_fid *f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport); - - mlxsw_sp_vport_fid_set(mlxsw_sp_vport, NULL); - - mlxsw_sp_vport_fid_map(mlxsw_sp_vport, f->fid, false); - - if (--f->ref_count == 0) { - mlxsw_sp_vport_flood_set(mlxsw_sp_vport, f->fid, false); - mlxsw_sp_vfid_destroy(mlxsw_sp_vport->mlxsw_sp, f); - } -} - int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto, u16 vid) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); struct mlxsw_sp_port *mlxsw_sp_vport; + bool untagged = vid == 1; int err; /* VLAN 0 is added to HW filter when device goes up, but it is @@ -847,41 +690,24 @@ int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto, } } - err = mlxsw_sp_vport_vfid_join(mlxsw_sp_vport); - if (err) { - netdev_err(dev, "Failed to join vFID\n"); - goto err_vport_vfid_join; - } - err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, false); if (err) { netdev_err(dev, "Failed to disable learning for VID=%d\n", vid); goto err_port_vid_learning_set; } - err = mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, true, false); + err = mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, true, untagged); if (err) { netdev_err(dev, "Failed to set VLAN membership for VID=%d\n", vid); goto err_port_add_vid; } - err = mlxsw_sp_port_stp_state_set(mlxsw_sp_vport, vid, - MLXSW_REG_SPMS_STATE_FORWARDING); - if (err) { - netdev_err(dev, "Failed to set STP state for VID=%d\n", vid); - goto err_port_stp_state_set; - } - return 0; -err_port_stp_state_set: - mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, false, false); err_port_add_vid: mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true); err_port_vid_learning_set: - mlxsw_sp_vport_vfid_leave(mlxsw_sp_vport); -err_vport_vfid_join: if (list_is_singular(&mlxsw_sp_port->vports_list)) mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port); err_port_vp_mode_trans: @@ -889,8 +715,8 @@ err_port_vp_mode_trans: return err; } -int mlxsw_sp_port_kill_vid(struct net_device *dev, - __be16 __always_unused proto, u16 vid) +static int mlxsw_sp_port_kill_vid(struct net_device *dev, + __be16 __always_unused proto, u16 vid) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); struct mlxsw_sp_port *mlxsw_sp_vport; @@ -909,13 +735,6 @@ int mlxsw_sp_port_kill_vid(struct net_device *dev, return 0; } - err = mlxsw_sp_port_stp_state_set(mlxsw_sp_vport, vid, - MLXSW_REG_SPMS_STATE_DISCARDING); - if (err) { - netdev_err(dev, "Failed to set STP state for VID=%d\n", vid); - return err; - } - err = mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, false, false); if (err) { netdev_err(dev, "Failed to set VLAN membership for VID=%d\n", @@ -984,6 +803,8 @@ static const struct net_device_ops mlxsw_sp_port_netdev_ops = { .ndo_get_stats64 = mlxsw_sp_port_get_stats64, .ndo_vlan_rx_add_vid = mlxsw_sp_port_add_vid, .ndo_vlan_rx_kill_vid = mlxsw_sp_port_kill_vid, + .ndo_neigh_construct = mlxsw_sp_router_neigh_construct, + .ndo_neigh_destroy = mlxsw_sp_router_neigh_destroy, .ndo_fdb_add = switchdev_port_fdb_add, .ndo_fdb_del = switchdev_port_fdb_del, .ndo_fdb_dump = switchdev_port_fdb_dump, @@ -1844,23 +1665,6 @@ err_port_active_vlans_alloc: return err; } -static void mlxsw_sp_port_vports_fini(struct mlxsw_sp_port *mlxsw_sp_port) -{ - struct net_device *dev = mlxsw_sp_port->dev; - struct mlxsw_sp_port *mlxsw_sp_vport, *tmp; - - list_for_each_entry_safe(mlxsw_sp_vport, tmp, - &mlxsw_sp_port->vports_list, vport.list) { - u16 vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport); - - /* vPorts created for VLAN devices should already be gone - * by now, since we unregistered the port netdev. - */ - WARN_ON(is_vlan_dev(mlxsw_sp_vport->dev)); - mlxsw_sp_port_kill_vid(dev, 0, vid); - } -} - static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) { struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port]; @@ -1871,13 +1675,14 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) mlxsw_core_port_fini(&mlxsw_sp_port->core_port); unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */ mlxsw_sp_port_dcb_fini(mlxsw_sp_port); - mlxsw_sp_port_vports_fini(mlxsw_sp_port); + mlxsw_sp_port_kill_vid(mlxsw_sp_port->dev, 0, 1); mlxsw_sp_port_switchdev_fini(mlxsw_sp_port); mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT); mlxsw_sp_port_module_unmap(mlxsw_sp, mlxsw_sp_port->local_port); free_percpu(mlxsw_sp_port->pcpu_stats); kfree(mlxsw_sp_port->untagged_vlans); kfree(mlxsw_sp_port->active_vlans); + WARN_ON_ONCE(!list_empty(&mlxsw_sp_port->vports_list)); free_netdev(mlxsw_sp_port->dev); } @@ -2114,11 +1919,8 @@ static void mlxsw_sp_pude_event_func(const struct mlxsw_reg_info *reg, local_port = mlxsw_reg_pude_local_port_get(pude_pl); mlxsw_sp_port = mlxsw_sp->ports[local_port]; - if (!mlxsw_sp_port) { - dev_warn(mlxsw_sp->bus_info->dev, "Port %d: Link event received for non-existent port\n", - local_port); + if (!mlxsw_sp_port) return; - } status = mlxsw_reg_pude_oper_status_get(pude_pl); if (status == MLXSW_PORT_OPER_STATUS_UP) { @@ -2273,6 +2075,31 @@ static const struct mlxsw_rx_listener mlxsw_sp_rx_listener[] = { .local_port = MLXSW_PORT_DONT_CARE, .trap_id = MLXSW_TRAP_ID_IGMP_V3_REPORT, }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_ARPBC, + }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_ARPUC, + }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_IP2ME, + }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_RTR_INGRESS0, + }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_HOST_MISS_IPV4, + }, }; static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp) @@ -2313,7 +2140,7 @@ err_rx_trap_set: mlxsw_sp); err_rx_listener_register: for (i--; i >= 0; i--) { - mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_FORWARD, + mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_DISCARD, mlxsw_sp_rx_listener[i].trap_id); mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(hpkt), hpkt_pl); @@ -2330,7 +2157,7 @@ static void mlxsw_sp_traps_fini(struct mlxsw_sp *mlxsw_sp) int i; for (i = 0; i < ARRAY_SIZE(mlxsw_sp_rx_listener); i++) { - mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_FORWARD, + mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_DISCARD, mlxsw_sp_rx_listener[i].trap_id); mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(hpkt), hpkt_pl); @@ -2410,8 +2237,7 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->core = mlxsw_core; mlxsw_sp->bus_info = mlxsw_bus_info; INIT_LIST_HEAD(&mlxsw_sp->fids); - INIT_LIST_HEAD(&mlxsw_sp->port_vfids.list); - INIT_LIST_HEAD(&mlxsw_sp->br_vfids.list); + INIT_LIST_HEAD(&mlxsw_sp->vfids.list); INIT_LIST_HEAD(&mlxsw_sp->br_mids.list); err = mlxsw_sp_base_mac_get(mlxsw_sp); @@ -2420,16 +2246,10 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, return err; } - err = mlxsw_sp_ports_create(mlxsw_sp); - if (err) { - dev_err(mlxsw_sp->bus_info->dev, "Failed to create ports\n"); - return err; - } - err = mlxsw_sp_event_register(mlxsw_sp, MLXSW_TRAP_ID_PUDE); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to register for PUDE events\n"); - goto err_event_register; + return err; } err = mlxsw_sp_traps_init(mlxsw_sp); @@ -2462,8 +2282,24 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, goto err_switchdev_init; } + err = mlxsw_sp_router_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize router\n"); + goto err_router_init; + } + + err = mlxsw_sp_ports_create(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to create ports\n"); + goto err_ports_create; + } + return 0; +err_ports_create: + mlxsw_sp_router_fini(mlxsw_sp); +err_router_init: + mlxsw_sp_switchdev_fini(mlxsw_sp); err_switchdev_init: err_lag_init: mlxsw_sp_buffers_fini(mlxsw_sp); @@ -2472,21 +2308,24 @@ err_flood_init: mlxsw_sp_traps_fini(mlxsw_sp); err_rx_listener_register: mlxsw_sp_event_unregister(mlxsw_sp, MLXSW_TRAP_ID_PUDE); -err_event_register: - mlxsw_sp_ports_remove(mlxsw_sp); return err; } static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + int i; + mlxsw_sp_ports_remove(mlxsw_sp); + mlxsw_sp_router_fini(mlxsw_sp); mlxsw_sp_switchdev_fini(mlxsw_sp); mlxsw_sp_buffers_fini(mlxsw_sp); mlxsw_sp_traps_fini(mlxsw_sp); mlxsw_sp_event_unregister(mlxsw_sp, MLXSW_TRAP_ID_PUDE); - mlxsw_sp_ports_remove(mlxsw_sp); + WARN_ON(!list_empty(&mlxsw_sp->vfids.list)); WARN_ON(!list_empty(&mlxsw_sp->fids)); + for (i = 0; i < MLXSW_SP_RIF_MAX; i++) + WARN_ON_ONCE(mlxsw_sp->rifs[i]); } static struct mlxsw_config_profile mlxsw_sp_config_profile = { @@ -2517,6 +2356,10 @@ static struct mlxsw_config_profile mlxsw_sp_config_profile = { .max_ib_mc = 0, .used_max_pkey = 1, .max_pkey = 0, + .used_kvd_sizes = 1, + .kvd_linear_size = MLXSW_SP_KVD_LINEAR_SIZE, + .kvd_hash_single_size = MLXSW_SP_KVD_HASH_SINGLE_SIZE, + .kvd_hash_double_size = MLXSW_SP_KVD_HASH_DOUBLE_SIZE, .swid_config = { { .used_type = 1, @@ -2548,6 +2391,559 @@ static struct mlxsw_driver mlxsw_sp_driver = { .profile = &mlxsw_sp_config_profile, }; +static bool mlxsw_sp_port_dev_check(const struct net_device *dev) +{ + return dev->netdev_ops == &mlxsw_sp_port_netdev_ops; +} + +static struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find(struct net_device *dev) +{ + struct net_device *lower_dev; + struct list_head *iter; + + if (mlxsw_sp_port_dev_check(dev)) + return netdev_priv(dev); + + netdev_for_each_all_lower_dev(dev, lower_dev, iter) { + if (mlxsw_sp_port_dev_check(lower_dev)) + return netdev_priv(lower_dev); + } + return NULL; +} + +static struct mlxsw_sp *mlxsw_sp_lower_get(struct net_device *dev) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + + mlxsw_sp_port = mlxsw_sp_port_dev_lower_find(dev); + return mlxsw_sp_port ? mlxsw_sp_port->mlxsw_sp : NULL; +} + +static struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find_rcu(struct net_device *dev) +{ + struct net_device *lower_dev; + struct list_head *iter; + + if (mlxsw_sp_port_dev_check(dev)) + return netdev_priv(dev); + + netdev_for_each_all_lower_dev_rcu(dev, lower_dev, iter) { + if (mlxsw_sp_port_dev_check(lower_dev)) + return netdev_priv(lower_dev); + } + return NULL; +} + +struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + + rcu_read_lock(); + mlxsw_sp_port = mlxsw_sp_port_dev_lower_find_rcu(dev); + if (mlxsw_sp_port) + dev_hold(mlxsw_sp_port->dev); + rcu_read_unlock(); + return mlxsw_sp_port; +} + +void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port) +{ + dev_put(mlxsw_sp_port->dev); +} + +static bool mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *r, + unsigned long event) +{ + switch (event) { + case NETDEV_UP: + if (!r) + return true; + r->ref_count++; + return false; + case NETDEV_DOWN: + if (r && --r->ref_count == 0) + return true; + /* It is possible we already removed the RIF ourselves + * if it was assigned to a netdev that is now a bridge + * or LAG slave. + */ + return false; + } + + return false; +} + +static int mlxsw_sp_avail_rif_get(struct mlxsw_sp *mlxsw_sp) +{ + int i; + + for (i = 0; i < MLXSW_SP_RIF_MAX; i++) + if (!mlxsw_sp->rifs[i]) + return i; + + return MLXSW_SP_RIF_MAX; +} + +static void mlxsw_sp_vport_rif_sp_attr_get(struct mlxsw_sp_port *mlxsw_sp_vport, + bool *p_lagged, u16 *p_system_port) +{ + u8 local_port = mlxsw_sp_vport->local_port; + + *p_lagged = mlxsw_sp_vport->lagged; + *p_system_port = *p_lagged ? mlxsw_sp_vport->lag_id : local_port; +} + +static int mlxsw_sp_vport_rif_sp_op(struct mlxsw_sp_port *mlxsw_sp_vport, + struct net_device *l3_dev, u16 rif, + bool create) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp; + bool lagged = mlxsw_sp_vport->lagged; + char ritr_pl[MLXSW_REG_RITR_LEN]; + u16 system_port; + + mlxsw_reg_ritr_pack(ritr_pl, create, MLXSW_REG_RITR_SP_IF, rif, + l3_dev->mtu, l3_dev->dev_addr); + + mlxsw_sp_vport_rif_sp_attr_get(mlxsw_sp_vport, &lagged, &system_port); + mlxsw_reg_ritr_sp_if_pack(ritr_pl, lagged, system_port, + mlxsw_sp_vport_vid_get(mlxsw_sp_vport)); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); +} + +static void mlxsw_sp_vport_rif_sp_leave(struct mlxsw_sp_port *mlxsw_sp_vport); + +static struct mlxsw_sp_fid * +mlxsw_sp_rfid_alloc(u16 fid, struct net_device *l3_dev) +{ + struct mlxsw_sp_fid *f; + + f = kzalloc(sizeof(*f), GFP_KERNEL); + if (!f) + return NULL; + + f->leave = mlxsw_sp_vport_rif_sp_leave; + f->ref_count = 0; + f->dev = l3_dev; + f->fid = fid; + + return f; +} + +static struct mlxsw_sp_rif * +mlxsw_sp_rif_alloc(u16 rif, struct net_device *l3_dev, struct mlxsw_sp_fid *f) +{ + struct mlxsw_sp_rif *r; + + r = kzalloc(sizeof(*r), GFP_KERNEL); + if (!r) + return NULL; + + ether_addr_copy(r->addr, l3_dev->dev_addr); + r->mtu = l3_dev->mtu; + r->ref_count = 1; + r->dev = l3_dev; + r->rif = rif; + r->f = f; + + return r; +} + +static struct mlxsw_sp_rif * +mlxsw_sp_vport_rif_sp_create(struct mlxsw_sp_port *mlxsw_sp_vport, + struct net_device *l3_dev) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp; + struct mlxsw_sp_fid *f; + struct mlxsw_sp_rif *r; + u16 fid, rif; + int err; + + rif = mlxsw_sp_avail_rif_get(mlxsw_sp); + if (rif == MLXSW_SP_RIF_MAX) + return ERR_PTR(-ERANGE); + + err = mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, l3_dev, rif, true); + if (err) + return ERR_PTR(err); + + fid = mlxsw_sp_rif_sp_to_fid(rif); + err = mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, true); + if (err) + goto err_rif_fdb_op; + + f = mlxsw_sp_rfid_alloc(fid, l3_dev); + if (!f) { + err = -ENOMEM; + goto err_rfid_alloc; + } + + r = mlxsw_sp_rif_alloc(rif, l3_dev, f); + if (!r) { + err = -ENOMEM; + goto err_rif_alloc; + } + + f->r = r; + mlxsw_sp->rifs[rif] = r; + + return r; + +err_rif_alloc: + kfree(f); +err_rfid_alloc: + mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, false); +err_rif_fdb_op: + mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, l3_dev, rif, false); + return ERR_PTR(err); +} + +static void mlxsw_sp_vport_rif_sp_destroy(struct mlxsw_sp_port *mlxsw_sp_vport, + struct mlxsw_sp_rif *r) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp; + struct net_device *l3_dev = r->dev; + struct mlxsw_sp_fid *f = r->f; + u16 fid = f->fid; + u16 rif = r->rif; + + mlxsw_sp->rifs[rif] = NULL; + f->r = NULL; + + kfree(r); + + kfree(f); + + mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, false); + + mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, l3_dev, rif, false); +} + +static int mlxsw_sp_vport_rif_sp_join(struct mlxsw_sp_port *mlxsw_sp_vport, + struct net_device *l3_dev) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp; + struct mlxsw_sp_rif *r; + + r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev); + if (!r) { + r = mlxsw_sp_vport_rif_sp_create(mlxsw_sp_vport, l3_dev); + if (IS_ERR(r)) + return PTR_ERR(r); + } + + mlxsw_sp_vport_fid_set(mlxsw_sp_vport, r->f); + r->f->ref_count++; + + netdev_dbg(mlxsw_sp_vport->dev, "Joined FID=%d\n", r->f->fid); + + return 0; +} + +static void mlxsw_sp_vport_rif_sp_leave(struct mlxsw_sp_port *mlxsw_sp_vport) +{ + struct mlxsw_sp_fid *f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport); + + netdev_dbg(mlxsw_sp_vport->dev, "Left FID=%d\n", f->fid); + + mlxsw_sp_vport_fid_set(mlxsw_sp_vport, NULL); + if (--f->ref_count == 0) + mlxsw_sp_vport_rif_sp_destroy(mlxsw_sp_vport, f->r); +} + +static int mlxsw_sp_inetaddr_vport_event(struct net_device *l3_dev, + struct net_device *port_dev, + unsigned long event, u16 vid) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev); + struct mlxsw_sp_port *mlxsw_sp_vport; + + mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid); + if (WARN_ON(!mlxsw_sp_vport)) + return -EINVAL; + + switch (event) { + case NETDEV_UP: + return mlxsw_sp_vport_rif_sp_join(mlxsw_sp_vport, l3_dev); + case NETDEV_DOWN: + mlxsw_sp_vport_rif_sp_leave(mlxsw_sp_vport); + break; + } + + return 0; +} + +static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev, + unsigned long event) +{ + if (netif_is_bridge_port(port_dev) || netif_is_lag_port(port_dev)) + return 0; + + return mlxsw_sp_inetaddr_vport_event(port_dev, port_dev, event, 1); +} + +static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev, + struct net_device *lag_dev, + unsigned long event, u16 vid) +{ + struct net_device *port_dev; + struct list_head *iter; + int err; + + netdev_for_each_lower_dev(lag_dev, port_dev, iter) { + if (mlxsw_sp_port_dev_check(port_dev)) { + err = mlxsw_sp_inetaddr_vport_event(l3_dev, port_dev, + event, vid); + if (err) + return err; + } + } + + return 0; +} + +static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev, + unsigned long event) +{ + if (netif_is_bridge_port(lag_dev)) + return 0; + + return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1); +} + +static struct mlxsw_sp_fid *mlxsw_sp_bridge_fid_get(struct mlxsw_sp *mlxsw_sp, + struct net_device *l3_dev) +{ + u16 fid; + + if (is_vlan_dev(l3_dev)) + fid = vlan_dev_vlan_id(l3_dev); + else if (mlxsw_sp->master_bridge.dev == l3_dev) + fid = 1; + else + return mlxsw_sp_vfid_find(mlxsw_sp, l3_dev); + + return mlxsw_sp_fid_find(mlxsw_sp, fid); +} + +static enum mlxsw_reg_ritr_if_type mlxsw_sp_rif_type_get(u16 fid) +{ + if (mlxsw_sp_fid_is_vfid(fid)) + return MLXSW_REG_RITR_FID_IF; + else + return MLXSW_REG_RITR_VLAN_IF; +} + +static int mlxsw_sp_rif_bridge_op(struct mlxsw_sp *mlxsw_sp, + struct net_device *l3_dev, + u16 fid, u16 rif, + bool create) +{ + enum mlxsw_reg_ritr_if_type rif_type; + char ritr_pl[MLXSW_REG_RITR_LEN]; + + rif_type = mlxsw_sp_rif_type_get(fid); + mlxsw_reg_ritr_pack(ritr_pl, create, rif_type, rif, l3_dev->mtu, + l3_dev->dev_addr); + mlxsw_reg_ritr_fid_set(ritr_pl, rif_type, fid); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); +} + +static int mlxsw_sp_rif_bridge_create(struct mlxsw_sp *mlxsw_sp, + struct net_device *l3_dev, + struct mlxsw_sp_fid *f) +{ + struct mlxsw_sp_rif *r; + u16 rif; + int err; + + rif = mlxsw_sp_avail_rif_get(mlxsw_sp); + if (rif == MLXSW_SP_RIF_MAX) + return -ERANGE; + + err = mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, true); + if (err) + return err; + + err = mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, true); + if (err) + goto err_rif_fdb_op; + + r = mlxsw_sp_rif_alloc(rif, l3_dev, f); + if (!r) { + err = -ENOMEM; + goto err_rif_alloc; + } + + f->r = r; + mlxsw_sp->rifs[rif] = r; + + netdev_dbg(l3_dev, "RIF=%d created\n", rif); + + return 0; + +err_rif_alloc: + mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, false); +err_rif_fdb_op: + mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, false); + return err; +} + +void mlxsw_sp_rif_bridge_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *r) +{ + struct net_device *l3_dev = r->dev; + struct mlxsw_sp_fid *f = r->f; + u16 rif = r->rif; + + mlxsw_sp->rifs[rif] = NULL; + f->r = NULL; + + kfree(r); + + mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, false); + + mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, false); + + netdev_dbg(l3_dev, "RIF=%d destroyed\n", rif); +} + +static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev, + struct net_device *br_dev, + unsigned long event) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev); + struct mlxsw_sp_fid *f; + + /* FID can either be an actual FID if the L3 device is the + * VLAN-aware bridge or a VLAN device on top. Otherwise, the + * L3 device is a VLAN-unaware bridge and we get a vFID. + */ + f = mlxsw_sp_bridge_fid_get(mlxsw_sp, l3_dev); + if (WARN_ON(!f)) + return -EINVAL; + + switch (event) { + case NETDEV_UP: + return mlxsw_sp_rif_bridge_create(mlxsw_sp, l3_dev, f); + case NETDEV_DOWN: + mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->r); + break; + } + + return 0; +} + +static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev, + unsigned long event) +{ + struct net_device *real_dev = vlan_dev_real_dev(vlan_dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(vlan_dev); + u16 vid = vlan_dev_vlan_id(vlan_dev); + + if (mlxsw_sp_port_dev_check(real_dev)) + return mlxsw_sp_inetaddr_vport_event(vlan_dev, real_dev, event, + vid); + else if (netif_is_lag_master(real_dev)) + return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event, + vid); + else if (netif_is_bridge_master(real_dev) && + mlxsw_sp->master_bridge.dev == real_dev) + return mlxsw_sp_inetaddr_bridge_event(vlan_dev, real_dev, + event); + + return 0; +} + +static int mlxsw_sp_inetaddr_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct in_ifaddr *ifa = (struct in_ifaddr *) ptr; + struct net_device *dev = ifa->ifa_dev->dev; + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_rif *r; + int err = 0; + + mlxsw_sp = mlxsw_sp_lower_get(dev); + if (!mlxsw_sp) + goto out; + + r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!mlxsw_sp_rif_should_config(r, event)) + goto out; + + if (mlxsw_sp_port_dev_check(dev)) + err = mlxsw_sp_inetaddr_port_event(dev, event); + else if (netif_is_lag_master(dev)) + err = mlxsw_sp_inetaddr_lag_event(dev, event); + else if (netif_is_bridge_master(dev)) + err = mlxsw_sp_inetaddr_bridge_event(dev, dev, event); + else if (is_vlan_dev(dev)) + err = mlxsw_sp_inetaddr_vlan_event(dev, event); + +out: + return notifier_from_errno(err); +} + +static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif, + const char *mac, int mtu) +{ + char ritr_pl[MLXSW_REG_RITR_LEN]; + int err; + + mlxsw_reg_ritr_rif_pack(ritr_pl, rif); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); + if (err) + return err; + + mlxsw_reg_ritr_mtu_set(ritr_pl, mtu); + mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac); + mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); +} + +static int mlxsw_sp_netdevice_router_port_event(struct net_device *dev) +{ + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_rif *r; + int err; + + mlxsw_sp = mlxsw_sp_lower_get(dev); + if (!mlxsw_sp) + return 0; + + r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!r) + return 0; + + err = mlxsw_sp_rif_fdb_op(mlxsw_sp, r->addr, r->f->fid, false); + if (err) + return err; + + err = mlxsw_sp_rif_edit(mlxsw_sp, r->rif, dev->dev_addr, dev->mtu); + if (err) + goto err_rif_edit; + + err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, r->f->fid, true); + if (err) + goto err_rif_fdb_op; + + ether_addr_copy(r->addr, dev->dev_addr); + r->mtu = dev->mtu; + + netdev_dbg(dev, "Updated RIF=%d\n", r->rif); + + return 0; + +err_rif_fdb_op: + mlxsw_sp_rif_edit(mlxsw_sp, r->rif, r->addr, r->mtu); +err_rif_edit: + mlxsw_sp_rif_fdb_op(mlxsw_sp, r->addr, r->f->fid, true); + return err; +} + static bool mlxsw_sp_lag_port_fid_member(struct mlxsw_sp_port *lag_port, u16 fid) { @@ -2628,9 +3024,15 @@ int mlxsw_sp_port_fdb_flush(struct mlxsw_sp_port *mlxsw_sp_port, u16 fid) return mlxsw_sp_port_fdb_flush_by_port_fid(mlxsw_sp_port, fid); } -static bool mlxsw_sp_port_dev_check(const struct net_device *dev) +static void mlxsw_sp_master_bridge_gone_sync(struct mlxsw_sp *mlxsw_sp) { - return dev->netdev_ops == &mlxsw_sp_port_netdev_ops; + struct mlxsw_sp_fid *f, *tmp; + + list_for_each_entry_safe(f, tmp, &mlxsw_sp->fids, list) + if (--f->ref_count == 0) + mlxsw_sp_fid_destroy(mlxsw_sp, f); + else + WARN_ON_ONCE(1); } static bool mlxsw_sp_master_bridge_check(struct mlxsw_sp *mlxsw_sp, @@ -2649,8 +3051,15 @@ static void mlxsw_sp_master_bridge_inc(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp_master_bridge_dec(struct mlxsw_sp *mlxsw_sp) { - if (--mlxsw_sp->master_bridge.ref_count == 0) + if (--mlxsw_sp->master_bridge.ref_count == 0) { mlxsw_sp->master_bridge.dev = NULL; + /* It's possible upper VLAN devices are still holding + * references to underlying FIDs. Drop the reference + * and release the resources if it was the last one. + * If it wasn't, then something bad happened. + */ + mlxsw_sp_master_bridge_gone_sync(mlxsw_sp); + } } static int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port, @@ -2810,6 +3219,45 @@ static int mlxsw_sp_port_lag_index_get(struct mlxsw_sp *mlxsw_sp, return -EBUSY; } +static void +mlxsw_sp_port_pvid_vport_lag_join(struct mlxsw_sp_port *mlxsw_sp_port, + u16 lag_id) +{ + struct mlxsw_sp_port *mlxsw_sp_vport; + struct mlxsw_sp_fid *f; + + mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, 1); + if (WARN_ON(!mlxsw_sp_vport)) + return; + + /* If vPort is assigned a RIF, then leave it since it's no + * longer valid. + */ + f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport); + if (f) + f->leave(mlxsw_sp_vport); + + mlxsw_sp_vport->lag_id = lag_id; + mlxsw_sp_vport->lagged = 1; +} + +static void +mlxsw_sp_port_pvid_vport_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp_port *mlxsw_sp_vport; + struct mlxsw_sp_fid *f; + + mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, 1); + if (WARN_ON(!mlxsw_sp_vport)) + return; + + f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport); + if (f) + f->leave(mlxsw_sp_vport); + + mlxsw_sp_vport->lagged = 0; +} + static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port, struct net_device *lag_dev) { @@ -2845,6 +3293,9 @@ static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_port->lag_id = lag_id; mlxsw_sp_port->lagged = 1; lag->ref_count++; + + mlxsw_sp_port_pvid_vport_lag_join(mlxsw_sp_port, lag_id); + return 0; err_col_port_enable: @@ -2882,6 +3333,8 @@ static void mlxsw_sp_port_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_port->local_port); mlxsw_sp_port->lagged = 0; lag->ref_count--; + + mlxsw_sp_port_pvid_vport_lag_leave(mlxsw_sp_port); } static int mlxsw_sp_lag_dist_port_add(struct mlxsw_sp_port *mlxsw_sp_port, @@ -3075,47 +3528,97 @@ static int mlxsw_sp_netdevice_lag_event(struct net_device *lag_dev, return 0; } -static struct mlxsw_sp_fid * -mlxsw_sp_br_vfid_find(const struct mlxsw_sp *mlxsw_sp, - const struct net_device *br_dev) +static int mlxsw_sp_master_bridge_vlan_link(struct mlxsw_sp *mlxsw_sp, + struct net_device *vlan_dev) { + u16 fid = vlan_dev_vlan_id(vlan_dev); struct mlxsw_sp_fid *f; - list_for_each_entry(f, &mlxsw_sp->br_vfids.list, list) { - if (f->dev == br_dev) - return f; + f = mlxsw_sp_fid_find(mlxsw_sp, fid); + if (!f) { + f = mlxsw_sp_fid_create(mlxsw_sp, fid); + if (IS_ERR(f)) + return PTR_ERR(f); } - return NULL; + f->ref_count++; + + return 0; +} + +static void mlxsw_sp_master_bridge_vlan_unlink(struct mlxsw_sp *mlxsw_sp, + struct net_device *vlan_dev) +{ + u16 fid = vlan_dev_vlan_id(vlan_dev); + struct mlxsw_sp_fid *f; + + f = mlxsw_sp_fid_find(mlxsw_sp, fid); + if (f && f->r) + mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->r); + if (f && --f->ref_count == 0) + mlxsw_sp_fid_destroy(mlxsw_sp, f); } -static u16 mlxsw_sp_vfid_to_br_vfid(u16 vfid) +static int mlxsw_sp_netdevice_bridge_event(struct net_device *br_dev, + unsigned long event, void *ptr) { - return vfid - MLXSW_SP_VFID_PORT_MAX; + struct netdev_notifier_changeupper_info *info; + struct net_device *upper_dev; + struct mlxsw_sp *mlxsw_sp; + int err; + + mlxsw_sp = mlxsw_sp_lower_get(br_dev); + if (!mlxsw_sp) + return 0; + if (br_dev != mlxsw_sp->master_bridge.dev) + return 0; + + info = ptr; + + switch (event) { + case NETDEV_CHANGEUPPER: + upper_dev = info->upper_dev; + if (!is_vlan_dev(upper_dev)) + break; + if (info->linking) { + err = mlxsw_sp_master_bridge_vlan_link(mlxsw_sp, + upper_dev); + if (err) + return err; + } else { + mlxsw_sp_master_bridge_vlan_unlink(mlxsw_sp, upper_dev); + } + break; + } + + return 0; } -static u16 mlxsw_sp_br_vfid_to_vfid(u16 br_vfid) +static u16 mlxsw_sp_avail_vfid_get(const struct mlxsw_sp *mlxsw_sp) { - return MLXSW_SP_VFID_PORT_MAX + br_vfid; + return find_first_zero_bit(mlxsw_sp->vfids.mapped, + MLXSW_SP_VFID_MAX); } -static u16 mlxsw_sp_avail_br_vfid_get(const struct mlxsw_sp *mlxsw_sp) +static int mlxsw_sp_vfid_op(struct mlxsw_sp *mlxsw_sp, u16 fid, bool create) { - return find_first_zero_bit(mlxsw_sp->br_vfids.mapped, - MLXSW_SP_VFID_BR_MAX); + char sfmr_pl[MLXSW_REG_SFMR_LEN]; + + mlxsw_reg_sfmr_pack(sfmr_pl, !create, fid, 0); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); } -static void mlxsw_sp_vport_br_vfid_leave(struct mlxsw_sp_port *mlxsw_sp_vport); +static void mlxsw_sp_vport_vfid_leave(struct mlxsw_sp_port *mlxsw_sp_vport); -static struct mlxsw_sp_fid *mlxsw_sp_br_vfid_create(struct mlxsw_sp *mlxsw_sp, - struct net_device *br_dev) +static struct mlxsw_sp_fid *mlxsw_sp_vfid_create(struct mlxsw_sp *mlxsw_sp, + struct net_device *br_dev) { struct device *dev = mlxsw_sp->bus_info->dev; struct mlxsw_sp_fid *f; u16 vfid, fid; int err; - vfid = mlxsw_sp_br_vfid_to_vfid(mlxsw_sp_avail_br_vfid_get(mlxsw_sp)); + vfid = mlxsw_sp_avail_vfid_get(mlxsw_sp); if (vfid == MLXSW_SP_VFID_MAX) { dev_err(dev, "No available vFIDs\n"); return ERR_PTR(-ERANGE); @@ -3132,12 +3635,12 @@ static struct mlxsw_sp_fid *mlxsw_sp_br_vfid_create(struct mlxsw_sp *mlxsw_sp, if (!f) goto err_allocate_vfid; - f->leave = mlxsw_sp_vport_br_vfid_leave; + f->leave = mlxsw_sp_vport_vfid_leave; f->fid = fid; f->dev = br_dev; - list_add(&f->list, &mlxsw_sp->br_vfids.list); - set_bit(mlxsw_sp_vfid_to_br_vfid(vfid), mlxsw_sp->br_vfids.mapped); + list_add(&f->list, &mlxsw_sp->vfids.list); + set_bit(vfid, mlxsw_sp->vfids.mapped); return f; @@ -3146,29 +3649,42 @@ err_allocate_vfid: return ERR_PTR(-ENOMEM); } -static void mlxsw_sp_br_vfid_destroy(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fid *f) +static void mlxsw_sp_vfid_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fid *f) { u16 vfid = mlxsw_sp_fid_to_vfid(f->fid); - u16 br_vfid = mlxsw_sp_vfid_to_br_vfid(vfid); + u16 fid = f->fid; - clear_bit(br_vfid, mlxsw_sp->br_vfids.mapped); + clear_bit(vfid, mlxsw_sp->vfids.mapped); list_del(&f->list); - mlxsw_sp_vfid_op(mlxsw_sp, f->fid, false); + if (f->r) + mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->r); kfree(f); + + mlxsw_sp_vfid_op(mlxsw_sp, fid, false); } -static int mlxsw_sp_vport_br_vfid_join(struct mlxsw_sp_port *mlxsw_sp_vport, - struct net_device *br_dev) +static int mlxsw_sp_vport_fid_map(struct mlxsw_sp_port *mlxsw_sp_vport, u16 fid, + bool valid) +{ + enum mlxsw_reg_svfa_mt mt = MLXSW_REG_SVFA_MT_PORT_VID_TO_FID; + u16 vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport); + + return mlxsw_sp_port_vid_to_fid_set(mlxsw_sp_vport, mt, valid, fid, + vid); +} + +static int mlxsw_sp_vport_vfid_join(struct mlxsw_sp_port *mlxsw_sp_vport, + struct net_device *br_dev) { struct mlxsw_sp_fid *f; int err; - f = mlxsw_sp_br_vfid_find(mlxsw_sp_vport->mlxsw_sp, br_dev); + f = mlxsw_sp_vfid_find(mlxsw_sp_vport->mlxsw_sp, br_dev); if (!f) { - f = mlxsw_sp_br_vfid_create(mlxsw_sp_vport->mlxsw_sp, br_dev); + f = mlxsw_sp_vfid_create(mlxsw_sp_vport->mlxsw_sp, br_dev); if (IS_ERR(f)) return PTR_ERR(f); } @@ -3192,11 +3708,11 @@ err_vport_fid_map: mlxsw_sp_vport_flood_set(mlxsw_sp_vport, f->fid, false); err_vport_flood_set: if (!f->ref_count) - mlxsw_sp_br_vfid_destroy(mlxsw_sp_vport->mlxsw_sp, f); + mlxsw_sp_vfid_destroy(mlxsw_sp_vport->mlxsw_sp, f); return err; } -static void mlxsw_sp_vport_br_vfid_leave(struct mlxsw_sp_port *mlxsw_sp_vport) +static void mlxsw_sp_vport_vfid_leave(struct mlxsw_sp_port *mlxsw_sp_vport) { struct mlxsw_sp_fid *f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport); @@ -3210,22 +3726,24 @@ static void mlxsw_sp_vport_br_vfid_leave(struct mlxsw_sp_port *mlxsw_sp_vport) mlxsw_sp_vport_fid_set(mlxsw_sp_vport, NULL); if (--f->ref_count == 0) - mlxsw_sp_br_vfid_destroy(mlxsw_sp_vport->mlxsw_sp, f); + mlxsw_sp_vfid_destroy(mlxsw_sp_vport->mlxsw_sp, f); } static int mlxsw_sp_vport_bridge_join(struct mlxsw_sp_port *mlxsw_sp_vport, struct net_device *br_dev) { + struct mlxsw_sp_fid *f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport); u16 vid = mlxsw_sp_vport_vid_get(mlxsw_sp_vport); struct net_device *dev = mlxsw_sp_vport->dev; int err; - mlxsw_sp_vport_vfid_leave(mlxsw_sp_vport); + if (f && !WARN_ON(!f->leave)) + f->leave(mlxsw_sp_vport); - err = mlxsw_sp_vport_br_vfid_join(mlxsw_sp_vport, br_dev); + err = mlxsw_sp_vport_vfid_join(mlxsw_sp_vport, br_dev); if (err) { netdev_err(dev, "Failed to join vFID\n"); - goto err_vport_br_vfid_join; + return err; } err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true); @@ -3242,9 +3760,7 @@ static int mlxsw_sp_vport_bridge_join(struct mlxsw_sp_port *mlxsw_sp_vport, return 0; err_port_vid_learning_set: - mlxsw_sp_vport_br_vfid_leave(mlxsw_sp_vport); -err_vport_br_vfid_join: - mlxsw_sp_vport_vfid_join(mlxsw_sp_vport); + mlxsw_sp_vport_vfid_leave(mlxsw_sp_vport); return err; } @@ -3254,12 +3770,7 @@ static void mlxsw_sp_vport_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_vport) mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, false); - mlxsw_sp_vport_br_vfid_leave(mlxsw_sp_vport); - - mlxsw_sp_vport_vfid_join(mlxsw_sp_vport); - - mlxsw_sp_port_stp_state_set(mlxsw_sp_vport, vid, - MLXSW_REG_SPMS_STATE_FORWARDING); + mlxsw_sp_vport_vfid_leave(mlxsw_sp_vport); mlxsw_sp_vport->learning = 0; mlxsw_sp_vport->learning_sync = 0; @@ -3275,7 +3786,7 @@ mlxsw_sp_port_master_bridge_check(const struct mlxsw_sp_port *mlxsw_sp_port, list_for_each_entry(mlxsw_sp_vport, &mlxsw_sp_port->vports_list, vport.list) { - struct net_device *dev = mlxsw_sp_vport_br_get(mlxsw_sp_vport); + struct net_device *dev = mlxsw_sp_vport_dev_get(mlxsw_sp_vport); if (dev && dev == br_dev) return false; @@ -3369,10 +3880,14 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *unused, struct net_device *dev = netdev_notifier_info_to_dev(ptr); int err = 0; - if (mlxsw_sp_port_dev_check(dev)) + if (event == NETDEV_CHANGEADDR || event == NETDEV_CHANGEMTU) + err = mlxsw_sp_netdevice_router_port_event(dev); + else if (mlxsw_sp_port_dev_check(dev)) err = mlxsw_sp_netdevice_port_event(dev, event, ptr); else if (netif_is_lag_master(dev)) err = mlxsw_sp_netdevice_lag_event(dev, event, ptr); + else if (netif_is_bridge_master(dev)) + err = mlxsw_sp_netdevice_bridge_event(dev, event, ptr); else if (is_vlan_dev(dev)) err = mlxsw_sp_netdevice_vlan_event(dev, event, ptr); @@ -3383,11 +3898,17 @@ static struct notifier_block mlxsw_sp_netdevice_nb __read_mostly = { .notifier_call = mlxsw_sp_netdevice_event, }; +static struct notifier_block mlxsw_sp_inetaddr_nb __read_mostly = { + .notifier_call = mlxsw_sp_inetaddr_event, + .priority = 10, /* Must be called before FIB notifier block */ +}; + static int __init mlxsw_sp_module_init(void) { int err; register_netdevice_notifier(&mlxsw_sp_netdevice_nb); + register_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); err = mlxsw_core_driver_register(&mlxsw_sp_driver); if (err) goto err_core_driver_register; @@ -3401,6 +3922,7 @@ err_core_driver_register: static void __exit mlxsw_sp_module_exit(void) { mlxsw_core_driver_unregister(&mlxsw_sp_driver); + unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 36c9835ea20b..ef4ac8987a2a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -39,19 +39,22 @@ #include <linux/types.h> #include <linux/netdevice.h> +#include <linux/rhashtable.h> #include <linux/bitops.h> #include <linux/if_vlan.h> #include <linux/list.h> #include <linux/dcbnl.h> +#include <linux/in6.h> #include <net/switchdev.h> #include "port.h" #include "core.h" #define MLXSW_SP_VFID_BASE VLAN_N_VID -#define MLXSW_SP_VFID_PORT_MAX 512 /* Non-bridged VLAN interfaces */ -#define MLXSW_SP_VFID_BR_MAX 6144 /* Bridged VLAN interfaces */ -#define MLXSW_SP_VFID_MAX (MLXSW_SP_VFID_PORT_MAX + MLXSW_SP_VFID_BR_MAX) +#define MLXSW_SP_VFID_MAX 6656 /* Bridged VLAN interfaces */ + +#define MLXSW_SP_RFID_BASE 15360 +#define MLXSW_SP_RIF_MAX 800 #define MLXSW_SP_LAG_MAX 64 #define MLXSW_SP_PORT_PER_LAG_MAX 16 @@ -60,6 +63,12 @@ #define MLXSW_SP_PORTS_PER_CLUSTER_MAX 4 +#define MLXSW_SP_LPM_TREE_MIN 2 /* trees 0 and 1 are reserved */ +#define MLXSW_SP_LPM_TREE_MAX 22 +#define MLXSW_SP_LPM_TREE_COUNT (MLXSW_SP_LPM_TREE_MAX - MLXSW_SP_LPM_TREE_MIN) + +#define MLXSW_SP_VIRTUAL_ROUTER_MAX 256 + #define MLXSW_SP_PORT_BASE_SPEED 25000 /* Mb/s */ #define MLXSW_SP_BYTES_PER_CELL 96 @@ -67,6 +76,10 @@ #define MLXSW_SP_BYTES_TO_CELLS(b) DIV_ROUND_UP(b, MLXSW_SP_BYTES_PER_CELL) #define MLXSW_SP_CELLS_TO_BYTES(c) (c * MLXSW_SP_BYTES_PER_CELL) +#define MLXSW_SP_KVD_LINEAR_SIZE 65536 /* entries */ +#define MLXSW_SP_KVD_HASH_SINGLE_SIZE 163840 /* entries */ +#define MLXSW_SP_KVD_HASH_DOUBLE_SIZE 32768 /* entries */ + /* Maximum delay buffer needed in case of PAUSE frames, in cells. * Assumes 100m cable and maximum MTU. */ @@ -92,8 +105,17 @@ struct mlxsw_sp_fid { struct list_head list; unsigned int ref_count; struct net_device *dev; + struct mlxsw_sp_rif *r; u16 fid; - u16 vid; +}; + +struct mlxsw_sp_rif { + struct net_device *dev; + unsigned int ref_count; + struct mlxsw_sp_fid *f; + unsigned char addr[ETH_ALEN]; + int mtu; + u16 rif; }; struct mlxsw_sp_mid { @@ -116,7 +138,17 @@ static inline u16 mlxsw_sp_fid_to_vfid(u16 fid) static inline bool mlxsw_sp_fid_is_vfid(u16 fid) { - return fid >= MLXSW_SP_VFID_BASE; + return fid >= MLXSW_SP_VFID_BASE && fid < MLXSW_SP_RFID_BASE; +} + +static inline bool mlxsw_sp_fid_is_rfid(u16 fid) +{ + return fid >= MLXSW_SP_RFID_BASE; +} + +static inline u16 mlxsw_sp_rif_sp_to_fid(u16 rif) +{ + return MLXSW_SP_RFID_BASE + rif; } struct mlxsw_sp_sb_pr { @@ -153,20 +185,60 @@ struct mlxsw_sp_sb { } ports[MLXSW_PORT_MAX_PORTS]; }; -struct mlxsw_sp { +#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE) + +struct mlxsw_sp_prefix_usage { + DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT); +}; + +enum mlxsw_sp_l3proto { + MLXSW_SP_L3_PROTO_IPV4, + MLXSW_SP_L3_PROTO_IPV6, +}; + +struct mlxsw_sp_lpm_tree { + u8 id; /* tree ID */ + unsigned int ref_count; + enum mlxsw_sp_l3proto proto; + struct mlxsw_sp_prefix_usage prefix_usage; +}; + +struct mlxsw_sp_fib; + +struct mlxsw_sp_vr { + u16 id; /* virtual router ID */ + bool used; + enum mlxsw_sp_l3proto proto; + u32 tb_id; /* kernel fib table id */ + struct mlxsw_sp_lpm_tree *lpm_tree; + struct mlxsw_sp_fib *fib; +}; + +struct mlxsw_sp_router { + struct mlxsw_sp_lpm_tree lpm_trees[MLXSW_SP_LPM_TREE_COUNT]; + struct mlxsw_sp_vr vrs[MLXSW_SP_VIRTUAL_ROUTER_MAX]; + struct rhashtable neigh_ht; struct { - struct list_head list; - DECLARE_BITMAP(mapped, MLXSW_SP_VFID_PORT_MAX); - } port_vfids; + struct delayed_work dw; + unsigned long interval; /* ms */ + } neighs_update; + struct delayed_work nexthop_probe_dw; +#define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */ + struct list_head nexthop_group_list; + struct list_head nexthop_neighs_list; +}; + +struct mlxsw_sp { struct { struct list_head list; - DECLARE_BITMAP(mapped, MLXSW_SP_VFID_BR_MAX); - } br_vfids; + DECLARE_BITMAP(mapped, MLXSW_SP_VFID_MAX); + } vfids; struct { struct list_head list; DECLARE_BITMAP(mapped, MLXSW_SP_MID_MAX); } br_mids; struct list_head fids; /* VLAN-aware bridge FIDs */ + struct mlxsw_sp_rif *rifs[MLXSW_SP_RIF_MAX]; struct mlxsw_sp_port **ports; struct mlxsw_core *core; const struct mlxsw_bus_info *bus_info; @@ -184,6 +256,10 @@ struct mlxsw_sp { struct mlxsw_sp_upper lags[MLXSW_SP_LAG_MAX]; u8 port_to_module[MLXSW_PORT_MAX_PORTS]; struct mlxsw_sp_sb sb; + struct mlxsw_sp_router router; + struct { + DECLARE_BITMAP(usage, MLXSW_SP_KVD_LINEAR_SIZE); + } kvdl; }; static inline struct mlxsw_sp_upper * @@ -242,6 +318,9 @@ struct mlxsw_sp_port { struct list_head vports_list; }; +struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev); +void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port); + static inline bool mlxsw_sp_port_is_pause_en(const struct mlxsw_sp_port *mlxsw_sp_port) { @@ -287,7 +366,7 @@ mlxsw_sp_vport_fid_get(const struct mlxsw_sp_port *mlxsw_sp_vport) } static inline struct net_device * -mlxsw_sp_vport_br_get(const struct mlxsw_sp_port *mlxsw_sp_vport) +mlxsw_sp_vport_dev_get(const struct mlxsw_sp_port *mlxsw_sp_vport) { struct mlxsw_sp_fid *f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport); @@ -325,6 +404,44 @@ mlxsw_sp_port_vport_find_by_fid(const struct mlxsw_sp_port *mlxsw_sp_port, return NULL; } +static inline struct mlxsw_sp_fid *mlxsw_sp_fid_find(struct mlxsw_sp *mlxsw_sp, + u16 fid) +{ + struct mlxsw_sp_fid *f; + + list_for_each_entry(f, &mlxsw_sp->fids, list) + if (f->fid == fid) + return f; + + return NULL; +} + +static inline struct mlxsw_sp_fid * +mlxsw_sp_vfid_find(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *br_dev) +{ + struct mlxsw_sp_fid *f; + + list_for_each_entry(f, &mlxsw_sp->vfids.list, list) + if (f->dev == br_dev) + return f; + + return NULL; +} + +static inline struct mlxsw_sp_rif * +mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev) +{ + int i; + + for (i = 0; i < MLXSW_SP_RIF_MAX; i++) + if (mlxsw_sp->rifs[i] && mlxsw_sp->rifs[i]->dev == dev) + return mlxsw_sp->rifs[i]; + + return NULL; +} + enum mlxsw_sp_flood_table { MLXSW_SP_FLOOD_TABLE_UC, MLXSW_SP_FLOOD_TABLE_BM, @@ -377,13 +494,17 @@ int mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, u16 vid_end, bool is_member, bool untagged); int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto, u16 vid); -int mlxsw_sp_port_kill_vid(struct net_device *dev, - __be16 __always_unused proto, u16 vid); int mlxsw_sp_vport_flood_set(struct mlxsw_sp_port *mlxsw_sp_vport, u16 fid, bool set); void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port); int mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid); int mlxsw_sp_port_fdb_flush(struct mlxsw_sp_port *mlxsw_sp_port, u16 fid); +int mlxsw_sp_rif_fdb_op(struct mlxsw_sp *mlxsw_sp, const char *mac, u16 fid, + bool adding); +struct mlxsw_sp_fid *mlxsw_sp_fid_create(struct mlxsw_sp *mlxsw_sp, u16 fid); +void mlxsw_sp_fid_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *f); +void mlxsw_sp_rif_bridge_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *r); int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port, enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index, bool dwrr, u8 dwrr_weight); @@ -413,4 +534,19 @@ static inline void mlxsw_sp_port_dcb_fini(struct mlxsw_sp_port *mlxsw_sp_port) #endif +int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp); +int mlxsw_sp_router_fib4_add(struct mlxsw_sp_port *mlxsw_sp_port, + const struct switchdev_obj_ipv4_fib *fib4, + struct switchdev_trans *trans); +int mlxsw_sp_router_fib4_del(struct mlxsw_sp_port *mlxsw_sp_port, + const struct switchdev_obj_ipv4_fib *fib4); +int mlxsw_sp_router_neigh_construct(struct net_device *dev, + struct neighbour *n); +void mlxsw_sp_router_neigh_destroy(struct net_device *dev, + struct neighbour *n); + +int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count); +void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index); + #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c new file mode 100644 index 000000000000..ac321e8e5c1a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c @@ -0,0 +1,91 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c + * Copyright (c) 2016 Mellanox Technologies. All rights reserved. + * Copyright (c) 2016 Jiri Pirko <[email protected]> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/kernel.h> +#include <linux/bitops.h> + +#include "spectrum.h" + +#define MLXSW_SP_KVDL_SINGLE_BASE 0 +#define MLXSW_SP_KVDL_SINGLE_SIZE 16384 +#define MLXSW_SP_KVDL_CHUNKS_BASE \ + (MLXSW_SP_KVDL_SINGLE_BASE + MLXSW_SP_KVDL_SINGLE_SIZE) +#define MLXSW_SP_KVDL_CHUNKS_SIZE \ + (MLXSW_SP_KVD_LINEAR_SIZE - MLXSW_SP_KVDL_CHUNKS_BASE) +#define MLXSW_SP_CHUNK_MAX 32 + +int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count) +{ + int entry_index; + int size; + int type_base; + int type_size; + int type_entries; + + if (entry_count == 0 || entry_count > MLXSW_SP_CHUNK_MAX) { + return -EINVAL; + } else if (entry_count == 1) { + type_base = MLXSW_SP_KVDL_SINGLE_BASE; + type_size = MLXSW_SP_KVDL_SINGLE_SIZE; + type_entries = 1; + } else { + type_base = MLXSW_SP_KVDL_CHUNKS_BASE; + type_size = MLXSW_SP_KVDL_CHUNKS_SIZE; + type_entries = MLXSW_SP_CHUNK_MAX; + } + + entry_index = type_base; + size = type_base + type_size; + for_each_clear_bit_from(entry_index, mlxsw_sp->kvdl.usage, size) { + int i; + + for (i = 0; i < type_entries; i++) + set_bit(entry_index + i, mlxsw_sp->kvdl.usage); + return entry_index; + } + return -ENOBUFS; +} + +void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index) +{ + int type_entries; + int i; + + if (entry_index < MLXSW_SP_KVDL_CHUNKS_BASE) + type_entries = 1; + else + type_entries = MLXSW_SP_CHUNK_MAX; + for (i = 0; i < type_entries; i++) + clear_bit(entry_index + i, mlxsw_sp->kvdl.usage); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c new file mode 100644 index 000000000000..e084ea5448ac --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -0,0 +1,1814 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c + * Copyright (c) 2016 Mellanox Technologies. All rights reserved. + * Copyright (c) 2016 Jiri Pirko <[email protected]> + * Copyright (c) 2016 Ido Schimmel <[email protected]> + * Copyright (c) 2016 Yotam Gigi <[email protected]> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/rhashtable.h> +#include <linux/bitops.h> +#include <linux/in6.h> +#include <linux/notifier.h> +#include <net/netevent.h> +#include <net/neighbour.h> +#include <net/arp.h> + +#include "spectrum.h" +#include "core.h" +#include "reg.h" + +#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \ + for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT) + +static bool +mlxsw_sp_prefix_usage_subset(struct mlxsw_sp_prefix_usage *prefix_usage1, + struct mlxsw_sp_prefix_usage *prefix_usage2) +{ + unsigned char prefix; + + mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage1) { + if (!test_bit(prefix, prefix_usage2->b)) + return false; + } + return true; +} + +static bool +mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1, + struct mlxsw_sp_prefix_usage *prefix_usage2) +{ + return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1)); +} + +static bool +mlxsw_sp_prefix_usage_none(struct mlxsw_sp_prefix_usage *prefix_usage) +{ + struct mlxsw_sp_prefix_usage prefix_usage_none = {{ 0 } }; + + return mlxsw_sp_prefix_usage_eq(prefix_usage, &prefix_usage_none); +} + +static void +mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1, + struct mlxsw_sp_prefix_usage *prefix_usage2) +{ + memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1)); +} + +static void +mlxsw_sp_prefix_usage_zero(struct mlxsw_sp_prefix_usage *prefix_usage) +{ + memset(prefix_usage, 0, sizeof(*prefix_usage)); +} + +static void +mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage, + unsigned char prefix_len) +{ + set_bit(prefix_len, prefix_usage->b); +} + +static void +mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage, + unsigned char prefix_len) +{ + clear_bit(prefix_len, prefix_usage->b); +} + +struct mlxsw_sp_fib_key { + unsigned char addr[sizeof(struct in6_addr)]; + unsigned char prefix_len; +}; + +enum mlxsw_sp_fib_entry_type { + MLXSW_SP_FIB_ENTRY_TYPE_REMOTE, + MLXSW_SP_FIB_ENTRY_TYPE_LOCAL, + MLXSW_SP_FIB_ENTRY_TYPE_TRAP, +}; + +struct mlxsw_sp_nexthop_group; + +struct mlxsw_sp_fib_entry { + struct rhash_head ht_node; + struct mlxsw_sp_fib_key key; + enum mlxsw_sp_fib_entry_type type; + u8 added:1; + u16 rif; /* used for action local */ + struct mlxsw_sp_vr *vr; + struct list_head nexthop_group_node; + struct mlxsw_sp_nexthop_group *nh_group; +}; + +struct mlxsw_sp_fib { + struct rhashtable ht; + unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT]; + struct mlxsw_sp_prefix_usage prefix_usage; +}; + +static const struct rhashtable_params mlxsw_sp_fib_ht_params = { + .key_offset = offsetof(struct mlxsw_sp_fib_entry, key), + .head_offset = offsetof(struct mlxsw_sp_fib_entry, ht_node), + .key_len = sizeof(struct mlxsw_sp_fib_key), + .automatic_shrinking = true, +}; + +static int mlxsw_sp_fib_entry_insert(struct mlxsw_sp_fib *fib, + struct mlxsw_sp_fib_entry *fib_entry) +{ + unsigned char prefix_len = fib_entry->key.prefix_len; + int err; + + err = rhashtable_insert_fast(&fib->ht, &fib_entry->ht_node, + mlxsw_sp_fib_ht_params); + if (err) + return err; + if (fib->prefix_ref_count[prefix_len]++ == 0) + mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len); + return 0; +} + +static void mlxsw_sp_fib_entry_remove(struct mlxsw_sp_fib *fib, + struct mlxsw_sp_fib_entry *fib_entry) +{ + unsigned char prefix_len = fib_entry->key.prefix_len; + + if (--fib->prefix_ref_count[prefix_len] == 0) + mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len); + rhashtable_remove_fast(&fib->ht, &fib_entry->ht_node, + mlxsw_sp_fib_ht_params); +} + +static struct mlxsw_sp_fib_entry * +mlxsw_sp_fib_entry_create(struct mlxsw_sp_fib *fib, const void *addr, + size_t addr_len, unsigned char prefix_len) +{ + struct mlxsw_sp_fib_entry *fib_entry; + + fib_entry = kzalloc(sizeof(*fib_entry), GFP_KERNEL); + if (!fib_entry) + return NULL; + memcpy(fib_entry->key.addr, addr, addr_len); + fib_entry->key.prefix_len = prefix_len; + return fib_entry; +} + +static void mlxsw_sp_fib_entry_destroy(struct mlxsw_sp_fib_entry *fib_entry) +{ + kfree(fib_entry); +} + +static struct mlxsw_sp_fib_entry * +mlxsw_sp_fib_entry_lookup(struct mlxsw_sp_fib *fib, const void *addr, + size_t addr_len, unsigned char prefix_len) +{ + struct mlxsw_sp_fib_key key = {{ 0 } }; + + memcpy(key.addr, addr, addr_len); + key.prefix_len = prefix_len; + return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params); +} + +static struct mlxsw_sp_fib *mlxsw_sp_fib_create(void) +{ + struct mlxsw_sp_fib *fib; + int err; + + fib = kzalloc(sizeof(*fib), GFP_KERNEL); + if (!fib) + return ERR_PTR(-ENOMEM); + err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params); + if (err) + goto err_rhashtable_init; + return fib; + +err_rhashtable_init: + kfree(fib); + return ERR_PTR(err); +} + +static void mlxsw_sp_fib_destroy(struct mlxsw_sp_fib *fib) +{ + rhashtable_destroy(&fib->ht); + kfree(fib); +} + +static struct mlxsw_sp_lpm_tree * +mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp, bool one_reserved) +{ + static struct mlxsw_sp_lpm_tree *lpm_tree; + int i; + + for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) { + lpm_tree = &mlxsw_sp->router.lpm_trees[i]; + if (lpm_tree->ref_count == 0) { + if (one_reserved) + one_reserved = false; + else + return lpm_tree; + } + } + return NULL; +} + +static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_lpm_tree *lpm_tree) +{ + char ralta_pl[MLXSW_REG_RALTA_LEN]; + + mlxsw_reg_ralta_pack(ralta_pl, true, lpm_tree->proto, lpm_tree->id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl); +} + +static int mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_lpm_tree *lpm_tree) +{ + char ralta_pl[MLXSW_REG_RALTA_LEN]; + + mlxsw_reg_ralta_pack(ralta_pl, false, lpm_tree->proto, lpm_tree->id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl); +} + +static int +mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_prefix_usage *prefix_usage, + struct mlxsw_sp_lpm_tree *lpm_tree) +{ + char ralst_pl[MLXSW_REG_RALST_LEN]; + u8 root_bin = 0; + u8 prefix; + u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD; + + mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) + root_bin = prefix; + + mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id); + mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) { + if (prefix == 0) + continue; + mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix, + MLXSW_REG_RALST_BIN_NO_CHILD); + last_prefix = prefix; + } + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl); +} + +static struct mlxsw_sp_lpm_tree * +mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_prefix_usage *prefix_usage, + enum mlxsw_sp_l3proto proto, bool one_reserved) +{ + struct mlxsw_sp_lpm_tree *lpm_tree; + int err; + + lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp, one_reserved); + if (!lpm_tree) + return ERR_PTR(-EBUSY); + lpm_tree->proto = proto; + err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree); + if (err) + return ERR_PTR(err); + + err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage, + lpm_tree); + if (err) + goto err_left_struct_set; + return lpm_tree; + +err_left_struct_set: + mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree); + return ERR_PTR(err); +} + +static int mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_lpm_tree *lpm_tree) +{ + return mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree); +} + +static struct mlxsw_sp_lpm_tree * +mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_prefix_usage *prefix_usage, + enum mlxsw_sp_l3proto proto, bool one_reserved) +{ + struct mlxsw_sp_lpm_tree *lpm_tree; + int i; + + for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) { + lpm_tree = &mlxsw_sp->router.lpm_trees[i]; + if (lpm_tree->proto == proto && + mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage, + prefix_usage)) + goto inc_ref_count; + } + lpm_tree = mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, + proto, one_reserved); + if (IS_ERR(lpm_tree)) + return lpm_tree; + +inc_ref_count: + lpm_tree->ref_count++; + return lpm_tree; +} + +static int mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_lpm_tree *lpm_tree) +{ + if (--lpm_tree->ref_count == 0) + return mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree); + return 0; +} + +static void mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_lpm_tree *lpm_tree; + int i; + + for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) { + lpm_tree = &mlxsw_sp->router.lpm_trees[i]; + lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN; + } +} + +static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_vr *vr; + int i; + + for (i = 0; i < MLXSW_SP_VIRTUAL_ROUTER_MAX; i++) { + vr = &mlxsw_sp->router.vrs[i]; + if (!vr->used) + return vr; + } + return NULL; +} + +static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_vr *vr) +{ + char raltb_pl[MLXSW_REG_RALTB_LEN]; + + mlxsw_reg_raltb_pack(raltb_pl, vr->id, vr->proto, vr->lpm_tree->id); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl); +} + +static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_vr *vr) +{ + char raltb_pl[MLXSW_REG_RALTB_LEN]; + + /* Bind to tree 0 which is default */ + mlxsw_reg_raltb_pack(raltb_pl, vr->id, vr->proto, 0); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl); +} + +static u32 mlxsw_sp_fix_tb_id(u32 tb_id) +{ + /* For our purpose, squash main and local table into one */ + if (tb_id == RT_TABLE_LOCAL) + tb_id = RT_TABLE_MAIN; + return tb_id; +} + +static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp, + u32 tb_id, + enum mlxsw_sp_l3proto proto) +{ + struct mlxsw_sp_vr *vr; + int i; + + tb_id = mlxsw_sp_fix_tb_id(tb_id); + for (i = 0; i < MLXSW_SP_VIRTUAL_ROUTER_MAX; i++) { + vr = &mlxsw_sp->router.vrs[i]; + if (vr->used && vr->proto == proto && vr->tb_id == tb_id) + return vr; + } + return NULL; +} + +static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp, + unsigned char prefix_len, + u32 tb_id, + enum mlxsw_sp_l3proto proto) +{ + struct mlxsw_sp_prefix_usage req_prefix_usage; + struct mlxsw_sp_lpm_tree *lpm_tree; + struct mlxsw_sp_vr *vr; + int err; + + vr = mlxsw_sp_vr_find_unused(mlxsw_sp); + if (!vr) + return ERR_PTR(-EBUSY); + vr->fib = mlxsw_sp_fib_create(); + if (IS_ERR(vr->fib)) + return ERR_CAST(vr->fib); + + vr->proto = proto; + vr->tb_id = tb_id; + mlxsw_sp_prefix_usage_zero(&req_prefix_usage); + mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len); + lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage, + proto, true); + if (IS_ERR(lpm_tree)) { + err = PTR_ERR(lpm_tree); + goto err_tree_get; + } + vr->lpm_tree = lpm_tree; + err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr); + if (err) + goto err_tree_bind; + + vr->used = true; + return vr; + +err_tree_bind: + mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree); +err_tree_get: + mlxsw_sp_fib_destroy(vr->fib); + + return ERR_PTR(err); +} + +static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_vr *vr) +{ + mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr); + mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree); + mlxsw_sp_fib_destroy(vr->fib); + vr->used = false; +} + +static int +mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr, + struct mlxsw_sp_prefix_usage *req_prefix_usage) +{ + struct mlxsw_sp_lpm_tree *lpm_tree; + + if (mlxsw_sp_prefix_usage_eq(req_prefix_usage, + &vr->lpm_tree->prefix_usage)) + return 0; + + lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, req_prefix_usage, + vr->proto, false); + if (IS_ERR(lpm_tree)) { + /* We failed to get a tree according to the required + * prefix usage. However, the current tree might be still good + * for us if our requirement is subset of the prefixes used + * in the tree. + */ + if (mlxsw_sp_prefix_usage_subset(req_prefix_usage, + &vr->lpm_tree->prefix_usage)) + return 0; + return PTR_ERR(lpm_tree); + } + + mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr); + mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree); + vr->lpm_tree = lpm_tree; + return mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr); +} + +static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, + unsigned char prefix_len, + u32 tb_id, + enum mlxsw_sp_l3proto proto) +{ + struct mlxsw_sp_vr *vr; + int err; + + tb_id = mlxsw_sp_fix_tb_id(tb_id); + vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id, proto); + if (!vr) { + vr = mlxsw_sp_vr_create(mlxsw_sp, prefix_len, tb_id, proto); + if (IS_ERR(vr)) + return vr; + } else { + struct mlxsw_sp_prefix_usage req_prefix_usage; + + mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, + &vr->fib->prefix_usage); + mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len); + /* Need to replace LPM tree in case new prefix is required. */ + err = mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr, + &req_prefix_usage); + if (err) + return ERR_PTR(err); + } + return vr; +} + +static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr) +{ + /* Destroy virtual router entity in case the associated FIB is empty + * and allow it to be used for other tables in future. Otherwise, + * check if some prefix usage did not disappear and change tree if + * that is the case. Note that in case new, smaller tree cannot be + * allocated, the original one will be kept being used. + */ + if (mlxsw_sp_prefix_usage_none(&vr->fib->prefix_usage)) + mlxsw_sp_vr_destroy(mlxsw_sp, vr); + else + mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr, + &vr->fib->prefix_usage); +} + +static void mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_vr *vr; + int i; + + for (i = 0; i < MLXSW_SP_VIRTUAL_ROUTER_MAX; i++) { + vr = &mlxsw_sp->router.vrs[i]; + vr->id = i; + } +} + +struct mlxsw_sp_neigh_key { + unsigned char addr[sizeof(struct in6_addr)]; + struct net_device *dev; +}; + +struct mlxsw_sp_neigh_entry { + struct rhash_head ht_node; + struct mlxsw_sp_neigh_key key; + u16 rif; + struct neighbour *n; + bool offloaded; + struct delayed_work dw; + struct mlxsw_sp_port *mlxsw_sp_port; + unsigned char ha[ETH_ALEN]; + struct list_head nexthop_list; /* list of nexthops using + * this neigh entry + */ + struct list_head nexthop_neighs_list_node; +}; + +static const struct rhashtable_params mlxsw_sp_neigh_ht_params = { + .key_offset = offsetof(struct mlxsw_sp_neigh_entry, key), + .head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node), + .key_len = sizeof(struct mlxsw_sp_neigh_key), +}; + +static int +mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry) +{ + return rhashtable_insert_fast(&mlxsw_sp->router.neigh_ht, + &neigh_entry->ht_node, + mlxsw_sp_neigh_ht_params); +} + +static void +mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry) +{ + rhashtable_remove_fast(&mlxsw_sp->router.neigh_ht, + &neigh_entry->ht_node, + mlxsw_sp_neigh_ht_params); +} + +static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work); + +static struct mlxsw_sp_neigh_entry * +mlxsw_sp_neigh_entry_create(const void *addr, size_t addr_len, + struct net_device *dev, u16 rif, + struct neighbour *n) +{ + struct mlxsw_sp_neigh_entry *neigh_entry; + + neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_ATOMIC); + if (!neigh_entry) + return NULL; + memcpy(neigh_entry->key.addr, addr, addr_len); + neigh_entry->key.dev = dev; + neigh_entry->rif = rif; + neigh_entry->n = n; + INIT_DELAYED_WORK(&neigh_entry->dw, mlxsw_sp_router_neigh_update_hw); + INIT_LIST_HEAD(&neigh_entry->nexthop_list); + return neigh_entry; +} + +static void +mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp_neigh_entry *neigh_entry) +{ + kfree(neigh_entry); +} + +static struct mlxsw_sp_neigh_entry * +mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, const void *addr, + size_t addr_len, struct net_device *dev) +{ + struct mlxsw_sp_neigh_key key = {{ 0 } }; + + memcpy(key.addr, addr, addr_len); + key.dev = dev; + return rhashtable_lookup_fast(&mlxsw_sp->router.neigh_ht, + &key, mlxsw_sp_neigh_ht_params); +} + +int mlxsw_sp_router_neigh_construct(struct net_device *dev, + struct neighbour *n) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_neigh_entry *neigh_entry; + struct mlxsw_sp_rif *r; + u32 dip; + int err; + + if (n->tbl != &arp_tbl) + return 0; + + dip = ntohl(*((__be32 *) n->primary_key)); + neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &dip, sizeof(dip), + n->dev); + if (neigh_entry) { + WARN_ON(neigh_entry->n != n); + return 0; + } + + r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (WARN_ON(!r)) + return -EINVAL; + + neigh_entry = mlxsw_sp_neigh_entry_create(&dip, sizeof(dip), n->dev, + r->rif, n); + if (!neigh_entry) + return -ENOMEM; + err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry); + if (err) + goto err_neigh_entry_insert; + return 0; + +err_neigh_entry_insert: + mlxsw_sp_neigh_entry_destroy(neigh_entry); + return err; +} + +void mlxsw_sp_router_neigh_destroy(struct net_device *dev, + struct neighbour *n) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_neigh_entry *neigh_entry; + u32 dip; + + if (n->tbl != &arp_tbl) + return; + + dip = ntohl(*((__be32 *) n->primary_key)); + neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &dip, sizeof(dip), + n->dev); + if (!neigh_entry) + return; + mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry); + mlxsw_sp_neigh_entry_destroy(neigh_entry); +} + +static void +mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp) +{ + unsigned long interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME); + + mlxsw_sp->router.neighs_update.interval = jiffies_to_msecs(interval); +} + +static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp, + char *rauhtd_pl, + int ent_index) +{ + struct net_device *dev; + struct neighbour *n; + __be32 dipn; + u32 dip; + u16 rif; + + mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip); + + if (!mlxsw_sp->rifs[rif]) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n"); + return; + } + + dipn = htonl(dip); + dev = mlxsw_sp->rifs[rif]->dev; + n = neigh_lookup(&arp_tbl, &dipn, dev); + if (!n) { + netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n", + &dip); + return; + } + + netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip); + neigh_event_send(n, NULL); + neigh_release(n); +} + +static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp, + char *rauhtd_pl, + int rec_index) +{ + u8 num_entries; + int i; + + num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl, + rec_index); + /* Hardware starts counting at 0, so add 1. */ + num_entries++; + + /* Each record consists of several neighbour entries. */ + for (i = 0; i < num_entries; i++) { + int ent_index; + + ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i; + mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl, + ent_index); + } + +} + +static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp, + char *rauhtd_pl, int rec_index) +{ + switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) { + case MLXSW_REG_RAUHTD_TYPE_IPV4: + mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl, + rec_index); + break; + case MLXSW_REG_RAUHTD_TYPE_IPV6: + WARN_ON_ONCE(1); + break; + } +} + +static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp) +{ + char *rauhtd_pl; + u8 num_rec; + int i, err; + + rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL); + if (!rauhtd_pl) + return -ENOMEM; + + /* Make sure the neighbour's netdev isn't removed in the + * process. + */ + rtnl_lock(); + do { + mlxsw_reg_rauhtd_pack(rauhtd_pl, MLXSW_REG_RAUHTD_TYPE_IPV4); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd), + rauhtd_pl); + if (err) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour talbe\n"); + break; + } + num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl); + for (i = 0; i < num_rec; i++) + mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl, + i); + } while (num_rec); + rtnl_unlock(); + + kfree(rauhtd_pl); + return err; +} + +static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_neigh_entry *neigh_entry; + + /* Take RTNL mutex here to prevent lists from changes */ + rtnl_lock(); + list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list, + nexthop_neighs_list_node) { + /* If this neigh have nexthops, make the kernel think this neigh + * is active regardless of the traffic. + */ + if (!list_empty(&neigh_entry->nexthop_list)) + neigh_event_send(neigh_entry->n, NULL); + } + rtnl_unlock(); +} + +static void +mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp) +{ + unsigned long interval = mlxsw_sp->router.neighs_update.interval; + + mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw, + msecs_to_jiffies(interval)); +} + +static void mlxsw_sp_router_neighs_update_work(struct work_struct *work) +{ + struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp, + router.neighs_update.dw.work); + int err; + + err = mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp); + if (err) + dev_err(mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity"); + + mlxsw_sp_router_neighs_update_nh(mlxsw_sp); + + mlxsw_sp_router_neighs_update_work_schedule(mlxsw_sp); +} + +static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work) +{ + struct mlxsw_sp_neigh_entry *neigh_entry; + struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp, + router.nexthop_probe_dw.work); + + /* Iterate over nexthop neighbours, find those who are unresolved and + * send arp on them. This solves the chicken-egg problem when + * the nexthop wouldn't get offloaded until the neighbor is resolved + * but it wouldn't get resolved ever in case traffic is flowing in HW + * using different nexthop. + * + * Take RTNL mutex here to prevent lists from changes. + */ + rtnl_lock(); + list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list, + nexthop_neighs_list_node) { + if (!(neigh_entry->n->nud_state & NUD_VALID) && + !list_empty(&neigh_entry->nexthop_list)) + neigh_event_send(neigh_entry->n, NULL); + } + rtnl_unlock(); + + mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw, + MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL); +} + +static void +mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry, + bool removing); + +static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work) +{ + struct mlxsw_sp_neigh_entry *neigh_entry = + container_of(work, struct mlxsw_sp_neigh_entry, dw.work); + struct neighbour *n = neigh_entry->n; + struct mlxsw_sp_port *mlxsw_sp_port = neigh_entry->mlxsw_sp_port; + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char rauht_pl[MLXSW_REG_RAUHT_LEN]; + struct net_device *dev; + bool entry_connected; + u8 nud_state; + bool updating; + bool removing; + bool adding; + u32 dip; + int err; + + read_lock_bh(&n->lock); + dip = ntohl(*((__be32 *) n->primary_key)); + memcpy(neigh_entry->ha, n->ha, sizeof(neigh_entry->ha)); + nud_state = n->nud_state; + dev = n->dev; + read_unlock_bh(&n->lock); + + entry_connected = nud_state & NUD_VALID; + adding = (!neigh_entry->offloaded) && entry_connected; + updating = neigh_entry->offloaded && entry_connected; + removing = neigh_entry->offloaded && !entry_connected; + + if (adding || updating) { + mlxsw_reg_rauht_pack4(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_ADD, + neigh_entry->rif, + neigh_entry->ha, dip); + err = mlxsw_reg_write(mlxsw_sp->core, + MLXSW_REG(rauht), rauht_pl); + if (err) { + netdev_err(dev, "Could not add neigh %pI4h\n", &dip); + neigh_entry->offloaded = false; + } else { + neigh_entry->offloaded = true; + } + mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, false); + } else if (removing) { + mlxsw_reg_rauht_pack4(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE, + neigh_entry->rif, + neigh_entry->ha, dip); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), + rauht_pl); + if (err) { + netdev_err(dev, "Could not delete neigh %pI4h\n", &dip); + neigh_entry->offloaded = true; + } else { + neigh_entry->offloaded = false; + } + mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, true); + } + + neigh_release(n); + mlxsw_sp_port_dev_put(mlxsw_sp_port); +} + +static int mlxsw_sp_router_netevent_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct mlxsw_sp_neigh_entry *neigh_entry; + struct mlxsw_sp_port *mlxsw_sp_port; + struct mlxsw_sp *mlxsw_sp; + unsigned long interval; + struct net_device *dev; + struct neigh_parms *p; + struct neighbour *n; + u32 dip; + + switch (event) { + case NETEVENT_DELAY_PROBE_TIME_UPDATE: + p = ptr; + + /* We don't care about changes in the default table. */ + if (!p->dev || p->tbl != &arp_tbl) + return NOTIFY_DONE; + + /* We are in atomic context and can't take RTNL mutex, + * so use RCU variant to walk the device chain. + */ + mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev); + if (!mlxsw_sp_port) + return NOTIFY_DONE; + + mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME)); + mlxsw_sp->router.neighs_update.interval = interval; + + mlxsw_sp_port_dev_put(mlxsw_sp_port); + break; + case NETEVENT_NEIGH_UPDATE: + n = ptr; + dev = n->dev; + + if (n->tbl != &arp_tbl) + return NOTIFY_DONE; + + mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(dev); + if (!mlxsw_sp_port) + return NOTIFY_DONE; + + mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + dip = ntohl(*((__be32 *) n->primary_key)); + neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, + &dip, + sizeof(__be32), + dev); + if (WARN_ON(!neigh_entry) || WARN_ON(neigh_entry->n != n)) { + mlxsw_sp_port_dev_put(mlxsw_sp_port); + return NOTIFY_DONE; + } + neigh_entry->mlxsw_sp_port = mlxsw_sp_port; + + /* Take a reference to ensure the neighbour won't be + * destructed until we drop the reference in delayed + * work. + */ + neigh_clone(n); + if (!mlxsw_core_schedule_dw(&neigh_entry->dw, 0)) { + neigh_release(n); + mlxsw_sp_port_dev_put(mlxsw_sp_port); + } + break; + } + + return NOTIFY_DONE; +} + +static struct notifier_block mlxsw_sp_router_netevent_nb __read_mostly = { + .notifier_call = mlxsw_sp_router_netevent_event, +}; + +static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp) +{ + int err; + + err = rhashtable_init(&mlxsw_sp->router.neigh_ht, + &mlxsw_sp_neigh_ht_params); + if (err) + return err; + + /* Initialize the polling interval according to the default + * table. + */ + mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp); + + err = register_netevent_notifier(&mlxsw_sp_router_netevent_nb); + if (err) + goto err_register_netevent_notifier; + + /* Create the delayed works for the activity_update */ + INIT_DELAYED_WORK(&mlxsw_sp->router.neighs_update.dw, + mlxsw_sp_router_neighs_update_work); + INIT_DELAYED_WORK(&mlxsw_sp->router.nexthop_probe_dw, + mlxsw_sp_router_probe_unresolved_nexthops); + mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw, 0); + mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw, 0); + return 0; + +err_register_netevent_notifier: + rhashtable_destroy(&mlxsw_sp->router.neigh_ht); + return err; +} + +static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp) +{ + cancel_delayed_work_sync(&mlxsw_sp->router.neighs_update.dw); + cancel_delayed_work_sync(&mlxsw_sp->router.nexthop_probe_dw); + unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb); + rhashtable_destroy(&mlxsw_sp->router.neigh_ht); +} + +struct mlxsw_sp_nexthop { + struct list_head neigh_list_node; /* member of neigh entry list */ + struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group + * this belongs to + */ + u8 should_offload:1, /* set indicates this neigh is connected and + * should be put to KVD linear area of this group. + */ + offloaded:1, /* set in case the neigh is actually put into + * KVD linear area of this group. + */ + update:1; /* set indicates that MAC of this neigh should be + * updated in HW + */ + struct mlxsw_sp_neigh_entry *neigh_entry; +}; + +struct mlxsw_sp_nexthop_group { + struct list_head list; /* node in mlxsw->router.nexthop_group_list */ + struct list_head fib_list; /* list of fib entries that use this group */ + u8 adj_index_valid:1; + u32 adj_index; + u16 ecmp_size; + u16 count; + struct mlxsw_sp_nexthop nexthops[0]; +}; + +static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_vr *vr, + u32 adj_index, u16 ecmp_size, + u32 new_adj_index, + u16 new_ecmp_size) +{ + char raleu_pl[MLXSW_REG_RALEU_LEN]; + + mlxsw_reg_raleu_pack(raleu_pl, vr->proto, vr->id, + adj_index, ecmp_size, + new_adj_index, new_ecmp_size); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl); +} + +static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp, + u32 old_adj_index, u16 old_ecmp_size) +{ + struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_vr *vr = NULL; + int err; + + list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) { + if (vr == fib_entry->vr) + continue; + vr = fib_entry->vr; + err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, vr, + old_adj_index, + old_ecmp_size, + nh_grp->adj_index, + nh_grp->ecmp_size); + if (err) + return err; + } + return 0; +} + +static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, + struct mlxsw_sp_nexthop *nh) +{ + struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry; + char ratr_pl[MLXSW_REG_RATR_LEN]; + + mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY, + true, adj_index, neigh_entry->rif); + mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl); +} + +static int +mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + u32 adj_index = nh_grp->adj_index; /* base */ + struct mlxsw_sp_nexthop *nh; + int i; + int err; + + for (i = 0; i < nh_grp->count; i++) { + nh = &nh_grp->nexthops[i]; + + if (!nh->should_offload) { + nh->offloaded = 0; + continue; + } + + if (nh->update) { + err = mlxsw_sp_nexthop_mac_update(mlxsw_sp, + adj_index, nh); + if (err) + return err; + nh->update = 0; + nh->offloaded = 1; + } + adj_index++; + } + return 0; +} + +static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry); + +static int +mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + struct mlxsw_sp_fib_entry *fib_entry; + int err; + + list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) { + err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); + if (err) + return err; + } + return 0; +} + +static void +mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + struct mlxsw_sp_nexthop *nh; + bool offload_change = false; + u32 adj_index; + u16 ecmp_size = 0; + bool old_adj_index_valid; + u32 old_adj_index; + u16 old_ecmp_size; + int ret; + int i; + int err; + + for (i = 0; i < nh_grp->count; i++) { + nh = &nh_grp->nexthops[i]; + + if (nh->should_offload ^ nh->offloaded) { + offload_change = true; + if (nh->should_offload) + nh->update = 1; + } + if (nh->should_offload) + ecmp_size++; + } + if (!offload_change) { + /* Nothing was added or removed, so no need to reallocate. Just + * update MAC on existing adjacency indexes. + */ + err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp); + if (err) { + dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n"); + goto set_trap; + } + return; + } + if (!ecmp_size) + /* No neigh of this group is connected so we just set + * the trap and let everthing flow through kernel. + */ + goto set_trap; + + ret = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size); + if (ret < 0) { + /* We ran out of KVD linear space, just set the + * trap and let everything flow through kernel. + */ + dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n"); + goto set_trap; + } + adj_index = ret; + old_adj_index_valid = nh_grp->adj_index_valid; + old_adj_index = nh_grp->adj_index; + old_ecmp_size = nh_grp->ecmp_size; + nh_grp->adj_index_valid = 1; + nh_grp->adj_index = adj_index; + nh_grp->ecmp_size = ecmp_size; + err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp); + if (err) { + dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n"); + goto set_trap; + } + + if (!old_adj_index_valid) { + /* The trap was set for fib entries, so we have to call + * fib entry update to unset it and use adjacency index. + */ + err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp); + if (err) { + dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n"); + goto set_trap; + } + return; + } + + err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp, + old_adj_index, old_ecmp_size); + mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index); + if (err) { + dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n"); + goto set_trap; + } + return; + +set_trap: + old_adj_index_valid = nh_grp->adj_index_valid; + nh_grp->adj_index_valid = 0; + for (i = 0; i < nh_grp->count; i++) { + nh = &nh_grp->nexthops[i]; + nh->offloaded = 0; + } + err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp); + if (err) + dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n"); + if (old_adj_index_valid) + mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index); +} + +static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh, + bool removing) +{ + if (!removing && !nh->should_offload) + nh->should_offload = 1; + else if (removing && nh->offloaded) + nh->should_offload = 0; + nh->update = 1; +} + +static void +mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry, + bool removing) +{ + struct mlxsw_sp_nexthop *nh; + + /* Take RTNL mutex here to prevent lists from changes */ + rtnl_lock(); + list_for_each_entry(nh, &neigh_entry->nexthop_list, + neigh_list_node) { + __mlxsw_sp_nexthop_neigh_update(nh, removing); + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp); + } + rtnl_unlock(); +} + +static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp, + struct mlxsw_sp_nexthop *nh, + struct fib_nh *fib_nh) +{ + struct mlxsw_sp_neigh_entry *neigh_entry; + u32 gwip = ntohl(fib_nh->nh_gw); + struct net_device *dev = fib_nh->nh_dev; + struct neighbour *n; + u8 nud_state; + + neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &gwip, + sizeof(gwip), dev); + if (!neigh_entry) { + __be32 gwipn = htonl(gwip); + + n = neigh_create(&arp_tbl, &gwipn, dev); + if (IS_ERR(n)) + return PTR_ERR(n); + neigh_event_send(n, NULL); + neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &gwip, + sizeof(gwip), dev); + if (!neigh_entry) { + neigh_release(n); + return -EINVAL; + } + } else { + /* Take a reference of neigh here ensuring that neigh would + * not be detructed before the nexthop entry is finished. + * The second branch takes the reference in neith_create() + */ + n = neigh_entry->n; + neigh_clone(n); + } + + /* If that is the first nexthop connected to that neigh, add to + * nexthop_neighs_list + */ + if (list_empty(&neigh_entry->nexthop_list)) + list_add_tail(&neigh_entry->nexthop_neighs_list_node, + &mlxsw_sp->router.nexthop_neighs_list); + + nh->nh_grp = nh_grp; + nh->neigh_entry = neigh_entry; + list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list); + read_lock_bh(&n->lock); + nud_state = n->nud_state; + read_unlock_bh(&n->lock); + __mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID)); + + return 0; +} + +static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry; + + list_del(&nh->neigh_list_node); + + /* If that is the last nexthop connected to that neigh, remove from + * nexthop_neighs_list + */ + if (list_empty(&nh->neigh_entry->nexthop_list)) + list_del(&nh->neigh_entry->nexthop_neighs_list_node); + + neigh_release(neigh_entry->n); +} + +static struct mlxsw_sp_nexthop_group * +mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) +{ + struct mlxsw_sp_nexthop_group *nh_grp; + struct mlxsw_sp_nexthop *nh; + struct fib_nh *fib_nh; + size_t alloc_size; + int i; + int err; + + alloc_size = sizeof(*nh_grp) + + fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop); + nh_grp = kzalloc(alloc_size, GFP_KERNEL); + if (!nh_grp) + return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&nh_grp->fib_list); + nh_grp->count = fi->fib_nhs; + for (i = 0; i < nh_grp->count; i++) { + nh = &nh_grp->nexthops[i]; + fib_nh = &fi->fib_nh[i]; + err = mlxsw_sp_nexthop_init(mlxsw_sp, nh_grp, nh, fib_nh); + if (err) + goto err_nexthop_init; + } + list_add_tail(&nh_grp->list, &mlxsw_sp->router.nexthop_group_list); + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); + return nh_grp; + +err_nexthop_init: + for (i--; i >= 0; i--) + mlxsw_sp_nexthop_fini(mlxsw_sp, nh); + kfree(nh_grp); + return ERR_PTR(err); +} + +static void +mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + struct mlxsw_sp_nexthop *nh; + int i; + + list_del(&nh_grp->list); + for (i = 0; i < nh_grp->count; i++) { + nh = &nh_grp->nexthops[i]; + mlxsw_sp_nexthop_fini(mlxsw_sp, nh); + } + kfree(nh_grp); +} + +static bool mlxsw_sp_nexthop_match(struct mlxsw_sp_nexthop *nh, + struct fib_info *fi) +{ + int i; + + for (i = 0; i < fi->fib_nhs; i++) { + struct fib_nh *fib_nh = &fi->fib_nh[i]; + u32 gwip = ntohl(fib_nh->nh_gw); + + if (memcmp(nh->neigh_entry->key.addr, + &gwip, sizeof(u32)) == 0 && + nh->neigh_entry->key.dev == fib_nh->nh_dev) + return true; + } + return false; +} + +static bool mlxsw_sp_nexthop_group_match(struct mlxsw_sp_nexthop_group *nh_grp, + struct fib_info *fi) +{ + int i; + + if (nh_grp->count != fi->fib_nhs) + return false; + for (i = 0; i < nh_grp->count; i++) { + struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; + + if (!mlxsw_sp_nexthop_match(nh, fi)) + return false; + } + return true; +} + +static struct mlxsw_sp_nexthop_group * +mlxsw_sp_nexthop_group_find(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) +{ + struct mlxsw_sp_nexthop_group *nh_grp; + + list_for_each_entry(nh_grp, &mlxsw_sp->router.nexthop_group_list, + list) { + if (mlxsw_sp_nexthop_group_match(nh_grp, fi)) + return nh_grp; + } + return NULL; +} + +static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + struct fib_info *fi) +{ + struct mlxsw_sp_nexthop_group *nh_grp; + + nh_grp = mlxsw_sp_nexthop_group_find(mlxsw_sp, fi); + if (!nh_grp) { + nh_grp = mlxsw_sp_nexthop_group_create(mlxsw_sp, fi); + if (IS_ERR(nh_grp)) + return PTR_ERR(nh_grp); + } + list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list); + fib_entry->nh_group = nh_grp; + return 0; +} + +static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; + + list_del(&fib_entry->nexthop_group_node); + if (!list_empty(&nh_grp->fib_list)) + return; + mlxsw_sp_nexthop_group_destroy(mlxsw_sp, nh_grp); +} + +static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) +{ + char rgcr_pl[MLXSW_REG_RGCR_LEN]; + + mlxsw_reg_rgcr_pack(rgcr_pl, true); + mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, MLXSW_SP_RIF_MAX); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); +} + +static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) +{ + char rgcr_pl[MLXSW_REG_RGCR_LEN]; + + mlxsw_reg_rgcr_pack(rgcr_pl, false); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); +} + +int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) +{ + int err; + + INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_neighs_list); + INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_group_list); + err = __mlxsw_sp_router_init(mlxsw_sp); + if (err) + return err; + mlxsw_sp_lpm_init(mlxsw_sp); + mlxsw_sp_vrs_init(mlxsw_sp); + return mlxsw_sp_neigh_init(mlxsw_sp); +} + +void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) +{ + mlxsw_sp_neigh_fini(mlxsw_sp); + __mlxsw_sp_router_fini(mlxsw_sp); +} + +static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + char ralue_pl[MLXSW_REG_RALUE_LEN]; + u32 *p_dip = (u32 *) fib_entry->key.addr; + struct mlxsw_sp_vr *vr = fib_entry->vr; + enum mlxsw_reg_ralue_trap_action trap_action; + u16 trap_id = 0; + u32 adjacency_index = 0; + u16 ecmp_size = 0; + + /* In case the nexthop group adjacency index is valid, use it + * with provided ECMP size. Otherwise, setup trap and pass + * traffic to kernel. + */ + if (fib_entry->nh_group->adj_index_valid) { + trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP; + adjacency_index = fib_entry->nh_group->adj_index; + ecmp_size = fib_entry->nh_group->ecmp_size; + } else { + trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP; + trap_id = MLXSW_TRAP_ID_RTR_INGRESS0; + } + + mlxsw_reg_ralue_pack4(ralue_pl, vr->proto, op, vr->id, + fib_entry->key.prefix_len, *p_dip); + mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id, + adjacency_index, ecmp_size); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); +} + +static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + char ralue_pl[MLXSW_REG_RALUE_LEN]; + u32 *p_dip = (u32 *) fib_entry->key.addr; + struct mlxsw_sp_vr *vr = fib_entry->vr; + + mlxsw_reg_ralue_pack4(ralue_pl, vr->proto, op, vr->id, + fib_entry->key.prefix_len, *p_dip); + mlxsw_reg_ralue_act_local_pack(ralue_pl, + MLXSW_REG_RALUE_TRAP_ACTION_NOP, 0, + fib_entry->rif); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); +} + +static int mlxsw_sp_fib_entry_op4_trap(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + char ralue_pl[MLXSW_REG_RALUE_LEN]; + u32 *p_dip = (u32 *) fib_entry->key.addr; + struct mlxsw_sp_vr *vr = fib_entry->vr; + + mlxsw_reg_ralue_pack4(ralue_pl, vr->proto, op, vr->id, + fib_entry->key.prefix_len, *p_dip); + mlxsw_reg_ralue_act_ip2me_pack(ralue_pl); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); +} + +static int mlxsw_sp_fib_entry_op4(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + switch (fib_entry->type) { + case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE: + return mlxsw_sp_fib_entry_op4_remote(mlxsw_sp, fib_entry, op); + case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL: + return mlxsw_sp_fib_entry_op4_local(mlxsw_sp, fib_entry, op); + case MLXSW_SP_FIB_ENTRY_TYPE_TRAP: + return mlxsw_sp_fib_entry_op4_trap(mlxsw_sp, fib_entry, op); + } + return -EINVAL; +} + +static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + switch (fib_entry->vr->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + return mlxsw_sp_fib_entry_op4(mlxsw_sp, fib_entry, op); + case MLXSW_SP_L3_PROTO_IPV6: + return -EINVAL; + } + return -EINVAL; +} + +static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + enum mlxsw_reg_ralue_op op; + + op = !fib_entry->added ? MLXSW_REG_RALUE_OP_WRITE_WRITE : + MLXSW_REG_RALUE_OP_WRITE_UPDATE; + return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op); +} + +static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, + MLXSW_REG_RALUE_OP_WRITE_DELETE); +} + +struct mlxsw_sp_router_fib4_add_info { + struct switchdev_trans_item tritem; + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_fib_entry *fib_entry; +}; + +static void mlxsw_sp_router_fib4_add_info_destroy(void const *data) +{ + const struct mlxsw_sp_router_fib4_add_info *info = data; + struct mlxsw_sp_fib_entry *fib_entry = info->fib_entry; + struct mlxsw_sp *mlxsw_sp = info->mlxsw_sp; + + mlxsw_sp_fib_entry_destroy(fib_entry); + mlxsw_sp_vr_put(mlxsw_sp, fib_entry->vr); + kfree(info); +} + +static int +mlxsw_sp_router_fib4_entry_init(struct mlxsw_sp *mlxsw_sp, + const struct switchdev_obj_ipv4_fib *fib4, + struct mlxsw_sp_fib_entry *fib_entry) +{ + struct fib_info *fi = fib4->fi; + + if (fib4->type == RTN_LOCAL || fib4->type == RTN_BROADCAST) { + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; + return 0; + } + if (fib4->type != RTN_UNICAST) + return -EINVAL; + + if (fi->fib_scope != RT_SCOPE_UNIVERSE) { + struct mlxsw_sp_rif *r; + + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; + r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, fi->fib_dev); + if (!r) + return -EINVAL; + fib_entry->rif = r->rif; + return 0; + } + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE; + return mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fi); +} + +static void +mlxsw_sp_router_fib4_entry_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_REMOTE) + return; + mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry); +} + +static int +mlxsw_sp_router_fib4_add_prepare(struct mlxsw_sp_port *mlxsw_sp_port, + const struct switchdev_obj_ipv4_fib *fib4, + struct switchdev_trans *trans) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_router_fib4_add_info *info; + struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_vr *vr; + int err; + + vr = mlxsw_sp_vr_get(mlxsw_sp, fib4->dst_len, fib4->tb_id, + MLXSW_SP_L3_PROTO_IPV4); + if (IS_ERR(vr)) + return PTR_ERR(vr); + + fib_entry = mlxsw_sp_fib_entry_create(vr->fib, &fib4->dst, + sizeof(fib4->dst), fib4->dst_len); + if (!fib_entry) { + err = -ENOMEM; + goto err_fib_entry_create; + } + fib_entry->vr = vr; + + err = mlxsw_sp_router_fib4_entry_init(mlxsw_sp, fib4, fib_entry); + if (err) + goto err_fib4_entry_init; + + info = kmalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + err = -ENOMEM; + goto err_alloc_info; + } + info->mlxsw_sp = mlxsw_sp; + info->fib_entry = fib_entry; + switchdev_trans_item_enqueue(trans, info, + mlxsw_sp_router_fib4_add_info_destroy, + &info->tritem); + return 0; + +err_alloc_info: + mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry); +err_fib4_entry_init: + mlxsw_sp_fib_entry_destroy(fib_entry); +err_fib_entry_create: + mlxsw_sp_vr_put(mlxsw_sp, vr); + return err; +} + +static int +mlxsw_sp_router_fib4_add_commit(struct mlxsw_sp_port *mlxsw_sp_port, + const struct switchdev_obj_ipv4_fib *fib4, + struct switchdev_trans *trans) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_router_fib4_add_info *info; + struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_vr *vr; + int err; + + info = switchdev_trans_item_dequeue(trans); + fib_entry = info->fib_entry; + kfree(info); + + vr = fib_entry->vr; + err = mlxsw_sp_fib_entry_insert(fib_entry->vr->fib, fib_entry); + if (err) + goto err_fib_entry_insert; + err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); + if (err) + goto err_fib_entry_add; + return 0; + +err_fib_entry_add: + mlxsw_sp_fib_entry_remove(vr->fib, fib_entry); +err_fib_entry_insert: + mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry); + mlxsw_sp_fib_entry_destroy(fib_entry); + mlxsw_sp_vr_put(mlxsw_sp, vr); + return err; +} + +int mlxsw_sp_router_fib4_add(struct mlxsw_sp_port *mlxsw_sp_port, + const struct switchdev_obj_ipv4_fib *fib4, + struct switchdev_trans *trans) +{ + if (switchdev_trans_ph_prepare(trans)) + return mlxsw_sp_router_fib4_add_prepare(mlxsw_sp_port, + fib4, trans); + return mlxsw_sp_router_fib4_add_commit(mlxsw_sp_port, + fib4, trans); +} + +int mlxsw_sp_router_fib4_del(struct mlxsw_sp_port *mlxsw_sp_port, + const struct switchdev_obj_ipv4_fib *fib4) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_vr *vr; + + vr = mlxsw_sp_vr_find(mlxsw_sp, fib4->tb_id, MLXSW_SP_L3_PROTO_IPV4); + if (!vr) { + dev_warn(mlxsw_sp->bus_info->dev, "Failed to find virtual router for FIB4 entry being removed.\n"); + return -ENOENT; + } + fib_entry = mlxsw_sp_fib_entry_lookup(vr->fib, &fib4->dst, + sizeof(fib4->dst), fib4->dst_len); + if (!fib_entry) { + dev_warn(mlxsw_sp->bus_info->dev, "Failed to find FIB4 entry being removed.\n"); + return PTR_ERR(vr); + } + mlxsw_sp_fib_entry_del(mlxsw_sp_port->mlxsw_sp, fib_entry); + mlxsw_sp_fib_entry_remove(vr->fib, fib_entry); + mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry); + mlxsw_sp_fib_entry_destroy(fib_entry); + mlxsw_sp_vr_put(mlxsw_sp, vr); + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index a0c7376ee517..a1ad5e6bdfa8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -166,11 +166,6 @@ static int mlxsw_sp_port_attr_stp_state_set(struct mlxsw_sp_port *mlxsw_sp_port, return mlxsw_sp_port_stp_state_set(mlxsw_sp_port, state); } -static bool mlxsw_sp_vfid_is_vport_br(u16 vfid) -{ - return vfid >= MLXSW_SP_VFID_PORT_MAX; -} - static int __mlxsw_sp_port_flood_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 idx_begin, u16 idx_end, bool set, bool only_uc) @@ -182,15 +177,10 @@ static int __mlxsw_sp_port_flood_set(struct mlxsw_sp_port *mlxsw_sp_port, char *sftr_pl; int err; - if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) { + if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID; - if (mlxsw_sp_vfid_is_vport_br(idx_begin)) - local_port = mlxsw_sp_port->local_port; - else - local_port = MLXSW_PORT_CPU_PORT; - } else { + else table_type = MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFEST; - } sftr_pl = kmalloc(MLXSW_REG_SFTR_LEN, GFP_KERNEL); if (!sftr_pl) @@ -384,18 +374,6 @@ static int mlxsw_sp_port_attr_set(struct net_device *dev, return err; } -static struct mlxsw_sp_fid *mlxsw_sp_fid_find(struct mlxsw_sp *mlxsw_sp, - u16 fid) -{ - struct mlxsw_sp_fid *f; - - list_for_each_entry(f, &mlxsw_sp->fids, list) - if (f->fid == fid) - return f; - - return NULL; -} - static int mlxsw_sp_fid_op(struct mlxsw_sp *mlxsw_sp, u16 fid, bool create) { char sfmr_pl[MLXSW_REG_SFMR_LEN]; @@ -426,8 +404,7 @@ static struct mlxsw_sp_fid *mlxsw_sp_fid_alloc(u16 fid) return f; } -static struct mlxsw_sp_fid *mlxsw_sp_fid_create(struct mlxsw_sp *mlxsw_sp, - u16 fid) +struct mlxsw_sp_fid *mlxsw_sp_fid_create(struct mlxsw_sp *mlxsw_sp, u16 fid) { struct mlxsw_sp_fid *f; int err; @@ -462,13 +439,15 @@ err_fid_map: return ERR_PTR(err); } -static void mlxsw_sp_fid_destroy(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fid *f) +void mlxsw_sp_fid_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *f) { u16 fid = f->fid; list_del(&f->list); + if (f->r) + mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->r); + kfree(f); mlxsw_sp_fid_op(mlxsw_sp, fid, false); @@ -633,25 +612,6 @@ err_port_allow_untagged_set: return err; } -static int mlxsw_sp_port_add_vids(struct net_device *dev, u16 vid_begin, - u16 vid_end) -{ - u16 vid; - int err; - - for (vid = vid_begin; vid <= vid_end; vid++) { - err = mlxsw_sp_port_add_vid(dev, 0, vid); - if (err) - goto err_port_add_vid; - } - return 0; - -err_port_add_vid: - for (vid--; vid >= vid_begin; vid--) - mlxsw_sp_port_kill_vid(dev, 0, vid); - return err; -} - static int __mlxsw_sp_port_vlans_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, u16 vid_end, bool is_member, bool untagged) @@ -681,12 +641,8 @@ static int __mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid, old_pvid; int err; - /* In case this is invoked with BRIDGE_FLAGS_SELF and port is - * not bridged, then packets ingressing through the port with - * the specified VIDs will be directed to CPU. - */ if (!mlxsw_sp_port->bridged) - return mlxsw_sp_port_add_vids(dev, vid_begin, vid_end); + return -EINVAL; err = mlxsw_sp_port_fid_join(mlxsw_sp_port, vid_begin, vid_end); if (err) { @@ -776,9 +732,10 @@ static enum mlxsw_reg_sfd_op mlxsw_sp_sfd_op(bool adding) MLXSW_REG_SFD_OP_WRITE_REMOVE; } -static int mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port, - const char *mac, u16 fid, bool adding, - bool dynamic) +static int __mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port, + const char *mac, u16 fid, bool adding, + enum mlxsw_reg_sfd_rec_action action, + bool dynamic) { char *sfd_pl; int err; @@ -789,14 +746,29 @@ static int mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port, mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0); mlxsw_reg_sfd_uc_pack(sfd_pl, 0, mlxsw_sp_sfd_rec_policy(dynamic), - mac, fid, MLXSW_REG_SFD_REC_ACTION_NOP, - local_port); + mac, fid, action, local_port); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl); kfree(sfd_pl); return err; } +static int mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port, + const char *mac, u16 fid, bool adding, + bool dynamic) +{ + return __mlxsw_sp_port_fdb_uc_op(mlxsw_sp, local_port, mac, fid, adding, + MLXSW_REG_SFD_REC_ACTION_NOP, dynamic); +} + +int mlxsw_sp_rif_fdb_op(struct mlxsw_sp *mlxsw_sp, const char *mac, u16 fid, + bool adding) +{ + return __mlxsw_sp_port_fdb_uc_op(mlxsw_sp, 0, mac, fid, adding, + MLXSW_REG_SFD_REC_ACTION_FORWARD_IP_ROUTER, + false); +} + static int mlxsw_sp_port_fdb_uc_lag_op(struct mlxsw_sp *mlxsw_sp, u16 lag_id, const char *mac, u16 fid, u16 lag_vid, bool adding, bool dynamic) @@ -1001,6 +973,11 @@ static int mlxsw_sp_port_obj_add(struct net_device *dev, SWITCHDEV_OBJ_PORT_VLAN(obj), trans); break; + case SWITCHDEV_OBJ_ID_IPV4_FIB: + err = mlxsw_sp_router_fib4_add(mlxsw_sp_port, + SWITCHDEV_OBJ_IPV4_FIB(obj), + trans); + break; case SWITCHDEV_OBJ_ID_PORT_FDB: err = mlxsw_sp_port_fdb_static_add(mlxsw_sp_port, SWITCHDEV_OBJ_PORT_FDB(obj), @@ -1019,21 +996,6 @@ static int mlxsw_sp_port_obj_add(struct net_device *dev, return err; } -static int mlxsw_sp_port_kill_vids(struct net_device *dev, u16 vid_begin, - u16 vid_end) -{ - u16 vid; - int err; - - for (vid = vid_begin; vid <= vid_end; vid++) { - err = mlxsw_sp_port_kill_vid(dev, 0, vid); - if (err) - return err; - } - - return 0; -} - static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, u16 vid_end, bool init) { @@ -1041,12 +1003,8 @@ static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid, pvid; int err; - /* In case this is invoked with BRIDGE_FLAGS_SELF and port is - * not bridged, then prevent packets ingressing through the - * port with the specified VIDs from being trapped to CPU. - */ if (!init && !mlxsw_sp_port->bridged) - return mlxsw_sp_port_kill_vids(dev, vid_begin, vid_end); + return -EINVAL; err = __mlxsw_sp_port_vlans_set(mlxsw_sp_port, vid_begin, vid_end, false, false); @@ -1165,6 +1123,10 @@ static int mlxsw_sp_port_obj_del(struct net_device *dev, err = mlxsw_sp_port_vlans_del(mlxsw_sp_port, SWITCHDEV_OBJ_PORT_VLAN(obj)); break; + case SWITCHDEV_OBJ_ID_IPV4_FIB: + err = mlxsw_sp_router_fib4_del(mlxsw_sp_port, + SWITCHDEV_OBJ_IPV4_FIB(obj)); + break; case SWITCHDEV_OBJ_ID_PORT_FDB: err = mlxsw_sp_port_fdb_static_del(mlxsw_sp_port, SWITCHDEV_OBJ_PORT_FDB(obj)); diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h index 53a9550be75e..470d7696e9fe 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/trap.h +++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ -54,6 +54,11 @@ enum { MLXSW_TRAP_ID_IGMP_V2_REPORT = 0x32, MLXSW_TRAP_ID_IGMP_V2_LEAVE = 0x33, MLXSW_TRAP_ID_IGMP_V3_REPORT = 0x34, + MLXSW_TRAP_ID_ARPBC = 0x50, + MLXSW_TRAP_ID_ARPUC = 0x51, + MLXSW_TRAP_ID_IP2ME = 0x5F, + MLXSW_TRAP_ID_RTR_INGRESS0 = 0x70, + MLXSW_TRAP_ID_HOST_MISS_IPV4 = 0x90, MLXSW_TRAP_ID_MAX = 0x1FF }; diff --git a/drivers/net/ethernet/microchip/enc28j60.c b/drivers/net/ethernet/microchip/enc28j60.c index 7066954c39d6..0a26b11ca8f6 100644 --- a/drivers/net/ethernet/microchip/enc28j60.c +++ b/drivers/net/ethernet/microchip/enc28j60.c @@ -1151,7 +1151,8 @@ static void enc28j60_irq_work_handler(struct work_struct *work) enc28j60_phy_read(priv, PHIR); } /* TX complete handler */ - if ((intflags & EIR_TXIF) != 0) { + if (((intflags & EIR_TXIF) != 0) && + ((intflags & EIR_TXERIF) == 0)) { bool err = false; loop++; if (netif_msg_intr(priv)) @@ -1203,7 +1204,7 @@ static void enc28j60_irq_work_handler(struct work_struct *work) enc28j60_tx_clear(ndev, true); } else enc28j60_tx_clear(ndev, true); - locked_reg_bfclr(priv, EIR, EIR_TXERIF); + locked_reg_bfclr(priv, EIR, EIR_TXERIF | EIR_TXIF); } /* RX Error handler */ if ((intflags & EIR_RXERIF) != 0) { @@ -1238,6 +1239,8 @@ static void enc28j60_irq_work_handler(struct work_struct *work) */ static void enc28j60_hw_tx(struct enc28j60_net *priv) { + BUG_ON(!priv->tx_skb); + if (netif_msg_tx_queued(priv)) printk(KERN_DEBUG DRV_NAME ": Tx Packet Len:%d\n", priv->tx_skb->len); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index e744acc18ef4..690635660195 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -63,7 +63,7 @@ #define NFP_NET_POLL_TIMEOUT 5 /* Bar allocation */ -#define NFP_NET_CRTL_BAR 0 +#define NFP_NET_CTRL_BAR 0 #define NFP_NET_Q0_BAR 2 #define NFP_NET_Q1_BAR 4 /* OBSOLETE */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index c25a8ba6cf9f..1e74b911accb 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -1845,13 +1845,14 @@ void nfp_net_coalesce_write_cfg(struct nfp_net *nn) } /** - * nfp_net_write_mac_addr() - Write mac address to device registers + * nfp_net_write_mac_addr() - Write mac address to the device control BAR * @nn: NFP Net device to reconfigure - * @mac: Six-byte MAC address to be written * - * We do a bit of byte swapping dance because firmware is LE. + * Writes the MAC address from the netdev to the device control BAR. Does not + * perform the required reconfig. We do a bit of byte swapping dance because + * firmware is LE. */ -static void nfp_net_write_mac_addr(struct nfp_net *nn, const u8 *mac) +static void nfp_net_write_mac_addr(struct nfp_net *nn) { nn_writel(nn, NFP_NET_CFG_MACADDR + 0, get_unaligned_be32(nn->netdev->dev_addr)); @@ -1952,7 +1953,7 @@ static int __nfp_net_set_config_and_enable(struct nfp_net *nn) nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, nn->num_rx_rings == 64 ? 0xffffffffffffffffULL : ((u64)1 << nn->num_rx_rings) - 1); - nfp_net_write_mac_addr(nn, nn->netdev->dev_addr); + nfp_net_write_mac_addr(nn); nn_writel(nn, NFP_NET_CFG_MTU, nn->netdev->mtu); nn_writel(nn, NFP_NET_CFG_FLBUFSZ, nn->fl_bufsz); @@ -2739,7 +2740,7 @@ int nfp_net_netdev_init(struct net_device *netdev) nn->cap = nn_readl(nn, NFP_NET_CFG_CAP); nn->max_mtu = nn_readl(nn, NFP_NET_CFG_MAX_MTU); - nfp_net_write_mac_addr(nn, nn->netdev->dev_addr); + nfp_net_write_mac_addr(nn); /* Set default MTU and Freelist buffer size */ if (nn->max_mtu < NFP_NET_DEFAULT_MTU) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index ccfef1f17627..7d7933d00b8f 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -605,6 +605,7 @@ static int nfp_net_set_coalesce(struct net_device *netdev, static const struct ethtool_ops nfp_net_ethtool_ops = { .get_drvinfo = nfp_net_get_drvinfo, + .get_link = ethtool_op_get_link, .get_ringparam = nfp_net_get_ringparam, .set_ringparam = nfp_net_set_ringparam, .get_strings = nfp_net_get_strings, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c index e2b22b8a20f1..37abef016a0a 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c @@ -124,11 +124,11 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev, * first NFP_NET_CFG_BAR_SZ of the BAR. This keeps the code * the identical for PF and VF drivers. */ - ctrl_bar = ioremap_nocache(pci_resource_start(pdev, NFP_NET_CRTL_BAR), + ctrl_bar = ioremap_nocache(pci_resource_start(pdev, NFP_NET_CTRL_BAR), NFP_NET_CFG_BAR_SZ); if (!ctrl_bar) { dev_err(&pdev->dev, - "Failed to map resource %d\n", NFP_NET_CRTL_BAR); + "Failed to map resource %d\n", NFP_NET_CTRL_BAR); err = -EIO; goto err_pci_regions; } diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c index 01b50ff7c708..4d4ecba0aad9 100644 --- a/drivers/net/ethernet/nxp/lpc_eth.c +++ b/drivers/net/ethernet/nxp/lpc_eth.c @@ -1183,7 +1183,6 @@ static void lpc_eth_set_multicast_list(struct net_device *ndev) static int lpc_eth_ioctl(struct net_device *ndev, struct ifreq *req, int cmd) { - struct netdata_local *pldat = netdev_priv(ndev); struct phy_device *phydev = ndev->phydev; if (!netif_running(ndev)) diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index 1441c8f6d414..02b06d4e40ae 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -24,7 +24,7 @@ #include <linux/qed/qed_eth_if.h> #define QEDE_MAJOR_VERSION 8 -#define QEDE_MINOR_VERSION 7 +#define QEDE_MINOR_VERSION 10 #define QEDE_REVISION_VERSION 1 #define QEDE_ENGINEERING_VERSION 20 #define DRV_MODULE_VERSION __stringify(QEDE_MAJOR_VERSION) "." \ @@ -143,6 +143,8 @@ struct qede_dev { struct mutex qede_lock; u32 state; /* Protected by qede_lock */ u16 rx_buf_size; + u32 rx_copybreak; + /* L2 header size + 2*VLANs (8 bytes) + LLC SNAP (8 bytes) */ #define ETH_OVERHEAD (ETH_HLEN + 8 + 8) /* Max supported alignment is 256 (8 shift) @@ -235,6 +237,7 @@ struct qede_rx_queue { u64 rx_hw_errors; u64 rx_alloc_errors; + u64 rx_ip_frags; }; union db_prod { @@ -332,6 +335,7 @@ void qede_recycle_rx_bd_ring(struct qede_rx_queue *rxq, struct qede_dev *edev, #define NUM_TX_BDS_MIN 128 #define NUM_TX_BDS_DEF NUM_TX_BDS_MAX +#define QEDE_MIN_PKT_LEN 64 #define QEDE_RX_HDR_SIZE 256 #define for_each_rss(i) for (i = 0; i < edev->num_rss; i++) diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index c5c658ab0724..f8492cac9290 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -37,6 +37,7 @@ static const struct { } qede_rqstats_arr[] = { QEDE_RQSTAT(rx_hw_errors), QEDE_RQSTAT(rx_alloc_errors), + QEDE_RQSTAT(rx_ip_frags), }; #define QEDE_NUM_RQSTATS ARRAY_SIZE(qede_rqstats_arr) @@ -1184,6 +1185,48 @@ static void qede_self_test(struct net_device *dev, } } +static int qede_set_tunable(struct net_device *dev, + const struct ethtool_tunable *tuna, + const void *data) +{ + struct qede_dev *edev = netdev_priv(dev); + u32 val; + + switch (tuna->id) { + case ETHTOOL_RX_COPYBREAK: + val = *(u32 *)data; + if (val < QEDE_MIN_PKT_LEN || val > QEDE_RX_HDR_SIZE) { + DP_VERBOSE(edev, QED_MSG_DEBUG, + "Invalid rx copy break value, range is [%u, %u]", + QEDE_MIN_PKT_LEN, QEDE_RX_HDR_SIZE); + return -EINVAL; + } + + edev->rx_copybreak = *(u32 *)data; + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static int qede_get_tunable(struct net_device *dev, + const struct ethtool_tunable *tuna, void *data) +{ + struct qede_dev *edev = netdev_priv(dev); + + switch (tuna->id) { + case ETHTOOL_RX_COPYBREAK: + *(u32 *)data = edev->rx_copybreak; + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + static const struct ethtool_ops qede_ethtool_ops = { .get_settings = qede_get_settings, .set_settings = qede_set_settings, @@ -1212,6 +1255,8 @@ static const struct ethtool_ops qede_ethtool_ops = { .get_channels = qede_get_channels, .set_channels = qede_set_channels, .self_test = qede_self_test, + .get_tunable = qede_get_tunable, + .set_tunable = qede_set_tunable, }; static const struct ethtool_ops qede_vf_ethtool_ops = { @@ -1234,6 +1279,8 @@ static const struct ethtool_ops qede_vf_ethtool_ops = { .set_rxfh = qede_set_rxfh, .get_channels = qede_get_channels, .set_channels = qede_set_channels, + .get_tunable = qede_get_tunable, + .set_tunable = qede_set_tunable, }; void qede_set_ethtool_ops(struct net_device *dev) diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 19bc631e1f04..91e7bb0b85c8 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -485,6 +485,24 @@ static bool qede_pkt_req_lin(struct qede_dev *edev, struct sk_buff *skb, } #endif +static inline void qede_update_tx_producer(struct qede_tx_queue *txq) +{ + /* wmb makes sure that the BDs data is updated before updating the + * producer, otherwise FW may read old data from the BDs. + */ + wmb(); + barrier(); + writel(txq->tx_db.raw, txq->doorbell_addr); + + /* mmiowb is needed to synchronize doorbell writes from more than one + * processor. It guarantees that the write arrives to the device before + * the queue lock is released and another start_xmit is called (possibly + * on another CPU). Without this barrier, the next doorbell can bypass + * this doorbell. This is applicable to IA64/Altix systems. + */ + mmiowb(); +} + /* Main transmit function */ static netdev_tx_t qede_start_xmit(struct sk_buff *skb, @@ -543,6 +561,7 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb, if (unlikely(dma_mapping_error(&edev->pdev->dev, mapping))) { DP_NOTICE(edev, "SKB mapping failed\n"); qede_free_failed_tx_pkt(edev, txq, first_bd, 0, false); + qede_update_tx_producer(txq); return NETDEV_TX_OK; } nbd++; @@ -657,6 +676,7 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb, if (rc) { qede_free_failed_tx_pkt(edev, txq, first_bd, nbd, data_split); + qede_update_tx_producer(txq); return NETDEV_TX_OK; } @@ -681,6 +701,7 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb, if (rc) { qede_free_failed_tx_pkt(edev, txq, first_bd, nbd, data_split); + qede_update_tx_producer(txq); return NETDEV_TX_OK; } } @@ -701,20 +722,8 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb, txq->tx_db.data.bd_prod = cpu_to_le16(qed_chain_get_prod_idx(&txq->tx_pbl)); - /* wmb makes sure that the BDs data is updated before updating the - * producer, otherwise FW may read old data from the BDs. - */ - wmb(); - barrier(); - writel(txq->tx_db.raw, txq->doorbell_addr); - - /* mmiowb is needed to synchronize doorbell writes from more than one - * processor. It guarantees that the write arrives to the device before - * the queue lock is released and another start_xmit is called (possibly - * on another CPU). Without this barrier, the next doorbell can bypass - * this doorbell. This is applicable to IA64/Altix systems. - */ - mmiowb(); + if (!skb->xmit_more || netif_tx_queue_stopped(netdev_txq)) + qede_update_tx_producer(txq); if (unlikely(qed_chain_get_elem_left(&txq->tx_pbl) < (MAX_SKB_FRAGS + 1))) { @@ -1348,6 +1357,20 @@ static u8 qede_check_csum(u16 flag) return qede_check_tunn_csum(flag); } +static bool qede_pkt_is_ip_fragmented(struct eth_fast_path_rx_reg_cqe *cqe, + u16 flag) +{ + u8 tun_pars_flg = cqe->tunnel_pars_flags.flags; + + if ((tun_pars_flg & (ETH_TUNNEL_PARSING_FLAGS_IPV4_FRAGMENT_MASK << + ETH_TUNNEL_PARSING_FLAGS_IPV4_FRAGMENT_SHIFT)) || + (flag & (PARSING_AND_ERR_FLAGS_IPV4FRAG_MASK << + PARSING_AND_ERR_FLAGS_IPV4FRAG_SHIFT))) + return true; + + return false; +} + static int qede_rx_int(struct qede_fastpath *fp, int budget) { struct qede_dev *edev = fp->edev; @@ -1426,6 +1449,12 @@ static int qede_rx_int(struct qede_fastpath *fp, int budget) csum_flag = qede_check_csum(parse_flag); if (unlikely(csum_flag == QEDE_CSUM_ERROR)) { + if (qede_pkt_is_ip_fragmented(&cqe->fast_path_regular, + parse_flag)) { + rxq->rx_ip_frags++; + goto alloc_skb; + } + DP_NOTICE(edev, "CQE in CONS = %u has error, flags = %x, dropping incoming packet\n", sw_comp_cons, parse_flag); @@ -1434,6 +1463,7 @@ static int qede_rx_int(struct qede_fastpath *fp, int budget) goto next_cqe; } +alloc_skb: skb = netdev_alloc_skb(edev->ndev, QEDE_RX_HDR_SIZE); if (unlikely(!skb)) { DP_NOTICE(edev, @@ -1444,7 +1474,7 @@ static int qede_rx_int(struct qede_fastpath *fp, int budget) } /* Copy data into SKB */ - if (len + pad <= QEDE_RX_HDR_SIZE) { + if (len + pad <= edev->rx_copybreak) { memcpy(skb_put(skb, len), page_address(data) + pad + sw_rx_data->page_offset, len); @@ -1576,56 +1606,49 @@ next_cqe: /* don't consume bd rx buffer */ static int qede_poll(struct napi_struct *napi, int budget) { - int work_done = 0; struct qede_fastpath *fp = container_of(napi, struct qede_fastpath, - napi); + napi); struct qede_dev *edev = fp->edev; + int rx_work_done = 0; + u8 tc; - while (1) { - u8 tc; - - for (tc = 0; tc < edev->num_tc; tc++) - if (qede_txq_has_work(&fp->txqs[tc])) - qede_tx_int(edev, &fp->txqs[tc]); - - if (qede_has_rx_work(fp->rxq)) { - work_done += qede_rx_int(fp, budget - work_done); - - /* must not complete if we consumed full budget */ - if (work_done >= budget) - break; - } + for (tc = 0; tc < edev->num_tc; tc++) + if (qede_txq_has_work(&fp->txqs[tc])) + qede_tx_int(edev, &fp->txqs[tc]); + + rx_work_done = qede_has_rx_work(fp->rxq) ? + qede_rx_int(fp, budget) : 0; + if (rx_work_done < budget) { + qed_sb_update_sb_idx(fp->sb_info); + /* *_has_*_work() reads the status block, + * thus we need to ensure that status block indices + * have been actually read (qed_sb_update_sb_idx) + * prior to this check (*_has_*_work) so that + * we won't write the "newer" value of the status block + * to HW (if there was a DMA right after + * qede_has_rx_work and if there is no rmb, the memory + * reading (qed_sb_update_sb_idx) may be postponed + * to right before *_ack_sb). In this case there + * will never be another interrupt until there is + * another update of the status block, while there + * is still unhandled work. + */ + rmb(); /* Fall out from the NAPI loop if needed */ - if (!(qede_has_rx_work(fp->rxq) || qede_has_tx_work(fp))) { - qed_sb_update_sb_idx(fp->sb_info); - /* *_has_*_work() reads the status block, - * thus we need to ensure that status block indices - * have been actually read (qed_sb_update_sb_idx) - * prior to this check (*_has_*_work) so that - * we won't write the "newer" value of the status block - * to HW (if there was a DMA right after - * qede_has_rx_work and if there is no rmb, the memory - * reading (qed_sb_update_sb_idx) may be postponed - * to right before *_ack_sb). In this case there - * will never be another interrupt until there is - * another update of the status block, while there - * is still unhandled work. - */ - rmb(); - - if (!(qede_has_rx_work(fp->rxq) || - qede_has_tx_work(fp))) { - napi_complete(napi); - /* Update and reenable interrupts */ - qed_sb_ack(fp->sb_info, IGU_INT_ENABLE, - 1 /*update*/); - break; - } + if (!(qede_has_rx_work(fp->rxq) || + qede_has_tx_work(fp))) { + napi_complete(napi); + + /* Update and reenable interrupts */ + qed_sb_ack(fp->sb_info, IGU_INT_ENABLE, + 1 /*update*/); + } else { + rx_work_done = budget; } } - return work_done; + return rx_work_done; } static irqreturn_t qede_msix_fp_int(int irq, void *fp_cookie) @@ -2496,6 +2519,7 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, INIT_DELAYED_WORK(&edev->sp_task, qede_sp_task); mutex_init(&edev->qede_lock); + edev->rx_copybreak = QEDE_RX_HDR_SIZE; DP_INFO(edev, "Ending successfully qede probe\n"); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c index 607bb7d4514d..87c642d3b075 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c @@ -772,6 +772,8 @@ netdev_tx_t qlcnic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) tx_ring->tx_stats.tx_bytes += skb->len; tx_ring->tx_stats.xmit_called++; + /* Ensure writes are complete before HW fetches Tx descriptors */ + wmb(); qlcnic_update_cmd_producer(tx_ring); return NETDEV_TX_OK; diff --git a/drivers/net/ethernet/rdc/r6040.c b/drivers/net/ethernet/rdc/r6040.c index 7a7a395d0512..cb29ee24cf1b 100644 --- a/drivers/net/ethernet/rdc/r6040.c +++ b/drivers/net/ethernet/rdc/r6040.c @@ -4,7 +4,7 @@ * Copyright (C) 2004 Sten Wang <[email protected]> * Copyright (C) 2007 * Daniel Gimpelevich <[email protected]> - * Copyright (C) 2007-2012 Florian Fainelli <[email protected]> + * Copyright (C) 2007-2012 Florian Fainelli <[email protected]> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -48,8 +48,8 @@ #include <asm/processor.h> #define DRV_NAME "r6040" -#define DRV_VERSION "0.28" -#define DRV_RELDATE "07Oct2011" +#define DRV_VERSION "0.29" +#define DRV_RELDATE "04Jul2016" /* Time in jiffies before concluding the transmitter is hung. */ #define TX_TIMEOUT (6000 * HZ / 1000) @@ -162,7 +162,7 @@ MODULE_AUTHOR("Sten Wang <[email protected]>," "Daniel Gimpelevich <[email protected]>," - "Florian Fainelli <[email protected]>"); + "Florian Fainelli <[email protected]>"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("RDC R6040 NAPI PCI FastEthernet driver"); MODULE_VERSION(DRV_VERSION " " DRV_RELDATE); @@ -614,10 +614,15 @@ static void r6040_tx(struct net_device *dev) if (descptr->status & DSC_OWNER_MAC) break; /* Not complete */ skb_ptr = descptr->skb_ptr; + + /* Statistic Counter */ + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb_ptr->len; + pci_unmap_single(priv->pdev, le32_to_cpu(descptr->buf), skb_ptr->len, PCI_DMA_TODEVICE); /* Free buffer */ - dev_kfree_skb_irq(skb_ptr); + dev_kfree_skb(skb_ptr); descptr->skb_ptr = NULL; /* To next descriptor */ descptr = descptr->vndescp; @@ -638,12 +643,15 @@ static int r6040_poll(struct napi_struct *napi, int budget) void __iomem *ioaddr = priv->base; int work_done; + r6040_tx(dev); + work_done = r6040_rx(dev, budget); if (work_done < budget) { - napi_complete(napi); - /* Enable RX interrupt */ - iowrite16(ioread16(ioaddr + MIER) | RX_INTS, ioaddr + MIER); + napi_complete_done(napi, work_done); + /* Enable RX/TX interrupt */ + iowrite16(ioread16(ioaddr + MIER) | RX_INTS | TX_INTS, + ioaddr + MIER); } return work_done; } @@ -670,7 +678,7 @@ static irqreturn_t r6040_interrupt(int irq, void *dev_id) } /* RX interrupt request */ - if (status & RX_INTS) { + if (status & (RX_INTS | TX_INTS)) { if (status & RX_NO_DESC) { /* RX descriptor unavailable */ dev->stats.rx_dropped++; @@ -681,15 +689,11 @@ static irqreturn_t r6040_interrupt(int irq, void *dev_id) if (likely(napi_schedule_prep(&lp->napi))) { /* Mask off RX interrupt */ - misr &= ~RX_INTS; - __napi_schedule(&lp->napi); + misr &= ~(RX_INTS | TX_INTS); + __napi_schedule_irqoff(&lp->napi); } } - /* TX interrupt request */ - if (status & TX_INTS) - r6040_tx(dev); - /* Restore RDC MAC interrupt */ iowrite16(misr, ioaddr + MIER); @@ -810,6 +814,9 @@ static netdev_tx_t r6040_start_xmit(struct sk_buff *skb, void __iomem *ioaddr = lp->base; unsigned long flags; + if (skb_put_padto(skb, ETH_ZLEN) < 0) + return NETDEV_TX_OK; + /* Critical Section */ spin_lock_irqsave(&lp->lock, flags); @@ -821,17 +828,10 @@ static netdev_tx_t r6040_start_xmit(struct sk_buff *skb, return NETDEV_TX_BUSY; } - /* Statistic Counter */ - dev->stats.tx_packets++; - dev->stats.tx_bytes += skb->len; /* Set TX descriptor & Transmit it */ lp->tx_free_desc--; descptr = lp->tx_insert_ptr; - if (skb->len < ETH_ZLEN) - descptr->len = ETH_ZLEN; - else - descptr->len = skb->len; - + descptr->len = skb->len; descptr->skb_ptr = skb; descptr->buf = cpu_to_le32(pci_map_single(lp->pdev, skb->data, skb->len, PCI_DMA_TODEVICE)); @@ -840,7 +840,8 @@ static netdev_tx_t r6040_start_xmit(struct sk_buff *skb, skb_tx_timestamp(skb); /* Trigger the MAC to check the TX descriptor */ - iowrite16(TM2TX, ioaddr + MTPR); + if (!skb->xmit_more || netif_queue_stopped(dev)) + iowrite16(TM2TX, ioaddr + MTPR); lp->tx_insert_ptr = descptr->vndescp; /* If no tx resource, stop */ @@ -1001,14 +1002,8 @@ static void r6040_adjust_link(struct net_device *dev) lp->old_duplex = phydev->duplex; } - if (status_changed) { - pr_info("%s: link %s", dev->name, phydev->link ? - "UP" : "DOWN"); - if (phydev->link) - pr_cont(" - %d/%s", phydev->speed, - DUPLEX_FULL == phydev->duplex ? "full" : "half"); - pr_cont("\n"); - } + if (status_changed) + phy_print_status(phydev); } static int r6040_mii_probe(struct net_device *dev) diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index 28b775e5a9ad..f0b09b05ed3f 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -1996,7 +1996,8 @@ static int rocker_port_change_proto_down(struct net_device *dev, return 0; } -static void rocker_port_neigh_destroy(struct neighbour *n) +static void rocker_port_neigh_destroy(struct net_device *dev, + struct neighbour *n) { struct rocker_port *rocker_port = netdev_priv(n->dev); int err; diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index b5ab5e120bca..ca3134540d2d 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -114,7 +114,6 @@ struct smsc911x_data { /* spinlock to ensure register accesses are serialised */ spinlock_t dev_lock; - struct phy_device *phy_dev; struct mii_bus *mii_bus; unsigned int using_extphy; int last_duplex; @@ -833,7 +832,7 @@ static int smsc911x_phy_reset(struct smsc911x_data *pdata) static int smsc911x_phy_loopbacktest(struct net_device *dev) { struct smsc911x_data *pdata = netdev_priv(dev); - struct phy_device *phy_dev = pdata->phy_dev; + struct phy_device *phy_dev = dev->phydev; int result = -EIO; unsigned int i, val; unsigned long flags; @@ -903,7 +902,8 @@ static int smsc911x_phy_loopbacktest(struct net_device *dev) static void smsc911x_phy_update_flowcontrol(struct smsc911x_data *pdata) { - struct phy_device *phy_dev = pdata->phy_dev; + struct net_device *ndev = pdata->dev; + struct phy_device *phy_dev = ndev->phydev; u32 afc = smsc911x_reg_read(pdata, AFC_CFG); u32 flow; unsigned long flags; @@ -944,7 +944,7 @@ static void smsc911x_phy_update_flowcontrol(struct smsc911x_data *pdata) static void smsc911x_phy_adjust_link(struct net_device *dev) { struct smsc911x_data *pdata = netdev_priv(dev); - struct phy_device *phy_dev = pdata->phy_dev; + struct phy_device *phy_dev = dev->phydev; unsigned long flags; int carrier; @@ -1037,7 +1037,6 @@ static int smsc911x_mii_probe(struct net_device *dev) SUPPORTED_Asym_Pause); phydev->advertising = phydev->supported; - pdata->phy_dev = phydev; pdata->last_duplex = -1; pdata->last_carrier = -1; @@ -1338,9 +1337,11 @@ static void smsc911x_rx_multicast_update_workaround(struct smsc911x_data *pdata) static int smsc911x_phy_general_power_up(struct smsc911x_data *pdata) { + struct net_device *ndev = pdata->dev; + struct phy_device *phy_dev = ndev->phydev; int rc = 0; - if (!pdata->phy_dev) + if (!phy_dev) return rc; /* If the internal PHY is in General Power-Down mode, all, except the @@ -1350,7 +1351,7 @@ static int smsc911x_phy_general_power_up(struct smsc911x_data *pdata) * In that case, clear the bit 0.11, so the PHY powers up and we can * access to the phy registers. */ - rc = phy_read(pdata->phy_dev, MII_BMCR); + rc = phy_read(phy_dev, MII_BMCR); if (rc < 0) { SMSC_WARN(pdata, drv, "Failed reading PHY control reg"); return rc; @@ -1360,7 +1361,7 @@ static int smsc911x_phy_general_power_up(struct smsc911x_data *pdata) * disable the general power down-mode. */ if (rc & BMCR_PDOWN) { - rc = phy_write(pdata->phy_dev, MII_BMCR, rc & ~BMCR_PDOWN); + rc = phy_write(phy_dev, MII_BMCR, rc & ~BMCR_PDOWN); if (rc < 0) { SMSC_WARN(pdata, drv, "Failed writing PHY control reg"); return rc; @@ -1374,12 +1375,14 @@ static int smsc911x_phy_general_power_up(struct smsc911x_data *pdata) static int smsc911x_phy_disable_energy_detect(struct smsc911x_data *pdata) { + struct net_device *ndev = pdata->dev; + struct phy_device *phy_dev = ndev->phydev; int rc = 0; - if (!pdata->phy_dev) + if (!phy_dev) return rc; - rc = phy_read(pdata->phy_dev, MII_LAN83C185_CTRL_STATUS); + rc = phy_read(phy_dev, MII_LAN83C185_CTRL_STATUS); if (rc < 0) { SMSC_WARN(pdata, drv, "Failed reading PHY control reg"); @@ -1389,7 +1392,7 @@ static int smsc911x_phy_disable_energy_detect(struct smsc911x_data *pdata) /* Only disable if energy detect mode is already enabled */ if (rc & MII_LAN83C185_EDPWRDOWN) { /* Disable energy detect mode for this SMSC Transceivers */ - rc = phy_write(pdata->phy_dev, MII_LAN83C185_CTRL_STATUS, + rc = phy_write(phy_dev, MII_LAN83C185_CTRL_STATUS, rc & (~MII_LAN83C185_EDPWRDOWN)); if (rc < 0) { @@ -1405,12 +1408,14 @@ static int smsc911x_phy_disable_energy_detect(struct smsc911x_data *pdata) static int smsc911x_phy_enable_energy_detect(struct smsc911x_data *pdata) { + struct net_device *ndev = pdata->dev; + struct phy_device *phy_dev = ndev->phydev; int rc = 0; - if (!pdata->phy_dev) + if (!phy_dev) return rc; - rc = phy_read(pdata->phy_dev, MII_LAN83C185_CTRL_STATUS); + rc = phy_read(phy_dev, MII_LAN83C185_CTRL_STATUS); if (rc < 0) { SMSC_WARN(pdata, drv, "Failed reading PHY control reg"); @@ -1420,7 +1425,7 @@ static int smsc911x_phy_enable_energy_detect(struct smsc911x_data *pdata) /* Only enable if energy detect mode is already disabled */ if (!(rc & MII_LAN83C185_EDPWRDOWN)) { /* Enable energy detect mode for this SMSC Transceivers */ - rc = phy_write(pdata->phy_dev, MII_LAN83C185_CTRL_STATUS, + rc = phy_write(phy_dev, MII_LAN83C185_CTRL_STATUS, rc | MII_LAN83C185_EDPWRDOWN); if (rc < 0) { @@ -1517,7 +1522,7 @@ static int smsc911x_open(struct net_device *dev) unsigned int intcfg; /* if the phy is not yet registered, retry later*/ - if (!pdata->phy_dev) { + if (!dev->phydev) { SMSC_WARN(pdata, hw, "phy_dev is NULL"); return -EAGAIN; } @@ -1608,7 +1613,7 @@ static int smsc911x_open(struct net_device *dev) pdata->last_carrier = -1; /* Bring the PHY up */ - phy_start(pdata->phy_dev); + phy_start(dev->phydev); temp = smsc911x_reg_read(pdata, HW_CFG); /* Preserve TX FIFO size and external PHY configuration */ @@ -1663,8 +1668,8 @@ static int smsc911x_stop(struct net_device *dev) smsc911x_tx_update_txcounters(dev); /* Bring the PHY down */ - if (pdata->phy_dev) - phy_stop(pdata->phy_dev); + if (dev->phydev) + phy_stop(dev->phydev); SMSC_TRACE(pdata, ifdown, "Interface stopped"); return 0; @@ -1904,30 +1909,10 @@ static int smsc911x_set_mac_address(struct net_device *dev, void *p) /* Standard ioctls for mii-tool */ static int smsc911x_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { - struct smsc911x_data *pdata = netdev_priv(dev); - - if (!netif_running(dev) || !pdata->phy_dev) + if (!netif_running(dev) || !dev->phydev) return -EINVAL; - return phy_mii_ioctl(pdata->phy_dev, ifr, cmd); -} - -static int -smsc911x_ethtool_getsettings(struct net_device *dev, struct ethtool_cmd *cmd) -{ - struct smsc911x_data *pdata = netdev_priv(dev); - - cmd->maxtxpkt = 1; - cmd->maxrxpkt = 1; - return phy_ethtool_gset(pdata->phy_dev, cmd); -} - -static int -smsc911x_ethtool_setsettings(struct net_device *dev, struct ethtool_cmd *cmd) -{ - struct smsc911x_data *pdata = netdev_priv(dev); - - return phy_ethtool_sset(pdata->phy_dev, cmd); + return phy_mii_ioctl(dev->phydev, ifr, cmd); } static void smsc911x_ethtool_getdrvinfo(struct net_device *dev, @@ -1941,9 +1926,7 @@ static void smsc911x_ethtool_getdrvinfo(struct net_device *dev, static int smsc911x_ethtool_nwayreset(struct net_device *dev) { - struct smsc911x_data *pdata = netdev_priv(dev); - - return phy_start_aneg(pdata->phy_dev); + return phy_start_aneg(dev->phydev); } static u32 smsc911x_ethtool_getmsglevel(struct net_device *dev) @@ -1969,7 +1952,7 @@ smsc911x_ethtool_getregs(struct net_device *dev, struct ethtool_regs *regs, void *buf) { struct smsc911x_data *pdata = netdev_priv(dev); - struct phy_device *phy_dev = pdata->phy_dev; + struct phy_device *phy_dev = dev->phydev; unsigned long flags; unsigned int i; unsigned int j = 0; @@ -2115,8 +2098,6 @@ static int smsc911x_ethtool_set_eeprom(struct net_device *dev, } static const struct ethtool_ops smsc911x_ethtool_ops = { - .get_settings = smsc911x_ethtool_getsettings, - .set_settings = smsc911x_ethtool_setsettings, .get_link = ethtool_op_get_link, .get_drvinfo = smsc911x_ethtool_getdrvinfo, .nway_reset = smsc911x_ethtool_nwayreset, @@ -2128,6 +2109,8 @@ static const struct ethtool_ops smsc911x_ethtool_ops = { .get_eeprom = smsc911x_ethtool_get_eeprom, .set_eeprom = smsc911x_ethtool_set_eeprom, .get_ts_info = ethtool_op_get_ts_info, + .get_link_ksettings = phy_ethtool_get_link_ksettings, + .set_link_ksettings = phy_ethtool_set_link_ksettings, }; static const struct net_device_ops smsc911x_netdev_ops = { @@ -2308,12 +2291,11 @@ static int smsc911x_drv_remove(struct platform_device *pdev) pdata = netdev_priv(dev); BUG_ON(!pdata); BUG_ON(!pdata->ioaddr); - BUG_ON(!pdata->phy_dev); + BUG_ON(!dev->phydev); SMSC_TRACE(pdata, ifdown, "Stopping driver"); - phy_disconnect(pdata->phy_dev); - pdata->phy_dev = NULL; + phy_disconnect(dev->phydev); mdiobus_unregister(pdata->mii_bus); mdiobus_free(pdata->mii_bus); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index aab777c1ba33..c23ccabc2d8a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2819,7 +2819,7 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id) priv->tx_path_in_lpi_mode = true; if (status & CORE_IRQ_TX_PATH_EXIT_LPI_MODE) priv->tx_path_in_lpi_mode = false; - if (status & CORE_IRQ_MTL_RX_OVERFLOW) + if (status & CORE_IRQ_MTL_RX_OVERFLOW && priv->hw->dma->set_rx_tail_ptr) priv->hw->dma->set_rx_tail_ptr(priv->ioaddr, priv->rx_tail_addr, STMMAC_CHAN0); diff --git a/drivers/net/ethernet/synopsys/dwc_eth_qos.c b/drivers/net/ethernet/synopsys/dwc_eth_qos.c index c14fa91c825f..c34111b390c7 100644 --- a/drivers/net/ethernet/synopsys/dwc_eth_qos.c +++ b/drivers/net/ethernet/synopsys/dwc_eth_qos.c @@ -46,7 +46,6 @@ #include <linux/delay.h> #include <linux/dma-mapping.h> #include <linux/vmalloc.h> -#include <linux/version.h> #include <linux/device.h> #include <linux/bitrev.h> diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index f56d66e6ec15..c6c54659f8d4 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -348,7 +348,6 @@ struct emac_priv { u32 rx_addr_type; const char *phy_id; struct device_node *phy_node; - struct phy_device *phydev; spinlock_t lock; /*platform specific members*/ void (*int_enable) (void); @@ -486,43 +485,6 @@ static void emac_get_drvinfo(struct net_device *ndev, } /** - * emac_get_settings - Get EMAC settings - * @ndev: The DaVinci EMAC network adapter - * @ecmd: ethtool command - * - * Executes ethool get command - * - */ -static int emac_get_settings(struct net_device *ndev, - struct ethtool_cmd *ecmd) -{ - struct emac_priv *priv = netdev_priv(ndev); - if (priv->phydev) - return phy_ethtool_gset(priv->phydev, ecmd); - else - return -EOPNOTSUPP; - -} - -/** - * emac_set_settings - Set EMAC settings - * @ndev: The DaVinci EMAC network adapter - * @ecmd: ethtool command - * - * Executes ethool set command - * - */ -static int emac_set_settings(struct net_device *ndev, struct ethtool_cmd *ecmd) -{ - struct emac_priv *priv = netdev_priv(ndev); - if (priv->phydev) - return phy_ethtool_sset(priv->phydev, ecmd); - else - return -EOPNOTSUPP; - -} - -/** * emac_get_coalesce - Get interrupt coalesce settings for this device * @ndev : The DaVinci EMAC network adapter * @coal : ethtool coalesce settings structure @@ -625,12 +587,12 @@ static int emac_set_coalesce(struct net_device *ndev, */ static const struct ethtool_ops ethtool_ops = { .get_drvinfo = emac_get_drvinfo, - .get_settings = emac_get_settings, - .set_settings = emac_set_settings, .get_link = ethtool_op_get_link, .get_coalesce = emac_get_coalesce, .set_coalesce = emac_set_coalesce, .get_ts_info = ethtool_op_get_ts_info, + .get_link_ksettings = phy_ethtool_get_link_ksettings, + .set_link_ksettings = phy_ethtool_set_link_ksettings, }; /** @@ -651,8 +613,8 @@ static void emac_update_phystatus(struct emac_priv *priv) mac_control = emac_read(EMAC_MACCONTROL); cur_duplex = (mac_control & EMAC_MACCONTROL_FULLDUPLEXEN) ? DUPLEX_FULL : DUPLEX_HALF; - if (priv->phydev) - new_duplex = priv->phydev->duplex; + if (ndev->phydev) + new_duplex = ndev->phydev->duplex; else new_duplex = DUPLEX_FULL; @@ -1454,7 +1416,7 @@ static void emac_poll_controller(struct net_device *ndev) static void emac_adjust_link(struct net_device *ndev) { struct emac_priv *priv = netdev_priv(ndev); - struct phy_device *phydev = priv->phydev; + struct phy_device *phydev = ndev->phydev; unsigned long flags; int new_state = 0; @@ -1483,7 +1445,7 @@ static void emac_adjust_link(struct net_device *ndev) } if (new_state) { emac_update_phystatus(priv); - phy_print_status(priv->phydev); + phy_print_status(ndev->phydev); } spin_unlock_irqrestore(&priv->lock, flags); @@ -1505,15 +1467,13 @@ static void emac_adjust_link(struct net_device *ndev) */ static int emac_devioctl(struct net_device *ndev, struct ifreq *ifrq, int cmd) { - struct emac_priv *priv = netdev_priv(ndev); - if (!(netif_running(ndev))) return -EINVAL; /* TODO: Add phy read and write and private statistics get feature */ - if (priv->phydev) - return phy_mii_ioctl(priv->phydev, ifrq, cmd); + if (ndev->phydev) + return phy_mii_ioctl(ndev->phydev, ifrq, cmd); else return -EOPNOTSUPP; } @@ -1542,6 +1502,7 @@ static int emac_dev_open(struct net_device *ndev) int res_num = 0, irq_num = 0; int i = 0; struct emac_priv *priv = netdev_priv(ndev); + struct phy_device *phydev = NULL; ret = pm_runtime_get_sync(&priv->pdev->dev); if (ret < 0) { @@ -1607,12 +1568,10 @@ static int emac_dev_open(struct net_device *ndev) cpdma_ctlr_start(priv->dma); - priv->phydev = NULL; - if (priv->phy_node) { - priv->phydev = of_phy_connect(ndev, priv->phy_node, - &emac_adjust_link, 0, 0); - if (!priv->phydev) { + phydev = of_phy_connect(ndev, priv->phy_node, + &emac_adjust_link, 0, 0); + if (!phydev) { dev_err(emac_dev, "could not connect to phy %s\n", priv->phy_node->full_name); ret = -ENODEV; @@ -1621,7 +1580,7 @@ static int emac_dev_open(struct net_device *ndev) } /* use the first phy on the bus if pdata did not give us a phy id */ - if (!priv->phydev && !priv->phy_id) { + if (!phydev && !priv->phy_id) { struct device *phy; phy = bus_find_device(&mdio_bus_type, NULL, NULL, @@ -1630,16 +1589,15 @@ static int emac_dev_open(struct net_device *ndev) priv->phy_id = dev_name(phy); } - if (!priv->phydev && priv->phy_id && *priv->phy_id) { - priv->phydev = phy_connect(ndev, priv->phy_id, - &emac_adjust_link, - PHY_INTERFACE_MODE_MII); + if (!phydev && priv->phy_id && *priv->phy_id) { + phydev = phy_connect(ndev, priv->phy_id, + &emac_adjust_link, + PHY_INTERFACE_MODE_MII); - if (IS_ERR(priv->phydev)) { + if (IS_ERR(phydev)) { dev_err(emac_dev, "could not connect to phy %s\n", priv->phy_id); - ret = PTR_ERR(priv->phydev); - priv->phydev = NULL; + ret = PTR_ERR(phydev); goto err; } @@ -1647,10 +1605,10 @@ static int emac_dev_open(struct net_device *ndev) priv->speed = 0; priv->duplex = ~0; - phy_attached_info(priv->phydev); + phy_attached_info(phydev); } - if (!priv->phydev) { + if (!phydev) { /* No PHY , fix the link, speed and duplex settings */ dev_notice(emac_dev, "no phy, defaulting to 100/full\n"); priv->link = 1; @@ -1665,8 +1623,8 @@ static int emac_dev_open(struct net_device *ndev) if (netif_msg_drv(priv)) dev_notice(emac_dev, "DaVinci EMAC: Opened %s\n", ndev->name); - if (priv->phydev) - phy_start(priv->phydev); + if (phydev) + phy_start(phydev); return 0; @@ -1717,8 +1675,8 @@ static int emac_dev_stop(struct net_device *ndev) cpdma_ctlr_stop(priv->dma); emac_write(EMAC_SOFTRESET, 1); - if (priv->phydev) - phy_disconnect(priv->phydev); + if (ndev->phydev) + phy_disconnect(ndev->phydev); /* Free IRQ */ while ((res = platform_get_resource(priv->pdev, IORESOURCE_IRQ, i))) { diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c index 5138407941cf..7f127dc1b7ba 100644 --- a/drivers/net/ethernet/xscale/ixp4xx_eth.c +++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c @@ -171,7 +171,6 @@ struct port { struct npe *npe; struct net_device *netdev; struct napi_struct napi; - struct phy_device *phydev; struct eth_plat_info *plat; buffer_t *rx_buff_tab[RX_DESCS], *tx_buff_tab[TX_DESCS]; struct desc *desc_tab; /* coherent */ @@ -562,7 +561,7 @@ static void ixp4xx_mdio_remove(void) static void ixp4xx_adjust_link(struct net_device *dev) { struct port *port = netdev_priv(dev); - struct phy_device *phydev = port->phydev; + struct phy_device *phydev = dev->phydev; if (!phydev->link) { if (port->speed) { @@ -976,8 +975,6 @@ static void eth_set_mcast_list(struct net_device *dev) static int eth_ioctl(struct net_device *dev, struct ifreq *req, int cmd) { - struct port *port = netdev_priv(dev); - if (!netif_running(dev)) return -EINVAL; @@ -988,7 +985,7 @@ static int eth_ioctl(struct net_device *dev, struct ifreq *req, int cmd) return hwtstamp_get(dev, req); } - return phy_mii_ioctl(port->phydev, req, cmd); + return phy_mii_ioctl(dev->phydev, req, cmd); } /* ethtool support */ @@ -1005,22 +1002,9 @@ static void ixp4xx_get_drvinfo(struct net_device *dev, strlcpy(info->bus_info, "internal", sizeof(info->bus_info)); } -static int ixp4xx_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) -{ - struct port *port = netdev_priv(dev); - return phy_ethtool_gset(port->phydev, cmd); -} - -static int ixp4xx_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) -{ - struct port *port = netdev_priv(dev); - return phy_ethtool_sset(port->phydev, cmd); -} - static int ixp4xx_nway_reset(struct net_device *dev) { - struct port *port = netdev_priv(dev); - return phy_start_aneg(port->phydev); + return phy_start_aneg(dev->phydev); } int ixp46x_phc_index = -1; @@ -1054,11 +1038,11 @@ static int ixp4xx_get_ts_info(struct net_device *dev, static const struct ethtool_ops ixp4xx_ethtool_ops = { .get_drvinfo = ixp4xx_get_drvinfo, - .get_settings = ixp4xx_get_settings, - .set_settings = ixp4xx_set_settings, .nway_reset = ixp4xx_nway_reset, .get_link = ethtool_op_get_link, .get_ts_info = ixp4xx_get_ts_info, + .get_link_ksettings = phy_ethtool_get_link_ksettings, + .set_link_ksettings = phy_ethtool_set_link_ksettings, }; @@ -1259,7 +1243,7 @@ static int eth_open(struct net_device *dev) } port->speed = 0; /* force "link up" message */ - phy_start(port->phydev); + phy_start(dev->phydev); for (i = 0; i < ETH_ALEN; i++) __raw_writel(dev->dev_addr[i], &port->regs->hw_addr[i]); @@ -1380,7 +1364,7 @@ static int eth_close(struct net_device *dev) printk(KERN_CRIT "%s: unable to disable loopback\n", dev->name); - phy_stop(port->phydev); + phy_stop(dev->phydev); if (!ports_open) qmgr_disable_irq(TXDONE_QUEUE); @@ -1405,6 +1389,7 @@ static int eth_init_one(struct platform_device *pdev) struct port *port; struct net_device *dev; struct eth_plat_info *plat = dev_get_platdata(&pdev->dev); + struct phy_device *phydev = NULL; u32 regs_phys; char phy_id[MII_BUS_ID_SIZE + 3]; int err; @@ -1466,14 +1451,14 @@ static int eth_init_one(struct platform_device *pdev) snprintf(phy_id, MII_BUS_ID_SIZE + 3, PHY_ID_FMT, mdio_bus->id, plat->phy); - port->phydev = phy_connect(dev, phy_id, &ixp4xx_adjust_link, - PHY_INTERFACE_MODE_MII); - if (IS_ERR(port->phydev)) { - err = PTR_ERR(port->phydev); + phydev = phy_connect(dev, phy_id, &ixp4xx_adjust_link, + PHY_INTERFACE_MODE_MII); + if (IS_ERR(phydev)) { + err = PTR_ERR(phydev); goto err_free_mem; } - port->phydev->irq = PHY_POLL; + phydev->irq = PHY_POLL; if ((err = register_netdev(dev))) goto err_phy_dis; @@ -1484,7 +1469,7 @@ static int eth_init_one(struct platform_device *pdev) return 0; err_phy_dis: - phy_disconnect(port->phydev); + phy_disconnect(phydev); err_free_mem: npe_port_tab[NPE_ID(port->id)] = NULL; release_resource(port->mem_res); @@ -1498,10 +1483,11 @@ err_free: static int eth_remove_one(struct platform_device *pdev) { struct net_device *dev = platform_get_drvdata(pdev); + struct phy_device *phydev = dev->phydev; struct port *port = netdev_priv(dev); unregister_netdev(dev); - phy_disconnect(port->phydev); + phy_disconnect(phydev); npe_port_tab[NPE_ID(port->id)] = NULL; npe_release(port->npe); release_resource(port->mem_res); diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 310e0b9c2657..5de892f3c0e0 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1036,12 +1036,17 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) static int __geneve_change_mtu(struct net_device *dev, int new_mtu, bool strict) { + struct geneve_dev *geneve = netdev_priv(dev); /* The max_mtu calculation does not take account of GENEVE * options, to avoid excluding potentially valid * configurations. */ - int max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - sizeof(struct iphdr) - - dev->hard_header_len; + int max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - dev->hard_header_len; + + if (geneve->remote.sa.sa_family == AF_INET6) + max_mtu -= sizeof(struct ipv6hdr); + else + max_mtu -= sizeof(struct iphdr); if (new_mtu < 68) return -EINVAL; diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 0e7eff7f1cd2..8bcd78f94966 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -2640,6 +2640,7 @@ static netdev_tx_t macsec_start_xmit(struct sk_buff *skb, u64_stats_update_begin(&secy_stats->syncp); secy_stats->stats.OutPktsUntagged++; u64_stats_update_end(&secy_stats->syncp); + skb->dev = macsec->real_dev; len = skb->len; ret = dev_queue_xmit(skb); count_tx(dev, ret, len); diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c index 2afa61b51d41..91177a4a32ad 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -57,6 +57,7 @@ /* PHY CTRL bits */ #define DP83867_PHYCR_FIFO_DEPTH_SHIFT 14 +#define DP83867_PHYCR_FIFO_DEPTH_MASK (3 << 14) /* RGMIIDCTL bits */ #define DP83867_RGMII_TX_CLK_DELAY_SHIFT 4 @@ -133,8 +134,8 @@ static int dp83867_of_init(struct phy_device *phydev) static int dp83867_config_init(struct phy_device *phydev) { struct dp83867_private *dp83867; - int ret; - u16 val, delay; + int ret, val; + u16 delay; if (!phydev->priv) { dp83867 = devm_kzalloc(&phydev->mdio.dev, sizeof(*dp83867), @@ -151,8 +152,12 @@ static int dp83867_config_init(struct phy_device *phydev) } if (phy_interface_is_rgmii(phydev)) { - ret = phy_write(phydev, MII_DP83867_PHYCTRL, - (dp83867->fifo_depth << DP83867_PHYCR_FIFO_DEPTH_SHIFT)); + val = phy_read(phydev, MII_DP83867_PHYCTRL); + if (val < 0) + return val; + val &= ~DP83867_PHYCR_FIFO_DEPTH_MASK; + val |= (dp83867->fifo_depth << DP83867_PHYCR_FIFO_DEPTH_SHIFT); + ret = phy_write(phydev, MII_DP83867_PHYCTRL, val); if (ret) return ret; } diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index f9eebea83516..a380649bf6b5 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2002,6 +2002,8 @@ static const struct net_device_ops team_netdev_ops = { .ndo_add_slave = team_add_slave, .ndo_del_slave = team_del_slave, .ndo_fix_features = team_fix_features, + .ndo_neigh_construct = netdev_default_l2upper_neigh_construct, + .ndo_neigh_destroy = netdev_default_l2upper_neigh_destroy, .ndo_change_carrier = team_change_carrier, .ndo_bridge_setlink = switchdev_port_bridge_setlink, .ndo_bridge_getlink = switchdev_port_bridge_getlink, diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 4884802e0af1..5eadb7a1ad7b 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -71,6 +71,7 @@ #include <net/sock.h> #include <linux/seq_file.h> #include <linux/uio.h> +#include <linux/skb_array.h> #include <asm/uaccess.h> @@ -167,6 +168,7 @@ struct tun_file { }; struct list_head next; struct tun_struct *detached; + struct skb_array tx_array; }; struct tun_flow_entry { @@ -515,7 +517,11 @@ static struct tun_struct *tun_enable_queue(struct tun_file *tfile) static void tun_queue_purge(struct tun_file *tfile) { - skb_queue_purge(&tfile->sk.sk_receive_queue); + struct sk_buff *skb; + + while ((skb = skb_array_consume(&tfile->tx_array)) != NULL) + kfree_skb(skb); + skb_queue_purge(&tfile->sk.sk_error_queue); } @@ -560,6 +566,8 @@ static void __tun_detach(struct tun_file *tfile, bool clean) tun->dev->reg_state == NETREG_REGISTERED) unregister_netdevice(tun->dev); } + if (tun) + skb_array_cleanup(&tfile->tx_array); sock_put(&tfile->sk); } } @@ -613,6 +621,7 @@ static void tun_detach_all(struct net_device *dev) static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filter) { struct tun_file *tfile = file->private_data; + struct net_device *dev = tun->dev; int err; err = security_tun_dev_attach(tfile->socket.sk, tun->security); @@ -642,6 +651,13 @@ static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filte if (!err) goto out; } + + if (!tfile->detached && + skb_array_init(&tfile->tx_array, dev->tx_queue_len, GFP_KERNEL)) { + err = -ENOMEM; + goto out; + } + tfile->queue_index = tun->numqueues; tfile->socket.sk->sk_shutdown &= ~RCV_SHUTDOWN; rcu_assign_pointer(tfile->tun, tun); @@ -891,8 +907,8 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) nf_reset(skb); - /* Enqueue packet */ - skb_queue_tail(&tfile->socket.sk->sk_receive_queue, skb); + if (skb_array_produce(&tfile->tx_array, skb)) + goto drop; /* Notify and wake up reader process */ if (tfile->flags & TUN_FASYNC) @@ -1107,7 +1123,7 @@ static unsigned int tun_chr_poll(struct file *file, poll_table *wait) poll_wait(file, sk_sleep(sk), wait); - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!skb_array_empty(&tfile->tx_array)) mask |= POLLIN | POLLRDNORM; if (sock_writeable(sk) || @@ -1426,22 +1442,63 @@ done: return total; } +static struct sk_buff *tun_ring_recv(struct tun_file *tfile, int noblock, + int *err) +{ + DECLARE_WAITQUEUE(wait, current); + struct sk_buff *skb = NULL; + int error = 0; + + skb = skb_array_consume(&tfile->tx_array); + if (skb) + goto out; + if (noblock) { + error = -EAGAIN; + goto out; + } + + add_wait_queue(&tfile->wq.wait, &wait); + current->state = TASK_INTERRUPTIBLE; + + while (1) { + skb = skb_array_consume(&tfile->tx_array); + if (skb) + break; + if (signal_pending(current)) { + error = -ERESTARTSYS; + break; + } + if (tfile->socket.sk->sk_shutdown & RCV_SHUTDOWN) { + error = -EFAULT; + break; + } + + schedule(); + } + + current->state = TASK_RUNNING; + remove_wait_queue(&tfile->wq.wait, &wait); + +out: + *err = error; + return skb; +} + static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile, struct iov_iter *to, int noblock) { struct sk_buff *skb; ssize_t ret; - int peeked, err, off = 0; + int err; tun_debug(KERN_INFO, tun, "tun_do_read\n"); if (!iov_iter_count(to)) return 0; - /* Read frames from queue */ - skb = __skb_recv_datagram(tfile->socket.sk, noblock ? MSG_DONTWAIT : 0, - &peeked, &off, &err); + /* Read frames from ring */ + skb = tun_ring_recv(tfile, noblock, &err); if (!skb) return err; @@ -1574,8 +1631,25 @@ out: return ret; } +static int tun_peek_len(struct socket *sock) +{ + struct tun_file *tfile = container_of(sock, struct tun_file, socket); + struct tun_struct *tun; + int ret = 0; + + tun = __tun_get(tfile); + if (!tun) + return 0; + + ret = skb_array_peek_len(&tfile->tx_array); + tun_put(tun); + + return ret; +} + /* Ops structure to mimic raw sockets with tun */ static const struct proto_ops tun_socket_ops = { + .peek_len = tun_peek_len, .sendmsg = tun_sendmsg, .recvmsg = tun_recvmsg, }; @@ -2397,6 +2471,53 @@ static const struct ethtool_ops tun_ethtool_ops = { .get_ts_info = ethtool_op_get_ts_info, }; +static int tun_queue_resize(struct tun_struct *tun) +{ + struct net_device *dev = tun->dev; + struct tun_file *tfile; + struct skb_array **arrays; + int n = tun->numqueues + tun->numdisabled; + int ret, i; + + arrays = kmalloc(sizeof *arrays * n, GFP_KERNEL); + if (!arrays) + return -ENOMEM; + + for (i = 0; i < tun->numqueues; i++) { + tfile = rtnl_dereference(tun->tfiles[i]); + arrays[i] = &tfile->tx_array; + } + list_for_each_entry(tfile, &tun->disabled, next) + arrays[i++] = &tfile->tx_array; + + ret = skb_array_resize_multiple(arrays, n, + dev->tx_queue_len, GFP_KERNEL); + + kfree(arrays); + return ret; +} + +static int tun_device_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct tun_struct *tun = netdev_priv(dev); + + switch (event) { + case NETDEV_CHANGE_TX_QUEUE_LEN: + if (tun_queue_resize(tun)) + return NOTIFY_BAD; + break; + default: + break; + } + + return NOTIFY_DONE; +} + +static struct notifier_block tun_notifier_block __read_mostly = { + .notifier_call = tun_device_event, +}; static int __init tun_init(void) { @@ -2416,6 +2537,8 @@ static int __init tun_init(void) pr_err("Can't register misc device %d\n", TUN_MINOR); goto err_misc; } + + register_netdevice_notifier(&tun_notifier_block); return 0; err_misc: rtnl_link_unregister(&tun_link_ops); @@ -2427,6 +2550,7 @@ static void tun_cleanup(void) { misc_deregister(&tun_miscdev); rtnl_link_unregister(&tun_link_ops); + unregister_netdevice_notifier(&tun_notifier_block); } /* Get an underlying socket object from tun file. Returns error unless file is diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 53759c315b97..877c9516e781 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -854,6 +854,13 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_ if (cdc_ncm_init(dev)) goto error2; + /* Some firmwares need a pause here or they will silently fail + * to set up the interface properly. This value was decided + * empirically on a Sierra Wireless MC7455 running 02.08.02.00 + * firmware. + */ + usleep_range(10000, 20000); + /* configure data interface */ temp = usb_set_interface(dev->udev, iface_no, data_altsetting); if (temp) { diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 24d367280ecf..b225bc27fbe2 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -31,7 +31,7 @@ #define NETNEXT_VERSION "08" /* Information for net */ -#define NET_VERSION "4" +#define NET_VERSION "5" #define DRIVER_VERSION "v1." NETNEXT_VERSION "." NET_VERSION #define DRIVER_AUTHOR "Realtek linux nic maintainers <[email protected]>" @@ -625,6 +625,7 @@ struct r8152 { int (*eee_set)(struct r8152 *, struct ethtool_eee *); bool (*in_nway)(struct r8152 *); void (*hw_phy_cfg)(struct r8152 *); + void (*autosuspend_en)(struct r8152 *tp, bool enable); } rtl_ops; int intr_interval; @@ -2412,9 +2413,6 @@ static void rtl_runtime_suspend_enable(struct r8152 *tp, bool enable) if (enable) { u32 ocp_data; - r8153_u1u2en(tp, false); - r8153_u2p3en(tp, false); - __rtl_set_wol(tp, WAKE_ANY); ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_CONFIG); @@ -2425,7 +2423,28 @@ static void rtl_runtime_suspend_enable(struct r8152 *tp, bool enable) ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_NORAML); } else { + u32 ocp_data; + __rtl_set_wol(tp, tp->saved_wolopts); + + ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_CONFIG); + + ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_CONFIG34); + ocp_data &= ~LINK_OFF_WAKE_EN; + ocp_write_word(tp, MCU_TYPE_PLA, PLA_CONFIG34, ocp_data); + + ocp_write_byte(tp, MCU_TYPE_PLA, PLA_CRWECR, CRWECR_NORAML); + } +} + +static void rtl8153_runtime_enable(struct r8152 *tp, bool enable) +{ + rtl_runtime_suspend_enable(tp, enable); + + if (enable) { + r8153_u1u2en(tp, false); + r8153_u2p3en(tp, false); + } else { r8153_u2p3en(tp, true); r8153_u1u2en(tp, true); } @@ -3530,7 +3549,7 @@ static int rtl8152_suspend(struct usb_interface *intf, pm_message_t message) napi_disable(&tp->napi); if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) { rtl_stop_rx(tp); - rtl_runtime_suspend_enable(tp, true); + tp->rtl_ops.autosuspend_en(tp, true); } else { cancel_delayed_work_sync(&tp->schedule); tp->rtl_ops.down(tp); @@ -3557,7 +3576,7 @@ static int rtl8152_resume(struct usb_interface *intf) if (netif_running(tp->netdev) && tp->netdev->flags & IFF_UP) { if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) { - rtl_runtime_suspend_enable(tp, false); + tp->rtl_ops.autosuspend_en(tp, false); clear_bit(SELECTIVE_SUSPEND, &tp->flags); napi_disable(&tp->napi); set_bit(WORK_ENABLE, &tp->flags); @@ -3572,7 +3591,7 @@ static int rtl8152_resume(struct usb_interface *intf) usb_submit_urb(tp->intr_urb, GFP_KERNEL); } else if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) { if (tp->netdev->flags & IFF_UP) - rtl_runtime_suspend_enable(tp, false); + tp->rtl_ops.autosuspend_en(tp, false); clear_bit(SELECTIVE_SUSPEND, &tp->flags); } @@ -4158,6 +4177,7 @@ static int rtl_ops_init(struct r8152 *tp) ops->eee_set = r8152_set_eee; ops->in_nway = rtl8152_in_nway; ops->hw_phy_cfg = r8152b_hw_phy_cfg; + ops->autosuspend_en = rtl_runtime_suspend_enable; break; case RTL_VER_03: @@ -4174,6 +4194,7 @@ static int rtl_ops_init(struct r8152 *tp) ops->eee_set = r8153_set_eee; ops->in_nway = rtl8153_in_nway; ops->hw_phy_cfg = r8153_hw_phy_cfg; + ops->autosuspend_en = rtl8153_runtime_enable; break; default: diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 61ba46404937..6086a0163249 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -395,8 +395,11 @@ int usbnet_change_mtu (struct net_device *net, int new_mtu) dev->hard_mtu = net->mtu + net->hard_header_len; if (dev->rx_urb_size == old_hard_mtu) { dev->rx_urb_size = dev->hard_mtu; - if (dev->rx_urb_size > old_rx_urb_size) + if (dev->rx_urb_size > old_rx_urb_size) { + usbnet_pause_rx(dev); usbnet_unlink_rx_urbs(dev); + usbnet_resume_rx(dev); + } } /* max qlen depend on hard_mtu and rx_urb_size */ @@ -1508,8 +1511,9 @@ static void usbnet_bh (unsigned long param) } else if (netif_running (dev->net) && netif_device_present (dev->net) && netif_carrier_ok(dev->net) && - !timer_pending (&dev->delay) && - !test_bit (EVENT_RX_HALT, &dev->flags)) { + !timer_pending(&dev->delay) && + !test_bit(EVENT_RX_PAUSED, &dev->flags) && + !test_bit(EVENT_RX_HALT, &dev->flags)) { int temp = dev->rxq.qlen; if (temp < RX_QLEN(dev)) { diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index b3762822b653..1ce7420322ee 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -779,6 +779,25 @@ static int vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4) return rc; } +static int vrf_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + return 0; +} + +static struct sk_buff *vrf_rcv_nfhook(u8 pf, unsigned int hook, + struct sk_buff *skb, + struct net_device *dev) +{ + struct net *net = dev_net(dev); + + nf_reset(skb); + + if (NF_HOOK(pf, hook, net, NULL, skb, dev, NULL, vrf_rcv_finish) < 0) + skb = NULL; /* kfree_skb(skb) handled by nf code */ + + return skb; +} + #if IS_ENABLED(CONFIG_IPV6) /* neighbor handling is done with actual device; do not want * to flip skb->dev for those ndisc packets. This really fails @@ -899,6 +918,7 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, if (need_strict) vrf_ip6_input_dst(skb, vrf_dev, orig_iif); + skb = vrf_rcv_nfhook(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, vrf_dev); out: return skb; } @@ -929,6 +949,7 @@ static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev, dev_queue_xmit_nit(skb, vrf_dev); skb_pull(skb, skb->mac_len); + skb = vrf_rcv_nfhook(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, vrf_dev); out: return skb; } diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index f7718ec685fa..cea8350fbc7e 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -344,6 +344,8 @@ struct device *nd_pfn_create(struct nd_region *nd_region) int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) { u64 checksum, offset; + unsigned long align; + enum nd_pfn_mode mode; struct nd_namespace_io *nsio; struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; struct nd_namespace_common *ndns = nd_pfn->ndns; @@ -386,22 +388,50 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) return -ENXIO; } + align = le32_to_cpu(pfn_sb->align); + offset = le64_to_cpu(pfn_sb->dataoff); + if (align == 0) + align = 1UL << ilog2(offset); + mode = le32_to_cpu(pfn_sb->mode); + if (!nd_pfn->uuid) { - /* from probe we allocate */ + /* + * When probing a namepace via nd_pfn_probe() the uuid + * is NULL (see: nd_pfn_devinit()) we init settings from + * pfn_sb + */ nd_pfn->uuid = kmemdup(pfn_sb->uuid, 16, GFP_KERNEL); if (!nd_pfn->uuid) return -ENOMEM; + nd_pfn->align = align; + nd_pfn->mode = mode; } else { - /* from init we validate */ + /* + * When probing a pfn / dax instance we validate the + * live settings against the pfn_sb + */ if (memcmp(nd_pfn->uuid, pfn_sb->uuid, 16) != 0) return -ENODEV; + + /* + * If the uuid validates, but other settings mismatch + * return EINVAL because userspace has managed to change + * the configuration without specifying new + * identification. + */ + if (nd_pfn->align != align || nd_pfn->mode != mode) { + dev_err(&nd_pfn->dev, + "init failed, settings mismatch\n"); + dev_dbg(&nd_pfn->dev, "align: %lx:%lx mode: %d:%d\n", + nd_pfn->align, align, nd_pfn->mode, + mode); + return -EINVAL; + } } - if (nd_pfn->align == 0) - nd_pfn->align = le32_to_cpu(pfn_sb->align); - if (nd_pfn->align > nvdimm_namespace_capacity(ndns)) { + if (align > nvdimm_namespace_capacity(ndns)) { dev_err(&nd_pfn->dev, "alignment: %lx exceeds capacity %llx\n", - nd_pfn->align, nvdimm_namespace_capacity(ndns)); + align, nvdimm_namespace_capacity(ndns)); return -EINVAL; } @@ -411,7 +441,6 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) * namespace has changed since the pfn superblock was * established. */ - offset = le64_to_cpu(pfn_sb->dataoff); nsio = to_nd_namespace_io(&ndns->dev); if (offset >= resource_size(&nsio->res)) { dev_err(&nd_pfn->dev, "pfn array size exceeds capacity of %s\n", @@ -419,10 +448,11 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) return -EBUSY; } - if ((nd_pfn->align && !IS_ALIGNED(offset, nd_pfn->align)) + if ((align && !IS_ALIGNED(offset, align)) || !IS_ALIGNED(offset, PAGE_SIZE)) { - dev_err(&nd_pfn->dev, "bad offset: %#llx dax disabled\n", - offset); + dev_err(&nd_pfn->dev, + "bad offset: %#llx dax disabled align: %#lx\n", + offset, align); return -ENXIO; } @@ -502,7 +532,6 @@ static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn, res->start += start_pad; res->end -= end_trunc; - nd_pfn->mode = le32_to_cpu(nd_pfn->pfn_sb->mode); if (nd_pfn->mode == PFN_MODE_RAM) { if (offset < SZ_8K) return ERR_PTR(-EINVAL); diff --git a/drivers/phy/phy-bcm-ns-usb2.c b/drivers/phy/phy-bcm-ns-usb2.c index 95ab6b2a0de5..58dff80e9386 100644 --- a/drivers/phy/phy-bcm-ns-usb2.c +++ b/drivers/phy/phy-bcm-ns-usb2.c @@ -109,8 +109,8 @@ static int bcm_ns_usb2_probe(struct platform_device *pdev) } usb2->phy = devm_phy_create(dev, NULL, &ops); - if (IS_ERR(dev)) - return PTR_ERR(dev); + if (IS_ERR(usb2->phy)) + return PTR_ERR(usb2->phy); phy_set_drvdata(usb2->phy, usb2); platform_set_drvdata(pdev, usb2); diff --git a/drivers/phy/phy-miphy28lp.c b/drivers/phy/phy-miphy28lp.c index 3acd2a1808df..213e2e15339c 100644 --- a/drivers/phy/phy-miphy28lp.c +++ b/drivers/phy/phy-miphy28lp.c @@ -1143,7 +1143,8 @@ static int miphy28lp_probe_resets(struct device_node *node, struct miphy28lp_dev *miphy_dev = miphy_phy->phydev; int err; - miphy_phy->miphy_rst = of_reset_control_get(node, "miphy-sw-rst"); + miphy_phy->miphy_rst = + of_reset_control_get_shared(node, "miphy-sw-rst"); if (IS_ERR(miphy_phy->miphy_rst)) { dev_err(miphy_dev->dev, diff --git a/drivers/phy/phy-rcar-gen3-usb2.c b/drivers/phy/phy-rcar-gen3-usb2.c index 76bb88f0700a..4be3f5dbbc9f 100644 --- a/drivers/phy/phy-rcar-gen3-usb2.c +++ b/drivers/phy/phy-rcar-gen3-usb2.c @@ -144,12 +144,6 @@ static void rcar_gen3_init_for_peri(struct rcar_gen3_chan *ch) extcon_set_cable_state_(ch->extcon, EXTCON_USB, true); } -static bool rcar_gen3_check_vbus(struct rcar_gen3_chan *ch) -{ - return !!(readl(ch->base + USB2_ADPCTRL) & - USB2_ADPCTRL_OTGSESSVLD); -} - static bool rcar_gen3_check_id(struct rcar_gen3_chan *ch) { return !!(readl(ch->base + USB2_ADPCTRL) & USB2_ADPCTRL_IDDIG); @@ -157,13 +151,7 @@ static bool rcar_gen3_check_id(struct rcar_gen3_chan *ch) static void rcar_gen3_device_recognition(struct rcar_gen3_chan *ch) { - bool is_host = true; - - /* B-device? */ - if (rcar_gen3_check_id(ch) && rcar_gen3_check_vbus(ch)) - is_host = false; - - if (is_host) + if (!rcar_gen3_check_id(ch)) rcar_gen3_init_for_host(ch); else rcar_gen3_init_for_peri(ch); diff --git a/drivers/phy/phy-rockchip-dp.c b/drivers/phy/phy-rockchip-dp.c index 793ecb6d87bc..8b267a746576 100644 --- a/drivers/phy/phy-rockchip-dp.c +++ b/drivers/phy/phy-rockchip-dp.c @@ -90,7 +90,7 @@ static int rockchip_dp_phy_probe(struct platform_device *pdev) return -ENODEV; dp = devm_kzalloc(dev, sizeof(*dp), GFP_KERNEL); - if (IS_ERR(dp)) + if (!dp) return -ENOMEM; dp->dev = dev; diff --git a/drivers/phy/phy-stih407-usb.c b/drivers/phy/phy-stih407-usb.c index 1d5ae5f8ef69..b1f44ab669fb 100644 --- a/drivers/phy/phy-stih407-usb.c +++ b/drivers/phy/phy-stih407-usb.c @@ -105,13 +105,13 @@ static int stih407_usb2_picophy_probe(struct platform_device *pdev) phy_dev->dev = dev; dev_set_drvdata(dev, phy_dev); - phy_dev->rstc = devm_reset_control_get(dev, "global"); + phy_dev->rstc = devm_reset_control_get_shared(dev, "global"); if (IS_ERR(phy_dev->rstc)) { dev_err(dev, "failed to ctrl picoPHY reset\n"); return PTR_ERR(phy_dev->rstc); } - phy_dev->rstport = devm_reset_control_get(dev, "port"); + phy_dev->rstport = devm_reset_control_get_exclusive(dev, "port"); if (IS_ERR(phy_dev->rstport)) { dev_err(dev, "failed to ctrl picoPHY reset\n"); return PTR_ERR(phy_dev->rstport); diff --git a/drivers/phy/phy-sun4i-usb.c b/drivers/phy/phy-sun4i-usb.c index bae54f7a1f48..de3101fbbf40 100644 --- a/drivers/phy/phy-sun4i-usb.c +++ b/drivers/phy/phy-sun4i-usb.c @@ -175,7 +175,7 @@ static void sun4i_usb_phy_write(struct sun4i_usb_phy *phy, u32 addr, u32 data, { struct sun4i_usb_phy_data *phy_data = to_sun4i_usb_phy_data(phy); u32 temp, usbc_bit = BIT(phy->index * 2); - void *phyctl = phy_data->base + phy_data->cfg->phyctl_offset; + void __iomem *phyctl = phy_data->base + phy_data->cfg->phyctl_offset; int i; mutex_lock(&phy_data->mutex); @@ -514,9 +514,9 @@ static int sun4i_usb_phy_remove(struct platform_device *pdev) if (data->vbus_power_nb_registered) power_supply_unreg_notifier(&data->vbus_power_nb); - if (data->id_det_irq >= 0) + if (data->id_det_irq > 0) devm_free_irq(dev, data->id_det_irq, data); - if (data->vbus_det_irq >= 0) + if (data->vbus_det_irq > 0) devm_free_irq(dev, data->vbus_det_irq, data); cancel_delayed_work_sync(&data->detect); @@ -645,11 +645,11 @@ static int sun4i_usb_phy_probe(struct platform_device *pdev) data->id_det_irq = gpiod_to_irq(data->id_det_gpio); data->vbus_det_irq = gpiod_to_irq(data->vbus_det_gpio); - if ((data->id_det_gpio && data->id_det_irq < 0) || - (data->vbus_det_gpio && data->vbus_det_irq < 0)) + if ((data->id_det_gpio && data->id_det_irq <= 0) || + (data->vbus_det_gpio && data->vbus_det_irq <= 0)) data->phy0_poll = true; - if (data->id_det_irq >= 0) { + if (data->id_det_irq > 0) { ret = devm_request_irq(dev, data->id_det_irq, sun4i_usb_phy0_id_vbus_det_irq, IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING, @@ -660,7 +660,7 @@ static int sun4i_usb_phy_probe(struct platform_device *pdev) } } - if (data->vbus_det_irq >= 0) { + if (data->vbus_det_irq > 0) { ret = devm_request_irq(dev, data->vbus_det_irq, sun4i_usb_phy0_id_vbus_det_irq, IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING, diff --git a/drivers/platform/chrome/cros_ec_dev.c b/drivers/platform/chrome/cros_ec_dev.c index 6d8ee3b15872..8abd80dbcbed 100644 --- a/drivers/platform/chrome/cros_ec_dev.c +++ b/drivers/platform/chrome/cros_ec_dev.c @@ -151,13 +151,19 @@ static long ec_device_ioctl_xcmd(struct cros_ec_dev *ec, void __user *arg) goto exit; } + if (u_cmd.outsize != s_cmd->outsize || + u_cmd.insize != s_cmd->insize) { + ret = -EINVAL; + goto exit; + } + s_cmd->command += ec->cmd_offset; ret = cros_ec_cmd_xfer(ec->ec_dev, s_cmd); /* Only copy data to userland if data was received. */ if (ret < 0) goto exit; - if (copy_to_user(arg, s_cmd, sizeof(*s_cmd) + u_cmd.insize)) + if (copy_to_user(arg, s_cmd, sizeof(*s_cmd) + s_cmd->insize)) ret = -EFAULT; exit: kfree(s_cmd); diff --git a/drivers/regulator/anatop-regulator.c b/drivers/regulator/anatop-regulator.c index 63cd5e68c864..3a6d0290c54c 100644 --- a/drivers/regulator/anatop-regulator.c +++ b/drivers/regulator/anatop-regulator.c @@ -296,7 +296,7 @@ static int anatop_regulator_probe(struct platform_device *pdev) if (!sreg->sel && !strcmp(sreg->name, "vddpu")) sreg->sel = 22; - if (!sreg->sel) { + if (!sreg->bypass && !sreg->sel) { dev_err(&pdev->dev, "Failed to read a valid default voltage selector.\n"); return -EINVAL; } diff --git a/drivers/regulator/max77620-regulator.c b/drivers/regulator/max77620-regulator.c index 321e804aeab0..a1b49a6d538f 100644 --- a/drivers/regulator/max77620-regulator.c +++ b/drivers/regulator/max77620-regulator.c @@ -123,6 +123,9 @@ static int max77620_regulator_set_fps_src(struct max77620_regulator *pmic, unsigned int val; int ret; + if (!rinfo) + return 0; + switch (fps_src) { case MAX77620_FPS_SRC_0: case MAX77620_FPS_SRC_1: @@ -171,6 +174,9 @@ static int max77620_regulator_set_fps_slots(struct max77620_regulator *pmic, int pd = rpdata->active_fps_pd_slot; int ret = 0; + if (!rinfo) + return 0; + if (is_suspend) { pu = rpdata->suspend_fps_pu_slot; pd = rpdata->suspend_fps_pd_slot; @@ -680,7 +686,6 @@ static struct max77620_regulator_info max77620_regs_info[MAX77620_NUM_REGS] = { RAIL_SD(SD1, sd1, "in-sd1", SD1, 600000, 1550000, 12500, 0x22, SD1), RAIL_SD(SD2, sd2, "in-sd2", SDX, 600000, 3787500, 12500, 0xFF, NONE), RAIL_SD(SD3, sd3, "in-sd3", SDX, 600000, 3787500, 12500, 0xFF, NONE), - RAIL_SD(SD4, sd4, "in-sd4", SDX, 600000, 3787500, 12500, 0xFF, NONE), RAIL_LDO(LDO0, ldo0, "in-ldo0-1", N, 800000, 2375000, 25000), RAIL_LDO(LDO1, ldo1, "in-ldo0-1", N, 800000, 2375000, 25000), diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 9fd48de38a4c..7bc20c5188bc 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -1045,6 +1045,7 @@ static void qeth_l2_remove_device(struct ccwgroup_device *cgdev) qeth_l2_set_offline(cgdev); if (card->dev) { + netif_napi_del(&card->napi); unregister_netdev(card->dev); card->dev = NULL; } diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index bcd324e054a9..72934666fedf 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -3167,6 +3167,7 @@ static void qeth_l3_remove_device(struct ccwgroup_device *cgdev) qeth_l3_set_offline(cgdev); if (card->dev) { + netif_napi_del(&card->napi); unregister_netdev(card->dev); card->dev = NULL; } diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c index cd89682065b9..1026e180eed7 100644 --- a/drivers/spi/spi-rockchip.c +++ b/drivers/spi/spi-rockchip.c @@ -578,7 +578,7 @@ static int rockchip_spi_transfer_one( struct spi_device *spi, struct spi_transfer *xfer) { - int ret = 1; + int ret = 0; struct rockchip_spi *rs = spi_master_get_devdata(master); WARN_ON(readl_relaxed(rs->regs + ROCKCHIP_SPI_SSIENR) && @@ -627,6 +627,8 @@ static int rockchip_spi_transfer_one( spi_enable_chip(rs, 1); ret = rockchip_spi_prepare_dma(rs); } + /* successful DMA prepare means the transfer is in progress */ + ret = ret ? ret : 1; } else { spi_enable_chip(rs, 1); ret = rockchip_spi_pio_transfer(rs); diff --git a/drivers/spi/spi-sun4i.c b/drivers/spi/spi-sun4i.c index 1ddd9e2309b6..cf007f3b83ec 100644 --- a/drivers/spi/spi-sun4i.c +++ b/drivers/spi/spi-sun4i.c @@ -173,13 +173,17 @@ static int sun4i_spi_transfer_one(struct spi_master *master, { struct sun4i_spi *sspi = spi_master_get_devdata(master); unsigned int mclk_rate, div, timeout; + unsigned int start, end, tx_time; unsigned int tx_len = 0; int ret = 0; u32 reg; /* We don't support transfer larger than the FIFO */ if (tfr->len > SUN4I_FIFO_DEPTH) - return -EINVAL; + return -EMSGSIZE; + + if (tfr->tx_buf && tfr->len >= SUN4I_FIFO_DEPTH) + return -EMSGSIZE; reinit_completion(&sspi->done); sspi->tx_buf = tfr->tx_buf; @@ -269,8 +273,12 @@ static int sun4i_spi_transfer_one(struct spi_master *master, sun4i_spi_write(sspi, SUN4I_BURST_CNT_REG, SUN4I_BURST_CNT(tfr->len)); sun4i_spi_write(sspi, SUN4I_XMIT_CNT_REG, SUN4I_XMIT_CNT(tx_len)); - /* Fill the TX FIFO */ - sun4i_spi_fill_fifo(sspi, SUN4I_FIFO_DEPTH); + /* + * Fill the TX FIFO + * Filling the FIFO fully causes timeout for some reason + * at least on spi2 on A10s + */ + sun4i_spi_fill_fifo(sspi, SUN4I_FIFO_DEPTH - 1); /* Enable the interrupts */ sun4i_spi_write(sspi, SUN4I_INT_CTL_REG, SUN4I_INT_CTL_TC); @@ -279,9 +287,16 @@ static int sun4i_spi_transfer_one(struct spi_master *master, reg = sun4i_spi_read(sspi, SUN4I_CTL_REG); sun4i_spi_write(sspi, SUN4I_CTL_REG, reg | SUN4I_CTL_XCH); + tx_time = max(tfr->len * 8 * 2 / (tfr->speed_hz / 1000), 100U); + start = jiffies; timeout = wait_for_completion_timeout(&sspi->done, - msecs_to_jiffies(1000)); + msecs_to_jiffies(tx_time)); + end = jiffies; if (!timeout) { + dev_warn(&master->dev, + "%s: timeout transferring %u bytes@%iHz for %i(%i)ms", + dev_name(&spi->dev), tfr->len, tfr->speed_hz, + jiffies_to_msecs(end - start), tx_time); ret = -ETIMEDOUT; goto out; } diff --git a/drivers/spi/spi-sun6i.c b/drivers/spi/spi-sun6i.c index 42e2c4bd690a..7fce79a60608 100644 --- a/drivers/spi/spi-sun6i.c +++ b/drivers/spi/spi-sun6i.c @@ -160,6 +160,7 @@ static int sun6i_spi_transfer_one(struct spi_master *master, { struct sun6i_spi *sspi = spi_master_get_devdata(master); unsigned int mclk_rate, div, timeout; + unsigned int start, end, tx_time; unsigned int tx_len = 0; int ret = 0; u32 reg; @@ -269,9 +270,16 @@ static int sun6i_spi_transfer_one(struct spi_master *master, reg = sun6i_spi_read(sspi, SUN6I_TFR_CTL_REG); sun6i_spi_write(sspi, SUN6I_TFR_CTL_REG, reg | SUN6I_TFR_CTL_XCH); + tx_time = max(tfr->len * 8 * 2 / (tfr->speed_hz / 1000), 100U); + start = jiffies; timeout = wait_for_completion_timeout(&sspi->done, - msecs_to_jiffies(1000)); + msecs_to_jiffies(tx_time)); + end = jiffies; if (!timeout) { + dev_warn(&master->dev, + "%s: timeout transferring %u bytes@%iHz for %i(%i)ms", + dev_name(&spi->dev), tfr->len, tfr->speed_hz, + jiffies_to_msecs(end - start), tx_time); ret = -ETIMEDOUT; goto out; } diff --git a/drivers/spi/spi-ti-qspi.c b/drivers/spi/spi-ti-qspi.c index 443f664534e1..29ea8d2f9824 100644 --- a/drivers/spi/spi-ti-qspi.c +++ b/drivers/spi/spi-ti-qspi.c @@ -646,6 +646,13 @@ free_master: static int ti_qspi_remove(struct platform_device *pdev) { + struct ti_qspi *qspi = platform_get_drvdata(pdev); + int rc; + + rc = spi_master_suspend(qspi->master); + if (rc) + return rc; + pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); diff --git a/drivers/staging/iio/accel/sca3000_core.c b/drivers/staging/iio/accel/sca3000_core.c index a8f533af9eca..ec12181822e6 100644 --- a/drivers/staging/iio/accel/sca3000_core.c +++ b/drivers/staging/iio/accel/sca3000_core.c @@ -594,7 +594,7 @@ static ssize_t sca3000_read_frequency(struct device *dev, goto error_ret_mut; ret = sca3000_read_ctrl_reg(st, SCA3000_REG_CTRL_SEL_OUT_CTRL); mutex_unlock(&st->lock); - if (ret) + if (ret < 0) goto error_ret; val = ret; if (base_freq > 0) diff --git a/drivers/staging/iio/adc/ad7606_spi.c b/drivers/staging/iio/adc/ad7606_spi.c index 825da0769936..9587fa86dc69 100644 --- a/drivers/staging/iio/adc/ad7606_spi.c +++ b/drivers/staging/iio/adc/ad7606_spi.c @@ -21,7 +21,7 @@ static int ad7606_spi_read_block(struct device *dev, { struct spi_device *spi = to_spi_device(dev); int i, ret; - unsigned short *data; + unsigned short *data = buf; __be16 *bdata = buf; ret = spi_read(spi, buf, count * 2); diff --git a/drivers/staging/iio/impedance-analyzer/ad5933.c b/drivers/staging/iio/impedance-analyzer/ad5933.c index 9f43976f4ef2..170ac980abcb 100644 --- a/drivers/staging/iio/impedance-analyzer/ad5933.c +++ b/drivers/staging/iio/impedance-analyzer/ad5933.c @@ -444,10 +444,10 @@ static ssize_t ad5933_store(struct device *dev, st->settling_cycles = val; /* 2x, 4x handling, see datasheet */ - if (val > 511) - val = (val >> 1) | (1 << 9); - else if (val > 1022) + if (val > 1022) val = (val >> 2) | (3 << 9); + else if (val > 511) + val = (val >> 1) | (1 << 9); dat = cpu_to_be16(val); ret = ad5933_i2c_write(st->client, diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index f856c4544eea..51e0d32883ba 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -667,8 +667,11 @@ static void pty_unix98_remove(struct tty_driver *driver, struct tty_struct *tty) fsi = tty->driver_data; else fsi = tty->link->driver_data; - devpts_kill_index(fsi, tty->index); - devpts_release(fsi); + + if (fsi) { + devpts_kill_index(fsi, tty->index); + devpts_release(fsi); + } } static const struct tty_operations ptm_unix98_ops = { diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index dc125322f48f..5b0fe97c46ca 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -750,6 +750,7 @@ static void visual_init(struct vc_data *vc, int num, int init) vc->vc_complement_mask = 0; vc->vc_can_do_color = 0; vc->vc_panic_force_write = false; + vc->vc_cur_blink_ms = DEFAULT_CURSOR_BLINK_MS; vc->vc_sw->con_init(vc, init); if (!vc->vc_complement_mask) vc->vc_complement_mask = vc->vc_can_do_color ? 0x7700 : 0x0800; diff --git a/drivers/usb/common/usb-otg-fsm.c b/drivers/usb/common/usb-otg-fsm.c index 9059b7dc185e..2f537bbdda09 100644 --- a/drivers/usb/common/usb-otg-fsm.c +++ b/drivers/usb/common/usb-otg-fsm.c @@ -21,6 +21,7 @@ * 675 Mass Ave, Cambridge, MA 02139, USA. */ +#include <linux/module.h> #include <linux/kernel.h> #include <linux/types.h> #include <linux/mutex.h> @@ -450,3 +451,4 @@ int otg_statemachine(struct otg_fsm *fsm) return fsm->state_changed; } EXPORT_SYMBOL_GPL(otg_statemachine); +MODULE_LICENSE("GPL"); diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 34b837ae1ed7..d2e3f655c26f 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -2598,26 +2598,23 @@ EXPORT_SYMBOL_GPL(usb_create_hcd); * Don't deallocate the bandwidth_mutex until the last shared usb_hcd is * deallocated. * - * Make sure to only deallocate the bandwidth_mutex when the primary HCD is - * freed. When hcd_release() is called for either hcd in a peer set - * invalidate the peer's ->shared_hcd and ->primary_hcd pointers to - * block new peering attempts + * Make sure to deallocate the bandwidth_mutex only when the last HCD is + * freed. When hcd_release() is called for either hcd in a peer set, + * invalidate the peer's ->shared_hcd and ->primary_hcd pointers. */ static void hcd_release(struct kref *kref) { struct usb_hcd *hcd = container_of (kref, struct usb_hcd, kref); mutex_lock(&usb_port_peer_mutex); - if (usb_hcd_is_primary_hcd(hcd)) { - kfree(hcd->address0_mutex); - kfree(hcd->bandwidth_mutex); - } if (hcd->shared_hcd) { struct usb_hcd *peer = hcd->shared_hcd; peer->shared_hcd = NULL; - if (peer->primary_hcd == hcd) - peer->primary_hcd = NULL; + peer->primary_hcd = NULL; + } else { + kfree(hcd->address0_mutex); + kfree(hcd->bandwidth_mutex); } mutex_unlock(&usb_port_peer_mutex); kfree(hcd); diff --git a/drivers/usb/dwc3/dwc3-st.c b/drivers/usb/dwc3/dwc3-st.c index 50d6ae6f88bc..89a2f712fdfe 100644 --- a/drivers/usb/dwc3/dwc3-st.c +++ b/drivers/usb/dwc3/dwc3-st.c @@ -233,7 +233,8 @@ static int st_dwc3_probe(struct platform_device *pdev) dev_vdbg(&pdev->dev, "glue-logic addr 0x%p, syscfg-reg offset 0x%x\n", dwc3_data->glue_base, dwc3_data->syscfg_reg_off); - dwc3_data->rstc_pwrdn = devm_reset_control_get(dev, "powerdown"); + dwc3_data->rstc_pwrdn = + devm_reset_control_get_exclusive(dev, "powerdown"); if (IS_ERR(dwc3_data->rstc_pwrdn)) { dev_err(&pdev->dev, "could not get power controller\n"); ret = PTR_ERR(dwc3_data->rstc_pwrdn); @@ -243,7 +244,8 @@ static int st_dwc3_probe(struct platform_device *pdev) /* Manage PowerDown */ reset_control_deassert(dwc3_data->rstc_pwrdn); - dwc3_data->rstc_rst = devm_reset_control_get(dev, "softreset"); + dwc3_data->rstc_rst = + devm_reset_control_get_shared(dev, "softreset"); if (IS_ERR(dwc3_data->rstc_rst)) { dev_err(&pdev->dev, "could not get reset controller\n"); ret = PTR_ERR(dwc3_data->rstc_rst); diff --git a/drivers/usb/host/ehci-st.c b/drivers/usb/host/ehci-st.c index a94ed677d937..be4a2788fc58 100644 --- a/drivers/usb/host/ehci-st.c +++ b/drivers/usb/host/ehci-st.c @@ -206,7 +206,8 @@ static int st_ehci_platform_probe(struct platform_device *dev) priv->clk48 = NULL; } - priv->pwr = devm_reset_control_get_optional(&dev->dev, "power"); + priv->pwr = + devm_reset_control_get_optional_shared(&dev->dev, "power"); if (IS_ERR(priv->pwr)) { err = PTR_ERR(priv->pwr); if (err == -EPROBE_DEFER) @@ -214,7 +215,8 @@ static int st_ehci_platform_probe(struct platform_device *dev) priv->pwr = NULL; } - priv->rst = devm_reset_control_get_optional(&dev->dev, "softreset"); + priv->rst = + devm_reset_control_get_optional_shared(&dev->dev, "softreset"); if (IS_ERR(priv->rst)) { err = PTR_ERR(priv->rst); if (err == -EPROBE_DEFER) diff --git a/drivers/usb/host/ohci-st.c b/drivers/usb/host/ohci-st.c index acf2eb2a5676..02816a1515a1 100644 --- a/drivers/usb/host/ohci-st.c +++ b/drivers/usb/host/ohci-st.c @@ -188,13 +188,15 @@ static int st_ohci_platform_probe(struct platform_device *dev) priv->clk48 = NULL; } - priv->pwr = devm_reset_control_get_optional(&dev->dev, "power"); + priv->pwr = + devm_reset_control_get_optional_shared(&dev->dev, "power"); if (IS_ERR(priv->pwr)) { err = PTR_ERR(priv->pwr); goto err_put_clks; } - priv->rst = devm_reset_control_get_optional(&dev->dev, "softreset"); + priv->rst = + devm_reset_control_get_optional_shared(&dev->dev, "softreset"); if (IS_ERR(priv->rst)) { err = PTR_ERR(priv->rst); goto err_put_clks; diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 1d3e45f84549..e032ca397371 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -481,10 +481,14 @@ out: static int peek_head_len(struct sock *sk) { + struct socket *sock = sk->sk_socket; struct sk_buff *head; int len = 0; unsigned long flags; + if (sock->ops->peek_len) + return sock->ops->peek_len(sock); + spin_lock_irqsave(&sk->sk_receive_queue.lock, flags); head = skb_peek(&sk->sk_receive_queue); if (likely(head)) { @@ -497,6 +501,16 @@ static int peek_head_len(struct sock *sk) return len; } +static int sk_has_rx_data(struct sock *sk) +{ + struct socket *sock = sk->sk_socket; + + if (sock->ops->peek_len) + return sock->ops->peek_len(sock); + + return skb_queue_empty(&sk->sk_receive_queue); +} + static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk) { struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX]; @@ -513,7 +527,7 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk) endtime = busy_clock() + vq->busyloop_timeout; while (vhost_can_busy_poll(&net->dev, endtime) && - skb_queue_empty(&sk->sk_receive_queue) && + !sk_has_rx_data(sk) && vhost_vq_avail_empty(&net->dev, vq)) cpu_relax_lowlatency(); diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index b84c291ba1eb..d7b78d531e63 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -74,7 +74,7 @@ int v9fs_file_open(struct inode *inode, struct file *file) v9fs_proto_dotu(v9ses)); fid = file->private_data; if (!fid) { - fid = v9fs_fid_clone(file->f_path.dentry); + fid = v9fs_fid_clone(file_dentry(file)); if (IS_ERR(fid)) return PTR_ERR(fid); @@ -100,7 +100,7 @@ int v9fs_file_open(struct inode *inode, struct file *file) * because we want write after unlink usecase * to work. */ - fid = v9fs_writeback_fid(file->f_path.dentry); + fid = v9fs_writeback_fid(file_dentry(file)); if (IS_ERR(fid)) { err = PTR_ERR(fid); mutex_unlock(&v9inode->v_mutex); @@ -516,7 +516,7 @@ v9fs_mmap_file_mmap(struct file *filp, struct vm_area_struct *vma) * because we want write after unlink usecase * to work. */ - fid = v9fs_writeback_fid(filp->f_path.dentry); + fid = v9fs_writeback_fid(file_dentry(filp)); if (IS_ERR(fid)) { retval = PTR_ERR(fid); mutex_unlock(&v9inode->v_mutex); diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 6e72c98162d5..1780218a48f0 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -95,10 +95,8 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino) } dentry = d_obtain_alias(inode); - if (IS_ERR(dentry)) { - iput(inode); + if (IS_ERR(dentry)) return dentry; - } err = ceph_init_dentry(dentry); if (err < 0) { dput(dentry); @@ -167,10 +165,8 @@ static struct dentry *__get_parent(struct super_block *sb, return ERR_PTR(-ENOENT); dentry = d_obtain_alias(inode); - if (IS_ERR(dentry)) { - iput(inode); + if (IS_ERR(dentry)) return dentry; - } err = ceph_init_dentry(dentry); if (err < 0) { dput(dentry); @@ -210,7 +206,7 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb, dout("fh_to_parent %llx\n", cfh->parent_ino); dentry = __get_parent(sb, NULL, cfh->ino); - if (IS_ERR(dentry) && PTR_ERR(dentry) == -ENOENT) + if (unlikely(dentry == ERR_PTR(-ENOENT))) dentry = __fh_to_dentry(sb, cfh->parent_ino); return dentry; } @@ -208,7 +208,12 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter, dax.addr += first; size = map_len - first; } - max = min(pos + size, end); + /* + * pos + size is one past the last offset for IO, + * so pos + size can overflow loff_t at extreme offsets. + * Cast to u64 to catch this and get the true minimum. + */ + max = min_t(u64, pos + size, end); } if (iov_iter_rw(iter) == WRITE) { diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index ccd4971cc6c1..264f07c7754e 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -341,8 +341,10 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, struct dentry *newent; bool outarg_valid = true; + fuse_lock_inode(dir); err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name, &outarg, &inode); + fuse_unlock_inode(dir); if (err == -ENOENT) { outarg_valid = false; err = 0; @@ -1341,7 +1343,9 @@ static int fuse_readdir(struct file *file, struct dir_context *ctx) fuse_read_fill(req, file, ctx->pos, PAGE_SIZE, FUSE_READDIR); } + fuse_lock_inode(inode); fuse_request_send(fc, req); + fuse_unlock_inode(inode); nbytes = req->out.args[0].size; err = req->out.h.error; fuse_put_request(fc, req); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index eddbe02c4028..929c383432b0 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -110,6 +110,9 @@ struct fuse_inode { /** Miscellaneous bits describing inode state */ unsigned long state; + + /** Lock for serializing lookup and readdir for back compatibility*/ + struct mutex mutex; }; /** FUSE inode state bits */ @@ -540,6 +543,9 @@ struct fuse_conn { /** write-back cache policy (default is write-through) */ unsigned writeback_cache:1; + /** allow parallel lookups and readdir (default is serialized) */ + unsigned parallel_dirops:1; + /* * The following bitfields are only for optimization purposes * and hence races in setting them will not cause malfunction @@ -956,4 +962,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, void fuse_set_initialized(struct fuse_conn *fc); +void fuse_unlock_inode(struct inode *inode); +void fuse_lock_inode(struct inode *inode); + #endif /* _FS_FUSE_I_H */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 1ce67668a8e1..9961d8432ce3 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -97,6 +97,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb) INIT_LIST_HEAD(&fi->queued_writes); INIT_LIST_HEAD(&fi->writepages); init_waitqueue_head(&fi->page_waitq); + mutex_init(&fi->mutex); fi->forget = fuse_alloc_forget(); if (!fi->forget) { kmem_cache_free(fuse_inode_cachep, inode); @@ -117,6 +118,7 @@ static void fuse_destroy_inode(struct inode *inode) struct fuse_inode *fi = get_fuse_inode(inode); BUG_ON(!list_empty(&fi->write_files)); BUG_ON(!list_empty(&fi->queued_writes)); + mutex_destroy(&fi->mutex); kfree(fi->forget); call_rcu(&inode->i_rcu, fuse_i_callback); } @@ -351,6 +353,18 @@ int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid, return 0; } +void fuse_lock_inode(struct inode *inode) +{ + if (!get_fuse_conn(inode)->parallel_dirops) + mutex_lock(&get_fuse_inode(inode)->mutex); +} + +void fuse_unlock_inode(struct inode *inode) +{ + if (!get_fuse_conn(inode)->parallel_dirops) + mutex_unlock(&get_fuse_inode(inode)->mutex); +} + static void fuse_umount_begin(struct super_block *sb) { fuse_abort_conn(get_fuse_conn_super(sb)); @@ -898,6 +912,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->async_dio = 1; if (arg->flags & FUSE_WRITEBACK_CACHE) fc->writeback_cache = 1; + if (arg->flags & FUSE_PARALLEL_DIROPS) + fc->parallel_dirops = 1; if (arg->time_gran && arg->time_gran <= 1000000000) fc->sb->s_time_gran = arg->time_gran; } else { @@ -928,7 +944,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA | FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO | - FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT; + FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT | + FUSE_PARALLEL_DIROPS; req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; req->in.args[0].size = sizeof(*arg); diff --git a/fs/libfs.c b/fs/libfs.c index cedeacbae303..74dc8b9e7f53 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -84,6 +84,61 @@ int dcache_dir_close(struct inode *inode, struct file *file) } EXPORT_SYMBOL(dcache_dir_close); +/* parent is locked at least shared */ +static struct dentry *next_positive(struct dentry *parent, + struct list_head *from, + int count) +{ + unsigned *seq = &parent->d_inode->i_dir_seq, n; + struct dentry *res; + struct list_head *p; + bool skipped; + int i; + +retry: + i = count; + skipped = false; + n = smp_load_acquire(seq) & ~1; + res = NULL; + rcu_read_lock(); + for (p = from->next; p != &parent->d_subdirs; p = p->next) { + struct dentry *d = list_entry(p, struct dentry, d_child); + if (!simple_positive(d)) { + skipped = true; + } else if (!--i) { + res = d; + break; + } + } + rcu_read_unlock(); + if (skipped) { + smp_rmb(); + if (unlikely(*seq != n)) + goto retry; + } + return res; +} + +static void move_cursor(struct dentry *cursor, struct list_head *after) +{ + struct dentry *parent = cursor->d_parent; + unsigned n, *seq = &parent->d_inode->i_dir_seq; + spin_lock(&parent->d_lock); + for (;;) { + n = *seq; + if (!(n & 1) && cmpxchg(seq, n, n + 1) == n) + break; + cpu_relax(); + } + __list_del(cursor->d_child.prev, cursor->d_child.next); + if (after) + list_add(&cursor->d_child, after); + else + list_add_tail(&cursor->d_child, &parent->d_subdirs); + smp_store_release(seq, n + 2); + spin_unlock(&parent->d_lock); +} + loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) { struct dentry *dentry = file->f_path.dentry; @@ -99,25 +154,14 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) if (offset != file->f_pos) { file->f_pos = offset; if (file->f_pos >= 2) { - struct list_head *p; struct dentry *cursor = file->private_data; + struct dentry *to; loff_t n = file->f_pos - 2; - spin_lock(&dentry->d_lock); - /* d_lock not required for cursor */ - list_del(&cursor->d_child); - p = dentry->d_subdirs.next; - while (n && p != &dentry->d_subdirs) { - struct dentry *next; - next = list_entry(p, struct dentry, d_child); - spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); - if (simple_positive(next)) - n--; - spin_unlock(&next->d_lock); - p = p->next; - } - list_add_tail(&cursor->d_child, p); - spin_unlock(&dentry->d_lock); + inode_lock_shared(dentry->d_inode); + to = next_positive(dentry, &dentry->d_subdirs, n); + move_cursor(cursor, to ? &to->d_child : NULL); + inode_unlock_shared(dentry->d_inode); } } return offset; @@ -140,36 +184,25 @@ int dcache_readdir(struct file *file, struct dir_context *ctx) { struct dentry *dentry = file->f_path.dentry; struct dentry *cursor = file->private_data; - struct list_head *p, *q = &cursor->d_child; + struct list_head *p = &cursor->d_child; + struct dentry *next; + bool moved = false; if (!dir_emit_dots(file, ctx)) return 0; - spin_lock(&dentry->d_lock); - if (ctx->pos == 2) - list_move(q, &dentry->d_subdirs); - for (p = q->next; p != &dentry->d_subdirs; p = p->next) { - struct dentry *next = list_entry(p, struct dentry, d_child); - spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); - if (!simple_positive(next)) { - spin_unlock(&next->d_lock); - continue; - } - - spin_unlock(&next->d_lock); - spin_unlock(&dentry->d_lock); + if (ctx->pos == 2) + p = &dentry->d_subdirs; + while ((next = next_positive(dentry, p, 1)) != NULL) { if (!dir_emit(ctx, next->d_name.name, next->d_name.len, d_inode(next)->i_ino, dt_type(d_inode(next)))) - return 0; - spin_lock(&dentry->d_lock); - spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); - /* next is still alive */ - list_move(q, p); - spin_unlock(&next->d_lock); - p = q; + break; + moved = true; + p = &next->d_child; ctx->pos++; } - spin_unlock(&dentry->d_lock); + if (moved) + move_cursor(cursor, p); return 0; } EXPORT_SYMBOL(dcache_readdir); diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 154a107cd376..fc4084ef4736 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -335,12 +335,17 @@ static struct notifier_block lockd_inet6addr_notifier = { }; #endif -static void lockd_svc_exit_thread(void) +static void lockd_unregister_notifiers(void) { unregister_inetaddr_notifier(&lockd_inetaddr_notifier); #if IS_ENABLED(CONFIG_IPV6) unregister_inet6addr_notifier(&lockd_inet6addr_notifier); #endif +} + +static void lockd_svc_exit_thread(void) +{ + lockd_unregister_notifiers(); svc_exit_thread(nlmsvc_rqst); } @@ -462,7 +467,7 @@ int lockd_up(struct net *net) * Note: svc_serv structures have an initial use count of 1, * so we exit through here on both success and failure. */ -err_net: +err_put: svc_destroy(serv); err_create: mutex_unlock(&nlmsvc_mutex); @@ -470,7 +475,9 @@ err_create: err_start: lockd_down_net(serv, net); - goto err_net; +err_net: + lockd_unregister_notifiers(); + goto err_put; } EXPORT_SYMBOL_GPL(lockd_up); diff --git a/fs/locks.c b/fs/locks.c index 7c5f91be9b65..ee1b15f6fc13 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1628,7 +1628,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr { struct file_lock *fl, *my_fl = NULL, *lease; struct dentry *dentry = filp->f_path.dentry; - struct inode *inode = dentry->d_inode; + struct inode *inode = file_inode(filp); struct file_lock_context *ctx; bool is_deleg = (*flp)->fl_flags & FL_DELEG; int error; diff --git a/fs/namespace.c b/fs/namespace.c index 783004af5707..419f746d851d 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1562,6 +1562,7 @@ void __detach_mounts(struct dentry *dentry) goto out_unlock; lock_mount_hash(); + event++; while (!hlist_empty(&mp->m_list)) { mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list); if (mnt->mnt.mnt_flags & MNT_UMOUNT) { diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 1dbeab6cf96e..c831c2e5f803 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -59,16 +59,37 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr) if (err) goto out; + if (attr->ia_valid & ATTR_SIZE) { + struct inode *realinode = d_inode(ovl_dentry_real(dentry)); + + err = -ETXTBSY; + if (atomic_read(&realinode->i_writecount) < 0) + goto out_drop_write; + } + err = ovl_copy_up(dentry); if (!err) { + struct inode *winode = NULL; + upperdentry = ovl_dentry_upper(dentry); + if (attr->ia_valid & ATTR_SIZE) { + winode = d_inode(upperdentry); + err = get_write_access(winode); + if (err) + goto out_drop_write; + } + inode_lock(upperdentry->d_inode); err = notify_change(upperdentry, attr, NULL); if (!err) ovl_copyattr(upperdentry->d_inode, dentry->d_inode); inode_unlock(upperdentry->d_inode); + + if (winode) + put_write_access(winode); } +out_drop_write: ovl_drop_write(dentry); out: return err; @@ -121,16 +142,18 @@ int ovl_permission(struct inode *inode, int mask) err = vfs_getattr(&realpath, &stat); if (err) - return err; + goto out_dput; + err = -ESTALE; if ((stat.mode ^ inode->i_mode) & S_IFMT) - return -ESTALE; + goto out_dput; inode->i_mode = stat.mode; inode->i_uid = stat.uid; inode->i_gid = stat.gid; - return generic_permission(inode, mask); + err = generic_permission(inode, mask); + goto out_dput; } /* Careful in RCU walk mode */ diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index ce02f46029da..9a7693d5f8ff 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1082,11 +1082,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) if (err < 0) goto out_put_workdir; - if (!err) { - pr_err("overlayfs: upper fs needs to support d_type.\n"); - err = -EINVAL; - goto out_put_workdir; - } + /* + * We allowed this configuration and don't want to + * break users over kernel upgrade. So warn instead + * of erroring out. + */ + if (!err) + pr_warn("overlayfs: upper fs needs to support d_type.\n"); } } diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index 9094599a1150..33466bfc6440 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -309,6 +309,7 @@ INTEL_VGA_DEVICE(0x5906, info), /* ULT GT1 */ \ INTEL_VGA_DEVICE(0x590E, info), /* ULX GT1 */ \ INTEL_VGA_DEVICE(0x5902, info), /* DT GT1 */ \ + INTEL_VGA_DEVICE(0x5908, info), /* Halo GT1 */ \ INTEL_VGA_DEVICE(0x590B, info), /* Halo GT1 */ \ INTEL_VGA_DEVICE(0x590A, info) /* SRV GT1 */ @@ -322,15 +323,12 @@ INTEL_VGA_DEVICE(0x591D, info) /* WKS GT2 */ #define INTEL_KBL_GT3_IDS(info) \ + INTEL_VGA_DEVICE(0x5923, info), /* ULT GT3 */ \ INTEL_VGA_DEVICE(0x5926, info), /* ULT GT3 */ \ - INTEL_VGA_DEVICE(0x592B, info), /* Halo GT3 */ \ - INTEL_VGA_DEVICE(0x592A, info) /* SRV GT3 */ + INTEL_VGA_DEVICE(0x5927, info) /* ULT GT3 */ #define INTEL_KBL_GT4_IDS(info) \ - INTEL_VGA_DEVICE(0x5932, info), /* DT GT4 */ \ - INTEL_VGA_DEVICE(0x593B, info), /* Halo GT4 */ \ - INTEL_VGA_DEVICE(0x593A, info), /* SRV GT4 */ \ - INTEL_VGA_DEVICE(0x593D, info) /* WKS GT4 */ + INTEL_VGA_DEVICE(0x593B, info) /* Halo GT4 */ #define INTEL_KBL_IDS(info) \ INTEL_KBL_GT1_IDS(info), \ diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index fe389ac31489..92e7e97ca8ff 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -18,13 +18,13 @@ #ifndef __ASM_ARM_KVM_PMU_H #define __ASM_ARM_KVM_PMU_H -#ifdef CONFIG_KVM_ARM_PMU - #include <linux/perf_event.h> #include <asm/perf_event.h> #define ARMV8_PMU_CYCLE_IDX (ARMV8_PMU_MAX_COUNTERS - 1) +#ifdef CONFIG_KVM_ARM_PMU + struct kvm_pmc { u8 idx; /* index into the pmu->pmc array */ struct perf_event *perf_event; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8411032ac90d..b3336b4f5d04 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -218,9 +218,9 @@ void bpf_register_prog_type(struct bpf_prog_type_list *tl); void bpf_register_map_type(struct bpf_map_type_list *tl); struct bpf_prog *bpf_prog_get(u32 ufd); +struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type); struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog); void bpf_prog_put(struct bpf_prog *prog); -void bpf_prog_put_rcu(struct bpf_prog *prog); struct bpf_map *bpf_map_get_with_uref(u32 ufd); struct bpf_map *__bpf_map_get(struct fd f); @@ -278,11 +278,13 @@ static inline struct bpf_prog *bpf_prog_get(u32 ufd) return ERR_PTR(-EOPNOTSUPP); } -static inline void bpf_prog_put(struct bpf_prog *prog) +static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, + enum bpf_prog_type type) { + return ERR_PTR(-EOPNOTSUPP); } -static inline void bpf_prog_put_rcu(struct bpf_prog *prog) +static inline void bpf_prog_put(struct bpf_prog *prog) { } #endif /* CONFIG_BPF_SYSCALL */ diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index a20320c666fd..984f73b719a9 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -87,6 +87,7 @@ struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry, struct cgroup_subsys *ss); struct cgroup *cgroup_get_from_path(const char *path); +struct cgroup *cgroup_get_from_fd(int fd); int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from); diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 37ff4a6faa9a..6fec9e81bd70 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -374,6 +374,29 @@ static inline bool ether_addr_equal_unaligned(const u8 *addr1, const u8 *addr2) } /** + * ether_addr_equal_masked - Compare two Ethernet addresses with a mask + * @addr1: Pointer to a six-byte array containing the 1st Ethernet address + * @addr2: Pointer to a six-byte array containing the 2nd Ethernet address + * @mask: Pointer to a six-byte array containing the Ethernet address bitmask + * + * Compare two Ethernet addresses with a mask, returns true if for every bit + * set in the bitmask the equivalent bits in the ethernet addresses are equal. + * Using a mask with all bits set is a slower ether_addr_equal. + */ +static inline bool ether_addr_equal_masked(const u8 *addr1, const u8 *addr2, + const u8 *mask) +{ + int i; + + for (i = 0; i < ETH_ALEN; i++) { + if ((addr1[i] ^ addr2[i]) & mask[i]) + return false; + } + + return true; +} + +/** * is_etherdev_addr - Tell if given Ethernet address belongs to the device. * @dev: Pointer to a device structure * @addr: Pointer to a six-byte array containing the Ethernet address diff --git a/include/linux/mfd/da9052/da9052.h b/include/linux/mfd/da9052/da9052.h index c18a4c19d6fc..ce9230af09c2 100644 --- a/include/linux/mfd/da9052/da9052.h +++ b/include/linux/mfd/da9052/da9052.h @@ -171,7 +171,7 @@ static inline int da9052_group_read(struct da9052 *da9052, unsigned char reg, static inline int da9052_group_write(struct da9052 *da9052, unsigned char reg, unsigned reg_cnt, unsigned char *val) { - int ret; + int ret = 0; int i; for (i = 0; i < reg_cnt; i++) { diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 46260fdc5305..81e8396574f4 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -550,14 +550,10 @@ struct mlx5_priv { struct list_head ctx_list; spinlock_t ctx_lock; + struct mlx5_flow_steering *steering; struct mlx5_eswitch *eswitch; struct mlx5_core_sriov sriov; unsigned long pci_dev_data; - struct mlx5_flow_root_namespace *root_ns; - struct mlx5_flow_root_namespace *fdb_root_ns; - struct mlx5_flow_root_namespace *esw_egress_root_ns; - struct mlx5_flow_root_namespace *esw_ingress_root_ns; - struct mlx5_fc_stats fc_stats; struct mlx5_rl_table rl_table; }; @@ -578,6 +574,18 @@ enum mlx5_pci_status { MLX5_PCI_STATUS_ENABLED, }; +struct mlx5_td { + struct list_head tirs_list; + u32 tdn; +}; + +struct mlx5e_resources { + struct mlx5_uar cq_uar; + u32 pdn; + struct mlx5_td td; + struct mlx5_core_mkey mkey; +}; + struct mlx5_core_dev { struct pci_dev *pdev; /* sync pci state */ @@ -602,6 +610,7 @@ struct mlx5_core_dev { struct mlx5_profile *profile; atomic_t num_qps; u32 issi; + struct mlx5e_resources mlx5e_res; #ifdef CONFIG_RFS_ACCEL struct cpu_rmap *rmap; #endif @@ -645,6 +654,7 @@ struct mlx5_cmd_work_ent { void *uout; int uout_size; mlx5_cmd_cbk_t callback; + struct delayed_work cb_timeout_work; void *context; int idx; struct completion done; diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 4b7a107d9c19..e036d6030867 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -54,6 +54,8 @@ static inline void build_leftovers_ft_param(int *priority, enum mlx5_flow_namespace_type { MLX5_FLOW_NAMESPACE_BYPASS, + MLX5_FLOW_NAMESPACE_OFFLOADS, + MLX5_FLOW_NAMESPACE_ETHTOOL, MLX5_FLOW_NAMESPACE_KERNEL, MLX5_FLOW_NAMESPACE_LEFTOVERS, MLX5_FLOW_NAMESPACE_ANCHOR, @@ -67,6 +69,12 @@ struct mlx5_flow_group; struct mlx5_flow_rule; struct mlx5_flow_namespace; +struct mlx5_flow_spec { + u8 match_criteria_enable; + u32 match_criteria[MLX5_ST_SZ_DW(fte_match_param)]; + u32 match_value[MLX5_ST_SZ_DW(fte_match_param)]; +}; + struct mlx5_flow_destination { enum mlx5_flow_destination_type type; union { @@ -115,9 +123,7 @@ void mlx5_destroy_flow_group(struct mlx5_flow_group *fg); */ struct mlx5_flow_rule * mlx5_add_flow_rule(struct mlx5_flow_table *ft, - u8 match_criteria_enable, - u32 *match_criteria, - u32 *match_value, + struct mlx5_flow_spec *spec, u32 action, u32 flow_tag, struct mlx5_flow_destination *dest); diff --git a/include/linux/net.h b/include/linux/net.h index 25aa03b51c4e..b9f0ff4d489c 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -185,6 +185,7 @@ struct proto_ops { ssize_t (*splice_read)(struct socket *sock, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); int (*set_peek_off)(struct sock *sk, int val); + int (*peek_len)(struct socket *sock); }; #define DECLARE_SOCKADDR(type, dst, src) \ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e84d9d23c2d5..49736a31acaa 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1209,8 +1209,10 @@ struct net_device_ops { netdev_features_t features); int (*ndo_set_features)(struct net_device *dev, netdev_features_t features); - int (*ndo_neigh_construct)(struct neighbour *n); - void (*ndo_neigh_destroy)(struct neighbour *n); + int (*ndo_neigh_construct)(struct net_device *dev, + struct neighbour *n); + void (*ndo_neigh_destroy)(struct net_device *dev, + struct neighbour *n); int (*ndo_fdb_add)(struct ndmsg *ndm, struct nlattr *tb[], @@ -2237,6 +2239,7 @@ struct netdev_lag_lower_state_info { #define NETDEV_PRECHANGEUPPER 0x001A #define NETDEV_CHANGELOWERSTATE 0x001B #define NETDEV_UDP_TUNNEL_PUSH_INFO 0x001C +#define NETDEV_CHANGE_TX_QUEUE_LEN 0x001E int register_netdevice_notifier(struct notifier_block *nb); int unregister_netdevice_notifier(struct notifier_block *nb); @@ -3803,12 +3806,30 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev, void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter); + #define netdev_for_each_lower_dev(dev, ldev, iter) \ for (iter = (dev)->adj_list.lower.next, \ ldev = netdev_lower_get_next(dev, &(iter)); \ ldev; \ ldev = netdev_lower_get_next(dev, &(iter))) +struct net_device *netdev_all_lower_get_next(struct net_device *dev, + struct list_head **iter); +struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev, + struct list_head **iter); + +#define netdev_for_each_all_lower_dev(dev, ldev, iter) \ + for (iter = (dev)->all_adj_list.lower.next, \ + ldev = netdev_all_lower_get_next(dev, &(iter)); \ + ldev; \ + ldev = netdev_all_lower_get_next(dev, &(iter))) + +#define netdev_for_each_all_lower_dev_rcu(dev, ldev, iter) \ + for (iter = (dev)->all_adj_list.lower.next, \ + ldev = netdev_all_lower_get_next_rcu(dev, &(iter)); \ + ldev; \ + ldev = netdev_all_lower_get_next_rcu(dev, &(iter))) + void *netdev_adjacent_get_private(struct list_head *adj_list); void *netdev_lower_get_first_private_rcu(struct net_device *dev); struct net_device *netdev_master_upper_dev_get(struct net_device *dev); @@ -3824,6 +3845,10 @@ void *netdev_lower_dev_get_private(struct net_device *dev, struct net_device *lower_dev); void netdev_lower_state_changed(struct net_device *lower_dev, void *lower_state_info); +int netdev_default_l2upper_neigh_construct(struct net_device *dev, + struct neighbour *n); +void netdev_default_l2upper_neigh_destroy(struct net_device *dev, + struct neighbour *n); /* RSS keys are 40 or 52 bytes long */ #define NETDEV_RSS_KEY_LEN 52 diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index dc4f58a3cdcc..e94e81ab2b58 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -6,6 +6,10 @@ #include <linux/static_key.h> #include <uapi/linux/netfilter/x_tables.h> +/* Test a struct->invflags and a boolean for inequality */ +#define NF_INVF(ptr, flag, boolean) \ + ((boolean) ^ !!((ptr)->invflags & (flag))) + /** * struct xt_action_param - parameters for matches/targets * diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 2ea517c7c6b9..984b2112c77b 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -115,8 +115,6 @@ extern unsigned int ebt_do_table(struct sk_buff *skb, const struct nf_hook_state *state, struct ebt_table *table); -/* Used in the kernel match() functions */ -#define FWINV(bool,invflg) ((bool) ^ !!(info->invflags & invflg)) /* True if the hook mask denotes that the rule is in a base chain, * used in the check() functions */ #define BASE_CHAIN (par->hook_mask & (1 << NF_BR_NUMHOOKS)) diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index 562a65e8bcc0..2052011bf9fb 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -102,7 +102,7 @@ static inline bool ptr_ring_full_bh(struct ptr_ring *r) */ static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr) { - if (r->queue[r->producer]) + if (unlikely(!r->size) || r->queue[r->producer]) return -ENOSPC; r->queue[r->producer++] = ptr; @@ -164,7 +164,9 @@ static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr) */ static inline void *__ptr_ring_peek(struct ptr_ring *r) { - return r->queue[r->consumer]; + if (likely(r->size)) + return r->queue[r->consumer]; + return NULL; } /* Note: callers invoking this in a loop must use a compiler barrier, @@ -347,20 +349,14 @@ static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp) return 0; } -static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp, - void (*destroy)(void *)) +static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue, + int size, gfp_t gfp, + void (*destroy)(void *)) { - unsigned long flags; int producer = 0; - void **queue = __ptr_ring_init_queue_alloc(size, gfp); void **old; void *ptr; - if (!queue) - return -ENOMEM; - - spin_lock_irqsave(&(r)->producer_lock, flags); - while ((ptr = ptr_ring_consume(r))) if (producer < size) queue[producer++] = ptr; @@ -373,6 +369,23 @@ static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp, old = r->queue; r->queue = queue; + return old; +} + +static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp, + void (*destroy)(void *)) +{ + unsigned long flags; + void **queue = __ptr_ring_init_queue_alloc(size, gfp); + void **old; + + if (!queue) + return -ENOMEM; + + spin_lock_irqsave(&(r)->producer_lock, flags); + + old = __ptr_ring_swap_queue(r, queue, size, gfp, destroy); + spin_unlock_irqrestore(&(r)->producer_lock, flags); kfree(old); @@ -380,6 +393,48 @@ static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp, return 0; } +static inline int ptr_ring_resize_multiple(struct ptr_ring **rings, int nrings, + int size, + gfp_t gfp, void (*destroy)(void *)) +{ + unsigned long flags; + void ***queues; + int i; + + queues = kmalloc(nrings * sizeof *queues, gfp); + if (!queues) + goto noqueues; + + for (i = 0; i < nrings; ++i) { + queues[i] = __ptr_ring_init_queue_alloc(size, gfp); + if (!queues[i]) + goto nomem; + } + + for (i = 0; i < nrings; ++i) { + spin_lock_irqsave(&(rings[i])->producer_lock, flags); + queues[i] = __ptr_ring_swap_queue(rings[i], queues[i], + size, gfp, destroy); + spin_unlock_irqrestore(&(rings[i])->producer_lock, flags); + } + + for (i = 0; i < nrings; ++i) + kfree(queues[i]); + + kfree(queues); + + return 0; + +nomem: + while (--i >= 0) + kfree(queues[i]); + + kfree(queues); + +noqueues: + return -ENOMEM; +} + static inline void ptr_ring_cleanup(struct ptr_ring *r, void (*destroy)(void *)) { void *ptr; diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 908b67c847cd..c038ae36b10e 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -464,6 +464,8 @@ static inline bool pwm_can_sleep(struct pwm_device *pwm) static inline void pwm_apply_args(struct pwm_device *pwm) { + struct pwm_state state = { }; + /* * PWM users calling pwm_apply_args() expect to have a fresh config * where the polarity and period are set according to pwm_args info. @@ -476,18 +478,20 @@ static inline void pwm_apply_args(struct pwm_device *pwm) * at startup (even if they are actually enabled), thus authorizing * polarity setting. * - * Instead of setting ->enabled to false, we call pwm_disable() - * before pwm_set_polarity() to ensure that everything is configured - * as expected, and the PWM is really disabled when the user request - * it. + * To fulfill this requirement, we apply a new state which disables + * the PWM device and set the reference period and polarity config. * * Note that PWM users requiring a smooth handover between the * bootloader and the kernel (like critical regulators controlled by * PWM devices) will have to switch to the atomic API and avoid calling * pwm_apply_args(). */ - pwm_disable(pwm); - pwm_set_polarity(pwm, pwm->args.polarity); + + state.enabled = false; + state.polarity = pwm->args.polarity; + state.period = pwm->args.period; + + pwm_apply_state(pwm, &state); } struct pwm_lookup { diff --git a/include/linux/reset.h b/include/linux/reset.h index ec0306ce7b92..45a4abeb6acb 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -84,8 +84,8 @@ static inline struct reset_control *__devm_reset_control_get( #endif /* CONFIG_RESET_CONTROLLER */ /** - * reset_control_get - Lookup and obtain an exclusive reference to a - * reset controller. + * reset_control_get_exclusive - Lookup and obtain an exclusive reference + * to a reset controller. * @dev: device to be reset by the controller * @id: reset line name * @@ -98,8 +98,8 @@ static inline struct reset_control *__devm_reset_control_get( * * Use of id names is optional. */ -static inline struct reset_control *__must_check reset_control_get( - struct device *dev, const char *id) +static inline struct reset_control * +__must_check reset_control_get_exclusive(struct device *dev, const char *id) { #ifndef CONFIG_RESET_CONTROLLER WARN_ON(1); @@ -107,12 +107,6 @@ static inline struct reset_control *__must_check reset_control_get( return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 0); } -static inline struct reset_control *reset_control_get_optional( - struct device *dev, const char *id) -{ - return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 0); -} - /** * reset_control_get_shared - Lookup and obtain a shared reference to a * reset controller. @@ -141,9 +135,21 @@ static inline struct reset_control *reset_control_get_shared( return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 1); } +static inline struct reset_control *reset_control_get_optional_exclusive( + struct device *dev, const char *id) +{ + return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 0); +} + +static inline struct reset_control *reset_control_get_optional_shared( + struct device *dev, const char *id) +{ + return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, 1); +} + /** - * of_reset_control_get - Lookup and obtain an exclusive reference to a - * reset controller. + * of_reset_control_get_exclusive - Lookup and obtain an exclusive reference + * to a reset controller. * @node: device to be reset by the controller * @id: reset line name * @@ -151,15 +157,41 @@ static inline struct reset_control *reset_control_get_shared( * * Use of id names is optional. */ -static inline struct reset_control *of_reset_control_get( +static inline struct reset_control *of_reset_control_get_exclusive( struct device_node *node, const char *id) { return __of_reset_control_get(node, id, 0, 0); } /** - * of_reset_control_get_by_index - Lookup and obtain an exclusive reference to - * a reset controller by index. + * of_reset_control_get_shared - Lookup and obtain an shared reference + * to a reset controller. + * @node: device to be reset by the controller + * @id: reset line name + * + * When a reset-control is shared, the behavior of reset_control_assert / + * deassert is changed, the reset-core will keep track of a deassert_count + * and only (re-)assert the reset after reset_control_assert has been called + * as many times as reset_control_deassert was called. Also see the remark + * about shared reset-controls in the reset_control_assert docs. + * + * Calling reset_control_assert without first calling reset_control_deassert + * is not allowed on a shared reset control. Calling reset_control_reset is + * also not allowed on a shared reset control. + * Returns a struct reset_control or IS_ERR() condition containing errno. + * + * Use of id names is optional. + */ +static inline struct reset_control *of_reset_control_get_shared( + struct device_node *node, const char *id) +{ + return __of_reset_control_get(node, id, 0, 1); +} + +/** + * of_reset_control_get_exclusive_by_index - Lookup and obtain an exclusive + * reference to a reset controller + * by index. * @node: device to be reset by the controller * @index: index of the reset controller * @@ -167,49 +199,60 @@ static inline struct reset_control *of_reset_control_get( * in whatever order. Returns a struct reset_control or IS_ERR() condition * containing errno. */ -static inline struct reset_control *of_reset_control_get_by_index( +static inline struct reset_control *of_reset_control_get_exclusive_by_index( struct device_node *node, int index) { return __of_reset_control_get(node, NULL, index, 0); } /** - * devm_reset_control_get - resource managed reset_control_get() - * @dev: device to be reset by the controller - * @id: reset line name + * of_reset_control_get_shared_by_index - Lookup and obtain an shared + * reference to a reset controller + * by index. + * @node: device to be reset by the controller + * @index: index of the reset controller + * + * When a reset-control is shared, the behavior of reset_control_assert / + * deassert is changed, the reset-core will keep track of a deassert_count + * and only (re-)assert the reset after reset_control_assert has been called + * as many times as reset_control_deassert was called. Also see the remark + * about shared reset-controls in the reset_control_assert docs. + * + * Calling reset_control_assert without first calling reset_control_deassert + * is not allowed on a shared reset control. Calling reset_control_reset is + * also not allowed on a shared reset control. + * Returns a struct reset_control or IS_ERR() condition containing errno. * - * Managed reset_control_get(). For reset controllers returned from this - * function, reset_control_put() is called automatically on driver detach. - * See reset_control_get() for more information. + * This is to be used to perform a list of resets for a device or power domain + * in whatever order. Returns a struct reset_control or IS_ERR() condition + * containing errno. */ -static inline struct reset_control *__must_check devm_reset_control_get( - struct device *dev, const char *id) -{ -#ifndef CONFIG_RESET_CONTROLLER - WARN_ON(1); -#endif - return __devm_reset_control_get(dev, id, 0, 0); -} - -static inline struct reset_control *devm_reset_control_get_optional( - struct device *dev, const char *id) +static inline struct reset_control *of_reset_control_get_shared_by_index( + struct device_node *node, int index) { - return __devm_reset_control_get(dev, id, 0, 0); + return __of_reset_control_get(node, NULL, index, 1); } /** - * devm_reset_control_get_by_index - resource managed reset_control_get + * devm_reset_control_get_exclusive - resource managed + * reset_control_get_exclusive() * @dev: device to be reset by the controller - * @index: index of the reset controller + * @id: reset line name * - * Managed reset_control_get(). For reset controllers returned from this - * function, reset_control_put() is called automatically on driver detach. - * See reset_control_get() for more information. + * Managed reset_control_get_exclusive(). For reset controllers returned + * from this function, reset_control_put() is called automatically on driver + * detach. + * + * See reset_control_get_exclusive() for more information. */ -static inline struct reset_control *devm_reset_control_get_by_index( - struct device *dev, int index) +static inline struct reset_control * +__must_check devm_reset_control_get_exclusive(struct device *dev, + const char *id) { - return __devm_reset_control_get(dev, NULL, index, 0); +#ifndef CONFIG_RESET_CONTROLLER + WARN_ON(1); +#endif + return __devm_reset_control_get(dev, id, 0, 0); } /** @@ -227,6 +270,36 @@ static inline struct reset_control *devm_reset_control_get_shared( return __devm_reset_control_get(dev, id, 0, 1); } +static inline struct reset_control *devm_reset_control_get_optional_exclusive( + struct device *dev, const char *id) +{ + return __devm_reset_control_get(dev, id, 0, 0); +} + +static inline struct reset_control *devm_reset_control_get_optional_shared( + struct device *dev, const char *id) +{ + return __devm_reset_control_get(dev, id, 0, 1); +} + +/** + * devm_reset_control_get_exclusive_by_index - resource managed + * reset_control_get_exclusive() + * @dev: device to be reset by the controller + * @index: index of the reset controller + * + * Managed reset_control_get_exclusive(). For reset controllers returned from + * this function, reset_control_put() is called automatically on driver + * detach. + * + * See reset_control_get_exclusive() for more information. + */ +static inline struct reset_control * +devm_reset_control_get_exclusive_by_index(struct device *dev, int index) +{ + return __devm_reset_control_get(dev, NULL, index, 0); +} + /** * devm_reset_control_get_shared_by_index - resource managed * reset_control_get_shared @@ -237,10 +310,60 @@ static inline struct reset_control *devm_reset_control_get_shared( * this function, reset_control_put() is called automatically on driver detach. * See reset_control_get_shared() for more information. */ -static inline struct reset_control *devm_reset_control_get_shared_by_index( - struct device *dev, int index) +static inline struct reset_control * +devm_reset_control_get_shared_by_index(struct device *dev, int index) { return __devm_reset_control_get(dev, NULL, index, 1); } +/* + * TEMPORARY calls to use during transition: + * + * of_reset_control_get() => of_reset_control_get_exclusive() + * + * These inline function calls will be removed once all consumers + * have been moved over to the new explicit API. + */ +static inline struct reset_control *reset_control_get( + struct device *dev, const char *id) +{ + return reset_control_get_exclusive(dev, id); +} + +static inline struct reset_control *reset_control_get_optional( + struct device *dev, const char *id) +{ + return reset_control_get_optional_exclusive(dev, id); +} + +static inline struct reset_control *of_reset_control_get( + struct device_node *node, const char *id) +{ + return of_reset_control_get_exclusive(node, id); +} + +static inline struct reset_control *of_reset_control_get_by_index( + struct device_node *node, int index) +{ + return of_reset_control_get_exclusive_by_index(node, index); +} + +static inline struct reset_control *devm_reset_control_get( + struct device *dev, const char *id) +{ + return devm_reset_control_get_exclusive(dev, id); +} + +static inline struct reset_control *devm_reset_control_get_optional( + struct device *dev, const char *id) +{ + return devm_reset_control_get_optional_exclusive(dev, id); + +} + +static inline struct reset_control *devm_reset_control_get_by_index( + struct device *dev, int index) +{ + return devm_reset_control_get_exclusive_by_index(dev, index); +} #endif diff --git a/include/linux/skb_array.h b/include/linux/skb_array.h index 678bfbf78ac4..f4dfade428f0 100644 --- a/include/linux/skb_array.h +++ b/include/linux/skb_array.h @@ -151,16 +151,25 @@ static inline int skb_array_init(struct skb_array *a, int size, gfp_t gfp) return ptr_ring_init(&a->ring, size, gfp); } -void __skb_array_destroy_skb(void *ptr) +static void __skb_array_destroy_skb(void *ptr) { kfree_skb(ptr); } -int skb_array_resize(struct skb_array *a, int size, gfp_t gfp) +static inline int skb_array_resize(struct skb_array *a, int size, gfp_t gfp) { return ptr_ring_resize(&a->ring, size, gfp, __skb_array_destroy_skb); } +static inline int skb_array_resize_multiple(struct skb_array **rings, + int nrings, int size, gfp_t gfp) +{ + BUILD_BUG_ON(offsetof(struct skb_array, ring)); + return ptr_ring_resize_multiple((struct ptr_ring **)rings, + nrings, size, gfp, + __skb_array_destroy_skb); +} + static inline void skb_array_cleanup(struct skb_array *a) { ptr_ring_cleanup(&a->ring, __skb_array_destroy_skb); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index dc0fca747c5e..6f0b3e0adc73 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -37,6 +37,7 @@ #include <net/flow_dissector.h> #include <linux/splice.h> #include <linux/in6.h> +#include <linux/if_packet.h> #include <net/flow.h> /* The interface for checksum offload between the stack and networking drivers @@ -881,6 +882,15 @@ static inline struct rtable *skb_rtable(const struct sk_buff *skb) return (struct rtable *)skb_dst(skb); } +/* For mangling skb->pkt_type from user space side from applications + * such as nft, tc, etc, we only allow a conservative subset of + * possible pkt_types to be set. +*/ +static inline bool skb_pkt_type_ok(u32 ptype) +{ + return ptype <= PACKET_OTHERHOST; +} + void kfree_skb(struct sk_buff *skb); void kfree_skb_list(struct sk_buff *segs); void skb_tx_error(struct sk_buff *skb); @@ -1069,6 +1079,7 @@ __skb_set_sw_hash(struct sk_buff *skb, __u32 hash, bool is_l4) } void __skb_get_hash(struct sk_buff *skb); +u32 __skb_get_hash_symmetric(struct sk_buff *skb); u32 skb_get_poff(const struct sk_buff *skb); u32 __skb_get_poff(const struct sk_buff *skb, void *data, const struct flow_keys *keys, int hlen); @@ -2877,6 +2888,25 @@ static inline void skb_postpush_rcsum(struct sk_buff *skb, } /** + * skb_push_rcsum - push skb and update receive checksum + * @skb: buffer to update + * @len: length of data pulled + * + * This function performs an skb_push on the packet and updates + * the CHECKSUM_COMPLETE checksum. It should be used on + * receive path processing instead of skb_push unless you know + * that the checksum difference is zero (e.g., a valid IP header) + * or you are setting ip_summed to CHECKSUM_NONE. + */ +static inline unsigned char *skb_push_rcsum(struct sk_buff *skb, + unsigned int len) +{ + skb_push(skb, len); + skb_postpush_rcsum(skb, skb->data, len); + return skb->data; +} + +/** * pskb_trim_rcsum - trim received skb and update checksum * @skb: buffer to trim * @len: new length diff --git a/include/linux/usb/ehci_def.h b/include/linux/usb/ehci_def.h index 966889a20ea3..e479033bd782 100644 --- a/include/linux/usb/ehci_def.h +++ b/include/linux/usb/ehci_def.h @@ -180,11 +180,11 @@ struct ehci_regs { * PORTSCx */ /* HOSTPC: offset 0x84 */ - u32 hostpc[1]; /* HOSTPC extension */ + u32 hostpc[0]; /* HOSTPC extension */ #define HOSTPC_PHCD (1<<22) /* Phy clock disable */ #define HOSTPC_PSPD (3<<25) /* Port speed detection */ - u32 reserved5[16]; + u32 reserved5[17]; /* USBMODE_EX: offset 0xc8 */ u32 usbmode_ex; /* USB Device mode extension */ diff --git a/include/net/bonding.h b/include/net/bonding.h index 791800ddd6d9..6360c259da6d 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -34,6 +34,9 @@ #define BOND_DEFAULT_MIIMON 100 +#ifndef __long_aligned +#define __long_aligned __attribute__((aligned((sizeof(long))))) +#endif /* * Less bad way to call ioctl from within the kernel; this needs to be * done some other way to get the call out of interrupt context. @@ -138,7 +141,9 @@ struct bond_params { struct reciprocal_value reciprocal_packets_per_slave; u16 ad_actor_sys_prio; u16 ad_user_port_key; - u8 ad_actor_system[ETH_ALEN]; + + /* 2 bytes of padding : see ether_addr_equal_64bits() */ + u8 ad_actor_system[ETH_ALEN + 2]; }; struct bond_parm_tbl { diff --git a/include/net/devlink.h b/include/net/devlink.h index 1d45b61cb320..c99ffe8cef3c 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -90,6 +90,9 @@ struct devlink_ops { u16 tc_index, enum devlink_sb_pool_type pool_type, u32 *p_cur, u32 *p_max); + + int (*eswitch_mode_get)(struct devlink *devlink, u16 *p_mode); + int (*eswitch_mode_set)(struct devlink *devlink, u16 mode); }; static inline void *devlink_priv(struct devlink *devlink) diff --git a/include/net/ip.h b/include/net/ip.h index 37165fba3741..08f36cd2b874 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -313,10 +313,9 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, return min(dst->dev->mtu, IP_MAX_MTU); } -static inline unsigned int ip_skb_dst_mtu(const struct sk_buff *skb) +static inline unsigned int ip_skb_dst_mtu(struct sock *sk, + const struct sk_buff *skb) { - struct sock *sk = skb->sk; - if (!sk || !sk_fullsock(sk) || ip_sk_use_pmtu(sk)) { bool forwarding = IPCB(skb)->flags & IPSKB_FORWARDED; diff --git a/include/net/netevent.h b/include/net/netevent.h index d8bbb38584b6..f440df172b56 100644 --- a/include/net/netevent.h +++ b/include/net/netevent.h @@ -24,6 +24,7 @@ struct netevent_redirect { enum netevent_notif_type { NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */ NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ + NETEVENT_DELAY_PROBE_TIME_UPDATE, /* arg is struct neigh_parms ptr */ }; int register_netevent_notifier(struct notifier_block *nb); diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index dd78bea227c8..5d3397f34583 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -85,6 +85,9 @@ struct nf_conn { spinlock_t lock; u16 cpu; +#ifdef CONFIG_NF_CONNTRACK_ZONES + struct nf_conntrack_zone zone; +#endif /* XXX should I move this to the tail ? - Y.K */ /* These are my tuples; original and reply */ struct nf_conntrack_tuple_hash tuplehash[IP_CT_DIR_MAX]; @@ -287,6 +290,7 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb) struct kernel_param; int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp); +int nf_conntrack_hash_resize(unsigned int hashsize); extern unsigned int nf_conntrack_htable_size; extern unsigned int nf_conntrack_max; diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index 55d15049ab2f..b925395fa5ed 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -15,9 +15,6 @@ enum nf_ct_ext_id { #ifdef CONFIG_NF_CONNTRACK_EVENTS NF_CT_EXT_ECACHE, #endif -#ifdef CONFIG_NF_CONNTRACK_ZONES - NF_CT_EXT_ZONE, -#endif #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP NF_CT_EXT_TSTAMP, #endif @@ -38,7 +35,6 @@ enum nf_ct_ext_id { #define NF_CT_EXT_SEQADJ_TYPE struct nf_conn_seqadj #define NF_CT_EXT_ACCT_TYPE struct nf_conn_acct #define NF_CT_EXT_ECACHE_TYPE struct nf_conntrack_ecache -#define NF_CT_EXT_ZONE_TYPE struct nf_conntrack_zone #define NF_CT_EXT_TSTAMP_TYPE struct nf_conn_tstamp #define NF_CT_EXT_TIMEOUT_TYPE struct nf_conn_timeout #define NF_CT_EXT_LABELS_TYPE struct nf_conn_labels diff --git a/include/net/netfilter/nf_conntrack_zones.h b/include/net/netfilter/nf_conntrack_zones.h index 4e32512cef32..64a718b60839 100644 --- a/include/net/netfilter/nf_conntrack_zones.h +++ b/include/net/netfilter/nf_conntrack_zones.h @@ -9,12 +9,11 @@ static inline const struct nf_conntrack_zone * nf_ct_zone(const struct nf_conn *ct) { - const struct nf_conntrack_zone *nf_ct_zone = NULL; - #ifdef CONFIG_NF_CONNTRACK_ZONES - nf_ct_zone = nf_ct_ext_find(ct, NF_CT_EXT_ZONE); + return &ct->zone; +#else + return &nf_ct_zone_dflt; #endif - return nf_ct_zone ? nf_ct_zone : &nf_ct_zone_dflt; } static inline const struct nf_conntrack_zone * @@ -31,32 +30,22 @@ static inline const struct nf_conntrack_zone * nf_ct_zone_tmpl(const struct nf_conn *tmpl, const struct sk_buff *skb, struct nf_conntrack_zone *tmp) { - const struct nf_conntrack_zone *zone; - +#ifdef CONFIG_NF_CONNTRACK_ZONES if (!tmpl) return &nf_ct_zone_dflt; - zone = nf_ct_zone(tmpl); - if (zone->flags & NF_CT_FLAG_MARK) - zone = nf_ct_zone_init(tmp, skb->mark, zone->dir, 0); - - return zone; + if (tmpl->zone.flags & NF_CT_FLAG_MARK) + return nf_ct_zone_init(tmp, skb->mark, tmpl->zone.dir, 0); +#endif + return nf_ct_zone(tmpl); } -static inline int nf_ct_zone_add(struct nf_conn *ct, gfp_t flags, - const struct nf_conntrack_zone *info) +static inline void nf_ct_zone_add(struct nf_conn *ct, + const struct nf_conntrack_zone *zone) { #ifdef CONFIG_NF_CONNTRACK_ZONES - struct nf_conntrack_zone *nf_ct_zone; - - nf_ct_zone = nf_ct_ext_add(ct, NF_CT_EXT_ZONE, flags); - if (!nf_ct_zone) - return -ENOMEM; - - nf_ct_zone_init(nf_ct_zone, info->id, info->dir, - info->flags); + ct->zone = *zone; #endif - return 0; } static inline bool nf_ct_zone_matches_dir(const struct nf_conntrack_zone *zone, @@ -68,22 +57,34 @@ static inline bool nf_ct_zone_matches_dir(const struct nf_conntrack_zone *zone, static inline u16 nf_ct_zone_id(const struct nf_conntrack_zone *zone, enum ip_conntrack_dir dir) { +#ifdef CONFIG_NF_CONNTRACK_ZONES return nf_ct_zone_matches_dir(zone, dir) ? zone->id : NF_CT_DEFAULT_ZONE_ID; +#else + return NF_CT_DEFAULT_ZONE_ID; +#endif } static inline bool nf_ct_zone_equal(const struct nf_conn *a, const struct nf_conntrack_zone *b, enum ip_conntrack_dir dir) { +#ifdef CONFIG_NF_CONNTRACK_ZONES return nf_ct_zone_id(nf_ct_zone(a), dir) == nf_ct_zone_id(b, dir); +#else + return true; +#endif } static inline bool nf_ct_zone_equal_any(const struct nf_conn *a, const struct nf_conntrack_zone *b) { +#ifdef CONFIG_NF_CONNTRACK_ZONES return nf_ct_zone(a)->id == b->id; +#else + return true; +#endif } #endif /* IS_ENABLED(CONFIG_NF_CONNTRACK) */ #endif /* _NF_CONNTRACK_ZONES_H */ diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h index 57639fca223a..83d855ba6af1 100644 --- a/include/net/netfilter/nf_log.h +++ b/include/net/netfilter/nf_log.h @@ -12,6 +12,9 @@ #define NF_LOG_UID 0x08 /* Log UID owning local socket */ #define NF_LOG_MASK 0x0f +/* This flag indicates that copy_len field in nf_loginfo is set */ +#define NF_LOG_F_COPY_LEN 0x1 + enum nf_log_type { NF_LOG_TYPE_LOG = 0, NF_LOG_TYPE_ULOG, @@ -22,9 +25,13 @@ struct nf_loginfo { u_int8_t type; union { struct { + /* copy_len will be used iff you set + * NF_LOG_F_COPY_LEN in flags + */ u_int32_t copy_len; u_int16_t group; u_int16_t qthreshold; + u_int16_t flags; } ulog; struct { u_int8_t level; diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index f7c291ff4074..30c1d9489ae2 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -297,6 +297,7 @@ void nft_unregister_set(struct nft_set_ops *ops); * @ops: set ops * @pnet: network namespace * @flags: set flags + * @genmask: generation mask * @klen: key length * @dlen: data length * @data: private set data @@ -318,7 +319,8 @@ struct nft_set { /* runtime data below here */ const struct nft_set_ops *ops ____cacheline_aligned; possible_net_t pnet; - u16 flags; + u16 flags:14, + genmask:2; u8 klen; u8 dlen; unsigned char data[] @@ -336,9 +338,9 @@ static inline struct nft_set *nft_set_container_of(const void *priv) } struct nft_set *nf_tables_set_lookup(const struct nft_table *table, - const struct nlattr *nla); + const struct nlattr *nla, u8 genmask); struct nft_set *nf_tables_set_lookup_byid(const struct net *net, - const struct nlattr *nla); + const struct nlattr *nla, u8 genmask); static inline unsigned long nft_set_gc_interval(const struct nft_set *set) { @@ -733,7 +735,6 @@ static inline struct nft_userdata *nft_userdata(const struct nft_rule *rule) enum nft_chain_flags { NFT_BASE_CHAIN = 0x1, - NFT_CHAIN_INACTIVE = 0x2, }; /** @@ -755,7 +756,8 @@ struct nft_chain { u64 handle; u32 use; u16 level; - u8 flags; + u8 flags:6, + genmask:2; char name[NFT_CHAIN_MAXNAMELEN]; }; @@ -797,7 +799,6 @@ struct nft_stats { }; #define NFT_HOOK_OPS_MAX 2 -#define NFT_BASECHAIN_DISABLED (1 << 0) /** * struct nft_base_chain - nf_tables base chain @@ -839,6 +840,7 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv); * @hgenerator: handle generator state * @use: number of chain references to this table * @flags: table flag (see enum nft_table_flags) + * @genmask: generation mask * @name: name of the table */ struct nft_table { @@ -847,7 +849,8 @@ struct nft_table { struct list_head sets; u64 hgenerator; u32 use; - u16 flags; + u16 flags:14, + genmask:2; char name[NFT_TABLE_MAXNAMELEN]; }; @@ -971,6 +974,32 @@ static inline u8 nft_genmask_cur(const struct net *net) #define NFT_GENMASK_ANY ((1 << 0) | (1 << 1)) /* + * Generic transaction helpers + */ + +/* Check if this object is currently active. */ +#define nft_is_active(__net, __obj) \ + (((__obj)->genmask & nft_genmask_cur(__net)) == 0) + +/* Check if this object is active in the next generation. */ +#define nft_is_active_next(__net, __obj) \ + (((__obj)->genmask & nft_genmask_next(__net)) == 0) + +/* This object becomes active in the next generation. */ +#define nft_activate_next(__net, __obj) \ + (__obj)->genmask = nft_genmask_cur(__net) + +/* This object becomes inactive in the next generation. */ +#define nft_deactivate_next(__net, __obj) \ + (__obj)->genmask = nft_genmask_next(__net) + +/* After committing the ruleset, clear the stale generation bit. */ +#define nft_clear(__net, __obj) \ + (__obj)->genmask &= ~nft_genmask_next(__net) +#define nft_active_genmask(__obj, __genmask) \ + !((__obj)->genmask & __genmask) + +/* * Set element transaction helpers */ diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 006a7b81d758..4113916cc1bb 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -98,10 +98,11 @@ struct rtnl_link_ops { const struct net_device *dev, const struct net_device *slave_dev); struct net *(*get_link_net)(const struct net_device *dev); - size_t (*get_linkxstats_size)(const struct net_device *dev); + size_t (*get_linkxstats_size)(const struct net_device *dev, + int attr); int (*fill_linkxstats)(struct sk_buff *skb, const struct net_device *dev, - int *prividx); + int *prividx, int attr); }; int __rtnl_link_register(struct rtnl_link_ops *ops); diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h index b496d5ad7d42..d01a5d40cfb5 100644 --- a/include/net/tc_act/tc_skbedit.h +++ b/include/net/tc_act/tc_skbedit.h @@ -24,11 +24,11 @@ struct tcf_skbedit { struct tcf_common common; - u32 flags; - u32 priority; - u32 mark; - u16 queue_mapping; - /* XXX: 16-bit pad here? */ + u32 flags; + u32 priority; + u32 mark; + u16 queue_mapping; + u16 ptype; }; #define to_skbedit(a) \ container_of(a->priv, struct tcf_skbedit, common) diff --git a/include/net/tcp.h b/include/net/tcp.h index a79894b66726..c00e7d51bb18 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -589,7 +589,7 @@ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize) * On the other hand, for extremely large MSS devices, handling * smaller than MSS windows in this way does make sense. */ - if (tp->max_window >= 512) + if (tp->max_window > TCP_MSS_DEFAULT) cutoff = (tp->max_window >> 1); else cutoff = tp->max_window; @@ -1384,7 +1384,7 @@ union tcp_md5sum_block { /* - pool: digest algorithm, hash description and scratch buffer */ struct tcp_md5sig_pool { struct ahash_request *md5_req; - union tcp_md5sum_block md5_blk; + void *scratch; }; /* - functions */ @@ -1420,7 +1420,6 @@ static inline void tcp_put_md5sig_pool(void) local_bh_enable(); } -int tcp_md5_hash_header(struct tcp_md5sig_pool *, const struct tcphdr *); int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, const struct sk_buff *, unsigned int header_len); int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h new file mode 100644 index 000000000000..0fbf6fd4711b --- /dev/null +++ b/include/uapi/linux/batman_adv.h @@ -0,0 +1,114 @@ +/* Copyright (C) 2016 B.A.T.M.A.N. contributors: + * + * Matthias Schiffer + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef _UAPI_LINUX_BATMAN_ADV_H_ +#define _UAPI_LINUX_BATMAN_ADV_H_ + +#define BATADV_NL_NAME "batadv" + +#define BATADV_NL_MCAST_GROUP_TPMETER "tpmeter" + +/** + * enum batadv_nl_attrs - batman-adv netlink attributes + * + * @BATADV_ATTR_UNSPEC: unspecified attribute to catch errors + * @BATADV_ATTR_VERSION: batman-adv version string + * @BATADV_ATTR_ALGO_NAME: name of routing algorithm + * @BATADV_ATTR_MESH_IFINDEX: index of the batman-adv interface + * @BATADV_ATTR_MESH_IFNAME: name of the batman-adv interface + * @BATADV_ATTR_MESH_ADDRESS: mac address of the batman-adv interface + * @BATADV_ATTR_HARD_IFINDEX: index of the non-batman-adv interface + * @BATADV_ATTR_HARD_IFNAME: name of the non-batman-adv interface + * @BATADV_ATTR_HARD_ADDRESS: mac address of the non-batman-adv interface + * @BATADV_ATTR_ORIG_ADDRESS: originator mac address + * @BATADV_ATTR_TPMETER_RESULT: result of run (see batadv_tp_meter_status) + * @BATADV_ATTR_TPMETER_TEST_TIME: time (msec) the run took + * @BATADV_ATTR_TPMETER_BYTES: amount of acked bytes during run + * @BATADV_ATTR_TPMETER_COOKIE: session cookie to match tp_meter session + * @BATADV_ATTR_PAD: attribute used for padding for 64-bit alignment + * @__BATADV_ATTR_AFTER_LAST: internal use + * @NUM_BATADV_ATTR: total number of batadv_nl_attrs available + * @BATADV_ATTR_MAX: highest attribute number currently defined + */ +enum batadv_nl_attrs { + BATADV_ATTR_UNSPEC, + BATADV_ATTR_VERSION, + BATADV_ATTR_ALGO_NAME, + BATADV_ATTR_MESH_IFINDEX, + BATADV_ATTR_MESH_IFNAME, + BATADV_ATTR_MESH_ADDRESS, + BATADV_ATTR_HARD_IFINDEX, + BATADV_ATTR_HARD_IFNAME, + BATADV_ATTR_HARD_ADDRESS, + BATADV_ATTR_ORIG_ADDRESS, + BATADV_ATTR_TPMETER_RESULT, + BATADV_ATTR_TPMETER_TEST_TIME, + BATADV_ATTR_TPMETER_BYTES, + BATADV_ATTR_TPMETER_COOKIE, + BATADV_ATTR_PAD, + /* add attributes above here, update the policy in netlink.c */ + __BATADV_ATTR_AFTER_LAST, + NUM_BATADV_ATTR = __BATADV_ATTR_AFTER_LAST, + BATADV_ATTR_MAX = __BATADV_ATTR_AFTER_LAST - 1 +}; + +/** + * enum batadv_nl_commands - supported batman-adv netlink commands + * + * @BATADV_CMD_UNSPEC: unspecified command to catch errors + * @BATADV_CMD_GET_MESH_INFO: Query basic information about batman-adv device + * @BATADV_CMD_TP_METER: Start a tp meter session + * @BATADV_CMD_TP_METER_CANCEL: Cancel a tp meter session + * @__BATADV_CMD_AFTER_LAST: internal use + * @BATADV_CMD_MAX: highest used command number + */ +enum batadv_nl_commands { + BATADV_CMD_UNSPEC, + BATADV_CMD_GET_MESH_INFO, + BATADV_CMD_TP_METER, + BATADV_CMD_TP_METER_CANCEL, + /* add new commands above here */ + __BATADV_CMD_AFTER_LAST, + BATADV_CMD_MAX = __BATADV_CMD_AFTER_LAST - 1 +}; + +/** + * enum batadv_tp_meter_reason - reason of a tp meter test run stop + * @BATADV_TP_REASON_COMPLETE: sender finished tp run + * @BATADV_TP_REASON_CANCEL: sender was stopped during run + * @BATADV_TP_REASON_DST_UNREACHABLE: receiver could not be reached or didn't + * answer + * @BATADV_TP_REASON_RESEND_LIMIT: (unused) sender retry reached limit + * @BATADV_TP_REASON_ALREADY_ONGOING: test to or from the same node already + * ongoing + * @BATADV_TP_REASON_MEMORY_ERROR: test was stopped due to low memory + * @BATADV_TP_REASON_CANT_SEND: failed to send via outgoing interface + * @BATADV_TP_REASON_TOO_MANY: too many ongoing sessions + */ +enum batadv_tp_meter_reason { + BATADV_TP_REASON_COMPLETE = 3, + BATADV_TP_REASON_CANCEL = 4, + /* error status >= 128 */ + BATADV_TP_REASON_DST_UNREACHABLE = 128, + BATADV_TP_REASON_RESEND_LIMIT = 129, + BATADV_TP_REASON_ALREADY_ONGOING = 130, + BATADV_TP_REASON_MEMORY_ERROR = 131, + BATADV_TP_REASON_CANT_SEND = 132, + BATADV_TP_REASON_TOO_MANY = 133, +}; + +#endif /* _UAPI_LINUX_BATMAN_ADV_H_ */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 406459b935a2..c14ca1cd6297 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -84,6 +84,7 @@ enum bpf_map_type { BPF_MAP_TYPE_PERCPU_HASH, BPF_MAP_TYPE_PERCPU_ARRAY, BPF_MAP_TYPE_STACK_TRACE, + BPF_MAP_TYPE_CGROUP_ARRAY, }; enum bpf_prog_type { @@ -313,6 +314,49 @@ enum bpf_func_id { */ BPF_FUNC_skb_get_tunnel_opt, BPF_FUNC_skb_set_tunnel_opt, + + /** + * bpf_skb_change_proto(skb, proto, flags) + * Change protocol of the skb. Currently supported is + * v4 -> v6, v6 -> v4 transitions. The helper will also + * resize the skb. eBPF program is expected to fill the + * new headers via skb_store_bytes and lX_csum_replace. + * @skb: pointer to skb + * @proto: new skb->protocol type + * @flags: reserved + * Return: 0 on success or negative error + */ + BPF_FUNC_skb_change_proto, + + /** + * bpf_skb_change_type(skb, type) + * Change packet type of skb. + * @skb: pointer to skb + * @type: new skb->pkt_type type + * Return: 0 on success or negative error + */ + BPF_FUNC_skb_change_type, + + /** + * bpf_skb_in_cgroup(skb, map, index) - Check cgroup2 membership of skb + * @skb: pointer to skb + * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type + * @index: index of the cgroup in the bpf_map + * Return: + * == 0 skb failed the cgroup2 descendant test + * == 1 skb succeeded the cgroup2 descendant test + * < 0 error + */ + BPF_FUNC_skb_in_cgroup, + + /** + * bpf_get_hash_recalc(skb) + * Retrieve and possibly recalculate skb->hash. + * @skb: pointer to skb + * Return: hash + */ + BPF_FUNC_get_hash_recalc, + __BPF_FUNC_MAX_ID, }; @@ -347,7 +391,7 @@ enum bpf_func_id { #define BPF_F_ZERO_CSUM_TX (1ULL << 1) #define BPF_F_DONT_FRAGMENT (1ULL << 2) -/* BPF_FUNC_perf_event_output flags. */ +/* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */ #define BPF_F_INDEX_MASK 0xffffffffULL #define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index ba0073b26fa6..915bfa74458c 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -57,6 +57,8 @@ enum devlink_command { DEVLINK_CMD_SB_OCC_SNAPSHOT, DEVLINK_CMD_SB_OCC_MAX_CLEAR, + DEVLINK_CMD_ESWITCH_MODE_GET, + DEVLINK_CMD_ESWITCH_MODE_SET, /* add new commands above here */ __DEVLINK_CMD_MAX, @@ -95,6 +97,11 @@ enum devlink_sb_threshold_type { #define DEVLINK_SB_THRESHOLD_TO_ALPHA_MAX 20 +enum devlink_eswitch_mode { + DEVLINK_ESWITCH_MODE_LEGACY, + DEVLINK_ESWITCH_MODE_SWITCHDEV, +}; + enum devlink_attr { /* don't change the order or add anything between, this is ABI! */ DEVLINK_ATTR_UNSPEC, @@ -125,6 +132,7 @@ enum devlink_attr { DEVLINK_ATTR_SB_TC_INDEX, /* u16 */ DEVLINK_ATTR_SB_OCC_CUR, /* u32 */ DEVLINK_ATTR_SB_OCC_MAX, /* u32 */ + DEVLINK_ATTR_ESWITCH_MODE, /* u16 */ /* add new attributes above here, update the policy in devlink.c */ diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 5974fae54e12..27e17363263a 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -105,6 +105,9 @@ * * 7.24 * - add FUSE_LSEEK for SEEK_HOLE and SEEK_DATA support + * + * 7.25 + * - add FUSE_PARALLEL_DIROPS */ #ifndef _LINUX_FUSE_H @@ -140,7 +143,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 24 +#define FUSE_KERNEL_MINOR_VERSION 25 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -234,6 +237,7 @@ struct fuse_file_lock { * FUSE_ASYNC_DIO: asynchronous direct I/O submission * FUSE_WRITEBACK_CACHE: use writeback cache for buffered writes * FUSE_NO_OPEN_SUPPORT: kernel supports zero-message opens + * FUSE_PARALLEL_DIROPS: allow parallel lookups and readdir */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -253,6 +257,7 @@ struct fuse_file_lock { #define FUSE_ASYNC_DIO (1 << 15) #define FUSE_WRITEBACK_CACHE (1 << 16) #define FUSE_NO_OPEN_SUPPORT (1 << 17) +#define FUSE_PARALLEL_DIROPS (1 << 18) /** * CUSE INIT request/reply flags diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 397d503fdedb..8304fe6f0561 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -247,8 +247,34 @@ enum { enum { BRIDGE_XSTATS_UNSPEC, BRIDGE_XSTATS_VLAN, + BRIDGE_XSTATS_MCAST, + BRIDGE_XSTATS_PAD, __BRIDGE_XSTATS_MAX }; #define BRIDGE_XSTATS_MAX (__BRIDGE_XSTATS_MAX - 1) +enum { + BR_MCAST_DIR_RX, + BR_MCAST_DIR_TX, + BR_MCAST_DIR_SIZE +}; + +/* IGMP/MLD statistics */ +struct br_mcast_stats { + __u64 igmp_queries[BR_MCAST_DIR_SIZE]; + __u64 igmp_leaves[BR_MCAST_DIR_SIZE]; + __u64 igmp_v1reports[BR_MCAST_DIR_SIZE]; + __u64 igmp_v2reports[BR_MCAST_DIR_SIZE]; + __u64 igmp_v3reports[BR_MCAST_DIR_SIZE]; + __u64 igmp_parse_errors; + + __u64 mld_queries[BR_MCAST_DIR_SIZE]; + __u64 mld_leaves[BR_MCAST_DIR_SIZE]; + __u64 mld_v1reports[BR_MCAST_DIR_SIZE]; + __u64 mld_v2reports[BR_MCAST_DIR_SIZE]; + __u64 mld_parse_errors; + + __u64 mcast_bytes[BR_MCAST_DIR_SIZE]; + __u64 mcast_packets[BR_MCAST_DIR_SIZE]; +}; #endif /* _UAPI_LINUX_IF_BRIDGE_H */ diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index bb36bd5675a7..4285ac31e865 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -273,6 +273,7 @@ enum { IFLA_BR_VLAN_DEFAULT_PVID, IFLA_BR_PAD, IFLA_BR_VLAN_STATS_ENABLED, + IFLA_BR_MCAST_STATS_ENABLED, __IFLA_BR_MAX, }; @@ -822,6 +823,7 @@ enum { IFLA_STATS_UNSPEC, /* also used as 64bit pad attribute */ IFLA_STATS_LINK_64, IFLA_STATS_LINK_XSTATS, + IFLA_STATS_LINK_XSTATS_SLAVE, __IFLA_STATS_MAX, }; diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 6a4dbe04f09e..01751faccaf8 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -546,6 +546,10 @@ enum nft_cmp_attributes { }; #define NFTA_CMP_MAX (__NFTA_CMP_MAX - 1) +enum nft_lookup_flags { + NFT_LOOKUP_F_INV = (1 << 0), +}; + /** * enum nft_lookup_attributes - nf_tables set lookup expression netlink attributes * @@ -553,6 +557,7 @@ enum nft_cmp_attributes { * @NFTA_LOOKUP_SREG: source register of the data to look for (NLA_U32: nft_registers) * @NFTA_LOOKUP_DREG: destination register (NLA_U32: nft_registers) * @NFTA_LOOKUP_SET_ID: uniquely identifies a set in a transaction (NLA_U32) + * @NFTA_LOOKUP_FLAGS: flags (NLA_U32: enum nft_lookup_flags) */ enum nft_lookup_attributes { NFTA_LOOKUP_UNSPEC, @@ -560,6 +565,7 @@ enum nft_lookup_attributes { NFTA_LOOKUP_SREG, NFTA_LOOKUP_DREG, NFTA_LOOKUP_SET_ID, + NFTA_LOOKUP_FLAGS, __NFTA_LOOKUP_MAX }; #define NFTA_LOOKUP_MAX (__NFTA_LOOKUP_MAX - 1) diff --git a/include/uapi/linux/netfilter/xt_NFLOG.h b/include/uapi/linux/netfilter/xt_NFLOG.h index 87b58311ce6b..f33070730fc8 100644 --- a/include/uapi/linux/netfilter/xt_NFLOG.h +++ b/include/uapi/linux/netfilter/xt_NFLOG.h @@ -6,9 +6,13 @@ #define XT_NFLOG_DEFAULT_GROUP 0x1 #define XT_NFLOG_DEFAULT_THRESHOLD 0 -#define XT_NFLOG_MASK 0x0 +#define XT_NFLOG_MASK 0x1 + +/* This flag indicates that 'len' field in xt_nflog_info is set*/ +#define XT_NFLOG_F_COPY_LEN 0x1 struct xt_nflog_info { + /* 'len' will be used iff you set XT_NFLOG_F_COPY_LEN in flags */ __u32 len; __u16 group; __u16 threshold; diff --git a/include/uapi/linux/tc_act/tc_skbedit.h b/include/uapi/linux/tc_act/tc_skbedit.h index fecb5cc48c40..a4d00c608d8f 100644 --- a/include/uapi/linux/tc_act/tc_skbedit.h +++ b/include/uapi/linux/tc_act/tc_skbedit.h @@ -27,6 +27,7 @@ #define SKBEDIT_F_PRIORITY 0x1 #define SKBEDIT_F_QUEUE_MAPPING 0x2 #define SKBEDIT_F_MARK 0x4 +#define SKBEDIT_F_PTYPE 0x8 struct tc_skbedit { tc_gen; @@ -40,6 +41,7 @@ enum { TCA_SKBEDIT_QUEUE_MAPPING, TCA_SKBEDIT_MARK, TCA_SKBEDIT_PAD, + TCA_SKBEDIT_PTYPE, __TCA_SKBEDIT_MAX }; #define TCA_SKBEDIT_MAX (__TCA_SKBEDIT_MAX - 1) diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 53e8e3fe6b1b..482898fc433a 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -115,12 +115,22 @@ enum { #define TCP_CC_INFO 26 /* Get Congestion Control (optional) info */ #define TCP_SAVE_SYN 27 /* Record SYN headers for new connections */ #define TCP_SAVED_SYN 28 /* Get SYN headers recorded for connection */ +#define TCP_REPAIR_WINDOW 29 /* Get/set window parameters */ struct tcp_repair_opt { __u32 opt_code; __u32 opt_val; }; +struct tcp_repair_window { + __u32 snd_wl1; + __u32 snd_wnd; + __u32 max_window; + + __u32 rcv_wnd; + __u32 rcv_wup; +}; + enum { TCP_NO_QUEUE, TCP_RECV_QUEUE, diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 5af30732697b..db1a743e3db2 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -390,9 +390,7 @@ static void *prog_fd_array_get_ptr(struct bpf_map *map, static void prog_fd_array_put_ptr(void *ptr) { - struct bpf_prog *prog = ptr; - - bpf_prog_put_rcu(prog); + bpf_prog_put(ptr); } /* decrement refcnt of all bpf_progs that are stored in this map */ @@ -539,3 +537,46 @@ static int __init register_perf_event_array_map(void) return 0; } late_initcall(register_perf_event_array_map); + +#ifdef CONFIG_SOCK_CGROUP_DATA +static void *cgroup_fd_array_get_ptr(struct bpf_map *map, + struct file *map_file /* not used */, + int fd) +{ + return cgroup_get_from_fd(fd); +} + +static void cgroup_fd_array_put_ptr(void *ptr) +{ + /* cgroup_put free cgrp after a rcu grace period */ + cgroup_put(ptr); +} + +static void cgroup_fd_array_free(struct bpf_map *map) +{ + bpf_fd_array_map_clear(map); + fd_array_map_free(map); +} + +static const struct bpf_map_ops cgroup_array_ops = { + .map_alloc = fd_array_map_alloc, + .map_free = cgroup_fd_array_free, + .map_get_next_key = array_map_get_next_key, + .map_lookup_elem = fd_array_map_lookup_elem, + .map_delete_elem = fd_array_map_delete_elem, + .map_fd_get_ptr = cgroup_fd_array_get_ptr, + .map_fd_put_ptr = cgroup_fd_array_put_ptr, +}; + +static struct bpf_map_type_list cgroup_array_type __read_mostly = { + .ops = &cgroup_array_ops, + .type = BPF_MAP_TYPE_CGROUP_ARRAY, +}; + +static int __init register_cgroup_array_map(void) +{ + bpf_register_map_type(&cgroup_array_type); + return 0; +} +late_initcall(register_cgroup_array_map); +#endif diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index b94a36550591..d638062f66d6 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -719,14 +719,13 @@ select_insn: if (unlikely(index >= array->map.max_entries)) goto out; - if (unlikely(tail_call_cnt > MAX_TAIL_CALL_CNT)) goto out; tail_call_cnt++; prog = READ_ONCE(array->ptrs[index]); - if (unlikely(!prog)) + if (!prog) goto out; /* ARG1 at this point is guaranteed to point to CTX from diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index ad7a0573f71b..1ea3afba1a4f 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -101,7 +101,7 @@ const struct bpf_func_proto bpf_get_prandom_u32_proto = { static u64 bpf_get_smp_processor_id(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) { - return raw_smp_processor_id(); + return smp_processor_id(); } const struct bpf_func_proto bpf_get_smp_processor_id_proto = { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index c23a4e9311b3..96d938a22050 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -393,7 +393,8 @@ static int map_update_elem(union bpf_attr *attr) } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { err = bpf_percpu_array_update(map, key, value, attr->flags); } else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || - map->map_type == BPF_MAP_TYPE_PROG_ARRAY) { + map->map_type == BPF_MAP_TYPE_PROG_ARRAY || + map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY) { rcu_read_lock(); err = bpf_fd_array_map_update_elem(map, f.file, key, value, attr->flags); @@ -623,7 +624,7 @@ static void bpf_prog_uncharge_memlock(struct bpf_prog *prog) free_uid(user); } -static void __prog_put_common(struct rcu_head *rcu) +static void __bpf_prog_put_rcu(struct rcu_head *rcu) { struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu); @@ -632,17 +633,10 @@ static void __prog_put_common(struct rcu_head *rcu) bpf_prog_free(aux->prog); } -/* version of bpf_prog_put() that is called after a grace period */ -void bpf_prog_put_rcu(struct bpf_prog *prog) -{ - if (atomic_dec_and_test(&prog->aux->refcnt)) - call_rcu(&prog->aux->rcu, __prog_put_common); -} - void bpf_prog_put(struct bpf_prog *prog) { if (atomic_dec_and_test(&prog->aux->refcnt)) - __prog_put_common(&prog->aux->rcu); + call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu); } EXPORT_SYMBOL_GPL(bpf_prog_put); @@ -650,7 +644,7 @@ static int bpf_prog_release(struct inode *inode, struct file *filp) { struct bpf_prog *prog = filp->private_data; - bpf_prog_put_rcu(prog); + bpf_prog_put(prog); return 0; } @@ -664,7 +658,7 @@ int bpf_prog_new_fd(struct bpf_prog *prog) O_RDWR | O_CLOEXEC); } -static struct bpf_prog *__bpf_prog_get(struct fd f) +static struct bpf_prog *____bpf_prog_get(struct fd f) { if (!f.file) return ERR_PTR(-EBADF); @@ -685,24 +679,35 @@ struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog) return prog; } -/* called by sockets/tracing/seccomp before attaching program to an event - * pairs with bpf_prog_put() - */ -struct bpf_prog *bpf_prog_get(u32 ufd) +static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type) { struct fd f = fdget(ufd); struct bpf_prog *prog; - prog = __bpf_prog_get(f); + prog = ____bpf_prog_get(f); if (IS_ERR(prog)) return prog; + if (type && prog->type != *type) { + prog = ERR_PTR(-EINVAL); + goto out; + } prog = bpf_prog_inc(prog); +out: fdput(f); - return prog; } -EXPORT_SYMBOL_GPL(bpf_prog_get); + +struct bpf_prog *bpf_prog_get(u32 ufd) +{ + return __bpf_prog_get(ufd, NULL); +} + +struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type) +{ + return __bpf_prog_get(ufd, &type); +} +EXPORT_SYMBOL_GPL(bpf_prog_get_type); /* last field in 'union bpf_attr' used by this command */ #define BPF_PROG_LOAD_LAST_FIELD kern_version diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index eec9f90ba030..e206c2181412 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1035,6 +1035,10 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id) if (func_id != BPF_FUNC_get_stackid) goto error; break; + case BPF_MAP_TYPE_CGROUP_ARRAY: + if (func_id != BPF_FUNC_skb_in_cgroup) + goto error; + break; default: break; } @@ -1054,6 +1058,10 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id) if (map->map_type != BPF_MAP_TYPE_STACK_TRACE) goto error; break; + case BPF_FUNC_skb_in_cgroup: + if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY) + goto error; + break; default: break; } diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 75c0ff00aca6..50787cd61da2 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -62,6 +62,7 @@ #include <linux/proc_ns.h> #include <linux/nsproxy.h> #include <linux/proc_ns.h> +#include <linux/file.h> #include <net/sock.h> /* @@ -6209,6 +6210,40 @@ struct cgroup *cgroup_get_from_path(const char *path) } EXPORT_SYMBOL_GPL(cgroup_get_from_path); +/** + * cgroup_get_from_fd - get a cgroup pointer from a fd + * @fd: fd obtained by open(cgroup2_dir) + * + * Find the cgroup from a fd which should be obtained + * by opening a cgroup directory. Returns a pointer to the + * cgroup on success. ERR_PTR is returned if the cgroup + * cannot be found. + */ +struct cgroup *cgroup_get_from_fd(int fd) +{ + struct cgroup_subsys_state *css; + struct cgroup *cgrp; + struct file *f; + + f = fget_raw(fd); + if (!f) + return ERR_PTR(-EBADF); + + css = css_tryget_online_from_dir(f->f_path.dentry, NULL); + fput(f); + if (IS_ERR(css)) + return ERR_CAST(css); + + cgrp = css->cgroup; + if (!cgroup_on_dfl(cgrp)) { + cgroup_put(cgrp); + return ERR_PTR(-EBADF); + } + + return cgrp; +} +EXPORT_SYMBOL_GPL(cgroup_get_from_fd); + /* * sock->sk_cgrp_data handling. For more info, see sock_cgroup_data * definition in cgroup-defs.h. diff --git a/kernel/events/core.c b/kernel/events/core.c index 85cd41878a74..9c51ec3f0f44 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7529,7 +7529,7 @@ static void perf_event_free_bpf_prog(struct perf_event *event) prog = event->tp_event->prog; if (prog) { event->tp_event->prog = NULL; - bpf_prog_put_rcu(prog); + bpf_prog_put(prog); } } diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 3de25fbed785..19c5b4a5c3eb 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -188,30 +188,35 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void) return &bpf_trace_printk_proto; } -static u64 bpf_perf_event_read(u64 r1, u64 index, u64 r3, u64 r4, u64 r5) +static u64 bpf_perf_event_read(u64 r1, u64 flags, u64 r3, u64 r4, u64 r5) { struct bpf_map *map = (struct bpf_map *) (unsigned long) r1; struct bpf_array *array = container_of(map, struct bpf_array, map); + unsigned int cpu = smp_processor_id(); + u64 index = flags & BPF_F_INDEX_MASK; struct bpf_event_entry *ee; struct perf_event *event; + if (unlikely(flags & ~(BPF_F_INDEX_MASK))) + return -EINVAL; + if (index == BPF_F_CURRENT_CPU) + index = cpu; if (unlikely(index >= array->map.max_entries)) return -E2BIG; ee = READ_ONCE(array->ptrs[index]); - if (unlikely(!ee)) + if (!ee) return -ENOENT; event = ee->event; - /* make sure event is local and doesn't have pmu::count */ - if (event->oncpu != smp_processor_id() || - event->pmu->count) - return -EINVAL; - if (unlikely(event->attr.type != PERF_TYPE_HARDWARE && event->attr.type != PERF_TYPE_RAW)) return -EINVAL; + /* make sure event is local and doesn't have pmu::count */ + if (unlikely(event->oncpu != cpu || event->pmu->count)) + return -EINVAL; + /* * we don't know if the function is run successfully by the * return value. It can be judged in other places, such as @@ -233,6 +238,7 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size) struct pt_regs *regs = (struct pt_regs *) (long) r1; struct bpf_map *map = (struct bpf_map *) (long) r2; struct bpf_array *array = container_of(map, struct bpf_array, map); + unsigned int cpu = smp_processor_id(); u64 index = flags & BPF_F_INDEX_MASK; void *data = (void *) (long) r4; struct perf_sample_data sample_data; @@ -246,12 +252,12 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size) if (unlikely(flags & ~(BPF_F_INDEX_MASK))) return -EINVAL; if (index == BPF_F_CURRENT_CPU) - index = raw_smp_processor_id(); + index = cpu; if (unlikely(index >= array->map.max_entries)) return -E2BIG; ee = READ_ONCE(array->ptrs[index]); - if (unlikely(!ee)) + if (!ee) return -ENOENT; event = ee->event; @@ -259,7 +265,7 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size) event->attr.config != PERF_COUNT_SW_BPF_OUTPUT)) return -EINVAL; - if (unlikely(event->oncpu != smp_processor_id())) + if (unlikely(event->oncpu != cpu)) return -EOPNOTSUPP; perf_sample_data_init(&sample_data, 0, 0); @@ -354,18 +360,12 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type, enum bpf_reg_type *reg_type) { - /* check bounds */ if (off < 0 || off >= sizeof(struct pt_regs)) return false; - - /* only read is allowed */ if (type != BPF_READ) return false; - - /* disallow misaligned access */ if (off % size != 0) return false; - return true; } diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 86ae75b77390..c8f422c90856 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -790,6 +790,8 @@ static const struct net_device_ops vlan_netdev_ops = { .ndo_netpoll_cleanup = vlan_dev_netpoll_cleanup, #endif .ndo_fix_features = vlan_dev_fix_features, + .ndo_neigh_construct = netdev_default_l2upper_neigh_construct, + .ndo_neigh_destroy = netdev_default_l2upper_neigh_destroy, .ndo_fdb_add = switchdev_port_fdb_add, .ndo_fdb_del = switchdev_port_fdb_del, .ndo_fdb_dump = switchdev_port_fdb_dump, diff --git a/net/atm/clip.c b/net/atm/clip.c index e07f551a863c..53b4ac09e7b7 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -286,7 +286,7 @@ static const struct neigh_ops clip_neigh_ops = { .connected_output = neigh_direct_output, }; -static int clip_constructor(struct neighbour *neigh) +static int clip_constructor(struct net_device *dev, struct neighbour *neigh) { struct atmarp_entry *entry = neighbour_priv(neigh); diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig index f66930ee3c0b..833bb145ba3c 100644 --- a/net/batman-adv/Kconfig +++ b/net/batman-adv/Kconfig @@ -66,7 +66,7 @@ config BATMAN_ADV_NC config BATMAN_ADV_MCAST bool "Multicast optimisation" - depends on BATMAN_ADV + depends on BATMAN_ADV && INET && !(BRIDGE=m && BATMAN_ADV=y) default n help This option enables the multicast optimisation which aims to diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile index 797cf2fc88c1..a83fc6c58d19 100644 --- a/net/batman-adv/Makefile +++ b/net/batman-adv/Makefile @@ -17,6 +17,7 @@ # obj-$(CONFIG_BATMAN_ADV) += batman-adv.o +batman-adv-y += bat_algo.o batman-adv-y += bat_iv_ogm.o batman-adv-$(CONFIG_BATMAN_ADV_BATMAN_V) += bat_v.o batman-adv-$(CONFIG_BATMAN_ADV_BATMAN_V) += bat_v_elp.o @@ -31,12 +32,16 @@ batman-adv-y += gateway_common.o batman-adv-y += hard-interface.o batman-adv-y += hash.o batman-adv-y += icmp_socket.o +batman-adv-$(CONFIG_BATMAN_ADV_DEBUG) += log.o batman-adv-y += main.o batman-adv-$(CONFIG_BATMAN_ADV_MCAST) += multicast.o +batman-adv-y += netlink.o batman-adv-$(CONFIG_BATMAN_ADV_NC) += network-coding.o batman-adv-y += originator.o batman-adv-y += routing.o batman-adv-y += send.o batman-adv-y += soft-interface.o batman-adv-y += sysfs.o +batman-adv-y += tp_meter.o batman-adv-y += translation-table.o +batman-adv-y += tvlv.o diff --git a/net/batman-adv/bat_algo.c b/net/batman-adv/bat_algo.c new file mode 100644 index 000000000000..81dbbf569bd4 --- /dev/null +++ b/net/batman-adv/bat_algo.c @@ -0,0 +1,140 @@ +/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors: + * + * Marek Lindner, Simon Wunderlich + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "main.h" + +#include <linux/errno.h> +#include <linux/list.h> +#include <linux/moduleparam.h> +#include <linux/printk.h> +#include <linux/seq_file.h> +#include <linux/stddef.h> +#include <linux/string.h> + +#include "bat_algo.h" + +char batadv_routing_algo[20] = "BATMAN_IV"; +static struct hlist_head batadv_algo_list; + +/** + * batadv_algo_init - Initialize batman-adv algorithm management data structures + */ +void batadv_algo_init(void) +{ + INIT_HLIST_HEAD(&batadv_algo_list); +} + +static struct batadv_algo_ops *batadv_algo_get(char *name) +{ + struct batadv_algo_ops *bat_algo_ops = NULL, *bat_algo_ops_tmp; + + hlist_for_each_entry(bat_algo_ops_tmp, &batadv_algo_list, list) { + if (strcmp(bat_algo_ops_tmp->name, name) != 0) + continue; + + bat_algo_ops = bat_algo_ops_tmp; + break; + } + + return bat_algo_ops; +} + +int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops) +{ + struct batadv_algo_ops *bat_algo_ops_tmp; + + bat_algo_ops_tmp = batadv_algo_get(bat_algo_ops->name); + if (bat_algo_ops_tmp) { + pr_info("Trying to register already registered routing algorithm: %s\n", + bat_algo_ops->name); + return -EEXIST; + } + + /* all algorithms must implement all ops (for now) */ + if (!bat_algo_ops->iface.enable || + !bat_algo_ops->iface.disable || + !bat_algo_ops->iface.update_mac || + !bat_algo_ops->iface.primary_set || + !bat_algo_ops->neigh.cmp || + !bat_algo_ops->neigh.is_similar_or_better) { + pr_info("Routing algo '%s' does not implement required ops\n", + bat_algo_ops->name); + return -EINVAL; + } + + INIT_HLIST_NODE(&bat_algo_ops->list); + hlist_add_head(&bat_algo_ops->list, &batadv_algo_list); + + return 0; +} + +int batadv_algo_select(struct batadv_priv *bat_priv, char *name) +{ + struct batadv_algo_ops *bat_algo_ops; + + bat_algo_ops = batadv_algo_get(name); + if (!bat_algo_ops) + return -EINVAL; + + bat_priv->algo_ops = bat_algo_ops; + + return 0; +} + +int batadv_algo_seq_print_text(struct seq_file *seq, void *offset) +{ + struct batadv_algo_ops *bat_algo_ops; + + seq_puts(seq, "Available routing algorithms:\n"); + + hlist_for_each_entry(bat_algo_ops, &batadv_algo_list, list) { + seq_printf(seq, " * %s\n", bat_algo_ops->name); + } + + return 0; +} + +static int batadv_param_set_ra(const char *val, const struct kernel_param *kp) +{ + struct batadv_algo_ops *bat_algo_ops; + char *algo_name = (char *)val; + size_t name_len = strlen(algo_name); + + if (name_len > 0 && algo_name[name_len - 1] == '\n') + algo_name[name_len - 1] = '\0'; + + bat_algo_ops = batadv_algo_get(algo_name); + if (!bat_algo_ops) { + pr_err("Routing algorithm '%s' is not supported\n", algo_name); + return -EINVAL; + } + + return param_set_copystring(algo_name, kp); +} + +static const struct kernel_param_ops batadv_param_ops_ra = { + .set = batadv_param_set_ra, + .get = param_get_string, +}; + +static struct kparam_string batadv_param_string_ra = { + .maxlen = sizeof(batadv_routing_algo), + .string = batadv_routing_algo, +}; + +module_param_cb(routing_algo, &batadv_param_ops_ra, &batadv_param_string_ra, + 0644); diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h index 03dafd33d23b..860d773dd8fa 100644 --- a/net/batman-adv/bat_algo.h +++ b/net/batman-adv/bat_algo.h @@ -18,32 +18,18 @@ #ifndef _NET_BATMAN_ADV_BAT_ALGO_H_ #define _NET_BATMAN_ADV_BAT_ALGO_H_ -struct batadv_priv; +#include "main.h" -int batadv_iv_init(void); +#include <linux/types.h> -#ifdef CONFIG_BATMAN_ADV_BATMAN_V +struct seq_file; -int batadv_v_init(void); -int batadv_v_mesh_init(struct batadv_priv *bat_priv); -void batadv_v_mesh_free(struct batadv_priv *bat_priv); +extern char batadv_routing_algo[]; +extern struct list_head batadv_hardif_list; -#else - -static inline int batadv_v_init(void) -{ - return 0; -} - -static inline int batadv_v_mesh_init(struct batadv_priv *bat_priv) -{ - return 0; -} - -static inline void batadv_v_mesh_free(struct batadv_priv *bat_priv) -{ -} - -#endif /* CONFIG_BATMAN_ADV_BATMAN_V */ +void batadv_algo_init(void); +int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops); +int batadv_algo_select(struct batadv_priv *bat_priv, char *name); +int batadv_algo_seq_print_text(struct seq_file *seq, void *offset); #endif /* _NET_BATMAN_ADV_BAT_ALGO_H_ */ diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index ce2f203048d3..19b0abd6c640 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -15,7 +15,7 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#include "bat_algo.h" +#include "bat_iv_ogm.h" #include "main.h" #include <linux/atomic.h> @@ -30,8 +30,9 @@ #include <linux/if_ether.h> #include <linux/init.h> #include <linux/jiffies.h> -#include <linux/list.h> +#include <linux/kernel.h> #include <linux/kref.h> +#include <linux/list.h> #include <linux/lockdep.h> #include <linux/netdevice.h> #include <linux/pkt_sched.h> @@ -48,15 +49,20 @@ #include <linux/types.h> #include <linux/workqueue.h> +#include "bat_algo.h" #include "bitarray.h" #include "hard-interface.h" #include "hash.h" +#include "log.h" #include "network-coding.h" #include "originator.h" #include "packet.h" #include "routing.h" #include "send.h" #include "translation-table.h" +#include "tvlv.h" + +static void batadv_iv_send_outstanding_bat_ogm_packet(struct work_struct *work); /** * enum batadv_dup_status - duplicate status @@ -336,7 +342,8 @@ batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface, { struct batadv_neigh_node *neigh_node; - neigh_node = batadv_neigh_node_new(orig_node, hard_iface, neigh_addr); + neigh_node = batadv_neigh_node_get_or_create(orig_node, + hard_iface, neigh_addr); if (!neigh_node) goto out; @@ -730,7 +737,7 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, /* start timer for this packet */ INIT_DELAYED_WORK(&forw_packet_aggr->delayed_work, - batadv_send_outstanding_bat_ogm_packet); + batadv_iv_send_outstanding_bat_ogm_packet); queue_delayed_work(batadv_event_workqueue, &forw_packet_aggr->delayed_work, send_time - jiffies); @@ -937,6 +944,19 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) u16 tvlv_len = 0; unsigned long send_time; + if ((hard_iface->if_status == BATADV_IF_NOT_IN_USE) || + (hard_iface->if_status == BATADV_IF_TO_BE_REMOVED)) + return; + + /* the interface gets activated here to avoid race conditions between + * the moment of activating the interface in + * hardif_activate_interface() where the originator mac is set and + * outdated packets (especially uninitialized mac addresses) in the + * packet queue + */ + if (hard_iface->if_status == BATADV_IF_TO_BE_ACTIVATED) + hard_iface->if_status = BATADV_IF_ACTIVE; + primary_if = batadv_primary_if_get_selected(bat_priv); if (hard_iface == primary_if) { @@ -1778,6 +1798,45 @@ static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset, batadv_orig_node_put(orig_node); } +static void batadv_iv_send_outstanding_bat_ogm_packet(struct work_struct *work) +{ + struct delayed_work *delayed_work; + struct batadv_forw_packet *forw_packet; + struct batadv_priv *bat_priv; + + delayed_work = to_delayed_work(work); + forw_packet = container_of(delayed_work, struct batadv_forw_packet, + delayed_work); + bat_priv = netdev_priv(forw_packet->if_incoming->soft_iface); + spin_lock_bh(&bat_priv->forw_bat_list_lock); + hlist_del(&forw_packet->list); + spin_unlock_bh(&bat_priv->forw_bat_list_lock); + + if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING) + goto out; + + batadv_iv_ogm_emit(forw_packet); + + /* we have to have at least one packet in the queue to determine the + * queues wake up time unless we are shutting down. + * + * only re-schedule if this is the "original" copy, e.g. the OGM of the + * primary interface should only be rescheduled once per period, but + * this function will be called for the forw_packet instances of the + * other secondary interfaces as well. + */ + if (forw_packet->own && + forw_packet->if_incoming == forw_packet->if_outgoing) + batadv_iv_ogm_schedule(forw_packet->if_incoming); + +out: + /* don't count own packet */ + if (!forw_packet->own) + atomic_inc(&bat_priv->batman_queue_left); + + batadv_forw_packet_free(forw_packet); +} + static int batadv_iv_ogm_receive(struct sk_buff *skb, struct batadv_hard_iface *if_incoming) { @@ -1794,7 +1853,7 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb, /* did we receive a B.A.T.M.A.N. IV OGM packet on an interface * that does not have B.A.T.M.A.N. IV enabled ? */ - if (bat_priv->bat_algo_ops->bat_ogm_emit != batadv_iv_ogm_emit) + if (bat_priv->algo_ops->iface.enable != batadv_iv_ogm_iface_enable) return NET_RX_DROP; batadv_inc_counter(bat_priv, BATADV_CNT_MGMT_RX); @@ -2052,21 +2111,32 @@ out: return ret; } +static void batadv_iv_iface_activate(struct batadv_hard_iface *hard_iface) +{ + /* begin scheduling originator messages on that interface */ + batadv_iv_ogm_schedule(hard_iface); +} + static struct batadv_algo_ops batadv_batman_iv __read_mostly = { .name = "BATMAN_IV", - .bat_iface_enable = batadv_iv_ogm_iface_enable, - .bat_iface_disable = batadv_iv_ogm_iface_disable, - .bat_iface_update_mac = batadv_iv_ogm_iface_update_mac, - .bat_primary_iface_set = batadv_iv_ogm_primary_iface_set, - .bat_ogm_schedule = batadv_iv_ogm_schedule, - .bat_ogm_emit = batadv_iv_ogm_emit, - .bat_neigh_cmp = batadv_iv_ogm_neigh_cmp, - .bat_neigh_is_similar_or_better = batadv_iv_ogm_neigh_is_sob, - .bat_neigh_print = batadv_iv_neigh_print, - .bat_orig_print = batadv_iv_ogm_orig_print, - .bat_orig_free = batadv_iv_ogm_orig_free, - .bat_orig_add_if = batadv_iv_ogm_orig_add_if, - .bat_orig_del_if = batadv_iv_ogm_orig_del_if, + .iface = { + .activate = batadv_iv_iface_activate, + .enable = batadv_iv_ogm_iface_enable, + .disable = batadv_iv_ogm_iface_disable, + .update_mac = batadv_iv_ogm_iface_update_mac, + .primary_set = batadv_iv_ogm_primary_iface_set, + }, + .neigh = { + .cmp = batadv_iv_ogm_neigh_cmp, + .is_similar_or_better = batadv_iv_ogm_neigh_is_sob, + .print = batadv_iv_neigh_print, + }, + .orig = { + .print = batadv_iv_ogm_orig_print, + .free = batadv_iv_ogm_orig_free, + .add_if = batadv_iv_ogm_orig_add_if, + .del_if = batadv_iv_ogm_orig_del_if, + }, }; int __init batadv_iv_init(void) diff --git a/net/batman-adv/bat_iv_ogm.h b/net/batman-adv/bat_iv_ogm.h new file mode 100644 index 000000000000..b9f3550faaf7 --- /dev/null +++ b/net/batman-adv/bat_iv_ogm.h @@ -0,0 +1,25 @@ +/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors: + * + * Marek Lindner, Simon Wunderlich + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _BATMAN_ADV_BATADV_IV_OGM_H_ +#define _BATMAN_ADV_BATADV_IV_OGM_H_ + +#include "main.h" + +int batadv_iv_init(void); + +#endif /* _BATMAN_ADV_BATADV_IV_OGM_H_ */ diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index 0a12e5cdd65d..0366cbf5e444 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -15,7 +15,7 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#include "bat_algo.h" +#include "bat_v.h" #include "main.h" #include <linux/atomic.h> @@ -31,6 +31,7 @@ #include <linux/types.h> #include <linux/workqueue.h> +#include "bat_algo.h" #include "bat_v_elp.h" #include "bat_v_ogm.h" #include "hard-interface.h" @@ -70,11 +71,6 @@ static int batadv_v_iface_enable(struct batadv_hard_iface *hard_iface) if (ret < 0) batadv_v_elp_iface_disable(hard_iface); - /* enable link throughput auto-detection by setting the throughput - * override to zero - */ - atomic_set(&hard_iface->bat_v.throughput_override, 0); - return ret; } @@ -119,14 +115,6 @@ batadv_v_hardif_neigh_init(struct batadv_hardif_neigh_node *hardif_neigh) batadv_v_elp_throughput_metric_update); } -static void batadv_v_ogm_schedule(struct batadv_hard_iface *hard_iface) -{ -} - -static void batadv_v_ogm_emit(struct batadv_forw_packet *forw_packet) -{ -} - /** * batadv_v_orig_print_neigh - print neighbors for the originator table * @orig_node: the orig_node for which the neighbors are printed @@ -334,21 +322,39 @@ err_ifinfo1: static struct batadv_algo_ops batadv_batman_v __read_mostly = { .name = "BATMAN_V", - .bat_iface_activate = batadv_v_iface_activate, - .bat_iface_enable = batadv_v_iface_enable, - .bat_iface_disable = batadv_v_iface_disable, - .bat_iface_update_mac = batadv_v_iface_update_mac, - .bat_primary_iface_set = batadv_v_primary_iface_set, - .bat_hardif_neigh_init = batadv_v_hardif_neigh_init, - .bat_ogm_emit = batadv_v_ogm_emit, - .bat_ogm_schedule = batadv_v_ogm_schedule, - .bat_orig_print = batadv_v_orig_print, - .bat_neigh_cmp = batadv_v_neigh_cmp, - .bat_neigh_is_similar_or_better = batadv_v_neigh_is_sob, - .bat_neigh_print = batadv_v_neigh_print, + .iface = { + .activate = batadv_v_iface_activate, + .enable = batadv_v_iface_enable, + .disable = batadv_v_iface_disable, + .update_mac = batadv_v_iface_update_mac, + .primary_set = batadv_v_primary_iface_set, + }, + .neigh = { + .hardif_init = batadv_v_hardif_neigh_init, + .cmp = batadv_v_neigh_cmp, + .is_similar_or_better = batadv_v_neigh_is_sob, + .print = batadv_v_neigh_print, + }, + .orig = { + .print = batadv_v_orig_print, + }, }; /** + * batadv_v_hardif_init - initialize the algorithm specific fields in the + * hard-interface object + * @hard_iface: the hard-interface to initialize + */ +void batadv_v_hardif_init(struct batadv_hard_iface *hard_iface) +{ + /* enable link throughput auto-detection by setting the throughput + * override to zero + */ + atomic_set(&hard_iface->bat_v.throughput_override, 0); + atomic_set(&hard_iface->bat_v.elp_interval, 500); +} + +/** * batadv_v_mesh_init - initialize the B.A.T.M.A.N. V private resources for a * mesh * @bat_priv: the object representing the mesh interface to initialise diff --git a/net/batman-adv/bat_v.h b/net/batman-adv/bat_v.h new file mode 100644 index 000000000000..83b77639729e --- /dev/null +++ b/net/batman-adv/bat_v.h @@ -0,0 +1,52 @@ +/* Copyright (C) 2011-2016 B.A.T.M.A.N. contributors: + * + * Marek Lindner, Linus Lüssing + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _NET_BATMAN_ADV_BAT_V_H_ +#define _NET_BATMAN_ADV_BAT_V_H_ + +#include "main.h" + +#ifdef CONFIG_BATMAN_ADV_BATMAN_V + +int batadv_v_init(void); +void batadv_v_hardif_init(struct batadv_hard_iface *hardif); +int batadv_v_mesh_init(struct batadv_priv *bat_priv); +void batadv_v_mesh_free(struct batadv_priv *bat_priv); + +#else + +static inline int batadv_v_init(void) +{ + return 0; +} + +static inline void batadv_v_hardif_init(struct batadv_hard_iface *hardif) +{ +} + +static inline int batadv_v_mesh_init(struct batadv_priv *bat_priv) +{ + return 0; +} + +static inline void batadv_v_mesh_free(struct batadv_priv *bat_priv) +{ +} + +#endif /* CONFIG_BATMAN_ADV_BATMAN_V */ + +#endif /* _NET_BATMAN_ADV_BAT_V_H_ */ diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index df42eb1365a0..7d170010beb9 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -43,6 +43,7 @@ #include "bat_algo.h" #include "bat_v_ogm.h" #include "hard-interface.h" +#include "log.h" #include "originator.h" #include "packet.h" #include "routing.h" @@ -344,7 +345,6 @@ int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface) /* randomize initial seqno to avoid collision */ get_random_bytes(&random_seqno, sizeof(random_seqno)); atomic_set(&hard_iface->bat_v.elp_seqno, random_seqno); - atomic_set(&hard_iface->bat_v.elp_interval, 500); /* assume full-duplex by default */ hard_iface->bat_v.flags |= BATADV_FULL_DUPLEX; @@ -443,7 +443,8 @@ static void batadv_v_elp_neigh_update(struct batadv_priv *bat_priv, if (!orig_neigh) return; - neigh = batadv_neigh_node_new(orig_neigh, if_incoming, neigh_addr); + neigh = batadv_neigh_node_get_or_create(orig_neigh, + if_incoming, neigh_addr); if (!neigh) goto orig_free; @@ -503,7 +504,7 @@ int batadv_v_elp_packet_recv(struct sk_buff *skb, /* did we receive a B.A.T.M.A.N. V ELP packet on an interface * that does not have B.A.T.M.A.N. V ELP enabled ? */ - if (strcmp(bat_priv->bat_algo_ops->name, "BATMAN_V") != 0) + if (strcmp(bat_priv->algo_ops->name, "BATMAN_V") != 0) return NET_RX_DROP; elp_packet = (struct batadv_elp_packet *)skb->data; diff --git a/net/batman-adv/bat_v_elp.h b/net/batman-adv/bat_v_elp.h index cc130b2d05e5..be17c0b1369e 100644 --- a/net/batman-adv/bat_v_elp.h +++ b/net/batman-adv/bat_v_elp.h @@ -15,11 +15,11 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#include "main.h" - #ifndef _NET_BATMAN_ADV_BAT_V_ELP_H_ #define _NET_BATMAN_ADV_BAT_V_ELP_H_ +#include "main.h" + struct sk_buff; struct work_struct; diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index 473ebb9a0e73..6fbba4eb0617 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -39,13 +39,16 @@ #include <linux/types.h> #include <linux/workqueue.h> +#include "bat_algo.h" #include "hard-interface.h" #include "hash.h" +#include "log.h" #include "originator.h" #include "packet.h" #include "routing.h" #include "send.h" #include "translation-table.h" +#include "tvlv.h" /** * batadv_v_ogm_orig_get - retrieve and possibly create an originator node @@ -683,8 +686,8 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset, if (!orig_node) return; - neigh_node = batadv_neigh_node_new(orig_node, if_incoming, - ethhdr->h_source); + neigh_node = batadv_neigh_node_get_or_create(orig_node, if_incoming, + ethhdr->h_source); if (!neigh_node) goto out; @@ -751,7 +754,7 @@ int batadv_v_ogm_packet_recv(struct sk_buff *skb, /* did we receive a OGM2 packet on an interface that does not have * B.A.T.M.A.N. V enabled ? */ - if (strcmp(bat_priv->bat_algo_ops->name, "BATMAN_V") != 0) + if (strcmp(bat_priv->algo_ops->name, "BATMAN_V") != 0) return NET_RX_DROP; if (!batadv_check_management_packet(skb, if_incoming, BATADV_OGM2_HLEN)) diff --git a/net/batman-adv/bat_v_ogm.h b/net/batman-adv/bat_v_ogm.h index d849c75ada0e..4c4d45caa422 100644 --- a/net/batman-adv/bat_v_ogm.h +++ b/net/batman-adv/bat_v_ogm.h @@ -18,10 +18,10 @@ #ifndef _BATMAN_ADV_BATADV_V_OGM_H_ #define _BATMAN_ADV_BATADV_V_OGM_H_ +#include "main.h" + #include <linux/types.h> -struct batadv_hard_iface; -struct batadv_priv; struct sk_buff; int batadv_v_ogm_init(struct batadv_priv *bat_priv); diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c index a0c7913837a5..032271421a20 100644 --- a/net/batman-adv/bitarray.c +++ b/net/batman-adv/bitarray.c @@ -20,6 +20,8 @@ #include <linux/bitmap.h> +#include "log.h" + /* shift the packet array by n places. */ static void batadv_bitmap_shift_left(unsigned long *seq_bits, s32 n) { diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 748a9ead7ce5..e4f7494fb974 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -48,6 +48,7 @@ #include "hard-interface.h" #include "hash.h" +#include "log.h" #include "originator.h" #include "packet.h" #include "sysfs.h" diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c index 952900466d88..1d68b6e63b96 100644 --- a/net/batman-adv/debugfs.c +++ b/net/batman-adv/debugfs.c @@ -18,245 +18,33 @@ #include "debugfs.h" #include "main.h" -#include <linux/compiler.h> #include <linux/debugfs.h> #include <linux/device.h> #include <linux/errno.h> #include <linux/export.h> -#include <linux/fcntl.h> #include <linux/fs.h> -#include <linux/jiffies.h> -#include <linux/kernel.h> -#include <linux/module.h> #include <linux/netdevice.h> -#include <linux/poll.h> #include <linux/printk.h> #include <linux/sched.h> /* for linux/wait.h */ #include <linux/seq_file.h> -#include <linux/slab.h> -#include <linux/spinlock.h> #include <linux/stat.h> #include <linux/stddef.h> #include <linux/stringify.h> #include <linux/sysfs.h> -#include <linux/types.h> -#include <linux/uaccess.h> -#include <linux/wait.h> -#include <stdarg.h> +#include "bat_algo.h" #include "bridge_loop_avoidance.h" #include "distributed-arp-table.h" #include "gateway_client.h" #include "icmp_socket.h" +#include "log.h" +#include "multicast.h" #include "network-coding.h" #include "originator.h" #include "translation-table.h" static struct dentry *batadv_debugfs; -#ifdef CONFIG_BATMAN_ADV_DEBUG -#define BATADV_LOG_BUFF_MASK (batadv_log_buff_len - 1) - -static const int batadv_log_buff_len = BATADV_LOG_BUF_LEN; - -static char *batadv_log_char_addr(struct batadv_priv_debug_log *debug_log, - size_t idx) -{ - return &debug_log->log_buff[idx & BATADV_LOG_BUFF_MASK]; -} - -static void batadv_emit_log_char(struct batadv_priv_debug_log *debug_log, - char c) -{ - char *char_addr; - - char_addr = batadv_log_char_addr(debug_log, debug_log->log_end); - *char_addr = c; - debug_log->log_end++; - - if (debug_log->log_end - debug_log->log_start > batadv_log_buff_len) - debug_log->log_start = debug_log->log_end - batadv_log_buff_len; -} - -__printf(2, 3) -static int batadv_fdebug_log(struct batadv_priv_debug_log *debug_log, - const char *fmt, ...) -{ - va_list args; - static char debug_log_buf[256]; - char *p; - - if (!debug_log) - return 0; - - spin_lock_bh(&debug_log->lock); - va_start(args, fmt); - vscnprintf(debug_log_buf, sizeof(debug_log_buf), fmt, args); - va_end(args); - - for (p = debug_log_buf; *p != 0; p++) - batadv_emit_log_char(debug_log, *p); - - spin_unlock_bh(&debug_log->lock); - - wake_up(&debug_log->queue_wait); - - return 0; -} - -int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...) -{ - va_list args; - char tmp_log_buf[256]; - - va_start(args, fmt); - vscnprintf(tmp_log_buf, sizeof(tmp_log_buf), fmt, args); - batadv_fdebug_log(bat_priv->debug_log, "[%10u] %s", - jiffies_to_msecs(jiffies), tmp_log_buf); - va_end(args); - - return 0; -} - -static int batadv_log_open(struct inode *inode, struct file *file) -{ - if (!try_module_get(THIS_MODULE)) - return -EBUSY; - - nonseekable_open(inode, file); - file->private_data = inode->i_private; - return 0; -} - -static int batadv_log_release(struct inode *inode, struct file *file) -{ - module_put(THIS_MODULE); - return 0; -} - -static bool batadv_log_empty(struct batadv_priv_debug_log *debug_log) -{ - return !(debug_log->log_start - debug_log->log_end); -} - -static ssize_t batadv_log_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) -{ - struct batadv_priv *bat_priv = file->private_data; - struct batadv_priv_debug_log *debug_log = bat_priv->debug_log; - int error, i = 0; - char *char_addr; - char c; - - if ((file->f_flags & O_NONBLOCK) && batadv_log_empty(debug_log)) - return -EAGAIN; - - if (!buf) - return -EINVAL; - - if (count == 0) - return 0; - - if (!access_ok(VERIFY_WRITE, buf, count)) - return -EFAULT; - - error = wait_event_interruptible(debug_log->queue_wait, - (!batadv_log_empty(debug_log))); - - if (error) - return error; - - spin_lock_bh(&debug_log->lock); - - while ((!error) && (i < count) && - (debug_log->log_start != debug_log->log_end)) { - char_addr = batadv_log_char_addr(debug_log, - debug_log->log_start); - c = *char_addr; - - debug_log->log_start++; - - spin_unlock_bh(&debug_log->lock); - - error = __put_user(c, buf); - - spin_lock_bh(&debug_log->lock); - - buf++; - i++; - } - - spin_unlock_bh(&debug_log->lock); - - if (!error) - return i; - - return error; -} - -static unsigned int batadv_log_poll(struct file *file, poll_table *wait) -{ - struct batadv_priv *bat_priv = file->private_data; - struct batadv_priv_debug_log *debug_log = bat_priv->debug_log; - - poll_wait(file, &debug_log->queue_wait, wait); - - if (!batadv_log_empty(debug_log)) - return POLLIN | POLLRDNORM; - - return 0; -} - -static const struct file_operations batadv_log_fops = { - .open = batadv_log_open, - .release = batadv_log_release, - .read = batadv_log_read, - .poll = batadv_log_poll, - .llseek = no_llseek, -}; - -static int batadv_debug_log_setup(struct batadv_priv *bat_priv) -{ - struct dentry *d; - - if (!bat_priv->debug_dir) - goto err; - - bat_priv->debug_log = kzalloc(sizeof(*bat_priv->debug_log), GFP_ATOMIC); - if (!bat_priv->debug_log) - goto err; - - spin_lock_init(&bat_priv->debug_log->lock); - init_waitqueue_head(&bat_priv->debug_log->queue_wait); - - d = debugfs_create_file("log", S_IFREG | S_IRUSR, - bat_priv->debug_dir, bat_priv, - &batadv_log_fops); - if (!d) - goto err; - - return 0; - -err: - return -ENOMEM; -} - -static void batadv_debug_log_cleanup(struct batadv_priv *bat_priv) -{ - kfree(bat_priv->debug_log); - bat_priv->debug_log = NULL; -} -#else /* CONFIG_BATMAN_ADV_DEBUG */ -static int batadv_debug_log_setup(struct batadv_priv *bat_priv) -{ - return 0; -} - -static void batadv_debug_log_cleanup(struct batadv_priv *bat_priv) -{ -} -#endif - static int batadv_algorithms_open(struct inode *inode, struct file *file) { return single_open(file, batadv_algo_seq_print_text, NULL); @@ -363,6 +151,22 @@ static int batadv_nc_nodes_open(struct inode *inode, struct file *file) } #endif +#ifdef CONFIG_BATMAN_ADV_MCAST +/** + * batadv_mcast_flags_open - prepare file handler for reads from mcast_flags + * @inode: inode which was opened + * @file: file handle to be initialized + * + * Return: 0 on success or negative error number in case of failure + */ +static int batadv_mcast_flags_open(struct inode *inode, struct file *file) +{ + struct net_device *net_dev = (struct net_device *)inode->i_private; + + return single_open(file, batadv_mcast_flags_seq_print_text, net_dev); +} +#endif + #define BATADV_DEBUGINFO(_name, _mode, _open) \ struct batadv_debuginfo batadv_debuginfo_##_name = { \ .attr = { \ @@ -407,6 +211,9 @@ static BATADV_DEBUGINFO(transtable_local, S_IRUGO, #ifdef CONFIG_BATMAN_ADV_NC static BATADV_DEBUGINFO(nc_nodes, S_IRUGO, batadv_nc_nodes_open); #endif +#ifdef CONFIG_BATMAN_ADV_MCAST +static BATADV_DEBUGINFO(mcast_flags, S_IRUGO, batadv_mcast_flags_open); +#endif static struct batadv_debuginfo *batadv_mesh_debuginfos[] = { &batadv_debuginfo_neighbors, @@ -424,6 +231,9 @@ static struct batadv_debuginfo *batadv_mesh_debuginfos[] = { #ifdef CONFIG_BATMAN_ADV_NC &batadv_debuginfo_nc_nodes, #endif +#ifdef CONFIG_BATMAN_ADV_MCAST + &batadv_debuginfo_mcast_flags, +#endif NULL, }; diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 278800a99c69..fa7646532a13 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -45,9 +45,11 @@ #include "hard-interface.h" #include "hash.h" +#include "log.h" #include "originator.h" #include "send.h" #include "translation-table.h" +#include "tvlv.h" static void batadv_dat_purge(struct work_struct *work); diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index 65536db1bff7..0934730fb7ff 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -27,7 +27,6 @@ #include <linux/kernel.h> #include <linux/lockdep.h> #include <linux/netdevice.h> -#include <linux/pkt_sched.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> @@ -414,7 +413,7 @@ static struct sk_buff *batadv_frag_create(struct sk_buff *skb, if (!skb_fragment) goto err; - skb->priority = TC_PRIO_CONTROL; + skb_fragment->priority = skb->priority; /* Eat the last mtu-bytes of the skb */ skb_reserve(skb_fragment, header_size + ETH_HLEN); @@ -434,11 +433,12 @@ err: * @orig_node: final destination of the created fragments * @neigh_node: next-hop of the created fragments * - * Return: true on success, false otherwise. + * Return: the netdev tx status or -1 in case of error. + * When -1 is returned the skb is not consumed. */ -bool batadv_frag_send_packet(struct sk_buff *skb, - struct batadv_orig_node *orig_node, - struct batadv_neigh_node *neigh_node) +int batadv_frag_send_packet(struct sk_buff *skb, + struct batadv_orig_node *orig_node, + struct batadv_neigh_node *neigh_node) { struct batadv_priv *bat_priv; struct batadv_hard_iface *primary_if = NULL; @@ -447,7 +447,7 @@ bool batadv_frag_send_packet(struct sk_buff *skb, unsigned int mtu = neigh_node->if_incoming->net_dev->mtu; unsigned int header_size = sizeof(frag_header); unsigned int max_fragment_size, max_packet_size; - bool ret = false; + int ret = -1; /* To avoid merge and refragmentation at next-hops we never send * fragments larger than BATADV_FRAG_MAX_FRAG_SIZE @@ -458,12 +458,12 @@ bool batadv_frag_send_packet(struct sk_buff *skb, /* Don't even try to fragment, if we need more than 16 fragments */ if (skb->len > max_packet_size) - goto out_err; + goto out; bat_priv = orig_node->bat_priv; primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) - goto out_err; + goto out; /* Create one header to be copied to all fragments */ frag_header.packet_type = BATADV_UNICAST_FRAG; @@ -473,6 +473,15 @@ bool batadv_frag_send_packet(struct sk_buff *skb, frag_header.reserved = 0; frag_header.no = 0; frag_header.total_size = htons(skb->len); + + /* skb->priority values from 256->263 are magic values to + * directly indicate a specific 802.1d priority. This is used + * to allow 802.1d priority to be passed directly in from VLAN + * tags, etc. + */ + if (skb->priority >= 256 && skb->priority <= 263) + frag_header.priority = skb->priority - 256; + ether_addr_copy(frag_header.orig, primary_if->net_dev->dev_addr); ether_addr_copy(frag_header.dest, orig_node->orig); @@ -480,23 +489,33 @@ bool batadv_frag_send_packet(struct sk_buff *skb, while (skb->len > max_fragment_size) { skb_fragment = batadv_frag_create(skb, &frag_header, mtu); if (!skb_fragment) - goto out_err; + goto out; batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_TX); batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES, skb_fragment->len + ETH_HLEN); - batadv_send_unicast_skb(skb_fragment, neigh_node); + ret = batadv_send_unicast_skb(skb_fragment, neigh_node); + if (ret != NET_XMIT_SUCCESS) { + /* return -1 so that the caller can free the original + * skb + */ + ret = -1; + goto out; + } + frag_header.no++; /* The initial check in this function should cover this case */ - if (frag_header.no == BATADV_FRAG_MAX_FRAGMENTS - 1) - goto out_err; + if (frag_header.no == BATADV_FRAG_MAX_FRAGMENTS - 1) { + ret = -1; + goto out; + } } /* Make room for the fragment header. */ if (batadv_skb_head_push(skb, header_size) < 0 || pskb_expand_head(skb, header_size + ETH_HLEN, 0, GFP_ATOMIC) < 0) - goto out_err; + goto out; memcpy(skb->data, &frag_header, header_size); @@ -504,11 +523,9 @@ bool batadv_frag_send_packet(struct sk_buff *skb, batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_TX); batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES, skb->len + ETH_HLEN); - batadv_send_unicast_skb(skb, neigh_node); - - ret = true; + ret = batadv_send_unicast_skb(skb, neigh_node); -out_err: +out: if (primary_if) batadv_hardif_put(primary_if); diff --git a/net/batman-adv/fragmentation.h b/net/batman-adv/fragmentation.h index 9ff77c7ef7c7..3202fe329e63 100644 --- a/net/batman-adv/fragmentation.h +++ b/net/batman-adv/fragmentation.h @@ -34,9 +34,9 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb, struct batadv_orig_node *orig_node_src); bool batadv_frag_skb_buffer(struct sk_buff **skb, struct batadv_orig_node *orig_node); -bool batadv_frag_send_packet(struct sk_buff *skb, - struct batadv_orig_node *orig_node, - struct batadv_neigh_node *neigh_node); +int batadv_frag_send_packet(struct sk_buff *skb, + struct batadv_orig_node *orig_node, + struct batadv_neigh_node *neigh_node); /** * batadv_frag_check_entry - check if a list of fragments has timed out diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 5839c569f769..63a805d3f96e 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -42,6 +42,7 @@ #include "gateway_common.h" #include "hard-interface.h" +#include "log.h" #include "originator.h" #include "packet.h" #include "routing.h" @@ -192,7 +193,7 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv) tq_avg = router_ifinfo->bat_iv.tq_avg; - switch (atomic_read(&bat_priv->gw_sel_class)) { + switch (atomic_read(&bat_priv->gw.sel_class)) { case 1: /* fast connection */ tmp_gw_factor = tq_avg * tq_avg; tmp_gw_factor *= gw_node->bandwidth_down; @@ -255,7 +256,7 @@ void batadv_gw_check_client_stop(struct batadv_priv *bat_priv) { struct batadv_gw_node *curr_gw; - if (atomic_read(&bat_priv->gw_mode) != BATADV_GW_MODE_CLIENT) + if (atomic_read(&bat_priv->gw.mode) != BATADV_GW_MODE_CLIENT) return; curr_gw = batadv_gw_get_selected_gw_node(bat_priv); @@ -283,7 +284,7 @@ void batadv_gw_election(struct batadv_priv *bat_priv) struct batadv_neigh_ifinfo *router_ifinfo = NULL; char gw_addr[18] = { '\0' }; - if (atomic_read(&bat_priv->gw_mode) != BATADV_GW_MODE_CLIENT) + if (atomic_read(&bat_priv->gw.mode) != BATADV_GW_MODE_CLIENT) goto out; curr_gw = batadv_gw_get_selected_gw_node(bat_priv); @@ -402,8 +403,8 @@ void batadv_gw_check_election(struct batadv_priv *bat_priv, /* if the routing class is greater than 3 the value tells us how much * greater the TQ value of the new gateway must be */ - if ((atomic_read(&bat_priv->gw_sel_class) > 3) && - (orig_tq_avg - gw_tq_avg < atomic_read(&bat_priv->gw_sel_class))) + if ((atomic_read(&bat_priv->gw.sel_class) > 3) && + (orig_tq_avg - gw_tq_avg < atomic_read(&bat_priv->gw.sel_class))) goto out; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, @@ -638,8 +639,7 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset) goto out; seq_printf(seq, - " %-12s (%s/%i) %17s [%10s]: advertised uplink bandwidth ... [B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s)]\n", - "Gateway", "#", BATADV_TQ_MAX_VALUE, "Nexthop", "outgoingIF", + " Gateway (#/255) Nexthop [outgoingIF]: advertised uplink bandwidth ... [B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s)]\n", BATADV_SOURCE_VERSION, primary_if->net_dev->name, primary_if->net_dev->dev_addr, net_dev->name); @@ -821,7 +821,7 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, if (!gw_node) goto out; - switch (atomic_read(&bat_priv->gw_mode)) { + switch (atomic_read(&bat_priv->gw.mode)) { case BATADV_GW_MODE_SERVER: /* If we are a GW then we are our best GW. We can artificially * set the tq towards ourself as the maximum value diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c index 4423047889e1..d7bc6a87bcc9 100644 --- a/net/batman-adv/gateway_common.c +++ b/net/batman-adv/gateway_common.c @@ -19,8 +19,8 @@ #include "main.h" #include <linux/atomic.h> -#include <linux/errno.h> #include <linux/byteorder/generic.h> +#include <linux/errno.h> #include <linux/kernel.h> #include <linux/math64.h> #include <linux/netdevice.h> @@ -28,7 +28,9 @@ #include <linux/string.h> #include "gateway_client.h" +#include "log.h" #include "packet.h" +#include "tvlv.h" /** * batadv_parse_throughput - parse supplied string buffer to extract throughput @@ -144,7 +146,7 @@ void batadv_gw_tvlv_container_update(struct batadv_priv *bat_priv) u32 down, up; char gw_mode; - gw_mode = atomic_read(&bat_priv->gw_mode); + gw_mode = atomic_read(&bat_priv->gw.mode); switch (gw_mode) { case BATADV_GW_MODE_OFF: @@ -241,8 +243,8 @@ static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, /* restart gateway selection if fast or late switching was enabled */ if ((gateway.bandwidth_down != 0) && - (atomic_read(&bat_priv->gw_mode) == BATADV_GW_MODE_CLIENT) && - (atomic_read(&bat_priv->gw_sel_class) > 2)) + (atomic_read(&bat_priv->gw.mode) == BATADV_GW_MODE_CLIENT) && + (atomic_read(&bat_priv->gw.sel_class) > 2)) batadv_gw_check_election(bat_priv, orig); } diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 8c2f39962fa5..1f9080840566 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -23,9 +23,9 @@ #include <linux/byteorder/generic.h> #include <linux/errno.h> #include <linux/fs.h> +#include <linux/if.h> #include <linux/if_arp.h> #include <linux/if_ether.h> -#include <linux/if.h> #include <linux/kernel.h> #include <linux/kref.h> #include <linux/list.h> @@ -37,10 +37,12 @@ #include <linux/spinlock.h> #include <linux/workqueue.h> +#include "bat_v.h" #include "bridge_loop_avoidance.h" #include "debugfs.h" #include "distributed-arp-table.h" #include "gateway_client.h" +#include "log.h" #include "originator.h" #include "packet.h" #include "send.h" @@ -245,7 +247,7 @@ static void batadv_primary_if_select(struct batadv_priv *bat_priv, if (!new_hard_iface) goto out; - bat_priv->bat_algo_ops->bat_primary_iface_set(new_hard_iface); + bat_priv->algo_ops->iface.primary_set(new_hard_iface); batadv_primary_if_update_addr(bat_priv, curr_hard_iface); out: @@ -392,7 +394,7 @@ batadv_hardif_activate_interface(struct batadv_hard_iface *hard_iface) bat_priv = netdev_priv(hard_iface->soft_iface); - bat_priv->bat_algo_ops->bat_iface_update_mac(hard_iface); + bat_priv->algo_ops->iface.update_mac(hard_iface); hard_iface->if_status = BATADV_IF_TO_BE_ACTIVATED; /* the first active interface becomes our primary interface or @@ -407,8 +409,8 @@ batadv_hardif_activate_interface(struct batadv_hard_iface *hard_iface) batadv_update_min_mtu(hard_iface->soft_iface); - if (bat_priv->bat_algo_ops->bat_iface_activate) - bat_priv->bat_algo_ops->bat_iface_activate(hard_iface); + if (bat_priv->algo_ops->iface.activate) + bat_priv->algo_ops->iface.activate(hard_iface); out: if (primary_if) @@ -506,7 +508,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, if (ret) goto err_dev; - ret = bat_priv->bat_algo_ops->bat_iface_enable(hard_iface); + ret = bat_priv->algo_ops->iface.enable(hard_iface); if (ret < 0) goto err_upper; @@ -515,7 +517,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, hard_iface->if_status = BATADV_IF_INACTIVE; ret = batadv_orig_hash_add_if(hard_iface, bat_priv->num_ifaces); if (ret < 0) { - bat_priv->bat_algo_ops->bat_iface_disable(hard_iface); + bat_priv->algo_ops->iface.disable(hard_iface); bat_priv->num_ifaces--; hard_iface->if_status = BATADV_IF_NOT_IN_USE; goto err_upper; @@ -553,9 +555,6 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, batadv_hardif_recalc_extra_skbroom(soft_iface); - /* begin scheduling originator messages on that interface */ - batadv_schedule_bat_ogm(hard_iface); - out: return 0; @@ -599,7 +598,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface, batadv_hardif_put(new_if); } - bat_priv->bat_algo_ops->bat_iface_disable(hard_iface); + bat_priv->algo_ops->iface.disable(hard_iface); hard_iface->if_status = BATADV_IF_NOT_IN_USE; /* delete all references to this hard_iface */ @@ -686,6 +685,8 @@ batadv_hardif_add_interface(struct net_device *net_dev) if (batadv_is_wifi_netdev(net_dev)) hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS; + batadv_v_hardif_init(hard_iface); + /* extra reference for return */ kref_init(&hard_iface->refcount); kref_get(&hard_iface->refcount); @@ -782,7 +783,7 @@ static int batadv_hard_if_event(struct notifier_block *this, batadv_check_known_mac_addr(hard_iface->net_dev); bat_priv = netdev_priv(hard_iface->soft_iface); - bat_priv->bat_algo_ops->bat_iface_update_mac(hard_iface); + bat_priv->algo_ops->iface.update_mac(hard_iface); primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index 777aea10cd8f..378cc1119d66 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -45,6 +45,7 @@ #include <linux/wait.h> #include "hard-interface.h" +#include "log.h" #include "originator.h" #include "packet.h" #include "send.h" diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c new file mode 100644 index 000000000000..56dc532f7a2c --- /dev/null +++ b/net/batman-adv/log.c @@ -0,0 +1,231 @@ +/* Copyright (C) 2010-2016 B.A.T.M.A.N. contributors: + * + * Marek Lindner + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "log.h" +#include "main.h" + +#include <linux/compiler.h> +#include <linux/debugfs.h> +#include <linux/errno.h> +#include <linux/export.h> +#include <linux/fcntl.h> +#include <linux/fs.h> +#include <linux/jiffies.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/poll.h> +#include <linux/sched.h> /* for linux/wait.h */ +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/stat.h> +#include <linux/stddef.h> +#include <linux/types.h> +#include <linux/uaccess.h> +#include <linux/wait.h> +#include <stdarg.h> + +#define BATADV_LOG_BUFF_MASK (batadv_log_buff_len - 1) + +static const int batadv_log_buff_len = BATADV_LOG_BUF_LEN; + +static char *batadv_log_char_addr(struct batadv_priv_debug_log *debug_log, + size_t idx) +{ + return &debug_log->log_buff[idx & BATADV_LOG_BUFF_MASK]; +} + +static void batadv_emit_log_char(struct batadv_priv_debug_log *debug_log, + char c) +{ + char *char_addr; + + char_addr = batadv_log_char_addr(debug_log, debug_log->log_end); + *char_addr = c; + debug_log->log_end++; + + if (debug_log->log_end - debug_log->log_start > batadv_log_buff_len) + debug_log->log_start = debug_log->log_end - batadv_log_buff_len; +} + +__printf(2, 3) +static int batadv_fdebug_log(struct batadv_priv_debug_log *debug_log, + const char *fmt, ...) +{ + va_list args; + static char debug_log_buf[256]; + char *p; + + if (!debug_log) + return 0; + + spin_lock_bh(&debug_log->lock); + va_start(args, fmt); + vscnprintf(debug_log_buf, sizeof(debug_log_buf), fmt, args); + va_end(args); + + for (p = debug_log_buf; *p != 0; p++) + batadv_emit_log_char(debug_log, *p); + + spin_unlock_bh(&debug_log->lock); + + wake_up(&debug_log->queue_wait); + + return 0; +} + +int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...) +{ + va_list args; + char tmp_log_buf[256]; + + va_start(args, fmt); + vscnprintf(tmp_log_buf, sizeof(tmp_log_buf), fmt, args); + batadv_fdebug_log(bat_priv->debug_log, "[%10u] %s", + jiffies_to_msecs(jiffies), tmp_log_buf); + va_end(args); + + return 0; +} + +static int batadv_log_open(struct inode *inode, struct file *file) +{ + if (!try_module_get(THIS_MODULE)) + return -EBUSY; + + nonseekable_open(inode, file); + file->private_data = inode->i_private; + return 0; +} + +static int batadv_log_release(struct inode *inode, struct file *file) +{ + module_put(THIS_MODULE); + return 0; +} + +static bool batadv_log_empty(struct batadv_priv_debug_log *debug_log) +{ + return !(debug_log->log_start - debug_log->log_end); +} + +static ssize_t batadv_log_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct batadv_priv *bat_priv = file->private_data; + struct batadv_priv_debug_log *debug_log = bat_priv->debug_log; + int error, i = 0; + char *char_addr; + char c; + + if ((file->f_flags & O_NONBLOCK) && batadv_log_empty(debug_log)) + return -EAGAIN; + + if (!buf) + return -EINVAL; + + if (count == 0) + return 0; + + if (!access_ok(VERIFY_WRITE, buf, count)) + return -EFAULT; + + error = wait_event_interruptible(debug_log->queue_wait, + (!batadv_log_empty(debug_log))); + + if (error) + return error; + + spin_lock_bh(&debug_log->lock); + + while ((!error) && (i < count) && + (debug_log->log_start != debug_log->log_end)) { + char_addr = batadv_log_char_addr(debug_log, + debug_log->log_start); + c = *char_addr; + + debug_log->log_start++; + + spin_unlock_bh(&debug_log->lock); + + error = __put_user(c, buf); + + spin_lock_bh(&debug_log->lock); + + buf++; + i++; + } + + spin_unlock_bh(&debug_log->lock); + + if (!error) + return i; + + return error; +} + +static unsigned int batadv_log_poll(struct file *file, poll_table *wait) +{ + struct batadv_priv *bat_priv = file->private_data; + struct batadv_priv_debug_log *debug_log = bat_priv->debug_log; + + poll_wait(file, &debug_log->queue_wait, wait); + + if (!batadv_log_empty(debug_log)) + return POLLIN | POLLRDNORM; + + return 0; +} + +static const struct file_operations batadv_log_fops = { + .open = batadv_log_open, + .release = batadv_log_release, + .read = batadv_log_read, + .poll = batadv_log_poll, + .llseek = no_llseek, +}; + +int batadv_debug_log_setup(struct batadv_priv *bat_priv) +{ + struct dentry *d; + + if (!bat_priv->debug_dir) + goto err; + + bat_priv->debug_log = kzalloc(sizeof(*bat_priv->debug_log), GFP_ATOMIC); + if (!bat_priv->debug_log) + goto err; + + spin_lock_init(&bat_priv->debug_log->lock); + init_waitqueue_head(&bat_priv->debug_log->queue_wait); + + d = debugfs_create_file("log", S_IFREG | S_IRUSR, + bat_priv->debug_dir, bat_priv, + &batadv_log_fops); + if (!d) + goto err; + + return 0; + +err: + return -ENOMEM; +} + +void batadv_debug_log_cleanup(struct batadv_priv *bat_priv) +{ + kfree(bat_priv->debug_log); + bat_priv->debug_log = NULL; +} diff --git a/net/batman-adv/log.h b/net/batman-adv/log.h new file mode 100644 index 000000000000..e0e1a88c3e58 --- /dev/null +++ b/net/batman-adv/log.h @@ -0,0 +1,111 @@ +/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors: + * + * Marek Lindner, Simon Wunderlich + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _NET_BATMAN_ADV_LOG_H_ +#define _NET_BATMAN_ADV_LOG_H_ + +#include "main.h" + +#include <linux/bitops.h> +#include <linux/compiler.h> +#include <linux/printk.h> + +#ifdef CONFIG_BATMAN_ADV_DEBUG + +int batadv_debug_log_setup(struct batadv_priv *bat_priv); +void batadv_debug_log_cleanup(struct batadv_priv *bat_priv); + +#else + +static inline int batadv_debug_log_setup(struct batadv_priv *bat_priv) +{ + return 0; +} + +static inline void batadv_debug_log_cleanup(struct batadv_priv *bat_priv) +{ +} + +#endif + +/** + * enum batadv_dbg_level - available log levels + * @BATADV_DBG_BATMAN: OGM and TQ computations related messages + * @BATADV_DBG_ROUTES: route added / changed / deleted + * @BATADV_DBG_TT: translation table messages + * @BATADV_DBG_BLA: bridge loop avoidance messages + * @BATADV_DBG_DAT: ARP snooping and DAT related messages + * @BATADV_DBG_NC: network coding related messages + * @BATADV_DBG_MCAST: multicast related messages + * @BATADV_DBG_TP_METER: throughput meter messages + * @BATADV_DBG_ALL: the union of all the above log levels + */ +enum batadv_dbg_level { + BATADV_DBG_BATMAN = BIT(0), + BATADV_DBG_ROUTES = BIT(1), + BATADV_DBG_TT = BIT(2), + BATADV_DBG_BLA = BIT(3), + BATADV_DBG_DAT = BIT(4), + BATADV_DBG_NC = BIT(5), + BATADV_DBG_MCAST = BIT(6), + BATADV_DBG_TP_METER = BIT(7), + BATADV_DBG_ALL = 127, +}; + +#ifdef CONFIG_BATMAN_ADV_DEBUG +int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...) +__printf(2, 3); + +/* possibly ratelimited debug output */ +#define _batadv_dbg(type, bat_priv, ratelimited, fmt, arg...) \ + do { \ + if (atomic_read(&bat_priv->log_level) & type && \ + (!ratelimited || net_ratelimit())) \ + batadv_debug_log(bat_priv, fmt, ## arg);\ + } \ + while (0) +#else /* !CONFIG_BATMAN_ADV_DEBUG */ +__printf(4, 5) +static inline void _batadv_dbg(int type __always_unused, + struct batadv_priv *bat_priv __always_unused, + int ratelimited __always_unused, + const char *fmt __always_unused, ...) +{ +} +#endif + +#define batadv_dbg(type, bat_priv, arg...) \ + _batadv_dbg(type, bat_priv, 0, ## arg) +#define batadv_dbg_ratelimited(type, bat_priv, arg...) \ + _batadv_dbg(type, bat_priv, 1, ## arg) + +#define batadv_info(net_dev, fmt, arg...) \ + do { \ + struct net_device *_netdev = (net_dev); \ + struct batadv_priv *_batpriv = netdev_priv(_netdev); \ + batadv_dbg(BATADV_DBG_ALL, _batpriv, fmt, ## arg); \ + pr_info("%s: " fmt, _netdev->name, ## arg); \ + } while (0) +#define batadv_err(net_dev, fmt, arg...) \ + do { \ + struct net_device *_netdev = (net_dev); \ + struct batadv_priv *_batpriv = netdev_priv(_netdev); \ + batadv_dbg(BATADV_DBG_ALL, _batpriv, fmt, ## arg); \ + pr_err("%s: " fmt, _netdev->name, ## arg); \ + } while (0) + +#endif /* _NET_BATMAN_ADV_LOG_H_ */ diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 5f2974bd1227..fe4c5e29f96b 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -31,16 +31,13 @@ #include <linux/kernel.h> #include <linux/kref.h> #include <linux/list.h> -#include <linux/lockdep.h> #include <linux/module.h> -#include <linux/moduleparam.h> #include <linux/netdevice.h> -#include <linux/pkt_sched.h> +#include <linux/printk.h> #include <linux/rculist.h> #include <linux/rcupdate.h> #include <linux/seq_file.h> #include <linux/skbuff.h> -#include <linux/slab.h> #include <linux/spinlock.h> #include <linux/stddef.h> #include <linux/string.h> @@ -49,6 +46,8 @@ #include <net/rtnetlink.h> #include "bat_algo.h" +#include "bat_iv_ogm.h" +#include "bat_v.h" #include "bridge_loop_avoidance.h" #include "debugfs.h" #include "distributed-arp-table.h" @@ -56,13 +55,16 @@ #include "gateway_common.h" #include "hard-interface.h" #include "icmp_socket.h" +#include "log.h" #include "multicast.h" +#include "netlink.h" #include "network-coding.h" #include "originator.h" #include "packet.h" #include "routing.h" #include "send.h" #include "soft-interface.h" +#include "tp_meter.h" #include "translation-table.h" /* List manipulations on hardif_list have to be rtnl_lock()'ed, @@ -71,8 +73,6 @@ struct list_head batadv_hardif_list; static int (*batadv_rx_handler[256])(struct sk_buff *, struct batadv_hard_iface *); -char batadv_routing_algo[20] = "BATMAN_IV"; -static struct hlist_head batadv_algo_list; unsigned char batadv_broadcast_addr[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; @@ -83,13 +83,14 @@ static void batadv_recv_handler_init(void); static int __init batadv_init(void) { INIT_LIST_HEAD(&batadv_hardif_list); - INIT_HLIST_HEAD(&batadv_algo_list); + batadv_algo_init(); batadv_recv_handler_init(); batadv_v_init(); batadv_iv_init(); batadv_nc_init(); + batadv_tp_meter_init(); batadv_event_workqueue = create_singlethread_workqueue("bat_events"); @@ -101,6 +102,7 @@ static int __init batadv_init(void) register_netdevice_notifier(&batadv_hard_if_notifier); rtnl_link_register(&batadv_link_ops); + batadv_netlink_register(); pr_info("B.A.T.M.A.N. advanced %s (compatibility version %i) loaded\n", BATADV_SOURCE_VERSION, BATADV_COMPAT_VERSION); @@ -111,6 +113,7 @@ static int __init batadv_init(void) static void __exit batadv_exit(void) { batadv_debugfs_destroy(); + batadv_netlink_unregister(); rtnl_link_unregister(&batadv_link_ops); unregister_netdevice_notifier(&batadv_hard_if_notifier); batadv_hardif_remove_interfaces(); @@ -141,6 +144,7 @@ int batadv_mesh_init(struct net_device *soft_iface) spin_lock_init(&bat_priv->tvlv.container_list_lock); spin_lock_init(&bat_priv->tvlv.handler_list_lock); spin_lock_init(&bat_priv->softif_vlan_list_lock); + spin_lock_init(&bat_priv->tp_list_lock); INIT_HLIST_HEAD(&bat_priv->forw_bat_list); INIT_HLIST_HEAD(&bat_priv->forw_bcast_list); @@ -159,6 +163,7 @@ int batadv_mesh_init(struct net_device *soft_iface) INIT_HLIST_HEAD(&bat_priv->tvlv.container_list); INIT_HLIST_HEAD(&bat_priv->tvlv.handler_list); INIT_HLIST_HEAD(&bat_priv->softif_vlan_list); + INIT_HLIST_HEAD(&bat_priv->tp_list); ret = batadv_v_mesh_init(bat_priv); if (ret < 0) @@ -538,78 +543,6 @@ void batadv_recv_handler_unregister(u8 packet_type) batadv_rx_handler[packet_type] = batadv_recv_unhandled_packet; } -static struct batadv_algo_ops *batadv_algo_get(char *name) -{ - struct batadv_algo_ops *bat_algo_ops = NULL, *bat_algo_ops_tmp; - - hlist_for_each_entry(bat_algo_ops_tmp, &batadv_algo_list, list) { - if (strcmp(bat_algo_ops_tmp->name, name) != 0) - continue; - - bat_algo_ops = bat_algo_ops_tmp; - break; - } - - return bat_algo_ops; -} - -int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops) -{ - struct batadv_algo_ops *bat_algo_ops_tmp; - - bat_algo_ops_tmp = batadv_algo_get(bat_algo_ops->name); - if (bat_algo_ops_tmp) { - pr_info("Trying to register already registered routing algorithm: %s\n", - bat_algo_ops->name); - return -EEXIST; - } - - /* all algorithms must implement all ops (for now) */ - if (!bat_algo_ops->bat_iface_enable || - !bat_algo_ops->bat_iface_disable || - !bat_algo_ops->bat_iface_update_mac || - !bat_algo_ops->bat_primary_iface_set || - !bat_algo_ops->bat_ogm_schedule || - !bat_algo_ops->bat_ogm_emit || - !bat_algo_ops->bat_neigh_cmp || - !bat_algo_ops->bat_neigh_is_similar_or_better) { - pr_info("Routing algo '%s' does not implement required ops\n", - bat_algo_ops->name); - return -EINVAL; - } - - INIT_HLIST_NODE(&bat_algo_ops->list); - hlist_add_head(&bat_algo_ops->list, &batadv_algo_list); - - return 0; -} - -int batadv_algo_select(struct batadv_priv *bat_priv, char *name) -{ - struct batadv_algo_ops *bat_algo_ops; - - bat_algo_ops = batadv_algo_get(name); - if (!bat_algo_ops) - return -EINVAL; - - bat_priv->bat_algo_ops = bat_algo_ops; - - return 0; -} - -int batadv_algo_seq_print_text(struct seq_file *seq, void *offset) -{ - struct batadv_algo_ops *bat_algo_ops; - - seq_puts(seq, "Available routing algorithms:\n"); - - hlist_for_each_entry(bat_algo_ops, &batadv_algo_list, list) { - seq_printf(seq, " * %s\n", bat_algo_ops->name); - } - - return 0; -} - /** * batadv_skb_crc32 - calculate CRC32 of the whole packet and skip bytes in * the header @@ -644,594 +577,6 @@ __be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr) } /** - * batadv_tvlv_handler_release - release tvlv handler from lists and queue for - * free after rcu grace period - * @ref: kref pointer of the tvlv - */ -static void batadv_tvlv_handler_release(struct kref *ref) -{ - struct batadv_tvlv_handler *tvlv_handler; - - tvlv_handler = container_of(ref, struct batadv_tvlv_handler, refcount); - kfree_rcu(tvlv_handler, rcu); -} - -/** - * batadv_tvlv_handler_put - decrement the tvlv container refcounter and - * possibly release it - * @tvlv_handler: the tvlv handler to free - */ -static void batadv_tvlv_handler_put(struct batadv_tvlv_handler *tvlv_handler) -{ - kref_put(&tvlv_handler->refcount, batadv_tvlv_handler_release); -} - -/** - * batadv_tvlv_handler_get - retrieve tvlv handler from the tvlv handler list - * based on the provided type and version (both need to match) - * @bat_priv: the bat priv with all the soft interface information - * @type: tvlv handler type to look for - * @version: tvlv handler version to look for - * - * Return: tvlv handler if found or NULL otherwise. - */ -static struct batadv_tvlv_handler * -batadv_tvlv_handler_get(struct batadv_priv *bat_priv, u8 type, u8 version) -{ - struct batadv_tvlv_handler *tvlv_handler_tmp, *tvlv_handler = NULL; - - rcu_read_lock(); - hlist_for_each_entry_rcu(tvlv_handler_tmp, - &bat_priv->tvlv.handler_list, list) { - if (tvlv_handler_tmp->type != type) - continue; - - if (tvlv_handler_tmp->version != version) - continue; - - if (!kref_get_unless_zero(&tvlv_handler_tmp->refcount)) - continue; - - tvlv_handler = tvlv_handler_tmp; - break; - } - rcu_read_unlock(); - - return tvlv_handler; -} - -/** - * batadv_tvlv_container_release - release tvlv from lists and free - * @ref: kref pointer of the tvlv - */ -static void batadv_tvlv_container_release(struct kref *ref) -{ - struct batadv_tvlv_container *tvlv; - - tvlv = container_of(ref, struct batadv_tvlv_container, refcount); - kfree(tvlv); -} - -/** - * batadv_tvlv_container_put - decrement the tvlv container refcounter and - * possibly release it - * @tvlv: the tvlv container to free - */ -static void batadv_tvlv_container_put(struct batadv_tvlv_container *tvlv) -{ - kref_put(&tvlv->refcount, batadv_tvlv_container_release); -} - -/** - * batadv_tvlv_container_get - retrieve tvlv container from the tvlv container - * list based on the provided type and version (both need to match) - * @bat_priv: the bat priv with all the soft interface information - * @type: tvlv container type to look for - * @version: tvlv container version to look for - * - * Has to be called with the appropriate locks being acquired - * (tvlv.container_list_lock). - * - * Return: tvlv container if found or NULL otherwise. - */ -static struct batadv_tvlv_container * -batadv_tvlv_container_get(struct batadv_priv *bat_priv, u8 type, u8 version) -{ - struct batadv_tvlv_container *tvlv_tmp, *tvlv = NULL; - - lockdep_assert_held(&bat_priv->tvlv.container_list_lock); - - hlist_for_each_entry(tvlv_tmp, &bat_priv->tvlv.container_list, list) { - if (tvlv_tmp->tvlv_hdr.type != type) - continue; - - if (tvlv_tmp->tvlv_hdr.version != version) - continue; - - kref_get(&tvlv_tmp->refcount); - tvlv = tvlv_tmp; - break; - } - - return tvlv; -} - -/** - * batadv_tvlv_container_list_size - calculate the size of the tvlv container - * list entries - * @bat_priv: the bat priv with all the soft interface information - * - * Has to be called with the appropriate locks being acquired - * (tvlv.container_list_lock). - * - * Return: size of all currently registered tvlv containers in bytes. - */ -static u16 batadv_tvlv_container_list_size(struct batadv_priv *bat_priv) -{ - struct batadv_tvlv_container *tvlv; - u16 tvlv_len = 0; - - lockdep_assert_held(&bat_priv->tvlv.container_list_lock); - - hlist_for_each_entry(tvlv, &bat_priv->tvlv.container_list, list) { - tvlv_len += sizeof(struct batadv_tvlv_hdr); - tvlv_len += ntohs(tvlv->tvlv_hdr.len); - } - - return tvlv_len; -} - -/** - * batadv_tvlv_container_remove - remove tvlv container from the tvlv container - * list - * @bat_priv: the bat priv with all the soft interface information - * @tvlv: the to be removed tvlv container - * - * Has to be called with the appropriate locks being acquired - * (tvlv.container_list_lock). - */ -static void batadv_tvlv_container_remove(struct batadv_priv *bat_priv, - struct batadv_tvlv_container *tvlv) -{ - lockdep_assert_held(&bat_priv->tvlv.container_list_lock); - - if (!tvlv) - return; - - hlist_del(&tvlv->list); - - /* first call to decrement the counter, second call to free */ - batadv_tvlv_container_put(tvlv); - batadv_tvlv_container_put(tvlv); -} - -/** - * batadv_tvlv_container_unregister - unregister tvlv container based on the - * provided type and version (both need to match) - * @bat_priv: the bat priv with all the soft interface information - * @type: tvlv container type to unregister - * @version: tvlv container type to unregister - */ -void batadv_tvlv_container_unregister(struct batadv_priv *bat_priv, - u8 type, u8 version) -{ - struct batadv_tvlv_container *tvlv; - - spin_lock_bh(&bat_priv->tvlv.container_list_lock); - tvlv = batadv_tvlv_container_get(bat_priv, type, version); - batadv_tvlv_container_remove(bat_priv, tvlv); - spin_unlock_bh(&bat_priv->tvlv.container_list_lock); -} - -/** - * batadv_tvlv_container_register - register tvlv type, version and content - * to be propagated with each (primary interface) OGM - * @bat_priv: the bat priv with all the soft interface information - * @type: tvlv container type - * @version: tvlv container version - * @tvlv_value: tvlv container content - * @tvlv_value_len: tvlv container content length - * - * If a container of the same type and version was already registered the new - * content is going to replace the old one. - */ -void batadv_tvlv_container_register(struct batadv_priv *bat_priv, - u8 type, u8 version, - void *tvlv_value, u16 tvlv_value_len) -{ - struct batadv_tvlv_container *tvlv_old, *tvlv_new; - - if (!tvlv_value) - tvlv_value_len = 0; - - tvlv_new = kzalloc(sizeof(*tvlv_new) + tvlv_value_len, GFP_ATOMIC); - if (!tvlv_new) - return; - - tvlv_new->tvlv_hdr.version = version; - tvlv_new->tvlv_hdr.type = type; - tvlv_new->tvlv_hdr.len = htons(tvlv_value_len); - - memcpy(tvlv_new + 1, tvlv_value, ntohs(tvlv_new->tvlv_hdr.len)); - INIT_HLIST_NODE(&tvlv_new->list); - kref_init(&tvlv_new->refcount); - - spin_lock_bh(&bat_priv->tvlv.container_list_lock); - tvlv_old = batadv_tvlv_container_get(bat_priv, type, version); - batadv_tvlv_container_remove(bat_priv, tvlv_old); - hlist_add_head(&tvlv_new->list, &bat_priv->tvlv.container_list); - spin_unlock_bh(&bat_priv->tvlv.container_list_lock); -} - -/** - * batadv_tvlv_realloc_packet_buff - reallocate packet buffer to accommodate - * requested packet size - * @packet_buff: packet buffer - * @packet_buff_len: packet buffer size - * @min_packet_len: requested packet minimum size - * @additional_packet_len: requested additional packet size on top of minimum - * size - * - * Return: true of the packet buffer could be changed to the requested size, - * false otherwise. - */ -static bool batadv_tvlv_realloc_packet_buff(unsigned char **packet_buff, - int *packet_buff_len, - int min_packet_len, - int additional_packet_len) -{ - unsigned char *new_buff; - - new_buff = kmalloc(min_packet_len + additional_packet_len, GFP_ATOMIC); - - /* keep old buffer if kmalloc should fail */ - if (!new_buff) - return false; - - memcpy(new_buff, *packet_buff, min_packet_len); - kfree(*packet_buff); - *packet_buff = new_buff; - *packet_buff_len = min_packet_len + additional_packet_len; - - return true; -} - -/** - * batadv_tvlv_container_ogm_append - append tvlv container content to given - * OGM packet buffer - * @bat_priv: the bat priv with all the soft interface information - * @packet_buff: ogm packet buffer - * @packet_buff_len: ogm packet buffer size including ogm header and tvlv - * content - * @packet_min_len: ogm header size to be preserved for the OGM itself - * - * The ogm packet might be enlarged or shrunk depending on the current size - * and the size of the to-be-appended tvlv containers. - * - * Return: size of all appended tvlv containers in bytes. - */ -u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv, - unsigned char **packet_buff, - int *packet_buff_len, int packet_min_len) -{ - struct batadv_tvlv_container *tvlv; - struct batadv_tvlv_hdr *tvlv_hdr; - u16 tvlv_value_len; - void *tvlv_value; - bool ret; - - spin_lock_bh(&bat_priv->tvlv.container_list_lock); - tvlv_value_len = batadv_tvlv_container_list_size(bat_priv); - - ret = batadv_tvlv_realloc_packet_buff(packet_buff, packet_buff_len, - packet_min_len, tvlv_value_len); - - if (!ret) - goto end; - - if (!tvlv_value_len) - goto end; - - tvlv_value = (*packet_buff) + packet_min_len; - - hlist_for_each_entry(tvlv, &bat_priv->tvlv.container_list, list) { - tvlv_hdr = tvlv_value; - tvlv_hdr->type = tvlv->tvlv_hdr.type; - tvlv_hdr->version = tvlv->tvlv_hdr.version; - tvlv_hdr->len = tvlv->tvlv_hdr.len; - tvlv_value = tvlv_hdr + 1; - memcpy(tvlv_value, tvlv + 1, ntohs(tvlv->tvlv_hdr.len)); - tvlv_value = (u8 *)tvlv_value + ntohs(tvlv->tvlv_hdr.len); - } - -end: - spin_unlock_bh(&bat_priv->tvlv.container_list_lock); - return tvlv_value_len; -} - -/** - * batadv_tvlv_call_handler - parse the given tvlv buffer to call the - * appropriate handlers - * @bat_priv: the bat priv with all the soft interface information - * @tvlv_handler: tvlv callback function handling the tvlv content - * @ogm_source: flag indicating whether the tvlv is an ogm or a unicast packet - * @orig_node: orig node emitting the ogm packet - * @src: source mac address of the unicast packet - * @dst: destination mac address of the unicast packet - * @tvlv_value: tvlv content - * @tvlv_value_len: tvlv content length - * - * Return: success if handler was not found or the return value of the handler - * callback. - */ -static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv, - struct batadv_tvlv_handler *tvlv_handler, - bool ogm_source, - struct batadv_orig_node *orig_node, - u8 *src, u8 *dst, - void *tvlv_value, u16 tvlv_value_len) -{ - if (!tvlv_handler) - return NET_RX_SUCCESS; - - if (ogm_source) { - if (!tvlv_handler->ogm_handler) - return NET_RX_SUCCESS; - - if (!orig_node) - return NET_RX_SUCCESS; - - tvlv_handler->ogm_handler(bat_priv, orig_node, - BATADV_NO_FLAGS, - tvlv_value, tvlv_value_len); - tvlv_handler->flags |= BATADV_TVLV_HANDLER_OGM_CALLED; - } else { - if (!src) - return NET_RX_SUCCESS; - - if (!dst) - return NET_RX_SUCCESS; - - if (!tvlv_handler->unicast_handler) - return NET_RX_SUCCESS; - - return tvlv_handler->unicast_handler(bat_priv, src, - dst, tvlv_value, - tvlv_value_len); - } - - return NET_RX_SUCCESS; -} - -/** - * batadv_tvlv_containers_process - parse the given tvlv buffer to call the - * appropriate handlers - * @bat_priv: the bat priv with all the soft interface information - * @ogm_source: flag indicating whether the tvlv is an ogm or a unicast packet - * @orig_node: orig node emitting the ogm packet - * @src: source mac address of the unicast packet - * @dst: destination mac address of the unicast packet - * @tvlv_value: tvlv content - * @tvlv_value_len: tvlv content length - * - * Return: success when processing an OGM or the return value of all called - * handler callbacks. - */ -int batadv_tvlv_containers_process(struct batadv_priv *bat_priv, - bool ogm_source, - struct batadv_orig_node *orig_node, - u8 *src, u8 *dst, - void *tvlv_value, u16 tvlv_value_len) -{ - struct batadv_tvlv_handler *tvlv_handler; - struct batadv_tvlv_hdr *tvlv_hdr; - u16 tvlv_value_cont_len; - u8 cifnotfound = BATADV_TVLV_HANDLER_OGM_CIFNOTFND; - int ret = NET_RX_SUCCESS; - - while (tvlv_value_len >= sizeof(*tvlv_hdr)) { - tvlv_hdr = tvlv_value; - tvlv_value_cont_len = ntohs(tvlv_hdr->len); - tvlv_value = tvlv_hdr + 1; - tvlv_value_len -= sizeof(*tvlv_hdr); - - if (tvlv_value_cont_len > tvlv_value_len) - break; - - tvlv_handler = batadv_tvlv_handler_get(bat_priv, - tvlv_hdr->type, - tvlv_hdr->version); - - ret |= batadv_tvlv_call_handler(bat_priv, tvlv_handler, - ogm_source, orig_node, - src, dst, tvlv_value, - tvlv_value_cont_len); - if (tvlv_handler) - batadv_tvlv_handler_put(tvlv_handler); - tvlv_value = (u8 *)tvlv_value + tvlv_value_cont_len; - tvlv_value_len -= tvlv_value_cont_len; - } - - if (!ogm_source) - return ret; - - rcu_read_lock(); - hlist_for_each_entry_rcu(tvlv_handler, - &bat_priv->tvlv.handler_list, list) { - if ((tvlv_handler->flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND) && - !(tvlv_handler->flags & BATADV_TVLV_HANDLER_OGM_CALLED)) - tvlv_handler->ogm_handler(bat_priv, orig_node, - cifnotfound, NULL, 0); - - tvlv_handler->flags &= ~BATADV_TVLV_HANDLER_OGM_CALLED; - } - rcu_read_unlock(); - - return NET_RX_SUCCESS; -} - -/** - * batadv_tvlv_ogm_receive - process an incoming ogm and call the appropriate - * handlers - * @bat_priv: the bat priv with all the soft interface information - * @batadv_ogm_packet: ogm packet containing the tvlv containers - * @orig_node: orig node emitting the ogm packet - */ -void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv, - struct batadv_ogm_packet *batadv_ogm_packet, - struct batadv_orig_node *orig_node) -{ - void *tvlv_value; - u16 tvlv_value_len; - - if (!batadv_ogm_packet) - return; - - tvlv_value_len = ntohs(batadv_ogm_packet->tvlv_len); - if (!tvlv_value_len) - return; - - tvlv_value = batadv_ogm_packet + 1; - - batadv_tvlv_containers_process(bat_priv, true, orig_node, NULL, NULL, - tvlv_value, tvlv_value_len); -} - -/** - * batadv_tvlv_handler_register - register tvlv handler based on the provided - * type and version (both need to match) for ogm tvlv payload and/or unicast - * payload - * @bat_priv: the bat priv with all the soft interface information - * @optr: ogm tvlv handler callback function. This function receives the orig - * node, flags and the tvlv content as argument to process. - * @uptr: unicast tvlv handler callback function. This function receives the - * source & destination of the unicast packet as well as the tvlv content - * to process. - * @type: tvlv handler type to be registered - * @version: tvlv handler version to be registered - * @flags: flags to enable or disable TVLV API behavior - */ -void batadv_tvlv_handler_register(struct batadv_priv *bat_priv, - void (*optr)(struct batadv_priv *bat_priv, - struct batadv_orig_node *orig, - u8 flags, - void *tvlv_value, - u16 tvlv_value_len), - int (*uptr)(struct batadv_priv *bat_priv, - u8 *src, u8 *dst, - void *tvlv_value, - u16 tvlv_value_len), - u8 type, u8 version, u8 flags) -{ - struct batadv_tvlv_handler *tvlv_handler; - - tvlv_handler = batadv_tvlv_handler_get(bat_priv, type, version); - if (tvlv_handler) { - batadv_tvlv_handler_put(tvlv_handler); - return; - } - - tvlv_handler = kzalloc(sizeof(*tvlv_handler), GFP_ATOMIC); - if (!tvlv_handler) - return; - - tvlv_handler->ogm_handler = optr; - tvlv_handler->unicast_handler = uptr; - tvlv_handler->type = type; - tvlv_handler->version = version; - tvlv_handler->flags = flags; - kref_init(&tvlv_handler->refcount); - INIT_HLIST_NODE(&tvlv_handler->list); - - spin_lock_bh(&bat_priv->tvlv.handler_list_lock); - hlist_add_head_rcu(&tvlv_handler->list, &bat_priv->tvlv.handler_list); - spin_unlock_bh(&bat_priv->tvlv.handler_list_lock); -} - -/** - * batadv_tvlv_handler_unregister - unregister tvlv handler based on the - * provided type and version (both need to match) - * @bat_priv: the bat priv with all the soft interface information - * @type: tvlv handler type to be unregistered - * @version: tvlv handler version to be unregistered - */ -void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv, - u8 type, u8 version) -{ - struct batadv_tvlv_handler *tvlv_handler; - - tvlv_handler = batadv_tvlv_handler_get(bat_priv, type, version); - if (!tvlv_handler) - return; - - batadv_tvlv_handler_put(tvlv_handler); - spin_lock_bh(&bat_priv->tvlv.handler_list_lock); - hlist_del_rcu(&tvlv_handler->list); - spin_unlock_bh(&bat_priv->tvlv.handler_list_lock); - batadv_tvlv_handler_put(tvlv_handler); -} - -/** - * batadv_tvlv_unicast_send - send a unicast packet with tvlv payload to the - * specified host - * @bat_priv: the bat priv with all the soft interface information - * @src: source mac address of the unicast packet - * @dst: destination mac address of the unicast packet - * @type: tvlv type - * @version: tvlv version - * @tvlv_value: tvlv content - * @tvlv_value_len: tvlv content length - */ -void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src, - u8 *dst, u8 type, u8 version, - void *tvlv_value, u16 tvlv_value_len) -{ - struct batadv_unicast_tvlv_packet *unicast_tvlv_packet; - struct batadv_tvlv_hdr *tvlv_hdr; - struct batadv_orig_node *orig_node; - struct sk_buff *skb; - unsigned char *tvlv_buff; - unsigned int tvlv_len; - ssize_t hdr_len = sizeof(*unicast_tvlv_packet); - - orig_node = batadv_orig_hash_find(bat_priv, dst); - if (!orig_node) - return; - - tvlv_len = sizeof(*tvlv_hdr) + tvlv_value_len; - - skb = netdev_alloc_skb_ip_align(NULL, ETH_HLEN + hdr_len + tvlv_len); - if (!skb) - goto out; - - skb->priority = TC_PRIO_CONTROL; - skb_reserve(skb, ETH_HLEN); - tvlv_buff = skb_put(skb, sizeof(*unicast_tvlv_packet) + tvlv_len); - unicast_tvlv_packet = (struct batadv_unicast_tvlv_packet *)tvlv_buff; - unicast_tvlv_packet->packet_type = BATADV_UNICAST_TVLV; - unicast_tvlv_packet->version = BATADV_COMPAT_VERSION; - unicast_tvlv_packet->ttl = BATADV_TTL; - unicast_tvlv_packet->reserved = 0; - unicast_tvlv_packet->tvlv_len = htons(tvlv_len); - unicast_tvlv_packet->align = 0; - ether_addr_copy(unicast_tvlv_packet->src, src); - ether_addr_copy(unicast_tvlv_packet->dst, dst); - - tvlv_buff = (unsigned char *)(unicast_tvlv_packet + 1); - tvlv_hdr = (struct batadv_tvlv_hdr *)tvlv_buff; - tvlv_hdr->version = version; - tvlv_hdr->type = type; - tvlv_hdr->len = htons(tvlv_value_len); - tvlv_buff += sizeof(*tvlv_hdr); - memcpy(tvlv_buff, tvlv_value, tvlv_value_len); - - if (batadv_send_skb_to_orig(skb, orig_node, NULL) == NET_XMIT_DROP) - kfree_skb(skb); -out: - batadv_orig_node_put(orig_node); -} - -/** * batadv_get_vid - extract the VLAN identifier from skb if any * @skb: the buffer containing the packet * @header_len: length of the batman header preceding the ethernet header @@ -1284,36 +629,6 @@ bool batadv_vlan_ap_isola_get(struct batadv_priv *bat_priv, unsigned short vid) return ap_isolation_enabled; } -static int batadv_param_set_ra(const char *val, const struct kernel_param *kp) -{ - struct batadv_algo_ops *bat_algo_ops; - char *algo_name = (char *)val; - size_t name_len = strlen(algo_name); - - if (name_len > 0 && algo_name[name_len - 1] == '\n') - algo_name[name_len - 1] = '\0'; - - bat_algo_ops = batadv_algo_get(algo_name); - if (!bat_algo_ops) { - pr_err("Routing algorithm '%s' is not supported\n", algo_name); - return -EINVAL; - } - - return param_set_copystring(algo_name, kp); -} - -static const struct kernel_param_ops batadv_param_ops_ra = { - .set = batadv_param_set_ra, - .get = param_get_string, -}; - -static struct kparam_string batadv_param_string_ra = { - .maxlen = sizeof(batadv_routing_algo), - .string = batadv_routing_algo, -}; - -module_param_cb(routing_algo, &batadv_param_ops_ra, &batadv_param_string_ra, - 0644); module_init(batadv_init); module_exit(batadv_exit); diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 76925266deed..06a860845434 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -24,7 +24,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2016.2" +#define BATADV_SOURCE_VERSION "2016.3" #endif /* B.A.T.M.A.N. parameters */ @@ -100,6 +100,9 @@ #define BATADV_NUM_BCASTS_WIRELESS 3 #define BATADV_NUM_BCASTS_MAX 3 +/* length of the single packet used by the TP meter */ +#define BATADV_TP_PACKET_LEN ETH_DATA_LEN + /* msecs after which an ARP_REQUEST is sent in broadcast as fallback */ #define ARP_REQ_DELAY 250 /* numbers of originator to contact for any PUT/GET DHT operation */ @@ -131,6 +134,11 @@ #define BATADV_NC_NODE_TIMEOUT 10000 /* Milliseconds */ +/** + * BATADV_TP_MAX_NUM - maximum number of simultaneously active tp sessions + */ +#define BATADV_TP_MAX_NUM 5 + enum batadv_mesh_state { BATADV_MESH_INACTIVE, BATADV_MESH_ACTIVE, @@ -175,29 +183,26 @@ enum batadv_uev_type { /* Kernel headers */ -#include <linux/atomic.h> #include <linux/bitops.h> /* for packet.h */ #include <linux/compiler.h> #include <linux/cpumask.h> #include <linux/etherdevice.h> #include <linux/if_ether.h> /* for packet.h */ -#include <linux/netdevice.h> -#include <linux/printk.h> -#include <linux/types.h> -#include <linux/percpu.h> -#include <linux/jiffies.h> #include <linux/if_vlan.h> +#include <linux/jiffies.h> +#include <linux/percpu.h> +#include <linux/types.h> #include "types.h" -struct batadv_ogm_packet; +struct net_device; +struct packet_type; struct seq_file; struct sk_buff; #define BATADV_PRINT_VID(vid) ((vid & BATADV_VLAN_HAS_TAG) ? \ (int)(vid & VLAN_VID_MASK) : -1) -extern char batadv_routing_algo[]; extern struct list_head batadv_hardif_list; extern unsigned char batadv_broadcast_addr[]; @@ -218,74 +223,9 @@ batadv_recv_handler_register(u8 packet_type, int (*recv_handler)(struct sk_buff *, struct batadv_hard_iface *)); void batadv_recv_handler_unregister(u8 packet_type); -int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops); -int batadv_algo_select(struct batadv_priv *bat_priv, char *name); -int batadv_algo_seq_print_text(struct seq_file *seq, void *offset); __be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr); /** - * enum batadv_dbg_level - available log levels - * @BATADV_DBG_BATMAN: OGM and TQ computations related messages - * @BATADV_DBG_ROUTES: route added / changed / deleted - * @BATADV_DBG_TT: translation table messages - * @BATADV_DBG_BLA: bridge loop avoidance messages - * @BATADV_DBG_DAT: ARP snooping and DAT related messages - * @BATADV_DBG_NC: network coding related messages - * @BATADV_DBG_ALL: the union of all the above log levels - */ -enum batadv_dbg_level { - BATADV_DBG_BATMAN = BIT(0), - BATADV_DBG_ROUTES = BIT(1), - BATADV_DBG_TT = BIT(2), - BATADV_DBG_BLA = BIT(3), - BATADV_DBG_DAT = BIT(4), - BATADV_DBG_NC = BIT(5), - BATADV_DBG_ALL = 63, -}; - -#ifdef CONFIG_BATMAN_ADV_DEBUG -int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...) -__printf(2, 3); - -/* possibly ratelimited debug output */ -#define _batadv_dbg(type, bat_priv, ratelimited, fmt, arg...) \ - do { \ - if (atomic_read(&bat_priv->log_level) & type && \ - (!ratelimited || net_ratelimit())) \ - batadv_debug_log(bat_priv, fmt, ## arg);\ - } \ - while (0) -#else /* !CONFIG_BATMAN_ADV_DEBUG */ -__printf(4, 5) -static inline void _batadv_dbg(int type __always_unused, - struct batadv_priv *bat_priv __always_unused, - int ratelimited __always_unused, - const char *fmt __always_unused, ...) -{ -} -#endif - -#define batadv_dbg(type, bat_priv, arg...) \ - _batadv_dbg(type, bat_priv, 0, ## arg) -#define batadv_dbg_ratelimited(type, bat_priv, arg...) \ - _batadv_dbg(type, bat_priv, 1, ## arg) - -#define batadv_info(net_dev, fmt, arg...) \ - do { \ - struct net_device *_netdev = (net_dev); \ - struct batadv_priv *_batpriv = netdev_priv(_netdev); \ - batadv_dbg(BATADV_DBG_ALL, _batpriv, fmt, ## arg); \ - pr_info("%s: " fmt, _netdev->name, ## arg); \ - } while (0) -#define batadv_err(net_dev, fmt, arg...) \ - do { \ - struct net_device *_netdev = (net_dev); \ - struct batadv_priv *_batpriv = netdev_priv(_netdev); \ - batadv_dbg(BATADV_DBG_ALL, _batpriv, fmt, ## arg); \ - pr_err("%s: " fmt, _netdev->name, ## arg); \ - } while (0) - -/** * batadv_compare_eth - Compare two not u16 aligned Ethernet addresses * @data1: Pointer to a six-byte array containing the Ethernet address * @data2: Pointer other six-byte array containing the Ethernet address @@ -370,39 +310,6 @@ static inline u64 batadv_sum_counter(struct batadv_priv *bat_priv, size_t idx) */ #define BATADV_SKB_CB(__skb) ((struct batadv_skb_cb *)&((__skb)->cb[0])) -void batadv_tvlv_container_register(struct batadv_priv *bat_priv, - u8 type, u8 version, - void *tvlv_value, u16 tvlv_value_len); -u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv, - unsigned char **packet_buff, - int *packet_buff_len, int packet_min_len); -void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv, - struct batadv_ogm_packet *batadv_ogm_packet, - struct batadv_orig_node *orig_node); -void batadv_tvlv_container_unregister(struct batadv_priv *bat_priv, - u8 type, u8 version); - -void batadv_tvlv_handler_register(struct batadv_priv *bat_priv, - void (*optr)(struct batadv_priv *bat_priv, - struct batadv_orig_node *orig, - u8 flags, - void *tvlv_value, - u16 tvlv_value_len), - int (*uptr)(struct batadv_priv *bat_priv, - u8 *src, u8 *dst, - void *tvlv_value, - u16 tvlv_value_len), - u8 type, u8 version, u8 flags); -void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv, - u8 type, u8 version); -int batadv_tvlv_containers_process(struct batadv_priv *bat_priv, - bool ogm_source, - struct batadv_orig_node *orig_node, - u8 *src, u8 *dst, - void *tvlv_buff, u16 tvlv_buff_len); -void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src, - u8 *dst, u8 type, u8 version, - void *tvlv_value, u16 tvlv_value_len); unsigned short batadv_get_vid(struct sk_buff *skb, size_t header_len); bool batadv_vlan_ap_isola_get(struct batadv_priv *bat_priv, unsigned short vid); diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index c32f24fafe67..cc915073a753 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -25,17 +25,23 @@ #include <linux/errno.h> #include <linux/etherdevice.h> #include <linux/fs.h> +#include <linux/icmpv6.h> +#include <linux/if_bridge.h> #include <linux/if_ether.h> -#include <linux/in6.h> +#include <linux/igmp.h> #include <linux/in.h> +#include <linux/in6.h> #include <linux/ip.h> #include <linux/ipv6.h> +#include <linux/kernel.h> #include <linux/kref.h> #include <linux/list.h> #include <linux/lockdep.h> #include <linux/netdevice.h> +#include <linux/printk.h> #include <linux/rculist.h> #include <linux/rcupdate.h> +#include <linux/seq_file.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> @@ -43,18 +49,57 @@ #include <linux/string.h> #include <linux/types.h> #include <net/addrconf.h> +#include <net/if_inet6.h> +#include <net/ip.h> #include <net/ipv6.h> +#include "hard-interface.h" +#include "hash.h" +#include "log.h" #include "packet.h" #include "translation-table.h" +#include "tvlv.h" + +/** + * batadv_mcast_get_bridge - get the bridge on top of the softif if it exists + * @soft_iface: netdev struct of the mesh interface + * + * If the given soft interface has a bridge on top then the refcount + * of the according net device is increased. + * + * Return: NULL if no such bridge exists. Otherwise the net device of the + * bridge. + */ +static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface) +{ + struct net_device *upper = soft_iface; + + rcu_read_lock(); + do { + upper = netdev_master_upper_dev_get_rcu(upper); + } while (upper && !(upper->priv_flags & IFF_EBRIDGE)); + + if (upper) + dev_hold(upper); + rcu_read_unlock(); + + return upper; +} /** * batadv_mcast_mla_softif_get - get softif multicast listeners * @dev: the device to collect multicast addresses from * @mcast_list: a list to put found addresses into * - * Collect multicast addresses of the local multicast listeners - * on the given soft interface, dev, in the given mcast_list. + * Collects multicast addresses of multicast listeners residing + * on this kernel on the given soft interface, dev, in + * the given mcast_list. In general, multicast listeners provided by + * your multicast receiving applications run directly on this node. + * + * If there is a bridge interface on top of dev, collects from that one + * instead. Just like with IP addresses and routes, multicast listeners + * will(/should) register to the bridge interface instead of an + * enslaved bat0. * * Return: -ENOMEM on memory allocation error or the number of * items added to the mcast_list otherwise. @@ -62,12 +107,13 @@ static int batadv_mcast_mla_softif_get(struct net_device *dev, struct hlist_head *mcast_list) { + struct net_device *bridge = batadv_mcast_get_bridge(dev); struct netdev_hw_addr *mc_list_entry; struct batadv_hw_addr *new; int ret = 0; - netif_addr_lock_bh(dev); - netdev_for_each_mc_addr(mc_list_entry, dev) { + netif_addr_lock_bh(bridge ? bridge : dev); + netdev_for_each_mc_addr(mc_list_entry, bridge ? bridge : dev) { new = kmalloc(sizeof(*new), GFP_ATOMIC); if (!new) { ret = -ENOMEM; @@ -78,7 +124,10 @@ static int batadv_mcast_mla_softif_get(struct net_device *dev, hlist_add_head(&new->list, mcast_list); ret++; } - netif_addr_unlock_bh(dev); + netif_addr_unlock_bh(bridge ? bridge : dev); + + if (bridge) + dev_put(bridge); return ret; } @@ -104,6 +153,83 @@ static bool batadv_mcast_mla_is_duplicate(u8 *mcast_addr, } /** + * batadv_mcast_mla_br_addr_cpy - copy a bridge multicast address + * @dst: destination to write to - a multicast MAC address + * @src: source to read from - a multicast IP address + * + * Converts a given multicast IPv4/IPv6 address from a bridge + * to its matching multicast MAC address and copies it into the given + * destination buffer. + * + * Caller needs to make sure the destination buffer can hold + * at least ETH_ALEN bytes. + */ +static void batadv_mcast_mla_br_addr_cpy(char *dst, const struct br_ip *src) +{ + if (src->proto == htons(ETH_P_IP)) + ip_eth_mc_map(src->u.ip4, dst); +#if IS_ENABLED(CONFIG_IPV6) + else if (src->proto == htons(ETH_P_IPV6)) + ipv6_eth_mc_map(&src->u.ip6, dst); +#endif + else + eth_zero_addr(dst); +} + +/** + * batadv_mcast_mla_bridge_get - get bridged-in multicast listeners + * @dev: a bridge slave whose bridge to collect multicast addresses from + * @mcast_list: a list to put found addresses into + * + * Collects multicast addresses of multicast listeners residing + * on foreign, non-mesh devices which we gave access to our mesh via + * a bridge on top of the given soft interface, dev, in the given + * mcast_list. + * + * Return: -ENOMEM on memory allocation error or the number of + * items added to the mcast_list otherwise. + */ +static int batadv_mcast_mla_bridge_get(struct net_device *dev, + struct hlist_head *mcast_list) +{ + struct list_head bridge_mcast_list = LIST_HEAD_INIT(bridge_mcast_list); + struct br_ip_list *br_ip_entry, *tmp; + struct batadv_hw_addr *new; + u8 mcast_addr[ETH_ALEN]; + int ret; + + /* we don't need to detect these devices/listeners, the IGMP/MLD + * snooping code of the Linux bridge already does that for us + */ + ret = br_multicast_list_adjacent(dev, &bridge_mcast_list); + if (ret < 0) + goto out; + + list_for_each_entry(br_ip_entry, &bridge_mcast_list, list) { + batadv_mcast_mla_br_addr_cpy(mcast_addr, &br_ip_entry->addr); + if (batadv_mcast_mla_is_duplicate(mcast_addr, mcast_list)) + continue; + + new = kmalloc(sizeof(*new), GFP_ATOMIC); + if (!new) { + ret = -ENOMEM; + break; + } + + ether_addr_copy(new->addr, mcast_addr); + hlist_add_head(&new->list, mcast_list); + } + +out: + list_for_each_entry_safe(br_ip_entry, tmp, &bridge_mcast_list, list) { + list_del(&br_ip_entry->list); + kfree(br_ip_entry); + } + + return ret; +} + +/** * batadv_mcast_mla_list_free - free a list of multicast addresses * @bat_priv: the bat priv with all the soft interface information * @mcast_list: the list to free @@ -214,44 +340,195 @@ static bool batadv_mcast_has_bridge(struct batadv_priv *bat_priv) } /** + * batadv_mcast_querier_log - debug output regarding the querier status on link + * @bat_priv: the bat priv with all the soft interface information + * @str_proto: a string for the querier protocol (e.g. "IGMP" or "MLD") + * @old_state: the previous querier state on our link + * @new_state: the new querier state on our link + * + * Outputs debug messages to the logging facility with log level 'mcast' + * regarding changes to the querier status on the link which are relevant + * to our multicast optimizations. + * + * Usually this is about whether a querier appeared or vanished in + * our mesh or whether the querier is in the suboptimal position of being + * behind our local bridge segment: Snooping switches will directly + * forward listener reports to the querier, therefore batman-adv and + * the bridge will potentially not see these listeners - the querier is + * potentially shadowing listeners from us then. + * + * This is only interesting for nodes with a bridge on top of their + * soft interface. + */ +static void +batadv_mcast_querier_log(struct batadv_priv *bat_priv, char *str_proto, + struct batadv_mcast_querier_state *old_state, + struct batadv_mcast_querier_state *new_state) +{ + if (!old_state->exists && new_state->exists) + batadv_info(bat_priv->soft_iface, "%s Querier appeared\n", + str_proto); + else if (old_state->exists && !new_state->exists) + batadv_info(bat_priv->soft_iface, + "%s Querier disappeared - multicast optimizations disabled\n", + str_proto); + else if (!bat_priv->mcast.bridged && !new_state->exists) + batadv_info(bat_priv->soft_iface, + "No %s Querier present - multicast optimizations disabled\n", + str_proto); + + if (new_state->exists) { + if ((!old_state->shadowing && new_state->shadowing) || + (!old_state->exists && new_state->shadowing)) + batadv_dbg(BATADV_DBG_MCAST, bat_priv, + "%s Querier is behind our bridged segment: Might shadow listeners\n", + str_proto); + else if (old_state->shadowing && !new_state->shadowing) + batadv_dbg(BATADV_DBG_MCAST, bat_priv, + "%s Querier is not behind our bridged segment\n", + str_proto); + } +} + +/** + * batadv_mcast_bridge_log - debug output for topology changes in bridged setups + * @bat_priv: the bat priv with all the soft interface information + * @bridged: a flag about whether the soft interface is currently bridged or not + * @querier_ipv4: (maybe) new status of a potential, selected IGMP querier + * @querier_ipv6: (maybe) new status of a potential, selected MLD querier + * + * If no bridges are ever used on this node, then this function does nothing. + * + * Otherwise this function outputs debug information to the 'mcast' log level + * which might be relevant to our multicast optimizations. + * + * More precisely, it outputs information when a bridge interface is added or + * removed from a soft interface. And when a bridge is present, it further + * outputs information about the querier state which is relevant for the + * multicast flags this node is going to set. + */ +static void +batadv_mcast_bridge_log(struct batadv_priv *bat_priv, bool bridged, + struct batadv_mcast_querier_state *querier_ipv4, + struct batadv_mcast_querier_state *querier_ipv6) +{ + if (!bat_priv->mcast.bridged && bridged) + batadv_dbg(BATADV_DBG_MCAST, bat_priv, + "Bridge added: Setting Unsnoopables(U)-flag\n"); + else if (bat_priv->mcast.bridged && !bridged) + batadv_dbg(BATADV_DBG_MCAST, bat_priv, + "Bridge removed: Unsetting Unsnoopables(U)-flag\n"); + + if (bridged) { + batadv_mcast_querier_log(bat_priv, "IGMP", + &bat_priv->mcast.querier_ipv4, + querier_ipv4); + batadv_mcast_querier_log(bat_priv, "MLD", + &bat_priv->mcast.querier_ipv6, + querier_ipv6); + } +} + +/** + * batadv_mcast_flags_logs - output debug information about mcast flag changes + * @bat_priv: the bat priv with all the soft interface information + * @flags: flags indicating the new multicast state + * + * Whenever the multicast flags this nodes announces changes (@mcast_flags vs. + * bat_priv->mcast.flags), this notifies userspace via the 'mcast' log level. + */ +static void batadv_mcast_flags_log(struct batadv_priv *bat_priv, u8 flags) +{ + u8 old_flags = bat_priv->mcast.flags; + char str_old_flags[] = "[...]"; + + sprintf(str_old_flags, "[%c%c%c]", + (old_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) ? 'U' : '.', + (old_flags & BATADV_MCAST_WANT_ALL_IPV4) ? '4' : '.', + (old_flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.'); + + batadv_dbg(BATADV_DBG_MCAST, bat_priv, + "Changing multicast flags from '%s' to '[%c%c%c]'\n", + bat_priv->mcast.enabled ? str_old_flags : "<undefined>", + (flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) ? 'U' : '.', + (flags & BATADV_MCAST_WANT_ALL_IPV4) ? '4' : '.', + (flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.'); +} + +/** * batadv_mcast_mla_tvlv_update - update multicast tvlv * @bat_priv: the bat priv with all the soft interface information * * Updates the own multicast tvlv with our current multicast related settings, * capabilities and inabilities. * - * Return: true if the tvlv container is registered afterwards. Otherwise - * returns false. + * Return: false if we want all IPv4 && IPv6 multicast traffic and true + * otherwise. */ static bool batadv_mcast_mla_tvlv_update(struct batadv_priv *bat_priv) { struct batadv_tvlv_mcast_data mcast_data; + struct batadv_mcast_querier_state querier4 = {false, false}; + struct batadv_mcast_querier_state querier6 = {false, false}; + struct net_device *dev = bat_priv->soft_iface; + bool bridged; mcast_data.flags = BATADV_NO_FLAGS; memset(mcast_data.reserved, 0, sizeof(mcast_data.reserved)); - /* Avoid attaching MLAs, if there is a bridge on top of our soft - * interface, we don't support that yet (TODO) + bridged = batadv_mcast_has_bridge(bat_priv); + if (!bridged) + goto update; + +#if !IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING) + pr_warn_once("No bridge IGMP snooping compiled - multicast optimizations disabled\n"); +#endif + + querier4.exists = br_multicast_has_querier_anywhere(dev, ETH_P_IP); + querier4.shadowing = br_multicast_has_querier_adjacent(dev, ETH_P_IP); + + querier6.exists = br_multicast_has_querier_anywhere(dev, ETH_P_IPV6); + querier6.shadowing = br_multicast_has_querier_adjacent(dev, ETH_P_IPV6); + + mcast_data.flags |= BATADV_MCAST_WANT_ALL_UNSNOOPABLES; + + /* 1) If no querier exists at all, then multicast listeners on + * our local TT clients behind the bridge will keep silent. + * 2) If the selected querier is on one of our local TT clients, + * behind the bridge, then this querier might shadow multicast + * listeners on our local TT clients, behind this bridge. + * + * In both cases, we will signalize other batman nodes that + * we need all multicast traffic of the according protocol. */ - if (batadv_mcast_has_bridge(bat_priv)) { - if (bat_priv->mcast.enabled) { - batadv_tvlv_container_unregister(bat_priv, - BATADV_TVLV_MCAST, 1); - bat_priv->mcast.enabled = false; - } + if (!querier4.exists || querier4.shadowing) + mcast_data.flags |= BATADV_MCAST_WANT_ALL_IPV4; - return false; - } + if (!querier6.exists || querier6.shadowing) + mcast_data.flags |= BATADV_MCAST_WANT_ALL_IPV6; + +update: + batadv_mcast_bridge_log(bat_priv, bridged, &querier4, &querier6); + + bat_priv->mcast.querier_ipv4.exists = querier4.exists; + bat_priv->mcast.querier_ipv4.shadowing = querier4.shadowing; + + bat_priv->mcast.querier_ipv6.exists = querier6.exists; + bat_priv->mcast.querier_ipv6.shadowing = querier6.shadowing; + + bat_priv->mcast.bridged = bridged; if (!bat_priv->mcast.enabled || mcast_data.flags != bat_priv->mcast.flags) { - batadv_tvlv_container_register(bat_priv, BATADV_TVLV_MCAST, 1, + batadv_mcast_flags_log(bat_priv, mcast_data.flags); + batadv_tvlv_container_register(bat_priv, BATADV_TVLV_MCAST, 2, &mcast_data, sizeof(mcast_data)); bat_priv->mcast.flags = mcast_data.flags; bat_priv->mcast.enabled = true; } - return true; + return !(mcast_data.flags & + (BATADV_MCAST_WANT_ALL_IPV4 + BATADV_MCAST_WANT_ALL_IPV6)); } /** @@ -274,6 +551,10 @@ void batadv_mcast_mla_update(struct batadv_priv *bat_priv) if (ret < 0) goto out; + ret = batadv_mcast_mla_bridge_get(soft_iface, &mcast_list); + if (ret < 0) + goto out; + update: batadv_mcast_mla_tt_retract(bat_priv, &mcast_list); batadv_mcast_mla_tt_add(bat_priv, &mcast_list); @@ -283,6 +564,31 @@ out: } /** + * batadv_mcast_is_report_ipv4 - check for IGMP reports + * @skb: the ethernet frame destined for the mesh + * + * This call might reallocate skb data. + * + * Checks whether the given frame is a valid IGMP report. + * + * Return: If so then true, otherwise false. + */ +static bool batadv_mcast_is_report_ipv4(struct sk_buff *skb) +{ + if (ip_mc_check_igmp(skb, NULL) < 0) + return false; + + switch (igmp_hdr(skb)->type) { + case IGMP_HOST_MEMBERSHIP_REPORT: + case IGMPV2_HOST_MEMBERSHIP_REPORT: + case IGMPV3_HOST_MEMBERSHIP_REPORT: + return true; + } + + return false; +} + +/** * batadv_mcast_forw_mode_check_ipv4 - check for optimized forwarding potential * @bat_priv: the bat priv with all the soft interface information * @skb: the IPv4 packet to check @@ -304,6 +610,9 @@ static int batadv_mcast_forw_mode_check_ipv4(struct batadv_priv *bat_priv, if (!pskb_may_pull(skb, sizeof(struct ethhdr) + sizeof(*iphdr))) return -ENOMEM; + if (batadv_mcast_is_report_ipv4(skb)) + return -EINVAL; + iphdr = ip_hdr(skb); /* TODO: Implement Multicast Router Discovery (RFC4286), @@ -320,6 +629,31 @@ static int batadv_mcast_forw_mode_check_ipv4(struct batadv_priv *bat_priv, return 0; } +#if IS_ENABLED(CONFIG_IPV6) +/** + * batadv_mcast_is_report_ipv6 - check for MLD reports + * @skb: the ethernet frame destined for the mesh + * + * This call might reallocate skb data. + * + * Checks whether the given frame is a valid MLD report. + * + * Return: If so then true, otherwise false. + */ +static bool batadv_mcast_is_report_ipv6(struct sk_buff *skb) +{ + if (ipv6_mc_check_mld(skb, NULL) < 0) + return false; + + switch (icmp6_hdr(skb)->icmp6_type) { + case ICMPV6_MGM_REPORT: + case ICMPV6_MLD2_REPORT: + return true; + } + + return false; +} + /** * batadv_mcast_forw_mode_check_ipv6 - check for optimized forwarding potential * @bat_priv: the bat priv with all the soft interface information @@ -341,6 +675,9 @@ static int batadv_mcast_forw_mode_check_ipv6(struct batadv_priv *bat_priv, if (!pskb_may_pull(skb, sizeof(struct ethhdr) + sizeof(*ip6hdr))) return -ENOMEM; + if (batadv_mcast_is_report_ipv6(skb)) + return -EINVAL; + ip6hdr = ipv6_hdr(skb); /* TODO: Implement Multicast Router Discovery (RFC4286), @@ -357,6 +694,7 @@ static int batadv_mcast_forw_mode_check_ipv6(struct batadv_priv *bat_priv, return 0; } +#endif /** * batadv_mcast_forw_mode_check - check for optimized forwarding potential @@ -385,9 +723,11 @@ static int batadv_mcast_forw_mode_check(struct batadv_priv *bat_priv, case ETH_P_IP: return batadv_mcast_forw_mode_check_ipv4(bat_priv, skb, is_unsnoopable); +#if IS_ENABLED(CONFIG_IPV6) case ETH_P_IPV6: return batadv_mcast_forw_mode_check_ipv6(bat_priv, skb, is_unsnoopable); +#endif default: return -EINVAL; } @@ -728,18 +1068,18 @@ static void batadv_mcast_want_ipv6_update(struct batadv_priv *bat_priv, } /** - * batadv_mcast_tvlv_ogm_handler_v1 - process incoming multicast tvlv container + * batadv_mcast_tvlv_ogm_handler - process incoming multicast tvlv container * @bat_priv: the bat priv with all the soft interface information * @orig: the orig_node of the ogm * @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags) * @tvlv_value: tvlv buffer containing the multicast data * @tvlv_value_len: tvlv buffer length */ -static void batadv_mcast_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, - struct batadv_orig_node *orig, - u8 flags, - void *tvlv_value, - u16 tvlv_value_len) +static void batadv_mcast_tvlv_ogm_handler(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig, + u8 flags, + void *tvlv_value, + u16 tvlv_value_len) { bool orig_mcast_enabled = !(flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND); u8 mcast_flags = BATADV_NO_FLAGS; @@ -789,19 +1129,120 @@ static void batadv_mcast_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, */ void batadv_mcast_init(struct batadv_priv *bat_priv) { - batadv_tvlv_handler_register(bat_priv, batadv_mcast_tvlv_ogm_handler_v1, - NULL, BATADV_TVLV_MCAST, 1, + batadv_tvlv_handler_register(bat_priv, batadv_mcast_tvlv_ogm_handler, + NULL, BATADV_TVLV_MCAST, 2, BATADV_TVLV_HANDLER_OGM_CIFNOTFND); } /** + * batadv_mcast_flags_print_header - print own mcast flags to debugfs table + * @bat_priv: the bat priv with all the soft interface information + * @seq: debugfs table seq_file struct + * + * Prints our own multicast flags including a more specific reason why + * they are set, that is prints the bridge and querier state too, to + * the debugfs table specified via @seq. + */ +static void batadv_mcast_flags_print_header(struct batadv_priv *bat_priv, + struct seq_file *seq) +{ + u8 flags = bat_priv->mcast.flags; + char querier4, querier6, shadowing4, shadowing6; + bool bridged = bat_priv->mcast.bridged; + + if (bridged) { + querier4 = bat_priv->mcast.querier_ipv4.exists ? '.' : '4'; + querier6 = bat_priv->mcast.querier_ipv6.exists ? '.' : '6'; + shadowing4 = bat_priv->mcast.querier_ipv4.shadowing ? '4' : '.'; + shadowing6 = bat_priv->mcast.querier_ipv6.shadowing ? '6' : '.'; + } else { + querier4 = '?'; + querier6 = '?'; + shadowing4 = '?'; + shadowing6 = '?'; + } + + seq_printf(seq, "Multicast flags (own flags: [%c%c%c])\n", + (flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) ? 'U' : '.', + (flags & BATADV_MCAST_WANT_ALL_IPV4) ? '4' : '.', + (flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.'); + seq_printf(seq, "* Bridged [U]\t\t\t\t%c\n", bridged ? 'U' : '.'); + seq_printf(seq, "* No IGMP/MLD Querier [4/6]:\t\t%c/%c\n", + querier4, querier6); + seq_printf(seq, "* Shadowing IGMP/MLD Querier [4/6]:\t%c/%c\n", + shadowing4, shadowing6); + seq_puts(seq, "-------------------------------------------\n"); + seq_printf(seq, " %-10s %s\n", "Originator", "Flags"); +} + +/** + * batadv_mcast_flags_seq_print_text - print the mcast flags of other nodes + * @seq: seq file to print on + * @offset: not used + * + * This prints a table of (primary) originators and their according + * multicast flags, including (in the header) our own. + * + * Return: always 0 + */ +int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset) +{ + struct net_device *net_dev = (struct net_device *)seq->private; + struct batadv_priv *bat_priv = netdev_priv(net_dev); + struct batadv_hard_iface *primary_if; + struct batadv_hashtable *hash = bat_priv->orig_hash; + struct batadv_orig_node *orig_node; + struct hlist_head *head; + u8 flags; + u32 i; + + primary_if = batadv_seq_print_text_primary_if_get(seq); + if (!primary_if) + return 0; + + batadv_mcast_flags_print_header(bat_priv, seq); + + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { + if (!test_bit(BATADV_ORIG_CAPA_HAS_MCAST, + &orig_node->capa_initialized)) + continue; + + if (!test_bit(BATADV_ORIG_CAPA_HAS_MCAST, + &orig_node->capabilities)) { + seq_printf(seq, "%pM -\n", orig_node->orig); + continue; + } + + flags = orig_node->mcast_flags; + + seq_printf(seq, "%pM [%c%c%c]\n", orig_node->orig, + (flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) + ? 'U' : '.', + (flags & BATADV_MCAST_WANT_ALL_IPV4) + ? '4' : '.', + (flags & BATADV_MCAST_WANT_ALL_IPV6) + ? '6' : '.'); + } + rcu_read_unlock(); + } + + batadv_hardif_put(primary_if); + + return 0; +} + +/** * batadv_mcast_free - free the multicast optimizations structures * @bat_priv: the bat priv with all the soft interface information */ void batadv_mcast_free(struct batadv_priv *bat_priv) { - batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_MCAST, 1); - batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_MCAST, 1); + batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_MCAST, 2); + batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_MCAST, 2); spin_lock_bh(&bat_priv->tt.commit_lock); batadv_mcast_mla_tt_retract(bat_priv, NULL); diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h index 80bceec55592..1fb00ba84907 100644 --- a/net/batman-adv/multicast.h +++ b/net/batman-adv/multicast.h @@ -20,6 +20,7 @@ #include "main.h" +struct seq_file; struct sk_buff; /** @@ -46,6 +47,8 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, void batadv_mcast_init(struct batadv_priv *bat_priv); +int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset); + void batadv_mcast_free(struct batadv_priv *bat_priv); void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node); diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c new file mode 100644 index 000000000000..231f8eaf075b --- /dev/null +++ b/net/batman-adv/netlink.c @@ -0,0 +1,424 @@ +/* Copyright (C) 2016 B.A.T.M.A.N. contributors: + * + * Matthias Schiffer + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "netlink.h" +#include "main.h" + +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/genetlink.h> +#include <linux/if_ether.h> +#include <linux/init.h> +#include <linux/netdevice.h> +#include <linux/netlink.h> +#include <linux/printk.h> +#include <linux/stddef.h> +#include <linux/types.h> +#include <net/genetlink.h> +#include <net/netlink.h> +#include <uapi/linux/batman_adv.h> + +#include "hard-interface.h" +#include "soft-interface.h" +#include "tp_meter.h" + +struct sk_buff; + +static struct genl_family batadv_netlink_family = { + .id = GENL_ID_GENERATE, + .hdrsize = 0, + .name = BATADV_NL_NAME, + .version = 1, + .maxattr = BATADV_ATTR_MAX, +}; + +/* multicast groups */ +enum batadv_netlink_multicast_groups { + BATADV_NL_MCGRP_TPMETER, +}; + +static struct genl_multicast_group batadv_netlink_mcgrps[] = { + [BATADV_NL_MCGRP_TPMETER] = { .name = BATADV_NL_MCAST_GROUP_TPMETER }, +}; + +static struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = { + [BATADV_ATTR_VERSION] = { .type = NLA_STRING }, + [BATADV_ATTR_ALGO_NAME] = { .type = NLA_STRING }, + [BATADV_ATTR_MESH_IFINDEX] = { .type = NLA_U32 }, + [BATADV_ATTR_MESH_IFNAME] = { .type = NLA_STRING }, + [BATADV_ATTR_MESH_ADDRESS] = { .len = ETH_ALEN }, + [BATADV_ATTR_HARD_IFINDEX] = { .type = NLA_U32 }, + [BATADV_ATTR_HARD_IFNAME] = { .type = NLA_STRING }, + [BATADV_ATTR_HARD_ADDRESS] = { .len = ETH_ALEN }, + [BATADV_ATTR_ORIG_ADDRESS] = { .len = ETH_ALEN }, + [BATADV_ATTR_TPMETER_RESULT] = { .type = NLA_U8 }, + [BATADV_ATTR_TPMETER_TEST_TIME] = { .type = NLA_U32 }, + [BATADV_ATTR_TPMETER_BYTES] = { .type = NLA_U64 }, + [BATADV_ATTR_TPMETER_COOKIE] = { .type = NLA_U32 }, +}; + +/** + * batadv_netlink_mesh_info_put - fill in generic information about mesh + * interface + * @msg: netlink message to be sent back + * @soft_iface: interface for which the data should be taken + * + * Return: 0 on success, < 0 on error + */ +static int +batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface) +{ + struct batadv_priv *bat_priv = netdev_priv(soft_iface); + struct batadv_hard_iface *primary_if = NULL; + struct net_device *hard_iface; + int ret = -ENOBUFS; + + if (nla_put_string(msg, BATADV_ATTR_VERSION, BATADV_SOURCE_VERSION) || + nla_put_string(msg, BATADV_ATTR_ALGO_NAME, + bat_priv->algo_ops->name) || + nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX, soft_iface->ifindex) || + nla_put_string(msg, BATADV_ATTR_MESH_IFNAME, soft_iface->name) || + nla_put(msg, BATADV_ATTR_MESH_ADDRESS, ETH_ALEN, + soft_iface->dev_addr)) + goto out; + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (primary_if && primary_if->if_status == BATADV_IF_ACTIVE) { + hard_iface = primary_if->net_dev; + + if (nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, + hard_iface->ifindex) || + nla_put_string(msg, BATADV_ATTR_HARD_IFNAME, + hard_iface->name) || + nla_put(msg, BATADV_ATTR_HARD_ADDRESS, ETH_ALEN, + hard_iface->dev_addr)) + goto out; + } + + ret = 0; + + out: + if (primary_if) + batadv_hardif_put(primary_if); + + return ret; +} + +/** + * batadv_netlink_get_mesh_info - handle incoming BATADV_CMD_GET_MESH_INFO + * netlink request + * @skb: received netlink message + * @info: receiver information + * + * Return: 0 on success, < 0 on error + */ +static int +batadv_netlink_get_mesh_info(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct net_device *soft_iface; + struct sk_buff *msg = NULL; + void *msg_head; + int ifindex; + int ret; + + if (!info->attrs[BATADV_ATTR_MESH_IFINDEX]) + return -EINVAL; + + ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]); + if (!ifindex) + return -EINVAL; + + soft_iface = dev_get_by_index(net, ifindex); + if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { + ret = -ENODEV; + goto out; + } + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) { + ret = -ENOMEM; + goto out; + } + + msg_head = genlmsg_put(msg, info->snd_portid, info->snd_seq, + &batadv_netlink_family, 0, + BATADV_CMD_GET_MESH_INFO); + if (!msg_head) { + ret = -ENOBUFS; + goto out; + } + + ret = batadv_netlink_mesh_info_put(msg, soft_iface); + + out: + if (soft_iface) + dev_put(soft_iface); + + if (ret) { + if (msg) + nlmsg_free(msg); + return ret; + } + + genlmsg_end(msg, msg_head); + return genlmsg_reply(msg, info); +} + +/** + * batadv_netlink_tp_meter_put - Fill information of started tp_meter session + * @msg: netlink message to be sent back + * @cookie: tp meter session cookie + * + * Return: 0 on success, < 0 on error + */ +static int +batadv_netlink_tp_meter_put(struct sk_buff *msg, u32 cookie) +{ + if (nla_put_u32(msg, BATADV_ATTR_TPMETER_COOKIE, cookie)) + return -ENOBUFS; + + return 0; +} + +/** + * batadv_netlink_tpmeter_notify - send tp_meter result via netlink to client + * @bat_priv: the bat priv with all the soft interface information + * @dst: destination of tp_meter session + * @result: reason for tp meter session stop + * @test_time: total time ot the tp_meter session + * @total_bytes: bytes acked to the receiver + * @cookie: cookie of tp_meter session + * + * Return: 0 on success, < 0 on error + */ +int batadv_netlink_tpmeter_notify(struct batadv_priv *bat_priv, const u8 *dst, + u8 result, u32 test_time, u64 total_bytes, + u32 cookie) +{ + struct sk_buff *msg; + void *hdr; + int ret; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = genlmsg_put(msg, 0, 0, &batadv_netlink_family, 0, + BATADV_CMD_TP_METER); + if (!hdr) { + ret = -ENOBUFS; + goto err_genlmsg; + } + + if (nla_put_u32(msg, BATADV_ATTR_TPMETER_COOKIE, cookie)) + goto nla_put_failure; + + if (nla_put_u32(msg, BATADV_ATTR_TPMETER_TEST_TIME, test_time)) + goto nla_put_failure; + + if (nla_put_u64_64bit(msg, BATADV_ATTR_TPMETER_BYTES, total_bytes, + BATADV_ATTR_PAD)) + goto nla_put_failure; + + if (nla_put_u8(msg, BATADV_ATTR_TPMETER_RESULT, result)) + goto nla_put_failure; + + if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, dst)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + genlmsg_multicast_netns(&batadv_netlink_family, + dev_net(bat_priv->soft_iface), msg, 0, + BATADV_NL_MCGRP_TPMETER, GFP_KERNEL); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + ret = -EMSGSIZE; + +err_genlmsg: + nlmsg_free(msg); + return ret; +} + +/** + * batadv_netlink_tp_meter_start - Start a new tp_meter session + * @skb: received netlink message + * @info: receiver information + * + * Return: 0 on success, < 0 on error + */ +static int +batadv_netlink_tp_meter_start(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct net_device *soft_iface; + struct batadv_priv *bat_priv; + struct sk_buff *msg = NULL; + u32 test_length; + void *msg_head; + int ifindex; + u32 cookie; + u8 *dst; + int ret; + + if (!info->attrs[BATADV_ATTR_MESH_IFINDEX]) + return -EINVAL; + + if (!info->attrs[BATADV_ATTR_ORIG_ADDRESS]) + return -EINVAL; + + if (!info->attrs[BATADV_ATTR_TPMETER_TEST_TIME]) + return -EINVAL; + + ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]); + if (!ifindex) + return -EINVAL; + + dst = nla_data(info->attrs[BATADV_ATTR_ORIG_ADDRESS]); + + test_length = nla_get_u32(info->attrs[BATADV_ATTR_TPMETER_TEST_TIME]); + + soft_iface = dev_get_by_index(net, ifindex); + if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { + ret = -ENODEV; + goto out; + } + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) { + ret = -ENOMEM; + goto out; + } + + msg_head = genlmsg_put(msg, info->snd_portid, info->snd_seq, + &batadv_netlink_family, 0, + BATADV_CMD_TP_METER); + if (!msg_head) { + ret = -ENOBUFS; + goto out; + } + + bat_priv = netdev_priv(soft_iface); + batadv_tp_start(bat_priv, dst, test_length, &cookie); + + ret = batadv_netlink_tp_meter_put(msg, cookie); + + out: + if (soft_iface) + dev_put(soft_iface); + + if (ret) { + if (msg) + nlmsg_free(msg); + return ret; + } + + genlmsg_end(msg, msg_head); + return genlmsg_reply(msg, info); +} + +/** + * batadv_netlink_tp_meter_start - Cancel a running tp_meter session + * @skb: received netlink message + * @info: receiver information + * + * Return: 0 on success, < 0 on error + */ +static int +batadv_netlink_tp_meter_cancel(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct net_device *soft_iface; + struct batadv_priv *bat_priv; + int ifindex; + u8 *dst; + int ret = 0; + + if (!info->attrs[BATADV_ATTR_MESH_IFINDEX]) + return -EINVAL; + + if (!info->attrs[BATADV_ATTR_ORIG_ADDRESS]) + return -EINVAL; + + ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]); + if (!ifindex) + return -EINVAL; + + dst = nla_data(info->attrs[BATADV_ATTR_ORIG_ADDRESS]); + + soft_iface = dev_get_by_index(net, ifindex); + if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { + ret = -ENODEV; + goto out; + } + + bat_priv = netdev_priv(soft_iface); + batadv_tp_stop(bat_priv, dst, BATADV_TP_REASON_CANCEL); + +out: + if (soft_iface) + dev_put(soft_iface); + + return ret; +} + +static struct genl_ops batadv_netlink_ops[] = { + { + .cmd = BATADV_CMD_GET_MESH_INFO, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .doit = batadv_netlink_get_mesh_info, + }, + { + .cmd = BATADV_CMD_TP_METER, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .doit = batadv_netlink_tp_meter_start, + }, + { + .cmd = BATADV_CMD_TP_METER_CANCEL, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .doit = batadv_netlink_tp_meter_cancel, + }, +}; + +/** + * batadv_netlink_register - register batadv genl netlink family + */ +void __init batadv_netlink_register(void) +{ + int ret; + + ret = genl_register_family_with_ops_groups(&batadv_netlink_family, + batadv_netlink_ops, + batadv_netlink_mcgrps); + if (ret) + pr_warn("unable to register netlink family"); +} + +/** + * batadv_netlink_unregister - unregister batadv genl netlink family + */ +void batadv_netlink_unregister(void) +{ + genl_unregister_family(&batadv_netlink_family); +} diff --git a/net/batman-adv/netlink.h b/net/batman-adv/netlink.h new file mode 100644 index 000000000000..945653ab58c6 --- /dev/null +++ b/net/batman-adv/netlink.h @@ -0,0 +1,32 @@ +/* Copyright (C) 2016 B.A.T.M.A.N. contributors: + * + * Matthias Schiffer + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _NET_BATMAN_ADV_NETLINK_H_ +#define _NET_BATMAN_ADV_NETLINK_H_ + +#include "main.h" + +#include <linux/types.h> + +void batadv_netlink_register(void); +void batadv_netlink_unregister(void); + +int batadv_netlink_tpmeter_notify(struct batadv_priv *bat_priv, const u8 *dst, + u8 result, u32 test_time, u64 total_bytes, + u32 cookie); + +#endif /* _NET_BATMAN_ADV_NETLINK_H_ */ diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 678f06865312..293ef4ffd4e1 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -51,10 +51,12 @@ #include "hard-interface.h" #include "hash.h" +#include "log.h" #include "originator.h" #include "packet.h" #include "routing.h" #include "send.h" +#include "tvlv.h" static struct lock_class_key batadv_nc_coding_hash_lock_class_key; static struct lock_class_key batadv_nc_decoding_hash_lock_class_key; diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 7f51bc2c06eb..7d1e5421f6bc 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -34,11 +34,13 @@ #include <linux/spinlock.h> #include <linux/workqueue.h> +#include "bat_algo.h" #include "distributed-arp-table.h" #include "fragmentation.h" #include "gateway_client.h" #include "hard-interface.h" #include "hash.h" +#include "log.h" #include "multicast.h" #include "network-coding.h" #include "routing.h" @@ -251,10 +253,8 @@ static void batadv_neigh_node_release(struct kref *ref) struct hlist_node *node_tmp; struct batadv_neigh_node *neigh_node; struct batadv_neigh_ifinfo *neigh_ifinfo; - struct batadv_algo_ops *bao; neigh_node = container_of(ref, struct batadv_neigh_node, refcount); - bao = neigh_node->orig_node->bat_priv->bat_algo_ops; hlist_for_each_entry_safe(neigh_ifinfo, node_tmp, &neigh_node->ifinfo_list, list) { @@ -263,9 +263,6 @@ static void batadv_neigh_node_release(struct kref *ref) batadv_hardif_neigh_put(neigh_node->hardif_neigh); - if (bao->bat_neigh_free) - bao->bat_neigh_free(neigh_node); - batadv_hardif_put(neigh_node->if_incoming); kfree_rcu(neigh_node, rcu); @@ -537,8 +534,8 @@ batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface, kref_init(&hardif_neigh->refcount); - if (bat_priv->bat_algo_ops->bat_hardif_neigh_init) - bat_priv->bat_algo_ops->bat_hardif_neigh_init(hardif_neigh); + if (bat_priv->algo_ops->neigh.hardif_init) + bat_priv->algo_ops->neigh.hardif_init(hardif_neigh); hlist_add_head(&hardif_neigh->list, &hard_iface->neigh_list); @@ -602,19 +599,19 @@ batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface, } /** - * batadv_neigh_node_new - create and init a new neigh_node object + * batadv_neigh_node_create - create a neigh node object * @orig_node: originator object representing the neighbour * @hard_iface: the interface where the neighbour is connected to * @neigh_addr: the mac address of the neighbour interface * * Allocates a new neigh_node object and initialises all the generic fields. * - * Return: neighbor when found. Othwerwise NULL + * Return: the neighbour node if found or created or NULL otherwise. */ -struct batadv_neigh_node * -batadv_neigh_node_new(struct batadv_orig_node *orig_node, - struct batadv_hard_iface *hard_iface, - const u8 *neigh_addr) +static struct batadv_neigh_node * +batadv_neigh_node_create(struct batadv_orig_node *orig_node, + struct batadv_hard_iface *hard_iface, + const u8 *neigh_addr) { struct batadv_neigh_node *neigh_node; struct batadv_hardif_neigh_node *hardif_neigh = NULL; @@ -667,6 +664,29 @@ out: } /** + * batadv_neigh_node_get_or_create - retrieve or create a neigh node object + * @orig_node: originator object representing the neighbour + * @hard_iface: the interface where the neighbour is connected to + * @neigh_addr: the mac address of the neighbour interface + * + * Return: the neighbour node if found or created or NULL otherwise. + */ +struct batadv_neigh_node * +batadv_neigh_node_get_or_create(struct batadv_orig_node *orig_node, + struct batadv_hard_iface *hard_iface, + const u8 *neigh_addr) +{ + struct batadv_neigh_node *neigh_node = NULL; + + /* first check without locking to avoid the overhead */ + neigh_node = batadv_neigh_node_get(orig_node, hard_iface, neigh_addr); + if (neigh_node) + return neigh_node; + + return batadv_neigh_node_create(orig_node, hard_iface, neigh_addr); +} + +/** * batadv_hardif_neigh_seq_print_text - print the single hop neighbour list * @seq: neighbour table seq_file struct * @offset: not used @@ -686,17 +706,17 @@ int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset) seq_printf(seq, "[B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s %s)]\n", BATADV_SOURCE_VERSION, primary_if->net_dev->name, primary_if->net_dev->dev_addr, net_dev->name, - bat_priv->bat_algo_ops->name); + bat_priv->algo_ops->name); batadv_hardif_put(primary_if); - if (!bat_priv->bat_algo_ops->bat_neigh_print) { + if (!bat_priv->algo_ops->neigh.print) { seq_puts(seq, "No printing function for this routing protocol\n"); return 0; } - bat_priv->bat_algo_ops->bat_neigh_print(bat_priv, seq); + bat_priv->algo_ops->neigh.print(bat_priv, seq); return 0; } @@ -747,8 +767,8 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu) batadv_frag_purge_orig(orig_node, NULL); - if (orig_node->bat_priv->bat_algo_ops->bat_orig_free) - orig_node->bat_priv->bat_algo_ops->bat_orig_free(orig_node); + if (orig_node->bat_priv->algo_ops->orig.free) + orig_node->bat_priv->algo_ops->orig.free(orig_node); kfree(orig_node->tt_buff); kfree(orig_node); @@ -1077,12 +1097,12 @@ batadv_find_best_neighbor(struct batadv_priv *bat_priv, struct batadv_hard_iface *if_outgoing) { struct batadv_neigh_node *best = NULL, *neigh; - struct batadv_algo_ops *bao = bat_priv->bat_algo_ops; + struct batadv_algo_ops *bao = bat_priv->algo_ops; rcu_read_lock(); hlist_for_each_entry_rcu(neigh, &orig_node->neigh_list, list) { - if (best && (bao->bat_neigh_cmp(neigh, if_outgoing, - best, if_outgoing) <= 0)) + if (best && (bao->neigh.cmp(neigh, if_outgoing, best, + if_outgoing) <= 0)) continue; if (!kref_get_unless_zero(&neigh->refcount)) @@ -1234,18 +1254,17 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset) seq_printf(seq, "[B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s %s)]\n", BATADV_SOURCE_VERSION, primary_if->net_dev->name, primary_if->net_dev->dev_addr, net_dev->name, - bat_priv->bat_algo_ops->name); + bat_priv->algo_ops->name); batadv_hardif_put(primary_if); - if (!bat_priv->bat_algo_ops->bat_orig_print) { + if (!bat_priv->algo_ops->orig.print) { seq_puts(seq, "No printing function for this routing protocol\n"); return 0; } - bat_priv->bat_algo_ops->bat_orig_print(bat_priv, seq, - BATADV_IF_DEFAULT); + bat_priv->algo_ops->orig.print(bat_priv, seq, BATADV_IF_DEFAULT); return 0; } @@ -1272,7 +1291,7 @@ int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset) } bat_priv = netdev_priv(hard_iface->soft_iface); - if (!bat_priv->bat_algo_ops->bat_orig_print) { + if (!bat_priv->algo_ops->orig.print) { seq_puts(seq, "No printing function for this routing protocol\n"); goto out; @@ -1286,9 +1305,9 @@ int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset) seq_printf(seq, "[B.A.T.M.A.N. adv %s, IF/MAC: %s/%pM (%s %s)]\n", BATADV_SOURCE_VERSION, hard_iface->net_dev->name, hard_iface->net_dev->dev_addr, - hard_iface->soft_iface->name, bat_priv->bat_algo_ops->name); + hard_iface->soft_iface->name, bat_priv->algo_ops->name); - bat_priv->bat_algo_ops->bat_orig_print(bat_priv, seq, hard_iface); + bat_priv->algo_ops->orig.print(bat_priv, seq, hard_iface); out: if (hard_iface) @@ -1300,7 +1319,7 @@ int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface, int max_if_num) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); - struct batadv_algo_ops *bao = bat_priv->bat_algo_ops; + struct batadv_algo_ops *bao = bat_priv->algo_ops; struct batadv_hashtable *hash = bat_priv->orig_hash; struct hlist_head *head; struct batadv_orig_node *orig_node; @@ -1316,9 +1335,8 @@ int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface, rcu_read_lock(); hlist_for_each_entry_rcu(orig_node, head, hash_entry) { ret = 0; - if (bao->bat_orig_add_if) - ret = bao->bat_orig_add_if(orig_node, - max_if_num); + if (bao->orig.add_if) + ret = bao->orig.add_if(orig_node, max_if_num); if (ret == -ENOMEM) goto err; } @@ -1340,7 +1358,7 @@ int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface, struct hlist_head *head; struct batadv_hard_iface *hard_iface_tmp; struct batadv_orig_node *orig_node; - struct batadv_algo_ops *bao = bat_priv->bat_algo_ops; + struct batadv_algo_ops *bao = bat_priv->algo_ops; u32 i; int ret; @@ -1353,10 +1371,9 @@ int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface, rcu_read_lock(); hlist_for_each_entry_rcu(orig_node, head, hash_entry) { ret = 0; - if (bao->bat_orig_del_if) - ret = bao->bat_orig_del_if(orig_node, - max_if_num, - hard_iface->if_num); + if (bao->orig.del_if) + ret = bao->orig.del_if(orig_node, max_if_num, + hard_iface->if_num); if (ret == -ENOMEM) goto err; } diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index 64a8951e5844..566306bf05dc 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -46,9 +46,9 @@ batadv_hardif_neigh_get(const struct batadv_hard_iface *hard_iface, void batadv_hardif_neigh_put(struct batadv_hardif_neigh_node *hardif_neigh); struct batadv_neigh_node * -batadv_neigh_node_new(struct batadv_orig_node *orig_node, - struct batadv_hard_iface *hard_iface, - const u8 *neigh_addr); +batadv_neigh_node_get_or_create(struct batadv_orig_node *orig_node, + struct batadv_hard_iface *hard_iface, + const u8 *neigh_addr); void batadv_neigh_node_put(struct batadv_neigh_node *neigh_node); struct batadv_neigh_node * batadv_orig_router_get(struct batadv_orig_node *orig_node, diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index 372128ddb474..6b011ff64dd8 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -21,6 +21,8 @@ #include <asm/byteorder.h> #include <linux/types.h> +#define batadv_tp_is_error(n) ((u8)n > 127 ? 1 : 0) + /** * enum batadv_packettype - types for batman-adv encapsulated packets * @BATADV_IV_OGM: originator messages for B.A.T.M.A.N. IV @@ -93,6 +95,7 @@ enum batadv_icmp_packettype { BATADV_ECHO_REQUEST = 8, BATADV_TTL_EXCEEDED = 11, BATADV_PARAMETER_PROBLEM = 12, + BATADV_TP = 15, }; /** @@ -285,6 +288,16 @@ struct batadv_elp_packet { #define BATADV_ELP_HLEN sizeof(struct batadv_elp_packet) /** + * enum batadv_icmp_user_cmd_type - types for batman-adv icmp cmd modes + * @BATADV_TP_START: start a throughput meter run + * @BATADV_TP_STOP: stop a throughput meter run + */ +enum batadv_icmp_user_cmd_type { + BATADV_TP_START = 0, + BATADV_TP_STOP = 2, +}; + +/** * struct batadv_icmp_header - common members among all the ICMP packets * @packet_type: batman-adv packet type, part of the general header * @version: batman-adv protocol version, part of the genereal header @@ -334,6 +347,47 @@ struct batadv_icmp_packet { __be16 seqno; }; +/** + * struct batadv_icmp_tp_packet - ICMP TP Meter packet + * @packet_type: batman-adv packet type, part of the general header + * @version: batman-adv protocol version, part of the genereal header + * @ttl: time to live for this packet, part of the genereal header + * @msg_type: ICMP packet type + * @dst: address of the destination node + * @orig: address of the source node + * @uid: local ICMP socket identifier + * @subtype: TP packet subtype (see batadv_icmp_tp_subtype) + * @session: TP session identifier + * @seqno: the TP sequence number + * @timestamp: time when the packet has been sent. This value is filled in a + * TP_MSG and echoed back in the next TP_ACK so that the sender can compute the + * RTT. Since it is read only by the host which wrote it, there is no need to + * store it using network order + */ +struct batadv_icmp_tp_packet { + u8 packet_type; + u8 version; + u8 ttl; + u8 msg_type; /* see ICMP message types above */ + u8 dst[ETH_ALEN]; + u8 orig[ETH_ALEN]; + u8 uid; + u8 subtype; + u8 session[2]; + __be32 seqno; + __be32 timestamp; +}; + +/** + * enum batadv_icmp_tp_subtype - ICMP TP Meter packet subtypes + * @BATADV_TP_MSG: Msg from sender to receiver + * @BATADV_TP_ACK: acknowledgment from receiver to sender + */ +enum batadv_icmp_tp_subtype { + BATADV_TP_MSG = 0, + BATADV_TP_ACK, +}; + #define BATADV_RR_LEN 16 /** @@ -420,6 +474,7 @@ struct batadv_unicast_4addr_packet { * @dest: final destination used when routing fragments * @orig: originator of the fragment used when merging the packet * @no: fragment number within this sequence + * @priority: priority of frame, from ToS IP precedence or 802.1p * @reserved: reserved byte for alignment * @seqno: sequence identification * @total_size: size of the merged packet @@ -430,9 +485,11 @@ struct batadv_frag_packet { u8 ttl; #if defined(__BIG_ENDIAN_BITFIELD) u8 no:4; - u8 reserved:4; + u8 priority:3; + u8 reserved:1; #elif defined(__LITTLE_ENDIAN_BITFIELD) - u8 reserved:4; + u8 reserved:1; + u8 priority:3; u8 no:4; #else #error "unknown bitfield endianness" diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 396c0134c5ab..af8e11933928 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -40,12 +40,15 @@ #include "fragmentation.h" #include "hard-interface.h" #include "icmp_socket.h" +#include "log.h" #include "network-coding.h" #include "originator.h" #include "packet.h" #include "send.h" #include "soft-interface.h" +#include "tp_meter.h" #include "translation-table.h" +#include "tvlv.h" static int batadv_route_unicast_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if); @@ -268,10 +271,19 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv, icmph->ttl = BATADV_TTL; res = batadv_send_skb_to_orig(skb, orig_node, NULL); - if (res != NET_XMIT_DROP) - ret = NET_RX_SUCCESS; + if (res == -1) + goto out; + + ret = NET_RX_SUCCESS; break; + case BATADV_TP: + if (!pskb_may_pull(skb, sizeof(struct batadv_icmp_tp_packet))) + goto out; + + batadv_tp_meter_recv(bat_priv, skb); + ret = NET_RX_SUCCESS; + goto out; default: /* drop unknown type */ goto out; @@ -290,7 +302,7 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if = NULL; struct batadv_orig_node *orig_node = NULL; struct batadv_icmp_packet *icmp_packet; - int ret = NET_RX_DROP; + int res, ret = NET_RX_DROP; icmp_packet = (struct batadv_icmp_packet *)skb->data; @@ -321,7 +333,8 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv, icmp_packet->msg_type = BATADV_TTL_EXCEEDED; icmp_packet->ttl = BATADV_TTL; - if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) + res = batadv_send_skb_to_orig(skb, orig_node, NULL); + if (res != -1) ret = NET_RX_SUCCESS; out: @@ -341,7 +354,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, struct ethhdr *ethhdr; struct batadv_orig_node *orig_node = NULL; int hdr_size = sizeof(struct batadv_icmp_header); - int ret = NET_RX_DROP; + int res, ret = NET_RX_DROP; /* drop packet if it has not necessary minimum size */ if (unlikely(!pskb_may_pull(skb, hdr_size))) @@ -408,7 +421,8 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, icmph->ttl--; /* route it */ - if (batadv_send_skb_to_orig(skb, orig_node, recv_if) != NET_XMIT_DROP) + res = batadv_send_skb_to_orig(skb, orig_node, recv_if); + if (res != -1) ret = NET_RX_SUCCESS; out: @@ -469,7 +483,7 @@ batadv_find_router(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, struct batadv_hard_iface *recv_if) { - struct batadv_algo_ops *bao = bat_priv->bat_algo_ops; + struct batadv_algo_ops *bao = bat_priv->algo_ops; struct batadv_neigh_node *first_candidate_router = NULL; struct batadv_neigh_node *next_candidate_router = NULL; struct batadv_neigh_node *router, *cand_router = NULL; @@ -523,9 +537,9 @@ batadv_find_router(struct batadv_priv *bat_priv, /* alternative candidate should be good enough to be * considered */ - if (!bao->bat_neigh_is_similar_or_better(cand_router, - cand->if_outgoing, - router, recv_if)) + if (!bao->neigh.is_similar_or_better(cand_router, + cand->if_outgoing, router, + recv_if)) goto next; /* don't use the same router twice */ @@ -645,6 +659,8 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, len = skb->len; res = batadv_send_skb_to_orig(skb, orig_node, recv_if); + if (res == -1) + goto out; /* translate transmit result into receive result */ if (res == NET_XMIT_SUCCESS) { @@ -652,13 +668,10 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD); batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES, len + ETH_HLEN); - - ret = NET_RX_SUCCESS; - } else if (res == -EINPROGRESS) { - /* skb was buffered and consumed */ - ret = NET_RX_SUCCESS; } + ret = NET_RX_SUCCESS; + out: if (orig_node) batadv_orig_node_put(orig_node); @@ -1007,6 +1020,8 @@ int batadv_recv_frag_packet(struct sk_buff *skb, if (!orig_node_src) goto out; + skb->priority = frag_packet->priority + 256; + /* Route the fragment if it is not for us and too big to be merged. */ if (!batadv_is_my_mac(bat_priv, frag_packet->dest) && batadv_frag_skb_fwd(skb, recv_if, orig_node_src)) { diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index b1a4e8a811c8..3a10d87b4b76 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -20,10 +20,11 @@ #include <linux/atomic.h> #include <linux/byteorder/generic.h> +#include <linux/errno.h> #include <linux/etherdevice.h> #include <linux/fs.h> -#include <linux/if_ether.h> #include <linux/if.h> +#include <linux/if_ether.h> #include <linux/jiffies.h> #include <linux/kernel.h> #include <linux/kref.h> @@ -42,6 +43,7 @@ #include "fragmentation.h" #include "gateway_client.h" #include "hard-interface.h" +#include "log.h" #include "network-coding.h" #include "originator.h" #include "routing.h" @@ -71,6 +73,7 @@ int batadv_send_skb_packet(struct sk_buff *skb, { struct batadv_priv *bat_priv; struct ethhdr *ethhdr; + int ret; bat_priv = netdev_priv(hard_iface->soft_iface); @@ -108,8 +111,15 @@ int batadv_send_skb_packet(struct sk_buff *skb, /* dev_queue_xmit() returns a negative result on error. However on * congestion and traffic shaping, it drops and returns NET_XMIT_DROP * (which is > 0). This will not be treated as an error. + * + * a negative value cannot be returned because it could be interepreted + * as not consumed skb by callers of batadv_send_skb_to_orig. */ - return dev_queue_xmit(skb); + ret = dev_queue_xmit(skb); + if (ret < 0) + ret = NET_XMIT_DROP; + + return ret; send_skb_err: kfree_skb(skb); return NET_XMIT_DROP; @@ -155,8 +165,11 @@ int batadv_send_unicast_skb(struct sk_buff *skb, * host, NULL can be passed as recv_if and no interface alternating is * attempted. * - * Return: NET_XMIT_SUCCESS on success, NET_XMIT_DROP on failure, or - * -EINPROGRESS if the skb is buffered for later transmit. + * Return: -1 on failure (and the skb is not consumed), -EINPROGRESS if the + * skb is buffered for later transmit or the NET_XMIT status returned by the + * lower routine if the packet has been passed down. + * + * If the returning value is not -1 the skb has been consumed. */ int batadv_send_skb_to_orig(struct sk_buff *skb, struct batadv_orig_node *orig_node, @@ -164,7 +177,7 @@ int batadv_send_skb_to_orig(struct sk_buff *skb, { struct batadv_priv *bat_priv = orig_node->bat_priv; struct batadv_neigh_node *neigh_node; - int ret = NET_XMIT_DROP; + int ret = -1; /* batadv_find_router() increases neigh_nodes refcount if found. */ neigh_node = batadv_find_router(bat_priv, orig_node, recv_if); @@ -177,8 +190,7 @@ int batadv_send_skb_to_orig(struct sk_buff *skb, if (atomic_read(&bat_priv->fragmentation) && skb->len > neigh_node->if_incoming->net_dev->mtu) { /* Fragment and send packet. */ - if (batadv_frag_send_packet(skb, orig_node, neigh_node)) - ret = NET_XMIT_SUCCESS; + ret = batadv_frag_send_packet(skb, orig_node, neigh_node); goto out; } @@ -187,12 +199,10 @@ int batadv_send_skb_to_orig(struct sk_buff *skb, * (i.e. being forwarded). If the packet originates from this node or if * network coding fails, then send the packet as usual. */ - if (recv_if && batadv_nc_skb_forward(skb, neigh_node)) { + if (recv_if && batadv_nc_skb_forward(skb, neigh_node)) ret = -EINPROGRESS; - } else { - batadv_send_unicast_skb(skb, neigh_node); - ret = NET_XMIT_SUCCESS; - } + else + ret = batadv_send_unicast_skb(skb, neigh_node); out: if (neigh_node) @@ -318,7 +328,7 @@ int batadv_send_skb_unicast(struct batadv_priv *bat_priv, { struct batadv_unicast_packet *unicast_packet; struct ethhdr *ethhdr; - int ret = NET_XMIT_DROP; + int res, ret = NET_XMIT_DROP; if (!orig_node) goto out; @@ -355,7 +365,8 @@ int batadv_send_skb_unicast(struct batadv_priv *bat_priv, if (batadv_tt_global_client_is_roaming(bat_priv, ethhdr->h_dest, vid)) unicast_packet->ttvn = unicast_packet->ttvn - 1; - if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) + res = batadv_send_skb_to_orig(skb, orig_node, NULL); + if (res != -1) ret = NET_XMIT_SUCCESS; out: @@ -428,27 +439,7 @@ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb, orig_node, vid); } -void batadv_schedule_bat_ogm(struct batadv_hard_iface *hard_iface) -{ - struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); - - if ((hard_iface->if_status == BATADV_IF_NOT_IN_USE) || - (hard_iface->if_status == BATADV_IF_TO_BE_REMOVED)) - return; - - /* the interface gets activated here to avoid race conditions between - * the moment of activating the interface in - * hardif_activate_interface() where the originator mac is set and - * outdated packets (especially uninitialized mac addresses) in the - * packet queue - */ - if (hard_iface->if_status == BATADV_IF_TO_BE_ACTIVATED) - hard_iface->if_status = BATADV_IF_ACTIVE; - - bat_priv->bat_algo_ops->bat_ogm_schedule(hard_iface); -} - -static void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet) +void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet) { kfree_skb(forw_packet->skb); if (forw_packet->if_incoming) @@ -604,45 +595,6 @@ out: atomic_inc(&bat_priv->bcast_queue_left); } -void batadv_send_outstanding_bat_ogm_packet(struct work_struct *work) -{ - struct delayed_work *delayed_work; - struct batadv_forw_packet *forw_packet; - struct batadv_priv *bat_priv; - - delayed_work = to_delayed_work(work); - forw_packet = container_of(delayed_work, struct batadv_forw_packet, - delayed_work); - bat_priv = netdev_priv(forw_packet->if_incoming->soft_iface); - spin_lock_bh(&bat_priv->forw_bat_list_lock); - hlist_del(&forw_packet->list); - spin_unlock_bh(&bat_priv->forw_bat_list_lock); - - if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING) - goto out; - - bat_priv->bat_algo_ops->bat_ogm_emit(forw_packet); - - /* we have to have at least one packet in the queue to determine the - * queues wake up time unless we are shutting down. - * - * only re-schedule if this is the "original" copy, e.g. the OGM of the - * primary interface should only be rescheduled once per period, but - * this function will be called for the forw_packet instances of the - * other secondary interfaces as well. - */ - if (forw_packet->own && - forw_packet->if_incoming == forw_packet->if_outgoing) - batadv_schedule_bat_ogm(forw_packet->if_incoming); - -out: - /* don't count own packet */ - if (!forw_packet->own) - atomic_inc(&bat_priv->batman_queue_left); - - batadv_forw_packet_free(forw_packet); -} - void batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, const struct batadv_hard_iface *hard_iface) diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h index 6fd7270d8ce6..7cecb7563b45 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -26,8 +26,8 @@ #include "packet.h" struct sk_buff; -struct work_struct; +void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet); int batadv_send_skb_to_orig(struct sk_buff *skb, struct batadv_orig_node *orig_node, struct batadv_hard_iface *recv_if); @@ -38,11 +38,9 @@ int batadv_send_broadcast_skb(struct sk_buff *skb, struct batadv_hard_iface *hard_iface); int batadv_send_unicast_skb(struct sk_buff *skb, struct batadv_neigh_node *neigh_node); -void batadv_schedule_bat_ogm(struct batadv_hard_iface *hard_iface); int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, const struct sk_buff *skb, unsigned long delay); -void batadv_send_outstanding_bat_ogm_packet(struct work_struct *work); void batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, const struct batadv_hard_iface *hard_iface); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 287a3879ed7e..7527c0652dd5 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -48,6 +48,7 @@ #include <linux/types.h> #include <linux/workqueue.h> +#include "bat_algo.h" #include "bridge_loop_avoidance.h" #include "debugfs.h" #include "distributed-arp-table.h" @@ -255,7 +256,7 @@ static int batadv_interface_tx(struct sk_buff *skb, if (batadv_compare_eth(ethhdr->h_dest, ectp_addr)) goto dropped; - gw_mode = atomic_read(&bat_priv->gw_mode); + gw_mode = atomic_read(&bat_priv->gw.mode); if (is_multicast_ether_addr(ethhdr->h_dest)) { /* if gw mode is off, broadcast every packet */ if (gw_mode == BATADV_GW_MODE_OFF) { @@ -808,6 +809,10 @@ static int batadv_softif_init_late(struct net_device *dev) atomic_set(&bat_priv->distributed_arp_table, 1); #endif #ifdef CONFIG_BATMAN_ADV_MCAST + bat_priv->mcast.querier_ipv4.exists = false; + bat_priv->mcast.querier_ipv4.shadowing = false; + bat_priv->mcast.querier_ipv6.exists = false; + bat_priv->mcast.querier_ipv6.shadowing = false; bat_priv->mcast.flags = BATADV_NO_FLAGS; atomic_set(&bat_priv->multicast_mode, 1); atomic_set(&bat_priv->mcast.num_disabled, 0); @@ -815,8 +820,8 @@ static int batadv_softif_init_late(struct net_device *dev) atomic_set(&bat_priv->mcast.num_want_all_ipv4, 0); atomic_set(&bat_priv->mcast.num_want_all_ipv6, 0); #endif - atomic_set(&bat_priv->gw_mode, BATADV_GW_MODE_OFF); - atomic_set(&bat_priv->gw_sel_class, 20); + atomic_set(&bat_priv->gw.mode, BATADV_GW_MODE_OFF); + atomic_set(&bat_priv->gw.sel_class, 20); atomic_set(&bat_priv->gw.bandwidth_down, 100); atomic_set(&bat_priv->gw.bandwidth_up, 20); atomic_set(&bat_priv->orig_interval, 1000); @@ -837,6 +842,8 @@ static int batadv_softif_init_late(struct net_device *dev) #ifdef CONFIG_BATMAN_ADV_BLA atomic_set(&bat_priv->bla.num_requests, 0); #endif + atomic_set(&bat_priv->tp_num, 0); + bat_priv->tt.last_changeset = NULL; bat_priv->tt.last_changeset_len = 0; bat_priv->isolation_mark = 0; diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index 414b2074165f..fe9ca94ddee2 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -25,8 +25,8 @@ #include <linux/fs.h> #include <linux/if.h> #include <linux/if_vlan.h> -#include <linux/kref.h> #include <linux/kernel.h> +#include <linux/kref.h> #include <linux/netdevice.h> #include <linux/printk.h> #include <linux/rculist.h> @@ -38,11 +38,12 @@ #include <linux/string.h> #include <linux/stringify.h> +#include "bridge_loop_avoidance.h" #include "distributed-arp-table.h" #include "gateway_client.h" #include "gateway_common.h" -#include "bridge_loop_avoidance.h" #include "hard-interface.h" +#include "log.h" #include "network-coding.h" #include "packet.h" #include "soft-interface.h" @@ -389,12 +390,12 @@ static int batadv_store_uint_attr(const char *buff, size_t count, return count; } -static inline ssize_t -__batadv_store_uint_attr(const char *buff, size_t count, - int min, int max, - void (*post_func)(struct net_device *), - const struct attribute *attr, - atomic_t *attr_store, struct net_device *net_dev) +static ssize_t __batadv_store_uint_attr(const char *buff, size_t count, + int min, int max, + void (*post_func)(struct net_device *), + const struct attribute *attr, + atomic_t *attr_store, + struct net_device *net_dev) { int ret; @@ -411,7 +412,7 @@ static ssize_t batadv_show_bat_algo(struct kobject *kobj, { struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); - return sprintf(buff, "%s\n", bat_priv->bat_algo_ops->name); + return sprintf(buff, "%s\n", bat_priv->algo_ops->name); } static void batadv_post_gw_reselect(struct net_device *net_dev) @@ -427,7 +428,7 @@ static ssize_t batadv_show_gw_mode(struct kobject *kobj, struct attribute *attr, struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); int bytes_written; - switch (atomic_read(&bat_priv->gw_mode)) { + switch (atomic_read(&bat_priv->gw.mode)) { case BATADV_GW_MODE_CLIENT: bytes_written = sprintf(buff, "%s\n", BATADV_GW_MODE_CLIENT_NAME); @@ -476,10 +477,10 @@ static ssize_t batadv_store_gw_mode(struct kobject *kobj, return -EINVAL; } - if (atomic_read(&bat_priv->gw_mode) == gw_mode_tmp) + if (atomic_read(&bat_priv->gw.mode) == gw_mode_tmp) return count; - switch (atomic_read(&bat_priv->gw_mode)) { + switch (atomic_read(&bat_priv->gw.mode)) { case BATADV_GW_MODE_CLIENT: curr_gw_mode_str = BATADV_GW_MODE_CLIENT_NAME; break; @@ -508,7 +509,7 @@ static ssize_t batadv_store_gw_mode(struct kobject *kobj, * state */ batadv_gw_check_client_stop(bat_priv); - atomic_set(&bat_priv->gw_mode, (unsigned int)gw_mode_tmp); + atomic_set(&bat_priv->gw.mode, (unsigned int)gw_mode_tmp); batadv_gw_tvlv_container_update(bat_priv); return count; } @@ -624,7 +625,7 @@ BATADV_ATTR_SIF_UINT(orig_interval, orig_interval, S_IRUGO | S_IWUSR, 2 * BATADV_JITTER, INT_MAX, NULL); BATADV_ATTR_SIF_UINT(hop_penalty, hop_penalty, S_IRUGO | S_IWUSR, 0, BATADV_TQ_MAX_VALUE, NULL); -BATADV_ATTR_SIF_UINT(gw_sel_class, gw_sel_class, S_IRUGO | S_IWUSR, 1, +BATADV_ATTR_SIF_UINT(gw_sel_class, gw.sel_class, S_IRUGO | S_IWUSR, 1, BATADV_TQ_MAX_VALUE, batadv_post_gw_reselect); static BATADV_ATTR(gw_bandwidth, S_IRUGO | S_IWUSR, batadv_show_gw_bwidth, batadv_store_gw_bwidth); diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c new file mode 100644 index 000000000000..2333777f919d --- /dev/null +++ b/net/batman-adv/tp_meter.c @@ -0,0 +1,1507 @@ +/* Copyright (C) 2012-2016 B.A.T.M.A.N. contributors: + * + * Edo Monticelli, Antonio Quartulli + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "tp_meter.h" +#include "main.h" + +#include <linux/atomic.h> +#include <linux/bug.h> +#include <linux/byteorder/generic.h> +#include <linux/cache.h> +#include <linux/compiler.h> +#include <linux/device.h> +#include <linux/etherdevice.h> +#include <linux/fs.h> +#include <linux/if_ether.h> +#include <linux/jiffies.h> +#include <linux/kernel.h> +#include <linux/kref.h> +#include <linux/kthread.h> +#include <linux/list.h> +#include <linux/netdevice.h> +#include <linux/param.h> +#include <linux/printk.h> +#include <linux/random.h> +#include <linux/rculist.h> +#include <linux/rcupdate.h> +#include <linux/sched.h> +#include <linux/skbuff.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/stddef.h> +#include <linux/string.h> +#include <linux/timer.h> +#include <linux/wait.h> +#include <linux/workqueue.h> +#include <uapi/linux/batman_adv.h> + +#include "hard-interface.h" +#include "log.h" +#include "netlink.h" +#include "originator.h" +#include "packet.h" +#include "send.h" + +/** + * BATADV_TP_DEF_TEST_LENGTH - Default test length if not specified by the user + * in milliseconds + */ +#define BATADV_TP_DEF_TEST_LENGTH 10000 + +/** + * BATADV_TP_AWND - Advertised window by the receiver (in bytes) + */ +#define BATADV_TP_AWND 0x20000000 + +/** + * BATADV_TP_RECV_TIMEOUT - Receiver activity timeout. If the receiver does not + * get anything for such amount of milliseconds, the connection is killed + */ +#define BATADV_TP_RECV_TIMEOUT 1000 + +/** + * BATADV_TP_MAX_RTO - Maximum sender timeout. If the sender RTO gets beyond + * such amound of milliseconds, the receiver is considered unreachable and the + * connection is killed + */ +#define BATADV_TP_MAX_RTO 30000 + +/** + * BATADV_TP_FIRST_SEQ - First seqno of each session. The number is rather high + * in order to immediately trigger a wrap around (test purposes) + */ +#define BATADV_TP_FIRST_SEQ ((u32)-1 - 2000) + +/** + * BATADV_TP_PLEN - length of the payload (data after the batadv_unicast header) + * to simulate + */ +#define BATADV_TP_PLEN (BATADV_TP_PACKET_LEN - ETH_HLEN - \ + sizeof(struct batadv_unicast_packet)) + +static u8 batadv_tp_prerandom[4096] __read_mostly; + +/** + * batadv_tp_session_cookie - generate session cookie based on session ids + * @session: TP session identifier + * @icmp_uid: icmp pseudo uid of the tp session + * + * Return: 32 bit tp_meter session cookie + */ +static u32 batadv_tp_session_cookie(const u8 session[2], u8 icmp_uid) +{ + u32 cookie; + + cookie = icmp_uid << 16; + cookie |= session[0] << 8; + cookie |= session[1]; + + return cookie; +} + +/** + * batadv_tp_cwnd - compute the new cwnd size + * @base: base cwnd size value + * @increment: the value to add to base to get the new size + * @min: minumim cwnd value (usually MSS) + * + * Return the new cwnd size and ensures it does not exceed the Advertised + * Receiver Window size. It is wrap around safe. + * For details refer to Section 3.1 of RFC5681 + * + * Return: new congestion window size in bytes + */ +static u32 batadv_tp_cwnd(u32 base, u32 increment, u32 min) +{ + u32 new_size = base + increment; + + /* check for wrap-around */ + if (new_size < base) + new_size = (u32)ULONG_MAX; + + new_size = min_t(u32, new_size, BATADV_TP_AWND); + + return max_t(u32, new_size, min); +} + +/** + * batadv_tp_updated_cwnd - update the Congestion Windows + * @tp_vars: the private data of the current TP meter session + * @mss: maximum segment size of transmission + * + * 1) if the session is in Slow Start, the CWND has to be increased by 1 + * MSS every unique received ACK + * 2) if the session is in Congestion Avoidance, the CWND has to be + * increased by MSS * MSS / CWND for every unique received ACK + */ +static void batadv_tp_update_cwnd(struct batadv_tp_vars *tp_vars, u32 mss) +{ + spin_lock_bh(&tp_vars->cwnd_lock); + + /* slow start... */ + if (tp_vars->cwnd <= tp_vars->ss_threshold) { + tp_vars->dec_cwnd = 0; + tp_vars->cwnd = batadv_tp_cwnd(tp_vars->cwnd, mss, mss); + spin_unlock_bh(&tp_vars->cwnd_lock); + return; + } + + /* increment CWND at least of 1 (section 3.1 of RFC5681) */ + tp_vars->dec_cwnd += max_t(u32, 1U << 3, + ((mss * mss) << 6) / (tp_vars->cwnd << 3)); + if (tp_vars->dec_cwnd < (mss << 3)) { + spin_unlock_bh(&tp_vars->cwnd_lock); + return; + } + + tp_vars->cwnd = batadv_tp_cwnd(tp_vars->cwnd, mss, mss); + tp_vars->dec_cwnd = 0; + + spin_unlock_bh(&tp_vars->cwnd_lock); +} + +/** + * batadv_tp_update_rto - calculate new retransmission timeout + * @tp_vars: the private data of the current TP meter session + * @new_rtt: new roundtrip time in msec + */ +static void batadv_tp_update_rto(struct batadv_tp_vars *tp_vars, + u32 new_rtt) +{ + long m = new_rtt; + + /* RTT update + * Details in Section 2.2 and 2.3 of RFC6298 + * + * It's tricky to understand. Don't lose hair please. + * Inspired by tcp_rtt_estimator() tcp_input.c + */ + if (tp_vars->srtt != 0) { + m -= (tp_vars->srtt >> 3); /* m is now error in rtt est */ + tp_vars->srtt += m; /* rtt = 7/8 srtt + 1/8 new */ + if (m < 0) + m = -m; + + m -= (tp_vars->rttvar >> 2); + tp_vars->rttvar += m; /* mdev ~= 3/4 rttvar + 1/4 new */ + } else { + /* first measure getting in */ + tp_vars->srtt = m << 3; /* take the measured time to be srtt */ + tp_vars->rttvar = m << 1; /* new_rtt / 2 */ + } + + /* rto = srtt + 4 * rttvar. + * rttvar is scaled by 4, therefore doesn't need to be multiplied + */ + tp_vars->rto = (tp_vars->srtt >> 3) + tp_vars->rttvar; +} + +/** + * batadv_tp_batctl_notify - send client status result to client + * @reason: reason for tp meter session stop + * @dst: destination of tp_meter session + * @bat_priv: the bat priv with all the soft interface information + * @start_time: start of transmission in jiffies + * @total_sent: bytes acked to the receiver + * @cookie: cookie of tp_meter session + */ +static void batadv_tp_batctl_notify(enum batadv_tp_meter_reason reason, + const u8 *dst, struct batadv_priv *bat_priv, + unsigned long start_time, u64 total_sent, + u32 cookie) +{ + u32 test_time; + u8 result; + u32 total_bytes; + + if (!batadv_tp_is_error(reason)) { + result = BATADV_TP_REASON_COMPLETE; + test_time = jiffies_to_msecs(jiffies - start_time); + total_bytes = total_sent; + } else { + result = reason; + test_time = 0; + total_bytes = 0; + } + + batadv_netlink_tpmeter_notify(bat_priv, dst, result, test_time, + total_bytes, cookie); +} + +/** + * batadv_tp_batctl_error_notify - send client error result to client + * @reason: reason for tp meter session stop + * @dst: destination of tp_meter session + * @bat_priv: the bat priv with all the soft interface information + * @cookie: cookie of tp_meter session + */ +static void batadv_tp_batctl_error_notify(enum batadv_tp_meter_reason reason, + const u8 *dst, + struct batadv_priv *bat_priv, + u32 cookie) +{ + batadv_tp_batctl_notify(reason, dst, bat_priv, 0, 0, cookie); +} + +/** + * batadv_tp_list_find - find a tp_vars object in the global list + * @bat_priv: the bat priv with all the soft interface information + * @dst: the other endpoint MAC address to look for + * + * Look for a tp_vars object matching dst as end_point and return it after + * having incremented the refcounter. Return NULL is not found + * + * Return: matching tp_vars or NULL when no tp_vars with @dst was found + */ +static struct batadv_tp_vars *batadv_tp_list_find(struct batadv_priv *bat_priv, + const u8 *dst) +{ + struct batadv_tp_vars *pos, *tp_vars = NULL; + + rcu_read_lock(); + hlist_for_each_entry_rcu(pos, &bat_priv->tp_list, list) { + if (!batadv_compare_eth(pos->other_end, dst)) + continue; + + /* most of the time this function is invoked during the normal + * process..it makes sens to pay more when the session is + * finished and to speed the process up during the measurement + */ + if (unlikely(!kref_get_unless_zero(&pos->refcount))) + continue; + + tp_vars = pos; + break; + } + rcu_read_unlock(); + + return tp_vars; +} + +/** + * batadv_tp_list_find_session - find tp_vars session object in the global list + * @bat_priv: the bat priv with all the soft interface information + * @dst: the other endpoint MAC address to look for + * @session: session identifier + * + * Look for a tp_vars object matching dst as end_point, session as tp meter + * session and return it after having incremented the refcounter. Return NULL + * is not found + * + * Return: matching tp_vars or NULL when no tp_vars was found + */ +static struct batadv_tp_vars * +batadv_tp_list_find_session(struct batadv_priv *bat_priv, const u8 *dst, + const u8 *session) +{ + struct batadv_tp_vars *pos, *tp_vars = NULL; + + rcu_read_lock(); + hlist_for_each_entry_rcu(pos, &bat_priv->tp_list, list) { + if (!batadv_compare_eth(pos->other_end, dst)) + continue; + + if (memcmp(pos->session, session, sizeof(pos->session)) != 0) + continue; + + /* most of the time this function is invoked during the normal + * process..it makes sense to pay more when the session is + * finished and to speed the process up during the measurement + */ + if (unlikely(!kref_get_unless_zero(&pos->refcount))) + continue; + + tp_vars = pos; + break; + } + rcu_read_unlock(); + + return tp_vars; +} + +/** + * batadv_tp_vars_release - release batadv_tp_vars from lists and queue for + * free after rcu grace period + * @ref: kref pointer of the batadv_tp_vars + */ +static void batadv_tp_vars_release(struct kref *ref) +{ + struct batadv_tp_vars *tp_vars; + struct batadv_tp_unacked *un, *safe; + + tp_vars = container_of(ref, struct batadv_tp_vars, refcount); + + /* lock should not be needed because this object is now out of any + * context! + */ + spin_lock_bh(&tp_vars->unacked_lock); + list_for_each_entry_safe(un, safe, &tp_vars->unacked_list, list) { + list_del(&un->list); + kfree(un); + } + spin_unlock_bh(&tp_vars->unacked_lock); + + kfree_rcu(tp_vars, rcu); +} + +/** + * batadv_tp_vars_put - decrement the batadv_tp_vars refcounter and possibly + * release it + * @tp_vars: the private data of the current TP meter session to be free'd + */ +static void batadv_tp_vars_put(struct batadv_tp_vars *tp_vars) +{ + kref_put(&tp_vars->refcount, batadv_tp_vars_release); +} + +/** + * batadv_tp_sender_cleanup - cleanup sender data and drop and timer + * @bat_priv: the bat priv with all the soft interface information + * @tp_vars: the private data of the current TP meter session to cleanup + */ +static void batadv_tp_sender_cleanup(struct batadv_priv *bat_priv, + struct batadv_tp_vars *tp_vars) +{ + cancel_delayed_work(&tp_vars->finish_work); + + spin_lock_bh(&tp_vars->bat_priv->tp_list_lock); + hlist_del_rcu(&tp_vars->list); + spin_unlock_bh(&tp_vars->bat_priv->tp_list_lock); + + /* drop list reference */ + batadv_tp_vars_put(tp_vars); + + atomic_dec(&tp_vars->bat_priv->tp_num); + + /* kill the timer and remove its reference */ + del_timer_sync(&tp_vars->timer); + /* the worker might have rearmed itself therefore we kill it again. Note + * that if the worker should run again before invoking the following + * del_timer(), it would not re-arm itself once again because the status + * is OFF now + */ + del_timer(&tp_vars->timer); + batadv_tp_vars_put(tp_vars); +} + +/** + * batadv_tp_sender_end - print info about ended session and inform client + * @bat_priv: the bat priv with all the soft interface information + * @tp_vars: the private data of the current TP meter session + */ +static void batadv_tp_sender_end(struct batadv_priv *bat_priv, + struct batadv_tp_vars *tp_vars) +{ + u32 session_cookie; + + batadv_dbg(BATADV_DBG_TP_METER, bat_priv, + "Test towards %pM finished..shutting down (reason=%d)\n", + tp_vars->other_end, tp_vars->reason); + + batadv_dbg(BATADV_DBG_TP_METER, bat_priv, + "Last timing stats: SRTT=%ums RTTVAR=%ums RTO=%ums\n", + tp_vars->srtt >> 3, tp_vars->rttvar >> 2, tp_vars->rto); + + batadv_dbg(BATADV_DBG_TP_METER, bat_priv, + "Final values: cwnd=%u ss_threshold=%u\n", + tp_vars->cwnd, tp_vars->ss_threshold); + + session_cookie = batadv_tp_session_cookie(tp_vars->session, + tp_vars->icmp_uid); + + batadv_tp_batctl_notify(tp_vars->reason, + tp_vars->other_end, + bat_priv, + tp_vars->start_time, + atomic64_read(&tp_vars->tot_sent), + session_cookie); +} + +/** + * batadv_tp_sender_shutdown - let sender thread/timer stop gracefully + * @tp_vars: the private data of the current TP meter session + * @reason: reason for tp meter session stop + */ +static void batadv_tp_sender_shutdown(struct batadv_tp_vars *tp_vars, + enum batadv_tp_meter_reason reason) +{ + if (!atomic_dec_and_test(&tp_vars->sending)) + return; + + tp_vars->reason = reason; +} + +/** + * batadv_tp_sender_finish - stop sender session after test_length was reached + * @work: delayed work reference of the related tp_vars + */ +static void batadv_tp_sender_finish(struct work_struct *work) +{ + struct delayed_work *delayed_work; + struct batadv_tp_vars *tp_vars; + + delayed_work = to_delayed_work(work); + tp_vars = container_of(delayed_work, struct batadv_tp_vars, + finish_work); + + batadv_tp_sender_shutdown(tp_vars, BATADV_TP_REASON_COMPLETE); +} + +/** + * batadv_tp_reset_sender_timer - reschedule the sender timer + * @tp_vars: the private TP meter data for this session + * + * Reschedule the timer using tp_vars->rto as delay + */ +static void batadv_tp_reset_sender_timer(struct batadv_tp_vars *tp_vars) +{ + /* most of the time this function is invoked while normal packet + * reception... + */ + if (unlikely(atomic_read(&tp_vars->sending) == 0)) + /* timer ref will be dropped in batadv_tp_sender_cleanup */ + return; + + mod_timer(&tp_vars->timer, jiffies + msecs_to_jiffies(tp_vars->rto)); +} + +/** + * batadv_tp_sender_timeout - timer that fires in case of packet loss + * @arg: address of the related tp_vars + * + * If fired it means that there was packet loss. + * Switch to Slow Start, set the ss_threshold to half of the current cwnd and + * reset the cwnd to 3*MSS + */ +static void batadv_tp_sender_timeout(unsigned long arg) +{ + struct batadv_tp_vars *tp_vars = (struct batadv_tp_vars *)arg; + struct batadv_priv *bat_priv = tp_vars->bat_priv; + + if (atomic_read(&tp_vars->sending) == 0) + return; + + /* if the user waited long enough...shutdown the test */ + if (unlikely(tp_vars->rto >= BATADV_TP_MAX_RTO)) { + batadv_tp_sender_shutdown(tp_vars, + BATADV_TP_REASON_DST_UNREACHABLE); + return; + } + + /* RTO exponential backoff + * Details in Section 5.5 of RFC6298 + */ + tp_vars->rto <<= 1; + + spin_lock_bh(&tp_vars->cwnd_lock); + + tp_vars->ss_threshold = tp_vars->cwnd >> 1; + if (tp_vars->ss_threshold < BATADV_TP_PLEN * 2) + tp_vars->ss_threshold = BATADV_TP_PLEN * 2; + + batadv_dbg(BATADV_DBG_TP_METER, bat_priv, + "Meter: RTO fired during test towards %pM! cwnd=%u new ss_thr=%u, resetting last_sent to %u\n", + tp_vars->other_end, tp_vars->cwnd, tp_vars->ss_threshold, + atomic_read(&tp_vars->last_acked)); + + tp_vars->cwnd = BATADV_TP_PLEN * 3; + + spin_unlock_bh(&tp_vars->cwnd_lock); + + /* resend the non-ACKed packets.. */ + tp_vars->last_sent = atomic_read(&tp_vars->last_acked); + wake_up(&tp_vars->more_bytes); + + batadv_tp_reset_sender_timer(tp_vars); +} + +/** + * batadv_tp_fill_prerandom - Fill buffer with prefetched random bytes + * @tp_vars: the private TP meter data for this session + * @buf: Buffer to fill with bytes + * @nbytes: amount of pseudorandom bytes + */ +static void batadv_tp_fill_prerandom(struct batadv_tp_vars *tp_vars, + u8 *buf, size_t nbytes) +{ + u32 local_offset; + size_t bytes_inbuf; + size_t to_copy; + size_t pos = 0; + + spin_lock_bh(&tp_vars->prerandom_lock); + local_offset = tp_vars->prerandom_offset; + tp_vars->prerandom_offset += nbytes; + tp_vars->prerandom_offset %= sizeof(batadv_tp_prerandom); + spin_unlock_bh(&tp_vars->prerandom_lock); + + while (nbytes) { + local_offset %= sizeof(batadv_tp_prerandom); + bytes_inbuf = sizeof(batadv_tp_prerandom) - local_offset; + to_copy = min(nbytes, bytes_inbuf); + + memcpy(&buf[pos], &batadv_tp_prerandom[local_offset], to_copy); + pos += to_copy; + nbytes -= to_copy; + local_offset = 0; + } +} + +/** + * batadv_tp_send_msg - send a single message + * @tp_vars: the private TP meter data for this session + * @src: source mac address + * @orig_node: the originator of the destination + * @seqno: sequence number of this packet + * @len: length of the entire packet + * @session: session identifier + * @uid: local ICMP "socket" index + * @timestamp: timestamp in jiffies which is replied in ack + * + * Create and send a single TP Meter message. + * + * Return: 0 on success, BATADV_TP_REASON_DST_UNREACHABLE if the destination is + * not reachable, BATADV_TP_REASON_MEMORY_ERROR if the packet couldn't be + * allocated + */ +static int batadv_tp_send_msg(struct batadv_tp_vars *tp_vars, const u8 *src, + struct batadv_orig_node *orig_node, + u32 seqno, size_t len, const u8 *session, + int uid, u32 timestamp) +{ + struct batadv_icmp_tp_packet *icmp; + struct sk_buff *skb; + int r; + u8 *data; + size_t data_len; + + skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN); + if (unlikely(!skb)) + return BATADV_TP_REASON_MEMORY_ERROR; + + skb_reserve(skb, ETH_HLEN); + icmp = (struct batadv_icmp_tp_packet *)skb_put(skb, sizeof(*icmp)); + + /* fill the icmp header */ + ether_addr_copy(icmp->dst, orig_node->orig); + ether_addr_copy(icmp->orig, src); + icmp->version = BATADV_COMPAT_VERSION; + icmp->packet_type = BATADV_ICMP; + icmp->ttl = BATADV_TTL; + icmp->msg_type = BATADV_TP; + icmp->uid = uid; + + icmp->subtype = BATADV_TP_MSG; + memcpy(icmp->session, session, sizeof(icmp->session)); + icmp->seqno = htonl(seqno); + icmp->timestamp = htonl(timestamp); + + data_len = len - sizeof(*icmp); + data = (u8 *)skb_put(skb, data_len); + batadv_tp_fill_prerandom(tp_vars, data, data_len); + + r = batadv_send_skb_to_orig(skb, orig_node, NULL); + if (r == -1) + kfree_skb(skb); + + if (r == NET_XMIT_SUCCESS) + return 0; + + return BATADV_TP_REASON_CANT_SEND; +} + +/** + * batadv_tp_recv_ack - ACK receiving function + * @bat_priv: the bat priv with all the soft interface information + * @skb: the buffer containing the received packet + * + * Process a received TP ACK packet + */ +static void batadv_tp_recv_ack(struct batadv_priv *bat_priv, + const struct sk_buff *skb) +{ + struct batadv_hard_iface *primary_if = NULL; + struct batadv_orig_node *orig_node = NULL; + const struct batadv_icmp_tp_packet *icmp; + struct batadv_tp_vars *tp_vars; + size_t packet_len, mss; + u32 rtt, recv_ack, cwnd; + unsigned char *dev_addr; + + packet_len = BATADV_TP_PLEN; + mss = BATADV_TP_PLEN; + packet_len += sizeof(struct batadv_unicast_packet); + + icmp = (struct batadv_icmp_tp_packet *)skb->data; + + /* find the tp_vars */ + tp_vars = batadv_tp_list_find_session(bat_priv, icmp->orig, + icmp->session); + if (unlikely(!tp_vars)) + return; + + if (unlikely(atomic_read(&tp_vars->sending) == 0)) + goto out; + + /* old ACK? silently drop it.. */ + if (batadv_seq_before(ntohl(icmp->seqno), + (u32)atomic_read(&tp_vars->last_acked))) + goto out; + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (unlikely(!primary_if)) + goto out; + + orig_node = batadv_orig_hash_find(bat_priv, icmp->orig); + if (unlikely(!orig_node)) + goto out; + + /* update RTO with the new sampled RTT, if any */ + rtt = jiffies_to_msecs(jiffies) - ntohl(icmp->timestamp); + if (icmp->timestamp && rtt) + batadv_tp_update_rto(tp_vars, rtt); + + /* ACK for new data... reset the timer */ + batadv_tp_reset_sender_timer(tp_vars); + + recv_ack = ntohl(icmp->seqno); + + /* check if this ACK is a duplicate */ + if (atomic_read(&tp_vars->last_acked) == recv_ack) { + atomic_inc(&tp_vars->dup_acks); + if (atomic_read(&tp_vars->dup_acks) != 3) + goto out; + + if (recv_ack >= tp_vars->recover) + goto out; + + /* if this is the third duplicate ACK do Fast Retransmit */ + batadv_tp_send_msg(tp_vars, primary_if->net_dev->dev_addr, + orig_node, recv_ack, packet_len, + icmp->session, icmp->uid, + jiffies_to_msecs(jiffies)); + + spin_lock_bh(&tp_vars->cwnd_lock); + + /* Fast Recovery */ + tp_vars->fast_recovery = true; + /* Set recover to the last outstanding seqno when Fast Recovery + * is entered. RFC6582, Section 3.2, step 1 + */ + tp_vars->recover = tp_vars->last_sent; + tp_vars->ss_threshold = tp_vars->cwnd >> 1; + batadv_dbg(BATADV_DBG_TP_METER, bat_priv, + "Meter: Fast Recovery, (cur cwnd=%u) ss_thr=%u last_sent=%u recv_ack=%u\n", + tp_vars->cwnd, tp_vars->ss_threshold, + tp_vars->last_sent, recv_ack); + tp_vars->cwnd = batadv_tp_cwnd(tp_vars->ss_threshold, 3 * mss, + mss); + tp_vars->dec_cwnd = 0; + tp_vars->last_sent = recv_ack; + + spin_unlock_bh(&tp_vars->cwnd_lock); + } else { + /* count the acked data */ + atomic64_add(recv_ack - atomic_read(&tp_vars->last_acked), + &tp_vars->tot_sent); + /* reset the duplicate ACKs counter */ + atomic_set(&tp_vars->dup_acks, 0); + + if (tp_vars->fast_recovery) { + /* partial ACK */ + if (batadv_seq_before(recv_ack, tp_vars->recover)) { + /* this is another hole in the window. React + * immediately as specified by NewReno (see + * Section 3.2 of RFC6582 for details) + */ + dev_addr = primary_if->net_dev->dev_addr; + batadv_tp_send_msg(tp_vars, dev_addr, + orig_node, recv_ack, + packet_len, icmp->session, + icmp->uid, + jiffies_to_msecs(jiffies)); + tp_vars->cwnd = batadv_tp_cwnd(tp_vars->cwnd, + mss, mss); + } else { + tp_vars->fast_recovery = false; + /* set cwnd to the value of ss_threshold at the + * moment that Fast Recovery was entered. + * RFC6582, Section 3.2, step 3 + */ + cwnd = batadv_tp_cwnd(tp_vars->ss_threshold, 0, + mss); + tp_vars->cwnd = cwnd; + } + goto move_twnd; + } + + if (recv_ack - atomic_read(&tp_vars->last_acked) >= mss) + batadv_tp_update_cwnd(tp_vars, mss); +move_twnd: + /* move the Transmit Window */ + atomic_set(&tp_vars->last_acked, recv_ack); + } + + wake_up(&tp_vars->more_bytes); +out: + if (likely(primary_if)) + batadv_hardif_put(primary_if); + if (likely(orig_node)) + batadv_orig_node_put(orig_node); + if (likely(tp_vars)) + batadv_tp_vars_put(tp_vars); +} + +/** + * batadv_tp_avail - check if congestion window is not full + * @tp_vars: the private data of the current TP meter session + * @payload_len: size of the payload of a single message + * + * Return: true when congestion window is not full, false otherwise + */ +static bool batadv_tp_avail(struct batadv_tp_vars *tp_vars, + size_t payload_len) +{ + u32 win_left, win_limit; + + win_limit = atomic_read(&tp_vars->last_acked) + tp_vars->cwnd; + win_left = win_limit - tp_vars->last_sent; + + return win_left >= payload_len; +} + +/** + * batadv_tp_wait_available - wait until congestion window becomes free or + * timeout is reached + * @tp_vars: the private data of the current TP meter session + * @plen: size of the payload of a single message + * + * Return: 0 if the condition evaluated to false after the timeout elapsed, + * 1 if the condition evaluated to true after the timeout elapsed, the + * remaining jiffies (at least 1) if the condition evaluated to true before + * the timeout elapsed, or -ERESTARTSYS if it was interrupted by a signal. + */ +static int batadv_tp_wait_available(struct batadv_tp_vars *tp_vars, size_t plen) +{ + int ret; + + ret = wait_event_interruptible_timeout(tp_vars->more_bytes, + batadv_tp_avail(tp_vars, plen), + HZ / 10); + + return ret; +} + +/** + * batadv_tp_send - main sending thread of a tp meter session + * @arg: address of the related tp_vars + * + * Return: nothing, this function never returns + */ +static int batadv_tp_send(void *arg) +{ + struct batadv_tp_vars *tp_vars = arg; + struct batadv_priv *bat_priv = tp_vars->bat_priv; + struct batadv_hard_iface *primary_if = NULL; + struct batadv_orig_node *orig_node = NULL; + size_t payload_len, packet_len; + int err = 0; + + if (unlikely(tp_vars->role != BATADV_TP_SENDER)) { + err = BATADV_TP_REASON_DST_UNREACHABLE; + tp_vars->reason = err; + goto out; + } + + orig_node = batadv_orig_hash_find(bat_priv, tp_vars->other_end); + if (unlikely(!orig_node)) { + err = BATADV_TP_REASON_DST_UNREACHABLE; + tp_vars->reason = err; + goto out; + } + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (unlikely(!primary_if)) { + err = BATADV_TP_REASON_DST_UNREACHABLE; + goto out; + } + + /* assume that all the hard_interfaces have a correctly + * configured MTU, so use the soft_iface MTU as MSS. + * This might not be true and in that case the fragmentation + * should be used. + * Now, try to send the packet as it is + */ + payload_len = BATADV_TP_PLEN; + BUILD_BUG_ON(sizeof(struct batadv_icmp_tp_packet) > BATADV_TP_PLEN); + + batadv_tp_reset_sender_timer(tp_vars); + + /* queue the worker in charge of terminating the test */ + queue_delayed_work(batadv_event_workqueue, &tp_vars->finish_work, + msecs_to_jiffies(tp_vars->test_length)); + + while (atomic_read(&tp_vars->sending) != 0) { + if (unlikely(!batadv_tp_avail(tp_vars, payload_len))) { + batadv_tp_wait_available(tp_vars, payload_len); + continue; + } + + /* to emulate normal unicast traffic, add to the payload len + * the size of the unicast header + */ + packet_len = payload_len + sizeof(struct batadv_unicast_packet); + + err = batadv_tp_send_msg(tp_vars, primary_if->net_dev->dev_addr, + orig_node, tp_vars->last_sent, + packet_len, + tp_vars->session, tp_vars->icmp_uid, + jiffies_to_msecs(jiffies)); + + /* something went wrong during the preparation/transmission */ + if (unlikely(err && err != BATADV_TP_REASON_CANT_SEND)) { + batadv_dbg(BATADV_DBG_TP_METER, bat_priv, + "Meter: batadv_tp_send() cannot send packets (%d)\n", + err); + /* ensure nobody else tries to stop the thread now */ + if (atomic_dec_and_test(&tp_vars->sending)) + tp_vars->reason = err; + break; + } + + /* right-shift the TWND */ + if (!err) + tp_vars->last_sent += payload_len; + + cond_resched(); + } + +out: + if (likely(primary_if)) + batadv_hardif_put(primary_if); + if (likely(orig_node)) + batadv_orig_node_put(orig_node); + + batadv_tp_sender_end(bat_priv, tp_vars); + batadv_tp_sender_cleanup(bat_priv, tp_vars); + + batadv_tp_vars_put(tp_vars); + + do_exit(0); +} + +/** + * batadv_tp_start_kthread - start new thread which manages the tp meter sender + * @tp_vars: the private data of the current TP meter session + */ +static void batadv_tp_start_kthread(struct batadv_tp_vars *tp_vars) +{ + struct task_struct *kthread; + struct batadv_priv *bat_priv = tp_vars->bat_priv; + u32 session_cookie; + + kref_get(&tp_vars->refcount); + kthread = kthread_create(batadv_tp_send, tp_vars, "kbatadv_tp_meter"); + if (IS_ERR(kthread)) { + session_cookie = batadv_tp_session_cookie(tp_vars->session, + tp_vars->icmp_uid); + pr_err("batadv: cannot create tp meter kthread\n"); + batadv_tp_batctl_error_notify(BATADV_TP_REASON_MEMORY_ERROR, + tp_vars->other_end, + bat_priv, session_cookie); + + /* drop reserved reference for kthread */ + batadv_tp_vars_put(tp_vars); + + /* cleanup of failed tp meter variables */ + batadv_tp_sender_cleanup(bat_priv, tp_vars); + return; + } + + wake_up_process(kthread); +} + +/** + * batadv_tp_start - start a new tp meter session + * @bat_priv: the bat priv with all the soft interface information + * @dst: the receiver MAC address + * @test_length: test length in milliseconds + * @cookie: session cookie + */ +void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst, + u32 test_length, u32 *cookie) +{ + struct batadv_tp_vars *tp_vars; + u8 session_id[2]; + u8 icmp_uid; + u32 session_cookie; + + get_random_bytes(session_id, sizeof(session_id)); + get_random_bytes(&icmp_uid, 1); + session_cookie = batadv_tp_session_cookie(session_id, icmp_uid); + *cookie = session_cookie; + + /* look for an already existing test towards this node */ + spin_lock_bh(&bat_priv->tp_list_lock); + tp_vars = batadv_tp_list_find(bat_priv, dst); + if (tp_vars) { + spin_unlock_bh(&bat_priv->tp_list_lock); + batadv_tp_vars_put(tp_vars); + batadv_dbg(BATADV_DBG_TP_METER, bat_priv, + "Meter: test to or from the same node already ongoing, aborting\n"); + batadv_tp_batctl_error_notify(BATADV_TP_REASON_ALREADY_ONGOING, + dst, bat_priv, session_cookie); + return; + } + + if (!atomic_add_unless(&bat_priv->tp_num, 1, BATADV_TP_MAX_NUM)) { + spin_unlock_bh(&bat_priv->tp_list_lock); + batadv_dbg(BATADV_DBG_TP_METER, bat_priv, + "Meter: too many ongoing sessions, aborting (SEND)\n"); + batadv_tp_batctl_error_notify(BATADV_TP_REASON_TOO_MANY, dst, + bat_priv, session_cookie); + return; + } + + tp_vars = kmalloc(sizeof(*tp_vars), GFP_ATOMIC); + if (!tp_vars) { + spin_unlock_bh(&bat_priv->tp_list_lock); + batadv_dbg(BATADV_DBG_TP_METER, bat_priv, + "Meter: batadv_tp_start cannot allocate list elements\n"); + batadv_tp_batctl_error_notify(BATADV_TP_REASON_MEMORY_ERROR, + dst, bat_priv, session_cookie); + return; + } + + /* initialize tp_vars */ + ether_addr_copy(tp_vars->other_end, dst); + kref_init(&tp_vars->refcount); + tp_vars->role = BATADV_TP_SENDER; + atomic_set(&tp_vars->sending, 1); + memcpy(tp_vars->session, session_id, sizeof(session_id)); + tp_vars->icmp_uid = icmp_uid; + + tp_vars->last_sent = BATADV_TP_FIRST_SEQ; + atomic_set(&tp_vars->last_acked, BATADV_TP_FIRST_SEQ); + tp_vars->fast_recovery = false; + tp_vars->recover = BATADV_TP_FIRST_SEQ; + + /* initialise the CWND to 3*MSS (Section 3.1 in RFC5681). + * For batman-adv the MSS is the size of the payload received by the + * soft_interface, hence its MTU + */ + tp_vars->cwnd = BATADV_TP_PLEN * 3; + /* at the beginning initialise the SS threshold to the biggest possible + * window size, hence the AWND size + */ + tp_vars->ss_threshold = BATADV_TP_AWND; + + /* RTO initial value is 3 seconds. + * Details in Section 2.1 of RFC6298 + */ + tp_vars->rto = 1000; + tp_vars->srtt = 0; + tp_vars->rttvar = 0; + + atomic64_set(&tp_vars->tot_sent, 0); + + kref_get(&tp_vars->refcount); + setup_timer(&tp_vars->timer, batadv_tp_sender_timeout, + (unsigned long)tp_vars); + + tp_vars->bat_priv = bat_priv; + tp_vars->start_time = jiffies; + + init_waitqueue_head(&tp_vars->more_bytes); + + spin_lock_init(&tp_vars->unacked_lock); + INIT_LIST_HEAD(&tp_vars->unacked_list); + + spin_lock_init(&tp_vars->cwnd_lock); + + tp_vars->prerandom_offset = 0; + spin_lock_init(&tp_vars->prerandom_lock); + + kref_get(&tp_vars->refcount); + hlist_add_head_rcu(&tp_vars->list, &bat_priv->tp_list); + spin_unlock_bh(&bat_priv->tp_list_lock); + + tp_vars->test_length = test_length; + if (!tp_vars->test_length) + tp_vars->test_length = BATADV_TP_DEF_TEST_LENGTH; + + batadv_dbg(BATADV_DBG_TP_METER, bat_priv, + "Meter: starting throughput meter towards %pM (length=%ums)\n", + dst, test_length); + + /* init work item for finished tp tests */ + INIT_DELAYED_WORK(&tp_vars->finish_work, batadv_tp_sender_finish); + + /* start tp kthread. This way the write() call issued from userspace can + * happily return and avoid to block + */ + batadv_tp_start_kthread(tp_vars); + + /* don't return reference to new tp_vars */ + batadv_tp_vars_put(tp_vars); +} + +/** + * batadv_tp_stop - stop currently running tp meter session + * @bat_priv: the bat priv with all the soft interface information + * @dst: the receiver MAC address + * @return_value: reason for tp meter session stop + */ +void batadv_tp_stop(struct batadv_priv *bat_priv, const u8 *dst, + u8 return_value) +{ + struct batadv_orig_node *orig_node; + struct batadv_tp_vars *tp_vars; + + batadv_dbg(BATADV_DBG_TP_METER, bat_priv, + "Meter: stopping test towards %pM\n", dst); + + orig_node = batadv_orig_hash_find(bat_priv, dst); + if (!orig_node) + return; + + tp_vars = batadv_tp_list_find(bat_priv, orig_node->orig); + if (!tp_vars) { + batadv_dbg(BATADV_DBG_TP_METER, bat_priv, + "Meter: trying to interrupt an already over connection\n"); + goto out; + } + + batadv_tp_sender_shutdown(tp_vars, return_value); + batadv_tp_vars_put(tp_vars); +out: + batadv_orig_node_put(orig_node); +} + +/** + * batadv_tp_reset_receiver_timer - reset the receiver shutdown timer + * @tp_vars: the private data of the current TP meter session + * + * start the receiver shutdown timer or reset it if already started + */ +static void batadv_tp_reset_receiver_timer(struct batadv_tp_vars *tp_vars) +{ + mod_timer(&tp_vars->timer, + jiffies + msecs_to_jiffies(BATADV_TP_RECV_TIMEOUT)); +} + +/** + * batadv_tp_receiver_shutdown - stop a tp meter receiver when timeout is + * reached without received ack + * @arg: address of the related tp_vars + */ +static void batadv_tp_receiver_shutdown(unsigned long arg) +{ + struct batadv_tp_vars *tp_vars = (struct batadv_tp_vars *)arg; + struct batadv_tp_unacked *un, *safe; + struct batadv_priv *bat_priv; + + bat_priv = tp_vars->bat_priv; + + /* if there is recent activity rearm the timer */ + if (!batadv_has_timed_out(tp_vars->last_recv_time, + BATADV_TP_RECV_TIMEOUT)) { + /* reset the receiver shutdown timer */ + batadv_tp_reset_receiver_timer(tp_vars); + return; + } + + batadv_dbg(BATADV_DBG_TP_METER, bat_priv, + "Shutting down for inactivity (more than %dms) from %pM\n", + BATADV_TP_RECV_TIMEOUT, tp_vars->other_end); + + spin_lock_bh(&tp_vars->bat_priv->tp_list_lock); + hlist_del_rcu(&tp_vars->list); + spin_unlock_bh(&tp_vars->bat_priv->tp_list_lock); + + /* drop list reference */ + batadv_tp_vars_put(tp_vars); + + atomic_dec(&bat_priv->tp_num); + + spin_lock_bh(&tp_vars->unacked_lock); + list_for_each_entry_safe(un, safe, &tp_vars->unacked_list, list) { + list_del(&un->list); + kfree(un); + } + spin_unlock_bh(&tp_vars->unacked_lock); + + /* drop reference of timer */ + batadv_tp_vars_put(tp_vars); +} + +/** + * batadv_tp_send_ack - send an ACK packet + * @bat_priv: the bat priv with all the soft interface information + * @dst: the mac address of the destination originator + * @seq: the sequence number to ACK + * @timestamp: the timestamp to echo back in the ACK + * @session: session identifier + * @socket_index: local ICMP socket identifier + * + * Return: 0 on success, a positive integer representing the reason of the + * failure otherwise + */ +static int batadv_tp_send_ack(struct batadv_priv *bat_priv, const u8 *dst, + u32 seq, __be32 timestamp, const u8 *session, + int socket_index) +{ + struct batadv_hard_iface *primary_if = NULL; + struct batadv_orig_node *orig_node; + struct batadv_icmp_tp_packet *icmp; + struct sk_buff *skb; + int r, ret; + + orig_node = batadv_orig_hash_find(bat_priv, dst); + if (unlikely(!orig_node)) { + ret = BATADV_TP_REASON_DST_UNREACHABLE; + goto out; + } + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (unlikely(!primary_if)) { + ret = BATADV_TP_REASON_DST_UNREACHABLE; + goto out; + } + + skb = netdev_alloc_skb_ip_align(NULL, sizeof(*icmp) + ETH_HLEN); + if (unlikely(!skb)) { + ret = BATADV_TP_REASON_MEMORY_ERROR; + goto out; + } + + skb_reserve(skb, ETH_HLEN); + icmp = (struct batadv_icmp_tp_packet *)skb_put(skb, sizeof(*icmp)); + icmp->packet_type = BATADV_ICMP; + icmp->version = BATADV_COMPAT_VERSION; + icmp->ttl = BATADV_TTL; + icmp->msg_type = BATADV_TP; + ether_addr_copy(icmp->dst, orig_node->orig); + ether_addr_copy(icmp->orig, primary_if->net_dev->dev_addr); + icmp->uid = socket_index; + + icmp->subtype = BATADV_TP_ACK; + memcpy(icmp->session, session, sizeof(icmp->session)); + icmp->seqno = htonl(seq); + icmp->timestamp = timestamp; + + /* send the ack */ + r = batadv_send_skb_to_orig(skb, orig_node, NULL); + if (r == -1) + kfree_skb(skb); + + if (unlikely(r < 0) || (r == NET_XMIT_DROP)) { + ret = BATADV_TP_REASON_DST_UNREACHABLE; + goto out; + } + ret = 0; + +out: + if (likely(orig_node)) + batadv_orig_node_put(orig_node); + if (likely(primary_if)) + batadv_hardif_put(primary_if); + + return ret; +} + +/** + * batadv_tp_handle_out_of_order - store an out of order packet + * @tp_vars: the private data of the current TP meter session + * @skb: the buffer containing the received packet + * + * Store the out of order packet in the unacked list for late processing. This + * packets are kept in this list so that they can be ACKed at once as soon as + * all the previous packets have been received + * + * Return: true if the packed has been successfully processed, false otherwise + */ +static bool batadv_tp_handle_out_of_order(struct batadv_tp_vars *tp_vars, + const struct sk_buff *skb) +{ + const struct batadv_icmp_tp_packet *icmp; + struct batadv_tp_unacked *un, *new; + u32 payload_len; + bool added = false; + + new = kmalloc(sizeof(*new), GFP_ATOMIC); + if (unlikely(!new)) + return false; + + icmp = (struct batadv_icmp_tp_packet *)skb->data; + + new->seqno = ntohl(icmp->seqno); + payload_len = skb->len - sizeof(struct batadv_unicast_packet); + new->len = payload_len; + + spin_lock_bh(&tp_vars->unacked_lock); + /* if the list is empty immediately attach this new object */ + if (list_empty(&tp_vars->unacked_list)) { + list_add(&new->list, &tp_vars->unacked_list); + goto out; + } + + /* otherwise loop over the list and either drop the packet because this + * is a duplicate or store it at the right position. + * + * The iteration is done in the reverse way because it is likely that + * the last received packet (the one being processed now) has a bigger + * seqno than all the others already stored. + */ + list_for_each_entry_reverse(un, &tp_vars->unacked_list, list) { + /* check for duplicates */ + if (new->seqno == un->seqno) { + if (new->len > un->len) + un->len = new->len; + kfree(new); + added = true; + break; + } + + /* look for the right position */ + if (batadv_seq_before(new->seqno, un->seqno)) + continue; + + /* as soon as an entry having a bigger seqno is found, the new + * one is attached _after_ it. In this way the list is kept in + * ascending order + */ + list_add_tail(&new->list, &un->list); + added = true; + break; + } + + /* received packet with smallest seqno out of order; add it to front */ + if (!added) + list_add(&new->list, &tp_vars->unacked_list); + +out: + spin_unlock_bh(&tp_vars->unacked_lock); + + return true; +} + +/** + * batadv_tp_ack_unordered - update number received bytes in current stream + * without gaps + * @tp_vars: the private data of the current TP meter session + */ +static void batadv_tp_ack_unordered(struct batadv_tp_vars *tp_vars) +{ + struct batadv_tp_unacked *un, *safe; + u32 to_ack; + + /* go through the unacked packet list and possibly ACK them as + * well + */ + spin_lock_bh(&tp_vars->unacked_lock); + list_for_each_entry_safe(un, safe, &tp_vars->unacked_list, list) { + /* the list is ordered, therefore it is possible to stop as soon + * there is a gap between the last acked seqno and the seqno of + * the packet under inspection + */ + if (batadv_seq_before(tp_vars->last_recv, un->seqno)) + break; + + to_ack = un->seqno + un->len - tp_vars->last_recv; + + if (batadv_seq_before(tp_vars->last_recv, un->seqno + un->len)) + tp_vars->last_recv += to_ack; + + list_del(&un->list); + kfree(un); + } + spin_unlock_bh(&tp_vars->unacked_lock); +} + +/** + * batadv_tp_init_recv - return matching or create new receiver tp_vars + * @bat_priv: the bat priv with all the soft interface information + * @icmp: received icmp tp msg + * + * Return: corresponding tp_vars or NULL on errors + */ +static struct batadv_tp_vars * +batadv_tp_init_recv(struct batadv_priv *bat_priv, + const struct batadv_icmp_tp_packet *icmp) +{ + struct batadv_tp_vars *tp_vars; + + spin_lock_bh(&bat_priv->tp_list_lock); + tp_vars = batadv_tp_list_find_session(bat_priv, icmp->orig, + icmp->session); + if (tp_vars) + goto out_unlock; + + if (!atomic_add_unless(&bat_priv->tp_num, 1, BATADV_TP_MAX_NUM)) { + batadv_dbg(BATADV_DBG_TP_METER, bat_priv, + "Meter: too many ongoing sessions, aborting (RECV)\n"); + goto out_unlock; + } + + tp_vars = kmalloc(sizeof(*tp_vars), GFP_ATOMIC); + if (!tp_vars) + goto out_unlock; + + ether_addr_copy(tp_vars->other_end, icmp->orig); + tp_vars->role = BATADV_TP_RECEIVER; + memcpy(tp_vars->session, icmp->session, sizeof(tp_vars->session)); + tp_vars->last_recv = BATADV_TP_FIRST_SEQ; + tp_vars->bat_priv = bat_priv; + kref_init(&tp_vars->refcount); + + spin_lock_init(&tp_vars->unacked_lock); + INIT_LIST_HEAD(&tp_vars->unacked_list); + + kref_get(&tp_vars->refcount); + hlist_add_head_rcu(&tp_vars->list, &bat_priv->tp_list); + + kref_get(&tp_vars->refcount); + setup_timer(&tp_vars->timer, batadv_tp_receiver_shutdown, + (unsigned long)tp_vars); + + batadv_tp_reset_receiver_timer(tp_vars); + +out_unlock: + spin_unlock_bh(&bat_priv->tp_list_lock); + + return tp_vars; +} + +/** + * batadv_tp_recv_msg - process a single data message + * @bat_priv: the bat priv with all the soft interface information + * @skb: the buffer containing the received packet + * + * Process a received TP MSG packet + */ +static void batadv_tp_recv_msg(struct batadv_priv *bat_priv, + const struct sk_buff *skb) +{ + const struct batadv_icmp_tp_packet *icmp; + struct batadv_tp_vars *tp_vars; + size_t packet_size; + u32 seqno; + + icmp = (struct batadv_icmp_tp_packet *)skb->data; + + seqno = ntohl(icmp->seqno); + /* check if this is the first seqno. This means that if the + * first packet is lost, the tp meter does not work anymore! + */ + if (seqno == BATADV_TP_FIRST_SEQ) { + tp_vars = batadv_tp_init_recv(bat_priv, icmp); + if (!tp_vars) { + batadv_dbg(BATADV_DBG_TP_METER, bat_priv, + "Meter: seqno != BATADV_TP_FIRST_SEQ cannot initiate connection\n"); + goto out; + } + } else { + tp_vars = batadv_tp_list_find_session(bat_priv, icmp->orig, + icmp->session); + if (!tp_vars) { + batadv_dbg(BATADV_DBG_TP_METER, bat_priv, + "Unexpected packet from %pM!\n", + icmp->orig); + goto out; + } + } + + if (unlikely(tp_vars->role != BATADV_TP_RECEIVER)) { + batadv_dbg(BATADV_DBG_TP_METER, bat_priv, + "Meter: dropping packet: not expected (role=%u)\n", + tp_vars->role); + goto out; + } + + tp_vars->last_recv_time = jiffies; + + /* if the packet is a duplicate, it may be the case that an ACK has been + * lost. Resend the ACK + */ + if (batadv_seq_before(seqno, tp_vars->last_recv)) + goto send_ack; + + /* if the packet is out of order enqueue it */ + if (ntohl(icmp->seqno) != tp_vars->last_recv) { + /* exit immediately (and do not send any ACK) if the packet has + * not been enqueued correctly + */ + if (!batadv_tp_handle_out_of_order(tp_vars, skb)) + goto out; + + /* send a duplicate ACK */ + goto send_ack; + } + + /* if everything was fine count the ACKed bytes */ + packet_size = skb->len - sizeof(struct batadv_unicast_packet); + tp_vars->last_recv += packet_size; + + /* check if this ordered message filled a gap.... */ + batadv_tp_ack_unordered(tp_vars); + +send_ack: + /* send the ACK. If the received packet was out of order, the ACK that + * is going to be sent is a duplicate (the sender will count them and + * possibly enter Fast Retransmit as soon as it has reached 3) + */ + batadv_tp_send_ack(bat_priv, icmp->orig, tp_vars->last_recv, + icmp->timestamp, icmp->session, icmp->uid); +out: + if (likely(tp_vars)) + batadv_tp_vars_put(tp_vars); +} + +/** + * batadv_tp_meter_recv - main TP Meter receiving function + * @bat_priv: the bat priv with all the soft interface information + * @skb: the buffer containing the received packet + */ +void batadv_tp_meter_recv(struct batadv_priv *bat_priv, struct sk_buff *skb) +{ + struct batadv_icmp_tp_packet *icmp; + + icmp = (struct batadv_icmp_tp_packet *)skb->data; + + switch (icmp->subtype) { + case BATADV_TP_MSG: + batadv_tp_recv_msg(bat_priv, skb); + break; + case BATADV_TP_ACK: + batadv_tp_recv_ack(bat_priv, skb); + break; + default: + batadv_dbg(BATADV_DBG_TP_METER, bat_priv, + "Received unknown TP Metric packet type %u\n", + icmp->subtype); + } + consume_skb(skb); +} + +/** + * batadv_tp_meter_init - initialize global tp_meter structures + */ +void batadv_tp_meter_init(void) +{ + get_random_bytes(batadv_tp_prerandom, sizeof(batadv_tp_prerandom)); +} diff --git a/net/batman-adv/tp_meter.h b/net/batman-adv/tp_meter.h new file mode 100644 index 000000000000..ba922c425e56 --- /dev/null +++ b/net/batman-adv/tp_meter.h @@ -0,0 +1,34 @@ +/* Copyright (C) 2012-2016 B.A.T.M.A.N. contributors: + * + * Edo Monticelli, Antonio Quartulli + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _NET_BATMAN_ADV_TP_METER_H_ +#define _NET_BATMAN_ADV_TP_METER_H_ + +#include "main.h" + +#include <linux/types.h> + +struct sk_buff; + +void batadv_tp_meter_init(void); +void batadv_tp_start(struct batadv_priv *bat_priv, const u8 *dst, + u32 test_length, u32 *cookie); +void batadv_tp_stop(struct batadv_priv *bat_priv, const u8 *dst, + u8 return_value); +void batadv_tp_meter_recv(struct batadv_priv *bat_priv, struct sk_buff *skb); + +#endif /* _NET_BATMAN_ADV_TP_METER_H_ */ diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 57ec87f37050..7e6df7a4964a 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -47,10 +47,12 @@ #include "bridge_loop_avoidance.h" #include "hard-interface.h" #include "hash.h" +#include "log.h" #include "multicast.h" #include "originator.h" #include "packet.h" #include "soft-interface.h" +#include "tvlv.h" /* hash class keys */ static struct lock_class_key batadv_tt_local_hash_lock_class_key; @@ -996,7 +998,6 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) struct batadv_tt_local_entry *tt_local; struct batadv_hard_iface *primary_if; struct hlist_head *head; - unsigned short vid; u32 i; int last_seen_secs; int last_seen_msecs; @@ -1023,7 +1024,6 @@ int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) tt_local = container_of(tt_common_entry, struct batadv_tt_local_entry, common); - vid = tt_common_entry->vid; last_seen_jiffies = jiffies - tt_local->last_seen; last_seen_msecs = jiffies_to_msecs(last_seen_jiffies); last_seen_secs = last_seen_msecs / 1000; @@ -1547,7 +1547,7 @@ batadv_transtable_best_orig(struct batadv_priv *bat_priv, struct batadv_tt_global_entry *tt_global_entry) { struct batadv_neigh_node *router, *best_router = NULL; - struct batadv_algo_ops *bao = bat_priv->bat_algo_ops; + struct batadv_algo_ops *bao = bat_priv->algo_ops; struct hlist_head *head; struct batadv_tt_orig_list_entry *orig_entry, *best_entry = NULL; @@ -1559,8 +1559,8 @@ batadv_transtable_best_orig(struct batadv_priv *bat_priv, continue; if (best_router && - bao->bat_neigh_cmp(router, BATADV_IF_DEFAULT, - best_router, BATADV_IF_DEFAULT) <= 0) { + bao->neigh.cmp(router, BATADV_IF_DEFAULT, best_router, + BATADV_IF_DEFAULT) <= 0) { batadv_neigh_node_put(router); continue; } diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c new file mode 100644 index 000000000000..3d1cf0fb112d --- /dev/null +++ b/net/batman-adv/tvlv.c @@ -0,0 +1,632 @@ +/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors: + * + * Marek Lindner, Simon Wunderlich + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "main.h" + +#include <linux/byteorder/generic.h> +#include <linux/etherdevice.h> +#include <linux/fs.h> +#include <linux/if_ether.h> +#include <linux/kernel.h> +#include <linux/kref.h> +#include <linux/list.h> +#include <linux/lockdep.h> +#include <linux/netdevice.h> +#include <linux/pkt_sched.h> +#include <linux/rculist.h> +#include <linux/rcupdate.h> +#include <linux/skbuff.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/stddef.h> +#include <linux/string.h> +#include <linux/types.h> + +#include "originator.h" +#include "packet.h" +#include "send.h" +#include "tvlv.h" + +/** + * batadv_tvlv_handler_release - release tvlv handler from lists and queue for + * free after rcu grace period + * @ref: kref pointer of the tvlv + */ +static void batadv_tvlv_handler_release(struct kref *ref) +{ + struct batadv_tvlv_handler *tvlv_handler; + + tvlv_handler = container_of(ref, struct batadv_tvlv_handler, refcount); + kfree_rcu(tvlv_handler, rcu); +} + +/** + * batadv_tvlv_handler_put - decrement the tvlv container refcounter and + * possibly release it + * @tvlv_handler: the tvlv handler to free + */ +static void batadv_tvlv_handler_put(struct batadv_tvlv_handler *tvlv_handler) +{ + kref_put(&tvlv_handler->refcount, batadv_tvlv_handler_release); +} + +/** + * batadv_tvlv_handler_get - retrieve tvlv handler from the tvlv handler list + * based on the provided type and version (both need to match) + * @bat_priv: the bat priv with all the soft interface information + * @type: tvlv handler type to look for + * @version: tvlv handler version to look for + * + * Return: tvlv handler if found or NULL otherwise. + */ +static struct batadv_tvlv_handler * +batadv_tvlv_handler_get(struct batadv_priv *bat_priv, u8 type, u8 version) +{ + struct batadv_tvlv_handler *tvlv_handler_tmp, *tvlv_handler = NULL; + + rcu_read_lock(); + hlist_for_each_entry_rcu(tvlv_handler_tmp, + &bat_priv->tvlv.handler_list, list) { + if (tvlv_handler_tmp->type != type) + continue; + + if (tvlv_handler_tmp->version != version) + continue; + + if (!kref_get_unless_zero(&tvlv_handler_tmp->refcount)) + continue; + + tvlv_handler = tvlv_handler_tmp; + break; + } + rcu_read_unlock(); + + return tvlv_handler; +} + +/** + * batadv_tvlv_container_release - release tvlv from lists and free + * @ref: kref pointer of the tvlv + */ +static void batadv_tvlv_container_release(struct kref *ref) +{ + struct batadv_tvlv_container *tvlv; + + tvlv = container_of(ref, struct batadv_tvlv_container, refcount); + kfree(tvlv); +} + +/** + * batadv_tvlv_container_put - decrement the tvlv container refcounter and + * possibly release it + * @tvlv: the tvlv container to free + */ +static void batadv_tvlv_container_put(struct batadv_tvlv_container *tvlv) +{ + kref_put(&tvlv->refcount, batadv_tvlv_container_release); +} + +/** + * batadv_tvlv_container_get - retrieve tvlv container from the tvlv container + * list based on the provided type and version (both need to match) + * @bat_priv: the bat priv with all the soft interface information + * @type: tvlv container type to look for + * @version: tvlv container version to look for + * + * Has to be called with the appropriate locks being acquired + * (tvlv.container_list_lock). + * + * Return: tvlv container if found or NULL otherwise. + */ +static struct batadv_tvlv_container * +batadv_tvlv_container_get(struct batadv_priv *bat_priv, u8 type, u8 version) +{ + struct batadv_tvlv_container *tvlv_tmp, *tvlv = NULL; + + lockdep_assert_held(&bat_priv->tvlv.container_list_lock); + + hlist_for_each_entry(tvlv_tmp, &bat_priv->tvlv.container_list, list) { + if (tvlv_tmp->tvlv_hdr.type != type) + continue; + + if (tvlv_tmp->tvlv_hdr.version != version) + continue; + + kref_get(&tvlv_tmp->refcount); + tvlv = tvlv_tmp; + break; + } + + return tvlv; +} + +/** + * batadv_tvlv_container_list_size - calculate the size of the tvlv container + * list entries + * @bat_priv: the bat priv with all the soft interface information + * + * Has to be called with the appropriate locks being acquired + * (tvlv.container_list_lock). + * + * Return: size of all currently registered tvlv containers in bytes. + */ +static u16 batadv_tvlv_container_list_size(struct batadv_priv *bat_priv) +{ + struct batadv_tvlv_container *tvlv; + u16 tvlv_len = 0; + + lockdep_assert_held(&bat_priv->tvlv.container_list_lock); + + hlist_for_each_entry(tvlv, &bat_priv->tvlv.container_list, list) { + tvlv_len += sizeof(struct batadv_tvlv_hdr); + tvlv_len += ntohs(tvlv->tvlv_hdr.len); + } + + return tvlv_len; +} + +/** + * batadv_tvlv_container_remove - remove tvlv container from the tvlv container + * list + * @bat_priv: the bat priv with all the soft interface information + * @tvlv: the to be removed tvlv container + * + * Has to be called with the appropriate locks being acquired + * (tvlv.container_list_lock). + */ +static void batadv_tvlv_container_remove(struct batadv_priv *bat_priv, + struct batadv_tvlv_container *tvlv) +{ + lockdep_assert_held(&bat_priv->tvlv.container_list_lock); + + if (!tvlv) + return; + + hlist_del(&tvlv->list); + + /* first call to decrement the counter, second call to free */ + batadv_tvlv_container_put(tvlv); + batadv_tvlv_container_put(tvlv); +} + +/** + * batadv_tvlv_container_unregister - unregister tvlv container based on the + * provided type and version (both need to match) + * @bat_priv: the bat priv with all the soft interface information + * @type: tvlv container type to unregister + * @version: tvlv container type to unregister + */ +void batadv_tvlv_container_unregister(struct batadv_priv *bat_priv, + u8 type, u8 version) +{ + struct batadv_tvlv_container *tvlv; + + spin_lock_bh(&bat_priv->tvlv.container_list_lock); + tvlv = batadv_tvlv_container_get(bat_priv, type, version); + batadv_tvlv_container_remove(bat_priv, tvlv); + spin_unlock_bh(&bat_priv->tvlv.container_list_lock); +} + +/** + * batadv_tvlv_container_register - register tvlv type, version and content + * to be propagated with each (primary interface) OGM + * @bat_priv: the bat priv with all the soft interface information + * @type: tvlv container type + * @version: tvlv container version + * @tvlv_value: tvlv container content + * @tvlv_value_len: tvlv container content length + * + * If a container of the same type and version was already registered the new + * content is going to replace the old one. + */ +void batadv_tvlv_container_register(struct batadv_priv *bat_priv, + u8 type, u8 version, + void *tvlv_value, u16 tvlv_value_len) +{ + struct batadv_tvlv_container *tvlv_old, *tvlv_new; + + if (!tvlv_value) + tvlv_value_len = 0; + + tvlv_new = kzalloc(sizeof(*tvlv_new) + tvlv_value_len, GFP_ATOMIC); + if (!tvlv_new) + return; + + tvlv_new->tvlv_hdr.version = version; + tvlv_new->tvlv_hdr.type = type; + tvlv_new->tvlv_hdr.len = htons(tvlv_value_len); + + memcpy(tvlv_new + 1, tvlv_value, ntohs(tvlv_new->tvlv_hdr.len)); + INIT_HLIST_NODE(&tvlv_new->list); + kref_init(&tvlv_new->refcount); + + spin_lock_bh(&bat_priv->tvlv.container_list_lock); + tvlv_old = batadv_tvlv_container_get(bat_priv, type, version); + batadv_tvlv_container_remove(bat_priv, tvlv_old); + hlist_add_head(&tvlv_new->list, &bat_priv->tvlv.container_list); + spin_unlock_bh(&bat_priv->tvlv.container_list_lock); +} + +/** + * batadv_tvlv_realloc_packet_buff - reallocate packet buffer to accommodate + * requested packet size + * @packet_buff: packet buffer + * @packet_buff_len: packet buffer size + * @min_packet_len: requested packet minimum size + * @additional_packet_len: requested additional packet size on top of minimum + * size + * + * Return: true of the packet buffer could be changed to the requested size, + * false otherwise. + */ +static bool batadv_tvlv_realloc_packet_buff(unsigned char **packet_buff, + int *packet_buff_len, + int min_packet_len, + int additional_packet_len) +{ + unsigned char *new_buff; + + new_buff = kmalloc(min_packet_len + additional_packet_len, GFP_ATOMIC); + + /* keep old buffer if kmalloc should fail */ + if (!new_buff) + return false; + + memcpy(new_buff, *packet_buff, min_packet_len); + kfree(*packet_buff); + *packet_buff = new_buff; + *packet_buff_len = min_packet_len + additional_packet_len; + + return true; +} + +/** + * batadv_tvlv_container_ogm_append - append tvlv container content to given + * OGM packet buffer + * @bat_priv: the bat priv with all the soft interface information + * @packet_buff: ogm packet buffer + * @packet_buff_len: ogm packet buffer size including ogm header and tvlv + * content + * @packet_min_len: ogm header size to be preserved for the OGM itself + * + * The ogm packet might be enlarged or shrunk depending on the current size + * and the size of the to-be-appended tvlv containers. + * + * Return: size of all appended tvlv containers in bytes. + */ +u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv, + unsigned char **packet_buff, + int *packet_buff_len, int packet_min_len) +{ + struct batadv_tvlv_container *tvlv; + struct batadv_tvlv_hdr *tvlv_hdr; + u16 tvlv_value_len; + void *tvlv_value; + bool ret; + + spin_lock_bh(&bat_priv->tvlv.container_list_lock); + tvlv_value_len = batadv_tvlv_container_list_size(bat_priv); + + ret = batadv_tvlv_realloc_packet_buff(packet_buff, packet_buff_len, + packet_min_len, tvlv_value_len); + + if (!ret) + goto end; + + if (!tvlv_value_len) + goto end; + + tvlv_value = (*packet_buff) + packet_min_len; + + hlist_for_each_entry(tvlv, &bat_priv->tvlv.container_list, list) { + tvlv_hdr = tvlv_value; + tvlv_hdr->type = tvlv->tvlv_hdr.type; + tvlv_hdr->version = tvlv->tvlv_hdr.version; + tvlv_hdr->len = tvlv->tvlv_hdr.len; + tvlv_value = tvlv_hdr + 1; + memcpy(tvlv_value, tvlv + 1, ntohs(tvlv->tvlv_hdr.len)); + tvlv_value = (u8 *)tvlv_value + ntohs(tvlv->tvlv_hdr.len); + } + +end: + spin_unlock_bh(&bat_priv->tvlv.container_list_lock); + return tvlv_value_len; +} + +/** + * batadv_tvlv_call_handler - parse the given tvlv buffer to call the + * appropriate handlers + * @bat_priv: the bat priv with all the soft interface information + * @tvlv_handler: tvlv callback function handling the tvlv content + * @ogm_source: flag indicating whether the tvlv is an ogm or a unicast packet + * @orig_node: orig node emitting the ogm packet + * @src: source mac address of the unicast packet + * @dst: destination mac address of the unicast packet + * @tvlv_value: tvlv content + * @tvlv_value_len: tvlv content length + * + * Return: success if handler was not found or the return value of the handler + * callback. + */ +static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv, + struct batadv_tvlv_handler *tvlv_handler, + bool ogm_source, + struct batadv_orig_node *orig_node, + u8 *src, u8 *dst, + void *tvlv_value, u16 tvlv_value_len) +{ + if (!tvlv_handler) + return NET_RX_SUCCESS; + + if (ogm_source) { + if (!tvlv_handler->ogm_handler) + return NET_RX_SUCCESS; + + if (!orig_node) + return NET_RX_SUCCESS; + + tvlv_handler->ogm_handler(bat_priv, orig_node, + BATADV_NO_FLAGS, + tvlv_value, tvlv_value_len); + tvlv_handler->flags |= BATADV_TVLV_HANDLER_OGM_CALLED; + } else { + if (!src) + return NET_RX_SUCCESS; + + if (!dst) + return NET_RX_SUCCESS; + + if (!tvlv_handler->unicast_handler) + return NET_RX_SUCCESS; + + return tvlv_handler->unicast_handler(bat_priv, src, + dst, tvlv_value, + tvlv_value_len); + } + + return NET_RX_SUCCESS; +} + +/** + * batadv_tvlv_containers_process - parse the given tvlv buffer to call the + * appropriate handlers + * @bat_priv: the bat priv with all the soft interface information + * @ogm_source: flag indicating whether the tvlv is an ogm or a unicast packet + * @orig_node: orig node emitting the ogm packet + * @src: source mac address of the unicast packet + * @dst: destination mac address of the unicast packet + * @tvlv_value: tvlv content + * @tvlv_value_len: tvlv content length + * + * Return: success when processing an OGM or the return value of all called + * handler callbacks. + */ +int batadv_tvlv_containers_process(struct batadv_priv *bat_priv, + bool ogm_source, + struct batadv_orig_node *orig_node, + u8 *src, u8 *dst, + void *tvlv_value, u16 tvlv_value_len) +{ + struct batadv_tvlv_handler *tvlv_handler; + struct batadv_tvlv_hdr *tvlv_hdr; + u16 tvlv_value_cont_len; + u8 cifnotfound = BATADV_TVLV_HANDLER_OGM_CIFNOTFND; + int ret = NET_RX_SUCCESS; + + while (tvlv_value_len >= sizeof(*tvlv_hdr)) { + tvlv_hdr = tvlv_value; + tvlv_value_cont_len = ntohs(tvlv_hdr->len); + tvlv_value = tvlv_hdr + 1; + tvlv_value_len -= sizeof(*tvlv_hdr); + + if (tvlv_value_cont_len > tvlv_value_len) + break; + + tvlv_handler = batadv_tvlv_handler_get(bat_priv, + tvlv_hdr->type, + tvlv_hdr->version); + + ret |= batadv_tvlv_call_handler(bat_priv, tvlv_handler, + ogm_source, orig_node, + src, dst, tvlv_value, + tvlv_value_cont_len); + if (tvlv_handler) + batadv_tvlv_handler_put(tvlv_handler); + tvlv_value = (u8 *)tvlv_value + tvlv_value_cont_len; + tvlv_value_len -= tvlv_value_cont_len; + } + + if (!ogm_source) + return ret; + + rcu_read_lock(); + hlist_for_each_entry_rcu(tvlv_handler, + &bat_priv->tvlv.handler_list, list) { + if ((tvlv_handler->flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND) && + !(tvlv_handler->flags & BATADV_TVLV_HANDLER_OGM_CALLED)) + tvlv_handler->ogm_handler(bat_priv, orig_node, + cifnotfound, NULL, 0); + + tvlv_handler->flags &= ~BATADV_TVLV_HANDLER_OGM_CALLED; + } + rcu_read_unlock(); + + return NET_RX_SUCCESS; +} + +/** + * batadv_tvlv_ogm_receive - process an incoming ogm and call the appropriate + * handlers + * @bat_priv: the bat priv with all the soft interface information + * @batadv_ogm_packet: ogm packet containing the tvlv containers + * @orig_node: orig node emitting the ogm packet + */ +void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv, + struct batadv_ogm_packet *batadv_ogm_packet, + struct batadv_orig_node *orig_node) +{ + void *tvlv_value; + u16 tvlv_value_len; + + if (!batadv_ogm_packet) + return; + + tvlv_value_len = ntohs(batadv_ogm_packet->tvlv_len); + if (!tvlv_value_len) + return; + + tvlv_value = batadv_ogm_packet + 1; + + batadv_tvlv_containers_process(bat_priv, true, orig_node, NULL, NULL, + tvlv_value, tvlv_value_len); +} + +/** + * batadv_tvlv_handler_register - register tvlv handler based on the provided + * type and version (both need to match) for ogm tvlv payload and/or unicast + * payload + * @bat_priv: the bat priv with all the soft interface information + * @optr: ogm tvlv handler callback function. This function receives the orig + * node, flags and the tvlv content as argument to process. + * @uptr: unicast tvlv handler callback function. This function receives the + * source & destination of the unicast packet as well as the tvlv content + * to process. + * @type: tvlv handler type to be registered + * @version: tvlv handler version to be registered + * @flags: flags to enable or disable TVLV API behavior + */ +void batadv_tvlv_handler_register(struct batadv_priv *bat_priv, + void (*optr)(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig, + u8 flags, + void *tvlv_value, + u16 tvlv_value_len), + int (*uptr)(struct batadv_priv *bat_priv, + u8 *src, u8 *dst, + void *tvlv_value, + u16 tvlv_value_len), + u8 type, u8 version, u8 flags) +{ + struct batadv_tvlv_handler *tvlv_handler; + + tvlv_handler = batadv_tvlv_handler_get(bat_priv, type, version); + if (tvlv_handler) { + batadv_tvlv_handler_put(tvlv_handler); + return; + } + + tvlv_handler = kzalloc(sizeof(*tvlv_handler), GFP_ATOMIC); + if (!tvlv_handler) + return; + + tvlv_handler->ogm_handler = optr; + tvlv_handler->unicast_handler = uptr; + tvlv_handler->type = type; + tvlv_handler->version = version; + tvlv_handler->flags = flags; + kref_init(&tvlv_handler->refcount); + INIT_HLIST_NODE(&tvlv_handler->list); + + spin_lock_bh(&bat_priv->tvlv.handler_list_lock); + hlist_add_head_rcu(&tvlv_handler->list, &bat_priv->tvlv.handler_list); + spin_unlock_bh(&bat_priv->tvlv.handler_list_lock); +} + +/** + * batadv_tvlv_handler_unregister - unregister tvlv handler based on the + * provided type and version (both need to match) + * @bat_priv: the bat priv with all the soft interface information + * @type: tvlv handler type to be unregistered + * @version: tvlv handler version to be unregistered + */ +void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv, + u8 type, u8 version) +{ + struct batadv_tvlv_handler *tvlv_handler; + + tvlv_handler = batadv_tvlv_handler_get(bat_priv, type, version); + if (!tvlv_handler) + return; + + batadv_tvlv_handler_put(tvlv_handler); + spin_lock_bh(&bat_priv->tvlv.handler_list_lock); + hlist_del_rcu(&tvlv_handler->list); + spin_unlock_bh(&bat_priv->tvlv.handler_list_lock); + batadv_tvlv_handler_put(tvlv_handler); +} + +/** + * batadv_tvlv_unicast_send - send a unicast packet with tvlv payload to the + * specified host + * @bat_priv: the bat priv with all the soft interface information + * @src: source mac address of the unicast packet + * @dst: destination mac address of the unicast packet + * @type: tvlv type + * @version: tvlv version + * @tvlv_value: tvlv content + * @tvlv_value_len: tvlv content length + */ +void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src, + u8 *dst, u8 type, u8 version, + void *tvlv_value, u16 tvlv_value_len) +{ + struct batadv_unicast_tvlv_packet *unicast_tvlv_packet; + struct batadv_tvlv_hdr *tvlv_hdr; + struct batadv_orig_node *orig_node; + struct sk_buff *skb; + unsigned char *tvlv_buff; + unsigned int tvlv_len; + ssize_t hdr_len = sizeof(*unicast_tvlv_packet); + int res; + + orig_node = batadv_orig_hash_find(bat_priv, dst); + if (!orig_node) + return; + + tvlv_len = sizeof(*tvlv_hdr) + tvlv_value_len; + + skb = netdev_alloc_skb_ip_align(NULL, ETH_HLEN + hdr_len + tvlv_len); + if (!skb) + goto out; + + skb->priority = TC_PRIO_CONTROL; + skb_reserve(skb, ETH_HLEN); + tvlv_buff = skb_put(skb, sizeof(*unicast_tvlv_packet) + tvlv_len); + unicast_tvlv_packet = (struct batadv_unicast_tvlv_packet *)tvlv_buff; + unicast_tvlv_packet->packet_type = BATADV_UNICAST_TVLV; + unicast_tvlv_packet->version = BATADV_COMPAT_VERSION; + unicast_tvlv_packet->ttl = BATADV_TTL; + unicast_tvlv_packet->reserved = 0; + unicast_tvlv_packet->tvlv_len = htons(tvlv_len); + unicast_tvlv_packet->align = 0; + ether_addr_copy(unicast_tvlv_packet->src, src); + ether_addr_copy(unicast_tvlv_packet->dst, dst); + + tvlv_buff = (unsigned char *)(unicast_tvlv_packet + 1); + tvlv_hdr = (struct batadv_tvlv_hdr *)tvlv_buff; + tvlv_hdr->version = version; + tvlv_hdr->type = type; + tvlv_hdr->len = htons(tvlv_value_len); + tvlv_buff += sizeof(*tvlv_hdr); + memcpy(tvlv_buff, tvlv_value, tvlv_value_len); + + res = batadv_send_skb_to_orig(skb, orig_node, NULL); + if (res == -1) + kfree_skb(skb); +out: + batadv_orig_node_put(orig_node); +} diff --git a/net/batman-adv/tvlv.h b/net/batman-adv/tvlv.h new file mode 100644 index 000000000000..e4369b547b43 --- /dev/null +++ b/net/batman-adv/tvlv.h @@ -0,0 +1,61 @@ +/* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors: + * + * Marek Lindner, Simon Wunderlich + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _NET_BATMAN_ADV_TVLV_H_ +#define _NET_BATMAN_ADV_TVLV_H_ + +#include "main.h" + +#include <linux/types.h> + +struct batadv_ogm_packet; + +void batadv_tvlv_container_register(struct batadv_priv *bat_priv, + u8 type, u8 version, + void *tvlv_value, u16 tvlv_value_len); +u16 batadv_tvlv_container_ogm_append(struct batadv_priv *bat_priv, + unsigned char **packet_buff, + int *packet_buff_len, int packet_min_len); +void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv, + struct batadv_ogm_packet *batadv_ogm_packet, + struct batadv_orig_node *orig_node); +void batadv_tvlv_container_unregister(struct batadv_priv *bat_priv, + u8 type, u8 version); + +void batadv_tvlv_handler_register(struct batadv_priv *bat_priv, + void (*optr)(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig, + u8 flags, + void *tvlv_value, + u16 tvlv_value_len), + int (*uptr)(struct batadv_priv *bat_priv, + u8 *src, u8 *dst, + void *tvlv_value, + u16 tvlv_value_len), + u8 type, u8 version, u8 flags); +void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv, + u8 type, u8 version); +int batadv_tvlv_containers_process(struct batadv_priv *bat_priv, + bool ogm_source, + struct batadv_orig_node *orig_node, + u8 *src, u8 *dst, + void *tvlv_buff, u16 tvlv_buff_len); +void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src, + u8 *dst, u8 type, u8 version, + void *tvlv_value, u16 tvlv_value_len); + +#endif /* _NET_BATMAN_ADV_TVLV_H_ */ diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index ba846b078af8..43db7b61f8eb 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -33,6 +33,7 @@ #include <linux/types.h> #include <linux/wait.h> #include <linux/workqueue.h> +#include <uapi/linux/batman_adv.h> #include "packet.h" @@ -707,6 +708,8 @@ struct batadv_priv_debug_log { * @list: list of available gateway nodes * @list_lock: lock protecting gw_list & curr_gw * @curr_gw: pointer to currently selected gateway node + * @mode: gateway operation: off, client or server (see batadv_gw_modes) + * @sel_class: gateway selection class (applies if gw_mode client) * @bandwidth_down: advertised uplink download bandwidth (if gw_mode server) * @bandwidth_up: advertised uplink upload bandwidth (if gw_mode server) * @reselect: bool indicating a gateway re-selection is in progress @@ -715,6 +718,8 @@ struct batadv_priv_gw { struct hlist_head list; spinlock_t list_lock; /* protects gw_list & curr_gw */ struct batadv_gw_node __rcu *curr_gw; /* rcu protected pointer */ + atomic_t mode; + atomic_t sel_class; atomic_t bandwidth_down; atomic_t bandwidth_up; atomic_t reselect; @@ -751,14 +756,28 @@ struct batadv_priv_dat { #ifdef CONFIG_BATMAN_ADV_MCAST /** + * struct batadv_mcast_querier_state - IGMP/MLD querier state when bridged + * @exists: whether a querier exists in the mesh + * @shadowing: if a querier exists, whether it is potentially shadowing + * multicast listeners (i.e. querier is behind our own bridge segment) + */ +struct batadv_mcast_querier_state { + bool exists; + bool shadowing; +}; + +/** * struct batadv_priv_mcast - per mesh interface mcast data * @mla_list: list of multicast addresses we are currently announcing via TT * @want_all_unsnoopables_list: a list of orig_nodes wanting all unsnoopable * multicast traffic * @want_all_ipv4_list: a list of orig_nodes wanting all IPv4 multicast traffic * @want_all_ipv6_list: a list of orig_nodes wanting all IPv6 multicast traffic + * @querier_ipv4: the current state of an IGMP querier in the mesh + * @querier_ipv6: the current state of an MLD querier in the mesh * @flags: the flags we have last sent in our mcast tvlv * @enabled: whether the multicast tvlv is currently enabled + * @bridged: whether the soft interface has a bridge on top * @num_disabled: number of nodes that have no mcast tvlv * @num_want_all_unsnoopables: number of nodes wanting unsnoopable IP traffic * @num_want_all_ipv4: counter for items in want_all_ipv4_list @@ -771,8 +790,11 @@ struct batadv_priv_mcast { struct hlist_head want_all_unsnoopables_list; struct hlist_head want_all_ipv4_list; struct hlist_head want_all_ipv6_list; + struct batadv_mcast_querier_state querier_ipv4; + struct batadv_mcast_querier_state querier_ipv6; u8 flags; bool enabled; + bool bridged; atomic_t num_disabled; atomic_t num_want_all_unsnoopables; atomic_t num_want_all_ipv4; @@ -812,6 +834,111 @@ struct batadv_priv_nc { }; /** + * struct batadv_tp_unacked - unacked packet meta-information + * @seqno: seqno of the unacked packet + * @len: length of the packet + * @list: list node for batadv_tp_vars::unacked_list + * + * This struct is supposed to represent a buffer unacked packet. However, since + * the purpose of the TP meter is to count the traffic only, there is no need to + * store the entire sk_buff, the starting offset and the length are enough + */ +struct batadv_tp_unacked { + u32 seqno; + u16 len; + struct list_head list; +}; + +/** + * enum batadv_tp_meter_role - Modus in tp meter session + * @BATADV_TP_RECEIVER: Initialized as receiver + * @BATADV_TP_SENDER: Initialized as sender + */ +enum batadv_tp_meter_role { + BATADV_TP_RECEIVER, + BATADV_TP_SENDER +}; + +/** + * struct batadv_tp_vars - tp meter private variables per session + * @list: list node for bat_priv::tp_list + * @timer: timer for ack (receiver) and retry (sender) + * @bat_priv: pointer to the mesh object + * @start_time: start time in jiffies + * @other_end: mac address of remote + * @role: receiver/sender modi + * @sending: sending binary semaphore: 1 if sending, 0 is not + * @reason: reason for a stopped session + * @finish_work: work item for the finishing procedure + * @test_length: test length in milliseconds + * @session: TP session identifier + * @icmp_uid: local ICMP "socket" index + * @dec_cwnd: decimal part of the cwnd used during linear growth + * @cwnd: current size of the congestion window + * @cwnd_lock: lock do protect @cwnd & @dec_cwnd + * @ss_threshold: Slow Start threshold. Once cwnd exceeds this value the + * connection switches to the Congestion Avoidance state + * @last_acked: last acked byte + * @last_sent: last sent byte, not yet acked + * @tot_sent: amount of data sent/ACKed so far + * @dup_acks: duplicate ACKs counter + * @fast_recovery: true if in Fast Recovery mode + * @recover: last sent seqno when entering Fast Recovery + * @rto: sender timeout + * @srtt: smoothed RTT scaled by 2^3 + * @rttvar: RTT variation scaled by 2^2 + * @more_bytes: waiting queue anchor when waiting for more ack/retry timeout + * @prerandom_offset: offset inside the prerandom buffer + * @prerandom_lock: spinlock protecting access to prerandom_offset + * @last_recv: last in-order received packet + * @unacked_list: list of unacked packets (meta-info only) + * @unacked_lock: protect unacked_list + * @last_recv_time: time time (jiffies) a msg was received + * @refcount: number of context where the object is used + * @rcu: struct used for freeing in an RCU-safe manner + */ +struct batadv_tp_vars { + struct hlist_node list; + struct timer_list timer; + struct batadv_priv *bat_priv; + unsigned long start_time; + u8 other_end[ETH_ALEN]; + enum batadv_tp_meter_role role; + atomic_t sending; + enum batadv_tp_meter_reason reason; + struct delayed_work finish_work; + u32 test_length; + u8 session[2]; + u8 icmp_uid; + + /* sender variables */ + u16 dec_cwnd; + u32 cwnd; + spinlock_t cwnd_lock; /* Protects cwnd & dec_cwnd */ + u32 ss_threshold; + atomic_t last_acked; + u32 last_sent; + atomic64_t tot_sent; + atomic_t dup_acks; + bool fast_recovery; + u32 recover; + u32 rto; + u32 srtt; + u32 rttvar; + wait_queue_head_t more_bytes; + u32 prerandom_offset; + spinlock_t prerandom_lock; /* Protects prerandom_offset */ + + /* receiver variables */ + u32 last_recv; + struct list_head unacked_list; + spinlock_t unacked_lock; /* Protects unacked_list */ + unsigned long last_recv_time; + struct kref refcount; + struct rcu_head rcu; +}; + +/** * struct batadv_softif_vlan - per VLAN attributes set * @bat_priv: pointer to the mesh object * @vid: VLAN identifier @@ -865,8 +992,6 @@ struct batadv_priv_bat_v { * enabled * @multicast_mode: Enable or disable multicast optimizations on this node's * sender/originating side - * @gw_mode: gateway operation: off, client or server (see batadv_gw_modes) - * @gw_sel_class: gateway selection class (applies if gw_mode client) * @orig_interval: OGM broadcast interval in milliseconds * @hop_penalty: penalty which will be applied to an OGM's tq-field on every hop * @log_level: configured log level (see batadv_dbg_level) @@ -881,14 +1006,17 @@ struct batadv_priv_bat_v { * @debug_dir: dentry for debugfs batman-adv subdirectory * @forw_bat_list: list of aggregated OGMs that will be forwarded * @forw_bcast_list: list of broadcast packets that will be rebroadcasted + * @tp_list: list of tp sessions + * @tp_num: number of currently active tp sessions * @orig_hash: hash table containing mesh participants (orig nodes) * @forw_bat_list_lock: lock protecting forw_bat_list * @forw_bcast_list_lock: lock protecting forw_bcast_list + * @tp_list_lock: spinlock protecting @tp_list * @orig_work: work queue callback item for orig node purging * @cleanup_work: work queue callback item for soft-interface deinit * @primary_if: one of the hard-interfaces assigned to this mesh interface * becomes the primary interface - * @bat_algo_ops: routing algorithm used by this mesh interface + * @algo_ops: routing algorithm used by this mesh interface * @softif_vlan_list: a list of softif_vlan structs, one per VLAN created on top * of the mesh interface represented by this object * @softif_vlan_list_lock: lock protecting softif_vlan_list @@ -922,8 +1050,6 @@ struct batadv_priv { #ifdef CONFIG_BATMAN_ADV_MCAST atomic_t multicast_mode; #endif - atomic_t gw_mode; - atomic_t gw_sel_class; atomic_t orig_interval; atomic_t hop_penalty; #ifdef CONFIG_BATMAN_ADV_DEBUG @@ -939,13 +1065,16 @@ struct batadv_priv { struct dentry *debug_dir; struct hlist_head forw_bat_list; struct hlist_head forw_bcast_list; + struct hlist_head tp_list; struct batadv_hashtable *orig_hash; spinlock_t forw_bat_list_lock; /* protects forw_bat_list */ spinlock_t forw_bcast_list_lock; /* protects forw_bcast_list */ + spinlock_t tp_list_lock; /* protects tp_list */ + atomic_t tp_num; struct delayed_work orig_work; struct work_struct cleanup_work; struct batadv_hard_iface __rcu *primary_if; /* rcu protected pointer */ - struct batadv_algo_ops *bat_algo_ops; + struct batadv_algo_ops *algo_ops; struct hlist_head softif_vlan_list; spinlock_t softif_vlan_list_lock; /* protects softif_vlan_list */ #ifdef CONFIG_BATMAN_ADV_BLA @@ -1261,66 +1390,77 @@ struct batadv_forw_packet { }; /** + * struct batadv_algo_iface_ops - mesh algorithm callbacks (interface specific) + * @activate: start routing mechanisms when hard-interface is brought up + * @enable: init routing info when hard-interface is enabled + * @disable: de-init routing info when hard-interface is disabled + * @update_mac: (re-)init mac addresses of the protocol information + * belonging to this hard-interface + * @primary_set: called when primary interface is selected / changed + */ +struct batadv_algo_iface_ops { + void (*activate)(struct batadv_hard_iface *hard_iface); + int (*enable)(struct batadv_hard_iface *hard_iface); + void (*disable)(struct batadv_hard_iface *hard_iface); + void (*update_mac)(struct batadv_hard_iface *hard_iface); + void (*primary_set)(struct batadv_hard_iface *hard_iface); +}; + +/** + * struct batadv_algo_neigh_ops - mesh algorithm callbacks (neighbour specific) + * @hardif_init: called on creation of single hop entry + * @cmp: compare the metrics of two neighbors for their respective outgoing + * interfaces + * @is_similar_or_better: check if neigh1 is equally similar or better than + * neigh2 for their respective outgoing interface from the metric prospective + * @print: print the single hop neighbor list (optional) + */ +struct batadv_algo_neigh_ops { + void (*hardif_init)(struct batadv_hardif_neigh_node *neigh); + int (*cmp)(struct batadv_neigh_node *neigh1, + struct batadv_hard_iface *if_outgoing1, + struct batadv_neigh_node *neigh2, + struct batadv_hard_iface *if_outgoing2); + bool (*is_similar_or_better)(struct batadv_neigh_node *neigh1, + struct batadv_hard_iface *if_outgoing1, + struct batadv_neigh_node *neigh2, + struct batadv_hard_iface *if_outgoing2); + void (*print)(struct batadv_priv *priv, struct seq_file *seq); +}; + +/** + * struct batadv_algo_orig_ops - mesh algorithm callbacks (originator specific) + * @free: free the resources allocated by the routing algorithm for an orig_node + * object + * @add_if: ask the routing algorithm to apply the needed changes to the + * orig_node due to a new hard-interface being added into the mesh + * @del_if: ask the routing algorithm to apply the needed changes to the + * orig_node due to an hard-interface being removed from the mesh + * @print: print the originator table (optional) + */ +struct batadv_algo_orig_ops { + void (*free)(struct batadv_orig_node *orig_node); + int (*add_if)(struct batadv_orig_node *orig_node, int max_if_num); + int (*del_if)(struct batadv_orig_node *orig_node, int max_if_num, + int del_if_num); + void (*print)(struct batadv_priv *priv, struct seq_file *seq, + struct batadv_hard_iface *hard_iface); +}; + +/** * struct batadv_algo_ops - mesh algorithm callbacks * @list: list node for the batadv_algo_list * @name: name of the algorithm - * @bat_iface_activate: start routing mechanisms when hard-interface is brought - * up - * @bat_iface_enable: init routing info when hard-interface is enabled - * @bat_iface_disable: de-init routing info when hard-interface is disabled - * @bat_iface_update_mac: (re-)init mac addresses of the protocol information - * belonging to this hard-interface - * @bat_primary_iface_set: called when primary interface is selected / changed - * @bat_ogm_schedule: prepare a new outgoing OGM for the send queue - * @bat_ogm_emit: send scheduled OGM - * @bat_hardif_neigh_init: called on creation of single hop entry - * @bat_neigh_cmp: compare the metrics of two neighbors for their respective - * outgoing interfaces - * @bat_neigh_is_similar_or_better: check if neigh1 is equally similar or - * better than neigh2 for their respective outgoing interface from the metric - * prospective - * @bat_neigh_print: print the single hop neighbor list (optional) - * @bat_neigh_free: free the resources allocated by the routing algorithm for a - * neigh_node object - * @bat_orig_print: print the originator table (optional) - * @bat_orig_free: free the resources allocated by the routing algorithm for an - * orig_node object - * @bat_orig_add_if: ask the routing algorithm to apply the needed changes to - * the orig_node due to a new hard-interface being added into the mesh - * @bat_orig_del_if: ask the routing algorithm to apply the needed changes to - * the orig_node due to an hard-interface being removed from the mesh + * @iface: callbacks related to interface handling + * @neigh: callbacks related to neighbors handling + * @orig: callbacks related to originators handling */ struct batadv_algo_ops { struct hlist_node list; char *name; - void (*bat_iface_activate)(struct batadv_hard_iface *hard_iface); - int (*bat_iface_enable)(struct batadv_hard_iface *hard_iface); - void (*bat_iface_disable)(struct batadv_hard_iface *hard_iface); - void (*bat_iface_update_mac)(struct batadv_hard_iface *hard_iface); - void (*bat_primary_iface_set)(struct batadv_hard_iface *hard_iface); - void (*bat_ogm_schedule)(struct batadv_hard_iface *hard_iface); - void (*bat_ogm_emit)(struct batadv_forw_packet *forw_packet); - /* neigh_node handling API */ - void (*bat_hardif_neigh_init)(struct batadv_hardif_neigh_node *neigh); - int (*bat_neigh_cmp)(struct batadv_neigh_node *neigh1, - struct batadv_hard_iface *if_outgoing1, - struct batadv_neigh_node *neigh2, - struct batadv_hard_iface *if_outgoing2); - bool (*bat_neigh_is_similar_or_better) - (struct batadv_neigh_node *neigh1, - struct batadv_hard_iface *if_outgoing1, - struct batadv_neigh_node *neigh2, - struct batadv_hard_iface *if_outgoing2); - void (*bat_neigh_print)(struct batadv_priv *priv, struct seq_file *seq); - void (*bat_neigh_free)(struct batadv_neigh_node *neigh); - /* orig_node handling API */ - void (*bat_orig_print)(struct batadv_priv *priv, struct seq_file *seq, - struct batadv_hard_iface *hard_iface); - void (*bat_orig_free)(struct batadv_orig_node *orig_node); - int (*bat_orig_add_if)(struct batadv_orig_node *orig_node, - int max_if_num); - int (*bat_orig_del_if)(struct batadv_orig_node *orig_node, - int max_if_num, int del_if_num); + struct batadv_algo_iface_ops iface; + struct batadv_algo_neigh_ops neigh; + struct batadv_algo_orig_ops orig; }; /** diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 2c8095a5d824..8eecd0ec22f2 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -104,8 +104,16 @@ static int br_dev_init(struct net_device *dev) return -ENOMEM; err = br_vlan_init(br); - if (err) + if (err) { free_percpu(br->stats); + return err; + } + + err = br_multicast_init_stats(br); + if (err) { + free_percpu(br->stats); + br_vlan_flush(br); + } br_set_lockdep_class(dev); return err; @@ -341,6 +349,8 @@ static const struct net_device_ops br_netdev_ops = { .ndo_add_slave = br_add_slave, .ndo_del_slave = br_del_slave, .ndo_fix_features = br_fix_features, + .ndo_neigh_construct = netdev_default_l2upper_neigh_construct, + .ndo_neigh_destroy = netdev_default_l2upper_neigh_destroy, .ndo_fdb_add = br_fdb_add, .ndo_fdb_del = br_fdb_delete, .ndo_fdb_dump = br_fdb_dump, diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index f47759f05b6d..6c196037d818 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -198,8 +198,10 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb, struct sk_buff *skb), bool unicast) { - struct net_bridge_port *p; + u8 igmp_type = br_multicast_igmp_type(skb); + __be16 proto = skb->protocol; struct net_bridge_port *prev; + struct net_bridge_port *p; prev = NULL; @@ -218,6 +220,9 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb, prev = maybe_deliver(prev, p, skb, __packet_hook); if (IS_ERR(prev)) goto out; + if (prev == p) + br_multicast_count(p->br, p, proto, igmp_type, + BR_MCAST_DIR_TX); } if (!prev) @@ -257,9 +262,12 @@ static void br_multicast_flood(struct net_bridge_mdb_entry *mdst, struct sk_buff *skb)) { struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev; + u8 igmp_type = br_multicast_igmp_type(skb); struct net_bridge *br = netdev_priv(dev); struct net_bridge_port *prev = NULL; struct net_bridge_port_group *p; + __be16 proto = skb->protocol; + struct hlist_node *rp; rp = rcu_dereference(hlist_first_rcu(&br->router_list)); @@ -277,6 +285,9 @@ static void br_multicast_flood(struct net_bridge_mdb_entry *mdst, prev = maybe_deliver(prev, port, skb, __packet_hook); if (IS_ERR(prev)) goto out; + if (prev == port) + br_multicast_count(port->br, port, proto, igmp_type, + BR_MCAST_DIR_TX); if ((unsigned long)lport >= (unsigned long)port) p = rcu_dereference(p->next); diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 8217aecf025b..f2fede05d32c 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -345,8 +345,8 @@ static int find_portno(struct net_bridge *br) static struct net_bridge_port *new_nbp(struct net_bridge *br, struct net_device *dev) { - int index; struct net_bridge_port *p; + int index, err; index = find_portno(br); if (index < 0) @@ -366,7 +366,12 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br, br_init_port(p); br_set_state(p, BR_STATE_DISABLED); br_stp_port_timer_init(p); - br_multicast_add_port(p); + err = br_multicast_add_port(p); + if (err) { + dev_put(dev); + kfree(p); + p = ERR_PTR(err); + } return p; } diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 43d2cd862bc2..786602bc0567 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -60,6 +60,9 @@ static int br_pass_frame_up(struct sk_buff *skb) skb = br_handle_vlan(br, vg, skb); if (!skb) return NET_RX_DROP; + /* update the multicast stats if the packet is IGMP/MLD */ + br_multicast_count(br, NULL, skb->protocol, br_multicast_igmp_type(skb), + BR_MCAST_DIR_TX); return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, dev_net(indev), NULL, skb, indev, NULL, diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 43844144c9c4..e405eef0ae2e 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -361,7 +361,8 @@ out: } static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br, - __be32 group) + __be32 group, + u8 *igmp_type) { struct sk_buff *skb; struct igmphdr *ih; @@ -411,6 +412,7 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br, skb_set_transport_header(skb, skb->len); ih = igmp_hdr(skb); + *igmp_type = IGMP_HOST_MEMBERSHIP_QUERY; ih->type = IGMP_HOST_MEMBERSHIP_QUERY; ih->code = (group ? br->multicast_last_member_interval : br->multicast_query_response_interval) / @@ -428,7 +430,8 @@ out: #if IS_ENABLED(CONFIG_IPV6) static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, - const struct in6_addr *group) + const struct in6_addr *grp, + u8 *igmp_type) { struct sk_buff *skb; struct ipv6hdr *ip6h; @@ -487,16 +490,17 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, skb_set_transport_header(skb, skb->len); mldq = (struct mld_msg *) icmp6_hdr(skb); - interval = ipv6_addr_any(group) ? + interval = ipv6_addr_any(grp) ? br->multicast_query_response_interval : br->multicast_last_member_interval; + *igmp_type = ICMPV6_MGM_QUERY; mldq->mld_type = ICMPV6_MGM_QUERY; mldq->mld_code = 0; mldq->mld_cksum = 0; mldq->mld_maxdelay = htons((u16)jiffies_to_msecs(interval)); mldq->mld_reserved = 0; - mldq->mld_mca = *group; + mldq->mld_mca = *grp; /* checksum */ mldq->mld_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, @@ -513,14 +517,16 @@ out: #endif static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br, - struct br_ip *addr) + struct br_ip *addr, + u8 *igmp_type) { switch (addr->proto) { case htons(ETH_P_IP): - return br_ip4_multicast_alloc_query(br, addr->u.ip4); + return br_ip4_multicast_alloc_query(br, addr->u.ip4, igmp_type); #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): - return br_ip6_multicast_alloc_query(br, &addr->u.ip6); + return br_ip6_multicast_alloc_query(br, &addr->u.ip6, + igmp_type); #endif } return NULL; @@ -829,18 +835,23 @@ static void __br_multicast_send_query(struct net_bridge *br, struct br_ip *ip) { struct sk_buff *skb; + u8 igmp_type; - skb = br_multicast_alloc_query(br, ip); + skb = br_multicast_alloc_query(br, ip, &igmp_type); if (!skb) return; if (port) { skb->dev = port->dev; + br_multicast_count(br, port, skb->protocol, igmp_type, + BR_MCAST_DIR_TX); NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, dev_net(port->dev), NULL, skb, NULL, skb->dev, br_dev_queue_push_xmit); } else { br_multicast_select_own_querier(br, ip, skb); + br_multicast_count(br, port, skb->protocol, igmp_type, + BR_MCAST_DIR_RX); netif_rx(skb); } } @@ -918,7 +929,7 @@ static void br_ip6_multicast_port_query_expired(unsigned long data) } #endif -void br_multicast_add_port(struct net_bridge_port *port) +int br_multicast_add_port(struct net_bridge_port *port) { port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; @@ -930,6 +941,11 @@ void br_multicast_add_port(struct net_bridge_port *port) setup_timer(&port->ip6_own_query.timer, br_ip6_multicast_port_query_expired, (unsigned long)port); #endif + port->mcast_stats = netdev_alloc_pcpu_stats(struct bridge_mcast_stats); + if (!port->mcast_stats) + return -ENOMEM; + + return 0; } void br_multicast_del_port(struct net_bridge_port *port) @@ -944,6 +960,7 @@ void br_multicast_del_port(struct net_bridge_port *port) br_multicast_del_pg(br, pg); spin_unlock_bh(&br->multicast_lock); del_timer_sync(&port->multicast_router_timer); + free_percpu(port->mcast_stats); } static void br_multicast_enable(struct bridge_mcast_own_query *query) @@ -1583,6 +1600,39 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br, } #endif +static void br_multicast_err_count(const struct net_bridge *br, + const struct net_bridge_port *p, + __be16 proto) +{ + struct bridge_mcast_stats __percpu *stats; + struct bridge_mcast_stats *pstats; + + if (!br->multicast_stats_enabled) + return; + + if (p) + stats = p->mcast_stats; + else + stats = br->mcast_stats; + if (WARN_ON(!stats)) + return; + + pstats = this_cpu_ptr(stats); + + u64_stats_update_begin(&pstats->syncp); + switch (proto) { + case htons(ETH_P_IP): + pstats->mstats.igmp_parse_errors++; + break; +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + pstats->mstats.mld_parse_errors++; + break; +#endif + } + u64_stats_update_end(&pstats->syncp); +} + static int br_multicast_ipv4_rcv(struct net_bridge *br, struct net_bridge_port *port, struct sk_buff *skb, @@ -1599,11 +1649,12 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, BR_INPUT_SKB_CB(skb)->mrouters_only = 1; return 0; } else if (err < 0) { + br_multicast_err_count(br, port, skb->protocol); return err; } - BR_INPUT_SKB_CB(skb)->igmp = 1; ih = igmp_hdr(skb); + BR_INPUT_SKB_CB(skb)->igmp = ih->type; switch (ih->type) { case IGMP_HOST_MEMBERSHIP_REPORT: @@ -1625,6 +1676,9 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, if (skb_trimmed && skb_trimmed != skb) kfree_skb(skb_trimmed); + br_multicast_count(br, port, skb->protocol, BR_INPUT_SKB_CB(skb)->igmp, + BR_MCAST_DIR_RX); + return err; } @@ -1645,11 +1699,12 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, BR_INPUT_SKB_CB(skb)->mrouters_only = 1; return 0; } else if (err < 0) { + br_multicast_err_count(br, port, skb->protocol); return err; } - BR_INPUT_SKB_CB(skb)->igmp = 1; mld = (struct mld_msg *)skb_transport_header(skb); + BR_INPUT_SKB_CB(skb)->igmp = mld->mld_type; switch (mld->mld_type) { case ICMPV6_MGM_REPORT: @@ -1670,6 +1725,9 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, if (skb_trimmed && skb_trimmed != skb) kfree_skb(skb_trimmed); + br_multicast_count(br, port, skb->protocol, BR_INPUT_SKB_CB(skb)->igmp, + BR_MCAST_DIR_RX); + return err; } #endif @@ -1677,6 +1735,8 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, struct sk_buff *skb, u16 vid) { + int ret = 0; + BR_INPUT_SKB_CB(skb)->igmp = 0; BR_INPUT_SKB_CB(skb)->mrouters_only = 0; @@ -1685,14 +1745,16 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, switch (skb->protocol) { case htons(ETH_P_IP): - return br_multicast_ipv4_rcv(br, port, skb, vid); + ret = br_multicast_ipv4_rcv(br, port, skb, vid); + break; #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): - return br_multicast_ipv6_rcv(br, port, skb, vid); + ret = br_multicast_ipv6_rcv(br, port, skb, vid); + break; #endif } - return 0; + return ret; } static void br_multicast_query_expired(struct net_bridge *br, @@ -1831,6 +1893,8 @@ void br_multicast_dev_del(struct net_bridge *br) out: spin_unlock_bh(&br->multicast_lock); + + free_percpu(br->mcast_stats); } int br_multicast_set_router(struct net_bridge *br, unsigned long val) @@ -2185,3 +2249,128 @@ unlock: return ret; } EXPORT_SYMBOL_GPL(br_multicast_has_querier_adjacent); + +static void br_mcast_stats_add(struct bridge_mcast_stats __percpu *stats, + __be16 proto, u8 type, u8 dir) +{ + struct bridge_mcast_stats *pstats = this_cpu_ptr(stats); + + u64_stats_update_begin(&pstats->syncp); + switch (proto) { + case htons(ETH_P_IP): + switch (type) { + case IGMP_HOST_MEMBERSHIP_REPORT: + pstats->mstats.igmp_v1reports[dir]++; + break; + case IGMPV2_HOST_MEMBERSHIP_REPORT: + pstats->mstats.igmp_v2reports[dir]++; + break; + case IGMPV3_HOST_MEMBERSHIP_REPORT: + pstats->mstats.igmp_v3reports[dir]++; + break; + case IGMP_HOST_MEMBERSHIP_QUERY: + pstats->mstats.igmp_queries[dir]++; + break; + case IGMP_HOST_LEAVE_MESSAGE: + pstats->mstats.igmp_leaves[dir]++; + break; + } + break; +#if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + switch (type) { + case ICMPV6_MGM_REPORT: + pstats->mstats.mld_v1reports[dir]++; + break; + case ICMPV6_MLD2_REPORT: + pstats->mstats.mld_v2reports[dir]++; + break; + case ICMPV6_MGM_QUERY: + pstats->mstats.mld_queries[dir]++; + break; + case ICMPV6_MGM_REDUCTION: + pstats->mstats.mld_leaves[dir]++; + break; + } + break; +#endif /* CONFIG_IPV6 */ + } + u64_stats_update_end(&pstats->syncp); +} + +void br_multicast_count(struct net_bridge *br, const struct net_bridge_port *p, + __be16 proto, u8 type, u8 dir) +{ + struct bridge_mcast_stats __percpu *stats; + + /* if multicast_disabled is true then igmp type can't be set */ + if (!type || !br->multicast_stats_enabled) + return; + + if (p) + stats = p->mcast_stats; + else + stats = br->mcast_stats; + if (WARN_ON(!stats)) + return; + + br_mcast_stats_add(stats, proto, type, dir); +} + +int br_multicast_init_stats(struct net_bridge *br) +{ + br->mcast_stats = netdev_alloc_pcpu_stats(struct bridge_mcast_stats); + if (!br->mcast_stats) + return -ENOMEM; + + return 0; +} + +static void mcast_stats_add_dir(u64 *dst, u64 *src) +{ + dst[BR_MCAST_DIR_RX] += src[BR_MCAST_DIR_RX]; + dst[BR_MCAST_DIR_TX] += src[BR_MCAST_DIR_TX]; +} + +void br_multicast_get_stats(const struct net_bridge *br, + const struct net_bridge_port *p, + struct br_mcast_stats *dest) +{ + struct bridge_mcast_stats __percpu *stats; + struct br_mcast_stats tdst; + int i; + + memset(dest, 0, sizeof(*dest)); + if (p) + stats = p->mcast_stats; + else + stats = br->mcast_stats; + if (WARN_ON(!stats)) + return; + + memset(&tdst, 0, sizeof(tdst)); + for_each_possible_cpu(i) { + struct bridge_mcast_stats *cpu_stats = per_cpu_ptr(stats, i); + struct br_mcast_stats temp; + unsigned int start; + + do { + start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); + memcpy(&temp, &cpu_stats->mstats, sizeof(temp)); + } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); + + mcast_stats_add_dir(tdst.igmp_queries, temp.igmp_queries); + mcast_stats_add_dir(tdst.igmp_leaves, temp.igmp_leaves); + mcast_stats_add_dir(tdst.igmp_v1reports, temp.igmp_v1reports); + mcast_stats_add_dir(tdst.igmp_v2reports, temp.igmp_v2reports); + mcast_stats_add_dir(tdst.igmp_v3reports, temp.igmp_v3reports); + tdst.igmp_parse_errors += temp.igmp_parse_errors; + + mcast_stats_add_dir(tdst.mld_queries, temp.mld_queries); + mcast_stats_add_dir(tdst.mld_leaves, temp.mld_leaves); + mcast_stats_add_dir(tdst.mld_v1reports, temp.mld_v1reports); + mcast_stats_add_dir(tdst.mld_v2reports, temp.mld_v2reports); + tdst.mld_parse_errors += temp.mld_parse_errors; + } + memcpy(dest, &tdst, sizeof(*dest)); +} diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 2d25979273a6..77e7f69bf80d 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -700,7 +700,7 @@ static int br_nf_ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, int (*output)(struct net *, struct sock *, struct sk_buff *)) { - unsigned int mtu = ip_skb_dst_mtu(skb); + unsigned int mtu = ip_skb_dst_mtu(sk, skb); struct iphdr *iph = ip_hdr(skb); if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) || diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 85e89f693589..f2a29e467e78 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -851,6 +851,7 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = { [IFLA_BR_NF_CALL_ARPTABLES] = { .type = NLA_U8 }, [IFLA_BR_VLAN_DEFAULT_PVID] = { .type = NLA_U16 }, [IFLA_BR_VLAN_STATS_ENABLED] = { .type = NLA_U8 }, + [IFLA_BR_MCAST_STATS_ENABLED] = { .type = NLA_U8 }, }; static int br_changelink(struct net_device *brdev, struct nlattr *tb[], @@ -1055,6 +1056,13 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], br->multicast_startup_query_interval = clock_t_to_jiffies(val); } + + if (data[IFLA_BR_MCAST_STATS_ENABLED]) { + __u8 mcast_stats; + + mcast_stats = nla_get_u8(data[IFLA_BR_MCAST_STATS_ENABLED]); + br->multicast_stats_enabled = !!mcast_stats; + } #endif #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) if (data[IFLA_BR_NF_CALL_IPTABLES]) { @@ -1110,6 +1118,7 @@ static size_t br_get_size(const struct net_device *brdev) nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_SNOOPING */ nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_QUERY_USE_IFADDR */ nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_QUERIER */ + nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_STATS_ENABLED */ nla_total_size(sizeof(u32)) + /* IFLA_BR_MCAST_HASH_ELASTICITY */ nla_total_size(sizeof(u32)) + /* IFLA_BR_MCAST_HASH_MAX */ nla_total_size(sizeof(u32)) + /* IFLA_BR_MCAST_LAST_MEMBER_CNT */ @@ -1187,6 +1196,8 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) nla_put_u8(skb, IFLA_BR_MCAST_QUERY_USE_IFADDR, br->multicast_query_use_ifaddr) || nla_put_u8(skb, IFLA_BR_MCAST_QUERIER, br->multicast_querier) || + nla_put_u8(skb, IFLA_BR_MCAST_STATS_ENABLED, + br->multicast_stats_enabled) || nla_put_u32(skb, IFLA_BR_MCAST_HASH_ELASTICITY, br->hash_elasticity) || nla_put_u32(skb, IFLA_BR_MCAST_HASH_MAX, br->hash_max) || @@ -1234,7 +1245,7 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) return 0; } -static size_t br_get_linkxstats_size(const struct net_device *dev) +static size_t bridge_get_linkxstats_size(const struct net_device *dev) { struct net_bridge *br = netdev_priv(dev); struct net_bridge_vlan_group *vg; @@ -1242,53 +1253,88 @@ static size_t br_get_linkxstats_size(const struct net_device *dev) int numvls = 0; vg = br_vlan_group(br); - if (!vg) - return 0; - - /* we need to count all, even placeholder entries */ - list_for_each_entry(v, &vg->vlan_list, vlist) - numvls++; + if (vg) { + /* we need to count all, even placeholder entries */ + list_for_each_entry(v, &vg->vlan_list, vlist) + numvls++; + } - /* account for the vlans and the link xstats type nest attribute */ return numvls * nla_total_size(sizeof(struct bridge_vlan_xstats)) + + nla_total_size(sizeof(struct br_mcast_stats)) + nla_total_size(0); } -static int br_fill_linkxstats(struct sk_buff *skb, const struct net_device *dev, - int *prividx) +static size_t brport_get_linkxstats_size(const struct net_device *dev) +{ + return nla_total_size(sizeof(struct br_mcast_stats)) + + nla_total_size(0); +} + +static size_t br_get_linkxstats_size(const struct net_device *dev, int attr) +{ + size_t retsize = 0; + + switch (attr) { + case IFLA_STATS_LINK_XSTATS: + retsize = bridge_get_linkxstats_size(dev); + break; + case IFLA_STATS_LINK_XSTATS_SLAVE: + retsize = brport_get_linkxstats_size(dev); + break; + } + + return retsize; +} + +static int bridge_fill_linkxstats(struct sk_buff *skb, + const struct net_device *dev, + int *prividx) { struct net_bridge *br = netdev_priv(dev); + struct nlattr *nla __maybe_unused; struct net_bridge_vlan_group *vg; struct net_bridge_vlan *v; struct nlattr *nest; int vl_idx = 0; - vg = br_vlan_group(br); - if (!vg) - goto out; nest = nla_nest_start(skb, LINK_XSTATS_TYPE_BRIDGE); if (!nest) return -EMSGSIZE; - list_for_each_entry(v, &vg->vlan_list, vlist) { - struct bridge_vlan_xstats vxi; - struct br_vlan_stats stats; - if (++vl_idx < *prividx) - continue; - memset(&vxi, 0, sizeof(vxi)); - vxi.vid = v->vid; - br_vlan_get_stats(v, &stats); - vxi.rx_bytes = stats.rx_bytes; - vxi.rx_packets = stats.rx_packets; - vxi.tx_bytes = stats.tx_bytes; - vxi.tx_packets = stats.tx_packets; - - if (nla_put(skb, BRIDGE_XSTATS_VLAN, sizeof(vxi), &vxi)) + vg = br_vlan_group(br); + if (vg) { + list_for_each_entry(v, &vg->vlan_list, vlist) { + struct bridge_vlan_xstats vxi; + struct br_vlan_stats stats; + + if (++vl_idx < *prividx) + continue; + memset(&vxi, 0, sizeof(vxi)); + vxi.vid = v->vid; + br_vlan_get_stats(v, &stats); + vxi.rx_bytes = stats.rx_bytes; + vxi.rx_packets = stats.rx_packets; + vxi.tx_bytes = stats.tx_bytes; + vxi.tx_packets = stats.tx_packets; + + if (nla_put(skb, BRIDGE_XSTATS_VLAN, sizeof(vxi), &vxi)) + goto nla_put_failure; + } + } + +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + if (++vl_idx >= *prividx) { + nla = nla_reserve_64bit(skb, BRIDGE_XSTATS_MCAST, + sizeof(struct br_mcast_stats), + BRIDGE_XSTATS_PAD); + if (!nla) goto nla_put_failure; + br_multicast_get_stats(br, NULL, nla_data(nla)); } +#endif nla_nest_end(skb, nest); *prividx = 0; -out: + return 0; nla_put_failure: @@ -1298,6 +1344,52 @@ nla_put_failure: return -EMSGSIZE; } +static int brport_fill_linkxstats(struct sk_buff *skb, + const struct net_device *dev, + int *prividx) +{ + struct net_bridge_port *p = br_port_get_rtnl(dev); + struct nlattr *nla __maybe_unused; + struct nlattr *nest; + + if (!p) + return 0; + + nest = nla_nest_start(skb, LINK_XSTATS_TYPE_BRIDGE); + if (!nest) + return -EMSGSIZE; +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + nla = nla_reserve_64bit(skb, BRIDGE_XSTATS_MCAST, + sizeof(struct br_mcast_stats), + BRIDGE_XSTATS_PAD); + if (!nla) { + nla_nest_end(skb, nest); + return -EMSGSIZE; + } + br_multicast_get_stats(p->br, p, nla_data(nla)); +#endif + nla_nest_end(skb, nest); + + return 0; +} + +static int br_fill_linkxstats(struct sk_buff *skb, const struct net_device *dev, + int *prividx, int attr) +{ + int ret = -EINVAL; + + switch (attr) { + case IFLA_STATS_LINK_XSTATS: + ret = bridge_fill_linkxstats(skb, dev, prividx); + break; + case IFLA_STATS_LINK_XSTATS_SLAVE: + ret = brport_fill_linkxstats(skb, dev, prividx); + break; + } + + return ret; +} + static struct rtnl_af_ops br_af_ops __read_mostly = { .family = AF_BRIDGE, .get_link_af_size = br_get_link_af_size_filtered, diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 52edecf3c294..4dc851166ad1 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -75,6 +75,12 @@ struct bridge_mcast_querier { struct br_ip addr; struct net_bridge_port __rcu *port; }; + +/* IGMP/MLD statistics */ +struct bridge_mcast_stats { + struct br_mcast_stats mstats; + struct u64_stats_sync syncp; +}; #endif struct br_vlan_stats { @@ -229,6 +235,7 @@ struct net_bridge_port struct bridge_mcast_own_query ip6_own_query; #endif /* IS_ENABLED(CONFIG_IPV6) */ unsigned char multicast_router; + struct bridge_mcast_stats __percpu *mcast_stats; struct timer_list multicast_router_timer; struct hlist_head mglist; struct hlist_node rlist; @@ -315,6 +322,7 @@ struct net_bridge u8 multicast_querier:1; u8 multicast_query_use_ifaddr:1; u8 has_ipv6_addr:1; + u8 multicast_stats_enabled:1; u32 hash_elasticity; u32 hash_max; @@ -337,6 +345,7 @@ struct net_bridge struct bridge_mcast_other_query ip4_other_query; struct bridge_mcast_own_query ip4_own_query; struct bridge_mcast_querier ip4_querier; + struct bridge_mcast_stats __percpu *mcast_stats; #if IS_ENABLED(CONFIG_IPV6) struct bridge_mcast_other_query ip6_other_query; struct bridge_mcast_own_query ip6_own_query; @@ -543,7 +552,7 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, struct sk_buff *skb, u16 vid); struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, struct sk_buff *skb, u16 vid); -void br_multicast_add_port(struct net_bridge_port *port); +int br_multicast_add_port(struct net_bridge_port *port); void br_multicast_del_port(struct net_bridge_port *port); void br_multicast_enable_port(struct net_bridge_port *port); void br_multicast_disable_port(struct net_bridge_port *port); @@ -576,6 +585,12 @@ void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, struct br_ip *group, int type, u8 flags); void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port, int type); +void br_multicast_count(struct net_bridge *br, const struct net_bridge_port *p, + __be16 proto, u8 type, u8 dir); +int br_multicast_init_stats(struct net_bridge *br); +void br_multicast_get_stats(const struct net_bridge *br, + const struct net_bridge_port *p, + struct br_mcast_stats *dest); #define mlock_dereference(X, br) \ rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock)) @@ -623,6 +638,11 @@ static inline bool br_multicast_querier_exists(struct net_bridge *br, return false; } } + +static inline int br_multicast_igmp_type(const struct sk_buff *skb) +{ + return BR_INPUT_SKB_CB(skb)->igmp; +} #else static inline int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, @@ -638,8 +658,9 @@ static inline struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, return NULL; } -static inline void br_multicast_add_port(struct net_bridge_port *port) +static inline int br_multicast_add_port(struct net_bridge_port *port) { + return 0; } static inline void br_multicast_del_port(struct net_bridge_port *port) @@ -695,6 +716,22 @@ static inline void br_mdb_init(void) static inline void br_mdb_uninit(void) { } + +static inline void br_multicast_count(struct net_bridge *br, + const struct net_bridge_port *p, + __be16 proto, u8 type, u8 dir) +{ +} + +static inline int br_multicast_init_stats(struct net_bridge *br) +{ + return 0; +} + +static inline int br_multicast_igmp_type(const struct sk_buff *skb) +{ + return 0; +} #endif /* br_vlan.c */ diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index beb47071e38d..e120307c6e36 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -618,6 +618,30 @@ static ssize_t multicast_startup_query_interval_store( return store_bridge_parm(d, buf, len, set_startup_query_interval); } static DEVICE_ATTR_RW(multicast_startup_query_interval); + +static ssize_t multicast_stats_enabled_show(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + + return sprintf(buf, "%u\n", br->multicast_stats_enabled); +} + +static int set_stats_enabled(struct net_bridge *br, unsigned long val) +{ + br->multicast_stats_enabled = !!val; + return 0; +} + +static ssize_t multicast_stats_enabled_store(struct device *d, + struct device_attribute *attr, + const char *buf, + size_t len) +{ + return store_bridge_parm(d, buf, len, set_stats_enabled); +} +static DEVICE_ATTR_RW(multicast_stats_enabled); #endif #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) static ssize_t nf_call_iptables_show( @@ -784,6 +808,7 @@ static struct attribute *bridge_attrs[] = { &dev_attr_multicast_query_interval.attr, &dev_attr_multicast_query_response_interval.attr, &dev_attr_multicast_startup_query_interval.attr, + &dev_attr_multicast_stats_enabled.attr, #endif #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) &dev_attr_nf_call_iptables.attr, diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c index 2a449b7ab8fa..5fc4affd9fdb 100644 --- a/net/bridge/netfilter/ebt_802_3.c +++ b/net/bridge/netfilter/ebt_802_3.c @@ -20,16 +20,16 @@ ebt_802_3_mt(const struct sk_buff *skb, struct xt_action_param *par) __be16 type = hdr->llc.ui.ctrl & IS_UI ? hdr->llc.ui.type : hdr->llc.ni.type; if (info->bitmask & EBT_802_3_SAP) { - if (FWINV(info->sap != hdr->llc.ui.ssap, EBT_802_3_SAP)) + if (NF_INVF(info, EBT_802_3_SAP, info->sap != hdr->llc.ui.ssap)) return false; - if (FWINV(info->sap != hdr->llc.ui.dsap, EBT_802_3_SAP)) + if (NF_INVF(info, EBT_802_3_SAP, info->sap != hdr->llc.ui.dsap)) return false; } if (info->bitmask & EBT_802_3_TYPE) { if (!(hdr->llc.ui.dsap == CHECK_TYPE && hdr->llc.ui.ssap == CHECK_TYPE)) return false; - if (FWINV(info->type != type, EBT_802_3_TYPE)) + if (NF_INVF(info, EBT_802_3_TYPE, info->type != type)) return false; } diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c index cd457b891b27..227142282b45 100644 --- a/net/bridge/netfilter/ebt_arp.c +++ b/net/bridge/netfilter/ebt_arp.c @@ -25,14 +25,14 @@ ebt_arp_mt(const struct sk_buff *skb, struct xt_action_param *par) ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); if (ah == NULL) return false; - if (info->bitmask & EBT_ARP_OPCODE && FWINV(info->opcode != - ah->ar_op, EBT_ARP_OPCODE)) + if ((info->bitmask & EBT_ARP_OPCODE) && + NF_INVF(info, EBT_ARP_OPCODE, info->opcode != ah->ar_op)) return false; - if (info->bitmask & EBT_ARP_HTYPE && FWINV(info->htype != - ah->ar_hrd, EBT_ARP_HTYPE)) + if ((info->bitmask & EBT_ARP_HTYPE) && + NF_INVF(info, EBT_ARP_HTYPE, info->htype != ah->ar_hrd)) return false; - if (info->bitmask & EBT_ARP_PTYPE && FWINV(info->ptype != - ah->ar_pro, EBT_ARP_PTYPE)) + if ((info->bitmask & EBT_ARP_PTYPE) && + NF_INVF(info, EBT_ARP_PTYPE, info->ptype != ah->ar_pro)) return false; if (info->bitmask & (EBT_ARP_SRC_IP | EBT_ARP_DST_IP | EBT_ARP_GRAT)) { @@ -51,21 +51,22 @@ ebt_arp_mt(const struct sk_buff *skb, struct xt_action_param *par) sizeof(daddr), &daddr); if (dap == NULL) return false; - if (info->bitmask & EBT_ARP_SRC_IP && - FWINV(info->saddr != (*sap & info->smsk), EBT_ARP_SRC_IP)) + if ((info->bitmask & EBT_ARP_SRC_IP) && + NF_INVF(info, EBT_ARP_SRC_IP, + info->saddr != (*sap & info->smsk))) return false; - if (info->bitmask & EBT_ARP_DST_IP && - FWINV(info->daddr != (*dap & info->dmsk), EBT_ARP_DST_IP)) + if ((info->bitmask & EBT_ARP_DST_IP) && + NF_INVF(info, EBT_ARP_DST_IP, + info->daddr != (*dap & info->dmsk))) return false; - if (info->bitmask & EBT_ARP_GRAT && - FWINV(*dap != *sap, EBT_ARP_GRAT)) + if ((info->bitmask & EBT_ARP_GRAT) && + NF_INVF(info, EBT_ARP_GRAT, *dap != *sap)) return false; } if (info->bitmask & (EBT_ARP_SRC_MAC | EBT_ARP_DST_MAC)) { const unsigned char *mp; unsigned char _mac[ETH_ALEN]; - uint8_t verdict, i; if (ah->ar_hln != ETH_ALEN || ah->ar_hrd != htons(ARPHRD_ETHER)) return false; @@ -74,11 +75,9 @@ ebt_arp_mt(const struct sk_buff *skb, struct xt_action_param *par) sizeof(_mac), &_mac); if (mp == NULL) return false; - verdict = 0; - for (i = 0; i < 6; i++) - verdict |= (mp[i] ^ info->smaddr[i]) & - info->smmsk[i]; - if (FWINV(verdict != 0, EBT_ARP_SRC_MAC)) + if (NF_INVF(info, EBT_ARP_SRC_MAC, + !ether_addr_equal_masked(mp, info->smaddr, + info->smmsk))) return false; } @@ -88,11 +87,9 @@ ebt_arp_mt(const struct sk_buff *skb, struct xt_action_param *par) sizeof(_mac), &_mac); if (mp == NULL) return false; - verdict = 0; - for (i = 0; i < 6; i++) - verdict |= (mp[i] ^ info->dmaddr[i]) & - info->dmmsk[i]; - if (FWINV(verdict != 0, EBT_ARP_DST_MAC)) + if (NF_INVF(info, EBT_ARP_DST_MAC, + !ether_addr_equal_masked(mp, info->dmaddr, + info->dmmsk))) return false; } } diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c index 23bca62d58d2..d06968bdf5ec 100644 --- a/net/bridge/netfilter/ebt_ip.c +++ b/net/bridge/netfilter/ebt_ip.c @@ -36,19 +36,19 @@ ebt_ip_mt(const struct sk_buff *skb, struct xt_action_param *par) ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); if (ih == NULL) return false; - if (info->bitmask & EBT_IP_TOS && - FWINV(info->tos != ih->tos, EBT_IP_TOS)) + if ((info->bitmask & EBT_IP_TOS) && + NF_INVF(info, EBT_IP_TOS, info->tos != ih->tos)) return false; - if (info->bitmask & EBT_IP_SOURCE && - FWINV((ih->saddr & info->smsk) != - info->saddr, EBT_IP_SOURCE)) + if ((info->bitmask & EBT_IP_SOURCE) && + NF_INVF(info, EBT_IP_SOURCE, + (ih->saddr & info->smsk) != info->saddr)) return false; if ((info->bitmask & EBT_IP_DEST) && - FWINV((ih->daddr & info->dmsk) != - info->daddr, EBT_IP_DEST)) + NF_INVF(info, EBT_IP_DEST, + (ih->daddr & info->dmsk) != info->daddr)) return false; if (info->bitmask & EBT_IP_PROTO) { - if (FWINV(info->protocol != ih->protocol, EBT_IP_PROTO)) + if (NF_INVF(info, EBT_IP_PROTO, info->protocol != ih->protocol)) return false; if (!(info->bitmask & EBT_IP_DPORT) && !(info->bitmask & EBT_IP_SPORT)) @@ -61,16 +61,16 @@ ebt_ip_mt(const struct sk_buff *skb, struct xt_action_param *par) return false; if (info->bitmask & EBT_IP_DPORT) { u32 dst = ntohs(pptr->dst); - if (FWINV(dst < info->dport[0] || - dst > info->dport[1], - EBT_IP_DPORT)) + if (NF_INVF(info, EBT_IP_DPORT, + dst < info->dport[0] || + dst > info->dport[1])) return false; } if (info->bitmask & EBT_IP_SPORT) { u32 src = ntohs(pptr->src); - if (FWINV(src < info->sport[0] || - src > info->sport[1], - EBT_IP_SPORT)) + if (NF_INVF(info, EBT_IP_SPORT, + src < info->sport[0] || + src > info->sport[1])) return false; } } diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c index 98de6e7fd86d..4617491be41e 100644 --- a/net/bridge/netfilter/ebt_ip6.c +++ b/net/bridge/netfilter/ebt_ip6.c @@ -45,15 +45,18 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par) ih6 = skb_header_pointer(skb, 0, sizeof(_ip6h), &_ip6h); if (ih6 == NULL) return false; - if (info->bitmask & EBT_IP6_TCLASS && - FWINV(info->tclass != ipv6_get_dsfield(ih6), EBT_IP6_TCLASS)) + if ((info->bitmask & EBT_IP6_TCLASS) && + NF_INVF(info, EBT_IP6_TCLASS, + info->tclass != ipv6_get_dsfield(ih6))) return false; - if ((info->bitmask & EBT_IP6_SOURCE && - FWINV(ipv6_masked_addr_cmp(&ih6->saddr, &info->smsk, - &info->saddr), EBT_IP6_SOURCE)) || - (info->bitmask & EBT_IP6_DEST && - FWINV(ipv6_masked_addr_cmp(&ih6->daddr, &info->dmsk, - &info->daddr), EBT_IP6_DEST))) + if (((info->bitmask & EBT_IP6_SOURCE) && + NF_INVF(info, EBT_IP6_SOURCE, + ipv6_masked_addr_cmp(&ih6->saddr, &info->smsk, + &info->saddr))) || + ((info->bitmask & EBT_IP6_DEST) && + NF_INVF(info, EBT_IP6_DEST, + ipv6_masked_addr_cmp(&ih6->daddr, &info->dmsk, + &info->daddr)))) return false; if (info->bitmask & EBT_IP6_PROTO) { uint8_t nexthdr = ih6->nexthdr; @@ -63,7 +66,7 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par) offset_ph = ipv6_skip_exthdr(skb, sizeof(_ip6h), &nexthdr, &frag_off); if (offset_ph == -1) return false; - if (FWINV(info->protocol != nexthdr, EBT_IP6_PROTO)) + if (NF_INVF(info, EBT_IP6_PROTO, info->protocol != nexthdr)) return false; if (!(info->bitmask & (EBT_IP6_DPORT | EBT_IP6_SPORT | EBT_IP6_ICMP6))) @@ -76,22 +79,24 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par) return false; if (info->bitmask & EBT_IP6_DPORT) { u16 dst = ntohs(pptr->tcpudphdr.dst); - if (FWINV(dst < info->dport[0] || - dst > info->dport[1], EBT_IP6_DPORT)) + if (NF_INVF(info, EBT_IP6_DPORT, + dst < info->dport[0] || + dst > info->dport[1])) return false; } if (info->bitmask & EBT_IP6_SPORT) { u16 src = ntohs(pptr->tcpudphdr.src); - if (FWINV(src < info->sport[0] || - src > info->sport[1], EBT_IP6_SPORT)) + if (NF_INVF(info, EBT_IP6_SPORT, + src < info->sport[0] || + src > info->sport[1])) return false; } if ((info->bitmask & EBT_IP6_ICMP6) && - FWINV(pptr->icmphdr.type < info->icmpv6_type[0] || - pptr->icmphdr.type > info->icmpv6_type[1] || - pptr->icmphdr.code < info->icmpv6_code[0] || - pptr->icmphdr.code > info->icmpv6_code[1], - EBT_IP6_ICMP6)) + NF_INVF(info, EBT_IP6_ICMP6, + pptr->icmphdr.type < info->icmpv6_type[0] || + pptr->icmphdr.type > info->icmpv6_type[1] || + pptr->icmphdr.code < info->icmpv6_code[0] || + pptr->icmphdr.code > info->icmpv6_code[1])) return false; } return true; diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c index 6b731e12ecfa..3140eb912d7e 100644 --- a/net/bridge/netfilter/ebt_stp.c +++ b/net/bridge/netfilter/ebt_stp.c @@ -17,24 +17,24 @@ #define BPDU_TYPE_TCN 0x80 struct stp_header { - uint8_t dsap; - uint8_t ssap; - uint8_t ctrl; - uint8_t pid; - uint8_t vers; - uint8_t type; + u8 dsap; + u8 ssap; + u8 ctrl; + u8 pid; + u8 vers; + u8 type; }; struct stp_config_pdu { - uint8_t flags; - uint8_t root[8]; - uint8_t root_cost[4]; - uint8_t sender[8]; - uint8_t port[2]; - uint8_t msg_age[2]; - uint8_t max_age[2]; - uint8_t hello_time[2]; - uint8_t forward_delay[2]; + u8 flags; + u8 root[8]; + u8 root_cost[4]; + u8 sender[8]; + u8 port[2]; + u8 msg_age[2]; + u8 max_age[2]; + u8 hello_time[2]; + u8 forward_delay[2]; }; #define NR16(p) (p[0] << 8 | p[1]) @@ -44,76 +44,73 @@ static bool ebt_filter_config(const struct ebt_stp_info *info, const struct stp_config_pdu *stpc) { const struct ebt_stp_config_info *c; - uint16_t v16; - uint32_t v32; - int verdict, i; + u16 v16; + u32 v32; c = &info->config; if ((info->bitmask & EBT_STP_FLAGS) && - FWINV(c->flags != stpc->flags, EBT_STP_FLAGS)) + NF_INVF(info, EBT_STP_FLAGS, c->flags != stpc->flags)) return false; if (info->bitmask & EBT_STP_ROOTPRIO) { v16 = NR16(stpc->root); - if (FWINV(v16 < c->root_priol || - v16 > c->root_priou, EBT_STP_ROOTPRIO)) + if (NF_INVF(info, EBT_STP_ROOTPRIO, + v16 < c->root_priol || v16 > c->root_priou)) return false; } if (info->bitmask & EBT_STP_ROOTADDR) { - verdict = 0; - for (i = 0; i < 6; i++) - verdict |= (stpc->root[2+i] ^ c->root_addr[i]) & - c->root_addrmsk[i]; - if (FWINV(verdict != 0, EBT_STP_ROOTADDR)) + if (NF_INVF(info, EBT_STP_ROOTADDR, + !ether_addr_equal_masked(&stpc->root[2], + c->root_addr, + c->root_addrmsk))) return false; } if (info->bitmask & EBT_STP_ROOTCOST) { v32 = NR32(stpc->root_cost); - if (FWINV(v32 < c->root_costl || - v32 > c->root_costu, EBT_STP_ROOTCOST)) + if (NF_INVF(info, EBT_STP_ROOTCOST, + v32 < c->root_costl || v32 > c->root_costu)) return false; } if (info->bitmask & EBT_STP_SENDERPRIO) { v16 = NR16(stpc->sender); - if (FWINV(v16 < c->sender_priol || - v16 > c->sender_priou, EBT_STP_SENDERPRIO)) + if (NF_INVF(info, EBT_STP_SENDERPRIO, + v16 < c->sender_priol || v16 > c->sender_priou)) return false; } if (info->bitmask & EBT_STP_SENDERADDR) { - verdict = 0; - for (i = 0; i < 6; i++) - verdict |= (stpc->sender[2+i] ^ c->sender_addr[i]) & - c->sender_addrmsk[i]; - if (FWINV(verdict != 0, EBT_STP_SENDERADDR)) + if (NF_INVF(info, EBT_STP_SENDERADDR, + !ether_addr_equal_masked(&stpc->sender[2], + c->sender_addr, + c->sender_addrmsk))) return false; } if (info->bitmask & EBT_STP_PORT) { v16 = NR16(stpc->port); - if (FWINV(v16 < c->portl || - v16 > c->portu, EBT_STP_PORT)) + if (NF_INVF(info, EBT_STP_PORT, + v16 < c->portl || v16 > c->portu)) return false; } if (info->bitmask & EBT_STP_MSGAGE) { v16 = NR16(stpc->msg_age); - if (FWINV(v16 < c->msg_agel || - v16 > c->msg_ageu, EBT_STP_MSGAGE)) + if (NF_INVF(info, EBT_STP_MSGAGE, + v16 < c->msg_agel || v16 > c->msg_ageu)) return false; } if (info->bitmask & EBT_STP_MAXAGE) { v16 = NR16(stpc->max_age); - if (FWINV(v16 < c->max_agel || - v16 > c->max_ageu, EBT_STP_MAXAGE)) + if (NF_INVF(info, EBT_STP_MAXAGE, + v16 < c->max_agel || v16 > c->max_ageu)) return false; } if (info->bitmask & EBT_STP_HELLOTIME) { v16 = NR16(stpc->hello_time); - if (FWINV(v16 < c->hello_timel || - v16 > c->hello_timeu, EBT_STP_HELLOTIME)) + if (NF_INVF(info, EBT_STP_HELLOTIME, + v16 < c->hello_timel || v16 > c->hello_timeu)) return false; } if (info->bitmask & EBT_STP_FWDD) { v16 = NR16(stpc->forward_delay); - if (FWINV(v16 < c->forward_delayl || - v16 > c->forward_delayu, EBT_STP_FWDD)) + if (NF_INVF(info, EBT_STP_FWDD, + v16 < c->forward_delayl || v16 > c->forward_delayu)) return false; } return true; @@ -125,7 +122,7 @@ ebt_stp_mt(const struct sk_buff *skb, struct xt_action_param *par) const struct ebt_stp_info *info = par->matchinfo; const struct stp_header *sp; struct stp_header _stph; - const uint8_t header[6] = {0x42, 0x42, 0x03, 0x00, 0x00, 0x00}; + const u8 header[6] = {0x42, 0x42, 0x03, 0x00, 0x00, 0x00}; sp = skb_header_pointer(skb, 0, sizeof(_stph), &_stph); if (sp == NULL) @@ -135,8 +132,8 @@ ebt_stp_mt(const struct sk_buff *skb, struct xt_action_param *par) if (memcmp(sp, header, sizeof(header))) return false; - if (info->bitmask & EBT_STP_TYPE && - FWINV(info->type != sp->type, EBT_STP_TYPE)) + if ((info->bitmask & EBT_STP_TYPE) && + NF_INVF(info, EBT_STP_TYPE, info->type != sp->type)) return false; if (sp->type == BPDU_TYPE_CONFIG && @@ -156,8 +153,8 @@ ebt_stp_mt(const struct sk_buff *skb, struct xt_action_param *par) static int ebt_stp_mt_check(const struct xt_mtchk_param *par) { const struct ebt_stp_info *info = par->matchinfo; - const uint8_t bridge_ula[6] = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x00}; - const uint8_t msk[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + const u8 bridge_ula[6] = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x00}; + const u8 msk[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; const struct ebt_entry *e = par->entryinfo; if (info->bitmask & ~EBT_STP_MASK || info->invflags & ~EBT_STP_MASK || diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 5a61f35412a0..cceac5bb658f 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -121,7 +121,6 @@ ebt_dev_check(const char *entry, const struct net_device *device) return devname[i] != entry[i] && entry[i] != 1; } -#define FWINV2(bool, invflg) ((bool) ^ !!(e->invflags & invflg)) /* process standard matches */ static inline int ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, @@ -130,7 +129,6 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, const struct ethhdr *h = eth_hdr(skb); const struct net_bridge_port *p; __be16 ethproto; - int verdict, i; if (skb_vlan_tag_present(skb)) ethproto = htons(ETH_P_8021Q); @@ -138,38 +136,36 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, ethproto = h->h_proto; if (e->bitmask & EBT_802_3) { - if (FWINV2(eth_proto_is_802_3(ethproto), EBT_IPROTO)) + if (NF_INVF(e, EBT_IPROTO, eth_proto_is_802_3(ethproto))) return 1; } else if (!(e->bitmask & EBT_NOPROTO) && - FWINV2(e->ethproto != ethproto, EBT_IPROTO)) + NF_INVF(e, EBT_IPROTO, e->ethproto != ethproto)) return 1; - if (FWINV2(ebt_dev_check(e->in, in), EBT_IIN)) + if (NF_INVF(e, EBT_IIN, ebt_dev_check(e->in, in))) return 1; - if (FWINV2(ebt_dev_check(e->out, out), EBT_IOUT)) + if (NF_INVF(e, EBT_IOUT, ebt_dev_check(e->out, out))) return 1; /* rcu_read_lock()ed by nf_hook_slow */ if (in && (p = br_port_get_rcu(in)) != NULL && - FWINV2(ebt_dev_check(e->logical_in, p->br->dev), EBT_ILOGICALIN)) + NF_INVF(e, EBT_ILOGICALIN, + ebt_dev_check(e->logical_in, p->br->dev))) return 1; if (out && (p = br_port_get_rcu(out)) != NULL && - FWINV2(ebt_dev_check(e->logical_out, p->br->dev), EBT_ILOGICALOUT)) + NF_INVF(e, EBT_ILOGICALOUT, + ebt_dev_check(e->logical_out, p->br->dev))) return 1; if (e->bitmask & EBT_SOURCEMAC) { - verdict = 0; - for (i = 0; i < 6; i++) - verdict |= (h->h_source[i] ^ e->sourcemac[i]) & - e->sourcemsk[i]; - if (FWINV2(verdict != 0, EBT_ISOURCE)) + if (NF_INVF(e, EBT_ISOURCE, + !ether_addr_equal_masked(h->h_source, e->sourcemac, + e->sourcemsk))) return 1; } if (e->bitmask & EBT_DESTMAC) { - verdict = 0; - for (i = 0; i < 6; i++) - verdict |= (h->h_dest[i] ^ e->destmac[i]) & - e->destmsk[i]; - if (FWINV2(verdict != 0, EBT_IDEST)) + if (NF_INVF(e, EBT_IDEST, + !ether_addr_equal_masked(h->h_dest, e->destmac, + e->destmsk))) return 1; } return 0; diff --git a/net/core/dev.c b/net/core/dev.c index aba10d2a8bc3..b92d63bfde7a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5445,6 +5445,52 @@ void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter) EXPORT_SYMBOL(netdev_lower_get_next); /** + * netdev_all_lower_get_next - Get the next device from all lower neighbour list + * @dev: device + * @iter: list_head ** of the current position + * + * Gets the next netdev_adjacent from the dev's all lower neighbour + * list, starting from iter position. The caller must hold RTNL lock or + * its own locking that guarantees that the neighbour all lower + * list will remain unchanged. + */ +struct net_device *netdev_all_lower_get_next(struct net_device *dev, struct list_head **iter) +{ + struct netdev_adjacent *lower; + + lower = list_entry(*iter, struct netdev_adjacent, list); + + if (&lower->list == &dev->all_adj_list.lower) + return NULL; + + *iter = lower->list.next; + + return lower->dev; +} +EXPORT_SYMBOL(netdev_all_lower_get_next); + +/** + * netdev_all_lower_get_next_rcu - Get the next device from all + * lower neighbour list, RCU variant + * @dev: device + * @iter: list_head ** of the current position + * + * Gets the next netdev_adjacent from the dev's all lower neighbour + * list, starting from iter position. The caller must hold RCU read lock. + */ +struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev, + struct list_head **iter) +{ + struct netdev_adjacent *lower; + + lower = list_first_or_null_rcu(&dev->all_adj_list.lower, + struct netdev_adjacent, list); + + return lower ? lower->dev : NULL; +} +EXPORT_SYMBOL(netdev_all_lower_get_next_rcu); + +/** * netdev_lower_get_first_private_rcu - Get the first ->private from the * lower neighbour list, RCU * variant @@ -6041,6 +6087,50 @@ void netdev_lower_state_changed(struct net_device *lower_dev, } EXPORT_SYMBOL(netdev_lower_state_changed); +int netdev_default_l2upper_neigh_construct(struct net_device *dev, + struct neighbour *n) +{ + struct net_device *lower_dev, *stop_dev; + struct list_head *iter; + int err; + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + if (!lower_dev->netdev_ops->ndo_neigh_construct) + continue; + err = lower_dev->netdev_ops->ndo_neigh_construct(lower_dev, n); + if (err) { + stop_dev = lower_dev; + goto rollback; + } + } + return 0; + +rollback: + netdev_for_each_lower_dev(dev, lower_dev, iter) { + if (lower_dev == stop_dev) + break; + if (!lower_dev->netdev_ops->ndo_neigh_destroy) + continue; + lower_dev->netdev_ops->ndo_neigh_destroy(lower_dev, n); + } + return err; +} +EXPORT_SYMBOL_GPL(netdev_default_l2upper_neigh_construct); + +void netdev_default_l2upper_neigh_destroy(struct net_device *dev, + struct neighbour *n) +{ + struct net_device *lower_dev; + struct list_head *iter; + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + if (!lower_dev->netdev_ops->ndo_neigh_destroy) + continue; + lower_dev->netdev_ops->ndo_neigh_destroy(lower_dev, n); + } +} +EXPORT_SYMBOL_GPL(netdev_default_l2upper_neigh_destroy); + static void dev_change_rx_flags(struct net_device *dev, int flags) { const struct net_device_ops *ops = dev->netdev_ops; diff --git a/net/core/devlink.c b/net/core/devlink.c index 933e8d4d3968..b2e592a198c0 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -1394,6 +1394,78 @@ static int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb, return -EOPNOTSUPP; } +static int devlink_eswitch_fill(struct sk_buff *msg, struct devlink *devlink, + enum devlink_command cmd, u32 portid, + u32 seq, int flags, u16 mode) +{ + void *hdr; + + hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); + if (!hdr) + return -EMSGSIZE; + + if (devlink_nl_put_handle(msg, devlink)) + goto nla_put_failure; + + if (nla_put_u16(msg, DEVLINK_ATTR_ESWITCH_MODE, mode)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int devlink_nl_cmd_eswitch_mode_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + const struct devlink_ops *ops = devlink->ops; + struct sk_buff *msg; + u16 mode; + int err; + + if (!ops || !ops->eswitch_mode_get) + return -EOPNOTSUPP; + + err = ops->eswitch_mode_get(devlink, &mode); + if (err) + return err; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + err = devlink_eswitch_fill(msg, devlink, DEVLINK_CMD_ESWITCH_MODE_GET, + info->snd_portid, info->snd_seq, 0, mode); + + if (err) { + nlmsg_free(msg); + return err; + } + + return genlmsg_reply(msg, info); +} + +static int devlink_nl_cmd_eswitch_mode_set_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + const struct devlink_ops *ops = devlink->ops; + u16 mode; + + if (!info->attrs[DEVLINK_ATTR_ESWITCH_MODE]) + return -EINVAL; + + mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]); + + if (ops && ops->eswitch_mode_set) + return ops->eswitch_mode_set(devlink, mode); + return -EOPNOTSUPP; +} + static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING }, @@ -1407,6 +1479,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE] = { .type = NLA_U8 }, [DEVLINK_ATTR_SB_THRESHOLD] = { .type = NLA_U32 }, [DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16 }, + [DEVLINK_ATTR_ESWITCH_MODE] = { .type = NLA_U16 }, }; static const struct genl_ops devlink_nl_ops[] = { @@ -1525,6 +1598,20 @@ static const struct genl_ops devlink_nl_ops[] = { DEVLINK_NL_FLAG_NEED_SB | DEVLINK_NL_FLAG_LOCK_PORTS, }, + { + .cmd = DEVLINK_CMD_ESWITCH_MODE_GET, + .doit = devlink_nl_cmd_eswitch_mode_get_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, + { + .cmd = DEVLINK_CMD_ESWITCH_MODE_SET, + .doit = devlink_nl_cmd_eswitch_mode_set_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, }; /** diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 98298b11f534..be4629c344a6 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -269,6 +269,49 @@ errout: return err; } +static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh, + struct nlattr **tb, struct fib_rule *rule) +{ + struct fib_rule *r; + + list_for_each_entry(r, &ops->rules_list, list) { + if (r->action != rule->action) + continue; + + if (r->table != rule->table) + continue; + + if (r->pref != rule->pref) + continue; + + if (memcmp(r->iifname, rule->iifname, IFNAMSIZ)) + continue; + + if (memcmp(r->oifname, rule->oifname, IFNAMSIZ)) + continue; + + if (r->mark != rule->mark) + continue; + + if (r->mark_mask != rule->mark_mask) + continue; + + if (r->tun_id != rule->tun_id) + continue; + + if (r->fr_net != rule->fr_net) + continue; + + if (r->l3mdev != rule->l3mdev) + continue; + + if (!ops->compare(r, frh, tb)) + continue; + return 1; + } + return 0; +} + int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); @@ -386,6 +429,12 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh) if (rule->l3mdev && rule->table) goto errout_free; + if ((nlh->nlmsg_flags & NLM_F_EXCL) && + rule_exists(ops, frh, tb, rule)) { + err = -EEXIST; + goto errout_free; + } + err = ops->configure(rule, skb, frh, tb); if (err < 0) goto errout_free; diff --git a/net/core/filter.c b/net/core/filter.c index cb9fc16cac46..10c4a2f9e8bb 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -150,6 +150,12 @@ static u64 __get_raw_cpu_id(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) return raw_smp_processor_id(); } +static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = { + .func = __get_raw_cpu_id, + .gpl_only = false, + .ret_type = RET_INTEGER, +}; + static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg, struct bpf_insn *insn_buf) { @@ -1295,21 +1301,10 @@ int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk) static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk) { - struct bpf_prog *prog; - if (sock_flag(sk, SOCK_FILTER_LOCKED)) return ERR_PTR(-EPERM); - prog = bpf_prog_get(ufd); - if (IS_ERR(prog)) - return prog; - - if (prog->type != BPF_PROG_TYPE_SOCKET_FILTER) { - bpf_prog_put(prog); - return ERR_PTR(-EINVAL); - } - - return prog; + return bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER); } int sk_attach_bpf(u32 ufd, struct sock *sk) @@ -1734,6 +1729,23 @@ static const struct bpf_func_proto bpf_get_route_realm_proto = { .arg1_type = ARG_PTR_TO_CTX, }; +static u64 bpf_get_hash_recalc(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + /* If skb_clear_hash() was called due to mangling, we can + * trigger SW recalculation here. Later access to hash + * can then use the inline skb->hash via context directly + * instead of calling this helper again. + */ + return skb_get_hash((struct sk_buff *) (unsigned long) r1); +} + +static const struct bpf_func_proto bpf_get_hash_recalc_proto = { + .func = bpf_get_hash_recalc, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; + static u64 bpf_skb_vlan_push(u64 r1, u64 r2, u64 vlan_tci, u64 r4, u64 r5) { struct sk_buff *skb = (struct sk_buff *) (long) r1; @@ -1777,6 +1789,224 @@ const struct bpf_func_proto bpf_skb_vlan_pop_proto = { }; EXPORT_SYMBOL_GPL(bpf_skb_vlan_pop_proto); +static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len) +{ + /* Caller already did skb_cow() with len as headroom, + * so no need to do it here. + */ + skb_push(skb, len); + memmove(skb->data, skb->data + len, off); + memset(skb->data + off, 0, len); + + /* No skb_postpush_rcsum(skb, skb->data + off, len) + * needed here as it does not change the skb->csum + * result for checksum complete when summing over + * zeroed blocks. + */ + return 0; +} + +static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len) +{ + /* skb_ensure_writable() is not needed here, as we're + * already working on an uncloned skb. + */ + if (unlikely(!pskb_may_pull(skb, off + len))) + return -ENOMEM; + + skb_postpull_rcsum(skb, skb->data + off, len); + memmove(skb->data + len, skb->data, off); + __skb_pull(skb, len); + + return 0; +} + +static int bpf_skb_net_hdr_push(struct sk_buff *skb, u32 off, u32 len) +{ + bool trans_same = skb->transport_header == skb->network_header; + int ret; + + /* There's no need for __skb_push()/__skb_pull() pair to + * get to the start of the mac header as we're guaranteed + * to always start from here under eBPF. + */ + ret = bpf_skb_generic_push(skb, off, len); + if (likely(!ret)) { + skb->mac_header -= len; + skb->network_header -= len; + if (trans_same) + skb->transport_header = skb->network_header; + } + + return ret; +} + +static int bpf_skb_net_hdr_pop(struct sk_buff *skb, u32 off, u32 len) +{ + bool trans_same = skb->transport_header == skb->network_header; + int ret; + + /* Same here, __skb_push()/__skb_pull() pair not needed. */ + ret = bpf_skb_generic_pop(skb, off, len); + if (likely(!ret)) { + skb->mac_header += len; + skb->network_header += len; + if (trans_same) + skb->transport_header = skb->network_header; + } + + return ret; +} + +static int bpf_skb_proto_4_to_6(struct sk_buff *skb) +{ + const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr); + u32 off = skb->network_header - skb->mac_header; + int ret; + + ret = skb_cow(skb, len_diff); + if (unlikely(ret < 0)) + return ret; + + ret = bpf_skb_net_hdr_push(skb, off, len_diff); + if (unlikely(ret < 0)) + return ret; + + if (skb_is_gso(skb)) { + /* SKB_GSO_UDP stays as is. SKB_GSO_TCPV4 needs to + * be changed into SKB_GSO_TCPV6. + */ + if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) { + skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV4; + skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6; + } + + /* Due to IPv6 header, MSS needs to be downgraded. */ + skb_shinfo(skb)->gso_size -= len_diff; + /* Header must be checked, and gso_segs recomputed. */ + skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; + skb_shinfo(skb)->gso_segs = 0; + } + + skb->protocol = htons(ETH_P_IPV6); + skb_clear_hash(skb); + + return 0; +} + +static int bpf_skb_proto_6_to_4(struct sk_buff *skb) +{ + const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr); + u32 off = skb->network_header - skb->mac_header; + int ret; + + ret = skb_unclone(skb, GFP_ATOMIC); + if (unlikely(ret < 0)) + return ret; + + ret = bpf_skb_net_hdr_pop(skb, off, len_diff); + if (unlikely(ret < 0)) + return ret; + + if (skb_is_gso(skb)) { + /* SKB_GSO_UDP stays as is. SKB_GSO_TCPV6 needs to + * be changed into SKB_GSO_TCPV4. + */ + if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) { + skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV6; + skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4; + } + + /* Due to IPv4 header, MSS can be upgraded. */ + skb_shinfo(skb)->gso_size += len_diff; + /* Header must be checked, and gso_segs recomputed. */ + skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; + skb_shinfo(skb)->gso_segs = 0; + } + + skb->protocol = htons(ETH_P_IP); + skb_clear_hash(skb); + + return 0; +} + +static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto) +{ + __be16 from_proto = skb->protocol; + + if (from_proto == htons(ETH_P_IP) && + to_proto == htons(ETH_P_IPV6)) + return bpf_skb_proto_4_to_6(skb); + + if (from_proto == htons(ETH_P_IPV6) && + to_proto == htons(ETH_P_IP)) + return bpf_skb_proto_6_to_4(skb); + + return -ENOTSUPP; +} + +static u64 bpf_skb_change_proto(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5) +{ + struct sk_buff *skb = (struct sk_buff *) (long) r1; + __be16 proto = (__force __be16) r2; + int ret; + + if (unlikely(flags)) + return -EINVAL; + + /* General idea is that this helper does the basic groundwork + * needed for changing the protocol, and eBPF program fills the + * rest through bpf_skb_store_bytes(), bpf_lX_csum_replace() + * and other helpers, rather than passing a raw buffer here. + * + * The rationale is to keep this minimal and without a need to + * deal with raw packet data. F.e. even if we would pass buffers + * here, the program still needs to call the bpf_lX_csum_replace() + * helpers anyway. Plus, this way we keep also separation of + * concerns, since f.e. bpf_skb_store_bytes() should only take + * care of stores. + * + * Currently, additional options and extension header space are + * not supported, but flags register is reserved so we can adapt + * that. For offloads, we mark packet as dodgy, so that headers + * need to be verified first. + */ + ret = bpf_skb_proto_xlat(skb, proto); + bpf_compute_data_end(skb); + return ret; +} + +static const struct bpf_func_proto bpf_skb_change_proto_proto = { + .func = bpf_skb_change_proto, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, +}; + +static u64 bpf_skb_change_type(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct sk_buff *skb = (struct sk_buff *) (long) r1; + u32 pkt_type = r2; + + /* We only allow a restricted subset to be changed for now. */ + if (unlikely(skb->pkt_type > PACKET_OTHERHOST || + pkt_type > PACKET_OTHERHOST)) + return -EINVAL; + + skb->pkt_type = pkt_type; + return 0; +} + +static const struct bpf_func_proto bpf_skb_change_type_proto = { + .func = bpf_skb_change_type, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; + bool bpf_helper_changes_skb_data(void *func) { if (func == bpf_skb_vlan_push) @@ -1785,6 +2015,8 @@ bool bpf_helper_changes_skb_data(void *func) return true; if (func == bpf_skb_store_bytes) return true; + if (func == bpf_skb_change_proto) + return true; if (func == bpf_l3_csum_replace) return true; if (func == bpf_l4_csum_replace) @@ -2024,6 +2256,40 @@ bpf_get_skb_set_tunnel_proto(enum bpf_func_id which) } } +#ifdef CONFIG_SOCK_CGROUP_DATA +static u64 bpf_skb_in_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct sk_buff *skb = (struct sk_buff *)(long)r1; + struct bpf_map *map = (struct bpf_map *)(long)r2; + struct bpf_array *array = container_of(map, struct bpf_array, map); + struct cgroup *cgrp; + struct sock *sk; + u32 i = (u32)r3; + + sk = skb->sk; + if (!sk || !sk_fullsock(sk)) + return -ENOENT; + + if (unlikely(i >= array->map.max_entries)) + return -E2BIG; + + cgrp = READ_ONCE(array->ptrs[i]); + if (unlikely(!cgrp)) + return -EAGAIN; + + return cgroup_is_descendant(sock_cgroup_ptr(&sk->sk_cgrp_data), cgrp); +} + +static const struct bpf_func_proto bpf_skb_in_cgroup_proto = { + .func = bpf_skb_in_cgroup, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, +}; +#endif + static const struct bpf_func_proto * sk_filter_func_proto(enum bpf_func_id func_id) { @@ -2037,7 +2303,7 @@ sk_filter_func_proto(enum bpf_func_id func_id) case BPF_FUNC_get_prandom_u32: return &bpf_get_prandom_u32_proto; case BPF_FUNC_get_smp_processor_id: - return &bpf_get_smp_processor_id_proto; + return &bpf_get_raw_smp_processor_id_proto; case BPF_FUNC_tail_call: return &bpf_tail_call_proto; case BPF_FUNC_ktime_get_ns: @@ -2072,6 +2338,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_skb_vlan_push_proto; case BPF_FUNC_skb_vlan_pop: return &bpf_skb_vlan_pop_proto; + case BPF_FUNC_skb_change_proto: + return &bpf_skb_change_proto_proto; + case BPF_FUNC_skb_change_type: + return &bpf_skb_change_type_proto; case BPF_FUNC_skb_get_tunnel_key: return &bpf_skb_get_tunnel_key_proto; case BPF_FUNC_skb_set_tunnel_key: @@ -2084,8 +2354,16 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_redirect_proto; case BPF_FUNC_get_route_realm: return &bpf_get_route_realm_proto; + case BPF_FUNC_get_hash_recalc: + return &bpf_get_hash_recalc_proto; case BPF_FUNC_perf_event_output: return bpf_get_event_output_proto(); + case BPF_FUNC_get_smp_processor_id: + return &bpf_get_smp_processor_id_proto; +#ifdef CONFIG_SOCK_CGROUP_DATA + case BPF_FUNC_skb_in_cgroup: + return &bpf_skb_in_cgroup_proto; +#endif default: return sk_filter_func_proto(func_id); } diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index a669dea146c6..61ad43f61c5e 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -651,6 +651,23 @@ void make_flow_keys_digest(struct flow_keys_digest *digest, } EXPORT_SYMBOL(make_flow_keys_digest); +static struct flow_dissector flow_keys_dissector_symmetric __read_mostly; + +u32 __skb_get_hash_symmetric(struct sk_buff *skb) +{ + struct flow_keys keys; + + __flow_hash_secret_init(); + + memset(&keys, 0, sizeof(keys)); + __skb_flow_dissect(skb, &flow_keys_dissector_symmetric, &keys, + NULL, 0, 0, 0, + FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); + + return __flow_hash_from_keys(&keys, hashrnd); +} +EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric); + /** * __skb_get_hash: calculate a flow hash * @skb: sk_buff to calculate flow hash from @@ -868,6 +885,29 @@ static const struct flow_dissector_key flow_keys_dissector_keys[] = { }, }; +static const struct flow_dissector_key flow_keys_dissector_symmetric_keys[] = { + { + .key_id = FLOW_DISSECTOR_KEY_CONTROL, + .offset = offsetof(struct flow_keys, control), + }, + { + .key_id = FLOW_DISSECTOR_KEY_BASIC, + .offset = offsetof(struct flow_keys, basic), + }, + { + .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS, + .offset = offsetof(struct flow_keys, addrs.v4addrs), + }, + { + .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS, + .offset = offsetof(struct flow_keys, addrs.v6addrs), + }, + { + .key_id = FLOW_DISSECTOR_KEY_PORTS, + .offset = offsetof(struct flow_keys, ports), + }, +}; + static const struct flow_dissector_key flow_keys_buf_dissector_keys[] = { { .key_id = FLOW_DISSECTOR_KEY_CONTROL, @@ -889,6 +929,9 @@ static int __init init_default_flow_dissectors(void) skb_flow_dissector_init(&flow_keys_dissector, flow_keys_dissector_keys, ARRAY_SIZE(flow_keys_dissector_keys)); + skb_flow_dissector_init(&flow_keys_dissector_symmetric, + flow_keys_dissector_symmetric_keys, + ARRAY_SIZE(flow_keys_dissector_symmetric_keys)); skb_flow_dissector_init(&flow_keys_buf_dissector, flow_keys_buf_dissector_keys, ARRAY_SIZE(flow_keys_buf_dissector_keys)); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 510cd62fcb99..5cdc62a8eb84 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -473,7 +473,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey, } if (dev->netdev_ops->ndo_neigh_construct) { - error = dev->netdev_ops->ndo_neigh_construct(n); + error = dev->netdev_ops->ndo_neigh_construct(dev, n); if (error < 0) { rc = ERR_PTR(error); goto out_neigh_release; @@ -701,7 +701,7 @@ void neigh_destroy(struct neighbour *neigh) neigh->arp_queue_len_bytes = 0; if (dev->netdev_ops->ndo_neigh_destroy) - dev->netdev_ops->ndo_neigh_destroy(neigh); + dev->netdev_ops->ndo_neigh_destroy(dev, neigh); dev_put(dev); neigh_parms_put(neigh->parms); @@ -2047,6 +2047,7 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh) case NDTPA_DELAY_PROBE_TIME: NEIGH_VAR_SET(p, DELAY_PROBE_TIME, nla_get_msecs(tbp[i])); + call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p); break; case NDTPA_RETRANS_TIME: NEIGH_VAR_SET(p, RETRANS_TIME, @@ -2930,6 +2931,7 @@ static void neigh_proc_update(struct ctl_table *ctl, int write) return; set_bit(index, p->data_state); + call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p); if (!dev) /* NULL dev means this is default value */ neigh_copy_dflt_parms(net, p, index); } diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 7a0b616557ab..6e4f34721080 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -322,7 +322,20 @@ NETDEVICE_SHOW_RW(flags, fmt_hex); static int change_tx_queue_len(struct net_device *dev, unsigned long new_len) { - dev->tx_queue_len = new_len; + int res, orig_len = dev->tx_queue_len; + + if (new_len != orig_len) { + dev->tx_queue_len = new_len; + res = call_netdevice_notifiers(NETDEV_CHANGE_TX_QUEUE_LEN, dev); + res = notifier_to_errno(res); + if (res) { + netdev_err(dev, + "refused to change device tx_queue_len\n"); + dev->tx_queue_len = orig_len; + return -EFAULT; + } + } + return 0; } diff --git a/net/core/pktgen.c b/net/core/pktgen.c index f74ab9c3b38f..bbd118b19aef 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -213,6 +213,7 @@ /* Xmit modes */ #define M_START_XMIT 0 /* Default normal TX */ #define M_NETIF_RECEIVE 1 /* Inject packets into stack */ +#define M_QUEUE_XMIT 2 /* Inject packet into qdisc */ /* If lock -- protects updating of if_list */ #define if_lock(t) spin_lock(&(t->if_lock)); @@ -626,6 +627,8 @@ static int pktgen_if_show(struct seq_file *seq, void *v) if (pkt_dev->xmit_mode == M_NETIF_RECEIVE) seq_puts(seq, " xmit_mode: netif_receive\n"); + else if (pkt_dev->xmit_mode == M_QUEUE_XMIT) + seq_puts(seq, " xmit_mode: xmit_queue\n"); seq_puts(seq, " Flags: "); @@ -1142,8 +1145,10 @@ static ssize_t pktgen_if_write(struct file *file, return len; i += len; - if ((value > 1) && (pkt_dev->xmit_mode == M_START_XMIT) && - (!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING))) + if ((value > 1) && + ((pkt_dev->xmit_mode == M_QUEUE_XMIT) || + ((pkt_dev->xmit_mode == M_START_XMIT) && + (!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING))))) return -ENOTSUPP; pkt_dev->burst = value < 1 ? 1 : value; sprintf(pg_result, "OK: burst=%d", pkt_dev->burst); @@ -1198,6 +1203,9 @@ static ssize_t pktgen_if_write(struct file *file, * at module loading time */ pkt_dev->clone_skb = 0; + } else if (strcmp(f, "queue_xmit") == 0) { + pkt_dev->xmit_mode = M_QUEUE_XMIT; + pkt_dev->last_ok = 1; } else { sprintf(pg_result, "xmit_mode -:%s:- unknown\nAvailable modes: %s", @@ -3434,6 +3442,36 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) #endif } while (--burst > 0); goto out; /* Skips xmit_mode M_START_XMIT */ + } else if (pkt_dev->xmit_mode == M_QUEUE_XMIT) { + local_bh_disable(); + atomic_inc(&pkt_dev->skb->users); + + ret = dev_queue_xmit(pkt_dev->skb); + switch (ret) { + case NET_XMIT_SUCCESS: + pkt_dev->sofar++; + pkt_dev->seq_num++; + pkt_dev->tx_bytes += pkt_dev->last_pkt_size; + break; + case NET_XMIT_DROP: + case NET_XMIT_CN: + /* These are all valid return codes for a qdisc but + * indicate packets are being dropped or will likely + * be dropped soon. + */ + case NETDEV_TX_BUSY: + /* qdisc may call dev_hard_start_xmit directly in cases + * where no queues exist e.g. loopback device, virtual + * devices, etc. In this case we need to handle + * NETDEV_TX_ codes. + */ + default: + pkt_dev->errors++; + net_info_ratelimited("%s xmit error: %d\n", + pkt_dev->odevname, ret); + break; + } + goto out; } txq = skb_get_tx_queue(odev, pkt_dev->skb); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index eb49ca24274a..a9e3805af739 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1927,11 +1927,19 @@ static int do_setlink(const struct sk_buff *skb, if (tb[IFLA_TXQLEN]) { unsigned long value = nla_get_u32(tb[IFLA_TXQLEN]); - - if (dev->tx_queue_len ^ value) + unsigned long orig_len = dev->tx_queue_len; + + if (dev->tx_queue_len ^ value) { + dev->tx_queue_len = value; + err = call_netdevice_notifiers( + NETDEV_CHANGE_TX_QUEUE_LEN, dev); + err = notifier_to_errno(err); + if (err) { + dev->tx_queue_len = orig_len; + goto errout; + } status |= DO_SETLINK_NOTIFY; - - dev->tx_queue_len = value; + } } if (tb[IFLA_OPERSTATE]) @@ -3519,7 +3527,32 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, if (!attr) goto nla_put_failure; - err = ops->fill_linkxstats(skb, dev, prividx); + err = ops->fill_linkxstats(skb, dev, prividx, *idxattr); + nla_nest_end(skb, attr); + if (err) + goto nla_put_failure; + *idxattr = 0; + } + } + + if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_XSTATS_SLAVE, + *idxattr)) { + const struct rtnl_link_ops *ops = NULL; + const struct net_device *master; + + master = netdev_master_upper_dev_get(dev); + if (master) + ops = master->rtnl_link_ops; + if (ops && ops->fill_linkxstats) { + int err; + + *idxattr = IFLA_STATS_LINK_XSTATS_SLAVE; + attr = nla_nest_start(skb, + IFLA_STATS_LINK_XSTATS_SLAVE); + if (!attr) + goto nla_put_failure; + + err = ops->fill_linkxstats(skb, dev, prividx, *idxattr); nla_nest_end(skb, attr); if (err) goto nla_put_failure; @@ -3555,14 +3588,35 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev, if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_XSTATS, 0)) { const struct rtnl_link_ops *ops = dev->rtnl_link_ops; + int attr = IFLA_STATS_LINK_XSTATS; if (ops && ops->get_linkxstats_size) { - size += nla_total_size(ops->get_linkxstats_size(dev)); + size += nla_total_size(ops->get_linkxstats_size(dev, + attr)); /* for IFLA_STATS_LINK_XSTATS */ size += nla_total_size(0); } } + if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_XSTATS_SLAVE, 0)) { + struct net_device *_dev = (struct net_device *)dev; + const struct rtnl_link_ops *ops = NULL; + const struct net_device *master; + + /* netdev_master_upper_dev_get can't take const */ + master = netdev_master_upper_dev_get(_dev); + if (master) + ops = master->rtnl_link_ops; + if (ops && ops->get_linkxstats_size) { + int attr = IFLA_STATS_LINK_XSTATS_SLAVE; + + size += nla_total_size(ops->get_linkxstats_size(dev, + attr)); + /* for IFLA_STATS_LINK_XSTATS_SLAVE */ + size += nla_total_size(0); + } + } + return size; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index e7ec6d3ad5f0..3864b4b68fa1 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3017,24 +3017,6 @@ int skb_append_pagefrags(struct sk_buff *skb, struct page *page, EXPORT_SYMBOL_GPL(skb_append_pagefrags); /** - * skb_push_rcsum - push skb and update receive checksum - * @skb: buffer to update - * @len: length of data pulled - * - * This function performs an skb_push on the packet and updates - * the CHECKSUM_COMPLETE checksum. It should be used on - * receive path processing instead of skb_push unless you know - * that the checksum difference is zero (e.g., a valid IP header) - * or you are setting ip_summed to CHECKSUM_NONE. - */ -static unsigned char *skb_push_rcsum(struct sk_buff *skb, unsigned len) -{ - skb_push(skb, len); - skb_postpush_rcsum(skb, skb->data, len); - return skb->data; -} - -/** * skb_pull_rcsum - pull skb and update receive checksum * @skb: buffer to update * @len: length of data pulled diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index df4803437888..a796fc7cbc35 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -41,6 +41,7 @@ #include <net/dn_fib.h> #include <net/dn_neigh.h> #include <net/dn_dev.h> +#include <net/nexthop.h> #define RT_MIN_TABLE 1 @@ -150,14 +151,13 @@ static int dn_fib_count_nhs(const struct nlattr *attr) struct rtnexthop *nhp = nla_data(attr); int nhs = 0, nhlen = nla_len(attr); - while(nhlen >= (int)sizeof(struct rtnexthop)) { - if ((nhlen -= nhp->rtnh_len) < 0) - return 0; + while (rtnh_ok(nhp, nhlen)) { nhs++; - nhp = RTNH_NEXT(nhp); + nhp = rtnh_next(nhp, &nhlen); } - return nhs; + /* leftover implies invalid nexthop configuration, discard it */ + return nhlen > 0 ? 0 : nhs; } static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct nlattr *attr, @@ -167,21 +167,24 @@ static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct nlattr *attr, int nhlen = nla_len(attr); change_nexthops(fi) { - int attrlen = nhlen - sizeof(struct rtnexthop); - if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0) + int attrlen; + + if (!rtnh_ok(nhp, nhlen)) return -EINVAL; nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags; nh->nh_oif = nhp->rtnh_ifindex; nh->nh_weight = nhp->rtnh_hops + 1; - if (attrlen) { + attrlen = rtnh_attrlen(nhp); + if (attrlen > 0) { struct nlattr *gw_attr; gw_attr = nla_find((struct nlattr *) (nhp + 1), attrlen, RTA_GATEWAY); nh->nh_gw = gw_attr ? nla_get_le16(gw_attr) : 0; } - nhp = RTNH_NEXT(nhp); + + nhp = rtnh_next(nhp, &nhlen); } endfor_nexthops(fi); return 0; diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index 8c004a0c8d64..935ab932e841 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -81,7 +81,7 @@ static int lowpan_stop(struct net_device *dev) return 0; } -static int lowpan_neigh_construct(struct neighbour *n) +static int lowpan_neigh_construct(struct net_device *dev, struct neighbour *n) { struct lowpan_802154_neigh *neigh = lowpan_802154_neigh(neighbour_priv(n)); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index cbac493c913a..e23f141c9ba5 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -271,7 +271,7 @@ static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *sk return dst_output(net, sk, skb); } #endif - mtu = ip_skb_dst_mtu(skb); + mtu = ip_skb_dst_mtu(sk, skb); if (skb_is_gso(skb)) return ip_finish_output_gso(net, sk, skb, mtu); @@ -541,7 +541,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, iph = ip_hdr(skb); - mtu = ip_skb_dst_mtu(skb); + mtu = ip_skb_dst_mtu(sk, skb); if (IPCB(skb)->frag_max_size && IPCB(skb)->frag_max_size < mtu) mtu = IPCB(skb)->frag_max_size; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 2033f929aa66..c8dd9e26b185 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -89,22 +89,20 @@ static inline int arp_packet_match(const struct arphdr *arphdr, __be32 src_ipaddr, tgt_ipaddr; long ret; -#define FWINV(bool, invflg) ((bool) ^ !!(arpinfo->invflags & (invflg))) - - if (FWINV((arphdr->ar_op & arpinfo->arpop_mask) != arpinfo->arpop, - ARPT_INV_ARPOP)) + if (NF_INVF(arpinfo, ARPT_INV_ARPOP, + (arphdr->ar_op & arpinfo->arpop_mask) != arpinfo->arpop)) return 0; - if (FWINV((arphdr->ar_hrd & arpinfo->arhrd_mask) != arpinfo->arhrd, - ARPT_INV_ARPHRD)) + if (NF_INVF(arpinfo, ARPT_INV_ARPHRD, + (arphdr->ar_hrd & arpinfo->arhrd_mask) != arpinfo->arhrd)) return 0; - if (FWINV((arphdr->ar_pro & arpinfo->arpro_mask) != arpinfo->arpro, - ARPT_INV_ARPPRO)) + if (NF_INVF(arpinfo, ARPT_INV_ARPPRO, + (arphdr->ar_pro & arpinfo->arpro_mask) != arpinfo->arpro)) return 0; - if (FWINV((arphdr->ar_hln & arpinfo->arhln_mask) != arpinfo->arhln, - ARPT_INV_ARPHLN)) + if (NF_INVF(arpinfo, ARPT_INV_ARPHLN, + (arphdr->ar_hln & arpinfo->arhln_mask) != arpinfo->arhln)) return 0; src_devaddr = arpptr; @@ -115,31 +113,32 @@ static inline int arp_packet_match(const struct arphdr *arphdr, arpptr += dev->addr_len; memcpy(&tgt_ipaddr, arpptr, sizeof(u32)); - if (FWINV(arp_devaddr_compare(&arpinfo->src_devaddr, src_devaddr, dev->addr_len), - ARPT_INV_SRCDEVADDR) || - FWINV(arp_devaddr_compare(&arpinfo->tgt_devaddr, tgt_devaddr, dev->addr_len), - ARPT_INV_TGTDEVADDR)) + if (NF_INVF(arpinfo, ARPT_INV_SRCDEVADDR, + arp_devaddr_compare(&arpinfo->src_devaddr, src_devaddr, + dev->addr_len)) || + NF_INVF(arpinfo, ARPT_INV_TGTDEVADDR, + arp_devaddr_compare(&arpinfo->tgt_devaddr, tgt_devaddr, + dev->addr_len))) return 0; - if (FWINV((src_ipaddr & arpinfo->smsk.s_addr) != arpinfo->src.s_addr, - ARPT_INV_SRCIP) || - FWINV(((tgt_ipaddr & arpinfo->tmsk.s_addr) != arpinfo->tgt.s_addr), - ARPT_INV_TGTIP)) + if (NF_INVF(arpinfo, ARPT_INV_SRCIP, + (src_ipaddr & arpinfo->smsk.s_addr) != arpinfo->src.s_addr) || + NF_INVF(arpinfo, ARPT_INV_TGTIP, + (tgt_ipaddr & arpinfo->tmsk.s_addr) != arpinfo->tgt.s_addr)) return 0; /* Look for ifname matches. */ ret = ifname_compare(indev, arpinfo->iniface, arpinfo->iniface_mask); - if (FWINV(ret != 0, ARPT_INV_VIA_IN)) + if (NF_INVF(arpinfo, ARPT_INV_VIA_IN, ret != 0)) return 0; ret = ifname_compare(outdev, arpinfo->outiface, arpinfo->outiface_mask); - if (FWINV(ret != 0, ARPT_INV_VIA_OUT)) + if (NF_INVF(arpinfo, ARPT_INV_VIA_OUT, ret != 0)) return 0; return 1; -#undef FWINV } static inline int arp_checkentry(const struct arpt_arp *arp) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 54906e0e8e0c..f0df66f54ce6 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -58,32 +58,31 @@ ip_packet_match(const struct iphdr *ip, { unsigned long ret; -#define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg))) - - if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr, - IPT_INV_SRCIP) || - FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr, - IPT_INV_DSTIP)) + if (NF_INVF(ipinfo, IPT_INV_SRCIP, + (ip->saddr & ipinfo->smsk.s_addr) != ipinfo->src.s_addr) || + NF_INVF(ipinfo, IPT_INV_DSTIP, + (ip->daddr & ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr)) return false; ret = ifname_compare_aligned(indev, ipinfo->iniface, ipinfo->iniface_mask); - if (FWINV(ret != 0, IPT_INV_VIA_IN)) + if (NF_INVF(ipinfo, IPT_INV_VIA_IN, ret != 0)) return false; ret = ifname_compare_aligned(outdev, ipinfo->outiface, ipinfo->outiface_mask); - if (FWINV(ret != 0, IPT_INV_VIA_OUT)) + if (NF_INVF(ipinfo, IPT_INV_VIA_OUT, ret != 0)) return false; /* Check specific protocol */ if (ipinfo->proto && - FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) + NF_INVF(ipinfo, IPT_INV_PROTO, ip->protocol != ipinfo->proto)) return false; /* If we have a fragment rule but the packet is not a fragment * then we return zero */ - if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) + if (NF_INVF(ipinfo, IPT_INV_FRAG, + (ipinfo->flags & IPT_F_FRAG) && !isfrag)) return false; return true; @@ -122,7 +121,6 @@ static inline bool unconditional(const struct ipt_entry *e) return e->target_offset == sizeof(struct ipt_entry) && memcmp(&e->ip, &uncond, sizeof(uncond)) == 0; -#undef FWINV } /* for const-correctness */ diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 57fc97cdac70..aebdb337fd7e 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -87,10 +87,6 @@ iptable_mangle_hook(void *priv, { if (state->hook == NF_INET_LOCAL_OUT) return ipt_mangle_out(skb, state); - if (state->hook == NF_INET_POST_ROUTING) - return ipt_do_table(skb, state, - state->net->ipv4.iptable_mangle); - /* PREROUTING/INPUT/FORWARD: */ return ipt_do_table(skb, state, state->net->ipv4.iptable_mangle); } diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index b6ea57ec5e14..fd8220213afc 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -24,6 +24,9 @@ const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb, if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET)) return NULL; + if (ip_hdr(oldskb)->protocol != IPPROTO_TCP) + return NULL; + oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb), sizeof(struct tcphdr), _oth); if (oth == NULL) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 5c7ed147449c..032a96d78c99 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2277,6 +2277,38 @@ static inline bool tcp_can_repair_sock(const struct sock *sk) ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED)); } +static int tcp_repair_set_window(struct tcp_sock *tp, char __user *optbuf, int len) +{ + struct tcp_repair_window opt; + + if (!tp->repair) + return -EPERM; + + if (len != sizeof(opt)) + return -EINVAL; + + if (copy_from_user(&opt, optbuf, sizeof(opt))) + return -EFAULT; + + if (opt.max_window < opt.snd_wnd) + return -EINVAL; + + if (after(opt.snd_wl1, tp->rcv_nxt + opt.rcv_wnd)) + return -EINVAL; + + if (after(opt.rcv_wup, tp->rcv_nxt)) + return -EINVAL; + + tp->snd_wl1 = opt.snd_wl1; + tp->snd_wnd = opt.snd_wnd; + tp->max_window = opt.max_window; + + tp->rcv_wnd = opt.rcv_wnd; + tp->rcv_wup = opt.rcv_wup; + + return 0; +} + static int tcp_repair_options_est(struct tcp_sock *tp, struct tcp_repair_opt __user *optbuf, unsigned int len) { @@ -2604,6 +2636,9 @@ static int do_tcp_setsockopt(struct sock *sk, int level, else tp->tsoffset = val - tcp_time_stamp; break; + case TCP_REPAIR_WINDOW: + err = tcp_repair_set_window(tp, optval, optlen); + break; case TCP_NOTSENT_LOWAT: tp->notsent_lowat = val; sk->sk_write_space(sk); @@ -2860,6 +2895,28 @@ static int do_tcp_getsockopt(struct sock *sk, int level, return -EINVAL; break; + case TCP_REPAIR_WINDOW: { + struct tcp_repair_window opt; + + if (get_user(len, optlen)) + return -EFAULT; + + if (len != sizeof(opt)) + return -EINVAL; + + if (!tp->repair) + return -EPERM; + + opt.snd_wl1 = tp->snd_wl1; + opt.snd_wnd = tp->snd_wnd; + opt.max_window = tp->max_window; + opt.rcv_wnd = tp->rcv_wnd; + opt.rcv_wup = tp->rcv_wup; + + if (copy_to_user(optval, &opt, len)) + return -EFAULT; + return 0; + } case TCP_QUEUE_SEQ: if (tp->repair_queue == TCP_SEND_QUEUE) val = tp->write_seq; @@ -2969,8 +3026,18 @@ static void __tcp_alloc_md5sig_pool(void) return; for_each_possible_cpu(cpu) { + void *scratch = per_cpu(tcp_md5sig_pool, cpu).scratch; struct ahash_request *req; + if (!scratch) { + scratch = kmalloc_node(sizeof(union tcp_md5sum_block) + + sizeof(struct tcphdr), + GFP_KERNEL, + cpu_to_node(cpu)); + if (!scratch) + return; + per_cpu(tcp_md5sig_pool, cpu).scratch = scratch; + } if (per_cpu(tcp_md5sig_pool, cpu).md5_req) continue; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3708de2a6683..32b048e524d6 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1018,27 +1018,28 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, GFP_KERNEL); } -static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp, - __be32 daddr, __be32 saddr, int nbytes) +static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp, + __be32 daddr, __be32 saddr, + const struct tcphdr *th, int nbytes) { struct tcp4_pseudohdr *bp; struct scatterlist sg; + struct tcphdr *_th; - bp = &hp->md5_blk.ip4; - - /* - * 1. the TCP pseudo-header (in the order: source IP address, - * destination IP address, zero-padded protocol number, and - * segment length) - */ + bp = hp->scratch; bp->saddr = saddr; bp->daddr = daddr; bp->pad = 0; bp->protocol = IPPROTO_TCP; bp->len = cpu_to_be16(nbytes); - sg_init_one(&sg, bp, sizeof(*bp)); - ahash_request_set_crypt(hp->md5_req, &sg, NULL, sizeof(*bp)); + _th = (struct tcphdr *)(bp + 1); + memcpy(_th, th, sizeof(*th)); + _th->check = 0; + + sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th)); + ahash_request_set_crypt(hp->md5_req, &sg, NULL, + sizeof(*bp) + sizeof(*th)); return crypto_ahash_update(hp->md5_req); } @@ -1055,9 +1056,7 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, if (crypto_ahash_init(req)) goto clear_hash; - if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2)) - goto clear_hash; - if (tcp_md5_hash_header(hp, th)) + if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2)) goto clear_hash; if (tcp_md5_hash_key(hp, key)) goto clear_hash; @@ -1101,9 +1100,7 @@ int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, if (crypto_ahash_init(req)) goto clear_hash; - if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len)) - goto clear_hash; - if (tcp_md5_hash_header(hp, th)) + if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len)) goto clear_hash; if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2)) goto clear_hash; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 1bcef2369d64..771be1fa4176 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -177,6 +177,7 @@ static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) } } + free_percpu(non_pcpu_rt->rt6i_pcpu); non_pcpu_rt->rt6i_pcpu = NULL; } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 63e06c3dd319..61ed95054efa 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -73,22 +73,22 @@ ip6_packet_match(const struct sk_buff *skb, unsigned long ret; const struct ipv6hdr *ipv6 = ipv6_hdr(skb); -#define FWINV(bool, invflg) ((bool) ^ !!(ip6info->invflags & (invflg))) - - if (FWINV(ipv6_masked_addr_cmp(&ipv6->saddr, &ip6info->smsk, - &ip6info->src), IP6T_INV_SRCIP) || - FWINV(ipv6_masked_addr_cmp(&ipv6->daddr, &ip6info->dmsk, - &ip6info->dst), IP6T_INV_DSTIP)) + if (NF_INVF(ip6info, IP6T_INV_SRCIP, + ipv6_masked_addr_cmp(&ipv6->saddr, &ip6info->smsk, + &ip6info->src)) || + NF_INVF(ip6info, IP6T_INV_DSTIP, + ipv6_masked_addr_cmp(&ipv6->daddr, &ip6info->dmsk, + &ip6info->dst))) return false; ret = ifname_compare_aligned(indev, ip6info->iniface, ip6info->iniface_mask); - if (FWINV(ret != 0, IP6T_INV_VIA_IN)) + if (NF_INVF(ip6info, IP6T_INV_VIA_IN, ret != 0)) return false; ret = ifname_compare_aligned(outdev, ip6info->outiface, ip6info->outiface_mask); - if (FWINV(ret != 0, IP6T_INV_VIA_OUT)) + if (NF_INVF(ip6info, IP6T_INV_VIA_OUT, ret != 0)) return false; /* ... might want to do something with class and flowlabel here ... */ diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index cb2b28883252..2b1a9dcdbcb3 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -83,10 +83,6 @@ ip6table_mangle_hook(void *priv, struct sk_buff *skb, { if (state->hook == NF_INET_LOCAL_OUT) return ip6t_mangle_out(skb, state); - if (state->hook == NF_INET_POST_ROUTING) - return ip6t_do_table(skb, state, - state->net->ipv6.ip6table_mangle); - /* INPUT/FORWARD */ return ip6t_do_table(skb, state, state->net->ipv6.ip6table_mangle); } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2255d2bf5f6b..37cf91323319 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -526,26 +526,33 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval, AF_INET6, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); } -static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp, - const struct in6_addr *daddr, - const struct in6_addr *saddr, int nbytes) +static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp, + const struct in6_addr *daddr, + const struct in6_addr *saddr, + const struct tcphdr *th, int nbytes) { struct tcp6_pseudohdr *bp; struct scatterlist sg; + struct tcphdr *_th; - bp = &hp->md5_blk.ip6; + bp = hp->scratch; /* 1. TCP pseudo-header (RFC2460) */ bp->saddr = *saddr; bp->daddr = *daddr; bp->protocol = cpu_to_be32(IPPROTO_TCP); bp->len = cpu_to_be32(nbytes); - sg_init_one(&sg, bp, sizeof(*bp)); - ahash_request_set_crypt(hp->md5_req, &sg, NULL, sizeof(*bp)); + _th = (struct tcphdr *)(bp + 1); + memcpy(_th, th, sizeof(*th)); + _th->check = 0; + + sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th)); + ahash_request_set_crypt(hp->md5_req, &sg, NULL, + sizeof(*bp) + sizeof(*th)); return crypto_ahash_update(hp->md5_req); } -static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key, +static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, const struct in6_addr *daddr, struct in6_addr *saddr, const struct tcphdr *th) { @@ -559,9 +566,7 @@ static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key, if (crypto_ahash_init(req)) goto clear_hash; - if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2)) - goto clear_hash; - if (tcp_md5_hash_header(hp, th)) + if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2)) goto clear_hash; if (tcp_md5_hash_key(hp, key)) goto clear_hash; @@ -606,9 +611,7 @@ static int tcp_v6_md5_hash_skb(char *md5_hash, if (crypto_ahash_init(req)) goto clear_hash; - if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, skb->len)) - goto clear_hash; - if (tcp_md5_hash_header(hp, th)) + if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len)) goto clear_hash; if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2)) goto clear_hash; diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 0b68ba730a06..cb39e05b166c 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1765,18 +1765,12 @@ static int kcm_attach_ioctl(struct socket *sock, struct kcm_attach *info) if (!csock) return -ENOENT; - prog = bpf_prog_get(info->bpf_fd); + prog = bpf_prog_get_type(info->bpf_fd, BPF_PROG_TYPE_SOCKET_FILTER); if (IS_ERR(prog)) { err = PTR_ERR(prog); goto out; } - if (prog->type != BPF_PROG_TYPE_SOCKET_FILTER) { - bpf_prog_put(prog); - err = -EINVAL; - goto out; - } - err = kcm_attach(sock, csock, prog); if (err) { bpf_prog_put(prog); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 95e757c377f9..9266ceebd112 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -609,9 +609,8 @@ config NETFILTER_XT_MARK The target allows you to create rules in the "mangle" table which alter the netfilter mark (nfmark) field associated with the packet. - Prior to routing, the nfmark can influence the routing method (see - "Use netfilter MARK value as routing key") and can also be used by - other subsystems to change their behavior. + Prior to routing, the nfmark can influence the routing method and can + also be used by other subsystems to change their behavior. config NETFILTER_XT_CONNMARK tristate 'ctmark target and match support' @@ -753,9 +752,8 @@ config NETFILTER_XT_TARGET_HMARK The target allows you to create rules in the "raw" and "mangle" tables which set the skbuff mark by means of hash calculation within a given - range. The nfmark can influence the routing method (see "Use netfilter - MARK value as routing key") and can also be used by other subsystems to - change their behaviour. + range. The nfmark can influence the routing method and can also be used + by other subsystems to change their behaviour. To compile it as a module, choose M here. If unsure, say N. diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index f204274a9b6b..153e33ffeeaa 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -327,16 +327,10 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net, tmpl->status = IPS_TEMPLATE; write_pnet(&tmpl->ct_net, net); - - if (nf_ct_zone_add(tmpl, flags, zone) < 0) - goto out_free; - + nf_ct_zone_add(tmpl, zone); atomic_set(&tmpl->ct_general.use, 0); return tmpl; -out_free: - kfree(tmpl); - return NULL; } EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc); @@ -929,16 +923,13 @@ __nf_conntrack_alloc(struct net *net, offsetof(struct nf_conn, proto) - offsetof(struct nf_conn, __nfct_init_offset[0])); - if (zone && nf_ct_zone_add(ct, GFP_ATOMIC, zone) < 0) - goto out_free; + nf_ct_zone_add(ct, zone); /* Because we use RCU lookups, we set ct_general.use to zero before * this is inserted in any list. */ atomic_set(&ct->ct_general.use, 0); return ct; -out_free: - kmem_cache_free(nf_conntrack_cachep, ct); out: atomic_dec(&net->ct.count); return ERR_PTR(-ENOMEM); @@ -1342,14 +1333,6 @@ bool __nf_ct_kill_acct(struct nf_conn *ct, } EXPORT_SYMBOL_GPL(__nf_ct_kill_acct); -#ifdef CONFIG_NF_CONNTRACK_ZONES -static struct nf_ct_ext_type nf_ct_zone_extend __read_mostly = { - .len = sizeof(struct nf_conntrack_zone), - .align = __alignof__(struct nf_conntrack_zone), - .id = NF_CT_EXT_ZONE, -}; -#endif - #if IS_ENABLED(CONFIG_NF_CT_NETLINK) #include <linux/netfilter/nfnetlink.h> @@ -1532,9 +1515,6 @@ void nf_conntrack_cleanup_end(void) nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size); -#ifdef CONFIG_NF_CONNTRACK_ZONES - nf_ct_extend_unregister(&nf_ct_zone_extend); -#endif nf_conntrack_proto_fini(); nf_conntrack_seqadj_fini(); nf_conntrack_labels_fini(); @@ -1617,24 +1597,14 @@ void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls) } EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable); -int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) +int nf_conntrack_hash_resize(unsigned int hashsize) { - int i, bucket, rc; - unsigned int hashsize, old_size; + int i, bucket; + unsigned int old_size; struct hlist_nulls_head *hash, *old_hash; struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; - if (current->nsproxy->net_ns != &init_net) - return -EOPNOTSUPP; - - /* On boot, we can set this without any fancy locking. */ - if (!nf_conntrack_htable_size) - return param_set_uint(val, kp); - - rc = kstrtouint(val, 0, &hashsize); - if (rc) - return rc; if (!hashsize) return -EINVAL; @@ -1642,6 +1612,12 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) if (!hash) return -ENOMEM; + old_size = nf_conntrack_htable_size; + if (old_size == hashsize) { + nf_ct_free_hashtable(hash, hashsize); + return 0; + } + local_bh_disable(); nf_conntrack_all_lock(); write_seqcount_begin(&nf_conntrack_generation); @@ -1677,6 +1653,25 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) nf_ct_free_hashtable(old_hash, old_size); return 0; } + +int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) +{ + unsigned int hashsize; + int rc; + + if (current->nsproxy->net_ns != &init_net) + return -EOPNOTSUPP; + + /* On boot, we can set this without any fancy locking. */ + if (!nf_conntrack_htable_size) + return param_set_uint(val, kp); + + rc = kstrtouint(val, 0, &hashsize); + if (rc) + return rc; + + return nf_conntrack_hash_resize(hashsize); +} EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize); module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint, @@ -1733,7 +1728,7 @@ int nf_conntrack_init_start(void) nf_conntrack_cachep = kmem_cache_create("nf_conntrack", sizeof(struct nf_conn), 0, - SLAB_DESTROY_BY_RCU, NULL); + SLAB_DESTROY_BY_RCU | SLAB_HWCACHE_ALIGN, NULL); if (!nf_conntrack_cachep) goto err_cachep; @@ -1773,11 +1768,6 @@ int nf_conntrack_init_start(void) if (ret < 0) goto err_seqadj; -#ifdef CONFIG_NF_CONNTRACK_ZONES - ret = nf_ct_extend_register(&nf_ct_zone_extend); - if (ret < 0) - goto err_extend; -#endif ret = nf_conntrack_proto_init(); if (ret < 0) goto err_proto; @@ -1793,10 +1783,6 @@ int nf_conntrack_init_start(void) return 0; err_proto: -#ifdef CONFIG_NF_CONNTRACK_ZONES - nf_ct_extend_unregister(&nf_ct_zone_extend); -err_extend: -#endif nf_conntrack_seqadj_fini(); err_seqadj: nf_conntrack_labels_fini(); diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 196cb39649e1..3a1a88b9bafa 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -389,11 +389,38 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, struct net *net) { struct nf_conntrack_tuple_hash *h; + const struct hlist_nulls_node *nn; + int cpu; + + /* Get rid of expecteds, set helpers to NULL. */ + for_each_possible_cpu(cpu) { + struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu); + + spin_lock_bh(&pcpu->lock); + hlist_nulls_for_each_entry(h, nn, &pcpu->unconfirmed, hnnode) + unhelp(h, me); + spin_unlock_bh(&pcpu->lock); + } +} + +void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) +{ + struct nf_conntrack_tuple_hash *h; struct nf_conntrack_expect *exp; const struct hlist_node *next; const struct hlist_nulls_node *nn; + struct net *net; unsigned int i; - int cpu; + + mutex_lock(&nf_ct_helper_mutex); + hlist_del_rcu(&me->hnode); + nf_ct_helper_count--; + mutex_unlock(&nf_ct_helper_mutex); + + /* Make sure every nothing is still using the helper unless its a + * connection in the hash. + */ + synchronize_rcu(); /* Get rid of expectations */ spin_lock_bh(&nf_conntrack_expect_lock); @@ -413,15 +440,11 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, } spin_unlock_bh(&nf_conntrack_expect_lock); - /* Get rid of expecteds, set helpers to NULL. */ - for_each_possible_cpu(cpu) { - struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu); + rtnl_lock(); + for_each_net(net) + __nf_conntrack_helper_unregister(me, net); + rtnl_unlock(); - spin_lock_bh(&pcpu->lock); - hlist_nulls_for_each_entry(h, nn, &pcpu->unconfirmed, hnnode) - unhelp(h, me); - spin_unlock_bh(&pcpu->lock); - } local_bh_disable(); for (i = 0; i < nf_conntrack_htable_size; i++) { nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); @@ -433,26 +456,6 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, } local_bh_enable(); } - -void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) -{ - struct net *net; - - mutex_lock(&nf_ct_helper_mutex); - hlist_del_rcu(&me->hnode); - nf_ct_helper_count--; - mutex_unlock(&nf_ct_helper_mutex); - - /* Make sure every nothing is still using the helper unless its a - * connection in the hash. - */ - synchronize_rcu(); - - rtnl_lock(); - for_each_net(net) - __nf_conntrack_helper_unregister(me, net); - rtnl_unlock(); -} EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister); static struct nf_ct_ext_type helper_extend __read_mostly = { diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index c026c472ea80..2aaa188ee961 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -434,8 +434,29 @@ static void nf_conntrack_standalone_fini_proc(struct net *net) #ifdef CONFIG_SYSCTL /* Log invalid packets of a given protocol */ -static int log_invalid_proto_min = 0; -static int log_invalid_proto_max = 255; +static int log_invalid_proto_min __read_mostly; +static int log_invalid_proto_max __read_mostly = 255; + +/* size the user *wants to set */ +static unsigned int nf_conntrack_htable_size_user __read_mostly; + +static int +nf_conntrack_hash_sysctl(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int ret; + + ret = proc_dointvec(table, write, buffer, lenp, ppos); + if (ret < 0 || !write) + return ret; + + /* update ret, we might not be able to satisfy request */ + ret = nf_conntrack_hash_resize(nf_conntrack_htable_size_user); + + /* update it to the actual value used by conntrack */ + nf_conntrack_htable_size_user = nf_conntrack_htable_size; + return ret; +} static struct ctl_table_header *nf_ct_netfilter_header; @@ -456,10 +477,10 @@ static struct ctl_table nf_ct_sysctl_table[] = { }, { .procname = "nf_conntrack_buckets", - .data = &nf_conntrack_htable_size, + .data = &nf_conntrack_htable_size_user, .maxlen = sizeof(unsigned int), - .mode = 0444, - .proc_handler = proc_dointvec, + .mode = 0644, + .proc_handler = nf_conntrack_hash_sysctl, }, { .procname = "nf_conntrack_checksum", @@ -515,6 +536,9 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) if (net->user_ns != &init_user_ns) table[0].procname = NULL; + if (!net_eq(&init_net, net)) + table[2].mode = 0444; + net->ct.sysctl_header = register_net_sysctl(net, "net/netfilter", table); if (!net->ct.sysctl_header) goto out_unregister_netfilter; @@ -604,6 +628,8 @@ static int __init nf_conntrack_standalone_init(void) ret = -ENOMEM; goto out_sysctl; } + + nf_conntrack_htable_size_user = nf_conntrack_htable_size; #endif ret = register_pernet_subsys(&nf_conntrack_net_ops); diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index a5d41dfa9f05..aa5847a16713 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -159,6 +159,20 @@ int nf_logger_find_get(int pf, enum nf_log_type type) struct nf_logger *logger; int ret = -ENOENT; + if (pf == NFPROTO_INET) { + ret = nf_logger_find_get(NFPROTO_IPV4, type); + if (ret < 0) + return ret; + + ret = nf_logger_find_get(NFPROTO_IPV6, type); + if (ret < 0) { + nf_logger_put(NFPROTO_IPV4, type); + return ret; + } + + return 0; + } + if (rcu_access_pointer(loggers[pf][type]) == NULL) request_module("nf-logger-%u-%u", pf, type); @@ -167,7 +181,7 @@ int nf_logger_find_get(int pf, enum nf_log_type type) if (logger == NULL) goto out; - if (logger && try_module_get(logger->me)) + if (try_module_get(logger->me)) ret = 0; out: rcu_read_unlock(); @@ -179,6 +193,12 @@ void nf_logger_put(int pf, enum nf_log_type type) { struct nf_logger *logger; + if (pf == NFPROTO_INET) { + nf_logger_put(NFPROTO_IPV4, type); + nf_logger_put(NFPROTO_IPV6, type); + return; + } + BUG_ON(loggers[pf][type] == NULL); rcu_read_lock(); @@ -398,16 +418,17 @@ static int nf_log_proc_dostring(struct ctl_table *table, int write, { const struct nf_logger *logger; char buf[NFLOGGER_NAME_LEN]; - size_t size = *lenp; int r = 0; int tindex = (unsigned long)table->extra1; struct net *net = current->nsproxy->net_ns; if (write) { - if (size > sizeof(buf)) - size = sizeof(buf); - if (copy_from_user(buf, buffer, size)) - return -EFAULT; + struct ctl_table tmp = *table; + + tmp.data = buf; + r = proc_dostring(&tmp, write, buffer, lenp, ppos); + if (r) + return r; if (!strcmp(buf, "NONE")) { nf_log_unbind_pf(net, tindex); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 2c881871db38..18b7f8578ee0 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -131,29 +131,8 @@ static void nft_trans_destroy(struct nft_trans *trans) kfree(trans); } -static int nft_register_basechain(struct nft_base_chain *basechain, - unsigned int hook_nops) -{ - struct net *net = read_pnet(&basechain->pnet); - - if (basechain->flags & NFT_BASECHAIN_DISABLED) - return 0; - - return nf_register_net_hooks(net, basechain->ops, hook_nops); -} - -static void nft_unregister_basechain(struct nft_base_chain *basechain, - unsigned int hook_nops) -{ - struct net *net = read_pnet(&basechain->pnet); - - if (basechain->flags & NFT_BASECHAIN_DISABLED) - return; - - nf_unregister_net_hooks(net, basechain->ops, hook_nops); -} - -static int nf_tables_register_hooks(const struct nft_table *table, +static int nf_tables_register_hooks(struct net *net, + const struct nft_table *table, struct nft_chain *chain, unsigned int hook_nops) { @@ -161,10 +140,12 @@ static int nf_tables_register_hooks(const struct nft_table *table, !(chain->flags & NFT_BASE_CHAIN)) return 0; - return nft_register_basechain(nft_base_chain(chain), hook_nops); + return nf_register_net_hooks(net, nft_base_chain(chain)->ops, + hook_nops); } -static void nf_tables_unregister_hooks(const struct nft_table *table, +static void nf_tables_unregister_hooks(struct net *net, + const struct nft_table *table, struct nft_chain *chain, unsigned int hook_nops) { @@ -172,12 +153,9 @@ static void nf_tables_unregister_hooks(const struct nft_table *table, !(chain->flags & NFT_BASE_CHAIN)) return; - nft_unregister_basechain(nft_base_chain(chain), hook_nops); + nf_unregister_net_hooks(net, nft_base_chain(chain)->ops, hook_nops); } -/* Internal table flags */ -#define NFT_TABLE_INACTIVE (1 << 15) - static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type) { struct nft_trans *trans; @@ -187,7 +165,7 @@ static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type) return -ENOMEM; if (msg_type == NFT_MSG_NEWTABLE) - ctx->table->flags |= NFT_TABLE_INACTIVE; + nft_activate_next(ctx->net, ctx->table); list_add_tail(&trans->list, &ctx->net->nft.commit_list); return 0; @@ -201,7 +179,7 @@ static int nft_deltable(struct nft_ctx *ctx) if (err < 0) return err; - list_del_rcu(&ctx->table->list); + nft_deactivate_next(ctx->net, ctx->table); return err; } @@ -214,7 +192,7 @@ static int nft_trans_chain_add(struct nft_ctx *ctx, int msg_type) return -ENOMEM; if (msg_type == NFT_MSG_NEWCHAIN) - ctx->chain->flags |= NFT_CHAIN_INACTIVE; + nft_activate_next(ctx->net, ctx->chain); list_add_tail(&trans->list, &ctx->net->nft.commit_list); return 0; @@ -229,47 +207,17 @@ static int nft_delchain(struct nft_ctx *ctx) return err; ctx->table->use--; - list_del_rcu(&ctx->chain->list); + nft_deactivate_next(ctx->net, ctx->chain); return err; } -static inline bool -nft_rule_is_active(struct net *net, const struct nft_rule *rule) -{ - return (rule->genmask & nft_genmask_cur(net)) == 0; -} - -static inline int -nft_rule_is_active_next(struct net *net, const struct nft_rule *rule) -{ - return (rule->genmask & nft_genmask_next(net)) == 0; -} - -static inline void -nft_rule_activate_next(struct net *net, struct nft_rule *rule) -{ - /* Now inactive, will be active in the future */ - rule->genmask = nft_genmask_cur(net); -} - -static inline void -nft_rule_deactivate_next(struct net *net, struct nft_rule *rule) -{ - rule->genmask = nft_genmask_next(net); -} - -static inline void nft_rule_clear(struct net *net, struct nft_rule *rule) -{ - rule->genmask &= ~nft_genmask_next(net); -} - static int nf_tables_delrule_deactivate(struct nft_ctx *ctx, struct nft_rule *rule) { /* You cannot delete the same rule twice */ - if (nft_rule_is_active_next(ctx->net, rule)) { - nft_rule_deactivate_next(ctx->net, rule); + if (nft_is_active_next(ctx->net, rule)) { + nft_deactivate_next(ctx->net, rule); ctx->chain->use--; return 0; } @@ -322,9 +270,6 @@ static int nft_delrule_by_chain(struct nft_ctx *ctx) return 0; } -/* Internal set flag */ -#define NFT_SET_INACTIVE (1 << 15) - static int nft_trans_set_add(struct nft_ctx *ctx, int msg_type, struct nft_set *set) { @@ -337,7 +282,7 @@ static int nft_trans_set_add(struct nft_ctx *ctx, int msg_type, if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] != NULL) { nft_trans_set_id(trans) = ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID])); - set->flags |= NFT_SET_INACTIVE; + nft_activate_next(ctx->net, set); } nft_trans_set(trans) = set; list_add_tail(&trans->list, &ctx->net->nft.commit_list); @@ -353,7 +298,7 @@ static int nft_delset(struct nft_ctx *ctx, struct nft_set *set) if (err < 0) return err; - list_del_rcu(&set->list); + nft_deactivate_next(ctx->net, set); ctx->table->use--; return err; @@ -364,26 +309,29 @@ static int nft_delset(struct nft_ctx *ctx, struct nft_set *set) */ static struct nft_table *nft_table_lookup(const struct nft_af_info *afi, - const struct nlattr *nla) + const struct nlattr *nla, + u8 genmask) { struct nft_table *table; list_for_each_entry(table, &afi->tables, list) { - if (!nla_strcmp(nla, table->name)) + if (!nla_strcmp(nla, table->name) && + nft_active_genmask(table, genmask)) return table; } return NULL; } static struct nft_table *nf_tables_table_lookup(const struct nft_af_info *afi, - const struct nlattr *nla) + const struct nlattr *nla, + u8 genmask) { struct nft_table *table; if (nla == NULL) return ERR_PTR(-EINVAL); - table = nft_table_lookup(afi, nla); + table = nft_table_lookup(afi, nla, genmask); if (table != NULL) return table; @@ -524,6 +472,8 @@ static int nf_tables_dump_tables(struct sk_buff *skb, if (idx > s_idx) memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(cb->args[0])); + if (!nft_is_active(net, table)) + continue; if (nf_tables_fill_table_info(skb, net, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, @@ -548,6 +498,7 @@ static int nf_tables_gettable(struct net *net, struct sock *nlsk, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_cur(net); const struct nft_af_info *afi; const struct nft_table *table; struct sk_buff *skb2; @@ -565,11 +516,9 @@ static int nf_tables_gettable(struct net *net, struct sock *nlsk, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]); + table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], genmask); if (IS_ERR(table)) return PTR_ERR(table); - if (table->flags & NFT_TABLE_INACTIVE) - return -ENOENT; skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb2) @@ -588,17 +537,21 @@ err: return err; } -static int nf_tables_table_enable(const struct nft_af_info *afi, +static int nf_tables_table_enable(struct net *net, + const struct nft_af_info *afi, struct nft_table *table) { struct nft_chain *chain; int err, i = 0; list_for_each_entry(chain, &table->chains, list) { + if (!nft_is_active_next(net, chain)) + continue; if (!(chain->flags & NFT_BASE_CHAIN)) continue; - err = nft_register_basechain(nft_base_chain(chain), afi->nops); + err = nf_register_net_hooks(net, nft_base_chain(chain)->ops, + afi->nops); if (err < 0) goto err; @@ -607,26 +560,34 @@ static int nf_tables_table_enable(const struct nft_af_info *afi, return 0; err: list_for_each_entry(chain, &table->chains, list) { + if (!nft_is_active_next(net, chain)) + continue; if (!(chain->flags & NFT_BASE_CHAIN)) continue; if (i-- <= 0) break; - nft_unregister_basechain(nft_base_chain(chain), afi->nops); + nf_unregister_net_hooks(net, nft_base_chain(chain)->ops, + afi->nops); } return err; } -static void nf_tables_table_disable(const struct nft_af_info *afi, +static void nf_tables_table_disable(struct net *net, + const struct nft_af_info *afi, struct nft_table *table) { struct nft_chain *chain; list_for_each_entry(chain, &table->chains, list) { - if (chain->flags & NFT_BASE_CHAIN) - nft_unregister_basechain(nft_base_chain(chain), - afi->nops); + if (!nft_is_active_next(net, chain)) + continue; + if (!(chain->flags & NFT_BASE_CHAIN)) + continue; + + nf_unregister_net_hooks(net, nft_base_chain(chain)->ops, + afi->nops); } } @@ -656,7 +617,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx) nft_trans_table_enable(trans) = false; } else if (!(flags & NFT_TABLE_F_DORMANT) && ctx->table->flags & NFT_TABLE_F_DORMANT) { - ret = nf_tables_table_enable(ctx->afi, ctx->table); + ret = nf_tables_table_enable(ctx->net, ctx->afi, ctx->table); if (ret >= 0) { ctx->table->flags &= ~NFT_TABLE_F_DORMANT; nft_trans_table_enable(trans) = true; @@ -678,6 +639,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_next(net); const struct nlattr *name; struct nft_af_info *afi; struct nft_table *table; @@ -691,7 +653,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, return PTR_ERR(afi); name = nla[NFTA_TABLE_NAME]; - table = nf_tables_table_lookup(afi, name); + table = nf_tables_table_lookup(afi, name, genmask); if (IS_ERR(table)) { if (PTR_ERR(table) != -ENOENT) return PTR_ERR(table); @@ -699,8 +661,6 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, } if (table != NULL) { - if (table->flags & NFT_TABLE_INACTIVE) - return -ENOENT; if (nlh->nlmsg_flags & NLM_F_EXCL) return -EEXIST; if (nlh->nlmsg_flags & NLM_F_REPLACE) @@ -752,6 +712,9 @@ static int nft_flush_table(struct nft_ctx *ctx) struct nft_set *set, *ns; list_for_each_entry(chain, &ctx->table->chains, list) { + if (!nft_is_active_next(ctx->net, chain)) + continue; + ctx->chain = chain; err = nft_delrule_by_chain(ctx); @@ -760,6 +723,9 @@ static int nft_flush_table(struct nft_ctx *ctx) } list_for_each_entry_safe(set, ns, &ctx->table->sets, list) { + if (!nft_is_active_next(ctx->net, set)) + continue; + if (set->flags & NFT_SET_ANONYMOUS && !list_empty(&set->bindings)) continue; @@ -770,6 +736,9 @@ static int nft_flush_table(struct nft_ctx *ctx) } list_for_each_entry_safe(chain, nc, &ctx->table->chains, list) { + if (!nft_is_active_next(ctx->net, chain)) + continue; + ctx->chain = chain; err = nft_delchain(ctx); @@ -795,6 +764,9 @@ static int nft_flush(struct nft_ctx *ctx, int family) ctx->afi = afi; list_for_each_entry_safe(table, nt, &afi->tables, list) { + if (!nft_is_active_next(ctx->net, table)) + continue; + if (nla[NFTA_TABLE_NAME] && nla_strcmp(nla[NFTA_TABLE_NAME], table->name) != 0) continue; @@ -815,6 +787,7 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_next(net); struct nft_af_info *afi; struct nft_table *table; int family = nfmsg->nfgen_family; @@ -828,7 +801,7 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]); + table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME], genmask); if (IS_ERR(table)) return PTR_ERR(table); @@ -875,12 +848,14 @@ EXPORT_SYMBOL_GPL(nft_unregister_chain_type); */ static struct nft_chain * -nf_tables_chain_lookup_byhandle(const struct nft_table *table, u64 handle) +nf_tables_chain_lookup_byhandle(const struct nft_table *table, u64 handle, + u8 genmask) { struct nft_chain *chain; list_for_each_entry(chain, &table->chains, list) { - if (chain->handle == handle) + if (chain->handle == handle && + nft_active_genmask(chain, genmask)) return chain; } @@ -888,7 +863,8 @@ nf_tables_chain_lookup_byhandle(const struct nft_table *table, u64 handle) } static struct nft_chain *nf_tables_chain_lookup(const struct nft_table *table, - const struct nlattr *nla) + const struct nlattr *nla, + u8 genmask) { struct nft_chain *chain; @@ -896,7 +872,8 @@ static struct nft_chain *nf_tables_chain_lookup(const struct nft_table *table, return ERR_PTR(-EINVAL); list_for_each_entry(chain, &table->chains, list) { - if (!nla_strcmp(nla, chain->name)) + if (!nla_strcmp(nla, chain->name) && + nft_active_genmask(chain, genmask)) return chain; } @@ -1079,6 +1056,8 @@ static int nf_tables_dump_chains(struct sk_buff *skb, if (idx > s_idx) memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(cb->args[0])); + if (!nft_is_active(net, chain)) + continue; if (nf_tables_fill_chain_info(skb, net, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, @@ -1104,6 +1083,7 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_cur(net); const struct nft_af_info *afi; const struct nft_table *table; const struct nft_chain *chain; @@ -1122,17 +1102,13 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]); + table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask); if (IS_ERR(table)) return PTR_ERR(table); - if (table->flags & NFT_TABLE_INACTIVE) - return -ENOENT; - chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]); + chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask); if (IS_ERR(chain)) return PTR_ERR(chain); - if (chain->flags & NFT_CHAIN_INACTIVE) - return -ENOENT; skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb2) @@ -1231,6 +1207,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, struct nft_chain *chain; struct nft_base_chain *basechain = NULL; struct nlattr *ha[NFTA_HOOK_MAX + 1]; + u8 genmask = nft_genmask_next(net); int family = nfmsg->nfgen_family; struct net_device *dev = NULL; u8 policy = NF_ACCEPT; @@ -1247,7 +1224,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]); + table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask); if (IS_ERR(table)) return PTR_ERR(table); @@ -1256,11 +1233,11 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, if (nla[NFTA_CHAIN_HANDLE]) { handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE])); - chain = nf_tables_chain_lookup_byhandle(table, handle); + chain = nf_tables_chain_lookup_byhandle(table, handle, genmask); if (IS_ERR(chain)) return PTR_ERR(chain); } else { - chain = nf_tables_chain_lookup(table, name); + chain = nf_tables_chain_lookup(table, name, genmask); if (IS_ERR(chain)) { if (PTR_ERR(chain) != -ENOENT) return PTR_ERR(chain); @@ -1291,16 +1268,20 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, struct nft_stats *stats = NULL; struct nft_trans *trans; - if (chain->flags & NFT_CHAIN_INACTIVE) - return -ENOENT; if (nlh->nlmsg_flags & NLM_F_EXCL) return -EEXIST; if (nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; - if (nla[NFTA_CHAIN_HANDLE] && name && - !IS_ERR(nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]))) - return -EEXIST; + if (nla[NFTA_CHAIN_HANDLE] && name) { + struct nft_chain *chain2; + + chain2 = nf_tables_chain_lookup(table, + nla[NFTA_CHAIN_NAME], + genmask); + if (IS_ERR(chain2)) + return PTR_ERR(chain2); + } if (nla[NFTA_CHAIN_COUNTERS]) { if (!(chain->flags & NFT_BASE_CHAIN)) @@ -1455,7 +1436,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, chain->table = table; nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN); - err = nf_tables_register_hooks(table, chain, afi->nops); + err = nf_tables_register_hooks(net, table, chain, afi->nops); if (err < 0) goto err1; @@ -1468,7 +1449,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, list_add_tail_rcu(&chain->list, &table->chains); return 0; err2: - nf_tables_unregister_hooks(table, chain, afi->nops); + nf_tables_unregister_hooks(net, table, chain, afi->nops); err1: nf_tables_chain_destroy(chain); return err; @@ -1479,6 +1460,7 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_next(net); struct nft_af_info *afi; struct nft_table *table; struct nft_chain *chain; @@ -1489,11 +1471,11 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]); + table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask); if (IS_ERR(table)) return PTR_ERR(table); - chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]); + chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask); if (IS_ERR(chain)) return PTR_ERR(chain); if (chain->use > 0) @@ -1898,7 +1880,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb, list_for_each_entry_rcu(table, &afi->tables, list) { list_for_each_entry_rcu(chain, &table->chains, list) { list_for_each_entry_rcu(rule, &chain->rules, list) { - if (!nft_rule_is_active(net, rule)) + if (!nft_is_active(net, rule)) goto cont; if (idx < s_idx) goto cont; @@ -1931,6 +1913,7 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_cur(net); const struct nft_af_info *afi; const struct nft_table *table; const struct nft_chain *chain; @@ -1950,17 +1933,13 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]); + table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], genmask); if (IS_ERR(table)) return PTR_ERR(table); - if (table->flags & NFT_TABLE_INACTIVE) - return -ENOENT; - chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); + chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask); if (IS_ERR(chain)) return PTR_ERR(chain); - if (chain->flags & NFT_CHAIN_INACTIVE) - return -ENOENT; rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]); if (IS_ERR(rule)) @@ -2009,6 +1988,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_next(net); struct nft_af_info *afi; struct nft_table *table; struct nft_chain *chain; @@ -2029,11 +2009,11 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]); + table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], genmask); if (IS_ERR(table)) return PTR_ERR(table); - chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); + chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask); if (IS_ERR(chain)) return PTR_ERR(chain); @@ -2102,7 +2082,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, if (rule == NULL) goto err1; - nft_rule_activate_next(net, rule); + nft_activate_next(net, rule); rule->handle = handle; rule->dlen = size; @@ -2124,14 +2104,14 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, } if (nlh->nlmsg_flags & NLM_F_REPLACE) { - if (nft_rule_is_active_next(net, old_rule)) { + if (nft_is_active_next(net, old_rule)) { trans = nft_trans_rule_add(&ctx, NFT_MSG_DELRULE, old_rule); if (trans == NULL) { err = -ENOMEM; goto err2; } - nft_rule_deactivate_next(net, old_rule); + nft_deactivate_next(net, old_rule); chain->use--; list_add_tail_rcu(&rule->list, &old_rule->list); } else { @@ -2174,6 +2154,7 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_next(net); struct nft_af_info *afi; struct nft_table *table; struct nft_chain *chain = NULL; @@ -2185,12 +2166,13 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]); + table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE], genmask); if (IS_ERR(table)) return PTR_ERR(table); if (nla[NFTA_RULE_CHAIN]) { - chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); + chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN], + genmask); if (IS_ERR(chain)) return PTR_ERR(chain); } @@ -2210,6 +2192,9 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk, } } else { list_for_each_entry(chain, &table->chains, list) { + if (!nft_is_active_next(net, chain)) + continue; + ctx.chain = chain; err = nft_delrule_by_chain(&ctx); if (err < 0) @@ -2339,7 +2324,8 @@ static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = { static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net, const struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + u8 genmask) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); struct nft_af_info *afi = NULL; @@ -2355,7 +2341,8 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net, if (afi == NULL) return -EAFNOSUPPORT; - table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]); + table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE], + genmask); if (IS_ERR(table)) return PTR_ERR(table); } @@ -2365,7 +2352,7 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net, } struct nft_set *nf_tables_set_lookup(const struct nft_table *table, - const struct nlattr *nla) + const struct nlattr *nla, u8 genmask) { struct nft_set *set; @@ -2373,22 +2360,27 @@ struct nft_set *nf_tables_set_lookup(const struct nft_table *table, return ERR_PTR(-EINVAL); list_for_each_entry(set, &table->sets, list) { - if (!nla_strcmp(nla, set->name)) + if (!nla_strcmp(nla, set->name) && + nft_active_genmask(set, genmask)) return set; } return ERR_PTR(-ENOENT); } struct nft_set *nf_tables_set_lookup_byid(const struct net *net, - const struct nlattr *nla) + const struct nlattr *nla, + u8 genmask) { struct nft_trans *trans; u32 id = ntohl(nla_get_be32(nla)); list_for_each_entry(trans, &net->nft.commit_list, list) { + struct nft_set *set = nft_trans_set(trans); + if (trans->msg_type == NFT_MSG_NEWSET && - id == nft_trans_set_id(trans)) - return nft_trans_set(trans); + id == nft_trans_set_id(trans) && + nft_active_genmask(set, genmask)) + return set; } return ERR_PTR(-ENOENT); } @@ -2413,6 +2405,8 @@ cont: list_for_each_entry(i, &ctx->table->sets, list) { int tmp; + if (!nft_is_active_next(ctx->net, set)) + continue; if (!sscanf(i->name, name, &tmp)) continue; if (tmp < min || tmp >= min + BITS_PER_BYTE * PAGE_SIZE) @@ -2432,6 +2426,8 @@ cont: snprintf(set->name, sizeof(set->name), name, min + n); list_for_each_entry(i, &ctx->table->sets, list) { + if (!nft_is_active_next(ctx->net, i)) + continue; if (!strcmp(set->name, i->name)) return -ENFILE; } @@ -2580,6 +2576,8 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb) list_for_each_entry_rcu(set, &table->sets, list) { if (idx < s_idx) goto cont; + if (!nft_is_active(net, set)) + goto cont; ctx_set = *ctx; ctx_set.table = table; @@ -2616,6 +2614,7 @@ static int nf_tables_getset(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { + u8 genmask = nft_genmask_cur(net); const struct nft_set *set; struct nft_ctx ctx; struct sk_buff *skb2; @@ -2623,7 +2622,7 @@ static int nf_tables_getset(struct net *net, struct sock *nlsk, int err; /* Verify existence before starting dump */ - err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla); + err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla, genmask); if (err < 0) return err; @@ -2650,11 +2649,9 @@ static int nf_tables_getset(struct net *net, struct sock *nlsk, if (!nla[NFTA_SET_TABLE]) return -EINVAL; - set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]); + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask); if (IS_ERR(set)) return PTR_ERR(set); - if (set->flags & NFT_SET_INACTIVE) - return -ENOENT; skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (skb2 == NULL) @@ -2693,6 +2690,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_next(net); const struct nft_set_ops *ops; struct nft_af_info *afi; struct nft_table *table; @@ -2790,13 +2788,13 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]); + table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE], genmask); if (IS_ERR(table)) return PTR_ERR(table); nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla); - set = nf_tables_set_lookup(table, nla[NFTA_SET_NAME]); + set = nf_tables_set_lookup(table, nla[NFTA_SET_NAME], genmask); if (IS_ERR(set)) { if (PTR_ERR(set) != -ENOENT) return PTR_ERR(set); @@ -2895,6 +2893,7 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk, const struct nlattr * const nla[]) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u8 genmask = nft_genmask_next(net); struct nft_set *set; struct nft_ctx ctx; int err; @@ -2904,11 +2903,11 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk, if (nla[NFTA_SET_TABLE] == NULL) return -EINVAL; - err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla); + err = nft_ctx_init_from_setattr(&ctx, net, skb, nlh, nla, genmask); if (err < 0) return err; - set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]); + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask); if (IS_ERR(set)) return PTR_ERR(set); if (!list_empty(&set->bindings)) @@ -2973,7 +2972,7 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, list_del_rcu(&binding->list); if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS && - !(set->flags & NFT_SET_INACTIVE)) + nft_is_active(ctx->net, set)) nf_tables_set_destroy(ctx, set); } @@ -3029,7 +3028,8 @@ static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, struct net *net, const struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[]) + const struct nlattr * const nla[], + u8 genmask) { const struct nfgenmsg *nfmsg = nlmsg_data(nlh); struct nft_af_info *afi; @@ -3039,7 +3039,8 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, struct net *net, if (IS_ERR(afi)) return PTR_ERR(afi); - table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE]); + table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE], + genmask); if (IS_ERR(table)) return PTR_ERR(table); @@ -3136,6 +3137,7 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx, static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); + u8 genmask = nft_genmask_cur(net); const struct nft_set *set; struct nft_set_dump_args args; struct nft_ctx ctx; @@ -3152,17 +3154,14 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) return err; err = nft_ctx_init_from_elemattr(&ctx, net, cb->skb, cb->nlh, - (void *)nla); + (void *)nla, genmask); if (err < 0) return err; - if (ctx.table->flags & NFT_TABLE_INACTIVE) - return -ENOENT; - set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], + genmask); if (IS_ERR(set)) return PTR_ERR(set); - if (set->flags & NFT_SET_INACTIVE) - return -ENOENT; event = NFT_MSG_NEWSETELEM; event |= NFNL_SUBSYS_NFTABLES << 8; @@ -3216,21 +3215,19 @@ static int nf_tables_getsetelem(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { + u8 genmask = nft_genmask_cur(net); const struct nft_set *set; struct nft_ctx ctx; int err; - err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla); + err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask); if (err < 0) return err; - if (ctx.table->flags & NFT_TABLE_INACTIVE) - return -ENOENT; - set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], + genmask); if (IS_ERR(set)) return PTR_ERR(set); - if (set->flags & NFT_SET_INACTIVE) - return -ENOENT; if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { @@ -3548,6 +3545,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { + u8 genmask = nft_genmask_next(net); const struct nlattr *attr; struct nft_set *set; struct nft_ctx ctx; @@ -3556,15 +3554,17 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) return -EINVAL; - err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla); + err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask); if (err < 0) return err; - set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], + genmask); if (IS_ERR(set)) { if (nla[NFTA_SET_ELEM_LIST_SET_ID]) { set = nf_tables_set_lookup_byid(net, - nla[NFTA_SET_ELEM_LIST_SET_ID]); + nla[NFTA_SET_ELEM_LIST_SET_ID], + genmask); } if (IS_ERR(set)) return PTR_ERR(set); @@ -3670,6 +3670,7 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) { + u8 genmask = nft_genmask_next(net); const struct nlattr *attr; struct nft_set *set; struct nft_ctx ctx; @@ -3678,11 +3679,12 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk, if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) return -EINVAL; - err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla); + err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask); if (err < 0) return err; - set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], + genmask); if (IS_ERR(set)) return PTR_ERR(set); if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) @@ -3950,36 +3952,40 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) case NFT_MSG_NEWTABLE: if (nft_trans_table_update(trans)) { if (!nft_trans_table_enable(trans)) { - nf_tables_table_disable(trans->ctx.afi, + nf_tables_table_disable(net, + trans->ctx.afi, trans->ctx.table); trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; } } else { - trans->ctx.table->flags &= ~NFT_TABLE_INACTIVE; + nft_clear(net, trans->ctx.table); } nf_tables_table_notify(&trans->ctx, NFT_MSG_NEWTABLE); nft_trans_destroy(trans); break; case NFT_MSG_DELTABLE: + list_del_rcu(&trans->ctx.table->list); nf_tables_table_notify(&trans->ctx, NFT_MSG_DELTABLE); break; case NFT_MSG_NEWCHAIN: if (nft_trans_chain_update(trans)) nft_chain_commit_update(trans); else - trans->ctx.chain->flags &= ~NFT_CHAIN_INACTIVE; + nft_clear(net, trans->ctx.chain); nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN); nft_trans_destroy(trans); break; case NFT_MSG_DELCHAIN: + list_del_rcu(&trans->ctx.chain->list); nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN); - nf_tables_unregister_hooks(trans->ctx.table, + nf_tables_unregister_hooks(trans->ctx.net, + trans->ctx.table, trans->ctx.chain, trans->ctx.afi->nops); break; case NFT_MSG_NEWRULE: - nft_rule_clear(trans->ctx.net, nft_trans_rule(trans)); + nft_clear(trans->ctx.net, nft_trans_rule(trans)); nf_tables_rule_notify(&trans->ctx, nft_trans_rule(trans), NFT_MSG_NEWRULE); @@ -3992,7 +3998,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) NFT_MSG_DELRULE); break; case NFT_MSG_NEWSET: - nft_trans_set(trans)->flags &= ~NFT_SET_INACTIVE; + nft_clear(net, nft_trans_set(trans)); /* This avoids hitting -EBUSY when deleting the table * from the transaction. */ @@ -4005,6 +4011,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nft_trans_destroy(trans); break; case NFT_MSG_DELSET: + list_del_rcu(&nft_trans_set(trans)->list); nf_tables_set_notify(&trans->ctx, nft_trans_set(trans), NFT_MSG_DELSET, GFP_KERNEL); break; @@ -4076,7 +4083,8 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) case NFT_MSG_NEWTABLE: if (nft_trans_table_update(trans)) { if (nft_trans_table_enable(trans)) { - nf_tables_table_disable(trans->ctx.afi, + nf_tables_table_disable(net, + trans->ctx.afi, trans->ctx.table); trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; } @@ -4086,8 +4094,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) } break; case NFT_MSG_DELTABLE: - list_add_tail_rcu(&trans->ctx.table->list, - &trans->ctx.afi->tables); + nft_clear(trans->ctx.net, trans->ctx.table); nft_trans_destroy(trans); break; case NFT_MSG_NEWCHAIN: @@ -4098,15 +4105,15 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) } else { trans->ctx.table->use--; list_del_rcu(&trans->ctx.chain->list); - nf_tables_unregister_hooks(trans->ctx.table, + nf_tables_unregister_hooks(trans->ctx.net, + trans->ctx.table, trans->ctx.chain, trans->ctx.afi->nops); } break; case NFT_MSG_DELCHAIN: trans->ctx.table->use++; - list_add_tail_rcu(&trans->ctx.chain->list, - &trans->ctx.table->chains); + nft_clear(trans->ctx.net, trans->ctx.chain); nft_trans_destroy(trans); break; case NFT_MSG_NEWRULE: @@ -4115,7 +4122,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) break; case NFT_MSG_DELRULE: trans->ctx.chain->use++; - nft_rule_clear(trans->ctx.net, nft_trans_rule(trans)); + nft_clear(trans->ctx.net, nft_trans_rule(trans)); nft_trans_destroy(trans); break; case NFT_MSG_NEWSET: @@ -4124,8 +4131,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) break; case NFT_MSG_DELSET: trans->ctx.table->use++; - list_add_tail_rcu(&nft_trans_set(trans)->list, - &trans->ctx.table->sets); + nft_clear(trans->ctx.net, nft_trans_set(trans)); nft_trans_destroy(trans); break; case NFT_MSG_NEWSETELEM: @@ -4272,6 +4278,8 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, } list_for_each_entry(set, &ctx->table->sets, list) { + if (!nft_is_active_next(ctx->net, set)) + continue; if (!(set->flags & NFT_SET_MAP) || set->dtype != NFT_DATA_VERDICT) continue; @@ -4430,6 +4438,7 @@ static const struct nla_policy nft_verdict_policy[NFTA_VERDICT_MAX + 1] = { static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, struct nft_data_desc *desc, const struct nlattr *nla) { + u8 genmask = nft_genmask_next(ctx->net); struct nlattr *tb[NFTA_VERDICT_MAX + 1]; struct nft_chain *chain; int err; @@ -4462,7 +4471,7 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, if (!tb[NFTA_VERDICT_CHAIN]) return -EINVAL; chain = nf_tables_chain_lookup(ctx->table, - tb[NFTA_VERDICT_CHAIN]); + tb[NFTA_VERDICT_CHAIN], genmask); if (IS_ERR(chain)) return PTR_ERR(chain); if (chain->flags & NFT_BASE_CHAIN) @@ -4640,7 +4649,7 @@ int __nft_release_basechain(struct nft_ctx *ctx) BUG_ON(!(ctx->chain->flags & NFT_BASE_CHAIN)); - nf_tables_unregister_hooks(ctx->chain->table, ctx->chain, + nf_tables_unregister_hooks(ctx->net, ctx->chain->table, ctx->chain, ctx->afi->nops); list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) { list_del(&rule->list); @@ -4669,7 +4678,8 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi) list_for_each_entry_safe(table, nt, &afi->tables, list) { list_for_each_entry(chain, &table->chains, list) - nf_tables_unregister_hooks(table, chain, afi->nops); + nf_tables_unregister_hooks(net, table, chain, + afi->nops); /* No packets are walking on these chains anymore. */ ctx.table = table; list_for_each_entry(chain, &table->chains, list) { diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 11f81c8385fc..cbcfdfb586a6 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -700,10 +700,13 @@ nfulnl_log_packet(struct net *net, break; case NFULNL_COPY_PACKET: - if (inst->copy_range > skb->len) + data_len = inst->copy_range; + if ((li->u.ulog.flags & NF_LOG_F_COPY_LEN) && + (li->u.ulog.copy_len < data_len)) + data_len = li->u.ulog.copy_len; + + if (data_len > skb->len) data_len = skb->len; - else - data_len = inst->copy_range; size += nla_total_size(data_len); break; diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 78d4914fb39c..0af26699bf04 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -103,6 +103,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_dynset *priv = nft_expr_priv(expr); + u8 genmask = nft_genmask_next(ctx->net); struct nft_set *set; u64 timeout; int err; @@ -112,11 +113,13 @@ static int nft_dynset_init(const struct nft_ctx *ctx, tb[NFTA_DYNSET_SREG_KEY] == NULL) return -EINVAL; - set = nf_tables_set_lookup(ctx->table, tb[NFTA_DYNSET_SET_NAME]); + set = nf_tables_set_lookup(ctx->table, tb[NFTA_DYNSET_SET_NAME], + genmask); if (IS_ERR(set)) { if (tb[NFTA_DYNSET_SET_ID]) set = nf_tables_set_lookup_byid(ctx->net, - tb[NFTA_DYNSET_SET_ID]); + tb[NFTA_DYNSET_SET_ID], + genmask); if (IS_ERR(set)) return PTR_ERR(set); } diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index f39c53a159eb..ea924816b7b8 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -153,9 +153,10 @@ static void *nft_hash_deactivate(const struct nft_set *set, const struct nft_set_elem *elem) { struct nft_hash *priv = nft_set_priv(set); + struct net *net = read_pnet(&set->pnet); struct nft_hash_elem *he; struct nft_hash_cmp_arg arg = { - .genmask = nft_genmask_next(read_pnet(&set->pnet)), + .genmask = nft_genmask_next(net), .set = set, .key = elem->key.val.data, }; @@ -163,7 +164,8 @@ static void *nft_hash_deactivate(const struct nft_set *set, rcu_read_lock(); he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); if (he != NULL) { - if (!nft_set_elem_mark_busy(&he->ext)) + if (!nft_set_elem_mark_busy(&he->ext) || + !nft_is_active(net, &he->ext)) nft_set_elem_change_active(set, &he->ext); else he = NULL; diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index 319c22b4bca2..713d66837705 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -52,7 +52,6 @@ static int nft_log_init(const struct nft_ctx *ctx, struct nft_log *priv = nft_expr_priv(expr); struct nf_loginfo *li = &priv->loginfo; const struct nlattr *nla; - int ret; nla = tb[NFTA_LOG_PREFIX]; if (nla != NULL) { @@ -97,19 +96,6 @@ static int nft_log_init(const struct nft_ctx *ctx, break; } - if (ctx->afi->family == NFPROTO_INET) { - ret = nf_logger_find_get(NFPROTO_IPV4, li->type); - if (ret < 0) - return ret; - - ret = nf_logger_find_get(NFPROTO_IPV6, li->type); - if (ret < 0) { - nf_logger_put(NFPROTO_IPV4, li->type); - return ret; - } - return 0; - } - return nf_logger_find_get(ctx->afi->family, li->type); } @@ -122,12 +108,7 @@ static void nft_log_destroy(const struct nft_ctx *ctx, if (priv->prefix != nft_log_null_prefix) kfree(priv->prefix); - if (ctx->afi->family == NFPROTO_INET) { - nf_logger_put(NFPROTO_IPV4, li->type); - nf_logger_put(NFPROTO_IPV6, li->type); - } else { - nf_logger_put(ctx->afi->family, li->type); - } + nf_logger_put(ctx->afi->family, li->type); } static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr) diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index b3c31ef8015d..b8d18f598569 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -22,6 +22,7 @@ struct nft_lookup { struct nft_set *set; enum nft_registers sreg:8; enum nft_registers dreg:8; + bool invert; struct nft_set_binding binding; }; @@ -32,14 +33,20 @@ static void nft_lookup_eval(const struct nft_expr *expr, const struct nft_lookup *priv = nft_expr_priv(expr); const struct nft_set *set = priv->set; const struct nft_set_ext *ext; + bool found; - if (set->ops->lookup(set, ®s->data[priv->sreg], &ext)) { - if (set->flags & NFT_SET_MAP) - nft_data_copy(®s->data[priv->dreg], - nft_set_ext_data(ext), set->dlen); + found = set->ops->lookup(set, ®s->data[priv->sreg], &ext) ^ + priv->invert; + + if (!found) { + regs->verdict.code = NFT_BREAK; return; } - regs->verdict.code = NFT_BREAK; + + if (found && set->flags & NFT_SET_MAP) + nft_data_copy(®s->data[priv->dreg], + nft_set_ext_data(ext), set->dlen); + } static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = { @@ -47,6 +54,7 @@ static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = { [NFTA_LOOKUP_SET_ID] = { .type = NLA_U32 }, [NFTA_LOOKUP_SREG] = { .type = NLA_U32 }, [NFTA_LOOKUP_DREG] = { .type = NLA_U32 }, + [NFTA_LOOKUP_FLAGS] = { .type = NLA_U32 }, }; static int nft_lookup_init(const struct nft_ctx *ctx, @@ -54,18 +62,21 @@ static int nft_lookup_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_lookup *priv = nft_expr_priv(expr); + u8 genmask = nft_genmask_next(ctx->net); struct nft_set *set; + u32 flags; int err; if (tb[NFTA_LOOKUP_SET] == NULL || tb[NFTA_LOOKUP_SREG] == NULL) return -EINVAL; - set = nf_tables_set_lookup(ctx->table, tb[NFTA_LOOKUP_SET]); + set = nf_tables_set_lookup(ctx->table, tb[NFTA_LOOKUP_SET], genmask); if (IS_ERR(set)) { if (tb[NFTA_LOOKUP_SET_ID]) { set = nf_tables_set_lookup_byid(ctx->net, - tb[NFTA_LOOKUP_SET_ID]); + tb[NFTA_LOOKUP_SET_ID], + genmask); } if (IS_ERR(set)) return PTR_ERR(set); @@ -79,7 +90,22 @@ static int nft_lookup_init(const struct nft_ctx *ctx, if (err < 0) return err; + if (tb[NFTA_LOOKUP_FLAGS]) { + flags = ntohl(nla_get_be32(tb[NFTA_LOOKUP_FLAGS])); + + if (flags & ~NFT_LOOKUP_F_INV) + return -EINVAL; + + if (flags & NFT_LOOKUP_F_INV) { + if (set->flags & NFT_SET_MAP) + return -EINVAL; + priv->invert = true; + } + } + if (tb[NFTA_LOOKUP_DREG] != NULL) { + if (priv->invert) + return -EINVAL; if (!(set->flags & NFT_SET_MAP)) return -EINVAL; @@ -112,6 +138,7 @@ static void nft_lookup_destroy(const struct nft_ctx *ctx, static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_lookup *priv = nft_expr_priv(expr); + u32 flags = priv->invert ? NFT_LOOKUP_F_INV : 0; if (nla_put_string(skb, NFTA_LOOKUP_SET, priv->set->name)) goto nla_put_failure; @@ -120,6 +147,8 @@ static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr) if (priv->set->flags & NFT_SET_MAP) if (nft_dump_register(skb, NFTA_LOOKUP_DREG, priv->dreg)) goto nla_put_failure; + if (nla_put_be32(skb, NFTA_LOOKUP_FLAGS, htonl(flags))) + goto nla_put_failure; return 0; nla_put_failure: diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 16c50b0dd426..03e5e33b5c39 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -199,13 +199,6 @@ err: } EXPORT_SYMBOL_GPL(nft_meta_get_eval); -/* don't change or set _LOOPBACK, _USER, etc. */ -static bool pkt_type_ok(u32 p) -{ - return p == PACKET_HOST || p == PACKET_BROADCAST || - p == PACKET_MULTICAST || p == PACKET_OTHERHOST; -} - void nft_meta_set_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -223,7 +216,7 @@ void nft_meta_set_eval(const struct nft_expr *expr, break; case NFT_META_PKTTYPE: if (skb->pkt_type != value && - pkt_type_ok(value) && pkt_type_ok(skb->pkt_type)) + skb_pkt_type_ok(value) && skb_pkt_type_ok(skb->pkt_type)) skb->pkt_type = value; break; case NFT_META_NFTRACE: diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c index 7201d57b5a93..c0f638745adc 100644 --- a/net/netfilter/nft_rbtree.c +++ b/net/netfilter/nft_rbtree.c @@ -170,7 +170,7 @@ static void *nft_rbtree_deactivate(const struct nft_set *set, const struct nft_rbtree *priv = nft_set_priv(set); const struct rb_node *parent = priv->root.rb_node; struct nft_rbtree_elem *rbe, *this = elem->priv; - u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); + u8 genmask = nft_genmask_next(read_pnet(&set->pnet)); int d; while (parent != NULL) { diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 2675d580c490..fe0e2db632c7 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1460,6 +1460,9 @@ xt_hook_ops_alloc(const struct xt_table *table, nf_hookfn *fn) uint8_t hooknum; struct nf_hook_ops *ops; + if (!num_hooks) + return ERR_PTR(-EINVAL); + ops = kmalloc(sizeof(*ops) * num_hooks, GFP_KERNEL); if (ops == NULL) return ERR_PTR(-ENOMEM); diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c index a1fa2c800cb9..018eed7e1ff1 100644 --- a/net/netfilter/xt_NFLOG.c +++ b/net/netfilter/xt_NFLOG.c @@ -33,6 +33,9 @@ nflog_tg(struct sk_buff *skb, const struct xt_action_param *par) li.u.ulog.group = info->group; li.u.ulog.qthreshold = info->threshold; + if (info->flags & XT_NFLOG_F_COPY_LEN) + li.u.ulog.flags |= NF_LOG_F_COPY_LEN; + nfulnl_log_packet(net, par->family, par->hooknum, skb, par->in, par->out, &li, info->prefix); return XT_CONTINUE; diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c index df48967af382..858d189a1303 100644 --- a/net/netfilter/xt_TRACE.c +++ b/net/netfilter/xt_TRACE.c @@ -4,12 +4,23 @@ #include <linux/skbuff.h> #include <linux/netfilter/x_tables.h> +#include <net/netfilter/nf_log.h> MODULE_DESCRIPTION("Xtables: packet flow tracing"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_TRACE"); MODULE_ALIAS("ip6t_TRACE"); +static int trace_tg_check(const struct xt_tgchk_param *par) +{ + return nf_logger_find_get(par->family, NF_LOG_TYPE_LOG); +} + +static void trace_tg_destroy(const struct xt_tgdtor_param *par) +{ + nf_logger_put(par->family, NF_LOG_TYPE_LOG); +} + static unsigned int trace_tg(struct sk_buff *skb, const struct xt_action_param *par) { @@ -18,12 +29,14 @@ trace_tg(struct sk_buff *skb, const struct xt_action_param *par) } static struct xt_target trace_tg_reg __read_mostly = { - .name = "TRACE", - .revision = 0, - .family = NFPROTO_UNSPEC, - .table = "raw", - .target = trace_tg, - .me = THIS_MODULE, + .name = "TRACE", + .revision = 0, + .family = NFPROTO_UNSPEC, + .table = "raw", + .target = trace_tg, + .checkentry = trace_tg_check, + .destroy = trace_tg_destroy, + .me = THIS_MODULE, }; static int __init trace_tg_init(void) diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c index 1302b475abcb..a20e731b5b6c 100644 --- a/net/netfilter/xt_owner.c +++ b/net/netfilter/xt_owner.c @@ -21,11 +21,39 @@ static int owner_check(const struct xt_mtchk_param *par) { struct xt_owner_match_info *info = par->matchinfo; + struct net *net = par->net; - /* For now only allow adding matches from the initial user namespace */ + /* Only allow the common case where the userns of the writer + * matches the userns of the network namespace. + */ if ((info->match & (XT_OWNER_UID|XT_OWNER_GID)) && - (current_user_ns() != &init_user_ns)) + (current_user_ns() != net->user_ns)) return -EINVAL; + + /* Ensure the uids are valid */ + if (info->match & XT_OWNER_UID) { + kuid_t uid_min = make_kuid(net->user_ns, info->uid_min); + kuid_t uid_max = make_kuid(net->user_ns, info->uid_max); + + if (!uid_valid(uid_min) || !uid_valid(uid_max) || + (info->uid_max < info->uid_min) || + uid_lt(uid_max, uid_min)) { + return -EINVAL; + } + } + + /* Ensure the gids are valid */ + if (info->match & XT_OWNER_GID) { + kgid_t gid_min = make_kgid(net->user_ns, info->gid_min); + kgid_t gid_max = make_kgid(net->user_ns, info->gid_max); + + if (!gid_valid(gid_min) || !gid_valid(gid_max) || + (info->gid_max < info->gid_min) || + gid_lt(gid_max, gid_min)) { + return -EINVAL; + } + } + return 0; } @@ -35,6 +63,7 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par) const struct xt_owner_match_info *info = par->matchinfo; const struct file *filp; struct sock *sk = skb_to_full_sk(skb); + struct net *net = par->net; if (sk == NULL || sk->sk_socket == NULL) return (info->match ^ info->invert) == 0; @@ -51,8 +80,8 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par) (XT_OWNER_UID | XT_OWNER_GID)) == 0; if (info->match & XT_OWNER_UID) { - kuid_t uid_min = make_kuid(&init_user_ns, info->uid_min); - kuid_t uid_max = make_kuid(&init_user_ns, info->uid_max); + kuid_t uid_min = make_kuid(net->user_ns, info->uid_min); + kuid_t uid_max = make_kuid(net->user_ns, info->uid_max); if ((uid_gte(filp->f_cred->fsuid, uid_min) && uid_lte(filp->f_cred->fsuid, uid_max)) ^ !(info->invert & XT_OWNER_UID)) @@ -60,8 +89,8 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par) } if (info->match & XT_OWNER_GID) { - kgid_t gid_min = make_kgid(&init_user_ns, info->gid_min); - kgid_t gid_max = make_kgid(&init_user_ns, info->gid_max); + kgid_t gid_min = make_kgid(net->user_ns, info->gid_min); + kgid_t gid_max = make_kgid(net->user_ns, info->gid_max); if ((gid_gte(filp->f_cred->fsgid, gid_min) && gid_lte(filp->f_cred->fsgid, gid_max)) ^ !(info->invert & XT_OWNER_GID)) diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c index c14d4645daa3..ade024c90f4f 100644 --- a/net/netfilter/xt_tcpudp.c +++ b/net/netfilter/xt_tcpudp.c @@ -83,8 +83,6 @@ static bool tcp_mt(const struct sk_buff *skb, struct xt_action_param *par) return false; } -#define FWINVTCP(bool, invflg) ((bool) ^ !!(tcpinfo->invflags & (invflg))) - th = skb_header_pointer(skb, par->thoff, sizeof(_tcph), &_tcph); if (th == NULL) { /* We've been asked to examine this packet, and we @@ -102,9 +100,8 @@ static bool tcp_mt(const struct sk_buff *skb, struct xt_action_param *par) ntohs(th->dest), !!(tcpinfo->invflags & XT_TCP_INV_DSTPT))) return false; - if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask) - == tcpinfo->flg_cmp, - XT_TCP_INV_FLAGS)) + if (!NF_INVF(tcpinfo, XT_TCP_INV_FLAGS, + (((unsigned char *)th)[13] & tcpinfo->flg_mask) == tcpinfo->flg_cmp)) return false; if (tcpinfo->option) { if (th->doff * 4 < sizeof(_tcph)) { diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index d1f3b9e977e5..9d92c4c46871 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1341,7 +1341,7 @@ static unsigned int fanout_demux_hash(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) { - return reciprocal_scale(skb_get_hash(skb), num); + return reciprocal_scale(__skb_get_hash_symmetric(skb), num); } static unsigned int fanout_demux_lb(struct packet_fanout *f, @@ -1588,13 +1588,9 @@ static int fanout_set_data_ebpf(struct packet_sock *po, char __user *data, if (copy_from_user(&fd, data, len)) return -EFAULT; - new = bpf_prog_get(fd); + new = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER); if (IS_ERR(new)) return PTR_ERR(new); - if (new->type != BPF_PROG_TYPE_SOCKET_FILTER) { - bpf_prog_put(new); - return -EINVAL; - } __fanout_set_data_bpf(po->fanout, new); return 0; diff --git a/net/rds/connection.c b/net/rds/connection.c index a4b07c899d89..19a4fee5f4dd 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -253,9 +253,12 @@ static struct rds_connection *__rds_conn_create(struct net *net, for (i = 0; i < RDS_MPATH_WORKERS; i++) { cp = &conn->c_path[i]; - trans->conn_free(cp->cp_transport_data); - if (!trans->t_mp_capable) - break; + /* The ->conn_alloc invocation may have + * allocated resource for all paths, so all + * of them may have to be freed here. + */ + if (cp->cp_transport_data) + trans->conn_free(cp->cp_transport_data); } kmem_cache_free(rds_conn_slab, conn); conn = found; @@ -326,10 +329,7 @@ void rds_conn_shutdown(struct rds_conn_path *cp) wait_event(cp->cp_waitq, !test_bit(RDS_RECV_REFILL, &cp->cp_flags)); - if (!conn->c_trans->t_mp_capable) - conn->c_trans->conn_shutdown(conn); - else - conn->c_trans->conn_path_shutdown(cp); + conn->c_trans->conn_path_shutdown(cp); rds_conn_path_reset(cp); if (!rds_conn_path_transition(cp, RDS_CONN_DISCONNECTING, @@ -355,9 +355,7 @@ void rds_conn_shutdown(struct rds_conn_path *cp) rcu_read_lock(); if (!hlist_unhashed(&conn->c_hash_node)) { rcu_read_unlock(); - if (conn->c_trans->t_type != RDS_TRANS_TCP || - cp->cp_outgoing == 1) - rds_queue_reconnect(cp); + rds_queue_reconnect(cp); } else { rcu_read_unlock(); } @@ -370,6 +368,9 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp) { struct rds_message *rm, *rtmp; + if (!cp->cp_transport_data) + return; + rds_conn_path_drop(cp); flush_work(&cp->cp_down_w); @@ -401,6 +402,8 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp) void rds_conn_destroy(struct rds_connection *conn) { unsigned long flags; + int i; + struct rds_conn_path *cp; rdsdebug("freeing conn %p for %pI4 -> " "%pI4\n", conn, &conn->c_laddr, @@ -413,18 +416,10 @@ void rds_conn_destroy(struct rds_connection *conn) synchronize_rcu(); /* shut the connection down */ - if (!conn->c_trans->t_mp_capable) { - rds_conn_path_destroy(&conn->c_path[0]); - BUG_ON(!list_empty(&conn->c_path[0].cp_retrans)); - } else { - int i; - struct rds_conn_path *cp; - - for (i = 0; i < RDS_MPATH_WORKERS; i++) { - cp = &conn->c_path[i]; - rds_conn_path_destroy(cp); - BUG_ON(!list_empty(&cp->cp_retrans)); - } + for (i = 0; i < RDS_MPATH_WORKERS; i++) { + cp = &conn->c_path[i]; + rds_conn_path_destroy(cp); + BUG_ON(!list_empty(&cp->cp_retrans)); } /* diff --git a/net/rds/ib.c b/net/rds/ib.c index 44946a681a8c..7eaf887e46f8 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -381,15 +381,15 @@ void rds_ib_exit(void) struct rds_transport rds_ib_transport = { .laddr_check = rds_ib_laddr_check, - .xmit_complete = rds_ib_xmit_complete, + .xmit_path_complete = rds_ib_xmit_path_complete, .xmit = rds_ib_xmit, .xmit_rdma = rds_ib_xmit_rdma, .xmit_atomic = rds_ib_xmit_atomic, - .recv = rds_ib_recv, + .recv_path = rds_ib_recv_path, .conn_alloc = rds_ib_conn_alloc, .conn_free = rds_ib_conn_free, - .conn_connect = rds_ib_conn_connect, - .conn_shutdown = rds_ib_conn_shutdown, + .conn_path_connect = rds_ib_conn_path_connect, + .conn_path_shutdown = rds_ib_conn_path_shutdown, .inc_copy_to_user = rds_ib_inc_copy_to_user, .inc_free = rds_ib_inc_free, .cm_initiate_connect = rds_ib_cm_initiate_connect, diff --git a/net/rds/ib.h b/net/rds/ib.h index 627fb79aee65..046f7508c06b 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -328,8 +328,8 @@ extern struct list_head ib_nodev_conns; /* ib_cm.c */ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp); void rds_ib_conn_free(void *arg); -int rds_ib_conn_connect(struct rds_connection *conn); -void rds_ib_conn_shutdown(struct rds_connection *conn); +int rds_ib_conn_path_connect(struct rds_conn_path *cp); +void rds_ib_conn_path_shutdown(struct rds_conn_path *cp); void rds_ib_state_change(struct sock *sk); int rds_ib_listen_init(void); void rds_ib_listen_stop(void); @@ -354,7 +354,7 @@ void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc); /* ib_recv.c */ int rds_ib_recv_init(void); void rds_ib_recv_exit(void); -int rds_ib_recv(struct rds_connection *conn); +int rds_ib_recv_path(struct rds_conn_path *conn); int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic); void rds_ib_recv_free_caches(struct rds_ib_connection *ic); void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp); @@ -384,7 +384,7 @@ u32 rds_ib_ring_completed(struct rds_ib_work_ring *ring, u32 wr_id, u32 oldest); extern wait_queue_head_t rds_ib_ring_empty_wait; /* ib_send.c */ -void rds_ib_xmit_complete(struct rds_connection *conn); +void rds_ib_xmit_path_complete(struct rds_conn_path *cp); int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, unsigned int hdr_off, unsigned int sg, unsigned int off); void rds_ib_send_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc); diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index e48bb1ba3dfc..5b2ab95afa07 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -685,8 +685,9 @@ out: return ret; } -int rds_ib_conn_connect(struct rds_connection *conn) +int rds_ib_conn_path_connect(struct rds_conn_path *cp) { + struct rds_connection *conn = cp->cp_conn; struct rds_ib_connection *ic = conn->c_transport_data; struct sockaddr_in src, dest; int ret; @@ -731,8 +732,9 @@ out: * so that it can be called at any point during startup. In fact it * can be called multiple times for a given connection. */ -void rds_ib_conn_shutdown(struct rds_connection *conn) +void rds_ib_conn_path_shutdown(struct rds_conn_path *cp) { + struct rds_connection *conn = cp->cp_conn; struct rds_ib_connection *ic = conn->c_transport_data; int err = 0; diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 4ea8cb17cc7a..606a11f681d2 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -1009,8 +1009,9 @@ void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic, rds_ib_recv_refill(conn, 0, GFP_NOWAIT); } -int rds_ib_recv(struct rds_connection *conn) +int rds_ib_recv_path(struct rds_conn_path *cp) { + struct rds_connection *conn = cp->cp_conn; struct rds_ib_connection *ic = conn->c_transport_data; int ret = 0; diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 6e4110aa5135..84d90c97332f 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -980,8 +980,9 @@ out: return ret; } -void rds_ib_xmit_complete(struct rds_connection *conn) +void rds_ib_xmit_path_complete(struct rds_conn_path *cp) { + struct rds_connection *conn = cp->cp_conn; struct rds_ib_connection *ic = conn->c_transport_data; /* We may have a pending ACK or window update we were unable diff --git a/net/rds/loop.c b/net/rds/loop.c index 15f83db78f0c..f2bf78de5688 100644 --- a/net/rds/loop.c +++ b/net/rds/loop.c @@ -102,7 +102,7 @@ static void rds_loop_inc_free(struct rds_incoming *inc) } /* we need to at least give the thread something to succeed */ -static int rds_loop_recv(struct rds_connection *conn) +static int rds_loop_recv_path(struct rds_conn_path *cp) { return 0; } @@ -150,13 +150,13 @@ static void rds_loop_conn_free(void *arg) kfree(lc); } -static int rds_loop_conn_connect(struct rds_connection *conn) +static int rds_loop_conn_path_connect(struct rds_conn_path *cp) { - rds_connect_complete(conn); + rds_connect_complete(cp->cp_conn); return 0; } -static void rds_loop_conn_shutdown(struct rds_connection *conn) +static void rds_loop_conn_path_shutdown(struct rds_conn_path *cp) { } @@ -185,11 +185,11 @@ void rds_loop_exit(void) */ struct rds_transport rds_loop_transport = { .xmit = rds_loop_xmit, - .recv = rds_loop_recv, + .recv_path = rds_loop_recv_path, .conn_alloc = rds_loop_conn_alloc, .conn_free = rds_loop_conn_free, - .conn_connect = rds_loop_conn_connect, - .conn_shutdown = rds_loop_conn_shutdown, + .conn_path_connect = rds_loop_conn_path_connect, + .conn_path_shutdown = rds_loop_conn_path_shutdown, .inc_copy_to_user = rds_message_inc_copy_to_user, .inc_free = rds_loop_inc_free, .t_name = "loopback", diff --git a/net/rds/rds.h b/net/rds/rds.h index 2e35b738176f..6ef07bd27227 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -454,18 +454,15 @@ struct rds_transport { int (*laddr_check)(struct net *net, __be32 addr); int (*conn_alloc)(struct rds_connection *conn, gfp_t gfp); void (*conn_free)(void *data); - int (*conn_connect)(struct rds_connection *conn); - void (*conn_shutdown)(struct rds_connection *conn); + int (*conn_path_connect)(struct rds_conn_path *cp); void (*conn_path_shutdown)(struct rds_conn_path *conn); - void (*xmit_prepare)(struct rds_connection *conn); void (*xmit_path_prepare)(struct rds_conn_path *cp); - void (*xmit_complete)(struct rds_connection *conn); void (*xmit_path_complete)(struct rds_conn_path *cp); int (*xmit)(struct rds_connection *conn, struct rds_message *rm, unsigned int hdr_off, unsigned int sg, unsigned int off); int (*xmit_rdma)(struct rds_connection *conn, struct rm_rdma_op *op); int (*xmit_atomic)(struct rds_connection *conn, struct rm_atomic_op *op); - int (*recv)(struct rds_connection *conn); + int (*recv_path)(struct rds_conn_path *cp); int (*inc_copy_to_user)(struct rds_incoming *inc, struct iov_iter *to); void (*inc_free)(struct rds_incoming *inc); diff --git a/net/rds/recv.c b/net/rds/recv.c index b58f50571782..fed53a6c2890 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -226,6 +226,10 @@ void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr, cp->cp_next_rx_seq = be64_to_cpu(inc->i_hdr.h_sequence) + 1; if (rds_sysctl_ping_enable && inc->i_hdr.h_dport == 0) { + if (inc->i_hdr.h_sport == 0) { + rdsdebug("ignore ping with 0 sport from 0x%x\n", saddr); + goto out; + } rds_stats_inc(s_recv_ping); rds_send_pong(cp, inc->i_hdr.h_sport); goto out; diff --git a/net/rds/send.c b/net/rds/send.c index ee43d6b2ea8f..5a9caf1da896 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -183,12 +183,8 @@ restart: goto out; } - if (conn->c_trans->t_mp_capable) { - if (conn->c_trans->xmit_path_prepare) - conn->c_trans->xmit_path_prepare(cp); - } else if (conn->c_trans->xmit_prepare) { - conn->c_trans->xmit_prepare(conn); - } + if (conn->c_trans->xmit_path_prepare) + conn->c_trans->xmit_path_prepare(cp); /* * spin trying to push headers and data down the connection until @@ -403,12 +399,8 @@ restart: } over_batch: - if (conn->c_trans->t_mp_capable) { - if (conn->c_trans->xmit_path_complete) - conn->c_trans->xmit_path_complete(cp); - } else if (conn->c_trans->xmit_complete) { - conn->c_trans->xmit_complete(conn); - } + if (conn->c_trans->xmit_path_complete) + conn->c_trans->xmit_path_complete(cp); release_in_xmit(cp); /* Nuke any messages we decided not to retransmit. */ diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 5217d49ce6d6..d24f6c142d03 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -136,9 +136,9 @@ void rds_tcp_restore_callbacks(struct socket *sock, * from being called while it isn't set. */ void rds_tcp_reset_callbacks(struct socket *sock, - struct rds_connection *conn) + struct rds_conn_path *cp) { - struct rds_tcp_connection *tc = conn->c_transport_data; + struct rds_tcp_connection *tc = cp->cp_transport_data; struct socket *osock = tc->t_sock; if (!osock) @@ -148,8 +148,8 @@ void rds_tcp_reset_callbacks(struct socket *sock, * We have an outstanding SYN to this peer, which may * potentially have transitioned to the RDS_CONN_UP state, * so we must quiesce any send threads before resetting - * c_transport_data. We quiesce these threads by setting - * c_state to something other than RDS_CONN_UP, and then + * cp_transport_data. We quiesce these threads by setting + * cp_state to something other than RDS_CONN_UP, and then * waiting for any existing threads in rds_send_xmit to * complete release_in_xmit(). (Subsequent threads entering * rds_send_xmit() will bail on !rds_conn_up(). @@ -164,8 +164,8 @@ void rds_tcp_reset_callbacks(struct socket *sock, * RDS_CONN_RESETTTING, to ensure that rds_tcp_state_change * cannot mark rds_conn_path_up() in the window before lock_sock() */ - atomic_set(&conn->c_state, RDS_CONN_RESETTING); - wait_event(conn->c_waitq, !test_bit(RDS_IN_XMIT, &conn->c_flags)); + atomic_set(&cp->cp_state, RDS_CONN_RESETTING); + wait_event(cp->cp_waitq, !test_bit(RDS_IN_XMIT, &cp->cp_flags)); lock_sock(osock->sk); /* reset receive side state for rds_tcp_data_recv() for osock */ if (tc->t_tinc) { @@ -186,11 +186,12 @@ void rds_tcp_reset_callbacks(struct socket *sock, release_sock(osock->sk); sock_release(osock); newsock: - rds_send_path_reset(&conn->c_path[0]); + rds_send_path_reset(cp); lock_sock(sock->sk); write_lock_bh(&sock->sk->sk_callback_lock); tc->t_sock = sock; - sock->sk->sk_user_data = conn; + tc->t_cpath = cp; + sock->sk->sk_user_data = cp; sock->sk->sk_data_ready = rds_tcp_data_ready; sock->sk->sk_write_space = rds_tcp_write_space; sock->sk->sk_state_change = rds_tcp_state_change; @@ -203,9 +204,9 @@ newsock: * above rds_tcp_reset_callbacks for notes about synchronization * with data path */ -void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn) +void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp) { - struct rds_tcp_connection *tc = conn->c_transport_data; + struct rds_tcp_connection *tc = cp->cp_transport_data; rdsdebug("setting sock %p callbacks to tc %p\n", sock, tc); write_lock_bh(&sock->sk->sk_callback_lock); @@ -221,12 +222,12 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn) sock->sk->sk_data_ready = sock->sk->sk_user_data; tc->t_sock = sock; - tc->conn = conn; + tc->t_cpath = cp; tc->t_orig_data_ready = sock->sk->sk_data_ready; tc->t_orig_write_space = sock->sk->sk_write_space; tc->t_orig_state_change = sock->sk->sk_state_change; - sock->sk->sk_user_data = conn; + sock->sk->sk_user_data = cp; sock->sk->sk_data_ready = rds_tcp_data_ready; sock->sk->sk_write_space = rds_tcp_write_space; sock->sk->sk_state_change = rds_tcp_state_change; @@ -284,24 +285,29 @@ static int rds_tcp_laddr_check(struct net *net, __be32 addr) static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp) { struct rds_tcp_connection *tc; + int i; - tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp); - if (!tc) - return -ENOMEM; + for (i = 0; i < RDS_MPATH_WORKERS; i++) { + tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp); + if (!tc) + return -ENOMEM; - mutex_init(&tc->t_conn_lock); - tc->t_sock = NULL; - tc->t_tinc = NULL; - tc->t_tinc_hdr_rem = sizeof(struct rds_header); - tc->t_tinc_data_rem = 0; + mutex_init(&tc->t_conn_path_lock); + tc->t_sock = NULL; + tc->t_tinc = NULL; + tc->t_tinc_hdr_rem = sizeof(struct rds_header); + tc->t_tinc_data_rem = 0; - conn->c_transport_data = tc; + conn->c_path[i].cp_transport_data = tc; + tc->t_cpath = &conn->c_path[i]; - spin_lock_irq(&rds_tcp_conn_lock); - list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list); - spin_unlock_irq(&rds_tcp_conn_lock); + spin_lock_irq(&rds_tcp_conn_lock); + list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list); + spin_unlock_irq(&rds_tcp_conn_lock); + rdsdebug("rds_conn_path [%d] tc %p\n", i, + conn->c_path[i].cp_transport_data); + } - rdsdebug("alloced tc %p\n", conn->c_transport_data); return 0; } @@ -318,6 +324,17 @@ static void rds_tcp_conn_free(void *arg) kmem_cache_free(rds_tcp_conn_slab, tc); } +static bool list_has_conn(struct list_head *list, struct rds_connection *conn) +{ + struct rds_tcp_connection *tc, *_tc; + + list_for_each_entry_safe(tc, _tc, list, t_tcp_node) { + if (tc->t_cpath->cp_conn == conn) + return true; + } + return false; +} + static void rds_tcp_destroy_conns(void) { struct rds_tcp_connection *tc, *_tc; @@ -325,29 +342,28 @@ static void rds_tcp_destroy_conns(void) /* avoid calling conn_destroy with irqs off */ spin_lock_irq(&rds_tcp_conn_lock); - list_splice(&rds_tcp_conn_list, &tmp_list); - INIT_LIST_HEAD(&rds_tcp_conn_list); + list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { + if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn)) + list_move_tail(&tc->t_tcp_node, &tmp_list); + } spin_unlock_irq(&rds_tcp_conn_lock); - list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) { - if (tc->conn->c_passive) - rds_conn_destroy(tc->conn->c_passive); - rds_conn_destroy(tc->conn); - } + list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) + rds_conn_destroy(tc->t_cpath->cp_conn); } static void rds_tcp_exit(void); struct rds_transport rds_tcp_transport = { .laddr_check = rds_tcp_laddr_check, - .xmit_prepare = rds_tcp_xmit_prepare, - .xmit_complete = rds_tcp_xmit_complete, + .xmit_path_prepare = rds_tcp_xmit_path_prepare, + .xmit_path_complete = rds_tcp_xmit_path_complete, .xmit = rds_tcp_xmit, - .recv = rds_tcp_recv, + .recv_path = rds_tcp_recv_path, .conn_alloc = rds_tcp_conn_alloc, .conn_free = rds_tcp_conn_free, - .conn_connect = rds_tcp_conn_connect, - .conn_shutdown = rds_tcp_conn_shutdown, + .conn_path_connect = rds_tcp_conn_path_connect, + .conn_path_shutdown = rds_tcp_conn_path_shutdown, .inc_copy_to_user = rds_tcp_inc_copy_to_user, .inc_free = rds_tcp_inc_free, .stats_info_copy = rds_tcp_stats_info_copy, @@ -489,10 +505,30 @@ static struct pernet_operations rds_tcp_net_ops = { .size = sizeof(struct rds_tcp_net), }; +/* explicitly send a RST on each socket, thereby releasing any socket refcnts + * that may otherwise hold up netns deletion. + */ +static void rds_tcp_conn_paths_destroy(struct rds_connection *conn) +{ + struct rds_conn_path *cp; + struct rds_tcp_connection *tc; + int i; + struct sock *sk; + + for (i = 0; i < RDS_MPATH_WORKERS; i++) { + cp = &conn->c_path[i]; + tc = cp->cp_transport_data; + if (!tc->t_sock) + continue; + sk = tc->t_sock->sk; + sk->sk_prot->disconnect(sk, 0); + tcp_done(sk); + } +} + static void rds_tcp_kill_sock(struct net *net) { struct rds_tcp_connection *tc, *_tc; - struct sock *sk; LIST_HEAD(tmp_list); struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); @@ -501,20 +537,17 @@ static void rds_tcp_kill_sock(struct net *net) flush_work(&rtn->rds_tcp_accept_w); spin_lock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { - struct net *c_net = read_pnet(&tc->conn->c_net); + struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); if (net != c_net || !tc->t_sock) continue; - list_move_tail(&tc->t_tcp_node, &tmp_list); + if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn)) + list_move_tail(&tc->t_tcp_node, &tmp_list); } spin_unlock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) { - sk = tc->t_sock->sk; - sk->sk_prot->disconnect(sk, 0); - tcp_done(sk); - if (tc->conn->c_passive) - rds_conn_destroy(tc->conn->c_passive); - rds_conn_destroy(tc->conn); + rds_tcp_conn_paths_destroy(tc->t_cpath->cp_conn); + rds_conn_destroy(tc->t_cpath->cp_conn); } } @@ -552,12 +585,13 @@ static void rds_tcp_sysctl_reset(struct net *net) spin_lock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { - struct net *c_net = read_pnet(&tc->conn->c_net); + struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); if (net != c_net || !tc->t_sock) continue; - rds_conn_drop(tc->conn); /* reconnect with new parameters */ + /* reconnect with new parameters */ + rds_conn_path_drop(tc->t_cpath); } spin_unlock_irq(&rds_tcp_conn_lock); } @@ -617,7 +651,7 @@ static int rds_tcp_init(void) ret = rds_tcp_recv_init(); if (ret) - goto out_slab; + goto out_pernet; ret = rds_trans_register(&rds_tcp_transport); if (ret) @@ -629,8 +663,9 @@ static int rds_tcp_init(void) out_recv: rds_tcp_recv_exit(); -out_slab: +out_pernet: unregister_pernet_subsys(&rds_tcp_net_ops); +out_slab: kmem_cache_destroy(rds_tcp_conn_slab); out: return ret; diff --git a/net/rds/tcp.h b/net/rds/tcp.h index 7940babf6c71..1c3160faa963 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -11,11 +11,11 @@ struct rds_tcp_incoming { struct rds_tcp_connection { struct list_head t_tcp_node; - struct rds_connection *conn; - /* t_conn_lock synchronizes the connection establishment between - * rds_tcp_accept_one and rds_tcp_conn_connect + struct rds_conn_path *t_cpath; + /* t_conn_path_lock synchronizes the connection establishment between + * rds_tcp_accept_one and rds_tcp_conn_path_connect */ - struct mutex t_conn_lock; + struct mutex t_conn_path_lock; struct socket *t_sock; void *t_orig_write_space; void *t_orig_data_ready; @@ -49,8 +49,8 @@ struct rds_tcp_statistics { /* tcp.c */ void rds_tcp_tune(struct socket *sock); void rds_tcp_nonagle(struct socket *sock); -void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn); -void rds_tcp_reset_callbacks(struct socket *sock, struct rds_connection *conn); +void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp); +void rds_tcp_reset_callbacks(struct socket *sock, struct rds_conn_path *cp); void rds_tcp_restore_callbacks(struct socket *sock, struct rds_tcp_connection *tc); u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc); @@ -60,8 +60,8 @@ extern struct rds_transport rds_tcp_transport; void rds_tcp_accept_work(struct sock *sk); /* tcp_connect.c */ -int rds_tcp_conn_connect(struct rds_connection *conn); -void rds_tcp_conn_shutdown(struct rds_connection *conn); +int rds_tcp_conn_path_connect(struct rds_conn_path *cp); +void rds_tcp_conn_path_shutdown(struct rds_conn_path *conn); void rds_tcp_state_change(struct sock *sk); /* tcp_listen.c */ @@ -75,13 +75,13 @@ int rds_tcp_keepalive(struct socket *sock); int rds_tcp_recv_init(void); void rds_tcp_recv_exit(void); void rds_tcp_data_ready(struct sock *sk); -int rds_tcp_recv(struct rds_connection *conn); +int rds_tcp_recv_path(struct rds_conn_path *cp); void rds_tcp_inc_free(struct rds_incoming *inc); int rds_tcp_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to); /* tcp_send.c */ -void rds_tcp_xmit_prepare(struct rds_connection *conn); -void rds_tcp_xmit_complete(struct rds_connection *conn); +void rds_tcp_xmit_path_prepare(struct rds_conn_path *cp); +void rds_tcp_xmit_path_complete(struct rds_conn_path *cp); int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, unsigned int hdr_off, unsigned int sg, unsigned int off); void rds_tcp_write_space(struct sock *sk); diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index 96c2c4d17909..c916715fbe61 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -41,16 +41,16 @@ void rds_tcp_state_change(struct sock *sk) { void (*state_change)(struct sock *sk); - struct rds_connection *conn; + struct rds_conn_path *cp; struct rds_tcp_connection *tc; read_lock_bh(&sk->sk_callback_lock); - conn = sk->sk_user_data; - if (!conn) { + cp = sk->sk_user_data; + if (!cp) { state_change = sk->sk_state_change; goto out; } - tc = conn->c_transport_data; + tc = cp->cp_transport_data; state_change = tc->t_orig_state_change; rdsdebug("sock %p state_change to %d\n", tc->t_sock, sk->sk_state); @@ -61,12 +61,11 @@ void rds_tcp_state_change(struct sock *sk) case TCP_SYN_RECV: break; case TCP_ESTABLISHED: - rds_connect_path_complete(&conn->c_path[0], - RDS_CONN_CONNECTING); + rds_connect_path_complete(cp, RDS_CONN_CONNECTING); break; case TCP_CLOSE_WAIT: case TCP_CLOSE: - rds_conn_drop(conn); + rds_conn_path_drop(cp); default: break; } @@ -75,17 +74,18 @@ out: state_change(sk); } -int rds_tcp_conn_connect(struct rds_connection *conn) +int rds_tcp_conn_path_connect(struct rds_conn_path *cp) { struct socket *sock = NULL; struct sockaddr_in src, dest; int ret; - struct rds_tcp_connection *tc = conn->c_transport_data; + struct rds_connection *conn = cp->cp_conn; + struct rds_tcp_connection *tc = cp->cp_transport_data; - mutex_lock(&tc->t_conn_lock); + mutex_lock(&tc->t_conn_path_lock); - if (rds_conn_up(conn)) { - mutex_unlock(&tc->t_conn_lock); + if (rds_conn_path_up(cp)) { + mutex_unlock(&tc->t_conn_path_lock); return 0; } ret = sock_create_kern(rds_conn_net(conn), PF_INET, @@ -114,10 +114,11 @@ int rds_tcp_conn_connect(struct rds_connection *conn) * once we call connect() we can start getting callbacks and they * own the socket */ - rds_tcp_set_callbacks(sock, conn); + rds_tcp_set_callbacks(sock, cp); ret = sock->ops->connect(sock, (struct sockaddr *)&dest, sizeof(dest), O_NONBLOCK); + cp->cp_outgoing = 1; rdsdebug("connect to address %pI4 returned %d\n", &conn->c_faddr, ret); if (ret == -EINPROGRESS) ret = 0; @@ -125,11 +126,11 @@ int rds_tcp_conn_connect(struct rds_connection *conn) rds_tcp_keepalive(sock); sock = NULL; } else { - rds_tcp_restore_callbacks(sock, conn->c_transport_data); + rds_tcp_restore_callbacks(sock, cp->cp_transport_data); } out: - mutex_unlock(&tc->t_conn_lock); + mutex_unlock(&tc->t_conn_path_lock); if (sock) sock_release(sock); return ret; @@ -144,12 +145,13 @@ out: * callbacks to those set by TCP. Our callbacks won't execute again once we * hold the sock lock. */ -void rds_tcp_conn_shutdown(struct rds_connection *conn) +void rds_tcp_conn_path_shutdown(struct rds_conn_path *cp) { - struct rds_tcp_connection *tc = conn->c_transport_data; + struct rds_tcp_connection *tc = cp->cp_transport_data; struct socket *sock = tc->t_sock; - rdsdebug("shutting down conn %p tc %p sock %p\n", conn, tc, sock); + rdsdebug("shutting down conn %p tc %p sock %p\n", + cp->cp_conn, tc, sock); if (sock) { sock->ops->shutdown(sock, RCV_SHUTDOWN | SEND_SHUTDOWN); diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index f9cc945a77b3..ca975a217a49 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -79,6 +79,7 @@ int rds_tcp_accept_one(struct socket *sock) struct inet_sock *inet; struct rds_tcp_connection *rs_tcp = NULL; int conn_state; + struct rds_conn_path *cp; if (!sock) /* module unload or netns delete in progress */ return -ENETUNREACH; @@ -120,8 +121,9 @@ int rds_tcp_accept_one(struct socket *sock) * rds_tcp_state_change() will do that cleanup */ rs_tcp = (struct rds_tcp_connection *)conn->c_transport_data; + cp = &conn->c_path[0]; rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING); - mutex_lock(&rs_tcp->t_conn_lock); + mutex_lock(&rs_tcp->t_conn_path_lock); conn_state = rds_conn_state(conn); if (conn_state != RDS_CONN_CONNECTING && conn_state != RDS_CONN_UP) goto rst_nsk; @@ -136,16 +138,14 @@ int rds_tcp_accept_one(struct socket *sock) !conn->c_path[0].cp_outgoing) { goto rst_nsk; } else { - rds_tcp_reset_callbacks(new_sock, conn); + rds_tcp_reset_callbacks(new_sock, cp); conn->c_path[0].cp_outgoing = 0; /* rds_connect_path_complete() marks RDS_CONN_UP */ - rds_connect_path_complete(&conn->c_path[0], - RDS_CONN_RESETTING); + rds_connect_path_complete(cp, RDS_CONN_RESETTING); } } else { - rds_tcp_set_callbacks(new_sock, conn); - rds_connect_path_complete(&conn->c_path[0], - RDS_CONN_CONNECTING); + rds_tcp_set_callbacks(new_sock, cp); + rds_connect_path_complete(cp, RDS_CONN_CONNECTING); } new_sock = NULL; ret = 0; @@ -156,7 +156,7 @@ rst_nsk: ret = 0; out: if (rs_tcp) - mutex_unlock(&rs_tcp->t_conn_lock); + mutex_unlock(&rs_tcp->t_conn_path_lock); if (new_sock) sock_release(new_sock); return ret; diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c index 4a87d9ef3084..ad4892e97f91 100644 --- a/net/rds/tcp_recv.c +++ b/net/rds/tcp_recv.c @@ -34,7 +34,6 @@ #include <linux/slab.h> #include <net/tcp.h> -#include "rds_single_path.h" #include "rds.h" #include "tcp.h" @@ -148,7 +147,7 @@ static void rds_tcp_cong_recv(struct rds_connection *conn, } struct rds_tcp_desc_arg { - struct rds_connection *conn; + struct rds_conn_path *conn_path; gfp_t gfp; }; @@ -156,8 +155,8 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb, unsigned int offset, size_t len) { struct rds_tcp_desc_arg *arg = desc->arg.data; - struct rds_connection *conn = arg->conn; - struct rds_tcp_connection *tc = conn->c_transport_data; + struct rds_conn_path *cp = arg->conn_path; + struct rds_tcp_connection *tc = cp->cp_transport_data; struct rds_tcp_incoming *tinc = tc->t_tinc; struct sk_buff *clone; size_t left = len, to_copy; @@ -179,7 +178,8 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb, } tc->t_tinc = tinc; rdsdebug("alloced tinc %p\n", tinc); - rds_inc_init(&tinc->ti_inc, conn, conn->c_faddr); + rds_inc_path_init(&tinc->ti_inc, cp, + cp->cp_conn->c_faddr); /* * XXX * we might be able to use the __ variants when * we've already serialized at a higher level. @@ -229,6 +229,8 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb, } if (tc->t_tinc_hdr_rem == 0 && tc->t_tinc_data_rem == 0) { + struct rds_connection *conn = cp->cp_conn; + if (tinc->ti_inc.i_hdr.h_flags == RDS_FLAG_CONG_BITMAP) rds_tcp_cong_recv(conn, tinc); else @@ -251,15 +253,15 @@ out: } /* the caller has to hold the sock lock */ -static int rds_tcp_read_sock(struct rds_connection *conn, gfp_t gfp) +static int rds_tcp_read_sock(struct rds_conn_path *cp, gfp_t gfp) { - struct rds_tcp_connection *tc = conn->c_transport_data; + struct rds_tcp_connection *tc = cp->cp_transport_data; struct socket *sock = tc->t_sock; read_descriptor_t desc; struct rds_tcp_desc_arg arg; /* It's like glib in the kernel! */ - arg.conn = conn; + arg.conn_path = cp; arg.gfp = gfp; desc.arg.data = &arg; desc.error = 0; @@ -279,16 +281,17 @@ static int rds_tcp_read_sock(struct rds_connection *conn, gfp_t gfp) * if we fail to allocate we're in trouble.. blindly wait some time before * trying again to see if the VM can free up something for us. */ -int rds_tcp_recv(struct rds_connection *conn) +int rds_tcp_recv_path(struct rds_conn_path *cp) { - struct rds_tcp_connection *tc = conn->c_transport_data; + struct rds_tcp_connection *tc = cp->cp_transport_data; struct socket *sock = tc->t_sock; int ret = 0; - rdsdebug("recv worker conn %p tc %p sock %p\n", conn, tc, sock); + rdsdebug("recv worker path [%d] tc %p sock %p\n", + cp->cp_index, tc, sock); lock_sock(sock->sk); - ret = rds_tcp_read_sock(conn, GFP_KERNEL); + ret = rds_tcp_read_sock(cp, GFP_KERNEL); release_sock(sock->sk); return ret; @@ -297,24 +300,24 @@ int rds_tcp_recv(struct rds_connection *conn) void rds_tcp_data_ready(struct sock *sk) { void (*ready)(struct sock *sk); - struct rds_connection *conn; + struct rds_conn_path *cp; struct rds_tcp_connection *tc; rdsdebug("data ready sk %p\n", sk); read_lock_bh(&sk->sk_callback_lock); - conn = sk->sk_user_data; - if (!conn) { /* check for teardown race */ + cp = sk->sk_user_data; + if (!cp) { /* check for teardown race */ ready = sk->sk_data_ready; goto out; } - tc = conn->c_transport_data; + tc = cp->cp_transport_data; ready = tc->t_orig_data_ready; rds_tcp_stats_inc(s_tcp_data_ready_calls); - if (rds_tcp_read_sock(conn, GFP_ATOMIC) == -ENOMEM) - queue_delayed_work(rds_wq, &conn->c_recv_w, 0); + if (rds_tcp_read_sock(cp, GFP_ATOMIC) == -ENOMEM) + queue_delayed_work(rds_wq, &cp->cp_recv_w, 0); out: read_unlock_bh(&sk->sk_callback_lock); ready(sk); diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index 710f1aae97ad..57e0f5826406 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -49,16 +49,16 @@ static void rds_tcp_cork(struct socket *sock, int val) set_fs(oldfs); } -void rds_tcp_xmit_prepare(struct rds_connection *conn) +void rds_tcp_xmit_path_prepare(struct rds_conn_path *cp) { - struct rds_tcp_connection *tc = conn->c_transport_data; + struct rds_tcp_connection *tc = cp->cp_transport_data; rds_tcp_cork(tc->t_sock, 1); } -void rds_tcp_xmit_complete(struct rds_connection *conn) +void rds_tcp_xmit_path_complete(struct rds_conn_path *cp) { - struct rds_tcp_connection *tc = conn->c_transport_data; + struct rds_tcp_connection *tc = cp->cp_transport_data; rds_tcp_cork(tc->t_sock, 0); } @@ -178,27 +178,27 @@ static int rds_tcp_is_acked(struct rds_message *rm, uint64_t ack) void rds_tcp_write_space(struct sock *sk) { void (*write_space)(struct sock *sk); - struct rds_connection *conn; + struct rds_conn_path *cp; struct rds_tcp_connection *tc; read_lock_bh(&sk->sk_callback_lock); - conn = sk->sk_user_data; - if (!conn) { + cp = sk->sk_user_data; + if (!cp) { write_space = sk->sk_write_space; goto out; } - tc = conn->c_transport_data; + tc = cp->cp_transport_data; rdsdebug("write_space for tc %p\n", tc); write_space = tc->t_orig_write_space; rds_tcp_stats_inc(s_tcp_write_space_calls); rdsdebug("tcp una %u\n", rds_tcp_snd_una(tc)); tc->t_last_seen_una = rds_tcp_snd_una(tc); - rds_send_drop_acked(conn, rds_tcp_snd_una(tc), rds_tcp_is_acked); + rds_send_path_drop_acked(cp, rds_tcp_snd_una(tc), rds_tcp_is_acked); if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) - queue_delayed_work(rds_wq, &conn->c_send_w, 0); + queue_delayed_work(rds_wq, &cp->cp_send_w, 0); out: read_unlock_bh(&sk->sk_callback_lock); diff --git a/net/rds/threads.c b/net/rds/threads.c index 9fbe95bb14a9..bc97d67f29cc 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -125,6 +125,11 @@ void rds_queue_reconnect(struct rds_conn_path *cp) conn, &conn->c_laddr, &conn->c_faddr, cp->cp_reconnect_jiffies); + /* let peer with smaller addr initiate reconnect, to avoid duels */ + if (conn->c_trans->t_type == RDS_TRANS_TCP && + conn->c_laddr > conn->c_faddr) + return; + set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags); if (cp->cp_reconnect_jiffies == 0) { cp->cp_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies; @@ -152,8 +157,9 @@ void rds_connect_worker(struct work_struct *work) int ret; clear_bit(RDS_RECONNECT_PENDING, &cp->cp_flags); - if (rds_conn_path_transition(cp, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) { - ret = conn->c_trans->conn_connect(conn); + ret = rds_conn_path_transition(cp, RDS_CONN_DOWN, RDS_CONN_CONNECTING); + if (ret) { + ret = conn->c_trans->conn_path_connect(cp); rdsdebug("conn %p for %pI4 to %pI4 dispatched, ret %d\n", conn, &conn->c_laddr, &conn->c_faddr, ret); @@ -203,7 +209,7 @@ void rds_recv_worker(struct work_struct *work) int ret; if (rds_conn_path_state(cp) == RDS_CONN_UP) { - ret = cp->cp_conn->c_trans->recv(cp->cp_conn); + ret = cp->cp_conn->c_trans->recv_path(cp); rdsdebug("conn %p ret %d\n", cp->cp_conn, ret); switch (ret) { case -EAGAIN: diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index f7b6cf49ea6f..ef74bffa6101 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -223,15 +223,10 @@ static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg) bpf_fd = nla_get_u32(tb[TCA_ACT_BPF_FD]); - fp = bpf_prog_get(bpf_fd); + fp = bpf_prog_get_type(bpf_fd, BPF_PROG_TYPE_SCHED_ACT); if (IS_ERR(fp)) return PTR_ERR(fp); - if (fp->type != BPF_PROG_TYPE_SCHED_ACT) { - bpf_prog_put(fp); - return -EINVAL; - } - if (tb[TCA_ACT_BPF_NAME]) { name = kmemdup(nla_data(tb[TCA_ACT_BPF_NAME]), nla_len(tb[TCA_ACT_BPF_NAME]), diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 5b135d357e1e..70cfbbf96af2 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -181,7 +181,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a, if (!(at & AT_EGRESS)) { if (m->tcfm_ok_push) - skb_push(skb2, skb->mac_len); + skb_push_rcsum(skb2, skb->mac_len); } /* mirror is always swallowed */ diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 53d1486cddf7..8e573c0f8742 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -47,6 +47,8 @@ static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a, skb_set_queue_mapping(skb, d->queue_mapping); if (d->flags & SKBEDIT_F_MARK) skb->mark = d->mark; + if (d->flags & SKBEDIT_F_PTYPE) + skb->pkt_type = d->ptype; spin_unlock(&d->tcf_lock); return d->tcf_action; @@ -57,6 +59,7 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = { [TCA_SKBEDIT_PRIORITY] = { .len = sizeof(u32) }, [TCA_SKBEDIT_QUEUE_MAPPING] = { .len = sizeof(u16) }, [TCA_SKBEDIT_MARK] = { .len = sizeof(u32) }, + [TCA_SKBEDIT_PTYPE] = { .len = sizeof(u16) }, }; static int tcf_skbedit_init(struct net *net, struct nlattr *nla, @@ -68,7 +71,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, struct tc_skbedit *parm; struct tcf_skbedit *d; u32 flags = 0, *priority = NULL, *mark = NULL; - u16 *queue_mapping = NULL; + u16 *queue_mapping = NULL, *ptype = NULL; bool exists = false; int ret = 0, err; @@ -92,6 +95,13 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, queue_mapping = nla_data(tb[TCA_SKBEDIT_QUEUE_MAPPING]); } + if (tb[TCA_SKBEDIT_PTYPE] != NULL) { + ptype = nla_data(tb[TCA_SKBEDIT_PTYPE]); + if (!skb_pkt_type_ok(*ptype)) + return -EINVAL; + flags |= SKBEDIT_F_PTYPE; + } + if (tb[TCA_SKBEDIT_MARK] != NULL) { flags |= SKBEDIT_F_MARK; mark = nla_data(tb[TCA_SKBEDIT_MARK]); @@ -132,6 +142,8 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, d->queue_mapping = *queue_mapping; if (flags & SKBEDIT_F_MARK) d->mark = *mark; + if (flags & SKBEDIT_F_PTYPE) + d->ptype = *ptype; d->tcf_action = parm->action; @@ -158,16 +170,16 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a, if (nla_put(skb, TCA_SKBEDIT_PARMS, sizeof(opt), &opt)) goto nla_put_failure; if ((d->flags & SKBEDIT_F_PRIORITY) && - nla_put(skb, TCA_SKBEDIT_PRIORITY, sizeof(d->priority), - &d->priority)) + nla_put_u32(skb, TCA_SKBEDIT_PRIORITY, d->priority)) goto nla_put_failure; if ((d->flags & SKBEDIT_F_QUEUE_MAPPING) && - nla_put(skb, TCA_SKBEDIT_QUEUE_MAPPING, - sizeof(d->queue_mapping), &d->queue_mapping)) + nla_put_u16(skb, TCA_SKBEDIT_QUEUE_MAPPING, d->queue_mapping)) goto nla_put_failure; if ((d->flags & SKBEDIT_F_MARK) && - nla_put(skb, TCA_SKBEDIT_MARK, sizeof(d->mark), - &d->mark)) + nla_put_u32(skb, TCA_SKBEDIT_MARK, d->mark)) + goto nla_put_failure; + if ((d->flags & SKBEDIT_F_PTYPE) && + nla_put_u16(skb, TCA_SKBEDIT_PTYPE, d->ptype)) goto nla_put_failure; tcf_tm_dump(&t, &d->tcf_tm); diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 7b342c779da7..c3002c2c68bb 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -272,15 +272,10 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog, bpf_fd = nla_get_u32(tb[TCA_BPF_FD]); - fp = bpf_prog_get(bpf_fd); + fp = bpf_prog_get_type(bpf_fd, BPF_PROG_TYPE_SCHED_CLS); if (IS_ERR(fp)) return PTR_ERR(fp); - if (fp->type != BPF_PROG_TYPE_SCHED_CLS) { - bpf_prog_put(fp); - return -EINVAL; - } - if (tb[TCA_BPF_NAME]) { name = kmemdup(nla_data(tb[TCA_BPF_NAME]), nla_len(tb[TCA_BPF_NAME]), diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 8cb5eff7b79c..dff92ea772fe 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -130,7 +130,6 @@ struct hfsc_class { struct rb_node vt_node; /* parent's vt_tree member */ struct rb_root cf_tree; /* active children sorted by cl_f */ struct rb_node cf_node; /* parent's cf_heap member */ - struct list_head dlist; /* drop list member */ u64 cl_total; /* total work in bytes */ u64 cl_cumul; /* cumulative work in bytes done by @@ -177,8 +176,6 @@ struct hfsc_sched { struct hfsc_class root; /* root class */ struct Qdisc_class_hash clhash; /* class hash */ struct rb_root eligible; /* eligible tree */ - struct list_head droplist; /* active leaf class list (for - dropping) */ struct qdisc_watchdog watchdog; /* watchdog timer */ }; @@ -781,6 +778,20 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time) else go_passive = 0; + /* update vt */ + cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total) + - cl->cl_vtoff + cl->cl_vtadj; + + /* + * if vt of the class is smaller than cvtmin, + * the class was skipped in the past due to non-fit. + * if so, we need to adjust vtadj. + */ + if (cl->cl_vt < cl->cl_parent->cl_cvtmin) { + cl->cl_vtadj += cl->cl_parent->cl_cvtmin - cl->cl_vt; + cl->cl_vt = cl->cl_parent->cl_cvtmin; + } + if (go_passive) { /* no more active child, going passive */ @@ -797,25 +808,10 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time) continue; } - /* - * update vt and f - */ - cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total) - - cl->cl_vtoff + cl->cl_vtadj; - - /* - * if vt of the class is smaller than cvtmin, - * the class was skipped in the past due to non-fit. - * if so, we need to adjust vtadj. - */ - if (cl->cl_vt < cl->cl_parent->cl_cvtmin) { - cl->cl_vtadj += cl->cl_parent->cl_cvtmin - cl->cl_vt; - cl->cl_vt = cl->cl_parent->cl_cvtmin; - } - /* update the vt tree */ vttree_update(cl); + /* update f */ if (cl->cl_flags & HFSC_USC) { cl->cl_myf = cl->cl_myfadj + rtsc_y2x(&cl->cl_ulimit, cl->cl_total); @@ -858,7 +854,6 @@ set_active(struct hfsc_class *cl, unsigned int len) if (cl->cl_flags & HFSC_FSC) init_vf(cl, len); - list_add_tail(&cl->dlist, &cl->sched->droplist); } static void @@ -867,8 +862,6 @@ set_passive(struct hfsc_class *cl) if (cl->cl_flags & HFSC_RSC) eltree_remove(cl); - list_del(&cl->dlist); - /* * vttree is now handled in update_vf() so that update_vf(cl, 0, 0) * needs to be called explicitly to remove a class from vttree. @@ -882,7 +875,7 @@ qdisc_peek_len(struct Qdisc *sch) unsigned int len; skb = sch->ops->peek(sch); - if (skb == NULL) { + if (unlikely(skb == NULL)) { qdisc_warn_nonwc("qdisc_peek_len", sch); return 0; } @@ -947,7 +940,7 @@ static void hfsc_change_fsc(struct hfsc_class *cl, struct tc_service_curve *fsc) { sc2isc(fsc, &cl->cl_fsc); - rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vt, cl->cl_total); + rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vtoff + cl->cl_vt, cl->cl_total); cl->cl_flags |= HFSC_FSC; } @@ -1443,7 +1436,6 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt) if (err < 0) return err; q->eligible = RB_ROOT; - INIT_LIST_HEAD(&q->droplist); q->root.cl_common.classid = sch->handle; q->root.refcnt = 1; @@ -1527,7 +1519,6 @@ hfsc_reset_qdisc(struct Qdisc *sch) hfsc_reset_class(cl); } q->eligible = RB_ROOT; - INIT_LIST_HEAD(&q->droplist); qdisc_watchdog_cancel(&q->watchdog); sch->qstats.backlog = 0; sch->q.qlen = 0; @@ -1594,8 +1585,17 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) return err; } - if (cl->qdisc->q.qlen == 1) + if (cl->qdisc->q.qlen == 1) { set_active(cl, qdisc_pkt_len(skb)); + /* + * If this is the first packet, isolate the head so an eventual + * head drop before the first dequeue operation has no chance + * to invalidate the deadline. + */ + if (cl->cl_flags & HFSC_RSC) + cl->qdisc->ops->peek(cl->qdisc); + + } qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 3ad9fab1985f..1fd464764765 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -604,7 +604,7 @@ static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg, link_info.dest = nla_get_flag(link[TIPC_NLA_LINK_DEST]); link_info.up = htonl(nla_get_flag(link[TIPC_NLA_LINK_UP])); - nla_strlcpy(link_info.str, nla_data(link[TIPC_NLA_LINK_NAME]), + nla_strlcpy(link_info.str, link[TIPC_NLA_LINK_NAME], TIPC_MAX_LINK_NAME); return tipc_add_tlv(msg->rep, TIPC_TLV_LINK_INFO, diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 0bf2478cb7df..a98b780e974c 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -20,6 +20,7 @@ hostprogs-y += offwaketime hostprogs-y += spintest hostprogs-y += map_perf_test hostprogs-y += test_overhead +hostprogs-y += test_cgrp2_array_pin test_verifier-objs := test_verifier.o libbpf.o test_maps-objs := test_maps.o libbpf.o @@ -40,6 +41,7 @@ offwaketime-objs := bpf_load.o libbpf.o offwaketime_user.o spintest-objs := bpf_load.o libbpf.o spintest_user.o map_perf_test-objs := bpf_load.o libbpf.o map_perf_test_user.o test_overhead-objs := bpf_load.o libbpf.o test_overhead_user.o +test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o # Tell kbuild to always build the programs always := $(hostprogs-y) @@ -61,6 +63,7 @@ always += map_perf_test_kern.o always += test_overhead_tp_kern.o always += test_overhead_kprobe_kern.o always += parse_varlen.o parse_simple.o parse_ldabs.o +always += test_cgrp2_tc_kern.o HOSTCFLAGS += -I$(objtree)/usr/include diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index 7904a2a493de..84e3fd919a06 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h @@ -70,6 +70,8 @@ static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flag (void *) BPF_FUNC_l3_csum_replace; static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) = (void *) BPF_FUNC_l4_csum_replace; +static int (*bpf_skb_in_cgroup)(void *ctx, void *map, int index) = + (void *) BPF_FUNC_skb_in_cgroup; #if defined(__x86_64__) diff --git a/samples/bpf/test_cgrp2_array_pin.c b/samples/bpf/test_cgrp2_array_pin.c new file mode 100644 index 000000000000..70e86f7be69d --- /dev/null +++ b/samples/bpf/test_cgrp2_array_pin.c @@ -0,0 +1,109 @@ +/* Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <linux/unistd.h> +#include <linux/bpf.h> + +#include <stdio.h> +#include <stdint.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <fcntl.h> + +#include "libbpf.h" + +static void usage(void) +{ + printf("Usage: test_cgrp2_array_pin [...]\n"); + printf(" -F <file> File to pin an BPF cgroup array\n"); + printf(" -U <file> Update an already pinned BPF cgroup array\n"); + printf(" -v <value> Full path of the cgroup2\n"); + printf(" -h Display this help\n"); +} + +int main(int argc, char **argv) +{ + const char *pinned_file = NULL, *cg2 = NULL; + int create_array = 1; + int array_key = 0; + int array_fd = -1; + int cg2_fd = -1; + int ret = -1; + int opt; + + while ((opt = getopt(argc, argv, "F:U:v:")) != -1) { + switch (opt) { + /* General args */ + case 'F': + pinned_file = optarg; + break; + case 'U': + pinned_file = optarg; + create_array = 0; + break; + case 'v': + cg2 = optarg; + break; + default: + usage(); + goto out; + } + } + + if (!cg2 || !pinned_file) { + usage(); + goto out; + } + + cg2_fd = open(cg2, O_RDONLY); + if (cg2_fd < 0) { + fprintf(stderr, "open(%s,...): %s(%d)\n", + cg2, strerror(errno), errno); + goto out; + } + + if (create_array) { + array_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_ARRAY, + sizeof(uint32_t), sizeof(uint32_t), + 1, 0); + if (array_fd < 0) { + fprintf(stderr, + "bpf_create_map(BPF_MAP_TYPE_CGROUP_ARRAY,...): %s(%d)\n", + strerror(errno), errno); + goto out; + } + } else { + array_fd = bpf_obj_get(pinned_file); + if (array_fd < 0) { + fprintf(stderr, "bpf_obj_get(%s): %s(%d)\n", + pinned_file, strerror(errno), errno); + goto out; + } + } + + ret = bpf_update_elem(array_fd, &array_key, &cg2_fd, 0); + if (ret) { + perror("bpf_update_elem"); + goto out; + } + + if (create_array) { + ret = bpf_obj_pin(array_fd, pinned_file); + if (ret) { + fprintf(stderr, "bpf_obj_pin(..., %s): %s(%d)\n", + pinned_file, strerror(errno), errno); + goto out; + } + } + +out: + if (array_fd != -1) + close(array_fd); + if (cg2_fd != -1) + close(cg2_fd); + return ret; +} diff --git a/samples/bpf/test_cgrp2_tc.sh b/samples/bpf/test_cgrp2_tc.sh new file mode 100755 index 000000000000..0b119eeaf85c --- /dev/null +++ b/samples/bpf/test_cgrp2_tc.sh @@ -0,0 +1,184 @@ +#!/bin/bash + +MY_DIR=$(dirname $0) +# Details on the bpf prog +BPF_CGRP2_ARRAY_NAME='test_cgrp2_array_pin' +BPF_PROG="$MY_DIR/test_cgrp2_tc_kern.o" +BPF_SECTION='filter' + +[ -z "$TC" ] && TC='tc' +[ -z "$IP" ] && IP='ip' + +# Names of the veth interface, net namespace...etc. +HOST_IFC='ve' +NS_IFC='vens' +NS='ns' + +find_mnt() { + cat /proc/mounts | \ + awk '{ if ($3 == "'$1'" && mnt == "") { mnt = $2 }} END { print mnt }' +} + +# Init cgroup2 vars +init_cgrp2_vars() { + CGRP2_ROOT=$(find_mnt cgroup2) + if [ -z "$CGRP2_ROOT" ] + then + CGRP2_ROOT='/mnt/cgroup2' + MOUNT_CGRP2="yes" + fi + CGRP2_TC="$CGRP2_ROOT/tc" + CGRP2_TC_LEAF="$CGRP2_TC/leaf" +} + +# Init bpf fs vars +init_bpf_fs_vars() { + local bpf_fs_root=$(find_mnt bpf) + [ -n "$bpf_fs_root" ] || return -1 + BPF_FS_TC_SHARE="$bpf_fs_root/tc/globals" +} + +setup_cgrp2() { + case $1 in + start) + if [ "$MOUNT_CGRP2" == 'yes' ] + then + [ -d $CGRP2_ROOT ] || mkdir -p $CGRP2_ROOT + mount -t cgroup2 none $CGRP2_ROOT || return $? + fi + mkdir -p $CGRP2_TC_LEAF + ;; + *) + rmdir $CGRP2_TC_LEAF && rmdir $CGRP2_TC + [ "$MOUNT_CGRP2" == 'yes' ] && umount $CGRP2_ROOT + ;; + esac +} + +setup_bpf_cgrp2_array() { + local bpf_cgrp2_array="$BPF_FS_TC_SHARE/$BPF_CGRP2_ARRAY_NAME" + case $1 in + start) + $MY_DIR/test_cgrp2_array_pin -U $bpf_cgrp2_array -v $CGRP2_TC + ;; + *) + [ -d "$BPF_FS_TC_SHARE" ] && rm -f $bpf_cgrp2_array + ;; + esac +} + +setup_net() { + case $1 in + start) + $IP link add $HOST_IFC type veth peer name $NS_IFC || return $? + $IP link set dev $HOST_IFC up || return $? + sysctl -q net.ipv6.conf.$HOST_IFC.accept_dad=0 + + $IP netns add ns || return $? + $IP link set dev $NS_IFC netns ns || return $? + $IP -n $NS link set dev $NS_IFC up || return $? + $IP netns exec $NS sysctl -q net.ipv6.conf.$NS_IFC.accept_dad=0 + $TC qdisc add dev $HOST_IFC clsact || return $? + $TC filter add dev $HOST_IFC egress bpf da obj $BPF_PROG sec $BPF_SECTION || return $? + ;; + *) + $IP netns del $NS + $IP link del $HOST_IFC + ;; + esac +} + +run_in_cgrp() { + # Fork another bash and move it under the specified cgroup. + # It makes the cgroup cleanup easier at the end of the test. + cmd='echo $$ > ' + cmd="$cmd $1/cgroup.procs; exec $2" + bash -c "$cmd" +} + +do_test() { + run_in_cgrp $CGRP2_TC_LEAF "ping -6 -c3 ff02::1%$HOST_IFC >& /dev/null" + local dropped=$($TC -s qdisc show dev $HOST_IFC | tail -3 | \ + awk '/drop/{print substr($7, 0, index($7, ",")-1)}') + if [[ $dropped -eq 0 ]] + then + echo "FAIL" + return 1 + else + echo "Successfully filtered $dropped packets" + return 0 + fi +} + +do_exit() { + if [ "$DEBUG" == "yes" ] && [ "$MODE" != 'cleanuponly' ] + then + echo "------ DEBUG ------" + echo "mount: "; mount | egrep '(cgroup2|bpf)'; echo + echo "$CGRP2_TC_LEAF: "; ls -l $CGRP2_TC_LEAF; echo + if [ -d "$BPF_FS_TC_SHARE" ] + then + echo "$BPF_FS_TC_SHARE: "; ls -l $BPF_FS_TC_SHARE; echo + fi + echo "Host net:" + $IP netns + $IP link show dev $HOST_IFC + $IP -6 a show dev $HOST_IFC + $TC -s qdisc show dev $HOST_IFC + echo + echo "$NS net:" + $IP -n $NS link show dev $NS_IFC + $IP -n $NS -6 link show dev $NS_IFC + echo "------ DEBUG ------" + echo + fi + + if [ "$MODE" != 'nocleanup' ] + then + setup_net stop + setup_bpf_cgrp2_array stop + setup_cgrp2 stop + fi +} + +init_cgrp2_vars +init_bpf_fs_vars + +while [[ $# -ge 1 ]] +do + a="$1" + case $a in + debug) + DEBUG='yes' + shift 1 + ;; + cleanup-only) + MODE='cleanuponly' + shift 1 + ;; + no-cleanup) + MODE='nocleanup' + shift 1 + ;; + *) + echo "test_cgrp2_tc [debug] [cleanup-only | no-cleanup]" + echo " debug: Print cgrp and network setup details at the end of the test" + echo " cleanup-only: Try to cleanup things from last test. No test will be run" + echo " no-cleanup: Run the test but don't do cleanup at the end" + echo "[Note: If no arg is given, it will run the test and do cleanup at the end]" + echo + exit -1 + ;; + esac +done + +trap do_exit 0 + +[ "$MODE" == 'cleanuponly' ] && exit + +setup_cgrp2 start || exit $? +setup_net start || exit $? +init_bpf_fs_vars || exit $? +setup_bpf_cgrp2_array start || exit $? +do_test +echo diff --git a/samples/bpf/test_cgrp2_tc_kern.c b/samples/bpf/test_cgrp2_tc_kern.c new file mode 100644 index 000000000000..2732c37c8d5b --- /dev/null +++ b/samples/bpf/test_cgrp2_tc_kern.c @@ -0,0 +1,69 @@ +/* Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <uapi/linux/if_ether.h> +#include <uapi/linux/in6.h> +#include <uapi/linux/ipv6.h> +#include <uapi/linux/pkt_cls.h> +#include <uapi/linux/bpf.h> +#include "bpf_helpers.h" + +/* copy of 'struct ethhdr' without __packed */ +struct eth_hdr { + unsigned char h_dest[ETH_ALEN]; + unsigned char h_source[ETH_ALEN]; + unsigned short h_proto; +}; + +#define PIN_GLOBAL_NS 2 +struct bpf_elf_map { + __u32 type; + __u32 size_key; + __u32 size_value; + __u32 max_elem; + __u32 flags; + __u32 id; + __u32 pinning; +}; + +struct bpf_elf_map SEC("maps") test_cgrp2_array_pin = { + .type = BPF_MAP_TYPE_CGROUP_ARRAY, + .size_key = sizeof(uint32_t), + .size_value = sizeof(uint32_t), + .pinning = PIN_GLOBAL_NS, + .max_elem = 1, +}; + +SEC("filter") +int handle_egress(struct __sk_buff *skb) +{ + void *data = (void *)(long)skb->data; + struct eth_hdr *eth = data; + struct ipv6hdr *ip6h = data + sizeof(*eth); + void *data_end = (void *)(long)skb->data_end; + char dont_care_msg[] = "dont care %04x %d\n"; + char pass_msg[] = "pass\n"; + char reject_msg[] = "reject\n"; + + /* single length check */ + if (data + sizeof(*eth) + sizeof(*ip6h) > data_end) + return TC_ACT_OK; + + if (eth->h_proto != htons(ETH_P_IPV6) || + ip6h->nexthdr != IPPROTO_ICMPV6) { + bpf_trace_printk(dont_care_msg, sizeof(dont_care_msg), + eth->h_proto, ip6h->nexthdr); + return TC_ACT_OK; + } else if (bpf_skb_in_cgroup(skb, &test_cgrp2_array_pin, 0) != 1) { + bpf_trace_printk(pass_msg, sizeof(pass_msg)); + return TC_ACT_OK; + } else { + bpf_trace_printk(reject_msg, sizeof(reject_msg)); + return TC_ACT_SHOT; + } +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh b/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh new file mode 100755 index 000000000000..4e4e92b2515e --- /dev/null +++ b/samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh @@ -0,0 +1,66 @@ +#!/bin/bash +# +# Benchmark script: +# - developed for benchmarking egress qdisc path, derived (more +# like cut'n'pasted) from ingress benchmark script. +# +# Script for injecting packets into egress qdisc path of the stack +# with pktgen "xmit_mode queue_xmit". +# +basedir=`dirname $0` +source ${basedir}/functions.sh +root_check_run_with_sudo "$@" + +# Parameter parsing via include +source ${basedir}/parameters.sh +[ -z "$DEST_IP" ] && DEST_IP="198.18.0.42" +[ -z "$DST_MAC" ] && DST_MAC="90:e2:ba:ff:ff:ff" + +# Burst greater than 1 are invalid for queue_xmit mode +if [[ -n "$BURST" ]]; then + err 1 "Bursting not supported for this mode" +fi + +# Base Config +DELAY="0" # Zero means max speed +COUNT="10000000" # Zero means indefinitely + +# General cleanup everything since last run +pg_ctrl "reset" + +# Threads are specified with parameter -t value in $THREADS +for ((thread = 0; thread < $THREADS; thread++)); do + # The device name is extended with @name, using thread number to + # make then unique, but any name will do. + dev=${DEV}@${thread} + + # Add remove all other devices and add_device $dev to thread + pg_thread $thread "rem_device_all" + pg_thread $thread "add_device" $dev + + # Base config of dev + pg_set $dev "flag QUEUE_MAP_CPU" + pg_set $dev "count $COUNT" + pg_set $dev "pkt_size $PKT_SIZE" + pg_set $dev "delay $DELAY" + pg_set $dev "flag NO_TIMESTAMP" + + # Destination + pg_set $dev "dst_mac $DST_MAC" + pg_set $dev "dst $DEST_IP" + + # Inject packet into TX qdisc egress path of stack + pg_set $dev "xmit_mode queue_xmit" +done + +# start_run +echo "Running... ctrl^C to stop" >&2 +pg_ctrl "start" +echo "Done" >&2 + +# Print results +for ((thread = 0; thread < $THREADS; thread++)); do + dev=${DEV}@${thread} + echo "Device: $dev" + cat /proc/net/pktgen/$dev | grep -A2 "Result:" +done diff --git a/sound/core/timer.c b/sound/core/timer.c index e722022d325d..9a6157ea6881 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -1955,6 +1955,7 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer, qhead = tu->qhead++; tu->qhead %= tu->queue_size; + tu->qused--; spin_unlock_irq(&tu->qlock); if (tu->tread) { @@ -1968,7 +1969,6 @@ static ssize_t snd_timer_user_read(struct file *file, char __user *buffer, } spin_lock_irq(&tu->qlock); - tu->qused--; if (err < 0) goto _error; result += unit; diff --git a/sound/pci/au88x0/au88x0_core.c b/sound/pci/au88x0/au88x0_core.c index 4a054d720112..d3125c169684 100644 --- a/sound/pci/au88x0/au88x0_core.c +++ b/sound/pci/au88x0/au88x0_core.c @@ -1444,9 +1444,8 @@ static int vortex_wtdma_bufshift(vortex_t * vortex, int wtdma) int page, p, pp, delta, i; page = - (hwread(vortex->mmio, VORTEX_WTDMA_STAT + (wtdma << 2)) & - WT_SUBBUF_MASK) - >> WT_SUBBUF_SHIFT; + (hwread(vortex->mmio, VORTEX_WTDMA_STAT + (wtdma << 2)) + >> WT_SUBBUF_SHIFT) & WT_SUBBUF_MASK; if (dma->nr_periods >= 4) delta = (page - dma->period_real) & 3; else { diff --git a/sound/pci/echoaudio/echoaudio.c b/sound/pci/echoaudio/echoaudio.c index 1cb85aeb0cea..286f5e3686a3 100644 --- a/sound/pci/echoaudio/echoaudio.c +++ b/sound/pci/echoaudio/echoaudio.c @@ -2200,11 +2200,11 @@ static int snd_echo_resume(struct device *dev) u32 pipe_alloc_mask; int err; - commpage_bak = kmalloc(sizeof(struct echoaudio), GFP_KERNEL); + commpage_bak = kmalloc(sizeof(*commpage), GFP_KERNEL); if (commpage_bak == NULL) return -ENOMEM; commpage = chip->comm_page; - memcpy(commpage_bak, commpage, sizeof(struct comm_page)); + memcpy(commpage_bak, commpage, sizeof(*commpage)); err = init_hw(chip, chip->pci->device, chip->pci->subsystem_device); if (err < 0) { diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index 320445f3bf73..79c7b340acc2 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -3977,6 +3977,8 @@ static hda_nid_t set_path_power(struct hda_codec *codec, hda_nid_t nid, for (n = 0; n < spec->paths.used; n++) { path = snd_array_elem(&spec->paths, n); + if (!path->depth) + continue; if (path->path[0] == nid || path->path[path->depth - 1] == nid) { bool pin_old = path->pin_enabled; diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 94089fc71884..e320c44714b1 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -367,9 +367,10 @@ enum { #define IS_SKL_LP(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9d70) #define IS_KBL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa171) #define IS_KBL_LP(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9d71) +#define IS_KBL_H(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa2f0) #define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98) #define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci)) || \ - IS_KBL(pci) || IS_KBL_LP(pci) + IS_KBL(pci) || IS_KBL_LP(pci) || IS_KBL_H(pci) static char *driver_short_names[] = { [AZX_DRIVER_ICH] = "HDA Intel", @@ -2190,6 +2191,9 @@ static const struct pci_device_id azx_ids[] = { /* Kabylake-LP */ { PCI_DEVICE(0x8086, 0x9d71), .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE }, + /* Kabylake-H */ + { PCI_DEVICE(0x8086, 0xa2f0), + .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE }, /* Broxton-P(Apollolake) */ { PCI_DEVICE(0x8086, 0x5a98), .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_BROXTON }, diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 900bfbc3368c..5fac786e4982 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5651,6 +5651,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x504a, "ThinkPad X260", ALC292_FIXUP_TPT440_DOCK), SND_PCI_QUIRK(0x17aa, 0x504b, "Thinkpad", ALC293_FIXUP_LENOVO_SPK_NOISE), SND_PCI_QUIRK(0x17aa, 0x5050, "Thinkpad T560p", ALC292_FIXUP_TPT460), + SND_PCI_QUIRK(0x17aa, 0x5051, "Thinkpad L460", ALC292_FIXUP_TPT460), SND_PCI_QUIRK(0x17aa, 0x5053, "Thinkpad T460", ALC292_FIXUP_TPT460), SND_PCI_QUIRK(0x17aa, 0x5109, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x17aa, 0x3bf8, "Quanta FL1", ALC269_FIXUP_PCM_44K), diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index 4d82a58ff6b0..f3fb98f0a995 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -483,9 +483,10 @@ config SND_SOC_DMIC tristate config SND_SOC_HDMI_CODEC - tristate - select SND_PCM_ELD - select SND_PCM_IEC958 + tristate + select SND_PCM_ELD + select SND_PCM_IEC958 + select HDMI config SND_SOC_ES8328 tristate "Everest Semi ES8328 CODEC" diff --git a/sound/soc/codecs/ak4613.c b/sound/soc/codecs/ak4613.c index 647f69de6baa..5013d2ba0c10 100644 --- a/sound/soc/codecs/ak4613.c +++ b/sound/soc/codecs/ak4613.c @@ -146,6 +146,7 @@ static const struct regmap_config ak4613_regmap_cfg = { .max_register = 0x16, .reg_defaults = ak4613_reg, .num_reg_defaults = ARRAY_SIZE(ak4613_reg), + .cache_type = REGCACHE_RBTREE, }; static const struct of_device_id ak4613_of_match[] = { @@ -530,7 +531,6 @@ static int ak4613_i2c_remove(struct i2c_client *client) static struct i2c_driver ak4613_i2c_driver = { .driver = { .name = "ak4613-codec", - .owner = THIS_MODULE, .of_match_table = ak4613_of_match, }, .probe = ak4613_i2c_probe, diff --git a/sound/soc/codecs/cx20442.c b/sound/soc/codecs/cx20442.c index d6f4abbbf8a7..fb3885fe0afb 100644 --- a/sound/soc/codecs/cx20442.c +++ b/sound/soc/codecs/cx20442.c @@ -226,6 +226,7 @@ static int v253_open(struct tty_struct *tty) if (!tty->disc_data) return -ENODEV; + tty->receive_room = 16; if (tty->ops->write(tty, v253_init, len) != len) { ret = -EIO; goto err; diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c index 181cd3bf0b92..2abb742fc47b 100644 --- a/sound/soc/codecs/hdac_hdmi.c +++ b/sound/soc/codecs/hdac_hdmi.c @@ -1474,6 +1474,11 @@ static int hdmi_codec_probe(struct snd_soc_codec *codec) * exit, we call pm_runtime_suspend() so that will do for us */ hlink = snd_hdac_ext_bus_get_link(edev->ebus, dev_name(&edev->hdac.dev)); + if (!hlink) { + dev_err(&edev->hdac.dev, "hdac link not found\n"); + return -EIO; + } + snd_hdac_ext_bus_link_get(edev->ebus, hlink); ret = create_fill_widget_route_map(dapm); @@ -1634,6 +1639,11 @@ static int hdac_hdmi_dev_probe(struct hdac_ext_device *edev) /* hold the ref while we probe */ hlink = snd_hdac_ext_bus_get_link(edev->ebus, dev_name(&edev->hdac.dev)); + if (!hlink) { + dev_err(&edev->hdac.dev, "hdac link not found\n"); + return -EIO; + } + snd_hdac_ext_bus_link_get(edev->ebus, hlink); hdmi_priv = devm_kzalloc(&codec->dev, sizeof(*hdmi_priv), GFP_KERNEL); @@ -1744,6 +1754,11 @@ static int hdac_hdmi_runtime_suspend(struct device *dev) } hlink = snd_hdac_ext_bus_get_link(ebus, dev_name(dev)); + if (!hlink) { + dev_err(dev, "hdac link not found\n"); + return -EIO; + } + snd_hdac_ext_bus_link_put(ebus, hlink); return 0; @@ -1765,6 +1780,11 @@ static int hdac_hdmi_runtime_resume(struct device *dev) return 0; hlink = snd_hdac_ext_bus_get_link(ebus, dev_name(dev)); + if (!hlink) { + dev_err(dev, "hdac link not found\n"); + return -EIO; + } + snd_hdac_ext_bus_link_get(ebus, hlink); err = snd_hdac_display_power(bus, true); diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 3c6594da6c9c..d70847c9eeb0 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -253,7 +253,7 @@ static const struct reg_default rt5650_reg[] = { { 0x2b, 0x5454 }, { 0x2c, 0xaaa0 }, { 0x2d, 0x0000 }, - { 0x2f, 0x1002 }, + { 0x2f, 0x5002 }, { 0x31, 0x5000 }, { 0x32, 0x0000 }, { 0x33, 0x0000 }, diff --git a/sound/soc/codecs/rt5670.c b/sound/soc/codecs/rt5670.c index 49a9e7049e2b..0af5ddbef1da 100644 --- a/sound/soc/codecs/rt5670.c +++ b/sound/soc/codecs/rt5670.c @@ -619,7 +619,7 @@ static const struct snd_kcontrol_new rt5670_snd_controls[] = { RT5670_L_MUTE_SFT, RT5670_R_MUTE_SFT, 1, 1), SOC_DOUBLE_TLV("HP Playback Volume", RT5670_HP_VOL, RT5670_L_VOL_SFT, RT5670_R_VOL_SFT, - 39, 0, out_vol_tlv), + 39, 1, out_vol_tlv), /* OUTPUT Control */ SOC_DOUBLE("OUT Channel Switch", RT5670_LOUT1, RT5670_VOL_L_SFT, RT5670_VOL_R_SFT, 1, 1), diff --git a/sound/soc/codecs/wm5102.c b/sound/soc/codecs/wm5102.c index da60e3fe5ee7..e7fe6b7b95b7 100644 --- a/sound/soc/codecs/wm5102.c +++ b/sound/soc/codecs/wm5102.c @@ -1872,7 +1872,7 @@ static struct snd_soc_dai_driver wm5102_dai[] = { .capture = { .stream_name = "Audio Trace CPU", .channels_min = 1, - .channels_max = 6, + .channels_max = 4, .rates = WM5102_RATES, .formats = WM5102_FORMATS, }, diff --git a/sound/soc/codecs/wm5110.c b/sound/soc/codecs/wm5110.c index b5820e4d5471..d54f1b46c9ec 100644 --- a/sound/soc/codecs/wm5110.c +++ b/sound/soc/codecs/wm5110.c @@ -1723,6 +1723,7 @@ static const struct snd_soc_dapm_route wm5110_dapm_routes[] = { { "OUT2L", NULL, "SYSCLK" }, { "OUT2R", NULL, "SYSCLK" }, { "OUT3L", NULL, "SYSCLK" }, + { "OUT3R", NULL, "SYSCLK" }, { "OUT4L", NULL, "SYSCLK" }, { "OUT4R", NULL, "SYSCLK" }, { "OUT5L", NULL, "SYSCLK" }, diff --git a/sound/soc/codecs/wm8940.c b/sound/soc/codecs/wm8940.c index f6f9395ea38e..1c600819f768 100644 --- a/sound/soc/codecs/wm8940.c +++ b/sound/soc/codecs/wm8940.c @@ -743,6 +743,7 @@ static const struct regmap_config wm8940_regmap = { .max_register = WM8940_MONOMIX, .reg_defaults = wm8940_reg_defaults, .num_reg_defaults = ARRAY_SIZE(wm8940_reg_defaults), + .cache_type = REGCACHE_RBTREE, .readable_reg = wm8940_readable_register, .volatile_reg = wm8940_volatile_register, diff --git a/sound/soc/davinci/davinci-mcasp.c b/sound/soc/davinci/davinci-mcasp.c index 0f66fda2c772..237dc67002ef 100644 --- a/sound/soc/davinci/davinci-mcasp.c +++ b/sound/soc/davinci/davinci-mcasp.c @@ -1513,8 +1513,9 @@ static struct davinci_mcasp_pdata am33xx_mcasp_pdata = { }; static struct davinci_mcasp_pdata dra7_mcasp_pdata = { - .tx_dma_offset = 0x200, - .rx_dma_offset = 0x284, + /* The CFG port offset will be calculated if it is needed */ + .tx_dma_offset = 0, + .rx_dma_offset = 0, .version = MCASP_VERSION_4, }; @@ -1734,6 +1735,52 @@ static int davinci_mcasp_get_dma_type(struct davinci_mcasp *mcasp) return PCM_EDMA; } +static u32 davinci_mcasp_txdma_offset(struct davinci_mcasp_pdata *pdata) +{ + int i; + u32 offset = 0; + + if (pdata->version != MCASP_VERSION_4) + return pdata->tx_dma_offset; + + for (i = 0; i < pdata->num_serializer; i++) { + if (pdata->serial_dir[i] == TX_MODE) { + if (!offset) { + offset = DAVINCI_MCASP_TXBUF_REG(i); + } else { + pr_err("%s: Only one serializer allowed!\n", + __func__); + break; + } + } + } + + return offset; +} + +static u32 davinci_mcasp_rxdma_offset(struct davinci_mcasp_pdata *pdata) +{ + int i; + u32 offset = 0; + + if (pdata->version != MCASP_VERSION_4) + return pdata->rx_dma_offset; + + for (i = 0; i < pdata->num_serializer; i++) { + if (pdata->serial_dir[i] == RX_MODE) { + if (!offset) { + offset = DAVINCI_MCASP_RXBUF_REG(i); + } else { + pr_err("%s: Only one serializer allowed!\n", + __func__); + break; + } + } + } + + return offset; +} + static int davinci_mcasp_probe(struct platform_device *pdev) { struct snd_dmaengine_dai_dma_data *dma_data; @@ -1862,7 +1909,7 @@ static int davinci_mcasp_probe(struct platform_device *pdev) if (dat) dma_data->addr = dat->start; else - dma_data->addr = mem->start + pdata->tx_dma_offset; + dma_data->addr = mem->start + davinci_mcasp_txdma_offset(pdata); dma = &mcasp->dma_request[SNDRV_PCM_STREAM_PLAYBACK]; res = platform_get_resource(pdev, IORESOURCE_DMA, 0); @@ -1883,7 +1930,8 @@ static int davinci_mcasp_probe(struct platform_device *pdev) if (dat) dma_data->addr = dat->start; else - dma_data->addr = mem->start + pdata->rx_dma_offset; + dma_data->addr = + mem->start + davinci_mcasp_rxdma_offset(pdata); dma = &mcasp->dma_request[SNDRV_PCM_STREAM_CAPTURE]; res = platform_get_resource(pdev, IORESOURCE_DMA, 1); diff --git a/sound/soc/davinci/davinci-mcasp.h b/sound/soc/davinci/davinci-mcasp.h index 1e8787fb3fb7..afddc8010c54 100644 --- a/sound/soc/davinci/davinci-mcasp.h +++ b/sound/soc/davinci/davinci-mcasp.h @@ -85,9 +85,9 @@ (n << 2)) /* Transmit Buffer for Serializer n */ -#define DAVINCI_MCASP_TXBUF_REG 0x200 +#define DAVINCI_MCASP_TXBUF_REG(n) (0x200 + (n << 2)) /* Receive Buffer for Serializer n */ -#define DAVINCI_MCASP_RXBUF_REG 0x280 +#define DAVINCI_MCASP_RXBUF_REG(n) (0x280 + (n << 2)) /* McASP FIFO Registers */ #define DAVINCI_MCASP_V2_AFIFO_BASE (0x1010) diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index 632ecc0e3956..bedec4a32581 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -952,16 +952,16 @@ static int _fsl_ssi_set_dai_fmt(struct device *dev, ssi_private->i2s_mode = CCSR_SSI_SCR_NET; switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { case SND_SOC_DAIFMT_I2S: + regmap_update_bits(regs, CCSR_SSI_STCCR, + CCSR_SSI_SxCCR_DC_MASK, + CCSR_SSI_SxCCR_DC(2)); + regmap_update_bits(regs, CCSR_SSI_SRCCR, + CCSR_SSI_SxCCR_DC_MASK, + CCSR_SSI_SxCCR_DC(2)); switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { case SND_SOC_DAIFMT_CBM_CFS: case SND_SOC_DAIFMT_CBS_CFS: ssi_private->i2s_mode |= CCSR_SSI_SCR_I2S_MODE_MASTER; - regmap_update_bits(regs, CCSR_SSI_STCCR, - CCSR_SSI_SxCCR_DC_MASK, - CCSR_SSI_SxCCR_DC(2)); - regmap_update_bits(regs, CCSR_SSI_SRCCR, - CCSR_SSI_SxCCR_DC_MASK, - CCSR_SSI_SxCCR_DC(2)); break; case SND_SOC_DAIFMT_CBM_CFM: ssi_private->i2s_mode |= CCSR_SSI_SCR_I2S_MODE_SLAVE; diff --git a/sound/soc/intel/atom/sst-mfld-platform-compress.c b/sound/soc/intel/atom/sst-mfld-platform-compress.c index 395168986462..1bead81bb510 100644 --- a/sound/soc/intel/atom/sst-mfld-platform-compress.c +++ b/sound/soc/intel/atom/sst-mfld-platform-compress.c @@ -182,24 +182,29 @@ static int sst_platform_compr_trigger(struct snd_compr_stream *cstream, int cmd) case SNDRV_PCM_TRIGGER_START: if (stream->compr_ops->stream_start) return stream->compr_ops->stream_start(sst->dev, stream->id); + break; case SNDRV_PCM_TRIGGER_STOP: if (stream->compr_ops->stream_drop) return stream->compr_ops->stream_drop(sst->dev, stream->id); + break; case SND_COMPR_TRIGGER_DRAIN: if (stream->compr_ops->stream_drain) return stream->compr_ops->stream_drain(sst->dev, stream->id); + break; case SND_COMPR_TRIGGER_PARTIAL_DRAIN: if (stream->compr_ops->stream_partial_drain) return stream->compr_ops->stream_partial_drain(sst->dev, stream->id); + break; case SNDRV_PCM_TRIGGER_PAUSE_PUSH: if (stream->compr_ops->stream_pause) return stream->compr_ops->stream_pause(sst->dev, stream->id); + break; case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: if (stream->compr_ops->stream_pause_release) return stream->compr_ops->stream_pause_release(sst->dev, stream->id); - default: - return -EINVAL; + break; } + return -EINVAL; } static int sst_platform_compr_pointer(struct snd_compr_stream *cstream, diff --git a/sound/soc/intel/skylake/bxt-sst.c b/sound/soc/intel/skylake/bxt-sst.c index 965ce40ce752..8b95e09e23e8 100644 --- a/sound/soc/intel/skylake/bxt-sst.c +++ b/sound/soc/intel/skylake/bxt-sst.c @@ -291,6 +291,7 @@ int bxt_sst_dsp_init(struct device *dev, void __iomem *mmio_base, int irq, sst_dsp_mailbox_init(sst, (BXT_ADSP_SRAM0_BASE + SKL_ADSP_W0_STAT_SZ), SKL_ADSP_W0_UP_SZ, BXT_ADSP_SRAM1_BASE, SKL_ADSP_W1_SZ); + INIT_LIST_HEAD(&sst->module_list); ret = skl_ipc_init(dev, skl); if (ret) return ret; diff --git a/sound/soc/sh/rcar/adg.c b/sound/soc/sh/rcar/adg.c index 49354d17ea55..c4c51a4d3c8f 100644 --- a/sound/soc/sh/rcar/adg.c +++ b/sound/soc/sh/rcar/adg.c @@ -518,7 +518,7 @@ static void rsnd_adg_get_clkout(struct rsnd_priv *priv, } } - rsnd_mod_bset(adg_mod, SSICKR, 0x00FF0000, ckr); + rsnd_mod_bset(adg_mod, SSICKR, 0x80FF0000, ckr); rsnd_mod_write(adg_mod, BRRA, rbga); rsnd_mod_write(adg_mod, BRRB, rbgb); diff --git a/tools/virtio/ringtest/ptr_ring.c b/tools/virtio/ringtest/ptr_ring.c index 74abd746ae91..68e4f9f0da3a 100644 --- a/tools/virtio/ringtest/ptr_ring.c +++ b/tools/virtio/ringtest/ptr_ring.c @@ -17,6 +17,11 @@ typedef pthread_spinlock_t spinlock_t; typedef int gfp_t; +static void *kmalloc(unsigned size, gfp_t gfp) +{ + return memalign(64, size); +} + static void *kzalloc(unsigned size, gfp_t gfp) { void *p = memalign(64, size); |