diff options
114 files changed, 3327 insertions, 1129 deletions
diff --git a/Documentation/bpf/bpf_iterators.rst b/Documentation/bpf/bpf_iterators.rst index 6d7770793fab..07433915aa41 100644 --- a/Documentation/bpf/bpf_iterators.rst +++ b/Documentation/bpf/bpf_iterators.rst @@ -238,11 +238,8 @@ The following is the breakdown for each field in struct ``bpf_iter_reg``. that the kernel function cond_resched() is called to avoid other kernel subsystem (e.g., rcu) misbehaving. * - seq_info - - Specifies certain action requests in the kernel BPF iterator - infrastructure. Currently, only BPF_ITER_RESCHED is supported. This means - that the kernel function cond_resched() is called to avoid other kernel - subsystem (e.g., rcu) misbehaving. - + - Specifies the set of seq operations for the BPF iterator and helpers to + initialize/free the private data for the corresponding ``seq_file``. `Click here <https://lore.kernel.org/bpf/[email protected]/>`_ diff --git a/Documentation/bpf/cpumasks.rst b/Documentation/bpf/cpumasks.rst index 41efd8874eeb..3139c7c02e79 100644 --- a/Documentation/bpf/cpumasks.rst +++ b/Documentation/bpf/cpumasks.rst @@ -351,14 +351,15 @@ In addition to the above kfuncs, there is also a set of read-only kfuncs that can be used to query the contents of cpumasks. .. kernel-doc:: kernel/bpf/cpumask.c - :identifiers: bpf_cpumask_first bpf_cpumask_first_zero bpf_cpumask_test_cpu + :identifiers: bpf_cpumask_first bpf_cpumask_first_zero bpf_cpumask_first_and + bpf_cpumask_test_cpu .. kernel-doc:: kernel/bpf/cpumask.c :identifiers: bpf_cpumask_equal bpf_cpumask_intersects bpf_cpumask_subset bpf_cpumask_empty bpf_cpumask_full .. kernel-doc:: kernel/bpf/cpumask.c - :identifiers: bpf_cpumask_any bpf_cpumask_any_and + :identifiers: bpf_cpumask_any_distribute bpf_cpumask_any_and_distribute ---- diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst index 7a3d9de5f315..0d2647fb358d 100644 --- a/Documentation/bpf/kfuncs.rst +++ b/Documentation/bpf/kfuncs.rst @@ -227,23 +227,49 @@ absolutely no ABI stability guarantees. As mentioned above, a nested pointer obtained from walking a trusted pointer is no longer trusted, with one exception. If a struct type has a field that is -guaranteed to be valid as long as its parent pointer is trusted, the -``BTF_TYPE_SAFE_NESTED`` macro can be used to express that to the verifier as -follows: +guaranteed to be valid (trusted or rcu, as in KF_RCU description below) as long +as its parent pointer is valid, the following macros can be used to express +that to the verifier: + +* ``BTF_TYPE_SAFE_TRUSTED`` +* ``BTF_TYPE_SAFE_RCU`` +* ``BTF_TYPE_SAFE_RCU_OR_NULL`` + +For example, + +.. code-block:: c + + BTF_TYPE_SAFE_TRUSTED(struct socket) { + struct sock *sk; + }; + +or .. code-block:: c - BTF_TYPE_SAFE_NESTED(struct task_struct) { + BTF_TYPE_SAFE_RCU(struct task_struct) { const cpumask_t *cpus_ptr; + struct css_set __rcu *cgroups; + struct task_struct __rcu *real_parent; + struct task_struct *group_leader; }; In other words, you must: -1. Wrap the trusted pointer type in the ``BTF_TYPE_SAFE_NESTED`` macro. +1. Wrap the valid pointer type in a ``BTF_TYPE_SAFE_*`` macro. -2. Specify the type and name of the trusted nested field. This field must match +2. Specify the type and name of the valid nested field. This field must match the field in the original type definition exactly. +A new type declared by a ``BTF_TYPE_SAFE_*`` macro also needs to be emitted so +that it appears in BTF. For example, ``BTF_TYPE_SAFE_TRUSTED(struct socket)`` +is emitted in the ``type_is_trusted()`` function as follows: + +.. code-block:: c + + BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct socket)); + + 2.4.5 KF_SLEEPABLE flag ----------------------- diff --git a/Documentation/netlink/genetlink-c.yaml b/Documentation/netlink/genetlink-c.yaml index 0519c257ecf4..57d1c1c4918f 100644 --- a/Documentation/netlink/genetlink-c.yaml +++ b/Documentation/netlink/genetlink-c.yaml @@ -195,6 +195,12 @@ properties: description: Max length for a string or a binary attribute. $ref: '#/$defs/len-or-define' sub-type: *attr-type + display-hint: &display-hint + description: | + Optional format indicator that is intended only for choosing + the right formatting mechanism when displaying values of this + type. + enum: [ hex, mac, fddi, ipv4, ipv6, uuid ] # Start genetlink-c name-prefix: type: string diff --git a/Documentation/netlink/genetlink-legacy.yaml b/Documentation/netlink/genetlink-legacy.yaml index b474889b49ff..43b769c98fb2 100644 --- a/Documentation/netlink/genetlink-legacy.yaml +++ b/Documentation/netlink/genetlink-legacy.yaml @@ -119,7 +119,8 @@ properties: name: type: string type: - enum: [ u8, u16, u32, u64, s8, s16, s32, s64, string ] + description: The netlink attribute type + enum: [ u8, u16, u32, u64, s8, s16, s32, s64, string, binary ] len: $ref: '#/$defs/len-or-define' byte-order: @@ -130,6 +131,12 @@ properties: enum: description: Name of the enum type used for the attribute. type: string + display-hint: &display-hint + description: | + Optional format indicator that is intended only for choosing + the right formatting mechanism when displaying values of this + type. + enum: [ hex, mac, fddi, ipv4, ipv6, uuid ] # End genetlink-legacy attribute-sets: @@ -179,6 +186,7 @@ properties: name: type: string type: &attr-type + description: The netlink attribute type enum: [ unused, pad, flag, binary, u8, u16, u32, u64, s32, s64, string, nest, array-nest, nest-type-value ] doc: @@ -226,6 +234,7 @@ properties: description: Max length for a string or a binary attribute. $ref: '#/$defs/len-or-define' sub-type: *attr-type + display-hint: *display-hint # Start genetlink-c name-prefix: type: string diff --git a/Documentation/netlink/genetlink.yaml b/Documentation/netlink/genetlink.yaml index d8b2cdeba058..1cbb448d2f1c 100644 --- a/Documentation/netlink/genetlink.yaml +++ b/Documentation/netlink/genetlink.yaml @@ -168,6 +168,12 @@ properties: description: Max length for a string or a binary attribute. $ref: '#/$defs/len-or-define' sub-type: *attr-type + display-hint: &display-hint + description: | + Optional format indicator that is intended only for choosing + the right formatting mechanism when displaying values of this + type. + enum: [ hex, mac, fddi, ipv4, ipv6, uuid ] # Make sure name-prefix does not appear in subsets (subsets inherit naming) dependencies: diff --git a/Documentation/netlink/specs/ovs_flow.yaml b/Documentation/netlink/specs/ovs_flow.yaml index 1ecbcd117385..109ca1f57b6c 100644 --- a/Documentation/netlink/specs/ovs_flow.yaml +++ b/Documentation/netlink/specs/ovs_flow.yaml @@ -34,6 +34,20 @@ definitions: type: u64 doc: Number of matched bytes. - + name: ovs-key-ethernet + type: struct + members: + - + name: eth-src + type: binary + len: 6 + display-hint: mac + - + name: eth-dst + type: binary + len: 6 + display-hint: mac + - name: ovs-key-mpls type: struct members: @@ -49,10 +63,12 @@ definitions: name: ipv4-src type: u32 byte-order: big-endian + display-hint: ipv4 - name: ipv4-dst type: u32 byte-order: big-endian + display-hint: ipv4 - name: ipv4-proto type: u8 @@ -67,6 +83,45 @@ definitions: type: u8 enum: ovs-frag-type - + name: ovs-key-ipv6 + type: struct + members: + - + name: ipv6-src + type: binary + len: 16 + byte-order: big-endian + display-hint: ipv6 + - + name: ipv6-dst + type: binary + len: 16 + byte-order: big-endian + display-hint: ipv6 + - + name: ipv6-label + type: u32 + byte-order: big-endian + - + name: ipv6-proto + type: u8 + - + name: ipv6-tclass + type: u8 + - + name: ipv6-hlimit + type: u8 + - + name: ipv6-frag + type: u8 + - + name: ovs-key-ipv6-exthdrs + type: struct + members: + - + name: hdrs + type: u16 + - name: ovs-frag-type name-prefix: ovs-frag-type- type: enum @@ -130,6 +185,51 @@ definitions: name: icmp-code type: u8 - + name: ovs-key-arp + type: struct + members: + - + name: arp-sip + type: u32 + byte-order: big-endian + - + name: arp-tip + type: u32 + byte-order: big-endian + - + name: arp-op + type: u16 + byte-order: big-endian + - + name: arp-sha + type: binary + len: 6 + display-hint: mac + - + name: arp-tha + type: binary + len: 6 + display-hint: mac + - + name: ovs-key-nd + type: struct + members: + - + name: nd_target + type: binary + len: 16 + byte-order: big-endian + - + name: nd-sll + type: binary + len: 6 + display-hint: mac + - + name: nd-tll + type: binary + len: 6 + display-hint: mac + - name: ovs-key-ct-tuple-ipv4 type: struct members: @@ -345,6 +445,7 @@ attribute-sets: value of the OVS_FLOW_ATTR_KEY attribute. Optional for all requests. Present in notifications if the flow was created with this attribute. + display-hint: uuid - name: ufid-flags type: u32 @@ -374,6 +475,7 @@ attribute-sets: - name: ethernet type: binary + struct: ovs-key-ethernet doc: struct ovs_key_ethernet - name: vlan @@ -390,6 +492,7 @@ attribute-sets: - name: ipv6 type: binary + struct: ovs-key-ipv6 doc: struct ovs_key_ipv6 - name: tcp @@ -410,10 +513,12 @@ attribute-sets: - name: arp type: binary + struct: ovs-key-arp doc: struct ovs_key_arp - name: nd type: binary + struct: ovs-key-nd doc: struct ovs_key_nd - name: skb-mark @@ -457,6 +562,7 @@ attribute-sets: - name: ct-labels type: binary + display-hint: hex doc: 16-octet connection tracking label - name: ct-orig-tuple-ipv4 @@ -486,6 +592,7 @@ attribute-sets: - name: ipv6-exthdrs type: binary + struct: ovs-key-ipv6-exthdrs doc: struct ovs_key_ipv6_exthdr - name: action-attrs diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 39d0fe76a38f..f80f2735e688 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -523,9 +523,6 @@ void iavf_schedule_request_stats(struct iavf_adapter *adapter); void iavf_reset(struct iavf_adapter *adapter); void iavf_set_ethtool_ops(struct net_device *netdev); void iavf_update_stats(struct iavf_adapter *adapter); -void iavf_reset_interrupt_capability(struct iavf_adapter *adapter); -int iavf_init_interrupt_scheme(struct iavf_adapter *adapter); -void iavf_irq_enable_queues(struct iavf_adapter *adapter); void iavf_free_all_tx_resources(struct iavf_adapter *adapter); void iavf_free_all_rx_resources(struct iavf_adapter *adapter); @@ -579,17 +576,10 @@ void iavf_enable_vlan_stripping_v2(struct iavf_adapter *adapter, u16 tpid); void iavf_disable_vlan_stripping_v2(struct iavf_adapter *adapter, u16 tpid); void iavf_enable_vlan_insertion_v2(struct iavf_adapter *adapter, u16 tpid); void iavf_disable_vlan_insertion_v2(struct iavf_adapter *adapter, u16 tpid); -int iavf_replace_primary_mac(struct iavf_adapter *adapter, - const u8 *new_mac); -void -iavf_set_vlan_offload_features(struct iavf_adapter *adapter, - netdev_features_t prev_features, - netdev_features_t features); void iavf_add_fdir_filter(struct iavf_adapter *adapter); void iavf_del_fdir_filter(struct iavf_adapter *adapter); void iavf_add_adv_rss_cfg(struct iavf_adapter *adapter); void iavf_del_adv_rss_cfg(struct iavf_adapter *adapter); struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter, const u8 *macaddr); -int iavf_lock_timeout(struct mutex *lock, unsigned int msecs); #endif /* _IAVF_H_ */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_alloc.h b/drivers/net/ethernet/intel/iavf/iavf_alloc.h index 2711573c14ec..162ea70685a6 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_alloc.h +++ b/drivers/net/ethernet/intel/iavf/iavf_alloc.h @@ -28,7 +28,6 @@ enum iavf_status iavf_free_dma_mem(struct iavf_hw *hw, struct iavf_dma_mem *mem); enum iavf_status iavf_allocate_virt_mem(struct iavf_hw *hw, struct iavf_virt_mem *mem, u32 size); -enum iavf_status iavf_free_virt_mem(struct iavf_hw *hw, - struct iavf_virt_mem *mem); +void iavf_free_virt_mem(struct iavf_hw *hw, struct iavf_virt_mem *mem); #endif /* _IAVF_ALLOC_H_ */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_common.c b/drivers/net/ethernet/intel/iavf/iavf_common.c index dd11dbbd5551..1afd761d8052 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_common.c +++ b/drivers/net/ethernet/intel/iavf/iavf_common.c @@ -35,7 +35,6 @@ enum iavf_status iavf_set_mac_type(struct iavf_hw *hw) status = IAVF_ERR_DEVICE_NOT_SUPPORTED; } - hw_dbg(hw, "found mac: %d, returns: %d\n", hw->mac.type, status); return status; } @@ -398,23 +397,6 @@ static enum iavf_status iavf_aq_get_set_rss_lut(struct iavf_hw *hw, } /** - * iavf_aq_get_rss_lut - * @hw: pointer to the hardware structure - * @vsi_id: vsi fw index - * @pf_lut: for PF table set true, for VSI table set false - * @lut: pointer to the lut buffer provided by the caller - * @lut_size: size of the lut buffer - * - * get the RSS lookup table, PF or VSI type - **/ -enum iavf_status iavf_aq_get_rss_lut(struct iavf_hw *hw, u16 vsi_id, - bool pf_lut, u8 *lut, u16 lut_size) -{ - return iavf_aq_get_set_rss_lut(hw, vsi_id, pf_lut, lut, lut_size, - false); -} - -/** * iavf_aq_set_rss_lut * @hw: pointer to the hardware structure * @vsi_id: vsi fw index @@ -473,19 +455,6 @@ iavf_status iavf_aq_get_set_rss_key(struct iavf_hw *hw, u16 vsi_id, } /** - * iavf_aq_get_rss_key - * @hw: pointer to the hw struct - * @vsi_id: vsi fw index - * @key: pointer to key info struct - * - **/ -enum iavf_status iavf_aq_get_rss_key(struct iavf_hw *hw, u16 vsi_id, - struct iavf_aqc_get_set_rss_key_data *key) -{ - return iavf_aq_get_set_rss_key(hw, vsi_id, key, false); -} - -/** * iavf_aq_set_rss_key * @hw: pointer to the hw struct * @vsi_id: vsi fw index @@ -828,17 +797,3 @@ void iavf_vf_parse_hw_config(struct iavf_hw *hw, vsi_res++; } } - -/** - * iavf_vf_reset - * @hw: pointer to the hardware structure - * - * Send a VF_RESET message to the PF. Does not wait for response from PF - * as none will be forthcoming. Immediately after calling this function, - * the admin queue should be shut down and (optionally) reinitialized. - **/ -enum iavf_status iavf_vf_reset(struct iavf_hw *hw) -{ - return iavf_aq_send_msg_to_pf(hw, VIRTCHNL_OP_RESET_VF, - 0, NULL, 0, NULL); -} diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 4a66873882d1..a483eb185c99 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -192,12 +192,11 @@ enum iavf_status iavf_allocate_dma_mem_d(struct iavf_hw *hw, } /** - * iavf_free_dma_mem_d - OS specific memory free for shared code + * iavf_free_dma_mem - wrapper for DMA memory freeing * @hw: pointer to the HW structure * @mem: ptr to mem struct to free **/ -enum iavf_status iavf_free_dma_mem_d(struct iavf_hw *hw, - struct iavf_dma_mem *mem) +enum iavf_status iavf_free_dma_mem(struct iavf_hw *hw, struct iavf_dma_mem *mem) { struct iavf_adapter *adapter = (struct iavf_adapter *)hw->back; @@ -209,13 +208,13 @@ enum iavf_status iavf_free_dma_mem_d(struct iavf_hw *hw, } /** - * iavf_allocate_virt_mem_d - OS specific memory alloc for shared code + * iavf_allocate_virt_mem - virt memory alloc wrapper * @hw: pointer to the HW structure * @mem: ptr to mem struct to fill out * @size: size of memory requested **/ -enum iavf_status iavf_allocate_virt_mem_d(struct iavf_hw *hw, - struct iavf_virt_mem *mem, u32 size) +enum iavf_status iavf_allocate_virt_mem(struct iavf_hw *hw, + struct iavf_virt_mem *mem, u32 size) { if (!mem) return IAVF_ERR_PARAM; @@ -230,20 +229,13 @@ enum iavf_status iavf_allocate_virt_mem_d(struct iavf_hw *hw, } /** - * iavf_free_virt_mem_d - OS specific memory free for shared code + * iavf_free_virt_mem - virt memory free wrapper * @hw: pointer to the HW structure * @mem: ptr to mem struct to free **/ -enum iavf_status iavf_free_virt_mem_d(struct iavf_hw *hw, - struct iavf_virt_mem *mem) +void iavf_free_virt_mem(struct iavf_hw *hw, struct iavf_virt_mem *mem) { - if (!mem) - return IAVF_ERR_PARAM; - - /* it's ok to kfree a NULL pointer */ kfree(mem->va); - - return 0; } /** @@ -253,7 +245,7 @@ enum iavf_status iavf_free_virt_mem_d(struct iavf_hw *hw, * * Returns 0 on success, negative on failure **/ -int iavf_lock_timeout(struct mutex *lock, unsigned int msecs) +static int iavf_lock_timeout(struct mutex *lock, unsigned int msecs) { unsigned int wait, delay = 10; @@ -362,7 +354,7 @@ static void iavf_irq_disable(struct iavf_adapter *adapter) * iavf_irq_enable_queues - Enable interrupt for all queues * @adapter: board private structure **/ -void iavf_irq_enable_queues(struct iavf_adapter *adapter) +static void iavf_irq_enable_queues(struct iavf_adapter *adapter) { struct iavf_hw *hw = &adapter->hw; int i; @@ -1003,44 +995,40 @@ struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter, * * Do not call this with mac_vlan_list_lock! **/ -int iavf_replace_primary_mac(struct iavf_adapter *adapter, - const u8 *new_mac) +static int iavf_replace_primary_mac(struct iavf_adapter *adapter, + const u8 *new_mac) { struct iavf_hw *hw = &adapter->hw; - struct iavf_mac_filter *f; + struct iavf_mac_filter *new_f; + struct iavf_mac_filter *old_f; spin_lock_bh(&adapter->mac_vlan_list_lock); - list_for_each_entry(f, &adapter->mac_filter_list, list) { - f->is_primary = false; + new_f = iavf_add_filter(adapter, new_mac); + if (!new_f) { + spin_unlock_bh(&adapter->mac_vlan_list_lock); + return -ENOMEM; } - f = iavf_find_filter(adapter, hw->mac.addr); - if (f) { - f->remove = true; + old_f = iavf_find_filter(adapter, hw->mac.addr); + if (old_f) { + old_f->is_primary = false; + old_f->remove = true; adapter->aq_required |= IAVF_FLAG_AQ_DEL_MAC_FILTER; } - - f = iavf_add_filter(adapter, new_mac); - - if (f) { - /* Always send the request to add if changing primary MAC - * even if filter is already present on the list - */ - f->is_primary = true; - f->add = true; - adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER; - ether_addr_copy(hw->mac.addr, new_mac); - } + /* Always send the request to add if changing primary MAC, + * even if filter is already present on the list + */ + new_f->is_primary = true; + new_f->add = true; + adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER; + ether_addr_copy(hw->mac.addr, new_mac); spin_unlock_bh(&adapter->mac_vlan_list_lock); /* schedule the watchdog task to immediately process the request */ - if (f) { - mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0); - return 0; - } - return -ENOMEM; + mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0); + return 0; } /** @@ -1863,7 +1851,7 @@ static void iavf_free_q_vectors(struct iavf_adapter *adapter) * @adapter: board private structure * **/ -void iavf_reset_interrupt_capability(struct iavf_adapter *adapter) +static void iavf_reset_interrupt_capability(struct iavf_adapter *adapter) { if (!adapter->msix_entries) return; @@ -1878,7 +1866,7 @@ void iavf_reset_interrupt_capability(struct iavf_adapter *adapter) * @adapter: board private structure to initialize * **/ -int iavf_init_interrupt_scheme(struct iavf_adapter *adapter) +static int iavf_init_interrupt_scheme(struct iavf_adapter *adapter) { int err; @@ -2176,7 +2164,7 @@ static int iavf_process_aq_command(struct iavf_adapter *adapter) * the watchdog if any changes are requested to expedite the request via * virtchnl. **/ -void +static void iavf_set_vlan_offload_features(struct iavf_adapter *adapter, netdev_features_t prev_features, netdev_features_t features) diff --git a/drivers/net/ethernet/intel/iavf/iavf_osdep.h b/drivers/net/ethernet/intel/iavf/iavf_osdep.h index a452ce90679a..77d33deaabb5 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_osdep.h +++ b/drivers/net/ethernet/intel/iavf/iavf_osdep.h @@ -13,12 +13,6 @@ /* get readq/writeq support for 32 bit kernels, use the low-first version */ #include <linux/io-64-nonatomic-lo-hi.h> -/* File to be the magic between shared code and - * actual OS primitives - */ - -#define hw_dbg(hw, S, A...) do {} while (0) - #define wr32(a, reg, value) writel((value), ((a)->hw_addr + (reg))) #define rd32(a, reg) readl((a)->hw_addr + (reg)) @@ -35,14 +29,11 @@ struct iavf_dma_mem { #define iavf_allocate_dma_mem(h, m, unused, s, a) \ iavf_allocate_dma_mem_d(h, m, s, a) -#define iavf_free_dma_mem(h, m) iavf_free_dma_mem_d(h, m) struct iavf_virt_mem { void *va; u32 size; }; -#define iavf_allocate_virt_mem(h, m, s) iavf_allocate_virt_mem_d(h, m, s) -#define iavf_free_virt_mem(h, m) iavf_free_virt_mem_d(h, m) #define iavf_debug(h, m, s, ...) \ do { \ diff --git a/drivers/net/ethernet/intel/iavf/iavf_prototype.h b/drivers/net/ethernet/intel/iavf/iavf_prototype.h index edebfbbcffdc..940cb4203fbe 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_prototype.h +++ b/drivers/net/ethernet/intel/iavf/iavf_prototype.h @@ -40,12 +40,8 @@ enum iavf_status iavf_aq_queue_shutdown(struct iavf_hw *hw, bool unloading); const char *iavf_aq_str(struct iavf_hw *hw, enum iavf_admin_queue_err aq_err); const char *iavf_stat_str(struct iavf_hw *hw, enum iavf_status stat_err); -enum iavf_status iavf_aq_get_rss_lut(struct iavf_hw *hw, u16 seid, - bool pf_lut, u8 *lut, u16 lut_size); enum iavf_status iavf_aq_set_rss_lut(struct iavf_hw *hw, u16 seid, bool pf_lut, u8 *lut, u16 lut_size); -enum iavf_status iavf_aq_get_rss_key(struct iavf_hw *hw, u16 seid, - struct iavf_aqc_get_set_rss_key_data *key); enum iavf_status iavf_aq_set_rss_key(struct iavf_hw *hw, u16 seid, struct iavf_aqc_get_set_rss_key_data *key); @@ -60,7 +56,6 @@ static inline struct iavf_rx_ptype_decoded decode_rx_desc_ptype(u8 ptype) void iavf_vf_parse_hw_config(struct iavf_hw *hw, struct virtchnl_vf_resource *msg); -enum iavf_status iavf_vf_reset(struct iavf_hw *hw); enum iavf_status iavf_aq_send_msg_to_pf(struct iavf_hw *hw, enum virtchnl_ops v_opcode, enum iavf_status v_retval, diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c index e989feda133c..8c5f6096b002 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c @@ -54,7 +54,7 @@ static void iavf_unmap_and_free_tx_resource(struct iavf_ring *ring, * iavf_clean_tx_ring - Free any empty Tx buffers * @tx_ring: ring to be cleaned **/ -void iavf_clean_tx_ring(struct iavf_ring *tx_ring) +static void iavf_clean_tx_ring(struct iavf_ring *tx_ring) { unsigned long bi_size; u16 i; @@ -110,7 +110,7 @@ void iavf_free_tx_resources(struct iavf_ring *tx_ring) * Since there is no access to the ring head register * in XL710, we need to use our local copies **/ -u32 iavf_get_tx_pending(struct iavf_ring *ring, bool in_sw) +static u32 iavf_get_tx_pending(struct iavf_ring *ring, bool in_sw) { u32 head, tail; @@ -128,6 +128,24 @@ u32 iavf_get_tx_pending(struct iavf_ring *ring, bool in_sw) } /** + * iavf_force_wb - Issue SW Interrupt so HW does a wb + * @vsi: the VSI we care about + * @q_vector: the vector on which to force writeback + **/ +static void iavf_force_wb(struct iavf_vsi *vsi, struct iavf_q_vector *q_vector) +{ + u32 val = IAVF_VFINT_DYN_CTLN1_INTENA_MASK | + IAVF_VFINT_DYN_CTLN1_ITR_INDX_MASK | /* set noitr */ + IAVF_VFINT_DYN_CTLN1_SWINT_TRIG_MASK | + IAVF_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_MASK + /* allow 00 to be written to the index */; + + wr32(&vsi->back->hw, + IAVF_VFINT_DYN_CTLN1(q_vector->reg_idx), + val); +} + +/** * iavf_detect_recover_hung - Function to detect and recover hung_queues * @vsi: pointer to vsi struct with tx queues * @@ -352,25 +370,6 @@ static void iavf_enable_wb_on_itr(struct iavf_vsi *vsi, q_vector->arm_wb_state = true; } -/** - * iavf_force_wb - Issue SW Interrupt so HW does a wb - * @vsi: the VSI we care about - * @q_vector: the vector on which to force writeback - * - **/ -void iavf_force_wb(struct iavf_vsi *vsi, struct iavf_q_vector *q_vector) -{ - u32 val = IAVF_VFINT_DYN_CTLN1_INTENA_MASK | - IAVF_VFINT_DYN_CTLN1_ITR_INDX_MASK | /* set noitr */ - IAVF_VFINT_DYN_CTLN1_SWINT_TRIG_MASK | - IAVF_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_MASK - /* allow 00 to be written to the index */; - - wr32(&vsi->back->hw, - IAVF_VFINT_DYN_CTLN1(q_vector->reg_idx), - val); -} - static inline bool iavf_container_is_rx(struct iavf_q_vector *q_vector, struct iavf_ring_container *rc) { @@ -687,7 +686,7 @@ err: * iavf_clean_rx_ring - Free Rx buffers * @rx_ring: ring to be cleaned **/ -void iavf_clean_rx_ring(struct iavf_ring *rx_ring) +static void iavf_clean_rx_ring(struct iavf_ring *rx_ring) { unsigned long bi_size; u16 i; diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.h b/drivers/net/ethernet/intel/iavf/iavf_txrx.h index 2624bf6d009e..7e6ee32d19b6 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.h +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.h @@ -442,15 +442,11 @@ static inline unsigned int iavf_rx_pg_order(struct iavf_ring *ring) bool iavf_alloc_rx_buffers(struct iavf_ring *rxr, u16 cleaned_count); netdev_tx_t iavf_xmit_frame(struct sk_buff *skb, struct net_device *netdev); -void iavf_clean_tx_ring(struct iavf_ring *tx_ring); -void iavf_clean_rx_ring(struct iavf_ring *rx_ring); int iavf_setup_tx_descriptors(struct iavf_ring *tx_ring); int iavf_setup_rx_descriptors(struct iavf_ring *rx_ring); void iavf_free_tx_resources(struct iavf_ring *tx_ring); void iavf_free_rx_resources(struct iavf_ring *rx_ring); int iavf_napi_poll(struct napi_struct *napi, int budget); -void iavf_force_wb(struct iavf_vsi *vsi, struct iavf_q_vector *q_vector); -u32 iavf_get_tx_pending(struct iavf_ring *ring, bool in_sw); void iavf_detect_recover_hung(struct iavf_vsi *vsi); int __iavf_maybe_stop_tx(struct iavf_ring *tx_ring, int size); bool __iavf_chk_linearize(struct sk_buff *skb); diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index eb2dc0983776..e16d4c83ed5f 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -814,8 +814,7 @@ static void ice_cleanup_fltr_mgmt_struct(struct ice_hw *hw) devm_kfree(ice_hw_to_dev(hw), lst_itr); } } - if (recps[i].root_buf) - devm_kfree(ice_hw_to_dev(hw), recps[i].root_buf); + devm_kfree(ice_hw_to_dev(hw), recps[i].root_buf); } ice_rm_all_sw_replay_rule_info(hw); devm_kfree(ice_hw_to_dev(hw), sw->recp_list); @@ -834,7 +833,7 @@ static int ice_get_fw_log_cfg(struct ice_hw *hw) u16 size; size = sizeof(*config) * ICE_AQC_FW_LOG_ID_MAX; - config = devm_kzalloc(ice_hw_to_dev(hw), size, GFP_KERNEL); + config = kzalloc(size, GFP_KERNEL); if (!config) return -ENOMEM; @@ -857,7 +856,7 @@ static int ice_get_fw_log_cfg(struct ice_hw *hw) } } - devm_kfree(ice_hw_to_dev(hw), config); + kfree(config); return status; } @@ -1011,8 +1010,7 @@ static int ice_cfg_fw_log(struct ice_hw *hw, bool enable) } out: - if (data) - devm_kfree(ice_hw_to_dev(hw), data); + devm_kfree(ice_hw_to_dev(hw), data); return status; } diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c index d2faf1baad2f..e7d2474c431c 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.c +++ b/drivers/net/ethernet/intel/ice/ice_controlq.c @@ -339,8 +339,7 @@ do { \ } \ } \ /* free the buffer info list */ \ - if ((qi)->ring.cmd_buf) \ - devm_kfree(ice_hw_to_dev(hw), (qi)->ring.cmd_buf); \ + devm_kfree(ice_hw_to_dev(hw), (qi)->ring.cmd_buf); \ /* free DMA head */ \ devm_kfree(ice_hw_to_dev(hw), (qi)->ring.dma_head); \ } while (0) @@ -1056,14 +1055,19 @@ ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq, if (cq->sq.next_to_use == cq->sq.count) cq->sq.next_to_use = 0; wr32(hw, cq->sq.tail, cq->sq.next_to_use); + ice_flush(hw); + + /* Wait a short time before initial ice_sq_done() check, to allow + * hardware time for completion. + */ + udelay(5); timeout = jiffies + ICE_CTL_Q_SQ_CMD_TIMEOUT; do { if (ice_sq_done(hw, cq)) break; - usleep_range(ICE_CTL_Q_SQ_CMD_USEC, - ICE_CTL_Q_SQ_CMD_USEC * 3 / 2); + usleep_range(100, 150); } while (time_before(jiffies, timeout)); /* if ready, copy the desc back to temp */ diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h b/drivers/net/ethernet/intel/ice/ice_controlq.h index 950b7f4a7a05..8f2fd1613a95 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.h +++ b/drivers/net/ethernet/intel/ice/ice_controlq.h @@ -35,7 +35,6 @@ enum ice_ctl_q { /* Control Queue timeout settings - max delay 1s */ #define ICE_CTL_Q_SQ_CMD_TIMEOUT HZ /* Wait max 1s */ -#define ICE_CTL_Q_SQ_CMD_USEC 100 /* Check every 100usec */ #define ICE_CTL_Q_ADMIN_INIT_TIMEOUT 10 /* Count 10 times */ #define ICE_CTL_Q_ADMIN_INIT_MSEC 100 /* Check every 100msec */ diff --git a/drivers/net/ethernet/intel/ice/ice_flow.c b/drivers/net/ethernet/intel/ice/ice_flow.c index ef103e47a8dc..85cca572c22a 100644 --- a/drivers/net/ethernet/intel/ice/ice_flow.c +++ b/drivers/net/ethernet/intel/ice/ice_flow.c @@ -1304,23 +1304,6 @@ ice_flow_find_prof_id(struct ice_hw *hw, enum ice_block blk, u64 prof_id) } /** - * ice_dealloc_flow_entry - Deallocate flow entry memory - * @hw: pointer to the HW struct - * @entry: flow entry to be removed - */ -static void -ice_dealloc_flow_entry(struct ice_hw *hw, struct ice_flow_entry *entry) -{ - if (!entry) - return; - - if (entry->entry) - devm_kfree(ice_hw_to_dev(hw), entry->entry); - - devm_kfree(ice_hw_to_dev(hw), entry); -} - -/** * ice_flow_rem_entry_sync - Remove a flow entry * @hw: pointer to the HW struct * @blk: classification stage @@ -1335,7 +1318,8 @@ ice_flow_rem_entry_sync(struct ice_hw *hw, enum ice_block __always_unused blk, list_del(&entry->l_entry); - ice_dealloc_flow_entry(hw, entry); + devm_kfree(ice_hw_to_dev(hw), entry->entry); + devm_kfree(ice_hw_to_dev(hw), entry); return 0; } @@ -1662,8 +1646,7 @@ ice_flow_add_entry(struct ice_hw *hw, enum ice_block blk, u64 prof_id, out: if (status && e) { - if (e->entry) - devm_kfree(ice_hw_to_dev(hw), e->entry); + devm_kfree(ice_hw_to_dev(hw), e->entry); devm_kfree(ice_hw_to_dev(hw), e); } diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 5ddb95d1073a..00e3afd507a4 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -321,31 +321,19 @@ static void ice_vsi_free_arrays(struct ice_vsi *vsi) dev = ice_pf_to_dev(pf); - if (vsi->af_xdp_zc_qps) { - bitmap_free(vsi->af_xdp_zc_qps); - vsi->af_xdp_zc_qps = NULL; - } + bitmap_free(vsi->af_xdp_zc_qps); + vsi->af_xdp_zc_qps = NULL; /* free the ring and vector containers */ - if (vsi->q_vectors) { - devm_kfree(dev, vsi->q_vectors); - vsi->q_vectors = NULL; - } - if (vsi->tx_rings) { - devm_kfree(dev, vsi->tx_rings); - vsi->tx_rings = NULL; - } - if (vsi->rx_rings) { - devm_kfree(dev, vsi->rx_rings); - vsi->rx_rings = NULL; - } - if (vsi->txq_map) { - devm_kfree(dev, vsi->txq_map); - vsi->txq_map = NULL; - } - if (vsi->rxq_map) { - devm_kfree(dev, vsi->rxq_map); - vsi->rxq_map = NULL; - } + devm_kfree(dev, vsi->q_vectors); + vsi->q_vectors = NULL; + devm_kfree(dev, vsi->tx_rings); + vsi->tx_rings = NULL; + devm_kfree(dev, vsi->rx_rings); + vsi->rx_rings = NULL; + devm_kfree(dev, vsi->txq_map); + vsi->txq_map = NULL; + devm_kfree(dev, vsi->rxq_map); + vsi->rxq_map = NULL; } /** @@ -902,10 +890,8 @@ static void ice_rss_clean(struct ice_vsi *vsi) dev = ice_pf_to_dev(pf); - if (vsi->rss_hkey_user) - devm_kfree(dev, vsi->rss_hkey_user); - if (vsi->rss_lut_user) - devm_kfree(dev, vsi->rss_lut_user); + devm_kfree(dev, vsi->rss_hkey_user); + devm_kfree(dev, vsi->rss_lut_user); ice_vsi_clean_rss_flow_fld(vsi); /* remove RSS replay list */ diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 65bf399a0efc..93979ab18bc1 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2633,11 +2633,11 @@ static void ice_vsi_assign_bpf_prog(struct ice_vsi *vsi, struct bpf_prog *prog) int i; old_prog = xchg(&vsi->xdp_prog, prog); - if (old_prog) - bpf_prog_put(old_prog); - ice_for_each_rxq(vsi, i) WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog); + + if (old_prog) + bpf_prog_put(old_prog); } /** @@ -2922,6 +2922,12 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, } } + /* hot swap progs and avoid toggling link */ + if (ice_is_xdp_ena_vsi(vsi) == !!prog) { + ice_vsi_assign_bpf_prog(vsi, prog); + return 0; + } + /* need to stop netdev while setting up the program for Rx rings */ if (if_running && !test_and_set_bit(ICE_VSI_DOWN, vsi->state)) { ret = ice_down(vsi); @@ -2954,13 +2960,6 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, xdp_ring_err = ice_realloc_zc_buf(vsi, false); if (xdp_ring_err) NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Rx resources failed"); - } else { - /* safe to call even when prog == vsi->xdp_prog as - * dev_xdp_install in net/core/dev.c incremented prog's - * refcount so corresponding bpf_prog_put won't cause - * underflow - */ - ice_vsi_assign_bpf_prog(vsi, prog); } if (if_running) @@ -7413,21 +7412,9 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu) } netdev->mtu = (unsigned int)new_mtu; - - /* if VSI is up, bring it down and then back up */ - if (!test_and_set_bit(ICE_VSI_DOWN, vsi->state)) { - err = ice_down(vsi); - if (err) { - netdev_err(netdev, "change MTU if_down err %d\n", err); - return err; - } - - err = ice_up(vsi); - if (err) { - netdev_err(netdev, "change MTU if_up err %d\n", err); - return err; - } - } + err = ice_down_up(vsi); + if (err) + return err; netdev_dbg(netdev, "changed MTU to %d\n", new_mtu); set_bit(ICE_FLAG_MTU_CHANGED, pf->flags); diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index b7682de0ae05..b664d60fd037 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -358,10 +358,7 @@ void ice_free_sched_node(struct ice_port_info *pi, struct ice_sched_node *node) node->sibling; } - /* leaf nodes have no children */ - if (node->children) - devm_kfree(ice_hw_to_dev(hw), node->children); - + devm_kfree(ice_hw_to_dev(hw), node->children); kfree(node->name); xa_erase(&pi->sched_node_ids, node->id); devm_kfree(ice_hw_to_dev(hw), node); @@ -859,10 +856,8 @@ void ice_sched_cleanup_all(struct ice_hw *hw) if (!hw) return; - if (hw->layer_info) { - devm_kfree(ice_hw_to_dev(hw), hw->layer_info); - hw->layer_info = NULL; - } + devm_kfree(ice_hw_to_dev(hw), hw->layer_info); + hw->layer_info = NULL; ice_sched_clear_port(hw->port_info); diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index 2ea6d24977a6..1f66914c7a20 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -905,14 +905,13 @@ err_unroll_intr: */ static int ice_pci_sriov_ena(struct ice_pf *pf, int num_vfs) { - int pre_existing_vfs = pci_num_vf(pf->pdev); struct device *dev = ice_pf_to_dev(pf); int err; - if (pre_existing_vfs && pre_existing_vfs != num_vfs) + if (!num_vfs) { ice_free_vfs(pf); - else if (pre_existing_vfs && pre_existing_vfs == num_vfs) return 0; + } if (num_vfs > pf->vfs.num_supported) { dev_err(dev, "Can't enable %d VFs, max VFs supported is %d\n", diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 2ea9e1ae5517..6db4ca7978cb 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -1636,21 +1636,16 @@ ice_save_vsi_ctx(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi) */ static void ice_clear_vsi_q_ctx(struct ice_hw *hw, u16 vsi_handle) { - struct ice_vsi_ctx *vsi; + struct ice_vsi_ctx *vsi = ice_get_vsi_ctx(hw, vsi_handle); u8 i; - vsi = ice_get_vsi_ctx(hw, vsi_handle); if (!vsi) return; ice_for_each_traffic_class(i) { - if (vsi->lan_q_ctx[i]) { - devm_kfree(ice_hw_to_dev(hw), vsi->lan_q_ctx[i]); - vsi->lan_q_ctx[i] = NULL; - } - if (vsi->rdma_q_ctx[i]) { - devm_kfree(ice_hw_to_dev(hw), vsi->rdma_q_ctx[i]); - vsi->rdma_q_ctx[i] = NULL; - } + devm_kfree(ice_hw_to_dev(hw), vsi->lan_q_ctx[i]); + vsi->lan_q_ctx[i] = NULL; + devm_kfree(ice_hw_to_dev(hw), vsi->rdma_q_ctx[i]); + vsi->rdma_q_ctx[i] = NULL; } } @@ -5468,9 +5463,7 @@ err_unroll: devm_kfree(ice_hw_to_dev(hw), fvit); } - if (rm->root_buf) - devm_kfree(ice_hw_to_dev(hw), rm->root_buf); - + devm_kfree(ice_hw_to_dev(hw), rm->root_buf); kfree(rm); err_free_lkup_exts: diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 43e8f19c7a0a..445ba7fe3c40 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -51,10 +51,27 @@ struct mlxsw_sp_vr; struct mlxsw_sp_lpm_tree; struct mlxsw_sp_rif_ops; -struct mlxsw_sp_rif { +struct mlxsw_sp_crif_key { + struct net_device *dev; +}; + +struct mlxsw_sp_crif { + struct mlxsw_sp_crif_key key; + struct rhash_head ht_node; + bool can_destroy; struct list_head nexthop_list; + struct mlxsw_sp_rif *rif; +}; + +static const struct rhashtable_params mlxsw_sp_crif_ht_params = { + .key_offset = offsetof(struct mlxsw_sp_crif, key), + .key_len = sizeof_field(struct mlxsw_sp_crif, key), + .head_offset = offsetof(struct mlxsw_sp_crif, ht_node), +}; + +struct mlxsw_sp_rif { + struct mlxsw_sp_crif *crif; /* NULL for underlay RIF */ struct list_head neigh_list; - struct net_device *dev; /* NULL for underlay RIF */ struct mlxsw_sp_fid *fid; unsigned char addr[ETH_ALEN]; int mtu; @@ -73,7 +90,9 @@ struct mlxsw_sp_rif { static struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif) { - return rif->dev; + if (!rif->crif) + return NULL; + return rif->crif->key.dev; } struct mlxsw_sp_rif_params { @@ -1060,6 +1079,61 @@ u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev) return tb_id; } +static void +mlxsw_sp_crif_init(struct mlxsw_sp_crif *crif, struct net_device *dev) +{ + crif->key.dev = dev; + INIT_LIST_HEAD(&crif->nexthop_list); +} + +static struct mlxsw_sp_crif * +mlxsw_sp_crif_alloc(struct net_device *dev) +{ + struct mlxsw_sp_crif *crif; + + crif = kzalloc(sizeof(*crif), GFP_KERNEL); + if (!crif) + return NULL; + + mlxsw_sp_crif_init(crif, dev); + return crif; +} + +static void mlxsw_sp_crif_free(struct mlxsw_sp_crif *crif) +{ + if (WARN_ON(crif->rif)) + return; + + WARN_ON(!list_empty(&crif->nexthop_list)); + kfree(crif); +} + +static int mlxsw_sp_crif_insert(struct mlxsw_sp_router *router, + struct mlxsw_sp_crif *crif) +{ + return rhashtable_insert_fast(&router->crif_ht, &crif->ht_node, + mlxsw_sp_crif_ht_params); +} + +static void mlxsw_sp_crif_remove(struct mlxsw_sp_router *router, + struct mlxsw_sp_crif *crif) +{ + rhashtable_remove_fast(&router->crif_ht, &crif->ht_node, + mlxsw_sp_crif_ht_params); +} + +static struct mlxsw_sp_crif * +mlxsw_sp_crif_lookup(struct mlxsw_sp_router *router, + const struct net_device *dev) +{ + struct mlxsw_sp_crif_key key = { + .dev = (struct net_device *)dev, + }; + + return rhashtable_lookup_fast(&router->crif_ht, &key, + mlxsw_sp_crif_ht_params); +} + static struct mlxsw_sp_rif * mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_rif_params *params, @@ -1648,17 +1722,26 @@ static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry); } -static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_rif *old_rif, - struct mlxsw_sp_rif *new_rif); +static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif); + static void mlxsw_sp_rif_migrate_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *old_rif, struct mlxsw_sp_rif *new_rif, bool migrate_nhs) { + struct mlxsw_sp_crif *crif = old_rif->crif; + struct mlxsw_sp_crif mock_crif = {}; + if (migrate_nhs) - mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, old_rif, new_rif); + mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif); + /* Plant a mock CRIF so that destroying the old RIF doesn't unoffload + * our nexthops and IPIP tunnels, and doesn't sever the crif->rif link. + */ + mlxsw_sp_crif_init(&mock_crif, crif->key.dev); + old_rif->crif = &mock_crif; + mock_crif.rif = old_rif; mlxsw_sp_rif_destroy(old_rif); } @@ -1684,9 +1767,6 @@ mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp, return 0; } -static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_rif *rif); - /** * __mlxsw_sp_ipip_entry_update_tunnel - Update offload related to IPIP entry. * @mlxsw_sp: mlxsw_sp. @@ -2915,7 +2995,7 @@ struct mlxsw_sp_nexthop_key { struct mlxsw_sp_nexthop { struct list_head neigh_list_node; /* member of neigh entry list */ - struct list_head rif_list_node; + struct list_head crif_list_node; struct list_head router_list_node; struct mlxsw_sp_nexthop_group_info *nhgi; /* pointer back to the group * this nexthop belongs to @@ -2928,7 +3008,7 @@ struct mlxsw_sp_nexthop { int nh_weight; int norm_nh_weight; int num_adj_entries; - struct mlxsw_sp_rif *rif; + struct mlxsw_sp_crif *crif; u8 should_offload:1, /* set indicates this nexthop should be written * to the adjacency table. */ @@ -2951,9 +3031,9 @@ struct mlxsw_sp_nexthop { static struct net_device * mlxsw_sp_nexthop_dev(const struct mlxsw_sp_nexthop *nh) { - if (nh->rif) - return mlxsw_sp_rif_dev(nh->rif); - return NULL; + if (!nh->crif) + return NULL; + return nh->crif->key.dev; } enum mlxsw_sp_nexthop_group_type { @@ -2978,7 +3058,11 @@ struct mlxsw_sp_nexthop_group_info { static struct mlxsw_sp_rif * mlxsw_sp_nhgi_rif(const struct mlxsw_sp_nexthop_group_info *nhgi) { - return nhgi->nexthops[0].rif; + struct mlxsw_sp_crif *crif = nhgi->nexthops[0].crif; + + if (!crif) + return NULL; + return crif->rif; } struct mlxsw_sp_nexthop_group_vr_key { @@ -3102,7 +3186,9 @@ int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index, struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh) { - return nh->rif; + if (WARN_ON(!nh->crif)) + return NULL; + return nh->crif->rif; } bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh) @@ -3487,11 +3573,12 @@ static int __mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, bool force, char *ratr_pl) { struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry; + struct mlxsw_sp_rif *rif = mlxsw_sp_nexthop_rif(nh); enum mlxsw_reg_ratr_op op; u16 rif_index; - rif_index = nh->rif ? nh->rif->rif_index : - mlxsw_sp->router->lb_rif_index; + rif_index = rif ? rif->rif_index : + mlxsw_sp->router->lb_crif->rif->rif_index; op = force ? MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY : MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY_ON_ACTIVITY; mlxsw_reg_ratr_pack(ratr_pl, op, true, MLXSW_REG_RATR_TYPE_ETHERNET, @@ -4109,23 +4196,23 @@ mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp, } } -static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh, - struct mlxsw_sp_rif *rif) +static void mlxsw_sp_nexthop_crif_init(struct mlxsw_sp_nexthop *nh, + struct mlxsw_sp_crif *crif) { - if (nh->rif) + if (nh->crif) return; - nh->rif = rif; - list_add(&nh->rif_list_node, &rif->nexthop_list); + nh->crif = crif; + list_add(&nh->crif_list_node, &crif->nexthop_list); } -static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh) +static void mlxsw_sp_nexthop_crif_fini(struct mlxsw_sp_nexthop *nh) { - if (!nh->rif) + if (!nh->crif) return; - list_del(&nh->rif_list_node); - nh->rif = NULL; + list_del(&nh->crif_list_node); + nh->crif = NULL; } static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp, @@ -4137,6 +4224,9 @@ static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp, u8 nud_state, dead; int err; + if (WARN_ON(!nh->crif->rif)) + return 0; + if (!nh->nhgi->gateway || nh->neigh_entry) return 0; dev = mlxsw_sp_nexthop_dev(nh); @@ -4227,15 +4317,20 @@ static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh, struct mlxsw_sp_ipip_entry *ipip_entry) { + struct mlxsw_sp_crif *crif; bool removing; if (!nh->nhgi->gateway || nh->ipip_entry) return; + crif = mlxsw_sp_crif_lookup(mlxsw_sp->router, ipip_entry->ol_dev); + if (WARN_ON(!crif)) + return; + nh->ipip_entry = ipip_entry; removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev); __mlxsw_sp_nexthop_neigh_update(nh, removing); - mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common); + mlxsw_sp_nexthop_crif_init(nh, crif); } static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp, @@ -4267,7 +4362,7 @@ static int mlxsw_sp_nexthop_type_init(struct mlxsw_sp *mlxsw_sp, { const struct mlxsw_sp_ipip_ops *ipip_ops; struct mlxsw_sp_ipip_entry *ipip_entry; - struct mlxsw_sp_rif *rif; + struct mlxsw_sp_crif *crif; int err; ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev); @@ -4281,11 +4376,15 @@ static int mlxsw_sp_nexthop_type_init(struct mlxsw_sp *mlxsw_sp, } nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH; - rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); - if (!rif) + crif = mlxsw_sp_crif_lookup(mlxsw_sp->router, dev); + if (!crif) + return 0; + + mlxsw_sp_nexthop_crif_init(nh, crif); + + if (!crif->rif) return 0; - mlxsw_sp_nexthop_rif_init(nh, rif); err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh); if (err) goto err_neigh_init; @@ -4293,25 +4392,30 @@ static int mlxsw_sp_nexthop_type_init(struct mlxsw_sp *mlxsw_sp, return 0; err_neigh_init: - mlxsw_sp_nexthop_rif_fini(nh); + mlxsw_sp_nexthop_crif_fini(nh); return err; } -static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop *nh) +static void mlxsw_sp_nexthop_type_rif_gone(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) { switch (nh->type) { case MLXSW_SP_NEXTHOP_TYPE_ETH: mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh); - mlxsw_sp_nexthop_rif_fini(nh); break; case MLXSW_SP_NEXTHOP_TYPE_IPIP: - mlxsw_sp_nexthop_rif_fini(nh); mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh); break; } } +static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + mlxsw_sp_nexthop_type_rif_gone(mlxsw_sp, nh); + mlxsw_sp_nexthop_crif_fini(nh); +} + static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp, struct mlxsw_sp_nexthop *nh, @@ -4402,7 +4506,7 @@ static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh; bool removing; - list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) { + list_for_each_entry(nh, &rif->crif->nexthop_list, crif_list_node) { switch (nh->type) { case MLXSW_SP_NEXTHOP_TYPE_ETH: removing = false; @@ -4420,25 +4524,14 @@ static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, } } -static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_rif *old_rif, - struct mlxsw_sp_rif *new_rif) -{ - struct mlxsw_sp_nexthop *nh; - - list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list); - list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node) - nh->rif = new_rif; - mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif); -} - static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif) { struct mlxsw_sp_nexthop *nh, *tmp; - list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) { - mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh); + list_for_each_entry_safe(nh, tmp, &rif->crif->nexthop_list, + crif_list_node) { + mlxsw_sp_nexthop_type_rif_gone(mlxsw_sp, nh); mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp); } } @@ -4458,7 +4551,7 @@ static int mlxsw_sp_adj_trap_entry_init(struct mlxsw_sp *mlxsw_sp) mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY, true, MLXSW_REG_RATR_TYPE_ETHERNET, mlxsw_sp->router->adj_trap_index, - mlxsw_sp->router->lb_rif_index); + mlxsw_sp->router->lb_crif->rif->rif_index); mlxsw_reg_ratr_trap_action_set(ratr_pl, trap_action); mlxsw_reg_ratr_trap_id_set(ratr_pl, MLXSW_TRAP_ID_RTR_EGRESS0); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl); @@ -4774,21 +4867,19 @@ static bool mlxsw_sp_nexthop_obj_is_gateway(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp_nexthop_obj_blackhole_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh) { - u16 lb_rif_index = mlxsw_sp->router->lb_rif_index; - nh->action = MLXSW_SP_NEXTHOP_ACTION_DISCARD; nh->should_offload = 1; /* While nexthops that discard packets do not forward packets * via an egress RIF, they still need to be programmed using a * valid RIF, so use the loopback RIF created during init. */ - nh->rif = mlxsw_sp->router->rifs[lb_rif_index]; + nh->crif = mlxsw_sp->router->lb_crif; } static void mlxsw_sp_nexthop_obj_blackhole_fini(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh) { - nh->rif = NULL; + nh->crif = NULL; nh->should_offload = 0; } @@ -7796,6 +7887,9 @@ static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif) static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif) { + /* Signal to nexthop cleanup that the RIF is going away. */ + rif->crif->rif = NULL; + mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index); mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif); mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif); @@ -7905,23 +7999,26 @@ static void mlxsw_sp_rif_index_free(struct mlxsw_sp *mlxsw_sp, u16 rif_index, static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index, u16 vr_id, - struct net_device *l3_dev) + struct mlxsw_sp_crif *crif) { + struct net_device *l3_dev = crif ? crif->key.dev : NULL; struct mlxsw_sp_rif *rif; rif = kzalloc(rif_size, GFP_KERNEL); if (!rif) return NULL; - INIT_LIST_HEAD(&rif->nexthop_list); INIT_LIST_HEAD(&rif->neigh_list); if (l3_dev) { ether_addr_copy(rif->addr, l3_dev->dev_addr); rif->mtu = l3_dev->mtu; - rif->dev = l3_dev; } rif->vr_id = vr_id; rif->rif_index = rif_index; + if (crif) { + rif->crif = crif; + crif->rif = rif; + } return rif; } @@ -7929,7 +8026,9 @@ static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index, static void mlxsw_sp_rif_free(struct mlxsw_sp_rif *rif) { WARN_ON(!list_empty(&rif->neigh_list)); - WARN_ON(!list_empty(&rif->nexthop_list)); + + if (rif->crif) + rif->crif->rif = NULL; kfree(rif); } @@ -8163,6 +8262,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_rif_ops *ops; struct mlxsw_sp_fid *fid = NULL; enum mlxsw_sp_rif_type type; + struct mlxsw_sp_crif *crif; struct mlxsw_sp_rif *rif; struct mlxsw_sp_vr *vr; u16 rif_index; @@ -8182,7 +8282,13 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, goto err_rif_index_alloc; } - rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev); + crif = mlxsw_sp_crif_lookup(mlxsw_sp->router, params->dev); + if (WARN_ON(!crif)) { + err = -ENOENT; + goto err_crif_lookup; + } + + rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, crif); if (!rif) { err = -ENOMEM; goto err_rif_alloc; @@ -8241,6 +8347,7 @@ err_fid_get: dev_put(params->dev); mlxsw_sp_rif_free(rif); err_rif_alloc: +err_crif_lookup: mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries); err_rif_index_alloc: vr->rif_count--; @@ -8253,6 +8360,7 @@ static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) struct net_device *dev = mlxsw_sp_rif_dev(rif); const struct mlxsw_sp_rif_ops *ops = rif->ops; struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + struct mlxsw_sp_crif *crif = rif->crif; struct mlxsw_sp_fid *fid = rif->fid; u8 rif_entries = rif->rif_entries; u16 rif_index = rif->rif_index; @@ -8283,6 +8391,9 @@ static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries); vr->rif_count--; mlxsw_sp_vr_put(mlxsw_sp, vr); + + if (crif->can_destroy) + mlxsw_sp_crif_free(crif); } void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp, @@ -9148,6 +9259,104 @@ static int mlxsw_sp_router_port_pre_changeaddr_event(struct mlxsw_sp_rif *rif, return -ENOBUFS; } +static bool mlxsw_sp_router_netdevice_interesting(struct mlxsw_sp *mlxsw_sp, + struct net_device *dev) +{ + struct vlan_dev_priv *vlan; + + if (netif_is_lag_master(dev) || + netif_is_bridge_master(dev) || + mlxsw_sp_port_dev_check(dev) || + mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev) || + netif_is_l3_master(dev)) + return true; + + if (!is_vlan_dev(dev)) + return false; + + vlan = vlan_dev_priv(dev); + return netif_is_lag_master(vlan->real_dev) || + netif_is_bridge_master(vlan->real_dev) || + mlxsw_sp_port_dev_check(vlan->real_dev); +} + +static struct mlxsw_sp_crif * +mlxsw_sp_crif_register(struct mlxsw_sp_router *router, struct net_device *dev) +{ + struct mlxsw_sp_crif *crif; + int err; + + if (WARN_ON(mlxsw_sp_crif_lookup(router, dev))) + return NULL; + + crif = mlxsw_sp_crif_alloc(dev); + if (!crif) + return ERR_PTR(-ENOMEM); + + err = mlxsw_sp_crif_insert(router, crif); + if (err) + goto err_netdev_insert; + + return crif; + +err_netdev_insert: + mlxsw_sp_crif_free(crif); + return ERR_PTR(err); +} + +static void mlxsw_sp_crif_unregister(struct mlxsw_sp_router *router, + struct mlxsw_sp_crif *crif) +{ + struct mlxsw_sp_nexthop *nh, *tmp; + + mlxsw_sp_crif_remove(router, crif); + + list_for_each_entry_safe(nh, tmp, &crif->nexthop_list, crif_list_node) + mlxsw_sp_nexthop_type_fini(router->mlxsw_sp, nh); + + if (crif->rif) + crif->can_destroy = true; + else + mlxsw_sp_crif_free(crif); +} + +static int mlxsw_sp_netdevice_register(struct mlxsw_sp_router *router, + struct net_device *dev) +{ + struct mlxsw_sp_crif *crif; + + if (!mlxsw_sp_router_netdevice_interesting(router->mlxsw_sp, dev)) + return 0; + + crif = mlxsw_sp_crif_register(router, dev); + return PTR_ERR_OR_ZERO(crif); +} + +static void mlxsw_sp_netdevice_unregister(struct mlxsw_sp_router *router, + struct net_device *dev) +{ + struct mlxsw_sp_crif *crif; + + if (!mlxsw_sp_router_netdevice_interesting(router->mlxsw_sp, dev)) + return; + + /* netdev_run_todo(), by way of netdev_wait_allrefs_any(), rebroadcasts + * the NETDEV_UNREGISTER message, so we can get here twice. If that's + * what happened, the netdevice state is NETREG_UNREGISTERED. In that + * case, we expect to have collected the CRIF already, and warn if it + * still exists. Otherwise we expect the CRIF to exist. + */ + crif = mlxsw_sp_crif_lookup(router, dev); + if (dev->reg_state == NETREG_UNREGISTERED) { + if (!WARN_ON(crif)) + return; + } + if (WARN_ON(!crif)) + return; + + mlxsw_sp_crif_unregister(router, crif); +} + static bool mlxsw_sp_is_offload_xstats_event(unsigned long event) { switch (event) { @@ -9367,6 +9576,15 @@ static int mlxsw_sp_router_netdevice_event(struct notifier_block *nb, mutex_lock(&mlxsw_sp->router->lock); + if (event == NETDEV_REGISTER) { + err = mlxsw_sp_netdevice_register(router, dev); + if (err) + /* No need to roll this back, UNREGISTER will collect it + * anyhow. + */ + goto out; + } + if (mlxsw_sp_is_offload_xstats_event(event)) err = mlxsw_sp_netdevice_offload_xstats_cmd(mlxsw_sp, dev, event, ptr); @@ -9381,6 +9599,10 @@ static int mlxsw_sp_router_netdevice_event(struct notifier_block *nb, else if (mlxsw_sp_is_vrf_event(event, ptr)) err = mlxsw_sp_netdevice_vrf_event(dev, event, ptr); + if (event == NETDEV_UNREGISTER) + mlxsw_sp_netdevice_unregister(router, dev); + +out: mutex_unlock(&mlxsw_sp->router->lock); return notifier_from_errno(err); @@ -9901,6 +10123,7 @@ mlxsw_sp_rif_ipip_lb_ul_rif_op(struct mlxsw_sp_rif *ul_rif, bool enable) static struct mlxsw_sp_rif * mlxsw_sp_ul_rif_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr, + struct mlxsw_sp_crif *ul_crif, struct netlink_ext_ack *extack) { struct mlxsw_sp_rif *ul_rif; @@ -9914,7 +10137,8 @@ mlxsw_sp_ul_rif_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr, return ERR_PTR(err); } - ul_rif = mlxsw_sp_rif_alloc(sizeof(*ul_rif), rif_index, vr->id, NULL); + ul_rif = mlxsw_sp_rif_alloc(sizeof(*ul_rif), rif_index, vr->id, + ul_crif); if (!ul_rif) { err = -ENOMEM; goto err_rif_alloc; @@ -9953,6 +10177,7 @@ static void mlxsw_sp_ul_rif_destroy(struct mlxsw_sp_rif *ul_rif) static struct mlxsw_sp_rif * mlxsw_sp_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, + struct mlxsw_sp_crif *ul_crif, struct netlink_ext_ack *extack) { struct mlxsw_sp_vr *vr; @@ -9965,7 +10190,7 @@ mlxsw_sp_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, if (refcount_inc_not_zero(&vr->ul_rif_refcnt)) return vr->ul_rif; - vr->ul_rif = mlxsw_sp_ul_rif_create(mlxsw_sp, vr, extack); + vr->ul_rif = mlxsw_sp_ul_rif_create(mlxsw_sp, vr, ul_crif, extack); if (IS_ERR(vr->ul_rif)) { err = PTR_ERR(vr->ul_rif); goto err_ul_rif_create; @@ -10003,7 +10228,7 @@ int mlxsw_sp_router_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, int err = 0; mutex_lock(&mlxsw_sp->router->lock); - ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL); + ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL, NULL); if (IS_ERR(ul_rif)) { err = PTR_ERR(ul_rif); goto out; @@ -10039,7 +10264,7 @@ mlxsw_sp2_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif, struct mlxsw_sp_rif *ul_rif; int err; - ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, extack); + ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL, extack); if (IS_ERR(ul_rif)) return PTR_ERR(ul_rif); @@ -10561,28 +10786,41 @@ static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); } -static int mlxsw_sp_lb_rif_init(struct mlxsw_sp *mlxsw_sp) +static int mlxsw_sp_lb_rif_init(struct mlxsw_sp *mlxsw_sp, + struct netlink_ext_ack *extack) { - u16 lb_rif_index; + struct mlxsw_sp_router *router = mlxsw_sp->router; + struct mlxsw_sp_rif *lb_rif; int err; + router->lb_crif = mlxsw_sp_crif_alloc(NULL); + if (IS_ERR(router->lb_crif)) + return PTR_ERR(router->lb_crif); + /* Create a generic loopback RIF associated with the main table * (default VRF). Any table can be used, but the main table exists - * anyway, so we do not waste resources. + * anyway, so we do not waste resources. Loopback RIFs are usually + * created with a NULL CRIF, but this RIF is used as a fallback RIF + * for blackhole nexthops, and nexthops expect to have a valid CRIF. */ - err = mlxsw_sp_router_ul_rif_get(mlxsw_sp, RT_TABLE_MAIN, - &lb_rif_index); - if (err) - return err; - - mlxsw_sp->router->lb_rif_index = lb_rif_index; + lb_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, RT_TABLE_MAIN, router->lb_crif, + extack); + if (IS_ERR(lb_rif)) { + err = PTR_ERR(lb_rif); + goto err_ul_rif_get; + } return 0; + +err_ul_rif_get: + mlxsw_sp_crif_free(router->lb_crif); + return err; } static void mlxsw_sp_lb_rif_fini(struct mlxsw_sp *mlxsw_sp) { - mlxsw_sp_router_ul_rif_put(mlxsw_sp, mlxsw_sp->router->lb_rif_index); + mlxsw_sp_ul_rif_put(mlxsw_sp->router->lb_crif->rif); + mlxsw_sp_crif_free(mlxsw_sp->router->lb_crif); } static int mlxsw_sp1_router_init(struct mlxsw_sp *mlxsw_sp) @@ -10647,6 +10885,11 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, if (err) goto err_ipips_init; + err = rhashtable_init(&mlxsw_sp->router->crif_ht, + &mlxsw_sp_crif_ht_params); + if (err) + goto err_crif_ht_init; + err = mlxsw_sp_rifs_init(mlxsw_sp); if (err) goto err_rifs_init; @@ -10674,7 +10917,7 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, if (err) goto err_vrs_init; - err = mlxsw_sp_lb_rif_init(mlxsw_sp); + err = mlxsw_sp_lb_rif_init(mlxsw_sp, extack); if (err) goto err_lb_rif_init; @@ -10778,6 +11021,8 @@ err_nexthop_group_ht_init: err_nexthop_ht_init: mlxsw_sp_rifs_fini(mlxsw_sp); err_rifs_init: + rhashtable_destroy(&mlxsw_sp->router->crif_ht); +err_crif_ht_init: mlxsw_sp_ipips_fini(mlxsw_sp); err_ipips_init: __mlxsw_sp_router_fini(mlxsw_sp); @@ -10813,6 +11058,7 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) rhashtable_destroy(&router->nexthop_group_ht); rhashtable_destroy(&router->nexthop_ht); mlxsw_sp_rifs_fini(mlxsw_sp); + rhashtable_destroy(&mlxsw_sp->router->crif_ht); mlxsw_sp_ipips_fini(mlxsw_sp); __mlxsw_sp_router_fini(mlxsw_sp); cancel_delayed_work_sync(&router->nh_grp_activity_dw); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index 5a0babc614b4..9a2669a08480 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -20,6 +20,7 @@ struct mlxsw_sp_router_nve_decap { struct mlxsw_sp_router { struct mlxsw_sp *mlxsw_sp; + struct rhashtable crif_ht; struct gen_pool *rifs_table; struct mlxsw_sp_rif **rifs; struct idr rif_mac_profiles_idr; @@ -59,7 +60,7 @@ struct mlxsw_sp_router { struct mlxsw_sp_router_nve_decap nve_decap_config; struct mutex lock; /* Protects shared router resources */ struct mlxsw_sp_fib_entry_op_ctx *ll_op_ctx; - u16 lb_rif_index; + struct mlxsw_sp_crif *lb_crif; const struct mlxsw_sp_adj_grp_size_range *adj_grp_size_ranges; size_t adj_grp_size_ranges_count; struct delayed_work nh_grp_activity_dw; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c index ef1023930fd0..20fc455b3337 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c @@ -46,22 +46,12 @@ static int dwmac_generic_probe(struct platform_device *pdev) plat_dat->unicast_filter_entries = 1; } - /* Custom initialisation (if needed) */ - if (plat_dat->init) { - ret = plat_dat->init(pdev, plat_dat->bsp_priv); - if (ret) - goto err_remove_config_dt; - } - - ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); + ret = stmmac_pltfr_probe(pdev, plat_dat, &stmmac_res); if (ret) - goto err_exit; + goto err_remove_config_dt; return 0; -err_exit: - if (plat_dat->exit) - plat_dat->exit(pdev, plat_dat->bsp_priv); err_remove_config_dt: if (pdev->dev.of_node) stmmac_remove_config_dt(pdev, plat_dat); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index fa0fc53c56a3..e62940414e54 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -708,7 +708,7 @@ static int qcom_ethqos_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); + plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) { dev_err(dev, "dt configuration failed\n"); return PTR_ERR(plat_dat); @@ -717,10 +717,8 @@ static int qcom_ethqos_probe(struct platform_device *pdev) plat_dat->clks_config = ethqos_clks_config; ethqos = devm_kzalloc(dev, sizeof(*ethqos), GFP_KERNEL); - if (!ethqos) { - ret = -ENOMEM; - goto out_config_dt; - } + if (!ethqos) + return -ENOMEM; ethqos->phy_mode = device_get_phy_mode(dev); switch (ethqos->phy_mode) { @@ -734,19 +732,15 @@ static int qcom_ethqos_probe(struct platform_device *pdev) ethqos->configure_func = ethqos_configure_sgmii; break; case -ENODEV: - ret = -ENODEV; - goto out_config_dt; + return -ENODEV; default: - ret = -EINVAL; - goto out_config_dt; + return -EINVAL; } ethqos->pdev = pdev; ethqos->rgmii_base = devm_platform_ioremap_resource_byname(pdev, "rgmii"); - if (IS_ERR(ethqos->rgmii_base)) { - ret = PTR_ERR(ethqos->rgmii_base); - goto out_config_dt; - } + if (IS_ERR(ethqos->rgmii_base)) + return PTR_ERR(ethqos->rgmii_base); ethqos->mac_base = stmmac_res.addr; @@ -757,24 +751,20 @@ static int qcom_ethqos_probe(struct platform_device *pdev) ethqos->has_emac_ge_3 = data->has_emac_ge_3; ethqos->link_clk = devm_clk_get(dev, data->link_clk_name ?: "rgmii"); - if (IS_ERR(ethqos->link_clk)) { - ret = PTR_ERR(ethqos->link_clk); - goto out_config_dt; - } + if (IS_ERR(ethqos->link_clk)) + return PTR_ERR(ethqos->link_clk); ret = ethqos_clks_config(ethqos, true); if (ret) - goto out_config_dt; + return ret; ret = devm_add_action_or_reset(dev, ethqos_clks_disable, ethqos); if (ret) - goto out_config_dt; + return ret; ethqos->serdes_phy = devm_phy_optional_get(dev, "serdes"); - if (IS_ERR(ethqos->serdes_phy)) { - ret = PTR_ERR(ethqos->serdes_phy); - goto out_config_dt; - } + if (IS_ERR(ethqos->serdes_phy)) + return PTR_ERR(ethqos->serdes_phy); ethqos->speed = SPEED_1000; ethqos_update_link_clk(ethqos, SPEED_1000); @@ -797,16 +787,7 @@ static int qcom_ethqos_probe(struct platform_device *pdev) plat_dat->serdes_powerdown = qcom_ethqos_serdes_powerdown; } - ret = stmmac_dvr_probe(dev, plat_dat, &stmmac_res); - if (ret) - goto out_config_dt; - - return ret; - -out_config_dt: - stmmac_remove_config_dt(pdev, plat_dat); - - return ret; + return devm_stmmac_pltfr_probe(pdev, plat_dat, &stmmac_res); } static const struct of_device_id qcom_ethqos_match[] = { @@ -820,7 +801,6 @@ MODULE_DEVICE_TABLE(of, qcom_ethqos_match); static struct platform_driver qcom_ethqos_driver = { .probe = qcom_ethqos_probe, - .remove_new = stmmac_pltfr_remove, .driver = { .name = "qcom-ethqos", .pm = &stmmac_pltfr_pm_ops, diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 3c6b55b60461..231152ee5a32 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -8,6 +8,7 @@ Author: Giuseppe Cavallaro <[email protected]> *******************************************************************************/ +#include <linux/device.h> #include <linux/platform_device.h> #include <linux/pm_runtime.h> #include <linux/module.h> @@ -629,6 +630,39 @@ error_pclk_get: return ret; } +static void devm_stmmac_remove_config_dt(void *data) +{ + struct plat_stmmacenet_data *plat = data; + + /* Platform data argument is unused */ + stmmac_remove_config_dt(NULL, plat); +} + +/** + * devm_stmmac_probe_config_dt + * @pdev: platform_device structure + * @mac: MAC address to use + * Description: Devres variant of stmmac_probe_config_dt(). Does not require + * the user to call stmmac_remove_config_dt() at driver detach. + */ +struct plat_stmmacenet_data * +devm_stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) +{ + struct plat_stmmacenet_data *plat; + int ret; + + plat = stmmac_probe_config_dt(pdev, mac); + if (IS_ERR(plat)) + return plat; + + ret = devm_add_action_or_reset(&pdev->dev, + devm_stmmac_remove_config_dt, plat); + if (ret) + return ERR_PTR(ret); + + return plat; +} + /** * stmmac_remove_config_dt - undo the effects of stmmac_probe_config_dt() * @pdev: platform_device structure @@ -651,12 +685,19 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) return ERR_PTR(-EINVAL); } +struct plat_stmmacenet_data * +devm_stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) +{ + return ERR_PTR(-EINVAL); +} + void stmmac_remove_config_dt(struct platform_device *pdev, struct plat_stmmacenet_data *plat) { } #endif /* CONFIG_OF */ EXPORT_SYMBOL_GPL(stmmac_probe_config_dt); +EXPORT_SYMBOL_GPL(devm_stmmac_probe_config_dt); EXPORT_SYMBOL_GPL(stmmac_remove_config_dt); int stmmac_get_platform_resources(struct platform_device *pdev, @@ -702,6 +743,114 @@ int stmmac_get_platform_resources(struct platform_device *pdev, EXPORT_SYMBOL_GPL(stmmac_get_platform_resources); /** + * stmmac_pltfr_init + * @pdev: pointer to the platform device + * @plat: driver data platform structure + * Description: Call the platform's init callback (if any) and propagate + * the return value. + */ +int stmmac_pltfr_init(struct platform_device *pdev, + struct plat_stmmacenet_data *plat) +{ + int ret = 0; + + if (plat->init) + ret = plat->init(pdev, plat->bsp_priv); + + return ret; +} +EXPORT_SYMBOL_GPL(stmmac_pltfr_init); + +/** + * stmmac_pltfr_exit + * @pdev: pointer to the platform device + * @plat: driver data platform structure + * Description: Call the platform's exit callback (if any). + */ +void stmmac_pltfr_exit(struct platform_device *pdev, + struct plat_stmmacenet_data *plat) +{ + if (plat->exit) + plat->exit(pdev, plat->bsp_priv); +} +EXPORT_SYMBOL_GPL(stmmac_pltfr_exit); + +/** + * stmmac_pltfr_probe + * @pdev: platform device pointer + * @plat: driver data platform structure + * @res: stmmac resources structure + * Description: This calls the platform's init() callback and probes the + * stmmac driver. + */ +int stmmac_pltfr_probe(struct platform_device *pdev, + struct plat_stmmacenet_data *plat, + struct stmmac_resources *res) +{ + int ret; + + ret = stmmac_pltfr_init(pdev, plat); + if (ret) + return ret; + + ret = stmmac_dvr_probe(&pdev->dev, plat, res); + if (ret) { + stmmac_pltfr_exit(pdev, plat); + return ret; + } + + return ret; +} +EXPORT_SYMBOL_GPL(stmmac_pltfr_probe); + +static void devm_stmmac_pltfr_remove(void *data) +{ + struct platform_device *pdev = data; + + stmmac_pltfr_remove_no_dt(pdev); +} + +/** + * devm_stmmac_pltfr_probe + * @pdev: pointer to the platform device + * @plat: driver data platform structure + * @res: stmmac resources + * Description: Devres variant of stmmac_pltfr_probe(). Allows users to skip + * calling stmmac_pltfr_remove() on driver detach. + */ +int devm_stmmac_pltfr_probe(struct platform_device *pdev, + struct plat_stmmacenet_data *plat, + struct stmmac_resources *res) +{ + int ret; + + ret = stmmac_pltfr_probe(pdev, plat, res); + if (ret) + return ret; + + return devm_add_action_or_reset(&pdev->dev, devm_stmmac_pltfr_remove, + pdev); +} +EXPORT_SYMBOL_GPL(devm_stmmac_pltfr_probe); + +/** + * stmmac_pltfr_remove_no_dt + * @pdev: pointer to the platform device + * Description: This undoes the effects of stmmac_pltfr_probe() by removing the + * driver and calling the platform's exit() callback. + */ +void stmmac_pltfr_remove_no_dt(struct platform_device *pdev) +{ + struct net_device *ndev = platform_get_drvdata(pdev); + struct stmmac_priv *priv = netdev_priv(ndev); + struct plat_stmmacenet_data *plat = priv->plat; + + stmmac_dvr_remove(&pdev->dev); + stmmac_pltfr_exit(pdev, plat); +} +EXPORT_SYMBOL_GPL(stmmac_pltfr_remove_no_dt); + +/** * stmmac_pltfr_remove * @pdev: platform device pointer * Description: this function calls the main to free the net resources @@ -713,11 +862,7 @@ void stmmac_pltfr_remove(struct platform_device *pdev) struct stmmac_priv *priv = netdev_priv(ndev); struct plat_stmmacenet_data *plat = priv->plat; - stmmac_dvr_remove(&pdev->dev); - - if (plat->exit) - plat->exit(pdev, plat->bsp_priv); - + stmmac_pltfr_remove_no_dt(pdev); stmmac_remove_config_dt(pdev, plat); } EXPORT_SYMBOL_GPL(stmmac_pltfr_remove); @@ -737,8 +882,7 @@ static int __maybe_unused stmmac_pltfr_suspend(struct device *dev) struct platform_device *pdev = to_platform_device(dev); ret = stmmac_suspend(dev); - if (priv->plat->exit) - priv->plat->exit(pdev, priv->plat->bsp_priv); + stmmac_pltfr_exit(pdev, priv->plat); return ret; } @@ -755,9 +899,11 @@ static int __maybe_unused stmmac_pltfr_resume(struct device *dev) struct net_device *ndev = dev_get_drvdata(dev); struct stmmac_priv *priv = netdev_priv(ndev); struct platform_device *pdev = to_platform_device(dev); + int ret; - if (priv->plat->init) - priv->plat->init(pdev, priv->plat->bsp_priv); + ret = stmmac_pltfr_init(pdev, priv->plat->bsp_priv); + if (ret) + return ret; return stmmac_resume(dev); } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h index f7e457946681..c5565b2a70ac 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h @@ -13,12 +13,26 @@ struct plat_stmmacenet_data * stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac); +struct plat_stmmacenet_data * +devm_stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac); void stmmac_remove_config_dt(struct platform_device *pdev, struct plat_stmmacenet_data *plat); int stmmac_get_platform_resources(struct platform_device *pdev, struct stmmac_resources *stmmac_res); +int stmmac_pltfr_init(struct platform_device *pdev, + struct plat_stmmacenet_data *plat); +void stmmac_pltfr_exit(struct platform_device *pdev, + struct plat_stmmacenet_data *plat); + +int stmmac_pltfr_probe(struct platform_device *pdev, + struct plat_stmmacenet_data *plat, + struct stmmac_resources *res); +int devm_stmmac_pltfr_probe(struct platform_device *pdev, + struct plat_stmmacenet_data *plat, + struct stmmac_resources *res); +void stmmac_pltfr_remove_no_dt(struct platform_device *pdev); void stmmac_pltfr_remove(struct platform_device *pdev); extern const struct dev_pm_ops stmmac_pltfr_pm_ops; diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c index 0134ebac227a..f9b10e84de06 100644 --- a/drivers/net/ieee802154/ca8210.c +++ b/drivers/net/ieee802154/ca8210.c @@ -2855,7 +2855,6 @@ static int ca8210_interrupt_init(struct spi_device *spi) ); if (ret) { dev_crit(&spi->dev, "request_irq %d failed\n", pdata->irq_id); - gpiod_unexport(gpio_to_desc(pdata->gpio_irq)); gpio_free(pdata->gpio_irq); } @@ -2945,7 +2944,8 @@ static void ca8210_hw_setup(struct ieee802154_hw *ca8210_hw) ca8210_hw->phy->flags = WPAN_PHY_FLAG_TXPOWER | WPAN_PHY_FLAG_CCA_ED_LEVEL | - WPAN_PHY_FLAG_CCA_MODE; + WPAN_PHY_FLAG_CCA_MODE | + WPAN_PHY_FLAG_DATAGRAMS_ONLY; } /** diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 9f0a9c575bd7..59cae0d808aa 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -664,17 +664,6 @@ static int bcm54616s_read_status(struct phy_device *phydev) return err; } -static int brcm_phy_setbits(struct phy_device *phydev, int reg, int set) -{ - int val; - - val = phy_read(phydev, reg); - if (val < 0) - return val; - - return phy_write(phydev, reg, val | set); -} - static int brcm_fet_config_init(struct phy_device *phydev) { int reg, err, err2, brcmtest; @@ -745,15 +734,15 @@ static int brcm_fet_config_init(struct phy_device *phydev) goto done; /* Enable auto MDIX */ - err = brcm_phy_setbits(phydev, MII_BRCM_FET_SHDW_MISCCTRL, - MII_BRCM_FET_SHDW_MC_FAME); + err = phy_set_bits(phydev, MII_BRCM_FET_SHDW_MISCCTRL, + MII_BRCM_FET_SHDW_MC_FAME); if (err < 0) goto done; if (phydev->dev_flags & PHY_BRCM_AUTO_PWRDWN_ENABLE) { /* Enable auto power down */ - err = brcm_phy_setbits(phydev, MII_BRCM_FET_SHDW_AUXSTAT2, - MII_BRCM_FET_SHDW_AS2_APDE); + err = phy_set_bits(phydev, MII_BRCM_FET_SHDW_AUXSTAT2, + MII_BRCM_FET_SHDW_AS2_APDE); } done: diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c index a473d46ad668..9fd8e6f07a03 100644 --- a/drivers/s390/net/lcs.c +++ b/drivers/s390/net/lcs.c @@ -36,7 +36,7 @@ #include "lcs.h" -#if !IS_ENABLED(CONFIG_ETHERNET) && !IS_ENABLED(CONFIG_FDDI) +#if !defined(CONFIG_ETHERNET) && !defined(CONFIG_FDDI) #error Cannot compile lcs.c without some net devices switched on. #endif @@ -1601,14 +1601,14 @@ lcs_startlan_auto(struct lcs_card *card) int rc; LCS_DBF_TEXT(2, trace, "strtauto"); -#if IS_ENABLED(CONFIG_ETHERNET) +#ifdef CONFIG_ETHERNET card->lan_type = LCS_FRAME_TYPE_ENET; rc = lcs_send_startlan(card, LCS_INITIATOR_TCPIP); if (rc == 0) return 0; #endif -#if IS_ENABLED(CONFIG_FDDI) +#ifdef CONFIG_FDDI card->lan_type = LCS_FRAME_TYPE_FDDI; rc = lcs_send_startlan(card, LCS_INITIATOR_TCPIP); if (rc == 0) @@ -2140,13 +2140,13 @@ lcs_new_device(struct ccwgroup_device *ccwgdev) goto netdev_out; } switch (card->lan_type) { -#if IS_ENABLED(CONFIG_ETHERNET) +#ifdef CONFIG_ETHERNET case LCS_FRAME_TYPE_ENET: card->lan_type_trans = eth_type_trans; dev = alloc_etherdev(0); break; #endif -#if IS_ENABLED(CONFIG_FDDI) +#ifdef CONFIG_FDDI case LCS_FRAME_TYPE_FDDI: card->lan_type_trans = fddi_type_trans; dev = alloc_fddidev(0); diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 5b11a3b0fec0..f70f9ac884d2 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -313,11 +313,6 @@ struct bpf_idx_pair { u32 idx; }; -struct bpf_id_pair { - u32 old; - u32 cur; -}; - #define MAX_CALL_FRAMES 8 /* Maximum number of register states that can exist at once */ #define BPF_ID_MAP_SIZE ((MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE) * MAX_CALL_FRAMES) @@ -557,6 +552,21 @@ struct backtrack_state { u64 stack_masks[MAX_CALL_FRAMES]; }; +struct bpf_id_pair { + u32 old; + u32 cur; +}; + +struct bpf_idmap { + u32 tmp_id_gen; + struct bpf_id_pair map[BPF_ID_MAP_SIZE]; +}; + +struct bpf_idset { + u32 count; + u32 ids[BPF_ID_MAP_SIZE]; +}; + /* single container for all structs * one verifier_env per bpf_check() call */ @@ -588,7 +598,10 @@ struct bpf_verifier_env { const struct bpf_line_info *prev_linfo; struct bpf_verifier_log log; struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 1]; - struct bpf_id_pair idmap_scratch[BPF_ID_MAP_SIZE]; + union { + struct bpf_idmap idmap_scratch; + struct bpf_idset idset_scratch; + }; struct { int *insn_state; int *insn_stack; diff --git a/include/linux/bpfilter.h b/include/linux/bpfilter.h index 2ae3c8e1d83c..736ded4905e0 100644 --- a/include/linux/bpfilter.h +++ b/include/linux/bpfilter.h @@ -11,7 +11,6 @@ int bpfilter_ip_set_sockopt(struct sock *sk, int optname, sockptr_t optval, unsigned int optlen); int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen); -void bpfilter_umh_cleanup(struct umd_info *info); struct bpfilter_umh_ops { struct umd_info info; diff --git a/include/linux/filter.h b/include/linux/filter.h index bbce89937fde..f69114083ec7 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -874,7 +874,6 @@ void bpf_prog_free(struct bpf_prog *fp); bool bpf_opcode_in_insntable(u8 code); -void bpf_prog_free_linfo(struct bpf_prog *prog); void bpf_prog_fill_jited_linfo(struct bpf_prog *prog, const u32 *insn_to_jit_off); int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index acf706d49c2b..b828c7a75be2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -5073,6 +5073,15 @@ static inline bool netif_is_l3_slave(const struct net_device *dev) return dev->priv_flags & IFF_L3MDEV_SLAVE; } +static inline int dev_sdif(const struct net_device *dev) +{ +#ifdef CONFIG_NET_L3_MASTER_DEV + if (netif_is_l3_slave(dev)) + return dev->ifindex; +#endif + return 0; +} + static inline bool netif_is_bridge_master(const struct net_device *dev) { return dev->priv_flags & IFF_EBRIDGE; diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index 0c2778a836db..e00057984489 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -178,12 +178,15 @@ wpan_phy_cca_cmp(const struct wpan_phy_cca *a, const struct wpan_phy_cca *b) * setting. * @WPAN_PHY_FLAG_STATE_QUEUE_STOPPED: Indicates that the transmit queue was * temporarily stopped. + * @WPAN_PHY_FLAG_DATAGRAMS_ONLY: Indicates that transceiver is only able to + * send/receive datagrams. */ enum wpan_phy_flags { WPAN_PHY_FLAG_TXPOWER = BIT(1), WPAN_PHY_FLAG_CCA_ED_LEVEL = BIT(2), WPAN_PHY_FLAG_CCA_MODE = BIT(3), WPAN_PHY_FLAG_STATE_QUEUE_STOPPED = BIT(4), + WPAN_PHY_FLAG_DATAGRAMS_ONLY = BIT(5), }; struct wpan_phy { diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h index da8a3e648c7a..063313df447d 100644 --- a/include/net/ieee802154_netdev.h +++ b/include/net/ieee802154_netdev.h @@ -74,6 +74,10 @@ struct ieee802154_beacon_hdr { #endif } __packed; +struct ieee802154_mac_cmd_pl { + u8 cmd_id; +} __packed; + struct ieee802154_sechdr { #if defined(__LITTLE_ENDIAN_BITFIELD) u8 level:3, @@ -149,6 +153,16 @@ struct ieee802154_beacon_frame { struct ieee802154_beacon_hdr mac_pl; }; +struct ieee802154_mac_cmd_frame { + struct ieee802154_hdr mhr; + struct ieee802154_mac_cmd_pl mac_pl; +}; + +struct ieee802154_beacon_req_frame { + struct ieee802154_hdr mhr; + struct ieee802154_mac_cmd_pl mac_pl; +}; + /* pushes hdr onto the skb. fields of hdr->fc that can be calculated from * the contents of hdr will be, and the actual value of those bits in * hdr->fc will be ignored. this includes the INTRA_PAN bit and the frame @@ -174,9 +188,13 @@ int ieee802154_hdr_peek_addrs(const struct sk_buff *skb, */ int ieee802154_hdr_peek(const struct sk_buff *skb, struct ieee802154_hdr *hdr); -/* pushes a beacon frame into an skb */ +/* pushes/pulls various frame types into/from an skb */ int ieee802154_beacon_push(struct sk_buff *skb, struct ieee802154_beacon_frame *beacon); +int ieee802154_mac_cmd_push(struct sk_buff *skb, void *frame, + const void *pl, unsigned int pl_len); +int ieee802154_mac_cmd_pl_pull(struct sk_buff *skb, + struct ieee802154_mac_cmd_pl *mac_pl); int ieee802154_max_payload(const struct ieee802154_hdr *hdr); diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index 9c0d860609ba..c243f906ebed 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -255,10 +255,6 @@ static inline void xsk_buff_free(struct xdp_buff *xdp) { } -static inline void xsk_buff_discard(struct xdp_buff *xdp) -{ -} - static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size) { } diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6961a7b70028..60a9d59beeab 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3178,6 +3178,10 @@ union bpf_attr { * **BPF_FIB_LOOKUP_DIRECT** * Do a direct table lookup vs full lookup using FIB * rules. + * **BPF_FIB_LOOKUP_TBID** + * Used with BPF_FIB_LOOKUP_DIRECT. + * Use the routing table ID present in *params*->tbid + * for the fib lookup. * **BPF_FIB_LOOKUP_OUTPUT** * Perform lookup from an egress perspective (default is * ingress). @@ -6832,6 +6836,7 @@ enum { BPF_FIB_LOOKUP_DIRECT = (1U << 0), BPF_FIB_LOOKUP_OUTPUT = (1U << 1), BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), + BPF_FIB_LOOKUP_TBID = (1U << 3), }; enum { @@ -6892,9 +6897,19 @@ struct bpf_fib_lookup { __u32 ipv6_dst[4]; /* in6_addr; network order */ }; - /* output */ - __be16 h_vlan_proto; - __be16 h_vlan_TCI; + union { + struct { + /* output */ + __be16 h_vlan_proto; + __be16 h_vlan_TCI; + }; + /* input: when accompanied with the + * 'BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID` flags, a + * specific routing table to use for the fib lookup. + */ + __u32 tbid; + }; + __u8 smac[6]; /* ETH_ALEN */ __u8 dmac[6]; /* ETH_ALEN */ }; diff --git a/kernel/bpf/bloom_filter.c b/kernel/bpf/bloom_filter.c index 540331b610a9..addf3dd57b59 100644 --- a/kernel/bpf/bloom_filter.c +++ b/kernel/bpf/bloom_filter.c @@ -86,9 +86,6 @@ static struct bpf_map *bloom_map_alloc(union bpf_attr *attr) int numa_node = bpf_map_attr_numa_node(attr); struct bpf_bloom_filter *bloom; - if (!bpf_capable()) - return ERR_PTR(-EPERM); - if (attr->key_size != 0 || attr->value_size == 0 || attr->max_entries == 0 || attr->map_flags & ~BLOOM_CREATE_FLAG_MASK || diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 47d9948d768f..b5149cfce7d4 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -723,9 +723,6 @@ int bpf_local_storage_map_alloc_check(union bpf_attr *attr) !attr->btf_key_type_id || !attr->btf_value_type_id) return -EINVAL; - if (!bpf_capable()) - return -EPERM; - if (attr->value_size > BPF_LOCAL_STORAGE_MAX_VALUE_SIZE) return -E2BIG; diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index d3f0a4825fa6..116a0ce378ec 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -655,9 +655,6 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) const struct btf_type *t, *vt; struct bpf_map *map; - if (!bpf_capable()) - return ERR_PTR(-EPERM); - st_ops = bpf_struct_ops_find_value(attr->btf_vmlinux_value_type_id); if (!st_ops) return ERR_PTR(-ENOTSUPP); diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index bbcae434fda5..29fe21099298 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -492,25 +492,26 @@ static bool btf_type_is_fwd(const struct btf_type *t) return BTF_INFO_KIND(t->info) == BTF_KIND_FWD; } -static bool btf_type_nosize(const struct btf_type *t) +static bool btf_type_is_datasec(const struct btf_type *t) { - return btf_type_is_void(t) || btf_type_is_fwd(t) || - btf_type_is_func(t) || btf_type_is_func_proto(t); + return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC; } -static bool btf_type_nosize_or_null(const struct btf_type *t) +static bool btf_type_is_decl_tag(const struct btf_type *t) { - return !t || btf_type_nosize(t); + return BTF_INFO_KIND(t->info) == BTF_KIND_DECL_TAG; } -static bool btf_type_is_datasec(const struct btf_type *t) +static bool btf_type_nosize(const struct btf_type *t) { - return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC; + return btf_type_is_void(t) || btf_type_is_fwd(t) || + btf_type_is_func(t) || btf_type_is_func_proto(t) || + btf_type_is_decl_tag(t); } -static bool btf_type_is_decl_tag(const struct btf_type *t) +static bool btf_type_nosize_or_null(const struct btf_type *t) { - return BTF_INFO_KIND(t->info) == BTF_KIND_DECL_TAG; + return !t || btf_type_nosize(t); } static bool btf_type_is_decl_tag_target(const struct btf_type *t) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 7421487422d4..dc85240a0134 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2064,14 +2064,16 @@ EVAL4(PROG_NAME_LIST, 416, 448, 480, 512) }; #undef PROG_NAME_LIST #define PROG_NAME_LIST(stack_size) PROG_NAME_ARGS(stack_size), -static u64 (*interpreters_args[])(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5, - const struct bpf_insn *insn) = { +static __maybe_unused +u64 (*interpreters_args[])(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5, + const struct bpf_insn *insn) = { EVAL6(PROG_NAME_LIST, 32, 64, 96, 128, 160, 192) EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384) EVAL4(PROG_NAME_LIST, 416, 448, 480, 512) }; #undef PROG_NAME_LIST +#ifdef CONFIG_BPF_SYSCALL void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth) { stack_depth = max_t(u32, stack_depth, 1); @@ -2080,7 +2082,7 @@ void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth) __bpf_call_base_args; insn->code = BPF_JMP | BPF_CALL_ARGS; } - +#endif #else static unsigned int __bpf_prog_ret0_warn(const void *ctx, const struct bpf_insn *insn) diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 8ec18faa74ac..8a33e8747a0e 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -28,7 +28,6 @@ #include <linux/sched.h> #include <linux/workqueue.h> #include <linux/kthread.h> -#include <linux/capability.h> #include <trace/events/xdp.h> #include <linux/btf_ids.h> @@ -89,9 +88,6 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr) u32 value_size = attr->value_size; struct bpf_cpu_map *cmap; - if (!bpf_capable()) - return ERR_PTR(-EPERM); - /* check sanity of attributes */ if (attr->max_entries == 0 || attr->key_size != 4 || (value_size != offsetofend(struct bpf_cpumap_val, qsize) && diff --git a/kernel/bpf/cpumask.c b/kernel/bpf/cpumask.c index 7efdf5d770ca..938a60ff4295 100644 --- a/kernel/bpf/cpumask.c +++ b/kernel/bpf/cpumask.c @@ -132,6 +132,21 @@ __bpf_kfunc u32 bpf_cpumask_first_zero(const struct cpumask *cpumask) } /** + * bpf_cpumask_first_and() - Return the index of the first nonzero bit from the + * AND of two cpumasks. + * @src1: The first cpumask. + * @src2: The second cpumask. + * + * Find the index of the first nonzero bit of the AND of two cpumasks. + * struct bpf_cpumask pointers may be safely passed to @src1 and @src2. + */ +__bpf_kfunc u32 bpf_cpumask_first_and(const struct cpumask *src1, + const struct cpumask *src2) +{ + return cpumask_first_and(src1, src2); +} + +/** * bpf_cpumask_set_cpu() - Set a bit for a CPU in a BPF cpumask. * @cpu: The CPU to be set in the cpumask. * @cpumask: The BPF cpumask in which a bit is being set. @@ -367,7 +382,7 @@ __bpf_kfunc void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask } /** - * bpf_cpumask_any() - Return a random set CPU from a cpumask. + * bpf_cpumask_any_distribute() - Return a random set CPU from a cpumask. * @cpumask: The cpumask being queried. * * Return: @@ -376,26 +391,28 @@ __bpf_kfunc void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask * * A struct bpf_cpumask pointer may be safely passed to @src. */ -__bpf_kfunc u32 bpf_cpumask_any(const struct cpumask *cpumask) +__bpf_kfunc u32 bpf_cpumask_any_distribute(const struct cpumask *cpumask) { - return cpumask_any(cpumask); + return cpumask_any_distribute(cpumask); } /** - * bpf_cpumask_any_and() - Return a random set CPU from the AND of two - * cpumasks. + * bpf_cpumask_any_and_distribute() - Return a random set CPU from the AND of + * two cpumasks. * @src1: The first cpumask. * @src2: The second cpumask. * * Return: - * * A random set bit within [0, num_cpus) if at least one bit is set. + * * A random set bit within [0, num_cpus) from the AND of two cpumasks, if at + * least one bit is set. * * >= num_cpus if no bit is set. * * struct bpf_cpumask pointers may be safely passed to @src1 and @src2. */ -__bpf_kfunc u32 bpf_cpumask_any_and(const struct cpumask *src1, const struct cpumask *src2) +__bpf_kfunc u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1, + const struct cpumask *src2) { - return cpumask_any_and(src1, src2); + return cpumask_any_and_distribute(src1, src2); } __diag_pop(); @@ -406,6 +423,7 @@ BTF_ID_FLAGS(func, bpf_cpumask_release, KF_RELEASE) BTF_ID_FLAGS(func, bpf_cpumask_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_cpumask_first, KF_RCU) BTF_ID_FLAGS(func, bpf_cpumask_first_zero, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_first_and, KF_RCU) BTF_ID_FLAGS(func, bpf_cpumask_set_cpu, KF_RCU) BTF_ID_FLAGS(func, bpf_cpumask_clear_cpu, KF_RCU) BTF_ID_FLAGS(func, bpf_cpumask_test_cpu, KF_RCU) @@ -422,8 +440,8 @@ BTF_ID_FLAGS(func, bpf_cpumask_subset, KF_RCU) BTF_ID_FLAGS(func, bpf_cpumask_empty, KF_RCU) BTF_ID_FLAGS(func, bpf_cpumask_full, KF_RCU) BTF_ID_FLAGS(func, bpf_cpumask_copy, KF_RCU) -BTF_ID_FLAGS(func, bpf_cpumask_any, KF_RCU) -BTF_ID_FLAGS(func, bpf_cpumask_any_and, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_any_distribute, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_any_and_distribute, KF_RCU) BTF_SET8_END(cpumask_kfunc_btf_ids) static const struct btf_kfunc_id_set cpumask_kfunc_set = { diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 802692fa3905..49cc0b5671c6 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -160,9 +160,6 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr) struct bpf_dtab *dtab; int err; - if (!capable(CAP_NET_ADMIN)) - return ERR_PTR(-EPERM); - dtab = bpf_map_area_alloc(sizeof(*dtab), NUMA_NO_NODE); if (!dtab) return ERR_PTR(-ENOMEM); diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 9901efee4339..56d3da7d0bc6 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -422,12 +422,6 @@ static int htab_map_alloc_check(union bpf_attr *attr) BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) != offsetof(struct htab_elem, hash_node.pprev)); - if (lru && !bpf_capable()) - /* LRU implementation is much complicated than other - * maps. Hence, limit to CAP_BPF. - */ - return -EPERM; - if (zero_seed && !capable(CAP_SYS_ADMIN)) /* Guard against local DoS, and discourage production use. */ return -EPERM; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 4ef4c4f8a355..9e80efa59a5d 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1933,8 +1933,12 @@ __bpf_kfunc void *bpf_refcount_acquire_impl(void *p__refcounted_kptr, void *meta * bpf_refcount type so that it is emitted in vmlinux BTF */ ref = (struct bpf_refcount *)(p__refcounted_kptr + meta->record->refcount_off); + if (!refcount_inc_not_zero((refcount_t *)ref)) + return NULL; - refcount_inc((refcount_t *)ref); + /* Verifier strips KF_RET_NULL if input is owned ref, see is_kfunc_ret_null + * in verifier.c + */ return (void *)p__refcounted_kptr; } @@ -1950,7 +1954,7 @@ static int __bpf_list_add(struct bpf_list_node *node, struct bpf_list_head *head INIT_LIST_HEAD(h); if (!list_empty(n)) { /* Only called from BPF prog, no need to migrate_disable */ - __bpf_obj_drop_impl(n - off, rec); + __bpf_obj_drop_impl((void *)n - off, rec); return -EINVAL; } @@ -2032,7 +2036,7 @@ static int __bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node *node, if (!RB_EMPTY_NODE(n)) { /* Only called from BPF prog, no need to migrate_disable */ - __bpf_obj_drop_impl(n - off, rec); + __bpf_obj_drop_impl((void *)n - off, rec); return -EINVAL; } @@ -2406,7 +2410,7 @@ BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE) #endif BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_obj_drop_impl, KF_RELEASE) -BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE) +BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_list_push_front_impl) BTF_ID_FLAGS(func, bpf_list_push_back_impl) BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index e0d3ddf2037a..17c7e7782a1f 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -544,9 +544,6 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr) { struct lpm_trie *trie; - if (!bpf_capable()) - return ERR_PTR(-EPERM); - /* check sanity of attributes */ if (attr->max_entries == 0 || !(attr->map_flags & BPF_F_NO_PREALLOC) || diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c index 410637c225fb..0668bcd7c926 100644 --- a/kernel/bpf/memalloc.c +++ b/kernel/bpf/memalloc.c @@ -211,9 +211,9 @@ static void alloc_bulk(struct bpf_mem_cache *c, int cnt, int node) mem_cgroup_put(memcg); } -static void free_one(struct bpf_mem_cache *c, void *obj) +static void free_one(void *obj, bool percpu) { - if (c->percpu_size) { + if (percpu) { free_percpu(((void **)obj)[1]); kfree(obj); return; @@ -222,14 +222,19 @@ static void free_one(struct bpf_mem_cache *c, void *obj) kfree(obj); } -static void __free_rcu(struct rcu_head *head) +static void free_all(struct llist_node *llnode, bool percpu) { - struct bpf_mem_cache *c = container_of(head, struct bpf_mem_cache, rcu); - struct llist_node *llnode = llist_del_all(&c->waiting_for_gp); struct llist_node *pos, *t; llist_for_each_safe(pos, t, llnode) - free_one(c, pos); + free_one(pos, percpu); +} + +static void __free_rcu(struct rcu_head *head) +{ + struct bpf_mem_cache *c = container_of(head, struct bpf_mem_cache, rcu); + + free_all(llist_del_all(&c->waiting_for_gp), !!c->percpu_size); atomic_set(&c->call_rcu_in_progress, 0); } @@ -432,7 +437,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) static void drain_mem_cache(struct bpf_mem_cache *c) { - struct llist_node *llnode, *t; + bool percpu = !!c->percpu_size; /* No progs are using this bpf_mem_cache, but htab_map_free() called * bpf_mem_cache_free() for all remaining elements and they can be in @@ -441,14 +446,10 @@ static void drain_mem_cache(struct bpf_mem_cache *c) * Except for waiting_for_gp list, there are no concurrent operations * on these lists, so it is safe to use __llist_del_all(). */ - llist_for_each_safe(llnode, t, __llist_del_all(&c->free_by_rcu)) - free_one(c, llnode); - llist_for_each_safe(llnode, t, llist_del_all(&c->waiting_for_gp)) - free_one(c, llnode); - llist_for_each_safe(llnode, t, __llist_del_all(&c->free_llist)) - free_one(c, llnode); - llist_for_each_safe(llnode, t, __llist_del_all(&c->free_llist_extra)) - free_one(c, llnode); + free_all(__llist_del_all(&c->free_by_rcu), percpu); + free_all(llist_del_all(&c->waiting_for_gp), percpu); + free_all(__llist_del_all(&c->free_llist), percpu); + free_all(__llist_del_all(&c->free_llist_extra), percpu); } static void free_mem_alloc_no_barrier(struct bpf_mem_alloc *ma) diff --git a/kernel/bpf/preload/bpf_preload_kern.c b/kernel/bpf/preload/bpf_preload_kern.c index b56f9f3314fd..0c63bc2cd895 100644 --- a/kernel/bpf/preload/bpf_preload_kern.c +++ b/kernel/bpf/preload/bpf_preload_kern.c @@ -23,9 +23,9 @@ static void free_links_and_skel(void) static int preload(struct bpf_preload_info *obj) { - strlcpy(obj[0].link_name, "maps.debug", sizeof(obj[0].link_name)); + strscpy(obj[0].link_name, "maps.debug", sizeof(obj[0].link_name)); obj[0].link = maps_link; - strlcpy(obj[1].link_name, "progs.debug", sizeof(obj[1].link_name)); + strscpy(obj[1].link_name, "progs.debug", sizeof(obj[1].link_name)); obj[1].link = progs_link; return 0; } diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c index 601609164ef3..8d2ddcb7566b 100644 --- a/kernel/bpf/queue_stack_maps.c +++ b/kernel/bpf/queue_stack_maps.c @@ -7,7 +7,6 @@ #include <linux/bpf.h> #include <linux/list.h> #include <linux/slab.h> -#include <linux/capability.h> #include <linux/btf_ids.h> #include "percpu_freelist.h" @@ -46,9 +45,6 @@ static bool queue_stack_map_is_full(struct bpf_queue_stack *qs) /* Called from syscall */ static int queue_stack_map_alloc_check(union bpf_attr *attr) { - if (!bpf_capable()) - return -EPERM; - /* check sanity of attributes */ if (attr->max_entries == 0 || attr->key_size != 0 || attr->value_size == 0 || diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c index cbf2d8d784b8..4b4f9670f1a9 100644 --- a/kernel/bpf/reuseport_array.c +++ b/kernel/bpf/reuseport_array.c @@ -151,9 +151,6 @@ static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr) int numa_node = bpf_map_attr_numa_node(attr); struct reuseport_array *array; - if (!bpf_capable()) - return ERR_PTR(-EPERM); - /* allocate all map elements and zero-initialize them */ array = bpf_map_area_alloc(struct_size(array, ptrs, attr->max_entries), numa_node); if (!array) diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index b25fce425b2c..458bb80b14d5 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -74,9 +74,6 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) u64 cost, n_buckets; int err; - if (!bpf_capable()) - return ERR_PTR(-EPERM); - if (attr->map_flags & ~STACK_CREATE_FLAG_MASK) return ERR_PTR(-EINVAL); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 4497b193dd20..a2aef900519c 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -109,37 +109,6 @@ const struct bpf_map_ops bpf_map_offload_ops = { .map_mem_usage = bpf_map_offload_map_mem_usage, }; -static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) -{ - const struct bpf_map_ops *ops; - u32 type = attr->map_type; - struct bpf_map *map; - int err; - - if (type >= ARRAY_SIZE(bpf_map_types)) - return ERR_PTR(-EINVAL); - type = array_index_nospec(type, ARRAY_SIZE(bpf_map_types)); - ops = bpf_map_types[type]; - if (!ops) - return ERR_PTR(-EINVAL); - - if (ops->map_alloc_check) { - err = ops->map_alloc_check(attr); - if (err) - return ERR_PTR(err); - } - if (attr->map_ifindex) - ops = &bpf_map_offload_ops; - if (!ops->map_mem_usage) - return ERR_PTR(-EINVAL); - map = ops->map_alloc(attr); - if (IS_ERR(map)) - return map; - map->ops = ops; - map->map_type = type; - return map; -} - static void bpf_map_write_active_inc(struct bpf_map *map) { atomic64_inc(&map->writecnt); @@ -1127,7 +1096,9 @@ free_map_tab: /* called via syscall */ static int map_create(union bpf_attr *attr) { + const struct bpf_map_ops *ops; int numa_node = bpf_map_attr_numa_node(attr); + u32 map_type = attr->map_type; struct bpf_map *map; int f_flags; int err; @@ -1158,9 +1129,85 @@ static int map_create(union bpf_attr *attr) return -EINVAL; /* find map type and init map: hashtable vs rbtree vs bloom vs ... */ - map = find_and_alloc_map(attr); + map_type = attr->map_type; + if (map_type >= ARRAY_SIZE(bpf_map_types)) + return -EINVAL; + map_type = array_index_nospec(map_type, ARRAY_SIZE(bpf_map_types)); + ops = bpf_map_types[map_type]; + if (!ops) + return -EINVAL; + + if (ops->map_alloc_check) { + err = ops->map_alloc_check(attr); + if (err) + return err; + } + if (attr->map_ifindex) + ops = &bpf_map_offload_ops; + if (!ops->map_mem_usage) + return -EINVAL; + + /* Intent here is for unprivileged_bpf_disabled to block BPF map + * creation for unprivileged users; other actions depend + * on fd availability and access to bpffs, so are dependent on + * object creation success. Even with unprivileged BPF disabled, + * capability checks are still carried out. + */ + if (sysctl_unprivileged_bpf_disabled && !bpf_capable()) + return -EPERM; + + /* check privileged map type permissions */ + switch (map_type) { + case BPF_MAP_TYPE_ARRAY: + case BPF_MAP_TYPE_PERCPU_ARRAY: + case BPF_MAP_TYPE_PROG_ARRAY: + case BPF_MAP_TYPE_PERF_EVENT_ARRAY: + case BPF_MAP_TYPE_CGROUP_ARRAY: + case BPF_MAP_TYPE_ARRAY_OF_MAPS: + case BPF_MAP_TYPE_HASH: + case BPF_MAP_TYPE_PERCPU_HASH: + case BPF_MAP_TYPE_HASH_OF_MAPS: + case BPF_MAP_TYPE_RINGBUF: + case BPF_MAP_TYPE_USER_RINGBUF: + case BPF_MAP_TYPE_CGROUP_STORAGE: + case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: + /* unprivileged */ + break; + case BPF_MAP_TYPE_SK_STORAGE: + case BPF_MAP_TYPE_INODE_STORAGE: + case BPF_MAP_TYPE_TASK_STORAGE: + case BPF_MAP_TYPE_CGRP_STORAGE: + case BPF_MAP_TYPE_BLOOM_FILTER: + case BPF_MAP_TYPE_LPM_TRIE: + case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY: + case BPF_MAP_TYPE_STACK_TRACE: + case BPF_MAP_TYPE_QUEUE: + case BPF_MAP_TYPE_STACK: + case BPF_MAP_TYPE_LRU_HASH: + case BPF_MAP_TYPE_LRU_PERCPU_HASH: + case BPF_MAP_TYPE_STRUCT_OPS: + case BPF_MAP_TYPE_CPUMAP: + if (!bpf_capable()) + return -EPERM; + break; + case BPF_MAP_TYPE_SOCKMAP: + case BPF_MAP_TYPE_SOCKHASH: + case BPF_MAP_TYPE_DEVMAP: + case BPF_MAP_TYPE_DEVMAP_HASH: + case BPF_MAP_TYPE_XSKMAP: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + break; + default: + WARN(1, "unsupported map type %d", map_type); + return -EPERM; + } + + map = ops->map_alloc(attr); if (IS_ERR(map)) return PTR_ERR(map); + map->ops = ops; + map->map_type = map_type; err = bpf_obj_name_cpy(map->name, attr->map_name, sizeof(attr->map_name)); @@ -2507,7 +2554,6 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) struct btf *attach_btf = NULL; int err; char license[128]; - bool is_gpl; if (CHECK_ATTR(BPF_PROG_LOAD)) return -EINVAL; @@ -2526,15 +2572,15 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) !bpf_capable()) return -EPERM; - /* copy eBPF program license from user space */ - if (strncpy_from_bpfptr(license, - make_bpfptr(attr->license, uattr.is_kernel), - sizeof(license) - 1) < 0) - return -EFAULT; - license[sizeof(license) - 1] = 0; - - /* eBPF programs must be GPL compatible to use GPL-ed functions */ - is_gpl = license_is_gpl_compatible(license); + /* Intent here is for unprivileged_bpf_disabled to block BPF program + * creation for unprivileged users; other actions depend + * on fd availability and access to bpffs, so are dependent on + * object creation success. Even with unprivileged BPF disabled, + * capability checks are still carried out for these + * and other operations. + */ + if (sysctl_unprivileged_bpf_disabled && !bpf_capable()) + return -EPERM; if (attr->insn_cnt == 0 || attr->insn_cnt > (bpf_capable() ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS)) @@ -2618,12 +2664,20 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) make_bpfptr(attr->insns, uattr.is_kernel), bpf_prog_insn_size(prog)) != 0) goto free_prog_sec; + /* copy eBPF program license from user space */ + if (strncpy_from_bpfptr(license, + make_bpfptr(attr->license, uattr.is_kernel), + sizeof(license) - 1) < 0) + goto free_prog_sec; + license[sizeof(license) - 1] = 0; + + /* eBPF programs must be GPL compatible to use GPL-ed functions */ + prog->gpl_compatible = license_is_gpl_compatible(license) ? 1 : 0; prog->orig_prog = NULL; prog->jited = 0; atomic64_set(&prog->aux->refcnt, 1); - prog->gpl_compatible = is_gpl ? 1 : 0; if (bpf_prog_is_dev_bound(prog->aux)) { err = bpf_prog_dev_bound_init(prog, attr); @@ -2797,28 +2851,31 @@ static void bpf_link_put_deferred(struct work_struct *work) bpf_link_free(link); } -/* bpf_link_put can be called from atomic context, but ensures that resources - * are freed from process context +/* bpf_link_put might be called from atomic context. It needs to be called + * from sleepable context in order to acquire sleeping locks during the process. */ void bpf_link_put(struct bpf_link *link) { if (!atomic64_dec_and_test(&link->refcnt)) return; - if (in_atomic()) { - INIT_WORK(&link->work, bpf_link_put_deferred); - schedule_work(&link->work); - } else { - bpf_link_free(link); - } + INIT_WORK(&link->work, bpf_link_put_deferred); + schedule_work(&link->work); } EXPORT_SYMBOL(bpf_link_put); +static void bpf_link_put_direct(struct bpf_link *link) +{ + if (!atomic64_dec_and_test(&link->refcnt)) + return; + bpf_link_free(link); +} + static int bpf_link_release(struct inode *inode, struct file *filp) { struct bpf_link *link = filp->private_data; - bpf_link_put(link); + bpf_link_put_direct(link); return 0; } @@ -4801,7 +4858,7 @@ out_put_progs: if (ret) bpf_prog_put(new_prog); out_put_link: - bpf_link_put(link); + bpf_link_put_direct(link); return ret; } @@ -4824,7 +4881,7 @@ static int link_detach(union bpf_attr *attr) else ret = -EOPNOTSUPP; - bpf_link_put(link); + bpf_link_put_direct(link); return ret; } @@ -4894,7 +4951,7 @@ static int bpf_link_get_fd_by_id(const union bpf_attr *attr) fd = bpf_link_new_fd(link); if (fd < 0) - bpf_link_put(link); + bpf_link_put_direct(link); return fd; } @@ -4971,7 +5028,7 @@ static int bpf_iter_create(union bpf_attr *attr) return PTR_ERR(link); err = bpf_iter_new_fd(link); - bpf_link_put(link); + bpf_link_put_direct(link); return err; } @@ -5041,23 +5098,8 @@ out_prog_put: static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size) { union bpf_attr attr; - bool capable; int err; - capable = bpf_capable() || !sysctl_unprivileged_bpf_disabled; - - /* Intent here is for unprivileged_bpf_disabled to block key object - * creation commands for unprivileged users; other actions depend - * of fd availability and access to bpffs, so are dependent on - * object creation success. Capabilities are later verified for - * operations such as load and map create, so even with unprivileged - * BPF disabled, capability checks are still carried out for these - * and other operations. - */ - if (!capable && - (cmd == BPF_MAP_CREATE || cmd == BPF_PROG_LOAD)) - return -EPERM; - err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size); if (err) return err; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b54193de762b..11e54dd8b6dd 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -197,6 +197,7 @@ static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg); static void specialize_kfunc(struct bpf_verifier_env *env, u32 func_id, u16 offset, unsigned long *addr); +static bool is_trusted_reg(const struct bpf_reg_state *reg); static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux) { @@ -298,16 +299,19 @@ struct bpf_kfunc_call_arg_meta { bool found; } arg_constant; - /* arg_btf and arg_btf_id are used by kfunc-specific handling, + /* arg_{btf,btf_id,owning_ref} are used by kfunc-specific handling, * generally to pass info about user-defined local kptr types to later * verification logic * bpf_obj_drop * Record the local kptr type to be drop'd * bpf_refcount_acquire (via KF_ARG_PTR_TO_REFCOUNTED_KPTR arg type) - * Record the local kptr type to be refcount_incr'd + * Record the local kptr type to be refcount_incr'd and use + * arg_owning_ref to determine whether refcount_acquire should be + * fallible */ struct btf *arg_btf; u32 arg_btf_id; + bool arg_owning_ref; struct { struct btf_field *field; @@ -439,8 +443,11 @@ static bool type_may_be_null(u32 type) return type & PTR_MAYBE_NULL; } -static bool reg_type_not_null(enum bpf_reg_type type) +static bool reg_not_null(const struct bpf_reg_state *reg) { + enum bpf_reg_type type; + + type = reg->type; if (type_may_be_null(type)) return false; @@ -450,6 +457,7 @@ static bool reg_type_not_null(enum bpf_reg_type type) type == PTR_TO_MAP_VALUE || type == PTR_TO_MAP_KEY || type == PTR_TO_SOCK_COMMON || + (type == PTR_TO_BTF_ID && is_trusted_reg(reg)) || type == PTR_TO_MEM; } @@ -3771,6 +3779,96 @@ static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_ } } +static bool idset_contains(struct bpf_idset *s, u32 id) +{ + u32 i; + + for (i = 0; i < s->count; ++i) + if (s->ids[i] == id) + return true; + + return false; +} + +static int idset_push(struct bpf_idset *s, u32 id) +{ + if (WARN_ON_ONCE(s->count >= ARRAY_SIZE(s->ids))) + return -EFAULT; + s->ids[s->count++] = id; + return 0; +} + +static void idset_reset(struct bpf_idset *s) +{ + s->count = 0; +} + +/* Collect a set of IDs for all registers currently marked as precise in env->bt. + * Mark all registers with these IDs as precise. + */ +static int mark_precise_scalar_ids(struct bpf_verifier_env *env, struct bpf_verifier_state *st) +{ + struct bpf_idset *precise_ids = &env->idset_scratch; + struct backtrack_state *bt = &env->bt; + struct bpf_func_state *func; + struct bpf_reg_state *reg; + DECLARE_BITMAP(mask, 64); + int i, fr; + + idset_reset(precise_ids); + + for (fr = bt->frame; fr >= 0; fr--) { + func = st->frame[fr]; + + bitmap_from_u64(mask, bt_frame_reg_mask(bt, fr)); + for_each_set_bit(i, mask, 32) { + reg = &func->regs[i]; + if (!reg->id || reg->type != SCALAR_VALUE) + continue; + if (idset_push(precise_ids, reg->id)) + return -EFAULT; + } + + bitmap_from_u64(mask, bt_frame_stack_mask(bt, fr)); + for_each_set_bit(i, mask, 64) { + if (i >= func->allocated_stack / BPF_REG_SIZE) + break; + if (!is_spilled_scalar_reg(&func->stack[i])) + continue; + reg = &func->stack[i].spilled_ptr; + if (!reg->id) + continue; + if (idset_push(precise_ids, reg->id)) + return -EFAULT; + } + } + + for (fr = 0; fr <= st->curframe; ++fr) { + func = st->frame[fr]; + + for (i = BPF_REG_0; i < BPF_REG_10; ++i) { + reg = &func->regs[i]; + if (!reg->id) + continue; + if (!idset_contains(precise_ids, reg->id)) + continue; + bt_set_frame_reg(bt, fr, i); + } + for (i = 0; i < func->allocated_stack / BPF_REG_SIZE; ++i) { + if (!is_spilled_scalar_reg(&func->stack[i])) + continue; + reg = &func->stack[i].spilled_ptr; + if (!reg->id) + continue; + if (!idset_contains(precise_ids, reg->id)) + continue; + bt_set_frame_slot(bt, fr, i); + } + } + + return 0; +} + /* * __mark_chain_precision() backtracks BPF program instruction sequence and * chain of verifier states making sure that register *regno* (if regno >= 0) @@ -3902,6 +4000,31 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno) bt->frame, last_idx, first_idx, subseq_idx); } + /* If some register with scalar ID is marked as precise, + * make sure that all registers sharing this ID are also precise. + * This is needed to estimate effect of find_equal_scalars(). + * Do this at the last instruction of each state, + * bpf_reg_state::id fields are valid for these instructions. + * + * Allows to track precision in situation like below: + * + * r2 = unknown value + * ... + * --- state #0 --- + * ... + * r1 = r2 // r1 and r2 now share the same ID + * ... + * --- state #1 {r1.id = A, r2.id = A} --- + * ... + * if (r2 > 10) goto exit; // find_equal_scalars() assigns range to r1 + * ... + * --- state #2 {r1.id = A, r2.id = A} --- + * r3 = r10 + * r3 += r1 // need to mark both r1 and r2 + */ + if (mark_precise_scalar_ids(env, st)) + return -EFAULT; + if (last_idx < 0) { /* we are at the entry into subprog, which * is expected for global funcs, but only if @@ -5894,7 +6017,7 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, * program allocated objects (which always have ref_obj_id > 0), * but not for untrusted PTR_TO_BTF_ID | MEM_ALLOC. */ - if (atype != BPF_READ && reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) { + if (atype != BPF_READ && !type_is_ptr_alloc_obj(reg->type)) { verbose(env, "only read is supported\n"); return -EACCES; } @@ -7514,7 +7637,7 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno, if (base_type(arg_type) == ARG_PTR_TO_MEM) type &= ~DYNPTR_TYPE_FLAG_MASK; - if (meta->func_id == BPF_FUNC_kptr_xchg && type & MEM_ALLOC) + if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type)) type &= ~MEM_ALLOC; for (i = 0; i < ARRAY_SIZE(compatible->types); i++) { @@ -9681,11 +9804,6 @@ static bool is_kfunc_acquire(struct bpf_kfunc_call_arg_meta *meta) return meta->kfunc_flags & KF_ACQUIRE; } -static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta) -{ - return meta->kfunc_flags & KF_RET_NULL; -} - static bool is_kfunc_release(struct bpf_kfunc_call_arg_meta *meta) { return meta->kfunc_flags & KF_RELEASE; @@ -10001,6 +10119,16 @@ BTF_ID(func, bpf_dynptr_slice) BTF_ID(func, bpf_dynptr_slice_rdwr) BTF_ID(func, bpf_dynptr_clone) +static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta) +{ + if (meta->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl] && + meta->arg_owning_ref) { + return false; + } + + return meta->kfunc_flags & KF_RET_NULL; +} + static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta) { return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_lock]; @@ -10478,6 +10606,8 @@ __process_kf_arg_ptr_to_graph_node(struct bpf_verifier_env *env, node_off, btf_name_by_offset(reg->btf, t->name_off)); return -EINVAL; } + meta->arg_btf = reg->btf; + meta->arg_btf_id = reg->btf_id; if (node_off != field->graph_root.node_offset) { verbose(env, "arg#1 offset=%d, but expected %s at offset=%d in struct %s\n", @@ -10881,10 +11011,12 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ meta->subprogno = reg->subprogno; break; case KF_ARG_PTR_TO_REFCOUNTED_KPTR: - if (!type_is_ptr_alloc_obj(reg->type) && !type_is_non_owning_ref(reg->type)) { + if (!type_is_ptr_alloc_obj(reg->type)) { verbose(env, "arg#%d is neither owning or non-owning ref\n", i); return -EINVAL; } + if (!type_is_non_owning_ref(reg->type)) + meta->arg_owning_ref = true; rec = reg_btf_record(reg); if (!rec) { @@ -11047,6 +11179,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) { release_ref_obj_id = regs[BPF_REG_2].ref_obj_id; insn_aux->insert_off = regs[BPF_REG_2].off; + insn_aux->kptr_struct_meta = btf_find_struct_meta(meta.arg_btf, meta.arg_btf_id); err = ref_convert_owning_non_owning(env, release_ref_obj_id); if (err) { verbose(env, "kfunc %s#%d conversion of owning ref to non-owning failed\n", @@ -12804,12 +12937,14 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) if (BPF_SRC(insn->code) == BPF_X) { struct bpf_reg_state *src_reg = regs + insn->src_reg; struct bpf_reg_state *dst_reg = regs + insn->dst_reg; + bool need_id = src_reg->type == SCALAR_VALUE && !src_reg->id && + !tnum_is_const(src_reg->var_off); if (BPF_CLASS(insn->code) == BPF_ALU64) { /* case: R1 = R2 * copy register state to dest reg */ - if (src_reg->type == SCALAR_VALUE && !src_reg->id) + if (need_id) /* Assign src and dst registers the same ID * that will be used by find_equal_scalars() * to propagate min/max range. @@ -12828,7 +12963,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) } else if (src_reg->type == SCALAR_VALUE) { bool is_src_reg_u32 = src_reg->umax_value <= U32_MAX; - if (is_src_reg_u32 && !src_reg->id) + if (is_src_reg_u32 && need_id) src_reg->id = ++env->id_gen; copy_register_state(dst_reg, src_reg); /* Make sure ID is cleared if src_reg is not in u32 range otherwise @@ -13160,7 +13295,7 @@ static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode, bool is_jmp32) { if (__is_pointer_value(false, reg)) { - if (!reg_type_not_null(reg->type)) + if (!reg_not_null(reg)) return -1; /* If pointer is valid tests against zero will fail so we can @@ -14984,8 +15119,9 @@ static bool range_within(struct bpf_reg_state *old, * So we look through our idmap to see if this old id has been seen before. If * so, we require the new id to match; otherwise, we add the id pair to the map. */ -static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap) +static bool check_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap) { + struct bpf_id_pair *map = idmap->map; unsigned int i; /* either both IDs should be set or both should be zero */ @@ -14996,20 +15132,34 @@ static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap) return true; for (i = 0; i < BPF_ID_MAP_SIZE; i++) { - if (!idmap[i].old) { + if (!map[i].old) { /* Reached an empty slot; haven't seen this id before */ - idmap[i].old = old_id; - idmap[i].cur = cur_id; + map[i].old = old_id; + map[i].cur = cur_id; return true; } - if (idmap[i].old == old_id) - return idmap[i].cur == cur_id; + if (map[i].old == old_id) + return map[i].cur == cur_id; + if (map[i].cur == cur_id) + return false; } /* We ran out of idmap slots, which should be impossible */ WARN_ON_ONCE(1); return false; } +/* Similar to check_ids(), but allocate a unique temporary ID + * for 'old_id' or 'cur_id' of zero. + * This makes pairs like '0 vs unique ID', 'unique ID vs 0' valid. + */ +static bool check_scalar_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap) +{ + old_id = old_id ? old_id : ++idmap->tmp_id_gen; + cur_id = cur_id ? cur_id : ++idmap->tmp_id_gen; + + return check_ids(old_id, cur_id, idmap); +} + static void clean_func_state(struct bpf_verifier_env *env, struct bpf_func_state *st) { @@ -15108,7 +15258,7 @@ next: static bool regs_exact(const struct bpf_reg_state *rold, const struct bpf_reg_state *rcur, - struct bpf_id_pair *idmap) + struct bpf_idmap *idmap) { return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 && check_ids(rold->id, rcur->id, idmap) && @@ -15117,7 +15267,7 @@ static bool regs_exact(const struct bpf_reg_state *rold, /* Returns true if (rold safe implies rcur safe) */ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, - struct bpf_reg_state *rcur, struct bpf_id_pair *idmap) + struct bpf_reg_state *rcur, struct bpf_idmap *idmap) { if (!(rold->live & REG_LIVE_READ)) /* explored state didn't use this */ @@ -15154,15 +15304,42 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, switch (base_type(rold->type)) { case SCALAR_VALUE: - if (regs_exact(rold, rcur, idmap)) - return true; - if (env->explore_alu_limits) - return false; + if (env->explore_alu_limits) { + /* explore_alu_limits disables tnum_in() and range_within() + * logic and requires everything to be strict + */ + return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 && + check_scalar_ids(rold->id, rcur->id, idmap); + } if (!rold->precise) return true; - /* new val must satisfy old val knowledge */ + /* Why check_ids() for scalar registers? + * + * Consider the following BPF code: + * 1: r6 = ... unbound scalar, ID=a ... + * 2: r7 = ... unbound scalar, ID=b ... + * 3: if (r6 > r7) goto +1 + * 4: r6 = r7 + * 5: if (r6 > X) goto ... + * 6: ... memory operation using r7 ... + * + * First verification path is [1-6]: + * - at (4) same bpf_reg_state::id (b) would be assigned to r6 and r7; + * - at (5) r6 would be marked <= X, find_equal_scalars() would also mark + * r7 <= X, because r6 and r7 share same id. + * Next verification path is [1-4, 6]. + * + * Instruction (6) would be reached in two states: + * I. r6{.id=b}, r7{.id=b} via path 1-6; + * II. r6{.id=a}, r7{.id=b} via path 1-4, 6. + * + * Use check_ids() to distinguish these states. + * --- + * Also verify that new value satisfies old value range knowledge. + */ return range_within(rold, rcur) && - tnum_in(rold->var_off, rcur->var_off); + tnum_in(rold->var_off, rcur->var_off) && + check_scalar_ids(rold->id, rcur->id, idmap); case PTR_TO_MAP_KEY: case PTR_TO_MAP_VALUE: case PTR_TO_MEM: @@ -15208,7 +15385,7 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, } static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, - struct bpf_func_state *cur, struct bpf_id_pair *idmap) + struct bpf_func_state *cur, struct bpf_idmap *idmap) { int i, spi; @@ -15311,7 +15488,7 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, } static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur, - struct bpf_id_pair *idmap) + struct bpf_idmap *idmap) { int i; @@ -15359,13 +15536,13 @@ static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_stat for (i = 0; i < MAX_BPF_REG; i++) if (!regsafe(env, &old->regs[i], &cur->regs[i], - env->idmap_scratch)) + &env->idmap_scratch)) return false; - if (!stacksafe(env, old, cur, env->idmap_scratch)) + if (!stacksafe(env, old, cur, &env->idmap_scratch)) return false; - if (!refsafe(old, cur, env->idmap_scratch)) + if (!refsafe(old, cur, &env->idmap_scratch)) return false; return true; @@ -15380,7 +15557,8 @@ static bool states_equal(struct bpf_verifier_env *env, if (old->curframe != cur->curframe) return false; - memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch)); + env->idmap_scratch.tmp_id_gen = env->id_gen; + memset(&env->idmap_scratch.map, 0, sizeof(env->idmap_scratch.map)); /* Verification state from speculative execution simulation * must never prune a non-speculative execution one. @@ -15398,7 +15576,7 @@ static bool states_equal(struct bpf_verifier_env *env, return false; if (old->active_lock.id && - !check_ids(old->active_lock.id, cur->active_lock.id, env->idmap_scratch)) + !check_ids(old->active_lock.id, cur->active_lock.id, &env->idmap_scratch)) return false; if (old->active_rcu_lock != cur->active_rcu_lock) diff --git a/lib/test_bpf.c b/lib/test_bpf.c index ade9ac672adb..fa0833410ac1 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -15056,8 +15056,7 @@ static __init int prepare_tail_call_tests(struct bpf_array **pprogs) int which, err; /* Allocate the table of programs to be used for tall calls */ - progs = kzalloc(sizeof(*progs) + (ntests + 1) * sizeof(progs->ptrs[0]), - GFP_KERNEL); + progs = kzalloc(struct_size(progs, ptrs, ntests + 1), GFP_KERNEL); if (!progs) goto out_nomem; diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c index 422ec6e7ccff..97e129e3f31c 100644 --- a/net/bpfilter/bpfilter_kern.c +++ b/net/bpfilter/bpfilter_kern.c @@ -21,7 +21,7 @@ static void shutdown_umh(void) if (tgid) { kill_pid(tgid, SIGKILL, 1); wait_event(tgid->wait_pidfd, thread_group_exited(tgid)); - bpfilter_umh_cleanup(info); + umd_cleanup_helper(info); } } diff --git a/net/core/filter.c b/net/core/filter.c index 968139f4a1ac..06ba0e56e369 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3948,20 +3948,21 @@ void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len) { - struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); u32 size = xdp->data_end - xdp->data; + struct skb_shared_info *sinfo; void *addr = xdp->data; int i; if (unlikely(offset > 0xffff || len > 0xffff)) return ERR_PTR(-EFAULT); - if (offset + len > xdp_get_buff_len(xdp)) + if (unlikely(offset + len > xdp_get_buff_len(xdp))) return ERR_PTR(-EINVAL); - if (offset < size) /* linear area */ + if (likely(offset < size)) /* linear area */ goto out; + sinfo = xdp_get_shared_info_from_buff(xdp); offset -= size; for (i = 0; i < sinfo->nr_frags; i++) { /* paged area */ u32 frag_size = skb_frag_size(&sinfo->frags[i]); @@ -5803,6 +5804,12 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN; struct fib_table *tb; + if (flags & BPF_FIB_LOOKUP_TBID) { + tbid = params->tbid; + /* zero out for vlan output */ + params->tbid = 0; + } + tb = fib_get_table(net, tbid); if (unlikely(!tb)) return BPF_FIB_LKUP_RET_NOT_FWDED; @@ -5936,6 +5943,12 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN; struct fib6_table *tb; + if (flags & BPF_FIB_LOOKUP_TBID) { + tbid = params->tbid; + /* zero out for vlan output */ + params->tbid = 0; + } + tb = ipv6_stub->fib6_get_table(net, tbid); if (unlikely(!tb)) return BPF_FIB_LKUP_RET_NOT_FWDED; @@ -6008,7 +6021,7 @@ set_fwd_params: #endif #define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \ - BPF_FIB_LOOKUP_SKIP_NEIGH) + BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID) BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx, struct bpf_fib_lookup *, params, int, plen, u32, flags) @@ -6555,12 +6568,11 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, static struct sock * __bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id, - u64 flags) + u64 flags, int sdif) { struct sock *sk = NULL; struct net *net; u8 family; - int sdif; if (len == sizeof(tuple->ipv4)) family = AF_INET; @@ -6572,10 +6584,12 @@ __bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, if (unlikely(flags || !((s32)netns_id < 0 || netns_id <= S32_MAX))) goto out; - if (family == AF_INET) - sdif = inet_sdif(skb); - else - sdif = inet6_sdif(skb); + if (sdif < 0) { + if (family == AF_INET) + sdif = inet_sdif(skb); + else + sdif = inet6_sdif(skb); + } if ((s32)netns_id < 0) { net = caller_net; @@ -6595,10 +6609,11 @@ out: static struct sock * __bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id, - u64 flags) + u64 flags, int sdif) { struct sock *sk = __bpf_skc_lookup(skb, tuple, len, caller_net, - ifindex, proto, netns_id, flags); + ifindex, proto, netns_id, flags, + sdif); if (sk) { struct sock *sk2 = sk_to_full_sk(sk); @@ -6638,7 +6653,7 @@ bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len, } return __bpf_skc_lookup(skb, tuple, len, caller_net, ifindex, proto, - netns_id, flags); + netns_id, flags, -1); } static struct sock * @@ -6727,6 +6742,78 @@ static const struct bpf_func_proto bpf_sk_lookup_udp_proto = { .arg5_type = ARG_ANYTHING, }; +BPF_CALL_5(bpf_tc_skc_lookup_tcp, struct sk_buff *, skb, + struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) +{ + struct net_device *dev = skb->dev; + int ifindex = dev->ifindex, sdif = dev_sdif(dev); + struct net *caller_net = dev_net(dev); + + return (unsigned long)__bpf_skc_lookup(skb, tuple, len, caller_net, + ifindex, IPPROTO_TCP, netns_id, + flags, sdif); +} + +static const struct bpf_func_proto bpf_tc_skc_lookup_tcp_proto = { + .func = bpf_tc_skc_lookup_tcp, + .gpl_only = false, + .pkt_access = true, + .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, +}; + +BPF_CALL_5(bpf_tc_sk_lookup_tcp, struct sk_buff *, skb, + struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) +{ + struct net_device *dev = skb->dev; + int ifindex = dev->ifindex, sdif = dev_sdif(dev); + struct net *caller_net = dev_net(dev); + + return (unsigned long)__bpf_sk_lookup(skb, tuple, len, caller_net, + ifindex, IPPROTO_TCP, netns_id, + flags, sdif); +} + +static const struct bpf_func_proto bpf_tc_sk_lookup_tcp_proto = { + .func = bpf_tc_sk_lookup_tcp, + .gpl_only = false, + .pkt_access = true, + .ret_type = RET_PTR_TO_SOCKET_OR_NULL, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, +}; + +BPF_CALL_5(bpf_tc_sk_lookup_udp, struct sk_buff *, skb, + struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) +{ + struct net_device *dev = skb->dev; + int ifindex = dev->ifindex, sdif = dev_sdif(dev); + struct net *caller_net = dev_net(dev); + + return (unsigned long)__bpf_sk_lookup(skb, tuple, len, caller_net, + ifindex, IPPROTO_UDP, netns_id, + flags, sdif); +} + +static const struct bpf_func_proto bpf_tc_sk_lookup_udp_proto = { + .func = bpf_tc_sk_lookup_udp, + .gpl_only = false, + .pkt_access = true, + .ret_type = RET_PTR_TO_SOCKET_OR_NULL, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, +}; + BPF_CALL_1(bpf_sk_release, struct sock *, sk) { if (sk && sk_is_refcounted(sk)) @@ -6744,12 +6831,13 @@ static const struct bpf_func_proto bpf_sk_release_proto = { BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx, struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags) { - struct net *caller_net = dev_net(ctx->rxq->dev); - int ifindex = ctx->rxq->dev->ifindex; + struct net_device *dev = ctx->rxq->dev; + int ifindex = dev->ifindex, sdif = dev_sdif(dev); + struct net *caller_net = dev_net(dev); return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net, ifindex, IPPROTO_UDP, netns_id, - flags); + flags, sdif); } static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = { @@ -6767,12 +6855,13 @@ static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = { BPF_CALL_5(bpf_xdp_skc_lookup_tcp, struct xdp_buff *, ctx, struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags) { - struct net *caller_net = dev_net(ctx->rxq->dev); - int ifindex = ctx->rxq->dev->ifindex; + struct net_device *dev = ctx->rxq->dev; + int ifindex = dev->ifindex, sdif = dev_sdif(dev); + struct net *caller_net = dev_net(dev); return (unsigned long)__bpf_skc_lookup(NULL, tuple, len, caller_net, ifindex, IPPROTO_TCP, netns_id, - flags); + flags, sdif); } static const struct bpf_func_proto bpf_xdp_skc_lookup_tcp_proto = { @@ -6790,12 +6879,13 @@ static const struct bpf_func_proto bpf_xdp_skc_lookup_tcp_proto = { BPF_CALL_5(bpf_xdp_sk_lookup_tcp, struct xdp_buff *, ctx, struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags) { - struct net *caller_net = dev_net(ctx->rxq->dev); - int ifindex = ctx->rxq->dev->ifindex; + struct net_device *dev = ctx->rxq->dev; + int ifindex = dev->ifindex, sdif = dev_sdif(dev); + struct net *caller_net = dev_net(dev); return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net, ifindex, IPPROTO_TCP, netns_id, - flags); + flags, sdif); } static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = { @@ -6815,7 +6905,8 @@ BPF_CALL_5(bpf_sock_addr_skc_lookup_tcp, struct bpf_sock_addr_kern *, ctx, { return (unsigned long)__bpf_skc_lookup(NULL, tuple, len, sock_net(ctx->sk), 0, - IPPROTO_TCP, netns_id, flags); + IPPROTO_TCP, netns_id, flags, + -1); } static const struct bpf_func_proto bpf_sock_addr_skc_lookup_tcp_proto = { @@ -6834,7 +6925,7 @@ BPF_CALL_5(bpf_sock_addr_sk_lookup_tcp, struct bpf_sock_addr_kern *, ctx, { return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, sock_net(ctx->sk), 0, IPPROTO_TCP, - netns_id, flags); + netns_id, flags, -1); } static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = { @@ -6853,7 +6944,7 @@ BPF_CALL_5(bpf_sock_addr_sk_lookup_udp, struct bpf_sock_addr_kern *, ctx, { return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, sock_net(ctx->sk), 0, IPPROTO_UDP, - netns_id, flags); + netns_id, flags, -1); } static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = { @@ -7982,9 +8073,9 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) #endif #ifdef CONFIG_INET case BPF_FUNC_sk_lookup_tcp: - return &bpf_sk_lookup_tcp_proto; + return &bpf_tc_sk_lookup_tcp_proto; case BPF_FUNC_sk_lookup_udp: - return &bpf_sk_lookup_udp_proto; + return &bpf_tc_sk_lookup_udp_proto; case BPF_FUNC_sk_release: return &bpf_sk_release_proto; case BPF_FUNC_tcp_sock: @@ -7992,7 +8083,7 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_get_listener_sock: return &bpf_get_listener_sock_proto; case BPF_FUNC_skc_lookup_tcp: - return &bpf_skc_lookup_tcp_proto; + return &bpf_tc_skc_lookup_tcp_proto; case BPF_FUNC_tcp_check_syncookie: return &bpf_tcp_check_syncookie_proto; case BPF_FUNC_skb_ecn_set_ce: diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 00afb66cd095..19538d628714 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -32,8 +32,6 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr) { struct bpf_stab *stab; - if (!capable(CAP_NET_ADMIN)) - return ERR_PTR(-EPERM); if (attr->max_entries == 0 || attr->key_size != 4 || (attr->value_size != sizeof(u32) && @@ -1085,8 +1083,6 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr) struct bpf_shtab *htab; int i, err; - if (!capable(CAP_NET_ADMIN)) - return ERR_PTR(-EPERM); if (attr->max_entries == 0 || attr->key_size == 0 || (attr->value_size != sizeof(u32) && diff --git a/net/ieee802154/header_ops.c b/net/ieee802154/header_ops.c index 35d384dfe29d..41a556be1017 100644 --- a/net/ieee802154/header_ops.c +++ b/net/ieee802154/header_ops.c @@ -120,6 +120,29 @@ ieee802154_hdr_push(struct sk_buff *skb, struct ieee802154_hdr *hdr) } EXPORT_SYMBOL_GPL(ieee802154_hdr_push); +int ieee802154_mac_cmd_push(struct sk_buff *skb, void *f, + const void *pl, unsigned int pl_len) +{ + struct ieee802154_mac_cmd_frame *frame = f; + struct ieee802154_mac_cmd_pl *mac_pl = &frame->mac_pl; + struct ieee802154_hdr *mhr = &frame->mhr; + int ret; + + skb_reserve(skb, sizeof(*mhr)); + ret = ieee802154_hdr_push(skb, mhr); + if (ret < 0) + return ret; + + skb_reset_mac_header(skb); + skb->mac_len = ret; + + skb_put_data(skb, mac_pl, sizeof(*mac_pl)); + skb_put_data(skb, pl, pl_len); + + return 0; +} +EXPORT_SYMBOL_GPL(ieee802154_mac_cmd_push); + int ieee802154_beacon_push(struct sk_buff *skb, struct ieee802154_beacon_frame *beacon) { @@ -284,6 +307,19 @@ ieee802154_hdr_pull(struct sk_buff *skb, struct ieee802154_hdr *hdr) } EXPORT_SYMBOL_GPL(ieee802154_hdr_pull); +int ieee802154_mac_cmd_pl_pull(struct sk_buff *skb, + struct ieee802154_mac_cmd_pl *mac_pl) +{ + if (!pskb_may_pull(skb, sizeof(*mac_pl))) + return -EINVAL; + + memcpy(mac_pl, skb->data, sizeof(*mac_pl)); + skb_pull(skb, sizeof(*mac_pl)); + + return 0; +} +EXPORT_SYMBOL_GPL(ieee802154_mac_cmd_pl_pull); + int ieee802154_hdr_peek_addrs(const struct sk_buff *skb, struct ieee802154_hdr *hdr) { diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 832e3c50816c..d610c1886160 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -233,7 +233,7 @@ static const struct nla_policy nl802154_policy[NL802154_ATTR_MAX+1] = { NLA_POLICY_RANGE(NLA_U8, NL802154_SCAN_DONE_REASON_FINISHED, NL802154_SCAN_DONE_REASON_ABORTED), [NL802154_ATTR_BEACON_INTERVAL] = - NLA_POLICY_MAX(NLA_U8, IEEE802154_MAX_SCAN_DURATION), + NLA_POLICY_MAX(NLA_U8, IEEE802154_ACTIVE_SCAN_DURATION), #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL [NL802154_ATTR_SEC_ENABLED] = { .type = NLA_U8, }, @@ -1417,6 +1417,11 @@ static int nl802154_trigger_scan(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } + if (wpan_phy->flags & WPAN_PHY_FLAG_DATAGRAMS_ONLY) { + NL_SET_ERR_MSG(info->extack, "PHY only supports datagrams"); + return -EOPNOTSUPP; + } + request = kzalloc(sizeof(*request), GFP_KERNEL); if (!request) return -ENOMEM; @@ -1426,6 +1431,7 @@ static int nl802154_trigger_scan(struct sk_buff *skb, struct genl_info *info) type = nla_get_u8(info->attrs[NL802154_ATTR_SCAN_TYPE]); switch (type) { + case NL802154_SCAN_ACTIVE: case NL802154_SCAN_PASSIVE: request->type = type; break; @@ -1583,6 +1589,11 @@ nl802154_send_beacons(struct sk_buff *skb, struct genl_info *info) return -EPERM; } + if (wpan_phy->flags & WPAN_PHY_FLAG_DATAGRAMS_ONLY) { + NL_SET_ERR_MSG(info->extack, "PHY only supports datagrams"); + return -EOPNOTSUPP; + } + request = kzalloc(sizeof(*request), GFP_KERNEL); if (!request) return -ENOMEM; diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c index 1b34cb9a7708..193bcc2acccc 100644 --- a/net/ipv4/bpfilter/sockopt.c +++ b/net/ipv4/bpfilter/sockopt.c @@ -12,15 +12,6 @@ struct bpfilter_umh_ops bpfilter_ops; EXPORT_SYMBOL_GPL(bpfilter_ops); -void bpfilter_umh_cleanup(struct umd_info *info) -{ - fput(info->pipe_to_umh); - fput(info->pipe_from_umh); - put_pid(info->tgid); - info->tgid = NULL; -} -EXPORT_SYMBOL_GPL(bpfilter_umh_cleanup); - static int bpfilter_mbox_request(struct sock *sk, int optname, sockptr_t optval, unsigned int optlen, bool is_set) { @@ -38,7 +29,7 @@ static int bpfilter_mbox_request(struct sock *sk, int optname, sockptr_t optval, } if (bpfilter_ops.info.tgid && thread_group_exited(bpfilter_ops.info.tgid)) - bpfilter_umh_cleanup(&bpfilter_ops.info); + umd_cleanup_helper(&bpfilter_ops.info); if (!bpfilter_ops.info.tgid) { err = bpfilter_ops.start(); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 71b42eef9dbf..d56edc2c885f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -839,7 +839,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, tss.len -= ret; spliced += ret; - if (!timeo) + if (!tss.len || !timeo) break; release_sock(sk); lock_sock(sk); diff --git a/net/mac802154/ieee802154_i.h b/net/mac802154/ieee802154_i.h index 63bab99ed368..c347ec9ff8c9 100644 --- a/net/mac802154/ieee802154_i.h +++ b/net/mac802154/ieee802154_i.h @@ -58,6 +58,7 @@ struct ieee802154_local { /* Scanning */ u8 scan_page; u8 scan_channel; + struct ieee802154_beacon_req_frame scan_beacon_req; struct cfg802154_scan_request __rcu *scan_req; struct delayed_work scan_work; @@ -70,6 +71,8 @@ struct ieee802154_local { /* Asynchronous tasks */ struct list_head rx_beacon_list; struct work_struct rx_beacon_work; + struct list_head rx_mac_cmd_list; + struct work_struct rx_mac_cmd_work; bool started; bool suspended; @@ -154,6 +157,22 @@ ieee802154_sdata_running(struct ieee802154_sub_if_data *sdata) return test_bit(SDATA_STATE_RUNNING, &sdata->state); } +static inline int ieee802154_get_mac_cmd(struct sk_buff *skb, u8 *mac_cmd) +{ + struct ieee802154_mac_cmd_pl mac_pl; + int ret; + + if (mac_cb(skb)->type != IEEE802154_FC_TYPE_MAC_CMD) + return -EINVAL; + + ret = ieee802154_mac_cmd_pl_pull(skb, &mac_pl); + if (ret) + return ret; + + *mac_cmd = mac_pl.cmd_id; + return 0; +} + extern struct ieee802154_mlme_ops mac802154_mlme_wpan; void ieee802154_rx(struct ieee802154_local *local, struct sk_buff *skb); @@ -275,6 +294,8 @@ static inline bool mac802154_is_beaconing(struct ieee802154_local *local) return test_bit(IEEE802154_IS_BEACONING, &local->ongoing); } +void mac802154_rx_mac_cmd_worker(struct work_struct *work); + /* interface handling */ int ieee802154_iface_init(void); void ieee802154_iface_exit(void); diff --git a/net/mac802154/main.c b/net/mac802154/main.c index ee23e234b998..357ece67432b 100644 --- a/net/mac802154/main.c +++ b/net/mac802154/main.c @@ -90,6 +90,7 @@ ieee802154_alloc_hw(size_t priv_data_len, const struct ieee802154_ops *ops) INIT_LIST_HEAD(&local->interfaces); INIT_LIST_HEAD(&local->rx_beacon_list); + INIT_LIST_HEAD(&local->rx_mac_cmd_list); mutex_init(&local->iflist_mtx); tasklet_setup(&local->tasklet, ieee802154_tasklet_handler); @@ -100,6 +101,7 @@ ieee802154_alloc_hw(size_t priv_data_len, const struct ieee802154_ops *ops) INIT_DELAYED_WORK(&local->scan_work, mac802154_scan_worker); INIT_WORK(&local->rx_beacon_work, mac802154_rx_beacon_worker); INIT_DELAYED_WORK(&local->beacon_work, mac802154_beacon_worker); + INIT_WORK(&local->rx_mac_cmd_work, mac802154_rx_mac_cmd_worker); /* init supported flags with 802.15.4 default ranges */ phy->supported.max_minbe = 8; diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c index da0628ee3c89..e2434b4fe514 100644 --- a/net/mac802154/rx.c +++ b/net/mac802154/rx.c @@ -47,6 +47,62 @@ void mac802154_rx_beacon_worker(struct work_struct *work) kfree(mac_pkt); } +static bool mac802154_should_answer_beacon_req(struct ieee802154_local *local) +{ + struct cfg802154_beacon_request *beacon_req; + unsigned int interval; + + rcu_read_lock(); + beacon_req = rcu_dereference(local->beacon_req); + if (!beacon_req) { + rcu_read_unlock(); + return false; + } + + interval = beacon_req->interval; + rcu_read_unlock(); + + if (!mac802154_is_beaconing(local)) + return false; + + return interval == IEEE802154_ACTIVE_SCAN_DURATION; +} + +void mac802154_rx_mac_cmd_worker(struct work_struct *work) +{ + struct ieee802154_local *local = + container_of(work, struct ieee802154_local, rx_mac_cmd_work); + struct cfg802154_mac_pkt *mac_pkt; + u8 mac_cmd; + int rc; + + mac_pkt = list_first_entry_or_null(&local->rx_mac_cmd_list, + struct cfg802154_mac_pkt, node); + if (!mac_pkt) + return; + + rc = ieee802154_get_mac_cmd(mac_pkt->skb, &mac_cmd); + if (rc) + goto out; + + switch (mac_cmd) { + case IEEE802154_CMD_BEACON_REQ: + dev_dbg(&mac_pkt->sdata->dev->dev, "processing BEACON REQ\n"); + if (!mac802154_should_answer_beacon_req(local)) + break; + + queue_delayed_work(local->mac_wq, &local->beacon_work, 0); + break; + default: + break; + } + +out: + list_del(&mac_pkt->node); + kfree_skb(mac_pkt->skb); + kfree(mac_pkt); +} + static int ieee802154_subif_frame(struct ieee802154_sub_if_data *sdata, struct sk_buff *skb, const struct ieee802154_hdr *hdr) @@ -140,8 +196,20 @@ ieee802154_subif_frame(struct ieee802154_sub_if_data *sdata, list_add_tail(&mac_pkt->node, &sdata->local->rx_beacon_list); queue_work(sdata->local->mac_wq, &sdata->local->rx_beacon_work); return NET_RX_SUCCESS; - case IEEE802154_FC_TYPE_ACK: + case IEEE802154_FC_TYPE_MAC_CMD: + dev_dbg(&sdata->dev->dev, "MAC COMMAND received\n"); + mac_pkt = kzalloc(sizeof(*mac_pkt), GFP_ATOMIC); + if (!mac_pkt) + goto fail; + + mac_pkt->skb = skb_get(skb); + mac_pkt->sdata = sdata; + list_add_tail(&mac_pkt->node, &sdata->local->rx_mac_cmd_list); + queue_work(sdata->local->mac_wq, &sdata->local->rx_mac_cmd_work); + return NET_RX_SUCCESS; + + case IEEE802154_FC_TYPE_ACK: goto fail; case IEEE802154_FC_TYPE_DATA: diff --git a/net/mac802154/scan.c b/net/mac802154/scan.c index 5c191bedd72c..d9658f2c4ae6 100644 --- a/net/mac802154/scan.c +++ b/net/mac802154/scan.c @@ -18,8 +18,12 @@ #define IEEE802154_BEACON_MHR_SZ 13 #define IEEE802154_BEACON_PL_SZ 4 +#define IEEE802154_MAC_CMD_MHR_SZ 23 +#define IEEE802154_MAC_CMD_PL_SZ 1 #define IEEE802154_BEACON_SKB_SZ (IEEE802154_BEACON_MHR_SZ + \ IEEE802154_BEACON_PL_SZ) +#define IEEE802154_MAC_CMD_SKB_SZ (IEEE802154_MAC_CMD_MHR_SZ + \ + IEEE802154_MAC_CMD_PL_SZ) /* mac802154_scan_cleanup_locked() must be called upon scan completion or abort. * - Completions are asynchronous, not locked by the rtnl and decided by the @@ -131,6 +135,42 @@ static int mac802154_scan_find_next_chan(struct ieee802154_local *local, return 0; } +static int mac802154_scan_prepare_beacon_req(struct ieee802154_local *local) +{ + memset(&local->scan_beacon_req, 0, sizeof(local->scan_beacon_req)); + local->scan_beacon_req.mhr.fc.type = IEEE802154_FC_TYPE_MAC_CMD; + local->scan_beacon_req.mhr.fc.dest_addr_mode = IEEE802154_SHORT_ADDRESSING; + local->scan_beacon_req.mhr.fc.version = IEEE802154_2003_STD; + local->scan_beacon_req.mhr.fc.source_addr_mode = IEEE802154_NO_ADDRESSING; + local->scan_beacon_req.mhr.dest.mode = IEEE802154_ADDR_SHORT; + local->scan_beacon_req.mhr.dest.pan_id = cpu_to_le16(IEEE802154_PANID_BROADCAST); + local->scan_beacon_req.mhr.dest.short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST); + local->scan_beacon_req.mac_pl.cmd_id = IEEE802154_CMD_BEACON_REQ; + + return 0; +} + +static int mac802154_transmit_beacon_req(struct ieee802154_local *local, + struct ieee802154_sub_if_data *sdata) +{ + struct sk_buff *skb; + int ret; + + skb = alloc_skb(IEEE802154_MAC_CMD_SKB_SZ, GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + skb->dev = sdata->dev; + + ret = ieee802154_mac_cmd_push(skb, &local->scan_beacon_req, NULL, 0); + if (ret) { + kfree_skb(skb); + return ret; + } + + return ieee802154_mlme_tx(local, sdata, skb); +} + void mac802154_scan_worker(struct work_struct *work) { struct ieee802154_local *local = @@ -206,6 +246,13 @@ void mac802154_scan_worker(struct work_struct *work) goto end_scan; } + if (scan_req->type == NL802154_SCAN_ACTIVE) { + ret = mac802154_transmit_beacon_req(local, sdata); + if (ret) + dev_err(&sdata->dev->dev, + "Error when transmitting beacon request (%d)\n", ret); + } + ieee802154_configure_durations(wpan_phy, page, channel); scan_duration = mac802154_scan_get_channel_time(scan_req_duration, wpan_phy->symbol_duration); @@ -231,8 +278,8 @@ int mac802154_trigger_scan_locked(struct ieee802154_sub_if_data *sdata, if (mac802154_is_scanning(local)) return -EBUSY; - /* TODO: support other scanning type */ - if (request->type != NL802154_SCAN_PASSIVE) + if (request->type != NL802154_SCAN_PASSIVE && + request->type != NL802154_SCAN_ACTIVE) return -EOPNOTSUPP; /* Store scanning parameters */ @@ -247,6 +294,8 @@ int mac802154_trigger_scan_locked(struct ieee802154_sub_if_data *sdata, local->scan_page = request->page; local->scan_channel = -1; set_bit(IEEE802154_IS_SCANNING, &local->ongoing); + if (request->type == NL802154_SCAN_ACTIVE) + mac802154_scan_prepare_beacon_req(local); nl802154_scan_started(request->wpan_phy, request->wpan_dev); @@ -354,6 +403,7 @@ void mac802154_beacon_worker(struct work_struct *work) struct cfg802154_beacon_request *beacon_req; struct ieee802154_sub_if_data *sdata; struct wpan_dev *wpan_dev; + u8 interval; int ret; rcu_read_lock(); @@ -374,6 +424,7 @@ void mac802154_beacon_worker(struct work_struct *work) } wpan_dev = beacon_req->wpan_dev; + interval = beacon_req->interval; rcu_read_unlock(); @@ -383,8 +434,9 @@ void mac802154_beacon_worker(struct work_struct *work) dev_err(&sdata->dev->dev, "Beacon could not be transmitted (%d)\n", ret); - queue_delayed_work(local->mac_wq, &local->beacon_work, - local->beacon_interval); + if (interval < IEEE802154_ACTIVE_SCAN_DURATION) + queue_delayed_work(local->mac_wq, &local->beacon_work, + local->beacon_interval); } int mac802154_stop_beacons_locked(struct ieee802154_local *local, @@ -439,13 +491,17 @@ int mac802154_send_beacons_locked(struct ieee802154_sub_if_data *sdata, local->beacon.mhr.source.pan_id = request->wpan_dev->pan_id; local->beacon.mhr.source.extended_addr = request->wpan_dev->extended_addr; local->beacon.mac_pl.beacon_order = request->interval; - local->beacon.mac_pl.superframe_order = request->interval; + if (request->interval <= IEEE802154_MAX_SCAN_DURATION) + local->beacon.mac_pl.superframe_order = request->interval; local->beacon.mac_pl.final_cap_slot = 0xf; local->beacon.mac_pl.battery_life_ext = 0; - /* TODO: Fill this field depending on the coordinator capacity */ + /* TODO: Fill this field with the coordinator situation in the network */ local->beacon.mac_pl.pan_coordinator = 1; local->beacon.mac_pl.assoc_permit = 1; + if (request->interval == IEEE802154_ACTIVE_SCAN_DURATION) + return 0; + /* Start the beacon work */ local->beacon_interval = mac802154_scan_get_channel_time(request->interval, diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 73c61a010b01..f9d196439b49 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2826,7 +2826,7 @@ unlock: } while (size); mutex_unlock(&u->iolock); - if (state->msg) + if (state->msg && check_creds) scm_recv(sock, state->msg, &scm, flags); else scm_destroy(&scm); diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c index 2c1427074a3b..e1c526f97ce3 100644 --- a/net/xdp/xskmap.c +++ b/net/xdp/xskmap.c @@ -5,7 +5,6 @@ #include <linux/bpf.h> #include <linux/filter.h> -#include <linux/capability.h> #include <net/xdp_sock.h> #include <linux/slab.h> #include <linux/sched.h> @@ -68,9 +67,6 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr) int numa_node; u64 size; - if (!capable(CAP_NET_ADMIN)) - return ERR_PTR(-EPERM); - if (attr->max_entries == 0 || attr->key_size != 4 || attr->value_size != 4 || attr->map_flags & ~(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)) diff --git a/samples/bpf/xdp1_kern.c b/samples/bpf/xdp1_kern.c index 0a5c704badd0..d91f27cbcfa9 100644 --- a/samples/bpf/xdp1_kern.c +++ b/samples/bpf/xdp1_kern.c @@ -39,7 +39,7 @@ static int parse_ipv6(void *data, u64 nh_off, void *data_end) return ip6h->nexthdr; } -#define XDPBUFSIZE 64 +#define XDPBUFSIZE 60 SEC("xdp.frags") int xdp_prog1(struct xdp_md *ctx) { diff --git a/samples/bpf/xdp2_kern.c b/samples/bpf/xdp2_kern.c index 67804ecf7ce3..8bca674451ed 100644 --- a/samples/bpf/xdp2_kern.c +++ b/samples/bpf/xdp2_kern.c @@ -55,7 +55,7 @@ static int parse_ipv6(void *data, u64 nh_off, void *data_end) return ip6h->nexthdr; } -#define XDPBUFSIZE 64 +#define XDPBUFSIZE 60 SEC("xdp.frags") int xdp_prog1(struct xdp_md *ctx) { diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile index ac548a7baa73..4b8079f294f6 100644 --- a/tools/bpf/resolve_btfids/Makefile +++ b/tools/bpf/resolve_btfids/Makefile @@ -67,7 +67,7 @@ $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OU LIBELF_FLAGS := $(shell $(HOSTPKG_CONFIG) libelf --cflags 2>/dev/null) LIBELF_LIBS := $(shell $(HOSTPKG_CONFIG) libelf --libs 2>/dev/null || echo -lelf) -HOSTCFLAGS += -g \ +HOSTCFLAGS_resolve_btfids += -g \ -I$(srctree)/tools/include \ -I$(srctree)/tools/include/uapi \ -I$(LIBBPF_INCLUDE) \ @@ -76,7 +76,7 @@ HOSTCFLAGS += -g \ LIBS = $(LIBELF_LIBS) -lz -export srctree OUTPUT HOSTCFLAGS Q HOSTCC HOSTLD HOSTAR +export srctree OUTPUT HOSTCFLAGS_resolve_btfids Q HOSTCC HOSTLD HOSTAR include $(srctree)/tools/build/Makefile.include $(BINARY_IN): fixdep FORCE prepare | $(OUTPUT) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 6961a7b70028..60a9d59beeab 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3178,6 +3178,10 @@ union bpf_attr { * **BPF_FIB_LOOKUP_DIRECT** * Do a direct table lookup vs full lookup using FIB * rules. + * **BPF_FIB_LOOKUP_TBID** + * Used with BPF_FIB_LOOKUP_DIRECT. + * Use the routing table ID present in *params*->tbid + * for the fib lookup. * **BPF_FIB_LOOKUP_OUTPUT** * Perform lookup from an egress perspective (default is * ingress). @@ -6832,6 +6836,7 @@ enum { BPF_FIB_LOOKUP_DIRECT = (1U << 0), BPF_FIB_LOOKUP_OUTPUT = (1U << 1), BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), + BPF_FIB_LOOKUP_TBID = (1U << 3), }; enum { @@ -6892,9 +6897,19 @@ struct bpf_fib_lookup { __u32 ipv6_dst[4]; /* in6_addr; network order */ }; - /* output */ - __be16 h_vlan_proto; - __be16 h_vlan_TCI; + union { + struct { + /* output */ + __be16 h_vlan_proto; + __be16 h_vlan_TCI; + }; + /* input: when accompanied with the + * 'BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID` flags, a + * specific routing table to use for the fib lookup. + */ + __u32 tbid; + }; + __u8 smac[6]; /* ETH_ALEN */ __u8 dmac[6]; /* ETH_ALEN */ }; diff --git a/tools/net/ynl/lib/nlspec.py b/tools/net/ynl/lib/nlspec.py index 1ba572cae27b..0ff0d18666b2 100644 --- a/tools/net/ynl/lib/nlspec.py +++ b/tools/net/ynl/lib/nlspec.py @@ -154,6 +154,9 @@ class SpecAttr(SpecElement): is_multi bool, attr may repeat multiple times struct_name string, name of struct definition sub_type string, name of sub type + len integer, optional byte length of binary types + display_hint string, hint to help choose format specifier + when displaying the value """ def __init__(self, family, attr_set, yaml, value): super().__init__(family, yaml) @@ -164,6 +167,8 @@ class SpecAttr(SpecElement): self.struct_name = yaml.get('struct') self.sub_type = yaml.get('sub-type') self.byte_order = yaml.get('byte-order') + self.len = yaml.get('len') + self.display_hint = yaml.get('display-hint') class SpecAttrSet(SpecElement): @@ -229,12 +234,17 @@ class SpecStructMember(SpecElement): type string, type of the member attribute byte_order string or None for native byte order enum string, name of the enum definition + len integer, optional byte length of binary types + display_hint string, hint to help choose format specifier + when displaying the value """ def __init__(self, family, yaml): super().__init__(family, yaml) self.type = yaml['type'] self.byte_order = yaml.get('byte-order') self.enum = yaml.get('enum') + self.len = yaml.get('len') + self.display_hint = yaml.get('display-hint') class SpecStruct(SpecElement): diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index 3b343d6cbbc0..1b3a36fbb1c3 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -8,6 +8,8 @@ import socket import struct from struct import Struct import yaml +import ipaddress +import uuid from .nlspec import SpecFamily @@ -105,6 +107,20 @@ class NlAttr: else format.little return format.native + @classmethod + def formatted_string(cls, raw, display_hint): + if display_hint == 'mac': + formatted = ':'.join('%02x' % b for b in raw) + elif display_hint == 'hex': + formatted = bytes.hex(raw, ' ') + elif display_hint in [ 'ipv4', 'ipv6' ]: + formatted = format(ipaddress.ip_address(raw)) + elif display_hint == 'uuid': + formatted = str(uuid.UUID(bytes=raw)) + else: + formatted = raw + return formatted + def as_scalar(self, attr_type, byte_order=None): format = self.get_format(attr_type, byte_order) return format.unpack(self.raw)[0] @@ -124,10 +140,16 @@ class NlAttr: offset = 0 for m in members: # TODO: handle non-scalar members - format = self.get_format(m.type, m.byte_order) - decoded = format.unpack_from(self.raw, offset) - offset += format.size - value[m.name] = decoded[0] + if m.type == 'binary': + decoded = self.raw[offset:offset+m['len']] + offset += m['len'] + elif m.type in NlAttr.type_formats: + format = self.get_format(m.type, m.byte_order) + [ decoded ] = format.unpack_from(self.raw, offset) + offset += format.size + if m.display_hint: + decoded = self.formatted_string(decoded, m.display_hint) + value[m.name] = decoded return value def __repr__(self): @@ -385,7 +407,7 @@ class YnlFamily(SpecFamily): elif attr["type"] == 'string': attr_payload = str(value).encode('ascii') + b'\x00' elif attr["type"] == 'binary': - attr_payload = value + attr_payload = bytes.fromhex(value) elif attr['type'] in NlAttr.type_formats: format = NlAttr.get_format(attr['type'], attr.byte_order) attr_payload = format.pack(int(value)) @@ -421,6 +443,8 @@ class YnlFamily(SpecFamily): decoded = attr.as_c_array(attr_spec.sub_type) else: decoded = attr.as_bin() + if attr_spec.display_hint: + decoded = NlAttr.formatted_string(decoded, attr_spec.display_hint) return decoded def _decode(self, attrs, space): diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index d9c080ac1796..41fe5a82b88b 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -17,7 +17,7 @@ struct env env = { .duration_sec = 5, .affinity = false, .quiet = false, - .consumer_cnt = 1, + .consumer_cnt = 0, .producer_cnt = 1, }; @@ -441,12 +441,14 @@ static void setup_timer() static void set_thread_affinity(pthread_t thread, int cpu) { cpu_set_t cpuset; + int err; CPU_ZERO(&cpuset); CPU_SET(cpu, &cpuset); - if (pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset)) { + err = pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset); + if (err) { fprintf(stderr, "setting affinity to CPU #%d failed: %d\n", - cpu, errno); + cpu, -err); exit(1); } } @@ -467,7 +469,7 @@ static int next_cpu(struct cpu_set *cpu_set) exit(1); } - return cpu_set->next_cpu++; + return cpu_set->next_cpu++ % env.nr_cpus; } static struct bench_state { @@ -605,7 +607,7 @@ static void setup_benchmark(void) bench->consumer_thread, (void *)(long)i); if (err) { fprintf(stderr, "failed to create consumer thread #%d: %d\n", - i, -errno); + i, -err); exit(1); } if (env.affinity) @@ -624,7 +626,7 @@ static void setup_benchmark(void) bench->producer_thread, (void *)(long)i); if (err) { fprintf(stderr, "failed to create producer thread #%d: %d\n", - i, -errno); + i, -err); exit(1); } if (env.affinity) @@ -657,6 +659,7 @@ static void collect_measurements(long delta_ns) { int main(int argc, char **argv) { + env.nr_cpus = get_nprocs(); parse_cmdline_args_init(argc, argv); if (env.list) { diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h index 402729c6a3ac..7ff32be3d730 100644 --- a/tools/testing/selftests/bpf/bench.h +++ b/tools/testing/selftests/bpf/bench.h @@ -27,6 +27,7 @@ struct env { bool quiet; int consumer_cnt; int producer_cnt; + int nr_cpus; struct cpu_set prod_cpus; struct cpu_set cons_cpus; }; diff --git a/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c index 7c8ccc108313..e289dd1a14ee 100644 --- a/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c +++ b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c @@ -107,9 +107,9 @@ const struct argp bench_bloom_map_argp = { static void validate(void) { - if (env.consumer_cnt != 1) { + if (env.consumer_cnt != 0) { fprintf(stderr, - "The bloom filter benchmarks do not support multi-consumer use\n"); + "The bloom filter benchmarks do not support consumer\n"); exit(1); } } @@ -421,18 +421,12 @@ static void measure(struct bench_res *res) last_false_hits = total_false_hits; } -static void *consumer(void *input) -{ - return NULL; -} - const struct bench bench_bloom_lookup = { .name = "bloom-lookup", .argp = &bench_bloom_map_argp, .validate = validate, .setup = bloom_lookup_setup, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -444,7 +438,6 @@ const struct bench bench_bloom_update = { .validate = validate, .setup = bloom_update_setup, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -456,7 +449,6 @@ const struct bench bench_bloom_false_positive = { .validate = validate, .setup = false_positive_setup, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = false_hits_report_progress, .report_final = false_hits_report_final, @@ -468,7 +460,6 @@ const struct bench bench_hashmap_without_bloom = { .validate = validate, .setup = hashmap_no_bloom_setup, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -480,7 +471,6 @@ const struct bench bench_hashmap_with_bloom = { .validate = validate, .setup = hashmap_with_bloom_setup, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c index 75abe8137b6c..ee1dc12c5e5e 100644 --- a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c +++ b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c @@ -14,8 +14,8 @@ static struct ctx { static void validate(void) { - if (env.consumer_cnt != 1) { - fprintf(stderr, "benchmark doesn't support multi-consumer!\n"); + if (env.consumer_cnt != 0) { + fprintf(stderr, "benchmark doesn't support consumer!\n"); exit(1); } } @@ -30,11 +30,6 @@ static void *producer(void *input) return NULL; } -static void *consumer(void *input) -{ - return NULL; -} - static void measure(struct bench_res *res) { } @@ -88,7 +83,6 @@ const struct bench bench_bpf_hashmap_full_update = { .validate = validate, .setup = setup, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = NULL, .report_final = hashmap_report_final, diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c index 8dbb02f75cff..279ff1b8b5b2 100644 --- a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c +++ b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c @@ -113,8 +113,8 @@ const struct argp bench_hashmap_lookup_argp = { static void validate(void) { - if (env.consumer_cnt != 1) { - fprintf(stderr, "benchmark doesn't support multi-consumer!\n"); + if (env.consumer_cnt != 0) { + fprintf(stderr, "benchmark doesn't support consumer!\n"); exit(1); } @@ -134,11 +134,6 @@ static void *producer(void *input) return NULL; } -static void *consumer(void *input) -{ - return NULL; -} - static void measure(struct bench_res *res) { } @@ -276,7 +271,6 @@ const struct bench bench_bpf_hashmap_lookup = { .validate = validate, .setup = setup, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = NULL, .report_final = hashmap_report_final, diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c b/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c index d8a0394e10b1..a705cfb2bccc 100644 --- a/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c +++ b/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c @@ -47,8 +47,8 @@ const struct argp bench_bpf_loop_argp = { static void validate(void) { - if (env.consumer_cnt != 1) { - fprintf(stderr, "benchmark doesn't support multi-consumer!\n"); + if (env.consumer_cnt != 0) { + fprintf(stderr, "benchmark doesn't support consumer!\n"); exit(1); } } @@ -62,11 +62,6 @@ static void *producer(void *input) return NULL; } -static void *consumer(void *input) -{ - return NULL; -} - static void measure(struct bench_res *res) { res->hits = atomic_swap(&ctx.skel->bss->hits, 0); @@ -99,7 +94,6 @@ const struct bench bench_bpf_loop = { .validate = validate, .setup = setup, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = ops_report_progress, .report_final = ops_report_final, diff --git a/tools/testing/selftests/bpf/benchs/bench_count.c b/tools/testing/selftests/bpf/benchs/bench_count.c index 078972ce208e..ba89ed3936b7 100644 --- a/tools/testing/selftests/bpf/benchs/bench_count.c +++ b/tools/testing/selftests/bpf/benchs/bench_count.c @@ -18,11 +18,6 @@ static void *count_global_producer(void *input) return NULL; } -static void *count_global_consumer(void *input) -{ - return NULL; -} - static void count_global_measure(struct bench_res *res) { struct count_global_ctx *ctx = &count_global_ctx; @@ -40,7 +35,7 @@ static void count_local_setup(void) { struct count_local_ctx *ctx = &count_local_ctx; - ctx->hits = calloc(env.consumer_cnt, sizeof(*ctx->hits)); + ctx->hits = calloc(env.producer_cnt, sizeof(*ctx->hits)); if (!ctx->hits) exit(1); } @@ -56,11 +51,6 @@ static void *count_local_producer(void *input) return NULL; } -static void *count_local_consumer(void *input) -{ - return NULL; -} - static void count_local_measure(struct bench_res *res) { struct count_local_ctx *ctx = &count_local_ctx; @@ -74,7 +64,6 @@ static void count_local_measure(struct bench_res *res) const struct bench bench_count_global = { .name = "count-global", .producer_thread = count_global_producer, - .consumer_thread = count_global_consumer, .measure = count_global_measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -84,7 +73,6 @@ const struct bench bench_count_local = { .name = "count-local", .setup = count_local_setup, .producer_thread = count_local_producer, - .consumer_thread = count_local_consumer, .measure = count_local_measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage.c b/tools/testing/selftests/bpf/benchs/bench_local_storage.c index d4b2817306d4..452499428ceb 100644 --- a/tools/testing/selftests/bpf/benchs/bench_local_storage.c +++ b/tools/testing/selftests/bpf/benchs/bench_local_storage.c @@ -74,8 +74,8 @@ static void validate(void) fprintf(stderr, "benchmark doesn't support multi-producer!\n"); exit(1); } - if (env.consumer_cnt != 1) { - fprintf(stderr, "benchmark doesn't support multi-consumer!\n"); + if (env.consumer_cnt != 0) { + fprintf(stderr, "benchmark doesn't support consumer!\n"); exit(1); } @@ -230,11 +230,6 @@ static inline void trigger_bpf_program(void) syscall(__NR_getpgid); } -static void *consumer(void *input) -{ - return NULL; -} - static void *producer(void *input) { while (true) @@ -259,7 +254,6 @@ const struct bench bench_local_storage_cache_seq_get = { .validate = validate, .setup = local_storage_cache_get_setup, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = local_storage_report_progress, .report_final = local_storage_report_final, @@ -271,7 +265,6 @@ const struct bench bench_local_storage_cache_interleaved_get = { .validate = validate, .setup = local_storage_cache_get_interleaved_setup, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = local_storage_report_progress, .report_final = local_storage_report_final, @@ -283,7 +276,6 @@ const struct bench bench_local_storage_cache_hashmap_control = { .validate = validate, .setup = hashmap_setup, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = local_storage_report_progress, .report_final = local_storage_report_final, diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c b/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c index cff703f90e95..b36de42ee4d9 100644 --- a/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c +++ b/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c @@ -71,7 +71,7 @@ const struct argp bench_local_storage_create_argp = { static void validate(void) { - if (env.consumer_cnt > 1) { + if (env.consumer_cnt != 0) { fprintf(stderr, "local-storage-create benchmark does not need consumer\n"); exit(1); @@ -143,11 +143,6 @@ static void measure(struct bench_res *res) res->drops = atomic_swap(&skel->bss->kmalloc_cnts, 0); } -static void *consumer(void *input) -{ - return NULL; -} - static void *sk_producer(void *input) { struct thread *t = &threads[(long)(input)]; @@ -257,7 +252,6 @@ const struct bench bench_local_storage_create = { .validate = validate, .setup = setup, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = report_progress, .report_final = report_final, diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c b/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c index d5eb5587f2aa..edf0b00418c1 100644 --- a/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c +++ b/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c @@ -72,8 +72,8 @@ static void validate(void) fprintf(stderr, "benchmark doesn't support multi-producer!\n"); exit(1); } - if (env.consumer_cnt != 1) { - fprintf(stderr, "benchmark doesn't support multi-consumer!\n"); + if (env.consumer_cnt != 0) { + fprintf(stderr, "benchmark doesn't support consumer!\n"); exit(1); } @@ -197,11 +197,6 @@ static void measure(struct bench_res *res) ctx.prev_kthread_stime = ticks; } -static void *consumer(void *input) -{ - return NULL; -} - static void *producer(void *input) { while (true) @@ -262,7 +257,6 @@ const struct bench bench_local_storage_tasks_trace = { .validate = validate, .setup = local_storage_tasks_trace_setup, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = report_progress, .report_final = report_final, diff --git a/tools/testing/selftests/bpf/benchs/bench_rename.c b/tools/testing/selftests/bpf/benchs/bench_rename.c index 3c203b6d6a6e..bf66893c7a33 100644 --- a/tools/testing/selftests/bpf/benchs/bench_rename.c +++ b/tools/testing/selftests/bpf/benchs/bench_rename.c @@ -17,8 +17,8 @@ static void validate(void) fprintf(stderr, "benchmark doesn't support multi-producer!\n"); exit(1); } - if (env.consumer_cnt != 1) { - fprintf(stderr, "benchmark doesn't support multi-consumer!\n"); + if (env.consumer_cnt != 0) { + fprintf(stderr, "benchmark doesn't support consumer!\n"); exit(1); } } @@ -106,17 +106,11 @@ static void setup_fexit(void) attach_bpf(ctx.skel->progs.prog5); } -static void *consumer(void *input) -{ - return NULL; -} - const struct bench bench_rename_base = { .name = "rename-base", .validate = validate, .setup = setup_base, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -127,7 +121,6 @@ const struct bench bench_rename_kprobe = { .validate = validate, .setup = setup_kprobe, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -138,7 +131,6 @@ const struct bench bench_rename_kretprobe = { .validate = validate, .setup = setup_kretprobe, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -149,7 +141,6 @@ const struct bench bench_rename_rawtp = { .validate = validate, .setup = setup_rawtp, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -160,7 +151,6 @@ const struct bench bench_rename_fentry = { .validate = validate, .setup = setup_fentry, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -171,7 +161,6 @@ const struct bench bench_rename_fexit = { .validate = validate, .setup = setup_fexit, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c index fc91fdac4faa..3ca14ad36607 100644 --- a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c +++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c @@ -96,7 +96,7 @@ static inline void bufs_trigger_batch(void) static void bufs_validate(void) { if (env.consumer_cnt != 1) { - fprintf(stderr, "rb-libbpf benchmark doesn't support multi-consumer!\n"); + fprintf(stderr, "rb-libbpf benchmark needs one consumer!\n"); exit(1); } diff --git a/tools/testing/selftests/bpf/benchs/bench_strncmp.c b/tools/testing/selftests/bpf/benchs/bench_strncmp.c index d3fad2ba6916..a5e1428fd7a0 100644 --- a/tools/testing/selftests/bpf/benchs/bench_strncmp.c +++ b/tools/testing/selftests/bpf/benchs/bench_strncmp.c @@ -50,8 +50,8 @@ const struct argp bench_strncmp_argp = { static void strncmp_validate(void) { - if (env.consumer_cnt != 1) { - fprintf(stderr, "strncmp benchmark doesn't support multi-consumer!\n"); + if (env.consumer_cnt != 0) { + fprintf(stderr, "strncmp benchmark doesn't support consumer!\n"); exit(1); } } @@ -128,11 +128,6 @@ static void *strncmp_producer(void *ctx) return NULL; } -static void *strncmp_consumer(void *ctx) -{ - return NULL; -} - static void strncmp_measure(struct bench_res *res) { res->hits = atomic_swap(&ctx.skel->bss->hits, 0); @@ -144,7 +139,6 @@ const struct bench bench_strncmp_no_helper = { .validate = strncmp_validate, .setup = strncmp_no_helper_setup, .producer_thread = strncmp_producer, - .consumer_thread = strncmp_consumer, .measure = strncmp_measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -156,7 +150,6 @@ const struct bench bench_strncmp_helper = { .validate = strncmp_validate, .setup = strncmp_helper_setup, .producer_thread = strncmp_producer, - .consumer_thread = strncmp_consumer, .measure = strncmp_measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c index 0c481de2833d..dbd362771d6a 100644 --- a/tools/testing/selftests/bpf/benchs/bench_trigger.c +++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c @@ -13,8 +13,8 @@ static struct counter base_hits; static void trigger_validate(void) { - if (env.consumer_cnt != 1) { - fprintf(stderr, "benchmark doesn't support multi-consumer!\n"); + if (env.consumer_cnt != 0) { + fprintf(stderr, "benchmark doesn't support consumer!\n"); exit(1); } } @@ -103,11 +103,6 @@ static void trigger_fmodret_setup(void) attach_bpf(ctx.skel->progs.bench_trigger_fmodret); } -static void *trigger_consumer(void *input) -{ - return NULL; -} - /* make sure call is not inlined and not avoided by compiler, so __weak and * inline asm volatile in the body of the function * @@ -205,7 +200,6 @@ const struct bench bench_trig_base = { .name = "trig-base", .validate = trigger_validate, .producer_thread = trigger_base_producer, - .consumer_thread = trigger_consumer, .measure = trigger_base_measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -216,7 +210,6 @@ const struct bench bench_trig_tp = { .validate = trigger_validate, .setup = trigger_tp_setup, .producer_thread = trigger_producer, - .consumer_thread = trigger_consumer, .measure = trigger_measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -227,7 +220,6 @@ const struct bench bench_trig_rawtp = { .validate = trigger_validate, .setup = trigger_rawtp_setup, .producer_thread = trigger_producer, - .consumer_thread = trigger_consumer, .measure = trigger_measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -238,7 +230,6 @@ const struct bench bench_trig_kprobe = { .validate = trigger_validate, .setup = trigger_kprobe_setup, .producer_thread = trigger_producer, - .consumer_thread = trigger_consumer, .measure = trigger_measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -249,7 +240,6 @@ const struct bench bench_trig_fentry = { .validate = trigger_validate, .setup = trigger_fentry_setup, .producer_thread = trigger_producer, - .consumer_thread = trigger_consumer, .measure = trigger_measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -260,7 +250,6 @@ const struct bench bench_trig_fentry_sleep = { .validate = trigger_validate, .setup = trigger_fentry_sleep_setup, .producer_thread = trigger_producer, - .consumer_thread = trigger_consumer, .measure = trigger_measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -271,7 +260,6 @@ const struct bench bench_trig_fmodret = { .validate = trigger_validate, .setup = trigger_fmodret_setup, .producer_thread = trigger_producer, - .consumer_thread = trigger_consumer, .measure = trigger_measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -281,7 +269,6 @@ const struct bench bench_trig_uprobe_base = { .name = "trig-uprobe-base", .setup = NULL, /* no uprobe/uretprobe is attached */ .producer_thread = uprobe_base_producer, - .consumer_thread = trigger_consumer, .measure = trigger_base_measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -291,7 +278,6 @@ const struct bench bench_trig_uprobe_with_nop = { .name = "trig-uprobe-with-nop", .setup = uprobe_setup_with_nop, .producer_thread = uprobe_producer_with_nop, - .consumer_thread = trigger_consumer, .measure = trigger_measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -301,7 +287,6 @@ const struct bench bench_trig_uretprobe_with_nop = { .name = "trig-uretprobe-with-nop", .setup = uretprobe_setup_with_nop, .producer_thread = uprobe_producer_with_nop, - .consumer_thread = trigger_consumer, .measure = trigger_measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -311,7 +296,6 @@ const struct bench bench_trig_uprobe_without_nop = { .name = "trig-uprobe-without-nop", .setup = uprobe_setup_without_nop, .producer_thread = uprobe_producer_without_nop, - .consumer_thread = trigger_consumer, .measure = trigger_measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -321,7 +305,6 @@ const struct bench bench_trig_uretprobe_without_nop = { .name = "trig-uretprobe-without-nop", .setup = uretprobe_setup_without_nop, .producer_thread = uprobe_producer_without_nop, - .consumer_thread = trigger_consumer, .measure = trigger_measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, diff --git a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh index ada028aa9007..91e3567962ff 100755 --- a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh +++ b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh @@ -4,46 +4,48 @@ source ./benchs/run_common.sh set -eufo pipefail +RUN_RB_BENCH="$RUN_BENCH -c1" + header "Single-producer, parallel producer" for b in rb-libbpf rb-custom pb-libbpf pb-custom; do - summarize $b "$($RUN_BENCH $b)" + summarize $b "$($RUN_RB_BENCH $b)" done header "Single-producer, parallel producer, sampled notification" for b in rb-libbpf rb-custom pb-libbpf pb-custom; do - summarize $b "$($RUN_BENCH --rb-sampled $b)" + summarize $b "$($RUN_RB_BENCH --rb-sampled $b)" done header "Single-producer, back-to-back mode" for b in rb-libbpf rb-custom pb-libbpf pb-custom; do - summarize $b "$($RUN_BENCH --rb-b2b $b)" - summarize $b-sampled "$($RUN_BENCH --rb-sampled --rb-b2b $b)" + summarize $b "$($RUN_RB_BENCH --rb-b2b $b)" + summarize $b-sampled "$($RUN_RB_BENCH --rb-sampled --rb-b2b $b)" done header "Ringbuf back-to-back, effect of sample rate" for b in 1 5 10 25 50 100 250 500 1000 2000 3000; do - summarize "rb-sampled-$b" "$($RUN_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b rb-custom)" + summarize "rb-sampled-$b" "$($RUN_RB_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b rb-custom)" done header "Perfbuf back-to-back, effect of sample rate" for b in 1 5 10 25 50 100 250 500 1000 2000 3000; do - summarize "pb-sampled-$b" "$($RUN_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b pb-custom)" + summarize "pb-sampled-$b" "$($RUN_RB_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b pb-custom)" done header "Ringbuf back-to-back, reserve+commit vs output" -summarize "reserve" "$($RUN_BENCH --rb-b2b rb-custom)" -summarize "output" "$($RUN_BENCH --rb-b2b --rb-use-output rb-custom)" +summarize "reserve" "$($RUN_RB_BENCH --rb-b2b rb-custom)" +summarize "output" "$($RUN_RB_BENCH --rb-b2b --rb-use-output rb-custom)" header "Ringbuf sampled, reserve+commit vs output" -summarize "reserve-sampled" "$($RUN_BENCH --rb-sampled rb-custom)" -summarize "output-sampled" "$($RUN_BENCH --rb-sampled --rb-use-output rb-custom)" +summarize "reserve-sampled" "$($RUN_RB_BENCH --rb-sampled rb-custom)" +summarize "output-sampled" "$($RUN_RB_BENCH --rb-sampled --rb-use-output rb-custom)" header "Single-producer, consumer/producer competing on the same CPU, low batch count" for b in rb-libbpf rb-custom pb-libbpf pb-custom; do - summarize $b "$($RUN_BENCH --rb-batch-cnt 1 --rb-sample-rate 1 --prod-affinity 0 --cons-affinity 0 $b)" + summarize $b "$($RUN_RB_BENCH --rb-batch-cnt 1 --rb-sample-rate 1 --prod-affinity 0 --cons-affinity 0 $b)" done header "Ringbuf, multi-producer contention" for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do - summarize "rb-libbpf nr_prod $b" "$($RUN_BENCH -p$b --rb-batch-cnt 50 rb-libbpf)" + summarize "rb-libbpf nr_prod $b" "$($RUN_RB_BENCH -p$b --rb-batch-cnt 50 rb-libbpf)" done diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index cf216041876c..aaf6ef1201c7 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -191,8 +191,6 @@ noinline int bpf_testmod_fentry_test3(char a, int b, u64 c) return a + b + c; } -__diag_pop(); - int bpf_testmod_fentry_ok; noinline ssize_t @@ -273,6 +271,14 @@ bpf_testmod_test_write(struct file *file, struct kobject *kobj, EXPORT_SYMBOL(bpf_testmod_test_write); ALLOW_ERROR_INJECTION(bpf_testmod_test_write, ERRNO); +noinline int bpf_fentry_shadow_test(int a) +{ + return a + 2; +} +EXPORT_SYMBOL_GPL(bpf_fentry_shadow_test); + +__diag_pop(); + static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = { .attr = { .name = "bpf_testmod", .mode = 0666, }, .read = bpf_testmod_test_read, @@ -462,12 +468,6 @@ static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = { .set = &bpf_testmod_check_kfunc_ids, }; -noinline int bpf_fentry_shadow_test(int a) -{ - return a + 2; -} -EXPORT_SYMBOL_GPL(bpf_fentry_shadow_test); - extern int bpf_fentry_test1(int a); static int bpf_testmod_init(void) diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h index 9693c626646b..f5c5b1375c24 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h @@ -97,4 +97,11 @@ void bpf_kfunc_call_test_mem_len_fail2(__u64 *mem, int len) __ksym; void bpf_kfunc_call_test_destructive(void) __ksym; +void bpf_kfunc_call_test_offset(struct prog_test_ref_kfunc *p); +struct prog_test_member *bpf_kfunc_call_memb_acquire(void); +void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p); +void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p); +void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p); +void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p); +void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len); #endif /* _BPF_TESTMOD_KFUNC_H */ diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 63cd4ab70171..3b350bc31343 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -13,6 +13,9 @@ CONFIG_CGROUP_BPF=y CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_USER_API_HASH=y +CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_BTF=y +CONFIG_DEBUG_INFO_DWARF4=y CONFIG_DYNAMIC_FTRACE=y CONFIG_FPROBE=y CONFIG_FTRACE_SYSCALLS=y @@ -60,6 +63,7 @@ CONFIG_NET_SCH_INGRESS=y CONFIG_NET_SCHED=y CONFIG_NETDEVSIM=y CONFIG_NETFILTER=y +CONFIG_NETFILTER_ADVANCED=y CONFIG_NETFILTER_SYNPROXY=y CONFIG_NETFILTER_XT_CONNMARK=y CONFIG_NETFILTER_XT_MATCH_STATE=y diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 210d643fda6c..4e0cdb593318 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -3991,6 +3991,46 @@ static struct btf_raw_test raw_tests[] = { .err_str = "Invalid arg#1", }, { + .descr = "decl_tag test #18, decl_tag as the map key type", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_STRUCT_ENC(0, 2, 8), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_MEMBER_ENC(NAME_TBD, 1, 32), + BTF_DECL_TAG_ENC(NAME_TBD, 2, -1), /* [3] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0m1\0m2\0tag"), + .map_type = BPF_MAP_TYPE_HASH, + .map_name = "tag_type_check_btf", + .key_size = 8, + .value_size = 4, + .key_type_id = 3, + .value_type_id = 1, + .max_entries = 1, + .map_create_err = true, +}, +{ + .descr = "decl_tag test #19, decl_tag as the map value type", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_STRUCT_ENC(0, 2, 8), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_MEMBER_ENC(NAME_TBD, 1, 32), + BTF_DECL_TAG_ENC(NAME_TBD, 2, -1), /* [3] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0m1\0m2\0tag"), + .map_type = BPF_MAP_TYPE_HASH, + .map_name = "tag_type_check_btf", + .key_size = 4, + .value_size = 8, + .key_type_id = 1, + .value_type_id = 3, + .max_entries = 1, + .map_create_err = true, +}, +{ .descr = "type_tag test #1", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ diff --git a/tools/testing/selftests/bpf/prog_tests/check_mtu.c b/tools/testing/selftests/bpf/prog_tests/check_mtu.c index 5338d2ea0460..2a9a30650350 100644 --- a/tools/testing/selftests/bpf/prog_tests/check_mtu.c +++ b/tools/testing/selftests/bpf/prog_tests/check_mtu.c @@ -183,7 +183,7 @@ cleanup: void serial_test_check_mtu(void) { - __u32 mtu_lo; + int mtu_lo; if (test__start_subtest("bpf_check_mtu XDP-attach")) test_check_mtu_xdp_attach(); diff --git a/tools/testing/selftests/bpf/prog_tests/cpumask.c b/tools/testing/selftests/bpf/prog_tests/cpumask.c index cdf4acc18e4c..756ea8b590b6 100644 --- a/tools/testing/selftests/bpf/prog_tests/cpumask.c +++ b/tools/testing/selftests/bpf/prog_tests/cpumask.c @@ -10,6 +10,7 @@ static const char * const cpumask_success_testcases[] = { "test_set_clear_cpu", "test_setall_clear_cpu", "test_first_firstzero_cpu", + "test_firstand_nocpu", "test_test_and_set_clear", "test_and_or_xor", "test_intersects_subset", @@ -70,5 +71,6 @@ void test_cpumask(void) verify_success(cpumask_success_testcases[i]); } + RUN_TESTS(cpumask_success); RUN_TESTS(cpumask_failure); } diff --git a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c index a1e712105811..2fd05649bad1 100644 --- a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c +++ b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ +#include <linux/rtnetlink.h> #include <sys/types.h> #include <net/if.h> @@ -15,14 +16,23 @@ #define IPV4_IFACE_ADDR "10.0.0.254" #define IPV4_NUD_FAILED_ADDR "10.0.0.1" #define IPV4_NUD_STALE_ADDR "10.0.0.2" +#define IPV4_TBID_ADDR "172.0.0.254" +#define IPV4_TBID_NET "172.0.0.0" +#define IPV4_TBID_DST "172.0.0.2" +#define IPV6_TBID_ADDR "fd00::FFFF" +#define IPV6_TBID_NET "fd00::" +#define IPV6_TBID_DST "fd00::2" #define DMAC "11:11:11:11:11:11" #define DMAC_INIT { 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, } +#define DMAC2 "01:01:01:01:01:01" +#define DMAC_INIT2 { 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, } struct fib_lookup_test { const char *desc; const char *daddr; int expected_ret; int lookup_flags; + __u32 tbid; __u8 dmac[6]; }; @@ -43,6 +53,22 @@ static const struct fib_lookup_test tests[] = { { .desc = "IPv4 skip neigh", .daddr = IPV4_NUD_FAILED_ADDR, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS, .lookup_flags = BPF_FIB_LOOKUP_SKIP_NEIGH, }, + { .desc = "IPv4 TBID lookup failure", + .daddr = IPV4_TBID_DST, .expected_ret = BPF_FIB_LKUP_RET_NOT_FWDED, + .lookup_flags = BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID, + .tbid = RT_TABLE_MAIN, }, + { .desc = "IPv4 TBID lookup success", + .daddr = IPV4_TBID_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS, + .lookup_flags = BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID, .tbid = 100, + .dmac = DMAC_INIT2, }, + { .desc = "IPv6 TBID lookup failure", + .daddr = IPV6_TBID_DST, .expected_ret = BPF_FIB_LKUP_RET_NOT_FWDED, + .lookup_flags = BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID, + .tbid = RT_TABLE_MAIN, }, + { .desc = "IPv6 TBID lookup success", + .daddr = IPV6_TBID_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS, + .lookup_flags = BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID, .tbid = 100, + .dmac = DMAC_INIT2, }, }; static int ifindex; @@ -53,6 +79,7 @@ static int setup_netns(void) SYS(fail, "ip link add veth1 type veth peer name veth2"); SYS(fail, "ip link set dev veth1 up"); + SYS(fail, "ip link set dev veth2 up"); err = write_sysctl("/proc/sys/net/ipv4/neigh/veth1/gc_stale_time", "900"); if (!ASSERT_OK(err, "write_sysctl(net.ipv4.neigh.veth1.gc_stale_time)")) @@ -70,6 +97,17 @@ static int setup_netns(void) SYS(fail, "ip neigh add %s dev veth1 nud failed", IPV4_NUD_FAILED_ADDR); SYS(fail, "ip neigh add %s dev veth1 lladdr %s nud stale", IPV4_NUD_STALE_ADDR, DMAC); + /* Setup for tbid lookup tests */ + SYS(fail, "ip addr add %s/24 dev veth2", IPV4_TBID_ADDR); + SYS(fail, "ip route del %s/24 dev veth2", IPV4_TBID_NET); + SYS(fail, "ip route add table 100 %s/24 dev veth2", IPV4_TBID_NET); + SYS(fail, "ip neigh add %s dev veth2 lladdr %s nud stale", IPV4_TBID_DST, DMAC2); + + SYS(fail, "ip addr add %s/64 dev veth2", IPV6_TBID_ADDR); + SYS(fail, "ip -6 route del %s/64 dev veth2", IPV6_TBID_NET); + SYS(fail, "ip -6 route add table 100 %s/64 dev veth2", IPV6_TBID_NET); + SYS(fail, "ip neigh add %s dev veth2 lladdr %s nud stale", IPV6_TBID_DST, DMAC2); + err = write_sysctl("/proc/sys/net/ipv4/conf/veth1/forwarding", "1"); if (!ASSERT_OK(err, "write_sysctl(net.ipv4.conf.veth1.forwarding)")) goto fail; @@ -83,7 +121,7 @@ fail: return -1; } -static int set_lookup_params(struct bpf_fib_lookup *params, const char *daddr) +static int set_lookup_params(struct bpf_fib_lookup *params, const struct fib_lookup_test *test) { int ret; @@ -91,8 +129,9 @@ static int set_lookup_params(struct bpf_fib_lookup *params, const char *daddr) params->l4_protocol = IPPROTO_TCP; params->ifindex = ifindex; + params->tbid = test->tbid; - if (inet_pton(AF_INET6, daddr, params->ipv6_dst) == 1) { + if (inet_pton(AF_INET6, test->daddr, params->ipv6_dst) == 1) { params->family = AF_INET6; ret = inet_pton(AF_INET6, IPV6_IFACE_ADDR, params->ipv6_src); if (!ASSERT_EQ(ret, 1, "inet_pton(IPV6_IFACE_ADDR)")) @@ -100,7 +139,7 @@ static int set_lookup_params(struct bpf_fib_lookup *params, const char *daddr) return 0; } - ret = inet_pton(AF_INET, daddr, ¶ms->ipv4_dst); + ret = inet_pton(AF_INET, test->daddr, ¶ms->ipv4_dst); if (!ASSERT_EQ(ret, 1, "convert IP[46] address")) return -1; params->family = AF_INET; @@ -154,13 +193,12 @@ void test_fib_lookup(void) fib_params = &skel->bss->fib_params; for (i = 0; i < ARRAY_SIZE(tests); i++) { - printf("Testing %s\n", tests[i].desc); + printf("Testing %s ", tests[i].desc); - if (set_lookup_params(fib_params, tests[i].daddr)) + if (set_lookup_params(fib_params, &tests[i])) continue; skel->bss->fib_lookup_ret = -1; - skel->bss->lookup_flags = BPF_FIB_LOOKUP_OUTPUT | - tests[i].lookup_flags; + skel->bss->lookup_flags = tests[i].lookup_flags; err = bpf_prog_test_run_opts(prog_fd, &run_opts); if (!ASSERT_OK(err, "bpf_prog_test_run_opts")) @@ -175,7 +213,14 @@ void test_fib_lookup(void) mac_str(expected, tests[i].dmac); mac_str(actual, fib_params->dmac); - printf("dmac expected %s actual %s\n", expected, actual); + printf("dmac expected %s actual %s ", expected, actual); + } + + // ensure tbid is zero'd out after fib lookup. + if (tests[i].lookup_flags & BPF_FIB_LOOKUP_DIRECT) { + if (!ASSERT_EQ(skel->bss->fib_params.tbid, 0, + "expected fib_params.tbid to be zero")) + goto fail; } } diff --git a/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c b/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c index 8383a99f610f..0adf8d9475cb 100644 --- a/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c +++ b/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c @@ -171,7 +171,11 @@ static void test_unpriv_bpf_disabled_negative(struct test_unpriv_bpf_disabled *s prog_insns, prog_insn_cnt, &load_opts), -EPERM, "prog_load_fails"); - for (i = BPF_MAP_TYPE_HASH; i <= BPF_MAP_TYPE_BLOOM_FILTER; i++) + /* some map types require particular correct parameters which could be + * sanity-checked before enforcing -EPERM, so only validate that + * the simple ARRAY and HASH maps are failing with -EPERM + */ + for (i = BPF_MAP_TYPE_HASH; i <= BPF_MAP_TYPE_ARRAY; i++) ASSERT_EQ(bpf_map_create(i, NULL, sizeof(int), sizeof(int), 1, NULL), -EPERM, "map_create_fails"); diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index 531621adef42..070a13833c3f 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -50,6 +50,7 @@ #include "verifier_regalloc.skel.h" #include "verifier_ringbuf.skel.h" #include "verifier_runtime_jit.skel.h" +#include "verifier_scalar_ids.skel.h" #include "verifier_search_pruning.skel.h" #include "verifier_sock.skel.h" #include "verifier_spill_fill.skel.h" @@ -150,6 +151,7 @@ void test_verifier_ref_tracking(void) { RUN(verifier_ref_tracking); } void test_verifier_regalloc(void) { RUN(verifier_regalloc); } void test_verifier_ringbuf(void) { RUN(verifier_ringbuf); } void test_verifier_runtime_jit(void) { RUN(verifier_runtime_jit); } +void test_verifier_scalar_ids(void) { RUN(verifier_scalar_ids); } void test_verifier_search_pruning(void) { RUN(verifier_search_pruning); } void test_verifier_sock(void) { RUN(verifier_sock); } void test_verifier_spill_fill(void) { RUN(verifier_spill_fill); } diff --git a/tools/testing/selftests/bpf/prog_tests/vrf_socket_lookup.c b/tools/testing/selftests/bpf/prog_tests/vrf_socket_lookup.c new file mode 100644 index 000000000000..2a5e207edad6 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/vrf_socket_lookup.c @@ -0,0 +1,312 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + +/* + * Topology: + * --------- + * NS0 namespace | NS1 namespace + * | + * +--------------+ | +--------------+ + * | veth01 |----------| veth10 | + * | 172.16.1.100 | | | 172.16.1.200 | + * | bpf | | +--------------+ + * +--------------+ | + * server(UDP/TCP) | + * +-------------------+ | + * | vrf1 | | + * | +--------------+ | | +--------------+ + * | | veth02 |----------| veth20 | + * | | 172.16.2.100 | | | | 172.16.2.200 | + * | | bpf | | | +--------------+ + * | +--------------+ | | + * | server(UDP/TCP) | | + * +-------------------+ | + * + * Test flow + * ----------- + * The tests verifies that socket lookup via TC is VRF aware: + * 1) Creates two veth pairs between NS0 and NS1: + * a) veth01 <-> veth10 outside the VRF + * b) veth02 <-> veth20 in the VRF + * 2) Attaches to veth01 and veth02 a program that calls: + * a) bpf_skc_lookup_tcp() with TCP and tcp_skc is true + * b) bpf_sk_lookup_tcp() with TCP and tcp_skc is false + * c) bpf_sk_lookup_udp() with UDP + * The program stores the lookup result in bss->lookup_status. + * 3) Creates a socket TCP/UDP server in/outside the VRF. + * 4) The test expects lookup_status to be: + * a) 0 from device in VRF to server outside VRF + * b) 0 from device outside VRF to server in VRF + * c) 1 from device in VRF to server in VRF + * d) 1 from device outside VRF to server outside VRF + */ + +#include <net/if.h> + +#include "test_progs.h" +#include "network_helpers.h" +#include "vrf_socket_lookup.skel.h" + +#define NS0 "vrf_socket_lookup_0" +#define NS1 "vrf_socket_lookup_1" + +#define IP4_ADDR_VETH01 "172.16.1.100" +#define IP4_ADDR_VETH10 "172.16.1.200" +#define IP4_ADDR_VETH02 "172.16.2.100" +#define IP4_ADDR_VETH20 "172.16.2.200" + +#define NON_VRF_PORT 5000 +#define IN_VRF_PORT 5001 + +#define TIMEOUT_MS 3000 + +static int make_socket(int sotype, const char *ip, int port, + struct sockaddr_storage *addr) +{ + int err, fd; + + err = make_sockaddr(AF_INET, ip, port, addr, NULL); + if (!ASSERT_OK(err, "make_address")) + return -1; + + fd = socket(AF_INET, sotype, 0); + if (!ASSERT_GE(fd, 0, "socket")) + return -1; + + if (!ASSERT_OK(settimeo(fd, TIMEOUT_MS), "settimeo")) + goto fail; + + return fd; +fail: + close(fd); + return -1; +} + +static int make_server(int sotype, const char *ip, int port, const char *ifname) +{ + int err, fd = -1; + + fd = start_server(AF_INET, sotype, ip, port, TIMEOUT_MS); + if (!ASSERT_GE(fd, 0, "start_server")) + return -1; + + if (ifname) { + err = setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, + ifname, strlen(ifname) + 1); + if (!ASSERT_OK(err, "setsockopt(SO_BINDTODEVICE)")) + goto fail; + } + + return fd; +fail: + close(fd); + return -1; +} + +static int attach_progs(char *ifname, int tc_prog_fd, int xdp_prog_fd) +{ + LIBBPF_OPTS(bpf_tc_hook, hook, .attach_point = BPF_TC_INGRESS); + LIBBPF_OPTS(bpf_tc_opts, opts, .handle = 1, .priority = 1, + .prog_fd = tc_prog_fd); + int ret, ifindex; + + ifindex = if_nametoindex(ifname); + if (!ASSERT_NEQ(ifindex, 0, "if_nametoindex")) + return -1; + hook.ifindex = ifindex; + + ret = bpf_tc_hook_create(&hook); + if (!ASSERT_OK(ret, "bpf_tc_hook_create")) + return ret; + + ret = bpf_tc_attach(&hook, &opts); + if (!ASSERT_OK(ret, "bpf_tc_attach")) { + bpf_tc_hook_destroy(&hook); + return ret; + } + ret = bpf_xdp_attach(ifindex, xdp_prog_fd, 0, NULL); + if (!ASSERT_OK(ret, "bpf_xdp_attach")) { + bpf_tc_hook_destroy(&hook); + return ret; + } + + return 0; +} + +static void cleanup(void) +{ + SYS_NOFAIL("test -f /var/run/netns/" NS0 " && ip netns delete " + NS0); + SYS_NOFAIL("test -f /var/run/netns/" NS1 " && ip netns delete " + NS1); +} + +static int setup(struct vrf_socket_lookup *skel) +{ + int tc_prog_fd, xdp_prog_fd, ret = 0; + struct nstoken *nstoken = NULL; + + SYS(fail, "ip netns add " NS0); + SYS(fail, "ip netns add " NS1); + + /* NS0 <-> NS1 [veth01 <-> veth10] */ + SYS(fail, "ip link add veth01 netns " NS0 " type veth peer name veth10" + " netns " NS1); + SYS(fail, "ip -net " NS0 " addr add " IP4_ADDR_VETH01 "/24 dev veth01"); + SYS(fail, "ip -net " NS0 " link set dev veth01 up"); + SYS(fail, "ip -net " NS1 " addr add " IP4_ADDR_VETH10 "/24 dev veth10"); + SYS(fail, "ip -net " NS1 " link set dev veth10 up"); + + /* NS0 <-> NS1 [veth02 <-> veth20] */ + SYS(fail, "ip link add veth02 netns " NS0 " type veth peer name veth20" + " netns " NS1); + SYS(fail, "ip -net " NS0 " addr add " IP4_ADDR_VETH02 "/24 dev veth02"); + SYS(fail, "ip -net " NS0 " link set dev veth02 up"); + SYS(fail, "ip -net " NS1 " addr add " IP4_ADDR_VETH20 "/24 dev veth20"); + SYS(fail, "ip -net " NS1 " link set dev veth20 up"); + + /* veth02 -> vrf1 */ + SYS(fail, "ip -net " NS0 " link add vrf1 type vrf table 11"); + SYS(fail, "ip -net " NS0 " route add vrf vrf1 unreachable default" + " metric 4278198272"); + SYS(fail, "ip -net " NS0 " link set vrf1 alias vrf"); + SYS(fail, "ip -net " NS0 " link set vrf1 up"); + SYS(fail, "ip -net " NS0 " link set veth02 master vrf1"); + + /* Attach TC and XDP progs to veth devices in NS0 */ + nstoken = open_netns(NS0); + if (!ASSERT_OK_PTR(nstoken, "setns " NS0)) + goto fail; + tc_prog_fd = bpf_program__fd(skel->progs.tc_socket_lookup); + if (!ASSERT_GE(tc_prog_fd, 0, "bpf_program__tc_fd")) + goto fail; + xdp_prog_fd = bpf_program__fd(skel->progs.xdp_socket_lookup); + if (!ASSERT_GE(xdp_prog_fd, 0, "bpf_program__xdp_fd")) + goto fail; + + if (attach_progs("veth01", tc_prog_fd, xdp_prog_fd)) + goto fail; + + if (attach_progs("veth02", tc_prog_fd, xdp_prog_fd)) + goto fail; + + goto close; +fail: + ret = -1; +close: + if (nstoken) + close_netns(nstoken); + return ret; +} + +static int test_lookup(struct vrf_socket_lookup *skel, int sotype, + const char *ip, int port, bool test_xdp, bool tcp_skc, + int lookup_status_exp) +{ + static const char msg[] = "Hello Server"; + struct sockaddr_storage addr = {}; + int fd, ret = 0; + + fd = make_socket(sotype, ip, port, &addr); + if (fd < 0) + return -1; + + skel->bss->test_xdp = test_xdp; + skel->bss->tcp_skc = tcp_skc; + skel->bss->lookup_status = -1; + + if (sotype == SOCK_STREAM) + connect(fd, (void *)&addr, sizeof(struct sockaddr_in)); + else + sendto(fd, msg, sizeof(msg), 0, (void *)&addr, + sizeof(struct sockaddr_in)); + + if (!ASSERT_EQ(skel->bss->lookup_status, lookup_status_exp, + "lookup_status")) + goto fail; + + goto close; + +fail: + ret = -1; +close: + close(fd); + return ret; +} + +static void _test_vrf_socket_lookup(struct vrf_socket_lookup *skel, int sotype, + bool test_xdp, bool tcp_skc) +{ + int in_vrf_server = -1, non_vrf_server = -1; + struct nstoken *nstoken = NULL; + + nstoken = open_netns(NS0); + if (!ASSERT_OK_PTR(nstoken, "setns " NS0)) + goto done; + + /* Open sockets in and outside VRF */ + non_vrf_server = make_server(sotype, "0.0.0.0", NON_VRF_PORT, NULL); + if (!ASSERT_GE(non_vrf_server, 0, "make_server__outside_vrf_fd")) + goto done; + + in_vrf_server = make_server(sotype, "0.0.0.0", IN_VRF_PORT, "veth02"); + if (!ASSERT_GE(in_vrf_server, 0, "make_server__in_vrf_fd")) + goto done; + + /* Perform test from NS1 */ + close_netns(nstoken); + nstoken = open_netns(NS1); + if (!ASSERT_OK_PTR(nstoken, "setns " NS1)) + goto done; + + if (!ASSERT_OK(test_lookup(skel, sotype, IP4_ADDR_VETH02, NON_VRF_PORT, + test_xdp, tcp_skc, 0), "in_to_out")) + goto done; + if (!ASSERT_OK(test_lookup(skel, sotype, IP4_ADDR_VETH02, IN_VRF_PORT, + test_xdp, tcp_skc, 1), "in_to_in")) + goto done; + if (!ASSERT_OK(test_lookup(skel, sotype, IP4_ADDR_VETH01, NON_VRF_PORT, + test_xdp, tcp_skc, 1), "out_to_out")) + goto done; + if (!ASSERT_OK(test_lookup(skel, sotype, IP4_ADDR_VETH01, IN_VRF_PORT, + test_xdp, tcp_skc, 0), "out_to_in")) + goto done; + +done: + if (non_vrf_server >= 0) + close(non_vrf_server); + if (in_vrf_server >= 0) + close(in_vrf_server); + if (nstoken) + close_netns(nstoken); +} + +void test_vrf_socket_lookup(void) +{ + struct vrf_socket_lookup *skel; + + cleanup(); + + skel = vrf_socket_lookup__open_and_load(); + if (!ASSERT_OK_PTR(skel, "vrf_socket_lookup__open_and_load")) + return; + + if (!ASSERT_OK(setup(skel), "setup")) + goto done; + + if (test__start_subtest("tc_socket_lookup_tcp")) + _test_vrf_socket_lookup(skel, SOCK_STREAM, false, false); + if (test__start_subtest("tc_socket_lookup_tcp_skc")) + _test_vrf_socket_lookup(skel, SOCK_STREAM, false, false); + if (test__start_subtest("tc_socket_lookup_udp")) + _test_vrf_socket_lookup(skel, SOCK_STREAM, false, false); + if (test__start_subtest("xdp_socket_lookup_tcp")) + _test_vrf_socket_lookup(skel, SOCK_STREAM, true, false); + if (test__start_subtest("xdp_socket_lookup_tcp_skc")) + _test_vrf_socket_lookup(skel, SOCK_STREAM, true, false); + if (test__start_subtest("xdp_socket_lookup_udp")) + _test_vrf_socket_lookup(skel, SOCK_STREAM, true, false); + +done: + vrf_socket_lookup__destroy(skel); + cleanup(); +} diff --git a/tools/testing/selftests/bpf/progs/cpumask_common.h b/tools/testing/selftests/bpf/progs/cpumask_common.h index 0c5b785a93e4..b15c588ace15 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_common.h +++ b/tools/testing/selftests/bpf/progs/cpumask_common.h @@ -28,6 +28,8 @@ void bpf_cpumask_release(struct bpf_cpumask *cpumask) __ksym; struct bpf_cpumask *bpf_cpumask_acquire(struct bpf_cpumask *cpumask) __ksym; u32 bpf_cpumask_first(const struct cpumask *cpumask) __ksym; u32 bpf_cpumask_first_zero(const struct cpumask *cpumask) __ksym; +u32 bpf_cpumask_first_and(const struct cpumask *src1, + const struct cpumask *src2) __ksym; void bpf_cpumask_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym; void bpf_cpumask_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym; bool bpf_cpumask_test_cpu(u32 cpu, const struct cpumask *cpumask) __ksym; @@ -50,8 +52,8 @@ bool bpf_cpumask_subset(const struct cpumask *src1, const struct cpumask *src2) bool bpf_cpumask_empty(const struct cpumask *cpumask) __ksym; bool bpf_cpumask_full(const struct cpumask *cpumask) __ksym; void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask *src) __ksym; -u32 bpf_cpumask_any(const struct cpumask *src) __ksym; -u32 bpf_cpumask_any_and(const struct cpumask *src1, const struct cpumask *src2) __ksym; +u32 bpf_cpumask_any_distribute(const struct cpumask *src) __ksym; +u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1, const struct cpumask *src2) __ksym; void bpf_rcu_read_lock(void) __ksym; void bpf_rcu_read_unlock(void) __ksym; diff --git a/tools/testing/selftests/bpf/progs/cpumask_success.c b/tools/testing/selftests/bpf/progs/cpumask_success.c index 2fcdd7f68ac7..674a63424dee 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_success.c +++ b/tools/testing/selftests/bpf/progs/cpumask_success.c @@ -5,6 +5,7 @@ #include <bpf/bpf_tracing.h> #include <bpf/bpf_helpers.h> +#include "bpf_misc.h" #include "cpumask_common.h" char _license[] SEC("license") = "GPL"; @@ -175,6 +176,38 @@ release_exit: } SEC("tp_btf/task_newtask") +int BPF_PROG(test_firstand_nocpu, struct task_struct *task, u64 clone_flags) +{ + struct bpf_cpumask *mask1, *mask2; + u32 first; + + if (!is_test_task()) + return 0; + + mask1 = create_cpumask(); + if (!mask1) + return 0; + + mask2 = create_cpumask(); + if (!mask2) + goto release_exit; + + bpf_cpumask_set_cpu(0, mask1); + bpf_cpumask_set_cpu(1, mask2); + + first = bpf_cpumask_first_and(cast(mask1), cast(mask2)); + if (first <= 1) + err = 3; + +release_exit: + if (mask1) + bpf_cpumask_release(mask1); + if (mask2) + bpf_cpumask_release(mask2); + return 0; +} + +SEC("tp_btf/task_newtask") int BPF_PROG(test_test_and_set_clear, struct task_struct *task, u64 clone_flags) { struct bpf_cpumask *cpumask; @@ -311,13 +344,13 @@ int BPF_PROG(test_copy_any_anyand, struct task_struct *task, u64 clone_flags) bpf_cpumask_set_cpu(1, mask2); bpf_cpumask_or(dst1, cast(mask1), cast(mask2)); - cpu = bpf_cpumask_any(cast(mask1)); + cpu = bpf_cpumask_any_distribute(cast(mask1)); if (cpu != 0) { err = 6; goto release_exit; } - cpu = bpf_cpumask_any(cast(dst2)); + cpu = bpf_cpumask_any_distribute(cast(dst2)); if (cpu < nr_cpus) { err = 7; goto release_exit; @@ -329,13 +362,13 @@ int BPF_PROG(test_copy_any_anyand, struct task_struct *task, u64 clone_flags) goto release_exit; } - cpu = bpf_cpumask_any(cast(dst2)); + cpu = bpf_cpumask_any_distribute(cast(dst2)); if (cpu > 1) { err = 9; goto release_exit; } - cpu = bpf_cpumask_any_and(cast(mask1), cast(mask2)); + cpu = bpf_cpumask_any_and_distribute(cast(mask1), cast(mask2)); if (cpu < nr_cpus) { err = 10; goto release_exit; @@ -426,3 +459,26 @@ int BPF_PROG(test_global_mask_rcu, struct task_struct *task, u64 clone_flags) return 0; } + +SEC("tp_btf/task_newtask") +__success +int BPF_PROG(test_refcount_null_tracking, struct task_struct *task, u64 clone_flags) +{ + struct bpf_cpumask *mask1, *mask2; + + mask1 = bpf_cpumask_create(); + mask2 = bpf_cpumask_create(); + + if (!mask1 || !mask2) + goto free_masks_return; + + bpf_cpumask_test_cpu(0, (const struct cpumask *)mask1); + bpf_cpumask_test_cpu(0, (const struct cpumask *)mask2); + +free_masks_return: + if (mask1) + bpf_cpumask_release(mask1); + if (mask2) + bpf_cpumask_release(mask2); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr.c b/tools/testing/selftests/bpf/progs/refcounted_kptr.c index 1d348a225140..a3da610b1e6b 100644 --- a/tools/testing/selftests/bpf/progs/refcounted_kptr.c +++ b/tools/testing/selftests/bpf/progs/refcounted_kptr.c @@ -375,6 +375,8 @@ long rbtree_refcounted_node_ref_escapes(void *ctx) bpf_rbtree_add(&aroot, &n->node, less_a); m = bpf_refcount_acquire(n); bpf_spin_unlock(&alock); + if (!m) + return 2; m->key = 2; bpf_obj_drop(m); diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c index efcb308f80ad..0b09e5c915b1 100644 --- a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c +++ b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c @@ -29,7 +29,7 @@ static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b) } SEC("?tc") -__failure __msg("Unreleased reference id=3 alloc_insn=21") +__failure __msg("Unreleased reference id=4 alloc_insn=21") long rbtree_refcounted_node_ref_escapes(void *ctx) { struct node_acquire *n, *m; @@ -43,6 +43,8 @@ long rbtree_refcounted_node_ref_escapes(void *ctx) /* m becomes an owning ref but is never drop'd or added to a tree */ m = bpf_refcount_acquire(n); bpf_spin_unlock(&glock); + if (!m) + return 2; m->key = 2; return 0; diff --git a/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c b/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c new file mode 100644 index 000000000000..13b29a7faa71 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c @@ -0,0 +1,659 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +/* Check that precision marks propagate through scalar IDs. + * Registers r{0,1,2} have the same scalar ID at the moment when r0 is + * marked to be precise, this mark is immediately propagated to r{1,2}. + */ +SEC("socket") +__success __log_level(2) +__msg("frame0: regs=r0,r1,r2 stack= before 4: (bf) r3 = r10") +__msg("frame0: regs=r0,r1,r2 stack= before 3: (bf) r2 = r0") +__msg("frame0: regs=r0,r1 stack= before 2: (bf) r1 = r0") +__msg("frame0: regs=r0 stack= before 1: (57) r0 &= 255") +__msg("frame0: regs=r0 stack= before 0: (85) call bpf_ktime_get_ns") +__flag(BPF_F_TEST_STATE_FREQ) +__naked void precision_same_state(void) +{ + asm volatile ( + /* r0 = random number up to 0xff */ + "call %[bpf_ktime_get_ns];" + "r0 &= 0xff;" + /* tie r0.id == r1.id == r2.id */ + "r1 = r0;" + "r2 = r0;" + /* force r0 to be precise, this immediately marks r1 and r2 as + * precise as well because of shared IDs + */ + "r3 = r10;" + "r3 += r0;" + "r0 = 0;" + "exit;" + : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +/* Same as precision_same_state, but mark propagates through state / + * parent state boundary. + */ +SEC("socket") +__success __log_level(2) +__msg("frame0: last_idx 6 first_idx 5 subseq_idx -1") +__msg("frame0: regs=r0,r1,r2 stack= before 5: (bf) r3 = r10") +__msg("frame0: parent state regs=r0,r1,r2 stack=:") +__msg("frame0: regs=r0,r1,r2 stack= before 4: (05) goto pc+0") +__msg("frame0: regs=r0,r1,r2 stack= before 3: (bf) r2 = r0") +__msg("frame0: regs=r0,r1 stack= before 2: (bf) r1 = r0") +__msg("frame0: regs=r0 stack= before 1: (57) r0 &= 255") +__msg("frame0: parent state regs=r0 stack=:") +__msg("frame0: regs=r0 stack= before 0: (85) call bpf_ktime_get_ns") +__flag(BPF_F_TEST_STATE_FREQ) +__naked void precision_cross_state(void) +{ + asm volatile ( + /* r0 = random number up to 0xff */ + "call %[bpf_ktime_get_ns];" + "r0 &= 0xff;" + /* tie r0.id == r1.id == r2.id */ + "r1 = r0;" + "r2 = r0;" + /* force checkpoint */ + "goto +0;" + /* force r0 to be precise, this immediately marks r1 and r2 as + * precise as well because of shared IDs + */ + "r3 = r10;" + "r3 += r0;" + "r0 = 0;" + "exit;" + : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +/* Same as precision_same_state, but break one of the + * links, note that r1 is absent from regs=... in __msg below. + */ +SEC("socket") +__success __log_level(2) +__msg("frame0: regs=r0,r2 stack= before 5: (bf) r3 = r10") +__msg("frame0: regs=r0,r2 stack= before 4: (b7) r1 = 0") +__msg("frame0: regs=r0,r2 stack= before 3: (bf) r2 = r0") +__msg("frame0: regs=r0 stack= before 2: (bf) r1 = r0") +__msg("frame0: regs=r0 stack= before 1: (57) r0 &= 255") +__msg("frame0: regs=r0 stack= before 0: (85) call bpf_ktime_get_ns") +__flag(BPF_F_TEST_STATE_FREQ) +__naked void precision_same_state_broken_link(void) +{ + asm volatile ( + /* r0 = random number up to 0xff */ + "call %[bpf_ktime_get_ns];" + "r0 &= 0xff;" + /* tie r0.id == r1.id == r2.id */ + "r1 = r0;" + "r2 = r0;" + /* break link for r1, this is the only line that differs + * compared to the previous test + */ + "r1 = 0;" + /* force r0 to be precise, this immediately marks r1 and r2 as + * precise as well because of shared IDs + */ + "r3 = r10;" + "r3 += r0;" + "r0 = 0;" + "exit;" + : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +/* Same as precision_same_state_broken_link, but with state / + * parent state boundary. + */ +SEC("socket") +__success __log_level(2) +__msg("frame0: regs=r0,r2 stack= before 6: (bf) r3 = r10") +__msg("frame0: regs=r0,r2 stack= before 5: (b7) r1 = 0") +__msg("frame0: parent state regs=r0,r2 stack=:") +__msg("frame0: regs=r0,r1,r2 stack= before 4: (05) goto pc+0") +__msg("frame0: regs=r0,r1,r2 stack= before 3: (bf) r2 = r0") +__msg("frame0: regs=r0,r1 stack= before 2: (bf) r1 = r0") +__msg("frame0: regs=r0 stack= before 1: (57) r0 &= 255") +__msg("frame0: parent state regs=r0 stack=:") +__msg("frame0: regs=r0 stack= before 0: (85) call bpf_ktime_get_ns") +__flag(BPF_F_TEST_STATE_FREQ) +__naked void precision_cross_state_broken_link(void) +{ + asm volatile ( + /* r0 = random number up to 0xff */ + "call %[bpf_ktime_get_ns];" + "r0 &= 0xff;" + /* tie r0.id == r1.id == r2.id */ + "r1 = r0;" + "r2 = r0;" + /* force checkpoint, although link between r1 and r{0,2} is + * broken by the next statement current precision tracking + * algorithm can't react to it and propagates mark for r1 to + * the parent state. + */ + "goto +0;" + /* break link for r1, this is the only line that differs + * compared to precision_cross_state() + */ + "r1 = 0;" + /* force r0 to be precise, this immediately marks r1 and r2 as + * precise as well because of shared IDs + */ + "r3 = r10;" + "r3 += r0;" + "r0 = 0;" + "exit;" + : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +/* Check that precision marks propagate through scalar IDs. + * Use the same scalar ID in multiple stack frames, check that + * precision information is propagated up the call stack. + */ +SEC("socket") +__success __log_level(2) +__msg("11: (0f) r2 += r1") +/* Current state */ +__msg("frame2: last_idx 11 first_idx 10 subseq_idx -1") +__msg("frame2: regs=r1 stack= before 10: (bf) r2 = r10") +__msg("frame2: parent state regs=r1 stack=") +/* frame1.r{6,7} are marked because mark_precise_scalar_ids() + * looks for all registers with frame2.r1.id in the current state + */ +__msg("frame1: parent state regs=r6,r7 stack=") +__msg("frame0: parent state regs=r6 stack=") +/* Parent state */ +__msg("frame2: last_idx 8 first_idx 8 subseq_idx 10") +__msg("frame2: regs=r1 stack= before 8: (85) call pc+1") +/* frame1.r1 is marked because of backtracking of call instruction */ +__msg("frame1: parent state regs=r1,r6,r7 stack=") +__msg("frame0: parent state regs=r6 stack=") +/* Parent state */ +__msg("frame1: last_idx 7 first_idx 6 subseq_idx 8") +__msg("frame1: regs=r1,r6,r7 stack= before 7: (bf) r7 = r1") +__msg("frame1: regs=r1,r6 stack= before 6: (bf) r6 = r1") +__msg("frame1: parent state regs=r1 stack=") +__msg("frame0: parent state regs=r6 stack=") +/* Parent state */ +__msg("frame1: last_idx 4 first_idx 4 subseq_idx 6") +__msg("frame1: regs=r1 stack= before 4: (85) call pc+1") +__msg("frame0: parent state regs=r1,r6 stack=") +/* Parent state */ +__msg("frame0: last_idx 3 first_idx 1 subseq_idx 4") +__msg("frame0: regs=r0,r1,r6 stack= before 3: (bf) r6 = r0") +__msg("frame0: regs=r0,r1 stack= before 2: (bf) r1 = r0") +__msg("frame0: regs=r0 stack= before 1: (57) r0 &= 255") +__flag(BPF_F_TEST_STATE_FREQ) +__naked void precision_many_frames(void) +{ + asm volatile ( + /* r0 = random number up to 0xff */ + "call %[bpf_ktime_get_ns];" + "r0 &= 0xff;" + /* tie r0.id == r1.id == r6.id */ + "r1 = r0;" + "r6 = r0;" + "call precision_many_frames__foo;" + "exit;" + : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +static __naked __noinline __used +void precision_many_frames__foo(void) +{ + asm volatile ( + /* conflate one of the register numbers (r6) with outer frame, + * to verify that those are tracked independently + */ + "r6 = r1;" + "r7 = r1;" + "call precision_many_frames__bar;" + "exit" + ::: __clobber_all); +} + +static __naked __noinline __used +void precision_many_frames__bar(void) +{ + asm volatile ( + /* force r1 to be precise, this immediately marks: + * - bar frame r1 + * - foo frame r{1,6,7} + * - main frame r{1,6} + */ + "r2 = r10;" + "r2 += r1;" + "r0 = 0;" + "exit;" + ::: __clobber_all); +} + +/* Check that scalars with the same IDs are marked precise on stack as + * well as in registers. + */ +SEC("socket") +__success __log_level(2) +/* foo frame */ +__msg("frame1: regs=r1 stack=-8,-16 before 9: (bf) r2 = r10") +__msg("frame1: regs=r1 stack=-8,-16 before 8: (7b) *(u64 *)(r10 -16) = r1") +__msg("frame1: regs=r1 stack=-8 before 7: (7b) *(u64 *)(r10 -8) = r1") +__msg("frame1: regs=r1 stack= before 4: (85) call pc+2") +/* main frame */ +__msg("frame0: regs=r0,r1 stack=-8 before 3: (7b) *(u64 *)(r10 -8) = r1") +__msg("frame0: regs=r0,r1 stack= before 2: (bf) r1 = r0") +__msg("frame0: regs=r0 stack= before 1: (57) r0 &= 255") +__flag(BPF_F_TEST_STATE_FREQ) +__naked void precision_stack(void) +{ + asm volatile ( + /* r0 = random number up to 0xff */ + "call %[bpf_ktime_get_ns];" + "r0 &= 0xff;" + /* tie r0.id == r1.id == fp[-8].id */ + "r1 = r0;" + "*(u64*)(r10 - 8) = r1;" + "call precision_stack__foo;" + "r0 = 0;" + "exit;" + : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +static __naked __noinline __used +void precision_stack__foo(void) +{ + asm volatile ( + /* conflate one of the register numbers (r6) with outer frame, + * to verify that those are tracked independently + */ + "*(u64*)(r10 - 8) = r1;" + "*(u64*)(r10 - 16) = r1;" + /* force r1 to be precise, this immediately marks: + * - foo frame r1,fp{-8,-16} + * - main frame r1,fp{-8} + */ + "r2 = r10;" + "r2 += r1;" + "exit" + ::: __clobber_all); +} + +/* Use two separate scalar IDs to check that these are propagated + * independently. + */ +SEC("socket") +__success __log_level(2) +/* r{6,7} */ +__msg("11: (0f) r3 += r7") +__msg("frame0: regs=r6,r7 stack= before 10: (bf) r3 = r10") +/* ... skip some insns ... */ +__msg("frame0: regs=r6,r7 stack= before 3: (bf) r7 = r0") +__msg("frame0: regs=r0,r6 stack= before 2: (bf) r6 = r0") +/* r{8,9} */ +__msg("12: (0f) r3 += r9") +__msg("frame0: regs=r8,r9 stack= before 11: (0f) r3 += r7") +/* ... skip some insns ... */ +__msg("frame0: regs=r8,r9 stack= before 7: (bf) r9 = r0") +__msg("frame0: regs=r0,r8 stack= before 6: (bf) r8 = r0") +__flag(BPF_F_TEST_STATE_FREQ) +__naked void precision_two_ids(void) +{ + asm volatile ( + /* r6 = random number up to 0xff + * r6.id == r7.id + */ + "call %[bpf_ktime_get_ns];" + "r0 &= 0xff;" + "r6 = r0;" + "r7 = r0;" + /* same, but for r{8,9} */ + "call %[bpf_ktime_get_ns];" + "r0 &= 0xff;" + "r8 = r0;" + "r9 = r0;" + /* clear r0 id */ + "r0 = 0;" + /* force checkpoint */ + "goto +0;" + "r3 = r10;" + /* force r7 to be precise, this also marks r6 */ + "r3 += r7;" + /* force r9 to be precise, this also marks r8 */ + "r3 += r9;" + "exit;" + : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +/* Verify that check_ids() is used by regsafe() for scalars. + * + * r9 = ... some pointer with range X ... + * r6 = ... unbound scalar ID=a ... + * r7 = ... unbound scalar ID=b ... + * if (r6 > r7) goto +1 + * r7 = r6 + * if (r7 > X) goto exit + * r9 += r6 + * ... access memory using r9 ... + * + * The memory access is safe only if r7 is bounded, + * which is true for one branch and not true for another. + */ +SEC("socket") +__failure __msg("register with unbounded min value") +__flag(BPF_F_TEST_STATE_FREQ) +__naked void check_ids_in_regsafe(void) +{ + asm volatile ( + /* Bump allocated stack */ + "r1 = 0;" + "*(u64*)(r10 - 8) = r1;" + /* r9 = pointer to stack */ + "r9 = r10;" + "r9 += -8;" + /* r7 = ktime_get_ns() */ + "call %[bpf_ktime_get_ns];" + "r7 = r0;" + /* r6 = ktime_get_ns() */ + "call %[bpf_ktime_get_ns];" + "r6 = r0;" + /* if r6 > r7 is an unpredictable jump */ + "if r6 > r7 goto l1_%=;" + "r7 = r6;" +"l1_%=:" + /* if r7 > 4 ...; transfers range to r6 on one execution path + * but does not transfer on another + */ + "if r7 > 4 goto l2_%=;" + /* Access memory at r9[r6], r6 is not always bounded */ + "r9 += r6;" + "r0 = *(u8*)(r9 + 0);" +"l2_%=:" + "r0 = 0;" + "exit;" + : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +/* Similar to check_ids_in_regsafe. + * The l0 could be reached in two states: + * + * (1) r6{.id=A}, r7{.id=A}, r8{.id=B} + * (2) r6{.id=B}, r7{.id=A}, r8{.id=B} + * + * Where (2) is not safe, as "r7 > 4" check won't propagate range for it. + * This example would be considered safe without changes to + * mark_chain_precision() to track scalar values with equal IDs. + */ +SEC("socket") +__failure __msg("register with unbounded min value") +__flag(BPF_F_TEST_STATE_FREQ) +__naked void check_ids_in_regsafe_2(void) +{ + asm volatile ( + /* Bump allocated stack */ + "r1 = 0;" + "*(u64*)(r10 - 8) = r1;" + /* r9 = pointer to stack */ + "r9 = r10;" + "r9 += -8;" + /* r8 = ktime_get_ns() */ + "call %[bpf_ktime_get_ns];" + "r8 = r0;" + /* r7 = ktime_get_ns() */ + "call %[bpf_ktime_get_ns];" + "r7 = r0;" + /* r6 = ktime_get_ns() */ + "call %[bpf_ktime_get_ns];" + "r6 = r0;" + /* scratch .id from r0 */ + "r0 = 0;" + /* if r6 > r7 is an unpredictable jump */ + "if r6 > r7 goto l1_%=;" + /* tie r6 and r7 .id */ + "r6 = r7;" +"l0_%=:" + /* if r7 > 4 exit(0) */ + "if r7 > 4 goto l2_%=;" + /* Access memory at r9[r6] */ + "r9 += r6;" + "r0 = *(u8*)(r9 + 0);" +"l2_%=:" + "r0 = 0;" + "exit;" +"l1_%=:" + /* tie r6 and r8 .id */ + "r6 = r8;" + "goto l0_%=;" + : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +/* Check that scalar IDs *are not* generated on register to register + * assignments if source register is a constant. + * + * If such IDs *are* generated the 'l1' below would be reached in + * two states: + * + * (1) r1{.id=A}, r2{.id=A} + * (2) r1{.id=C}, r2{.id=C} + * + * Thus forcing 'if r1 == r2' verification twice. + */ +SEC("socket") +__success __log_level(2) +__msg("11: (1d) if r3 == r4 goto pc+0") +__msg("frame 0: propagating r3,r4") +__msg("11: safe") +__msg("processed 15 insns") +__flag(BPF_F_TEST_STATE_FREQ) +__naked void no_scalar_id_for_const(void) +{ + asm volatile ( + "call %[bpf_ktime_get_ns];" + /* unpredictable jump */ + "if r0 > 7 goto l0_%=;" + /* possibly generate same scalar ids for r3 and r4 */ + "r1 = 0;" + "r1 = r1;" + "r3 = r1;" + "r4 = r1;" + "goto l1_%=;" +"l0_%=:" + /* possibly generate different scalar ids for r3 and r4 */ + "r1 = 0;" + "r2 = 0;" + "r3 = r1;" + "r4 = r2;" +"l1_%=:" + /* predictable jump, marks r3 and r4 precise */ + "if r3 == r4 goto +0;" + "r0 = 0;" + "exit;" + : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +/* Same as no_scalar_id_for_const() but for 32-bit values */ +SEC("socket") +__success __log_level(2) +__msg("11: (1e) if w3 == w4 goto pc+0") +__msg("frame 0: propagating r3,r4") +__msg("11: safe") +__msg("processed 15 insns") +__flag(BPF_F_TEST_STATE_FREQ) +__naked void no_scalar_id_for_const32(void) +{ + asm volatile ( + "call %[bpf_ktime_get_ns];" + /* unpredictable jump */ + "if r0 > 7 goto l0_%=;" + /* possibly generate same scalar ids for r3 and r4 */ + "w1 = 0;" + "w1 = w1;" + "w3 = w1;" + "w4 = w1;" + "goto l1_%=;" +"l0_%=:" + /* possibly generate different scalar ids for r3 and r4 */ + "w1 = 0;" + "w2 = 0;" + "w3 = w1;" + "w4 = w2;" +"l1_%=:" + /* predictable jump, marks r1 and r2 precise */ + "if w3 == w4 goto +0;" + "r0 = 0;" + "exit;" + : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +/* Check that unique scalar IDs are ignored when new verifier state is + * compared to cached verifier state. For this test: + * - cached state has no id on r1 + * - new state has a unique id on r1 + */ +SEC("socket") +__success __log_level(2) +__msg("6: (25) if r6 > 0x7 goto pc+1") +__msg("7: (57) r1 &= 255") +__msg("8: (bf) r2 = r10") +__msg("from 6 to 8: safe") +__msg("processed 12 insns") +__flag(BPF_F_TEST_STATE_FREQ) +__naked void ignore_unique_scalar_ids_cur(void) +{ + asm volatile ( + "call %[bpf_ktime_get_ns];" + "r6 = r0;" + "call %[bpf_ktime_get_ns];" + "r0 &= 0xff;" + /* r1.id == r0.id */ + "r1 = r0;" + /* make r1.id unique */ + "r0 = 0;" + "if r6 > 7 goto l0_%=;" + /* clear r1 id, but keep the range compatible */ + "r1 &= 0xff;" +"l0_%=:" + /* get here in two states: + * - first: r1 has no id (cached state) + * - second: r1 has a unique id (should be considered equivalent) + */ + "r2 = r10;" + "r2 += r1;" + "exit;" + : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +/* Check that unique scalar IDs are ignored when new verifier state is + * compared to cached verifier state. For this test: + * - cached state has a unique id on r1 + * - new state has no id on r1 + */ +SEC("socket") +__success __log_level(2) +__msg("6: (25) if r6 > 0x7 goto pc+1") +__msg("7: (05) goto pc+1") +__msg("9: (bf) r2 = r10") +__msg("9: safe") +__msg("processed 13 insns") +__flag(BPF_F_TEST_STATE_FREQ) +__naked void ignore_unique_scalar_ids_old(void) +{ + asm volatile ( + "call %[bpf_ktime_get_ns];" + "r6 = r0;" + "call %[bpf_ktime_get_ns];" + "r0 &= 0xff;" + /* r1.id == r0.id */ + "r1 = r0;" + /* make r1.id unique */ + "r0 = 0;" + "if r6 > 7 goto l1_%=;" + "goto l0_%=;" +"l1_%=:" + /* clear r1 id, but keep the range compatible */ + "r1 &= 0xff;" +"l0_%=:" + /* get here in two states: + * - first: r1 has a unique id (cached state) + * - second: r1 has no id (should be considered equivalent) + */ + "r2 = r10;" + "r2 += r1;" + "exit;" + : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +/* Check that two different scalar IDs in a verified state can't be + * mapped to the same scalar ID in current state. + */ +SEC("socket") +__success __log_level(2) +/* The exit instruction should be reachable from two states, + * use two matches and "processed .. insns" to ensure this. + */ +__msg("13: (95) exit") +__msg("13: (95) exit") +__msg("processed 18 insns") +__flag(BPF_F_TEST_STATE_FREQ) +__naked void two_old_ids_one_cur_id(void) +{ + asm volatile ( + /* Give unique scalar IDs to r{6,7} */ + "call %[bpf_ktime_get_ns];" + "r0 &= 0xff;" + "r6 = r0;" + "call %[bpf_ktime_get_ns];" + "r0 &= 0xff;" + "r7 = r0;" + "r0 = 0;" + /* Maybe make r{6,7} IDs identical */ + "if r6 > r7 goto l0_%=;" + "goto l1_%=;" +"l0_%=:" + "r6 = r7;" +"l1_%=:" + /* Mark r{6,7} precise. + * Get here in two states: + * - first: r6{.id=A}, r7{.id=B} (cached state) + * - second: r6{.id=A}, r7{.id=A} + * Currently we don't want to consider such states equivalent. + * Thus "exit;" would be verified twice. + */ + "r2 = r10;" + "r2 += r6;" + "r2 += r7;" + "exit;" + : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/vrf_socket_lookup.c b/tools/testing/selftests/bpf/progs/vrf_socket_lookup.c new file mode 100644 index 000000000000..bcfb6feb38c0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/vrf_socket_lookup.c @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +#include <linux/ip.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/pkt_cls.h> +#include <stdbool.h> + +int lookup_status; +bool test_xdp; +bool tcp_skc; + +#define CUR_NS BPF_F_CURRENT_NETNS + +static void socket_lookup(void *ctx, void *data_end, void *data) +{ + struct ethhdr *eth = data; + struct bpf_sock_tuple *tp; + struct bpf_sock *sk; + struct iphdr *iph; + int tplen; + + if (eth + 1 > data_end) + return; + + if (eth->h_proto != bpf_htons(ETH_P_IP)) + return; + + iph = (struct iphdr *)(eth + 1); + if (iph + 1 > data_end) + return; + + tp = (struct bpf_sock_tuple *)&iph->saddr; + tplen = sizeof(tp->ipv4); + if ((void *)tp + tplen > data_end) + return; + + switch (iph->protocol) { + case IPPROTO_TCP: + if (tcp_skc) + sk = bpf_skc_lookup_tcp(ctx, tp, tplen, CUR_NS, 0); + else + sk = bpf_sk_lookup_tcp(ctx, tp, tplen, CUR_NS, 0); + break; + case IPPROTO_UDP: + sk = bpf_sk_lookup_udp(ctx, tp, tplen, CUR_NS, 0); + break; + default: + return; + } + + lookup_status = 0; + + if (sk) { + bpf_sk_release(sk); + lookup_status = 1; + } +} + +SEC("tc") +int tc_socket_lookup(struct __sk_buff *skb) +{ + void *data_end = (void *)(long)skb->data_end; + void *data = (void *)(long)skb->data; + + if (test_xdp) + return TC_ACT_UNSPEC; + + socket_lookup(skb, data_end, data); + return TC_ACT_UNSPEC; +} + +SEC("xdp") +int xdp_socket_lookup(struct xdp_md *xdp) +{ + void *data_end = (void *)(long)xdp->data_end; + void *data = (void *)(long)xdp->data; + + if (!test_xdp) + return XDP_PASS; + + socket_lookup(xdp, data_end, data); + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 71704a38cac3..31f1c935cd07 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -1341,45 +1341,46 @@ static bool cmp_str_seq(const char *log, const char *exp) return true; } -static int get_xlated_program(int fd_prog, struct bpf_insn **buf, int *cnt) +static struct bpf_insn *get_xlated_program(int fd_prog, int *cnt) { + __u32 buf_element_size = sizeof(struct bpf_insn); struct bpf_prog_info info = {}; __u32 info_len = sizeof(info); __u32 xlated_prog_len; - __u32 buf_element_size = sizeof(struct bpf_insn); + struct bpf_insn *buf; if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) { perror("bpf_prog_get_info_by_fd failed"); - return -1; + return NULL; } xlated_prog_len = info.xlated_prog_len; if (xlated_prog_len % buf_element_size) { printf("Program length %d is not multiple of %d\n", xlated_prog_len, buf_element_size); - return -1; + return NULL; } *cnt = xlated_prog_len / buf_element_size; - *buf = calloc(*cnt, buf_element_size); + buf = calloc(*cnt, buf_element_size); if (!buf) { perror("can't allocate xlated program buffer"); - return -ENOMEM; + return NULL; } bzero(&info, sizeof(info)); info.xlated_prog_len = xlated_prog_len; - info.xlated_prog_insns = (__u64)(unsigned long)*buf; + info.xlated_prog_insns = (__u64)(unsigned long)buf; if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) { perror("second bpf_prog_get_info_by_fd failed"); goto out_free_buf; } - return 0; + return buf; out_free_buf: - free(*buf); - return -1; + free(buf); + return NULL; } static bool is_null_insn(struct bpf_insn *insn) @@ -1512,7 +1513,8 @@ static bool check_xlated_program(struct bpf_test *test, int fd_prog) if (!check_expected && !check_unexpected) goto out; - if (get_xlated_program(fd_prog, &buf, &cnt)) { + buf = get_xlated_program(fd_prog, &cnt); + if (!buf) { printf("FAIL: can't get xlated program\n"); result = false; goto out; diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c index b8c0aae8e7ec..99272bb890da 100644 --- a/tools/testing/selftests/bpf/verifier/precise.c +++ b/tools/testing/selftests/bpf/verifier/precise.c @@ -46,7 +46,7 @@ mark_precise: frame0: regs=r2 stack= before 20\ mark_precise: frame0: parent state regs=r2 stack=:\ mark_precise: frame0: last_idx 19 first_idx 10\ - mark_precise: frame0: regs=r2 stack= before 19\ + mark_precise: frame0: regs=r2,r9 stack= before 19\ mark_precise: frame0: regs=r9 stack= before 18\ mark_precise: frame0: regs=r8,r9 stack= before 17\ mark_precise: frame0: regs=r0,r9 stack= before 15\ @@ -106,10 +106,10 @@ mark_precise: frame0: regs=r2 stack= before 22\ mark_precise: frame0: parent state regs=r2 stack=:\ mark_precise: frame0: last_idx 20 first_idx 20\ - mark_precise: frame0: regs=r2 stack= before 20\ - mark_precise: frame0: parent state regs=r2 stack=:\ + mark_precise: frame0: regs=r2,r9 stack= before 20\ + mark_precise: frame0: parent state regs=r2,r9 stack=:\ mark_precise: frame0: last_idx 19 first_idx 17\ - mark_precise: frame0: regs=r2 stack= before 19\ + mark_precise: frame0: regs=r2,r9 stack= before 19\ mark_precise: frame0: regs=r9 stack= before 18\ mark_precise: frame0: regs=r8,r9 stack= before 17\ mark_precise: frame0: parent state regs= stack=:", diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 773dd770a567..13561e5bc0cd 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -809,7 +809,7 @@ run_tests_disconnect() cat $cin $cin $cin > "$cin".disconnect - # force do_transfer to cope with the multiple tranmissions + # force do_transfer to cope with the multiple transmissions sin="$cin.disconnect" cin="$cin.disconnect" cin_disconnect="$old_cin" diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index a7973d6a40a0..e6c9d5451c5b 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -50,6 +50,10 @@ TEST_NAME="" nr_blank=40 export FAILING_LINKS="" +export test_linkfail=0 +export addr_nr_ns1=0 +export addr_nr_ns2=0 +export sflags="" # generated using "nfbpf_compile '(ip && (ip[54] & 0xf0) == 0x30) || # (ip6 && (ip6[74] & 0xf0) == 0x30)'" @@ -589,6 +593,26 @@ wait_rm_addr() done } +rm_sf_count() +{ + get_counter "${1}" "MPTcpExtRmSubflow" +} + +# $1: ns, $2: old rm_sf counter in $ns +wait_rm_sf() +{ + local ns="${1}" + local old_cnt="${2}" + local cnt + + local i + for i in $(seq 10); do + cnt=$(rm_sf_count ${ns}) + [ "$cnt" = "${old_cnt}" ] || break + sleep 0.1 + done +} + wait_mpj() { local ns="${1}" @@ -798,135 +822,11 @@ pm_nl_check_endpoint() fi } -do_transfer() +pm_nl_set_endpoint() { local listener_ns="$1" local connector_ns="$2" - local cl_proto="$3" - local srv_proto="$4" - local connect_addr="$5" - local test_link_fail="$6" - local addr_nr_ns1="$7" - local addr_nr_ns2="$8" - local speed="$9" - local sflags="${10}" - - local port=$((10000 + TEST_COUNT - 1)) - local cappid - local userspace_pm=0 - - :> "$cout" - :> "$sout" - :> "$capout" - - if [ $capture -eq 1 ]; then - local capuser - if [ -z $SUDO_USER ] ; then - capuser="" - else - capuser="-Z $SUDO_USER" - fi - - capfile=$(printf "mp_join-%02u-%s.pcap" "$TEST_COUNT" "${listener_ns}") - - echo "Capturing traffic for test $TEST_COUNT into $capfile" - ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 & - cappid=$! - - sleep 1 - fi - - NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ - nstat -n - NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ - nstat -n - - local extra_args - if [ $speed = "fast" ]; then - extra_args="-j" - elif [ $speed = "slow" ]; then - extra_args="-r 50" - elif [[ $speed = "speed_"* ]]; then - extra_args="-r ${speed:6}" - fi - - if [[ "${addr_nr_ns1}" = "userspace_"* ]]; then - userspace_pm=1 - addr_nr_ns1=${addr_nr_ns1:10} - fi - - local flags="subflow" - local extra_cl_args="" - local extra_srv_args="" - local trunc_size="" - if [[ "${addr_nr_ns2}" = "fastclose_"* ]]; then - if [ ${test_link_fail} -le 1 ]; then - echo "fastclose tests need test_link_fail argument" - fail_test - return 1 - fi - - # disconnect - trunc_size=${test_link_fail} - local side=${addr_nr_ns2:10} - - if [ ${side} = "client" ]; then - extra_cl_args="-f ${test_link_fail}" - extra_srv_args="-f -1" - elif [ ${side} = "server" ]; then - extra_srv_args="-f ${test_link_fail}" - extra_cl_args="-f -1" - else - echo "wrong/unknown fastclose spec ${side}" - fail_test - return 1 - fi - addr_nr_ns2=0 - elif [[ "${addr_nr_ns2}" = "userspace_"* ]]; then - userspace_pm=1 - addr_nr_ns2=${addr_nr_ns2:10} - elif [[ "${addr_nr_ns2}" = "fullmesh_"* ]]; then - flags="${flags},fullmesh" - addr_nr_ns2=${addr_nr_ns2:9} - fi - - extra_srv_args="$extra_args $extra_srv_args" - if [ "$test_link_fail" -gt 1 ];then - timeout ${timeout_test} \ - ip netns exec ${listener_ns} \ - ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ - $extra_srv_args "::" < "$sinfail" > "$sout" & - else - timeout ${timeout_test} \ - ip netns exec ${listener_ns} \ - ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ - $extra_srv_args "::" < "$sin" > "$sout" & - fi - local spid=$! - - wait_local_port_listen "${listener_ns}" "${port}" - - extra_cl_args="$extra_args $extra_cl_args" - if [ "$test_link_fail" -eq 0 ];then - timeout ${timeout_test} \ - ip netns exec ${connector_ns} \ - ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ - $extra_cl_args $connect_addr < "$cin" > "$cout" & - elif [ "$test_link_fail" -eq 1 ] || [ "$test_link_fail" -eq 2 ];then - ( cat "$cinfail" ; sleep 2; link_failure $listener_ns ; cat "$cinfail" ) | \ - tee "$cinsent" | \ - timeout ${timeout_test} \ - ip netns exec ${connector_ns} \ - ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ - $extra_cl_args $connect_addr > "$cout" & - else - tee "$cinsent" < "$cinfail" | \ - timeout ${timeout_test} \ - ip netns exec ${connector_ns} \ - ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ - $extra_cl_args $connect_addr > "$cout" & - fi - local cpid=$! + local connect_addr="$3" # let the mptcp subflow be established in background before # do endpoint manipulation @@ -938,7 +838,6 @@ do_transfer() local counter=2 local add_nr_ns1=${addr_nr_ns1} local id=10 - local tk while [ $add_nr_ns1 -gt 0 ]; do local addr if is_v6 "${connect_addr}"; then @@ -946,24 +845,7 @@ do_transfer() else addr="10.0.$counter.1" fi - if [ $userspace_pm -eq 0 ]; then - pm_nl_add_endpoint $ns1 $addr flags signal - else - tk=$(grep "type:1," "$evts_ns1" | - sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q') - ip netns exec ${listener_ns} ./pm_nl_ctl ann $addr token $tk id $id - sleep 1 - sp=$(grep "type:10" "$evts_ns1" | - sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q') - da=$(grep "type:10" "$evts_ns1" | - sed -n 's/.*\(daddr6:\)\([0-9a-f:.]*\).*$/\2/p;q') - dp=$(grep "type:10" "$evts_ns1" | - sed -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q') - ip netns exec ${listener_ns} ./pm_nl_ctl rem token $tk id $id - ip netns exec ${listener_ns} ./pm_nl_ctl dsf lip "::ffff:$addr" \ - lport $sp rip $da rport $dp token $tk - fi - + pm_nl_add_endpoint $ns1 $addr flags signal counter=$((counter + 1)) add_nr_ns1=$((add_nr_ns1 - 1)) id=$((id + 1)) @@ -1008,7 +890,6 @@ do_transfer() local add_nr_ns2=${addr_nr_ns2} local counter=3 local id=20 - local tk da dp sp while [ $add_nr_ns2 -gt 0 ]; do local addr if is_v6 "${connect_addr}"; then @@ -1016,21 +897,7 @@ do_transfer() else addr="10.0.$counter.2" fi - if [ $userspace_pm -eq 0 ]; then - pm_nl_add_endpoint $ns2 $addr flags $flags - else - tk=$(sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2") - da=$(sed -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evts_ns2") - dp=$(sed -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2") - ip netns exec ${connector_ns} ./pm_nl_ctl csf lip $addr lid $id \ - rip $da rport $dp token $tk - sleep 1 - sp=$(grep "type:10" "$evts_ns2" | - sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q') - ip netns exec ${connector_ns} ./pm_nl_ctl rem token $tk id $id - ip netns exec ${connector_ns} ./pm_nl_ctl dsf lip $addr lport $sp \ - rip $da rport $dp token $tk - fi + pm_nl_add_endpoint $ns2 $addr flags $flags counter=$((counter + 1)) add_nr_ns2=$((add_nr_ns2 - 1)) id=$((id + 1)) @@ -1099,6 +966,126 @@ do_transfer() done done fi +} + +do_transfer() +{ + local listener_ns="$1" + local connector_ns="$2" + local cl_proto="$3" + local srv_proto="$4" + local connect_addr="$5" + local speed="$6" + + local port=$((10000 + TEST_COUNT - 1)) + local cappid + + :> "$cout" + :> "$sout" + :> "$capout" + + if [ $capture -eq 1 ]; then + local capuser + if [ -z $SUDO_USER ] ; then + capuser="" + else + capuser="-Z $SUDO_USER" + fi + + capfile=$(printf "mp_join-%02u-%s.pcap" "$TEST_COUNT" "${listener_ns}") + + echo "Capturing traffic for test $TEST_COUNT into $capfile" + ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 & + cappid=$! + + sleep 1 + fi + + NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ + nstat -n + NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ + nstat -n + + local extra_args + if [ $speed = "fast" ]; then + extra_args="-j" + elif [ $speed = "slow" ]; then + extra_args="-r 50" + elif [[ $speed = "speed_"* ]]; then + extra_args="-r ${speed:6}" + fi + + local flags="subflow" + local extra_cl_args="" + local extra_srv_args="" + local trunc_size="" + if [[ "${addr_nr_ns2}" = "fastclose_"* ]]; then + if [ ${test_linkfail} -le 1 ]; then + echo "fastclose tests need test_linkfail argument" + fail_test + return 1 + fi + + # disconnect + trunc_size=${test_linkfail} + local side=${addr_nr_ns2:10} + + if [ ${side} = "client" ]; then + extra_cl_args="-f ${test_linkfail}" + extra_srv_args="-f -1" + elif [ ${side} = "server" ]; then + extra_srv_args="-f ${test_linkfail}" + extra_cl_args="-f -1" + else + echo "wrong/unknown fastclose spec ${side}" + fail_test + return 1 + fi + addr_nr_ns2=0 + elif [[ "${addr_nr_ns2}" = "fullmesh_"* ]]; then + flags="${flags},fullmesh" + addr_nr_ns2=${addr_nr_ns2:9} + fi + + extra_srv_args="$extra_args $extra_srv_args" + if [ "$test_linkfail" -gt 1 ];then + timeout ${timeout_test} \ + ip netns exec ${listener_ns} \ + ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ + $extra_srv_args "::" < "$sinfail" > "$sout" & + else + timeout ${timeout_test} \ + ip netns exec ${listener_ns} \ + ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ + $extra_srv_args "::" < "$sin" > "$sout" & + fi + local spid=$! + + wait_local_port_listen "${listener_ns}" "${port}" + + extra_cl_args="$extra_args $extra_cl_args" + if [ "$test_linkfail" -eq 0 ];then + timeout ${timeout_test} \ + ip netns exec ${connector_ns} \ + ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ + $extra_cl_args $connect_addr < "$cin" > "$cout" & + elif [ "$test_linkfail" -eq 1 ] || [ "$test_linkfail" -eq 2 ];then + ( cat "$cinfail" ; sleep 2; link_failure $listener_ns ; cat "$cinfail" ) | \ + tee "$cinsent" | \ + timeout ${timeout_test} \ + ip netns exec ${connector_ns} \ + ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ + $extra_cl_args $connect_addr > "$cout" & + else + tee "$cinsent" < "$cinfail" | \ + timeout ${timeout_test} \ + ip netns exec ${connector_ns} \ + ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ + $extra_cl_args $connect_addr > "$cout" & + fi + local cpid=$! + + pm_nl_set_endpoint $listener_ns $connector_ns $connect_addr wait $cpid local retc=$? @@ -1129,13 +1116,13 @@ do_transfer() return 1 fi - if [ "$test_link_fail" -gt 1 ];then + if [ "$test_linkfail" -gt 1 ];then check_transfer $sinfail $cout "file received by client" $trunc_size else check_transfer $sin $cout "file received by client" $trunc_size fi retc=$? - if [ "$test_link_fail" -eq 0 ];then + if [ "$test_linkfail" -eq 0 ];then check_transfer $cin $sout "file received by server" $trunc_size else check_transfer $cinsent $sout "file received by server" $trunc_size @@ -1168,11 +1155,7 @@ run_tests() local listener_ns="$1" local connector_ns="$2" local connect_addr="$3" - local test_linkfail="${4:-0}" - local addr_nr_ns1="${5:-0}" - local addr_nr_ns2="${6:-0}" - local speed="${7:-fast}" - local sflags="${8:-""}" + local speed="${4:-fast}" local size @@ -1216,8 +1199,7 @@ run_tests() make_file "$sinfail" "server" $size fi - do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} \ - ${test_linkfail} ${addr_nr_ns1} ${addr_nr_ns2} ${speed} ${sflags} + do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} ${speed} } dump_stats() @@ -1854,31 +1836,26 @@ chk_subflow_nr() chk_mptcp_info() { - local nr_info=$1 - local info + local info1=$1 + local exp1=$2 + local info2=$3 + local exp2=$4 local cnt1 local cnt2 local dump_stats - if [[ $nr_info = "subflows_"* ]]; then - info="subflows" - nr_info=${nr_info:9} - else - echo "[fail] unsupported argument: $nr_info" - fail_test - return 1 - fi + printf "%-${nr_blank}s %-30s" " " "mptcp_info $info1:$info2=$exp1:$exp2" - printf "%-${nr_blank}s %-30s" " " "mptcp_info $info=$nr_info" - - cnt1=$(ss -N $ns1 -inmHM | grep "$info:" | - sed -n 's/.*\('"$info"':\)\([[:digit:]]*\).*$/\2/p;q') + cnt1=$(ss -N $ns1 -inmHM | grep "$info1:" | + sed -n 's/.*\('"$info1"':\)\([[:digit:]]*\).*$/\2/p;q') + cnt2=$(ss -N $ns2 -inmHM | grep "$info2:" | + sed -n 's/.*\('"$info2"':\)\([[:digit:]]*\).*$/\2/p;q') + # 'ss' only display active connections and counters that are not 0. [ -z "$cnt1" ] && cnt1=0 - cnt2=$(ss -N $ns2 -inmHM | grep "$info:" | - sed -n 's/.*\('"$info"':\)\([[:digit:]]*\).*$/\2/p;q') [ -z "$cnt2" ] && cnt2=0 - if [ "$cnt1" != "$nr_info" ] || [ "$cnt2" != "$nr_info" ]; then - echo "[fail] got $cnt1:$cnt2 $info expected $nr_info" + + if [ "$cnt1" != "$exp1" ] || [ "$cnt2" != "$exp2" ]; then + echo "[fail] got $cnt1:$cnt2 $info1:$info2 expected $exp1:$exp2" fail_test dump_stats=1 else @@ -2011,7 +1988,7 @@ subflows_error_tests() pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 0 1 pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow + run_tests $ns1 $ns2 10.0.1.1 slow chk_join_nr 0 0 0 fi @@ -2022,7 +1999,7 @@ subflows_error_tests() pm_nl_set_limits $ns2 0 2 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow + run_tests $ns1 $ns2 10.0.1.1 slow chk_join_nr 1 1 1 fi @@ -2033,7 +2010,7 @@ subflows_error_tests() pm_nl_set_limits $ns2 0 2 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow + run_tests $ns1 $ns2 10.0.1.1 slow chk_join_nr 1 1 1 fi @@ -2045,7 +2022,7 @@ subflows_error_tests() pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 0 1 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow & + run_tests $ns1 $ns2 10.0.1.1 slow & # mpj subflow will be in TW after the reset wait_attempt_fail $ns2 @@ -2144,7 +2121,7 @@ signal_address_tests() # the peer could possibly miss some addr notification, allow retransmission ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1 - run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow + run_tests $ns1 $ns2 10.0.1.1 slow # It is not directly linked to the commit introducing this # symbol but for the parent one which is linked anyway. @@ -2176,7 +2153,8 @@ link_failure_tests() pm_nl_set_limits $ns2 1 3 pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow pm_nl_add_endpoint $ns2 10.0.4.2 dev ns2eth4 flags subflow - run_tests $ns1 $ns2 10.0.1.1 1 + test_linkfail=1 \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 3 3 3 chk_add_nr 1 1 chk_stale_nr $ns2 1 5 1 @@ -2191,7 +2169,8 @@ link_failure_tests() pm_nl_set_limits $ns2 1 3 pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow pm_nl_add_endpoint $ns2 10.0.4.2 dev ns2eth4 flags subflow - run_tests $ns1 $ns2 10.0.1.1 2 + test_linkfail=2 \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 3 3 3 chk_add_nr 1 1 chk_stale_nr $ns2 1 -1 1 @@ -2204,9 +2183,9 @@ link_failure_tests() pm_nl_set_limits $ns1 0 2 pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal pm_nl_set_limits $ns2 1 2 - FAILING_LINKS="1" pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup - run_tests $ns1 $ns2 10.0.1.1 1 + FAILING_LINKS="1" test_linkfail=1 \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 chk_add_nr 1 1 chk_link_usage $ns2 ns2eth3 $cinsent 0 @@ -2220,8 +2199,8 @@ link_failure_tests() pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal pm_nl_set_limits $ns2 1 2 pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup - FAILING_LINKS="1 2" - run_tests $ns1 $ns2 10.0.1.1 1 + FAILING_LINKS="1 2" test_linkfail=1 \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 chk_add_nr 1 1 chk_stale_nr $ns2 2 4 2 @@ -2236,8 +2215,8 @@ link_failure_tests() pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal pm_nl_set_limits $ns2 1 3 pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup - FAILING_LINKS="1 2" - run_tests $ns1 $ns2 10.0.1.1 2 + FAILING_LINKS="1 2" test_linkfail=2 \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 chk_add_nr 1 1 chk_stale_nr $ns2 1 -1 2 @@ -2252,7 +2231,7 @@ add_addr_timeout_tests() pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 1 1 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal - run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow + run_tests $ns1 $ns2 10.0.1.1 slow chk_join_nr 1 1 1 chk_add_tx_nr 4 4 chk_add_nr 4 0 @@ -2263,7 +2242,7 @@ add_addr_timeout_tests() pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 1 1 pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal - run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow + run_tests $ns1 $ns2 dead:beef:1::1 slow chk_join_nr 1 1 1 chk_add_nr 4 0 fi @@ -2274,7 +2253,7 @@ add_addr_timeout_tests() pm_nl_add_endpoint $ns1 10.0.2.1 flags signal pm_nl_add_endpoint $ns1 10.0.3.1 flags signal pm_nl_set_limits $ns2 2 2 - run_tests $ns1 $ns2 10.0.1.1 0 0 0 speed_10 + run_tests $ns1 $ns2 10.0.1.1 speed_10 chk_join_nr 2 2 2 chk_add_nr 8 0 fi @@ -2285,7 +2264,7 @@ add_addr_timeout_tests() pm_nl_add_endpoint $ns1 10.0.12.1 flags signal pm_nl_add_endpoint $ns1 10.0.3.1 flags signal pm_nl_set_limits $ns2 2 2 - run_tests $ns1 $ns2 10.0.1.1 0 0 0 speed_10 + run_tests $ns1 $ns2 10.0.1.1 speed_10 chk_join_nr 1 1 1 chk_add_nr 8 0 fi @@ -2298,7 +2277,8 @@ remove_tests() pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 0 1 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 0 0 -1 slow + addr_nr_ns2=-1 \ + run_tests $ns1 $ns2 10.0.1.1 slow chk_join_nr 1 1 1 chk_rm_tx_nr 1 chk_rm_nr 1 1 @@ -2310,7 +2290,8 @@ remove_tests() pm_nl_set_limits $ns2 0 2 pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 0 0 -2 slow + addr_nr_ns2=-2 \ + run_tests $ns1 $ns2 10.0.1.1 slow chk_join_nr 2 2 2 chk_rm_nr 2 2 fi @@ -2320,7 +2301,8 @@ remove_tests() pm_nl_set_limits $ns1 0 1 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal pm_nl_set_limits $ns2 1 1 - run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow + addr_nr_ns1=-1 \ + run_tests $ns1 $ns2 10.0.1.1 slow chk_join_nr 1 1 1 chk_add_nr 1 1 chk_rm_nr 1 1 invert @@ -2332,7 +2314,8 @@ remove_tests() pm_nl_add_endpoint $ns1 10.0.2.1 flags signal pm_nl_set_limits $ns2 1 2 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 0 -1 -1 slow + addr_nr_ns1=-1 addr_nr_ns2=-1 \ + run_tests $ns1 $ns2 10.0.1.1 slow chk_join_nr 2 2 2 chk_add_nr 1 1 chk_rm_nr 1 1 @@ -2345,7 +2328,8 @@ remove_tests() pm_nl_set_limits $ns2 1 3 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 0 -1 -2 speed_10 + addr_nr_ns1=-1 addr_nr_ns2=-2 \ + run_tests $ns1 $ns2 10.0.1.1 speed_10 chk_join_nr 3 3 3 chk_add_nr 1 1 chk_rm_nr 2 2 @@ -2358,7 +2342,8 @@ remove_tests() pm_nl_add_endpoint $ns1 10.0.3.1 flags signal pm_nl_add_endpoint $ns1 10.0.4.1 flags signal pm_nl_set_limits $ns2 3 3 - run_tests $ns1 $ns2 10.0.1.1 0 -3 0 speed_10 + addr_nr_ns1=-3 \ + run_tests $ns1 $ns2 10.0.1.1 speed_10 chk_join_nr 3 3 3 chk_add_nr 3 3 chk_rm_nr 3 3 invert @@ -2371,7 +2356,8 @@ remove_tests() pm_nl_add_endpoint $ns1 10.0.3.1 flags signal pm_nl_add_endpoint $ns1 10.0.14.1 flags signal pm_nl_set_limits $ns2 3 3 - run_tests $ns1 $ns2 10.0.1.1 0 -3 0 speed_10 + addr_nr_ns1=-3 \ + run_tests $ns1 $ns2 10.0.1.1 speed_10 chk_join_nr 1 1 1 chk_add_nr 3 3 chk_rm_nr 3 1 invert @@ -2384,7 +2370,8 @@ remove_tests() pm_nl_set_limits $ns2 1 3 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow + addr_nr_ns1=-8 addr_nr_ns2=-8 \ + run_tests $ns1 $ns2 10.0.1.1 slow chk_join_nr 3 3 3 chk_add_nr 1 1 chk_rm_nr 1 3 invert simult @@ -2397,7 +2384,8 @@ remove_tests() pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow id 150 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow + addr_nr_ns1=-8 addr_nr_ns2=-8 \ + run_tests $ns1 $ns2 10.0.1.1 slow chk_join_nr 3 3 3 if mptcp_lib_kversion_ge 5.18; then @@ -2415,7 +2403,8 @@ remove_tests() pm_nl_add_endpoint $ns1 10.0.3.1 flags signal pm_nl_add_endpoint $ns1 10.0.4.1 flags signal pm_nl_set_limits $ns2 3 3 - run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow + addr_nr_ns1=-8 addr_nr_ns2=-8 \ + run_tests $ns1 $ns2 10.0.1.1 slow chk_join_nr 3 3 3 chk_add_nr 3 3 chk_rm_nr 3 3 invert simult @@ -2428,7 +2417,8 @@ remove_tests() pm_nl_add_endpoint $ns1 10.0.3.1 flags signal pm_nl_add_endpoint $ns1 10.0.14.1 flags signal pm_nl_set_limits $ns2 3 3 - run_tests $ns1 $ns2 10.0.1.1 0 -8 0 slow + addr_nr_ns1=-8 \ + run_tests $ns1 $ns2 10.0.1.1 slow chk_join_nr 1 1 1 chk_add_nr 3 3 chk_rm_nr 3 1 invert @@ -2439,7 +2429,8 @@ remove_tests() pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 0 1 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 0 0 -9 slow + addr_nr_ns2=-9 \ + run_tests $ns1 $ns2 10.0.1.1 slow chk_join_nr 1 1 1 chk_rm_nr 1 1 fi @@ -2449,7 +2440,8 @@ remove_tests() pm_nl_set_limits $ns1 0 1 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal pm_nl_set_limits $ns2 1 1 - run_tests $ns1 $ns2 10.0.1.1 0 -9 0 slow + addr_nr_ns1=-9 \ + run_tests $ns1 $ns2 10.0.1.1 slow chk_join_nr 1 1 1 chk_add_nr 1 1 chk_rm_nr 1 1 invert @@ -2462,7 +2454,8 @@ add_tests() if reset "add single subflow"; then pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 0 1 - run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow + addr_nr_ns2=1 \ + run_tests $ns1 $ns2 10.0.1.1 slow chk_join_nr 1 1 1 fi @@ -2470,7 +2463,8 @@ add_tests() if reset "add signal address"; then pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 1 1 - run_tests $ns1 $ns2 10.0.1.1 0 1 0 slow + addr_nr_ns1=1 \ + run_tests $ns1 $ns2 10.0.1.1 slow chk_join_nr 1 1 1 chk_add_nr 1 1 fi @@ -2479,7 +2473,8 @@ add_tests() if reset "add multiple subflows"; then pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 0 2 - run_tests $ns1 $ns2 10.0.1.1 0 0 2 slow + addr_nr_ns2=2 \ + run_tests $ns1 $ns2 10.0.1.1 slow chk_join_nr 2 2 2 fi @@ -2487,7 +2482,8 @@ add_tests() if reset "add multiple subflows IPv6"; then pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 0 2 - run_tests $ns1 $ns2 dead:beef:1::1 0 0 2 slow + addr_nr_ns2=2 \ + run_tests $ns1 $ns2 dead:beef:1::1 slow chk_join_nr 2 2 2 fi @@ -2495,7 +2491,8 @@ add_tests() if reset "add multiple addresses IPv6"; then pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 2 2 - run_tests $ns1 $ns2 dead:beef:1::1 0 2 0 slow + addr_nr_ns1=2 \ + run_tests $ns1 $ns2 dead:beef:1::1 slow chk_join_nr 2 2 2 chk_add_nr 2 2 fi @@ -2508,14 +2505,14 @@ ipv6_tests() pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 0 1 pm_nl_add_endpoint $ns2 dead:beef:3::2 dev ns2eth3 flags subflow - run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow + run_tests $ns1 $ns2 dead:beef:1::1 slow chk_join_nr 1 1 1 fi # add_address, unused IPv6 if reset "unused signal address IPv6"; then pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal - run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow + run_tests $ns1 $ns2 dead:beef:1::1 slow chk_join_nr 0 0 0 chk_add_nr 1 1 fi @@ -2525,7 +2522,7 @@ ipv6_tests() pm_nl_set_limits $ns1 0 1 pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal pm_nl_set_limits $ns2 1 1 - run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow + run_tests $ns1 $ns2 dead:beef:1::1 slow chk_join_nr 1 1 1 chk_add_nr 1 1 fi @@ -2535,7 +2532,8 @@ ipv6_tests() pm_nl_set_limits $ns1 0 1 pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal pm_nl_set_limits $ns2 1 1 - run_tests $ns1 $ns2 dead:beef:1::1 0 -1 0 slow + addr_nr_ns1=-1 \ + run_tests $ns1 $ns2 dead:beef:1::1 slow chk_join_nr 1 1 1 chk_add_nr 1 1 chk_rm_nr 1 1 invert @@ -2547,7 +2545,8 @@ ipv6_tests() pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal pm_nl_set_limits $ns2 1 2 pm_nl_add_endpoint $ns2 dead:beef:3::2 dev ns2eth3 flags subflow - run_tests $ns1 $ns2 dead:beef:1::1 0 -1 -1 slow + addr_nr_ns1=-1 addr_nr_ns2=-1 \ + run_tests $ns1 $ns2 dead:beef:1::1 slow chk_join_nr 2 2 2 chk_add_nr 1 1 chk_rm_nr 1 1 @@ -2648,7 +2647,7 @@ mixed_tests() pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 1 1 pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal - run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow + run_tests $ns1 $ns2 10.0.1.1 slow chk_join_nr 0 0 0 fi @@ -2658,7 +2657,7 @@ mixed_tests() pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 1 1 pm_nl_add_endpoint $ns1 10.0.1.1 flags signal - run_tests $ns1 $ns2 dead:beef:2::1 0 0 0 slow + run_tests $ns1 $ns2 dead:beef:2::1 slow chk_join_nr 1 1 1 fi @@ -2669,7 +2668,7 @@ mixed_tests() pm_nl_set_limits $ns2 1 4 pm_nl_add_endpoint $ns2 dead:beef:2::2 flags subflow,fullmesh pm_nl_add_endpoint $ns1 10.0.1.1 flags signal - run_tests $ns1 $ns2 dead:beef:2::1 0 0 0 slow + run_tests $ns1 $ns2 dead:beef:2::1 slow chk_join_nr 1 1 1 fi @@ -2681,7 +2680,8 @@ mixed_tests() pm_nl_set_limits $ns2 2 4 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal - run_tests $ns1 $ns2 dead:beef:1::1 0 0 fullmesh_1 slow + addr_nr_ns2=fullmesh_1 \ + run_tests $ns1 $ns2 dead:beef:1::1 slow chk_join_nr 4 4 4 fi } @@ -2694,7 +2694,8 @@ backup_tests() pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 0 1 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup - run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow nobackup + sflags=nobackup \ + run_tests $ns1 $ns2 10.0.1.1 slow chk_join_nr 1 1 1 chk_prio_nr 0 1 fi @@ -2705,7 +2706,8 @@ backup_tests() pm_nl_set_limits $ns1 0 1 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal pm_nl_set_limits $ns2 1 1 - run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup + sflags=backup \ + run_tests $ns1 $ns2 10.0.1.1 slow chk_join_nr 1 1 1 chk_add_nr 1 1 chk_prio_nr 1 1 @@ -2717,7 +2719,8 @@ backup_tests() pm_nl_set_limits $ns1 0 1 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 pm_nl_set_limits $ns2 1 1 - run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup + sflags=backup \ + run_tests $ns1 $ns2 10.0.1.1 slow chk_join_nr 1 1 1 chk_add_nr 1 1 chk_prio_nr 1 1 @@ -2726,7 +2729,7 @@ backup_tests() if reset "mpc backup" && continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup - run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow + run_tests $ns1 $ns2 10.0.1.1 slow chk_join_nr 0 0 0 chk_prio_nr 0 1 fi @@ -2735,7 +2738,7 @@ backup_tests() continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow,backup pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup - run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow + run_tests $ns1 $ns2 10.0.1.1 slow chk_join_nr 0 0 0 chk_prio_nr 1 1 fi @@ -2743,7 +2746,8 @@ backup_tests() if reset "mpc switch to backup" && continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup + sflags=backup \ + run_tests $ns1 $ns2 10.0.1.1 slow chk_join_nr 0 0 0 chk_prio_nr 0 1 fi @@ -2752,7 +2756,8 @@ backup_tests() continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup + sflags=backup \ + run_tests $ns1 $ns2 10.0.1.1 slow chk_join_nr 0 0 0 chk_prio_nr 1 1 fi @@ -2841,7 +2846,8 @@ add_addr_ports_tests() pm_nl_set_limits $ns1 0 1 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 pm_nl_set_limits $ns2 1 1 - run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow + addr_nr_ns1=-1 \ + run_tests $ns1 $ns2 10.0.1.1 slow chk_join_nr 1 1 1 chk_add_nr 1 1 1 chk_rm_nr 1 1 invert @@ -2857,7 +2863,8 @@ add_addr_ports_tests() pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 pm_nl_set_limits $ns2 1 2 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 0 -1 -1 slow + addr_nr_ns1=-1 addr_nr_ns2=-1 \ + run_tests $ns1 $ns2 10.0.1.1 slow chk_join_nr 2 2 2 chk_add_nr 1 1 1 chk_rm_nr 1 1 @@ -2870,7 +2877,8 @@ add_addr_ports_tests() pm_nl_set_limits $ns2 1 3 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 0 -8 -2 slow + addr_nr_ns1=-8 addr_nr_ns2=-2 \ + run_tests $ns1 $ns2 10.0.1.1 slow chk_join_nr 3 3 3 chk_add_nr 1 1 chk_rm_nr 1 3 invert simult @@ -3072,7 +3080,8 @@ fullmesh_tests() pm_nl_set_limits $ns2 1 4 pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,fullmesh pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,fullmesh - run_tests $ns1 $ns2 10.0.1.1 0 1 0 slow + addr_nr_ns1=1 \ + run_tests $ns1 $ns2 10.0.1.1 slow chk_join_nr 4 4 4 chk_add_nr 1 1 fi @@ -3084,7 +3093,8 @@ fullmesh_tests() pm_nl_set_limits $ns1 1 3 pm_nl_set_limits $ns2 1 3 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal - run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_1 slow + addr_nr_ns2=fullmesh_1 \ + run_tests $ns1 $ns2 10.0.1.1 slow chk_join_nr 3 3 3 chk_add_nr 1 1 fi @@ -3096,7 +3106,8 @@ fullmesh_tests() pm_nl_set_limits $ns1 2 5 pm_nl_set_limits $ns2 1 5 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal - run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow + addr_nr_ns2=fullmesh_2 \ + run_tests $ns1 $ns2 10.0.1.1 slow chk_join_nr 5 5 5 chk_add_nr 1 1 fi @@ -3109,7 +3120,8 @@ fullmesh_tests() pm_nl_set_limits $ns1 2 4 pm_nl_set_limits $ns2 1 4 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal - run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow + addr_nr_ns2=fullmesh_2 \ + run_tests $ns1 $ns2 10.0.1.1 slow chk_join_nr 4 4 4 chk_add_nr 1 1 fi @@ -3120,7 +3132,8 @@ fullmesh_tests() pm_nl_set_limits $ns1 4 4 pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow pm_nl_set_limits $ns2 4 4 - run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow fullmesh + addr_nr_ns2=1 sflags=fullmesh \ + run_tests $ns1 $ns2 10.0.1.1 slow chk_join_nr 2 2 2 chk_rm_nr 0 1 fi @@ -3131,7 +3144,8 @@ fullmesh_tests() pm_nl_set_limits $ns1 4 4 pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow,fullmesh pm_nl_set_limits $ns2 4 4 - run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_1 slow nofullmesh + addr_nr_ns2=fullmesh_1 sflags=nofullmesh \ + run_tests $ns1 $ns2 10.0.1.1 slow chk_join_nr 2 2 2 chk_rm_nr 0 1 fi @@ -3142,7 +3156,8 @@ fullmesh_tests() pm_nl_set_limits $ns1 4 4 pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow pm_nl_set_limits $ns2 4 4 - run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow backup,fullmesh + addr_nr_ns2=1 sflags=backup,fullmesh \ + run_tests $ns1 $ns2 10.0.1.1 slow chk_join_nr 2 2 2 chk_prio_nr 0 1 chk_rm_nr 0 1 @@ -3154,7 +3169,8 @@ fullmesh_tests() pm_nl_set_limits $ns1 4 4 pm_nl_set_limits $ns2 4 4 pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,backup,fullmesh - run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow nobackup,nofullmesh + sflags=nobackup,nofullmesh \ + run_tests $ns1 $ns2 10.0.1.1 slow chk_join_nr 2 2 2 chk_prio_nr 0 1 chk_rm_nr 0 1 @@ -3164,14 +3180,16 @@ fullmesh_tests() fastclose_tests() { if reset_check_counter "fastclose test" "MPTcpExtMPFastcloseTx"; then - run_tests $ns1 $ns2 10.0.1.1 1024 0 fastclose_client + test_linkfail=1024 addr_nr_ns2=fastclose_client \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 0 0 0 chk_fclose_nr 1 1 chk_rst_nr 1 1 invert fi if reset_check_counter "fastclose server test" "MPTcpExtMPFastcloseRx"; then - run_tests $ns1 $ns2 10.0.1.1 1024 0 fastclose_server + test_linkfail=1024 addr_nr_ns2=fastclose_server \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 0 0 0 chk_fclose_nr 1 1 invert chk_rst_nr 1 1 @@ -3189,7 +3207,8 @@ fail_tests() { # single subflow if reset_with_fail "Infinite map" 1; then - run_tests $ns1 $ns2 10.0.1.1 128 + test_linkfail=128 \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 0 0 0 +1 +0 1 0 1 "$(pedit_action_pkts)" chk_fail_nr 1 -1 invert fi @@ -3200,11 +3219,77 @@ fail_tests() pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 0 1 pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 1024 + test_linkfail=1024 \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 1 0 1 1 0 "$(pedit_action_pkts)" fi } +userspace_pm_add_addr() +{ + local addr=$1 + local id=$2 + local tk + + tk=$(grep "type:1," "$evts_ns1" | + sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q') + ip netns exec $ns1 ./pm_nl_ctl ann $addr token $tk id $id + sleep 1 +} + +userspace_pm_rm_sf_addr_ns1() +{ + local addr=$1 + local id=$2 + local tk sp da dp + + tk=$(grep "type:1," "$evts_ns1" | + sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q') + sp=$(grep "type:10" "$evts_ns1" | + sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q') + da=$(grep "type:10" "$evts_ns1" | + sed -n 's/.*\(daddr6:\)\([0-9a-f:.]*\).*$/\2/p;q') + dp=$(grep "type:10" "$evts_ns1" | + sed -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q') + ip netns exec $ns1 ./pm_nl_ctl rem token $tk id $id + ip netns exec $ns1 ./pm_nl_ctl dsf lip "::ffff:$addr" \ + lport $sp rip $da rport $dp token $tk + wait_rm_addr $ns1 1 + wait_rm_sf $ns1 1 +} + +userspace_pm_add_sf() +{ + local addr=$1 + local id=$2 + local tk da dp + + tk=$(sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2") + da=$(sed -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evts_ns2") + dp=$(sed -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2") + ip netns exec $ns2 ./pm_nl_ctl csf lip $addr lid $id \ + rip $da rport $dp token $tk + sleep 1 +} + +userspace_pm_rm_sf_addr_ns2() +{ + local addr=$1 + local id=$2 + local tk da dp sp + + tk=$(sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2") + da=$(sed -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evts_ns2") + dp=$(sed -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2") + sp=$(grep "type:10" "$evts_ns2" | + sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q') + ip netns exec $ns2 ./pm_nl_ctl rem token $tk id $id + ip netns exec $ns2 ./pm_nl_ctl dsf lip $addr lport $sp \ + rip $da rport $dp token $tk + wait_rm_addr $ns2 1 + wait_rm_sf $ns2 1 +} + userspace_tests() { # userspace pm type prevents add_addr @@ -3260,7 +3345,8 @@ userspace_tests() pm_nl_set_limits $ns1 1 1 pm_nl_set_limits $ns2 1 1 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup + sflags=backup \ + run_tests $ns1 $ns2 10.0.1.1 slow chk_join_nr 1 1 0 chk_prio_nr 0 0 fi @@ -3273,7 +3359,8 @@ userspace_tests() pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 0 1 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 0 0 -1 slow + addr_nr_ns2=-1 \ + run_tests $ns1 $ns2 10.0.1.1 slow chk_join_nr 0 0 0 chk_rm_nr 0 0 fi @@ -3283,11 +3370,19 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns1 pm_nl_set_limits $ns2 1 1 - run_tests $ns1 $ns2 10.0.1.1 0 userspace_1 0 slow + run_tests $ns1 $ns2 10.0.1.1 speed_10 & + local tests_pid=$! + wait_mpj $ns1 + userspace_pm_add_addr 10.0.2.1 10 chk_join_nr 1 1 1 chk_add_nr 1 1 + chk_mptcp_info subflows 1 subflows 1 + chk_mptcp_info add_addr_signal 1 add_addr_accepted 1 + userspace_pm_rm_sf_addr_ns1 10.0.2.1 10 chk_rm_nr 1 1 invert + chk_mptcp_info subflows 0 subflows 0 kill_events_pids + wait $tests_pid fi # userspace pm create destroy subflow @@ -3295,10 +3390,17 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 1 - run_tests $ns1 $ns2 10.0.1.1 0 0 userspace_1 slow + run_tests $ns1 $ns2 10.0.1.1 speed_10 & + local tests_pid=$! + wait_mpj $ns2 + userspace_pm_add_sf 10.0.3.2 20 chk_join_nr 1 1 1 + chk_mptcp_info subflows 1 subflows 1 + userspace_pm_rm_sf_addr_ns2 10.0.3.2 20 chk_rm_nr 1 1 + chk_mptcp_info subflows 0 subflows 0 kill_events_pids + wait $tests_pid fi } @@ -3311,11 +3413,13 @@ endpoint_tests() pm_nl_set_limits $ns1 2 2 pm_nl_set_limits $ns2 2 2 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal - run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow 2>/dev/null & + run_tests $ns1 $ns2 10.0.1.1 slow 2>/dev/null & wait_mpj $ns1 pm_nl_check_endpoint 1 "creation" \ $ns2 10.0.2.2 id 1 flags implicit + chk_mptcp_info subflows 1 subflows 1 + chk_mptcp_info add_addr_signal 1 add_addr_accepted 1 pm_nl_add_endpoint $ns2 10.0.2.2 id 33 pm_nl_check_endpoint 0 "ID change is prevented" \ @@ -3332,21 +3436,22 @@ endpoint_tests() pm_nl_set_limits $ns1 1 1 pm_nl_set_limits $ns2 1 1 pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 4 0 0 speed_20 2>/dev/null & + test_linkfail=4 \ + run_tests $ns1 $ns2 10.0.1.1 speed_20 2>/dev/null & wait_mpj $ns2 chk_subflow_nr needtitle "before delete" 2 - chk_mptcp_info subflows_1 + chk_mptcp_info subflows 1 subflows 1 pm_nl_del_endpoint $ns2 2 10.0.2.2 sleep 0.5 chk_subflow_nr "" "after delete" 1 - chk_mptcp_info subflows_0 + chk_mptcp_info subflows 0 subflows 0 pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow wait_mpj $ns2 chk_subflow_nr "" "after re-add" 2 - chk_mptcp_info subflows_1 + chk_mptcp_info subflows 1 subflows 1 kill_tests_wait fi } |