diff options
68 files changed, 2750 insertions, 212 deletions
diff --git a/Documentation/devicetree/bindings/net/dsa/qca8k.txt b/Documentation/devicetree/bindings/net/dsa/qca8k.txt index 9c67ee4890d7..bbcb255c3150 100644 --- a/Documentation/devicetree/bindings/net/dsa/qca8k.txt +++ b/Documentation/devicetree/bindings/net/dsa/qca8k.txt @@ -2,7 +2,10 @@ Required properties: -- compatible: should be "qca,qca8337" +- compatible: should be one of: + "qca,qca8334" + "qca,qca8337" + - #size-cells: must be 0 - #address-cells: must be 1 @@ -14,6 +17,20 @@ port and PHY id, each subnode describing a port needs to have a valid phandle referencing the internal PHY connected to it. The CPU port of this switch is always port 0. +A CPU port node has the following optional node: + +- fixed-link : Fixed-link subnode describing a link to a non-MDIO + managed entity. See + Documentation/devicetree/bindings/net/fixed-link.txt + for details. + +For QCA8K the 'fixed-link' sub-node supports only the following properties: + +- 'speed' (integer, mandatory), to indicate the link speed. Accepted + values are 10, 100 and 1000 +- 'full-duplex' (boolean, optional), to indicate that full duplex is + used. When absent, half duplex is assumed. + Example: @@ -53,6 +70,10 @@ Example: label = "cpu"; ethernet = <&gmac1>; phy-mode = "rgmii"; + fixed-link { + speed = 1000; + full-duplex; + }; }; port@1 { diff --git a/Documentation/devicetree/bindings/net/sff,sfp.txt b/Documentation/devicetree/bindings/net/sff,sfp.txt index 929591d52ed6..832139919f20 100644 --- a/Documentation/devicetree/bindings/net/sff,sfp.txt +++ b/Documentation/devicetree/bindings/net/sff,sfp.txt @@ -7,11 +7,11 @@ Required properties: "sff,sfp" for SFP modules "sff,sff" for soldered down SFF modules -Optional Properties: - - i2c-bus : phandle of an I2C bus controller for the SFP two wire serial interface +Optional Properties: + - mod-def0-gpios : GPIO phandle and a specifier of the MOD-DEF0 (AKA Mod_ABS) module presence input gpio signal, active (module absent) high. Must not be present for SFF modules diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 06efdf6a762b..fea17b92b1ae 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1107,7 +1107,8 @@ static void bond_compute_features(struct bonding *bond) done: bond_dev->vlan_features = vlan_features; - bond_dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL; + bond_dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL | + NETIF_F_GSO_UDP_L4; bond_dev->gso_max_segs = gso_max_segs; netif_set_gso_max_size(bond_dev, gso_max_size); @@ -4268,7 +4269,7 @@ void bond_setup(struct net_device *bond_dev) NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER; - bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL; + bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL | NETIF_F_GSO_UDP_L4; bond_dev->features |= bond_dev->hw_features; } diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 757b6d90ea36..cdcde7f8e0b2 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -1,17 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2009 Felix Fietkau <[email protected]> * Copyright (C) 2011-2012 Gabor Juhos <[email protected]> * Copyright (c) 2015, The Linux Foundation. All rights reserved. * Copyright (c) 2016 John Crispin <[email protected]> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 and - * only version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #include <linux/module.h> @@ -473,10 +465,10 @@ qca8k_set_pad_ctrl(struct qca8k_priv *priv, int port, int mode) static void qca8k_port_set_status(struct qca8k_priv *priv, int port, int enable) { - u32 mask = QCA8K_PORT_STATUS_TXMAC; + u32 mask = QCA8K_PORT_STATUS_TXMAC | QCA8K_PORT_STATUS_RXMAC; /* Port 0 and 6 have no internal PHY */ - if ((port > 0) && (port < 6)) + if (port > 0 && port < 6) mask |= QCA8K_PORT_STATUS_LINK_AUTO; if (enable) @@ -490,6 +482,7 @@ qca8k_setup(struct dsa_switch *ds) { struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; int ret, i, phy_mode = -1; + u32 mask; /* Make sure that port 0 is the cpu port */ if (!dsa_is_cpu_port(ds, 0)) { @@ -515,7 +508,10 @@ qca8k_setup(struct dsa_switch *ds) if (ret < 0) return ret; - /* Enable CPU Port */ + /* Enable CPU Port, force it to maximum bandwidth and full-duplex */ + mask = QCA8K_PORT_STATUS_SPEED_1000 | QCA8K_PORT_STATUS_TXFLOW | + QCA8K_PORT_STATUS_RXFLOW | QCA8K_PORT_STATUS_DUPLEX; + qca8k_write(priv, QCA8K_REG_PORT_STATUS(QCA8K_CPU_PORT), mask); qca8k_reg_set(priv, QCA8K_REG_GLOBAL_FW_CTRL0, QCA8K_GLOBAL_FW_CTRL0_CPU_PORT_EN); qca8k_port_set_status(priv, QCA8K_CPU_PORT, 1); @@ -583,6 +579,47 @@ qca8k_setup(struct dsa_switch *ds) return 0; } +static void +qca8k_adjust_link(struct dsa_switch *ds, int port, struct phy_device *phy) +{ + struct qca8k_priv *priv = ds->priv; + u32 reg; + + /* Force fixed-link setting for CPU port, skip others. */ + if (!phy_is_pseudo_fixed_link(phy)) + return; + + /* Set port speed */ + switch (phy->speed) { + case 10: + reg = QCA8K_PORT_STATUS_SPEED_10; + break; + case 100: + reg = QCA8K_PORT_STATUS_SPEED_100; + break; + case 1000: + reg = QCA8K_PORT_STATUS_SPEED_1000; + break; + default: + dev_dbg(priv->dev, "port%d link speed %dMbps not supported.\n", + port, phy->speed); + return; + } + + /* Set duplex mode */ + if (phy->duplex == DUPLEX_FULL) + reg |= QCA8K_PORT_STATUS_DUPLEX; + + /* Force flow control */ + if (dsa_is_cpu_port(ds, port)) + reg |= QCA8K_PORT_STATUS_RXFLOW | QCA8K_PORT_STATUS_TXFLOW; + + /* Force link down before changing MAC options */ + qca8k_port_set_status(priv, port, 0); + qca8k_write(priv, QCA8K_REG_PORT_STATUS(port), reg); + qca8k_port_set_status(priv, port, 1); +} + static int qca8k_phy_read(struct dsa_switch *ds, int phy, int regnum) { @@ -837,6 +874,7 @@ qca8k_get_tag_protocol(struct dsa_switch *ds, int port) static const struct dsa_switch_ops qca8k_switch_ops = { .get_tag_protocol = qca8k_get_tag_protocol, .setup = qca8k_setup, + .adjust_link = qca8k_adjust_link, .get_strings = qca8k_get_strings, .phy_read = qca8k_phy_read, .phy_write = qca8k_phy_write, @@ -868,6 +906,7 @@ qca8k_sw_probe(struct mdio_device *mdiodev) return -ENOMEM; priv->bus = mdiodev->bus; + priv->dev = &mdiodev->dev; /* read the switches ID register */ id = qca8k_read(priv, QCA8K_REG_MASK_CTRL); @@ -939,6 +978,7 @@ static SIMPLE_DEV_PM_OPS(qca8k_pm_ops, qca8k_suspend, qca8k_resume); static const struct of_device_id qca8k_of_match[] = { + { .compatible = "qca,qca8334" }, { .compatible = "qca,qca8337" }, { /* sentinel */ }, }; diff --git a/drivers/net/dsa/qca8k.h b/drivers/net/dsa/qca8k.h index 1cf8a920d4ff..613fe5c50236 100644 --- a/drivers/net/dsa/qca8k.h +++ b/drivers/net/dsa/qca8k.h @@ -51,8 +51,10 @@ #define QCA8K_GOL_MAC_ADDR0 0x60 #define QCA8K_GOL_MAC_ADDR1 0x64 #define QCA8K_REG_PORT_STATUS(_i) (0x07c + (_i) * 4) -#define QCA8K_PORT_STATUS_SPEED GENMASK(2, 0) -#define QCA8K_PORT_STATUS_SPEED_S 0 +#define QCA8K_PORT_STATUS_SPEED GENMASK(1, 0) +#define QCA8K_PORT_STATUS_SPEED_10 0 +#define QCA8K_PORT_STATUS_SPEED_100 0x1 +#define QCA8K_PORT_STATUS_SPEED_1000 0x2 #define QCA8K_PORT_STATUS_TXMAC BIT(2) #define QCA8K_PORT_STATUS_RXMAC BIT(3) #define QCA8K_PORT_STATUS_TXFLOW BIT(4) @@ -165,6 +167,7 @@ struct qca8k_priv { struct ar8xxx_port_status port_sts[QCA8K_NUM_PORTS]; struct dsa_switch *ds; struct mutex reg_mutex; + struct device *dev; }; struct qca8k_mib_desc { diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h index adacc6399131..c7f8d0441278 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h @@ -212,6 +212,8 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN CH_PCI_ID_TABLE_FENTRY(0x6085), /* Custom T6240-SO */ CH_PCI_ID_TABLE_FENTRY(0x6086), /* Custom T6225-SO-CR */ CH_PCI_ID_TABLE_FENTRY(0x6087), /* Custom T6225-CR */ + CH_PCI_ID_TABLE_FENTRY(0x6088), /* Custom T62100-CR */ + CH_PCI_ID_TABLE_FENTRY(0x6089), /* Custom T62100-KR */ CH_PCI_DEVICE_ID_TABLE_DEFINE_END; #endif /* __T4_PCI_ID_TBL_H__ */ diff --git a/drivers/net/ethernet/neterion/vxge/vxge-config.c b/drivers/net/ethernet/neterion/vxge/vxge-config.c index 6223930a8155..c60da9e8bf14 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-config.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-config.c @@ -693,7 +693,7 @@ __vxge_hw_device_is_privilaged(u32 host_type, u32 func_id) VXGE_HW_DEVICE_ACCESS_RIGHT_MRPCIM) return VXGE_HW_OK; else - return VXGE_HW_ERR_PRIVILAGED_OPEARATION; + return VXGE_HW_ERR_PRIVILEGED_OPERATION; } /* @@ -1920,7 +1920,7 @@ enum vxge_hw_status vxge_hw_device_getpause_data(struct __vxge_hw_device *hldev, } if (!(hldev->access_rights & VXGE_HW_DEVICE_ACCESS_RIGHT_MRPCIM)) { - status = VXGE_HW_ERR_PRIVILAGED_OPEARATION; + status = VXGE_HW_ERR_PRIVILEGED_OPERATION; goto exit; } @@ -3153,7 +3153,7 @@ vxge_hw_mgmt_reg_read(struct __vxge_hw_device *hldev, case vxge_hw_mgmt_reg_type_mrpcim: if (!(hldev->access_rights & VXGE_HW_DEVICE_ACCESS_RIGHT_MRPCIM)) { - status = VXGE_HW_ERR_PRIVILAGED_OPEARATION; + status = VXGE_HW_ERR_PRIVILEGED_OPERATION; break; } if (offset > sizeof(struct vxge_hw_mrpcim_reg) - 8) { @@ -3165,7 +3165,7 @@ vxge_hw_mgmt_reg_read(struct __vxge_hw_device *hldev, case vxge_hw_mgmt_reg_type_srpcim: if (!(hldev->access_rights & VXGE_HW_DEVICE_ACCESS_RIGHT_SRPCIM)) { - status = VXGE_HW_ERR_PRIVILAGED_OPEARATION; + status = VXGE_HW_ERR_PRIVILEGED_OPERATION; break; } if (index > VXGE_HW_TITAN_SRPCIM_REG_SPACES - 1) { @@ -3279,7 +3279,7 @@ vxge_hw_mgmt_reg_write(struct __vxge_hw_device *hldev, case vxge_hw_mgmt_reg_type_mrpcim: if (!(hldev->access_rights & VXGE_HW_DEVICE_ACCESS_RIGHT_MRPCIM)) { - status = VXGE_HW_ERR_PRIVILAGED_OPEARATION; + status = VXGE_HW_ERR_PRIVILEGED_OPERATION; break; } if (offset > sizeof(struct vxge_hw_mrpcim_reg) - 8) { @@ -3291,7 +3291,7 @@ vxge_hw_mgmt_reg_write(struct __vxge_hw_device *hldev, case vxge_hw_mgmt_reg_type_srpcim: if (!(hldev->access_rights & VXGE_HW_DEVICE_ACCESS_RIGHT_SRPCIM)) { - status = VXGE_HW_ERR_PRIVILAGED_OPEARATION; + status = VXGE_HW_ERR_PRIVILEGED_OPERATION; break; } if (index > VXGE_HW_TITAN_SRPCIM_REG_SPACES - 1) { diff --git a/drivers/net/ethernet/neterion/vxge/vxge-config.h b/drivers/net/ethernet/neterion/vxge/vxge-config.h index cfa970417f81..d743a37a3cee 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-config.h +++ b/drivers/net/ethernet/neterion/vxge/vxge-config.h @@ -127,7 +127,7 @@ enum vxge_hw_status { VXGE_HW_ERR_INVALID_TCODE = VXGE_HW_BASE_ERR + 14, VXGE_HW_ERR_INVALID_BLOCK_SIZE = VXGE_HW_BASE_ERR + 15, VXGE_HW_ERR_INVALID_STATE = VXGE_HW_BASE_ERR + 16, - VXGE_HW_ERR_PRIVILAGED_OPEARATION = VXGE_HW_BASE_ERR + 17, + VXGE_HW_ERR_PRIVILEGED_OPERATION = VXGE_HW_BASE_ERR + 17, VXGE_HW_ERR_INVALID_PORT = VXGE_HW_BASE_ERR + 18, VXGE_HW_ERR_FIFO = VXGE_HW_BASE_ERR + 19, VXGE_HW_ERR_VPATH = VXGE_HW_BASE_ERR + 20, diff --git a/drivers/net/ethernet/neterion/vxge/vxge-ethtool.c b/drivers/net/ethernet/neterion/vxge/vxge-ethtool.c index 0452848d1316..03c3d1230c17 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-ethtool.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-ethtool.c @@ -276,7 +276,7 @@ static void vxge_get_ethtool_stats(struct net_device *dev, *ptr++ = 0; status = vxge_hw_device_xmac_stats_get(hldev, xmac_stats); if (status != VXGE_HW_OK) { - if (status != VXGE_HW_ERR_PRIVILAGED_OPEARATION) { + if (status != VXGE_HW_ERR_PRIVILEGED_OPERATION) { vxge_debug_init(VXGE_ERR, "%s : %d Failure in getting xmac stats", __func__, __LINE__); diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index b2299f2b2155..a8918bb7c802 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -3484,11 +3484,11 @@ static int vxge_device_register(struct __vxge_hw_device *hldev, 0, &stat); - if (status == VXGE_HW_ERR_PRIVILAGED_OPEARATION) + if (status == VXGE_HW_ERR_PRIVILEGED_OPERATION) vxge_debug_init( vxge_hw_device_trace_level_get(hldev), "%s: device stats clear returns" - "VXGE_HW_ERR_PRIVILAGED_OPEARATION", ndev->name); + "VXGE_HW_ERR_PRIVILEGED_OPERATION", ndev->name); vxge_debug_entryexit(vxge_hw_device_trace_level_get(hldev), "%s: %s:%d Exiting...", diff --git a/drivers/net/ethernet/netronome/Kconfig b/drivers/net/ethernet/netronome/Kconfig index ae0c46ba7546..66f15b05b65e 100644 --- a/drivers/net/ethernet/netronome/Kconfig +++ b/drivers/net/ethernet/netronome/Kconfig @@ -36,6 +36,19 @@ config NFP_APP_FLOWER either directly, with Open vSwitch, or any other way. Note that TC Flower offload requires specific FW to work. +config NFP_APP_ABM_NIC + bool "NFP4000/NFP6000 Advanced buffer management NIC support" + depends on NFP + depends on NET_SWITCHDEV + default y + help + Enable driver support for Advanced buffer management NIC on NFP. + ABM NIC allows advanced configuration of queuing and scheduling + of packets, including ECN marking. Say Y, if you are planning to + use one of the NFP4000 and NFP6000 platforms which support this + functionality. + Code will be built into the nfp.ko driver. + config NFP_DEBUG bool "Debug support for Netronome(R) NFP4000/NFP6000 NIC drivers" depends on NFP diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile index d5866d708dfa..6373f56205fd 100644 --- a/drivers/net/ethernet/netronome/nfp/Makefile +++ b/drivers/net/ethernet/netronome/nfp/Makefile @@ -30,6 +30,7 @@ nfp-objs := \ nfp_net_sriov.o \ nfp_netvf_main.o \ nfp_port.o \ + nfp_shared_buf.o \ nic/main.o ifeq ($(CONFIG_NFP_APP_FLOWER),y) @@ -52,4 +53,10 @@ nfp-objs += \ bpf/jit.o endif +ifeq ($(CONFIG_NFP_APP_ABM_NIC),y) +nfp-objs += \ + abm/ctrl.o \ + abm/main.o +endif + nfp-$(CONFIG_NFP_DEBUG) += nfp_net_debugfs.o diff --git a/drivers/net/ethernet/netronome/nfp/abm/ctrl.c b/drivers/net/ethernet/netronome/nfp/abm/ctrl.c new file mode 100644 index 000000000000..e40f6f06417b --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/abm/ctrl.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +/* + * Copyright (C) 2018 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/kernel.h> + +#include "../nfpcore/nfp_cpp.h" +#include "../nfp_app.h" +#include "../nfp_main.h" +#include "../nfp_net.h" +#include "main.h" + +void nfp_abm_ctrl_read_params(struct nfp_abm_link *alink) +{ + alink->queue_base = nn_readl(alink->vnic, NFP_NET_CFG_START_RXQ); + alink->queue_base /= alink->vnic->stride_rx; +} + +int nfp_abm_ctrl_find_addrs(struct nfp_abm *abm) +{ + struct nfp_pf *pf = abm->app->pf; + unsigned int pf_id; + + pf_id = nfp_cppcore_pcie_unit(pf->cpp); + abm->pf_id = pf_id; + + return 0; +} diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.c b/drivers/net/ethernet/netronome/nfp/abm/main.c new file mode 100644 index 000000000000..5a12bb20bced --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/abm/main.c @@ -0,0 +1,399 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +/* + * Copyright (C) 2018 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/bitfield.h> +#include <linux/etherdevice.h> +#include <linux/lockdep.h> +#include <linux/netdevice.h> +#include <linux/rcupdate.h> +#include <linux/slab.h> + +#include "../nfpcore/nfp.h" +#include "../nfpcore/nfp_cpp.h" +#include "../nfpcore/nfp_nsp.h" +#include "../nfp_app.h" +#include "../nfp_main.h" +#include "../nfp_net.h" +#include "../nfp_net_repr.h" +#include "../nfp_port.h" +#include "main.h" + +static u32 nfp_abm_portid(enum nfp_repr_type rtype, unsigned int id) +{ + return FIELD_PREP(NFP_ABM_PORTID_TYPE, rtype) | + FIELD_PREP(NFP_ABM_PORTID_ID, id); +} + +static struct net_device *nfp_abm_repr_get(struct nfp_app *app, u32 port_id) +{ + enum nfp_repr_type rtype; + struct nfp_reprs *reprs; + u8 port; + + rtype = FIELD_GET(NFP_ABM_PORTID_TYPE, port_id); + port = FIELD_GET(NFP_ABM_PORTID_ID, port_id); + + reprs = rcu_dereference(app->reprs[rtype]); + if (!reprs) + return NULL; + + if (port >= reprs->num_reprs) + return NULL; + + return rcu_dereference(reprs->reprs[port]); +} + +static int +nfp_abm_spawn_repr(struct nfp_app *app, struct nfp_abm_link *alink, + enum nfp_port_type ptype) +{ + struct net_device *netdev; + enum nfp_repr_type rtype; + struct nfp_reprs *reprs; + struct nfp_repr *repr; + struct nfp_port *port; + int err; + + if (ptype == NFP_PORT_PHYS_PORT) + rtype = NFP_REPR_TYPE_PHYS_PORT; + else + rtype = NFP_REPR_TYPE_PF; + + netdev = nfp_repr_alloc(app); + if (!netdev) + return -ENOMEM; + repr = netdev_priv(netdev); + repr->app_priv = alink; + + port = nfp_port_alloc(app, ptype, netdev); + if (IS_ERR(port)) { + err = PTR_ERR(port); + goto err_free_repr; + } + + if (ptype == NFP_PORT_PHYS_PORT) { + port->eth_forced = true; + err = nfp_port_init_phy_port(app->pf, app, port, alink->id); + if (err) + goto err_free_port; + } else { + port->pf_id = alink->abm->pf_id; + port->pf_split = app->pf->max_data_vnics > 1; + port->pf_split_id = alink->id; + port->vnic = alink->vnic->dp.ctrl_bar; + } + + SET_NETDEV_DEV(netdev, &alink->vnic->pdev->dev); + eth_hw_addr_random(netdev); + + err = nfp_repr_init(app, netdev, nfp_abm_portid(rtype, alink->id), + port, alink->vnic->dp.netdev); + if (err) + goto err_free_port; + + reprs = nfp_reprs_get_locked(app, rtype); + WARN(nfp_repr_get_locked(app, reprs, alink->id), "duplicate repr"); + rcu_assign_pointer(reprs->reprs[alink->id], netdev); + + nfp_info(app->cpp, "%s Port %d Representor(%s) created\n", + ptype == NFP_PORT_PF_PORT ? "PCIe" : "Phys", + alink->id, netdev->name); + + return 0; + +err_free_port: + nfp_port_free(port); +err_free_repr: + nfp_repr_free(netdev); + return err; +} + +static void +nfp_abm_kill_repr(struct nfp_app *app, struct nfp_abm_link *alink, + enum nfp_repr_type rtype) +{ + struct net_device *netdev; + struct nfp_reprs *reprs; + + reprs = nfp_reprs_get_locked(app, rtype); + netdev = nfp_repr_get_locked(app, reprs, alink->id); + if (!netdev) + return; + rcu_assign_pointer(reprs->reprs[alink->id], NULL); + synchronize_rcu(); + /* Cast to make sure nfp_repr_clean_and_free() takes a nfp_repr */ + nfp_repr_clean_and_free((struct nfp_repr *)netdev_priv(netdev)); +} + +static void +nfp_abm_kill_reprs(struct nfp_abm *abm, struct nfp_abm_link *alink) +{ + nfp_abm_kill_repr(abm->app, alink, NFP_REPR_TYPE_PF); + nfp_abm_kill_repr(abm->app, alink, NFP_REPR_TYPE_PHYS_PORT); +} + +static void nfp_abm_kill_reprs_all(struct nfp_abm *abm) +{ + struct nfp_pf *pf = abm->app->pf; + struct nfp_net *nn; + + list_for_each_entry(nn, &pf->vnics, vnic_list) + nfp_abm_kill_reprs(abm, (struct nfp_abm_link *)nn->app_priv); +} + +static enum devlink_eswitch_mode nfp_abm_eswitch_mode_get(struct nfp_app *app) +{ + struct nfp_abm *abm = app->priv; + + return abm->eswitch_mode; +} + +static int nfp_abm_eswitch_set_legacy(struct nfp_abm *abm) +{ + nfp_abm_kill_reprs_all(abm); + + abm->eswitch_mode = DEVLINK_ESWITCH_MODE_LEGACY; + return 0; +} + +static void nfp_abm_eswitch_clean_up(struct nfp_abm *abm) +{ + if (abm->eswitch_mode != DEVLINK_ESWITCH_MODE_LEGACY) + WARN_ON(nfp_abm_eswitch_set_legacy(abm)); +} + +static int nfp_abm_eswitch_set_switchdev(struct nfp_abm *abm) +{ + struct nfp_app *app = abm->app; + struct nfp_pf *pf = app->pf; + struct nfp_net *nn; + int err; + + list_for_each_entry(nn, &pf->vnics, vnic_list) { + struct nfp_abm_link *alink = nn->app_priv; + + err = nfp_abm_spawn_repr(app, alink, NFP_PORT_PHYS_PORT); + if (err) + goto err_kill_all_reprs; + + err = nfp_abm_spawn_repr(app, alink, NFP_PORT_PF_PORT); + if (err) + goto err_kill_all_reprs; + } + + abm->eswitch_mode = DEVLINK_ESWITCH_MODE_SWITCHDEV; + return 0; + +err_kill_all_reprs: + nfp_abm_kill_reprs_all(abm); + return err; +} + +static int nfp_abm_eswitch_mode_set(struct nfp_app *app, u16 mode) +{ + struct nfp_abm *abm = app->priv; + + if (abm->eswitch_mode == mode) + return 0; + + switch (mode) { + case DEVLINK_ESWITCH_MODE_LEGACY: + return nfp_abm_eswitch_set_legacy(abm); + case DEVLINK_ESWITCH_MODE_SWITCHDEV: + return nfp_abm_eswitch_set_switchdev(abm); + default: + return -EINVAL; + } +} + +static void +nfp_abm_vnic_set_mac(struct nfp_pf *pf, struct nfp_abm *abm, struct nfp_net *nn, + unsigned int id) +{ + struct nfp_eth_table_port *eth_port = &pf->eth_tbl->ports[id]; + u8 mac_addr[ETH_ALEN]; + const char *mac_str; + char name[32]; + + if (id > pf->eth_tbl->count) { + nfp_warn(pf->cpp, "No entry for persistent MAC address\n"); + eth_hw_addr_random(nn->dp.netdev); + return; + } + + snprintf(name, sizeof(name), "eth%u.mac.pf%u", + eth_port->eth_index, abm->pf_id); + + mac_str = nfp_hwinfo_lookup(pf->hwinfo, name); + if (!mac_str) { + nfp_warn(pf->cpp, "Can't lookup persistent MAC address (%s)\n", + name); + eth_hw_addr_random(nn->dp.netdev); + return; + } + + if (sscanf(mac_str, "%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx", + &mac_addr[0], &mac_addr[1], &mac_addr[2], + &mac_addr[3], &mac_addr[4], &mac_addr[5]) != 6) { + nfp_warn(pf->cpp, "Can't parse persistent MAC address (%s)\n", + mac_str); + eth_hw_addr_random(nn->dp.netdev); + return; + } + + ether_addr_copy(nn->dp.netdev->dev_addr, mac_addr); + ether_addr_copy(nn->dp.netdev->perm_addr, mac_addr); +} + +static int +nfp_abm_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, unsigned int id) +{ + struct nfp_eth_table_port *eth_port = &app->pf->eth_tbl->ports[id]; + struct nfp_abm *abm = app->priv; + struct nfp_abm_link *alink; + int err; + + alink = kzalloc(sizeof(*alink), GFP_KERNEL); + if (!alink) + return -ENOMEM; + nn->app_priv = alink; + alink->abm = abm; + alink->vnic = nn; + alink->id = id; + + /* This is a multi-host app, make sure MAC/PHY is up, but don't + * make the MAC/PHY state follow the state of any of the ports. + */ + err = nfp_eth_set_configured(app->cpp, eth_port->index, true); + if (err < 0) + goto err_free_alink; + + netif_keep_dst(nn->dp.netdev); + + nfp_abm_vnic_set_mac(app->pf, abm, nn, id); + nfp_abm_ctrl_read_params(alink); + + return 0; + +err_free_alink: + kfree(alink); + return err; +} + +static void nfp_abm_vnic_free(struct nfp_app *app, struct nfp_net *nn) +{ + struct nfp_abm_link *alink = nn->app_priv; + + nfp_abm_kill_reprs(alink->abm, alink); + kfree(alink); +} + +static int nfp_abm_init(struct nfp_app *app) +{ + struct nfp_pf *pf = app->pf; + struct nfp_reprs *reprs; + struct nfp_abm *abm; + int err; + + if (!pf->eth_tbl) { + nfp_err(pf->cpp, "ABM NIC requires ETH table\n"); + return -EINVAL; + } + if (pf->max_data_vnics != pf->eth_tbl->count) { + nfp_err(pf->cpp, "ETH entries don't match vNICs (%d vs %d)\n", + pf->max_data_vnics, pf->eth_tbl->count); + return -EINVAL; + } + if (!pf->mac_stats_bar) { + nfp_warn(app->cpp, "ABM NIC requires mac_stats symbol\n"); + return -EINVAL; + } + + abm = kzalloc(sizeof(*abm), GFP_KERNEL); + if (!abm) + return -ENOMEM; + app->priv = abm; + abm->app = app; + + err = nfp_abm_ctrl_find_addrs(abm); + if (err) + goto err_free_abm; + + err = -ENOMEM; + reprs = nfp_reprs_alloc(pf->max_data_vnics); + if (!reprs) + goto err_free_abm; + RCU_INIT_POINTER(app->reprs[NFP_REPR_TYPE_PHYS_PORT], reprs); + + reprs = nfp_reprs_alloc(pf->max_data_vnics); + if (!reprs) + goto err_free_phys; + RCU_INIT_POINTER(app->reprs[NFP_REPR_TYPE_PF], reprs); + + return 0; + +err_free_phys: + nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_PHYS_PORT); +err_free_abm: + kfree(abm); + app->priv = NULL; + return err; +} + +static void nfp_abm_clean(struct nfp_app *app) +{ + struct nfp_abm *abm = app->priv; + + nfp_abm_eswitch_clean_up(abm); + nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_PF); + nfp_reprs_clean_and_free_by_type(app, NFP_REPR_TYPE_PHYS_PORT); + kfree(abm); + app->priv = NULL; +} + +const struct nfp_app_type app_abm = { + .id = NFP_APP_ACTIVE_BUFFER_MGMT_NIC, + .name = "abm", + + .init = nfp_abm_init, + .clean = nfp_abm_clean, + + .vnic_alloc = nfp_abm_vnic_alloc, + .vnic_free = nfp_abm_vnic_free, + + .eswitch_mode_get = nfp_abm_eswitch_mode_get, + .eswitch_mode_set = nfp_abm_eswitch_mode_set, + + .repr_get = nfp_abm_repr_get, +}; diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.h b/drivers/net/ethernet/netronome/nfp/abm/main.h new file mode 100644 index 000000000000..5938b69b8a84 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/abm/main.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */ +/* + * Copyright (C) 2018 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __NFP_ABM_H__ +#define __NFP_ABM_H__ 1 + +#include <net/devlink.h> + +struct nfp_app; +struct nfp_net; + +#define NFP_ABM_PORTID_TYPE GENMASK(23, 16) +#define NFP_ABM_PORTID_ID GENMASK(7, 0) + +/** + * struct nfp_abm - ABM NIC app structure + * @app: back pointer to nfp_app + * @pf_id: ID of our PF link + * @eswitch_mode: devlink eswitch mode, advanced functions only visible + * in switchdev mode + */ +struct nfp_abm { + struct nfp_app *app; + unsigned int pf_id; + enum devlink_eswitch_mode eswitch_mode; +}; + +/** + * struct nfp_abm_link - port tuple of a ABM NIC + * @abm: back pointer to nfp_abm + * @vnic: data vNIC + * @id: id of the data vNIC + * @queue_base: id of base to host queue within PCIe (not QC idx) + */ +struct nfp_abm_link { + struct nfp_abm *abm; + struct nfp_net *vnic; + unsigned int id; + unsigned int queue_base; +}; + +void nfp_abm_ctrl_read_params(struct nfp_abm_link *alink); +int nfp_abm_ctrl_find_addrs(struct nfp_abm *abm); +#endif diff --git a/drivers/net/ethernet/netronome/nfp/nfp_abi.h b/drivers/net/ethernet/netronome/nfp/nfp_abi.h new file mode 100644 index 000000000000..7ffa6e6a9d1c --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_abi.h @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */ +/* + * Copyright (C) 2018 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __NFP_ABI__ +#define __NFP_ABI__ 1 + +#include <linux/types.h> + +#define NFP_MBOX_SYM_NAME "_abi_nfd_pf%u_mbox" +#define NFP_MBOX_SYM_MIN_SIZE 16 /* When no data needed */ + +#define NFP_MBOX_CMD 0x00 +#define NFP_MBOX_RET 0x04 +#define NFP_MBOX_DATA_LEN 0x08 +#define NFP_MBOX_RESERVED 0x0c +#define NFP_MBOX_DATA 0x10 + +/** + * enum nfp_mbox_cmd - PF mailbox commands + * + * @NFP_MBOX_NO_CMD: null command + * Used to indicate previous command has finished. + * + * @NFP_MBOX_POOL_GET: get shared buffer pool info/config + * Input - struct nfp_shared_buf_pool_id + * Output - struct nfp_shared_buf_pool_info_get + * + * @NFP_MBOX_POOL_SET: set shared buffer pool info/config + * Input - struct nfp_shared_buf_pool_info_set + * Output - None + */ +enum nfp_mbox_cmd { + NFP_MBOX_NO_CMD = 0x00, + + NFP_MBOX_POOL_GET = 0x01, + NFP_MBOX_POOL_SET = 0x02, +}; + +#define NFP_SHARED_BUF_COUNT_SYM_NAME "_abi_nfd_pf%u_sb_cnt" +#define NFP_SHARED_BUF_TABLE_SYM_NAME "_abi_nfd_pf%u_sb_tbl" + +/** + * struct nfp_shared_buf - NFP shared buffer description + * @id: numerical user-visible id of the shared buffer + * @size: size in bytes of the buffer + * @ingress_pools_count: number of ingress pools + * @egress_pools_count: number of egress pools + * @ingress_tc_count: number of ingress trafic classes + * @egress_tc_count: number of egress trafic classes + * @pool_size_unit: pool size may be in credits, each credit is + * @pool_size_unit bytes + */ +struct nfp_shared_buf { + __le32 id; + __le32 size; + __le16 ingress_pools_count; + __le16 egress_pools_count; + __le16 ingress_tc_count; + __le16 egress_tc_count; + + __le32 pool_size_unit; +}; + +/** + * struct nfp_shared_buf_pool_id - shared buffer pool identification + * @shared_buf: shared buffer id + * @pool: pool index + */ +struct nfp_shared_buf_pool_id { + __le32 shared_buf; + __le32 pool; +}; + +/** + * struct nfp_shared_buf_pool_info_get - struct devlink_sb_pool_info mirror + * @pool_type: one of enum devlink_sb_pool_type + * @size: pool size in units of SB's @pool_size_unit + * @threshold_type: one of enum devlink_sb_threshold_type + */ +struct nfp_shared_buf_pool_info_get { + __le32 pool_type; + __le32 size; + __le32 threshold_type; +}; + +/** + * struct nfp_shared_buf_pool_info_set - packed args of sb_pool_set + * @id: pool identification info + * @size: pool size in units of SB's @pool_size_unit + * @threshold_type: one of enum devlink_sb_threshold_type + */ +struct nfp_shared_buf_pool_info_set { + struct nfp_shared_buf_pool_id id; + __le32 size; + __le32 threshold_type; +}; + +#endif diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.c b/drivers/net/ethernet/netronome/nfp/nfp_app.c index 0e0253c7e17b..c9d8a7ab311e 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_app.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_app.c @@ -54,6 +54,9 @@ static const struct nfp_app_type *apps[] = { #ifdef CONFIG_NFP_APP_FLOWER [NFP_APP_FLOWER_NIC] = &app_flower, #endif +#ifdef CONFIG_NFP_APP_ABM_NIC + [NFP_APP_ACTIVE_BUFFER_MGMT_NIC] = &app_abm, +#endif }; struct nfp_app *nfp_app_from_netdev(struct net_device *netdev) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.h b/drivers/net/ethernet/netronome/nfp/nfp_app.h index 2d9cb2528fc7..23b99a4e05c2 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_app.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_app.h @@ -57,11 +57,13 @@ enum nfp_app_id { NFP_APP_CORE_NIC = 0x1, NFP_APP_BPF_NIC = 0x2, NFP_APP_FLOWER_NIC = 0x3, + NFP_APP_ACTIVE_BUFFER_MGMT_NIC = 0x4, }; extern const struct nfp_app_type app_nic; extern const struct nfp_app_type app_bpf; extern const struct nfp_app_type app_flower; +extern const struct nfp_app_type app_abm; /** * struct nfp_app_type - application definition @@ -95,6 +97,7 @@ extern const struct nfp_app_type app_flower; * @bpf: BPF ndo offload-related calls * @xdp_offload: offload an XDP program * @eswitch_mode_get: get SR-IOV eswitch mode + * @eswitch_mode_set: set SR-IOV eswitch mode (under pf->lock) * @sriov_enable: app-specific sriov initialisation * @sriov_disable: app-specific sriov clean-up * @repr_get: get representor netdev @@ -146,6 +149,7 @@ struct nfp_app_type { void (*sriov_disable)(struct nfp_app *app); enum devlink_eswitch_mode (*eswitch_mode_get)(struct nfp_app *app); + int (*eswitch_mode_set)(struct nfp_app *app, u16 mode); struct net_device *(*repr_get)(struct nfp_app *app, u32 id); }; @@ -370,6 +374,13 @@ static inline int nfp_app_eswitch_mode_get(struct nfp_app *app, u16 *mode) return 0; } +static inline int nfp_app_eswitch_mode_set(struct nfp_app *app, u16 mode) +{ + if (!app->type->eswitch_mode_set) + return -EOPNOTSUPP; + return app->type->eswitch_mode_set(app, mode); +} + static inline int nfp_app_sriov_enable(struct nfp_app *app, int num_vfs) { if (!app || !app->type->sriov_enable) @@ -410,5 +421,7 @@ void nfp_app_free(struct nfp_app *app); int nfp_app_nic_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, unsigned int id); +int nfp_app_nic_vnic_init_phy_port(struct nfp_pf *pf, struct nfp_app *app, + struct nfp_net *nn, unsigned int id); #endif diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app_nic.c b/drivers/net/ethernet/netronome/nfp/nfp_app_nic.c index b9618c37403f..e2dfe4f168bb 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_app_nic.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_app_nic.c @@ -38,9 +38,8 @@ #include "nfp_net.h" #include "nfp_port.h" -static int -nfp_app_nic_vnic_init_phy_port(struct nfp_pf *pf, struct nfp_app *app, - struct nfp_net *nn, unsigned int id) +int nfp_app_nic_vnic_init_phy_port(struct nfp_pf *pf, struct nfp_app *app, + struct nfp_net *nn, unsigned int id) { int err; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c index b1e67cf4257a..71c2edd83031 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c @@ -149,6 +149,26 @@ out: return ret; } +static int +nfp_devlink_sb_pool_get(struct devlink *devlink, unsigned int sb_index, + u16 pool_index, struct devlink_sb_pool_info *pool_info) +{ + struct nfp_pf *pf = devlink_priv(devlink); + + return nfp_shared_buf_pool_get(pf, sb_index, pool_index, pool_info); +} + +static int +nfp_devlink_sb_pool_set(struct devlink *devlink, unsigned int sb_index, + u16 pool_index, + u32 size, enum devlink_sb_threshold_type threshold_type) +{ + struct nfp_pf *pf = devlink_priv(devlink); + + return nfp_shared_buf_pool_set(pf, sb_index, pool_index, + size, threshold_type); +} + static int nfp_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) { struct nfp_pf *pf = devlink_priv(devlink); @@ -156,10 +176,25 @@ static int nfp_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) return nfp_app_eswitch_mode_get(pf->app, mode); } +static int nfp_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode) +{ + struct nfp_pf *pf = devlink_priv(devlink); + int ret; + + mutex_lock(&pf->lock); + ret = nfp_app_eswitch_mode_set(pf->app, mode); + mutex_unlock(&pf->lock); + + return ret; +} + const struct devlink_ops nfp_devlink_ops = { .port_split = nfp_devlink_port_split, .port_unsplit = nfp_devlink_port_unsplit, + .sb_pool_get = nfp_devlink_sb_pool_get, + .sb_pool_set = nfp_devlink_sb_pool_set, .eswitch_mode_get = nfp_devlink_eswitch_mode_get, + .eswitch_mode_set = nfp_devlink_eswitch_mode_set, }; int nfp_devlink_port_register(struct nfp_app *app, struct nfp_port *port) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c index 0ade122805ad..46b76d5a726c 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c @@ -55,6 +55,7 @@ #include "nfpcore/nfp6000_pcie.h" +#include "nfp_abi.h" #include "nfp_app.h" #include "nfp_main.h" #include "nfp_net.h" @@ -75,6 +76,122 @@ static const struct pci_device_id nfp_pci_device_ids[] = { }; MODULE_DEVICE_TABLE(pci, nfp_pci_device_ids); +int nfp_pf_rtsym_read_optional(struct nfp_pf *pf, const char *format, + unsigned int default_val) +{ + char name[256]; + int err = 0; + u64 val; + + snprintf(name, sizeof(name), format, nfp_cppcore_pcie_unit(pf->cpp)); + + val = nfp_rtsym_read_le(pf->rtbl, name, &err); + if (err) { + if (err == -ENOENT) + return default_val; + nfp_err(pf->cpp, "Unable to read symbol %s\n", name); + return err; + } + + return val; +} + +u8 __iomem * +nfp_pf_map_rtsym(struct nfp_pf *pf, const char *name, const char *sym_fmt, + unsigned int min_size, struct nfp_cpp_area **area) +{ + char pf_symbol[256]; + + snprintf(pf_symbol, sizeof(pf_symbol), sym_fmt, + nfp_cppcore_pcie_unit(pf->cpp)); + + return nfp_rtsym_map(pf->rtbl, pf_symbol, name, min_size, area); +} + +/* Callers should hold the devlink instance lock */ +int nfp_mbox_cmd(struct nfp_pf *pf, u32 cmd, void *in_data, u64 in_length, + void *out_data, u64 out_length) +{ + unsigned long long addr; + unsigned long err_at; + u64 max_data_sz; + u32 val = 0; + u32 cpp_id; + int n, err; + + if (!pf->mbox) + return -EOPNOTSUPP; + + cpp_id = NFP_CPP_ISLAND_ID(pf->mbox->target, NFP_CPP_ACTION_RW, 0, + pf->mbox->domain); + addr = pf->mbox->addr; + max_data_sz = pf->mbox->size - NFP_MBOX_SYM_MIN_SIZE; + + /* Check if cmd field is clear */ + err = nfp_cpp_readl(pf->cpp, cpp_id, addr + NFP_MBOX_CMD, &val); + if (err || val) { + nfp_warn(pf->cpp, "failed to issue command (%u): %u, err: %d\n", + cmd, val, err); + return err ?: -EBUSY; + } + + in_length = min(in_length, max_data_sz); + n = nfp_cpp_write(pf->cpp, cpp_id, addr + NFP_MBOX_DATA, + in_data, in_length); + if (n != in_length) + return -EIO; + /* Write data_len and wipe reserved */ + err = nfp_cpp_writeq(pf->cpp, cpp_id, addr + NFP_MBOX_DATA_LEN, + in_length); + if (err) + return err; + + /* Read back for ordering */ + err = nfp_cpp_readl(pf->cpp, cpp_id, addr + NFP_MBOX_DATA_LEN, &val); + if (err) + return err; + + /* Write cmd and wipe return value */ + err = nfp_cpp_writeq(pf->cpp, cpp_id, addr + NFP_MBOX_CMD, cmd); + if (err) + return err; + + err_at = jiffies + 5 * HZ; + while (true) { + /* Wait for command to go to 0 (NFP_MBOX_NO_CMD) */ + err = nfp_cpp_readl(pf->cpp, cpp_id, addr + NFP_MBOX_CMD, &val); + if (err) + return err; + if (!val) + break; + + if (time_is_before_eq_jiffies(err_at)) + return -ETIMEDOUT; + + msleep(5); + } + + /* Copy output if any (could be error info, do it before reading ret) */ + err = nfp_cpp_readl(pf->cpp, cpp_id, addr + NFP_MBOX_DATA_LEN, &val); + if (err) + return err; + + out_length = min_t(u32, val, min(out_length, max_data_sz)); + n = nfp_cpp_read(pf->cpp, cpp_id, addr + NFP_MBOX_DATA, + out_data, out_length); + if (n != out_length) + return -EIO; + + /* Check if there is an error */ + err = nfp_cpp_readl(pf->cpp, cpp_id, addr + NFP_MBOX_RET, &val); + if (err) + return err; + if (val) + return -val; + + return out_length; +} + static bool nfp_board_ready(struct nfp_pf *pf) { const char *cp; @@ -436,6 +553,25 @@ static void nfp_fw_unload(struct nfp_pf *pf) nfp_nsp_close(nsp); } +static int nfp_pf_find_rtsyms(struct nfp_pf *pf) +{ + char pf_symbol[256]; + unsigned int pf_id; + + pf_id = nfp_cppcore_pcie_unit(pf->cpp); + + /* Optional per-PCI PF mailbox */ + snprintf(pf_symbol, sizeof(pf_symbol), NFP_MBOX_SYM_NAME, pf_id); + pf->mbox = nfp_rtsym_lookup(pf->rtbl, pf_symbol); + if (pf->mbox && pf->mbox->size < NFP_MBOX_SYM_MIN_SIZE) { + nfp_err(pf->cpp, "PF mailbox symbol too small: %llu < %d\n", + pf->mbox->size, NFP_MBOX_SYM_MIN_SIZE); + return -EINVAL; + } + + return 0; +} + static int nfp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id) { @@ -510,6 +646,10 @@ static int nfp_pci_probe(struct pci_dev *pdev, pf->mip = nfp_mip_open(pf->cpp); pf->rtbl = __nfp_rtsym_table_read(pf->cpp, pf->mip); + err = nfp_pf_find_rtsyms(pf); + if (err) + goto err_fw_unload; + pf->dump_flag = NFP_DUMP_NSP_DIAG; pf->dumpspec = nfp_net_dump_load_dumpspec(pf->cpp, pf->rtbl); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.h b/drivers/net/ethernet/netronome/nfp/nfp_main.h index 42211083b51f..595b3dc280e3 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_main.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_main.h @@ -46,10 +46,10 @@ #include <linux/mutex.h> #include <linux/pci.h> #include <linux/workqueue.h> +#include <net/devlink.h> struct dentry; struct device; -struct devlink_ops; struct pci_dev; struct nfp_cpp; @@ -60,7 +60,9 @@ struct nfp_mip; struct nfp_net; struct nfp_nsp_identify; struct nfp_port; +struct nfp_rtsym; struct nfp_rtsym_table; +struct nfp_shared_buf; /** * struct nfp_dumpspec - NFP FW dump specification structure @@ -87,6 +89,7 @@ struct nfp_dumpspec { * @vf_cfg_mem: Pointer to mapped VF configuration area * @vfcfg_tbl2_area: Pointer to the CPP area for the VF config table * @vfcfg_tbl2: Pointer to mapped VF config table + * @mbox: RTSym of per-PCI PF mailbox (under devlink lock) * @irq_entries: Array of MSI-X entries for all vNICs * @limit_vfs: Number of VFs supported by firmware (~0 for PCI limit) * @num_vfs: Number of SR-IOV VFs enabled @@ -108,6 +111,8 @@ struct nfp_dumpspec { * @ports: Linked list of port structures (struct nfp_port) * @wq: Workqueue for running works which need to grab @lock * @port_refresh_work: Work entry for taking netdevs out + * @shared_bufs: Array of shared buffer structures if FW has any SBs + * @num_shared_bufs: Number of elements in @shared_bufs * @lock: Protects all fields which may change after probe */ struct nfp_pf { @@ -127,6 +132,8 @@ struct nfp_pf { struct nfp_cpp_area *vfcfg_tbl2_area; u8 __iomem *vfcfg_tbl2; + const struct nfp_rtsym *mbox; + struct msix_entry *irq_entries; unsigned int limit_vfs; @@ -158,6 +165,9 @@ struct nfp_pf { struct workqueue_struct *wq; struct work_struct port_refresh_work; + struct nfp_shared_buf *shared_bufs; + unsigned int num_shared_bufs; + struct mutex lock; }; @@ -177,6 +187,14 @@ nfp_net_get_mac_addr(struct nfp_pf *pf, struct net_device *netdev, bool nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb); +int nfp_pf_rtsym_read_optional(struct nfp_pf *pf, const char *format, + unsigned int default_val); +u8 __iomem * +nfp_pf_map_rtsym(struct nfp_pf *pf, const char *name, const char *sym_fmt, + unsigned int min_size, struct nfp_cpp_area **area); +int nfp_mbox_cmd(struct nfp_pf *pf, u32 cmd, void *in_data, u64 in_length, + void *out_data, u64 out_length); + enum nfp_dump_diag { NFP_DUMP_NSP_DIAG = 0, }; @@ -188,4 +206,11 @@ s64 nfp_net_dump_calculate_size(struct nfp_pf *pf, struct nfp_dumpspec *spec, int nfp_net_dump_populate_buffer(struct nfp_pf *pf, struct nfp_dumpspec *spec, struct ethtool_dump *dump_param, void *dest); +int nfp_shared_buf_register(struct nfp_pf *pf); +void nfp_shared_buf_unregister(struct nfp_pf *pf); +int nfp_shared_buf_pool_get(struct nfp_pf *pf, unsigned int sb, u16 pool_index, + struct devlink_sb_pool_info *pool_info); +int nfp_shared_buf_pool_set(struct nfp_pf *pf, unsigned int sb, + u16 pool_index, u32 size, + enum devlink_sb_threshold_type threshold_type); #endif /* NFP_MAIN_H */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index bd7d8ae31e17..57cb035dcc6d 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -545,6 +545,7 @@ struct nfp_net_dp { /** * struct nfp_net - NFP network device structure * @dp: Datapath structure + * @id: vNIC id within the PF (0 for VFs) * @fw_ver: Firmware version * @cap: Capabilities advertised by the Firmware * @max_mtu: Maximum support MTU advertised by the Firmware @@ -597,6 +598,8 @@ struct nfp_net { struct nfp_net_fw_version fw_ver; + u32 id; + u32 cap; u32 max_mtu; @@ -909,7 +912,7 @@ int nfp_net_ring_reconfig(struct nfp_net *nn, struct nfp_net_dp *new, void nfp_net_debugfs_create(void); void nfp_net_debugfs_destroy(void); struct dentry *nfp_net_debugfs_device_add(struct pci_dev *pdev); -void nfp_net_debugfs_vnic_add(struct nfp_net *nn, struct dentry *ddir, int id); +void nfp_net_debugfs_vnic_add(struct nfp_net *nn, struct dentry *ddir); void nfp_net_debugfs_dir_clean(struct dentry **dir); #else static inline void nfp_net_debugfs_create(void) @@ -926,7 +929,7 @@ static inline struct dentry *nfp_net_debugfs_device_add(struct pci_dev *pdev) } static inline void -nfp_net_debugfs_vnic_add(struct nfp_net *nn, struct dentry *ddir, int id) +nfp_net_debugfs_vnic_add(struct nfp_net *nn, struct dentry *ddir) { } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index d9111c077699..eea11e881bf5 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -3277,6 +3277,24 @@ nfp_net_features_check(struct sk_buff *skb, struct net_device *dev, return features; } +static int +nfp_net_get_phys_port_name(struct net_device *netdev, char *name, size_t len) +{ + struct nfp_net *nn = netdev_priv(netdev); + int n; + + if (nn->port) + return nfp_port_get_phys_port_name(netdev, name, len); + + if (!nn->dp.is_vf) { + n = snprintf(name, len, "%d", nn->id); + if (n >= len) + return -EINVAL; + } + + return 0; +} + /** * nfp_net_set_vxlan_port() - set vxlan port in SW and reconfigure HW * @nn: NFP Net device to reconfigure @@ -3475,7 +3493,7 @@ const struct net_device_ops nfp_net_netdev_ops = { .ndo_set_mac_address = nfp_net_set_mac_address, .ndo_set_features = nfp_net_set_features, .ndo_features_check = nfp_net_features_check, - .ndo_get_phys_port_name = nfp_port_get_phys_port_name, + .ndo_get_phys_port_name = nfp_net_get_phys_port_name, .ndo_udp_tunnel_add = nfp_net_add_vxlan_port, .ndo_udp_tunnel_del = nfp_net_del_vxlan_port, .ndo_bpf = nfp_net_xdp, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c index 67cdd8330c59..099b63d67451 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c @@ -201,7 +201,7 @@ static const struct file_operations nfp_xdp_q_fops = { .llseek = seq_lseek }; -void nfp_net_debugfs_vnic_add(struct nfp_net *nn, struct dentry *ddir, int id) +void nfp_net_debugfs_vnic_add(struct nfp_net *nn, struct dentry *ddir) { struct dentry *queues, *tx, *rx, *xdp; char name[20]; @@ -211,7 +211,7 @@ void nfp_net_debugfs_vnic_add(struct nfp_net *nn, struct dentry *ddir, int id) return; if (nfp_net_is_data_vnic(nn)) - sprintf(name, "vnic%d", id); + sprintf(name, "vnic%d", nn->id); else strcpy(name, "ctrl-vnic"); nn->debugfs_dir = debugfs_create_dir(name, ddir); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c index 45cd2092e498..28516eecccc8 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c @@ -101,48 +101,15 @@ nfp_net_find_port(struct nfp_eth_table *eth_tbl, unsigned int index) return NULL; } -static int -nfp_net_pf_rtsym_read_optional(struct nfp_pf *pf, const char *format, - unsigned int default_val) -{ - char name[256]; - int err = 0; - u64 val; - - snprintf(name, sizeof(name), format, nfp_cppcore_pcie_unit(pf->cpp)); - - val = nfp_rtsym_read_le(pf->rtbl, name, &err); - if (err) { - if (err == -ENOENT) - return default_val; - nfp_err(pf->cpp, "Unable to read symbol %s\n", name); - return err; - } - - return val; -} - static int nfp_net_pf_get_num_ports(struct nfp_pf *pf) { - return nfp_net_pf_rtsym_read_optional(pf, "nfd_cfg_pf%u_num_ports", 1); + return nfp_pf_rtsym_read_optional(pf, "nfd_cfg_pf%u_num_ports", 1); } static int nfp_net_pf_get_app_id(struct nfp_pf *pf) { - return nfp_net_pf_rtsym_read_optional(pf, "_pf%u_net_app_id", - NFP_APP_CORE_NIC); -} - -static u8 __iomem * -nfp_net_pf_map_rtsym(struct nfp_pf *pf, const char *name, const char *sym_fmt, - unsigned int min_size, struct nfp_cpp_area **area) -{ - char pf_symbol[256]; - - snprintf(pf_symbol, sizeof(pf_symbol), sym_fmt, - nfp_cppcore_pcie_unit(pf->cpp)); - - return nfp_rtsym_map(pf->rtbl, pf_symbol, name, min_size, area); + return nfp_pf_rtsym_read_optional(pf, "_pf%u_net_app_id", + NFP_APP_CORE_NIC); } static void nfp_net_pf_free_vnic(struct nfp_pf *pf, struct nfp_net *nn) @@ -211,11 +178,13 @@ nfp_net_pf_init_vnic(struct nfp_pf *pf, struct nfp_net *nn, unsigned int id) { int err; + nn->id = id; + err = nfp_net_init(nn); if (err) return err; - nfp_net_debugfs_vnic_add(nn, pf->ddir, id); + nfp_net_debugfs_vnic_add(nn, pf->ddir); if (nn->port) { err = nfp_devlink_port_register(pf->app, nn->port); @@ -379,9 +348,8 @@ nfp_net_pf_app_init(struct nfp_pf *pf, u8 __iomem *qc_bar, unsigned int stride) if (!nfp_app_needs_ctrl_vnic(pf->app)) return 0; - ctrl_bar = nfp_net_pf_map_rtsym(pf, "net.ctrl", "_pf%u_net_ctrl_bar", - NFP_PF_CSR_SLICE_SIZE, - &pf->ctrl_vnic_bar); + ctrl_bar = nfp_pf_map_rtsym(pf, "net.ctrl", "_pf%u_net_ctrl_bar", + NFP_PF_CSR_SLICE_SIZE, &pf->ctrl_vnic_bar); if (IS_ERR(ctrl_bar)) { nfp_err(pf->cpp, "Failed to find ctrl vNIC memory symbol\n"); err = PTR_ERR(ctrl_bar); @@ -507,8 +475,8 @@ static int nfp_net_pci_map_mem(struct nfp_pf *pf) int err; min_size = pf->max_data_vnics * NFP_PF_CSR_SLICE_SIZE; - mem = nfp_net_pf_map_rtsym(pf, "net.bar0", "_pf%d_net_bar0", - min_size, &pf->data_vnic_bar); + mem = nfp_pf_map_rtsym(pf, "net.bar0", "_pf%d_net_bar0", + min_size, &pf->data_vnic_bar); if (IS_ERR(mem)) { nfp_err(pf->cpp, "Failed to find data vNIC memory symbol\n"); return PTR_ERR(mem); @@ -528,10 +496,9 @@ static int nfp_net_pci_map_mem(struct nfp_pf *pf) } } - pf->vf_cfg_mem = nfp_net_pf_map_rtsym(pf, "net.vfcfg", - "_pf%d_net_vf_bar", - NFP_NET_CFG_BAR_SZ * - pf->limit_vfs, &pf->vf_cfg_bar); + pf->vf_cfg_mem = nfp_pf_map_rtsym(pf, "net.vfcfg", "_pf%d_net_vf_bar", + NFP_NET_CFG_BAR_SZ * pf->limit_vfs, + &pf->vf_cfg_bar); if (IS_ERR(pf->vf_cfg_mem)) { if (PTR_ERR(pf->vf_cfg_mem) != -ENOENT) { err = PTR_ERR(pf->vf_cfg_mem); @@ -541,9 +508,9 @@ static int nfp_net_pci_map_mem(struct nfp_pf *pf) } min_size = NFP_NET_VF_CFG_SZ * pf->limit_vfs + NFP_NET_VF_CFG_MB_SZ; - pf->vfcfg_tbl2 = nfp_net_pf_map_rtsym(pf, "net.vfcfg_tbl2", - "_pf%d_net_vf_cfg2", - min_size, &pf->vfcfg_tbl2_area); + pf->vfcfg_tbl2 = nfp_pf_map_rtsym(pf, "net.vfcfg_tbl2", + "_pf%d_net_vf_cfg2", + min_size, &pf->vfcfg_tbl2_area); if (IS_ERR(pf->vfcfg_tbl2)) { if (PTR_ERR(pf->vfcfg_tbl2) != -ENOENT) { err = PTR_ERR(pf->vfcfg_tbl2); @@ -763,6 +730,10 @@ int nfp_net_pci_probe(struct nfp_pf *pf) if (err) goto err_app_clean; + err = nfp_shared_buf_register(pf); + if (err) + goto err_devlink_unreg; + mutex_lock(&pf->lock); pf->ddir = nfp_net_debugfs_device_add(pf->pdev); @@ -796,6 +767,8 @@ err_free_vnics: err_clean_ddir: nfp_net_debugfs_dir_clean(&pf->ddir); mutex_unlock(&pf->lock); + nfp_shared_buf_unregister(pf); +err_devlink_unreg: cancel_work_sync(&pf->port_refresh_work); devlink_unregister(devlink); err_app_clean: @@ -823,6 +796,7 @@ void nfp_net_pci_remove(struct nfp_pf *pf) mutex_unlock(&pf->lock); + nfp_shared_buf_unregister(pf); devlink_unregister(priv_to_devlink(pf)); nfp_net_pf_free_irqs(pf); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c index 6e79da91e475..09e87d5f4f72 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c @@ -385,7 +385,7 @@ err_free_netdev: return NULL; } -static void nfp_repr_clean_and_free(struct nfp_repr *repr) +void nfp_repr_clean_and_free(struct nfp_repr *repr) { nfp_info(repr->app->cpp, "Destroying Representor(%s)\n", repr->netdev->name); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h index cd756a15445f..8366e4f3c623 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h @@ -76,6 +76,7 @@ struct nfp_repr_pcpu_stats { * @port: Port of representor * @app: APP handle * @stats: Statistic of packets hitting CPU + * @app_priv: Pointer for APP data */ struct nfp_repr { struct net_device *netdev; @@ -83,6 +84,7 @@ struct nfp_repr { struct nfp_port *port; struct nfp_app *app; struct nfp_repr_pcpu_stats __percpu *stats; + void *app_priv; }; /** @@ -125,6 +127,7 @@ int nfp_repr_init(struct nfp_app *app, struct net_device *netdev, struct net_device *pf_netdev); void nfp_repr_free(struct net_device *netdev); struct net_device *nfp_repr_alloc(struct nfp_app *app); +void nfp_repr_clean_and_free(struct nfp_repr *repr); void nfp_reprs_clean_and_free(struct nfp_app *app, struct nfp_reprs *reprs); void nfp_reprs_clean_and_free_by_type(struct nfp_app *app, enum nfp_repr_type type); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c index b802a1d55449..68928c86b698 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c @@ -283,7 +283,7 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev, nfp_net_info(nn); vf->ddir = nfp_net_debugfs_device_add(pdev); - nfp_net_debugfs_vnic_add(nn, vf->ddir, 0); + nfp_net_debugfs_vnic_add(nn, vf->ddir); return 0; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.c b/drivers/net/ethernet/netronome/nfp/nfp_port.c index 7bd8be5c833b..9c1298114c70 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_port.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_port.c @@ -181,7 +181,11 @@ nfp_port_get_phys_port_name(struct net_device *netdev, char *name, size_t len) eth_port->label_subport); break; case NFP_PORT_PF_PORT: - n = snprintf(name, len, "pf%d", port->pf_id); + if (!port->pf_split) + n = snprintf(name, len, "pf%d", port->pf_id); + else + n = snprintf(name, len, "pf%ds%d", port->pf_id, + port->pf_split_id); break; case NFP_PORT_VF_PORT: n = snprintf(name, len, "pf%dvf%d", port->pf_id, port->vf_id); @@ -218,6 +222,8 @@ int nfp_port_configure(struct net_device *netdev, bool configed) eth_port = __nfp_port_get_eth_port(port); if (!eth_port) return 0; + if (port->eth_forced) + return 0; err = nfp_eth_set_configured(port->app->cpp, eth_port->index, configed); return err < 0 && err != -EOPNOTSUPP ? err : 0; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.h b/drivers/net/ethernet/netronome/nfp/nfp_port.h index fa7e669a969c..18666750456e 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_port.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_port.h @@ -77,10 +77,13 @@ enum nfp_port_flags { * @app: backpointer to the app structure * @dl_port: devlink port structure * @eth_id: for %NFP_PORT_PHYS_PORT port ID in NFP enumeration scheme + * @eth_forced: for %NFP_PORT_PHYS_PORT port is forced UP or DOWN, don't change * @eth_port: for %NFP_PORT_PHYS_PORT translated ETH Table port entry * @eth_stats: for %NFP_PORT_PHYS_PORT MAC stats if available * @pf_id: for %NFP_PORT_PF_PORT, %NFP_PORT_VF_PORT ID of the PCI PF (0-3) * @vf_id: for %NFP_PORT_VF_PORT ID of the PCI VF within @pf_id + * @pf_split: for %NFP_PORT_PF_PORT %true if PCI PF has more than one vNIC + * @pf_split_id:for %NFP_PORT_PF_PORT ID of PCI PF vNIC (valid if @pf_split) * @vnic: for %NFP_PORT_PF_PORT, %NFP_PORT_VF_PORT vNIC ctrl memory * @port_list: entry on pf's list of ports */ @@ -99,6 +102,7 @@ struct nfp_port { /* NFP_PORT_PHYS_PORT */ struct { unsigned int eth_id; + bool eth_forced; struct nfp_eth_table_port *eth_port; u8 __iomem *eth_stats; }; @@ -106,6 +110,8 @@ struct nfp_port { struct { unsigned int pf_id; unsigned int vf_id; + bool pf_split; + unsigned int pf_split_id; u8 __iomem *vnic; }; }; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_shared_buf.c b/drivers/net/ethernet/netronome/nfp/nfp_shared_buf.c new file mode 100644 index 000000000000..0ecd83705368 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_shared_buf.c @@ -0,0 +1,180 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +/* + * Copyright (C) 2018 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/kernel.h> +#include <net/devlink.h> + +#include "nfpcore/nfp_cpp.h" +#include "nfpcore/nfp_nffw.h" +#include "nfp_abi.h" +#include "nfp_app.h" +#include "nfp_main.h" + +static u32 nfp_shared_buf_pool_unit(struct nfp_pf *pf, unsigned int sb) +{ + __le32 sb_id = cpu_to_le32(sb); + unsigned int i; + + for (i = 0; i < pf->num_shared_bufs; i++) + if (pf->shared_bufs[i].id == sb_id) + return le32_to_cpu(pf->shared_bufs[i].pool_size_unit); + + WARN_ON_ONCE(1); + return 0; +} + +int nfp_shared_buf_pool_get(struct nfp_pf *pf, unsigned int sb, u16 pool_index, + struct devlink_sb_pool_info *pool_info) +{ + struct nfp_shared_buf_pool_info_get get_data; + struct nfp_shared_buf_pool_id id = { + .shared_buf = cpu_to_le32(sb), + .pool = cpu_to_le32(pool_index), + }; + unsigned int unit_size; + int n; + + unit_size = nfp_shared_buf_pool_unit(pf, sb); + if (!unit_size) + return -EINVAL; + + n = nfp_mbox_cmd(pf, NFP_MBOX_POOL_GET, &id, sizeof(id), + &get_data, sizeof(get_data)); + if (n < 0) + return n; + if (n < sizeof(get_data)) + return -EIO; + + pool_info->pool_type = le32_to_cpu(get_data.pool_type); + pool_info->threshold_type = le32_to_cpu(get_data.threshold_type); + pool_info->size = le32_to_cpu(get_data.size) * unit_size; + + return 0; +} + +int nfp_shared_buf_pool_set(struct nfp_pf *pf, unsigned int sb, + u16 pool_index, u32 size, + enum devlink_sb_threshold_type threshold_type) +{ + struct nfp_shared_buf_pool_info_set set_data = { + .id = { + .shared_buf = cpu_to_le32(sb), + .pool = cpu_to_le32(pool_index), + }, + .threshold_type = cpu_to_le32(threshold_type), + }; + unsigned int unit_size; + + unit_size = nfp_shared_buf_pool_unit(pf, sb); + if (!unit_size || size % unit_size) + return -EINVAL; + set_data.size = cpu_to_le32(size / unit_size); + + return nfp_mbox_cmd(pf, NFP_MBOX_POOL_SET, &set_data, sizeof(set_data), + NULL, 0); +} + +int nfp_shared_buf_register(struct nfp_pf *pf) +{ + struct devlink *devlink = priv_to_devlink(pf); + unsigned int i, num_entries, entry_sz; + struct nfp_cpp_area *sb_desc_area; + u8 __iomem *sb_desc; + int n, err; + + if (!pf->mbox) + return 0; + + n = nfp_pf_rtsym_read_optional(pf, NFP_SHARED_BUF_COUNT_SYM_NAME, 0); + if (n <= 0) + return n; + num_entries = n; + + sb_desc = nfp_pf_map_rtsym(pf, "sb_tbl", NFP_SHARED_BUF_TABLE_SYM_NAME, + num_entries * sizeof(pf->shared_bufs[0]), + &sb_desc_area); + if (IS_ERR(sb_desc)) + return PTR_ERR(sb_desc); + + entry_sz = nfp_cpp_area_size(sb_desc_area) / num_entries; + + pf->shared_bufs = kmalloc_array(num_entries, sizeof(pf->shared_bufs[0]), + GFP_KERNEL); + if (!pf->shared_bufs) { + err = -ENOMEM; + goto err_release_area; + } + + for (i = 0; i < num_entries; i++) { + struct nfp_shared_buf *sb = &pf->shared_bufs[i]; + + /* Entries may be larger in future FW */ + memcpy_fromio(sb, sb_desc + i * entry_sz, sizeof(*sb)); + + err = devlink_sb_register(devlink, + le32_to_cpu(sb->id), + le32_to_cpu(sb->size), + le16_to_cpu(sb->ingress_pools_count), + le16_to_cpu(sb->egress_pools_count), + le16_to_cpu(sb->ingress_tc_count), + le16_to_cpu(sb->egress_tc_count)); + if (err) + goto err_unreg_prev; + } + pf->num_shared_bufs = num_entries; + + nfp_cpp_area_release_free(sb_desc_area); + + return 0; + +err_unreg_prev: + while (i--) + devlink_sb_unregister(devlink, + le32_to_cpu(pf->shared_bufs[i].id)); + kfree(pf->shared_bufs); +err_release_area: + nfp_cpp_area_release_free(sb_desc_area); + return err; +} + +void nfp_shared_buf_unregister(struct nfp_pf *pf) +{ + struct devlink *devlink = priv_to_devlink(pf); + unsigned int i; + + for (i = 0; i < pf->num_shared_bufs; i++) + devlink_sb_unregister(devlink, + le32_to_cpu(pf->shared_bufs[i].id)); + kfree(pf->shared_bufs); +} diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c index a0e336bd1d85..749655c329b2 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c @@ -933,7 +933,6 @@ static int nfp6000_area_read(struct nfp_cpp_area *area, void *kernel_vaddr, u32 *wrptr32 = kernel_vaddr; const u32 __iomem *rdptr32; int n, width; - bool is_64; priv = nfp_cpp_area_priv(area); rdptr64 = priv->iomem + offset; @@ -943,10 +942,15 @@ static int nfp6000_area_read(struct nfp_cpp_area *area, void *kernel_vaddr, return -EFAULT; width = priv->width.read; - if (width <= 0) return -EINVAL; + /* MU reads via a PCIe2CPP BAR support 32bit (and other) lengths */ + if (priv->target == (NFP_CPP_TARGET_MU & NFP_CPP_TARGET_ID_MASK) && + priv->action == NFP_CPP_ACTION_RW && + (offset % sizeof(u64) == 4 || length % sizeof(u64) == 4)) + width = TARGET_WIDTH_32; + /* Unaligned? Translate to an explicit access */ if ((priv->offset + offset) & (width - 1)) return nfp_cpp_explicit_read(nfp_cpp_area_cpp(area), @@ -956,36 +960,29 @@ static int nfp6000_area_read(struct nfp_cpp_area *area, void *kernel_vaddr, priv->offset + offset, kernel_vaddr, length, width); - is_64 = width == TARGET_WIDTH_64; - - /* MU reads via a PCIe2CPP BAR supports 32bit (and other) lengths */ - if (priv->target == (NFP_CPP_TARGET_ID_MASK & NFP_CPP_TARGET_MU) && - priv->action == NFP_CPP_ACTION_RW) - is_64 = false; + if (WARN_ON(!priv->bar)) + return -EFAULT; - if (is_64) { - if (offset % sizeof(u64) != 0 || length % sizeof(u64) != 0) - return -EINVAL; - } else { + switch (width) { + case TARGET_WIDTH_32: if (offset % sizeof(u32) != 0 || length % sizeof(u32) != 0) return -EINVAL; - } - if (WARN_ON(!priv->bar)) - return -EFAULT; + for (n = 0; n < length; n += sizeof(u32)) + *wrptr32++ = __raw_readl(rdptr32++); + return n; +#ifdef __raw_readq + case TARGET_WIDTH_64: + if (offset % sizeof(u64) != 0 || length % sizeof(u64) != 0) + return -EINVAL; - if (is_64) -#ifndef __raw_readq - return -EINVAL; -#else for (n = 0; n < length; n += sizeof(u64)) *wrptr64++ = __raw_readq(rdptr64++); + return n; #endif - else - for (n = 0; n < length; n += sizeof(u32)) - *wrptr32++ = __raw_readl(rdptr32++); - - return n; + default: + return -EINVAL; + } } static int @@ -999,7 +996,6 @@ nfp6000_area_write(struct nfp_cpp_area *area, struct nfp6000_area_priv *priv; u32 __iomem *wrptr32; int n, width; - bool is_64; priv = nfp_cpp_area_priv(area); wrptr64 = priv->iomem + offset; @@ -1009,10 +1005,15 @@ nfp6000_area_write(struct nfp_cpp_area *area, return -EFAULT; width = priv->width.write; - if (width <= 0) return -EINVAL; + /* MU writes via a PCIe2CPP BAR support 32bit (and other) lengths */ + if (priv->target == (NFP_CPP_TARGET_ID_MASK & NFP_CPP_TARGET_MU) && + priv->action == NFP_CPP_ACTION_RW && + (offset % sizeof(u64) == 4 || length % sizeof(u64) == 4)) + width = TARGET_WIDTH_32; + /* Unaligned? Translate to an explicit access */ if ((priv->offset + offset) & (width - 1)) return nfp_cpp_explicit_write(nfp_cpp_area_cpp(area), @@ -1022,40 +1023,33 @@ nfp6000_area_write(struct nfp_cpp_area *area, priv->offset + offset, kernel_vaddr, length, width); - is_64 = width == TARGET_WIDTH_64; - - /* MU writes via a PCIe2CPP BAR supports 32bit (and other) lengths */ - if (priv->target == (NFP_CPP_TARGET_ID_MASK & NFP_CPP_TARGET_MU) && - priv->action == NFP_CPP_ACTION_RW) - is_64 = false; + if (WARN_ON(!priv->bar)) + return -EFAULT; - if (is_64) { - if (offset % sizeof(u64) != 0 || length % sizeof(u64) != 0) - return -EINVAL; - } else { + switch (width) { + case TARGET_WIDTH_32: if (offset % sizeof(u32) != 0 || length % sizeof(u32) != 0) return -EINVAL; - } - if (WARN_ON(!priv->bar)) - return -EFAULT; + for (n = 0; n < length; n += sizeof(u32)) { + __raw_writel(*rdptr32++, wrptr32++); + wmb(); + } + return n; +#ifdef __raw_writeq + case TARGET_WIDTH_64: + if (offset % sizeof(u64) != 0 || length % sizeof(u64) != 0) + return -EINVAL; - if (is_64) -#ifndef __raw_writeq - return -EINVAL; -#else for (n = 0; n < length; n += sizeof(u64)) { __raw_writeq(*rdptr64++, wrptr64++); wmb(); } + return n; #endif - else - for (n = 0; n < length; n += sizeof(u32)) { - __raw_writel(*rdptr32++, wrptr32++); - wmb(); - } - - return n; + default: + return -EINVAL; + } } struct nfp6000_explicit_priv { diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index da07ccdf84bf..60a5769ef5a1 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1618,8 +1618,24 @@ static int netvsc_set_ringparam(struct net_device *ndev, return ret; } +static u32 netvsc_get_msglevel(struct net_device *ndev) +{ + struct net_device_context *ndev_ctx = netdev_priv(ndev); + + return ndev_ctx->msg_enable; +} + +static void netvsc_set_msglevel(struct net_device *ndev, u32 val) +{ + struct net_device_context *ndev_ctx = netdev_priv(ndev); + + ndev_ctx->msg_enable = val; +} + static const struct ethtool_ops ethtool_ops = { .get_drvinfo = netvsc_get_drvinfo, + .get_msglevel = netvsc_get_msglevel, + .set_msglevel = netvsc_set_msglevel, .get_link = ethtool_op_get_link, .get_ethtool_stats = netvsc_get_ethtool_stats, .get_sset_count = netvsc_get_sset_count, diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 4ab6e9a50bbe..c4c92db86dfa 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -976,6 +976,7 @@ static int sfp_probe(struct platform_device *pdev) if (pdev->dev.of_node) { struct device_node *node = pdev->dev.of_node; const struct of_device_id *id; + struct i2c_adapter *i2c; struct device_node *np; id = of_match_node(sfp_of_match, node); @@ -985,19 +986,20 @@ static int sfp_probe(struct platform_device *pdev) sff = sfp->type = id->data; np = of_parse_phandle(node, "i2c-bus", 0); - if (np) { - struct i2c_adapter *i2c; - - i2c = of_find_i2c_adapter_by_node(np); - of_node_put(np); - if (!i2c) - return -EPROBE_DEFER; - - err = sfp_i2c_configure(sfp, i2c); - if (err < 0) { - i2c_put_adapter(i2c); - return err; - } + if (!np) { + dev_err(sfp->dev, "missing 'i2c-bus' property\n"); + return -ENODEV; + } + + i2c = of_find_i2c_adapter_by_node(np); + of_node_put(np); + if (!i2c) + return -EPROBE_DEFER; + + err = sfp_i2c_configure(sfp, i2c); + if (err < 0) { + i2c_put_adapter(i2c); + return err; } } @@ -1065,6 +1067,15 @@ static int sfp_probe(struct platform_device *pdev) if (poll) mod_delayed_work(system_wq, &sfp->poll, poll_jiffies); + /* We could have an issue in cases no Tx disable pin is available or + * wired as modules using a laser as their light source will continue to + * be active when the fiber is removed. This could be a safety issue and + * we should at least warn the user about that. + */ + if (!sfp->gpio[GPIO_TX_DISABLE]) + dev_warn(sfp->dev, + "No tx_disable pin: SFP modules will always be emitting.\n"); + return 0; } diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 9dbd390ace34..d6ff881165d0 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1026,7 +1026,8 @@ static void __team_compute_features(struct team *team) } team->dev->vlan_features = vlan_features; - team->dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL; + team->dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL | + NETIF_F_GSO_UDP_L4; team->dev->hard_header_len = max_hard_header_len; team->dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; @@ -2117,7 +2118,7 @@ static void team_setup(struct net_device *dev) NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER; - dev->hw_features |= NETIF_F_GSO_ENCAP_ALL; + dev->hw_features |= NETIF_F_GSO_ENCAP_ALL | NETIF_F_GSO_UDP_L4; dev->features |= dev->hw_features; } diff --git a/fs/exec.c b/fs/exec.c index 183059c427b9..30a36c2a39bf 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1706,14 +1706,13 @@ static int exec_binprm(struct linux_binprm *bprm) /* * sys_execve() executes a new program. */ -static int do_execveat_common(int fd, struct filename *filename, - struct user_arg_ptr argv, - struct user_arg_ptr envp, - int flags) +static int __do_execve_file(int fd, struct filename *filename, + struct user_arg_ptr argv, + struct user_arg_ptr envp, + int flags, struct file *file) { char *pathbuf = NULL; struct linux_binprm *bprm; - struct file *file; struct files_struct *displaced; int retval; @@ -1752,7 +1751,8 @@ static int do_execveat_common(int fd, struct filename *filename, check_unsafe_exec(bprm); current->in_execve = 1; - file = do_open_execat(fd, filename, flags); + if (!file) + file = do_open_execat(fd, filename, flags); retval = PTR_ERR(file); if (IS_ERR(file)) goto out_unmark; @@ -1760,7 +1760,9 @@ static int do_execveat_common(int fd, struct filename *filename, sched_exec(); bprm->file = file; - if (fd == AT_FDCWD || filename->name[0] == '/') { + if (!filename) { + bprm->filename = "none"; + } else if (fd == AT_FDCWD || filename->name[0] == '/') { bprm->filename = filename->name; } else { if (filename->name[0] == '\0') @@ -1826,7 +1828,8 @@ static int do_execveat_common(int fd, struct filename *filename, task_numa_free(current); free_bprm(bprm); kfree(pathbuf); - putname(filename); + if (filename) + putname(filename); if (displaced) put_files_struct(displaced); return retval; @@ -1849,10 +1852,27 @@ out_files: if (displaced) reset_files_struct(displaced); out_ret: - putname(filename); + if (filename) + putname(filename); return retval; } +static int do_execveat_common(int fd, struct filename *filename, + struct user_arg_ptr argv, + struct user_arg_ptr envp, + int flags) +{ + return __do_execve_file(fd, filename, argv, envp, flags, NULL); +} + +int do_execve_file(struct file *file, void *__argv, void *__envp) +{ + struct user_arg_ptr argv = { .ptr.native = __argv }; + struct user_arg_ptr envp = { .ptr.native = __envp }; + + return __do_execve_file(AT_FDCWD, NULL, argv, envp, 0, file); +} + int do_execve(struct filename *filename, const char __user *const __user *__argv, const char __user *const __user *__envp) diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 4955e0863b83..c05f24fac4f6 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -150,5 +150,6 @@ extern int do_execveat(int, struct filename *, const char __user * const __user *, const char __user * const __user *, int); +int do_execve_file(struct file *file, void *__argv, void *__envp); #endif /* _LINUX_BINFMTS_H */ diff --git a/include/linux/bpfilter.h b/include/linux/bpfilter.h new file mode 100644 index 000000000000..687b1760bb9f --- /dev/null +++ b/include/linux/bpfilter.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_BPFILTER_H +#define _LINUX_BPFILTER_H + +#include <uapi/linux/bpfilter.h> + +struct sock; +int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char *optval, + unsigned int optlen); +int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char *optval, + int *optlen); +extern int (*bpfilter_process_sockopt)(struct sock *sk, int optname, + char __user *optval, + unsigned int optlen, bool is_set); +#endif diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index c87c3a3453c1..623bb8ced060 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -220,7 +220,6 @@ enum { NETIF_F_GSO_GRE_CSUM | \ NETIF_F_GSO_IPXIP4 | \ NETIF_F_GSO_IPXIP6 | \ - NETIF_F_GSO_UDP_L4 | \ NETIF_F_GSO_UDP_TUNNEL | \ NETIF_F_GSO_UDP_TUNNEL_CSUM) diff --git a/include/linux/umh.h b/include/linux/umh.h index 244aff638220..5c812acbb80a 100644 --- a/include/linux/umh.h +++ b/include/linux/umh.h @@ -22,8 +22,10 @@ struct subprocess_info { const char *path; char **argv; char **envp; + struct file *file; int wait; int retval; + pid_t pid; int (*init)(struct subprocess_info *info, struct cred *new); void (*cleanup)(struct subprocess_info *info); void *data; @@ -38,6 +40,16 @@ call_usermodehelper_setup(const char *path, char **argv, char **envp, int (*init)(struct subprocess_info *info, struct cred *new), void (*cleanup)(struct subprocess_info *), void *data); +struct subprocess_info *call_usermodehelper_setup_file(struct file *file, + int (*init)(struct subprocess_info *info, struct cred *new), + void (*cleanup)(struct subprocess_info *), void *data); +struct umh_info { + struct file *pipe_to_umh; + struct file *pipe_from_umh; + pid_t pid; +}; +int fork_usermode_blob(void *data, size_t len, struct umh_info *info); + extern int call_usermodehelper_exec(struct subprocess_info *info, int wait); diff --git a/include/net/ip.h b/include/net/ip.h index bada1f1f871e..0d2281b4b27a 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -664,4 +664,7 @@ extern int sysctl_icmp_msgs_burst; int ip_misc_proc_init(void); #endif +int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto, + struct netlink_ext_ack *extack); + #endif /* _IP_H */ diff --git a/include/uapi/linux/bpfilter.h b/include/uapi/linux/bpfilter.h new file mode 100644 index 000000000000..2ec3cc99ea4c --- /dev/null +++ b/include/uapi/linux/bpfilter.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI_LINUX_BPFILTER_H +#define _UAPI_LINUX_BPFILTER_H + +#include <linux/if.h> + +enum { + BPFILTER_IPT_SO_SET_REPLACE = 64, + BPFILTER_IPT_SO_SET_ADD_COUNTERS = 65, + BPFILTER_IPT_SET_MAX, +}; + +enum { + BPFILTER_IPT_SO_GET_INFO = 64, + BPFILTER_IPT_SO_GET_ENTRIES = 65, + BPFILTER_IPT_SO_GET_REVISION_MATCH = 66, + BPFILTER_IPT_SO_GET_REVISION_TARGET = 67, + BPFILTER_IPT_GET_MAX, +}; + +#endif /* _UAPI_LINUX_BPFILTER_H */ diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 9b15005955fa..cabb210c93af 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -327,6 +327,9 @@ enum rtattr_type_t { RTA_PAD, RTA_UID, RTA_TTL_PROPAGATE, + RTA_IP_PROTO, + RTA_SPORT, + RTA_DPORT, __RTA_MAX }; diff --git a/kernel/umh.c b/kernel/umh.c index f76b3ff876cf..30db93fd7e39 100644 --- a/kernel/umh.c +++ b/kernel/umh.c @@ -25,6 +25,8 @@ #include <linux/ptrace.h> #include <linux/async.h> #include <linux/uaccess.h> +#include <linux/shmem_fs.h> +#include <linux/pipe_fs_i.h> #include <trace/events/module.h> @@ -97,9 +99,13 @@ static int call_usermodehelper_exec_async(void *data) commit_creds(new); - retval = do_execve(getname_kernel(sub_info->path), - (const char __user *const __user *)sub_info->argv, - (const char __user *const __user *)sub_info->envp); + if (sub_info->file) + retval = do_execve_file(sub_info->file, + sub_info->argv, sub_info->envp); + else + retval = do_execve(getname_kernel(sub_info->path), + (const char __user *const __user *)sub_info->argv, + (const char __user *const __user *)sub_info->envp); out: sub_info->retval = retval; /* @@ -185,6 +191,8 @@ static void call_usermodehelper_exec_work(struct work_struct *work) if (pid < 0) { sub_info->retval = pid; umh_complete(sub_info); + } else { + sub_info->pid = pid; } } } @@ -393,6 +401,117 @@ struct subprocess_info *call_usermodehelper_setup(const char *path, char **argv, } EXPORT_SYMBOL(call_usermodehelper_setup); +struct subprocess_info *call_usermodehelper_setup_file(struct file *file, + int (*init)(struct subprocess_info *info, struct cred *new), + void (*cleanup)(struct subprocess_info *info), void *data) +{ + struct subprocess_info *sub_info; + + sub_info = kzalloc(sizeof(struct subprocess_info), GFP_KERNEL); + if (!sub_info) + return NULL; + + INIT_WORK(&sub_info->work, call_usermodehelper_exec_work); + sub_info->path = "none"; + sub_info->file = file; + sub_info->init = init; + sub_info->cleanup = cleanup; + sub_info->data = data; + return sub_info; +} + +static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) +{ + struct umh_info *umh_info = info->data; + struct file *from_umh[2]; + struct file *to_umh[2]; + int err; + + /* create pipe to send data to umh */ + err = create_pipe_files(to_umh, 0); + if (err) + return err; + err = replace_fd(0, to_umh[0], 0); + fput(to_umh[0]); + if (err < 0) { + fput(to_umh[1]); + return err; + } + + /* create pipe to receive data from umh */ + err = create_pipe_files(from_umh, 0); + if (err) { + fput(to_umh[1]); + replace_fd(0, NULL, 0); + return err; + } + err = replace_fd(1, from_umh[1], 0); + fput(from_umh[1]); + if (err < 0) { + fput(to_umh[1]); + replace_fd(0, NULL, 0); + fput(from_umh[0]); + return err; + } + + umh_info->pipe_to_umh = to_umh[1]; + umh_info->pipe_from_umh = from_umh[0]; + return 0; +} + +static void umh_save_pid(struct subprocess_info *info) +{ + struct umh_info *umh_info = info->data; + + umh_info->pid = info->pid; +} + +/** + * fork_usermode_blob - fork a blob of bytes as a usermode process + * @data: a blob of bytes that can be do_execv-ed as a file + * @len: length of the blob + * @info: information about usermode process (shouldn't be NULL) + * + * Returns either negative error or zero which indicates success + * in executing a blob of bytes as a usermode process. In such + * case 'struct umh_info *info' is populated with two pipes + * and a pid of the process. The caller is responsible for health + * check of the user process, killing it via pid, and closing the + * pipes when user process is no longer needed. + */ +int fork_usermode_blob(void *data, size_t len, struct umh_info *info) +{ + struct subprocess_info *sub_info; + struct file *file; + ssize_t written; + loff_t pos = 0; + int err; + + file = shmem_kernel_file_setup("", len, 0); + if (IS_ERR(file)) + return PTR_ERR(file); + + written = kernel_write(file, data, len, &pos); + if (written != len) { + err = written; + if (err >= 0) + err = -ENOMEM; + goto out; + } + + err = -ENOMEM; + sub_info = call_usermodehelper_setup_file(file, umh_pipe_setup, + umh_save_pid, info); + if (!sub_info) + goto out; + + err = call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC); +out: + fput(file); + return err; +} +EXPORT_SYMBOL_GPL(fork_usermode_blob); + /** * call_usermodehelper_exec - start a usermode application * @sub_info: information about the subprocessa diff --git a/net/Kconfig b/net/Kconfig index df8d45ef47d8..ba554cedb615 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -202,6 +202,8 @@ source "net/bridge/netfilter/Kconfig" endif +source "net/bpfilter/Kconfig" + source "net/dccp/Kconfig" source "net/sctp/Kconfig" source "net/rds/Kconfig" diff --git a/net/Makefile b/net/Makefile index 77aaddedbd29..bdaf53925acd 100644 --- a/net/Makefile +++ b/net/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_TLS) += tls/ obj-$(CONFIG_XFRM) += xfrm/ obj-$(CONFIG_UNIX) += unix/ obj-$(CONFIG_NET) += ipv6/ +obj-$(CONFIG_BPFILTER) += bpfilter/ obj-$(CONFIG_PACKET) += packet/ obj-$(CONFIG_NET_KEY) += key/ obj-$(CONFIG_BRIDGE) += bridge/ diff --git a/net/bpfilter/Kconfig b/net/bpfilter/Kconfig new file mode 100644 index 000000000000..60725c5f79db --- /dev/null +++ b/net/bpfilter/Kconfig @@ -0,0 +1,16 @@ +menuconfig BPFILTER + bool "BPF based packet filtering framework (BPFILTER)" + default n + depends on NET && BPF + help + This builds experimental bpfilter framework that is aiming to + provide netfilter compatible functionality via BPF + +if BPFILTER +config BPFILTER_UMH + tristate "bpfilter kernel module with user mode helper" + default m + help + This builds bpfilter kernel module with embedded user mode helper +endif + diff --git a/net/bpfilter/Makefile b/net/bpfilter/Makefile new file mode 100644 index 000000000000..2af752c8ef5e --- /dev/null +++ b/net/bpfilter/Makefile @@ -0,0 +1,30 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the Linux BPFILTER layer. +# + +hostprogs-y := bpfilter_umh +bpfilter_umh-objs := main.o +HOSTCFLAGS += -I. -Itools/include/ +ifeq ($(CONFIG_BPFILTER_UMH), y) +# builtin bpfilter_umh should be compiled with -static +# since rootfs isn't mounted at the time of __init +# function is called and do_execv won't find elf interpreter +HOSTLDFLAGS += -static +endif + +# a bit of elf magic to convert bpfilter_umh binary into a binary blob +# inside bpfilter_umh.o elf file referenced by +# _binary_net_bpfilter_bpfilter_umh_start symbol +# which bpfilter_kern.c passes further into umh blob loader at run-time +quiet_cmd_copy_umh = GEN $@ + cmd_copy_umh = echo ':' > $(obj)/.bpfilter_umh.o.cmd; \ + $(OBJCOPY) -I binary -O $(CONFIG_OUTPUT_FORMAT) \ + -B `$(OBJDUMP) -f $<|grep architecture|cut -d, -f1|cut -d' ' -f2` \ + --rename-section .data=.init.rodata $< $@ + +$(obj)/bpfilter_umh.o: $(obj)/bpfilter_umh + $(call cmd,copy_umh) + +obj-$(CONFIG_BPFILTER_UMH) += bpfilter.o +bpfilter-objs += bpfilter_kern.o bpfilter_umh.o diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c new file mode 100644 index 000000000000..7596314b61c7 --- /dev/null +++ b/net/bpfilter/bpfilter_kern.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0 +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/init.h> +#include <linux/module.h> +#include <linux/umh.h> +#include <linux/bpfilter.h> +#include <linux/sched.h> +#include <linux/sched/signal.h> +#include <linux/fs.h> +#include <linux/file.h> +#include "msgfmt.h" + +#define UMH_start _binary_net_bpfilter_bpfilter_umh_start +#define UMH_end _binary_net_bpfilter_bpfilter_umh_end + +extern char UMH_start; +extern char UMH_end; + +static struct umh_info info; +/* since ip_getsockopt() can run in parallel, serialize access to umh */ +static DEFINE_MUTEX(bpfilter_lock); + +static void shutdown_umh(struct umh_info *info) +{ + struct task_struct *tsk; + + tsk = pid_task(find_vpid(info->pid), PIDTYPE_PID); + if (tsk) + force_sig(SIGKILL, tsk); + fput(info->pipe_to_umh); + fput(info->pipe_from_umh); +} + +static void __stop_umh(void) +{ + if (bpfilter_process_sockopt) { + bpfilter_process_sockopt = NULL; + shutdown_umh(&info); + } +} + +static void stop_umh(void) +{ + mutex_lock(&bpfilter_lock); + __stop_umh(); + mutex_unlock(&bpfilter_lock); +} + +static int __bpfilter_process_sockopt(struct sock *sk, int optname, + char __user *optval, + unsigned int optlen, bool is_set) +{ + struct mbox_request req; + struct mbox_reply reply; + loff_t pos; + ssize_t n; + int ret; + + req.is_set = is_set; + req.pid = current->pid; + req.cmd = optname; + req.addr = (long)optval; + req.len = optlen; + mutex_lock(&bpfilter_lock); + n = __kernel_write(info.pipe_to_umh, &req, sizeof(req), &pos); + if (n != sizeof(req)) { + pr_err("write fail %zd\n", n); + __stop_umh(); + ret = -EFAULT; + goto out; + } + pos = 0; + n = kernel_read(info.pipe_from_umh, &reply, sizeof(reply), &pos); + if (n != sizeof(reply)) { + pr_err("read fail %zd\n", n); + __stop_umh(); + ret = -EFAULT; + goto out; + } + ret = reply.status; +out: + mutex_unlock(&bpfilter_lock); + return ret; +} + +static int __init load_umh(void) +{ + int err; + + /* fork usermode process */ + err = fork_usermode_blob(&UMH_start, &UMH_end - &UMH_start, &info); + if (err) + return err; + pr_info("Loaded bpfilter_umh pid %d\n", info.pid); + + /* health check that usermode process started correctly */ + if (__bpfilter_process_sockopt(NULL, 0, 0, 0, 0) != 0) { + stop_umh(); + return -EFAULT; + } + bpfilter_process_sockopt = &__bpfilter_process_sockopt; + return 0; +} + +static void __exit fini_umh(void) +{ + stop_umh(); +} +module_init(load_umh); +module_exit(fini_umh); +MODULE_LICENSE("GPL"); diff --git a/net/bpfilter/main.c b/net/bpfilter/main.c new file mode 100644 index 000000000000..81bbc1684896 --- /dev/null +++ b/net/bpfilter/main.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <sys/uio.h> +#include <errno.h> +#include <stdio.h> +#include <sys/socket.h> +#include <fcntl.h> +#include <unistd.h> +#include "include/uapi/linux/bpf.h" +#include <asm/unistd.h> +#include "msgfmt.h" + +int debug_fd; + +static int handle_get_cmd(struct mbox_request *cmd) +{ + switch (cmd->cmd) { + case 0: + return 0; + default: + break; + } + return -ENOPROTOOPT; +} + +static int handle_set_cmd(struct mbox_request *cmd) +{ + return -ENOPROTOOPT; +} + +static void loop(void) +{ + while (1) { + struct mbox_request req; + struct mbox_reply reply; + int n; + + n = read(0, &req, sizeof(req)); + if (n != sizeof(req)) { + dprintf(debug_fd, "invalid request %d\n", n); + return; + } + + reply.status = req.is_set ? + handle_set_cmd(&req) : + handle_get_cmd(&req); + + n = write(1, &reply, sizeof(reply)); + if (n != sizeof(reply)) { + dprintf(debug_fd, "reply failed %d\n", n); + return; + } + } +} + +int main(void) +{ + debug_fd = open("/dev/console", 00000002 | 00000100); + dprintf(debug_fd, "Started bpfilter\n"); + loop(); + close(debug_fd); + return 0; +} diff --git a/net/bpfilter/msgfmt.h b/net/bpfilter/msgfmt.h new file mode 100644 index 000000000000..98d121c62945 --- /dev/null +++ b/net/bpfilter/msgfmt.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _NET_BPFILTER_MSGFMT_H +#define _NET_BPFILTER_MSGFMT_H + +struct mbox_request { + __u64 addr; + __u32 len; + __u32 is_set; + __u32 cmd; + __u32 pid; +}; + +struct mbox_reply { + __u32 status; +}; + +#endif diff --git a/net/core/devlink.c b/net/core/devlink.c index 5c8a40e1a01e..475246b355f0 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -2756,7 +2756,8 @@ static const struct genl_ops devlink_nl_ops[] = { .doit = devlink_nl_cmd_eswitch_set_doit, .policy = devlink_nl_policy, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK | + DEVLINK_NL_FLAG_NO_LOCK, }, { .cmd = DEVLINK_CMD_DPIPE_TABLE_GET, diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index b379520f9133..eec9569ffa5c 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -14,7 +14,9 @@ obj-y := route.o inetpeer.o protocol.o \ udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o fib_trie.o fib_notifier.o \ inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o \ - metrics.o + metrics.o netlink.o + +obj-$(CONFIG_BPFILTER) += bpfilter/ obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o diff --git a/net/ipv4/bpfilter/Makefile b/net/ipv4/bpfilter/Makefile new file mode 100644 index 000000000000..ce262d76cc48 --- /dev/null +++ b/net/ipv4/bpfilter/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_BPFILTER) += sockopt.o + diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c new file mode 100644 index 000000000000..42a96d2d8d05 --- /dev/null +++ b/net/ipv4/bpfilter/sockopt.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/uaccess.h> +#include <linux/bpfilter.h> +#include <uapi/linux/bpf.h> +#include <linux/wait.h> +#include <linux/kmod.h> + +int (*bpfilter_process_sockopt)(struct sock *sk, int optname, + char __user *optval, + unsigned int optlen, bool is_set); +EXPORT_SYMBOL_GPL(bpfilter_process_sockopt); + +int bpfilter_mbox_request(struct sock *sk, int optname, char __user *optval, + unsigned int optlen, bool is_set) +{ + if (!bpfilter_process_sockopt) { + int err = request_module("bpfilter"); + + if (err) + return err; + if (!bpfilter_process_sockopt) + return -ECHILD; + } + return bpfilter_process_sockopt(sk, optname, optval, optlen, is_set); +} + +int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char __user *optval, + unsigned int optlen) +{ + return bpfilter_mbox_request(sk, optname, optval, optlen, true); +} + +int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval, + int __user *optlen) +{ + int len; + + if (get_user(len, optlen)) + return -EFAULT; + + return bpfilter_mbox_request(sk, optname, optval, len, false); +} diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 4d622112bf95..897ae92dff0f 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -649,6 +649,9 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = { [RTA_ENCAP] = { .type = NLA_NESTED }, [RTA_UID] = { .type = NLA_U32 }, [RTA_MARK] = { .type = NLA_U32 }, + [RTA_IP_PROTO] = { .type = NLA_U8 }, + [RTA_SPORT] = { .type = NLA_U16 }, + [RTA_DPORT] = { .type = NLA_U16 }, }; static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 5ad2d8ed3a3f..e0791faacb24 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -47,6 +47,8 @@ #include <linux/errqueue.h> #include <linux/uaccess.h> +#include <linux/bpfilter.h> + /* * SOL_IP control messages. */ @@ -1244,6 +1246,11 @@ int ip_setsockopt(struct sock *sk, int level, return -ENOPROTOOPT; err = do_ip_setsockopt(sk, level, optname, optval, optlen); +#ifdef CONFIG_BPFILTER + if (optname >= BPFILTER_IPT_SO_SET_REPLACE && + optname < BPFILTER_IPT_SET_MAX) + err = bpfilter_ip_set_sockopt(sk, optname, optval, optlen); +#endif #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IP_HDRINCL && @@ -1552,6 +1559,11 @@ int ip_getsockopt(struct sock *sk, int level, int err; err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0); +#ifdef CONFIG_BPFILTER + if (optname >= BPFILTER_IPT_SO_GET_INFO && + optname < BPFILTER_IPT_GET_MAX) + err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen); +#endif #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS && @@ -1584,6 +1596,11 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname, err = do_ip_getsockopt(sk, level, optname, optval, optlen, MSG_CMSG_COMPAT); +#ifdef CONFIG_BPFILTER + if (optname >= BPFILTER_IPT_SO_GET_INFO && + optname < BPFILTER_IPT_GET_MAX) + err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen); +#endif #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS && diff --git a/net/ipv4/netlink.c b/net/ipv4/netlink.c new file mode 100644 index 000000000000..f86bb4f06609 --- /dev/null +++ b/net/ipv4/netlink.c @@ -0,0 +1,23 @@ +#include <linux/netlink.h> +#include <linux/rtnetlink.h> +#include <linux/types.h> +#include <net/net_namespace.h> +#include <net/netlink.h> +#include <net/ip.h> + +int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto, + struct netlink_ext_ack *extack) +{ + *ip_proto = nla_get_u8(attr); + + switch (*ip_proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_ICMP: + return 0; + default: + NL_SET_ERR_MSG(extack, "Unsupported ip proto"); + return -EOPNOTSUPP; + } +} +EXPORT_SYMBOL_GPL(rtm_getroute_parse_ip_proto); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 2cfa1b518f8d..0e401dc4e1bd 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2574,11 +2574,10 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, EXPORT_SYMBOL_GPL(ip_route_output_flow); /* called with rcu_read_lock held */ -static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id, - struct flowi4 *fl4, struct sk_buff *skb, u32 portid, - u32 seq) +static int rt_fill_info(struct net *net, __be32 dst, __be32 src, + struct rtable *rt, u32 table_id, struct flowi4 *fl4, + struct sk_buff *skb, u32 portid, u32 seq) { - struct rtable *rt = skb_rtable(skb); struct rtmsg *r; struct nlmsghdr *nlh; unsigned long expires = 0; @@ -2674,7 +2673,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id, } } else #endif - if (nla_put_u32(skb, RTA_IIF, skb->dev->ifindex)) + if (nla_put_u32(skb, RTA_IIF, fl4->flowi4_iif)) goto nla_put_failure; } @@ -2689,43 +2688,93 @@ nla_put_failure: return -EMSGSIZE; } +static struct sk_buff *inet_rtm_getroute_build_skb(__be32 src, __be32 dst, + u8 ip_proto, __be16 sport, + __be16 dport) +{ + struct sk_buff *skb; + struct iphdr *iph; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb) + return NULL; + + /* Reserve room for dummy headers, this skb can pass + * through good chunk of routing engine. + */ + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + skb->protocol = htons(ETH_P_IP); + iph = skb_put(skb, sizeof(struct iphdr)); + iph->protocol = ip_proto; + iph->saddr = src; + iph->daddr = dst; + iph->version = 0x4; + iph->frag_off = 0; + iph->ihl = 0x5; + skb_set_transport_header(skb, skb->len); + + switch (iph->protocol) { + case IPPROTO_UDP: { + struct udphdr *udph; + + udph = skb_put_zero(skb, sizeof(struct udphdr)); + udph->source = sport; + udph->dest = dport; + udph->len = sizeof(struct udphdr); + udph->check = 0; + break; + } + case IPPROTO_TCP: { + struct tcphdr *tcph; + + tcph = skb_put_zero(skb, sizeof(struct tcphdr)); + tcph->source = sport; + tcph->dest = dport; + tcph->doff = sizeof(struct tcphdr) / 4; + tcph->rst = 1; + tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), + src, dst, 0); + break; + } + case IPPROTO_ICMP: { + struct icmphdr *icmph; + + icmph = skb_put_zero(skb, sizeof(struct icmphdr)); + icmph->type = ICMP_ECHO; + icmph->code = 0; + } + } + + return skb; +} + static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(in_skb->sk); - struct rtmsg *rtm; struct nlattr *tb[RTA_MAX+1]; + u32 table_id = RT_TABLE_MAIN; + __be16 sport = 0, dport = 0; struct fib_result res = {}; + u8 ip_proto = IPPROTO_UDP; struct rtable *rt = NULL; + struct sk_buff *skb; + struct rtmsg *rtm; struct flowi4 fl4; __be32 dst = 0; __be32 src = 0; + kuid_t uid; u32 iif; int err; int mark; - struct sk_buff *skb; - u32 table_id = RT_TABLE_MAIN; - kuid_t uid; err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy, extack); if (err < 0) - goto errout; + return err; rtm = nlmsg_data(nlh); - - skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); - if (!skb) { - err = -ENOBUFS; - goto errout; - } - - /* Reserve room for dummy headers, this skb can pass - through good chunk of routing engine. - */ - skb_reset_mac_header(skb); - skb_reset_network_header(skb); - src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0; dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0; iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; @@ -2735,14 +2784,22 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, else uid = (iif ? INVALID_UID : current_uid()); - /* Bugfix: need to give ip_route_input enough of an IP header to - * not gag. - */ - ip_hdr(skb)->protocol = IPPROTO_UDP; - ip_hdr(skb)->saddr = src; - ip_hdr(skb)->daddr = dst; + if (tb[RTA_IP_PROTO]) { + err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO], + &ip_proto, extack); + if (err) + return err; + } + + if (tb[RTA_SPORT]) + sport = nla_get_be16(tb[RTA_SPORT]); - skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr)); + if (tb[RTA_DPORT]) + dport = nla_get_be16(tb[RTA_DPORT]); + + skb = inet_rtm_getroute_build_skb(src, dst, ip_proto, sport, dport); + if (!skb) + return -ENOBUFS; memset(&fl4, 0, sizeof(fl4)); fl4.daddr = dst; @@ -2751,6 +2808,11 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0; fl4.flowi4_mark = mark; fl4.flowi4_uid = uid; + if (sport) + fl4.fl4_sport = sport; + if (dport) + fl4.fl4_dport = dport; + fl4.flowi4_proto = ip_proto; rcu_read_lock(); @@ -2760,10 +2822,10 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, dev = dev_get_by_index_rcu(net, iif); if (!dev) { err = -ENODEV; - goto errout_free; + goto errout_rcu; } - skb->protocol = htons(ETH_P_IP); + fl4.flowi4_iif = iif; /* for rt_fill_info */ skb->dev = dev; skb->mark = mark; err = ip_route_input_rcu(skb, dst, src, rtm->rtm_tos, @@ -2783,7 +2845,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, } if (err) - goto errout_free; + goto errout_rcu; if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; @@ -2791,34 +2853,40 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE) table_id = res.table ? res.table->tb_id : 0; + /* reset skb for netlink reply msg */ + skb_trim(skb, 0); + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + skb_reset_mac_header(skb); + if (rtm->rtm_flags & RTM_F_FIB_MATCH) { if (!res.fi) { err = fib_props[res.type].error; if (!err) err = -EHOSTUNREACH; - goto errout_free; + goto errout_rcu; } err = fib_dump_info(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWROUTE, table_id, rt->rt_type, res.prefix, res.prefixlen, fl4.flowi4_tos, res.fi, 0); } else { - err = rt_fill_info(net, dst, src, table_id, &fl4, skb, + err = rt_fill_info(net, dst, src, rt, table_id, &fl4, skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq); } if (err < 0) - goto errout_free; + goto errout_rcu; rcu_read_unlock(); err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); -errout: - return err; errout_free: + return err; +errout_rcu: rcu_read_unlock(); kfree_skb(skb); - goto errout; + goto errout_free; } void ip_rt_multicast_event(struct in_device *in_dev) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index ff4d4ba67735..d71f1f3e1155 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -788,7 +788,8 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4, return -EINVAL; if (sk->sk_no_check_tx) return -EINVAL; - if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite) + if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite || + dst_xfrm(skb_dst(skb))) return -EIO; skb_shinfo(skb)->gso_size = cork->gso_size; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index bcb8785c0451..038d661d5ffc 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -63,6 +63,7 @@ #include <net/lwtunnel.h> #include <net/ip_tunnels.h> #include <net/l3mdev.h> +#include <net/ip.h> #include <trace/events/fib6.h> #include <linux/uaccess.h> @@ -4083,6 +4084,9 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_UID] = { .type = NLA_U32 }, [RTA_MARK] = { .type = NLA_U32 }, [RTA_TABLE] = { .type = NLA_U32 }, + [RTA_IP_PROTO] = { .type = NLA_U8 }, + [RTA_SPORT] = { .type = NLA_U16 }, + [RTA_DPORT] = { .type = NLA_U16 }, }; static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -4795,6 +4799,19 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, else fl6.flowi6_uid = iif ? INVALID_UID : current_uid(); + if (tb[RTA_SPORT]) + fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]); + + if (tb[RTA_DPORT]) + fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]); + + if (tb[RTA_IP_PROTO]) { + err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO], + &fl6.flowi6_proto, extack); + if (err) + goto errout; + } + if (iif) { struct net_device *dev; int flags = 0; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 2839c1bd1e58..426c9d2b418d 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1053,7 +1053,8 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, return -EINVAL; if (udp_sk(sk)->no_check6_tx) return -EINVAL; - if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite) + if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite || + dst_xfrm(skb_dst(skb))) return -EIO; skb_shinfo(skb)->gso_size = cork->gso_size; diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index e60dddbf963c..7cb0f49efdb7 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -6,7 +6,7 @@ CFLAGS += -I../../../../usr/include/ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh -TEST_PROGS += udpgso_bench.sh +TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh new file mode 100755 index 000000000000..d4cfb6a7a086 --- /dev/null +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -0,0 +1,248 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test is for checking IPv4 and IPv6 FIB rules API + +ret=0 + +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} +IP="ip -netns testns" + +RTABLE=100 +GW_IP4=192.51.100.2 +SRC_IP=192.51.100.3 +GW_IP6=2001:db8:1::2 +SRC_IP6=2001:db8:1::3 + +DEV_ADDR=192.51.100.1 +DEV=dummy0 + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-50s [ OK ]\n" "${msg}" + else + nfail=$((nfail+1)) + printf "\n TEST: %-50s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +log_section() +{ + echo + echo "######################################################################" + echo "TEST SECTION: $*" + echo "######################################################################" +} + +setup() +{ + set -e + ip netns add testns + $IP link set dev lo up + + $IP link add dummy0 type dummy + $IP link set dev dummy0 up + $IP address add 198.51.100.1/24 dev dummy0 + $IP -6 address add 2001:db8:1::1/64 dev dummy0 + + set +e +} + +cleanup() +{ + $IP link del dev dummy0 &> /dev/null + ip netns del testns +} + +fib_check_iproute_support() +{ + ip rule help 2>&1 | grep -q $1 + if [ $? -ne 0 ]; then + echo "SKIP: iproute2 iprule too old, missing $1 match" + return 1 + fi + + ip route get help 2>&1 | grep -q $2 + if [ $? -ne 0 ]; then + echo "SKIP: iproute2 get route too old, missing $2 match" + return 1 + fi + + return 0 +} + +fib_rule6_del() +{ + $IP -6 rule del $1 + log_test $? 0 "rule6 del $1" +} + +fib_rule6_del_by_pref() +{ + pref=$($IP -6 rule show | grep "$1 lookup $TABLE" | cut -d ":" -f 1) + $IP -6 rule del pref $pref +} + +fib_rule6_test_match_n_redirect() +{ + local match="$1" + local getmatch="$2" + + $IP -6 rule add $match table $RTABLE + $IP -6 route get $GW_IP6 $getmatch | grep -q "table $RTABLE" + log_test $? 0 "rule6 check: $1" + + fib_rule6_del_by_pref "$match" + log_test $? 0 "rule6 del by pref: $match" +} + +fib_rule6_test() +{ + # setup the fib rule redirect route + $IP -6 route add table $RTABLE default via $GW_IP6 dev $DEV onlink + + match="oif $DEV" + fib_rule6_test_match_n_redirect "$match" "$match" "oif redirect to table" + + match="from $SRC_IP6 iif $DEV" + fib_rule6_test_match_n_redirect "$match" "$match" "iif redirect to table" + + match="tos 0x10" + fib_rule6_test_match_n_redirect "$match" "$match" "tos redirect to table" + + match="fwmark 0x64" + getmatch="mark 0x64" + fib_rule6_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table" + + fib_check_iproute_support "uidrange" "uid" + if [ $? -eq 0 ]; then + match="uidrange 100-100" + getmatch="uid 100" + fib_rule6_test_match_n_redirect "$match" "$getmatch" "uid redirect to table" + fi + + fib_check_iproute_support "sport" "sport" + if [ $? -eq 0 ]; then + match="sport 666 dport 777" + fib_rule6_test_match_n_redirect "$match" "$match" "sport and dport redirect to table" + fi + + fib_check_iproute_support "ipproto" "ipproto" + if [ $? -eq 0 ]; then + match="ipproto tcp" + fib_rule6_test_match_n_redirect "$match" "$match" "ipproto match" + fi + + fib_check_iproute_support "ipproto" "ipproto" + if [ $? -eq 0 ]; then + match="ipproto icmp" + fib_rule6_test_match_n_redirect "$match" "$match" "ipproto icmp match" + fi +} + +fib_rule4_del() +{ + $IP rule del $1 + log_test $? 0 "del $1" +} + +fib_rule4_del_by_pref() +{ + pref=$($IP rule show | grep "$1 lookup $TABLE" | cut -d ":" -f 1) + $IP rule del pref $pref +} + +fib_rule4_test_match_n_redirect() +{ + local match="$1" + local getmatch="$2" + + $IP rule add $match table $RTABLE + $IP route get $GW_IP4 $getmatch | grep -q "table $RTABLE" + log_test $? 0 "rule4 check: $1" + + fib_rule4_del_by_pref "$match" + log_test $? 0 "rule4 del by pref: $match" +} + +fib_rule4_test() +{ + # setup the fib rule redirect route + $IP route add table $RTABLE default via $GW_IP4 dev $DEV onlink + + match="oif $DEV" + fib_rule4_test_match_n_redirect "$match" "$match" "oif redirect to table" + + match="from $SRC_IP iif $DEV" + fib_rule4_test_match_n_redirect "$match" "$match" "iif redirect to table" + + match="tos 0x10" + fib_rule4_test_match_n_redirect "$match" "$match" "tos redirect to table" + + match="fwmark 0x64" + getmatch="mark 0x64" + fib_rule4_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table" + + fib_check_iproute_support "uidrange" "uid" + if [ $? -eq 0 ]; then + match="uidrange 100-100" + getmatch="uid 100" + fib_rule4_test_match_n_redirect "$match" "$getmatch" "uid redirect to table" + fi + + fib_check_iproute_support "sport" "sport" + if [ $? -eq 0 ]; then + match="sport 666 dport 777" + fib_rule4_test_match_n_redirect "$match" "$match" "sport and dport redirect to table" + fi + + fib_check_iproute_support "ipproto" "ipproto" + if [ $? -eq 0 ]; then + match="ipproto tcp" + fib_rule4_test_match_n_redirect "$match" "$match" "ipproto tcp match" + fi + + fib_check_iproute_support "ipproto" "ipproto" + if [ $? -eq 0 ]; then + match="ipproto icmp" + fib_rule4_test_match_n_redirect "$match" "$match" "ipproto icmp match" + fi +} + +run_fibrule_tests() +{ + log_section "IPv4 fib rule" + fib_rule4_test + log_section "IPv6 fib rule" + fib_rule6_test +} + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit 0 +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit 0 +fi + +# start clean +cleanup &> /dev/null +setup +run_fibrule_tests +cleanup + +exit $ret diff --git a/tools/testing/selftests/uevent/Makefile b/tools/testing/selftests/uevent/Makefile new file mode 100644 index 000000000000..f7baa9aa2932 --- /dev/null +++ b/tools/testing/selftests/uevent/Makefile @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0 +all: + +include ../lib.mk + +.PHONY: all clean + +BINARIES := uevent_filtering +CFLAGS += -Wl,-no-as-needed -Wall + +uevent_filtering: uevent_filtering.c ../kselftest.h ../kselftest_harness.h + $(CC) $(CFLAGS) $< -o $@ + +TEST_PROGS += $(BINARIES) +EXTRA_CLEAN := $(BINARIES) + +all: $(BINARIES) diff --git a/tools/testing/selftests/uevent/config b/tools/testing/selftests/uevent/config new file mode 100644 index 000000000000..1038f4515be8 --- /dev/null +++ b/tools/testing/selftests/uevent/config @@ -0,0 +1,2 @@ +CONFIG_USER_NS=y +CONFIG_NET=y diff --git a/tools/testing/selftests/uevent/uevent_filtering.c b/tools/testing/selftests/uevent/uevent_filtering.c new file mode 100644 index 000000000000..f83391aa42cf --- /dev/null +++ b/tools/testing/selftests/uevent/uevent_filtering.c @@ -0,0 +1,486 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <linux/netlink.h> +#include <signal.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/prctl.h> +#include <sys/socket.h> +#include <sched.h> +#include <sys/eventfd.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "../kselftest.h" +#include "../kselftest_harness.h" + +#define __DEV_FULL "/sys/devices/virtual/mem/full/uevent" +#define __UEVENT_BUFFER_SIZE (2048 * 2) +#define __UEVENT_HEADER "add@/devices/virtual/mem/full" +#define __UEVENT_HEADER_LEN sizeof("add@/devices/virtual/mem/full") +#define __UEVENT_LISTEN_ALL -1 + +ssize_t read_nointr(int fd, void *buf, size_t count) +{ + ssize_t ret; + +again: + ret = read(fd, buf, count); + if (ret < 0 && errno == EINTR) + goto again; + + return ret; +} + +ssize_t write_nointr(int fd, const void *buf, size_t count) +{ + ssize_t ret; + +again: + ret = write(fd, buf, count); + if (ret < 0 && errno == EINTR) + goto again; + + return ret; +} + +int wait_for_pid(pid_t pid) +{ + int status, ret; + +again: + ret = waitpid(pid, &status, 0); + if (ret == -1) { + if (errno == EINTR) + goto again; + + return -1; + } + + if (ret != pid) + goto again; + + if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) + return -1; + + return 0; +} + +static int uevent_listener(unsigned long post_flags, bool expect_uevent, + int sync_fd) +{ + int sk_fd, ret; + socklen_t sk_addr_len; + int fret = -1, rcv_buf_sz = __UEVENT_BUFFER_SIZE; + uint64_t sync_add = 1; + struct sockaddr_nl sk_addr = { 0 }, rcv_addr = { 0 }; + char buf[__UEVENT_BUFFER_SIZE] = { 0 }; + struct iovec iov = { buf, __UEVENT_BUFFER_SIZE }; + char control[CMSG_SPACE(sizeof(struct ucred))]; + struct msghdr hdr = { + &rcv_addr, sizeof(rcv_addr), &iov, 1, + control, sizeof(control), 0, + }; + + sk_fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, + NETLINK_KOBJECT_UEVENT); + if (sk_fd < 0) { + fprintf(stderr, "%s - Failed to open uevent socket\n", strerror(errno)); + return -1; + } + + ret = setsockopt(sk_fd, SOL_SOCKET, SO_RCVBUF, &rcv_buf_sz, + sizeof(rcv_buf_sz)); + if (ret < 0) { + fprintf(stderr, "%s - Failed to set socket options\n", strerror(errno)); + goto on_error; + } + + sk_addr.nl_family = AF_NETLINK; + sk_addr.nl_groups = __UEVENT_LISTEN_ALL; + + sk_addr_len = sizeof(sk_addr); + ret = bind(sk_fd, (struct sockaddr *)&sk_addr, sk_addr_len); + if (ret < 0) { + fprintf(stderr, "%s - Failed to bind socket\n", strerror(errno)); + goto on_error; + } + + ret = getsockname(sk_fd, (struct sockaddr *)&sk_addr, &sk_addr_len); + if (ret < 0) { + fprintf(stderr, "%s - Failed to retrieve socket name\n", strerror(errno)); + goto on_error; + } + + if ((size_t)sk_addr_len != sizeof(sk_addr)) { + fprintf(stderr, "Invalid socket address size\n"); + goto on_error; + } + + if (post_flags & CLONE_NEWUSER) { + ret = unshare(CLONE_NEWUSER); + if (ret < 0) { + fprintf(stderr, + "%s - Failed to unshare user namespace\n", + strerror(errno)); + goto on_error; + } + } + + if (post_flags & CLONE_NEWNET) { + ret = unshare(CLONE_NEWNET); + if (ret < 0) { + fprintf(stderr, + "%s - Failed to unshare network namespace\n", + strerror(errno)); + goto on_error; + } + } + + ret = write_nointr(sync_fd, &sync_add, sizeof(sync_add)); + close(sync_fd); + if (ret != sizeof(sync_add)) { + fprintf(stderr, "Failed to synchronize with parent process\n"); + goto on_error; + } + + fret = 0; + for (;;) { + ssize_t r; + + r = recvmsg(sk_fd, &hdr, 0); + if (r <= 0) { + fprintf(stderr, "%s - Failed to receive uevent\n", strerror(errno)); + ret = -1; + break; + } + + /* ignore libudev messages */ + if (memcmp(buf, "libudev", 8) == 0) + continue; + + /* ignore uevents we didn't trigger */ + if (memcmp(buf, __UEVENT_HEADER, __UEVENT_HEADER_LEN) != 0) + continue; + + if (!expect_uevent) { + fprintf(stderr, "Received unexpected uevent:\n"); + ret = -1; + } + + if (TH_LOG_ENABLED) { + /* If logging is enabled dump the received uevent. */ + (void)write_nointr(STDERR_FILENO, buf, r); + (void)write_nointr(STDERR_FILENO, "\n", 1); + } + + break; + } + +on_error: + close(sk_fd); + + return fret; +} + +int trigger_uevent(unsigned int times) +{ + int fd, ret; + unsigned int i; + + fd = open(__DEV_FULL, O_RDWR | O_CLOEXEC); + if (fd < 0) { + if (errno != ENOENT) + return -EINVAL; + + return -1; + } + + for (i = 0; i < times; i++) { + ret = write_nointr(fd, "add\n", sizeof("add\n") - 1); + if (ret < 0) { + fprintf(stderr, "Failed to trigger uevent\n"); + break; + } + } + close(fd); + + return ret; +} + +int set_death_signal(void) +{ + int ret; + pid_t ppid; + + ret = prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); + + /* Check whether we have been orphaned. */ + ppid = getppid(); + if (ppid == 1) { + pid_t self; + + self = getpid(); + ret = kill(self, SIGKILL); + } + + if (ret < 0) + return -1; + + return 0; +} + +static int do_test(unsigned long pre_flags, unsigned long post_flags, + bool expect_uevent, int sync_fd) +{ + int ret; + uint64_t wait_val; + pid_t pid; + sigset_t mask; + sigset_t orig_mask; + struct timespec timeout; + + sigemptyset(&mask); + sigaddset(&mask, SIGCHLD); + + ret = sigprocmask(SIG_BLOCK, &mask, &orig_mask); + if (ret < 0) { + fprintf(stderr, "%s- Failed to block SIGCHLD\n", strerror(errno)); + return -1; + } + + pid = fork(); + if (pid < 0) { + fprintf(stderr, "%s - Failed to fork() new process\n", strerror(errno)); + return -1; + } + + if (pid == 0) { + /* Make sure that we go away when our parent dies. */ + ret = set_death_signal(); + if (ret < 0) { + fprintf(stderr, "Failed to set PR_SET_PDEATHSIG to SIGKILL\n"); + _exit(EXIT_FAILURE); + } + + if (pre_flags & CLONE_NEWUSER) { + ret = unshare(CLONE_NEWUSER); + if (ret < 0) { + fprintf(stderr, + "%s - Failed to unshare user namespace\n", + strerror(errno)); + _exit(EXIT_FAILURE); + } + } + + if (pre_flags & CLONE_NEWNET) { + ret = unshare(CLONE_NEWNET); + if (ret < 0) { + fprintf(stderr, + "%s - Failed to unshare network namespace\n", + strerror(errno)); + _exit(EXIT_FAILURE); + } + } + + if (uevent_listener(post_flags, expect_uevent, sync_fd) < 0) + _exit(EXIT_FAILURE); + + _exit(EXIT_SUCCESS); + } + + ret = read_nointr(sync_fd, &wait_val, sizeof(wait_val)); + if (ret != sizeof(wait_val)) { + fprintf(stderr, "Failed to synchronize with child process\n"); + _exit(EXIT_FAILURE); + } + + /* Trigger 10 uevents to account for the case where the kernel might + * drop some. + */ + ret = trigger_uevent(10); + if (ret < 0) + fprintf(stderr, "Failed triggering uevents\n"); + + /* Wait for 2 seconds before considering this failed. This should be + * plenty of time for the kernel to deliver the uevent even under heavy + * load. + */ + timeout.tv_sec = 2; + timeout.tv_nsec = 0; + +again: + ret = sigtimedwait(&mask, NULL, &timeout); + if (ret < 0) { + if (errno == EINTR) + goto again; + + if (!expect_uevent) + ret = kill(pid, SIGTERM); /* success */ + else + ret = kill(pid, SIGUSR1); /* error */ + if (ret < 0) + return -1; + } + + ret = wait_for_pid(pid); + if (ret < 0) + return -1; + + return ret; +} + +static void signal_handler(int sig) +{ + if (sig == SIGTERM) + _exit(EXIT_SUCCESS); + + _exit(EXIT_FAILURE); +} + +TEST(uevent_filtering) +{ + int ret, sync_fd; + struct sigaction act; + + if (geteuid()) { + TH_LOG("Uevent filtering tests require root privileges. Skipping test"); + _exit(KSFT_SKIP); + } + + ret = access(__DEV_FULL, F_OK); + EXPECT_EQ(0, ret) { + if (errno == ENOENT) { + TH_LOG(__DEV_FULL " does not exist. Skipping test"); + _exit(KSFT_SKIP); + } + + _exit(KSFT_FAIL); + } + + act.sa_handler = signal_handler; + act.sa_flags = 0; + sigemptyset(&act.sa_mask); + + ret = sigaction(SIGTERM, &act, NULL); + ASSERT_EQ(0, ret); + + sync_fd = eventfd(0, EFD_CLOEXEC); + ASSERT_GE(sync_fd, 0); + + /* + * Setup: + * - Open uevent listening socket in initial network namespace owned by + * initial user namespace. + * - Trigger uevent in initial network namespace owned by initial user + * namespace. + * Expected Result: + * - uevent listening socket receives uevent + */ + ret = do_test(0, 0, true, sync_fd); + ASSERT_EQ(0, ret) { + goto do_cleanup; + } + + /* + * Setup: + * - Open uevent listening socket in non-initial network namespace + * owned by initial user namespace. + * - Trigger uevent in initial network namespace owned by initial user + * namespace. + * Expected Result: + * - uevent listening socket receives uevent + */ + ret = do_test(CLONE_NEWNET, 0, true, sync_fd); + ASSERT_EQ(0, ret) { + goto do_cleanup; + } + + /* + * Setup: + * - unshare user namespace + * - Open uevent listening socket in initial network namespace + * owned by initial user namespace. + * - Trigger uevent in initial network namespace owned by initial user + * namespace. + * Expected Result: + * - uevent listening socket receives uevent + */ + ret = do_test(CLONE_NEWUSER, 0, true, sync_fd); + ASSERT_EQ(0, ret) { + goto do_cleanup; + } + + /* + * Setup: + * - Open uevent listening socket in non-initial network namespace + * owned by non-initial user namespace. + * - Trigger uevent in initial network namespace owned by initial user + * namespace. + * Expected Result: + * - uevent listening socket receives no uevent + */ + ret = do_test(CLONE_NEWUSER | CLONE_NEWNET, 0, false, sync_fd); + ASSERT_EQ(0, ret) { + goto do_cleanup; + } + + /* + * Setup: + * - Open uevent listening socket in initial network namespace + * owned by initial user namespace. + * - unshare network namespace + * - Trigger uevent in initial network namespace owned by initial user + * namespace. + * Expected Result: + * - uevent listening socket receives uevent + */ + ret = do_test(0, CLONE_NEWNET, true, sync_fd); + ASSERT_EQ(0, ret) { + goto do_cleanup; + } + + /* + * Setup: + * - Open uevent listening socket in initial network namespace + * owned by initial user namespace. + * - unshare user namespace + * - Trigger uevent in initial network namespace owned by initial user + * namespace. + * Expected Result: + * - uevent listening socket receives uevent + */ + ret = do_test(0, CLONE_NEWUSER, true, sync_fd); + ASSERT_EQ(0, ret) { + goto do_cleanup; + } + + /* + * Setup: + * - Open uevent listening socket in initial network namespace + * owned by initial user namespace. + * - unshare user namespace + * - unshare network namespace + * - Trigger uevent in initial network namespace owned by initial user + * namespace. + * Expected Result: + * - uevent listening socket receives uevent + */ + ret = do_test(0, CLONE_NEWUSER | CLONE_NEWNET, true, sync_fd); + ASSERT_EQ(0, ret) { + goto do_cleanup; + } + +do_cleanup: + close(sync_fd); +} + +TEST_HARNESS_MAIN |