aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/devicetree/bindings/net/nfc/nxp,nci.yaml4
-rw-r--r--Documentation/networking/devlink/devlink-region.rst13
-rw-r--r--Documentation/networking/devlink/ice.rst13
-rw-r--r--drivers/crypto/marvell/octeontx2/otx2_cpt_devlink.c4
-rw-r--r--drivers/net/dsa/hirschmann/hellcreek.c5
-rw-r--r--drivers/net/dsa/mv88e6xxx/devlink.c5
-rw-r--r--drivers/net/dsa/sja1105/sja1105_devlink.c12
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c4
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c11
-rw-r--r--drivers/net/ethernet/fungible/funeth/funeth_devlink.c7
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c5
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c5
-rw-r--r--drivers/net/ethernet/intel/ice/ice_devlink.c118
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c7
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c17
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c15
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera_devlink.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/devlink.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core.c5
-rw-r--r--drivers/net/ethernet/microchip/vcap/vcap_api.c7
-rw-r--r--drivers/net/ethernet/microchip/vcap/vcap_api.h1
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_devlink.c4
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_devlink.c4
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_devlink.c4
-rw-r--r--drivers/net/netdevsim/dev.c3
-rw-r--r--drivers/net/phy/phy_device.c12
-rw-r--r--drivers/ptp/ptp_ocp.c4
-rw-r--r--include/linux/phy.h4
-rw-r--r--include/net/devlink.h18
-rw-r--r--include/uapi/linux/devlink.h2
-rw-r--r--net/core/devlink.c174
-rw-r--r--tools/testing/selftests/net/.gitignore1
-rw-r--r--tools/testing/selftests/net/Makefile1
-rw-r--r--tools/testing/selftests/net/csum.c986
34 files changed, 1257 insertions, 227 deletions
diff --git a/Documentation/devicetree/bindings/net/nfc/nxp,nci.yaml b/Documentation/devicetree/bindings/net/nfc/nxp,nci.yaml
index b2558421268a..6924aff0b2c5 100644
--- a/Documentation/devicetree/bindings/net/nfc/nxp,nci.yaml
+++ b/Documentation/devicetree/bindings/net/nfc/nxp,nci.yaml
@@ -14,7 +14,9 @@ properties:
oneOf:
- const: nxp,nxp-nci-i2c
- items:
- - const: nxp,pn547
+ - enum:
+ - nxp,nq310
+ - nxp,pn547
- const: nxp,nxp-nci-i2c
enable-gpios:
diff --git a/Documentation/networking/devlink/devlink-region.rst b/Documentation/networking/devlink/devlink-region.rst
index f06dca9a1eb6..9232cd7da301 100644
--- a/Documentation/networking/devlink/devlink-region.rst
+++ b/Documentation/networking/devlink/devlink-region.rst
@@ -31,6 +31,15 @@ in its ``devlink_region_ops`` structure. If snapshot id is not set in
the ``DEVLINK_CMD_REGION_NEW`` request kernel will allocate one and send
the snapshot information to user space.
+Regions may optionally allow directly reading from their contents without a
+snapshot. Direct read requests are not atomic. In particular a read request
+of size 256 bytes or larger will be split into multiple chunks. If atomic
+access is required, use a snapshot. A driver wishing to enable this for a
+region should implement the ``.read`` callback in the ``devlink_region_ops``
+structure. User space can request a direct read by using the
+``DEVLINK_ATTR_REGION_DIRECT`` attribute instead of specifying a snapshot
+id.
+
example usage
-------------
@@ -65,6 +74,10 @@ example usage
$ devlink region read pci/0000:00:05.0/fw-health snapshot 1 address 0 length 16
0000000000000000 0014 95dc 0014 9514 0035 1670 0034 db30
+ # Read from the region without a snapshot
+ $ devlink region read pci/0000:00:05.0/fw-health address 16 length 16
+ 0000000000000010 0000 0000 ffff ff04 0029 8c00 0028 8cc8
+
As regions are likely very device or driver specific, no generic regions are
defined. See the driver-specific documentation files for information on the
specific regions a driver supports.
diff --git a/Documentation/networking/devlink/ice.rst b/Documentation/networking/devlink/ice.rst
index 890062da7820..625efb3777d5 100644
--- a/Documentation/networking/devlink/ice.rst
+++ b/Documentation/networking/devlink/ice.rst
@@ -189,12 +189,21 @@ device data.
* - ``nvm-flash``
- The contents of the entire flash chip, sometimes referred to as
the device's Non Volatile Memory.
+ * - ``shadow-ram``
+ - The contents of the Shadow RAM, which is loaded from the beginning
+ of the flash. Although the contents are primarily from the flash,
+ this area also contains data generated during device boot which is
+ not stored in flash.
* - ``device-caps``
- The contents of the device firmware's capabilities buffer. Useful to
determine the current state and configuration of the device.
-Users can request an immediate capture of a snapshot via the
-``DEVLINK_CMD_REGION_NEW``
+Both the ``nvm-flash`` and ``shadow-ram`` regions can be accessed without a
+snapshot. The ``device-caps`` region requires a snapshot as the contents are
+sent by firmware and can't be split into separate reads.
+
+Users can request an immediate capture of a snapshot for all three regions
+via the ``DEVLINK_CMD_REGION_NEW`` command.
.. code:: shell
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_devlink.c b/drivers/crypto/marvell/octeontx2/otx2_cpt_devlink.c
index 7503f6b18ac5..a2aba0b0d68a 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cpt_devlink.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_devlink.c
@@ -76,10 +76,6 @@ static int otx2_cpt_devlink_info_get(struct devlink *dl,
struct otx2_cptpf_dev *cptpf = cpt_dl->cptpf;
int err;
- err = devlink_info_driver_name_put(req, "rvu_cptpf");
- if (err)
- return err;
-
err = otx2_cpt_dl_info_firmware_version_put(req, cptpf->eng_grps.grp,
"fw.ae", OTX2_CPT_AE_TYPES);
if (err)
diff --git a/drivers/net/dsa/hirschmann/hellcreek.c b/drivers/net/dsa/hirschmann/hellcreek.c
index 951f7935c872..595a548bb0a8 100644
--- a/drivers/net/dsa/hirschmann/hellcreek.c
+++ b/drivers/net/dsa/hirschmann/hellcreek.c
@@ -1176,11 +1176,6 @@ static int hellcreek_devlink_info_get(struct dsa_switch *ds,
struct netlink_ext_ack *extack)
{
struct hellcreek *hellcreek = ds->priv;
- int ret;
-
- ret = devlink_info_driver_name_put(req, "hellcreek");
- if (ret)
- return ret;
return devlink_info_version_fixed_put(req,
DEVLINK_INFO_VERSION_GENERIC_ASIC_ID,
diff --git a/drivers/net/dsa/mv88e6xxx/devlink.c b/drivers/net/dsa/mv88e6xxx/devlink.c
index 1266eabee086..a08dab75e0c0 100644
--- a/drivers/net/dsa/mv88e6xxx/devlink.c
+++ b/drivers/net/dsa/mv88e6xxx/devlink.c
@@ -821,11 +821,6 @@ int mv88e6xxx_devlink_info_get(struct dsa_switch *ds,
struct netlink_ext_ack *extack)
{
struct mv88e6xxx_chip *chip = ds->priv;
- int err;
-
- err = devlink_info_driver_name_put(req, "mv88e6xxx");
- if (err)
- return err;
return devlink_info_version_fixed_put(req,
DEVLINK_INFO_VERSION_GENERIC_ASIC_ID,
diff --git a/drivers/net/dsa/sja1105/sja1105_devlink.c b/drivers/net/dsa/sja1105/sja1105_devlink.c
index 10c6fea1227f..da532614f34a 100644
--- a/drivers/net/dsa/sja1105/sja1105_devlink.c
+++ b/drivers/net/dsa/sja1105/sja1105_devlink.c
@@ -120,16 +120,10 @@ int sja1105_devlink_info_get(struct dsa_switch *ds,
struct netlink_ext_ack *extack)
{
struct sja1105_private *priv = ds->priv;
- int rc;
-
- rc = devlink_info_driver_name_put(req, "sja1105");
- if (rc)
- return rc;
- rc = devlink_info_version_fixed_put(req,
- DEVLINK_INFO_VERSION_GENERIC_ASIC_ID,
- priv->info->name);
- return rc;
+ return devlink_info_version_fixed_put(req,
+ DEVLINK_INFO_VERSION_GENERIC_ASIC_ID,
+ priv->info->name);
}
int sja1105_devlink_setup(struct dsa_switch *ds)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
index 8a6f788f6294..26913dc816d3 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
@@ -892,10 +892,6 @@ static int bnxt_dl_info_get(struct devlink *dl, struct devlink_info_req *req,
u32 ver = 0;
int rc;
- rc = devlink_info_driver_name_put(req, DRV_MODULE_NAME);
- if (rc)
- return rc;
-
if (BNXT_PF(bp) && (bp->flags & BNXT_FLAG_DSN_VALID)) {
sprintf(buf, "%02X-%02X-%02X-%02X-%02X-%02X-%02X-%02X",
bp->dsn[7], bp->dsn[6], bp->dsn[5], bp->dsn[4],
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c
index 5c6dd3029e2f..76f808d38066 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c
@@ -37,18 +37,9 @@ static int dpaa2_eth_dl_info_get(struct devlink *devlink,
struct dpaa2_eth_devlink_priv *dl_priv = devlink_priv(devlink);
struct dpaa2_eth_priv *priv = dl_priv->dpaa2_priv;
char buf[10];
- int err;
-
- err = devlink_info_driver_name_put(req, KBUILD_MODNAME);
- if (err)
- return err;
scnprintf(buf, 10, "%d.%d", priv->dpni_ver_major, priv->dpni_ver_minor);
- err = devlink_info_version_running_put(req, "dpni", buf);
- if (err)
- return err;
-
- return 0;
+ return devlink_info_version_running_put(req, "dpni", buf);
}
static struct dpaa2_eth_trap_item *
diff --git a/drivers/net/ethernet/fungible/funeth/funeth_devlink.c b/drivers/net/ethernet/fungible/funeth/funeth_devlink.c
index d50c222948b4..4fbeb3fd71a8 100644
--- a/drivers/net/ethernet/fungible/funeth/funeth_devlink.c
+++ b/drivers/net/ethernet/fungible/funeth/funeth_devlink.c
@@ -3,14 +3,7 @@
#include "funeth.h"
#include "funeth_devlink.h"
-static int fun_dl_info_get(struct devlink *dl, struct devlink_info_req *req,
- struct netlink_ext_ack *extack)
-{
- return devlink_info_driver_name_put(req, KBUILD_MODNAME);
-}
-
static const struct devlink_ops fun_dl_ops = {
- .info_get = fun_dl_info_get,
};
struct devlink *fun_devlink_alloc(struct device *dev)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c
index 4c441e6a5082..3d3b69605423 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c
@@ -13,11 +13,6 @@ static int hclge_devlink_info_get(struct devlink *devlink,
struct hclge_devlink_priv *priv = devlink_priv(devlink);
char version_str[HCLGE_DEVLINK_FW_STRING_LEN];
struct hclge_dev *hdev = priv->hdev;
- int ret;
-
- ret = devlink_info_driver_name_put(req, KBUILD_MODNAME);
- if (ret)
- return ret;
snprintf(version_str, sizeof(version_str), "%lu.%lu.%lu.%lu",
hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE3_MASK,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c
index fdc19868b818..a6c3c5e8f0ab 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c
@@ -13,11 +13,6 @@ static int hclgevf_devlink_info_get(struct devlink *devlink,
struct hclgevf_devlink_priv *priv = devlink_priv(devlink);
char version_str[HCLGEVF_DEVLINK_FW_STRING_LEN];
struct hclgevf_dev *hdev = priv->hdev;
- int ret;
-
- ret = devlink_info_driver_name_put(req, KBUILD_MODNAME);
- if (ret)
- return ret;
snprintf(version_str, sizeof(version_str), "%lu.%lu.%lu.%lu",
hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE3_MASK,
diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c
index 1d638216484d..8286e47b4bae 100644
--- a/drivers/net/ethernet/intel/ice/ice_devlink.c
+++ b/drivers/net/ethernet/intel/ice/ice_devlink.c
@@ -311,12 +311,6 @@ static int ice_devlink_info_get(struct devlink *devlink,
}
}
- err = devlink_info_driver_name_put(req, KBUILD_MODNAME);
- if (err) {
- NL_SET_ERR_MSG_MOD(extack, "Unable to set driver name");
- goto out_free_ctx;
- }
-
ice_info_get_dsn(pf, ctx);
err = devlink_info_serial_number_put(req, ctx->buf);
@@ -1596,21 +1590,22 @@ void ice_devlink_destroy_vf_port(struct ice_vf *vf)
#define ICE_DEVLINK_READ_BLK_SIZE (1024 * 1024)
+static const struct devlink_region_ops ice_nvm_region_ops;
+static const struct devlink_region_ops ice_sram_region_ops;
+
/**
* ice_devlink_nvm_snapshot - Capture a snapshot of the NVM flash contents
* @devlink: the devlink instance
- * @ops: the devlink region being snapshotted
+ * @ops: the devlink region to snapshot
* @extack: extended ACK response structure
* @data: on exit points to snapshot data buffer
*
- * This function is called in response to the DEVLINK_CMD_REGION_TRIGGER for
- * the nvm-flash devlink region. It captures a snapshot of the full NVM flash
- * contents, including both banks of flash. This snapshot can later be viewed
- * via the devlink-region interface.
+ * This function is called in response to a DEVLINK_CMD_REGION_NEW for either
+ * the nvm-flash or shadow-ram region.
*
- * It captures the flash using the FLASH_ONLY bit set when reading via
- * firmware, so it does not read the current Shadow RAM contents. For that,
- * use the shadow-ram region.
+ * It captures a snapshot of the NVM or Shadow RAM flash contents. This
+ * snapshot can then later be viewed via the DEVLINK_CMD_REGION_READ netlink
+ * interface.
*
* @returns zero on success, and updates the data pointer. Returns a non-zero
* error code on failure.
@@ -1622,17 +1617,27 @@ static int ice_devlink_nvm_snapshot(struct devlink *devlink,
struct ice_pf *pf = devlink_priv(devlink);
struct device *dev = ice_pf_to_dev(pf);
struct ice_hw *hw = &pf->hw;
+ bool read_shadow_ram;
u8 *nvm_data, *tmp, i;
u32 nvm_size, left;
s8 num_blks;
int status;
- nvm_size = hw->flash.flash_size;
+ if (ops == &ice_nvm_region_ops) {
+ read_shadow_ram = false;
+ nvm_size = hw->flash.flash_size;
+ } else if (ops == &ice_sram_region_ops) {
+ read_shadow_ram = true;
+ nvm_size = hw->flash.sr_words * 2u;
+ } else {
+ NL_SET_ERR_MSG_MOD(extack, "Unexpected region in snapshot function");
+ return -EOPNOTSUPP;
+ }
+
nvm_data = vzalloc(nvm_size);
if (!nvm_data)
return -ENOMEM;
-
num_blks = DIV_ROUND_UP(nvm_size, ICE_DEVLINK_READ_BLK_SIZE);
tmp = nvm_data;
left = nvm_size;
@@ -1656,7 +1661,7 @@ static int ice_devlink_nvm_snapshot(struct devlink *devlink,
}
status = ice_read_flat_nvm(hw, i * ICE_DEVLINK_READ_BLK_SIZE,
- &read_sz, tmp, false);
+ &read_sz, tmp, read_shadow_ram);
if (status) {
dev_dbg(dev, "ice_read_flat_nvm failed after reading %u bytes, err %d aq_err %d\n",
read_sz, status, hw->adminq.sq_last_status);
@@ -1677,62 +1682,69 @@ static int ice_devlink_nvm_snapshot(struct devlink *devlink,
}
/**
- * ice_devlink_sram_snapshot - Capture a snapshot of the Shadow RAM contents
+ * ice_devlink_nvm_read - Read a portion of NVM flash contents
* @devlink: the devlink instance
- * @ops: the devlink region being snapshotted
+ * @ops: the devlink region to snapshot
* @extack: extended ACK response structure
- * @data: on exit points to snapshot data buffer
+ * @offset: the offset to start at
+ * @size: the amount to read
+ * @data: the data buffer to read into
*
- * This function is called in response to the DEVLINK_CMD_REGION_TRIGGER for
- * the shadow-ram devlink region. It captures a snapshot of the shadow ram
- * contents. This snapshot can later be viewed via the devlink-region
- * interface.
+ * This function is called in response to DEVLINK_CMD_REGION_READ to directly
+ * read a section of the NVM contents.
+ *
+ * It reads from either the nvm-flash or shadow-ram region contents.
*
* @returns zero on success, and updates the data pointer. Returns a non-zero
* error code on failure.
*/
-static int
-ice_devlink_sram_snapshot(struct devlink *devlink,
- const struct devlink_region_ops __always_unused *ops,
- struct netlink_ext_ack *extack, u8 **data)
+static int ice_devlink_nvm_read(struct devlink *devlink,
+ const struct devlink_region_ops *ops,
+ struct netlink_ext_ack *extack,
+ u64 offset, u32 size, u8 *data)
{
struct ice_pf *pf = devlink_priv(devlink);
struct device *dev = ice_pf_to_dev(pf);
struct ice_hw *hw = &pf->hw;
- u8 *sram_data;
- u32 sram_size;
- int err;
+ bool read_shadow_ram;
+ u64 nvm_size;
+ int status;
- sram_size = hw->flash.sr_words * 2u;
- sram_data = vzalloc(sram_size);
- if (!sram_data)
- return -ENOMEM;
+ if (ops == &ice_nvm_region_ops) {
+ read_shadow_ram = false;
+ nvm_size = hw->flash.flash_size;
+ } else if (ops == &ice_sram_region_ops) {
+ read_shadow_ram = true;
+ nvm_size = hw->flash.sr_words * 2u;
+ } else {
+ NL_SET_ERR_MSG_MOD(extack, "Unexpected region in snapshot function");
+ return -EOPNOTSUPP;
+ }
- err = ice_acquire_nvm(hw, ICE_RES_READ);
- if (err) {
+ if (offset + size >= nvm_size) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot read beyond the region size");
+ return -ERANGE;
+ }
+
+ status = ice_acquire_nvm(hw, ICE_RES_READ);
+ if (status) {
dev_dbg(dev, "ice_acquire_nvm failed, err %d aq_err %d\n",
- err, hw->adminq.sq_last_status);
+ status, hw->adminq.sq_last_status);
NL_SET_ERR_MSG_MOD(extack, "Failed to acquire NVM semaphore");
- vfree(sram_data);
- return err;
+ return -EIO;
}
- /* Read from the Shadow RAM, rather than directly from NVM */
- err = ice_read_flat_nvm(hw, 0, &sram_size, sram_data, true);
- if (err) {
+ status = ice_read_flat_nvm(hw, (u32)offset, &size, data,
+ read_shadow_ram);
+ if (status) {
dev_dbg(dev, "ice_read_flat_nvm failed after reading %u bytes, err %d aq_err %d\n",
- sram_size, err, hw->adminq.sq_last_status);
- NL_SET_ERR_MSG_MOD(extack,
- "Failed to read Shadow RAM contents");
+ size, status, hw->adminq.sq_last_status);
+ NL_SET_ERR_MSG_MOD(extack, "Failed to read NVM contents");
ice_release_nvm(hw);
- vfree(sram_data);
- return err;
+ return -EIO;
}
-
ice_release_nvm(hw);
- *data = sram_data;
-
return 0;
}
@@ -1784,12 +1796,14 @@ static const struct devlink_region_ops ice_nvm_region_ops = {
.name = "nvm-flash",
.destructor = vfree,
.snapshot = ice_devlink_nvm_snapshot,
+ .read = ice_devlink_nvm_read,
};
static const struct devlink_region_ops ice_sram_region_ops = {
.name = "shadow-ram",
.destructor = vfree,
- .snapshot = ice_devlink_sram_snapshot,
+ .snapshot = ice_devlink_nvm_snapshot,
+ .read = ice_devlink_nvm_read,
};
static const struct devlink_region_ops ice_devcaps_region_ops = {
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c
index 88dee589cb21..bda1a6fa2ec4 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c
@@ -1547,14 +1547,7 @@ static int rvu_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
return 0;
}
-static int rvu_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
- struct netlink_ext_ack *extack)
-{
- return devlink_info_driver_name_put(req, DRV_NAME);
-}
-
static const struct devlink_ops rvu_devlink_ops = {
- .info_get = rvu_devlink_info_get,
.eswitch_mode_get = rvu_devlink_eswitch_mode_get,
.eswitch_mode_set = rvu_devlink_eswitch_mode_set,
};
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c
index 594029007f85..00aef8f5ac29 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c
@@ -490,7 +490,7 @@ static bool rvu_npc_exact_alloc_id(struct rvu *rvu, u32 *seq_id)
if (idx == table->tot_ids) {
mutex_unlock(&table->lock);
dev_err(rvu->dev, "%s: No space in id bitmap (%d)\n",
- __func__, bitmap_weight(table->id_bmap, table->tot_ids));
+ __func__, table->tot_ids);
return false;
}
@@ -1870,12 +1870,11 @@ int rvu_npc_exact_init(struct rvu *rvu)
/* Set capability to true */
rvu->hw->cap.npc_exact_match_enabled = true;
- table = kmalloc(sizeof(*table), GFP_KERNEL);
+ table = kzalloc(sizeof(*table), GFP_KERNEL);
if (!table)
return -ENOMEM;
dev_dbg(rvu->dev, "%s: Memory allocation for table success\n", __func__);
- memset(table, 0, sizeof(*table));
rvu->hw->table = table;
/* Read table size, ways and depth */
@@ -1899,24 +1898,24 @@ int rvu_npc_exact_init(struct rvu *rvu)
table_size = table->mem_table.depth * table->mem_table.ways;
/* Allocate bitmap for 4way 2K table */
- table->mem_table.bmap = devm_kcalloc(rvu->dev, BITS_TO_LONGS(table_size),
- sizeof(long), GFP_KERNEL);
+ table->mem_table.bmap = devm_bitmap_zalloc(rvu->dev, table_size,
+ GFP_KERNEL);
if (!table->mem_table.bmap)
return -ENOMEM;
dev_dbg(rvu->dev, "%s: Allocated bitmap for 4way 2K entry table\n", __func__);
/* Allocate bitmap for 32 entry mcam */
- table->cam_table.bmap = devm_kcalloc(rvu->dev, 1, sizeof(long), GFP_KERNEL);
+ table->cam_table.bmap = devm_bitmap_zalloc(rvu->dev, 32, GFP_KERNEL);
if (!table->cam_table.bmap)
return -ENOMEM;
dev_dbg(rvu->dev, "%s: Allocated bitmap for 32 entry cam\n", __func__);
- table->tot_ids = (table->mem_table.depth * table->mem_table.ways) + table->cam_table.depth;
- table->id_bmap = devm_kcalloc(rvu->dev, BITS_TO_LONGS(table->tot_ids),
- table->tot_ids, GFP_KERNEL);
+ table->tot_ids = table_size + table->cam_table.depth;
+ table->id_bmap = devm_bitmap_zalloc(rvu->dev, table->tot_ids,
+ GFP_KERNEL);
if (!table->id_bmap)
return -ENOMEM;
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c
index 777a27047c8e..63ef7c41d18d 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c
@@ -77,22 +77,7 @@ static const struct devlink_param otx2_dl_params[] = {
otx2_dl_mcam_count_validate),
};
-/* Devlink OPs */
-static int otx2_devlink_info_get(struct devlink *devlink,
- struct devlink_info_req *req,
- struct netlink_ext_ack *extack)
-{
- struct otx2_devlink *otx2_dl = devlink_priv(devlink);
- struct otx2_nic *pfvf = otx2_dl->pfvf;
-
- if (is_otx2_vf(pfvf->pcifunc))
- return devlink_info_driver_name_put(req, "rvu_nicvf");
-
- return devlink_info_driver_name_put(req, "rvu_nicpf");
-}
-
static const struct devlink_ops otx2_devlink_ops = {
- .info_get = otx2_devlink_info_get,
};
int otx2_register_dl(struct otx2_nic *pfvf)
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_devlink.c b/drivers/net/ethernet/marvell/prestera/prestera_devlink.c
index 84ad05c9f12d..2a4c9df4eb79 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_devlink.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_devlink.c
@@ -355,11 +355,6 @@ static int prestera_dl_info_get(struct devlink *dl,
{
struct prestera_switch *sw = devlink_priv(dl);
char buf[16];
- int err;
-
- err = devlink_info_driver_name_put(req, PRESTERA_DRV_NAME);
- if (err)
- return err;
snprintf(buf, sizeof(buf), "%d.%d.%d",
sw->dev->fw_rev.maj,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index cc2ae427dcb0..751bc4a9edcf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -46,10 +46,6 @@ mlx5_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
u32 running_fw, stored_fw;
int err;
- err = devlink_info_driver_name_put(req, KBUILD_MODNAME);
- if (err)
- return err;
-
err = devlink_info_version_fixed_put(req, "fw.psid", dev->board_id);
if (err)
return err;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index a83f6bc30072..a0a06e2eff82 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -1459,11 +1459,6 @@ mlxsw_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
char buf[32];
int err;
- err = devlink_info_driver_name_put(req,
- mlxsw_core->bus_info->device_kind);
- if (err)
- return err;
-
mlxsw_reg_mgir_pack(mgir_pl);
err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mgir), mgir_pl);
if (err)
diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api.c b/drivers/net/ethernet/microchip/vcap/vcap_api.c
index 93efa8243b02..f2435d7ab515 100644
--- a/drivers/net/ethernet/microchip/vcap/vcap_api.c
+++ b/drivers/net/ethernet/microchip/vcap/vcap_api.c
@@ -985,17 +985,12 @@ static u32 vcap_next_rule_addr(u32 addr, struct vcap_rule_internal *ri)
/* Assign a unique rule id and autogenerate one if id == 0 */
static u32 vcap_set_rule_id(struct vcap_rule_internal *ri)
{
- u32 next_id;
-
if (ri->data.id != 0)
return ri->data.id;
- next_id = ri->vctrl->rule_id + 1;
-
- for (next_id = ri->vctrl->rule_id + 1; next_id < ~0; ++next_id) {
+ for (u32 next_id = 1; next_id < ~0; ++next_id) {
if (!vcap_lookup_rule(ri->vctrl, next_id)) {
ri->data.id = next_id;
- ri->vctrl->rule_id = next_id;
break;
}
}
diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api.h b/drivers/net/ethernet/microchip/vcap/vcap_api.h
index ca4499838306..689c7270f2a8 100644
--- a/drivers/net/ethernet/microchip/vcap/vcap_api.h
+++ b/drivers/net/ethernet/microchip/vcap/vcap_api.h
@@ -268,7 +268,6 @@ struct vcap_operations {
/* VCAP API Client control interface */
struct vcap_control {
- u32 rule_id; /* last used rule id (unique across VCAP instances) */
struct vcap_operations *ops; /* client supplied operations */
const struct vcap_info *vcaps; /* client supplied vcap models */
const struct vcap_statistics *stats; /* client supplied vcap stats */
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c
index 784f23602a8a..bf6bae557158 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c
@@ -239,10 +239,6 @@ nfp_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
char *buf = NULL;
int err;
- err = devlink_info_driver_name_put(req, "nfp");
- if (err)
- return err;
-
vendor = nfp_hwinfo_lookup(pf->hwinfo, "assembly.vendor");
part = nfp_hwinfo_lookup(pf->hwinfo, "assembly.partno");
sn = nfp_hwinfo_lookup(pf->hwinfo, "assembly.serial");
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c
index 567f778433e2..e6ff757895ab 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c
@@ -26,10 +26,6 @@ static int ionic_dl_info_get(struct devlink *dl, struct devlink_info_req *req,
char buf[16];
int err = 0;
- err = devlink_info_driver_name_put(req, IONIC_DRV_NAME);
- if (err)
- return err;
-
err = devlink_info_version_running_put(req,
DEVLINK_INFO_VERSION_GENERIC_FW,
idev->dev_info.fw_version);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_devlink.c b/drivers/net/ethernet/qlogic/qed/qed_devlink.c
index 6bb4e165b592..922c47797af6 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_devlink.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_devlink.c
@@ -162,10 +162,6 @@ static int qed_devlink_info_get(struct devlink *devlink,
dev_info = &cdev->common_dev_info;
- err = devlink_info_driver_name_put(req, KBUILD_MODNAME);
- if (err)
- return err;
-
memcpy(buf, cdev->hwfns[0].hw_info.part_num, sizeof(cdev->hwfns[0].hw_info.part_num));
buf[sizeof(cdev->hwfns[0].hw_info.part_num)] = 0;
diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
index e14686594a71..b962fc8e1397 100644
--- a/drivers/net/netdevsim/dev.c
+++ b/drivers/net/netdevsim/dev.c
@@ -994,9 +994,6 @@ static int nsim_dev_info_get(struct devlink *devlink,
{
int err;
- err = devlink_info_driver_name_put(req, DRV_NAME);
- if (err)
- return err;
err = devlink_info_version_stored_put_ext(req, "fw.mgmt", "10.20.30",
DEVLINK_INFO_VERSION_TYPE_COMPONENT);
if (err)
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 8cff61dbc4b5..716870a4499c 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1512,6 +1512,15 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
phy_resume(phydev);
phy_led_triggers_register(phydev);
+ /**
+ * If the external phy used by current mac interface is managed by
+ * another mac interface, so we should create a device link between
+ * phy dev and mac dev.
+ */
+ if (phydev->mdio.bus->parent && dev->dev.parent != phydev->mdio.bus->parent)
+ phydev->devlink = device_link_add(dev->dev.parent, &phydev->mdio.dev,
+ DL_FLAG_PM_RUNTIME | DL_FLAG_STATELESS);
+
return err;
error:
@@ -1750,6 +1759,9 @@ void phy_detach(struct phy_device *phydev)
struct module *ndev_owner = NULL;
struct mii_bus *bus;
+ if (phydev->devlink)
+ device_link_del(phydev->devlink);
+
if (phydev->sysfs_links) {
if (dev)
sysfs_remove_link(&dev->dev.kobj, "phydev");
diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c
index 154d58cbd9ce..4bbaccd543ad 100644
--- a/drivers/ptp/ptp_ocp.c
+++ b/drivers/ptp/ptp_ocp.c
@@ -1647,10 +1647,6 @@ ptp_ocp_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
char buf[32];
int err;
- err = devlink_info_driver_name_put(req, KBUILD_MODNAME);
- if (err)
- return err;
-
fw_image = bp->fw_loader ? "loader" : "fw";
sprintf(buf, "%d.%d", bp->fw_tag, bp->fw_version);
err = devlink_info_version_running_put(req, fw_image, buf);
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 9a3752c0c444..71eeb4e3b1fd 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -529,6 +529,8 @@ struct macsec_ops;
*
* @mdio: MDIO bus this PHY is on
* @drv: Pointer to the driver for this PHY instance
+ * @devlink: Create a link between phy dev and mac dev, if the external phy
+ * used by current mac interface is managed by another mac interface.
* @phy_id: UID for this device found during discovery
* @c45_ids: 802.3-c45 Device Identifiers if is_c45.
* @is_c45: Set to true if this PHY uses clause 45 addressing.
@@ -618,6 +620,8 @@ struct phy_device {
/* And management functions */
struct phy_driver *drv;
+ struct device_link *devlink;
+
u32 phy_id;
struct phy_c45_device_ids c45_ids;
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 074a79b8933f..5f6eca5e4a40 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -650,6 +650,10 @@ struct devlink_info_req;
* the data variable must be updated to point to the snapshot data.
* The function will be called while the devlink instance lock is
* held.
+ * @read: callback to directly read a portion of the region. On success,
+ * the data pointer will be updated with the contents of the
+ * requested portion of the region. The function will be called
+ * while the devlink instance lock is held.
* @priv: Pointer to driver private data for the region operation
*/
struct devlink_region_ops {
@@ -659,6 +663,10 @@ struct devlink_region_ops {
const struct devlink_region_ops *ops,
struct netlink_ext_ack *extack,
u8 **data);
+ int (*read)(struct devlink *devlink,
+ const struct devlink_region_ops *ops,
+ struct netlink_ext_ack *extack,
+ u64 offset, u32 size, u8 *data);
void *priv;
};
@@ -670,6 +678,10 @@ struct devlink_region_ops {
* the data variable must be updated to point to the snapshot data.
* The function will be called while the devlink instance lock is
* held.
+ * @read: callback to directly read a portion of the region. On success,
+ * the data pointer will be updated with the contents of the
+ * requested portion of the region. The function will be called
+ * while the devlink instance lock is held.
* @priv: Pointer to driver private data for the region operation
*/
struct devlink_port_region_ops {
@@ -679,6 +691,10 @@ struct devlink_port_region_ops {
const struct devlink_port_region_ops *ops,
struct netlink_ext_ack *extack,
u8 **data);
+ int (*read)(struct devlink_port *port,
+ const struct devlink_port_region_ops *ops,
+ struct netlink_ext_ack *extack,
+ u64 offset, u32 size, u8 *data);
void *priv;
};
@@ -1746,8 +1762,6 @@ int devlink_region_snapshot_create(struct devlink_region *region,
u8 *data, u32 snapshot_id);
int devlink_info_serial_number_put(struct devlink_info_req *req,
const char *sn);
-int devlink_info_driver_name_put(struct devlink_info_req *req,
- const char *name);
int devlink_info_board_serial_number_put(struct devlink_info_req *req,
const char *bsn);
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 498d0d5d0957..70191d96af89 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -610,6 +610,8 @@ enum devlink_attr {
DEVLINK_ATTR_RATE_TX_PRIORITY, /* u32 */
DEVLINK_ATTR_RATE_TX_WEIGHT, /* u32 */
+ DEVLINK_ATTR_REGION_DIRECT, /* flag */
+
/* add new attributes above here, update the policy in devlink.c */
__DEVLINK_ATTR_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 0e10a8a68c5e..fca3ebee97b0 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -6431,7 +6431,6 @@ unlock:
}
static int devlink_nl_cmd_region_read_chunk_fill(struct sk_buff *msg,
- struct devlink *devlink,
u8 *chunk, u32 chunk_size,
u64 addr)
{
@@ -6461,39 +6460,37 @@ nla_put_failure:
#define DEVLINK_REGION_READ_CHUNK_SIZE 256
-static int devlink_nl_region_read_snapshot_fill(struct sk_buff *skb,
- struct devlink *devlink,
- struct devlink_region *region,
- struct nlattr **attrs,
- u64 start_offset,
- u64 end_offset,
- u64 *new_offset)
+typedef int devlink_chunk_fill_t(void *cb_priv, u8 *chunk, u32 chunk_size,
+ u64 curr_offset,
+ struct netlink_ext_ack *extack);
+
+static int
+devlink_nl_region_read_fill(struct sk_buff *skb, devlink_chunk_fill_t *cb,
+ void *cb_priv, u64 start_offset, u64 end_offset,
+ u64 *new_offset, struct netlink_ext_ack *extack)
{
- struct devlink_snapshot *snapshot;
u64 curr_offset = start_offset;
- u32 snapshot_id;
int err = 0;
+ u8 *data;
- *new_offset = start_offset;
+ /* Allocate and re-use a single buffer */
+ data = kmalloc(DEVLINK_REGION_READ_CHUNK_SIZE, GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
- snapshot_id = nla_get_u32(attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]);
- snapshot = devlink_region_snapshot_get_by_id(region, snapshot_id);
- if (!snapshot)
- return -EINVAL;
+ *new_offset = start_offset;
while (curr_offset < end_offset) {
u32 data_size;
- u8 *data;
- if (end_offset - curr_offset < DEVLINK_REGION_READ_CHUNK_SIZE)
- data_size = end_offset - curr_offset;
- else
- data_size = DEVLINK_REGION_READ_CHUNK_SIZE;
+ data_size = min_t(u32, end_offset - curr_offset,
+ DEVLINK_REGION_READ_CHUNK_SIZE);
- data = &snapshot->data[curr_offset];
- err = devlink_nl_cmd_region_read_chunk_fill(skb, devlink,
- data, data_size,
- curr_offset);
+ err = cb(cb_priv, data, data_size, curr_offset, extack);
+ if (err)
+ break;
+
+ err = devlink_nl_cmd_region_read_chunk_fill(skb, data, data_size, curr_offset);
if (err)
break;
@@ -6501,21 +6498,57 @@ static int devlink_nl_region_read_snapshot_fill(struct sk_buff *skb,
}
*new_offset = curr_offset;
+ kfree(data);
+
return err;
}
+static int
+devlink_region_snapshot_fill(void *cb_priv, u8 *chunk, u32 chunk_size,
+ u64 curr_offset,
+ struct netlink_ext_ack __always_unused *extack)
+{
+ struct devlink_snapshot *snapshot = cb_priv;
+
+ memcpy(chunk, &snapshot->data[curr_offset], chunk_size);
+
+ return 0;
+}
+
+static int
+devlink_region_port_direct_fill(void *cb_priv, u8 *chunk, u32 chunk_size,
+ u64 curr_offset, struct netlink_ext_ack *extack)
+{
+ struct devlink_region *region = cb_priv;
+
+ return region->port_ops->read(region->port, region->port_ops, extack,
+ curr_offset, chunk_size, chunk);
+}
+
+static int
+devlink_region_direct_fill(void *cb_priv, u8 *chunk, u32 chunk_size,
+ u64 curr_offset, struct netlink_ext_ack *extack)
+{
+ struct devlink_region *region = cb_priv;
+
+ return region->ops->read(region->devlink, region->ops, extack,
+ curr_offset, chunk_size, chunk);
+}
+
static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
struct netlink_callback *cb)
{
const struct genl_dumpit_info *info = genl_dumpit_info(cb);
+ struct nlattr *chunks_attr, *region_attr, *snapshot_attr;
u64 ret_offset, start_offset, end_offset = U64_MAX;
struct nlattr **attrs = info->attrs;
struct devlink_port *port = NULL;
+ devlink_chunk_fill_t *region_cb;
struct devlink_region *region;
- struct nlattr *chunks_attr;
const char *region_name;
struct devlink *devlink;
unsigned int index;
+ void *region_cb_priv;
void *hdr;
int err;
@@ -6527,8 +6560,8 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
devl_lock(devlink);
- if (!attrs[DEVLINK_ATTR_REGION_NAME] ||
- !attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]) {
+ if (!attrs[DEVLINK_ATTR_REGION_NAME]) {
+ NL_SET_ERR_MSG(cb->extack, "No region name provided");
err = -EINVAL;
goto out_unlock;
}
@@ -6543,7 +6576,8 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
}
}
- region_name = nla_data(attrs[DEVLINK_ATTR_REGION_NAME]);
+ region_attr = attrs[DEVLINK_ATTR_REGION_NAME];
+ region_name = nla_data(region_attr);
if (port)
region = devlink_port_region_get_by_name(port, region_name);
@@ -6551,10 +6585,51 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
region = devlink_region_get_by_name(devlink, region_name);
if (!region) {
+ NL_SET_ERR_MSG_ATTR(cb->extack, region_attr, "Requested region does not exist");
err = -EINVAL;
goto out_unlock;
}
+ snapshot_attr = attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID];
+ if (!snapshot_attr) {
+ if (!nla_get_flag(attrs[DEVLINK_ATTR_REGION_DIRECT])) {
+ NL_SET_ERR_MSG(cb->extack, "No snapshot id provided");
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
+ if (!region->ops->read) {
+ NL_SET_ERR_MSG(cb->extack, "Requested region does not support direct read");
+ err = -EOPNOTSUPP;
+ goto out_unlock;
+ }
+
+ if (port)
+ region_cb = &devlink_region_port_direct_fill;
+ else
+ region_cb = &devlink_region_direct_fill;
+ region_cb_priv = region;
+ } else {
+ struct devlink_snapshot *snapshot;
+ u32 snapshot_id;
+
+ if (nla_get_flag(attrs[DEVLINK_ATTR_REGION_DIRECT])) {
+ NL_SET_ERR_MSG_ATTR(cb->extack, snapshot_attr, "Direct region read does not use snapshot");
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
+ snapshot_id = nla_get_u32(snapshot_attr);
+ snapshot = devlink_region_snapshot_get_by_id(region, snapshot_id);
+ if (!snapshot) {
+ NL_SET_ERR_MSG_ATTR(cb->extack, snapshot_attr, "Requested snapshot does not exist");
+ err = -EINVAL;
+ goto out_unlock;
+ }
+ region_cb = &devlink_region_snapshot_fill;
+ region_cb_priv = snapshot;
+ }
+
if (attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR] &&
attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]) {
if (!start_offset)
@@ -6603,10 +6678,9 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
goto nla_put_failure;
}
- err = devlink_nl_region_read_snapshot_fill(skb, devlink,
- region, attrs,
- start_offset,
- end_offset, &ret_offset);
+ err = devlink_nl_region_read_fill(skb, region_cb, region_cb_priv,
+ start_offset, end_offset, &ret_offset,
+ cb->extack);
if (err && err != -EMSGSIZE)
goto nla_put_failure;
@@ -6633,14 +6707,6 @@ out_unlock:
return err;
}
-int devlink_info_driver_name_put(struct devlink_info_req *req, const char *name)
-{
- if (!req->msg)
- return 0;
- return nla_put_string(req->msg, DEVLINK_ATTR_INFO_DRIVER_NAME, name);
-}
-EXPORT_SYMBOL_GPL(devlink_info_driver_name_put);
-
int devlink_info_serial_number_put(struct devlink_info_req *req, const char *sn)
{
if (!req->msg)
@@ -6749,11 +6815,25 @@ int devlink_info_version_running_put_ext(struct devlink_info_req *req,
}
EXPORT_SYMBOL_GPL(devlink_info_version_running_put_ext);
+static int devlink_nl_driver_info_get(struct device_driver *drv,
+ struct devlink_info_req *req)
+{
+ if (!drv)
+ return 0;
+
+ if (drv->name[0])
+ return nla_put_string(req->msg, DEVLINK_ATTR_INFO_DRIVER_NAME,
+ drv->name);
+
+ return 0;
+}
+
static int
devlink_nl_info_fill(struct sk_buff *msg, struct devlink *devlink,
enum devlink_command cmd, u32 portid,
u32 seq, int flags, struct netlink_ext_ack *extack)
{
+ struct device *dev = devlink_to_dev(devlink);
struct devlink_info_req req = {};
void *hdr;
int err;
@@ -6767,7 +6847,13 @@ devlink_nl_info_fill(struct sk_buff *msg, struct devlink *devlink,
goto err_cancel_msg;
req.msg = msg;
- err = devlink->ops->info_get(devlink, &req, extack);
+ if (devlink->ops->info_get) {
+ err = devlink->ops->info_get(devlink, &req, extack);
+ if (err)
+ goto err_cancel_msg;
+ }
+
+ err = devlink_nl_driver_info_get(dev->driver, &req);
if (err)
goto err_cancel_msg;
@@ -6786,9 +6872,6 @@ static int devlink_nl_cmd_info_get_doit(struct sk_buff *skb,
struct sk_buff *msg;
int err;
- if (!devlink->ops->info_get)
- return -EOPNOTSUPP;
-
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
return -ENOMEM;
@@ -6814,7 +6897,7 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg,
int err = 0;
devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
- if (idx < start || !devlink->ops->info_get)
+ if (idx < start)
goto inc;
devl_lock(devlink);
@@ -9251,6 +9334,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_SELFTESTS] = { .type = NLA_NESTED },
[DEVLINK_ATTR_RATE_TX_PRIORITY] = { .type = NLA_U32 },
[DEVLINK_ATTR_RATE_TX_WEIGHT] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_REGION_DIRECT] = { .type = NLA_FLAG },
};
static const struct genl_small_ops devlink_nl_ops[] = {
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index ff8807cc9c2e..ee38ca888244 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
bind_bhash
+csum
cmsg_sender
fin_ack_lat
gro
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 880e6ded6ed5..3007e98a6d64 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -74,6 +74,7 @@ TEST_PROGS += test_ingress_egress_chaining.sh
TEST_GEN_PROGS += so_incoming_cpu
TEST_PROGS += sctp_vrf.sh
TEST_GEN_FILES += sctp_hello
+TEST_GEN_FILES += csum
TEST_FILES := settings
diff --git a/tools/testing/selftests/net/csum.c b/tools/testing/selftests/net/csum.c
new file mode 100644
index 000000000000..82a1c1839da6
--- /dev/null
+++ b/tools/testing/selftests/net/csum.c
@@ -0,0 +1,986 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Test hardware checksum offload: Rx + Tx, IPv4 + IPv6, TCP + UDP.
+ *
+ * The test runs on two machines to exercise the NIC. For this reason it
+ * is not integrated in kselftests.
+ *
+ * CMD=$((./csum -[46] -[tu] -S $SADDR -D $DADDR -[RT] -r 1 $EXTRA_ARGS))
+ *
+ * Rx:
+ *
+ * The sender sends packets with a known checksum field using PF_INET(6)
+ * SOCK_RAW sockets.
+ *
+ * good packet: $CMD [-t]
+ * bad packet: $CMD [-t] -E
+ *
+ * The receiver reads UDP packets with a UDP socket. This is not an
+ * option for TCP packets ('-t'). Optionally insert an iptables filter
+ * to avoid these entering the real protocol stack.
+ *
+ * The receiver also reads all packets with a PF_PACKET socket, to
+ * observe whether both good and bad packets arrive on the host. And to
+ * read the optional TP_STATUS_CSUM_VALID bit. This requires setting
+ * option PACKET_AUXDATA, and works only for CHECKSUM_UNNECESSARY.
+ *
+ * Tx:
+ *
+ * The sender needs to build CHECKSUM_PARTIAL packets to exercise tx
+ * checksum offload.
+ *
+ * The sender can sends packets with a UDP socket.
+ *
+ * Optionally crafts a packet that sums up to zero to verify that the
+ * device writes negative zero 0xFFFF in this case to distinguish from
+ * 0x0000 (checksum disabled), as required by RFC 768. Hit this case
+ * by choosing a specific source port.
+ *
+ * good packet: $CMD -U
+ * zero csum: $CMD -U -Z
+ *
+ * The sender can also build packets with PF_PACKET with PACKET_VNET_HDR,
+ * to cover more protocols. PF_PACKET requires passing src and dst mac
+ * addresses.
+ *
+ * good packet: $CMD -s $smac -d $dmac -p [-t]
+ *
+ * Argument '-z' sends UDP packets with a 0x000 checksum disabled field,
+ * to verify that the NIC passes these packets unmodified.
+ *
+ * Argument '-e' adds a transport mode encapsulation header between
+ * network and transport header. This will fail for devices that parse
+ * headers. Should work on devices that implement protocol agnostic tx
+ * checksum offload (NETIF_F_HW_CSUM).
+ *
+ * Argument '-r $SEED' optionally randomizes header, payload and length
+ * to increase coverage between packets sent. SEED 1 further chooses a
+ * different seed for each run (and logs this for reproducibility). It
+ * is advised to enable this for extra coverage in continuous testing.
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <asm/byteorder.h>
+#include <errno.h>
+#include <error.h>
+#include <linux/filter.h>
+#include <linux/if_packet.h>
+#include <linux/ipv6.h>
+#include <linux/virtio_net.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/if_ether.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+static bool cfg_bad_csum;
+static int cfg_family = PF_INET6;
+static int cfg_num_pkt = 4;
+static bool cfg_do_rx = true;
+static bool cfg_do_tx = true;
+static bool cfg_encap;
+static char *cfg_ifname = "eth0";
+static char *cfg_mac_dst;
+static char *cfg_mac_src;
+static int cfg_proto = IPPROTO_UDP;
+static int cfg_payload_char = 'a';
+static int cfg_payload_len = 100;
+static uint16_t cfg_port_dst = 34000;
+static uint16_t cfg_port_src = 33000;
+static uint16_t cfg_port_src_encap = 33001;
+static unsigned int cfg_random_seed;
+static int cfg_rcvbuf = 1 << 22; /* be able to queue large cfg_num_pkt */
+static bool cfg_send_pfpacket;
+static bool cfg_send_udp;
+static int cfg_timeout_ms = 2000;
+static bool cfg_zero_disable; /* skip checksum: set to zero (udp only) */
+static bool cfg_zero_sum; /* create packet that adds up to zero */
+
+static struct sockaddr_in cfg_daddr4 = {.sin_family = AF_INET};
+static struct sockaddr_in cfg_saddr4 = {.sin_family = AF_INET};
+static struct sockaddr_in6 cfg_daddr6 = {.sin6_family = AF_INET6};
+static struct sockaddr_in6 cfg_saddr6 = {.sin6_family = AF_INET6};
+
+#define ENC_HEADER_LEN (sizeof(struct udphdr) + sizeof(struct udp_encap_hdr))
+#define MAX_HEADER_LEN (sizeof(struct ipv6hdr) + ENC_HEADER_LEN + sizeof(struct tcphdr))
+#define MAX_PAYLOAD_LEN 1024
+
+/* Trivial demo encap. Stand-in for transport layer protocols like ESP or PSP */
+struct udp_encap_hdr {
+ uint8_t nexthdr;
+ uint8_t padding[3];
+};
+
+/* Ipaddrs, for pseudo csum. Global var is ugly, pass through funcs was worse */
+static void *iph_addr_p;
+
+static unsigned long gettimeofday_ms(void)
+{
+ struct timeval tv;
+
+ gettimeofday(&tv, NULL);
+ return (tv.tv_sec * 1000UL) + (tv.tv_usec / 1000UL);
+}
+
+static uint32_t checksum_nofold(char *data, size_t len, uint32_t sum)
+{
+ uint16_t *words = (uint16_t *)data;
+ int i;
+
+ for (i = 0; i < len / 2; i++)
+ sum += words[i];
+
+ if (len & 1)
+ sum += ((unsigned char *)data)[len - 1];
+
+ return sum;
+}
+
+static uint16_t checksum_fold(void *data, size_t len, uint32_t sum)
+{
+ sum = checksum_nofold(data, len, sum);
+
+ while (sum > 0xFFFF)
+ sum = (sum & 0xFFFF) + (sum >> 16);
+
+ return ~sum;
+}
+
+static uint16_t checksum(void *th, uint16_t proto, size_t len)
+{
+ uint32_t sum;
+ int alen;
+
+ alen = cfg_family == PF_INET6 ? 32 : 8;
+
+ sum = checksum_nofold(iph_addr_p, alen, 0);
+ sum += htons(proto);
+ sum += htons(len);
+
+ /* With CHECKSUM_PARTIAL kernel expects non-inverted pseudo csum */
+ if (cfg_do_tx && cfg_send_pfpacket)
+ return ~checksum_fold(NULL, 0, sum);
+ else
+ return checksum_fold(th, len, sum);
+}
+
+static void *build_packet_ipv4(void *_iph, uint8_t proto, unsigned int len)
+{
+ struct iphdr *iph = _iph;
+
+ memset(iph, 0, sizeof(*iph));
+
+ iph->version = 4;
+ iph->ihl = 5;
+ iph->ttl = 8;
+ iph->protocol = proto;
+ iph->saddr = cfg_saddr4.sin_addr.s_addr;
+ iph->daddr = cfg_daddr4.sin_addr.s_addr;
+ iph->tot_len = htons(sizeof(*iph) + len);
+ iph->check = checksum_fold(iph, sizeof(*iph), 0);
+
+ iph_addr_p = &iph->saddr;
+
+ return iph + 1;
+}
+
+static void *build_packet_ipv6(void *_ip6h, uint8_t proto, unsigned int len)
+{
+ struct ipv6hdr *ip6h = _ip6h;
+
+ memset(ip6h, 0, sizeof(*ip6h));
+
+ ip6h->version = 6;
+ ip6h->payload_len = htons(len);
+ ip6h->nexthdr = proto;
+ ip6h->hop_limit = 64;
+ ip6h->saddr = cfg_saddr6.sin6_addr;
+ ip6h->daddr = cfg_daddr6.sin6_addr;
+
+ iph_addr_p = &ip6h->saddr;
+
+ return ip6h + 1;
+}
+
+static void *build_packet_udp(void *_uh)
+{
+ struct udphdr *uh = _uh;
+
+ uh->source = htons(cfg_port_src);
+ uh->dest = htons(cfg_port_dst);
+ uh->len = htons(sizeof(*uh) + cfg_payload_len);
+ uh->check = 0;
+
+ /* choose source port so that uh->check adds up to zero */
+ if (cfg_zero_sum) {
+ uh->source = 0;
+ uh->source = checksum(uh, IPPROTO_UDP, sizeof(*uh) + cfg_payload_len);
+
+ fprintf(stderr, "tx: changing sport: %hu -> %hu\n",
+ cfg_port_src, ntohs(uh->source));
+ cfg_port_src = ntohs(uh->source);
+ }
+
+ if (cfg_zero_disable)
+ uh->check = 0;
+ else
+ uh->check = checksum(uh, IPPROTO_UDP, sizeof(*uh) + cfg_payload_len);
+
+ if (cfg_bad_csum)
+ uh->check = ~uh->check;
+
+ fprintf(stderr, "tx: sending checksum: 0x%x\n", uh->check);
+ return uh + 1;
+}
+
+static void *build_packet_tcp(void *_th)
+{
+ struct tcphdr *th = _th;
+
+ th->source = htons(cfg_port_src);
+ th->dest = htons(cfg_port_dst);
+ th->doff = 5;
+ th->check = 0;
+
+ th->check = checksum(th, IPPROTO_TCP, sizeof(*th) + cfg_payload_len);
+
+ if (cfg_bad_csum)
+ th->check = ~th->check;
+
+ fprintf(stderr, "tx: sending checksum: 0x%x\n", th->check);
+ return th + 1;
+}
+
+static char *build_packet_udp_encap(void *_uh)
+{
+ struct udphdr *uh = _uh;
+ struct udp_encap_hdr *eh = _uh + sizeof(*uh);
+
+ /* outer dst == inner dst, to simplify BPF filter
+ * outer src != inner src, to demultiplex on recv
+ */
+ uh->dest = htons(cfg_port_dst);
+ uh->source = htons(cfg_port_src_encap);
+ uh->check = 0;
+ uh->len = htons(sizeof(*uh) +
+ sizeof(*eh) +
+ sizeof(struct tcphdr) +
+ cfg_payload_len);
+
+ eh->nexthdr = IPPROTO_TCP;
+
+ return build_packet_tcp(eh + 1);
+}
+
+static char *build_packet(char *buf, int max_len, int *len)
+{
+ uint8_t proto;
+ char *off;
+ int tlen;
+
+ if (cfg_random_seed) {
+ int *buf32 = (void *)buf;
+ int i;
+
+ for (i = 0; i < (max_len / sizeof(int)); i++)
+ buf32[i] = rand();
+ } else {
+ memset(buf, cfg_payload_char, max_len);
+ }
+
+ if (cfg_proto == IPPROTO_UDP)
+ tlen = sizeof(struct udphdr) + cfg_payload_len;
+ else
+ tlen = sizeof(struct tcphdr) + cfg_payload_len;
+
+ if (cfg_encap) {
+ proto = IPPROTO_UDP;
+ tlen += ENC_HEADER_LEN;
+ } else {
+ proto = cfg_proto;
+ }
+
+ if (cfg_family == PF_INET)
+ off = build_packet_ipv4(buf, proto, tlen);
+ else
+ off = build_packet_ipv6(buf, proto, tlen);
+
+ if (cfg_encap)
+ off = build_packet_udp_encap(off);
+ else if (cfg_proto == IPPROTO_UDP)
+ off = build_packet_udp(off);
+ else
+ off = build_packet_tcp(off);
+
+ /* only pass the payload, but still compute headers for cfg_zero_sum */
+ if (cfg_send_udp) {
+ *len = cfg_payload_len;
+ return off;
+ }
+
+ *len = off - buf + cfg_payload_len;
+ return buf;
+}
+
+static int open_inet(int ipproto, int protocol)
+{
+ int fd;
+
+ fd = socket(cfg_family, ipproto, protocol);
+ if (fd == -1)
+ error(1, errno, "socket inet");
+
+ if (cfg_family == PF_INET6) {
+ /* may have been updated by cfg_zero_sum */
+ cfg_saddr6.sin6_port = htons(cfg_port_src);
+
+ if (bind(fd, (void *)&cfg_saddr6, sizeof(cfg_saddr6)))
+ error(1, errno, "bind dgram 6");
+ if (connect(fd, (void *)&cfg_daddr6, sizeof(cfg_daddr6)))
+ error(1, errno, "connect dgram 6");
+ } else {
+ /* may have been updated by cfg_zero_sum */
+ cfg_saddr4.sin_port = htons(cfg_port_src);
+
+ if (bind(fd, (void *)&cfg_saddr4, sizeof(cfg_saddr4)))
+ error(1, errno, "bind dgram 4");
+ if (connect(fd, (void *)&cfg_daddr4, sizeof(cfg_daddr4)))
+ error(1, errno, "connect dgram 4");
+ }
+
+ return fd;
+}
+
+static int open_packet(void)
+{
+ int fd, one = 1;
+
+ fd = socket(PF_PACKET, SOCK_RAW, 0);
+ if (fd == -1)
+ error(1, errno, "socket packet");
+
+ if (setsockopt(fd, SOL_PACKET, PACKET_VNET_HDR, &one, sizeof(one)))
+ error(1, errno, "setsockopt packet_vnet_ndr");
+
+ return fd;
+}
+
+static void send_inet(int fd, const char *buf, int len)
+{
+ int ret;
+
+ ret = write(fd, buf, len);
+ if (ret == -1)
+ error(1, errno, "write");
+ if (ret != len)
+ error(1, 0, "write: %d", ret);
+}
+
+static void eth_str_to_addr(const char *str, unsigned char *eth)
+{
+ if (sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
+ &eth[0], &eth[1], &eth[2], &eth[3], &eth[4], &eth[5]) != 6)
+ error(1, 0, "cannot parse mac addr %s", str);
+}
+
+static void send_packet(int fd, const char *buf, int len)
+{
+ struct virtio_net_hdr vh = {0};
+ struct sockaddr_ll addr = {0};
+ struct msghdr msg = {0};
+ struct ethhdr eth;
+ struct iovec iov[3];
+ int ret;
+
+ addr.sll_family = AF_PACKET;
+ addr.sll_halen = ETH_ALEN;
+ addr.sll_ifindex = if_nametoindex(cfg_ifname);
+ if (!addr.sll_ifindex)
+ error(1, errno, "if_nametoindex %s", cfg_ifname);
+
+ vh.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+ if (cfg_family == PF_INET6) {
+ vh.csum_start = sizeof(struct ethhdr) + sizeof(struct ipv6hdr);
+ addr.sll_protocol = htons(ETH_P_IPV6);
+ } else {
+ vh.csum_start = sizeof(struct ethhdr) + sizeof(struct iphdr);
+ addr.sll_protocol = htons(ETH_P_IP);
+ }
+
+ if (cfg_encap)
+ vh.csum_start += ENC_HEADER_LEN;
+
+ if (cfg_proto == IPPROTO_TCP) {
+ vh.csum_offset = __builtin_offsetof(struct tcphdr, check);
+ vh.hdr_len = vh.csum_start + sizeof(struct tcphdr);
+ } else {
+ vh.csum_offset = __builtin_offsetof(struct udphdr, check);
+ vh.hdr_len = vh.csum_start + sizeof(struct udphdr);
+ }
+
+ eth_str_to_addr(cfg_mac_src, eth.h_source);
+ eth_str_to_addr(cfg_mac_dst, eth.h_dest);
+ eth.h_proto = addr.sll_protocol;
+
+ iov[0].iov_base = &vh;
+ iov[0].iov_len = sizeof(vh);
+
+ iov[1].iov_base = &eth;
+ iov[1].iov_len = sizeof(eth);
+
+ iov[2].iov_base = (void *)buf;
+ iov[2].iov_len = len;
+
+ msg.msg_iov = iov;
+ msg.msg_iovlen = sizeof(iov) / sizeof(iov[0]);
+
+ msg.msg_name = &addr;
+ msg.msg_namelen = sizeof(addr);
+
+ ret = sendmsg(fd, &msg, 0);
+ if (ret == -1)
+ error(1, errno, "sendmsg packet");
+ if (ret != sizeof(vh) + sizeof(eth) + len)
+ error(1, errno, "sendmsg packet: %u", ret);
+}
+
+static int recv_prepare_udp(void)
+{
+ int fd;
+
+ fd = socket(cfg_family, SOCK_DGRAM, 0);
+ if (fd == -1)
+ error(1, errno, "socket r");
+
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF,
+ &cfg_rcvbuf, sizeof(cfg_rcvbuf)))
+ error(1, errno, "setsockopt SO_RCVBUF r");
+
+ if (cfg_family == PF_INET6) {
+ if (bind(fd, (void *)&cfg_daddr6, sizeof(cfg_daddr6)))
+ error(1, errno, "bind r");
+ } else {
+ if (bind(fd, (void *)&cfg_daddr4, sizeof(cfg_daddr4)))
+ error(1, errno, "bind r");
+ }
+
+ return fd;
+}
+
+/* Filter out all traffic that is not cfg_proto with our destination port.
+ *
+ * Otherwise background noise may cause PF_PACKET receive queue overflow,
+ * dropping the expected packets and failing the test.
+ */
+static void __recv_prepare_packet_filter(int fd, int off_nexthdr, int off_dport)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4),
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, off_nexthdr),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_encap ? IPPROTO_UDP : cfg_proto, 0, 2),
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, off_dport),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_port_dst, 1, 0),
+ BPF_STMT(BPF_RET + BPF_K, 0),
+ BPF_STMT(BPF_RET + BPF_K, 0xFFFF),
+ };
+ struct sock_fprog prog = {};
+
+ prog.filter = filter;
+ prog.len = sizeof(filter) / sizeof(struct sock_filter);
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
+ error(1, errno, "setsockopt filter");
+}
+
+static void recv_prepare_packet_filter(int fd)
+{
+ const int off_dport = offsetof(struct tcphdr, dest); /* same for udp */
+
+ if (cfg_family == AF_INET)
+ __recv_prepare_packet_filter(fd, offsetof(struct iphdr, protocol),
+ sizeof(struct iphdr) + off_dport);
+ else
+ __recv_prepare_packet_filter(fd, offsetof(struct ipv6hdr, nexthdr),
+ sizeof(struct ipv6hdr) + off_dport);
+}
+
+static void recv_prepare_packet_bind(int fd)
+{
+ struct sockaddr_ll laddr = {0};
+
+ laddr.sll_family = AF_PACKET;
+
+ if (cfg_family == PF_INET)
+ laddr.sll_protocol = htons(ETH_P_IP);
+ else
+ laddr.sll_protocol = htons(ETH_P_IPV6);
+
+ laddr.sll_ifindex = if_nametoindex(cfg_ifname);
+ if (!laddr.sll_ifindex)
+ error(1, 0, "if_nametoindex %s", cfg_ifname);
+
+ if (bind(fd, (void *)&laddr, sizeof(laddr)))
+ error(1, errno, "bind pf_packet");
+}
+
+static int recv_prepare_packet(void)
+{
+ int fd, one = 1;
+
+ fd = socket(PF_PACKET, SOCK_DGRAM, 0);
+ if (fd == -1)
+ error(1, errno, "socket p");
+
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF,
+ &cfg_rcvbuf, sizeof(cfg_rcvbuf)))
+ error(1, errno, "setsockopt SO_RCVBUF p");
+
+ /* enable auxdata to recv checksum status (valid vs unknown) */
+ if (setsockopt(fd, SOL_PACKET, PACKET_AUXDATA, &one, sizeof(one)))
+ error(1, errno, "setsockopt auxdata");
+
+ /* install filter to restrict packet flow to match */
+ recv_prepare_packet_filter(fd);
+
+ /* bind to address family to start packet flow */
+ recv_prepare_packet_bind(fd);
+
+ return fd;
+}
+
+static int recv_udp(int fd)
+{
+ static char buf[MAX_PAYLOAD_LEN];
+ int ret, count = 0;
+
+ while (1) {
+ ret = recv(fd, buf, sizeof(buf), MSG_DONTWAIT);
+ if (ret == -1 && errno == EAGAIN)
+ break;
+ if (ret == -1)
+ error(1, errno, "recv r");
+
+ fprintf(stderr, "rx: udp: len=%u\n", ret);
+ count++;
+ }
+
+ return count;
+}
+
+static int recv_verify_csum(void *th, int len, uint16_t sport, uint16_t csum_field)
+{
+ uint16_t csum;
+
+ csum = checksum(th, cfg_proto, len);
+
+ fprintf(stderr, "rx: pkt: sport=%hu len=%u csum=0x%hx verify=0x%hx\n",
+ sport, len, csum_field, csum);
+
+ /* csum must be zero unless cfg_bad_csum indicates bad csum */
+ if (csum && !cfg_bad_csum) {
+ fprintf(stderr, "pkt: bad csum\n");
+ return 1;
+ } else if (cfg_bad_csum && !csum) {
+ fprintf(stderr, "pkt: good csum, while bad expected\n");
+ return 1;
+ }
+
+ if (cfg_zero_sum && csum_field != 0xFFFF) {
+ fprintf(stderr, "pkt: zero csum: field should be 0xFFFF, is 0x%hx\n", csum_field);
+ return 1;
+ }
+
+ return 0;
+}
+
+static int recv_verify_packet_tcp(void *th, int len)
+{
+ struct tcphdr *tcph = th;
+
+ if (len < sizeof(*tcph) || tcph->dest != htons(cfg_port_dst))
+ return -1;
+
+ return recv_verify_csum(th, len, ntohs(tcph->source), tcph->check);
+}
+
+static int recv_verify_packet_udp_encap(void *th, int len)
+{
+ struct udp_encap_hdr *eh = th;
+
+ if (len < sizeof(*eh) || eh->nexthdr != IPPROTO_TCP)
+ return -1;
+
+ return recv_verify_packet_tcp(eh + 1, len - sizeof(*eh));
+}
+
+static int recv_verify_packet_udp(void *th, int len)
+{
+ struct udphdr *udph = th;
+
+ if (len < sizeof(*udph))
+ return -1;
+
+ if (udph->dest != htons(cfg_port_dst))
+ return -1;
+
+ if (udph->source == htons(cfg_port_src_encap))
+ return recv_verify_packet_udp_encap(udph + 1,
+ len - sizeof(*udph));
+
+ return recv_verify_csum(th, len, ntohs(udph->source), udph->check);
+}
+
+static int recv_verify_packet_ipv4(void *nh, int len)
+{
+ struct iphdr *iph = nh;
+ uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto;
+
+ if (len < sizeof(*iph) || iph->protocol != proto)
+ return -1;
+
+ iph_addr_p = &iph->saddr;
+ if (proto == IPPROTO_TCP)
+ return recv_verify_packet_tcp(iph + 1, len - sizeof(*iph));
+ else
+ return recv_verify_packet_udp(iph + 1, len - sizeof(*iph));
+}
+
+static int recv_verify_packet_ipv6(void *nh, int len)
+{
+ struct ipv6hdr *ip6h = nh;
+ uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto;
+
+ if (len < sizeof(*ip6h) || ip6h->nexthdr != proto)
+ return -1;
+
+ iph_addr_p = &ip6h->saddr;
+
+ if (proto == IPPROTO_TCP)
+ return recv_verify_packet_tcp(ip6h + 1, len - sizeof(*ip6h));
+ else
+ return recv_verify_packet_udp(ip6h + 1, len - sizeof(*ip6h));
+}
+
+/* return whether auxdata includes TP_STATUS_CSUM_VALID */
+static bool recv_verify_packet_csum(struct msghdr *msg)
+{
+ struct tpacket_auxdata *aux = NULL;
+ struct cmsghdr *cm;
+
+ if (msg->msg_flags & MSG_CTRUNC)
+ error(1, 0, "cmsg: truncated");
+
+ for (cm = CMSG_FIRSTHDR(msg); cm; cm = CMSG_NXTHDR(msg, cm)) {
+ if (cm->cmsg_level != SOL_PACKET ||
+ cm->cmsg_type != PACKET_AUXDATA)
+ error(1, 0, "cmsg: level=%d type=%d\n",
+ cm->cmsg_level, cm->cmsg_type);
+
+ if (cm->cmsg_len != CMSG_LEN(sizeof(struct tpacket_auxdata)))
+ error(1, 0, "cmsg: len=%lu expected=%lu",
+ cm->cmsg_len, CMSG_LEN(sizeof(struct tpacket_auxdata)));
+
+ aux = (void *)CMSG_DATA(cm);
+ }
+
+ if (!aux)
+ error(1, 0, "cmsg: no auxdata");
+
+ return aux->tp_status & TP_STATUS_CSUM_VALID;
+}
+
+static int recv_packet(int fd)
+{
+ static char _buf[MAX_HEADER_LEN + MAX_PAYLOAD_LEN];
+ unsigned long total = 0, bad_csums = 0, bad_validations = 0;
+ char ctrl[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
+ struct pkt *buf = (void *)_buf;
+ struct msghdr msg = {0};
+ struct iovec iov;
+ int len, ret;
+
+ iov.iov_base = _buf;
+ iov.iov_len = sizeof(_buf);
+
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ msg.msg_control = ctrl;
+ msg.msg_controllen = sizeof(ctrl);
+
+ while (1) {
+ msg.msg_flags = 0;
+
+ len = recvmsg(fd, &msg, MSG_DONTWAIT);
+ if (len == -1 && errno == EAGAIN)
+ break;
+ if (len == -1)
+ error(1, errno, "recv p");
+
+ if (cfg_family == PF_INET6)
+ ret = recv_verify_packet_ipv6(buf, len);
+ else
+ ret = recv_verify_packet_ipv4(buf, len);
+
+ if (ret == -1 /* skip: non-matching */)
+ continue;
+
+ total++;
+ if (ret == 1)
+ bad_csums++;
+
+ /* Fail if kernel returns valid for known bad csum.
+ * Do not fail if kernel does not validate a good csum:
+ * Absence of validation does not imply invalid.
+ */
+ if (recv_verify_packet_csum(&msg) && cfg_bad_csum) {
+ fprintf(stderr, "cmsg: expected bad csum, pf_packet returns valid\n");
+ bad_validations++;
+ }
+ }
+
+ if (bad_csums || bad_validations)
+ error(1, 0, "rx: errors at pf_packet: total=%lu bad_csums=%lu bad_valids=%lu\n",
+ total, bad_csums, bad_validations);
+
+ return total;
+}
+
+static void parse_args(int argc, char *const argv[])
+{
+ const char *daddr = NULL, *saddr = NULL;
+ int c;
+
+ while ((c = getopt(argc, argv, "46d:D:eEi:l:L:n:r:PRs:S:tTuUzZ")) != -1) {
+ switch (c) {
+ case '4':
+ cfg_family = PF_INET;
+ break;
+ case '6':
+ cfg_family = PF_INET6;
+ break;
+ case 'd':
+ cfg_mac_dst = optarg;
+ break;
+ case 'D':
+ daddr = optarg;
+ break;
+ case 'e':
+ cfg_encap = true;
+ break;
+ case 'E':
+ cfg_bad_csum = true;
+ break;
+ case 'i':
+ cfg_ifname = optarg;
+ break;
+ case 'l':
+ cfg_payload_len = strtol(optarg, NULL, 0);
+ break;
+ case 'L':
+ cfg_timeout_ms = strtol(optarg, NULL, 0) * 1000;
+ break;
+ case 'n':
+ cfg_num_pkt = strtol(optarg, NULL, 0);
+ break;
+ case 'r':
+ cfg_random_seed = strtol(optarg, NULL, 0);
+ break;
+ case 'P':
+ cfg_send_pfpacket = true;
+ break;
+ case 'R':
+ /* only Rx: used with two machine tests */
+ cfg_do_tx = false;
+ break;
+ case 's':
+ cfg_mac_src = optarg;
+ break;
+ case 'S':
+ saddr = optarg;
+ break;
+ case 't':
+ cfg_proto = IPPROTO_TCP;
+ break;
+ case 'T':
+ /* only Tx: used with two machine tests */
+ cfg_do_rx = false;
+ break;
+ case 'u':
+ cfg_proto = IPPROTO_UDP;
+ break;
+ case 'U':
+ /* send using real udp socket,
+ * to exercise tx checksum offload
+ */
+ cfg_send_udp = true;
+ break;
+ case 'z':
+ cfg_zero_disable = true;
+ break;
+ case 'Z':
+ cfg_zero_sum = true;
+ break;
+ default:
+ error(1, 0, "unknown arg %c", c);
+ }
+ }
+
+ if (!daddr || !saddr)
+ error(1, 0, "Must pass -D <daddr> and -S <saddr>");
+
+ if (cfg_do_tx && cfg_send_pfpacket && (!cfg_mac_src || !cfg_mac_dst))
+ error(1, 0, "Transmit with pf_packet requires mac addresses");
+
+ if (cfg_payload_len > MAX_PAYLOAD_LEN)
+ error(1, 0, "Payload length exceeds max");
+
+ if (cfg_proto != IPPROTO_UDP && (cfg_zero_sum || cfg_zero_disable))
+ error(1, 0, "Only UDP supports zero csum");
+
+ if (cfg_zero_sum && !cfg_send_udp)
+ error(1, 0, "Zero checksum conversion requires -U for tx csum offload");
+ if (cfg_zero_sum && cfg_bad_csum)
+ error(1, 0, "Cannot combine zero checksum conversion and invalid checksum");
+ if (cfg_zero_sum && cfg_random_seed)
+ error(1, 0, "Cannot combine zero checksum conversion with randomization");
+
+ if (cfg_family == PF_INET6) {
+ cfg_saddr6.sin6_port = htons(cfg_port_src);
+ cfg_daddr6.sin6_port = htons(cfg_port_dst);
+
+ if (inet_pton(cfg_family, daddr, &cfg_daddr6.sin6_addr) != 1)
+ error(1, errno, "Cannot parse ipv6 -D");
+ if (inet_pton(cfg_family, saddr, &cfg_saddr6.sin6_addr) != 1)
+ error(1, errno, "Cannot parse ipv6 -S");
+ } else {
+ cfg_saddr4.sin_port = htons(cfg_port_src);
+ cfg_daddr4.sin_port = htons(cfg_port_dst);
+
+ if (inet_pton(cfg_family, daddr, &cfg_daddr4.sin_addr) != 1)
+ error(1, errno, "Cannot parse ipv4 -D");
+ if (inet_pton(cfg_family, saddr, &cfg_saddr4.sin_addr) != 1)
+ error(1, errno, "Cannot parse ipv4 -S");
+ }
+
+ if (cfg_do_tx && cfg_random_seed) {
+ /* special case: time-based seed */
+ if (cfg_random_seed == 1)
+ cfg_random_seed = (unsigned int)gettimeofday_ms();
+ srand(cfg_random_seed);
+ fprintf(stderr, "randomization seed: %u\n", cfg_random_seed);
+ }
+}
+
+static void do_tx(void)
+{
+ static char _buf[MAX_HEADER_LEN + MAX_PAYLOAD_LEN];
+ char *buf;
+ int fd, len, i;
+
+ buf = build_packet(_buf, sizeof(_buf), &len);
+
+ if (cfg_send_pfpacket)
+ fd = open_packet();
+ else if (cfg_send_udp)
+ fd = open_inet(SOCK_DGRAM, 0);
+ else
+ fd = open_inet(SOCK_RAW, IPPROTO_RAW);
+
+ for (i = 0; i < cfg_num_pkt; i++) {
+ if (cfg_send_pfpacket)
+ send_packet(fd, buf, len);
+ else
+ send_inet(fd, buf, len);
+
+ /* randomize each packet individually to increase coverage */
+ if (cfg_random_seed) {
+ cfg_payload_len = rand() % MAX_PAYLOAD_LEN;
+ buf = build_packet(_buf, sizeof(_buf), &len);
+ }
+ }
+
+ if (close(fd))
+ error(1, errno, "close tx");
+}
+
+static void do_rx(int fdp, int fdr)
+{
+ unsigned long count_udp = 0, count_pkt = 0;
+ long tleft, tstop;
+ struct pollfd pfd;
+
+ tstop = gettimeofday_ms() + cfg_timeout_ms;
+ tleft = cfg_timeout_ms;
+
+ do {
+ pfd.events = POLLIN;
+ pfd.fd = fdp;
+ if (poll(&pfd, 1, tleft) == -1)
+ error(1, errno, "poll");
+
+ if (pfd.revents & POLLIN)
+ count_pkt += recv_packet(fdp);
+
+ if (cfg_proto == IPPROTO_UDP)
+ count_udp += recv_udp(fdr);
+
+ tleft = tstop - gettimeofday_ms();
+ } while (tleft > 0);
+
+ if (close(fdr))
+ error(1, errno, "close r");
+ if (close(fdp))
+ error(1, errno, "close p");
+
+ if (count_pkt < cfg_num_pkt)
+ error(1, 0, "rx: missing packets at pf_packet: %lu < %u",
+ count_pkt, cfg_num_pkt);
+
+ if (cfg_proto == IPPROTO_UDP) {
+ if (cfg_bad_csum && count_udp)
+ error(1, 0, "rx: unexpected packets at udp");
+ if (!cfg_bad_csum && !count_udp)
+ error(1, 0, "rx: missing packets at udp");
+ }
+}
+
+int main(int argc, char *const argv[])
+{
+ int fdp = -1, fdr = -1; /* -1 to silence -Wmaybe-uninitialized */
+
+ parse_args(argc, argv);
+
+ /* open receive sockets before transmitting */
+ if (cfg_do_rx) {
+ fdp = recv_prepare_packet();
+ fdr = recv_prepare_udp();
+ }
+
+ if (cfg_do_tx)
+ do_tx();
+
+ if (cfg_do_rx)
+ do_rx(fdp, fdr);
+
+ fprintf(stderr, "OK\n");
+ return 0;
+}