diff options
34 files changed, 1257 insertions, 227 deletions
diff --git a/Documentation/devicetree/bindings/net/nfc/nxp,nci.yaml b/Documentation/devicetree/bindings/net/nfc/nxp,nci.yaml index b2558421268a..6924aff0b2c5 100644 --- a/Documentation/devicetree/bindings/net/nfc/nxp,nci.yaml +++ b/Documentation/devicetree/bindings/net/nfc/nxp,nci.yaml @@ -14,7 +14,9 @@ properties: oneOf: - const: nxp,nxp-nci-i2c - items: - - const: nxp,pn547 + - enum: + - nxp,nq310 + - nxp,pn547 - const: nxp,nxp-nci-i2c enable-gpios: diff --git a/Documentation/networking/devlink/devlink-region.rst b/Documentation/networking/devlink/devlink-region.rst index f06dca9a1eb6..9232cd7da301 100644 --- a/Documentation/networking/devlink/devlink-region.rst +++ b/Documentation/networking/devlink/devlink-region.rst @@ -31,6 +31,15 @@ in its ``devlink_region_ops`` structure. If snapshot id is not set in the ``DEVLINK_CMD_REGION_NEW`` request kernel will allocate one and send the snapshot information to user space. +Regions may optionally allow directly reading from their contents without a +snapshot. Direct read requests are not atomic. In particular a read request +of size 256 bytes or larger will be split into multiple chunks. If atomic +access is required, use a snapshot. A driver wishing to enable this for a +region should implement the ``.read`` callback in the ``devlink_region_ops`` +structure. User space can request a direct read by using the +``DEVLINK_ATTR_REGION_DIRECT`` attribute instead of specifying a snapshot +id. + example usage ------------- @@ -65,6 +74,10 @@ example usage $ devlink region read pci/0000:00:05.0/fw-health snapshot 1 address 0 length 16 0000000000000000 0014 95dc 0014 9514 0035 1670 0034 db30 + # Read from the region without a snapshot + $ devlink region read pci/0000:00:05.0/fw-health address 16 length 16 + 0000000000000010 0000 0000 ffff ff04 0029 8c00 0028 8cc8 + As regions are likely very device or driver specific, no generic regions are defined. 
See the driver-specific documentation files for information on the specific regions a driver supports. diff --git a/Documentation/networking/devlink/ice.rst b/Documentation/networking/devlink/ice.rst index 890062da7820..625efb3777d5 100644 --- a/Documentation/networking/devlink/ice.rst +++ b/Documentation/networking/devlink/ice.rst @@ -189,12 +189,21 @@ device data. * - ``nvm-flash`` - The contents of the entire flash chip, sometimes referred to as the device's Non Volatile Memory. + * - ``shadow-ram`` + - The contents of the Shadow RAM, which is loaded from the beginning + of the flash. Although the contents are primarily from the flash, + this area also contains data generated during device boot which is + not stored in flash. * - ``device-caps`` - The contents of the device firmware's capabilities buffer. Useful to determine the current state and configuration of the device. -Users can request an immediate capture of a snapshot via the -``DEVLINK_CMD_REGION_NEW`` +Both the ``nvm-flash`` and ``shadow-ram`` regions can be accessed without a +snapshot. The ``device-caps`` region requires a snapshot as the contents are +sent by firmware and can't be split into separate reads. + +Users can request an immediate capture of a snapshot for all three regions +via the ``DEVLINK_CMD_REGION_NEW`` command. .. 
code:: shell diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_devlink.c b/drivers/crypto/marvell/octeontx2/otx2_cpt_devlink.c index 7503f6b18ac5..a2aba0b0d68a 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cpt_devlink.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_devlink.c @@ -76,10 +76,6 @@ static int otx2_cpt_devlink_info_get(struct devlink *dl, struct otx2_cptpf_dev *cptpf = cpt_dl->cptpf; int err; - err = devlink_info_driver_name_put(req, "rvu_cptpf"); - if (err) - return err; - err = otx2_cpt_dl_info_firmware_version_put(req, cptpf->eng_grps.grp, "fw.ae", OTX2_CPT_AE_TYPES); if (err) diff --git a/drivers/net/dsa/hirschmann/hellcreek.c b/drivers/net/dsa/hirschmann/hellcreek.c index 951f7935c872..595a548bb0a8 100644 --- a/drivers/net/dsa/hirschmann/hellcreek.c +++ b/drivers/net/dsa/hirschmann/hellcreek.c @@ -1176,11 +1176,6 @@ static int hellcreek_devlink_info_get(struct dsa_switch *ds, struct netlink_ext_ack *extack) { struct hellcreek *hellcreek = ds->priv; - int ret; - - ret = devlink_info_driver_name_put(req, "hellcreek"); - if (ret) - return ret; return devlink_info_version_fixed_put(req, DEVLINK_INFO_VERSION_GENERIC_ASIC_ID, diff --git a/drivers/net/dsa/mv88e6xxx/devlink.c b/drivers/net/dsa/mv88e6xxx/devlink.c index 1266eabee086..a08dab75e0c0 100644 --- a/drivers/net/dsa/mv88e6xxx/devlink.c +++ b/drivers/net/dsa/mv88e6xxx/devlink.c @@ -821,11 +821,6 @@ int mv88e6xxx_devlink_info_get(struct dsa_switch *ds, struct netlink_ext_ack *extack) { struct mv88e6xxx_chip *chip = ds->priv; - int err; - - err = devlink_info_driver_name_put(req, "mv88e6xxx"); - if (err) - return err; return devlink_info_version_fixed_put(req, DEVLINK_INFO_VERSION_GENERIC_ASIC_ID, diff --git a/drivers/net/dsa/sja1105/sja1105_devlink.c b/drivers/net/dsa/sja1105/sja1105_devlink.c index 10c6fea1227f..da532614f34a 100644 --- a/drivers/net/dsa/sja1105/sja1105_devlink.c +++ b/drivers/net/dsa/sja1105/sja1105_devlink.c @@ -120,16 +120,10 @@ int sja1105_devlink_info_get(struct 
dsa_switch *ds, struct netlink_ext_ack *extack) { struct sja1105_private *priv = ds->priv; - int rc; - - rc = devlink_info_driver_name_put(req, "sja1105"); - if (rc) - return rc; - rc = devlink_info_version_fixed_put(req, - DEVLINK_INFO_VERSION_GENERIC_ASIC_ID, - priv->info->name); - return rc; + return devlink_info_version_fixed_put(req, + DEVLINK_INFO_VERSION_GENERIC_ASIC_ID, + priv->info->name); } int sja1105_devlink_setup(struct dsa_switch *ds) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 8a6f788f6294..26913dc816d3 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -892,10 +892,6 @@ static int bnxt_dl_info_get(struct devlink *dl, struct devlink_info_req *req, u32 ver = 0; int rc; - rc = devlink_info_driver_name_put(req, DRV_MODULE_NAME); - if (rc) - return rc; - if (BNXT_PF(bp) && (bp->flags & BNXT_FLAG_DSN_VALID)) { sprintf(buf, "%02X-%02X-%02X-%02X-%02X-%02X-%02X-%02X", bp->dsn[7], bp->dsn[6], bp->dsn[5], bp->dsn[4], diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c index 5c6dd3029e2f..76f808d38066 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c @@ -37,18 +37,9 @@ static int dpaa2_eth_dl_info_get(struct devlink *devlink, struct dpaa2_eth_devlink_priv *dl_priv = devlink_priv(devlink); struct dpaa2_eth_priv *priv = dl_priv->dpaa2_priv; char buf[10]; - int err; - - err = devlink_info_driver_name_put(req, KBUILD_MODNAME); - if (err) - return err; scnprintf(buf, 10, "%d.%d", priv->dpni_ver_major, priv->dpni_ver_minor); - err = devlink_info_version_running_put(req, "dpni", buf); - if (err) - return err; - - return 0; + return devlink_info_version_running_put(req, "dpni", buf); } static struct dpaa2_eth_trap_item * diff --git 
a/drivers/net/ethernet/fungible/funeth/funeth_devlink.c b/drivers/net/ethernet/fungible/funeth/funeth_devlink.c index d50c222948b4..4fbeb3fd71a8 100644 --- a/drivers/net/ethernet/fungible/funeth/funeth_devlink.c +++ b/drivers/net/ethernet/fungible/funeth/funeth_devlink.c @@ -3,14 +3,7 @@ #include "funeth.h" #include "funeth_devlink.h" -static int fun_dl_info_get(struct devlink *dl, struct devlink_info_req *req, - struct netlink_ext_ack *extack) -{ - return devlink_info_driver_name_put(req, KBUILD_MODNAME); -} - static const struct devlink_ops fun_dl_ops = { - .info_get = fun_dl_info_get, }; struct devlink *fun_devlink_alloc(struct device *dev) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c index 4c441e6a5082..3d3b69605423 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c @@ -13,11 +13,6 @@ static int hclge_devlink_info_get(struct devlink *devlink, struct hclge_devlink_priv *priv = devlink_priv(devlink); char version_str[HCLGE_DEVLINK_FW_STRING_LEN]; struct hclge_dev *hdev = priv->hdev; - int ret; - - ret = devlink_info_driver_name_put(req, KBUILD_MODNAME); - if (ret) - return ret; snprintf(version_str, sizeof(version_str), "%lu.%lu.%lu.%lu", hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE3_MASK, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c index fdc19868b818..a6c3c5e8f0ab 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c @@ -13,11 +13,6 @@ static int hclgevf_devlink_info_get(struct devlink *devlink, struct hclgevf_devlink_priv *priv = devlink_priv(devlink); char version_str[HCLGEVF_DEVLINK_FW_STRING_LEN]; struct hclgevf_dev *hdev = priv->hdev; - int ret; - - ret = devlink_info_driver_name_put(req, 
KBUILD_MODNAME); - if (ret) - return ret; snprintf(version_str, sizeof(version_str), "%lu.%lu.%lu.%lu", hnae3_get_field(hdev->fw_version, HNAE3_FW_VERSION_BYTE3_MASK, diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c index 1d638216484d..8286e47b4bae 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.c +++ b/drivers/net/ethernet/intel/ice/ice_devlink.c @@ -311,12 +311,6 @@ static int ice_devlink_info_get(struct devlink *devlink, } } - err = devlink_info_driver_name_put(req, KBUILD_MODNAME); - if (err) { - NL_SET_ERR_MSG_MOD(extack, "Unable to set driver name"); - goto out_free_ctx; - } - ice_info_get_dsn(pf, ctx); err = devlink_info_serial_number_put(req, ctx->buf); @@ -1596,21 +1590,22 @@ void ice_devlink_destroy_vf_port(struct ice_vf *vf) #define ICE_DEVLINK_READ_BLK_SIZE (1024 * 1024) +static const struct devlink_region_ops ice_nvm_region_ops; +static const struct devlink_region_ops ice_sram_region_ops; + /** * ice_devlink_nvm_snapshot - Capture a snapshot of the NVM flash contents * @devlink: the devlink instance - * @ops: the devlink region being snapshotted + * @ops: the devlink region to snapshot * @extack: extended ACK response structure * @data: on exit points to snapshot data buffer * - * This function is called in response to the DEVLINK_CMD_REGION_TRIGGER for - * the nvm-flash devlink region. It captures a snapshot of the full NVM flash - * contents, including both banks of flash. This snapshot can later be viewed - * via the devlink-region interface. + * This function is called in response to a DEVLINK_CMD_REGION_NEW for either + * the nvm-flash or shadow-ram region. * - * It captures the flash using the FLASH_ONLY bit set when reading via - * firmware, so it does not read the current Shadow RAM contents. For that, - * use the shadow-ram region. + * It captures a snapshot of the NVM or Shadow RAM flash contents. 
This + * snapshot can then later be viewed via the DEVLINK_CMD_REGION_READ netlink + * interface. * * @returns zero on success, and updates the data pointer. Returns a non-zero * error code on failure. @@ -1622,17 +1617,27 @@ static int ice_devlink_nvm_snapshot(struct devlink *devlink, struct ice_pf *pf = devlink_priv(devlink); struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; + bool read_shadow_ram; u8 *nvm_data, *tmp, i; u32 nvm_size, left; s8 num_blks; int status; - nvm_size = hw->flash.flash_size; + if (ops == &ice_nvm_region_ops) { + read_shadow_ram = false; + nvm_size = hw->flash.flash_size; + } else if (ops == &ice_sram_region_ops) { + read_shadow_ram = true; + nvm_size = hw->flash.sr_words * 2u; + } else { + NL_SET_ERR_MSG_MOD(extack, "Unexpected region in snapshot function"); + return -EOPNOTSUPP; + } + nvm_data = vzalloc(nvm_size); if (!nvm_data) return -ENOMEM; - num_blks = DIV_ROUND_UP(nvm_size, ICE_DEVLINK_READ_BLK_SIZE); tmp = nvm_data; left = nvm_size; @@ -1656,7 +1661,7 @@ static int ice_devlink_nvm_snapshot(struct devlink *devlink, } status = ice_read_flat_nvm(hw, i * ICE_DEVLINK_READ_BLK_SIZE, - &read_sz, tmp, false); + &read_sz, tmp, read_shadow_ram); if (status) { dev_dbg(dev, "ice_read_flat_nvm failed after reading %u bytes, err %d aq_err %d\n", read_sz, status, hw->adminq.sq_last_status); @@ -1677,62 +1682,69 @@ static int ice_devlink_nvm_snapshot(struct devlink *devlink, } /** - * ice_devlink_sram_snapshot - Capture a snapshot of the Shadow RAM contents + * ice_devlink_nvm_read - Read a portion of NVM flash or Shadow RAM contents * @devlink: the devlink instance - * @ops: the devlink region being snapshotted + * @ops: the devlink region to read from * @extack: extended ACK response structure - * @data: on exit points to snapshot data buffer + * @offset: the offset to start at + * @size: the amount to read + * @data: the data buffer to read into * - * This function is called in response to the DEVLINK_CMD_REGION_TRIGGER for - * the 
shadow-ram devlink region. It captures a snapshot of the shadow ram - * contents. This snapshot can later be viewed via the devlink-region - * interface. + * This function is called in response to DEVLINK_CMD_REGION_READ to directly + * read a section of the NVM contents. + * + * It reads from either the nvm-flash or shadow-ram region contents. * * @returns zero on success, and updates the data pointer. Returns a non-zero * error code on failure. */ -static int -ice_devlink_sram_snapshot(struct devlink *devlink, - const struct devlink_region_ops __always_unused *ops, - struct netlink_ext_ack *extack, u8 **data) +static int ice_devlink_nvm_read(struct devlink *devlink, + const struct devlink_region_ops *ops, + struct netlink_ext_ack *extack, + u64 offset, u32 size, u8 *data) { struct ice_pf *pf = devlink_priv(devlink); struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; - u8 *sram_data; - u32 sram_size; - int err; + bool read_shadow_ram; + u64 nvm_size; + int status; - sram_size = hw->flash.sr_words * 2u; - sram_data = vzalloc(sram_size); - if (!sram_data) - return -ENOMEM; + if (ops == &ice_nvm_region_ops) { + read_shadow_ram = false; + nvm_size = hw->flash.flash_size; + } else if (ops == &ice_sram_region_ops) { + read_shadow_ram = true; + nvm_size = hw->flash.sr_words * 2u; + } else { + NL_SET_ERR_MSG_MOD(extack, "Unexpected region in read function"); + return -EOPNOTSUPP; + } - err = ice_acquire_nvm(hw, ICE_RES_READ); - if (err) { + if (offset + size > nvm_size) { + NL_SET_ERR_MSG_MOD(extack, "Cannot read beyond the region size"); + return -ERANGE; + } + + status = ice_acquire_nvm(hw, ICE_RES_READ); + if (status) { dev_dbg(dev, "ice_acquire_nvm failed, err %d aq_err %d\n", - err, hw->adminq.sq_last_status); + status, hw->adminq.sq_last_status); NL_SET_ERR_MSG_MOD(extack, "Failed to acquire NVM semaphore"); - vfree(sram_data); - return err; + return -EIO; } - /* Read from the Shadow RAM, rather than directly from NVM */ - err = 
ice_read_flat_nvm(hw, 0, &sram_size, sram_data, true); - if (err) { + status = ice_read_flat_nvm(hw, (u32)offset, &size, data, + read_shadow_ram); + if (status) { dev_dbg(dev, "ice_read_flat_nvm failed after reading %u bytes, err %d aq_err %d\n", - sram_size, err, hw->adminq.sq_last_status); - NL_SET_ERR_MSG_MOD(extack, - "Failed to read Shadow RAM contents"); + size, status, hw->adminq.sq_last_status); + NL_SET_ERR_MSG_MOD(extack, "Failed to read NVM contents"); ice_release_nvm(hw); - vfree(sram_data); - return err; + return -EIO; } - ice_release_nvm(hw); - *data = sram_data; - return 0; } @@ -1784,12 +1796,14 @@ static const struct devlink_region_ops ice_nvm_region_ops = { .name = "nvm-flash", .destructor = vfree, .snapshot = ice_devlink_nvm_snapshot, + .read = ice_devlink_nvm_read, }; static const struct devlink_region_ops ice_sram_region_ops = { .name = "shadow-ram", .destructor = vfree, - .snapshot = ice_devlink_sram_snapshot, + .snapshot = ice_devlink_nvm_snapshot, + .read = ice_devlink_nvm_read, }; static const struct devlink_region_ops ice_devcaps_region_ops = { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c index 88dee589cb21..bda1a6fa2ec4 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c @@ -1547,14 +1547,7 @@ static int rvu_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, return 0; } -static int rvu_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, - struct netlink_ext_ack *extack) -{ - return devlink_info_driver_name_put(req, DRV_NAME); -} - static const struct devlink_ops rvu_devlink_ops = { - .info_get = rvu_devlink_info_get, .eswitch_mode_get = rvu_devlink_eswitch_mode_get, .eswitch_mode_set = rvu_devlink_eswitch_mode_set, }; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c index 
594029007f85..00aef8f5ac29 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c @@ -490,7 +490,7 @@ static bool rvu_npc_exact_alloc_id(struct rvu *rvu, u32 *seq_id) if (idx == table->tot_ids) { mutex_unlock(&table->lock); dev_err(rvu->dev, "%s: No space in id bitmap (%d)\n", - __func__, bitmap_weight(table->id_bmap, table->tot_ids)); + __func__, table->tot_ids); return false; } @@ -1870,12 +1870,11 @@ int rvu_npc_exact_init(struct rvu *rvu) /* Set capability to true */ rvu->hw->cap.npc_exact_match_enabled = true; - table = kmalloc(sizeof(*table), GFP_KERNEL); + table = kzalloc(sizeof(*table), GFP_KERNEL); if (!table) return -ENOMEM; dev_dbg(rvu->dev, "%s: Memory allocation for table success\n", __func__); - memset(table, 0, sizeof(*table)); rvu->hw->table = table; /* Read table size, ways and depth */ @@ -1899,24 +1898,24 @@ int rvu_npc_exact_init(struct rvu *rvu) table_size = table->mem_table.depth * table->mem_table.ways; /* Allocate bitmap for 4way 2K table */ - table->mem_table.bmap = devm_kcalloc(rvu->dev, BITS_TO_LONGS(table_size), - sizeof(long), GFP_KERNEL); + table->mem_table.bmap = devm_bitmap_zalloc(rvu->dev, table_size, + GFP_KERNEL); if (!table->mem_table.bmap) return -ENOMEM; dev_dbg(rvu->dev, "%s: Allocated bitmap for 4way 2K entry table\n", __func__); /* Allocate bitmap for 32 entry mcam */ - table->cam_table.bmap = devm_kcalloc(rvu->dev, 1, sizeof(long), GFP_KERNEL); + table->cam_table.bmap = devm_bitmap_zalloc(rvu->dev, 32, GFP_KERNEL); if (!table->cam_table.bmap) return -ENOMEM; dev_dbg(rvu->dev, "%s: Allocated bitmap for 32 entry cam\n", __func__); - table->tot_ids = (table->mem_table.depth * table->mem_table.ways) + table->cam_table.depth; - table->id_bmap = devm_kcalloc(rvu->dev, BITS_TO_LONGS(table->tot_ids), - table->tot_ids, GFP_KERNEL); + table->tot_ids = table_size + table->cam_table.depth; + table->id_bmap = devm_bitmap_zalloc(rvu->dev, table->tot_ids, + 
GFP_KERNEL); if (!table->id_bmap) return -ENOMEM; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c index 777a27047c8e..63ef7c41d18d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_devlink.c @@ -77,22 +77,7 @@ static const struct devlink_param otx2_dl_params[] = { otx2_dl_mcam_count_validate), }; -/* Devlink OPs */ -static int otx2_devlink_info_get(struct devlink *devlink, - struct devlink_info_req *req, - struct netlink_ext_ack *extack) -{ - struct otx2_devlink *otx2_dl = devlink_priv(devlink); - struct otx2_nic *pfvf = otx2_dl->pfvf; - - if (is_otx2_vf(pfvf->pcifunc)) - return devlink_info_driver_name_put(req, "rvu_nicvf"); - - return devlink_info_driver_name_put(req, "rvu_nicpf"); -} - static const struct devlink_ops otx2_devlink_ops = { - .info_get = otx2_devlink_info_get, }; int otx2_register_dl(struct otx2_nic *pfvf) diff --git a/drivers/net/ethernet/marvell/prestera/prestera_devlink.c b/drivers/net/ethernet/marvell/prestera/prestera_devlink.c index 84ad05c9f12d..2a4c9df4eb79 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_devlink.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_devlink.c @@ -355,11 +355,6 @@ static int prestera_dl_info_get(struct devlink *dl, { struct prestera_switch *sw = devlink_priv(dl); char buf[16]; - int err; - - err = devlink_info_driver_name_put(req, PRESTERA_DRV_NAME); - if (err) - return err; snprintf(buf, sizeof(buf), "%d.%d.%d", sw->dev->fw_rev.maj, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index cc2ae427dcb0..751bc4a9edcf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -46,10 +46,6 @@ mlx5_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, u32 running_fw, stored_fw; int err; - err = 
devlink_info_driver_name_put(req, KBUILD_MODNAME); - if (err) - return err; - err = devlink_info_version_fixed_put(req, "fw.psid", dev->board_id); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index a83f6bc30072..a0a06e2eff82 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1459,11 +1459,6 @@ mlxsw_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, char buf[32]; int err; - err = devlink_info_driver_name_put(req, - mlxsw_core->bus_info->device_kind); - if (err) - return err; - mlxsw_reg_mgir_pack(mgir_pl); err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mgir), mgir_pl); if (err) diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api.c b/drivers/net/ethernet/microchip/vcap/vcap_api.c index 93efa8243b02..f2435d7ab515 100644 --- a/drivers/net/ethernet/microchip/vcap/vcap_api.c +++ b/drivers/net/ethernet/microchip/vcap/vcap_api.c @@ -985,17 +985,12 @@ static u32 vcap_next_rule_addr(u32 addr, struct vcap_rule_internal *ri) /* Assign a unique rule id and autogenerate one if id == 0 */ static u32 vcap_set_rule_id(struct vcap_rule_internal *ri) { - u32 next_id; - if (ri->data.id != 0) return ri->data.id; - next_id = ri->vctrl->rule_id + 1; - - for (next_id = ri->vctrl->rule_id + 1; next_id < ~0; ++next_id) { + for (u32 next_id = 1; next_id < ~0; ++next_id) { if (!vcap_lookup_rule(ri->vctrl, next_id)) { ri->data.id = next_id; - ri->vctrl->rule_id = next_id; break; } } diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api.h b/drivers/net/ethernet/microchip/vcap/vcap_api.h index ca4499838306..689c7270f2a8 100644 --- a/drivers/net/ethernet/microchip/vcap/vcap_api.h +++ b/drivers/net/ethernet/microchip/vcap/vcap_api.h @@ -268,7 +268,6 @@ struct vcap_operations { /* VCAP API Client control interface */ struct vcap_control { - u32 rule_id; /* last used rule id (unique across VCAP instances) */ struct vcap_operations *ops; 
/* client supplied operations */ const struct vcap_info *vcaps; /* client supplied vcap models */ const struct vcap_statistics *stats; /* client supplied vcap stats */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c index 784f23602a8a..bf6bae557158 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c @@ -239,10 +239,6 @@ nfp_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, char *buf = NULL; int err; - err = devlink_info_driver_name_put(req, "nfp"); - if (err) - return err; - vendor = nfp_hwinfo_lookup(pf->hwinfo, "assembly.vendor"); part = nfp_hwinfo_lookup(pf->hwinfo, "assembly.partno"); sn = nfp_hwinfo_lookup(pf->hwinfo, "assembly.serial"); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c index 567f778433e2..e6ff757895ab 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c @@ -26,10 +26,6 @@ static int ionic_dl_info_get(struct devlink *dl, struct devlink_info_req *req, char buf[16]; int err = 0; - err = devlink_info_driver_name_put(req, IONIC_DRV_NAME); - if (err) - return err; - err = devlink_info_version_running_put(req, DEVLINK_INFO_VERSION_GENERIC_FW, idev->dev_info.fw_version); diff --git a/drivers/net/ethernet/qlogic/qed/qed_devlink.c b/drivers/net/ethernet/qlogic/qed/qed_devlink.c index 6bb4e165b592..922c47797af6 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_devlink.c +++ b/drivers/net/ethernet/qlogic/qed/qed_devlink.c @@ -162,10 +162,6 @@ static int qed_devlink_info_get(struct devlink *devlink, dev_info = &cdev->common_dev_info; - err = devlink_info_driver_name_put(req, KBUILD_MODNAME); - if (err) - return err; - memcpy(buf, cdev->hwfns[0].hw_info.part_num, sizeof(cdev->hwfns[0].hw_info.part_num)); buf[sizeof(cdev->hwfns[0].hw_info.part_num)] = 0; diff --git 
a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index e14686594a71..b962fc8e1397 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -994,9 +994,6 @@ static int nsim_dev_info_get(struct devlink *devlink, { int err; - err = devlink_info_driver_name_put(req, DRV_NAME); - if (err) - return err; err = devlink_info_version_stored_put_ext(req, "fw.mgmt", "10.20.30", DEVLINK_INFO_VERSION_TYPE_COMPONENT); if (err) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 8cff61dbc4b5..716870a4499c 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1512,6 +1512,15 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, phy_resume(phydev); phy_led_triggers_register(phydev); + /* + * If the external PHY used by the current MAC interface is managed + * by another MAC interface, create a device link between the PHY + * device and the MAC device. + */ + if (phydev->mdio.bus->parent && dev->dev.parent != phydev->mdio.bus->parent) + phydev->devlink = device_link_add(dev->dev.parent, &phydev->mdio.dev, + DL_FLAG_PM_RUNTIME | DL_FLAG_STATELESS); + return err; error: @@ -1750,6 +1759,9 @@ void phy_detach(struct phy_device *phydev) struct module *ndev_owner = NULL; struct mii_bus *bus; + if (phydev->devlink) + device_link_del(phydev->devlink); + if (phydev->sysfs_links) { if (dev) sysfs_remove_link(&dev->dev.kobj, "phydev"); diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index 154d58cbd9ce..4bbaccd543ad 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -1647,10 +1647,6 @@ ptp_ocp_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, char buf[32]; int err; - err = devlink_info_driver_name_put(req, KBUILD_MODNAME); - if (err) - return err; - fw_image = bp->fw_loader ? 
"loader" : "fw"; sprintf(buf, "%d.%d", bp->fw_tag, bp->fw_version); err = devlink_info_version_running_put(req, fw_image, buf); diff --git a/include/linux/phy.h b/include/linux/phy.h index 9a3752c0c444..71eeb4e3b1fd 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -529,6 +529,8 @@ struct macsec_ops; * * @mdio: MDIO bus this PHY is on * @drv: Pointer to the driver for this PHY instance + * @devlink: Create a link between phy dev and mac dev, if the external phy + * used by current mac interface is managed by another mac interface. * @phy_id: UID for this device found during discovery * @c45_ids: 802.3-c45 Device Identifiers if is_c45. * @is_c45: Set to true if this PHY uses clause 45 addressing. @@ -618,6 +620,8 @@ struct phy_device { /* And management functions */ struct phy_driver *drv; + struct device_link *devlink; + u32 phy_id; struct phy_c45_device_ids c45_ids; diff --git a/include/net/devlink.h b/include/net/devlink.h index 074a79b8933f..5f6eca5e4a40 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -650,6 +650,10 @@ struct devlink_info_req; * the data variable must be updated to point to the snapshot data. * The function will be called while the devlink instance lock is * held. + * @read: callback to directly read a portion of the region. On success, + * the data pointer will be updated with the contents of the + * requested portion of the region. The function will be called + * while the devlink instance lock is held. * @priv: Pointer to driver private data for the region operation */ struct devlink_region_ops { @@ -659,6 +663,10 @@ struct devlink_region_ops { const struct devlink_region_ops *ops, struct netlink_ext_ack *extack, u8 **data); + int (*read)(struct devlink *devlink, + const struct devlink_region_ops *ops, + struct netlink_ext_ack *extack, + u64 offset, u32 size, u8 *data); void *priv; }; @@ -670,6 +678,10 @@ struct devlink_region_ops { * the data variable must be updated to point to the snapshot data. 
* The function will be called while the devlink instance lock is * held. + * @read: callback to directly read a portion of the region. On success, + * the data pointer will be updated with the contents of the + * requested portion of the region. The function will be called + * while the devlink instance lock is held. * @priv: Pointer to driver private data for the region operation */ struct devlink_port_region_ops { @@ -679,6 +691,10 @@ struct devlink_port_region_ops { const struct devlink_port_region_ops *ops, struct netlink_ext_ack *extack, u8 **data); + int (*read)(struct devlink_port *port, + const struct devlink_port_region_ops *ops, + struct netlink_ext_ack *extack, + u64 offset, u32 size, u8 *data); void *priv; }; @@ -1746,8 +1762,6 @@ int devlink_region_snapshot_create(struct devlink_region *region, u8 *data, u32 snapshot_id); int devlink_info_serial_number_put(struct devlink_info_req *req, const char *sn); -int devlink_info_driver_name_put(struct devlink_info_req *req, - const char *name); int devlink_info_board_serial_number_put(struct devlink_info_req *req, const char *bsn); diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 498d0d5d0957..70191d96af89 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -610,6 +610,8 @@ enum devlink_attr { DEVLINK_ATTR_RATE_TX_PRIORITY, /* u32 */ DEVLINK_ATTR_RATE_TX_WEIGHT, /* u32 */ + DEVLINK_ATTR_REGION_DIRECT, /* flag */ + /* add new attributes above here, update the policy in devlink.c */ __DEVLINK_ATTR_MAX, diff --git a/net/core/devlink.c b/net/core/devlink.c index 0e10a8a68c5e..fca3ebee97b0 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -6431,7 +6431,6 @@ unlock: } static int devlink_nl_cmd_region_read_chunk_fill(struct sk_buff *msg, - struct devlink *devlink, u8 *chunk, u32 chunk_size, u64 addr) { @@ -6461,39 +6460,37 @@ nla_put_failure: #define DEVLINK_REGION_READ_CHUNK_SIZE 256 -static int devlink_nl_region_read_snapshot_fill(struct sk_buff 
*skb, - struct devlink *devlink, - struct devlink_region *region, - struct nlattr **attrs, - u64 start_offset, - u64 end_offset, - u64 *new_offset) +typedef int devlink_chunk_fill_t(void *cb_priv, u8 *chunk, u32 chunk_size, + u64 curr_offset, + struct netlink_ext_ack *extack); + +static int +devlink_nl_region_read_fill(struct sk_buff *skb, devlink_chunk_fill_t *cb, + void *cb_priv, u64 start_offset, u64 end_offset, + u64 *new_offset, struct netlink_ext_ack *extack) { - struct devlink_snapshot *snapshot; u64 curr_offset = start_offset; - u32 snapshot_id; int err = 0; + u8 *data; - *new_offset = start_offset; + /* Allocate and re-use a single buffer */ + data = kmalloc(DEVLINK_REGION_READ_CHUNK_SIZE, GFP_KERNEL); + if (!data) + return -ENOMEM; - snapshot_id = nla_get_u32(attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]); - snapshot = devlink_region_snapshot_get_by_id(region, snapshot_id); - if (!snapshot) - return -EINVAL; + *new_offset = start_offset; while (curr_offset < end_offset) { u32 data_size; - u8 *data; - if (end_offset - curr_offset < DEVLINK_REGION_READ_CHUNK_SIZE) - data_size = end_offset - curr_offset; - else - data_size = DEVLINK_REGION_READ_CHUNK_SIZE; + data_size = min_t(u32, end_offset - curr_offset, + DEVLINK_REGION_READ_CHUNK_SIZE); - data = &snapshot->data[curr_offset]; - err = devlink_nl_cmd_region_read_chunk_fill(skb, devlink, - data, data_size, - curr_offset); + err = cb(cb_priv, data, data_size, curr_offset, extack); + if (err) + break; + + err = devlink_nl_cmd_region_read_chunk_fill(skb, data, data_size, curr_offset); if (err) break; @@ -6501,21 +6498,57 @@ static int devlink_nl_region_read_snapshot_fill(struct sk_buff *skb, } *new_offset = curr_offset; + kfree(data); + return err; } +static int +devlink_region_snapshot_fill(void *cb_priv, u8 *chunk, u32 chunk_size, + u64 curr_offset, + struct netlink_ext_ack __always_unused *extack) +{ + struct devlink_snapshot *snapshot = cb_priv; + + memcpy(chunk, &snapshot->data[curr_offset], chunk_size); + + 
return 0; +} + +static int +devlink_region_port_direct_fill(void *cb_priv, u8 *chunk, u32 chunk_size, + u64 curr_offset, struct netlink_ext_ack *extack) +{ + struct devlink_region *region = cb_priv; + + return region->port_ops->read(region->port, region->port_ops, extack, + curr_offset, chunk_size, chunk); +} + +static int +devlink_region_direct_fill(void *cb_priv, u8 *chunk, u32 chunk_size, + u64 curr_offset, struct netlink_ext_ack *extack) +{ + struct devlink_region *region = cb_priv; + + return region->ops->read(region->devlink, region->ops, extack, + curr_offset, chunk_size, chunk); +} + static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { const struct genl_dumpit_info *info = genl_dumpit_info(cb); + struct nlattr *chunks_attr, *region_attr, *snapshot_attr; u64 ret_offset, start_offset, end_offset = U64_MAX; struct nlattr **attrs = info->attrs; struct devlink_port *port = NULL; + devlink_chunk_fill_t *region_cb; struct devlink_region *region; - struct nlattr *chunks_attr; const char *region_name; struct devlink *devlink; unsigned int index; + void *region_cb_priv; void *hdr; int err; @@ -6527,8 +6560,8 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, devl_lock(devlink); - if (!attrs[DEVLINK_ATTR_REGION_NAME] || - !attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]) { + if (!attrs[DEVLINK_ATTR_REGION_NAME]) { + NL_SET_ERR_MSG(cb->extack, "No region name provided"); err = -EINVAL; goto out_unlock; } @@ -6543,7 +6576,8 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, } } - region_name = nla_data(attrs[DEVLINK_ATTR_REGION_NAME]); + region_attr = attrs[DEVLINK_ATTR_REGION_NAME]; + region_name = nla_data(region_attr); if (port) region = devlink_port_region_get_by_name(port, region_name); @@ -6551,10 +6585,51 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, region = devlink_region_get_by_name(devlink, region_name); if (!region) { + NL_SET_ERR_MSG_ATTR(cb->extack, region_attr, 
"Requested region does not exist"); err = -EINVAL; goto out_unlock; } + snapshot_attr = attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]; + if (!snapshot_attr) { + if (!nla_get_flag(attrs[DEVLINK_ATTR_REGION_DIRECT])) { + NL_SET_ERR_MSG(cb->extack, "No snapshot id provided"); + err = -EINVAL; + goto out_unlock; + } + + if (!region->ops->read) { + NL_SET_ERR_MSG(cb->extack, "Requested region does not support direct read"); + err = -EOPNOTSUPP; + goto out_unlock; + } + + if (port) + region_cb = &devlink_region_port_direct_fill; + else + region_cb = &devlink_region_direct_fill; + region_cb_priv = region; + } else { + struct devlink_snapshot *snapshot; + u32 snapshot_id; + + if (nla_get_flag(attrs[DEVLINK_ATTR_REGION_DIRECT])) { + NL_SET_ERR_MSG_ATTR(cb->extack, snapshot_attr, "Direct region read does not use snapshot"); + err = -EINVAL; + goto out_unlock; + } + + snapshot_id = nla_get_u32(snapshot_attr); + snapshot = devlink_region_snapshot_get_by_id(region, snapshot_id); + if (!snapshot) { + NL_SET_ERR_MSG_ATTR(cb->extack, snapshot_attr, "Requested snapshot does not exist"); + err = -EINVAL; + goto out_unlock; + } + region_cb = &devlink_region_snapshot_fill; + region_cb_priv = snapshot; + } + if (attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR] && attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]) { if (!start_offset) @@ -6603,10 +6678,9 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, goto nla_put_failure; } - err = devlink_nl_region_read_snapshot_fill(skb, devlink, - region, attrs, - start_offset, - end_offset, &ret_offset); + err = devlink_nl_region_read_fill(skb, region_cb, region_cb_priv, + start_offset, end_offset, &ret_offset, + cb->extack); if (err && err != -EMSGSIZE) goto nla_put_failure; @@ -6633,14 +6707,6 @@ out_unlock: return err; } -int devlink_info_driver_name_put(struct devlink_info_req *req, const char *name) -{ - if (!req->msg) - return 0; - return nla_put_string(req->msg, DEVLINK_ATTR_INFO_DRIVER_NAME, name); -} -EXPORT_SYMBOL_GPL(devlink_info_driver_name_put); 
- int devlink_info_serial_number_put(struct devlink_info_req *req, const char *sn) { if (!req->msg) @@ -6749,11 +6815,25 @@ int devlink_info_version_running_put_ext(struct devlink_info_req *req, } EXPORT_SYMBOL_GPL(devlink_info_version_running_put_ext); +static int devlink_nl_driver_info_get(struct device_driver *drv, + struct devlink_info_req *req) +{ + if (!drv) + return 0; + + if (drv->name[0]) + return nla_put_string(req->msg, DEVLINK_ATTR_INFO_DRIVER_NAME, + drv->name); + + return 0; +} + static int devlink_nl_info_fill(struct sk_buff *msg, struct devlink *devlink, enum devlink_command cmd, u32 portid, u32 seq, int flags, struct netlink_ext_ack *extack) { + struct device *dev = devlink_to_dev(devlink); struct devlink_info_req req = {}; void *hdr; int err; @@ -6767,7 +6847,13 @@ devlink_nl_info_fill(struct sk_buff *msg, struct devlink *devlink, goto err_cancel_msg; req.msg = msg; - err = devlink->ops->info_get(devlink, &req, extack); + if (devlink->ops->info_get) { + err = devlink->ops->info_get(devlink, &req, extack); + if (err) + goto err_cancel_msg; + } + + err = devlink_nl_driver_info_get(dev->driver, &req); if (err) goto err_cancel_msg; @@ -6786,9 +6872,6 @@ static int devlink_nl_cmd_info_get_doit(struct sk_buff *skb, struct sk_buff *msg; int err; - if (!devlink->ops->info_get) - return -EOPNOTSUPP; - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; @@ -6814,7 +6897,7 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg, int err = 0; devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) { - if (idx < start || !devlink->ops->info_get) + if (idx < start) goto inc; devl_lock(devlink); @@ -9251,6 +9334,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_SELFTESTS] = { .type = NLA_NESTED }, [DEVLINK_ATTR_RATE_TX_PRIORITY] = { .type = NLA_U32 }, [DEVLINK_ATTR_RATE_TX_WEIGHT] = { .type = NLA_U32 }, + [DEVLINK_ATTR_REGION_DIRECT] = { .type = NLA_FLAG }, }; static const 
struct genl_small_ops devlink_nl_ops[] = { diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index ff8807cc9c2e..ee38ca888244 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only bind_bhash +csum cmsg_sender fin_ack_lat gro diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 880e6ded6ed5..3007e98a6d64 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -74,6 +74,7 @@ TEST_PROGS += test_ingress_egress_chaining.sh TEST_GEN_PROGS += so_incoming_cpu TEST_PROGS += sctp_vrf.sh TEST_GEN_FILES += sctp_hello +TEST_GEN_FILES += csum TEST_FILES := settings diff --git a/tools/testing/selftests/net/csum.c b/tools/testing/selftests/net/csum.c new file mode 100644 index 000000000000..82a1c1839da6 --- /dev/null +++ b/tools/testing/selftests/net/csum.c @@ -0,0 +1,986 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Test hardware checksum offload: Rx + Tx, IPv4 + IPv6, TCP + UDP. + * + * The test runs on two machines to exercise the NIC. For this reason it + * is not integrated in kselftests. + * + * CMD=$((./csum -[46] -[tu] -S $SADDR -D $DADDR -[RT] -r 1 $EXTRA_ARGS)) + * + * Rx: + * + * The sender sends packets with a known checksum field using PF_INET(6) + * SOCK_RAW sockets. + * + * good packet: $CMD [-t] + * bad packet: $CMD [-t] -E + * + * The receiver reads UDP packets with a UDP socket. This is not an + * option for TCP packets ('-t'). Optionally insert an iptables filter + * to avoid these entering the real protocol stack. + * + * The receiver also reads all packets with a PF_PACKET socket, to + * observe whether both good and bad packets arrive on the host. And to + * read the optional TP_STATUS_CSUM_VALID bit. This requires setting + * option PACKET_AUXDATA, and works only for CHECKSUM_UNNECESSARY. 
+ * + * Tx: + * + * The sender needs to build CHECKSUM_PARTIAL packets to exercise tx + * checksum offload. + * + * The sender can send packets with a UDP socket. + * + * Optionally crafts a packet that sums up to zero to verify that the + * device writes negative zero 0xFFFF in this case to distinguish from + * 0x0000 (checksum disabled), as required by RFC 768. Hit this case + * by choosing a specific source port. + * + * good packet: $CMD -U + * zero csum: $CMD -U -Z + * + * The sender can also build packets with PF_PACKET with PACKET_VNET_HDR, + * to cover more protocols. PF_PACKET requires passing src and dst mac + * addresses. + * + * good packet: $CMD -s $smac -d $dmac -P [-t] + * + * Argument '-z' sends UDP packets with a 0x0000 (checksum disabled) field, + * to verify that the NIC passes these packets unmodified. + * + * Argument '-e' adds a transport mode encapsulation header between + * network and transport header. This will fail for devices that parse + * headers. Should work on devices that implement protocol agnostic tx + * checksum offload (NETIF_F_HW_CSUM). + * + * Argument '-r $SEED' optionally randomizes header, payload and length + * to increase coverage between packets sent. SEED 1 further chooses a + * different seed for each run (and logs this for reproducibility). It + * is advised to enable this for extra coverage in continuous testing. 
+ */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <asm/byteorder.h> +#include <errno.h> +#include <error.h> +#include <linux/filter.h> +#include <linux/if_packet.h> +#include <linux/ipv6.h> +#include <linux/virtio_net.h> +#include <net/ethernet.h> +#include <net/if.h> +#include <netinet/if_ether.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/tcp.h> +#include <netinet/udp.h> +#include <poll.h> +#include <sched.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/types.h> +#include <unistd.h> + +static bool cfg_bad_csum; +static int cfg_family = PF_INET6; +static int cfg_num_pkt = 4; +static bool cfg_do_rx = true; +static bool cfg_do_tx = true; +static bool cfg_encap; +static char *cfg_ifname = "eth0"; +static char *cfg_mac_dst; +static char *cfg_mac_src; +static int cfg_proto = IPPROTO_UDP; +static int cfg_payload_char = 'a'; +static int cfg_payload_len = 100; +static uint16_t cfg_port_dst = 34000; +static uint16_t cfg_port_src = 33000; +static uint16_t cfg_port_src_encap = 33001; +static unsigned int cfg_random_seed; +static int cfg_rcvbuf = 1 << 22; /* be able to queue large cfg_num_pkt */ +static bool cfg_send_pfpacket; +static bool cfg_send_udp; +static int cfg_timeout_ms = 2000; +static bool cfg_zero_disable; /* skip checksum: set to zero (udp only) */ +static bool cfg_zero_sum; /* create packet that adds up to zero */ + +static struct sockaddr_in cfg_daddr4 = {.sin_family = AF_INET}; +static struct sockaddr_in cfg_saddr4 = {.sin_family = AF_INET}; +static struct sockaddr_in6 cfg_daddr6 = {.sin6_family = AF_INET6}; +static struct sockaddr_in6 cfg_saddr6 = {.sin6_family = AF_INET6}; + +#define ENC_HEADER_LEN (sizeof(struct udphdr) + sizeof(struct udp_encap_hdr)) +#define MAX_HEADER_LEN (sizeof(struct ipv6hdr) + ENC_HEADER_LEN + 
sizeof(struct tcphdr)) +#define MAX_PAYLOAD_LEN 1024 + +/* Trivial demo encap. Stand-in for transport layer protocols like ESP or PSP */ +struct udp_encap_hdr { + uint8_t nexthdr; + uint8_t padding[3]; +}; + +/* Ipaddrs, for pseudo csum. Global var is ugly, pass through funcs was worse */ +static void *iph_addr_p; + +static unsigned long gettimeofday_ms(void) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000UL) + (tv.tv_usec / 1000UL); +} + +static uint32_t checksum_nofold(char *data, size_t len, uint32_t sum) +{ + uint16_t *words = (uint16_t *)data; + int i; + + for (i = 0; i < len / 2; i++) + sum += words[i]; + + if (len & 1) + sum += ((unsigned char *)data)[len - 1]; + + return sum; +} + +static uint16_t checksum_fold(void *data, size_t len, uint32_t sum) +{ + sum = checksum_nofold(data, len, sum); + + while (sum > 0xFFFF) + sum = (sum & 0xFFFF) + (sum >> 16); + + return ~sum; +} + +static uint16_t checksum(void *th, uint16_t proto, size_t len) +{ + uint32_t sum; + int alen; + + alen = cfg_family == PF_INET6 ? 
32 : 8; + + sum = checksum_nofold(iph_addr_p, alen, 0); + sum += htons(proto); + sum += htons(len); + + /* With CHECKSUM_PARTIAL kernel expects non-inverted pseudo csum */ + if (cfg_do_tx && cfg_send_pfpacket) + return ~checksum_fold(NULL, 0, sum); + else + return checksum_fold(th, len, sum); +} + +static void *build_packet_ipv4(void *_iph, uint8_t proto, unsigned int len) +{ + struct iphdr *iph = _iph; + + memset(iph, 0, sizeof(*iph)); + + iph->version = 4; + iph->ihl = 5; + iph->ttl = 8; + iph->protocol = proto; + iph->saddr = cfg_saddr4.sin_addr.s_addr; + iph->daddr = cfg_daddr4.sin_addr.s_addr; + iph->tot_len = htons(sizeof(*iph) + len); + iph->check = checksum_fold(iph, sizeof(*iph), 0); + + iph_addr_p = &iph->saddr; + + return iph + 1; +} + +static void *build_packet_ipv6(void *_ip6h, uint8_t proto, unsigned int len) +{ + struct ipv6hdr *ip6h = _ip6h; + + memset(ip6h, 0, sizeof(*ip6h)); + + ip6h->version = 6; + ip6h->payload_len = htons(len); + ip6h->nexthdr = proto; + ip6h->hop_limit = 64; + ip6h->saddr = cfg_saddr6.sin6_addr; + ip6h->daddr = cfg_daddr6.sin6_addr; + + iph_addr_p = &ip6h->saddr; + + return ip6h + 1; +} + +static void *build_packet_udp(void *_uh) +{ + struct udphdr *uh = _uh; + + uh->source = htons(cfg_port_src); + uh->dest = htons(cfg_port_dst); + uh->len = htons(sizeof(*uh) + cfg_payload_len); + uh->check = 0; + + /* choose source port so that uh->check adds up to zero */ + if (cfg_zero_sum) { + uh->source = 0; + uh->source = checksum(uh, IPPROTO_UDP, sizeof(*uh) + cfg_payload_len); + + fprintf(stderr, "tx: changing sport: %hu -> %hu\n", + cfg_port_src, ntohs(uh->source)); + cfg_port_src = ntohs(uh->source); + } + + if (cfg_zero_disable) + uh->check = 0; + else + uh->check = checksum(uh, IPPROTO_UDP, sizeof(*uh) + cfg_payload_len); + + if (cfg_bad_csum) + uh->check = ~uh->check; + + fprintf(stderr, "tx: sending checksum: 0x%x\n", uh->check); + return uh + 1; +} + +static void *build_packet_tcp(void *_th) +{ + struct tcphdr *th = _th; + + 
th->source = htons(cfg_port_src); + th->dest = htons(cfg_port_dst); + th->doff = 5; + th->check = 0; + + th->check = checksum(th, IPPROTO_TCP, sizeof(*th) + cfg_payload_len); + + if (cfg_bad_csum) + th->check = ~th->check; + + fprintf(stderr, "tx: sending checksum: 0x%x\n", th->check); + return th + 1; +} + +static char *build_packet_udp_encap(void *_uh) +{ + struct udphdr *uh = _uh; + struct udp_encap_hdr *eh = _uh + sizeof(*uh); + + /* outer dst == inner dst, to simplify BPF filter + * outer src != inner src, to demultiplex on recv + */ + uh->dest = htons(cfg_port_dst); + uh->source = htons(cfg_port_src_encap); + uh->check = 0; + uh->len = htons(sizeof(*uh) + + sizeof(*eh) + + sizeof(struct tcphdr) + + cfg_payload_len); + + eh->nexthdr = IPPROTO_TCP; + + return build_packet_tcp(eh + 1); +} + +static char *build_packet(char *buf, int max_len, int *len) +{ + uint8_t proto; + char *off; + int tlen; + + if (cfg_random_seed) { + int *buf32 = (void *)buf; + int i; + + for (i = 0; i < (max_len / sizeof(int)); i++) + buf32[i] = rand(); + } else { + memset(buf, cfg_payload_char, max_len); + } + + if (cfg_proto == IPPROTO_UDP) + tlen = sizeof(struct udphdr) + cfg_payload_len; + else + tlen = sizeof(struct tcphdr) + cfg_payload_len; + + if (cfg_encap) { + proto = IPPROTO_UDP; + tlen += ENC_HEADER_LEN; + } else { + proto = cfg_proto; + } + + if (cfg_family == PF_INET) + off = build_packet_ipv4(buf, proto, tlen); + else + off = build_packet_ipv6(buf, proto, tlen); + + if (cfg_encap) + off = build_packet_udp_encap(off); + else if (cfg_proto == IPPROTO_UDP) + off = build_packet_udp(off); + else + off = build_packet_tcp(off); + + /* only pass the payload, but still compute headers for cfg_zero_sum */ + if (cfg_send_udp) { + *len = cfg_payload_len; + return off; + } + + *len = off - buf + cfg_payload_len; + return buf; +} + +static int open_inet(int ipproto, int protocol) +{ + int fd; + + fd = socket(cfg_family, ipproto, protocol); + if (fd == -1) + error(1, errno, "socket 
inet"); + + if (cfg_family == PF_INET6) { + /* may have been updated by cfg_zero_sum */ + cfg_saddr6.sin6_port = htons(cfg_port_src); + + if (bind(fd, (void *)&cfg_saddr6, sizeof(cfg_saddr6))) + error(1, errno, "bind dgram 6"); + if (connect(fd, (void *)&cfg_daddr6, sizeof(cfg_daddr6))) + error(1, errno, "connect dgram 6"); + } else { + /* may have been updated by cfg_zero_sum */ + cfg_saddr4.sin_port = htons(cfg_port_src); + + if (bind(fd, (void *)&cfg_saddr4, sizeof(cfg_saddr4))) + error(1, errno, "bind dgram 4"); + if (connect(fd, (void *)&cfg_daddr4, sizeof(cfg_daddr4))) + error(1, errno, "connect dgram 4"); + } + + return fd; +} + +static int open_packet(void) +{ + int fd, one = 1; + + fd = socket(PF_PACKET, SOCK_RAW, 0); + if (fd == -1) + error(1, errno, "socket packet"); + + if (setsockopt(fd, SOL_PACKET, PACKET_VNET_HDR, &one, sizeof(one))) + error(1, errno, "setsockopt packet_vnet_ndr"); + + return fd; +} + +static void send_inet(int fd, const char *buf, int len) +{ + int ret; + + ret = write(fd, buf, len); + if (ret == -1) + error(1, errno, "write"); + if (ret != len) + error(1, 0, "write: %d", ret); +} + +static void eth_str_to_addr(const char *str, unsigned char *eth) +{ + if (sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", + ð[0], ð[1], ð[2], ð[3], ð[4], ð[5]) != 6) + error(1, 0, "cannot parse mac addr %s", str); +} + +static void send_packet(int fd, const char *buf, int len) +{ + struct virtio_net_hdr vh = {0}; + struct sockaddr_ll addr = {0}; + struct msghdr msg = {0}; + struct ethhdr eth; + struct iovec iov[3]; + int ret; + + addr.sll_family = AF_PACKET; + addr.sll_halen = ETH_ALEN; + addr.sll_ifindex = if_nametoindex(cfg_ifname); + if (!addr.sll_ifindex) + error(1, errno, "if_nametoindex %s", cfg_ifname); + + vh.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + if (cfg_family == PF_INET6) { + vh.csum_start = sizeof(struct ethhdr) + sizeof(struct ipv6hdr); + addr.sll_protocol = htons(ETH_P_IPV6); + } else { + vh.csum_start = sizeof(struct ethhdr) + sizeof(struct 
iphdr); + addr.sll_protocol = htons(ETH_P_IP); + } + + if (cfg_encap) + vh.csum_start += ENC_HEADER_LEN; + + if (cfg_proto == IPPROTO_TCP) { + vh.csum_offset = __builtin_offsetof(struct tcphdr, check); + vh.hdr_len = vh.csum_start + sizeof(struct tcphdr); + } else { + vh.csum_offset = __builtin_offsetof(struct udphdr, check); + vh.hdr_len = vh.csum_start + sizeof(struct udphdr); + } + + eth_str_to_addr(cfg_mac_src, eth.h_source); + eth_str_to_addr(cfg_mac_dst, eth.h_dest); + eth.h_proto = addr.sll_protocol; + + iov[0].iov_base = &vh; + iov[0].iov_len = sizeof(vh); + + iov[1].iov_base = ð + iov[1].iov_len = sizeof(eth); + + iov[2].iov_base = (void *)buf; + iov[2].iov_len = len; + + msg.msg_iov = iov; + msg.msg_iovlen = sizeof(iov) / sizeof(iov[0]); + + msg.msg_name = &addr; + msg.msg_namelen = sizeof(addr); + + ret = sendmsg(fd, &msg, 0); + if (ret == -1) + error(1, errno, "sendmsg packet"); + if (ret != sizeof(vh) + sizeof(eth) + len) + error(1, errno, "sendmsg packet: %u", ret); +} + +static int recv_prepare_udp(void) +{ + int fd; + + fd = socket(cfg_family, SOCK_DGRAM, 0); + if (fd == -1) + error(1, errno, "socket r"); + + if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, + &cfg_rcvbuf, sizeof(cfg_rcvbuf))) + error(1, errno, "setsockopt SO_RCVBUF r"); + + if (cfg_family == PF_INET6) { + if (bind(fd, (void *)&cfg_daddr6, sizeof(cfg_daddr6))) + error(1, errno, "bind r"); + } else { + if (bind(fd, (void *)&cfg_daddr4, sizeof(cfg_daddr4))) + error(1, errno, "bind r"); + } + + return fd; +} + +/* Filter out all traffic that is not cfg_proto with our destination port. + * + * Otherwise background noise may cause PF_PACKET receive queue overflow, + * dropping the expected packets and failing the test. 
+ */ +static void __recv_prepare_packet_filter(int fd, int off_nexthdr, int off_dport) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4), + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, off_nexthdr), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_encap ? IPPROTO_UDP : cfg_proto, 0, 2), + BPF_STMT(BPF_LD + BPF_H + BPF_ABS, off_dport), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_port_dst, 1, 0), + BPF_STMT(BPF_RET + BPF_K, 0), + BPF_STMT(BPF_RET + BPF_K, 0xFFFF), + }; + struct sock_fprog prog = {}; + + prog.filter = filter; + prog.len = sizeof(filter) / sizeof(struct sock_filter); + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog))) + error(1, errno, "setsockopt filter"); +} + +static void recv_prepare_packet_filter(int fd) +{ + const int off_dport = offsetof(struct tcphdr, dest); /* same for udp */ + + if (cfg_family == AF_INET) + __recv_prepare_packet_filter(fd, offsetof(struct iphdr, protocol), + sizeof(struct iphdr) + off_dport); + else + __recv_prepare_packet_filter(fd, offsetof(struct ipv6hdr, nexthdr), + sizeof(struct ipv6hdr) + off_dport); +} + +static void recv_prepare_packet_bind(int fd) +{ + struct sockaddr_ll laddr = {0}; + + laddr.sll_family = AF_PACKET; + + if (cfg_family == PF_INET) + laddr.sll_protocol = htons(ETH_P_IP); + else + laddr.sll_protocol = htons(ETH_P_IPV6); + + laddr.sll_ifindex = if_nametoindex(cfg_ifname); + if (!laddr.sll_ifindex) + error(1, 0, "if_nametoindex %s", cfg_ifname); + + if (bind(fd, (void *)&laddr, sizeof(laddr))) + error(1, errno, "bind pf_packet"); +} + +static int recv_prepare_packet(void) +{ + int fd, one = 1; + + fd = socket(PF_PACKET, SOCK_DGRAM, 0); + if (fd == -1) + error(1, errno, "socket p"); + + if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, + &cfg_rcvbuf, sizeof(cfg_rcvbuf))) + error(1, errno, "setsockopt SO_RCVBUF p"); + + /* enable auxdata to recv checksum status (valid vs unknown) */ + if (setsockopt(fd, 
SOL_PACKET, PACKET_AUXDATA, &one, sizeof(one))) + error(1, errno, "setsockopt auxdata"); + + /* install filter to restrict packet flow to match */ + recv_prepare_packet_filter(fd); + + /* bind to address family to start packet flow */ + recv_prepare_packet_bind(fd); + + return fd; +} + +static int recv_udp(int fd) +{ + static char buf[MAX_PAYLOAD_LEN]; + int ret, count = 0; + + while (1) { + ret = recv(fd, buf, sizeof(buf), MSG_DONTWAIT); + if (ret == -1 && errno == EAGAIN) + break; + if (ret == -1) + error(1, errno, "recv r"); + + fprintf(stderr, "rx: udp: len=%u\n", ret); + count++; + } + + return count; +} + +static int recv_verify_csum(void *th, int len, uint16_t sport, uint16_t csum_field) +{ + uint16_t csum; + + csum = checksum(th, cfg_proto, len); + + fprintf(stderr, "rx: pkt: sport=%hu len=%u csum=0x%hx verify=0x%hx\n", + sport, len, csum_field, csum); + + /* csum must be zero unless cfg_bad_csum indicates bad csum */ + if (csum && !cfg_bad_csum) { + fprintf(stderr, "pkt: bad csum\n"); + return 1; + } else if (cfg_bad_csum && !csum) { + fprintf(stderr, "pkt: good csum, while bad expected\n"); + return 1; + } + + if (cfg_zero_sum && csum_field != 0xFFFF) { + fprintf(stderr, "pkt: zero csum: field should be 0xFFFF, is 0x%hx\n", csum_field); + return 1; + } + + return 0; +} + +static int recv_verify_packet_tcp(void *th, int len) +{ + struct tcphdr *tcph = th; + + if (len < sizeof(*tcph) || tcph->dest != htons(cfg_port_dst)) + return -1; + + return recv_verify_csum(th, len, ntohs(tcph->source), tcph->check); +} + +static int recv_verify_packet_udp_encap(void *th, int len) +{ + struct udp_encap_hdr *eh = th; + + if (len < sizeof(*eh) || eh->nexthdr != IPPROTO_TCP) + return -1; + + return recv_verify_packet_tcp(eh + 1, len - sizeof(*eh)); +} + +static int recv_verify_packet_udp(void *th, int len) +{ + struct udphdr *udph = th; + + if (len < sizeof(*udph)) + return -1; + + if (udph->dest != htons(cfg_port_dst)) + return -1; + + if (udph->source == 
htons(cfg_port_src_encap)) + return recv_verify_packet_udp_encap(udph + 1, + len - sizeof(*udph)); + + return recv_verify_csum(th, len, ntohs(udph->source), udph->check); +} + +static int recv_verify_packet_ipv4(void *nh, int len) +{ + struct iphdr *iph = nh; + uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto; + + if (len < sizeof(*iph) || iph->protocol != proto) + return -1; + + iph_addr_p = &iph->saddr; + if (proto == IPPROTO_TCP) + return recv_verify_packet_tcp(iph + 1, len - sizeof(*iph)); + else + return recv_verify_packet_udp(iph + 1, len - sizeof(*iph)); +} + +static int recv_verify_packet_ipv6(void *nh, int len) +{ + struct ipv6hdr *ip6h = nh; + uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto; + + if (len < sizeof(*ip6h) || ip6h->nexthdr != proto) + return -1; + + iph_addr_p = &ip6h->saddr; + + if (proto == IPPROTO_TCP) + return recv_verify_packet_tcp(ip6h + 1, len - sizeof(*ip6h)); + else + return recv_verify_packet_udp(ip6h + 1, len - sizeof(*ip6h)); +} + +/* return whether auxdata includes TP_STATUS_CSUM_VALID */ +static bool recv_verify_packet_csum(struct msghdr *msg) +{ + struct tpacket_auxdata *aux = NULL; + struct cmsghdr *cm; + + if (msg->msg_flags & MSG_CTRUNC) + error(1, 0, "cmsg: truncated"); + + for (cm = CMSG_FIRSTHDR(msg); cm; cm = CMSG_NXTHDR(msg, cm)) { + if (cm->cmsg_level != SOL_PACKET || + cm->cmsg_type != PACKET_AUXDATA) + error(1, 0, "cmsg: level=%d type=%d\n", + cm->cmsg_level, cm->cmsg_type); + + if (cm->cmsg_len != CMSG_LEN(sizeof(struct tpacket_auxdata))) + error(1, 0, "cmsg: len=%lu expected=%lu", + cm->cmsg_len, CMSG_LEN(sizeof(struct tpacket_auxdata))); + + aux = (void *)CMSG_DATA(cm); + } + + if (!aux) + error(1, 0, "cmsg: no auxdata"); + + return aux->tp_status & TP_STATUS_CSUM_VALID; +} + +static int recv_packet(int fd) +{ + static char _buf[MAX_HEADER_LEN + MAX_PAYLOAD_LEN]; + unsigned long total = 0, bad_csums = 0, bad_validations = 0; + char ctrl[CMSG_SPACE(sizeof(struct tpacket_auxdata))]; + struct pkt *buf = 
(void *)_buf; + struct msghdr msg = {0}; + struct iovec iov; + int len, ret; + + iov.iov_base = _buf; + iov.iov_len = sizeof(_buf); + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + msg.msg_control = ctrl; + msg.msg_controllen = sizeof(ctrl); + + while (1) { + msg.msg_flags = 0; + + len = recvmsg(fd, &msg, MSG_DONTWAIT); + if (len == -1 && errno == EAGAIN) + break; + if (len == -1) + error(1, errno, "recv p"); + + if (cfg_family == PF_INET6) + ret = recv_verify_packet_ipv6(buf, len); + else + ret = recv_verify_packet_ipv4(buf, len); + + if (ret == -1 /* skip: non-matching */) + continue; + + total++; + if (ret == 1) + bad_csums++; + + /* Fail if kernel returns valid for known bad csum. + * Do not fail if kernel does not validate a good csum: + * Absence of validation does not imply invalid. + */ + if (recv_verify_packet_csum(&msg) && cfg_bad_csum) { + fprintf(stderr, "cmsg: expected bad csum, pf_packet returns valid\n"); + bad_validations++; + } + } + + if (bad_csums || bad_validations) + error(1, 0, "rx: errors at pf_packet: total=%lu bad_csums=%lu bad_valids=%lu\n", + total, bad_csums, bad_validations); + + return total; +} + +static void parse_args(int argc, char *const argv[]) +{ + const char *daddr = NULL, *saddr = NULL; + int c; + + while ((c = getopt(argc, argv, "46d:D:eEi:l:L:n:r:PRs:S:tTuUzZ")) != -1) { + switch (c) { + case '4': + cfg_family = PF_INET; + break; + case '6': + cfg_family = PF_INET6; + break; + case 'd': + cfg_mac_dst = optarg; + break; + case 'D': + daddr = optarg; + break; + case 'e': + cfg_encap = true; + break; + case 'E': + cfg_bad_csum = true; + break; + case 'i': + cfg_ifname = optarg; + break; + case 'l': + cfg_payload_len = strtol(optarg, NULL, 0); + break; + case 'L': + cfg_timeout_ms = strtol(optarg, NULL, 0) * 1000; + break; + case 'n': + cfg_num_pkt = strtol(optarg, NULL, 0); + break; + case 'r': + cfg_random_seed = strtol(optarg, NULL, 0); + break; + case 'P': + cfg_send_pfpacket = true; + break; + case 'R': + /* only Rx: used 
with two machine tests */ + cfg_do_tx = false; + break; + case 's': + cfg_mac_src = optarg; + break; + case 'S': + saddr = optarg; + break; + case 't': + cfg_proto = IPPROTO_TCP; + break; + case 'T': + /* only Tx: used with two machine tests */ + cfg_do_rx = false; + break; + case 'u': + cfg_proto = IPPROTO_UDP; + break; + case 'U': + /* send using real udp socket, + * to exercise tx checksum offload + */ + cfg_send_udp = true; + break; + case 'z': + cfg_zero_disable = true; + break; + case 'Z': + cfg_zero_sum = true; + break; + default: + error(1, 0, "unknown arg %c", c); + } + } + + if (!daddr || !saddr) + error(1, 0, "Must pass -D <daddr> and -S <saddr>"); + + if (cfg_do_tx && cfg_send_pfpacket && (!cfg_mac_src || !cfg_mac_dst)) + error(1, 0, "Transmit with pf_packet requires mac addresses"); + + if (cfg_payload_len > MAX_PAYLOAD_LEN) + error(1, 0, "Payload length exceeds max"); + + if (cfg_proto != IPPROTO_UDP && (cfg_zero_sum || cfg_zero_disable)) + error(1, 0, "Only UDP supports zero csum"); + + if (cfg_zero_sum && !cfg_send_udp) + error(1, 0, "Zero checksum conversion requires -U for tx csum offload"); + if (cfg_zero_sum && cfg_bad_csum) + error(1, 0, "Cannot combine zero checksum conversion and invalid checksum"); + if (cfg_zero_sum && cfg_random_seed) + error(1, 0, "Cannot combine zero checksum conversion with randomization"); + + if (cfg_family == PF_INET6) { + cfg_saddr6.sin6_port = htons(cfg_port_src); + cfg_daddr6.sin6_port = htons(cfg_port_dst); + + if (inet_pton(cfg_family, daddr, &cfg_daddr6.sin6_addr) != 1) + error(1, errno, "Cannot parse ipv6 -D"); + if (inet_pton(cfg_family, saddr, &cfg_saddr6.sin6_addr) != 1) + error(1, errno, "Cannot parse ipv6 -S"); + } else { + cfg_saddr4.sin_port = htons(cfg_port_src); + cfg_daddr4.sin_port = htons(cfg_port_dst); + + if (inet_pton(cfg_family, daddr, &cfg_daddr4.sin_addr) != 1) + error(1, errno, "Cannot parse ipv4 -D"); + if (inet_pton(cfg_family, saddr, &cfg_saddr4.sin_addr) != 1) + error(1, errno, "Cannot 
parse ipv4 -S"); + } + + if (cfg_do_tx && cfg_random_seed) { + /* special case: time-based seed */ + if (cfg_random_seed == 1) + cfg_random_seed = (unsigned int)gettimeofday_ms(); + srand(cfg_random_seed); + fprintf(stderr, "randomization seed: %u\n", cfg_random_seed); + } +} + +static void do_tx(void) +{ + static char _buf[MAX_HEADER_LEN + MAX_PAYLOAD_LEN]; + char *buf; + int fd, len, i; + + buf = build_packet(_buf, sizeof(_buf), &len); + + if (cfg_send_pfpacket) + fd = open_packet(); + else if (cfg_send_udp) + fd = open_inet(SOCK_DGRAM, 0); + else + fd = open_inet(SOCK_RAW, IPPROTO_RAW); + + for (i = 0; i < cfg_num_pkt; i++) { + if (cfg_send_pfpacket) + send_packet(fd, buf, len); + else + send_inet(fd, buf, len); + + /* randomize each packet individually to increase coverage */ + if (cfg_random_seed) { + cfg_payload_len = rand() % MAX_PAYLOAD_LEN; + buf = build_packet(_buf, sizeof(_buf), &len); + } + } + + if (close(fd)) + error(1, errno, "close tx"); +} + +static void do_rx(int fdp, int fdr) +{ + unsigned long count_udp = 0, count_pkt = 0; + long tleft, tstop; + struct pollfd pfd; + + tstop = gettimeofday_ms() + cfg_timeout_ms; + tleft = cfg_timeout_ms; + + do { + pfd.events = POLLIN; + pfd.fd = fdp; + if (poll(&pfd, 1, tleft) == -1) + error(1, errno, "poll"); + + if (pfd.revents & POLLIN) + count_pkt += recv_packet(fdp); + + if (cfg_proto == IPPROTO_UDP) + count_udp += recv_udp(fdr); + + tleft = tstop - gettimeofday_ms(); + } while (tleft > 0); + + if (close(fdr)) + error(1, errno, "close r"); + if (close(fdp)) + error(1, errno, "close p"); + + if (count_pkt < cfg_num_pkt) + error(1, 0, "rx: missing packets at pf_packet: %lu < %u", + count_pkt, cfg_num_pkt); + + if (cfg_proto == IPPROTO_UDP) { + if (cfg_bad_csum && count_udp) + error(1, 0, "rx: unexpected packets at udp"); + if (!cfg_bad_csum && !count_udp) + error(1, 0, "rx: missing packets at udp"); + } +} + +int main(int argc, char *const argv[]) +{ + int fdp = -1, fdr = -1; /* -1 to silence 
-Wmaybe-uninitialized */ + + parse_args(argc, argv); + + /* open receive sockets before transmitting */ + if (cfg_do_rx) { + fdp = recv_prepare_packet(); + fdr = recv_prepare_udp(); + } + + if (cfg_do_tx) + do_tx(); + + if (cfg_do_rx) + do_rx(fdp, fdr); + + fprintf(stderr, "OK\n"); + return 0; +} |